Я написал код для скрейпинга (парсинга) страницы https://www1.nseindia.com/live_market/dynaContent/live_watch/fxTracker/optChainDataByExpDates.jsp?symbol=USDINR&instrument=OPTCUR&expiryDt=17JUL2020. Мой код получает заголовки столбцов таблицы, но не сами данные. Кто-нибудь может подсказать, что я делаю не так? Я использовал BeautifulSoup и ориентировался на HTML-структуру страницы.
import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime as dt,time
import os
from pathlib import Path
# Browser-like request headers sent with the option-chain request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) chrome/80.0.3987.132 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept-Encoding': 'gzip, deflate',
}
url = "https://www1.nseindia.com/live_market/dynaContent/live_watch/fxTracker/optChainDataByExpDates.jsp"
symbol = 'USDINR'
exp = '29JUL2020'

# id of the HTML element that holds the option-chain table.
TABLE_ID = "octable"
# Header cells that are group labels or placeholders, not data columns.
IGNORED_HEADERS = ('CALLS', 'PUTS', 'Chart', '\xc2\xa0', '\xa0')
# Cells of every data row that carry values (the first cell is skipped).
DATA_CELLS = slice(1, 22)
# Characters trimmed from both ends of a cell. Spaces and non-breaking spaces
# are included so that a padded "-" placeholder still compares equal to "-".
CELL_STRIP_CHARS = ' \n\r\t\xa0":'


def fetch_option_chain(symbol, expiry, timeout=30):
    """Download the option-chain page and return it as a parsed soup.

    The expiry is sent as ``expiryDt``: that is the parameter name the
    endpoint URL uses.  Sending it under another name (the script used
    ``date``) makes the request carry no expiry at all.

    Raises ``requests.HTTPError`` when the server answers with an error status.
    """
    page = requests.get(
        url,
        params={"symbol": symbol, "instrument": "OPTCUR", "expiryDt": expiry},
        headers=headers,
        timeout=timeout,
    )
    page.raise_for_status()
    return BeautifulSoup(page.content, 'html.parser')


def extract_columns(soup):
    """Return the data column names found in the ``<thead>`` of the table(s)."""
    names = []
    for table in soup.find_all(id=TABLE_ID):
        head = table.find('thead')
        if head is None:
            print("no thead")
            continue
        names.extend(
            th.text for tr in head.find_all('tr') for th in tr.find_all('th')
        )
    return [name for name in names if name not in IGNORED_HEADERS]


def clean_cell(text):
    """Trim surrounding whitespace, quotes and colons from one cell's text."""
    return text.strip(CELL_STRIP_CHARS)


def extract_rows(soup):
    """Return the cleaned cell texts of every data row of the table.

    The first two ``<tr>`` elements and the last one are skipped, exactly as
    the script always did.

    Raises ``ValueError`` when the page contains no option-chain table.
    """
    table = soup.find(id=TABLE_ID)
    if table is None:
        raise ValueError(f"no element with id {TABLE_ID!r} in the page")
    data_rows = table.find_all('tr')[2:-1]
    return [
        [clean_cell(td.get_text()) for td in tr.find_all('td')[DATA_CELLS]]
        for tr in data_rows
    ]


def build_table(columns, rows, expiry, as_of):
    """Assemble the final DataFrame from column names and row cell lists.

    Rows shorter than ``columns`` are padded with missing values.  A row with
    more cells than columns raises ``ValueError``.  ``Expiry`` and ``Date``
    columns are appended and cells equal to ``'-'`` become ``"0"``.
    """
    width = len(columns)
    padded = []
    for cells in rows:
        cells = list(cells)
        if len(cells) > width:
            raise ValueError(
                f"row has {len(cells)} cells but only {width} columns were found"
            )
        padded.append(cells + [None] * (width - len(cells)))
    table = pd.DataFrame(padded, columns=list(columns))
    table['Expiry'] = expiry
    table['Date'] = as_of
    return table.replace('-', "0")


def main():
    """Fetch the option chain for ``symbol``/``exp``, print and return it."""
    soup = fetch_option_chain(symbol, exp)
    # module 1 : Getting table columns
    columns = extract_columns(soup)
    print(columns)
    # module 2: Getting Data
    # NOTE(review): the script assigned an undefined name `date` here; today's
    # date is assumed to be what was meant -- TODO confirm.
    new_table = build_table(columns, extract_rows(soup), exp, dt.date.today())
    print(new_table)
    return new_table


if __name__ == "__main__":
    main()