Чтобы собрать данные со страницы (выполнить скрапинг), нужно использовать requests.session,
чтобы файлы cookie устанавливались правильно. Кроме того, в POST-запросе есть параметр ud,
который использует страница и который должен быть задан корректно.
Например (этот код собирает данные по всем станциям и отчётам и сохраняет их в словаре data):
import requests
from bs4 import BeautifulSoup
from pprint import pprint
# Public search form; the script downloads it first to read the station list
# and the hidden ``ud`` field.
url = 'http://delhigovt.nic.in/FireReport/r_publicSearch.asp?user=public'

# Address the search form is POSTed to.
post_url = 'http://delhigovt.nic.in/FireReport/a_publicSearch.asp'

# Form fields sent with every search. ``ud`` and ``fstation`` start blank and
# are filled in by the scraping loop; the other filters stay empty.
params = dict.fromkeys(
    ('ud', 'fstation', 'caller', 'add', 'frmdate', 'todate'),
    '',
)
params['save'] = 'Search'
def open_report(s, url):
    """Download one fire report page and return its text content.

    Args:
        s: Session-like object whose ``get(url)`` returns a response exposing
            ``content`` and ``raise_for_status()`` (the script passes a
            ``requests`` session).
        url: Report link as found in the search results, normally relative
            to the ``FireReport/`` directory (for example
            ``f_publicReport.asp?rep_no=...``).

    Returns:
        The text of the second top-level ``<table>`` of the report page,
        with each fragment stripped and the fragments joined by one space.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        IndexError: If the page has fewer than two top-level tables.
    """
    # Imported locally so this function does not depend on the file's
    # top-level import list.
    from urllib.parse import urljoin

    # For relative links urljoin gives the same URL as plain concatenation,
    # and it also copes with absolute or root-relative hrefs.
    report_url = urljoin('http://delhigovt.nic.in/FireReport/', url)
    print(report_url)

    response = s.get(report_url)
    # Fail loudly on HTTP errors instead of parsing an error page as a report.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    tables = soup.select('body > table')
    if len(tables) < 2:
        raise IndexError(
            'report page {} has no second top-level table'.format(report_url)
        )
    # Only the text of the report table is returned here.
    return tables[1].get_text(strip=True, separator=' ')
# Maps station name -> list of report tuples:
# (row number, report number, date, address, report link, report text).
data = {}

with requests.Session() as s:
    # One session is reused for every request so the site's cookies are kept.
    # The first GET returns the search form, which carries the station list
    # and the hidden ``ud`` value needed by the POST.
    response = s.get(url)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    # Station name -> option value, skipping the "Select Fire Station"
    # placeholder entry.
    # NOTE(review): newer Soup Sieve releases deprecate ``:contains`` in
    # favour of ``:-soup-contains`` -- confirm against the installed version
    # before changing the selector.
    stations = {
        option.get_text(strip=True): option['value']
        for option in soup.select('select[name="fstation"] option[value]:not(:contains("Select Fire Station"))')
    }

    ud_input = soup.select_one('input[name="ud"][value]')
    if ud_input is None:
        # Without this value the search cannot be submitted; report it
        # explicitly instead of failing on ``None['value']``.
        raise RuntimeError('search form has no "ud" input; page layout may have changed')
    params['ud'] = ud_input['value']

    for station_name, station_id in stations.items():
        print('Scraping station {} id={}'.format(station_name, station_id))
        params['fstation'] = int(station_id)

        response = s.post(post_url, data=params)
        # An error page would contain no result rows and the station would be
        # silently skipped, so stop on HTTP errors.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'lxml')

        # Result rows are the ones with a cell that directly holds a report link.
        for tr in soup.select('tr:has(> td > a[href^="f_publicReport.asp?rep_no="])'):
            no, fire_report_no, date, address = tr.select('td')
            link = fire_report_no.a['href']

            report = (
                no.get_text(strip=True),
                fire_report_no.get_text(strip=True),
                date.get_text(strip=True),
                address.get_text(strip=True),
                link,
                open_report(s, link),
            )
            data.setdefault(station_name, []).append(report)
            pprint(report)

# pprint(data) # <-- here is your data
Выводит:
Scraping station Badli id=33
http://delhigovt.nic.in/FireReport/f_publicReport.asp?rep_no=200600024&ud=6668
('1',
'200600024',
'1-Apr-2006',
'Shahbad, Daulat Pur.',
'f_publicReport.asp?rep_no=200600024&ud=6668',
'Current Date:\xa0\xa0\xa0Tuesday, January 7, 2020 Fire Report '
'Number  : 200600024 Operational Jurisdiction of Fire Station : '
'Badli Information Received From: PCR Full Address of Incident Place: '
'Shahbad, Daulat Pur. Date of Receipt of Call : Saturday, April 1, 2006 '
'Time of Receipt of Call \t : 17\xa0Hrs\xa0:\xa055\xa0Min Time of '
'Departure From Fire Station: 17\xa0Hrs\xa0:\xa056\xa0Min Approximate '
'Distance From Fire Station: 3\xa0\xa0Kilometers Time of Arrival at Fire '
'Scene: 17\xa0Hrs\xa0:\xa059\xa0Min Nature of Call Fire Date of Leaving From '
'Fire Scene: 4/1/2006 Time of Leaving From Fire Scene: 18\xa0Hrs\xa0:\xa0'
'30\xa0Min Type of Occupancy: Others Occupancy Details in Case of Others: '
'NDPL Category of Fire: Small Type of Building: Low Rise Details of Affected '
'Area: Fire was in electrical wiring. Divisional Officer Delhi Fire Service '
'Disclaimer: This is a computer generated report.\r\n'
'Neither department nor its associates, information providers or content '
'providers warrant or guarantee the timeliness, sequence, accuracy or '
'completeness of this information.')
http://delhigovt.nic.in/FireReport/f_publicReport.asp?rep_no=200600161&ud=6668
('2',
'200600161',
'5-Apr-2006',
'Haidarpur towards Mubarak Pur , Outer Ring Road, Near Nullah, Delhi.',
'f_publicReport.asp?rep_no=200600161&ud=6668',
'Current Date:\xa0\xa0\xa0Tuesday, January 7, 2020 Fire Report '
'Number  : 200600161 Operational Jurisdiction of Fire Station : '
'Badli Information Received From: PCR Full Address of Incident Place: '
'Haidarpur towards Mubarak Pur , Outer Ring Road, Near Nullah, Delhi. Date of '
'Receipt of Call : Wednesday, April 5, 2006 Time of Receipt of Call \t'
' : 19\xa0Hrs\xa0:\xa010\xa0Min Time of Departure From Fire Station: '
'19\xa0Hrs\xa0:\xa011\xa0Min Approximate Distance From Fire Station: '
'1.5\xa0\xa0Kilometers Time of Arrival at Fire Scene: 19\xa0Hrs\xa0:\xa013\xa0'
'Min Nature of Call Fire Date of Leaving From Fire Scene: 4/5/2006 Time of '
'Leaving From Fire Scene: 20\xa0Hrs\xa0:\xa050\xa0Min Type of Occupancy: '
'Others Occupancy Details in Case of Others: MCD Category of Fire: Small Type '
'of Building: Others Building Details in Case of Others: On Road Details of '
'Affected Area: Fire was in Rubbish and dry tree on road. Divisional Officer '
'Delhi Fire Service Disclaimer: This is a computer generated report.\r\n'
'Neither department nor its associates, information providers or content '
'providers warrant or guarantee the timeliness, sequence, accuracy or '
'completeness of this information.')
...и так далее.