import pandas as pd
import requests
from bs4 import BeautifulSoup
#url = "https://en.wikipedia.org/wiki/Template:2019%E2%80%9320_coronavirus_pandemic_data/Philippines_medical_cases_summary"
# Lookup table: CSS class name found on a table element -> text recorded for it.
css_content = {
    # Case-status classes.
    'status-a': 'Admitted',
    'status-r': 'Recovered',
    'status-d': 'Died',
    # Yes / no / to-be-announced flag classes.
    'yes': 'Yes',
    'no': 'No',
    'tba': 'TBA',
    # Class carried by the table's header row; the value looks like a skip
    # marker rather than display text -- TODO confirm intended use.
    'covid-sticky': 'skip_header',
}
def Check_att(source, value, attribute='class', current=None):
    """Return the label mapped to ``value`` if ``source`` carries it in ``attribute``.

    Matches markup of the form ``<tag attribute='value'>``, e.g.
    ``<td class='status-a'>``.

    Args:
        source: Parsed element exposing an ``attrs`` mapping (a BeautifulSoup tag).
        value: Attribute token to look for; also the key looked up in
            ``css_content``.
        attribute: Name of the attribute to inspect. Defaults to ``'class'``.
        current: Label already found for this element. When truthy it is
            returned unchanged, so a chain of calls keeps the first match.
            When omitted, the module-level ``col_value`` is used if it
            exists (the protocol existing callers rely on).

    Returns:
        ``current`` if it is truthy; otherwise ``css_content[value]`` when
        ``value`` is present in the attribute (``''`` if it has no entry);
        otherwise ``''``.
    """
    if current is None:
        # Legacy callers keep the running result in a module-level name
        # instead of passing it in. Using .get() avoids a NameError when
        # that name has not been defined yet.
        current = globals().get('col_value', '')
    if current:
        return current

    attr_values = source.attrs.get(attribute, [])
    if isinstance(attr_values, str):
        # Single-valued attributes come back as a plain string; split it into
        # tokens so ``in`` does not degrade into a substring test
        # (e.g. 'a' in 'table').
        attr_values = attr_values.split()

    if value in attr_values:
        return css_content.get(value, '')
    return ''
# Download the article and pull its first "wikitable" into a DataFrame.
url = 'https://en.wikipedia.org/wiki/2020_coronavirus_pandemic_in_the_Philippines'
# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() stops us from parsing an HTTP error page as if it were data.
page = requests.get(url, timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

table = soup.find('table', class_='wikitable')
if table is None:
    raise RuntimeError("no table with class 'wikitable' found at {}".format(url))

# Column headers are the <th> cells of the row(s) marked 'covid-sticky'.
column_names = [
    header.get_text().strip()
    for header in table.select('tr.covid-sticky > th')
]
n_columns = len(column_names)

# CSS classes that stand for a cell's content, in priority order: the first
# class present on a cell decides its value.
status_classes = ('status-a', 'status-r', 'status-d', 'yes', 'no', 'tba')

records = []
# The first <tr> is skipped, as before; rows without any <td> (header-only
# rows) are skipped too so they do not become empty DataFrame rows.
for row in table.find_all('tr')[1:]:
    columns = row.find_all('td')
    if not columns:
        continue
    if len(columns) > n_columns:
        raise ValueError(
            'table row has {} cells but only {} column headers were found'.format(
                len(columns), n_columns
            )
        )

    record = []
    for column in columns:
        # NOTE: col_value must stay a module-level name. Check_att reads it to
        # return the label found so far instead of looking again.
        col_value = ''
        for css_class in status_classes:
            col_value = Check_att(column, css_class)
            if col_value:
                break
        if not col_value:
            # No recognised class on the cell: fall back to its visible text.
            col_value = column.get_text().strip()
        record.append(col_value)

    # Rows shorter than the header are padded with NaN.
    record.extend([float('nan')] * (n_columns - len(record)))
    records.append(record)

# One DataFrame row per data row (the row count used to be the number of
# <td> cells, which padded the frame with all-NaN rows).
n_rows = len(records)
df = pd.DataFrame(records, columns=column_names)

# Best-effort numeric conversion: columns that are not purely numeric are
# deliberately left as they are.
for col in df:
    try:
        df[col] = df[col].astype(float)
    except ValueError:
        pass

print(df)