Как извлечь данные из таблицы? - PullRequest
0 голосов
/ 03 августа 2020
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Download the page. The timeout keeps the script from blocking forever on a
# stalled connection, and raise_for_status() aborts on an HTTP error response
# instead of feeding an error page to the parser.
res = requests.get(
    "https://www.worldometers.info/coronavirus/#countries",
    timeout=30,
)
res.raise_for_status()

soup = BeautifulSoup(res.text, "html.parser")

# Locate the table by its id and fail with a clear message if the page no
# longer contains it (otherwise the next line dies with an AttributeError on
# None).
table = soup.find("table", {"id": "main_table_countries_today"})
if table is None:
    raise RuntimeError('table "main_table_countries_today" not found in the page')

# Column names are the stripped texts of the <th> cells inside <thead>.
columns = [th.get_text(strip=True) for th in table.find("thead").find_all("th")]

# One list of stripped <td> texts per <tr> inside <tbody>.
rows = [
    [td.get_text(strip=True) for td in tr.find_all("td")]
    for tr in table.find("tbody").find_all("tr")
]

# Build the frame, save it without the index column, and show it.
df = pd.DataFrame(rows, columns=columns)
df.to_csv("data.csv", index=False)
print(df)

Output:
           #           Country,Other  ... 1 Deathevery X ppl 1 Testevery X ppl
0                  North America  ...                                     
1                  South America  ...                                     
2                           Asia  ...                                     
3                         Europe  ...                                     
4                         Africa  ...                                     
..   ...                     ...  ...                ...               ...
218  211               St. Barth  ...                                    8
219  212  British Virgin Islands  ...             30,249                24
220  213   Saint Pierre Miquelon  ...                                     
221  214                Anguilla  ...                                   40
222  215                   China  ...            310,601                16

[223 rows x 19 columns]

    

Я изменил приведённый выше код, но почему вместо всей таблицы отображается только часть данных? И как можно выбрать столбцы по индексу? Я хотел бы сохранить только пять столбцов: «Страна», «Общее количество случаев», «Общее количество смертей», «Общее количество выздоровевших» и «Население».

1 Ответ

0 голосов
/ 03 августа 2020
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Fetch the page with a bounded wait: without a timeout requests.get() may
# block indefinitely. raise_for_status() turns an HTTP error response into an
# exception so an error page is never parsed as if it were the data page.
res = requests.get(
    "https://www.worldometers.info/coronavirus/#countries",
    timeout=30,
)
res.raise_for_status()

soup = BeautifulSoup(res.text, "html.parser")

# Find the table by id; stop with an explicit error if it is absent rather
# than failing later with an AttributeError on None.
table = soup.find("table", {"id": "main_table_countries_today"})
if table is None:
    raise RuntimeError('table "main_table_countries_today" not found in the page')

# Header: stripped text of every <th> in <thead>.
columns = [cell.get_text(strip=True) for cell in table.find("thead").find_all("th")]

# Body: for each <tr> in <tbody>, the stripped text of every <td>.
rows = [
    [cell.get_text(strip=True) for cell in tr.find_all("td")]
    for tr in table.find("tbody").find_all("tr")
]

# Assemble the DataFrame, write it to data.csv without the index, print it.
df = pd.DataFrame(rows, columns=columns)
df.to_csv("data.csv", index=False)
print(df)

Вывод:

#,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",Tot Cases/1M pop,Deaths/1M pop,TotalTests,Tests/1M pop,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl
,North America,"5,657,552","+5,378","222,196",+295,"2,919,610","+4,662","2,515,746","26,013",,,,,,North America,,,
,South America,"4,245,834","+1,360","146,906",+89,"2,851,587",+188,"1,247,341","14,300",,,,,,South America,,,
,Asia,"4,453,650","+3,721","99,365",+41,"3,301,717","+5,326","1,052,568","19,086",,,,,,Asia,,,
,Europe,"2,898,953",+456,"203,794",,"1,748,496",+41,"946,663","5,143",,,,,,Europe,,,
,Africa,"961,388",,"20,350",,"615,346",+2,"325,692","1,150",,,,,,Africa,,,
,Oceania,"20,106",+397,246,+13,"12,276",+202,"7,584",43,,,,,,Australia/Oceania,,,
,,721,,15,,651,,55,4,,,,,,,,,
,World,"18,238,204","+11,312","692,872",+438,"11,449,683","+10,421","6,095,649","65,739","2,340",88.9,,,,All,,,
1,USA,"4,813,647",,"158,365",,"2,380,217",,"2,275,065","18,623","14,535",478,"59,935,508","180,977","331,176,957",North America,69,"2,091",6
2,Brazil,"2,733,677",,"94,130",,"1,884,051",,"755,496","8,318","12,853",443,"13,096,132","61,573","212,694,204",South America,78,"2,260",16
3,India,"1,805,838","+1,136","38,176",+15,"1,188,389","+1,161","579,273","8,944","1,307",28,"20,202,858","14,627","1,381,196,835",Asia,765,"36,180",68
4,Russia,"850,870",,"14,128",,"650,173",,"186,569","2,300","5,830",97,"28,793,260","197,295","145,940,242",Europe,172,"10,330",5
5,South Africa,"511,485",,"8,366",,"347,227",,"155,892",539,"8,615",141,"3,036,779","51,147","59,373,395",Africa,116,"7,097",20
...
...
...

введите описание изображения здесь

...