Похоже, что сайт динамический: беглый просмотр исходного кода страницы показывает, что самой таблицы в разметке нет — она достраивается уже в браузере. Поэтому вам понадобится инструмент управления браузером, например selenium:
import re
from collections import namedtuple

from bs4 import BeautifulSoup as soup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
# Start Chrome through a local chromedriver binary and open the 2018 world
# ranking. The URL fragment asks for page 0, 25 rows, sorted by rank
# ascending, with the "scores" columns shown.
chromedriver_path = '/Users/jamespetullo/Downloads/chromedriver'
rankings_url = 'https://www.timeshighereducation.com/world-university-rankings/2018/world-ranking#!/page/0/length/25/sort_by/rank/sort_order/asc/cols/scores'
d = webdriver.Chrome(chromedriver_path)
d.get(rankings_url)
def page_results(html):
    """Extract the ranking rows from one rendered results page.

    Args:
        html: Markup of the rankings page, e.g. ``driver.page_source``.

    Returns:
        A list of ``school`` namedtuples with the fields ``ranking``,
        ``name``, ``location`` and ``scores``. ``scores`` is a dict keyed by
        'overall', 'teaching', 'research', 'citations', 'income' and
        'outlook'. Every value is the raw text of the matching element.
        The columns are paired positionally with ``zip``, so the result is
        as long as the shortest column found on the page.
    """
    school = namedtuple('school', ['ranking', 'name', 'location', 'scores'])
    # Assumes the score cells appear in this order within each table row --
    # TODO confirm against the live markup.
    score_keys = ['overall', 'teaching', 'research', 'citations', 'income', 'outlook']

    # Parse the document once and reuse the tree for every column lookup.
    tree = soup(html, 'lxml')

    def cell_texts(tag, css_class):
        """Return the text of every ``tag`` element whose class matches."""
        return [node.text for node in tree.find_all(tag, {'class': css_class})]

    rankings = cell_texts('td', 'rank sorting_1 sorting_2')
    names = cell_texts('a', 'ranking-institution-title')
    locations = cell_texts('div', 'location')
    # Raw string: same pattern as before, without invalid-escape warnings.
    full_scores = cell_texts('td', re.compile(r'scores\s+[\w_]+\-score'))

    # The score cells come back as one flat list; regroup them per school.
    group_size = len(score_keys)
    final_scores = [
        dict(zip(score_keys, full_scores[start:start + group_size]))
        for start in range(0, len(full_scores), group_size)
    ]
    return [school(*row) for row in zip(rankings, names, locations, final_scores)]
# Scrape the page that is already loaded, then visit every pagination link
# (an anchor whose visible text is a number) and scrape what it shows.
pages = [page_results(d.page_source)]
links = d.find_elements_by_tag_name('a')
for link in links:
    try:
        # Reading ``text`` is a browser round-trip too, so it sits inside the
        # ``try``: the anchors were collected before any click and may be
        # stale once the page has changed.
        if not link.text.isdigit():
            continue
        link.click()
        # NOTE(review): page_source is read immediately after the click;
        # an explicit wait may be needed if the table updates
        # asynchronously -- confirm against the live site.
        pages.append(page_results(d.page_source))
    except WebDriverException:
        # Best effort: skip anchors that went stale or cannot be clicked,
        # without hiding KeyboardInterrupt or unrelated programming errors.
        continue
Пример вывода:
[[school(ranking=u'1', name=u'University of Oxford', location=u'United Kingdom', scores={'outlook': u'95.0', 'overall': u'94.3', 'research': u'99.5', 'citations': u'99.1', 'income': u'63.7', 'teaching': u'86.7'}), school(ranking=u'2', name=u'University of Cambridge', location=u'United Kingdom', scores={'outlook': u'93.0', 'overall': u'93.2', 'research': u'97.8', 'citations': u'97.5', 'income': u'51.5', 'teaching': u'87.8'}), school(ranking=u'=3', name=u'California Institute of Technology', location=u'United States', scores={'outlook': u'59.7', 'overall': u'93.0', 'research': u'97.5', 'citations': u'99.5', 'income': u'92.6', 'teaching': u'90.3'}), school(ranking=u'=3', name=u'Stanford University', location=u'United States', scores={'outlook': u'77.6', 'overall': u'93.0', 'research': u'96.7', 'citations': u'99.9', 'income': u'60.5', 'teaching': u'89.1'}), school(ranking=u'5', name=u'Massachusetts Institute of Technology', location=u'United States', scores={'outlook': u'87.6', 'overall': u'92.5', 'research': u'91.9', 'citations': u'100.0', 'income': u'88.4', 'teaching': u'87.3'}), school(ranking=u'6', name=u'Harvard University', location=u'United States', scores={'outlook': u'79.7', 'overall': u'91.8', 'research': u'98.4', 'citations': u'99.7', 'income': u'46.4', 'teaching': u'84.2'}), school(ranking=u'7', name=u'Princeton University', location=u'United States', scores={'outlook': u'78.7', 'overall': u'91.1', 'research': u'93.9', 'citations': u'99.6', 'income': u'58.0', 'teaching': u'85.7'}), school(ranking=u'8', name=u'Imperial College London', location=u'United Kingdom', scores={'outlook': u'96.6', 'overall': u'89.2', 'research': u'88.7', 'citations': u'96.7', 'income': u'71.6', 'teaching': u'81.7'}), school(ranking=u'9', name=u'University of Chicago', location=u'United States', scores={'outlook': u'69.6', 'overall': u'88.6', 'research': u'90.1', 'citations': u'99.4', 'income': u'39.8', 'teaching': u'85.3'}), school(ranking=u'=10', name=u'ETH Zurich \u2013 Swiss 
Federal Institute of Technology Zurich', location=u'Switzerland', scores={'outlook': u'98.1', 'overall': u'87.7', 'research': u'92.0', 'citations': u'94.3', 'income': u'60.3', 'teaching': u'76.4'}), school(ranking=u'=10', name=u'University of Pennsylvania', location=u'United States', scores={'outlook': u'61.3', 'overall': u'87.7', 'research': u'90.1', 'citations': u'98.5', 'income': u'56.9', 'teaching': u'83.7'}), school(ranking=u'12', name=u'Yale University', location=u'United States', scores={'outlook': u'64.6', 'overall': u'87.6', 'research': u'87.0', 'citations': u'98.4', 'income': u'45.1', 'teaching': u'86.7'}), school(ranking=u'13', name=u'Johns Hopkins University', location=u'United States', scores={'outlook': u'70.6', 'overall': u'86.5', 'research': u'88.1', 'citations': u'98.4', 'income': u'95.8', 'teaching': u'76.1'}), school(ranking=u'14', name=u'Columbia University', location=u'United States', scores={'outlook': u'76.6', 'overall': u'86.0', 'research': u'83.3', 'citations': u'98.8', 'income': u'41.3', 'teaching': u'82.2'}), school(ranking=u'15', name=u'University of California, Los Angeles', location=u'United States', scores={'outlook': u'59.5', 'overall': u'85.7', 'research': u'88.1', 'citations': u'97.9', 'income': u'48.6', 'teaching': u'80.7'}), school(ranking=u'16', name=u'UCL', location=u'United Kingdom', scores={'outlook': u'94.6', 'overall': u'85.3', 'research': u'88.2', 'citations': u'94.6', 'income': u'41.2', 'teaching': u'74.4'}), school(ranking=u'17', name=u'Duke University', location=u'United States', scores={'outlook': u'62.5', 'overall': u'85.1', 'research': u'80.6', 'citations': u'98.3', 'income': u'100.0', 'teaching': u'80.7'}), school(ranking=u'18', name=u'University of California, Berkeley', location=u'United States', scores={'outlook': u'64.5', 'overall': u'84.3', 'research': u'84.5', 'citations': u'99.8', 'income': u'37.5', 'teaching': u'77.4'}), school(ranking=u'19', name=u'Cornell University', location=u'United States', 
scores={'outlook': u'69.2', 'overall': u'84.2', 'research': u'86.6', 'citations': u'97.6', 'income': u'34.6', 'teaching': u'76.2'}), school(ranking=u'20', name=u'Northwestern University', location=u'United States', scores={'outlook': u'59.2', 'overall': u'83.3', 'research': u'86.7', 'citations': u'96.9', 'income': u'78.2', 'teaching': u'72.6'}), school(ranking=u'21', name=u'University of Michigan', location=u'United States', scores={'outlook': u'55.8', 'overall': u'83.1', 'research': u'86.3', 'citations': u'95.7', 'income': u'46.2', 'teaching': u'77.2'}), school(ranking=u'=22', name=u'National University of Singapore', location=u'Singapore', scores={'outlook': u'95.8', 'overall': u'82.8', 'research': u'88.2', 'citations': u'81.3', 'income': u'61.9', 'teaching': u'77.4'}), school(ranking=u'=22', name=u'University of Toronto', location=u'Canada', scores={'outlook': u'80.1', 'overall': u'82.8', 'research': u'84.8', 'citations': u'92.6', 'income': u'46.5', 'teaching': u'74.6'}), school(ranking=u'24', name=u'Carnegie Mellon University', location=u'United States', scores={'outlook': u'79.1', 'overall': u'81.9', 'research': u'83.7', 'citations': u'99.7', 'income': u'50.4', 'teaching': u'65.8'}), school(ranking=u'=25', name=u'London School of Economics and Political Science', location=u'United Kingdom', scores={'outlook': u'92.2', 'overall': u'79.4', 'research': u'72.0', 'citations': u'94.9', 'income': u'33.7', 'teaching': u'71.8'})]]