Этот скрипт проходит по каждому элементу datarow, извлекает каждое значение по отдельности, а затем добавляет их в DataFrame библиотеки pandas.
from bs4 import BeautifulSoup
import requests
import pandas as pd
page_link = 'https://www.thespread.com/nhl-hockey-public-betting-chart'
# Fetch the betting-chart page; the timeout keeps the script from hanging
# forever on an unresponsive server.
page_response = requests.get(page_link, timeout=5)
# Fail fast on HTTP errors (4xx/5xx) instead of parsing an error page and
# printing an empty or misleading DataFrame.
page_response.raise_for_status()
page_content = BeautifulSoup(page_response.content, "html.parser")
# Each <div class="datarow"> becomes one row of the resulting DataFrame.
tables = page_content.find_all('div', class_='datarow')
rows = []
# Walk every datarow and pull out the home/away values separately.
# NOTE(review): all positional indices below (1 and -1 into `.contents`)
# depend on the page's current markup -- confirm against the live HTML.
for table in tables:
    # Date and time: second and last child nodes of the "time" div.
    time_and_date_tag = table.find('div', attrs={"class": "time"}).contents
    date = time_and_date_tag[1]
    time = time_and_date_tag[-1]
    # Teams: taken from the last child of the "datacell teams" div; its
    # second child is stored as home, its last child as away.
    teams_tag = table.find(
        'div', attrs={"class": "datacell teams"}
    ).contents[-1].contents
    home_team = teams_tag[1].text
    away_team = teams_tag[-1].text
    # Opening values: second and last child nodes of the "child-open" div.
    opening_tag = table.find('div', attrs={"class": "child-open"}).contents
    home_open_value = opening_tag[1]
    away_open_value = opening_tag[-1]
    # Current values: second and last child nodes of the "child-current" div.
    current_tag = table.find('div', attrs={"class": "child-current"}).contents
    home_current_value = current_tag[1]
    away_current_value = current_tag[-1]
    # Keep the element order in sync with `columns` below.
    rows.append([time, date, home_team, away_team,
                 home_open_value, away_open_value,
                 home_current_value, away_current_value])
columns = ['time', 'date', 'home_team', 'away_team',
           'home_open', 'away_open',
           'home_current', 'away_current']
print(pd.DataFrame(rows, columns=columns))