Эта страница использует JavaScript / AJAX (XHR) для получения данных. С помощью DevTools в Chrome / Firefox вы можете найти запросы, которые отправляет браузер, и взять оттуда URL и все заголовки, чтобы воспроизвести запрос прямо в коде на Python. Эти данные можно получить с помощью requests. Сервер отдаёт их в виде HTML (не JSON), поэтому для извлечения данных из HTML вам всё равно понадобится lxml или BeautifulSoup.
Чтобы получить ответ, серверу нужен заголовок 'X-Fsign': 'SW9D1eZo', но я не знаю, меняется ли это значение со временем. Возможно, вам придётся искать это значение в других файлах.
import requests
from bs4 import BeautifulSoup as BS

# Only 'User-Agent' and 'X-Fsign' are actually sent. The commented-out
# entries are additional headers kept for reference; they are not sent.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0',
    # 'Accept': '*/*',
    # 'Accept-Language': '*',
    # 'Accept-Encoding': 'gzip, deflate, br',
    # 'X-Referer': 'https://www.livescore.in/it/calcio/germania/bundesliga/classifiche/',
    'X-Fsign': 'SW9D1eZo',
    # 'X-Requested-With': 'XMLHttpRequest',
    # 'Referer': 'https://d.livescore.in/it/x/feed/proxy-local',
}

# Alternative feed (overall instead of home):
#url = 'https://d.livescore.in/it/x/feed/ss_4_UoXxkTs4_dAfCUJq0_over_under_overall'
url = 'https://d.livescore.in/it/x/feed/ss_4_UoXxkTs4_dAfCUJq0_over_under_home'

# Without a timeout, requests waits indefinitely on an unresponsive server.
r = requests.get(url, headers=headers, timeout=30)

# Debug output: HTTP status and the first 1000 characters of the body.
print(r.status_code)
print(r.text[:1000])

# The response body is parsed as HTML.
soup = BS(r.text, 'html.parser')

# all_results holds one entry per 'table__body' div. Each entry is a list
# of rows, and each row is a list of the stripped text of its
# 'table__cell' divs.
all_results = []
all_tables = soup.find_all('div', {'class': 'table__body'})

for table in all_tables:
    data = [
        [cell.text.strip() for cell in row.find_all('div', {'class': 'table__cell'})]
        for row in table.find_all('div', {'class': 'table__row'})
    ]
    all_results.append(data)

# Print every table under a header of (index + 0.5): 0.5, 1.5, 2.5, ...
# NOTE(review): presumably this is the over/under goal threshold of each
# table -- confirm against the page.
for number, data in enumerate(all_results):
    print('---', number + 0.5, '---')
    for row in data:
        print(row)
# --- save ---
# Two interchangeable ways to write every table in all_results to
# 'output-<index + 0.5>.csv'. Both variants write the same filenames, so
# the second overwrites the files produced by the first; keep only one.

import csv

for number, data in enumerate(all_results):
    filename = 'output-{}.csv'.format(number+0.5)
    # newline='' lets the csv module control line endings itself; the
    # with-block closes the file even if writing fails.
    with open(filename, 'w', newline='') as fh:
        csv_writer = csv.writer(fh)
        csv_writer.writerows(data)

# - or -

import pandas as pd

for number, data in enumerate(all_results):
    filename = 'output-{}.csv'.format(number+0.5)
    df = pd.DataFrame(data)
    # Write raw rows only: no index column and no header row.
    df.to_csv(filename, index=False, header=False)
Результат:
--- 0.5 ---
['1.', 'Hoffenheim', '13', '13', '0', '19:31', '3.8', '?\n+\n+\n+\n+\n+']
['2.', 'Dortmund', '12', '12', '0', '41:10', '4.3', '?\n+\n+\n+\n+\n+']
['3.', 'Augusta', '12', '12', '0', '24:21', '3.8', '?\n+\n+\n+\n+\n+']
['4.', 'Paderborn', '12', '12', '0', '16:29', '3.8', '?\n+\n+\n+\n+\n+']
['5.', 'RB Lipsia', '12', '12', '0', '32:13', '3.8', '?\n+\n+\n+\n+\n+']
['6.', 'Francoforte', '12', '12', '0', '27:16', '3.6', '?\n+\n+\n+\n+\n+']
['7.', 'Dusseldorf', '12', '12', '0', '13:23', '3', '?\n+\n+\n+\n+\n+']
['8.', 'Colonia', '12', '12', '0', '20:16', '3', '?\n+\n+\n+\n+\n+']
['9.', 'Union Berlino', '12', '12', '0', '18:16', '2.8', '?\n+\n+\n+\n+\n+']
['10.', 'Bayern', '12', '11', '1', '36:11', '3.9', '?\n+\n-\n+\n+\n+']
['11.', 'Brema', '11', '11', '0', '8:26', '3.1', '?\n+\n+\n+\n+\n+']
['12.', 'Leverkusen', '12', '11', '1', '21:13', '2.8', '?\n+\n+\n+\n+\n+']
['13.', 'Schalke', '12', '11', '1', '17:17', '2.8', '?\n+\n+\n+\n+\n+']
['14.', 'Magonza', '12', '11', '1', '14:20', '2.8', '?\n+\n-\n+\n+\n+']
['15.', 'Wolfsburg', '12', '11', '1', '16:13', '2.4', '?\n+\n+\n+\n+\n+']
['16.', 'Friburgo', '11', '11', '0', '13:13', '2.4', '?\n+\n+\n+\n+\n+']
['17.', 'Monchengladbach', '11', '10', '1', '27:13', '3.6', '?\n+\n+\n+\n+\n+']
['18.', 'Hertha', '12', '10', '2', '12:26', '3.2', '?\n+\n+\n-\n+\n-']
--- 1.5 ---
['1.', 'Hoffenheim', '13', '13', '0', '19:31', '3.8', '?\n+\n+\n+\n+\n+']
['2.', 'Augusta', '12', '12', '0', '24:21', '3.8', '?\n+\n+\n+\n+\n+']
['3.', 'RB Lipsia', '12', '12', '0', '32:13', '3.8', '?\n+\n+\n+\n+\n+']
['4.', 'Bayern', '12', '11', '1', '36:11', '3.9', '?\n+\n-\n+\n+\n+']
['5.', 'Paderborn', '12', '11', '1', '16:29', '3.8', '?\n+\n+\n+\n+\n+']
['6.', 'Francoforte', '12', '11', '1', '27:16', '3.6', '?\n+\n+\n+\n+\n+']
['7.', 'Union Berlino', '12', '11', '1', '18:16', '2.8', '?\n+\n+\n+\n+\n+']
['8.', 'Dortmund', '12', '10', '2', '41:10', '4.3', '?\n-\n+\n+\n+\n+']
['9.', 'Monchengladbach', '11', '10', '1', '27:13', '3.6', '?\n+\n+\n+\n+\n+']
['10.', 'Brema', '11', '10', '1', '8:26', '3.1', '?\n+\n+\n+\n+\n-']
['11.', 'Dusseldorf', '12', '10', '2', '13:23', '3', '?\n+\n+\n+\n-\n+']
['12.', 'Colonia', '12', '10', '2', '20:16', '3', '?\n+\n+\n+\n+\n-']
['13.', 'Leverkusen', '12', '10', '2', '21:13', '2.8', '?\n+\n+\n+\n-\n+']
['14.', 'Schalke', '12', '10', '2', '17:17', '2.8', '?\n+\n+\n+\n+\n-']
['15.', 'Wolfsburg', '12', '10', '2', '16:13', '2.4', '?\n+\n+\n+\n+\n+']
['16.', 'Hertha', '12', '9', '3', '12:26', '3.2', '?\n+\n+\n-\n+\n-']
['17.', 'Magonza', '12', '9', '3', '14:20', '2.8', '?\n+\n-\n+\n+\n-']
['18.', 'Friburgo', '11', '8', '3', '13:13', '2.4', '?\n+\n-\n+\n+\n-']
--- 2.5 ---
['1.', 'Hoffenheim', '13', '11', '2', '19:31', '3.8', '?\n+\n+\n+\n+\n+']
['2.', 'Dortmund', '12', '10', '2', '41:10', '4.3', '?\n-\n+\n+\n+\n+']
['3.', 'Bayern', '12', '10', '2', '36:11', '3.9', '?\n+\n-\n+\n-\n+']
['4.', 'Augusta', '12', '10', '2', '24:21', '3.8', '?\n+\n-\n+\n+\n+']
['5.', 'Paderborn', '12', '9', '3', '16:29', '3.8', '?\n+\n+\n+\n+\n-']
['6.', 'RB Lipsia', '12', '9', '3', '32:13', '3.8', '?\n-\n+\n+\n+\n+']
['7.', 'Francoforte', '12', '9', '3', '27:16', '3.6', '?\n+\n+\n-\n+\n+']
['8.', 'Hertha', '12', '9', '3', '12:26', '3.2', '?\n+\n+\n-\n+\n-']
['9.', 'Monchengladbach', '11', '8', '3', '27:13', '3.6', '?\n-\n+\n-\n+\n+']
['10.', 'Colonia', '12', '8', '4', '20:16', '3', '?\n+\n+\n+\n+\n-']
['11.', 'Magonza', '12', '8', '4', '14:20', '2.8', '?\n-\n-\n+\n+\n-']
['12.', 'Brema', '11', '7', '4', '8:26', '3.1', '?\n-\n-\n+\n+\n-']
['13.', 'Dusseldorf', '12', '7', '5', '13:23', '3', '?\n+\n+\n-\n-\n+']
['14.', 'Schalke', '12', '7', '5', '17:17', '2.8', '?\n+\n-\n-\n+\n-']
['15.', 'Union Berlino', '12', '6', '6', '18:16', '2.8', '?\n+\n+\n-\n-\n-']
['16.', 'Leverkusen', '12', '6', '6', '21:13', '2.8', '?\n-\n+\n+\n-\n+']
['17.', 'Wolfsburg', '12', '5', '7', '16:13', '2.4', '?\n+\n-\n+\n-\n+']
['18.', 'Friburgo', '11', '5', '6', '13:13', '2.4', '?\n-\n-\n-\n+\n-']
--- 3.5 ---
['1.', 'Dortmund', '12', '9', '3', '41:10', '4.3', '?\n-\n+\n+\n+\n+']
['2.', 'Bayern', '12', '7', '5', '36:11', '3.9', '?\n+\n-\n+\n-\n+']
['3.', 'RB Lipsia', '12', '7', '5', '32:13', '3.8', '?\n-\n-\n+\n+\n+']
['4.', 'Paderborn', '12', '6', '6', '16:29', '3.8', '?\n-\n+\n+\n-\n-']
['5.', 'Francoforte', '12', '6', '6', '27:16', '3.6', '?\n-\n+\n-\n+\n+']
['6.', 'Monchengladbach', '11', '6', '5', '27:13', '3.6', '?\n-\n+\n-\n-\n+']
['7.', 'Hertha', '12', '6', '6', '12:26', '3.2', '?\n+\n+\n-\n+\n-']
['8.', 'Hoffenheim', '13', '5', '8', '19:31', '3.8', '?\n+\n+\n-\n-\n-']
['9.', 'Augusta', '12', '5', '7', '24:21', '3.8', '?\n+\n-\n-\n+\n-']
['10.', 'Colonia', '12', '5', '7', '20:16', '3', '?\n-\n+\n+\n+\n-']
['11.', 'Magonza', '12', '5', '7', '14:20', '2.8', '?\n-\n-\n+\n-\n-']
['12.', 'Brema', '11', '4', '7', '8:26', '3.1', '?\n-\n-\n-\n+\n-']
['13.', 'Dusseldorf', '12', '4', '8', '13:23', '3', '?\n+\n+\n-\n-\n-']
['14.', 'Union Berlino', '12', '4', '8', '18:16', '2.8', '?\n+\n+\n-\n-\n-']
['15.', 'Leverkusen', '12', '3', '9', '21:13', '2.8', '?\n-\n+\n-\n-\n-']
['16.', 'Schalke', '12', '3', '9', '17:17', '2.8', '?\n+\n-\n-\n+\n-']
['17.', 'Wolfsburg', '12', '2', '10', '16:13', '2.4', '?\n+\n-\n-\n-\n-']
['18.', 'Friburgo', '11', '2', '9', '13:13', '2.4', '?\n-\n-\n-\n+\n-']
--- 4.5 ---
['1.', 'Dortmund', '12', '6', '6', '41:10', '4.3', '?\n-\n-\n+\n+\n+']
['2.', 'Hoffenheim', '13', '5', '8', '19:31', '3.8', '?\n+\n+\n-\n-\n-']
['3.', 'Paderborn', '12', '5', '7', '16:29', '3.8', '?\n-\n+\n+\n-\n-']
['4.', 'Bayern', '12', '4', '8', '36:11', '3.9', '?\n+\n-\n+\n-\n+']
['5.', 'Augusta', '12', '3', '9', '24:21', '3.8', '?\n+\n-\n-\n+\n-']
['6.', 'Francoforte', '12', '3', '9', '27:16', '3.6', '?\n-\n+\n-\n+\n-']
['7.', 'Monchengladbach', '11', '3', '8', '27:13', '3.6', '?\n-\n-\n-\n-\n+']
['8.', 'Hertha', '12', '3', '9', '12:26', '3.2', '?\n+\n-\n-\n-\n-']
['9.', 'RB Lipsia', '12', '2', '10', '32:13', '3.8', '?\n-\n-\n-\n-\n-']
['10.', 'Brema', '11', '2', '9', '8:26', '3.1', '?\n-\n-\n-\n+\n-']
['11.', 'Dusseldorf', '12', '2', '10', '13:23', '3', '?\n+\n+\n-\n-\n-']
['12.', 'Leverkusen', '12', '2', '10', '21:13', '2.8', '?\n-\n+\n-\n-\n-']
['13.', 'Schalke', '12', '2', '10', '17:17', '2.8', '?\n+\n-\n-\n-\n-']
['14.', 'Colonia', '12', '1', '11', '20:16', '3', '?\n-\n+\n-\n-\n-']
['15.', 'Union Berlino', '12', '1', '11', '18:16', '2.8', '?\n-\n+\n-\n-\n-']
['16.', 'Magonza', '12', '1', '11', '14:20', '2.8', '?\n-\n-\n-\n-\n-']
['17.', 'Wolfsburg', '12', '1', '11', '16:13', '2.4', '?\n-\n-\n-\n-\n-']
['18.', 'Friburgo', '11', '0', '11', '13:13', '2.4', '?\n-\n-\n-\n-\n-']
--- 5.5 ---
['1.', 'Dortmund', '12', '4', '8', '41:10', '4.3', '?\n-\n-\n-\n+\n+']
['2.', 'Hoffenheim', '13', '3', '10', '19:31', '3.8', '?\n+\n-\n-\n-\n-']
['3.', 'Monchengladbach', '11', '3', '8', '27:13', '3.6', '?\n-\n-\n-\n-\n+']
['4.', 'Bayern', '12', '2', '10', '36:11', '3.9', '?\n-\n-\n-\n-\n+']
['5.', 'Paderborn', '12', '2', '10', '16:29', '3.8', '?\n-\n+\n-\n-\n-']
['6.', 'Francoforte', '12', '2', '10', '27:16', '3.6', '?\n-\n-\n-\n+\n-']
['7.', 'Augusta', '12', '1', '11', '24:21', '3.8', '?\n-\n-\n-\n+\n-']
['8.', 'RB Lipsia', '12', '1', '11', '32:13', '3.8', '?\n-\n-\n-\n-\n-']
['9.', 'Hertha', '12', '1', '11', '12:26', '3.2', '?\n-\n-\n-\n-\n-']
['10.', 'Dusseldorf', '12', '1', '11', '13:23', '3', '?\n+\n-\n-\n-\n-']
['11.', 'Leverkusen', '12', '1', '11', '21:13', '2.8', '?\n-\n+\n-\n-\n-']
['12.', 'Schalke', '12', '1', '11', '17:17', '2.8', '?\n-\n-\n-\n-\n-']
['13.', 'Brema', '11', '0', '11', '8:26', '3.1', '?\n-\n-\n-\n-\n-']
['14.', 'Colonia', '12', '0', '12', '20:16', '3', '?\n-\n-\n-\n-\n-']
['15.', 'Union Berlino', '12', '0', '12', '18:16', '2.8', '?\n-\n-\n-\n-\n-']
['16.', 'Magonza', '12', '0', '12', '14:20', '2.8', '?\n-\n-\n-\n-\n-']
['17.', 'Wolfsburg', '12', '0', '12', '16:13', '2.4', '?\n-\n-\n-\n-\n-']
['18.', 'Friburgo', '11', '0', '11', '13:13', '2.4', '?\n-\n-\n-\n-\n-']
--- 6.5 ---
['1.', 'Bayern', '12', '2', '10', '36:11', '3.9', '?\n-\n-\n-\n-\n+']
['2.', 'Augusta', '12', '1', '11', '24:21', '3.8', '?\n-\n-\n-\n+\n-']
['3.', 'RB Lipsia', '12', '1', '11', '32:13', '3.8', '?\n-\n-\n-\n-\n-']
['4.', 'Leverkusen', '12', '1', '11', '21:13', '2.8', '?\n-\n+\n-\n-\n-']
['5.', 'Dortmund', '12', '0', '12', '41:10', '4.3', '?\n-\n-\n-\n-\n-']
['6.', 'Hoffenheim', '13', '0', '13', '19:31', '3.8', '?\n-\n-\n-\n-\n-']
['7.', 'Paderborn', '12', '0', '12', '16:29', '3.8', '?\n-\n-\n-\n-\n-']
['8.', 'Francoforte', '12', '0', '12', '27:16', '3.6', '?\n-\n-\n-\n-\n-']
['9.', 'Monchengladbach', '11', '0', '11', '27:13', '3.6', '?\n-\n-\n-\n-\n-']
['10.', 'Hertha', '12', '0', '12', '12:26', '3.2', '?\n-\n-\n-\n-\n-']
['11.', 'Brema', '11', '0', '11', '8:26', '3.1', '?\n-\n-\n-\n-\n-']
['12.', 'Dusseldorf', '12', '0', '12', '13:23', '3', '?\n-\n-\n-\n-\n-']
['13.', 'Colonia', '12', '0', '12', '20:16', '3', '?\n-\n-\n-\n-\n-']
['14.', 'Union Berlino', '12', '0', '12', '18:16', '2.8', '?\n-\n-\n-\n-\n-']
['15.', 'Schalke', '12', '0', '12', '17:17', '2.8', '?\n-\n-\n-\n-\n-']
['16.', 'Magonza', '12', '0', '12', '14:20', '2.8', '?\n-\n-\n-\n-\n-']
['17.', 'Wolfsburg', '12', '0', '12', '16:13', '2.4', '?\n-\n-\n-\n-\n-']
['18.', 'Friburgo', '11', '0', '11', '13:13', '2.4', '?\n-\n-\n-\n-\n-']
Кстати: в DevTools запрос можно также скопировать в виде команды cURL, а затем преобразовать её в код на Python на сайте https://curl.trillworks.com/
ДОПОЛНЕНИЕ: таблица «Больше / меньше 1,5» для выездных матчей (Away).
import requests
from bs4 import BeautifulSoup as BS

# Headers sent with the feed request.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0',
    'X-Fsign': 'SW9D1eZo',
}

url = 'https://d.livescore.in/it/x/feed/ss_4_UoXxkTs4_dAfCUJq0_over_under_away'

# Without a timeout, requests waits indefinitely on an unresponsive server.
r = requests.get(url, headers=headers, timeout=30)

# The response body is parsed as HTML.
soup = BS(r.text, 'html.parser')

all_tables = soup.find_all('div', {'class': 'table__body'})

# Index 1 raises IndexError if the response has fewer than two
# 'table__body' divs.
table = all_tables[1]  # second table

# Each row is the list of stripped texts of its 'table__cell' divs.
data = [
    [cell.text.strip() for cell in row.find_all('div', {'class': 'table__cell'})]
    for row in table.find_all('div', {'class': 'table__row'})
]

for row in data:
    print(row)
Результат:
['1.', 'Bayern', '12', '12', '0', '35:15', '4.2', '?\n+\n+\n+\n+\n+']
['2.', 'Dusseldorf', '12', '12', '0', '13:26', '3.3', '?\n+\n+\n+\n+\n+']
['3.', 'Magonza', '12', '11', '1', '19:32', '4.3', '?\n+\n+\n+\n+\n+']
['4.', 'Dortmund', '12', '11', '1', '25:22', '3.9', '?\n+\n+\n+\n+\n+']
['5.', 'Colonia', '11', '11', '0', '16:26', '3.8', '?\n+\n+\n+\n+\n+']
['6.', 'RB Lipsia', '12', '11', '1', '30:13', '3.6', '?\n+\n-\n+\n+\n+']
['7.', 'Leverkusen', '12', '11', '1', '20:17', '3.1', '?\n+\n+\n+\n+\n-']
['8.', 'Friburgo', '13', '11', '2', '18:21', '3', '?\n-\n+\n+\n+\n+']
['9.', 'Paderborn', '12', '11', '1', '13:23', '3', '?\n+\n+\n+\n+\n+']
['10.', 'Union Berlino', '12', '11', '1', '13:22', '2.9', '?\n+\n+\n+\n+\n+']
['11.', 'Brema', '12', '10', '2', '17:27', '3.7', '?\n+\n+\n-\n-\n+']
['12.', 'Augusta', '12', '10', '2', '12:29', '3.4', '?\n+\n+\n+\n+\n+']
['13.', 'Hertha', '12', '10', '2', '18:20', '3.2', '?\n+\n+\n+\n-\n+']
['14.', 'Wolfsburg', '12', '10', '2', '18:17', '2.9', '?\n+\n+\n+\n+\n+']
['15.', 'Francoforte', '11', '9', '2', '11:21', '2.9', '?\n+\n+\n+\n+\n-']
['16.', 'Schalke', '12', '9', '3', '15:18', '2.8', '?\n+\n-\n-\n+\n+']
['17.', 'Monchengladbach', '12', '9', '3', '19:14', '2.8', '?\n+\n+\n+\n+\n-']
['18.', 'Hoffenheim', '11', '8', '3', '15:11', '2.4', '?\n+\n-\n+\n+\n+']
Снимок экрана: