Почему HTML не то же самое, что когда я проверяю элемент? - PullRequest
0 голосов
/ 15 марта 2020

Я пытаюсь спарсить этот сайт: "https://no.unibet.com/betting/sports/filter/chess". Когда я смотрю содержимое переменной page_soup (см. код ниже), я вижу, что искомого элемента там нет, хотя при проверке элемента в браузере он есть. Почему?

from bs4 import BeautifulSoup
from selenium import webdriver

# Page whose rendered content the script tries to scrape.
url = 'https://no.unibet.com/betting/sports/filter/chess/'

# Local path to the ChromeDriver executable handed to Selenium.
chrome_path = r"C:\Users\lakha\OneDrive\Skrivebord\chromedriver_win32 (1)\chromedriver.exe"
driver = webdriver.Chrome(chrome_path)
driver.get(url)

# The HTML is read immediately after get() returns; no wait happens in between.
html = driver.page_source

page_soup = BeautifulSoup(html, features="lxml")

# Search the captured HTML for <ul> elements with this exact class string.
containers = page_soup.findAll("ul", {"class" : "KambiBC-list-view__column KambiBC-list-view__event-list"})
print(len(containers))  # prints 0: the element is not in the captured HTML

Ответы [ 3 ]

1 голос
/ 15 марта 2020

Содержимое сайта подгружается динамически через JavaScript уже после загрузки страницы. Я нашёл XHR-запрос к JSON-эндпоинту, который возвращает нужную вам информацию. Его можно увидеть в инструментах разработчика (Developer Tools) вашего браузера на вкладке Network.

import requests
import json

# Request the JSON feed directly instead of scraping the rendered page.
# A timeout is set because requests otherwise waits forever on a stalled server.
response = requests.get(
    "https://eu-offering.kambicdn.org/offering/v2018/ub/listView/chess.json?lang=no_NO&market=NO&client_id=2&channel_id=1&ncid=1584287770903&useCombined=true",
    timeout=10,
)
# Surface HTTP errors (4xx/5xx) here rather than as a confusing JSON decode error.
response.raise_for_status()
r = response.json()

# Show the top-level sections of the payload.
print(r.keys())

# Pretty-print the whole payload for inspection.
print(json.dumps(r, indent=4))

Вывод:

dict_keys(['events', 'terms', 'activeTermIds', 'soonMode', 'categoryGroups', 'activeCategories', 'activeEventTypes', 'eventTypes', 'defaultEventType'])
{
    "events": [
        {
            "event": {
                "id": 1006198062,
                "name": "Vachier-Lagrave, M - Caruana, Fabiano",
                "nameDelimiter": "-",
                "englishName": "Vachier-Lagrave, Maxime - Caruana, Fabiano",
                "homeName": "Vachier-Lagrave, M",
                "awayName": "Caruana, Fabiano",
                "start": "2020-03-17T11:00:00Z",
                "group": "Kandidater",
                "groupId": 2000055248,
                "path": [
                    {
                        "id": 1000190837,
                        "name": "Sjakk",
                        "englishName": "Chess",
                        "termKey": "chess"
                    },
                    {
                        "id": 1000190838,
                        "name": "VM",
                        "englishName": "World Championship",
                        "termKey": "world_championship"
                    },
                    {
                        "id": 2000055248,
                        "name": "Kandidater",
                        "englishName": "Candidates",
                        "termKey": "candidates"
                    }
                ],
                "nonLiveBoCount": 3,
                "sport": "CHESS",
                "tags": [
                    "MATCH"
                ],
                "state": "NOT_STARTED"
            },
            "betOffers": [
                {
                    "id": 2208576284,
                    "closed": "2020-03-17T11:00:00Z",
                    "criterion": {
                        "id": 1001836486,
                        "label": "Kampodds",
                        "englishLabel": "Match Odds",
                        "order": [
                            0
                        ]
                    },
                    "betOfferType": {
                        "id": 2,
                        "name": "Kamp",
                        "englishName": "Match"
                    },
                    "eventId": 1006198062,
                    "outcomes": [
                        {
                            "id": 2733454562,
                            "label": "1",
                            "englishLabel": "1",
                            "odds": 8000,
                            "type": "OT_ONE",
                            "betOfferId": 2208576284,
                            "changedDate": "2020-03-07T12:54:29Z",
                            "oddsFractional": "7/1",
                            "oddsAmerican": "700",
                            "status": "OPEN",
                            "cashOutStatus": "ENABLED"
                        },
                        {
                            "id": 2733454563,
                            "label": "X",
                            "englishLabel": "X",
                            "odds": 1250,
                            "type": "OT_CROSS",
                            "betOfferId": 2208576284,
                            "changedDate": "2020-03-07T12:54:29Z",
                            "oddsFractional": "1/4",
                            "oddsAmerican": "-400",
                            "status": "OPEN",
                            "cashOutStatus": "ENABLED"
                        },
                        {
                            "id": 2733454564,
                            "label": "2",
                            "englishLabel": "2",
                            "odds": 7000,
                            "type": "OT_TWO",
                            "betOfferId": 2208576284,
                            "changedDate": "2020-03-07T12:54:29Z",
                            "oddsFractional": "6/1",
                            "oddsAmerican": "600",
                            "status": "OPEN",
                            "cashOutStatus": "ENABLED"
                        }
                    ],
                    "tags": [
                        "OFFERED_PREMATCH",
                        "MAIN"
                    ],
                    "sortOrder": 1,
                    "cashOutStatus": "DISABLED"
                }
            ]
        },
        {
            "event": {
                "id": 1006147747,
                "name": "Liren Ding - Hao Wang",
                "nameDelimiter": "-",
                "englishName": "Liren Ding - Hao Wang",
                "homeName": "Liren Ding",
                "awayName": "Hao Wang",
                "start": "2020-03-17T11:00:00Z",
                "group": "Kandidater",
                "groupId": 2000055248,
                "path": [
                    {
                        "id": 1000190837,
                        "name": "Sjakk",
                        "englishName": "Chess",
                        "termKey": "chess"
                    },
                    {
                        "id": 1000190838,
                        "name": "VM",
                        "englishName": "World Championship",
                        "termKey": "world_championship"
                    },
                    {
                        "id": 2000055248,
                        "name": "Kandidater",
                        "englishName": "Candidates",
                        "termKey": "candidates"
                    }
                ],
                "nonLiveBoCount": 3,
                "sport": "CHESS",
                "tags": [
                    "MATCH"
                ],
                "state": "NOT_STARTED"
            },
            "betOffers": [
                {
                    "id": 2205691273,
                    "closed": "2020-03-17T11:00:00Z",
                    "criterion": {
                        "id": 1001836486,
                        "label": "Kampodds",
                        "englishLabel": "Match Odds",
                        "order": [
                            0
                        ]
                    },
                    "betOfferType": {
                        "id": 2,
                        "name": "Kamp",
                        "englishName": "Match"
                    },
                    "eventId": 1006147747,
                    "outcomes": [
                        {
                            "id": 2723380316,
                            "label": "1",
                            "englishLabel": "1",
                            "odds": 4100,
                            "type": "OT_ONE",
                            "betOfferId": 2205691273,
                            "changedDate": "2020-03-12T15:54:12Z",
                            "oddsFractional": "3/1",
                            "oddsAmerican": "310",
                            "status": "OPEN",
                            "cashOutStatus": "ENABLED"
                        },
                        {
                            "id": 2723380317,
                            "label": "X",
                            "englishLabel": "X",
                            "odds": 1330,
                            "type": "OT_CROSS",
                            "betOfferId": 2205691273,
                            "changedDate": "2020-03-12T15:54:12Z",
                            "oddsFractional": "33/100",
                            "oddsAmerican": "-305",
                            "status": "OPEN",
                            "cashOutStatus": "ENABLED"
                        },
                        {
                            "id": 2723380318,
                            "label": "2",
                            "englishLabel": "2",
                            "odds": 13000,
                            "type": "OT_TWO",
                            "betOfferId": 2205691273,
                            "changedDate": "2020-03-12T15:54:12Z",
                            "oddsFractional": "12/1",
                            "oddsAmerican": "1200",
                            "status": "OPEN",
                            "cashOutStatus": "ENABLED"
                        }
                    ],
                    "tags": [
                        "OFFERED_PREMATCH",
                        "MAIN"
                    ],
                    "sortOrder": 1,
                    "cashOutStatus": "DISABLED"
                }
            ]
        },
        {
            "event": {
                "id": 1006147748,
                "name": "Giri, A - Nepomniachtchi, Ian",
                "nameDelimiter": "-",
                "englishName": "Giri, Anish - Nepomniachtchi, Ian",
                "homeName": "Giri, A",
                "awayName": "Nepomniachtchi, Ian",
                "start": "2020-03-17T11:00:00Z",
                "group": "Kandidater",
                "groupId": 2000055248,
                "path": [
                    {
                        "id": 1000190837,
                        "name": "Sjakk",
                        "englishName": "Chess",
                        "termKey": "chess"
                    },
                    {
                        "id": 1000190838,
                        "name": "VM",
                        "englishName": "World Championship",
                        "termKey": "world_championship"
                    },
                    {
                        "id": 2000055248,
                        "name": "Kandidater",
                        "englishName": "Candidates",
                        "termKey": "candidates"
                    }
                ],
                "nonLiveBoCount": 3,
                "sport": "CHESS",
                "tags": [
                    "MATCH"
                ],
                "state": "NOT_STARTED"
            },
            "betOffers": [
                {
                    "id": 2205691270,
                    "closed": "2020-03-17T11:00:00Z",
                    "criterion": {
                        "id": 1001836486,
                        "label": "Kampodds",
                        "englishLabel": "Match Odds",
                        "order": [
                            0
                        ]
                    },
                    "betOfferType": {
                        "id": 2,
                        "name": "Kamp",
                        "englishName": "Match"
                    },
                    "eventId": 1006147748,
                    "outcomes": [
                        {
                            "id": 2723380307,
                            "label": "1",
                            "englishLabel": "1",
                            "odds": 4500,
                            "type": "OT_ONE",
                            "betOfferId": 2205691270,
                            "changedDate": "2020-03-01T14:00:24Z",
                            "oddsFractional": "7/2",
                            "oddsAmerican": "350",
                            "status": "OPEN",
                            "cashOutStatus": "ENABLED"
                        },
                        {
                            "id": 2723380308,
                            "label": "X",
                            "englishLabel": "X",
                            "odds": 1400,
                            "type": "OT_CROSS",
                            "betOfferId": 2205691270,
                            "changedDate": "2020-03-01T14:00:24Z",
                            "oddsFractional": "2/5",
                            "oddsAmerican": "-250",
                            "status": "OPEN",
                            "cashOutStatus": "ENABLED"
                        },
                        {
                            "id": 2723380309,
                            "label": "2",
                            "englishLabel": "2",
                            "odds": 7500,
                            "type": "OT_TWO",
                            "betOfferId": 2205691270,
                            "changedDate": "2020-03-01T14:00:24Z",
                            "oddsFractional": "13/2",
                            "oddsAmerican": "650",
                            "status": "OPEN",
                            "cashOutStatus": "ENABLED"
                        }
                    ],
                    "tags": [
                        "OFFERED_PREMATCH",
                        "MAIN"
                    ],
                    "sortOrder": 1,
                    "cashOutStatus": "DISABLED"
                }
            ]
        },
        {
            "event": {
                "id": 1006147749,
                "name": "Grischuk, Alexander - Alekseenko, Kirill",
                "nameDelimiter": "-",
                "englishName": "Grischuk, Alexander - Alekseenko, Kirill",
                "homeName": "Grischuk, Alexander",
                "awayName": "Alekseenko, Kirill",
                "start": "2020-03-17T11:00:00Z",
                "group": "Kandidater",
                "groupId": 2000055248,
                "path": [
                    {
                        "id": 1000190837,
                        "name": "Sjakk",
                        "englishName": "Chess",
                        "termKey": "chess"
                    },
                    {
                        "id": 1000190838,
                        "name": "VM",
                        "englishName": "World Championship",
                        "termKey": "world_championship"
                    },
                    {
                        "id": 2000055248,
                        "name": "Kandidater",
                        "englishName": "Candidates",
                        "termKey": "candidates"
                    }
                ],
                "nonLiveBoCount": 3,
                "sport": "CHESS",
                "tags": [
                    "MATCH"
                ],
                "state": "NOT_STARTED"
            },
            "betOffers": [
                {
                    "id": 2205691271,
                    "closed": "2020-03-17T11:00:00Z",
                    "criterion": {
                        "id": 1001836486,
                        "label": "Kampodds",
                        "englishLabel": "Match Odds",
                        "order": [
                            0
                        ]
                    },
                    "betOfferType": {
                        "id": 2,
                        "name": "Kamp",
                        "englishName": "Match"
                    },
                    "eventId": 1006147749,
                    "outcomes": [
                        {
                            "id": 2723380310,
                            "label": "1",
                            "englishLabel": "1",
                            "odds": 2750,
                            "type": "OT_ONE",
                            "betOfferId": 2205691271,
                            "changedDate": "2020-03-07T13:14:51Z",
                            "oddsFractional": "7/4",
                            "oddsAmerican": "175",
                            "status": "OPEN",
                            "cashOutStatus": "ENABLED"
                        },
                        {
                            "id": 2723380311,
                            "label": "X",
                            "englishLabel": "X",
                            "odds": 1550,
                            "type": "OT_CROSS",
                            "betOfferId": 2205691271,
                            "changedDate": "2020-03-07T13:14:51Z",
                            "oddsFractional": "11/20",
                            "oddsAmerican": "-182",
                            "status": "OPEN",
                            "cashOutStatus": "ENABLED"
                        },
                        {
                            "id": 2723380312,
                            "label": "2",
                            "englishLabel": "2",
                            "odds": 16000,
                            "type": "OT_TWO",
                            "betOfferId": 2205691271,
                            "changedDate": "2020-03-07T13:14:51Z",
                            "oddsFractional": "15/1",
                            "oddsAmerican": "1500",
                            "status": "OPEN",
                            "cashOutStatus": "ENABLED"
                        }
                    ],
                    "tags": [
                        "OFFERED_PREMATCH",
                        "MAIN"
                    ],
                    "sortOrder": 1,
                    "cashOutStatus": "DISABLED"
                }
            ]
        }
    ],
    "terms": [
        {
            "type": "SPORT",
            "id": "/chess",
            "termKey": "chess",
            "localizedName": "Sjakk",
            "parentId": "/",
            "englishName": "Chess"
        }
    ],
    "activeTermIds": [
        "/chess"
    ],
    "soonMode": "DAILY",
    "categoryGroups": [
        {
            "categoryGroupName": "list_view",
            "categories": [
                {
                    "id": 16299,
                    "englishName": "Most Popular",
                    "localizedName": "Mest popul\u00e6re"
                }
            ]
        }
    ],
    "activeCategories": [
        "16299"
    ],
    "activeEventTypes": [
        "matches"
    ],
    "eventTypes": [
        "competitions",
        "matches"
    ],
    "defaultEventType": "matches"
}
1 голос
/ 15 марта 2020

Я не уверен, что понимаю смысл использования веб-драйвера Selenium как гигантской библиотеки запросов только для того, чтобы передать HTML в BeautifulSoup. Это во многом противоречит назначению webdriver, которое состоит в том, чтобы динамически взаимодействовать со страницей и дожидаться выполнения JS.

Вот пример использования селектора CSS в webdriver для извлечения нужных элементов:

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Run Chrome without opening a visible browser window.
options = Options()
options.add_argument("--headless")

selector = "ul[class='KambiBC-list-view__column KambiBC-list-view__event-list']"

# The context manager quits the browser even if a step below raises,
# so no headless Chrome process is left running.
with webdriver.Chrome(options=options) as driver:
    driver.get("https://no.unibet.com/betting/sports/filter/chess/")

    # Wait (up to 30 s) until at least one matching list is present instead of
    # querying the DOM once right after get(); raises TimeoutException otherwise.
    # The By-based locator replaces the removed find_elements_by_css_selector.
    event_lists = WebDriverWait(driver, 30).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, selector))
    )

    for elem in event_lists:
        print(elem.text)

Вывод:

Tue
04:00 AM
Giri, A
Nepomniachtchi, Ian
+3
Giri, A
4.50
Uavgjort
1.40
Nepomniachtchi, Ian
7.50
Tue
04:00 AM
Grischuk, Alexander
Alekseenko, Kirill
+3
Grischuk, Alexander
2.75
Uavgjort
1.55
Alekseenko, Kirill
16.00
Tue
04:00 AM
Liren Ding
Hao Wang
+3
Liren Ding
4.10
Uavgjort
1.33
Hao Wang
13.00
Tue
04:00 AM
Vachier-Lagrave, M
Caruana, Fabiano
+3
Vachier-Lagrave, M
8.00
Uavgjort
1.25
Caruana, Fabiano
7.00

Вы также можете обратиться напрямую к JSON-эндпоинту и запросить данные оттуда, вместо того чтобы обходить DOM, — как это хорошо показано в другом ответе.

0 голосов
/ 15 марта 2020

Если вы хотите оставить браузер в headless-режиме и разобрать нужные элементы, попробуйте вариант ниже. Следующий скрипт ждёт, пока не исчезнет спиннер, чтобы затем работать с нужными элементами.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

url = "https://no.unibet.com/betting/sports/filter/chess"

# Locators used below: the loading spinner and the per-event participant blocks.
spinner_locator = (By.CSS_SELECTOR, ".KambiBC-spinner-inner")
participants_locator = (By.CSS_SELECTOR, ".KambiBC-event-participants")

with webdriver.Chrome() as driver:
    driver.get(url)
    wait = WebDriverWait(driver, 30)

    # Block until the spinner is no longer visible (30 s limit).
    wait.until(EC.invisibility_of_element_located(spinner_locator))

    # Then collect every participant element once at least one is present.
    participants = wait.until(EC.presence_of_all_elements_located(participants_locator))
    for participant in participants:
        print(participant.text)
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...