Как найти список родителей в BeautifulSoup - PullRequest
0 голосов
/ 26 мая 2020
import requests
from bs4 import BeautifulSoup

url = "https://www.hltv.org/stats/matches/mapstatsid/103093/furia-vs-chaos"
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}

# Download the page (sending a browser-like user-agent header) and parse it.
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, "html.parser")

# results: key   -> sum of the two numbers found in an image title "a-b"
#                   (presumably the round number; verify against the page),
#          value -> the title string itself.
results = {}

for half in soup.find_all("div", class_="round-history-half"):
    for img in half.find_all("img"):
        score = img["title"]
        if not score:
            # Images with an empty title contribute nothing.
            continue
        parts = score.split("-")
        results[int(parts[0]) + int(parts[1])] = score

# Print the collected entries in ascending key order.
for total, score in sorted(results.items()):
    print(total, score)


Этот код выдаёт мне счёт матча после каждого раунда, и я хотел бы найти родительский элемент, скажем, для print(results[4]), чтобы узнать, кто победил. Я не знаю, как найти родителей для элементов списка.

Результат должен быть:

<img alt="FURIA" src="https://static.hltv.org/images/team/logo/8297" class="round-history-team" title="FURIA">

или

Furia

Ответы [ 3 ]

1 голос
/ 26 мая 2020

Возможно, вы захотите изменить структуру данных, чтобы с ней было проще работать. EDIT: ответ обновлён, используется новая структура данных.

import json
import requests
from bs4 import BeautifulSoup

url = 'https://www.hltv.org/stats/matches/mapstatsid/103093/furia-vs-chaos'
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}

# Fetch the page and build the parse tree.
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, 'html.parser')

# Root element that contains the whole round history of the match.
match = soup.find('div', class_='standard-box round-history-con')

# Team names, read from the title of every team logo in document order.
teams = []
for logo in match.find_all('img', class_='round-history-team'):
    teams.append(logo['title'])

# results[n] maps each team name to its score string, where n is the
# 1-based position of the outcome image inside its team row.
results = {}
for team_row in match.find_all('div', class_='round-history-team-row'):
    outcomes = team_row.find_all('img', class_='round-history-outcome')
    for round_no, outcome in enumerate(outcomes, start=1):
        title = outcome['title']
        if not title:
            # Outcome images with an empty title carry no score.
            continue
        # Scores are paired with teams purely by position, so this breaks
        # if the score order ever differs from the team order.
        results[round_no] = dict(zip(teams, title.split('-')))

# Rebuild the mapping with keys in ascending order before dumping it.
sorted_results = dict(sorted(results.items()))

print(json.dumps(sorted_results, indent=4))

Вывод:

{
    "1": {
        "FURIA": "0",
        "Chaos": "1"
    },
    "2": {
        "FURIA": "0",
        "Chaos": "2"
    },
    "3": {
        "FURIA": "0",
        "Chaos": "3"
    },
    "4": {
        "FURIA": "1",
        "Chaos": "3"
    },
    "5": {
        "FURIA": "2",
        "Chaos": "3"
    },
    "6": {
        "FURIA": "3",
        "Chaos": "3"
    },
    "7": {
        "FURIA": "4",
        "Chaos": "3"
    },
    "8": {
        "FURIA": "5",
        "Chaos": "3"
    },
    "9": {
        "FURIA": "5",
        "Chaos": "4"
    },
    "10": {
        "FURIA": "5",
        "Chaos": "5"
    },
    "11": {
        "FURIA": "5",
        "Chaos": "6"
    },
    "12": {
        "FURIA": "5",
        "Chaos": "7"
    },
    "13": {
        "FURIA": "6",
        "Chaos": "7"
    },
    "14": {
        "FURIA": "7",
        "Chaos": "7"
    },
    "15": {
        "FURIA": "8",
        "Chaos": "7"
    },
    "16": {
        "FURIA": "9",
        "Chaos": "7"
    },
    "17": {
        "FURIA": "10",
        "Chaos": "7"
    },
    "18": {
        "FURIA": "10",
        "Chaos": "8"
    },
    "19": {
        "FURIA": "10",
        "Chaos": "9"
    },
    "20": {
        "FURIA": "10",
        "Chaos": "10"
    },
    "21": {
        "FURIA": "11",
        "Chaos": "10"
    },
    "22": {
        "FURIA": "12",
        "Chaos": "10"
    },
    "23": {
        "FURIA": "13",
        "Chaos": "10"
    },
    "24": {
        "FURIA": "14",
        "Chaos": "10"
    },
    "25": {
        "FURIA": "15",
        "Chaos": "10"
    },
    "26": {
        "FURIA": "15",
        "Chaos": "11"
    },
    "27": {
        "FURIA": "16",
        "Chaos": "11"
    }
}
1 голос
/ 26 мая 2020

Попробуйте так:

# Pick the side whose first element contains a div with class "bold won"
# (falling back to the right side) and print the alt text of its first image.
winner_side = team_left if team_left[0].find_all("div", class_="bold won") else team_right
print(winner_side[0].find_all("img")[0].get("alt"))
1 голос
/ 26 мая 2020

Используйте это:

# Start from the parent: every team row holds the team logo plus that
# team's outcome images, so the team name is known before reading scores.
for team_row in soup.find_all("div", class_="round-history-team-row"):
    team_name = team_row.find('img', class_="round-history-team")['title']
    print("Title : " + team_name)

    for outcome in team_row.find_all("img", class_="round-history-outcome"):
        score = outcome["title"]
        if not score:
            # Skip outcome images whose title is empty.
            continue
        parts = score.split("-")
        key = int(parts[0]) + int(parts[1])
        print("Key %d" % key)
        results[key] = score

Вывод:

Title : FURIA
Key 4
Key 5
...
Title : Chaos
Key 1
Key 2
...