В: Как обойти ошибку во вложенном цикле (loop) - find() не принимает именованных аргументов - PullRequest
0 голосов
/ 22 апреля 2020

Я получаю сообщение об ошибке 'find() не принимает именованных аргументов' (find() takes no keyword arguments) в строке кода place = racers.find('td', class_='horse_number').get_text()

Я предполагаю, что это связано с тем, что цикл вложенный, - проблема в вызове find внутри find?

Моя цель - в первом цикле получить подробную информацию о забеге, во втором цикле перебрать каждого участника забега, а в третьем цикле for получить значения времени, соответствующие каждому вложенному оператору if.

            # Walk every race container in `results` (built elsewhere in the
            # script via soup.find_all('div', {'class': 'forPrint'})) and
            # collect, in order: the race header fields, the per-runner fields
            # from the field table, and the timing values from the times table.
            for race in results:

                race_number = race.find('td', class_='raceNumber').get_text()


                race_name1 = race.find('td', class_='raceTitle').get_text()

                race_title1 = race.find('td', class_='raceInformation').get_text()
                # Collapse runs of whitespace/newlines into single spaces.
                race_title1 = ' '.join(race_title1.split())

                race_distance1 = race.find('td', class_='distance').get_text()

                tableofdata = race.find('table', class_='raceFieldTable')

                # NOTE(review): iterating a bs4 Tag directly walks its children,
                # which presumably include plain-text (NavigableString) nodes.
                # On such a node .find() is str.find, which rejects class_=...;
                # that matches the reported "find() takes no keyword arguments"
                # error. Iterating tableofdata.find_all('tr') would yield only
                # row Tags -- confirm against the page markup.
                for racers in tableofdata:

                    place = racers.find('td', class_='horse_number').get_text()


                    # Pattern used below: look the cell up, then strip a label
                    # prefix from its text, falling back to '' when not found.
                    horsename = racers.find('a', class_='horse_name_link')
                    horsename = horsename.text.replace('HorseName: ', '') if horsename else ''

                    prizemoney = racers.find('td', class_='prizemoney')
                    prizemoney = prizemoney.text.replace('Prizemoney: ', '') if prizemoney else ''

                    barrier = racers.find('td', class_='barrier')
                    barrier = barrier.text.replace('Row: ', '') if barrier else ''

                    #tabnumber = race.find('td', class_='horse_number')
                    #tabnumber = tabnumber.text.replace('HorseNumber: ', '') if tabnumber else ''
                    #print(tabnumber, tr2)
                    # NOTE(review): from here on the lookups use find_all(), which
                    # returns a list-like result set rather than a single Tag, so
                    # the following `.text` access is expected to fail whenever
                    # the result is non-empty. These fields likely need find()
                    # like the ones above -- confirm.
                    trainer = racers.find_all('td', class_='trainer-short')
                    trainer = trainer.text.replace('Trainer: ', '') if trainer else ''

                    driver = racers.find_all('td', class_='driver-short')
                    driver = driver.text.replace('Driver: ', '') if driver else ''

                    margin = racers.find_all('td', class_='margin') 
                    margin = margin.text.replace('Margin: ', '') if margin else ''

                    startingprice = racers.find_all('td', class_='starting_price')
                    # NOTE(review): `.text` is read here before the emptiness
                    # check on the next line, so a missing cell is not guarded.
                    startingprice = startingprice.text.replace('StartingOdds: ', '') 
                    startingprice = startingprice.replace('Â', ' ')if startingprice else ''

                    stewardscomments = racers.find_all('span', class_='stewardsTooltip')
                    # NOTE(review): the guard tests `horsename`, not
                    # `stewardscomments` -- looks like a copy/paste slip.
                    stewardscomments = stewardscomments.text.replace('StewardsComments: ', '') if horsename else ''

                    scratchingnumber = racers.find_all('td', class_='number')
                    scratchingnumber = scratchingnumber.text.replace('Scratching: ', '') if scratchingnumber else ''
                    # The times table is looked up on `race`, not on `racers`, so
                    # this lookup is repeated unchanged for every runner.
                    tableoftimes = race.find('table', class_='raceTimes')

                    # Visit each <strong> directly inside a <td> whose text
                    # contains ":" and read the value following the label.
                    # NOTE(review): because every `else` branch blanks its field
                    # and nests the next check, a `t` that matches a later label
                    # resets all fields tested before it in the chain to ''.
                    # Independent `if` tests (with the defaults set once before
                    # the loop) would keep earlier values -- confirm intent.
                    for row in tableoftimes.select('td>strong:contains(":")'):
                            for t in row: 
                                if "Track Rating:" in t:
                                    trackrating = t.next_element.strip()
                                else:
                                    trackrating = ''
                                    if "Gross Time:" in t:
                                        grosstime = t.next_element.strip()
                                    else:
                                        grosstime = ''
                                        if "Mile Rate:" in t:
                                            milerate = t.next_element.strip()
                                        else:
                                            milerate = ''
                                            if "Lead Time:" in t:
                                                leadtime = t.next_element.strip()
                                            else:
                                                leadtime = ''
                                                if "First Quarter:" in t:
                                                    firstquarter = t.next_element.strip()
                                                else:
                                                    firstquarter = ''
                                                    if "Second Quarter:" in t:
                                                        secondquarter = t.next_element.strip()
                                                    else:
                                                        secondquarter = ''
                                                        if "Third Quarter:" in t:
                                                            thirdquarter = t.next_element.strip()
                                                        else:
                                                            thirdquarter = ''
                                                            if "Fourth Quarter:" in t:
                                                                fourthquarter = t.next_element.strip()
                                                            else:
                                                                fourthquarter = ''

Последний вопрос: замена (replace) не работает - в CSV-файл всё равно записывается $ 2,40

# Excerpt of the CSV-writing path; the "...." lines stand for elided code.
# The output file is opened in text mode as UTF-8 and wrapped in a csv writer.
file = open('harnessresults.csv', 'w', newline='', encoding='utf8')
writer = csv.writer(file)
....
# NOTE(review): this only swaps the literal 'Â' character for a space. A stray
# 'Â' usually comes from a UTF-8 non-breaking space being decoded with the
# wrong codec, so presumably the non-breaking space ('\xa0') itself remains in
# the value -- confirm by inspecting repr(startingprice).
startingprice = startingprice.replace('Â', ' ')if startingprice else ''
....
# One CSV row per runner: venue, race header fields, runner fields, then times.
writer.writerow([tr2, race_number, race_name1, race_title1, race_distance1, place, horsename, prizemoney, barrier, trainer, driver, margin, startingprice, stewardscomments, scratchingnumber, trackrating, grosstime, milerate, leadtime, firstquarter, secondquarter, thirdquarter, fourthquarter])

ОБНОВЛЕНО

Начало HTML со списком выглядит как ниже

from datetime import datetime, date, timedelta
import requests
import re
import csv
import os
import numpy
import pandas as pd
from bs4 import BeautifulSoup as bs
from simplified_scrapy import SimplifiedDoc,req,utils

# Script setup: open the output CSV, define the site URLs, fetch the results
# landing page once, set the date range, and write the CSV header row.
# NOTE(review): the file is opened without a `with` block and no close() is
# visible in this snippet -- confirm it is closed elsewhere.
file = open('harnessresults.csv', 'w', newline='', encoding='utf8')
writer = csv.writer(file)

# base_url gets a date appended per day; base1_url is prefixed to meeting hrefs.
base_url = "http://www.harness.org.au/racing/results/?firstDate="
base1_url = "http://www.harness.org.au"

webpage_response = requests.get('http://www.harness.org.au/racing/results/?firstDate=')

# Both `webpage_response` and `soup` are reassigned inside the meeting loop.
soup = bs(webpage_response.content, "html.parser")

# NOTE(review): `format` shadows the builtin and, like `delta`, is not used in
# the visible code (strftime below is given the pattern as a literal).
format = "%d-%m-%y"
delta = timedelta(days=1)
yesterday = datetime.today() - timedelta(days=1)

# Despite its name, `enddate` is the moving cursor of the date loop below.
enddate = datetime(2020, 4, 20)
# Write the CSV header row.
writer.writerow(['Venue', 'RaceNumber', 'RaceName', 'RaceTitle', 'RaceDistance', 'Place', 'HorseName', 'Prizemoney', 'Row', 'Trainer', 'Driver', 'Margin', 'StartingOdds', 'StewardsComments', 'Scratching', 'TrackRating', 'Gross_Time', 'Mile_Rate', 'Lead_Time', 'First_Quarter', 'Second_Quarter', 'Third_Quarter', 'Fourth_Quarter'])


# For each day in the range, load that day's meeting list, then open every
# meeting page and collect its per-race containers into `results`.
# NOTE(review): enddate is advanced before the URL is built, so the initial
# enddate is never requested and the final pass uses a date one day past the
# value that satisfied the loop condition -- confirm this is intended.
while enddate <= yesterday:
    enddate += timedelta(days=1)
    enddate1 = enddate.strftime("%d-%m-%y") 
    new_url = base_url + str(enddate1)
    soup12 = requests.get(new_url)
    soup1 = bs(soup12.content, "html.parser") 
    # NOTE(review): find() returns None when nothing matches, in which case
    # the find_all() call below would raise -- confirm every day has a table.
    table1 = soup1.find('table', class_='meetingListFull')

    # Rows whose class is either 'odd' or 'even'.
    tr = table1.find_all('tr', {'class':['odd', 'even']})

    for tr1 in tr:
        # First link in the row: its text is the venue written to the CSV (tr2)
        # and its href is the path of the meeting's results page.
        tr2 = tr1.find('a').get_text()
        tr3 = tr1.find('a')['href']
        newurl = base1_url + tr3
        # A new Session is created for every meeting row.
        with requests.Session() as s:
            webpage_response = s.get(newurl)
            soup = bs(webpage_response.content, "html.parser")
            #soup1 = soup.select('.content')
            # The snippet ends here; the per-race loop over `results` shown in
            # the first snippet presumably continues at this indent -- confirm.
            results = soup.find_all('div', {'class':'forPrint'})
            #resultsv2 = soup.find_all('table', {'class':'raceFieldTable'})
            #resultsv2 = soup.find_all('table', {'class':'raceFieldTable'})

Ожидается, что CSV будет выглядеть так:

enter image description here

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...