Gevent Django удаленное соединение закрыто - PullRequest
0 голосов
/ 17 июня 2019

Я написал следующий код:
from django.core.management.base import BaseCommand, CommandError
from sbbetting.models import Team, League, Fixture, Country
import requests
from bs4 import BeautifulSoup
from django.core.exceptions import ObjectDoesNotExist
from datetime import datetime, timedelta

# Bound the number of fixtures processed concurrently.  With an unbounded
# pool, imap_unordered() starts one greenlet per fixture immediately; they all
# sleep for the same interval and then send their requests in a single burst,
# so the per-greenlet sleep does not throttle anything.  Tune the size to what
# the remote host tolerates.
pool = gevent.pool.Pool(size=5)

class Command(BaseCommand):
    """Scrape the flashscore schedule feed and store its fixtures.

    ``handle`` downloads the schedule feed, extracts the match codes from it
    and runs ``analyse_schedule`` for every code on the module-level ``pool``.
    """

    # Seconds a greenlet pauses before it starts requesting a match.
    REQUEST_DELAY = 10
    # A pair of match lists is followed only when both contain more rows than
    # this, and at most this many rows of each list are processed.
    RELATED_LIMIT = 10

    def get_page(self, url, use_headers=None, retries=2, retry_delay=5):
        """Download *url* and return it parsed as a ``BeautifulSoup`` tree.

        Args:
            url: Address to fetch.
            use_headers: When truthy, send the ``Host: d.flashscore.com``
                header with the request.
            retries: Number of additional attempts made after a
                ``requests.exceptions.ConnectionError``.
            retry_delay: Seconds to wait before the first retry; the wait
                grows linearly with every further attempt.

        Returns:
            The response body parsed with the ``lxml`` feature set.

        Raises:
            requests.exceptions.ConnectionError: If every attempt fails.
        """
        # headers=None is what requests.get() uses when no headers are given.
        headers = {"Host": "d.flashscore.com"} if use_headers else None

        attempt = 0
        while True:
            try:
                response = requests.get(url, headers=headers)
            except requests.exceptions.ConnectionError:
                # The remote end sometimes drops the connection without a
                # response; back off and try again before giving up.
                if attempt >= retries:
                    raise
                attempt += 1
                gevent.sleep(retry_delay * attempt)
            else:
                return BeautifulSoup(response.content, features="lxml")

    def find_ids(self, input):
        """Return the 8-character codes that follow each ``AA÷`` marker.

        Args:
            input: Text of the schedule feed.

        Returns:
            A list with, for every occurrence of ``AA÷``, the (up to) eight
            characters that come right after it, in order of appearance.
        """
        marker = "AA÷"
        found = []
        position = input.find(marker)
        while position != -1:
            start = position + len(marker)
            found.append(input[start:start + 8])
            position = input.find(marker, position + 1)
        return found

    @staticmethod
    def _team_id(team_row):
        """Return the team id embedded in the ``onclick`` of the row's link."""
        return team_row.find('a').get('onclick').split('/')[3].split("'")[0]

    @staticmethod
    def _team_name(team_row):
        """Return the ``alt`` text of the row's image up to the first `` (``."""
        return team_row.find('img').get('alt').split(" (")[0]

    @staticmethod
    def _match_rows(container):
        """Return the ``tr`` rows of the first table body inside *container*."""
        return container.find('table').find('tbody').find_all('tr')

    def _parse_fixture_page(self, fixture_data):
        """Extract country, league and team details from a parsed match page.

        Returns:
            A dict with the keys ``country_name``, ``league_name``,
            ``league_url``, ``home_id``, ``away_id``, ``home_name`` and
            ``away_name``.
        """
        team_rows = fixture_data.find_all('div', {'class': 'side-images-row'})
        home_row, away_row = team_rows[0], team_rows[1]

        header = fixture_data.find('div', {'class': 'fleft'})
        # The second span holds "<country>:<league> -<rest>"; split it once.
        header_parts = header.find_all('span')[1].text.split(":")
        league_path = header.find('a').get('onclick').split("'")[1]

        return {
            'country_name': header_parts[0].lower().title(),
            # Drop only the first space, i.e. the one following the colon.
            'league_name': header_parts[1].split(" -")[0].replace(" ", "", 1),
            'league_url': "https://flashscore.com" + league_path,
            'home_id': self._team_id(home_row),
            'away_id': self._team_id(away_row),
            'home_name': self._team_name(home_row),
            'away_name': self._team_name(away_row),
        }

    def _store_participants(self, details):
        """Fetch the league season and call ``create`` for all related models.

        Args:
            details: Dict produced by ``_parse_fixture_page``.

        Returns:
            A ``(league, home, away)`` tuple of the objects returned by
            ``League.create`` and the two ``Team.create`` calls.
        """
        league_data = self.get_page(details['league_url'])
        season = league_data.find('div', {'class': 'tournament-season'}).text

        country = Country.create(name=details['country_name'])
        league = League.create(name=details['league_name'], season=season, country=country)
        home = Team.create(name=details['home_name'], league=league, flashscore_id=details['home_id'])
        away = Team.create(name=details['away_name'], league=league, flashscore_id=details['away_id'])
        return league, home, away

    def _parse_score(self, fixture_data, summary_data):
        """Read the goals of a finished match.

        When the summary feed has more than one incidents header, the goals
        are read per half and summed; otherwise only the totals are read from
        the scoreboard of the match page and the per-half values stay ``0``.

        Returns:
            A dict with the per-half goals, the totals per side, the overall
            ``total_goals`` and ``all_fields_populated`` (``False`` when the
            per-half values could not be read).
        """
        score = {
            'fh_goals_home': 0,
            'fh_goals_away': 0,
            'sh_goals_home': 0,
            'sh_goals_away': 0,
            'all_fields_populated': True,
        }

        halves = summary_data.find_all('div', {'class': 'detailMS__incidentsHeader'})
        if len(halves) > 1:
            first_half, second_half = halves[0], halves[1]
            score['fh_goals_home'] = int(first_half.find('span', {'class': 'p1_home'}).text)
            score['fh_goals_away'] = int(first_half.find('span', {'class': 'p1_away'}).text)
            score['sh_goals_home'] = int(second_half.find('span', {'class': 'p2_home'}).text)
            score['sh_goals_away'] = int(second_half.find('span', {'class': 'p2_away'}).text)
            score['total_goals_home'] = score['fh_goals_home'] + score['sh_goals_home']
            score['total_goals_away'] = score['fh_goals_away'] + score['sh_goals_away']
        else:
            match_result = fixture_data.find('div', {'id': "event_detail_current_result"}).find_all('span', {'class': 'scoreboard'})
            score['total_goals_home'] = int(match_result[0].text)
            score['total_goals_away'] = int(match_result[1].text)
            score['all_fields_populated'] = False

        score['total_goals'] = score['total_goals_home'] + score['total_goals_away']
        return score

    def create_related_fixtures(self, fixture, related_fixtures):
        """Process the first ``RELATED_LIMIT`` rows of a list of past matches.

        For every row the match page and its summary feed are downloaded, the
        country, league and teams are passed to their ``create`` methods and
        the score is parsed.

        Args:
            fixture: The fixture the rows belong to (not used by this method).
            related_fixtures: Table rows whose ``onclick`` attribute carries
                the match code.
        """
        for match in related_fixtures[0:self.RELATED_LIMIT]:
            gevent.sleep(self.REQUEST_DELAY)
            # The match code is the 8 characters at a fixed offset of onclick.
            match_code = match.get('onclick')[17:25]

            fixture_data = self.get_page("https://www.flashscore.com/match/" + match_code)
            summary_data = self.get_page("https://d.flashscore.com/x/feed/d_su_" + match_code + "_en_1", True)

            details = self._parse_fixture_page(fixture_data)
            self._store_participants(details)

            # NOTE(review): the score is parsed but not stored or linked to
            # ``fixture`` anywhere in this method - presumably the persisting
            # code is still to be written; confirm.
            self._parse_score(fixture_data, summary_data)

    def analyse_schedule(self, fixture):
        """Store one scheduled fixture and process its related past matches.

        Args:
            fixture: Match code taken from the schedule feed.

        The method saves a ``Fixture`` for the match, downloads the
        head-to-head feed and, for each pair of match lists in which both
        lists have more than ``RELATED_LIMIT`` rows, runs
        ``create_related_fixtures`` on both lists concurrently.  It returns
        only after those greenlets have finished.
        """
        gevent.sleep(self.REQUEST_DELAY)
        fixture_data = self.get_page("https://flashscore.com/match/{}".format(fixture))

        # The kick-off timestamp is read from the tenth <script> element.
        kickoff = int(fixture_data.find_all('script')[9].text.split('= ')[8].split(";")[0])
        # NOTE(review): fixed +2h shift of the UTC time - presumably a local
        # time zone offset; confirm.
        date = datetime.utcfromtimestamp(kickoff) + timedelta(hours=2)

        details = self._parse_fixture_page(fixture_data)
        league, home, away = self._store_participants(details)

        new_fixture = Fixture(home=home, away=away, date=date, league=league, flashscore_id=fixture)
        new_fixture.save()

        h2h_data = self.get_page("https://d.flashscore.com/x/feed/d_hh_" + fixture + "_en_1", True)
        h2h_list = h2h_data.find_all('div', {'class': 'h2h-wrapper'})

        home_matches = self._match_rows(h2h_list[0])
        away_matches = self._match_rows(h2h_list[1])
        home_home_matches = self._match_rows(h2h_data.find('div', {'id': 'tab-h2h-home'}))
        away_away_matches = self._match_rows(h2h_data.find('div', {'id': 'tab-h2h-away'}))

        jobs = []
        for first, second in ((home_matches, away_matches), (home_home_matches, away_away_matches)):
            if len(first) > self.RELATED_LIMIT and len(second) > self.RELATED_LIMIT:
                jobs.append(gevent.spawn(self.create_related_fixtures, new_fixture, first))
                jobs.append(gevent.spawn(self.create_related_fixtures, new_fixture, second))

        # Wait for the spawned greenlets: without this the command can finish
        # (and the process exit) while they are still sleeping or fetching.
        gevent.joinall(jobs)

    def handle(self, *args, **options):
        """Entry point: analyse every fixture listed in the schedule feed."""
        schedule_data = self.get_page("https://d.flashscore.com/x/feed/f_1_3_2_en_1", True)
        schedule_fixtures = self.find_ids(schedule_data.text)

        # Drain the iterator so every fixture is processed before reporting.
        for _ in pool.imap_unordered(self.analyse_schedule, schedule_fixtures):
            pass

        self.stdout.write(self.style.SUCCESS('Successfully run command'))

Но когда я запускаю эту программу, время от времени я получаю следующую ошибку:

requests.exceptions.ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))

Сначала я попытался решить эту проблему, добавив gevent.sleep(3) в каждый цикл, но это не сработало. Затем я попытался увеличить значение gevent.sleep, но это тоже не помогло. Monkey-патчинг (monkey patching) выполняется в manage.py (я использую Django).

Может кто-нибудь помочь мне избежать этой проблемы?

...