Невозможно войти с помощью python-запросов: возвращает на страницу входа - PullRequest
0 голосов
/ 11 января 2020

Я пытаюсь войти в систему с помощью библиотеки requests. Я уже пытался передать все заголовки запроса, а также пытался сначала отправить POST-запрос, но всё равно безуспешно. Вот страница, на которой я хотел бы войти: https://app.textmaster.com/sign_in

мой полный код выглядит следующим образом:

import bs4
import csv
import requests
import io

class Scraper:
    """Log in to app.textmaster.com with a persistent requests session and
    extract question data from result pages.

    Authentication happens in the constructor; check ``self.authenticated``
    and ``self.errors`` afterwards.
    """

    # Kept for backward compatibility; actual requests use HEADERS['user-agent'].
    USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.19 16.47 Safari/537.36'
    BASE_URL = 'https://app.textmaster.com/'
    # NOTE: the ':authority', 'method', 'path' and 'scheme' entries copied from
    # Chrome devtools were HTTP/2 pseudo-headers, not real request headers —
    # sending them as ordinary headers is wrong, so they are dropped here.
    # The hard-coded 'cookie' line was removed as well: the Session object
    # manages cookies itself, and a stale session cookie can mask login problems.
    HEADERS = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'accept-language': 'pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7,de-DE;q=0.6,de;q=0.5,es;q=0.4',
        'cache-control': 'max-age=0',
        'referer': 'https://app.textmaster.com/authors/dashboard',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'
    }

    def __init__(self):
        # Any errors encountered while scraping are collected here.
        self.errors = []
        # One session so cookies (including the login session cookie) persist
        # across requests.
        self.session = requests.session()
        self.authenticated = self.authenticate()

    def request(self, url: str) -> bs4.BeautifulSoup:
        """GET *url* with the shared session and return the parsed soup."""
        res = self.session.get(url, headers=self.HEADERS)
        return Scraper.make_soup(res.text)

    def authenticate(self) -> bool:
        """Perform the login flow.

        Fetches the sign-in page, extracts the Rails CSRF token
        (``authenticity_token``) and POSTs the credentials.  Returns True when
        the server redirects to /authors/dashboard; on failure an explanation
        is appended to ``self.errors``.
        """
        LOGIN_URL = self.BASE_URL + 'sign_in'
        # Load the sign-in page first: the CSRF token embedded in its form is
        # required for the POST to be accepted.
        soup = self.request(LOGIN_URL)
        input_token = soup.find(
            'input', {'name': 'authenticity_token', 'type': 'hidden'}
        )
        if not input_token:
            self.errors.append("Couldn't get the authentication token")
            return False

        token = input_token.attrs['value']
        email = '********'
        passw = '*******'

        payload = {
            # Rails forms normally submit utf8='\u2713' (a check mark).
            # NOTE(review): if the login keeps bouncing back to the sign-in
            # page, try '\u2713' here — confirm against the real form markup.
            'utf8': '',
            'authenticity_token': token,
            'user[email]': email,
            'user[password]': passw,
            'commit': 'Log in'
        }

        # BUG FIX: the original sent a stray GET with a request body before the
        # POST, and its trailing "; testing if get post fixes" was not even a
        # comment — a SyntaxError that prevented the module from loading.
        res = self.session.post(LOGIN_URL, headers=self.HEADERS, data=payload)
        # A successful login redirects away from /sign_in to the dashboard.
        return '/authors/dashboard' in res.url

    def extract_data(self, soup: bs4.BeautifulSoup) -> list:
        """Extract every question on the page into a list of row dicts.

        Each row has 'ano', 'banca', 'enunciation', 'choice_a'..'choice_e'
        and 'type' ('multiple', 'true-false' or 'N/A').
        (Annotation fixed: the method has always returned a list, not a dict.)
        """
        letters = ('a', 'b', 'c', 'd', 'e')
        results = []

        for question_item in soup.find_all('div', {'class': 'q-question-item'}):
            question_info = question_item.find(
                'div', {'class': 'q-question-info'})
            # First span is the year, second the examining board.
            spans = question_info.find_all('span')
            question_enunciation = question_item.find(
                'div', {'class': 'q-question-enunciation'})

            row_data = {
                'ano': spans[0].text.replace('Ano:', '').strip(),
                'banca': spans[1].text.replace('Banca:', '').strip(),
                'enunciation': question_enunciation.text,
            }
            # Default every choice to '' so the CSV columns always exist.
            for letter in letters:
                row_data['choice_' + letter] = ''

            # Guard against a missing options list instead of crashing with
            # AttributeError; such questions fall through to type 'N/A'.
            options_ul = question_item.find(
                'ul', {'class': 'q-question-options'})
            question_options = options_ul.find_all('li') if options_ul else []

            if len(question_options) == 5:
                for letter, option in zip(letters, question_options):
                    enum = option.find('div', {'class': 'q-item-enum'})
                    row_data['choice_' + letter] = enum.text if enum is not None else ''
                row_data['type'] = 'multiple'
            elif len(question_options) == 2:
                row_data['type'] = 'true-false'
            else:
                row_data['type'] = 'N/A'

            results.append(row_data)

        return results

    @staticmethod
    def make_soup(html: str) -> bs4.BeautifulSoup:
        """Parse *html* with the stdlib 'html.parser' backend."""
        return bs4.BeautifulSoup(html, 'html.parser')
if __name__ == "__main__":
    # Authentication runs inside the constructor.
    scraper = Scraper()
    # Surface login problems instead of exiting silently.
    if not scraper.authenticated:
        print('Authentication failed:', scraper.errors)

Если это базовый вопрос, не могли бы вы подсказать, как мне продолжить изучение этой проблемы?

Спасибо!

...