Question

Я хочу сканировать веб-приложение (ASP.NET), для которого требуется логин.

URL, с которого я хочу сканировать данные: https://wo.gruposese.com/mod_Clientes/Pedido

URL логина: https://wo.gruposese.com/Login.aspx?ReturnUrl=%2fDefault.aspx

Я попробовал следующий код (я использую прокси, но здесь опускаю эту часть, потому что она работает нормально: я могу получить обычный HTML-код с веб-сайта, но не могу войти в систему, — так что проблема не должна быть в прокси):

import requests
from lxml import html

# Placeholder credentials that are submitted with the login form below.
username = "I-am-a-username"
password = "I-am-a-password"
# Login page URL; the ReturnUrl query value "%2fDefault.aspx" is the
# URL-encoded path "/Default.aspx".
url = "https://wo.gruposese.com/Login.aspx?ReturnUrl=%2fDefault.aspx"

# One session object is used for both requests in this script.
session_requests = requests.session()
# First request: fetch the login page so its hidden form field can be read.
result = session_requests.get(url)

# Parse the returned HTML and take the value of the hidden __VIEWSTATE input.
# set() collapses duplicate values; [0] raises IndexError when the page
# contains no such input.
tree = html.fromstring(result.text)
auth_key = list(set(tree.xpath("//input[@name='__VIEWSTATE']/@value")))[0]

# Form fields sent with the login request.
# NOTE(review): the page may define further hidden inputs that the server
# expects back (e.g. __EVENTVALIDATION) -- confirm against the page's <form>.
payload = {
    "LoginControl$UserName": username,
    "LoginControl$Password": password,
    "__VIEWSTATE": auth_key
}

# NOTE(review): the form fields are sent here with GET plus data=; a login
# form is presumably submitted with POST (session_requests.post) -- confirm
# against the method attribute of the page's <form>.
# NOTE(review): the reported traceback is raised inside socket.getaddrinfo,
# i.e. while looking up the host name and before any form data is sent, so
# it presumably points at DNS/proxy configuration rather than at the
# payload -- verify that this request goes through the same proxy settings.
session_requests.get(url, data=payload, headers=dict(referer=url))

Однако я получаю, казалось бы, бесконечное сообщение об ошибке:

---------------------------------------------------------------------------
gaierror                                  Traceback (most recent call last)
C:\Anaconda3\lib\site-packages\urllib3\connection.py in _new_conn(self)
    158             conn = connection.create_connection(
--> 159                 (self._dns_host, self.port), self.timeout, **extra_kw)
    160 

C:\Anaconda3\lib\site-packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
     56 
---> 57     for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
     58         af, socktype, proto, canonname, sa = res

C:\Anaconda3\lib\socket.py in getaddrinfo(host, port, family, type, proto, flags)
    747     addrlist = []
--> 748     for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
    749         af, socktype, proto, canonname, sa = res

gaierror: [Errno 11001] getaddrinfo failed

During handling of the above exception, another exception occurred:

NewConnectionError                        Traceback (most recent call last)
C:\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    599                                                   body=body, headers=headers,
--> 600                                                   chunked=chunked)
    601 

C:\Anaconda3\lib\site-packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    342         try:
--> 343             self._validate_conn(conn)
    344         except (SocketTimeout, BaseSSLError) as e:

C:\Anaconda3\lib\site-packages\urllib3\connectionpool.py in _validate_conn(self, conn)
    838         if not getattr(conn, 'sock', None):  # AppEngine might not have  `.sock`
--> 839             conn.connect()
    840 

C:\Anaconda3\lib\site-packages\urllib3\connection.py in connect(self)
    300         # Add certificate verification
--> 301         conn = self._new_conn()
    302         hostname = self.host

C:\Anaconda3\lib\site-packages\urllib3\connection.py in _new_conn(self)
    167             raise NewConnectionError(
--> 168                 self, "Failed to establish a new connection: %s" % e)
    169 

NewConnectionError: <urllib3.connection.VerifiedHTTPSConnection object at 0x00000006B0B48470>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed

During handling of the above exception, another exception occurred:

MaxRetryError                             Traceback (most recent call last)
C:\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    448                     retries=self.max_retries,
--> 449                     timeout=timeout
    450                 )

C:\Anaconda3\lib\site-packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    637             retries = retries.increment(method, url, error=e, _pool=self,
--> 638                                         _stacktrace=sys.exc_info()[2])
    639             retries.sleep()

C:\Anaconda3\lib\site-packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
    397         if new_retry.is_exhausted():
--> 398             raise MaxRetryError(_pool, url, error or ResponseError(cause))
    399 

MaxRetryError: HTTPSConnectionPool(host='wo.gruposese.com', port=443): Max retries exceeded with url: /Login.aspx?ReturnUrl=%2fDefault.aspx (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x00000006B0B48470>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))

During handling of the above exception, another exception occurred:

ConnectionError                           Traceback (most recent call last)
<ipython-input-1-f6691b6c307b> in <module>
     23 
     24 
---> 25 session_requests.get(url, data=payload, headers=dict(referer=url))

C:\Anaconda3\lib\site-packages\requests\sessions.py in get(self, url, **kwargs)
    544 
    545         kwargs.setdefault('allow_redirects', True)
--> 546         return self.request('GET', url, **kwargs)
    547 
    548     def options(self, url, **kwargs):

C:\Anaconda3\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    531         }
    532         send_kwargs.update(settings)
--> 533         resp = self.send(prep, **send_kwargs)
    534 
    535         return resp

C:\Anaconda3\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
    644 
    645         # Send the request
--> 646         r = adapter.send(request, **kwargs)
    647 
    648         # Total elapsed time of the request (approximately)

C:\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    514                 raise SSLError(e, request=request)
    515 
--> 516             raise ConnectionError(e, request=request)
    517 
    518         except ClosedPoolError as e:

ConnectionError: HTTPSConnectionPool(host='wo.gruposese.com', port=443): Max retries exceeded with url: /Login.aspx?ReturnUrl=%2fDefault.aspx (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x00000006B0B48470>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))

У меня два вопроса:

Я явно что-то делаю не так, но я уже столько всего перепробовал, и ничего не работает. Обычно я ожидаю, что в словарь полезной нагрузки нужно добавить CSRF-токен, но на этом веб-сайте его нет... Я не уверен, не в "__VIEWSTATE" ли здесь проблема.
Кроме того, мне интересно, как в случае ASP.NET после входа перейти на нужную веб-страницу (см. URL-адрес выше) и сканировать данные оттуда?

P.S. Мне не разрешено устанавливать Selenium, и, следовательно, я должен решить проблему с помощью requests или проверки.

Сканирование веб-приложения (ASP.net) с логином

Пожалуйста, войдите или зарегистрируйтесь, чтобы ответить на этот вопрос.

Ответы [ 0 ]

Сканирование веб-приложения (ASP.net) с логином

Пожалуйста, войдите или зарегистрируйтесь, чтобы ответить на этот вопрос.

Ответы [ 0 ]

Похожие темы