Запрос POST с использованием scrapy.FormRequest - PullRequest
0 голосов
/ 14 июля 2020

Мне нужно получить данные из запроса v2?count=3 со страницы https://support.hpe.com/hpesc/public/km/Security-Bulletin-Library#sort=relevancy&layout=table&numberOfResults=25&f:@kmdocsecuritybulletin=[4000003]&f:@kmdoclanguagecode=[cv1871440,cv1871463]&hpe=1

Данные, которые мне нужны, показаны на изображении.

class HPUXSpider(_BaseSpider):
    """Spider that queries the Coveo search endpoint behind the HPE bulletin page.

    The crawl consists of one form-encoded POST to
    ``https://platform.cloud.coveo.com/rest/search/v2``; the response is handed
    to ``self.save_response``.
    """

    name = 'hp_ux_spider'

    def start_requests(self):
        """Build the single search request that starts the crawl.

        Returns:
            A one-element list with the ``scrapy.FormRequest``. Its callback is
            ``self.save_response``, which additionally receives the
            ``path_dir`` and ``file_name`` keyword arguments via ``cb_kwargs``.
        """
        # NOTE(review): the request carries no ``Authorization`` header; the
        # search endpoint answered 401 to it, so it presumably expects a
        # bearer token -- TODO confirm and supply one via ``headers=``.
        #
        # Every form value must stay on one physical line: a single-quoted
        # literal broken across lines is a SyntaxError.
        return [scrapy.FormRequest(
            url='https://platform.cloud.coveo.com/rest/search/v2?count=3',
            method='POST',
            formdata={
                'actionsHistory': r'[{"name":"Query","time":"\"2020-07-13T12:49:51.480Z\""},{"name":"Query","time":"\"2020-07-13T10:44:35.303Z\""},{"name":"Query","time":"\"2020-07-13T07:49:10.078Z\""},{"name":"Query","time":"\"2020-07-13T06:58:59.532Z\""},{"name":"Query","time":"\"2020-07-13T06:57:24.599Z\""},{"name":"Query","time":"\"2020-07-12T21:47:41.323Z\""},{"name":"Query","time":"\"2020-07-12T16:38:19.741Z\""},{"name":"Query","time":"\"2020-07-12T06:04:36.049Z\""},{"name":"Query","time":"\"2020-07-12T05:59:39.814Z\""},{"name":"Query","time":"\"2020-07-11T19:31:55.963Z\""},{"name":"Query","time":"\"2020-07-11T19:29:55.997Z\""},{"name":"Query","time":"\"2020-07-11T19:23:29.999Z\""},{"name":"Query","time":"\"2020-07-11T19:21:09.859Z\""},{"name":"Query","time":"\"2020-07-11T19:19:03.748Z\""},{"name":"Query","time":"\"2020-07-11T19:17:23.735Z\""},{"name":"Query","time":"\"2020-07-11T19:14:51.152Z\""},{"name":"Query","time":"\"2020-07-11T18:54:03.418Z\""},{"name":"Query","time":"\"2020-07-11T12:28:39.484Z\""},{"name":"Query","time":"\"2020-07-10T13:08:42.876Z\""},{"name":"Query","time":"\"2020-07-10T12:57:51.285Z\""}]',
                'referrer': 'https://support.hpe.com/hpesc/public/km/Security-Bulletin-Library',
                'visitorId': '33b0ede7-3274-486f-a31c-23ed3001ad91',
                'isGuestUser': 'false',
                'aq': '(@kmdoctypedetails==cv66000018) ((NOT @kmdoctype=cv60000001)) (@kmdocsecuritybulletin==4000003) (@kmdoclanguagecode==(cv1871440,cv1871463))',
                'cq': '(@source=="cdp-km-document-pro-h4-v2")',
                'searchHub': 'HPE-SecurityBulletins-Page',
                'locale': 'ru',
                'firstResult': '0',
                'numberOfResults': '25',
                'excerptLength': '500',
                'enableDidYouMean': 'true',
                'sortCriteria': 'relevancy',
                'queryFunctions': '[]',
                'rankingFunctions': '[]',
                'groupBy': r'[{"field":"@kmdocsecuritybulletin","maximumNumberOfValues":20,"sortCriteria":"nosort","injectionDepth":1000,"completeFacetWithStandardValues":true,"allowedValues":["4000019","4000018","4000005","4000004","4000017","4000003","4000009","4000006","4000007","4000008","4000001","4000002","4000010","4000011","4000012","4000013","4000014","4000015","4000016"],"advancedQueryOverride":"(@kmdoctypedetails==cv66000018) ((NOT @kmdoctype=cv60000001)) (@kmdoclanguagecode==(cv1871440,cv1871463))","constantQueryOverride":"(@source==\"cdp-km-document-pro-h4-v2\")"},{"field":"@kmdoclanguagecode","maximumNumberOfValues":6,"sortCriteria":"Score","injectionDepth":1000,"completeFacetWithStandardValues":true,"allowedValues":["cv1871440","cv1871463"],"advancedQueryOverride":"(@kmdoctypedetails==cv66000018) ((NOT @kmdoctype=cv60000001)) (@kmdocsecuritybulletin==4000003)","constantQueryOverride":"(@source==\"cdp-km-document-pro-h4-v2\")"},{"field":"@kmdoctopissue","maximumNumberOfValues":6,"sortCriteria":"Score","injectionDepth":1000,"completeFacetWithStandardValues":true,"allowedValues":[],"advancedQueryOverride":"(@kmdoctypedetails==cv66000018) ((NOT @kmdoctype=cv60000001)) (@kmdocsecuritybulletin==4000003) (@kmdoclanguagecode==(cv1871440,cv1871463))","constantQueryOverride":"(@source==\"cdp-km-document-pro-h4-v2\") @kmdoctopissueexpirationdate>today"},{"field":"@kmdocdisclosurelevel","maximumNumberOfValues":6,"sortCriteria":"Score","injectionDepth":1000,"completeFacetWithStandardValues":true,"allowedValues":[]},{"field":"@hpescuniversaldate","completeFacetWithStandardValues":true,"maximumNumberOfValues":1,"sortCriteria":"nosort","generateAutomaticRanges":true,"advancedQueryOverride":"(@kmdoctypedetails==cv66000018) ((NOT @kmdoctype=cv60000001)) (@kmdocsecuritybulletin==4000003) (@kmdoclanguagecode==(cv1871440,cv1871463)) @uri","constantQueryOverride":"(@source==\"cdp-km-document-pro-h4-v2\") @hpescuniversaldate>1970/01/01@00:00:00"},{"field":"@hpescuniversaldate","completeFacetWithStandardValues":true,"maximumNumberOfValues":1,"sortCriteria":"nosort","generateAutomaticRanges":true,"constantQueryOverride":"(@source==\"cdp-km-document-pro-h4-v2\") @hpescuniversaldate>1970/01/01@00:00:00 @hpescuniversaldate>1970/01/01@00:00:00"},{"field":"@hpescuniversaldate","maximumNumberOfValues":5,"sortCriteria":"nosort","injectionDepth":1000,"completeFacetWithStandardValues":true,"rangeValues":[{"start":"1900-01-31T18:20:09.000Z","end":"2020-07-13T17:00:00.000Z","label":"All dates","endInclusive":false},{"start":"2020-07-05T17:00:00.000Z","end":"2020-07-13T17:00:00.000Z","label":"Last 7 days","endInclusive":false},{"start":"2020-06-12T17:00:00.000Z","end":"2020-07-13T17:00:00.000Z","label":"Last 30 days","endInclusive":false},{"start":"2020-05-13T17:00:00.000Z","end":"2020-07-13T17:00:00.000Z","label":"Last 60 days","endInclusive":false},{"start":"2020-04-13T17:00:00.000Z","end":"2020-07-12T17:00:00.000Z","label":"Last 90 days","endInclusive":false}]}]',
                'facetOptions': '{}',
                'categoryFacets': '[]',
                'retrieveFirstSentences': 'true',
                'timezone': 'Asia/Tomsk',
                'enableQuerySyntax': 'false',
                'enableDuplicateFiltering': 'false',
                'enableCollaborativeRating': 'false',
                'debug': 'false',
                'context': '{"tracking_id":"HPESCXwxYkRD5BgcAAFnGlJ0AAAAY","active_features":"DCS,DHFWS,SA2,patchCoveoSearchToggle,sa2_product_focus_target_levels_toggle,toggleCsr,toggleSecBulletin","user_tracking_id":"XwRimRD5AcgAAFl2OMkAAAAW"}',
                'allowQueriesWithoutKeywords': 'true',
            },
            callback=self.save_response,
            cb_kwargs=dict(path_dir=DATA_DIR, file_name='1.json')
        ) ]

Журнал

2020-07-14 07:17:33 [scrapy.core.engine] INFO: Spider opened
2020-07-14 07:17:33 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
2020-07-14 07:17:33 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6023
2020-07-14 07:17:34 [scrapy_user_agents.middlewares] DEBUG: Assigned User-Agent Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36
2020-07-14 07:17:34 [scrapy.core.engine] DEBUG: Crawled (401) <POST https://platform.cloud.coveo.com/rest/search/v2?count=3> (referer: None)
2020-07-14 07:17:34 [scrapy.spidermiddlewares.httperror] INFO: Ignoring response <401 https://platform.cloud.coveo.com/rest/search/v2?count=3>: HTTP status code is not handled or not allowed
2020-07-14 07:17:34 [scrapy.core.engine] INFO: Closing spider (finished)
2020-07-14 07:17:34 [scrapy.statscollectors] INFO: Dumping Scrapy stats:

Что я делаю не так?

Traceback1

Traceback (most recent call last):
  File "/usr/local/lib/python3.7/site-packages/scrapy/crawler.py", line 192, in crawl
    return self._crawl(crawler, *args, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/scrapy/crawler.py", line 196, in _crawl
    d = crawler.crawl(*args, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/twisted/internet/defer.py", line 1613, in unwindGenerator
    return _cancellableInlineCallbacks(gen)
  File "/usr/local/lib/python3.7/site-packages/twisted/internet/defer.py", line 1529, in _cancellableInlineCallbacks
    _inlineCallbacks(None, g, status)
--- <exception caught here> ---
  File "/usr/local/lib/python3.7/site-packages/twisted/internet/defer.py", line 1418, in _inlineCallbacks
    result = g.send(result)
  File "/usr/local/lib/python3.7/site-packages/scrapy/crawler.py", line 88, in crawl
    start_requests = iter(self.spider.start_requests())
  File "/code/hp_ux/splash/spiders/hp_ux_spider.py", line 50, in start_requests
    cb_kwargs=dict(path_dir=DATA_DIR, file_name='1.json')
  File "/usr/local/lib/python3.7/site-packages/scrapy/http/request/form.py", line 27, in __init__
    super(FormRequest, self).__init__(*args, **kwargs)
builtins.TypeError: __init__() got an unexpected keyword argument 'params'

2020-07-14 11:32:04 [twisted] CRITICAL: 
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/site-packages/twisted/internet/defer.py", line 1418, in _inlineCallbacks
    result = g.send(result)
  File "/usr/local/lib/python3.7/site-packages/scrapy/crawler.py", line 88, in crawl
    start_requests = iter(self.spider.start_requests())
  File "/code/hp_ux/splash/spiders/hp_ux_spider.py", line 50, in start_requests
    cb_kwargs=dict(path_dir=DATA_DIR, file_name='1.json')
  File "/usr/local/lib/python3.7/site-packages/scrapy/http/request/form.py", line 27, in __init__
    super(FormRequest, self).__init__(*args, **kwargs)
TypeError: __init__() got an unexpected keyword argument 'params'

1 Ответ

1 голос
/ 14 июля 2020

Для этого сайта нужно передавать заголовки с авторизацией:

    def parse(self, response):
        """Issue the Coveo search query as an authorized, form-encoded POST.

        Args:
            response: The response that triggered this callback; it is not
                inspected here.

        Yields:
            A ``scrapy.FormRequest`` to the Coveo search endpoint whose
            response is handled by ``self.parse_result``.
        """
        headers = {
            'Connection': 'keep-alive',
            # NOTE(review): hard-coded bearer token. Its JWT payload carries
            # ``iat``/``exp`` claims 24 hours apart, so this literal is
            # short-lived -- presumably a fresh token has to be obtained from
            # the HPE support page before each crawl; TODO confirm.
            'Authorization': 'Bearer eyJhbGciOiJIUzI1NiJ9.eyJwaXBlbGluZSI6ImNkcC1ocGVzYy1waXBlbGluZS1wcm8taDQtdjEyIiwidXNlckdyb3VwcyI6WyJMT0NBTF9QT1JUQUxfSFBQX1VTRVJTIiwiTE9DQUxfUE9SVEFMX0NPVU5UUllfVVMiLCJMT0NBTF9QT1JUQUxfTEFOR1VBR0VfRU4iLCJMT0NBTF9QT1JUQUxfQ09NUEFOWV9IUEUiLCJMT0NBTF9QT1JUQUxfR1VFU1RfVVNFUlMiXSwidjgiOnRydWUsIm9yZ2FuaXphdGlvbiI6Imhld2xldHRwYWNrYXJkcHJvZHVjdGlvbml3bWc5Yjl3IiwidXNlcklkcyI6W3sicHJvdmlkZXIiOiJFbWFpbCBTZWN1cml0eSBQcm92aWRlciIsIm5hbWUiOiJhbm9ueW1vdXNAY292ZW8uY29tIiwidHlwZSI6IlVzZXIifV0sInJvbGVzIjpbInF1ZXJ5RXhlY3V0b3IiXSwiZXhwIjoxNTk0ODEzODI0LCJpYXQiOjE1OTQ3Mjc0MjR9.O-SGmzsy2QdMClI9CfmN5MY9G1JBQmCe9m379zFpa4Y',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded; charset="UTF-8"',
            'Accept': '*/*',
            'Origin': 'https://support.hpe.com',
            'Sec-Fetch-Site': 'cross-site',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Dest': 'empty',
            'Referer': 'https://support.hpe.com/hpesc/public/km/Security-Bulletin-Library',
            'Accept-Language': 'en-US,en;q=0.9,ru-RU;q=0.8,ru;q=0.7,uk;q=0.6,en-GB;q=0.5',
        }
        # Form fields of the search query. Each value must stay on a single
        # physical line: a single-quoted literal broken across lines is a
        # SyntaxError.
        data = {
            'actionsHistory': '[{"name":"Query","time":"\\"2020-07-14T11:50:24.995Z\\""},{"name":"Query","time":"\\"2020-07-14T11:15:14.602Z\\""}]',
            'referrer': '',
            'visitorId': 'deabe929-cc0e-41eb-ab62-f62e40aca82a',
            'isGuestUser': 'false',
            'aq': '(@kmdoctypedetails==cv66000018) ((NOT @kmdoctype=cv60000001)) (@kmdocsecuritybulletin==4000003) (@kmdoclanguagecode==(cv1871440,cv1871463))',
            'cq': '(@source=="cdp-km-document-pro-h4-v2")',
            'searchHub': 'HPE-SecurityBulletins-Page',
            'locale': 'en',
            'firstResult': '25',
            'numberOfResults': '25',
            'excerptLength': '500',
            'enableDidYouMean': 'true',
            'sortCriteria': 'relevancy',
            'queryFunctions': '[]',
            'rankingFunctions': '[]',
            'groupBy': '[{"field":"@kmdocsecuritybulletin","maximumNumberOfValues":20,"sortCriteria":"nosort","injectionDepth":1000,"completeFacetWithStandardValues":true,"allowedValues":["4000019","4000018","4000005","4000004","4000017","4000003","4000009","4000006","4000007","4000008","4000001","4000002","4000010","4000011","4000012","4000013","4000014","4000015","4000016"],"advancedQueryOverride":"(@kmdoctypedetails==cv66000018) ((NOT @kmdoctype=cv60000001)) (@kmdoclanguagecode==(cv1871440,cv1871463))","constantQueryOverride":"(@source==\\"cdp-km-document-pro-h4-v2\\")"},{"field":"@kmdoclanguagecode","maximumNumberOfValues":6,"sortCriteria":"Score","injectionDepth":1000,"completeFacetWithStandardValues":true,"allowedValues":["cv1871440","cv1871463"],"advancedQueryOverride":"(@kmdoctypedetails==cv66000018) ((NOT @kmdoctype=cv60000001)) (@kmdocsecuritybulletin==4000003)","constantQueryOverride":"(@source==\\"cdp-km-document-pro-h4-v2\\")"},{"field":"@kmdoctopissue","maximumNumberOfValues":6,"sortCriteria":"Score","injectionDepth":1000,"completeFacetWithStandardValues":true,"allowedValues":[],"advancedQueryOverride":"(@kmdoctypedetails==cv66000018) ((NOT @kmdoctype=cv60000001)) (@kmdocsecuritybulletin==4000003) (@kmdoclanguagecode==(cv1871440,cv1871463))","constantQueryOverride":"(@source==\\"cdp-km-document-pro-h4-v2\\") @kmdoctopissueexpirationdate>today"},{"field":"@kmdocdisclosurelevel","maximumNumberOfValues":6,"sortCriteria":"Score","injectionDepth":1000,"completeFacetWithStandardValues":true,"allowedValues":[]},{"field":"@hpescuniversaldate","maximumNumberOfValues":5,"sortCriteria":"nosort","injectionDepth":1000,"completeFacetWithStandardValues":true,"rangeValues":[{"start":"1900-01-31T21:57:56.000Z","end":"2020-07-14T21:00:00.000Z","label":"All dates","endInclusive":false},{"start":"2020-07-06T21:00:00.000Z","end":"2020-07-14T21:00:00.000Z","label":"Last 7 days","endInclusive":false},{"start":"2020-06-13T21:00:00.000Z","end":"2020-07-14T21:00:00.000Z","label":"Last 30 days","endInclusive":false},{"start":"2020-05-14T21:00:00.000Z","end":"2020-07-14T21:00:00.000Z","label":"Last 60 days","endInclusive":false},{"start":"2020-04-14T21:00:00.000Z","end":"2020-07-13T21:00:00.000Z","label":"Last 90 days","endInclusive":false}]},{"field":"@hpescuniversaldate","completeFacetWithStandardValues":true,"maximumNumberOfValues":1,"sortCriteria":"nosort","generateAutomaticRanges":true,"advancedQueryOverride":"(@kmdoctypedetails==cv66000018) ((NOT @kmdoctype=cv60000001)) (@kmdocsecuritybulletin==4000003) (@kmdoclanguagecode==(cv1871440,cv1871463)) @uri","constantQueryOverride":"(@source==\\"cdp-km-document-pro-h4-v2\\") @hpescuniversaldate>1970/01/01@00:00:00"},{"field":"@hpescuniversaldate","completeFacetWithStandardValues":true,"maximumNumberOfValues":1,"sortCriteria":"nosort","generateAutomaticRanges":true,"constantQueryOverride":"(@source==\\"cdp-km-document-pro-h4-v2\\") @hpescuniversaldate>1970/01/01@00:00:00 @hpescuniversaldate>1970/01/01@00:00:00"}]',
            'facetOptions': '{}',
            'categoryFacets': '[]',
            'retrieveFirstSentences': 'true',
            'timezone': 'Europe/Kiev',
            'enableQuerySyntax': 'false',
            'enableDuplicateFiltering': 'false',
            'enableCollaborativeRating': 'false',
            'debug': 'false',
            'context': '{"tracking_id":"HPESCXw2cKBD5AcgAADvUM8IAAAAa","active_features":"DCS,DHFWS,SA2,patchCoveoSearchToggle,sa2_product_focus_target_levels_toggle,toggleCsr,toggleSecBulletin","user_tracking_id":"Xw2TthD5AcgAACecWi0AAAAZ"}',
            'allowQueriesWithoutKeywords': 'true',
        }

        url = 'https://platform.cloud.coveo.com/rest/search/v2?count=3'
        # Passing ``formdata`` makes FormRequest send a url-encoded POST body.
        yield scrapy.FormRequest(
            url=url,
            formdata=data,
            headers=headers,
            callback=self.parse_result,
        )

    def parse_result(self, response):
        """Decode the JSON body of the search response and print it.

        Args:
            response: Text response whose ``text`` attribute holds the JSON
                document returned by the search endpoint.

        Raises:
            json.JSONDecodeError: If the body is not valid JSON.
        """
        # ``response.text`` is the supported accessor for the decoded body;
        # ``body_as_unicode()`` is deprecated in Scrapy.
        j_obj = json.loads(response.text)
        print(j_obj)
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...