Скрапинг Google с помощью плагина recaptcha для puppeteer-extra - PullRequest
0 голосов
/ 27 мая 2020

Я использую Node.js и Puppeteer с плагином для решения reCAPTCHA на странице результатов поиска Google.

До вчерашнего дня всё работало отлично. Теперь Google reCAPTCHA решается, но после решения капчи не происходит перенаправления на страницу результатов поиска, как это было до появления проблемы.

Я заметил, что теперь Google reCAPTCHA отдаётся во фрейме (iframe), а плагин recaptcha для puppeteer-extra, судя по документации, умеет работать и с фреймами: https://www.npmjs.com/package/puppeteer-extra-plugin-recaptcha

Мой код:

// puppeteer-extra is a drop-in replacement for puppeteer,
// it augments the installed puppeteer with plugin functionality
const puppeteer = require('puppeteer-extra')

// add recaptcha plugin and provide it your 2captcha token (= their apiKey)
// 2captcha is the builtin solution provider but others would work as well.
// Please note: You need to add funds to your 2captcha account for this to work
const RecaptchaPlugin = require('puppeteer-extra-plugin-recaptcha')
// Options for the reCAPTCHA plugin: which solving provider to use and
// whether to highlight detected/solved widgets on the page.
const recaptchaOptions = {
  provider: {
    id: '2captcha',
    token: 'd4e...' // REPLACE THIS WITH YOUR OWN 2CAPTCHA API KEY ⚡
  },
  visualFeedback: true // colorize reCAPTCHAs (violet = detected, green = solved)
}

// Register the configured plugin with puppeteer-extra.
puppeteer.use(RecaptchaPlugin(recaptchaOptions))

// puppeteer usage as normal:
// open a Google search, solve the reCAPTCHA interstitial if one is shown,
// then dump the resulting page HTML and save a full-page screenshot.
puppeteer
  .launch({ headless: true })
  .then(async browser => {
    try {
      const page = await browser.newPage()
      await page.goto('https://www.google.com/search?q=google&oq=google&aqs=chrome..69i57j0l5j69i60l2.735j0j7&sourceid=chrome&ie=UTF-8')

      // Arm the navigation wait BEFORE solving: entering the solution invokes the
      // widget's callback, which submits the form right away, so a wait that is
      // registered only afterwards can miss the navigation it is meant to catch.
      const navigation = page.waitForNavigation()
      // Mark the promise as handled in case it is never awaited below
      // (no captcha on the page); awaiting `navigation` itself still rejects.
      navigation.catch(() => {})

      // That's it, a single line of code to solve reCAPTCHAs.
      // With throwOnError disabled the plugin reports failures via `error`.
      // NOTE(review): the widget on Google's block page may carry extra
      // attributes (e.g. data-s) that the solving provider needs - confirm
      // that the plugin/provider version in use forwards them.
      const { solved = [], error } = await page.solveRecaptchas()
      if (error) {
        console.error('reCAPTCHA solving failed:', error)
      }

      // Only wait for the redirect when a captcha was actually solved;
      // otherwise no navigation will happen and the wait would just time out.
      if (solved.some(({ isSolved }) => isSolved)) {
        await navigation
      }

      const bodyHTML = await page.evaluate(() => document.body.innerHTML)
      console.log(bodyHTML)
      await page.screenshot({ path: 'response.png', fullPage: true })
    } finally {
      // Always release the browser process, even when a step above throws.
      await browser.close()
    }
  })
  .catch(err => {
    // Surface launch/navigation failures instead of an unhandled rejection.
    console.error(err)
    process.exitCode = 1
  })

А вот отладочный вывод:

seo@serv:~/google$ DEBUG=puppeteer-extra,puppeteer-extra-plugin:* node test.js

  puppeteer-extra-plugin:base:recaptcha Initialized. +0ms
  puppeteer-extra-plugin:recaptcha Initialized {
  visualFeedback: true,
  throwOnError: false,
  provider: { id: '2captcha', token: 'd4e...' }
} +0ms
  puppeteer-extra plugin registered recaptcha +0ms
  puppeteer-extra no dependencies are missing +1ms
  puppeteer-extra orderPlugins:before [ 'recaptcha' ] +0ms
  puppeteer-extra orderPlugins:after [ 'recaptcha' ] +0ms
  puppeteer-extra-plugin:recaptcha onPageCreated about:blank +0ms
  puppeteer-extra-plugin:recaptcha solveRecaptchas +0ms
  puppeteer-extra-plugin:recaptcha findRecaptchas +0ms
  puppeteer-extra-plugin:recaptcha hasRecaptchaScriptTag true +0ms
  puppeteer-extra-plugin:recaptcha waitForRecaptchaClient - start 2020-05-23T22:04:15.081Z +0ms
  puppeteer-extra-plugin:recaptcha waitForRecaptchaClient - end 2020-05-23T22:04:15.091Z +0ms
  puppeteer-extra-plugin:recaptcha _generateContentScript findRecaptchas undefined +0ms
  puppeteer-extra-plugin:recaptcha findRecaptchas {
  captchas: [
    {
      sitekey: '6LfwuyUTAAAAAOAmoS0fdqijC2PbbdH4kjq62Y1b',
      callback: 'submitCallback',
      id: 'oda83noeflau',
      widgetId: 0,
      display: [Object],
      url: 'https://www.google.com/sorry/index?continue=https://www.google.com/search%3Fq%3Dklima%26oq%3Dgoogle%26aqs%3Dchrome..69i57j0l5j69i60l2.735j0j7%26sourceid%3Dchrome%26ie%3DUTF-8&q=EgS8pdi4GN67pvYFIhkA8aeDS9yQqQWq3BMCa6xUSD_F653Fxcx7MgFy',
      hasResponseElement: true
    }
  ],
  error: null
} +0ms
  puppeteer-extra-plugin:recaptcha getRecaptchaSolutions +0ms
  puppeteer-extra-plugin:recaptcha:2captcha Requesting solution.. {
  provider: '2captcha',
  id: 'oda83noeflau',
  requestAt: 2020-05-23T22:04:15.100Z
} +0ms
  puppeteer-extra-plugin:recaptcha:2captcha Got response {
  err: null,
  result: {
    id: '63956086379',
    text: '03AGdBq26iX0JWbJGTu1Bbl98SH2ZQQxAsLELiNrf9Iz14ORUZPvdjRgNPuF2dgpERJb1ylQqQyw4dSDs8K-UruIlaeE8K9c064cCHYOeuwvggqV91bzYkZDbX39NUToTr7UDgO0LOTts803ELdwNxp5FRZutEqpwK4aTjzX9u8ROXJ1-v4ozbJofb38-zVnhm1eftXYpLxllJpXonswmha5GJRPIwPjKv4RZCdz6xTQRN1wtOPX4emUPBj2qadsKpTg633JV46sh2c66vT7Syb4BSFajNFfR_GbbFdPlnDcd5_E4gg3kV58c'
  },
  invalid: [Function: bound ]
} +2s
  puppeteer-extra-plugin:recaptcha getRecaptchaSolutions {
  solutions: [
    {
      provider: '2captcha',
      id: 'oda83noeflau',
      requestAt: 2020-05-23T22:04:15.100Z,
      providerCaptchaId: '63956086379',
      text: '03AGdBq26iX0JWbJGTu1Bbl98SH2ZQQxAsLELiNrf9Iz14ORUZPvdjRgNPuF2dgpERJb1ylQqQyw4dSDs8K-UruIlaeE8K9c064cCHYOeuwvggqV91bzYkZDbX39NUToTr7UDgO0LOTts803ELdwNxp5FRZutEqpwK4aTjzX9u8ROXJ1-v4ozbJofb38-zVnhm1eftXYpLxllJpXonswmha5GJRPIwPjKv4RZCdz6xTQRN1wtOPX4emUPBj2qadsKpTg633JV46sh2c66vT7Syb4BSFajNFfR_GbbFdPlnDcd5_E4gg3kV58c',
      responseAt: 2020-05-23T22:04:17.370Z,
      hasSolution: true,
      duration: 2.27
    }
  ],
  error: undefined
} +0ms
  puppeteer-extra-plugin:recaptcha enterRecaptchaSolutions +0ms
  puppeteer-extra-plugin:recaptcha _generateContentScript enterRecaptchaSolutions {
  solutions: [
    {
      provider: '2captcha',
      id: 'oda83noeflau',
      requestAt: 2020-05-23T22:04:15.100Z,
      providerCaptchaId: '63956086379',
      text: '03AGdBq26iX0JWbJGTu1Bbl98SH2ZQQxAsLELiNrf9Iz14ORUZPvdjRgNPuF2dgpERJb1ylQqQyw4dSDs8K-UruIlaeE8K9c064cCHYOeuwvggqV91bzYkZDbX39NUToTr7UDgO0LOTts803ELdwNxp5FRZutEqpwK4aTjzX9u8ROXJ1-v4ozbJofb38-zVnhm1eftXYpLxllJpXonswmha5GJRPIwPjKv4RZCdz6xTQRN1wtOPX4emUPBj2qadsKpTg633JV46sh2c66vT7Syb4BSFajNFfR_GbbFdPlnDcd5_E4gg3kV58c',
      responseAt: 2020-05-23T22:04:17.370Z,
      hasSolution: true,
      duration: 2.27
    }
  ]
} +0ms
  puppeteer-extra-plugin:recaptcha enterRecaptchaSolutions {
  solved: [
    {
      id: 'oda83noeflau',
      responseElement: true,
      responseCallback: true,
      isSolved: true,
      solvedAt: {}
    }
  ],
  error: undefined
} +0ms
  puppeteer-extra-plugin:recaptcha solveRecaptchas {
  captchas: [
    {
      sitekey: '6LfwuyUTAAAAAOAmoS0fdqijC2PbbdH4kjq62Y1b',
      callback: 'submitCallback',
      id: 'oda83noeflau',
      widgetId: 0,
      display: [Object],
      url: 'https://www.google.com/sorry/index?continue=https://www.google.com/search%3Fq%3Dklima%26oq%3Dgoogle%26aqs%3Dchrome..69i57j0l5j69i60l2.735j0j7%26sourceid%3Dchrome%26ie%3DUTF-8&q=EgS8pdi4GN67pvYFIhkA8aeDS9yQqQWq3BMCa6xUSD_F653Fxcx7MgFy',
      hasResponseElement: true
    }
  ],
  solutions: [
    {
      provider: '2captcha',
      id: 'oda83noeflau',
      requestAt: 2020-05-23T22:04:15.100Z,
      providerCaptchaId: '63956086379',
      text: '03AGdBq26iX0JWbJGTu1Bbl98SH2ZQQxAsLELiNrf9Iz14ORUZPvdjRgNPuF2dgpERJb1ylQqQyw4dSDs8K-UruIlaeE8K9c064cCHYOeuwvggqV91bzYkZDbX39NUToTr7UDgO0LOTts803ELdwNxp5FRZutEqpwK4aTjzX9u8ROXJ1-v4ozbJofb38-zVnhm1eftXYpLxllJpXonswmha5GJRPIwPjKv4RZCdz6xTQRN1wtOPX4emUPBj2qadsKpTg633JV46sh2c66vT7Syb4BSFajNFfR_GbbFdPlnDcd5_E4gg3kV58c',
      responseAt: 2020-05-23T22:04:17.370Z,
      hasSolution: true,
      duration: 2.27
    }
  ],
  solved: [
    {
      id: 'oda83noeflau',
      responseElement: true,
      responseCallback: true,
      isSolved: true,
      solvedAt: {}
    }
  ],
  error: undefined
} +0ms

<div style="max-width:400px;">
<hr noshade="" size="1" style="color:#ccc; background-color:#ccc;"><br>
<form id="captcha-form" action="index" method="post">
<script type="text/javascript" async="" src="https://www.gstatic.com/recaptcha/releases/BT5UwN2jyUJCo7TdbwTYi_58/recaptcha__en.js"></script><script src="https://www.google.com/recaptcha/api.js" async="" defer=""></script>
<script>var submitCallback = function(response) {document.getElementById('captcha-form').submit();};</script>
<div id="recaptcha" class="g-recaptcha" data-sitekey="6LfwuyUTAAAAAOAmoS0fdqijC2PbbdH4kjq62Y1b" data-callback="submitCallback" data-s="cojAJSZCdLw81Mw5102PzG5LCy2bEgY-Qgb0-Oa8bdMLsAjcUB7GHfYlSTUsLPcez0e4N8BYqFCc-jZ-XabU9E0GKLwuWbMScAFsxh0IqK2nuqHNLT3f4jCHZU113pPZQt8Wc56k250zig5yoYxpMvWQ85CIvEfWfG3BrT60dccP788N2aLdbn12pmWzOsKuLt893rL8KxnpaMHs4ZsiPkUJdCsAy6AzaRAinNs4eV23RDtrGHNi-u0"><div style="width: 304px; height: 78px;"><div><iframe src="https://www.google.com/recaptcha/api2/anchor?ar=1&amp;k=6LfwuyUTAAAAAOAmoS0fdqijC2PbbdH4kjq62Y1b&amp;co=aHR0cHM6Ly93d3cuZ29vZ2xlLmNvbTo0NDM.&amp;hl=en&amp;v=BT5UwN2jyUJCo7TdbwTYi_58&amp;size=normal&amp;s=cojAJSZCdLw81Mw5102PzG5LCy2bEgY-Qgb0-Oa8bdMLsAjcUB7GHfYlSTUsLPcez0e4N8BYqFCc-jZ-XabU9E0GKLwuWbMScAFsxh0IqK2nuqHNLT3f4jCHZU113pPZQt8Wc56k250zig5yoYxpMvWQ85CIvEfWfG3BrT60dccP788N2aLdbn12pmWzOsKuLt893rL8KxnpaMHs4ZsiPkUJdCsAy6AzaRAinNs4eV23RDtrGHNi-u0&amp;cb=r91kon3sihd2" width="304" height="78" role="presentation" name="a-hui2ii7cbhhz" frameborder="0" scrolling="no" sandbox="allow-forms allow-popups allow-same-origin allow-scripts allow-top-navigation allow-modals allow-popups-to-escape-sandbox"></iframe></div><textarea id="g-recaptcha-response" name="g-recaptcha-response" class="g-recaptcha-response" style="width: 250px; height: 40px; border: 1px solid rgb(193, 193, 193); margin: 10px 25px; padding: 0px; resize: none; display: none;"></textarea></div><iframe style="display: none;"></iframe></div>
<input type="hidden" name="q" value="EgS8pdi4GN67pvYFIhkA8aeDS9yQqQWq3BMCa6xUSD_F653Fxcx7MgFy"><input type="hidden" name="continue" value="https://www.google.com/search?q=klima&amp;oq=google&amp;aqs=chrome..69i57j0l5j69i60l2.735j0j7&amp;sourceid=chrome&amp;ie=UTF-8">
</form>
<hr noshade="" size="1" style="color:#ccc; background-color:#ccc;">

<div style="font-size:13px;">
<b>About this page</b><br><br>

Our systems have detected unusual traffic from your computer network.  This page checks to see if it's really you sending the requests, and not a robot.  <a href="#" onclick="document.getElementById('infoDiv').style.display='block';">Why did this happen?</a><br><br>

<div id="infoDiv" style="display:none; background-color:#eee; padding:10px; margin:0 0 15px 0; line-height:1.4em;">
This page appears when Google automatically detects requests coming from your computer network which appear to be in violation of the <a href="//www.google.com/policies/terms/">Terms of Service</a>. The block will expire shortly after those requests stop.  In the meantime, solving the above CAPTCHA will let you continue to use our services.<br><br>This traffic may have been sent by malicious software, a browser plug-in, or a script that sends automated requests.  If you share your network connection, ask your administrator for help — a different computer using the same IP address may be responsible.  <a href="//support.google.com/websearch/answer/86640">Learn more</a><br><br>Sometimes you may be asked to solve the CAPTCHA if you are using advanced terms that robots are known to use, or sending requests very quickly.
</div>

IP address: x.x.x.x<br>Time: 2020-05-23T22:04:17Z<br>URL: https://www.google.com/search?q=klima&amp;oq=google&amp;aqs=chrome..69i57j0l5j69i60l2.735j0j7&amp;sourceid=chrome&amp;ie=UTF-8<br>
</div>
</div>

Капча решена, но меня не перенаправляет на страницу результатов поиска Google :(

Пробовал также вариант с решением капчи во фреймах (iframe), но в отладочном выводе сообщается, что ни одной капчи не найдено.

// Collect the direct child frames of the main frame, then ask the plugin
// to solve any reCAPTCHAs inside each of them, one frame at a time.
const childFrames = page.mainFrame().childFrames()
for (const childFrame of childFrames) {
  await childFrame.solveRecaptchas()
}

Мне нужна помощь — я плохо знаю Node.js. Заранее благодарю!

1 Ответ

1 голос
/ 10 июня 2020

Вам нужно получить значение атрибута data-s и отправить его провайдеру решения капчи. По крайней мере, AntiCaptcha теперь поддерживает это.

...