Мне нужен скрипт Python, который получает доход Google AdSense, и я нашел скребок AdSense:
http://pypi.python.org/pypi/adsense_scraper/0.5
Он использует Twill и html5lib, чтобы очистить данные о доходах Google AdSense. Когда я использую его, я получаю это сообщение об ошибке:
Traceback (most recent call last):
File "adsense_scraper.py", line 163, in <module>
data = main()
File "adsense_scraper.py", line 154, in main
b = get_adsense(login, password)
File "adsense_scraper.py", line 128, in get_adsense
b.submit()
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\browser.py", line 467, in submit
self._journey('open', request)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\browser.py", line 523, in _journey
r = func(*args, **kwargs)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 212, in open
return self._mech_open(url, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 238, in _mech_open
response = UserAgentBase.open(self, request, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_opener.py", line 192, in open
response = meth(req, response)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_http.py", line 590, in http_response
"http", request, response, code, msg, hdrs)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_opener.py", line 209, in error
result = apply(self._call_chain, args)
File "C:\Python26\lib\urllib2.py", line 361, in _call_chain
result = func(*args)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_http.py", line 135, in http_error_302
return self.parent.open(new)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 212, in open
return self._mech_open(url, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 238, in _mech_open
response = UserAgentBase.open(self, request, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_opener.py", line 192, in open
response = meth(req, response)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\utils.py", line 442, in http_response
"refresh", msg, hdrs)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_opener.py", line 209, in error
result = apply(self._call_chain, args)
File "C:\Python26\lib\urllib2.py", line 361, in _call_chain
result = func(*args)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_http.py", line 135, in http_error_302
return self.parent.open(new)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 212, in open
return self._mech_open(url, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_mechanize.py", line 238, in _mech_open
response = UserAgentBase.open(self, request, data)
File "c:\python26\lib\site-packages\twill-0.9-py2.6.egg\twill\other_packages\_mechanize_dist\_opener.py", line 181, in open
response = urlopen(self, req, data)
File "C:\Python26\lib\urllib2.py", line 406, in _open 'unknown_open', req)
File "C:\Python26\lib\urllib2.py", line 361, in _call_chain result = func(*args)
File "C:\Python26\lib\urllib2.py", line 1163, in unknown_open raise URLError('unknown url type: %s' % type)
urllib2.URLError: <urlopen error unknown url type: 'http>
Итак, важная вещь:
urllib2.URLError: <urlopen error unknown url type: 'http>
Может кто-нибудь сказать мне, где ошибка? Есть ли еще лучший способ получить данные через Python? Спасибо