Я пытаюсь очистить содержимое с веб-сайта, но получаю указанную ниже ошибку
Метод:
def scrape_newtimes():
"""Scrapes content from the NewTimes"""
url = 'https://www.newtimes.co.rw/'
r = requests.get(url, headers=HEADERS)
tree = fromstring(r.content)
links = tree.xpath('//div[@class="x-small-push clearfix"]/a/@href')
for link in links:
r = requests.get(link, headers=HEADERS)
blog_tree = fromstring(r.content)
paras = blog_tree.xpath('//div[@class="article-content"]/p')
para = extract_paratext(paras)
text = extract_text(para)
if not text:
continue
yield '"%s" %s' % (text, link)
Ошибка, которую я получаю:
>>> sc = scrape_newtimes()
>>> string_1 = next(sc)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "D:\Projects\bird\bird-env\bot.py", line 58, in scrape_newtimes
r = requests.get(link, headers=HEADERS)
File "D:\Projects\bird\venv\lib\site-packages\requests\api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "D:\Projects\bird\venv\lib\site-packages\requests\api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "D:\Projects\bird\venv\lib\site-packages\requests\sessions.py", line 519, in request
prep = self.prepare_request(req)
File "D:\Projects\bird\venv\lib\site-packages\requests\sessions.py", line 462, in prepare_request
hooks=merge_hooks(request.hooks, self.hooks),
File "D:\Projects\bird\venv\lib\site-packages\requests\models.py", line 313, in prepare
self.prepare_url(url, params)
File "D:\Projects\bird\venv\lib\site-packages\requests\models.py", line 387, in prepare_url
raise MissingSchema(error)
requests.exceptions.MissingSchema: Invalid URL '/news/londons-kings-college-launch-civil-service-programme-rwanda': No schema supplied. Perhaps you meant http:///news/londons-kings-college-launch-civil-service-programme-rwanda?
>>>