Мне нужно приложение на моём Django-сайте, которое отображает статьи со стороннего веб-сайта. Пока у меня есть следующее:
views.py
import requests
from django.shortcuts import render, redirect
from bs4 import BeautifulSoup as BSoup
from .models import Headline
# Silence urllib3 warnings for the whole process; scrape() issues its request
# with verify=False, which would otherwise emit a warning on every call.
requests.packages.urllib3.disable_warnings()
def news_list(request):
    """Render the news home page with every stored headline.

    The headlines are passed to the template as ``object_list``, in the
    reverse of the order in which the default queryset yields them.
    """
    stored_headlines = list(Headline.objects.all())
    stored_headlines.reverse()
    return render(
        request,
        "news/home.html",
        {'object_list': stored_headlines},
    )
def scrape(request):
    """Fetch The Onion's front page and store one Headline per article found.

    Each ``div.curation-module__item`` element is inspected for its first
    link. Items without a link, an ``href`` or a ``title`` are skipped rather
    than aborting the whole scrape. A missing or unparsable image is stored
    as ``None``, which the nullable ``Headline.image`` field allows.

    Args:
        request: The incoming Django request (not otherwise used).

    Returns:
        A redirect to the parent URL (``"../"``).

    Raises:
        requests.exceptions.RequestException: Connection, TLS and timeout
            failures from the HTTP request are not caught here and propagate
            to the caller.
    """
    url = "https://www.theonion.com/"

    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as session:
        session.headers = {"User-Agent": "Googlebot/2.1 (+http://www.google.com/bot.html)"}
        # NOTE(review): verify=False disables TLS certificate validation. It is
        # kept for backward compatibility, but it does not work around
        # protocol-level SSL errors and should be removed once the environment
        # can validate certificates.
        # The timeout keeps an unresponsive host from hanging the view forever.
        content = session.get(url, verify=False, timeout=10).content

    soup = BSoup(content, "html.parser")
    articles = soup.find_all('div', {"class": "curation-module__item"})

    for article in articles:
        anchor = article.find('a')
        if anchor is None:
            continue

        link = anchor.get('href')
        title = anchor.get('title')
        if not link or not title:
            continue

        # The original picks the fourth token from the end of the
        # space-separated srcset value; presumably that is the URL of one of
        # the larger image candidates -- verify against the live markup.
        image_src = None
        image_tag = anchor.find('img')
        srcset = image_tag.get('srcset') if image_tag is not None else None
        if srcset:
            tokens = str(srcset).split(" ")
            if len(tokens) >= 4:
                image_src = tokens[-4]

        new_headline = Headline()
        new_headline.title = title
        new_headline.url = link
        new_headline.image = image_src
        new_headline.save()

    return redirect("../")
Мой models.py
from django.conf import settings
from django.db import models
# Create your models here.
# Scrape data coming from websites
# The posts will contain images, urls and titles
# model - headline(title, image, url)
# model - userprofile(user, last_scrape)
class Headline(models.Model):
    """A scraped news item: a title, an optional image URL and a link."""

    # Headline text; limited to 200 characters.
    title = models.CharField(max_length=200)
    # Image URL; may be left empty or stored as NULL.
    image = models.URLField(blank=True, null=True)
    # Link to the article, stored as free-form text.
    url = models.TextField()

    def __str__(self):
        """Return the headline's title as its string representation."""
        return self.title
У меня также есть HTML-страница и настроен urls.py.
Всякий раз, когда я пытаюсь запросить статьи, возникает эта ошибка:
SSLError at /news/scrape/
HTTPSConnectionPool(host='www.theonion.com', port=443): Max retries exceeded
with url: / (Caused by SSLError(SSLError(1, '[SSL: UNKNOWN_PROTOCOL]
unknown protocol (_ssl.c:841)'),))
Есть мысли?