Это код, который я запускаю:
import requests
import records
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from sqlalchemy.exc import IntegrityError
db = records.Database('sqlite:///crawler_database.db')
db.query('''CREATE TABLE IF NOT EXISTS links (
url text PRIMARY KEY,
created_at datetime,
visited_at datetime NULL)''')
db.query('''CREATE TABLE IF NOT EXISTS numbers (url text, number integer,
PRIMARY KEY (url, number))''')
def store_link(url):
try:
db.query('''INSERT INTO links (url, created_at)
VALUES (:url, CURRENT_TIMESTAMP)''', url=url)
except IntegrityError as ie:
# This link already exists, do nothing
pass
def store_number(url, number):
try:
db.query('''INSERT INTO numbers (url, number)
VALUES (:url, :number)''', url=url, number=number)
except IntegrityError as ie:
# This number already exists, do nothing
pass
def mark_visited(url):
db.query('''UPDATE links SET visited_at=CURRENT_TIMESTAMP
WHERE url=:url''', url=url)
def get_random_unvisited_link():
link = db.query('''SELECT * FROM links
WHERE visited_at IS NULL
ORDER BY RANDOM() LIMIT 1''').first()
return None if link is None else link.url
def visit(url):
html = requests.get(url).text
html_soup = BeautifulSoup(html, 'html.parser')
new_links = []
for td in html_soup.find_all("td"):
store_number(url, int(td.text.strip()))
for link in html_soup.find_all("a"):
link_url = link.get('href')
if link_url is None:
continue
full_url = urljoin(url, link_url)
new_links.append(full_url)
return new_links
store_link('http://www.webscrapingfordatascience.com/crawler/')
url_to_visit = get_random_unvisited_link()
while url_to_visit is not None:
print('Now visiting:', url_to_visit)
new_links = visit(url_to_visit)
print(len(new_links), 'new link(s) found')
for link in new_links:
store_link(link)
mark_visited(url_to_visit)
url_to_visit = get_random_unvisited_link()
Это моя ошибка:
ProgrammingError: (sqlite3.ProgrammingError) Невозможно работать с закрытой базой данных.
Ошибка обратного вызова говорит, что указывает на эту строку, url_to_visit = get_random_unvisited_link()
.
Я не могу понять, почему она закрыта или что происходит. Кто-нибудь, кто хотел бы помочь мне решить эту проблему?
Это полная обратная связь:
ERROR:sqlalchemy.pool.impl.NullPool:Error closing cursor
Traceback (most recent call last):
File "C:\Users\LENOVO\anaconda3\lib\site-packages\sqlalchemy\engine\result.py", line 1324, in fetchone
row = self._fetchone_impl()
File "C:\Users\LENOVO\anaconda3\lib\site-packages\sqlalchemy\engine\result.py", line 1204, in _fetchone_impl
return self.cursor.fetchone()
sqlite3.ProgrammingError: Cannot operate on a closed database.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\LENOVO\anaconda3\lib\site-packages\sqlalchemy\engine\base.py", line 1339, in _safe_close_cursor
cursor.close()
sqlite3.ProgrammingError: Cannot operate on a closed database.