У меня очень простой Scrapy-паук, который я пытаюсь переместить в QThread. Вот код моего основного файла.
from PySide2 import QtWidgets
from PySide2.QtCore import QThread, Signal
from PySide2.QtWidgets import QTableWidgetItem
from scrapy import signals
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
import twisted
from twisted.internet import threads
from Basic.FollowSpider import FollowSpider
from Basic.url import Ui_MainWindow
import qt5reactor
class DownloadThread(QThread):
    """Worker thread that starts a Scrapy crawl and relays scraped items.

    NOTE(review): CrawlerRunner schedules its work on the Twisted reactor,
    which (via qt5reactor) lives in the main GUI thread.  Kicking off the
    crawl from this QThread is what produces the "QObject::startTimer:
    Timers cannot be started from another thread" warnings; the crawl
    should be scheduled on the reactor thread instead (e.g. with
    twisted.internet.threads.blockingCallFromThread) — confirm.
    """

    # Emitted once per scraped item.  Cross-thread delivery to GUI slots
    # happens via Qt's queued connections, which is thread-safe.
    data_downloaded = Signal(object)

    def __init__(self):
        QThread.__init__(self)

    def run(self):
        """Configure logging, start the crawl, and hook item_scraped."""
        configure_logging()
        runner = CrawlerRunner()
        runner.crawl(FollowSpider, domain="reddit.com")
        for crawler in runner.crawlers:
            crawler.signals.connect(self.crawler_results,
                                    signal=signals.item_scraped)

    def crawler_results(self, item):
        # Emit the item object itself: the original
        # '%s\n%s' % (item) raised TypeError (two placeholders, a single
        # value), and the GUI slot expects a mapping with an "url" key,
        # not a preformatted string.
        self.data_downloaded.emit(item)
        print(item)
class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow):
    """Main window: launches the crawler and lists scraped URLs in a table."""

    def __init__(self, parent=None):
        # Forward parent so the optional argument actually takes effect.
        super(MainWindow, self).__init__(parent)
        self.setupUi(self)
        self.showMaximized()
        # Keep thread references alive for the window's lifetime.  The old
        # code reset this list on every button press, dropping references
        # to running threads and risking "QThread: Destroyed while thread
        # is still running".
        self.threads = []
        self.pushButton.pressed.connect(self.start_download)

    def start_download(self):
        """Create and start one crawler thread per button press."""
        downloader = DownloadThread()
        downloader.data_downloaded.connect(self.on_data_ready)
        self.threads.append(downloader)
        downloader.start()

    def on_data_ready(self, item):
        """Slot executed in the GUI thread: append the item's URL to the table.

        ``item`` must be a mapping with an ``"url"`` key.
        """
        row = self.tableWidget.rowCount()
        self.tableWidget.insertRow(row)
        self.tableWidget.setItem(row, 0, QTableWidgetItem(item["url"]))

    def closeEvent(self, event):
        """Stop the Twisted reactor when the window closes, ending the app."""
        super(MainWindow, self).closeEvent(event)
        twisted.internet.reactor.stop()
# TODO test memory
if __name__ == "__main__":
    app = QtWidgets.QApplication([])
    # qt5reactor must be installed after the QApplication exists; it
    # merges the Twisted reactor with the Qt event loop.
    qt5reactor.install()
    window = MainWindow()
    window.show()
    # The reactor now drives both Twisted and Qt events, replacing
    # the usual app.exec_() call.
    twisted.internet.reactor.run()
Когда сигнал испускается и данные готовы, слот on_data_ready добавляет строку в QTableWidget. Мой сгенерированный код PySide2 выглядит следующим образом:
# -*- coding: utf-8 -*-
################################################################################
## Form generated from reading UI file 'basic-repro.ui'
##
## Created by: Qt User Interface Compiler version 5.14.2
##
## WARNING! All changes made in this file will be lost when recompiling UI file!
################################################################################
from PySide2.QtCore import (QCoreApplication, QDate, QDateTime, QMetaObject,
QObject, QPoint, QRect, QSize, QTime, QUrl, Qt)
from PySide2.QtGui import (QBrush, QColor, QConicalGradient, QCursor, QFont,
QFontDatabase, QIcon, QKeySequence, QLinearGradient, QPalette, QPainter,
QPixmap, QRadialGradient)
from PySide2.QtWidgets import *
class Ui_MainWindow(object):
    # Auto-generated by pyside2-uic from 'basic-repro.ui' — do not edit by
    # hand; regenerate from the .ui file instead.

    def setupUi(self, MainWindow):
        """Build the widget tree on MainWindow: a one-column table and a button."""
        if not MainWindow.objectName():
            MainWindow.setObjectName(u"MainWindow")
        MainWindow.resize(475, 454)
        self.centralwidget = QWidget(MainWindow)
        self.centralwidget.setObjectName(u"centralwidget")
        # Single-column table that will receive one row per scraped item.
        self.tableWidget = QTableWidget(self.centralwidget)
        if (self.tableWidget.columnCount() < 1):
            self.tableWidget.setColumnCount(1)
        __qtablewidgetitem = QTableWidgetItem()
        self.tableWidget.setHorizontalHeaderItem(0, __qtablewidgetitem)
        self.tableWidget.setObjectName(u"tableWidget")
        self.tableWidget.setGeometry(QRect(0, 10, 461, 351))
        # Button that triggers the crawl (connected in MainWindow).
        self.pushButton = QPushButton(self.centralwidget)
        self.pushButton.setObjectName(u"pushButton")
        self.pushButton.setGeometry(QRect(370, 370, 89, 25))
        MainWindow.setCentralWidget(self.centralwidget)
        self.statusbar = QStatusBar(MainWindow)
        self.statusbar.setObjectName(u"statusbar")
        MainWindow.setStatusBar(self.statusbar)
        self.retranslateUi(MainWindow)
        QMetaObject.connectSlotsByName(MainWindow)
    # setupUi

    def retranslateUi(self, MainWindow):
        """Apply (translatable) user-visible strings to the widgets."""
        MainWindow.setWindowTitle(QCoreApplication.translate("MainWindow", u"MainWindow", None))
        ___qtablewidgetitem = self.tableWidget.horizontalHeaderItem(0)
        ___qtablewidgetitem.setText(QCoreApplication.translate("MainWindow", u"URL", None));
        self.pushButton.setText(QCoreApplication.translate("MainWindow", u"Process", None))
    # retranslateUi
Каждый раз, когда я запускаю код, я получаю
...
'scrapy.extensions.memusage.MemoryUsage',
'scrapy.downloadermiddlewares.stats.DownloaderStats']
**QObject::startTimer: Timers cannot be started from another thread**
2020-05-09 21:03:40 [scrapy.middleware] INFO: Enabled spider middlewares:
['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
'scrapy.spidermiddlewares.referer.RefererMiddleware',
'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
'scrapy.spidermiddlewares.depth.DepthMiddleware']
2020-05-09 21:03:40 [scrapy.middleware] INFO: Enabled item pipelines:
[]
2020-05-09 21:03:40 [scrapy.core.engine] INFO: Spider opened
2020-05-09 21:03:40 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
QObject::killTimer: Timers cannot be stopped from another thread
QObject::startTimer: Timers cannot be started from another thread
QObject::killTimer: Timers cannot be stopped from another thread
QObject::startTimer: Timers cannot be started from another thread
QObject::killTimer: Timers cannot be stopped from another thread
QObject::startTimer: Timers cannot be started from another thread
2020-05-09 21:03:40 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6023
QObject::killTimer: Timers cannot be stopped from another thread
QObject::startTimer: Timers cannot be started from another thread
Я прочитал несколько других похожих вопросов (ссылки на них были здесь, но потерялись при вставке). Однако я не могу понять, в каком именно месте я обращаюсь к GUI из другого потока.
Вот мой код FollowSpider для воспроизведения:
import re
import scrapy.signals
from scrapy.http import Request, HtmlResponse
from scrapy.linkextractors import LinkExtractor
from six.moves.urllib.parse import urlparse
from items import ScrapyItems
class FollowSpider(scrapy.Spider):
    """Follow every link reachable from a start URL, yielding one item per page.

    The crawl is capped at 10 pages via CLOSESPIDER_PAGECOUNT and confined
    to the start URL's host.
    """

    name = 'followall'
    custom_settings = {
        'CLOSESPIDER_PAGECOUNT': 10,
    }

    def __init__(self, **kw):
        super(FollowSpider, self).__init__(**kw)
        # Accept either url= or domain=; fall back to reddit.
        url = kw.get('url') or kw.get('domain') or 'https://www.reddit.com'
        # startswith accepts a tuple — one call instead of an `or` chain.
        if not url.startswith(('http://', 'https://')):
            url = 'http://%s/' % url
        self.url = url
        # Restrict the crawl to the start host, without a leading "www.".
        self.allowed_domains = [re.sub(r'^www\.', '', urlparse(url).hostname)]
        self.link_extractor = LinkExtractor()

    def start_requests(self):
        """Seed the crawl with the configured start URL."""
        return [Request(self.url, callback=self.parse, dont_filter=True)]

    def parse(self, response):
        """Return this page's item plus follow-up requests for its links."""
        results = [self._get_item(response)]
        results.extend(self._extract_requests(response))
        return results

    def _get_item(self, response):
        # Only the page URL is captured on the item.
        return ScrapyItems(url=response.url)

    def _extract_requests(self, response):
        """Build requests for every link on the page (HTML responses only)."""
        requests = []
        if isinstance(response, HtmlResponse):
            links = self.link_extractor.extract_links(response)
            requests.extend(Request(link.url, callback=self.parse) for link in links)
        return requests