Запуск Scrapy из Qthread. QObject :: killTimer: Таймеры не могут быть остановлены / запущены из другого потока - PullRequest
0 голосов
/ 09 мая 2020

У меня очень простой c паук, который я пытаюсь переместить в QThread. Вот мой код для моего основного файла.

from PySide2 import QtWidgets
from PySide2.QtCore import QThread, Signal
from PySide2.QtWidgets import QTableWidgetItem
from scrapy import signals
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
import twisted
from twisted.internet import threads

from Basic.FollowSpider import FollowSpider
from Basic.url import Ui_MainWindow
import qt5reactor

class DownloadThread(QThread):

    data_downloaded = Signal(object)

    def __init__(self):
        QThread.__init__(self)
        # self.url = url

    def run(self):
        configure_logging()
        runner = CrawlerRunner()
        runner.crawl(FollowSpider, domain="reddit.com")
        for p in runner.crawlers :
            p.signals.connect(self.crawler_results, signal=signals.item_scraped)

    def crawler_results(self, item):
        self.data_downloaded.emit('%s\n%s' % (item))
        print(item)


class MainWindow(QtWidgets.QMainWindow, Ui_MainWindow) :

    def __init__(self, parent=None) :
        super(MainWindow, self).__init__()

        self.setupUi(self)
        self.showMaximized()
        self.pushButton.pressed.connect(self.start_download)

    def start_download(self) :
        self.threads = []
        downloader = DownloadThread()
        downloader.data_downloaded.connect(self.on_data_ready)
        self.threads.append(downloader)
        downloader.start()

    def on_data_ready(self, item) :
        row = self.tableWidget.rowCount()
        url = item["url"]
        it = QTableWidgetItem(url)
        self.tableWidget.insertRow(row)
        self.tableWidget.setItem(row, 0, it)


    def closeEvent(self, event) :
        super(MainWindow, self).closeEvent(event)
        twisted.internet.reactor.stop()
        # TODO test memory


if __name__ == "__main__" :
    app = QtWidgets.QApplication([])
    qt5reactor.install()
    main_window = MainWindow()
    main_window.show()
    twisted.internet.reactor.run()

Когда сигнал испускается и данные готовы, я использую on_data_ready для создания виджета Qtablewidget. Мой код Pyside2 выглядит следующим образом:

# -*- coding: utf-8 -*-

################################################################################
## Form generated from reading UI file 'basic-repro.ui'
##
## Created by: Qt User Interface Compiler version 5.14.2
##
## WARNING! All changes made in this file will be lost when recompiling UI file!
################################################################################

from PySide2.QtCore import (QCoreApplication, QDate, QDateTime, QMetaObject,
    QObject, QPoint, QRect, QSize, QTime, QUrl, Qt)
from PySide2.QtGui import (QBrush, QColor, QConicalGradient, QCursor, QFont,
    QFontDatabase, QIcon, QKeySequence, QLinearGradient, QPalette, QPainter,
    QPixmap, QRadialGradient)
from PySide2.QtWidgets import *


class Ui_MainWindow(object):
    def setupUi(self, MainWindow):
        if not MainWindow.objectName():
            MainWindow.setObjectName(u"MainWindow")
        MainWindow.resize(475, 454)
        self.centralwidget = QWidget(MainWindow)
        self.centralwidget.setObjectName(u"centralwidget")
        self.tableWidget = QTableWidget(self.centralwidget)
        if (self.tableWidget.columnCount() < 1):
            self.tableWidget.setColumnCount(1)
        __qtablewidgetitem = QTableWidgetItem()
        self.tableWidget.setHorizontalHeaderItem(0, __qtablewidgetitem)
        self.tableWidget.setObjectName(u"tableWidget")
        self.tableWidget.setGeometry(QRect(0, 10, 461, 351))
        self.pushButton = QPushButton(self.centralwidget)
        self.pushButton.setObjectName(u"pushButton")
        self.pushButton.setGeometry(QRect(370, 370, 89, 25))
        MainWindow.setCentralWidget(self.centralwidget)
        self.statusbar = QStatusBar(MainWindow)
        self.statusbar.setObjectName(u"statusbar")
        MainWindow.setStatusBar(self.statusbar)

        self.retranslateUi(MainWindow)

        QMetaObject.connectSlotsByName(MainWindow)
    # setupUi

    def retranslateUi(self, MainWindow):
        MainWindow.setWindowTitle(QCoreApplication.translate("MainWindow", u"MainWindow", None))
        ___qtablewidgetitem = self.tableWidget.horizontalHeaderItem(0)
        ___qtablewidgetitem.setText(QCoreApplication.translate("MainWindow", u"URL", None));
        self.pushButton.setText(QCoreApplication.translate("MainWindow", u"Process", None))
    # retranslateUi

Каждый раз, когда я запускаю код, я получаю

...
 'scrapy.extensions.memusage.MemoryUsage',
 'scrapy.downloadermiddlewares.stats.DownloaderStats']
**QObject::startTimer: Timers cannot be started from another thread**
2020-05-09 21:03:40 [scrapy.middleware] INFO: Enabled spider middlewares:
['scrapy.spidermiddlewares.httperror.HttpErrorMiddleware',
 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware',
 'scrapy.spidermiddlewares.referer.RefererMiddleware',
 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware',
 'scrapy.spidermiddlewares.depth.DepthMiddleware']
2020-05-09 21:03:40 [scrapy.middleware] INFO: Enabled item pipelines:
[]
2020-05-09 21:03:40 [scrapy.core.engine] INFO: Spider opened
2020-05-09 21:03:40 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
QObject::killTimer: Timers cannot be stopped from another thread
QObject::startTimer: Timers cannot be started from another thread
QObject::killTimer: Timers cannot be stopped from another thread
QObject::startTimer: Timers cannot be started from another thread
QObject::killTimer: Timers cannot be stopped from another thread
QObject::startTimer: Timers cannot be started from another thread
2020-05-09 21:03:40 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6023
QObject::killTimer: Timers cannot be stopped from another thread
QObject::startTimer: Timers cannot be started from another thread

Я читаю несколько других вопросов, например this и это . Однако я не могу понять, как я взаимодействую с GUI?

моим кодом Followspider для воспроизведения

import re
import scrapy.signals
from scrapy.http import Request, HtmlResponse
from scrapy.linkextractors import LinkExtractor
from six.moves.urllib.parse import urlparse
from items import ScrapyItems

class FollowSpider( scrapy.Spider ) :

    name = 'followall'
    custom_settings = {
        'CLOSESPIDER_PAGECOUNT' : 10,
    }

    def __init__(self, **kw) :
        super( FollowSpider, self ).__init__( **kw )
        url = kw.get( 'url' ) or kw.get( 'domain' ) or 'https://www.reddit.com'
        if not url.startswith( 'http://' ) and not url.startswith( 'https://' ) :
            url = 'http://%s/' % url
        self.url = url
        self.allowed_domains = [re.sub(r'^www\.', '', urlparse(url).hostname)]
        self.link_extractor = LinkExtractor()

    def start_requests(self):
        return [Request(self.url, callback=self.parse, dont_filter=True)]

    def parse(self, response):
        page = self._get_item(response)
        r = [page]
        r.extend(self._extract_requests(response))
        return r

    def _get_item(self, response):
        item = ScrapyItems(
            url=response.url,
        )
        return item

    def _extract_requests(self, response):
        r = []
        if isinstance(response, HtmlResponse):
            links = self.link_extractor.extract_links( response )
            r.extend( Request( x.url, callback=self.parse ) for x in links )
        return r
...