如何将Scrapy抓取的item显示在PyQt5小部件中?

问题描述 投票:1回答:2

我正在尝试为Scrapy爬虫制作一个简单的GUI,用户可以按下“开始”按钮来运行抓取,并在textBrowser(或其他Qt小部件,欢迎建议)中查看已抓取的结果。

My spider:

import scrapy, json


# Spider sketch from the question. Parts of the body are elided, so the
# snippet is not runnable exactly as shown.
class CarSpider(scrapy.Spider):
    name = 'car'  # spider name, as used in the `scrapy crawl car` command
    start_urls = ["https://www.target-website.com/"]

    def parse(self, response):
        # The string below is a placeholder for omitted code; `url` is not
        # defined in the visible snippet and is presumably built there.
        """some code """
            # Schedule a follow-up request whose response goes to parse_page().
            yield scrapy.Request(url=url, callback=self.parse_page)

    def parse_page(self, response):
        # Decode the response body as JSON and take the list under 'items'.
        items = json.loads(response.body_as_unicode())['items']
        for i in items:
            # Elided code; presumably extracts make/model/year from `i`.
            ...
            scraped_item = {
                'Make': make,
                'Model': model,
                'Year': year,                    
            }
            # One dict is yielded per entry in `items`.
            yield scraped_item

应用程序设计是在Qt Designer中完成的:

GUI:

from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_MainWindow(object):
    """Widget tree of the scraper window: two push buttons and a text browser."""

    def setupUi(self, MainWindow):
        """Create the child widgets of *MainWindow* and attach them to ``self``."""
        fixed = QtWidgets.QSizePolicy.Fixed

        def pin_size(widget):
            # Apply a Fixed/Fixed size policy with zero stretch to *widget*,
            # carrying over the height-for-width flag of its current policy.
            policy = QtWidgets.QSizePolicy(fixed, fixed)
            policy.setHorizontalStretch(0)
            policy.setVerticalStretch(0)
            policy.setHeightForWidth(widget.sizePolicy().hasHeightForWidth())
            widget.setSizePolicy(policy)

        # --- top-level window ---
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(801, 612)
        pin_size(MainWindow)

        # --- central container ---
        central = QtWidgets.QWidget(MainWindow)
        self.centralwidget = central
        pin_size(central)
        central.setObjectName("centralwidget")

        # --- first button (labelled "Run Scraper" in retranslateUi) ---
        run_button = QtWidgets.QPushButton(central)
        self.pushButton = run_button
        run_button.setGeometry(QtCore.QRect(10, 10, 211, 41))
        run_button.setObjectName("pushButton")

        # --- second button (labelled "Stop"); created disabled ---
        stop_button = QtWidgets.QPushButton(central)
        self.pushButton_2 = stop_button
        stop_button.setEnabled(False)
        stop_button.setGeometry(QtCore.QRect(10, 60, 211, 41))
        stop_button.setObjectName("pushButton_2")

        # --- text browser filling the right-hand side ---
        browser = QtWidgets.QTextBrowser(central)
        self.textBrowser = browser
        browser.setGeometry(QtCore.QRect(240, 10, 551, 571))
        browser.setObjectName("textBrowser")

        MainWindow.setCentralWidget(central)

        # --- status bar ---
        status = QtWidgets.QStatusBar(MainWindow)
        self.statusbar = status
        status.setObjectName("statusbar")
        MainWindow.setStatusBar(status)

        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        """Set the window title and button captions through Qt's translate()."""
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
        self.pushButton.setText(_translate("MainWindow", "Run Scraper"))
        self.pushButton_2.setText(_translate("MainWindow", "Stop"))

这是我试图处理数据的代码:

data_handler.py:

from PyQt5 import QtWidgets
from PyQt5.QtCore import pyqtSignal, QThread
from my_gui import Ui_MainWindow 
import sys, os 
import subprocess


class SpiderThread(QThread):
    """Worker thread that runs ``scrapy crawl car`` and streams its output.

    Signals:
        signal: emitted once, after the crawl process has exited.
        output_signal: emitted with one line of process output (``bytes``,
            line ending stripped) each time the process writes a line.
    """

    signal = pyqtSignal()
    output_signal = pyqtSignal('PyQt_PyObject')

    def __init__(self):
        QThread.__init__(self)
        # PID of the crawl process; stays None until run() has spawned it.
        self.proc_id = None

    def __del__(self):
        # Wait for run() to return before the thread object goes away.
        self.wait()

    def run(self):
        """Spawn the crawl, forward its output line by line, then signal completion."""
        # Remove the result file of a previous run, if there is one.
        if os.path.exists('result.csv'):
            os.remove('result.csv')
        cmd = "scrapy crawl car"
        proc = subprocess.Popen(
            cmd.split(),
            stdout=subprocess.PIPE,
            # Merge stderr into stdout so a single pipe carries all output in
            # the order it was written.
            stderr=subprocess.STDOUT,
            # Nothing is ever written to the child, so no stdin pipe is needed.
            stdin=subprocess.DEVNULL,
        )
        self.proc_id = proc.pid
        print(self.proc_id)
        # communicate() would block until the process exits and deliver all
        # output at once; reading line by line lets the GUI update while the
        # crawl is still running.
        with proc.stdout:
            for line in iter(proc.stdout.readline, b''):
                self.output_signal.emit(line.rstrip(b'\r\n'))
        proc.wait()  # reap the child before reporting completion
        self.signal.emit()


class mywindow(QtWidgets.QMainWindow):
    """Main window: starts and stops the crawl thread and shows its output."""

    def __init__(self):
        super(mywindow, self).__init__()
        self.ui = Ui_MainWindow()
        self.ui.setupUi(self)
        self.ui.pushButton.clicked.connect(self.slot_method)

        # The worker thread is created once and restarted on every run.
        self.crawling_thread = SpiderThread()
        self.crawling_thread.signal.connect(self.finished)
        self.crawling_thread.output_signal.connect(self.update_text)
        self.ui.pushButton_2.clicked.connect(self.stop)

    def slot_method(self):
        """Handle the run button: reset the view and start the crawl thread."""
        self.ui.pushButton.setEnabled(False)
        self.ui.pushButton_2.setEnabled(True)
        self.ui.textBrowser.setText('')
        self.ui.textBrowser.append('started scraping...')
        self.crawling_thread.start()

    def finished(self):
        """Handle thread completion: report it and restore the button states."""
        self.ui.textBrowser.append('finished scraping')  # Show the output to the user
        self.ui.pushButton.setEnabled(True)  # Enable the pushButton
        self.ui.pushButton_2.setEnabled(False)

    def update_text(self, signal):
        """Append one chunk of process output to the text browser.

        Args:
            signal: raw ``bytes`` emitted by the crawl thread.
        """
        # errors="replace" keeps a stray non-UTF-8 byte in the process output
        # from raising inside the slot.
        self.ui.textBrowser.append(signal.decode("utf-8", errors="replace"))

    def stop(self):
        """Handle the stop button: terminate the crawl process and reset the buttons."""
        # Imported here because the file-level imports do not include `signal`.
        from signal import SIGTERM

        pid = getattr(self.crawling_thread, 'proc_id', None)
        print(pid)
        if pid is None:
            # The thread has not spawned the process yet; nothing to kill.
            # Leave the buttons as they are so Stop can be pressed again.
            return
        try:
            # os.kill() requires the signal number as its second argument.
            os.kill(pid, SIGTERM)
        except ProcessLookupError:
            # The process exited on its own before the signal was sent.
            pass
        self.ui.textBrowser.append('Scraping stopped...')
        self.ui.pushButton.setEnabled(True)  # Enable the pushButton
        self.ui.pushButton_2.setEnabled(False)


def main():
    """Build the Qt application, show the main window and run the event loop."""
    qt_app = QtWidgets.QApplication([])
    window = mywindow()
    window.show()
    # Use the event loop's return value as the process exit status.
    exit_code = qt_app.exec()
    sys.exit(exit_code)


# Start the GUI only when this file is run as a script, not when it is imported.
if __name__ == '__main__':
    main()

使用此代码,我只能以文本形式获取stdout,而且要等抓取完成后才能把它放进textBrowser。如果抓取持续20-30分钟,这段时间内textBrowser不会有任何变化。有没有办法获取抓取到的item并实时显示?另外,有没有办法用第二个按钮停止/暂停抓取过程?

python pyqt scrapy pyqt5
2个回答
0
投票

您应该使用QProcess,而不是subprocess.Popen() + QThread,因为QProcess通过信号发出通知,可以让这项任务更容易实现。

我创建了一个应用程序,它会扫描项目中的所有蜘蛛并将它们显示在QComboBox中,您可以在其中选择要运行的蜘蛛;还有一个按钮用于启动或停止爬虫,其日志会显示在QTextBrowser中。

假设scrapy项目具有以下结构(该项目是scrapy的一个示例项目,你可以在这里找到它):

here

用户必须选择.cfg文件,这将显示可用的蜘蛛,然后根据需要按下开始 - 停止按钮。

tutorial
├── scrapy.cfg
└── tutorial
    ├── __init__.py
    ├── items.py
    ├── middlewares.py
    ├── pipelines.py
    ├── settings.py
    └── spiders
        ├── __init__.py
        ├── toscrape-css.py
        └── toscrape-xpath.py

输出:

from functools import partial

from PyQt5 import QtCore, QtGui, QtWidgets


class ScrapyWorker(QtCore.QObject):
    """Runs the ``scrapy`` command in a QProcess and relays its output as signals.

    Signals:
        logChanged(str): decoded text read from the running process.
        started: forwarded from the underlying QProcess.
        finished: emitted when the underlying QProcess finishes.
    """

    logChanged = QtCore.pyqtSignal(str)
    started = QtCore.pyqtSignal()
    finished = QtCore.pyqtSignal()

    def __init__(self, parent=None):
        super(ScrapyWorker, self).__init__(parent)
        self._process = QtCore.QProcess(self)
        # Merge stderr into stdout so everything is read from one channel.
        self._process.setProcessChannelMode(QtCore.QProcess.MergedChannels)
        self._process.readyReadStandardOutput.connect(self.on_readyReadStandardOutput)
        self._process.setProgram('scrapy')
        self._process.started.connect(self.started)
        self._process.finished.connect(self.finished)

    def run(self, project, spider):
        """Start ``scrapy crawl <spider>`` with *project* as working directory."""
        self._process.setWorkingDirectory(project)
        self._process.setArguments(['crawl', spider])
        self._process.start()

    @QtCore.pyqtSlot()
    def on_readyReadStandardOutput(self):
        """Read whatever the process has written so far and emit it as text."""
        data = self._process.readAllStandardOutput().data().decode()
        self.logChanged.emit(data)

    @QtCore.pyqtSlot()
    def stop(self):
        """Kill the crawl process."""
        self._process.kill()

    def spiders(self, project):
        """Return the whitespace-separated output of ``scrapy list`` run in *project*.

        A local event loop is run until the helper process finishes, so the
        call returns only once the full output is available.
        """
        process = QtCore.QProcess()
        process.setProcessChannelMode(QtCore.QProcess.MergedChannels)
        process.setWorkingDirectory(project)
        loop = QtCore.QEventLoop()
        process.finished.connect(loop.quit)
        process.start('scrapy', ['list'])
        loop.exec_()
        return process.readAllStandardOutput().data().decode().split()


class MainWindow(QtWidgets.QMainWindow):
    """Window for choosing a project and spider, running it and viewing its log."""

    def __init__(self, parent=None):
        super(MainWindow, self).__init__(parent)
        self.project_le = QtWidgets.QLineEdit()
        self.project_button = QtWidgets.QPushButton('Select Project')
        self.spider_combobox = QtWidgets.QComboBox()
        self.start_stop_button = QtWidgets.QPushButton("Start", checkable=True)
        self.text_edit = QtWidgets.QTextBrowser()

        central_widget = QtWidgets.QWidget()
        self.setCentralWidget(central_widget)
        lay = QtWidgets.QVBoxLayout(central_widget)

        # Row 1: project path + "Select Project" button.
        hlay = QtWidgets.QHBoxLayout()
        hlay.addWidget(self.project_le)
        hlay.addWidget(self.project_button)
        lay.addLayout(hlay)

        # Row 2: spider selector.
        hlay2 = QtWidgets.QHBoxLayout()
        hlay2.addWidget(QtWidgets.QLabel("spiders:"))
        hlay2.addWidget(self.spider_combobox, 1)
        lay.addLayout(hlay2)

        lay.addWidget(self.start_stop_button)
        lay.addWidget(self.text_edit)
        # Disabled until select_project() has found at least one spider.
        self.start_stop_button.setEnabled(False)

        self.scrapy_worker = ScrapyWorker(self)
        self.scrapy_worker.logChanged.connect(self.insert_log)
        self.scrapy_worker.started.connect(self.text_edit.clear)
        # Un-check the toggle button when the process ends by itself.
        self.scrapy_worker.finished.connect(partial(self.start_stop_button.setChecked, False))

        self.start_stop_button.toggled.connect(self.on_checked)
        self.project_button.clicked.connect(self.select_project)
        self.resize(640, 480)

    @QtCore.pyqtSlot(bool)
    def on_checked(self, state):
        """Start the selected spider when checked; stop the worker when unchecked."""
        if state:
            filename = self.project_le.text()
            finfo = QtCore.QFileInfo(filename)
            # The crawl runs in the directory that contains the chosen file.
            directory = finfo.dir().absolutePath()
            self.scrapy_worker.run(directory, self.spider_combobox.currentText())
            self.start_stop_button.setText('Stop')
        else:
            self.start_stop_button.setText('Start')
            self.scrapy_worker.stop()

    @QtCore.pyqtSlot()
    def select_project(self):
        """Ask for a .cfg file and fill the combo box with that project's spiders."""
        filename, _ = QtWidgets.QFileDialog.getOpenFileName(
            self,
            "Select .cfg file",
            QtCore.QDir.currentPath(),
            "Configure File (*.cfg)"
        )
        if filename:
            self.project_le.setText(filename)
            finfo = QtCore.QFileInfo(filename)
            directory = finfo.dir().absolutePath()
            spiders = self.scrapy_worker.spiders(directory)
            self.spider_combobox.clear()
            self.spider_combobox.addItems(spiders)
            self.start_stop_button.setEnabled(bool(spiders))

    @QtCore.pyqtSlot(str)
    def insert_log(self, text):
        """Append *text* at the end of the log and restore the previous cursor."""
        prev_cursor = self.text_edit.textCursor()
        self.text_edit.moveCursor(QtGui.QTextCursor.End)
        self.text_edit.insertPlainText(text)
        self.text_edit.setTextCursor(prev_cursor)


if __name__ == '__main__':
    import sys

    app = QtWidgets.QApplication(sys.argv)
    app.setStyle('fusion')
    w = MainWindow()
    w.show()
    sys.exit(app.exec_())


0
投票

您可以监听Scrapy的item_scraped信号,并在每抓取到一辆新车时更新UI。

© www.soinside.com 2019 - 2024. All rights reserved.