我被困在代码的最后部分以创建一个小型搜索引擎。到目前为止,我已经能够让用户执行一些操作,例如选择要存储要搜索文件的文件夹,创建索引,搜索关键字,然后将关键字周围的文本摘录导出到txt文件。这是布局
这是我使用的代码:
from PyQt5 import QtCore, QtGui, QtWidgets, QtWidgets
from PyQt5.QtWidgets import QHeaderView
import os, os.path
import glob
import os
from PyPDF2 import PdfFileReader, PdfFileWriter
import pdftotext
from whoosh import index
from whoosh.fields import Schema, TEXT, ID, STORED
from whoosh.analysis import RegexTokenizer
from whoosh.analysis import StopFilter
from whoosh import scoring
from whoosh.index import open_dir
from whoosh import qparser
class Ui_MainWindow(object):
def setupUi(self, MainWindow):
MainWindow.setObjectName("MainWindow")
MainWindow.resize(1126, 879)
self.centralwidget = QtWidgets.QWidget(MainWindow)
self.centralwidget.setObjectName("centralwidget")
self.pushButton = QtWidgets.QPushButton(self.centralwidget)
self.pushButton.setGeometry(QtCore.QRect(40, 30, 100, 30))
self.pushButton.setObjectName("pushButton")
self.pushButton_2 = QtWidgets.QPushButton(self.centralwidget)
self.pushButton_2.setGeometry(QtCore.QRect(180, 30, 120, 30))
self.pushButton_2.setObjectName("pushButton_2")
self.pushButton_3 = QtWidgets.QPushButton(self.centralwidget)
self.pushButton_3.setGeometry(QtCore.QRect(620, 30, 80, 30))
self.pushButton_3.setObjectName("pushButton_3")
self.lineEdit = QtWidgets.QLineEdit(self.centralwidget)
self.lineEdit.setGeometry(QtCore.QRect(380, 60, 191, 21))
self.lineEdit.setObjectName("lineEdit")
self.lineEdit_2 = QtWidgets.QLineEdit(self.centralwidget)
self.lineEdit_2.setGeometry(QtCore.QRect(40, 90, 50, 21))
self.lineEdit_2.setObjectName("lineEdit_2")
self.label = QtWidgets.QLabel(self.centralwidget)
self.label.setGeometry(QtCore.QRect(380, 30, 50, 35))
font = QtGui.QFont()
font.setPointSize(10)
self.label.setFont(font)
self.label.setObjectName("label")
self.label2 = QtWidgets.QLabel(self.centralwidget)
self.label2.setGeometry(QtCore.QRect(40, 70, 150, 16))
font = QtGui.QFont()
font.setPointSize(10)
self.label2.setFont(font)
self.label2.setObjectName("label")
self.tableView = QtWidgets.QTableView(self.centralwidget)
self.tableView.setGeometry(QtCore.QRect(0, 120, 1121, 721))
self.tableView.setObjectName("tableView")
self.horizontal_header = self.tableView.horizontalHeader()
self.vertical_header = self.tableView.verticalHeader()
self.horizontal_header.setSectionResizeMode(
QHeaderView.ResizeToContents
)
self.vertical_header.setSectionResizeMode(
QHeaderView.ResizeToContents
)
self.horizontal_header.setStretchLastSection(True)
self.tableView.showGrid()
self.tableView.wordWrap()
MainWindow.setCentralWidget(self.centralwidget)
self.menubar = QtWidgets.QMenuBar(MainWindow)
self.menubar.setGeometry(QtCore.QRect(0, 0, 1126, 21))
self.menubar.setObjectName("menubar")
MainWindow.setMenuBar(self.menubar)
self.statusbar = QtWidgets.QStatusBar(MainWindow)
self.statusbar.setObjectName("statusbar")
MainWindow.setStatusBar(self.statusbar)
self.retranslateUi(MainWindow)
QtCore.QMetaObject.connectSlotsByName(MainWindow)
self.pushButton.clicked.connect(self.open_directory)
self.pushButton_2.clicked.connect(self.createindex)
self.pushButton_3.clicked.connect(self.export)
self.lineEdit.returnPressed.connect(self.search)
def open_directory(self):
self.dialog = QtWidgets.QFileDialog()
self.folder_path = self.dialog.getExistingDirectory(None, "Select Folder")
return self.folder_path
def createindex(self):
os.chdir(self.folder_path)
self.mypdfiles = glob.glob("*.pdf")
#creation of folder for splitted files
MYDIR = ("Splitted")
CHECK_FOLDER = os.path.isdir(MYDIR)
if not CHECK_FOLDER:
os.makedirs(MYDIR)
# save split downloaded file and save into new folder
for self.file in self.mypdfiles:
self.fname = os.path.splitext(os.path.basename(self.file))[0]
self.pdf = PdfFileReader(self.file)
for self.page in range(self.pdf.getNumPages()):
self.pdfwrite = PdfFileWriter()
self.pdfwrite.addPage(self.pdf.getPage(self.page))
self.outputfilename = '{}_page_{}.pdf'.format(self.fname, self.page+1)
with open(os.path.join("./Splitted", self.outputfilename), 'wb') as out:
self.pdfwrite.write(out)
print('Created: {}'.format(self.outputfilename))
#set working directory
os.chdir(self.folder_path + "/Splitted")
self.spltittedfiles = glob.glob("*.pdf")
MYDIR = ("Txt")
CHECK_FOLDER = os.path.isdir(MYDIR)
if not CHECK_FOLDER:
os.makedirs(MYDIR)
# Load your PDF
for self.file in self.spltittedfiles:
with open(self.file, "rb") as f:
self.pdf = pdftotext.PDF(f)
#creation of folder for splitted files
# Save all text to a txt file.
with open(os.path.join("./TXT", os.path.splitext(os.path.basename(self.file))[0] + ".txt") , 'w', encoding = 'utf-8') as f:
f.write("\n\n".join(self.pdf))
f.close()
os.chdir(self.folder_path)
MYDIR = ("indexdir")
CHECK_FOLDER = os.path.isdir(MYDIR)
if not CHECK_FOLDER:
os.makedirs(MYDIR)
self.my_analyzer = RegexTokenizer()| StopFilter(lang = "en")
self.schema = Schema(title=TEXT(stored=True),path=ID(stored=True),
content=TEXT(analyzer = self.my_analyzer),
textdata=TEXT(stored=True))
# set an index writer to add document as per schema
self.ix = index.create_in("indexdir",self.schema)
self.writer = self.ix.writer()
self.filepaths = [os.path.join("./Splitted/Txt",i) for i in os.listdir("./Splitted/Txt")]
for path in self.filepaths:
self.fp = open(path, "r", encoding='utf-8')
self.text = self.fp.read()
self.writer.add_document(title = os.path.splitext(os.path.basename(path))[0] , path=path, content=self.text,textdata=self.text)
self.fp.close()
self.writer.commit()
def search(self):
os.chdir(self.folder_path)
self.ix = open_dir("indexdir")
MYDIR = ("Results")
CHECK_FOLDER = os.path.isdir(MYDIR)
if not CHECK_FOLDER:
os.makedirs(MYDIR)
self.text = self.lineEdit.text()
self.query_str = self.text
self.query = qparser.QueryParser("textdata", schema = self.ix.schema)
self.q = self.query.parse(self.query_str)
self.topN = self.lineEdit_2.text()
if self.lineEdit_2.text() == "":
self.topN = 1000
else:
self.topN = int(self.lineEdit_2.text())
with self.ix.searcher(weighting=scoring.Frequency) as searcher:
self.results = searcher.search(self.q, terms=True, limit=self.topN)
for self.i in range(self.topN):
print(self.results[self.i]['title'], self.results[self.i]['textdata'])
def export(self):
with self.ix.searcher(weighting=scoring.Frequency) as searcher:
self.results = searcher.search(self.q, terms=True, limit= None)
for self.i in range(self.topN):
with open(os.path.join(self.folder_path, self.text + ".txt"), 'a') as f:
print(self.results[self.i]['title'], self.results[self.i]['textdata'], file=f)
def retranslateUi(self, MainWindow):
_translate = QtCore.QCoreApplication.translate
MainWindow.setWindowTitle(_translate("MainWindow", "Search Text"))
self.pushButton.setText(_translate("MainWindow", "Select Folder"))
self.pushButton_2.setText(_translate("MainWindow", "Create Database"))
self.pushButton_3.setText(_translate("MainWindow", "Export"))
self.label.setText(_translate("MainWindow", "Search"))
self.label2.setText(_translate("MainWindow", "Top Results"))
if __name__ == "__main__":
import sys
app = QtWidgets.QApplication(sys.argv)
MainWindow = QtWidgets.QMainWindow()
ui = Ui_MainWindow()
ui.setupUi(MainWindow)
MainWindow.show()
sys.exit(app.exec_())
我现在要做的是也在表中显示结果。我一直试图了解如何将搜索功能的返回值“发送”到表中以及如何显示它。它应该具有两列:File_Page和Content,并且与选择的顶部结果一样多的行。然后,每一行应显示具有hit和感兴趣的文本的文件,如下所示:
到目前为止,我已经能够只设置表的参数,但仅此而已。是否有任何手段让表格在不按任何其他按钮的情况下如何获得结果?正如我到目前为止所了解的,是否可以从代码的不同位置触发相同的功能,但我没有发现相反的意思,即仅用一个信号激活两个功能
我发现了很多例子,但没有一个适合目标。我仍在学习如何使用Python,但从未使用过C ++。
使用其他信号触发layoutChanged。即QLineEdit的信号。
假设我想在输入“搜索”字段中的内容后立即更新搜索结果,请谅解我。
在这种情况下,这是一个整洁的工作示例,展示了立即搜索5个条目:
from PySide2.QtWidgets import QWidget, QApplication, QPushButton, QVBoxLayout, QLineEdit
from PySide2.QtCore import QAbstractTableModel, QModelIndex, Qt, QObject
from PySide2.QtWidgets import QTableView
import sys
class Table(QAbstractTableModel):
def __init__(self, data):
super().__init__()
self._data = data
def data(self, index: QModelIndex, role: int = ...):
if role == Qt.DisplayRole:
return self._data[index.row()][index.column()]
def rowCount(self, parent: QModelIndex = ...) -> int:
return len(self._data)
def columnCount(self, parent: QModelIndex = ...) -> int:
return len(self._data[0])
def overWriteData(self, new_list):
self._data = new_list
class MainWindow(QWidget):
def __init__(self):
super(MainWindow, self).__init__()
self.table = QTableView()
self.line = QLineEdit()
self.layout = QVBoxLayout()
self.layout.addWidget(self.table)
self.layout.addWidget(self.line)
self.data = [('stack overflow', 'some_fancy_data'),
('stack overflow', 'some_fancy_data'),
('stack underflow', 'some_fancy_data'),
('Server Fault', 'some_fancy_data'),
('Ask Ubuntu', 'some_fancy_data')]
self.model = Table(self.data)
self.table.setModel(self.model)
self.setLayout(self.layout)
self.line.textChanged.connect(self.update)
def update(self):
filtered = [i for i in self.data if self.line.text() in i[0]]
if filtered:
self.model.overWriteData(filtered)
self.model.layoutChanged.emit()
if __name__ == "__main__":
app = QApplication(sys.argv)
window = MainWindow()
window.show()
sys.exit(app.exec_())