我想为一批议会会议记录建立索引,这些记录已按页拆分并转换为 txt 格式。我使用最新版本的 Whoosh 来构建索引,但它抛出了 LockError(锁定错误)。代码如下:
import os
from whoosh import index
from whoosh.index import create_in
from whoosh.fields import Schema, TEXT, ID
import sys
def createSearchableData(root):
    """Build a Whoosh index in ``indexdir`` from the files directly under *root*.

    Schema definition: ``title`` (name of the file, stored), ``path`` (stored
    ID), ``content`` (indexed but not stored) and ``textdata`` (stored text
    content).

    Args:
        root: Directory whose entries are read as UTF-8 text and indexed.

    Raises:
        OSError: If *root* cannot be listed or an entry cannot be opened.
        UnicodeDecodeError: If an entry is not valid UTF-8.
    """
    schema = Schema(title=TEXT(stored=True), path=ID(stored=True),
                    content=TEXT, textdata=TEXT(stored=True))
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("indexdir", exist_ok=True)

    ix = index.create_in("indexdir", schema)
    filepaths = [os.path.join(root, i) for i in os.listdir(root)]

    # Using the writer as a context manager commits on success and cancels on
    # failure. Without the cancel, an exception raised while adding documents
    # leaves the index write lock held, and the next call to ix.writer() in
    # the same session fails with LockError.
    with ix.writer() as writer:
        for path in filepaths:
            # Whoosh TEXT fields require str, not bytes, so read in text mode
            # with an explicit encoding instead of 'rb'.
            with open(path, 'r', encoding="utf-8") as fp:
                print(path)
                text = fp.read()
            # basename yields the actual file name; splitting the absolute
            # path on "\\" and taking element 1 returned the same directory
            # component for every file.
            writer.add_document(title=os.path.basename(path), path=path,
                                content=text, textdata=text)
# Directory containing the text files to index (Windows path, backslashes escaped).
root = "C:\\Users\\vitop\\OneDrive\\Desktop\\Final Project\\Test\\Splitted\\Txt"
# Index every entry found in that directory.
createSearchableData(root)
报错信息如下:
LockError Traceback (most recent call last)
<ipython-input-4-e8b4a33a2859> in <module>
31
32 root = "C:\\Users\\vitop\\OneDrive\\Desktop\\Final Project\\Test\\Splitted\\Txt"
---> 33 createSearchableData(root)
<ipython-input-4-e8b4a33a2859> in createSearchableData(root)
18 # Creating a index writer to add document as per schema
19 ix = index.create_in("indexdir",schema)
---> 20 writer = ix.writer()
21
22 filepaths = [os.path.join(root,i) for i in os.listdir(root)]
~\Anaconda3\lib\site-packages\whoosh\index.py in writer(self, procs, **kwargs)
462 else:
463 from whoosh.writing import SegmentWriter
--> 464 return SegmentWriter(self, **kwargs)
465
466 def lock(self, name):
~\Anaconda3\lib\site-packages\whoosh\writing.py in __init__(self, ix, poolclass, timeout, delay, _lk, limitmb, docbase, codec, compound, **kwargs)
513 if not try_for(self.writelock.acquire, timeout=timeout,
514 delay=delay):
--> 515 raise LockError
516
517 if codec is None:
LockError:
这段代码能够创建文件夹并生成索引文件。我尝试了 Whoosh 文档中建议的解决方法,但没有奏效。
已解决:去掉二进制打开模式,并在打开 txt 文件时指定 “utf-8” 编码。
# Read each file as UTF-8 text (Whoosh TEXT fields need str, not bytes) and
# add it to the index.
for path in filepaths:
    # The with-block closes the file even if reading or decoding fails.
    with open(path, 'r', encoding="utf-8") as fp:
        print(path)
        text = fp.read()
    # Use the real file name as the title; path.split("\\")[1] returned the
    # same directory component of the absolute path for every file.
    writer.add_document(title=os.path.basename(path), path=path,
                        content=text, textdata=text)
# Commit once, after the loop, so all documents are written together and the
# index write lock is released.
writer.commit()