如何使用 PyLucene 检索所有已索引文档的特定字段?

问题描述 投票:0回答:1

在 Java 中可以使用 “MatchAllDocsQuery()” 来完成,但 PyLucene 的文档中没有说明该如何做到这一点。

下面的 Python 代码会提交单个查询,然后从检索到的文档中提取所有字段。

# Path of the Lucene index directory that the script opens for searching.
# NOTE(review): the value looks like a placeholder — point it at the real index.
INDEX_DIR = "directory/where/the/document/index/is/stored"

import sys, os, lucene

from java.nio.file import Paths
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.search import IndexSearcher

def run(searcher, analyzer):
    """Interactively run queries against the index and print each hit's DOI.

    Prompts for a query string in a loop, parses it against the "contents"
    field, retrieves at most 50 hits and prints the "doi" field of every
    retrieved document. Entering an empty line ends the loop.

    Args:
        searcher: Searcher used to execute the query and load hit documents.
        analyzer: Analyzer handed to QueryParser when parsing the input.

    Raises:
        KeyError: If a retrieved document has no "doi" field.
    """
    while True:
        # A bare `print` is a no-op expression in Python 3; it must be called
        # to actually emit the blank separator line.
        print()
        print("Hit enter with no input to quit.")
        command = input("Query:")
        if command == '':
            return

        print()
        print("Searching for:", command)
        query = QueryParser("contents", analyzer).parse(command)
        # NOTE(review): to match every document regardless of user input,
        # Lucene's MatchAllDocsQuery (org.apache.lucene.search) could be
        # instantiated here instead of parsing `command` — confirm against
        # the PyLucene version in use.
        scoreDocs = searcher.search(query, 50).scoreDocs
        # The count below is bounded by the 50 hits requested above.
        print("%s total matching documents." % len(scoreDocs))

        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            # Map every field name of the document to its string value.
            table = {field.name(): field.stringValue() for field in doc.getFields()}
            print(table['doi'])


# Script entry point: start the JVM, open the index at INDEX_DIR, and hand a
# searcher plus analyzer to run().
if __name__ == '__main__':
    lucene.initVM()
    print('lucene', lucene.VERSION)
    directory = SimpleFSDirectory.open(Paths.get(INDEX_DIR))
    try:
        print("Directory name is given below")
        print(directory)

        # Keep a handle on the reader so it can be closed explicitly;
        # dropping the searcher reference alone does not close it.
        reader = DirectoryReader.open(directory)
        try:
            searcher = IndexSearcher(reader)
            print(searcher)
            analyzer = StandardAnalyzer()

            # Calling the run function for execution
            run(searcher, analyzer)
        finally:
            reader.close()
    finally:
        directory.close()
python lucene pylucene
1个回答
0
投票

只需对查询做一个小改动,就可以让 Lucene 检索所有已索引的文档:把 command 变量直接替换为 command = ".*."。这里的 ".*." 借助星号通配符,搜索所有文档中的字段及字段值。

# Path of the Lucene index directory that the script opens for searching.
# NOTE(review): the value looks like a placeholder — point it at the real index.
INDEX_DIR = "directory/where/the/document/index/is/stored"

import sys, os, lucene

from java.nio.file import Paths
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.search import IndexSearcher

def run(searcher, analyzer):
    """Run the fixed ".*." query once and print the "doi" field of each hit.

    The query string is parsed against the "contents" field with the given
    analyzer, at most 50 hits are requested, and for every hit the document's
    fields are collected into a name -> string-value mapping before its
    "doi" entry is printed.

    Args:
        searcher: Searcher used to execute the query and load hit documents.
        analyzer: Analyzer handed to QueryParser when parsing the query.

    Raises:
        KeyError: If a retrieved document has no "doi" field.

    NOTE(review): whether ".*." really matches every document depends on how
    the classic QueryParser interprets it; MatchAllDocsQuery is Lucene's
    dedicated match-all query — confirm which one is intended.
    """
    command = ".*."
    print("Searching for:", command)

    parser = QueryParser("contents", analyzer)
    query = parser.parse(command)
    top_docs = searcher.search(query, 50)
    hits = top_docs.scoreDocs
    print("%s total matching documents." % len(hits))

    for hit in hits:
        document = searcher.doc(hit.doc)
        # Collect every field of the document, keyed by field name.
        table = {}
        for field in document.getFields():
            table[field.name()] = field.stringValue()
        print(table['doi'])


# Script entry point: start the JVM, open the index at INDEX_DIR, and hand a
# searcher plus analyzer to run().
if __name__ == '__main__':
    lucene.initVM()
    print('lucene', lucene.VERSION)
    directory = SimpleFSDirectory.open(Paths.get(INDEX_DIR))
    try:
        print("Directory name is given below")
        print(directory)

        # Keep a handle on the reader so it can be closed explicitly;
        # dropping the searcher reference alone does not close it.
        reader = DirectoryReader.open(directory)
        try:
            searcher = IndexSearcher(reader)
            print(searcher)
            analyzer = StandardAnalyzer()

            # Calling the run function for execution
            run(searcher, analyzer)
        finally:
            reader.close()
    finally:
        directory.close()
© www.soinside.com 2019 - 2024. All rights reserved.