我有一个包含 1400 万条记录的文件,每条记录有 2 个字段。我已经使用以下配置索引了文档:
/**
 * Opens an [IndexWriter] on the file-system index located at [directory].
 *
 * The writer is configured with a [KeywordAnalyzer], a 2048 MB RAM buffer and
 * `CREATE_OR_APPEND` open mode.
 *
 * @param directory path of the index directory.
 * @return a newly opened writer; the caller is responsible for closing it.
 * @throws Exception if the writer cannot be created; the original failure is attached as the cause.
 */
private fun getIndexWriter(directory: String): IndexWriter = try {
    val indexWriterConfig = IndexWriterConfig(KeywordAnalyzer()).apply {
        ramBufferSizeMB = 2048.0
        openMode = IndexWriterConfig.OpenMode.CREATE_OR_APPEND
    }
    IndexWriter(FSDirectory.open(Paths.get(directory)), indexWriterConfig)
} catch (exception: Exception) {
    // Pass the throwable as the trailing argument so the stack trace is logged too.
    logger.error("File missing: {} ", exception.message, exception)
    // Chain the cause: rethrowing with only the message would discard the original stack trace.
    throw Exception("File missing: ${exception.message}", exception)
}
/**
 * Adds [documents] to the index and commits them, holding `mutex` for the whole operation.
 *
 * A writer is opened through `getIndexWriter("directory_path")` on every call and closed by `use`
 * when the call finishes. A document whose insertion fails with an [IOException] is logged and
 * skipped; a commit failure is logged and not rethrown.
 *
 * NOTE(review): `mutex.withLock` looks like the kotlinx.coroutines suspending API, which would
 * require this function to be declared `suspend` — confirm against the enclosing class.
 *
 * @param documents the documents to add to the index.
 */
fun indexDocs(documents: List<Document>) {
    mutex.withLock {
        getIndexWriter("directory_path").use { indexWriter ->
            documents.forEach { document ->
                try {
                    indexWriter.addDocument(document)
                } catch (exception: IOException) {
                    logger.error("Failed to add document: {} to passive index", document, exception)
                }
            }
            try {
                // commit() already flushes pending changes, so no separate flush() call is needed.
                indexWriter.commit()
            } catch (exception: IOException) {
                logger.error("Failed to commit", exception)
            }
        }
    }
}
完成索引和启动应用程序大约需要 5 分钟,但是我想检查是否可以进行一些优化来减少索引所需的时间。
提前感谢您的帮助!
您使用了多少个线程?请根据 CPU 核心数量调整并增加线程数。