有没有办法让 FlexSearch (https://github.com/nextapps-de/flexsearch) 只返回包含确切字符序列的结果?我按照建议将 resolution 设置为 25、将 threshold 设置为 22,也尝试过使用 depth 选项,但结果中仍然会出现相近的单词(有时相差较远,不过至少长度相同),而不只是与我输入的序列完全匹配的条目。
我的索引词有时是 3 个字母的缩写,所以可能会扰乱上下文搜索。
如果您使用以下代码片段,通过输入 CTD,您会得到 CTD(好的)和 CDT(不好)。如果您输入 CAA,您会得到 CAA(好的)和 Candidate(不好)
// Sample terms to index; each term's array position doubles as its id in the index.
var data = ["CTD", "CDT", "Candidate", "CRT", "CAA"];

(function () {
  // Index options exactly as used in the question (the setup that still
  // returns near matches such as "CDT" for the query "CTD").
  const index = new FlexSearch.Index({
    charset: "latin:advanced",
    tokenize: "full",
    resolution: 25,
    threshold: 22,
    cache: true
  });

  // Register every term under its array position.
  data.forEach(function (term, id) {
    index.add(id, term);
  });

  const suggestions = document.getElementById("suggestions");
  const userinput = document.getElementById("userinput");

  /**
   * "input" event handler: searches the index for the current text of the
   * input element (`this`) and renders one <div> per hit inside #suggestions,
   * reusing existing rows and dropping the ones that are no longer needed.
   */
  function show_results() {
    const matches = index.search(this.value);
    // childNodes is a live list, so it shrinks as rows are removed below.
    const rows = suggestions.childNodes;

    matches.forEach(function (id, position) {
      let row = rows[position];
      if (!row) {
        row = document.createElement("div");
        suggestions.appendChild(row);
      }
      row.textContent = data[id];
    });

    // Remove the rows left over from a previous, longer result list.
    while (rows.length > matches.length) {
      suggestions.removeChild(rows[matches.length]);
    }
  }

  userinput.addEventListener("input", show_results, true);
}());
<!doctype html>
<html>
<head>
<title>FlexSearch Sample</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/FlexSearch/0.7.31/flexsearch.compact.min.js"></script>
</head>
<body>
<!-- Search box; the sample script listens for "input" events on #userinput. -->
<input type="text" id="userinput" placeholder="Search by keyword...">
<!-- br is a void element: it takes no end tag (a stray </br> is parsed as a second line break). -->
<br>
<!-- Container the sample script fills with one <div> per search hit. -->
<div id="suggestions"></div>
</body>
</html>
我可能遗漏了什么,但我相信应该有办法让这个库生成一个更“笨”的、只做精确匹配的索引,而不是这样一个会返回近似匹配的“智能”索引。
这符合您的要求吗?
附注
为了避免您浪费时间去查找差异:我只更改了 FlexSearch.Index 调用的选项。
// Sample terms to index; each term's array position doubles as its id in the index.
var data = ["CTD", "CDT", "Candidate", "CRT", "CAA"];

(function () {
  // Index options proposed by this answer; compared with the question's
  // snippet, only this options object differs.
  const index = new FlexSearch.Index({
    charset: "latin",
    tokenize: "full",
    matcher: "simple",
    cache: true
  });

  // Register every term under its array position.
  data.forEach(function (term, id) {
    index.add(id, term);
  });

  const suggestions = document.getElementById("suggestions");
  const userinput = document.getElementById("userinput");

  /**
   * "input" event handler: searches the index for the current text of the
   * input element (`this`) and renders one <div> per hit inside #suggestions,
   * reusing existing rows and dropping the ones that are no longer needed.
   */
  function show_results() {
    const matches = index.search(this.value);
    // childNodes is a live list, so it shrinks as rows are removed below.
    const rows = suggestions.childNodes;

    matches.forEach(function (id, position) {
      let row = rows[position];
      if (!row) {
        row = document.createElement("div");
        suggestions.appendChild(row);
      }
      row.textContent = data[id];
    });

    // Remove the rows left over from a previous, longer result list.
    while (rows.length > matches.length) {
      suggestions.removeChild(rows[matches.length]);
    }
  }

  userinput.addEventListener("input", show_results, true);
}());
<!doctype html>
<html>
<head>
<title>FlexSearch Sample</title>
<script src="https://cdn.jsdelivr.net/gh/nextapps-de/flexsearch@master/dist/flexsearch.compact.js"></script>
</head>
<body>
<!-- Search box; the sample script listens for "input" events on #userinput. -->
<input type="text" id="userinput" placeholder="Search by keyword...">
<!-- br is a void element: it takes no end tag (a stray </br> is parsed as a second line break). -->
<br>
<!-- Container the sample script fills with one <div> per search hit. -->
<div id="suggestions"></div>
</body>
</html>
只是为 Daniele Ricci 的答案添加更多想法:有时,如果请求包含数字,“简单”匹配器仍然会出现误报(如果我们正在寻找严格的模式)。同样,我的方法可能过于经验主义,无法获得完全正确的行为,但这里有一个产生误报的示例。将 tokenize 更改为 strict 可以改善这种情况,但随后您会丢失部分单词的匹配。默认匹配器似乎是误报较少的一个......仍然有一些......:(
例如,输入 C3、C0 或 4 时,你仍然会得到结果(即误报),尽管下面的数据中没有任何条目包含数字。
// Sample terms to index; each term's array position doubles as its id in the index.
var data = ["CA", "VIS-CD", "CATDIR-U"];

(function () {
  // Index options used by this repro of false positives on queries with digits.
  const index = new FlexSearch.Index({
    tokenize: "full",
    matcher: "default",
    cache: true
  });

  // Register every term under its array position.
  data.forEach(function (term, id) {
    index.add(id, term);
  });

  const suggestions = document.getElementById("suggestions");
  const userinput = document.getElementById("userinput");

  /**
   * "input" event handler: searches the index for the current text of the
   * input element (`this`) and renders one <div> per hit inside #suggestions,
   * reusing existing rows and dropping the ones that are no longer needed.
   */
  function show_results() {
    const matches = index.search(this.value);
    // childNodes is a live list, so it shrinks as rows are removed below.
    const rows = suggestions.childNodes;

    matches.forEach(function (id, position) {
      let row = rows[position];
      if (!row) {
        row = document.createElement("div");
        suggestions.appendChild(row);
      }
      row.textContent = data[id];
    });

    // Remove the rows left over from a previous, longer result list.
    while (rows.length > matches.length) {
      suggestions.removeChild(rows[matches.length]);
    }
  }

  userinput.addEventListener("input", show_results, true);
}());
<!doctype html>
<html>
<head>
<title>FlexSearch Sample</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/FlexSearch/0.7.31/flexsearch.compact.min.js"></script>
</head>
<body>
<!-- Search box; the sample script listens for "input" events on #userinput. -->
<input type="text" id="userinput" placeholder="Search by keyword...">
<!-- br is a void element: it takes no end tag (a stray </br> is parsed as a second line break). -->
<br>
<!-- Container the sample script fills with one <div> per search hit. -->
<div id="suggestions"></div>
</body>
</html>