我有一个大字符串(约 1000 个单词),想把它与一个数组中的每个元素逐一比较(数组元素同样是大字符串),找出所有由 3 个或更多连续单词组成的匹配。我已经用正则表达式实现了一个版本,但得到的匹配数组都是空的。
较小文本的示例:
// Reference text: runs of 3 or more consecutive words from it are what
// should be found inside each candidate text below.
let textToCompare = "Hello there how are you doing with your life";
// Candidate texts to search; `id` labels each candidate's entry in the output.
let textsToCompareWith= [
{ id:1, text:"Hope you are doing good with your life" },
{ id:2, text:"what are you doing with your life. hello there how are you" },
{ id:3, text:"hello there mate" }
];
预期输出:
[
{id:1, matchedText:["with your life"]},
{id:2, matchedText:["are you doing with your life","hello there how are you"]},
{id:3, matchedText:[]}
];
当前输出:
[
{id:1, matchedText:[]},
{id:2, matchedText:[]},
{id:3, matchedText:[]}
];
我的代码:
// Builds ONE case-insensitive pattern that chains every word of textToCompare
// in its original order with ".*" between them, i.e.
//   \bHello\b.*\bthere\b.*\bhow\b.* ... \blife\b
// A text can only match if it contains all of those words in that order, so
// this pattern cannot detect partial runs of 3+ consecutive words. None of the
// sample texts contains every word in order, which is why every result is empty.
let regex = new RegExp("\\b" + textToCompare.split(" ").join("\\b.*\\b") + "\\b", "gi");
// Produces one { id, matchedText } record per candidate text.
let output = textsToCompareWith.map(textObj => {
// With the "g" flag, match() returns an array of all matched substrings,
// or null when nothing matches.
let matchedText = textObj?.text.match(regex);
console.log(matchedText);
return {
id: textObj.id,
matchedText: matchedText ? matchedText : [] // Fall back to an empty array when match() returned null
};
});
console.log(output);
你可以把两段文本中的单词两两比较,并记录每个匹配在第二段文本中结束的位置,以避免重复输出同一匹配的后缀。
/**
 * Finds runs of consecutive words that occur in both word lists.
 *
 * Every pair of positions (i in w1, j in w2) holding the same word is extended
 * forward for as long as the words keep matching. Runs of at least `minWords`
 * words are reported as space-joined phrases taken from `w2`.
 *
 * For each end position in `w2` only the longest run is kept. This suppresses
 * the suffixes of a run ("b c d" inside "a b c d") without letting a shorter
 * run that happens to be discovered first hide a longer one that ends at the
 * same place.
 *
 * @param {string[]} w1 - Words of the reference text.
 * @param {string[]} w2 - Words of the text being searched.
 * @param {number} [minWords=3] - Minimum number of consecutive words in a match.
 * @returns {string[]} Matched phrases, in order of first discovery.
 */
const compare = (w1, w2, minWords = 3) => {
    const result = [];
    // Exclusive end index in w2 -> where that run sits in `result` and how long it is.
    const reportedByEnd = new Map();

    for (let i = 0; i < w1.length; i++) {
        for (let j = 0; j < w2.length; j++) {
            if (w1[i] !== w2[j]) continue;

            // The words at (i, j) are equal, so the run is at least one word long.
            let k = 1;
            while (i + k < w1.length && j + k < w2.length && w1[i + k] === w2[j + k]) k++;
            if (k < minWords) continue;

            const end = j + k;
            const phrase = w2.slice(j, end).join(' ');
            const previous = reportedByEnd.get(end);
            if (previous === undefined) {
                reportedByEnd.set(end, { slot: result.length, length: k });
                result.push(phrase);
            } else if (k > previous.length) {
                // A longer run ends at the same word: replace the shorter one in place.
                result[previous.slot] = phrase;
                previous.length = k;
            }
        }
    }
    return result;
};

/** Lower-cases a string so that words compare case-insensitively. */
const lower = s => s.toLowerCase();

/**
 * Splits a text into lower-cased words.
 * `match` returns null when the text has no word characters, so fall back to
 * an empty list instead of throwing.
 */
const tokenize = s => (s.match(/\w+/g) ?? []).map(lower);

const textToCompare = "Hello there how are you doing with your life";
const textsToCompareWith = [{ id: 1, text: "Hope you are doing good with your life" }, { id: 2, text: "what are you doing with your life. hello there how are you" }, { id: 3, text: "hello there mate" }];
const words = tokenize(textToCompare);
const result = textsToCompareWith.map(({ id, text }) => ({
    id,
    matchedText: compare(words, tokenize(text))
}));

console.log(result);
.as-console-wrapper { max-height: 100% !important; top: 0; }
另一种略有不同的方法:跳过已经被匹配用过的单词。
/**
 * Collects phrases of three or more consecutive words shared by both lists.
 *
 * Once a phrase has been reported, the positions it covers in `w2` are marked
 * as used and are no longer considered as starting points for further phrases.
 *
 * @param {string[]} w1 - Words of the reference text.
 * @param {string[]} w2 - Words of the text being searched.
 * @returns {string[]} Space-joined phrases taken from `w2`, in discovery order.
 */
const compare = (w1, w2) => {
    // Counts how many words agree going forward from w1[a] and w2[b].
    const sharedRunLength = (a, b) => {
        let length = 0;
        while (a + length < w1.length && b + length < w2.length && w1[a + length] === w2[b + length]) {
            length += 1;
        }
        return length;
    };

    const phrases = [];
    const usedStarts = new Set();

    for (let a = 0; a < w1.length; a += 1) {
        for (let b = 0; b < w2.length; b += 1) {
            const canStartHere = !usedStarts.has(b) && w1[a] === w2[b];
            if (!canStartHere) continue;

            const length = sharedRunLength(a, b);
            if (length <= 2) continue;

            phrases.push(w2.slice(b, b + length).join(' '));
            // Mark every word of the reported phrase as used.
            for (let offset = 0; offset < length; offset += 1) {
                usedStarts.add(b + offset);
            }
        }
    }
    return phrases;
};

/** Lower-cases a string so that words compare case-insensitively. */
const lower = s => s.toLowerCase();

const textToCompare = "Hello there how are you doing with your life";
const textsToCompareWith = [{ id: 1, text: "Hope you are doing good with your life" }, { id: 2, text: "what are you doing with your life. hello there how are you" }, { id: 3, text: "hello there mate" }];

// Word list of the reference text, computed once and reused for every candidate.
const words = textToCompare.match(/\w+/g).map(lower);

const result = textsToCompareWith.map(({ id, text }) => {
    const candidateWords = text.match(/\w+/g).map(lower);
    return { id, matchedText: compare(words, candidateWords) };
});

console.log(result);
.as-console-wrapper { max-height: 100% !important; top: 0; }
我创建了一个答案,只是为了我自己学习 JavaScript。把东西拼凑在一起,我想出了:
let textToCompare = "Hello there how are you doing with your life";
// trim + filter keep leading/trailing whitespace from producing empty "words".
let words = textToCompare.trim().split(/\s+/).filter(Boolean);
// Word count of the reference text (kept as a top-level name; chunks() now
// measures its own argument instead of reading this).
let x = words.length;
let textsToCompareWith = [
    { id: 1, text: "Hope you are doing good with your life" },
    { id: 2, text: "what are you doing with your life. hello there how are you" },
    { id: 3, text: "hello there mate" }
];

// Every run of 3+ consecutive words of the reference text, longest string
// first, so that a long phrase is consumed before any of its sub-phrases is tried.
let combos = [...chunks(words)];
combos.sort((a, b) => b.length - a.length);

console.log(textsToCompareWith.map(({ id, text }) => ({ id, matchedText: FindMatches(text) })));

/**
 * Yields every run of three or more consecutive elements of `arr`,
 * joined with single spaces.
 *
 * @param {string[]} arr - Words to build phrases from.
 * @yields {string} One space-joined phrase per (start, end) pair.
 */
function* chunks(arr) {
    for (let i = 0; i < arr.length - 2; i++) {
        for (let j = i + 3; j <= arr.length; j++) {
            yield arr.slice(i, j).join(" ");
        }
    }
}

/**
 * Escapes regular-expression metacharacters so `text` is matched literally.
 *
 * @param {string} text - Raw text to embed in a pattern.
 * @returns {string} The escaped text.
 */
function escapeRegExp(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Returns the candidate phrases that occur in `s`, ignoring case.
 *
 * Candidates are tried in the given order. Each phrase that is found is cut
 * out of the working copy of the text, so phrases contained in an
 * already-matched phrase are not reported again.
 *
 * @param {string} s - Text to search.
 * @param {string[]} [candidates=combos] - Phrases to look for, longest first.
 * @returns {string[]} The phrases found, spelled as in `candidates`.
 */
function FindMatches(s, candidates = combos) {
    const r = [];
    let remaining = s;
    for (const phrase of candidates) {
        // The lookarounds act like \b for phrases that start and end with a word
        // character, and still work when a phrase starts or ends with punctuation.
        const re = new RegExp(`(?<!\\w)${escapeRegExp(phrase)}(?!\\w)`, "i");
        if (re.test(remaining)) {
            r.push(phrase);
            remaining = remaining.replace(re, " ");
        }
    }
    return r;
}
我很确定这段代码有不少缺陷,看起来也比较笨拙。我的思路是:假设输入可以按空格分割,先把它拆成所有由 3 个或更多连续单词组成的片段,然后按长度从长到短对这些片段排序,这样就不会先匹配到较短的子串。
谁知道,也许这里的东西真的有用。