我有这个JavaScript数组:
// Sample input: a flat list of [flag, text] pairs.
// The flags that occur in this sample are -1, 0 and 1; the text is either
// plain text or an HTML fragment (possibly only an opening or closing tag).
let a = [
[0, "<p><strong>Lorem Ipsum</strong> is simply dummy text of "],
[1, "<strong>"],
[0, "the"],
[1, "</strong>"],
[0, " printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type "],
[-1,"and"],
[1, "test"],
[0, " scrambled it to make a type"],
[1, " added"],
[0, "</p>"],
// A single entry may hold a complete, self-contained fragment.
[1, "<ul><li>test</li></ul>"]
];
我试图根据以下条件提取数组的组:
以上面数组的子数组为例:
[1, "<strong>"],
[0, "the"],
[1, "</strong>"]
这个子数组构成一个组,条件是 a[i][0] == 1 并且 a[i][1] 是HTML标记的开头。
这里 a[i][1] 包含 <strong>,它是一个有效HTML标记的开始标记,
所以我想提取从开始标记起、直到对应结束标记为止的所有数组项。
如下所示是一组:
let group = [
{
[1,"<strong>"],
[0,"the"],
[1,"</strong>"]
},
{
[1,"<ul><li>test</li></ul>"]
}
];
我想根据以下条件提取组:
1. a[i][0] == 1,并且 a[i][1] 是有效HTML标记的开头。
2. a[i][0] == 0,并且它的前面和后面分别是满足规则1和规则3的项。
3. a[i][0] == 1,并且 a[i][1] 是有效HTML标记的结尾。
这三条规则合在一起构成一个组(即一个JavaScript对象)。
还可以有一种情况:
[1,"<ul><li>test</li></ul>"]
数组项包含整个组<ul><li>test</li></ul>
。这也应包含在最终结果数组中。
// Sample input: a flat list of [flag, text] pairs.
// The flags that occur in this sample are -1, 0 and 1; the text is either
// plain text or an HTML fragment (possibly only an opening or closing tag).
let a = [
[
0,
"<p><strong>Lorem Ipsum</strong> is simply dummy text of "
],
[
1,
"<strong>"
],
[
0,
"the"
],
[
1,
"</strong>"
],
[
0,
" printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type "
],
[-1,
"and"
],
[
1,
"test"
],
[
0,
" scrambled it to make a type"
],
[
1,
" added"
],
[
0,
"</p>"
],
[
1,
"<ul><li>test</li></ul>"
]
];
// Scan `a` for entries whose flag is 1. The function is a hoisted
// declaration, so calling it before its definition works.
checkAndRemoveGroups(a, 1);
/**
 * Finds every entry whose flag equals `group` and whose text contains an
 * opening HTML tag, logs the matches and returns them.
 *
 * The previous pattern required at least four characters between `<` and `>`,
 * so attribute-less tags with short names (`<p>`, `<b>`, `<ul>`, `<em>`) were
 * silently missed. The pattern below accepts any tag that starts with a
 * letter and, like the original, rejects closing tags (`</x>`), self-closing
 * tags (`<x/>`) and processing instructions (`<?x?>`).
 *
 * @param {Array<Array>} arr - List of `[flag, text]` pairs.
 * @param {number} group - Flag value an entry must have to be considered.
 * @returns {Array<{Index: number, Value: Array, TagType: string}>} One record
 *   per matching entry, in input order. (Earlier versions returned nothing.)
 */
function checkAndRemoveGroups(arr, group) {
  // No `g` flag: the regex is only used as a yes/no test, and a global regex
  // would carry `lastIndex` state from one call of `test` to the next.
  const htmlOpenRegex = /<[A-Za-z](?:[^<>]*[^<>\/?])?>/;
  const groupArray = [];

  for (let i = 0; i < arr.length; i++) {
    const [flag, text] = arr[i];
    // Loose equality is kept on purpose so callers that pass the flag as a
    // numeric string keep working. Non-string text is skipped instead of
    // throwing.
    if (flag == group && typeof text === "string" && htmlOpenRegex.test(text)) {
      groupArray.push({
        Index: i,
        Value: arr[i],
        TagType: "Open",
      });
    }
  }

  console.log(groupArray);
  return groupArray;
}
您可以用一个数组来记录尚未关闭的开始标记,遇到结束标记时将其移除,并通过检查该数组的长度来判断是否还需要更多标记才能关闭最外层的标记。
/**
 * Lists the contents of every `<...>` occurrence in the given text.
 *
 * Each result is the text between `<` and the next `>`, so opening tags keep
 * their attributes (`a href="x"`) and closing tags keep their leading slash
 * (`/a`). Results are returned in order of appearance.
 *
 * @param {string} string - Text to scan; other values are converted to a string.
 * @returns {string[]} The tag contents, or an empty array when none are found.
 */
function getTags(string) {
  const tagPattern = /<\/?[^>]+>/g;
  // With the `g` flag, `match` returns all full matches, or null if there are none.
  const wholeTags = `${string}`.match(tagPattern) || [];
  // Drop the surrounding angle brackets from each match.
  return wholeTags.map((tag) => tag.slice(1, -1));
}
// HTML elements that never have a closing tag; they must not be tracked as
// "open", otherwise the group they appear in could never be completed.
const VOID_ELEMENTS = new Set([
  "area", "base", "br", "col", "embed", "hr", "img",
  "input", "link", "meta", "source", "track", "wbr",
]);

/**
 * Updates the stack of currently open tag names for one tag.
 *
 * @param {string[]} openTags - Stack of open tag names (mutated in place).
 * @param {string} tag - Tag content as produced by `getTags`, e.g. `strong`,
 *   `/strong` or `a href="x"`.
 */
function applyTag(openTags, tag) {
  // Self-closing syntax (`<br/>`) opens nothing.
  if (tag.endsWith("/")) {
    return;
  }
  const isClosing = tag.startsWith("/");
  // Compare by element name only, so `<a href="x">` is closed by `</a>`.
  const name = (isClosing ? tag.slice(1) : tag).trim().split(/\s+/)[0].toLowerCase();
  if (isClosing) {
    // A closing tag that does not match the innermost open tag is ignored.
    if (openTags[openTags.length - 1] === name) {
      openTags.pop();
    }
    return;
  }
  if (!VOID_ELEMENTS.has(name)) {
    openTags.push(name);
  }
}

/**
 * Splits a list of `[flag, text]` pairs into groups of consecutive entries.
 *
 * A group starts at an entry whose flag is exactly 1 and whose text contains
 * at least one tag, and runs until every tag opened inside the group has been
 * closed again. Entries between the start and the end are included whatever
 * their flag is. If the input ends before all tags are closed, the unfinished
 * group is still returned (the previous version threw a TypeError there).
 *
 * Relies on `getTags`, which is defined elsewhere in this file.
 *
 * @param {Array<Array>} entries - List of `[flag, text]` pairs.
 * @returns {Array<Array<Array>>} The groups, each a list of the original pairs.
 */
function groupByTags(entries) {
  const groups = [];
  let index = 0;

  while (index < entries.length) {
    const [flag, text] = entries[index];
    if (flag !== 1 || getTags(text).length === 0) {
      index++;
      continue;
    }

    const group = [];
    const openTags = [];
    // The bound check in the loop header prevents reading past the last entry
    // when a tag is never closed.
    for (; index < entries.length; index++) {
      for (const tag of getTags(entries[index][1])) {
        applyTag(openTags, tag);
      }
      group.push(entries[index]);
      if (openTags.length === 0) {
        break;
      }
    }
    groups.push(group);
    index++; // continue after the entry that completed the group
  }

  return groups;
}

const array = [
  [0, "<p><strong>Lorem Ipsum</strong> is simply dummy text of "],
  [1, "<strong>"],
  [0, "the"],
  [1, "</strong>"],
  [0, " printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type "],
  [-1, "and"],
  [1, "test"],
  [0, " scrambled it to make a type"],
  [1, " added"],
  [0, "</p>"],
  [1, "<ul><li>test</li></ul>"],
];
const result = groupByTags(array);
console.log(result);
/* Lets the element with class as-console-wrapper grow to 100% height (forced with !important) and pins it to the top. */
.as-console-wrapper { max-height: 100% !important; top: 0; }
我同意斯科特的看法……我认为一定有更好的方法来实现你想做的事。我知道你正试图基于这个数组来解决问题,但对于这个问题,也许存在一种完全不同的做法,使你不必把HTML嵌套在子数组中。
- 编辑 - 我误解了你在找什么,所以我原来的回答并没有真正告诉你出了什么问题,我把它删除了。再看看这个。
这正是你想要得到的结果吗?如果你用HTML正则表达式逐个检查每个元素,我不明白你要如何得到 [0,"the"]。
而且每个元素都会被放进各自独立的对象中,这似乎不是你想要的。
let group = [
{
[1,"<strong>"],
[0,"the"],
[1,"</strong>"]
},
{
[1,"<ul><li>test</li></ul>"]
}
];