有没有更好的DOM抓词方法

问题描述 投票:0回答:0

现在,我正在开发一个简单的 chrome 扩展。在这个阶段,我正在尝试:

  1. 基本上获取 DOM 上的所有单个单词(作为数组返回)
  2. 随机选择其中的10个
  3. 检查以确保它们实际上是有效的英文单词(在后端处理)
  4. 遍历 DOM 找到那些词,然后在 DOM 上围绕该词包裹一个跨度,它将包含我自己的样式(“selected-word”类)。

这是我的内容脚本:

// Function to show a container above the clicked word

// Get all text nodes in the DOM
function getTextNodes(node) {
  var nodes = [];
  if (node.nodeType == Node.TEXT_NODE) {
    nodes.push(node);
  } else {
    var children = node.childNodes;
    for (var i = 0, len = children.length; i < len; ++i) {
      nodes.push.apply(nodes, getTextNodes(children[i]));
    }
  }
  return nodes;
}

// Get all text content from the DOM, excluding anchor tags and links
function getTextContent() {
  var textNodes = getTextNodes(document.body);
  var textContent = "";
  for (var i = 0, len = textNodes.length; i < len; ++i) {
    var parent = textNodes[i].parentNode;
    if (
      parent.tagName != "A" &&
      parent.tagName != "LINK" &&
      parent.tagName != "SCRIPT"
    ) {
      textContent += textNodes[i].nodeValue.trim() + " ";
    }
  }
  return textContent.trim();
}

async function replaceInDOM(randomWords) {
  for (const word of randomWords) {
    const elements = document.querySelectorAll("*");
    let firstInstance;
    elements.forEach((element) => {
      if (element.innerHTML.includes(word)) {
        firstInstance = element;
        return;
      }
    });
    if (firstInstance) {
      const newHTML = firstInstance.innerHTML.replace(
        new RegExp(`\\b${word}\\b`, "g"),
        `<span class="selected-word">${word}</span>`
      );
      firstInstance.innerHTML = newHTML;
    }
  }
}

// Get a random selection of words from the text content
async function getRandomWords(textContent, numWords) {
  var words = textContent.split(/\s+/);
  var randomWords = [];
  var finalWords = [];
  for (var i = 0; i < numWords; ++i) {
    var index;
    var word;
    do {
      index = Math.floor(Math.random() * words.length);
      word = words[index].replace(/[^a-zA-Z]/g, "");
    } while (!word);
    randomWords.push(word);
  }
  chrome.runtime.sendMessage(
    {
      action: "cleanRandomWords",
      url: "http://localhost:8000/clean/common",
      randomWords: randomWords,
    },
    async (response) => {
      finalWords = response.validWords;
      alert(finalWords.length);
      await replaceInDOM(finalWords);
    }
  );
  return finalWords;
}

(async () => {
  //
  var textContent = getTextContent();
  var randomWords = await getRandomWords(textContent, 10);
})();

我遇到的问题是:

  1. 有时一个词被拆分(即完整的词是“对冲”,但由于某种原因选择了“他”,并在其周围包裹了跨度)
  2. 并非我返回的所有随机选择的单词都在 DOM 中找到。例如,我可以返回 10 个单词,但只有 5-6 个单词被 span 包围。

有没有人对如何改进我的功能有什么建议或想法,以便我更好地解决这些问题?

提前致谢!

javascript dom google-chrome-extension browser
© www.soinside.com 2019 - 2024. All rights reserved.