问题描述 投票:0回答:1

  1. <table> <tr> <td> 1 </td> </tr> <tr> <td> 2 </td> </tr> </table>

tents转换为
[table, [tr, [td, [1]]], [tr, [td, [2]]]]

将此列表连接到Myers差异算法以找到差异。

当我获得差异时,就HTML而言,我没有得到最佳的差异(编辑数量最少)。

编码读取HTML并创建列表。


    import htmlparser from 'htmlparser2'; import {diffHierarchical} from './beta-myers-diff.js';; /** * Converts HTML string to a hierarchical array structure * @param {string} html - The HTML string to parse * @returns {Array} - A hierarchical array representation of the HTML */ export function htmlToHierarchicalArray(html) { // Parse HTML to DOM using htmlparser2 let dom = null; const handler = new htmlparser.DomHandler((error, nodes) => { if(error) { console.error('Error parsing HTML:', error); return; } // Find root element(s) const tags = nodes.filter(node => node.type === 'tag'); if(tags.length > 0) { dom = tags[0]; // Take the first tag as the root (usually there's only one root) } else if(nodes.length > 0) { dom = nodes[0]; // Take the first node if no tags } }); const parser = new htmlparser.Parser(handler, { decodeEntities: false }); parser.write(html); parser.end(); // Convert DOM to hierarchical array structure if (!dom) { return []; // Return empty array if parsing failed } // Convert the single root element to our hierarchical structure return nodeToHierarchicalArray(dom); } /** * Recursive function to convert DOM nodes to hierarchical array * @param {Array|Object} nodes - DOM node or array of nodes * @returns {Array} - Hierarchical array representation */ function domToHierarchicalArray(nodes) { // Handle single node if(!Array.isArray(nodes)) { return nodeToHierarchicalArray(nodes); } // Handle array of nodes return nodes.map(node => nodeToHierarchicalArray(node)); } /** * Convert a single DOM node to hierarchical array * @param {Object} node - DOM node * @returns {Array} - Hierarchical array representation */ function nodeToHierarchicalArray(node) { // Handle text node if(node.type === 'text') { const text = node.data.trim(); if(text === '') { return null; // Skip empty text nodes } // Return text node as a single-element array to maintain consistency return [node]; } // Handle tag node if(node.type === 'tag') { // Create array with the complete node object as first element const result = [node]; // Process children and add them to the array if(node.children && node.children.length > 0) { const childArrays = node.children .map(child => nodeToHierarchicalArray(child)) .filter(item => item !== null); // Filter out null items (empty text) if(childArrays.length > 0) { result.push(...childArrays); } } return result; } // Other node types (comment, directive, etc.) - skip return null; } /** * Test function for the table row swapping example */ function testTableRowSwapping() { const oldHtml = `<table> <tr> <td> 1 </td> </tr> <tr> <td> 2 </td> </tr> </table>`; const newHtml = `<table> <tr> <td> 2 </td> </tr> <tr> <td> 1 </td> </tr> </table>`; // Parse HTML to hierarchical arrays const oldArray = htmlToHierarchicalArray(oldHtml); const newArray = htmlToHierarchicalArray(newHtml); const editScript = diffHierarchical(oldArray, newArray); } testTableRowSwapping();

Myersdiff代码分散了创建的列表:


export class Keep { constructor(line, isSubtree = false) { this.type = 'Keep'; this.line = line; this.isSubtree = isSubtree; } } export class Insert { constructor(line, isSubtree = false) { this.type = 'Insert'; this.line = line; this.isSubtree = isSubtree; } } export class Remove { constructor(line, isSubtree = false) { this.type = 'Remove'; this.line = line; this.isSubtree = isSubtree; } } // Represents a point on the frontier with its history export class Frontier { constructor(x, history) { this.x = x; // x-coordinate in edit graph this.history = history; // sequence of operations to reach this point } } /** * Compare two node objects for equality * This is a critical function that determines when two nodes are considered "the same" */ function areNodesEqual(nodeA, nodeB) { // If both are text nodes, compare their trimmed content if (nodeA.type === 'text' && nodeB.type === 'text') { return nodeA.data.trim() === nodeB.data.trim(); } // If both are tags, compare tag names if (nodeA.type === 'tag' && nodeB.type === 'tag') { return nodeA.name === nodeB.name; } // Different types are never equal return false; } /** * Compare two subtrees for structural equality */ function areSubtreesEqual(subtreeA, subtreeB) { // Both must be arrays if (!Array.isArray(subtreeA) || !Array.isArray(subtreeB)) { return false; } // Must have the same number of elements if (subtreeA.length !== subtreeB.length) { return false; } // Root nodes must be equal if (!areNodesEqual(subtreeA[0], subtreeB[0])) { return false; } // Compare all children recursively for (let i = 1; i < subtreeA.length; i++) { if (!areSubtreesEqual(subtreeA[i], subtreeB[i])) { return false; } } return true; } /** * Hierarchical Myers diff algorithm * @param {Array} old - Old hierarchical array * @param {Array} current - New hierarchical array * @returns {Array} - Edit script */ function hierarchicalMyersDiff(old, current) { // Base case - if either is empty if (old.length === 0) { return current.map(item => new Insert(item, true)); } if (current.length === 0) { return old.map(item => new Remove(item, true)); } // Compare root nodes of the arrays const oldRoot = old[0]; const currentRoot = current[0]; // If the root nodes are equal, we have a potential match if (areNodesEqual(oldRoot, currentRoot)) { // First, check if these are leaf nodes or if their subtrees are equal if (old.length === 1 && current.length === 1) { // Leaf nodes - just keep return [new Keep(currentRoot)]; } // If both have exactly the same structure, keep the entire subtree if (areSubtreesEqual(old, current)) { return [new Keep(current, true)]; } // The roots match but the subtrees differ - recursively diff the children const result = [new Keep(currentRoot)]; // Extract children (everything except the root node) const oldChildren = old.slice(1); const currentChildren = current.slice(1); // Apply standard Myers diff to the children arrays const childDiff = myersDiff(oldChildren, currentChildren); result.push(...childDiff); return result; } // Root nodes are different - try standard Myers diff on the top level return myersDiff(old, current); } /** * Standard Myers diff algorithm for arrays * This is similar to the original myersDiff but adapted for our hierarchical arrays */ function myersDiff(old, current) { // Initialize the frontier with starting point const frontier = { 1: new Frontier(0, []) }; // Convert from 1-based to 0-based indexing function one(idx) { return idx - 1; } const aMax = old.length; // horizontal size of edit graph const bMax = current.length; // vertical size of edit graph // Main loop: try increasing numbers of edits for (let d = 0; d <= aMax + bMax + 1; d++) { // For each value of D, try all possible diagonals for (let k = -d; k <= d; k += 2) { // Determine whether to go down or right const goDown = (k === -d || (k !== d && frontier[k - 1].x < frontier[k + 1].x)); // Find starting point for this iteration let oldX, history; if (goDown) { ({ x: oldX, history } = frontier[k + 1]); var x = oldX; } else { ({ x: oldX, history } = frontier[k - 1]); var x = oldX + 1; } // Clone history to avoid modifying previous paths history = [...history]; let y = x - k; // Calculate y from x and k // Record the edit we made to get here if (y >= 1 && y <= bMax && goDown) { // Insert from new sequence when going down const currentItem = current[one(y)]; // Check if this is a subtree (an array) or a leaf node const isSubtree = Array.isArray(currentItem) && currentItem.length > 0; if (isSubtree) { // Add the entire subtree as a single insert operation history.push(new Insert(currentItem, true)); } else { history.push(new Insert(currentItem)); } } else if (x >= 1 && x <= aMax) { // Remove from old sequence when going right const oldItem = old[one(x)]; // Check if this is a subtree (an array) or a leaf node const isSubtree = Array.isArray(oldItem) && oldItem.length > 0; if (isSubtree) { // Add the entire subtree as a single remove operation history.push(new Remove(oldItem, true)); } else { history.push(new Remove(oldItem)); } } // Follow the snake: match diagonal moves as far as possible while (x < aMax && y < bMax) { const oldItem = old[one(x + 1)]; const currentItem = current[one(y + 1)]; // Both items must be arrays (subtrees) to compare if (Array.isArray(oldItem) && Array.isArray(currentItem)) { // Check if the subtrees are equal if (areSubtreesEqual(oldItem, currentItem)) { x += 1; y += 1; history.push(new Keep(currentItem, true)); } else { // Check if just the root nodes are equal if (oldItem.length > 0 && currentItem.length > 0 && areNodesEqual(oldItem[0], currentItem[0])) { x += 1; y += 1; // Add a Keep for the root node, then recursively diff the children history.push(new Keep(currentItem[0])); // Apply hierarchical diff to the children const oldChildren = oldItem.slice(1); const currentChildren = currentItem.slice(1); if (oldChildren.length > 0 || currentChildren.length > 0) { const childDiff = hierarchicalMyersDiff(...oldChildren, ...currentChildren); history.push(...childDiff); } } else { break; // No match, end of snake } } } else { break; // Different types, end of snake } } // Check if we've reached the end if (x >= aMax && y >= bMax) { return history; // Found solution } else { // Save this point in the frontier frontier[k] = new Frontier(x, history); } } } // If no solution found with the hierarchical approach, fallback to treating each element individually // This should be rare but ensures we always produce a result const fallbackResult = []; // Remove all old elements for (let i = 0; i < old.length; i++) { fallbackResult.push(new Remove(old[i])); } // Insert all new elements for (let i = 0; i < current.length; i++) { fallbackResult.push(new Insert(current[i])); } return fallbackResult; } /** * Process the edit script to expand subtree operations into individual node operations * This is necessary to integrate with the existing system */ export function expandEditScript(editScript) { const expanded = []; for (const operation of editScript) { if (operation.isSubtree) { // Convert subtree operation to individual node operations const node = operation.line[0]; // Root node const children = operation.line.slice(1); // Child subtrees // Add operation for the root node if (operation.type === 'Keep') { expanded.push(new Keep(node)); } else if (operation.type === 'Insert') { expanded.push(new Insert(node)); } else if (operation.type === 'Remove') { expanded.push(new Remove(node)); } // Recursively process children for (const child of children) { const childOperations = expandSubtreeOperation(child, operation.type); expanded.push(...childOperations); } } else { // Single node operation - add as is expanded.push(operation); } } return expanded; } /** * Helper function to expand a single subtree operation into individual node operations */ function expandSubtreeOperation(subtree, operationType) { const result = []; if (!Array.isArray(subtree)) { // Not a subtree, just a single node if (operationType === 'Keep') { result.push(new Keep(subtree)); } else if (operationType === 'Insert') { result.push(new Insert(subtree)); } else if (operationType === 'Remove') { result.push(new Remove(subtree)); } return result; } // Process the root node const rootNode = subtree[0]; if (operationType === 'Keep') { result.push(new Keep(rootNode)); } else if (operationType === 'Insert') { result.push(new Insert(rootNode)); } else if (operationType === 'Remove') { result.push(new Remove(rootNode)); } // Process all children recursively for (let i = 1; i < subtree.length; i++) { const childOperations = expandSubtreeOperation(subtree[i], operationType); result.push(...childOperations); } return result; } /** * Main entry point - applies hierarchical Myers diff and returns expanded edit script */ export function diffHierarchical(old, current) { const editScript = hierarchicalMyersDiff(old, current); return expandEditScript(editScript); }

在HTML字符串上运行Myers差异后的期望:

戈德:
     <table>
        <tr>
            <td>
                1
            </td>
        </tr>
        <tr>
            <td>
                2
            </td>
        </tr>
    </table>
new:
<table> <tr> <td> 2 </td> </tr> <tr> <td> 1 </td> </tr> </table>

它返回具有2个编辑的差异,该差异为1个,为1个,插入1个。相反,它返回4个编辑,其中删除了1个,插入了2个,插入了1个,插入了1个,并删除了2个。我该如何解决? sorry共享如此长的代码,但这是我在这个问题中讨论的用例的最小可重复示例。代码的第一部分只是读取HTML并将其编码为列表。第二部分是在第一部分生成的序列上运行的Myers diff实现。

在主题3中,请尝试修复JavaScript

import htmlparser from 'htmlparser2';
import { diffHierarchical } from './beta-myers-diff.js';

/**
 * Converts HTML string to a hierarchical array structure
 * @param {string} html - The HTML string to parse
 * @returns {Array} - A hierarchical array representation of the HTML
 */
export function htmlToHierarchicalArray(html) {
  let dom = null;

  const handler = new htmlparser.DomHandler((error, nodes) => {
    if (error) {
      console.error('Error parsing HTML:', error);
      return;
    }

    dom = nodes;
  });

  const parser = new htmlparser.Parser(handler, { decodeEntities: false });
  parser.write(html);
  parser.end();

  if (!dom) {
    return [];
  }

  return domToHierarchicalArray(dom);
}

/**
 * Recursive function to convert DOM nodes to hierarchical array
 * @param {Array|Object} nodes - DOM node or array of nodes
 * @returns {Array} - Hierarchical array representation
 */
function domToHierarchicalArray(nodes) {
  if (!Array.isArray(nodes)) {
    return nodeToHierarchicalArray(nodes);
  }

  return nodes.map(node => nodeToHierarchicalArray(node)).filter(item => item !== null);
}

/**
 * Convert a single DOM node to hierarchical array
 * @param {Object} node - DOM node
 * @returns {Array} - Hierarchical array representation
 */
function nodeToHierarchicalArray(node) {
  if (node.type === 'text') {
    const text = node.data.trim();
    return text === '' ? null : text;
  }

  if (node.type === 'tag') {
    const result = [node.name];
    if (node.attribs && Object.keys(node.attribs).length > 0) {
      result.push(node.attribs);
    }
    if (node.children && node.children.length > 0) {
      const childArrays = node.children.map(child => nodeToHierarchicalArray(child)).filter(item => item !== null);
      if (childArrays.length > 0) {
        result.push(childArrays);
      }
    }
    return result;
  }

  return null;
}

/**
 * Test function for the table row swapping example
 */
function testTableRowSwapping() {
  const oldHtml = `<table>
      <tr>
          <td>1</td>
      </tr>
      <tr>
          <td>2</td>
      </tr>
  </table>`;

  const newHtml = `<table>
      <tr>
          <td>2</td>
      </tr>
      <tr>
          <td>1</td>
      </tr>
  </table>`;

  const oldArray = htmlToHierarchicalArray(oldHtml);
  const newArray = htmlToHierarchicalArray(newHtml);

  const editScript = diffHierarchical(oldArray, newArray);
  console.log(editScript);
}

testTableRowSwapping();

html algorithm data-structures diff graph-theory
1个回答
0
投票

最新问题
© www.soinside.com 2019 - 2025. All rights reserved.