<table>
<tr>
<td>
1
</td>
</tr>
<tr>
<td>
2
</td>
</tr>
</table>
[table, [tr, [td, [1]]], [tr, [td, [2]]]]
当我获得差异时,就HTML而言,我没有得到最佳的差异(编辑数量最少)。
编码读取HTML并创建列表。
import htmlparser from 'htmlparser2';
import {diffHierarchical} from './beta-myers-diff.js';;
/**
* Converts HTML string to a hierarchical array structure
* @param {string} html - The HTML string to parse
* @returns {Array} - A hierarchical array representation of the HTML
*/
export function htmlToHierarchicalArray(html) {
// Parse HTML to DOM using htmlparser2
let dom = null;
const handler = new htmlparser.DomHandler((error, nodes) => {
if(error) {
console.error('Error parsing HTML:', error);
return;
}
// Find root element(s)
const tags = nodes.filter(node => node.type === 'tag');
if(tags.length > 0) {
dom = tags[0]; // Take the first tag as the root (usually there's only one root)
} else if(nodes.length > 0) {
dom = nodes[0]; // Take the first node if no tags
}
});
const parser = new htmlparser.Parser(handler, { decodeEntities: false });
parser.write(html);
parser.end();
// Convert DOM to hierarchical array structure
if (!dom) {
return []; // Return empty array if parsing failed
}
// Convert the single root element to our hierarchical structure
return nodeToHierarchicalArray(dom);
}
/**
* Recursive function to convert DOM nodes to hierarchical array
* @param {Array|Object} nodes - DOM node or array of nodes
* @returns {Array} - Hierarchical array representation
*/
function domToHierarchicalArray(nodes) {
// Handle single node
if(!Array.isArray(nodes)) {
return nodeToHierarchicalArray(nodes);
}
// Handle array of nodes
return nodes.map(node => nodeToHierarchicalArray(node));
}
/**
* Convert a single DOM node to hierarchical array
* @param {Object} node - DOM node
* @returns {Array} - Hierarchical array representation
*/
function nodeToHierarchicalArray(node) {
// Handle text node
if(node.type === 'text') {
const text = node.data.trim();
if(text === '') {
return null; // Skip empty text nodes
}
// Return text node as a single-element array to maintain consistency
return [node];
}
// Handle tag node
if(node.type === 'tag') {
// Create array with the complete node object as first element
const result = [node];
// Process children and add them to the array
if(node.children && node.children.length > 0) {
const childArrays = node.children
.map(child => nodeToHierarchicalArray(child))
.filter(item => item !== null); // Filter out null items (empty text)
if(childArrays.length > 0) {
result.push(...childArrays);
}
}
return result;
}
// Other node types (comment, directive, etc.) - skip
return null;
}
/**
* Test function for the table row swapping example
*/
function testTableRowSwapping() {
const oldHtml = `<table>
<tr>
<td>
1
</td>
</tr>
<tr>
<td>
2
</td>
</tr>
</table>`;
const newHtml = `<table>
<tr>
<td>
2
</td>
</tr>
<tr>
<td>
1
</td>
</tr>
</table>`;
// Parse HTML to hierarchical arrays
const oldArray = htmlToHierarchicalArray(oldHtml);
const newArray = htmlToHierarchicalArray(newHtml);
const editScript = diffHierarchical(oldArray, newArray);
}
testTableRowSwapping();
Myersdiff代码分散了创建的列表:
export class Keep {
constructor(line, isSubtree = false) {
this.type = 'Keep';
this.line = line;
this.isSubtree = isSubtree;
}
}
export class Insert {
constructor(line, isSubtree = false) {
this.type = 'Insert';
this.line = line;
this.isSubtree = isSubtree;
}
}
export class Remove {
constructor(line, isSubtree = false) {
this.type = 'Remove';
this.line = line;
this.isSubtree = isSubtree;
}
}
// Represents a point on the frontier with its history
export class Frontier {
constructor(x, history) {
this.x = x; // x-coordinate in edit graph
this.history = history; // sequence of operations to reach this point
}
}
/**
* Compare two node objects for equality
* This is a critical function that determines when two nodes are considered "the same"
*/
function areNodesEqual(nodeA, nodeB) {
// If both are text nodes, compare their trimmed content
if (nodeA.type === 'text' && nodeB.type === 'text') {
return nodeA.data.trim() === nodeB.data.trim();
}
// If both are tags, compare tag names
if (nodeA.type === 'tag' && nodeB.type === 'tag') {
return nodeA.name === nodeB.name;
}
// Different types are never equal
return false;
}
/**
* Compare two subtrees for structural equality
*/
function areSubtreesEqual(subtreeA, subtreeB) {
// Both must be arrays
if (!Array.isArray(subtreeA) || !Array.isArray(subtreeB)) {
return false;
}
// Must have the same number of elements
if (subtreeA.length !== subtreeB.length) {
return false;
}
// Root nodes must be equal
if (!areNodesEqual(subtreeA[0], subtreeB[0])) {
return false;
}
// Compare all children recursively
for (let i = 1; i < subtreeA.length; i++) {
if (!areSubtreesEqual(subtreeA[i], subtreeB[i])) {
return false;
}
}
return true;
}
/**
* Hierarchical Myers diff algorithm
* @param {Array} old - Old hierarchical array
* @param {Array} current - New hierarchical array
* @returns {Array} - Edit script
*/
function hierarchicalMyersDiff(old, current) {
// Base case - if either is empty
if (old.length === 0) {
return current.map(item => new Insert(item, true));
}
if (current.length === 0) {
return old.map(item => new Remove(item, true));
}
// Compare root nodes of the arrays
const oldRoot = old[0];
const currentRoot = current[0];
// If the root nodes are equal, we have a potential match
if (areNodesEqual(oldRoot, currentRoot)) {
// First, check if these are leaf nodes or if their subtrees are equal
if (old.length === 1 && current.length === 1) {
// Leaf nodes - just keep
return [new Keep(currentRoot)];
}
// If both have exactly the same structure, keep the entire subtree
if (areSubtreesEqual(old, current)) {
return [new Keep(current, true)];
}
// The roots match but the subtrees differ - recursively diff the children
const result = [new Keep(currentRoot)];
// Extract children (everything except the root node)
const oldChildren = old.slice(1);
const currentChildren = current.slice(1);
// Apply standard Myers diff to the children arrays
const childDiff = myersDiff(oldChildren, currentChildren);
result.push(...childDiff);
return result;
}
// Root nodes are different - try standard Myers diff on the top level
return myersDiff(old, current);
}
/**
* Standard Myers diff algorithm for arrays
* This is similar to the original myersDiff but adapted for our hierarchical arrays
*/
function myersDiff(old, current) {
// Initialize the frontier with starting point
const frontier = { 1: new Frontier(0, []) };
// Convert from 1-based to 0-based indexing
function one(idx) {
return idx - 1;
}
const aMax = old.length; // horizontal size of edit graph
const bMax = current.length; // vertical size of edit graph
// Main loop: try increasing numbers of edits
for (let d = 0; d <= aMax + bMax + 1; d++) {
// For each value of D, try all possible diagonals
for (let k = -d; k <= d; k += 2) {
// Determine whether to go down or right
const goDown = (k === -d || (k !== d && frontier[k - 1].x < frontier[k + 1].x));
// Find starting point for this iteration
let oldX, history;
if (goDown) {
({ x: oldX, history } = frontier[k + 1]);
var x = oldX;
} else {
({ x: oldX, history } = frontier[k - 1]);
var x = oldX + 1;
}
// Clone history to avoid modifying previous paths
history = [...history];
let y = x - k; // Calculate y from x and k
// Record the edit we made to get here
if (y >= 1 && y <= bMax && goDown) {
// Insert from new sequence when going down
const currentItem = current[one(y)];
// Check if this is a subtree (an array) or a leaf node
const isSubtree = Array.isArray(currentItem) && currentItem.length > 0;
if (isSubtree) {
// Add the entire subtree as a single insert operation
history.push(new Insert(currentItem, true));
} else {
history.push(new Insert(currentItem));
}
} else if (x >= 1 && x <= aMax) {
// Remove from old sequence when going right
const oldItem = old[one(x)];
// Check if this is a subtree (an array) or a leaf node
const isSubtree = Array.isArray(oldItem) && oldItem.length > 0;
if (isSubtree) {
// Add the entire subtree as a single remove operation
history.push(new Remove(oldItem, true));
} else {
history.push(new Remove(oldItem));
}
}
// Follow the snake: match diagonal moves as far as possible
while (x < aMax && y < bMax) {
const oldItem = old[one(x + 1)];
const currentItem = current[one(y + 1)];
// Both items must be arrays (subtrees) to compare
if (Array.isArray(oldItem) && Array.isArray(currentItem)) {
// Check if the subtrees are equal
if (areSubtreesEqual(oldItem, currentItem)) {
x += 1;
y += 1;
history.push(new Keep(currentItem, true));
} else {
// Check if just the root nodes are equal
if (oldItem.length > 0 && currentItem.length > 0 &&
areNodesEqual(oldItem[0], currentItem[0])) {
x += 1;
y += 1;
// Add a Keep for the root node, then recursively diff the children
history.push(new Keep(currentItem[0]));
// Apply hierarchical diff to the children
const oldChildren = oldItem.slice(1);
const currentChildren = currentItem.slice(1);
if (oldChildren.length > 0 || currentChildren.length > 0) {
const childDiff = hierarchicalMyersDiff(...oldChildren, ...currentChildren);
history.push(...childDiff);
}
} else {
break; // No match, end of snake
}
}
} else {
break; // Different types, end of snake
}
}
// Check if we've reached the end
if (x >= aMax && y >= bMax) {
return history; // Found solution
} else {
// Save this point in the frontier
frontier[k] = new Frontier(x, history);
}
}
}
// If no solution found with the hierarchical approach, fallback to treating each element individually
// This should be rare but ensures we always produce a result
const fallbackResult = [];
// Remove all old elements
for (let i = 0; i < old.length; i++) {
fallbackResult.push(new Remove(old[i]));
}
// Insert all new elements
for (let i = 0; i < current.length; i++) {
fallbackResult.push(new Insert(current[i]));
}
return fallbackResult;
}
/**
* Process the edit script to expand subtree operations into individual node operations
* This is necessary to integrate with the existing system
*/
export function expandEditScript(editScript) {
const expanded = [];
for (const operation of editScript) {
if (operation.isSubtree) {
// Convert subtree operation to individual node operations
const node = operation.line[0]; // Root node
const children = operation.line.slice(1); // Child subtrees
// Add operation for the root node
if (operation.type === 'Keep') {
expanded.push(new Keep(node));
} else if (operation.type === 'Insert') {
expanded.push(new Insert(node));
} else if (operation.type === 'Remove') {
expanded.push(new Remove(node));
}
// Recursively process children
for (const child of children) {
const childOperations = expandSubtreeOperation(child, operation.type);
expanded.push(...childOperations);
}
} else {
// Single node operation - add as is
expanded.push(operation);
}
}
return expanded;
}
/**
* Helper function to expand a single subtree operation into individual node operations
*/
function expandSubtreeOperation(subtree, operationType) {
const result = [];
if (!Array.isArray(subtree)) {
// Not a subtree, just a single node
if (operationType === 'Keep') {
result.push(new Keep(subtree));
} else if (operationType === 'Insert') {
result.push(new Insert(subtree));
} else if (operationType === 'Remove') {
result.push(new Remove(subtree));
}
return result;
}
// Process the root node
const rootNode = subtree[0];
if (operationType === 'Keep') {
result.push(new Keep(rootNode));
} else if (operationType === 'Insert') {
result.push(new Insert(rootNode));
} else if (operationType === 'Remove') {
result.push(new Remove(rootNode));
}
// Process all children recursively
for (let i = 1; i < subtree.length; i++) {
const childOperations = expandSubtreeOperation(subtree[i], operationType);
result.push(...childOperations);
}
return result;
}
/**
* Main entry point - applies hierarchical Myers diff and returns expanded edit script
*/
export function diffHierarchical(old, current) {
const editScript = hierarchicalMyersDiff(old, current);
return expandEditScript(editScript);
}
在HTML字符串上运行Myers差异后的期望:
<table>
<tr>
<td>
1
</td>
</tr>
<tr>
<td>
2
</td>
</tr>
</table>
new: <table>
<tr>
<td>
2
</td>
</tr>
<tr>
<td>
1
</td>
</tr>
</table>
它返回具有2个编辑的差异,该差异为1个,为1个,插入1个。相反,它返回4个编辑,其中删除了1个,插入了2个,插入了1个,插入了1个,并删除了2个。我该如何解决? sorry共享如此长的代码,但这是我在这个问题中讨论的用例的最小可重复示例。代码的第一部分只是读取HTML并将其编码为列表。第二部分是在第一部分生成的序列上运行的Myers diff实现。
在主题3中,请尝试修复JavaScript
import htmlparser from 'htmlparser2';
import { diffHierarchical } from './beta-myers-diff.js';
/**
* Converts HTML string to a hierarchical array structure
* @param {string} html - The HTML string to parse
* @returns {Array} - A hierarchical array representation of the HTML
*/
export function htmlToHierarchicalArray(html) {
let dom = null;
const handler = new htmlparser.DomHandler((error, nodes) => {
if (error) {
console.error('Error parsing HTML:', error);
return;
}
dom = nodes;
});
const parser = new htmlparser.Parser(handler, { decodeEntities: false });
parser.write(html);
parser.end();
if (!dom) {
return [];
}
return domToHierarchicalArray(dom);
}
/**
* Recursive function to convert DOM nodes to hierarchical array
* @param {Array|Object} nodes - DOM node or array of nodes
* @returns {Array} - Hierarchical array representation
*/
function domToHierarchicalArray(nodes) {
if (!Array.isArray(nodes)) {
return nodeToHierarchicalArray(nodes);
}
return nodes.map(node => nodeToHierarchicalArray(node)).filter(item => item !== null);
}
/**
* Convert a single DOM node to hierarchical array
* @param {Object} node - DOM node
* @returns {Array} - Hierarchical array representation
*/
function nodeToHierarchicalArray(node) {
if (node.type === 'text') {
const text = node.data.trim();
return text === '' ? null : text;
}
if (node.type === 'tag') {
const result = [node.name];
if (node.attribs && Object.keys(node.attribs).length > 0) {
result.push(node.attribs);
}
if (node.children && node.children.length > 0) {
const childArrays = node.children.map(child => nodeToHierarchicalArray(child)).filter(item => item !== null);
if (childArrays.length > 0) {
result.push(childArrays);
}
}
return result;
}
return null;
}
/**
* Test function for the table row swapping example
*/
function testTableRowSwapping() {
const oldHtml = `<table>
<tr>
<td>1</td>
</tr>
<tr>
<td>2</td>
</tr>
</table>`;
const newHtml = `<table>
<tr>
<td>2</td>
</tr>
<tr>
<td>1</td>
</tr>
</table>`;
const oldArray = htmlToHierarchicalArray(oldHtml);
const newArray = htmlToHierarchicalArray(newHtml);
const editScript = diffHierarchical(oldArray, newArray);
console.log(editScript);
}
testTableRowSwapping();