我正在尝试使用 Apache POI 复制 XWPFDocument 中的段落。由于 POI 无法在任意位置插入预先构造好的段落,我阅读了大量答案,它们都建议先使用 insertNewParagraph() 插入一个一次性的临时段落,然后使用 setParagraph() 将该临时段落替换为我实际想要的段落。这变得更加复杂,因为 insertNewParagraph 不能只接受正文元素列表中的目标索引作为输入(像 XWPFTable.addRow(row, pos) 那样),而必须向其传递一个 XmlCursor。
TestIn.docx 是我创建的测试文档,包含 6 个段落:A、B、C、D、E、F。
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.apache.poi.xwpf.usermodel.IBodyElement;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.xmlbeans.XmlCursor;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
/**
 * Stand-alone reproduction of how {@code XWPFDocument.getBodyElements()} and
 * {@code XWPFDocument.getParagraphs()} drift apart when calls to {@code insertNewParagraph}
 * and {@code setParagraph} are interleaved. After every step both lists are printed to the
 * console using short labels so the divergence can be followed.
 */
public class ParagraphIssue {

    /**
     * Appends the label of one body element to {@code s}, separated from earlier labels by a
     * single space. The element is matched by identity against the supplied paragraphs, in the
     * order {@code a..f, t1, r1, t2, r2}; the first match wins. Anything else is labelled
     * {@code "U"}.
     *
     * @param elem the body element to label
     * @param s accumulator receiving the label
     * @param a paragraph labelled "A"
     * @param b paragraph labelled "B"
     * @param c paragraph labelled "C"
     * @param d paragraph labelled "D"
     * @param e paragraph labelled "E"
     * @param f paragraph labelled "F"
     * @param t1 first temporary paragraph ("T1"), may be {@code null}
     * @param r1 first replacement paragraph ("R1"), may be {@code null}
     * @param t2 second temporary paragraph ("T2"), may be {@code null}
     * @param r2 second replacement paragraph ("R2"), may be {@code null}
     */
    public void debugElement(IBodyElement elem, StringBuilder s, XWPFParagraph a, XWPFParagraph b,
            XWPFParagraph c, XWPFParagraph d, XWPFParagraph e, XWPFParagraph f,
            XWPFParagraph t1, XWPFParagraph r1, XWPFParagraph t2, XWPFParagraph r2) {
        if (s.length() > 0) {
            s.append(" ");
        }
        // Parallel tables keep the identity checks in the same order as the labels.
        IBodyElement[] known = {a, b, c, d, e, f, t1, r1, t2, r2};
        String[] labels = {"A", "B", "C", "D", "E", "F", "T1", "R1", "T2", "R2"};
        for (int i = 0; i < known.length; i++) {
            if (elem == known[i]) {
                s.append(labels[i]);
                return;
            }
        }
        s.append("U");
    }

    /**
     * Prints two lines: the labels of all body elements of {@code doc}, then the labels of all
     * entries of its paragraph list. Labels are produced by
     * {@link #debugElement(IBodyElement, StringBuilder, XWPFParagraph, XWPFParagraph, XWPFParagraph, XWPFParagraph, XWPFParagraph, XWPFParagraph, XWPFParagraph, XWPFParagraph, XWPFParagraph, XWPFParagraph)}.
     */
    public void debug(XWPFDocument doc, XWPFParagraph a, XWPFParagraph b, XWPFParagraph c,
            XWPFParagraph d, XWPFParagraph e, XWPFParagraph f,
            XWPFParagraph t1, XWPFParagraph r1, XWPFParagraph t2, XWPFParagraph r2) {
        StringBuilder elementLabels = new StringBuilder();
        for (IBodyElement elem : doc.getBodyElements()) {
            debugElement(elem, elementLabels, a, b, c, d, e, f, t1, r1, t2, r2);
        }
        System.out.println("Elements: " + elementLabels);

        StringBuilder paragraphLabels = new StringBuilder();
        for (XWPFParagraph para : doc.getParagraphs()) {
            debugElement(para, paragraphLabels, a, b, c, d, e, f, t1, r1, t2, r2);
        }
        System.out.println("Paragraphs: " + paragraphLabels);
    }

    /**
     * Counts the body elements of the same element type as {@code target} that precede it in
     * {@code doc.getBodyElements()}, i.e. the index of {@code target} among paragraphs only.
     * If {@code target} is not found, the count of all such elements is returned.
     */
    private static int paragraphIndexOf(XWPFDocument doc, XWPFParagraph target) {
        int index = 0;
        for (IBodyElement elem : doc.getBodyElements()) {
            if (elem == target) {
                break;
            }
            if (elem.getElementType() == target.getElementType()) {
                index++;
            }
        }
        return index;
    }

    /**
     * Clones the first and the second body element of {@code doc}, inserting each clone by first
     * adding a temporary paragraph and then overwriting it via {@code setParagraph}. The state of
     * both document lists is printed after every step.
     *
     * @param doc document expected to hold at least six paragraphs
     * @param insertionPoint body-element index of the paragraph the first temporary paragraph is
     *     inserted at; the second one uses {@code insertionPoint + 1}
     */
    public void run(XWPFDocument doc, int insertionPoint) {
        XWPFParagraph paraA = doc.getParagraphs().get(0);
        XWPFParagraph paraB = doc.getParagraphs().get(1);
        XWPFParagraph paraC = doc.getParagraphs().get(2);
        XWPFParagraph paraD = doc.getParagraphs().get(3);
        XWPFParagraph paraE = doc.getParagraphs().get(4);
        XWPFParagraph paraF = doc.getParagraphs().get(5);

        System.out.println("--- Document initial state ---");
        debug(doc, paraA, paraB, paraC, paraD, paraE, paraF, null, null, null, null);

        // Clone the first paragraph
        XWPFParagraph cloneThis = (XWPFParagraph) doc.getBodyElements().get(0);
        XWPFParagraph clonedPara = new XWPFParagraph((CTP) cloneThis.getCTP().copy(), doc);

        // Add a temporary paragraph at the insertion point
        XWPFParagraph insertBeforePara = (XWPFParagraph) doc.getBodyElements().get(insertionPoint);
        XmlCursor cursor = insertBeforePara.getCTP().newCursor();
        XWPFParagraph newPara = doc.insertNewParagraph(cursor);
        newPara.insertNewRun(0).setText("this should get replaced");

        System.out.println("--- Insert 1st temporary para before F ---");
        debug(doc, paraA, paraB, paraC, paraD, paraE, paraF, newPara, clonedPara, null, null);

        int newParaIndex = paragraphIndexOf(doc, newPara);
        System.out.println("1st temporary para is at index " + newParaIndex); // 5, as expected

        // Now replace the added paragraph with the cloned one
        doc.setParagraph(clonedPara, newParaIndex);

        System.out.println("--- Replace 1st temporary para ---");
        debug(doc, paraA, paraB, paraC, paraD, paraE, paraF, newPara, clonedPara, null, null);

        // Do exactly the same thing again to clone the second paragraph
        XWPFParagraph cloneThis2 = (XWPFParagraph) doc.getBodyElements().get(1);
        XWPFParagraph clonedPara2 = new XWPFParagraph((CTP) cloneThis2.getCTP().copy(), doc);

        XWPFParagraph insertBeforePara2 =
                (XWPFParagraph) doc.getBodyElements().get(insertionPoint + 1);
        XmlCursor cursor2 = insertBeforePara2.getCTP().newCursor();
        XWPFParagraph newPara2 = doc.insertNewParagraph(cursor2);
        newPara2.insertNewRun(0).setText("this should get replaced too");

        System.out.println("--- Insert 2nd temporary para before F ---");
        debug(doc, paraA, paraB, paraC, paraD, paraE, paraF,
                newPara, clonedPara, newPara2, clonedPara2);

        int newParaIndex2 = paragraphIndexOf(doc, newPara2);
        System.out.println("2nd temporary para is at index " + newParaIndex2);

        // So then this replaces the wrong paragraph
        doc.setParagraph(clonedPara2, newParaIndex2);

        System.out.println("--- Replace 2nd temporary para ---");
        debug(doc, paraA, paraB, paraC, paraD, paraE, paraF,
                newPara, clonedPara, newPara2, clonedPara2);
    }

    /**
     * Loads {@code W:\TestIn.docx}, runs the reproduction with insertion point 5 and writes the
     * result to {@code W:\TestOut.docx}. Any failure is printed to standard error.
     */
    public final static void main(final String[] args) {
        // The document is a resource too: closing it releases the underlying package.
        try (FileInputStream in = new FileInputStream("W:\\TestIn.docx");
                XWPFDocument doc = new XWPFDocument(in)) {
            new ParagraphIssue().run(doc, 5);
            try (FileOutputStream out = new FileOutputStream("W:\\TestOut.docx")) {
                doc.write(out);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
其中很多是调试代码,用来输出能够准确显示实际发生情况的信息:
--- 记录初始状态---
元素:A B C D E F
段落:A B C D E F
--- 在 F 之前插入第一个临时段落 ---
元素:A B C D E T1 F
段落:A B C D E T1 F
第一个临时段落位于索引 5 - 到目前为止完美
--- 替换第一个临时段落 ---
元素:A B C D E T1 F
段落:A B C D E R1 F - 段落列表有替换段落,但元素列表仍然有临时段落
--- 在 F 之前插入第二个临时段落 ---
元素:A B C D E T1 T2 F
段落:T2 A B C D E R1 F -现在第二个临时段落已经位于列表的前面;它位于元素列表中的正确位置
第二个临时段落位于索引 6
--- 替换第二个临时段落 ---
元素:A B C D E T1 T2 F
段落:T2 A B C D E R2 F - 元素列表仍包含临时段落;段落列表第二段位置错误
令人惊讶的是,保存的 Word 文档实际上看起来是正确的,但我不明白,既然两个列表看起来都不正确,这是怎么做到的。
就查找插入位置而言,到目前为止我本可以直接使用:
int newParaIndex = doc.getPosOfParagraph (newPara);
但当文档中还包含表格时,问题就出现了。现在,我编辑了源文档并插入了一个表格,因此元素列表现在看起来像 A、B、(表格)、C、D、E、F,并将 insertionPoint 相应改为 6。
现在不能再使用 doc.getPosOfParagraph() 了,因为它返回的是段落在元素列表(包括表格)中的索引,而 setParagraph 需要的是段落在段落列表(不包括表格)中的索引。如果改用 doc.getParagraphPos() 来进行换算,它对第二个插入的临时段落会返回 0,因为正如上面的输出所示,该段落在段落列表中确实位于这个位置。因此,我改为只统计元素列表中的段落来解决这个问题,正如代码中所示。
添加表格后再次运行(表格在调试输出中显示为“U”):
--- 记录初始状态---
元素:A B U C D E F
段落:A B C D E F
--- 在 F 之前插入第一个临时段落 ---
元素:A B U C D E T1 F
段落:A B C D E T1 F
--- 替换第一个临时段落 ---
元素:A B U C D E T1 F
段落:A B C D E R1 F
--- 在 F 之前插入第二个临时段落 ---
元素:A B U C D E T1 T2 F
段落:T2 A B C D E R1 F
第二个临时段落位于索引 6
--- 替换第二个临时段落 ---
元素:A B U C D E T1 T2 F
段落:T2 A B C D E R2 F
这实际上确实在保存的文档中生成了正确的输出。我的问题是:
我在尝试使用 Apache POI 复制 Word 文档中的段落时遇到了同样的问题。我发现你的第一个问题的解决方案确实是:先一次性创建所有临时段落(即先完成所有对 insertNewParagraph 的调用),然后再将它们全部替换为要复制的内容(即之后再进行所有对 setParagraph 的调用)。
我最终得到了以下工作解决方案:
/** Utilities for managing paragraphs in Word documents. */
public class Paragraphs {
    /**
     * Inserts {@code times} copies of the given paragraph into its document, at the position of
     * a cursor opened on that paragraph, and returns the copies.
     *
     * <p><b>Implementation note:</b> due to some weird behaviour with Apache POI's
     * {@code insertNewParagraph} and {@code setParagraph}, as described in
     * https://stackoverflow.com/questions/75289475/insert-multiple-copied-paragraphs-in-xwpfdocument
     * all placeholder paragraphs are inserted first and only afterwards overwritten with copies.
     * Inserting and overwriting in the same loop makes later placeholders land at position 0 of
     * {@code document.getParagraphs()}, so only the first duplication would succeed.
     *
     * @param paragraph the paragraph to copy; must be a direct child of the document body
     * @param times number of copies to create; values below 1 create none
     * @return the newly created copies, in insertion order
     * @throws IllegalArgumentException if a placeholder cannot be inserted at the paragraph
     */
    public static List<XWPFParagraph> duplicate(XWPFParagraph paragraph, int times) {
        var document = paragraph.getDocument();
        var placeholders = new ArrayList<XWPFParagraph>();
        try (var cursor = paragraph.getCTP().newCursor()) {
            for (int i = 0; i < times; i++) {
                var placeholder = document.insertNewParagraph(cursor);
                if (placeholder == null) {
                    // NOTE(review): insertNewParagraph appears to yield null when the cursor is
                    // not on a direct child of the document body — confirm against POI sources.
                    throw new IllegalArgumentException(
                            "Cannot insert a paragraph at the given paragraph's position");
                }
                placeholders.add(placeholder);
                // Advance to the next START token so the next placeholder is inserted there.
                while (cursor.toNextToken() != TokenType.START) {
                    // intentionally empty: the loop condition moves the cursor
                }
            }
        }

        var copies = new ArrayList<XWPFParagraph>(placeholders.size());
        for (var placeholder : placeholders) {
            // setParagraph expects an index that counts paragraphs only, whereas
            // getPosOfParagraph counts every body element (tables included). Count the
            // paragraph-type body elements in front of the placeholder instead, so documents
            // containing tables are handled correctly.
            int paragraphIndex = 0;
            for (var element : document.getBodyElements()) {
                if (element == placeholder) {
                    break;
                }
                if (element.getElementType() == placeholder.getElementType()) {
                    paragraphIndex++;
                }
            }
            // Copy the original paragraph's XML and overwrite the empty placeholder with it.
            var copy = new XWPFParagraph((CTP) paragraph.getCTP().copy(), document);
            document.setParagraph(copy, paragraphIndex);
            copies.add(copy);
        }
        return copies;
    }
}
据我所知,这种奇怪行为的原因确实与 setParagraph 中的 TODO 注释有关,正如 Axel Richter 在他的评论中指出的那样。为了演示这种行为,我编写了下面这些可以通过的测试:
/**
 * Documents the faulty state reached when {@code insertNewParagraph} is called again after a
 * {@code setParagraph}: the assertions below pin the behaviour observed by the author, namely
 * that the second inserted paragraph shows up at index 0 of {@code getParagraphs()} and that
 * reading index 0 after the second {@code setParagraph} throws
 * {@code XmlValueDisconnectedException}.
 */
@Test
void demoApachePoiBugInvalidBehaviour() {
// Start with a single paragraph, then insert a second one using a cursor on the first.
var document = new XWPFDocument();
var paragraph1 = document.createParagraph();
paragraph1.createRun().setText("Hello World!");
var paragraph2 = document.insertNewParagraph(paragraph1.getCTP().newCursor());
paragraph2.createRun().setText("Hello People!");
// The inserted paragraph is listed in front of paragraph1.
assertEquals("Hello People!", document.getParagraphs().get(0).getText());
assertEquals("Hello World!", document.getParagraphs().get(1).getText());
// Overwrite the inserted paragraph with a copy of paragraph1 straight away.
var paragraph2Position = document.getPosOfParagraph(paragraph2);
var newParagraph2 = new XWPFParagraph((CTP) paragraph1.getCTP().copy(), document);
document.setParagraph(newParagraph2, paragraph2Position);
assertEquals("Hello World!", document.getParagraphs().get(0).getText());
assertEquals("Hello World!", document.getParagraphs().get(1).getText());
// so far so good.
// However, inserting a new paragraph at the position of paragraph1 now results in the new paragraph being inserted
// at position 0, while it should be inserted at position 1 (one before last).
var paragraph3 = document.insertNewParagraph(paragraph1.getCTP().newCursor());
paragraph3.createRun().setText("Hello Opinity!");
assertEquals("Hello Opinity!", document.getParagraphs().get(0).getText());
assertEquals("Hello World!", document.getParagraphs().get(1).getText());
assertEquals("Hello World!", document.getParagraphs().get(2).getText());
// Replacing the third paragraph the same way leaves index 0 unreadable.
var paragraph3Position = document.getPosOfParagraph(paragraph3);
var newParagraph3 = new XWPFParagraph((CTP) paragraph1.getCTP().copy(), document);
document.setParagraph(newParagraph3, paragraph3Position);
assertThrows(XmlValueDisconnectedException.class, () -> document.getParagraphs().get(0).getText());
assertEquals("Hello World!", document.getParagraphs().get(1).getText());
assertEquals("Hello World!", document.getParagraphs().get(2).getText());
}
相比之下,按正确顺序调用 insertNewParagraph 和 setParagraph(先完成所有插入,再进行所有替换)时,行为符合预期:
/**
 * Counterpart to the faulty sequence: every {@code insertNewParagraph} call is made before the
 * first {@code setParagraph} call. The assertions show each placeholder at its expected index
 * and all three paragraphs readable with the copied text at the end.
 */
@Test
void demoApachePoiBugValidBehaviour() {
// Phase 1: create the original paragraph and insert both placeholders via cursors on it.
var document = new XWPFDocument();
var paragraph1 = document.createParagraph();
paragraph1.createRun().setText("Hello World!");
var paragraph2 = document.insertNewParagraph(paragraph1.getCTP().newCursor());
paragraph2.createRun().setText("Hello People!");
var paragraph3 = document.insertNewParagraph(paragraph1.getCTP().newCursor());
paragraph3.createRun().setText("Hello Opinity!");
// Both placeholders are listed in insertion order, ahead of the original paragraph.
assertEquals("Hello People!", document.getParagraphs().get(0).getText());
assertEquals("Hello Opinity!", document.getParagraphs().get(1).getText());
assertEquals("Hello World!", document.getParagraphs().get(2).getText());
// Phase 2: overwrite the first placeholder with a copy of the original paragraph.
var paragraph2Position = document.getPosOfParagraph(paragraph2);
var newParagraph2 = new XWPFParagraph((CTP) paragraph1.getCTP().copy(), document);
document.setParagraph(newParagraph2, paragraph2Position);
assertEquals("Hello World!", document.getParagraphs().get(0).getText());
assertEquals("Hello Opinity!", document.getParagraphs().get(1).getText());
assertEquals("Hello World!", document.getParagraphs().get(2).getText());
// Then overwrite the second placeholder; all three entries now carry the original text.
var paragraph3Position = document.getPosOfParagraph(paragraph3);
var newParagraph3 = new XWPFParagraph((CTP) paragraph1.getCTP().copy(), document);
document.setParagraph(newParagraph3, paragraph3Position);
assertEquals("Hello World!", document.getParagraphs().get(0).getText());
assertEquals("Hello World!", document.getParagraphs().get(1).getText());
assertEquals("Hello World!", document.getParagraphs().get(2).getText());
}