我们使用 Saxon 生成一些导出文件。 这些转换通常由相继应用的多个 XSLT 转换组成。 为了提高性能,我引入了使用
XMLFilters
来防止将结果序列化为字符串并再次解析这些字符串以进行下一次转换的代码。
编码有效,并且性能显着提升。
但是,某些导出不是 XML,而是基于文本。 尽管在最后一个 XSLT 中设置了
omit-xml-declaration="yes"
,但我无法摆脱 XML 声明。
旧的实现也使用 Saxon,在没有 XML 声明的情况下产生相同的输出,尽管速度较慢。
我尽可能地减少了这两种实现并创建了一个单元测试,希望看看哪个差异会导致效果。但到目前为止还没有运气。 我错过了什么?
实际转换是在
saxonWithDestinations()
和 saxonWithXMLFilters()
中完成的
import net.sf.saxon.lib.Feature;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer;
import org.junit.Assert;
import org.junit.Test;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLFilter;
import org.xml.sax.XMLReader;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Templates;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
public class SaxonXMLFilterTest {
@Test
public void compareTransformationResultsForXml() throws Exception {
compareTransformations("nop", false);
}
@Test
public void compareTransformationResultsForText() throws Exception {
compareTransformations("text", false);
}
@Test
public void compareTransformationResultsForTextWithHackEnabled() throws Exception {
compareTransformations("text", true);
}
private void compareTransformations(String styleSheetName, boolean cutOffXmlDeclaration) throws ParserConfigurationException, TransformerException, SAXException, SaxonApiException, IOException {
String resultStringWithDestination = saxonWithDestinations(getXMLData(), getStylesheetSource(styleSheetName));
String resultStringWithXMLFilters = saxonWithXMLFilters(getXMLData(), getStylesheetSource(styleSheetName));
// This hack removes the XML declaration before comparing the results
if (cutOffXmlDeclaration == true) {
resultStringWithXMLFilters = resultStringWithDestination.replace("<?xml version=\"1.0\" encoding=\"UTF-8\"?>", "");
}
Assert.assertEquals(resultStringWithDestination, resultStringWithXMLFilters);
}
public String saxonWithDestinations(InputStream dataStream, StreamSource stylesheetSource) throws SaxonApiException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
Processor processor = buildProcessor();
Serializer destination = processor.newSerializer(out);
net.sf.saxon.s9api.XsltTransformer transformer = processor.newXsltCompiler().compile(stylesheetSource).load();
transformer.setSource(new StreamSource(dataStream));
transformer.setDestination(destination);
transformer.transform();
return out.toString();
}
private String saxonWithXMLFilters(InputStream dataStream, StreamSource stylesheetSource) throws ParserConfigurationException, SAXException, TransformerException {
String implementationClassName = "net.sf.saxon.TransformerFactoryImpl"; // Force the usage of Saxon-HE
SAXTransformerFactory transformerFactory = (SAXTransformerFactory) TransformerFactory.newInstance(implementationClassName, null);
XMLReader reader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
// Build Filter and use reader as first parent
Templates template = transformerFactory.newTemplates(stylesheetSource);
XMLFilter filter = transformerFactory.newXMLFilter(template);
filter.setParent(reader);
//
// More filters to be added here
//
XMLFilter lastFilter = filter;
// Push data through filter chain into ByteArrayOutputStream
ByteArrayOutputStream out = new ByteArrayOutputStream();
transformerFactory.newTransformer().transform(new SAXSource(lastFilter, new InputSource(dataStream)), new StreamResult(out));
return out.toString();
}
public static Processor buildProcessor() {
Processor processor = new Processor(false);
processor.setConfigurationProperty(Feature.TIMING, false);
processor.setConfigurationProperty(Feature.COMPILE_WITH_TRACING, false);
return processor;
}
private static StreamSource getStylesheetSource(String styleSheetName) throws IOException {
switch (styleSheetName) {
case "nop":
return stringToStreamSource(
"""
<xsl:stylesheet version="3.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
"""
);
case "text":
return stringToStreamSource(
"""
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="3.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output encoding="utf-8" method="text" indent="no" omit-xml-declaration="yes" xml:space="default"/>
<xsl:template match="/">
<xsl:apply-templates select="Data/Measurement" />
</xsl:template>
<xsl:template match="Data/Measurement">
<xsl:apply-templates select="x"/><xsl:text>|</xsl:text>
<xsl:apply-templates select="y"/><xsl:text>|</xsl:text>
<xsl:apply-templates select="z"/><xsl:text>
</xsl:text>
</xsl:template>
<xsl:template match="x|y|z">
<xsl:apply-templates select="*"/>
</xsl:template>
<xsl:template match="*">
<xsl:apply-templates select="text()"/>
<xsl:if test="not(position() = last())">
<xsl:text>;</xsl:text>
</xsl:if>
</xsl:template>
</xsl:stylesheet>
"""
);
default:
throw new IllegalArgumentException("Unknown stylesheet '" + styleSheetName + "'");
}
}
private InputStream getXMLData() {
return stringToInputStream(
"""
<?xml version="1.0" encoding="UTF-8"?>
<Data>
<Measurement>
<x>
<max>60.00</max>
<value>50.00</value>
<min>-40.00</min>
<unit>mm</unit>
</x>
<y>
<max>3.00</max>
<value>0.00</value>
<min>-3.00</min>
<unit>kN</unit>
</y>
<z>
<max>0.00</max>
<value>0.00</value>
<min>0.00</min>
<unit>ms</unit>
</z>
</Measurement>
<Measurement>
<x>
<max>260.00</max>
<value>250.00</value>
<min>160.00</min>
<unit>mm</unit>
</x>
<y>
<max>203.00</max>
<value>200.00</value>
<min>197.00</min>
<unit>kN</unit>
</y>
<z>
<max>600000.00</max>
<value>400000.00</value>
<min>200000.00</min>
<unit>ms</unit>
</z>
</Measurement>
</Data>
"""
);
}
private static StreamSource stringToStreamSource(String s) {
return new StreamSource(stringToInputStream(s));
}
private static InputStream stringToInputStream(String s) {
return new ByteArrayInputStream(s.getBytes(Charset.defaultCharset()));
}
}
首先观察到,omit-xml-declaration 对 method="text" 没有影响,因为文本输出方法不发出 XML 声明。
事实上,最终的序列化不是由任何 XSLT 过程完成的,而是由 JAXP 身份转换器完成的:
transformerFactory.newTransformer().transform(
new SAXSource(lastFilter, new InputSource(dataStream)),
new StreamResult(out));
根据您的类路径,可能会或可能不会使用 Saxon。您可以使用 JAXP API (
Transformer.setOutputProperty()
) 在此最终转换器上设置序列化属性,但无法从 XSLT 执行此操作,因为不涉及 XSLT。
我建议按照 Martin Honnen 的建议使用 s9api API。