我需要从数据源(feed)中删除重复的项目,并且只映射每组重复项中的第一个对象。为此,我使用
xsl:for-each-group
按 id
对对象进行分组,然后只取每组的第一个对象。
对于小型 XML 它可以正常工作,但当我处理包含 200 万个项目的大型数据源时,XSLT 转换没有执行,输出文件为空。
有没有其他方法可以在不使用分组的情况下实现相同的效果?
我使用的是 Saxon EE 10.5 版本。
xsl 模板
<?xml version="1.0"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="3.0">
  <!--
    Writes a CSV file from an <items> feed: one header line, then one record
    per distinct <id>, built from the first <item> seen with that id.
  -->

  <xsl:output method="text" encoding="UTF-8" indent="no" omit-xml-declaration="yes"/>
  <xsl:mode streamable="yes"/>
  <xsl:strip-space elements="*"/>

  <!--
    CSV punctuation. The double quote and the line feed must be written as
    character/entity references: a raw double quote would end the attribute,
    and a raw line break inside an attribute is normalized to a space.
  -->
  <xsl:variable name="dq" select="'&quot;'"/>
  <xsl:variable name="qcq" select="'&quot;,&quot;'"/>
  <xsl:variable name="lf" select="'&#10;'"/>

  <!-- Static defaults -->
  <xsl:variable name="data_source" select="'HHTestMedia'"/>

  <xsl:template match="items">
    <!-- Header row, terminated by a line feed so the first record starts on its own line. -->
    <xsl:text>title,city,state,postal_code,country,company_name</xsl:text>
    <xsl:value-of select="$lf"/>

    <!--<xsl:for-each select="job ! copy-of(.)">-->
    <!--
      Each streamed item is copied into memory and the copies are grouped by id.
      NOTE(review): group-by presumably cannot emit anything until every item
      has been read, so all copies are held at once; confirm whether this is
      what fails on very large feeds.
    -->
    <xsl:for-each-group select="item ! copy-of(.)" group-by="id">
      <!-- Only the first item of each id group is mapped; later duplicates are ignored. -->
      <xsl:variable name="currentGroup" select="current-group()[1]"/>

      <!-- Double quotes are removed, then the title is cut to 128 characters
           (XPath string positions start at 1). -->
      <xsl:variable name="p_title"
                    select="substring($currentGroup/replace(title, '&quot;', ''), 1, 128)"/>
      <!-- location is split at its first comma: city before it, state code = first two
           characters of the whitespace-normalized remainder. -->
      <xsl:variable name="p_city" select="substring-before($currentGroup/location, ',')"/>
      <xsl:variable name="p_state_code"
                    select="substring(normalize-space(substring-after($currentGroup/location, ',')), 1, 2)"/>
      <xsl:variable name="p_postal_code" select="$currentGroup/postcode"/>
      <xsl:variable name="p_country_code" select="$currentGroup/country"/>
      <!-- The company is cut to 64 characters first, then double quotes are removed. -->
      <xsl:variable name="p_company_name"
                    select="replace(substring($currentGroup/company, 1, 64), '&quot;', '')"/>

      <!-- The following line should not need to be updated if you copied this XSL from a different setup. -->
      <xsl:value-of select="concat($dq, $p_title, $qcq, $p_city, $qcq, $p_state_code, $qcq,
                                   $p_postal_code, $qcq, $p_country_code, $qcq, $p_company_name, $dq, $lf)"/>
      <!--</xsl:for-each>-->
    </xsl:for-each-group>
  </xsl:template>

</xsl:stylesheet>
示例 XML
<?xml version="1.0" encoding="UTF-8"?>
<!-- Sample feed: both items carry the same id, so only the first one should be exported. -->
<source>
  <items>
    <item>
      <id><![CDATA[160449417]]></id>
      <title><![CDATA[Campaign1]]></title>
      <location><![CDATA[Hudson, FL 34667]]></location>
      <postcode><![CDATA[34667]]></postcode>
      <country><![CDATA[US]]></country>
      <company><![CDATA[Halloween]]></company>
    </item>
    <item>
      <id><![CDATA[160449417]]></id>
      <title><![CDATA[Campaign1]]></title>
      <location><![CDATA[Hudson, FL 34667]]></location>
      <postcode><![CDATA[34667]]></postcode>
      <country><![CDATA[US]]></country>
      <company><![CDATA[Halloween]]></company>
    </item>
  </items>
</source>
提前致谢。
我们应该尝试运行一些诊断来确定您的方法失败的原因。
不过,既然您还询问了不使用分组的替代方案,下面给出一种使用累加器的方法:它把每个已处理的
item/id
的出现次数存储在 map(xs:integer, xs:integer)
中:
<xsl:stylesheet
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:map="http://www.w3.org/2005/xpath-functions/map"
    version="3.0">
  <!--
    Writes a CSV file from an <items> feed without grouping: an accumulator
    counts how often each item id has been seen, and an item is exported only
    when its id has been seen exactly once, i.e. on its first occurrence.
  -->

  <xsl:output method="text" encoding="UTF-8" indent="no" omit-xml-declaration="yes"/>

  <!-- Default mode: streamed pass over the feed; anything without a template is skipped. -->
  <xsl:mode streamable="yes" on-no-match="shallow-skip" use-accumulators="processed-ids"/>
  <!-- Non-streamed mode used for the in-memory copy of a single item. -->
  <xsl:mode name="grounded"/>

  <xsl:strip-space elements="*"/>

  <!--
    CSV punctuation. The double quote and the line feed must be written as
    character/entity references: a raw double quote would end the attribute,
    and a raw line break inside an attribute is normalized to a space.
  -->
  <xsl:variable name="dq" select="'&quot;'"/>
  <xsl:variable name="qcq" select="'&quot;,&quot;'"/>
  <xsl:variable name="lf" select="'&#10;'"/>

  <!-- Static defaults -->
  <xsl:variable name="data_source" select="'HHTestMedia'"/>

  <!--
    Maps each item id (as xs:integer) to the number of times it has occurred
    so far. The rule fires on the text node of every items/item/id and
    increments that id's counter, starting from 1 for an id not yet in the map.
  -->
  <xsl:accumulator name="processed-ids"
                   as="map(xs:integer, xs:integer)"
                   initial-value="map{}"
                   streamable="yes">
    <xsl:accumulator-rule match="items/item/id/text()"
                          select="let $id := xs:integer(.)
                                  return map:put($value, $id,
                                                 (if (map:contains($value, $id)) then $value($id) else 0) + 1)"/>
  </xsl:accumulator>

  <xsl:template match="items">
    <!-- Header row, terminated by a line feed so the first record starts on its own line. -->
    <xsl:text>title,city,state,postal_code,country,company_name</xsl:text>
    <xsl:value-of select="$lf"/>
    <xsl:apply-templates/>
  </xsl:template>

  <xsl:template match="items/item">
    <!-- Take an in-memory copy of the streamed item so its children can be read freely. -->
    <xsl:variable name="item" select="copy-of()"/>
    <xsl:variable name="id" select="xs:integer($item/id)"/>
    <!--
      The predicate reads the accumulator with the copied item as context node.
      A count of 1 means this is the first item with that id; duplicates have a
      higher count and are dropped.
    -->
    <xsl:apply-templates select="$item[accumulator-after('processed-ids')($id) = 1]" mode="grounded"/>
  </xsl:template>

  <!-- Emits one quoted CSV record for a copied item. -->
  <xsl:template match="item" mode="grounded">
    <xsl:variable name="currentGroup" select="."/>

    <!-- Double quotes are removed, then the title is cut to 128 characters
         (XPath string positions start at 1). -->
    <xsl:variable name="p_title"
                  select="substring($currentGroup/replace(title, '&quot;', ''), 1, 128)"/>
    <!-- location is split at its first comma: city before it, state code = first two
         characters of the whitespace-normalized remainder. -->
    <xsl:variable name="p_city" select="substring-before($currentGroup/location, ',')"/>
    <xsl:variable name="p_state_code"
                  select="substring(normalize-space(substring-after($currentGroup/location, ',')), 1, 2)"/>
    <xsl:variable name="p_postal_code" select="$currentGroup/postcode"/>
    <xsl:variable name="p_country_code" select="$currentGroup/country"/>
    <!-- The company is cut to 64 characters first, then double quotes are removed. -->
    <xsl:variable name="p_company_name"
                  select="replace(substring($currentGroup/company, 1, 64), '&quot;', '')"/>

    <xsl:value-of select="concat($dq, $p_title, $qcq, $p_city, $qcq, $p_state_code, $qcq,
                                 $p_postal_code, $qcq, $p_country_code, $qcq, $p_company_name, $dq, $lf)"/>
  </xsl:template>

</xsl:stylesheet>