在 Java 中使用 xPath 解析具有多个命名空间的 XML

Question

我正在尝试解析一个具有两个

xmlns

命名空间的 XML 文档，并且我的所有 xPath 查询都返回 null。

我想解析

category

节点的值并创建一个数组，但由于文档有两个命名空间，无论我使用什么 xpath 表达式，它总是返回 null。

如果我删除其中一个命名空间，它就可以正常工作。我查找了其他回答，但没有找到可行的办法，因此将其作为新问题发布。

以下是我到目前为止尝试过的代码。我参考了这篇文章。

提前感谢您的帮助。

import java.io.FileInputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;

import org.xml.sax.InputSource;

import org.w3c.dom.Document;
import org.w3c.dom.NodeList;


/**
 * Reproduction case from the question: parses an inline NewsML-G2
 * {@code newsItem} document with a namespace-aware parser and evaluates an
 * XPath expression against it, printing the value of each matched node.
 */
class Main
{
    /**
     * Parses the embedded XML, evaluates {@code /newsItem/itemMeta} and prints
     * {@code getNodeValue()} of every node in the result.
     *
     * @param args unused
     * @throws Exception if parsing the XML or compiling/evaluating the XPath fails
     */
    public static void main(String[] args) throws Exception
    {

        // Parse the inline XML document. The parser is namespace-aware, so every
        // element below ends up in the default namespace declared on <newsItem>
        // (http://iptc.org/std/nar/2006-10-01/).
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.parse(new InputSource(new StringReader("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
                "<newsItem guid=\"urn:newsml:news.com.au:20210401\" version=\"1\"\n" +
                "  standard=\"NewsML-G2\" standardversion=\"2.9\"\n" +
                "  xmlns=\"http://iptc.org/std/nar/2006-10-01/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">\n" +
                "  <catalogRef href=\"http://www.iptc.org/std/catalog/catalog.IPTC-G2-Standards_16.xml\"/>\n" +
                "  <itemMeta>\n" +
                "    <itemClass qcode=\"ninat:video\"/>\n" +
                "    <provider>\n" +
                "      <name>FoxSports</name>\n" +
                "    </provider>\n" +
                "    <versionCreated>2021-04-01T16:10:15.736+11:00</versionCreated>\n" +
                "    <event>create</event>\n" +
                "  </itemMeta>\n" +
                "  <contentMeta>\n" +
                "    <FWID>0</FWID>\n" +
                "    <originalId>799186</originalId>\n" +
                "    <contentCreated>2021-04-01T16:10:15.736+11:00</contentCreated>\n" +
                "    <expiration>2021-05-01T15:00:43.057+10:00</expiration>\n" +
                "    <slugline>Test - Video Name</slugline>\n" +
                "    <headline>Test - video headline</headline>\n" +
                "    <description>Test AFL: David King breaks down his new theory surrounding Dimma and the Tigers. </description>\n" +
                "    <category>\n" +
                "      <id>208</id>\n" +
                "      <name>AFL</name>\n" +
                "      <category>\n" +
                "        <id>320</id>\n" +
                "        <name>AFL 360</name>\n" +
                "      </category>\n" +
                "    </category>\n" +
                "    <collections>\n" +
                "      <collection>\n" +
                "        <id>138</id>\n" +
                "        <name>alexa</name>\n" +
                "      </collection>\n" +
                "    </collections>\n" +
                "    <isPremiumPay>false</isPremiumPay>\n" +
                "    <geoblock>false</geoblock>\n" +
                "  </contentMeta>\n" +
                "  <contentSet>\n" +
                "    <remoteContent id=\"web\"\n" +
                "      href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09.jpg\" version=\"1\"\n" +
                "      rendition=\"rnd:web\" size=\"44848\" contenttype=\"image/jpeg\"\n" +
                "      width=\"640\" height=\"360\" colourspace=\"colsp:sRGB\" orientation=\"1\" resolution=\"96\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_564.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"512\" height=\"288\" duration=\"121\"\n" +
                "      audiobitrate=\"64000\" videoavgbitrate=\"500000\" videoaspectratio=\"16:9\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_248.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"512\" height=\"288\" duration=\"121\"\n" +
                "      audiobitrate=\"48000\" videoavgbitrate=\"200000\" videoaspectratio=\"16:9\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_1596.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"800\" height=\"450\" duration=\"121\"\n" +
                "      audiobitrate=\"96000\" videoavgbitrate=\"1500000\" videoaspectratio=\"16:9\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_2628.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"1280\" height=\"720\" duration=\"121\"\n" +
                "      audiobitrate=\"128000\" videoavgbitrate=\"2500000\" videoaspectratio=\"16:9\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_1096.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"640\" height=\"360\" duration=\"121\"\n" +
                "      audiobitrate=\"96000\" videoavgbitrate=\"1000000\" videoaspectratio=\"16:9\"/>\n" +
                "    <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_896.mp4\"\n" +
                "      contenttype=\"video/mp4\" width=\"640\" height=\"360\" duration=\"121\"\n" +
                "      audiobitrate=\"96000\" videoavgbitrate=\"800000\" videoaspectratio=\"16:9\"/>\n" +
                "  </contentSet>\n" +
                "</newsItem>\n")));

        // Build the XPath expression, resolving prefixes through the document's
        // own namespace declarations.
        // NOTE: the step names here carry no prefix, so the expression asks for
        // elements that are in NO namespace; the document's elements are all in
        // the default namespace, which is why this query selects nothing.
        XPathFactory xpathfactory = XPathFactory.newInstance();
        XPath xpath = xpathfactory.newXPath();
        xpath.setNamespaceContext(new NamespaceResolver(doc));
        XPathExpression expr = xpath.compile("/newsItem/itemMeta");

        // Evaluate the expression against the document, asking for a node set.
        Object result = expr.evaluate(doc, XPathConstants.NODESET);

        // Print the node value of every matched node.
        // NOTE(review): DOM defines getNodeValue() as null for element nodes;
        // getTextContent() is probably what is wanted here - confirm.
        NodeList nodes = (NodeList) result;
        for (int i = 0; i < nodes.getLength(); i++) {
            System.out.println(nodes.item(i).getNodeValue());
        }
        
    }
}
/**
 * {@link NamespaceContext} backed by the namespace declarations of a DOM
 * document: every lookup is delegated to the stored {@link Document}.
 *
 * <p>The fixed {@code xml} and {@code xmlns} bindings, the handling of
 * {@code null} arguments and the value returned for unbound prefixes follow
 * the {@code NamespaceContext} contract.
 */
class NamespaceResolver implements NamespaceContext
{
    // Document whose namespace declarations answer every lookup.
    private final Document sourceDocument;

    /**
     * @param document the document whose namespace declarations are consulted
     */
    public NamespaceResolver(Document document) {
        sourceDocument = document;
    }

    /**
     * Resolves a prefix to its namespace URI.
     *
     * @param prefix the prefix to resolve; {@code ""} means the default namespace
     * @return the bound namespace URI, or {@link XMLConstants#NULL_NS_URI} when
     *         the prefix is not bound
     * @throws IllegalArgumentException if {@code prefix} is {@code null}
     */
    @Override
    public String getNamespaceURI(String prefix) {
        if (prefix == null) {
            throw new IllegalArgumentException("prefix must not be null");
        }
        if (XMLConstants.XML_NS_PREFIX.equals(prefix)) {
            return XMLConstants.XML_NS_URI;
        }
        if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix)) {
            return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
        }
        // DOM expresses "the default namespace" as a null prefix, not as "".
        String domPrefix = XMLConstants.DEFAULT_NS_PREFIX.equals(prefix) ? null : prefix;
        String uri = sourceDocument.lookupNamespaceURI(domPrefix);
        return uri != null ? uri : XMLConstants.NULL_NS_URI;
    }

    /**
     * Finds a prefix bound to the given namespace URI.
     *
     * @param namespaceURI the namespace URI to look up
     * @return a bound prefix, {@code ""} when the URI is the default namespace,
     *         or {@code null} when the URI is not bound
     * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
     */
    @Override
    public String getPrefix(String namespaceURI) {
        if (namespaceURI == null) {
            throw new IllegalArgumentException("namespaceURI must not be null");
        }
        if (XMLConstants.XML_NS_URI.equals(namespaceURI)) {
            return XMLConstants.XML_NS_PREFIX;
        }
        if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI)) {
            return XMLConstants.XMLNS_ATTRIBUTE;
        }
        String prefix = sourceDocument.lookupPrefix(namespaceURI);
        // lookupPrefix() does not report the default namespace, so check it separately.
        if (prefix == null && sourceDocument.isDefaultNamespace(namespaceURI)) {
            return XMLConstants.DEFAULT_NS_PREFIX;
        }
        return prefix;
    }

    /**
     * Lists the prefixes bound to the given namespace URI. Only the single
     * prefix found by {@link #getPrefix(String)} is reported.
     *
     * @param namespaceURI the namespace URI to look up
     * @return a read-only iterator over zero or one prefix; never {@code null}
     * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
     */
    @Override
    public Iterator<String> getPrefixes(String namespaceURI) {
        List<String> prefixes = new ArrayList<String>();
        String prefix = getPrefix(namespaceURI);
        if (prefix != null) {
            prefixes.add(prefix);
        }
        // The contract requires an iterator whose remove() is unsupported.
        return java.util.Collections.unmodifiableList(prefixes).iterator();
    }
}

Answer 1

您的 XML 元素绑定到命名空间

http://iptc.org/std/nar/2006-10-01/

，但您的 XPath 未使用任何命名空间前缀，因此

/newsItem/itemMeta

匹配的只是未绑定到任何命名空间的元素，所以选不到任何节点。

您可以通过

local-name()

按本地名称匹配元素来绕开命名空间：

/*[local-name()='newsItem']/*[local-name()='itemMeta']

否则，您需要为该命名空间注册一个命名空间前缀，或者使用自定义 NamespaceContext，把您自选的前缀解析为对应的命名空间：

// Register a NamespaceContext that maps prefixes chosen for the XPath
// expression (they need not match the prefixes used in the document) to
// namespace URIs.
xpath.setNamespaceContext(new NamespaceContext() {
    @Override
    public String getNamespaceURI(String prefix) {
        if (prefix == null) {
            throw new IllegalArgumentException("prefix must not be null");
        }
        switch (prefix) {
            case "i":
                return "http://iptc.org/std/nar/2006-10-01/";
            // ... add one case per additional prefix used in the XPath
            default:
                // Unbound prefixes map to the "no namespace" URI.
                return XMLConstants.NULL_NS_URI;
        }
    }

    // Reverse lookups are not implemented in this sketch.
    // NOTE(review): XPath evaluation is expected to need only
    // getNamespaceURI - confirm for the XPath implementation in use.
    @Override
    public String getPrefix(String namespaceURI) {
        throw new UnsupportedOperationException();
    }

    @Override
    public Iterator<String> getPrefixes(String namespaceURI) {
        throw new UnsupportedOperationException();
    }
});

然后在 XPath 中使用该命名空间前缀：

/i:newsItem/i:itemMeta

Answer 2

也许这是使用命名空间查询 xml 的最简单方法。

首先，创建命名空间上下文

/**
 * {@link NamespaceContext} that resolves prefixes from the namespace
 * declarations found on a DOM node or anywhere in its subtree.
 *
 * <p>Only prefix-to-URI resolution is supported; the reverse lookups throw
 * {@link UnsupportedOperationException}.
 */
public static class NamespaceResolver implements NamespaceContext {
    // Node whose own scope and descendant elements are searched for declarations.
    private final Node sourceNode;

    /**
     * @param node the node (typically the document) to search for namespace declarations
     */
    public NamespaceResolver(Node node) {
        sourceNode = node;
    }

    /**
     * Resolves a prefix to its namespace URI.
     *
     * @param prefix the prefix to resolve; {@code ""} means the default
     *               namespace in scope on the source node
     * @return the namespace URI, or {@link XMLConstants#NULL_NS_URI} when the
     *         prefix is not declared anywhere in the subtree
     * @throws IllegalArgumentException if {@code prefix} is {@code null}
     */
    @Override
    public String getNamespaceURI(String prefix) {
        if (prefix == null) {
            throw new IllegalArgumentException("prefix must not be null");
        }
        String uri;
        if (prefix.equals(XMLConstants.DEFAULT_NS_PREFIX)) {
            // DOM expresses "the default namespace" as a null prefix.
            uri = sourceNode.lookupNamespaceURI(null);
        } else {
            uri = lookupNamespaceURI(sourceNode, prefix);
        }
        return uri != null ? uri : XMLConstants.NULL_NS_URI;
    }

    /**
     * Depth-first search for a declaration of {@code prefix}: first in the
     * scope of {@code node}, then in each element child in document order.
     * The first declaration found wins.
     *
     * @return the namespace URI, or {@code null} when no element in the
     *         subtree declares the prefix
     */
    private String lookupNamespaceURI(Node node, String prefix) {
        String uri = node.lookupNamespaceURI(prefix);
        if (uri != null) {
            return uri;
        }
        NodeList children = node.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                String found = lookupNamespaceURI(child, prefix);
                // Keep going through the remaining siblings when this
                // subtree does not declare the prefix.
                if (found != null) {
                    return found;
                }
            }
        }
        return null;
    }

    /** Not supported by this resolver. */
    @Override
    public String getPrefix(String namespaceURI) {
        throw new UnsupportedOperationException();
    }

    /** Not supported by this resolver. */
    @Override
    public Iterator<String> getPrefixes(String namespaceURI) {
        throw new UnsupportedOperationException();
    }
}

然后，你可以像这样使用xpath查询xml节点：

/**
 * Demonstrates querying a namespaced document through {@code NamespaceResolver}:
 * parses a small inline document, evaluates an XPath expression that uses the
 * document's own prefixes, and prints the text of the matched node.
 *
 * @param args unused
 * @throws Exception if parsing or XPath compilation/evaluation fails
 * @throws IllegalStateException if the expression matches no node
 */
public static void main(String[] args) throws Exception {
    String text = "<root xmlns=\"https://abc.xyz\" xmlns:name1=\"https://abc.xyz\" xmlns:name2=\"https:abc.xyz\">\n" +
            "    <name1:a>\n" +
            "        <name2:b>this is a test</name2:b>\n" +
            "    </name1:a>\n" +
            "</root>";
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    // Without namespace awareness the parser would not record namespace URIs.
    factory.setNamespaceAware(true);
    DocumentBuilder builder = factory.newDocumentBuilder();
    // Plain String.trim() is sufficient here; no third-party helper is needed.
    text = text.trim();
    InputStream inputStream = new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8));
    Document document = builder.parse(inputStream);


    NamespaceResolver namespaceResolver = new NamespaceResolver(document);
    XPath xPath = XPathFactory.newInstance().newXPath();
    xPath.setNamespaceContext(namespaceResolver);


    // The empty prefix in ":root" is resolved by the NamespaceContext as the
    // default namespace.
    // NOTE(review): an empty prefix is not a valid QName in XPath 1.0, so this
    // relies on the XPath implementation tolerating it - confirm before
    // depending on it with another implementation.
    String expression = "/:root/name1:a/name2:b";
    Node node = (Node) xPath.compile(expression).evaluate(document, XPathConstants.NODE);
    // evaluate() yields null for NODE when nothing matches; fail with a clear
    // message instead of a NullPointerException.
    if (node == null) {
        throw new IllegalStateException("XPath matched no node: " + expression);
    }
    System.out.println(node.getTextContent());
}

在 Java 中使用 xPath 解析具有多个命名空间的 XML

问题描述投票：0回答：2

2个回答

最新问题

在 Java 中使用 xPath 解析具有多个命名空间的 XML

问题描述 投票：0回答：2

2个回答

最新问题

问题描述投票：0回答：2