我正在尝试解析一个声明了两个 `xmlns` 命名空间的 XML 文档,但我的所有 XPath 查询都返回 null。
我想解析 `category` 节点的值并创建一个数组,但由于文档声明了两个命名空间,无论我使用什么 XPath 表达式,结果总是 null。
如果我删除其中一个命名空间,它就可以正常工作。我查找了其他答案,但没有找到有效的解决办法,因此作为新问题发布。
这是我到目前为止所尝试过的代码。我参考了这篇文章。
提前感谢您的帮助。
import java.io.FileInputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.xml.sax.InputSource;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
/**
 * Reproduction case from the question: parses an inline NewsML-G2 document with a
 * namespace-aware DOM parser, evaluates the XPath {@code /newsItem/itemMeta} against it
 * and prints the value of every node in the result.
 */
class Main
{
/**
 * Builds the DOM from the embedded XML string, runs the XPath query and prints the result.
 *
 * @param args unused
 * @throws Exception if the parser cannot be configured, the XML cannot be parsed, or the
 *                   XPath expression cannot be compiled or evaluated
 */
public static void main(String[] args) throws Exception
{
//Parse the inline XML document with namespace support enabled
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
DocumentBuilder builder = factory.newDocumentBuilder();
// The root element declares a default namespace (xmlns="http://iptc.org/std/nar/2006-10-01/")
// and the xsi prefix; every unprefixed element below inherits the default namespace.
Document doc = builder.parse(new InputSource(new StringReader("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
"<newsItem guid=\"urn:newsml:news.com.au:20210401\" version=\"1\"\n" +
" standard=\"NewsML-G2\" standardversion=\"2.9\"\n" +
" xmlns=\"http://iptc.org/std/nar/2006-10-01/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">\n" +
" <catalogRef href=\"http://www.iptc.org/std/catalog/catalog.IPTC-G2-Standards_16.xml\"/>\n" +
" <itemMeta>\n" +
" <itemClass qcode=\"ninat:video\"/>\n" +
" <provider>\n" +
" <name>FoxSports</name>\n" +
" </provider>\n" +
" <versionCreated>2021-04-01T16:10:15.736+11:00</versionCreated>\n" +
" <event>create</event>\n" +
" </itemMeta>\n" +
" <contentMeta>\n" +
" <FWID>0</FWID>\n" +
" <originalId>799186</originalId>\n" +
" <contentCreated>2021-04-01T16:10:15.736+11:00</contentCreated>\n" +
" <expiration>2021-05-01T15:00:43.057+10:00</expiration>\n" +
" <slugline>Test - Video Name</slugline>\n" +
" <headline>Test - video headline</headline>\n" +
" <description>Test AFL: David King breaks down his new theory surrounding Dimma and the Tigers. </description>\n" +
" <category>\n" +
" <id>208</id>\n" +
" <name>AFL</name>\n" +
" <category>\n" +
" <id>320</id>\n" +
" <name>AFL 360</name>\n" +
" </category>\n" +
" </category>\n" +
" <collections>\n" +
" <collection>\n" +
" <id>138</id>\n" +
" <name>alexa</name>\n" +
" </collection>\n" +
" </collections>\n" +
" <isPremiumPay>false</isPremiumPay>\n" +
" <geoblock>false</geoblock>\n" +
" </contentMeta>\n" +
" <contentSet>\n" +
" <remoteContent id=\"web\"\n" +
" href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09.jpg\" version=\"1\"\n" +
" rendition=\"rnd:web\" size=\"44848\" contenttype=\"image/jpeg\"\n" +
" width=\"640\" height=\"360\" colourspace=\"colsp:sRGB\" orientation=\"1\" resolution=\"96\"/>\n" +
" <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_564.mp4\"\n" +
" contenttype=\"video/mp4\" width=\"512\" height=\"288\" duration=\"121\"\n" +
" audiobitrate=\"64000\" videoavgbitrate=\"500000\" videoaspectratio=\"16:9\"/>\n" +
" <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_248.mp4\"\n" +
" contenttype=\"video/mp4\" width=\"512\" height=\"288\" duration=\"121\"\n" +
" audiobitrate=\"48000\" videoavgbitrate=\"200000\" videoaspectratio=\"16:9\"/>\n" +
" <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_1596.mp4\"\n" +
" contenttype=\"video/mp4\" width=\"800\" height=\"450\" duration=\"121\"\n" +
" audiobitrate=\"96000\" videoavgbitrate=\"1500000\" videoaspectratio=\"16:9\"/>\n" +
" <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_2628.mp4\"\n" +
" contenttype=\"video/mp4\" width=\"1280\" height=\"720\" duration=\"121\"\n" +
" audiobitrate=\"128000\" videoavgbitrate=\"2500000\" videoaspectratio=\"16:9\"/>\n" +
" <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_1096.mp4\"\n" +
" contenttype=\"video/mp4\" width=\"640\" height=\"360\" duration=\"121\"\n" +
" audiobitrate=\"96000\" videoavgbitrate=\"1000000\" videoaspectratio=\"16:9\"/>\n" +
" <remoteContent href=\"DIMMA'S_GAMBIT_2021_01_04_04_55_09_896.mp4\"\n" +
" contenttype=\"video/mp4\" width=\"640\" height=\"360\" duration=\"121\"\n" +
" audiobitrate=\"96000\" videoavgbitrate=\"800000\" videoaspectratio=\"16:9\"/>\n" +
" </contentSet>\n" +
"</newsItem>\n")));
//Get XPath expression
XPathFactory xpathfactory = XPathFactory.newInstance();
XPath xpath = xpathfactory.newXPath();
// Prefixes used in the expression are resolved against the parsed document's declarations.
xpath.setNamespaceContext(new NamespaceResolver(doc));
// NOTE(review): the step names below carry no prefix. In XPath 1.0 an unprefixed name test
// selects elements in no namespace, whereas newsItem/itemMeta live in the document's default
// namespace, so this expression is expected to select nothing regardless of the resolver.
XPathExpression expr = xpath.compile("/newsItem/itemMeta");
//Search XPath expression
Object result = expr.evaluate(doc, XPathConstants.NODESET);
//Iterate over the matched nodes and print each node's value
NodeList nodes = (NodeList) result;
for (int i = 0; i < nodes.getLength(); i++) {
// NOTE(review): org.w3c.dom.Node documents getNodeValue() as null for element nodes, so a
// matched itemMeta element would print "null"; getTextContent() reads element text — confirm intent.
System.out.println(nodes.item(i).getNodeValue());
}
}
}
/**
 * {@link NamespaceContext} that answers prefix and namespace-URI lookups by delegating to the
 * namespace declarations visible from a parsed DOM {@link Document}.
 *
 * <p>Only prefixes that the document itself declares can be resolved; an undeclared prefix
 * yields whatever {@link Document#lookupNamespaceURI(String)} returns for it (typically
 * {@code null}).
 */
class NamespaceResolver implements NamespaceContext
{
    //Store the source document to search the namespaces
    private final Document sourceDocument;

    /**
     * @param document the parsed document whose namespace declarations are consulted
     * @throws IllegalArgumentException if {@code document} is {@code null}
     */
    public NamespaceResolver(Document document) {
        if (document == null) {
            throw new IllegalArgumentException("document must not be null");
        }
        sourceDocument = document;
    }

    /**
     * Resolves a prefix to a namespace URI using the stored document.
     *
     * @param prefix the prefix to resolve; the empty string asks for the default namespace
     * @return the URI reported by the document, or {@code null} if the document reports none
     * @throws IllegalArgumentException if {@code prefix} is {@code null}, as the
     *                                  {@link NamespaceContext} contract requires
     */
    @Override
    public String getNamespaceURI(String prefix) {
        if (prefix == null) {
            throw new IllegalArgumentException("prefix must not be null");
        }
        if (XMLConstants.DEFAULT_NS_PREFIX.equals(prefix)) {
            // The DOM API addresses the default namespace with a null prefix, not "".
            return sourceDocument.lookupNamespaceURI(null);
        }
        return sourceDocument.lookupNamespaceURI(prefix);
    }

    /**
     * Looks up a prefix bound to the given namespace URI in the stored document.
     *
     * @param namespaceURI the namespace URI to look up
     * @return the prefix reported by the document, or {@code null} if it reports none
     * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
     */
    @Override
    public String getPrefix(String namespaceURI) {
        if (namespaceURI == null) {
            throw new IllegalArgumentException("namespaceURI must not be null");
        }
        return sourceDocument.lookupPrefix(namespaceURI);
    }

    /**
     * Returns the prefixes known for the given namespace URI. The DOM lookup yields at most
     * one prefix, so the iterator has zero or one element; it is never {@code null}.
     *
     * @param namespaceURI the namespace URI to look up
     * @return an iterator over the matching prefix, or an empty iterator if there is none
     * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
     */
    @Override
    @SuppressWarnings({"rawtypes", "unchecked"})
    public Iterator getPrefixes(String namespaceURI) {
        String prefix = getPrefix(namespaceURI);
        if (prefix == null) {
            return java.util.Collections.<String>emptyList().iterator();
        }
        return java.util.Collections.singletonList(prefix).iterator();
    }
}
您的 XML 元素绑定到命名空间 `http://iptc.org/std/nar/2006-10-01/`,但您的 XPath 没有使用任何命名空间前缀,因此 `/newsItem/itemMeta` 查找的是未绑定到任何命名空间的元素。
您可以改用 `local-name()` 只按本地名称匹配元素,从而绕过命名空间:
/*[local-name()='newsItem']/*[local-name()='itemMeta']
否则,您需要为该命名空间注册一个前缀,或者使用自定义的 NamespaceContext,把您自选的前缀解析为对应的命名空间:
// Bind a prefix of our own choosing ("i") to the namespace the document's elements live in,
// so XPath expressions can address them as i:newsItem, i:itemMeta, ...
xpath.setNamespaceContext(new NamespaceContext() {
    @Override
    public String getNamespaceURI(String prefix) {
        if (prefix == null) {
            // Required by the NamespaceContext contract; also avoids an NPE from the switch.
            throw new IllegalArgumentException("prefix must not be null");
        }
        switch (prefix) {
            case "i": return "http://iptc.org/std/nar/2006-10-01/";
            // ... add further prefix-to-URI bindings here
            default: return XMLConstants.NULL_NS_URI;
        }
    }

    // Reverse lookups are not implemented in this sketch.
    @Override
    public String getPrefix(String namespaceURI) {
        throw new UnsupportedOperationException();
    }

    @Override
    public Iterator<String> getPrefixes(String namespaceURI) {
        throw new UnsupportedOperationException();
    }
});
然后在 XPath 中使用该命名空间前缀:
/i:newsItem/i:itemMeta
也许这是使用命名空间查询 xml 的最简单方法。
首先,创建命名空间上下文
/**
 * {@link NamespaceContext} backed by a DOM node. A prefix is resolved against the node itself
 * first and, failing that, against its descendant elements in document order, so prefixes that
 * are declared deeper in the tree than the starting node can still be resolved.
 *
 * <p>Reverse lookups ({@link #getPrefix} and {@link #getPrefixes}) are not supported.
 */
public static class NamespaceResolver implements NamespaceContext {
    //Store the source node to search the namespaces
    private final Node sourceNode;

    /**
     * @param node the node (typically the parsed document) where namespace lookups start
     */
    public NamespaceResolver(Node node) {
        sourceNode = node;
    }

    /**
     * Resolves a prefix to a namespace URI.
     *
     * @param prefix the prefix to resolve; the empty string asks for the default namespace
     * @return the namespace URI, or {@code null} if no searched node declares the prefix
     * @throws IllegalArgumentException if {@code prefix} is {@code null}
     */
    @Override
    public String getNamespaceURI(String prefix) {
        if (prefix == null) {
            throw new IllegalArgumentException("prefix must not be null");
        }
        if (XMLConstants.DEFAULT_NS_PREFIX.equals(prefix)) {
            // The DOM API addresses the default namespace with a null prefix, not "".
            return sourceNode.lookupNamespaceURI(null);
        }
        return lookupNamespaceURI(sourceNode, prefix);
    }

    /**
     * Depth-first search for a declaration of {@code prefix}, starting at {@code node}.
     *
     * @return the first namespace URI found, or {@code null} if the subtree declares none
     */
    private String lookupNamespaceURI(Node node, String prefix) {
        String uri = node.lookupNamespaceURI(prefix);
        if (uri != null) {
            return uri;
        }
        NodeList children = node.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                String found = lookupNamespaceURI(child, prefix);
                // Keep scanning the remaining siblings when this subtree has no declaration;
                // returning unconditionally here would only ever inspect the first element child.
                if (found != null) {
                    return found;
                }
            }
        }
        return null;
    }

    /** Not supported by this resolver. */
    @Override
    public String getPrefix(String namespaceURI) {
        throw new UnsupportedOperationException();
    }

    /** Not supported by this resolver. */
    @Override
    @SuppressWarnings("rawtypes")
    public Iterator getPrefixes(String namespaceURI) {
        throw new UnsupportedOperationException();
    }
}
然后,你可以像这样使用xpath查询xml节点:
/**
 * Demo: parses a small document that mixes a default namespace with two prefixed ones and
 * prints the text of the element selected by {@code /:root/name1:a/name2:b}, with all prefixes
 * (including the empty one) resolved through {@code NamespaceResolver}.
 *
 * @param args unused
 * @throws Exception if parsing or XPath evaluation fails
 * @throws IllegalStateException if the expression selects no node
 */
public static void main(String[] args) throws Exception {
    String text = "<root xmlns=\"https://abc.xyz\" xmlns:name1=\"https://abc.xyz\" xmlns:name2=\"https:abc.xyz\">\n" +
    " <name1:a>\n" +
    " <name2:b>this is a test</name2:b>\n" +
    " </name1:a>\n" +
    "</root>";
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setNamespaceAware(true);
    DocumentBuilder builder = factory.newDocumentBuilder();
    // String.trim() is sufficient here; no third-party StringUtils is needed for a non-null string.
    text = text.trim();
    Document document;
    try (InputStream inputStream = new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8))) {
        document = builder.parse(inputStream);
    }
    NamespaceResolver namespaceResolver = new NamespaceResolver(document);
    XPath xPath = XPathFactory.newInstance().newXPath();
    xPath.setNamespaceContext(namespaceResolver);
    Node node = (Node) xPath.compile("/:root/name1:a/name2:b").evaluate(document, XPathConstants.NODE);
    if (node == null) {
        // Fail with a descriptive message instead of a bare NullPointerException below.
        throw new IllegalStateException("XPath /:root/name1:a/name2:b matched no node");
    }
    System.out.println(node.getTextContent());
}