import net.sf.saxon.Configuration;
import net.sf.saxon.lib.NamespaceConstant;
import net.sf.saxon.om.NodeInfo;
import net.sf.saxon.om.TreeInfo;
import net.sf.saxon.xpath.XPathFactoryImpl;
import org.xml.sax.InputSource;
import javax.xml.transform.sax.SAXSource;
import javax.xml.xpath.*;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.TreeMap;
public class Task3 {
private static String[] ParaToSentenc(String PtS) {
String[] strArray = PtS.split(",");
return strArray;
private static List<String> UniqueAndSortWord(String[] UW) {
List<String> unique_sort = new ArrayList<String>();
Map<String, String> hMap = new HashMap<String, String>();
for(String word : UW) {
if(!hMap.containsKey(word)) {
return unique_sort;
private static void FileWriter(String content, String outputfile) {
File file = new File(outputfile);
FileWriter writer = null;
BufferedWriter bw = null;
try {
writer = new FileWriter(file);
bw = new BufferedWriter(writer);
catch (IOException e) {
public static void main (String args[]) throws Exception {
String Inputname = args[0];//sc.nextLine(); //"D:\\document.xml";
String outputname = args[1];//sc.nextLine(); //"D:\\document.txt";
Task3.runApp(Inputname, outputname);
* Run the application
private static void runApp(String filename, String outputfile) throws Exception {
// The following initialization code is specific to Saxon
// Please refer to SaxonHE documentation for details
XPathFactory xpFactory = XPathFactory.
XPath xpExpression = xpFactory.newXPath();
System.err.println("Loaded XPath Provider " + xpExpression.getClass().getName());
// Build the source document.
InputSource inputSrc = new InputSource(new File(filename).toURL().toString());
SAXSource saxSrc = new SAXSource(inputSrc);
Configuration config = ((XPathFactoryImpl) xpFactory).getConfiguration();
TreeInfo treeInfo = config.buildDocumentTree(saxSrc);
// End Saxon specific code
XPathExpression findwtTags =
Number countResults = (Number)findwtTags.evaluate(treeInfo, XPathConstants.NUMBER);
// Get a list of the <deg> Tags
// The following expression gets a set of nodes that have a <deg> Tags,
// then extracts the text node from the <deg> tags
XPathExpression findwtTextNodes =
//global string
String global = "";
List resultNodeList = (List) findwtTextNodes.evaluate(treeInfo, XPathConstants.NODESET);
if (resultNodeList != null) {
int count = resultNodeList.size();
for (int i = 0; i < count; i++) {
NodeInfo cNode = (NodeInfo) resultNodeList.get(i);
String name = cNode.getStringValue();
global = global + "\n" + name;
//Full content text...
String globalText = "Full Degree content:" + global + "\n\n";
// Para To Sentence...
String[] strSenArray = ParaToSentenc(global);
globalText = globalText + "Each Degree separated in line by line:\n";
// globalText = globalText + "Sentence Count : "+strSenArray.length+"\n";
for(int i=0; i<strSenArray.length; i++){
globalText = globalText + strSenArray[i].trim() + "\n";
globalText = globalText + "\n";
//Unique Words
List<String> strUniqueList = UniqueAndSortWord(strSenArray);
globalText = globalText + "Unique Degree list:\n";
for(String word : strUniqueList){
globalText = globalText + word.trim() + "\n";
globalText = globalText.substring(0, globalText.length()-1);
globalText = globalText + "\n\n";
//All Text wtite into file...
FileWriter(globalText, outputfile);
// Alternatively, with XPath 3.1 the whole task can be done in a single XPath expression:
//   (collection('file:///C:/JavaPractice/Task3/Process/test?select=tud.xml;recurse=yes')//deg
//     ! tokenize(., ',')) => distinct-values() => sort()