用saxon框架对xml数据进行过滤 - 程序员的天堂 - ITeye技术网站

Saxon 是一个 XSLT 和 XQuery 处理器。它以 XML 文档和样式表(或查询表达式)作为输入,生成结果文档作为输出;它还包含一个序列化器,用于将结果树转换成 XML、HTML 或纯文本。

 

 Saxon8以上版本主要组成有:XSLT 2.0处理器、 XPath 2.0处理器、 XQuery 1.0处理器、XML Schema 1.0处理器。

 

XPath 教程: http://www.w3school.com.cn/xpath/index.asp

XQuery 教程:http://www.w3school.com.cn/xquery/index.asp

 

 以下范例代码在 Saxon-B 9.1.0.8(saxonb9-1-0-8j 包)下测试通过:

 

1、xml文档内容

  1. <?xml version="1.0" encoding="UTF-8"?>  
  2. <flight>  
  3.     <row flightno="CA3411" airline_code="CA" airline_namecn="中国国际航空公司" airline_nameen="Air China" city_code="SHA" city_namecn="上海虹桥" city_nameen="Shanghai" flight_date="20130202" flight_time="2200" status_code="cancel" status_namecn="取消" status_nameen="Cancel" checkin_counter="M2-3" gate="A118"/>  
  4.     <row flightno="CA3411" airline_code="CA" airline_namecn="中国国际航空公司" airline_nameen="Air China" city_code="SHA" city_namecn="上海虹桥" city_nameen="Shanghai" flight_date="20130202" flight_time="2300" status_code="fly" status_namecn="起飞" status_nameen="Fly" checkin_counter="M2-3" gate="A118"/>  
  5.     <row flightno="CZ3412" airline_code="CZ" airline_namecn="中国南方航空公司" airline_nameen="South Air" city_code="PEK" city_namecn="北京" city_nameen="Beijing" flight_date="20130203" flight_time="2200" status_code="fly" status_namecn="起飞" status_nameen="Fly" checkin_counter="M1-3" gate="A218"/>  
  6. </flight>  
<?xml version="1.0" encoding="UTF-8"?><flight>	<row flightno="CA3411" airline_code="CA" airline_namecn="中国国际航空公司" airline_nameen="Air China" city_code="SHA" city_namecn="上海虹桥" city_nameen="Shanghai" flight_date="20130202" flight_time="2200" status_code="cancel" status_namecn="取消" status_nameen="Cancel" checkin_counter="M2-3" gate="A118"/>	<row flightno="CA3411" airline_code="CA" airline_namecn="中国国际航空公司" airline_nameen="Air China" city_code="SHA" city_namecn="上海虹桥" city_nameen="Shanghai" flight_date="20130202" flight_time="2300" status_code="fly" status_namecn="起飞" status_nameen="Fly" checkin_counter="M2-3" gate="A118"/>	<row flightno="CZ3412" airline_code="CZ" airline_namecn="中国南方航空公司" airline_nameen="South Air" city_code="PEK" city_namecn="北京" city_nameen="Beijing" flight_date="20130203" flight_time="2200" status_code="fly" status_namecn="起飞" status_nameen="Fly" checkin_counter="M1-3" gate="A218"/></flight>

 

2、java源码

  1. public class Test2 {  
  2.     public static void main(String[] args) {  
  3.         try{  
  4.             DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();  
  5.             DocumentBuilder builder = builderFactory.newDocumentBuilder();  
  6.               
  7.             //从文档中加载xml内容  
  8.             InputStream in = Class.class.getResourceAsStream("/flight/flight_data.xml");  
  9.             Document document = builder.parse(in);  
  10.             document.normalize(); //去掉XML文档中空白部分  
  11.               
  12.             //从字符串中加载xml内容  
  13.             //StringReader sr = new StringReader("<flight><row flightno=\"CA3411\" airline_code=\"CA\" airline_namecn=\"中国国际航空公司\" airline_nameen=\"Air China\" city_code=\"SHA\" city_namecn=\"上海虹桥\" city_nameen=\"Shanghai\" flight_date=\"20130202\" flight_time=\"2300\" status_code=\"fly\" status_namecn=\"起飞\" status_nameen=\"Fly\" checkin_counter=\"M2-3\" gate=\"A118\"/></flight>");  
  14.             //InputSource is = new InputSource(sr);  
  15.             //Document document = builder.parse(is);  
  16.             //document.normalize(); //去掉XML文档中空白部分  
  17.               
  18.             //xQuery表达式  
  19.             StringBuffer sb = new StringBuffer();  
  20.             sb.append(" for $s in /flight/row where 1=1 ");  
  21.             sb.append(" and contains(upper-case($s/@flightno), 'CA') ");  
  22.             sb.append(" and contains(upper-case($s/@city_namecn), '海') ");  
  23.             sb.append(" and upper-case($s/@airline_code)='CA' ");  
  24.             sb.append(" and $s/@flight_date='20130202' ");  
  25.             sb.append(" and $s/@flight_time>='2300' ");  
  26.             sb.append(" and $s/@flight_time<='2300' ");  
  27.             sb.append(" and $s/@status_code='fly' ");  
  28.             sb.append(" return $s ");  
  29.               
  30.             Configuration configuration = new Configuration();  
  31.               
  32.             //静态查询上下文  
  33.             StaticQueryContext context = new StaticQueryContext(configuration);  
  34.             XQueryExpression expression = context.compileQuery(sb.toString());  
  35.               
  36.             //动态查询上下文  
  37.             DynamicQueryContext context2 = new DynamicQueryContext(configuration);    
  38.             context2.setContextItem(new DocumentWrapper(document, null, configuration));  
  39.               
  40.             Properties props = new Properties();  
  41.             props.setProperty(OutputKeys.METHOD, "xml");  
  42.             props.setProperty(OutputKeys.INDENT, "yes");  
  43.             props.setProperty(OutputKeys.ENCODING, "GBK");  
  44.             props.setProperty(OutputKeys.VERSION, "1.0");  
  45.               
  46.             //根据xQuery表达式解析xml文件,返回符合条件的数据,存储到writer对象  
  47.             Writer writer = new StringWriter();  
  48.             expression.run(context2, new StreamResult(writer), props);  
  49.               
  50.             System.out.println(writer.toString());  
  51.               
  52.         }catch(Exception ex){  
  53.             ex.printStackTrace();  
  54.         }  
  55.     }  
  56. }  
public class Test2 {	public static void main(String[] args) {		try{			DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();			DocumentBuilder builder = builderFactory.newDocumentBuilder();						//从文档中加载xml内容			InputStream in = Class.class.getResourceAsStream("/flight/flight_data.xml");			Document document = builder.parse(in);			document.normalize(); //去掉XML文档中空白部分						//从字符串中加载xml内容			//StringReader sr = new StringReader("<flight><row flightno=\"CA3411\" airline_code=\"CA\" airline_namecn=\"中国国际航空公司\" airline_nameen=\"Air China\" city_code=\"SHA\" city_namecn=\"上海虹桥\" city_nameen=\"Shanghai\" flight_date=\"20130202\" flight_time=\"2300\" status_code=\"fly\" status_namecn=\"起飞\" status_nameen=\"Fly\" checkin_counter=\"M2-3\" gate=\"A118\"/></flight>");			//InputSource is = new InputSource(sr);			//Document document = builder.parse(is);			//document.normalize(); //去掉XML文档中空白部分						//xQuery表达式			StringBuffer sb = new StringBuffer();			sb.append(" for $s in /flight/row where 1=1 ");			sb.append(" and contains(upper-case($s/@flightno), 'CA') ");			sb.append(" and contains(upper-case($s/@city_namecn), '海') ");			sb.append(" and upper-case($s/@airline_code)='CA' ");			sb.append(" and $s/@flight_date='20130202' ");			sb.append(" and $s/@flight_time>='2300' ");			sb.append(" and $s/@flight_time<='2300' ");			sb.append(" and $s/@status_code='fly' ");			sb.append(" return $s ");						Configuration configuration = new Configuration();						//静态查询上下文			StaticQueryContext context = new StaticQueryContext(configuration);			XQueryExpression expression = context.compileQuery(sb.toString());						//动态查询上下文			DynamicQueryContext context2 = new DynamicQueryContext(configuration);  			context2.setContextItem(new DocumentWrapper(document, null, configuration));						Properties props = new Properties();			props.setProperty(OutputKeys.METHOD, "xml");			props.setProperty(OutputKeys.INDENT, "yes");			props.setProperty(OutputKeys.ENCODING, "GBK");			
props.setProperty(OutputKeys.VERSION, "1.0");						//根据xQuery表达式解析xml文件,返回符合条件的数据,存储到writer对象			Writer writer = new StringWriter();			expression.run(context2, new StreamResult(writer), props);						System.out.println(writer.toString());					}catch(Exception ex){			ex.printStackTrace();		}	}}

  

3、输出结果

  1. <?xml version="1.0" encoding="GBK"?>  
  2. <row airline_code="CA" airline_namecn="中国国际航空公司" airline_nameen="Air China"  
  3.      checkin_counter="M2-3"  
  4.      city_code="SHA"  
  5.      city_namecn="上海虹桥"  
  6.      city_nameen="Shanghai"  
  7.      flight_date="20130202"  
  8.      flight_time="2300"  
  9.      flightno="CA3411"  
  10.      gate="A118"  
  11.      status_code="fly"  
  12.      status_namecn="起飞"  
  13.      status_nameen="Fly"/>  
<?xml version="1.0" encoding="GBK"?><row airline_code="CA" airline_namecn="中国国际航空公司" airline_nameen="Air China"     checkin_counter="M2-3"     city_code="SHA"     city_namecn="上海虹桥"     city_nameen="Shanghai"     flight_date="20130202"     flight_time="2300"     flightno="CA3411"     gate="A118"     status_code="fly"     status_namecn="起飞"     status_nameen="Fly"/>

 

上一篇:HDU 5900 - QSC and Master [ DP ]


下一篇:【java】企业级分布式搜索平台Solr视频教程