【JAVA解析XML文件实现CRUD操作】

2023-07-17 13:30:04

一、简介。

1.xml解析技术有两种：dom和sax

2.dom:Document Object Model,即文档对象模型，是W3C组织推荐的解析XML的一种方式。

sax:Simple API for XML，不是官方标准，但它是xml社区事实上的标准。

3.XML解析器：Crimson(sun,jdk自带)、Xerces(IBM 最好的解析器)、Aelfred2(dom4j)，使用哪种解析器对程序员基本上没有什么影响，我们学习的是解析开发包，解析开发包调用什么样的解析器对程序员没有意义。

4.XML解析开发包：Jaxp(sun)、Jdom（不推荐使用）、dom4j(比较不错)，Pull（android的sdk自带,它使用的是另外的解析方式streaming api for xml,即stax）

5.JAXP：Java API for xml Processing,jaxp是sun提供的一套xml解析API,jaxp很好地支持了dom和sax解析方式

解析XML文档使用的包名：

javax.xml
org.xml.sax
org.w3c.dom

javax.xml.parsers包中，定义了几个工厂类，程序员调用这些工厂类，可以得到对xml文档进行解析的dom或者sax的解析器对象。

6.DOM解析过程：首先将整个文档加载到内存，形成DOM树。使用dom进行解析，得到Document对象

7.使用DOM解析方式的缺点：整个文档需要全部放入内存，如果是大文件极易出现内存溢出的情况。

使用DOM解析方式的优点：操作速度快。

二、使用DOM对XML文档实现CRUD操作。

首先创建一个类：Book，该类对应着XML文档的一个节点。

 package p00.domain;

 /**
  * Plain data holder for one {@code book} entry of the XML document: an id, a title and a price.
  */
 public class Book {

     public String id;

     public String title;

     public double price;

     /** Returns the id of this book. */
     public String getId() {
         return this.id;
     }

     /** Replaces the id of this book. */
     public void setId(String id) {
         this.id = id;
     }

     /** Returns the title of this book. */
     public String getTitle() {
         return this.title;
     }

     /** Replaces the title of this book. */
     public void setTitle(String title) {
         this.title = title;
     }

     /** Returns the price of this book. */
     public double getPrice() {
         return this.price;
     }

     /** Replaces the price of this book. */
     public void setPrice(double price) {
         this.price = price;
     }

     /** Returns a one-line description containing the id, the title and the price. */
     @Override
     public String toString() {
         StringBuilder text = new StringBuilder();
         text.append("图书ISBN为：").append(id);
         text.append("   书名为：").append(title);
         text.append("    价格为：").append(price);
         return text.toString();
     }
 }

Book.java

得到Document对象的公共方法：

 /**
  * Parses {@code xmldata/books.xml} into a DOM tree.
  *
  * @return the parsed document, or {@code null} (after printing a notice) when the file
  *         does not exist
  * @throws Exception if the parser cannot be created or the file cannot be parsed
  */
 private static Document getDocument() throws Exception {
     // The parser is created before the file check, so a parser configuration problem
     // surfaces even when the file is missing.
     DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
     File source = new File("xmldata/books.xml");
     if (source.exists()) {
         return parser.parse(source);
     }
     System.out.println("目标文件不存在！");
     return null;
 }

通过接收Document对象参数，将其内容写入到新文件books1.xml的方法：

  /**
   * Writes the root element of the given document to {@code xmldata/books1.xml} using a
   * default {@link Transformer}, so that the in-memory tree is persisted on disk.
   *
   * @param document DOM tree whose root element is written out
   * @throws Exception if the transformer cannot be created or the output cannot be written
   */
  private static void saveToAnotherPlace(Document document) throws Exception {
      Transformer writer = TransformerFactory.newInstance().newTransformer();
      Source input = new DOMSource(document.getDocumentElement());
      Result output = new StreamResult("xmldata/books1.xml");
      writer.transform(input, output);
  }

原本books.xml文档中的内容：

 <?xml version="1.0" encoding="UTF-8"?>

 <books>

     <book id="book1">

         <title>JAVA编程思想</title>

         <price>80.00</price>

     </book>

     <book id="book2">

         <title>JAVA核心技术</title>

         <price>100.00</price>

     </book>

 </books>

1、读取（R）

 package p01.getElementsByDomDemo;

 import java.io.File;

 import java.util.ArrayList;

 import java.util.Iterator;

 import java.util.List;

 import javax.xml.parsers.DocumentBuilder;

 import javax.xml.parsers.DocumentBuilderFactory;

 import org.w3c.dom.Document;

 import org.w3c.dom.Element;

 import org.w3c.dom.Node;

 import org.w3c.dom.NodeList;

 import p00.domain.Book;

 /**
  * Demonstrates querying an XML document with DOM: both element content and attribute
  * values are read from {@code xmldata/books.xml} and collected as {@link Book} objects.
  */
 public class getElementsDemo {

     /** Books collected by the most recent run of {@link #main(String[])}. */
     public static List<Book> list = new ArrayList<Book>();

     /**
      * Reads every book from the XML file into {@link #list} and prints the result.
      *
      * @param args unused
      * @throws Exception if the document cannot be parsed
      */
     public static void main(String[] args) throws Exception {
         list = getAllElementsToList();
         // Print the collection so its content can be checked by eye.
         traverse(list);
     }

     /**
      * Prints a blank line, a heading and then every book of the given list.
      *
      * @param list2 books to print
      */
     private static void traverse(List<Book> list2) {
         System.out.println();
         System.out.println("得到的集合内容为：");
         for (Book entry : list2) {
             System.out.println(entry);
         }
     }

     /**
      * Parses {@code xmldata/books.xml} and converts each {@code book} child of the first
      * {@code books} element into a {@link Book}.
      *
      * @return the books found, or an empty list (after printing a notice) when the file
      *         does not exist
      * @throws Exception if the parser cannot be created or the file cannot be parsed
      */
     public static List<Book> getAllElementsToList() throws Exception {
         List<Book> result = new ArrayList<Book>();
         DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
         File source = new File("xmldata/books.xml");
         if (!source.exists()) {
             System.out.println("目标文件不存在！");
             return result;
         }
         Document tree = parser.parse(source);
         Node root = tree.getElementsByTagName("books").item(0);
         // getChildNodes() also returns the whitespace text nodes (line breaks) between the
         // elements, so the children are filtered by node name.
         NodeList children = root.getChildNodes();
         for (int i = 0; i < children.getLength(); i++) {
             Node child = children.item(i);
             if (!"book".equals(child.getNodeName())) {
                 continue;
             }
             result.add(readBook((Element) child));
         }
         return result;
     }

     /**
      * Builds a {@link Book} from one {@code book} element, printing each value as it is read.
      *
      * @param bookElement the {@code book} element to convert
      * @return the populated book
      */
     private static Book readBook(Element bookElement) {
         Book book = new Book();
         String idValue = bookElement.getAttribute("id");
         book.setId(idValue);
         System.out.println("id属性值为：" + idValue);
         // As above, whitespace text nodes are mixed in with the title and price elements.
         NodeList fields = bookElement.getChildNodes();
         for (int j = 0; j < fields.getLength(); j++) {
             Node field = fields.item(j);
             String fieldName = field.getNodeName();
             if ("title".equals(fieldName)) {
                 String titleText = field.getTextContent();
                 book.setTitle(titleText);
                 System.out.println("书名为：" + titleText);
             } else if ("price".equals(fieldName)) {
                 String priceText = field.getTextContent();
                 System.out.println("价格为：" + priceText);
                 book.setPrice(Double.parseDouble(priceText));
             }
         }
         return book;
     }
 }

读取的时候应当注意的事项是调用getChildNodes方法的时候会将回车换行符作为一个子节点，应当加以判断识别才行。

2、修改(U)

 /**
  * Sets the title of every {@code book} element whose {@code id} attribute is "book1" to
  * "Thinking in java".
  *
  * @param document DOM tree to modify in place
  */
 private static void updateXMLContent(Document document) {
     NodeList books = document.getDocumentElement().getElementsByTagName("book");
     for (int i = 0; i < books.getLength(); i++) {
         Element candidate = (Element) books.item(i);
         if (!"book1".equals(candidate.getAttribute("id"))) {
             continue;
         }
         // The children also include whitespace text nodes, so pick the title by name.
         NodeList fields = candidate.getChildNodes();
         for (int j = 0; j < fields.getLength(); j++) {
             Node field = fields.item(j);
             if ("title".equals(field.getNodeName())) {
                 ((Element) field).setTextContent("Thinking in java");
             }
         }
     }
 }

修改后的内容：

 <?xml version="1.0" encoding="UTF-8"?><books>

     <book id="book1">

         <title>Thinking in java</title>

         <price>80.00</price>

     </book>

     <book id="book2">

         <title>JAVA核心技术</title>

         <price>100.00</price>

     </book>

 </books>

3、删除（D）

 /**
  * Removes every {@code book} element whose {@code id} attribute is "book2".
  *
  * @param document DOM tree to modify in place
  */
 private static void removeOldNodeFromXML(Document document) {
     Element root = document.getDocumentElement();
     NodeList books = root.getElementsByTagName("book");
     // The NodeList returned by getElementsByTagName is live: removing an element shifts
     // the following ones down by one, so a forward scan would skip the element right
     // after each removal. Walking from the end keeps the remaining indices valid.
     for (int i = books.getLength() - 1; i >= 0; i--) {
         Element book = (Element) books.item(i);
         if ("book2".equals(book.getAttribute("id"))) {
             book.getParentNode().removeChild(book);
         }
     }
 }

删除后内容：

 <?xml version="1.0" encoding="UTF-8"?><books>

     <book id="book1">

         <title>JAVA编程思想</title>

         <price>80.00</price>

     </book>

 </books>

4、添加(C)

  /**
   * Appends a new {@code book} element to the root of the document: its {@code id}
   * attribute is "book3" and it contains a {@code title} and a {@code price} child.
   *
   * @param document DOM tree to modify in place
   */
  private static void addNewNodeToXML(Document document) {
      Element root = document.getDocumentElement();

      Element title = document.createElement("title");
      title.setTextContent("计算机网络");

      Element price = document.createElement("price");
      price.setTextContent("1.0");

      Element book = document.createElement("book");
      book.setAttribute("id", "book3");
      book.appendChild(title);
      book.appendChild(price);

      // No whitespace text nodes are added, so the new element is serialized unindented.
      root.appendChild(book);
  }

添加后内容：

 <?xml version="1.0" encoding="UTF-8"?><books>

     <book id="book1">

         <title>JAVA编程思想</title>

         <price>80.00</price>

     </book>

     <book id="book2">

         <title>JAVA核心技术</title>

         <price>100.00</price>

     </book>

 <book id="book3"><title>计算机网络</title><price>1.0</price></book></books>

注意使用这种方式添加的元素没有格式上的缩进。

5、完整代码。

 package p01.getElementsByDomDemo;

 import java.io.File;

 import javax.xml.parsers.DocumentBuilder;

 import javax.xml.parsers.DocumentBuilderFactory;

 import javax.xml.transform.Result;

 import javax.xml.transform.Source;

 import javax.xml.transform.Transformer;

 import javax.xml.transform.TransformerFactory;

 import javax.xml.transform.dom.DOMSource;

 import javax.xml.transform.stream.StreamResult;

 import org.w3c.dom.Document;

 import org.w3c.dom.Element;

 import org.w3c.dom.Node;

 import org.w3c.dom.NodeList;

 /**
  * Demonstrates create, update and delete on a DOM tree loaded from
  * {@code xmldata/books.xml}; the resulting tree is written to {@code xmldata/books1.xml}.
  */
 public class CUDDemo {

     /**
      * Loads the document, optionally applies one of the edits below, and saves the result.
      *
      * @param args unused
      * @throws Exception if parsing or writing fails
      */
     public static void main(String[] args) throws Exception {
         Document document = getDocument();
         if (document == null) {
             // getDocument() has already reported the missing file. Without a tree there is
             // nothing to edit or save, and going on would end in a NullPointerException.
             return;
         }

         // Enable one of the following calls to apply that edit before saving.
         // Update: set the title of the book with id "book1" to "Thinking in java".
         // updateXMLContent(document);
         // Create: append a new book element.
         // addNewNodeToXML(document);
         // Delete: remove the book with id "book2".
         // removeOldNodeFromXML(document);

         // Persist the content of the document to a file on disk.
         saveToAnotherPlace(document);
     }

     /**
      * Removes every {@code book} element whose {@code id} attribute is "book2".
      *
      * @param document DOM tree to modify in place
      */
     private static void removeOldNodeFromXML(Document document) {
         Element root = document.getDocumentElement();
         NodeList books = root.getElementsByTagName("book");
         // The NodeList returned by getElementsByTagName is live: removing an element shifts
         // the following ones down by one, so a forward scan would skip the element right
         // after each removal. Walking from the end keeps the remaining indices valid.
         for (int i = books.getLength() - 1; i >= 0; i--) {
             Element book = (Element) books.item(i);
             if ("book2".equals(book.getAttribute("id"))) {
                 book.getParentNode().removeChild(book);
             }
         }
     }

     /**
      * Appends a new {@code book} element to the root of the document: its {@code id}
      * attribute is "book3" and it contains a {@code title} and a {@code price} child.
      *
      * @param document DOM tree to modify in place
      */
     private static void addNewNodeToXML(Document document) {
         Element root = document.getDocumentElement();
         Element book = document.createElement("book");
         book.setAttribute("id", "book3");
         Element title = document.createElement("title");
         Element price = document.createElement("price");
         title.setTextContent("计算机网络");
         price.setTextContent("1.0");
         book.appendChild(title);
         book.appendChild(price);
         root.appendChild(book);
     }

     /**
      * Sets the title of every {@code book} element whose {@code id} attribute is "book1"
      * to "Thinking in java".
      *
      * @param document DOM tree to modify in place
      */
     private static void updateXMLContent(Document document) {
         Element root = document.getDocumentElement();
         NodeList books = root.getElementsByTagName("book");
         for (int i = 0; i < books.getLength(); i++) {
             Element book = (Element) books.item(i);
             if (!"book1".equals(book.getAttribute("id"))) {
                 continue;
             }
             // The children also include whitespace text nodes, so pick the title by name.
             NodeList children = book.getChildNodes();
             for (int j = 0; j < children.getLength(); j++) {
                 Node child = children.item(j);
                 if ("title".equals(child.getNodeName())) {
                     ((Element) child).setTextContent("Thinking in java");
                 }
             }
         }
     }

     /**
      * Writes the root element of the given document to {@code xmldata/books1.xml} using a
      * default {@link Transformer}, so that the in-memory tree is persisted on disk.
      *
      * @param document DOM tree whose root element is written out
      * @throws Exception if the transformer cannot be created or the output cannot be written
      */
     private static void saveToAnotherPlace(Document document) throws Exception {
         TransformerFactory tf = TransformerFactory.newInstance();
         Transformer transformer = tf.newTransformer();
         Element rootSource = document.getDocumentElement();
         Source xmlSource = new DOMSource(rootSource);
         Result outputTarget = new StreamResult("xmldata/books1.xml");
         transformer.transform(xmlSource, outputTarget);
     }

     /**
      * Parses {@code xmldata/books.xml} into a DOM tree.
      *
      * @return the parsed document, or {@code null} (after printing a notice) when the file
      *         does not exist
      * @throws Exception if the parser cannot be created or the file cannot be parsed
      */
     private static Document getDocument() throws Exception {
         DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
         // Obtain the parser instance.
         DocumentBuilder db = dbf.newDocumentBuilder();
         File file = new File("xmldata/books.xml");
         if (!file.exists()) {
             System.out.println("目标文件不存在！");
             return null;
         }
         return db.parse(file);
     }
 }

三、SAX

使用该解析技术只能实现对XML文档的读取操作。使用这种方式的优点就是它的解析方式为“逐行读取”，并非将XML文档一次性加载进内存，这样就能够避免内存溢出的情况了，该解析方式是dom4j的解析方式。

注意DefaultHandler类，该类实现了处理xml文档所必须的几个接口，但其中的方法均为空实现；如果想要解析xml文档，需要覆写相应的方法。

 package p02.readElementsBySaxDemo;

 import java.io.File;

 import javax.xml.parsers.ParserConfigurationException;

 import javax.xml.parsers.SAXParser;

 import javax.xml.parsers.SAXParserFactory;

 import org.xml.sax.Attributes;

 import org.xml.sax.SAXException;

 import org.xml.sax.helpers.DefaultHandler;

 /**

  * 该类的功能是通过Sax技术实现对xml文档的查找操作。

  * 使用SAX技术不能实现对XML文档的增删改操作。

  * @author kdyzm

  *

  */

 public class ReadXMLBySax {

     public static void main(String[] args) throws Exception, SAXException {

     SAXParserFactory spf=SAXParserFactory.newInstance();

     SAXParser sp=spf.newSAXParser();

     File file=new File("xmldata/books.xml");

     MyHandler mh=new MyHandler();

     sp.parse(file, mh);

     }

 }

 /**
  * SAX handler that overrides some of the {@link DefaultHandler} callbacks and prints one
  * line for each event it receives.
  *
  * @author kdyzm
  */
 class MyHandler extends DefaultHandler {

     /** Called at the start of the document. */
     @Override
     public void startDocument() throws SAXException {
         report("开始解析文档！");
         super.startDocument();
     }

     /** Called at the end of the document. */
     @Override
     public void endDocument() throws SAXException {
         report("解析文档结束！");
         super.endDocument();
     }

     /** Called when an element starts; prints its qualified name and its {@code id} attribute. */
     @Override
     public void startElement(String uri, String localName, String qName,
             Attributes attributes) throws SAXException {
         // getValue returns null when the element has no id attribute; it is printed as "null".
         String idValue = attributes.getValue("id");
         report("开始解析元素：" + qName + "    属性id的值是：" + idValue);
         super.startElement(uri, localName, qName, attributes);
     }

     /** Called when an element ends; prints its qualified name. */
     @Override
     public void endElement(String uri, String localName, String qName)
             throws SAXException {
         super.endElement(uri, localName, qName);
         report("解析元素结束：" + qName);
     }

     /** Called for character data; prints the reported slice of the buffer. */
     @Override
     public void characters(char[] ch, int start, int length)
             throws SAXException {
         String text = new String(ch, start, length);
         report("解析得到的字符串是：" + text);
         super.characters(ch, start, length);
     }

     /** Prints one line to standard output. */
     private static void report(String line) {
         System.out.println(line);
     }
 }

输出：

开始解析文档！

开始解析元素：books    属性id的值是：null

解析得到的字符串是：

开始解析元素：book    属性id的值是：book1

解析得到的字符串是：

开始解析元素：title    属性id的值是：null

解析得到的字符串是：JAVA编程思想

解析元素结束：title

解析得到的字符串是：

开始解析元素：price    属性id的值是：null

解析得到的字符串是：80.00

解析元素结束：price

解析得到的字符串是：

解析元素结束：book

解析得到的字符串是：

开始解析元素：book    属性id的值是：book2

解析得到的字符串是：

开始解析元素：title    属性id的值是：null

解析得到的字符串是：JAVA核心技术

解析元素结束：title

解析得到的字符串是：

开始解析元素：price    属性id的值是：null

解析得到的字符串是：100.00

解析元素结束：price

解析得到的字符串是：

解析元素结束：book

解析得到的字符串是：

解析元素结束：books

解析文档结束！

四、使用dom4j解析包快速解析XML文档，实现CRUD操作。

码农公寓

相关文章