本文的例子演示如何将XML文件按某个节点拆分。至于dom4j和SAX之间的区别非常明显，在此不再赘述。下面通过一个实例来了解dom4j的拆分功能。
package shuai.study.dom4j.demo; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.IOUtils; import org.dom4j.Attribute; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.DocumentHelper; import org.dom4j.Element; import org.dom4j.io.OutputFormat; import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; /** * @author shengshu * */ public class Dom4jHandler { private static volatile int pageNumber = -1; private Document getDocument(File inputFile) { SAXReader saxReader = new SAXReader(); Document document = null; try { document = saxReader.read(new FileInputStream(inputFile)); } catch (FileNotFoundException | DocumentException e) { e.printStackTrace(); } return document; } private Element getRootElement(Document document) { return document.getRootElement(); } public List<File> splitXml(File inputFile, String outputFileDirectory, String splitNodeString, int nodeLimitCount) { Document document = this.getDocument(inputFile); Element rootElement = this.getRootElement(document); if (document == null || rootElement == null) { throw new NullPointerException(); } Document fragmentDocument = null; List<File> exportFileList = new ArrayList<File>(); File exportFile = null; Element tempRootElement = null; int splitNodeIndex = 0; Iterator<?> iterator = rootElement.elementIterator(splitNodeString); while (iterator.hasNext()) { if (splitNodeIndex == nodeLimitCount || splitNodeIndex == 0) { if (splitNodeIndex == nodeLimitCount) { exportFile = this.saveXml(fragmentDocument, this.getOutputFile(outputFileDirectory, inputFile, ++pageNumber)); if (exportFile != null) { 
exportFileList.add(exportFile); } splitNodeIndex = 0; } fragmentDocument = DocumentHelper.createDocument(); tempRootElement = fragmentDocument.addElement(rootElement.getName()); tempRootElement.addAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); List<?> attributeList = rootElement.attributes(); Iterator<?> attributeIterator = attributeList.iterator(); while (attributeIterator.hasNext()) { Attribute attribute = (Attribute) attributeIterator.next(); tempRootElement.addAttribute(attribute.getName(), attribute.getValue()); } } Element splitElement = (Element) iterator.next(); tempRootElement.add((Element) splitElement.clone()); splitNodeIndex++; } if (fragmentDocument != null) { exportFile = this.saveXml(fragmentDocument, this.getOutputFile(outputFileDirectory, inputFile, ++pageNumber)); if (exportFile != null) { exportFileList.add(exportFile); } } return exportFileList; } private File saveXml(Document document, File outputFile) { OutputStream fileOutputStream = null; XMLWriter xmlWriter = null; OutputFormat format = OutputFormat.createPrettyPrint(); try { FileUtils.touch(outputFile); fileOutputStream = new FileOutputStream(outputFile); xmlWriter = new XMLWriter(fileOutputStream, format); xmlWriter.write(document); xmlWriter.flush(); } catch (FileNotFoundException fnfe) { fnfe.printStackTrace(); } catch (UnsupportedEncodingException uee) { uee.printStackTrace(); } catch (IOException ioe) { ioe.printStackTrace(); } finally { if (xmlWriter != null) { try { xmlWriter.close(); } catch (IOException ioe) { ioe.printStackTrace(); } } IOUtils.closeQuietly(fileOutputStream); } return outputFile; } private File getOutputFile(String outputFileDirectory, File inputFile, int pageNumber) { String fileName = inputFile.getName(); String fileBaseName = FilenameUtils.getBaseName(fileName); String fileNameExtension = FilenameUtils.getExtension(fileName); String outputFilePath = outputFileDirectory + File.separator + fileBaseName + "-" + this.pagingFormat(pageNumber) + 
"." + fileNameExtension; File outputFile = new File(outputFilePath); return outputFile; } private synchronized String pagingFormat(int pageNumber) { String pageNumberStr = "P00"; if (pageNumber < 0) { throw new NumberFormatException("The page number should not be negative"); } if (pageNumber < 10) { pageNumberStr = String.format("P0%d", pageNumber); } else { pageNumberStr = String.format("P%d", pageNumber); } return pageNumberStr; } }
package shuai.study.dom4j.demo;

import java.io.File;
import java.net.URISyntaxException;
import java.net.URL;

/**
 * Demo entry point: splits the classpath resource {@code /file/input/company.xml} into one file
 * per {@code Employee} element and writes the result to the classpath directory
 * {@code /file/output}.
 *
 * @author shengshu
 */
public class Dom4jDemo {
    public static void main(String[] args) {
        File inputFile = resourceToFile("/file/input/company.xml");
        String outputFileDirectory = resourceToFile("/file/output").getPath();

        String splitNodeString = "Employee";
        int nodeLimitCount = 1;

        Dom4jHandler dom4jHandler = new Dom4jHandler();
        dom4jHandler.splitXml(inputFile, outputFileDirectory, splitNodeString, nodeLimitCount);
    }

    /**
     * Resolves a classpath resource to a file on disk.
     *
     * <p>The URL is converted through {@link URL#toURI()} instead of {@link URL#getPath()},
     * because {@code getPath()} returns the percent-encoded form and therefore yields a wrong
     * path when a directory name contains spaces or non-ASCII characters.
     *
     * @param resourceName the absolute resource name, e.g. {@code /file/output}
     * @return the file the resource URL points to
     * @throws IllegalStateException if the resource does not exist or its URL is not a valid URI
     * @throws IllegalArgumentException if the resource is not a plain file-system location
     *         (thrown by {@link File#File(java.net.URI)})
     */
    private static File resourceToFile(String resourceName) {
        URL resourceUrl = Dom4jHandler.class.getResource(resourceName);
        if (resourceUrl == null) {
            throw new IllegalStateException("Classpath resource not found: " + resourceName);
        }

        try {
            return new File(resourceUrl.toURI());
        } catch (URISyntaxException e) {
            throw new IllegalStateException("Invalid resource URL: " + resourceUrl, e);
        }
    }
}
<?xml version = "1.0" encoding="UTF-8"?> <Company xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="CompanyFileFormat.xsd"> <Message>XXX Company Employee Message</Message> <Employee ID="37"> <Name> <FirstName>Zhou</FirstName> <LastName>Shengshuai</LastName> </Name> <Sex>Male</Sex> <Address> <Country>China</Country> <Province>ShanDong</Province> <City>LinYi</City> <Village>FengHuangYu</Village> <Contact> <Mobile>18108***778</Mobile> <Mail>zhoushengshuai2007@163.com</Mail> <QQ>254392398</QQ> <Postcode>276422</Postcode> </Contact> </Address> <Profession>Software</Profession> </Employee> <Employee ID="66"> <Name> <FirstName>Wang</FirstName> <LastName>Eric</LastName> </Name> <Sex>Male</Sex> <Address> <Country>China</Country> <Province>HeBei</Province> <City>QinHuangDao</City> <Village>hhh</Village> <Contact> <Mobile>150*****955</Mobile> <Mail>eric@163.com</Mail> <QQ>666666666</QQ> <Postcode>111666</Postcode> </Contact> </Address> <Profession>Software</Profession> </Employee> <Employee ID="99"> <Name> <FirstName>Shi</FirstName> <LastName>Stone</LastName> </Name> <Sex>Male</Sex> <Address> <Country>China</Country> <Province>HeNan</Province> <City>PingDingShan</City> <Village>nnn</Village> <Contact> <Mobile>186*****015</Mobile> <Mail>stone@163.com</Mail> <QQ>999999999</QQ> <Postcode>111999</Postcode> </Contact> </Address> <Profession>Software</Profession> </Employee> </Company>