<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build for parse-word.
  Produces a plain JAR plus a self-contained "jar-with-dependencies" JAR,
  both with com.sy.parse.App as the Main-Class.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.sy.parse</groupId>
  <artifactId>parse-word</artifactId>
  <version>1.0-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>parse-word</name>
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Picked up by maven-compiler-plugin as its default source/target levels. -->
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <!-- Single place to keep all Apache POI artifacts on the same release. -->
    <poi.version>3.17</poi.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi</artifactId>
      <version>${poi.version}</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml</artifactId>
      <version>${poi.version}</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml-schemas -->
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml-schemas</artifactId>
      <version>${poi.version}</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
    <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-scratchpad</artifactId>
      <version>${poi.version}</version>
    </dependency>
  </dependencies>

  <build>
    <finalName>parse-word</finalName>
    <plugins>
      <!-- Version pinned for reproducible builds; source/target come from the
           maven.compiler.* properties above, so no extra configuration is needed. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.8.1</version>
      </plugin>

      <!-- Builds the plain project JAR. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <version>3.1.0</version>
        <configuration>
          <archive>
            <manifest>
              <!-- Manifest Class-Path entries are prefixed with lib/. -->
              <addClasspath>true</addClasspath>
              <classpathPrefix>lib/</classpathPrefix>
              <!-- Default class started by "java -jar" (application entry point). -->
              <mainClass>com.sy.parse.App</mainClass>
            </manifest>
          </archive>
        </configuration>
      </plugin>

      <!-- Assembles the self-contained JAR during the package phase. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>3.3.0</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <descriptorRefs>
            <!-- Bundle all dependencies into the JAR. -->
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
          <archive>
            <manifest>
              <!-- Same manifest settings as the plain JAR. -->
              <addClasspath>true</addClasspath>
              <classpathPrefix>lib/</classpathPrefix>
              <!-- Default class started by "java -jar" (application entry point). -->
              <mainClass>com.sy.parse.App</mainClass>
            </manifest>
          </archive>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
package com.sy.parse;

import com.sy.parse.utils.ReadExcel;
import com.sy.parse.utils.ReadWord;

import java.util.Arrays;
import java.util.List;

/**
 * Command-line entry point: runs the Excel reader and then the Word reader
 * over a fixed list of classpath resources.
 *
 * @author Alice on 2021-08-20
 */
public class App {

    /**
     * Processes every configured Excel resource, then every configured Word resource.
     *
     * @param args ignored
     * @throws Exception whatever the readers throw; nothing is caught here
     */
    public static void main(String[] args) throws Exception {
        // Step 1: Excel workbooks, addressed as /excel/<name>.xls
        List<String> workbookNames = Arrays.asList("1");
        for (String name : workbookNames) {
            ReadExcel.readExcel("/excel/" + name + ".xls", name);
        }

        // Step 2: Word documents, addressed as /word/<name>.doc
        List<String> documentNames = Arrays.asList("a", "b");
        for (String name : documentNames) {
            ReadWord.readWord("/word/" + name + ".doc", name);
        }
    }
}
package com.sy.parse.utils;

import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import java.io.FileNotFoundException;
import java.io.InputStream;
import java.net.URL;

/**
 * Reads an Excel workbook from the classpath and hands its cell text to {@link WriteFile}.
 *
 * @author Alice on 2021-08-20
 */
public class ReadExcel {

    public static final String LINE = System.getProperty("line.separator");

    /**
     * Extracts the text of every cell of every sheet and writes it out via
     * {@code WriteFile.writeFile(fileName, content)}.
     *
     * <p>Each row becomes one line; each non-null cell contributes its
     * {@code toString()} followed by a single space. Rows are also echoed to
     * standard output. Resources that end in neither {@code .xls} nor
     * {@code .xlsx} are reported on standard output and nothing is written.
     *
     * @param path     classpath resource path of the workbook
     * @param fileName name passed on to {@code WriteFile.writeFile}
     * @throws FileNotFoundException if {@code path} does not resolve to a classpath resource
     * @throws Exception             on any read, parse or write failure
     */
    public static void readExcel(String path, String fileName) throws Exception {
        URL resource = ReadExcel.class.getResource(path);
        if (resource == null) {
            // Fail with a message that names the resource instead of a bare NPE.
            throw new FileNotFoundException("Classpath resource not found: " + path);
        }
        String filePath = resource.getPath();
        System.out.println("================");
        System.out.println(filePath);

        String content;
        if (filePath.endsWith(".xls")) {
            // try-with-resources closes both the stream and the workbook, even on failure.
            try (InputStream is = resource.openStream();
                 HSSFWorkbook workbook = new HSSFWorkbook(is)) {
                content = readXls(workbook);
            }
        } else if (filePath.endsWith(".xlsx")) {
            try (InputStream is = resource.openStream();
                 XSSFWorkbook workbook = new XSSFWorkbook(is)) {
                content = readXlsx(workbook);
            }
        } else {
            System.out.println("此文件不是excel文件.");
            return;
        }
        WriteFile.writeFile(fileName, content);
    }

    /** Collects the text of all rows of a binary (.xls) workbook. */
    private static String readXls(HSSFWorkbook workbook) {
        StringBuilder content = new StringBuilder();
        int sheetCount = workbook.getNumberOfSheets();
        for (int i = 0; i < sheetCount; i++) {
            HSSFSheet sheet = workbook.getSheetAt(i);
            int lastRow = sheet.getLastRowNum();
            for (int j = 0; j <= lastRow; j++) {
                HSSFRow row = sheet.getRow(j);
                if (row == null) {
                    continue;
                }
                StringBuilder line = new StringBuilder();
                // getLastCellNum() is already "last index + 1" (or -1 for an empty row).
                int cellEnd = row.getLastCellNum();
                for (int k = 0; k < cellEnd; k++) {
                    appendCell(line, row.getCell(k));
                }
                appendRow(content, line);
            }
        }
        return content.toString();
    }

    /** Collects the text of all rows of an OOXML (.xlsx) workbook; mirrors {@code readXls}. */
    private static String readXlsx(XSSFWorkbook workbook) {
        StringBuilder content = new StringBuilder();
        int sheetCount = workbook.getNumberOfSheets();
        for (int i = 0; i < sheetCount; i++) {
            XSSFSheet sheet = workbook.getSheetAt(i);
            int lastRow = sheet.getLastRowNum();
            for (int j = 0; j <= lastRow; j++) {
                XSSFRow row = sheet.getRow(j);
                if (row == null) {
                    continue;
                }
                StringBuilder line = new StringBuilder();
                // getLastCellNum() is already "last index + 1" (or -1 for an empty row).
                int cellEnd = row.getLastCellNum();
                for (int k = 0; k < cellEnd; k++) {
                    appendCell(line, row.getCell(k));
                }
                appendRow(content, line);
            }
        }
        return content.toString();
    }

    /** Appends a cell's text plus a trailing space; missing (null) cells are skipped. */
    private static void appendCell(StringBuilder line, Object cell) {
        if (cell != null) {
            line.append(cell).append(" ");
        }
    }

    /** Echoes a finished row to standard output and appends it, line-terminated, to the content. */
    private static void appendRow(StringBuilder content, StringBuilder line) {
        System.out.println(line);
        content.append(line).append(LINE);
    }
}
package com.sy.parse.utils;

import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;

import java.io.FileNotFoundException;
import java.io.InputStream;
import java.net.URL;

/**
 * Reads a Word document from the classpath and hands its text to {@link WriteFile}.
 *
 * @author Alice on 2021-08-20
 */
public class ReadWord {

    /**
     * Extracts the text of a {@code .doc} or {@code .docx} classpath resource,
     * echoes it to standard output and writes it out via
     * {@code WriteFile.writeFile(fileName, text)}.
     *
     * <p>Both formats are read through a resource stream, so the document is
     * found whether the resource sits in a directory or inside a JAR.
     * Resources with any other extension are reported on standard output and
     * nothing is written.
     *
     * @param path     classpath resource path of the document
     * @param fileName name passed on to {@code WriteFile.writeFile}
     * @throws FileNotFoundException if {@code path} does not resolve to a classpath resource
     * @throws Exception             on any read, parse or write failure
     */
    public static void readWord(String path, String fileName) throws Exception {
        URL resource = ReadWord.class.getResource(path);
        if (resource == null) {
            // Fail with a message that names the resource instead of a bare NPE.
            throw new FileNotFoundException("Classpath resource not found: " + path);
        }
        String filePath = resource.getPath();

        String result;
        if (filePath.endsWith(".doc")) {
            // try-with-resources closes the extractor and the stream, even on failure.
            try (InputStream is = resource.openStream();
                 WordExtractor extractor = new WordExtractor(is)) {
                result = extractor.getText();
            }
        } else if (filePath.endsWith(".docx")) {
            // Open the package from the stream rather than from URL.getPath():
            // a URL path is not a usable file name for resources inside a JAR
            // or for locations containing URL-encoded characters.
            try (InputStream is = resource.openStream();
                 POIXMLTextExtractor extractor = new XWPFWordExtractor(OPCPackage.open(is))) {
                result = extractor.getText();
            }
        } else {
            System.out.println("此文件不是word文件.");
            // Nothing was extracted, so do not create an empty output file.
            return;
        }

        System.out.println("============================");
        System.out.println(result);
        WriteFile.writeFile(fileName, result);
    }
}
package com.sy.parse.utils;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

/**
 * Helpers for writing extracted text to {@code .txt} files.
 *
 * <p>All files are written as UTF-8 so the output does not depend on the
 * platform default charset.
 */
public class WriteFile {

    /** Directory used by {@link #writeFile(String, String)}. */
    public static final String DEFAULT_OUTPUT_DIR = "/home/bj-word/";

    /**
     * Writes {@code content} to {@code <DEFAULT_OUTPUT_DIR><fileName>.txt}.
     *
     * @param fileName file name without the {@code .txt} extension
     * @param content  text to write
     * @throws Exception if the directory cannot be created or the file cannot be written
     */
    public static void writeFile(String fileName, String content) throws Exception {
        writeFile(DEFAULT_OUTPUT_DIR, fileName, content);
    }

    /**
     * Writes {@code content} to {@code <outputDir>/<fileName>.txt}, creating
     * {@code outputDir} (including missing parents) when necessary.
     *
     * @param outputDir target directory
     * @param fileName  file name without the {@code .txt} extension
     * @param content   text to write
     * @throws IOException if {@code outputDir} does not exist and cannot be created
     * @throws Exception   if the file cannot be written
     */
    public static void writeFile(String outputDir, String fileName, String content) throws Exception {
        File dir = new File(outputDir);
        // mkdirs() returns false both on failure and when the directory already
        // exists, so only treat it as an error when no directory is there afterwards.
        if (!dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Cannot create output directory: " + dir);
        }
        bufferedWrite(new File(dir, fileName + ".txt").getPath(), content);
    }

    /**
     * Writes {@code content} to {@code path} as UTF-8, replacing any existing file.
     *
     * @param path    target file
     * @param content text to write
     * @throws Exception if the file cannot be opened or written
     */
    public static void fileWrite(String path, String content) throws Exception {
        try (Writer writer = Files.newBufferedWriter(Paths.get(path), StandardCharsets.UTF_8)) {
            writer.append(content);
        }
    }

    /**
     * Writes {@code content} to {@code path} as UTF-8, replacing any existing file.
     *
     * @param path    target file
     * @param content text to write
     * @throws Exception if the file cannot be opened or written
     */
    public static void bufferedWrite(String path, String content) throws Exception {
        try (BufferedWriter writer = Files.newBufferedWriter(Paths.get(path), StandardCharsets.UTF_8)) {
            writer.write(content);
        }
    }
}