方法一:使用 Tess4J(不推荐:虽然免费,但识别准确率较低)
- 首先下载训练数据(语言包),识别中文需要下载中文语言包(如 chi_sim.traineddata):https://github.com/tesseract-ocr/tessdata (链接来自百度搜索)
该文件建议放在 resources 目录下,也可以放在其他任意位置,但需要在代码中通过 setDatapath 指定其所在目录
<!-- Tess4J: the library that provides the net.sourceforge.tess4j classes imported by OCRDemo. -->
<dependency>
<groupId>net.sourceforge.tess4j</groupId>
<artifactId>tess4j</artifactId>
<version>3.2.1</version>
</dependency>
<!-- NOTE(review): cassandra-all is not referenced by the OCR sample code; confirm it is actually required. -->
<!-- NOTE(review): the groupId value below starts with a space; confirm Maven resolves it as intended. -->
<dependency>
<groupId> org.apache.cassandra</groupId>
<artifactId>cassandra-all</artifactId>
<version>0.8.1</version>
<!-- The SLF4J-to-log4j binding and log4j itself are excluded; presumably to avoid logging conflicts (confirm). -->
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
</exclusions>
</dependency>
package com.example.provider.controller;/**
* @description:
* @Author EDZ
* @date 2021/7/13 16:09
* @param: nacos
* @return: $
*/
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import java.io.File;
import java.io.FileWriter;
/**
 * Minimal Tess4J demo: runs OCR on a single image, saves the recognized text
 * to a file and prints it together with the recognition time.
 *
 * @author xww
 * @date 2021/7/13 16:09
 */
public class OCRDemo {

    /**
     * Recognizes the text of a hard-coded image with the {@code chi_sim}
     * language data, writes the result to a text file and prints it to
     * standard output along with the time spent in recognition.
     *
     * @param args unused
     * @throws TesseractException if Tesseract fails to process the image
     */
    public static void main(String[] args) throws TesseractException {
        ITesseract instance = new Tesseract();
        // When the language data is not in the project root, an absolute path must be given.
        instance.setDatapath("D://workspace//nacos");
        // To recognize a language other than English, the language must be set explicitly
        // and the matching language pack must be available to the project.
        instance.setLanguage("chi_sim");

        // Image to recognize.
        File image = new File("D:\\ps\\5.png");

        long startTime = System.currentTimeMillis();
        String ocrResult = instance.doOCR(image);
        // Capture the elapsed time right after recognition so that it does not
        // include the time spent writing the result file below.
        long elapsedMillis = System.currentTimeMillis() - startTime;

        // Save the result to a text file.
        saveText(new File("D:\\" + "cs.txt"), ocrResult);

        // Print the recognition result.
        System.out.println("OCR Result: \n" + ocrResult + "\n 耗时:" + elapsedMillis + "ms");
    }

    /**
     * Writes {@code text} to {@code target}, creating or overwriting the file.
     * This is best-effort: a failure is reported on standard error and does
     * not stop the caller from printing the result.
     *
     * @param target file to write to
     * @param text   content to write
     */
    private static void saveText(File target, String text) {
        // FileWriter creates the file itself, so no separate createNewFile() call is needed.
        // try-with-resources closes (and thereby flushes) the writer even when write() throws.
        // NOTE: FileWriter(File) encodes the text with the platform default charset.
        try (FileWriter writer = new FileWriter(target)) {
            writer.write(text);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
方法二:使用百度云官方 OCR 接口(可领取免费调用次数)
<!-- Baidu AIP Java SDK: the library that provides com.baidu.aip.ocr.AipOcr imported by Sample. -->
<dependency>
<groupId>com.baidu.aip</groupId>
<artifactId>java-sdk</artifactId>
<version>4.16.2</version>
</dependency>
<!-- NOTE(review): cassandra-all is not referenced by the OCR sample code; confirm it is actually required. -->
<!-- NOTE(review): the groupId value below starts with a space; confirm Maven resolves it as intended. -->
<dependency>
<groupId> org.apache.cassandra</groupId>
<artifactId>cassandra-all</artifactId>
<version>0.8.1</version>
<!-- The SLF4J-to-log4j binding and log4j itself are excluded; presumably to avoid logging conflicts (confirm). -->
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
</exclusions>
</dependency>
package com.example.provider.controller;/**
* @description:
* @Author EDZ
* @date 2021/7/13 19:02
* @param: nacos
* @return: $
*/
import java.io.File;
import java.io.FileWriter;
import java.util.HashMap;
import org.json.JSONObject;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import com.baidu.aip.ocr.AipOcr;
/**
 * Minimal Baidu AIP OCR demo: sends one local image to the general text
 * recognition interface, saves the JSON response to a file and prints it.
 *
 * @author xww
 * @date 2021/7/13 19:02
 */
@Component
public class Sample {
    // APPID / API key (AK) / secret key (SK). Left blank here; fill in your own values before running.
    public static final String APP_ID = "";
    public static final String API_KEY = "";
    public static final String SECRET_KEY = "";

    /**
     * Calls {@code basicGeneral} on a hard-coded image path, writes the
     * response JSON to a text file and prints it to standard output with an
     * indent of 2.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Create the AipOcr client.
        AipOcr client = new AipOcr(APP_ID, API_KEY, SECRET_KEY);

        // Optional: network connection parameters.
        client.setConnectionTimeoutInMillis(2000);
        client.setSocketTimeoutInMillis(60000);

        // Optional: proxy server address. Choose either HTTP or socket, or set neither.
        // client.setHttpProxy("proxy_host", proxy_port);   // HTTP proxy
        // client.setSocketProxy("proxy_host", proxy_port); // socket proxy

        // Optional: log4j output configuration; the default configuration is used when unset.
        // The same property can also be passed as a JVM startup argument.
        // System.setProperty("aip.log4j.conf", "path/to/your/log4j.properties");

        // Call the recognition interface with no extra options.
        String path = "D:\\ps\\9.png";
        JSONObject res = client.basicGeneral(path, new HashMap<String, String>());

        // Save the raw (compact) JSON response to a text file.
        saveText(new File("D:\\" + "cs5.txt"), res.toString());

        System.out.println(res.toString(2));
    }

    /**
     * Writes {@code text} to {@code target}, creating or overwriting the file.
     * This is best-effort: a failure is reported on standard error and does
     * not stop the caller from printing the response.
     *
     * @param target file to write to
     * @param text   content to write
     */
    private static void saveText(File target, String text) {
        // FileWriter creates the file itself, so no separate createNewFile() call is needed.
        // try-with-resources closes (and thereby flushes) the writer even when write() throws.
        // NOTE: FileWriter(File) encodes the text with the platform default charset.
        try (FileWriter writer = new FileWriter(target)) {
            writer.write(text);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}