1. 检测标题与内容是否相关
当相关度返回值 >0 的时候才算做通过
import java.text.NumberFormat;
import java.util.Locale;
public class Compute {
public static void main(String[] args) {
String content = "最近公司由于业务拓展,需要进行小程序相关的开发,本着朝全栈开发者努力,决定学习下Vue,去年csdn送了一本《Vue.js权威指南》";
String title = "VueVueVue";
double ss = SimilarDegree(content, title);
System.out.println(ss);
}
/*
* 计算相似度
* */
public static double SimilarDegree(String strA, String strB) {
String newStrA = removeSign(strA);
String newStrB = removeSign(strB);
//用较大的字符串长度作为分母,相似子串作为分子计算出字串相似度
int temp = Math.max(newStrA.length(), newStrB.length());
int temp2 = longestCommonSubstring(newStrA, newStrB).length();
return temp2 * 1.0 / temp;
}
/*
* 将字符串的所有数据依次写成一行
* */
public static String removeSign(String str) {
StringBuffer sb = new StringBuffer();
//遍历字符串str,如果是汉字数字或字母,则追加到ab上面
for (char item : str.toCharArray()) {
if (charReg(item)) {
sb.append(item);
}
}
return sb.toString();
}
/*
* 判断字符是否为汉字,数字和字母,
* 因为对符号进行相似度比较没有实际意义,故符号不加入考虑范围。
* */
public static boolean charReg(char charValue) {
return (charValue >= 0x4E00 && charValue <= 0X9FA5) || (charValue >= 'a' && charValue <= 'z')
|| (charValue >= 'A' && charValue <= 'Z') || (charValue >= '0' && charValue <= '9');
}
/*
* 求公共子串,采用动态规划算法。
* 其不要求所求得的字符在所给的字符串中是连续的。
*
* */
public static String longestCommonSubstring(String strA, String strB) {
char[] chars_strA = strA.toCharArray();
char[] chars_strB = strB.toCharArray();
int m = chars_strA.length;
int n = chars_strB.length;
/*
* 初始化矩阵数据,matrix[0][0]的值为0,
* 如果字符数组chars_strA和chars_strB的对应位相同,则matrix[i][j]的值为左上角的值加1,
* 否则,matrix[i][j]的值等于左上方最近两个位置的较大值,
* 矩阵中其余各点的值为0.
*/
int[][] matrix = new int[m + 1][n + 1];
for (int i = 1; i <= m; i++) {
for (int j = 1; j <= n; j++) {
if (chars_strA[i - 1] == chars_strB[j - 1]) {
matrix[i][j] = matrix[i - 1][j - 1] + 1;
} else {
matrix[i][j] = Math.max(matrix[i][j - 1], matrix[i - 1][j]);
}
}
}
/*
* 矩阵中,如果matrix[m][n]的值不等于matrix[m-1][n]的值也不等于matrix[m][n-1]的值,
* 则matrix[m][n]对应的字符为相似字符元,并将其存入result数组中。
*
*/
char[] result = new char[matrix[m][n]];
int currentIndex = result.length - 1;
while (matrix[m][n] != 0) {
if (matrix[n] == matrix[n - 1]){
n--;
} else if (matrix[m][n] == matrix[m - 1][n]){
m--;
}else {
result[currentIndex] = chars_strA[m - 1];
currentIndex--;
n--;
m--;
}
}
return new String(result);
}
/*
* 结果转换成百分比形式
* */
public static String similarityResult(double resule) {
return NumberFormat.getPercentInstance(new Locale("en ", "US ")).format(resule);
}
}
2. 阿里云内容审核
内容安全_云盾_违规内容识别_安全-阿里云 (aliyun.com)
文字审核
package com.heima.common.aliyun;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.aliyuncs.DefaultAcsClient;
import com.aliyuncs.IAcsClient;
import com.aliyuncs.exceptions.ClientException;
import com.aliyuncs.green.model.v20180509.TextScanRequest;
import com.aliyuncs.http.FormatType;
import com.aliyuncs.http.HttpResponse;
import com.aliyuncs.profile.DefaultProfile;
import com.aliyuncs.profile.IClientProfile;
import lombok.Getter;
import lombok.Setter;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.PropertySource;
import java.util.*;
@Getter
@Setter
@Configuration
@ConfigurationProperties(prefix="aliyun")
@PropertySource("classpath:aliyun.properties")
public class AliyunTextScanRequest {
private String accessKey;
private String secret;
public String textScanRequest(String content) throws Exception {
IClientProfile profile = DefaultProfile.getProfile("cn-shanghai", accessKey, secret);
IAcsClient client = new DefaultAcsClient(profile);
TextScanRequest textScanRequest = new TextScanRequest();
textScanRequest.setAcceptFormat(FormatType.JSON); // 指定api返回格式
textScanRequest.setHttpContentType(FormatType.JSON);
textScanRequest.setMethod(com.aliyuncs.http.MethodType.POST); // 指定请求方法
textScanRequest.setEncoding("UTF-8");
textScanRequest.setRegionId("cn-shanghai");
List<Map<String, Object>> tasks = new ArrayList<Map<String, Object>>();
Map<String, Object> task1 = new LinkedHashMap<String, Object>();
task1.put("dataId", UUID.randomUUID().toString());
/**
* 待检测的文本,长度不超过10000个字符
*/
task1.put("content", content);
tasks.add(task1);
JSONObject data = new JSONObject();
/**
* 检测场景,文本垃圾检测传递:antispam
**/
data.put("scenes", Arrays.asList("antispam"));
data.put("tasks", tasks);
System.out.println(JSON.toJSONString(data, true));
textScanRequest.setHttpContent(data.toJSONString().getBytes("UTF-8"), "UTF-8", FormatType.JSON);
// 请务必设置超时时间
textScanRequest.setConnectTimeout(3000);
textScanRequest.setReadTimeout(6000);
try {
HttpResponse httpResponse = client.doAction(textScanRequest);
if(httpResponse.isSuccess()){
JSONObject scrResponse = JSON.parseObject(new String(httpResponse.getHttpContent(), "UTF-8"));
System.out.println(JSON.toJSONString(scrResponse, true));
if (200 == scrResponse.getInteger("code")) {
JSONArray taskResults = scrResponse.getJSONArray("data");
for (Object taskResult : taskResults) {
if(200 == ((JSONObject)taskResult).getInteger("code")){
JSONArray sceneResults = ((JSONObject)taskResult).getJSONArray("results");
for (Object sceneResult : sceneResults) {
String scene = ((JSONObject)sceneResult).getString("scene");
String suggestion = ((JSONObject)sceneResult).getString("suggestion");
//根据scene和suggetion做相关处理
//suggestion == pass 未命中垃圾 review 人工审核 suggestion == block 命中了垃圾,可以通过label字段查看命中的垃圾分类
System.out.println("args = [" + scene + "]");
System.out.println("args = [" + suggestion + "]");
return suggestion;
}
}else{
System.out.println("task process fail:" + ((JSONObject)taskResult).getInteger("code"));
}
}
} else {
System.out.println("detect not success. code:" + scrResponse.getInteger("code"));
}
}else{
System.out.println("response not success. status:" + httpResponse.getStatus());
}
} catch (ClientException e) {
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}
return null;
}
}
图片审核
package com.heima.common.aliyun;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.aliyuncs.DefaultAcsClient;
import com.aliyuncs.IAcsClient;
import com.aliyuncs.green.model.v20180509.ImageSyncScanRequest;
import com.aliyuncs.http.FormatType;
import com.aliyuncs.http.HttpResponse;
import com.aliyuncs.http.MethodType;
import com.aliyuncs.http.ProtocolType;
import com.aliyuncs.profile.DefaultProfile;
import com.aliyuncs.profile.IClientProfile;
import lombok.Getter;
import lombok.Setter;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.PropertySource;
import java.util.*;
@Getter
@Setter
@Configuration
@ConfigurationProperties(prefix="aliyun")
@PropertySource("classpath:aliyun.properties")
public class AliyunImageScanRequest {
private String accessKey;
private String secret;
public String imageScanRequest(List<String> images) throws Exception {
IClientProfile profile = DefaultProfile.getProfile("cn-shanghai",accessKey,secret);
DefaultProfile.addEndpoint("cn-shanghai", "cn-shanghai", "Green", "green.cn-shanghai.aliyuncs.com");
IAcsClient client = new DefaultAcsClient(profile);
ImageSyncScanRequest imageSyncScanRequest = new ImageSyncScanRequest();
// 指定api返回格式
imageSyncScanRequest.setAcceptFormat(FormatType.JSON);
// 指定请求方法
imageSyncScanRequest.setMethod(MethodType.POST);
imageSyncScanRequest.setEncoding("utf-8");
//支持http和https
imageSyncScanRequest.setProtocol(ProtocolType.HTTP);
JSONObject httpBody = new JSONObject();
/**
* 设置要检测的场景, 计费是按照该处传递的场景进行
* 一次请求中可以同时检测多张图片,每张图片可以同时检测多个风险场景,计费按照场景计算
* 例如:检测2张图片,场景传递porn,terrorism,计费会按照2张图片鉴黄,2张图片暴恐检测计算
* porn: porn表示色情场景检测
*/
httpBody.put("scenes", Arrays.asList("logo","porn","ad","terrorism"));
/**
* 设置待检测图片, 一张图片一个task,
* 多张图片同时检测时,处理的时间由最后一个处理完的图片决定。
* 通常情况下批量检测的平均rt比单张检测的要长, 一次批量提交的图片数越多,rt被拉长的概率越高
* 这里以单张图片检测作为示例, 如果是批量图片检测,请自行构建多个task
*/
List<JSONObject> list = new ArrayList<JSONObject>();
for (String image : images) {
JSONObject task = new JSONObject();
task.put("dataId", UUID.randomUUID().toString());
//设置图片链接为上传后的url
task.put("url", image);
task.put("time", new Date());
list.add(task);
}
httpBody.put("tasks", list);
imageSyncScanRequest.setHttpContent(org.apache.commons.codec.binary.StringUtils.getBytesUtf8(httpBody.toJSONString()),
"UTF-8", FormatType.JSON);
/**
* 请设置超时时间, 服务端全链路处理超时时间为10秒,请做相应设置
* 如果您设置的ReadTimeout 小于服务端处理的时间,程序中会获得一个read timeout 异常
*/
imageSyncScanRequest.setConnectTimeout(3000);
imageSyncScanRequest.setReadTimeout(10000);
HttpResponse httpResponse = null;
try {
httpResponse = client.doAction(imageSyncScanRequest);
} catch (Exception e) {
e.printStackTrace();
}
//服务端接收到请求,并完成处理返回的结果
if (httpResponse != null && httpResponse.isSuccess()) {
JSONObject scrResponse = JSON.parseObject(org.apache.commons.codec.binary.StringUtils.newStringUtf8(httpResponse.getHttpContent()));
System.out.println(JSON.toJSONString(scrResponse, true));
int requestCode = scrResponse.getIntValue("code");
//每一张图片的检测结果
JSONArray taskResults = scrResponse.getJSONArray("data");
if (200 == requestCode) {
for (Object taskResult : taskResults) {
//单张图片的处理结果
int taskCode = ((JSONObject) taskResult).getIntValue("code");
//图片要检测的场景的处理结果, 如果是多个场景,则会有每个场景的结果
JSONArray sceneResults = ((JSONObject) taskResult).getJSONArray("results");
if (200 == taskCode) {
for (Object sceneResult : sceneResults) {
String scene = ((JSONObject) sceneResult).getString("scene");
String suggestion = ((JSONObject) sceneResult).getString("suggestion");
//根据scene和suggetion做相关处理
//do something
//pass 通过 review 人工审核 block 不通过
System.out.println("scene = [" + scene + "]");
System.out.println("suggestion = [" + suggestion + "]");
return suggestion;
}
} else {
//单张图片处理失败, 原因是具体的情况详细分析
System.out.println("task process fail. task response:" + JSON.toJSONString(taskResult));
}
}
} else {
/**
* 表明请求整体处理失败,原因视具体的情况详细分析
*/
System.out.println("the whole image scan request failed. response:" + JSON.toJSONString(scrResponse));
}
}
return null;
}
}
依次进行 文本审核 / 图片审核
当有一项为未命中时, 就进行人工审核,
否则就是全自动的啦
我刚才阿里云看了下价格, 真的是好贵