使用阿里云TTS实现web语音播报

一、开通阿里云TTS服务

登录阿里云,选择菜单:产品->人工智能->语音合成

点击“申请开通”,然后在“管理控制台”创建一个项目

复制 appkey

注意:访问令牌(token)的有效期只有1天,过期后必须重新获取,所以需要在程序中通过接口定时获取,而不能把token写死在代码里

二、对接语音合成api接口

查看接口文档

由于sdk需要引入很多第三方jar包,所以建议对接RESTful API

复制接口文档里的demo代码,把申请到的token和appkey粘贴进去,就可以直接运行。demo会生成一个syAudio.wav文件,使用音频播放器直接播放即可。

根据文档提示需要在工程中引入三个jar包:

<!-- OkHttp: HTTP client used to call the TTS RESTful endpoint -->
<dependency>
    <groupId>com.squareup.okhttp3</groupId>
    <artifactId>okhttp</artifactId>
    <version>3.9.1</version>
</dependency>
<!-- http://mvnrepository.com/artifact/com.alibaba/fastjson -->
<!-- fastjson: builds the JSON request body and parses the token response -->
<!-- NOTE(review): 1.2.42 is an old fastjson release; check the published security advisories and consider a newer version - confirm before use -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.42</version>
</dependency>
<!-- Aliyun core SDK: used to obtain the access token -->
<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>aliyun-java-sdk-core</artifactId>
    <version>3.7.1</version>
</dependency>

语音生成工具类:

package com.hsoft.web.util;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.alibaba.fastjson.JSONObject;
import com.hsoft.commutil.props.PropertiesUtil;

import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
/**
 * Client for the Aliyun speech-synthesis (TTS) RESTful endpoint.
 *
 * <p>The only public entry point is {@link #text2voice(String)}, which turns a piece of text
 * into encoded audio bytes, or returns {@code null} when synthesis is not possible.
 */
public class SpeechRestfulUtil {
    private static final Logger logger = LoggerFactory.getLogger(SpeechRestfulUtil.class);

    /** TTS endpoint: HTTPS, host nls-gateway.cn-shanghai.aliyuncs.com, path /stream/v1/tts. */
    private static final String TTS_URL = "https://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/tts";

    /** Content type the service answers with when the body carries audio data. */
    private static final String AUDIO_CONTENT_TYPE = "audio/mpeg";

    /** One client shared by all requests instead of building a new client per call. */
    private static final OkHttpClient HTTP_CLIENT = new OkHttpClient();

    private final String accessToken;
    private final String appkey;

    /** Builds an instance from the configured appkey and the currently cached access token. */
    private static SpeechRestfulUtil getInstance() {
        String appkey = PropertiesUtil.getProperty("aliyun.voice.appkey");
        String token = AliTokenUtil.getToken();
        return new SpeechRestfulUtil(appkey, token);
    }

    private SpeechRestfulUtil(String appkey, String token) {
        this.appkey = appkey;
        this.accessToken = token;
    }

    /**
     * Synthesizes speech with an HTTPS GET request.
     *
     * <p>Required query parameters: appkey, token, text, format, sample_rate.
     * Optional ones: voice, volume, speech_rate, pitch_rate.
     *
     * @param text       text to speak; must already be URL-encoded by the caller
     * @param format     requested audio format, e.g. {@code "wav"}
     * @param sampleRate requested sample rate in Hz
     * @return the audio bytes, or {@code null} when the request failed
     */
    private byte[] processGETRequet(String text, String format, int sampleRate) {
        StringBuilder url = new StringBuilder(TTS_URL);
        url.append("?appkey=").append(appkey);
        url.append("&token=").append(accessToken);
        url.append("&text=").append(text);
        url.append("&format=").append(format);
        url.append("&sample_rate=").append(sampleRate);
        // Optional parameters left at the service defaults: voice, volume (0~100), pitch_rate (-500~500).
        // speech_rate (-500~500) is raised from the default 0 to 100.
        url.append("&speech_rate=").append(100);

        Request request = new Request.Builder()
                .url(url.toString())
                .get()
                .build();
        return execute(request, "GET", "processGETRequet");
    }

    /**
     * Synthesizes speech with an HTTPS POST request carrying a JSON body.
     *
     * <p>Required fields: appkey, token, text, format, sample_rate.
     * Optional ones (voice, volume, speech_rate, pitch_rate) are left at the service defaults.
     *
     * @param text          text to speak (plain, not URL-encoded)
     * @param audioSaveFile unused; kept so the signature stays unchanged
     * @param format        requested audio format, e.g. {@code "wav"}
     * @param sampleRate    requested sample rate in Hz
     * @return the audio bytes, or {@code null} when the request failed
     */
    private byte[] processPOSTRequest(String text, String audioSaveFile, String format, int sampleRate) {
        JSONObject taskObject = new JSONObject();
        taskObject.put("appkey", appkey);
        taskObject.put("token", accessToken);
        taskObject.put("text", text);
        taskObject.put("format", format);
        taskObject.put("sample_rate", sampleRate);

        RequestBody reqBody = RequestBody.create(MediaType.parse("application/json"), taskObject.toJSONString());
        Request request = new Request.Builder()
                .url(TTS_URL)
                .header("Content-Type", "application/json")
                .post(reqBody)
                .build();
        return execute(request, "POST", "processPOSTRequest");
    }

    /**
     * Sends the request and extracts the audio payload.
     *
     * @param request   fully built request
     * @param label     HTTP method name used in log messages
     * @param operation name of the calling method, used when logging exceptions
     * @return the audio bytes, or {@code null} on an error response or an exception
     */
    private static byte[] execute(Request request, String label, String operation) {
        // try-with-resources closes the response even when reading the body throws.
        try (Response response = HTTP_CLIENT.newCall(request).execute()) {
            String contentType = response.header("Content-Type");
            // Prefix match so a parameter suffix such as "; charset=..." is still accepted.
            if (contentType != null && contentType.startsWith(AUDIO_CONTENT_TYPE)) {
                byte[] audio = response.body().bytes();
                logger.info("The {} SpeechRestful succeed!", label);
                return audio;
            }
            // Any other content type (null or "application/json") carries an error description.
            String errorMessage = response.body().string();
            logger.warn("The {} SpeechRestful failed: {}", label, errorMessage);
        } catch (Exception e) {
            logger.error(operation, e);
        }
        return null;
    }

    /**
     * Converts text to speech.
     *
     * @param text text to speak
     * @return audio bytes (wav, 16 kHz requested), or {@code null} when the text is blank,
     *         no access token is available, or the service call fails
     */
    public static byte[] text2voice(String text) {
        if (StringUtils.isBlank(text)) {
            return null;
        }
        SpeechRestfulUtil demo = SpeechRestfulUtil.getInstance();
        if (StringUtils.isBlank(demo.accessToken)) {
            // Without a token the service can only reject the call; skip the round trip.
            logger.warn("text2voice skipped: no access token available");
            return null;
        }
        // URL-encode following RFC 3986: URLEncoder is form-style, so patch up the differences.
        String textUrlEncode = text;
        try {
            textUrlEncode = URLEncoder.encode(textUrlEncode, "UTF-8")
                    .replace("+", "%20")
                    .replace("*", "%2A")
                    .replace("%7E", "~");
        } catch (UnsupportedEncodingException e) {
            // Falls back to the unencoded text, as before.
            logger.error("encode", e);
        }
        String format = "wav";
        int sampleRate = 16000;
        return demo.processGETRequet(textUrlEncode, format, sampleRate);
    }

}

获取Token工具类

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.aliyuncs.CommonRequest;
import com.aliyuncs.CommonResponse;
import com.aliyuncs.DefaultAcsClient;
import com.aliyuncs.IAcsClient;
import com.aliyuncs.http.MethodType;
import com.aliyuncs.http.ProtocolType;
import com.aliyuncs.profile.DefaultProfile;
import com.hsoft.commutil.props.PropertiesUtil;

/**
 * Obtains and caches the Aliyun access token used by the speech service.
 *
 * <p>The token is cached in static fields and refreshed once it is missing or has less than
 * one hour of validity left. Refreshing is serialized so concurrent callers trigger a single
 * request to the token service.
 */
public class AliTokenUtil {
	private static final Logger logger = LoggerFactory.getLogger(AliTokenUtil.class);
	// Region ID
	private static final String REGIONID = "cn-shanghai";
	// Domain of the token service
	private static final String DOMAIN = "nls-meta.cn-shanghai.aliyuncs.com";
	// API version
	private static final String API_VERSION = "2019-02-28";
	// API action name
	private static final String REQUEST_ACTION = "CreateToken";
	// Response field names
	private static final String KEY_TOKEN = "Token";
	private static final String KEY_ID = "Id";
	private static final String KEY_EXPIRETIME = "ExpireTime";
	// Refresh when the cached token has no more than this many seconds left.
	private static final long REFRESH_MARGIN_SECONDS = 3600L;

	private static volatile String TOKEN = "";
	// Expiry of the cached token as a timestamp in seconds.
	private static volatile long EXPIRETIME = 0L;

	/**
	 * Returns a usable access token, refreshing the cached one when needed.
	 *
	 * @return the cached or freshly fetched token; when a refresh fails, the previously
	 *         cached value is returned, which is an empty string if no token was ever fetched
	 */
	public static String getToken() {
		if (isCachedTokenUsable()) {
			return TOKEN;
		}
		synchronized (AliTokenUtil.class) {
			// Re-check under the lock: another thread may have refreshed while this one waited.
			if (!isCachedTokenUsable()) {
				refreshToken();
			}
			return TOKEN;
		}
	}

	/** True when a token is cached and more than the refresh margin remains before expiry. */
	private static boolean isCachedTokenUsable() {
		return StringUtils.isNotBlank(TOKEN)
				&& EXPIRETIME - System.currentTimeMillis() / 1000 > REFRESH_MARGIN_SECONDS;
	}

	/** Calls the CreateToken API and updates the cache; leaves the cache untouched on failure. */
	private static void refreshToken() {
		try {
			String accessKeyId = PropertiesUtil.getProperty("aliyun.accessId");
			String accessKeySecret = PropertiesUtil.getProperty("aliyun.accessKey");
			// Create and initialize the DefaultAcsClient instance
			DefaultProfile profile = DefaultProfile.getProfile(REGIONID, accessKeyId, accessKeySecret);
			IAcsClient client = new DefaultAcsClient(profile);
			CommonRequest request = new CommonRequest();
			request.setDomain(DOMAIN);
			request.setVersion(API_VERSION);
			request.setAction(REQUEST_ACTION);
			request.setMethod(MethodType.POST);
			request.setProtocol(ProtocolType.HTTPS);
			CommonResponse response = client.getCommonResponse(request);
			if (response.getHttpStatus() != 200) {
				logger.warn("获取Token失败! httpStatus={}", response.getHttpStatus());
				return;
			}
			JSONObject result = JSON.parseObject(response.getData());
			JSONObject tokenNode = result == null ? null : result.getJSONObject(KEY_TOKEN);
			String tokenId = tokenNode == null ? null : tokenNode.getString(KEY_ID);
			if (StringUtils.isBlank(tokenId)) {
				logger.warn("获取Token失败!");
				return;
			}
			// TOKEN is written before EXPIRETIME so a lock-free reader never pairs the new
			// expiry with the old token.
			TOKEN = tokenId;
			EXPIRETIME = tokenNode.getLongValue(KEY_EXPIRETIME);
			// The token is a credential, so neither it nor the raw response body is logged.
			logger.info("获取Token成功,有效期时间戳(单位:秒): {}", EXPIRETIME);
		} catch (Exception e) {
			logger.error("getToken error!", e);
		}
	}

}

三、集成websocket

当然,我们的目的不是得到一个音频文件,而是在web站点上可以直接听见声音。

为此,需要引入WebSocket,将得到的音频数据以二进制消息的形式直接推送到web页面上,页面先用FileReader对象把数据读取为ArrayBuffer,再通过AudioContext解码并播放

1、引入jar包

<!-- Spring Boot WebSocket starter -->
<dependency>
	<groupId>org.springframework.boot</groupId>
	<artifactId>spring-boot-starter-websocket</artifactId>
	<!-- The two artifacts below are excluded from the starter's transitive dependencies -->
	<!-- NOTE(review): presumably excluded to avoid conflicts with the host project - confirm they are needed in yours -->
	<exclusions>
		<exclusion>
			<groupId>org.slf4j</groupId>
			<artifactId>log4j-over-slf4j</artifactId>
		</exclusion>
		<exclusion>
			<groupId>org.hibernate</groupId>
			<artifactId>hibernate-validator</artifactId>
		</exclusion>
	</exclusions>
</dependency>

2、创建Websocket处理类

/**
 * WebSocket handler for the voice channel: registers sessions with {@link VoicePool} and
 * echoes every text message back to its sender.
 */
public class VoiceHandler extends AbstractWebSocketHandler {
	private static final Logger logger = LoggerFactory.getLogger(VoiceHandler.class);

	/** Adds the newly opened session to the pool. */
	@Override
	public void afterConnectionEstablished(WebSocketSession session) throws Exception {
		VoicePool.add(session);
	}

	/** Removes the closed session from the pool. */
	@Override
	public void afterConnectionClosed(WebSocketSession session, CloseStatus status) throws Exception {
		VoicePool.remove(session);
	}

	/**
	 * Echoes the received text back to the sender.
	 *
	 * @param session session the message arrived on
	 * @param message received text message
	 * @throws Exception if sending the echo fails
	 */
	@Override
	protected void handleTextMessage(WebSocketSession session, TextMessage message) throws Exception {
		logger.debug("receive Msg :{}", message.getPayload());
		TextMessage echo = new TextMessage(message.getPayload());
		// VoicePool.broadcast sends on the same session from a worker thread while holding the
		// session's monitor; take the same monitor so the two sends cannot overlap.
		synchronized (session) {
			session.sendMessage(echo);
		}
	}

}

3、创建websocket连接池管理类

/**
 * Registry of open WebSocket sessions, grouped by the {@code companyId} query parameter of the
 * connection URI, plus the broadcast routine that pushes synthesized audio to a company's sessions.
 */
public class VoicePool {
	private static final Logger logger = LoggerFactory.getLogger(VoicePool.class);
	/** Session id -> session. */
	private static final Map<String, WebSocketSession> pool = new ConcurrentHashMap<String, WebSocketSession>();
	/** Company id -> ids of that company's sessions; the lists are copy-on-write so they can be iterated while being modified. */
	private static final Map<Long, List<String>> userMap = new ConcurrentHashMap<Long, List<String>>();
	private static final ExecutorService threadPool = Executors.newFixedThreadPool(50);

	/**
	 * Registers a session. A session without a valid {@code companyId} parameter is still
	 * tracked in the pool but is not attached to any company, so it receives no broadcasts.
	 *
	 * @param inbound newly opened session
	 */
	public static void add(WebSocketSession inbound) {
		final String sessionId = inbound.getId();
		pool.put(sessionId, inbound);
		Long companyId = resolveCompanyId(inbound);
		logger.info("add companyId:{}", companyId);
		if (companyId != null) {
			// compute() makes "create list if absent, then add" one atomic step per key, so it
			// cannot interleave with the removal of an emptied list in remove().
			userMap.compute(companyId, (key, ids) -> {
				List<String> target = ids != null ? ids : new java.util.concurrent.CopyOnWriteArrayList<String>();
				target.add(sessionId);
				return target;
			});
		}
		logger.info("add connetion {},total size {}", sessionId, pool.size());
	}

	/**
	 * Unregisters a session from its company list and from the pool.
	 *
	 * @param socket session that was closed
	 */
	public static void remove(WebSocketSession socket) {
		final String sessionId = socket.getId();
		Long companyId = resolveCompanyId(socket);
		logger.info("remove companyId:{}", companyId);
		if (companyId != null && StringUtils.isNotBlank(sessionId)) {
			// Returning null drops the mapping once the company has no sessions left.
			userMap.computeIfPresent(companyId, (key, ids) -> {
				ids.remove(sessionId);
				return ids.isEmpty() ? null : ids;
			});
		}

		pool.remove(sessionId);
		logger.info("remove connetion {},total size {}", sessionId, pool.size());
	}

	/**
	 * Synthesizes the message text and pushes the audio to every session of the message's company.
	 *
	 * <p>Does nothing when the message or company id is missing (or 0), when the company has no
	 * sessions, or when synthesis returns no audio. Synthesis runs on the calling thread; the
	 * sends run on the internal thread pool.
	 *
	 * @param vo message carrying the company id and the text to speak
	 */
	public static void broadcast(VoiceMsgVo vo) {
		if (vo == null) {
			return;
		}
		final Long companyId = vo.getCompanyId();
		if (companyId == null || companyId == 0L) {
			return;
		}
		final List<String> sessionIds = userMap.get(companyId);
		if (sessionIds == null || sessionIds.isEmpty()) {
			return;
		}
		final byte[] bytes = SpeechRestfulUtil.text2voice(vo.getText());
		if (bytes == null) {
			return;
		}
		threadPool.execute(() -> {
			for (String id : sessionIds) {
				WebSocketSession connection = pool.get(id);
				if (connection == null) {
					continue;
				}
				// Per-session try/catch: one broken connection must not stop delivery to the others.
				try {
					synchronized (connection) {
						connection.sendMessage(new BinaryMessage(bytes));
					}
				} catch (Exception e) {
					logger.error("broadcast error: companyId:{}", companyId, e);
				}
			}
		});
	}

	/**
	 * Reads the {@code companyId} query parameter of the session URI.
	 *
	 * @return the company id, or {@code null} when the URI, the query or the parameter is
	 *         missing or the value is not a valid number
	 */
	private static Long resolveCompanyId(WebSocketSession session) {
		if (session.getUri() == null) {
			return null;
		}
		String query = session.getUri().getQuery();
		if (StringUtils.isBlank(query)) {
			return null;
		}
		Map<String, String> params = ParamUtil.parser(query);
		String raw = params == null ? null : params.get("companyId");
		if (StringUtils.isBlank(raw)) {
			return null;
		}
		try {
			return Long.valueOf(raw.trim());
		} catch (NumberFormatException e) {
			logger.warn("invalid companyId '{}' on connection {}", raw, session.getId());
			return null;
		}
	}

}

消息对象bean

/**
 * Message to be spoken: the text and the company whose connected pages should hear it.
 */
public class VoiceMsgVo {
	/** Text to synthesize. */
	private String text;
	/** Id of the company whose sessions receive the audio. */
	private Long companyId;

	/** @return the text to synthesize, possibly {@code null} */
	public String getText() {
		return text;
	}

	/** @param text the text to synthesize */
	public void setText(String text) {
		this.text = text;
	}

	/** @return the target company id, possibly {@code null} */
	public Long getCompanyId() {
		return companyId;
	}

	/** @param companyId the target company id */
	public void setCompanyId(Long companyId) {
		this.companyId = companyId;
	}
}

4、Websocket配置

/**
 * WebSocket configuration: exposes the voice handler as a bean and maps it to {@code /ws/voice}.
 */
@Configuration
@EnableWebSocket
public class WebSocketConfig implements WebSocketConfigurer {

	/** Creates the handler serving the voice endpoint. */
	@Bean
	public VoiceHandler voiceHandler() {
		VoiceHandler handler = new VoiceHandler();
		return handler;
	}

	/**
	 * Maps the voice handler to {@code /ws/voice} and accepts connections from any origin.
	 */
	@Override
	public void registerWebSocketHandlers(WebSocketHandlerRegistry registry) {
		VoiceHandler handler = voiceHandler();
		registry.addHandler(handler, "/ws/voice").setAllowedOrigins("*");
	}

}

5、前端js处理

随便创建一个页面,引入下面的js

// Web Audio context used to decode and play the audio pushed by the server.
var audioContext = new (window.AudioContext || window.webkitAudioContext)();
var Chat = {};
Chat.socket = null;

// Decodes an ArrayBuffer of encoded audio and plays it immediately.
Chat.playAudio = function(arrayBuffer) {
	audioContext.decodeAudioData(arrayBuffer, function(buffer) {
		// Decoded PCM data: wire it to the output and start playback.
		var sourceNode = audioContext.createBufferSource();
		sourceNode.buffer = buffer;
		sourceNode.connect(audioContext.destination);
		sourceNode.start(0);
	}, function(e) {
		console.log(e);
	});
};

// Opens the WebSocket to `host` and installs the open/close/message handlers.
Chat.connect = function(host) {
	if ("WebSocket" in window) {
		Chat.socket = new WebSocket(host);
	} else if ("MozWebSocket" in window) {
		Chat.socket = new MozWebSocket(host);
	} else {
		Console.log("Error: WebSocket is not supported by this browser.");
		return;
	}
	Chat.socket.onopen = function() {
		Console.log("Info: 语音播报已启动.");
		// Restart the heartbeat timers for the new connection.
		heartCheck.reset().start(Chat.socket);
	};
	Chat.socket.onclose = function() {
		// Stop the heartbeat timers; there is nothing left to ping.
		heartCheck.reset();
		Console.log("Info: 语音播报已关闭.");
	};
	Chat.socket.onmessage = function(message) {
		// Any incoming frame proves the connection is alive.
		heartCheck.reset().start(Chat.socket);
		var data = message.data;
		// Text frames are heartbeat replies or echoed text, never audio. Passing a string to
		// FileReader.readAsArrayBuffer would throw, so only binary frames go further.
		if (data == null || typeof data === "string") {
			return;
		}
		if (data instanceof ArrayBuffer) {
			Chat.playAudio(data);
			return;
		}
		var reader = new FileReader();
		reader.onload = function(evt) {
			if (evt.target.readyState == FileReader.DONE) {
				Chat.playAudio(evt.target.result);
			}
		};
		reader.onerror = function() {
			console.log(reader.error);
		};
		reader.readAsArrayBuffer(data);
	};
};

// Connects to /ws/voice on the current host, using wss:// unless the page was loaded over http:.
Chat.initialize = function() {
	Chat.companyId = _currCompanyId;
	var scheme = window.location.protocol == "http:" ? "ws://" : "wss://";
	Chat.connect(scheme + window.location.host + "/ws/voice?companyId="
			+ encodeURIComponent(Chat.companyId));
};

// Sends the content of the #chat input over the socket and clears the input.
Chat.sendMessage = function() {
	var input = document.getElementById("chat");
	var message = input.value;
	if (message != "") {
		Chat.socket.send(message);
		input.value = "";
	}
};
var Console = {};
// Appends a message to the #console element, keeping at most 25 entries and scrolling to the
// newest one. Falls back to the browser console when the page has no #console element.
Console.log = function(message) {
	var _console = document.getElementById("console");
	if (_console == null) {
		console.log(message);
		return;
	}
	var p = document.createElement("p");
	p.style.wordWrap = "break-word";
	// textContent instead of innerHTML: log messages are plain text and must not be parsed as markup.
	p.textContent = message;
	_console.appendChild(p);
	while (_console.childNodes.length > 25) {
		_console.removeChild(_console.firstChild);
	}
	_console.scrollTop = _console.scrollHeight;
};
Chat.initialize();


// Heartbeat check: after `timeout` ms without a reset, sends "HeartBeat"; if no reset follows
// within another `timeout` ms, closes the socket.
var heartCheck = {
	timeout : 60000, // 60 seconds
	timeoutObj : null,
	serverTimeoutObj : null,
	// Cancels both pending timers; returns this object so calls can be chained.
	reset : function() {
		clearTimeout(this.timeoutObj);
		clearTimeout(this.serverTimeoutObj);
		return this;
	},
	// Arms the heartbeat timer for the given socket.
	start : function(ws) {
		var checker = this;
		var closeIfSilent = function() {
			// Not reset in time, so the backend is taken to have dropped the connection.
			// If onclose performs a reconnect, calling ws.close() is enough; reconnecting
			// directly would also trigger onclose and reconnect twice.
			ws.close();
		};
		var sendHeartBeat = function() {
			// Send a heartbeat; the backend replies with one, and receiving that reply in
			// onmessage shows the connection is healthy.
			ws.send("HeartBeat");
			checker.serverTimeoutObj = setTimeout(closeIfSilent, checker.timeout);
		};
		this.timeoutObj = setTimeout(sendHeartBeat, this.timeout);
	}
};

四、启动工程测试

启动工程,从后台发送一段消息

// Build a message for company 1 and push it to that company's connected pages.
VoiceMsgVo voiceMsg = new VoiceMsgVo();
voiceMsg.setText("今天天气真好!我出去散步了");
voiceMsg.setCompanyId(1L);
VoicePool.broadcast(voiceMsg);
上一篇:【TTS】AIX->Linux--基于RMAN(真实环境)


下一篇:解读 Android TTS 语音合成播报