突发奇想,尝试在小程序中集成语音识别功能,顺理成章地首选了百度语音识别的 Node.js REST API 方案。
其流程如图所示:
小程序端录制语音的代码如下所示:
<!-- Page layout: an image carousel followed by a press-and-hold record button. -->
<view class="page">
  <view class="page__bd page__bd_spacing">
    <!-- Carousel configured from the indicatorDots / autoplay / interval / duration page data. -->
    <swiper indicator-dots="{{indicatorDots}}"
      autoplay="{{autoplay}}" interval="{{interval}}" duration="{{duration}}">
      <block wx:for="{{imgUrls}}">
        <swiper-item>
          <image src="{{item}}" class="slide-image" style="width:100%;" height="150"/>
        </swiper-item>
      </block>
    </swiper>
    <!-- Touch start begins recording; touch end or cancel stops it. The label shows audioState. -->
    <button class="weui-btn" type="primary" plain="true" bindtouchstart="startRecordAudio" bindtouchend="stopRecordAudio" bindtouchcancel="stopRecordAudio">{{audioState}}</button>
  </view>
</view>
// index.js
// Recording parameters handed to recorderManager.start().
// Per the WeChat RecorderManager.start documentation, duration is in milliseconds
// and frameSize is in KB.
const options = {
  duration: 60000,
  sampleRate: 44100,
  numberOfChannels: 1,
  encodeBitRate: 192000,
  format: 'mp3',
  frameSize: 50
};
// Recorder manager returned by wx.getRecorderManager(); used by the record handlers.
const recorderManager = wx.getRecorderManager();
/**
* 开始录制语音
*
*/
startRecordAudio:function()
{
recorderManager.onStart(() => {
this.setData({ audioState: "语音录制中.." });
});
recorderManager.onResume(() => {
console.log(‘recorder resume‘)
});
recorderManager.onPause(() => {
this.setData({ audioState: "语音暂停录制" });
});
recorderManager.onStop((res) => {
console.log(‘recorder stop‘, res);
if (res.duration < 1000) {
this.setData({ audioState: "开始录制" });
wx.showToast({
title: ‘录音不能小于1秒钟‘,
});
return;
};
var that = this;
wx.saveFile({
tempFilePath: res.tempFilePath,
success: function (res) {
var savedFilePath = res.savedFilePath;
that.setData({ tempAudioFilePath: savedFilePath });
that.uploadRecordAudio();
}
})
});
recorderManager.onFrameRecorded((res) => {
const { frameBuffer } = res
console.log(‘frameBuffer.byteLength‘, frameBuffer.byteLength)
});
recorderManager.onError((res) => {
const { error } = res;
console.log("record error is " + error);
});
recorderManager.start(options);
},
/**
* 结束录制语音
*
*/
stopRecordAudio: function () {
this.setData({ audioState: "语音录制完成,上传中.." });
recorderManager.stop();
},
/**
* 上传录制语音
*
*/
uploadRecordAudio: function () {
wx.showLoading({
title: ‘音频上传中..‘,
})
this.setData({ audioState: this.data.tempAudioFilePath });
var that = this;
const uploadTask = wx.uploadFile({
url: ‘http://10.129.83.13:3000/aip‘,
// url: ‘http://aip.butterfly.mopaasapp.com/aip‘,
filePath: that.data.tempAudioFilePath,
name: ‘file‘,
header: {
‘content-type‘: ‘multipart/form-data‘
},
formData: {
‘user‘: ‘test‘
},
success: function (res) {
//do something
console.log("res" + JSON.stringify(res));
wx.hideLoading();
that.echoRecordAudioContent(res.data);
}
});
uploadTask.onProgressUpdate((res) => {
this.setData({ audioState: "语音录制完成,上传中" + res.progress+".." });
console.log(‘上传进度‘, res.progress)
console.log(‘已经上传的数据长度‘, res.totalBytesSent)
console.log(‘预期需要上传的数据总长度‘, res.totalBytesExpectedToSend)
});
// uploadTask.abort() // 取消上传任务
},
/**
* 回显录制语音转化成的文字
*
*/
echoRecordAudioContent: function (obj) {
console.log("文字回显" + JSON.stringify(obj));
var title = obj.result[0];
wx.showToast({
title: obj.result,
})
}
后端采用 Node.js + Express 实现(因为涉及音频格式的转码,需要在本地部署 ffmpeg 进行音频格式转换)。
aip.js代码如下
var express = require('express');
var router = express.Router();
var fs = require('fs');
var formidable = require("formidable");
var ffmpeg = require('fluent-ffmpeg');
var AipSpeechClient = require("baidu-aip-sdk").speech;
// Baidu AIP credentials: APPID / API key / secret key (masked here).
var APP_ID = "******************";
var API_KEY = "******************";
var SECRET_KEY = "******************";
// Create the speech client once; a single shared instance is recommended for calling the service.
var client = new AipSpeechClient(APP_ID, API_KEY, SECRET_KEY);
/**
 * POST / — receive an uploaded audio file and reply with the recognition result.
 *
 * The multipart field `file` is parsed with formidable, moved from the temp
 * upload directory to ./uploads/audio/, and handed to convertAudioType().
 * Every failure path answers with status 200 and a JSON body carrying
 * `err_msg`, so the client always receives a parseable response.
 */
router.post('/', function(req, res, next) {
  // Local require: only this handler needs it, to sanitise the uploaded file name.
  var nodePath = require('path');
  var form = new formidable.IncomingForm();
  form.uploadDir = './uploads/tmp/';
  form.maxFieldsSize = 20 * 1024 * 1024;

  // Guard against answering the same request twice.
  var responded = false;
  function sendOnce(body) {
    if (responded) return;
    responded = true;
    res.status(200).send(body);
  }
  function sendUploadFailure() {
    sendOnce(JSON.stringify({ err_msg: '语音上传失败' }));
  }

  form.on('error', function(error) {
    console.log('********************error'+error);
  });
  form.on('aborted', function() {
  });

  form.parse(req, function(error, fields, files) {
    if (error || !files || !files.file) {
      return sendUploadFailure();
    }
    var tmp_path = files.file.path;
    // The file name comes from the client: keep only its last path segment so
    // it cannot escape the target directory.
    var safe_name = nodePath.basename(String(files.file.name || ''));
    if (!safe_name || safe_name === '.' || safe_name === '..') {
      return sendUploadFailure();
    }
    // Destination directory for uploaded files ("audio" in this example).
    var target_path = './uploads/audio/' + safe_name;
    // Move the file out of the temp directory.
    fs.rename(tmp_path, target_path, function(rename_err) {
      if (rename_err) {
        // Throwing here would take the whole process down; report the failure instead.
        console.log(rename_err);
        return sendUploadFailure();
      }
      // Convert the audio with ffmpeg and run recognition on it.
      convertAudioType(target_path).then((data) => {
        if (data != undefined) {
          sendOnce(data);
        } else {
          sendUploadFailure();
        }
      }).catch((convert_err) => {
        console.log(convert_err);
        sendUploadFailure();
      });
    });
  });
});
/**
 * Convert an uploaded audio file to WAV with ffmpeg, then run speech
 * recognition on the converted file.
 *
 * @param {string} path Path of the uploaded audio file.
 * @returns {Promise<string|undefined>} Resolves with the recognition result
 *   serialised as JSON text, or with undefined when recognition failed.
 *   Rejects with {"err_msg":"语音格式转换失败"} when the conversion fails.
 */
function convertAudioType(path)
{
  return new Promise((resolve,reject) => {
    let failureReported = false;
    const conversionFailed = (err) => {
      if (!failureReported) {
        failureReported = true;
        console.log(err);
      }
      reject({"err_msg":"语音格式转换失败"});
    };

    // File name without directory and without extension.
    let stem = path.slice(path.lastIndexOf('/') + 1);
    const dotIndex = stem.lastIndexOf('.');
    if (dotIndex > 0) {
      // Only strip when an extension exists; slicing at -1 would drop a character.
      stem = stem.slice(0, dotIndex);
    }
    // Output file path.
    const outputPath = './assets/convert/audio/' + stem + '.wav';
    const extractAudio = require('ffmpeg-extract-audio');
    const pending = extractAudio({
      input: path,
      output: outputPath,
      format: 'wav',
      transform: (cmd) => {
        cmd.audioFilters([
          {
            filter: 'volume',
            options: '0.5'
          }
        ]);
        // convertToText() declares 16000 Hz WAV to the recognizer, so resample
        // the output to 16 kHz mono to match.
        cmd.audioFrequency(16000);
        cmd.audioChannels(1);
        cmd.on('error', conversionFailed);
        cmd.on('end', function() {
          // Recognise the converted WAV, not the original upload.
          const recognized = convertToText(outputPath).then((data) => {
            console.log("******************"+JSON.stringify(data));
            return JSON.stringify(data);
          }).catch((error) => {
            // Best effort: the caller treats undefined as "recognition failed".
            console.log(error);
          });
          return resolve(recognized);
        });
      }
    });
    // If extractAudio returns a promise, do not leave its rejection unhandled.
    if (pending && typeof pending.catch === 'function') {
      pending.catch(conversionFailed);
    }
  });
}
/**
 * Run Baidu speech recognition on a local audio file.
 *
 * The file is read and passed to client.recognize() as 'wav' at 16000 Hz.
 *
 * @param {string} path Path of the audio file to recognise.
 * @returns {Promise<Object>} Resolves with {"err_msg":"success","result":<text>}
 *   when the service reports err_no 0 with a non-empty result, otherwise with
 *   {"err_msg":"语音识别失败"}. Rejects with the read error when the file
 *   cannot be read, or with {"err_msg":"语音识别失败"} when the request fails.
 */
function convertToText(path)
{
  return new Promise((resolve,reject) => {
    // Read asynchronously so a large upload does not block the event loop.
    fs.readFile(path, function(readErr, voiceBuffer) {
      if (readErr) {
        return reject(readErr);
      }
      // Start inside a promise chain so a synchronous throw from the client
      // also ends up in the rejection handler below.
      Promise.resolve().then(function() {
        // Recognise the local file.
        return client.recognize(voiceBuffer, 'wav', 16000);
      }).then(function(data) {
        const hasText = Boolean(data) && data.err_no === 0 &&
          Array.isArray(data.result) && data.result.length > 0;
        if (hasText) {
          // Example: {"corpus_no":"6543498202833665207","err_msg":"success.","err_no":0,"result":["欢迎使用百度语音合成,"],"sn":"321835246031523526898"}
          // Author's observation: with this payload the mini program cancelled the request.
          return resolve({"err_msg":"success","result":data.result[0]});
        }
        // Example: {"err_msg":"speech quality error.","err_no":3301,"sn":"725439393781523526861"}
        // Author's observation: the mini program does receive this response.
        return resolve({"err_msg":"语音识别失败"});
      }, function(err) {
        console.log(err);
        // Always settle, even when the rejection reason is empty.
        return reject({"err_msg":"语音识别失败"});
      });
    });
  });
}
module.exports = router;
问题来了,红色标注如下图:
问题仍在持续跟踪中,也已经反馈给百度,等待百度给出合理的解释……