续 ROS应用 —— Ubuntu16.04下 科大讯飞SDK的下载与测试(1)
前提:
已经创建了工作空间,这里假设工作空间为 catkin_ws ;
libmsc.so已经复制到系统下;
1. 建立一个功能包
- 创建功能包: cd ~/catkin_ws/src ; catkin_create_pkg robot_voice std_msgs rospy roscpp
- 编译整个工作空间: cd ~/catkin_ws ; catkin_make
注:此处可参照 ROS整理 —— 工作空间&功能包&覆盖机制(2);若没有设置环境变量,要设置下。
2. 将之前下载的科大讯飞SDK里的 iat_online_record_sample 功能包下的 .c 和 .h 文件复制到 robot_voice/src 下;
这里SDK下载的文件为 Linux_iat1218_5c6e7f8d;
3. 更改 iat_online_record_sample.c 中的代码,具体如下,并重命名为 iat_publish.cpp;
注:需将代码中的 appid 更改为你自己申请的 appid。
/*
* 语音听写(iFly Auto Transform)技术能够实时地将语音转换成对应的文字。
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include "qisr.h"
#include "msp_cmn.h"
#include "msp_errors.h"
#include "speech_recognizer.h"
#include <iconv.h>
#include "ros/ros.h"
#include "std_msgs/String.h"
#define FRAME_LEN 640
#define BUFFER_SIZE 4096

/* Set to 1 by WakeUp(); main() clears it after one recognition pass. */
int wakeupFlag = 0;
/* Set to 1 by show_result(); main() clears it before publishing g_result. */
int resultFlag = 0;

/*
 * Print the transcript accumulated so far and raise resultFlag.
 *
 * string  - NUL-terminated text to display.
 * is_over - non-zero to terminate the console line with '\n'.
 */
static void show_result(char *string, char is_over)
{
    resultFlag = 1;
    printf("\rResult: [ %s ]", string);
    if (is_over)
        putchar('\n');
}

/* Heap buffer holding the transcript of the current utterance (NUL-terminated). */
static char *g_result = NULL;
/* Allocated size of g_result in bytes, including room for the terminator. */
static unsigned int g_buffersize = BUFFER_SIZE;

/*
 * Recognizer callback: append one piece of recognized text to g_result and
 * display the accumulated transcript.
 *
 * result  - NUL-terminated fragment to append; NULL is ignored.
 * is_last - forwarded to show_result() as its is_over argument.
 *
 * The buffer grows in BUFFER_SIZE steps until the fragment fits. If growing
 * fails, the existing transcript is left untouched and the fragment is dropped.
 */
void on_result(const char *result, char is_last)
{
    if (!result)
        return;

    /* Tolerate a missing buffer (e.g. no successful on_speech_begin() yet). */
    if (!g_result) {
        g_result = static_cast<char*>(calloc(BUFFER_SIZE, 1));
        if (!g_result) {
            printf("mem alloc failed\n");
            return;
        }
        g_buffersize = BUFFER_SIZE;
    }

    const size_t used = strlen(g_result);
    const size_t size = strlen(result);
    const size_t needed = used + size + 1; /* +1 for the terminator */

    if (needed > g_buffersize) {
        /* One BUFFER_SIZE step may not be enough for a long fragment. */
        size_t new_size = g_buffersize;
        while (new_size < needed)
            new_size += BUFFER_SIZE;

        /* Keep the old pointer until realloc succeeds so it is never leaked. */
        char *grown = static_cast<char*>(realloc(g_result, new_size));
        if (!grown) {
            printf("mem alloc failed\n");
            return;
        }
        g_result = grown;
        g_buffersize = static_cast<unsigned int>(new_size);
    }

    /* Copies the fragment together with its terminator. */
    memcpy(g_result + used, result, size + 1);
    show_result(g_result, is_last);
}

/*
 * Recognizer callback registered as the speech-begin notifier in demo_mic():
 * drop the previous transcript and install a fresh, zero-filled buffer of
 * BUFFER_SIZE bytes.
 *
 * If the allocation fails, g_result is left NULL and g_buffersize is 0.
 */
void on_speech_begin()
{
    /* free(NULL) is a no-op, so no guard is required. */
    free(g_result);

    g_result = static_cast<char*>(calloc(BUFFER_SIZE, 1));
    if (g_result) {
        g_buffersize = BUFFER_SIZE;
    } else {
        g_buffersize = 0;
        printf("mem alloc failed\n");
    }
    printf("Start Listening...\n");
}
/*
 * Recognizer callback reporting why speech capture ended.
 *
 * reason - end-reason code; END_REASON_VAD_DETECT is reported as a normal
 *          finish, any other value is printed as a recognizer error.
 */
void on_speech_end(int reason)
{
    const bool finished_normally = (END_REASON_VAD_DETECT == reason);

    if (!finished_normally) {
        printf("\nRecognizer error %d\n", reason);
        return;
    }
    printf("\nSpeaking done \n");
}
/*
 * Demo: recognize audio captured from the microphone.
 *
 * Initializes a recognizer with the on_result / on_speech_begin /
 * on_speech_end callbacks, listens for 10 seconds, then stops and releases
 * the recognizer.
 *
 * session_begin_params - parameter string passed through to sr_init().
 *
 * Errors are reported on stdout. If listening cannot be started, the
 * recognizer is released and the function returns immediately instead of
 * waiting for the full recording window.
 */
static void demo_mic(const char* session_begin_params)
{
    struct speech_rec iat;
    struct speech_rec_notifier recnotifier = {
        on_result,
        on_speech_begin,
        on_speech_end
    };

    int errcode = sr_init(&iat, session_begin_params, SR_MIC, &recnotifier);
    if (errcode) {
        printf("speech recognizer init failed\n");
        return;
    }

    errcode = sr_start_listening(&iat);
    if (errcode) {
        printf("start listen failed %d\n", errcode);
        /* Nothing is being recorded: do not block for 10 s, just clean up. */
        sr_uninit(&iat);
        return;
    }

    /* demo 10 seconds recording */
    for (int elapsed = 0; elapsed < 10; ++elapsed)
        sleep(1);

    errcode = sr_stop_listening(&iat);
    if (errcode) {
        printf("stop listening failed %d\n", errcode);
    }

    sr_uninit(&iat);
}
/* main thread: start/stop record ; query the result of recognition.
* record thread: record callback(data write)
* helper thread: ui(keystroke detection)
*/
/*
 * Subscriber callback for the "voiceWakeup" topic: announces the wake-up,
 * pauses briefly, then raises wakeupFlag so that main() starts a recognition
 * pass. The message payload itself is not inspected.
 */
void WakeUp(const std_msgs::String::ConstPtr& msg)
{
    /* 700 ms, expressed in the microseconds usleep() expects. */
    const unsigned int pause_us = 700 * 1000;

    printf("waking up\r\n");
    usleep(pause_us);
    wakeupFlag = 1;
}
/*
 * Node entry point.
 *
 * Subscribes to "voiceWakeup" and advertises "voiceWords". Each time
 * wakeupFlag is raised, the node logs in to the speech service, runs one
 * microphone recognition pass via demo_mic(), and logs out again. Whenever
 * resultFlag is raised, the accumulated transcript in g_result is published
 * on "voiceWords". Always returns 0.
 */
int main(int argc, char* argv[])
{
    // Initialize ROS.
    ros::init(argc, argv, "voiceRecognition");
    ros::NodeHandle n;
    ros::Rate loop_rate(10);

    // Subscribe to the signal that wakes up speech recognition.
    ros::Subscriber wakeUpSub = n.subscribe("voiceWakeup", 1000, WakeUp);
    // Publish the recognized text.
    ros::Publisher voiceWordsPub = n.advertise<std_msgs::String>("voiceWords", 1000);

    ROS_INFO("Sleeping...");
    while (ros::ok())
    {
        // Speech recognition has been woken up.
        if (wakeupFlag) {
            ROS_INFO("Wakeup...");
            const char* login_params = "appid = 5c6e7f8d, work_dir = .";
            const char* session_begin_params =
                "sub = iat, domain = iat, language = zh_cn, "
                "accent = mandarin, sample_rate = 16000, "
                "result_type = plain, result_encoding = utf8";

            const int ret = MSPLogin(NULL, NULL, login_params);
            if (MSP_SUCCESS != ret) {
                // Without a valid login there is nothing to recognize with,
                // so skip the recording pass entirely.
                printf("MSPLogin failed , Error code %d.\n", ret);
            } else {
                printf("Demo recognizing the speech from microphone\n");
                printf("Speak in 10 seconds\n");
                demo_mic(session_begin_params);
                printf("10 sec passed\n");
            }

            // Clear the request either way so a failed login is not retried
            // on every loop iteration; a new wake-up message is required.
            wakeupFlag = 0;
            MSPLogout();
        }

        // A recognition result is available.
        if (resultFlag) {
            resultFlag = 0;
            // Constructing std::string from a null pointer is undefined.
            if (g_result) {
                std_msgs::String msg;
                msg.data = g_result;
                voiceWordsPub.publish(msg);
            }
        }

        ros::spinOnce();
        loop_rate.sleep();
    }

    MSPLogout(); // Logout...
    return 0;
}
4. 直接将Linux_iat1218_5c6e7f8d/include 下的.h文件复制到 robot_voice/include 下; ( 可删除 robot_voice/include 下 的robot_voice )
5. 在CMakeLists.txt下添加编译规则,如下:
# Header search paths: the package-local include/ directory plus the catkin
# dependencies' include directories.
include_directories(
include
${catkin_INCLUDE_DIRS}
)
# Build the iat_publish node from the ROS wrapper and the two SDK sample sources.
add_executable(iat_publish src/iat_publish.cpp src/speech_recognizer.c src/linuxrec.c)
# Link against the catkin libraries, libmsc.so and the listed system libraries.
target_link_libraries(iat_publish ${catkin_LIBRARIES} libmsc.so -ldl -lpthread -lm -lrt -lasound)
6. 运行
// 终端1:启动管理器节点
roscore
// 终端2:运行可执行文件(订阅voiceWakeup话题,发布voiceWords话题)
rosrun robot_voice iat_publish
// 终端3:voiceWords话题是识别数据
rostopic echo /voiceWords
// 终端4:voiceWakeup话题是唤醒功能,每发一次就可以录音一次
rostopic pub /voiceWakeup std_msgs/String "data: 'any string'"
发布唤醒信号后,可以看到 “Start Listening...”,这时对麦克风说话,在线识别后结果会显示出来。
不清楚的地方可以参考之前的博客;
或者参考:https://zhuanlan.zhihu.com/p/35986985
https://www.ncnynl.com/archives/201702/1287.html