ROS2 + 科大讯飞 初步实现机器人语音控制

环境配置:

        电脑端: ubuntu22.04实体机作为上位机

        ROS版本:ros2-humble

        实体机器人: STM32 + 思岚A1激光雷达

        科大讯飞语音SDK 讯飞开放平台-以语音交互为核心的人工智能开放平台

实现步骤:

        1. 下载和处理科大讯飞语音模块

        (1)进入官网的控制台

       

 (2)在左侧导航栏中选择 语音识别-> 语音听写

        (3)下载语音模块

 

 2.科大讯飞SDK的处理

新建一个工作空间,里面新建两个文件夹 src   voice_ros2

将SDK压缩包解压后的文件,放入voice_ros2中,进入sample目录的iat_online_record_sample目录下,运行下面的命令

source 64bit_make.sh

在bin目录下执行对应的可执行文件了 

./iat_online_record_sample

 

 如果遇到下列问题:error while loading shared libraries: libmsc.so: cannot open shared object file: No such file or directory


就把在终端中进入下列目录中

 执行命令:

sudo cp libmsc.so /usr/local/lib
sudo ldconfig
3.上位机实现

 

src 文件夹中放的是 两个功能包,base 中是stm32的ROS2驱动包,teleop_twist_keyboard是github上下载的键盘控制节点功能包,地址如下:

GitHub - ros2/teleop_twist_keyboard at ardent

这个目录下的文件是SDK解压后的文件,其中 红框中的voice.py是也单独编写的文件

import subprocess
import multiprocessing
import time

def run_iat_online_record_sample(queue):
    process = subprocess.Popen(["./bin/iat_online_record_sample"], 
                               stdout=subprocess.PIPE, 
                               stdin=subprocess.PIPE, 
                               stderr=subprocess.PIPE, 
                               )
    
    # Communicate with the process
    stdout, _ = process.communicate(input=b"0\n1\n")
    
    # Put the result into the queue
    queue.put(stdout.decode('utf-8'))

def main():
    while True:
        # Create a queue for communication between processes
        queue = multiprocessing.Queue()
        
        # Start the process
        process = multiprocessing.Process(target=run_iat_online_record_sample, args=(queue,))
        process.start()
        
        # Wait for the process to finish and get the result from the queue
        process.join()
        result = queue.get()
        
        # Print the result
        print("Result:", result)
        
        # Save the result to a text file, clearing the file first
        with open("result.txt", "w") as f:
            f.write(result)
        
        # Ask user whether to continue recognition
        continue_recognition = input("是否继续识别? (0: 结束, 1: 继续): ")
        if continue_recognition == "0":
            break

if __name__ == "__main__":
    main()

这个文件运行后会在当前目录生成一个result.txt文件,如下图,这个文件的内容每次识别之后都会更新,键盘节点就是通过获取这个文件的数据来通过语音控制机器人移动的

4.修改teleop_twist_keyboard.py文件

在键盘控制的代码前添加读取文件数据的代码


这里将刚刚识别到的语音过滤后存储在voice_command[0]中,以供后续使用,下面会通过判断voice_command[0]中的值来进行不同的操作

import sys
import threading
import time
import os
from std_msgs.msg import String
import geometry_msgs.msg
import rclpy

if sys.platform == 'win32':
    import msvcrt
else:
    import termios
    import tty

msg = """
This node takes keypresses from the keyboard and publishes them
as Twist/TwistStamped messages. It works best with a US keyboard layout.
---------------------------
Moving around:
   u    i    o
   j    k    l
   m    ,    .

For Holonomic mode (strafing), hold down the shift key:
---------------------------
   U    I    O
   J    K    L
   M    <    >

t : up (+z)
b : down (-z)

anything else : stop

q/z : increase/decrease max speeds by 10%
w/x : increase/decrease only linear speed by 10%
e/c : increase/decrease only angular speed by 10%

CTRL-C to quit
"""

moveBindings = {
    'i': (1, 0, 0, 0),
    'o': (1, 0, 0, -1),
    'j': (0, 0, 0, 1),
    'l': (0, 0, 0, -1),
    'u': (1, 0, 0, 1),
    ',': (-1, 0, 0, 0),
    '.': (-1, 0, 0, 1),
    'm': (-1, 0, 0, -1),
    'O': (1, -1, 0, 0),
    'I': (1, 0, 0, 0),
    'J': (0, 1, 0, 0),
    'L': (0, -1, 0, 0),
    'U': (1, 1, 0, 0),
    '<': (-1, 0, 0, 0),
    '>': (-1, -1, 0, 0),
    'M': (-1, 1, 0, 0),
    't': (0, 0, 1, 0),
    'b': (0, 0, -1, 0),
}

speedBindings = {
    'q': (1.1, 1.1),
    'z': (.9, .9),
    'w': (1.1, 1),
    'x': (.9, 1),
    'e': (1, 1.1),
    'c': (1, .9),
}


def getKey(settings):
    if sys.platform == 'win32':
        # getwch() returns a string on Windows
        key = msvcrt.getwch()
    else:
        tty.setraw(sys.stdin.fileno())
        # sys.stdin.read() returns a string on Linux
        key = sys.stdin.read(1)
        termios.tcsetattr(sys.stdin, termios.TCSADRAIN, settings)
    return key


def saveTerminalSettings():
    if sys.platform == 'win32':
        return None
    return termios.tcgetattr(sys.stdin)


def restoreTerminalSettings(old_settings):
    if sys.platform == 'win32':
        return
    termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)


def vels(speed, turn):
    return 'currently:\tspeed %s\tturn %s ' % (speed, turn)


def main():
    settings = saveTerminalSettings()

    rclpy.init()

    node = rclpy.create_node('teleop_twist_keyboard')

    # parameters
    stamped = node.declare_parameter('stamped', False).value
    frame_id = node.declare_parameter('frame_id', '').value
    if not stamped and frame_id:
        raise Exception("'frame_id' can only be set when 'stamped' is True")

    if stamped:
        TwistMsg = geometry_msgs.msg.TwistStamped
    else:
        TwistMsg = geometry_msgs.msg.Twist

    pub = node.create_publisher(TwistMsg, 'cmd_vel', 10)

    voice_command = [None]  # Initializing as a list

    spinner = threading.Thread(target=rclpy.spin, args=(node,))
    spinner.start()

    speed = 0.5
    turn = 1.0
    x = 0.0
    y = 0.0
    z = 0.0
    th = 0.0
    status = 0.0

    twist_msg = TwistMsg()

    if stamped:
        twist = twist_msg.twist
        twist_msg.header.stamp = node.get_clock().now().to_msg()
        twist_msg.header.frame_id = frame_id
    else:
        twist = twist_msg

    try:
        print(msg)
        print(vels(speed, turn))
        while True:
            print("当前工作路径:", os.getcwd())
            with open('./voice_ros2/result.txt', 'r') as f:
            # with open('/home/lsg/xufen3_ws/voice_ros2/result.txt', 'r') as f:
                for line in f:
                    if line.startswith('Result: ['):
                        start = line.find('[')
                        end = line.find(']')
                        if start != -1 and end != -1:
                            voice_command[0] = line[start + 1:end].strip()
                            print("voice_command", voice_command[0])
                            # Clearing the content of result.txt
                            open('./voice_ros2/result.txt', 'w').close()
                            # open('/home/lsg/xufen3_ws/voice_ros2/result.txt', 'w').close()
                            break

            key = getKey(settings)
            # print("键盘控制按键输出", key)
            if key in moveBindings.keys():
                x = moveBindings[key][0]
                y = moveBindings[key][1]
                z = moveBindings[key][2]
                th = moveBindings[key][3]
            elif key in speedBindings.keys():
                speed = speed * speedBindings[key][0]
                turn = turn * speedBindings[key][1]

                print(vels(speed, turn))
                if (status == 14):
                    print(msg)
                status = (status + 1) % 15
           
            elif voice_command[0] is not None:
                if voice_command[0] == "小车后退":
                    print("语音控制小车前进", voice_command[0])
                    x = moveBindings['i'][0]
                    y = moveBindings['i'][1]
                    z = moveBindings['i'][2]
                    th = moveBindings['i'][3]
                elif voice_command[0] == "小车前进":
                    print("语音控制小车后退", voice_command[0])
                    x = moveBindings[','][0]
                    y = moveBindings[','][1]
                    z = moveBindings[','][2]
                    th = moveBindings[','][3]
                elif voice_command[0] == "小车左转":
                    print("语音控制小车左转", voice_command[0])
                    x = moveBindings['j'][0]
                    y = moveBindings['j'][1]
                    z = moveBindings['j'][2]
                    th = moveBindings['j'][3]
                elif voice_command[0] == "小车右转":
                    print("语音控制小车右转", voice_command[0])
                    x = moveBindings['l'][0]
                    y = moveBindings['l'][1]
                    z = moveBindings['l'][2]
                    th = moveBindings['l'][3]
                elif voice_command[0] == "小车停":
                    print("语音控制小车停", voice_command[0])
                    x = moveBindings['k'][0]
                    y = moveBindings['k'][1]
                    z = moveBindings['k'][2]
                    th = moveBindings['k'][3]
                voice_command[0] = None
            else:
                x = 0.0
                y = 0.0
                z = 0.0
                th = 0.0
                if (key == '\x03'):
                    break

            if stamped:
                twist_msg.header.stamp = node.get_clock().now().to_msg()

            twist.linear.x = x * speed
            twist.linear.y = y * speed
            twist.linear.z = z * speed
            twist.angular.x = 0.0
            twist.angular.y = 0.0
            twist.angular.z = th * turn
            pub.publish(twist_msg)

            # Print timestamp every second
            time.sleep(1)
            print("时间戳:", time.time())

    except Exception as e:
        print(e)

    finally:
        if stamped:
            twist_msg.header.stamp = node.get_clock().now().to_msg()

        twist.linear.x = 0.0
        twist.linear.y = 0.0
        twist.linear.z = 0.0
        twist.angular.x = 0.0
        twist.angular.y = 0.0
        twist.angular.z = 0.0
        pub.publish(twist_msg)
        rclpy.shutdown()
        spinner.join()

        restoreTerminalSettings(settings)


if __name__ == '__main__':
    main()

5. 编译运行

// xufen3_ws工作空间下
// 终端1:
colcon build

. install/setup.bash

 ros2 launch ros2_stm32_bridge driver.launch.py


// 终端2:
. install/setup.bash

ros2 run teleop_twist_keyboard teleop_twist_keyboard


// 终端3 ~/xufen3_ws/voice_ros2$ 目录下 :

python3 voice.py 

然后就可以通过语音控制小车
在右侧终端按1进行语音识别,此时将识别到小车前进的命令并打印,在左侧终端按回车健获取result中的命令,将输出voice_command 小车前进,此时再按键ctrl+c,将输出语音控制小车前进 小车前进并且小车开始移动。
目前的代码需要按键才能加载进来语音的命令并控制小车移动,但好在实现了功能,后续还会继续优化。

 

终端3中,输入数字1    然后 语音输入指令 “小车前进” 或“  小车后退”  或 “小车左转” 或“”小车右转” 

等到终端3中,打印了语音指令后,鼠标移动到终端2,按下回车键即可小车移动。

需要按键控制,感觉发出语音指令后,要等好几秒才能移动小车,还需要按键,不过还是初步实现了语音控制,后期优化,实现更实用的语音控制

上一篇:在微服务架构架构中父工程中的`<dependencyManagement>`和 `<dependencies>`的区别


下一篇:动手学深度学习—— 1.引言