iOS语音转文字实现

2023-09-24 12:35:45

目前正在搞一个IM的APP，内部好友之间可以发送语音，需要长按实现语音转文字的功能，之前使用的是阿里的NUI.framework，但是这个破玩意经常出现转出来的文字重复，即使做了多声道控制也无法处理掉，体验太差。没办法，就决定替换为apple自己的实现，毕竟siri那么强大！此实现包含本地音频及远程音频，你只需要按照数据model保存对应的path即可，内部会自动识别。

现在来看看实现条件：

在 Info.plist里面添加两个键值对：

1、Privacy - Speech Recognition Usage Description （用于请求语音识别） 2、 Privacy - Microphone Usage Description（用于请求麦克风语音输入授权）。

并给出相应的文字描述。

导入库文件：

#import <Speech/Speech.h>

以下为实现头文件及逻辑文件：

头文件： NSVoice2Text.h

#import <Foundation/Foundation.h>
#import <Speech/Speech.h>

NS_ASSUME_NONNULL_BEGIN

/// Speech-recognition authorization state handed to the block of
/// +voice2TextRequestAuthorationStatus:.
typedef NS_ENUM(NSUInteger, NSVoice2TextAuthorationStatus) {
    NSVoice2TextAuthorizationStatusNotDetermined,  // speech recognition has not been authorized yet
    NSVoice2TextAuthorizationStatusDenied,         // the user refused speech recognition
    NSVoice2TextAuthorizationStatusRestricted,     // speech recognition is restricted on this device
    NSVoice2TextAuthorizationStatusAuthorized,     // speech recognition may be used
};


/// One voice clip queued for speech-to-text conversion.
@interface NSVoiceModel : NSObject
/// Location of the audio. Strings matching the http/https pattern used by the
/// converter are handled as remote URLs; anything else is handled as a local file path.
@property (nonatomic,copy) NSString *path;

/// Caller-chosen identifier used to match a conversion result back to this clip.
@property (nonatomic,assign) NSInteger taskId;
@end


/// Outcome of converting one queued clip, delivered through the results block.
@interface NSVoice2TextFinal : NSObject
/// Recognized text (the formatted string of the best transcription).
/// The failure paths in the implementation do not set it, so treat it as
/// possibly nil whenever `error` is non-nil.
@property (nonatomic,copy) NSString *value;

/// Identifier linking this outcome to a task; see NSVoiceModel.taskId.
@property (nonatomic,assign) NSInteger taskId;

/// Failure reason; nil when the conversion produced text.
@property (nonatomic,copy) NSError * __nullable error;
@end


/// Queue-based speech-to-text converter built on the Speech framework.
/// The public API consists of class methods only.
@interface NSVoice2Text : NSObject

/// YES while queued clips are being processed.
+ (BOOL) isRunning;

// Authorization
/// Requests speech-recognition authorization from the system.
/// @param requestBlock Receives the resulting authorization status.
+ (void)voice2TextRequestAuthorationStatus:(void (^)(NSVoice2TextAuthorationStatus status))requestBlock;

/// Appends every model in `glist` to the conversion queue and starts processing
/// when the converter is idle.
/// @param glist Clips to convert, processed in order.
/// @param runningModelBlock Called with the model whose conversion is starting; may be nil.
/// @param resultsBlock Called with the outcome of a model's conversion.
+ (void)voice2TextGotter:(NSArray <NSVoiceModel *>*)glist runningModelBlock:(void (^__nullable)(NSVoiceModel *amodel))runningModelBlock resultsBlock:(void (^)(NSVoice2TextFinal *finalValue))resultsBlock;

@end

NS_ASSUME_NONNULL_END

实现文件：NSVoice2Text.m

#import "NSVoice2Text.h"

/// Block type that receives the outcome of one conversion.
typedef void (^VoiceConversionResultsBlock) (NSVoice2TextFinal *finalValue);

/// Private storage: the callbacks supplied to +voice2TextGotter:... are attached
/// to each queued model so they travel with it through the queue.
@interface NSVoiceModel ()
/// Called with the outcome of this model's conversion.
@property (nonatomic, copy) VoiceConversionResultsBlock voiceConversionBlock;

/// Called when this model becomes the one being converted; may be nil.
@property (nonatomic, copy) void (^voiceConversionRunningBlock)(NSVoiceModel *md);
@end

// Plain data holder: no methods are defined here, only the declared properties are used.
@implementation NSVoiceModel

@end

// Plain result holder: no methods are defined here, only the declared properties are used.
@implementation NSVoice2TextFinal
@end


// Shared converter instance; created lazily by +shareInstance and cleared by +releaseInstance.
static NSVoice2Text *v2text = nil;

/// Private state of the converter.
@interface NSVoice2Text ()<SFSpeechRecognizerDelegate>
{
    // YES from the moment -resume starts working on the queue until the queue is found empty.
    BOOL isRunning;
    // Pending clips in arrival order; the first object is the one being converted.
    NSMutableArray <NSVoiceModel *>* taskList;
}

/// Status most recently delivered by the authorization request.
@property (nonatomic, assign) NSVoice2TextAuthorationStatus authorationStatus;

@property(nonatomic,strong)SFSpeechRecognizer *speechRecognizer;// speech recognizer (created lazily for zh-CN)
@property(nonatomic,strong) SFSpeechURLRecognitionRequest *recognitionRequest;// request for the clip currently being converted
@property (nonatomic, strong) SFSpeechRecognitionTask *recognitionTask;// handle of the running recognition task

@end

@implementation NSVoice2Text

// Domain for errors this class raises itself (errors coming from the Speech
// framework are forwarded unchanged).
static NSString * const kNSVoice2TextErrorDomain = @"NSVoice2TextErrorDomain";

#pragma mark - lifecycle

/// Creates a converter with an empty task queue.
- (instancetype)init
{
    self = [super init];
    if (self)
    {
        taskList = [NSMutableArray array];
    }
    return self;
}

/// Returns the shared converter, creating it on first use.
/// The instance is cleared again by +releaseInstance once the queue is drained,
/// so it is recreated on demand. There is no locking here: all access is
/// expected to happen on the main thread.
+ (instancetype)shareInstance
{
    if (!v2text)
    {
        v2text = [[NSVoice2Text alloc] init];
    }

    return v2text;
}

/// Drops the shared converter so it can be deallocated.
+ (void)releaseInstance
{
    v2text = nil;
}

/// Lazily creates the zh-CN recognizer.
/// May return nil: -initWithLocale: yields nil when the locale is not supported.
- (SFSpeechRecognizer *)speechRecognizer
{
    if (_speechRecognizer == nil) {
        NSLocale *cale = [[NSLocale alloc]initWithLocaleIdentifier:@"zh-CN"];
        _speechRecognizer = [[SFSpeechRecognizer alloc]initWithLocale:cale];
        _speechRecognizer.delegate = self;
    }
    return _speechRecognizer;
}

#pragma mark - queue

/// YES while queued clips are being processed.
+ (BOOL) isRunning
{
    // Read the global directly: going through +shareInstance would allocate a
    // converter just to answer "NO".
    return v2text != nil && v2text->isRunning;
}

/// Starts converting the clip at the head of the queue, or shuts the converter
/// down when the queue is empty.
- (void)resume
{
    NSVoiceModel *md = [taskList firstObject];
    if (md == nil)
    {
        // Queue drained: mark idle and let the shared instance go.
        // Nothing may touch self after this point.
        isRunning = NO;
        [NSVoice2Text releaseInstance];
        return;
    }

    isRunning = YES;
    if (md.voiceConversionRunningBlock)
    {
        md.voiceConversionRunningBlock(md);
    }

    // A nil path has length 0 as well, so this covers both "missing" and "empty".
    if (md.path.length == 0)
    {
        [self finishModel:md withText:nil error:[self invalidPathError]];
        return;
    }

    NSString *text = @"^(http|https)+.*";
    NSPredicate *regextest = [NSPredicate predicateWithFormat:@"SELF MATCHES %@", text];
    if ([regextest evaluateWithObject:md.path])
    {
        [self startVoiceConversionWithURL:md.path];
    }
    else
    {
        [self startVoiceConversionWithFilePath:md.path];
    }
}

/// Appends one clip to the end of the queue.
- (void)addItToTask:(NSVoiceModel *)md
{
    [taskList addObject:md];
}

/// Reports the outcome of `md` to its results block, removes it from the queue
/// and moves on to the next clip.
/// @param md    The clip that finished; ignored unless it is still the head of the queue.
/// @param text  Recognized text, or nil on failure.
/// @param error Failure reason, or nil on success.
- (void)finishModel:(NSVoiceModel *)md withText:(NSString *)text error:(NSError *)error
{
    // Defensive: only the head of the queue is pending, so a late or duplicate
    // callback for an already-settled clip must not be reported twice.
    if (md == nil || [taskList firstObject] != md)
    {
        return;
    }

    NSVoice2TextFinal *el = [[NSVoice2TextFinal alloc] init];
    el.taskId = md.taskId;
    el.error = error;
    if (text != nil)
    {
        el.value = text;
    }
    if (md.voiceConversionBlock)
    {
        md.voiceConversionBlock(el);
    }

    [taskList removeObject:md];
    [self resume];
}

/// Error reported when a clip has no usable path.
- (NSError *)invalidPathError
{
    return [NSError errorWithDomain:@"语音路径错误或为空" code:404 userInfo:nil];
}

/// Builds an error for failures detected by this class itself.
- (NSError *)internalErrorWithCode:(NSInteger)code message:(NSString *)message
{
    return [NSError errorWithDomain:kNSVoice2TextErrorDomain
                               code:code
                           userInfo:@{NSLocalizedDescriptionKey : message}];
}

#pragma mark - public

/// Requests speech-recognition authorization and reports the result.
/// The Speech framework may invoke its handler off the main thread, so the
/// status is recorded and `requestBlock` is called on the main queue; callers
/// can update UI directly from the block.
/// @param requestBlock Receives the resulting authorization status.
+ (void)voice2TextRequestAuthorationStatus:(void (^)(NSVoice2TextAuthorationStatus status))requestBlock
{
    [SFSpeechRecognizer requestAuthorization:^(SFSpeechRecognizerAuthorizationStatus status)
    {
        // Map explicitly instead of relying on the two enums sharing raw values.
        NSVoice2TextAuthorationStatus mapped = NSVoice2TextAuthorizationStatusNotDetermined;
        switch (status)
        {
            case SFSpeechRecognizerAuthorizationStatusNotDetermined:
                mapped = NSVoice2TextAuthorizationStatusNotDetermined;
                break;
            case SFSpeechRecognizerAuthorizationStatusDenied:
                mapped = NSVoice2TextAuthorizationStatusDenied;
                break;
            case SFSpeechRecognizerAuthorizationStatusRestricted:
                mapped = NSVoice2TextAuthorizationStatusRestricted;
                break;
            case SFSpeechRecognizerAuthorizationStatusAuthorized:
                mapped = NSVoice2TextAuthorizationStatusAuthorized;
                break;
        }

        dispatch_async(dispatch_get_main_queue(), ^{
            [[NSVoice2Text shareInstance] setAuthorationStatus:mapped];
            if (requestBlock)
            {
                requestBlock(mapped);
            }
        });
    }];
}

/// Appends every model in `glist` to the conversion queue and starts processing
/// when the converter is idle.
/// @param glist Clips to convert, processed in order.
/// @param runningModelBlock Called with the model whose conversion is starting; may be nil.
/// @param resultsBlock Called once per model with its outcome (text or error).
+ (void)voice2TextGotter:(NSArray <NSVoiceModel *>*)glist runningModelBlock:(void (^__nullable)(NSVoiceModel *amodel))runningModelBlock resultsBlock:(void (^)(NSVoice2TextFinal *finalValue))resultsBlock
{
    // Hold the instance locally: -resume may clear the global while it runs.
    NSVoice2Text *converter = [NSVoice2Text shareInstance];
    for (NSVoiceModel *model in glist)
    {
        [model setVoiceConversionRunningBlock:runningModelBlock];
        [model setVoiceConversionBlock:resultsBlock];
        [converter addItToTask:model];
    }

    if (!converter->isRunning)
    {
        [converter resume];
    }
}

#pragma mark - request construction

/// Starts recognition of the local audio file at `path`.
- (void)startVoiceConversionWithFilePath:(NSString *)path
{
    self.recognitionRequest = [[SFSpeechURLRecognitionRequest alloc]initWithURL:[NSURL fileURLWithPath:path]];
    [self startVoiceConversion];
}

/// Starts recognition of the audio referenced by the URL string `url`.
/// NOTE(review): SFSpeechURLRecognitionRequest is documented for recorded audio
/// files; confirm that remote http(s) URLs are accepted, otherwise download first.
- (void)startVoiceConversionWithURL:(NSString *)url
{
    NSURL *remoteURL = [NSURL URLWithString:url];
    if (remoteURL == nil)
    {
        // Malformed URL string: fail this clip instead of building a request from nil.
        [self finishModel:[taskList firstObject] withText:nil error:[self invalidPathError]];
        return;
    }

    self.recognitionRequest = [[SFSpeechURLRecognitionRequest alloc]initWithURL:remoteURL];
    [self startVoiceConversion];
}

#pragma mark - private methods
/// Runs the prepared request for the clip at the head of the queue and reports
/// its outcome: text on the final result, or the error if recognition fails.
- (void)startVoiceConversion
{
    // Pin the clip this request belongs to so the outcome is attributed correctly.
    NSVoiceModel *current = [taskList firstObject];

    SFSpeechRecognizer *recognizer = self.speechRecognizer;
    if (recognizer == nil)
    {
        // Without a recognizer the handler would never fire and the queue would stall.
        NSError *unavailable = [self internalErrorWithCode:503
                                                   message:@"Speech recognizer is unavailable for locale zh-CN"];
        [self finishModel:current withText:nil error:unavailable];
        return;
    }

    // Weak: self owns the task, and the task owns this handler.
    __weak typeof(self) weakSelf = self;
    self.recognitionTask = [recognizer recognitionTaskWithRequest:self.recognitionRequest resultHandler:^(SFSpeechRecognitionResult * _Nullable result, NSError * _Nullable error){
        __strong typeof(weakSelf) strongSelf = weakSelf;
        if (strongSelf == nil)
        {
            return;
        }

        if (error != nil)
        {
            // Recognition failed: tell the caller and keep the queue moving.
            [strongSelf finishModel:current withText:nil error:error];
            return;
        }

        if (result == nil)
        {
            NSError *empty = [strongSelf internalErrorWithCode:500
                                                       message:@"Speech recognition returned no result"];
            [strongSelf finishModel:current withText:nil error:empty];
            return;
        }

        // Partial results are ignored; only the final transcription is reported.
        if ([result isFinal])
        {
            NSString *str = [[result bestTranscription] formattedString];
            [strongSelf finishModel:current withText:str error:nil];
        }
    }];
}
@end

此实现内部已经实现了队列转文字功能，你只需要随时传入数据模型即可。

代码分析：

1、权限请求

+ (void)voice2TextRequestAuthorationStatus:(void (^)(NSVoice2TextAuthorationStatus status))requestBlock;

用于请求隐私权限，只有用户同意后方可使用此功能。否则无法使用此功能。

2、传入音频文件路径

+ (void)voice2TextGotter:(NSArray <NSVoiceModel *>*)glist 
runningModelBlock:(void (^__nullable)(NSVoiceModel *amodel))runningModelBlock 
resultsBlock:(void (^)(NSVoice2TextFinal *finalValue))resultsBlock

音频以数据模型NSVoiceModel传入，将你的音频文件与此模型建立映射关系，taskId用于实现绑定，参考头文件的定义及实现。

2.1 runningModelBlock，因为支持队列事务，所以当前正在处理哪条，就会对外输出哪条。页面上可以据此显示"正在转换中"文字

2.2 resultsBlock，转换结果文字，以NSVoice2TextFinal对外输出，你只需要处理好这里面的逻辑即可。

3、完整使用：

// Usage example: request authorization, then queue one voice message for conversion.
// NOTE(review): bmodel, weakSelf and intToStr are not defined in this snippet;
// presumably they come from the surrounding view controller — confirm before reuse.
[NSVoice2Text voice2TextRequestAuthorationStatus:^(NSVoice2TextAuthorationStatus status)
    {
        if (status == NSVoice2TextAuthorizationStatusAuthorized)
        {
            NSVoiceModel *md = [[NSVoiceModel alloc] init];
            [md setTaskId:[bmodel.messageId integerValue]];
            [md setPath:bmodel.audioFilePath];
            
            [NSVoice2Text voice2TextGotter:@[md] runningModelBlock:^(NSVoiceModel * _Nonnull amodel)
            {
                NSString *taskId = intToStr(amodel.taskId);
                // Use taskId to find the matching UI element and show "converting…"
            }
            resultsBlock:^(NSVoice2TextFinal * _Nonnull finalValue)
            {
                if (!finalValue.error)
                {
                    NSString *taskId = intToStr(finalValue.taskId);
                    NSString *trTexgt = [finalValue value];

                    // Use taskId to find the matching UI element; conversion is done and the text is available
                }
                else
                {
                    NSString *taskId = intToStr(finalValue.taskId);
                    // Conversion failed for this taskId; the matching UI element can show "conversion failed" or similar
                }
            }];
        }
        else
        {
            [weakSelf showToastMessageThenHide:@"未授权使用语音识别功能"];
        }
    }];

码农公寓

相关文章