c#调用开源espeak语音引擎实现中英文混读

(原创)c#调用开源espeak语音引擎实现中英文混读

需求

实际的.NET项目中需要用到TTS(文本转语音技术)来实现语音播报服务,并且需要支持中英文混读。

方案

1,微软TTS语音引擎。
调用非常简单,也是我首先采用的方案,可参考如下博客:
https://www.cnblogs.com/JuneZhang/archive/2014/04/28/3696594.html
但有个缺点:依赖windows平台的语音引擎与Speech Api,使用过程中会发现:有的系统能用,有的系统用不了(据说是阉割版系统所致);非中文版系统更惨,需要各种操作安装中文语音包,最后安装好了,可能发现还是用不了(项目中血淋淋的教训:windows server英文版系统,各种折腾最后用不了,甲方服务器,又不能随便重装系统),于是寻找替代方案。

2,espeak方案
espeak是c语言写的一个小型的、开放源码的语音合成系统,支持多种语言,这里包括汉语,甚至是粤语等。
C++版开发教程可参考如下博客:
http://www.cnblogs.com/luochengor/p/3511165.html
http://cool.worm.blog.163.com/blog/static/6433900620097535713944/
上github找到了一个C#版本的espeak-ng调用demo,网址如下:
https://github.com/parhamdoustdar/espeak-ng-wrapper
没看懂espeak-ng,最后采用官网原生的espeak-1.48.04-source,并以github上c#框架进行改造,实现了c#调用开源espeak语音引擎TTS,摆脱了windows平台的语音引擎与Speech Api,实现了中英文混读的C#espeak方案。主要代码如下:

c#引用espeak_lib.dll中的函数

/// <summary>
/// P/Invoke declaration for the <c>espeak_SetVoiceByName</c> export of espeak_lib.dll (cdecl).
/// </summary>
/// <param name="name">Voice name; marshalled to native code as a UTF-8 string.</param>
/// <returns>The native status code, surfaced as an <c>Error</c> value.</returns>
[DllImport("espeak_lib.dll", CharSet = CharSet.Auto, CallingConvention = CallingConvention.Cdecl)]
static extern Error espeak_SetVoiceByName([MarshalAs(UnmanagedType.LPUTF8Str)] string name);

/// <summary>
/// P/Invoke declaration for the <c>espeak_SetParameter</c> export of espeak_lib.dll (cdecl).
/// </summary>
/// <param name="parameter">Which engine parameter to change.</param>
/// <param name="value">The value to apply.</param>
/// <param name="type">How <paramref name="value"/> is interpreted (the wrapper in this file passes <c>ParameterType.Absolute</c>).</param>
/// <returns>The native status code, surfaced as an <c>Error</c> value.</returns>
[DllImport("espeak_lib.dll", CharSet = CharSet.Auto, CallingConvention = CallingConvention.Cdecl)]
static extern Error espeak_SetParameter(Parameter parameter, int value, ParameterType type);

/// <summary>
/// P/Invoke declaration for the <c>espeak_Synchronize</c> export of espeak_lib.dll (cdecl).
/// The demo program in this file calls its wrapper to wait until speech has finished.
/// </summary>
/// <returns>The native status code, surfaced as an <c>Error</c> value.</returns>
[DllImport("espeak_lib.dll", CharSet = CharSet.Auto, CallingConvention = CallingConvention.Cdecl)]
static extern Error espeak_Synchronize();

/// <summary>
/// P/Invoke declaration for the <c>espeak_Terminate</c> export of espeak_lib.dll (cdecl).
/// NOTE(review): presumably shuts the engine down and releases its native resources — confirm against speak_lib.h.
/// </summary>
/// <returns>The native status code, surfaced as an <c>Error</c> value.</returns>
[DllImport("espeak_lib.dll", CharSet = CharSet.Auto, CallingConvention = CallingConvention.Cdecl)]
static extern Error espeak_Terminate();

/// <summary>
/// P/Invoke declaration for the <c>espeak_GetCurrentVoice</c> export of espeak_lib.dll (cdecl).
/// </summary>
/// <returns>
/// A raw native pointer; the wrapper in this file reads it with <c>Marshal.PtrToStructure</c> as an <c>ESpeakVoice</c>.
/// NOTE(review): ownership of the pointed-to memory is not visible here — presumably owned by the native library; confirm.
/// </returns>
[DllImport("espeak_lib.dll", CharSet = CharSet.Auto, CallingConvention = CallingConvention.Cdecl)]
static extern IntPtr espeak_GetCurrentVoice();

/// <summary>
/// P/Invoke declaration for the <c>espeak_Synth</c> export of espeak_lib.dll (cdecl).
/// </summary>
/// <param name="text">Text to synthesize; marshalled to native code as a UTF-8 string.</param>
/// <param name="size">
/// Size value forwarded to the native call.
/// NOTE(review): the eSpeak header appears to describe this as the byte size of the text buffer and to type it as
/// <c>size_t</c>; it is declared as <c>int</c> here — confirm width and meaning, especially on 64-bit.
/// </param>
/// <param name="startPosition">Start position; defaults to 0.</param>
/// <param name="positionType">Unit of <paramref name="startPosition"/>; defaults to <c>PositionType.Character</c>.</param>
/// <param name="endPosition">End position; defaults to 0.</param>
/// <param name="flags">Synthesis flags; defaults to <c>SpeechFlags.CharsUtf8</c>.</param>
/// <param name="uniqueIdentifier">Passed through to native code; defaults to <c>default(UIntPtr)</c>.</param>
/// <param name="userData">Passed through to native code; defaults to <c>default(IntPtr)</c>.</param>
/// <returns>The native status code, surfaced as an <c>Error</c> value.</returns>
[DllImport("espeak_lib.dll", CharSet = CharSet.Auto, CallingConvention = CallingConvention.Cdecl)]
static extern Error espeak_Synth([MarshalAs(UnmanagedType.LPUTF8Str)] string text, int size, uint startPosition = 0, PositionType positionType = PositionType.Character, uint endPosition = 0, SpeechFlags flags = SpeechFlags.CharsUtf8, UIntPtr uniqueIdentifier = default(UIntPtr), IntPtr userData = default(IntPtr));

/// <summary>
/// P/Invoke declaration for the <c>espeak_Initialize</c> export of espeak_lib.dll (cdecl).
/// </summary>
/// <param name="output">Audio output mode (the wrapper in this file passes <c>AudioOutput.Synchronous</c>).</param>
/// <param name="bufferLength">Buffer length forwarded to native code (the wrapper passes 100). NOTE(review): unit presumably milliseconds — confirm.</param>
/// <param name="path">Pointer to a native string holding the data path; the wrapper allocates it with <c>Marshal.StringToHGlobalAnsi</c>.</param>
/// <param name="options">Option bits forwarded to native code (the wrapper passes 0).</param>
/// <returns>
/// A raw <c>int</c>; the wrapper treats a value equal to <c>(int)Error.EE_INTERNAL_ERROR</c> as an initialization failure.
/// </returns>
[DllImport("espeak_lib.dll", CharSet = CharSet.Auto, CallingConvention = CallingConvention.Cdecl)]
static extern int espeak_Initialize(AudioOutput output, int bufferLength, IntPtr path, int options);

/// <summary>
/// P/Invoke declaration for the <c>espeak_Cancel</c> export of espeak_lib.dll (cdecl).
/// Exposed by the wrapper in this file as <c>Stop()</c>.
/// </summary>
/// <returns>The native status code, surfaced as an <c>Error</c> value.</returns>
[DllImport("espeak_lib.dll", CharSet = CharSet.Auto, CallingConvention = CallingConvention.Cdecl)]
static extern Error espeak_Cancel();

/// <summary>
/// P/Invoke declaration for the <c>espeak_SetSynthCallback</c> export of espeak_lib.dll (cdecl).
/// </summary>
/// <param name="callback">
/// Managed delegate marshalled to native code as a function pointer.
/// NOTE(review): native code presumably stores this pointer and calls it later, in which case the caller
/// must keep the delegate instance referenced for as long as the engine is in use — confirm.
/// </param>
[DllImport("espeak_lib.dll", CharSet = CharSet.Auto, CallingConvention = CallingConvention.Cdecl)]
static extern void espeak_SetSynthCallback(EventHandler.SynthCallback callback);

c#对espeak_lib.dll函数进行封装

  // Rooted reference to the delegate handed to espeak_SetSynthCallback. Native code only holds a raw
        // function pointer, so without this field the GC could collect the delegate while it is still in use.
        private static EventHandler.SynthCallback _synthCallbackKeepAlive;

        /// <summary>
        /// Initializes the eSpeak engine in synchronous output mode and registers the synthesis callback.
        /// </summary>
        /// <param name="path">Data path handed to <c>espeak_Initialize</c> as an ANSI string.</param>
        /// <exception cref="Exception">
        /// Thrown when <c>espeak_Initialize</c> returns <c>Error.EE_INTERNAL_ERROR</c>.
        /// </exception>
        public static void Initialize(string path)
        {
            IntPtr ptr = Marshal.StringToHGlobalAnsi(path);
            int result;
            try
            {
                result = espeak_Initialize(AudioOutput.Synchronous, 100, ptr, 0);
            }
            finally
            {
                // Release the unmanaged copy of the path even if the native call throws
                // (for example when espeak_lib.dll cannot be loaded).
                Marshal.FreeHGlobal(ptr);
            }

            if (result == (int)Error.EE_INTERNAL_ERROR)
            {
                throw new Exception(string.Format("Could not initialize ESpeak. Maybe there is no espeak data at {0}?", path));
            }

            // Create the delegate once and keep it alive in a static field before passing it to native code.
            _synthCallbackKeepAlive = EventHandler.Handle;
            espeak_SetSynthCallback(_synthCallbackKeepAlive);

            Initialized = true;
        }

        /// <summary>
        /// Sets the speaking rate as an absolute value.
        /// </summary>
        /// <param name="rate">Requested rate; must be between 80 and 450 inclusive.</param>
        /// <returns>
        /// <c>true</c> when the native call reports <c>Error.EE_OK</c>; <c>false</c> for other non-fatal results.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// Thrown when <paramref name="rate"/> is below 80 or above 450.
        /// </exception>
        public static bool SetRate(int rate)
        {
            // The original test used '&&', which can never be true, so out-of-range values were never rejected.
            if (rate < 80 || rate > 450)
            {
                throw new ArgumentOutOfRangeException(nameof(rate), rate, "The rate must be between 80 and 450.");
            }

            var result = espeak_SetParameter(Parameter.Rate, rate, ParameterType.Absolute);
            return CheckResult(result);
        }

        /// <summary>
        /// Translates a native eSpeak status code into a success flag.
        /// </summary>
        /// <param name="result">Status code returned by a native call.</param>
        /// <returns><c>true</c> for <c>Error.EE_OK</c>; <c>false</c> for every other non-fatal code.</returns>
        /// <exception cref="Exception">Thrown for <c>Error.EE_INTERNAL_ERROR</c>.</exception>
        static bool CheckResult(Error result)
        {
            switch (result)
            {
                case Error.EE_OK:
                    return true;

                case Error.EE_INTERNAL_ERROR:
                    throw new Exception("Internal error in ESpeak.");

                // A full buffer is reported as a plain failure, the same as any unrecognized code.
                case Error.EE_BUFFER_FULL:
                default:
                    return false;
            }
        }

        /// <summary>
        /// Synthesizes the given plain text with the currently selected voice.
        /// </summary>
        /// <param name="text">Text to speak; passed to native code as UTF-8.</param>
        /// <returns>
        /// <c>true</c> when the native call reports <c>Error.EE_OK</c>; <c>false</c> for other non-fatal results.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="text"/> is null.</exception>
        public static bool Speak(string text)
        {
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }

            // The text is marshalled as UTF-8, so the size must be the UTF-8 byte count plus the
            // terminating NUL. Counting UTF-16 chars under-sizes CJK text, which takes 3 bytes per character.
            int sizeInBytes = System.Text.Encoding.UTF8.GetByteCount(text) + 1;
            var result = espeak_Synth(text, sizeInBytes);
            return CheckResult(result);
        }

        /// <summary>
        /// Synthesizes the given text with the SSML flag set, using the currently selected voice.
        /// </summary>
        /// <param name="text">SSML markup to speak; passed to native code as UTF-8.</param>
        /// <returns>
        /// <c>true</c> when the native call reports <c>Error.EE_OK</c>; <c>false</c> for other non-fatal results.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="text"/> is null.</exception>
        public static bool SpeakSSML(string text)
        {
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }

            // The text is marshalled as UTF-8, so the size must be the UTF-8 byte count plus the
            // terminating NUL. Counting UTF-16 chars under-sizes CJK text, which takes 3 bytes per character.
            int sizeInBytes = System.Text.Encoding.UTF8.GetByteCount(text) + 1;
            var result = espeak_Synth(text, sizeInBytes, 0, PositionType.Character, 0, SpeechFlags.CharsUtf8 | SpeechFlags.SSML);
            return CheckResult(result);
        }

        /// <summary>
        /// Calls the native <c>espeak_Cancel</c> and converts its status code with <c>CheckResult</c>.
        /// </summary>
        /// <returns><c>true</c> when the native call reports <c>Error.EE_OK</c>; otherwise <c>false</c>.</returns>
        public static bool Stop() => CheckResult(espeak_Cancel());

        /// <summary>
        /// Calls the native <c>espeak_Synchronize</c> and converts its status code with <c>CheckResult</c>.
        /// </summary>
        /// <returns><c>true</c> when the native call reports <c>Error.EE_OK</c>; otherwise <c>false</c>.</returns>
        public static bool Espeak_Synchronize() => CheckResult(espeak_Synchronize());

        /// <summary>
        /// Calls the native <c>espeak_Terminate</c> and converts its status code with <c>CheckResult</c>.
        /// </summary>
        /// <returns><c>true</c> when the native call reports <c>Error.EE_OK</c>; otherwise <c>false</c>.</returns>
        public static bool Espeak_Terminate() => CheckResult(espeak_Terminate());

        /// <summary>
        /// Selects a voice by name through the native <c>espeak_SetVoiceByName</c> call.
        /// </summary>
        /// <param name="name">Voice name to select (the demo in this file uses "zh+m10").</param>
        /// <returns><c>true</c> when the native call reports <c>Error.EE_OK</c>; otherwise <c>false</c>.</returns>
        public static bool SetVoiceByName(string name) => CheckResult(espeak_SetVoiceByName(name));

        /// <summary>
        /// Reads the currently selected native voice and converts it to a managed <c>Voice</c>.
        /// </summary>
        /// <returns>
        /// A <c>Voice</c> whose <c>Priority</c> is the first character of the native languages string and whose
        /// <c>Languages</c> is the remainder of that string. When the native languages string is null or empty,
        /// <c>Languages</c> is empty and <c>Priority</c> is 0.
        /// </returns>
        /// <exception cref="Exception">
        /// Thrown when the native call returns a null pointer or an all-default voice structure.
        /// </exception>
        public static Voice GetCurrentVoice()
        {
            const string emptyVoiceMessage = "eSpeak returned an empty voice object. Did you call one of the ESpeak.SetVoice*() functions?";

            IntPtr espeakVoicePtr = espeak_GetCurrentVoice();

            // Marshal.PtrToStructure rejects a null pointer with an unrelated ArgumentNullException;
            // report it as the same "empty voice" condition instead.
            if (espeakVoicePtr == IntPtr.Zero)
            {
                throw new Exception(emptyVoiceMessage);
            }

            ESpeakVoice espeakVoice = (ESpeakVoice)Marshal.PtrToStructure(espeakVoicePtr, typeof(ESpeakVoice));

            if (espeakVoice.Equals(default(ESpeakVoice)))
            {
                throw new Exception(emptyVoiceMessage);
            }

            // The first character carries the priority and the rest is the language list.
            // Guard against a null or empty string so Substring(1) and [0] cannot throw.
            var languages = espeakVoice.Languages;
            bool hasLanguages = !string.IsNullOrEmpty(languages);

            return new Voice()
            {
                Name = espeakVoice.Name,
                Languages = hasLanguages ? languages.Substring(1) : string.Empty,
                Priority = hasLanguages ? (int)languages[0] : 0,
                Identifier = espeakVoice.Identifier,
            };
        }

c#控制台程序调用发声

 /// <summary>
 /// Console demo: initializes the engine, selects a voice, speaks two sentences and shuts the engine down.
 /// </summary>
 class Program
    {
        static void Main(string[] args)
        {
            Client.Initialize(".");
            Client.SetVoiceByName("zh+m10");

            SpeakAndWait("Helloworld!");
            Client.Stop();

            SpeakAndWait("你好。Hello world");
            Client.Espeak_Terminate();

            // Keep the console window open until a key is read.
            Console.Read();
        }

        /// <summary>
        /// Speaks one sentence and then waits for the speech to finish.
        /// </summary>
        static void SpeakAndWait(string sentence)
        {
            Client.Speak(sentence);
            Client.Espeak_Synchronize();
        }
    }

源码Github地址

因该模块开发主要得益于开源,故将源码开源:

https://github.com/mastertostar/Csharp-espeak-tts

上一篇:java中如何判断字符串是否乱码


下一篇:Mysql学习随笔--4