1 模型
本文采用能够反映人对语音感知特性的Mel频率倒谱系数(MFCC)作为特征参数,并为避免时间规整问题采用矢量量化(VQ)技术,开发了一个说话人识别系统。MFCC主要模拟人耳的听觉过程,相对于其它参数,它对语音波形的变化不敏感,更加稳定,因此系统取得了很好的识别结果。实验表明,系统训练和识别的计算量和存储量都比较低。
2 部分代码
function varargout = Main(varargin)
% MAIN M-file for Main.fig
%      MAIN, by itself, creates a new MAIN or raises the existing
%      singleton*.
%
%      H = MAIN returns the handle to a new MAIN or the handle to
%      the existing singleton*.
%
%      MAIN('CALLBACK',hObject,eventData,handles,...) calls the local
%      function named CALLBACK in MAIN.M with the given input arguments.
%
%      MAIN('Property','Value',...) creates a new MAIN or raises the
%      existing singleton*.  Starting from the left, property value pairs are
%      applied to the GUI before Main_OpeningFcn gets called.  An
%      unrecognized property name or invalid value makes property application
%      stop.  All inputs are passed to Main_OpeningFcn via varargin.
%
%      *See GUI Options on GUIDE's Tools menu.  Choose "GUI allows only one
%      instance to run (singleton)".
%
% See also: GUIDE, GUIDATA, GUIHANDLES

% Edit the above text to modify the response to help Main

% Last Modified by GUIDE v2.5 11-Aug-2016 00:35:18

% Begin initialization code - DO NOT EDIT
gui_Singleton = 1;
gui_State = struct('gui_Name',       mfilename, ...
                   'gui_Singleton',  gui_Singleton, ...
                   'gui_OpeningFcn', @Main_OpeningFcn, ...
                   'gui_OutputFcn',  @Main_OutputFcn, ...
                   'gui_LayoutFcn',  [] , ...
                   'gui_Callback',   []);
if nargin && ischar(varargin{1})
    gui_State.gui_Callback = str2func(varargin{1});
end

if nargout
    [varargout{1:nargout}] = gui_mainfcn(gui_State, varargin{:});
else
    gui_mainfcn(gui_State, varargin{:});
end
% End initialization code - DO NOT EDIT


% --- Executes just before Main is made visible.
function Main_OpeningFcn(hObject, eventdata, handles, varargin)
% This function has no output args, see OutputFcn.
% hObject    handle to figure
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
% varargin   command line arguments to Main (see VARARGIN)
%
% Stores the figure handle as the default output and shows a summary of
% the stored training data in the totalrecords and resultText controls.

% Choose default command line output for Main
handles.output = hObject;

% Update handles structure
guidata(hObject, handles);

% UIWAIT makes Main wait for user response (see UIRESUME)
% uiwait(handles.figure1);

% A missing or unreadable training file no longer aborts GUI start-up;
% loadTrainingData warns and returns empty arrays instead.
[TrainingSet, TrainingLable] = loadTrainingData();
totalSampl = size(TrainingSet, 1);

if ~isempty(TrainingLable)
    % Frequency table of the labels, rendered as text.
    str = num2str(tabulate(TrainingLable));
    set(handles.totalrecords, 'String', strcat(str));
end

% Bracket concatenation is used because strcat strips the trailing space
% of a char argument, which turned the label into 'Total Samples:<n>'.
set(handles.resultText, 'String', ['Total Samples: ', num2str(totalSampl)]);


% --- Outputs from this function are returned to the command line.
function varargout = Main_OutputFcn(hObject, eventdata, handles)
% varargout  cell array for returning output args (see VARARGOUT);
% hObject    handle to figure
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)

% Get default command line output from handles structure
varargout{1} = handles.output;


% --- Executes on button press in trainBtn.
function trainBtn_Callback(hObject, eventdata, handles)
% hObject    handle to trainBtn (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
%
% Records two seconds of audio, applies the pre-emphasis filter, saves the
% result to RAW.wav, plays it back and then waits for a key press on the
% command line. No features are extracted or stored here; the final
% message directs the user to "Train with Audio" for that step.
clc;

set(handles.statusText, 'String', 'Start Speaking...');
pause(0.001);   % give the GUI a chance to repaint before recording blocks

Fs = 8000;      % Sampling Freq (Hz)
Duration = 2;   % Duration (sec)

% wavrecord/wavwrite/wavplay are not available in current MATLAB releases;
% audiorecorder, audiowrite and audioplayer are used instead.
myRecording = recordAudio(Fs, Duration);

set(handles.statusText, 'String', 'Saving....');
pause(0.001);

% Pre-emphasis (first-order high-pass) filter.
Prem = 0.97;
Filtered_output = filter([1, -Prem], 1, myRecording);

% wavwrite(..., 'RAW') appended the .wav extension implicitly; audiowrite
% needs it spelled out.
audiowrite('RAW.wav', Filtered_output, Fs, 'BitsPerSample', 16);

% Blocking playback, so the status below changes only after playback ends.
playblocking(audioplayer(Filtered_output, Fs));

% The framing/windowing pass that used to follow produced values that were
% never read in this callback, so it has been dropped.

set(handles.statusText, 'String', 'Input Saved in .wav file format');
pause(0.001);

% Wait for the user on the command line; the typed text is not used.
input('Press any key ', 's');
disp('Select saved input through "Train with Audio" for Feature Extraction');


% --- Executes on button press in testBtn.
function testBtn_Callback(hObject, eventdata, handles)
% hObject    handle to testBtn (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)
%
% Records two seconds of audio, plots each processing stage on axes1..axes4,
% derives a feature vector and classifies it with multisvm against the
% stored training data. The predicted class is written to outputText
% (1 = English, 2 = Marathi, 3 = Hindi; anything else leaves '--').
clc;

set(handles.statusText, 'String', 'Start Speaking...');
drawnow;        % repaint before the blocking recording starts

Fs = 8000;      % Sampling Freq (Hz)
Duration = 2;   % Duration (sec)

% The previous call audiorecorder(2*Fs,Fs) returned a recorder object (with
% the wrong arguments) instead of samples, so every later step failed.
myRecording = recordAudio(Fs, Duration);
pause(0.01);
set(handles.outputText, 'String', '--');

set(handles.statusText, 'String', 'Stop Speaking');
pause(0.001);

% Plot the raw waveform.
axes(handles.axes1);
plot(myRecording);
grid on;
xlabel('Samples');
ylabel('Magnitude(db)');
set(handles.statusText, 'String', 'Done with Recording...');
pause(0.001);

% Pre-emphasis (first-order high-pass) filter.
Prem = 0.97;
Filtered_output = filter([1, -Prem], 1, myRecording);
sound(Filtered_output, Fs);   % play at the recording rate, not the default

axes(handles.axes2);
plot(Filtered_output);
grid on;
xlabel('Samples');
ylabel('Magnitude(db)');

% Frame blocking and Hamming windowing.
Frame_size = Fs*32/1000;                  % samples per frame (32 ms)
Frame_overlap = Fs*16/1000;               % overlapping samples (16 ms)
Frame_step = Frame_size - Frame_overlap;  % hop between frame starts
afterWindow = frameAndWindow(Filtered_output, Frame_size, Frame_step);
nbFrames = size(afterWindow, 1);

if nbFrames == 0
    set(handles.statusText, 'String', 'Recording too short');
    return;
end

axes(handles.axes3);
plot(afterWindow);
grid on;
xlabel('Samples');
ylabel('Magnitude(db)');

R = [ 300 3700 ];  % frequency range to consider
M = 20;            % number of filterbank channels
N = 13;            % number of cepstral coefficients
L = 22;            % cepstral sine lifter parameter

% NOTE(review): afterWindow is nbFrames-by-Frame_size, so nfft is derived
% from the frame count and fft(...,nfft,1) transforms along the frame axis
% rather than within each frame. Likewise mean(CC) below averages over the
% N coefficient rows and yields one value per column. Both are kept exactly
% as they were because the stored TrainingSet presumably was produced by
% the same computation - confirm before changing either.
nfft = 2^nextpow2( nbFrames );  % length of FFT analysis
K = nfft/2+1;                   % length of the unique part of the FFT

% Forward and backward mel frequency warping (natural log).
hz2mel = @( hz )( 1127*log(1+hz/700) );      % Hertz to mel warping function
mel2hz = @( mel )( 700*exp(mel/1127)-700 );  % mel to Hertz warping function

% Type III DCT matrix routine
dctm = @( N, M )( sqrt(2.0/M) * cos( repmat([0:N-1].',1,M) ...
                                  .* repmat(pi*([1:M]-0.5)/M,N,1) ) );

% Cepstral lifter routine
ceplifter = @( N, L )( 1+0.5*L*sin(pi*[0:N-1]/L) );

MAG = abs( fft(afterWindow, nfft, 1) );

% Triangular filterbank with uniformly spaced filters on mel scale
H = trifbank( M, K, R, Fs, hz2mel, mel2hz );  % size of H is M x K

% Filterbank application to unique part of the magnitude spectrum
FBE = H * MAG(1:K,:);

% Conversion of logFBEs to cepstral coefficients through DCT
CC = dctm( N, M ) * log( FBE );

% Cepstral liftering gives liftered cepstral coefficients
CC = diag( ceplifter( N, L ) ) * CC;

% Feature vector handed to the classifier.
meanMFCC = mean(CC);

axes(handles.axes4);
plot(meanMFCC);
grid on;
xlabel('Samples');
ylabel('Magnitude(db)');
set(handles.statusText, 'String', 'Done');

clc;
testData = meanMFCC;

% The label file used to be loaded as 'Traininglable', which fails on
% case-sensitive file systems; the helper uses the same spelling as the
% opening function.
[TrainingSet, TrainingLable] = loadTrainingData();
if isempty(TrainingSet) || isempty(TrainingLable)
    set(handles.statusText, 'String', 'Training data not found');
    return;
end

classes = multisvm(TrainingSet, TrainingLable', testData);

set(handles.outputText, 'String', '--');
languageNames = {'English', 'Marathi', 'Hindi'};
if isscalar(classes) && any(classes == 1:numel(languageNames))
    set(handles.outputText, 'String', languageNames{classes});
end

% A second, pasted copy of the DCT/lifter/classification steps used to
% follow here. It referenced an undefined variable DCT and therefore
% raised an error after the result had already been displayed; it has
% been removed.


function edit1_Callback(hObject, eventdata, handles)
% hObject    handle to edit1 (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)

% Hints: get(hObject,'String') returns contents of edit1 as text
%        str2double(get(hObject,'String')) returns contents of edit1 as a double


% --- Executes during object creation, after setting all properties.
function edit1_CreateFcn(hObject, eventdata, handles)
% hObject    handle to edit1 (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    empty - handles not created until after all CreateFcns called

% Hint: edit controls usually have a white background on Windows.
%       See ISPC and COMPUTER.
if ispc && isequal(get(hObject,'BackgroundColor'), get(0,'defaultUicontrolBackgroundColor'))
    set(hObject,'BackgroundColor','white');
end


% --------------------------------------------------------------------
function uipanel1_ButtonDownFcn(hObject, eventdata, handles)
% hObject    handle to uipanel1 (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)


% --- Executes on mouse press over figure background.
function figure1_ButtonDownFcn(hObject, eventdata, handles)
% hObject    handle to figure1 (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)


% --- If Enable == 'on', executes on mouse press in 5 pixel border.
% --- Otherwise, executes on mouse press in 5 pixel border or over trainWithFilebtn.
function trainWithFilebtn_ButtonDownFcn(hObject, eventdata, handles)
% hObject    handle to trainWithFilebtn (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)


% --- Executes when figure1 is resized.
function figure1_ResizeFcn(hObject, eventdata, handles)
% hObject    handle to figure1 (see GCBO)
% eventdata  reserved - to be defined in a future version of MATLAB
% handles    structure with handles and user data (see GUIDATA)


% --- Executes on key press with focus on testWithAudioBtn and none of its controls.
function testWithAudioBtn_KeyPressFcn(hObject, eventdata, handles)
% hObject    handle to testWithAudioBtn (see GCBO)
% eventdata  structure with the following fields (see UICONTROL)
%	Key: name of the key that was pressed, in lower case
%	Character: character interpretation of the key(s) that was pressed
%	Modifier: name(s) of the modifier key(s) (i.e., control, shift) pressed
% handles    structure with handles and user data (see GUIDATA)


% --- Local helper: blocking mono recording.
function samples = recordAudio(Fs, durationSec)
% Records durationSec seconds of 16-bit mono audio at Fs Hz and returns the
% samples as a double column vector.
recorder = audiorecorder(Fs, 16, 1);
recordblocking(recorder, durationSec);
samples = getaudiodata(recorder);


% --- Local helper: frame blocking followed by Hamming windowing.
function afterWindow = frameAndWindow(signal, Frame_size, Frame_step)
% Splits signal into floor(length/Frame_step) frames of Frame_size samples
% (one frame per row) and multiplies every frame by a Hamming window.
% Samples past the end of the signal are read as zeros.
%
% NOTE(review): frame i starts at sample i*Frame_step+1, so the first
% Frame_step samples never appear in any frame. Kept as it was so that
% feature values stay comparable with previously stored training data.
len = length(signal);
numFrames = floor(len / Frame_step);

% Zero-padded copy, long enough for the last frame to be read in full.
paddedLen = max([numFrames*Frame_size, numFrames*Frame_step + Frame_size, len]);
paddesSignal = zeros(paddedLen, 1);
paddesSignal(1:len) = signal(:);

fdata = zeros(numFrames, Frame_size);
for i = 1:numFrames
    fdata(i, :) = paddesSignal(i*Frame_step + (1:Frame_size));
end

w = hamming(Frame_size);
afterWindow = bsxfun(@times, fdata, w');


% --- Local helper: read the stored training matrix and labels.
function [TrainingSet, TrainingLable] = loadTrainingData()
% Loads the variables TrainingSet and TrainingLable from the MAT-files of
% the same names. If either cannot be read, a warning is issued and both
% outputs are returned empty.
TrainingSet = [];
TrainingLable = [];
try
    setFile = load('TrainingSet');
    lableFile = load('TrainingLable');
    loadedSet = setFile.TrainingSet;
    loadedLable = lableFile.TrainingLable;
    TrainingSet = loadedSet;
    TrainingLable = loadedLable;
catch err
    warning('Main:trainingDataUnavailable', ...
            'Could not load training data: %s', err.message);
end
3 仿真结果
4 参考文献
[1]王伟, and 邓辉文. "基于MFCC参数和VQ的说话人识别系统." 第四届全国信息获取与处理学术会议 0.