求助语音识别代码的注释,要每一句都写明意思,谢谢
发布网友
发布时间:2022-04-25 03:40
我来回答
共1个回答
热心网友
时间:2023-10-23 18:16
这个是完整的代码,我自己的账号发不了这么长,希望好心人帮忙注释啊,非常感谢!!!
(1)端点检测部分(vad):
function [x1,x2] = vad(x)
%语音信号x幅度归一化到[-1,1]
x = double(x);
x = x / max(abs(x));
%常数设置
FrameLen = 240; %帧长度为240
FrameInc = 80; %帧移为80
amp1 = 10; %短时能量高门限10
amp2 = 2; %短时能量低门限为2
zcr1 = 10; %短时过零率高门限为10
zcr2 = 5; %短时过零率低门限为5
maxsilence =3;%静音时间门限3*10ms= 30ms
minlen= 15;%最小语音时间长度15*10ms = 150ms
status= 0; %
count= 0; %语音时间累计
silence = 0; %静音时间累计
%计算过零率
tmp1= enframe(x(1:end-1), FrameLen, FrameInc);
tmp2= enframe(x(2:end), FrameLen, FrameInc);
signs =(tmp1.*tmp2)<0;%符号数组,用于存储相邻两个采样点符号是否相同,即是否穿越0电平
diffs = (tmp1-tmp2)>0.02;%度量相邻两个采样点之间距离,如果大于门限0.02(经验值),则1,否则0
zcr = sum(signs.*diffs,2);%过零率
%计算短时能量
amp =sum(abs(enframe(filter([1 -0.9375], 1, x), FrameLen, FrameInc)), 2);
%调整能量门限
amp1 = min(amp1,max(amp)/4);
amp2 = min(amp2,max(amp)/8);
%开始端点检测
x1 = 0;
x2 = 0;
for n=1:length(zcr)
goto = 0;
switch status
case {0,1}% 0 =静音, 1 =可能开始
if amp(n) > amp1%确信进入语音段
x1 = max(n-count-1,1);
status= 2;
silence = 0;
count= count + 1;
elseif amp(n) > amp2 || ... %可能处于语音段
zcr(n) > zcr2
status = 1;
count= count + 1;
else%静音状态
status= 0;
count= 0;
end
case 2,% 2 =语音段
if amp(n) > amp2 || ...%保持在语音段
zcr(n) > zcr2
count = count + 1;
else%语音将结束
silence = silence+1;
if silence < maxsilence %静音还不够长,尚未结束
count= count + 1;
elseif count < minlen%语音长度太短,认为是噪声
status= 0;
silence = 0;
count= 0;
else%语音结束
status= 3;
end
end
case 3,
break;
end
end
count = count-silence/2;
x2 = x1 + count -1;
subplot(311)
plot(x)
axis([1 length(x) -1 1])
xlabel('语音信号');
line([x1*FrameIncx1*FrameInc ],[-1,1],'Color','red');
line([x2*FrameIncx2*FrameInc ],[-1,1],'Color','red');
subplot(312)
plot(amp);
axis([1 length(amp) 0max(amp)])
xlabel('短时能量');
line([x1,x1],[min(amp),max(amp)],'Color','red');
line([x2,x2],[min(amp),max(amp)],'Color','red');
subplot(313)
plot(zcr);
axis([1 length(zcr) 0max(zcr)])
xlabel('过零率');
line([x1,x1],[min(zcr),max(zcr)],'Color','red');
line([x2,x2],[min(zcr),max(zcr)],'Color','red');
(2)MFCC部分:
function ccc = mfcc(x)
%归一化mel滤波器组系数
bank=melbankm(24,256,8000,0,0.5,'m');%24滤波器个数,8000采样频率
bank=full(bank);
bank=bank/max(bank(:));
% DCT系数,12*24
for k=1:12
n=0:23;
dctcoef(k,:)=cos((2*n+1)*k*pi/(2*24));
end
%归一化倒谱提升窗口
w = 1 + 6 * sin(pi *(1:12) ./ 12);
w = w/max(w);
%预加重滤波器
xx=double(x);
xx=filter([1-0.9375],1,xx);
%语音信号分帧,xx是输入语音信号;256是帧长;80是帧移
xx=enframe(xx,256,80);
%计算每帧的MFCC参数
for i=1:size(xx,1)
y = xx(i,:);
s = y' .* hamming(256);%加汉明窗
t = abs(fft(s));%fft变换
t = t.^2;
c1=dctcoef * log(bank * t(1:129));
c2 = c1.*w';
m(i,:)=c2';
end
%差分系数
dtm = zeros(size(m));
for i=3:size(m,1)-2
dtm(i,:) = -2*m(i-2,:) - m(i-1,:) + m(i+1,:)+ 2*m(i+2,:);
end
dtm = dtm / 3;
%合并mfcc参数和一阶差分mfcc参数
ccc = [m dtm];
%去除首尾两帧,因为这两帧的一阶差分参数为0
ccc =ccc(3:size(m,1)-2,:);
(3)dtw计算部分:
function dist = dtw2(test, ref)
global x y_min y_max
global t r
global D d
global m n
t = test;
r = ref;
n = size(t,1);
m = size(r,1);
d = zeros(m,1);
D =ones(m,1) *realmax;
D(1) = 0;
%如果两个模板长度相差过多,匹配失败
if (2*m-n<3) || (2*n-m<2)
dist =realmax;
return
end
%计算匹配区域
xa = round((2*m-n)/3);
xb = round((2*n-m)*2/3);
if xb>xa
%xb>xa,按下面三个区域匹配
%1:xa
%xa+1:xb
%xb+1:N
for x =1:xa
y_max= 2*x;
y_min= round(0.5*x);
warp
end
for x =(xa+1):xb
y_max= round(0.5*(x-n)+m);
y_min= round(0.5*x);
warp
end
for x =(xb+1):n
y_max= round(0.5*(x-n)+m);
y_min= round(2*(x-n)+m);
warp
end
elseif xa>xb
%xa>xb,按下面三个区域匹配
%0:xb
%xb+1:xa
%xa+1:N
for x =1:xb
y_max= 2*x;
y_min= round(0.5*x);
warp
end
for x =(xb+1):xa
y_max= 2*x;
y_min= round(2*(x-n)+m);
warp
end
for x =(xa+1):n
y_max= round(0.5*(x-n)+m);
y_min= round(2*(x-n)+m);
warp
end
elseif xa==xb
%xa=xb,按下面两个区域匹配
%0:xa
%xa+1:N
for x =1:xa
y_max= 2*x;
y_min= round(0.5*x);
warp
end
for x =(xa+1):n
y_max= round(0.5*(x-n)+m);
y_min= round(2*(x-n)+m);
warp
end
end
%返回匹配分数
dist = D(m);
function warp
global x y_min y_max
global t r
global D d
global m n
d = D;
for y = y_min:y_max
D1 = D(y);
if y>1
D2= D(y-1);
else
D2 =realmax;
end
if y>2
D3= D(y-2);
else
D3 = realmax;
end
d(y) =sum((t(x,:)-r(y,:)).^2) + min([D1,D2,D3]);
end
D = d;
(4)测试函数testdtw部分;
disp('正在计算参考模板的参数...')
for i=1:10
fname = sprintf('G:\\石东东\\语音\\%da.wav',i-1);
x = wavread(fname);
[x1 x2] = vad(x);
m = mfcc(x);
m = m(x1-2:x2-2,:);
ref(i).mfcc = m;
end
disp('正在计算测试模板的参数...')
for i=1:10
fname = sprintf('G:\\石东东\\语音\\%db.wav',i-1);
x = wavread(fname);
[x1 x2] = vad(x);
m = mfcc(x);
m = m(x1-2:x2-2,:);
test(i).mfcc = m;
end
disp('正在进行模板匹配...')
dist = zeros(10,10);
for i=1:10
for j=1:10
dist(i,j) = dtw2(test(i).mfcc, ref(j).mfcc);
end
end
disp('正在计算匹配结果...')
for i=1:10
[d,j] = min(dist(i,:));
fprintf('测试模板%d的识别结果为:%d\n', i-1, j-1);
end