"Fossies" - the Fresh Open Source Software Archive 
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Authors: Paul Taylor and Simon King */
34 /* Date : March 1998 */
35 /*-----------------------------------------------------------------------*/
36 /* Functions operating on a single frame */
37 /* */
38 /*=======================================================================*/
39
40 #include "sigpr/EST_sigpr_frame.h"
41 #include "sigpr/EST_fft.h"
42 #include "EST_inline_utils.h"
43 #include "EST_math.h"
44 #include "EST_error.h"
45 #include "EST_TBuffer.h"
46
47 #define ALMOST1 0.99999
48 #define MAX_ABS_CEPS 4.0
49
50 float Hz2Mel(float frequency_in_Hertz);
51 float Mel2Hz(float frequency_in_Mel);
52
53 float lpredict(float *adc, int wsize,
54 float *acf, float *ref, float *lpc,
55 int order);
56 float ogi_lpc(float *adc, int wsize, int order,
57 float *acf, float *ref, float *lpc);
58 /*
59 void lpc2ref(float const *a, float *b, int c)
60 {
61 (void) a;
62 (void) b;
63 (void) c;
64 }
65 */
66
67 void convert2lpc(const EST_FVector &in_frame, const EST_String &in_type,
68 EST_FVector &out_frame)
69 {
70 if (in_type == "sig")
71 sig2lpc(in_frame, out_frame);
72 else if (in_type == "lsf")
73 lsf2lpc(in_frame, out_frame);
74 else if (in_type == "ref")
75 ref2lpc(in_frame, out_frame);
76 else
77 EST_error("Cannot convert coefficient type %s to lpc\n",
78 (const char *) in_type);
79 }
80
81 void convert2ref(const EST_FVector &in_frame, const EST_String &in_type,
82 EST_FVector &out_frame)
83 {
84 EST_FVector tmp;
85
86 if (in_type == "lpc")
87 lpc2ref(in_frame, out_frame);
88 else if (in_type == "sig")
89 {
90 tmp.resize(out_frame.length());
91 sig2lpc(in_frame, tmp);
92 lpc2ref(tmp, out_frame);
93 }
94 else if (in_type == "lsf")
95 {
96 tmp.resize(out_frame.length());
97 lsf2lpc(in_frame, tmp);
98 lpc2ref(tmp, out_frame);
99 }
100 else
101 EST_error("Cannot convert coefficient type %s to reflection coefs\n",
102 (const char *) in_type);
103 }
104
105 void convert2area(const EST_FVector &in_frame, const EST_String &in_type,
106 EST_FVector &out_frame)
107 {
108 EST_FVector tmp;
109
110 if (in_type == "lpc")
111 lpc2ref(in_frame, out_frame);
112 else if (in_type == "sig")
113 {
114 tmp.resize(out_frame.length());
115 sig2lpc(in_frame, tmp);
116 lpc2ref(tmp, out_frame);
117 }
118 else if (in_type == "lsf")
119 {
120 tmp.resize(out_frame.length());
121 lsf2lpc(in_frame, tmp);
122 lpc2ref(tmp, out_frame);
123 }
124 else
125 EST_error("Cannot convert coefficient type %s to reflection coefs\n",
126 (const char *) in_type);
127 }
128
129 void convert2cep(const EST_FVector &in_frame, const EST_String &in_type,
130 EST_FVector &out_frame)
131 {
132 EST_FVector tmp;
133
134 if (in_type == "lpc")
135 lpc2cep(in_frame, out_frame);
136 else if (in_type == "sig")
137 {
138 tmp.resize(out_frame.length());
139 sig2lpc(in_frame, tmp);
140 lpc2cep(tmp, out_frame);
141 }
142 else if (in_type == "lsf")
143 {
144 tmp.resize(out_frame.length());
145 lsf2lpc(in_frame, tmp);
146 lpc2cep(tmp, out_frame);
147 }
148 else if (in_type == "ref")
149 {
150 tmp.resize(out_frame.length());
151 ref2lpc(in_frame, tmp);
152 lpc2cep(tmp, out_frame);
153 }
154 else
155 EST_error("Cannot convert coefficient type %s to cepstrum coefs\n",
156 (const char *) in_type);
157 }
158
159 // void convert2melcep(const EST_FVector &in_frame, const EST_String &in_type,
160 // EST_FVector &out_frame, int num_mfccs, int fbank_order,
161 // float liftering_parameter)
162 // {
163 // EST_FVector tmp;
164
165 // if (in_type == "fbank")
166 // // fbank2melcep(in_frame, out_frame);
167 // return;
168 // else if (in_type == "sig")
169 // {
170 // tmp.resize(out_frame.length());
171 // // incomplete !
172 // // sig2melcep(in_frame, outframe,num_mfccs,fbank_order,liftering_parameter);
173 // }
174 // else
175 // EST_error("Cannot convert coefficient type %s to Mel cepstrum coefs\n",
176 // (const char *) in_type);
177 // }
178
179
180 void convert2lsf(const EST_FVector &in_frame, const EST_String &in_type,
181 EST_FVector &out_frame)
182 {
183 EST_FVector tmp;
184
185 if (in_type == "lpc")
186 lpc2lsf(in_frame, out_frame);
187 else if (in_type == "sig")
188 {
189 tmp.resize(out_frame.length());
190 sig2lpc(in_frame, tmp);
191 lpc2lsf(tmp, out_frame);
192 }
193 else if (in_type == "ref")
194 {
195 tmp.resize(out_frame.length());
196 ref2lpc(in_frame, tmp);
197 lpc2lsf(tmp, out_frame);
198 }
199 else
200 EST_error("Cannot convert coefficient type %s to reflection coefs\n",
201 (const char *) in_type);
202 }
203
204 void frame_convert(const EST_FVector &in_frame, const EST_String &in_type,
205 EST_FVector &out_frame, const EST_String &out_type)
206 {
207 if (out_type == "lpc")
208 convert2lpc(in_frame, in_type, out_frame);
209 else if (out_type == "lsf")
210 convert2lsf(in_frame, in_type, out_frame);
211 else if (out_type == "ref")
212 convert2ref(in_frame, in_type, out_frame);
213 else if (out_type == "cep")
214 convert2cep(in_frame, in_type, out_frame);
215 else if (out_type == "area")
216 convert2area(in_frame, in_type, out_frame);
217 else
218 EST_error("Cannot convert coefficients to type %s\n",
219 (const char *) out_type);
220 }
221
222 void sig2lpc(const EST_FVector &sig, EST_FVector &acf,
223 EST_FVector &ref, EST_FVector &xlpc);
224
225 float lpredict2(EST_FVector &adc, int wsize,
226 EST_FVector &acf, float *ref, float *lpc,
227 int order);
228
229 void sig2lpc(const EST_FVector &sig, EST_FVector &lpc)
230 {
231 EST_FVector acf(lpc.length()), ref(lpc.length());
232
233 /* float *fadc, *facf, *fref, *flpc;
234
235 fadc = new float[sig.length()];
236 facf = new float[lpc.length()];
237 fref = new float[lpc.length()];
238 flpc = new float[lpc.length()];
239
240 // for (int i = 0; i < sig.length(); ++i)
241 // adc[i] = sig(i);
242
243 lpredict2(sig, sig.length(), acf, fref, flpc, lpc.length()-1);
244
245 for (int i = 0; i < lpc.length(); ++i)
246 lpc.a(i) = flpc[i];
247 */
248 sig2lpc(sig, acf, ref, lpc);
249 }
250
251 void sig2ref(const EST_FVector &sig, EST_FVector &ref)
252 {
253 (void)sig;
254 EST_FVector acf(ref.length()), lpc(ref.length());
255
256 // sig2lpc(sig, acf, ref, lpc);
257 }
258
259 void ref2area(const EST_FVector &ref, EST_FVector &area)
260 {
261 for (int i = 1; i < ref.length(); i++)
262 area[i] = (1.0 - ref(i)) / (1.0 + ref(i));
263 }
264
265 void ref2logarea(const EST_FVector &ref, EST_FVector &logarea)
266 {
267 int order = ref.length() -1;
268
269 for(int i = 1; i <= order; i++)
270 {
271 if (ref(i) > ALMOST1)
272 logarea[i] = log((1.0 - ALMOST1) / (1.0 + ALMOST1));
273 else if (ref(i) < -ALMOST1)
274 logarea[i] = log((1.0 + ALMOST1)/(1.0 - ALMOST1));
275 else
276 logarea[i] = log((1.0 - ref(i)) / (1.0 + ref(i)));
277 }
278 }
279
280 void ref2truearea(const EST_FVector &ref, EST_FVector &area)
281 {
282 int order = ref.length() -1;
283
284 area[1] = (1.0 - ref(1)) / (1.0 + ref(1));
285 for(int i = 2; i <= order; i++)
286 area[i] = area[i - 1] * (1.0 - ref(i)) / (1.0 + ref(i));
287 }
288
289 void lpc2cep(const EST_FVector &lpc, EST_FVector &cep)
290 {
291 int n, k;
292 float sum;
293 int order = lpc.length() - 1;
294
295 for (n = 1; n <= order && n <= cep.length(); n++)
296 {
297 sum = 0.0;
298 for (k = 1; k < n; k++)
299 sum += k * cep(k-1) * lpc(n - k);
300 cep[n-1] = lpc(n) + sum / n;
301 }
302
303 /* be wary of these interpolated values */
304 for(n = order + 1; n <= cep.length(); n++)
305 {
306 sum = 0.0;
307 for (k = n - (order - 1); k < n; k++)
308 sum += k * cep(k-1) * lpc(n - k);
309 cep[n-1] = sum / n;
310 }
311
312 /* very occasionally the above can go unstable, fudge if this happens */
313
314 for (n = 0; n < cep.length(); n++)
315 {
316 // check if NaN -- happens on some frames of silence
317 if (isnanf(cep[n]) ) cep[n] = 0.0;
318
319 if (cep[n] > MAX_ABS_CEPS){
320 cerr << "WARNING : cepstral coeff " << n << " was " <<
321 cep[n] << endl;
322 cerr << "lpc coeff " << n << " = " << lpc(n + 1) << endl;
323
324 cep[n] = MAX_ABS_CEPS;
325 }
326 if (cep[n] < -MAX_ABS_CEPS) {
327 cerr << "WARNING : cepstral coeff " << n << " was " <<
328 cep[n] << endl;
329 cep[n] = -MAX_ABS_CEPS;
330 }
331 }
332 }
333
334 // REORG - test this!!
335 void lpc2ref(const EST_FVector &lpc, EST_FVector &ref)
336 {
337
338 // seem to get weird output from this - best not to use it !
339 EST_error("lpc2ref Code unfinished\n");
340
341 // LPC to reflection coefficients
342 // from code from Borja Etxebarria
343 // This code does clever things with pointer and so has been
344 // left using float * arrays.
345
346 // simonk (May 99) : fixed because lpc coeffs always have energy at
347 // coeff 0 - the code here would need changing is lpc coeff 0 was
348 // ever made optional.
349 int lpc_offset=1;
350
351 int order = lpc.length() - 1;
352 int i,j;
353 float f,ai;
354 float *vo,*vx;
355 float *vn = new float[order];
356
357 i = order - 1;
358 ref[i] = lpc(i+lpc_offset);
359 ai = lpc(i+lpc_offset);
360 f = 1-ai*ai;
361 i--;
362
363 for (j=0; j<=i; j++)
364 ref[j] = (lpc(j+lpc_offset)+((ai*lpc(i-j+lpc_offset))))/f;
365
366 /* vn=vtmp in previous #define */
367 // Check whether this should really be a pointer
368 vo = new float[order];
369 for (i = 0; i < order; ++i)
370 vo[i] = ref(i);
371
372 for ( ;i>0; )
373 {
374 ai=vo[i];
375 f = 1-ai*ai;
376 i--;
377 for (j=0; j<=i; j++)
378 vn[j] = (vo[j]+((ai*vo[i-j])))/f;
379
380 ref[i]=vn[i];
381
382 vx = vn;
383 vn = vo;
384 vo = vx;
385 }
386
387 delete [] vn;
388 }
389
390 void ref2lpc(const EST_FVector &ref, EST_FVector &lpc)
391 {
392 // Here we use Christopher Longet Higgin's algorithm converted to
393 // an equivalent by awb. It doesn't have the reverse order or
394 // negation requirement.
395 int order = ref.length() - 1;
396 float a, b;
397 int n, k;
398
399 for (n=0; n < order; n++)
400 {
401 lpc[n] = ref(n);
402 for (k=0; 2 * (k+1) <= n + 1; k++)
403 {
404 a = lpc[k];
405 b = lpc[n-(k + 1)];
406 lpc[k] = a-b * lpc[n];
407 lpc[n-(k+1)] = b-a * lpc[n];
408 }
409 }
410 }
411
412
413 /************************************************************
414 ** LPC_TO_LSF -
415 ** pass the LENGTH of the LPC vector - this is the LPC
416 ** order plus 1. Must pre-allocate lsfs to length+1
417 ************************************************************/
418 void lpc2lsf(const EST_FVector &lpc, EST_FVector &lsf)
419 {
420 (void) lpc;
421 (void) lsf;
422 EST_error("LSF Code unfinished\n");
423 }
424
425 void lsf2lpc(const EST_FVector &lpc, EST_FVector &lsf)
426 {
427 (void) lpc;
428 (void) lsf;
429 EST_error("LSF Code unfinished\n");
430 }
431
432 void sig2lpc(const EST_FVector &sig, EST_FVector &acf,
433 EST_FVector &ref, EST_FVector &lpc)
434 {
435
436 int i, j;
437 float e, ci, sum;
438 int order = lpc.length() -1;
439 EST_FVector tmp(order);
440 int stableorder=-1;
441
442 if ((acf.length() != ref.length()) ||
443 (acf.length() != lpc.length()))
444 EST_error("sig2lpc: acf, ref are not of lpc's order");
445
446 //cerr << "sig2lpc order " << order << endl;
447
448
449 for (i = 0; i <= order; i++)
450 {
451 sum = 0.0;
452 for(j = 0; j < sig.length() - i; j++)
453 sum += sig.a_no_check(j) * sig.a_no_check(j + i);
454 acf.a_no_check(i) = sum;
455 }
456
457 // find lpc coefficients
458 e = acf.a_no_check(0);
459 lpc.a_no_check(0) = 1.0;
460
461 for (i = 1; i <= order; i++)
462 {
463 ci = 0.0;
464 for(j = 1; j < i; j++)
465 ci += lpc.a_no_check(j) * acf.a_no_check(i-j);
466 if (e == 0)
467 ref.a_no_check(i) = ci = 0.0;
468 else
469 ref.a_no_check(i) = ci = (acf.a_no_check(i) - ci) / e;
470 //Check stability of the recursion
471 if (absval(ci) < 1.000000)
472 {
473 lpc.a_no_check(i) = ci;
474 for (j = 1; j < i; j++)
475 tmp.a_no_check(j) = lpc.a_no_check(j) -
476 (ci * lpc.a_no_check(i-j));
477 for( j = 1; j < i; j++)
478 lpc.a_no_check(j) = tmp.a_no_check(j);
479
480 e = (1 - ci * ci) * e;
481 stableorder = i;
482 }
483 else break;
484 }
485 if (stableorder != order)
486 {
487 fprintf(stderr,
488 "warning:levinson instability, order restricted to %d\n",
489 stableorder);
490 for (; i <= order; i++)
491 lpc.a_no_check(i) = 0.0;
492 }
493
494 // normalisation for frame length
495 lpc.a_no_check(0) = e / sig.length();
496 }
497
498 void sig2pow(EST_FVector &frame, float &power)
499 {
500 power = 0.0;
501 for (int i = 0; i < frame.length(); i++)
502 power += pow(frame(i), float(2.0));
503
504 power /= frame.length();
505 }
506
507 void sig2rms(EST_FVector &frame, float &rms_energy)
508 {
509 sig2pow(frame, rms_energy);
510 rms_energy = sqrt(rms_energy);
511 }
512
513
514 float lpredict2(EST_FVector &adc, int wsize,
515 EST_FVector &acf, float *ref, float *lpc,
516 int order)
517 {
518 int i, j;
519 float e, ci, sum;
520 EST_TBuffer<float> tmp(order);
521 int stableorder=-1;
522
523 EST_FVector vref(order + 1), vlpc(order + 1);
524
525 for (i = 0; i <= order; i++)
526 {
527 sum = 0.0;
528 for (j = 0; j < wsize - i; j++)
529 sum += adc[j] * adc[j + i];
530 acf[i] = sum;
531 }
532 /* find lpc coefficients */
533 e = acf[0];
534 lpc[0] = 1.0;
535 for(i = 1; i <= order; i++) {
536 ci = 0.0;
537 for(j = 1; j < i; j++)
538 ci += lpc[j] * acf[i-j];
539 ref[i] = ci = (acf[i] - ci) / e;
540 //Check stability of the recursion
541 if (absval(ci) < 1.000000) {
542 lpc[i] = ci;
543 for(j = 1; j < i; j++)
544 tmp[j] = lpc[j] - ci * lpc[i-j];
545 for(j = 1; j < i; j++)
546 lpc[j] = tmp[j];
547 e = (1 - ci * ci) * e;
548 stableorder = i;
549 }
550 else break;
551 }
552 if (stableorder != order) {
553 fprintf(stderr,
554 "warning:levinson instability, order restricted to %d\n",
555 stableorder);
556 for (;i<=order;i++)
557 lpc[i]=0.0;
558 }
559 return(e);
560 }
561
562
563
564 void sig2fbank(const EST_FVector &sig,
565 EST_FVector &fbank_frame,
566 const float sample_rate,
567 const bool use_power_rather_than_energy,
568 const bool take_log)
569 {
570
571 EST_FVector fft_frame;
572 int i,fbank_order;
573 float Hz_per_fft_coeff;
574
575 // upper and lower limits of filter bank
576 // where the upper limit depends on the sampling frequency
577 // TO DO : add low/high pass filtering HERE
578 float mel_low=0;
579 float mel_high=Hz2Mel(sample_rate / 2);
580
581 // FFT this frame. FFT order will be computed by sig2fft
582 // FFT frame returned will be half length of actual FFT performed
583 sig2fft(sig,fft_frame,use_power_rather_than_energy);
584
585 // this is more easily understood as half the sampling
586 // frequency over half the fft order, but fft_frame_length()
587 // is already halved
588 Hz_per_fft_coeff = 0.5 * sample_rate / fft_frame.length();
589
590 fbank_order = fbank_frame.length();
591
592 // store the list of centre frequencies and lower and upper bounds of
593 // the triangular filters
594 EST_FVector mel_fbank_centre_frequencies(fbank_order+2);
595
596 mel_fbank_centre_frequencies[0]=mel_low;
597
598 for(i=1;i<=fbank_order;i++)
599 mel_fbank_centre_frequencies[i] = mel_low +
600 (float)(i) * (mel_high - mel_low) / (fbank_order+1);
601
602 mel_fbank_centre_frequencies[fbank_order+1]=mel_high;
603
604 // bin FFT in Mel filters
605 fft2fbank(fft_frame,
606 fbank_frame,
607 Hz_per_fft_coeff,
608 mel_fbank_centre_frequencies);
609
610 if(take_log)
611 for(i=0;i<fbank_frame.length();i++)
612 fbank_frame[i] = safe_log(fbank_frame[i]);
613
614 }
615
616 void sig2fft(const EST_FVector &sig,
617 EST_FVector &fft_vec,
618 const bool use_power_rather_than_energy)
619 {
620
621 int i,half_fft_order;
622 float real,imag;
623 float window_size = sig.length();
624 int fft_order = fft_vec.length();
625
626 // work out FFT order required
627 fft_order = 2;
628 while (window_size > fft_order)
629 fft_order *= 2;
630
631 fft_vec = sig;
632
633 // pad with zeros
634 fft_vec.resize(fft_order);
635
636 // in place FFT
637 (void)fastFFT(fft_vec);
638
639 // of course, we only need the lower half of the fft
640 half_fft_order = fft_order/2;
641
642 for(i=0;i<half_fft_order;i++)
643 {
644 real = fft_vec(i*2);
645 imag = fft_vec(i*2 + 1);
646
647 fft_vec[i] = real * real + imag * imag;
648
649 if(!use_power_rather_than_energy)
650 fft_vec[i] = sqrt(fft_vec(i));
651
652 }
653
654 // discard mirror image, retaining energy/power spectrum
655 fft_vec.resize(half_fft_order);
656
657 }
658
659
660
661 void fft2fbank(const EST_FVector &fft_frame,
662 EST_FVector &fbank_vec,
663 const float Hz_per_fft_coeff,
664 const EST_FVector &mel_fbank_frequencies)
665 {
666
667 // expects "half length" FFT - i.e. energy or power spectrum
668 // energy is magnitude; power is squared magnitude
669
670 // mel_fbank_frequencies is a vector of centre frequencies
671 // BUT : first element is lower bound of first filter
672 // last element is upper bound of final filter
673 // i.e. length = num filters + 2
674
675 int i,k;
676 float this_mel_centre,this_mel_low,this_mel_high;
677 EST_FVector filter;
678 int fft_index_start;
679
680 // check that fbank_vec and mel_fbank_frequencies lengths match
681 if(mel_fbank_frequencies.length() != fbank_vec.length() + 2)
682 {
683 EST_error("Filter centre frequencies length (%i) is not equal to fbank order (%i) plus 2\n",mel_fbank_frequencies.length(),
684 fbank_vec.length());
685 return;
686 }
687
688 // filters are computed on the fly
689 for(i=0;i<fbank_vec.length();i++)
690 {
691
692 // work out shape of the i'th filter
693 this_mel_low=mel_fbank_frequencies(i);
694 this_mel_centre=mel_fbank_frequencies(i+1);
695 this_mel_high=mel_fbank_frequencies(i+2);
696
697 make_mel_triangular_filter(this_mel_centre,this_mel_low,this_mel_high,
698 Hz_per_fft_coeff,fft_frame.length(),
699 fft_index_start,filter);
700
701 // do filtering
702 fbank_vec[i]=0.0;
703 for(k=0;k<filter.length();k++)
704 fbank_vec[i] += fft_frame(fft_index_start + k) * filter(k);
705 }
706
707 }
708
709
710 void fbank2melcep(const EST_FVector &fbank_vec,
711 EST_FVector &mfcc_vec,
712 const float liftering_parameter,
713 const bool include_c0)
714 {
715 // a cosine transform of the fbank output
716 // remember to pass LOG fbank params (energy or power)
717
718 int i,j,actual_mfcc_index;
719 float pi_i_over_N,cos_xform_order,const_factor;
720 float PI_over_liftering_parameter;
721
722 if(liftering_parameter != 0.0)
723 PI_over_liftering_parameter = PI / liftering_parameter;
724 else
725 PI_over_liftering_parameter = PI; // since sin(n.PI) == 0
726
727 // if we are not including cepstral coeff 0 (c0) then we need
728 // to do a cosine transform 1 longer than otherwise
729 cos_xform_order = include_c0 ? mfcc_vec.length() : mfcc_vec.length() + 1;
730
731 const_factor = sqrt(2 / (float)(fbank_vec.length()));
732
733 for(i=0;i<mfcc_vec.length();i++)
734 {
735 actual_mfcc_index = include_c0 ? i : i+1;
736
737 pi_i_over_N =
738 PI * (float)(actual_mfcc_index) / (float)(fbank_vec.length());
739
740 for(j=0;j<fbank_vec.length();j++)
741 // j + 0.5 is because we want (j+1) - 0.5
742 mfcc_vec[i] += fbank_vec(j) * cos(pi_i_over_N * ((float)j + 0.5));
743
744 mfcc_vec[i] *= const_factor;
745
746 // liftering
747 mfcc_vec[i] *= 1 + (0.5 * liftering_parameter
748 * sin(PI_over_liftering_parameter * (float)(actual_mfcc_index)));
749 }
750
751 }
752
753
754 void make_mel_triangular_filter(const float this_mel_centre,
755 const float this_mel_low,
756 const float this_mel_high,
757 const float Hz_per_fft_coeff,
758 const int half_fft_order,
759 int &fft_index_start,
760 EST_FVector &filter)
761 {
762
763 // makes a triangular (on a Mel scale) filter and creates
764 // a weight vector to apply to FFT coefficients
765
766 int i,filter_vector_length,fft_index_stop;
767 float rise_slope,fall_slope,this_mel;
768
769
770 // slopes are in units per Mel
771 // this is important - slope is linear in MEl domain, not Hz
772 rise_slope = 1/(this_mel_centre - this_mel_low);
773 fall_slope = 1/(this_mel_centre - this_mel_high);
774
775
776 // care with rounding - we want FFT indices **guaranteed**
777 // to be within filter so we get no negative filter gains
778 // (irint gives the _nearest_ integer)
779
780 // round up
781 if(this_mel_low == 0)
782 fft_index_start=0;
783 else
784 fft_index_start = irint(0.5 + (Mel2Hz(this_mel_low) / Hz_per_fft_coeff));
785
786 // round down
787 fft_index_stop = irint((Mel2Hz(this_mel_high) / Hz_per_fft_coeff) - 0.5);
788
789 if(fft_index_stop > half_fft_order-1)
790 fft_index_stop = half_fft_order-1;
791
792
793 filter_vector_length = fft_index_stop - fft_index_start + 1;
794 filter.resize(filter_vector_length);
795
796 for(i=0;i<filter_vector_length;i++)
797 {
798 this_mel = Hz2Mel( (i + fft_index_start) * Hz_per_fft_coeff );
799
800 if(this_mel <= this_mel_centre)
801 {
802 filter[i] = rise_slope * (this_mel - this_mel_low);
803 }
804 else
805 {
806 filter[i] = 1 + fall_slope * (this_mel - this_mel_centre);
807 }
808
809 }
810
811 }
812
813
814 float Hz2Mel(float frequency_in_Hertz)
815 {
816 return 1127 * log(1 + frequency_in_Hertz/700.0);
817 }
818
819 float Mel2Hz(float frequency_in_Mel)
820 {
821 return (exp(frequency_in_Mel / 1127) - 1) * 700;
822 }