"Fossies" - the Fresh Open Source Software Archive 
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Paul Taylor */
34 /* Date : April 1995 */
35 /*-----------------------------------------------------------------------*/
36 /* Change EST_Wave utility main */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <iostream>
41 #include <cmath>
42 #include "EST_Wave.h"
43 #include "EST_cmd_line.h"
44 #include "EST_cmd_line_options.h"
45 #include "EST_sigpr.h"
46 #include "EST_wave_aux.h"
47 #include "EST.h"
48
/* Sign of x: 1 if x is positive, -1 if x is negative, 0 if x is zero.
 * The argument is fully parenthesized so that expressions containing
 * low-precedence operators (e.g. sgn(a & b)) are evaluated as a unit.
 * Note: x may be evaluated twice, so avoid arguments with side effects. */
#define sgn(x) (((x) > 0) ? 1 : ((x) ? -1 : 0))
50
51 void wave_extract_channel(EST_Wave &single, const EST_Wave &multi, EST_IList &ch_list);
52
53
54 void extract_channels(EST_Wave &single, const EST_Wave &multi, EST_IList &ch_list);
55
56 /** @name <command>ch_wave</command> <emphasis>Audio file manipulation</emphasis>
57 @id ch_wave_manual
58 * @toc
59 */
60
61 //@{
62
63
64 /**@name Synopsis
65 */
66 //@{
67
68 //@synopsis
69
70 /**
71 ch_wave is used to manipulate the format of a waveform
72 file. Operations include:
73
74 <itemizedlist>
75 <listitem><para>file format conversion</para></listitem>
76 <listitem><para>resampling (changing the sampling frequency)</para></listitem>
77 <listitem><para>byte-swapping</para></listitem>
78 <listitem><para>making multiple input files into a single multi-channel output file</para></listitem>
79 <listitem><para>making multiple input files into a single single-channel output file</para></listitem>
80 <listitem><para>extracting a single channel from a multi-channel waveform</para></listitem>
81 <listitem><para>scaling the amplitude of the waveform</para></listitem>
82 <listitem><para>low pass and high pass filtering</para></listitem>
83 <listitem><para>extracting a time-delimited portion of the waveform</para></listitem>
84 </itemizedlist>
85
86 ch_wave is an executable program that serves as a wrap-around for the
87 EST_Wave class and the basic wave manipulation functions. More
88 advanced waveform processing is performed by the signal processing library.
89
90 */
91
92 //@}
93
94 /**@name OPTIONS
95 */
96 //@{
97
98 //@options
99
100 //@}
101
102
103 int main (int argc, char *argv[])
104 {
105 EST_Wave sig, sigload;
106 EST_String in_file("-"), out_file("-"), op_file(""), test;
107 EST_Option al;
108 EST_StrList files;
109 EST_Litem *p;
110
111
112 parse_command_line
113 (argc, argv,
114 EST_String("[input file0] [input file1] ... -o [output file]\n")+
115 "Summary: change/copy/combine waveform files\n"+
116 "use \"-\" to make input and output files stdin/out\n"+
117 "-h Options help\n\n"+
118 options_wave_input()+
119 options_wave_output()+
120 "-scale <float> Scaling factor. Increase or descrease the amplitude\n"
121 " of the whole waveform by the factor given\n\n"
122
123 "-scaleN <float> Scaling factor with normalization. \n"
124 " The waveform is scaled to its maximum level, after which \n"
125 " it is scaled by the factor given\n\n"
126
127 "-lpfilter <int> Low pass filter, with cutoff frequency in Hz \n"
128 " Filtering is performed by a FIR filter which is built at run \n"
129 " time. The order of the filter can be given by -forder. The \n"
130 " default value is 199\n\n"
131
132 "-hpfilter <int> High pass filter, with cutoff frequency in Hz \n"
133 " Filtering is performed by a FIR filter which is \n"
134 " built at run time. The order of the filter can \n"
135 " be given by -forder. The default value is 199.\n\n"
136
137 "-forder <int> Order of FIR filter used for lpfilter and \n"
138 " hpfilter. This must be ODD. Sensible values range \n"+
139 " from 19 (quick but with a shallow rolloff) to 199 \n"
140 " (slow but with a steep rolloff). The default is 199.\n\n"
141
142 "-fafter Do filtering after other operations such as \n"
143 " resampling (default : filter before other operations)\n\n"
144
145 "-info Print information about file and header. \n"
146 " This option gives useful information such as file \n"
147 " length, sampling rate, number of channels etc\n"
148 " No output is produced\n\n"
149
150 "-add A new single channel waveform is created by adding \n"
151 " the corresponding sample points of each input waveform\n\n"
152
153 "-pc <string> Combine input waveforms to form a single \n"
154 " multichannel waveform. The argument to this option controls \n"
155 " how long the new waveform should be. If the option \n"
156 " is LONGEST, the output wave if the length of the \n"
157 " longest input wave and shorter waves are padded with \n"
158 " zeros at the end. If the option is FIRST, the length \n"
159 " of the new waveform is the length of the first file \n"
160 " on the command line, and subsequent waves are padded \n"
161 " or cut to this length\n\n"
162
163 "-key <ifile> Label file designating subsections, for use with \n"
164 " -divide. The KEYLAB file is a label file which specifies \n"
165 " where chunks (such as individual sentences) in \n"
166 " a waveform begin and end. See section of wave extraction.\n\n"
167
168 "-divide Divide a single input waveform into multiple output \n"
169 " waveforms. Each output waveform is extracted from the \n"
170 " input waveform by using the KEYLAB file, which \n"
171 " specifies the start and stop times for each chunk. \n"
172 " The output files are named according to the filename \n"
173 " in the KEYLAB file, with extension given by -ext. See \n"
174 " section on wave extraction\n\n"
175
176 "-ext <string> File extension for divided waveforms\n\n"
177
178 "-compress <float> Apply Dynamic Range Compression by factor specified \n"
179
180 "-extract <string> Used in conjunction with -key to extract a \n"
181 " single section of waveform from the input \n"
182 " waveform. The argument is the name of a file given \n"
183 " in the file column of the KEYLAB file.\n",
184 files, al);
185
186 out_file = al.present("-o") ? al.val("-o") : (EST_String)"-";
187
188 // There will always be at least one (or stdin)
189 // The first is dealt specially in case its *way* big
190 if (read_wave(sig, files.first(), al) != format_ok)
191 exit(-1);
192 if (al.present("-info"))
193 wave_info(sig);
194 // concat or parallelize remaining input files
195
196 if (files.length() > 1)
197 {
198 for (p= files.head()->next(); p != 0; p=p->next())
199 {
200 if (read_wave(sigload, files(p), al) != format_ok)
201 exit(-1);
202 if (al.present("-info"))
203 wave_info(sigload);
204 else if (al.present("-pc"))
205 {
206 if ((al.val("-pc") == "longest") &&
207 (sig.num_samples() < sigload.num_samples()))
208 sig.resize(sigload.num_samples());
209 else /* "first" or sig is longer */
210 sigload.resize(sig.num_samples());
211 sig |= sigload;
212 }
213 else if (al.present("-add"))
214 add_waves(sig, sigload);
215 else
216 sig += sigload;
217 }
218 }
219
220 if (al.present("-info"))
221 exit(0); // done what I've been asked to so stop
222
223 // All input files are now in a single wave called sig
224
225 // default is to filter before any resampling etc.
226 // (this may cause problems for multiplexed data !)
227 if(!al.present("-fafter")){
228 if(al.present("-lpfilter"))
229 FIRlowpass_filter(sig,al.ival("-lpfilter"),al.ival("-forder"));
230 if(al.present("-hpfilter"))
231 FIRhighpass_filter(sig,al.ival("-hpfilter"),al.ival("-forder"));
232 }
233
234 if (al.present("-c")) // extract a channel from a multi-channel wave
235 {
236 EST_StrList s;
237 EST_IList il;
238 EST_Wave nsig;
239 StringtoStrList(al.val("-c"), s, " ,"); // separator can be space or comma
240 StrListtoIList(s, il);
241 extract_channels(nsig, sig, il);
242 sig = nsig;
243 }
244
245 if (al.present("-F")) // resample
246 sig.resample(al.ival("-F"));
247
248 if (al.present("-compress")) // Dynamic Range Compression
249 {
250 float mu = al.fval("-compress" , 0);
251 float lim = 30000.0;
252
253 sig.compress(mu, lim);
254 }
255
256 if (al.present("-scale")) // rescale
257 {
258 float scale = al.fval("-scale", 0);
259 sig.rescale(scale);
260 }
261 if (al.present("-scaleN")) // rescale
262 {
263 float scale = al.fval("-scaleN", 0);
264 if ((scale < 0) || (scale > 1.0))
265 {
266 cerr << "ch_wave: -scaleN must be in range 0 to 1" << endl;
267 exit(-1);
268 }
269 sig.rescale(scale,1);
270 }
271
272 EST_Relation key;
273
274 if (al.present("-divide"))
275 {
276 EST_WaveList wl;
277 if (!al.present("-key"))
278 {
279 cerr << "Must have key file specified when dividing waveform\n";
280 exit (-1);
281 }
282 if (key.load(al.val("-key")) != format_ok)
283 exit(-1);
284
285 if (wave_divide(wl, sig, key, al.val("-ext", 0)) == -1)
286 exit(0);
287 for (p = wl.head(); p; p = p->next())
288 wl(p).save(wl(p).name(), al.val("-otype", 0));
289 exit(0);
290 }
291 else if (al.present("-extract"))
292 {
293 EST_Wave e;
294 if (!al.present("-key"))
295 {
296 cerr << "Must have key file specified when dividing waveform\n";
297 exit (-1);
298 }
299 if (key.load(al.val("-key")) != format_ok)
300 exit(-1);
301
302 if (wave_extract(e, sig, key, al.val("-extract")) == -1)
303 exit (-1);
304 sig = e;
305 }
306
307 // if we are filtering after other operations
308 if(al.present("-fafter")){
309 if(al.present("-lpfilter"))
310 FIRlowpass_filter(sig,al.ival("-lpfilter"),al.ival("-forder"));
311 if(al.present("-hpfilter"))
312 FIRhighpass_filter(sig,al.ival("-hpfilter"),al.ival("-forder"));
313 }
314
315 write_wave(sig, out_file, al);
316 return 0;
317 }
318
319 /** @name Making multiple waves into a single wave
320
321 If multiple input files are specified, by default they are concatenated into
322 the output file.
323 </para>
324 <para>
325 <screen>
326 $ ch_wave kdt_010.wav kdt_011.wav kdt_012.wav kdt_013.wav -o out.wav
327 </screen>
328 </para>
329 <para>
330 In the above example, 4 single channel input files are converted to
331 one single channel output file. Multi-channel waveforms can also be
332 concatenated provided they all have the same number of input channels.
333
334 </para><para>
335
336 Multiple input files can be made into a multi-channel output file by
337 using the -pc option:
338
339 </para><para>
340 <screen>
341 $ ch_wave kdt_010.wav kdt_011.wav kdt_012.wav kdt_013.wav -pc LONGEST -o out.wav
342 </screen>
343 </para>
344 <para>
345 The argument to -pc can either be LONGEST, in which case the output
346 waveform is the length of the longest input file, or FIRST, in which
347 case it is the length of the first input file.
348
349 */
350
351 //@{
352 //@}
353
354 /** @name Extracting channels from multi-channel waves
355
356 The -c option is used to specify channels which should be extracted
357 from the input. If the input is a 4 channel wave,
358 </para><para>
359 <screen>
360 $ ch_wave kdt_m.wav -o a.wav -c "0 2"
361 </screen>
362 </para>
363 <para>
364 will extract the 0th and 2nd channel (counting starts from 0). The
365 argument to -c can be either a single number or a list of numbers
366 (wrapped in quotes)
367
368 */
369 //@{
370 //@}
371
372
373 /** @name Extracting of a single region from a waveform
374
375 There are several ways of extracting a region of a waveform. The
376 simplest way is by using the start, end, to and from commands to
377 delimit a sub portion of the input wave. For example
378 </para><para>
379 <screen>
380 $ ch_wave kdt_010.wav -o small.wav -start 1.45 -end 1.768
381 </screen>
382 </para>
383 <para>
384 extracts a subwave starting at 1.45 seconds and extending to 1.768 seconds.
385
386 alternatively,
387 </para><para>
388 <screen>
389 $ ch_wave kdt_010.wav -o small.wav -from 5000 -to 10000
390 </screen>
391 </para>
392 <para>
393 extracts a subwave starting at 5000 samples and extending to 10000
394 samples. Times and samples can be mixed in sub-wave extraction. The
395 output waveform will have the same number of channels as the input
396 waveform.
397
398 */
399 //@{
400 //@}
401
402 /** @name Extracting multiple regions from a waveform
403
404 Multiple regions can be extracted from a waveform, but as it would be
405 too complicated to specify the start and end points on the command
406 line, a label file with start and end points, and file names is used.
407
408 The file is called a key label file and in xwaves label format looks
409 like:
410 </para>
411 <para>
412 <screen>
413 separator ;
414 #
415 0.308272 121 sil ; file kdt_010.01 ;
416 0.440021 121 are ; file kdt_010.02 ;
417 0.512930 121 your ; file kdt_010.03 ;
418 0.784097 121 grades ; file kdt_010.04 ;
419 1.140969 121 higher ; file kdt_010.05 ;
420 1.258647 121 or ; file kdt_010.06 ;
421 1.577145 121 lower ; file kdt_010.07 ;
422 1.725516 121 than ; file kdt_010.08 ;
423 2.315186 121 nancy's ; file kdt_010.09 ;
424 </screen>
425 </para>
426 <para>
427 Each line represents one region. The first column is the end time of
428 that region and the start time of the next. The next two columns are
429 colour and an arbitrary name, and the filename in which the output
430 waveform is to be stored is kept as a field called file in the last column.
431 In this example, each region corresponds to a single word in the file.
432
433 If the above file is called "kdt_010.words", the command:
434 </para>
435 <para>
436 <screen>
437 $ ch_wave kdt_010.wav -key kdt_010.words -ext .wav -divide
438 </screen>
439 </para>
440 <para>
441 will divide the input waveform into 9 output waveforms called
442 kdt_010.01.wav, kdt_010.02.wav ... kdt_010.09.wav. The -ext option
443 specifies the extension of the new waveforms, and the -divide command
444 specifies that division of the entire waveform is to take place.
445
446 If only a single file is required the -extract option can be used, in
447 which case its argument is the filename required.
448 </para>
449 <para>
450 <screen>
451 $ ch_wave kdt_010.wav -key kdt_010.words -ext .wav -extract kdt_010.03 \
452 -o kdt_010.03.wav
453 </screen>
454 </para>
455 <para>
456 Note that an output filename should be specified with this option.
457 */
458 //@{
459 //@}
460
461 /** @name Adding headers and format conversion
462
463 It is usually a good idea for all waveform files to have headers as
464 this way different byte orders, sampling rates etc can be handled
465 safely. ch_wave provides a means of adding headers to raw files.
466
467 The following adds a header to a file of 16 bit shorts
468 </para>
469 <para>
470 <screen>
471 $ ch_wave kdt_010.raw1 -o kdt_010.h1.wav -otype nist -f 16000 -itype raw
472 </screen>
473 </para>
474 <para>
475 The following downsamples the input to 8 kHz
476 </para>
477 <para>
478 <screen>
479 $ ch_wave kdt_010.raw1 -o kdt_010.h2.wav -otype nist -f 16000 \
480 -F 8000 -itype raw
481 </screen>
482 </para>
483 <para>
484 The following takes an 8 kHz ulaw input file and produces a 16 bit, 20 kHz output file:
485 </para>
486 <para>
487 <screen>
488 $ ch_wave kdt_010.raw2 -o kdt_010.h3.wav -otype nist -istype ulaw \
489 -f 8000 -F 20000 -itype raw
490 </screen>
491 */
492 //@{
493 //@}
494
495 //@}