]> git.mxchange.org Git - flightgear.git/blob - 3rdparty/hts_engine_API/lib/HTS_engine.c
549cddaefeae189ec7ba374f990c3d90009ac9db
[flightgear.git] / 3rdparty / hts_engine_API / lib / HTS_engine.c
1 /* ----------------------------------------------------------------- */
2 /*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
3 /*           developed by HTS Working Group                          */
4 /*           http://hts-engine.sourceforge.net/                      */
5 /* ----------------------------------------------------------------- */
6 /*                                                                   */
7 /*  Copyright (c) 2001-2013  Nagoya Institute of Technology          */
8 /*                           Department of Computer Science          */
9 /*                                                                   */
10 /*                2001-2008  Tokyo Institute of Technology           */
11 /*                           Interdisciplinary Graduate School of    */
12 /*                           Science and Engineering                 */
13 /*                                                                   */
14 /* All rights reserved.                                              */
15 /*                                                                   */
16 /* Redistribution and use in source and binary forms, with or        */
17 /* without modification, are permitted provided that the following   */
18 /* conditions are met:                                               */
19 /*                                                                   */
20 /* - Redistributions of source code must retain the above copyright  */
21 /*   notice, this list of conditions and the following disclaimer.   */
22 /* - Redistributions in binary form must reproduce the above         */
23 /*   copyright notice, this list of conditions and the following     */
24 /*   disclaimer in the documentation and/or other materials provided */
25 /*   with the distribution.                                          */
26 /* - Neither the name of the HTS working group nor the names of its  */
27 /*   contributors may be used to endorse or promote products derived */
28 /*   from this software without specific prior written permission.   */
29 /*                                                                   */
30 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
31 /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
32 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
33 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
34 /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
36 /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
37 /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
38 /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
40 /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
41 /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
42 /* POSSIBILITY OF SUCH DAMAGE.                                       */
43 /* ----------------------------------------------------------------- */
44
45 #ifndef HTS_ENGINE_C
46 #define HTS_ENGINE_C
47
48 #ifdef __cplusplus
49 #define HTS_ENGINE_C_START extern "C" {
50 #define HTS_ENGINE_C_END   }
51 #else
52 #define HTS_ENGINE_C_START
53 #define HTS_ENGINE_C_END
54 #endif                          /* __CPLUSPLUS */
55
56 HTS_ENGINE_C_START;
57
58 #include <stdlib.h>             /* for atof() */
59 #include <string.h>             /* for strcpy() */
60 #include <math.h>               /* for pow() */
61
62 /* hts_engine libraries */
63 #include "HTS_hidden.h"
64
65 /* HTS_Engine_initialize: initialize engine */
66 void HTS_Engine_initialize(HTS_Engine * engine)
67 {
68    /* global */
69    engine->condition.sampling_frequency = 0;
70    engine->condition.fperiod = 0;
71    engine->condition.audio_buff_size = 0;
72    engine->condition.stop = FALSE;
73    engine->condition.volume = 1.0;
74    engine->condition.msd_threshold = NULL;
75    engine->condition.gv_weight = NULL;
76
77    /* duration */
78    engine->condition.speed = 1.0;
79    engine->condition.phoneme_alignment_flag = FALSE;
80
81    /* spectrum */
82    engine->condition.stage = 0;
83    engine->condition.use_log_gain = FALSE;
84    engine->condition.alpha = 0.0;
85    engine->condition.beta = 0.0;
86
87    /* log F0 */
88    engine->condition.additional_half_tone = 0.0;
89
90    /* interpolation weights */
91    engine->condition.duration_iw = NULL;
92    engine->condition.parameter_iw = NULL;
93    engine->condition.gv_iw = NULL;
94
95    /* initialize audio */
96    HTS_Audio_initialize(&engine->audio);
97    /* initialize model set */
98    HTS_ModelSet_initialize(&engine->ms);
99    /* initialize label list */
100    HTS_Label_initialize(&engine->label);
101    /* initialize state sequence set */
102    HTS_SStreamSet_initialize(&engine->sss);
103    /* initialize pstream set */
104    HTS_PStreamSet_initialize(&engine->pss);
105    /* initialize gstream set */
106    HTS_GStreamSet_initialize(&engine->gss);
107 }
108
109 /* HTS_Engine_load: load HTS voices */
110 HTS_Boolean HTS_Engine_load(HTS_Engine * engine, char **voices, size_t num_voices)
111 {
112    size_t i, j;
113    size_t nstream;
114    double average_weight;
115    const char *option, *find;
116
117    /* reset engine */
118    HTS_Engine_clear(engine);
119
120    /* load voices */
121    if (HTS_ModelSet_load(&engine->ms, voices, num_voices) != TRUE) {
122       HTS_Engine_clear(engine);
123       return FALSE;
124    }
125    nstream = HTS_ModelSet_get_nstream(&engine->ms);
126    average_weight = 1.0 / num_voices;
127
128    /* global */
129    engine->condition.sampling_frequency = HTS_ModelSet_get_sampling_frequency(&engine->ms);
130    engine->condition.fperiod = HTS_ModelSet_get_fperiod(&engine->ms);
131    engine->condition.msd_threshold = (double *) HTS_calloc(nstream, sizeof(double));
132    for (i = 0; i < nstream; i++)
133       engine->condition.msd_threshold[i] = 0.5;
134    engine->condition.gv_weight = (double *) HTS_calloc(nstream, sizeof(double));
135    for (i = 0; i < nstream; i++)
136       engine->condition.gv_weight[i] = 1.0;
137
138    /* spectrum */
139    option = HTS_ModelSet_get_option(&engine->ms, 0);
140    find = strstr(option, "GAMMA=");
141    if (find != NULL)
142       engine->condition.stage = (size_t) atoi(&find[strlen("GAMMA=")]);
143    find = strstr(option, "LN_GAIN=");
144    if (find != NULL)
145       engine->condition.use_log_gain = atoi(&find[strlen("LN_GAIN=")]) == 1 ? TRUE : FALSE;
146    find = strstr(option, "ALPHA=");
147    if (find != NULL)
148       engine->condition.alpha = atof(&find[strlen("ALPHA=")]);
149
150    /* interpolation weights */
151    engine->condition.duration_iw = (double *) HTS_calloc(num_voices, sizeof(double));
152    for (i = 0; i < num_voices; i++)
153       engine->condition.duration_iw[i] = average_weight;
154    engine->condition.parameter_iw = (double **) HTS_calloc(nstream, sizeof(double *));
155    for (i = 0; i < nstream; i++) {
156       engine->condition.parameter_iw[i] = (double *) HTS_calloc(num_voices, sizeof(double));
157       for (j = 0; j < num_voices; j++)
158          engine->condition.parameter_iw[i][j] = average_weight;
159    }
160    engine->condition.gv_iw = (double **) HTS_calloc(nstream, sizeof(double *));
161    for (i = 0; i < nstream; i++) {
162       engine->condition.gv_iw[i] = (double *) HTS_calloc(num_voices, sizeof(double));
163       for (j = 0; j < num_voices; j++)
164          engine->condition.gv_iw[i][j] = average_weight;
165    }
166
167    return TRUE;
168 }
169
170 /* HTS_Engine_set_sampling_frequency: set sampling frequency */
171 void HTS_Engine_set_sampling_frequency(HTS_Engine * engine, size_t i)
172 {
173    if (i < 1)
174       i = 1;
175    engine->condition.sampling_frequency = i;
176    HTS_Audio_set_parameter(&engine->audio, engine->condition.sampling_frequency, engine->condition.audio_buff_size);
177 }
178
179 /* HTS_Engine_get_sampling_frequency: get sampling frequency */
180 size_t HTS_Engine_get_sampling_frequency(HTS_Engine * engine)
181 {
182    return engine->condition.sampling_frequency;
183 }
184
185 /* HTS_Engine_set_fperiod: set frame period */
186 void HTS_Engine_set_fperiod(HTS_Engine * engine, size_t i)
187 {
188    if (i < 1)
189       i = 1;
190    engine->condition.fperiod = i;
191 }
192
193 /* HTS_Engine_get_fperiod: get frame period */
194 size_t HTS_Engine_get_fperiod(HTS_Engine * engine)
195 {
196    return engine->condition.fperiod;
197 }
198
199 /* HTS_Engine_set_audio_buff_size: set audio buffer size */
200 void HTS_Engine_set_audio_buff_size(HTS_Engine * engine, size_t i)
201 {
202    engine->condition.audio_buff_size = i;
203    HTS_Audio_set_parameter(&engine->audio, engine->condition.sampling_frequency, engine->condition.audio_buff_size);
204 }
205
206 /* HTS_Engine_get_audio_buff_size: get audio buffer size */
207 size_t HTS_Engine_get_audio_buff_size(HTS_Engine * engine)
208 {
209    return engine->condition.audio_buff_size;
210 }
211
212 /* HTS_Engine_set_stop_flag: set stop flag */
213 void HTS_Engine_set_stop_flag(HTS_Engine * engine, HTS_Boolean b)
214 {
215    engine->condition.stop = b;
216 }
217
218 /* HTS_Engine_get_stop_flag: get stop flag */
219 HTS_Boolean HTS_Engine_get_stop_flag(HTS_Engine * engine)
220 {
221    return engine->condition.stop;
222 }
223
224 /* HTS_Engine_set_volume: set volume in db */
225 void HTS_Engine_set_volume(HTS_Engine * engine, double f)
226 {
227    engine->condition.volume = exp(f * DB);
228 }
229
230 /* HTS_Engine_get_volume: get volume in db */
231 double HTS_Engine_get_volume(HTS_Engine * engine)
232 {
233    return log(engine->condition.volume) / DB;
234 }
235
236 /* HTS_Egnine_set_msd_threshold: set MSD threshold */
237 void HTS_Engine_set_msd_threshold(HTS_Engine * engine, size_t stream_index, double f)
238 {
239    if (f < 0.0)
240       f = 0.0;
241    if (f > 1.0)
242       f = 1.0;
243    engine->condition.msd_threshold[stream_index] = f;
244 }
245
246 /* HTS_Engine_get_msd_threshold: get MSD threshold */
247 double HTS_Engine_get_msd_threshold(HTS_Engine * engine, size_t stream_index)
248 {
249    return engine->condition.msd_threshold[stream_index];
250 }
251
252 /* HTS_Engine_set_gv_weight: set GV weight */
253 void HTS_Engine_set_gv_weight(HTS_Engine * engine, size_t stream_index, double f)
254 {
255    if (f < 0.0)
256       f = 0.0;
257    engine->condition.gv_weight[stream_index] = f;
258 }
259
260 /* HTS_Engine_get_gv_weight: get GV weight */
261 double HTS_Engine_get_gv_weight(HTS_Engine * engine, size_t stream_index)
262 {
263    return engine->condition.gv_weight[stream_index];
264 }
265
266 /* HTS_Engine_set_speed: set speech speed */
267 void HTS_Engine_set_speed(HTS_Engine * engine, double f)
268 {
269    if (f < 1.0E-06)
270       f = 1.0E-06;
271    engine->condition.speed = f;
272 }
273
274 /* HTS_Engine_set_phoneme_alignment_flag: set flag for using phoneme alignment in label */
275 void HTS_Engine_set_phoneme_alignment_flag(HTS_Engine * engine, HTS_Boolean b)
276 {
277    engine->condition.phoneme_alignment_flag = b;
278 }
279
280 /* HTS_Engine_set_alpha: set alpha */
281 void HTS_Engine_set_alpha(HTS_Engine * engine, double f)
282 {
283    if (f < 0.0)
284       f = 0.0;
285    if (f > 1.0)
286       f = 1.0;
287    engine->condition.alpha = f;
288 }
289
290 /* HTS_Engine_get_alpha: get alpha */
291 double HTS_Engine_get_alpha(HTS_Engine * engine)
292 {
293    return engine->condition.alpha;
294 }
295
296 /* HTS_Engine_set_beta: set beta */
297 void HTS_Engine_set_beta(HTS_Engine * engine, double f)
298 {
299    if (f < 0.0)
300       f = 0.0;
301    if (f > 1.0)
302       f = 1.0;
303    engine->condition.beta = f;
304 }
305
306 /* HTS_Engine_get_beta: get beta */
307 double HTS_Engine_get_beta(HTS_Engine * engine)
308 {
309    return engine->condition.beta;
310 }
311
312 /* HTS_Engine_add_half_tone: add half tone */
313 void HTS_Engine_add_half_tone(HTS_Engine * engine, double f)
314 {
315    engine->condition.additional_half_tone = f;
316 }
317
318 /* HTS_Engine_set_duration_interpolation_weight: set interpolation weight for duration */
319 void HTS_Engine_set_duration_interpolation_weight(HTS_Engine * engine, size_t voice_index, double f)
320 {
321    engine->condition.duration_iw[voice_index] = f;
322 }
323
324 /* HTS_Engine_get_duration_interpolation_weight: get interpolation weight for duration */
325 double HTS_Engine_get_duration_interpolation_weight(HTS_Engine * engine, size_t voice_index)
326 {
327    return engine->condition.duration_iw[voice_index];
328 }
329
330 /* HTS_Engine_set_parameter_interpolation_weight: set interpolation weight for parameter */
331 void HTS_Engine_set_parameter_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index, double f)
332 {
333    engine->condition.parameter_iw[voice_index][stream_index] = f;
334 }
335
336 /* HTS_Engine_get_parameter_interpolation_weight: get interpolation weight for parameter */
337 double HTS_Engine_get_parameter_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index)
338 {
339    return engine->condition.parameter_iw[voice_index][stream_index];
340 }
341
342 /* HTS_Engine_set_gv_interpolation_weight: set interpolation weight for GV */
343 void HTS_Engine_set_gv_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index, double f)
344 {
345    engine->condition.gv_iw[voice_index][stream_index] = f;
346 }
347
348 /* HTS_Engine_get_gv_interpolation_weight: get interpolation weight for GV */
349 double HTS_Engine_get_gv_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index)
350 {
351    return engine->condition.gv_iw[voice_index][stream_index];
352 }
353
354 /* HTS_Engine_get_total_state: get total number of state */
355 size_t HTS_Engine_get_total_state(HTS_Engine * engine)
356 {
357    return HTS_SStreamSet_get_total_state(&engine->sss);
358 }
359
360 /* HTS_Engine_set_state_mean: set mean value of state */
361 void HTS_Engine_set_state_mean(HTS_Engine * engine, size_t stream_index, size_t state_index, size_t vector_index, double f)
362 {
363    HTS_SStreamSet_set_mean(&engine->sss, stream_index, state_index, vector_index, f);
364 }
365
366 /* HTS_Engine_get_state_mean: get mean value of state */
367 double HTS_Engine_get_state_mean(HTS_Engine * engine, size_t stream_index, size_t state_index, size_t vector_index)
368 {
369    return HTS_SStreamSet_get_mean(&engine->sss, stream_index, state_index, vector_index);
370 }
371
372 /* HTS_Engine_get_state_duration: get state duration */
373 size_t HTS_Engine_get_state_duration(HTS_Engine * engine, size_t state_index)
374 {
375    return HTS_SStreamSet_get_duration(&engine->sss, state_index);
376 }
377
378 /* HTS_Engine_get_nvoices: get number of voices */
379 size_t HTS_Engine_get_nvoices(HTS_Engine * engine)
380 {
381    return HTS_ModelSet_get_nvoices(&engine->ms);
382 }
383
384 /* HTS_Engine_get_nstream: get number of stream */
385 size_t HTS_Engine_get_nstream(HTS_Engine * engine)
386 {
387    return HTS_ModelSet_get_nstream(&engine->ms);
388 }
389
390 /* HTS_Engine_get_nstate: get number of state */
391 size_t HTS_Engine_get_nstate(HTS_Engine * engine)
392 {
393    return HTS_ModelSet_get_nstate(&engine->ms);
394 }
395
396 /* HTS_Engine_get_total_frame: get total number of frame */
397 size_t HTS_Engine_get_total_frame(HTS_Engine * engine)
398 {
399    return HTS_GStreamSet_get_total_frame(&engine->gss);
400 }
401
402 /* HTS_Engine_get_nsamples: get number of samples */
403 size_t HTS_Engine_get_nsamples(HTS_Engine * engine)
404 {
405    return HTS_GStreamSet_get_total_nsamples(&engine->gss);
406 }
407
408 /* HTS_Engine_get_generated_parameter: output generated parameter */
409 double HTS_Engine_get_generated_parameter(HTS_Engine * engine, size_t stream_index, size_t frame_index, size_t vector_index)
410 {
411    return HTS_GStreamSet_get_parameter(&engine->gss, stream_index, frame_index, vector_index);
412 }
413
414 /* HTS_Engine_get_generated_speech: output generated speech */
415 double HTS_Engine_get_generated_speech(HTS_Engine * engine, size_t index)
416 {
417    return HTS_GStreamSet_get_speech(&engine->gss, index);
418 }
419
420 /* HTS_Engine_generate_state_sequence: genereate state sequence (1st synthesis step) */
421 static HTS_Boolean HTS_Engine_generate_state_sequence(HTS_Engine * engine)
422 {
423    size_t i, state_index, model_index;
424    double f;
425
426    if (HTS_SStreamSet_create(&engine->sss, &engine->ms, &engine->label, engine->condition.phoneme_alignment_flag, engine->condition.speed, engine->condition.duration_iw, engine->condition.parameter_iw, engine->condition.gv_iw) != TRUE) {
427       HTS_Engine_refresh(engine);
428       return FALSE;
429    }
430    if (engine->condition.additional_half_tone != 0.0) {
431       state_index = 0;
432       model_index = 0;
433       for (i = 0; i < HTS_Engine_get_total_state(engine); i++) {
434          f = HTS_Engine_get_state_mean(engine, 1, i, 0);
435          f += engine->condition.additional_half_tone * HALF_TONE;
436          if (f < MIN_LF0)
437             f = MIN_LF0;
438          else if (f > MAX_LF0)
439             f = MAX_LF0;
440          HTS_Engine_set_state_mean(engine, 1, i, 0, f);
441          state_index++;
442          if (state_index >= HTS_Engine_get_nstate(engine)) {
443             state_index = 0;
444             model_index++;
445          }
446       }
447    }
448    return TRUE;
449 }
450
451 /* HTS_Engine_generate_state_sequence_from_fn: genereate state sequence from file name (1st synthesis step) */
452 HTS_Boolean HTS_Engine_generate_state_sequence_from_fn(HTS_Engine * engine, const char *fn)
453 {
454    HTS_Engine_refresh(engine);
455    HTS_Label_load_from_fn(&engine->label, engine->condition.sampling_frequency, engine->condition.fperiod, fn);
456    return HTS_Engine_generate_state_sequence(engine);
457 }
458
459 /* HTS_Engine_generate_state_sequence_from_strings: generate state sequence from strings (1st synthesis step) */
460 HTS_Boolean HTS_Engine_generate_state_sequence_from_strings(HTS_Engine * engine, char **lines, size_t num_lines)
461 {
462    HTS_Engine_refresh(engine);
463    HTS_Label_load_from_strings(&engine->label, engine->condition.sampling_frequency, engine->condition.fperiod, lines, num_lines);
464    return HTS_Engine_generate_state_sequence(engine);
465 }
466
467 /* HTS_Engine_generate_parameter_sequence: generate parameter sequence (2nd synthesis step) */
468 HTS_Boolean HTS_Engine_generate_parameter_sequence(HTS_Engine * engine)
469 {
470    return HTS_PStreamSet_create(&engine->pss, &engine->sss, engine->condition.msd_threshold, engine->condition.gv_weight);
471 }
472
473 /* HTS_Engine_generate_sample_sequence: generate sample sequence (3rd synthesis step) */
474 HTS_Boolean HTS_Engine_generate_sample_sequence(HTS_Engine * engine)
475 {
476    return HTS_GStreamSet_create(&engine->gss, &engine->pss, engine->condition.stage, engine->condition.use_log_gain, engine->condition.sampling_frequency, engine->condition.fperiod, engine->condition.alpha, engine->condition.beta, &engine->condition.stop, engine->condition.volume, engine->condition.audio_buff_size > 0 ? &engine->audio : NULL);
477 }
478
479 /* HTS_Engine_synthesize: synthesize speech */
480 static HTS_Boolean HTS_Engine_synthesize(HTS_Engine * engine)
481 {
482    if (HTS_Engine_generate_state_sequence(engine) != TRUE) {
483       HTS_Engine_refresh(engine);
484       return FALSE;
485    }
486    if (HTS_Engine_generate_parameter_sequence(engine) != TRUE) {
487       HTS_Engine_refresh(engine);
488       return FALSE;
489    }
490    if (HTS_Engine_generate_sample_sequence(engine) != TRUE) {
491       HTS_Engine_refresh(engine);
492       return FALSE;
493    }
494    return TRUE;
495 }
496
497 /* HTS_Engine_synthesize_from_fn: synthesize speech from file name */
498 HTS_Boolean HTS_Engine_synthesize_from_fn(HTS_Engine * engine, const char *fn)
499 {
500    HTS_Engine_refresh(engine);
501    HTS_Label_load_from_fn(&engine->label, engine->condition.sampling_frequency, engine->condition.fperiod, fn);
502    return HTS_Engine_synthesize(engine);
503 }
504
505 /* HTS_Engine_synthesize_from_strings: synthesize speech from strings */
506 HTS_Boolean HTS_Engine_synthesize_from_strings(HTS_Engine * engine, char **lines, size_t num_lines)
507 {
508    HTS_Engine_refresh(engine);
509    HTS_Label_load_from_strings(&engine->label, engine->condition.sampling_frequency, engine->condition.fperiod, lines, num_lines);
510    return HTS_Engine_synthesize(engine);
511 }
512
513 /* HTS_Engine_save_information: save trace information */
514 void HTS_Engine_save_information(HTS_Engine * engine, FILE * fp)
515 {
516    size_t i, j, k, l, m, n;
517    double temp;
518    HTS_Condition *condition = &engine->condition;
519    HTS_ModelSet *ms = &engine->ms;
520    HTS_Label *label = &engine->label;
521    HTS_SStreamSet *sss = &engine->sss;
522    HTS_PStreamSet *pss = &engine->pss;
523
524    /* global parameter */
525    fprintf(fp, "[Global parameter]\n");
526    fprintf(fp, "Sampring frequency                     -> %8lu(Hz)\n", (unsigned long) condition->sampling_frequency);
527    fprintf(fp, "Frame period                           -> %8lu(point)\n", (unsigned long) condition->fperiod);
528    fprintf(fp, "                                          %8.5f(msec)\n", 1e+3 * condition->fperiod / condition->sampling_frequency);
529    fprintf(fp, "All-pass constant                      -> %8.5f\n", (float) condition->alpha);
530    fprintf(fp, "Gamma                                  -> %8.5f\n", (float) (condition->stage == 0 ? 0.0 : -1.0 / condition->stage));
531    if (condition->stage != 0) {
532       if (condition->use_log_gain == TRUE)
533          fprintf(fp, "Log gain flag                          ->     TRUE\n");
534       else
535          fprintf(fp, "Log gain flag                          ->    FALSE\n");
536    }
537    fprintf(fp, "Postfiltering coefficient              -> %8.5f\n", (float) condition->beta);
538    fprintf(fp, "Audio buffer size                      -> %8lu(sample)\n", (unsigned long) condition->audio_buff_size);
539    fprintf(fp, "\n");
540
541    /* duration parameter */
542    fprintf(fp, "[Duration parameter]\n");
543    fprintf(fp, "Number of states                       -> %8lu\n", (unsigned long) HTS_ModelSet_get_nstate(ms));
544    fprintf(fp, "         Interpolation size            -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
545    /* check interpolation */
546    for (i = 0, temp = 0.0; i < HTS_ModelSet_get_nvoices(ms); i++)
547       temp += condition->duration_iw[i];
548    for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++)
549       if (condition->duration_iw[i] != 0.0)
550          condition->duration_iw[i] /= temp;
551    for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++)
552       fprintf(fp, "         Interpolation weight[%2lu]      -> %8.0f(%%)\n", (unsigned long) i, (float) (100 * condition->duration_iw[i]));
553    fprintf(fp, "\n");
554
555    fprintf(fp, "[Stream parameter]\n");
556    for (i = 0; i < HTS_ModelSet_get_nstream(ms); i++) {
557       /* stream parameter */
558       fprintf(fp, "Stream[%2lu] vector length               -> %8lu\n", (unsigned long) i, (unsigned long) HTS_ModelSet_get_vector_length(ms, i));
559       fprintf(fp, "           Dynamic window size         -> %8lu\n", (unsigned long) HTS_ModelSet_get_window_size(ms, i));
560       /* interpolation */
561       fprintf(fp, "           Interpolation size          -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
562       for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
563          temp += condition->parameter_iw[i][j];
564       for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
565          if (condition->parameter_iw[i][j] != 0.0)
566             condition->parameter_iw[i][j] /= temp;
567       for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
568          fprintf(fp, "           Interpolation weight[%2lu]    -> %8.0f(%%)\n", (unsigned long) j, (float) (100 * condition->parameter_iw[i][j]));
569       /* MSD */
570       if (HTS_ModelSet_is_msd(ms, i)) { /* for MSD */
571          fprintf(fp, "           MSD flag                    ->     TRUE\n");
572          fprintf(fp, "           MSD threshold               -> %8.5f\n", condition->msd_threshold[i]);
573       } else {                  /* for non MSD */
574          fprintf(fp, "           MSD flag                    ->    FALSE\n");
575       }
576       /* GV */
577       if (HTS_ModelSet_use_gv(ms, i)) {
578          fprintf(fp, "           GV flag                     ->     TRUE\n");
579          fprintf(fp, "           GV weight                   -> %8.0f(%%)\n", (float) (100 * condition->gv_weight[i]));
580          fprintf(fp, "           GV interpolation size       -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
581          /* interpolation */
582          for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
583             temp += condition->gv_iw[i][j];
584          for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
585             if (condition->gv_iw[i][j] != 0.0)
586                condition->gv_iw[i][j] /= temp;
587          for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
588             fprintf(fp, "           GV interpolation weight[%2lu] -> %8.0f(%%)\n", (unsigned long) j, (float) (100 * condition->gv_iw[i][j]));
589       } else {
590          fprintf(fp, "           GV flag                     ->    FALSE\n");
591       }
592    }
593    fprintf(fp, "\n");
594
595    /* generated sequence */
596    fprintf(fp, "[Generated sequence]\n");
597    fprintf(fp, "Number of HMMs                         -> %8lu\n", (unsigned long) HTS_Label_get_size(label));
598    fprintf(fp, "Number of stats                        -> %8lu\n", (unsigned long) HTS_Label_get_size(label) * HTS_ModelSet_get_nstate(ms));
599    fprintf(fp, "Length of this speech                  -> %8.3f(sec)\n", (float) ((double) HTS_PStreamSet_get_total_frame(pss) * condition->fperiod / condition->sampling_frequency));
600    fprintf(fp, "                                       -> %8lu(frames)\n", (unsigned long) HTS_PStreamSet_get_total_frame(pss) * condition->fperiod);
601
602    for (i = 0; i < HTS_Label_get_size(label); i++) {
603       fprintf(fp, "HMM[%2lu]\n", (unsigned long) i);
604       fprintf(fp, "  Name                                 -> %s\n", HTS_Label_get_string(label, i));
605       fprintf(fp, "  Duration\n");
606       for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++) {
607          fprintf(fp, "    Interpolation[%2lu]\n", (unsigned long) j);
608          HTS_ModelSet_get_duration_index(ms, j, HTS_Label_get_string(label, i), &k, &l);
609          fprintf(fp, "      Tree index                       -> %8lu\n", (unsigned long) k);
610          fprintf(fp, "      PDF index                        -> %8lu\n", (unsigned long) l);
611       }
612       for (j = 0; j < HTS_ModelSet_get_nstate(ms); j++) {
613          fprintf(fp, "  State[%2lu]\n", (unsigned long) j + 2);
614          fprintf(fp, "    Length                             -> %8lu(frames)\n", (unsigned long) HTS_SStreamSet_get_duration(sss, i * HTS_ModelSet_get_nstate(ms) + j));
615          for (k = 0; k < HTS_ModelSet_get_nstream(ms); k++) {
616             fprintf(fp, "    Stream[%2lu]\n", (unsigned long) k);
617             if (HTS_ModelSet_is_msd(ms, k)) {
618                if (HTS_SStreamSet_get_msd(sss, k, i * HTS_ModelSet_get_nstate(ms) + j) > condition->msd_threshold[k])
619                   fprintf(fp, "      MSD flag                         ->     TRUE\n");
620                else
621                   fprintf(fp, "      MSD flag                         ->    FALSE\n");
622             }
623             for (l = 0; l < HTS_ModelSet_get_nvoices(ms); l++) {
624                fprintf(fp, "      Interpolation[%2lu]\n", (unsigned long) l);
625                HTS_ModelSet_get_parameter_index(ms, l, k, j + 2, HTS_Label_get_string(label, i), &m, &n);
626                fprintf(fp, "        Tree index                     -> %8lu\n", (unsigned long) m);
627                fprintf(fp, "        PDF index                      -> %8lu\n", (unsigned long) n);
628             }
629          }
630       }
631    }
632 }
633
634 /* HTS_Engine_save_label: save label with time */
635 void HTS_Engine_save_label(HTS_Engine * engine, FILE * fp)
636 {
637    size_t i, j;
638    size_t frame, state, duration;
639
640    HTS_Label *label = &engine->label;
641    HTS_SStreamSet *sss = &engine->sss;
642    size_t nstate = HTS_ModelSet_get_nstate(&engine->ms);
643    double rate = engine->condition.fperiod * 1.0e+07 / engine->condition.sampling_frequency;
644
645    for (i = 0, state = 0, frame = 0; i < HTS_Label_get_size(label); i++) {
646       for (j = 0, duration = 0; j < nstate; j++)
647          duration += HTS_SStreamSet_get_duration(sss, state++);
648       fprintf(fp, "%lu %lu %s\n", (unsigned long) (frame * rate), (unsigned long) ((frame + duration) * rate), HTS_Label_get_string(label, i));
649       frame += duration;
650    }
651 }
652
653 /* HTS_Engine_save_generated_parameter: save generated parameter */
654 void HTS_Engine_save_generated_parameter(HTS_Engine * engine, size_t stream_index, FILE * fp)
655 {
656    size_t i, j;
657    float temp;
658    HTS_GStreamSet *gss = &engine->gss;
659
660    for (i = 0; i < HTS_GStreamSet_get_total_frame(gss); i++)
661       for (j = 0; j < HTS_GStreamSet_get_vector_length(gss, stream_index); j++) {
662          temp = (float) HTS_GStreamSet_get_parameter(gss, stream_index, i, j);
663          fwrite(&temp, sizeof(float), 1, fp);
664       }
665 }
666
667 /* HTS_Engine_save_generated_speech: save generated speech */
668 void HTS_Engine_save_generated_speech(HTS_Engine * engine, FILE * fp)
669 {
670    size_t i;
671    double x;
672    short temp;
673    HTS_GStreamSet *gss = &engine->gss;
674
675    for (i = 0; i < HTS_GStreamSet_get_total_nsamples(gss); i++) {
676       x = HTS_GStreamSet_get_speech(gss, i);
677       if (x > 32767.0)
678          temp = 32767;
679       else if (x < -32768.0)
680          temp = -32768;
681       else
682          temp = (short) x;
683       fwrite(&temp, sizeof(short), 1, fp);
684    }
685 }
686
687 /* HTS_Engine_save_riff: save RIFF format file */
688 void HTS_Engine_save_riff(HTS_Engine * engine, FILE * fp)
689 {
690    size_t i;
691    double x;
692    short temp;
693
694    HTS_GStreamSet *gss = &engine->gss;
695    char data_01_04[] = { 'R', 'I', 'F', 'F' };
696    int data_05_08 = HTS_GStreamSet_get_total_nsamples(gss) * sizeof(short) + 36;
697    char data_09_12[] = { 'W', 'A', 'V', 'E' };
698    char data_13_16[] = { 'f', 'm', 't', ' ' };
699    int data_17_20 = 16;
700    short data_21_22 = 1;        /* PCM */
701    short data_23_24 = 1;        /* monoral */
702    int data_25_28 = engine->condition.sampling_frequency;
703    int data_29_32 = engine->condition.sampling_frequency * sizeof(short);
704    short data_33_34 = sizeof(short);
705    short data_35_36 = (short) (sizeof(short) * 8);
706    char data_37_40[] = { 'd', 'a', 't', 'a' };
707    int data_41_44 = HTS_GStreamSet_get_total_nsamples(gss) * sizeof(short);
708
709    /* write header */
710    HTS_fwrite_little_endian(data_01_04, sizeof(char), 4, fp);
711    HTS_fwrite_little_endian(&data_05_08, sizeof(int), 1, fp);
712    HTS_fwrite_little_endian(data_09_12, sizeof(char), 4, fp);
713    HTS_fwrite_little_endian(data_13_16, sizeof(char), 4, fp);
714    HTS_fwrite_little_endian(&data_17_20, sizeof(int), 1, fp);
715    HTS_fwrite_little_endian(&data_21_22, sizeof(short), 1, fp);
716    HTS_fwrite_little_endian(&data_23_24, sizeof(short), 1, fp);
717    HTS_fwrite_little_endian(&data_25_28, sizeof(int), 1, fp);
718    HTS_fwrite_little_endian(&data_29_32, sizeof(int), 1, fp);
719    HTS_fwrite_little_endian(&data_33_34, sizeof(short), 1, fp);
720    HTS_fwrite_little_endian(&data_35_36, sizeof(short), 1, fp);
721    HTS_fwrite_little_endian(data_37_40, sizeof(char), 4, fp);
722    HTS_fwrite_little_endian(&data_41_44, sizeof(int), 1, fp);
723    /* write data */
724    for (i = 0; i < HTS_GStreamSet_get_total_nsamples(gss); i++) {
725       x = HTS_GStreamSet_get_speech(gss, i);
726       if (x > 32767.0)
727          temp = 32767;
728       else if (x < -32768.0)
729          temp = -32768;
730       else
731          temp = (short) x;
732       HTS_fwrite_little_endian(&temp, sizeof(short), 1, fp);
733    }
734 }
735
736 /* HTS_Engine_refresh: free model per one time synthesis */
737 void HTS_Engine_refresh(HTS_Engine * engine)
738 {
739    /* free generated parameter stream set */
740    HTS_GStreamSet_clear(&engine->gss);
741    /* free parameter stream set */
742    HTS_PStreamSet_clear(&engine->pss);
743    /* free state stream set */
744    HTS_SStreamSet_clear(&engine->sss);
745    /* free label list */
746    HTS_Label_clear(&engine->label);
747    /* stop flag */
748    engine->condition.stop = FALSE;
749 }
750
751 /* HTS_Engine_clear: free engine */
752 void HTS_Engine_clear(HTS_Engine * engine)
753 {
754    size_t i;
755
756    if (engine->condition.msd_threshold != NULL)
757       HTS_free(engine->condition.msd_threshold);
758    if (engine->condition.duration_iw != NULL)
759       HTS_free(engine->condition.duration_iw);
760    if (engine->condition.gv_weight != NULL)
761       HTS_free(engine->condition.gv_weight);
762    if (engine->condition.parameter_iw != NULL) {
763       for (i = 0; i < HTS_ModelSet_get_nstream(&engine->ms); i++)
764          HTS_free(engine->condition.parameter_iw[i]);
765       HTS_free(engine->condition.parameter_iw);
766    }
767    if (engine->condition.gv_iw != NULL) {
768       for (i = 0; i < HTS_ModelSet_get_nstream(&engine->ms); i++)
769          HTS_free(engine->condition.gv_iw[i]);
770       HTS_free(engine->condition.gv_iw);
771    }
772
773    HTS_ModelSet_clear(&engine->ms);
774    HTS_Audio_clear(&engine->audio);
775    HTS_Engine_initialize(engine);
776 }
777
778 HTS_ENGINE_C_END;
779
780 #endif                          /* !HTS_ENGINE_C */