1 /* ----------------------------------------------------------------- */
2 /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 /* developed by HTS Working Group */
4 /* http://hts-engine.sourceforge.net/ */
5 /* ----------------------------------------------------------------- */
7 /* Copyright (c) 2001-2013 Nagoya Institute of Technology */
8 /* Department of Computer Science */
10 /* 2001-2008 Tokyo Institute of Technology */
11 /* Interdisciplinary Graduate School of */
12 /* Science and Engineering */
14 /* All rights reserved. */
16 /* Redistribution and use in source and binary forms, with or */
17 /* without modification, are permitted provided that the following */
18 /* conditions are met: */
20 /* - Redistributions of source code must retain the above copyright */
21 /* notice, this list of conditions and the following disclaimer. */
22 /* - Redistributions in binary form must reproduce the above */
23 /* copyright notice, this list of conditions and the following */
24 /* disclaimer in the documentation and/or other materials provided */
25 /* with the distribution. */
26 /* - Neither the name of the HTS working group nor the names of its */
27 /* contributors may be used to endorse or promote products derived */
28 /* from this software without specific prior written permission. */
30 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 /* POSSIBILITY OF SUCH DAMAGE. */
43 /* ----------------------------------------------------------------- */
49 #define HTS_ENGINE_C_START extern "C" {
50 #define HTS_ENGINE_C_END }
52 #define HTS_ENGINE_C_START
53 #define HTS_ENGINE_C_END
54 #endif /* __CPLUSPLUS */
58 #include <stdlib.h> /* for atof() */
59 #include <string.h> /* for strcpy() */
60 #include <math.h> /* for pow() */
62 /* hts_engine libraries */
63 #include "HTS_hidden.h"
65 /* HTS_Engine_initialize: initialize engine */
66 void HTS_Engine_initialize(HTS_Engine * engine)
69 engine->condition.sampling_frequency = 0;
70 engine->condition.fperiod = 0;
71 engine->condition.audio_buff_size = 0;
72 engine->condition.stop = FALSE;
73 engine->condition.volume = 1.0;
74 engine->condition.msd_threshold = NULL;
75 engine->condition.gv_weight = NULL;
78 engine->condition.speed = 1.0;
79 engine->condition.phoneme_alignment_flag = FALSE;
82 engine->condition.stage = 0;
83 engine->condition.use_log_gain = FALSE;
84 engine->condition.alpha = 0.0;
85 engine->condition.beta = 0.0;
88 engine->condition.additional_half_tone = 0.0;
90 /* interpolation weights */
91 engine->condition.duration_iw = NULL;
92 engine->condition.parameter_iw = NULL;
93 engine->condition.gv_iw = NULL;
95 /* initialize audio */
96 HTS_Audio_initialize(&engine->audio);
97 /* initialize model set */
98 HTS_ModelSet_initialize(&engine->ms);
99 /* initialize label list */
100 HTS_Label_initialize(&engine->label);
101 /* initialize state sequence set */
102 HTS_SStreamSet_initialize(&engine->sss);
103 /* initialize pstream set */
104 HTS_PStreamSet_initialize(&engine->pss);
105 /* initialize gstream set */
106 HTS_GStreamSet_initialize(&engine->gss);
109 /* HTS_Engine_load: load HTS voices */
/* Clears the engine, loads num_voices voice files into the model set and */
/* then derives the default synthesis condition from the loaded models. */
/*   engine     : engine to (re)load */
/*   voices     : array of voice file names handed to HTS_ModelSet_load */
/*   num_voices : number of entries in voices */
/* NOTE(review): the embedded numbering skips lines in this function (e.g. */
/* 111-113, 123-124, 141, 144, 147, 166-168), so the declarations of i, j and */
/* nstream, the rest of the load-failure branch, any checks on `find`, and the */
/* return statements are not visible in this view - confirm against the full file. */
110 HTS_Boolean HTS_Engine_load(HTS_Engine * engine, char **voices, size_t num_voices)
114 double average_weight;
115 const char *option, *find;
/* drop whatever was loaded before */
118 HTS_Engine_clear(engine);
/* load the voices; on failure the engine is cleared again */
121 if (HTS_ModelSet_load(&engine->ms, voices, num_voices) != TRUE) {
122 HTS_Engine_clear(engine);
125 nstream = HTS_ModelSet_get_nstream(&engine->ms);
/* every voice starts with the same interpolation weight */
/* NOTE(review): divides by num_voices; presumably a successful load implies num_voices > 0 - confirm */
126 average_weight = 1.0 / num_voices;
/* sampling frequency and frame period come from the model set */
129 engine->condition.sampling_frequency = HTS_ModelSet_get_sampling_frequency(&engine->ms);
130 engine->condition.fperiod = HTS_ModelSet_get_fperiod(&engine->ms);
/* per-stream defaults: MSD threshold 0.5, GV weight 1.0 */
131 engine->condition.msd_threshold = (double *) HTS_calloc(nstream, sizeof(double));
132 for (i = 0; i < nstream; i++)
133 engine->condition.msd_threshold[i] = 0.5;
134 engine->condition.gv_weight = (double *) HTS_calloc(nstream, sizeof(double));
135 for (i = 0; i < nstream; i++)
136 engine->condition.gv_weight[i] = 1.0;
/* parse GAMMA=, LN_GAIN= and ALPHA= out of the option string of stream 0 */
139 option = HTS_ModelSet_get_option(&engine->ms, 0);
140 find = strstr(option, "GAMMA=");
142 engine->condition.stage = (size_t) atoi(&find[strlen("GAMMA=")]);
143 find = strstr(option, "LN_GAIN=");
/* log gain is enabled only when the value parses as exactly 1 */
145 engine->condition.use_log_gain = atoi(&find[strlen("LN_GAIN=")]) == 1 ? TRUE : FALSE;
146 find = strstr(option, "ALPHA=");
148 engine->condition.alpha = atof(&find[strlen("ALPHA=")]);
150 /* interpolation weights */
/* duration_iw has one entry per voice */
151 engine->condition.duration_iw = (double *) HTS_calloc(num_voices, sizeof(double));
152 for (i = 0; i < num_voices; i++)
153 engine->condition.duration_iw[i] = average_weight;
/* parameter_iw is laid out as [stream][voice]: nstream rows of num_voices weights */
154 engine->condition.parameter_iw = (double **) HTS_calloc(nstream, sizeof(double *));
155 for (i = 0; i < nstream; i++) {
156 engine->condition.parameter_iw[i] = (double *) HTS_calloc(num_voices, sizeof(double));
157 for (j = 0; j < num_voices; j++)
158 engine->condition.parameter_iw[i][j] = average_weight;
/* gv_iw uses the same [stream][voice] layout */
160 engine->condition.gv_iw = (double **) HTS_calloc(nstream, sizeof(double *));
161 for (i = 0; i < nstream; i++) {
162 engine->condition.gv_iw[i] = (double *) HTS_calloc(num_voices, sizeof(double));
163 for (j = 0; j < num_voices; j++)
164 engine->condition.gv_iw[i][j] = average_weight;
170 /* HTS_Engine_set_sampling_frequency: set sampling frequency */
/* Stores i as the sampling frequency and passes the new sampling frequency */
/* together with the current audio buffer size to HTS_Audio_set_parameter. */
/* NOTE(review): the embedded numbering jumps from 171 to 175, so statements */
/* between the signature and the assignment are not visible in this view - */
/* confirm against the full file. */
171 void HTS_Engine_set_sampling_frequency(HTS_Engine * engine, size_t i)
175 engine->condition.sampling_frequency = i;
176 HTS_Audio_set_parameter(&engine->audio, engine->condition.sampling_frequency, engine->condition.audio_buff_size);
179 /* HTS_Engine_get_sampling_frequency: get sampling frequency */
180 size_t HTS_Engine_get_sampling_frequency(HTS_Engine * engine)
182 return engine->condition.sampling_frequency;
185 /* HTS_Engine_set_fperiod: set frame period */
/* Stores i as the frame period in the engine condition. */
/* NOTE(review): the embedded numbering jumps from 186 to 190, so statements */
/* between the signature and the assignment are not visible in this view - */
/* confirm against the full file. */
186 void HTS_Engine_set_fperiod(HTS_Engine * engine, size_t i)
190 engine->condition.fperiod = i;
193 /* HTS_Engine_get_fperiod: get frame period */
194 size_t HTS_Engine_get_fperiod(HTS_Engine * engine)
196 return engine->condition.fperiod;
199 /* HTS_Engine_set_audio_buff_size: set audio buffer size */
200 void HTS_Engine_set_audio_buff_size(HTS_Engine * engine, size_t i)
202 engine->condition.audio_buff_size = i;
203 HTS_Audio_set_parameter(&engine->audio, engine->condition.sampling_frequency, engine->condition.audio_buff_size);
206 /* HTS_Engine_get_audio_buff_size: get audio buffer size */
207 size_t HTS_Engine_get_audio_buff_size(HTS_Engine * engine)
209 return engine->condition.audio_buff_size;
212 /* HTS_Engine_set_stop_flag: set stop flag */
213 void HTS_Engine_set_stop_flag(HTS_Engine * engine, HTS_Boolean b)
215 engine->condition.stop = b;
218 /* HTS_Engine_get_stop_flag: get stop flag */
219 HTS_Boolean HTS_Engine_get_stop_flag(HTS_Engine * engine)
221 return engine->condition.stop;
224 /* HTS_Engine_set_volume: set volume in db */
225 void HTS_Engine_set_volume(HTS_Engine * engine, double f)
227 engine->condition.volume = exp(f * DB);
230 /* HTS_Engine_get_volume: get volume in db */
231 double HTS_Engine_get_volume(HTS_Engine * engine)
233 return log(engine->condition.volume) / DB;
236 /* HTS_Engine_set_msd_threshold: set MSD threshold */
/* Stores f as the MSD threshold of stream stream_index. */
/* NOTE(review): the embedded numbering jumps from 237 to 243, so statements */
/* between the signature and the assignment (possibly adjusting f) are not */
/* visible in this view - confirm against the full file. */
/* No bounds check on stream_index is visible here. */
237 void HTS_Engine_set_msd_threshold(HTS_Engine * engine, size_t stream_index, double f)
243 engine->condition.msd_threshold[stream_index] = f;
246 /* HTS_Engine_get_msd_threshold: get MSD threshold */
247 double HTS_Engine_get_msd_threshold(HTS_Engine * engine, size_t stream_index)
249 return engine->condition.msd_threshold[stream_index];
252 /* HTS_Engine_set_gv_weight: set GV weight */
/* Stores f as the GV weight of stream stream_index. */
/* NOTE(review): the embedded numbering jumps from 253 to 257, so statements */
/* between the signature and the assignment (possibly adjusting f) are not */
/* visible in this view - confirm against the full file. */
/* No bounds check on stream_index is visible here. */
253 void HTS_Engine_set_gv_weight(HTS_Engine * engine, size_t stream_index, double f)
257 engine->condition.gv_weight[stream_index] = f;
260 /* HTS_Engine_get_gv_weight: get GV weight */
261 double HTS_Engine_get_gv_weight(HTS_Engine * engine, size_t stream_index)
263 return engine->condition.gv_weight[stream_index];
266 /* HTS_Engine_set_speed: set speech speed */
/* Stores f as the speech speed in the engine condition. */
/* NOTE(review): the embedded numbering jumps from 267 to 271, so statements */
/* between the signature and the assignment (possibly adjusting f) are not */
/* visible in this view - confirm against the full file. */
267 void HTS_Engine_set_speed(HTS_Engine * engine, double f)
271 engine->condition.speed = f;
274 /* HTS_Engine_set_phoneme_alignment_flag: set flag for using phoneme alignment in label */
275 void HTS_Engine_set_phoneme_alignment_flag(HTS_Engine * engine, HTS_Boolean b)
277 engine->condition.phoneme_alignment_flag = b;
280 /* HTS_Engine_set_alpha: set alpha */
/* Stores f as alpha in the engine condition; HTS_Engine_save_information */
/* prints this value as the "All-pass constant". */
/* NOTE(review): the embedded numbering jumps from 281 to 287, so statements */
/* between the signature and the assignment (possibly adjusting f) are not */
/* visible in this view - confirm against the full file. */
281 void HTS_Engine_set_alpha(HTS_Engine * engine, double f)
287 engine->condition.alpha = f;
290 /* HTS_Engine_get_alpha: get alpha */
291 double HTS_Engine_get_alpha(HTS_Engine * engine)
293 return engine->condition.alpha;
296 /* HTS_Engine_set_beta: set beta */
/* Stores f as beta in the engine condition; HTS_Engine_save_information */
/* prints this value as the "Postfiltering coefficient". */
/* NOTE(review): the embedded numbering jumps from 297 to 303, so statements */
/* between the signature and the assignment (possibly adjusting f) are not */
/* visible in this view - confirm against the full file. */
297 void HTS_Engine_set_beta(HTS_Engine * engine, double f)
303 engine->condition.beta = f;
306 /* HTS_Engine_get_beta: get beta */
307 double HTS_Engine_get_beta(HTS_Engine * engine)
309 return engine->condition.beta;
312 /* HTS_Engine_add_half_tone: add half tone */
313 void HTS_Engine_add_half_tone(HTS_Engine * engine, double f)
315 engine->condition.additional_half_tone = f;
318 /* HTS_Engine_set_duration_interpolation_weight: set interpolation weight for duration */
319 void HTS_Engine_set_duration_interpolation_weight(HTS_Engine * engine, size_t voice_index, double f)
321 engine->condition.duration_iw[voice_index] = f;
324 /* HTS_Engine_get_duration_interpolation_weight: get interpolation weight for duration */
325 double HTS_Engine_get_duration_interpolation_weight(HTS_Engine * engine, size_t voice_index)
327 return engine->condition.duration_iw[voice_index];
330 /* HTS_Engine_set_parameter_interpolation_weight: set interpolation weight for parameter */
331 void HTS_Engine_set_parameter_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index, double f)
333 engine->condition.parameter_iw[voice_index][stream_index] = f;
336 /* HTS_Engine_get_parameter_interpolation_weight: get interpolation weight for parameter */
337 double HTS_Engine_get_parameter_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index)
339 return engine->condition.parameter_iw[voice_index][stream_index];
342 /* HTS_Engine_set_gv_interpolation_weight: set interpolation weight for GV */
343 void HTS_Engine_set_gv_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index, double f)
345 engine->condition.gv_iw[voice_index][stream_index] = f;
348 /* HTS_Engine_get_gv_interpolation_weight: get interpolation weight for GV */
349 double HTS_Engine_get_gv_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index)
351 return engine->condition.gv_iw[voice_index][stream_index];
354 /* HTS_Engine_get_total_state: get total number of state */
355 size_t HTS_Engine_get_total_state(HTS_Engine * engine)
357 return HTS_SStreamSet_get_total_state(&engine->sss);
360 /* HTS_Engine_set_state_mean: set mean value of state */
361 void HTS_Engine_set_state_mean(HTS_Engine * engine, size_t stream_index, size_t state_index, size_t vector_index, double f)
363 HTS_SStreamSet_set_mean(&engine->sss, stream_index, state_index, vector_index, f);
366 /* HTS_Engine_get_state_mean: get mean value of state */
367 double HTS_Engine_get_state_mean(HTS_Engine * engine, size_t stream_index, size_t state_index, size_t vector_index)
369 return HTS_SStreamSet_get_mean(&engine->sss, stream_index, state_index, vector_index);
372 /* HTS_Engine_get_state_duration: get state duration */
373 size_t HTS_Engine_get_state_duration(HTS_Engine * engine, size_t state_index)
375 return HTS_SStreamSet_get_duration(&engine->sss, state_index);
378 /* HTS_Engine_get_nvoices: get number of voices */
379 size_t HTS_Engine_get_nvoices(HTS_Engine * engine)
381 return HTS_ModelSet_get_nvoices(&engine->ms);
384 /* HTS_Engine_get_nstream: get number of stream */
385 size_t HTS_Engine_get_nstream(HTS_Engine * engine)
387 return HTS_ModelSet_get_nstream(&engine->ms);
390 /* HTS_Engine_get_nstate: get number of state */
391 size_t HTS_Engine_get_nstate(HTS_Engine * engine)
393 return HTS_ModelSet_get_nstate(&engine->ms);
396 /* HTS_Engine_get_total_frame: get total number of frame */
397 size_t HTS_Engine_get_total_frame(HTS_Engine * engine)
399 return HTS_GStreamSet_get_total_frame(&engine->gss);
402 /* HTS_Engine_get_nsamples: get number of samples */
403 size_t HTS_Engine_get_nsamples(HTS_Engine * engine)
405 return HTS_GStreamSet_get_total_nsamples(&engine->gss);
408 /* HTS_Engine_get_generated_parameter: output generated parameter */
409 double HTS_Engine_get_generated_parameter(HTS_Engine * engine, size_t stream_index, size_t frame_index, size_t vector_index)
411 return HTS_GStreamSet_get_parameter(&engine->gss, stream_index, frame_index, vector_index);
414 /* HTS_Engine_get_generated_speech: output generated speech */
415 double HTS_Engine_get_generated_speech(HTS_Engine * engine, size_t index)
417 return HTS_GStreamSet_get_speech(&engine->gss, index);
420 /* HTS_Engine_generate_state_sequence: generate state sequence (1st synthesis step) */
/* Builds the state stream set from the model set and the already loaded */
/* label, using the current speed, phoneme-alignment flag and interpolation */
/* weights, then optionally shifts one mean value per state by the */
/* additional half-tone setting. */
/* NOTE(review): the embedded numbering skips lines throughout this function */
/* (e.g. 424-425, 428-429, 436-437, 439, 441, 443-449), so the declaration of */
/* f, the first clamp branch, the return statements and everything after the */
/* last visible `if` are not visible in this view - confirm against the full file. */
421 static HTS_Boolean HTS_Engine_generate_state_sequence(HTS_Engine * engine)
423 size_t i, state_index, model_index;
/* create the state stream set; on failure the per-utterance data is released */
426 if (HTS_SStreamSet_create(&engine->sss, &engine->ms, &engine->label, engine->condition.phoneme_alignment_flag, engine->condition.speed, engine->condition.duration_iw, engine->condition.parameter_iw, engine->condition.gv_iw) != TRUE) {
427 HTS_Engine_refresh(engine);
/* only touch the means when a half-tone offset was requested */
430 if (engine->condition.additional_half_tone != 0.0) {
433 for (i = 0; i < HTS_Engine_get_total_state(engine); i++) {
/* element 0 of stream 1 is read, shifted and written back */
/* NOTE(review): presumably stream 1 is log F0, given the MAX_LF0 bound below - confirm */
434 f = HTS_Engine_get_state_mean(engine, 1, i, 0);
435 f += engine->condition.additional_half_tone * HALF_TONE;
/* NOTE(review): this `else if` has no visible preceding `if`; the matching branch is in the skipped lines */
438 else if (f > MAX_LF0)
440 HTS_Engine_set_state_mean(engine, 1, i, 0, f);
/* state_index is compared against the number of states per model; its update is not visible here */
442 if (state_index >= HTS_Engine_get_nstate(engine)) {
451 /* HTS_Engine_generate_state_sequence_from_fn: genereate state sequence from file name (1st synthesis step) */
452 HTS_Boolean HTS_Engine_generate_state_sequence_from_fn(HTS_Engine * engine, const char *fn)
454 HTS_Engine_refresh(engine);
455 HTS_Label_load_from_fn(&engine->label, engine->condition.sampling_frequency, engine->condition.fperiod, fn);
456 return HTS_Engine_generate_state_sequence(engine);
459 /* HTS_Engine_generate_state_sequence_from_strings: generate state sequence from strings (1st synthesis step) */
460 HTS_Boolean HTS_Engine_generate_state_sequence_from_strings(HTS_Engine * engine, char **lines, size_t num_lines)
462 HTS_Engine_refresh(engine);
463 HTS_Label_load_from_strings(&engine->label, engine->condition.sampling_frequency, engine->condition.fperiod, lines, num_lines);
464 return HTS_Engine_generate_state_sequence(engine);
467 /* HTS_Engine_generate_parameter_sequence: generate parameter sequence (2nd synthesis step) */
468 HTS_Boolean HTS_Engine_generate_parameter_sequence(HTS_Engine * engine)
470 return HTS_PStreamSet_create(&engine->pss, &engine->sss, engine->condition.msd_threshold, engine->condition.gv_weight);
473 /* HTS_Engine_generate_sample_sequence: generate sample sequence (3rd synthesis step) */
474 HTS_Boolean HTS_Engine_generate_sample_sequence(HTS_Engine * engine)
476 return HTS_GStreamSet_create(&engine->gss, &engine->pss, engine->condition.stage, engine->condition.use_log_gain, engine->condition.sampling_frequency, engine->condition.fperiod, engine->condition.alpha, engine->condition.beta, &engine->condition.stop, engine->condition.volume, engine->condition.audio_buff_size > 0 ? &engine->audio : NULL);
479 /* HTS_Engine_synthesize: synthesize speech */
/* Runs the three synthesis steps in order (state sequence, parameter */
/* sequence, sample sequence) on the label already loaded into the engine. */
/* When a step does not return TRUE the per-utterance data is released with */
/* HTS_Engine_refresh. */
/* NOTE(review): the embedded numbering skips lines after each refresh call */
/* (484-485, 488-489, 492-496), so the return statements are not visible in */
/* this view - confirm against the full file. */
480 static HTS_Boolean HTS_Engine_synthesize(HTS_Engine * engine)
/* step 1: state sequence */
482 if (HTS_Engine_generate_state_sequence(engine) != TRUE) {
483 HTS_Engine_refresh(engine);
/* step 2: parameter sequence */
486 if (HTS_Engine_generate_parameter_sequence(engine) != TRUE) {
487 HTS_Engine_refresh(engine);
/* step 3: sample sequence */
490 if (HTS_Engine_generate_sample_sequence(engine) != TRUE) {
491 HTS_Engine_refresh(engine);
497 /* HTS_Engine_synthesize_from_fn: synthesize speech from file name */
498 HTS_Boolean HTS_Engine_synthesize_from_fn(HTS_Engine * engine, const char *fn)
500 HTS_Engine_refresh(engine);
501 HTS_Label_load_from_fn(&engine->label, engine->condition.sampling_frequency, engine->condition.fperiod, fn);
502 return HTS_Engine_synthesize(engine);
505 /* HTS_Engine_synthesize_from_strings: synthesize speech from strings */
506 HTS_Boolean HTS_Engine_synthesize_from_strings(HTS_Engine * engine, char **lines, size_t num_lines)
508 HTS_Engine_refresh(engine);
509 HTS_Label_load_from_strings(&engine->label, engine->condition.sampling_frequency, engine->condition.fperiod, lines, num_lines);
510 return HTS_Engine_synthesize(engine);
513 /* HTS_Engine_save_information: save trace information */
/* Writes a human-readable report to fp: global settings, duration and */
/* per-stream settings, and for every label entry the tree/PDF indices and */
/* state durations that were used. */
/* Side effect: duration_iw, parameter_iw and gv_iw are normalised in place */
/* (each non-zero weight is divided by the sum of its group) before printing. */
/* NOTE(review): the embedded numbering skips lines throughout this function */
/* (e.g. 515, 517, 534, 536, 539-540, 553-554, 589, 591-594, 620, 622, */
/* 628-633), so the declaration of temp, `else` keywords, closing braces and */
/* possibly short output statements are not visible in this view - confirm */
/* against the full file. */
514 void HTS_Engine_save_information(HTS_Engine * engine, FILE * fp)
516 size_t i, j, k, l, m, n;
518 HTS_Condition *condition = &engine->condition;
519 HTS_ModelSet *ms = &engine->ms;
520 HTS_Label *label = &engine->label;
521 HTS_SStreamSet *sss = &engine->sss;
522 HTS_PStreamSet *pss = &engine->pss;
524 /* global parameter */
525 fprintf(fp, "[Global parameter]\n");
526 fprintf(fp, "Sampring frequency -> %8lu(Hz)\n", (unsigned long) condition->sampling_frequency);
527 fprintf(fp, "Frame period -> %8lu(point)\n", (unsigned long) condition->fperiod);
/* the same frame period expressed in milliseconds */
528 fprintf(fp, " %8.5f(msec)\n", 1e+3 * condition->fperiod / condition->sampling_frequency);
529 fprintf(fp, "All-pass constant -> %8.5f\n", (float) condition->alpha);
/* gamma is printed as -1 / stage, or 0 when stage is 0 */
530 fprintf(fp, "Gamma -> %8.5f\n", (float) (condition->stage == 0 ? 0.0 : -1.0 / condition->stage));
531 if (condition->stage != 0) {
532 if (condition->use_log_gain == TRUE)
533 fprintf(fp, "Log gain flag -> TRUE\n");
535 fprintf(fp, "Log gain flag -> FALSE\n");
537 fprintf(fp, "Postfiltering coefficient -> %8.5f\n", (float) condition->beta);
538 fprintf(fp, "Audio buffer size -> %8lu(sample)\n", (unsigned long) condition->audio_buff_size);
541 /* duration parameter */
542 fprintf(fp, "[Duration parameter]\n");
543 fprintf(fp, "Number of states -> %8lu\n", (unsigned long) HTS_ModelSet_get_nstate(ms));
544 fprintf(fp, " Interpolation size -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
545 /* check interpolation */
/* sum the duration weights, then rescale the non-zero ones by that sum (modifies the condition) */
546 for (i = 0, temp = 0.0; i < HTS_ModelSet_get_nvoices(ms); i++)
547 temp += condition->duration_iw[i];
548 for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++)
549 if (condition->duration_iw[i] != 0.0)
550 condition->duration_iw[i] /= temp;
551 for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++)
552 fprintf(fp, " Interpolation weight[%2lu] -> %8.0f(%%)\n", (unsigned long) i, (float) (100 * condition->duration_iw[i]));
555 fprintf(fp, "[Stream parameter]\n");
556 for (i = 0; i < HTS_ModelSet_get_nstream(ms); i++) {
557 /* stream parameter */
558 fprintf(fp, "Stream[%2lu] vector length -> %8lu\n", (unsigned long) i, (unsigned long) HTS_ModelSet_get_vector_length(ms, i));
559 fprintf(fp, " Dynamic window size -> %8lu\n", (unsigned long) HTS_ModelSet_get_window_size(ms, i));
561 fprintf(fp, " Interpolation size -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
/* parameter_iw is indexed [stream][voice]; normalised in place like the duration weights */
562 for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
563 temp += condition->parameter_iw[i][j];
564 for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
565 if (condition->parameter_iw[i][j] != 0.0)
566 condition->parameter_iw[i][j] /= temp;
567 for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
568 fprintf(fp, " Interpolation weight[%2lu] -> %8.0f(%%)\n", (unsigned long) j, (float) (100 * condition->parameter_iw[i][j]));
570 if (HTS_ModelSet_is_msd(ms, i)) { /* for MSD */
571 fprintf(fp, " MSD flag -> TRUE\n");
572 fprintf(fp, " MSD threshold -> %8.5f\n", condition->msd_threshold[i]);
573 } else { /* for non MSD */
574 fprintf(fp, " MSD flag -> FALSE\n");
577 if (HTS_ModelSet_use_gv(ms, i)) {
578 fprintf(fp, " GV flag -> TRUE\n");
579 fprintf(fp, " GV weight -> %8.0f(%%)\n", (float) (100 * condition->gv_weight[i]));
580 fprintf(fp, " GV interpolation size -> %8lu\n", (unsigned long) HTS_ModelSet_get_nvoices(ms));
/* gv_iw is indexed [stream][voice]; normalised in place as well */
582 for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
583 temp += condition->gv_iw[i][j];
584 for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
585 if (condition->gv_iw[i][j] != 0.0)
586 condition->gv_iw[i][j] /= temp;
587 for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
588 fprintf(fp, " GV interpolation weight[%2lu] -> %8.0f(%%)\n", (unsigned long) j, (float) (100 * condition->gv_iw[i][j]));
590 fprintf(fp, " GV flag -> FALSE\n");
595 /* generated sequence */
596 fprintf(fp, "[Generated sequence]\n");
597 fprintf(fp, "Number of HMMs -> %8lu\n", (unsigned long) HTS_Label_get_size(label));
598 fprintf(fp, "Number of stats -> %8lu\n", (unsigned long) HTS_Label_get_size(label) * HTS_ModelSet_get_nstate(ms));
/* length in seconds = total frames * frame period / sampling frequency */
599 fprintf(fp, "Length of this speech -> %8.3f(sec)\n", (float) ((double) HTS_PStreamSet_get_total_frame(pss) * condition->fperiod / condition->sampling_frequency));
/* NOTE(review): the value printed here is total frames * frame period although the label says "(frames)" - confirm the intended unit */
600 fprintf(fp, " -> %8lu(frames)\n", (unsigned long) HTS_PStreamSet_get_total_frame(pss) * condition->fperiod);
/* one section per label entry */
602 for (i = 0; i < HTS_Label_get_size(label); i++) {
603 fprintf(fp, "HMM[%2lu]\n", (unsigned long) i);
604 fprintf(fp, " Name -> %s\n", HTS_Label_get_string(label, i));
605 fprintf(fp, " Duration\n");
/* duration tree / PDF index of this label in every voice (k and l receive the indices) */
606 for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++) {
607 fprintf(fp, " Interpolation[%2lu]\n", (unsigned long) j);
608 HTS_ModelSet_get_duration_index(ms, j, HTS_Label_get_string(label, i), &k, &l);
609 fprintf(fp, " Tree index -> %8lu\n", (unsigned long) k);
610 fprintf(fp, " PDF index -> %8lu\n", (unsigned long) l);
/* states are printed and looked up with an offset of 2 (j + 2) */
612 for (j = 0; j < HTS_ModelSet_get_nstate(ms); j++) {
613 fprintf(fp, " State[%2lu]\n", (unsigned long) j + 2);
/* flat state index = label index * states per model + state */
614 fprintf(fp, " Length -> %8lu(frames)\n", (unsigned long) HTS_SStreamSet_get_duration(sss, i * HTS_ModelSet_get_nstate(ms) + j));
615 for (k = 0; k < HTS_ModelSet_get_nstream(ms); k++) {
616 fprintf(fp, " Stream[%2lu]\n", (unsigned long) k);
617 if (HTS_ModelSet_is_msd(ms, k)) {
/* the state's MSD value is compared against the stream's threshold */
618 if (HTS_SStreamSet_get_msd(sss, k, i * HTS_ModelSet_get_nstate(ms) + j) > condition->msd_threshold[k])
619 fprintf(fp, " MSD flag -> TRUE\n");
621 fprintf(fp, " MSD flag -> FALSE\n");
/* parameter tree / PDF index of this state and stream in every voice (m and n receive the indices) */
623 for (l = 0; l < HTS_ModelSet_get_nvoices(ms); l++) {
624 fprintf(fp, " Interpolation[%2lu]\n", (unsigned long) l);
625 HTS_ModelSet_get_parameter_index(ms, l, k, j + 2, HTS_Label_get_string(label, i), &m, &n);
626 fprintf(fp, " Tree index -> %8lu\n", (unsigned long) m);
627 fprintf(fp, " PDF index -> %8lu\n", (unsigned long) n);
634 /* HTS_Engine_save_label: save label with time */
/* Writes one line per label entry to fp: start time, end time and the label */
/* string. Times are frame counts multiplied by rate. */
/* NOTE(review): the embedded numbering skips lines here (636-637, 639, 644, */
/* 649-652), so the declarations of i and j and whatever advances `frame` */
/* between entries are not visible in this view - confirm against the full file. */
635 void HTS_Engine_save_label(HTS_Engine * engine, FILE * fp)
638 size_t frame, state, duration;
640 HTS_Label *label = &engine->label;
641 HTS_SStreamSet *sss = &engine->sss;
642 size_t nstate = HTS_ModelSet_get_nstate(&engine->ms);
/* rate converts a frame count to time: frame period * 1.0e+07 / sampling frequency */
/* NOTE(review): presumably 100 ns units, given the 1.0e+07 factor - confirm */
643 double rate = engine->condition.fperiod * 1.0e+07 / engine->condition.sampling_frequency;
645 for (i = 0, state = 0, frame = 0; i < HTS_Label_get_size(label); i++) {
/* duration of this entry = sum of its nstate consecutive state durations */
646 for (j = 0, duration = 0; j < nstate; j++)
647 duration += HTS_SStreamSet_get_duration(sss, state++);
648 fprintf(fp, "%lu %lu %s\n", (unsigned long) (frame * rate), (unsigned long) ((frame + duration) * rate), HTS_Label_get_string(label, i));
653 /* HTS_Engine_save_generated_parameter: save generated parameter */
654 void HTS_Engine_save_generated_parameter(HTS_Engine * engine, size_t stream_index, FILE * fp)
658 HTS_GStreamSet *gss = &engine->gss;
660 for (i = 0; i < HTS_GStreamSet_get_total_frame(gss); i++)
661 for (j = 0; j < HTS_GStreamSet_get_vector_length(gss, stream_index); j++) {
662 temp = (float) HTS_GStreamSet_get_parameter(gss, stream_index, i, j);
663 fwrite(&temp, sizeof(float), 1, fp);
667 /* HTS_Engine_save_generated_speech: save generated speech */
/* Writes every generated speech sample to fp as a `short` in native byte */
/* order, one fwrite per sample. */
/* NOTE(review): the embedded numbering skips lines here (669-672, 674, */
/* 677-678, 680-682, 684-686), so the declarations of i, x and temp and most */
/* of the range-limiting branches that compute temp from x are not visible in */
/* this view - confirm against the full file. */
668 void HTS_Engine_save_generated_speech(HTS_Engine * engine, FILE * fp)
673 HTS_GStreamSet *gss = &engine->gss;
675 for (i = 0; i < HTS_GStreamSet_get_total_nsamples(gss); i++) {
676 x = HTS_GStreamSet_get_speech(gss, i);
/* NOTE(review): this `else if` has no visible preceding `if`; the matching branch is in the skipped lines */
679 else if (x < -32768.0)
683 fwrite(&temp, sizeof(short), 1, fp);
687 /* HTS_Engine_save_riff: save RIFF format file */
/* Writes a RIFF/WAVE header followed by every generated speech sample to fp, */
/* all through HTS_fwrite_little_endian. The variable names data_XX_YY give */
/* the 1-based byte positions of each field within the header. */
/* NOTE(review): the embedded numbering skips lines here (689-693, 699, */
/* 708-709, 723, 726-727, 729-731, 733-735), so the declarations of i, x, */
/* temp and data_17_20 and most of the range-limiting branches are not */
/* visible in this view - confirm against the full file. */
/* NOTE(review): header fields declared `int` are written with sizeof(int); */
/* this assumes int is 4 bytes and short is 2 bytes - confirm for the target */
/* platforms. */
688 void HTS_Engine_save_riff(HTS_Engine * engine, FILE * fp)
694 HTS_GStreamSet *gss = &engine->gss;
695 char data_01_04[] = { 'R', 'I', 'F', 'F' };
/* chunk size: sample data bytes plus 36 */
696 int data_05_08 = HTS_GStreamSet_get_total_nsamples(gss) * sizeof(short) + 36;
697 char data_09_12[] = { 'W', 'A', 'V', 'E' };
698 char data_13_16[] = { 'f', 'm', 't', ' ' };
700 short data_21_22 = 1; /* PCM */
701 short data_23_24 = 1; /* monaural */
/* sampling rate, then bytes per second (sampling rate * bytes per sample) */
702 int data_25_28 = engine->condition.sampling_frequency;
703 int data_29_32 = engine->condition.sampling_frequency * sizeof(short);
/* bytes per sample, then bits per sample */
704 short data_33_34 = sizeof(short);
705 short data_35_36 = (short) (sizeof(short) * 8);
706 char data_37_40[] = { 'd', 'a', 't', 'a' };
/* size of the sample data in bytes */
707 int data_41_44 = HTS_GStreamSet_get_total_nsamples(gss) * sizeof(short);
/* header, field by field in file order */
710 HTS_fwrite_little_endian(data_01_04, sizeof(char), 4, fp);
711 HTS_fwrite_little_endian(&data_05_08, sizeof(int), 1, fp);
712 HTS_fwrite_little_endian(data_09_12, sizeof(char), 4, fp);
713 HTS_fwrite_little_endian(data_13_16, sizeof(char), 4, fp);
714 HTS_fwrite_little_endian(&data_17_20, sizeof(int), 1, fp);
715 HTS_fwrite_little_endian(&data_21_22, sizeof(short), 1, fp);
716 HTS_fwrite_little_endian(&data_23_24, sizeof(short), 1, fp);
717 HTS_fwrite_little_endian(&data_25_28, sizeof(int), 1, fp);
718 HTS_fwrite_little_endian(&data_29_32, sizeof(int), 1, fp);
719 HTS_fwrite_little_endian(&data_33_34, sizeof(short), 1, fp);
720 HTS_fwrite_little_endian(&data_35_36, sizeof(short), 1, fp);
721 HTS_fwrite_little_endian(data_37_40, sizeof(char), 4, fp);
722 HTS_fwrite_little_endian(&data_41_44, sizeof(int), 1, fp);
/* sample data, one `short` per generated sample */
724 for (i = 0; i < HTS_GStreamSet_get_total_nsamples(gss); i++) {
725 x = HTS_GStreamSet_get_speech(gss, i);
/* NOTE(review): this `else if` has no visible preceding `if`; the matching branch is in the skipped lines */
728 else if (x < -32768.0)
732 HTS_fwrite_little_endian(&temp, sizeof(short), 1, fp);
736 /* HTS_Engine_refresh: free model per one time synthesis */
737 void HTS_Engine_refresh(HTS_Engine * engine)
739 /* free generated parameter stream set */
740 HTS_GStreamSet_clear(&engine->gss);
741 /* free parameter stream set */
742 HTS_PStreamSet_clear(&engine->pss);
743 /* free state stream set */
744 HTS_SStreamSet_clear(&engine->sss);
745 /* free label list */
746 HTS_Label_clear(&engine->label);
748 engine->condition.stop = FALSE;
751 /* HTS_Engine_clear: free engine */
752 void HTS_Engine_clear(HTS_Engine * engine)
756 if (engine->condition.msd_threshold != NULL)
757 HTS_free(engine->condition.msd_threshold);
758 if (engine->condition.duration_iw != NULL)
759 HTS_free(engine->condition.duration_iw);
760 if (engine->condition.gv_weight != NULL)
761 HTS_free(engine->condition.gv_weight);
762 if (engine->condition.parameter_iw != NULL) {
763 for (i = 0; i < HTS_ModelSet_get_nstream(&engine->ms); i++)
764 HTS_free(engine->condition.parameter_iw[i]);
765 HTS_free(engine->condition.parameter_iw);
767 if (engine->condition.gv_iw != NULL) {
768 for (i = 0; i < HTS_ModelSet_get_nstream(&engine->ms); i++)
769 HTS_free(engine->condition.gv_iw[i]);
770 HTS_free(engine->condition.gv_iw);
773 HTS_ModelSet_clear(&engine->ms);
774 HTS_Audio_clear(&engine->audio);
775 HTS_Engine_initialize(engine);
780 #endif /* !HTS_ENGINE_C */