43 #include <sphinxbase/err.h>
44 #include <sphinxbase/strfuncs.h>
45 #include <sphinxbase/filename.h>
46 #include <sphinxbase/pio.h>
49 #include "cmdln_macro.h"
53 #include "fsg_search_internal.h"
58 static const arg_t ps_args_def[] = {
65 file_exists(
const char *path)
69 tmp = fopen(path,
"rb");
75 hmmdir_exists(
const char *path)
78 char *mdef = string_join(path,
"/mdef", NULL);
80 tmp = fopen(mdef,
"rb");
88 const char *hmmdir,
const char *file)
90 char *tmp = string_join(hmmdir,
"/", file, NULL);
92 if (cmd_ln_str_r(ps->
config, arg) == NULL && file_exists(tmp))
93 cmd_ln_set_str_r(ps->
config, arg, tmp);
100 char const *hmmdir, *lmfile, *dictfile;
103 #ifdef __ADSPBLACKFIN__
104 E_INFO(
"Will not use mmap() on uClinux/Blackfin.");
105 cmd_ln_set_boolean_r(ps->
config,
"-mmap", FALSE);
110 hmmdir = cmd_ln_str_r(ps->
config,
"-hmm");
111 lmfile = cmd_ln_str_r(ps->
config,
"-lm");
112 dictfile = cmd_ln_str_r(ps->
config,
"-dict");
113 if (hmmdir == NULL && hmmdir_exists(MODELDIR
"/hmm/en_US/hub4wsj_sc_8k")) {
114 hmmdir = MODELDIR
"/hmm/en_US/hub4wsj_sc_8k";
115 cmd_ln_set_str_r(ps->
config,
"-hmm", hmmdir);
117 if (lmfile == NULL && !cmd_ln_str_r(ps->
config,
"-fsg")
118 && !cmd_ln_str_r(ps->
config,
"-jsgf")
119 && file_exists(MODELDIR
"/lm/en_US/hub4.5000.DMP")) {
120 lmfile = MODELDIR
"/lm/en_US/hub4.5000.DMP";
121 cmd_ln_set_str_r(ps->
config,
"-lm", lmfile);
123 if (dictfile == NULL && file_exists(MODELDIR
"/lm/en_US/cmu07a.dic")) {
124 dictfile = MODELDIR
"/lm/en_US/cmu07a.dic";
125 cmd_ln_set_str_r(ps->
config,
"-dict", dictfile);
129 if (hmmdir && !path_is_absolute(hmmdir) && !hmmdir_exists(hmmdir)) {
130 char *tmphmm = string_join(MODELDIR
"/hmm/", hmmdir, NULL);
131 if (hmmdir_exists(tmphmm)) {
132 cmd_ln_set_str_r(ps->
config,
"-hmm", tmphmm);
134 E_ERROR(
"Failed to find mdef file inside the model folder specified with -hmm '%s'\n", hmmdir);
138 if (lmfile && !path_is_absolute(lmfile) && !file_exists(lmfile)) {
139 char *tmplm = string_join(MODELDIR
"/lm/", lmfile, NULL);
140 cmd_ln_set_str_r(ps->
config,
"-lm", tmplm);
143 if (dictfile && !path_is_absolute(dictfile) && !file_exists(dictfile)) {
144 char *tmpdict = string_join(MODELDIR
"/lm/", dictfile, NULL);
145 cmd_ln_set_str_r(ps->
config,
"-dict", tmpdict);
151 if ((hmmdir = cmd_ln_str_r(ps->
config,
"-hmm")) != NULL) {
152 ps_add_file(ps,
"-mdef", hmmdir,
"mdef");
153 ps_add_file(ps,
"-mean", hmmdir,
"means");
154 ps_add_file(ps,
"-var", hmmdir,
"variances");
155 ps_add_file(ps,
"-tmat", hmmdir,
"transition_matrices");
156 ps_add_file(ps,
"-mixw", hmmdir,
"mixture_weights");
157 ps_add_file(ps,
"-sendump", hmmdir,
"sendump");
158 ps_add_file(ps,
"-fdict", hmmdir,
"noisedict");
159 ps_add_file(ps,
"-lda", hmmdir,
"feature_transform");
160 ps_add_file(ps,
"-featparams", hmmdir,
"feat.params");
161 ps_add_file(ps,
"-senmgau", hmmdir,
"senmgau");
173 for (gn = ps->
searches; gn; gn = gnode_next(gn))
174 ps_search_free(gnode_ptr(gn));
185 for (gn = ps->
searches; gn; gn = gnode_next(gn)) {
186 if (0 == strcmp(ps_search_name(gnode_ptr(gn)), name))
195 char const *lmfile, *lmctl = NULL;
197 if (config && config != ps->
config) {
198 cmd_ln_free_r(ps->
config);
199 ps->
config = cmd_ln_retain(config);
202 err_set_debug_level(cmd_ln_int32_r(ps->
config,
"-debug"));
208 ps_init_defaults(ps);
211 ps_free_searches(ps);
226 if (ps->
lmath == NULL
227 || (logmath_get_base(ps->
lmath) !=
228 (float64)cmd_ln_float32_r(ps->
config,
"-logbase"))) {
230 logmath_free(ps->
lmath);
231 ps->
lmath = logmath_init
232 ((float64)cmd_ln_float32_r(ps->
config,
"-logbase"), 0,
233 cmd_ln_boolean_r(ps->
config,
"-bestpath"));
256 if (cmd_ln_str_r(ps->
config,
"-fsg") || cmd_ln_str_r(ps->
config,
"-jsgf")) {
267 else if ((lmfile = cmd_ln_str_r(ps->
config,
"-lm"))
268 || (lmctl = cmd_ln_str_r(ps->
config,
"-lmctl"))) {
272 if (cmd_ln_boolean_r(ps->
config,
"-fwdflat")
273 && cmd_ln_boolean_r(ps->
config,
"-fwdtree"))
292 ps->
perf.name =
"decode";
293 ptmr_init(&ps->
perf);
303 ps = ckd_calloc(1,
sizeof(*ps));
332 ps_free_searches(ps);
336 logmath_free(ps->
lmath);
337 cmd_ln_free_r(ps->
config);
383 || 0 != strcmp(ps_search_name(ps->
search),
"ngram"))
395 search = ps_find_search(ps,
"ngram");
396 if (search == NULL) {
405 else if (lmset != NULL) {
408 if (ngs->
lmset != NULL && ngs->
lmset != lmset)
409 ngram_model_free(ngs->
lmset);
412 if (ps_search_reinit(search, ps->
dict, ps->
d2p) < 0)
426 || 0 != strcmp(ps_search_name(ps->
search),
"fsg"))
437 search = ps_find_search(ps,
"fsg");
438 if (search == NULL) {
440 if ((search = fsg_search_init(ps->
config,
449 if (ps_search_reinit(search, ps->
dict, ps->
d2p) < 0)
458 char const *fdictfile,
char const *format)
468 newconfig = cmd_ln_init(NULL,
ps_args(), TRUE, NULL);
469 cmd_ln_set_boolean_r(newconfig,
"-dictcase",
470 cmd_ln_boolean_r(ps->
config,
"-dictcase"));
471 cmd_ln_set_str_r(newconfig,
"-dict", dictfile);
473 cmd_ln_set_str_r(newconfig,
"-fdict", fdictfile);
475 cmd_ln_set_str_r(newconfig,
"-fdict",
476 cmd_ln_str_r(ps->
config,
"-fdict"));
479 if ((dict = dict_init(newconfig, ps->
acmod->
mdef)) == NULL) {
480 cmd_ln_free_r(newconfig);
486 cmd_ln_free_r(newconfig);
492 cmd_ln_free_r(newconfig);
493 cmd_ln_set_str_r(ps->
config,
"-dict", dictfile);
495 cmd_ln_set_str_r(ps->
config,
"-fdict", fdictfile);
502 for (gn = ps->
searches; gn; gn = gnode_next(gn)) {
504 if ((rv = ps_search_reinit(search, dict, d2p)) < 0)
515 return dict_write(ps->
dict, dictfile, format);
525 ngram_model_t *lmset;
527 char **phonestr, *tmp;
531 tmp = ckd_salloc(phones);
532 np = str2words(tmp, NULL, 0);
533 phonestr = ckd_calloc(np,
sizeof(*phonestr));
534 str2words(tmp, phonestr, np);
535 pron = ckd_calloc(np,
sizeof(*pron));
536 for (i = 0; i < np; ++i) {
537 pron[i] = bin_mdef_ciphone_id(ps->
acmod->
mdef, phonestr[i]);
539 E_ERROR(
"Unknown phone %s in phone string %s\n",
552 if ((wid = dict_add_word(ps->
dict, word, pron, np)) == -1) {
566 if ((lmwid = ngram_model_add_word(lmset, word, 1.0))
567 == NGRAM_INVALID_WID)
573 if ((rv = ps_search_reinit(ps->
search, ps->
dict, ps->
d2p) < 0))
581 char const *uttid,
long maxsamps)
588 if (maxsamps != -1 || (pos = ftell(rawfh)) >= 0) {
591 if (maxsamps == -1) {
593 fseek(rawfh, 0, SEEK_END);
594 endpos = ftell(rawfh);
595 fseek(rawfh, pos, SEEK_SET);
596 maxsamps = endpos - pos;
598 data = ckd_calloc(maxsamps,
sizeof(*data));
599 total = fread(data,
sizeof(*data), maxsamps, rawfh);
606 while (!feof(rawfh)) {
610 nread = fread(data,
sizeof(*data),
sizeof(data)/
sizeof(*data), rawfh);
625 E_ERROR(
"No search module is selected, did you forget to "
626 "specify a language model or grammar?\n");
630 ptmr_reset(&ps->
perf);
631 ptmr_start(&ps->
perf);
635 ps->
uttid = ckd_salloc(uttid);
640 sprintf(nuttid,
"%09u", ps->
uttno);
641 ps->
uttid = ckd_salloc(nuttid);
657 char *logfn = string_join(ps->
mfclogdir,
"/",
658 ps->
uttid,
".mfc", NULL);
660 E_INFO(
"Writing MFCC log file: %s\n", logfn);
661 if ((mfcfh = fopen(logfn,
"wb")) == NULL) {
662 E_ERROR_SYSTEM(
"Failed to open MFCC log file %s", logfn);
670 char *logfn = string_join(ps->
rawlogdir,
"/",
671 ps->
uttid,
".raw", NULL);
673 E_INFO(
"Writing raw audio log file: %s\n", logfn);
674 if ((rawfh = fopen(logfn,
"wb")) == NULL) {
675 E_ERROR_SYSTEM(
"Failed to open raw audio log file %s", logfn);
683 char *logfn = string_join(ps->
senlogdir,
"/",
684 ps->
uttid,
".sen", NULL);
686 E_INFO(
"Writing senone score log file: %s\n", logfn);
687 if ((senfh = fopen(logfn,
"wb")) == NULL) {
688 E_ERROR_SYSTEM(
"Failed to open senone score log file %s", logfn);
700 return ps_search_start(ps->
search);
715 if ((k = ps_search_step(ps->
search,
735 if ((nfr = ps_search_forward(ps)) < 0) {
757 E_ERROR(
"Failed to process data, utterance is not started. Use start_utt to start it\n");
769 &n_samples, full_utt)) < 0)
775 if ((nfr = ps_search_forward(ps)) < 0)
800 &n_frames, full_utt)) < 0)
806 if ((nfr = ps_search_forward(ps)) < 0)
822 if ((rv = ps_search_forward(ps)) < 0) {
823 ptmr_stop(&ps->
perf);
828 if ((rv = ps_search_finish(ps->
phone_loop)) < 0) {
829 ptmr_stop(&ps->
perf);
835 i < ps->acmod->output_frame; ++i)
836 ps_search_step(ps->
search, i);
838 if ((rv = ps_search_finish(ps->
search)) < 0) {
839 ptmr_stop(&ps->
perf);
842 ptmr_stop(&ps->
perf);
845 if (cmd_ln_boolean_r(ps->
config,
"-backtrace")) {
846 char const *uttid, *hyp;
851 E_INFO(
"%s: %s (%d)\n", uttid, hyp, score);
852 E_INFO_NOFN(
"%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n",
853 "word",
"start",
"end",
"pprob",
"ascr",
"lscr",
"lback");
858 int32 post, lscr, ascr, lback;
863 E_INFO_NOFN(
"%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n",
864 word, sf, ef, logmath_exp(
ps_get_logmath(ps), post), ascr, lscr, lback);
875 ptmr_start(&ps->
perf);
876 hyp = ps_search_hyp(ps->
search, out_best_score, NULL);
878 *out_uttid = ps->
uttid;
879 ptmr_stop(&ps->
perf);
888 ptmr_start(&ps->
perf);
889 hyp = ps_search_hyp(ps->
search, NULL, out_is_final);
890 ptmr_stop(&ps->
perf);
900 ptmr_start(&ps->
perf);
901 prob = ps_search_prob(ps->
search);
903 *out_uttid = ps->
uttid;
904 ptmr_stop(&ps->
perf);
913 ptmr_start(&ps->
perf);
914 itor = ps_search_seg_iter(ps->
search, out_best_score);
915 ptmr_stop(&ps->
perf);
922 return ps_search_seg_next(seg);
934 if (out_sf) *out_sf = seg->
sf;
935 if (out_ef) *out_ef = seg->
ef;
941 if (out_ascr) *out_ascr = seg->
ascr;
942 if (out_lscr) *out_lscr = seg->
lscr;
943 if (out_lback) *out_lback = seg->
lback;
950 ps_search_seg_free(seg);
956 return ps_search_lattice(ps->
search);
961 char const *ctx1,
char const *ctx2)
964 ngram_model_t *lmset;
977 if (0 != strcmp(ps_search_name(ps->
search),
"ngram")) {
986 w1 = ctx1 ? dict_wordid(ps_search_dict(ps->
search), ctx1) : -1;
987 w2 = ctx2 ? dict_wordid(ps_search_dict(ps->
search), ctx2) : -1;
1015 assert(nbest != NULL);
1017 if (nbest->top == NULL)
1019 if (out_score) *out_score = nbest->top->
score;
1026 if (nbest->top == NULL)
1028 if (out_score) *out_score = nbest->top->
score;
1040 double *out_ncpu,
double *out_nwall)
1044 frate = cmd_ln_int32_r(ps->
config,
"-frate");
1046 *out_ncpu = ps->
perf.t_cpu;
1047 *out_nwall = ps->
perf.t_elapsed;
1052 double *out_ncpu,
double *out_nwall)
1056 frate = cmd_ln_int32_r(ps->
config,
"-frate");
1057 *out_nspeech = (double)ps->
n_frame / frate;
1058 *out_ncpu = ps->
perf.t_tot_cpu;
1059 *out_nwall = ps->
perf.t_tot_elapsed;
1069 search->
acmod = acmod;
1075 search->
dict = dict_retain(dict);
1076 search->
start_wid = dict_startwid(dict);
1082 search->
dict = NULL;
1092 dict_free(search->
dict);
1096 search->
dict = dict_retain(dict);
1097 search->
start_wid = dict_startwid(dict);
1103 search->
dict = NULL;
1119 dict_free(search->
dict);