|
|
/*******************************************************************************
* LtsCart.cpp *
*-------------*
*
*   ** WARNING **
*   CART code for LTS. This code was created in MS Research and LiJ owns
*   the algorithm. YunusM eliminated the private heap used by this code
*   and used the new and delete operators instead.
*
*   Created By: LIJ (MS Research)                               Date: 06/18/99
*   Current Owner: Fil
*
*   Copyright (C) 1999 Microsoft Corporation. All Rights Reserved
*******************************************************************************/
//--- Includes --------------------------------------------------------------
#include "StdAfx.h"
#include "LtsCart.h"
#pragma warning(disable : 4100)
/*
 * Fixed pronunciations for the exceptional cases: single-letter words and
 * input for which no pronunciation can be produced (NULL output).
 * The numeric suffix is the LCID the strings belong to.
 */
static const char *bogus_pron_1033 =
    "B OW G AH S P R AH N AH N S IY EY SH AH N";
static const char *bogus_pron_1041 =
    "N A N I"; // what?

/*
 * Letter-name pronunciations, LCID 1033.
 * Entries 0..25 are the letters 'a'..'z'; entries 26..51 are the plural
 * spellings of the same letters, in the same order.
 */
static const char *single_letter_pron_1033[52] = {
    /* a - e */ "EY", "B IY", "S IY", "D IY", "IY",
    /* f - j */ "EH F", "JH IY", "EY CH", "AY", "JH EY",
    /* k - o */ "K EY", "EH L", "EH M", "EH N", "OW",
    /* p - t */ "P IY", "K Y UW", "AA R", "EH S", "T IY",
    /* u - z */ "Y UW", "V IY", "D AH B AX L Y UW", "EH K S", "W AY", "Z IY",

    /* ---- plural spellings ---- */
    /* a - e */ "EY Z", "B IY Z", "S IY Z", "D IY Z", "IY Z",
    /* f - j */ "EH F S", "JH IY Z", "EY CH AX Z", "AY Z", "JH EY Z",
    /* k - o */ "K EY Z", "EH L Z", "EH M Z", "EH N Z", "OW Z",
    /* p - t */ "P IY Z", "K Y UW Z", "AA R Z", "EH S AX Z", "T IY Z",
    /* u - z */ "Y UW Z", "V IY Z", "D AH B AX L Y UW Z", "EH K S AX Z", "W AY Z", "Z IY Z"
};

/*
 * Letter-name pronunciations, LCID 1041.
 * Same layout as the 1033 table: 26 single letters followed by 26 plurals.
 */
static const char *single_letter_pron_1041[52] = {
    /* a - e */ "EE", "B II", "SH II", "D II", "II",
    /* f - j */ "E H U", "J II", "EE CH I", "A I", "J EE",
    /* k - o */ "K EE", "E R U", "E M U", "E N U", "OO",
    /* p - t */ "P II", "K Y UU", "AA R U", "E S U", "T II",
    /* u - z */ "Y UU", "B U I", "D A B U R Y UU", "E STOP K U S U", "W A I", "Z E STOP T O",

    /* ---- plural spellings ---- */
    /* a - e */ "EE Z U", "B II Z U", "SH II Z U", "D II Z U", "II Z U",
    /* f - j */ "E H U Z U", "J II Z U", "EE CH I Z U", "A I Z U", "J EE Z U",
    /* k - o */ "K EE Z U", "E R U Z U", "E M U Z U", "E N U Z U", "OO Z U",
    /* p - t */ "P II Z U", "K Y UU Z U", "AA R U Z U", "E S U Z U", "T II Z U",
    /* u - z */ "Y UU Z U", "B U I Z U", "D A B U R Y UU Z U", "E STOP K U S U Z U", "W A I Z U", "Z E STOP T O Z U"
};
/*
 * Look up a symbol string in a symbol table, ignoring case.
 *
 *   tab - table to search; entry i is the NUL-terminated string found at
 *         tab->storage + tab->sym_idx[i].
 *   sym - NUL-terminated symbol to look for.
 *
 * Returns the index of the first matching entry, or NO_SYMBOL when nothing
 * matches. The scan is linear on purpose: it is not worthwhile to use
 * binary search with only about 30 entries.
 */
static int symbol_to_id(LTS_SYMTAB *tab, char *sym)
{
    SPDBG_FUNC("symbol_to_id");

    const LCID lcid = MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), SORT_DEFAULT);

    for (int i = 0; i < tab->n_symbols; i++) {
        /*
         * Both operands are already ANSI strings, so compare them with the
         * ANSI entry point directly. Going through A2T() here would convert
         * (and, in UNICODE builds, stack-allocate) two buffers on every
         * iteration of the loop.
         */
        if (CSTR_EQUAL == CompareStringA(lcid, NORM_IGNORECASE,
                                         tab->storage + tab->sym_idx[i], -1,
                                         sym, -1)) {
            return i;
        }
    }

    return NO_SYMBOL;
} // static int symbol_to_id(LTS_SYMTAB *tab, char *sym)
/*
 * Map a symbol id back to its string.
 *
 *   tab - symbol table to read from.
 *   id  - index into the table.
 *
 * Returns a pointer into tab->storage for a valid id, or NULL when id is
 * outside [0, tab->n_symbols).
 */
static char *id_to_symbol(LTS_SYMTAB *tab, int id)
{
    SPDBG_FUNC("id_to_symbol");

    /* Valid ids are 0 .. n_symbols - 1; id == n_symbols is already past the end. */
    if (id < 0 || id >= tab->n_symbols) {
        return NULL;
    }

    return tab->storage + tab->sym_idx[id];
} // static char *id_to_symbol(LTS_SYMTAB *tab, int id)
/*
 * printf-style wrapper around OutputDebugStringA.
 *
 * Only active when _DEBUG is defined; otherwise the body is empty.
 * The formatted message is truncated to the size of the local buffer.
 */
__inline void ODS (const char *format, ...)
{
#ifdef _DEBUG
    SPDBG_FUNC("ODS");

    char buf[2048];
    va_list arglist;

    buf[0] = 0;

    va_start (arglist, format);
    /*
     * _vsnprintf does not write a terminator when the output fills the
     * buffer, so leave room for one and always terminate explicitly.
     */
    _vsnprintf(buf, sizeof(buf) - 1, format, arglist);
    buf[sizeof(buf) - 1] = 0;
    va_end (arglist);

    OutputDebugStringA(buf);
#endif
}
/*
 * Answer one simple question about a sample.
 *
 * The question selects a bit vector (feat[questype].feature[feature]) and a
 * context symbol of the sample (fetched with SAMPLE_GET_CONTEXT from
 * questype / context / offset). The answer is whether TST_BIT reports the
 * bit for that symbol as set.
 *
 * Returns TRUE or FALSE.
 */
__inline int ans_simp_question (LTS_FEATURE *feat, SIMPLE_QUESTION question, LTS_SAMPLE *sample)
{
    SPDBG_FUNC("ans_simp_question");

    int *bits = feat[question.questype].feature[question.feature];
    SYMBOL id;

    SAMPLE_GET_CONTEXT(sample, question.questype, question.context, question.offset, id);

    if (TST_BIT(bits, id)) {
        return TRUE;
    }
    return FALSE;
} // __inline int ans_simp_question (LTS_FEATURE *feat, SIMPLE_QUESTION question,
/*
 * Evaluate one textual product term against a sample.
 *
 * A term is a list of decimal question codes joined by '&'; a code may be
 * prefixed with '~' to negate its answer. Each code is unpacked with
 * QUES_DECODE and answered with ans_simp_question.
 *
 * Returns TRUE only when every (possibly negated) question holds. Returns
 * FALSE as soon as one fails, and also when the term is malformed (a code
 * does not start with a digit, or a code is followed by something other
 * than '&' or the end of the string).
 */
static int product_eval (LTS_FEATURE *feat, char *term, LTS_SAMPLE *sample)
{
    SPDBG_FUNC("product_eval");

    SIMPLE_QUESTION ques;
    char *cur = term;
    int negate, result;

    for (;;) {
        /* optional negation sign */
        negate = FALSE;
        if (*cur == '~') {
            negate = TRUE;
            cur++;
        }

        /* a question code must start with a digit */
        if (!isdigit(*cur)) {
            return FALSE;
        }

        /* accumulate the decimal question code */
        result = 0;
        do {
            result = result * 10 + (*cur - '0');
            cur++;
        } while (isdigit (*cur));

        QUES_DECODE(result, ques.questype, ques.context, ques.offset, ques.feature);
        if ((negate ^ ans_simp_question (feat, ques, sample)) == FALSE) {
            return FALSE;
        }

        /* end of the term: every factor was satisfied */
        if (*cur == '\0') {
            return TRUE;
        }

        /* anything but '&' between factors is a syntax error */
        if (*cur++ != '&') {
            return FALSE;
        }
    }
} // static int product_eval (LTS_FEATURE *feat, char *term, LTS_SAMPLE *sample)
/*
 * Answer a composite question given as text.
 *
 * prod is a list of product terms separated by '|'. The question is true
 * when at least one product evaluates to TRUE (see product_eval).
 *
 *   feat   - feature tables handed through to product_eval.
 *   prod   - NUL-terminated question string; it is not modified (a local
 *            copy is split in place).
 *   sample - sample the question is asked about.
 *
 * Returns TRUE or FALSE. FALSE is also returned when the string does not
 * fit into the local LONGEST_STR buffer or holds more than MAX_PRODUCTS
 * products.
 */
static int ans_comp_question(LTS_FEATURE *feat, char *prod, LTS_SAMPLE *sample)
{
    SPDBG_FUNC("ans_comp_question");

    char string[LONGEST_STR], *products[MAX_PRODUCTS], *cptr;
    int i, num_products;

    /* Refuse anything that would overflow the working copy. */
    if (strlen(prod) >= LONGEST_STR) {
        return FALSE;
    }
    strcpy(string, prod);

    /* One product per '|' separator, plus one. */
    num_products = 1;
    for (cptr = string; *cptr != '\0'; cptr++) {
        if (*cptr == '|') {
            num_products++;
        }
    }

    if (num_products > MAX_PRODUCTS) {
        return FALSE;
    }

    /*
     * Split the copy in place. The scan for the next '|' starts at the
     * first character of the product, so an empty product ("a||b") is
     * handled as well; the separator count above guarantees that a '|' is
     * found before the terminator for every product but the last.
     */
    cptr = string;
    for (i = 0; i < num_products; i++) {
        products[i] = cptr;
        if (i == num_products - 1) {
            break;
        }
        while (*cptr != '|') {
            cptr++;
        }
        *cptr++ = '\0';
    }

    for (i = 0; i < num_products; i++) {
        if (product_eval (feat, products[i], sample) == TRUE) {
            return TRUE;
        }
    }

    return FALSE;
} // static int ans_comp_question(LTS_FEATURE *feat, char *prod,
/*
 * Walk a T_NODE tree from root down to a leaf for the given sample.
 *
 * A node without a yes_child is treated as a leaf. At every other node the
 * node's composite question (prod) is asked; the walk continues into
 * yes_child when it is true and into no_child otherwise.
 *
 * Returns the leaf that was reached.
 */
static T_NODE *find_leaf(LTS_FEATURE *feat, T_NODE *root, LTS_SAMPLE *sample)
{
    SPDBG_FUNC("find_leaf");

    T_NODE *node = root;

    while (node->yes_child) {
        if (ans_comp_question(feat, node->prod, sample)) {
            node = node->yes_child;
        } else {
            node = node->no_child;
        }
    }

    return node;
} // static T_NODE *find_leaf(LTS_FEATURE *feat, T_NODE *root, LTS_SAMPLE *sample)
/*
 * Evaluate one product of a packed (binary) question.
 *
 * term points at the first question code of the product. Codes follow one
 * another until a PROD_TERM (end of this product) or QUES_TERM (end of the
 * whole question) marker. A code with the PROD_NEG bit set has that bit
 * stripped and its answer negated.
 *
 * On return *next points just past the PROD_TERM that ended this product,
 * or is NULL when the product was ended by QUES_TERM.
 *
 * Returns TRUE when every question in the product holds, FALSE otherwise.
 */
static int lts_product_eval (LTS_FEATURE *feat, LTS_PROD *term, LTS_SAMPLE *sample, LTS_PROD **next)
{
    SPDBG_FUNC("lts_product_eval");

    SIMPLE_QUESTION ques;
    LTS_PROD *cur = term;
    int negate, result;

    for (;;) {
        negate = FALSE;
        result = (*cur);
        if ((*cur) & PROD_NEG) {
            negate = TRUE;
            result = (*cur) ^ PROD_NEG;
        }

        QUES_DECODE(result, ques.questype, ques.context, ques.offset, ques.feature);

        if ((negate ^ ans_simp_question (feat, ques, sample)) == FALSE) {
            /* The product failed: skip its remaining codes to find the marker. */
            for (; *cur != PROD_TERM && *cur != QUES_TERM; cur++) {
            }
            if (*cur == QUES_TERM) {
                *next = NULL;
            } else {
                *next = cur + 1;
            }
            return FALSE;
        }

        cur++;
        if (*cur == QUES_TERM) {
            *next = NULL;
            return TRUE;
        }
        if (*cur == PROD_TERM) {
            *next = cur + 1;
            return TRUE;
        }
    }
} // static int lts_product_eval (LTS_FEATURE *feat, LTS_PROD *term,
/*
 * Answer a packed composite question stored in a tree.
 *
 * The question starts idx bytes into tree->p_prod and consists of one or
 * more products. It is true as soon as one product evaluates to TRUE;
 * products are tried in order until lts_product_eval reports that there is
 * no further product.
 *
 * Returns TRUE or FALSE.
 */
static int lts_ans_comp_question(LTS_TREE UNALIGNED *tree, LTS_FEATURE *feat, int idx, LTS_SAMPLE *sample)
{
    SPDBG_FUNC("lts_ans_comp_question");

    LTS_PROD *term = (LTS_PROD *) ((char *) tree->p_prod + idx);
    LTS_PROD *next;

    do {
        if (lts_product_eval (feat, term, sample, &next) == TRUE) {
            return TRUE;
        }
        term = next;
    } while (term != NULL);

    return FALSE;
} // static int lts_ans_comp_question(LTS_TREE *tree, LTS_FEATURE *feat,
/*
 * Walk the packed node array of a tree from root down to a leaf.
 *
 * At a non-leaf node the composite question at offset node->idx is asked.
 * When it is true the walk moves to root + yes, otherwise to
 * root + yes + 1 (both relative to the current node).
 *
 * Returns the first node for which IS_LEAF_NODE is true.
 */
static LTS_NODE *lts_find_leaf(LTS_TREE UNALIGNED *tree, LTS_FEATURE *feat, LTS_NODE *root, LTS_SAMPLE *sample)
{
    SPDBG_FUNC("lts_find_leaf");

    LTS_NODE *node = root;

    while (!(IS_LEAF_NODE(node))) {
        if (lts_ans_comp_question(tree, feat, ((LTS_NODE UNALIGNED *)node)->idx, sample)) {
            node = node + ((LTS_NODE UNALIGNED *)node)->yes;
        } else {
            node = node + ((LTS_NODE UNALIGNED *)node)->yes + 1;
        }
    }

    return node;
} // static LTS_NODE *lts_find_leaf(LTS_TREE *tree, LTS_FEATURE *feat,
/*
 * Find the output distribution for one input position.
 *
 * The tree is chosen by the input symbol *pIn. A sample is built from
 * pIn / pOut so that the tree-walking code can be shared with training,
 * and the tree is walked to a leaf.
 *
 * Side effect: *pOut is overwritten with NULL_SYMBOL_ID + 1, because the
 * current output slot must not hold NULL_SYMBOL_ID during the walk.
 *
 * Returns the distribution located leaf->idx bytes into tree->p_dist.
 */
static LTS_DIST *lts_find_leaf_count(LTS_FOREST *l_forest, SYMBOL *pIn, SYMBOL *pOut)
{
    SPDBG_FUNC("lts_find_leaf_count");

    LTS_SAMPLE sample;
    LTS_TREE UNALIGNED *tree = l_forest->tree[*pIn];
    LTS_NODE UNALIGNED *leaf;

    sample.pIn = pIn;
    sample.pOut = pOut;

    /* *pOut cannot be NULL_SYMBOL_ID */
    *pOut = NULL_SYMBOL_ID + 1;

    leaf = lts_find_leaf(tree, l_forest->features, &(tree->nodes[0]), &sample);

    return (LTS_DIST *) ((char *)tree->p_dist + leaf->idx);
} // static LTS_DIST *lts_find_leaf_count(LTS_FOREST *l_forest, SYMBOL *pIn,
/*
 * Create an empty output result.
 *
 * The result starts with room for MAX_ALT_STRINGS string pointers
 * (allocated with new[]) and num_strings == 0.
 *
 * Returns the new result, or NULL when either allocation yields NULL.
 * l_forest is not used.
 */
static LTS_OUT_RESULT *allocate_out_result(LTS_FOREST *l_forest)
{
    SPDBG_FUNC("allocate_out_result");

    LTS_OUT_RESULT *res = new LTS_OUT_RESULT;
    if (!res) {
        return NULL;
    }

    res->out_strings = new LTS_OUT_STRING *[MAX_ALT_STRINGS];
    if (!res->out_strings) {
        delete res;
        return NULL;
    }

    res->num_allocated_strings = MAX_ALT_STRINGS;
    res->num_strings = 0;

    return res;
} // static LTS_OUT_RESULT *allocate_out_result(LTS_FOREST *l_forest)
/*
 * Release an output result together with every string it owns.
 *
 *   res - result to destroy; NULL is accepted and ignored.
 *
 * The pointer array has one of two origins, told apart by its capacity:
 * exactly MAX_ALT_STRINGS entries means it is the initial array created
 * with new[], any other capacity means it was replaced by a calloc() array
 * when the result was grown. l_forest is not used.
 */
static void free_out_result(LTS_FOREST *l_forest, LTS_OUT_RESULT *res)
{
    SPDBG_FUNC("free_out_result");

    int i;

    if (!res) {
        return;
    }

    for (i = 0; i < res->num_strings; i++) {
        delete res->out_strings[i];
    }

    if (res->num_allocated_strings == MAX_ALT_STRINGS) {
        /* allocated with new[], so it must be released with delete[] */
        delete [] res->out_strings;
    } else {
        free(res->out_strings); /* dirty */
    }

    delete res;
} // static void free_out_result(LTS_FOREST *l_forest, LTS_OUT_RESULT *res)
/*
 * Make sure a result can hold at least `min` string pointers.
 *
 * The capacity is raised in steps of INC_ALT_STRINGS. The new array comes
 * from calloc(); the existing pointers are copied over and the old array
 * is released with the deallocator that matches its origin (delete[] for
 * the initial MAX_ALT_STRINGS array, free() for an earlier calloc() array).
 *
 * Returns true on success. Returns false when the allocation fails, in
 * which case `res` is left exactly as it was. l_forest is not used.
 */
static bool reallocate_out_result(LTS_FOREST *l_forest, LTS_OUT_RESULT *res, int min)
{
    SPDBG_FUNC("reallocate_out_result");

    const int old_size = res->num_allocated_strings;
    int s = old_size;
    LTS_OUT_STRING **old_strings = res->out_strings;
    LTS_OUT_STRING **new_strings;

    /*
     * Nothing to do when the current array is already big enough. This
     * also keeps a calloc() array from ever being recorded with a capacity
     * of MAX_ALT_STRINGS, which is the value used to recognise the initial
     * new[] array when the result is freed.
     */
    if (min <= old_size) {
        return true;
    }

    while (s < min) {
        s += INC_ALT_STRINGS;
    }

    new_strings = (LTS_OUT_STRING **) calloc(s, sizeof(LTS_OUT_STRING *));
    if (!new_strings) {
        /* res->out_strings is untouched, so the caller can still free res */
        return false;
    }

    memcpy(new_strings, old_strings, old_size * sizeof(LTS_OUT_STRING *));

    if (old_size == MAX_ALT_STRINGS) {
        delete [] old_strings;
    } else {
        free(old_strings);
    }

    res->out_strings = new_strings;
    res->num_allocated_strings = s;
    ODS("increased out_strings to %d in order to meet %d\n", s, min);

    return true;
} // static void reallocate_out_result(LTS_FOREST *l_forest, LTS_OUT_RESULT *res,
/*
 * Append to `res` every string of `tmpRes`, each prefixed with symbol `i`.
 *
 * The probability of an appended string is
 *     count * inv_sum * (probability of the tmpRes string).
 *
 *   res     - result being extended.
 *   i       - symbol placed in front of every copied string.
 *   count   - count of that symbol in its distribution.
 *   inv_sum - reciprocal of the distribution's total count.
 *   tmpRes  - strings to copy.
 *
 * Returns true on success; tmpRes has then been freed. Returns false when
 * memory runs out; res->num_strings is then unchanged, strings created by
 * this call have been deleted again, and tmpRes is NOT freed (it stays
 * with the caller).
 */
static bool grow_out_result(LTS_FOREST *l_forest, LTS_OUT_RESULT *res, SYMBOL i, int count, float inv_sum, LTS_OUT_RESULT *tmpRes)
{
    SPDBG_FUNC("grow_out_result");

    const int base = res->num_strings;
    const int needed = base + tmpRes->num_strings;
    int j;

    if (needed >= res->num_allocated_strings) {
        if (!reallocate_out_result(l_forest, res, needed)) {
            return false;
        }
    }

    for (j = 0; j < tmpRes->num_strings; j++) {
        SYMBOL *psrc = tmpRes->out_strings[j]->psym;
        SYMBOL *ptgt;
        LTS_OUT_STRING *str = new LTS_OUT_STRING;

        /* Check the string that was just allocated, not the pointer array. */
        if (!str) {
            while (j > 0) {
                j--;
                delete res->out_strings[base + j];
                res->out_strings[base + j] = NULL;
            }
            return false;
        }
        res->out_strings[base + j] = str;

        /* new string = i followed by the tmpRes string, NULL_SYMBOL_ID terminated */
        ptgt = str->psym;
        *ptgt++ = i;
        while (*psrc != NULL_SYMBOL_ID) {
            *ptgt++ = *psrc++;
        }
        *ptgt++ = NULL_SYMBOL_ID;

        str->prob = count * inv_sum * tmpRes->out_strings[j]->prob;
    }

    res->num_strings += tmpRes->num_strings;
    free_out_result(l_forest, tmpRes);

    return true;
} // static void grow_out_result(LTS_FOREST *l_forest, LTS_OUT_RESULT *res,
static LTS_OUT_RESULT *gen_one_output(LTS_FOREST *l_forest, int len, SYMBOL *input_id, int in_index, SYMBOL *output_id, float cutoff) { SPDBG_FUNC("gen_one_output");
SYMBOL out[SP_MAX_WORD_LENGTH], *pOut; LTS_OUT_RESULT *res = allocate_out_result(l_forest); if (!res) { return NULL; }
int sum, i, dim; LTS_DIST UNALIGNED *pdf; LTS_PAIR UNALIGNED *l_pair, *lp; float cut, inv_sum;
/*
* copy output_id to local */ SYMBOL *psrc = output_id - 1, *ptgt = out; while (*psrc != NULL_SYMBOL_ID) psrc--; while (psrc != output_id) *ptgt++ = *psrc++; pOut = ptgt; /* sanity check */ if (pOut - out != in_index + 1) { // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
int *z=0; z[0]=z[1]; }
if (in_index == len - 1) { pdf = lts_find_leaf_count(l_forest, input_id + in_index, pOut); l_pair = &(pdf->p_pair); dim = pdf->c_dists; for (lp = l_pair, sum = 0, i = 0; i < dim; i++, lp++) { sum += lp->cnt; } SPDBG_ASSERT(sum > 0); inv_sum = 1.0f / sum; cut = cutoff * sum; for (lp = l_pair, i = 0; i < dim; i++, lp++) { if ((float)(lp->cnt) > cut) { res->out_strings[res->num_strings] = new LTS_OUT_STRING; if (NULL == res->out_strings[res->num_strings]) { return NULL; } res->out_strings[res->num_strings]->psym[0] = (SYMBOL) lp->id; res->out_strings[res->num_strings]->psym[1] = NULL_SYMBOL_ID; res->out_strings[res->num_strings]->prob = lp->cnt * inv_sum; res->num_strings++; } /* cut */ } } else { LTS_OUT_RESULT *tmpRes;
pdf = lts_find_leaf_count(l_forest, input_id + in_index, pOut); dim = pdf->c_dists; l_pair = &(pdf->p_pair); for (lp = l_pair, sum = 0, i = 0; i < dim; i++, lp++) { sum += lp->cnt; } SPDBG_ASSERT(sum > 0);
inv_sum = 1.0f / sum; cut = cutoff * sum; for (lp = l_pair, i = 0; i < dim; i++, lp++) { if ((float)(lp->cnt) > cut) { SYMBOL *pTmpOut = pOut + 1; *pOut = (SYMBOL) lp->id; tmpRes = gen_one_output(l_forest, len, input_id, in_index + 1, pTmpOut, cutoff); if (!tmpRes) { return NULL; }
if (!grow_out_result(l_forest, res, (SYMBOL)(lp->id), lp->cnt, inv_sum, tmpRes)) { return NULL; } } } /* i */ } /* else */
return res; } // static LTS_OUT_RESULT *gen_one_output(LTS_FOREST *l_forest, int len,
static int comp_out_result_prob(const void *vp1, const void *vp2) { SPDBG_FUNC("comp_out_result_prob");
LTS_OUT_STRING **p1 = (LTS_OUT_STRING **) vp1, **p2 = (LTS_OUT_STRING **) vp2;
if ((*p1)->prob > (*p2)->prob) { return -1; } else if ((*p1)->prob < (*p2)->prob) { return 1; } else { return 0; } } // static int comp_out_result_prob(const void *vp1, const void *vp2)
/*
 * Convert generated output strings into the forest's output buffer.
 *
 *   l_forest - l_forest->out receives the word and the pronunciations;
 *              new entries are appended after the out.num_prons existing
 *              ones.
 *   out      - generated strings; consumed (freed) by this call. NULL is
 *              accepted and leaves the output buffer untouched.
 *   word     - copied to out.word; NULL stores an empty word.
 *              NOTE(review): the copy is unbounded; the size of out.word
 *              is not visible here - confirm callers keep word short.
 *
 * Steps: normalize the probabilities, sort by descending probability,
 * keep at most the number of entries that still fit below
 * MAX_OUTPUT_STRINGS (renormalizing the kept ones), drop entries below
 * MIN_OUT_PROB, and render each symbol string as text: symbols are joined
 * with spaces, '_' becomes a space and '#' is dropped together with the
 * character following it. Text is cut at a symbol boundary so that it
 * stays shorter than SP_MAX_PRON_LENGTH. Entries that render to an empty
 * string are not counted.
 */
static void lts_fill_out_buffer(LTS_FOREST *l_forest, LTS_OUT_RESULT *out, char *word)
{
    SPDBG_FUNC("lts_fill_out_buffer");

    int i, j, n;
    float inv_sum, sum = 0.0f;
    char phnstr[LONGEST_STR];
    char *tmp;
    LTS_SYMTAB *tab = l_forest->symbols;

    if (out == NULL) {
        return;
    }

    if (word) {
        strcpy(l_forest->out.word, word);
    } else {
        l_forest->out.word[0] = 0;
    }

    /* normalize probabilities (skipped when there is nothing to divide by) */
    for (i = 0; i < out->num_strings; i++) {
        sum += out->out_strings[i]->prob;
    }
    if (sum > 0.0f) {
        inv_sum = 1.0f / sum;
        for (i = 0; i < out->num_strings; i++) {
            out->out_strings[i]->prob *= inv_sum;
        }
    }

    /* sort them according to the prob field */
    qsort(out->out_strings, out->num_strings, sizeof(LTS_OUT_STRING *), &comp_out_result_prob);

    if (out->num_strings > MAX_OUTPUT_STRINGS - l_forest->out.num_prons) {
        /* more strings than free slots: keep the best n and renormalize them */
        n = MAX_OUTPUT_STRINGS - l_forest->out.num_prons;
        for (sum = 0.0f, i = 0; i < n; i++) {
            sum += out->out_strings[i]->prob;
        }
        if (sum > 0.0f) {
            inv_sum = 1.0f / sum;
            for (i = 0; i < n; i++) {
                out->out_strings[i]->prob *= inv_sum;
            }
        }
    } else {
        n = out->num_strings;
    }

    for (j = l_forest->out.num_prons, i = 0; i < n; i++) {
        SYMBOL *p = out->out_strings[i]->psym;
        char *psrc, *ptgt;
        char *pstr;
        size_t used = 0;

        if (out->out_strings[i]->prob < MIN_OUT_PROB) {
            continue;
        }

        phnstr[0] = 0;
        l_forest->out.pron[j].prob = out->out_strings[i]->prob;
        pstr = l_forest->out.pron[j].pstr;

        /* join the symbol names, each followed by one space */
        while (*p != NULL_SYMBOL_ID) {
            tmp = id_to_symbol(&(tab[OUTPUT]), *p++);
            SPDBG_ASSERT(tmp);
            if (tmp) {
                size_t add = strlen(tmp);

                /* symbol + separating space + terminator must fit */
                if (used + add + 2 > sizeof(phnstr)) {
                    break;
                }
                memcpy(phnstr + used, tmp, add);
                used += add;
                phnstr[used++] = ' ';
                phnstr[used] = 0;
            }
        }

        psrc = phnstr;
        ptgt = pstr;
        while (*psrc) {
            if (*psrc != '#' && *psrc != '_') {
                *ptgt++ = *psrc++;
            } else if (*psrc == '_') {
                *ptgt++ = ' ';
                psrc++;
            } else {
                psrc += 2; /* skip an extra space */
            }

            /* extreme case, truncate it */
            if (ptgt - pstr >= SP_MAX_PRON_LENGTH) {
                /* never output partial phone: back up to the last space,
                   but never before the start of the buffer */
                for (ptgt--; ptgt > pstr && !isspace((unsigned char) *ptgt); ptgt--) {
                }
                if (isspace((unsigned char) *ptgt)) {
                    ptgt++;
                }
                break;
            }
        }

        // output could contain only '# '
        if (ptgt > pstr && *(ptgt - 1) == ' ') {
            *(ptgt - 1) = 0; /* remove the last space */
        } else {
            *ptgt = 0; /* shouldn't happen unless ptgt didn't move */
        }

        if (ptgt > pstr) {
            j++;
        }
    } /* i */

    if (j <= MAX_OUTPUT_STRINGS) {
        l_forest->out.num_prons = j;
    } else {
        l_forest->out.num_prons = MAX_OUTPUT_STRINGS; // should never happen
    }

    free_out_result(l_forest, out);
} // static void lts_fill_out_buffer(LTS_FOREST *l_forest, LTS_OUT_RESULT *out,
/*
 * Store a single, fixed pronunciation with probability 1 in `out`.
 *
 *   out  - output record to fill; num_prons is set to 1.
 *   pron - pronunciation text (phones separated by spaces).
 *   word - copied to out->word.
 *
 * A pronunciation of SP_MAX_PRON_LENGTH characters or more is cut at the
 * last whitespace that fits, so a partial phone is never stored. When the
 * part that fits contains no whitespace, the stored pronunciation is
 * empty.
 */
void assign_a_fixed_pron(LTS_OUTPUT *out, const char *pron, char *word)
{
    SPDBG_FUNC("assign_a_fixed_pron");

    out->num_prons = 1;
    strcpy(out->word, word);
    out->pron[0].prob = 1.0f;

    if (strlen(pron) < SP_MAX_PRON_LENGTH) {
        strcpy(out->pron[0].pstr, pron);
    } else {
        char *start = out->pron[0].pstr;
        char *p;

        strncpy(start, pron, SP_MAX_PRON_LENGTH);

        /* truncate the last partial phoneme; never step before the buffer */
        p = start + (SP_MAX_PRON_LENGTH - 1);
        while (p > start && !isspace((unsigned char) *p)) {
            p--;
        }
        *p = 0;
    }
} // void assign_a_fixed_pron(LTS_OUTPUT *out, char *pron, char *word)
/*
 * Range test done with a single unsigned comparison.
 *
 * Returns non-zero when (unsigned)(ch - chMin) does not exceed
 * (unsigned)(chMax - chMin), i.e. when ch lies in [chMin, chMax] for
 * chMin <= chMax.
 */
inline BOOL IsCharInRangeA(int ch, int chMin, int chMax)
{
    const unsigned offset = (unsigned)(ch - chMin);
    const unsigned span = (unsigned)(chMax - chMin);

    return offset <= span;
}
/*
 * Store a letter-by-letter ("spelled out") pronunciation with
 * probability 1 in `out`.
 *
 *   out                - output record to fill; num_prons is set to 1.
 *   single_letter_pron - 52 letter pronunciations: entries 0..25 for
 *                        'a'..'z', entries 26..51 for their plurals.
 *   word               - word to spell; also copied to out->word.
 *
 * One leading punctuation character is skipped and characters that are
 * not A-Z / a-z are ignored. A letter followed by 's or 'S, and an 's'
 * followed by an apostrophe at the end of the word, uses the plural
 * entry, and the apostrophe and the S are consumed. Letters are appended,
 * separated by spaces, for as long as the result stays within
 * SP_MAX_PRON_LENGTH.
 */
void assign_a_spelling_pron(LTS_OUTPUT *out, const char * single_letter_pron[52], char *word)
{
    SPDBG_FUNC("assign_a_spelling_pron");

    char *p;
    int cchPron = 0;

    strcpy(out->word, word);
    if (ispunct((unsigned char) *word)) {
        p = word + 1;
    } else {
        p = word;
    }

    out->num_prons = 1;
    out->pron[0].prob = 1.0f;
    out->pron[0].pstr[0] = 0;

    char * pchPron = out->pron[0].pstr;

    while (*p) {
        int cPOffset = 0; // 0 for single letter, 26 for plurals
        int c = *p++;

        // Lowercaseify, and skip over non-letters
        if (IsCharInRangeA(c, 'A', 'Z')) {
            c += 'a' - 'A';
        } else if (!IsCharInRangeA(c, 'a', 'z')) {
            continue;
        }

        // Check if the next two characters are 'S (apostrophe S). Include the
        // following cases: words ending in 's 'S s' S'
        // If they are, use the plural pronunciation for the letter and skip
        // over the 'S so that the S is not spelled a second time.
        if ((p[0] == '\'') && ((0 == p[1] && 's' == c) || 's' == p[1] || 'S' == p[1])) {
            cPOffset = 26;
            p += p[1] ? 2 : 1; // skip 'S, or only the trailing apostrophe
        }

        // Make sure the string isn't too long accounting for the new phone and separator
        const char * const pchPronT = single_letter_pron[cPOffset + c - 'a'];
        const int cchPronT = (int) strlen(pchPronT);

        if ((cchPron + 1 + cchPronT) < (SP_MAX_PRON_LENGTH - 1)) { // +1 for separating space, -1 for terminating NUL
            strcpy(pchPron + cchPron, pchPronT);
            cchPron += cchPronT;
            pchPron[cchPron++] = ' ';
        } else {
            break;
        }
    }

    if (cchPron) {
        pchPron[cchPron - 1] = 0; // trim trailing space char
    }
}
/*
 * Produce pronunciations for a word.
 *
 *   l_forest    - loaded model; its `out` member is the output buffer.
 *   word        - word to pronounce. It is tokenized in place with
 *                 strtok(); only the first whitespace-delimited token is
 *                 used. May be NULL.
 *   ppLtsOutput - receives a pointer to l_forest->out.
 *
 * Leading punctuation is skipped and characters without an input symbol
 * are ignored. Depending on the remaining symbols:
 *   - none, or too many to process: the bogus pronunciation;
 *   - exactly one:                  the letter's fixed pronunciation;
 *   - no vowel (a e i o u y):       a spelled-out pronunciation;
 *   - otherwise:                    tree-generated pronunciations, preceded
 *                                   by a spelled-out one when the word has
 *                                   no lower-case (or non-letter) character.
 *
 * Returns S_OK on success, S_FALSE when the bogus pronunciation had to be
 * used, E_OUTOFMEMORY when generation failed (*ppLtsOutput is not written
 * in that case).
 */
HRESULT LtscartGetPron(LTS_FOREST *l_forest, char *word, LTS_OUTPUT **ppLtsOutput)
{
    SPDBG_FUNC("LtscartGetPron");

    HRESULT hr = S_OK;
    LTS_OUT_RESULT *pres = NULL;
    char *p, *base;
    SYMBOL buffer[LONGEST_STR], *pbuf = buffer + 1;
    int len, id, hasvowel = 0, allcapital = 1;
    bool too_long = false;

    /*
     * pbuf starts at buffer[1] and needs one more slot for the terminator,
     * so at most LONGEST_STR - 2 symbols fit. Anything of
     * SP_MAX_WORD_LENGTH symbols or more is rejected below anyway.
     */
    const int max_len = (LONGEST_STR - 2 < SP_MAX_WORD_LENGTH) ? (LONGEST_STR - 2) : SP_MAX_WORD_LENGTH;

    l_forest->out.num_prons = 0;
    buffer[0] = NULL_SYMBOL_ID;
    len = 0;

    if (word == NULL || (base = strtok(word, " \t\n")) == NULL) {
        char nul_word[] = "NUL";

        assign_a_fixed_pron(&(l_forest->out), l_forest->bogus_pron, nul_word);
        *ppLtsOutput = &(l_forest->out);
        return S_FALSE;
    }

    /* skip leading punctuation */
    for (p = base; *p && ispunct((unsigned char) *p); p++) {
    }

    char ach[2];
    ach[1] = 0;

    while (*p) {
        const int d = (unsigned char) *p++;
        const int c = tolower(d);

        if (!hasvowel && (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u' || c == 'y')) {
            hasvowel = 1;
        }
        if (allcapital && d == c) {
            allcapital = 0;
        }

        ach[0] = (char)c;

        if ((id = symbol_to_id (&(l_forest->symbols[INPUT]), ach)) == NO_SYMBOL || id == NULL_SYMBOL_ID) {
            ODS("cannot find the symbol %c, skip!\n", c);
            continue;
        }

        /* stop before the symbol buffer overflows; the word is rejected below */
        if (len >= max_len) {
            too_long = true;
            break;
        }

        pbuf[len++] = (SYMBOL) id;
    }

    pbuf[len] = NULL_SYMBOL_ID;

    if (too_long || len >= SP_MAX_WORD_LENGTH || len <= 0) {
        // fill in bogus pron below
    } else if (len == 1) {
        LTS_SYMTAB *tab = l_forest->symbols;
        char *sym = id_to_symbol(&(tab[INPUT]), pbuf[0]);

        if (sym) {
            int c = tolower((unsigned char) sym[0]);
            if (c >= 'a' && c <= 'z') {
                assign_a_fixed_pron(&(l_forest->out), l_forest->single_letter_pron[c - 'a'], word);
            }
        }
    } else if (!hasvowel) {
        assign_a_spelling_pron(&(l_forest->out), l_forest->single_letter_pron, word);
    } else {
        if (allcapital) {
            assign_a_spelling_pron(&(l_forest->out), l_forest->single_letter_pron, word);
        }

        pres = gen_one_output(l_forest, len, pbuf, 0, pbuf, DEFAULT_PRUNE);
        if (!pres) {
            return E_OUTOFMEMORY;
        }

        lts_fill_out_buffer(l_forest, pres, word);
    }

    if (l_forest->out.num_prons == 0) {
        hr = S_FALSE;
        assign_a_fixed_pron(&(l_forest->out), l_forest->bogus_pron, word);
    }

    *ppLtsOutput = &(l_forest->out);

    SPDBG_RETURN(hr);
} /* LtscartGetPron */
/*
 * Build an LTS_FOREST on top of a model image that is already in memory.
 *
 *   lcid     - language of the model; only 1033 and 1041 are supported.
 *   map_addr - start of the model image. Symbol strings, feature vectors,
 *              tree nodes, distributions and questions are NOT copied:
 *              the forest keeps pointers into this memory, so it must
 *              stay valid for as long as the forest is used.
 *
 * Image layout as read here, in order:
 *   INPUT symbol table, OUTPUT symbol table
 *       (count, index array, byte count, string storage),
 *   INPUT features, OUTPUT features
 *       (n_feat, dim, then n_feat vectors of dim ints),
 *   one tree for every input symbol except symbol 0
 *       (node count, nodes, distribution size + bytes,
 *        question size + bytes).
 *
 * Returns the forest (release it with LtscartFreeData), or NULL when the
 * lcid is not supported or memory runs out; nothing is leaked on failure.
 */
LTS_FOREST *LtscartReadData (LCID lcid, PBYTE map_addr)
{
    SPDBG_FUNC("LtscartReadData");

    int i;
    LTS_FOREST *l_forest;
    LTS_SYMTAB *tab;
    LTS_FEATURE *feat;
    int output = 0; /* current byte offset into the image */

    /* reject unsupported languages before anything is allocated */
    if (lcid != 1033 && lcid != 1041) {
        return NULL;
    }

    l_forest = (LTS_FOREST *) calloc(1, sizeof(LTS_FOREST));
    if (!l_forest) {
        return NULL;
    }

    if (lcid == 1033) {
        l_forest->bogus_pron = bogus_pron_1033;
        l_forest->single_letter_pron = single_letter_pron_1033;
    } else {
        l_forest->bogus_pron = bogus_pron_1041;
        l_forest->single_letter_pron = single_letter_pron_1041;
    }

    // read in the symbol table
    l_forest->symbols = (LTS_SYMTAB *) calloc(2, sizeof(LTS_SYMTAB));
    if (!l_forest->symbols) {
        goto fail;
    }

    tab = &(l_forest->symbols[INPUT]);
    CopyMemory(&(tab->n_symbols), map_addr + output, sizeof(int));
    output += sizeof(int);
    tab->sym_idx = (int *)(map_addr + output);
    output += tab->n_symbols * sizeof(int);
    CopyMemory(&(tab->n_bytes), map_addr + output, sizeof(int));
    output += sizeof(int);
    tab->storage = (char*)(map_addr + output);
    output += tab->n_bytes * sizeof(char);

    tab = &(l_forest->symbols[OUTPUT]);
    CopyMemory(&(tab->n_symbols), map_addr + output, sizeof(int));
    output += sizeof(int);
    tab->sym_idx = (int*)(map_addr + output);
    output += tab->n_symbols * sizeof(int);
    CopyMemory(&(tab->n_bytes), map_addr + output, sizeof(int));
    output += sizeof(int);
    tab->storage = (char*)(map_addr + output);
    output += tab->n_bytes * sizeof(char);

    // read in the feature vector
    l_forest->features = (LTS_FEATURE *) calloc(2, sizeof(LTS_FEATURE));
    if (!l_forest->features) {
        goto fail;
    }

    feat = &(l_forest->features[INPUT]);
    CopyMemory(&(feat->n_feat), map_addr + output, sizeof(int));
    output += sizeof(int);
    CopyMemory(&(feat->dim), map_addr + output, sizeof(int));
    output += sizeof(int);
    feat->feature = (int **) calloc(feat->n_feat, sizeof(int *));
    if (!feat->feature) {
        goto fail;
    }
    for (i = 0; i < feat->n_feat; i++) {
        feat->feature[i] = (int*)(map_addr + output);
        output += feat->dim * sizeof(int);
    }

    feat = &(l_forest->features[OUTPUT]);
    CopyMemory(&(feat->n_feat), map_addr + output, sizeof(int));
    output += sizeof(int);
    CopyMemory(&(feat->dim), map_addr + output, sizeof(int));
    output += sizeof(int);
    feat->feature = (int **) calloc(feat->n_feat, sizeof(int *));
    if (!feat->feature) {
        goto fail;
    }
    for (i = 0; i < feat->n_feat; i++) {
        feat->feature[i] = (int*)(map_addr + output);
        output += feat->dim * sizeof(int);
    }

    /*
     * read in the tree; slot 0 is left NULL, the image holds trees for
     * symbols 1 .. n_symbols - 1 only
     */
    l_forest->tree = (LTS_TREE **) calloc(l_forest->symbols[INPUT].n_symbols, sizeof(LTS_TREE *));
    if (!l_forest->tree) {
        goto fail;
    }

    for (i = 1; i < l_forest->symbols[INPUT].n_symbols; i++) {
        LTS_TREE *l_root;

        l_forest->tree[i] = l_root = (LTS_TREE *) calloc(1, sizeof(LTS_TREE));
        if (!l_root) {
            goto fail;
        }

        CopyMemory(&(l_root->n_nodes), map_addr + output, sizeof(int));
        output += sizeof(int);
        l_root->nodes = (LTS_NODE*)(map_addr + output);
        output += l_root->n_nodes * sizeof(LTS_NODE);

        CopyMemory(&(l_root->size_dist), map_addr + output, sizeof(int));
        output += sizeof(int);
        l_root->p_dist = (LTS_DIST*)(map_addr + output);
        output += l_root->size_dist * sizeof(char);

        CopyMemory(&(l_root->size_prod), map_addr + output, sizeof(int));
        output += sizeof(int);
        if (l_root->size_prod > 0) {
            /* p_prod stays NULL (from calloc) for a tree without questions */
            l_root->p_prod = (LTS_PROD*)(map_addr + output);
            output += l_root->size_prod * sizeof(char);
        }
    }

    return l_forest;

fail:
    /*
     * Undo whatever was allocated so far. Everything came from calloc(),
     * so members that were never reached are NULL and free(NULL) is a
     * no-op.
     */
    if (l_forest->tree) {
        for (i = 1; i < l_forest->symbols[INPUT].n_symbols; i++) {
            free(l_forest->tree[i]);
        }
        free(l_forest->tree);
    }
    if (l_forest->features) {
        free(l_forest->features[INPUT].feature);
        free(l_forest->features[OUTPUT].feature);
        free(l_forest->features);
    }
    free(l_forest->symbols);
    free(l_forest);

    return NULL;
} // LTS_FOREST *LtscartReadData(char *forest_image, HANDLE *hFile1,
/*
 * Release a forest created by LtscartReadData.
 *
 * Frees the per-symbol tree records (slots 1 .. n_symbols - 1), the tree
 * pointer array, the feature pointer arrays, the feature and symbol
 * tables and finally the forest itself. Data the forest merely points to
 * inside the model image is not touched.
 *
 *   l_forest - forest to free; NULL and partially built forests (members
 *              still NULL) are accepted.
 */
void LtscartFreeData(LTS_FOREST *l_forest)
{
    SPDBG_FUNC("LtscartFreeData");

    if (!l_forest) {
        return;
    }

    if (l_forest->tree && l_forest->symbols) {
        for (int i = 1; i < l_forest->symbols[INPUT].n_symbols; i++) {
            free(l_forest->tree[i]);
        }
    }
    free(l_forest->tree);

    if (l_forest->features) {
        free(l_forest->features[INPUT].feature);
        free(l_forest->features[OUTPUT].feature);
    }
    free(l_forest->features);

    free(l_forest->symbols);

    free(l_forest);
} // void LtscartFreeData(LTS_FOREST *l_forest, HANDLE m_hFile,
|