Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

915 lines
21 KiB

  1. /******************************************************************************
  2. * trees.cpp *
  3. *-----------*
  4. *
  5. *------------------------------------------------------------------------------
  6. * Copyright (c) 1997 Entropic Research Laboratory, Inc.
  7. * Copyright (C) 1998 Entropic, Inc
  8. * Copyright (C) 2000 Microsoft Corporation Date: 03/02/00 - 12/5/00
  9. * All Rights Reserved
  10. *
  11. ********************************************************************* mplumpe was PACOG ***/
  12. #include "trees.h"
  13. #include "list.h"
  14. #include "clusters.h"
  15. #include <assert.h>
  16. #include <ctype.h>
  17. #define MAX_QS_LEN 128
  18. #define MAX_LINE 512
  19. class CRegExp
  20. {
  21. public:
  22. CRegExp ();
  23. CRegExp (const char* string);
  24. bool Evaluate(const char* pszString);
  25. private:
  26. char m_text[MAX_QS_LEN];
  27. };
  28. //----------------------------------------------------------
  29. // Question set classes
  30. //
  31. class CQuest
  32. {
  33. public:
  34. CQuest& operator= (CQuest& rSrc)
  35. {
  36. m_pExpr = rSrc.m_pExpr;
  37. return *this;
  38. }
  39. int AddExpression (const char* pszLine);
  40. bool Matches (const char* pszString);
  41. #ifdef _DEBUG_
  42. void Debug();
  43. #endif
  44. private:
  45. CList<CRegExp> m_pExpr;
  46. };
  47. //----------------------------------------------------------
  48. //
  49. //
  50. class CQuestSet
  51. {
  52. public:
  53. bool Matches (const char* pszQuestTag, const char* pszTriph);
  54. bool AddQuestion ( const char* pszLine);
  55. void Sort();
  56. #ifdef _DEBUG_
  57. void Debug();
  58. #endif
  59. private:
  60. CList<CQuest> m_pQuest;
  61. };
  62. //----------------------------------------------------------
  63. // Tree classes
  64. //
  65. class CLeave
  66. {
  67. public:
  68. CLeave () {m_pszLeave[0] = '\0';};
  69. CLeave (const char* pszLeaveValue);
  70. const char* Value();
  71. private:
  72. char m_pszLeave[MAX_QS_LEN];
  73. };
  74. //----------------------------------------------------------
  75. //
  76. //
  77. class CBranch
  78. {
  79. public:
  80. CBranch ()
  81. {
  82. m_pszQuestion[0] = '\0';
  83. m_iLeft = 0;
  84. m_iRight = 0;
  85. }
  86. CBranch( const char* pszQuestion, int iLeft, int iRight);
  87. int Left();
  88. int Right();
  89. const char* Question();
  90. private:
  91. char m_pszQuestion[MAX_QS_LEN];
  92. int m_iLeft;
  93. int m_iRight;
  94. };
  95. //----------------------------------------------------------
  96. //
  97. //
  98. class CTree
  99. {
  100. public:
  101. CTree& operator= (CTree& rSrc)
  102. {
  103. m_branches = rSrc.m_branches;
  104. m_terminals = rSrc.m_terminals;
  105. return *this;
  106. }
  107. int AddNode( const char* pszLine);
  108. const char* Traverse(CQuestSet* pQuestSet, const char* pszTriphone);
  109. #ifdef _DEBUG_
  110. void Debug();
  111. #endif
  112. private:
  113. CList<CBranch> m_branches;
  114. CList<CLeave> m_terminals;
  115. };
  116. //----------------------------------------------------------
  117. //
  118. //
  119. class CClustTreeImp : CClustTree
  120. {
  121. public:
  122. ~CClustTreeImp();
  123. int LoadFromFile (FILE* fp);
  124. int GetNumStates (const char* pszTriphone);
  125. const char* TriphoneToCluster(const char* pszTriphone, int iState);
  126. #ifdef _DEBUG_
  127. void Debug();
  128. #endif
  129. private:
  130. int ParseTree (const char* pszLine);
  131. int CentralPhone (const char *pszTriphone, char *pszThone);
  132. CQuestSet* m_pQuestSet;
  133. CList<CTree> m_trees;
  134. };
  135. /*****************************************************************************
  136. * CLeave::CLeave *
  137. *----------------*
  138. * Description:
  139. *
  140. ******************************************************************* PACOG ***/
  141. CLeave::CLeave (const char* pszLeaveValue)
  142. {
  143. strcpy(m_pszLeave, pszLeaveValue);
  144. }
  145. /*****************************************************************************
  146. * CLeave::Value *
  147. *---------------*
  148. * Description:
  149. *
  150. ******************************************************************* PACOG ***/
  151. const char* CLeave::Value()
  152. {
  153. return m_pszLeave;
  154. }
  155. /*****************************************************************************
  156. * CBranch::CBranch *
  157. *------------------*
  158. * Description:
  159. *
  160. ******************************************************************* PACOG ***/
  161. CBranch::CBranch( const char* pszQuestion, int iLeft, int iRight)
  162. {
  163. strcpy(m_pszQuestion, pszQuestion);
  164. m_iLeft = iLeft;
  165. m_iRight = iRight;
  166. }
  167. /*****************************************************************************
  168. * CBranch::Left *
  169. *---------------*
  170. * Description:
  171. *
  172. ******************************************************************* PACOG ***/
  173. int CBranch::Left()
  174. {
  175. return m_iLeft;
  176. }
  177. /*****************************************************************************
  178. * CBranch::Right *
  179. *----------------*
  180. * Description:
  181. *
  182. ******************************************************************* PACOG ***/
  183. int CBranch::Right()
  184. {
  185. return m_iRight;
  186. }
  187. /*****************************************************************************
  188. * CBranch::Question *
  189. *-------------------*
  190. * Description:
  191. *
  192. ******************************************************************* PACOG ***/
  193. const char* CBranch::Question()
  194. {
  195. return m_pszQuestion;
  196. }
  197. /*****************************************************************************
  198. * CClustTree::ClassFactory *
  199. *--------------------------*
  200. * Description:
  201. *
  202. ******************************************************************* PACOG ***/
  203. CClustTree* CClustTree::ClassFactory ()
  204. {
  205. return new CClustTreeImp;
  206. }
  207. /*****************************************************************************
  208. * CClustTreeImp::~CClustTreeImp *
  209. *-------------------------------*
  210. * Description:
  211. *
  212. ******************************************************************* PACOG ***/
  213. CClustTreeImp::~CClustTreeImp ()
  214. {
  215. delete m_pQuestSet;
  216. }
  217. /*****************************************************************************
  218. * CClustTreeImp::LoadFromFile *
  219. *-----------------------------*
  220. * Description:
  221. *
  222. ******************************************************************* PACOG ***/
  223. int CClustTreeImp::LoadFromFile (FILE* fp)
  224. {
  225. char line[MAX_LINE+1];
  226. char *ptr;
  227. assert (fp);
  228. if ((m_pQuestSet = new CQuestSet) == 0)
  229. {
  230. return 0;
  231. }
  232. while (fgets(line, MAX_LINE, fp) && line[0]!='#')
  233. {
  234. if (line[strlen(line)-1]=='\r' || line[strlen(line)-1]=='\n')
  235. {
  236. line[strlen(line)-1]= '\0';
  237. }
  238. ptr = line;
  239. while (*ptr && isspace (*ptr))
  240. {
  241. ptr++;
  242. }
  243. if (strncmp(ptr, "QS ", 3)==0)
  244. {
  245. if (!m_pQuestSet->AddQuestion (ptr+3))
  246. {
  247. return 0;
  248. }
  249. }
  250. else
  251. {
  252. if (!ParseTree (ptr))
  253. {
  254. return 0;
  255. }
  256. }
  257. }
  258. m_pQuestSet->Sort();
  259. m_trees.Sort();
  260. #ifdef _DEBUG_
  261. Debug();
  262. #endif
  263. return 1;
  264. }
  265. /*****************************************************************************
  266. * CClustTreeImp::GetNumStates *
  267. *-----------------------------*
  268. * Description:
  269. *
  270. ******************************************************************* PACOG ***/
  271. int CClustTreeImp::GetNumStates(const char* triphone)
  272. {
  273. char triphHtk[20];
  274. char centralPhone[10];
  275. char stateName[20];
  276. int stateCount = 0;
  277. strcpy(triphHtk, triphone);
  278. if ( CentralPhone(triphHtk, centralPhone) )
  279. {
  280. for (stateCount = 0; stateCount<3; stateCount++)
  281. {
  282. sprintf(stateName, "%s[%d]", centralPhone, stateCount+2);
  283. CTree* tree;
  284. if ( ! m_trees.Find (stateName, &tree) )
  285. {
  286. break;
  287. }
  288. }
  289. }
  290. return stateCount;
  291. }
  292. /*****************************************************************************
  293. * CClustTreeImp::TriphoneToCluster *
  294. *----------------------------------*
  295. * Description:
  296. *
  297. ******************************************************************* PACOG ***/
  298. const char *CClustTreeImp::TriphoneToCluster (const char *triphone, int state)
  299. {
  300. char centralPhone[10];
  301. char stateName[20];
  302. char triphHtk[20];
  303. assert (triphone);
  304. assert (0<=state && state<3);
  305. strcpy(triphHtk, triphone);
  306. if ( CentralPhone(triphHtk, centralPhone) )
  307. {
  308. sprintf(stateName, "%s[%d]", centralPhone, state+2);
  309. CTree* tree = 0;
  310. if ( m_trees.Find (stateName, &tree) )
  311. {
  312. return tree->Traverse(m_pQuestSet, triphHtk);
  313. }
  314. }
  315. return 0;
  316. }
  317. /*****************************************************************************
  318. * CClustTreeImp::CentralPhone *
  319. *-----------------------------*
  320. * Description:
  321. *
  322. ******************************************************************* PACOG ***/
  323. int CClustTreeImp::CentralPhone (const char *triphone, char *phone)
  324. {
  325. char *index1;
  326. char *index2;
  327. assert (phone);
  328. assert (triphone);
  329. index1 = strchr(triphone, '-');
  330. if (index1)
  331. {
  332. index2 = strchr (++index1, '+');
  333. }
  334. if ( index1 && index2 )
  335. {
  336. strncpy ( phone, index1, index2-index1);
  337. phone[index2-index1] = '\0';
  338. return 1;
  339. }
  340. return 0;
  341. }
  342. /*****************************************************************************
  343. * CClustTreeImp::ParseTree *
  344. *--------------------------*
  345. * Description:
  346. *
  347. ******************************************************************* PACOG ***/
  348. int CClustTreeImp::ParseTree (const char *ptr)
  349. {
  350. static int newTree = 1;
  351. assert (ptr);
  352. if (!strlen (ptr))
  353. {
  354. newTree = 1;
  355. }
  356. else if (strncmp(ptr,"{",1)==0)
  357. {
  358. newTree = 0;
  359. }
  360. else if (strncmp(ptr,"}",1)==0)
  361. {
  362. newTree = 1;
  363. }
  364. else
  365. {
  366. if (newTree )
  367. {
  368. CTree tree;
  369. m_trees.PushBack(ptr, tree);
  370. newTree = 0;
  371. }
  372. else
  373. {
  374. m_trees.Back().AddNode(ptr);
  375. }
  376. }
  377. return 1;
  378. }
  379. /*****************************************************************************
  380. * CTree::AddNode *
  381. *----------------*
  382. * Description:
  383. *
  384. ******************************************************************* PACOG ***/
  385. int CTree::AddNode (const char *line)
  386. {
  387. char aux1[50] = "";
  388. char aux2[50] = "";
  389. char *index1;
  390. char *index2;
  391. int leftIdx;
  392. int rightIdx;
  393. int i;
  394. assert (line);
  395. if (line[0]=='"')
  396. {
  397. // This is the final node (tree only has one cluster)
  398. index1 = strchr(line+1, '"');
  399. if (index1)
  400. {
  401. strncpy(aux1, line+1, index1 - line - 1);
  402. aux1[index1 - line - 1] = '\0';
  403. CLeave terminal(aux1);
  404. m_terminals.PushBack("", terminal);
  405. }
  406. }
  407. else
  408. {
  409. //Node name
  410. index1 = strchr(line, '\'');
  411. if (index1)
  412. {
  413. index2 = strchr(++index1, '\'');
  414. strncpy(aux1, index1, index2 - index1);
  415. aux1[index2 - index1] = '\0';
  416. }
  417. index1 = ++index2;
  418. while (*index1 && isspace (*index1))
  419. {
  420. index1++;
  421. }
  422. //Left node
  423. if (*index1 == '"')
  424. {
  425. index2 = strchr (++index1, '"');
  426. strncpy(aux2, index1, index2 - index1);
  427. aux2[index2 - index1] = '\0';
  428. CLeave terminal(aux2);
  429. m_terminals.PushBack("", terminal);
  430. leftIdx = m_terminals.Size() - 1;
  431. index1 = ++index2;
  432. }
  433. else
  434. {
  435. if (*index1 == '-')
  436. {
  437. aux2[0]= *index1++;
  438. }
  439. for (i=1 ; isdigit(*index1); i++)
  440. {
  441. aux2[i]= *index1++;
  442. }
  443. aux2[i]='\0';
  444. leftIdx = atoi (aux2);
  445. }
  446. while (isspace(*++index1))
  447. {
  448. //Empty loop
  449. }
  450. //Right node
  451. if (*index1 == '"')
  452. {
  453. index2 = strchr (++index1, '"');
  454. strncpy(aux2, index1, index2 - index1);
  455. aux2[index2 - index1] = '\0';
  456. CLeave terminal(aux2);
  457. m_terminals.PushBack("", terminal);
  458. rightIdx = m_terminals.Size() - 1;
  459. }
  460. else
  461. {
  462. if (*index1== '-')
  463. {
  464. aux2[0]= *index1++;
  465. }
  466. for (i=1; isdigit(*index1); i++)
  467. {
  468. aux2[i]= *index1++;
  469. }
  470. aux2[i]='\0';
  471. rightIdx = atoi (aux2);
  472. }
  473. CBranch node(aux1, leftIdx, rightIdx);
  474. m_branches.PushBack("", node);
  475. }
  476. return 1;
  477. }
  478. /*****************************************************************************
  479. * CTree::Traverse *
  480. *-----------------*
  481. * Description:
  482. *
  483. ******************************************************************* PACOG ***/
  484. const char *CTree::Traverse (CQuestSet* pQuestSet, const char *triph)
  485. {
  486. char *retVal = 0;
  487. int nodeIdx = 0;
  488. int nextIdx;
  489. assert (triph);
  490. if (m_branches.Size() == 0)
  491. {
  492. return m_terminals[0].Value();
  493. }
  494. // Search until we find a leave
  495. while (!retVal)
  496. {
  497. if (nodeIdx > m_branches.Size())
  498. {
  499. return 0;
  500. }
  501. if (pQuestSet->Matches (m_branches[nodeIdx].Question(), triph))
  502. {
  503. nextIdx = m_branches[nodeIdx].Right();
  504. }
  505. else
  506. {
  507. nextIdx = m_branches[nodeIdx].Left();
  508. }
  509. if ( nextIdx >= 0)
  510. {
  511. retVal = (char *)m_terminals[nextIdx].Value();
  512. }
  513. else
  514. {
  515. nodeIdx = -nextIdx;
  516. }
  517. }
  518. return retVal;
  519. }
  520. /*****************************************************************************
  521. * CRegExp::CRegExp *
  522. *------------------*
  523. * Description:
  524. *
  525. ******************************************************************* PACOG ***/
  526. CRegExp::CRegExp ()
  527. {
  528. m_text[0] = '\0';
  529. }
  530. /*****************************************************************************
  531. * CRegExp::CRegExp *
  532. *------------------*
  533. * Description:
  534. *
  535. ******************************************************************* PACOG ***/
  536. CRegExp::CRegExp (const char* regExp)
  537. {
  538. strcpy(m_text, regExp);
  539. }
  540. /*****************************************************************************
  541. * CRegExp::Evaluate *
  542. *-------------------*
  543. * Description:
  544. *
  545. ******************************************************************* PACOG ***/
  546. bool CRegExp::Evaluate (const char *string)
  547. {
  548. const char *index1;
  549. const char *index2;
  550. int len;
  551. int i;
  552. int jump = 0;
  553. assert (string);
  554. len = strlen(m_text);
  555. index1 = string;
  556. for (i=0; i<len; i++)
  557. {
  558. if (m_text[i]=='*')
  559. {
  560. jump = 1;
  561. }
  562. else
  563. {
  564. if (jump)
  565. {
  566. // After a star, several characters can be skipped
  567. index2 = strchr(index1, m_text[i]);
  568. if (index2 == NULL)
  569. {
  570. return 0; /* Next character not found, expresion not matched */
  571. }
  572. index1 = ++index2;
  573. jump = 0;
  574. }
  575. else
  576. {
  577. // If not a star, next character must match
  578. if (m_text[i] != *index1++)
  579. {
  580. return false;
  581. }
  582. }
  583. }
  584. }
  585. // If we complete the pass over the regexp string, we probably found a match
  586. // If the last char in regexp is '*', the is match else,
  587. // if both strings reached the end, is match
  588. if (m_text[len-1]=='*' || !*index1)
  589. {
  590. return true;
  591. }
  592. return false;
  593. }
  594. /*****************************************************************************
  595. * CQuest::AddExpression *
  596. *-----------------------*
  597. * Description:
  598. *
  599. ******************************************************************* PACOG ***/
  600. int CQuest::AddExpression (const char* line)
  601. {
  602. CRegExp regExp(line);
  603. m_pExpr.PushBack("", regExp);
  604. return 1;
  605. }
  606. /*****************************************************************************
  607. * CQuest::Matches *
  608. *-----------------*
  609. * Description:
  610. *
  611. ******************************************************************* PACOG ***/
  612. bool CQuest::Matches (const char *triphone)
  613. {
  614. assert (triphone);
  615. for (int i=0; i<m_pExpr.Size(); i++)
  616. {
  617. if (m_pExpr[i].Evaluate (triphone))
  618. {
  619. return true;
  620. }
  621. }
  622. return false;
  623. }
  624. /*****************************************************************************
  625. * CQuestSet::AddQuestion *
  626. *------------------------*
  627. * Description:
  628. *
  629. ******************************************************************* PACOG ***/
  630. bool CQuestSet::AddQuestion (const char *line)
  631. {
  632. char name[30];
  633. char aux[30];
  634. const char *index1 = NULL;
  635. const char *index2 = NULL;
  636. assert (line);
  637. if (line!=NULL)
  638. {
  639. index1 = strchr(line,'\'');
  640. if (index1)
  641. {
  642. index2 = strchr(++index1, '\'');
  643. }
  644. if (index1 && index2)
  645. {
  646. strncpy (name, index1, index2-index1);
  647. name[index2-index1] = '\0';
  648. CQuest newQuestion;
  649. do
  650. {
  651. line = index2+1;
  652. index1 = strchr (line,'"');
  653. if (index1)
  654. {
  655. index2 = strchr (++index1, '"');
  656. }
  657. if (index1 && index2)
  658. {
  659. strncpy(aux, index1, index2-index1);
  660. aux[index2-index1] = '\0';
  661. newQuestion.AddExpression(aux);
  662. }
  663. } while (index1 && index2);
  664. return m_pQuest.PushBack (name, newQuestion);
  665. }
  666. }
  667. return false;
  668. }
  669. /*****************************************************************************
  670. * CQuestSet::Matches *
  671. *--------------------*
  672. * Description:
  673. *
  674. * Changes:
  675. * 12/5/00 Was getting pQuestion by reference, which forced a big
  676. * nested copy. Now getting a pointer which we can use
  677. * and discard.
  678. *
  679. ******************************************************************* mplumpe ***/
  680. bool CQuestSet::Matches (const char* tag, const char* triph)
  681. {
  682. CQuest *pQuestion;
  683. if ( m_pQuest.Find(tag, &pQuestion) )
  684. {
  685. return pQuestion->Matches (triph);
  686. }
  687. return false;
  688. }
  689. /*****************************************************************************
  690. * CQuestSet::Sort *
  691. *-----------------*
  692. * Description:
  693. *
  694. ******************************************************************* PACOG ***/
  695. void CQuestSet::Sort ()
  696. {
  697. m_pQuest.Sort();
  698. }
  699. #ifdef _DEBUG_
  700. /*****************************************************************************
  701. * CClustTreeImp::Debug *
  702. *----------------------*
  703. * Description:
  704. *
  705. ******************************************************************* PACOG ***/
  706. void CClustTreeImp::Debug ()
  707. {
  708. m_questionSet.Debug();
  709. for (int i=0; i<m_trees.size(); i++)
  710. {
  711. printf ("\nTrees[%ld]=%s", i, m_trees[i].Name());
  712. m_trees[i].Debug();
  713. }
  714. puts ("");
  715. }
  716. /*****************************************************************************
  717. * CTree::Debug *
  718. *--------------*
  719. * Description:
  720. *
  721. ******************************************************************* PACOG ***/
  722. void CTree::Debug ()
  723. {
  724. int idx;
  725. for (int i=0; i<m_branches[i].size(); i++)
  726. {
  727. idx = m_branches[i].Left();
  728. if (idx>=0)
  729. {
  730. printf("Left= %s ", m_terminals[i].Value());
  731. }
  732. else
  733. {
  734. printf("Left= %ld ", -idx);
  735. }
  736. idx = m_branches[i].Right();
  737. if (idx>=0)
  738. {
  739. printf("Right= %s ", m_terminals[i].Value());
  740. }
  741. else
  742. {
  743. printf("Right= %ld ", -idx);
  744. }
  745. }
  746. }
  747. /*****************************************************************************
  748. * CQuestSet::Debug *
  749. *------------------*
  750. * Description:
  751. *
  752. ******************************************************************* PACOG ***/
  753. void CQuestSet::Debug ()
  754. {
  755. for (int i=0; i<m_pQuest.size(); i++)
  756. {
  757. printf("Question[%ld]=%s\n", i, m_pQuest[i].GetName());
  758. m_pQuest[i].Debug();
  759. }
  760. }
  761. /*****************************************************************************
  762. * CQuest::Debug *
  763. *---------------*
  764. * Description:
  765. *
  766. ******************************************************************* PACOG ***/
  767. void CQuest::Debug ()
  768. {
  769. for (int i=0; j<m_pExpr.size(); i++)
  770. {
  771. printf("\texpr[%ld]=%s\n",i, m_pExpr[i].c_str() );
  772. }
  773. }
  774. #endif