|
|
/******************************************************************************
* trees.cpp *
*-----------*
*
*------------------------------------------------------------------------------
*  Copyright (c) 1997  Entropic Research Laboratory, Inc.
*  Copyright (C) 1998  Entropic, Inc
*  Copyright (C) 2000  Microsoft Corporation         Date: 03/02/00 - 12/5/00
*  All Rights Reserved
*
********************************************************************* mplumpe  was PACOG ***/
#include "trees.h"
#include "list.h"
#include "clusters.h"
#include <assert.h>
#include <ctype.h>
#define MAX_QS_LEN 128
#define MAX_LINE 512
//----------------------------------------------------------
// CRegExp
//   Holds one pattern string in a fixed-size buffer and tests
//   other strings against it.  In the pattern, '*' stands for
//   a run of characters; every other character is literal.
//
class CRegExp
{
    public:
        // Creates an empty pattern.
        CRegExp ();

        // Creates a pattern from the given text.
        CRegExp (const char* string);

        // Returns true when pszString matches the stored pattern.
        bool Evaluate (const char* pszString);

    private:
        char m_text[MAX_QS_LEN];    // NUL-terminated pattern text
};
//----------------------------------------------------------
// Question set classes
//
class CQuest { public: CQuest& operator= (CQuest& rSrc) { m_pExpr = rSrc.m_pExpr; return *this; } int AddExpression (const char* pszLine); bool Matches (const char* pszString);
#ifdef _DEBUG_
void Debug(); #endif
private: CList<CRegExp> m_pExpr; };
//----------------------------------------------------------
//
//
class CQuestSet { public: bool Matches (const char* pszQuestTag, const char* pszTriph); bool AddQuestion ( const char* pszLine); void Sort(); #ifdef _DEBUG_
void Debug(); #endif
private: CList<CQuest> m_pQuest; };
//----------------------------------------------------------
// Tree classes
//
//----------------------------------------------------------
// CLeave
//   Terminal node of a tree: a single name held in a
//   fixed-size buffer.
//
class CLeave
{
    public:
        // Creates a terminal whose value is the empty string.
        CLeave ()
        {
            *m_pszLeave = '\0';
        }

        // Creates a terminal holding a copy of pszLeaveValue.
        CLeave (const char* pszLeaveValue);

        // Returns the stored value.
        const char* Value();

    private:
        char m_pszLeave[MAX_QS_LEN];    // NUL-terminated terminal value
};
//----------------------------------------------------------
//
//
//----------------------------------------------------------
// CBranch
//   Internal node of a tree: the name of the question asked
//   at this node plus the two child references.  CTree::Traverse
//   reads a child value >= 0 as an index into the terminal list
//   and a negative value as the negated index of another branch.
//
class CBranch
{
    public:
        // Creates a branch with an empty question and both children 0.
        CBranch ()
            : m_iLeft (0),
              m_iRight (0)
        {
            m_pszQuestion[0] = '\0';
        }

        // Creates a branch from a question name and two child references.
        CBranch( const char* pszQuestion, int iLeft, int iRight);

        int Left();                 // child taken when the question does not match
        int Right();                // child taken when the question matches
        const char* Question();     // name of the question asked here

    private:
        char m_pszQuestion[MAX_QS_LEN];
        int  m_iLeft;
        int  m_iRight;
};
//----------------------------------------------------------
//
//
class CTree { public: CTree& operator= (CTree& rSrc) { m_branches = rSrc.m_branches; m_terminals = rSrc.m_terminals; return *this; } int AddNode( const char* pszLine); const char* Traverse(CQuestSet* pQuestSet, const char* pszTriphone);
#ifdef _DEBUG_
void Debug(); #endif
private: CList<CBranch> m_branches; CList<CLeave> m_terminals; };
//----------------------------------------------------------
//
//
class CClustTreeImp : CClustTree { public: ~CClustTreeImp();
int LoadFromFile (FILE* fp); int GetNumStates (const char* pszTriphone); const char* TriphoneToCluster(const char* pszTriphone, int iState); #ifdef _DEBUG_
void Debug(); #endif
private: int ParseTree (const char* pszLine); int CentralPhone (const char *pszTriphone, char *pszThone);
CQuestSet* m_pQuestSet; CList<CTree> m_trees; };
/*****************************************************************************
* CLeave::CLeave *
*----------------*
*   Description:
*       Stores a copy of pszLeaveValue in the fixed-size member buffer.
*       A null pointer is stored as the empty string, and a value that
*       does not fit is truncated to MAX_QS_LEN-1 characters rather than
*       written past the end of the buffer.
******************************************************************* PACOG ***/
CLeave::CLeave (const char* pszLeaveValue)
{
    const char* pszSource = pszLeaveValue ? pszLeaveValue : "";

    // strncpy does not terminate the destination when the source fills
    // it completely, so the last byte is always set explicitly.
    strncpy (m_pszLeave, pszSource, MAX_QS_LEN - 1);
    m_pszLeave[MAX_QS_LEN - 1] = '\0';
}

/*****************************************************************************
* CLeave::Value *
*---------------*
*   Description:
*       Returns the stored terminal value.  The pointer refers to the
*       object's own buffer.
******************************************************************* PACOG ***/
const char* CLeave::Value()
{
    return m_pszLeave;
}
/*****************************************************************************
* CBranch::CBranch *
*------------------*
*   Description:
*       Stores the question name and the two child references.  A null
*       question pointer is stored as the empty string, and a name that
*       does not fit is truncated to MAX_QS_LEN-1 characters rather than
*       written past the end of the buffer.
******************************************************************* PACOG ***/
CBranch::CBranch( const char* pszQuestion, int iLeft, int iRight)
{
    const char* pszSource = pszQuestion ? pszQuestion : "";

    // strncpy does not terminate the destination when the source fills
    // it completely, so the last byte is always set explicitly.
    strncpy (m_pszQuestion, pszSource, MAX_QS_LEN - 1);
    m_pszQuestion[MAX_QS_LEN - 1] = '\0';

    m_iLeft  = iLeft;
    m_iRight = iRight;
}

/*****************************************************************************
* CBranch::Left *
*---------------*
*   Description:
*       Returns the left child reference.
******************************************************************* PACOG ***/
int CBranch::Left()
{
    return m_iLeft;
}

/*****************************************************************************
* CBranch::Right *
*----------------*
*   Description:
*       Returns the right child reference.
******************************************************************* PACOG ***/
int CBranch::Right()
{
    return m_iRight;
}

/*****************************************************************************
* CBranch::Question *
*-------------------*
*   Description:
*       Returns the name of the question asked at this node.  The pointer
*       refers to the object's own buffer.
******************************************************************* PACOG ***/
const char* CBranch::Question()
{
    return m_pszQuestion;
}
/*****************************************************************************
* CClustTree::ClassFactory *
*--------------------------*
*   Description:
*       Allocates a new CClustTreeImp and returns it through the
*       CClustTree interface.  The caller receives a heap object.
*
*       The object is value-initialized (note the parentheses) so that
*       its pointer member starts out null; the destructor deletes that
*       pointer, which must not be left indeterminate when no file was
*       ever loaded.
******************************************************************* PACOG ***/
CClustTree* CClustTree::ClassFactory ()
{
    return new CClustTreeImp();
}
/*****************************************************************************
* CClustTreeImp::~CClustTreeImp *
*-------------------------------*
*   Description:
*       Releases the question set held through m_pQuestSet.
******************************************************************* PACOG ***/
CClustTreeImp::~CClustTreeImp ()
{
    delete this->m_pQuestSet;
}
/*****************************************************************************
* CClustTreeImp::LoadFromFile * *-----------------------------* * Description: * ******************************************************************* PACOG ***/ int CClustTreeImp::LoadFromFile (FILE* fp) { char line[MAX_LINE+1]; char *ptr; assert (fp);
if ((m_pQuestSet = new CQuestSet) == 0) { return 0; }
while (fgets(line, MAX_LINE, fp) && line[0]!='#') { if (line[strlen(line)-1]=='\r' || line[strlen(line)-1]=='\n') { line[strlen(line)-1]= '\0'; } ptr = line; while (*ptr && isspace (*ptr)) { ptr++; } if (strncmp(ptr, "QS ", 3)==0) { if (!m_pQuestSet->AddQuestion (ptr+3)) { return 0; } } else { if (!ParseTree (ptr)) { return 0; } } } m_pQuestSet->Sort(); m_trees.Sort();
#ifdef _DEBUG_
Debug(); #endif
return 1; } /*****************************************************************************
* CClustTreeImp::GetNumStates * *-----------------------------* * Description: * ******************************************************************* PACOG ***/ int CClustTreeImp::GetNumStates(const char* triphone) { char triphHtk[20]; char centralPhone[10]; char stateName[20]; int stateCount = 0;
strcpy(triphHtk, triphone);
if ( CentralPhone(triphHtk, centralPhone) ) { for (stateCount = 0; stateCount<3; stateCount++) { sprintf(stateName, "%s[%d]", centralPhone, stateCount+2);
CTree* tree; if ( ! m_trees.Find (stateName, &tree) ) { break; } } }
return stateCount; } /*****************************************************************************
* CClustTreeImp::TriphoneToCluster * *----------------------------------* * Description: * ******************************************************************* PACOG ***/ const char *CClustTreeImp::TriphoneToCluster (const char *triphone, int state) { char centralPhone[10]; char stateName[20]; char triphHtk[20];
assert (triphone); assert (0<=state && state<3); strcpy(triphHtk, triphone);
if ( CentralPhone(triphHtk, centralPhone) ) { sprintf(stateName, "%s[%d]", centralPhone, state+2); CTree* tree = 0; if ( m_trees.Find (stateName, &tree) ) { return tree->Traverse(m_pQuestSet, triphHtk); } } return 0; }
/*****************************************************************************
* CClustTreeImp::CentralPhone *
*-----------------------------*
*   Description:
*       Copies the text found between the first '-' of the triphone and
*       the first '+' after it into phone, NUL-terminated.
*
*       Returns 1 when both delimiters are present, 0 otherwise (phone
*       is then left untouched).
*
*       The output buffer has no size parameter: the caller must supply
*       a buffer at least as large as the triphone string itself.
******************************************************************* PACOG ***/
int CClustTreeImp::CentralPhone (const char *triphone, char *phone)
{
    assert (phone);
    assert (triphone);

    // The search results point into a const string, so they are kept
    // as pointers to const.
    const char* pszStart = strchr (triphone, '-');
    if (!pszStart)
    {
        return 0;
    }
    ++pszStart;     // first character after the '-'

    const char* pszEnd = strchr (pszStart, '+');
    if (!pszEnd)
    {
        return 0;
    }

    const size_t len = pszEnd - pszStart;
    strncpy (phone, pszStart, len);
    phone[len] = '\0';

    return 1;
}

/*****************************************************************************
* CClustTreeImp::ParseTree *
*--------------------------*
*   Description:
*       Handles one tree-file line from which leading whitespace has
*       already been removed:
*         - an empty line or a line starting with '}' means the next
*           content line begins a new tree;
*         - a line starting with '{' means node lines follow;
*         - any other line is either the name of a new tree, or a node
*           of the most recently added tree.
*
*       Always returns 1.
*
*       NOTE(review): the "expecting a new tree" flag is a function-level
*       static, so it is shared by every instance and survives from one
*       load to the next.  To stay safe when that flag is stale, a
*       content line is always treated as a tree name while the tree
*       list is still empty.
******************************************************************* PACOG ***/
int CClustTreeImp::ParseTree (const char *ptr)
{
    static int newTree = 1;

    assert (ptr);

    if (*ptr == '\0')
    {
        newTree = 1;
    }
    else if (*ptr == '{')
    {
        newTree = 0;
    }
    else if (*ptr == '}')
    {
        newTree = 1;
    }
    else
    {
        if (newTree || m_trees.Size() == 0)
        {
            CTree tree;
            m_trees.PushBack(ptr, tree);
            newTree = 0;
        }
        else
        {
            m_trees.Back().AddNode(ptr);
        }
    }

    return 1;
}
/*****************************************************************************
* CTree::AddNode * *----------------* * Description: * ******************************************************************* PACOG ***/
int CTree::AddNode (const char *line) { char aux1[50] = ""; char aux2[50] = ""; char *index1; char *index2; int leftIdx; int rightIdx; int i; assert (line); if (line[0]=='"') { // This is the final node (tree only has one cluster)
index1 = strchr(line+1, '"'); if (index1) { strncpy(aux1, line+1, index1 - line - 1); aux1[index1 - line - 1] = '\0'; CLeave terminal(aux1); m_terminals.PushBack("", terminal); } } else { //Node name
index1 = strchr(line, '\''); if (index1) { index2 = strchr(++index1, '\'');
strncpy(aux1, index1, index2 - index1); aux1[index2 - index1] = '\0'; } index1 = ++index2; while (*index1 && isspace (*index1)) { index1++; } //Left node
if (*index1 == '"') { index2 = strchr (++index1, '"');
strncpy(aux2, index1, index2 - index1); aux2[index2 - index1] = '\0';
CLeave terminal(aux2); m_terminals.PushBack("", terminal);
leftIdx = m_terminals.Size() - 1; index1 = ++index2; } else { if (*index1 == '-') { aux2[0]= *index1++; }
for (i=1 ; isdigit(*index1); i++) { aux2[i]= *index1++; } aux2[i]='\0';
leftIdx = atoi (aux2); } while (isspace(*++index1)) { //Empty loop
}
//Right node
if (*index1 == '"') { index2 = strchr (++index1, '"'); strncpy(aux2, index1, index2 - index1); aux2[index2 - index1] = '\0';
CLeave terminal(aux2); m_terminals.PushBack("", terminal);
rightIdx = m_terminals.Size() - 1; } else { if (*index1== '-') { aux2[0]= *index1++; } for (i=1; isdigit(*index1); i++) { aux2[i]= *index1++; } aux2[i]='\0';
rightIdx = atoi (aux2); }
CBranch node(aux1, leftIdx, rightIdx);
m_branches.PushBack("", node); } return 1; }
/*****************************************************************************
* CTree::Traverse *
*-----------------*
*   Description:
*       Walks the tree from branch 0.  At each branch the question named
*       by the branch is evaluated against triph: a match follows the
*       right child, otherwise the left child.  A child value >= 0 is an
*       index into the terminal list and ends the walk; a negative value
*       is the negated index of the next branch.
*
*       A tree without branches yields its first terminal.
*
*       Returns the value of the terminal reached, or 0 when the tree is
*       empty, a child reference is out of range, or the walk takes more
*       steps than there are branches (which can only happen when the
*       references form a cycle).
******************************************************************* PACOG ***/
const char *CTree::Traverse (CQuestSet* pQuestSet, const char *triph)
{
    assert (pQuestSet);
    assert (triph);

    const int branchCount   = static_cast<int>(m_branches.Size());
    const int terminalCount = static_cast<int>(m_terminals.Size());

    if (branchCount == 0)
    {
        return (terminalCount > 0) ? m_terminals[0].Value() : 0;
    }

    int nodeIdx = 0;

    // Search until we find a leave.  A well-formed tree visits each
    // branch at most once, so branchCount steps are always enough.
    for (int step = 0; step < branchCount; step++)
    {
        // Valid branch indices are 0 .. branchCount-1.
        if (nodeIdx >= branchCount)
        {
            return 0;
        }

        int nextIdx;
        if (pQuestSet->Matches (m_branches[nodeIdx].Question(), triph))
        {
            nextIdx = m_branches[nodeIdx].Right();
        }
        else
        {
            nextIdx = m_branches[nodeIdx].Left();
        }

        if (nextIdx >= 0)
        {
            return (nextIdx < terminalCount) ? m_terminals[nextIdx].Value() : 0;
        }

        nodeIdx = -nextIdx;
    }

    return 0;
}
/*****************************************************************************
* CRegExp::CRegExp *
*------------------*
*   Description:
*       Default constructor: the pattern starts out as the empty string.
******************************************************************* PACOG ***/
CRegExp::CRegExp ()
{
    *m_text = '\0';
}
/*****************************************************************************
* CRegExp::CRegExp *
*------------------*
*   Description:
*       Stores a copy of regExp as the pattern.  A null pointer is stored
*       as the empty pattern, and a pattern that does not fit is
*       truncated to MAX_QS_LEN-1 characters rather than written past
*       the end of the buffer.
******************************************************************* PACOG ***/
CRegExp::CRegExp (const char* regExp)
{
    const char* pszSource = regExp ? regExp : "";

    // strncpy does not terminate the destination when the source fills
    // it completely, so the last byte is always set explicitly.
    strncpy (m_text, pszSource, MAX_QS_LEN - 1);
    m_text[MAX_QS_LEN - 1] = '\0';
}
/*****************************************************************************
* WildcardMatch *
*---------------*
*   Description:
*       File-local helper.  Returns true when the whole of text matches
*       pattern, where '*' in the pattern stands for any run of
*       characters (including none) and every other character must match
*       literally.
*
*       When a literal fails to match after a '*', the search resumes one
*       character further along the text, so a pattern such as "*a"
*       matches "aba".
*****************************************************************************/
static bool WildcardMatch (const char* pattern, const char* text)
{
    const char* afterStar  = NULL;  // pattern position following the last '*' seen
    const char* retryPoint = NULL;  // text position that '*' currently extends to

    while (*text)
    {
        if (*pattern == '*')
        {
            afterStar  = ++pattern;
            retryPoint = text;
        }
        else if (*pattern == *text)
        {
            // *text is not NUL here, so neither is *pattern.
            ++pattern;
            ++text;
        }
        else if (afterStar)
        {
            // Let the last '*' absorb one more character and try again.
            pattern = afterStar;
            text    = ++retryPoint;
        }
        else
        {
            return false;
        }
    }

    // Text exhausted: only trailing stars may remain in the pattern.
    while (*pattern == '*')
    {
        ++pattern;
    }
    return *pattern == '\0';
}

/*****************************************************************************
* CRegExp::Evaluate *
*-------------------*
*   Description:
*       Tests string against the stored pattern; see WildcardMatch for
*       the pattern rules.  An empty pattern matches only the empty
*       string.
******************************************************************* PACOG ***/
bool CRegExp::Evaluate (const char *string)
{
    assert (string);

    return WildcardMatch (m_text, string);
}
/*****************************************************************************
* CQuest::AddExpression * *-----------------------* * Description: * ******************************************************************* PACOG ***/ int CQuest::AddExpression (const char* line) { CRegExp regExp(line);
m_pExpr.PushBack("", regExp);
return 1; }
/*****************************************************************************
* CQuest::Matches *
*-----------------*
*   Description:
*       Evaluates the stored patterns in order and returns true at the
*       first one that matches the triphone; returns false when none
*       matches.
******************************************************************* PACOG ***/
bool CQuest::Matches (const char *triphone)
{
    assert (triphone);

    int idx = 0;
    while (idx < m_pExpr.Size())
    {
        if (m_pExpr[idx].Evaluate (triphone))
        {
            return true;
        }
        ++idx;
    }

    return false;
}
/*****************************************************************************
* CQuestSet::AddQuestion * *------------------------* * Description: * ******************************************************************* PACOG ***/ bool CQuestSet::AddQuestion (const char *line) { char name[30]; char aux[30]; const char *index1 = NULL; const char *index2 = NULL; assert (line); if (line!=NULL) { index1 = strchr(line,'\''); if (index1) { index2 = strchr(++index1, '\''); } if (index1 && index2) {
strncpy (name, index1, index2-index1); name[index2-index1] = '\0';
CQuest newQuestion;
do { line = index2+1; index1 = strchr (line,'"'); if (index1) { index2 = strchr (++index1, '"'); } if (index1 && index2) { strncpy(aux, index1, index2-index1); aux[index2-index1] = '\0'; newQuestion.AddExpression(aux); } } while (index1 && index2); return m_pQuest.PushBack (name, newQuestion); } } return false; }
/*****************************************************************************
* CQuestSet::Matches *
*--------------------*
*   Description:
*       Looks up the question stored under tag and evaluates it against
*       triph.  Returns false when no question has that name.
*
*   Changes:
*       12/5/00 The question used to be fetched by reference, which
*               forced a large nested copy.  It is now fetched as a
*               pointer that is used and then discarded.
*
******************************************************************* mplumpe ***/
bool CQuestSet::Matches (const char* tag, const char* triph)
{
    CQuest *pQuestion;

    if ( !m_pQuest.Find(tag, &pQuestion) )
    {
        return false;
    }

    return pQuestion->Matches (triph);
}
/*****************************************************************************
* CQuestSet::Sort * *-----------------* * Description: * ******************************************************************* PACOG ***/ void CQuestSet::Sort () { m_pQuest.Sort(); }
#ifdef _DEBUG_
/*****************************************************************************
* CClustTreeImp::Debug *
*----------------------*
*   Description:
*       Prints the question set (when there is one) followed by every
*       tree, identified by its position in the tree list.
*
*       NOTE(review): tree names are not printed because no accessor for
*       list entry names is visible in this file.
******************************************************************* PACOG ***/
void CClustTreeImp::Debug ()
{
    if (m_pQuestSet)
    {
        m_pQuestSet->Debug();
    }

    for (int i=0; i<m_trees.Size(); i++)
    {
        printf ("\nTrees[%d] ", i);
        m_trees[i].Debug();
    }
    puts ("");
}

/*****************************************************************************
* CTree::Debug *
*--------------*
*   Description:
*       Prints both children of every branch.  A child >= 0 is printed
*       as the value of the terminal with that index; a negative child
*       is printed as the index of the branch it refers to.
******************************************************************* PACOG ***/
void CTree::Debug ()
{
    int idx;

    for (int i=0; i<m_branches.Size(); i++)
    {
        idx = m_branches[i].Left();
        if (idx>=0)
        {
            printf("Left= %s ", m_terminals[idx].Value());
        }
        else
        {
            printf("Left= %d ", -idx);
        }

        idx = m_branches[i].Right();
        if (idx>=0)
        {
            printf("Right= %s ", m_terminals[idx].Value());
        }
        else
        {
            printf("Right= %d ", -idx);
        }
    }
}

/*****************************************************************************
* CQuestSet::Debug *
*------------------*
*   Description:
*       Prints every question, identified by its position in the list.
*
*       NOTE(review): question names are not printed because no accessor
*       for list entry names is visible in this file.
******************************************************************* PACOG ***/
void CQuestSet::Debug ()
{
    for (int i=0; i<m_pQuest.Size(); i++)
    {
        printf("Question[%d]\n", i);
        m_pQuest[i].Debug();
    }
}

/*****************************************************************************
* CQuest::Debug *
*---------------*
*   Description:
*       Prints how many patterns the question holds.  The pattern text
*       itself is private to CRegExp and has no accessor, so it cannot
*       be printed from here.
******************************************************************* PACOG ***/
void CQuest::Debug ()
{
    printf("\t%d expression(s)\n", static_cast<int>(m_pExpr.Size()));
}
#endif
|