Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

927 lines
30 KiB

  1. /******************************************************************************
  2. * UnitSearch.cpp *
  3. *----------------*
  4. *
  5. *------------------------------------------------------------------------------
  6. * Copyright (c) 1997 Entropic Research Laboratory, Inc.
  7. * Copyright (C) 1998 Entropic, Inc
  8. * Copyright (C) 2000 Microsoft Corporation Date: 03/02/00 - 12/4/00
  9. * All Rights Reserved
  10. *
  11. ********************************************************************* mplumpe was PACOG ***/
#include "UnitSearch.h"
#include "clusters.h"
#include "vqtable.h"
#include "trees.h"
#include "SpeakerData.h"
#include "backendInt.h"
#include <assert.h>
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vector>
  21. //
  22. // definitions for DPCand and DPLink moved to UnitSearch.h so they can be
  23. // used in the CUnitSearch class. mplumpe 12/5/00
  24. //
  25. static const double s_dDefaultF0Weight = 0.5F;
  26. static const double s_dDefaultDurWeight = 0.1F;
  27. static const double s_dDefaultRmsWeight = 0.3F;
  28. static const double s_dDefaultLklWeight = 0.1F;
  29. static const double s_dDefaultContWeight = 2.0F;
  30. static const double s_dDefaultSameSegWeight = 1.0F;
  31. static const double s_dDefaultPhBdrWeight = 0.4F;
  32. static const double s_dDefaultF0BdrWeight = 0.2F;
  33. /*****************************************************************************
  34. * CUnitSearch::CUnitSearch *
  35. *--------------------------*
  36. * Description:
  37. *
  38. ******************************************************************* PACOG ***/
  39. CUnitSearch::CUnitSearch (int iDynSearch, int iBlend, int iUseTargetF0, int iUseGain)
  40. {
  41. m_iDynSearch = iDynSearch;
  42. m_iBlend = iBlend;
  43. m_iUseTargetF0 = iUseTargetF0;
  44. m_iUseGain = iUseGain;
  45. m_pszLastPhone[0] = '\0';
  46. m_pszUnitName[0] = '\0';
  47. m_iChunkIdx1 = -1;
  48. m_dTime1 = 0.0;
  49. m_dFrom1 = 0.0;
  50. m_dTo1 = 0.0;
  51. m_dGain1 = 0.0;
  52. m_dNumAcum = 0.0;
  53. m_weights.f0 = s_dDefaultF0Weight;
  54. m_weights.dur = s_dDefaultDurWeight;
  55. m_weights.rms = s_dDefaultRmsWeight;
  56. m_weights.lkl = s_dDefaultLklWeight;
  57. m_weights.cont = s_dDefaultContWeight;
  58. m_weights.sameSeg = s_dDefaultSameSegWeight;
  59. m_weights.phBdr = s_dDefaultPhBdrWeight;
  60. m_weights.f0Bdr = s_dDefaultF0BdrWeight;
  61. }
  62. /*****************************************************************************
  63. * CUnitSearch::SetSpeakerData *
  64. *-----------------------------*
  65. * Description:
  66. * This is the main slm function, dp search for a segment of input phone
  67. * sequence will be done here.
  68. ******************************************************************* PACOG ***/
  69. void CUnitSearch::SetSpeakerData (CSpeakerData* pSpeakerData)
  70. {
  71. m_pSpeakerData = pSpeakerData;
  72. m_weights = pSpeakerData->GetWeights();
  73. m_pSpeakerData->PreComputeDist();
  74. }
  75. /*****************************************************************************
  76. * CUnitSearch::ComputeDPInfo *
  77. *----------------------------*
  78. * Description:
  79. * This is the main slm function, dp search for a segment of input phone
  80. * sequence will be done here.
  81. ******************************************************************* mplumpe ***/
  82. void CUnitSearch::ComputeDPInfo (DPLink* pLastLink, DPLink& rNewLink, double targetF0)
  83. {
  84. SegInfo *candIseg;
  85. SegInfo *candJseg;
  86. int nCandI;
  87. int nCandJ;
  88. double f0Dev;
  89. double timePenalty;
  90. double bestWeight = 0.0;
  91. short targetF0Positive;
  92. int i;
  93. int j;
  94. int iBest;
  95. bool fF0Dist;
  96. nCandI = rNewLink.m_cands.size();
  97. rNewLink.m_iBestPath = -1;
  98. rNewLink.m_dTargF0 = targetF0;
  99. /* Pre compute booleans */
  100. targetF0Positive = (short) (targetF0 > 0);
  101. timePenalty = m_weights.sameSeg;
  102. for (i=0; i<nCandI; i++)
  103. {
  104. candIseg = rNewLink.m_cands[i].segment;
  105. // Compute weights
  106. if (candIseg->f0flag == 1)
  107. {
  108. if (m_iUseTargetF0)
  109. {
  110. if (targetF0Positive)
  111. {
  112. f0Dev = fabs(candIseg->f0 - targetF0) / targetF0;
  113. }
  114. else
  115. {
  116. f0Dev = 0.0;
  117. }
  118. }
  119. else
  120. {
  121. if ( rNewLink.m_dAverF0 )
  122. {
  123. f0Dev = fabs(candIseg->f0 - rNewLink.m_dAverF0) / rNewLink.m_dAverF0;
  124. }
  125. else
  126. {
  127. f0Dev = 0.0;
  128. }
  129. }
  130. }
  131. else
  132. {
  133. f0Dev = 0.0;
  134. }
  135. rNewLink.m_cands[i].f0Weight = m_weights.f0 * f0Dev;
  136. rNewLink.m_cands[i].acumWeight = rNewLink.m_cands[i].f0Weight + candIseg->repDist;
  137. if (pLastLink)
  138. {
  139. fF0Dist = ( candIseg->f0flag == 1 ) || ( candIseg->f0flag == -2 );
  140. double minimum = DBL_MAX;
  141. double totalWeight;
  142. nCandJ = pLastLink->m_cands.size();
  143. //
  144. // For now, I have two loops, one for with VQ one for without VQ. I don't want to
  145. // have to check for VQ within the loop, that happens too often. Probably a better
  146. // solution is to have just not call this routine if we're doing a Min database,
  147. // and otherwise require a VQ table
  148. //
  149. if (m_pSpeakerData->m_pVq)
  150. {
  151. for (j=0; j<nCandJ; j++)
  152. {
  153. DPCand& rCand = pLastLink->m_cands[j];
  154. candJseg = rCand.segment;
  155. // I've simplified this : instead of calculating the end point of the left and making sure it is close
  156. // to the start point of the right, I'm just checking that the VQ indexes are the same.
  157. // This perhaps isn't quite as accurate, but it is much quicker.
  158. //
  159. // I've also made the assumption that we are using VQ.
  160. //
  161. // Also not calculating contWeight since it wasn't used anywhere.
  162. //
  163. // Also assuming VQ table (asserted above)
  164. //
  165. // Also, doing some lossless pruning right below. Since totalWeight after the first calculation
  166. // is much larger than timePenalty (often 10x), it happens often enough that we don't need to
  167. // do the calculations to determine this weight (whether or not the segments are sequential)
  168. //
  169. // mplumpe 12/1/00
  170. totalWeight = rCand.acumWeight + m_pSpeakerData->m_pVq->Element(candJseg->rightVqIdx, candIseg->leftVqIdx);
  171. if (totalWeight < minimum)
  172. {
  173. if ( (candIseg->chunkIdx != candJseg->chunkIdx) || (candJseg->rightVqIdx != candIseg->leftVqIdx))
  174. {
  175. if ( pLastLink->m_dTime > 0.0 )
  176. {
  177. totalWeight += timePenalty + m_weights.phBdr;
  178. }
  179. else
  180. {
  181. totalWeight += timePenalty;
  182. }
  183. //--- f0 flag
  184. // 1 if f0 of the unit is all 1 (have non-zero f0 value)
  185. // 0 if f0 of the unit is all 0
  186. // -1 if f0 of the unit is from 0 to 1
  187. // -2 if f0 of the unit is from 1 to 0
  188. if ( fF0Dist && ( candJseg->f0flag == 1 || candJseg->f0flag == -1 ) )
  189. {
  190. totalWeight += fabs(candIseg->f0 - candJseg->f0) / 10.0 * m_weights.f0Bdr;
  191. }
  192. if (totalWeight < minimum)
  193. {
  194. minimum = totalWeight;
  195. iBest = j;
  196. }
  197. }
  198. else
  199. {
  200. if ( fabs(candJseg->start + candJseg->dur - candIseg->start) < 0.001 )
  201. {
  202. minimum = totalWeight;
  203. iBest = j;
  204. }
  205. else
  206. {
  207. if ( pLastLink->m_dTime > 0.0 )
  208. {
  209. totalWeight += timePenalty + m_weights.phBdr;
  210. }
  211. else
  212. {
  213. totalWeight += timePenalty;
  214. }
  215. if ( fF0Dist && ( candJseg->f0flag == 1 || candJseg->f0flag == -1 ) )
  216. {
  217. totalWeight += fabs(candIseg->f0 - candJseg->f0) / 10.0 * m_weights.f0Bdr;
  218. }
  219. if (totalWeight < minimum)
  220. {
  221. minimum = totalWeight;
  222. iBest = j;
  223. }
  224. }
  225. }
  226. }
  227. }
  228. }
  229. else
  230. {
  231. for (j=0; j<nCandJ; j++)
  232. {
  233. DPCand& rCand = pLastLink->m_cands[j];
  234. candJseg = rCand.segment;
  235. totalWeight = rCand.acumWeight;
  236. if (totalWeight < minimum)
  237. {
  238. if ((candIseg->chunkIdx != candJseg->chunkIdx) || (fabs(candJseg->start+candJseg->dur - candIseg->start) > .0001))
  239. {
  240. if ( pLastLink->m_dTime > 0.0 )
  241. {
  242. totalWeight += timePenalty + m_weights.phBdr;
  243. }
  244. else
  245. {
  246. totalWeight += timePenalty;
  247. }
  248. if (candIseg->f0flag == 1 && candJseg->f0flag == 1)
  249. {
  250. totalWeight += fabs(candIseg->f0 - candJseg->f0) / 10.0 * m_weights.f0Bdr;
  251. }
  252. if (totalWeight < minimum)
  253. {
  254. minimum = totalWeight;
  255. iBest = j;
  256. }
  257. }
  258. else
  259. {
  260. minimum = totalWeight;
  261. iBest = j;
  262. }
  263. }
  264. }
  265. }
  266. rNewLink.m_cands[i].acumWeight += minimum;
  267. rNewLink.m_cands[i].prevPath = iBest;
  268. }
  269. else
  270. {
  271. rNewLink.m_cands[i].prevPath = -1;
  272. }
  273. if (i==0)
  274. {
  275. bestWeight = rNewLink.m_cands[i].acumWeight + 1.0;
  276. }
  277. if (rNewLink.m_cands[i].acumWeight < bestWeight)
  278. {
  279. bestWeight = rNewLink.m_cands[i].acumWeight;
  280. rNewLink.m_iBestPath = i;
  281. }
  282. }
  283. }
  284. /*****************************************************************************
  285. * CUnitSearch::Search *
  286. *---------------------*
  287. * Description:
  288. * This is the main slm function, dp search for a segment of input phone
  289. * sequence will be done here.
  290. *
  291. * Changes:
  292. * 12/5/00 dpList and dpLink are now member variables so we don't have
  293. * to reallocate them each time.
  294. *
  295. ******************************************************************* mplumpe ***/
  296. int CUnitSearch::Search (Phone* phList, int nPh, ChkDescript** ppChunks, int* piNumChunks, double dStartTime)
  297. {
  298. char leftPh[PHONE_MAX_LEN]="";
  299. char rightPh[PHONE_MAX_LEN]="";
  300. char centralPh[PHONE_MAX_LEN]="sil";
  301. char triph[MAX_CLUSTER_LEN];
  302. const char* clusterName;
  303. double newTime;
  304. double oldTime = 0.0;
  305. double newF0;
  306. double oldF0 = 100.0;
  307. int phonCnt;
  308. int i;
  309. int stateCount;
  310. int lastDone;
  311. assert (nPh==0 || phList!=NULL);
  312. assert (ppChunks && piNumChunks);
  313. oldTime = dStartTime;
  314. phonCnt = 0;
  315. lastDone = 0;
  316. while ((phonCnt<nPh) || (!lastDone))
  317. {
  318. if ( phonCnt>=nPh )
  319. {
  320. sprintf(rightPh,"sil");
  321. lastDone = 1;
  322. }
  323. else
  324. {
  325. strcpy(rightPh, phList[phonCnt].phone);
  326. newTime = phList[phonCnt].end;
  327. newF0 = phList[phonCnt].f0;
  328. }
  329. if (*leftPh && strcmp(centralPh,"sil"))
  330. {
  331. sprintf(triph,"%s-%s+%s",leftPh,centralPh,rightPh) ;
  332. if ((stateCount = m_pSpeakerData->m_pTrees->GetNumStates(triph)) <= 0)
  333. {
  334. return 0;
  335. }
  336. // Find cluster for each state
  337. for ( i=0; i <stateCount; i++)
  338. {
  339. if ((clusterName = m_pSpeakerData->m_pTrees->TriphoneToCluster( triph, i)) == 0 )
  340. {
  341. return 0;
  342. }
  343. m_dpLink.m_cands.resize(0);
  344. m_pSpeakerData->m_pClusters->GetStats( clusterName, 0, &m_dpLink.m_dAverF0,
  345. &m_dpLink.m_dAverRms, &m_dpLink.m_dAverDur);
  346. m_dpLink.m_dTime = (i < (stateCount-1)) ? ((float) i - stateCount + 1) : oldTime;
  347. if (m_iDynSearch)
  348. {
  349. m_pSpeakerData->m_critSect.Lock();
  350. int iEquivCount = m_pSpeakerData->m_pClusters->GetEquivalentCount (clusterName);
  351. for (int j=0; j<iEquivCount; j++)
  352. {
  353. DPCand cand;
  354. cand.segment = m_pSpeakerData->m_pClusters->GetEquivalent( j );
  355. m_dpLink.m_cands.push_back(cand);
  356. }
  357. m_pSpeakerData->m_critSect.Unlock();
  358. }
  359. else
  360. {
  361. DPCand cand;
  362. m_pSpeakerData->m_critSect.Lock();
  363. cand.segment = m_pSpeakerData->m_pClusters->GetBestExample( clusterName );
  364. m_pSpeakerData->m_critSect.Unlock();
  365. m_dpLink.m_cands.push_back(cand);
  366. }
  367. ComputeDPInfo( &m_dpList.back(), m_dpLink, oldF0);
  368. m_dpList.push_back(m_dpLink);
  369. }
  370. }
  371. else if (*leftPh)
  372. {
  373. m_dpLink.m_cands.resize(0);
  374. m_pSpeakerData->m_pClusters->GetStats( "sil", 0, &m_dpLink.m_dAverF0,
  375. &m_dpLink.m_dAverRms, &m_dpLink.m_dAverDur);
  376. m_dpLink.m_dTime = oldTime;
  377. if (m_iDynSearch)
  378. {
  379. m_pSpeakerData->m_critSect.Lock();
  380. int iEquivCount = m_pSpeakerData->m_pClusters->GetEquivalentCount ("sil");
  381. for (int j=0; j<iEquivCount; j++)
  382. {
  383. DPCand cand;
  384. cand.segment = m_pSpeakerData->m_pClusters->GetEquivalent( j );
  385. m_dpLink.m_cands.push_back(cand);
  386. }
  387. m_pSpeakerData->m_critSect.Unlock();
  388. }
  389. else
  390. {
  391. DPCand cand;
  392. m_pSpeakerData->m_critSect.Lock();
  393. cand.segment = m_pSpeakerData->m_pClusters->GetBestExample( "sil" );
  394. m_pSpeakerData->m_critSect.Unlock();
  395. m_dpLink.m_cands.push_back(cand);
  396. }
  397. if (m_dpList.size() == 0)
  398. {
  399. ComputeDPInfo( 0, m_dpLink, oldF0);
  400. }
  401. else
  402. {
  403. ComputeDPInfo( &m_dpList.back(), m_dpLink, oldF0);
  404. }
  405. m_dpList.push_back(m_dpLink);
  406. }
  407. strcpy(leftPh, centralPh);
  408. strcpy(centralPh, rightPh);
  409. oldTime = newTime;
  410. oldF0 = newF0;
  411. phonCnt++;
  412. }
  413. #ifdef _DEBUG_
  414. DebugDPInfo (m_dpList);
  415. #endif
  416. if (!FindOptimalPath (m_dpList, dStartTime, ppChunks, piNumChunks) )
  417. {
  418. return 0;
  419. }
  420. m_dpList.resize(0);
  421. return 1;
  422. }
  423. /*****************************************************************************
  424. * CUnitSearch::FindOptimalPath *
  425. *------------------------------*
  426. * Description:
  427. *
  428. ******************************************************************* PACOG ***/
  429. int CUnitSearch::FindOptimalPath (std::vector<DPLink>& rDPList, double dStartTime,
  430. ChkDescript** ppChunks, int* piNumChunks)
  431. {
  432. int* piIndexes;
  433. int iNumIndexes;
  434. double dRunTime = 0.0;
  435. double dTimeSlot = 0.0;
  436. double dTotalSegDur;
  437. double dGain;
  438. double dPrevTime = 0.0;
  439. int i;
  440. int j;
  441. assert (ppChunks);
  442. assert (piNumChunks);
  443. dRunTime = dStartTime;
  444. iNumIndexes = rDPList.size();
  445. piIndexes = new int[iNumIndexes];
  446. if (piIndexes)
  447. {
  448. Backtrack (rDPList, piIndexes);
  449. #ifdef _SLM_DEBUG
  450. DebugOptimalPath (list, lLength, piIndexes);
  451. #endif
  452. for (i=0; i<iNumIndexes; i++)
  453. {
  454. if ( !m_pSpeakerData->GetFrontEndFlag() )
  455. {
  456. //--- use TrueTalk Front End
  457. //--- use target duration as finnal output duration
  458. if (rDPList[i].m_dTime > 0.0)
  459. { // final state
  460. dRunTime = rDPList[i].m_dTime;
  461. }
  462. else if (rDPList[i].m_dTime< 0)
  463. {
  464. dTotalSegDur = 0;
  465. for (j=(int)rDPList[i].m_dTime; j<=0; j++)
  466. {
  467. dTotalSegDur += rDPList[i-j].m_cands[piIndexes[i-j]].segment->dur; // remember, j is -ve
  468. }
  469. dTimeSlot = (rDPList[i-(int)rDPList[i].m_dTime].m_dTime- dRunTime)/dTotalSegDur;
  470. dRunTime += dTimeSlot * rDPList[i].m_cands[piIndexes[i]].segment->dur;
  471. for (j=(int)rDPList[i].m_dTime; j<0; j++)
  472. {
  473. rDPList[i-j].m_dTime= 0; // remember, j is -ve
  474. }
  475. }
  476. else
  477. {
  478. dRunTime += dTimeSlot * rDPList[i].m_cands[piIndexes[i]].segment->dur;
  479. }
  480. }
  481. else
  482. {
  483. //--- use MS Entropic Front End
  484. //--- use tree cluster duration as target duration, and m_dTime is dur_ratio from frontend ---
  485. dTimeSlot = 1.0;
  486. j = i;
  487. if (0 == strcmp ("sil", rDPList[j].m_cands[0].segment->clusterName))
  488. {
  489. dRunTime += rDPList[j].m_dTime - dPrevTime;
  490. dPrevTime = rDPList[j].m_dTime;
  491. }
  492. else
  493. {
  494. while ( j < iNumIndexes && rDPList[j].m_dTime < 0 )
  495. {
  496. j++;
  497. }
  498. if ( j < iNumIndexes && rDPList[j].m_dTime > 0 )
  499. {
  500. dTimeSlot = rDPList[j].m_dTime - dPrevTime;
  501. if (i == j)
  502. {
  503. dPrevTime = rDPList[j].m_dTime;
  504. }
  505. }
  506. dRunTime += rDPList[i].m_dAverDur * dTimeSlot;
  507. }
  508. }
  509. if(rDPList[i].m_cands[piIndexes[i]].segment->rms > 0)
  510. {
  511. dGain = rDPList[i].m_dAverRms/rDPList[i].m_cands[piIndexes[i]].segment->rms;
  512. }
  513. else
  514. {
  515. dGain = 1.0;
  516. }
  517. GenerateOutput (ppChunks, piNumChunks,
  518. rDPList[i].m_cands[piIndexes[i]].segment->clusterName,
  519. dRunTime,
  520. rDPList[i].m_cands[piIndexes[i]].segment->chunkIdx,
  521. rDPList[i].m_cands[piIndexes[i]].segment->start,
  522. rDPList[i].m_cands[piIndexes[i]].segment->start +
  523. rDPList[i].m_cands[piIndexes[i]].segment->dur,
  524. rDPList[i].m_cands[piIndexes[i]].segment->rms,
  525. rDPList[i].m_dTargF0,
  526. rDPList[i].m_cands[piIndexes[i]].segment->f0 * (rDPList[i].m_cands[piIndexes[i]].segment->f0flag != 0 ? 1 : 0 ),
  527. dGain);
  528. }
  529. delete[] piIndexes;
  530. }
  531. FlushOutput (ppChunks, piNumChunks);
  532. return 1;
  533. }
  534. /*****************************************************************************
  535. * CUnitSearch::Backtrack *
  536. *------------------------*
  537. * Description:
  538. *
  539. ******************************************************************* PACOG ***/
  540. void CUnitSearch::Backtrack (std::vector<DPLink>& rDPList, int* piIndexes)
  541. {
  542. assert (piIndexes);
  543. int i = rDPList.size() - 1;
  544. int iPrev = rDPList[i].m_iBestPath;
  545. for (; i>=0; i--)
  546. {
  547. piIndexes[i] = iPrev;
  548. iPrev = rDPList[i].m_cands[iPrev].prevPath;
  549. }
  550. }
  551. /*****************************************************************************
  552. * CUnitSearch::Backtrack *
  553. *------------------------*
  554. * Description:
  555. *
  556. ******************************************************************* PACOG ***/
  557. int CUnitSearch::GenerateOutput (ChkDescript** ppChunks, int* piNumChunks, const char* pszCluster,
  558. double dTime, int iChunkIdx, double dFrom, double dTo,
  559. double dRms, double targF0, double srcF0, double dGain)
  560. {
  561. char pszPhone[20] = "";
  562. // int difSegments;
  563. int contiguous;
  564. double outGain = 1.0;
  565. if (!m_iBlend)
  566. {
  567. outGain = (m_iUseGain) ? dGain : 1.0;
  568. AddChunk (ppChunks, piNumChunks, pszCluster, dTime, iChunkIdx, dFrom, dTo, targF0, srcF0, outGain);
  569. }
  570. else
  571. {
  572. if (pszCluster)
  573. {
  574. CentralPhone (pszCluster, pszPhone);
  575. if (m_iChunkIdx1!= -1) {
  576. // difSegments = (strcmp (pszPhone, m_pszLastPhone)!=0);
  577. /* difSegments = (strcmp (pszPhone, m_pszLastPhone)!=0)
  578. && ( (Unvoiced(pszPhone) && !Unvoiced(m_pszLastPhone) ) ||
  579. ( !Unvoiced(pszPhone) && Unvoiced(m_pszLastPhone)) );
  580. */
  581. contiguous = (fabs(dFrom - m_dTo1) < .0001) && (m_iChunkIdx1 == iChunkIdx);
  582. // if ( contiguous && !difSegments ) {
  583. if ( contiguous ) {
  584. if (strcmp(m_pszLastPhone, pszPhone)) {
  585. strcat(strcat (m_pszUnitName, "_"), pszPhone);
  586. }
  587. m_dTime1 = dTime;
  588. m_dTo1 = dTo;
  589. m_dGain1 += dGain * dRms;
  590. m_dNumAcum += dRms;
  591. if ( srcF0 > 0.0 )
  592. {
  593. m_dSrcF0 += srcF0;
  594. m_iNumSrcF0++;
  595. }
  596. m_dTargF0 += targF0;
  597. m_iNumTargF0++;
  598. } else {
  599. outGain = (m_iUseGain) ? m_dGain1/(m_dNumAcum) : 1.0;
  600. if ( m_iNumTargF0 > 1 )
  601. {
  602. m_dTargF0 /= m_iNumTargF0;
  603. }
  604. if ( m_iNumSrcF0 > 1 )
  605. {
  606. m_dSrcF0 /= m_iNumSrcF0;
  607. }
  608. AddChunk (ppChunks, piNumChunks, m_pszUnitName, m_dTime1, m_iChunkIdx1,
  609. m_dFrom1, m_dTo1, m_dTargF0, m_dSrcF0, outGain);
  610. strcpy(m_pszUnitName, pszPhone);
  611. m_dNumAcum = dRms;
  612. m_iChunkIdx1 = iChunkIdx;
  613. m_dTime1 = dTime;
  614. m_dFrom1 = dFrom;
  615. m_dTo1 = dTo;
  616. m_dGain1= dGain * dRms;
  617. m_dSrcF0 = srcF0;
  618. if ( srcF0 > 0 )
  619. {
  620. m_iNumSrcF0 = 1;
  621. }
  622. else
  623. {
  624. m_iNumSrcF0 = 0;
  625. }
  626. m_dTargF0 = targF0;
  627. m_iNumTargF0 = 1;
  628. }
  629. } else {
  630. if (strcmp(m_pszLastPhone, pszPhone))
  631. {
  632. strcat (m_pszUnitName, pszPhone);
  633. }
  634. m_dTime1 = dTime;
  635. m_iChunkIdx1 = iChunkIdx;
  636. m_dFrom1 = dFrom;
  637. m_dTo1 = dTo;
  638. m_dNumAcum = dRms;
  639. m_dGain1 = dGain * dRms;
  640. m_dSrcF0 = srcF0;
  641. if ( srcF0 > 0 )
  642. {
  643. m_iNumSrcF0 = 1;
  644. }
  645. else
  646. {
  647. m_iNumSrcF0 = 0;
  648. }
  649. m_dTargF0 = targF0;
  650. m_iNumTargF0 = 1;
  651. }
  652. strcpy (m_pszLastPhone, pszPhone);
  653. } else {
  654. outGain = (m_iUseGain) ? m_dGain1/(m_dNumAcum) : 1.0;
  655. if ( m_iNumTargF0 > 1 )
  656. {
  657. m_dTargF0 /= m_iNumTargF0;
  658. }
  659. if ( m_iNumSrcF0 > 1 )
  660. {
  661. m_dSrcF0 /= m_iNumSrcF0;
  662. }
  663. AddChunk (ppChunks, piNumChunks, m_pszUnitName, m_dTime1, m_iChunkIdx1,
  664. m_dFrom1, m_dTo1, m_dTargF0, m_dSrcF0, outGain);
  665. }
  666. }
  667. return 1;
  668. }
  669. /*****************************************************************************
  670. * CUnitSearch::Unvoiced *
  671. *-----------------------*
  672. * Description:
  673. *
  674. ******************************************************************* PACOG ***/
  675. int CUnitSearch::Unvoiced (const char* pszPhone)
  676. {
  677. if (*pszPhone=='s' || *pszPhone=='z' || *pszPhone== 'f' || //Includes z and zh
  678. *pszPhone=='k' || *pszPhone=='p' || *pszPhone=='t' ) // Includes t and th
  679. {
  680. return 1;
  681. }
  682. return 0;
  683. }
  684. /*****************************************************************************
  685. * CUnitSearch::CentralPhone *
  686. *---------------------------*
  687. * Description:
  688. *
  689. ******************************************************************* PACOG ***/
  690. int CUnitSearch::CentralPhone ( const char *pszTriphone, char *pszPhone )
  691. {
  692. char *ptr;
  693. strcpy( pszPhone, pszTriphone);
  694. ptr = strchr(pszPhone, '_');
  695. if (ptr)
  696. {
  697. *ptr = '\0';
  698. }
  699. else
  700. {
  701. ptr = strchr(pszPhone, ';');
  702. if (ptr)
  703. {
  704. *ptr='\0';
  705. }
  706. }
  707. return 1;
  708. }
  709. /*****************************************************************************
  710. * CUnitSearch::AddChunk *
  711. *-----------------------*
  712. * Description:
  713. *
  714. ******************************************************************* PACOG ***/
  715. int CUnitSearch::AddChunk (ChkDescript** ppChunks, int* piNumChunks, const char* name,
  716. double time, int chunkIdx, double from, double to,
  717. double targF0, double srcF0, double gain)
  718. {
  719. assert (ppChunks);
  720. assert (piNumChunks);
  721. assert (time>=0.0);
  722. assert (chunkIdx>=0);
  723. assert (from>=0.0);
  724. assert (to>from);
  725. assert (gain!=0.0);
  726. if (*ppChunks)
  727. {
  728. *ppChunks = (ChkDescript *)realloc (*ppChunks, (*piNumChunks + 1) * sizeof (**ppChunks));
  729. }
  730. else
  731. {
  732. assert (*piNumChunks ==0);
  733. *ppChunks = (ChkDescript *)malloc (sizeof (**ppChunks));
  734. }
  735. if (*ppChunks == 0)
  736. {
  737. return 0;
  738. }
  739. if (name)
  740. {
  741. strcpy( (*ppChunks)[*piNumChunks].name, name );
  742. }
  743. else
  744. {
  745. (*ppChunks)[*piNumChunks].name[0] = 0;
  746. }
  747. (*ppChunks)[*piNumChunks].end = time;
  748. (*ppChunks)[*piNumChunks].from = from;
  749. (*ppChunks)[*piNumChunks].to = to;
  750. (*ppChunks)[*piNumChunks].gain = gain;
  751. (*ppChunks)[*piNumChunks].srcF0 = srcF0;
  752. (*ppChunks)[*piNumChunks].targF0 = targF0;
  753. if (m_pSpeakerData->m_pFileNames)
  754. {
  755. (*ppChunks)[*piNumChunks].isFileName = 1;
  756. (*ppChunks)[*piNumChunks].chunk.fileName = m_pSpeakerData->m_pFileNames[chunkIdx].m_psz;
  757. if ((*ppChunks)[*piNumChunks].chunk.fileName == NULL)
  758. {
  759. return 0;
  760. }
  761. }
  762. else
  763. {
  764. (*ppChunks)[*piNumChunks].isFileName = 0;
  765. (*ppChunks)[*piNumChunks].chunk.chunkIdx = chunkIdx;
  766. }
  767. (*piNumChunks)++;
  768. return 1;
  769. }
  770. /*****************************************************************************
  771. * CUnitSearch::AddChunk *
  772. *-----------------------*
  773. * Description:
  774. *
  775. ******************************************************************* PACOG ***/
  776. void CUnitSearch::FlushOutput (ChkDescript** ppChunks, int* piNumChunks)
  777. {
  778. if (*m_pszUnitName)
  779. {
  780. double gain = (m_iUseGain) ? m_dGain1/(m_dNumAcum) : 1.0;
  781. if ( m_iNumTargF0 > 1 )
  782. {
  783. m_dTargF0 /= m_iNumTargF0;
  784. }
  785. if ( m_iNumSrcF0 > 1 )
  786. {
  787. m_dSrcF0 /= m_iNumSrcF0;
  788. }
  789. AddChunk (ppChunks, piNumChunks, m_pszUnitName,
  790. m_dTime1, m_iChunkIdx1, m_dFrom1, m_dTo1, m_dTargF0, m_dSrcF0, gain);
  791. }
  792. m_pszLastPhone[0] = '\0';
  793. m_pszUnitName[0] = '\0';
  794. m_iChunkIdx1 = -1;
  795. m_dNumAcum = 0.0;
  796. }