Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

378 lines
9.7 KiB

  1. /*
  2. *===========================================================================
  3. *
  4. * main.c
  5. *
  6. * This material contains unpublished, proprietary software of
  7. * Entropic, Inc. Any reproduction, distribution, or publication
  8. * of this work must be authorized in writing by Entropic, Inc.,
  9. * and must bear the notice:
  10. *
  11. * "Copyright (c) 1998 Entropic, Inc. All rights reserved"
  12. *
  13. * The copyright notice above does not evidence any actual or intended
  14. * publication of this source code.
  15. *
  16. * rcs_id: $Id: main.c,v 1.1 1999/10/12 19:44:42 galanes Exp $
  17. *
  18. *
  19. *
  20. *===================================================mplumpe 12/19/00========================
  21. */
  22. #include <stdio.h>
  23. #include <stdlib.h>
  24. #include <string.h>
  25. #include <assert.h>
  26. #include <sigproc.h>
  27. #include <math.h>
  28. #include "viterbi.h"
  29. typedef char TName[_MAX_PATH+1];
  30. #define SYNTAX fprintf(stderr, "USAGE: fileDist file1 file2 (DTW) (DELTA:wt) (output.txt)\n\n")
  31. double* ReadInputFile (char *fName, int *nFrames, int *frameSize);
  32. int *FindOptimalPath (double *adOriginal, int iOrigLen, double *adSynth, int iSynthLen, int iDim);
  33. // globals for cost functions
  34. int giMaxShift=-1;
  35. int giOrigLen=-1;
  36. int giSynthLen=-1;
  37. int giDim=-1;
  38. double *gadOriginal=NULL;
  39. double *gadSynth=NULL;
  40. float *gafConcatCosts=NULL;
  41. /*
  42. *-----------------------------------------------------------------------------
  43. *
  44. * MAIN
  45. *
  46. *-----------------------------------------------------------------------------
  47. */
  48. int
  49. main(int argc, char **argv)
  50. {
  51. FILE* output = stdout;
  52. TName fName1;
  53. TName fName2;
  54. TName fName3 = "";
  55. double* data1;
  56. double* data2;
  57. int nFrames1;
  58. int nFrames2;
  59. int frameSize1;
  60. int frameSize2;
  61. double distance = 0.0;
  62. int i, j;
  63. int cnt = 0;
  64. int *aiOptimalPath;
  65. bool fDTW = false;
  66. bool fDeltaDist = false;
  67. double dDeltaScale = 1.;
  68. if ( argc < 3 || argc > 6 )
  69. {
  70. SYNTAX;
  71. return 1;
  72. }
  73. strncpy (fName1, argv[1], _MAX_PATH);
  74. strncpy (fName2, argv[2], _MAX_PATH);
  75. if (argc == 4)
  76. {
  77. strncpy (fName3, argv[3], _MAX_PATH);
  78. if (0 == strcmp (fName3, "DTW"))
  79. {
  80. fDTW = true;
  81. fName3[0] = '\0';
  82. }
  83. else if (0 == strncmp (fName3, "DELTA:", 6))
  84. {
  85. fDeltaDist = true;
  86. fName3[0] = '\0';
  87. dDeltaScale = atof(fName3+6);
  88. }
  89. }
  90. else if (argc == 5)
  91. {
  92. if (0 == strcmp (argv[3], "DTW"))
  93. {
  94. fDTW = true;
  95. if (0 == strncmp (argv[4], "DELTA:", 6))
  96. {
  97. fDeltaDist = true;
  98. dDeltaScale = atof(argv[4]+6);
  99. }
  100. else
  101. {
  102. strncpy (fName3, argv[4], _MAX_PATH);
  103. }
  104. }
  105. else
  106. {
  107. if (0 == strncmp (argv[3], "DELTA:", 6))
  108. {
  109. fDeltaDist = true;
  110. dDeltaScale = atof(argv[3]+6);
  111. }
  112. strncpy (fName3, argv[4], _MAX_PATH);
  113. }
  114. }
  115. else if (argc == 6)
  116. {
  117. if (0 == strcmp (argv[3], "DTW"))
  118. {
  119. fDTW = true;
  120. }
  121. if (0 == strncmp (argv[4], "DELTA:", 6))
  122. {
  123. fDeltaDist = true;
  124. dDeltaScale = atof(argv[4]+6);
  125. }
  126. strncpy (fName3, argv[5], _MAX_PATH);
  127. }
  128. /*
  129. * read data
  130. */
  131. data1 = ReadInputFile(fName1, &nFrames1, &frameSize1);
  132. data2 = ReadInputFile(fName2, &nFrames2, &frameSize2);
  133. if (frameSize1 != frameSize2)
  134. {
  135. fprintf(stderr, "Different data order between %s %s\n", fName1, fName2);
  136. return 1;
  137. }
  138. if (fDTW)
  139. {
  140. //
  141. // Find the optimal path - assumes the original is fName1
  142. //
  143. aiOptimalPath = FindOptimalPath (data1, nFrames1, data2, nFrames2, frameSize1);
  144. }
  145. else
  146. {
  147. if (nFrames2 < nFrames1)
  148. {
  149. nFrames1 = nFrames2;
  150. }
  151. aiOptimalPath = (int *)malloc (sizeof(int)*nFrames1);
  152. for (i=0; i < nFrames1; i++)
  153. {
  154. aiOptimalPath[i] = i;
  155. }
  156. }
  157. //
  158. // Find the distance between the optimal path & the original
  159. //
  160. if (!fDeltaDist)
  161. {
  162. for (i = 0; i < nFrames1 ; i++)
  163. {
  164. /* only use voiced segments */
  165. if (data1[i * frameSize1] > 0.8 || data2[aiOptimalPath[i] * frameSize2] > 0.8)
  166. {
  167. distance += EuclideanDist(&data1[i * frameSize1], &data2[aiOptimalPath[i] * frameSize2], frameSize1);
  168. cnt++;
  169. }
  170. }
  171. }
  172. else // fDeltaDist
  173. {
  174. // just skip the first and last frames. These are surely silence and don't matter anyway.
  175. // This makes delta calculations easier
  176. double *adDelta1, *adDelta2;
  177. adDelta1 = (double *)malloc (sizeof(double)*frameSize1);
  178. adDelta2 = (double *)malloc (sizeof(double)*frameSize1);
  179. for (i = 1; i < nFrames1-1 ; i++)
  180. {
  181. /* only use voiced segments */
  182. if (data1[i * frameSize1] > 0.8 || data2[aiOptimalPath[i] * frameSize2] > 0.8)
  183. {
  184. distance += EuclideanDist(&data1[i * frameSize1], &data2[aiOptimalPath[i] * frameSize2], frameSize1);
  185. for (j=0; j < frameSize1; j++)
  186. {
  187. adDelta1[j] = data1[(i+1)*frameSize1+j] - data1[(i-1)*frameSize1+j];
  188. adDelta2[j] = data2[(aiOptimalPath[i]+1)*frameSize1+j] - data2[(aiOptimalPath[i]-1)*frameSize1+j];
  189. }
  190. distance += dDeltaScale * EuclideanDist(adDelta1, adDelta2, frameSize1);
  191. cnt++;
  192. }
  193. }
  194. free (adDelta1);
  195. free (adDelta2);
  196. }
  197. free (aiOptimalPath);
  198. if (cnt > 0)
  199. {
  200. distance /= cnt;
  201. }
  202. /*
  203. * write result
  204. */
  205. if (fName3[0])
  206. {
  207. if( (output = fopen(fName3, "wt")) == NULL)
  208. {
  209. fprintf(stderr, "Can not open file %s\n", fName3);
  210. return 1;
  211. }
  212. }
  213. fprintf(output, "%f", distance);
  214. fclose(output);
  215. free(data1);
  216. free(data2);
  217. return 0;
  218. }
  /*
   *-----------------------------------------------------------------------------
   *
   * Read spectral data
   *
   * File layout, as read here: three ints (a type tag, the frame size, the
   * number of frames) followed by frameSize * nFrames doubles.
   *
   *   fName      file to read
   *   nFrames    out: number of frames (0 on failure)
   *   frameSize  out: number of doubles per frame (0 on failure)
   *
   * Returns a malloc'ed array of frameSize * nFrames doubles that the caller
   * must free, or NULL after printing a message on stderr when the file can
   * not be opened, the header is short or holds negative sizes, memory can
   * not be allocated, or the file ends before all frames were read.
   *
   *-----------------------------------------------------------------------------
   */
  double *
  ReadInputFile (char *fName, int *nFrames, int *frameSize)
  {
      FILE*   fp;
      double *data;
      int     type;       /* read to step over the field; not used otherwise */
      int     size = 0;
      int     count = 0;
      size_t  nValues;

      /* callers never see uninitialized sizes, whatever goes wrong below */
      *nFrames = 0;
      *frameSize = 0;

      if( (fp = fopen(fName, "rb")) == NULL)
      {
          fprintf(stderr, "Can not open file %s\n", fName);
          return NULL;
      }

      /* read header */
      if (fread(&type, sizeof(int), 1, fp) != 1 ||
          fread(&size, sizeof(int), 1, fp) != 1 ||
          fread(&count, sizeof(int), 1, fp) != 1 ||
          size < 0 || count < 0)
      {
          fprintf(stderr, "Bad header in file %s\n", fName);
          fclose(fp);
          return NULL;
      }

      /* refuse sizes whose byte count does not fit in a size_t */
      if (size != 0 && (size_t)count > ((size_t)-1) / sizeof(double) / (size_t)size)
      {
          fprintf(stderr, "Can not alloc memory \n");
          fclose(fp);
          return NULL;
      }
      nValues = (size_t)size * (size_t)count;

      /* alloc memory (at least one element, so NULL always means failure) */
      data = (double *)malloc((nValues > 0 ? nValues : 1) * sizeof(double));
      if (data == NULL)
      {
          fprintf(stderr, "Can not alloc memory \n");
          fclose(fp);
          return NULL;
      }

      /* read cep data: the frames are contiguous, so one read covers them all */
      if (fread(data, sizeof(double), nValues, fp) != nValues)
      {
          fprintf(stderr, "Unexpected end of file %s\n", fName);
          free(data);
          fclose(fp);
          return NULL;
      }

      fclose(fp);

      *frameSize = size;
      *nFrames = count;
      return data;
  }
  257. float _cdecl ConcatCost (const void *pElem1, const void *pElem2, float fUnitCost)
  258. {
  259. // check if it is a valid concat option
  260. int i1 = (int)pElem1;
  261. int i2 = (int)pElem2;
  262. if (i1 > i2)
  263. return 9E9f;
  264. else if (i2-i1 > giMaxShift)
  265. return 9e9f;
  266. else
  267. return fUnitCost*gafConcatCosts[i2-i1];
  268. }
  269. float _cdecl UnitCost (const void *pElem1, const int iOrigPos)
  270. {
  271. // Just the Euclidean distance
  272. int iSynthPos = (int)pElem1;
  273. iSynthPos--;
  274. assert ((iOrigPos >=0) && (iOrigPos < giOrigLen));
  275. assert ((iSynthPos >=0) && (iSynthPos < giSynthLen));
  276. return (float)EuclideanDist(gadOriginal + iOrigPos * giDim, gadSynth + iSynthPos * giDim, giDim);
  277. }
  278. int *FindOptimalPath (double *adOriginal, int iOrigLen, double *adSynth, int iSynthLen, int iDim)
  279. {
  280. //
  281. // Put the appropriate vectors into Viterbi, then call it
  282. //
  283. int i, j;
  284. CViterbi Viterbi;
  285. float fCost, fMidShift;
  286. int *aiPath;
  287. int iStart, iStop;
  288. //
  289. // Find ConcatCosts
  290. //
  291. giMaxShift = 2*iSynthLen/iOrigLen + 1;
  292. fMidShift=(float)iSynthLen/(float)iOrigLen;
  293. gafConcatCosts = (float *)malloc (sizeof(float)*(giMaxShift+1));
  294. for (i=0; i <= giMaxShift; i++)
  295. {
  296. gafConcatCosts[i] = (float )(1.f+fabs(fMidShift-i)/fMidShift);
  297. }
  298. giOrigLen = iOrigLen;
  299. giSynthLen = iSynthLen;
  300. giDim = iDim;
  301. gadOriginal = adOriginal;
  302. gadSynth = adSynth;
  303. Viterbi.Init (iOrigLen, 51);
  304. // The passed in position always must be one greater, because 0 is a special tag for the viterbi algorithm
  305. // Add endpoint constrants
  306. Viterbi.Add (0, (void *)1);
  307. Viterbi.Add (iOrigLen-1, (void *)iSynthLen);
  308. // Add one more constraint to allow delta calculation
  309. Viterbi.Add (1, (void *)2);
  310. Viterbi.Add (iOrigLen-2, (void *)(iSynthLen-1));
  311. // add intermediate options
  312. for (i=2; i <= iOrigLen-3; i++)
  313. {
  314. // for now, add in 25 frames on either side (a window of .51 seconds total) of average
  315. iStart = (int)(i*fMidShift-25);
  316. iStop = (int)(i*fMidShift+25);
  317. if (iStart < 0)
  318. {
  319. iStart = 0;
  320. }
  321. if (iStop > iSynthLen)
  322. {
  323. iStop=iSynthLen;
  324. }
  325. iStart += 1;
  326. for (j=iStart; j <= iStop; j++)
  327. {
  328. Viterbi.Add (i, (void *)j);
  329. }
  330. }
  331. Viterbi.FindBestPath (ConcatCost, UnitCost, &fCost);
  332. //
  333. // Best path now in void ** Viterbi.m_rgpBestElems
  334. //
  335. free (gafConcatCosts);
  336. aiPath = (int *)malloc (sizeof(int)*iOrigLen);
  337. for (i=0; i < iOrigLen; i++)
  338. {
  339. aiPath[i] = (int)(Viterbi.m_rgpBestElems[i]) - 1;
  340. }
  341. return aiPath;
  342. }