mirror of https://github.com/tongzx/nt5src
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
378 lines
9.7 KiB
378 lines
9.7 KiB
/*
|
|
*===========================================================================
|
|
*
|
|
* main.c
|
|
*
|
|
* This material contains unpublished, proprietary software of
|
|
* Entropic, Inc. Any reproduction, distribution, or publication
|
|
* of this work must be authorized in writing by Entropic, Inc.,
|
|
* and must bear the notice:
|
|
*
|
|
* "Copyright (c) 1998 Entropic, Inc. All rights reserved"
|
|
*
|
|
* The copyright notice above does not evidence any actual or intended
|
|
* publication of this source code.
|
|
*
|
|
* rcs_id: $Id: main.c,v 1.1 1999/10/12 19:44:42 galanes Exp $
|
|
*
|
|
*
|
|
*
|
|
*===================================================mplumpe 12/19/00========================
|
|
*/
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
#include <sigproc.h>
|
|
#include <math.h>
|
|
#include "viterbi.h"
|
|
|
|
/*
 * _MAX_PATH comes from the Microsoft C runtime's <stdlib.h>.  Supply the same
 * value when building with a runtime that does not define it, so that TName
 * keeps its size everywhere.
 */
#ifndef _MAX_PATH
#define _MAX_PATH 260
#endif

/* Fixed-size file-name buffer: _MAX_PATH characters plus the terminating NUL. */
typedef char TName[_MAX_PATH+1];

/* Prints the command-line usage text to stderr. */
#define SYNTAX fprintf(stderr, "USAGE: fileDist file1 file2 (DTW) (DELTA:wt) (output.txt)\n\n")

/*
 * Reads a frame file: returns a malloc'ed array of (*nFrames) * (*frameSize)
 * doubles that the caller must free, or NULL on failure.
 */
double* ReadInputFile (char *fName, int *nFrames, int *frameSize);

/*
 * Aligns the original frames against the synthesized frames and returns a
 * malloc'ed array of iOrigLen indices into adSynth (caller frees).
 */
int *FindOptimalPath (double *adOriginal, int iOrigLen, double *adSynth, int iSynthLen, int iDim);

//
// Globals for the cost functions.  FindOptimalPath() fills them in before it
// runs the Viterbi search; ConcatCost() and UnitCost() read them.
//
int giMaxShift=-1;              // largest synth-frame advance ConcatCost() accepts
int giOrigLen=-1;               // number of frames in gadOriginal
int giSynthLen=-1;              // number of frames in gadSynth
int giDim=-1;                   // number of doubles per frame
double *gadOriginal=NULL;       // original frames (not owned)
double *gadSynth=NULL;          // synthesized frames (not owned)
float *gafConcatCosts=NULL;     // cost multiplier per shift, giMaxShift+1 entries
|
|
|
|
/*
|
|
*-----------------------------------------------------------------------------
|
|
*
|
|
* MAIN
|
|
*
|
|
*-----------------------------------------------------------------------------
|
|
*/
|
|
int
|
|
main(int argc, char **argv)
|
|
{
|
|
FILE* output = stdout;
|
|
TName fName1;
|
|
TName fName2;
|
|
TName fName3 = "";
|
|
double* data1;
|
|
double* data2;
|
|
int nFrames1;
|
|
int nFrames2;
|
|
int frameSize1;
|
|
int frameSize2;
|
|
double distance = 0.0;
|
|
int i, j;
|
|
int cnt = 0;
|
|
int *aiOptimalPath;
|
|
bool fDTW = false;
|
|
bool fDeltaDist = false;
|
|
double dDeltaScale = 1.;
|
|
|
|
if ( argc < 3 || argc > 6 )
|
|
{
|
|
SYNTAX;
|
|
return 1;
|
|
}
|
|
|
|
strncpy (fName1, argv[1], _MAX_PATH);
|
|
strncpy (fName2, argv[2], _MAX_PATH);
|
|
if (argc == 4)
|
|
{
|
|
strncpy (fName3, argv[3], _MAX_PATH);
|
|
if (0 == strcmp (fName3, "DTW"))
|
|
{
|
|
fDTW = true;
|
|
fName3[0] = '\0';
|
|
}
|
|
else if (0 == strncmp (fName3, "DELTA:", 6))
|
|
{
|
|
fDeltaDist = true;
|
|
fName3[0] = '\0';
|
|
dDeltaScale = atof(fName3+6);
|
|
}
|
|
}
|
|
else if (argc == 5)
|
|
{
|
|
if (0 == strcmp (argv[3], "DTW"))
|
|
{
|
|
fDTW = true;
|
|
if (0 == strncmp (argv[4], "DELTA:", 6))
|
|
{
|
|
fDeltaDist = true;
|
|
dDeltaScale = atof(argv[4]+6);
|
|
}
|
|
else
|
|
{
|
|
strncpy (fName3, argv[4], _MAX_PATH);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (0 == strncmp (argv[3], "DELTA:", 6))
|
|
{
|
|
fDeltaDist = true;
|
|
dDeltaScale = atof(argv[3]+6);
|
|
}
|
|
strncpy (fName3, argv[4], _MAX_PATH);
|
|
}
|
|
}
|
|
else if (argc == 6)
|
|
{
|
|
if (0 == strcmp (argv[3], "DTW"))
|
|
{
|
|
fDTW = true;
|
|
}
|
|
if (0 == strncmp (argv[4], "DELTA:", 6))
|
|
{
|
|
fDeltaDist = true;
|
|
dDeltaScale = atof(argv[4]+6);
|
|
}
|
|
strncpy (fName3, argv[5], _MAX_PATH);
|
|
}
|
|
|
|
/*
|
|
* read data
|
|
*/
|
|
|
|
data1 = ReadInputFile(fName1, &nFrames1, &frameSize1);
|
|
data2 = ReadInputFile(fName2, &nFrames2, &frameSize2);
|
|
|
|
if (frameSize1 != frameSize2)
|
|
{
|
|
fprintf(stderr, "Different data order between %s %s\n", fName1, fName2);
|
|
return 1;
|
|
}
|
|
|
|
if (fDTW)
|
|
{
|
|
//
|
|
// Find the optimal path - assumes the original is fName1
|
|
//
|
|
aiOptimalPath = FindOptimalPath (data1, nFrames1, data2, nFrames2, frameSize1);
|
|
}
|
|
else
|
|
{
|
|
if (nFrames2 < nFrames1)
|
|
{
|
|
nFrames1 = nFrames2;
|
|
}
|
|
aiOptimalPath = (int *)malloc (sizeof(int)*nFrames1);
|
|
for (i=0; i < nFrames1; i++)
|
|
{
|
|
aiOptimalPath[i] = i;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Find the distance between the optimal path & the original
|
|
//
|
|
if (!fDeltaDist)
|
|
{
|
|
for (i = 0; i < nFrames1 ; i++)
|
|
{
|
|
/* only use voiced segments */
|
|
if (data1[i * frameSize1] > 0.8 || data2[aiOptimalPath[i] * frameSize2] > 0.8)
|
|
{
|
|
distance += EuclideanDist(&data1[i * frameSize1], &data2[aiOptimalPath[i] * frameSize2], frameSize1);
|
|
cnt++;
|
|
}
|
|
}
|
|
}
|
|
else // fDeltaDist
|
|
{
|
|
// just skip the first and last frames. These are surely silence and don't matter anyway.
|
|
// This makes delta calculations easier
|
|
double *adDelta1, *adDelta2;
|
|
adDelta1 = (double *)malloc (sizeof(double)*frameSize1);
|
|
adDelta2 = (double *)malloc (sizeof(double)*frameSize1);
|
|
for (i = 1; i < nFrames1-1 ; i++)
|
|
{
|
|
/* only use voiced segments */
|
|
if (data1[i * frameSize1] > 0.8 || data2[aiOptimalPath[i] * frameSize2] > 0.8)
|
|
{
|
|
distance += EuclideanDist(&data1[i * frameSize1], &data2[aiOptimalPath[i] * frameSize2], frameSize1);
|
|
for (j=0; j < frameSize1; j++)
|
|
{
|
|
adDelta1[j] = data1[(i+1)*frameSize1+j] - data1[(i-1)*frameSize1+j];
|
|
adDelta2[j] = data2[(aiOptimalPath[i]+1)*frameSize1+j] - data2[(aiOptimalPath[i]-1)*frameSize1+j];
|
|
}
|
|
distance += dDeltaScale * EuclideanDist(adDelta1, adDelta2, frameSize1);
|
|
cnt++;
|
|
}
|
|
}
|
|
free (adDelta1);
|
|
free (adDelta2);
|
|
}
|
|
free (aiOptimalPath);
|
|
|
|
if (cnt > 0)
|
|
{
|
|
distance /= cnt;
|
|
}
|
|
|
|
/*
|
|
* write result
|
|
*/
|
|
if (fName3[0])
|
|
{
|
|
if( (output = fopen(fName3, "wt")) == NULL)
|
|
{
|
|
fprintf(stderr, "Can not open file %s\n", fName3);
|
|
return 1;
|
|
}
|
|
}
|
|
fprintf(output, "%f", distance);
|
|
fclose(output);
|
|
|
|
free(data1);
|
|
free(data2);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
*-----------------------------------------------------------------------------
|
|
*
|
|
* Read spectral data
|
|
*
|
|
*-----------------------------------------------------------------------------
|
|
*/
|
|
/*
 * Reads a binary frame file.
 *
 * File layout: three ints (a type tag that is read and ignored, the frame
 * size, the number of frames) followed by frameSize * nFrames doubles.
 *
 *   fName      - path of the file to read
 *   nFrames    - receives the number of frames (0 on failure)
 *   frameSize  - receives the number of doubles per frame (0 on failure)
 *
 * Returns a malloc'ed array holding all frames back to back, which the caller
 * must free, or NULL after printing a message to stderr when the file can not
 * be opened, is truncated, has a negative size in its header, or memory runs
 * out.
 */
double *
ReadInputFile (char *fName, int *nFrames, int *frameSize)
{
    FILE   *fp;
    double *data = NULL;
    size_t  nValues;
    int     type;

    /* leave the outputs defined even when we fail */
    *nFrames   = 0;
    *frameSize = 0;

    fp = fopen(fName, "rb");
    if (fp == NULL)
    {
        fprintf(stderr, "Can not open file %s\n", fName);
        return NULL;
    }

    /* read header */
    if (fread(&type, sizeof(int), 1, fp) != 1 ||
        fread(frameSize, sizeof(int), 1, fp) != 1 ||
        fread(nFrames, sizeof(int), 1, fp) != 1)
    {
        fprintf(stderr, "Can not read header of file %s\n", fName);
        goto fail;
    }

    /* reject sizes that are negative or whose byte count does not fit in size_t */
    if (*frameSize < 0 || *nFrames < 0 ||
        (*nFrames > 0 &&
         (size_t)*frameSize > ((size_t)-1 / sizeof(double)) / (size_t)*nFrames))
    {
        fprintf(stderr, "Invalid header in file %s\n", fName);
        goto fail;
    }
    nValues = (size_t)*frameSize * (size_t)*nFrames;

    /* alloc memory (never request 0 bytes, so that NULL always means failure) */
    data = (double *)malloc((nValues > 0 ? nValues : 1) * sizeof(double));
    if (data == NULL)
    {
        fprintf(stderr, "Can not alloc memory \n");
        goto fail;
    }

    /* read cep data: the frames are contiguous, so one read covers them all */
    if (nValues > 0 && fread(data, sizeof(double), nValues, fp) != nValues)
    {
        fprintf(stderr, "Can not read data from file %s\n", fName);
        goto fail;
    }

    fclose(fp);

    return data;

fail:
    free(data);
    fclose(fp);
    *nFrames   = 0;
    *frameSize = 0;
    return NULL;
}
|
|
|
|
/*
 * Transition cost callback handed to the Viterbi search by FindOptimalPath().
 *
 *   pElem1, pElem2 - synth frame positions of two path elements, stored
 *                    directly in the pointer value (see FindOptimalPath)
 *   fUnitCost      - value supplied by the search; it is scaled by the
 *                    penalty for the size of the step
 *
 * Returns a prohibitively large cost when the step goes backwards or advances
 * more than giMaxShift frames, otherwise fUnitCost times the gafConcatCosts
 * entry for that step size.
 */
float _cdecl ConcatCost (const void *pElem1, const void *pElem2, float fUnitCost)
{
    const float fForbidden = 9e9f;

    // The elements are small integers carried in a void *; go through size_t
    // so the conversion is well-formed when pointers are wider than int.
    int iShift = (int)(size_t)pElem2 - (int)(size_t)pElem1;

    // check if it is a valid concat option
    if (iShift < 0 || iShift > giMaxShift)
    {
        return fForbidden;
    }

    return fUnitCost*gafConcatCosts[iShift];
}
|
|
/*
 * Per-position cost callback handed to the Viterbi search by FindOptimalPath().
 *
 *   pElem1    - synth frame position plus one, stored directly in the pointer
 *               value (FindOptimalPath adds one because 0 is a special tag)
 *   iOrigPos  - index of the frame in gadOriginal
 *
 * Returns the Euclidean distance between original frame iOrigPos and the
 * synth frame, both giDim doubles long.
 */
float _cdecl UnitCost (const void *pElem1, const int iOrigPos)
{
    // Go through size_t so the conversion is well-formed when pointers are
    // wider than int, then undo the +1 offset.
    int iSynthPos = (int)(size_t)pElem1 - 1;

    assert ((iOrigPos >=0) && (iOrigPos < giOrigLen));
    assert ((iSynthPos >=0) && (iSynthPos < giSynthLen));

    // Just the Euclidean distance
    return (float)EuclideanDist(gadOriginal + iOrigPos * giDim, gadSynth + iSynthPos * giDim, giDim);
}
|
|
|
|
|
|
int *FindOptimalPath (double *adOriginal, int iOrigLen, double *adSynth, int iSynthLen, int iDim)
|
|
{
|
|
//
|
|
// Put the appropriate vectors into Viterbi, then call it
|
|
//
|
|
int i, j;
|
|
CViterbi Viterbi;
|
|
float fCost, fMidShift;
|
|
int *aiPath;
|
|
int iStart, iStop;
|
|
|
|
//
|
|
// Find ConcatCosts
|
|
//
|
|
giMaxShift = 2*iSynthLen/iOrigLen + 1;
|
|
fMidShift=(float)iSynthLen/(float)iOrigLen;
|
|
gafConcatCosts = (float *)malloc (sizeof(float)*(giMaxShift+1));
|
|
for (i=0; i <= giMaxShift; i++)
|
|
{
|
|
gafConcatCosts[i] = (float )(1.f+fabs(fMidShift-i)/fMidShift);
|
|
}
|
|
giOrigLen = iOrigLen;
|
|
giSynthLen = iSynthLen;
|
|
giDim = iDim;
|
|
gadOriginal = adOriginal;
|
|
gadSynth = adSynth;
|
|
|
|
Viterbi.Init (iOrigLen, 51);
|
|
|
|
// The passed in position always must be one greater, because 0 is a special tag for the viterbi algorithm
|
|
|
|
// Add endpoint constrants
|
|
Viterbi.Add (0, (void *)1);
|
|
Viterbi.Add (iOrigLen-1, (void *)iSynthLen);
|
|
// Add one more constraint to allow delta calculation
|
|
Viterbi.Add (1, (void *)2);
|
|
Viterbi.Add (iOrigLen-2, (void *)(iSynthLen-1));
|
|
// add intermediate options
|
|
for (i=2; i <= iOrigLen-3; i++)
|
|
{
|
|
// for now, add in 25 frames on either side (a window of .51 seconds total) of average
|
|
iStart = (int)(i*fMidShift-25);
|
|
iStop = (int)(i*fMidShift+25);
|
|
if (iStart < 0)
|
|
{
|
|
iStart = 0;
|
|
}
|
|
if (iStop > iSynthLen)
|
|
{
|
|
iStop=iSynthLen;
|
|
}
|
|
iStart += 1;
|
|
for (j=iStart; j <= iStop; j++)
|
|
{
|
|
Viterbi.Add (i, (void *)j);
|
|
}
|
|
}
|
|
Viterbi.FindBestPath (ConcatCost, UnitCost, &fCost);
|
|
//
|
|
// Best path now in void ** Viterbi.m_rgpBestElems
|
|
//
|
|
free (gafConcatCosts);
|
|
aiPath = (int *)malloc (sizeof(int)*iOrigLen);
|
|
for (i=0; i < iOrigLen; i++)
|
|
{
|
|
aiPath[i] = (int)(Viterbi.m_rgpBestElems[i]) - 1;
|
|
}
|
|
return aiPath;
|
|
}
|