windows-server-2003/enduser/speech/sapi/cpl/silence.cpp

/*********************************************************************
Silence.Cpp - Code for detecting silence on an incoming audio stream

begun 5/14/94 by Mike Rozak
Modified 12/10/96 by John Merrill to fix up alignment problems
*/


#include "stdafx.h"
#include <malloc.h>
#include "silence.h"

// temporary
#pragma warning(disable: 4100 4244) 

/*********************************************************************
LowPassFilter - This low-pass filters 16-bit mono PCM data from one
   buffer into another.

inputs
   short    *lpSrc - Source buffer
   DWORD    dwNumSamples - Number of samples in the source buffer
   short    *lpDst - Destination buffer. This will be filled in
               with a low-passed version. It will have about an 8
               sample lag. This must be as large as lpSrc.
   short    *psMax - Filled in with the new maximum.
               If NULL then nothing is copied.
   short    *psMin - Filled in with the new minimum
               If NULL then nothing is copied.
   short    *psAvg - Filled in with the new average
               If NULL then nothing is copied.
   DWORD    dwSamplesPerSec
returns
   DWORD - Number of samples returned. This will be <= dwNumSamples,
      possible dwNumSamples - 7.
*/
DWORD LowPassFilter (short *lpSrc, DWORD dwNumSamples, short *lpDst,
   short *psMax, short *psMin, short *psAvg, DWORD dwSamplesPerSec)
{
    SPDBG_FUNC( "LowPassFilter" );
DWORD    i;
long     lSum;
short    sSum, sMax, sMin;
short    *lpLag;
BOOL     fLow = (dwSamplesPerSec < 13000);

#define  SHIFTRIGHT        (fLow ? 3 : 4)   // # bits to shift right.
#define  WINDOWSIZE        (1 << SHIFTRIGHT)   // # samples

if (dwNumSamples < (DWORD) (WINDOWSIZE+1))
   return 0;

// take the first 8 samples and average them together.
lSum = 0;
for (i = 0; i < (DWORD) WINDOWSIZE; i++)
   lSum += lpSrc[i];
sSum = (short) (lSum >> SHIFTRIGHT);

//loop through the rest of the samples
lpLag = lpSrc;
lpSrc += WINDOWSIZE;
dwNumSamples -= WINDOWSIZE;
lSum = 0;   // total
sMax = -32768;
sMin = 32767;
for (i = 0;dwNumSamples; lpSrc++, lpDst++, lpLag++, i++, dwNumSamples--) {
   sSum = sSum - (*lpLag >> SHIFTRIGHT) + (*lpSrc >> SHIFTRIGHT);
   // sSum = *lpSrc; // Dont do any filtering at all
   *lpDst = sSum;
   lSum += sSum;
   if (sSum > sMax)
      sMax = sSum;
   if (sSum < sMin)
      sMin = sSum;
   };

// whow much did we do
if (psMax)
   *psMax = sMax;
if (psMin)
   *psMin = sMin;
if (psAvg && i)
   *psAvg = (short) (lSum / (long) i);
return i;
}


/*********************************************************************
QuantSamples - This quantizes the samples to +1, 0, or -1 (in place),
   depedning if the given value is:
      > sPositive then +1
      < sNegative then -1
      else 0

inputs
   short       *pSamples - Samples
   DWORD       dwNumSamples - Number of samples
   short       sPositive - Positive threshhold
   short       sNegative - Negative threshhold
returns
   none
*/
void QuantSamples (short *pSamples, DWORD dwNumSamples,
   short sPositive, short sNegative)
{
    SPDBG_FUNC( "QuantSamples" );
while (dwNumSamples) {
   if (*pSamples > sPositive)
      *pSamples = 1;
   else if (*pSamples < sNegative)
      *pSamples = -1;
   else
      *pSamples = 0;
   pSamples++;
   dwNumSamples--;
   };
}

/*********************************************************************
FindZC - This searches through the samples for the first zero crossing.
   The returned point will have its previous sample at <= 0, and the
   new one at >0.

inputs
   short       *pSamples - Samples;
   DWORD       dwNumSamples - Number of samples
returns
   DWORD - first sampe number which is positive, or 0 if cant find
*/
DWORD FindZC (short *pSamples, DWORD dwNumSamples)
{
    SPDBG_FUNC( "FindZC" );
DWORD i;

for (i = 1; i < dwNumSamples; i++)
   if ((pSamples[i] > 0) && (pSamples[i-1] <= 0))
      return i;

// else cant find
return 0;
}


/*********************************************************************
CompareSegments - This compares two wave segments and sees how much
   alike they are, returning a confidence that they are the same.

inputs
   short       *pA - Samples. This assumes that the samples
                  are -1, 0, or +1.
   short       *pB - Samples for B. Should be -1, 0, or +1
   DWORD       dwNumSamples - Number of samples in each of them
returns
   WORD - Confidence from 0 to 0xffff (highest confidence)

Notes about the algo: Each sample will score a "similarity point"
for like signs, or if one of the values is a 0.
*/
WORD CompareSegments (short *pA, short *pB, DWORD dwNumSamples)
{
    SPDBG_FUNC( "CompareSegments" );
DWORD    dwSimilar = 0;
DWORD    dwLeft;

for (dwLeft = dwNumSamples; dwLeft; pA++, pB++, dwLeft--)
   if ((*pA == *pB) || (*pA == 0) || (*pB == 0))
      dwSimilar++;

return (WORD) ((dwSimilar * 0xffff) / dwNumSamples);
}


/*********************************************************************
FindMostLikelyWaveLen - This Searches through wave data and finds the
   most likeley wavelength for voiced audio. it returns a condifence
   score from 0 to ffff (ffff is 100% positive).

inputs
   short       *pSamples - Samples
   DWORD       dwNumSamples - Number of samples
   DWORD       dwMinWaveLen - Minimum accepatble wavelength
   DWORD       dwMaxWaveLen - Maximum acceptable wavelength
   WORD       *pwConfidence - Filled in with confidence rating.
returns
   DWORD - Wavelength found. 0 if can't deteermine anything
*/
DWORD FindMostLikelyWaveLen (short *pSamples, DWORD dwNumSamples,
   DWORD dwMinWaveLen, DWORD dwMaxWaveLen, WORD *pwConfidence)
{
    SPDBG_FUNC( "FindMostLikelyWaveLen" );
#define     NUMCOMP     (3)
DWORD    dwFirstZC, i;
DWORD    dwBestWaveLen;
WORD     wBestConfidence;
DWORD    dwCurZC, dwCurWaveLen, dwTemp;
WORD     wConf, wTemp;

// Step one, find the first zero crossing
dwFirstZC = FindZC (pSamples, dwNumSamples);
if (!dwFirstZC)
   return 0;   // error

// Start at a minimum-wavelength away and start finding a wave
// which repeats three times and compares well.
dwBestWaveLen = 0;   // best wavelength found so far
wBestConfidence = 0; // confidence of the best wavelength
dwCurWaveLen = dwMinWaveLen;
while (dwCurWaveLen <= dwMaxWaveLen) {
   // Try the first comparison
   dwCurZC = dwFirstZC + dwCurWaveLen;
   if (dwCurZC >= dwNumSamples)
      break;   // no more samples left

   // find first zero crossing from the current wavelen
   dwTemp = FindZC (pSamples + dwCurZC, dwNumSamples - dwCurZC);
   if (!dwTemp)
      break;      // no more samples left
   dwCurZC += dwTemp;
   dwCurWaveLen += dwTemp;

   // Make sure that we have three wavelength's worth
   if ((dwFirstZC + (NUMCOMP+1)*dwCurWaveLen) >= dwNumSamples)
      break;   // cant compare this

   // Do two confidence tests and multiply them toegther to
   // get the confidence for this wavelength
   wConf = 0xffff;
   for (i = 0; i < NUMCOMP; i++) {
      wTemp = CompareSegments (pSamples + dwFirstZC /* + i * dwCurWaveLen */,
         pSamples + (dwFirstZC + (i+1) * dwCurWaveLen), dwCurWaveLen);
      wConf = (WORD) (((DWORD) wConf * (DWORD) wTemp) >> 16);
      };

   // If we're more confident about this one than others then use it
   if (wConf >= wBestConfidence) {
      wBestConfidence = wConf;
      dwBestWaveLen = dwCurWaveLen;
      };

   // Up the current wavelength just a tad
   dwCurWaveLen++;
   };

*pwConfidence = wBestConfidence;
return dwBestWaveLen;
}

/*********************************************************************
IsSegmentVoiced - This detects if the segment if voiced or not.

inputs
   short       *pSamples - Sample data
   DWORD       dwNumSamples - number of samples
   DWORD       dwSamplesPerSec - Number of sample sper second
   WORD        wMinConfidence - Minimum condifence
returns
   BOOL - TRUE if its definately voiced, FALSE if not or cant tell
*/

BOOL CSilence::IsSegmentVoiced (short *pSamples, DWORD dwNumSamples,
   DWORD dwSamplesPerSec, WORD wMinConfidence, short *asFiltered)
{
    SPDBG_FUNC( "CSilence::IsSegmentVoiced" );
//#define     FILTERNUM      (1024)      // max # samples i nthe filter
//#define     MAXVOICEHZ     (300)       // maximum voicce pitchm in hz
//#define     MINVOICEHZ     (50)        // minimum voice pitch in hz
// #define     MINCONFIDENCE  (0x6000)    // minimum confidence
   // This means that 70% of the samples line up from one wavelength
   // to another

DWORD    dwNumFilter;
//short    asFiltered[FILTERNUM];
short    sMax, sMin, sAvg;
DWORD    dwWaveLen;
WORD     wConfidence;
short    sPositive, sNegative;

// Filter it first so we just get the voiced audio range
if (dwNumSamples > FILTERNUM)
   dwNumSamples = FILTERNUM;
dwNumFilter = LowPassFilter (pSamples, dwNumSamples, asFiltered,
  &sMax, &sMin, &sAvg, m_dwSamplesPerSec);

// Truncate the wave samples to +1, 0, -1
sPositive = sAvg;
sNegative =  sAvg;
QuantSamples (asFiltered, dwNumFilter, sPositive, sNegative);

// look through the voiced wavelengths for a frequency
dwWaveLen = FindMostLikelyWaveLen (asFiltered, dwNumFilter,
   dwSamplesPerSec / m_dwHighFreq, dwSamplesPerSec / MINVOICEHZ,
   &wConfidence);

return (dwWaveLen && (wConfidence >= wMinConfidence));
}


/*********************************************************************
TrimMaxAmp - This extracts the maximum amplitude range of the wave file
	segment.

inputs
	short *	lpS - samples to look through
	WORD		dwNum - number of samples
returns
	WORD - maximum amplitude range
*/
WORD NEAR PASCAL TrimMaxAmp (short * lpS, DWORD dwNum)
{
    SPDBG_FUNC( "TrimMaxAmp" );
DWORD i;
short	sMin, sMax, sTemp;

sMin = 32767;
sMax = (short) -32768;
for (i = dwNum; i; i--) {
   sTemp = *(lpS++);
	if (sTemp < sMin)
		sMin = sTemp;
	if (sTemp > sMax)
		sMax = sTemp;
	};

// If we're clipping at all then claim that we've maxed out.
// Some sound cards have bad DC offsets
if ((sMax >= 0x7f00) || (sMin <= -0x7f00))
   return 0xffff;

return (WORD) (sMax - sMin);
}

/********************************************************************
TrimMaxAmpDelta - This extracts the maximum amplitude range and 
                  calculates the maximum delta of the wave file
	               segment.

inputs
   PBLOCKCHAR  pBlockChar - Pointer to a block characteristic
            structure which is filled in. 
	short *	lpS - deltas to look through
	WORD		dwNum - number of samples
returns
	nothing
*/
void TrimMaxAmpDelta(PBLOCKCHAR pBlockChar, short *lpS, DWORD dwNum)
{
    SPDBG_FUNC( "TrimMaxAmpDelta" );
   DWORD i;
   WORD wMax = 0;
   WORD wTemp;
   short sMin, sMax, sCur, sLast;

   // BUGFIX:  4303 Merge TrimMaxAmp and TrimMaxDelta
   sLast = sMin = sMax = *(lpS++);
   for (i = dwNum - 1; i; i--, sLast = sCur) {
      sCur = *(lpS++);
      // TrimMaxAmp
      if (sCur < sMin)
         sMin = sCur;
      if (sCur > sMax)
         sMax = sCur;

      // TrimMaxDelta
      wTemp = sCur > sLast ? (WORD) (sCur - sLast) : (WORD) (sLast - sCur);
      if (wTemp > wMax)
         wMax = wTemp;

   }
   // If we're clipping at all then claim that we've maxed out.
   // Some sound cards have bad DC offsets
   pBlockChar->wMaxLevel = ((sMax >= 0x7F00) || (sMin <= -0x7F00)) ? 0xFFFF : (WORD) (sMax - sMin);
   pBlockChar->wMaxDelta = wMax;
} /* End of TrimMaxAmpDelta() */ 

         
/*********************************************************************
GetBlockChar - This gets the characteristics of a block of audio.
   This characteristics can then be used to determine if the block
   is silent or not.

inputs
   short    *lpS - sample data
   DWORD    dwNum - number of samples
   PBLOCKCHAR  pBlockChar - Pointer to a block characteristic
            structure which is filled in. 
   BOOL     fTestVoiced - Voicce testing will only be done if
            this is TTRUE (in order to save processor).
returns
   none
*/
void GetBlockChar(short *lpS, DWORD dwNum, PBLOCKCHAR pBlockChar, BOOL fTestVoiced)
{
    SPDBG_FUNC( "GetBlockChar" );
   // BUGFIX:  4303 Merge TrimMaxAmp and TrimMaxDelta
   TrimMaxAmpDelta(pBlockChar, lpS, dwNum);
   pBlockChar->bIsVoiced = pBlockChar->bHighLevel =
      pBlockChar->bHighDelta = SIL_UNKNOWN;
}


/*********************************************************************
IsBlockSound - This detects whether the block is silent or not.

inputs
   PBLOCKCHAR  pBlockInQuestion - Block in question. This has the
      bHighLevel and bHighDelta flags modified
   PBLOCKCHAR  pBlockSilence - Silent block  
   BOOL        fInUtterance - TRUE if we're in an utterance (which
            means be more sensative), FALSE if we're not
returns
   BOOL - TTRUE if has sound, FALSE if it is silent
*/
BOOL IsBlockSound (PBLOCKCHAR pBlockInQuestion, PBLOCKCHAR pBlockSilence,
   BOOL fInUtterance)
{
    SPDBG_FUNC( "IsBlockSound" );
#ifdef SOFTEND // Use so that catches a soft ending to phrases
#define     SENSINV_THRESHHOLD_LEVEL(x)     (((x)/4)*3)
#define     SENSINV_THRESHHOLD_DELTA(x)     (((x)/4)*3)
#else
#define     SENSINV_THRESHHOLD_LEVEL(x)     ((x)/2)
#define     SENSINV_THRESHHOLD_DELTA(x)     ((x)/2)
#endif
#define     NORMINV_THRESHHOLD_LEVEL(x)     ((x)/2)
#define     NORMINV_THRESHHOLD_DELTA(x)     ((x)/2)

if (fInUtterance) {
   pBlockInQuestion->bHighLevel =
      SENSINV_THRESHHOLD_LEVEL(pBlockInQuestion->wMaxLevel) >= pBlockSilence->wMaxLevel;
   pBlockInQuestion->bHighDelta =
      SENSINV_THRESHHOLD_DELTA(pBlockInQuestion->wMaxDelta) >= pBlockSilence->wMaxDelta;
   }
else {
   pBlockInQuestion->bHighLevel =
      NORMINV_THRESHHOLD_LEVEL(pBlockInQuestion->wMaxLevel) >= pBlockSilence->wMaxLevel;
   pBlockInQuestion->bHighDelta =
      NORMINV_THRESHHOLD_DELTA(pBlockInQuestion->wMaxDelta) >= pBlockSilence->wMaxDelta;
   };


return pBlockInQuestion->bHighLevel || pBlockInQuestion->bHighDelta;
}


/*********************************************************************
ReEvaluateSilence - This takes the values used for silence and re-evaluates
   them based upon new data which indicates what silence is. It
   automatically adjusts to the noise level in the room over a few seconds.
   NOTE: This should not be called when an utterance is happening, or
   when it might be starting.

inputs
   PBLOCKCHAR     pSilence - This is the silence block, and should
                     start out with values in it. It will be modified
                     so to incorporate the new silence information.
   PBLOCKCHAR     pNew - New block which is known to be silence.
   BYTE           bWeight - This is the weighting of the new block
                     in influencing the old block, in a value from 0 to 255.
                     256 means that the value of the new silence completely
                     overpowers the old one, 0 means that it doesnt have
                     any affect.
returns
   none
*/
void ReEvaluateSilence (PBLOCKCHAR pSilence, PBLOCKCHAR pNew,
   BYTE bWeight)
{
    SPDBG_FUNC( "ReEvaluateSilence" );
#define  ADJUST(wOrig,wNew,bWt)                 \
   (WORD) ((                                    \
      ((DWORD) (wOrig) * (DWORD) (256 - (bWt))) + \
      ((DWORD) (wNew) * (DWORD) (bWt))          \
      ) >> 8);

pSilence->wMaxLevel = ADJUST (pSilence->wMaxLevel,
   pNew->wMaxLevel, bWeight);
pSilence->wMaxDelta = ADJUST (pSilence->wMaxDelta,
   pNew->wMaxDelta, bWeight);

// If it's way too silence (and too good to be true) then assume
// a default silece
// if (!pNew->wMaxLevel && !pNew->wMaxDelta) {
//   if (pSilence->wMaxLevel < 2500)
//      pSilence->wMaxLevel = 2500;
//   if (pSilence->wMaxDelta < 400)
//       pSilence->wMaxDelta = 400;
//   }
}

/*********************************************************************
WhatsTheNewState - This takes in a stream of bit-field indicating which
   of the last 32 blocks were detected as having sound, and what our
   state was the last time this was called (utterance or not). It then
   figureous out if we're still in an utterance, or we just entered one.
   It also says how many buffers ago that was.

inputs
   DWORD    dwSoundBits - This is a bit-field of the last 32
               audio blocks. A 1 in the field indicates that there was
               sound there, a 0 indicates no sound. The low bit
               corresponds to the most recent block, and high bit
               the oldest.
   DWORD    dwVoicedBits - Just like sound bits except that it indicates
               voiced sections of sound.
   BOOL     fWasInUtterance - This is true is we had an utterance
               the last time this called, FALSE if there was silence
   BOOL     fLongUtterance - If this is a long utterance then dont
               react for 1/4 second, otherwise use 1/8 second for
               short utterance
   WORD     wBlocksPerSec - How many of the above-mentioned blocks
               fit into a second.
   WORD     *wStarted - If a transition occurs from no utterance to
               an utterance, then this fills in the number of of blocks
               ago that the utterance started, into *wStarted. Otherwise
               it is not changed.
   WORD     wReaction - Reaction time (in blocks) after an utterance is
               finished
returns
   BOOL - TRUE if we're in an utterance now,  FALSE if we're in silence
*/

BOOL CSilence::WhatsTheNewState (DWORD dwSoundBits, DWORD dwVoicedBits,
   BOOL fWasInUtterance, BOOL fLongUtterance,
   WORD wBlocksPerSec, WORD *wStarted, WORD wReaction)
{
    SPDBG_FUNC( "CSilence::WhatsTheNewState" );
WORD wCount, wOneBits;
WORD  wTimeToCheck;
DWORD dwTemp, dwMask;

if (fWasInUtterance)
   wTimeToCheck = wReaction;
else
   wTimeToCheck = (wBlocksPerSec/4);   // 1/4 second
if (!wTimeToCheck)
   wTimeToCheck = 1;


for (wOneBits = 0, wCount = wTimeToCheck, dwTemp = dwSoundBits;
      wCount;
      dwTemp /= 2, wCount--)
   if (dwTemp & 0x01)
      wOneBits++;

if (fWasInUtterance) {
   // If we were in an utterance, then we still are in an utterance
   // UNLESS the number of bits which are turned on for the last
   // 0.5 seconds is less that 1/4 of what should be turned on.
   if ( (wOneBits >= 1))
      return TRUE;
   else
      return FALSE;
   }
else {
   // We are in silence. We cannot possible go into an utterance
   // until the current block is voicced
   if (!(dwVoicedBits & 0x01))
      return FALSE;

   // If we were in silence then we're still in silence
   // UNLESS the number of bits which are turned on for the last
   // 0.5 seconds is more than 1/2 of what should be turned on.
   // If so, then start the utterance 0.75 seconds ago.
   if (wOneBits >= (wTimeToCheck / 2)) {
      // we're not in an utterance

      // Look back until get 1/8 second of silence, and include
      // that in the data returned
      dwTemp = dwSoundBits;
 //     dwMask = (1 << (wBlocksPerSec / 8)) - 1;
 //     for (wCount = wBlocksPerSec/8; dwTemp & dwMask; dwTemp >>= 1, wCount++);
      dwMask = (1 << (wBlocksPerSec / m_wAddSilenceDiv)) - 1;
      for (wCount = wBlocksPerSec/m_wAddSilenceDiv; dwTemp & dwMask; dwTemp >>= 1, wCount++);

      *wStarted = wCount;

      return TRUE;
      }
   else
      return FALSE;
   };

}


/*********************************************************************
CSilence::CSilence - This creates the silence class.

inputs
   WORD     wBlocksPerSec - Number of blocks per second. The blocks
               will be passed down through AddBlock().
returns
   class
*/
CSilence::CSilence (WORD wBlocksPerSec)
{
    SPDBG_FUNC( "CSilence::CSilence" );
m_wBlocksPerSec = min(wBlocksPerSec, 32); // no more than the # bits in a DWORD
m_wBlocksInQueue = m_wBlocksPerSec;   // 1 second worth.
m_wLatestBlock = 0;
m_paBlockInfo = NULL;
m_dwSoundBits = m_dwVoicedBits = 0;
m_fFirstBlock = TRUE;
m_fInUtterance = FALSE;
m_dwUtteranceLength = 0;
m_dwSamplesPerSec = 11025;
}

/*********************************************************************
CSilence::~CSilence - Free up everything.
*/
CSilence::~CSilence (void)
{
    SPDBG_FUNC( "CSilence::~CSilence" );
   WORD  i;

   if (m_paBlockInfo) {
      for (i = 0; i < m_wBlocksInQueue; i++)
         if (m_paBlockInfo[i].pSamples)
            free(m_paBlockInfo[i].pSamples);
      free(m_paBlockInfo);
   }

   if (m_pASFiltered)
      free(m_pASFiltered);
}

/*********************************************************************
CSilence::Init - This initializes the silence code. It basically
   allocates memory. It should be called immediately after the object
   is created and then not again.

inputs
   none
returns
   BOOL - TRUE if succeded, else out of memory
*/
BOOL CSilence::Init(BOOL fPhoneOptimized, DWORD dwSamplesPerSec)
{
    SPDBG_FUNC( "CSilence::Init" );
   m_dwSamplesPerSec = dwSamplesPerSec;
   if (fPhoneOptimized) {
   	m_wAddSilenceDiv = (WORD) PHADD_BEGIN_SILENCE;
	   m_dwHighFreq = PHMAXVOICEHZ;
	}
   else {
   	m_wAddSilenceDiv = (WORD) PCADD_BEGIN_SILENCE;
	   m_dwHighFreq = PCMAXVOICEHZ;
	}
   if ((m_pASFiltered = (short *) malloc((sizeof(short)) * FILTERNUM)) == NULL)
	   return (FALSE);

   // Initialize memory for the blocks and clear it.
   if (m_paBlockInfo)
      return (TRUE);
   m_paBlockInfo = (PBINFO) malloc(m_wBlocksInQueue * sizeof(BINFO));
   if (!m_paBlockInfo)
      return (FALSE);
   if (m_wBlocksInQueue && m_paBlockInfo)
      memset(m_paBlockInfo, 0, m_wBlocksInQueue * sizeof(BINFO));
   return (TRUE);
} /* End of Init() */

/*********************************************************************
CSilence::AddBlock - This does the following:
   - Add the block the the queue. Free up an old block if needed.
      The block should be 1/wBlocksPerSec long (about).
   - Analyze the block to see if its got sound or is quiet.
   - Fill in *wVU with a VU level.
   - Return TRUE if we're in an utterance, FALSE if its silence now.
      If TRUE then app should call GetBlock() until no more blocks left,
      and pass them to the SR engine.

inputs
   short    *pSamples - Pointer to samples. This memory should
               be allocaed with malloc(), and may be freed by the
               object.
   DWORD    dwNumSamples - Number of samples
   WORD     *wVU - This is fille in with the VU meter for the block
   QWORD	qwTimeStamp - Time stamp for this buffer.
returns
   BOOL - TRUE if an utterance is taking place, FALSE if its silent
*/
BOOL CSilence::AddBlock (short *pSamples, DWORD dwNumSamples,
   WORD *wVU, QWORD qwTimeStamp)
{
    SPDBG_FUNC( "CSilence::AddBlock" );
BLOCKCHAR      bcNew;
BOOL           fSound, fUtt;
PBINFO         pbInfo;
WORD           wUttStart, i;

// Dont add empty blocks
if (!dwNumSamples) {
   if (pSamples)
      free (pSamples);
   return m_fInUtterance;
   };

// Analyze the block for characteristics.
GetBlockChar (pSamples, dwNumSamples, &bcNew, !m_fInUtterance);

// fill in the vu
*wVU = bcNew.wMaxLevel;

// see if it's silent or not
if (m_fFirstBlock) {
   // first block, so of course its silent
   m_bcSilence = bcNew;
   m_fFirstBlock = FALSE;
   fSound = FALSE;

   // BUGFIX 2466 - If it's way too silence (and too good to be true) then assume
   // a default silece
   if ((m_bcSilence.wMaxLevel < 500) || (m_bcSilence.wMaxDelta < 100)) {
      m_bcSilence.wMaxLevel = 2500;
      m_bcSilence.wMaxDelta = 400;
      };

   // If it's way too loud then cut down
   if ((m_bcSilence.wMaxLevel > 2500) || (m_bcSilence.wMaxDelta > 1500)) {
      m_bcSilence.wMaxLevel = min (m_bcSilence.wMaxLevel, 2500);
      m_bcSilence.wMaxDelta = min (m_bcSilence.wMaxDelta, 1500);
      };
   }
else {
   fSound = IsBlockSound (&bcNew, &m_bcSilence, m_fInUtterance);
   };

// Test to see if the block is voiced if:
//    - The amplitude level is more than background sound
//    - We're not yet in an utterance (to save processor)
if (bcNew.bHighLevel && !m_fInUtterance) {
   WORD  wNoise;
   wNoise = (m_dwSamplesPerSec <= 13000) ?
               m_wNoiseThresh :
               ((m_wNoiseThresh / 3) * 2);

   bcNew.bIsVoiced = this->IsSegmentVoiced (pSamples, dwNumSamples, m_dwSamplesPerSec, wNoise, m_pASFiltered) ?
      SIL_YES : SIL_NO;
}

// add the block
m_dwVoicedBits = (m_dwVoicedBits << 1) |
   ( (bcNew.bIsVoiced  == SIL_YES) ? 1 : 0 );
m_dwSoundBits = (m_dwSoundBits << 1) | (fSound ? 1 : 0);
m_wLatestBlock++;
if (m_wLatestBlock >= m_wBlocksInQueue)
   m_wLatestBlock = 0;
pbInfo = m_paBlockInfo + m_wLatestBlock;
if (pbInfo->pSamples)
   free (pbInfo->pSamples);
pbInfo->pSamples = pSamples;
pbInfo->dwNumSamples = dwNumSamples;

// BUGFIX: Alignment code.  We need to store the timestamp for
// the BEGINNING of the block, not the end!

pbInfo->qwTimeStamp = qwTimeStamp - dwNumSamples * sizeof(WORD);

// What's our utterance state?
fUtt = this->WhatsTheNewState (m_dwSoundBits, m_dwVoicedBits, m_fInUtterance,
   m_dwUtteranceLength >= m_wBlocksPerSec,
   m_wBlocksPerSec, &wUttStart, m_wReaction);
if (fUtt && !m_fInUtterance) {
   // We just entered an utterance, so wUttStart has a valid teerm
   // in it. Go through the buffer queue and free all buffers which
   // are older than wUttStart. Remembeer, this is a circular buffer
   for (i = 0; i < (m_wBlocksInQueue - wUttStart); i++) {
      pbInfo = m_paBlockInfo +
         ( (m_wLatestBlock + i + 1) % m_wBlocksInQueue);
      if (pbInfo->pSamples)
         free (pbInfo->pSamples);
      pbInfo->pSamples = NULL;
      };

   // Since we just entered an utterance clear the utterance length counter
   m_dwUtteranceLength = 0;
   };
m_fInUtterance = fUtt;

// Remember how long this utterance has done on. Long utterances
// deserve more patience as far as silence goes
m_dwUtteranceLength++;

// Adjust the silence level if we're not in an utterance
// Requiring !fSound so that we dont accidentally indclude any
// utterance sections in the sound calculations
if (!m_fInUtterance /* && !fSound */) {
   ReEvaluateSilence (&m_bcSilence, &bcNew,
      255 / m_wBlocksPerSec);
   }
else if (m_dwUtteranceLength >= ((DWORD)m_wBlocksPerSec * 30))
   // if we have a very long utterance (> 30 second) then it's not
   ReEvaluateSilence (&m_bcSilence, &bcNew, 255 / m_wBlocksPerSec);

// done
return m_fInUtterance;
}

/*********************************************************************
CSilence::ExpectNoiseChange - Sent to the silence detection algorithm
   when it should expect the noise floor to go up/down.

inputs
   WORD     wValue - Amount that noise floor should change.
               0x100 = no change. > 0x100 => louder, < 0x100 => quieter
returns
*/
void CSilence::ExpectNoiseChange (WORD wValue)
{
    SPDBG_FUNC( "CSilence::ExpectNoiseChange" );
DWORD dwTemp;

dwTemp = ((DWORD) m_bcSilence.wMaxLevel * wValue) >> 8;
if (dwTemp > 0xffff)
   dwTemp = 0xffff;
m_bcSilence.wMaxLevel = (WORD) dwTemp;

dwTemp = ((DWORD) m_bcSilence.wMaxDelta * wValue) >> 8;
if (dwTemp > 0xffff)
   dwTemp = 0xffff;
m_bcSilence.wMaxDelta = (WORD) dwTemp;
}

/*********************************************************************
CSilence::GetBlock - This gets a block from the queue. This will fail
   if there are no more blocks left to get OR if there's not utterance.

inputs
   DWORD    *pdwNumSamples - If a block is returned then this
            will be filled in with the number of samples in the block.	 
	QWORD	*pqwTimeStamp - Filled in woth the time-stamp for the
			buffer.
returns
   short * - Pointer to a block of samples. This memory is the
         caller's property and can be freed with free().
*/
short * CSilence::GetBlock (DWORD *pdwNumSamples, QWORD * pqwTimeStamp)
{
    SPDBG_FUNC( "CSilence::GetBlock" );
PBINFO         pbInfo;
WORD           i, wCount;
short          *pSamples;

if (!m_fInUtterance)
   return NULL;

// find the first occurance
i = (m_wLatestBlock + 1) % m_wBlocksInQueue;
for (wCount = m_wBlocksInQueue; wCount;
      i = ((i < (m_wBlocksInQueue-1)) ? (i+1) : 0), wCount-- ) {
   pbInfo = m_paBlockInfo + i;
   if (pbInfo->pSamples) {
      *pdwNumSamples = pbInfo->dwNumSamples;
	  *pqwTimeStamp = pbInfo->qwTimeStamp;
      pSamples = pbInfo->pSamples;
      pbInfo->pSamples = NULL;

      return pSamples;
      };
   };

// if got here then couldnt find anything
return NULL;
}

/*********************************************************************
CSilence::KillUtterance - Kills an exitsing utterance.

inputs
   none
returns
   none
*/
void CSilence::KillUtterance (void)
{
    SPDBG_FUNC( "CSilence::KillUtterance" );
m_fInUtterance = FALSE;
m_dwSoundBits = 0;
m_dwVoicedBits = 0;
}