Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

113 lines
3.8 KiB

  1. /*********************************************************************
  2. Silence.H - Includes to use the code to detect silence.
  3. begun 5/14/94 by Mike Rozak
  4. */
  5. #ifndef _SILENCE_H_
  6. #define _SILENCE_H_
  7. #ifndef _SPEECH_
  8. typedef unsigned _int64 QWORD, *PQWORD;
  9. #endif
  /*********************************************************************
  Constants */

  // SIL_* state codes.
  // NOTE(review): presumably "is this silence?" answers (no / undecided / yes);
  // confirm against the code that uses them.
  #define SIL_NO                (0)
  #define SIL_UNKNOWN           (1)
  #define SIL_YES               (2)

  // #define SIL_SAMPRATE       (11025)  // assumed sampling rate

  #define PHADD_BEGIN_SILENCE   (4)      // 1/4 second
  #define PCADD_BEGIN_SILENCE   (4)      // 1/4 second

  #define FILTERNUM             (1024)   // max # samples in the filter

  // Earlier 300 Hz limits, compiled out with #if 0; the active limits
  // are the 500 Hz definitions further down.
  #if 0
  #define MAXVOICEHZ            (300)    // maximum voice pitch in hz
  #define PHMAXVOICEHZ          (300)    // maximum voice pitch in hz (phone)
  #endif

  #define PHMAXVOICEHZ          (500)    // maximum voice pitch in hz (phone)
  #define PCMAXVOICEHZ          (500)    // maximum voice pitch in hz (PC)
  #define MINVOICEHZ            (50)     // minimum voice pitch in hz
  26. // Store characteristics of a block
  27. typedef struct {
  28. WORD wMaxLevel;
  29. WORD wMaxDelta;
  30. BYTE bIsVoiced;
  31. BYTE bHighLevel;
  32. BYTE bHighDelta;
  33. } BLOCKCHAR, *PBLOCKCHAR;
  34. // Store information about a block
  35. typedef struct {
  36. short *pSamples; // Sample data, or NULL if empty
  37. DWORD dwNumSamples; // number of samples in block
  38. QWORD qwTimeStamp; // time stamp for block
  39. } BINFO, *PBINFO;
  40. class CSilence {
  41. private:
  42. WORD m_wBlocksPerSec;
  43. WORD m_wBlocksInQueue;
  44. WORD m_wLatestBlock; // points to the last block entered in the circular list
  45. PBINFO m_paBlockInfo;
  46. DWORD m_dwSoundBits;
  47. DWORD m_dwVoicedBits; // turned on if block was voiced
  48. BLOCKCHAR m_bcSilence; // what silence is
  49. BOOL m_fFirstBlock; // TRUE if the next block is the first
  50. // block ever, and used to judge silence, else FALSE
  51. BOOL m_fInUtterance; // TRUE if we're in an utterance
  52. DWORD m_dwUtteranceLength; // Number of frames that utterance has gone on
  53. WORD m_wReaction; // reaction time
  54. WORD m_wNoiseThresh; // noiuse threshhold
  55. short *m_pASFiltered; // pointer to filtered data buffer
  56. WORD m_wAddSilenceDiv;
  57. DWORD m_dwHighFreq;
  58. DWORD m_dwSamplesPerSec;
  59. #ifdef USE_REG_ENG_CTRL
  60. BOOL m_fSilenceDetectEnbl;
  61. BOOL m_fVoiceDetectEnbl;
  62. WORD m_wTimeToCheckDiv;
  63. DWORD m_dwLowFreq;
  64. DWORD m_dwCheckThisManySamples;
  65. DWORD m_dwNumFilteredSamples;
  66. WORD m_wMinConfidenceAdj;
  67. DWORD m_dwLPFShift;
  68. DWORD m_dwLPFWindow;
  69. #endif
  70. public:
  71. CSilence (WORD wBlocksPerSec);
  72. ~CSilence (void);
  73. BOOL Init(BOOL fPhoneOptimized, DWORD dwSamplesPerSec);
  74. BOOL AddBlock (short * pSamples, DWORD dwNumSamples, WORD * wVU,
  75. QWORD qwTimeStamp);
  76. short * GetBlock (DWORD * pdwNumSamples, QWORD * pqwTimeStamp);
  77. void KillUtterance(void);
  78. void NoiseResistSet (WORD wValue)
  79. {
  80. m_wNoiseThresh = wValue;
  81. };
  82. void ReactionTimeSet (DWORD dwTime)
  83. {m_wReaction = (WORD) ((dwTime * m_wBlocksPerSec) / 1000);};
  84. WORD GetBackgroundNoise (void)
  85. {return m_bcSilence.wMaxLevel;};
  86. void ExpectNoiseChange (WORD wValue);
  87. private:
  88. BOOL CSilence::IsSegmentVoiced (short *pSamples, DWORD dwNumSamples,
  89. DWORD dwSamplesPerSec, WORD wMinConfidence, short *asFiltered);
  90. BOOL CSilence::WhatsTheNewState (DWORD dwSoundBits, DWORD dwVoicedBits,
  91. BOOL fWasInUtterance, BOOL fLongUtterance,
  92. WORD wBlocksPerSec, WORD *wStarted, WORD wReaction);
  93. };
  94. typedef CSilence *PCSilence;
  95. WORD NEAR PASCAL TrimMaxAmp(short * lpS, DWORD dwNum);
  96. #endif // _SILENCE_H_