Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1387 lines
44 KiB

  1. /*==========================================================================
  2. *
  3. * Copyright (C) 1999 Microsoft Corporation. All Rights Reserved.
  4. *
  5. * File: agcva1.cpp
  6. * Content: Concrete class that implements CAutoGainControl
  7. *
  8. * History:
  9. * Date By Reason
  10. * ==== == ======
  11. * 12/01/99 pnewson Created it
  12. * 01/14/2000 rodtoll Plugged memory leak
  13. * 01/21/2000 pnewson Fixed false detection at start of audio stream
  14. * Raised VA_LOW_ENVELOPE from (2<<8) to (3<<8)
  15. * 01/24/2000 pnewson Fixed return code on Deinit
  16. * 01/31/2000 pnewson re-add support for absence of DVCLIENTCONFIG_AUTOSENSITIVITY flag
  17. * 02/08/2000 rodtoll Bug #131496 - Selecting DVSENSITIVITY_DEFAULT results in voice
  18. * never being detected
  19. * 03/03/2000 rodtoll Updated to handle alternative gamevoice build.
  20. * 04/20/2000 rodtoll Bug #32889 - Unable to run on non-admin accounts on Win2k
  21. * 04/20/2000 pnewson Tune AGC algorithm to make it more agressive at
  22. * raising the recording volume.
  23. * 04/25/2000 pnewson Fix to improve responsiveness of AGC when volume level too low
  24. * 12/07/2000 rodtoll WinBugs #48379: DPVOICE: AGC appears to be functioning incorrectly (restoring to old algorithm(
  25. *
  26. ***************************************************************************/
  27. #include "dxvutilspch.h"
  28. /*
  29. How this voice activation code works:
  30. The idea is this. The power of the noise signal is pretty much constant over
  31. time. The power of a voice signal varies considerably over time. The power of
  32. a voice signal is not always high however. Weak frictive noises and such do not
  33. generate much power, but since they are part of a stream of speech, they represent
  34. a dip in the power, not a constant low power like the noise signal. We therefore
  35. associate changes in power with the presence of a voice signal.
  36. If it works as expected, this will allow us to detect voice activity even
  37. when the input volume, and therefore the total power of the signal, is very
  38. low. This in turn will allow the auto gain control code to be more effective.
  39. To estimate the power of the signal, we run the absolute value of the input signal
  40. through a recursive digital low pass filter. This gives us the "envelope" signal.
  41. [An alternative way to view this is a low frequency envelope signal modulated by a
  42. higher frequency carrier signal. We're extracting the low frequency envelope signal.]
  43. */
  44. #undef DPF_SUBCOMP
  45. #define DPF_SUBCOMP DN_SUBCOMP_VOICE
  46. // the registry names where the AGC stuff is saved
  47. #define DPVOICE_REGISTRY_SAVEDAGCLEVEL L"SavedAGCLevel"
  48. // AGC_VOLUME_TICKSIZE
  49. //
  50. // The amount the recording volume should be changed
  51. // when AGC determines it is required.
  52. #define AGC_VOLUME_TICKSIZE 100
  53. /*
  54. // AGC_VOLUME_UPTICK
  55. //
  56. // The amount the recording volume should be increased
  57. // when the input level has been too low for a while.
  58. #define AGC_VOLUME_UPTICK 125
  59. // AGC_VOLUME_DOWNTICK
  60. //
  61. // The amount the recording volume should be decreased
  62. // when the input level has been too high for a while.
  63. #define AGC_VOLUME_DOWNTICK 250
  64. */
  65. // AGC_VOLUME_INITIAL_UPTICK
  66. //
  67. // When the AGC level is loaded from the registry, this
  68. // amount is added to it as an initial boost, since it
  69. // is much easier and faster to lower the recording level
  70. // via AGC than it is to raise it.
  71. #define AGC_VOLUME_INITIAL_UPTICK 500
  72. // AGC_VOLUME_MINIMUM
  73. //
  74. // The minimum volume setting allowed.
  75. // Make sure it's above 0, this mutes some cards
  76. #define AGC_VOLUME_MINIMUM (DSBVOLUME_MIN+AGC_VOLUME_TICKSIZE)
  77. // AGC_VOLUME_MAXIMUM
  78. //
  79. // The maximum volume setting allowed.
  80. #define AGC_VOLUME_MAXIMUM DSBVOLUME_MAX
  81. // AGC_VOLUME_LEVELS
  82. //
  83. // How many possible volume levels are there?
  84. #define AGC_VOLUME_LEVELS ((DV_ABS(AGC_VOLUME_MAXIMUM - AGC_VOLUME_MINIMUM) / AGC_VOLUME_TICKSIZE) + 1)
  85. /*
  86. // AGC_REDUCTION_THRESHOLD
  87. //
  88. // The peak level at which the recording volume
  89. // must be reduced
  90. #define AGC_REDUCTION_THRESHOLD 98
  91. // AGC_INCREASE_THRESHOLD
  92. //
  93. // If the user's input remains under this threshold
  94. // for an extended period of time, we will consider
  95. // raising the input level.
  96. #define AGC_INCREASE_THRESHOLD 70
  97. // AGC_INCREASE_THRESHOLD_TIME
  98. //
  99. // How long must the input remain under the increase
  100. // threshold to trigger an increase? (measured
  101. // in milliseconds)
  102. #define AGC_INCREASE_THRESHOLD_TIME 500
  103. */
  104. // AGC_PEAK_CLIPPING_THRESHOLD
  105. //
  106. // The peak value at or above which we consider the
  107. // input signal to be clipping.
  108. #define AGC_PEAK_CLIPPING_THRESHOLD 0x7e00
  109. /*
  110. // AGC_ENV_CLIPPING_THRESHOLD
  111. //
  112. // When we detect clipping via the threshold above,
  113. // the 16 bit normalized envelope signal must be above
  114. // this threshold for us to lower the input volume.
  115. // This allows us to ignore intermittent spikes in
  116. // the input.
  117. #define AGC_ENV_CLIPPING_THRESHOLD 0x2000
  118. // AGC_ENV_CLIPPING_COUNT_THRESHOLD
  119. //
  120. // For how many envelope samples does the envelope
  121. // signal need to stay above the threshold value
  122. // above in order to take the volume down a tick?
  123. #define AGC_ENV_CLIPPING_COUNT_THRESHOLD 10
  124. */
  125. // AGC_IDEAL_CLIPPING_RATIO
  126. //
  127. // What is the ideal ratio of clipped to total samples?
  128. // E.g. a value of 0.005 says that we would like 5 out of
  129. // every 1000 samples to clip. If we are getting less clipping,
  130. // the volume should be increased. If we are getting more,
  131. // the volume should be reduced.
  132. //
  133. // Note: only samples that are part of a frame detected as
  134. // speech are considered.
  135. #define AGC_IDEAL_CLIPPING_RATIO 0.0005
  136. // AGC_CHANGE_THRESHOLD
  137. //
  138. // How far from the ideal does a volume level have to
  139. // stray before we will consider changing the volume?
  140. //
  141. // E.g. If this value is 1.05, the history for a volume
  142. // level would have to be 5% above or below the ideal
  143. // value in order to have an AGC correction made.
  144. #define AGC_CHANGE_THRESHOLD 1.01
  145. // AGC_CLIPPING_HISTORY
  146. //
  147. // How many milliseconds of history should we keep regarding
  148. // the clipping behavior at a particular volume setting?
  149. // E.g. a value of 10000 means that we remember the last
  150. // 10 seconds of activity at each volume level.
  151. //
  152. // Note: only samples that are part of a frame detected as
  153. // speech are considered.
  154. #define AGC_CLIPPING_HISTORY 1000
  155. //#define AGC_CLIPPING_HISTORY 2000
  156. //#define AGC_CLIPPING_HISTORY 5000
  157. //#define AGC_CLIPPING_HISTORY 10000
  158. //#define AGC_CLIPPING_HISTORY 30000 // it took AGC too long to recover
  159. // from low volume levels with this
  160. // setting
  161. // AGC_FEEDBACK_ENV_THRESHOLD
  162. //
  163. // To detect a feedback condition, we check to see if the
  164. // envelope signal has a value larger than AGC_FEEDBACK_ENV_THRESHOLD.
  165. // If the envelope signal stays consistently above this level,
  166. // for longer than AGC_FEEDBACK_TIME_THRESHOLD milliseconds, we conclude
  167. // that feedback is occurring. Voice has a changing envelope, and will
  168. // dip below the threshold on a regular basis. Feedback will not.
  169. // This will allow us to automatically reduce the input volume
  170. // when feedback is detected.
  171. #define AGC_FEEDBACK_ENV_THRESHOLD 2500
  172. #define AGC_FEEDBACK_TIME_THRESHOLD 1000
  173. // AGC_DEADZONE_THRESHOLD
  174. //
  175. // If the input signal never goes above this value
  176. // (16bits, promoted if required) for the deadzone time,
  177. // then we consider the input to be in the dead zone,
  178. // and the volume should be upticked.
  179. // #define AGC_DEADZONE_THRESHOLD 0 // This is too low - it does not reliably detect the deadzone
  180. #define AGC_DEADZONE_THRESHOLD (1 << 8)
  181. // AGC_DEADZONE_TIME
  182. //
  183. // How long we have to be in the deadzone before
  184. // the deadzone increase kicks in - we need this to
  185. // be longer than just one frame, or we get false
  186. // positives.
  187. #define AGC_DEADZONE_TIME 1000
  188. // VA_HIGH_DELTA
  189. //
  190. // If the percent change in the envelope signal is greater
  191. // than this value, voice is detected. Each point of this
  192. // value is equal to 0.1%. E.g. 4000 == 400% increase.
  193. // An unchanging signal produces a 100% value.
  194. //#define VA_HIGH_DELTA 2000
  195. //#define VA_HIGH_DELTA_FASTSLOW 0x7fffffff // select this to factor out this VA parameter
  196. //#define VA_HIGH_DELTA_FASTSLOW 1400
  197. //#define VA_HIGH_DELTA_FASTSLOW 1375 // current choice
  198. //#define VA_HIGH_DELTA_FASTSLOW 1350
  199. //#define VA_HIGH_DELTA_FASTSLOW 1325
  200. //#define VA_HIGH_DELTA_FASTSLOW 1300
  201. //#define VA_HIGH_DELTA_FASTSLOW 1275
  202. //#define VA_HIGH_DELTA_FASTSLOW 1250
  203. //#define VA_HIGH_DELTA_FASTSLOW 1200
  204. //#define VA_HIGH_DELTA_FASTSLOW 1175 // catches all noise
  205. //#define VA_HIGH_DELTA_FASTSLOW 1150 // catches all noise
  206. //#define VA_HIGH_DELTA_FASTSLOW 1125 // catches all noise
  207. //#define VA_HIGH_DELTA_FASTSLOW 1100 // catches all noise
  208. // VA_LOW_DELTA
  209. //
  210. // If the percent change in the envelope signal is lower
  211. // than this value, voice is detected. Each point of this
  212. // value is equal to 0.1%. E.g. 250 == 25% of the previous value
  213. // (i.e. a decrease to 1/4 the original signal strength).
  214. // An unchanging signal produces a 100% value.
  215. //#define VA_LOW_DELTA 500
  216. //#define VA_LOW_DELTA_FASTSLOW 0 // select this to factor out this VA parameter
  217. //#define VA_LOW_DELTA_FASTSLOW 925
  218. //#define VA_LOW_DELTA_FASTSLOW 900
  219. //#define VA_LOW_DELTA_FASTSLOW 875
  220. //#define VA_LOW_DELTA_FASTSLOW 850
  221. //#define VA_LOW_DELTA_FASTSLOW 825
  222. //#define VA_LOW_DELTA_FASTSLOW 800
  223. //#define VA_LOW_DELTA_FASTSLOW 775 // current choice
  224. //#define VA_LOW_DELTA_FASTSLOW 750
  225. //#define VA_LOW_DELTA_FASTSLOW 725
  226. //#define VA_LOW_DELTA_FASTSLOW 700
  227. //#define VA_LOW_DELTA_FASTSLOW 675
  228. //#define VA_LOW_DELTA_FASTSLOW 650
  229. // The following VA parameters were optimized for what I believe to be
  230. // the hardest configuration: A cheap open stick mic with external speakers,
  231. // with Echo Suppression turned on. Echo suppression penalizes false positives
  232. // harshly, since the receiver cannot send while receiving the "noise". If
  233. // the VA parameters work for this case, then they should be fine for the
  234. // much better signal to noise ratio provided by a headset or collar mic.
  235. // (As long as the user does not breathe directly on the headset mic.)
  236. //
  237. // Two source-to-mic distances were tested during tuning.
  238. //
  239. // 1) Across an enclosed office (approx 8 to 10 feet)
  240. // 2) Seated at the workstation (approx 16 to 20 inches)
  241. //
  242. // At distance 1, the AGC was never invoked, gain was at 100%
  243. // At distance 2, the AGC would take the mic down a few ticks.
  244. //
  245. // The office enviroment had the background noise from 3 computers,
  246. // a ceiling vent, and a surprisingly noisy fan from the ethernet
  247. // hub. There is no background talking, cars, trains, or things of
  248. // that nature.
  249. //
  250. // Each parameter was tuned separately to reject 100% of the
  251. // background noise for case 1 (gain at 100%).
  252. //
  253. // Then they were tested together to see if they could detect
  254. // across the room speech.
  255. //
  256. // Individually, none of the detection criteria could reliably
  257. // detect all of the across the room speech. Together, they did
  258. // not do much better. They even missed some speech while seated.
  259. // Not very satifactory.
  260. //
  261. // Therefore, I decided to abandon the attempt to detect across
  262. // the room speech. I retuned the parameters to reject noise
  263. // after speaking while seated (which allowed AGC to reduce
  264. // the volume a couple of ticks, thereby increasing the signal
  265. // to noise ratio) and to reliably detect seated speech.
  266. //
  267. // I also found that the "fast" envelope signal was better at
  268. // detecting speech than the "slow" one in a straight threshold
  269. // comparison, so it is used in the VA tests.
  270. //
  271. // VA_HIGH_PERCENT
  272. //
  273. // If the fast envelope signal is more than this percentage
  274. // higher than the slow envelope signal, speech is detected.
  275. //
  276. #define VA_HIGH_PERCENT 170 // rejects most noise, still catches some.
  277. // decent voice detection. Catches the beginning
  278. // of speech a majority of the time, but does miss
  279. // once in a while. Will often drop out partway
  280. // into a phrase when used alone. Must test in
  281. // conjunction with VA_LOW_PERCENT.
  282. //
  283. // After testing in conjunction with VA_LOW_PERCENT,
  284. // the performance is reasonable. Low input volume
  285. // signals are usually detected ok, but dropouts are
  286. // a bit common. However, noise is sometimes still
  287. // detected, so making these parameters more sensitive
  288. // would not be useful.
  289. //#define VA_HIGH_PERCENT 165 // catches occational noise
  290. //#define VA_HIGH_PERCENT 160 // catches too much noise
  291. //#define VA_HIGH_PERCENT 150 // catches most noise
  292. //#define VA_HIGH_PERCENT 140 // catches almost all noise
  293. //#define VA_HIGH_PERCENT 0x00007fff // select this to factor out this VA parameter
  294. // VA_LOW_PERCENT
  295. //
  296. // If the fast envelope signal is more than this percentage
  297. // lower than the slow envelope signal, speech is detected.
  298. //
  299. #define VA_LOW_PERCENT 50 // excellent noise rejection. poor detection of speech.
  300. // when used alone, could miss entire phrases. Must evaluate
  301. // in conjunction with tuned VA_HIGH_PERCENT
  302. //
  303. // See note above re: testing in conjunction with VA_HIGH_PERCENT
  304. //#define VA_LOW_PERCENT 55 // still catches too much noise
  305. //#define VA_LOW_PERCENT 60 // catches most noise
  306. //#define VA_LOW_PERCENT 65 // catches most noise
  307. //#define VA_LOW_PERCENT 70 // still catches almost all noise
  308. //#define VA_LOW_PERCENT 75 // catches almost all noise
  309. //#define VA_LOW_PERCENT 80 // catches all noise
  310. //#define VA_LOW_PERCENT 0 // select this to factor out this VA parameter
  311. // VA_HIGH_ENVELOPE
  312. //
  313. // If the 16 bit normalized value of the envelope exceeds
  314. // this number, the signal is considered voice.
  315. //
  316. //#define VA_HIGH_ENVELOPE (15 << 8) // still catches high gain noise, starting to get
  317. // speech dropouts, when "p" sounds lower the gain
  318. #define VA_HIGH_ENVELOPE (14 << 8) // Noise immunity good at "seated" S/N ratio. No speech
  319. // dropouts encountered. Still catches noise at full gain.
  320. //#define VA_HIGH_ENVELOPE (13 << 8) // Noise immunity not as good as expected (new day).
  321. //#define VA_HIGH_ENVELOPE (12 << 8) // Good noise immunity. Speech recognition excellent.
  322. // Only one dropout occured in the test with a 250ms
  323. // hangover. I think the hangover time should be increased
  324. // above 250 however, because a comma (properly read) tends
  325. // to cause a dropout. I'm going to tune the hangover time,
  326. // and return to this test.
  327. //
  328. // Hangover time is now 400ms. No dropouts occur with
  329. // "seated" speech.
  330. //#define VA_HIGH_ENVELOPE (11 << 8) // Catches almost no noise at "seated" gain
  331. // however, if the gain creeped up a bit, noise would
  332. // be detected. I therefore think a slightly higher
  333. // threshold would be a good idea. The speech recognition
  334. // based on only this parameter at this level was flawless.
  335. // No dropouts at all with a 250 ms hangover time. (commas
  336. // excepted).
  337. //#define VA_HIGH_ENVELOPE (10 << 8) // catches some noise at "seated" gain - getting very close
  338. //#define VA_HIGH_ENVELOPE (9 << 8) // catches some noise at "seated" gain - getting close
  339. //#define VA_HIGH_ENVELOPE (8 << 8) // catches noise at "seated" gain
  340. //#define VA_HIGH_ENVELOPE (7 << 8) // catches noise at "seated" gain
  341. //#define VA_HIGH_ENVELOPE (0x7fffffff) // select this to factor out this VA parameter
  342. // VA_LOW_ENVELOPE
  343. //
  344. // If the 16 bit normalized value of the envelope is below
  345. // this number, the signal will never be considered voice.
  346. // This reduces some false positives on the delta checks
  347. // at very low signal levels
  348. #define VA_LOW_ENVELOPE (3 << 8)
  349. //#define VA_LOW_ENVELOPE (2 << 8) // causes false VA at low input volumes
  350. //#define VA_LOW_ENVELOPE (1 << 8) // causes false VA at low input volumes
  351. // VA_HANGOVER_TIME
  352. //
  353. // The time, in milliseconds, that voice activation sticks in
  354. // the ON position following a voice detection. E.g. a value of 500
  355. // means that voice will always be transmitted in at least 1/2 second
  356. // bursts.
  357. //
  358. // I am trying to tune this so that a properly read comma will not cause
  359. // a dropout. This will give the user a bit of leeway to pause in the
  360. // speech stream without losing the floor when in Echo Suppression mode.
  361. // It will also prevent dropouts even when not in Echo Suppression mode
  362. #define VA_HANGOVER_TIME 400 // this gives satisfying performance
  363. //#define VA_HANGOVER_TIME 375 // almost there, longest commas still goners
  364. //#define VA_HANGOVER_TIME 350 // still drops long commas
  365. //#define VA_HANGOVER_TIME 325 // does not drop fast commas, drops long ones
  366. //#define VA_HANGOVER_TIME 300 // drops almost no commas, quite good
  367. //#define VA_HANGOVER_TIME 275 // drops about half of the commas
  368. //#define VA_HANGOVER_TIME 250 // commas are always dropped
  369. // macros to avoid clib dependencies
  370. #define DV_ABS(a) ((a) < 0 ? -(a) : (a))
  371. #define DV_MAX(a, b) ((a) > (b) ? (a) : (b))
  372. #define DV_MIN(a, b) ((a) < (b) ? (a) : (b))
  373. // A function to lookup the log of n base 1.354 (sort of)
  374. // where 0 <= n <= 127
  375. //
  376. // Why the heck do we care about log n base 1.354???
  377. //
  378. // What we need is a function that maps 0 to 127 down to 0 to 15
  379. // in a nice, smooth non-linear fashion that has more fidelity at
  380. // the low end than at the high end.
  381. //
  382. // The function is actually floor(log(n, 1.354), 1) to keep things
  383. // in the integer realm.
  384. //
  385. // Why 1.354? Because log(128, 1.354) = 16, so we are using the full
  386. // range from 0 to 15.
  387. //
  388. // This function also cheats and just defines fn(0) = 0 and fn(1) = 1
  389. // for convenience.
  390. BYTE DV_LOG_1_354_lookup_table[95] =
  391. {
  392. 0, 1, 2, 3, 4, 5, 5, 6, // 0.. 7
  393. 6, 7, 7, 7, 8, 8, 8, 8, // 8.. 15
  394. 9, 9, 9, 9, 9, 10, 10, 10, // 16.. 23
  395. 10, 10, 10, 10, 10, 11, 11, 11, // 24.. 31
  396. 11, 11, 11, 11, 11, 11, 12, 12, // 32.. 39
  397. 12, 12, 12, 12, 12, 12, 12, 12, // 40.. 47
  398. 12, 12, 12, 12, 13, 13, 13, 13, // 48.. 55
  399. 13, 13, 13, 13, 13, 13, 13, 13, // 56.. 63
  400. 13, 13, 13, 13, 13, 13, 14, 14, // 64.. 71
  401. 14, 14, 14, 14, 14, 14, 14, 14, // 72.. 79
  402. 14, 14, 14, 14, 14, 14, 14, 14, // 80.. 87
  403. 14, 14, 14, 14, 14, 14, 14 // 88.. 94 - stop table at 94 here, everything above is 15
  404. };
  405. BYTE DV_log_1_354(BYTE n)
  406. {
  407. if (n > 94) return 15;
  408. return DV_LOG_1_354_lookup_table[n];
  409. }
  410. // function to lookup the base 2 log of (n) where n is 16 bits unsigned
  411. // except that we cheat and say that log_2 of zero is zero
  412. // and we chop of any decimals.
  413. BYTE DV_log_2(WORD n)
  414. {
  415. if (n & 0x8000)
  416. {
  417. return 0x0f;
  418. }
  419. if (n & 0x4000)
  420. {
  421. return 0x0e;
  422. }
  423. if (n & 0x2000)
  424. {
  425. return 0x0d;
  426. }
  427. if (n & 0x1000)
  428. {
  429. return 0x0c;
  430. }
  431. if (n & 0x0800)
  432. {
  433. return 0x0b;
  434. }
  435. if (n & 0x0400)
  436. {
  437. return 0x0a;
  438. }
  439. if (n & 0x0200)
  440. {
  441. return 0x09;
  442. }
  443. if (n & 0x0100)
  444. {
  445. return 0x08;
  446. }
  447. if (n & 0x0080)
  448. {
  449. return 0x07;
  450. }
  451. if (n & 0x0040)
  452. {
  453. return 0x06;
  454. }
  455. if (n & 0x0020)
  456. {
  457. return 0x05;
  458. }
  459. if (n & 0x0010)
  460. {
  461. return 0x04;
  462. }
  463. if (n & 0x0008)
  464. {
  465. return 0x03;
  466. }
  467. if (n & 0x0004)
  468. {
  469. return 0x02;
  470. }
  471. if (n & 0x0002)
  472. {
  473. return 0x01;
  474. }
  475. return 0x00;
  476. }
  477. #undef DPF_MODNAME
  478. #define DPF_MODNAME "CAGCVA1::Init"
  479. //
  480. // Init - initializes the AGC and VA algorithms, including loading saved
  481. // values from registry.
  482. //
  483. // dwFlags - the dwFlags from the dvClientConfig structure
  484. // guidCaptureDevice - the capture device we're performing AGC for
  485. // plInitVolume - the initial volume level is written here
  486. //
  487. HRESULT CAGCVA1::Init(
  488. const WCHAR *wszBasePath,
  489. DWORD dwFlags,
  490. GUID guidCaptureDevice,
  491. int iSampleRate,
  492. int iBitsPerSample,
  493. LONG* plInitVolume,
  494. DWORD dwSensitivity)
  495. {
  496. // Remember the number of bits per sample, if valid
  497. if (iBitsPerSample != 8 && iBitsPerSample != 16)
  498. {
  499. DPFX(DPFPREP,DVF_ERRORLEVEL, "Unexpected number of bits per sample!");
  500. return DVERR_INVALIDPARAM;
  501. }
  502. m_iBitsPerSample = iBitsPerSample;
  503. // Remember the flags
  504. m_dwFlags = dwFlags;
  505. // Remember the sensitivity
  506. m_dwSensitivity = dwSensitivity;
  507. // Figure out the shift constants for this sample rate
  508. m_iShiftConstantFast = (DV_log_2((iSampleRate * 2) / 1000) + 1);
  509. // This gives the slow filter a cutoff frequency 1/4 of
  510. // the fast filter
  511. m_iShiftConstantSlow = m_iShiftConstantFast + 2;
  512. // Figure out how often we should sample the envelope signal
  513. // to measure its change. This of course depends on the sample
  514. // rate. The cutoff frequency allowed by the calculation
  515. // above is between 40 and 80 Hz. Therefore we'll sample the
  516. // envelope signal at about 100 Hz.
  517. m_iEnvelopeSampleRate = iSampleRate / 100;
  518. // Figure out the number of samples in the configured
  519. // hangover time.
  520. m_iHangoverSamples = (VA_HANGOVER_TIME * iSampleRate) / 1000;
  521. m_iCurHangoverSamples = m_iHangoverSamples+1;
  522. // Figure out the number of samples in the configured dead zone time
  523. m_iDeadZoneSampleThreshold = (AGC_DEADZONE_TIME * iSampleRate) / 1000;
  524. // Figure out the number of samples in the configured
  525. // feedback threshold time.
  526. m_iFeedbackSamples = (AGC_FEEDBACK_TIME_THRESHOLD * iSampleRate) / 1000;
  527. m_iPossibleFeedbackSamples = 0;
  528. // Start the envelope signal at zero
  529. m_iCurEnvelopeValueFast = 0;
  530. m_iCurEnvelopeValueSlow = 0;
  531. m_iPrevEnvelopeSample = 0;
  532. m_iCurSampleNum = 0;
  533. // We're not clipping now
  534. //m_fClipping = 0;
  535. //m_iClippingCount = 0;
  536. DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1:INIT:%i,%i,%i,%i,%i",
  537. iSampleRate,
  538. m_iShiftConstantFast,
  539. m_iShiftConstantSlow,
  540. m_iEnvelopeSampleRate,
  541. m_iHangoverSamples);
  542. // Save the guid in our local member...
  543. m_guidCaptureDevice = guidCaptureDevice;
  544. wcscpy( m_wszRegPath, wszBasePath );
  545. wcscat( m_wszRegPath, DPVOICE_REGISTRY_AGC );
  546. // if the AGC reset flag is set, reset the AGC parameters,
  547. // otherwise grab them from the registry
  548. if (m_dwFlags & DVCLIENTCONFIG_AUTOVOLUMERESET)
  549. {
  550. m_lCurVolume = DSBVOLUME_MAX;
  551. }
  552. else
  553. {
  554. CRegistry cregBase;
  555. if( !cregBase.Open( HKEY_CURRENT_USER, m_wszRegPath, FALSE, TRUE ) )
  556. {
  557. m_lCurVolume = DSBVOLUME_MAX;
  558. }
  559. else
  560. {
  561. CRegistry cregCapture;
  562. if (!cregCapture.Open( cregBase.GetHandle(), &m_guidCaptureDevice ), FALSE, TRUE )
  563. {
  564. m_lCurVolume = DSBVOLUME_MAX;
  565. }
  566. if (!cregCapture.ReadDWORD( DPVOICE_REGISTRY_SAVEDAGCLEVEL, (DWORD&)m_lCurVolume ))
  567. {
  568. m_lCurVolume = DSBVOLUME_MAX;
  569. }
  570. else
  571. {
  572. // boost the saved volume a bit
  573. m_lCurVolume += AGC_VOLUME_INITIAL_UPTICK;
  574. if (m_lCurVolume > DSBVOLUME_MAX)
  575. {
  576. m_lCurVolume = DSBVOLUME_MAX;
  577. }
  578. }
  579. }
  580. }
  581. /*
  582. // zero out the historgrams
  583. memset(m_rgdwPeakHistogram, 0, CAGCVA1_HISTOGRAM_BUCKETS*sizeof(DWORD));
  584. memset(m_rgdwZeroCrossingsHistogram, 0, CAGCVA1_HISTOGRAM_BUCKETS*sizeof(DWORD));
  585. */
  586. // allocate the memory for the AGC history
  587. m_rgfAGCHistory = new float[AGC_VOLUME_LEVELS];
  588. if (m_rgfAGCHistory == NULL)
  589. {
  590. return DVERR_OUTOFMEMORY;
  591. }
  592. // initialize the history to the ideal value
  593. for (int iIndex = 0; iIndex < AGC_VOLUME_LEVELS; ++iIndex)
  594. {
  595. m_rgfAGCHistory[iIndex] = (float)AGC_IDEAL_CLIPPING_RATIO;
  596. }
  597. m_dwHistorySamples = (iSampleRate * AGC_CLIPPING_HISTORY) / 1000;
  598. m_iPossibleFeedbackSamples = 0;
  599. // stuff the initial volume into the caller's variable
  600. *plInitVolume = m_lCurVolume;
  601. return DV_OK;
  602. }
  603. #undef DPF_MODNAME
  604. #define DPF_MODNAME "CAGCVA1::Deinit"
  605. //
  606. // Deinit - saves the current AGC and VA state to the registry for use next session
  607. //
  608. HRESULT CAGCVA1::Deinit()
  609. {
  610. HRESULT hr = DV_OK;
  611. CRegistry cregBase;
  612. if(cregBase.Open( HKEY_CURRENT_USER, m_wszRegPath, FALSE, TRUE ) )
  613. {
  614. CRegistry cregDevice;
  615. if (cregDevice.Open( cregBase.GetHandle(), &m_guidCaptureDevice, FALSE, TRUE))
  616. {
  617. if (!cregDevice.WriteDWORD( DPVOICE_REGISTRY_SAVEDAGCLEVEL, (DWORD&)m_lCurVolume ))
  618. {
  619. DPFX(DPFPREP,DVF_ERRORLEVEL, "Error writing AGC settings to registry");
  620. hr = DVERR_WIN32;
  621. }
  622. }
  623. else
  624. {
  625. DPFX(DPFPREP,DVF_ERRORLEVEL, "Error writing AGC settings to registry");
  626. hr = DVERR_WIN32;
  627. }
  628. }
  629. else
  630. {
  631. DPFX(DPFPREP,DVF_ERRORLEVEL, "Error writing AGC settings to registry");
  632. hr = DVERR_WIN32;
  633. }
  634. delete [] m_rgfAGCHistory;
  635. return hr;
  636. }
  637. #undef DPF_MODNAME
  638. #define DPF_MODNAME "CAGCVA1::SetSensitivity"
  639. HRESULT CAGCVA1::SetSensitivity(DWORD dwFlags, DWORD dwSensitivity)
  640. {
  641. if (dwFlags & DVCLIENTCONFIG_AUTOVOICEACTIVATED)
  642. {
  643. m_dwFlags |= DVCLIENTCONFIG_AUTOVOICEACTIVATED;
  644. }
  645. else
  646. {
  647. m_dwFlags &= ~DVCLIENTCONFIG_AUTOVOICEACTIVATED;
  648. }
  649. m_dwSensitivity = dwSensitivity;
  650. return DV_OK;
  651. }
  652. #undef DPF_MODNAME
  653. #define DPF_MODNAME "CAGCVA1::GetSensitivity"
  654. HRESULT CAGCVA1::GetSensitivity(DWORD* pdwFlags, DWORD* pdwSensitivity)
  655. {
  656. if (m_dwFlags & DVCLIENTCONFIG_AUTORECORDVOLUME)
  657. {
  658. *pdwFlags |= DVCLIENTCONFIG_AUTORECORDVOLUME;
  659. }
  660. else
  661. {
  662. *pdwFlags &= ~DVCLIENTCONFIG_AUTORECORDVOLUME;
  663. }
  664. *pdwSensitivity = m_dwSensitivity;
  665. return DV_OK;
  666. }
  667. #undef DPF_MODNAME
  668. #define DPF_MODNAME "CAGCVA1::AnalyzeData"
  669. //
  670. // AnalyzeData - performs the AGC & VA calculations on one frame of audio
  671. //
  672. // pbAudioData - pointer to a buffer containing the audio data
  673. // dwAudioDataSize - size, in bytes, of the audio data
  674. //
  675. HRESULT CAGCVA1::AnalyzeData(BYTE* pbAudioData, DWORD dwAudioDataSize /*, DWORD dwFrameTime*/)
  676. {
  677. int iMaxValue;
  678. //int iValue;
  679. int iValueAbs;
  680. //int iZeroCrossings;
  681. int iIndex;
  682. int iMaxPossiblePeak;
  683. int iNumberOfSamples;
  684. //BYTE bPeak255;
  685. //m_dwFrameTime = dwFrameTime;
  686. if (dwAudioDataSize < 1)
  687. {
  688. DPFX(DPFPREP,DVF_ERRORLEVEL, "Error: Audio Data Size < 1");
  689. return DVERR_INVALIDPARAM;
  690. }
  691. /*
  692. // zip through the data, find the peak value and zero crossings
  693. if (m_iBitsPerSample == 8)
  694. {
  695. iPrevValue = 0;
  696. iZeroCrossings = 0;
  697. iMaxValue = 0;
  698. iNumberOfSamples = dwAudioDataSize;
  699. for (iIndex = 0; iIndex < (int)iNumberOfSamples; ++iIndex)
  700. {
  701. iValue = (int)pbAudioData[iIndex] - 0x7F;
  702. if (iValue * iPrevValue < 0)
  703. {
  704. ++iZeroCrossings;
  705. }
  706. iPrevValue = iValue;
  707. iMaxValue = DV_MAX(DV_ABS(iValue), iMaxValue);
  708. }
  709. iMaxPossiblePeak = 0x7F;
  710. }
  711. else if (m_iBitsPerSample == 16)
  712. {
  713. // cast the audio data to signed 16 bit integers
  714. signed short* psiAudioData = (signed short *)pbAudioData;
  715. // halve the number of samples
  716. iNumberOfSamples = dwAudioDataSize / 2;
  717. iPrevValue = 0;
  718. iZeroCrossings = 0;
  719. iMaxValue = 0;
  720. for (iIndex = 0; iIndex < (int)iNumberOfSamples; ++iIndex)
  721. {
  722. iValue = (int)psiAudioData[iIndex];
  723. if (iValue * iPrevValue < 0)
  724. {
  725. ++iZeroCrossings;
  726. }
  727. iPrevValue = iValue;
  728. iMaxValue = DV_MAX(DV_ABS(iValue), iMaxValue);
  729. }
  730. iMaxPossiblePeak = 0x7FFF;
  731. }
  732. else
  733. {
  734. DPFX(DPFPREP,DVF_ERRORLEVEL, "Unexpected number of bits per sample!");
  735. iMaxValue = 0;
  736. iZeroCrossings = 0;
  737. }
  738. */
  739. /*
  740. // normalize the peak value to the range DVINPUTLEVEL_MIN to DVINPUTLEVEL_MAX
  741. m_bPeak = (BYTE)(DVINPUTLEVEL_MIN + ((iMaxValue * (DVINPUTLEVEL_MAX - DVINPUTLEVEL_MIN)) / iMaxPossiblePeak));
  742. // normalize zero crossings and peak to the range 0 to 127 (0x7f)
  743. m_bZeroCrossings127 = (iZeroCrossings * 0x7f) / iIndex;
  744. m_bPeak127 = (iMaxValue * 0x7f) / iMaxPossiblePeak;
  745. // take the log base 1.354 of the peak and zero crossing
  746. m_bPeakLog = DV_log_1_354(m_bPeak127);
  747. m_bZeroCrossingsLog = DV_log_1_354(m_bZeroCrossings127);
  748. // update the histograms
  749. ++m_rgdwPeakHistogram[m_bPeakLog];
  750. ++m_rgdwZeroCrossingsHistogram[m_bZeroCrossingsLog];
  751. DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1:ANA:PZ,%i,%i,%i,%i,%i,%i",
  752. m_bPeak,
  753. m_bPeak127,
  754. m_bPeakLog,
  755. iZeroCrossings,
  756. m_bZeroCrossings127,
  757. m_bZeroCrossingsLog
  758. );
  759. DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1:ANAHP,%i,%i,%i,%i,%i,%i,%i,%i,%i,%i,%i,%i,%i,%i,%i,%i",
  760. m_rgdwPeakHistogram[0x00],
  761. m_rgdwPeakHistogram[0x01],
  762. m_rgdwPeakHistogram[0x02],
  763. m_rgdwPeakHistogram[0x03],
  764. m_rgdwPeakHistogram[0x04],
  765. m_rgdwPeakHistogram[0x05],
  766. m_rgdwPeakHistogram[0x06],
  767. m_rgdwPeakHistogram[0x07],
  768. m_rgdwPeakHistogram[0x08],
  769. m_rgdwPeakHistogram[0x09],
  770. m_rgdwPeakHistogram[0x0a],
  771. m_rgdwPeakHistogram[0x0b],
  772. m_rgdwPeakHistogram[0x0c],
  773. m_rgdwPeakHistogram[0x0d],
  774. m_rgdwPeakHistogram[0x0e],
  775. m_rgdwPeakHistogram[0x0f]);
  776. DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1:ANAHZ,%i,%i,%i,%i,%i,%i,%i,%i,%i,%i,%i,%i,%i,%i,%i,%i",
  777. m_rgdwZeroCrossingsHistogram[0x00],
  778. m_rgdwZeroCrossingsHistogram[0x01],
  779. m_rgdwZeroCrossingsHistogram[0x02],
  780. m_rgdwZeroCrossingsHistogram[0x03],
  781. m_rgdwZeroCrossingsHistogram[0x04],
  782. m_rgdwZeroCrossingsHistogram[0x05],
  783. m_rgdwZeroCrossingsHistogram[0x06],
  784. m_rgdwZeroCrossingsHistogram[0x07],
  785. m_rgdwZeroCrossingsHistogram[0x08],
  786. m_rgdwZeroCrossingsHistogram[0x09],
  787. m_rgdwZeroCrossingsHistogram[0x0a],
  788. m_rgdwZeroCrossingsHistogram[0x0b],
  789. m_rgdwZeroCrossingsHistogram[0x0c],
  790. m_rgdwZeroCrossingsHistogram[0x0d],
  791. m_rgdwZeroCrossingsHistogram[0x0e],
  792. m_rgdwZeroCrossingsHistogram[0x0f]);
  793. */
  794. // new algorithm...
  795. // cast the audio data to signed 16 bit integers
  796. signed short* psiAudioData = (signed short *)pbAudioData;
  797. if (m_iBitsPerSample == 16)
  798. {
  799. iNumberOfSamples = dwAudioDataSize / 2;
  800. iMaxPossiblePeak = 0x7fff;
  801. }
  802. else
  803. {
  804. iNumberOfSamples = dwAudioDataSize;
  805. iMaxPossiblePeak = 0x7f00;
  806. }
  807. m_fDeadZoneDetected = TRUE;
  808. m_iClippingSampleCount = 0;
  809. m_iNonClippingSampleCount = 0;
  810. m_fVoiceDetectedThisFrame = FALSE;
  811. iMaxValue = 0;
  812. for (iIndex = 0; iIndex < (int)iNumberOfSamples; ++iIndex)
  813. {
  814. ++m_iCurSampleNum;
  815. // extract a sample
  816. if (m_iBitsPerSample == 8)
  817. {
  818. iValueAbs = DV_ABS((int)pbAudioData[iIndex] - 0x80);
  819. // promote it to 16 bits
  820. iValueAbs <<= 8;
  821. }
  822. else
  823. {
  824. iValueAbs = DV_ABS((int)psiAudioData[iIndex]);
  825. }
  826. // see if it is the new peak value
  827. iMaxValue = DV_MAX(iValueAbs, iMaxValue);
  828. // do the low pass filtering, but only if we are in autosensitivity mode
  829. int iNormalizedCurEnvelopeValueFast;
  830. int iNormalizedCurEnvelopeValueSlow;
  831. if (m_dwFlags & DVCLIENTCONFIG_AUTOVOICEACTIVATED)
  832. {
  833. m_iCurEnvelopeValueFast =
  834. iValueAbs +
  835. (m_iCurEnvelopeValueFast - (m_iCurEnvelopeValueFast >> m_iShiftConstantFast));
  836. iNormalizedCurEnvelopeValueFast = m_iCurEnvelopeValueFast >> m_iShiftConstantFast;
  837. m_iCurEnvelopeValueSlow =
  838. iValueAbs +
  839. (m_iCurEnvelopeValueSlow - (m_iCurEnvelopeValueSlow >> m_iShiftConstantSlow));
  840. iNormalizedCurEnvelopeValueSlow = m_iCurEnvelopeValueSlow >> m_iShiftConstantSlow;
  841. // check to see if we consider this voice
  842. if (iNormalizedCurEnvelopeValueFast > VA_LOW_ENVELOPE &&
  843. (iNormalizedCurEnvelopeValueFast > VA_HIGH_ENVELOPE ||
  844. iNormalizedCurEnvelopeValueFast > (VA_HIGH_PERCENT * iNormalizedCurEnvelopeValueSlow) / 100 ||
  845. iNormalizedCurEnvelopeValueFast < (VA_LOW_PERCENT * iNormalizedCurEnvelopeValueSlow) / 100 ))
  846. {
  847. m_fVoiceDetectedNow = TRUE;
  848. m_fVoiceDetectedThisFrame = TRUE;
  849. m_fVoiceHangoverActive = TRUE;
  850. m_iCurHangoverSamples = 0;
  851. }
  852. else
  853. {
  854. m_fVoiceDetectedNow = FALSE;
  855. ++m_iCurHangoverSamples;
  856. if (m_iCurHangoverSamples > m_iHangoverSamples)
  857. {
  858. m_fVoiceHangoverActive = FALSE;
  859. }
  860. else
  861. {
  862. m_fVoiceHangoverActive = TRUE;
  863. m_fVoiceDetectedThisFrame = TRUE;
  864. }
  865. }
  866. }
  867. /*
  868. DPFX(DPFPREP,DVF_WARNINGLEVEL, "AGCVA1:VA,%i,%i,%i,%i,%i,%i",
  869. iValueAbs,
  870. iNormalizedCurEnvelopeValueFast,
  871. iNormalizedCurEnvelopeValueSlow,
  872. m_fVoiceDetectedNow,
  873. m_fVoiceHangoverActive,
  874. m_fVoiceDetectedThisFrame);
  875. */
  876. // check for clipping
  877. if (iValueAbs > AGC_PEAK_CLIPPING_THRESHOLD)
  878. {
  879. ++m_iClippingSampleCount;
  880. }
  881. else
  882. {
  883. ++m_iNonClippingSampleCount;
  884. }
  885. // check for possible feedback condition
  886. /*
  887. if (m_iCurEnvelopeValueFast >> m_iShiftConstantFast > AGC_FEEDBACK_ENV_THRESHOLD)
  888. {
  889. ++m_iPossibleFeedbackSamples;
  890. }
  891. else
  892. {
  893. m_iPossibleFeedbackSamples = 0;
  894. }
  895. */
  896. /*
  897. // see if this is a sample point
  898. if (m_iCurSampleNum % m_iEnvelopeSampleRate == 0)
  899. {
  900. // calculate the change since the last sample - normalize to 16 bits
  901. // int iDelta = DV_ABS(m_iCurEnvelopeValueFast - m_iPrevEnvelopeSample) >> iShiftConstant;
  902. int iDelta;
  903. if (m_iPrevEnvelopeSample != 0)
  904. {
  905. iDelta = (1000 * m_iCurEnvelopeValueFast) / m_iPrevEnvelopeSample;
  906. }
  907. else
  908. {
  909. iDelta = 1000;
  910. }
  911. DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1:ANA:ENV,%i,%i",
  912. m_iCurEnvelopeValueFast >> m_iShiftConstantFast,
  913. iDelta);
  914. // check to see if we consider this voice
  915. if (m_iCurEnvelopeValueFast >> m_iShiftConstantFast > VA_LOW_ENVELOPE &&
  916. (iDelta > VA_HIGH_DELTA ||
  917. iDelta < VA_LOW_DELTA ||
  918. m_iCurEnvelopeValueFast >> m_iShiftConstantFast > VA_HIGH_ENVELOPE))
  919. {
  920. m_fVoiceDetectedNow = TRUE;
  921. m_fVoiceDetectedThisFrame = TRUE;
  922. m_fVoiceHangoverActive = TRUE;
  923. m_iCurHangoverSamples = 0;
  924. }
  925. else
  926. {
  927. m_fVoiceDetectedNow = FALSE;
  928. m_iCurHangoverSamples += m_iEnvelopeSampleRate;
  929. if (m_iCurHangoverSamples > m_iHangoverSamples)
  930. {
  931. m_fVoiceHangoverActive = FALSE;
  932. }
  933. else
  934. {
  935. m_fVoiceHangoverActive = TRUE;
  936. m_fVoiceDetectedThisFrame = TRUE;
  937. }
  938. }
  939. // check for possible feedback condition
  940. if (m_iCurEnvelopeValueFast >> m_iShiftConstantFast > AGC_FEEDBACK_ENV_THRESHOLD)
  941. {
  942. m_iPossibleFeedbackSamples += m_iEnvelopeSampleRate;
  943. }
  944. else
  945. {
  946. m_iPossibleFeedbackSamples = 0;
  947. }
  948. DPFX(DPFPREP,DVF_WARNINGLEVEL, "AGCVA1:VA,%i,%i,%i,%i,%i,%i,%i",
  949. m_iCurEnvelopeValueFast >> m_iShiftConstantFast,
  950. m_iCurEnvelopeValueSlow >> m_iShiftConstantSlow,
  951. iDeltaFastSlow,
  952. iDelta,
  953. m_fVoiceDetectedNow,
  954. m_fVoiceHangoverActive,
  955. m_fVoiceDetectedThisFrame);
  956. m_iPrevEnvelopeSample = m_iCurEnvelopeValueFast;
  957. }
  958. */
  959. }
  960. // Normalize the peak value to the range DVINPUTLEVEL_MIN to DVINPUTLEVEL_MAX
  961. // This is what is returned for caller's peak meters...
  962. m_bPeak = (BYTE)(DVINPUTLEVEL_MIN +
  963. ((iMaxValue * (DVINPUTLEVEL_MAX - DVINPUTLEVEL_MIN)) / iMaxPossiblePeak));
  964. // if we are in manual VA mode (not autovolume) check the peak against
  965. // the sensitivity threshold
  966. if (!(m_dwFlags & DVCLIENTCONFIG_AUTOVOICEACTIVATED))
  967. {
  968. if (m_bPeak > m_dwSensitivity)
  969. {
  970. m_fVoiceDetectedThisFrame = TRUE;
  971. }
  972. }
  973. // Check if we're in a deadzone
  974. if (iMaxValue > AGC_DEADZONE_THRESHOLD)
  975. {
  976. m_fDeadZoneDetected = FALSE;
  977. }
  978. DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1:ANA,%i,%i,%i,%i,%i,%i,%i",
  979. m_bPeak,
  980. iMaxValue,
  981. m_fVoiceDetectedThisFrame,
  982. m_fDeadZoneDetected,
  983. m_iClippingSampleCount,
  984. m_iNonClippingSampleCount,
  985. m_iPossibleFeedbackSamples);
  986. return DV_OK;
  987. }
  988. #undef DPF_MODNAME
  989. #define DPF_MODNAME "CAGCVA1::AGCResults"
  990. //
  991. // AGCResults - returns the AGC results from the previous AnalyzeFrame call
  992. //
  993. // lCurVolume - the current recording volume
  994. // plNewVolume - stuffed with the desired new recording volume
  995. //
  996. HRESULT CAGCVA1::AGCResults(LONG lCurVolume, LONG* plNewVolume, BOOL fTransmitFrame)
  997. {
  998. // default to keeping the same volume
  999. *plNewVolume = lCurVolume;
  1000. // Figure out what volume level we're at
  1001. int iVolumeLevel = DV_MIN(DV_ABS(AGC_VOLUME_MAXIMUM - lCurVolume) / AGC_VOLUME_TICKSIZE,
  1002. AGC_VOLUME_LEVELS - 1);
  1003. //DPFX(DPFPREP, DVF_INFOLEVEL, "AGCVA1:AGC,Cur Volume:%i,%i",lCurVolume, iVolumeLevel);
  1004. // Don't make another adjustment if we have just done one.
  1005. // This ensures that when we start looking at input data
  1006. // again, it will be post-adjustment data.
  1007. if( m_fAGCLastFrameAdjusted )
  1008. {
  1009. //DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1:AGC,Previous frame adjusted, no AGC this frame");
  1010. m_fAGCLastFrameAdjusted = FALSE;
  1011. }
  1012. else
  1013. {
  1014. // check for a dead zone condition
  1015. if (m_fDeadZoneDetected /* || m_rgfAGCHistory[iVolumeLevel] == 0.0 */)
  1016. {
  1017. // We may be in the dead zone (volume way too low).
  1018. // Before we take the drastic action of sweepting the volume
  1019. // up, make sure we've been here long enough to be sure
  1020. // we're too low.
  1021. m_iDeadZoneSamples += (m_iClippingSampleCount + m_iNonClippingSampleCount);
  1022. if (m_iDeadZoneSamples > m_iDeadZoneSampleThreshold)
  1023. {
  1024. // The input volume has been lowered too far. We're not
  1025. // getting any input at all. To remedy this situation,
  1026. // we'll boost the volume now, but we'll also mark this
  1027. // volume level as off limits by setting its history to
  1028. // zero. That will prevent the volume from ever being
  1029. // dropped to this level again during this session.
  1030. if (iVolumeLevel != 0)
  1031. {
  1032. // We also reset the history of the volume level we are going to,
  1033. // so we start with a clean slate.
  1034. m_rgfAGCHistory[iVolumeLevel-1] = (const float)AGC_IDEAL_CLIPPING_RATIO;
  1035. *plNewVolume = DV_MIN(lCurVolume + AGC_VOLUME_TICKSIZE, AGC_VOLUME_MAXIMUM);
  1036. m_fAGCLastFrameAdjusted = TRUE;
  1037. }
  1038. }
  1039. }
  1040. else
  1041. {
  1042. m_iDeadZoneSamples = 0;
  1043. }
  1044. // check for a feedback condition
  1045. if (m_iPossibleFeedbackSamples > m_iFeedbackSamples)
  1046. {
  1047. // we have feedback. take the volume down a tick, but only if the
  1048. // next tick down is not off limits due to a dead zone, and we're
  1049. // not already on the lowest tick.
  1050. if (iVolumeLevel < AGC_VOLUME_LEVELS - 1)
  1051. {
  1052. *plNewVolume = DV_MAX(lCurVolume - AGC_VOLUME_TICKSIZE, AGC_VOLUME_MINIMUM);
  1053. m_fAGCLastFrameAdjusted = TRUE;
  1054. // Also adjust this level's history, so it will be hard to
  1055. // get back up to this bad feedback level. Pretend we just
  1056. // clipped on 100% of the samples in this frame.
  1057. m_rgfAGCHistory[iVolumeLevel] =
  1058. (m_iClippingSampleCount + m_iNonClippingSampleCount + (m_rgfAGCHistory[iVolumeLevel] * m_dwHistorySamples))
  1059. / (m_iClippingSampleCount + m_iNonClippingSampleCount + m_dwHistorySamples);
  1060. }
  1061. }
  1062. else if (fTransmitFrame)
  1063. {
  1064. // Factor this frame's clipping ratio into the appropriate history bucket
  1065. m_rgfAGCHistory[iVolumeLevel] =
  1066. (m_iClippingSampleCount + (m_rgfAGCHistory[iVolumeLevel] * m_dwHistorySamples))
  1067. / (m_iClippingSampleCount + m_iNonClippingSampleCount + m_dwHistorySamples);
  1068. if (m_rgfAGCHistory[iVolumeLevel] > AGC_IDEAL_CLIPPING_RATIO)
  1069. {
  1070. // Only consider lowering the volume if we clipped on this frame.
  1071. if (m_iClippingSampleCount > 0)
  1072. {
  1073. // we're clipping too much at this level, consider reducing
  1074. // the volume.
  1075. if (iVolumeLevel >= AGC_VOLUME_LEVELS - 1)
  1076. {
  1077. // we're already at the lowest volume level that we have
  1078. // a bucket for. Make sure we're clamped to the minimum
  1079. if (lCurVolume > AGC_VOLUME_MINIMUM)
  1080. {
  1081. *plNewVolume = AGC_VOLUME_MINIMUM;
  1082. m_fAGCLastFrameAdjusted = TRUE;
  1083. //DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1:AGC,too much clipping, clamping volume to min: %i", *plNewVolume);
  1084. }
  1085. }
  1086. else
  1087. {
  1088. // Choose either this volume level, or the next lower
  1089. // one, depending on which has the history that is
  1090. // closest to the ideal.
  1091. float fCurDistanceFromIdeal = (float)(m_rgfAGCHistory[iVolumeLevel] / AGC_IDEAL_CLIPPING_RATIO);
  1092. if (fCurDistanceFromIdeal < 1.0)
  1093. {
  1094. fCurDistanceFromIdeal = (float)(1.0 / fCurDistanceFromIdeal);
  1095. }
  1096. float fLowerDistanceFromIdeal = (float)(m_rgfAGCHistory[iVolumeLevel+1] / (float)AGC_IDEAL_CLIPPING_RATIO);
  1097. if (fLowerDistanceFromIdeal < 1.0)
  1098. {
  1099. fLowerDistanceFromIdeal = (float)(1.0 / fLowerDistanceFromIdeal);
  1100. }
  1101. if (fLowerDistanceFromIdeal < fCurDistanceFromIdeal
  1102. && fCurDistanceFromIdeal > AGC_CHANGE_THRESHOLD)
  1103. {
  1104. // The next lower volume level is closer to the ideal
  1105. // clipping ratio. Take the volume down a tick.
  1106. *plNewVolume = DV_MAX(lCurVolume - AGC_VOLUME_TICKSIZE, AGC_VOLUME_MINIMUM);
  1107. m_fAGCLastFrameAdjusted = TRUE;
  1108. //DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1:AGC,too much clipping, setting volume to: %i", *plNewVolume);
  1109. }
  1110. }
  1111. }
  1112. }
  1113. else
  1114. {
  1115. // we're clipping too little at this level, consider increasing
  1116. // the volume.
  1117. if (iVolumeLevel == 0)
  1118. {
  1119. // We're already at the highest volume level.
  1120. // Make sure we're at the max
  1121. if (lCurVolume != AGC_VOLUME_MAXIMUM)
  1122. {
  1123. *plNewVolume = AGC_VOLUME_MAXIMUM;
  1124. m_fAGCLastFrameAdjusted = TRUE;
  1125. //DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1:AGC,too little clipping, clamping volume to max: %i", *plNewVolume);
  1126. }
  1127. }
  1128. else
  1129. {
  1130. // We always increase the volume in this case, and let it push back down if
  1131. // it clips again. This will continue testing the upper volume limit, and
  1132. // help dig us out of "too low" volume holes.
  1133. *plNewVolume = DV_MIN(lCurVolume + AGC_VOLUME_TICKSIZE, AGC_VOLUME_MAXIMUM);
  1134. m_fAGCLastFrameAdjusted = TRUE;
  1135. }
  1136. }
  1137. /*
  1138. // see if we clipped on the last analysis pass
  1139. if (m_fClipping)
  1140. {
  1141. DPFX(DPFPREP, DVF_INFOLEVEL, "AGCVA1: Above reduction threshold, reducing volume level\n" );
  1142. if( lCurVolume > AGC_VOLUME_MINIMUM )
  1143. {
  1144. *plNewVolume = lCurVolume - AGC_VOLUME_DOWNTICK;
  1145. m_fAGCLastFrameAdjusted = TRUE;
  1146. m_dwAGCBelowThresholdTime = 0;
  1147. }
  1148. // check to make sure we didn't just make it too low...
  1149. if( *plNewVolume < AGC_VOLUME_MINIMUM )
  1150. {
  1151. DPFX(DPFPREP, DVF_INFOLEVEL, "AGCVA1: Clamping volume to Min\n" );
  1152. *plNewVolume = AGC_VOLUME_MINIMUM;
  1153. }
  1154. }
  1155. */
  1156. /*
  1157. // The input level can fall into one of three ranges:
  1158. // Above AGC_REDUCTION_THRESHOLD
  1159. // - this means we're probably clipping
  1160. // Between AGC_REDUCTION_THRESHOLD and AGC_INCREASE_THRESHOLD
  1161. // - this is the happy place
  1162. // Below AGC_INCREASE_THRESHOLD
  1163. // - the input is pretty quiet, we should *consider* raising the input volume.
  1164. if (m_bPeak > AGC_REDUCTION_THRESHOLD)
  1165. {
  1166. // Too high! Reduce the volume and then reset the AGC state
  1167. // variables.
  1168. DPFX(DPFPREP, DVF_INFOLEVEL, "AGCVA1: Above reduction threshold, reducing volume level\n" );
  1169. if( lCurVolume > AGC_VOLUME_MINIMUM )
  1170. {
  1171. *plNewVolume = lCurVolume - AGC_VOLUME_DOWNTICK;
  1172. //m_fAGCLastFrameAdjusted = TRUE;
  1173. m_dwAGCBelowThresholdTime = 0;
  1174. }
  1175. // check to make sure we didn't just make it too low...
  1176. if( *plNewVolume < AGC_VOLUME_MINIMUM )
  1177. {
  1178. DPFX(DPFPREP, DVF_INFOLEVEL, "AGCVA1: Clamping volume to Min\n" );
  1179. *plNewVolume = AGC_VOLUME_MINIMUM;
  1180. }
  1181. }
  1182. else if (m_bPeak < AGC_INCREASE_THRESHOLD)
  1183. {
  1184. // Increase the time that the input has been too quiet
  1185. m_dwAGCBelowThresholdTime += m_dwFrameTime;
  1186. DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1: Now below increase threshold for %i milliseconds", m_dwAGCBelowThresholdTime);
  1187. // If things have been too quiet for too long, raise the
  1188. // volume level a tick, assuming we're not already maxed,
  1189. // then reset the AGC state vars
  1190. if( m_dwAGCBelowThresholdTime >= AGC_INCREASE_THRESHOLD_TIME &&
  1191. lCurVolume < AGC_VOLUME_MAXIMUM )
  1192. {
  1193. DPFX(DPFPREP, DVF_INFOLEVEL, "AGCVA1: Boosting volume level\n" );
  1194. *plNewVolume = lCurVolume + AGC_VOLUME_UPTICK;
  1195. //m_fAGCLastFrameAdjusted = TRUE;
  1196. m_dwAGCBelowThresholdTime = 0;
  1197. // check to make sure we didn't just make it too high...
  1198. if( *plNewVolume > AGC_VOLUME_MAXIMUM )
  1199. {
  1200. DPFX(DPFPREP, DVF_INFOLEVEL, "AGCVA1: Clamping volume to Max\n" );
  1201. *plNewVolume = AGC_VOLUME_MAXIMUM;
  1202. }
  1203. }
  1204. }
  1205. else
  1206. {
  1207. // We are nicely in the sweet spot. Not too loud, not too soft. Reset
  1208. // the below threshold count.
  1209. m_dwAGCBelowThresholdTime = 0;
  1210. DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1: between thresholds, resetting below threshold time: %i", m_dwAGCBelowThresholdTime);
  1211. }
  1212. */
  1213. }
  1214. }
  1215. m_lCurVolume = *plNewVolume;
  1216. // dump profiling data, in an easily importable format
  1217. DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1:AGC,%i,%i,%i,%i,%i,%i,%i",
  1218. m_fVoiceDetectedThisFrame,
  1219. m_fDeadZoneDetected,
  1220. iVolumeLevel,
  1221. (int)(m_rgfAGCHistory[iVolumeLevel]*1000000),
  1222. m_iClippingSampleCount,
  1223. m_iNonClippingSampleCount,
  1224. m_lCurVolume);
  1225. return DV_OK;
  1226. }
  1227. #undef DPF_MODNAME
  1228. #define DPF_MODNAME "CAGCVA1::VAResults"
  1229. //
  1230. // VAResults - returns the VA results from the previous AnalyzeFrame call
  1231. //
  1232. // pfVoiceDetected - stuffed with TRUE if voice was detected in the data, FALSE otherwise
  1233. //
  1234. HRESULT CAGCVA1::VAResults(BOOL* pfVoiceDetected)
  1235. {
  1236. if (pfVoiceDetected != NULL)
  1237. {
  1238. *pfVoiceDetected = m_fVoiceDetectedThisFrame;
  1239. }
  1240. return DV_OK;
  1241. }
  1242. #undef DPF_MODNAME
  1243. #define DPF_MODNAME "CAGCVA1::PeakResults"
  1244. //
  1245. // PeakResults - returns the peak sample value from the previous AnalyzeFrame call,
  1246. // normalized to the range 0 to 99
  1247. //
  1248. // pfPeakValue - pointer to a byte where the peak value is written
  1249. //
  1250. HRESULT CAGCVA1::PeakResults(BYTE* pbPeakValue)
  1251. {
  1252. DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1: peak value: %i" , m_bPeak);
  1253. *pbPeakValue = m_bPeak;
  1254. return DV_OK;
  1255. }