Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1085 lines
35 KiB

/*==========================================================================
*
* Copyright (C) 1999 Microsoft Corporation. All Rights Reserved.
*
* File: agcva1.cpp
* Content: Concrete class that implements CAutoGainControl
*
* History:
* Date By Reason
* ==== == ======
* 12/01/99 pnewson Created it
* 01/14/2000 rodtoll Plugged memory leak
* 01/21/2000 pnewson Fixed false detection at start of audio stream
* Raised VA_LOW_ENVELOPE from (2<<8) to (3<<8)
* 01/24/2000 pnewson Fixed return code on Deinit
* 01/31/2000 pnewson re-add support for absence of DVCLIENTCONFIG_AUTOSENSITIVITY flag
* 02/08/2000 rodtoll Bug #131496 - Selecting DVSENSITIVITY_DEFAULT results in voice
* never being detected
* 03/03/2000 rodtoll Updated to handle alternative gamevoice build.
* 04/20/2000 rodtoll Bug #32889 - Unable to run on non-admin accounts on Win2k
* 04/20/2000 pnewson Tune AGC algorithm to make it more aggressive at
* raising the recording volume.
* 04/25/2000 pnewson Fix to improve responsiveness of AGC when volume level too low
* 12/07/2000 rodtoll WinBugs #48379: DPVOICE: AGC appears to be functioning incorrectly (restoring to old algorithm)
*
***************************************************************************/
#include "dxvutilspch.h"
/*
How this voice activation code works:
The idea is this. The power of the noise signal is pretty much constant over
time. The power of a voice signal varies considerably over time. The power of
a voice signal is not always high however. Weak fricative noises and such do not
generate much power, but since they are part of a stream of speech, they represent
a dip in the power, not a constant low power like the noise signal. We therefore
associate changes in power with the presence of a voice signal.
If it works as expected, this will allow us to detect voice activity even
when the input volume, and therefore the total power of the signal, is very
low. This in turn will allow the auto gain control code to be more effective.
To estimate the power of the signal, we run the absolute value of the input signal
through a recursive digital low pass filter. This gives us the "envelope" signal.
[An alternative way to view this is a low frequency envelope signal modulated by a
higher frequency carrier signal. We're extracting the low frequency envelope signal.]
*/
// Redefine the debug-output subcomponent tag for this file.
// (presumably consumed by the DPFX logging macros used below - confirm in the debug headers)
#undef DPF_SUBCOMP
#define DPF_SUBCOMP DN_SUBCOMP_VOICE
// Name of the registry value that holds the saved AGC volume level.
// Init reads this value and Deinit writes it, in both cases under the
// key opened for the capture device's GUID.
#define DPVOICE_REGISTRY_SAVEDAGCLEVEL L"SavedAGCLevel"
// AGC_VOLUME_TICKSIZE
//
// The amount the recording volume is changed by each time
// AGC determines an adjustment is required. It is also the
// width of one bucket in the per-volume-level clipping history
// (the volume range is divided by this value to get a bucket index).
#define AGC_VOLUME_TICKSIZE 100
/*
// AGC_VOLUME_UPTICK
//
// The amount the recording volume should be increased
// when the input level has been too low for a while.
#define AGC_VOLUME_UPTICK 125
// AGC_VOLUME_DOWNTICK
//
// The amount the recording volume should be decreased
// when the input level has been too high for a while.
#define AGC_VOLUME_DOWNTICK 250
*/
// AGC_VOLUME_INITIAL_UPTICK
//
// When the AGC level is loaded from the registry, this
// amount is added to it as an initial boost, since it
// is much easier and faster to lower the recording level
// via AGC than it is to raise it.
// Init clamps the boosted value to DSBVOLUME_MAX.
#define AGC_VOLUME_INITIAL_UPTICK 500
// AGC_VOLUME_MINIMUM
//
// The minimum volume setting AGC will select.
// It is kept one tick above DSBVOLUME_MIN, because the
// absolute minimum mutes some cards.
#define AGC_VOLUME_MINIMUM (DSBVOLUME_MIN+AGC_VOLUME_TICKSIZE)
// AGC_VOLUME_MAXIMUM
//
// The maximum volume setting allowed.
#define AGC_VOLUME_MAXIMUM DSBVOLUME_MAX
// AGC_VOLUME_LEVELS
//
// How many possible volume levels are there?
// This is the number of tick-sized steps between the minimum and the
// maximum, plus one, and is the length of the clipping history array
// allocated in Init. Level 0 corresponds to AGC_VOLUME_MAXIMUM.
// (DV_ABS is defined further down the file; that is fine because the
// macro is only expanded where AGC_VOLUME_LEVELS is used.)
#define AGC_VOLUME_LEVELS ((DV_ABS(AGC_VOLUME_MAXIMUM - AGC_VOLUME_MINIMUM) / AGC_VOLUME_TICKSIZE) + 1)
/*
// AGC_REDUCTION_THRESHOLD
//
// The peak level at which the recording volume
// must be reduced
#define AGC_REDUCTION_THRESHOLD 98
// AGC_INCREASE_THRESHOLD
//
// If the user's input remains under this threshold
// for an extended period of time, we will consider
// raising the input level.
#define AGC_INCREASE_THRESHOLD 70
// AGC_INCREASE_THRESHOLD_TIME
//
// How long must the input remain under the increase
// threshold to trigger an increase? (measured
// in milliseconds)
#define AGC_INCREASE_THRESHOLD_TIME 500
*/
// AGC_PEAK_CLIPPING_THRESHOLD
//
// A sample whose absolute value is strictly greater than this
// is counted as a clipped sample by AnalyzeData. The comparison is
// made on the 16 bit scale (8 bit samples are promoted first).
#define AGC_PEAK_CLIPPING_THRESHOLD 0x7e00
/*
// AGC_ENV_CLIPPING_THRESHOLD
//
// When we detect clipping via the threshold above,
// the 16 bit normalized envelope signal must be above
// this threshold for us to lower the input volume.
// This allows us to ignore intermittent spikes in
// the input.
#define AGC_ENV_CLIPPING_THRESHOLD 0x2000
// AGC_ENV_CLIPPING_COUNT_THRESHOLD
//
// For how many envelope samples does the envelope
// signal need to stay above the threshold value
// above in order to take the volume down a tick?
#define AGC_ENV_CLIPPING_COUNT_THRESHOLD 10
*/
// AGC_IDEAL_CLIPPING_RATIO
//
// What is the ideal ratio of clipped to total samples?
// E.g. a value of 0.005 says that we would like 5 out of
// every 1000 samples to clip. If we are getting less clipping,
// the volume should be increased. If we are getting more,
// the volume should be reduced.
//
// The value configured below (0.0005) asks for 5 out of every
// 10000 samples to clip.
//
// Note: only samples that are part of a frame detected as
// speech are considered.
#define AGC_IDEAL_CLIPPING_RATIO 0.0005
// AGC_CHANGE_THRESHOLD
//
// How far from the ideal does a volume level have to
// stray before we will consider changing the volume?
//
// E.g. If this value is 1.05, the history for a volume
// level would have to be 5% above or below the ideal
// value in order to have an AGC correction made.
//
// The value configured below (1.01) corresponds to 1%.
// In AGCResults this threshold is only consulted on the
// "lower the volume" path; raising the volume does not use it.
#define AGC_CHANGE_THRESHOLD 1.01
// AGC_CLIPPING_HISTORY
//
// How many milliseconds of history should we keep regarding
// the clipping behavior at a particular volume setting?
// E.g. a value of 10000 means that we remember the last
// 10 seconds of activity at each volume level.
//
// Init converts this to a sample count (m_dwHistorySamples), which is
// the weight given to the existing history when AGCResults folds a new
// frame's clipping ratio into it.
//
// Note: only samples that are part of a frame detected as
// speech are considered.
#define AGC_CLIPPING_HISTORY 1000
//#define AGC_CLIPPING_HISTORY 2000
//#define AGC_CLIPPING_HISTORY 5000
//#define AGC_CLIPPING_HISTORY 10000
//#define AGC_CLIPPING_HISTORY 30000 // it took AGC too long to recover
// from low volume levels with this
// setting
// AGC_FEEDBACK_ENV_THRESHOLD
//
// To detect a feedback condition, we check to see if the
// envelope signal has a value larger than AGC_FEEDBACK_ENV_THRESHOLD.
// If the envelope signal stays consistently above this level,
// for longer than AGC_FEEDBACK_TIME_THRESHOLD milliseconds, we conclude
// that feedback is occurring. Voice has a changing envelope, and will
// dip below the threshold on a regular basis. Feedback will not.
// This will allow us to automatically reduce the input volume
// when feedback is detected.
//
// NOTE(review): in the code visible in this file, the time threshold is
// only converted to a sample count in Init (m_iFeedbackSamples) and the
// envelope threshold is not referenced at all - the feedback check
// described above may live elsewhere or may no longer exist; confirm.
#define AGC_FEEDBACK_ENV_THRESHOLD 2500
#define AGC_FEEDBACK_TIME_THRESHOLD 1000
// AGC_DEADZONE_THRESHOLD
//
// If the input signal never goes above this value
// (16bits, promoted if required) for the deadzone time,
// then we consider the input to be in the dead zone,
// and the volume should be upticked.
// AnalyzeData compares each frame's peak absolute sample value
// against this threshold.
// #define AGC_DEADZONE_THRESHOLD 0 // This is too low - it does not reliably detect the deadzone
#define AGC_DEADZONE_THRESHOLD (1 << 8)
// AGC_DEADZONE_TIME
//
// How long (in milliseconds) we have to be in the deadzone before
// the deadzone increase kicks in - we need this to
// be longer than just one frame, or we get false
// positives.
// Init converts this to a sample count (m_iDeadZoneSampleThreshold).
#define AGC_DEADZONE_TIME 1000
// VA_HIGH_DELTA
//
// If the percent change in the envelope signal is greater
// than this value, voice is detected. Each point of this
// value is equal to 0.1%. E.g. 4000 == 400% increase.
// An unchanging signal produces a 100% value.
//#define VA_HIGH_DELTA 2000
//#define VA_HIGH_DELTA_FASTSLOW 0x7fffffff // select this to factor out this VA parameter
//#define VA_HIGH_DELTA_FASTSLOW 1400
//#define VA_HIGH_DELTA_FASTSLOW 1375 // current choice
//#define VA_HIGH_DELTA_FASTSLOW 1350
//#define VA_HIGH_DELTA_FASTSLOW 1325
//#define VA_HIGH_DELTA_FASTSLOW 1300
//#define VA_HIGH_DELTA_FASTSLOW 1275
//#define VA_HIGH_DELTA_FASTSLOW 1250
//#define VA_HIGH_DELTA_FASTSLOW 1200
//#define VA_HIGH_DELTA_FASTSLOW 1175 // catches all noise
//#define VA_HIGH_DELTA_FASTSLOW 1150 // catches all noise
//#define VA_HIGH_DELTA_FASTSLOW 1125 // catches all noise
//#define VA_HIGH_DELTA_FASTSLOW 1100 // catches all noise
// VA_LOW_DELTA
//
// If the percent change in the envelope signal is lower
// than this value, voice is detected. Each point of this
// value is equal to 0.1%. E.g. 250 == 25%
// (i.e. a decrease to 1/4 the original signal strength).
// An unchanging signal produces a 100% value.
//#define VA_LOW_DELTA 500
//#define VA_LOW_DELTA_FASTSLOW 0 // select this to factor out this VA parameter
//#define VA_LOW_DELTA_FASTSLOW 925
//#define VA_LOW_DELTA_FASTSLOW 900
//#define VA_LOW_DELTA_FASTSLOW 875
//#define VA_LOW_DELTA_FASTSLOW 850
//#define VA_LOW_DELTA_FASTSLOW 825
//#define VA_LOW_DELTA_FASTSLOW 800
//#define VA_LOW_DELTA_FASTSLOW 775 // current choice
//#define VA_LOW_DELTA_FASTSLOW 750
//#define VA_LOW_DELTA_FASTSLOW 725
//#define VA_LOW_DELTA_FASTSLOW 700
//#define VA_LOW_DELTA_FASTSLOW 675
//#define VA_LOW_DELTA_FASTSLOW 650
// The following VA parameters were optimized for what I believe to be
// the hardest configuration: A cheap open stick mic with external speakers,
// with Echo Suppression turned on. Echo suppression penalizes false positives
// harshly, since the receiver cannot send while receiving the "noise". If
// the VA parameters work for this case, then they should be fine for the
// much better signal to noise ratio provided by a headset or collar mic.
// (As long as the user does not breathe directly on the headset mic.)
//
// Two source-to-mic distances were tested during tuning.
//
// 1) Across an enclosed office (approx 8 to 10 feet)
// 2) Seated at the workstation (approx 16 to 20 inches)
//
// At distance 1, the AGC was never invoked, gain was at 100%
// At distance 2, the AGC would take the mic down a few ticks.
//
// The office environment had the background noise from 3 computers,
// a ceiling vent, and a surprisingly noisy fan from the ethernet
// hub. There was no background talking, cars, trains, or things of
// that nature.
//
// Each parameter was tuned separately to reject 100% of the
// background noise for case 1 (gain at 100%).
//
// Then they were tested together to see if they could detect
// across the room speech.
//
// Individually, none of the detection criteria could reliably
// detect all of the across the room speech. Together, they did
// not do much better. They even missed some speech while seated.
// Not very satisfactory.
//
// Therefore, I decided to abandon the attempt to detect across
// the room speech. I retuned the parameters to reject noise
// after speaking while seated (which allowed AGC to reduce
// the volume a couple of ticks, thereby increasing the signal
// to noise ratio) and to reliably detect seated speech.
//
// I also found that the "fast" envelope signal was better at
// detecting speech than the "slow" one in a straight threshold
// comparison, so it is used in the VA tests.
//
// VA_HIGH_PERCENT
//
// If the fast envelope signal is greater than this percentage
// of the slow envelope signal, speech is detected.
// E.g. with a value of 170, speech is detected when
// fast > (170 * slow) / 100. This test only counts when the fast
// envelope is also above VA_LOW_ENVELOPE.
//
#define VA_HIGH_PERCENT 170 // rejects most noise, still catches some.
// decent voice detection. Catches the beginning
// of speech a majority of the time, but does miss
// once in a while. Will often drop out partway
// into a phrase when used alone. Must test in
// conjunction with VA_LOW_PERCENT.
//
// After testing in conjunction with VA_LOW_PERCENT,
// the performance is reasonable. Low input volume
// signals are usually detected ok, but dropouts are
// a bit common. However, noise is sometimes still
// detected, so making these parameters more sensitive
// would not be useful.
//#define VA_HIGH_PERCENT 165 // catches occasional noise
//#define VA_HIGH_PERCENT 160 // catches too much noise
//#define VA_HIGH_PERCENT 150 // catches most noise
//#define VA_HIGH_PERCENT 140 // catches almost all noise
//#define VA_HIGH_PERCENT 0x00007fff // select this to factor out this VA parameter
// VA_LOW_PERCENT
//
// If the fast envelope signal is less than this percentage
// of the slow envelope signal, speech is detected.
// E.g. with a value of 50, speech is detected when
// fast < (50 * slow) / 100. This test only counts when the fast
// envelope is also above VA_LOW_ENVELOPE.
//
#define VA_LOW_PERCENT 50 // excellent noise rejection. poor detection of speech.
// when used alone, could miss entire phrases. Must evaluate
// in conjunction with tuned VA_HIGH_PERCENT
//
// See note above re: testing in conjunction with VA_HIGH_PERCENT
//#define VA_LOW_PERCENT 55 // still catches too much noise
//#define VA_LOW_PERCENT 60 // catches most noise
//#define VA_LOW_PERCENT 65 // catches most noise
//#define VA_LOW_PERCENT 70 // still catches almost all noise
//#define VA_LOW_PERCENT 75 // catches almost all noise
//#define VA_LOW_PERCENT 80 // catches all noise
//#define VA_LOW_PERCENT 0 // select this to factor out this VA parameter
// VA_HIGH_ENVELOPE
//
// If the 16 bit normalized value of the (fast) envelope exceeds
// this number, the signal is considered voice.
//
//#define VA_HIGH_ENVELOPE (15 << 8) // still catches high gain noise, starting to get
// speech dropouts, when "p" sounds lower the gain
#define VA_HIGH_ENVELOPE (14 << 8) // Noise immunity good at "seated" S/N ratio. No speech
// dropouts encountered. Still catches noise at full gain.
//#define VA_HIGH_ENVELOPE (13 << 8) // Noise immunity not as good as expected (new day).
//#define VA_HIGH_ENVELOPE (12 << 8) // Good noise immunity. Speech recognition excellent.
// Only one dropout occurred in the test with a 250ms
// hangover. I think the hangover time should be increased
// above 250 however, because a comma (properly read) tends
// to cause a dropout. I'm going to tune the hangover time,
// and return to this test.
//
// Hangover time is now 400ms. No dropouts occur with
// "seated" speech.
//#define VA_HIGH_ENVELOPE (11 << 8) // Catches almost no noise at "seated" gain
// however, if the gain crept up a bit, noise would
// be detected. I therefore think a slightly higher
// threshold would be a good idea. The speech recognition
// based on only this parameter at this level was flawless.
// No dropouts at all with a 250 ms hangover time. (commas
// excepted).
//#define VA_HIGH_ENVELOPE (10 << 8) // catches some noise at "seated" gain - getting very close
//#define VA_HIGH_ENVELOPE (9 << 8) // catches some noise at "seated" gain - getting close
//#define VA_HIGH_ENVELOPE (8 << 8) // catches noise at "seated" gain
//#define VA_HIGH_ENVELOPE (7 << 8) // catches noise at "seated" gain
//#define VA_HIGH_ENVELOPE (0x7fffffff) // select this to factor out this VA parameter
// VA_LOW_ENVELOPE
//
// If the 16 bit normalized value of the (fast) envelope is not
// above this number, the signal will never be considered voice,
// whatever the other VA tests say.
// This reduces some false positives on the delta checks
// at very low signal levels
#define VA_LOW_ENVELOPE (3 << 8)
//#define VA_LOW_ENVELOPE (2 << 8) // causes false VA at low input volumes
//#define VA_LOW_ENVELOPE (1 << 8) // causes false VA at low input volumes
// VA_HANGOVER_TIME
//
// The time, in milliseconds, that voice activation sticks in
// the ON position following a voice detection. E.g. a value of 500
// means that voice will always be transmitted in at least 1/2 second
// bursts. Init converts this to a sample count (m_iHangoverSamples).
//
// I am trying to tune this so that a properly read comma will not cause
// a dropout. This will give the user a bit of leeway to pause in the
// speech stream without losing the floor when in Echo Suppression mode.
// It will also prevent dropouts even when not in Echo Suppression mode
#define VA_HANGOVER_TIME 400 // this gives satisfying performance
//#define VA_HANGOVER_TIME 375 // almost there, longest commas still goners
//#define VA_HANGOVER_TIME 350 // still drops long commas
//#define VA_HANGOVER_TIME 325 // does not drop fast commas, drops long ones
//#define VA_HANGOVER_TIME 300 // drops almost no commas, quite good
//#define VA_HANGOVER_TIME 275 // drops about half of the commas
//#define VA_HANGOVER_TIME 250 // commas are always dropped
// macros to avoid clib dependencies
//
// WARNING: these are plain function-like macros, so each argument is
// evaluated more than once. Do not pass expressions with side effects
// (e.g. DV_MAX(i++, j)) or expensive calls.
#define DV_ABS(a) ((a) < 0 ? -(a) : (a))
#define DV_MAX(a, b) ((a) > (b) ? (a) : (b))
#define DV_MIN(a, b) ((a) < (b) ? (a) : (b))
// DV_log_1_354 - approximate integer logarithm, base 1.354.
//
// Purpose: map a small non-negative value onto the range 0..15 with a
// smooth non-linear curve that has more resolution at the low end than
// at the high end.
//
// The curve is floor(log(n) / log(1.354)). Base 1.354 is chosen because
// log(128) / log(1.354) == 16, so inputs 0..127 use the whole 0..15 range.
// Two entries are fudged for convenience: f(0) == 0 and f(1) == 1.
//
// Only the results for 0..94 are tabulated; every larger input maps to 15.
BYTE DV_LOG_1_354_lookup_table[95] =
{
    /*  0..15 */  0,  1,  2,  3,  4,  5,  5,  6,  6,  7,  7,  7,  8,  8,  8,  8,
    /* 16..31 */  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11,
    /* 32..47 */ 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    /* 48..63 */ 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    /* 64..79 */ 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    /* 80..94 */ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14
};

BYTE DV_log_1_354(BYTE n)
{
    // Number of tabulated inputs; anything past the end of the table is 15.
    const int cTableEntries =
        (int)(sizeof(DV_LOG_1_354_lookup_table) / sizeof(DV_LOG_1_354_lookup_table[0]));

    return (n < cTableEntries) ? DV_LOG_1_354_lookup_table[n] : (BYTE)15;
}
// DV_log_2 - integer base 2 logarithm of a 16 bit unsigned value.
//
// Returns the position (0..15) of the highest set bit of n, i.e. the
// base 2 log with any fractional part chopped off.
// log2(0) is undefined; by convention this function returns 0 for it
// (the same result as for n == 1).
BYTE DV_log_2(WORD n)
{
    BYTE bHighestBit = 0;

    // Shift the value down until only the top set bit (or nothing) is
    // left; the number of shifts is the index of that bit.
    while (n > 1)
    {
        n >>= 1;
        ++bHighestBit;
    }
    return bHighestBit;
}
#undef DPF_MODNAME
#define DPF_MODNAME "CAGCVA1::Init"
//
// Init - initializes the AGC and VA algorithms, including loading the saved
// AGC volume level from the registry.
//
// wszBasePath - registry path under HKEY_CURRENT_USER; DPVOICE_REGISTRY_AGC
//               is appended to it to form the AGC key path
// dwFlags - the dwFlags from the dvClientConfig structure
// guidCaptureDevice - the capture device we're performing AGC for
// iSampleRate - capture sample rate, in samples per second
// iBitsPerSample - sample width; must be 8 or 16
// plInitVolume - the initial volume level is written here
// dwSensitivity - threshold the frame peak is compared against when
//                 DVCLIENTCONFIG_AUTOVOICEACTIVATED is not set
//
// Returns DV_OK on success, DVERR_INVALIDPARAM for an unsupported sample
// width, or DVERR_OUTOFMEMORY if the AGC history cannot be allocated.
//
HRESULT CAGCVA1::Init(
    const WCHAR *wszBasePath,
    DWORD dwFlags,
    GUID guidCaptureDevice,
    int iSampleRate,
    int iBitsPerSample,
    LONG* plInitVolume,
    DWORD dwSensitivity)
{
    // Remember the number of bits per sample, if valid
    if (iBitsPerSample != 8 && iBitsPerSample != 16)
    {
        DPFX(DPFPREP,DVF_ERRORLEVEL, "Unexpected number of bits per sample!");
        return DVERR_INVALIDPARAM;
    }
    m_iBitsPerSample = iBitsPerSample;

    // Remember the flags
    m_dwFlags = dwFlags;

    // Remember the sensitivity
    m_dwSensitivity = dwSensitivity;

    // Figure out the shift constants for this sample rate
    m_iShiftConstantFast = (DV_log_2((iSampleRate * 2) / 1000) + 1);

    // This gives the slow filter a cutoff frequency 1/4 of
    // the fast filter
    m_iShiftConstantSlow = m_iShiftConstantFast + 2;

    // Figure out how often we should sample the envelope signal
    // to measure its change. This of course depends on the sample
    // rate. The cutoff frequency allowed by the calculation
    // above is between 40 and 80 Hz. Therefore we'll sample the
    // envelope signal at about 100 Hz.
    m_iEnvelopeSampleRate = iSampleRate / 100;

    // Figure out the number of samples in the configured
    // hangover time. The current hangover counter starts out
    // already expired so that no hangover is active at startup.
    m_iHangoverSamples = (VA_HANGOVER_TIME * iSampleRate) / 1000;
    m_iCurHangoverSamples = m_iHangoverSamples+1;

    // Figure out the number of samples in the configured dead zone time
    m_iDeadZoneSampleThreshold = (AGC_DEADZONE_TIME * iSampleRate) / 1000;

    // Figure out the number of samples in the configured
    // feedback threshold time.
    m_iFeedbackSamples = (AGC_FEEDBACK_TIME_THRESHOLD * iSampleRate) / 1000;

    // Start the envelope signal at zero
    m_iCurEnvelopeValueFast = 0;
    m_iCurEnvelopeValueSlow = 0;
    m_iPrevEnvelopeSample = 0;
    m_iCurSampleNum = 0;

    // NOTE(review): m_iDeadZoneSamples and m_fAGCLastFrameAdjusted are read by
    // AGCResults but are not reset here - presumably the constructor
    // initializes them; confirm.

    // We're not clipping now
    //m_fClipping = 0;
    //m_iClippingCount = 0;

    DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1:INIT:%i,%i,%i,%i,%i",
        iSampleRate,
        m_iShiftConstantFast,
        m_iShiftConstantSlow,
        m_iEnvelopeSampleRate,
        m_iHangoverSamples);

    // Save the guid in our local member...
    m_guidCaptureDevice = guidCaptureDevice;

    // Build the registry path of the AGC key.
    // NOTE(review): these copies are unbounded; the capacity of m_wszRegPath
    // is not visible here - confirm it can hold the base path plus suffix.
    wcscpy( m_wszRegPath, wszBasePath );
    wcscat( m_wszRegPath, DPVOICE_REGISTRY_AGC );

    // if the AGC reset flag is set, reset the AGC parameters,
    // otherwise grab them from the registry
    if (m_dwFlags & DVCLIENTCONFIG_AUTOVOLUMERESET)
    {
        m_lCurVolume = DSBVOLUME_MAX;
    }
    else
    {
        CRegistry cregBase;
        if( !cregBase.Open( HKEY_CURRENT_USER, m_wszRegPath, FALSE, TRUE ) )
        {
            m_lCurVolume = DSBVOLUME_MAX;
        }
        else
        {
            CRegistry cregCapture;

            // The FALSE, TRUE arguments belong to Open() (the same arguments
            // the base key open above and Deinit use). They used to sit
            // outside the call's closing parenthesis, which turned the
            // condition into a comma expression that was always TRUE.
            if (!cregCapture.Open( cregBase.GetHandle(), &m_guidCaptureDevice, FALSE, TRUE ))
            {
                // no per-device key, fall back to full volume
                m_lCurVolume = DSBVOLUME_MAX;
            }
            else if (!cregCapture.ReadDWORD( DPVOICE_REGISTRY_SAVEDAGCLEVEL, (DWORD*)&m_lCurVolume ))
            {
                // key exists but holds no saved level, fall back to full volume
                m_lCurVolume = DSBVOLUME_MAX;
            }
            else
            {
                // boost the saved volume a bit
                m_lCurVolume += AGC_VOLUME_INITIAL_UPTICK;
                if (m_lCurVolume > DSBVOLUME_MAX)
                {
                    m_lCurVolume = DSBVOLUME_MAX;
                }
            }
        }
    }

    /*
    // zero out the historgrams
    memset(m_rgdwPeakHistogram, 0, CAGCVA1_HISTOGRAM_BUCKETS*sizeof(DWORD));
    memset(m_rgdwZeroCrossingsHistogram, 0, CAGCVA1_HISTOGRAM_BUCKETS*sizeof(DWORD));
    */

    // allocate the memory for the AGC history (one entry per volume level)
    m_rgfAGCHistory = new float[AGC_VOLUME_LEVELS];
    if (m_rgfAGCHistory == NULL)
    {
        return DVERR_OUTOFMEMORY;
    }

    // initialize the history to the ideal value
    for (int iIndex = 0; iIndex < AGC_VOLUME_LEVELS; ++iIndex)
    {
        m_rgfAGCHistory[iIndex] = (float)AGC_IDEAL_CLIPPING_RATIO;
    }

    // the history length, expressed in samples
    m_dwHistorySamples = (iSampleRate * AGC_CLIPPING_HISTORY) / 1000;

    // stuff the initial volume into the caller's variable
    *plInitVolume = m_lCurVolume;

    return DV_OK;
}
#undef DPF_MODNAME
#define DPF_MODNAME "CAGCVA1::Deinit"
//
// Deinit - saves the current AGC volume level to the registry for use next
// session, and releases the AGC history.
//
// Returns DV_OK if the level was written, DVERR_WIN32 if either registry key
// could not be opened or the value could not be written. The history is
// released in every case.
//
HRESULT CAGCVA1::Deinit()
{
    HRESULT hr = DV_OK;

    CRegistry cregBase;
    if(cregBase.Open( HKEY_CURRENT_USER, m_wszRegPath, FALSE, TRUE ) )
    {
        CRegistry cregDevice;
        if (cregDevice.Open( cregBase.GetHandle(), &m_guidCaptureDevice, FALSE, TRUE))
        {
            if (!cregDevice.WriteDWORD( DPVOICE_REGISTRY_SAVEDAGCLEVEL, (DWORD&)m_lCurVolume ))
            {
                DPFX(DPFPREP,DVF_ERRORLEVEL, "Error writing AGC settings to registry");
                hr = DVERR_WIN32;
            }
        }
        else
        {
            DPFX(DPFPREP,DVF_ERRORLEVEL, "Error writing AGC settings to registry");
            hr = DVERR_WIN32;
        }
    }
    else
    {
        DPFX(DPFPREP,DVF_ERRORLEVEL, "Error writing AGC settings to registry");
        hr = DVERR_WIN32;
    }

    // Release the history and forget the pointer, so that a second Deinit
    // (delete [] of NULL is a no-op) cannot free the same block twice.
    delete [] m_rgfAGCHistory;
    m_rgfAGCHistory = NULL;

    return hr;
}
#undef DPF_MODNAME
#define DPF_MODNAME "CAGCVA1::SetSensitivity"
//
// SetSensitivity - updates the voice activation mode and sensitivity
//
// dwFlags - only the DVCLIENTCONFIG_AUTOVOICEACTIVATED bit is examined; the
//           stored flag is set or cleared to match, other stored flags are
//           left alone
// dwSensitivity - the new sensitivity value
//
HRESULT CAGCVA1::SetSensitivity(DWORD dwFlags, DWORD dwSensitivity)
{
    const BOOL fAutoActivated = (dwFlags & DVCLIENTCONFIG_AUTOVOICEACTIVATED) != 0;

    m_dwFlags = fAutoActivated
        ? (m_dwFlags | DVCLIENTCONFIG_AUTOVOICEACTIVATED)
        : (m_dwFlags & ~DVCLIENTCONFIG_AUTOVOICEACTIVATED);

    m_dwSensitivity = dwSensitivity;

    return DV_OK;
}
#undef DPF_MODNAME
#define DPF_MODNAME "CAGCVA1::GetSensitivity"
//
// GetSensitivity - reports the current voice activation mode and sensitivity
//
// pdwFlags - in/out; the DVCLIENTCONFIG_AUTOVOICEACTIVATED bit is set or
//            cleared to match the stored flags, all other bits are left
//            untouched
// pdwSensitivity - receives the stored sensitivity value
//
HRESULT CAGCVA1::GetSensitivity(DWORD* pdwFlags, DWORD* pdwSensitivity)
{
    // Report the same flag that SetSensitivity stores. This used to test and
    // report DVCLIENTCONFIG_AUTORECORDVOLUME, which SetSensitivity never
    // touches, so a Set followed by a Get did not round-trip.
    if (m_dwFlags & DVCLIENTCONFIG_AUTOVOICEACTIVATED)
    {
        *pdwFlags |= DVCLIENTCONFIG_AUTOVOICEACTIVATED;
    }
    else
    {
        *pdwFlags &= ~DVCLIENTCONFIG_AUTOVOICEACTIVATED;
    }

    *pdwSensitivity = m_dwSensitivity;

    return DV_OK;
}
#undef DPF_MODNAME
#define DPF_MODNAME "CAGCVA1::AnalyzeData"
//
// AnalyzeData - runs the AGC and VA measurements over one frame of audio
//
// pbAudioData - the frame's sample data (8 bit samples are read as unsigned
//               bytes centred on 0x80, 16 bit samples as signed shorts)
// dwAudioDataSize - size of the frame, in bytes; must be at least 1
//
// Returns DV_OK, or DVERR_INVALIDPARAM for an empty frame.
//
// The per-frame results (peak level, voice detected flag, dead zone flag and
// the clipping / non-clipping sample counts) are left in member variables,
// where AGCResults, VAResults and PeakResults pick them up.
//
HRESULT CAGCVA1::AnalyzeData(BYTE* pbAudioData, DWORD dwAudioDataSize /*, DWORD dwFrameTime*/)
{
    if (dwAudioDataSize < 1)
    {
        DPFX(DPFPREP,DVF_ERRORLEVEL, "Error: Audio Data Size < 1");
        return DVERR_INVALIDPARAM;
    }

    // A 16 bit view of the same buffer, used when the samples are 16 bits wide
    const signed short* pnSamples16 = (signed short *)pbAudioData;

    // Work out how many samples the frame holds, and the largest magnitude
    // a sample can have on the 16 bit scale
    int cSamples;
    int iFullScalePeak;
    if (m_iBitsPerSample == 16)
    {
        cSamples = dwAudioDataSize / 2;
        iFullScalePeak = 0x7fff;
    }
    else
    {
        cSamples = dwAudioDataSize;
        iFullScalePeak = 0x7f00;
    }

    // Reset the per-frame results. The dead zone flag starts out set and is
    // cleared below if the frame's peak turns out to be loud enough.
    m_fDeadZoneDetected = TRUE;
    m_iClippingSampleCount = 0;
    m_iNonClippingSampleCount = 0;
    m_fVoiceDetectedThisFrame = FALSE;

    int iFramePeak = 0;

    for (int iSample = 0; iSample < (int)cSamples; ++iSample)
    {
        ++m_iCurSampleNum;

        // Pull out the magnitude of this sample, on the 16 bit scale
        int iMagnitude;
        if (m_iBitsPerSample == 8)
        {
            // 8 bit samples are unsigned with the zero level at 0x80;
            // promote the magnitude to 16 bits
            iMagnitude = DV_ABS((int)pbAudioData[iSample] - 0x80);
            iMagnitude <<= 8;
        }
        else
        {
            iMagnitude = DV_ABS((int)pnSamples16[iSample]);
        }

        // Track the loudest sample of the frame
        if (iMagnitude > iFramePeak)
        {
            iFramePeak = iMagnitude;
        }

        // The envelope filters and the automatic voice detection only run
        // in auto voice activation mode
        if (m_dwFlags & DVCLIENTCONFIG_AUTOVOICEACTIVATED)
        {
            // Recursive low pass filters: each accumulator gives up a
            // 1/2^shift share of itself and takes in the new magnitude.
            // Shifting the accumulator back down yields the normalized
            // envelope value.
            m_iCurEnvelopeValueFast =
                iMagnitude +
                (m_iCurEnvelopeValueFast - (m_iCurEnvelopeValueFast >> m_iShiftConstantFast));
            const int iEnvelopeFast = m_iCurEnvelopeValueFast >> m_iShiftConstantFast;

            m_iCurEnvelopeValueSlow =
                iMagnitude +
                (m_iCurEnvelopeValueSlow - (m_iCurEnvelopeValueSlow >> m_iShiftConstantSlow));
            const int iEnvelopeSlow = m_iCurEnvelopeValueSlow >> m_iShiftConstantSlow;

            // Voice requires the fast envelope to clear the low floor, and
            // then any one of: a high absolute level, or the fast envelope
            // straying far enough above or below the slow one.
            const BOOL fVoiceNow =
                iEnvelopeFast > VA_LOW_ENVELOPE &&
                (iEnvelopeFast > VA_HIGH_ENVELOPE ||
                 iEnvelopeFast > (VA_HIGH_PERCENT * iEnvelopeSlow) / 100 ||
                 iEnvelopeFast < (VA_LOW_PERCENT * iEnvelopeSlow) / 100 );

            if (fVoiceNow)
            {
                // voice right now: restart the hangover period
                m_fVoiceDetectedNow = TRUE;
                m_fVoiceDetectedThisFrame = TRUE;
                m_fVoiceHangoverActive = TRUE;
                m_iCurHangoverSamples = 0;
            }
            else
            {
                // no voice right now, but the frame still counts as voice
                // for as long as the hangover period has not run out
                m_fVoiceDetectedNow = FALSE;
                ++m_iCurHangoverSamples;
                if (m_iCurHangoverSamples <= m_iHangoverSamples)
                {
                    m_fVoiceHangoverActive = TRUE;
                    m_fVoiceDetectedThisFrame = TRUE;
                }
                else
                {
                    m_fVoiceHangoverActive = FALSE;
                }
            }
        }

        // Tally the sample as clipped or not clipped
        if (iMagnitude > AGC_PEAK_CLIPPING_THRESHOLD)
        {
            ++m_iClippingSampleCount;
        }
        else
        {
            ++m_iNonClippingSampleCount;
        }
    }

    // Scale the frame peak into the range DVINPUTLEVEL_MIN to
    // DVINPUTLEVEL_MAX. This is the value handed out for peak meters.
    m_bPeak = (BYTE)(DVINPUTLEVEL_MIN +
        ((iFramePeak * (DVINPUTLEVEL_MAX - DVINPUTLEVEL_MIN)) / iFullScalePeak));

    // In manual voice activation mode the frame is voice whenever the
    // scaled peak is above the configured sensitivity
    if (!(m_dwFlags & DVCLIENTCONFIG_AUTOVOICEACTIVATED) && m_bPeak > m_dwSensitivity)
    {
        m_fVoiceDetectedThisFrame = TRUE;
    }

    // A frame whose peak clears the dead zone threshold is not in the dead zone
    if (iFramePeak > AGC_DEADZONE_THRESHOLD)
    {
        m_fDeadZoneDetected = FALSE;
    }

    DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1:ANA,%i,%i,%i,%i,%i,%i",
        m_bPeak,
        iFramePeak,
        m_fVoiceDetectedThisFrame,
        m_fDeadZoneDetected,
        m_iClippingSampleCount,
        m_iNonClippingSampleCount);

    return DV_OK;
}
#undef DPF_MODNAME
#define DPF_MODNAME "CAGCVA1::AGCResults"
//
// AGCResults - decides the recording volume to use next, based on the
// measurements left behind by the previous AnalyzeData call
//
// lCurVolume - the current recording volume
// plNewVolume - stuffed with the desired new recording volume (equal to
//               lCurVolume when no change is wanted)
// fTransmitFrame - when FALSE, the clipping history is not updated and no
//                  clipping-based adjustment is made; only the dead zone
//                  check runs (presumably TRUE when the frame was judged to
//                  be speech and is being sent - confirm with the caller)
//
// Always returns DV_OK. The chosen volume is also remembered in m_lCurVolume.
//
HRESULT CAGCVA1::AGCResults(LONG lCurVolume, LONG* plNewVolume, BOOL fTransmitFrame)
{
// default to keeping the same volume
*plNewVolume = lCurVolume;
// Figure out what volume level we're at. Level 0 is the maximum volume,
// and each higher level is one tick quieter. The index is clamped to the
// last history bucket.
int iVolumeLevel = DV_MIN(DV_ABS(AGC_VOLUME_MAXIMUM - lCurVolume) / AGC_VOLUME_TICKSIZE,
AGC_VOLUME_LEVELS - 1);
//DPFX(DPFPREP, DVF_INFOLEVEL, "AGCVA1:AGC,Cur Volume:%i,%i",lCurVolume, iVolumeLevel);
// Don't make another adjustment if we have just done one.
// This ensures that when we start looking at input data
// again, it will be post-adjustment data.
if( m_fAGCLastFrameAdjusted )
{
m_fAGCLastFrameAdjusted = FALSE;
}
else
{
// check for a dead zone condition
if (m_fDeadZoneDetected /* || m_rgfAGCHistory[iVolumeLevel] == 0.0 */)
{
// We may be in the dead zone (volume way too low).
// Before we take the drastic action of sweeping the volume
// up, make sure we've been here long enough to be sure
// we're too low. The count is the total number of samples in
// consecutive dead zone frames.
m_iDeadZoneSamples += (m_iClippingSampleCount + m_iNonClippingSampleCount);
if (m_iDeadZoneSamples > m_iDeadZoneSampleThreshold)
{
// The input volume has been lowered too far. We're not
// getting any input at all. To remedy this situation,
// we boost the volume by one tick now.
// NOTE: the original comment here said the current level was
// also marked off limits by zeroing its history. The code below
// does not do that (and the matching "history == 0.0" test above
// is commented out); it only resets the history of the level
// being moved to.
if (iVolumeLevel != 0)
{
// We also reset the history of the volume level we are going to,
// so we start with a clean slate.
m_rgfAGCHistory[iVolumeLevel-1] = (const float)AGC_IDEAL_CLIPPING_RATIO;
*plNewVolume = DV_MIN(lCurVolume + AGC_VOLUME_TICKSIZE, AGC_VOLUME_MAXIMUM);
m_fAGCLastFrameAdjusted = TRUE;
}
}
}
else
{
// any frame that is not in the dead zone restarts the count
m_iDeadZoneSamples = 0;
}
if (fTransmitFrame)
{
// Factor this frame's clipping ratio into the appropriate history bucket.
// The existing history is weighted as if it represented
// m_dwHistorySamples samples, and this frame's samples are added to it.
m_rgfAGCHistory[iVolumeLevel] =
(m_iClippingSampleCount + (m_rgfAGCHistory[iVolumeLevel] * m_dwHistorySamples))
/ (m_iClippingSampleCount + m_iNonClippingSampleCount + m_dwHistorySamples);
if (m_rgfAGCHistory[iVolumeLevel] > AGC_IDEAL_CLIPPING_RATIO)
{
// Only consider lowering the volume if we clipped on this frame.
if (m_iClippingSampleCount > 0)
{
// we're clipping too much at this level, consider reducing
// the volume.
if (iVolumeLevel >= AGC_VOLUME_LEVELS - 1)
{
// we're already at the lowest volume level that we have
// a bucket for. Make sure we're clamped to the minimum
if (lCurVolume > AGC_VOLUME_MINIMUM)
{
*plNewVolume = AGC_VOLUME_MINIMUM;
m_fAGCLastFrameAdjusted = TRUE;
//DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1:AGC,too much clipping, clamping volume to min: %i", *plNewVolume);
}
}
else
{
// Choose either this volume level, or the next lower
// one, depending on which has the history that is
// closest to the ideal.
// "Distance" is the ratio of a level's history to the ideal
// ratio, inverted when below 1 so that it is always >= 1.
// (For the current level the enclosing branch has already
// established that the history exceeds the ideal, so the
// inversion is not expected to trigger for it.)
float fCurDistanceFromIdeal = (float)(m_rgfAGCHistory[iVolumeLevel] / AGC_IDEAL_CLIPPING_RATIO);
if (fCurDistanceFromIdeal < 1.0)
{
fCurDistanceFromIdeal = (float)(1.0 / fCurDistanceFromIdeal);
}
float fLowerDistanceFromIdeal = (float)(m_rgfAGCHistory[iVolumeLevel+1] / (float)AGC_IDEAL_CLIPPING_RATIO);
if (fLowerDistanceFromIdeal < 1.0)
{
fLowerDistanceFromIdeal = (float)(1.0 / fLowerDistanceFromIdeal);
}
if (fLowerDistanceFromIdeal < fCurDistanceFromIdeal
&& fCurDistanceFromIdeal > AGC_CHANGE_THRESHOLD)
{
// The next lower volume level is closer to the ideal
// clipping ratio. Take the volume down a tick.
*plNewVolume = DV_MAX(lCurVolume - AGC_VOLUME_TICKSIZE, AGC_VOLUME_MINIMUM);
m_fAGCLastFrameAdjusted = TRUE;
//DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1:AGC,too much clipping, setting volume to: %i", *plNewVolume);
}
}
}
}
else
{
// we're clipping too little at this level, consider increasing
// the volume.
if (iVolumeLevel == 0)
{
// We're already at the highest volume level.
// Make sure we're at the max
if (lCurVolume != AGC_VOLUME_MAXIMUM)
{
*plNewVolume = AGC_VOLUME_MAXIMUM;
m_fAGCLastFrameAdjusted = TRUE;
//DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1:AGC,too little clipping, clamping volume to max: %i", *plNewVolume);
}
}
else
{
// We always increase the volume in this case, and let it push back down if
// it clips again. This will continue testing the upper volume limit, and
// help dig us out of "too low" volume holes.
*plNewVolume = DV_MIN(lCurVolume + AGC_VOLUME_TICKSIZE, AGC_VOLUME_MAXIMUM);
m_fAGCLastFrameAdjusted = TRUE;
}
}
}
}
// remember the decision; Deinit saves this value to the registry
m_lCurVolume = *plNewVolume;
// dump profiling data, in an easily importable format
// (the history value is logged in parts per million)
DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1:AGC,%i,%i,%i,%i,%i,%i,%i",
m_fVoiceDetectedThisFrame,
m_fDeadZoneDetected,
iVolumeLevel,
(int)(m_rgfAGCHistory[iVolumeLevel]*1000000),
m_iClippingSampleCount,
m_iNonClippingSampleCount,
m_lCurVolume);
return DV_OK;
}
#undef DPF_MODNAME
#define DPF_MODNAME "CAGCVA1::VAResults"
//
// VAResults - reports the voice activation result of the previous
// AnalyzeData call
//
// pfVoiceDetected - optional; when not NULL it receives TRUE if voice was
//                   detected in the frame, FALSE otherwise
//
// Always returns DV_OK.
//
HRESULT CAGCVA1::VAResults(BOOL* pfVoiceDetected)
{
    // The caller is allowed to pass NULL, in which case there is nothing to do
    if (pfVoiceDetected == NULL)
    {
        return DV_OK;
    }

    *pfVoiceDetected = m_fVoiceDetectedThisFrame;
    return DV_OK;
}
#undef DPF_MODNAME
#define DPF_MODNAME "CAGCVA1::PeakResults"
//
// PeakResults - returns the peak sample value from the previous AnalyzeData
// call, normalized to the range DVINPUTLEVEL_MIN to DVINPUTLEVEL_MAX
//
// pbPeakValue - pointer to a byte where the peak value is written
//
// Returns DV_OK, or DVERR_INVALIDPARAM if pbPeakValue is NULL.
//
HRESULT CAGCVA1::PeakResults(BYTE* pbPeakValue)
{
    // Refuse a NULL destination instead of faulting on the write below
    if (pbPeakValue == NULL)
    {
        DPFX(DPFPREP,DVF_ERRORLEVEL, "Error: NULL peak value pointer");
        return DVERR_INVALIDPARAM;
    }

    DPFX(DPFPREP,DVF_INFOLEVEL, "AGCVA1: peak value: %i" , m_bPeak);
    *pbPeakValue = m_bPeak;
    return DV_OK;
}