Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

379 lines
13 KiB

/*++
Copyright (c) 1998 - 1999 Microsoft Corporation
Module Name:
timing.c
Abstract: This module contains routines to perform X86 specific timing functions
Environment:
Kernel mode
@@BEGIN_DDKSPLIT
Author:
MarcAnd 12-Oct-1998
Revision History:
@@END_DDKSPLIT
--*/
#include "hidgame.h"
#ifdef ALLOC_PRAGMA
#pragma alloc_text (PAGE, HGM_x86IsClockAvailable)
#pragma alloc_text (PAGE, HGM_x86SampleClocks)
#pragma alloc_text (PAGE, HGM_x86CounterInit)
#endif
/*****************************************************************************
*
* @doc INTERNAL
*
* @func LARGE_INTEGER | HGM_x86ReadCounter |
*
* Read the x86 CPU Time Stamp Counter
* This function is not pageable as it is called from DISPATCH_LEVEL
*
* @parm IN PLARGE_INTEGER | Dummy |
*
* Unused parameter to match KeQueryPerformanceCounter
*
* @returns LARGE_INTEGER Counter value
*
*****************************************************************************/
_declspec( naked ) LARGE_INTEGER EXTERNAL
HGM_x86ReadCounter
(
IN PLARGE_INTEGER Dummy
)
{
/*
 * Naked function: the compiler generates no prologue or epilogue, so the
 * body below is the entire function and must perform its own return.
 *
 * The RDTSC instruction is emitted as its raw opcode bytes (0F 31) rather
 * than by mnemonic. RDTSC loads the 64-bit time stamp counter into EDX:EAX.
 * NOTE(review): this relies on the x86 convention of returning a 64-bit
 * LARGE_INTEGER in EDX:EAX - confirm for the compiler in use.
 */
#define RDTSC __asm _emit 0x0f __asm _emit 0x31
__asm RDTSC
/*
 * Return and discard SIZE Dummy bytes of stack, i.e. the one (unused)
 * argument. NOTE(review): presumes EXTERNAL expands to a callee-cleanup
 * calling convention such as __stdcall - confirm in hidgame.h.
 */
__asm ret SIZE Dummy
}
/*****************************************************************************
*
* @doc INTERNAL
*
* @func BOOLEAN | HGM_x86IsClockAvailable |
*
* Use direct processor interrogation to see if the current CPU
* supports the RDTSC instruction.
*
* @rvalue TRUE | instruction supported
* @rvalue FALSE | instruction not supported
*
*****************************************************************************/
BOOLEAN INTERNAL
HGM_x86IsClockAvailable
(
VOID
)
{
/*
 * Two-step probe:
 *  1. Try to toggle the ID flag (bit 21, mask 0x00200000) in EFLAGS. If the
 *     bit cannot be changed the CPUID instruction is not supported.
 *  2. Execute CPUID with EAX = 1 and test bit 4 (mask 0x00000010) of the
 *     feature flags returned in EDX, which reports time stamp counter
 *     (RDTSC) support.
 * Either failure jumps straight to the Done label with rc still FALSE.
 *
 * CPUID is emitted as its raw opcode bytes (0F A2) rather than by mnemonic.
 */
#define CPU_ID __asm _emit 0x0f __asm _emit 0xa2
BOOLEAN rc = FALSE;
__asm
{
pushfd // Store original EFLAGS on stack
pop eax // Get original EFLAGS in EAX
mov ecx, eax // Duplicate original EFLAGS in ECX for toggle check
xor eax, 0x00200000L // Flip ID bit (bit 21) in EFLAGS
push eax // Save new EFLAGS value on stack
popfd // Replace current EFLAGS value
pushfd // Store new EFLAGS on stack
pop eax // Get new EFLAGS in EAX
xor eax, ecx // Can we toggle ID bit? Non-zero means a bit changed
jz Done // Jump if no, Processor is older than a Pentium so CPU_ID is not supported
// Note: the original EFLAGS value saved in ECX is not written back, so if
// the toggle succeeded the flipped ID bit is left in EFLAGS.
mov eax, 1 // Set EAX to tell the CPUID instruction what to return
push ebx // Don't corrupt EBX (CPUID overwrites EAX, EBX, ECX and EDX)
CPU_ID // Get family/model/stepping/features
pop ebx
test edx, 0x00000010L // Check if RDTSC is available (feature bit 4)
jz Done // Jump if no
}
/*
 * Only reached by falling out of the asm block, i.e. both checks passed.
 */
rc = TRUE;
Done:
return( rc );
} /* HGM_x86IsClockAvailable */
/*****************************************************************************
*
* @doc INTERNAL
*
* @func VOID | HGM_x86SampleClocks |
*
* Sample the CPU time stamp counter and KeQueryPerformanceCounter
* and retry until the time between samples does not improve for
* three consecutive loops. This should ensure that the sampling is
* done without interruption on the fastest time. It does not
* matter that the timing is not the same for all iterations as
* any interruption should cause a much larger delay than small
* differences in loop logic.
* NOTE: Do not put any debug output in this routine as the counter
* reported by KeQueryPerformanceCounter, depending on implementation,
* may 'slip' relative to the CPU counter.
*
* @parm OUT PULONGLONG | pTSC |
*
* Pointer to a ULONGLONG into which sampled CPU time is stored.
*
* @parm OUT PULONGLONG | pQPC |
*
* Pointer to a ULONGLONG into which sampled performance counter is
* stored.
*
*****************************************************************************/
VOID INTERNAL
HGM_x86SampleClocks
(
OUT PULONGLONG pTSC,
OUT PULONGLONG pQPC
)
{
    ULONGLONG PrevQPC;                          /* QPC reading that opened the current interval */
    ULONGLONG CurQPC;                           /* QPC reading that closes the current interval */
    ULONGLONG CurTSC;                           /* TSC reading taken inside the current interval */
    ULONGLONG Elapsed;                          /* QPC ticks spanned by the current interval    */
    ULONGLONG BestElapsed = (ULONGLONG)-1;      /* shortest interval seen so far                */
    int Remaining;                              /* non-improving passes still allowed           */

    /*
     * Prime the "previous" QPC value so the first pass has an interval to
     * measure.
     *
     * The first pass is expected to beat the initial BestElapsed and so
     * publish a sample pair. The only way it would not is an interval of
     * exactly (ULONGLONG)-1, i.e. QPC stepping back by one tick; were that
     * to happen on three passes in a row the outputs would never be written.
     * No abort path exists for that case.
     */
    CurQPC = KeQueryPerformanceCounter( NULL ).QuadPart;

    for( Remaining = 3; Remaining != 0; )
    {
        PrevQPC = CurQPC;

        /*
         * Take the two readings back to back, TSC first then QPC, so the
         * TSC sample is bracketed by PrevQPC and CurQPC.
         */
        CurTSC = HGM_x86ReadCounter( NULL ).QuadPart;
        CurQPC = KeQueryPerformanceCounter( NULL ).QuadPart;

        Elapsed = CurQPC - PrevQPC;

        if( Elapsed >= BestElapsed )
        {
            /*
             * No improvement on the best interval; use up one retry.
             */
            Remaining--;
            continue;
        }

        /*
         * Tightest interval yet: record this pair as the result and allow
         * three further passes to try to better it.
         */
        BestElapsed = Elapsed;
        Remaining = 3;
        *pQPC = CurQPC;
        *pTSC = CurTSC;
    }
} /* HGM_x86SampleClocks */
/*****************************************************************************
*
* @doc INTERNAL
*
* @func BOOLEAN | HGM_x86CounterInit |
*
* Detect and, if present, calibrate an x86 Time Stamp Counter.
*
* Windows 98 ntkern does not export KeNumberProcessors (even though
* it is in wdm.h) so there is no really simple run-time test for
* multiple processors. Given the remote chance of finding a system
* with processors that do not symmetrically support RDTSC assume that
* the worst that can happen is very jittery axis data.
* Better almost-symmetric-multi-processor support could be added most
* easily by dropping Windows 98 support and using non-WDM functions.
*
* @rvalue TRUE | specific counter function has been set up
* @rvalue FALSE | no specific counter function set up, default needed
*
*****************************************************************************/
BOOLEAN EXTERNAL
HGM_x86CounterInit()
{
    LARGE_INTEGER QPCFreq;
    BOOLEAN rf = FALSE;

    /*
     * Called only for the frequency it stores in QPCFreq; the counter value
     * it returns is not needed here.
     */
    KeQueryPerformanceCounter( &QPCFreq );

    if( ( QPCFreq.HighPart == 0 )
     && ( QPCFreq.LowPart <= 10000 ) )
    {
        /*
         * If the performance counter is too slow to use, bail as there's
         * probably something more serious wrong. This is only a warning
         * as the caller will try again to use QPC for the default and will
         * make more fuss then if it fails there as well.
         */
        HGM_DBGPRINT(FILE_TIMING | HGM_WARN,\
            ("QPC unusable at reported %I64u Hz", QPCFreq.QuadPart ));
    }
    else if( !HGM_x86IsClockAvailable() )
    {
        HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
            ("No RDTSC available, using %I64u Hz QPC", QPCFreq.QuadPart ));
    }
    else if( QPCFreq.HighPart )
    {
        /*
         * If the query performance counter runs at 2^32 Hz (about 4.29GHz)
         * or more then it is probably CPU based and this is plenty fast
         * enough.
         * Use the QPC to reduce the risk of an extended delay causing an
         * overflow in the scale calculations.
         */
        HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
            ("QPC too fast not to use at %I64u Hz", QPCFreq.QuadPart ));
    }
    else
    {
        /*
         * From here on QPCFreq.HighPart is known to be zero, so the QPC
         * frequency fits in 32 bits.
         */
        ULONGLONG QPCStart;
        ULONGLONG TSCStart;
        ULONGLONG QPCEnd;
        ULONGLONG TSCEnd;
        ULONGLONG QPCDelta;
        LARGE_INTEGER TSCFreq;
        LARGE_INTEGER Delay;

        /*
         * Negative value: a relative interval, in 100ns units (5ms).
         */
        Delay.QuadPart = -50000;

        /*
         * Trivial rejections are now out of the way. Get a pair of start
         * time samples, then delay for long enough to allow both timers to
         * increase by a significant amount, then get a pair of end samples.
         * KeDelayExecutionThread is used to delay 5ms but if the actual
         * delay is longer this is taken into account in the calculation.
         * Debug output is kept out of the sampled region because it could
         * make the QPC 'slip' relative to the CPU counter.
         */
        HGM_x86SampleClocks( &TSCStart, &QPCStart );
        KeDelayExecutionThread(KernelMode, FALSE, &Delay);
        HGM_x86SampleClocks( &TSCEnd, &QPCEnd );

        QPCDelta = QPCEnd - QPCStart;

        HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
            ("RDTSC: Start: %I64u End: %I64u delta: %I64u",
            TSCStart, TSCEnd, TSCEnd - TSCStart ));
        HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
            ("QPC: Start: %I64u End: %I64u delta: %I64u",
            QPCStart, QPCEnd, QPCDelta ));

        TSCFreq.QuadPart = (TSCEnd - TSCStart);

        if( TSCFreq.HighPart )
        {
            /*
             * The TSC delta does not fit in 32 bits: either the delay
             * allowed the TSC to tick 2^32 times or more, or the end sample
             * was lower than the start sample and the unsigned subtraction
             * wrapped. Either way bail as that would indicate a
             * calibration error.
             */
            HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
                ("Clock sample failed, using %I64u Hz QPC",
                QPCFreq.QuadPart ));
        }
        else if( QPCDelta == 0 )
        {
            /*
             * The QPC did not advance between the two samples. Dividing by
             * the delta below would be a divide by zero, so treat this as a
             * failed calibration and leave the caller on the default QPC.
             */
            HGM_DBGPRINT(FILE_TIMING | HGM_WARN,\
                ("QPC did not advance during sample, using %I64u Hz QPC",
                QPCFreq.QuadPart ));
        }
        else
        {
            /*
             * QPC_freq / QPC_sampled = TSC_freq / TSC_sampled
             * so
             * TSC_sampled * QPC_freq / QPC_sampled = TSC_freq
             *
             * Both factors are below 2^32 here so the product fits in
             * 64 bits.
             */
            TSCFreq.QuadPart *= QPCFreq.QuadPart;
            HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
                ("TSC_sampled * QPC_freq: %I64u", TSCFreq.QuadPart ));
            TSCFreq.QuadPart /= QPCDelta;

            /*
             * Compare the whole 64-bit result. Testing only LowPart would
             * wrongly reject a TSC calibrated just above 2^32 Hz, whose low
             * 32 bits wrap round to a small number.
             */
            if( (ULONGLONG)TSCFreq.QuadPart < HIDGAME_SLOWEST_X86_HZ )
            {
                /*
                 * If the value for TSC is less than the slowest CPU we
                 * allow something probably went wrong in the calibration.
                 */
                HGM_DBGPRINT(FILE_TIMING | HGM_ERROR,\
                    ("TSC calibrated at %I64u Hz is too slow to be believed",
                    TSCFreq.QuadPart ));
            }
            else
            {
                /*
                 * The TSC looks usable so set up the global variables.
                 */
                rf = TRUE;
                Global.ReadCounter = (COUNTER_FUNCTION)&HGM_x86ReadCounter;

                /*
                 * There's no point in calibrating the TSC against QPC if QPC
                 * is just returning TSC. So if the reported QPC frequency
                 * is large enough to be a CPU counter and the sampled QPC is
                 * very marginally larger than the TSC both before and after
                 * the poll then just use the QPCFreq.
                 *
                 * HGM_x86SampleClocks always reads QPC last so it must be
                 * larger. The tolerance is the QPC frequency divided by
                 * 2^20, which is a little less than one microsecond's worth
                 * of ticks (freq / 1048576 against freq / 1000000).
                 * NOTE(review): this comment previously said "1ms"; the
                 * shift of 20 has been left as it was.
                 */
                if( ( QPCFreq.LowPart > HIDGAME_SLOWEST_X86_HZ )
                  &&( QPCStart > TSCStart )
                  &&( QPCEnd > TSCEnd )
                  &&( TSCEnd > QPCStart )
                  &&( TSCStart + (QPCFreq.LowPart>>20) > QPCStart )
                  &&( TSCEnd + (QPCFreq.LowPart>>20) > QPCEnd ) )
                {
                    Global.CounterScale = CALCULATE_SCALE( QPCFreq.QuadPart );
                    HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
                        ("RDTSC at %I64u Hz assumed from QPC at %I64u Hz with scale %d",
                        TSCFreq.QuadPart, QPCFreq.QuadPart, Global.CounterScale ));
                }
                else
                {
                    Global.CounterScale = CALCULATE_SCALE( TSCFreq.QuadPart );
                    HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
                        ("RDTSC calibrated at %I64u Hz from QPC at %I64u Hz with scale %d",
                        TSCFreq.QuadPart, QPCFreq.QuadPart, Global.CounterScale ));
                }
            }
        }
    }

    return rf;
} /* HGM_x86CounterInit */