You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
379 lines
13 KiB
379 lines
13 KiB
|
|
/*++
|
|
|
|
Copyright (c) 1998 - 1999 Microsoft Corporation
|
|
|
|
Module Name:
|
|
|
|
timing.c
|
|
|
|
Abstract: This module contains routines to perform X86 specific timing functions
|
|
|
|
Environment:
|
|
|
|
Kernel mode
|
|
|
|
@@BEGIN_DDKSPLIT
|
|
Author:
|
|
|
|
MarcAnd 12-Oct-1998
|
|
|
|
Revision History:
|
|
|
|
|
|
@@END_DDKSPLIT
|
|
|
|
--*/
|
|
|
|
#include "hidgame.h"
|
|
|
|
#ifdef ALLOC_PRAGMA
|
|
#pragma alloc_text (PAGE, HGM_x86IsClockAvailable)
|
|
#pragma alloc_text (PAGE, HGM_x86SampleClocks)
|
|
#pragma alloc_text (PAGE, HGM_x86CounterInit)
|
|
#endif
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* @doc INTERNAL
|
|
*
|
|
* @func LARGE_INTEGER | HGM_x86ReadCounter |
|
|
*
|
|
* Read the x86 CPU Time Stamp Counter
|
|
* This function is not pageable as it is called from DISPATCH_LEVEL
|
|
*
|
|
* @parm IN PLARGE_INTEGER | Dummy |
|
|
*
|
|
* Unused parameter to match KeQueryPerformanceCounter
|
|
*
|
|
* @returns LARGE_INTEGER Counter value
|
|
*
|
|
*****************************************************************************/
|
|
_declspec( naked ) LARGE_INTEGER EXTERNAL
|
|
HGM_x86ReadCounter
|
|
(
|
|
IN PLARGE_INTEGER Dummy
|
|
)
|
|
{
|
|
#define RDTSC __asm _emit 0x0f __asm _emit 0x31
|
|
__asm RDTSC
|
|
__asm ret SIZE Dummy
|
|
}
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* @doc INTERNAL
|
|
*
|
|
* @func BOOLEAN | HGM_x86IsClockAvailable |
|
|
*
|
|
* Use direct processor interrogation to see if the current CPU
|
|
* supports the RDTSC instruction.
|
|
*
|
|
* @rvalue TRUE | instruction supported
|
|
* @rvalue FALSE | instruction not supported
|
|
*
|
|
*****************************************************************************/
|
|
|
|
BOOLEAN INTERNAL
|
|
HGM_x86IsClockAvailable
|
|
(
|
|
VOID
|
|
)
|
|
{
|
|
#define CPU_ID __asm _emit 0x0f __asm _emit 0xa2
|
|
|
|
BOOLEAN rc = FALSE;
|
|
|
|
__asm
|
|
{
|
|
pushfd // Store original EFLAGS on stack
|
|
pop eax // Get original EFLAGS in EAX
|
|
mov ecx, eax // Duplicate original EFLAGS in ECX for toggle check
|
|
xor eax, 0x00200000L // Flip ID bit in EFLAGS
|
|
push eax // Save new EFLAGS value on stack
|
|
popfd // Replace current EFLAGS value
|
|
pushfd // Store new EFLAGS on stack
|
|
pop eax // Get new EFLAGS in EAX
|
|
xor eax, ecx // Can we toggle ID bit?
|
|
jz Done // Jump if no, Processor is older than a Pentium so CPU_ID is not supported
|
|
mov eax, 1 // Set EAX to tell the CPUID instruction what to return
|
|
push ebx // Don't corrupt EBX
|
|
CPU_ID // Get family/model/stepping/features
|
|
pop ebx
|
|
test edx, 0x00000010L // Check if RDTSC is available
|
|
jz Done // Jump if no
|
|
}
|
|
|
|
rc = TRUE;
|
|
Done:
|
|
return( rc );
|
|
} /* HGM_IsRDTSCAvailable */
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* @doc INTERNAL
|
|
*
|
|
* @func VOID | HGM_x86SampleClocks |
|
|
*
|
|
* Sample the CPU time stamp counter and KeQueryPerformanceCounter
|
|
* and retry until the time between samples does not improve for
|
|
* three consecutive loops. This should ensure that the sampling is
|
|
* done without interruption on the fastest time. It does not
|
|
* matter that the timing is not the same for all iterations as
|
|
* any interruption should cause a much larger delay than small
|
|
* differences in loop logic.
|
|
* NOTE: Do not put any debug output in this routine as the counter
|
|
* reported by KeQueryPerformanceCounter, depending on implementation,
|
|
* may 'slip' relative to the CPU counter.
|
|
*
|
|
* @parm OUT PULONGLONG | pTSC |
|
|
*
|
|
* Pointer to a ULONGLONG into which sampled CPU time is stored.
|
|
*
|
|
* @parm OUT PULONGLONG | pQPC |
|
|
*
|
|
* Pointer to a ULONGLONG into which sampled performance counter is
|
|
* stored.
|
|
*
|
|
*****************************************************************************/
|
|
VOID INTERNAL
|
|
HGM_x86SampleClocks
|
|
(
|
|
OUT PULONGLONG pTSC,
|
|
OUT PULONGLONG pQPC
|
|
)
|
|
{
|
|
ULONGLONG TestQPC;
|
|
ULONGLONG TestTSC;
|
|
ULONGLONG LastQPC;
|
|
ULONGLONG Delta = (ULONGLONG)-1;
|
|
int Retries = 3;
|
|
/*
|
|
* The first iteration of the loop below should always be
|
|
* the best so far but just in case there's a timer glitch
|
|
* set Retries anyway. If a timer is ever found to fail
|
|
* by decrementing by 1 three times in a row Delta could be
|
|
* tested and an abort return code added.
|
|
*/
|
|
|
|
TestQPC = KeQueryPerformanceCounter( NULL ).QuadPart;
|
|
|
|
do
|
|
{
|
|
LastQPC = TestQPC;
|
|
/*
|
|
* Keep the sampling as close together as we can
|
|
*/
|
|
TestTSC = HGM_x86ReadCounter( NULL ).QuadPart;
|
|
TestQPC = KeQueryPerformanceCounter( NULL ).QuadPart;
|
|
|
|
/*
|
|
* See if this is the quickest sample yet.
|
|
* If it is, give it three more loops to get better still.
|
|
*/
|
|
if( TestQPC - LastQPC < Delta )
|
|
{
|
|
Delta = TestQPC - LastQPC;
|
|
Retries = 3;
|
|
*pQPC = TestQPC;
|
|
*pTSC = TestTSC;
|
|
}
|
|
else
|
|
{
|
|
Retries--;
|
|
}
|
|
} while( Retries );
|
|
|
|
|
|
} /* HGM_x86SampleClocks */
|
|
|
|
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* @doc INTERNAL
|
|
*
|
|
* @func BOOLEAN | HGM_x86CounterInit |
|
|
*
|
|
* Detect and, if present, calibrate an x86 Time Stamp Counter.
|
|
*
|
|
* Windows 98 ntkern does not export KeNumberProcessors (even though
|
|
* it is in wdm.h) so there is no really simple run-time test for
|
|
* multiple processors. Given the remote chance of finding a system
|
|
* with processors that do not symmetrically support RDTSC assume that
|
|
* the worst that can happen is very jittery axis data.
|
|
* Better almost-symmetric-multi-processor support could be added most
|
|
* easily by dropping Windows 98 support and using non-WDM functions.
|
|
*
|
|
* @rvalue TRUE | specific counter function has been set up
|
|
* @rvalue FALSE | no specific counter function set up, default needed
|
|
*
|
|
*****************************************************************************/
|
|
|
|
BOOLEAN EXTERNAL
|
|
HGM_x86CounterInit()
|
|
{
|
|
LARGE_INTEGER QPCFreq;
|
|
BOOLEAN rf = FALSE;
|
|
|
|
KeQueryPerformanceCounter( &QPCFreq );
|
|
|
|
if( ( QPCFreq.HighPart == 0 )
|
|
&& ( QPCFreq.LowPart <= 10000 ) )
|
|
{
|
|
/*
|
|
* If the performance counter is too slow to use, bail as there's
|
|
* probably something more serious wrong. This is only a warning
|
|
* as the caller will try again to use QPC for the default and will
|
|
* make more fuss then if it fails there as well.
|
|
*/
|
|
HGM_DBGPRINT(FILE_TIMING | HGM_WARN,\
|
|
("QPC unusable at reported %I64u Hz", QPCFreq.QuadPart ));
|
|
}
|
|
else if( !HGM_x86IsClockAvailable() )
|
|
{
|
|
HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
|
|
("No RDTSC available, using %I64u Hz QPC", QPCFreq.QuadPart ));
|
|
}
|
|
else if( QPCFreq.HighPart )
|
|
{
|
|
/*
|
|
* If the query performance counter runs at at least 4GHz then it is
|
|
* probably CPU based and this is plenty fast enough.
|
|
* Use the QPC to reduce the risk of an extended delay causing an
|
|
* overflow in the scale calculations.
|
|
*/
|
|
HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
|
|
("QPC too fast not to use at %I64u Hz", QPCFreq.QuadPart ));
|
|
}
|
|
else
|
|
{
|
|
ULONGLONG QPCStart;
|
|
ULONGLONG TSCStart;
|
|
ULONGLONG QPCEnd;
|
|
ULONGLONG TSCEnd;
|
|
|
|
{
|
|
LARGE_INTEGER Delay;
|
|
|
|
Delay.QuadPart = -50000;
|
|
|
|
/*
|
|
* Trivial rejections are now out of the way. Get a pair of start
|
|
* time samples, then delay for long enough to allow both timers to
|
|
* increase by a significant amount, then get a pair of end samples.
|
|
* KeDelayExecutionThread is used to delay 5ms but if the actual
|
|
* delay is longer this is taken into account in the calculation.
|
|
* see NOTE in HGM_x86SampleClocks about debug output.
|
|
*/
|
|
HGM_x86SampleClocks( &TSCStart, &QPCStart );
|
|
|
|
KeDelayExecutionThread(KernelMode, FALSE, &Delay);
|
|
|
|
HGM_x86SampleClocks( &TSCEnd, &QPCEnd );
|
|
}
|
|
|
|
{
|
|
LARGE_INTEGER TSCFreq;
|
|
|
|
HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
|
|
("RDTSC: Start: %I64u End: %I64u delta: %I64u",
|
|
TSCStart, TSCEnd, TSCEnd - TSCStart ));
|
|
|
|
HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
|
|
("QPC: Start: %I64u End: %I64u delta: %I64u",
|
|
QPCStart, QPCEnd, QPCEnd - QPCStart ));
|
|
|
|
|
|
TSCFreq.QuadPart = (TSCEnd - TSCStart);
|
|
|
|
if( TSCFreq.HighPart )
|
|
{
|
|
/*
|
|
* Somehow the delay allowed the TSC to tick more than 2^32
|
|
* times so bail as that would indicate a calibration error.
|
|
*/
|
|
HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
|
|
("Clock sample failed, using %I64u Hz QPC",
|
|
QPCFreq.QuadPart ));
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* QPC_freq / QPC_sampled = TSC_freq / TSC_sampled
|
|
* so
|
|
* TSC_sampled * QPC_freq / QPC_sampled = TSC_freq
|
|
*/
|
|
|
|
TSCFreq.QuadPart *= QPCFreq.QuadPart;
|
|
|
|
HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
|
|
("TSC_sampled * QPC_freq: %I64u", TSCFreq.QuadPart ));
|
|
|
|
TSCFreq.QuadPart /= QPCEnd - QPCStart;
|
|
|
|
if( TSCFreq.LowPart < HIDGAME_SLOWEST_X86_HZ )
|
|
{
|
|
/*
|
|
* If the value for TSC is less than the slowest CPU we
|
|
* allow something probably went wrong in the calibration.
|
|
*/
|
|
HGM_DBGPRINT(FILE_TIMING | HGM_ERROR,\
|
|
("TSC calibrated at %I64u Hz is too slow to be believed",
|
|
TSCFreq.QuadPart ));
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* The TSC looks usable so set up the global variables.
|
|
*/
|
|
rf = TRUE;
|
|
|
|
Global.ReadCounter = (COUNTER_FUNCTION)&HGM_x86ReadCounter;
|
|
|
|
/*
|
|
* There's no point in calibrating the TSC against QPC if QPC
|
|
* is just returning TSC. So if the reported QPC frequency
|
|
* is large enough to be a CPU counter and the sampled QPC is
|
|
* very marginally larger than the TSC both before and after
|
|
* the poll then just use the QPCFreq.
|
|
*/
|
|
|
|
/*
|
|
* HGM_x86SampleClocks always sets QPC last so it must be larger.
|
|
* The QPC frequency divided by 2^20 is a little less than 1ms
|
|
* worth of ticks which should be a reasonable test.
|
|
*/
|
|
if( ( QPCFreq.LowPart > HIDGAME_SLOWEST_X86_HZ )
|
|
&&( QPCStart > TSCStart )
|
|
&&( QPCEnd > TSCEnd )
|
|
&&( TSCEnd > QPCStart )
|
|
&&( TSCStart + (QPCFreq.LowPart>>20) > QPCStart )
|
|
&&( TSCEnd + (QPCFreq.LowPart>>20) > QPCEnd ) )
|
|
{
|
|
Global.CounterScale = CALCULATE_SCALE( QPCFreq.QuadPart );
|
|
HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
|
|
("RDTSC at %I64u Hz assumed from QPC at %I64u Hz with scale %d",
|
|
TSCFreq.QuadPart, QPCFreq.QuadPart, Global.CounterScale ));
|
|
}
|
|
else
|
|
{
|
|
Global.CounterScale = CALCULATE_SCALE( TSCFreq.QuadPart );
|
|
HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
|
|
("RDTSC calibrated at %I64u Hz from QPC at %I64u Hz with scale %d",
|
|
TSCFreq.QuadPart, QPCFreq.QuadPart, Global.CounterScale ));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return rf;
|
|
} /* HGM_x86CounterInit */
|
|
|