/*++ Copyright (c) 1998 - 1999 Microsoft Corporation Module Name: timing.c Abstract: This module contains routines to perform X86 specific timing functions Environment: Kernel mode @@BEGIN_DDKSPLIT Author: MarcAnd 12-Oct-1998 Revision History: @@END_DDKSPLIT --*/ #include "hidgame.h" #ifdef ALLOC_PRAGMA #pragma alloc_text (PAGE, HGM_x86IsClockAvailable) #pragma alloc_text (PAGE, HGM_x86SampleClocks) #pragma alloc_text (PAGE, HGM_x86CounterInit) #endif /***************************************************************************** * * @doc INTERNAL * * @func LARGE_INTEGER | HGM_x86ReadCounter | * * Read the x86 CPU Time Stamp Counter * This function is not pageable as it is called from DISPATCH_LEVEL * * @parm IN PLARGE_INTEGER | Dummy | * * Unused parameter to match KeQueryPerformanceCounter * * @returns LARGE_INTEGER Counter value * *****************************************************************************/ _declspec( naked ) LARGE_INTEGER EXTERNAL HGM_x86ReadCounter ( IN PLARGE_INTEGER Dummy ) { #define RDTSC __asm _emit 0x0f __asm _emit 0x31 __asm RDTSC __asm ret SIZE Dummy } /***************************************************************************** * * @doc INTERNAL * * @func BOOLEAN | HGM_x86IsClockAvailable | * * Use direct processor interogation to see if the current CPU * supports the RDTSC instruction. * * @rvalue TRUE | instruction supported * @rvalue FALSE | instruction not supported * *****************************************************************************/ BOOLEAN INTERNAL HGM_x86IsClockAvailable ( VOID ) { #define CPU_ID __asm _emit 0x0f __asm _emit 0xa2 BOOLEAN rc = FALSE; __asm { pushfd // Store original EFLAGS on stack pop eax // Get original EFLAGS in EAX mov ecx, eax // Duplicate original EFLAGS in ECX for toggle check xor eax, 0x00200000L // Flip ID bit in EFLAGS push eax // Save new EFLAGS value on stack popfd // Replace current EFLAGS value pushfd // Store new EFLAGS on stack pop eax // Get new EFLAGS in EAX xor eax, ecx // Can we toggle ID bit? jz Done // Jump if no, Processor is older than a Pentium so CPU_ID is not supported mov eax, 1 // Set EAX to tell the CPUID instruction what to return push ebx // Don't corrupt EBX CPU_ID // Get family/model/stepping/features pop ebx test edx, 0x00000010L // Check if RDTSC is available jz Done // Jump if no } rc = TRUE; Done: return( rc ); } /* HGM_IsRDTSCAvailable */ /***************************************************************************** * * @doc INTERNAL * * @func VOID | HGM_x86SampleClocks | * * Sample the CPU time stamp counter and KeQueryPerformanceCounter * and retry until the time between samples does not improve for * three consecutive loops. This should ensure that the sampling is * done without interruption on the fastest time. It does not * mattter that the timing is not the same for all iterations as * any interruption should cause a much larger delay than small * differences in loop logic. * NOTE: Do not put any debug output in this routine as the counter * reported by KeQueryPerformanceCounter, depending on implementation, * may 'slip' relative to the CPU counter. * * @parm OUT PULONGLONG | pTSC | * * Pointer to a ULONGLONG into which sampled CPU time is stored. * * @parm OUT PULONGLONG | pQPC | * * Pointer to a ULONGLONG into which sampled performance counter is * stored. * *****************************************************************************/ VOID INTERNAL HGM_x86SampleClocks ( OUT PULONGLONG pTSC, OUT PULONGLONG pQPC ) { ULONGLONG TestQPC; ULONGLONG TestTSC; ULONGLONG LastQPC; ULONGLONG Delta = (ULONGLONG)-1; int Retries = 3; /* * The first iteration of the loop below should always be * the best so far but just in case there's a timer glitch * set Retries anyway. If a timer is ever found to fail * by decrementing by 1 three times in a row Delta could be * tested and an abort return code added. */ TestQPC = KeQueryPerformanceCounter( NULL ).QuadPart; do { LastQPC = TestQPC; /* * Keep the sampling as close together as we can */ TestTSC = HGM_x86ReadCounter( NULL ).QuadPart; TestQPC = KeQueryPerformanceCounter( NULL ).QuadPart; /* * See if this is the quickest sample yet. * If it is, give it three more loops to get better still. */ if( TestQPC - LastQPC < Delta ) { Delta = TestQPC - LastQPC; Retries = 3; *pQPC = TestQPC; *pTSC = TestTSC; } else { Retries--; } } while( Retries ); } /* HGM_x86SampleClocks */ /***************************************************************************** * * @doc INTERNAL * * @func BOOLEAN | HGM_x86CounterInit | * * Detect and, if present, calibrate an x86 Time Stamp Counter. * * Windows 98 ntkern does not export KeNumberProcessors (even though * it is in wdm.h) so there is no really simple run-time test for * multiple processors. Given the remote chance of finding a system * with processors that do not symetrically support RDTSC assume that * the worst that can happen is very jittery axis data. * Better almost-symetric-multi-processor support could be added most * easily by dropping Windows 98 support and using non-WDM functions. * * @rvalue TRUE | specific counter function has been set up * @rvalue FALSE | no specific counter function set up, default needed * *****************************************************************************/ BOOLEAN EXTERNAL HGM_x86CounterInit() { LARGE_INTEGER QPCFreq; BOOLEAN rf = FALSE; KeQueryPerformanceCounter( &QPCFreq ); if( ( QPCFreq.HighPart == 0 ) && ( QPCFreq.LowPart <= 10000 ) ) { /* * If the performance counter is too slow to use, bail as there's * probably something more serious wrong. This is only a warning * as the caller will try again to use QPC for the default and will * make more fuss then if it fails there as well. */ HGM_DBGPRINT(FILE_TIMING | HGM_WARN,\ ("QPC unusable at reported %I64u Hz", QPCFreq.QuadPart )); } else if( !HGM_x86IsClockAvailable() ) { HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\ ("No RDTSC available, using %I64u Hz QPC", QPCFreq.QuadPart )); } else if( QPCFreq.HighPart ) { /* * If the query performance counter runs at at least 4GHz then it is * probably CPU based and this is plenty fast enough. * Use the QPC to reduce the risk of an extended delay causing an * overflow in the scale calculations. */ HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\ ("QPC too fast not to use at %I64u Hz", QPCFreq.QuadPart )); } else { ULONGLONG QPCStart; ULONGLONG TSCStart; ULONGLONG QPCEnd; ULONGLONG TSCEnd; { LARGE_INTEGER Delay; Delay.QuadPart = -50000; /* * Trivial rejections are now out of the way. Get a pair of start * time samples, then delay for long enough to allow both timers to * increase by a significant amount, then get a pair of end samples. * KeDelayExecutionThread is used to delay 5ms but if the actual * delay is longer this is taken into account in the calculation. * see NOTE in HGM_x86SampleClocks about debug output. */ HGM_x86SampleClocks( &TSCStart, &QPCStart ); KeDelayExecutionThread(KernelMode, FALSE, &Delay); HGM_x86SampleClocks( &TSCEnd, &QPCEnd ); } { LARGE_INTEGER TSCFreq; HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\ ("RDTSC: Start: %I64u End: %I64u delta: %I64u", TSCStart, TSCEnd, TSCEnd - TSCStart )); HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\ ("QPC: Start: %I64u End: %I64u delta: %I64u", QPCStart, QPCEnd, QPCEnd - QPCStart )); TSCFreq.QuadPart = (TSCEnd - TSCStart); if( TSCFreq.HighPart ) { /* * Somehow the delay allowed the TSC to tick more than 2^32 * times so bail as that would indicate a calibration error. */ HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\ ("Clock sample failed, using %I64u Hz QPC", QPCFreq.QuadPart )); } else { /* * QPC_freq / QPC_sampled = TSC_freq / TSC_sampled * so * TSC_sampled * QPC_freq / QPC_sampled = TSC_freq */ TSCFreq.QuadPart *= QPCFreq.QuadPart; HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\ ("TSC_sampled * QPC_freq: %I64u", TSCFreq.QuadPart )); TSCFreq.QuadPart /= QPCEnd - QPCStart; if( TSCFreq.LowPart < HIDGAME_SLOWEST_X86_HZ ) { /* * If the value for TSC is less than the slowest CPU we * allow something probably went wrong in the calibration. */ HGM_DBGPRINT(FILE_TIMING | HGM_ERROR,\ ("TSC calibrated at %I64u Hz is too slow to be believed", TSCFreq.QuadPart )); } else { /* * The TSC looks usable so set up the global variables. */ rf = TRUE; Global.ReadCounter = (COUNTER_FUNCTION)&HGM_x86ReadCounter; /* * There's no point in calibrating the TSC against QPC if QPC * is just returning TSC. So if the reported QPC frequency * is large enough to be a CPU counter and the sampled QPC is * very marginally larger than the TSC both before and after * the poll then just use the QPCFreq. */ /* * HGM_x86SampleClocks always sets QPC last so it must be larger. * The QPC frequency divided by 2^20 is a little less than 1ms * worth of ticks which should be a reasonable test. */ if( ( QPCFreq.LowPart > HIDGAME_SLOWEST_X86_HZ ) &&( QPCStart > TSCStart ) &&( QPCEnd > TSCEnd ) &&( TSCEnd > QPCStart ) &&( TSCStart + (QPCFreq.LowPart>>20) > QPCStart ) &&( TSCEnd + (QPCFreq.LowPart>>20) > QPCEnd ) ) { Global.CounterScale = CALCULATE_SCALE( QPCFreq.QuadPart ); HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\ ("RDTSC at %I64u Hz assumed from QPC at %I64u Hz with scale %d", TSCFreq.QuadPart, QPCFreq.QuadPart, Global.CounterScale )); } else { Global.CounterScale = CALCULATE_SCALE( TSCFreq.QuadPart ); HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\ ("RDTSC calibrated at %I64u Hz from QPC at %I64u Hz with scale %d", TSCFreq.QuadPart, QPCFreq.QuadPart, Global.CounterScale )); } } } } } return rf; } /* HGM_x86CounterInit */