Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

379 lines
13 KiB

  1. /*++
  2. Copyright (c) 1998 - 1999 Microsoft Corporation
  3. Module Name:
  4. timing.c
  5. Abstract: This module contains routines to perform X86 specific timing functions
  6. Environment:
  7. Kernel mode
  8. @@BEGIN_DDKSPLIT
  9. Author:
  10. MarcAnd 12-Oct-1998
  11. Revision History:
  12. @@END_DDKSPLIT
  13. --*/
  14. #include "hidgame.h"
  15. #ifdef ALLOC_PRAGMA
  16. #pragma alloc_text (PAGE, HGM_x86IsClockAvailable)
  17. #pragma alloc_text (PAGE, HGM_x86SampleClocks)
  18. #pragma alloc_text (PAGE, HGM_x86CounterInit)
  19. #endif
  20. /*****************************************************************************
  21. *
  22. * @doc INTERNAL
  23. *
  24. * @func LARGE_INTEGER | HGM_x86ReadCounter |
  25. *
  26. * Read the x86 CPU Time Stamp Counter
  27. * This function is not pageable as it is called from DISPATCH_LEVEL
  28. *
  29. * @parm IN PLARGE_INTEGER | Dummy |
  30. *
  31. * Unused parameter to match KeQueryPerformanceCounter
  32. *
  33. * @returns LARGE_INTEGER Counter value
  34. *
  35. *****************************************************************************/
  36. _declspec( naked ) LARGE_INTEGER EXTERNAL
  37. HGM_x86ReadCounter
  38. (
  39. IN PLARGE_INTEGER Dummy
  40. )
  41. {
  42. #define RDTSC __asm _emit 0x0f __asm _emit 0x31
  43. __asm RDTSC
  44. __asm ret SIZE Dummy
  45. }
  46. /*****************************************************************************
  47. *
  48. * @doc INTERNAL
  49. *
  50. * @func BOOLEAN | HGM_x86IsClockAvailable |
  51. *
  52. * Use direct processor interogation to see if the current CPU
  53. * supports the RDTSC instruction.
  54. *
  55. * @rvalue TRUE | instruction supported
  56. * @rvalue FALSE | instruction not supported
  57. *
  58. *****************************************************************************/
  59. BOOLEAN INTERNAL
  60. HGM_x86IsClockAvailable
  61. (
  62. VOID
  63. )
  64. {
  65. #define CPU_ID __asm _emit 0x0f __asm _emit 0xa2
  66. BOOLEAN rc = FALSE;
  67. __asm
  68. {
  69. pushfd // Store original EFLAGS on stack
  70. pop eax // Get original EFLAGS in EAX
  71. mov ecx, eax // Duplicate original EFLAGS in ECX for toggle check
  72. xor eax, 0x00200000L // Flip ID bit in EFLAGS
  73. push eax // Save new EFLAGS value on stack
  74. popfd // Replace current EFLAGS value
  75. pushfd // Store new EFLAGS on stack
  76. pop eax // Get new EFLAGS in EAX
  77. xor eax, ecx // Can we toggle ID bit?
  78. jz Done // Jump if no, Processor is older than a Pentium so CPU_ID is not supported
  79. mov eax, 1 // Set EAX to tell the CPUID instruction what to return
  80. push ebx // Don't corrupt EBX
  81. CPU_ID // Get family/model/stepping/features
  82. pop ebx
  83. test edx, 0x00000010L // Check if RDTSC is available
  84. jz Done // Jump if no
  85. }
  86. rc = TRUE;
  87. Done:
  88. return( rc );
  89. } /* HGM_IsRDTSCAvailable */
  90. /*****************************************************************************
  91. *
  92. * @doc INTERNAL
  93. *
  94. * @func VOID | HGM_x86SampleClocks |
  95. *
  96. * Sample the CPU time stamp counter and KeQueryPerformanceCounter
  97. * and retry until the time between samples does not improve for
  98. * three consecutive loops. This should ensure that the sampling is
  99. * done without interruption on the fastest time. It does not
  100. * mattter that the timing is not the same for all iterations as
  101. * any interruption should cause a much larger delay than small
  102. * differences in loop logic.
  103. * NOTE: Do not put any debug output in this routine as the counter
  104. * reported by KeQueryPerformanceCounter, depending on implementation,
  105. * may 'slip' relative to the CPU counter.
  106. *
  107. * @parm OUT PULONGLONG | pTSC |
  108. *
  109. * Pointer to a ULONGLONG into which sampled CPU time is stored.
  110. *
  111. * @parm OUT PULONGLONG | pQPC |
  112. *
  113. * Pointer to a ULONGLONG into which sampled performance counter is
  114. * stored.
  115. *
  116. *****************************************************************************/
  117. VOID INTERNAL
  118. HGM_x86SampleClocks
  119. (
  120. OUT PULONGLONG pTSC,
  121. OUT PULONGLONG pQPC
  122. )
  123. {
  124. ULONGLONG TestQPC;
  125. ULONGLONG TestTSC;
  126. ULONGLONG LastQPC;
  127. ULONGLONG Delta = (ULONGLONG)-1;
  128. int Retries = 3;
  129. /*
  130. * The first iteration of the loop below should always be
  131. * the best so far but just in case there's a timer glitch
  132. * set Retries anyway. If a timer is ever found to fail
  133. * by decrementing by 1 three times in a row Delta could be
  134. * tested and an abort return code added.
  135. */
  136. TestQPC = KeQueryPerformanceCounter( NULL ).QuadPart;
  137. do
  138. {
  139. LastQPC = TestQPC;
  140. /*
  141. * Keep the sampling as close together as we can
  142. */
  143. TestTSC = HGM_x86ReadCounter( NULL ).QuadPart;
  144. TestQPC = KeQueryPerformanceCounter( NULL ).QuadPart;
  145. /*
  146. * See if this is the quickest sample yet.
  147. * If it is, give it three more loops to get better still.
  148. */
  149. if( TestQPC - LastQPC < Delta )
  150. {
  151. Delta = TestQPC - LastQPC;
  152. Retries = 3;
  153. *pQPC = TestQPC;
  154. *pTSC = TestTSC;
  155. }
  156. else
  157. {
  158. Retries--;
  159. }
  160. } while( Retries );
  161. } /* HGM_x86SampleClocks */
  162. /*****************************************************************************
  163. *
  164. * @doc INTERNAL
  165. *
  166. * @func BOOLEAN | HGM_x86CounterInit |
  167. *
  168. * Detect and, if present, calibrate an x86 Time Stamp Counter.
  169. *
  170. * Windows 98 ntkern does not export KeNumberProcessors (even though
  171. * it is in wdm.h) so there is no really simple run-time test for
  172. * multiple processors. Given the remote chance of finding a system
  173. * with processors that do not symetrically support RDTSC assume that
  174. * the worst that can happen is very jittery axis data.
  175. * Better almost-symetric-multi-processor support could be added most
  176. * easily by dropping Windows 98 support and using non-WDM functions.
  177. *
  178. * @rvalue TRUE | specific counter function has been set up
  179. * @rvalue FALSE | no specific counter function set up, default needed
  180. *
  181. *****************************************************************************/
  182. BOOLEAN EXTERNAL
  183. HGM_x86CounterInit()
  184. {
  185. LARGE_INTEGER QPCFreq;
  186. BOOLEAN rf = FALSE;
  187. KeQueryPerformanceCounter( &QPCFreq );
  188. if( ( QPCFreq.HighPart == 0 )
  189. && ( QPCFreq.LowPart <= 10000 ) )
  190. {
  191. /*
  192. * If the performance counter is too slow to use, bail as there's
  193. * probably something more serious wrong. This is only a warning
  194. * as the caller will try again to use QPC for the default and will
  195. * make more fuss then if it fails there as well.
  196. */
  197. HGM_DBGPRINT(FILE_TIMING | HGM_WARN,\
  198. ("QPC unusable at reported %I64u Hz", QPCFreq.QuadPart ));
  199. }
  200. else if( !HGM_x86IsClockAvailable() )
  201. {
  202. HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
  203. ("No RDTSC available, using %I64u Hz QPC", QPCFreq.QuadPart ));
  204. }
  205. else if( QPCFreq.HighPart )
  206. {
  207. /*
  208. * If the query performance counter runs at at least 4GHz then it is
  209. * probably CPU based and this is plenty fast enough.
  210. * Use the QPC to reduce the risk of an extended delay causing an
  211. * overflow in the scale calculations.
  212. */
  213. HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
  214. ("QPC too fast not to use at %I64u Hz", QPCFreq.QuadPart ));
  215. }
  216. else
  217. {
  218. ULONGLONG QPCStart;
  219. ULONGLONG TSCStart;
  220. ULONGLONG QPCEnd;
  221. ULONGLONG TSCEnd;
  222. {
  223. LARGE_INTEGER Delay;
  224. Delay.QuadPart = -50000;
  225. /*
  226. * Trivial rejections are now out of the way. Get a pair of start
  227. * time samples, then delay for long enough to allow both timers to
  228. * increase by a significant amount, then get a pair of end samples.
  229. * KeDelayExecutionThread is used to delay 5ms but if the actual
  230. * delay is longer this is taken into account in the calculation.
  231. * see NOTE in HGM_x86SampleClocks about debug output.
  232. */
  233. HGM_x86SampleClocks( &TSCStart, &QPCStart );
  234. KeDelayExecutionThread(KernelMode, FALSE, &Delay);
  235. HGM_x86SampleClocks( &TSCEnd, &QPCEnd );
  236. }
  237. {
  238. LARGE_INTEGER TSCFreq;
  239. HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
  240. ("RDTSC: Start: %I64u End: %I64u delta: %I64u",
  241. TSCStart, TSCEnd, TSCEnd - TSCStart ));
  242. HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
  243. ("QPC: Start: %I64u End: %I64u delta: %I64u",
  244. QPCStart, QPCEnd, QPCEnd - QPCStart ));
  245. TSCFreq.QuadPart = (TSCEnd - TSCStart);
  246. if( TSCFreq.HighPart )
  247. {
  248. /*
  249. * Somehow the delay allowed the TSC to tick more than 2^32
  250. * times so bail as that would indicate a calibration error.
  251. */
  252. HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
  253. ("Clock sample failed, using %I64u Hz QPC",
  254. QPCFreq.QuadPart ));
  255. }
  256. else
  257. {
  258. /*
  259. * QPC_freq / QPC_sampled = TSC_freq / TSC_sampled
  260. * so
  261. * TSC_sampled * QPC_freq / QPC_sampled = TSC_freq
  262. */
  263. TSCFreq.QuadPart *= QPCFreq.QuadPart;
  264. HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
  265. ("TSC_sampled * QPC_freq: %I64u", TSCFreq.QuadPart ));
  266. TSCFreq.QuadPart /= QPCEnd - QPCStart;
  267. if( TSCFreq.LowPart < HIDGAME_SLOWEST_X86_HZ )
  268. {
  269. /*
  270. * If the value for TSC is less than the slowest CPU we
  271. * allow something probably went wrong in the calibration.
  272. */
  273. HGM_DBGPRINT(FILE_TIMING | HGM_ERROR,\
  274. ("TSC calibrated at %I64u Hz is too slow to be believed",
  275. TSCFreq.QuadPart ));
  276. }
  277. else
  278. {
  279. /*
  280. * The TSC looks usable so set up the global variables.
  281. */
  282. rf = TRUE;
  283. Global.ReadCounter = (COUNTER_FUNCTION)&HGM_x86ReadCounter;
  284. /*
  285. * There's no point in calibrating the TSC against QPC if QPC
  286. * is just returning TSC. So if the reported QPC frequency
  287. * is large enough to be a CPU counter and the sampled QPC is
  288. * very marginally larger than the TSC both before and after
  289. * the poll then just use the QPCFreq.
  290. */
  291. /*
  292. * HGM_x86SampleClocks always sets QPC last so it must be larger.
  293. * The QPC frequency divided by 2^20 is a little less than 1ms
  294. * worth of ticks which should be a reasonable test.
  295. */
  296. if( ( QPCFreq.LowPart > HIDGAME_SLOWEST_X86_HZ )
  297. &&( QPCStart > TSCStart )
  298. &&( QPCEnd > TSCEnd )
  299. &&( TSCEnd > QPCStart )
  300. &&( TSCStart + (QPCFreq.LowPart>>20) > QPCStart )
  301. &&( TSCEnd + (QPCFreq.LowPart>>20) > QPCEnd ) )
  302. {
  303. Global.CounterScale = CALCULATE_SCALE( QPCFreq.QuadPart );
  304. HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
  305. ("RDTSC at %I64u Hz assumed from QPC at %I64u Hz with scale %d",
  306. TSCFreq.QuadPart, QPCFreq.QuadPart, Global.CounterScale ));
  307. }
  308. else
  309. {
  310. Global.CounterScale = CALCULATE_SCALE( TSCFreq.QuadPart );
  311. HGM_DBGPRINT(FILE_TIMING | HGM_BABBLE,\
  312. ("RDTSC calibrated at %I64u Hz from QPC at %I64u Hz with scale %d",
  313. TSCFreq.QuadPart, QPCFreq.QuadPart, Global.CounterScale ));
  314. }
  315. }
  316. }
  317. }
  318. }
  319. return rf;
  320. } /* HGM_x86CounterInit */