Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

888 lines
20 KiB

  1. /*++
  2. Copyright (c) 1999-2001 Microsoft Corporation. All Rights Reserved.
  3. Module Name:
  4. cpu.h
  5. Abstract:
  6. This module contains code for processor identification, as well as code for
  7. calibrating the processor clock speed.
  8. Author:
  9. Joseph Ballantyne
  10. Environment:
  11. Kernel Mode
  12. Revision History:
  13. --*/
  //
  // Processor related constants.
  //

  // Supported architectures (value stored in CPUArchitecture).
  #define X86                     1

  // Supported manufacturers (value stored in CPUManufacturer).
  #define INTEL                   1
  #define AMD                     2

  // Bit masks for the "supported features" dword returned by the CPUID
  // instruction.  GetProcessorInfo stores that dword in CPUFeatures.
  #define FXSR                    (1<<24)     // bit 24
  #define MMX                     (1<<23)     // bit 23
  #define APIC                    (1<<9)      // bit 9
  #define MSR                     (1<<5)      // bit 5
  #define TSC                     (1<<4)      // bit 4
  #define FPU                     (1<<0)      // bit 0

  // NOTE(review): OSFXSR is not referenced in the visible part of this file.
  // The value 0x200 is bit 9; presumably this is the OSFXSR bit of control
  // register CR4 rather than a CPUID feature bit - confirm against the users.
  #define OSFXSR                  0x200

  // Number of entries in the TimerRead array used during clock calibration.
  #define MAXTIMERREADSLOTS       33

  // MeasureCPUCyclesPerTick gives up once it has read the timer this many
  // times in a single calibration pass.
  #define LIMITTOTALTIMERREADS    64

  // Largest count MeasureCPUCyclesPerTick expects to read back from timer 1.
  #define DEFAULTMAXTIMERCOUNT    0x12
  // The inline assembler has no mnemonics for these two instructions, so
  // they are emitted directly as opcode bytes (0f a2 and 0f 31).
  #define cpuid   __asm _emit 0x0f __asm _emit 0xa2
  #define rdtsc   __asm _emit 0x0f __asm _emit 0x31

  // SerializeExecution forces serialization of instruction execution.
  //
  // It reserves 8 bytes of scratch space on the stack, stores the interrupt
  // descriptor table register there with sidt, loads the very same value
  // back with lidt, and then releases the scratch space.
  //
  // This is the fastest way to serialize on Pentium processors.  It is not
  // necessarily the fastest way on PII, PIII or P4 processors.
  #define SerializeExecution() \
  __asm { \
      __asm sub esp,8 \
      __asm sidt [esp] \
      __asm lidt [esp] \
      __asm add esp,8 \
  }
  45. // Structures for supporting processor identification and clock speed calibration.
  46. // The CPUINFO structure is used to store/access the results of the CPU ID instruction.
  47. typedef struct {
  48. ULONG eax;
  49. ULONG ebx;
  50. ULONG edx;
  51. ULONG ecx;
  52. } CPUINFO;
  53. // The TimerReadInfo structure is used by the cpu clock speed calibration code.
  54. // Note that we want this structure to be 32 byte aligned.
  55. // This allows us to force the whole thing into cache by just touching
  56. // the first element. We depend on this in our code for getting
  57. // stuff into the cache, so don't change it.
  58. typedef struct {
  59. ULONGLONG firstreadtimestamp;
  60. ULONGLONG lastreadtimestamp;
  61. ULONG count;
  62. ULONG value;
  63. ULONGLONG align;
  64. } TimerReadInfo;
  65. // These globals store information about the processor we are running on.
  66. ULONG CPUArchitecture=0;
  67. ULONG CPUManufacturer=0;
  68. LONG CPUFamily=0xffffffff;
  69. LONG CPUModel=0xffffffff;
  70. LONG CPUStepping=0xffffffff;
  71. ULONGLONG CPUFeatures=0;
  72. // TimerReadInfo stores timing meassurements as they are taken.
  73. TimerReadInfo TimerRead[MAXTIMERREADSLOTS];
  74. #pragma warning ( disable : 4035 )
  75. ULONG
  76. CpuIdOk (
  77. VOID
  78. )
  79. /*++
  80. Routine Description:
  81. Determines if the processor supports running the CPUID instruction.
  82. It does so by checking if the state of bit 21 of the eflags register
  83. can be changed.
  84. Arguments:
  85. None.
  86. Return Value:
  87. FALSE (0) if processor does not support the CPUID instruction.
  88. TRUE (bit 21 set to 1) if processor does support CPUID.
  89. --*/
  90. {
  91. __asm {
  92. // First save eflags.
  93. pushfd
  94. // Load eflags into eax.
  95. mov eax, [esp]
  96. // Flip state of bit 21 and put it back into eflags.
  97. xor eax,1<<21
  98. push eax
  99. popfd
  100. // Read new eflags into eax.
  101. pushfd
  102. pop eax
  103. // See if bit 21 state change stuck. If so, we have CPUID instruction support
  104. // and we will return with bit 21 set, otherwise there is no CPUID support and
  105. // we will return zero.
  106. xor eax, [esp]
  107. // Restore orignal eflags.
  108. popfd
  109. }
  110. }
  111. #pragma warning ( default : 4035 )
  112. BOOLEAN
  113. GetCpuId (
  114. ULONG index,
  115. CPUINFO *cpuinfo
  116. )
  117. /*++
  118. Routine Description:
  119. Checks whether the processor supports the CPUID instruction. If not, returns
  120. FALSE. Otherwise, runs the CPUID instruction with the supplied index and
  121. writes the output into the location pointed to by cpuinfo if it is not NULL.
  122. Arguments:
  123. index - Supplies a value that selects which data should be returned by CPUID.
  124. cpuinfo - Supplies a pointer to a CPUINFO structure into which the CPUID data
  125. will be written. Can be NULL in which case no data is returned.
  126. Return Value:
  127. TRUE if the CPUID instruction is supported by the processor and was run for
  128. this call of the function.
  129. FALSE if the CPUID instruction is not supported. Nothing is written into
  130. cpuinfo in that case.
  131. --*/
  132. {
  133. static ULONG initialized=0;
  134. // Check if the processor supports the CPUID instruction.
  135. // This is a one time check.
  136. if (!initialized) {
  137. initialized=1+CpuIdOk();
  138. }
  139. // If the CPUID instruction is supported, then run it.
  140. if (initialized&(1<<21)) {
  141. // The CPU supports CPU ID. Do it.
  142. __asm {
  143. mov eax,index
  144. // Now make clear to the compiler that the cpuid instruction
  145. // will blow away the contents of these registers. The inline
  146. // assembler does not support the cpuid instruction, so the compiler
  147. // thinks edx, ebx, and ecx do not change in this function. Not so.
  148. xor edx,edx
  149. xor ebx,ebx
  150. xor ecx,ecx
  151. cpuid
  152. // Now, if we were passed a valid pointer for the data, then store it.
  153. mov esi,cpuinfo
  154. or esi,esi
  155. jz nullptr
  156. mov [esi],eax
  157. mov [esi+4],ebx
  158. mov [esi+8],edx
  159. mov [esi+12],ecx
  160. nullptr:
  161. }
  162. return TRUE;
  163. }
  164. // CPU does not support CPU ID.
  165. return FALSE;
  166. }
  167. BOOL
  168. GetProcessorInfo (
  169. VOID
  170. )
  171. /*++
  172. Routine Description:
  173. Loads globals containing information about the processor architecture, manufacturer
  174. family, model, stepping and features. Currently supports AMD and Intel processors.
  175. Arguments:
  176. None.
  177. Return Value:
  178. TRUE if the processor supports CPUID instruction and the manufacturer is known
  179. and supported. Currently Intel and AMD are supported.
  180. FALSE otherwise.
  181. --*/
  182. {
  183. CPUINFO thecpu;
  184. if (!GetCpuId(0, &thecpu)) {
  185. return FALSE;
  186. }
  187. if (thecpu.ebx==0x756e6547 && thecpu.edx==0x49656e69 && thecpu.ecx==0x6c65746e ) {
  188. CPUManufacturer=INTEL;
  189. if (thecpu.eax) {
  190. GetCpuId(1, &thecpu);
  191. CPUFamily=(thecpu.eax>>8)&0xf;
  192. CPUModel=(thecpu.eax>>4)&0xf;
  193. CPUStepping=(thecpu.eax)&0xf;
  194. CPUArchitecture=X86;
  195. CPUFeatures=thecpu.edx;
  196. return TRUE;
  197. }
  198. return FALSE;
  199. }
  200. if (thecpu.ebx==0x68747541 && thecpu.edx==0x69746e65 && thecpu.ecx==0x444d4163 ) {
  201. CPUManufacturer=AMD;
  202. if (thecpu.eax) {
  203. GetCpuId(1, &thecpu);
  204. CPUFamily=(thecpu.eax>>8)&0xf;
  205. CPUModel=(thecpu.eax>>4)&0xf;
  206. CPUStepping=(thecpu.eax)&0xf;
  207. CPUArchitecture=X86;
  208. CPUFeatures=thecpu.edx;
  209. return TRUE;
  210. }
  211. return FALSE;
  212. }
  213. CPUManufacturer=(ULONG)-1;
  214. return FALSE;
  215. }
  216. // Note that all of the timing functions should be in locked memory. Note also
  217. // that I should make sure that they are in the cache - preferably in the primary CPU
  218. // cache, but at least in the secondary cache.
  219. // We use timer 0 on the PC motherboard for our calibration of the CPU
  220. // cycle counter.
  221. #pragma warning ( disable : 4035 )
  222. LONGLONG
  223. __inline
  224. ReadCycleCounter (
  225. VOID
  226. )
  227. {
  228. __asm {
  229. rdtsc
  230. }
  231. }
  232. ULONG
  233. ReadPcTimer (
  234. VOID
  235. )
  236. {
  237. __asm {
  238. xor eax,eax
  239. out 0x43,al
  240. in al,0x40
  241. mov ecx,eax
  242. in al,0x40
  243. shl eax,8
  244. or eax,ecx
  245. }
  246. }
  247. ULONG
  248. __inline
  249. FastPcTimerRead (
  250. VOID
  251. )
  252. {
  253. __asm {
  254. xor eax,eax
  255. in al,0x41
  256. }
  257. }
  258. #pragma warning ( default : 4035 )
  259. VOID
  260. SetupPcTimer (
  261. VOID
  262. )
  263. {
  264. __asm {
  265. #if 0
  266. // The following code is for using timer 2. I have decided to use timer 0.
  267. // First turn off PC speaker and turn ON timer 2 gate.
  268. mov eax,1
  269. out 0x61,al
  270. // Now read from timer port to clear any previously latched data.
  271. // In case someone else latched data but never read it.
  272. in al,0x42
  273. in al,0x42
  274. // Now latch timer 2 status and make sure mode and latch type OK.
  275. mov eax,0xe8
  276. out 0x43,al
  277. in al,0x42
  278. and eax,0x37
  279. cmp eax,0x36
  280. jz timersetupok
  281. int 3
  282. timersetupok:
  283. #endif
  284. // Now read from timer port to clear any previously latched data.
  285. // In case someone else latched data but never read it.
  286. // Note that we do three reads in case both status and a 2 byte
  287. // count were latched.
  288. in al,0x40
  289. in al,0x40
  290. in al,0x40
  291. // Now latch timer 0 status and make sure mode and latch type OK.
  292. mov eax,0xe2
  293. out 0x43,al
  294. in al,0x40
  295. and eax,0x37
  296. cmp eax,0x34
  297. jz timersetupok
  298. int 3
  299. timersetupok:
  300. }
  301. // Now since we are NOT reprogramming the mode, or the count of the timer, we
  302. // do not know what count the timer counter will autoreload. Therefore, we
  303. // cannot make an accurate timing measurement if the timer wraps. Therefore
  304. // we wait here until the timer has a positive count that is large enough to
  305. // ensure that we can calibrate our delay loop without having the timer wrap.
  306. // Note that this does mean we WILL have an extra delay here waiting for the timer
  307. // to wrap. Note that we should also make sure that we disable the timer interrupts
  308. // so that they don't cause us to wait even longer.
  309. while (ReadPcTimer()<20)
  310. ;
  311. }
  312. // IMPORTANT! Note that it is possible though VERY unlikely that someone
  313. // could have sent a command to latch BOTH the count and the status
  314. // and then never done the reads. To clear out any latched data we
  315. // would thus need to do 3 reads. One for status, and 2 for possibly
  316. // 16 bits of latched data.
  317. // In this function, we check setup of the motherboard PC timer 1.
  318. // It should be setup for mode 2 counting, for doing single byte reads,
  319. // and for reloading a max count of 0x12. We verify this and return
  320. // true if so, false otherwise.
  321. BOOL
  322. SetupFastPcTimer (
  323. VOID
  324. )
  325. {
  326. __asm {
  327. // Read from timer port to clear any previously latched data.
  328. // We do 3 reads in case the timer was programmed for 16 bit reads
  329. // and both status and count were latched.
  330. in al,0x41
  331. in al,0x41
  332. in al,0x41
  333. // Now latch timer 2 status and make sure mode and latch type OK.
  334. mov eax,0xe4
  335. out 0x43,al
  336. in al,0x41
  337. and eax,0x37
  338. cmp eax,0x14
  339. jz timersetupok
  340. }
  341. Trap();
  342. return FALSE;
  343. timersetupok:
  344. return TRUE;
  345. }
  346. // Note that this function assumes that there is a cycle counter
  347. // in the machine and that calling ReadCycleCounter will read
  348. // it correctly.
  349. // If this function cannot successfully measure cyclespertick, it
  350. // returns 0.
  351. ULONG
  352. MeasureCPUCyclesPerTick (
  353. VOID
  354. )
  355. {
  356. ULONG i;
  357. ULONG ticks;
  358. LONGLONG cyclecount;
  359. ULONG totaltimerreads;
  360. ULONG fasttimermaxvalue,counterwrapped;
  361. ULONG cyclespertick;
  362. static ULONG largesttimermaxvalue=0;
  363. #if DEBUG
  364. static ULONG maxcyclespertick=0,mincyclespertick=0xffffffff;
  365. #endif
  366. // Make sure that the PC motherboard timer we will use is setup
  367. // correctly. If not, then punt.
  368. if (!SetupFastPcTimer()) {
  369. return 0;
  370. }
  371. // Get the variables we will use in the cache.
  372. cyclecount=0;
  373. for (i=0;i<MAXTIMERREADSLOTS;i++) {
  374. cyclecount+=TimerRead[i].firstreadtimestamp;
  375. }
  376. i=1;
  377. ticks=0;
  378. cyclecount=0;
  379. totaltimerreads=0;
  380. fasttimermaxvalue=0;
  381. counterwrapped=FALSE;
  382. // Get the functions we call while making measurements into the cache
  383. FastPcTimerRead();
  384. SerializeExecution();
  385. ReadCycleCounter();
  386. // Setup for the first time through the loop.
  387. TimerRead[i-1].value=0xffffffff;
  388. TimerRead[i-1].lastreadtimestamp=ReadCycleCounter();
  389. while (i<MAXTIMERREADSLOTS && totaltimerreads<LIMITTOTALTIMERREADS) {
  390. // Count total number of timer reads.
  391. totaltimerreads++;
  392. // Read the timer value and timestamp the end of the read
  393. // with the cycle counter.
  394. ticks=FastPcTimerRead();
  395. SerializeExecution();
  396. cyclecount=ReadCycleCounter();
  397. // Track the max count.
  398. if (ticks>fasttimermaxvalue) {
  399. fasttimermaxvalue=ticks;
  400. }
  401. // Track if the counter wraps.
  402. // IMPORTANT! We depend here on TimerRead[0].value ALWAYS being larger
  403. // than any value we can read from the hardware.
  404. if (ticks>TimerRead[i-1].value) {
  405. counterwrapped=TRUE;
  406. }
  407. // Make sure the cyclecounter didn't get blown away between this and
  408. // the last measurement. If so, then punt.
  409. // IMPORTANT! We depend here on TimerRead[0].lastreadtimestamp
  410. // being correctly initialized outside this loop.
  411. if ((LONGLONG)(cyclecount-TimerRead[i-1].lastreadtimestamp)<0) {
  412. Trap();
  413. return 0;
  414. }
  415. // If we are on the same tick as the last read, then update
  416. // the lastreadtimestamp and count for the previous set of reads.
  417. // IMPORTANT! We depend here on TimerRead[0].value ALWAYS being
  418. // different from any value we can read from the hardware.
  419. if (ticks==TimerRead[i-1].value) {
  420. TimerRead[i-1].lastreadtimestamp=cyclecount;
  421. TimerRead[i-1].count++;
  422. continue;
  423. }
  424. // If we get here, we have a new timer value. So log the new
  425. // value.
  426. TimerRead[i].firstreadtimestamp=cyclecount;
  427. TimerRead[i].lastreadtimestamp=cyclecount;
  428. TimerRead[i].value=ticks;
  429. TimerRead[i].count=1;
  430. // Point to next timer read slot.
  431. i++;
  432. }
  433. // Make sure that we don't try to calibrate with a broken timer.
  434. if (totaltimerreads>=LIMITTOTALTIMERREADS) {
  435. Trap();
  436. return 0;
  437. }
  438. // Track the largest value read from timer 1 since boot.
  439. if (fasttimermaxvalue>largesttimermaxvalue) {
  440. largesttimermaxvalue=fasttimermaxvalue;
  441. // Make sure the max count is OK.
  442. if (largesttimermaxvalue>DEFAULTMAXTIMERCOUNT) {
  443. dprintf(("Max count of 0x%x instead of 0x12 on channel 1 of 8254 timer.", largesttimermaxvalue));
  444. }
  445. }
  446. // If we wrapped, and our current max count either equals or is within
  447. // 2 ticks of the largest since boot, then use the largest max count
  448. // since boot in our loops to calculate the upper and lower bounds
  449. // on cyclespertick.
  450. // Otherwise we have wrapped and logged a maximum timer 1 count that
  451. // is not close enough to the maximum since boot to be able to trust
  452. // that the max since boot is the correct value to use when calculating
  453. // the bounds, so, force the calculations to only pair up measurements
  454. // that do not include counter wraps in the calculations by setting
  455. // fasttimermaxvalue to zero.
  456. // If we did not wrap, then the value of fasttimermaxvalue is irrelevant
  457. // since it will not be needed to correctly calculate the upper and lower
  458. // bounds on cyclespertick.
  459. if (counterwrapped) {
  460. if (fasttimermaxvalue+2>=largesttimermaxvalue) {
  461. fasttimermaxvalue=largesttimermaxvalue;
  462. // Now make sure that largesttimermaxvalue matches what
  463. // the default maximum should be. If not, then prevent the
  464. // upper and lower bound calculations from using measurement
  465. // pairs that have wrapped timer ticks.
  466. if (fasttimermaxvalue!=DEFAULTMAXTIMERCOUNT) {
  467. fasttimermaxvalue=0;
  468. }
  469. }
  470. else {
  471. fasttimermaxvalue=0;
  472. }
  473. }
  474. // Now calculate the cyclespertick.
  475. // Scan through all of the pairs of tick values and calculate an
  476. // an upper and lower bound on the cycles per tick for each pair.
  477. {
  478. ULONG j;
  479. ULONG lowerbound,upperbound;
  480. ULONG maxlowerbound,minupperbound;
  481. ULONG lowerboundtotal,upperboundtotal;
  482. ULONG lowerboundcount,upperboundcount;
  483. lowerboundtotal=0;
  484. lowerboundcount=0;
  485. upperboundtotal=0;
  486. upperboundcount=0;
  487. maxlowerbound=0;
  488. minupperbound=0xffffffff;
  489. for (i=1;i<MAXTIMERREADSLOTS;i++) {
  490. LONG totaltickdiff;
  491. // If a tick was read more than once, then update the lower bound
  492. // on cycles per tick.
  493. if (TimerRead[i].count>1) {
  494. //Trap();
  495. lowerbound=(ULONG)(TimerRead[i].lastreadtimestamp-TimerRead[i].firstreadtimestamp);
  496. if (lowerbound>maxlowerbound) {
  497. maxlowerbound=lowerbound;
  498. }
  499. lowerboundtotal+=lowerbound;
  500. lowerboundcount++;
  501. }
  502. totaltickdiff=0;
  503. for (j=i+1;j<MAXTIMERREADSLOTS;j++) {
  504. LONG tickdiff;
  505. tickdiff=TimerRead[j-1].value-TimerRead[j].value;
  506. if (tickdiff<0) {
  507. // The timer tick values have wrapped, if we have a valid
  508. // maximum for the timer tick values, then fix up the
  509. // tickdiff and allow these pairs. Otherwise prevent wrapped
  510. // tick value pairs from being used in our calculations.
  511. if (fasttimermaxvalue) {
  512. tickdiff+=fasttimermaxvalue;
  513. }
  514. else {
  515. break;
  516. }
  517. }
  518. // At this point tickdiff MUST be >= 1. If not, then something
  519. // is very wrong. So punt completely.
  520. if (tickdiff<1) {
  521. Trap();
  522. return 0;
  523. }
  524. totaltickdiff+=tickdiff;
  525. lowerbound=(ULONG)(TimerRead[j].lastreadtimestamp-TimerRead[i].firstreadtimestamp)/(totaltickdiff+1);
  526. if (lowerbound>maxlowerbound) {
  527. maxlowerbound=lowerbound;
  528. }
  529. lowerboundtotal+=lowerbound;
  530. lowerboundcount++;
  531. if (totaltickdiff>1) {
  532. upperbound=(ULONG)(TimerRead[j].firstreadtimestamp-TimerRead[i].lastreadtimestamp)/(totaltickdiff-1);
  533. if (upperbound<minupperbound) {
  534. minupperbound=upperbound;
  535. }
  536. upperboundtotal+=upperbound;
  537. upperboundcount++;
  538. }
  539. }
  540. }
  541. {
  542. static totalcount=0;
  543. totalcount++;
  544. }
  545. {
  546. if ((ULONG)abs((LONG)(minupperbound-maxlowerbound))*10>(maxlowerbound+minupperbound)/2) {
  547. static badcount=0;
  548. badcount++;
  549. //Trap();
  550. return 0;
  551. }
  552. }
  553. if (maxlowerbound>minupperbound) {
  554. static invertedcount=0;
  555. invertedcount++;
  556. //Trap();
  557. }
  558. {
  559. static ULONG totalminupperbound=0;
  560. static ULONG countminupperbound=0;
  561. static ULONG minminupperbound=0xffffffff;
  562. static ULONG maxminupperbound=0;
  563. if (minupperbound<minminupperbound) {
  564. minminupperbound=minupperbound;
  565. }
  566. if (minupperbound>maxminupperbound) {
  567. maxminupperbound=minupperbound;
  568. }
  569. totalminupperbound+=minupperbound;
  570. countminupperbound++;
  571. }
  572. cyclespertick=(upperboundtotal/upperboundcount + lowerboundtotal/lowerboundcount)/2;
  573. cyclespertick=(maxlowerbound+minupperbound)/2;
  574. }
  575. // IDEA! We can probably get a better estimate if we randomize the
  576. // time of our reads. That will remove any relationship between the
  577. // time it takes to do an i/o read and the period of the timer tick
  578. // itself.
  579. #if DEBUG
  580. // Log statistics.
  581. if (cyclespertick>maxcyclespertick) {
  582. maxcyclespertick=cyclespertick;
  583. }
  584. if (cyclespertick<mincyclespertick) {
  585. mincyclespertick=cyclespertick;
  586. }
  587. #endif
  588. return cyclespertick;
  589. }