Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

496 lines
20 KiB

  1. /*++
  2. Copyright (c) 1990 Microsoft Corporation
  3. Module Name:
  4. p6.c
  5. Abstract:
  6. Counted events for P6 processor
  7. Author:
  8. Ken Reneris
  9. Environment:
  10. Notes:
  11. Revision History:
  12. --*/
  13. #include "ntddk.h"
  14. #include "..\..\pstat.h"
  15. #include "stat.h"
  16. #ifdef ALLOC_DATA_PRAGMA
  17. #pragma data_seg("PAGE")
  18. #endif
  //
  // Official descriptions
  //
  // One long-form description string per P6 performance-counter event.
  // Each array is named desc_<event code> and is referenced by the P6Events
  // entry that carries the same event code.
  //

  char desc_0x03[] = "Number of store buffer blocks.";
  char desc_0x04[] = "Number of store buffer drain cycles.";
  char desc_0x05[] = "Number of misaligned data memory references.";
  char desc_0x06[] = "Number of segment register loads.";
  char desc_0x10[] = "Number of computational floating point operations "
                     "executed.";
  char desc_0x11[] = "Number of floating point exception cases handled by "
                     "microcode.";
  char desc_0x12[] = "Number of multiplies.";
  char desc_0x13[] = "Number of divides.";
  char desc_0x14[] = "Number of cycles the divider is busy.";
  char desc_0x21[] = "Number of L2 address strobes.";
  char desc_0x22[] = "Number of cycles in which the data bus is busy.";
  char desc_0x23[] = "Number of cycles in which the data bus is busy "
                     "transferring data from L2 to the processor.";
  char desc_0x24[] = "Number of lines allocated in the L2.";
  char desc_0x25[] = "Number of modified lines allocated in the L2.";
  char desc_0x26[] = "Number of lines removed from the L2 for any reason.";
  char desc_0x27[] = "Number of Modified lines removed from the L2 for any "
                     "reason.";
  char desc_0x28[] = "Number of L2 instruction fetches.";
  char desc_0x29[] = "Number of L2 data loads.";
  char desc_0x2A[] = "Number of L2 data stores.";
  char desc_0x2E[] = "Total number of L2 requests.";
  char desc_0x43[] = "Total number of all memory references, both cacheable "
                     "and non-cacheable.";
  char desc_0x45[] = "Number of total lines allocated in the DCU.";
  char desc_0x46[] = "Number of M state lines allocated in the DCU.";
  char desc_0x47[] = "Number of M state lines evicted from the DCU. This "
                     "includes evictions via snoop HITM, intervention "
                     "or replacement.";
  char desc_0x48[] = "Weighted number of cycles while a DCU miss is "
                     "outstanding.";
  char desc_0x60[] = "Number of bus requests outstanding.";
  char desc_0x61[] = "Number of bus clock cycles that this processor is "
                     "driving the BNR pin.";
  char desc_0x62[] = "Number of clocks in which DRDY is asserted.";
  char desc_0x63[] = "Number of clocks in which LOCK is asserted.";
  char desc_0x64[] = "Number of bus clock cycles that this processor is "
                     "receiving data.";
  char desc_0x65[] = "Number of Burst Read transactions.";
  char desc_0x66[] = "Number of Read For Ownership transactions.";
  char desc_0x67[] = "Number of Write Back transactions.";
  char desc_0x68[] = "Number of Instruction Fetch transactions.";
  char desc_0x69[] = "Number of Invalidate transactions.";
  char desc_0x6A[] = "Number of Partial Write transactions.";
  char desc_0x6B[] = "Number of Partial transactions.";
  char desc_0x6C[] = "Number of I/O transactions.";
  char desc_0x6D[] = "Number of Deferred transactions.";
  char desc_0x6E[] = "Number of Burst transactions.";
  char desc_0x6F[] = "Number of memory transactions.";
  char desc_0x70[] = "Total number of all transactions.";
  char desc_0x79[] = "Number of cycles for which the processor is not halted.";
  char desc_0x7A[] = "Number of bus clock cycles that this processor is "
                     "driving the HIT pin, including cycles due to "
                     "snoop stalls.";
  char desc_0x7B[] = "Number of bus clock cycles that this processor is "
                     "driving the HITM pin, including cycles due to "
                     "snoop stalls.";
  char desc_0x7E[] = "Number of clock cycles for which the bus is snoop "
                     "stalled.";
  char desc_0x80[] = "Total number of instruction fetches, both cacheable "
                     "and uncacheable.";
  char desc_0x81[] = "Total number of instruction fetch misses.";
  char desc_0x85[] = "Number of ITLB misses.";
  char desc_0x86[] = "The number of cycles that instruction fetch "
                     "pipestage is stalled (includes cache "
                     "misses, ITLB misses, ITLB faults and "
                     "Victim Cache evictions).";
  char desc_0x87[] = "Number of cycles for which the instruction "
                     "length decoder is stalled.";
  char desc_0xA2[] = "Number of cycles for which there are resource related "
                     "stalls.";
  char desc_0xC0[] = "Number of instructions retired.";
  char desc_0xC1[] = "Number of computational floating point operations "
                     "retired.";
  char desc_0xC2[] = "Number of UOPs retired.";
  char desc_0xC4[] = "Number of branch instructions retired.";
  char desc_0xC5[] = "Number of mispredicted branches retired.";
  char desc_0xC6[] = "Number of processor cycles for which interrupts are "
                     "disabled.";
  char desc_0xC7[] = "Number of processor cycles for which interrupts are "
                     "disabled and interrupts are pending.";
  char desc_0xC8[] = "Number of hardware interrupts received.";
  char desc_0xC9[] = "Number of taken branches retired.";
  char desc_0xCA[] = "Number of taken mispredicted branches retired.";
  char desc_0xD0[] = "Number of instructions decoded.";
  char desc_0xD2[] = "Number of cycles or events for partial stalls.";
  char desc_0xE0[] = "Number of branch instructions decoded.";
  char desc_0xE2[] = "Number of branches that miss the BTB.";
  char desc_0xE4[] = "Number of bogus branches.";
  char desc_0xE6[] = "Number of times BACLEAR is asserted.";
  114. #define RARE 100
  115. // suggested counts are set to be around .1ms
  116. // 1 2 3* 4
  117. COUNTED_EVENTS P6Events[] = { // 1234567890123456789012345678901234567890
  118. // Memory Ordering
  119. // LD_BLOCKS - Number of store buffer blocks.
  120. 0x03, "sbb", 1000, "Store buffer blocks",
  121. "LD_BLOCKS", desc_0x03,
  122. // SB_DRAINS - Number of store buffer drain cycles.
  123. 0x04, "sbd", RARE, "Store buffer drain cycles",
  124. "SB_DRAINS", desc_0x04,
  125. // MISALIGN_MEM_REF - Number of misaligned data memory references
  126. 0x05, "misalign", 1000, "Misadligned data ref",
  127. "MISALIGN_MEM_REF", desc_0x05,
  128. // Segment Register Loads
  129. // SEGMENT_REG_LOADS - Number of segment register loads.
  130. 0x06, "segloads", 10000, "Segment loads",
  131. "SEGMENT_REG_LOADS", desc_0x06,
  132. // Floating Point
  133. // FP_COMP_OPS_EXE - Number of computatonal floating point operations
  134. // executed.
  135. 0x10, "flops", 1000, "FLOPs (computational) executed",
  136. "FP_COMP_OPS_EXE", desc_0x10,
  137. // FP_ASSIST - Number of floating point exception cases handled by
  138. // microcode.
  139. 0x11, "eflops", RARE, "FP exceptions handled by ucode",
  140. "FP_ASSIST", desc_0x11,
  141. // MUL - Number of multiplies.
  142. 0x12, "mul", 1000, "Multiplies",
  143. "MUL", desc_0x12,
  144. // DIV - Number of divides.
  145. 0x13, "div", 1000, "Divides",
  146. "DIV", desc_0x13,
  147. // CYCLES_DIV_BUSY - Number of cycles the divider is busy.
  148. 0x14, "divb", 10000, "Divider busy cycles",
  149. "CYCLES_DIV_BUSY", desc_0x14,
  150. // see also 0xC1 below
  151. // Secondary Cache (L2)
  152. // L2_ADS - Number of L2 address strobes.
  153. 0x21, "l2astrobe", 1000, "L2 address stobes",
  154. "L2_ADS", desc_0x21,
  155. // L2_DBUS_BUSY - Number of cycles in which the data bus was busy.
  156. 0x22, "l2busy", 10000, "L2 data bus busy cycles",
  157. "L2_DBUS_BUSY", desc_0x22,
  158. // L2_DBUS_BUSY_RD - Number of cycles in which the data bus was busy
  159. // transfering data from L2 to processor.
  160. 0x23, "l2busyrd", 10000, "L2 data bus to cpu busy cycles",
  161. "L2_DBUS_BUSY_RD", desc_0x23,
  162. // L2_LINES_IN - Number of lines allocated in the L2.
  163. 0x24, "l2all", 1000, "L2 lines allocated",
  164. "L2_LINES_IN", desc_0x24,
  165. // L2LINEINM - Number of Modified lines allocated in the L2.
  166. 0x25, "l2m", 1000, "L2 lines M state",
  167. "L2_M_LINES_IN", desc_0x25,
  168. // L2_LINES_OUT - Number of lines removed from the L2 for any reason.
  169. 0x26, "l2evict", 1000, "L2 lines removed",
  170. "L2_LINES_OUT", desc_0x26,
  171. // L2_M_LINES_OUT - Number of Modified lines removed from the L2 for
  172. // any reason.
  173. 0x27, "l2mevict", 100, "L2 lines M state removed",
  174. "L2_M_LINES_OUT", desc_0x27,
  175. // L2_IFETCH - L2 instruction fetches - "MESI" (0Fh)
  176. 0x28, "l2inst", 0, "L2 instruction fetches",
  177. "L2_IFETCH", desc_0x28,
  178. // L2_LD - L2 data loads - "MESI" (0Fh)
  179. 0x29, "l2load", 0, "L2 data loads",
  180. "L2_LD", desc_0x29,
  181. // L2_ST - L2 data stores - "MESI" (0Fh)
  182. 0x2a, "l2store", 0, "L2 data stores",
  183. "L2_ST", desc_0x2A,
  184. // L2_RQSTS - Total Number of L2 Requests - "MESI" (0Fh)
  185. 0x2e, "l2req", 0, "L2 requests (all)",
  186. "L2_RQSTS", desc_0x2E,
  187. // Data Cache Unit (DCU)
  188. // DATA_MEM_REFS - Total number of all memory referenced both cacheable
  189. // and non-cachable
  190. 0x43, "memref", 10000, "Data memory references",
  191. "DATA_MEM_REFS", desc_0x43,
  192. // DCU_LINES_IN - Number of total lines allocated in the DCU
  193. 0x45, "dculines", 1000, "DCU lines allocated",
  194. "DCU_LINES_IN", desc_0x45,
  195. // DCU_M_LINES_IN - Number of M state lines allocated in the DCU
  196. 0x46, "dcumlines", 100, "DCU M state lines allocated",
  197. "DCU_M_LINES_IN", desc_0x46,
  198. // DCU_M_LINES_OUT - Number of M state lines evicted from the DCU.
  199. // This includes evictions via snoop HITM, intervention or replacement.
  200. 0x47, "dcumevicted", 100, "DCU M state lines evicted",
  201. "DCU_M_LINES_OUT", desc_0x47,
  202. // DCU_MISS_OUTSTANDING - Weighted number of cycles while a DCU miss is
  203. // outstanding. Note - An access that also misses the L2 is short-changed
  204. // by 2 cycles. i.e. - if counts N cycles, should be N+2 cycles.
  205. // Count value not precise, but still usful.
  206. 0x48, "dcuout", 100000, "Weighted DCU misses outstd",
  207. "DCU_MISS_OUTSTANDING", desc_0x48,
  208. // External Bus Logic (EBL)
  209. // BUS_REQ_OUTSTANDING - Total number of bus requests outstanding.
  210. // Note - Counts only DCU full-line cacheable reads (not RFO's, writes,
  211. // ifetches or anything else. Counts "waiting for bus" to "Complete"
  212. // (last data chunk received).
  213. 0x60, "bus", 1000, "Bus requests outstanding",
  214. "BUS_REQ_OUTSTANDING", desc_0x60,
  215. // BUS_BRN_DRV - Number of bus clock cycles that this processor is driving
  216. // the corresponding pin.
  217. 0x61, "bnr", 0, "Bus BNR pin drive cycles",
  218. "BUS_BNR_DRV", desc_0x61,
  219. // BUS_DRDY_CLOCKS - Number of clocks in which DRDY is asserted.
  220. // Note - UMSK = 0h counts bus clocks when PPP is driving DRDY.
  221. // UMSK = 20h counts in processor clocks when any agent is
  222. // driving DRDY.
  223. 0x62, "drdy", 0, "Bus DRDY asserted clocks",
  224. "BUS_DRDY_CLOCKS", desc_0x62,
  225. // BUS_LOCK_CLOCKS - Number of clocks LOCK is asserted.
  226. // Note - always counts in processor clocks.
  227. 0x63, "lock", 0, "Bus LOCK asserted clocks",
  228. "BUS_LOCK_CLOCKS", desc_0x63,
  229. // BUS_DATA_RCV - Number of bus clock cycles that this p6 is receiving data.
  230. 0x64, "rdata", 10000, "Bus clocks receiving data",
  231. "BUS_DATA_RCV", desc_0x64,
  232. // BUS_TRANS_BRD - Total number of Burst Read transactions.
  233. 0x65, "bread", 10000, "Bus burst read transactions",
  234. "BUS_TRANS_BRD", desc_0x65,
  235. // BUS_TRANS_RFO - Total number of Read For Ownership transactions.
  236. 0x66, "owner", 1000, "Bus read for ownership trans",
  237. "BUS_TRANS_RFO", desc_0x66,
  238. // BUS_TRANS_WB - Total number of Write Back transactions
  239. 0x67, "writeback", 1000, "Bus writeback transactions",
  240. "BUS_TRANS_WB", desc_0x67,
  241. // BUS_TRANS_IFETCH - Total number of instruction fetch transactions.
  242. 0x68, "binst", 10000, "Bus instruction fetches",
  243. "BUS_TRANS_IFETCH", desc_0x68,
  244. // BUS_TRANS_INVAL - Total number of invalidate transactions.
  245. 0x69, "binvalid", 1000, "Bus invalidate transactions",
  246. "BUS_TRANS_INVAL", desc_0x69,
  247. // BUS_TRANS_PWR - Total number of Partial Write transactions.
  248. 0x6a, "bpwrite", 1000, "Bus partial write transactions",
  249. "BUS_TRANS_PWR", desc_0x6A,
  250. // BUS_TRANS_P - Total number of Partial transactions
  251. 0x6b, "bptrans", 1000, "Bus partial transactions",
  252. "BUS_TRANS_P", desc_0x6B,
  253. // BUS_TRANS_IO - Total number of IO transactions
  254. 0x6c, "bio", 10000, "Bus IO transactions",
  255. "BUS_TRANS_IO", desc_0x6C,
  256. // BUS_TRANS_DEF - Total number of deferred transactions.
  257. 0x6d, "bdeferred", 10000, "Bus deferred transactions",
  258. "BUS_TRANS_DEF", desc_0x6D,
  259. // BUS_TRANS_BURST - Total number of Burst transactions.
  260. 0x6e, "bburst", 10000, "Bus burst transactions (total)",
  261. "BUS_TRANS_BURST", desc_0x6E,
  262. // BUS_TRANS_MEM - Total number of memory transactions.
  263. 0x6f, "bmemory", 10000, "Bus memory transactions (total)",
  264. "BUS_TRANS_MEM", desc_0x6F,
  265. // BUS_TRANS_ANY - Total number of all transactions.
  266. 0x70, "btrans", 10000, "Bus all transactions",
  267. "BUS_TRANS_ANY", desc_0x70,
  268. // continued at 0x7a below
  269. // Clocks
  270. // CPU_CLK_UNHALTED - Number of cycles for which the processor is not
  271. // halted.
  272. 0x79, "nhalt", 100000, "CPU was not HALTED cycles",
  273. "CPU_CLK_UNHALTED", desc_0x79,
  274. // External Bus Logic (EBL) (continued from 0x70 above)
  275. // BUS_HIT_DRV - Number of bus clock cycles that this processor is driving
  276. // the corresponding pin.
  277. // Note - includes cycles due to snoop stalls
  278. 0x7a, "hit", 1000, "Bus CPU drives HIT cycles",
  279. "BUS_HIT_DRV", desc_0x7A,
  280. // BUS_HITM_DRV - Number of bus clock cycles that this processor is driving
  281. // the cooresponding pin.
  282. // Note - includes cycles due to snoop stalls
  283. 0x7b, "hitm", 1000, "Bus CPU drives HITM cycles",
  284. "BUS_HITM_DRV", desc_0x7B,
  285. // BUS_SNOOP_STALL - Number of clock cycles for which the bus is snoop
  286. // stalled.
  287. 0x7e, "bsstall", 0, "Bus snoop stalled cycles",
  288. "BUS_SNOOP_STALL", desc_0x7E,
  289. // Instruction Fetch Unit (IFU)
  290. // IFU_IFETCH - Total number of instruction fetches (cacheable and
  291. // uncacheable).
  292. 0x80, "ifetch", 100000, "Instruction fetches",
  293. "IFU_IFETCH", desc_0x80,
  294. // IFU_IFETCH_MISS _ Total number of instruction fetch misses.
  295. 0x81, "imfetch", 10000, "Instrection fetch Misses",
  296. "IFU_IFETCH_MISS", desc_0x81,
  297. // ITLB_MISS - Number of ITLB misses
  298. 0x85, "itlbmiss", 100, "Instruction TLB misses",
  299. "ITLB_MISS", desc_0x85,
  300. // IFU_MEM_STALL - The number of cycles that instruction fetch pipestage
  301. // is stalled (includes cache misses, ITLB misses, ITLB faults and
  302. // Victim Cache evictions).
  303. 0x86, "ifstall", 1000, "Inst fetch stalled cycles",
  304. "IFU_MEM_STALL", desc_0x86,
  305. // ILD_STALL - Number of cycles for which the instruction length decoder
  306. // is stalled.
  307. 0x87, "ildstall", 1000, "Inst len decoder stalled cycles",
  308. "ILD_STALL", desc_0x87,
  309. // Stalls
  310. // RESOURCE_STALLS - Number of cycles for which there are resouce related
  311. // stalls.
  312. 0xa2, "rstall", 10000, "Resource related stalls",
  313. "RESOURCE_STALLS", desc_0xA2,
  314. // see also 0xd2 below
  315. // Instruction Decode and Retirement
  316. // INST_RETIRED - Number of instructions retired.
  317. 0xc0, "instr", 100000, "Instructions retired",
  318. "INST_RETIRED", desc_0xC0,
  319. // continued at 0xc2 below
  320. // Floating Point (continued from 0x14 above)
  321. // FLOPS - Number of computational floating point operations retired.
  322. 0xc1, "fpr", RARE, "FP compute opers retired",
  323. "FLOPS", desc_0xC1,
  324. // Instruction Decode and Retirement (continued from 0xc0 above)
  325. // UOPS_RETIRED - Number of Uops retired
  326. 0xc2, "ur", 100000, "UOPs retired",
  327. "UOPS_RETIRED", desc_0xC2,
  328. // see also 0xd0 below
  329. // Branches
  330. // BR_INST_RETIRED - Number of branch instructions that retire.
  331. 0xc4, "br", 10000, "Branches retired",
  332. "BR_INST_RETIRED", desc_0xC4,
  333. // BR_MISS_PRED_RETIRED - Number of mispredicted branches that retire.
  334. 0xc5, "brm", 1000, "Branch miss predictions retired",
  335. "BR_MISS_PRED_RETIRED", desc_0xC5,
  336. // continued at 0xc9 below
  337. // Interrupts
  338. // CYCLES_INT_MASKED - Number of processor cycles for which interrupts
  339. // are disabled.
  340. 0xc6, "intm", 10000, "Interrupts masked cycles",
  341. "CYCLES_INT_MASKED", desc_0xC6,
  342. // CYCLES_INT_PENDING_AND_MASKED - Number of processor cycles for which
  343. // interrupts are disabled and interrupts are pending.
  344. 0xc7, "intmp", 1000, "Int pending while masked cycles",
  345. "CYCLES_INT_PENDING_AND_MASKED", desc_0xC7,
  346. // HW_INT_RX - Number of hardware interrupts received.
  347. 0xc8, "int", 0, "Hardware interrupts received",
  348. "HW_INT_RX", desc_0xC8,
  349. // Branches (continued from 0xc5 above)
  350. // BR_TAKEN_RETIRED - Number of taken branches that are retired.
  351. 0xc9, "brt", 10000, "Taken branches retired",
  352. "BR_TAKEN_RETIRED", desc_0xC9,
  353. // BR_MISS_PRED_TAKEN_RET - Number of Mispredictions that are retired.
  354. 0xca, "brtm", 0, "Taken branch miss pred retired",
  355. "BR_MISS_PRED_TAKEN_RET", desc_0xCA,
  356. // continued at 0xe0 below
  357. // Instruction Decode and Retirement (continued from 0xc2 above)
  358. // INST_DECODED - Number of Instructions decoded.
  359. 0xd0, "idecode", 100000, "Instructions decoded",
  360. "INST_DECODED", desc_0xD0,
  361. // Stalls (continued from 0xa2 above)
  362. // PARTIAL_RAT_STALLS - Number of cycles or events for partial stalls.
  363. 0xd2, "pstall", 1000, "Partial register stalls",
  364. "PARTIAL_RAT_STALLS", desc_0xD2,
  365. // Branches (continued from 0xca above)
  366. // BR_INST_DECODED - Number of branch instructions that are decoded.
  367. 0xe0, "ibdecode", 0, "Branches decoded",
  368. "BR_INST_DECODED", desc_0xE0,
  369. // BTB_MISSES - Number of branches that miss the BTB
  370. 0xe2, "btbmiss", 1000, "BTB misses",
  371. "BTB_MISSES", desc_0xE2,
  372. // BR_BOGUS - Number of bogus branches.
  373. 0xe4, "brbogus", 1000, "Bogus branches",
  374. "BR_BOGUS", desc_0xE4,
  375. // BACLEARS - Number of times BACLEAR is asserted.
  376. 0xe6, "baclear", 1000, "BACLEARS Asserted",
  377. "BACLEARS", desc_0xE6,
  378. // Terminator
  379. 0, NULL, 0, NULL,
  380. NULL, NULL
  381. } ;