Team Fortress 2 Source Code as on 22/4/2020
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

322 lines
7.4 KiB

  1. //========= Copyright Valve Corporation, All rights reserved. ============//
  2. //
  3. // Purpose:
  4. //
  5. // $NoKeywords: $
  6. //
  7. //=============================================================================//
  8. #ifndef P4PERFORMANCECOUNTERS_H
  9. #define P4PERFORMANCECOUNTERS_H
  10. #pragma once
  11. // Pentium 4 support
  12. /*
  13. http://developer.intel.com/design/Pentium4/documentation.htm
  14. IA-32 Intel Architecture Software Developer's Manual Volume 1: Basic Architecture
  15. IA-32 Intel Architecture Software Developer's Manual Volume 2A: Instruction Set Reference, A-M
  16. IA-32 Intel Architecture Software Developer's Manual Volume 2B: Instruction Set Reference, N-Z
  17. IA-32 Intel Architecture Software Developer's Manual Volume 3: System Programming Guide
  18. From Mikael Pettersson's perfctr:
  19. http://user.it.uu.se/~mikpe/linux/perfctr/
  20. * Known quirks:
  21. - OVF_PMI+FORCE_OVF counters must have an ireset value of -1.
  22. This allows the regular overflow check to also handle FORCE_OVF
  23. counters. Not having this restriction would lead to MAJOR
  24. complications in the driver's "detect overflow counters" code.
  25. There is no loss of functionality since the ireset value doesn't
  26. affect the counter's PMI rate for FORCE_OVF counters.
  27. - In experiments with FORCE_OVF counters, and regular OVF_PMI
  28. counters with small ireset values between -8 and -1, it appears
  29. that the faulting instruction is subjected to a new PMI before
  30. it can complete, ad infinitum. This occurs even though the driver
  31. clears the CCCR (and in testing also the ESCR) and invokes a
  32. user-space signal handler before restoring the CCCR and resuming
  33. the instruction.
  34. */
  // Number of performance counters; equals the number of enumerators in Counters.
  #define NCOUNTERS 18

  // Zero-based index of each of the 18 counters. The index is added to
  // MSR_COUNTER_BASE / MSR_CCCR_BASE to form a register address and is also
  // used as the first subscript of cccr_escr_map.
  enum Counters
  {
      // BPU counters
      MSR_BPU_COUNTER0   = 0,
      MSR_BPU_COUNTER1   = 1,
      MSR_BPU_COUNTER2   = 2,
      MSR_BPU_COUNTER3   = 3,

      // MS counters
      MSR_MS_COUNTER0    = 4,
      MSR_MS_COUNTER1    = 5,
      MSR_MS_COUNTER2    = 6,
      MSR_MS_COUNTER3    = 7,

      // FLAME counters
      MSR_FLAME_COUNTER0 = 8,
      MSR_FLAME_COUNTER1 = 9,
      MSR_FLAME_COUNTER2 = 10,
      MSR_FLAME_COUNTER3 = 11,

      // IQ counters
      MSR_IQ_COUNTER0    = 12,
      MSR_IQ_COUNTER1    = 13,
      MSR_IQ_COUNTER2    = 14,
      MSR_IQ_COUNTER3    = 15,
      MSR_IQ_COUNTER4    = 16,
      MSR_IQ_COUNTER5    = 17
  };

  // MSR address of counter 0; counter N lives at MSR_COUNTER_BASE + N.
  #define MSR_COUNTER_BASE 0x300

  // MSR address of the CCCR paired with counter 0; CCCR N lives at MSR_CCCR_BASE + N.
  #define MSR_CCCR_BASE 0x360
  62. #pragma pack(push, 1)
  63. // access to these bits is through the methods
  64. typedef union ESCR
  65. {
  66. struct
  67. {
  68. uint64 Reserved0_1 : 2; //
  69. uint64 USR : 1; //
  70. uint64 OS : 1; //
  71. uint64 TagEnable : 1; //
  72. uint64 TagValue : 4; //
  73. uint64 EventMask : 16; // from event select
  74. uint64 ESCREventSelect : 6; // 31:25 class of event
  75. uint64 Reserved31 : 1; //
  76. uint64 Reserved32_63 : 32; //
  77. };
  78. uint64 flat;
  79. } ESCR;
  80. typedef union CCCR
  81. {
  82. struct
  83. {
  84. uint64 Reserved0_11 : 12;// 0 -11
  85. uint64 Enable : 1; // 12
  86. uint64 CCCRSelect : 3; // 13-15
  87. uint64 Reserved16_17 : 2; // 16 17
  88. uint64 Compare : 1; // 18
  89. uint64 Complement : 1; // 19
  90. uint64 Threshold : 4; // 20-23
  91. uint64 Edge : 1; // 24
  92. uint64 FORCE_OVF : 1; // 25
  93. uint64 OVF_PMI : 1; // 26
  94. uint64 Reserved27_29 : 3; // 27-29
  95. uint64 Cascade : 1; // 30
  96. uint64 OVF : 1; // 31
  97. uint64 Reserved32_63 : 32; //
  98. };
  99. uint64 flat;
  100. } CCCR;
  101. #pragma pack(pop)
  102. extern const unsigned short cccr_escr_map[NCOUNTERS][8]; // defined elsewhere; P4BaseEvent::SetCounter reads [counter index][CCCRSelect value] to obtain the ESCR MSR port (8 columns = the 3-bit CCCRSelect range)
  // Value stored into ESCR::TagEnable by P4BaseEvent::SetTagging.
  enum P4TagState
  {
      TagDisable = 0,
      TagEnable  = 1
  };
  // Value stored into CCCR::FORCE_OVF by P4BaseEvent::SetOverflowEnables.
  enum P4ForceOverflow
  {
      ForceOverflowDisable = 0,
      ForceOverflowEnable  = 1
  };
  // Value stored into CCCR::OVF_PMI by P4BaseEvent::SetOverflowEnables.
  enum P4OverflowInterrupt
  {
      OverflowInterruptDisable = 0,
      OverflowInterruptEnable  = 1
  };
  118. // Turn off the no return value warning in ReadCounter.
  119. #pragma warning( disable : 4035 )
  120. class P4BaseEvent
  121. {
  122. int m_counter;
  123. protected:
  124. void SetCounter(int counter)
  125. {
  126. m_counter = counter;
  127. cccrPort = MSR_CCCR_BASE + m_counter;
  128. counterPort = MSR_COUNTER_BASE + m_counter;
  129. escrPort = cccr_escr_map[m_counter][cccr.CCCRSelect];
  130. }
  131. public:
  132. unsigned short m_eventMask;
  133. const tchar *description;
  134. PME *pme;
  135. ESCR escr;
  136. CCCR cccr;
  137. int counterPort;
  138. int cccrPort;
  139. int escrPort;
  140. P4BaseEvent()
  141. {
  142. pme = PME::Instance();
  143. m_eventMask = 0;
  144. description = _T("");
  145. escr.flat = 0;
  146. cccr.flat = 0;
  147. cccr.Reserved16_17 = 3; // must be set
  148. escrPort = 0;
  149. m_counter = -1;
  150. }
  151. void StartCounter()
  152. {
  153. cccr.Enable = 1;
  154. pme->WriteMSR( cccrPort, cccr.flat );
  155. }
  156. void StopCounter()
  157. {
  158. cccr.Enable = 0;
  159. pme->WriteMSR( cccrPort, cccr.flat );
  160. }
  161. void ClearCounter()
  162. {
  163. pme->WriteMSR( counterPort, 0ui64 ); // clear
  164. }
  165. void WriteCounter( int64 value )
  166. {
  167. pme->WriteMSR( counterPort, value ); // clear
  168. }
  169. int64 ReadCounter()
  170. {
  171. #if PME_DEBUG
  172. if ( escr.USR == 0 && escr.OS == 0 )
  173. return -1; // no area to collect, use SetCaptureMode
  174. if ( escr.EventMask == 0 )
  175. return -2; // no event mask set
  176. if ( m_counter == -1 )
  177. return -3; // counter not legal
  178. #endif
  179. // ReadMSR should work here too, but RDPMC should be faster
  180. int64 value = 0;
  181. pme->ReadMSR( counterPort, &value );
  182. return value;
  183. #if 0
  184. // we need to copy this into a temp for some reason
  185. int temp = m_counter;
  186. _asm
  187. {
  188. mov ecx, temp
  189. RDPMC
  190. }
  191. #endif
  192. }
  193. void SetCaptureMode( PrivilegeCapture priv )
  194. {
  195. switch ( priv )
  196. {
  197. case OS_Only:
  198. {
  199. escr.USR = 0;
  200. escr.OS = 1;
  201. break;
  202. }
  203. case USR_Only:
  204. {
  205. escr.USR = 1;
  206. escr.OS = 0;
  207. break;
  208. }
  209. case OS_and_USR:
  210. {
  211. escr.USR = 1;
  212. escr.OS = 1;
  213. break;
  214. }
  215. }
  216. escr.EventMask = m_eventMask;
  217. pme->WriteMSR( escrPort, escr.flat );
  218. }
  219. void SetTagging( P4TagState tagEnable, uint8 tagValue )
  220. {
  221. escr.TagEnable = tagEnable;
  222. escr.TagValue = tagValue;
  223. pme->WriteMSR( escrPort, escr.flat );
  224. }
  225. void SetFiltering( CompareState compareEnable, CompareMethod compareMethod, uint8 threshold, EdgeState edgeEnable )
  226. {
  227. cccr.Compare = compareEnable;
  228. cccr.Complement = compareMethod;
  229. cccr.Threshold = threshold;
  230. cccr.Edge = edgeEnable;
  231. pme->WriteMSR( cccrPort, cccr.flat );
  232. }
  233. void SetOverflowEnables( P4ForceOverflow overflowEnable, P4OverflowInterrupt overflowInterruptEnable )
  234. {
  235. cccr.FORCE_OVF = overflowEnable;
  236. cccr.OVF_PMI = overflowInterruptEnable;
  237. pme->WriteMSR( cccrPort, cccr.flat );
  238. }
  239. void SetOverflow()
  240. {
  241. cccr.OVF = 1;
  242. pme->WriteMSR( cccrPort, cccr.flat );
  243. }
  244. void ClearOverflow()
  245. {
  246. cccr.OVF = 0;
  247. pme->WriteMSR( cccrPort, cccr.flat );
  248. }
  249. bool isOverflow()
  250. {
  251. CCCR cccr_temp;
  252. pme->ReadMSR( cccrPort, &cccr_temp.flat );
  253. return cccr_temp.OVF;
  254. }
  255. void SetCascade()
  256. {
  257. cccr.Cascade = 1;
  258. pme->WriteMSR( cccrPort, cccr.flat );
  259. }
  260. void ClearCascade()
  261. {
  262. cccr.Cascade = 0;
  263. pme->WriteMSR( cccrPort, cccr.flat );
  264. }
  265. };
  266. #pragma warning( default : 4035 )
  267. #include "EventMasks.h"
  268. #include "EventModes.h"
  269. #endif // P4PERFORMANCECOUNTERS_H