Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

272 lines
8.6 KiB

  1. /*++
  2. Copyright (c) 2000 Microsoft Corporation
  3. Module Name:
  4. icecap2.s
  5. Abstract:
  6. This module implements the assembler versions of the probe routines
  7. for kernel icecap tracing of assembler routines in ke\IA64.
  8. They have to be in assembler because the target routines expect
  9. registers to be preserved which the C version of these probes
  10. do not preserve.
  11. Author:
  12. Rick Vicik (rickv) 10-Aug-2001
  13. Revision History:
  14. --*/
  15. #ifdef _CAPKERN
  16. #include "ksia64.h"
  17. .file "icecap2.s"
  18. .global BBTBuffer
  19. //++
  20. //
  21. // VOID
  22. // _CAP_Start_Profiling2 (
  23. // IN PVOID Current,
  24. // IN PVOID Child
  25. // )
  26. //
  27. // Routine Description:
  28. //
  29. // Assembler version of the kernel-mode before-call icecap probe. Logs a type 5
  30. // icecap record into the part of BBTBuffer for the current cpu
  31. // (cpu number read from KiPcr + PcNumber). Inserts adrs of current and called functions,
  32. // the current thread's Cid.UniqueThread, plus ar.itc timestamp into logrecord.
  33. // If BBTBuffer flag 2 set, also copies PMD4 into logrecord; if flag 8 set, also copies PMD5.
  34. // Uses a cmpxchg8 retry loop to claim buffer space without the need for spinlocks.
  35. // Returns without logging if BBTBuffer is NULL, flag 1 is clear, *CpuPtr is NULL, or the record does not fit.
  36. // Arguments:
  37. //
  38. // current (r32) - address of routine which did the call
  39. // child (r33) - address of called routine
  40. // Registers written: r27-r31, p6-p8, ar.ccv.
  41. //--
  42. LEAF_ENTRY(_CAP_Start_Profiling2)
  43. movl r31 = BBTBuffer // adr of ptr to BBTBuffer
  44. ;;
  45. ld8 r31 = [r31] // r31 = ptr to BBTBuffer
  46. ;;
  47. cmp.eq p6 = r0, r31 // check if ptr not set up
  48. (p6) br.ret.sptk.clr brp // BBTBuffer == NULL: nothing to log
  49. adds r30 = 8, r31 // BBTBuffer+1 (8 bytes past the base)
  50. ;;
  51. ld8 r30 = [r30] // r30 = flags word *(BBTBuffer+1)
  52. ;;
  53. tbit.z p6 = r30, 0 // p6 = !((*(BBTBuffer+1)) & 1)
  54. (p6) br.ret.sptk.clr brp // flag 1 clear: return without logging
  55. movl r29 = KiPcr + PcNumber // Get cpu# from Pcr
  56. ;;
  57. ld1 r29 = [r29] // extract 1 byte cpu#
  58. tbit.nz p7=r30, 1 // p7 = (*(BBTBuffer+1)) & 2 -> also log PMD[4]
  59. tbit.nz p8=r30, 3 // p8 = (*(BBTBuffer+1)) & 8 -> also log PMD[5]
  60. ;;
  61. mov r30 = 40 // size w/o 2nd counter
  62. add r29 = 2, r29 // cpu+2
  63. ;;
  64. (p7) mov r30 = 48 // size w/ 2nd counter
  65. shladd r29 = r29, 3, r31 // CpuPtr=BBTBuffer + 8*(cpu+2)
  66. ;;
  67. (p8) mov r30 = 56 // size w/ 3rd counter (overrides the p7 size)
  68. // r30=size, r29=CpuPtr
  69. ld8 r31 = [r29] // *CpuPtr
  70. add r28 = 8, r29 // (CpuPtr+1)
  71. ;;
  72. cmp.eq p6 = r0, r31 // !(*CpuPtr)
  73. (p6) br.ret.sptk.clr brp // return before *CpuPtr is dereferenced below
  74. ld8 r29 = [r31] // **CpuPtr
  75. ld8 r28 = [r28] // *(CpuPtr+1), the upper bound used in the checks below
  76. ;;
  77. // r31=*CpuPtr, r30=size, r29=**CpuPtr, r28=*(CpuPtr+1)
  78. cmp.gtu p6 = r29, r28 // **CpuPtr > *(CpuPtr+1)
  79. (p6) br.ret.sptk.clr brp // current value already past the bound: return
  80. ;;
  81. // RecPtr = (CAPENTER*)InterlockedExchangeAddPtr( (SIZE_T*)(*CpuPtr), size);
  82. SP_Retry:
  83. ld8 r29 = [r31] // refresh **CpuPtr
  84. ;;
  85. mov.m ar.ccv = r29 // ar.ccv = expected old value for cmpxchg8
  86. add r27 = r29, r30 // r27 is proposed value (old + size)
  87. ;;
  88. cmpxchg8.acq r27=[r31], r27, ar.ccv // r27 = value found in memory; new value stored only if it equalled ar.ccv
  89. ;;
  90. cmp.ne p6 = r27, r29 // found value differs from what we read?
  91. (p6) br.cond.dptk.few SP_Retry // yes: exchange did not happen, retry
  92. add r31 = r30, r27 // RecPtr+size
  93. ;;
  94. // r30=size, r27=RecPtr
  95. // if( (((SIZE_T)RecPtr)+size) >= *(CpuPtr+1) )
  96. cmp.geu p6 = r31, r28 // r28 = *(CpuPtr+1)
  97. (p6) br.ret.sptk.clr brp // record would reach the bound: return without writing (the space stays claimed)
  98. add r30 = -4, r30 // RecSize doesn't include header
  99. ;;
  100. shl r30 = r30, 16 // shift up 2 bytes
  101. ;;
  102. adds r30 = 5, r30 // RecType 5 in low byte -> r30 = ((size-4) << 16) + 5
  103. ;;
  104. st8 [r27] = r30, 8 // copy RecType & size to RecPtr+0(8); r27 post-incremented by 8
  105. ;;
  106. st8 [r27] = r32, 8 // copy A0 (Current) to RecPtr+8(8)
  107. ;;
  108. st8 [r27] = r33, 8 // copy A1 (Child) to RecPtr+16(8)
  109. ;;
  110. // RecPtr->stack = (SIZE_T)PsGetCurrentThread()->Cid.UniqueThread;
  111. movl r30 = KiPcr + PcCurrentThread // adr of current-thread ptr in Pcr
  112. ;;
  113. ld8 r30 = [r30] // r30 = current thread ptr
  114. ;;
  115. adds r30 = EtCid + CidUniqueThread, r30 // Ethread->Cid.UniqueThread
  116. ;;
  117. ld8 r30 = [r30] // r30 = Cid.UniqueThread value
  118. ;;
  119. st8 [r27] = r30, 8 // copy thread id to RecPtr+24(8)
  120. mov.m r31 = ar.itc // get TS
  121. (p7) mov r29 = 4 // index for PMD[4]
  122. ;;
  123. st8 [r27] = r31, 8 // copy TS to RecPtr+32(8)
  124. (p7) mov r30 = PMD[r29] // get PMD[4]
  125. ;;
  126. (p7) st8 [r27] = r30, 8 // copy to RecPtr+40(8)
  127. (p8) mov r29= 5 // index for PMD[5]
  128. ;;
  129. (p8) mov r30 = PMD[r29] // get PMD[5]
  130. ;;
  131. (p8) st8 [r27] = r30, 8 // copy to RecPtr+48(8) (lands at +40 if p7 was clear)
  132. br.ret.sptk.clr brp
  133. LEAF_EXIT(_CAP_Start_Profiling2)
  134. //++
  135. //
  136. // VOID
  137. // _CAP_End_Profiling2 (
  138. // IN PVOID Current
  139. // )
  140. //
  141. // Routine Description:
  142. //
  143. // Assembler version of the kernel-mode after-call icecap probe. Logs a type 6
  144. // icecap record into the part of BBTBuffer for the current cpu
  145. // (cpu number read from KiPcr + PcNumber). Inserts adr of current function
  146. // plus ar.itc timestamp into logrecord.
  147. // If BBTBuffer flag 2 set, also copies PMD4 into logrecord; if flag 8 set, also copies PMD5.
  148. // Uses a cmpxchg8 retry loop to claim buffer space without the need for spinlocks.
  149. // Returns without logging if BBTBuffer is NULL, flag 1 is clear, *CpuPtr is NULL, or the record does not fit.
  150. // Arguments:
  151. //
  152. // current (r32) - address of routine which did the call
  153. // Registers written: r27-r31, p6-p8, ar.ccv.
  154. //--
  155. LEAF_ENTRY(_CAP_End_Profiling2)
  156. movl r31 = BBTBuffer // adr of ptr to BBTBuffer
  157. ;;
  158. ld8 r31 = [r31] // r31 = ptr to BBTBuffer
  159. ;;
  160. cmp.eq p6 = r0, r31 // check if ptr not set up
  161. (p6) br.ret.sptk.clr brp // BBTBuffer == NULL: nothing to log
  162. adds r30 = 8, r31 // BBTBuffer+1 (8 bytes past the base)
  163. ;;
  164. ld8 r30 = [r30] // r30 = flags word *(BBTBuffer+1)
  165. ;;
  166. tbit.z p6 = r30, 0 // p6 = !((*(BBTBuffer+1)) & 1)
  167. (p6) br.ret.sptk.clr brp // flag 1 clear: return without logging
  168. movl r29 = KiPcr + PcNumber // Get cpu# from Pcr
  169. ;;
  170. ld1 r29 = [r29] // extract 1 byte cpu#
  171. tbit.nz p7=r30, 1 // p7 = (*(BBTBuffer+1)) & 2 -> also log PMD[4]
  172. tbit.nz p8=r30, 3 // p8 = (*(BBTBuffer+1)) & 8 -> also log PMD[5]
  173. ;;
  174. mov r30 = 24 // size w/o 2nd counter
  175. add r29 = 2, r29 // cpu+2
  176. ;;
  177. (p7) mov r30 = 32 // size w/ 2nd counter
  178. shladd r29 = r29, 3, r31 // CpuPtr=BBTBuffer + 8*(cpu+2)
  179. ;;
  180. (p8) mov r30 = 40 // size w/ 3rd counter (overrides the p7 size)
  181. // r30=size, r29=CpuPtr
  182. ld8 r31 = [r29] // *CpuPtr
  183. add r28 = 8, r29 // (CpuPtr+1)
  184. ;;
  185. cmp.eq p6 = r0, r31 // !(*CpuPtr)
  186. (p6) br.ret.sptk.clr brp // return BEFORE the loads: ld8 [r31] must not execute when *CpuPtr is NULL
  187. ld8 r29 = [r31] // **CpuPtr
  188. ld8 r28 = [r28] // *(CpuPtr+1), the upper bound used in the checks below
  189. ;;
  190. // r31=*CpuPtr, r30=size, r29=**CpuPtr, r28=*(CpuPtr+1)
  191. cmp.gtu p6 = r29, r28 // **CpuPtr > *(CpuPtr+1)
  192. (p6) br.ret.sptk.clr brp // current value already past the bound: return
  193. ;;
  194. // RecPtr = (CAPENTER*)InterlockedExchangeAddPtr( (SIZE_T*)(*CpuPtr), size);
  195. EP_Retry:
  196. ld8 r29 = [r31] // refresh **CpuPtr
  197. ;;
  198. mov.m ar.ccv = r29 // ar.ccv = expected old value for cmpxchg8
  199. add r27 = r29, r30 // r27 is proposed value (old + size)
  200. ;;
  201. cmpxchg8.acq r27=[r31], r27, ar.ccv // r27 = value found in memory; new value stored only if it equalled ar.ccv
  202. ;;
  203. cmp.ne p6 = r27, r29 // found value differs from what we read?
  204. (p6) br.cond.dptk.few EP_Retry // yes: exchange did not happen, retry
  205. add r31 = r30, r27 // RecPtr+size
  206. ;;
  207. // r30=size, r27=RecPtr
  208. // if( (((SIZE_T)RecPtr)+size) >= *(CpuPtr+1) )
  209. cmp.geu p6 = r31, r28 // r28 = *(CpuPtr+1)
  210. (p6) br.ret.sptk.clr brp // record would reach the bound: return without writing (the space stays claimed)
  211. add r30 = -4, r30 // RecSize doesn't include header
  212. ;;
  213. shl r30 = r30, 16 // shift up 2 bytes
  214. ;;
  215. adds r30 = 6, r30 // RecType 6 in low byte -> r30 = ((size-4) << 16) + 6
  216. ;;
  217. st8 [r27] = r30, 8 // copy RecType & size to RecPtr+0(8); r27 post-incremented by 8
  218. ;;
  219. st8 [r27] = r32, 8 // copy A0 (Current) to RecPtr+8(8)
  220. mov.m r31 = ar.itc // get TS
  221. (p7) mov r29 = 4 // index for PMD[4]
  222. ;;
  223. st8 [r27] = r31, 8 // copy TS to RecPtr+16(8)
  224. (p7) mov r30 = PMD[r29] // get PMD[4]
  225. ;;
  226. (p7) st8 [r27] = r30, 8 // copy to RecPtr+24(8)
  227. (p8) mov r29 = 5 // index for PMD[5]
  228. ;;
  229. (p8) mov r30 = PMD[r29] // get PMD[5]
  230. ;;
  231. (p8) st8 [r27] = r30, 8 // copy to RecPtr+32(8) (lands at +24 if p7 was clear)
  232. br.ret.sptk.clr brp
  233. LEAF_EXIT(_CAP_End_Profiling2)
  234. #endif