Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1082 lines
27 KiB

  1. /*++
  2. Copyright (c) 2000 Microsoft Corporation
  3. Module Name:
  4. icecap.c
  5. Abstract:
  6. This module implements the probe and support routines for
  7. kernel icecap tracing.
  8. Author:
  9. Rick Vicik (rickv) 9-May-2000
  10. Revision History:
  11. --*/
  12. #ifdef _CAPKERN
  13. #include <ntos.h>
  14. #include <ntrtl.h>
  15. #include <nturtl.h>
  16. #include <zwapi.h>
  17. #include <stdio.h>
  18. //
  19. // Kernel Icecap logs to Perfmem (BBTBuffer) using the following format:
  20. //
  21. // BBTBuffer[0] contains the length in 4kpages
  22. // BBTBuffer[1] is a flagword: 1 = RDPMC 0
  23. // 2 = user stack dump, 4 = enable _CAP_Log_1Int records
  24. // BBTBuffer[2] is ptr to beginning of cpu0 buffer
  25. // BBTBuffer[3] is ptr to beginning of cpu1 buffer (also end of cpu0 buffer)
  26. // BBTBuffer[4] is ptr to beginning of cpu2 buffer (also end of cpu1 buffer)
  27. // ...
  28. // BBTBuffer[n+2] is ptr to beginning of cpu 'n' buffer (also end of cpu 'n-1' buffer)
  29. // BBTBuffer[n+3] is ptr to the end of cpu 'n' buffer
  30. //
  31. // The area starting with &BBTBuffer[n+5] (CAPKControl skips CAPK0 + cpus header dwords) is divided into private buffers
  32. // for each cpu. The first dword in each cpu-private buffer points to the
  33. // beginning of freespace in that buffer. Each one is initialized to point
  34. // just after itself. Space is claimed using lock xadd on that dword.
  35. // If the resulting value points beyond the beginning of the next cpu's
  36. // buffer, this buffer is considered full and nothing further is logged.
  37. // Each cpu's freespace pointer is in a separate cacheline.
  38. //
  39. // Sizes of trace records
  40. //
  41. #define CAPENTERSIZE 20 // CapEnter datalen: current (4) + child (4) + thread id (4) + RDTSC (8)
  42. #define CAPENTERSIZE2 28 // CapEnter datalen when flag bit 0 is set: adds the 8-byte RDPMC value
  43. #define CAPEXITSIZE 12 // CapExit datalen: current (4) + RDTSC (8)
  44. #define CAPEXITSIZE2 20 // CapExit datalen when flag bit 0 is set: adds the 8-byte RDPMC value
  45. #define CAPTIDSIZE 28 // CapThreadID record size including the 4-byte header: Pid (4) + Tid (4) + image name (16)
  46. //
  47. // The pre-call (CAP_Start_Profiling) and post-call (CAP_End_Profiling)
  48. // probe calls are defined in RTL because they must be built twice:
  49. // once for kernel runtime and once for user-mode runtime (because the
  50. // technique for getting the trace buffer address is different).
  51. //
  52. #ifdef NTOS_KERNEL_RUNTIME
  53. //
  54. // Kernel-Mode Probe & Support Routines:
  55. // (BBTBuffer address obtained from kglobal pointer *BBTBuffer,
  56. // cpu number obtained from PCR)
  57. //
  58. extern unsigned long *BBTBuffer;
  59. VOID
  60. _declspec(naked)
  61. __stdcall
  62. _CAP_Start_Profiling(
  63. PVOID current,
  64. PVOID child)
  65. /*++
  66. Routine description:
  67. Kernel-mode version of before-call icecap probe. Logs a type 5
  68. icecap record into the part of BBTBuffer for the current cpu
  69. (obtained from Prcb). Inserts adrs of current and called functions
  70. plus RDTSC timestamp into logrecord. If BBTBuffer flag 1 set,
  71. also does RDPMC 0 and inserts result into logrecord.
  72. Uses lock xadd to claim buffer space without the need for spinlocks.
  73. Arguments:
  74. current - address of routine which did the call
  75. child - address of called routine
  76. --*/
  77. {
  78. _asm {
  79. push eax ; save eax
  80. mov eax, BBTBuffer ; get BBTBuffer address
  81. test eax,eax ; if null, just return
  82. jz return1 ; (restore eax & return)
  83. push ecx
  84. bt [eax+4],0 ; if 1st flag bit set,
  85. jc pmc1 ; datalen is 28
  86. mov ecx, CAPENTERSIZE ; otherwise it is 20
  87. jmp tsonly1
  88. pmc1:
  89. mov ecx, CAPENTERSIZE2
  90. tsonly1:
  91. push edx ; save edx
  92. movzx edx, _PCR KPCR.Number ; get processor number
  93. lea eax, [eax][edx*4]+8 ; offset to freeptr ptr = (cpu * 4) + 8
  94. mov edx, [eax+4] ; next per-cpu buffer is EOB for this cpu
  95. mov eax, [eax] ; eax now points to freeptr for this cpu
  96. or eax,eax ; if ptr to freeptr not set up yet,
  97. jz return2 ; just return
  98. cmp [eax],edx ; if freeptr >= EOB, don't trace
  99. jge return2 ; (also return if both 0)
  100. push ebx
  101. lea ebx,[ecx+4] ; record len is datalen + 4
  102. sub edx,ebx ; adjust EOB to account for newrec
  103. lock xadd [eax],ebx ; atomically claim freespace
  104. cmp ebx,edx ; if newrec goes beyond EOB
  105. jge return4 ; don't log it
  106. mov word ptr[ebx],5 ; initialize CapEnter record
  107. mov word ptr [ebx+2],cx ; insert datalen
  108. mov eax,[esp+20] ; p1 (4 saved regs + retadr)
  109. mov [ebx+4],eax
  110. mov eax,[esp+24] ; p2
  111. mov [ebx+8],eax
  112. mov eax, _PCR KPCR.PrcbData.CurrentThread
  113. mov eax, [eax] ETHREAD.Cid.UniqueThread
  114. mov [ebx+12],eax ; current Teb
  115. rdtsc ; read timestamp into edx:eax
  116. mov [ebx+16],eax ; ts low
  117. mov [ebx+20],edx ; ts high
  118. cmp ecx, CAPENTERSIZE ; if record length 20,
  119. jne pmc2
  120. jmp return4 ; skip rdpmc
  121. pmc2:
  122. xor ecx,ecx ; pmc0
  123. rdpmc ; read pmc into edx:eax
  124. mov [ebx+24],eax ; ts low
  125. mov [ebx+28],edx ; ts high
  126. return4: ; restore regs & return
  127. pop ebx
  128. return2:
  129. pop edx
  130. pop ecx
  131. return1:
  132. pop eax
  133. ret 8 ; 2 input parms
  134. }
  135. }
  136. VOID
  137. _declspec(naked)
  138. __stdcall
  139. _CAP_End_Profiling(
  140. PVOID current)
  141. /*++
  142. Routine description:
  143. Kernel-mode version of after-call icecap probe. Logs a type 6
  144. icecap record into the part of BBTBuffer for the current cpu
  145. (obtained from Prcb). Inserts adr of current function and
  146. RDTSC timestamp into logrecord. If BBTBuffer flag 1 set,
  147. also does RDPMC 0 and inserts result into logrecord.
  148. Uses lock xadd to claim buffer space without the need for spinlocks.
  149. Arguments:
  150. current - address of routine which did the call
  151. --*/
  152. {
  153. _asm {
  154. push eax ; save eax
  155. mov eax, BBTBuffer ; get BBTBuffer address
  156. test eax,eax ; if null, just return
  157. jz return1 ; (restore eax & return)
  158. push ecx
  159. bt [eax+4],0 ; if 1st flag bit set,
  160. jc pmc1 ; datalen is 20
  161. mov ecx, CAPEXITSIZE ; otherwise it is 12
  162. jmp tsonly1
  163. pmc1:
  164. mov ecx, CAPEXITSIZE2
  165. tsonly1:
  166. push edx
  167. movzx edx, _PCR KPCR.Number ; get processor number
  168. lea eax, [eax][edx*4]+8 ; offset to freeptr ptr = (cpu * 4) + 8
  169. mov edx, [eax+4] ; ptr to next buffer is end of this one
  170. mov eax, [eax] ; eax now points to freeptr for this cpu
  171. or eax,eax ; if ptr to freeptr not set up yet,
  172. jz return2 ; just return
  173. cmp [eax],edx ; if freeptr >= EOB, don't trace
  174. jge return2 ; (also return if both 0)
  175. push ebx
  176. lea ebx,[ecx+4] ; record len is datalen + 4
  177. sub edx,ebx ; adjust EOB to account for newrec
  178. lock xadd [eax],ebx ; atomically claim freespace
  179. cmp ebx,edx ; if newrec goes beyond EOB
  180. jge return4 ; don't log it
  181. mov word ptr[ebx],6 ; initialize CapExit record
  182. mov word ptr [ebx+2],cx ; insert datalen
  183. mov eax,[esp+20] ; p1 (4 saved regs + retadr)
  184. mov [ebx+4],eax
  185. rdtsc ; read timestamp into edx:eax
  186. mov [ebx+8],eax ; ts low
  187. mov [ebx+12],edx ; ts high
  188. cmp ecx, CAPEXITSIZE ; if datalen is 16,
  189. jne pmc2
  190. jmp return4 ; skip rdpmc
  191. pmc2:
  192. xor ecx,ecx ; pmc0
  193. rdpmc ; read pmc into edx:eax
  194. mov [ebx+16],eax ; ts low
  195. mov [ebx+20],edx ; ts high
  196. return4: ; restore regs & return
  197. pop ebx
  198. return2:
  199. pop edx
  200. pop ecx
  201. return1:
  202. pop eax
  203. ret 4 ; 1 input parm
  204. }
  205. }
  206. VOID CAPKComment(char* Format, ...);
  207. VOID
  208. __stdcall
  209. _CAP_ThreadID( VOID )
  210. /*++
  211. Routine description:
  212. Called by KiSystemService before executing the service routine.
  213. Logs a type 14 icecap record containing Pid, Tid & image file name.
  214. Optionally, if BBTBuffer flag 2 set, runs the stack frame pointers
  215. in the user-mode call stack starting with the trap frame and copies
  216. the return addresses to the log record. The length of the logrecord
  217. indicates whether user call stack info is included.
  218. --*/
  219. {
  220. PEPROCESS Process;
  221. PKTHREAD Thread;
  222. PETHREAD EThread;
  223. char* buf;
  224. int callcnt;
  225. ULONG* cpuptr;
  226. ULONG recsize;
  227. ULONG RetAddr[7];
  228. if( !BBTBuffer || !BBTBuffer[0] )
  229. goto fail;
  230. _asm {
  231. call KeGetCurrentThread
  232. mov Thread, eax ; return value
  233. movzx eax, _PCR KPCR.Number ; get processor number
  234. mov callcnt, eax ; return value
  235. }
  236. cpuptr = BBTBuffer + callcnt + 2;
  237. if( !(*cpuptr) || *(ULONG*)(*cpuptr) >= *(cpuptr+1) )
  238. goto fail;
  239. // if trapframe, count call-frames to determine record size
  240. EThread = CONTAINING_RECORD(Thread,ETHREAD,Tcb);
  241. if( (BBTBuffer[1] & 2) && EThread->Tcb.PreviousMode != KernelMode ) {
  242. PTEB Teb;
  243. ULONG* FramePtr;
  244. recsize = CAPTIDSIZE;
  245. FramePtr = (ULONG*)EThread->Tcb.TrapFrame; // get trap frame
  246. Teb = EThread->Tcb.Teb;
  247. if( FramePtr && Teb ) {
  248. ULONG* StackBase = (ULONG*)Teb->NtTib.StackBase;
  249. ULONG* StackLimit = (ULONG*)Teb->NtTib.StackLimit;
  250. // first retadr is last thing pushed
  251. RetAddr[0] = *(ULONG*)(EThread->Tcb.TrapFrame->HardwareEsp);
  252. // count frames that have a null next frame (may have valid retadr)
  253. FramePtr = (ULONG*)((PKTRAP_FRAME)FramePtr)->Ebp; // get stack frame
  254. for( callcnt=1; callcnt<7 && FramePtr<StackBase
  255. && FramePtr>StackLimit
  256. && *(FramePtr);
  257. FramePtr = (ULONG*)*(FramePtr)) {
  258. RetAddr[callcnt++] = *(FramePtr+1);
  259. }
  260. recsize += (callcnt<<2);
  261. }
  262. } else {
  263. recsize = CAPTIDSIZE;
  264. callcnt=0;
  265. }
  266. _asm {
  267. mov eax, cpuptr
  268. mov edx, [eax+4]
  269. mov eax, [eax]
  270. mov ecx,recsize ; total size of mark record
  271. sub edx,ecx ; adjust EOB to account for newrec
  272. lock xadd [eax],ecx ; atomically claim freespace
  273. cmp ecx,edx ; if newrec goes beyond EOB
  274. jge fail ; don't log it
  275. mov buf, ecx ; export tracerec destination adr
  276. }
  277. // initialize CapThreadID record (type 14)
  278. *((short*)buf) = (short)14;
  279. // insert data length (excluding 4byte header)
  280. *((short*)(buf+2)) = (short)(recsize-4);
  281. // insert Pid & Tid
  282. *((ULONG*)(buf+4)) = (ULONG)EThread->Cid.UniqueProcess;
  283. *((ULONG*)(buf+8)) = (ULONG)EThread->Cid.UniqueThread;
  284. // insert ImageFile name
  285. Process = CONTAINING_RECORD(Thread->ApcState.Process,EPROCESS,Pcb);
  286. memcpy(buf+12, Process->ImageFileName, 16 );
  287. // insert optional user call stack data
  288. if( recsize > CAPTIDSIZE && callcnt )
  289. memcpy( buf+28, RetAddr, callcnt<<2 );
  290. fail:
  291. ;
  292. }
  293. VOID
  294. __stdcall
  295. _CAP_SetCPU( VOID )
  296. /*++
  297. Routine description:
  298. Called by KiSystemService before returning to user mode.
  299. Sets current cpu number in Teb->Spare3 (+0xf78) so user-mode version
  300. of probe functions know which part of BBTBuffer to use.
  301. --*/
  302. {
  303. ULONG* cpuptr;
  304. ULONG cpu;
  305. PTEB Teb;
  306. if( !BBTBuffer || !BBTBuffer[0] )
  307. goto fail;
  308. _asm {
  309. movzx eax, _PCR KPCR.Number ; get processor number
  310. mov cpu, eax ; return value
  311. }
  312. cpuptr = BBTBuffer + cpu + 2;
  313. if( !(*cpuptr) || *(ULONG*)(*cpuptr) >= *(cpuptr+1) )
  314. goto fail;
  315. if( !(Teb = NtCurrentTeb()) )
  316. goto fail;
  317. try {
  318. Teb->Spare3 = cpu;
  319. } except(EXCEPTION_EXECUTE_HANDLER) {
  320. NOTHING;
  321. }
  322. fail:
  323. ;
  324. }
  325. VOID
  326. _declspec(naked)
  327. __stdcall
  328. _CAP_Log_1Int(
  329. ULONG code,
  330. ULONG data)
  331. /*++
  332. Routine description:
  333. Kernel-mode version of general-purpose log integer probe.
  334. Logs a type 15 icecap record into the part of BBTBuffer for the
  335. current cpu (obtained from Prcb). Inserts code into the byte after
  336. length, RDTSC timestamp and the value of 'data'.
  337. Uses lock xadd to claim buffer space without the need for spinlocks.
  338. Arguments:
  339. code - type-code for trace formatting
  340. data - ULONG value to be logged
  341. --*/
  342. {
  343. _asm {
  344. push eax ; save eax
  345. mov eax, BBTBuffer ; get BBTBuffer address
  346. test eax,eax ; if null, just return
  347. jz return1 ; (restore eax & return)
  348. bt [eax+4],2 ; if 0x4 bit not set,
  349. jnc return1 ; just return
  350. push edx
  351. movzx edx, _PCR KPCR.Number ; get processor number
  352. lea eax, [eax][edx*4]+8 ; offset to freeptr ptr = (cpu * 4) + 8
  353. mov edx, [eax+4] ; ptr to next buffer is end of this one
  354. mov eax, [eax] ; eax now points to freeptr for this cpu
  355. or eax,eax ; if ptr to freeptr not set up yet,
  356. jz return2 ; just return
  357. cmp [eax],edx ; if freeptr >= EOB, don't trace
  358. jge return2 ; (also return if both 0)
  359. push ebx
  360. push ecx
  361. mov ecx, 12 ; datalength is ULONG plus TS (4+8)
  362. lea ebx,[ecx+4] ; record len is datalen + 4
  363. sub edx,ebx ; adjust EOB to account for newrec
  364. lock xadd [eax],ebx ; atomically claim freespace
  365. cmp ebx,edx ; if newrec goes beyond EOB
  366. jge return4 ; don't log it
  367. mov eax,[esp+20] ; p1 = code (4 saved regs + retadr)
  368. shl eax,8 ; shift the code up 1 byte
  369. or eax,15 ; or-in the record type
  370. mov word ptr [ebx],ax ; insert record type and code (from p1)
  371. mov word ptr [ebx+2],cx ; insert datalen
  372. mov eax,[esp+24] ; insert data (p2)
  373. mov [ebx+4],eax
  374. rdtsc ; read timestamp into edx:eax
  375. mov [ebx+8],eax ; insert ts low
  376. mov [ebx+12],edx ; insert ts high
  377. return4: ; restore regs & return
  378. pop ecx
  379. pop ebx
  380. return2:
  381. pop edx
  382. return1:
  383. pop eax
  384. ret 8 ; 2 input parms
  385. }
  386. }
  387. #ifdef FOOBAR
  388. VOID
  389. _declspec(naked)
  390. __stdcall
  391. _CAP_LogRetries(
  392. ULONG retries)
  393. /*++
  394. Routine description:
  395. Logs a type 15 icecap record with specified value.
  396. Arguments:
  397. retries - value to substitute in type 15 record
  398. --*/
  399. {
  400. _asm {
  401. push eax ; save eax
  402. mov eax, BBTBuffer ; get BBTBuffer address
  403. test eax,eax ; if null, just return
  404. jz return1 ; (restore eax & return)
  405. bt [eax+4],2 ; if 0x4 bit not set,
  406. jnc return1 ; just return
  407. push edx
  408. movzx edx, _PCR KPCR.Number ; get processor number
  409. lea eax, [eax][edx*4]+8 ; offset to freeptr ptr = (cpu * 4) + 8
  410. mov edx, [eax+4] ; ptr to next buffer is end of this one
  411. mov eax, [eax] ; eax now points to freeptr for this cpu
  412. or eax,eax ; if ptr to freeptr not set up yet,
  413. jz return2 ; just return
  414. cmp [eax],edx ; if freeptr >= EOB, don't trace
  415. jge return2 ; (also return if both 0)
  416. push ebx
  417. push ecx
  418. mov ecx,4 ; datalen is 4
  419. lea ebx,[ecx+4] ; record len is datalen + 4
  420. sub edx,ebx ; adjust EOB to account for newrec
  421. lock xadd [eax],ebx ; atomically claim freespace
  422. cmp ebx,edx ; if newrec goes beyond EOB
  423. jge return4 ; don't log it
  424. mov word ptr[ebx],15 ; initialize CapRetries record
  425. mov word ptr [ebx+2],cx ; insert datalen
  426. mov eax,[esp+20] ; p1 (4 saved regs + retadr)
  427. mov [ebx+4],eax ; copy p1 to logrec
  428. return4: ; restore regs & return
  429. pop ecx
  430. pop ebx
  431. return2:
  432. pop edx
  433. return1:
  434. pop eax
  435. ret 4 ; 1 input parm
  436. }
  437. }
  438. #endif
  439. NTSTATUS
  440. NtSetPMC (
  441. IN ULONG PMC)
  442. /*++
  443. Routine description:
  444. Sets PMC and CR4 so RDPMC 0 reads the
  445. desired performance counter.
  446. Arguments:
  447. PMC - desired performance counter
  448. --*/
  449. {
  450. if( PMC == -1 )
  451. return 0;
  452. WRMSR(0x186, PMC);
  453. if( PMC & 0x10000 ) {
  454. _asm {
  455. _emit 0Fh
  456. _emit 20h
  457. _emit 0E0h ; mov eax, cr4
  458. or eax, 100h
  459. _emit 0Fh
  460. _emit 22h
  461. _emit 0E0h ; mov cr4, eax
  462. }
  463. }
  464. return STATUS_SUCCESS;
  465. }
  466. #else
  467. //
  468. // User-Mode Probe Routines (for ntdll, win32k, etc.)
  469. // (BBTBuffer address & cpu obtained from Teb)
  470. //
  471. VOID
  472. _declspec(naked)
  473. __stdcall
  474. _CAP_Start_Profiling(
  475. PVOID current,
  476. PVOID child)
  477. /*++
  478. Routine description:
  479. user-mode version of before-call icecap probe. Logs a type 5
  480. icecap record into the part of BBTBuffer for the current cpu
  481. (obtained from Teb+0xf78). Inserts adrs of current and called
  482. functions plus RDTSC timestamp into logrecord. If BBTBuffer
  483. flag 1 set, also does RDPMC 0 and inserts result into logrecord.
  484. Uses lock xadd to claim buffer space without the need for spinlocks.
  485. Arguments:
  486. current - address of routine which did the call
  487. child - address of called routine
  488. --*/
  489. {
  490. _asm {
  491. push eax ; save eax
  492. mov eax, fs:[0x18]
  493. mov eax, [eax+0xf7c] ; get adr of BBTBuffer from fs
  494. test eax,eax ; if null, just return
  495. jz return1 ; (restore eax & return)
  496. push ecx ; save ecx
  497. bt [eax+4],0 ; if 1st flag bit set,
  498. jc pmc1 ; datalen is 28
  499. mov ecx, CAPENTERSIZE ; otherwise it is 20
  500. jmp tsonly1
  501. pmc1:
  502. mov ecx, CAPENTERSIZE2
  503. tsonly1:
  504. push ebx
  505. push edx ; save edx
  506. mov ebx, fs:[0x18]
  507. xor edx,edx
  508. mov dl, byte ptr [ebx+0xf78]
  509. lea eax, [eax][edx*4]+8 ; offset to freeptr ptr = (cpu * 4) + 8
  510. mov edx, [eax+4] ; next per-cpu buffer is EOB for this cpu
  511. mov eax, [eax] ; eax now points to freeptr for this cpu
  512. or eax,eax ; if ptr to freeptr not set up yet,
  513. jz return4 ; just return
  514. cmp [eax],edx ; if freeptr >= EOB, don't trace
  515. jge return4 ; (also return if both 0)
  516. lea ebx, [ecx+4] ; record len is datalen + 4
  517. sub edx,ebx ; adjust EOB to account for newrec
  518. lock xadd [eax],ebx ; atomically claim freespace
  519. cmp ebx,edx ; if newrec goes beyond EOB
  520. jge return4 ; don't log it
  521. mov word ptr[ebx],5 ; initialize CapEnter record
  522. mov word ptr [ebx+2],cx
  523. mov eax,[esp+20] ; p1 (4 saved regs + retadr)
  524. mov [ebx+4],eax
  525. mov eax,[esp+24] ; p2
  526. mov [ebx+8],eax
  527. mov eax,fs:[0x18] ; Teb adr
  528. mov eax, [eax] TEB.ClientId.UniqueThread
  529. mov [ebx+12],eax ;
  530. rdtsc ; read timestamp into edx:eax
  531. mov [ebx+16],eax ; ts low
  532. mov [ebx+20],edx ; ts high
  533. cmp ecx, CAPENTERSIZE ; if record length 20,
  534. jne pmc2
  535. jmp return4 ; skip rdpmc
  536. pmc2:
  537. xor ecx,ecx ; pmc0
  538. rdpmc ; read pmc into edx:eax
  539. mov [ebx+24],eax ; ts low
  540. mov [ebx+28],edx ; ts high
  541. return4: ; restore regs & return
  542. pop edx
  543. pop ebx
  544. pop ecx
  545. return1:
  546. pop eax
  547. ret 8 ; 2 input parms
  548. }
  549. }
  550. VOID
  551. _declspec(naked)
  552. __stdcall
  553. _CAP_End_Profiling(
  554. PVOID current)
  555. /*++
  556. Routine description:
  557. user-mode version of after-call icecap probe. Logs a type 6
  558. icecap record into the part of BBTBuffer for the current cpu
  559. (obtained from Teb+0xf78). Inserts adr of current function
  560. plus RDTSC timestamp into logrecord. If BBTBuffer flag 1 set,
  561. also does RDPMC 0 and inserts result into logrecord.
  562. Uses lock xadd to claim buffer space without the need for spinlocks.
  563. Arguments:
  564. current - address of routine which did the call
  565. --*/
  566. {
  567. _asm {
  568. push eax ; save eax
  569. mov eax, fs:[0x18]
  570. mov eax, [eax+0xf7c] ; get adr of BBTBuffer from fs
  571. test eax,eax ; if null, just return
  572. jz return1 ; (restore eax & return)
  573. push ecx ; save ecx
  574. bt [eax+4],0 ; if 1st flag bit set,
  575. jc pmc1 ; datalen is 20
  576. mov ecx, CAPEXITSIZE ; otherwise it is 12
  577. jmp tsonly1
  578. pmc1:
  579. mov ecx, CAPEXITSIZE2
  580. tsonly1:
  581. push ebx
  582. push edx
  583. mov ebx, fs:[0x18]
  584. xor edx,edx
  585. mov dl, byte ptr [ebx+0xf78]
  586. lea eax, [eax][edx*4]+8 ; offset to freeptr ptr = (cpu * 4) + 8
  587. mov edx, [eax+4] ; ptr to next buffer is end of this one
  588. mov eax, [eax] ; eax now points to freeptr for this cpu
  589. or eax,eax ; if ptr to freeptr not set up yet,
  590. jz return4 ; just return
  591. cmp [eax],edx ; if freeptr >= EOB, don't trace
  592. jge return4 ; (also return if both 0)
  593. lea ebx, [ecx+4] ; record len is datalen+4
  594. sub edx,ebx ; adjust EOB to account for newrec
  595. lock xadd [eax],ebx ; atomically claim freespace
  596. cmp ebx,edx ; if newrec goes beyond EOB
  597. jge return4 ; don't log it
  598. mov word ptr[ebx],6 ; initialize CapExit record
  599. mov word ptr [ebx+2],cx ; insert datalen
  600. mov eax,[esp+20] ; p1 (4 saved regs + retadr)
  601. mov [ebx+4],eax
  602. rdtsc ; read timestamp into edx:eax
  603. mov [ebx+8],eax ; ts low
  604. mov [ebx+12],edx ; ts high
  605. cmp ecx, CAPEXITSIZE ; if datalen is 12,
  606. jne pmc2
  607. jmp return4 ; skip rdpmc
  608. pmc2:
  609. xor ecx,ecx ; pmc0
  610. rdpmc ; read pmc into edx:eax
  611. mov [ebx+16],eax ; ts low
  612. mov [ebx+20],edx ; ts high
  613. return4: ; restore regs & return
  614. pop edx
  615. pop ebx
  616. pop ecx
  617. return1:
  618. pop eax
  619. ret 4 ; 1 input parm
  620. }
  621. }
  622. #endif
  623. //
  624. // Common Support Routines
  625. // (method for getting BBTBuffer address & cpu ifdef'ed for kernel & user)
  626. //
  627. VOID
  628. CAPKComment(
  629. char* Format, ...)
  630. /*++
  631. Routine description:
  632. Logs a free-form comment (record type 13) in the icecap trace
  633. Arguments:
  634. Format - printf-style format string and substitutional parms
  635. --*/
  636. {
  637. va_list arglist;
  638. UCHAR Buffer[512];
  639. int cb, insize, outsize;
  640. char* buf;
  641. char* data;
  642. ULONG BufEnd;
  643. ULONG FreePtr;
  644. #ifndef NTOS_KERNEL_RUNTIME
  645. ULONG* BBTBuffer = NtCurrentTeb()->ReservedForPerf;
  646. #endif
  647. if( !BBTBuffer || !BBTBuffer[0] )
  648. goto fail;
  649. _asm {
  650. #ifdef NTOS_KERNEL_RUNTIME
  651. movzx edx, _PCR KPCR.Number ; get processor number
  652. #else
  653. mov ecx, fs:[0x18]
  654. xor edx,edx
  655. mov dl, byte ptr [ecx+0xf78]
  656. #endif
  657. lea eax, [eax][edx*4]+8 ; offset to freeptr ptr = (cpu * 4) + 8
  658. mov edx, [eax+4] ; ptr to next buffer is end of this one
  659. mov eax, [eax] ; eax now points to freeptr for this cpu
  660. or eax,eax ; if ptr to freeptr not set up yet,
  661. jz fail ; just return
  662. cmp [eax],edx ; if freeptr >= EOB, don't trace
  663. jge fail ; (also return if both 0)
  664. mov FreePtr,eax ; save freeptr & buffer end adr
  665. mov BufEnd,edx
  666. }
  667. va_start(arglist, Format);
  668. //
  669. // Do the following call in assembler so it won't get instrumented
  670. // cb = _vsnprintf(Buffer, sizeof(Buffer), Format, arglist);
  671. //
  672. _asm {
  673. push arglist ; arglist
  674. push Format ; Format
  675. push 512 ; sizeof(Buffer)
  676. lea eax,Buffer
  677. push eax ; Buffer
  678. call _vsnprintf
  679. add esp,16 ; adj stack for 4 parameters
  680. mov cb, eax ; return value
  681. }
  682. va_end(arglist);
  683. if (cb == -1) { // detect buffer overflow
  684. cb = sizeof(Buffer);
  685. Buffer[sizeof(Buffer) - 1] = '\n';
  686. }
  687. data = &Buffer[0];
  688. insize = strlen(data); // save insize for data copy
  689. outsize = ((insize+7) & 0xfffffffc); // pad outsize to DWORD boundary
  690. // +4 to account for hdr, +3 to pad
  691. _asm {
  692. mov eax, FreePtr ; restore FreePtr & EOB
  693. mov edx, BufEnd
  694. mov ecx,outsize ; total size of mark record
  695. sub edx,ecx ; adjust EOB to account for newrec
  696. lock xadd [eax],ecx ; atomically claim freespace
  697. cmp ecx,edx ; if newrec goes beyond EOB
  698. jge fail ; don't log it
  699. mov buf, ecx ; export tracerec destination adr
  700. }
  701. // size in tracerec excludes 4byte hdr
  702. outsize -= 4;
  703. // initialize CapkComment record (type 13)
  704. *((short*)(buf)) = (short)13;
  705. // insert size
  706. *((short*)(buf+2)) = (short)outsize;
  707. // insert sprintf data
  708. memcpy(buf+4, data, insize );
  709. // if had to pad, add null terminator to string
  710. if( outsize > insize )
  711. *(buf+4+insize) = 0;
  712. fail:
  713. return;
  714. }
  715. //
  716. // Constants for CAPKControl
  717. //
  718. #define CAPKStart 1 // opcode: initialize the per-cpu buffers and begin tracing
  719. #define CAPKStop 2 // opcode: zero the per-cpu buffer pointers so the probes stop logging
  720. #define CAPKResume 3 // opcode: republish the per-cpu buffer pointers without resetting the free pointers
  721. #define MAXDUMMY 30 // iteration count of the calibration loop that is commented out in CAPKControl
  722. #define CAPK0 4 // fixed dword count added to the cpu count to locate the first per-cpu buffer
  723. int CAPKControl(
  724. ULONG opcode)
  725. /*++
  726. Routine description:
  727. CAPKControl
  728. Description:
  729. Starts, stops or pauses icecap tracing
  730. Arguments:
  731. opcode - 1=start, 2=stop, 3=resume, 4,5,6,7 reserved
  732. Return value:
  733. 1 = success, 0 = BBTBuf not set up
  734. --*/
  735. {
  736. ULONG i;
  737. ULONG cpus;
  738. ULONG percpusize;
  739. ULONG pwords;
  740. ULONG* ptr1;
  741. #ifdef NTOS_KERNEL_RUNTIME
  742. cpus = KeNumberProcessors;
  743. #else
  744. ULONG* BBTBuffer= NtCurrentTeb()->ReservedForPerf;
  745. cpus = NtCurrentPeb()->NumberOfProcessors;
  746. #endif
  747. if( !BBTBuffer || !(BBTBuffer[0]) )
  748. return 0;
  749. pwords = CAPK0 + cpus;
  750. percpusize = ((BBTBuffer[0]*1024) - pwords)/cpus; // in DWORDs
  751. if(opcode == CAPKStart) { // start
  752. ULONG j;
  753. // clear freeptr ptrs (including final ptr)
  754. for( i=0; i<cpus+1; i++ )
  755. BBTBuffer[2+i] = 0;
  756. // initialize each freeptr to next dword
  757. // (and log dummy records to calibrate overhead)
  758. for( i=0, ptr1 = BBTBuffer+pwords; i<cpus; i++, ptr1+=percpusize) {
  759. *ptr1 = (ULONG)(ptr1+1);
  760. // for( j=0; j<MAXDUMMY; j++ ) {
  761. //
  762. // _CAP_Start_Profiling(ptr, NULL);
  763. // _CAP_End_Profiling(ptr);
  764. //
  765. // }
  766. }
  767. // set up freeptr ptrs (including final ptr)
  768. for(i=0, ptr1=BBTBuffer+pwords; i<cpus+1; i++, ptr1+=percpusize)
  769. BBTBuffer[2+i] = (ULONG)ptr1;
  770. } else if( opcode == CAPKStop ) { // stop
  771. for(i=0; i<cpus+1; i++)
  772. BBTBuffer[2+i] = 0;
  773. } else if( opcode == CAPKResume ) { //resume
  774. // set up freeptr ptrs (including final ptr)
  775. for(i=0, ptr1=BBTBuffer+pwords; i<cpus+1; i++, ptr1+=percpusize)
  776. BBTBuffer[2+i] = (ULONG)ptr1;
  777. } else {
  778. return 0; // invalid opcode
  779. }
  780. return 1;
  781. }
  782. #endif