You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
272 lines
8.6 KiB
272 lines
8.6 KiB
/*++
|
|
|
|
Copyright (c) 2000 Microsoft Corporation
|
|
|
|
Module Name:
|
|
|
|
icecap2.s
|
|
|
|
Abstract:
|
|
|
|
This module implements the assembler versions of the probe routines
|
|
for kernel icecap tracing of assembler routines in ke\IA64.
|
|
They have to be in assembler because the target routines expect
|
|
registers to be preserved which the C version of these probes
|
|
do not preserve.
|
|
|
|
Author:
|
|
|
|
Rick Vicik (rickv) 10-Aug-2001
|
|
|
|
Revision History:
|
|
|
|
--*/
|
|
|
|
#ifdef _CAPKERN
|
|
|
|
#include "ksia64.h"
|
|
|
|
.file "icecap2.s"
|
|
.global BBTBuffer
|
|
|
|
//++
|
|
//
|
|
// VOID
|
|
// _CAP_Start_Profiling2 (
|
|
// IN PVOID Current,
|
|
// IN PVOID Child
|
|
// )
|
|
//
|
|
// Routine Description:
|
|
//
|
|
// Kernel-mode version of before-call icecap probe. Logs a type 5
|
|
// icecap record into the part of BBTBuffer for the current cpu
|
|
// (cpu number obtained from the Pcr). Inserts addresses of current and called functions
|
|
// plus ar.itc timestamp into logrecord.
|
|
// If BBTBuffer flag 2 set, also copies PMD4 into logrecord; if flag 8 set, also copies PMD5.
|
|
// Uses cmpxchg8 to claim buffer space without the need for spinlocks.
|
|
//
|
|
// Arguments:
|
|
//
|
|
// current - address of routine which did the call
|
|
// child - address of called routine
|
|
//
|
|
//--
|
|
|
|
LEAF_ENTRY(_CAP_Start_Profiling2)

//
// Temporaries used: r27-r31, p6-p8 and ar.ccv.  Arguments arrive in
// r32 (Current) and r33 (Child).
//
//   r31 - BBTBuffer, later *CpuPtr (address of the per-cpu cursor)
//   r30 - flag word BBTBuffer[1], later record size, later scratch
//   r29 - cpu number, later CpuPtr, later the cursor value **CpuPtr
//   r28 - CpuPtr+1, later *(CpuPtr+1), the value the cursor is checked against
//   r27 - proposed cursor, later RecPtr (post-incremented by each store)
//   p7  - flag bit 1 set (log PMD[4]);  p8 - flag bit 3 set (log PMD[5])
//
// Record written at RecPtr (8 bytes per field):
//   +0  header = ((size - 4) << 16) + 5
//   +8  Current      +16 Child      +24 Cid.UniqueThread of current thread
//   +32 ar.itc       then PMD[4] if p7, then PMD[5] if p8
//
// NOTE(review): with bit 3 set but bit 1 clear the size is 56 although only
// 48 bytes are stored - confirm that bit 3 is only ever used with bit 1.
//

        movl        r31 = BBTBuffer             // r31 = &BBTBuffer
        ;;
        ld8         r31 = [r31]                 // r31 = BBTBuffer
        ;;
        cmp.eq      p6 = r0, r31                // buffer pointer still null?
(p6)    br.ret.sptk.clr brp                     //   yes - nothing to log
        add         r30 = 8, r31                // r30 = &BBTBuffer[1]
        ;;
        ld8         r30 = [r30]                 // r30 = flag word
        ;;
        tbit.z      p6 = r30, 0                 // flag bit 0 clear?
(p6)    br.ret.sptk.clr brp                     //   yes - return without logging
        movl        r29 = KiPcr + PcNumber      // address of the cpu number byte
        ;;
        ld1         r29 = [r29]                 // r29 = cpu number
        tbit.nz     p8 = r30, 3                 // p8 = flag bit 3 (value 8)
        tbit.nz     p7 = r30, 1                 // p7 = flag bit 1 (value 2)
        ;;
        add         r29 = 2, r29                // cpu + 2
        mov         r30 = 40                    // record size without counters
        ;;
        shladd      r29 = r29, 3, r31           // CpuPtr = BBTBuffer + 8*(cpu+2)
(p7)    mov         r30 = 48                    // record size with one counter
        ;;
(p8)    mov         r30 = 56                    // record size with two counters
        add         r28 = 8, r29                // r28 = CpuPtr + 1
        ld8         r31 = [r29]                 // r31 = *CpuPtr
        ;;
        cmp.eq      p6 = r0, r31                // *CpuPtr null?
(p6)    br.ret.sptk.clr brp                     //   return before dereferencing it
        ld8         r28 = [r28]                 // r28 = *(CpuPtr+1)
        ld8         r29 = [r31]                 // r29 = **CpuPtr (current cursor)
        ;;
        cmp.gtu     p6 = r29, r28               // cursor already above *(CpuPtr+1)?
(p6)    br.ret.sptk.clr brp                     //   yes - return, claim nothing
        ;;

//
// Claim 'size' bytes by advancing the cursor with a compare-and-exchange:
// RecPtr = old cursor, new cursor = old cursor + size.  Retry whenever the
// cursor changed between the load and the exchange.
//

SP_Claim:
        ld8         r29 = [r31]                 // re-read the cursor
        ;;
        add         r27 = r30, r29              // proposed cursor = cursor + size
        mov.m       ar.ccv = r29                // expected value for cmpxchg8
        ;;
        cmpxchg8.acq r27 = [r31], r27, ar.ccv   // r27 = value found in memory
        ;;
        cmp.ne      p6 = r27, r29               // found value != expected?
(p6)    br.cond.dptk.few SP_Claim               //   exchange failed - try again
        add         r31 = r30, r27              // r31 = RecPtr + size
        ;;
        cmp.geu     p6 = r31, r28               // RecPtr+size >= *(CpuPtr+1)?
(p6)    br.ret.sptk.clr brp                     //   yes - return, write nothing
        add         r30 = -4, r30               // size field excludes 4 header bytes
        ;;
        shl         r30 = r30, 16               // size into bits 16 and up
        ;;
        add         r30 = 5, r30                // record type 5 in the low bits
        ;;
        st8         [r27] = r30, 8              // RecPtr+0  : header
        ;;
        st8         [r27] = r32, 8              // RecPtr+8  : Current
        ;;
        st8         [r27] = r33, 8              // RecPtr+16 : Child
        ;;

//
// RecPtr->stack = (SIZE_T)PsGetCurrentThread()->Cid.UniqueThread;
//

        movl        r30 = KiPcr + PcCurrentThread
        ;;
        ld8         r30 = [r30]                 // r30 = current thread
        ;;
        adds        r30 = EtCid + CidUniqueThread, r30
        ;;
        ld8         r30 = [r30]                 // r30 = Cid.UniqueThread
        ;;
        st8         [r27] = r30, 8              // RecPtr+24 : thread id
(p7)    mov         r29 = 4                     // index for PMD[4]
        mov.m       r31 = ar.itc                // timestamp
        ;;
        st8         [r27] = r31, 8              // RecPtr+32 : timestamp
(p7)    mov         r30 = PMD[r29]              // read PMD[4]
        ;;
(p7)    st8         [r27] = r30, 8              // next slot : PMD[4]
(p8)    mov         r29 = 5                     // index for PMD[5]
        ;;
(p8)    mov         r30 = PMD[r29]              // read PMD[5]
        ;;
(p8)    st8         [r27] = r30, 8              // next slot : PMD[5]
        br.ret.sptk.clr brp
LEAF_EXIT(_CAP_Start_Profiling2)
|
|
|
|
//++
|
|
//
|
|
// VOID
|
|
// _CAP_End_Profiling2 (
|
|
// IN PVOID Current
|
|
// )
|
|
//
|
|
// Routine Description:
|
|
//
|
|
// Kernel-mode version of after-call icecap probe. Logs a type 6
|
|
// icecap record into the part of BBTBuffer for the current cpu
|
|
// (cpu number obtained from the Pcr). Inserts address of current function
|
|
// plus ar.itc timestamp into logrecord.
|
|
// If BBTBuffer flag 2 set, also copies PMD4 into logrecord; if flag 8 set, also copies PMD5.
|
|
// Uses cmpxchg8 to claim buffer space without the need for spinlocks.
|
|
//
|
|
// Arguments:
|
|
//
|
|
// current - address of routine which did the call
|
|
//
|
|
//--
|
|
|
|
LEAF_ENTRY(_CAP_End_Profiling2)

//
// Temporaries used: r27-r31, p6-p8 and ar.ccv.  The argument arrives in
// r32 (Current).
//
//   r31 - BBTBuffer, later *CpuPtr (address of the per-cpu cursor)
//   r30 - flag word BBTBuffer[1], later record size, later scratch
//   r29 - cpu number, later CpuPtr, later the cursor value **CpuPtr
//   r28 - CpuPtr+1, later *(CpuPtr+1), the value the cursor is checked against
//   r27 - proposed cursor, later RecPtr (post-incremented by each store)
//   p7  - flag bit 1 set (log PMD[4]);  p8 - flag bit 3 set (log PMD[5])
//
// Record written at RecPtr (8 bytes per field):
//   +0  header = ((size - 4) << 16) + 6
//   +8  Current      +16 ar.itc
//   then PMD[4] if p7, then PMD[5] if p8
//
// NOTE(review): with bit 3 set but bit 1 clear the size is 40 although only
// 32 bytes are stored - confirm that bit 3 is only ever used with bit 1.
//

        movl        r31 = BBTBuffer             // adr of ptr to BBTBuffer
        ;;
        ld8         r31 = [r31]                 // ptr to BBTBuffer
        ;;
        cmp.eq      p6 = r0, r31                // check if ptr not set up
(p6)    br.ret.sptk.clr brp
        adds        r30 = 8, r31                // BBTBuffer+1
        ;;

        ld8         r30 = [r30]                 // *(BBTBuffer+1), the flag word
        ;;

        tbit.z      p6 = r30, 0                 // (*(BBTBuffer+1)) & 1
(p6)    br.ret.sptk.clr brp                     // bit 0 clear - do not log
        movl        r29 = KiPcr + PcNumber      // Get cpu# from Pcr
        ;;

        ld1         r29 = [r29]                 // extract 1 byte cpu#
        tbit.nz     p7 = r30, 1                 // (*(BBTBuffer+1)) & 2
        tbit.nz     p8 = r30, 3                 // (*(BBTBuffer+1)) & 8
        ;;
        mov         r30 = 24                    // size w/o 2nd counter
        add         r29 = 2, r29                // cpu+2
        ;;
(p7)    mov         r30 = 32                    // size w/ 2nd counter
        shladd      r29 = r29, 3, r31           // CpuPtr=BBTBuffer + 8*(cpu+2)
        ;;
(p8)    mov         r30 = 40                    // size w/ 3rd counter

        // r30=size, r29=CpuPtr

        ld8         r31 = [r29]                 // *CpuPtr
        add         r28 = 8, r29                // (CpuPtr+1)
        ;;

        //
        // The null test and its return come BEFORE the load through r31.
        // A taken branch suppresses the instructions that follow it in the
        // same instruction group, so the loads below are never issued with
        // *CpuPtr == 0.  (Previously the loads preceded the branch and
        // dereferenced address 0 on this path.)
        //

        cmp.eq      p6 = r0, r31                // !(*CpuPtr)
(p6)    br.ret.sptk.clr brp
        ld8         r29 = [r31]                 // **CpuPtr
        ld8         r28 = [r28]                 // *(CpuPtr+1)
        ;;

        // r31=*CpuPtr, r30=size, r29=**CpuPtr, r28=*(CpuPtr+1)

        cmp.gtu     p6 = r29, r28               // **CpuPtr > *(CpuPtr+1)
(p6)    br.ret.sptk.clr brp                     // cursor already past it - return
        ;;

        // RecPtr = (CAPENTER*)InterlockedExchangeAddPtr( (SIZE_T*)(*CpuPtr), size);

EP_Retry:
        ld8         r29 = [r31]                 // refresh **CpuPtr
        ;;
        mov.m       ar.ccv = r29                // expected value for cmpxchg8
        add         r27 = r29, r30              // r27 is proposed value
        ;;
        cmpxchg8.acq r27 = [r31], r27, ar.ccv   // r27 now RecPtr (value found)
        ;;
        cmp.ne      p6 = r27, r29               // cursor changed under us?
(p6)    br.cond.dptk.few EP_Retry               //   yes - retry the claim
        add         r31 = r30, r27              // RecPtr+size
        ;;

        // r30=size, r27=RecPtr
        // if( (((SIZE_T)RecPtr)+size) >= *(CpuPtr+1) )

        cmp.geu     p6 = r31, r28               // r28 = *(CpuPtr+1)
(p6)    br.ret.sptk.clr brp                     // record would not fit - write nothing
        add         r30 = -4, r30               // RecSize doesn't include header
        ;;

        shl         r30 = r30, 16               // shift up 2 bytes
        ;;
        adds        r30 = 6, r30                // RecType 6 in low byte
        ;;
        st8         [r27] = r30, 8              // copy RecType & size to RecPtr+0(8)
        ;;
        st8         [r27] = r32, 8              // copy A0 (Current) to RecPtr+8(8)
        mov.m       r31 = ar.itc                // get TS
(p7)    mov         r29 = 4                     // PMD[4]
        ;;
        st8         [r27] = r31, 8              // copy TS to RecPtr+16(8)
(p7)    mov         r30 = PMD[r29]              // get PMD[4]
        ;;
(p7)    st8         [r27] = r30, 8              // copy to next slot (RecPtr+24)
(p8)    mov         r29 = 5                     // PMD[5]
        ;;
(p8)    mov         r30 = PMD[r29]              // get PMD[5]
        ;;
(p8)    st8         [r27] = r30, 8              // copy to next slot
        br.ret.sptk.clr brp
LEAF_EXIT(_CAP_End_Profiling2)
|
|
|
|
#endif
|