; call cpuid with args in eax, ecx
; store eax, ebx, ecx, edx to p
option casemap:none

PUBLIC GetStackPtr64
.CODE

GetStackPtr64	PROC FRAME
; unsigned char* GetStackPtr64(void);
        .endprolog      
        
        mov		rax, rsp	; get stack ptr
        add		rax, 8h		; account for 8-byte return value of this function

        ret

GetStackPtr64 ENDP




; Needs to match definition found in setjmp.h
_JUMP_BUFFER STRUCT
	m_Frame QWORD ?
	m_Rbx QWORD ?
	m_Rsp QWORD ?
	m_Rbp QWORD ?
	m_Rsi QWORD ?
	m_Rdi QWORD ?
	m_R12 QWORD ?
	m_R13 QWORD ?
	m_R14 QWORD ?
	m_R15 QWORD ?
	m_Rip QWORD ?
	m_MxCsr DWORD ?
	m_FpCsr WORD ?
	m_Spare WORD ?
	m_Xmm6 XMMWORD ?
	m_Xmm7 XMMWORD ?
	m_Xmm8 XMMWORD ?
	m_Xmm9 XMMWORD ?
	m_Xmm10 XMMWORD ?
	m_Xmm11 XMMWORD ?
	m_Xmm12 XMMWORD ?
	m_Xmm13 XMMWORD ?
	m_Xmm14 XMMWORD ?
	m_Xmm15 XMMWORD ?
_JUMP_BUFFER ENDS


;This is the reference asm for __intrinsic_setjmp() in VS2015
;mov         qword ptr [rcx],rdx		; intrinsic call site does "mov rdx,rbp" followed by "add rdx,0FFFFFFFFFFFFFFC0h", looks like a nonstandard abi
;mov         qword ptr [rcx+8],rbx  
;mov         qword ptr [rcx+18h],rbp  
;mov         qword ptr [rcx+20h],rsi  
;mov         qword ptr [rcx+28h],rdi  
;mov         qword ptr [rcx+30h],r12  
;mov         qword ptr [rcx+38h],r13  
;mov         qword ptr [rcx+40h],r14  
;mov         qword ptr [rcx+48h],r15  
;lea         r8,[rsp+8]					; rsp set to post-return address
;mov         qword ptr [rcx+10h],r8  
;mov         r8,qword ptr [rsp]  
;mov         qword ptr [rcx+50h],r8  
;stmxcsr     dword ptr [rcx+58h]  
;fnstcw      word ptr [rcx+5Ch]  
;movdqa      xmmword ptr [rcx+60h],xmm6  
;ovdqa      xmmword ptr [rcx+70h],xmm7  
;movdqa      xmmword ptr [rcx+80h],xmm8  
;movdqa      xmmword ptr [rcx+90h],xmm9  
;movdqa      xmmword ptr [rcx+0A0h],xmm10  
;movdqa      xmmword ptr [rcx+0B0h],xmm11  
;movdqa      xmmword ptr [rcx+0C0h],xmm12  
;movdqa      xmmword ptr [rcx+0D0h],xmm13  
;movdqa      xmmword ptr [rcx+0E0h],xmm14  
;movdqa      xmmword ptr [rcx+0F0h],xmm15  
;xor         eax,eax  
;ret  


; extern "C" void NORETURN Coroutine_LongJmp_UnChecked( jmp_buf buf, int nResult )
; Per Win64 ABI, incoming params are rcx, rdx, r8, r9. initial stack pointer is half-aligned due to return address
Coroutine_LongJmp_Unchecked PROC
	;load nResult into result from initial setjmp()
	xor rax, rax
	mov eax, edx
	
	;restore to setjmp() caller state
	mov rdx, [rcx]._JUMP_BUFFER.m_Frame ; appears to be an error checking value of (_JUMP_BUFFER.m_Rbp + 0FFFFFFFFFFFFFFC0h) passed non-standardly through rdx to setjmp()
	mov rbx, [rcx]._JUMP_BUFFER.m_Rbx
	mov rsp, [rcx]._JUMP_BUFFER.m_Rsp
	mov rbp, [rcx]._JUMP_BUFFER.m_Rbp
	mov rsi, [rcx]._JUMP_BUFFER.m_Rsi
	mov rdi, [rcx]._JUMP_BUFFER.m_Rdi
	mov r12, [rcx]._JUMP_BUFFER.m_R12
	mov r13, [rcx]._JUMP_BUFFER.m_R13
	mov r14, [rcx]._JUMP_BUFFER.m_R14
	mov r15, [rcx]._JUMP_BUFFER.m_R15
	mov r10, [rcx]._JUMP_BUFFER.m_Rip ; store return address in r10 for return
	ldmxcsr [rcx]._JUMP_BUFFER.m_MxCsr
	fldcw [rcx]._JUMP_BUFFER.m_FpCsr
	;[rcx]._JUMP_BUFFER.m_Spare
	movaps xmm6, [rcx]._JUMP_BUFFER.m_Xmm6
	movaps xmm7, [rcx]._JUMP_BUFFER.m_Xmm7
	movaps xmm8, [rcx]._JUMP_BUFFER.m_Xmm8
	movaps xmm9, [rcx]._JUMP_BUFFER.m_Xmm9
	movaps xmm10, [rcx]._JUMP_BUFFER.m_Xmm10
	movaps xmm11, [rcx]._JUMP_BUFFER.m_Xmm11
	movaps xmm12, [rcx]._JUMP_BUFFER.m_Xmm12
	movaps xmm13, [rcx]._JUMP_BUFFER.m_Xmm13
	movaps xmm14, [rcx]._JUMP_BUFFER.m_Xmm14
	movaps xmm15, [rcx]._JUMP_BUFFER.m_Xmm15

	;jmp instead of ret to _JUMP_BUFFER.m_Rip because setjmp() already set the _JUMP_BUFFER.m_Rsp to the post-return state
	db 048h ; emit a REX prefix on the jmp to ensure it's a full qword
	jmp qword ptr r10
Coroutine_LongJmp_Unchecked ENDP

_TEXT ENDS
END