|
|
/*++
Copyright (c) 1995-1998 Microsoft Corporation
Module Name:
optfrag.c
Abstract: Instruction Fragments which correspond to optimizations.
Author:
6-July-1995 Ori Gershony (t-orig)
Revision History:
24-Aug-1999 [askhalid] copied from the 32-bit wx86 directory and made to work for 64-bit.
--*/
#include <nt.h>
#include <ntrtl.h>
#include <nturtl.h>
#include <windows.h>
#include <stdio.h>
#include "cpuassrt.h"
#include "fragp.h"
#include "optfrag.h"
// NOTE(review): presumably supplies the module name used by the CPUASSERT
// macros declared in cpuassrt.h -- confirm against that header.
ASSERTNAME;
// This fragment corresponds to:
// push ebx
// push esi
// push edi
FRAG0(OPT_PushEbxEsiEdiFrag) { ULONG *espval;
espval=(ULONG *)esp;
*(espval-1) = ebx; *(espval-2) = esi; *(espval-3) = edi; esp=(ULONG)(LONGLONG)espval-12; }
// This fragment corresponds to:
// pop edi
// pop esi
// pop ebx
FRAG0(OPT_PopEdiEsiEbxFrag) { ULONG *espval;
espval=(ULONG *)esp;
edi=*espval; esi=*(espval+1); ebx=*(espval+2); esp=(ULONG)(LONGLONG)espval+12; }
// This fragment corresponds to:
// push ebp
// mov ebp,esp
// sub esp, op1
FRAG1IMM(OPT_SetupStackFrag, ULONG) { ULONG result, oldespminusfour;
oldespminusfour = esp-4; result = oldespminusfour - op1; *(ULONG *)oldespminusfour = ebp; ebp = oldespminusfour; esp = result; SET_FLAGS_SUB32(result, oldespminusfour, op1, 0x80000000); } FRAG1IMM(OPT_SetupStackNoFlagsFrag, ULONG) { ULONG result, oldespminusfour;
oldespminusfour = esp-4; result = oldespminusfour - op1; *(ULONG *)oldespminusfour = ebp; ebp = oldespminusfour; esp = result; }
FRAG1(OPT_ZEROFrag32, LONG)
{
    //
    // implements:  XOR samereg, samereg
    //         or:  SUB samereg, samereg
    // e.g. XOR EAX, EAX  or  SUB ECX, ECX
    //
    // The destination is cleared; carry, overflow and sign are turned off
    // and the zero, parity and aux flags are set from a result of zero.
    //
    *pop1 = 0;

    SET_CFLAG_OFF;
    SET_OFLAG_OFF;
    SET_SFLAG_OFF;
    SET_ZFLAG(0);
    SET_PFLAG(0);
    SET_AUXFLAG(0);
}
FRAG1(OPT_ZERONoFlagsFrag32, LONG)
{
    //
    // implements:  XOR samereg, samereg
    //         or:  SUB samereg, samereg
    // e.g. XOR EAX, EAX  or  SUB ECX, ECX
    //
    // Only the destination is cleared; no flags are touched.
    //
    *pop1 = 0;
}
FRAG3(OPT_CmpSbbFrag32, ULONG, ULONG, ULONG) { ULONG result; ULONG cf;
//
// implements: CMP op2, op3
// SBB op1, op1
//
result = op2-op3; cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result)); result = (ULONG)-(LONG)(cf >> 31); *pop1 = result; // pop1 is a pointer to a reg, so always aligned
SET_OFLAG_OFF; SET_CFLAG(result); SET_SFLAG(result); SET_ZFLAG(result); SET_AUXFLAG(result); SET_PFLAG(result); } FRAG3(OPT_CmpSbbNoFlagsFrag32, ULONG, ULONG, ULONG) { ULONG result; ULONG cf;
//
// implements: CMP op2, op3
// SBB op1, op1
//
result = op2-op3; cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result)); *pop1 = (ULONG)-(LONG)(cf >> 31); } FRAG3(OPT_CmpSbbNegFrag32, ULONG, ULONG, ULONG) { ULONG result; ULONG cf;
//
// implements: CMP op2, op3
// SBB op1, op1
// NEG op1
//
result = op2-op3; cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result)); // pop1 is a pointer to a reg, so it is always aligned
if (cf >= 0x80000000) { result = 1; *pop1 = result; // store the result before updating flags
SET_CFLAG_ON; // set if result != 0
SET_AUXFLAG(0xfe); // this is (BYTE)(0xffffffff ^ 0x00000001)
} else { result = 0; *pop1 = result; // store the result before updating flags
SET_CFLAG_OFF; // cleared if result==0
SET_AUXFLAG(0); // this is (BYTE)(0x0 ^ 0x0)
SET_OFLAG_OFF; // this is (0x0 & 0x0) << 31
} SET_ZFLAG(result); SET_PFLAG(result); SET_SFLAG_OFF; SET_OFLAG_OFF; // this is either (0xffffffff & 0x00000001) or (0 & 0)
} FRAG3(OPT_CmpSbbNegNoFlagsFrag32, ULONG, ULONG, ULONG) { ULONG result; ULONG cf;
//
// implements: CMP op2, op3
// SBB op1, op1
// NEG op1
//
result = op2-op3; cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result)); // result is 1 if high bit of cf is set, 0 if high bit is clear
*pop1 = cf >> 31; }
FRAG2IMM(OPT_Push2Frag32, ULONG, ULONG)
{
    //
    // implements:  PUSH op1
    //              PUSH op2
    //
    // Both operand values are computed before the first PUSH is executed,
    // so the analysis phase must ensure that the value of op2 does not
    // depend on the value of ESP.
    //
    PUSH_LONG(op1);
    PUSH_LONG(op2);
}

FRAG2REF(OPT_Pop2Frag32, ULONG)
{
    //
    // implements:  POP pop1
    //              POP pop2
    //
    // The value of pop2 is computed before pop1 has been popped, so the
    // analysis phase must ensure that the value of pop2 does not depend
    // on the value of pop1.
    //
    POP_LONG(*pop1);
    POP_LONG(*pop2);
}
FRAG1(OPT_CwdIdivFrag16, USHORT) { short op1; short result;
//
// implements: CWD
// IDIV EAX, *pop1
// The CWD sign-extends EAX into EDX:EAX, which means, we can
// avoid a 64-bit division and just divide EAX. There is no
// possibility of overflow.
//
op1 = (short)GET_SHORT(pop1); // Must do the divide before modifying edx, in case op1==0 and we fault.
result = (short)ax / op1;
dx = (short)ax % op1; ax = result; } FRAG1(OPT_CwdIdivFrag16A, USHORT) { short op1; short result;
//
// implements: CWD
// IDIV EAX, *pop1
// The CWD sign-extends EAX into EDX:EAX, which means, we can
// avoid a 64-bit division and just divide EAX. There is no
// possibility of overflow.
//
op1 = (short)*pop1; // Must do the divide before modifying edx, in case op1==0 and we fault.
result = (short)ax / op1;
dx = (short)ax % op1; ax = result; }
FRAG1(OPT_CwdIdivFrag32, ULONG) { long op1; long result;
//
// implements: CWD
// IDIV EAX, *pop1
// The CWD sign-extends EAX into EDX:EAX, which means, we can
// avoid a 64-bit division and just divide EAX. There is no
// possibility of overflow.
//
op1 = (long)GET_LONG(pop1); // Must do the divide before modifying edx, in case op1==0 and we fault.
result = (long)eax / op1;
edx = (long)eax % op1; eax = result; } FRAG1(OPT_CwdIdivFrag32A, ULONG) { long op1; long result;
//
// implements: CWD
// IDIV EAX, *pop1
// The CWD sign-extends EAX into EDX:EAX, which means, we can
// avoid a 64-bit division and just divide EAX. There is no
// possibility of overflow.
//
op1 = (long)*pop1; // Must do the divide before modifying edx, in case op1==0 and we fault.
result = (long)eax / op1;
edx = (long)eax % op1; eax = result; }
// This fragment must never actually be executed; reaching it trips an
// unconditional assertion.
FRAG0(OPT_OPTIMIZEDFrag)
{
    CPUASSERTMSG(FALSE, "OPTIMIZED fragment should never be called!");
}
|