/*++

Copyright (c) 1992-2000  Microsoft Corporation

Module Name:

    analysis.c

Abstract:

    This module contains the main file of the analysis module.

Author:

    Ori Gershony (t-orig) creation-date 6-July-1995

Revision History:

    24-Aug-1999 [askhalid] copied from 32-bit wx86 directory and make work for 64bit.

--*/

#include <nt.h>
#include <ntrtl.h>
#include <nturtl.h>
#include <windows.h>
#include <wx86.h>
#include <wx86nt.h>
#include <wx86cpu.h>
#include <cpuassrt.h>
#include <threadst.h>
#include <instr.h>
#include <analysis.h>
#include <decoder.h>
#include <frag.h>
#include <config.h>
#include <compiler.h>

ASSERTNAME;

//
// Macro to determine when to stop looking ahead during compilation.
// The argument is parenthesized so that any lvalue expression may be passed.
//
#define STOP_DECODING(inst) (Fragments[(inst).Operation].Flags & OPFL_STOP_COMPILE)

//
// Map a REG_ constant (offset into cpu struct) into register bit map
// used by instruction data.  Slots 8..13 carry no register bits (0).
//
const DWORD MapRegNumToRegBits[0x1e] = {
    REGEAX, REGECX, REGEDX, REGEBX, REGESP, REGEBP, REGESI, REGEDI,
    0,      0,      0,      0,      0,      0,
    REGAX,  REGCX,  REGDX,  REGBX,  REGSP,  REGBP,  REGSI,  REGDI,
    REGAL,  REGCL,  REGDL,  REGBL,  REGAH,  REGCH,  REGDH,  REGBH
};


ULONG
LocateEntryPoints(
    PINSTRUCTION InstructionStream,
    ULONG NumberOfInstructions
    )
/*++

Routine Description:

    This function scans the InstructionStream and marks instructions
    which begin an entrypoint.  An instruction begins an entrypoint if its
    EntryPoint field has a different value than the previous instruction's
    value.  On return no instruction has a NULL EntryPoint.

    Note that in this pass, the EntryPoint field does *not* point to an
    ENTRYPOINT structure... it is only a marker (a small integer cast to
    a pointer).

Arguments:

    InstructionStream    -- The decoded instructions to examine.
    NumberOfInstructions -- Number of valid entries in InstructionStream.

Return Value:

    Count of EntryPoints located (0 if the stream is empty).

--*/
{
    ULONG i, j, intelDest;
    ULONG EntryPointCounter;
    ULONG IntelStart;
    ULONG IntelEnd;
    BOOL  Found;

    if (NumberOfInstructions == 0) {
        //
        // Nothing to mark.  Without this guard the code below would index
        // InstructionStream[NumberOfInstructions-1] with a wrapped index.
        //
        return 0;
    }

    if (CompilerFlags & COMPFL_SLOW) {
        //
        // The compiler is supposed to generate slowmode code.  Each
        // x86 instruction gets its own ENTRYPOINT
        //
        EntryPointCounter = 1;
        for (i = 0; i < NumberOfInstructions; i++) {
            //
            // Mark all instructions which don't correspond to 0-byte NOPs
            // following optimized instructions as starting EntryPoints.
            //
            if (InstructionStream[i].Size) {
                EntryPointCounter++;
            }
            InstructionStream[i].EntryPoint =
                (PENTRYPOINT)(ULONG_PTR)EntryPointCounter;
        }

    } else {
        //
        // Find all instructions which need Entrypoints.
        //     Performance is O(n^2) in the worst case, although
        //     it will be typically much closer to O(n)
        //
        // Instructions which mark the starts of Entrypoints have
        // their .EntryPoint pointer set to non-NULL.  Instructions which
        // don't require entrypoints have it set to NULL;
        //
        // IntelEnd is the address just past the last decoded instruction.
        // (IntelAddress values are compared directly against branch
        // destinations below, so they are absolute; IntelStart must not be
        // added in again.)
        //
        IntelStart = InstructionStream[0].IntelAddress;
        IntelEnd = InstructionStream[NumberOfInstructions-1].IntelAddress +
                   InstructionStream[NumberOfInstructions-1].Size;

        //
        // The first instruction always gets an entrypoint
        //
        InstructionStream[0].EntryPoint = (PENTRYPOINT)1;

        //
        // Visit each instruction in turn
        //
        for (i = 0; i < NumberOfInstructions; i++) {

            if (((i+1) < NumberOfInstructions) &&
                (Fragments[InstructionStream[i].Operation].Flags & OPFL_END_NEXT_EP)) {
                //
                // This instruction marks the end of an Entrypoint.  The next
                // instruction gets a new Entrypoint.
                //
                CPUASSERT(i < CpuInstructionLookahead-1 && i < NumberOfInstructions-1);
                InstructionStream[i+1].EntryPoint = (PENTRYPOINT)1;
            }

            // Now see if it is a direct control transfer instruction with a
            // destination that lies within this instruction stream.  If it is,
            // we want to create an Entry Point at the destination so that the
            // control transfer will be compiled directly to the patched form,
            // and won't have to be patched later.
            //
            if (Fragments[InstructionStream[i].Operation].Flags & OPFL_CTRLTRNS) {
                //
                // The instruction is a direct control-transfer.  If the
                // destination is within the InstructionStream, create an
                // Entrypoint at the destination.
                //
                if (InstructionStream[i].Operand1.Type == OPND_IMM ||
                    InstructionStream[i].Operand1.Type == OPND_NOCODEGEN) {
                    // Get the intel destination from the instruction structure.
                    intelDest = InstructionStream[i].Operand1.Immed;
                } else {
                    CPUASSERT(InstructionStream[i].Operand1.Type == OPND_ADDRREF );
                    // A FAR instruction - Operand1 is a ptr to a SEL:OFFSET pair
                    intelDest = *(UNALIGNED PULONG)(InstructionStream[i].Operand1.Immed);
                }

                if ((intelDest >= IntelStart) && (intelDest <= IntelEnd)) {
                    //
                    // Destination of the control-transfer is within the
                    // instructionstream.  Find the destination instruction.
                    //
                    Found = FALSE;

                    if (intelDest > InstructionStream[i].IntelAddress) {
                        //
                        // The dest. address is at a higher address.
                        //
                        for (j = i+1; j < NumberOfInstructions; ++j) {
                            if (InstructionStream[j].IntelAddress == intelDest) {
                                Found = TRUE;
                                break;
                            }
                        }
                    } else {
                        //
                        // The dest. address is at a lower (or the same)
                        // address.  Walk back from i down to and including
                        // instruction 0.
                        //
                        j = i + 1;
                        while (j > 0) {
                            j--;
                            if (InstructionStream[j].IntelAddress == intelDest) {
                                Found = TRUE;
                                break;
                            }
                        }
                    }

                    //
                    // An exact match may not be found in the event that the
                    // app is punning (either a real pun or the app is jumping
                    // into the middle of an optimized instruction).  In
                    // either of the cases, defer entrypoint creation until
                    // the branch is actually taken.
                    //
                    if (Found) {
                        //
                        // Exact match was found.  Create an Entrypoint.
                        //
                        InstructionStream[j].EntryPoint = (PENTRYPOINT)1;
                    }
                }
            }  // if OPFL_CTRLTRNS
        }   // for ()

        //
        // Convert the EntryPoint field from NULL/non-NULL to a unique
        // value for each range of instructions.
        //
        EntryPointCounter = 1;
        i = 0;
        while (i < NumberOfInstructions) {
            //
            // This instruction marks the beginning of a basic block
            //
            InstructionStream[i].EntryPoint =
                (PENTRYPOINT)(ULONG_PTR)EntryPointCounter;

            //
            // Extend the block until an instruction is found which marks
            // the start of the next basic block.  Note that 0-byte NOP
            // instructions are not allowed to start basic blocks as that
            // violates the rules of OPT_ instructions.
            //
            for (j = i+1; j < NumberOfInstructions; j++) {
                if (InstructionStream[j].Size && InstructionStream[j].EntryPoint) {
                    break;
                }
                InstructionStream[j].EntryPoint =
                    (PENTRYPOINT)(ULONG_PTR)EntryPointCounter;
            }
            EntryPointCounter++;
            i = j;
        }
    } // if not COMPFL_SLOW

    //
    // At this point, EntryPointCounter holds the number of EntryPoints
    // plus one, because we started the counter at 1, not 0.  Correct
    // that now.
    //
    EntryPointCounter--;

    return EntryPointCounter;
}


VOID
UpdateRegs(
    PINSTRUCTION pInstr,
    POPERAND Operand
    )
/*++

Routine Description:

    Updates the list of registers referenced and/or modified based on the
    Operand.  Register-reference operands add to pInstr->RegsSet; register
    values and the base/index registers of address operands add to
    pInstr->RegsNeeded.  Other operand types leave pInstr untouched.

Arguments:

    pInstr  -- the instruction to examine
    Operand -- the operand of the instruction to examine

Return Value:

    return-value - none

--*/
{
    switch (Operand->Type) {
    case OPND_NOCODEGEN:
    case OPND_REGREF:
        // The operand names a register which is written.
        if (Operand->Reg != NO_REG) {
            pInstr->RegsSet |= MapRegNumToRegBits[Operand->Reg];
        }
        break;

    case OPND_REGVALUE:
        // The operand is the value of a register, so it is read.
        if (Operand->Reg != NO_REG) {
            pInstr->RegsNeeded |= MapRegNumToRegBits[Operand->Reg];
        }
        break;

    case OPND_ADDRREF:
    case OPND_ADDRVALUE8:
    case OPND_ADDRVALUE16:
    case OPND_ADDRVALUE32:
        // Both registers used to form the address are read.
        if (Operand->Reg != NO_REG) {
            pInstr->RegsNeeded |= MapRegNumToRegBits[Operand->Reg];
        }
        if (Operand->IndexReg != NO_REG) {
            pInstr->RegsNeeded |= MapRegNumToRegBits[Operand->IndexReg];
        }
        break;

    default:
        break;
    }
}


VOID
CacheIntelRegs(
    PINSTRUCTION InstructionStream,
    ULONG numInstr
    )
/*++

Routine Description:

    This function determines what x86 registers, if any, can be cached in
    RISC preserved registers.  It walks the stream from bottom to top,
    filling in RegsSet/RegsNeeded for every instruction and accumulating
    RegsToCache on the instruction that follows each point where caching
    becomes worthwhile.

Arguments:

    InstructionStream -- The instruction stream returned by the decoder
    numInstr          -- The length of InstructionStream

Return Value:

    return-value - none

--*/
{
    PINSTRUCTION pInstr;
    BYTE RegUsage[REGCOUNT];
    DWORD RegsToCache;
    int i;
    ULONG Index;
    PENTRYPOINT PrevEntryPoint;

    if (numInstr == 0) {
        // Nothing to analyze; avoids indexing InstructionStream[-1].
        return;
    }

    //
    // Calculate the RegsSet and RegsNeeded for the bottommost instruction
    //
    pInstr = &InstructionStream[numInstr-1];
    pInstr->RegsSet = Fragments[pInstr->Operation].RegsSet;
    PrevEntryPoint = pInstr->EntryPoint;
    UpdateRegs(pInstr, &pInstr->Operand1);
    UpdateRegs(pInstr, &pInstr->Operand2);
    UpdateRegs(pInstr, &pInstr->Operand3);

    //
    // For each 32-bit register used as a parameter to this instruction,
    // set the usage count to 1.
    //
    for (i = 0; i < REGCOUNT; ++i) {
        if (pInstr->RegsNeeded & (REGMASK<<(REGSHIFT*i))) {
            RegUsage[i] = 1;
        } else {
            RegUsage[i] = 0;
        }
    }

    //
    // Loop over instruction stream from bottom to top, starting at the
    // second-to-last instruction.  An index is used rather than a
    // decrementing pointer so that no pointer below the start of the
    // array is ever formed.
    //
    Index = numInstr - 1;
    while (Index > 0) {
        Index--;
        pInstr = &InstructionStream[Index];

        //
        // Calculate the RegsSet and RegsNeeded values for this instruction
        //
        pInstr->RegsSet = Fragments[pInstr->Operation].RegsSet;
        UpdateRegs(pInstr, &pInstr->Operand1);
        UpdateRegs(pInstr, &pInstr->Operand2);
        UpdateRegs(pInstr, &pInstr->Operand3);

        RegsToCache = 0;

        if (PrevEntryPoint != pInstr->EntryPoint) {
            //
            // The current instruction marks the end of an Entrypoint.
            //
            PrevEntryPoint = pInstr->EntryPoint;

            //
            // For all x86 registers which have been read more than once
            // but not modified in the basic block, load them into the
            // cache before executing the first instruction in the basic
            // block.
            //
            for (i = 0; i < REGCOUNT; ++i) {
                if (RegUsage[i] > 1) {
                    RegsToCache |= (REGMASK<<(REGSHIFT*i));
                }
            }

            //
            // Reset the RegUsage[] array to indicate no registers are
            // cached.
            //
            RtlZeroMemory(RegUsage, REGCOUNT);

        } else {
            //
            // For each 32-bit x86 register modified by this instruction,
            // update the caching info.
            //
            for (i = 0; i < REGCOUNT; ++i) {
                DWORD RegBits = pInstr->RegsSet & (REGMASK<<(REGSHIFT*i));
                if (RegBits) {
                    //
                    // The ith 32-bit x86 register has been modified by this
                    // instruction
                    //
                    if (RegUsage[i] > 1) {
                        //
                        // There is more than one consumer of the modified
                        // value so it is worth caching.
                        //
                        RegsToCache |= RegBits;
                    }

                    //
                    // Since this x86 register was dirtied by this instruction,
                    // its usage count must be reset to 0.
                    //
                    RegUsage[i] = 0;
                }
            }
        }

        //
        // Update the list of x86 registers which can be loaded into
        // cache registers before the next instruction executes.
        //
        pInstr[1].RegsToCache |= RegsToCache;

        //
        // For each 32-bit register used as a parameter to this instruction,
        // bump the usage count.  The count saturates so that a BYTE counter
        // can never wrap back to "unused"; only "> 1" is ever tested.
        //
        for (i = 0; i < REGCOUNT; ++i) {
            if (pInstr->RegsNeeded & (REGMASK<<(REGSHIFT*i))) {
                if (RegUsage[i] < 0xff) {
                    RegUsage[i]++;
                }
            }
        }
    }
}


VOID
OptimizeInstructionStream(
    PINSTRUCTION IS,
    ULONG numInstr
    )
/*++

Routine Description:

    This function performs various optimization on the instruction stream
    returned by the decoder.  Single x86 instructions are replaced with
    special-case operations, and recognized multi-instruction idioms are
    folded into one OPT_ instruction; the absorbed instructions become
    OP_Nop with Size 0 and their sizes are added to the merged instruction.

    All look-ahead tests are written as "i + k < numInstr" so that they
    neither wrap for short streams nor read past the decoded instructions.

Arguments:

    IS       -- The instruction stream returned by the decoder
    numInstr -- The length of IS

Return Value:

    return-value - none

--*/
{
    ULONG i;

    CPUASSERTMSG(numInstr, "Cannot optimize 0-length instruction stream");

    //
    // Pass 1: Optimize x86 instruction stream, replacing single x86
    //         instructions with special-case instructions, and replacing
    //         multiple x86 instructions with single special-case OPT_
    //         instructions
    //
    for (i = 0; i < numInstr; ++i) {

        switch (IS[i].Operation) {
        case OP_Push32:
            if (i + 2 < numInstr && IS[i].Operand1.Type == OPND_REGVALUE) {

                if (IS[i].Operand1.Reg == GP_EBP) {
                    // OP_OPT_SetupStack --
                    //      push ebp
                    //      mov ebp, esp
                    //      sub esp, x
                    if ((IS[i+1].Operation == OP_Mov32) &&
                        (IS[i+1].Operand1.Type == OPND_REGREF) &&
                        (IS[i+1].Operand1.Reg == GP_EBP) &&
                        (IS[i+1].Operand2.Type == OPND_REGVALUE) &&
                        (IS[i+1].Operand2.Reg == GP_ESP) &&
                        (IS[i+2].Operation == OP_Sub32) &&
                        (IS[i+2].Operand1.Type == OPND_REGREF) &&
                        (IS[i+2].Operand1.Reg == GP_ESP) &&
                        (IS[i+2].Operand2.Type == OPND_IMM)) {

                        IS[i].Operation = OP_OPT_SetupStack;
                        IS[i].Operand1.Type = OPND_IMM;
                        IS[i].Operand1.Immed = IS[i+2].Operand2.Immed;
                        IS[i].Size += IS[i+1].Size + IS[i+2].Size;
                        IS[i].Operand2.Type = OPND_NONE;
                        IS[i+1].Operation = OP_Nop;
                        IS[i+1].Operand1.Type = OPND_NONE;
                        IS[i+1].Operand2.Type = OPND_NONE;
                        IS[i+1].Size = 0;
                        IS[i+2].Operation = OP_Nop;
                        IS[i+2].Operand1.Type = OPND_NONE;
                        IS[i+2].Operand2.Type = OPND_NONE;
                        IS[i+2].Size = 0;
                        i += 2;
                        break;
                    }
                } else if (IS[i].Operand1.Reg == GP_EBX) {
                    // OP_OPT_PushEbxEsiEdi --
                    //      push ebx
                    //      push esi
                    //      push edi
                    if ((IS[i+1].Operation == OP_Push32) &&
                        (IS[i+1].Operand1.Type == OPND_REGVALUE) &&
                        (IS[i+1].Operand1.Reg == GP_ESI) &&
                        (IS[i+2].Operation == OP_Push32) &&
                        (IS[i+2].Operand1.Type == OPND_REGVALUE) &&
                        (IS[i+2].Operand1.Reg == GP_EDI)) {

                        IS[i].Operation = OP_OPT_PushEbxEsiEdi;
                        IS[i].Size += IS[i+1].Size + IS[i+2].Size;
                        IS[i].Operand1.Type = OPND_NONE;
                        IS[i].Operand2.Type = OPND_NONE;
                        IS[i+1].Operation = OP_Nop;
                        IS[i+1].Operand1.Type = OPND_NONE;
                        IS[i+1].Operand2.Type = OPND_NONE;
                        IS[i+1].Size = 0;
                        IS[i+2].Operation = OP_Nop;
                        IS[i+2].Operand1.Type = OPND_NONE;
                        IS[i+2].Operand2.Type = OPND_NONE;
                        IS[i+2].Size = 0;
                        i += 2;
                        break;
                    }
                }
            }

            //
            // It is not one of the other special PUSH sequences, so see
            // if there are two consecutive PUSHes to merge together.  Note:
            // If the second PUSH references ESP, the two cannot be merged
            // because the value is computed before 4 is subtracted from ESP.
            // ie. the following is disallowed:
            //      PUSH EAX
            //      PUSH ESP        ; second operand to Push2 would have been
            //                      ; built before the PUSH EAX was executed.
            //
            if (i + 1 < numInstr &&
                !IS[i].FsOverride &&
                !IS[i+1].FsOverride &&
                IS[i+1].Operation == OP_Push32 &&
                IS[i+1].Operand1.Reg != GP_ESP &&
                IS[i+1].Operand1.IndexReg != GP_ESP) {

                IS[i].Operation = OP_OPT_Push232;
                IS[i].Operand2 = IS[i+1].Operand1;
                IS[i].Size += IS[i+1].Size;
                IS[i+1].Operation = OP_Nop;
                IS[i+1].Operand1.Type = OPND_NONE;
                IS[i+1].Size = 0;
                i++;
            }
            break;

        case OP_Pop32:
            // OP_OPT_PopEdiEsiEbx
            //      pop edi
            //      pop esi
            //      pop ebx
            if (i + 2 < numInstr &&
                (IS[i].Operand1.Type == OPND_REGREF) &&
                (IS[i].Operand1.Reg == GP_EDI) &&
                (IS[i+1].Operation == OP_Pop32) &&
                (IS[i+1].Operand1.Type == OPND_REGREF) &&
                (IS[i+1].Operand1.Reg == GP_ESI) &&
                (IS[i+2].Operation == OP_Pop32) &&
                (IS[i+2].Operand1.Type == OPND_REGREF) &&
                (IS[i+2].Operand1.Reg == GP_EBX)) {

                IS[i].Operation = OP_OPT_PopEdiEsiEbx;
                IS[i].Size += IS[i+1].Size + IS[i+2].Size;
                IS[i].Operand1.Type = OPND_NONE;
                IS[i].Operand2.Type = OPND_NONE;
                IS[i+1].Operation = OP_Nop;
                IS[i+1].Operand1.Type = OPND_NONE;
                IS[i+1].Operand2.Type = OPND_NONE;
                IS[i+1].Size = 0;
                IS[i+2].Operation = OP_Nop;
                IS[i+2].Operand1.Type = OPND_NONE;
                IS[i+2].Operand2.Type = OPND_NONE;
                IS[i+2].Size = 0;
                i += 2;

            } else if (i + 1 < numInstr &&
                       !IS[i].FsOverride &&
                       !IS[i+1].FsOverride &&   // both POPs, as for PUSH above
                       IS[i].Operand1.Type == OPND_REGREF &&
                       IS[i+1].Operation == OP_Pop32 &&
                       IS[i+1].Operand1.Type == OPND_REGREF) {

                // Fold the two POPs together.  Both operands are REGREF,
                // so there is no problem with interdependencies between
                // memory touched by the first POP modifying the address
                // of the second POP.  ie. the following is not merged:
                //      POP EAX
                //      POP [EAX]   ; depends on results of first POP
                IS[i].Operation = OP_OPT_Pop232;
                IS[i].Operand2 = IS[i+1].Operand1;
                IS[i].Size += IS[i+1].Size;
                IS[i+1].Operation = OP_Nop;
                IS[i+1].Operand1.Type = OPND_NONE;
                IS[i+1].Size = 0;
                i++;
            }
            break;

        case OP_Xor32:
        case OP_Sub32:
            if (IS[i].Operand1.Type == OPND_REGREF &&
                IS[i].Operand2.Type == OPND_REGVALUE &&
                IS[i].Operand1.Reg == IS[i].Operand2.Reg) {
                // Instruction is XOR samereg, samereg  (ie. XOR EAX, EAX),
                //   or SUB samereg, samereg            (ie. SUB ECX, ECX).
                // Emit OP_OPT_ZERO32 samereg
                IS[i].Operand2.Type = OPND_NONE;
                IS[i].Operation = OP_OPT_ZERO32;
            }
            break;

        case OP_Test8:
            if (IS[i].Operand1.Type == OPND_REGVALUE &&
                IS[i].Operand2.Type == OPND_REGVALUE &&
                IS[i].Operand1.Reg == IS[i].Operand2.Reg) {
                // Instruction is TEST samereg, samereg (ie. TEST AL, AL)
                // Emit OP_OPT_FastTest8
                IS[i].Operand1.Type = OPND_REGVALUE;
                IS[i].Operand2.Type = OPND_NONE;
                IS[i].Operation = OP_OPT_FastTest8;
            }
            break;

        case OP_Test16:
            if (IS[i].Operand1.Type == OPND_REGVALUE &&
                IS[i].Operand2.Type == OPND_REGVALUE &&
                IS[i].Operand1.Reg == IS[i].Operand2.Reg) {
                // Instruction is TEST samereg, samereg (ie. TEST AX, AX)
                // Emit OP_OPT_FastTest16
                IS[i].Operand1.Type = OPND_REGVALUE;
                IS[i].Operand2.Type = OPND_NONE;
                IS[i].Operation = OP_OPT_FastTest16;
            }
            break;

        case OP_Test32:
            if (IS[i].Operand1.Type == OPND_REGVALUE &&
                IS[i].Operand2.Type == OPND_REGVALUE &&
                IS[i].Operand1.Reg == IS[i].Operand2.Reg) {
                // Instruction is TEST samereg, samereg (ie. TEST EAX, EAX)
                // Emit OP_OPT_FastTest32
                IS[i].Operand1.Type = OPND_REGVALUE;
                IS[i].Operand2.Type = OPND_NONE;
                IS[i].Operation = OP_OPT_FastTest32;
            }
            break;

        case OP_Cmp32:
            if (i + 1 < numInstr &&
                IS[i+1].Operation == OP_Sbb32 &&
                IS[i+1].Operand1.Type == OPND_REGREF &&
                IS[i+1].Operand2.Type == OPND_REGVALUE &&
                IS[i+1].Operand1.Reg == IS[i+1].Operand2.Reg) {
                // The two instructions are:
                //     CMP anything1, anything2
                //     SBB samereg, samereg
                // The optimized instruction is:
                //     Operation = either CmpSbb32 or CmpSbbNeg32
                //     Operand1 = &samereg  (passed as REGREF)
                //     Operand2 = anything1 (passed as ADDRVAL32 or REGVAL)
                //     Operand3 = anything2 (passed as ADDRVAL32 or REGVAL)
                IS[i].Operand3 = IS[i].Operand2;
                IS[i].Operand2 = IS[i].Operand1;
                IS[i].Operand1 = IS[i+1].Operand1;

                //
                // As with every other merge in this routine, the merged
                // instruction takes over the bytes of the instructions it
                // absorbs (which become 0-byte NOPs).
                // NOTE(review): the original code did not accumulate Size
                // here; confirm no consumer relied on that.
                //
                IS[i].Size += IS[i+1].Size;

                if (i + 2 < numInstr &&
                    IS[i+2].Operation == OP_Neg32 &&
                    IS[i+2].Operand1.Type == OPND_REGREF &&
                    IS[i+2].Operand1.Reg == IS[i+1].Operand1.Reg) {
                    // The third instruction is NEG samereg
                    IS[i].Operation = OP_OPT_CmpSbbNeg32;
                    IS[i].Size += IS[i+2].Size;
                    IS[i+2].Operation = OP_Nop;
                    IS[i+2].Operand1.Type = OPND_NONE;
                    IS[i+2].Operand2.Type = OPND_NONE;
                    IS[i+2].Size = 0;
                } else {
                    IS[i].Operation = OP_OPT_CmpSbb32;
                }
                IS[i+1].Operation = OP_Nop;
                IS[i+1].Operand1.Type = OPND_NONE;
                IS[i+1].Operand2.Type = OPND_NONE;
                IS[i+1].Size = 0;
                i++;
            }
            break;

        case OP_Cwd16:
            // CWD / IDIV16 pair
            if (i + 1 < numInstr && IS[i+1].Operation == OP_Idiv16) {
                IS[i].Operation = OP_OPT_CwdIdiv16;
                IS[i].Operand1 = IS[i+1].Operand1;
                IS[i].Size += IS[i+1].Size;
                IS[i+1].Operation = OP_Nop;
                IS[i+1].Operand1.Type = OPND_NONE;
                IS[i+1].Size = 0;
                i++;
            }
            break;

        case OP_Cwd32:
            // CWD / IDIV32 pair
            if (i + 1 < numInstr && IS[i+1].Operation == OP_Idiv32) {
                IS[i].Operation = OP_OPT_CwdIdiv32;
                IS[i].Operand1 = IS[i+1].Operand1;
                IS[i].Size += IS[i+1].Size;
                IS[i+1].Operation = OP_Nop;
                IS[i+1].Operand1.Type = OPND_NONE;
                IS[i+1].Size = 0;
                i++;
            }
            break;

        case OP_FP_FNSTSW:
            if (i + 1 < numInstr &&
                IS[i+1].Operation == OP_Sahf &&
                IS[i].Operand1.Type == OPND_REGREF &&
                IS[i].Operand1.Reg == GP_AX) {
                // Replace FNSTSW AX / SAHF by one instruction
                IS[i].Operation = OP_OPT_FNSTSWAxSahf;
                IS[i].Operand1.Type = OPND_NONE;
                IS[i].Size += IS[i+1].Size;
                IS[i+1].Operation = OP_Nop;
                IS[i+1].Size = 0;
                i++;
            }
            break;

        case OP_FP_FSTP_STi:
            if (IS[i].Operand1.Immed == 0) {
                IS[i].Operand1.Type = OPND_NONE;
                IS[i].Operation = OP_OPT_FSTP_ST0;
            }
            break;

        default:
            // No special-case form for this operation.
            break;
        }
    }
}


VOID
OptimizeIntelFlags(
    PINSTRUCTION IS,
    ULONG numInstr
    )
/*++

Routine Description:

    This function analyzes x86 flag register usage and switches instructions
    to use NoFlags versions if possible.  Flags requirements are propagated
    from the bottom of the stream to the top; a second pass is made only
    when a backwards control transfer inside the stream was seen.

Arguments:

    IS       -- The instruction stream returned by the decoder
    numInstr -- The length of IS (must not exceed MAX_INSTR_COUNT)

Return Value:

    return-value - none

--*/
{
    USHORT FlagsNeeded;     // flags required to execute current x86 instr
    USHORT FlagsToGenerate; // flags which current x86 instr must generate
    PFRAGDESCR pFragDesc;   // ptr to Fragments[] array for current instr
    ULONG i;                // instruction index
    BOOL fPassNeeded = TRUE;// TRUE if the outer loop needs to loop once more
    ULONG PassNumber = 0;   // number of times outer loop has looped
    PENTRYPOINT pEPDest;    // Entrypoint for destination of a ctrl transfer
    USHORT KnownFlagsNeeded[MAX_INSTR_COUNT]; // flags needed for each instr

    if (numInstr == 0) {
        // The do/while below pre-decrements i and would wrap.
        return;
    }

    // KnownFlagsNeeded[] is indexed by instruction number.
    CPUASSERT(numInstr <= MAX_INSTR_COUNT);

    while (fPassNeeded) {

        //
        // This loop is executed at most two times.  The second pass is only
        // required if there is a control-transfer instruction whose
        // destination is within the Instruction Stream and at a lower
        // Intel address  (ie. a backwards JMP).
        //
        fPassNeeded = FALSE;
        PassNumber++;
        CPUASSERT(PassNumber <= 2);

        //
        // Iterate over all x86 instructions decoded, from bottom to top,
        // propagating flags info up.  Start off by assuming all x86 flags
        // must be up-to-date at the end of the last basic block.
        //
        FlagsNeeded = ALLFLAGS;
        i = numInstr;
        do {
            i--;
            pFragDesc = &Fragments[IS[i].Operation];

            //
            // Calculate what flags will need to be computed by this
            // instruction and ones before this.
            //
            KnownFlagsNeeded[i] = FlagsNeeded | pFragDesc->FlagsNeeded;
            FlagsToGenerate = FlagsNeeded & pFragDesc->FlagsSet;

            //
            // Calculate what flags this instruction will need to have
            // computed before it can be executed.
            //
            FlagsNeeded = (FlagsNeeded & ~FlagsToGenerate) |
                          pFragDesc->FlagsNeeded;

            if (pFragDesc->Flags & OPFL_CTRLTRNS) {
                //
                // NOTE(review): Operand1.Immed is used as the destination
                // for every operand type here, whereas LocateEntryPoints
                // dereferences it for OPND_ADDRREF (FAR) transfers --
                // confirm this difference is intended.
                //
                ULONG IntelDest = IS[i].Operand1.Immed;

                //
                // For control-transfer instructions, FlagsNeeded also includes
                // the flags required for the destination of the transfer.
                //
                if (IS[0].IntelAddress <= IntelDest &&
                    i > 0 && IS[i-1].IntelAddress >= IntelDest) {
                    //
                    // The destination of the control-transfer is at a lower
                    // address in the Instruction Stream.
                    //

                    if (PassNumber == 1) {
                        //
                        // Need to make a second pass over the flags
                        // optimizations in order to determine what flags are
                        // needed for the destination address.
                        //
                        fPassNeeded = TRUE;
                        FlagsNeeded = ALLFLAGS; // assume all flags are needed
                    } else {
                        ULONG j;
                        USHORT NewFlagsNeeded;

                        //
                        // Search for the IntelDest within the Instruction
                        // Stream.  IntelDest may not be found if there is
                        // a pun.
                        //
                        NewFlagsNeeded = ALLFLAGS;  // assume there is a pun
                        for (j = 0; j < i; ++j) {
                            if (IS[j].IntelAddress == IntelDest) {
                                NewFlagsNeeded = KnownFlagsNeeded[j];
                                break;
                            }
                        }

                        FlagsNeeded |= NewFlagsNeeded;
                    }

                } else if (i + 1 < numInstr &&
                           IS[i+1].IntelAddress <= IntelDest &&
                           IntelDest <= IS[numInstr-1].IntelAddress) {
                    //
                    // The destination of the control-transfer is at a higher
                    // address in the Instruction Stream.  Pick up the
                    // already-computed FlagsNeeded for the destination.
                    // (The i + 1 < numInstr test keeps IS[i+1] inside the
                    // decoded stream when this is the last instruction.)
                    //
                    ULONG j;
                    USHORT NewFlagsNeeded = ALLFLAGS;   // assume a pun

                    for (j = i+1; j < numInstr; ++j) {
                        if (IS[j].IntelAddress == IntelDest) {
                            NewFlagsNeeded = KnownFlagsNeeded[j];
                            break;
                        }
                    }

                    FlagsNeeded |= NewFlagsNeeded;

                } else {
                    //
                    // Destination of the control-transfer is unknown.  Assume
                    // the worst:  all flags are required.
                    //
                    FlagsNeeded = ALLFLAGS;
                }
            }

            if (!(FlagsToGenerate & pFragDesc->FlagsSet) &&
                (pFragDesc->Flags & OPFL_HASNOFLAGS)) {
                //
                // This instruction is not required to generate any flags, and
                // it has a NOFLAGS version.  Modify the Operation number to
                // point at the NoFlags fragment.
                //
                if (pFragDesc->Flags & OPFL_ALIGN) {
                    IS[i].Operation += 2;
                } else {
                    IS[i].Operation++;
                }

                if (IS[i].Operation == OP_OPT_ZERONoFlags32) {
                    //
                    // Special-case this to be a "mov [value], zero" so it is
                    // inlined.
                    //
                    IS[i].Operation = OP_Mov32;
                    IS[i].Operand2.Type = OPND_IMM;
                    IS[i].Operand2.Immed = 0;
                }
            }
        } while (i);
    }
}


VOID
DetermineEbpAlignment(
    PINSTRUCTION InstructionStream,
    ULONG numInstr
    )
/*++

Routine Description:

    For each instruction in InstructionStream[], sets Instruction->EbpAligned
    based on whether EBP is assumed to be DWORD-aligned or not.  EBP is
    assumed to be DWORD-aligned if a "MOV EBP, ESP" instruction is seen,
    and it is assumed to become unaligned at the first instruction which
    is flagged as modifying EBP.

Arguments:

    InstructionStream -- The instruction stream returned by the decoder
    numInstr          -- The length of InstructionStream

Return Value:

    return-value - none

--*/
{
    ULONG i;
    BOOL EbpAligned = FALSE;    // EBP starts out assumed unaligned

    for (i = 0; i < numInstr; ++i) {
        if (InstructionStream[i].RegsSet & REGEBP) {
            //
            // This instruction modified EBP
            //
            if (InstructionStream[i].Operation == OP_OPT_SetupStack ||
                InstructionStream[i].Operation == OP_OPT_SetupStackNoFlags ||
                (InstructionStream[i].Operation == OP_Mov32 &&
                 InstructionStream[i].Operand2.Type == OPND_REGVALUE &&
                 InstructionStream[i].Operand2.Reg == GP_ESP)) {
                //
                // The instruction is either "MOV EBP, ESP" or one of the
                // SetupStack fragments (which contains a "MOV EBP, ESP")
                // assume Ebp is aligned from now on.
                //
                EbpAligned = TRUE;
            } else {
                EbpAligned = FALSE;
            }
        }

        // The state computed so far applies to this instruction as well.
        InstructionStream[i].EbpAligned = EbpAligned;
    }
}


ULONG
GetInstructionStream(
    PINSTRUCTION InstructionStream,
    PULONG NumberOfInstructions,
    PVOID pIntelInstruction,
    PVOID pLastIntelInstruction
    )
/*++

Routine Description:

    Returns an instruction stream to the compiler.  The instruction stream
    is terminated either when the buffer is full, or when we reach a
    control transfer instruction (or pLastIntelInstruction).

Arguments:

    InstructionStream     -- A pointer to the buffer where the decoded
        instructions are stored.

    NumberOfInstructions  -- Upon entry, this variable contains the
        maximal number of instructions the buffer can hold.  When
        returning, it contains the actual number of instructions decoded.

    pIntelInstruction     -- A pointer to the first real intel instruction
        to be decoded.

    pLastIntelInstruction -- A pointer to the last intel instruction to be
        compiled, 0xffffffff if not used.

Return Value:

    Number of entrypoints required to describe the decoded instruction
    stream.

--*/
{
    ULONG numInstr = 0;
    ULONG maxBufferSize;
    ULONG cEntryPoints;

    maxBufferSize = (*NumberOfInstructions);

    //
    // Zero-fill the InstructionStream.  The decoder depends on this.
    //
    RtlZeroMemory(InstructionStream, maxBufferSize*sizeof(INSTRUCTION));

#if DBG
    //
    // Do a little analysis on the address we're about to decode.  If
    // the address is part of a non-x86 image, log that to the debugger.
    // That probably indicates a thunking problem.  If the address is not
    // part of an image, warn that the app is running generated code.
    //
    try {
        USHORT Instr;

        //
        // NOTE(review): the instruction bytes are not actually read here;
        // Instr is always 0, so the BOP test below always succeeds.
        //
        Instr = 0;

        //
        // Ignore BOP instructions - we assume we know what's going on with
        // them.
        //
        if (Instr != 0xc4c4) {

            NTSTATUS st;
            MEMORY_BASIC_INFORMATION mbi;

            st = NtQueryVirtualMemory(NtCurrentProcess(),
                                      pIntelInstruction,
                                      MemoryBasicInformation,
                                      &mbi,
                                      sizeof(mbi),
                                      NULL);
            if (NT_SUCCESS(st)) {
                PIMAGE_NT_HEADERS Headers;

                Headers = RtlImageNtHeader(mbi.AllocationBase);
                if (!Headers || Headers->FileHeader.Machine != IMAGE_FILE_MACHINE_I386) {
                    LOGPRINT((TRACELOG, "CPU Analysis warning:  jumping from Intel to non-intel code at 0x%X\r\n", pIntelInstruction));
                }
            } else {
                // Eip isn't pointing anywhere???
            }
        }
    } except(EXCEPTION_EXECUTE_HANDLER) {
        ;
    }
#endif  //DBG

    while (numInstr < maxBufferSize) {

        DecodeInstruction ((DWORD) (ULONGLONG)pIntelInstruction, InstructionStream+numInstr);
        if ((STOP_DECODING(InstructionStream[numInstr])) ||
            (pIntelInstruction >= pLastIntelInstruction)) {

            // We reached a control transfer instruction (or the last
            // instruction requested); it is included in the stream.
            numInstr++;
            break; // SUCCESS
        }
        pIntelInstruction = (PVOID) ((ULONGLONG)pIntelInstruction + (InstructionStream+numInstr)->Size);

        numInstr++;
    }

    //
    // Report the number of instructions actually decoded.  When the buffer
    // filled up this equals the value passed in.
    //
    (*NumberOfInstructions) = numInstr;

    //
    // Optimize x86 code by merging x86 instructions into meta-instructions
    // and cleaning up special x86 idioms.
    //
    if (!(CompilerFlags & COMPFL_SLOW)) {
        OptimizeInstructionStream (InstructionStream, numInstr);
    }

    //
    // Determine where all basic blocks are by filling in the EntryPoint
    // field in each instruction.  This must be done after
    // OptimizeInstructionStream() runs so that EntryPoints don't fall
    // into the middle of meta-instructions.
    //
    cEntryPoints = LocateEntryPoints(InstructionStream, numInstr);

    //
    // Perform optimizations which require knowledge of EntryPoints
    //
    if (numInstr > 2 && !(CompilerFlags & COMPFL_SLOW)) {
        if (!CpuDisableNoFlags) {
            OptimizeIntelFlags(InstructionStream, numInstr);
        }

        if (!CpuDisableRegCache) {
            CacheIntelRegs(InstructionStream, numInstr);
        }

        if (!CpuDisableEbpAlign) {
            DetermineEbpAlignment(InstructionStream, numInstr);
        }
    }

    return cEntryPoints;
}