Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1142 lines
40 KiB

  1. /*++
  2. Copyright (c) 1992-2000 Microsoft Corporation
  3. Module Name:
  4. analysis.c
  5. Abstract:
  6. This module contains the main file of the analysis
  7. module.
  8. Author:
  9. Ori Gershony (t-orig) creation-date 6-July-1995
  10. Revision History:
  11. 24-Aug-1999 [askhalid] copied from 32-bit wx86 directory and make work for 64bit.
  12. --*/
  13. #include <nt.h>
  14. #include <ntrtl.h>
  15. #include <nturtl.h>
  16. #include <windows.h>
  17. #include <wx86.h>
  18. #include <wx86nt.h>
  19. #include <wx86cpu.h>
  20. #include <cpuassrt.h>
  21. #include <threadst.h>
  22. #include <instr.h>
  23. #include <analysis.h>
  24. #include <decoder.h>
  25. #include <frag.h>
  26. #include <config.h>
  27. #include <compiler.h>
  28. ASSERTNAME;
  29. //
  30. // Macro to determine when to stop looking ahead during compilation.
  31. //
  32. #define STOP_DECODING(inst) (Fragments[inst.Operation].Flags & OPFL_STOP_COMPILE)
  33. //
  34. // Map a REG_ constant (offset into cpu struct) into register bit map
  35. // used by instruction data.
  36. //
  37. const DWORD MapRegNumToRegBits[0x1e] =
  38. {REGEAX, REGECX, REGEDX, REGEBX, REGESP, REGEBP, REGESI, REGEDI,
  39. 0, 0, 0, 0, 0, 0,
  40. REGAX, REGCX, REGDX, REGBX, REGSP, REGBP, REGSI, REGDI,
  41. REGAL, REGCL, REGDL, REGBL, REGAH, REGCH, REGDH, REGBH };
  42. ULONG
  43. LocateEntryPoints(
  44. PINSTRUCTION InstructionStream,
  45. ULONG NumberOfInstructions
  46. )
  47. /*++
  48. Routine Description:
  49. This function scans the InstructionStream and marks instructions
  50. which begin entrypoint. An instruction begins an entrypoint if its
  51. EntryPoint field has a different value than the previous instruction's
  52. value. No instruction will have a NULL pointer.
  53. Note that in this pass, the EntryPoint field does *not* point to an
  54. ENTRYPOINT structure... it is only a marker.
  55. Arguments:
  56. IntelStart -- The intel address of the first instruction in the stream
  57. IntelStart -- The last byte of the last intel instruction in the stream
  58. Return Value:
  59. Count of EntryPoints located.
  60. --*/
  61. {
  62. ULONG i, j, intelDest;
  63. ULONG EntryPointCounter;
  64. ULONG IntelStart;
  65. ULONG IntelEnd;
  66. if (CompilerFlags & COMPFL_SLOW) {
  67. //
  68. // The compiler is supposed to generate slowmode code. Each
  69. // x86 instruction gets its own ENTRYPOINT
  70. //
  71. EntryPointCounter=1;
  72. for (i=0; i<NumberOfInstructions; i++) {
  73. //
  74. // Mark all instructions which don't correspond to 0-byte NOPs
  75. // following optimized instructions as starting EntryPoints.
  76. //
  77. if (InstructionStream[i].Size) {
  78. EntryPointCounter++;
  79. }
  80. InstructionStream[i].EntryPoint = (PENTRYPOINT)EntryPointCounter;
  81. }
  82. } else {
  83. //
  84. // Find all instructions which need Entrypoints.
  85. // Performance is O(n^2) in the worst case, although
  86. // it will be typically much closer to O(n)
  87. //
  88. // Instructions which mark the starts of Entrypoints have
  89. // their .EntryPoint pointer set to non-NULL. Instructions which
  90. // don't require entrypoints have it set to NULL;
  91. //
  92. IntelStart = InstructionStream[0].IntelAddress;
  93. IntelEnd = IntelStart +
  94. InstructionStream[NumberOfInstructions-1].IntelAddress +
  95. InstructionStream[NumberOfInstructions-1].Size;
  96. //
  97. // The first instruction always gets an entrypoint
  98. //
  99. InstructionStream[0].EntryPoint = (PENTRYPOINT)1;
  100. //
  101. // Visit each instruction in turn
  102. //
  103. for (i=0; i<NumberOfInstructions; i++) {
  104. if (((i+1) < NumberOfInstructions) &&
  105. (Fragments[InstructionStream[i].Operation].Flags & OPFL_END_NEXT_EP)) {
  106. //
  107. // This instruction marks the end of an Entrypoint. The next
  108. // instruction gets a new Entrypoint.
  109. //
  110. CPUASSERT(i < CpuInstructionLookahead-1 && i < NumberOfInstructions-1);
  111. InstructionStream[i+1].EntryPoint = (PENTRYPOINT)1;
  112. }
  113. // Now see if it is a direct control transfer instruction with a
  114. // destination that lies within this instruction stream. If it is,
  115. // we want to create an Entry Point at the destination so that the
  116. // control transfer will be compiled directly to the patched form,
  117. // and won't have to be patched later.
  118. //
  119. if (Fragments[InstructionStream[i].Operation].Flags & OPFL_CTRLTRNS) {
  120. //
  121. // The instruction is a direct control-transfer. If the
  122. // destination is within the InstructionStream, create an
  123. // Entrypoint at the destination.
  124. //
  125. if (InstructionStream[i].Operand1.Type == OPND_IMM ||
  126. InstructionStream[i].Operand1.Type == OPND_NOCODEGEN) {
  127. // Get the intel destination from the instruction structure.
  128. intelDest = InstructionStream[i].Operand1.Immed;
  129. } else {
  130. CPUASSERT(InstructionStream[i].Operand1.Type == OPND_ADDRREF );
  131. // A FAR instruction - Operand1 is a ptr to a SEL:OFFSET pair
  132. intelDest = *(UNALIGNED PULONG)(InstructionStream[i].Operand1.Immed);
  133. }
  134. // Get the intel destination from the instruction structure.
  135. // It is always an immediate with direct control transfers.
  136. if ((intelDest >= IntelStart) && (intelDest <= IntelEnd)) {
  137. //
  138. // Destination of the control-transfer is within the
  139. // instructionstream. Find the destination instruction.
  140. //
  141. if (intelDest > InstructionStream[i].IntelAddress) {
  142. //
  143. // The dest. address is at a higher address.
  144. //
  145. for (j=i+1; j<NumberOfInstructions; ++j) {
  146. if (InstructionStream[j].IntelAddress == intelDest) {
  147. break;
  148. }
  149. }
  150. } else {
  151. //
  152. // The dest. address is at a lower address.
  153. //
  154. for (j=i; j>0; --j) {
  155. if (InstructionStream[j].IntelAddress == intelDest) {
  156. break;
  157. }
  158. }
  159. }
  160. //
  161. // An exact match may not be found in the event that the
  162. // app is punning (either a real pun or the app is jumping
  163. // into the middle of an optimized instruction). In
  164. // either of the cases, defer entrypoint creation until
  165. // the branch is actually taken.
  166. //
  167. if (j >= 0 && j < NumberOfInstructions) {
  168. //
  169. // Exact match was found. Create an Entrypoint.
  170. //
  171. InstructionStream[j].EntryPoint = (PENTRYPOINT)1;
  172. }
  173. }
  174. } // if OPFL_CTRLTRNS
  175. } // for ()
  176. //
  177. // Convert the EntryPoint field from NULL/non-NULL to a unique
  178. // value for each range of instructions.
  179. //
  180. EntryPointCounter=1;
  181. i=0;
  182. while (i<NumberOfInstructions) {
  183. //
  184. // This instruction marks the beginning of a basic block
  185. //
  186. InstructionStream[i].EntryPoint = (PENTRYPOINT)EntryPointCounter;
  187. j=i+1;
  188. while (j < NumberOfInstructions) {
  189. if ((j >= NumberOfInstructions) ||
  190. (InstructionStream[j].Size && InstructionStream[j].EntryPoint)) {
  191. //
  192. // Either ran out of instructions, or encountered an instruction
  193. // which marks the start of the next basic block. Note that
  194. // 0-byte NOP instructions are not allowed to start basic blocks
  195. // as that violates the rules of OPT_ instructions.
  196. //
  197. break;
  198. }
  199. InstructionStream[j].EntryPoint = (PENTRYPOINT)EntryPointCounter;
  200. j++;
  201. }
  202. EntryPointCounter++;
  203. i = j;
  204. }
  205. } // if not COMPFL_SLOW
  206. //
  207. // At this point, EntryPointCounter holds the number of EntryPoints
  208. // plus one, because we started the counter at 1, not 0. Correct
  209. // that now.
  210. //
  211. EntryPointCounter--;
  212. return EntryPointCounter;
  213. }
  214. VOID
  215. UpdateRegs(
  216. PINSTRUCTION pInstr,
  217. POPERAND Operand
  218. )
  219. /*++
  220. Routine Description:
  221. Updates the list of registers referenced and/or modified based on the
  222. Operand.
  223. Arguments:
  224. pInstr -- the instruction to examine
  225. Operand -- the operand of the instruction to examine
  226. Return Value:
  227. return-value - none
  228. --*/
  229. {
  230. switch (Operand->Type) {
  231. case OPND_NOCODEGEN:
  232. case OPND_REGREF:
  233. if (Operand->Reg != NO_REG) {
  234. pInstr->RegsSet |= MapRegNumToRegBits[Operand->Reg];
  235. }
  236. break;
  237. case OPND_REGVALUE:
  238. if (Operand->Reg != NO_REG) {
  239. pInstr->RegsNeeded |= MapRegNumToRegBits[Operand->Reg];
  240. }
  241. break;
  242. case OPND_ADDRREF:
  243. case OPND_ADDRVALUE8:
  244. case OPND_ADDRVALUE16:
  245. case OPND_ADDRVALUE32:
  246. if (Operand->Reg != NO_REG) {
  247. pInstr->RegsNeeded |= MapRegNumToRegBits[Operand->Reg];
  248. }
  249. if (Operand->IndexReg != NO_REG) {
  250. pInstr->RegsNeeded |= MapRegNumToRegBits[Operand->IndexReg];
  251. }
  252. break;
  253. default:
  254. break;
  255. }
  256. }
  257. VOID
  258. CacheIntelRegs(
  259. PINSTRUCTION InstructionStream,
  260. ULONG numInstr)
  261. /*++
  262. Routine Description:
  263. This function deterimes what x86 registers, if any, can be cached in
  264. RISC preserved registers.
  265. Arguments:
  266. InstructionStream -- The instruction stream returned by the decoder
  267. numInstr -- The length of InstructionStream
  268. Return Value:
  269. return-value - none
  270. --*/
  271. {
  272. PINSTRUCTION pInstr;
  273. BYTE RegUsage[REGCOUNT];
  274. DWORD RegsToCache;
  275. int i;
  276. PENTRYPOINT PrevEntryPoint;
  277. //
  278. // Calculate the RegsSet and RegsNeeded for the bottommost instruction
  279. //
  280. pInstr = &InstructionStream[numInstr-1];
  281. pInstr->RegsSet = Fragments[pInstr->Operation].RegsSet;
  282. PrevEntryPoint = pInstr->EntryPoint;
  283. UpdateRegs(pInstr, &pInstr->Operand1);
  284. UpdateRegs(pInstr, &pInstr->Operand2);
  285. UpdateRegs(pInstr, &pInstr->Operand3);
  286. //
  287. // For each 32-bit register used as a parameter to this instruction,
  288. // set the usage count to 1.
  289. //
  290. for (i=0; i<REGCOUNT; ++i) {
  291. if (pInstr->RegsNeeded & (REGMASK<<(REGSHIFT*i))) {
  292. RegUsage[i] = 1;
  293. } else {
  294. RegUsage[i] = 0;
  295. }
  296. }
  297. //
  298. // Loop over instruction stream from bottom to top, starting at the
  299. // second-to-last instruction
  300. //
  301. for (pInstr--; pInstr >= InstructionStream; pInstr--) {
  302. //
  303. // Calculate the RegsSet and RegsNeeded values for this instruction
  304. //
  305. pInstr->RegsSet = Fragments[pInstr->Operation].RegsSet;
  306. UpdateRegs(pInstr, &pInstr->Operand1);
  307. UpdateRegs(pInstr, &pInstr->Operand2);
  308. UpdateRegs(pInstr, &pInstr->Operand3);
  309. RegsToCache = 0;
  310. if (PrevEntryPoint != pInstr->EntryPoint) {
  311. //
  312. // The current instruction marks the end of an Entrypoint.
  313. //
  314. PrevEntryPoint = pInstr->EntryPoint;
  315. //
  316. // For all x86 registers which have been read more than once
  317. // but not modified in the basic block, load them into the
  318. // cache before executing the first instruction in the basic
  319. // block.
  320. //
  321. for (i=0; i<REGCOUNT; ++i) {
  322. if (RegUsage[i] > 1) {
  323. RegsToCache |= (REGMASK<<(REGSHIFT*i));
  324. }
  325. }
  326. //
  327. // Reset the RegUsage[] array to indicate no registers are
  328. // cached.
  329. //
  330. RtlZeroMemory(RegUsage, REGCOUNT);
  331. } else {
  332. //
  333. // For each 32-bit x86 register modified by this instruction,
  334. // update the caching info.
  335. //
  336. for (i=0; i<REGCOUNT; ++i) {
  337. DWORD RegBits = pInstr->RegsSet & (REGMASK<<(REGSHIFT*i));
  338. if (RegBits) {
  339. //
  340. // The ith 32-bit x86 register has been modified by this
  341. // instruction
  342. //
  343. if (RegUsage[i] > 1) {
  344. //
  345. // There is more than one consumer of the modified
  346. // value so it is worth caching.
  347. //
  348. RegsToCache |= RegBits;
  349. }
  350. //
  351. // Since this x86 register was dirtied by this instruction,
  352. // it usage count must be reset to 0.
  353. //
  354. RegUsage[i] = 0;
  355. }
  356. }
  357. }
  358. //
  359. // Update the list of x86 registers which can be loaded into
  360. // cache registers before the next instruction executes.
  361. //
  362. pInstr[1].RegsToCache |= RegsToCache;
  363. //
  364. // For each 32-bit register used as a parameter to this instruction,
  365. // bump the usage count.
  366. //
  367. for (i=0; i<REGCOUNT; ++i) {
  368. if (pInstr->RegsNeeded & (REGMASK<<(REGSHIFT*i))) {
  369. RegUsage[i]++;
  370. }
  371. }
  372. }
  373. }
  374. VOID
  375. OptimizeInstructionStream(
  376. PINSTRUCTION IS,
  377. ULONG numInstr
  378. )
  379. /*++
  380. Routine Description:
  381. This function performs various optimization on the instruction stream
  382. retured by the decoder.
  383. Arguments:
  384. IS -- The instruction stream returned by the decoder
  385. numInstr -- The length of IS
  386. Return Value:
  387. return-value - none
  388. --*/
  389. {
  390. ULONG i;
  391. CPUASSERTMSG(numInstr, "Cannot optimize 0-length instruction stream");
  392. //
  393. // Pass 1: Optimize x86 instruction stream, replacing single x86
  394. // instructions with special-case instructions, and replacing
  395. // multiple x86 instructions with single special-case OPT_
  396. // instructions
  397. //
  398. for (i=0; i<numInstr; ++i) {
  399. switch (IS[i].Operation) {
  400. case OP_Push32:
  401. if (i < numInstr-2
  402. && IS[i].Operand1.Type == OPND_REGVALUE){
  403. if (IS[i].Operand1.Reg == GP_EBP) {
  404. // OP_OPT_SetupStack --
  405. // push ebp
  406. // mov ebp, esp
  407. // sub esp, x
  408. if ((IS[i+1].Operation == OP_Mov32) &&
  409. (IS[i+1].Operand1.Type == OPND_REGREF) &&
  410. (IS[i+1].Operand1.Reg == GP_EBP) &&
  411. (IS[i+1].Operand2.Type == OPND_REGVALUE) &&
  412. (IS[i+1].Operand2.Reg == GP_ESP) &&
  413. (IS[i+2].Operation == OP_Sub32) &&
  414. (IS[i+2].Operand1.Type == OPND_REGREF) &&
  415. (IS[i+2].Operand1.Reg == GP_ESP) &&
  416. (IS[i+2].Operand2.Type == OPND_IMM)){
  417. IS[i].Operation = OP_OPT_SetupStack;
  418. IS[i].Operand1.Type = OPND_IMM;
  419. IS[i].Operand1.Immed = IS[i+2].Operand2.Immed;
  420. IS[i].Size += IS[i+1].Size + IS[i+2].Size;
  421. IS[i].Operand2.Type = OPND_NONE;
  422. IS[i+1].Operation = OP_Nop;
  423. IS[i+1].Operand1.Type = OPND_NONE;
  424. IS[i+1].Operand2.Type = OPND_NONE;
  425. IS[i+1].Size = 0;
  426. IS[i+2].Operation = OP_Nop;
  427. IS[i+2].Operand1.Type = OPND_NONE;
  428. IS[i+2].Operand2.Type = OPND_NONE;
  429. IS[i+2].Size = 0;
  430. i+=2;
  431. break;
  432. }
  433. } else if (IS[i].Operand1.Reg == GP_EBX) {
  434. // OP_OPT_PushEbxEsiEdi --
  435. // push ebx
  436. // push esi
  437. // push edi
  438. if ((IS[i+1].Operation == OP_Push32) &&
  439. (IS[i+1].Operand1.Type == OPND_REGVALUE) &&
  440. (IS[i+1].Operand1.Reg == GP_ESI) &&
  441. (IS[i+2].Operation == OP_Push32) &&
  442. (IS[i+2].Operand1.Type == OPND_REGVALUE) &&
  443. (IS[i+2].Operand1.Reg == GP_EDI)){
  444. IS[i].Operation = OP_OPT_PushEbxEsiEdi;
  445. IS[i].Size += IS[i+1].Size + IS[i+2].Size;
  446. IS[i].Operand1.Type = OPND_NONE;
  447. IS[i].Operand2.Type = OPND_NONE;
  448. IS[i+1].Operation = OP_Nop;
  449. IS[i+1].Operand1.Type = OPND_NONE;
  450. IS[i+1].Operand2.Type = OPND_NONE;
  451. IS[i+1].Size = 0;
  452. IS[i+2].Operation = OP_Nop;
  453. IS[i+2].Operand1.Type = OPND_NONE;
  454. IS[i+2].Operand2.Type = OPND_NONE;
  455. IS[i+2].Size = 0;
  456. i+=2;
  457. break;
  458. }
  459. }
  460. }
  461. //
  462. // It is not one of the other special PUSH sequences, so see
  463. // if there are two consecutive PUSHes to merge together. Note:
  464. // If the second PUSH references ESP, the two cannot be merged
  465. // because the value is computed before 4 is subtracted from ESP.
  466. // ie. the following is disallowed:
  467. // PUSH EAX
  468. // PUSH ESP ; second operand to Push2 would have been
  469. // ; built before the PUSH EAX was executed.
  470. //
  471. if (i < numInstr-1 &&
  472. !IS[i].FsOverride &&
  473. !IS[i+1].FsOverride &&
  474. IS[i+1].Operation == OP_Push32 &&
  475. IS[i+1].Operand1.Reg != GP_ESP &&
  476. IS[i+1].Operand1.IndexReg != GP_ESP) {
  477. IS[i].Operation = OP_OPT_Push232;
  478. IS[i].Operand2 = IS[i+1].Operand1;
  479. IS[i].Size += IS[i+1].Size;
  480. IS[i+1].Operation = OP_Nop;
  481. IS[i+1].Operand1.Type = OPND_NONE;
  482. IS[i+1].Size = 0;
  483. i++;
  484. }
  485. break;
  486. case OP_Pop32:
  487. // OP_OPT_PopEdiEsiEbx
  488. // pop edi
  489. // pop esi
  490. // pop ebx
  491. if (i < numInstr-2 &&
  492. (IS[i].Operand1.Type == OPND_REGREF) &&
  493. (IS[i].Operand1.Reg == GP_EDI) &&
  494. (IS[i+1].Operation == OP_Pop32) &&
  495. (IS[i+1].Operand1.Type == OPND_REGREF) &&
  496. (IS[i+1].Operand1.Reg == GP_ESI) &&
  497. (IS[i+2].Operation == OP_Pop32) &&
  498. (IS[i+2].Operand1.Type == OPND_REGREF) &&
  499. (IS[i+2].Operand1.Reg == GP_EBX)){
  500. IS[i].Operation = OP_OPT_PopEdiEsiEbx;
  501. IS[i].Size += IS[i+1].Size + IS[i+2].Size;
  502. IS[i].Operand1.Type = OPND_NONE;
  503. IS[i].Operand2.Type = OPND_NONE;
  504. IS[i+1].Operation = OP_Nop;
  505. IS[i+1].Operand1.Type = OPND_NONE;
  506. IS[i+1].Operand2.Type = OPND_NONE;
  507. IS[i+1].Size = 0;
  508. IS[i+2].Operation = OP_Nop;
  509. IS[i+2].Operand1.Type = OPND_NONE;
  510. IS[i+2].Operand2.Type = OPND_NONE;
  511. IS[i+2].Size = 0;
  512. i+=2;
  513. } else if (i < numInstr-1 &&
  514. !IS[i].FsOverride &&
  515. !IS[i].FsOverride &&
  516. IS[i].Operand1.Type == OPND_REGREF &&
  517. IS[i+1].Operation == OP_Pop32 &&
  518. IS[i+1].Operand1.Type == OPND_REGREF) {
  519. // Fold the two POPs together. Both operands are REGREF,
  520. // so there is no problem with interdependencies between
  521. // memory touched by the first POP modifying the address
  522. // of the second POP. ie. the following is not merged:
  523. // POP EAX
  524. // POP [EAX] ; depends on results of first POP
  525. IS[i].Operation = OP_OPT_Pop232;
  526. IS[i].Operand2 = IS[i+1].Operand1;
  527. IS[i].Size += IS[i+1].Size;
  528. IS[i+1].Operation = OP_Nop;
  529. IS[i+1].Operand1.Type = OPND_NONE;
  530. IS[i+1].Size = 0;
  531. i++;
  532. }
  533. break;
  534. case OP_Xor32:
  535. case OP_Sub32:
  536. if (IS[i].Operand1.Type == OPND_REGREF &&
  537. IS[i].Operand2.Type == OPND_REGVALUE &&
  538. IS[i].Operand1.Reg == IS[i].Operand2.Reg) {
  539. // Instruction is XOR samereg, samereg (ie. XOR EAX, EAX),
  540. // or SUB samereg, samereg (ie. SUB ECX, ECX).
  541. // Emit OP_OPT_ZERO32 samereg
  542. IS[i].Operand2.Type = OPND_NONE;
  543. IS[i].Operation = OP_OPT_ZERO32;
  544. }
  545. break;
  546. case OP_Test8:
  547. if (IS[i].Operand1.Type == OPND_REGVALUE &&
  548. IS[i].Operand2.Type == OPND_REGVALUE &&
  549. IS[i].Operand1.Reg == IS[i].Operand2.Reg) {
  550. // Instruction is TEST samereg, samereg (ie. TEST EAX, EAX)
  551. // Emit OP_OPT_FastTest8/16/32
  552. IS[i].Operand1.Type = OPND_REGVALUE;
  553. IS[i].Operand2.Type = OPND_NONE;
  554. IS[i].Operation = OP_OPT_FastTest8;
  555. }
  556. break;
  557. case OP_Test16:
  558. if (IS[i].Operand1.Type == OPND_REGVALUE &&
  559. IS[i].Operand2.Type == OPND_REGVALUE &&
  560. IS[i].Operand1.Reg == IS[i].Operand2.Reg) {
  561. // Instruction is TEST samereg, samereg (ie. TEST EAX, EAX)
  562. // Emit OP_OPT_FastTest8/16/32
  563. IS[i].Operand1.Type = OPND_REGVALUE;
  564. IS[i].Operand2.Type = OPND_NONE;
  565. IS[i].Operation = OP_OPT_FastTest16;
  566. }
  567. break;
  568. case OP_Test32:
  569. if (IS[i].Operand1.Type == OPND_REGVALUE &&
  570. IS[i].Operand2.Type == OPND_REGVALUE &&
  571. IS[i].Operand1.Reg == IS[i].Operand2.Reg) {
  572. // Instruction is TEST samereg, samereg (ie. TEST EAX, EAX)
  573. // Emit OP_OPT_FastTest8/16/32
  574. IS[i].Operand1.Type = OPND_REGVALUE;
  575. IS[i].Operand2.Type = OPND_NONE;
  576. IS[i].Operation = OP_OPT_FastTest32;
  577. }
  578. break;
  579. case OP_Cmp32:
  580. if (i<numInstr+1 && IS[i+1].Operation == OP_Sbb32 &&
  581. IS[i+1].Operand1.Type == OPND_REGREF &&
  582. IS[i+1].Operand2.Type == OPND_REGVALUE &&
  583. IS[i+1].Operand1.Reg == IS[i+1].Operand2.Reg) {
  584. // The two instructions are:
  585. // CMP anything1, anything2
  586. // SBB samereg, samereg
  587. // The optimized instruction is:
  588. // Operation = either CmpSbb32 or CmpSbbNeg32
  589. // Operand1 = &samereg (passed as REGREF)
  590. // Operand2 = anything1 (passed as ADDRVAL32 or REGVAL)
  591. // Operand3 = anything2 (passed as ADDRVAL32 or REGVAL)
  592. IS[i].Operand3 = IS[i].Operand2;
  593. IS[i].Operand2 = IS[i].Operand1;
  594. IS[i].Operand1 = IS[i+1].Operand1;
  595. if (i<numInstr+2 && IS[i+2].Operation == OP_Neg32 &&
  596. IS[i+2].Operand1.Type == OPND_REGREF &&
  597. IS[i+2].Operand1.Reg == IS[i+1].Operand1.Reg) {
  598. // The third instruction is NEG samereg, samereg
  599. IS[i].Operation = OP_OPT_CmpSbbNeg32;
  600. IS[i+2].Operation = OP_Nop;
  601. IS[i+2].Operand1.Type = OPND_NONE;
  602. IS[i+2].Operand2.Type = OPND_NONE;
  603. IS[i+2].Size = 0;
  604. } else {
  605. IS[i].Operation = OP_OPT_CmpSbb32;
  606. }
  607. IS[i+1].Operation = OP_Nop;
  608. IS[i+1].Operand1.Type = OPND_NONE;
  609. IS[i+1].Operand2.Type = OPND_NONE;
  610. IS[i+1].Size = 0;
  611. i++;
  612. }
  613. break;
  614. case OP_Cwd16:
  615. if (i<numInstr+1 && IS[i+1].Operation == OP_Idiv16) {
  616. IS[i].Operation = OP_OPT_CwdIdiv16;
  617. IS[i].Operand1 = IS[i+1].Operand1;
  618. IS[i].Size += IS[i+1].Size;
  619. IS[i+1].Operation = OP_Nop;
  620. IS[i+1].Operand1.Type = OPND_NONE;
  621. IS[i+1].Size = 0;
  622. i++;
  623. }
  624. break;
  625. case OP_Cwd32:
  626. if (i<numInstr+1 && IS[i+1].Operation == OP_Idiv32) {
  627. IS[i].Operation = OP_OPT_CwdIdiv32;
  628. IS[i].Operand1 = IS[i+1].Operand1;
  629. IS[i].Size += IS[i+1].Size;
  630. IS[i+1].Operation = OP_Nop;
  631. IS[i+1].Operand1.Type = OPND_NONE;
  632. IS[i+1].Size = 0;
  633. i++;
  634. }
  635. break;
  636. case OP_FP_FNSTSW:
  637. if (i<numInstr+1 && IS[i+1].Operation == OP_Sahf &&
  638. IS[i].Operand1.Type == OPND_REGREF &&
  639. IS[i].Operand1.Reg == GP_AX) {
  640. // Replace FNSTSW AX / SAHF by one instruction
  641. IS[i].Operation = OP_OPT_FNSTSWAxSahf;
  642. IS[i].Operand1.Type = OPND_NONE;
  643. IS[i].Size += IS[i+1].Size;
  644. IS[i+1].Operation = OP_Nop;
  645. IS[i+1].Size = 0;
  646. i++;
  647. }
  648. break;
  649. case OP_FP_FSTP_STi:
  650. if (IS[i].Operand1.Immed == 0) {
  651. IS[i].Operand1.Type = OPND_NONE;
  652. IS[i].Operation = OP_OPT_FSTP_ST0;
  653. }
  654. break;
  655. }
  656. }
  657. }
  658. VOID
  659. OptimizeIntelFlags(
  660. PINSTRUCTION IS,
  661. ULONG numInstr
  662. )
  663. /*++
  664. Routine Description:
  665. This function analysis x86 flag register usage and switches instructions
  666. to use NoFlags versions if possible.
  667. Arguments:
  668. IS -- The instruction stream returned by the decoder
  669. numInstr -- The length of IS
  670. Return Value:
  671. return-value - none
  672. --*/
  673. {
  674. USHORT FlagsNeeded; // flags required to execute current x86 instr
  675. USHORT FlagsToGenerate; // flags which current x86 instr must generate
  676. PFRAGDESCR pFragDesc; // ptr to Fragments[] array for current instr
  677. ULONG i; // instruction index
  678. BOOL fPassNeeded = TRUE;// TRUE if the outer loop needs to loop once more
  679. ULONG PassNumber = 0; // number of times outer loop has looped
  680. PENTRYPOINT pEPDest; // Entrypoint for destination of a ctrl transfer
  681. USHORT KnownFlagsNeeded[MAX_INSTR_COUNT]; // flags needed for each instr
  682. while (fPassNeeded) {
  683. //
  684. // This loop is executed at most two times. The second pass is only
  685. // required if there is a control-transfer instruction whose
  686. // destination is within the Instruction Stream and at a lower
  687. // Intel address (ie. a backwards JMP).
  688. //
  689. fPassNeeded = FALSE;
  690. PassNumber++;
  691. CPUASSERT(PassNumber <= 2);
  692. //
  693. // Iterate over all x86 instructions decoded, from bottom to top,
  694. // propagating flags info up. Start off by assuming all x86 flags
  695. // must be up-to-date at the end of the last basic block.
  696. //
  697. FlagsNeeded = ALLFLAGS;
  698. i = numInstr;
  699. do {
  700. i--;
  701. pFragDesc = &Fragments[IS[i].Operation];
  702. //
  703. // Calculate what flags will need to be computed by this
  704. // instruction and ones before this.
  705. //
  706. KnownFlagsNeeded[i] = FlagsNeeded | pFragDesc->FlagsNeeded;
  707. FlagsToGenerate = FlagsNeeded & pFragDesc->FlagsSet;
  708. //
  709. // Calculate what flags this instruction will need to have
  710. // computed before it can be executed.
  711. //
  712. FlagsNeeded = (FlagsNeeded & ~FlagsToGenerate) |
  713. pFragDesc->FlagsNeeded;
  714. if (pFragDesc->Flags & OPFL_CTRLTRNS) {
  715. ULONG IntelDest = IS[i].Operand1.Immed;
  716. //
  717. // For control-transfer instructions, FlagsNeeded also includes
  718. // the flags required for the destination of the transfer.
  719. //
  720. if (IS[0].IntelAddress <= IntelDest &&
  721. i > 0 && IS[i-1].IntelAddress >= IntelDest) {
  722. //
  723. // The destination of the control-transfer is at a lower
  724. // address in the Instruction Stream.
  725. //
  726. if (PassNumber == 1) {
  727. //
  728. // Need to make a second pass over the flags
  729. // optimizations in order to determine what flags are
  730. // needed for the destination address.
  731. //
  732. fPassNeeded = TRUE;
  733. FlagsNeeded = ALLFLAGS; // assume all flags are needed
  734. } else {
  735. ULONG j;
  736. USHORT NewFlagsNeeded;
  737. //
  738. // Search for the IntelDest within the Instruction
  739. // Stream. IntelDest may not be found if there is
  740. // a pun.
  741. //
  742. NewFlagsNeeded = ALLFLAGS; // assume there is a pun
  743. for (j=0; j < i; ++j) {
  744. if (IS[j].IntelAddress == IntelDest) {
  745. NewFlagsNeeded = KnownFlagsNeeded[j];
  746. break;
  747. }
  748. }
  749. FlagsNeeded |= NewFlagsNeeded;
  750. }
  751. } else if (IS[i+1].IntelAddress <= IntelDest &&
  752. IntelDest <= IS[numInstr-1].IntelAddress) {
  753. //
  754. // The destination of the control-transfer is at a higher
  755. // address in the Instruction Stream. Pick up the
  756. // already-computed FlagsNeeded for the destination.
  757. //
  758. ULONG j;
  759. USHORT NewFlagsNeeded = ALLFLAGS; // assume a pun
  760. for (j=i+1; j < numInstr; ++j) {
  761. if (IS[j].IntelAddress == IntelDest) {
  762. NewFlagsNeeded = KnownFlagsNeeded[j];
  763. break;
  764. }
  765. }
  766. FlagsNeeded |= NewFlagsNeeded;
  767. } else {
  768. //
  769. // Destination of the control-transfer is unknown. Assume
  770. // the worst: all flags are required.
  771. //
  772. FlagsNeeded = ALLFLAGS;
  773. }
  774. }
  775. if (!(FlagsToGenerate & pFragDesc->FlagsSet) &&
  776. (pFragDesc->Flags & OPFL_HASNOFLAGS)) {
  777. //
  778. // This instruction is not required to generate any flags, and
  779. // it has a NOFLAGS version. Update the flags that need to be
  780. // computed by instructions before this one, and modify the
  781. // Operation number to point at the NoFlags fragment.
  782. //
  783. FlagsToGenerate &= pFragDesc->FlagsSet;
  784. if (pFragDesc->Flags & OPFL_ALIGN) {
  785. IS[i].Operation += 2;
  786. } else {
  787. IS[i].Operation ++;
  788. }
  789. if (IS[i].Operation == OP_OPT_ZERONoFlags32) {
  790. //
  791. // Special-case this to be a "mov [value], zero" so it is
  792. // inlined.
  793. //
  794. IS[i].Operation = OP_Mov32;
  795. IS[i].Operand2.Type = OPND_IMM;
  796. IS[i].Operand2.Immed = 0;
  797. }
  798. }
  799. } while (i);
  800. }
  801. }
  802. VOID
  803. DetermineEbpAlignment(
  804. PINSTRUCTION InstructionStream,
  805. ULONG numInstr
  806. )
  807. /*++
  808. Routine Description:
  809. For each instruction in InstructionStream[], sets Instruction->EbpAligned
  810. based on whether EBP is assumed to be DWORD-aligned or not. EBP is
  811. assumed to be DWORD-aligned if a "MOV EBP, ESP" instruction is seen, and
  812. it is assumed to become unaligned at the first instruction which is
  813. flagged as modifying EBP.
  814. Arguments:
  815. InstructionStream -- The instruction stream returned by the decoder
  816. numInstr -- The length of InstructionStream
  817. Return Value:
  818. return-value - none
  819. --*/
  820. {
  821. ULONG i;
  822. BOOL EbpAligned = FALSE;
  823. for (i=0; i<numInstr; ++i) {
  824. if (InstructionStream[i].RegsSet & REGEBP) {
  825. //
  826. // This instruction modified EBP
  827. //
  828. if (InstructionStream[i].Operation == OP_OPT_SetupStack ||
  829. InstructionStream[i].Operation == OP_OPT_SetupStackNoFlags ||
  830. (InstructionStream[i].Operation == OP_Mov32 &&
  831. InstructionStream[i].Operand2.Type == OPND_REGVALUE &&
  832. InstructionStream[i].Operand2.Reg == GP_ESP)) {
  833. //
  834. // The instruction is either "MOV EBP, ESP" or one of the
  835. // SetupStack fragments (which contains a "MOV EBP, ESP")
  836. // assume Ebp is aligned from now on.
  837. //
  838. EbpAligned = TRUE;
  839. } else {
  840. EbpAligned = FALSE;
  841. }
  842. }
  843. InstructionStream[i].EbpAligned = EbpAligned;
  844. }
  845. }
  846. ULONG
  847. GetInstructionStream(
  848. PINSTRUCTION InstructionStream,
  849. PULONG NumberOfInstructions,
  850. PVOID pIntelInstruction,
  851. PVOID pLastIntelInstruction
  852. )
  853. /*++
  854. Routine Description:
  855. Returns an instruction stream to the compiler. The instruction
  856. stream is terminated either when the buffer is full, or when
  857. we reach a control transfer instruction.
  858. Arguments:
  859. InstructionStream -- A pointer to the buffer where the decoded
  860. instructions are stored.
  861. NumberOfInstructions -- Upon entry, this variable contains the
  862. maximal number of instructions the buffer can hold. When
  863. returning, it contains the actual number of instructions
  864. decoded.
  865. pIntelInstruction -- A pointer to the first real intel instruction
  866. to be decoded.
  867. pLastIntelInstruction -- A pointer to the last intel instruction to be
  868. compiled, 0xffffffff if not used.
  869. Return Value:
  870. Number of entrypoints required to describe the decoded instruction
  871. stream.
  872. --*/
  873. {
  874. ULONG numInstr=0;
  875. ULONG maxBufferSize;
  876. ULONG cEntryPoints;
  877. maxBufferSize = (*NumberOfInstructions);
  878. //
  879. // Zero-fill the InstructionStream. The decoder depends on this.
  880. //
  881. RtlZeroMemory(InstructionStream, maxBufferSize*sizeof(INSTRUCTION));
  882. #if DBG
  883. //
  884. // Do a little analysis on the address we're about to decode. If
  885. // the address is part of a non-x86 image, log that to the debugger.
  886. // That probably indicates a thunking problem. If the address is not
  887. // part of an image, warn that the app is running generated code.
  888. //
  889. try {
  890. USHORT Instr;
  891. //
  892. // Try to read the instruction about to be executed. If we get
  893. // an access violation, use 0 as the value of the instruction.
  894. //
  895. Instr = 0;
  896. //
  897. // Ignore BOP instructions - we assume we know what's going on with
  898. // them.
  899. //
  900. if (Instr != 0xc4c4) {
  901. NTSTATUS st;
  902. MEMORY_BASIC_INFORMATION mbi;
  903. st = NtQueryVirtualMemory(NtCurrentProcess(),
  904. pIntelInstruction,
  905. MemoryBasicInformation,
  906. &mbi,
  907. sizeof(mbi),
  908. NULL);
  909. if (NT_SUCCESS(st)) {
  910. PIMAGE_NT_HEADERS Headers;
  911. Headers = RtlImageNtHeader(mbi.AllocationBase);
  912. if (!Headers || Headers->FileHeader.Machine != IMAGE_FILE_MACHINE_I386) {
  913. LOGPRINT((TRACELOG, "CPU Analysis warning: jumping from Intel to non-intel code at 0x%X\r\n", pIntelInstruction));
  914. }
  915. } else {
  916. // Eip isn't pointing anywhere???
  917. }
  918. }
  919. } except(EXCEPTION_EXECUTE_HANDLER) {
  920. ;
  921. }
  922. #endif //DBG
  923. while (numInstr < maxBufferSize) {
  924. DecodeInstruction ((DWORD) (ULONGLONG)pIntelInstruction, InstructionStream+numInstr);
  925. if ((STOP_DECODING(InstructionStream[numInstr])) ||
  926. (pIntelInstruction >= pLastIntelInstruction)) {
  927. // We reached a control transfer instruction
  928. numInstr++;
  929. (*NumberOfInstructions) = numInstr;
  930. break; // SUCCESS
  931. }
  932. pIntelInstruction = (PVOID) ((ULONGLONG)pIntelInstruction + (InstructionStream+numInstr)->Size);
  933. numInstr++;
  934. }
  935. //
  936. // Optimize x86 code by merging x86 instructions into meta-instructions
  937. // and cleaning up special x86 idioms.
  938. //
  939. if (!(CompilerFlags & COMPFL_SLOW)) {
  940. OptimizeInstructionStream (InstructionStream, numInstr);
  941. }
  942. //
  943. // Determine where all basic blocks are by filling in the EntryPoint
  944. // field in each instruction. This must be done after
  945. // OptimizeInstructionStream() runs so that EntryPoints don't fall
  946. // into the middle of meta-instructions.
  947. //
  948. cEntryPoints = LocateEntryPoints(InstructionStream, numInstr);
  949. //
  950. // Perform optimizations which require knowledge of EntryPoints
  951. //
  952. if (numInstr > 2 && !(CompilerFlags & COMPFL_SLOW)) {
  953. if (!CpuDisableNoFlags) {
  954. OptimizeIntelFlags(InstructionStream, numInstr);
  955. }
  956. if (!CpuDisableRegCache) {
  957. CacheIntelRegs(InstructionStream, numInstr);
  958. }
  959. if (!CpuDisableEbpAlign) {
  960. DetermineEbpAlignment(InstructionStream, numInstr);
  961. }
  962. }
  963. return cEntryPoints;
  964. }