Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

306 lines
7.0 KiB

  1. /*++
  2. Copyright (c) 1995-1998 Microsoft Corporation
  3. Module Name:
  4. optfrag.c
  5. Abstract:
  6. Instruction Fragments which correspond to optimizations.
  7. Author:
  8. 6-July-1995 Ori Gershony (t-orig)
  9. Revision History:
  10. 24-Aug-1999 [askhalid] copied from the 32-bit wx86 directory and made to work for 64-bit.
  11. --*/
  12. #include <nt.h>
  13. #include <ntrtl.h>
  14. #include <nturtl.h>
  15. #include <windows.h>
  16. #include <stdio.h>
  17. #include "cpuassrt.h"
  18. #include "fragp.h"
  19. #include "optfrag.h"
  20. ASSERTNAME;
  21. // This fragment corresponds to:
  22. // push ebx
  23. // push esi
  24. // push edi
  25. FRAG0(OPT_PushEbxEsiEdiFrag)
  26. {
  27. ULONG *espval;
  28. espval=(ULONG *)esp;
  29. *(espval-1) = ebx;
  30. *(espval-2) = esi;
  31. *(espval-3) = edi;
  32. esp=(ULONG)(LONGLONG)espval-12;
  33. }
  34. // This fragment corresponds to:
  35. // pop edi
  36. // pop esi
  37. // pop ebx
  38. FRAG0(OPT_PopEdiEsiEbxFrag)
  39. {
  40. ULONG *espval;
  41. espval=(ULONG *)esp;
  42. edi=*espval;
  43. esi=*(espval+1);
  44. ebx=*(espval+2);
  45. esp=(ULONG)(LONGLONG)espval+12;
  46. }
  47. // This fragment corresponds to:
  48. // push ebp
  49. // mov ebp,esp
  50. // sub esp, op1
  51. FRAG1IMM(OPT_SetupStackFrag, ULONG)
  52. {
  53. ULONG result, oldespminusfour;
  54. oldespminusfour = esp-4;
  55. result = oldespminusfour - op1;
  56. *(ULONG *)oldespminusfour = ebp;
  57. ebp = oldespminusfour;
  58. esp = result;
  59. SET_FLAGS_SUB32(result, oldespminusfour, op1, 0x80000000);
  60. }
  61. FRAG1IMM(OPT_SetupStackNoFlagsFrag, ULONG)
  62. {
  63. ULONG result, oldespminusfour;
  64. oldespminusfour = esp-4;
  65. result = oldespminusfour - op1;
  66. *(ULONG *)oldespminusfour = ebp;
  67. ebp = oldespminusfour;
  68. esp = result;
  69. }
  70. FRAG1(OPT_ZEROFrag32, LONG)
  71. {
  72. // implements: XOR samereg, samereg
  73. // SUB samereg, samereg
  74. // ie. XOR EAX, EAX or SUB ECX, ECX
  75. *pop1 = 0;
  76. SET_CFLAG_OFF;
  77. SET_OFLAG_OFF;
  78. SET_SFLAG_OFF;
  79. SET_ZFLAG(0);
  80. SET_PFLAG(0);
  81. SET_AUXFLAG(0);
  82. }
  83. FRAG1(OPT_ZERONoFlagsFrag32, LONG)
  84. {
  85. // implements: XOR samereg, samereg
  86. // SUB samereg, samereg
  87. // ie. XOR EAX, EAX or SUB ECX, ECX
  88. *pop1 = 0;
  89. }
  90. FRAG3(OPT_CmpSbbFrag32, ULONG, ULONG, ULONG)
  91. {
  92. ULONG result;
  93. ULONG cf;
  94. //
  95. // implements: CMP op2, op3
  96. // SBB op1, op1
  97. //
  98. result = op2-op3;
  99. cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result));
  100. result = (ULONG)-(LONG)(cf >> 31);
  101. *pop1 = result; // pop1 is a pointer to a reg, so always aligned
  102. SET_OFLAG_OFF;
  103. SET_CFLAG(result);
  104. SET_SFLAG(result);
  105. SET_ZFLAG(result);
  106. SET_AUXFLAG(result);
  107. SET_PFLAG(result);
  108. }
  109. FRAG3(OPT_CmpSbbNoFlagsFrag32, ULONG, ULONG, ULONG)
  110. {
  111. ULONG result;
  112. ULONG cf;
  113. //
  114. // implements: CMP op2, op3
  115. // SBB op1, op1
  116. //
  117. result = op2-op3;
  118. cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result));
  119. *pop1 = (ULONG)-(LONG)(cf >> 31);
  120. }
  121. FRAG3(OPT_CmpSbbNegFrag32, ULONG, ULONG, ULONG)
  122. {
  123. ULONG result;
  124. ULONG cf;
  125. //
  126. // implements: CMP op2, op3
  127. // SBB op1, op1
  128. // NEG op1
  129. //
  130. result = op2-op3;
  131. cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result));
  132. // pop1 is a pointer to a reg, so it is always aligned
  133. if (cf >= 0x80000000) {
  134. result = 1;
  135. *pop1 = result; // store the result before updating flags
  136. SET_CFLAG_ON; // set if result != 0
  137. SET_AUXFLAG(0xfe); // this is (BYTE)(0xffffffff ^ 0x00000001)
  138. } else {
  139. result = 0;
  140. *pop1 = result; // store the result before updating flags
  141. SET_CFLAG_OFF; // cleared if result==0
  142. SET_AUXFLAG(0); // this is (BYTE)(0x0 ^ 0x0)
  143. SET_OFLAG_OFF; // this is (0x0 & 0x0) << 31
  144. }
  145. SET_ZFLAG(result);
  146. SET_PFLAG(result);
  147. SET_SFLAG_OFF;
  148. SET_OFLAG_OFF; // this is either (0xffffffff & 0x00000001) or (0 & 0)
  149. }
  150. FRAG3(OPT_CmpSbbNegNoFlagsFrag32, ULONG, ULONG, ULONG)
  151. {
  152. ULONG result;
  153. ULONG cf;
  154. //
  155. // implements: CMP op2, op3
  156. // SBB op1, op1
  157. // NEG op1
  158. //
  159. result = op2-op3;
  160. cf = (op2 ^ op3 ^ result) ^ ((op2 ^ op3) & (op2 ^ result));
  161. // result is 1 if high bit of cf is set, 0 if high bit is clear
  162. *pop1 = cf >> 31;
  163. }
  164. FRAG2IMM(OPT_Push2Frag32, ULONG, ULONG)
  165. {
  166. //
  167. // implements: PUSH op1
  168. // PUSH op2
  169. // Note that the analysis phase must ensure that the value of op2 does
  170. // not depend on the value of ESP, as op2 will be computed before the
  171. // first PUSH is excuted.
  172. //
  173. PUSH_LONG(op1);
  174. PUSH_LONG(op2);
  175. }
  176. FRAG2REF(OPT_Pop2Frag32, ULONG)
  177. {
  178. //
  179. // implements: POP pop1
  180. // POP pop2
  181. //
  182. // Note that the analysis phase must ensure that the value of pop2 does
  183. // not depend on the value of pop1, as pop1 will not have been popped
  184. // when the value of pop2 is computed.
  185. //
  186. POP_LONG(*pop1);
  187. POP_LONG(*pop2);
  188. }
  189. FRAG1(OPT_CwdIdivFrag16, USHORT)
  190. {
  191. short op1;
  192. short result;
  193. //
  194. // implements: CWD
  195. // IDIV EAX, *pop1
  196. // The CWD sign-extends EAX into EDX:EAX, which means, we can
  197. // avoid a 64-bit division and just divide EAX. There is no
  198. // possibility of overflow.
  199. //
  200. op1 = (short)GET_SHORT(pop1);
  201. // Must do the divide before modifying edx, in case op1==0 and we fault.
  202. result = (short)ax / op1;
  203. dx = (short)ax % op1;
  204. ax = result;
  205. }
  206. FRAG1(OPT_CwdIdivFrag16A, USHORT)
  207. {
  208. short op1;
  209. short result;
  210. //
  211. // implements: CWD
  212. // IDIV EAX, *pop1
  213. // The CWD sign-extends EAX into EDX:EAX, which means, we can
  214. // avoid a 64-bit division and just divide EAX. There is no
  215. // possibility of overflow.
  216. //
  217. op1 = (short)*pop1;
  218. // Must do the divide before modifying edx, in case op1==0 and we fault.
  219. result = (short)ax / op1;
  220. dx = (short)ax % op1;
  221. ax = result;
  222. }
  223. FRAG1(OPT_CwdIdivFrag32, ULONG)
  224. {
  225. long op1;
  226. long result;
  227. //
  228. // implements: CWD
  229. // IDIV EAX, *pop1
  230. // The CWD sign-extends EAX into EDX:EAX, which means, we can
  231. // avoid a 64-bit division and just divide EAX. There is no
  232. // possibility of overflow.
  233. //
  234. op1 = (long)GET_LONG(pop1);
  235. // Must do the divide before modifying edx, in case op1==0 and we fault.
  236. result = (long)eax / op1;
  237. edx = (long)eax % op1;
  238. eax = result;
  239. }
  240. FRAG1(OPT_CwdIdivFrag32A, ULONG)
  241. {
  242. long op1;
  243. long result;
  244. //
  245. // implements: CWD
  246. // IDIV EAX, *pop1
  247. // The CWD sign-extends EAX into EDX:EAX, which means, we can
  248. // avoid a 64-bit division and just divide EAX. There is no
  249. // possibility of overflow.
  250. //
  251. op1 = (long)*pop1;
  252. // Must do the divide before modifying edx, in case op1==0 and we fault.
  253. result = (long)eax / op1;
  254. edx = (long)eax % op1;
  255. eax = result;
  256. }
  257. // This fragment should never be called!
  258. FRAG0(OPT_OPTIMIZEDFrag)
  259. {
  260. CPUASSERTMSG(FALSE, "OPTIMIZED fragment should never be called!");
  261. }