Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

2146 lines
69 KiB

  1. /*****************************************************************************
  2. * *
  3. * Intel Confidential *
  4. * *
  5. * *
  6. * XMMI_FP_Emulation () - XMMI FP instruction emulation for the FP IEEE filter *
  7. * *
  8. * *
  9. * History: *
  10. * Marius Cornea-Hasegan, Mar 1998; modified Jun 1998; added DAZ Oct 2000 *
  11. * marius.cornea@intel.com *
  12. * *
  13. *****************************************************************************/
  14. // #define _DEBUG_FPU
  15. // #define _XMMI_DEBUG
  16. // XMMI_FP_Emulation () receives the input operands of a XMMI FP instruction
  17. // (operating on single-precision floating-point numbers and/or signed
  18. // integers), that might cause a floating-point exception (enabled or not).
  19. //
  20. // Arguments: PXMMI_ENV XmmiEnv
  21. //
  22. // The type of every field (INPUT or OUTPUT) is indicated below:
  23. //
  24. // typedef struct _XMMI_ENV {
  25. // ULONG Masks; //Mask values from MxCsr INPUT
  26. // ULONG Fz; //Flush to Zero INPUT
  27. // ULONG Rc; //Rounding INPUT
  28. // ULONG Precision; //Precision INPUT
  29. // ULONG Imm8; //imm8 predicate INPUT
  30. // ULONG EFlags; //EFlags INPUT/OUTPUT
  31. // _FPIEEE_RECORD *Ieee; //FP IEEE Record INPUT/OUTPUT,
  32. // field dependent
  33. // } XMMI_ENV, *PXMMI_ENV;
  34. //
  35. // The _FP_IEEE record and the _FPIEEE_VALUE are defined as:
  36. //
  37. // typedef struct {
  38. // unsigned int RoundingMode : 2; OUTPUT
  39. // unsigned int Precision : 3; OUTPUT
  40. // unsigned int Operation :12; INPUT
  41. // _FPIEEE_EXCEPTION_FLAGS Cause; OUTPUT
  42. // _FPIEEE_EXCEPTION_FLAGS Enable; OUTPUT
  43. // _FPIEEE_EXCEPTION_FLAGS Status; OUTPUT
  44. // _FPIEEE_VALUE Operand1; INPUT
  45. // _FPIEEE_VALUE Operand2; INPUT
  46. // _FPIEEE_VALUE Result; INPUT/OUTPUT,
  47. // field dependent
  48. // } _FPIEEE_RECORD, *_PFPIEEE_RECORD;
  49. //
  50. // typedef struct {
  51. // union {
  52. // _FP32 Fp32Value;
  53. // _FP64 Fp64Value;
  54. // _FP80 Fp80Value;
  55. // _FP128 Fp128Value;
  56. // _I16 I16Value;
  57. // _I32 I32Value;
  58. // _I64 I64Value;
  59. // _U16 U16Value;
  60. // _U32 U32Value;
  61. // _U64 U64Value;
  62. // _BCD80 Bcd80Value;
  63. // char *StringValue;
  64. // int CompareValue;
  65. // } Value; INPUT for operands,
  66. // OUTPUT for result
  67. //
  68. // unsigned int OperandValid : 1; INPUT for operands
  69. // INPUT/OUTPUT for result
  70. // unsigned int Format : 4; INPUT
  71. //
  72. // } _FPIEEE_VALUE;
  73. //
  74. // Return Value:
  75. // ExceptionRaised if an enabled floating-point exception condition is
  76. // detected; in this case, the fields of XmmiEnv->Ieee are filled in
  77. // appropriately to be passed directly to a user exception handler; the
  78. // XmmiEnv->Ieee->Cause bits indicate the cause of the exception, but if
  79. // a denormal exception occurred, then no XmmiEnv->Ieee->Cause bit is set;
  80. // upon return from the user handler, the caller of XMMI_FP_Emulation should
  81. // interpret the result for a compare instruction (CMPPS, CMPSS, COMISS,
  82. // UCOMISS); the Enable, Rounding, and Precision fields in _FPIEEE_RECORD
  83. // have to be checked too for possible changes by the user handler
  84. //
  85. // NoExceptionRaised if no floating-point exception condition occurred, or
  86. // if a disabled floating-point exception occurred; in this case,
  87. // XmmiEnv->Ieee->Result.Value contains the instruction's result,
  88. // XmmiEnv->Ieee->Status contains the IEEE floating-point status flags
  89. //
  90. // Implementation Notes:
  91. //
  92. // - the operation code in XmmiEnv->Ieee->Operation is changed as expected
  93. // by a user exception handler (even if no exception is raised):
  94. // from OP_ADDPS, OP_ADDSS to _FpCodeAdd
  95. // from OP_SUBPS, OP_SUBSS to _FpCodeSubtract
  96. // from OP_MULPS, OP_MULSS to _FpCodeMultiply
  97. // from OP_DIVPS, OP_DIVSS to _FpCodeDivide
  98. // from OP_CMPPS, OP_CMPSS to _FpCodeCompare
  99. // from OP_COMISS, OP_UCOMISS to _FpCodeCompare
  100. // from OP_CVTPI2PS, OP_CVTSI2SS to _FpCodeConvert
  101. // from OP_CVTPS2PI, OP_CVTSS2SI to _FpCodeConvert
  102. // from OP_CVTTPS2PI, OP_CVTTSS2SI to _FpCodeConvertTrunc
  103. // from OP_MAXPS, OP_MAXSS to _FpCodeMax
  104. // from OP_MINPS, OP_MINSS to _FpCodeMin
  105. // from OP_SQRTPS, OP_SQRTSS to _FpCodeSquareRoot
  106. //
  107. //
  108. // - for ADDPS, ADDSS, SUBPS, SUBSS, MULPS, MULSS, DIVPS, DIVSS:
  109. //
  110. // - execute the operation with x86 instructions (fld,
  111. // faddp/fsubp/fmulp/fdivp, and fstp), using the user
  112. // rounding mode, 24-bit significands, and 11-bit exponents for results
  113. // - if the invalid flag is set and the invalid exceptions are enabled,
  114. // take an invalid trap (i.e. return RaiseException with the IEEE record
  115. // filled out appropriately)
  116. // - if any input operand is a NaN:
  117. // - if both operands are NaNs, return the first operand ("quietized"
  118. // if SNaN)
  119. // - if only one operand is a NaN, return it ("quietized" if SNaN)
  120. // - set the invalid flag if needed, and return NoExceptionRaised
  121. // - if the denormal flag is set and the denormal exceptions are enabled,
  122. // take a denormal trap (i.e. return RaiseException with the IEEE record
  123. // filled out appropriately [no Cause bit set])
  124. // - if the divide by zero flag is set (for DIVPS and DIVSS only) and the
  125. // divide by zero exceptions are enabled, take a divide by zero trap
  126. // (i.e. return RaiseException with the IEEE record filled out
  127. // appropriately)
  128. // - if the result is a NaN (QNaN Indefinite), the operation must have been
  129. // Inf - Inf, Inf * 0, Inf / Inf, or 0 / 0; set the invalid status flag
  130. // and return NoExceptionRaised
  131. // - determine whether the result is tiny or huge
  132. // - if the underflow traps are enabled and the result is tiny, take an
  133. // underflow trap (i.e. return RaiseException with the IEEE record
  134. // filled out appropriately)
  135. // - if the overflow traps are enabled and the result is huge, take an
  136. // overflow trap (i.e. return RaiseException with the IEEE record
  137. // filled out appropriately)
  138. // - re-do the operation with x86 instructions, using the user rounding
  139. // mode, 53-bit significands, and 11-bit exponents for results (this will
  140. // allow rounding to 24 bits without a double rounding error - needed for
  141. // the case the result requires denormalization) [cannot denormalize
  142. // without a possible double rounding error starting from a 24-bit
  143. // significand]
  144. // - round to 24 bits (or to less than 24 bits if denormalization is
  145. // needed), for the case an inexact trap has to be taken, or if no
  146. // exception occurs
  147. // - if the result is inexact and the inexact exceptions are enabled,
  148. // take an inexact trap (i.e. return RaiseException with the IEEE record
  149. // filled out appropriately); if the flush-to-zero mode is enabled and
  150. // the result is tiny, the result is flushed to zero
  151. // - if no exception has to be raised, the flush-to-zero mode is enabled,
  152. // and the result is tiny, then the result is flushed to zero; set the
  153. // status flags and return NoExceptionRaised
  154. //
  155. // - for CMPPS, CMPSS
  156. //
  157. // - for EQ, UNORD, NEQ, ORD, SNaN operands signal invalid
  158. // - for LT, LE, NLT, NLE, QNaN/SNaN operands (one or both) signal invalid
  159. // - if the invalid exception condition is met and the invalid exceptions
  160. // are enabled, take an invalid trap (i.e. return RaiseException with the
  161. // IEEE record filled out appropriately)
  162. // - if any operand is a NaN and the compare type is EQ, LT, LE, or ORD,
  163. // set the result to "false", set the value of the invalid status flag,
  164. // and return NoExceptionRaised
  165. // - if any operand is a NaN and the compare type is NEQ, NLT, NLE, or
  166. // UNORD, set the result to "true", set the value of the invalid status
  167. // flag, and return NoExceptionRaised
  168. // - if any operand is denormal and the denormal exceptions are enabled,
  169. // take a denormal trap (i.e. return RaiseException with the IEEE record
  170. // filled out appropriately [no Cause bit set])
  171. // - if no exception has to be raised, determine the result and return
  172. // NoExceptionRaised
  173. //
  174. // - for COMISS, UCOMISS
  175. //
  176. // - for COMISS, QNaN/SNaN operands (one or both) signal invalid
  177. // - for UCOMISS, SNaN operands (one or both) signal invalid
  178. // - if the invalid exception condition is met and the invalid exceptions
  179. // are enabled, take invalid trap (i.e. return RaiseException with the
  180. // IEEE record filled out appropriately)
  181. // - if any operand is a NaN, set OF, SF, AF = 000, ZF, PF, CF = 111,
  182. // set the value of the invalid status flag, and return NoExceptionRaised
  183. // - if any operand is denormal and the denormal exceptions are enabled,
  184. // take a denormal trap (i.e. return RaiseException with the IEEE record
  185. // filled out appropriately [no Cause bit set])
  186. // - if no exception has to be raised, determine the result and set EFlags,
  187. // set the value of the invalid status flag, and return NoExceptionRaised
  188. //
  189. // - for CVTPI2PS, CVTSI2SS
  190. //
  191. // - execute the operation with x86 instructions (fild and fstp), using the
  192. // user rounding mode, 24-bit significands, and an 8-bit exponent for
  193. // the result
  194. // - if the inexact flag is set and the inexact exceptions are enabled,
  195. // set the result and take an inexact trap (i.e. return RaiseException
  196. // with the IEEE record filled out appropriately)
  197. // - if no exception has to be raised, set the result, the value of the
  198. // inexact status flag and return NoExceptionRaised
  199. //
  200. // - for CVTPS2PI, CVTSS2SI, CVTTPS2PI, CVTTSS2SI
  201. //
  202. // - execute the operation with x86 instructions (fld and fistp), using
  203. // the user rounding mode for CVT* and chop for CVTT*
  204. // - if the invalid flag is set and the invalid exceptions are enabled,
  205. // take an invalid trap (i.e. return RaiseException with the IEEE record
  206. // filled out appropriately) [the invalid operation condition occurs for
  207. // any input operand that does not lead through conversion to a valid
  208. // 32-bit signed integer; the result is in such cases the Integer
  209. // Indefinite value]
  210. // - set the result value
  211. // - if the inexact flag is set and the inexact exceptions are enabled,
  212. // take an inexact trap (i.e. return RaiseException with the IEEE record
  213. // filled out appropriately)
  214. // - if no exception has to be raised, set the value of the invalid status
  215. // flag and of the inexact status flag and return NoExceptionRaised
  216. //
  217. // - for MAXPS, MAXSS, MINPS, MINSS
  218. //
  219. // - check for invalid exception (QNaN/SNaN operands signal invalid)
  220. // - if the invalid exception condition is met and the invalid exceptions
  221. // are enabled, take an invalid trap (i.e. return RaiseException with the
  222. // IEEE record filled out appropriately)
  223. // - if any operand is a NaN, set the result to the value of the second
  224. // operand, set the invalid status flag to 1, and return NoExceptionRaised
  225. // - if any operand is denormal and the denormal exceptions are enabled,
  226. // take a denormal trap (i.e. return RaiseException with the IEEE record
  227. // filled out appropriately [no Cause bit set])
  228. // - if no exception has to be raised, determine the result and return
  229. // NoExceptionRaised
  230. //
  231. // - for SQRTPS, SQRTSS
  232. //
  233. // - execute the operation with x86 instructions (fld, fsqrt, and fstp),
  234. // using the user rounding mode, 24-bit significands, and an 8-bit
  235. // exponent for the result
  236. // - if the invalid flag is set and the invalid exceptions are enabled,
  237. // take an invalid trap (i.e. return RaiseException with the IEEE record
  238. // filled out appropriately)
  239. // - if the denormal flag is set and the denormal exceptions are enabled,
  240. // take a denormal trap (i.e. return RaiseException with the IEEE record
  241. // filled out appropriately [no Cause bit set])
  242. // - if the result is inexact and the inexact exceptions are enabled,
  243. // take an inexact trap (i.e. return RaiseException with the IEEE record
  244. // filled out appropriately)
  245. // - if no exception has to be raised, set the status flags and return
  246. // NoExceptionRaised
  247. //
  248. #include <wtypes.h>
  249. #include <trans.h>
  250. #include <float.h>
  251. #include "xmmi_types.h"
  252. #include "filter.h"
  253. #ifdef _XMMI_DEBUG
  254. #include "temp_context.h"
  255. #include "debug.h"
  256. #endif
  257. // masks for individual status word bits; in this file they are tested against XmmiEnv->Masks (bit set = exception masked) and OR-ed into XmmiEnv->Flags
  258. #define P_MASK 0x20 // precision (inexact)
  259. #define U_MASK 0x10 // underflow
  260. #define O_MASK 0x08 // overflow
  261. #define Z_MASK 0x04 // divide by zero
  262. #define D_MASK 0x02 // denormal operand
  263. #define I_MASK 0x01 // invalid operation
  264. // 32-bit constants: IEEE single-precision bit patterns held in unsigned arrays and read back as float through the macros below (NOTE(review): the macro expansions are not parenthesized)
  265. static unsigned ZEROFA[] = {0x00000000}; // bit pattern of +0.0
  266. #define ZEROF *(float *) ZEROFA
  267. static unsigned NZEROFA[] = {0x80000000}; // bit pattern of -0.0
  268. #define NZEROF *(float *) NZEROFA
  269. static unsigned POSINFFA[] = {0x7f800000}; // bit pattern of +Infinity
  270. #define POSINFF *(float *)POSINFFA
  271. static unsigned NEGINFFA[] = {0xff800000}; // bit pattern of -Infinity
  272. #define NEGINFF *(float *)NEGINFFA
  273. #ifdef _XMMI_DEBUG
  274. static unsigned QNANINDEFFA[] = {0xffc00000}; // QNaN Indefinite; only referenced by the _XMMI_DEBUG result check
  275. #define QNANINDEFF *(float *)QNANINDEFFA
  276. #endif
  277. // 64-bit constants: IEEE double-precision bit patterns stored as {low 32 bits, high 32 bits} and read back as double through the macros below
  278. static unsigned MIN_SINGLE_NORMALA [] = {0x00000000, 0x38100000};
  279. // +1.0 * 2^-126 (smallest positive normal single-precision value, held as a double); used as the tininess threshold
  280. #define MIN_SINGLE_NORMAL *(double *)MIN_SINGLE_NORMALA
  281. static unsigned MAX_SINGLE_NORMALA [] = {0xe0000000, 0x47efffff};
  282. // +1.1...1*2^127 (largest finite single-precision value, held as a double); used as the hugeness threshold
  283. #define MAX_SINGLE_NORMAL *(double *)MAX_SINGLE_NORMALA
  284. static unsigned TWO_TO_192A[] = {0x00000000, 0x4bf00000}; // 2^192; multiplies a tiny result before an underflow trap is reported
  285. #define TWO_TO_192 *(double *)TWO_TO_192A
  286. static unsigned TWO_TO_M192A[] = {0x00000000, 0x33f00000}; // 2^-192; multiplies a huge result before an overflow trap is reported
  287. #define TWO_TO_M192 *(double *)TWO_TO_M192A
  288. // auxiliary functions (prototypes only; the definitions are not in this part of the file)
  289. static void Fill_FPIEEE_RECORD (PXMMI_ENV XmmiEnv); // called on every trap path below before returning ExceptionRaised
  290. static int issnanf (float f); // presumably non-zero when f is a signaling NaN - confirm against the definition
  291. static int isnanf (float f); // used below to detect NaN operands and NaN results
  292. static float quietf (float f); // used below to obtain the NaN result when both operands are NaNs; presumably quiets an SNaN - confirm
  293. static int isdenormalf (float f); // used below to find denormal operands when XmmiEnv->Daz is set
  294. ULONG
  295. XMMI_FP_Emulation (PXMMI_ENV XmmiEnv)
  296. {
  297. float opd1, opd2, res;
  298. int iopd1; // for conversions from int to float
  299. int ires; // for conversions from float to int
  300. double dbl_res24; // needed to check tininess, to provide a scaled result to
  301. // an underflow/overflow trap handler, and in flush-to-zero
  302. unsigned int result_tiny;
  303. unsigned int result_huge;
  304. unsigned int rc, sw;
  305. unsigned long imm8;
  306. unsigned int invalid_exc;
  307. unsigned int denormal_exc;
  308. unsigned int cmp_res;
  309. // Note that ExceptionCode is always STATUS_FLOAT_MULTIPLE_FAULTS in the
  310. // calling routine, so we have to check first for faults, and then for traps
  311. #ifdef _DEBUG_FPU
  312. unsigned int in_top;
  313. unsigned int out_top;
  314. char fp_env[108];
  315. unsigned short int *control_word, *status_word, *tag_word;
  316. // read status word
  317. sw = _status87 ();
  318. in_top = (sw >> 11) & 0x07;
  319. if (in_top != 0x0) printf ("XMMI_FP_Emulate WARNING: in_top = %d\n", in_top);
  320. __asm {
  321. fnsave fp_env;
  322. }
  323. control_word = (unsigned short *)fp_env;
  324. status_word = (unsigned short *)(fp_env + 2);
  325. tag_word = (unsigned short *)(fp_env + 8);
  326. if (*tag_word != 0xffff) printf ("XMMI_FP_Emulate WARNING: tag_word = %x\n", *tag_word);
  327. #endif
  328. _asm {
  329. fninit;
  330. }
  331. #ifdef _DEBUG_FPU
  332. // read status word
  333. sw = _status87 ();
  334. in_top = (sw >> 11) & 0x07;
  335. if (in_top != 0x0)
  336. printf ("XMMI_FP_Emulate () XMMI_FP_Emulate () ERROR: in_top = %d\n", in_top);
  337. __asm {
  338. fnsave fp_env;
  339. }
  340. tag_word = (unsigned short *)(fp_env + 8);
  341. if (*tag_word != 0xffff) {
  342. printf ("XMMI_FP_Emulate () XMMI_FP_Emulate () ERROR: tag_word = %x\n",
  343. *tag_word);
  344. printf ("control, status, tag = %x %x %x %x %x %x\n",
  345. fp_env[0] & 0xff, fp_env[1] & 0xff, fp_env[4] & 0xff,
  346. fp_env[5] & 0xff, fp_env[8] & 0xff, fp_env[9] & 0xff);
  347. }
  348. #endif
  349. #ifdef _XMMI_DEBUG
  350. print_FPIEEE_RECORD (XmmiEnv);
  351. #endif
  352. result_tiny = 0;
  353. result_huge = 0;
  354. XmmiEnv->Ieee->RoundingMode = XmmiEnv->Rc;
  355. XmmiEnv->Ieee->Precision = XmmiEnv->Precision;
  356. switch (XmmiEnv->Ieee->Operation) {
  357. case OP_ADDPS:
  358. case OP_ADDSS:
  359. case OP_SUBPS:
  360. case OP_SUBSS:
  361. case OP_MULPS:
  362. case OP_MULSS:
  363. case OP_DIVPS:
  364. case OP_DIVSS:
  365. opd1 = XmmiEnv->Ieee->Operand1.Value.Fp32Value;
  366. opd2 = XmmiEnv->Ieee->Operand2.Value.Fp32Value;
  367. if (XmmiEnv->Daz) {
  368. if (isdenormalf (opd1)) opd1 = opd1 * (float)0.0;
  369. if (isdenormalf (opd2)) opd2 = opd2 * (float)0.0;
  370. }
  371. // adjust operation code
  372. switch (XmmiEnv->Ieee->Operation) {
  373. case OP_ADDPS:
  374. case OP_ADDSS:
  375. XmmiEnv->Ieee->Operation = _FpCodeAdd;
  376. break;
  377. case OP_SUBPS:
  378. case OP_SUBSS:
  379. XmmiEnv->Ieee->Operation = _FpCodeSubtract;
  380. break;
  381. case OP_MULPS:
  382. case OP_MULSS:
  383. XmmiEnv->Ieee->Operation = _FpCodeMultiply;
  384. break;
  385. case OP_DIVPS:
  386. case OP_DIVSS:
  387. XmmiEnv->Ieee->Operation = _FpCodeDivide;
  388. break;
  389. default:
  390. ; // will never occur
  391. }
  392. // execute the operation and check whether the invalid, denormal, or
  393. // divide by zero flags are set and the respective exceptions enabled
  394. switch (XmmiEnv->Rc) {
  395. case _FpRoundNearest:
  396. rc = _RC_NEAR;
  397. break;
  398. case _FpRoundMinusInfinity:
  399. rc = _RC_DOWN;
  400. break;
  401. case _FpRoundPlusInfinity:
  402. rc = _RC_UP;
  403. break;
  404. case _FpRoundChopped:
  405. rc = _RC_CHOP;
  406. break;
  407. default:
  408. ; // internal error
  409. }
  410. _control87 (rc | _PC_24 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC);
  411. // compute result and round to the destination precision, with
  412. // "unbounded" exponent (first IEEE rounding)
  413. switch (XmmiEnv->Ieee->Operation) {
  414. case _FpCodeAdd:
  415. // perform the add
  416. __asm {
  417. fnclex;
  418. // load input operands
  419. fld DWORD PTR opd1; // may set the denormal or invalid status flags
  420. fld DWORD PTR opd2; // may set the denormal or invalid status flags
  421. faddp st(1), st(0); // may set the inexact or invalid status flags
  422. // store result
  423. fstp QWORD PTR dbl_res24; // exact
  424. }
  425. break;
  426. case _FpCodeSubtract:
  427. // perform the subtract
  428. __asm {
  429. fnclex;
  430. // load input operands
  431. fld DWORD PTR opd1; // may set the denormal or invalid status flags
  432. fld DWORD PTR opd2; // may set the denormal or invalid status flags
  433. fsubp st(1), st(0); // may set the inexact or invalid status flags
  434. // store result
  435. fstp QWORD PTR dbl_res24; // exact
  436. }
  437. break;
  438. case _FpCodeMultiply:
  439. // perform the multiply
  440. __asm {
  441. fnclex;
  442. // load input operands
  443. fld DWORD PTR opd1; // may set the denormal or invalid status flags
  444. fld DWORD PTR opd2; // may set the denormal or invalid status flags
  445. fmulp st(1), st(0); // may set the inexact or invalid status flags
  446. // store result
  447. fstp QWORD PTR dbl_res24; // exact
  448. }
  449. break;
  450. case _FpCodeDivide:
  451. // perform the divide
  452. __asm {
  453. fnclex;
  454. // load input operands
  455. fld DWORD PTR opd1; // may set the denormal or invalid status flags
  456. fld DWORD PTR opd2; // may set the denormal or invalid status flags
  457. fdivp st(1), st(0); // may set the inexact, divide by zero, or
  458. // invalid status flags
  459. // store result
  460. fstp QWORD PTR dbl_res24; // exact
  461. }
  462. break;
  463. default:
  464. ; // will never occur
  465. }
  466. // read status word
  467. sw = _status87 ();
  468. if (sw & _SW_ZERODIVIDE) sw = sw & ~0x00080000; // clear D flag for den/0
  469. // if invalid flag is set, and invalid exceptions are enabled, take trap
  470. if (!(XmmiEnv->Masks & I_MASK) && (sw & _SW_INVALID)) {
  471. // fill in part of the FP IEEE record
  472. Fill_FPIEEE_RECORD (XmmiEnv);
  473. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  474. XmmiEnv->Flags |= I_MASK;
  475. // Cause = Enable & Status
  476. XmmiEnv->Ieee->Cause.InvalidOperation = 1;
  477. #ifdef _DEBUG_FPU
  478. // read status word
  479. sw = _status87 ();
  480. out_top = (sw >> 11) & 0x07;
  481. if (in_top != out_top) {
  482. printf ("XMMI_FP_Emulate () ERROR 1: in_top =%d != out_top = %d\n",
  483. in_top, out_top);
  484. exit (1);
  485. }
  486. #endif
  487. return (ExceptionRaised);
  488. }
  489. // checking for NaN operands has priority over denormal exceptions; also
  490. // fix for the differences in treating two NaN inputs between the XMMI
  491. // instructions and other x86 instructions
  492. if (isnanf (opd1) || isnanf (opd2)) {
  493. XmmiEnv->Ieee->Result.OperandValid = 1;
  494. if (isnanf (opd1) && isnanf (opd2))
  495. XmmiEnv->Ieee->Result.Value.Fp32Value = quietf (opd1);
  496. else
  497. XmmiEnv->Ieee->Result.Value.Fp32Value = (float)dbl_res24;
  498. // conversion to single precision is exact
  499. XmmiEnv->Ieee->Status.Underflow = 0;
  500. XmmiEnv->Ieee->Status.Overflow = 0;
  501. XmmiEnv->Ieee->Status.Inexact = 0;
  502. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  503. if (sw & _SW_INVALID) {
  504. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  505. XmmiEnv->Flags |= I_MASK;
  506. } else {
  507. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  508. }
  509. #ifdef _DEBUG_FPU
  510. // read status word
  511. sw = _status87 ();
  512. out_top = (sw >> 11) & 0x07;
  513. if (in_top != out_top) {
  514. printf ("XMMI_FP_Emulate () ERROR 2: in_top =%d != out_top = %d\n",
  515. in_top, out_top);
  516. exit (1);
  517. }
  518. #endif
  519. return (NoExceptionRaised);
  520. }
  521. // if denormal flag is set, and denormal exceptions are enabled, take trap
  522. if (!(XmmiEnv->Masks & D_MASK) && (sw & _SW_DENORMAL)) {
  523. // fill in part of the FP IEEE record
  524. Fill_FPIEEE_RECORD (XmmiEnv);
  525. // Note: the exception code is STATUS_FLOAT_INVALID in this case
  526. #ifdef _DEBUG_FPU
  527. // read status word
  528. sw = _status87 ();
  529. out_top = (sw >> 11) & 0x07;
  530. if (in_top != out_top) {
  531. printf ("XMMI_FP_Emulate () ERROR 3: in_top =%d != out_top = %d\n",
  532. in_top, out_top);
  533. exit (1);
  534. }
  535. #endif
  536. XmmiEnv->Flags |= D_MASK;
  537. return (ExceptionRaised);
  538. }
  539. // if divide by zero flag is set, and divide by zero exceptions are
  540. // enabled, take trap (for divide only)
  541. if (!(XmmiEnv->Masks & Z_MASK) && (sw & _SW_ZERODIVIDE)) {
  542. // fill in part of the FP IEEE record
  543. Fill_FPIEEE_RECORD (XmmiEnv);
  544. XmmiEnv->Ieee->Status.ZeroDivide = 1;
  545. XmmiEnv->Flags |= Z_MASK;
  546. // Cause = Enable & Status
  547. XmmiEnv->Ieee->Cause.ZeroDivide = 1;
  548. #ifdef _DEBUG_FPU
  549. // read status word
  550. sw = _status87 ();
  551. out_top = (sw >> 11) & 0x07;
  552. if (in_top != out_top) {
  553. printf ("XMMI_FP_Emulate () ERROR 4: in_top =%d != out_top = %d\n",
  554. in_top, out_top);
  555. exit (1);
  556. }
  557. #endif
  558. return (ExceptionRaised);
  559. }
  560. // done if the result is a NaN (QNaN Indefinite)
  561. res = (float)dbl_res24;
  562. if (isnanf (res)) {
  563. #ifdef _XMMI_DEBUG
  564. if (res != QNANINDEFF)
  565. fprintf (stderr, "XMMI_FP_Emulation () INTERNAL XMMI_FP_Emulate () ERROR: "
  566. "res = %f = %x is not QNaN Indefinite\n",
  567. (double)res, *(unsigned int *)&res);
  568. #endif
  569. XmmiEnv->Ieee->Result.OperandValid = 1;
  570. XmmiEnv->Ieee->Result.Value.Fp32Value = res; // exact
  571. XmmiEnv->Ieee->Status.Underflow = 0;
  572. XmmiEnv->Ieee->Status.Overflow = 0;
  573. XmmiEnv->Ieee->Status.Inexact = 0;
  574. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  575. XmmiEnv->Ieee->Status.InvalidOperation = 1; // sw & _SW_INVALID true
  576. XmmiEnv->Flags |= I_MASK;
  577. #ifdef _DEBUG_FPU
  578. // read status word
  579. sw = _status87 ();
  580. out_top = (sw >> 11) & 0x07;
  581. if (in_top != out_top) {
  582. printf ("XMMI_FP_Emulate () ERROR 5: in_top =%d != out_top = %d\n",
  583. in_top, out_top);
  584. exit (1);
  585. }
  586. #endif
  587. return (NoExceptionRaised);
  588. }
  589. // dbl_res24 is not a NaN at this point
  590. if (sw & _SW_DENORMAL) XmmiEnv->Flags |= D_MASK;
  591. // check if the result is tiny
  592. // Note: (dbl_res24 == 0.0 && sw & _SW_INEXACT) cannot occur
  593. if (-MIN_SINGLE_NORMAL < dbl_res24 && dbl_res24 < 0.0 ||
  594. 0.0 < dbl_res24 && dbl_res24 < MIN_SINGLE_NORMAL) {
  595. result_tiny = 1;
  596. }
  597. // check if the result is huge
  598. if (NEGINFF < dbl_res24 && dbl_res24 < -MAX_SINGLE_NORMAL ||
  599. MAX_SINGLE_NORMAL < dbl_res24 && dbl_res24 < POSINFF) {
  600. result_huge = 1;
  601. }
  602. // at this point, there are no enabled I, D, or Z exceptions; the instr.
  603. // might lead to an enabled underflow, enabled underflow and inexact,
  604. // enabled overflow, enabled overflow and inexact, enabled inexact, or
  605. // none of these; if there are no U or O enabled exceptions, re-execute
  606. // the instruction using iA32 stack single precision format, and the
  607. // user's rounding mode; exceptions must have been disabled; an inexact
  608. // exception may be reported on the 24-bit faddp, fsubp, fmulp, or fdivp,
  609. // while an overflow or underflow (with traps disabled !) may be reported
  610. // on the fstp
  611. // check whether there is a underflow, overflow, or inexact trap to be
  612. // taken
  613. // if the underflow traps are enabled and the result is tiny, take
  614. // underflow trap
  615. if (!(XmmiEnv->Masks & U_MASK) && result_tiny) {
  616. dbl_res24 = TWO_TO_192 * dbl_res24; // exact
  617. // fill in part of the FP IEEE record
  618. Fill_FPIEEE_RECORD (XmmiEnv);
  619. XmmiEnv->Ieee->Status.Underflow = 1;
  620. XmmiEnv->Flags |= U_MASK;
  621. XmmiEnv->Ieee->Cause.Underflow = 1;
  622. XmmiEnv->Ieee->Result.OperandValid = 1;
  623. XmmiEnv->Ieee->Result.Value.Fp32Value = (float)dbl_res24; // exact
  624. if (sw & _SW_INEXACT) {
  625. XmmiEnv->Ieee->Status.Inexact = 1;
  626. XmmiEnv->Flags |= P_MASK;
  627. }
  628. #ifdef _DEBUG_FPU
  629. // read status word
  630. sw = _status87 ();
  631. out_top = (sw >> 11) & 0x07;
  632. if (in_top != out_top) {
  633. printf ("XMMI_FP_Emulate () ERROR 6: in_top =%d != out_top = %d\n",
  634. in_top, out_top);
  635. exit (1);
  636. }
  637. #endif
  638. return (ExceptionRaised);
  639. }
  640. // if overflow traps are enabled and the result is huge, take
  641. // overflow trap
  642. if (!(XmmiEnv->Masks & O_MASK) && result_huge) {
  643. dbl_res24 = TWO_TO_M192 * dbl_res24; // exact
  644. // fill in part of the FP IEEE record
  645. Fill_FPIEEE_RECORD (XmmiEnv);
  646. XmmiEnv->Ieee->Status.Overflow = 1;
  647. XmmiEnv->Flags |= O_MASK;
  648. XmmiEnv->Ieee->Cause.Overflow = 1;
  649. XmmiEnv->Ieee->Result.OperandValid = 1;
  650. XmmiEnv->Ieee->Result.Value.Fp32Value = (float)dbl_res24; // exact
  651. if (sw & _SW_INEXACT) {
  652. XmmiEnv->Ieee->Status.Inexact = 1;
  653. XmmiEnv->Flags |= P_MASK;
  654. }
  655. #ifdef _DEBUG_FPU
  656. // read status word
  657. sw = _status87 ();
  658. out_top = (sw >> 11) & 0x07;
  659. if (in_top != out_top) {
  660. printf ("XMMI_FP_Emulate () ERROR 7: in_top =%d != out_top = %d\n",
  661. in_top, out_top);
  662. exit (1);
  663. }
  664. #endif
  665. return (ExceptionRaised);
  666. }
  667. // calculate result for the case an inexact trap has to be taken, or
  668. // when no trap occurs (second IEEE rounding)
  669. switch (XmmiEnv->Ieee->Operation) {
  670. case _FpCodeAdd:
  671. // perform the add
  672. __asm {
  673. // load input operands
  674. fld DWORD PTR opd1; // may set the denormal status flag
  675. fld DWORD PTR opd2; // may set the denormal status flag
  676. faddp st(1), st(0); // rounded to 24 bits, may set the inexact
  677. // status flag
  678. // store result
  679. fstp DWORD PTR res; // exact, will not set any flag
  680. }
  681. break;
  682. case _FpCodeSubtract:
  683. // perform the subtract
  684. __asm {
  685. // load input operands
  686. fld DWORD PTR opd1; // may set the denormal status flag
  687. fld DWORD PTR opd2; // may set the denormal status flag
  688. fsubp st(1), st(0); // rounded to 24 bits, may set the inexact
  689. // status flag
  690. // store result
  691. fstp DWORD PTR res; // exact, will not set any flag
  692. }
  693. break;
  694. case _FpCodeMultiply:
  695. // perform the multiply
  696. __asm {
  697. // load input operands
  698. fld DWORD PTR opd1; // may set the denormal status flag
  699. fld DWORD PTR opd2; // may set the denormal status flag
  700. fmulp st(1), st(0); // rounded to 24 bits, may set the inexact
  701. // status flag
  702. // store result
  703. fstp DWORD PTR res; // exact, will not set any flag
  704. }
  705. break;
  706. case _FpCodeDivide:
  707. // perform the divide
  708. __asm {
  709. // load input operands
  710. fld DWORD PTR opd1; // may set the denormal status flag
  711. fld DWORD PTR opd2; // may set the denormal status flag
  712. fdivp st(1), st(0); // rounded to 24 bits, may set the inexact
  713. // or divide by zero status flags
  714. // store result
  715. fstp DWORD PTR res; // exact, will not set any flag
  716. }
  717. break;
  718. default:
  719. ; // will never occur
  720. }
  721. // read status word
  722. sw = _status87 ();
  723. // if inexact traps are enabled and result is inexact, take inexact trap
  724. if (!(XmmiEnv->Masks & P_MASK) &&
  725. ((sw & _SW_INEXACT) || (XmmiEnv->Fz && result_tiny))) {
  726. // fill in part of the FP IEEE record
  727. Fill_FPIEEE_RECORD (XmmiEnv);
  728. XmmiEnv->Ieee->Status.Inexact = 1;
  729. XmmiEnv->Flags |= P_MASK;
  730. XmmiEnv->Ieee->Cause.Inexact = 1;
  731. XmmiEnv->Ieee->Result.OperandValid = 1;
  732. if (result_tiny) {
  733. XmmiEnv->Ieee->Status.Underflow = 1;
  734. XmmiEnv->Flags |= U_MASK;
  735. // Note: the condition above is equivalent to
  736. // if (sw & _SW_UNDERFLOW) XmmiEnv->Ieee->Status.Underflow = 1;
  737. }
  738. if (result_huge) {
  739. XmmiEnv->Ieee->Status.Overflow = 1;
  740. XmmiEnv->Flags |= O_MASK;
  741. // Note: the condition above is equivalent to
  742. // if (sw & _SW_OVERFLOW) XmmiEnv->Ieee->Status.Overflow = 1;
  743. }
  744. // if ftz = 1 and result is tiny, result = 0.0
  745. // (no need to check for underflow traps disabled: result tiny and
  746. // underflow traps enabled would have caused taking an underflow
  747. // trap above)
  748. if (XmmiEnv->Fz && result_tiny) {
  749. // Note: the condition above is equivalent to
  750. // if (XmmiEnv->Fz && (sw & _SW_UNDERFLOW))
  751. if (res > 0.0)
  752. res = ZEROF;
  753. else if (res < 0.0)
  754. res = NZEROF;
  755. // else leave res unchanged
  756. }
  757. XmmiEnv->Ieee->Result.Value.Fp32Value = res;
  758. #ifdef _DEBUG_FPU
  759. // read status word
  760. sw = _status87 ();
  761. out_top = (sw >> 11) & 0x07;
  762. if (in_top != out_top) {
  763. printf ("XMMI_FP_Emulate () ERROR 8: in_top =%d != out_top = %d\n",
  764. in_top, out_top);
  765. exit (1);
  766. }
  767. #endif
  768. return (ExceptionRaised);
  769. }
  770. // if it got here, then there is no trap to be taken; the following must
  771. // hold: ((the MXCSR U exceptions are disabled or
  772. //
  773. // the MXCSR underflow exceptions are enabled and the underflow flag is
  774. // clear and (the inexact flag is set or the inexact flag is clear and
  775. // the 24-bit result with unbounded exponent is not tiny)))
  776. // and (the MXCSR overflow traps are disabled or the overflow flag is
  777. // clear) and (the MXCSR inexact traps are disabled or the inexact flag
  778. // is clear)
  779. //
  780. // in this case, the result has to be delivered (the status flags are
  781. // sticky, so they are all set correctly already)
  782. #ifdef _XMMI_DEBUG
  783. // error if the condition stated above does not hold
  784. if (!((XmmiEnv->Masks & U_MASK || (!(XmmiEnv->Masks & U_MASK) &&
  785. !(sw & _SW_UNDERFLOW) && ((sw & _SW_INEXACT) ||
  786. !(sw & _SW_INEXACT) && !result_tiny))) &&
  787. ((XmmiEnv->Masks & O_MASK) || !(sw & _SW_OVERFLOW)) &&
  788. ((XmmiEnv->Masks & P_MASK) || !(sw & _SW_INEXACT)))) {
  789. fprintf (stderr, "XMMI_FP_Emulation () INTERNAL XMMI_FP_Emulate () ERROR for "
  790. "ADDPS/ADDSS/SUBPS/SUBSS/MULPS/MULSS/DIVPS/DIVSS\n");
  791. }
  792. #endif
  793. XmmiEnv->Ieee->Result.OperandValid = 1;
  794. if (sw & _SW_UNDERFLOW) {
  795. XmmiEnv->Ieee->Status.Underflow = 1;
  796. XmmiEnv->Flags |= U_MASK;
  797. } else {
  798. XmmiEnv->Ieee->Status.Underflow = 0;
  799. }
  800. if (sw & _SW_OVERFLOW) {
  801. XmmiEnv->Ieee->Status.Overflow = 1;
  802. XmmiEnv->Flags |= O_MASK;
  803. } else {
  804. XmmiEnv->Ieee->Status.Overflow = 0;
  805. }
  806. if (sw & _SW_INEXACT) {
  807. XmmiEnv->Ieee->Status.Inexact = 1;
  808. XmmiEnv->Flags |= P_MASK;
  809. } else {
  810. XmmiEnv->Ieee->Status.Inexact = 0;
  811. }
  812. // if ftz = 1, and result is tiny (underflow traps must be disabled),
  813. // result = 0.0
  814. if (XmmiEnv->Fz && result_tiny) {
  815. if (res > 0.0)
  816. res = ZEROF;
  817. else if (res < 0.0)
  818. res = NZEROF;
  819. // else leave res unchanged
  820. XmmiEnv->Ieee->Status.Inexact = 1;
  821. XmmiEnv->Flags |= P_MASK;
  822. XmmiEnv->Ieee->Status.Underflow = 1;
  823. XmmiEnv->Flags |= U_MASK;
  824. }
  825. XmmiEnv->Ieee->Result.Value.Fp32Value = res;
  826. // note that there is no way to
  827. // communicate to the caller that the denormal flag was set - we count
  828. // on the XMMI instruction to have set the denormal flag in MXCSR if
  829. // needed, regardless of the other components of the input operands
  830. // (invalid or not; the caller will have to update the underflow,
  831. // overflow, and inexact flags in MXCSR)
  832. if (sw & _SW_ZERODIVIDE) {
  833. XmmiEnv->Ieee->Status.ZeroDivide = 1;
  834. XmmiEnv->Flags |= Z_MASK;
  835. } else {
  836. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  837. }
  838. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  839. #ifdef _DEBUG_FPU
  840. // read status word
  841. sw = _status87 ();
  842. out_top = (sw >> 11) & 0x07;
  843. if (in_top != out_top) {
  844. printf ("XMMI_FP_Emulate () ERROR 9: in_top =%d != out_top = %d\n",
  845. in_top, out_top);
  846. exit (1);
  847. }
  848. #endif
  849. return (NoExceptionRaised);
  850. break;
  851. case OP_CMPPS:
  852. case OP_CMPSS:
  853. opd1 = XmmiEnv->Ieee->Operand1.Value.Fp32Value;
  854. opd2 = XmmiEnv->Ieee->Operand2.Value.Fp32Value;
  855. if (XmmiEnv->Daz) {
  856. if (isdenormalf (opd1)) opd1 = opd1 * (float)0.0;
  857. if (isdenormalf (opd2)) opd2 = opd2 * (float)0.0;
  858. }
  859. imm8 = XmmiEnv->Imm8 & 0x07;
  860. // adjust operation code
  861. XmmiEnv->Ieee->Operation = _FpCodeCompare;
  862. // check whether an invalid exception has to be raised
  863. switch (imm8) {
  864. case IMM8_EQ:
  865. case IMM8_UNORD:
  866. case IMM8_NEQ:
  867. case IMM8_ORD:
  868. if (issnanf (opd1) || issnanf (opd2))
  869. invalid_exc = 1; // SNaN operands signal invalid
  870. else
  871. invalid_exc = 0; // QNaN or other operands do not signal invalid
  872. // guard against the case when an SNaN operand was converted to
  873. // QNaN by compiler generated code
  874. sw = _status87 ();
  875. if (sw & _SW_INVALID) invalid_exc = 1;
  876. break;
  877. case IMM8_LT:
  878. case IMM8_LE:
  879. case IMM8_NLT:
  880. case IMM8_NLE:
  881. if (isnanf (opd1) || isnanf (opd2))
  882. invalid_exc = 1; // SNaN/QNaN operands signal invalid
  883. else
  884. invalid_exc = 0; // other operands do not signal invalid
  885. break;
  886. default:
  887. ; // will never occur
  888. }
  889. // if invalid_exc = 1, and invalid exceptions are enabled, take trap
  890. if (invalid_exc && !(XmmiEnv->Masks & I_MASK)) {
  891. // fill in part of the FP IEEE record
  892. Fill_FPIEEE_RECORD (XmmiEnv);
  893. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  894. XmmiEnv->Flags |= I_MASK;
  895. // Cause = Enable & Status
  896. XmmiEnv->Ieee->Cause.InvalidOperation = 1;
  897. // Note: the calling function will have to interpret the value returned
  898. // by the user handler, if execution is to be continued
  899. #ifdef _DEBUG_FPU
  900. // read status word
  901. sw = _status87 ();
  902. out_top = (sw >> 11) & 0x07;
  903. if (in_top != out_top) {
  904. printf ("XMMI_FP_Emulate () ERROR 10: in_top =%d != out_top = %d\n",
  905. in_top, out_top);
  906. exit (1);
  907. }
  908. #endif
  909. return (ExceptionRaised);
  910. }
  911. // checking for NaN operands has priority over denormal exceptions
  912. if (isnanf (opd1) || isnanf (opd2)) {
  913. switch (imm8) {
  914. case IMM8_EQ:
  915. case IMM8_LT:
  916. case IMM8_LE:
  917. case IMM8_ORD:
  918. cmp_res = 0x0;
  919. break;
  920. case IMM8_UNORD:
  921. case IMM8_NEQ:
  922. case IMM8_NLT:
  923. case IMM8_NLE:
  924. cmp_res = 0xffffffff;
  925. break;
  926. default:
  927. ; // will never occur
  928. }
  929. XmmiEnv->Ieee->Result.OperandValid = 1;
  930. XmmiEnv->Ieee->Result.Value.Fp32Value = *((float *)&cmp_res);
  931. // may make U32Value
  932. XmmiEnv->Ieee->Status.Inexact = 0;
  933. XmmiEnv->Ieee->Status.Underflow = 0;
  934. XmmiEnv->Ieee->Status.Overflow = 0;
  935. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  936. // Note that the denormal flag will not be updated by _fpieee_flt (),
  937. // even if an operand is denormal
  938. if (invalid_exc) {
  939. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  940. XmmiEnv->Flags |= I_MASK;
  941. } else {
  942. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  943. }
  944. #ifdef _DEBUG_FPU
  945. // read status word
  946. sw = _status87 ();
  947. out_top = (sw >> 11) & 0x07;
  948. if (in_top != out_top) {
  949. printf ("XMMI_FP_Emulate () ERROR 11: in_top =%d != out_top = %d\n",
  950. in_top, out_top);
  951. exit (1);
  952. }
  953. #endif
  954. return (NoExceptionRaised);
  955. }
  956. // check whether a denormal exception has to be raised
  957. if (isdenormalf (opd1) || isdenormalf (opd2)) {
  958. denormal_exc = 1;
  959. XmmiEnv->Flags |= D_MASK;
  960. } else {
  961. denormal_exc = 0;
  962. }
  963. // if denormal_exc = 1, and denormal exceptions are enabled, take trap
  964. if (denormal_exc && !(XmmiEnv->Masks & D_MASK)) {
  965. // fill in part of the FP IEEE record
  966. Fill_FPIEEE_RECORD (XmmiEnv);
  967. // Note: the exception code is STATUS_FLOAT_INVALID in this case
  968. #ifdef _DEBUG_FPU
  969. // read status word
  970. sw = _status87 ();
  971. out_top = (sw >> 11) & 0x07;
  972. if (in_top != out_top) {
  973. printf ("XMMI_FP_Emulate () ERROR 12: in_top =%d != out_top = %d\n",
  974. in_top, out_top);
  975. exit (1);
  976. }
  977. #endif
  978. return (ExceptionRaised);
  979. }
  980. // no exception has to be raised, and no operand is a NaN; calculate
  981. // and deliver the result
  982. if (opd1 < opd2) {
  983. switch (imm8) {
  984. case IMM8_LT:
  985. case IMM8_LE:
  986. case IMM8_NEQ:
  987. case IMM8_ORD:
  988. cmp_res = 0xffffffff;
  989. break;
  990. case IMM8_EQ:
  991. case IMM8_UNORD:
  992. case IMM8_NLT:
  993. case IMM8_NLE:
  994. cmp_res = 0x0;
  995. break;
  996. default:
  997. ; // will never occur
  998. }
  999. } else if (opd1 > opd2) {
  1000. switch (imm8) {
  1001. case IMM8_NEQ:
  1002. case IMM8_NLT:
  1003. case IMM8_NLE:
  1004. case IMM8_ORD:
  1005. cmp_res = 0xffffffff;
  1006. break;
  1007. case IMM8_EQ:
  1008. case IMM8_LT:
  1009. case IMM8_LE:
  1010. case IMM8_UNORD:
  1011. cmp_res = 0x0;
  1012. break;
  1013. default:
  1014. ; // will never occur
  1015. }
  1016. } else if (opd1 == opd2) {
  1017. switch (imm8) {
  1018. case IMM8_EQ:
  1019. case IMM8_LE:
  1020. case IMM8_NLT:
  1021. case IMM8_ORD:
  1022. cmp_res = 0xffffffff;
  1023. break;
  1024. case IMM8_LT:
  1025. case IMM8_UNORD:
  1026. case IMM8_NEQ:
  1027. case IMM8_NLE:
  1028. cmp_res = 0x0;
  1029. break;
  1030. default:
  1031. ; // will never occur
  1032. }
  1033. } else { // could eliminate this case
  1034. #ifdef _DEBUG_FPU
  1035. fprintf (stderr, "XMMI_FP_Emulation () INTERNAL XMMI_FP_Emulate () ERROR for CMPPS/CMPSS\n");
  1036. #endif
  1037. }
  1038. XmmiEnv->Ieee->Result.OperandValid = 1;
  1039. XmmiEnv->Ieee->Result.Value.Fp32Value = *((float *)&cmp_res);
  1040. // may make U32Value
  1041. XmmiEnv->Ieee->Status.Inexact = 0;
  1042. XmmiEnv->Ieee->Status.Underflow = 0;
  1043. XmmiEnv->Ieee->Status.Overflow = 0;
  1044. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1045. // Note that the denormal flag will not be updated by _fpieee_flt (),
  1046. // even if an operand is denormal
  1047. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  1048. #ifdef _DEBUG_FPU
  1049. // read status word
  1050. sw = _status87 ();
  1051. out_top = (sw >> 11) & 0x07;
  1052. if (in_top != out_top) {
  1053. printf ("XMMI_FP_Emulate () ERROR 13: in_top =%d != out_top = %d\n",
  1054. in_top, out_top);
  1055. exit (1);
  1056. }
  1057. #endif
  1058. return (NoExceptionRaised);
  1059. break;
  1060. case OP_COMISS:
  1061. case OP_UCOMISS:
  1062. opd1 = XmmiEnv->Ieee->Operand1.Value.Fp32Value;
  1063. opd2 = XmmiEnv->Ieee->Operand2.Value.Fp32Value;
  1064. if (XmmiEnv->Daz) {
  1065. if (isdenormalf (opd1)) opd1 = opd1 * (float)0.0;
  1066. if (isdenormalf (opd2)) opd2 = opd2 * (float)0.0;
  1067. }
  1068. // check whether an invalid exception has to be raised
  1069. switch (XmmiEnv->Ieee->Operation) {
  1070. case OP_COMISS:
  1071. if (isnanf (opd1) || isnanf (opd2)) {
  1072. invalid_exc = 1;
  1073. } else
  1074. invalid_exc = 0;
  1075. break;
  1076. case OP_UCOMISS:
  1077. if (issnanf (opd1) || issnanf (opd2))
  1078. invalid_exc = 1;
  1079. else
  1080. invalid_exc = 0;
  1081. // guard against the case when an SNaN operand was converted to
  1082. // QNaN by compiler generated code
  1083. sw = _status87 ();
  1084. if (sw & _SW_INVALID) invalid_exc = 1;
  1085. break;
  1086. default:
  1087. ; // will never occur
  1088. }
  1089. // adjust operation code
  1090. XmmiEnv->Ieee->Operation = _FpCodeCompare;
  1091. // if invalid_exc = 1, and invalid exceptions are enabled, take trap
  1092. if (invalid_exc && !(XmmiEnv->Masks & I_MASK)) {
  1093. // fill in part of the FP IEEE record
  1094. Fill_FPIEEE_RECORD (XmmiEnv);
  1095. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1096. XmmiEnv->Flags |= I_MASK;
  1097. // Cause = Enable & Status
  1098. XmmiEnv->Ieee->Cause.InvalidOperation = 1;
  1099. // Note: the calling function will have to interpret the value returned
  1100. // by the user handler, if execution is to be continued
  1101. #ifdef _DEBUG_FPU
  1102. // read status word
  1103. sw = _status87 ();
  1104. out_top = (sw >> 11) & 0x07;
  1105. if (in_top != out_top) {
  1106. printf ("XMMI_FP_Emulate () ERROR 14: in_top =%d != out_top = %d\n",
  1107. in_top, out_top);
  1108. exit (1);
  1109. }
  1110. #endif
  1111. return (ExceptionRaised);
  1112. }
  1113. // EFlags:
  1114. // 333222222222211111111110000000000
  1115. // 210987654321098765432109876543210
  1116. // O SZ A P C
  1117. // checking for NaN operands has priority over denormal exceptions
  1118. if (isnanf (opd1) || isnanf (opd2)) {
  1119. // OF, SF, AF = 000, ZF, PF, CF = 111
  1120. XmmiEnv->EFlags = (XmmiEnv->EFlags & 0xfffff76f) | 0x00000045;
  1121. XmmiEnv->Ieee->Status.Inexact = 0;
  1122. XmmiEnv->Ieee->Status.Underflow = 0;
  1123. XmmiEnv->Ieee->Status.Overflow = 0;
  1124. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1125. // Note that the denormal flag will not be updated by _fpieee_flt (),
  1126. // even if an operand is denormal
  1127. if (invalid_exc) {
  1128. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1129. XmmiEnv->Flags |= I_MASK;
  1130. } else {
  1131. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  1132. }
  1133. #ifdef _DEBUG_FPU
  1134. // read status word
  1135. sw = _status87 ();
  1136. out_top = (sw >> 11) & 0x07;
  1137. if (in_top != out_top) {
  1138. printf ("XMMI_FP_Emulate () ERROR 15: in_top =%d != out_top = %d\n",
  1139. in_top, out_top);
  1140. exit (1);
  1141. }
  1142. #endif
  1143. return (NoExceptionRaised);
  1144. }
  1145. // check whether a denormal exception has to be raised
  1146. if (isdenormalf (opd1) || isdenormalf (opd2)) {
  1147. denormal_exc = 1;
  1148. XmmiEnv->Flags |= D_MASK;
  1149. } else {
  1150. denormal_exc = 0;
  1151. }
  1152. // if denormal_exc = 1, and denormal exceptions are enabled, take trap
  1153. if (denormal_exc && !(XmmiEnv->Masks & D_MASK)) {
  1154. // fill in part of the FP IEEE record
  1155. Fill_FPIEEE_RECORD (XmmiEnv);
  1156. // Note: the exception code is STATUS_FLOAT_INVALID in this case
  1157. #ifdef _DEBUG_FPU
  1158. // read status word
  1159. sw = _status87 ();
  1160. out_top = (sw >> 11) & 0x07;
  1161. if (in_top != out_top) {
  1162. printf ("XMMI_FP_Emulate () ERROR 16: in_top =%d != out_top = %d\n",
  1163. in_top, out_top);
  1164. exit (1);
  1165. }
  1166. #endif
  1167. return (ExceptionRaised);
  1168. }
  1169. // no exception has to be raised, and no operand is a NaN; calculate
  1170. // and deliver the result
  1171. // 333222222222211111111110000000000
  1172. // 210987654321098765432109876543210
  1173. // O SZ A P C
  1174. if (opd1 > opd2) {
  1175. // OF, SF, AF = 000, ZF, PF, CF = 000
  1176. XmmiEnv->EFlags = XmmiEnv->EFlags & 0xfffff72a;
  1177. } else if (opd1 < opd2) {
  1178. // OF, SF, AF = 000, ZF, PF, CF = 001
  1179. XmmiEnv->EFlags = (XmmiEnv->EFlags & 0xfffff72b) | 0x00000001;
  1180. } else if (opd1 == opd2) {
  1181. // OF, SF, AF = 000, ZF, PF, CF = 100
  1182. XmmiEnv->EFlags = (XmmiEnv->EFlags & 0xfffff76a) | 0x00000040;
  1183. } else { // could eliminate this case
  1184. #ifdef _DEBUG_FPU
  1185. fprintf (stderr, "XMMI_FP_Emulation () INTERNAL XMMI_FP_Emulate () ERROR for COMISS/UCOMISS\n");
  1186. #endif
  1187. }
  1188. XmmiEnv->Ieee->Status.Inexact = 0;
  1189. XmmiEnv->Ieee->Status.Underflow = 0;
  1190. XmmiEnv->Ieee->Status.Overflow = 0;
  1191. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1192. // Note that the denormal flag will not be updated by _fpieee_flt (),
  1193. // even if an operand is denormal
  1194. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  1195. #ifdef _DEBUG_FPU
  1196. // read status word
  1197. sw = _status87 ();
  1198. out_top = (sw >> 11) & 0x07;
  1199. if (in_top != out_top) {
  1200. printf ("XMMI_FP_Emulate () ERROR 17: in_top =%d != out_top = %d\n",
  1201. in_top, out_top);
  1202. exit (1);
  1203. }
  1204. #endif
  1205. return (NoExceptionRaised);
  1206. break;
  1207. case OP_CVTPI2PS:
  1208. case OP_CVTSI2SS:
  1209. iopd1 = XmmiEnv->Ieee->Operand1.Value.I32Value;
  1210. switch (XmmiEnv->Rc) {
  1211. case _FpRoundNearest:
  1212. rc = _RC_NEAR;
  1213. break;
  1214. case _FpRoundMinusInfinity:
  1215. rc = _RC_DOWN;
  1216. break;
  1217. case _FpRoundPlusInfinity:
  1218. rc = _RC_UP;
  1219. break;
  1220. case _FpRoundChopped:
  1221. rc = _RC_CHOP;
  1222. break;
  1223. default:
  1224. ; // internal error
  1225. }
  1226. // execute the operation and check whether the inexact flag is set
  1227. // and the respective exception is enabled
  1228. _control87 (rc | _PC_24 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC);
  1229. // perform the conversion
  1230. __asm {
  1231. fnclex;
  1232. fild DWORD PTR iopd1; // exact
  1233. fstp DWORD PTR res; // may set P
  1234. }
  1235. // read status word
  1236. sw = _status87 ();
  1237. // if inexact traps are enabled and result is inexact, take inexact trap
  1238. if (!(XmmiEnv->Masks & P_MASK) && (sw & _SW_INEXACT)) {
  1239. // fill in part of the FP IEEE record
  1240. Fill_FPIEEE_RECORD (XmmiEnv);
  1241. XmmiEnv->Ieee->Operation = _FpCodeConvert;
  1242. XmmiEnv->Ieee->Status.Inexact = 1;
  1243. XmmiEnv->Flags |= P_MASK;
  1244. XmmiEnv->Ieee->Cause.Inexact = 1;
  1245. XmmiEnv->Ieee->Result.OperandValid = 1;
  1246. XmmiEnv->Ieee->Result.Value.Fp32Value = res; // exact
  1247. #ifdef _DEBUG_FPU
  1248. // read status word
  1249. sw = _status87 ();
  1250. out_top = (sw >> 11) & 0x07;
  1251. if (in_top != out_top) {
  1252. printf ("XMMI_FP_Emulate () ERROR 18: in_top =%d != out_top = %d\n",
  1253. in_top, out_top);
  1254. exit (1);
  1255. }
  1256. #endif
  1257. return (ExceptionRaised);
  1258. }
  1259. // if it got here, then there is no trap to be taken; in this case,
  1260. // the result has to be delivered
  1261. XmmiEnv->Ieee->Result.OperandValid = 1;
  1262. XmmiEnv->Ieee->Result.Value.Fp32Value = res; // exact
  1263. if (sw & _SW_INEXACT) {
  1264. XmmiEnv->Ieee->Status.Inexact = 1;
  1265. XmmiEnv->Flags |= P_MASK;
  1266. } else {
  1267. XmmiEnv->Ieee->Status.Inexact = 0;
  1268. }
  1269. XmmiEnv->Ieee->Status.Underflow = 0;
  1270. XmmiEnv->Ieee->Status.Overflow = 0;
  1271. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1272. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  1273. #ifdef _DEBUG_FPU
  1274. // read status word
  1275. sw = _status87 ();
  1276. out_top = (sw >> 11) & 0x07;
  1277. if (in_top != out_top) {
  1278. printf ("XMMI_FP_Emulate () ERROR 19: in_top =%d != out_top = %d\n",
  1279. in_top, out_top);
  1280. exit (1);
  1281. }
  1282. #endif
  1283. return (NoExceptionRaised);
  1284. break;
  1285. case OP_CVTPS2PI:
  1286. case OP_CVTSS2SI:
  1287. case OP_CVTTPS2PI:
  1288. case OP_CVTTSS2SI:
  1289. opd1 = XmmiEnv->Ieee->Operand1.Value.Fp32Value;
  1290. if (XmmiEnv->Daz) {
  1291. if (isdenormalf (opd1)) opd1 = opd1 * (float)0.0;
  1292. }
  1293. // adjust the operation code
  1294. switch (XmmiEnv->Ieee->Operation) {
  1295. case OP_CVTPS2PI:
  1296. case OP_CVTSS2SI:
  1297. XmmiEnv->Ieee->Operation = _FpCodeConvert;
  1298. break;
  1299. case OP_CVTTPS2PI:
  1300. case OP_CVTTSS2SI:
  1301. XmmiEnv->Ieee->Operation = _FpCodeConvertTrunc;
  1302. break;
  1303. default:
  1304. ; // will never occur
  1305. }
  1306. switch (XmmiEnv->Ieee->Operation) {
  1307. case _FpCodeConvert:
  1308. switch (XmmiEnv->Rc) {
  1309. case _FpRoundNearest:
  1310. rc = _RC_NEAR;
  1311. break;
  1312. case _FpRoundMinusInfinity:
  1313. rc = _RC_DOWN;
  1314. break;
  1315. case _FpRoundPlusInfinity:
  1316. rc = _RC_UP;
  1317. break;
  1318. case _FpRoundChopped:
  1319. rc = _RC_CHOP;
  1320. break;
  1321. default:
  1322. ; // internal error
  1323. }
  1324. break;
  1325. case _FpCodeConvertTrunc:
  1326. rc = _RC_CHOP;
  1327. break;
  1328. default:
  1329. ; // will never occur
  1330. }
  1331. // execute the operation and check whether the inexact flag is set
  1332. // and the respective exceptions enabled
  1333. _control87 (rc | _PC_24 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC);
  1334. // perform the conversion
  1335. __asm {
  1336. fnclex;
  1337. fld DWORD PTR opd1; // may set the denormal [ignored] or invalid
  1338. // status flags
  1339. fistp DWORD PTR ires; // may set the inexact or invalid status
  1340. // flags (for NaN or out-of-range)
  1341. }
  1342. // read status word
  1343. sw = _status87 ();
  1344. // if invalid flag is set, and invalid exceptions are enabled, take trap
  1345. if (!(XmmiEnv->Masks & I_MASK) && (sw & _SW_INVALID)) {
  1346. // fill in part of the FP IEEE record
  1347. Fill_FPIEEE_RECORD (XmmiEnv);
  1348. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1349. XmmiEnv->Flags |= I_MASK;
  1350. // Cause = Enable & Status
  1351. XmmiEnv->Ieee->Cause.InvalidOperation = 1;
  1352. #ifdef _DEBUG_FPU
  1353. // read status word
  1354. sw = _status87 ();
  1355. out_top = (sw >> 11) & 0x07;
  1356. if (in_top != out_top) {
  1357. printf ("XMMI_FP_Emulate () ERROR 20: in_top =%d != out_top = %d\n",
  1358. in_top, out_top);
  1359. exit (1);
  1360. }
  1361. #endif
  1362. return (ExceptionRaised);
  1363. }
  1364. // at this point, there are no enabled invalid exceptions; the
  1365. // instruction might have led to an enabled inexact exception, or to
  1366. // no exception at all
  1367. XmmiEnv->Ieee->Result.Value.I32Value = ires;
  1368. // if inexact traps are enabled and result is inexact, take inexact trap
  1369. // (no flush-to-zero situation is possible)
  1370. if (!(XmmiEnv->Masks & P_MASK) && (sw & _SW_INEXACT)) {
  1371. // fill in part of the FP IEEE record
  1372. Fill_FPIEEE_RECORD (XmmiEnv);
  1373. XmmiEnv->Ieee->Status.Inexact = 1;
  1374. XmmiEnv->Flags |= P_MASK;
  1375. XmmiEnv->Ieee->Cause.Inexact = 1;
  1376. XmmiEnv->Ieee->Result.OperandValid = 1;
  1377. #ifdef _DEBUG_FPU
  1378. // read status word
  1379. sw = _status87 ();
  1380. out_top = (sw >> 11) & 0x07;
  1381. if (in_top != out_top) {
  1382. printf ("XMMI_FP_Emulate () ERROR 21: in_top =%d != out_top = %d\n",
  1383. in_top, out_top);
  1384. exit (1);
  1385. }
  1386. #endif
  1387. return (ExceptionRaised);
  1388. }
  1389. // if it got here, then there is no trap to be taken; return result
  1390. XmmiEnv->Ieee->Result.OperandValid = 1;
  1391. if (sw & _SW_INEXACT) {
  1392. XmmiEnv->Ieee->Status.Inexact = 1;
  1393. XmmiEnv->Flags |= P_MASK;
  1394. } else {
  1395. XmmiEnv->Ieee->Status.Inexact = 0;
  1396. }
  1397. XmmiEnv->Ieee->Status.Underflow = 0;
  1398. XmmiEnv->Ieee->Status.Overflow = 0;
  1399. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1400. if (sw & _SW_INVALID) {
  1401. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1402. XmmiEnv->Flags |= I_MASK;
  1403. } else {
  1404. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  1405. }
  1406. #ifdef _DEBUG_FPU
  1407. // read status word
  1408. sw = _status87 ();
  1409. out_top = (sw >> 11) & 0x07;
  1410. if (in_top != out_top) {
  1411. printf ("XMMI_FP_Emulate () ERROR 22: in_top =%d != out_top = %d\n",
  1412. in_top, out_top);
  1413. exit (1);
  1414. }
  1415. #endif
  1416. return (NoExceptionRaised);
  1417. break;
  1418. case OP_MAXPS:
  1419. case OP_MAXSS:
  1420. case OP_MINPS:
  1421. case OP_MINSS:
  1422. opd1 = XmmiEnv->Ieee->Operand1.Value.Fp32Value;
  1423. opd2 = XmmiEnv->Ieee->Operand2.Value.Fp32Value;
  1424. if (XmmiEnv->Daz) {
  1425. if (isdenormalf (opd1)) opd1 = opd1 * (float)0.0;
  1426. if (isdenormalf (opd2)) opd2 = opd2 * (float)0.0;
  1427. }
  1428. // adjust operation code
  1429. switch (XmmiEnv->Ieee->Operation) {
  1430. case OP_MAXPS:
  1431. case OP_MAXSS:
  1432. XmmiEnv->Ieee->Operation = _FpCodeFmax;
  1433. break;
  1434. case OP_MINPS:
  1435. case OP_MINSS:
  1436. XmmiEnv->Ieee->Operation = _FpCodeFmin;
  1437. break;
  1438. default:
  1439. ; // will never occur
  1440. }
  1441. // check whether an invalid exception has to be raised
  1442. if (isnanf (opd1) || isnanf (opd2))
  1443. invalid_exc = 1;
  1444. else
  1445. invalid_exc = 0;
  1446. // if invalid_exc = 1, and invalid exceptions are enabled, take trap
  1447. if (invalid_exc && !(XmmiEnv->Masks & I_MASK)) {
  1448. // fill in part of the FP IEEE record
  1449. Fill_FPIEEE_RECORD (XmmiEnv);
  1450. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1451. XmmiEnv->Flags |= I_MASK;
  1452. // Cause = Enable & Status
  1453. XmmiEnv->Ieee->Cause.InvalidOperation = 1;
  1454. #ifdef _DEBUG_FPU
  1455. // read status word
  1456. sw = _status87 ();
  1457. out_top = (sw >> 11) & 0x07;
  1458. if (in_top != out_top) {
  1459. printf ("XMMI_FP_Emulate () ERROR 23: in_top =%d != out_top = %d\n",
  1460. in_top, out_top);
  1461. exit (1);
  1462. }
  1463. #endif
  1464. return (ExceptionRaised);
  1465. }
  1466. // checking for NaN operands has priority over denormal exceptions
  1467. if (invalid_exc) {
  1468. XmmiEnv->Ieee->Result.OperandValid = 1;
  1469. XmmiEnv->Ieee->Result.Value.Fp32Value = opd2;
  1470. XmmiEnv->Ieee->Status.Inexact = 0;
  1471. XmmiEnv->Ieee->Status.Underflow = 0;
  1472. XmmiEnv->Ieee->Status.Overflow = 0;
  1473. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1474. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1475. XmmiEnv->Flags |= I_MASK;
  1476. #ifdef _DEBUG_FPU
  1477. // read status word
  1478. sw = _status87 ();
  1479. out_top = (sw >> 11) & 0x07;
  1480. if (in_top != out_top) {
  1481. printf ("XMMI_FP_Emulate () ERROR 24: in_top =%d != out_top = %d\n",
  1482. in_top, out_top);
  1483. exit (1);
  1484. }
  1485. #endif
  1486. return (NoExceptionRaised);
  1487. }
  1488. // check whether a denormal exception has to be raised
  1489. if (isdenormalf (opd1) || isdenormalf (opd2)) {
  1490. denormal_exc = 1;
  1491. XmmiEnv->Flags |= D_MASK;
  1492. } else {
  1493. denormal_exc = 0;
  1494. }
  1495. // if denormal_exc = 1, and denormal exceptions are enabled, take trap
  1496. if (denormal_exc && !(XmmiEnv->Masks & D_MASK)) {
  1497. // fill in part of the FP IEEE record
  1498. Fill_FPIEEE_RECORD (XmmiEnv);
  1499. // Note: the exception code is STATUS_FLOAT_INVALID in this case
  1500. #ifdef _DEBUG_FPU
  1501. // read status word
  1502. sw = _status87 ();
  1503. out_top = (sw >> 11) & 0x07;
  1504. if (in_top != out_top) {
  1505. printf ("XMMI_FP_Emulate () ERROR 25: in_top =%d != out_top = %d\n",
  1506. in_top, out_top);
  1507. exit (1);
  1508. }
  1509. #endif
  1510. return (ExceptionRaised);
  1511. }
  1512. // no exception has to be raised, and no operand is a NaN; calculate
  1513. // and deliver the result
  1514. if (opd1 < opd2) {
  1515. switch (XmmiEnv->Ieee->Operation) {
  1516. case _FpCodeFmax:
  1517. XmmiEnv->Ieee->Result.Value.Fp32Value = opd2;
  1518. break;
  1519. case _FpCodeFmin:
  1520. XmmiEnv->Ieee->Result.Value.Fp32Value = opd1;
  1521. break;
  1522. default:
  1523. ; // will never occur
  1524. }
  1525. } else if (opd1 > opd2) {
  1526. switch (XmmiEnv->Ieee->Operation) {
  1527. case _FpCodeFmax:
  1528. XmmiEnv->Ieee->Result.Value.Fp32Value = opd1;
  1529. break;
  1530. case _FpCodeFmin:
  1531. XmmiEnv->Ieee->Result.Value.Fp32Value = opd2;
  1532. break;
  1533. default:
  1534. ; // will never occur
  1535. }
  1536. } else if (opd1 == opd2) {
  1537. XmmiEnv->Ieee->Result.Value.Fp32Value = opd2;
  1538. } else { // could eliminate this case
  1539. #ifdef _DEBUG_FPU
  1540. fprintf (stderr, "XMMI_FP_Emulation () INTERNAL XMMI_FP_Emulate () ERROR for MAXPS/MAXSS/MINPS/MINSS\n");
  1541. #endif
  1542. }
  1543. XmmiEnv->Ieee->Result.OperandValid = 1;
  1544. XmmiEnv->Ieee->Status.Inexact = 0;
  1545. XmmiEnv->Ieee->Status.Underflow = 0;
  1546. XmmiEnv->Ieee->Status.Overflow = 0;
  1547. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1548. // Note that the denormal flag will not be updated by _fpieee_flt (),
  1549. // even if an operand is denormal
  1550. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  1551. #ifdef _DEBUG_FPU
  1552. // read status word
  1553. sw = _status87 ();
  1554. out_top = (sw >> 11) & 0x07;
  1555. if (in_top != out_top) {
  1556. printf ("XMMI_FP_Emulate () ERROR 26: in_top =%d != out_top = %d\n",
  1557. in_top, out_top);
  1558. exit (1);
  1559. }
  1560. #endif
  1561. return (NoExceptionRaised);
  1562. break;
  1563. case OP_SQRTPS:
  1564. case OP_SQRTSS:
  1565. opd1 = XmmiEnv->Ieee->Operand1.Value.Fp32Value;
  1566. if (XmmiEnv->Daz) {
  1567. if (isdenormalf (opd1)) opd1 = opd1 * (float)0.0;
  1568. }
  1569. // adjust operation code
  1570. XmmiEnv->Ieee->Operation = _FpCodeSquareRoot;
  1571. // execute the operation and check whether the invalid, denormal, or
  1572. // inexact flags are set and the respective exceptions enabled
  1573. switch (XmmiEnv->Rc) {
  1574. case _FpRoundNearest:
  1575. rc = _RC_NEAR;
  1576. break;
  1577. case _FpRoundMinusInfinity:
  1578. rc = _RC_DOWN;
  1579. break;
  1580. case _FpRoundPlusInfinity:
  1581. rc = _RC_UP;
  1582. break;
  1583. case _FpRoundChopped:
  1584. rc = _RC_CHOP;
  1585. break;
  1586. default:
  1587. ; // internal error
  1588. }
  1589. _control87 (rc | _PC_24 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC);
  1590. // perform the square root
  1591. __asm {
  1592. fnclex;
  1593. fld DWORD PTR opd1; // may set the denormal or invalid status flags
  1594. fsqrt; // may set the inexact or invalid status flags
  1595. fstp DWORD PTR res; // exact
  1596. }
  1597. // read status word
  1598. sw = _status87 ();
  1599. if (sw & _SW_INVALID) sw = sw & ~0x00080000; // clr D flag for sqrt(-den)
  1600. // if invalid flag is set, and invalid exceptions are enabled, take trap
  1601. if (!(XmmiEnv->Masks & I_MASK) && (sw & _SW_INVALID)) {
  1602. // fill in part of the FP IEEE record
  1603. Fill_FPIEEE_RECORD (XmmiEnv);
  1604. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1605. XmmiEnv->Flags |= I_MASK;
  1606. // Cause = Enable & Status
  1607. XmmiEnv->Ieee->Cause.InvalidOperation = 1;
  1608. #ifdef _DEBUG_FPU
  1609. // read status word
  1610. sw = _status87 ();
  1611. out_top = (sw >> 11) & 0x07;
  1612. if (in_top != out_top) {
  1613. printf ("XMMI_FP_Emulate () ERROR 27: in_top =%d != out_top = %d\n",
  1614. in_top, out_top);
  1615. exit (1);
  1616. }
  1617. #endif
  1618. return (ExceptionRaised);
  1619. }
  1620. if (sw & _SW_DENORMAL) XmmiEnv->Flags |= D_MASK;
  1621. // if denormal flag is set, and denormal exceptions are enabled, take trap
  1622. if (!(XmmiEnv->Masks & D_MASK) && (sw & _SW_DENORMAL)) {
  1623. // fill in part of the FP IEEE record
  1624. Fill_FPIEEE_RECORD (XmmiEnv);
  1625. #ifdef _DEBUG_FPU
  1626. // read status word
  1627. sw = _status87 ();
  1628. out_top = (sw >> 11) & 0x07;
  1629. if (in_top != out_top) {
  1630. printf ("XMMI_FP_Emulate () ERROR 28: in_top =%d != out_top = %d\n",
  1631. in_top, out_top);
  1632. exit (1);
  1633. }
  1634. #endif
  1635. return (ExceptionRaised);
  1636. }
  1637. // the result cannot be tiny
  1638. // at this point, there are no enabled I or D exceptions; the instr.
  1639. // might lead to an enabled inexact exception or to no exception (this
  1640. // includes the case of a NaN or negative operand); exceptions must have
  1641. // been disabled before calling this function; an inexact exception is
  1642. // reported on the fsqrt
  1643. // if (the MXCSR inexact traps are disabled or the inexact flag is clear)
  1644. // then deliver the result (the status flags are sticky, so they are
  1645. // all set correctly already)
  1646. // if it got here, then there is either an inexact trap to be taken, or
  1647. // no trap at all
  1648. XmmiEnv->Ieee->Result.Value.Fp32Value = res; // exact
  1649. // if inexact traps are enabled and result is inexact, take inexact trap
  1650. if (!(XmmiEnv->Masks & P_MASK) && (sw & _SW_INEXACT)) {
  1651. // fill in part of the FP IEEE record
  1652. Fill_FPIEEE_RECORD (XmmiEnv);
  1653. XmmiEnv->Ieee->Status.Inexact = 1;
  1654. XmmiEnv->Flags |= P_MASK;
  1655. XmmiEnv->Ieee->Cause.Inexact = 1;
  1656. XmmiEnv->Ieee->Result.OperandValid = 1;
  1657. #ifdef _DEBUG_FPU
  1658. // read status word
  1659. sw = _status87 ();
  1660. out_top = (sw >> 11) & 0x07;
  1661. if (in_top != out_top) {
  1662. printf ("XMMI_FP_Emulate () ERROR 29: in_top =%d != out_top = %d\n",
  1663. in_top, out_top);
  1664. exit (1);
  1665. }
  1666. #endif
  1667. return (ExceptionRaised);
  1668. }
  1669. // no trap was taken
  1670. XmmiEnv->Ieee->Result.OperandValid = 1;
  1671. XmmiEnv->Ieee->Status.Underflow = 0;
  1672. XmmiEnv->Ieee->Status.Overflow = 0;
  1673. if (sw & _SW_INEXACT) {
  1674. XmmiEnv->Ieee->Status.Inexact = 1;
  1675. XmmiEnv->Flags |= P_MASK;
  1676. } else {
  1677. XmmiEnv->Ieee->Status.Inexact = 0;
  1678. }
  1679. // note that there is no way to
  1680. // communicate to the caller that the denormal flag was set - we count
  1681. // on the XMMI instruction to have set the denormal flag in MXCSR if
  1682. // needed, regardless of the other components of the input operands
  1683. // (invalid or not); the caller will have to update the inexact flag
  1684. // in MXCSR
  1685. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1686. if (sw & _SW_INVALID) {
  1687. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1688. XmmiEnv->Flags = I_MASK; // no other flags set if invalid is set
  1689. } else {
  1690. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  1691. }
  1692. #ifdef _DEBUG_FPU
  1693. // read status word
  1694. sw = _status87 ();
  1695. out_top = (sw >> 11) & 0x07;
  1696. if (in_top != out_top) {
  1697. printf ("XMMI_FP_Emulate () ERROR 30: in_top =%d != out_top = %d\n",
  1698. in_top, out_top);
  1699. exit (1);
  1700. }
  1701. #endif
  1702. return (NoExceptionRaised);
  1703. break;
  1704. case OP_UNSPEC:
  1705. #ifdef _DEBUG_FPU
  1706. fprintf (stderr, "XMMI_FP_Emulation internal error: unknown operation code OP_UNSPEC\n");
  1707. #endif
  1708. break;
  1709. default:
  1710. #ifdef _DEBUG_FPU
  1711. fprintf (stderr, "XMMI_FP_Emulation internal error: unknown operation code %d\n", XmmiEnv->Ieee->Operation);
  1712. #endif
  1713. break;
  1714. }
  1715. }
  1716. static int
  1717. issnanf (float f)
  1718. {
  1719. // checks whether f is a signaling NaN
  1720. unsigned int *fp;
  1721. fp = (unsigned int *)&f;
  1722. if (((fp[0] & 0x7fc00000) == 0x7f800000) && ((fp[0] & 0x003fffff) != 0))
  1723. return (1);
  1724. else
  1725. return (0);
  1726. }
  1727. static int
  1728. isnanf (float f)
  1729. {
  1730. // checks whether f is a NaN
  1731. unsigned int *fp;
  1732. fp = (unsigned int *)&f;
  1733. if (((fp[0] & 0x7f800000) == 0x7f800000) && ((fp[0] & 0x007fffff) != 0))
  1734. return (1);
  1735. else
  1736. return (0);
  1737. }
  1738. static float
  1739. quietf (float f)
  1740. {
  1741. // makes a signaling NaN quiet, and leaves a quiet NaN unchanged; does
  1742. // not check that the input value f is a NaN
  1743. unsigned int *fp;
  1744. fp = (unsigned int *)&f;
  1745. *fp = *fp | 0x00400000;
  1746. return (f);
  1747. }
  1748. static int
  1749. isdenormalf (float f)
  1750. {
  1751. // checks whether f is a denormal
  1752. unsigned int *fp;
  1753. fp = (unsigned int *)&f;
  1754. if ((fp[0] & 0x7f800000) == 0x0 && (fp[0] & 0x007fffff) != 0x0)
  1755. return (1);
  1756. else
  1757. return (0);
  1758. }
  1759. static void Fill_FPIEEE_RECORD (PXMMI_ENV XmmiEnv)
  1760. {
  1761. // fill in part of the FP IEEE record
  1762. XmmiEnv->Ieee->RoundingMode = XmmiEnv->Rc;
  1763. XmmiEnv->Ieee->Precision = XmmiEnv->Precision;
  1764. XmmiEnv->Ieee->Enable.Inexact = !(XmmiEnv->Masks & P_MASK);
  1765. XmmiEnv->Ieee->Enable.Underflow = !(XmmiEnv->Masks & U_MASK);
  1766. XmmiEnv->Ieee->Enable.Overflow = !(XmmiEnv->Masks & O_MASK);
  1767. XmmiEnv->Ieee->Enable.ZeroDivide = !(XmmiEnv->Masks & Z_MASK);
  1768. XmmiEnv->Ieee->Enable.InvalidOperation = !(XmmiEnv->Masks & I_MASK);
  1769. XmmiEnv->Ieee->Status.Inexact = 0;
  1770. XmmiEnv->Ieee->Status.Underflow = 0;
  1771. XmmiEnv->Ieee->Status.Overflow = 0;
  1772. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1773. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  1774. // Cause = Enable & Status
  1775. XmmiEnv->Ieee->Cause.Inexact = 0;
  1776. XmmiEnv->Ieee->Cause.Underflow = 0;
  1777. XmmiEnv->Ieee->Cause.Overflow = 0;
  1778. XmmiEnv->Ieee->Cause.ZeroDivide = 0;
  1779. XmmiEnv->Ieee->Cause.InvalidOperation = 0;
  1780. }