Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2834 lines
87 KiB

  1. /*****************************************************************************
  2. * *
  3. * Intel Confidential *
  4. * *
  5. * *
  6. * XMMI2_FP_Emulation(): WNI FP instruction emulation for the FP IEEE filter *
  7. * *
  8. * *
  9. * History: *
  10. * Marius Cornea-Hasegan, Nov 1999; added DAZ Oct 2000 *
  11. * marius.cornea@intel.com *
  12. * *
  13. *****************************************************************************/
  14. //#define _DEBUG_FPU
  15. // XMMI2_FP_Emulation () receives the input operands of an XMMI2 FP instruction
  16. // (operating on double-precision floating-point numbers and/or signed
  17. // integers), that might cause a floating-point exception (enabled or not).
  18. //
  19. // Arguments: PXMMI_ENV XmmiEnv
  20. //
  21. // The type of every field (INPUT or OUTPUT) is indicated below:
  22. //
  23. // typedef struct _XMMI_ENV {
  24. // ULONG Masks; //Mask values from MxCsr INPUT
  25. // ULONG Fz; //Flush to Zero INPUT
  26. // ULONG Rc; //Rounding INPUT
  27. // ULONG Precision; //Precision INPUT
  28. // ULONG Imm8; //imm8 predicate INPUT
  29. // ULONG EFlags; //EFlags INPUT/OUTPUT
  30. // _FPIEEE_RECORD *Ieee; //FP IEEE Record INPUT/OUTPUT,
  31. // field dependent
  32. // } XMMI_ENV, *PXMMI_ENV;
  33. //
  34. // The _FP_IEEE record and the _FPIEEE_VALUE are defined as:
  35. //
  36. // typedef struct {
  37. // unsigned int RoundingMode : 2; OUTPUT
  38. // unsigned int Precision : 3; OUTPUT
  39. // unsigned int Operation :12; INPUT
  40. // _FPIEEE_EXCEPTION_FLAGS Cause; OUTPUT
  41. // _FPIEEE_EXCEPTION_FLAGS Enable; OUTPUT
  42. // _FPIEEE_EXCEPTION_FLAGS Status; OUTPUT
  43. // _FPIEEE_VALUE Operand1; INPUT
  44. // _FPIEEE_VALUE Operand2; INPUT
  45. // _FPIEEE_VALUE Result; INPUT/OUTPUT,
  46. // field dependent
  47. // } _FPIEEE_RECORD, *PFPIEEE_RECORD;
  48. //
  49. // typedef struct {
  50. // union {
  51. // _FP32 Fp32Value;
  52. // _FP64 Fp64Value;
  53. // _FP80 Fp80Value;
  54. // _FP128 Fp128Value;
  55. // _I16 I16Value;
  56. // _I32 I32Value;
  57. // _I64 I64Value;
  58. // _U16 U16Value;
  59. // _U32 U32Value;
  60. // _U64 U64Value;
  61. // _BCD80 Bcd80Value;
  62. // char *StringValue;
  63. // int CompareValue;
  64. // } Value; INPUT for operands,
  65. // OUTPUT for result
  66. //
  67. // unsigned int OperandValid : 1; INPUT for operands
  68. // INPUT/OUTPUT for result
  69. // unsigned int Format : 4; INPUT
  70. //
  71. // } _FPIEEE_VALUE;
  72. //
  73. // Return Value:
  74. // ExceptionRaised if an enabled floating-point exception condition is
  75. // detected; in this case, the fields of XmmiEnv->Ieee are filled in
  76. // appropriately to be passed directly to a user exception handler; the
  77. // XmmiEnv->Ieee->Cause bits indicate the cause of the exception, but if
  78. // a denormal exception occurred, then no XmmiEnv->Ieee->Cause bit is set;
  79. // upon return from the user handler, the caller of XMMI2_FP_Emulation should
  80. // interpret the result for a compare instruction (CMPPD, CMPSD, COMISD,
  81. // UCOMISD); the Enable, Rounding, and Precision fields in _FPIEEE_RECORD
  82. // have to be checked too for possible changes by the user handler
  83. //
  84. // NoExceptionRaised if no floating-point exception condition occurred, or
  85. // if a disabled floating-point exception occurred; in this case,
  86. // XmmiEnv->Ieee->Result.Value contains the instruction's result,
  87. // XmmiEnv->Ieee->Status contains the IEEE floating-point status flags
  88. //
  89. // Implementation Notes:
  90. //
  91. // - the operation code in XmmiEnv->Ieee->Operation is changed as expected
  92. // by a user exception handler (even if no exception is raised):
  93. // from OP_ADDPD, OP_ADDSD to _FpCodeAdd
  94. // from OP_SUBPD, OP_SUBSD to _FpCodeSubtract
  95. // from OP_MULPD, OP_MULSD to _FpCodeMultiply
  96. // from OP_DIVPD, OP_DIVSD to _FpCodeDivide
  97. // from OP_CMPPD, OP_CMPSD to _FpCodeCompare
  98. // from OP_COMISD, OP_UCOMISD to _FpCodeCompare
  99. // from OP_CVTDQ2PS, OP_CVTPS2DQ, OP_CVTPD2PI, OP_CVTSD2SI, OP_CVTPD2DQ,
  100. // OP_CVTPS2PD, OP_CVTSS2SD, OP_CVTPD2PS, OP_CVTSD2SS to _FpCodeConvert
  101. // from OP_CVTTPS2DQ, OP_CVTTPD2PI, OP_CVTTSD2SI, OP_CVTTPD2DQ
  102. // to _FpCodeConvertTrunc
  103. // from OP_MAXPD, OP_MAXSD to _FpCodeMax
  104. // from OP_MINPD, OP_MINSD to _FpCodeMin
  105. // from OP_SQRTPD, OP_SQRTSD to _FpCodeSquareRoot
  106. //
  107. //
  108. #include <wtypes.h>
  109. #include <trans.h>
  110. #include <float.h>
  111. #include "xmmi_types.h"
  112. #include "filter.h"
  113. #ifdef _XMMI_DEBUG
  114. #include "temp_context.h"
  115. #include "debug.h"
  116. #endif
  // Masks for the individual exception bits (P = inexact, U = underflow,
  // O = overflow, Z = divide by zero, D = denormal, I = invalid).
  // XMMI2_FP_Emulation tests them against XmmiEnv->Masks and ORs them into
  // XmmiEnv->Flags.
  #define P_MASK 0x20
  #define U_MASK 0x10
  #define O_MASK 0x08
  #define Z_MASK 0x04
  #define D_MASK 0x02
  #define I_MASK 0x01

  // Floating-point constants are spelled out as raw IEEE bit patterns in
  // arrays of 32-bit words (this assumes a 32-bit `unsigned`), least
  // significant word first, and are read back through a pointer cast.
  // Every value macro is parenthesized so it expands safely inside a larger
  // expression.

  // 32-bit constants
  static unsigned ZEROFA[] = {0x00000000};
  // +0.0f
  #define ZEROF (*(float *)ZEROFA)
  static unsigned NZEROFA[] = {0x80000000};
  // -0.0f
  #define NZEROF (*(float *)NZEROFA)

  // 64-bit constants
  static unsigned ZERODA[] = {0x00000000, 0x00000000};
  // +0.0
  #define ZEROD (*(double *)ZERODA)
  static unsigned NZERODA[] = {0x00000000, 0x80000000};
  // -0.0
  #define NZEROD (*(double *)NZERODA)
  static unsigned POSINFDA[] = {0x00000000, 0x7ff00000};
  // +infinity; must be read as a double - a float read sees only the low
  // word and yields 0.0f
  #define POSINFD (*(double *)POSINFDA)
  static unsigned NEGINFDA[] = {0x00000000, 0xfff00000};
  // -infinity; same remark as for POSINFD
  #define NEGINFD (*(double *)NEGINFDA)
  #ifdef _DEBUG_FPU
  // double-precision QNaN Indefinite: sign = 1, exponent all ones, top
  // fraction bit set (0xffc00000 is the single-precision high word and
  // would encode a finite double)
  static unsigned QNANINDEFDA[] = {0x00000000, 0xfff80000};
  #define QNANINDEFD (*(double *)QNANINDEFDA)
  #endif
  static unsigned MIN_SINGLE_NORMALA[] = {0x00000000, 0x38100000};
  // +1.0 * 2^-126, encoded as a double
  #define MIN_SINGLE_NORMAL (*(double *)MIN_SINGLE_NORMALA)
  static unsigned MAX_SINGLE_NORMALA[] = {0xe0000000, 0x47efffff};
  // +1.1...1*2^127 (24 significant bits), encoded as a double: the 23
  // fraction bits of the single occupy the top 23 fraction bits of the double
  #define MAX_SINGLE_NORMAL (*(double *)MAX_SINGLE_NORMALA)
  static unsigned TWO_TO_192A[] = {0x00000000, 0x4bf00000};
  // +1.0 * 2^192
  #define TWO_TO_192 (*(double *)TWO_TO_192A)
  static unsigned TWO_TO_M192A[] = {0x00000000, 0x33f00000};
  // +1.0 * 2^-192
  #define TWO_TO_M192 (*(double *)TWO_TO_M192A)

  // 80-bit constants
  // NOTE(review): the casts below do not match the 10-byte layout of these
  // arrays, so dereferencing the macros reads only the low 4 or 8 bytes.
  // XMMI2_FP_Emulation copies the arrays with memcpy and loads them with
  // `fld TBYTE PTR` instead; confirm nothing else in the file uses the
  // macros as values.
  static unsigned POSINFDEA[] = {0x00000000, 0x80000000, 0x00007fff};
  #define POSINFDE (*(float *)POSINFDEA)
  static unsigned NEGINFDEA[] = {0x00000000, 0x80000000, 0x0000ffff};
  #define NEGINFDE (*(float *)NEGINFDEA)
  static unsigned MIN_DOUBLE_NORMALA[] = {0x00000000, 0x80000000, 0x00003c01};
  // +1.0 * 2^-1022
  #define MIN_DOUBLE_NORMAL (*(double *)MIN_DOUBLE_NORMALA)
  static unsigned MAX_DOUBLE_NORMALA[] = {0xfffff800, 0xffffffff, 0x000043fe};
  // +1.1...1*2^1023
  #define MAX_DOUBLE_NORMAL (*(double *)MAX_DOUBLE_NORMALA)
  static unsigned TWO_TO_1536A[] = {0x00000000, 0x80000000, 0x000045ff};
  // +1.0 * 2^1536
  #define TWO_TO_1536 (*(double *)TWO_TO_1536A)
  static unsigned TWO_TO_M1536A[] = {0x00000000, 0x80000000, 0x000039ff};
  // +1.0 * 2^-1536
  #define TWO_TO_M1536 (*(double *)TWO_TO_M1536A)
  167. // auxiliary functions
  168. static void Fill_FPIEEE_RECORD (PXMMI_ENV XmmiEnv);
  169. static int iszerod (double);
  170. static int isinfd (double);
  171. static int issnand (double);
  172. static int isnand (double);
  173. static double quietd (double);
  174. static int isdenormald (double);
  175. static int isdenormalf (float f);
  176. ULONG
  177. XMMI2_FP_Emulation (PXMMI_ENV XmmiEnv)
  178. {
  179. float opd24, res24;
  180. double opd1, opd2, res, dbl_res24;
  181. int iopd1; // for conversions from int to float
  182. int ires; // for conversions from float to int
  183. char dbl_ext_res64[10];
  184. // needed to check tininess, to provide a scaled result to
  185. // an underflow/overflow trap handler, and in flush-to-zero
  186. double dbl_res64;
  187. unsigned int result_tiny;
  188. unsigned int result_huge;
  189. unsigned int rc, sw;
  190. unsigned short sw1, sw2, sw3, sw4;
  191. unsigned long imm8;
  192. unsigned int invalid_exc;
  193. unsigned int denormal_exc;
  194. unsigned __int64 cmp_res;
  195. unsigned char min_double_normal[10];
  196. unsigned char max_double_normal[10];
  197. unsigned char posinfde[10];
  198. unsigned char neginfde[10];
  199. unsigned char two_to_1536[10];
  200. unsigned char two_to_m1536[10];
  201. // Note that ExceptionCode is always STATUS_FLOAT_MULTIPLE_FAULTS in the
  202. // calling routine, so we have to check first for faults, and then for traps
  203. #ifdef _DEBUG_FPU
  204. unsigned int in_top;
  205. unsigned int out_top;
  206. char fp_env[108];
  207. unsigned short int *control_word, *status_word, *tag_word;
  208. // read status word
  209. sw = _status87 ();
  210. in_top = (sw >> 11) & 0x07;
  211. if (in_top != 0x0)
  212. printf ("XMMI2_FP_Emulation WARNING: in_top = %d\n", in_top);
  213. __asm {
  214. fnsave fp_env;
  215. }
  216. control_word = (unsigned short *)fp_env;
  217. status_word = (unsigned short *)(fp_env + 2);
  218. tag_word = (unsigned short *)(fp_env + 8);
  219. if (*tag_word != 0xffff)
  220. printf ("XMMI2_FP_Emulation WARNING: tag_word = %x\n",
  221. *tag_word);
  222. #endif
  223. _asm {
  224. fninit;
  225. }
  226. #ifdef _DEBUG_FPU
  227. // read status word
  228. sw = _status87 ();
  229. in_top = (sw >> 11) & 0x07;
  230. if (in_top != 0x0)
  231. printf ("XMMI2_FP_Emulation () XMMI2_FP_Emulation () ERROR: in_top = %d\n", in_top);
  232. __asm {
  233. fnsave fp_env;
  234. }
  235. tag_word = (unsigned short *)(fp_env + 8);
  236. if (*tag_word != 0xffff) {
  237. printf ("XMMI2_FP_Emulation () XMMI2_FP_Emulation () ERROR: tag_word = %x\n",
  238. *tag_word);
  239. printf ("control, status, tag = %x %x %x %x %x %x\n",
  240. fp_env[0] & 0xff, fp_env[1] & 0xff, fp_env[4] & 0xff,
  241. fp_env[5] & 0xff, fp_env[8] & 0xff, fp_env[9] & 0xff);
  242. }
  243. #endif
  244. #ifdef _XMMI_DEBUG
  245. print_FPIEEE_RECORD (XmmiEnv);
  246. #endif
  247. result_tiny = 0;
  248. result_huge = 0;
  249. XmmiEnv->Ieee->RoundingMode = XmmiEnv->Rc;
  250. XmmiEnv->Ieee->Precision = XmmiEnv->Precision;
  251. switch (XmmiEnv->Ieee->Operation) {
  252. case OP_ADDPD:
  253. case OP_ADDSD:
  254. case OP_SUBPD:
  255. case OP_SUBSD:
  256. case OP_MULPD:
  257. case OP_MULSD:
  258. case OP_DIVPD:
  259. case OP_DIVSD:
  260. opd1 = XmmiEnv->Ieee->Operand1.Value.Fp64Value;
  261. opd2 = XmmiEnv->Ieee->Operand2.Value.Fp64Value;
  262. if (XmmiEnv->Daz) {
  263. if (isdenormald (opd1)) opd1 = opd1 * 0.0;
  264. if (isdenormald (opd2)) opd2 = opd2 * 0.0;
  265. }
  266. // adjust operation code
  267. switch (XmmiEnv->Ieee->Operation) {
  268. case OP_ADDPD:
  269. case OP_ADDSD:
  270. XmmiEnv->Ieee->Operation = _FpCodeAdd;
  271. break;
  272. case OP_SUBPD:
  273. case OP_SUBSD:
  274. XmmiEnv->Ieee->Operation = _FpCodeSubtract;
  275. break;
  276. case OP_MULPD:
  277. case OP_MULSD:
  278. XmmiEnv->Ieee->Operation = _FpCodeMultiply;
  279. break;
  280. case OP_DIVPD:
  281. case OP_DIVSD:
  282. XmmiEnv->Ieee->Operation = _FpCodeDivide;
  283. break;
  284. default:
  285. ; // will never occur
  286. }
  287. // execute the operation and check whether the invalid, denormal, or
  288. // divide by zero flags are set and the respective exceptions enabled
  289. switch (XmmiEnv->Rc) {
  290. case _FpRoundNearest:
  291. rc = _RC_NEAR;
  292. break;
  293. case _FpRoundMinusInfinity:
  294. rc = _RC_DOWN;
  295. break;
  296. case _FpRoundPlusInfinity:
  297. rc = _RC_UP;
  298. break;
  299. case _FpRoundChopped:
  300. rc = _RC_CHOP;
  301. break;
  302. default:
  303. ; // internal error
  304. }
  305. _control87 (rc | _PC_53 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC);
  306. // compute result and round to the destination precision, with
  307. // "unbounded" exponent (first IEEE rounding)
  308. switch (XmmiEnv->Ieee->Operation) {
  309. case _FpCodeAdd:
  310. // perform the add
  311. __asm {
  312. fnclex;
  313. // load input operands
  314. fld QWORD PTR opd1; // may set the denormal or invalid status flags
  315. fld QWORD PTR opd2; // may set the denormal or invalid status flags
  316. faddp st(1), st(0); // may set the inexact or invalid status flags
  317. // store result
  318. fstp QWORD PTR dbl_res64;
  319. }
  320. __asm {
  321. fnclex;
  322. // load input operands
  323. fld QWORD PTR opd1; // may set the denormal or invalid status flags
  324. fld QWORD PTR opd2; // may set the denormal or invalid status flags
  325. faddp st(1), st(0); // may set the inexact or invalid status flags
  326. // store result
  327. fstp TBYTE PTR dbl_ext_res64; // exact
  328. }
  329. break;
  330. case _FpCodeSubtract:
  331. // perform the subtract
  332. __asm {
  333. fnclex;
  334. // load input operands
  335. fld QWORD PTR opd1; // may set the denormal or invalid status flags
  336. fld QWORD PTR opd2; // may set the denormal or invalid status flags
  337. fsubp st(1), st(0); // may set the inexact or invalid status flags
  338. // store result
  339. fstp QWORD PTR dbl_res64;
  340. }
  341. __asm {
  342. fnclex;
  343. // load input operands
  344. fld QWORD PTR opd1; // may set the denormal or invalid status flags
  345. fld QWORD PTR opd2; // may set the denormal or invalid status flags
  346. fsubp st(1), st(0); // may set the inexact or invalid status flags
  347. // store result
  348. fstp TBYTE PTR dbl_ext_res64; // exact
  349. }
  350. break;
  351. case _FpCodeMultiply:
  352. // perform the multiply
  353. __asm {
  354. fnclex;
  355. // load input operands
  356. fld QWORD PTR opd1; // may set the denormal or invalid status flags
  357. fld QWORD PTR opd2; // may set the denormal or invalid status flags
  358. fmulp st(1), st(0); // may set the inexact or invalid status flags
  359. // store result
  360. fstp QWORD PTR dbl_res64; // exact
  361. }
  362. __asm {
  363. fnclex;
  364. // load input operands
  365. fld QWORD PTR opd1; // may set the denormal or invalid status flags
  366. fld QWORD PTR opd2; // may set the denormal or invalid status flags
  367. fmulp st(1), st(0); // may set the inexact or invalid status flags
  368. // store result
  369. fstp TBYTE PTR dbl_ext_res64; // exact
  370. }
  371. break;
  372. case _FpCodeDivide:
  373. // perform the divide
  374. __asm {
  375. fnclex;
  376. // load input operands
  377. fld QWORD PTR opd1; // may set the denormal or invalid status flags
  378. fld QWORD PTR opd2; // may set the denormal or invalid status flags
  379. fdivp st(1), st(0); // may set the inexact, divide by zero, or
  380. // invalid status flags
  381. // store result
  382. fstp QWORD PTR dbl_res64; // exact
  383. }
  384. __asm {
  385. fnclex;
  386. // load input operands
  387. fld QWORD PTR opd1; // may set the denormal or invalid status flags
  388. fld QWORD PTR opd2; // may set the denormal or invalid status flags
  389. fdivp st(1), st(0); // may set the inexact, divide by zero, or
  390. // invalid status flags
  391. // store result
  392. fstp TBYTE PTR dbl_ext_res64; // exact
  393. }
  394. break;
  395. default:
  396. ; // will never occur
  397. }
  398. // read status word
  399. sw = _status87 ();
  400. if (sw & _SW_ZERODIVIDE) sw = sw & ~0x00080000; // clear D flag for den/0
  401. // if invalid flag is set, and invalid exceptions are enabled, take trap
  402. if (!(XmmiEnv->Masks & I_MASK) && (sw & _SW_INVALID)) {
  403. // fill in part of the FP IEEE record
  404. Fill_FPIEEE_RECORD (XmmiEnv);
  405. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  406. XmmiEnv->Flags |= I_MASK;
  407. // Cause = Enable & Status
  408. XmmiEnv->Ieee->Cause.InvalidOperation = 1;
  409. #ifdef _DEBUG_FPU
  410. // read status word
  411. sw = _status87 ();
  412. out_top = (sw >> 11) & 0x07;
  413. if (in_top != out_top) {
  414. printf ("XMMI2_FP_Emulation () ERROR 1: in_top =%d != out_top = %d\n",
  415. in_top, out_top);
  416. exit (1);
  417. }
  418. #endif
  419. return (ExceptionRaised);
  420. }
  421. // checking for NaN operands has priority over denormal exceptions; also
  422. // fix for the differences in treating two NaN inputs between the XMMI
  423. // instructions and other x86 instructions
  424. if (isnand (opd1) || isnand (opd2)) {
  425. XmmiEnv->Ieee->Result.OperandValid = 1;
  426. if (isnand (opd1) && isnand (opd2))
  427. XmmiEnv->Ieee->Result.Value.Fp64Value = quietd (opd1);
  428. else
  429. XmmiEnv->Ieee->Result.Value.Fp64Value = dbl_res64;
  430. XmmiEnv->Ieee->Status.Underflow = 0;
  431. XmmiEnv->Ieee->Status.Overflow = 0;
  432. XmmiEnv->Ieee->Status.Inexact = 0;
  433. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  434. if (sw & _SW_INVALID) {
  435. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  436. XmmiEnv->Flags |= I_MASK;
  437. } else {
  438. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  439. }
  440. #ifdef _DEBUG_FPU
  441. // read status word
  442. sw = _status87 ();
  443. out_top = (sw >> 11) & 0x07;
  444. if (in_top != out_top) {
  445. printf ("XMMI2_FP_Emulation () ERROR 2: in_top =%d != out_top = %d\n",
  446. in_top, out_top);
  447. exit (1);
  448. }
  449. #endif
  450. return (NoExceptionRaised);
  451. }
  452. // if denormal flag is set, and denormal exceptions are enabled, take trap
  453. if (!(XmmiEnv->Masks & D_MASK) && (sw & _SW_DENORMAL)) {
  454. // fill in part of the FP IEEE record
  455. Fill_FPIEEE_RECORD (XmmiEnv);
  456. // Note: the exception code is STATUS_FLOAT_INVALID in this case
  457. #ifdef _DEBUG_FPU
  458. // read status word
  459. sw = _status87 ();
  460. out_top = (sw >> 11) & 0x07;
  461. if (in_top != out_top) {
  462. printf ("XMMI2_FP_Emulation () ERROR 3: in_top =%d != out_top = %d\n",
  463. in_top, out_top);
  464. exit (1);
  465. }
  466. #endif
  467. XmmiEnv->Flags |= D_MASK;
  468. return (ExceptionRaised);
  469. }
  470. // if divide by zero flag is set, and divide by zero exceptions are
  471. // enabled, take trap (for divide only)
  472. if (!(XmmiEnv->Masks & Z_MASK) && (sw & _SW_ZERODIVIDE)) {
  473. // fill in part of the FP IEEE record
  474. Fill_FPIEEE_RECORD (XmmiEnv);
  475. XmmiEnv->Ieee->Status.ZeroDivide = 1;
  476. XmmiEnv->Flags |= Z_MASK;
  477. // Cause = Enable & Status
  478. XmmiEnv->Ieee->Cause.ZeroDivide = 1;
  479. #ifdef _DEBUG_FPU
  480. // read status word
  481. sw = _status87 ();
  482. out_top = (sw >> 11) & 0x07;
  483. if (in_top != out_top) {
  484. printf ("XMMI2_FP_Emulation () ERROR 4: in_top =%d != out_top = %d\n",
  485. in_top, out_top);
  486. exit (1);
  487. }
  488. #endif
  489. return (ExceptionRaised);
  490. }
  491. // done if the result is a NaN (QNaN Indefinite)
  492. res = dbl_res64;
  493. if (isnand (res)) {
  494. #ifdef _DEBUG_FPU
  495. if (res != QNANINDEFD)
  496. fprintf (stderr, "XMMI2_FP_Emulation () INTERNAL XMMI2_FP_Emulation"
  497. " () ERROR: res = %f = %x is not QNaN Indefinite\n",
  498. (double)res, *(unsigned int *)&res);
  499. #endif
  500. XmmiEnv->Ieee->Result.OperandValid = 1;
  501. XmmiEnv->Ieee->Result.Value.Fp64Value = res; // exact
  502. XmmiEnv->Ieee->Status.Underflow = 0;
  503. XmmiEnv->Ieee->Status.Overflow = 0;
  504. XmmiEnv->Ieee->Status.Inexact = 0;
  505. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  506. XmmiEnv->Ieee->Status.InvalidOperation = 1; // sw & _SW_INVALID true
  507. XmmiEnv->Flags |= I_MASK;
  508. #ifdef _DEBUG_FPU
  509. // read status word
  510. sw = _status87 ();
  511. out_top = (sw >> 11) & 0x07;
  512. if (in_top != out_top) {
  513. printf ("XMMI2_FP_Emulation () ERROR 5: in_top =%d != out_top = %d\n",
  514. in_top, out_top);
  515. exit (1);
  516. }
  517. #endif
  518. return (NoExceptionRaised);
  519. }
  520. // dbl_ext_res64 is not a NaN at this point
  521. if (sw & _SW_DENORMAL) XmmiEnv->Flags |= D_MASK;
  522. // Note: (dbl_ext_res64 == 0.0 && sw & _SW_INEXACT) cannot occur
  523. // check if the result is tiny
  524. // if (-MIN_DOUBLE_NORMAL < dbl_ext_res64 && dbl_ext_res64 < 0.0 ||
  525. // 0.0 < dbl_ext_res64 && dbl_ext_res64 < MIN_DOUBLE_NORMAL)
  526. // result_tiny = 1;
  527. memcpy (min_double_normal, (char *)MIN_DOUBLE_NORMALA, 10);
  528. memcpy (max_double_normal, (char *)MAX_DOUBLE_NORMALA, 10);
  529. memcpy (posinfde, (char *)POSINFDEA, 10);
  530. memcpy (neginfde, (char *)NEGINFDEA, 10);
  531. memcpy (two_to_1536, (char *)TWO_TO_1536A, 10);
  532. memcpy (two_to_m1536, (char *)TWO_TO_M1536A, 10);
  533. __asm {
  534. // -MIN_DOUBLE_NORMAL < dbl_ext_res64
  535. fld TBYTE PTR dbl_ext_res64;
  536. fld TBYTE PTR min_double_normal;
  537. fchs; // -1.0*2^e_min in st(0)
  538. fcompp; // C3,C2,C0 = 001 if <
  539. fstsw WORD PTR sw1;
  540. // dbl_ext_res64 < 0.0
  541. fldz;
  542. fld TBYTE PTR dbl_ext_res64; // dbl_ext_res64 in st(0)
  543. fcompp; // C3,C2,C0 = 001 if <
  544. fstsw WORD PTR sw2;
  545. // 0.0 < dbl_ext_res64
  546. fld TBYTE PTR dbl_ext_res64;
  547. fldz; // 0.0 in st(0)
  548. fcompp; // C3,C2,C0 = 001 if <
  549. fstsw WORD PTR sw3;
  550. // dbl_ext_res64 < MIN_DOUBLE_NORMAL
  551. fld TBYTE PTR min_double_normal;
  552. fld TBYTE PTR dbl_ext_res64; // dbl_ext_res64 in st(0)
  553. fcompp; // C3,C2,C0 = 001 if <
  554. fstsw WORD PTR sw4;
  555. }
  556. if (((sw1 & 0x4500) == 0x0100) && ((sw2 & 0x4500) == 0x0100) ||
  557. ((sw3 & 0x4500) == 0x0100) && ((sw4 & 0x4500) == 0x0100)) {
  558. result_tiny = 1;
  559. }
  560. // check if the result is huge
  561. // if (NEGINFD<dbl_ext_res64 && dbl_ext_res64<-MAX_DOUBLE_NORMAL ||
  562. // MAX_DOUBLE_NORMAL<dbl_ext_res64 && dbl_ext_res64<POSINFD)
  563. // result_huge = 1;
  564. __asm {
  565. // NEGINFD < dbl_ext_res64
  566. fld TBYTE PTR dbl_ext_res64;
  567. fld TBYTE PTR neginfde; // -inf in st(0)
  568. fcompp; // C3,C2,C0 = 001 if <
  569. fstsw WORD PTR sw1;
  570. // dbl_ext_res64 < -MAX_DOUBLE_NORMAL
  571. fld TBYTE PTR max_double_normal;
  572. fchs;
  573. fld TBYTE PTR dbl_ext_res64; // dbl_ext_res64 in st(0)
  574. fcompp; // C3,C2,C0 = 001 if <
  575. fstsw WORD PTR sw2;
  576. // MAX_DOUBLE_NORMAL < dbl_ext_res64
  577. fld TBYTE PTR dbl_ext_res64;
  578. fld TBYTE PTR max_double_normal; // +1.1...1*2^e_max in st(0)
  579. fcompp; // C3,C2,C0 = 001 if <
  580. fstsw WORD PTR sw3;
  581. // dbl_ext_res64 < POSINFD
  582. fld TBYTE PTR posinfde;
  583. fld TBYTE PTR dbl_ext_res64; // dbl_res_ext24 in st(0)
  584. fcompp; // C3,C2,C0 = 001 if <
  585. fstsw WORD PTR sw4;
  586. }
  587. if (((sw1 & 0x4500) == 0x0100) && ((sw2 & 0x4500) == 0x0100) ||
  588. ((sw3 & 0x4500) == 0x0100) && ((sw4 & 0x4500) == 0x0100)) {
  589. result_huge = 1;
  590. }
  591. // at this point, there are no enabled I, D, or Z exceptions; the instr.
  592. // might lead to an enabled underflow, enabled underflow and inexact,
  593. // enabled overflow, enabled overflow and inexact, enabled inexact, or
  594. // none of these; if there are no U or O enabled exceptions, re-execute
  595. // the instruction using iA32 stack double precision format, and the
  596. // user's rounding mode; exceptions must have been disabled before calling
  597. // this function; an inexact exception may be reported on the 64-bit
  598. // faddp, fsubp, fdivp, or on both the 64-bit and 53-bit conversions,
  599. // while an overflow or underflow (with traps disabled !) may be reported
  600. // on the conversion from dbl_res to res
  601. // check whether there is an underflow, overflow, or inexact trap to be
  602. // taken
  603. // if the underflow traps are enabled and the result is tiny, take
  604. // underflow trap
  605. if (!(XmmiEnv->Masks & U_MASK) && result_tiny) {
  606. // dbl_ext_res64 = TWO_TO_1536 * dbl_ext_res64; // exact
  607. __asm {
  608. fld TBYTE PTR dbl_ext_res64;
  609. fld TBYTE PTR two_to_1536;
  610. fmulp st(1), st(0);
  611. // store result
  612. fstp QWORD PTR dbl_res64;
  613. }
  614. // fill in part of the FP IEEE record
  615. Fill_FPIEEE_RECORD (XmmiEnv);
  616. XmmiEnv->Ieee->Status.Underflow = 1;
  617. XmmiEnv->Flags |= U_MASK;
  618. XmmiEnv->Ieee->Cause.Underflow = 1;
  619. XmmiEnv->Ieee->Result.OperandValid = 1;
  620. XmmiEnv->Ieee->Result.Value.Fp64Value = dbl_res64; // exact
  621. if (sw & _SW_INEXACT) {
  622. XmmiEnv->Ieee->Status.Inexact = 1;
  623. XmmiEnv->Flags |= P_MASK;
  624. }
  625. #ifdef _DEBUG_FPU
  626. // read status word
  627. sw = _status87 ();
  628. out_top = (sw >> 11) & 0x07;
  629. if (in_top != out_top) {
  630. printf ("XMMI2_FP_Emulation () ERROR 6: in_top =%d != out_top = %d\n",
  631. in_top, out_top);
  632. exit (1);
  633. }
  634. #endif
  635. return (ExceptionRaised);
  636. }
  637. // if overflow traps are enabled and the result is huge, take
  638. // overflow trap
  639. if (!(XmmiEnv->Masks & O_MASK) && result_huge) {
  640. // dbl_ext_res64 = TWO_TO_M1536 * dbl_ext_res64; // exact
  641. __asm {
  642. fld TBYTE PTR dbl_ext_res64;
  643. fld TBYTE PTR two_to_m1536;
  644. fmulp st(1), st(0);
  645. // store result
  646. fstp QWORD PTR dbl_res64;
  647. }
  648. // fill in part of the FP IEEE record
  649. Fill_FPIEEE_RECORD (XmmiEnv);
  650. XmmiEnv->Ieee->Status.Overflow = 1;
  651. XmmiEnv->Flags |= O_MASK;
  652. XmmiEnv->Ieee->Cause.Overflow = 1;
  653. XmmiEnv->Ieee->Result.OperandValid = 1;
  654. XmmiEnv->Ieee->Result.Value.Fp64Value = dbl_res64; // exact
  655. if (sw & _SW_INEXACT) {
  656. XmmiEnv->Ieee->Status.Inexact = 1;
  657. XmmiEnv->Flags |= P_MASK;
  658. }
  659. #ifdef _DEBUG_FPU
  660. // read status word
  661. sw = _status87 ();
  662. out_top = (sw >> 11) & 0x07;
  663. if (in_top != out_top) {
  664. printf ("XMMI2_FP_Emulation () ERROR 7: in_top =%d != out_top = %d\n",
  665. in_top, out_top);
  666. exit (1);
  667. }
  668. #endif
  669. return (ExceptionRaised);
  670. }
  671. // set user rounding mode, 64-bit precision (to avoid a double rounding
  672. // error in case the result requires denormalization), and disable all
  673. // exceptions
  674. _control87 (rc | _PC_53 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC);
  675. // calculate result for the case an inexact trap has to be taken, or
  676. // when no trap occurs (second IEEE rounding)
  677. // may set P, U or O; may also involve denormalizing the result
  678. switch (XmmiEnv->Ieee->Operation) {
  679. case _FpCodeAdd:
  680. // perform the add
  681. __asm {
  682. // load input operands
  683. fld QWORD PTR opd1; // may set the denormal status flag
  684. fld QWORD PTR opd2; // may set the denormal status flag
  685. faddp st(1), st(0); // rounded to 64 bits, may set the inexact
  686. // or divide by zero status flags
  687. // store result
  688. fstp QWORD PTR res;
  689. }
  690. break;
  691. case _FpCodeSubtract:
  692. // perform the subtract
  693. __asm {
  694. // load input operands
  695. fld QWORD PTR opd1; // may set the denormal status flag
  696. fld QWORD PTR opd2; // may set the denormal status flag
  697. fsubp st(1), st(0); // rounded to 64 bits, may set the inexact
  698. // status flag
  699. // store result
  700. fstp QWORD PTR res;
  701. }
  702. break;
  703. case _FpCodeMultiply:
  704. // perform the multiply
  705. __asm {
  706. // load input operands
  707. fld QWORD PTR opd1; // may set the denormal status flag
  708. fld QWORD PTR opd2; // may set the denormal status flag
  709. fmulp st(1), st(0); // rounded to 64 bits, exact
  710. // store result
  711. fstp QWORD PTR res;
  712. }
  713. break;
  714. case _FpCodeDivide:
  715. // perform the divide
  716. __asm {
  717. // load input operands
  718. fld QWORD PTR opd1; // may set the denormal status flag
  719. fld QWORD PTR opd2; // may set the denormal status flag
  720. fdivp st(1), st(0); // rounded to 64 bits, may set the inexact
  721. // or divide by zero status flags
  722. // store result
  723. fstp QWORD PTR res;
  724. }
  725. break;
  726. default:
  727. ; // will never occur
  728. }
  729. // read status word
  730. sw = _status87 ();
  731. if ((sw & _SW_INEXACT) && result_tiny) sw = sw | _SW_UNDERFLOW;
  732. // if inexact traps are enabled and result is inexact, take inexact trap
  733. if (!(XmmiEnv->Masks & P_MASK) &&
  734. ((sw & _SW_INEXACT) || (XmmiEnv->Fz && result_tiny))) {
  735. // fill in part of the FP IEEE record
  736. Fill_FPIEEE_RECORD (XmmiEnv);
  737. XmmiEnv->Ieee->Status.Inexact = 1;
  738. XmmiEnv->Flags |= P_MASK;
  739. XmmiEnv->Ieee->Cause.Inexact = 1;
  740. XmmiEnv->Ieee->Result.OperandValid = 1;
  741. if (result_tiny) {
  742. XmmiEnv->Ieee->Status.Underflow = 1;
  743. XmmiEnv->Flags |= U_MASK;
  744. // Note: the condition above is equivalent to
  745. // if (sw & _SW_UNDERFLOW) XmmiEnv->Ieee->Status.Underflow = 1;
  746. }
  747. if (result_huge) {
  748. XmmiEnv->Ieee->Status.Overflow = 1;
  749. XmmiEnv->Flags |= O_MASK;
  750. // Note: the condition above is equivalent to
  751. // if (sw & _SW_OVERFLOW) XmmiEnv->Ieee->Status.Overflow = 1;
  752. }
  753. // if ftz = 1 and result is tiny, result = 0.0
  754. // (no need to check for underflow traps disabled: result tiny and
  755. // underflow traps enabled would have caused taking an underflow
  756. // trap above)
  757. if (XmmiEnv->Fz && result_tiny) {
  758. // Note: the condition above is equivalent to
  759. // if (XmmiEnv->Fz && (sw & _SW_UNDERFLOW))
  760. if (res > 0.0)
  761. res = ZEROD;
  762. else if (res < 0.0)
  763. res = NZEROD;
  764. // else leave res unchanged
  765. }
  766. XmmiEnv->Ieee->Result.Value.Fp64Value = res;
  767. #ifdef _DEBUG_FPU
  768. // read status word
  769. sw = _status87 ();
  770. out_top = (sw >> 11) & 0x07;
  771. if (in_top != out_top) {
  772. printf ("XMMI2_FP_Emulation () ERROR 8: in_top =%d != out_top = %d\n",
  773. in_top, out_top);
  774. exit (1);
  775. }
  776. #endif
  777. return (ExceptionRaised);
  778. }
  779. // if it got here, then there is no trap to be taken; the following must
  780. // hold: ((the MXCSR U exceptions are disabled or
  781. //
  782. // the MXCSR underflow exceptions are enabled and the underflow flag is
  783. // clear and (the inexact flag is set or the inexact flag is clear and
  784. // the 53-bit result with unbounded exponent is not tiny)))
  785. // and (the MXCSR overflow traps are disabled or the overflow flag is
  786. // clear) and (the MXCSR inexact traps are disabled or the inexact flag
  787. // is clear)
  788. //
  789. // in this case, the result has to be delivered (the status flags are
  790. // sticky, so they are all set correctly already)
  791. #ifdef _DEBUG_FPU
  792. // error if the condition stated above does not hold
  793. if (!((XmmiEnv->Masks & U_MASK || (!(XmmiEnv->Masks & U_MASK) &&
  794. !(sw & _SW_UNDERFLOW) && ((sw & _SW_INEXACT) ||
  795. !(sw & _SW_INEXACT) && !result_tiny))) &&
  796. ((XmmiEnv->Masks & O_MASK) || !(sw & _SW_OVERFLOW)) &&
  797. ((XmmiEnv->Masks & P_MASK) || !(sw & _SW_INEXACT)))) {
  798. fprintf (stderr, "XMMI2_FP_Emulation () INTERNAL XMMI2_FP_Emulation () ERROR for "
  799. "ADDPS/ADDSS/SUBPS/SUBSS/MULPS/MULSS/DIVPS/DIVSS\n");
  800. }
  801. #endif
  802. XmmiEnv->Ieee->Result.OperandValid = 1;
  803. if (sw & _SW_UNDERFLOW) {
  804. XmmiEnv->Ieee->Status.Underflow = 1;
  805. XmmiEnv->Flags |= U_MASK;
  806. } else {
  807. XmmiEnv->Ieee->Status.Underflow = 0;
  808. }
  809. if (sw & _SW_OVERFLOW) {
  810. XmmiEnv->Ieee->Status.Overflow = 1;
  811. XmmiEnv->Flags |= O_MASK;
  812. } else {
  813. XmmiEnv->Ieee->Status.Overflow = 0;
  814. }
  815. if (sw & _SW_INEXACT) {
  816. XmmiEnv->Ieee->Status.Inexact = 1;
  817. XmmiEnv->Flags |= P_MASK;
  818. } else {
  819. XmmiEnv->Ieee->Status.Inexact = 0;
  820. }
  821. // if ftz = 1, and result is tiny (underflow traps must be disabled),
  822. // result = 0.0
  823. if (XmmiEnv->Fz && result_tiny) {
  824. if (res > 0.0)
  825. res = ZEROD;
  826. else if (res < 0.0)
  827. res = NZEROD;
  828. // else leave res unchanged
  829. XmmiEnv->Ieee->Status.Inexact = 1;
  830. XmmiEnv->Flags |= P_MASK;
  831. XmmiEnv->Ieee->Status.Underflow = 1;
  832. XmmiEnv->Flags |= U_MASK;
  833. }
  834. XmmiEnv->Ieee->Result.Value.Fp64Value = res;
  835. // note that there is no way to
  836. // communicate to the caller that the denormal flag was set - we count
  837. // on the XMMI instruction to have set the denormal flag in MXCSR if
  838. // needed, regardless of the other components of the input operands
  839. // (invalid or not; the caller will have to update the underflow,
  840. // overflow, and inexact flags in MXCSR)
  841. if (sw & _SW_ZERODIVIDE) {
  842. XmmiEnv->Ieee->Status.ZeroDivide = 1;
  843. XmmiEnv->Flags |= Z_MASK;
  844. } else {
  845. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  846. }
  847. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  848. #ifdef _DEBUG_FPU
  849. // read status word
  850. sw = _status87 ();
  851. out_top = (sw >> 11) & 0x07;
  852. if (in_top != out_top) {
  853. printf ("XMMI2_FP_Emulation () ERROR 9: in_top =%d != out_top = %d\n",
  854. in_top, out_top);
  855. exit (1);
  856. }
  857. #endif
  858. return (NoExceptionRaised);
  859. break;
  860. case OP_CMPPD:
  861. case OP_CMPSD:
  862. opd1 = XmmiEnv->Ieee->Operand1.Value.Fp64Value;
  863. opd2 = XmmiEnv->Ieee->Operand2.Value.Fp64Value;
  864. if (XmmiEnv->Daz) {
  865. if (isdenormald (opd1)) opd1 = opd1 * 0.0;
  866. if (isdenormald (opd2)) opd2 = opd2 * 0.0;
  867. }
  868. imm8 = XmmiEnv->Imm8 & 0x07;
  869. // adjust operation code
  870. XmmiEnv->Ieee->Operation = _FpCodeCompare;
  871. // check whether an invalid exception has to be raised
  872. switch (imm8) {
  873. case IMM8_EQ:
  874. case IMM8_UNORD:
  875. case IMM8_NEQ:
  876. case IMM8_ORD:
  877. if (issnand (opd1) || issnand (opd2))
  878. invalid_exc = 1; // SNaN operands signal invalid
  879. else
  880. invalid_exc = 0; // QNaN or other operands do not signal invalid
  881. // guard against the case when an SNaN operand was converted to
  882. // QNaN by compiler generated code
  883. sw = _status87 ();
  884. if (sw & _SW_INVALID) invalid_exc = 1;
  885. break;
  886. case IMM8_LT:
  887. case IMM8_LE:
  888. case IMM8_NLT:
  889. case IMM8_NLE:
  890. if (isnand (opd1) || isnand (opd2))
  891. invalid_exc = 1; // SNaN/QNaN operands signal invalid
  892. else
  893. invalid_exc = 0; // other operands do not signal invalid
  894. break;
  895. default:
  896. ; // will never occur
  897. }
  898. // if invalid_exc = 1, and invalid exceptions are enabled, take trap
  899. if (invalid_exc && !(XmmiEnv->Masks & I_MASK)) {
  900. // fill in part of the FP IEEE record
  901. Fill_FPIEEE_RECORD (XmmiEnv);
  902. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  903. XmmiEnv->Flags |= I_MASK;
  904. // Cause = Enable & Status
  905. XmmiEnv->Ieee->Cause.InvalidOperation = 1;
  906. // Note: the calling function will have to interpret the value returned
  907. // by the user handler, if execution is to be continued
  908. #ifdef _DEBUG_FPU
  909. // read status word
  910. sw = _status87 ();
  911. out_top = (sw >> 11) & 0x07;
  912. if (in_top != out_top) {
  913. printf ("XMMI2_FP_Emulation () ERROR 10: in_top =%d != out_top = %d\n",
  914. in_top, out_top);
  915. exit (1);
  916. }
  917. #endif
  918. return (ExceptionRaised);
  919. }
  920. // checking for NaN operands has priority over denormal exceptions
  921. if (isnand (opd1) || isnand (opd2)) {
  922. switch (imm8) {
  923. case IMM8_EQ:
  924. case IMM8_LT:
  925. case IMM8_LE:
  926. case IMM8_ORD:
  927. cmp_res = 0x0;
  928. break;
  929. case IMM8_UNORD:
  930. case IMM8_NEQ:
  931. case IMM8_NLT:
  932. case IMM8_NLE:
  933. cmp_res = 0xffffffffffffffff;
  934. break;
  935. default:
  936. ; // will never occur
  937. }
  938. XmmiEnv->Ieee->Result.OperandValid = 1;
  939. XmmiEnv->Ieee->Result.Value.Fp64Value = *((double *)&cmp_res);
  940. // may make U32Value
  941. XmmiEnv->Ieee->Status.Inexact = 0;
  942. XmmiEnv->Ieee->Status.Underflow = 0;
  943. XmmiEnv->Ieee->Status.Overflow = 0;
  944. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  945. // Note that the denormal flag will not be updated by _fpieee_flt (),
  946. // even if an operand is denormal
  947. if (invalid_exc) {
  948. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  949. XmmiEnv->Flags |= I_MASK;
  950. } else {
  951. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  952. }
  953. #ifdef _DEBUG_FPU
  954. // read status word
  955. sw = _status87 ();
  956. out_top = (sw >> 11) & 0x07;
  957. if (in_top != out_top) {
  958. printf ("XMMI2_FP_Emulation () ERROR 11: in_top =%d != out_top = %d\n",
  959. in_top, out_top);
  960. exit (1);
  961. }
  962. #endif
  963. return (NoExceptionRaised);
  964. }
  965. // check whether a denormal exception has to be raised
  966. if (isdenormald (opd1) || isdenormald (opd2)) {
  967. denormal_exc = 1;
  968. XmmiEnv->Flags |= D_MASK;
  969. } else {
  970. denormal_exc = 0;
  971. }
  972. // if denormal_exc = 1, and denormal exceptions are enabled, take trap
  973. if (denormal_exc && !(XmmiEnv->Masks & D_MASK)) {
  974. // fill in part of the FP IEEE record
  975. Fill_FPIEEE_RECORD (XmmiEnv);
  976. // Note: the exception code is STATUS_FLOAT_INVALID in this case
  977. #ifdef _DEBUG_FPU
  978. // read status word
  979. sw = _status87 ();
  980. out_top = (sw >> 11) & 0x07;
  981. if (in_top != out_top) {
  982. printf ("XMMI2_FP_Emulation () ERROR 12: in_top =%d != out_top = %d\n",
  983. in_top, out_top);
  984. exit (1);
  985. }
  986. #endif
  987. return (ExceptionRaised);
  988. }
  989. // no exception has to be raised, and no operand is a NaN; calculate
  990. // and deliver the result
  991. if (opd1 < opd2) {
  992. switch (imm8) {
  993. case IMM8_LT:
  994. case IMM8_LE:
  995. case IMM8_NEQ:
  996. case IMM8_ORD:
  997. cmp_res = 0xffffffffffffffff;
  998. break;
  999. case IMM8_EQ:
  1000. case IMM8_UNORD:
  1001. case IMM8_NLT:
  1002. case IMM8_NLE:
  1003. cmp_res = 0x0;
  1004. break;
  1005. default:
  1006. ; // will never occur
  1007. }
  1008. } else if (opd1 > opd2) {
  1009. switch (imm8) {
  1010. case IMM8_NEQ:
  1011. case IMM8_NLT:
  1012. case IMM8_NLE:
  1013. case IMM8_ORD:
  1014. cmp_res = 0xffffffffffffffff;
  1015. break;
  1016. case IMM8_EQ:
  1017. case IMM8_LT:
  1018. case IMM8_LE:
  1019. case IMM8_UNORD:
  1020. cmp_res = 0x0;
  1021. break;
  1022. default:
  1023. ; // will never occur
  1024. }
  1025. } else if (opd1 == opd2) {
  1026. switch (imm8) {
  1027. case IMM8_EQ:
  1028. case IMM8_LE:
  1029. case IMM8_NLT:
  1030. case IMM8_ORD:
  1031. cmp_res = 0xffffffffffffffff;
  1032. break;
  1033. case IMM8_LT:
  1034. case IMM8_UNORD:
  1035. case IMM8_NEQ:
  1036. case IMM8_NLE:
  1037. cmp_res = 0x0;
  1038. break;
  1039. default:
  1040. ; // will never occur
  1041. }
  1042. } else { // could eliminate this case
  1043. #ifdef _DEBUG_FPU
  1044. fprintf (stderr, "XMMI2_FP_Emulation () INTERNAL XMMI2_FP_Emulation ()"
  1045. " ERROR for CMPPS/CMPSS\n");
  1046. #endif
  1047. }
  1048. XmmiEnv->Ieee->Result.OperandValid = 1;
  1049. XmmiEnv->Ieee->Result.Value.Fp64Value = *((double *)&cmp_res);
  1050. // may make U32Value
  1051. XmmiEnv->Ieee->Status.Inexact = 0;
  1052. XmmiEnv->Ieee->Status.Underflow = 0;
  1053. XmmiEnv->Ieee->Status.Overflow = 0;
  1054. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1055. // Note that the denormal flag will not be updated by _fpieee_flt (),
  1056. // even if an operand is denormal
  1057. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  1058. #ifdef _DEBUG_FPU
  1059. // read status word
  1060. sw = _status87 ();
  1061. out_top = (sw >> 11) & 0x07;
  1062. if (in_top != out_top) {
  1063. printf ("XMMI2_FP_Emulation () ERROR 13: in_top =%d != out_top = %d\n",
  1064. in_top, out_top);
  1065. exit (1);
  1066. }
  1067. #endif
  1068. return (NoExceptionRaised);
  1069. break;
  1070. case OP_COMISD:
  1071. case OP_UCOMISD:
  1072. opd1 = XmmiEnv->Ieee->Operand1.Value.Fp64Value;
  1073. opd2 = XmmiEnv->Ieee->Operand2.Value.Fp64Value;
  1074. if (XmmiEnv->Daz) {
  1075. if (isdenormald (opd1)) opd1 = opd1 * 0.0;
  1076. if (isdenormald (opd2)) opd2 = opd2 * 0.0;
  1077. }
  1078. // check whether an invalid exception has to be raised
  1079. switch (XmmiEnv->Ieee->Operation) {
  1080. case OP_COMISD:
  1081. if (isnand (opd1) || isnand (opd2)) {
  1082. invalid_exc = 1;
  1083. } else
  1084. invalid_exc = 0;
  1085. break;
  1086. case OP_UCOMISD:
  1087. if (issnand (opd1) || issnand (opd2))
  1088. invalid_exc = 1;
  1089. else
  1090. invalid_exc = 0;
  1091. // guard against the case when an SNaN operand was converted to
  1092. // QNaN by compiler generated code
  1093. sw = _status87 ();
  1094. if (sw & _SW_INVALID) invalid_exc = 1;
  1095. break;
  1096. default:
  1097. ; // will never occur
  1098. }
  1099. // adjust operation code
  1100. XmmiEnv->Ieee->Operation = _FpCodeCompare;
  1101. // if invalid_exc = 1, and invalid exceptions are enabled, take trap
  1102. if (invalid_exc && !(XmmiEnv->Masks & I_MASK)) {
  1103. // fill in part of the FP IEEE record
  1104. Fill_FPIEEE_RECORD (XmmiEnv);
  1105. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1106. XmmiEnv->Flags |= I_MASK;
  1107. // Cause = Enable & Status
  1108. XmmiEnv->Ieee->Cause.InvalidOperation = 1;
  1109. // Note: the calling function will have to interpret the value returned
  1110. // by the user handler, if execution is to be continued
  1111. #ifdef _DEBUG_FPU
  1112. // read status word
  1113. sw = _status87 ();
  1114. out_top = (sw >> 11) & 0x07;
  1115. if (in_top != out_top) {
  1116. printf ("XMMI2_FP_Emulation () ERROR 14: in_top =%d != out_top = "
  1117. "%d\n", in_top, out_top);
  1118. exit (1);
  1119. }
  1120. #endif
  1121. return (ExceptionRaised);
  1122. }
  1123. // EFlags:
  1124. // 333222222222211111111110000000000
  1125. // 210987654321098765432109876543210
  1126. //                      O   SZ A P C
  1127. // checking for NaN operands has priority over denormal exceptions
  1128. if (isnand (opd1) || isnand (opd2)) {
  1129. // OF, SF, AF = 000, ZF, PF, CF = 111
  1130. XmmiEnv->EFlags = (XmmiEnv->EFlags & 0xfffff76f) | 0x00000045;
  1131. XmmiEnv->Ieee->Status.Inexact = 0;
  1132. XmmiEnv->Ieee->Status.Underflow = 0;
  1133. XmmiEnv->Ieee->Status.Overflow = 0;
  1134. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1135. // Note that the denormal flag will not be updated by _fpieee_flt (),
  1136. // even if an operand is denormal
  1137. if (invalid_exc) {
  1138. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1139. XmmiEnv->Flags |= I_MASK;
  1140. } else {
  1141. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  1142. }
  1143. #ifdef _DEBUG_FPU
  1144. // read status word
  1145. sw = _status87 ();
  1146. out_top = (sw >> 11) & 0x07;
  1147. if (in_top != out_top) {
  1148. printf ("XMMI2_FP_Emulation () ERROR 15: in_top =%d != out_top = "
  1149. "%d\n", in_top, out_top);
  1150. exit (1);
  1151. }
  1152. #endif
  1153. return (NoExceptionRaised);
  1154. }
  1155. // check whether a denormal exception has to be raised
  1156. if (isdenormald (opd1) || isdenormald (opd2)) {
  1157. denormal_exc = 1;
  1158. XmmiEnv->Flags |= D_MASK;
  1159. } else {
  1160. denormal_exc = 0;
  1161. }
  1162. // if denormal_exc = 1, and denormal exceptions are enabled, take trap
  1163. if (denormal_exc && !(XmmiEnv->Masks & D_MASK)) {
  1164. // fill in part of the FP IEEE record
  1165. Fill_FPIEEE_RECORD (XmmiEnv);
  1166. // Note: the exception code is STATUS_FLOAT_INVALID in this case
  1167. #ifdef _DEBUG_FPU
  1168. // read status word
  1169. sw = _status87 ();
  1170. out_top = (sw >> 11) & 0x07;
  1171. if (in_top != out_top) {
  1172. printf ("XMMI2_FP_Emulation () ERROR 16: in_top =%d != out_top = %d\n",
  1173. in_top, out_top);
  1174. exit (1);
  1175. }
  1176. #endif
  1177. return (ExceptionRaised);
  1178. }
  1179. // no exception has to be raised, and no operand is a NaN; calculate
  1180. // and deliver the result
  1181. // 333222222222211111111110000000000
  1182. // 210987654321098765432109876543210
  1183. //                      O   SZ A P C
  1184. if (opd1 > opd2) {
  1185. // OF, SF, AF = 000, ZF, PF, CF = 000
  1186. XmmiEnv->EFlags = XmmiEnv->EFlags & 0xfffff72a;
  1187. } else if (opd1 < opd2) {
  1188. // OF, SF, AF = 000, ZF, PF, CF = 001
  1189. XmmiEnv->EFlags = (XmmiEnv->EFlags & 0xfffff72b) | 0x00000001;
  1190. } else if (opd1 == opd2) {
  1191. // OF, SF, AF = 000, ZF, PF, CF = 100
  1192. XmmiEnv->EFlags = (XmmiEnv->EFlags & 0xfffff76a) | 0x00000040;
  1193. } else { // could eliminate this case
  1194. #ifdef _DEBUG_FPU
  1195. fprintf (stderr, "XMMI2_FP_Emulation () INTERNAL XMMI2_FP_Emulation () "
  1196. "ERROR for COMISS/UCOMISS\n");
  1197. #endif
  1198. }
  1199. XmmiEnv->Ieee->Status.Inexact = 0;
  1200. XmmiEnv->Ieee->Status.Underflow = 0;
  1201. XmmiEnv->Ieee->Status.Overflow = 0;
  1202. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1203. // Note that the denormal flag will not be updated by _fpieee_flt (),
  1204. // even if an operand is denormal
  1205. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  1206. #ifdef _DEBUG_FPU
  1207. // read status word
  1208. sw = _status87 ();
  1209. out_top = (sw >> 11) & 0x07;
  1210. if (in_top != out_top) {
  1211. printf ("XMMI2_FP_Emulation () ERROR 17: in_top =%d != out_top = %d\n",
  1212. in_top, out_top);
  1213. exit (1);
  1214. }
  1215. #endif
  1216. return (NoExceptionRaised);
  1217. break;
  1218. case OP_CVTDQ2PS:
  1219. iopd1 = XmmiEnv->Ieee->Operand1.Value.I32Value;
  1220. switch (XmmiEnv->Rc) {
  1221. case _FpRoundNearest:
  1222. rc = _RC_NEAR;
  1223. break;
  1224. case _FpRoundMinusInfinity:
  1225. rc = _RC_DOWN;
  1226. break;
  1227. case _FpRoundPlusInfinity:
  1228. rc = _RC_UP;
  1229. break;
  1230. case _FpRoundChopped:
  1231. rc = _RC_CHOP;
  1232. break;
  1233. default:
  1234. ; // internal error
  1235. }
  1236. // execute the operation and check whether the inexact flag is set
  1237. // and the respective exception is enabled
  1238. _control87 (rc | _PC_24 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC);
  1239. // perform the conversion
  1240. __asm {
  1241. fnclex;
  1242. fild DWORD PTR iopd1; // exact
  1243. fstp DWORD PTR res24; // may set P
  1244. }
  1245. // read status word
  1246. sw = _status87 ();
  1247. // if inexact traps are enabled and result is inexact, take inexact trap
  1248. if (!(XmmiEnv->Masks & P_MASK) && (sw & _SW_INEXACT)) {
  1249. // fill in part of the FP IEEE record
  1250. Fill_FPIEEE_RECORD (XmmiEnv);
  1251. XmmiEnv->Ieee->Operation = _FpCodeConvert;
  1252. XmmiEnv->Ieee->Status.Inexact = 1;
  1253. XmmiEnv->Flags |= P_MASK;
  1254. XmmiEnv->Ieee->Cause.Inexact = 1;
  1255. XmmiEnv->Ieee->Result.OperandValid = 1;
  1256. XmmiEnv->Ieee->Result.Value.Fp32Value = res24; // exact
  1257. #ifdef _DEBUG_FPU
  1258. // read status word
  1259. sw = _status87 ();
  1260. out_top = (sw >> 11) & 0x07;
  1261. if (in_top != out_top) {
  1262. printf ("XMMI2_FP_Emulation () ERROR 18: in_top =%d != out_top = %d\n",
  1263. in_top, out_top);
  1264. exit (1);
  1265. }
  1266. #endif
  1267. return (ExceptionRaised);
  1268. }
  1269. // if it got here, then there is no trap to be taken; in this case,
  1270. // the result has to be delivered
  1271. XmmiEnv->Ieee->Result.OperandValid = 1;
  1272. XmmiEnv->Ieee->Result.Value.Fp32Value = res24; // exact
  1273. if (sw & _SW_INEXACT) {
  1274. XmmiEnv->Ieee->Status.Inexact = 1;
  1275. XmmiEnv->Flags |= P_MASK;
  1276. } else {
  1277. XmmiEnv->Ieee->Status.Inexact = 0;
  1278. }
  1279. XmmiEnv->Ieee->Status.Underflow = 0;
  1280. XmmiEnv->Ieee->Status.Overflow = 0;
  1281. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1282. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  1283. #ifdef _DEBUG_FPU
  1284. // read status word
  1285. sw = _status87 ();
  1286. out_top = (sw >> 11) & 0x07;
  1287. if (in_top != out_top) {
  1288. printf ("XMMI2_FP_Emulation () ERROR 19: in_top =%d != out_top = %d\n",
  1289. in_top, out_top);
  1290. exit (1);
  1291. }
  1292. #endif
  1293. return (NoExceptionRaised);
  1294. break;
  1295. case OP_CVTPS2DQ:
  1296. case OP_CVTTPS2DQ:
  1297. opd24 = XmmiEnv->Ieee->Operand1.Value.Fp32Value;
  1298. if (XmmiEnv->Daz) {
  1299. if (isdenormalf (opd24)) opd24 = opd24 * (float)0.0;
  1300. }
  1301. // adjust the operation code
  1302. switch (XmmiEnv->Ieee->Operation) {
  1303. case OP_CVTPS2DQ:
  1304. XmmiEnv->Ieee->Operation = _FpCodeConvert;
  1305. break;
  1306. case OP_CVTTPS2DQ:
  1307. XmmiEnv->Ieee->Operation = _FpCodeConvertTrunc;
  1308. break;
  1309. default:
  1310. ; // will never occur
  1311. }
  1312. switch (XmmiEnv->Ieee->Operation) {
  1313. case _FpCodeConvert:
  1314. switch (XmmiEnv->Rc) {
  1315. case _FpRoundNearest:
  1316. rc = _RC_NEAR;
  1317. break;
  1318. case _FpRoundMinusInfinity:
  1319. rc = _RC_DOWN;
  1320. break;
  1321. case _FpRoundPlusInfinity:
  1322. rc = _RC_UP;
  1323. break;
  1324. case _FpRoundChopped:
  1325. rc = _RC_CHOP;
  1326. break;
  1327. default:
  1328. ; // internal error
  1329. }
  1330. break;
  1331. case _FpCodeConvertTrunc:
  1332. rc = _RC_CHOP;
  1333. break;
  1334. default:
  1335. ; // will never occur
  1336. }
  1337. // execute the operation and check whether the invalid or inexact flag
  1338. // is set and the respective exception is enabled
  1339. _control87 (rc | _PC_24 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC);
  1340. // perform the conversion
  1341. __asm {
  1342. fnclex;
  1343. fld DWORD PTR opd24; // may set the denormal [ignored] or invalid
  1344. // status flags
  1345. fistp DWORD PTR ires; // may set the inexact or invalid status
  1346. // flags (for NaN or out-of-range)
  1347. }
  1348. // read status word
  1349. sw = _status87 ();
  1350. // if invalid flag is set, and invalid exceptions are enabled, take trap
  1351. if (!(XmmiEnv->Masks & I_MASK) && (sw & _SW_INVALID)) {
  1352. // fill in part of the FP IEEE record
  1353. Fill_FPIEEE_RECORD (XmmiEnv);
  1354. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1355. XmmiEnv->Flags |= I_MASK;
  1356. // Cause = Enable & Status
  1357. XmmiEnv->Ieee->Cause.InvalidOperation = 1;
  1358. #ifdef _DEBUG_FPU
  1359. // read status word
  1360. sw = _status87 ();
  1361. out_top = (sw >> 11) & 0x07;
  1362. if (in_top != out_top) {
  1363. printf ("XMMI2_FP_Emulation () ERROR 20: in_top =%d != out_top = %d\n",
  1364. in_top, out_top);
  1365. exit (1);
  1366. }
  1367. #endif
  1368. return (ExceptionRaised);
  1369. }
  1370. // at this point, there are no enabled invalid exceptions; the
  1371. // instruction might have led to an enabled inexact exception, or to
  1372. // no exception at all
  1373. XmmiEnv->Ieee->Result.Value.I32Value = ires;
  1374. // if inexact traps are enabled and result is inexact, take inexact trap
  1375. // (no flush-to-zero situation is possible)
  1376. if (!(XmmiEnv->Masks & P_MASK) && (sw & _SW_INEXACT)) {
  1377. // fill in part of the FP IEEE record
  1378. Fill_FPIEEE_RECORD (XmmiEnv);
  1379. XmmiEnv->Ieee->Status.Inexact = 1;
  1380. XmmiEnv->Flags |= P_MASK;
  1381. XmmiEnv->Ieee->Cause.Inexact = 1;
  1382. XmmiEnv->Ieee->Result.OperandValid = 1;
  1383. #ifdef _DEBUG_FPU
  1384. // read status word
  1385. sw = _status87 ();
  1386. out_top = (sw >> 11) & 0x07;
  1387. if (in_top != out_top) {
  1388. printf ("XMMI2_FP_Emulation () ERROR 21: in_top =%d != out_top = %d\n",
  1389. in_top, out_top);
  1390. exit (1);
  1391. }
  1392. #endif
  1393. return (ExceptionRaised);
  1394. }
  1395. // if it got here, then there is no trap to be taken; return result
  1396. XmmiEnv->Ieee->Result.OperandValid = 1;
  1397. if (sw & _SW_INEXACT) {
  1398. XmmiEnv->Ieee->Status.Inexact = 1;
  1399. XmmiEnv->Flags |= P_MASK;
  1400. } else {
  1401. XmmiEnv->Ieee->Status.Inexact = 0;
  1402. }
  1403. XmmiEnv->Ieee->Status.Underflow = 0;
  1404. XmmiEnv->Ieee->Status.Overflow = 0;
  1405. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1406. if (sw & _SW_INVALID) {
  1407. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1408. XmmiEnv->Flags |= I_MASK;
  1409. } else {
  1410. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  1411. }
  1412. #ifdef _DEBUG_FPU
  1413. // read status word
  1414. sw = _status87 ();
  1415. out_top = (sw >> 11) & 0x07;
  1416. if (in_top != out_top) {
  1417. printf ("XMMI2_FP_Emulation () ERROR 22: in_top =%d != out_top = %d\n",
  1418. in_top, out_top);
  1419. exit (1);
  1420. }
  1421. #endif
  1422. return (NoExceptionRaised);
  1423. break;
  1424. case OP_CVTPD2PI:
  1425. case OP_CVTSD2SI:
  1426. case OP_CVTPD2DQ:
  1427. case OP_CVTTPD2PI:
  1428. case OP_CVTTSD2SI:
  1429. case OP_CVTTPD2DQ:
  1430. opd1 = XmmiEnv->Ieee->Operand1.Value.Fp64Value;
  1431. if (XmmiEnv->Daz) {
  1432. if (isdenormald (opd1)) opd1 = opd1 * 0.0;
  1433. }
  1434. // adjust the operation code
  1435. switch (XmmiEnv->Ieee->Operation) {
  1436. case OP_CVTPD2PI:
  1437. case OP_CVTSD2SI:
  1438. case OP_CVTPD2DQ:
  1439. XmmiEnv->Ieee->Operation = _FpCodeConvert;
  1440. break;
  1441. case OP_CVTTPD2PI:
  1442. case OP_CVTTSD2SI:
  1443. case OP_CVTTPD2DQ:
  1444. XmmiEnv->Ieee->Operation = _FpCodeConvertTrunc;
  1445. break;
  1446. default:
  1447. ; // will never occur
  1448. }
  1449. switch (XmmiEnv->Ieee->Operation) {
  1450. case _FpCodeConvert:
  1451. switch (XmmiEnv->Rc) {
  1452. case _FpRoundNearest:
  1453. rc = _RC_NEAR;
  1454. break;
  1455. case _FpRoundMinusInfinity:
  1456. rc = _RC_DOWN;
  1457. break;
  1458. case _FpRoundPlusInfinity:
  1459. rc = _RC_UP;
  1460. break;
  1461. case _FpRoundChopped:
  1462. rc = _RC_CHOP;
  1463. break;
  1464. default:
  1465. ; // internal error
  1466. }
  1467. break;
  1468. case _FpCodeConvertTrunc:
  1469. rc = _RC_CHOP;
  1470. break;
  1471. default:
  1472. ; // will never occur
  1473. }
  1474. // execute the operation and check whether the invalid or inexact flag
  1475. // is set and the respective exception is enabled
  1476. _control87 (rc | _PC_53 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC);
  1477. // perform the conversion
  1478. __asm {
  1479. fnclex;
  1480. fld QWORD PTR opd1; // may set the denormal [ignored] or invalid
  1481. // status flags
  1482. fistp DWORD PTR ires; // may set the inexact or invalid status
  1483. // flags (for NaN or out-of-range)
  1484. }
  1485. // read status word
  1486. sw = _status87 ();
  1487. // if invalid flag is set, and invalid exceptions are enabled, take trap
  1488. if (!(XmmiEnv->Masks & I_MASK) && (sw & _SW_INVALID)) {
  1489. // fill in part of the FP IEEE record
  1490. Fill_FPIEEE_RECORD (XmmiEnv);
  1491. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1492. XmmiEnv->Flags |= I_MASK;
  1493. // Cause = Enable & Status
  1494. XmmiEnv->Ieee->Cause.InvalidOperation = 1;
  1495. #ifdef _DEBUG_FPU
  1496. // read status word
  1497. sw = _status87 ();
  1498. out_top = (sw >> 11) & 0x07;
  1499. if (in_top != out_top) {
  1500. printf ("XMMI2_FP_Emulation () ERROR 20: in_top =%d != out_top = %d\n",
  1501. in_top, out_top);
  1502. exit (1);
  1503. }
  1504. #endif
  1505. return (ExceptionRaised);
  1506. }
  1507. // at this point, there are no enabled invalid exceptions; the
  1508. // instruction might have led to an enabled inexact exception, or to
  1509. // no exception at all
  1510. XmmiEnv->Ieee->Result.Value.I32Value = ires;
  1511. // if inexact traps are enabled and result is inexact, take inexact trap
  1512. // (no flush-to-zero situation is possible)
  1513. if (!(XmmiEnv->Masks & P_MASK) && (sw & _SW_INEXACT)) {
  1514. // fill in part of the FP IEEE record
  1515. Fill_FPIEEE_RECORD (XmmiEnv);
  1516. XmmiEnv->Ieee->Status.Inexact = 1;
  1517. XmmiEnv->Flags |= P_MASK;
  1518. XmmiEnv->Ieee->Cause.Inexact = 1;
  1519. XmmiEnv->Ieee->Result.OperandValid = 1;
  1520. #ifdef _DEBUG_FPU
  1521. // read status word
  1522. sw = _status87 ();
  1523. out_top = (sw >> 11) & 0x07;
  1524. if (in_top != out_top) {
  1525. printf ("XMMI2_FP_Emulation () ERROR 21: in_top =%d != out_top = %d\n",
  1526. in_top, out_top);
  1527. exit (1);
  1528. }
  1529. #endif
  1530. return (ExceptionRaised);
  1531. }
  1532. // if it got here, then there is no trap to be taken; return result
  1533. XmmiEnv->Ieee->Result.OperandValid = 1;
  1534. if (sw & _SW_INEXACT) {
  1535. XmmiEnv->Ieee->Status.Inexact = 1;
  1536. XmmiEnv->Flags |= P_MASK;
  1537. } else {
  1538. XmmiEnv->Ieee->Status.Inexact = 0;
  1539. }
  1540. XmmiEnv->Ieee->Status.Underflow = 0;
  1541. XmmiEnv->Ieee->Status.Overflow = 0;
  1542. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1543. if (sw & _SW_INVALID) {
  1544. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1545. XmmiEnv->Flags |= I_MASK;
  1546. } else {
  1547. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  1548. }
  1549. #ifdef _DEBUG_FPU
  1550. // read status word
  1551. sw = _status87 ();
  1552. out_top = (sw >> 11) & 0x07;
  1553. if (in_top != out_top) {
  1554. printf ("XMMI2_FP_Emulation () ERROR 22: in_top =%d != out_top = %d\n",
  1555. in_top, out_top);
  1556. exit (1);
  1557. }
  1558. #endif
  1559. return (NoExceptionRaised);
  1560. break;
  1561. case OP_CVTPS2PD:
  1562. case OP_CVTSS2SD:
  1563. opd24 = XmmiEnv->Ieee->Operand1.Value.Fp32Value;
  1564. if (XmmiEnv->Daz) {
  1565. if (isdenormalf (opd24)) opd24 = opd24 * (float)0.0;
  1566. }
  1567. // adjust the operation code
  1568. XmmiEnv->Ieee->Operation = _FpCodeConvert;
  1569. switch (XmmiEnv->Rc) {
  1570. case _FpRoundNearest:
  1571. rc = _RC_NEAR;
  1572. break;
  1573. case _FpRoundMinusInfinity:
  1574. rc = _RC_DOWN;
  1575. break;
  1576. case _FpRoundPlusInfinity:
  1577. rc = _RC_UP;
  1578. break;
  1579. case _FpRoundChopped:
  1580. rc = _RC_CHOP;
  1581. break;
  1582. default:
  1583. ; // internal error
  1584. }
  1585. // execute the operation and check whether the invalid or denormal
  1586. // flag is set and the respective exception is enabled
  1587. _control87 (rc | _PC_53 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC); // redun.
  1588. // perform the conversion
  1589. __asm {
  1590. fnclex;
  1591. fld DWORD PTR opd24; // may set the denormal or invalid
  1592. // status flags; exact
  1593. fstp QWORD PTR res; // store as a double; exact
  1594. }
  1595. // read status word
  1596. sw = _status87 ();
  1597. // if invalid flag is set, and invalid exceptions are enabled, take trap
  1598. if (!(XmmiEnv->Masks & I_MASK) && (sw & _SW_INVALID)) {
  1599. // fill in part of the FP IEEE record
  1600. Fill_FPIEEE_RECORD (XmmiEnv);
  1601. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1602. XmmiEnv->Flags |= I_MASK;
  1603. // Cause = Enable & Status
  1604. XmmiEnv->Ieee->Cause.InvalidOperation = 1;
  1605. #ifdef _DEBUG_FPU
  1606. // read status word
  1607. sw = _status87 ();
  1608. out_top = (sw >> 11) & 0x07;
  1609. if (in_top != out_top) {
  1610. printf ("XMMI2_FP_Emulation () ERROR 20: in_top =%d != out_top = %d\n",
  1611. in_top, out_top);
  1612. exit (1);
  1613. }
  1614. #endif
  1615. return (ExceptionRaised);
  1616. }
  1617. if (sw & _SW_DENORMAL) XmmiEnv->Flags |= D_MASK; // whether enabled or not
  1618. // if denormal flag is set, and denormal exceptions are enabled, take trap
  1619. if (!(XmmiEnv->Masks & D_MASK) && (sw & _SW_DENORMAL)) {
  1620. // fill in part of the FP IEEE record
  1621. Fill_FPIEEE_RECORD (XmmiEnv);
  1622. #ifdef _DEBUG_FPU
  1623. // read status word
  1624. sw = _status87 ();
  1625. out_top = (sw >> 11) & 0x07;
  1626. if (in_top != out_top) {
  1627. printf ("XMMI_FP_Emulate () ERROR 28: in_top =%d != out_top = %d\n",
  1628. in_top, out_top);
  1629. exit (1);
  1630. }
  1631. #endif
  1632. return (ExceptionRaised);
  1633. }
  1634. // at this point, there are no enabled invalid or denormal exceptions;
  1635. // the instruction did not lead to an enabled exception; return result
  1636. XmmiEnv->Ieee->Result.Value.Fp64Value = res;
  1637. XmmiEnv->Ieee->Result.OperandValid = 1;
  1638. XmmiEnv->Ieee->Status.Inexact = 0;
  1639. XmmiEnv->Ieee->Status.Underflow = 0;
  1640. XmmiEnv->Ieee->Status.Overflow = 0;
  1641. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1642. if (sw & _SW_INVALID) {
  1643. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1644. XmmiEnv->Flags |= I_MASK;
  1645. } else {
  1646. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  1647. }
  1648. #ifdef _DEBUG_FPU
  1649. // read status word
  1650. sw = _status87 ();
  1651. out_top = (sw >> 11) & 0x07;
  1652. if (in_top != out_top) {
  1653. printf ("XMMI2_FP_Emulation () ERROR 22: in_top =%d != out_top = %d\n",
  1654. in_top, out_top);
  1655. exit (1);
  1656. }
  1657. #endif
  1658. return (NoExceptionRaised);
  1659. break;
  1660. case OP_CVTPD2PS:
  1661. case OP_CVTSD2SS:
  1662. opd1 = XmmiEnv->Ieee->Operand1.Value.Fp64Value;
  1663. if (XmmiEnv->Daz) {
  1664. if (isdenormald (opd1)) opd1 = opd1 * 0.0;
  1665. }
  1666. // adjust the operation code
  1667. XmmiEnv->Ieee->Operation = _FpCodeConvert;
  1668. switch (XmmiEnv->Rc) {
  1669. case _FpRoundNearest:
  1670. rc = _RC_NEAR;
  1671. break;
  1672. case _FpRoundMinusInfinity:
  1673. rc = _RC_DOWN;
  1674. break;
  1675. case _FpRoundPlusInfinity:
  1676. rc = _RC_UP;
  1677. break;
  1678. case _FpRoundChopped:
  1679. rc = _RC_CHOP;
  1680. break;
  1681. default:
  1682. ; // internal error
  1683. }
  1684. // execute the operation and check whether the invalid or denormal
  1685. // flag is set and the respective exception is enabled
  1686. _control87 (rc | _PC_24 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC);
  1687. // perform the conversion
  1688. __asm {
  1689. fnclex;
  1690. fld QWORD PTR opd1; // may set the denormal or invalid
  1691. // status flags; exact
  1692. fstp DWORD PTR res24; // store as a single; may set the underflow/
  1693. // overflow and/or the inexact status flags
  1694. }
  1695. // read status word
  1696. sw = _status87 ();
  1697. // if invalid flag is set, and invalid exceptions are enabled, take trap
  1698. if (!(XmmiEnv->Masks & I_MASK) && (sw & _SW_INVALID)) {
  1699. // fill in part of the FP IEEE record
  1700. Fill_FPIEEE_RECORD (XmmiEnv);
  1701. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1702. XmmiEnv->Flags |= I_MASK;
  1703. // Cause = Enable & Status
  1704. XmmiEnv->Ieee->Cause.InvalidOperation = 1;
  1705. #ifdef _DEBUG_FPU
  1706. // read status word
  1707. sw = _status87 ();
  1708. out_top = (sw >> 11) & 0x07;
  1709. if (in_top != out_top) {
  1710. printf ("XMMI2_FP_Emulation () ERROR 20: in_top =%d != out_top = %d\n",
  1711. in_top, out_top);
  1712. exit (1);
  1713. }
  1714. #endif
  1715. return (ExceptionRaised);
  1716. }
  1717. if (sw & _SW_DENORMAL) XmmiEnv->Flags |= D_MASK; // whether enabled or not
  1718. // if denormal flag is set, and denormal exceptions are enabled, take trap
  1719. if (!(XmmiEnv->Masks & D_MASK) && (sw & _SW_DENORMAL)) {
  1720. // fill in part of the FP IEEE record
  1721. Fill_FPIEEE_RECORD (XmmiEnv);
  1722. #ifdef _DEBUG_FPU
  1723. // read status word
  1724. sw = _status87 ();
  1725. out_top = (sw >> 11) & 0x07;
  1726. if (in_top != out_top) {
  1727. printf ("XMMI_FP_Emulate () ERROR 28: in_top =%d != out_top = %d\n",
  1728. in_top, out_top);
  1729. exit (1);
  1730. }
  1731. #endif
  1732. return (ExceptionRaised);
  1733. }
  1734. // done if opd1 is 0.0, inf, or NaN
  1735. if (isnand (opd1) || isinfd (opd1) || iszerod (opd1)) {
  1736. XmmiEnv->Ieee->Result.OperandValid = 1;
  1737. XmmiEnv->Ieee->Result.Value.Fp32Value = res24;
  1738. XmmiEnv->Ieee->Status.Underflow = 0;
  1739. XmmiEnv->Ieee->Status.Overflow = 0;
  1740. XmmiEnv->Ieee->Status.Inexact = 0;
  1741. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1742. if (sw & _SW_INVALID) {
  1743. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1744. XmmiEnv->Flags |= I_MASK;
  1745. } else {
  1746. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  1747. }
  1748. #ifdef _DEBUG_FPU
  1749. // read status word
  1750. sw = _status87 ();
  1751. out_top = (sw >> 11) & 0x07;
  1752. if (in_top != out_top) {
  1753. printf ("XMMI_FP_Emulate () ERROR 5: in_top =%d != out_top = %d\n",
  1754. in_top, out_top);
  1755. exit (1);
  1756. }
  1757. #endif
  1758. return (NoExceptionRaised);
  1759. }
  1760. // at this point, there are no enabled I, or D exceptions, and the input
  1761. // operand is not 0, infinity, or NaN; the instruction
  1762. // might lead to an enabled underflow, enabled underflow and inexact,
  1763. // enabled overflow, enabled overflow and inexact, enabled inexact, or
  1764. // none of these; if there are no U or O enabled exceptions, the result
  1765. // is res24
  1766. // check whether there is an underflow, overflow, or inexact trap to be
  1767. // taken
  1768. // calculate the result to 24 bits, with 'unbounded' exponent
  1769. __asm {
  1770. fnclex;
  1771. fld QWORD PTR opd1; // may set the denormal or invalid
  1772. // status flags (same as above)
  1773. fld1; // +1.0
  1774. fmulp st(1), st(0); // rounded to 24 bits; may set P
  1775. fstp QWORD PTR dbl_res24; // store as a double; may set the underflow/
  1776. // overflow and/or the inexact status flags,
  1777. // but no new trap flags are set here
  1778. }
  1779. if (-MIN_SINGLE_NORMAL < dbl_res24 && dbl_res24 < MIN_SINGLE_NORMAL) {
  1780. result_tiny = 1;
  1781. }
  1782. // check if the result is huge
  1783. if (dbl_res24 < -MAX_SINGLE_NORMAL || MAX_SINGLE_NORMAL < dbl_res24) {
  1784. result_huge = 1;
  1785. }
  1786. // if the underflow traps are enabled and the result is tiny, take
  1787. // underflow trap
  1788. if (!(XmmiEnv->Masks & U_MASK) && result_tiny) {
  1789. // leave source and destination operands unchanged
  1790. // fill in part of the FP IEEE record
  1791. Fill_FPIEEE_RECORD (XmmiEnv);
  1792. XmmiEnv->Ieee->Status.Underflow = 1;
  1793. XmmiEnv->Flags |= U_MASK;
  1794. XmmiEnv->Ieee->Cause.Underflow = 1;
  1795. XmmiEnv->Ieee->Result.OperandValid = 0;
  1796. #ifdef _DEBUG_FPU
  1797. // read status word
  1798. sw = _status87 ();
  1799. out_top = (sw >> 11) & 0x07;
  1800. if (in_top != out_top) {
  1801. printf ("XMMI_FP_Emulate () ERROR 6: in_top =%d != out_top = %d\n",
  1802. in_top, out_top);
  1803. exit (1);
  1804. }
  1805. #endif
  1806. return (ExceptionRaised);
  1807. }
  1808. // if overflow traps are enabled and the result is huge, take
  1809. // overflow trap
  1810. if (!(XmmiEnv->Masks & O_MASK) && result_huge) {
  1811. // leave source and destination operands unchanged
  1812. // fill in part of the FP IEEE record
  1813. Fill_FPIEEE_RECORD (XmmiEnv);
  1814. XmmiEnv->Ieee->Status.Overflow = 1;
  1815. XmmiEnv->Flags |= O_MASK;
  1816. XmmiEnv->Ieee->Cause.Overflow = 1;
  1817. XmmiEnv->Ieee->Result.OperandValid = 0;
  1818. #ifdef _DEBUG_FPU
  1819. // read status word
  1820. sw = _status87 ();
  1821. out_top = (sw >> 11) & 0x07;
  1822. if (in_top != out_top) {
  1823. printf ("XMMI_FP_Emulate () ERROR 7: in_top =%d != out_top = %d\n",
  1824. in_top, out_top);
  1825. exit (1);
  1826. }
  1827. #endif
  1828. return (ExceptionRaised);
  1829. }
  1830. // at this point, there are no enabled exceptions, or an enabled
  1831. // inexact exception; return the result from res24
  1832. // if inexact traps are enabled and result is inexact, take inexact trap
  1833. if (!(XmmiEnv->Masks & P_MASK) &&
  1834. ((sw & _SW_INEXACT) || (XmmiEnv->Fz && result_tiny))) {
  1835. // fill in part of the FP IEEE record
  1836. Fill_FPIEEE_RECORD (XmmiEnv);
  1837. XmmiEnv->Ieee->Status.Inexact = 1;
  1838. XmmiEnv->Flags |= P_MASK;
  1839. XmmiEnv->Ieee->Cause.Inexact = 1;
  1840. XmmiEnv->Ieee->Result.OperandValid = 1;
  1841. if (result_tiny) {
  1842. XmmiEnv->Ieee->Status.Underflow = 1;
  1843. XmmiEnv->Flags |= U_MASK;
  1844. // Note: the condition above is equivalent to
  1845. // if (sw & _SW_UNDERFLOW) XmmiEnv->Ieee->Status.Underflow = 1;
  1846. }
  1847. if (result_huge) {
  1848. XmmiEnv->Ieee->Status.Overflow = 1;
  1849. XmmiEnv->Flags |= O_MASK;
  1850. // Note: the condition above is equivalent to
  1851. // if (sw & _SW_OVERFLOW) XmmiEnv->Ieee->Status.Overflow = 1;
  1852. }
  1853. // if ftz = 1 and result is tiny, result = 0.0
  1854. // (no need to check for underflow traps disabled: result tiny and
  1855. // underflow traps enabled would have caused taking an underflow
  1856. // trap above)
  1857. if (XmmiEnv->Fz && result_tiny) {
  1858. // Note: the condition above is equivalent to
  1859. // if (XmmiEnv->Fz && (sw & _SW_UNDERFLOW))
  1860. if (res24 > 0.0) // it might already be +0.0 from fmulp and/or fstp
  1861. res24 = ZEROF;
  1862. else if (res24 < 0.0) // it might already be -0.0 from fmulp/fstp
  1863. res24 = NZEROF;
  1864. // else leave res24 unchanged
  1865. }
  1866. XmmiEnv->Ieee->Result.Value.Fp32Value = res24;
  1867. #ifdef _DEBUG_FPU
  1868. // read status word
  1869. sw = _status87 ();
  1870. out_top = (sw >> 11) & 0x07;
  1871. if (in_top != out_top) {
  1872. printf ("XMMI_FP_Emulate () ERROR 8: in_top =%d != out_top = %d\n",
  1873. in_top, out_top);
  1874. exit (1);
  1875. }
  1876. #endif
  1877. return (ExceptionRaised);
  1878. }
  1879. // if it got here, then there is no trap to be taken
  1880. XmmiEnv->Ieee->Result.OperandValid = 1;
  1881. if (sw & _SW_UNDERFLOW) {
  1882. XmmiEnv->Ieee->Status.Underflow = 1;
  1883. XmmiEnv->Flags |= U_MASK;
  1884. } else {
  1885. XmmiEnv->Ieee->Status.Underflow = 0;
  1886. }
  1887. if (sw & _SW_OVERFLOW) {
  1888. XmmiEnv->Ieee->Status.Overflow = 1;
  1889. XmmiEnv->Flags |= O_MASK;
  1890. } else {
  1891. XmmiEnv->Ieee->Status.Overflow = 0;
  1892. }
  1893. if (sw & _SW_INEXACT) {
  1894. XmmiEnv->Ieee->Status.Inexact = 1;
  1895. XmmiEnv->Flags |= P_MASK;
  1896. } else {
  1897. XmmiEnv->Ieee->Status.Inexact = 0;
  1898. }
  1899. // if ftz = 1, and result is tiny (underflow traps must be disabled),
  1900. // result = 0.0
  1901. if (XmmiEnv->Fz && result_tiny) {
  1902. if (res24 > 0.0)
  1903. res24 = ZEROF;
  1904. else if (res24 < 0.0)
  1905. res24 = NZEROF;
  1906. // else leave res24 unchanged
  1907. XmmiEnv->Ieee->Status.Inexact = 1;
  1908. XmmiEnv->Flags |= P_MASK;
  1909. XmmiEnv->Ieee->Status.Underflow = 1;
  1910. XmmiEnv->Flags |= U_MASK;
  1911. }
  1912. XmmiEnv->Ieee->Result.Value.Fp32Value = res24;
  1913. // note that there is no way to
  1914. // communicate to the caller that the denormal flag was set - we count
  1915. // on the XMMI instruction to have set the denormal flag in MXCSR if
  1916. // needed, regardless of the other components of the input operands
  1917. // (invalid or not; the caller will have to update the underflow,
  1918. // overflow, and inexact flags in MXCSR)
  1919. if (sw & _SW_INVALID) {
  1920. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1921. XmmiEnv->Flags |= I_MASK;
  1922. } else {
  1923. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  1924. }
  1925. #ifdef _DEBUG_FPU
  1926. // read status word
  1927. sw = _status87 ();
  1928. out_top = (sw >> 11) & 0x07;
  1929. if (in_top != out_top) {
  1930. printf ("XMMI_FP_Emulate () ERROR 9: in_top =%d != out_top = %d\n",
  1931. in_top, out_top);
  1932. exit (1);
  1933. }
  1934. #endif
  1935. return (NoExceptionRaised);
  1936. break;
  1937. case OP_MAXPD:
  1938. case OP_MAXSD:
  1939. case OP_MINPD:
  1940. case OP_MINSD:
  1941. opd1 = XmmiEnv->Ieee->Operand1.Value.Fp64Value;
  1942. opd2 = XmmiEnv->Ieee->Operand2.Value.Fp64Value;
  1943. if (XmmiEnv->Daz) {
  1944. if (isdenormald (opd1)) opd1 = opd1 * 0.0;
  1945. if (isdenormald (opd2)) opd2 = opd2 * 0.0;
  1946. }
  1947. // adjust operation code
  1948. switch (XmmiEnv->Ieee->Operation) {
  1949. case OP_MAXPD:
  1950. case OP_MAXSD:
  1951. XmmiEnv->Ieee->Operation = _FpCodeFmax;
  1952. break;
  1953. case OP_MINPD:
  1954. case OP_MINSD:
  1955. XmmiEnv->Ieee->Operation = _FpCodeFmin;
  1956. break;
  1957. default:
  1958. ; // will never occur
  1959. }
  1960. // check whether an invalid exception has to be raised
  1961. if (isnand (opd1) || isnand (opd2))
  1962. invalid_exc = 1;
  1963. else
  1964. invalid_exc = 0;
  1965. // if invalid_exc = 1, and invalid exceptions are enabled, take trap
  1966. if (invalid_exc && !(XmmiEnv->Masks & I_MASK)) {
  1967. // fill in part of the FP IEEE record
  1968. Fill_FPIEEE_RECORD (XmmiEnv);
  1969. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1970. XmmiEnv->Flags |= I_MASK;
  1971. // Cause = Enable & Status
  1972. XmmiEnv->Ieee->Cause.InvalidOperation = 1;
  1973. #ifdef _DEBUG_FPU
  1974. // read status word
  1975. sw = _status87 ();
  1976. out_top = (sw >> 11) & 0x07;
  1977. if (in_top != out_top) {
  1978. printf ("XMMI2_FP_Emulation () ERROR 23: in_top =%d != out_top = "
  1979. "%d\n", in_top, out_top);
  1980. exit (1);
  1981. }
  1982. #endif
  1983. return (ExceptionRaised);
  1984. }
  1985. // checking for NaN operands has priority over denormal exceptions
  1986. if (invalid_exc) {
  1987. XmmiEnv->Ieee->Result.OperandValid = 1;
  1988. XmmiEnv->Ieee->Result.Value.Fp64Value = opd2;
  1989. XmmiEnv->Ieee->Status.Inexact = 0;
  1990. XmmiEnv->Ieee->Status.Underflow = 0;
  1991. XmmiEnv->Ieee->Status.Overflow = 0;
  1992. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  1993. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  1994. XmmiEnv->Flags |= I_MASK;
  1995. #ifdef _DEBUG_FPU
  1996. // read status word
  1997. sw = _status87 ();
  1998. out_top = (sw >> 11) & 0x07;
  1999. if (in_top != out_top) {
  2000. printf ("XMMI2_FP_Emulation () ERROR 24: in_top =%d != out_top = "
  2001. "%d\n", in_top, out_top);
  2002. exit (1);
  2003. }
  2004. #endif
  2005. return (NoExceptionRaised);
  2006. }
  2007. // check whether a denormal exception has to be raised
  2008. if (isdenormald (opd1) || isdenormald (opd2)) {
  2009. denormal_exc = 1;
  2010. XmmiEnv->Flags |= D_MASK;
  2011. } else {
  2012. denormal_exc = 0;
  2013. }
  2014. // if denormal_exc = 1, and denormal exceptions are enabled, take trap
  2015. if (denormal_exc && !(XmmiEnv->Masks & D_MASK)) {
  2016. // fill in part of the FP IEEE record
  2017. Fill_FPIEEE_RECORD (XmmiEnv);
  2018. // Note: the exception code is STATUS_FLOAT_INVALID in this case
  2019. #ifdef _DEBUG_FPU
  2020. // read status word
  2021. sw = _status87 ();
  2022. out_top = (sw >> 11) & 0x07;
  2023. if (in_top != out_top) {
  2024. printf ("XMMI2_FP_Emulation () ERROR 25: in_top =%d != out_top = "
  2025. "%d\n", in_top, out_top);
  2026. exit (1);
  2027. }
  2028. #endif
  2029. return (ExceptionRaised);
  2030. }
  2031. // no exception has to be raised, and no operand is a NaN; calculate
  2032. // and deliver the result
  2033. if (opd1 < opd2) {
  2034. switch (XmmiEnv->Ieee->Operation) {
  2035. case _FpCodeFmax:
  2036. XmmiEnv->Ieee->Result.Value.Fp64Value = opd2;
  2037. break;
  2038. case _FpCodeFmin:
  2039. XmmiEnv->Ieee->Result.Value.Fp64Value = opd1;
  2040. break;
  2041. default:
  2042. ; // will never occur
  2043. }
  2044. } else if (opd1 > opd2) {
  2045. switch (XmmiEnv->Ieee->Operation) {
  2046. case _FpCodeFmax:
  2047. XmmiEnv->Ieee->Result.Value.Fp64Value = opd1;
  2048. break;
  2049. case _FpCodeFmin:
  2050. XmmiEnv->Ieee->Result.Value.Fp64Value = opd2;
  2051. break;
  2052. default:
  2053. ; // will never occur
  2054. }
  2055. } else if (opd1 == opd2) {
  2056. XmmiEnv->Ieee->Result.Value.Fp64Value = opd2;
  2057. } else { // could eliminate this case
  2058. #ifdef _DEBUG_FPU
  2059. fprintf (stderr,
  2060. "XMMI2_FP_Emulation () INTERNAL XMMI2_FP_Emulation () ERROR for MAXPS/MAXSS/MINPS/MINSS\n");
  2061. #endif
  2062. }
  2063. XmmiEnv->Ieee->Result.OperandValid = 1;
  2064. XmmiEnv->Ieee->Status.Inexact = 0;
  2065. XmmiEnv->Ieee->Status.Underflow = 0;
  2066. XmmiEnv->Ieee->Status.Overflow = 0;
  2067. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  2068. // Note that the denormal flag will not be updated by _fpieee_flt (),
  2069. // even if an operand is denormal
  2070. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  2071. #ifdef _DEBUG_FPU
  2072. // read status word
  2073. sw = _status87 ();
  2074. out_top = (sw >> 11) & 0x07;
  2075. if (in_top != out_top) {
  2076. printf ("XMMI2_FP_Emulation () ERROR 26: in_top =%d != out_top = %d\n",
  2077. in_top, out_top);
  2078. exit (1);
  2079. }
  2080. #endif
  2081. return (NoExceptionRaised);
  2082. break;
  2083. case OP_SQRTPD:
  2084. case OP_SQRTSD:
  2085. opd1 = XmmiEnv->Ieee->Operand1.Value.Fp64Value;
  2086. if (XmmiEnv->Daz) {
  2087. if (isdenormald (opd1)) opd1 = opd1 * 0.0;
  2088. }
  2089. // adjust operation code
  2090. XmmiEnv->Ieee->Operation = _FpCodeSquareRoot;
  2091. // execute the operation and check whether the invalid, denormal, or
  2092. // inexact flags are set and the respective exceptions enabled
  2093. switch (XmmiEnv->Rc) {
  2094. case _FpRoundNearest:
  2095. rc = _RC_NEAR;
  2096. break;
  2097. case _FpRoundMinusInfinity:
  2098. rc = _RC_DOWN;
  2099. break;
  2100. case _FpRoundPlusInfinity:
  2101. rc = _RC_UP;
  2102. break;
  2103. case _FpRoundChopped:
  2104. rc = _RC_CHOP;
  2105. break;
  2106. default:
  2107. ; // internal error
  2108. }
  2109. _control87 (rc | _PC_53 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC);
  2110. // perform the square root
  2111. __asm {
  2112. fnclex;
  2113. fld QWORD PTR opd1; // may set the denormal or invalid status flags
  2114. fsqrt; // may set the inexact or invalid status flags
  2115. fstp QWORD PTR res; // exact
  2116. }
  2117. // read status word
  2118. sw = _status87 ();
  2119. if (sw & _SW_INVALID) sw = sw & ~0x00080000; // clr D flag for sqrt(-den)
  2120. // if invalid flag is set, and invalid exceptions are enabled, take trap
  2121. if (!(XmmiEnv->Masks & I_MASK) && (sw & _SW_INVALID)) {
  2122. // fill in part of the FP IEEE record
  2123. Fill_FPIEEE_RECORD (XmmiEnv);
  2124. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  2125. XmmiEnv->Flags |= I_MASK;
  2126. // Cause = Enable & Status
  2127. XmmiEnv->Ieee->Cause.InvalidOperation = 1;
  2128. #ifdef _DEBUG_FPU
  2129. // read status word
  2130. sw = _status87 ();
  2131. out_top = (sw >> 11) & 0x07;
  2132. if (in_top != out_top) {
  2133. printf ("XMMI2_FP_Emulation () ERROR 27: in_top =%d != out_top = "
  2134. "%d\n", in_top, out_top);
  2135. exit (1);
  2136. }
  2137. #endif
  2138. return (ExceptionRaised);
  2139. }
  2140. if (sw & _SW_DENORMAL) XmmiEnv->Flags |= D_MASK;
  2141. // if denormal flag is set, and denormal exceptions are enabled, take trap
  2142. if (!(XmmiEnv->Masks & D_MASK) && (sw & _SW_DENORMAL)) {
  2143. // fill in part of the FP IEEE record
  2144. Fill_FPIEEE_RECORD (XmmiEnv);
  2145. #ifdef _DEBUG_FPU
  2146. // read status word
  2147. sw = _status87 ();
  2148. out_top = (sw >> 11) & 0x07;
  2149. if (in_top != out_top) {
  2150. printf ("XMMI2_FP_Emulation () ERROR 28: in_top =%d != out_top = %d\n",
  2151. in_top, out_top);
  2152. exit (1);
  2153. }
  2154. #endif
  2155. return (ExceptionRaised);
  2156. }
  2157. // the result cannot be tiny
  2158. // at this point, there are no enabled I or D or exceptions; the instr.
  2159. // might lead to an enabled inexact exception or to no exception (this
  2160. // includes the case of a NaN or negative operand); exceptions must have
  2161. // been disabled before calling this function; an inexact exception is
  2162. // reported on the fsqrt
  2163. // if (the MXCSR inexact traps are disabled or the inexact flag is clear)
  2164. // then deliver the result (the status flags are sticky, so they are
  2165. // all set correctly already)
  2166. // if it got here, then there is either an inexact trap to be taken, or
  2167. // no trap at all
  2168. XmmiEnv->Ieee->Result.Value.Fp64Value = res; // exact
  2169. // if inexact traps are enabled and result is inexact, take inexact trap
  2170. if (!(XmmiEnv->Masks & P_MASK) && (sw & _SW_INEXACT)) {
  2171. // fill in part of the FP IEEE record
  2172. Fill_FPIEEE_RECORD (XmmiEnv);
  2173. XmmiEnv->Ieee->Status.Inexact = 1;
  2174. XmmiEnv->Flags |= P_MASK;
  2175. XmmiEnv->Ieee->Cause.Inexact = 1;
  2176. XmmiEnv->Ieee->Result.OperandValid = 1;
  2177. #ifdef _DEBUG_FPU
  2178. // read status word
  2179. sw = _status87 ();
  2180. out_top = (sw >> 11) & 0x07;
  2181. if (in_top != out_top) {
  2182. printf ("XMMI2_FP_Emulation () ERROR 29: in_top =%d != out_top = %d\n",
  2183. in_top, out_top);
  2184. exit (1);
  2185. }
  2186. #endif
  2187. return (ExceptionRaised);
  2188. }
  2189. // no trap was taken
  2190. XmmiEnv->Ieee->Result.OperandValid = 1;
  2191. XmmiEnv->Ieee->Status.Underflow = 0;
  2192. XmmiEnv->Ieee->Status.Overflow = 0;
  2193. if (sw & _SW_INEXACT) {
  2194. XmmiEnv->Ieee->Status.Inexact = 1;
  2195. XmmiEnv->Flags |= P_MASK;
  2196. } else {
  2197. XmmiEnv->Ieee->Status.Inexact = 0;
  2198. }
  2199. // note that there is no way to
  2200. // communicate to the caller that the denormal flag was set - we count
  2201. // on the XMMI instruction to have set the denormal flag in MXCSR if
  2202. // needed, regardless of the other components of the input operands
  2203. // (invalid or not); the caller will have to update the inexact flag
  2204. // in MXCSR
  2205. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  2206. if (sw & _SW_INVALID) {
  2207. XmmiEnv->Ieee->Status.InvalidOperation = 1;
  2208. XmmiEnv->Flags = I_MASK; // no other flags set if invalid is set
  2209. } else {
  2210. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  2211. }
  2212. #ifdef _DEBUG_FPU
  2213. // read status word
  2214. sw = _status87 ();
  2215. out_top = (sw >> 11) & 0x07;
  2216. if (in_top != out_top) {
  2217. printf ("XMMI2_FP_Emulation () ERROR 30: in_top =%d != out_top = %d\n",
  2218. in_top, out_top);
  2219. exit (1);
  2220. }
  2221. #endif
  2222. return (NoExceptionRaised);
  2223. break;
  2224. case OP_UNSPEC:
  2225. #ifdef _DEBUG_FPU
  2226. fprintf (stderr,
  2227. "XMMI2_FP_Emulation internal error: unknown operation code OP_UNSPEC\n");
  2228. #endif
  2229. break;
  2230. default:
  2231. #ifdef _DEBUG_FPU
  2232. fprintf (stderr,
  2233. "XMMI2_FP_Emulation internal error: unknown operation code %d\n",
  2234. XmmiEnv->Ieee->Operation);
  2235. #endif
  2236. break;
  2237. }
  2238. }
  // Returns 1 if d is +0.0 or -0.0 (all bits other than the sign bit are
  // clear), and 0 otherwise.
  static int
  iszerod (double d)
  {
    // word[1] is treated as the high-order half of the double, as in the
    // rest of this file (little-endian layout)
    union { double value; unsigned int word[2]; } bits;
    unsigned int high_no_sign;

    bits.value = d;
    high_no_sign = bits.word[1] & 0x7fffffff; // drop the sign bit
    return ((high_no_sign == 0x0 && bits.word[0] == 0x0) ? 1 : 0);
  }
  // Returns 1 if d is +infinity or -infinity, and 0 otherwise.
  //
  // A double-precision infinity has all 11 exponent bits set and a zero
  // significand, i.e. the high word (sign bit ignored) is 0x7ff00000 and the
  // low word is 0. The value 0x7ff80000:00000000 is a quiet NaN and must
  // not be reported as an infinity.
  static int
  isinfd (double d)
  {
    // word[1] is treated as the high-order half of the double, as in the
    // rest of this file (little-endian layout)
    union { double value; unsigned int word[2]; } bits;
    unsigned int high_no_sign;

    bits.value = d;
    high_no_sign = bits.word[1] & 0x7fffffff; // drop the sign bit
    return ((high_no_sign == 0x7ff00000 && bits.word[0] == 0x0) ? 1 : 0);
  }
  // Returns 1 if d is a signaling NaN: exponent field all ones, the most
  // significant fraction bit (the quiet bit) clear, and at least one of the
  // remaining fraction bits set. Returns 0 otherwise.
  static int
  issnand (double d)
  {
    // word[1] is treated as the high-order half of the double, as in the
    // rest of this file (little-endian layout)
    union { double value; unsigned int word[2]; } bits;
    int exp_ones_quiet_clear;
    int payload_nonzero;

    bits.value = d;
    // exponent == 0x7ff and quiet bit (bit 19 of the high word) == 0
    exp_ones_quiet_clear = ((bits.word[1] & 0x7ff80000) == 0x7ff00000);
    // remaining 51 fraction bits are not all zero (otherwise it is infinity)
    payload_nonzero = ((bits.word[1] & 0x0007ffff) != 0) || (bits.word[0] != 0);
    return ((exp_ones_quiet_clear && payload_nonzero) ? 1 : 0);
  }
  // Returns 1 if d is a NaN (quiet or signaling): exponent field all ones
  // and a non-zero fraction. Returns 0 otherwise (including for infinities).
  static int
  isnand (double d)
  {
    // word[1] is treated as the high-order half of the double, as in the
    // rest of this file (little-endian layout)
    union { double value; unsigned int word[2]; } bits;
    int exp_all_ones;
    int fraction_nonzero;

    bits.value = d;
    exp_all_ones = ((bits.word[1] & 0x7ff00000) == 0x7ff00000);
    fraction_nonzero = ((bits.word[1] & 0x000fffff) != 0x00000000) ||
        (bits.word[0] != 0x00000000);
    return ((exp_all_ones && fraction_nonzero) ? 1 : 0);
  }
  // Converts a signaling NaN into the corresponding quiet NaN by setting the
  // most significant fraction bit; a quiet NaN comes back unchanged. The
  // input is not checked for being a NaN - the bit is set unconditionally.
  static double
  quietd (double d)
  {
    // word[1] is treated as the high-order half of the double, as in the
    // rest of this file (little-endian layout)
    union { double value; unsigned int word[2]; } bits;

    bits.value = d;
    bits.word[1] |= 0x00080000; // quiet bit = bit 19 of the high word
    return (bits.value);
  }
  // Returns 1 if d is a denormal (subnormal) double: exponent field zero and
  // a non-zero fraction. Returns 0 otherwise (zeros are not denormals).
  static int
  isdenormald (double d)
  {
    // word[1] is treated as the high-order half of the double, as in the
    // rest of this file (little-endian layout)
    union { double value; unsigned int word[2]; } bits;
    int exp_zero;
    int fraction_nonzero;

    bits.value = d;
    exp_zero = ((bits.word[1] & 0x7ff00000) == 0x0);
    fraction_nonzero = ((bits.word[1] & 0x000fffff) != 0) ||
        (bits.word[0] != 0);
    return ((exp_zero && fraction_nonzero) ? 1 : 0);
  }
  // Returns 1 if f is a denormal (subnormal) single-precision value:
  // exponent field zero and a non-zero fraction. Returns 0 otherwise.
  static int
  isdenormalf (float f)
  {
    union { float value; unsigned int word; } bits;
    unsigned int exponent_bits;
    unsigned int fraction_bits;

    bits.value = f;
    exponent_bits = bits.word & 0x7f800000;
    fraction_bits = bits.word & 0x007fffff;
    return ((exponent_bits == 0x0 && fraction_bits != 0x0) ? 1 : 0);
  }
  2313. static void Fill_FPIEEE_RECORD (PXMMI_ENV XmmiEnv)
  2314. {
  2315. // fill in part of the FP IEEE record
  2316. XmmiEnv->Ieee->RoundingMode = XmmiEnv->Rc;
  2317. XmmiEnv->Ieee->Precision = XmmiEnv->Precision;
  2318. XmmiEnv->Ieee->Enable.Inexact = !(XmmiEnv->Masks & P_MASK);
  2319. XmmiEnv->Ieee->Enable.Underflow = !(XmmiEnv->Masks & U_MASK);
  2320. XmmiEnv->Ieee->Enable.Overflow = !(XmmiEnv->Masks & O_MASK);
  2321. XmmiEnv->Ieee->Enable.ZeroDivide = !(XmmiEnv->Masks & Z_MASK);
  2322. XmmiEnv->Ieee->Enable.InvalidOperation = !(XmmiEnv->Masks & I_MASK);
  2323. XmmiEnv->Ieee->Status.Inexact = 0;
  2324. XmmiEnv->Ieee->Status.Underflow = 0;
  2325. XmmiEnv->Ieee->Status.Overflow = 0;
  2326. XmmiEnv->Ieee->Status.ZeroDivide = 0;
  2327. XmmiEnv->Ieee->Status.InvalidOperation = 0;
  2328. // Cause = Enable & Status
  2329. XmmiEnv->Ieee->Cause.Inexact = 0;
  2330. XmmiEnv->Ieee->Cause.Underflow = 0;
  2331. XmmiEnv->Ieee->Cause.Overflow = 0;
  2332. XmmiEnv->Ieee->Cause.ZeroDivide = 0;
  2333. XmmiEnv->Ieee->Cause.InvalidOperation = 0;
  2334. }