/***************************************************************************** * * * Intel Confidential * * * * * * XMMI2_FP_emulate(): WNI FP instruction emulation for the FP IEEE filter * * * * * * History: * * Marius Cornea-Hasegan, Nov 1999; added DAZ Oct 2000 * * marius.cornea@intel.com * * * *****************************************************************************/ //#define _DEBUG_FPU // XMMI2_FP_Emulation () receives the input operands of an XMMI2 FP instruction // (operating on double-precision floating-point numbers and/or signed // integers), that might cause a floating-point exception (enabled or not). // // Arguments: PXMMI_ENV XmmiEnv // // The type of every field (INPUT or OUTPUT) is indicated below: // // typedef struct _XMMI_ENV { // ULONG Masks; //Mask values from MxCsr INPUT // ULONG Fz; //Flush to Zero INPUT // ULONG Rc; //Rounding INPUT // ULONG Precision; //Precision INPUT // ULONG Imm8; //imm8 predicate INPUT // ULONG EFlags; //EFlags INPUT/OUTPUT // _FPIEEE_RECORD *Ieee; //FP IEEE Record INPUT/OUTPUT, // field dependent // } XMMI_ENV, *PXMMI_ENV; // // The _FP_IEEE record and the _FPIEEE_VALUE are defined as: // // typedef struct { // unsigned int RoundingMode : 2; OUTPUT // unsigned int Precision : 3; OUTPUT // unsigned int Operation :12; INPUT // _FPIEEE_EXCEPTION_FLAGS Cause; OUTPUT // _FPIEEE_EXCEPTION_FLAGS Enable; OUTPUT // _FPIEEE_EXCEPTION_FLAGS Status; OUTPUT // _FPIEEE_VALUE Operand1; INPUT // _FPIEEE_VALUE Operand2; INPUT // _FPIEEE_VALUE Result; INPUT/OUTPUT, // field dependent // } _FPIEEE_RECORD, *PFPIEEE_RECORD; // // typedef struct { // union { // _FP32 Fp64Value; // _FP64 Fp64Value; // _FP80 Fp80Value; // _FP128 Fp128Value; // _I16 I16Value; // _I32 I32Value; // _I64 I64Value; // _U16 U16Value; // _U32 U32Value; // _U64 U64Value; // _BCD80 Bcd80Value; // char *StringValue; // int CompareValue; // } Value; INPUT for operands, // OUTPUT for result // // unsigned int OperandValid : 1; INPUT for operands // INPUT/OUTPUT for result // unsigned int Format : 4; INPUT // // } _FPIEEE_VALUE; // // Return Value: // ExceptionRaised if an enabled floating-point exception condition is // detected; in this case, the fields of XmmiEnv->Ieee are filled in // appropriately to be passed directly to a user exception handler; the // XmmiEnv->Ieee->Cause bits indicate the cause of the exception, but if // a denormal exception occurred, then no XmmiEnv->Ieee->Cause bit is set; // upon return from the user handler,the caller of XMMI2_FP_emulate should // interpret the result for a compare instruction (CMPPD, CMPPD, COMISD, // UCOMISD); the Enable, Rounding, and Precision fields in _FPIEEE_RECORD // have to be checked too for possible changes by the user handler // // NoExceptionRaised if no floating-point exception condition occurred, or // if a disabled floating-point exception occurred; in this case, // XmmiEnv->Ieee->Result.Value contains the instruction's result, // XmmiEnv->Ieee->Status contains the IEEE floating-point status flags // // Implementation Notes: // // - the operation code in XmmiEnv->Ieee->Operation is changed as expected // by a user exception handler (even if no exception is raised): // from OP_ADDPD, OP_ADDSD to _FpCodeAdd // from OP_SUBPD, OP_SUBSD to _FpCodeSubtract // from OP_MULPD, OP_MULSD to _FpCodeMultiply // from OP_DIVPD, OP_DIVSD to _FpCodeDivide // from OP_CMPPD, OP_CMPSD to _FpCodeCompare // from OP_COMISD, OP_UCOMISD to _FpCodeCompare // from OP_CVTDQ2PS, OP_CVTPS2DQ, OP_CVTPD2PI, OP_CVTSD2SI, OP_CVTPD2DQ, // OP_CVTPS2PD, OP_CVTSS2SD, OP_CVTPD2PS, OP_CVTSD2SS to _FpCodeConvert // from OP_CVTTPS2DQ, OP_CVTTPD2PI, OP_CVTTSD2SI, OP_CVTTPD2DQ // to _FpCodeConvertTrunc // from OP_MAXPD, OP_MAXSD to _FpCodeMax // from OP_MINPD, OP_MINSD to _FpCodeMin // from OP_SQRTPD, OP_SQRTSD to _FpCodeSquareRoot // // #include #include #include #include "xmmi_types.h" #include "filter.h" #ifdef _XMMI_DEBUG #include "temp_context.h" #include "debug.h" #endif // masks for individual status word bits #define P_MASK 0x20 #define U_MASK 0x10 #define O_MASK 0x08 #define Z_MASK 0x04 #define D_MASK 0x02 #define I_MASK 0x01 // 32-bit constants static unsigned ZEROFA[] = {0x00000000}; #define ZEROF *(float *) ZEROFA static unsigned NZEROFA[] = {0x80000000}; #define NZEROF *(float *) NZEROFA // 64-bit constants static unsigned ZERODA[] = {0x00000000, 0x00000000}; #define ZEROD *(double *) ZERODA static unsigned NZERODA[] = {0x00000000, 0x80000000}; #define NZEROD *(double *) NZERODA static unsigned POSINFDA[] = {0x00000000, 0x7ff00000}; #define POSINFD *(float *)POSINFDA static unsigned NEGINFDA[] = {0x00000000, 0xfff00000}; #define NEGINFD *(float *)NEGINFDA #ifdef _DEBUG_FPU static unsigned QNANINDEFDA[] = {0x00000000, 0xffc00000}; #define QNANINDEFD *(float *)QNANINDEFDA #endif static unsigned MIN_SINGLE_NORMALA [] = {0x00000000, 0x38100000}; // +1.0 * 2^-126 #define MIN_SINGLE_NORMAL *(double *)MIN_SINGLE_NORMALA static unsigned MAX_SINGLE_NORMALA [] = {0x70000000, 0x47efffff}; // +1.1...1*2^127 #define MAX_SINGLE_NORMAL *(double *)MAX_SINGLE_NORMALA static unsigned TWO_TO_192A[] = {0x00000000, 0x4bf00000}; #define TWO_TO_192 *(double *)TWO_TO_192A static unsigned TWO_TO_M192A[] = {0x00000000, 0x33f00000}; #define TWO_TO_M192 *(double *)TWO_TO_M192A // 80-bit constants static unsigned POSINFDEA[] = {0x00000000, 0x80000000, 0x00007fff}; #define POSINFDE *(float *)POSINFDEA static unsigned NEGINFDEA[] = {0x00000000, 0x80000000, 0x0000ffff}; #define NEGINFDE *(float *)NEGINFDEA static unsigned MIN_DOUBLE_NORMALA [] = {0x00000000, 0x80000000, 0x00003c01}; // +1.0 * 2^-1022 #define MIN_DOUBLE_NORMAL *(double *)MIN_DOUBLE_NORMALA static unsigned MAX_DOUBLE_NORMALA [] = {0xfffff800, 0xffffffff, 0x000043fe}; // +1.1...1*2^1023 #define MAX_DOUBLE_NORMAL *(double *)MAX_DOUBLE_NORMALA static unsigned TWO_TO_1536A[] = {0x00000000, 0x80000000, 0x000045ff}; #define TWO_TO_1536 *(double *)TWO_TO_1536A static unsigned TWO_TO_M1536A[] = {0x00000000, 0x80000000, 0x000039ff}; #define TWO_TO_M1536 *(double *)TWO_TO_M1536A // auxiliary functions static void Fill_FPIEEE_RECORD (PXMMI_ENV XmmiEnv); static int iszerod (double); static int isinfd (double); static int issnand (double); static int isnand (double); static double quietd (double); static int isdenormald (double); static int isdenormalf (float f); ULONG XMMI2_FP_Emulation (PXMMI_ENV XmmiEnv) { float opd24, res24; double opd1, opd2, res, dbl_res24; int iopd1; // for conversions from int to float int ires; // for conversions from float to int char dbl_ext_res64[10]; // needed to check tininess, to provide a scaled result to // an underflow/overflow trap handler, and in flush-to-zero double dbl_res64; unsigned int result_tiny; unsigned int result_huge; unsigned int rc, sw; unsigned short sw1, sw2, sw3, sw4; unsigned long imm8; unsigned int invalid_exc; unsigned int denormal_exc; unsigned __int64 cmp_res; unsigned char min_double_normal[10]; unsigned char max_double_normal[10]; unsigned char posinfde[10]; unsigned char neginfde[10]; unsigned char two_to_1536[10]; unsigned char two_to_m1536[10]; // Note that ExceptionCode is always STATUS_FLOAT_MULTIPLE_FAULTS in the // calling routine, so we have to check first for faults, and then for traps #ifdef _DEBUG_FPU unsigned int in_top; unsigned int out_top; char fp_env[108]; unsigned short int *control_word, *status_word, *tag_word; // read status word sw = _status87 (); in_top = (sw >> 11) & 0x07; if (in_top != 0x0) printf ("XMMI2_FP_Emulation WARNING: in_top = %d\n", in_top); __asm { fnsave fp_env; } control_word = (unsigned short *)fp_env; status_word = (unsigned short *)(fp_env + 2); tag_word = (unsigned short *)(fp_env + 8); if (*tag_word != 0xffff) printf ("XMMI2_FP_Emulation WARNING: tag_word = %x\n", *tag_word); #endif _asm { fninit; } #ifdef _DEBUG_FPU // read status word sw = _status87 (); in_top = (sw >> 11) & 0x07; if (in_top != 0x0) printf ("XMMI2_FP_Emulation () XMMI2_FP_Emulation () ERROR: in_top = %d\n", in_top); __asm { fnsave fp_env; } tag_word = (unsigned short *)(fp_env + 8); if (*tag_word != 0xffff) { printf ("XMMI2_FP_Emulation () XMMI2_FP_Emulation () ERROR: tag_word = %x\n", *tag_word); printf ("control, status, tag = %x %x %x %x %x %x\n", fp_env[0] & 0xff, fp_env[1] & 0xff, fp_env[4] & 0xff, fp_env[5] & 0xff, fp_env[8] & 0xff, fp_env[9] & 0xff); } #endif #ifdef _XMMI_DEBUG print_FPIEEE_RECORD (XmmiEnv); #endif result_tiny = 0; result_huge = 0; XmmiEnv->Ieee->RoundingMode = XmmiEnv->Rc; XmmiEnv->Ieee->Precision = XmmiEnv->Precision; switch (XmmiEnv->Ieee->Operation) { case OP_ADDPD: case OP_ADDSD: case OP_SUBPD: case OP_SUBSD: case OP_MULPD: case OP_MULSD: case OP_DIVPD: case OP_DIVSD: opd1 = XmmiEnv->Ieee->Operand1.Value.Fp64Value; opd2 = XmmiEnv->Ieee->Operand2.Value.Fp64Value; if (XmmiEnv->Daz) { if (isdenormald (opd1)) opd1 = opd1 * 0.0; if (isdenormald (opd2)) opd2 = opd2 * 0.0; } // adjust operation code switch (XmmiEnv->Ieee->Operation) { case OP_ADDPD: case OP_ADDSD: XmmiEnv->Ieee->Operation = _FpCodeAdd; break; case OP_SUBPD: case OP_SUBSD: XmmiEnv->Ieee->Operation = _FpCodeSubtract; break; case OP_MULPD: case OP_MULSD: XmmiEnv->Ieee->Operation = _FpCodeMultiply; break; case OP_DIVPD: case OP_DIVSD: XmmiEnv->Ieee->Operation = _FpCodeDivide; break; default: ; // will never occur } // execute the operation and check whether the invalid, denormal, or // divide by zero flags are set and the respective exceptions enabled switch (XmmiEnv->Rc) { case _FpRoundNearest: rc = _RC_NEAR; break; case _FpRoundMinusInfinity: rc = _RC_DOWN; break; case _FpRoundPlusInfinity: rc = _RC_UP; break; case _FpRoundChopped: rc = _RC_CHOP; break; default: ; // internal error } _control87 (rc | _PC_53 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC); // compute result and round to the destination precision, with // "unbounded" exponent (first IEEE rounding) switch (XmmiEnv->Ieee->Operation) { case _FpCodeAdd: // perform the add __asm { fnclex; // load input operands fld QWORD PTR opd1; // may set the denormal or invalid status flags fld QWORD PTR opd2; // may set the denormal or invalid status flags faddp st(1), st(0); // may set the inexact or invalid status flags // store result fstp QWORD PTR dbl_res64; } __asm { fnclex; // load input operands fld QWORD PTR opd1; // may set the denormal or invalid status flags fld QWORD PTR opd2; // may set the denormal or invalid status flags faddp st(1), st(0); // may set the inexact or invalid status flags // store result fstp TBYTE PTR dbl_ext_res64; // exact } break; case _FpCodeSubtract: // perform the subtract __asm { fnclex; // load input operands fld QWORD PTR opd1; // may set the denormal or invalid status flags fld QWORD PTR opd2; // may set the denormal or invalid status flags fsubp st(1), st(0); // may set the inexact or invalid status flags // store result fstp QWORD PTR dbl_res64; } __asm { fnclex; // load input operands fld QWORD PTR opd1; // may set the denormal or invalid status flags fld QWORD PTR opd2; // may set the denormal or invalid status flags fsubp st(1), st(0); // may set the inexact or invalid status flags // store result fstp TBYTE PTR dbl_ext_res64; // exact } break; case _FpCodeMultiply: // perform the multiply __asm { fnclex; // load input operands fld QWORD PTR opd1; // may set the denormal or invalid status flags fld QWORD PTR opd2; // may set the denormal or invalid status flags fmulp st(1), st(0); // may set the inexact or invalid status flags // store result fstp QWORD PTR dbl_res64; // exact } __asm { fnclex; // load input operands fld QWORD PTR opd1; // may set the denormal or invalid status flags fld QWORD PTR opd2; // may set the denormal or invalid status flags fmulp st(1), st(0); // may set the inexact or invalid status flags // store result fstp TBYTE PTR dbl_ext_res64; // exact } break; case _FpCodeDivide: // perform the divide __asm { fnclex; // load input operands fld QWORD PTR opd1; // may set the denormal or invalid status flags fld QWORD PTR opd2; // may set the denormal or invalid status flags fdivp st(1), st(0); // may set the inexact, divide by zero, or // invalid status flags // store result fstp QWORD PTR dbl_res64; // exact } __asm { fnclex; // load input operands fld QWORD PTR opd1; // may set the denormal or invalid status flags fld QWORD PTR opd2; // may set the denormal or invalid status flags fdivp st(1), st(0); // may set the inexact, divide by zero, or // invalid status flags // store result fstp TBYTE PTR dbl_ext_res64; // exact } break; default: ; // will never occur } // read status word sw = _status87 (); if (sw & _SW_ZERODIVIDE) sw = sw & ~0x00080000; // clear D flag for den/0 // if invalid flag is set, and invalid exceptions are enabled, take trap if (!(XmmiEnv->Masks & I_MASK) && (sw & _SW_INVALID)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; // Cause = Enable & Status XmmiEnv->Ieee->Cause.InvalidOperation = 1; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 1: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // checking for NaN operands has priority over denormal exceptions; also // fix for the differences in treating two NaN inputs between the XMMI // instructions and other x86 instructions if (isnand (opd1) || isnand (opd2)) { XmmiEnv->Ieee->Result.OperandValid = 1; if (isnand (opd1) && isnand (opd2)) XmmiEnv->Ieee->Result.Value.Fp64Value = quietd (opd1); else XmmiEnv->Ieee->Result.Value.Fp64Value = dbl_res64; XmmiEnv->Ieee->Status.Underflow = 0; XmmiEnv->Ieee->Status.Overflow = 0; XmmiEnv->Ieee->Status.Inexact = 0; XmmiEnv->Ieee->Status.ZeroDivide = 0; if (sw & _SW_INVALID) { XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; } else { XmmiEnv->Ieee->Status.InvalidOperation = 0; } #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 2: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (NoExceptionRaised); } // if denormal flag is set, and denormal exceptions are enabled, take trap if (!(XmmiEnv->Masks & D_MASK) && (sw & _SW_DENORMAL)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); // Note: the exception code is STATUS_FLOAT_INVALID in this case #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 3: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif XmmiEnv->Flags |= D_MASK; return (ExceptionRaised); } // if divide by zero flag is set, and divide by zero exceptions are // enabled, take trap (for divide only) if (!(XmmiEnv->Masks & Z_MASK) && (sw & _SW_ZERODIVIDE)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.ZeroDivide = 1; XmmiEnv->Flags |= Z_MASK; // Cause = Enable & Status XmmiEnv->Ieee->Cause.ZeroDivide = 1; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 4: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // done if the result is a NaN (QNaN Indefinite) res = dbl_res64; if (isnand (res)) { #ifdef _DEBUG_FPU if (res != QNANINDEFD) fprintf (stderr, "XMMI2_FP_Emulation () INTERNAL XMMI2_FP_Emulation" " () ERROR: res = %f = %x is not QNaN Indefinite\n", (double)res, *(unsigned int *)&res); #endif XmmiEnv->Ieee->Result.OperandValid = 1; XmmiEnv->Ieee->Result.Value.Fp64Value = res; // exact XmmiEnv->Ieee->Status.Underflow = 0; XmmiEnv->Ieee->Status.Overflow = 0; XmmiEnv->Ieee->Status.Inexact = 0; XmmiEnv->Ieee->Status.ZeroDivide = 0; XmmiEnv->Ieee->Status.InvalidOperation = 1; // sw & _SW_INVALID true XmmiEnv->Flags |= I_MASK; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 5: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (NoExceptionRaised); } // dbl_ext_res64 is not a NaN at this point if (sw & _SW_DENORMAL) XmmiEnv->Flags |= D_MASK; // Note: (dbl_ext_res64 == 0.0 && sw & _SW_INEXACT) cannot occur // check if the result is tiny // if (-MIN_DOUBLE_NORMAL < dbl_ext_res64 && dbl_ext_res64 < 0.0 || // 0.0 < dbl_ext_res64 && dbl_ext_res64 < MIN_DOUBLE_NORMAL) // result_tiny = 1; memcpy (min_double_normal, (char *)MIN_DOUBLE_NORMALA, 10); memcpy (max_double_normal, (char *)MAX_DOUBLE_NORMALA, 10); memcpy (posinfde, (char *)POSINFDEA, 10); memcpy (neginfde, (char *)NEGINFDEA, 10); memcpy (two_to_1536, (char *)TWO_TO_1536A, 10); memcpy (two_to_m1536, (char *)TWO_TO_M1536A, 10); __asm { // -MIN_DOUBLE_NORMAL < dbl_ext_res64 fld TBYTE PTR dbl_ext_res64; fld TBYTE PTR min_double_normal; fchs; // -1.0*2^e_min in st(0) fcompp; // C3,C2,C0 = 001 if < fstsw WORD PTR sw1; // dbl_ext_res64 < 0.0 fldz; fld TBYTE PTR dbl_ext_res64; // dbl_ext_res64 in st(0) fcompp; // C3,C2,C0 = 001 if < fstsw WORD PTR sw2; // 0.0 < dbl_ext_res64 fld TBYTE PTR dbl_ext_res64; fldz; // 0.0 in st(0) fcompp; // C3,C2,C0 = 001 if < fstsw WORD PTR sw3; // dbl_ext_res64 < MIN_DOUBLE_NORMAL fld TBYTE PTR min_double_normal; fld TBYTE PTR dbl_ext_res64; // dbl_ext_res64 in st(0) fcompp; // C3,C2,C0 = 001 if < fstsw WORD PTR sw4; } if (((sw1 & 0x4500) == 0x0100) && ((sw2 & 0x4500) == 0x0100) || ((sw3 & 0x4500) == 0x0100) && ((sw4 & 0x4500) == 0x0100)) { result_tiny = 1; } // check if the result is huge // if (NEGINFDMasks & U_MASK) && result_tiny) { // dbl_ext_res64 = TWO_TO_1536 * dbl_ext_res64; // exact __asm { fld TBYTE PTR dbl_ext_res64; fld TBYTE PTR two_to_1536; fmulp st(1), st(0); // store result fstp QWORD PTR dbl_res64; } // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.Underflow = 1; XmmiEnv->Flags |= U_MASK; XmmiEnv->Ieee->Cause.Underflow = 1; XmmiEnv->Ieee->Result.OperandValid = 1; XmmiEnv->Ieee->Result.Value.Fp64Value = dbl_res64; // exact if (sw & _SW_INEXACT) { XmmiEnv->Ieee->Status.Inexact = 1; XmmiEnv->Flags |= P_MASK; } #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 6: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // if overflow traps are enabled and the result is huge, take // overflow trap if (!(XmmiEnv->Masks & O_MASK) && result_huge) { // dbl_ext_res64 = TWO_TO_M1536 * dbl_ext_res64; // exact __asm { fld TBYTE PTR dbl_ext_res64; fld TBYTE PTR two_to_m1536; fmulp st(1), st(0); // store result fstp QWORD PTR dbl_res64; } // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.Overflow = 1; XmmiEnv->Flags |= O_MASK; XmmiEnv->Ieee->Cause.Overflow = 1; XmmiEnv->Ieee->Result.OperandValid = 1; XmmiEnv->Ieee->Result.Value.Fp64Value = dbl_res64; // exact if (sw & _SW_INEXACT) { XmmiEnv->Ieee->Status.Inexact = 1; XmmiEnv->Flags |= P_MASK; } #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 7: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // set user rounding mode, 64-bit precision (to avoid a double rounding // error in case the result requires denormalization), and disable all // exceptions _control87 (rc | _PC_53 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC); // calculate result for the case an inexact trap has to be taken, or // when no trap occurs (second IEEE rounding) // may set P, U or O; may also involve denormalizing the result switch (XmmiEnv->Ieee->Operation) { case _FpCodeAdd: // perform the add __asm { // load input operands fld QWORD PTR opd1; // may set the denormal status flag fld QWORD PTR opd2; // may set the denormal status flag faddp st(1), st(0); // rounded to 64 bits, may set the inexact // or divide by zero status flags // store result fstp QWORD PTR res; } break; case _FpCodeSubtract: // perform the subtract __asm { // load input operands fld QWORD PTR opd1; // may set the denormal status flag fld QWORD PTR opd2; // may set the denormal status flag fsubp st(1), st(0); // rounded to 64 bits, may set the inexact // status flag // store result fstp QWORD PTR res; } break; case _FpCodeMultiply: // perform the multiply __asm { // load input operands fld QWORD PTR opd1; // may set the denormal status flag fld QWORD PTR opd2; // may set the denormal status flag fmulp st(1), st(0); // rounded to 64 bits, exact // store result fstp QWORD PTR res; } break; case _FpCodeDivide: // perform the divide __asm { // load input operands fld QWORD PTR opd1; // may set the denormal status flag fld QWORD PTR opd2; // may set the denormal status flag fdivp st(1), st(0); // rounded to 64 bits, may set the inexact // or divide by zero status flags // store result fstp QWORD PTR res; } break; default: ; // will never occur } // read status word sw = _status87 (); if ((sw & _SW_INEXACT) && result_tiny) sw = sw | _SW_UNDERFLOW; // if inexact traps are enabled and result is inexact, take inexact trap if (!(XmmiEnv->Masks & P_MASK) && ((sw & _SW_INEXACT) || (XmmiEnv->Fz && result_tiny))) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.Inexact = 1; XmmiEnv->Flags |= P_MASK; XmmiEnv->Ieee->Cause.Inexact = 1; XmmiEnv->Ieee->Result.OperandValid = 1; if (result_tiny) { XmmiEnv->Ieee->Status.Underflow = 1; XmmiEnv->Flags |= U_MASK; // Note: the condition above is equivalent to // if (sw & _SW_UNDERFLOW) XmmiEnv->Ieee->Status.Underflow = 1; } if (result_huge) { XmmiEnv->Ieee->Status.Overflow = 1; XmmiEnv->Flags |= O_MASK; // Note: the condition above is equivalent to // if (sw & _SW_OVERFLOW) XmmiEnv->Ieee->Status.Overflow = 1; } // if ftz = 1 and result is tiny, result = 0.0 // (no need to check for underflow traps disabled: result tiny and // underflow traps enabled would have caused taking an underflow // trap above) if (XmmiEnv->Fz && result_tiny) { // Note: the condition above is equivalent to // if (XmmiEnv->Fz && (sw & _SW_UNDERFLOW)) if (res > 0.0) res = ZEROD; else if (res < 0.0) res = NZEROD; // else leave res unchanged } XmmiEnv->Ieee->Result.Value.Fp64Value = res; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 8: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // if it got here, then there is no trap to be taken; the following must // hold: ((the MXCSR U exceptions are disabled or // // the MXCSR underflow exceptions are enabled and the underflow flag is // clear and (the inexact flag is set or the inexact flag is clear and // the 53-bit result with unbounded exponent is not tiny))) // and (the MXCSR overflow traps are disabled or the overflow flag is // clear) and (the MXCSR inexact traps are disabled or the inexact flag // is clear) // // in this case, the result has to be delivered (the status flags are // sticky, so they are all set correctly already) #ifdef _DEBUG_FPU // error if the condition stated above does not hold if (!((XmmiEnv->Masks & U_MASK || (!(XmmiEnv->Masks & U_MASK) && !(sw & _SW_UNDERFLOW) && ((sw & _SW_INEXACT) || !(sw & _SW_INEXACT) && !result_tiny))) && ((XmmiEnv->Masks & O_MASK) || !(sw & _SW_OVERFLOW)) && ((XmmiEnv->Masks & P_MASK) || !(sw & _SW_INEXACT)))) { fprintf (stderr, "XMMI2_FP_Emulation () INTERNAL XMMI2_FP_Emulation () ERROR for " "ADDPS/ADDSS/SUBPS/SUBSS/MULPS/MULSS/DIVPS/DIVSS\n"); } #endif XmmiEnv->Ieee->Result.OperandValid = 1; if (sw & _SW_UNDERFLOW) { XmmiEnv->Ieee->Status.Underflow = 1; XmmiEnv->Flags |= U_MASK; } else { XmmiEnv->Ieee->Status.Underflow = 0; } if (sw & _SW_OVERFLOW) { XmmiEnv->Ieee->Status.Overflow = 1; XmmiEnv->Flags |= O_MASK; } else { XmmiEnv->Ieee->Status.Overflow = 0; } if (sw & _SW_INEXACT) { XmmiEnv->Ieee->Status.Inexact = 1; XmmiEnv->Flags |= P_MASK; } else { XmmiEnv->Ieee->Status.Inexact = 0; } // if ftz = 1, and result is tiny (underflow traps must be disabled), // result = 0.0 if (XmmiEnv->Fz && result_tiny) { if (res > 0.0) res = ZEROD; else if (res < 0.0) res = NZEROD; // else leave res unchanged XmmiEnv->Ieee->Status.Inexact = 1; XmmiEnv->Flags |= P_MASK; XmmiEnv->Ieee->Status.Underflow = 1; XmmiEnv->Flags |= U_MASK; } XmmiEnv->Ieee->Result.Value.Fp64Value = res; // note that there is no way to // communicate to the caller that the denormal flag was set - we count // on the XMMI instruction to have set the denormal flag in MXCSR if // needed, regardless of the other components of the input operands // (invalid or not; the caller will have to update the underflow, // overflow, and inexact flags in MXCSR) if (sw & _SW_ZERODIVIDE) { XmmiEnv->Ieee->Status.ZeroDivide = 1; XmmiEnv->Flags |= Z_MASK; } else { XmmiEnv->Ieee->Status.ZeroDivide = 0; } XmmiEnv->Ieee->Status.InvalidOperation = 0; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 9: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (NoExceptionRaised); break; case OP_CMPPD: case OP_CMPSD: opd1 = XmmiEnv->Ieee->Operand1.Value.Fp64Value; opd2 = XmmiEnv->Ieee->Operand2.Value.Fp64Value; if (XmmiEnv->Daz) { if (isdenormald (opd1)) opd1 = opd1 * 0.0; if (isdenormald (opd2)) opd2 = opd2 * 0.0; } imm8 = XmmiEnv->Imm8 & 0x07; // adjust operation code XmmiEnv->Ieee->Operation = _FpCodeCompare; // check whether an invalid exception has to be raised switch (imm8) { case IMM8_EQ: case IMM8_UNORD: case IMM8_NEQ: case IMM8_ORD: if (issnand (opd1) || issnand (opd2)) invalid_exc = 1; // SNaN operands signal invalid else invalid_exc = 0; // QNaN or other operands do not signal invalid // guard against the case when an SNaN operand was converted to // QNaN by compiler generated code sw = _status87 (); if (sw & _SW_INVALID) invalid_exc = 1; break; case IMM8_LT: case IMM8_LE: case IMM8_NLT: case IMM8_NLE: if (isnand (opd1) || isnand (opd2)) invalid_exc = 1; // SNaN/QNaN operands signal invalid else invalid_exc = 0; // other operands do not signal invalid break; default: ; // will never occur } // if invalid_exc = 1, and invalid exceptions are enabled, take trap if (invalid_exc && !(XmmiEnv->Masks & I_MASK)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; // Cause = Enable & Status XmmiEnv->Ieee->Cause.InvalidOperation = 1; // Note: the calling function will have to interpret the value returned // by the user handler, if execution is to be continued #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 10: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // checking for NaN operands has priority over denormal exceptions if (isnand (opd1) || isnand (opd2)) { switch (imm8) { case IMM8_EQ: case IMM8_LT: case IMM8_LE: case IMM8_ORD: cmp_res = 0x0; break; case IMM8_UNORD: case IMM8_NEQ: case IMM8_NLT: case IMM8_NLE: cmp_res = 0xffffffffffffffff; break; default: ; // will never occur } XmmiEnv->Ieee->Result.OperandValid = 1; XmmiEnv->Ieee->Result.Value.Fp64Value = *((double *)&cmp_res); // may make U32Value XmmiEnv->Ieee->Status.Inexact = 0; XmmiEnv->Ieee->Status.Underflow = 0; XmmiEnv->Ieee->Status.Overflow = 0; XmmiEnv->Ieee->Status.ZeroDivide = 0; // Note that the denormal flag will not be updated by _fpieee_flt (), // even if an operand is denormal if (invalid_exc) { XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; } else { XmmiEnv->Ieee->Status.InvalidOperation = 0; } #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 11: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (NoExceptionRaised); } // check whether a denormal exception has to be raised if (isdenormald (opd1) || isdenormald (opd2)) { denormal_exc = 1; XmmiEnv->Flags |= D_MASK; } else { denormal_exc = 0; } // if denormal_exc = 1, and denormal exceptions are enabled, take trap if (denormal_exc && !(XmmiEnv->Masks & D_MASK)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); // Note: the exception code is STATUS_FLOAT_INVALID in this case #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 12: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // no exception has to be raised, and no operand is a NaN; calculate // and deliver the result if (opd1 < opd2) { switch (imm8) { case IMM8_LT: case IMM8_LE: case IMM8_NEQ: case IMM8_ORD: cmp_res = 0xffffffffffffffff; break; case IMM8_EQ: case IMM8_UNORD: case IMM8_NLT: case IMM8_NLE: cmp_res = 0x0; break; default: ; // will never occur } } else if (opd1 > opd2) { switch (imm8) { case IMM8_NEQ: case IMM8_NLT: case IMM8_NLE: case IMM8_ORD: cmp_res = 0xffffffffffffffff; break; case IMM8_EQ: case IMM8_LT: case IMM8_LE: case IMM8_UNORD: cmp_res = 0x0; break; default: ; // will never occur } } else if (opd1 == opd2) { switch (imm8) { case IMM8_EQ: case IMM8_LE: case IMM8_NLT: case IMM8_ORD: cmp_res = 0xffffffffffffffff; break; case IMM8_LT: case IMM8_UNORD: case IMM8_NEQ: case IMM8_NLE: cmp_res = 0x0; break; default: ; // will never occur } } else { // could eliminate this case #ifdef _DEBUG_FPU fprintf (stderr, "XMMI2_FP_Emulation () INTERNAL XMMI2_FP_Emulation ()" " ERROR for CMPPS/CMPSS\n"); #endif } XmmiEnv->Ieee->Result.OperandValid = 1; XmmiEnv->Ieee->Result.Value.Fp64Value = *((double *)&cmp_res); // may make U32Value XmmiEnv->Ieee->Status.Inexact = 0; XmmiEnv->Ieee->Status.Underflow = 0; XmmiEnv->Ieee->Status.Overflow = 0; XmmiEnv->Ieee->Status.ZeroDivide = 0; // Note that the denormal flag will not be updated by _fpieee_flt (), // even if an operand is denormal XmmiEnv->Ieee->Status.InvalidOperation = 0; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 13: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (NoExceptionRaised); break; case OP_COMISD: case OP_UCOMISD: opd1 = XmmiEnv->Ieee->Operand1.Value.Fp64Value; opd2 = XmmiEnv->Ieee->Operand2.Value.Fp64Value; if (XmmiEnv->Daz) { if (isdenormald (opd1)) opd1 = opd1 * 0.0; if (isdenormald (opd2)) opd2 = opd2 * 0.0; } // check whether an invalid exception has to be raised switch (XmmiEnv->Ieee->Operation) { case OP_COMISD: if (isnand (opd1) || isnand (opd2)) { invalid_exc = 1; } else invalid_exc = 0; break; case OP_UCOMISD: if (issnand (opd1) || issnand (opd2)) invalid_exc = 1; else invalid_exc = 0; // guard against the case when an SNaN operand was converted to // QNaN by compiler generated code sw = _status87 (); if (sw & _SW_INVALID) invalid_exc = 1; break; default: ; // will never occur } // adjust operation code XmmiEnv->Ieee->Operation = _FpCodeCompare; // if invalid_exc = 1, and invalid exceptions are enabled, take trap if (invalid_exc && !(XmmiEnv->Masks & I_MASK)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; // Cause = Enable & Status XmmiEnv->Ieee->Cause.InvalidOperation = 1; // Note: the calling function will have to interpret the value returned // by the user handler, if execution is to be continued #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 14: in_top =%d != out_top = " "%d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // EFlags: // 333222222222211111111110000000000 // 210987654321098765432109876543210 // O SZ A P C // checking for NaN operands has priority over denormal exceptions if (isnand (opd1) || isnand (opd2)) { // OF, SF, AF = 000, ZF, PF, CF = 111 XmmiEnv->EFlags = (XmmiEnv->EFlags & 0xfffff76f) | 0x00000045; XmmiEnv->Ieee->Status.Inexact = 0; XmmiEnv->Ieee->Status.Underflow = 0; XmmiEnv->Ieee->Status.Overflow = 0; XmmiEnv->Ieee->Status.ZeroDivide = 0; // Note that the denormal flag will not be updated by _fpieee_flt (), // even if an operand is denormal if (invalid_exc) { XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; } else { XmmiEnv->Ieee->Status.InvalidOperation = 0; } #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 15: in_top =%d != out_top = " "%d\n", in_top, out_top); exit (1); } #endif return (NoExceptionRaised); } // check whether a denormal exception has to be raised if (isdenormald (opd1) || isdenormald (opd2)) { denormal_exc = 1; XmmiEnv->Flags |= D_MASK; } else { denormal_exc = 0; } // if denormal_exc = 1, and denormal exceptions are enabled, take trap if (denormal_exc && !(XmmiEnv->Masks & D_MASK)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); // Note: the exception code is STATUS_FLOAT_INVALID in this case #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 16: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // no exception has to be raised, and no operand is a NaN; calculate // and deliver the result // 333222222222211111111110000000000 // 210987654321098765432109876543210 // O SZ A P C if (opd1 > opd2) { // OF, SF, AF = 000, ZF, PF, CF = 000 XmmiEnv->EFlags = XmmiEnv->EFlags & 0xfffff72a; } else if (opd1 < opd2) { // OF, SF, AF = 000, ZF, PF, CF = 001 XmmiEnv->EFlags = (XmmiEnv->EFlags & 0xfffff72b) | 0x00000001; } else if (opd1 == opd2) { // OF, SF, AF = 000, ZF, PF, CF = 100 XmmiEnv->EFlags = (XmmiEnv->EFlags & 0xfffff76a) | 0x00000040; } else { // could eliminate this case #ifdef _DEBUG_FPU fprintf (stderr, "XMMI2_FP_Emulation () INTERNAL XMMI2_FP_Emulation () " "ERROR for COMISS/UCOMISS\n"); #endif } XmmiEnv->Ieee->Status.Inexact = 0; XmmiEnv->Ieee->Status.Underflow = 0; XmmiEnv->Ieee->Status.Overflow = 0; XmmiEnv->Ieee->Status.ZeroDivide = 0; // Note that the denormal flag will not be updated by _fpieee_flt (), // even if an operand is denormal XmmiEnv->Ieee->Status.InvalidOperation = 0; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 17: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (NoExceptionRaised); break; case OP_CVTDQ2PS: iopd1 = XmmiEnv->Ieee->Operand1.Value.I32Value; switch (XmmiEnv->Rc) { case _FpRoundNearest: rc = _RC_NEAR; break; case _FpRoundMinusInfinity: rc = _RC_DOWN; break; case _FpRoundPlusInfinity: rc = _RC_UP; break; case _FpRoundChopped: rc = _RC_CHOP; break; default: ; // internal error } // execute the operation and check whether the inexact flag is set // and the respective exception is enabled _control87 (rc | _PC_24 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC); // perform the conversion __asm { fnclex; fild DWORD PTR iopd1; // exact fstp DWORD PTR res24; // may set P } // read status word sw = _status87 (); // if inexact traps are enabled and result is inexact, take inexact trap if (!(XmmiEnv->Masks & P_MASK) && (sw & _SW_INEXACT)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Operation = _FpCodeConvert; XmmiEnv->Ieee->Status.Inexact = 1; XmmiEnv->Flags |= P_MASK; XmmiEnv->Ieee->Cause.Inexact = 1; XmmiEnv->Ieee->Result.OperandValid = 1; XmmiEnv->Ieee->Result.Value.Fp32Value = res24; // exact #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 18: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // if it got here, then there is no trap to be taken; in this case, // the result has to be delivered XmmiEnv->Ieee->Result.OperandValid = 1; XmmiEnv->Ieee->Result.Value.Fp32Value = res24; // exact if (sw & _SW_INEXACT) { XmmiEnv->Ieee->Status.Inexact = 1; XmmiEnv->Flags |= P_MASK; } else { XmmiEnv->Ieee->Status.Inexact = 0; } XmmiEnv->Ieee->Status.Underflow = 0; XmmiEnv->Ieee->Status.Overflow = 0; XmmiEnv->Ieee->Status.ZeroDivide = 0; XmmiEnv->Ieee->Status.InvalidOperation = 0; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 19: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (NoExceptionRaised); break; case OP_CVTPS2DQ: case OP_CVTTPS2DQ: opd24 = XmmiEnv->Ieee->Operand1.Value.Fp32Value; if (XmmiEnv->Daz) { if (isdenormalf (opd24)) opd24 = opd24 * (float)0.0; } // adjust the operation code switch (XmmiEnv->Ieee->Operation) { case OP_CVTPS2DQ: XmmiEnv->Ieee->Operation = _FpCodeConvert; break; case OP_CVTTPS2DQ: XmmiEnv->Ieee->Operation = _FpCodeConvertTrunc; break; default: ; // will never occur } switch (XmmiEnv->Ieee->Operation) { case _FpCodeConvert: switch (XmmiEnv->Rc) { case _FpRoundNearest: rc = _RC_NEAR; break; case _FpRoundMinusInfinity: rc = _RC_DOWN; break; case _FpRoundPlusInfinity: rc = _RC_UP; break; case _FpRoundChopped: rc = _RC_CHOP; break; default: ; // internal error } break; case _FpCodeConvertTrunc: rc = _RC_CHOP; break; default: ; // will never occur } // execute the operation and check whether the inexact flag is set // and the respective exceptions enabled _control87 (rc | _PC_24 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC); // perform the conversion __asm { fnclex; fld DWORD PTR opd24; // may set the denormal [ignored] or invalid // status flags fistp DWORD PTR ires; // may set the inexact or invalid status // flags (for NaN or out-of-range) } // read status word sw = _status87 (); // if invalid flag is set, and invalid exceptions are enabled, take trap if (!(XmmiEnv->Masks & I_MASK) && (sw & _SW_INVALID)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; // Cause = Enable & Status XmmiEnv->Ieee->Cause.InvalidOperation = 1; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 20: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // at this point, there are no enabled invalid exceptions; the // instruction might have lead to an enabled inexact exception, or to // no exception at all XmmiEnv->Ieee->Result.Value.I32Value = ires; // if inexact traps are enabled and result is inexact, take inexact trap // (no flush-to-zero situation is possible) if (!(XmmiEnv->Masks & P_MASK) && (sw & _SW_INEXACT)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.Inexact = 1; XmmiEnv->Flags |= P_MASK; XmmiEnv->Ieee->Cause.Inexact = 1; XmmiEnv->Ieee->Result.OperandValid = 1; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 21: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // if it got here, then there is no trap to be taken; return result XmmiEnv->Ieee->Result.OperandValid = 1; if (sw & _SW_INEXACT) { XmmiEnv->Ieee->Status.Inexact = 1; XmmiEnv->Flags |= P_MASK; } else { XmmiEnv->Ieee->Status.Inexact = 0; } XmmiEnv->Ieee->Status.Underflow = 0; XmmiEnv->Ieee->Status.Overflow = 0; XmmiEnv->Ieee->Status.ZeroDivide = 0; if (sw & _SW_INVALID) { XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; } else { XmmiEnv->Ieee->Status.InvalidOperation = 0; } #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 22: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (NoExceptionRaised); break; case OP_CVTPD2PI: case OP_CVTSD2SI: case OP_CVTPD2DQ: case OP_CVTTPD2PI: case OP_CVTTSD2SI: case OP_CVTTPD2DQ: opd1 = XmmiEnv->Ieee->Operand1.Value.Fp64Value; if (XmmiEnv->Daz) { if (isdenormald (opd1)) opd1 = opd1 * 0.0; } // adjust the operation code switch (XmmiEnv->Ieee->Operation) { case OP_CVTPD2PI: case OP_CVTSD2SI: case OP_CVTPD2DQ: XmmiEnv->Ieee->Operation = _FpCodeConvert; break; case OP_CVTTPD2PI: case OP_CVTTSD2SI: case OP_CVTTPD2DQ: XmmiEnv->Ieee->Operation = _FpCodeConvertTrunc; break; default: ; // will never occur } switch (XmmiEnv->Ieee->Operation) { case _FpCodeConvert: switch (XmmiEnv->Rc) { case _FpRoundNearest: rc = _RC_NEAR; break; case _FpRoundMinusInfinity: rc = _RC_DOWN; break; case _FpRoundPlusInfinity: rc = _RC_UP; break; case _FpRoundChopped: rc = _RC_CHOP; break; default: ; // internal error } break; case _FpCodeConvertTrunc: rc = _RC_CHOP; break; default: ; // will never occur } // execute the operation and check whether the inexact flag is set // and the respective exceptions enabled _control87 (rc | _PC_53 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC); // perform the conversion __asm { fnclex; fld QWORD PTR opd1; // may set the denormal [ignored] or invalid // status flags fistp DWORD PTR ires; // may set the inexact or invalid status // flags (for NaN or out-of-range) } // read status word sw = _status87 (); // if invalid flag is set, and invalid exceptions are enabled, take trap if (!(XmmiEnv->Masks & I_MASK) && (sw & _SW_INVALID)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; // Cause = Enable & Status XmmiEnv->Ieee->Cause.InvalidOperation = 1; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 20: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // at this point, there are no enabled invalid exceptions; the // instruction might have lead to an enabled inexact exception, or to // no exception at all XmmiEnv->Ieee->Result.Value.I32Value = ires; // if inexact traps are enabled and result is inexact, take inexact trap // (no flush-to-zero situation is possible) if (!(XmmiEnv->Masks & P_MASK) && (sw & _SW_INEXACT)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.Inexact = 1; XmmiEnv->Flags |= P_MASK; XmmiEnv->Ieee->Cause.Inexact = 1; XmmiEnv->Ieee->Result.OperandValid = 1; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 21: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // if it got here, then there is no trap to be taken; return result XmmiEnv->Ieee->Result.OperandValid = 1; if (sw & _SW_INEXACT) { XmmiEnv->Ieee->Status.Inexact = 1; XmmiEnv->Flags |= P_MASK; } else { XmmiEnv->Ieee->Status.Inexact = 0; } XmmiEnv->Ieee->Status.Underflow = 0; XmmiEnv->Ieee->Status.Overflow = 0; XmmiEnv->Ieee->Status.ZeroDivide = 0; if (sw & _SW_INVALID) { XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; } else { XmmiEnv->Ieee->Status.InvalidOperation = 0; } #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 22: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (NoExceptionRaised); break; case OP_CVTPS2PD: case OP_CVTSS2SD: opd24 = XmmiEnv->Ieee->Operand1.Value.Fp32Value; if (XmmiEnv->Daz) { if (isdenormalf (opd24)) opd24 = opd24 * (float)0.0; } // adjust the operation code XmmiEnv->Ieee->Operation = _FpCodeConvert; switch (XmmiEnv->Rc) { case _FpRoundNearest: rc = _RC_NEAR; break; case _FpRoundMinusInfinity: rc = _RC_DOWN; break; case _FpRoundPlusInfinity: rc = _RC_UP; break; case _FpRoundChopped: rc = _RC_CHOP; break; default: ; // internal error } // execute the operation and check whether the invalid or denormal // flag is set and the respective exception is enabled _control87 (rc | _PC_53 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC); // redun. // perform the conversion __asm { fnclex; fld DWORD PTR opd24; // may set the denormal or invalid // status flags; exact fstp QWORD PTR res; // store as a double; exact } // read status word sw = _status87 (); // if invalid flag is set, and invalid exceptions are enabled, take trap if (!(XmmiEnv->Masks & I_MASK) && (sw & _SW_INVALID)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; // Cause = Enable & Status XmmiEnv->Ieee->Cause.InvalidOperation = 1; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 20: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } if (sw & _SW_DENORMAL) XmmiEnv->Flags |= D_MASK; // whether enabled or not // if denormal flag is set, and denormal exceptions are enabled, take trap if (!(XmmiEnv->Masks & D_MASK) && (sw & _SW_DENORMAL)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI_FP_Emulate () ERROR 28: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // at this point, there are no enabled invalid or denormal exceptions; // the instruction did not lead to an enabled exception; return result XmmiEnv->Ieee->Result.Value.Fp64Value = res; XmmiEnv->Ieee->Result.OperandValid = 1; XmmiEnv->Ieee->Status.Inexact = 0; XmmiEnv->Ieee->Status.Underflow = 0; XmmiEnv->Ieee->Status.Overflow = 0; XmmiEnv->Ieee->Status.ZeroDivide = 0; if (sw & _SW_INVALID) { XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; } else { XmmiEnv->Ieee->Status.InvalidOperation = 0; } #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 22: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (NoExceptionRaised); break; case OP_CVTPD2PS: case OP_CVTSD2SS: opd1 = XmmiEnv->Ieee->Operand1.Value.Fp64Value; if (XmmiEnv->Daz) { if (isdenormald (opd1)) opd1 = opd1 * 0.0; } // adjust the operation code XmmiEnv->Ieee->Operation = _FpCodeConvert; switch (XmmiEnv->Rc) { case _FpRoundNearest: rc = _RC_NEAR; break; case _FpRoundMinusInfinity: rc = _RC_DOWN; break; case _FpRoundPlusInfinity: rc = _RC_UP; break; case _FpRoundChopped: rc = _RC_CHOP; break; default: ; // internal error } // execute the operation and check whether the invalid or denormal // flag is set and the respective exception is enabled _control87 (rc | _PC_24 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC); // perform the conversion __asm { fnclex; fld QWORD PTR opd1; // may set the denormal or invalid // status flags; exact fstp DWORD PTR res24; // store as a single; may set the underflow/ // overflow and/or the inexact status flags } // read status word sw = _status87 (); // if invalid flag is set, and invalid exceptions are enabled, take trap if (!(XmmiEnv->Masks & I_MASK) && (sw & _SW_INVALID)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; // Cause = Enable & Status XmmiEnv->Ieee->Cause.InvalidOperation = 1; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 20: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } if (sw & _SW_DENORMAL) XmmiEnv->Flags |= D_MASK; // whether enabled or not // if denormal flag is set, and denormal exceptions are enabled, take trap if (!(XmmiEnv->Masks & D_MASK) && (sw & _SW_DENORMAL)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI_FP_Emulate () ERROR 28: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // done if opd1 is 0.0, inf, or NaN if (isnand (opd1) || isinfd (opd1) || iszerod (opd1)) { XmmiEnv->Ieee->Result.OperandValid = 1; XmmiEnv->Ieee->Result.Value.Fp32Value = res24; XmmiEnv->Ieee->Status.Underflow = 0; XmmiEnv->Ieee->Status.Overflow = 0; XmmiEnv->Ieee->Status.Inexact = 0; XmmiEnv->Ieee->Status.ZeroDivide = 0; if (sw & _SW_INVALID) { XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; } else { XmmiEnv->Ieee->Status.InvalidOperation = 0; } #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI_FP_Emulate () ERROR 5: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (NoExceptionRaised); } // at this point, there are no enabled I, or D exceptions, and the input // operand is not 0, infinity, or NaN; the instruction // might lead to an enabled underflow, enabled underflow and inexact, // enabled overflow, enabled overflow and inexact, enabled inexact, or // none of these; if there are no U or O enabled exceptions, the result // is res24 // check whether there is an underflow, overflow, or inexact trap to be // taken // calculate the result to 24 bits, with 'unbounded' exponent __asm { fnclex; fld QWORD PTR opd1; // may set the denormal or invalid // status flags (same as above) fld1; // +1.0 fmulp st(1), st(0); // rounded to 24 bits; may set P fstp QWORD PTR dbl_res24; // store as a double; may set the underflow/ // overflow and/or the inexact status flags, // but no new trap flags are set here } if (-MIN_SINGLE_NORMAL < dbl_res24 && dbl_res24 < MIN_SINGLE_NORMAL) { result_tiny = 1; } // check if the result is huge if (dbl_res24 < -MAX_SINGLE_NORMAL || MAX_SINGLE_NORMAL < dbl_res24) { result_huge = 1; } // if the underflow traps are enabled and the result is tiny, take // underflow trap if (!(XmmiEnv->Masks & U_MASK) && result_tiny) { // leave source and destination operands unchanged // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.Underflow = 1; XmmiEnv->Flags |= U_MASK; XmmiEnv->Ieee->Cause.Underflow = 1; XmmiEnv->Ieee->Result.OperandValid = 0; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI_FP_Emulate () ERROR 6: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // if overflow traps are enabled and the result is huge, take // overflow trap if (!(XmmiEnv->Masks & O_MASK) && result_huge) { // leave source and destination operands unchanged // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.Overflow = 1; XmmiEnv->Flags |= O_MASK; XmmiEnv->Ieee->Cause.Overflow = 1; XmmiEnv->Ieee->Result.OperandValid = 0; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI_FP_Emulate () ERROR 7: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // at this point, there are no enabled exceptions, or an enabled // inexact exception; return the result from res24 // if inexact traps are enabled and result is inexact, take inexact trap if (!(XmmiEnv->Masks & P_MASK) && ((sw & _SW_INEXACT) || (XmmiEnv->Fz && result_tiny))) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.Inexact = 1; XmmiEnv->Flags |= P_MASK; XmmiEnv->Ieee->Cause.Inexact = 1; XmmiEnv->Ieee->Result.OperandValid = 1; if (result_tiny) { XmmiEnv->Ieee->Status.Underflow = 1; XmmiEnv->Flags |= U_MASK; // Note: the condition above is equivalent to // if (sw & _SW_UNDERFLOW) XmmiEnv->Ieee->Status.Underflow = 1; } if (result_huge) { XmmiEnv->Ieee->Status.Overflow = 1; XmmiEnv->Flags |= O_MASK; // Note: the condition above is equivalent to // if (sw & _SW_OVERFLOW) XmmiEnv->Ieee->Status.Overflow = 1; } // if ftz = 1 and result is tiny, result = 0.0 // (no need to check for underflow traps disabled: result tiny and // underflow traps enabled would have caused taking an underflow // trap above) if (XmmiEnv->Fz && result_tiny) { // Note: the condition above is equivalent to // if (XmmiEnv->Fz && (sw & _SW_UNDERFLOW)) if (res24 > 0.0) // it might already be +0.0 from fmulp and/or fstp res24 = ZEROF; else if (res24 < 0.0) // it might already be -0.0 from fmulp/fstp res24 = NZEROF; // else leave res24 unchanged } XmmiEnv->Ieee->Result.Value.Fp32Value = res24; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI_FP_Emulate () ERROR 8: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // if it got here, then there is no trap to be taken XmmiEnv->Ieee->Result.OperandValid = 1; if (sw & _SW_UNDERFLOW) { XmmiEnv->Ieee->Status.Underflow = 1; XmmiEnv->Flags |= U_MASK; } else { XmmiEnv->Ieee->Status.Underflow = 0; } if (sw & _SW_OVERFLOW) { XmmiEnv->Ieee->Status.Overflow = 1; XmmiEnv->Flags |= O_MASK; } else { XmmiEnv->Ieee->Status.Overflow = 0; } if (sw & _SW_INEXACT) { XmmiEnv->Ieee->Status.Inexact = 1; XmmiEnv->Flags |= P_MASK; } else { XmmiEnv->Ieee->Status.Inexact = 0; } // if ftz = 1, and result is tiny (underflow traps must be disabled), // result = 0.0 if (XmmiEnv->Fz && result_tiny) { if (res24 > 0.0) res24 = ZEROF; else if (res24 < 0.0) res24 = NZEROF; // else leave res24 unchanged XmmiEnv->Ieee->Status.Inexact = 1; XmmiEnv->Flags |= P_MASK; XmmiEnv->Ieee->Status.Underflow = 1; XmmiEnv->Flags |= U_MASK; } XmmiEnv->Ieee->Result.Value.Fp32Value = res24; // note that there is no way to // communicate to the caller that the denormal flag was set - we count // on the XMMI instruction to have set the denormal flag in MXCSR if // needed, regardless of the other components of the input operands // (invalid or not; the caller will have to update the underflow, // overflow, and inexact flags in MXCSR) if (sw & _SW_INVALID) { XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; } else { XmmiEnv->Ieee->Status.InvalidOperation = 0; } #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI_FP_Emulate () ERROR 9: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (NoExceptionRaised); break; case OP_MAXPD: case OP_MAXSD: case OP_MINPD: case OP_MINSD: opd1 = XmmiEnv->Ieee->Operand1.Value.Fp64Value; opd2 = XmmiEnv->Ieee->Operand2.Value.Fp64Value; if (XmmiEnv->Daz) { if (isdenormald (opd1)) opd1 = opd1 * 0.0; if (isdenormald (opd2)) opd2 = opd2 * 0.0; } // adjust operation code switch (XmmiEnv->Ieee->Operation) { case OP_MAXPD: case OP_MAXSD: XmmiEnv->Ieee->Operation = _FpCodeFmax; break; case OP_MINPD: case OP_MINSD: XmmiEnv->Ieee->Operation = _FpCodeFmin; break; default: ; // will never occur } // check whether an invalid exception has to be raised if (isnand (opd1) || isnand (opd2)) invalid_exc = 1; else invalid_exc = 0; // if invalid_exc = 1, and invalid exceptions are enabled, take trap if (invalid_exc && !(XmmiEnv->Masks & I_MASK)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; // Cause = Enable & Status XmmiEnv->Ieee->Cause.InvalidOperation = 1; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 23: in_top =%d != out_top = " "%d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // checking for NaN operands has priority over denormal exceptions if (invalid_exc) { XmmiEnv->Ieee->Result.OperandValid = 1; XmmiEnv->Ieee->Result.Value.Fp64Value = opd2; XmmiEnv->Ieee->Status.Inexact = 0; XmmiEnv->Ieee->Status.Underflow = 0; XmmiEnv->Ieee->Status.Overflow = 0; XmmiEnv->Ieee->Status.ZeroDivide = 0; XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 24: in_top =%d != out_top = " "%d\n", in_top, out_top); exit (1); } #endif return (NoExceptionRaised); } // check whether a denormal exception has to be raised if (isdenormald (opd1) || isdenormald (opd2)) { denormal_exc = 1; XmmiEnv->Flags |= D_MASK; } else { denormal_exc = 0; } // if denormal_exc = 1, and denormal exceptions are enabled, take trap if (denormal_exc && !(XmmiEnv->Masks & D_MASK)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); // Note: the exception code is STATUS_FLOAT_INVALID in this case #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 25: in_top =%d != out_top = " "%d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // no exception has to be raised, and no operand is a NaN; calculate // and deliver the result if (opd1 < opd2) { switch (XmmiEnv->Ieee->Operation) { case _FpCodeFmax: XmmiEnv->Ieee->Result.Value.Fp64Value = opd2; break; case _FpCodeFmin: XmmiEnv->Ieee->Result.Value.Fp64Value = opd1; break; default: ; // will never occur } } else if (opd1 > opd2) { switch (XmmiEnv->Ieee->Operation) { case _FpCodeFmax: XmmiEnv->Ieee->Result.Value.Fp64Value = opd1; break; case _FpCodeFmin: XmmiEnv->Ieee->Result.Value.Fp64Value = opd2; break; default: ; // will never occur } } else if (opd1 == opd2) { XmmiEnv->Ieee->Result.Value.Fp64Value = opd2; } else { // could eliminate this case #ifdef _DEBUG_FPU fprintf (stderr, "XMMI2_FP_Emulation () INTERNAL XMMI2_FP_Emulation () ERROR for MAXPS/MAXSS/MINPS/MINSS\n"); #endif } XmmiEnv->Ieee->Result.OperandValid = 1; XmmiEnv->Ieee->Status.Inexact = 0; XmmiEnv->Ieee->Status.Underflow = 0; XmmiEnv->Ieee->Status.Overflow = 0; XmmiEnv->Ieee->Status.ZeroDivide = 0; // Note that the denormal flag will not be updated by _fpieee_flt (), // even if an operand is denormal XmmiEnv->Ieee->Status.InvalidOperation = 0; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 26: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (NoExceptionRaised); break; case OP_SQRTPD: case OP_SQRTSD: opd1 = XmmiEnv->Ieee->Operand1.Value.Fp64Value; if (XmmiEnv->Daz) { if (isdenormald (opd1)) opd1 = opd1 * 0.0; } // adjust operation code XmmiEnv->Ieee->Operation = _FpCodeSquareRoot; // execute the operation and check whether the invalid, denormal, or // inexact flags are set and the respective exceptions enabled switch (XmmiEnv->Rc) { case _FpRoundNearest: rc = _RC_NEAR; break; case _FpRoundMinusInfinity: rc = _RC_DOWN; break; case _FpRoundPlusInfinity: rc = _RC_UP; break; case _FpRoundChopped: rc = _RC_CHOP; break; default: ; // internal error } _control87 (rc | _PC_53 | _MCW_EM, _MCW_EM | _MCW_RC | _MCW_PC); // perform the square root __asm { fnclex; fld QWORD PTR opd1; // may set the denormal or invalid status flags fsqrt; // may set the inexact or invalid status flags fstp QWORD PTR res; // exact } // read status word sw = _status87 (); if (sw & _SW_INVALID) sw = sw & ~0x00080000; // clr D flag for sqrt(-den) // if invalid flag is set, and invalid exceptions are enabled, take trap if (!(XmmiEnv->Masks & I_MASK) && (sw & _SW_INVALID)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags |= I_MASK; // Cause = Enable & Status XmmiEnv->Ieee->Cause.InvalidOperation = 1; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 27: in_top =%d != out_top = " "%d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } if (sw & _SW_DENORMAL) XmmiEnv->Flags |= D_MASK; // if denormal flag is set, and denormal exceptions are enabled, take trap if (!(XmmiEnv->Masks & D_MASK) && (sw & _SW_DENORMAL)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 28: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // the result cannot be tiny // at this point, there are no enabled I or D or exceptions; the instr. // might lead to an enabled inexact exception or to no exception (this // includes the case of a NaN or negative operand); exceptions must have // been disabled before calling this function; an inexact exception is // reported on the fsqrt // if (the MXCSR inexact traps are disabled or the inexact flag is clear) // then deliver the result (the status flags are sticky, so they are // all set correctly already) // if it got here, then there is either an inexact trap to be taken, or // no trap at all XmmiEnv->Ieee->Result.Value.Fp64Value = res; // exact // if inexact traps are enabled and result is inexact, take inexact trap if (!(XmmiEnv->Masks & P_MASK) && (sw & _SW_INEXACT)) { // fill in part of the FP IEEE record Fill_FPIEEE_RECORD (XmmiEnv); XmmiEnv->Ieee->Status.Inexact = 1; XmmiEnv->Flags |= P_MASK; XmmiEnv->Ieee->Cause.Inexact = 1; XmmiEnv->Ieee->Result.OperandValid = 1; #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 29: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (ExceptionRaised); } // no trap was taken XmmiEnv->Ieee->Result.OperandValid = 1; XmmiEnv->Ieee->Status.Underflow = 0; XmmiEnv->Ieee->Status.Overflow = 0; if (sw & _SW_INEXACT) { XmmiEnv->Ieee->Status.Inexact = 1; XmmiEnv->Flags |= P_MASK; } else { XmmiEnv->Ieee->Status.Inexact = 0; } // note that there is no way to // communicate to the caller that the denormal flag was set - we count // on the XMMI instruction to have set the denormal flag in MXCSR if // needed, regardless of the other components of the input operands // (invalid or not); the caller will have to update the inexact flag // in MXCSR XmmiEnv->Ieee->Status.ZeroDivide = 0; if (sw & _SW_INVALID) { XmmiEnv->Ieee->Status.InvalidOperation = 1; XmmiEnv->Flags = I_MASK; // no other flags set if invalid is set } else { XmmiEnv->Ieee->Status.InvalidOperation = 0; } #ifdef _DEBUG_FPU // read status word sw = _status87 (); out_top = (sw >> 11) & 0x07; if (in_top != out_top) { printf ("XMMI2_FP_Emulation () ERROR 30: in_top =%d != out_top = %d\n", in_top, out_top); exit (1); } #endif return (NoExceptionRaised); break; case OP_UNSPEC: #ifdef _DEBUG_FPU fprintf (stderr, "XMMI2_FP_Emulation internal error: unknown operation code OP_UNSPEC\n"); #endif break; default: #ifdef _DEBUG_FPU fprintf (stderr, "XMMI2_FP_Emulation internal error: unknown operation code %d\n", XmmiEnv->Ieee->Operation); #endif break; } } static int iszerod (double d) { unsigned int *dp; dp = (unsigned int *)&d; if (((dp[1] & 0x7fffffff) == 0x0) && (dp[0] == 0x0)) return (1); else return (0); } static int isinfd (double d) { unsigned int *dp; dp = (unsigned int *)&d; if (((dp[1] & 0x7fffffff) == 0x7ff80000) && (dp[0] == 0x0)) return (1); else return (0); } static int issnand (double d) { unsigned int *dp; dp = (unsigned int *)&d; if (((dp[1] & 0x7ff80000) == 0x7ff00000) && (((dp[1] & 0x0007ffff) != 0) || (dp[0] != 0))) return (1); else return (0); } static int isnand (double d) { unsigned int *qp; qp = (unsigned int *)&d; if (((qp[1] & 0x7ff00000) == 0x7ff00000) && (((qp[1] & 0x000fffff) != 0x00000000) || qp[0] != 0x00000000)) return (1); else return (0); } static double quietd (double d) { // makes a signaling NaN quiet, and leaves a quiet NaN unchanged; does // not check that the input value d is a NaN unsigned int *dp; dp = (unsigned int *)&d; dp[1] = dp[1] | 0x00080000; return (d); } static int isdenormald (double d) { unsigned int *dp; dp = (unsigned int *)&d; if (((dp[1] & 0x7ff00000) == 0x0) && (((dp[1] & 0x000fffff) != 0) || (dp[0] != 0))) return (1); else return (0); } static int isdenormalf (float f) { // checks whether f is a denormal unsigned int *fp; fp = (unsigned int *)&f; if ((fp[0] & 0x7f800000) == 0x0 && (fp[0] & 0x007fffff) != 0x0) return (1); else return (0); } static void Fill_FPIEEE_RECORD (PXMMI_ENV XmmiEnv) { // fill in part of the FP IEEE record XmmiEnv->Ieee->RoundingMode = XmmiEnv->Rc; XmmiEnv->Ieee->Precision = XmmiEnv->Precision; XmmiEnv->Ieee->Enable.Inexact = !(XmmiEnv->Masks & P_MASK); XmmiEnv->Ieee->Enable.Underflow = !(XmmiEnv->Masks & U_MASK); XmmiEnv->Ieee->Enable.Overflow = !(XmmiEnv->Masks & O_MASK); XmmiEnv->Ieee->Enable.ZeroDivide = !(XmmiEnv->Masks & Z_MASK); XmmiEnv->Ieee->Enable.InvalidOperation = !(XmmiEnv->Masks & I_MASK); XmmiEnv->Ieee->Status.Inexact = 0; XmmiEnv->Ieee->Status.Underflow = 0; XmmiEnv->Ieee->Status.Overflow = 0; XmmiEnv->Ieee->Status.ZeroDivide = 0; XmmiEnv->Ieee->Status.InvalidOperation = 0; // Cause = Enable & Status XmmiEnv->Ieee->Cause.Inexact = 0; XmmiEnv->Ieee->Cause.Underflow = 0; XmmiEnv->Ieee->Cause.Overflow = 0; XmmiEnv->Ieee->Cause.ZeroDivide = 0; XmmiEnv->Ieee->Cause.InvalidOperation = 0; }