Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

647 lines
18 KiB

  1. /*++
  2. Copyright (c) 1995-1998 Microsoft Corporation
  3. Module Name:
  4. fpur10.c
  5. Abstract:
  6. Floating point 10-byte real support
  7. Author:
  8. 06-Oct-1995 BarryBo
  9. Revision History:
  10. --*/
  11. #include <nt.h>
  12. #include <ntrtl.h>
  13. #include <nturtl.h>
  14. #include <windows.h>
  15. #include <float.h>
  16. #include <math.h>
  17. #include <stdio.h>
  18. #include "wx86.h"
  19. #include "cpuassrt.h"
  20. #include "fragp.h"
  21. #include "fpufragp.h"
  22. ASSERTNAME;
  23. //
  24. // Forward declarations
  25. //
  26. NPXLOADINTELR10TOR8(LoadIntelR10ToR8_VALID);
  27. NPXLOADINTELR10TOR8(LoadIntelR10ToR8_ZERO);
  28. NPXLOADINTELR10TOR8(LoadIntelR10ToR8_SPECIAL);
  29. NPXLOADINTELR10TOR8(LoadIntelR10ToR8_EMPTY);
  30. NPXPUTINTELR10(PutIntelR10_VALID);
  31. NPXPUTINTELR10(PutIntelR10_ZERO);
  32. NPXPUTINTELR10(PutIntelR10_SPECIAL);
  33. NPXPUTINTELR10(PutIntelR10_EMPTY);
  34. //
  35. // Jump tables
  36. //
  37. const NpxLoadIntelR10ToR8 LoadIntelR10ToR8Table[TAG_MAX] = {
  38. LoadIntelR10ToR8_VALID,
  39. LoadIntelR10ToR8_ZERO,
  40. LoadIntelR10ToR8_SPECIAL,
  41. LoadIntelR10ToR8_EMPTY
  42. };
  43. const NpxPutIntelR10 PutIntelR10Table[TAG_MAX] = {
  44. PutIntelR10_VALID,
  45. PutIntelR10_ZERO,
  46. PutIntelR10_SPECIAL,
  47. PutIntelR10_EMPTY
  48. };
  49. VOID
  50. ComputeR10Tag(
  51. USHORT *r10,
  52. PFPREG FpReg
  53. )
  54. /*++
  55. Routine Description:
  56. Computes the TAG value for an R10, classifying it so conversion to R8
  57. is simpler.
  58. Arguments:
  59. r10 - pointer to R10 value to classify.
  60. FpReg - OUT FP register to set Tag and TagSpecial fields in
  61. Return Value:
  62. Tag value which classifies the R10.
  63. --*/
  64. {
  65. USHORT Exponent;
  66. /* On average, the value will be zero or a valid real, so those cases
  67. * have the fastest code paths. NANs tend to be less frequent and are
  68. * slower to calculate.
  69. */
  70. Exponent = r10[4] & 0x7fff;
  71. if (Exponent == 0x7fff) {
  72. // exponent is all 1's - NAN or INFINITY of some sort
  73. FpReg->Tag = TAG_SPECIAL;
  74. if (r10[0] == 0 && r10[1] == 0 && r10[2] == 0) {
  75. // Low 6 bytes of mantissa are 0.
  76. if (r10[3] & 0x4000) {
  77. // 2nd bit of mantissa set - INDEF or QNAN
  78. if (r10[3] == 0xc000 && r10[4] == 0xffff) {
  79. // INDEF - negative and only top 2 bits of mantissa set
  80. FpReg->TagSpecial = TAG_SPECIAL_INDEF;
  81. } else {
  82. // QNAN - positive or more than 2 top bits set
  83. FpReg->TagSpecial = TAG_SPECIAL_QNAN;
  84. }
  85. } else if (r10[3] & 0x3fff) {
  86. // SNAN - Only top 1 bit of mantissa is set
  87. FpReg->TagSpecial = TAG_SPECIAL_SNAN;
  88. } else {
  89. FpReg->TagSpecial = TAG_SPECIAL_INFINITY;
  90. }
  91. } else {
  92. // Some bit is set in the low 6 bytes - SNAN or QNAN
  93. if (r10[3] & 0x4000) {
  94. // QNAN - Top 2 bits of mantissa set
  95. FpReg->TagSpecial = TAG_SPECIAL_QNAN;
  96. } else {
  97. // SNAN - 2nd bit of mantissa clear
  98. FpReg->TagSpecial = TAG_SPECIAL_SNAN;
  99. }
  100. }
  101. } else if (Exponent == 0) {
  102. // exponent is 0 - DENORMAL or ZERO
  103. if (r10[0] == 0 && r10[1] == 0 && r10[2] == 0 && r10[3] == 0) {
  104. // mantissa is all zeroes - ZERO
  105. FpReg->Tag = TAG_ZERO;
  106. } else {
  107. FpReg->Tag = TAG_SPECIAL;
  108. FpReg->TagSpecial = TAG_SPECIAL_DENORM;
  109. }
  110. } else {
  111. // Exponent is not all 1's and not all 0's - a VALID
  112. FpReg->Tag = TAG_VALID;
  113. }
  114. }
  115. VOID
  116. ChopR10ToR8(
  117. PBYTE r10,
  118. PFPREG FpReg,
  119. USHORT R10Exponent
  120. )
  121. /*++
  122. Routine Description:
  123. Chops a 10-byte real to fit into an FPREG's r64 field. The FPREG's Tag
  124. value is not set.
  125. Arguments:
  126. r10 - 10-byte real to load
  127. FpReg - Destination FP register
  128. R10Exponent - Biased exponent from the R10 value
  129. Return Value:
  130. None
  131. --*/
  132. {
  133. short Exponent;
  134. PBYTE r8 = (PBYTE)&FpReg->r64;
  135. if (FpReg->Tag == TAG_SPECIAL && FpReg->TagSpecial != TAG_SPECIAL_DENORM) {
  136. //
  137. // The caller must handle all other special values itself.
  138. //
  139. CPUASSERT(FpReg->TagSpecial == TAG_SPECIAL_QNAN || FpReg->TagSpecial == TAG_SPECIAL_SNAN);
  140. //
  141. // The R10 is a QNAN or an SNAN - ignore its exponent (fifteen 1's)
  142. // and set Exponent to be the correct number of 1 bits for an R8
  143. // (11 ones, in the correct location within a SHORT)
  144. //
  145. Exponent = (short)0x7ff0;
  146. } else {
  147. //
  148. // The R10 is a valid number. Convert the R10 exponent to an
  149. // R8 exponent by changing the bias.
  150. //
  151. Exponent = (short)R10Exponent - 16383;
  152. if (Exponent < -1022) {
  153. //
  154. // Exponent is too small - silently convert the R10 to an
  155. // R8 +/-DBL_MIN
  156. //
  157. if (r8[7] & 0x80) {
  158. FpReg->r64 = -DBL_MIN;
  159. } else {
  160. FpReg->r64 = DBL_MIN;
  161. }
  162. return;
  163. } else if (Exponent > 1023) {
  164. //
  165. // Exponent is too big - silently convert the R10 to an
  166. // R8 +/-DBL_MAX
  167. //
  168. if (r8[7] & 0x80) {
  169. FpReg->r64 = -DBL_MAX;
  170. } else {
  171. FpReg->r64 = DBL_MAX;
  172. }
  173. return;
  174. }
  175. //
  176. // Bias the exponent and shift it to the correct location for an R8
  177. //
  178. Exponent = ((USHORT)(Exponent + 1023) & 0x7ff) << 4;
  179. }
  180. // Copy in the top 7 bits of the exponent along with the sign bit
  181. r8[7] = (r10[9] & 0x80) | ((USHORT)Exponent >> 8);
  182. // Copy in the remaining 4 bits of the exponent, along with bits 1-4 of
  183. // the R10's mantissa (bit 0 is always 1 in R10s).
  184. r8[6] = (Exponent & 0xf0) | ((r10[7] >> 3) & 0x0f);
  185. // Copy bits 6-13 from the R10's mantissa
  186. r8[5] = (r10[7] << 5) | ((r10[6] >> 3) & 0x1f); // bits 5-12 from the R10
  187. r8[4] = (r10[6] << 5) | ((r10[5] >> 3) & 0x1f); // bits 14-20 from the R10
  188. r8[3] = (r10[5] << 5) | ((r10[4] >> 3) & 0x1f); // bits 21-28 from the R10
  189. r8[2] = (r10[4] << 5) | ((r10[3] >> 3) & 0x1f); // bits 29-36 from the R10
  190. r8[1] = (r10[3] << 5) | ((r10[2] >> 3) & 0x1f); // bits 37-44 from the R10
  191. r8[0] = (r10[2] << 5) | ((r10[1] >> 3) & 0x1f); // bits 45-52 from the R10
  192. //
  193. // Bits 53-64 from the R10 are ignored. The caller may examine them
  194. // and round the resulting R8 accordingly.
  195. //
  196. }
  197. VOID
  198. NextValue(
  199. PFPREG Fp,
  200. BOOLEAN RoundingUp
  201. )
  202. /*++
  203. Routine Description:
  204. Replaces a floating-point value with either its higher- or lower-
  205. valued neighbour.
  206. Arguments:
  207. Fp - floating-point value to adjust (tag must be set to one of:
  208. TAG_VALID, TAG_ZERO or TAG_SPECIAL/TAG_SPECIAL_DENORM)
  209. RoundingUp - TRUE if the next value is to be the higher-valued neighbour.
  210. FALSE to return the lower-valued neighbour.
  211. Return Value:
  212. None. Value in FP and the Tag may have changed.
  213. --*/
  214. {
  215. DWORD OldExp;
  216. DWORD NewExp;
  217. DWORD Sign;
  218. if (Fp->Tag == TAG_ZERO) {
  219. //
  220. // Neighbour of 0.0 is +/- DBL_MIN.
  221. //
  222. Fp->Tag = TAG_VALID;
  223. if (RoundingUp) {
  224. Fp->r64 = DBL_MIN;
  225. } else {
  226. Fp->r64 = -DBL_MIN;
  227. }
  228. return;
  229. }
  230. //
  231. // Remember the original sign and exponent
  232. //
  233. Sign = Fp->rdw[1] & 0x80000000;
  234. OldExp = Fp->rdw[1] & 0x7ff00000;
  235. //
  236. // Treat x as a 64-bit integer then add or subtract 1.
  237. //
  238. if ((Sign && RoundingUp) || (!Sign && !RoundingUp)) {
  239. //
  240. // x is negative. Subtract 1.
  241. //
  242. Fp->rdw[0]--;
  243. if (Fp->rdw[0] == 0xffffffff) {
  244. //
  245. // need to borrow from the high dword
  246. //
  247. Fp->rdw[1]--;
  248. }
  249. } else {
  250. //
  251. // x is positive. Add 1.
  252. //
  253. Fp->rdw[0]++;
  254. if (Fp->rdw[0] == 0) {
  255. //
  256. // propagate carry to the high dword
  257. //
  258. Fp->rdw[1]++;
  259. }
  260. }
  261. //
  262. // Get the new value of the exponent
  263. //
  264. NewExp = Fp->rdw[1] & 0x7ff00000;
  265. if (NewExp != OldExp) {
  266. //
  267. // A borrow or a carry caused the exponent to change.
  268. //
  269. if (NewExp == 0x7ff00000) {
  270. //
  271. // Got an overflow. Return the largest double value.
  272. //
  273. Fp->Tag = TAG_VALID;
  274. if (Sign) {
  275. Fp->r64 = -DBL_MAX;
  276. } else {
  277. Fp->r64 = DBL_MAX;
  278. }
  279. } else if (OldExp && !NewExp) {
  280. //
  281. // The original value was a normal number, but the result is a
  282. // denormal. Convert the underflow to a 0 with the correct sign.
  283. //
  284. Fp->Tag = TAG_ZERO;
  285. Fp->rdw[0] = 0;
  286. Fp->rdw[1] = Sign;
  287. }
  288. }
  289. }
  290. NPXLOADINTELR10TOR8(LoadIntelR10ToR8_VALID)
  291. {
  292. USHORT R10Exponent = (*(USHORT *)&r10[8]) & 0x7fff;
  293. // Copy the value in, chopping exponent and mantissa to fit
  294. ChopR10ToR8(r10, Fp, R10Exponent);
  295. if (r10[0] != 0 || (r10[1]&0x7) != 0) {
  296. // The value can't fit without rounding. DO NOT REPORT THIS
  297. // AS AN OVERFLOW EXCEPTION - THIS ONLY OCCURS BECAUSE THE
  298. // FPU EMULATOR IS USING R8 ARITHMETIC INTERNALLY. Because of
  299. // this, the roundoff should be performed silently. The default
  300. // behavior when a masked overflow exception is performed is to
  301. // store +/-infinity. We don't want hand-coded R10's loading as
  302. // infinity as many instructions thow Invalid Operation exceptions
  303. // when they detect an infinity.
  304. switch (cpu->FpControlRounding) {
  305. case 0: // round to nearest or even
  306. {
  307. FPREG a, c;
  308. double ba, cb;
  309. a = *Fp;
  310. NextValue(&a, FALSE); // a is lower neighbour
  311. // b = Fp->r64.
  312. c = *Fp;
  313. NextValue(&c, TRUE); // c is higher neighbour
  314. ba = Fp->r64 - a.r64;
  315. cb = c.r64 - Fp->r64;
  316. if (ba == cb) {
  317. // a and c are equally close to b - select the even
  318. // number (LSB==0)
  319. if ( ((*(PBYTE)&a) & 1) == 0) {
  320. *Fp = a;
  321. } else {
  322. *Fp = c;
  323. }
  324. } else if (ba < cb) {
  325. // a is closer to b than c is. Choose a
  326. *Fp = a;
  327. } else {
  328. // c is closer to b than a is. Choose c
  329. *Fp = c;
  330. }
  331. }
  332. break;
  333. case 1: // round down (towards -infinity)
  334. NextValue(Fp, FALSE);
  335. break;
  336. case 2: // round up (towards +infinity)
  337. NextValue(Fp, TRUE);
  338. break;
  339. case 3: // chop (truncate toward zero)
  340. if (Fp->rdw[0] == 0 && (Fp->rdw[1] & 0x7fffffff) == 0) {
  341. //
  342. // Truncated value is 0.0. Reclassify.
  343. //
  344. Fp->Tag = TAG_ZERO;
  345. }
  346. break;
  347. }
  348. }
  349. }
  350. NPXLOADINTELR10TOR8(LoadIntelR10ToR8_ZERO)
  351. {
  352. // write in zeroes
  353. Fp->r64 = 0.0;
  354. // copy in the sign bit
  355. Fp->rb[7] = r10[9] & 0x80;
  356. }
  357. NPXLOADINTELR10TOR8(LoadIntelR10ToR8_SPECIAL)
  358. {
  359. switch (Fp->TagSpecial) {
  360. case TAG_SPECIAL_INFINITY:
  361. Fp->rdw[0] = 0; // low 32 bits of mantissa are zero
  362. Fp->rdw[1] = 0x7ff00000; // mantissa=0, exponent=1s
  363. Fp->rb[7] |= r10[9] & 0x80; // copy in the sign bit
  364. break;
  365. case TAG_SPECIAL_INDEF:
  366. #if NATIVE_NAN_IS_INTEL_FORMAT
  367. Fp->rdw[0] = 0;
  368. Fp->rdw[1] = 0xfff80000;
  369. #else
  370. Fp->rdw[0] = 0xffffffff;
  371. Fp->rdw[1] = 0x7ff7ffff;
  372. #endif
  373. break;
  374. case TAG_SPECIAL_SNAN:
  375. case TAG_SPECIAL_QNAN:
  376. ChopR10ToR8(r10, Fp, (USHORT)((*(USHORT *)&r10[8]) & 0x7fff));
  377. #if !NATIVE_NAN_IS_INTEL_FORMAT
  378. Fp->rb[6] ^= 0x08; // invert the top bit of the mantissa
  379. #endif
  380. break;
  381. case TAG_SPECIAL_DENORM:
  382. LoadIntelR10ToR8_VALID(cpu, r10, Fp);
  383. break;
  384. }
  385. }
  386. NPXLOADINTELR10TOR8(LoadIntelR10ToR8_EMPTY)
  387. {
  388. CPUASSERT(FALSE);
  389. }
  390. VOID
  391. LoadIntelR10ToR8(
  392. PCPUDATA cpu,
  393. PBYTE r10,
  394. PFPREG FpReg
  395. )
  396. /*++
  397. Routine Description:
  398. Converts an Intel 10-byte real to an FPREG (Tag and 64-byte real).
  399. According to emload.asm, this is not an arithmetic operation,
  400. so SNANs do not throw exceptions.
  401. Arguments:
  402. cpu - per-thread data
  403. r10 - 10-byte real to load
  404. FpReg - destination FP register.
  405. Return Value:
  406. None
  407. --*/
  408. {
  409. // Classify the R10 and store its tag into the FP register
  410. ComputeR10Tag( (USHORT*)r10, FpReg );
  411. // Perform the coersion based on the classification
  412. (*LoadIntelR10ToR8Table[FpReg->Tag])(cpu, r10, FpReg);
  413. }
  414. FRAG1(FLD80, BYTE) // FLD m80real
  415. {
  416. PFPREG ST0;
  417. FpArithDataPreamble(cpu, pop1);
  418. cpu->FpStatusC1 = 0; // assume no error
  419. PUSHFLT(ST0);
  420. if (ST0->Tag != TAG_EMPTY) {
  421. HandleStackFull(cpu, ST0);
  422. } else {
  423. LoadIntelR10ToR8(cpu, pop1, ST0);
  424. if (ST0->Tag == TAG_SPECIAL && ST0->TagSpecial == TAG_SPECIAL_DENORM) {
  425. if (!(cpu->FpControlMask & FPCONTROL_DM)) {
  426. cpu->FpStatusES = 1; // Unmasked exception
  427. }
  428. cpu->FpStatusExceptions |= FPCONTROL_DM;
  429. }
  430. }
  431. }
  432. NPXPUTINTELR10(PutIntelR10_VALID)
  433. {
  434. USHORT Exponent;
  435. FPREG FpReg;
  436. //
  437. // Ugly compatibility hack here. If the app sets the Tag word so all
  438. // registers are VALID, but the registers actually contain ZERO, detect
  439. // and correct that so we write the correct value back to memory.
  440. //
  441. FpReg.r64 = Fp->r64;
  442. SetTag(&FpReg);
  443. if (FpReg.Tag != TAG_VALID &&
  444. !(FpReg.Tag == TAG_SPECIAL && FpReg.TagSpecial == TAG_SPECIAL_DENORM)) {
  445. //
  446. // The app lied to us. The tag word does not match the value in the
  447. // tag field. Write the value according to its actual tag, not
  448. // according to the tag the app tried to foist on us.
  449. //
  450. PutIntelR10(r10, &FpReg);
  451. return;
  452. }
  453. // Grab the 11-bit SIGNED exponent and sign-extend it to 15 bits
  454. Exponent = (short)((FpReg.rdw[1] >> 20) & 0x7ff) - 1023 + 16383;
  455. // Drop in the sign bit
  456. if (FpReg.rb[7] >= 0x80) {
  457. Exponent |= 0x8000;
  458. }
  459. // Write the sign and exponent into the r10
  460. r10[9] = (Exponent >> 8) & 0xff;
  461. r10[8] = Exponent & 0xff;
  462. // Bit 0 of the mantissa is always 1 for R10 values, so write that
  463. // in, along with the first 7 bits of the FpReg.rb mantissa.
  464. r10[7] = 0x80 | ((FpReg.rb[6] & 0x0f) << 3) | (FpReg.rb[5] >> 5);
  465. // Copy in the remaining bits of the FpReg.rb mantissa
  466. r10[6] = (FpReg.rb[5] << 3) | (FpReg.rb[4] >> 5); // copy bits 7-14 from the FpReg.rb
  467. r10[5] = (FpReg.rb[4] << 3) | (FpReg.rb[3] >> 5); // copy bits 15-22
  468. r10[4] = (FpReg.rb[3] << 3) | (FpReg.rb[2] >> 5); // copy bits 23-30
  469. r10[3] = (FpReg.rb[2] << 3) | (FpReg.rb[1] >> 5); // copy bits 31-38
  470. r10[2] = (FpReg.rb[1] << 3) | (FpReg.rb[0] >> 5); // copy bits 39-46
  471. r10[1] = FpReg.rb[0] << 3; // copy bits 46-52, then fill the remaining bits
  472. r10[0] = 0; // of the R10 mantissa with 0s
  473. }
  474. NPXPUTINTELR10(PutIntelR10_ZERO)
  475. {
  476. r10[9] = Fp->rb[7]; // copy in sign plus 7 bits of exponent
  477. memset(r10, 0, 9); // remainder is all zeroes
  478. }
  479. NPXPUTINTELR10(PutIntelR10_SPECIAL)
  480. {
  481. switch (Fp->TagSpecial) {
  482. case TAG_SPECIAL_INDEF:
  483. r10[9] = 0xff; // sign=1, exponent = 7 1s
  484. r10[8] = 0xff; // exponent = 8 1s
  485. r10[7] = 0xc0; // mantissa = 1100.00
  486. memset(r10, 0, 7); // store rest of mantissa
  487. break;
  488. case TAG_SPECIAL_INFINITY:
  489. r10[9] = Fp->rb[7]; // copy in sign plus 7 bits of exponent
  490. r10[8] = 0xff; // remainder of exponent is all 1s
  491. r10[7] = 0x80; // top bit of mantissa is 1, rest is 0s
  492. memset(r10, 0, 7); // remainder is all zeroes
  493. break;
  494. case TAG_SPECIAL_QNAN:
  495. case TAG_SPECIAL_SNAN:
  496. r10[9] = Fp->rb[7]; // copy in sign plus 7 1 bits of exponent
  497. r10[8] = 0xff; // remainder of exponent is all 1s
  498. // Bit 0 of the mantissa is always 1 for R10 values, so write that
  499. // in, along with the first 7 bits of the R8 mantissa.
  500. r10[7] = 0x80 | ((Fp->rb[6] & 0x0f) << 3) | (Fp->rb[5] >> 5);
  501. #if !NATIVE_NAN_IS_INTEL_FORMAT
  502. r10[7] ^= 0x40; // switch the meaning of the NAN
  503. #endif
  504. r10[6] = (Fp->rb[5] << 3) | (Fp->rb[4] >> 5); // copy bits 7-14 from the R8
  505. r10[5] = (Fp->rb[4] << 3) | (Fp->rb[3] >> 5); // copy bits 15-22
  506. r10[4] = (Fp->rb[3] << 3) | (Fp->rb[2] >> 5); // copy bits 23-30
  507. r10[3] = (Fp->rb[2] << 3) | (Fp->rb[1] >> 5); // copy bits 31-38
  508. r10[2] = (Fp->rb[1] << 3) | (Fp->rb[0] >> 5); // copy bits 39-46
  509. r10[1] = Fp->rb[0] << 3; // copy bits 46-52, then fill the remaining bits
  510. r10[0] = 0; // of the R10 mantissa with 0s
  511. break;
  512. default:
  513. CPUASSERT(FALSE); // fall through in free builds
  514. case TAG_SPECIAL_DENORM:
  515. PutIntelR10_VALID(r10, Fp);
  516. break;
  517. }
  518. }
  519. NPXPUTINTELR10(PutIntelR10_EMPTY)
  520. {
  521. CPUASSERT(FALSE); // Callers must handle TAG_EMPTY on their own.
  522. }
  523. FRAG1(FSTP80, BYTE) // FSTP m80real
  524. {
  525. PFPREG ST0;
  526. FpArithDataPreamble(cpu, pop1);
  527. cpu->FpStatusC1 = 0; // assume no error
  528. ST0 = cpu->FpST0;
  529. if (ST0->Tag == TAG_EMPTY && HandleStackEmpty(cpu, ST0)) {
  530. return;
  531. }
  532. PutIntelR10(pop1, ST0);
  533. POPFLT;
  534. }