Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1206 lines
38 KiB

  1. subttl emftran.asm - Transcendental instructions
  2. page
  3. ;*******************************************************************************
  4. ; Copyright (c) Microsoft Corporation 1991
  5. ; All Rights Reserved
  6. ;
  7. ;emftran.asm - Transcendental instructions
  8. ; by Tim Paterson
  9. ;
  10. ;Purpose:
  11. ; F2XM1, FPATAN, FYL2X, FYL2XP1 instructions
  12. ;Inputs:
  13. ; edi = [CURstk]
  14. ;
  15. ;Revision History:
  16. ;
  17. ; [] 09/05/91 TP Initial 32-bit version.
  18. ;
  19. ;*******************************************************************************
  20. ;********************* Polynomial Coefficients *********************
  21. ;These polynomial coefficients were all taken from "Computer Approximations"
  22. ;by J.F. Hart (reprinted 1978 w/corrections). All calculations and
  23. ;conversions to hexadecimal were done with a character-string calculator
  24. ;written in Visual Basic with precision set to 30 digits. Once the constants
  25. ;were typed into this file, all transfers were done with cut-and-paste
  26. ;operations to and from the calculator to help eliminate any typographical
  27. ;errors.
  28. tAtanPoly label word
      ;Layout, as consumed by EvalPoly/Eval2Poly: a dword degree followed by
      ;the coefficients, highest order first (Horner's rule). Each coefficient
      ;is 12 bytes: dq 64-bit mantissa, dw tag (sign in the high byte), dw
      ;exponent. The exponent word is written as the "H" exponent shown in the
      ;comment, minus one. Mantissas are the hex value rounded to 64 bits.
  29. ;These constants are from Hart #5056: atan(x) = x * P(x^2) / Q(x^2),
  30. ;accurate to 20.78 digits over interval [0, tan(pi/12)].
  31. dd 4 ;P() is degree four
  32. ; Hart constant
  33. ;
  34. ;+.16241 70218 72227 96595 08 E0
  35. ;Hex value: 0.A650A5D5050DE43A2C25A8C00 HFFFE
  36. dq 0A650A5D5050DE43AH
  37. dw bTAG_VALID,0FFFEH-1
  38. ;+.65293 76545 29069 63960 675 E1
  39. ;Hex value: 0.D0F0A714A9604993AC4AC49A0 H3
  40. dq 0D0F0A714A9604994H
  41. dw bTAG_VALID,03H-1
  42. ;+.39072 57269 45281 71734 92684 E2
  43. ;Hex value: 0.9C4A507F16530AC3CDDEFA3DE H6
  44. dq 09C4A507F16530AC4H
  45. dw bTAG_VALID,06H-1
  46. ;+.72468 55912 17450 17145 90416 9 E2
  47. ;Hex value: 0.90EFE6FB30465042CF089D1310 H7
  48. dq 090EFE6FB30465043H
  49. dw bTAG_VALID,07H-1
  50. ;+.41066 29181 34876 24224 77349 62 E2
  51. ;Hex value: 0.A443E2004BB000B84A5154D44 H6
  52. dq 0A443E2004BB000B8H
  53. dw bTAG_VALID,06H-1
  54. dd 4 ;Q() is degree four. First coefficient is 1.0 and is not listed,
      ;so only four entries follow (Eval2Poly enters at EvalPolyAdd for Q()).
  55. ; Hart constant
  56. ;
  57. ;+.15023 99905 56978 85827 4928 E2
  58. ;Hex value: 0.F0624CD575B782643AFB912D0 H4
  59. dq 0F0624CD575B78264H
  60. dw bTAG_VALID,04H-1
  61. ;+.59578 42201 83554 49303 22456 E2
  62. ;Hex value: 0.EE504DDC907DEAEB7D7473B82 H6
  63. dq 0EE504DDC907DEAEBH
  64. dw bTAG_VALID,06H-1
  65. ;+.86157 32305 95742 25062 42472 E2
  66. ;Hex value: 0.AC508CA5E78E504AB2032E864 H7
  67. dq 0AC508CA5E78E504BH
  68. dw bTAG_VALID,07H-1
  69. ;+.41066 29181 34876 24224 84140 84 E2
  70. ;Hex value: 0.A443E2004BB000B84F542813C H6
  71. dq 0A443E2004BB000B8H
  72. dw bTAG_VALID,06H-1
      ;Immediate-operand constants for the FPATAN code. For each value, Hi/Lo
      ;are the upper/lower 32 bits of the 64-bit mantissa (rounded to nearest)
      ;and the exponent is the "H" exponent from the hex value, minus one.
  73. ;tan(pi/12) = tan(15 deg.) = 2 - sqrt(3)
  74. ;= 0.26794 91924 31122 70647 25536 58494 12763 ;From Hart appendix
  75. ;Hex value: 0.8930A2F4F66AB189B517A51F2 HFFFF
  76. Tan15Hi equ 08930A2F4H
  77. Tan15Lo equ 0F66AB18AH
  78. Tan15exp equ 0FFFFH-1
  79. ;1/tan(pi/6) = sqrt(3) = 1.73205 08075 68877 29352 74463 41505 87236 ;From Hart appendix
  80. ;Hex value: 0.DDB3D742C265539D92BA16B8 H1
  81. Sqrt3Hi equ 0DDB3D742H
  82. Sqrt3Lo equ 0C265539EH
  83. Sqrt3exp equ 01H-1
  84. ;pi = +3.14159265358979323846264338328
  85. ;Hex value: 0.C90FDAA22168C234C4C6628B8 H2
      ;NOTE(review): PiMant (below) loads XPiHi/XPiMid/XPiLo, which are not
      ;defined in the portion of the file reviewed here -- presumably they are
      ;defined elsewhere; confirm.
  86. PiHi equ 0C90FDAA2H
  87. PiLo equ 02168C235H
  88. PiExp equ 02H-1
  89. ;3*pi = +9.42477796076937971538793014984
  90. ;Hex value: 0.96CBE3F9990E91A79394C9E890 H4
      ;The "X" (extended) form is split three ways: Hi and Mid are the
      ;unrounded 64-bit mantissa, and Lo is a truncated copy of the bits below
      ;it. AtanTwoInf loads them into ebx:esi:eax for rounding via [TransRound].
  91. XThreePiHi equ 096CBE3F9H
  92. XThreePiMid equ 0990E91A7H
  93. XThreePiLo equ 090000000H
  94. ThreePiExp equ 04H-1
  95. ;This is a table of multiples of pi/6. It is used to adjust the
  96. ;final result angle after atan(). Derived from Hart appendix
  97. ;pi/180 = 0.01745 32925 19943 29576 92369 07684 88612
  98. ;
  99. ;When the reduced argument for atan() is very small, these correction
  100. ;constants simply become the result. These constants have all been
  101. ;rounded to nearest, but the user may have selected a different rounding
  102. ;mode. The tag byte is not needed for these constants, so its space
  103. ;is used to indicate if it was rounded. To determine if a constant
  104. ;was rounded, 7FH is subtracted from this flag; CY set means it was
  105. ;rounded up.
      ;Rounding-direction markers stored in the low word of each table entry
      ;(the slot that holds the tag in ordinary coefficients).
  106. RoundedUp equ 040H
  107. RoundedDown equ 0C0H
  108. tAtanPiFrac label dword
       ;Eight 12-byte entries (dq mantissa, dw rounding marker, dw exponent).
       ;TinyAtan and AtanCorrection index this table with the adjusted flag
       ;bits 7,6,5 (after "add dl,40H"), scaled by 12, so entry order must
       ;match the second correction table in AtanCorrection.
  109. ;pi/2 = +1.57079632679489661923132169163
  110. ;Hex value: 0.C90FDAA22168C234C4C6628B0 H1
  111. dq 0C90FDAA22168C235H
  112. dw RoundedUp,01H-1
  113. ;2*pi/3 = +2.09439510239319549230842892218
  114. ;Hex value: 0.860A91C16B9B2C232DD997078 H2
  115. dq 0860A91C16B9B2C23H
  116. dw RoundedDown,02H-1
  117. ;none (placeholder: this index means "no correction" and is filtered out
       ;before the table lookup by the "jz AtanSetSign" test on the flag byte)
  118. dd 0,0,0
  119. ;pi/6 = +0.523598775598298873077107230544E0
  120. ;Hex value: 0.860A91C16B9B2C232DD99707A H0
  121. dq 0860A91C16B9B2C23H
  122. dw RoundedDown,00H-1
  123. ;pi/2 = +1.57079632679489661923132169163
  124. ;Hex value: 0.C90FDAA22168C234C4C6628B0 H1
  125. dq 0C90FDAA22168C235H
  126. dw RoundedUp,01H-1
  127. ;pi/3 = +1.04719755119659774615421446109
  128. ;Hex value: 0.860A91C16B9B2C232DD997078 H1
  129. dq 0860A91C16B9B2C23H
  130. dw RoundedDown,01H-1
  131. ;pi = +3.14159265358979323846264338328
  132. ;Hex value: 0.C90FDAA22168C234C4C6628B8 H2
  133. dq 0C90FDAA22168C235H
  134. dw RoundedUp,02H-1
  135. ;5*pi/6 = +2.61799387799149436538553615272
  136. ;Hex value: 0.A78D3631C681F72BF94FFCC96 H2
  137. dq 0A78D3631C681F72CH
  138. dw RoundedUp,02H-1
  139. ;*********************
  140. tExpPoly label word
       ;Polynomial table for F2XM1, passed to Eval2Poly at ExpReduced.
       ;Layout: dword degree, then 12-byte coefficients (dq mantissa, dw tag,
       ;dw exponent = "H" exponent minus one), highest order first.
  141. ;These constants are from Hart #1324: 2^x - 1 =
  142. ; 2 * x * P(x^2) / ( Q(x^2) - x * P(x^2) )
  143. ;accurate to 21.54 digits over interval [0, 0.5].
  144. dd 2 ;P() is degree two
  145. ; Hart constant
  146. ;
  147. ;+.60613 30790 74800 42574 84896 07 E2
  148. ;Hex value: 0.F27406FCF405189818F68BB78 H6
  149. dq 0F27406FCF4051898H
  150. dw bTAG_VALID,06H-1
  151. ;+.30285 61978 21164 59206 24269 927 E5
  152. ;Hex value: 0.EC9B3D5414E1AD0852E432A18 HF
  153. dq 0EC9B3D5414E1AD08H
  154. dw bTAG_VALID,0FH-1
  155. ;+.20802 83036 50596 27128 55955 242 E7
  156. ;Hex value: 0.FDF0D84AC3A35FAF89A690CC4 H15
  157. dq 0FDF0D84AC3A35FB0H
  158. dw bTAG_VALID,015H-1
  159. dd 3 ;Q() is degree three. First
  160. ;coefficient is 1.0 and is not listed.
  161. ; Hart constant
  162. ;
  163. ;+.17492 20769 51057 14558 99141 717 E4
  164. ;Hex value: 0.DAA7108B387B776F212ECFBEC HB
  165. dq 0DAA7108B387B776FH
  166. dw bTAG_VALID,0BH-1
  167. ;+.32770 95471 93281 18053 40200 719 E6
  168. ;Hex value: 0.A003B1829B7BE85CC81BD5309 H13
  169. dq 0A003B1829B7BE85DH
  170. dw bTAG_VALID,013H-1
  171. ;+.60024 28040 82517 36653 36946 908 E7
  172. ;Hex value: 0.B72DF814E709837E066855BDD H17
  173. dq 0B72DF814E709837EH
  174. dw bTAG_VALID,017H-1
       ;Immediate-operand constants used by the F2XM1 range-reduction fixups
       ;(ExpHalf, ExpReducedPlusHalf, ExpReducedMinusHalf).
  175. ;sqrt(2) = 1.41421 35623 73095 04880 16887 24209 69808 ;From Hart appendix
  176. ;Hex value: 0.B504F333F9DE6484597D89B30 H1
  177. Sqrt2Hi equ 0B504F333H
  178. Sqrt2Lo equ 0F9DE6484H
  179. Sqrt2Exp equ 01H-1
  180. ;sqrt(2) - 1 = +0.4142135623730950488016887242E0
  181. ;Hex value: 0.D413CCCFE779921165F626CC4 HFFFF
       ;XSqrt2m1Lo is a truncated copy of the bits below the 64-bit mantissa;
       ;ExpHalf uses it to build the extended (eax) part of the result.
  182. Sqrt2m1Hi equ 0D413CCCFH
  183. Sqrt2m1Lo equ 0E7799211H
  184. XSqrt2m1Lo equ 060000000H
  185. Sqrt2m1Exp equ 0FFFFH-1
  186. ;2 - sqrt(2) = +0.5857864376269049511983112758E0
  187. ;Hex value: 0.95F619980C4336F74D04EC9A0 H0
  188. TwoMinusSqrt2Hi equ 095F61998H
  189. TwoMinusSqrt2Lo equ 00C4336F7H
  190. TwoMinusSqrt2Exp equ 00H-1
  191. ;*********************
  192. tLogPoly label dword
       ;Layout: dword degree, then 12-byte coefficients (dq mantissa, dw sign
       ;and tag, dw exponent = "H" exponent minus one), highest order first.
       ;Negative coefficients carry bSign in the high byte of the tag word.
  193. ;These constants are derived from Hart #2355: log2(x) = z * P(z^2) / Q(z^2),
  194. ; z = (x-1) / (x+1) accurate to 19.74 digits over interval
       ;(z must be 0 at x = 1, where log2(x) = 0, and x = 1 lies inside the
       ;interval, so the ratio is (x-1)/(x+1) and not its reciprocal.)
  195. ;[1/sqrt(2), sqrt(2)]. The original Hart coefficients were for log10();
  196. ;the P() coefficients have been scaled by log2(10) to compute log2().
  197. ;
  198. ;log2(10) = 3.32192 80948 87362 34787 03194 29489 39017 ;From Hart appendix
  199. dd 3 ;P() is degree three
  200. ; Original Hart constant Scaled value
  201. ;
  202. ;+.18287 59212 09199 9337 E0 +0.607500660543248917834110566373E0
  203. ;Hex value: 0.9B8529CD54E72022A12BAEC53 H0
  204. dq 09B8529CD54E72023H
  205. dw bTAG_VALID,00H-1
  206. ;-.41855 96001 31266 20633 E1 -13.9042489506087332809657007634
  207. ;Hex value: 0.DE77CDBF64E8C53F0DCD458D0 H4
  208. dq 0DE77CDBF64E8C53FH
  209. dw bSign shl 8 + bTAG_VALID,04H-1
  210. ;+.13444 58152 27503 62236 E2 +44.6619330844279438866067340334
  211. ;Hex value: 0.B2A5D1C95708A0C9FE50F6F97 H6
  212. dq 0B2A5D1C95708A0CAH
  213. dw bTAG_VALID,06H-1
  214. ;-.10429 11213 72526 69497 44122 E2 -34.6447606134704282123622236943
  215. ;Hex value: 0.8A943C20526AE439A98B30F6A H6
  216. dq 08A943C20526AE43AH
  217. dw bSign shl 8 + bTAG_VALID,06H-1
  218. dd 3 ;Q() is degree three. First
  219. ;coefficient is 1.0 and is not listed.
  220. ; Hart constant
  221. ;
  222. ;-.89111 09060 90270 85654 E1
  223. ;Hex value: 0.8E93E7183AA998D74F45CDFF0 H4
  224. dq 08E93E7183AA998D7H
  225. dw bSign shl 8 + bTAG_VALID,04H-1
  226. ;+.19480 96618 79809 36524 155 E2
  227. ;Hex value: 0.9BD904CCFEE118D4BEF319716 H5
  228. dq 09BD904CCFEE118D5H
  229. dw bTAG_VALID,05H-1
  230. ;-.12006 95907 02006 34243 4218 E2
  231. ;Hex value: 0.C01C811D2EC1B5806304B1858 H4
  232. dq 0C01C811D2EC1B580H
  233. dw bSign shl 8 + bTAG_VALID,04H-1
  234. ;Log2(e) = 1.44269 50408 88963 40735 99246 81001 89213 ;From Hart appendix
  235. ;Hex value: 0.B8AA3B295C17F0BBBE87FED04 H1
  236. Log2OfEHi equ 0B8AA3B29H
  237. Log2OfELo equ 05C17F0BCH
  238. Log2OfEexp equ 01H-1
  239. ;********************* Generic polynomial evaluation *********************
  240. ;
  241. ;EvalPoly, EvalPolyAdd, EvalPolySetup, Eval2Poly
  242. ;
  243. ;Inputs:
  244. ; ebx:esi,ecx = floating point number, internal format
  245. ; edi = pointer to polynomial degree and coefficients
  246. ;Outputs:
  247. ; result in ebx:esi,ecx
  248. ; edi incremented to start of last coefficient in list
  249. ;
  250. ;EvalPoly is the basic polynomial evaluator, using Horner's rule. The
  251. ;polynomial pointer in edi points to a list: the first dword in the list
  252. ;is the degree of the polynomial (n); it is followed by the n+1
  253. ;coefficients in internal (12-byte) format. The argument for EvalPoly
  254. ;must be stored in the static FloatTemp in addition to being in
  255. ;registers.
  256. ;
  257. ;EvalPolyAdd is an alternate entry point into the middle of EvalPoly.
  258. ;It is used when the first coefficient is 1.0, so it skips the first
  259. ;multiplication. It requires that the degree of the polynomial be
  260. ;already loaded into ebp.
  261. ;
  262. ;EvalPolySetup stores a copy of the argument in the static ArgTemp,
  263. ;and stores the square of the argument in the static FloatTemp.
  264. ;Then it falls into EvalPoly to evaluate the polynomial on the square.
  265. ;
  266. ;Eval2Poly evaluates two polynomials on its argument. The first
  267. ;polynomial is x * P(x^2), and its result is left at [[CURstk]].
  268. ;The second polynomial is Q(x^2), and its result is left in registers.
  269. ;The most significant coefficient of Q() is 1.
  270. ;
  271. ;Polynomial evaluation uses a slight variation on the standard add
  272. ;and multiply routines. PolyAddDouble and PolyMulDouble both check
  273. ;to see if the argument in registers (the current accumulation) is
  274. ;zero. The argument pointed to by edi is a coefficient and is never
  275. ;zero.
  276. ;
  277. ;In addition, the [RoundMode] and [ZeroVector] vectors are "trapped",
  278. ;i.e., redirected to special handlers for polynomial evaluation.
  279. ;[RoundMode] ordinarily points to the routine that handles
  280. ;the current rounding mode and precision control; however, during
  281. ;polynomial evaluation, we always want full precision and round
  282. ;nearest. The normal rounding routines also store their result
  283. ;at [[Result]], but we want the result left in registers.
  284. ;[ZeroVector] exists solely so polynomial evaluation can trap
  285. ;when AddDouble produces a result of zero. The normal response is to store
  286. ;a zero at [[Result]], but we need the zero left in registers.
  287. ;PolyRound and PolyZero handle these traps.
  288. EvalPolySetup:
       ;Entry:  ebx:esi,ecx = x (internal format); edi -> polynomial table
       ;        (dword degree followed by 12-byte coefficients).
       ;Effect: saves x in ArgTemp and x^2 in FloatTemp, redirects [RoundMode]
       ;        and [ZeroVector] to PolyRound/PolyZero (they are NOT restored
       ;        here), then falls into EvalPoly with x^2 as the argument.
       ;Exit:   ebx:esi,ecx = P(x^2); edi -> last coefficient of the list.
  289. ;Save x in ArgTemp
  290. mov EMSEG:[ArgTemp].ExpSgn,ecx
  291. mov EMSEG:[ArgTemp].lManHi,ebx
  292. mov EMSEG:[ArgTemp].lManLo,esi
  293. mov EMSEG:[RoundMode],offset PolyRound
  294. mov EMSEG:[ZeroVector],offset PolyZero
  295. push edi ;Save pointer to polynomials
  296. ;op1 mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
  297. mov edx,ebx
  298. mov edi,esi
  299. mov eax,ecx
  300. ;op2 mantissa in edx:edi, exponent in high eax, sign in ah bit 7
  301. call MulDoubleReg ;Compute x^2
  302. ;Save x^2 in FloatTemp
  303. mov EMSEG:[FloatTemp].ExpSgn,ecx
  304. mov EMSEG:[FloatTemp].lManHi,ebx
  305. mov EMSEG:[FloatTemp].lManLo,esi
  306. pop edi
  307. EvalPoly:
  308. ;ebx:esi,ecx = arg to evaluate, also in FloatTemp
  309. ;edi = pointer to degree and list of coefficients.
       ;Horner's rule: acc = arg * c[0]; then repeat "acc += c[i]" with a
       ;multiply by arg (from FloatTemp) between successive adds.
  310. push edi
  311. mov eax,cs:[edi+4].ExpSgn
  312. mov edx,cs:[edi+4].lManHi
  313. mov edi,cs:[edi+4].lManLo
  314. call MulDoubleReg ;Multiply arg by first coef.
  315. pop edi
  316. mov ebp,cs:[edi] ;Get polynomial degree
  317. add edi,4+Reg87Len ;Point to second coefficient
  318. jmp EvalPolyAdd
  319. PolyLoop:
       ;Top of stack holds the coefficient pointer pushed by EvalPolyAdd.
  320. push ebp ;Save loop count
  321. ifdef NT386
  322. mov edi,YFloatTemp
  323. else
  324. mov edi,offset edata:FloatTemp
  325. endif
  326. call PolyMulDouble
  327. pop ebp
  328. pop edi
       ;Advance the full 32-bit pointer. The previous 16-bit "add di" only
       ;updated the low word and would wrap (without carrying into the high
       ;word) if the coefficient table straddled a 64 KB boundary.
  329. add edi,Reg87Len
  330. EvalPolyAdd:
       ;Alternate entry: ebx:esi,ecx = accumulator, edi -> coefficient to add,
       ;ebp = number of coefficients still to add (the polynomial degree).
  331. push edi ;Keep coefficient pointer; edi is reused for lManLo
  332. mov eax,cs:[edi].ExpSgn
  333. mov edx,cs:[edi].lManHi
  334. mov edi,cs:[edi].lManLo
  335. cmp cl,bTAG_ZERO ;Adding to zero?
  336. jz AddToZero
  337. call AddDoubleReg ;ebp preserved
  338. ContPolyLoop:
  339. dec ebp
  340. jnz PolyLoop
  341. pop edi ;Not advanced: edi is left at the last coefficient
  342. ret
  343. AddToZero:
  344. ;Number in registers is zero, so just return value from memory.
  345. mov ecx,eax
  346. mov ebx,edx
  347. mov esi,edi
  348. jmp ContPolyLoop
  349. Eval2Poly:
       ;Entry:  ebx:esi,ecx = x; edi -> table holding P() followed by Q().
       ;Exit:   [[CURstk]] = x * P(x^2); ebx:esi,ecx = Q(x^2) (leading
       ;        coefficient of Q() is an implied 1.0).
       ;        Returns through EvalPolyAdd's ret (tail jump below).
  350. call EvalPolySetup
       ;On return edi -> last coefficient of P(); registers hold P(x^2).
  351. push edi
  352. ifdef NT386
  353. mov edi,YArgTemp
  354. else
  355. mov edi,offset edata:ArgTemp
  356. endif
  357. call PolyMulDouble ;Multiply first result by argument
  358. pop edi
  359. ;Save result of first polynomial at [[CURstk]]
  360. mov edx,EMSEG:[CURstk]
  361. mov EMSEG:[edx].ExpSgn,ecx
  362. mov EMSEG:[edx].lManHi,ebx
  363. mov EMSEG:[edx].lManLo,esi
  364. ;Load x^2 back into registers
       ;With the leading coefficient 1.0, the Horner accumulator starts as x^2
       ;itself, so no initial multiply is needed.
  365. mov ecx,EMSEG:[FloatTemp].ExpSgn
  366. mov ebx,EMSEG:[FloatTemp].lManHi
  367. mov esi,EMSEG:[FloatTemp].lManLo
  368. ;Start second polynomial evaluation
       ;Skip the last P() coefficient (Reg87Len) and Q()'s degree dword (4).
  369. add edi,4+Reg87Len ;Point to coefficient
  370. mov ebp,cs:[edi-4] ;Get polynomial degree
  371. jmp EvalPolyAdd
  372. PolyRound:
  373. ;This routine handles all rounding during polynomial evaluation.
  374. ;It performs 64-bit round nearest, with result left in registers.
  375. ;
  376. ;Inputs:
  377. ; mantissa in ebx:esi:eax, exponent in high ecx, sign in ch bit 7
  378. ;Outputs:
  379. ; same, plus tag in cl.
  380. ;
  381. ;To perform "round even" when the round bit is set and the sticky bits
  382. ;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB
  383. ;is set, that will always force a round up (to even) if the round bit is
  384. ;set. If the LSB is zero, then the sticky bits remain zero and we always
  385. ;round down. This rounding rule is implemented by adding RoundBit-1
  386. ;(7F..FFH), setting CY if round up.
  387. ;
  388. ;This routine needs to be reversible in case we're at the last step
  389. ;in the polynomial and final rounding uses a different rounding mode.
  390. ;We do this by copying the LSB of esi into al. While the rounding is
  391. ;reversible, you can't tell if the answer was exact.
  392. mov edx,esi
  393. and dl,1 ;Look at LSB
  394. or al,dl ;Set LSB as sticky bit
  395. add eax,(1 shl 31)-1 ;Sum LSB & sticky bits--CY if round up
  396. adc esi,0 ;Propagate the rounding carry through the mantissa
  397. adc ebx,0
  398. jc PolyBumpExponent ;Overflowed, increment exponent
  399. or esi,esi ;Any bits in low half?
  400. .erre bTAG_VALID eq 1
  401. .erre bTAG_SNGL eq 0
  402. setnz cl ;if low half==0 then cl=0 else cl=1
       ;i.e. cl = bTAG_SNGL or bTAG_VALID, relying on the .erre asserts above
  403. ret
  404. PolyBumpExponent:
       ;A carry out of ebx means ebx:esi wrapped to zero, so the rounded
       ;mantissa is exactly 1.0 at the next higher exponent.
  405. add ecx,1 shl 16 ;Mantissa overflowed, bump exponent
  406. or ebx,1 shl 31 ;Set MSB
  407. mov cl,bTAG_SNGL ;Low half is zero
       ;Falls through to the ret at PolyZero
  408. PolyZero:
  409. ;Enter here when result is zero
  410. ret
  411. ;*******************************************************************************
  412. ;FPATAN instruction
  413. ;Actual instruction entry point is in emarith.asm
       ;Handler table for FPATAN: 16 entries, one per (source tag, dest tag)
       ;pair with the source tag as the major index, plus a 17th entry for
       ;the two-infinities case. The code that indexes it is not in this part
       ;of the file (entry point is in emarith.asm per the note above).
       ;Handlers not defined here (TwoOpBothSpcl) are presumably in
       ;emarith.asm as well -- confirm.
  414. tFpatanDisp label dword ;Source (ST(0)) Dest (*[di] = ST(1))
  415. dd AtanDouble ;single single
  416. dd AtanDouble ;single double
  417. dd AtanZeroDest ;single zero
  418. dd AtanSpclDest ;single special
  419. dd AtanDouble ;double single
  420. dd AtanDouble ;double double
  421. dd AtanZeroDest ;double zero
  422. dd AtanSpclDest ;double special
  423. dd AtanZeroSource ;zero single
  424. dd AtanZeroSource ;zero double
  425. dd AtanZeroDest ;zero zero
  426. dd AtanSpclDest ;zero special
  427. dd AtanSpclSource ;special single
  428. dd AtanSpclSource ;special double
  429. dd AtanSpclSource ;special zero
  430. dd TwoOpBothSpcl ;special special
  431. dd AtanTwoInf ;Two infinities
  432. ;Compute atan( st(1)/st(0) ). Neither st(0) nor st(1) is zero or
  433. ;infinity at this point.
  434. ;
  435. ;Argument reduction starts by dividing the smaller by the larger,
  436. ;ensuring that the result x is <= 1. The absolute value of the quotient
  437. ;is used and the quadrant is fixed up later. If x = st(0)/st(1), then
  438. ;the final atan result is subtracted from pi/2 (and normalized for the
  439. ;correct range of -pi to +pi).
  440. ;
  441. ;The range of x is further reduced using the formulas:
  442. ; t = (x - k) / (1 + kx)
  443. ; atan(x) = atan(k) + atan(t)
  444. ;
  445. ;Given that x <= 1, if we choose k = tan(pi/6) = 1/sqrt(3), then we
  446. ;are assured that t <= tan(pi/12) = 2 - sqrt(3), and
  447. ;for x >= tan(pi/12) = 2 - sqrt(3), t >= -tan(pi/12).
  448. ;Thus we can always reduce the argument to abs(t) <= tan(pi/12).
  449. ;
  450. ;Since k = 1/sqrt(3), it is convenient to multiply the numerator
  451. ;and denominator of t by 1/k, which gives
  452. ;t = (x/k - 1) / (1/k + x) = ( x*sqrt(3) - 1 ) / ( sqrt(3) + x ).
  453. ;This is the form found in Cody and Waite and in previous versions
  454. ;of the emulator. It requires one each add, subtract, multiply, and
  455. ;divide.
  456. ;
  457. ;Hart has derived a simpler version of this formula:
  458. ;t = 1/k - (1/k^2 + 1) / (1/k + x) = sqrt(3) - 4 / ( sqrt(3) + x ).
  459. ;Note that this computation requires one each add, subtract, and
  460. ;divide, but no multiply.
  461. ;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
  462. ;[edi] points to st(1), where result is returned
  463. AtanDouble:
       ;FPATAN for two finite, non-zero operands.
       ;Pushes a flag dword (al = ST(0) sign in bit 7, other bits clear;
       ;ah = ST(1) sign byte) and the return address AtanQuo, then jumps to
       ;the divide routine so that the smaller magnitude is the dividend.
       ;NOTE(review): DivrDoubleSetFlag is not in this file; presumably it
       ;sets flag bit 6 ("st(1) > st(0)") in the byte on the stack -- confirm.
  464. mov EMSEG:[Result],edi
  465. mov EMSEG:[RoundMode],offset PolyRound
  466. mov EMSEG:[ZeroVector],offset PolyZero
  467. mov ah,EMSEG:[edi].bSgn ;Sign of result
  468. mov al,ch ;Affects quadrant of result
  469. and al,bSign ;Zero other bits, used as flags
  470. push eax ;Save flag
  471. ;First figure out which is larger
  472. push offset AtanQuo ;Return address for DivDouble
  473. shld edx,ecx,16 ;Get exponent to dx
  474. cmp dx,EMSEG:[edi].wExp ;Compare exponents
  475. jl DivrDoubleSetFlag ;ST(0) is smaller, make it dividend
  476. jg DivDouble ; ...is bigger, make it divisor
  477. ;Exponents are equal, compare mantissas
  478. cmp ebx,EMSEG:[edi].lManHi
  479. jb DivrDoubleSetFlag ;ST(0) is smaller, make it dividend
  480. ja DivDouble ; ...is bigger, make it divisor
  481. cmp esi,EMSEG:[edi].lManLo
  482. jbe DivrDoubleSetFlag ;ST(0) is smaller or equal, make it dividend
  483. jmp DivDouble
  484. TinyAtan:
  485. ;Come here if the angle was reduced to zero, or the divide resulted in
  486. ;unmasked underflow so that the quotient exponent was biased.
  487. ;Note that an angle of zero means reduction was performed, and the
  488. ;result will be corrected to a non-zero value.
       ;On entry the flag dword pushed by AtanDouble is on top of the stack.
  489. mov dl,[esp] ;Get flag byte
  490. or dl,dl ;No correction needed?
  491. jz AtanSetSign ;Just return result of divide
  492. and EMSEG:[CURerr],not Underflow ;Result is a multiple of pi/6: no underflow
  493. ;Angle in registers is too small to affect correction amount. Just
  494. ;load up correction angle instead of adding it in.
  495. add dl,40H ;Change flags for correction lookup
  496. shr dl,5-2 ;Now in bits 2,3,4
  497. and edx,7 shl 2 ;edx = 4 * table index
       ;edx+2*edx = 12 * index = byte offset of the 12-byte table entry
  498. mov ebx,[edx+2*edx+tAtanPiFrac].lManHi
  499. mov esi,[edx+2*edx+tAtanPiFrac].lManLo
  500. mov ecx,[edx+2*edx+tAtanPiFrac].ExpSgn
       ;cl holds the entry's RoundedUp/RoundedDown marker; move it into the
       ;top byte of eax (the extended-mantissa register).
  501. shrd eax,ecx,8 ;Copy rounding flag to high eax
  502. jmp AtanSetSign
  503. AtanQuo:
  504. ;Return here after divide. Underflow flag is set only for "big underflow",
  505. ;meaning the (15-bit) exponent couldn't even be kept in 16 bits. This can
  506. ;only happen dividing a denormal by one of the largest numbers.
  507. ;
  508. ;Rounded mantissa in ebx:esi:eax, exp/sign in high ecx
  509. test EMSEG:[CURerr],Underflow;Did we underflow?
  510. jnz TinyAtan
  511. ;Now compare quotient in ebx:esi,ecx with tan(pi/12) = 2 - sqrt(3)
  512. xor cx,cx ;Use absolute value
       ;(this clears the tag byte in cl as well as the sign in ch)
       ;Exponents compare as signed values, mantissas as unsigned.
  513. cmp ecx,Tan15exp shl 16
  514. jg AtnNeedReduce
  515. jl AtnReduced
  516. cmp ebx,Tan15Hi
  517. ja AtnNeedReduce
  518. jb AtnReduced
  519. cmp esi,Tan15Lo
  520. jbe AtnReduced
  521. AtnNeedReduce:
  522. or byte ptr [esp],20H ;Note reduction in flags on stack
  523. ;Compute t = sqrt(3) - 4 / ( sqrt(3) + x ).
  524. mov eax,Sqrt3exp shl 16
  525. mov edx,Sqrt3Hi
  526. mov edi,Sqrt3Lo
  527. call AddDoubleReg ;x + sqrt(3)
       ;Move the sum into the divisor registers expected by DivDoubleReg.
  528. mov edi,esi
  529. mov esi,ebx ;Mantissa in esi:edi
  530. mov ebx,ecx ;ExpSgn to ebx
  531. mov ecx,(2+TexpBias) shl 16
  532. mov edx,1 shl 31
  533. xor eax,eax ;edx:eax,ecx = 4.0
  534. ;dividend mantissa in edx:eax, exponent in high ecx, sign in ch bit 7
  535. ;divisor mantissa in esi:edi, exponent in high ebx, sign in bh bit 7
  536. call DivDoubleReg ;4 / ( x + sqrt(3) )
  537. not ch ;Flip sign
       ;NOTE(review): "not" inverts every bit of ch, not only the sign bit;
       ;presumably the remaining bits of ch are ignored downstream -- confirm.
  538. mov eax,Sqrt3exp shl 16
  539. mov edx,Sqrt3Hi
  540. mov edi,Sqrt3Lo
  541. call AddDoubleReg ;sqrt(3) - 4 / ( x + sqrt(3) )
  542. ;Result in ebx:esi,ecx could be very small (or zero) if arg was near tan(pi/6).
  543. cmp cl,bTAG_ZERO
  544. jz TinyAtan
  545. AtnReduced:
       ;ebx:esi,ecx = reduced argument (may be negative after reduction).
  546. ;If angle is small, skip the polynomial. atan(x) = x when x - x^3/3 = x
  547. ;[or 1 - x^2/3 = 1], which happens when x < 2^-32. This prevents underflow
  548. ;in computing x^2.
  549. TinyAtanArg equ -32
  550. cmp ecx,TinyAtanArg shl 16 ;Signed compare on the exponent in high ecx
  551. jl AtanCorrection
  552. mov edi,offset tAtanPoly
  553. call Eval2Poly
       ;Now [[CURstk]] = x * P(x^2) and registers hold Q(x^2).
  554. mov edi,EMSEG:[CURstk] ;Point to first result
  555. call DivDouble ;x * P(x^2) / Q(x^2)
  556. AtanCorrection:
  557. ;Rounded mantissa in ebx:esi:eax, exp/sign in high ecx
  558. ;
  559. ;Correct sign and add fraction of pi to account for various angle reductions:
  560. ;
  561. ; flag bit indicates correction
  562. ;----------------------------------------------------
  563. ; 5 arg > tan(pi/12) add pi/6
  564. ; 6 st(1) > st(0) sub from pi/2
  565. ; 7 st(0) < 0 sub from pi
  566. ;
  567. ;This results in the following correction for the result R:
  568. ;
  569. ;bit 7 6 5 correction
  570. ;---------------------------
  571. ; 0 0 0 none
  572. ; 0 0 1 pi/6 + R
  573. ; 0 1 0 pi/2 - R
  574. ; 0 1 1 pi/3 - R
  575. ; 1 0 0 pi - R
  576. ; 1 0 1 5*pi/6 - R
  577. ; 1 1 0 pi/2 + R
  578. ; 1 1 1 2*pi/3 + R
  579. mov dl,[esp] ;Get flag byte
  580. or dl,dl ;No correction needed?
  581. jz AtanSetSign
  582. add dl,40H ;Set bit 7 for all -R cases
  583. ;This changes the meaning of the flag bits to the following:
  584. ;
  585. ;bit 7 6 5 correction
  586. ;---------------------------
  587. ; 0 0 0 pi/2 + R
  588. ; 0 0 1 2*pi/3 + R
  589. ; 0 1 0 none
  590. ; 0 1 1 pi/6 + R
  591. ; 1 0 0 pi/2 - R
  592. ; 1 0 1 pi/3 - R
  593. ; 1 1 0 pi - R
  594. ; 1 1 1 5*pi/6 - R
  595. xor ch,dl ;Flip sign bit in cases 4 - 7
       ;(the xor also toggles ch bits 5 and 6 when those flag bits are set)
  596. shr dl,5-2 ;Now in bits 2,3,4
  597. and edx,7 shl 2 ;edx = 4 * table index
       ;edx+2*edx = 12 * index. edx itself is loaded last because it is the
       ;index register for all three loads.
  598. mov eax,[edx+2*edx+tAtanPiFrac].ExpSgn
  599. mov edi,[edx+2*edx+tAtanPiFrac].lManLo
  600. mov edx,[edx+2*edx+tAtanPiFrac].lManHi
  601. call AddDoubleReg ;Add in correction angle
  602. AtanSetSign:
       ;Discards the flag dword pushed by AtanDouble; dh = saved ST(1) sign.
  603. pop edx ;Get flags again
  604. mov ch,dh ;Set sign to original ST(1)
  605. ;Rounded mantissa in ebx:esi:eax, exp/sign in ecx
  606. jmp TransUnround
  607. ;***
  608. AtanSpclDest:
       ;ST(1) (dest, at [edi]) is tagged special. Only infinity is handled
       ;here; anything else goes to SpclDest with the dest tag in al.
  609. mov al,EMSEG:[edi].bTag ;Pick up tag
  610. ; cmp cl,bTAG_INF ;Is argument infinity?
  611. cmp al,bTAG_INF ;Is argument infinity?
  612. jnz SpclDest ;In emarith.asm
  613. AtanZeroSource:
  614. ;Dividend is infinity or divisor is zero. Return pi/2 with
  615. ;same sign as dividend.
  616. mov ecx,(PiExp-1) shl 16 + bTAG_VALID ;Exponent for pi/2
  617. PiMant:
  618. ;For storing multiples of pi. Exponent/tag is in ecx.
       ;Loads the extended pi mantissa into ebx:esi:eax; callers select
       ;pi, pi/2 or pi/4 purely through the exponent already in ecx.
  619. mov ch,EMSEG:[edi].bSgn ;Get dividend's sign
  620. mov ebx,XPiHi
  621. mov esi,XPiMid
  622. mov eax,XPiLo
  623. ;A jump through [TransRound] is only valid if the number is known not to
  624. ;underflow. Unmasked underflow requires [RoundMode] be set.
  625. jmp EMSEG:[TransRound]
  626. ;***
  627. AtanSpclSource:
       ;ST(0) (source, tag in cl) is special. Only infinity is handled here.
  628. cmp cl,bTAG_INF ;Scaling by infinity?
  629. jnz SpclSource ;in emarith.asm
  630. AtanZeroDest:
  631. ;Divisor is infinity or dividend is zero. Return zero for +divisor,
  632. ;pi for -divisor. Result sign is same as dividend.
  633. or ch,ch ;Check divisor's sign
       ;mov does not alter flags, so the js below still tests the sign of ch
       ;from the "or" above even though ecx is overwritten in between.
  634. mov ecx,PiExp shl 16 + bTAG_VALID ;Exponent for pi
  635. js PiMant ;Store pi
  636. ;Result is zero
       ;The sign byte at [edi] is not written, so the dividend's sign is kept.
  637. mov EMSEG:[edi].lManHi,0
  638. mov EMSEG:[edi].lManLo,0
  639. mov EMSEG:[edi].wExp,0
  640. mov EMSEG:[edi].bTAG,bTAG_ZERO
  641. ret
  642. ;***
  643. AtanTwoInf:
  644. ;Return pi/4 for +infinity divisor, 3*pi/4 for -infinity divisor.
  645. ;Result sign is same as dividend infinity.
  646. or ch,ch ;Check divisor's sign
       ;mov preserves the flags set by the "or" above for the jns below.
  647. mov ecx,(PiExp-2) shl 16 + bTAG_VALID ;Exponent for pi/4
  648. jns PiMant ;Store pi/4
       ;3*pi/4 = 3*pi with its exponent reduced by two.
  649. mov ecx,(ThreePiExp-2) shl 16 + bTAG_VALID ;Exponent for 3*pi/4
  650. mov ch,EMSEG:[edi].bSgn ;Get dividend's sign
  651. mov ebx,XThreePiHi
  652. mov esi,XThreePiMid
  653. mov eax,XThreePiLo
  654. ;A jump through [TransRound] is only valid if the number is known not to
  655. ;underflow. Unmasked underflow requires [RoundMode] be set.
  656. jmp EMSEG:[TransRound]
  657. ;*******************************************************************************
  658. ExpSpcl:
  659. ;Tagged special
       ;F2XM1 operand is special: ecx = ExpSgn of ST(0), edi = [CURstk].
  660. cmp cl,bTAG_DEN
  661. jz ExpDenorm
  662. cmp cl,bTAG_INF
  663. mov al, cl ;Tag in al for SpclDestNotDen; mov leaves the cmp flags intact
  664. jnz SpclDestNotDen ;Check for Empty or NAN
  665. ;Have infinity, check its sign.
  666. ;Return -1 for -infinity, no change if +infinity
  667. or ch,ch ;Check sign
  668. jns ExpRet ;Just return the +infinity
  669. mov EMSEG:[edi].lManLo,0
  670. mov EMSEG:[edi].lManHi,1 shl 31
  671. mov EMSEG:[edi].ExpSgn,bSign shl 8 + bTAG_SNGL ;-1.0 (exponent is zero)
  672. ret
  673. ExpDenorm:
       ;NOTE(review): this is a store, not an OR, so any bits already set in
       ;CURerr are overwritten -- presumably CURerr is clear on instruction
       ;entry; confirm.
  674. mov EMSEG:[CURerr],Denormal
  675. test EMSEG:[CWmask],Denormal ;Is denormal exception masked?
  676. jnz ExpCont ;Yes, continue
       ;Unmasked: fall through and return with the operand unchanged.
  677. ExpRet:
  678. ret
  679. EM_ENTRY eF2XM1
  680. eF2XM1:
       ;F2XM1: replace ST(0) with 2^ST(0) - 1.
  681. ;edi = [CURstk]
  682. mov ecx,EMSEG:[edi].ExpSgn
  683. cmp cl,bTAG_ZERO
  684. jz ExpRet ;Return same zero
  685. ja ExpSpcl ;Tag above bTAG_ZERO means special
  686. ExpCont:
  687. ;The input range specified for the function is (-1, +1). The polynomial
  688. ;used for this function is valid only over the range [0, +0.5], so range
  689. ;reduction is needed. Range reduction is based on the identity:
  690. ;
  691. ; 2^(a+b) = 2^a * 2^b
  692. ;
  693. ;1.0 or 0.5 can be added/subtracted from the argument to bring it into
  694. ;range. We calculate 2^x - 1 with a polynomial, and then adjust the
  695. ;result according to the amount added or subtracted, as shown in the table:
  696. ;
  697. ;Arg range Adj Polynomial result Required result, 2^x - 1
  698. ;
  699. ; (-1, -0.5] +1 P = 2^(x+1) - 1 (P - 1)/2
  700. ;
  701. ; (-0.5, 0) +0.5 P = 2^(x+0.5) - 1 P * sqrt(2)/2 + (sqrt(2)/2 - 1)
  702. ;
  703. ; (0, 0.5) 0 P = 2^x - 1 P
  704. ;
  705. ; [0.5, 1) -0.5 P = 2^(x-0.5) - 1 P * sqrt(2) + (sqrt(2)-1)
  706. ;
  707. ;Since the valid input range does not include +1.0 or -1.0, and zero is
  708. ;handled separately, the precision exception will always be set.
  709. mov EMSEG:[Result],edi
  710. mov EMSEG:[RoundMode],offset PolyRound
  711. mov EMSEG:[ZeroVector],offset PolyZero
  712. push offset TransUnround ;Always exit through here
  713. mov ebx,EMSEG:[edi].lManHi
  714. mov esi,EMSEG:[edi].lManLo
  715. ;Check for small argument, so that x^2 does not underflow. Note that
  716. ;e^x = 1+x for small x, where small x means x + x^2/2 = x [or 1 + x/2 = 1],
  717. ;which happens when x < 2^-64, so 2^x - 1 = x * ln(2) for small x.
  718. TinyExpArg equ -64
  719. cmp ecx,TinyExpArg shl 16
  720. jl TinyExp
       ;NOTE(review): assuming bSign = 80H, the constant below is 0FFFF8000H.
       ;As a signed 32-bit compare, the jl is taken whenever the exponent is
       ;below -1 (either sign), and also when the exponent is -1 with the sign
       ;bit clear, i.e. for positive arguments in [0.5, 1). That does not match
       ;the "positive, < 0.5" comment or the reduction table above, and it
       ;appears to leave only negative arguments with exponent -1 reaching the
       ;reduction code below. Confirm the intended condition before changing.
  721. cmp ecx,-1 shl 16 + bSign shl 8 ;See if positive, < 0.5
  722. jl ExpReduced
  723. ;Argument was not in range (0, 0.5), so we need some kind of reduction
  724. or ecx,ecx ;Exp >= 0 means arg >= 1.0 --> too big
  725. ;CONSIDER: this returns through TransUnround which restores the rounding
  726. ;vectors, but it also randomly rounds the result because eax is not set.
  727. jge ExpRet ;Give up if arg out of range
  728. ;We're going to need to add/subtract 1.0 or 0.5, so load up the constant
       ;ebp selects the fixup routine that will run after the polynomial.
  729. mov edx,1 shl 31
  730. xor edi,edi
  731. mov eax,-1 shl 16 + bSign shl 8 ;edx:edi,eax = -0.5
  732. mov ebp,offset ExpReducedMinusHalf
  733. or ch,ch ;If it's positive, must be [0.5, 1)
  734. jns ExpReduction
  735. xor ah,ah ;edx:edi,eax = +0.5
  736. mov ebp,offset ExpReducedPlusHalf
  737. cmp ecx,eax ;See if abs(arg) >= 0.5
  738. jl ExpReduction ;No, adjust by .5
  739. xor eax,eax ;edx:edi,eax = 1.0
  740. mov ebp,offset ExpReducedPlusOne
  741. ExpReduction:
  742. call AddDoubleReg ;Argument now in range [0, 0.5]
  743. cmp cl,bTAG_ZERO ;Did reduction result in zero?
  744. jz ExpHalf ;If so, must have been exactly 0.5
       ;The fixup address goes above TransUnround on the stack, so the final
       ;divide returns to the fixup first and the fixup returns to TransUnround.
  745. push ebp ;Address of reduction cleanup
  746. ExpReduced:
  747. mov edi,offset tExpPoly
  748. call Eval2Poly
  749. ;2^x - 1 is approximated with 2 * x*P(x^2) / ( Q(x^2) - x*P(x^2) )
  750. ;Q(x^2) is in registers, x*P(x^2) is at [[CURstk]]
  751. mov edi,EMSEG:[CURstk]
  752. mov dx,bSign shl 8 ;Subtract memory operand
  753. ;Note that Q() and P() have no roots over the input range
  754. ;(they will never be zero).
  755. call AddDouble ;Q(x^2) - x*P(x^2)
       ;Halving the divisor supplies the factor of 2 in the numerator.
  756. sub ecx,1 shl 16 ;Divide by two
  757. mov edi,EMSEG:[CURstk]
  758. jmp DivDouble ;2 * x*P(x^2) / ( Q(x^2) - x*P(x^2) )
  759. ;Returns to correct argument reduction correction routine or TransUnround
  760. TinyExp:
  761. ;Exponent is very small (and was not reduced)
       ;Result is x * ln(2); x is in ebx:esi,ecx and the constant goes in
       ;edx:edi,eax. Tail jump: MulDoubleReg's ret uses the TransUnround
       ;address pushed by eF2XM1.
  762. mov edx,cFLDLN2hi
  763. mov edi,cFLDLN2lo
  764. mov eax,cFLDLN2exp shl 16
  765. ;This could underflow (but not big time)
  766. jmp MulDoubleReg ;Returns to TransUnround
  767. ExpHalf:
  768. ;Argument of exactly 0.5 was reduced to zero. Just return result.
       ;2^0.5 - 1 = sqrt(2) - 1, loaded as a constant into ebx:esi:eax,ecx.
       ;eax = XSqrt2m1Lo + 7FFFFFFFH: the same RoundBit-1 bias that PolyRound
       ;adds, presumably so TransUnround can reverse it as it does for other
       ;results -- confirm against TransUnround.
  769. mov ebx,Sqrt2m1Hi
  770. mov esi,Sqrt2m1Lo
  771. mov eax,XSqrt2m1Lo + 1 shl 31 - 1
  772. mov ecx,Sqrt2m1Exp shl 16
  773. ret ;Exit through TransUnround
  774. ExpReducedPlusOne:
  775. ;Correct result is (P - 1)/2
       ;Computed as P/2 + (-0.5). Tail jump: AddDoubleReg returns to the
       ;TransUnround address pushed by eF2XM1.
  776. sub ecx,1 shl 16 ;Divide by two
  777. mov edx,1 shl 31
  778. xor edi,edi
  779. mov eax,-1 shl 16 + bSign shl 8 ;edx:edi,eax = -0.5
  780. jmp AddDoubleReg
  781. ExpReducedPlusHalf:
  782. ;Correct result is P * sqrt(2)/2 - (1 - sqrt(2)/2)
       ;Entry: ebx:esi,ecx = P = 2^(x+0.5) - 1 from the polynomial.
       ;Exit:  tail jump to AddDoubleReg, which returns to the TransUnround
       ;       address pushed by eF2XM1.
  783. mov edx,Sqrt2Hi
  784. mov edi,Sqrt2Lo
       ;Parenthesized: MASM binds "shl" tighter than binary "-", so the old
       ;"Sqrt2exp-1 shl 16" parsed as Sqrt2exp - (1 shl 16). That only gave the
       ;right value because Sqrt2Exp is 0. The value is unchanged; the form
       ;now matches (TwoMinusSqrt2Exp-1) shl 16 below.
  785. mov eax,(Sqrt2Exp-1) shl 16 ;sqrt(2)/2
  786. call MulDoubleReg
  787. mov edx,TwoMinusSqrt2Hi
  788. mov edi,TwoMinusSqrt2Lo
  789. mov eax,(TwoMinusSqrt2Exp-1) shl 16 + bSign shl 8 ;-(2-sqrt(2))/2
  790. jmp AddDoubleReg
  791. ExpReducedMinusHalf:
       ;Reduction cleanup selected (via ebp, pushed as a return address) when
       ;the argument was in [0.5, 1) and had 0.5 subtracted from it.
       ;P below denotes the polynomial result, 2^(x-0.5) - 1.
  792. ;Correct result is P * sqrt(2) + (sqrt(2)-1)
  793. mov edx,Sqrt2Hi
  794. mov edi,Sqrt2Lo
  795. mov eax,Sqrt2exp shl 16
  796. call MulDoubleReg
       ;Add the positive constant sqrt(2) - 1 to the product.
  797. mov edx,Sqrt2m1Hi
  798. mov edi,Sqrt2m1Lo
  799. mov eax,Sqrt2m1Exp shl 16
  800. jmp AddDoubleReg
  801. ;*******************************************************************************
  802. ;Dispatch table for log(x+1)
  803. ;
  804. ;One operand has been loaded into ecx:ebx:esi ("source"), the other is
  805. ;pointed to by edi ("dest").
  806. ;
  807. ;Tag of source is shifted. Tag values are as follows:
  808. .erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero
  809. .erre TAG_VALID eq 1
  810. .erre TAG_ZERO eq 2
  811. .erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty
  812. ;Any special case routines not found in this file are in emarith.asm
       ;The table holds 4 x 4 entries ordered by source tag, then dest tag,
       ;followed by one extra entry used when both operands are infinity.
  813. tFyl2xp1Disp label dword ;Source (ST(0)) Dest (*[di] = ST(1))
  814. dd LogP1Double ;single single
  815. dd LogP1Double ;single double
  816. dd LogP1ZeroDest ;single zero
  817. dd LogP1SpclDest ;single special
  818. dd LogP1Double ;double single
  819. dd LogP1Double ;double double
  820. dd LogP1ZeroDest ;double zero
  821. dd LogP1SpclDest ;double special
  822. dd XorSourceSign ;zero single
  823. dd XorSourceSign ;zero double
  824. dd XorDestSign ;zero zero
  825. dd LogP1SpclDest ;zero special
  826. dd LogSpclSource ;special single
  827. dd LogSpclSource ;special double
  828. dd LogSpclSource ;special zero
  829. dd TwoOpBothSpcl ;special special
  830. dd LogTwoInf ;Two infinities
  831. LogP1Double:
       ;FYL2XP1 main path: both operands are ordinary non-zero numbers.
  832. ;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
  833. ;[edi] points to st(1), where result is returned
  834. ;
  835. ;This instruction is defined only for x+1 in the range [1/sqrt(2), sqrt(2)]
  836. ;The approximation used (valid over exactly this range) is
  837. ; log2(x) = z * P(z^2) / Q(z^2), z = (x-1) / (x+1), which is
  838. ; log2(x+1) = r * P(r^2) / Q(r^2), r = x / (x+2)
  839. ;
  840. ;We're not too picky about this range check because the function is simply
  841. ;"undefined" if out of range--EXCEPT, we're supposed to check for -1 and
  842. ;signal Invalid if less, -infinity if equal.
       ;The exponent occupies the high word of ecx, so the sign flag after
       ;"or ecx,ecx" reflects the sign of the exponent: non-negative means
       ;abs(x) >= 1.0.
  843. or ecx,ecx ;abs(x) >= 1.0?
  844. jge LogP1OutOfRange ;Valid range is approx [-0.3, +0.4]
       ;Remember where the result goes and install the rounding and zero
       ;handlers used while the polynomial is being evaluated.
  845. mov EMSEG:[Result],edi
  846. mov EMSEG:[RoundMode],offset PolyRound
  847. mov EMSEG:[ZeroVector],offset PolyZero
  848. mov eax,1 shl 16 ;Exponent of 1 for adding 2.0
       ;BasicLog finishes with a jmp to a helper, so that helper's ret lands
       ;on the address pushed here.
  849. push offset TotalLog ;Return address for BasicLog
  850. ; jmp BasicLog ;Fall into BasicLog
  851. ;.erre BasicLog eq $
  852. ;BasicLog is used by eFYL2X and eFYL2XP1.
  853. ;eax has exponent and sign to add 1.0 or 2.0 to argument
  854. ;ebx:esi,ecx has argument, non-zero, tag not set
  855. ;ST has argument to take log2 of, minus 1. (This is the actual argument
  856. ;of eFYL2XP1, or argument minus 1 of eFYL2X.)
       ;On exit the log2 value is left by the helper jumped to at the end
       ;(DivDouble or MulDoubleReg), which returns to BasicLog's caller.
  857. BasicLog:
       ;Build the constant: mantissa 1.0 in edx:edi, exponent/sign from eax.
  858. mov edx,1 shl 31
  859. xor edi,edi ;edx:edi,eax = +1.0 or +2.0
  860. call AddDoubleReg
  861. mov edi,EMSEG:[CURstk] ;Point to x-1
  862. call DivDouble ;Compute (x-1) / (x+1)
  863. ;Result in registers is z = (x-1)/(x+1). For tiny z, ln(x) = 2*z, so
  864. ; log2(x) = 2 * log2(e) * z. Tiny z is such that z + z^3/3 = z.
       ;Signed compare on ecx: the exponent in the high word dominates, so
       ;"less" means the exponent of z is below -32.
  865. cmp ecx,-32 shl 16 ;Smallest exponent to bother with
  866. jl LogSkipPoly
  867. mov edi,offset tLogPoly
  868. call Eval2Poly
  869. mov edi,EMSEG:[CURstk] ;Point to first result, r * P(r^2)
  870. jmp DivDouble ;Compute r * P(r^2) / Q(r^2)
  871. LogSkipPoly:
  872. ;Multiply r by 2 * log2(e)
       ;The factor of two is folded in by using an exponent one higher than
       ;that of log2(e).
  873. mov edx,Log2OfEHi
  874. mov edi,Log2OfELo
  875. mov eax,(Log2OfEexp+1) shl 16
  876. jmp MulDoubleReg
  877. LogP1OutOfRange:
  878. ;Input range isn't valid, so we can return anything we want--EXCEPT, for
  879. ;numbers < -1 we must signal Invalid Operation, and Divide By Zero for
  880. ;-1. Otherwise, we return an effective log of one by just leaving the
  881. ;second operand as the return value.
  882. ;
  883. ;Exponent in ecx >= 0 ( abs(x) >= 1 )
  884. or ch,ch ;Is it positive?
  885. jns LogP1Ret ;If so, skip it
       ;x is negative with abs(x) >= 1. It equals exactly -1.0 only if the
       ;exponent is zero and the mantissa is 80000000:00000000, so clear the
       ;mantissa MSB and OR everything together: any remaining bit means
       ;x < -1.0.
  886. and ecx,0FFFFH shl 16 ;Look at exponent only: 0 for -1.0
  887. sub ebx,1 shl 31 ;Kill MSB
  888. or ebx,esi
  889. or ebx,ecx
  890. jnz ReturnIndefinite ;Must be < -1.0
  891. jmp DivideByMinusZero
       ;Shared plain-return exit; the other LogP1* special-case routines
       ;branch here too.
  892. LogP1Ret:
  893. ret
  894. ;***
  895. LogP1ZeroDest:
       ;ST(1) is zero, so the result is 0 * log2(x+1); only the sign of the
       ;zero (or an invalid operation) has to be decided.
  896. or ch,ch ;Is it negative?
  897. jns LogP1Ret ;If not, just leave it zero
       ;x is negative. A negative exponent (sign flag set from the high word
       ;of ecx) means abs(x) < 1, so the log is a finite negative number.
  898. or ecx,ecx ;abs(x) >= 1.0?
  899. jl XorDestSign ;Flip sign of zero
  900. ;Argument is <= -1
  901. jmp ReturnIndefinite ;Have 0 * log( <=0 )
  902. ;***
  903. LogP1SpclDest:
       ;ST(1) is a special value. Anything other than infinity is handed to
       ;the generic SpclDest handler.
  904. mov al,EMSEG:[edi].bTag ;Pick up tag
  905. cmp al,bTAG_INF ;Is argument infinity?
  906. jnz SpclDest ;In emarith.asm
  907. ;Multiplying log(x+1) * infinity.
  908. ;If x > 0, return original infinity.
  909. ;If -1 <= x < 0, return infinity with sign flipped.
  910. ;If x < -1 or x == 0, invalid operation.
  911. cmp cl,bTAG_ZERO
  912. jz ReturnIndefinite
  913. or ch,ch ;Is it positive?
  914. jns LogP1Ret
       ;x is negative. TEST clears the overflow flag and sets the sign flag
       ;from the top bit of the masked exponent, so jl is taken for a negative
       ;exponent (abs(x) < 1) and jg for a positive one (abs(x) >= 2).
  915. test ecx,0FFFFH shl 16 ;Is exponent zero?
  916. jl XorDestSign
  917. jg ReturnIndefinite
       ;Exponent is zero: 1 <= abs(x) < 2. Only exactly -1.0 (no mantissa
       ;bits below the MSB) is still a valid argument.
  918. sub ebx,1 shl 31 ;Kill MSB
  919. or ebx,esi
  920. jnz ReturnIndefinite ;Must be < -1.0
  921. jmp XorDestSign
  922. ;***
  923. LogSpclSource:
       ;ST(0) is a special value and ST(1) is not. Shared by both the log(x)
       ;and log(x+1) dispatch tables.
  924. cmp cl,bTAG_INF ;Is argument infinity?
  925. jnz SpclSource ;in emarith.asm
       ;Log of -infinity is invalid; log of +infinity is +infinity, so the
       ;result is ST(1) times infinity.
  926. or ch,ch ;Is it negative infinity?
  927. js ReturnIndefinite
  928. jmp MulByInf
  929. ;***
  930. LogTwoInf:
       ;Both operands are infinity (last entry of both dispatch tables).
       ;ch holds the sign of the source operand.
  931. or ch,ch ;Is it negative infinity?
  932. js ReturnIndefinite
       ;Source is +infinity here.
       ;NOTE(review): XorDestSign is defined elsewhere; presumably it combines
       ;the (positive) source sign with the dest sign -- confirm.
  933. jmp XorDestSign
  934. ;*******************************************************************************
  935. ;Dispatch table for log(x)
  936. ;
  937. ;One operand has been loaded into ecx:ebx:esi ("source"), the other is
  938. ;pointed to by edi ("dest").
  939. ;
  940. ;Tag of source is shifted. Tag values are as follows:
  941. .erre TAG_SNGL eq 0 ;SINGLE: low 32 bits are zero
  942. .erre TAG_VALID eq 1
  943. .erre TAG_ZERO eq 2
  944. .erre TAG_SPCL eq 3 ;NAN, Infinity, Denormal, Empty
  945. ;Any special case routines not found in this file are in emarith.asm
       ;The table holds 4 x 4 entries ordered by source tag, then dest tag,
       ;followed by one extra entry used when both operands are infinity.
  946. tFyl2xDisp label dword ;Source (ST(0)) Dest (*[di] = ST(1))
  947. dd LogDouble ;single single
  948. dd LogDouble ;single double
  949. dd LogZeroDest ;single zero
  950. dd LogSpclDest ;single special
  951. dd LogDouble ;double single
  952. dd LogDouble ;double double
  953. dd LogZeroDest ;double zero
  954. dd LogSpclDest ;double special
  955. dd DivideByMinusZero ;zero single
  956. dd DivideByMinusZero ;zero double
  957. dd ReturnIndefinite ;zero zero
  958. dd LogSpclDest ;zero special
  959. dd LogSpclSource ;special single
  960. dd LogSpclSource ;special double
  961. dd LogSpclSource ;special zero
  962. dd TwoOpBothSpcl ;special special
  963. dd LogTwoInf ;Two infinities
  964. LogDouble:
       ;FYL2X main path: both operands are ordinary non-zero numbers.
       ;log2(x) is computed as (exponent of x) + log2(reduced mantissa) and
       ;then multiplied by ST(1).
  965. ;st(0) mantissa in ebx:esi, exponent in high ecx, sign in ch bit 7
  966. ;[edi] points to st(1), where result is returned
  967. ;
  968. ;Must reduce the argument to the range [1/sqrt(2), sqrt(2)]
  969. or ch,ch ;Is it positive?
  970. js ReturnIndefinite ;Can't take log of negative number
  971. mov EMSEG:[Result],edi
  972. mov EMSEG:[RoundMode],offset PolyRound
  973. mov EMSEG:[ZeroVector],offset PolyZero
       ;shld copies the high word of ecx into ax; the upper half of eax is
       ;left with stale bits, which are discarded by the cwde at LogDone.
  974. shld eax,ecx,16 ;Save exponent in ax as int part of log2
  975. xor ecx,ecx ;Zero exponent: 1 <= x < 2
       ;Unsigned 64-bit compare of the mantissa against sqrt(2), high dword
       ;first.
  976. cmp ebx,Sqrt2Hi ;x > sqrt(2)?
  977. jb LogReduced
  978. ja LogReduceOne
  979. cmp esi,Sqrt2Lo
  980. jb LogReduced
  981. LogReduceOne:
       ;Mantissa >= sqrt(2): halve the reduced value and compensate by adding
       ;one to the integer part.
  982. sub ecx,1 shl 16 ;1/sqrt(2) < x < 1
  983. inc eax
  984. LogReduced:
  985. push eax ;Save integer part of log2
  986. mov ebp,ecx ;Save reduced exponent (tag is wrong!)
  987. mov edx,1 shl 31
  988. mov eax,bSign shl 8 ;Exponent of 0, negative
  989. xor edi,edi ;edx:edi,eax = -1.0
  990. call AddDoubleReg
  991. cmp cl,bTAG_ZERO ;Was it exact power of two?
  992. jz LogDone ;Skip log if power of two
  993. ;Save (x - 1), reload x with reduced exponent
       ;The stack slot receives (x - 1) while the registers get the original
       ;mantissa back, which is the layout BasicLog expects.
  994. mov edi,EMSEG:[CURstk] ;Point to original x again
  995. xchg EMSEG:[edi].lManHi,ebx
  996. xchg EMSEG:[edi].lManLo,esi
  997. mov EMSEG:[edi].ExpSgn,ecx
  998. mov ecx,ebp ;Get reduced exponent
  999. xor eax,eax ;Exponent of 0, positive
  1000. call BasicLog
  1001. LogDone:
       ;Registers hold the log2 of the reduced argument (zero if the argument
       ;was an exact power of two). Now convert the saved integer exponent to
       ;floating point and add it in.
  1002. pop eax ;Get integer part back
  1003. cwde
  1004. or eax,eax ;Is it zero?
  1005. jz TotalLog
  1006. ;Next 3 instructions take abs() of integer
  1007. cdq ;Extend sign through edx
  1008. xor eax,edx ;Complement...
  1009. sub eax,edx ; and increment if negative
  1010. bsr dx,ax ;Look for MSB to normalize integer
  1011. ;Bit number in dx ranges from 0 to 15
       ;The shift uses only the low 5 bits of cl, so "not cl" yields a count
       ;of 31 - bit number, moving the integer's MSB up to bit 31.
  1012. mov cl,dl
  1013. not cl ;Convert to shift count
  1014. shl eax,cl ;Normalize
  1015. .erre TexpBias eq 0
       ;edx = sign mask from cdq in the high word, bit number in the low word.
       ;After the rotate the bit number (the exponent) is in the high word and
       ;the sign mask is in the low word.
  1016. rol edx,16 ;Move exponent high, sign low
  1017. or ebx,ebx ;Was log zero?
  1018. jz ExactPower
  1019. xchg edx,eax ;Exp/sign to eax, mantissa to edx
  1020. xor edi,edi ;Extend with zero
  1021. call AddDoubleReg
  1022. TotalLog:
       ;Also entered as the return address pushed by LogP1Double.
  1023. ;Registers could be zero if input was exactly 1.0
  1024. cmp cl,bTAG_ZERO
  1025. jz ZeroLog
  1026. TotalLogNotZero:
       ;Multiply the log by the second argument; MulDouble returns to
       ;TransUnround through the address pushed here.
  1027. mov edi,EMSEG:[Result] ;Point to second arg
  1028. push offset TransUnround
  1029. jmp MulDouble
  1030. ExactPower:
  1031. ;Arg was a power of two, so log is exact (but not zero).
        ;On entry eax = normalized integer mantissa, edx = exponent (high
        ;word) and sign (low word), as prepared at LogDone.
  1032. mov ebx,eax ;Mantissa to ebx
  1033. mov ecx,edx ;Exponent to ecx
  1034. xor esi,esi ;Extend with zero
  1035. ;Exponent of arg [= log2(arg)] is now normalized in ebx:esi,ecx
  1036. ;
  1037. ;The result log is exact, so we don't want TransUnround, which is designed
  1038. ;to ensure the result is never exact. Instead we set the [RoundMode]
  1039. ;vector to [TransRound] before the final multiply.
  1040. mov eax,EMSEG:[TransRound]
  1041. mov EMSEG:[RoundMode],eax
  1042. mov edi,EMSEG:[Result] ;Point to second arg
  1043. push offset RestoreRound ;Return addr. for MulDouble in emtrig.asm
  1044. jmp MulDouble
  1045. ZeroLog:
        ;Reached from TotalLog when the computed log is zero. Undo the
        ;temporary polynomial rounding and zero vectors and store the result
        ;through SaveResult without the final multiply.
  1046. mov eax,EMSEG:[SavedRoundMode]
  1047. mov EMSEG:[RoundMode],eax
  1048. mov EMSEG:[ZeroVector],offset SaveResult
  1049. jmp SaveResult
  1050. ;***
  1051. LogZeroDest:
        ;ST(1) is zero, so the result is 0 * log2(x); only the sign of the
        ;zero (or an invalid operation) has to be decided.
  1052. or ch,ch ;Is it negative?
  1053. js ReturnIndefinite ;Can't take log of negative numbers
  1054. ;See if log is + or - so we can get correct sign of zero
        ;A negative exponent (x < 1) means the log is negative.
  1055. or ecx,ecx ;Is exponent >= 0?
  1056. jge LogRet ;If so, keep present zero sign
  1057. FlipDestSign:
        ;Complements the sign byte of the destination operand. Also used as
        ;an exit by LogSpclDest.
  1058. not EMSEG:[edi].bSgn
  1059. ret
  1060. ;***
  1061. LogSpclDest:
        ;ST(1) is a special value. Anything other than infinity is handed to
        ;the generic SpclDest handler.
  1062. mov al,EMSEG:[edi].bTag ;Pick up tag
  1063. cmp al,bTAG_INF ;Is argument infinity?
  1064. jnz SpclDest ;In emarith.asm
  1065. ;Multiplying log(x) * infinity.
  1066. ;If x > 1, return original infinity.
  1067. ;If 0 <= x < 1, return infinity with sign flipped.
  1068. ;If x < 0 or x == 1, invalid operation.
  1069. cmp cl,bTAG_ZERO
  1070. jz FlipDestSign
  1071. or ch,ch ;Is it positive?
  1072. js ReturnIndefinite
        ;TEST clears the overflow flag and sets the sign flag from the top bit
        ;of the masked exponent, so jg is taken for a positive exponent
        ;(x >= 2) and jl for a negative one (x < 1).
  1073. test ecx,0FFFFH shl 16 ;Is exponent zero?
  1074. jg LogRet ;x > 1, just return infinity
  1075. jl FlipDestSign
        ;Exponent is zero: 1 <= x < 2. Only exactly 1.0 (no mantissa bits
        ;below the MSB) gives a zero log, which is invalid against infinity.
  1076. sub ebx,1 shl 31 ;Kill MSB
  1077. or ebx,esi
  1078. jz ReturnIndefinite ;x == 1.0
  1079. LogRet:
  1080. ret