Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

863 lines
28 KiB

  1. subttl emtrig.asm - Trig functions sine, cosine, tangent
  2. page
  3. ;*******************************************************************************
  4. ; Copyright (c) Microsoft Corporation 1991
  5. ; All Rights Reserved
  6. ;
  7. ;emtrig.asm - Trig functions sine, cosine, tangent
  8. ; by Tim Paterson
  9. ;
  10. ;Purpose:
  11. ; FCOS, FPTAN, FSIN, FSINCOS instructions
  12. ;Inputs:
  13. ; edi = [CURstk]
  14. ;
  15. ;Revision History:
  16. ;
  17. ; [] 09/05/91 TP Initial 32-bit version.
  18. ;
  19. ;*******************************************************************************
  20. ;XPi is the 66-bit value of Pi from the Intel manual
  21. XPiHi equ 0C90FDAA2H
  22. XPiMid equ 02168C234H
  23. XPiLo equ 0C0000000H ;Extension of pi
  24. PiOver4exp equ -1 ;Pi/4 ~= 3/4, so exponent is -1
  25. TinyAngleExp equ -32 ;Smallest angle we bother with
  26. MaxAngleExp equ 63 ;Angle that's too big
  27. Trig1Result:
  28. ;Trig function reduction routine used by functions returning 1 value
  29. ;(FSIN and FCOS)
  30. ;edi = [CURstk] = argument pointer
  31. ;Argument has already been checked for zero.
  32. ;ZF = (tag == bTAG_ZERO)
  33. jb TrigPrem
  34. ;Tagged special
  35. mov al,EMSEG:[edi].bTAG
  36. cmp al,bTAG_DEN
  37. jz TrigDenorm
  38. add sp,4 ;Don't return to caller
  39. cmp al,bTAG_INF
  40. jnz SpclDestNotDen ;Check for Empty or NAN
  41. mov EMSEG:[SWcc],C2 ;Can't reduce infinity
  42. jmp ReturnIndefinite
  43. TrigDenorm:
  44. mov EMSEG:[CURerr],Denormal
  45. test EMSEG:[CWmask],Denormal ;Is denormal exception masked?
  46. jnz TrigPrem ;Yes, continue
  47. add sp,4 ;Don't return to caller
  48. TrigRet:
  49. ret
  50. Trig2Inf:
  51. mov EMSEG:[SWcc],C2 ;Can't reduce infinity
  52. jmp Trig2Indefinite
  53. Trig2StackOver:
  54. mov EMSEG:[SWcc],C1 ;Signal overflow
  55. Trig2StackUnder:
  56. mov EMSEG:[CURerr],Invalid+StackFlag
  57. Trig2Indefinite:
  58. add sp,4 ;Don't return to caller
  59. call ReturnIndefinite
  60. jz TrigRet ;Unmasked, don't change registers
  61. ;Produce masked response
  62. mov EMSEG:[CURstk],esi ;Push stack
  63. mov edi,esi
  64. jmp ReturnIndefinite
  65. Trig2Special:
  66. cmp al,bTAG_DEN
  67. jz TrigDenorm
  68. cmp al,bTAG_INF
  69. jz Trig2Inf
  70. ;Must be a NAN
  71. add sp,4 ;Don't return to caller
  72. call DestNAN
  73. jz TrigRet ;Unmasked, don't change registers
  74. ;Produce masked response
  75. mov EMSEG:[CURstk],esi ;Push stack
  76. mov eax,EMSEG:[edi].ExpSgn
  77. mov EMSEG:[esi].ExpSgn,eax
  78. mov eax,EMSEG:[edi].lManHi
  79. mov EMSEG:[esi].lManHi,eax
  80. mov eax,EMSEG:[edi].lManLo
  81. mov EMSEG:[esi].lManLo,eax
  82. ret
  83. Trig2Zero:
  84. add sp,4 ;Don't return to caller
  85. mov EMSEG:[CURstk],esi
  86. mov edi,esi
  87. ;Amazing coincidence: both FSINCOS and FPTAN return the same result for
  88. ;a zero argument:
  89. ; FSINCOS returns ST(0) = cos(0) = 1, ST(1) = sin(0) = 0.
  90. ; FPTAN returns ST(0) = 1 always, ST(1) = tan(0) = 0.
  91. ;Return zero has same sign as argument zero, so we don't need to touch
  92. ;it -- just push +1.0.
  93. jmp ReturnOne
  94. TrigOutOfRange:
  95. mov EMSEG:[SWcc],C2 ;Signal argument not reduced
  96. add sp,4
  97. ret
  98. PrevStackWrap esi,Trig2 ;Tied to PrevStackElem below
  99. Trig2Result:
  100. ;Trig function reduction routine used by functions returning 2 values
  101. ;(FSINCOS and FPTAN)
  102. ;edi = [CURstk] = argument pointer
  103. mov esi,edi
  104. PrevStackElem esi,Trig2 ;esi points to second result location
  105. mov al,EMSEG:[edi].bTAG ;Get tag
  106. cmp al,bTAG_EMPTY ;Stack underflow if empty
  107. jz Trig2StackUnder
  108. cmp EMSEG:[esi].bTAG,bTAG_EMPTY ;Stack overflow if not empty
  109. jnz Trig2StackOver
  110. cmp al,bTAG_ZERO ;Is it Special?
  111. ja Trig2Special
  112. jz Trig2Zero
  113. ;Fall into TrigPrem
  114. ;****
  115. ;TrigPrem
  116. ;
  117. ;This routine reduces an angle in radians to the range [0, pi/4].
  118. ;Angles in odd-numbered octants have been subtracted from pi/4.
  119. ;It uses a 66-bit value for pi, as required by the 387.
  120. ;TrigPrem uses the same two-stage algorithm as FPREM (see
  121. ;emfprem.asm). However, it is limited to an argument < 2^63.
  122. ;
  123. ;Inputs:
  124. ; edi = [CURstk]
  125. ;Outputs:
  126. ; ebx:esi = remainder, normalized
  127. ; high ecx = exponent, cl = tag
  128. ; al = octant
  129. ; edi = [CURstk]
  130. TrigPrem:
  131. mov EMSEG:[Result],edi
  132. mov eax,EMSEG:[edi].lManLo
  133. mov edx,EMSEG:[edi].lManHi
  134. movsx ebx,EMSEG:[edi].wExp
  135. cmp ebx,MaxAngleExp
  136. jge TrigOutOfRange
  137. xor edi,edi ;Extend dividend
  138. xor esi,esi ;Quotient, in case we skip stage 1
  139. .erre PiOver4exp eq -1
  140. inc ebx ;Subtract exponent of pi/4
  141. jl ExitTrigPrem ;If dividend is smaller, return it.
  142. ;We now know that 0 <= ExpDif < 64, so it fits in bl.
  143. cmp bl,31 ;Do we need to do stage 1?
  144. jl FitPi ;No, start stage 2
  145. ;FPREM stage 1
  146. ;
  147. ;Exponent difference is at least 31. Use 32-bit division to compute
  148. ;quotient and exact remainder, reducing exponent difference by 31.
  149. ;
  150. ;edx:eax = dividend
  151. ;ebx = exponent difference
  152. ;Shift dividend right one bit to be sure DIV instruction won't overflow
  153. ;This means we'll be reducing the exponent difference by 31, not 32
  154. xor ebp,ebp ;Dividend extension
  155. shrd ebp,eax,1
  156. shrd eax,edx,1
  157. shr edx,1
  158. sub bl,31 ;Exponent reduced
  159. mov ecx,XPiHi
  160. div ecx ;Guess a quotient "digit"
  161. ;Check out our guess.
  162. ;Currently, remainder in edx = (high dividend) - (quotient * high pi).
  163. ;(High dividend is the upper 64 bits--ebp has 1 bit.) The definition
  164. ;of remainder is (all dividend) - (quotient * all pi). So if we
  165. ;subtract (quotient * low pi) from edx:ebp, we'll get the true
  166. ;remainder. If it's negative, our guess was too big.
  167. mov esi,eax ;Save quotient
  168. mov ecx,edx ;Save remainder
  169. ;The pi/4 we use has two bits set below the first 64 bits. This means
  170. ;we must add another 3/4 of the quotient into the amount to subtract,
  171. ;which we'll compute by rounding the low 32 bits up 1, then subtracting
  172. ;1/4 of quotient. But since we're computing the amount to subtract from
  173. ;the remainder, we'll add the 1/4 of the quotient to the remainder instead
  174. ;of subtracting it from the amount to subtract.
  175. .erre XPiLo eq (3 shl 30)
  176. mov eax,XPiMid+1
  177. mul esi ;Quotient * low pi
  178. ;Note that ebp is either 0 or 800...00H
  179. shr ebp,30 ;Move down to low end
  180. shld ebp,esi,30 ;Move back up, adding 1/4 of quotient
  181. mov edi,esi ;Another copy of quotient
  182. shl edi,30 ;Keep last two bits
  183. ;edx:eax has amount to subtract to get correct remainder from ecx:ebp:edi
  184. sub ebp,eax
  185. sbb ecx,edx ;Subtract from remainder
  186. mov eax,ebp
  187. mov edx,ecx ;Remainder back to edx:eax:edi
  188. jnc TrigPremNorm ;Was quotient OK?
  189. TrigCorrect:
  190. dec esi ;Quotient was too big
  191. add edi,XPiLo
  192. adc eax,XPiMid ;Add divisor back into remainder
  193. adc edx,XPiHi
  194. jnc TrigCorrect ;Repeat if quotient is still too big
  195. jmp TrigPremNorm
  196. ;FPREM stage 2
  197. ;
  198. ;Exponent difference is less than 32. Use restoring long division to
  199. ;compute quotient bits until exponent difference is zero. Note that we
  200. ;often get more than one bit/loop: BSR is used to scan off leading
  201. ;zeros each time around. Since the divisor is normalized, we can
  202. ;instantly compute a zero quotient bit for each leading zero bit.
  203. TrigPremLoop:
  204. ;edx:eax:edi = dividend (remainder) minus pi/4
  205. ;esi = quotient
  206. ;ebx = exponent difference
  207. ;
  208. ;If D is current dividend and p is pi/4, then we have edx:eax:edi = D - p,
  209. ;which is negative. We want 2*D - p, which is positive.
  210. ;2*D - p = 2*(D - p) + p.
  211. add edi,edi ;2*(D - p)
  212. adc eax,eax
  213. adc edx,edx
  214. add edi,XPiLo ;2*(D-p) + p = 2*D - p
  215. adc eax,XPiMid
  216. adc edx,XPiHi
  217. add esi,esi ;Double quotient too
  218. dec ebx ;Decrement exponent difference
  219. PiFit:
  220. inc esi
  221. TrigPremNorm:
  222. bsr ecx,edx ;Find first 1 bit
  223. jz TrigPremZero
  224. not cl
  225. and cl,1FH ;Convert bit no. to shift count
  226. sub ebx,ecx ;Reduce exponent difference
  227. jl TrigTooFar
  228. shld edx,eax,cl
  229. shld eax,edi,cl
  230. shl edi,cl ;Finish normalize shift
  231. shl esi,cl ;Shift quotient
  232. FitPi:
  233. ;Dividend could be larger or smaller than divisor
  234. sub edi,XPiLo
  235. sbb eax,XPiMid
  236. sbb edx,XPiHi
  237. jnc PiFit
  238. ;Couldn't subtract pi/2 from dividend.
  239. ;edx:eax:edi = dividend - pi/4, which is negative
  240. or ebx,ebx ;Is exponent difference zero?
  241. jg TrigPremLoop
  242. ;If quotient (octant number) is odd, we have subtracted an odd number of
  243. ;pi/4's. However, simple angle reductions work in multiples of pi/2.
  244. ;We will keep the extra pi/4 we just subtracted if the octant was odd.
  245. ;This will give a result range of [-pi/4, pi/4].
  246. test esi,1 ;Is octant odd?
  247. jz EvenOctant
  248. NegPremResult:
  249. ;-pi/4 < dividend < 0. Negate this since we use sign-magnitude representation.
  250. not edx ;96-bit negation
  251. not eax
  252. neg edi
  253. sbb eax,-1
  254. sbb edx,-1
  255. ;May need to normalize
  256. bsr ecx,edx
  257. jz TrigNorm32
  258. lea ebx,[ebx+ecx-31] ;Fix up exponent for normalization
  259. not cl ;Convert bit no. to shift count
  260. TrigShortNorm:
  261. shld edx,eax,cl
  262. shld eax,edi,cl
  263. shl edi,cl ;Finish normalize shift
  264. RoundPrem:
  265. ;Must round 66-bit result to 64 bits.
  266. ;To perform "round even" when the round bit is set and the sticky bits
  267. ;are zero, we treat the LSB as if it were a sticky bit. Thus if the LSB
  268. ;is set, that will always force a round up (to even) if the round bit is
  269. ;set. If the LSB is zero, then the sticky bits remain zero and we always
  270. ;round down. This rounding rule is implemented by adding RoundBit-1
  271. ;(7F..FFH), setting CY if round up.
  272. bt eax,0 ;Is mantissa even or odd? (set CY)
  273. adc edi,(1 shl 31)-1 ;Sum LSB & sticky bits--CY if round up
  274. adc eax,0
  275. adc edx,0
  276. ExitTrigPrem:
  277. ;edx:eax = remainder, normalized
  278. ;esi = quotient
  279. ;ebx = exponent difference, zero or less
  280. .erre PiOver4exp eq -1
  281. dec ebx ;True exponent
  282. .erre bTAG_SNGL eq 0
  283. shrd ecx,ebx,16 ;Exponent to high ecx
  284. mov ebx,edx ;High mant. to ebx
  285. xchg esi,eax ;Low mant. to esi, octant to eax
  286. or esi,esi ;Any bits in low half?
  287. .erre bTAG_VALID eq 1
  288. .erre bTAG_SNGL eq 0
  289. setnz cl ;if low half==0 then cl=0 else cl=1
  290. mov edi,EMSEG:[CURstk]
  291. test EMSEG:[edi].bSgn,bSign ;Was angle negative?
  292. jnz FlipOct ;Yes, flip octant over
  293. ret
  294. FlipOct:
  295. ;Angle was negative. Subtract octant from 7.
  296. neg al
  297. add al,7
  298. ret
  299. EvenOctant:
  300. ;Restore dividend
  301. add edi,XPiLo
  302. adc eax,XPiMid
  303. adc edx,XPiHi
  304. jmp RoundPrem
  305. TrigTooFar:
  306. ;Exponent difference in ebx went negative when reduced by shift count in ecx.
  307. ;We need a quotient corresponding to exponent difference of zero.
  308. add ecx,ebx ;Compute previous exponent difference
  309. shl esi,cl ;Fix up quotient
  310. sub ecx,ebx ;Restore shift count
  311. test esi,1 ;Is octant odd?
  312. jz TrigShortNorm ;No, go normalize
  313. xor ebx,ebx ;Restore old exponent difference (zero)
  314. SubPiOver4:
  315. ;We are here if exponent difference was zero and octant is odd.
  316. ;As noted above, we need to reduce the angle by a multiple of pi/2,
  317. ;not pi/4. We will subtract one more pi/4, which will make the
  318. ;result range [-pi/4, pi/4].
  319. sub edi,XPiLo
  320. sbb eax,XPiMid
  321. sbb edx,XPiHi
  322. jmp NegPremResult
  323. TrigPremZero:
  324. ;High dword of remainder is all zero, so we've reduced exponent difference
  325. ;by 32 bits and overshot. We need a quotient corresponding to exponent
  326. ;difference of zero, so we just shift it by the original difference. Then
  327. ;we need to normalize the rest of the remainder.
  328. mov ecx,ebx ;Get exponent difference
  329. shl esi,cl ;Fix up quotient
  330. test esi,1 ;Is octant odd?
  331. jnz SubPiOver4 ;Yes, go subtract another pi/4
  332. TrigNorm32:
  333. bsr ecx,eax
  334. jz TinyTrig
  335. lea ebx,[ebx+ecx-31-32] ;Fix up exponent for normalization
  336. mov edx,eax
  337. mov eax,edi ;Shift left by 32 bits
  338. not cl ;Convert bit no. to shift count
  339. shld edx,eax,cl ;Normalize remainder
  340. shl eax,cl
  341. jmp ExitTrigPrem
  342. TinyTrig:
  343. ;Upper 64 bits of remainder are all zero. We are assured that the extended
  344. ;remainder is never zero, though.
  345. mov edx,edi ;Shift left 64 bits
  346. bsr ecx,edi
  347. lea ebx,[ebx+ecx-31-64] ;Fix up exponent for normalization
  348. not cl ;Convert bit no. to shift count
  349. shl edx,cl ;Normalize
  350. jmp ExitTrigPrem
  351. ;*******************************************************************************
  352. EM_ENTRY eFCOS
  353. eFCOS:
  354. and [esp].[OldLongStatus+4],NOT(C2 SHL 16) ;clear C2
  355. cmp EMSEG:[edi].bTAG,bTAG_ZERO
  356. jz ReturnOne
  357. call Trig1Result
  358. ;ebx:esi,ecx = reduced argument
  359. ;eax = octant
  360. mov ch,80H ;Assume negative
  361. test al,110B ;Negative in octants 2 - 5
  362. jpo @F ;Occurs when 1 of these bits are set
  363. xor ch,ch ;Actually positve
  364. @@:
  365. test al,011B ;Look for octants 0,3,4,7
  366. jpo TakeSine ;Use sine if not
  367. TakeCosine:
  368. cmp ecx,TinyAngleExp shl 16 ;Is angle really small?
  369. jl CosReturnOne ;cos(x) = 1 for tiny x
  370. CosNotTiny:
  371. mov edi,offset tCosPoly
  372. ;Note that argument needs to be saved in ArgTemp (by EvalPolySetup) in case
  373. ;we were called from eFSINCOS and we'll need the arg for the sine. Argument
  374. ;is not needed for cosine, however (just its square).
  375. call EvalPolySetup ;In emftran.asm
  376. mov ch,EMSEG:[ArgTemp].bSgn ;Get sign we already figured out
  377. TransUnround:
  378. ;The last operation performed a simple round nearest, without setting the
  379. ;C1 status bit if round up occured. We reverse this last rounding now
  380. ;so we can do the user's selected rounding mode. We also ensure that
  381. ;the answer is never exact.
  382. sub eax,(1 shl 31)-1 ;Sum LSB & sticky bits--CY if round up
  383. jz UnroundExact ;Answer looks exact, but it's not
  384. sbb esi,0
  385. sbb ebx,0
  386. jns PolyDropExponent ;We had rounded up exponent too
  387. FinalTransRound:
  388. ;A jump through [TransRound] is only valid if the number is known not to
  389. ;underflow. Unmasked underflow requires [RoundMode] be set.
  390. mov edx,EMSEG:[TransRound]
  391. mov EMSEG:[RoundMode],edx
  392. call edx ;Perform user's rounding
  393. RestoreRound:
  394. ;Restore rounding vectors
  395. mov EMSEG:[ZeroVector],offset SaveResult
  396. mov eax,EMSEG:[SavedRoundMode]
  397. mov EMSEG:[RoundMode],eax
  398. ret
  399. UnroundExact:
  400. inc eax ;Let's say our answer is a bit small
  401. jmp FinalTransRound
  402. PolyDropExponent:
  403. sub ecx,1 shl 16 ;Decrement exponent
  404. or ebx,1 shl 31 ;Set MSB
  405. jmp FinalTransRound
  406. SinRet:
  407. ret
  408. SaveTinySin:
  409. ;Argument in ebx:esi,ecx is small enough so that sin(x) = x, which happens
  410. ;when x - x^3/6 = x [or 1 - x^2/6 = 1]. Note that the infinitely precise
  411. ;result is slightly less than the argument. To get the correct answer for
  412. ;any rounding mode, we decrement the argument and set up for rounding.
  413. mov eax,-1 ;Set up rounding bits
  414. sub esi,1
  415. sbb ebx,0 ;Drop mantissa by one
  416. js FinalTransRound ;Still normalized?
  417. ;mantissa must have been 800..000H, set it to 0FFF...FFFH and drop exponent
  418. mov ebx,eax ;ebx = -1
  419. sub ecx,1 shl 16 ;Drop exponent by one
  420. jmp FinalTransRound
  421. EM_ENTRY eFSIN
  422. eFSIN:
  423. and [esp].[OldLongStatus+4],NOT(C2 SHL 16) ;clear C2
  424. cmp EMSEG:[edi].bTAG,bTAG_ZERO
  425. jz SinRet ;Return zero for zero argument
  426. call Trig1Result
  427. mov ch,al
  428. shl ch,7-2 ;Move bit 2 to bit 7 as sign bit
  429. ReducedSine:
  430. ;ebx:esi,ecx = reduced argument
  431. ;ch = correct sign
  432. ;eax = octant
  433. test al,011B ;Look for octants 0,3,4,7
  434. jpo TakeCosine ;Use cosine if not
  435. TakeSine:
  436. cmp ecx,TinyAngleExp shl 16 ;Is angle really small?
  437. jl SaveTinySin ;sin(x) = x for tiny x
  438. ;The polynomial for sine is sin(x) = x * P(x^2). However, the degree zero
  439. ;coefficient of P() is 1, so P() = R() + 1, where R() has no degree zero
  440. ;term. Thus sin(x) = x * [R(x^2) + 1] = x * R(x^2) + x.
  441. ;
  442. ;What's important here is that adding 1 to R(x^2) can blow away a lot of
  443. ;precision just before we do that last multiply by x. Note that x < pi/4 < 1,
  444. ;so that x^2 is often << 1. The precision is lost when R(x^2) is shifted
  445. ;right to align its binary point with 1.0. This can cause a loss of at
  446. ;least 1 bit of precision after the final multiply by x in addition to
  447. ;rounding errors.
  448. ;
  449. ;To avoid this precision loss, we use the alternate form given above,
  450. ;sin(x) = x * R(x^2) + x. Instead of adding 1.0 and multiplying by x,
  451. ;we multiply by x and add x--exactly the same level of difficulty. But
  452. ;the mulitply has all of R(x^2)'s precision available.
  453. ;
  454. ;Because the polynomial R() has no zero-degree term, we give EvalPoly
  455. ;one degree less (so we don't have to add zero as the last term).
  456. ;Then we have to multiply once more by x^2 since we left the loop early.
  457. SineNotTiny:
  458. mov edi,offset tSinPoly
  459. call EvalPolySetup ;In emftran.asm
  460. SineFinish:
  461. ifdef NT386
  462. mov edi,YFloatTemp
  463. else
  464. mov edi,offset edata:FloatTemp
  465. endif
  466. call PolyMulDouble ;Last coefficient in R(x^2)
  467. ifdef NT386
  468. mov edi,YArgTemp ;Point to original x
  469. else
  470. mov edi,offset edata:ArgTemp ;Point to original x
  471. endif
  472. call PolyMulDouble ;Compute x * R(x^2)
  473. ifdef NT386
  474. mov edi,YArgTemp ;Point to original x
  475. else
  476. mov edi,offset edata:ArgTemp ;Point to original x
  477. endif
  478. push offset TransUnround
  479. jmp PolyAddDouble ;Compute x * R(x^2) + x
  480. EM_ENTRY eFPTAN
  481. eFPTAN:
  482. and [esp].[OldLongStatus+4],NOT(C2 SHL 16) ;clear C2
  483. call Trig2Result
  484. push offset TanPushOne ; Push 1.0 when we're all done
  485. ;ebx:esi,ecx = reduced argument
  486. ;eax = octant
  487. mov ch,al
  488. shl ch,7-1 ;Move bit 1 to bit 7 as sign bit
  489. ;Note that ch bit 6 now has even/odd octant, which we'll need when we're
  490. ;done to see if we should take reciprocal.
  491. cmp ecx,TinyAngleExp shl 16 ;Is angle really small?
  492. jl TinyTan
  493. mov edi,offset tTanPoly
  494. call Eval2Poly ;In emftran.asm
  495. mov edi,EMSEG:[CURstk] ;Point to first result
  496. push offset TransUnround ;Return address of divide
  497. test EMSEG:[ArgTemp].bSgn,0C0H ;Check low 2 bits of octant
  498. ;Given the reduced input range, the result can never overflow or underflow.
  499. ;It is must then be safe to assume neither operand is zero.
  500. jpe DivDouble ;Tan() octants 0,3,4,7
  501. jmp DivrDouble ;CoTan()
  502. TinyTan:
  503. test ch,0C0H ;Check low 2 bits of octant
  504. jpe SaveTinySin ;Octants 0,3,4,7: tan(x) = x for tiny x
  505. ;Need reciprocal of reduced argument
  506. mov edi,esi
  507. mov esi,ebx ;Mantissa in esi:edi
  508. mov ebx,ecx ;ExpSgn to ebx
  509. mov edx,1 shl 31 ;Load 1.0
  510. xor eax,eax
  511. .erre TexpBias eq 0
  512. xor ecx,ecx ;Sign and exponent are zero
  513. ;dividend mantissa in edx:eax, exponent in high ecx, sign in ch bit 7
  514. ;divisor mantissa in esi:edi, exponent in high ebx, sign in bh bit 7
  515. push offset TransUnround ;Return address of divide
  516. ;Note that this can never overflow, because the reduced argument is never
  517. ;smaller than about 2^-65.
  518. jmp DivDoubleReg
  519. PrevStackWrap edi,Tan ;Tied to PrevStackElem below
  520. TanPushOne:
  521. PrevStackElem edi,Tan ;edi points to second result location
  522. mov EMSEG:[CURstk],edi
  523. ReturnOne:
  524. mov EMSEG:[edi].lManLo,0
  525. mov EMSEG:[edi].lManHi,1 shl 31
  526. mov EMSEG:[edi].ExpSgn,(0-TexpBias) shl 16 + bTAG_SNGL
  527. ret
  528. PrevStackWrap edi,SinCos ;Tied to PrevStackElem below
  529. eFSINCOS:
  530. and [esp].[OldLongStatus+4],NOT(C2 SHL 16) ;clear C2
  531. call Trig2Result
  532. ;Figure out signs
  533. mov ch,al ;Start with sign of sine
  534. shl ch,7-2 ;Move bit 2 to bit 7 as sign bit
  535. mov ah,80H ;Assume sign of cosine is negative
  536. test al,110B ;Negative in octants 2 - 5
  537. jpo @F ;Occurs when 1 of these bits are set
  538. xor ah,ah ;Actually positve
  539. @@:
  540. ;ch = sign of sine
  541. ;ah = sign of cosine
  542. cmp ecx,TinyAngleExp shl 16 ;Is angle really small?
  543. jl TinySinCos
  544. push eax ;Save octant and sign of cosine
  545. call ReducedSine ;On exit, edi = [CURstk]
  546. pop eax
  547. ;The Sin() funcion restored the rounding vectors to normal. Set them back.
  548. mov EMSEG:[RoundMode],offset PolyRound
  549. mov EMSEG:[ZeroVector],offset PolyZero
  550. PrevStackElem edi,SinCos ;edi points to second result location
  551. mov EMSEG:[CURstk],edi
  552. mov EMSEG:[Result],edi
  553. ;Load x^2 back into registers
  554. mov ecx,EMSEG:[FloatTemp].ExpSgn
  555. mov ebx,EMSEG:[FloatTemp].lManHi
  556. mov esi,EMSEG:[FloatTemp].lManLo
  557. mov EMSEG:[ArgTemp].bSgn,ah ;Save sign
  558. test al,011B ;Look for octants 0,3,4,7
  559. jpo FastSine ;Use sine if not
  560. mov edi,offset tCosPoly
  561. call EvalPoly ;In emftran.asm
  562. mov ch,EMSEG:[ArgTemp].bSgn ;Get sign we already figured out
  563. jmp TransUnround
  564. FastSine:
  565. mov edi,offset tSinPoly
  566. push offset SineFinish
  567. jmp EvalPoly ;In emftran.asm
  568. TinySinCos:
  569. ;ch = sign of sine
  570. ;ah = sign of cosine
  571. ;ebx:esi,high ecx = reduced argument
  572. ;edi = [CURstk]
  573. test al,011B ;Look for octants 0,3,4,7
  574. jpo TinyCosSin ;Take cosine first if not
  575. push eax
  576. call SaveTinySin ;For sine, arg is result
  577. pop ecx
  578. ;edi = [CURstk]
  579. ;ch = sign of cosine
  580. ;Set cosine to 1.0
  581. PrevStackElem edi,TinySinCos ;edi points to second result location
  582. mov EMSEG:[CURstk],edi
  583. mov EMSEG:[Result],edi
  584. CosReturnOne:
  585. ;Cosine is nearly equal to 1.0. Put in next smaller value and round it.
  586. mov ebx,-1
  587. mov esi,ebx ;Set mantissa to -1
  588. mov eax,ebx ;Set up rounding bits
  589. .erre TexpBias eq 0
  590. and ecx,bSign shl 8 ;Keep only sign
  591. sub ecx,1 shl 16 ;Exponent of -1
  592. ;A jump through [TransRound] is only valid if the number is known not to
  593. ;underflow. Unmasked underflow requires [RoundMode] be set.
  594. jmp EMSEG:[TransRound]
  595. PrevStackWrap edi,TinySinCos
  596. PrevStackWrap edi,TinyCosSin
  597. TinyCosSin:
  598. ;Sine is nearly 1.0, cosine is argument
  599. ;
  600. ;ch = sign of sine
  601. ;ah = sign of cosine
  602. ;ebx:esi,high ecx = reduced argument
  603. ;edi = [CURstk]
  604. xchg ah,ch ;Cosine sign to ch, sine sign to ah
  605. push edi ;Save place for sine
  606. PrevStackElem edi,TinyCosSin ;edi points to second result location
  607. mov EMSEG:[CURstk],edi
  608. mov EMSEG:[Result],edi
  609. push eax
  610. call SaveTinySin ;For sine, arg is result
  611. pop ecx
  612. ;ch = sign of sine
  613. pop EMSEG:[Result] ;Set up location for sine
  614. jmp CosReturnOne
  615. ;*******************************************************************************
  616. ;********************* Polynomial Coefficients *********************
  617. ;These polynomial coefficients were all taken from "Computer Approximations"
  618. ;by J.F. Hart (reprinted 1978 w/corrections). All calculations and
  619. ;conversions to hexadecimal were done with a character-string calculator
  620. ;written in Visual Basic with precision set to 30 digits. Once the constants
  621. ;were typed into this file, all transfers were done with cut-and-paste
  622. ;operations to and from the calculator to help eliminate any typographical
  623. ;errors.
  624. tCosPoly label word
  625. ;These constants are derived from Hart #3824: cos(x) = P(x^2),
  626. ;accurate to 19.45 digits over interval [0, pi/4]. The original
  627. ;constants in Hart required that the argument x be divided by pi/4.
  628. ;These constants have been scaled so this is no longer required.
  629. ;Scaling is done by multiplying the constant by a power of 4/pi.
  630. ;The power is given in the table.
  631. dd 7 ;Degree seven
  632. ; Original Hart constant power Scaled constant
  633. ;
  634. ;-0.38577 62037 2 E-12 14 -0.113521232057839395845871741043E-10
  635. ;Hex value: 0.C7B56AF786699CF1BD13FD290 HFFDC
  636. dq 0C7B56AF786699CF2H
  637. dw (bSign shl 8)+bTAG_VALID,0FFDCH-1
  638. ;+0.11500 49702 4263 E-9 12 +0.208755551456778828747793797596E-8
  639. ;Hex value: 0.8F74AA3CCE49E68D6F5444A18 HFFE4
  640. dq 08F74AA3CCE49E68DH
  641. dw bTAG_VALID,0FFE4H-1
  642. ;-0.24611 36382 63700 5 E-7 10 -0.275573128656960822243472872247E-6
  643. ;Hex value: 0.93F27B7F10CC8A1703EFC8A04 HFFEB
  644. dq 093F27B7F10CC8A17H
  645. dw (bSign shl 8)+bTAG_VALID,0FFEBH-1
  646. ;+0.35908 60445 88581 953 E-5 8 +0.248015872828994630247806807317E-4
  647. ;Hex value: 0.D00D00CD6BB3ECD17E10D5830 HFFF1
  648. dq 0D00D00CD6BB3ECD1H
  649. dw bTAG_VALID,0FFF1H-1
  650. ;-0.32599 18869 26687 55044 E-3 6 -0.138888888888589604343951947246E-2
  651. ;Hex value: 0.B60B60B609B165894CFE522AC HFFF7
  652. dq 0B60B60B609B16589H
  653. dw (bSign shl 8)+bTAG_VALID,0FFF7H-1
  654. ;+0.15854 34424 38154 10897 54 E-1 4 +0.416666666666664302573692446873E-1
  655. ;Hex value: 0.AAAAAAAAAAA99A1AF53042B08 HFFFC
  656. dq 0AAAAAAAAAAA99A1BH
  657. dw bTAG_VALID,0FFFCH-1
  658. ;-0.30842 51375 34042 45242 414 E0 2 -0.499999999999999992843582920899E0
  659. ;Hex value: 0.FFFFFFFFFFFFFEF7F98D3BFA8 HFFFF
  660. dq 0FFFFFFFFFFFFFEF8H
  661. dw (bSign shl 8)+bTAG_VALID,0FFFFH-1
  662. ;+0.99999 99999 99999 99996 415 E0 0 (no change)
  663. ;Hex value 0.FFFFFFFFFFFFFFFF56B402618 H0
  664. dq 0FFFFFFFFFFFFFFFFH
  665. dw bTAG_VALID,00H-1
  666. tSinPoly label word
  667. ;These constants are derived from Hart #3044: sin(x) = x * P(x^2),
  668. ;accurate to 20.73 digits over interval [0, pi/4]. The original
  669. ;constants in Hart required that the argument x be divided by pi/4.
  670. ;These constants have been scaled so this is no longer required.
  671. ;Scaling is done by multiplying the constant by a power of 4/pi.
  672. ;The power is given in the table.
  673. dd 7-1 ;Degree seven, but the last coefficient
  674. ;is 1.0 and is not listed here.
  675. ; Original Hart constant power Scaled constant
  676. ;
  677. ;-0.20225 31292 93 E-13 15 -0.757786788401271156262125540409E-12
  678. ;Hex value: 0.D54C4AF2B524F0F2D6411C90A HFFD8
  679. dq 0D54C4AF2B524F0F3H
  680. dw (bSign shl 8)+bTAG_VALID,0FFD8H-1
  681. ;+0.69481 52035 0522 E-11 13 +0.160583476232246065559545749398E-9
  682. ;Hex value: 0.B0903AF085DA66030F16E43BC HFFE0
  683. dq 0B0903AF085DA6603H
  684. dw bTAG_VALID,0FFE0H-1
  685. ;-0.17572 47417 61708 06 E-8 11 -0.250521047382673309542092418731E-7
  686. ;Hex value: 0.D73229320D2AF05971AC96FF4 HFFE7
  687. dq 0D73229320D2AF059H
  688. dw (bSign shl 8)+bTAG_VALID,0FFE7H-1
  689. ;+0.31336 16889 17325 348 E-6 9 +0.275573192133901687156480447942E-5
  690. ;Hex value: 0.B8EF1D2984D2FBA28A9CC9DEE HFFEE
  691. dq 0B8EF1D2984D2FBA3H
  692. dw bTAG_VALID,0FFEEH-1
  693. ;-0.36576 20418 21464 00052 9 E-4 7 -0.198412698412531058609618529749E-3
  694. ;Hex value: 0.D00D00D00C3FDDD7916E5CB28 HFFF4
  695. dq 0D00D00D00C3FDDD8H
  696. dw (bSign shl 8)+bTAG_VALID,0FFF4H-1
  697. ;+0.24903 94570 19271 62752 519 E-2 5 +0.83333333333333203341753387264E-2
  698. ;Hex value: 0.8888888888884C95D619A0343 HFFFA
  699. dq 08888888888884C96H
  700. dw bTAG_VALID,0FFFAH-1
  701. ;-0.80745 51218 82807 81520 2582 E-1 3 -0.166666666666666666281276062229E0
  702. ;Hex value: 0.AAAAAAAAAAAAAA8E3AD80EAB8 HFFFE
  703. dq 0AAAAAAAAAAAAAA8EH
  704. dw (bSign shl 8)+bTAG_VALID,0FFFEH-1
  705. ;+0.78539 81633 97448 30961 41845 E0 1 +0.99999999999999999999812025812E0
  706. ;Hex value: 0.FFFFFFFFFFFFFFFFF71F88110 H0
  707. ; dq 8000000000000000H ;This constant of 1.0 omitted here.
  708. ; dw bTAG_VALID,0 ; It is handled in code.
  709. tTanPoly label word
  710. ;These constants are derived from Hart #4286: tan(x) = x * P(x^2) / Q(x^2),
  711. ;accurate to 19.94 digits over interval [0, pi/4]. The original
  712. ;constants in Hart required that the argument x be divided by pi/4.
  713. ;These constants have been scaled so this is no longer required.
  714. ;Scaling is done by multiplying the constant by the same power of 4/pi
  715. ;as the power of x the constant is used on. However, the highest
  716. ;degree coefficient of Q() is 1, and after scaling this way it would
  717. ;become (4/pi)^8. In order to keep this coefficient equal to one,
  718. ;we scale everything again by (pi/4)^8. This scaling is partially
  719. ;canceled by the original scaling by powers of 4/pi, and the net
  720. ;resulting power of pi/4 is given in the table.
  721. dd 3 ;First poly is degree 3
  722. ; Original Hart constant power Scaled constant
  723. ;
  724. ;-.45649 31943 86656 31873 96113 7 E2 1 -35.8528916474714232910463077546
  725. ;Hex value: 0.8F695C6D93AF6F97B6E022AB3 H6
  726. dq 08F695C6D93AF6F98H
  727. dw (bSign shl 8)+bTAG_VALID,06H-1
  728. ;+.14189 85425 27617 78388 00394 831 E5 3 +6874.60229709782436592720603503
  729. ;Hex value: 0.D6D4D181240D0D08C88DF4AA6 HD
  730. dq 0D6D4D181240D0D09H
  731. dw bTAG_VALID,0DH-1
  732. ;-.89588 84400 67680 41087 29639 541 E6 5 -267733.884797157298951145495276
  733. ;Hex value: 0.82BABC504220C62B1D0722684 H13
  734. dq 082BABC504220C62BH
  735. dw (bSign shl 8)+bTAG_VALID,013H-1
  736. ;+.10888 60043 72816 87521 38857 983 E8 7 +2007248.9111748838841548144685
  737. ;Hex value: 0.F506874A160EB9C0994AADD6A H15
  738. dq 0F506874A160EB9C1H
  739. dw bTAG_VALID,015H-1
  740. dd 4 ;Second poly is degree 4
  741. ;NOTE: Eval2Poly assumes the first coefficient is 1.0, so it is omitted
  742. ; Original Hart constant power Scaled constant
  743. ;
  744. ;-.10146 56190 25288 53387 54401 947 E4 2 -625.890950057027419879480354834
  745. ;Hex value: 0.9C790553635355A95241A5324 HA
  746. dq 09C790553635355A9H
  747. dw (bSign shl 8)+bTAG_VALID,0AH-1
  748. ;+.13538 27128 05119 09382 89294 872 E6 4 +51513.6992033752080924797647367
  749. ;Hex value: 0.C939B2FEFE0DC585E649870FE H10
  750. dq 0C939B2FEFE0DC586H
  751. dw bTAG_VALID,010H-1
  752. ;-.39913 09518 03516 51504 43427 94 E7 6 -936816.855188785264866481436899
  753. ;Hex value: 0.E4B70DAEDA6F89E5A7CE626FA H14
  754. dq 0E4B70DAEDA6F89E6H
  755. dw (bSign shl 8)+bTAG_VALID,014H-1
  756. ;+.13863 79666 35676 29165 33913 361 E8 8 +2007248.91117488388417770850458
  757. ;Hex value: 0.F506874A160EB9C0CCD8313BC H15
  758. dq 0F506874A160EB9C1H
  759. dw bTAG_VALID,015H-1