subttl	emtrig.asm - Trig functions sine, cosine, tangent
	page
;*******************************************************************************
;	 Copyright (c) Microsoft Corporation 1991
;	 All Rights Reserved
;
;emtrig.asm - Trig functions sine, cosine, tangent
;	by Tim Paterson
;
;Purpose:
;	FCOS, FPTAN, FSIN, FSINCOS instructions
;Inputs:
;	edi = [CURstk]
;
;Revision History:
;
; []	09/05/91  TP	Initial 32-bit version.
;
;*******************************************************************************


;XPi is the 66-bit value of Pi from the Intel manual
;It is kept as three dwords: XPiHi:XPiMid are the upper 64 bits and XPiLo
;holds the two extension bits in its top two bit positions (TrigPrem
;asserts XPiLo eq 3 shl 30).  TrigPrem uses this same bit pattern as the
;mantissa of pi/4; only the exponent (PiOver4exp) differs.
XPiHi		equ	0C90FDAA2H
XPiMid		equ	02168C234H
XPiLo		equ	0C0000000H	;Extension of pi
PiOver4exp	equ	-1		;Pi/4 ~= 3/4, so exponent is -1

;Reduced angles whose exponent is below TinyAngleExp take the tiny-angle
;shortcuts (sin(x) = x, cos(x) = 1) instead of evaluating a polynomial.
TinyAngleExp	equ	-32		;Smallest angle we bother with
;TrigPrem refuses arguments whose exponent is >= MaxAngleExp.
MaxAngleExp	equ	63		;Angle that's too big

Trig1Result:
;Trig function reduction routine used by functions returning 1 value
;(FSIN and FCOS)
;
;Inputs:
;	edi = [CURstk] = argument pointer
;	Flags = result of the caller's "cmp [edi].bTAG,bTAG_ZERO".  The
;	argument has already been checked for zero, so ZF is clear here;
;	CF set means the tag is below bTAG_ZERO (an ordinary number).
;Outputs:
;	Ordinary or masked-denormal argument: continues in TrigPrem, which
;	returns the reduced argument and octant to our caller.
;	Any other tag: our caller's return address is discarded, so the
;	final RET/JMP leaves the trig instruction altogether.
;
;Note: the return address was pushed by a 32-bit CALL, so it must be
;dropped with a 32-bit "add esp,4".  A 16-bit "add sp,4" leaves the upper
;half of ESP untouched and corrupts the stack pointer whenever the low
;word wraps from 0FFFCH to 0.
	jb	TrigPrem
;Tagged special
	mov	al,EMSEG:[edi].bTAG
	cmp	al,bTAG_DEN
	jz	TrigDenorm
	add	esp,4			;Don't return to caller
	cmp	al,bTAG_INF
	jnz	SpclDestNotDen		;Check for Empty or NAN
	mov	EMSEG:[SWcc],C2		;Can't reduce infinity
	jmp	ReturnIndefinite

TrigDenorm:
;Denormal argument (also reached from Trig2Special).  Record the denormal
;exception; carry on with the reduction only if that exception is masked.
	mov	EMSEG:[CURerr],Denormal
	test	EMSEG:[CWmask],Denormal	;Is denormal exception masked?
	jnz	TrigPrem		;Yes, continue
	add	esp,4			;Don't return to caller
TrigRet:
	ret


;Error exits for the two-result functions (reached from Trig2Result and
;Trig2Special).
;	edi = [CURstk] = argument pointer
;	esi = second result location (computed by Trig2Result)
;All of them end up in Trig2Indefinite, which discards the return address
;of the eFPTAN/eFSINCOS routine that called Trig2Result.
Trig2Inf:
	mov	EMSEG:[SWcc],C2		;Can't reduce infinity
	jmp	Trig2Indefinite

Trig2StackOver:
	mov	EMSEG:[SWcc],C1		;Signal overflow
Trig2StackUnder:
	mov	EMSEG:[CURerr],Invalid+StackFlag
Trig2Indefinite:
;The return address was pushed by a 32-bit CALL, so drop it with a 32-bit
;add.  "add sp,4" would only adjust the low word of ESP and corrupt the
;stack pointer when that word wraps past 0FFFFH.
	add	esp,4			;Don't return to caller
	call	ReturnIndefinite
	jz	TrigRet			;Unmasked, don't change registers
;Produce masked response
;Both result locations receive Indefinite: the call above handled the
;element at edi, now push the stack and do the new top as well.
	mov	EMSEG:[CURstk],esi		;Push stack
	mov	edi,esi
	jmp	ReturnIndefinite

Trig2Special:
;Special-tag dispatch for the two-result functions (from Trig2Result).
;	al = tag of argument (above bTAG_ZERO)
;	edi = [CURstk] = argument pointer
;	esi = second result location
;Denormals go to TrigDenorm, infinities to Trig2Inf; anything else is
;treated as a NAN.
	cmp	al,bTAG_DEN
	jz	TrigDenorm
	cmp	al,bTAG_INF
	jz	Trig2Inf
;Must be a NAN
;Drop the return address of the routine that called Trig2Result.  It was
;pushed by a 32-bit CALL, so a 32-bit add is required; "add sp,4" would
;corrupt ESP when its low word wraps past 0FFFFH.
	add	esp,4			;Don't return to caller
	call	DestNAN
	jz	TrigRet			;Unmasked, don't change registers
;Produce masked response
;Push the stack and copy the element at edi into the new top at esi, so
;both results hold the same NAN.
	mov	EMSEG:[CURstk],esi		;Push stack
        mov     eax,EMSEG:[edi].ExpSgn
        mov     EMSEG:[esi].ExpSgn,eax
        mov     eax,EMSEG:[edi].lManHi
        mov     EMSEG:[esi].lManHi,eax
        mov     eax,EMSEG:[edi].lManLo
        mov     EMSEG:[esi].lManLo,eax
	ret

Trig2Zero:
;Zero argument to a two-result function (from Trig2Result).
;	edi = [CURstk] = argument pointer
;	esi = second result location
;Drop the return address of the routine that called Trig2Result.  It was
;pushed by a 32-bit CALL, so use a 32-bit add; "add sp,4" would corrupt
;ESP when its low word wraps past 0FFFFH.
	add	esp,4			;Don't return to caller
	mov	EMSEG:[CURstk],esi	;Push stack
	mov	edi,esi
;Amazing coincidence: both FSINCOS and FPTAN return the same result for
;a zero argument:
;	FSINCOS returns ST(0) = cos(0) = 1, ST(1) = sin(0) = 0.
;	FPTAN returns ST(0) = 1 always, ST(1) = tan(0) = 0.
;Return zero has same sign as argument zero, so we don't need to touch
;it -- just push +1.0.
	jmp	ReturnOne

TrigOutOfRange:
;Argument exponent was >= MaxAngleExp (from TrigPrem).  Set C2 and abandon
;the instruction without touching the operand.
	mov	EMSEG:[SWcc],C2		;Signal argument not reduced
;Discard the return address into the eFxxx routine so the RET below leaves
;the trig instruction entirely.  The address was pushed by a 32-bit CALL,
;so a 32-bit add is required; "add sp,4" would corrupt ESP when its low
;word wraps past 0FFFFH.
	add	esp,4
	ret

PrevStackWrap	esi,Trig2		;Tied to PrevStackElem below

Trig2Result:
;Trig function reduction routine used by functions returning 2 values
;(FSINCOS and FPTAN)
;edi = [CURstk] = argument pointer
;
;Checks that the argument exists and that there is a free stack slot for
;the second result, dispatches zero and special arguments to their own
;handlers, and otherwise falls into TrigPrem.  Every handler except the
;masked-denormal path discards our caller's return address.
;On the error/special paths esi = second result location and al = tag.
;(PrevStackElem/PrevStackWrap are macros defined outside this file.)
	mov	esi,edi
	PrevStackElem	esi,Trig2	;esi points to second result location
	mov	al,EMSEG:[edi].bTAG	;Get tag
	cmp	al,bTAG_EMPTY		;Stack underflow if empty
	jz	Trig2StackUnder
	cmp	EMSEG:[esi].bTAG,bTAG_EMPTY	;Stack overflow if not empty
	jnz	Trig2StackOver
;One compare serves both branches below: above bTAG_ZERO is a special tag,
;equal is a zero argument, below is an ordinary number.
	cmp	al,bTAG_ZERO		;Is it Special?
	ja	Trig2Special
	jz	Trig2Zero
;Fall into TrigPrem

;****
;TrigPrem
;
;This routine reduces an angle in radians to the range [0, pi/4].
;Angles in odd-numbered octants have been subtracted from pi/4.
;It uses a 66-bit value for pi, as required by the 387.
;TrigPrem uses the same two-stage algorithm as FPREM (see 
;emfprem.asm).	However, it is limited to an argument < 2^63.
;
;Inputs:
;	edi = [CURstk]
;Outputs:
;	ebx:esi = remainder, normalized
;	high ecx = exponent, cl = tag
;	al = octant
;	edi = [CURstk]
;Side effects:
;	[Result] is set to the incoming edi.
;	ebp is used as scratch by stage 1.
;	If the argument exponent is >= MaxAngleExp, control goes to
;	TrigOutOfRange, which sets C2 and discards this routine's return
;	address instead of returning here.

TrigPrem:
	mov	EMSEG:[Result],edi
	mov	eax,EMSEG:[edi].lManLo
	mov	edx,EMSEG:[edi].lManHi
	movsx	ebx,EMSEG:[edi].wExp
	cmp	ebx,MaxAngleExp
	jge	TrigOutOfRange
;From here on edi is no longer the stack pointer: it becomes the low
;(extension) dword of the 96-bit dividend edx:eax:edi.
	xor	edi,edi			;Extend dividend
	xor	esi,esi			;Quotient, in case we skip stage 1
.erre	PiOver4exp eq -1
	inc	ebx			;Subtract exponent of pi/4
	jl	ExitTrigPrem		;If dividend is smaller, return it.
;We now know that 0 <= ExpDif < 64, so it fits in bl.
	cmp	bl,31			;Do we need to do stage 1?
	jl	FitPi			;No, start stage 2

;FPREM stage 1
;
;Exponent difference is at least 31.  Use 32-bit division to compute
;quotient and exact remainder, reducing exponent difference by 31.
;
;edx:eax = dividend
;ebx = exponent difference

;Shift dividend right one bit to be sure DIV instruction won't overflow
;This means we'll be reducing the exponent difference by 31, not 32
	xor	ebp,ebp			;Dividend extension
	shrd	ebp,eax,1		;Bit shifted out of eax lands in bit 31 of ebp
	shrd	eax,edx,1
	shr	edx,1

	sub	bl,31			;Exponent reduced
	mov	ecx,XPiHi
	div	ecx			;Guess a quotient "digit"

;Check out our guess.  
;Currently, remainder in edx = (high dividend) - (quotient * high pi).
;(High dividend is the upper 64 bits--ebp has 1 bit.)  The definition 
;of remainder is (all dividend) - (quotient * all pi).  So if we
;subtract (quotient * low pi) from edx:ebp, we'll get the true 
;remainder.  If it's negative, our guess was too big.

	mov	esi,eax			;Save quotient
	mov	ecx,edx			;Save remainder

;The pi/4 we use has two bits set below the first 64 bits.  This means
;we must add another 3/4 of the quotient into the amount to subtract,
;which we'll compute by rounding the low 32 bits up 1, then subtracting 
;1/4 of quotient.  But since we're computing the amount to subtract from
;the remainder, we'll add the 1/4 of the quotient to the remainder instead
;of subtracting it from the amount to subtract.

.erre	XPiLo eq (3 shl 30)
	mov	eax,XPiMid+1
	mul	esi			;Quotient * low pi
;Note that ebp is either 0 or 800...00H
	shr	ebp,30			;Move down to low end
	shld	ebp,esi,30		;Move back up, adding 1/4 of quotient
	mov	edi,esi			;Another copy of quotient
	shl	edi,30			;Keep last two bits
;edx:eax has amount to subtract to get correct remainder from ecx:ebp:edi
	sub	ebp,eax
	sbb	ecx,edx			;Subtract from remainder
;The two MOVs below leave the flags alone, so the JNC still tests the
;borrow out of the SBB above.
	mov	eax,ebp
	mov	edx,ecx			;Remainder back to edx:eax:edi
	jnc	TrigPremNorm		;Was quotient OK?
TrigCorrect:
;Remainder went negative.  Add the divisor back (and lower the quotient)
;until the addition carries, i.e. the remainder is non-negative again.
	dec	esi			;Quotient was too big
	add	edi,XPiLo
	adc	eax,XPiMid		;Add divisor back into remainder
	adc	edx,XPiHi
	jnc	TrigCorrect		;Repeat if quotient is still too big
	jmp	TrigPremNorm

;FPREM stage 2
;
;Exponent difference is less than 32.  Use restoring long division to
;compute quotient bits until exponent difference is zero.  Note that we
;often get more than one bit/loop:  BSR is used to scan off leading
;zeros each time around.  Since the divisor is normalized, we can
;instantly compute a zero quotient bit for each leading zero bit.

TrigPremLoop:
;edx:eax:edi = dividend (remainder) minus pi/4
;esi = quotient
;ebx = exponent difference
;
;If D is current dividend and p is pi/4, then we have edx:eax:edi = D - p, 
;which is negative.  We want 2*D - p, which is positive.  
;2*D - p = 2*(D - p) + p.
	add	edi,edi			;2*(D - p)
	adc	eax,eax
	adc	edx,edx

	add	edi,XPiLo		;2*(D-p) + p = 2*D - p
	adc	eax,XPiMid
	adc	edx,XPiHi

	add	esi,esi			;Double quotient too
	dec	ebx			;Decrement exponent difference
PiFit:
;The divisor was subtracted successfully: record a 1 quotient bit.
	inc	esi
TrigPremNorm:
;Normalize the remainder edx:eax:edi.  Each leading zero bit shifted out
;is a zero quotient bit and one less of exponent difference.
	bsr	ecx,edx			;Find first 1 bit
	jz	TrigPremZero
	not	cl
	and	cl,1FH			;Convert bit no. to shift count
	sub	ebx,ecx			;Reduce exponent difference
	jl	TrigTooFar
	shld	edx,eax,cl
	shld	eax,edi,cl
	shl	edi,cl			;Finish normalize shift
	shl	esi,cl			;Shift quotient
FitPi:
;Dividend could be larger or smaller than divisor
	sub	edi,XPiLo
	sbb	eax,XPiMid
	sbb	edx,XPiHi
	jnc	PiFit
;Couldn't subtract pi/4 from dividend.
;edx:eax:edi = dividend - pi/4, which is negative
	or	ebx,ebx			;Is exponent difference zero?
	jg	TrigPremLoop
;If quotient (octant number) is odd, we have subtracted an odd number of
;pi/4's.  However, simple angle reductions work in multiples of pi/2.
;We will keep the extra pi/4 we just subtracted if the octant was odd.
;This will give a result range of [-pi/4, pi/4].  
	test	esi,1			;Is octant odd?
	jz	EvenOctant
NegPremResult:
;-pi/4 < dividend < 0.  Negate this since we use sign-magnitude representation.
;(NOT the upper dwords, NEG the low one; the SBB -1 pair adds the carry
;back in when the lower part was zero.)
	not	edx			;96-bit negation
	not	eax
	neg	edi
	sbb	eax,-1
	sbb	edx,-1
;May need to normalize
	bsr	ecx,edx
	jz	TrigNorm32
	lea	ebx,[ebx+ecx-31]	;Fix up exponent for normalization
	not	cl			;Convert bit no. to shift count
TrigShortNorm:
;Shift the remainder left by cl.  Also entered from TrigTooFar with the
;shift count already in cl.
	shld	edx,eax,cl
	shld	eax,edi,cl
	shl	edi,cl			;Finish normalize shift
RoundPrem:
;Must round 66-bit result to 64 bits.
;To perform "round even" when the round bit is set and the sticky bits
;are zero, we treat the LSB as if it were a sticky bit.  Thus if the LSB
;is set, that will always force a round up (to even) if the round bit is
;set.  If the LSB is zero, then the sticky bits remain zero and we always
;round down.  This rounding rule is implemented by adding RoundBit-1
;(7F..FFH), setting CY if round up.  
	bt	eax,0			;Is mantissa even or odd? (set CY)
	adc	edi,(1 shl 31)-1	;Sum LSB & sticky bits--CY if round up
	adc	eax,0
	adc	edx,0
;Fall into ExitTrigPrem
ExitTrigPrem:
;Common exit of TrigPrem: move the results into the registers promised to
;the caller and adjust the octant for a negative argument.
;edx:eax = remainder, normalized
;esi = quotient
;ebx = exponent difference, zero or less
.erre	PiOver4exp eq -1
	dec	ebx			;True exponent
.erre	bTAG_SNGL eq 0
	shrd	ecx,ebx,16		;Exponent to high ecx
	mov	ebx,edx			;High mant. to ebx
	xchg	esi,eax			;Low mant. to esi, octant to eax
	or      esi,esi			;Any bits in low half?
.erre   bTAG_VALID eq 1
.erre   bTAG_SNGL eq 0
;cl becomes the tag: bTAG_SNGL if the low mantissa dword is zero,
;bTAG_VALID otherwise.
	setnz   cl			;if low half==0 then cl=0 else cl=1
	mov	edi,EMSEG:[CURstk]
	test	EMSEG:[edi].bSgn,bSign	;Was angle negative?
	jnz	FlipOct			;Yes, flip octant over
	ret

FlipOct:
;Angle was negative.  Subtract octant from 7.
	neg	al
	add	al,7			;al = 7 - al
	ret

EvenOctant:
;Octant is even, so the pi/4 that FitPi just subtracted (leaving a
;negative value) must not be kept: add it back.
;Restore dividend
	add	edi,XPiLo
	adc	eax,XPiMid
	adc	edx,XPiHi
	jmp	RoundPrem

TrigTooFar:
;Exponent difference in ebx went negative when reduced by shift count in ecx.
;We need a quotient corresponding to exponent difference of zero.
	add	ecx,ebx			;Compute previous exponent difference
	shl	esi,cl			;Fix up quotient
	sub	ecx,ebx			;Restore shift count
	test	esi,1			;Is octant odd?
;If even, normalize by the full shift count; ebx stays negative and ends
;up as part of the result exponent.
	jz	TrigShortNorm		;No, go normalize
	xor	ebx,ebx			;Restore old exponent difference (zero)
SubPiOver4:
;We are here if exponent difference was zero and octant is odd.
;As noted above, we need to reduce the angle by a multiple of pi/2,
;not pi/4.  We will subtract one more pi/4, which will make the
;result range [-pi/4, pi/4].
	sub	edi,XPiLo
	sbb	eax,XPiMid
	sbb	edx,XPiHi
	jmp	NegPremResult

TrigPremZero:
;High dword of remainder is all zero, so we've reduced exponent difference
;by 32 bits and overshot.  We need a quotient corresponding to exponent 
;difference of zero, so we just shift it by the original difference.  Then
;we need to normalize the rest of the remainder.
	mov	ecx,ebx			;Get exponent difference
	shl	esi,cl			;Fix up quotient
	test	esi,1			;Is octant odd?
	jnz	SubPiOver4		;Yes, go subtract another pi/4
TrigNorm32:
;edx is zero (also entered from NegPremResult in that case).  Normalize
;from eax:edi, which amounts to a 32-bit left shift plus the BSR count.
	bsr	ecx,eax
	jz	TinyTrig
	lea	ebx,[ebx+ecx-31-32]	;Fix up exponent for normalization
	mov	edx,eax
	mov	eax,edi			;Shift left by 32 bits
	not	cl			;Convert bit no. to shift count
	shld	edx,eax,cl		;Normalize remainder
	shl	eax,cl
	jmp	ExitTrigPrem

TinyTrig:
;Upper 64 bits of remainder are all zero.  We are assured that the extended
;remainder is never zero, though.
;eax is already zero here (the BSR on eax in TrigNorm32 found no bits), so
;only edx needs to be loaded to form the result mantissa edx:eax.
	mov	edx,edi			;Shift left 64 bits
	bsr	ecx,edi
	lea	ebx,[ebx+ecx-31-64]	;Fix up exponent for normalization
	not	cl			;Convert bit no. to shift count
	shl	edx,cl			;Normalize
	jmp	ExitTrigPrem

;*******************************************************************************

EM_ENTRY eFCOS
eFCOS:
;FCOS: replace the value at [CURstk] with its cosine.
;edi = [CURstk].  A zero argument returns 1.0 directly; otherwise the
;argument is reduced by Trig1Result and the octant picks the sign and
;whether the sine or cosine polynomial is evaluated.
    and		[esp].[OldLongStatus+4],NOT(C2 SHL 16)	;clear C2
	cmp	EMSEG:[edi].bTAG,bTAG_ZERO
	jz	ReturnOne
;Flags from the CMP above are still live when Trig1Result starts.
	call	Trig1Result
;ebx:esi,ecx = reduced argument
;eax = octant
	mov	ch,80H			;Assume negative
	test	al,110B			;Negative in octants 2 - 5
	jpo	@F			;Occurs when exactly one of these bits is set
	xor	ch,ch			;Actually positive
@@:
	test	al,011B			;Look for octants 0,3,4,7
	jpo	TakeSine		;Use sine if not
TakeCosine:
;Also entered from ReducedSine when the octant calls for a cosine.
;ch = sign of result
	cmp	ecx,TinyAngleExp shl 16	;Is angle really small?
	jl	CosReturnOne		;cos(x) = 1 for tiny x
CosNotTiny:
	mov	edi,offset tCosPoly
;Note that argument needs to be saved in ArgTemp (by EvalPolySetup) in case 
;we were called from eFSINCOS and we'll need the arg for the sine.  Argument
;is not needed for cosine, however (just its square).
	call	EvalPolySetup		;In emftran.asm
	mov	ch,EMSEG:[ArgTemp].bSgn	;Get sign we already figured out
;Fall into TransUnround
TransUnround:
;Common tail for the transcendental results: undo the internal rounding,
;apply the user's rounding mode, then restore the rounding vectors.
;ebx:esi:eax = mantissa with rounding bits in eax, high ecx = exponent,
;ch = sign.
;
;The last operation performed a simple round nearest, without setting the 
;C1 status bit if round up occurred.  We reverse this last rounding now
;so we can do the user's selected rounding mode.  We also ensure that
;the answer is never exact.
;NOTE(review): the constant matches the RoundBit-1 bias added in RoundPrem;
;the rounding being reversed here lives in emftran.asm -- confirm.
	sub	eax,(1 shl 31)-1	;Take back the rounding bias--CY if borrow
	jz	UnroundExact		;Answer looks exact, but it's not
	sbb	esi,0
	sbb	ebx,0
	jns	PolyDropExponent	;We had rounded up exponent too
FinalTransRound:
;A jump through [TransRound] is only valid if the number is known not to
;underflow.  Unmasked underflow requires [RoundMode] be set.
	mov	edx,EMSEG:[TransRound]
	mov	EMSEG:[RoundMode],edx
	call	edx			;Perform user's rounding
RestoreRound:
;Restore rounding vectors
	mov	EMSEG:[ZeroVector],offset SaveResult
	mov	eax,EMSEG:[SavedRoundMode]
	mov	EMSEG:[RoundMode],eax
	ret

UnroundExact:
;Rounding bits came out exactly zero.  Make them non-zero so the result
;is treated as inexact.
	inc	eax			;Let's say our answer is a bit small
	jmp	FinalTransRound

PolyDropExponent:
;Undoing the round borrowed out of the mantissa's top bit: put the MSB
;back and lower the exponent by one instead.
	sub	ecx,1 shl 16		;Decrement exponent
	or	ebx,1 shl 31		;Set MSB
	jmp	FinalTransRound


SinRet:
;FSIN of zero: the argument is left in place as the result.
	ret

SaveTinySin:
;Argument in ebx:esi,ecx is small enough so that sin(x) = x, which happens
;when x - x^3/6 = x [or 1 - x^2/6 = 1].  Note that the infinitely precise
;result is slightly less than the argument.  To get the correct answer for
;any rounding mode, we decrement the argument and set up for rounding.
;Also used for tiny tan(x) and for the tiny-angle FSINCOS paths.
	mov	eax,-1			;Set up rounding bits
	sub	esi,1
	sbb	ebx,0			;Drop mantissa by one
	js	FinalTransRound		;Still normalized?
;mantissa must have been 800..000H, set it to 0FFF...FFFH and drop exponent
;(esi has already wrapped to 0FFFFFFFFH from the SUB above)
	mov	ebx,eax			;ebx = -1
	sub	ecx,1 shl 16		;Drop exponent by one
	jmp	FinalTransRound


EM_ENTRY eFSIN
eFSIN:
;FSIN: replace the value at [CURstk] with its sine.
;edi = [CURstk].  A zero argument is returned unchanged; otherwise the
;argument is reduced by Trig1Result and the octant picks the sign and
;whether the sine or cosine polynomial is evaluated.
    and		[esp].[OldLongStatus+4],NOT(C2 SHL 16)	;clear C2
	cmp	EMSEG:[edi].bTAG,bTAG_ZERO
	jz	SinRet			;Return zero for zero argument
;Flags from the CMP above are still live when Trig1Result starts.
	call	Trig1Result
	mov	ch,al
	shl	ch,7-2			;Move bit 2 to bit 7 as sign bit
ReducedSine:
;Also called from eFSINCOS with the argument already reduced.
;ebx:esi,ecx = reduced argument
;ch = correct sign
;eax = octant
	test	al,011B			;Look for octants 0,3,4,7
	jpo	TakeCosine		;Use cosine if not
TakeSine:
	cmp	ecx,TinyAngleExp shl 16	;Is angle really small?
	jl	SaveTinySin		;sin(x) = x for tiny x

;The polynomial for sine is  sin(x) = x * P(x^2).  However, the degree zero
;coefficient of P() is 1, so  P() = R() + 1, where R() has no degree zero
;term.	Thus  sin(x) = x * [R(x^2) + 1] = x * R(x^2) + x.
;
;What's important here is that adding 1 to R(x^2) can blow away a lot of
;precision just before we do that last multiply by x.  Note that x < pi/4 < 1,
;so that x^2 is often << 1.  The precision is lost when R(x^2) is shifted
;right to align its binary point with 1.0.  This can cause a loss of at
;least 1 bit of precision after the final multiply by x in addition to 
;rounding errors.
;
;To avoid this precision loss, we use the alternate form given above,
;sin(x) = x * R(x^2) + x.  Instead of adding 1.0 and multiplying by x,
;we multiply by x and add x--exactly the same level of difficulty.  But
;the multiply has all of R(x^2)'s precision available.
;
;Because the polynomial R() has no zero-degree term, we give EvalPoly
;one degree less (so we don't have to add zero as the last term).
;Then we have to multiply once more by x^2 since we left the loop early.

SineNotTiny:
	mov	edi,offset tSinPoly
	call	EvalPolySetup		;In emftran.asm
SineFinish:
;Also used as the return address of EvalPoly in eFSINCOS (FastSine).
;FloatTemp holds x^2 (eFSINCOS reloads x^2 from it); ArgTemp holds x.

ifdef NT386
        mov	edi,YFloatTemp
else
	mov	edi,offset edata:FloatTemp
endif
	call	PolyMulDouble		;Last coefficient in R(x^2)

ifdef NT386
	mov	edi,YArgTemp		;Point to original x
else
	mov	edi,offset edata:ArgTemp ;Point to original x
endif

	call	PolyMulDouble		;Compute x * R(x^2)

ifdef NT386
	mov	edi,YArgTemp		;Point to original x
else
	mov	edi,offset edata:ArgTemp ;Point to original x
endif

;Push TransUnround as the return address so PolyAddDouble "returns"
;straight into the final rounding code.
	push	offset TransUnround
	jmp	PolyAddDouble		;Compute x * R(x^2) + x


EM_ENTRY eFPTAN
eFPTAN:
;FPTAN: replace the value at [CURstk] with its tangent, then push 1.0.
;edi = [CURstk].  Trig2Result handles stack checks, zero and special
;arguments (and does not return here for those); otherwise it returns the
;reduced argument and octant.
    and		[esp].[OldLongStatus+4],NOT(C2 SHL 16)	;clear C2
	call	Trig2Result
	push	offset TanPushOne	; Push 1.0 when we're all done
;ebx:esi,ecx = reduced argument
;eax = octant
	mov	ch,al
	shl	ch,7-1			;Move bit 1 to bit 7 as sign bit
;Note that ch bit 6 now has even/odd octant, which we'll need when we're
;done to see if we should take reciprocal.
	cmp	ecx,TinyAngleExp shl 16	;Is angle really small?
	jl	TinyTan
	mov	edi,offset tTanPoly
	call	Eval2Poly		;In emftran.asm
	mov	edi,EMSEG:[CURstk]	;Point to first result
	push	offset TransUnround	;Return address of divide
	test	EMSEG:[ArgTemp].bSgn,0C0H	;Check low 2 bits of octant
;Given the reduced input range, the result can never overflow or underflow.
;It must then be safe to assume neither operand is zero.
;Parity even means the two octant bits are equal (octant mod 4 is 0 or 3).
	jpe	DivDouble		;Tan() octants 0,3,4,7
	jmp	DivrDouble		;CoTan()

TinyTan:
;Reduced angle is tiny.  ch bits 7,6 = octant bits 1,0.
	test	ch,0C0H			;Check low 2 bits of octant
	jpe	SaveTinySin		;Octants 0,3,4,7: tan(x) = x for tiny x
;Need reciprocal of reduced argument
	mov	edi,esi
	mov	esi,ebx			;Mantissa in esi:edi
	mov	ebx,ecx			;ExpSgn to ebx
	mov	edx,1 shl 31		;Load 1.0
	xor	eax,eax
.erre	TexpBias eq 0
	xor	ecx,ecx			;Sign and exponent are zero
;dividend mantissa in edx:eax, exponent in high ecx, sign in ch bit 7
;divisor mantissa in esi:edi, exponent in high ebx, sign in bh bit 7
	push	offset TransUnround	;Return address of divide
;Note that this can never overflow, because the reduced argument is never
;smaller than about 2^-65.
	jmp	DivDoubleReg


PrevStackWrap	edi,Tan			;Wrap handler paired with PrevStackElem in TanPushOne

;TanPushOne -- final step of FPTAN (eFPTAN pushes this address before it
;starts): make the slot for the second result the new stack top, then
;fall into ReturnOne to fill it with 1.0.
TanPushOne:
	PrevStackElem	edi,Tan		;edi = slot that becomes the new top
	mov	EMSEG:[CURstk],edi	;Commit the push
;ReturnOne -- store +1.0 into the stack element at edi and return.
;The three fields are written independently: zero sign with tag
;bTAG_SNGL, exponent 0-TexpBias, mantissa 8000...0000H.
ReturnOne:
	mov	EMSEG:[edi].ExpSgn,(0-TexpBias) shl 16 + bTAG_SNGL
	mov	EMSEG:[edi].lManHi,1 shl 31
	mov	EMSEG:[edi].lManLo,0
	ret


PrevStackWrap	edi,SinCos		;Tied to PrevStackElem below

;FSINCOS: replace the value at [CURstk] with its sine, then push its cosine.
;edi = [CURstk].  Trig2Result handles stack checks, zero and special
;arguments (and does not return here for those).
;NOTE(review): unlike eFCOS, eFSIN and eFPTAN there is no EM_ENTRY line in
;front of this label -- confirm that is intentional.
eFSINCOS:
    and		[esp].[OldLongStatus+4],NOT(C2 SHL 16)	;clear C2
	call	Trig2Result
;Figure out signs
	mov	ch,al			;Start with sign of sine
	shl	ch,7-2			;Move bit 2 to bit 7 as sign bit
	mov	ah,80H			;Assume sign of cosine is negative
	test	al,110B			;Negative in octants 2 - 5
	jpo	@F			;Occurs when exactly one of these bits is set
	xor	ah,ah			;Actually positive
@@:
;ch = sign of sine
;ah = sign of cosine
	cmp	ecx,TinyAngleExp shl 16	;Is angle really small?
	jl	TinySinCos
	push	eax			;Save octant and sign of cosine
	call	ReducedSine		;On exit, edi = [CURstk]
	pop	eax
;The Sin() function restored the rounding vectors to normal.  Set them back.
	mov	EMSEG:[RoundMode],offset PolyRound
	mov	EMSEG:[ZeroVector],offset PolyZero
	PrevStackElem	edi,SinCos	;edi points to second result location
	mov	EMSEG:[CURstk],edi
	mov	EMSEG:[Result],edi
;Load x^2 back into registers
	mov	ecx,EMSEG:[FloatTemp].ExpSgn
	mov	ebx,EMSEG:[FloatTemp].lManHi
	mov	esi,EMSEG:[FloatTemp].lManLo
	mov	EMSEG:[ArgTemp].bSgn,ah	;Save sign
;Same octant test as ReducedSine used for the first result, so the second
;result gets the other polynomial.
	test	al,011B			;Look for octants 0,3,4,7
	jpo	FastSine		;Use sine if not
	mov	edi,offset tCosPoly
	call	EvalPoly		;In emftran.asm
	mov	ch,EMSEG:[ArgTemp].bSgn	;Get sign we already figured out
	jmp	TransUnround

FastSine:
;Evaluate the sine polynomial on the saved x^2, with SineFinish pushed as
;EvalPoly's return address so the result is completed there.
	mov	edi,offset tSinPoly
	push	offset SineFinish
	jmp	EvalPoly		;In emftran.asm

TinySinCos:
;FSINCOS with a tiny reduced angle: one result is the (slightly reduced)
;argument and the other is just under 1.0; the octant decides which.
;ch = sign of sine
;ah = sign of cosine
;ebx:esi,high ecx = reduced argument
;edi = [CURstk]
	test	al,011B			;Look for octants 0,3,4,7
	jpo	TinyCosSin		;Take cosine first if not
	push	eax
	call	SaveTinySin		;For sine, arg is result
	pop	ecx
;edi = [CURstk]
;ch = sign of cosine
;Set cosine to 1.0
	PrevStackElem	edi,TinySinCos	;edi points to second result location
	mov	EMSEG:[CURstk],edi
	mov	EMSEG:[Result],edi
CosReturnOne:
;Also entered from TakeCosine (FCOS/FSIN) for a tiny angle.
;ch = sign of result
;Cosine is nearly equal to 1.0.  Put in next smaller value and round it.
	mov	ebx,-1
	mov	esi,ebx			;Set mantissa to -1
	mov	eax,ebx			;Set up rounding bits
.erre	TexpBias eq 0
	and	ecx,bSign shl 8		;Keep only sign
	sub	ecx,1 shl 16		;Exponent of -1
;A jump through [TransRound] is only valid if the number is known not to
;underflow.  Unmasked underflow requires [RoundMode] be set.
	jmp	EMSEG:[TransRound]

	PrevStackWrap	edi,TinySinCos

	PrevStackWrap	edi,TinyCosSin

TinyCosSin:
;Sine is nearly 1.0, cosine is argument
;
;ch = sign of sine
;ah = sign of cosine
;ebx:esi,high ecx = reduced argument
;edi = [CURstk]
	xchg	ah,ch			;Cosine sign to ch, sine sign to ah
	push	edi			;Save place for sine
	PrevStackElem	edi,TinyCosSin	;edi points to second result location
	mov	EMSEG:[CURstk],edi
	mov	EMSEG:[Result],edi
	push	eax
	call	SaveTinySin		;For cosine here, arg is result
	pop	ecx
;ch = sign of sine
	pop	EMSEG:[Result]		;Set up location for sine
	jmp	CosReturnOne

;*******************************************************************************

;********************* Polynomial Coefficients *********************

;These polynomial coefficients were all taken from "Computer Approximations"
;by J.F. Hart (reprinted 1978 w/corrections).  All calculations and 
;conversions to hexadecimal were done with a character-string calculator
;written in Visual Basic with precision set to 30 digits.  Once the constants
;were typed into this file, all transfers were done with cut-and-paste
;operations to and from the calculator to help eliminate any typographical
;errors.


tCosPoly	label	word

;These constants are derived from Hart #3824: cos(x) = P(x^2),
;accurate to 19.45 digits over interval [0, pi/4].  The original 
;constants in Hart required that the argument x be divided by pi/4.  
;These constants have been scaled so this is no longer required.
;Scaling is done by multiplying the constant by a power of 4/pi.
;The power is given in the table.
;
;Table layout: a dword degree, then one entry per coefficient, highest
;power first.  Each entry is a 64-bit mantissa (the "Hex value" above it
;rounded to 64 bits) followed by two words: sign/tag, then exponent.
;NOTE(review): the exponent word is written as the listed hex exponent
;minus 1, presumably because the "Hex value" is shown as a 0.xxx fraction
;while the stored mantissa has its binary point after the top bit --
;confirm against EvalPoly in emftran.asm.

	dd	7			;Degree seven

;  Original Hart constant	      power	Scaled constant
;
;-0.38577 62037 2		 E-12  14  -0.113521232057839395845871741043E-10
;Hex value:    0.C7B56AF786699CF1BD13FD290 HFFDC
	dq	0C7B56AF786699CF2H
	dw	(bSign shl 8)+bTAG_VALID,0FFDCH-1

;+0.11500 49702 4263		  E-9  12  +0.208755551456778828747793797596E-8
;Hex value:    0.8F74AA3CCE49E68D6F5444A18 HFFE4
	dq	08F74AA3CCE49E68DH
	dw	bTAG_VALID,0FFE4H-1

;-0.24611 36382 63700 5		  E-7  10  -0.275573128656960822243472872247E-6
;Hex value:    0.93F27B7F10CC8A1703EFC8A04 HFFEB
	dq	093F27B7F10CC8A17H
	dw	(bSign shl 8)+bTAG_VALID,0FFEBH-1

;+0.35908 60445 88581 953	  E-5	8  +0.248015872828994630247806807317E-4
;Hex value:    0.D00D00CD6BB3ECD17E10D5830 HFFF1
	dq	0D00D00CD6BB3ECD1H
	dw	bTAG_VALID,0FFF1H-1

;-0.32599 18869 26687 55044	  E-3	6  -0.138888888888589604343951947246E-2
;Hex value:    0.B60B60B609B165894CFE522AC HFFF7
	dq	0B60B60B609B16589H
	dw	(bSign shl 8)+bTAG_VALID,0FFF7H-1

;+0.15854 34424 38154 10897 54	  E-1	4  +0.416666666666664302573692446873E-1
;Hex value:    0.AAAAAAAAAAA99A1AF53042B08 HFFFC
	dq	0AAAAAAAAAAA99A1BH
	dw	bTAG_VALID,0FFFCH-1

;-0.30842 51375 34042 45242 414	  E0	2  -0.499999999999999992843582920899E0
;Hex value:    0.FFFFFFFFFFFFFEF7F98D3BFA8 HFFFF
	dq	0FFFFFFFFFFFFFEF8H
	dw	(bSign shl 8)+bTAG_VALID,0FFFFH-1

;+0.99999 99999 99999 99996 415	  E0	0  (no change)
;Hex value     0.FFFFFFFFFFFFFFFF56B402618 H0
	dq	0FFFFFFFFFFFFFFFFH
	dw	bTAG_VALID,00H-1


tSinPoly	label	word

;These constants are derived from Hart #3044: sin(x) = x * P(x^2),
;accurate to 20.73 digits over interval [0, pi/4].  The original 
;constants in Hart required that the argument x be divided by pi/4.  
;These constants have been scaled so this is no longer required.
;Scaling is done by multiplying the constant by a power of 4/pi.
;The power is given in the table.
;
;Table layout: a dword degree, then one entry per coefficient, highest
;power first.  Each entry is a 64-bit mantissa (the "Hex value" above it
;rounded to 64 bits) followed by two words: sign/tag, then exponent
;(written as the listed hex exponent minus 1).

	dd	7-1			;Degree seven, but the last coefficient
					;is 1.0 and is not listed here.

;  Original Hart constant	      power	Scaled constant
;
;-0.20225 31292 93		 E-13  15  -0.757786788401271156262125540409E-12
;Hex value:    0.D54C4AF2B524F0F2D6411C90A HFFD8
	dq	0D54C4AF2B524F0F3H
	dw	(bSign shl 8)+bTAG_VALID,0FFD8H-1

;+0.69481 52035 0522		 E-11  13  +0.160583476232246065559545749398E-9
;Hex value:    0.B0903AF085DA66030F16E43BC HFFE0
	dq	0B0903AF085DA6603H
	dw	bTAG_VALID,0FFE0H-1

;-0.17572 47417 61708 06	  E-8  11  -0.250521047382673309542092418731E-7
;Hex value:    0.D73229320D2AF05971AC96FF4 HFFE7
	dq	0D73229320D2AF059H
	dw	(bSign shl 8)+bTAG_VALID,0FFE7H-1

;+0.31336 16889 17325 348	  E-6	9  +0.275573192133901687156480447942E-5
;Hex value:    0.B8EF1D2984D2FBA28A9CC9DEE HFFEE
	dq	0B8EF1D2984D2FBA3H
	dw	bTAG_VALID,0FFEEH-1

;-0.36576 20418 21464 00052 9	  E-4	7  -0.198412698412531058609618529749E-3
;Hex value:    0.D00D00D00C3FDDD7916E5CB28 HFFF4
	dq	0D00D00D00C3FDDD8H
	dw	(bSign shl 8)+bTAG_VALID,0FFF4H-1

;+0.24903 94570 19271 62752 519	  E-2	5  +0.83333333333333203341753387264E-2
;Hex value:    0.8888888888884C95D619A0343 HFFFA
	dq	08888888888884C96H
	dw	bTAG_VALID,0FFFAH-1

;-0.80745 51218 82807 81520 2582  E-1	3  -0.166666666666666666281276062229E0
;Hex value:    0.AAAAAAAAAAAAAA8E3AD80EAB8 HFFFE
	dq	0AAAAAAAAAAAAAA8EH
	dw	(bSign shl 8)+bTAG_VALID,0FFFEH-1

;+0.78539 81633 97448 30961 41845 E0	1  +0.99999999999999999999812025812E0
;Hex value:    0.FFFFFFFFFFFFFFFFF71F88110 H0
;	dq	8000000000000000H	;This constant of 1.0 omitted here.
;	dw	bTAG_VALID,0		;   It is handled in code.


tTanPoly	label	word

;These constants are derived from Hart #4286: tan(x) = x * P(x^2) / Q(x^2),
;accurate to 19.94 digits over interval [0, pi/4].  The original 
;constants in Hart required that the argument x be divided by pi/4.  
;These constants have been scaled so this is no longer required.
;Scaling is done by multiplying the constant by the same power of 4/pi
;as the power of x the constant is used on.  However, the highest
;degree coefficient of Q() is 1, and after scaling this way it would
;become (4/pi)^8.  In order to keep this coefficient equal to one,
;we scale everything again by (pi/4)^8.  This scaling is partially
;canceled by the original scaling by powers of 4/pi, and the net
;resulting power of pi/4 is given in the table.
;
;Table layout: two polynomials back to back, each a dword degree followed
;by its coefficient entries, highest power first.  Each entry is a 64-bit
;mantissa (the "Hex value" above it rounded to 64 bits) followed by two
;words: sign/tag, then exponent (written as the listed hex exponent
;minus 1).


	dd	3			;First poly is degree 3

;  Original Hart constant	        power	Scaled constant
;
;-.45649 31943 86656 31873 96113 7    E2  1  -35.8528916474714232910463077546
;Hex value:    0.8F695C6D93AF6F97B6E022AB3 H6
        dq      08F695C6D93AF6F98H
        dw      (bSign shl 8)+bTAG_VALID,06H-1

;+.14189 85425 27617 78388 00394 831  E5  3  +6874.60229709782436592720603503
;Hex value:    0.D6D4D181240D0D08C88DF4AA6 HD
        dq      0D6D4D181240D0D09H
        dw      bTAG_VALID,0DH-1

;-.89588 84400 67680 41087 29639 541  E6  5  -267733.884797157298951145495276
;Hex value:    0.82BABC504220C62B1D0722684 H13
        dq      082BABC504220C62BH
        dw      (bSign shl 8)+bTAG_VALID,013H-1

;+.10888 60043 72816 87521 38857 983  E8  7  +2007248.9111748838841548144685
;Hex value:    0.F506874A160EB9C0994AADD6A H15
        dq      0F506874A160EB9C1H
        dw      bTAG_VALID,015H-1


	dd	4			;Second poly is degree 4
;NOTE: Eval2Poly assumes the first coefficient is 1.0, so it is omitted

;  Original Hart constant	        power	Scaled constant
;
;-.10146 56190 25288 53387 54401 947  E4  2  -625.890950057027419879480354834
;Hex value:    0.9C790553635355A95241A5324 HA
        dq      09C790553635355A9H
        dw      (bSign shl 8)+bTAG_VALID,0AH-1

;+.13538 27128 05119 09382 89294 872  E6  4  +51513.6992033752080924797647367
;Hex value:    0.C939B2FEFE0DC585E649870FE H10
        dq      0C939B2FEFE0DC586H
        dw      bTAG_VALID,010H-1

;-.39913 09518 03516 51504 43427 94   E7  6  -936816.855188785264866481436899
;Hex value:    0.E4B70DAEDA6F89E5A7CE626FA H14
        dq      0E4B70DAEDA6F89E6H
        dw      (bSign shl 8)+bTAG_VALID,014H-1

;+.13863 79666 35676 29165 33913 361  E8  8  +2007248.91117488388417770850458
;Hex value:    0.F506874A160EB9C0CCD8313BC H15
        dq      0F506874A160EB9C1H
        dw      bTAG_VALID,015H-1