/*
 * |-----------------------------------------------------------|
 * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc.      |
 * | All Rights Reserved                                       |
 * |-----------------------------------------------------------|
 * |          Restricted Rights Legend                         |
 * | Use, duplication, or disclosure by the Government is      |
 * | subject to restrictions as set forth in                   |
 * | subparagraph (c)(1)(ii) of the Rights in Technical        |
 * | Data and Computer Software Clause of DFARS 252.227-7013.  |
 * |         MIPS Computer Systems, Inc.                       |
 * |         950 DeGuigne Avenue                               |
 * |         Sunnyvale, California 94088-3650, USA             |
 * |-----------------------------------------------------------|
 */
/* $Header: trig.s,v 3000.3.1.2 91/06/10 15:18:21 karen Exp $ */
/* Algorithm from 4.3bsd */

/*
 * Original fcsr is saved in t6.  Do not use t6 as a temp register!
 */


#include <kxmips.h>
#include "trans.h"
#include "fpieee.h"

/*
 * Argument-reduction constants.
 * PIo2hi + PIo2lo together represent PI/2; the split lets the products
 * with a small integer multiple be formed exactly (see the "exact"
 * notes at the points of use).
 */
#define	PIo4	 7.8539816339744828E-1	/* PI/4: bound of the primary range */
#define OoPIo2	 6.3661977236758138E-1	/* 1/(PI/2) */
#define	PIo2hi	 1.5707963109016418	/* high part of PI/2 */
#define	PIo2lo	 1.5893254712295857E-8	/* low part of PI/2 */
#define Xmax	 105414357.85197645	/* 2^26*PI/2: limit of the simple reduction */
#define half	 0.5
#define one	 1.0
#define thresh	 5.2234479296242364e-01	/* z = x*x switch point in the cos kernel */
/*
 * No trailing ';' here: the macro is pasted into an operand list.
 * The literal equals 2^(53/2)*PI.
 */
#define Ymax     2.98156826864790199324e8 /* 2^(53/2)*PI */


/* Coefficients of the sin kernel: sin(x) = x + x*z*(S0+z*(S1+...+z*S5)) */
#define S0	-1.6666666666666463126E-1
#define S1	 8.3333333332992771264E-3
#define S2	-1.9841269816180999116E-4
#define S3	 2.7557309793219876880E-6
#define S4	-2.5050225177523807003E-8
#define S5	 1.5868926979889205164E-10

/* Coefficients of the cos kernel: c = z*z*(C0+z*(C1+...+z*C5)) */
#define C0	 4.1666666666666504759E-2
#define C1	-1.3888888888865301516E-3
#define C2	 2.4801587269650015769E-5
#define C3	-2.7557304623183959811E-7
#define C4	 2.0873958177697780076E-9
#define C5	-1.1250289076471311557E-11

/* Stack frame size shared by cos, sin and tan (ra is kept at FSIZE-4) */
#undef	FSIZE
#define	FSIZE	16

/*
 * double cos(double x)
 *
 * In:	$f12 = x
 * Out:	$f0  = result.  On the error path $f0 is not written here; the
 *	caller gets whatever set_trigm_err leaves behind.
 *
 * Register roles:
 *	t6	caller fcsr, written back before returning (never a temp)
 *	t7	count of PI/2 multiples removed from x, started at 1 so that
 *		the shared sin/cos kernels pick the right function and sign
 *		(bit 0: odd -> cos kernel, bit 1: sign flip)
 *	$f10	PIo4, $f16 Ymax then Xmax, $f18 OoPIo2
 *
 * The kernels cos1 and sin1 and the normal exit sinret are part of the
 * sin procedure; cos branches into them and returns through the epilogue
 * of sin, which pops the same FSIZE frame.
 */
.text .text$trigm
.globl cos	/* double cos(double x) */
.ent cos
cos:
	.frame	sp, FSIZE, ra
	subu	sp, FSIZE
	sw	ra, FSIZE-4(sp)
	.prologue 1
	cfc1	t6, $31		// t6 original fcsr
	ctc1	$0, $31		// fcsr = 0: rounding-mode field 0 (round to nearest in the MIPS encoding), no traps
	li	t7, 1		// cos(x) = sin(x + PI/2): start one quadrant ahead

	// if not (|x| < Ymax) -- too large, or unordered (NaN) -- report
	// FP_TLOSS through set_trigm_err
	li.d	$f16, Ymax
	abs.d	$f0, $f12
	c.olt.d	$f0, $f16
	li.d	$f10, PIo4
	bc1f	cos_err

	/* reduce to [-PI/4,+PI/4] */
	c.olt.d	$f0, $f10
	li.d	$f16, Xmax
	bc1t	cos1		// in range, no reduction necessary

	c.olt.d	$f0, $f16	// if |round(x/(PI/2))| > 2^26, need special
	li.d	$f18, OoPIo2
	bc1f	8f		// argument reduction

	/* simple reduction: n = round(x/(PI/2)); x -= n*PI/2; t7 += n */
1:	mul.d	$f2, $f12, $f18	// round(x/(PI/2))
	cvt.w.d	$f4, $f2	// ...
	cvt.d.w	$f2, $f4	// ...
	/* f2 <= 2^26 */
	li.d	$f6, PIo2hi
	li.d	$f8, PIo2lo
	mul.d	$f6, $f2	// exact (26 x 26 = 52 bits)
	mul.d	$f8, $f2	// exact (27 x 26 = 53 bits)
	sub.d	$f12, $f6	// exact
	sub.d	$f12, $f8	// exact
	mfc1	t0, $f4		// integer n
	addu	t7, t0

	abs.d	$f0, $f12
	c.le.d	$f0, $f10
	and	t0, t7, 1	// t0 = parity of the quadrant count
	bc1f	1b		// still outside [-PI/4,+PI/4]: reduce again

	and	t1, t7, 2
	bne	t0, 0, cos1	// odd count: cos kernel (it reads t7 bit 1 for the sign)

	beq	t1, 0, sin1	// even count: sin kernel, ...
	neg.d	$f12		// ... on -x when bit 1 is set
	b	sin1

	/*
	 * Large-argument reduction.  t2 = (exponent field of x) minus the
	 * biased exponent of 2^25, still positioned at bit 20.  Subtracting
	 * t2 from the high word of OoPIo2 and adding it to the high words of
	 * PIo2hi/PIo2lo rescales them by 2^-k and 2^k (k = t2 >> 20), so the
	 * rounded quotient stays small enough for the exact products below.
	 */
8:	/* |round(x/(PI/2))| > 2^26 or x is NaN */
	mfc1	t0, $f13	// high word of x
	li	t1, 0x7ff00000	// exponent field mask
	and	t0, t1
	subu	t2, t0, (1023+25)<<20
	beq	t0, t1, 9f	// exponent all ones: Infinity or NaN
	li.d	$f2, OoPIo2
	li.d	$f6, PIo2hi
	li.d	$f8, PIo2lo
	mfc1	t3, $f3
	mfc1	t4, $f7
	mfc1	t5, $f9
	subu	t3, t2		// OoPIo2 * 2^-k
	addu	t4, t2		// PIo2hi * 2^k
	addu	t5, t2		// PIo2lo * 2^k
	mtc1	t3, $f3
	mtc1	t4, $f7
	mtc1	t5, $f9
	mul.d	$f2, $f12
	cvt.w.d	$f4, $f2
	cvt.d.w	$f2, $f4
	mul.d	$f6, $f2	// exact (26 x 26 = 52 bits)
	mul.d	$f8, $f2	// exact (27 x 26 = 53 bits)
	sub.d	$f12, $f6	// exact
	sub.d	$f12, $f8	// exact

	abs.d	$f0, $f12
	c.lt.d	$f0, $f16	// if |round(x/(PI/2))| > 2^26, continue special
	li	t0, (1<<20)
	bc1f	8b		// argument reduction
	// Only k == 1 can change the quadrant count modulo 4 (it removed
	// 2n multiples of PI/2); larger k removed a multiple of 4.
	// NOTE(review): relies on k >= 1 whenever this point is reached -- confirm.
	bne	t2, t0, 1b
	mfc1	t0, $f4
	sll	t0, 1		// 2n
	addu	t7, t0
	b	1b		// finish with the simple reduction

cos_err:
	// not (|x| < Ymax): argument too large, or NaN
	ctc1	t6, $31		// restore original fcsr
	li	a1, OP_COS	// operation code (function name index)
	jal	set_trigm_err
	b cosret1		// fcsr already restored above

	// NOTE(review): the Ymax test at entry already diverts Infinity and
	// NaN to cos_err, so this path looks unreachable -- confirm.
9:	/* x is NaN or Infinity */
	/* sub.d	$f0, $f12, $f12 */
 	mov.d   $f0, $f12	// return the argument itself

cosret:
	ctc1	t6, $31		// restore original fcsr
cosret1:
	lw	ra, FSIZE-4(sp)
	addu	sp, FSIZE
	j	ra
.end cos


/*
 * double sin(double x)
 *
 * In:	$f12 = x
 * Out:	$f0  = result.  On the error path $f0 is not written here; the
 *	caller gets whatever set_trigm_err leaves behind.
 *
 * Register roles:
 *	t6	caller fcsr, written back before returning (never a temp)
 *	t7	count of PI/2 multiples removed from x (starts at 0 here,
 *		at 1 when entered from cos); bit 0 selects the kernel
 *		(odd -> cos1), bit 1 the sign
 *	$f10	PIo4, $f16 Ymax then Xmax, $f18 OoPIo2
 *
 * sin1, cos1 and sinret are also branch targets for the cos procedure,
 * which shares this frame layout (FSIZE) and so can return through the
 * epilogue below.
 */
.text .text$trigm
.globl sin	/* double sin(double x) */
.ent sin
sin:
	.frame	sp, FSIZE, ra
	subu	sp, FSIZE
	sw	ra, FSIZE-4(sp)
	.prologue 1
	cfc1	t6, $31		// t6 original fcsr
	ctc1	$0, $31		// fcsr = 0: rounding-mode field 0 (round to nearest in the MIPS encoding), no traps
	li	t7, 0

	// if not (|x| < Ymax) -- too large, or unordered (NaN) -- report
	// FP_TLOSS through set_trigm_err
	li.d	$f16, Ymax
	abs.d	$f0, $f12
	c.olt.d	$f0, $f16
	li.d	$f10, PIo4
	bc1f	sin_err

	/* reduce to [-PI/4,+PI/4] */
	c.olt.d	$f0, $f10
	li.d	$f16, Xmax
	bc1t	sin1		// in range, no reduction necessary

	c.olt.d	$f0, $f16	// if |round(x/(PI/2))| > 2^26, need special
	li.d	$f18, OoPIo2
	bc1f	8f		// argument reduction

	/* simple reduction: n = round(x/(PI/2)); x -= n*PI/2; t7 += n */
1:	mul.d	$f2, $f12, $f18	// round(x/(PI/2))
	cvt.w.d	$f4, $f2	// ...
	cvt.d.w	$f2, $f4	// ...
	/* f2 <= 2^26 */
	li.d	$f6, PIo2hi
	li.d	$f8, PIo2lo
	mul.d	$f6, $f2	// exact (26 x 26 = 52 bits)
	mul.d	$f8, $f2	// exact (27 x 26 = 53 bits)
	sub.d	$f12, $f6	// exact
	sub.d	$f12, $f8	// exact
	mfc1	t0, $f4		// integer n
	addu	t7, t0

	abs.d	$f0, $f12
	c.le.d	$f0, $f10
	and	t0, t7, 1	// t0 = parity of the quadrant count
	bc1f	1b		// still outside [-PI/4,+PI/4]: reduce again

	and	t1, t7, 2
	bne	t0, 0, cos1	// odd count: cos kernel (it reads t7 bit 1 for the sign)

	beq	t1, 0, 2f	// even count: sin kernel, ...
	neg.d	$f12		// ... on -x when bit 1 is set
2:

sin1:	/* compute sin(x) for x in [-PI/4,PI/4] */
	/* z = x*x, sin(x) = x + x*z*(S0+z*(S1+z*(S2+z*(S3+z*(S4+z*S5))))) */
	mul.d	$f8, $f12, $f12	// z
	li.d	$f0, S5
	mul.d	$f0, $f8
	li.d	$f4, S4
	add.d	$f0, $f4
	mul.d	$f0, $f8
	li.d	$f6, S3
	add.d	$f0, $f6
	mul.d	$f0, $f8
	li.d	$f4, S2
	add.d	$f0, $f4
	mul.d	$f0, $f8
	li.d	$f6, S1
	add.d	$f0, $f6
	mul.d	$f0, $f8
	li.d	$f4, S0
	add.d	$f0, $f4
	mul.d	$f0, $f8	// z*(S0+...)
	mul.d	$f0, $f12	// x*z*(S0+...)
	add.d	$f0, $f12	// x + x*z*(S0+...)
	b	sinret

cos1:	/* compute cos(x) for x in [-PI/4,PI/4] */
	/* z = x*x, c = z*z*(C0+z*(C1+z*(C2+z*(C3+z*(C4+z*C5))))) */
	mul.d	$f8, $f12, $f12	// z
	li.d	$f0, C5
	mul.d	$f0, $f8
	li.d	$f4, C4
	add.d	$f0, $f4
	mul.d	$f0, $f8
	li.d	$f6, C3
	add.d	$f0, $f6
	mul.d	$f0, $f8
	li.d	$f4, C2
	add.d	$f0, $f4
	mul.d	$f0, $f8
	li.d	$f6, C1
	add.d	$f0, $f6
	mul.d	$f0, $f8
	li.d	$f4, C0
	add.d	$f0, $f4
	mul.d	$f0, $f8
	mul.d	$f0, $f8	// c

	li.d	$f6, thresh
	li.d	$f16, half
	c.lt.d	$f8, $f6
	mul.d	$f14, $f16, $f8	// z/2
	bc1t	4f
	/* z >= thresh, cos(x) = 0.5-((z/2-0.5)-c) */
	sub.d	$f8, $f14, $f16
	sub.d	$f8, $f0
	b	5f
4:	/* z < thresh, cos(x) = 1.0-(z/2-c) */
	li.d	$f16, one
	sub.d	$f8, $f14, $f0
5:	/* result is $f16 - $f8, negated when t7 bit 1 is set */
	and	t0, t7, 2
	bne	t0, 0, 6f
	sub.d	$f0, $f16, $f8
	b	sinret
6:	sub.d	$f0, $f8, $f16
	b	sinret

	/*
	 * Large-argument reduction.  t2 = (exponent field of x) minus the
	 * biased exponent of 2^25, still positioned at bit 20.  Subtracting
	 * t2 from the high word of OoPIo2 and adding it to the high words of
	 * PIo2hi/PIo2lo rescales them by 2^-k and 2^k (k = t2 >> 20), so the
	 * rounded quotient stays small enough for the exact products below.
	 */
8:	/* |round(x/(PI/2))| > 2^26 or x is NaN */
	mfc1	t0, $f13	// high word of x
	li	t1, 0x7ff00000	// exponent field mask
	and	t0, t1
	subu	t2, t0, (1023+25)<<20
	beq	t0, t1, 9f	// exponent all ones: Infinity or NaN
	li.d	$f2, OoPIo2
	li.d	$f6, PIo2hi
	li.d	$f8, PIo2lo
	mfc1	t3, $f3
	mfc1	t4, $f7
	mfc1	t5, $f9
	subu	t3, t2		// OoPIo2 * 2^-k
	addu	t4, t2		// PIo2hi * 2^k
	addu	t5, t2		// PIo2lo * 2^k
	mtc1	t3, $f3
	mtc1	t4, $f7
	mtc1	t5, $f9
	mul.d	$f2, $f12
	cvt.w.d	$f4, $f2
	cvt.d.w	$f2, $f4
	mul.d	$f6, $f2	// exact (26 x 26 = 52 bits)
	mul.d	$f8, $f2	// exact (27 x 26 = 53 bits)
	sub.d	$f12, $f6	// exact
	sub.d	$f12, $f8	// exact

	abs.d	$f0, $f12
	c.lt.d	$f0, $f16	// if |round(x/(PI/2))| > 2^26, continue special
	li	t0, (1<<20)
	bc1f	8b		// argument reduction
	// Only k == 1 can change the quadrant count modulo 4 (it removed
	// 2n multiples of PI/2); larger k removed a multiple of 4.
	// NOTE(review): relies on k >= 1 whenever this point is reached -- confirm.
	bne	t2, t0, 1b
	mfc1	t0, $f4
	sll	t0, 1		// 2n
	addu	t7, t0
	b	1b		// finish with the simple reduction

sin_err:
	// not (|x| < Ymax): argument too large, or NaN
	ctc1	t6, $31		// restore original fcsr
	li	a1, OP_SIN	// operation code (function name index)
	jal	set_trigm_err
	b sinret1		// fcsr already restored above

	// NOTE(review): the Ymax test at entry already diverts Infinity and
	// NaN to sin_err, so this path looks unreachable -- confirm.
9:	/* x is NaN or Infinity */
	/* sub.d	$f0, $f12, $f12 */
	mov.d	$f0,$f12	// return the argument itself

sinret:
	ctc1	t6, $31		// restore original fcsr
sinret1:
	lw	ra, FSIZE-4(sp)
	addu	sp, FSIZE
	j	ra
.end sin


/* leave tan in its own section */
/*
 * double tan(double x)
 *
 * In:	$f12 = x
 * Out:	$f0  = result.  On the error path $f0 is not written here; the
 *	caller gets whatever set_trigm_err leaves behind.
 *
 * Register roles:
 *	t6	caller fcsr, written back before returning (never a temp)
 *	t7	count of PI/2 multiples removed from x; only bit 0 is used:
 *		odd -> x is negated and the result is c/(x+x*ss)
 *	$f10	PIo4, $f16 Ymax then Xmax, $f18 OoPIo2
 */
.text
.globl tan	/* double tan(double x) */
.ent tan
tan:
	.frame	sp, FSIZE, ra
	subu	sp, FSIZE
	sw	ra, FSIZE-4(sp)
	.prologue 1
	cfc1	t6, $31		// t6 original fcsr
	ctc1	$0, $31		// fcsr = 0: rounding-mode field 0 (round to nearest in the MIPS encoding), no exceptions
	li	t7, 0

	// if not (|x| < Ymax) -- too large, or unordered (NaN) -- report
	// FP_TLOSS through set_trigm_err
	li.d	$f16, Ymax
	abs.d	$f0, $f12
	c.olt.d	$f0, $f16
	li.d	$f10, PIo4
	bc1f	tan_err

	/* reduce to [-PI/4,+PI/4] */
	c.olt.d	$f0, $f10
	li.d	$f16, Xmax
	bc1t	3f		// in range, no reduction necessary

	c.olt.d	$f0, $f16	// if |round(x/(PI/2))| > 2^26, need special
	li.d	$f18, OoPIo2
	bc1f	8f		// argument reduction

	/* simple reduction: n = round(x/(PI/2)); x -= n*PI/2; t7 += n */
1:	mul.d	$f2, $f12, $f18	// round(x/(PI/2))
	cvt.w.d	$f4, $f2	// ...
	cvt.d.w	$f2, $f4	// ...
	/* f2 <= 2^26 */
	li.d	$f6, PIo2hi
	li.d	$f8, PIo2lo
	mul.d	$f6, $f2	// exact (26 x 26 = 52 bits)
	mul.d	$f8, $f2	// exact (27 x 26 = 53 bits)
	sub.d	$f12, $f6	// exact
	sub.d	$f12, $f8	// exact
	mfc1	t0, $f4		// integer n
	addu	t7, t0

	abs.d	$f0, $f12
	c.le.d	$f0, $f10
	and	t0, t7, 1	// t0 = parity of the quadrant count
	bc1f	1b		// still outside [-PI/4,+PI/4]: reduce again

	beq	t0, 0, 2f
	neg.d	$f12		// odd count: work on -x (result formed at 6:)
2:

3:	/* compute sin(x) and cos(x) for x in [-PI/4,PI/4] */
	/* z = x*x */
	/* (f0) cc = z*z*(C0+z*(C1+z*(C2+z*(C3+z*(C4+z*C5))))) */
	/* (f2) ss = z*(S0+z*(S1+z*(S2+z*(S3+z*(S4+z*S5))))) */
	mul.d	$f8, $f12, $f12	// z
	li.d	$f2, S5
	li.d	$f0, C5
	mul.d	$f2, $f8
	mul.d	$f0, $f8

	li.d	$f4, S4
	li.d	$f6, C4
	add.d	$f2, $f4
	add.d	$f0, $f6
	mul.d	$f2, $f8
	mul.d	$f0, $f8

	li.d	$f4, S3
	li.d	$f6, C3
	add.d	$f2, $f4
	add.d	$f0, $f6
	mul.d	$f2, $f8
	mul.d	$f0, $f8

	li.d	$f4, S2
	li.d	$f6, C2
	add.d	$f2, $f4
	add.d	$f0, $f6
	mul.d	$f2, $f8
	mul.d	$f0, $f8

	li.d	$f4, S1
	li.d	$f6, C1
	add.d	$f2, $f4
	add.d	$f0, $f6
	mul.d	$f2, $f8
	mul.d	$f0, $f8

	li.d	$f4, S0
	li.d	$f6, C0
	add.d	$f2, $f4
	add.d	$f0, $f6
	mul.d	$f2, $f8	// ss
	mul.d	$f0, $f8
	mul.d	$f0, $f8	// cc

	li.d	$f6, thresh
	li.d	$f16, half
	c.lt.d	$f8, $f6
	mul.d	$f14, $f16, $f8	// z/2
	bc1t	4f

	/* z >= thresh, c = 0.5-((z/2-0.5)-cc) */
	sub.d	$f6, $f14, $f16
	sub.d	$f6, $f0
	b	5f

4:	/* z < thresh, c = 1.0-(z/2-cc) */
	li.d	$f16, one
	sub.d	$f6, $f14, $f0

5:	/* ss in $f2, c in $f6 */
	sub.d	$f6, $f16, $f6	// finish c = $f16 - (...)
	and	t0, t7, 1
	bne	t0, 0, 6f

	/* tan(x) = x + (x*(z/2-(cc-ss)))/c */
	sub.d	$f4, $f0, $f2
	sub.d	$f0, $f14, $f4
	mul.d	$f0, $f12
	div.d	$f0, $f6
	add.d	$f0, $f12
	b	tanret

6:	/* tan(x) = c/(x+x*ss) */
	mul.d	$f2, $f12
	add.d	$f2, $f12
	div.d	$f0, $f6, $f2
	b	tanret

	/*
	 * Large-argument reduction.  t2 = (exponent field of x) minus the
	 * biased exponent of 2^25, still positioned at bit 20.  Subtracting
	 * t2 from the high word of OoPIo2 and adding it to the high words of
	 * PIo2hi/PIo2lo rescales them by 2^-k and 2^k (k = t2 >> 20), so the
	 * rounded quotient stays small enough for the exact products below.
	 */
8:	/* |round(x/(PI/2))| > 2^26 or x is NaN */
	mfc1	t0, $f13	// high word of x
	li	t1, 0x7ff00000	// exponent field mask
	and	t0, t1
	subu	t2, t0, (1023+25)<<20
	beq	t0, t1, 9f	// exponent all ones: Infinity or NaN
	li.d	$f2, OoPIo2
	li.d	$f6, PIo2hi
	li.d	$f8, PIo2lo
	mfc1	t3, $f3
	mfc1	t4, $f7
	mfc1	t5, $f9
	subu	t3, t2		// OoPIo2 * 2^-k
	addu	t4, t2		// PIo2hi * 2^k
	addu	t5, t2		// PIo2lo * 2^k
	mtc1	t3, $f3
	mtc1	t4, $f7
	mtc1	t5, $f9
	mul.d	$f2, $f12
	cvt.w.d	$f4, $f2
	cvt.d.w	$f2, $f4
	mul.d	$f6, $f2	// exact (26 x 26 = 52 bits)
	mul.d	$f8, $f2	// exact (27 x 26 = 53 bits)
	sub.d	$f12, $f6	// exact
	sub.d	$f12, $f8	// exact

	abs.d	$f0, $f12
	c.lt.d	$f0, $f16	// if |round(x/(PI/2))| > 2^26, continue special
	li	t0, (1<<20)
	bc1f	8b		// argument reduction
	// When k == 1 the step removed 2n multiples of PI/2, which is
	// added to t7 below; for other k nothing is added.
	// NOTE(review): relies on k >= 1 whenever this point is reached -- confirm.
	bne	t2, t0, 1b
	mfc1	t0, $f4
	sll	t0, 1		// 2n
	addu	t7, t0
	b	1b		// finish with the simple reduction

tan_err:
	// not (|x| < Ymax): argument too large, or NaN
	ctc1	t6, $31		// restore original fcsr
	li	a1, OP_TAN	// operation code (function name index)
	jal	set_trigm_err
	b	tanret1		// fcsr already restored above

// REVIEW is this correct?
// NOTE(review): unlike sin and cos, which return the argument itself
// here, this path returns x - x and also copies it to $f2.  The Ymax
// test at entry already diverts Infinity and NaN to tan_err, so the
// path looks unreachable -- confirm before changing it.
9:	/* x is NaN or Infinity */
	sub.d	$f0, $f12, $f12
	mov.d	$f2, $f0

tanret:
	ctc1	t6, $31		// restore original fcsr
tanret1:
	lw	ra, FSIZE-4(sp)
	addu	sp, FSIZE
	j	ra
.end tan

.extern _except1

/*
 * set_trigm_err -- common error path of cos, sin and tan.
 *
 * In:	a1   = operation code chosen by the caller (OP_COS/OP_SIN/OP_TAN)
 *	$f12 = the offending argument x
 *
 * Builds the argument list for _except1 and calls it:
 *	a0	FP_TLOSS | FP_I
 *	a1	operation code, passed through untouched
 *	a2	x, moved out of $f12 with mfc1.d
 *	16(sp)	default result: a NaN produced as 0.0/0.0
 *	24(sp)	fcsr with bits 0xf80 inverted
 *
 * Nothing is done with the return value here; the callers return
 * immediately afterwards, so whatever _except1 leaves behind becomes
 * the result of cos/sin/tan.
 *
 * Clobbers t7 and $f0 in addition to the argument registers.
 *
 * NOTE(review): the callers restore the original fcsr before the jal,
 * so the div.d below runs under the caller's fcsr settings, and cfc1
 * samples fcsr after that divide.  Keep this instruction order.
 */
.text .text$trigm
.ent set_trigm_err
set_trigm_err:
#undef FSIZE
#define FSIZE 48
	.frame  sp, FSIZE, ra
	.mask   0x80000000, -4
	subu    sp, FSIZE
	sw      ra, FSIZE-4(sp)
	.prologue 1
	li	a0, (FP_TLOSS | FP_I)	// exception mask
	// a1 passed by caller = operation code (function name index)
	mfc1.d	a2, $f12		// arg1 
	li.d    $f0, 0.0		// generate a NaN
	div.d   $f0, $f0		// 0.0/0.0
	s.d	$f0, 16(sp)		// default result
	cfc1    t7, $31			// fp control/status register
	xor     t7, t7, 0xf80		// inverse exception enable bits
	sw	t7, 24(sp)		// goes on parameter stack
	jal  	_except1
	lw      ra, FSIZE-4(sp)
	addu    sp, FSIZE
	j	ra
#undef FSIZE
.end set_trigm_err