/*
 * |-----------------------------------------------------------|
 * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc.      |
 * | All Rights Reserved                                       |
 * |-----------------------------------------------------------|
 * | Restricted Rights Legend                                  |
 * | Use, duplication, or disclosure by the Government is      |
 * | subject to restrictions as set forth in                   |
 * | subparagraph (c)(1)(ii) of the Rights in Technical        |
 * | Data and Computer Software Clause of DFARS 252.227-7013.  |
 * | MIPS Computer Systems, Inc.                               |
 * | 950 DeGuigne Avenue                                       |
 * | Sunnyvale, California 94088-3650, USA                     |
 * |-----------------------------------------------------------|
 */
/* $Header: trig.s,v 3000.3.1.2 91/06/10 15:18:21 karen Exp $ */
/* Algorithm from 4.3bsd */

/*
 * double cos(double x), double sin(double x), double tan(double x)
 * and the shared error helper set_trigm_err.
 *
 * Register roles common to cos, sin and tan:
 *   t6    caller fcsr, saved on entry and written back before return.
 *         Original fcsr is saved in t6. Do not use t6 as a temp register!
 *   t7    running quadrant count: starts at 1 in cos and 0 in sin/tan,
 *         then accumulates the integer n of every reduction step
 *   $f12  argument x, reduced in place to [-PI/4,+PI/4]
 *   $f10  PI/4
 *   $f16  Ymax, then Xmax during range tests (reused for 0.5 / 1.0 later)
 *   $f18  2/PI
 *   $f0   result
 *
 * cos branches into the sin1 / cos1 kernels that live inside sin and
 * therefore returns through the epilogue of sin (sinret); both
 * routines use the same frame layout, so this is balanced.
 */

/*
 * NOTE(review): the operand of the first include below is missing in
 * this copy of the file.  The register aliases used throughout
 * (t0..t7, a0..a2, sp, ra) are not defined here, so it presumably named
 * the platform register-definition header -- restore it from the
 * authoritative source before building.
 */
#include
#include "trans.h"
#include "fpieee.h"

#define PIo4    7.8539816339744828E-1
#define OoPIo2  6.3661977236758138E-1
#define PIo2hi  1.5707963109016418
#define PIo2lo  1.5893254712295857E-8
/* Xmax is 2^26 * PI/2 */
#define Xmax    105414357.85197645
#define half    0.5
#define one     1.0
#define thresh  5.2234479296242364e-01
/*
 * Ymax is numerically 2^(53/2)*PI (the earlier comment said PI/2, which
 * does not match the value).  No trailing semicolon: the macro is
 * pasted into the operand list of li.d.
 */
#define Ymax    2.98156826864790199324e8

#define S0      -1.6666666666666463126E-1
#define S1       8.3333333332992771264E-3
#define S2      -1.9841269816180999116E-4
#define S3       2.7557309793219876880E-6
#define S4      -2.5050225177523807003E-8
#define S5       1.5868926979889205164E-10

#define C0       4.1666666666666504759E-2
#define C1      -1.3888888888865301516E-3
#define C2       2.4801587269650015769E-5
#define C3      -2.7557304623183959811E-7
#define C4       2.0873958177697780076E-9
#define C5      -1.1250289076471311557E-11

#undef  FSIZE
#define FSIZE 16

        .text .text$trigm

        .globl cos
/*
 * double cos(double x)
 *
 * In:   $f12 = x
 * Out:  $f0  = result
 * Uses: t0-t7, $f0-$f18 (even registers), fcsr (restored on exit)
 *
 * Starts the quadrant count at 1 so that the shared sin1/cos1 kernels
 * in sin evaluate sin(x + PI/2).
 */
        .ent cos
cos:
        .frame  sp, FSIZE, ra
        subu    sp, FSIZE
        sw      ra, FSIZE-4(sp)
        .prologue 1
        cfc1    t6, $31                 // t6 original fcsr
        ctc1    $0, $31                 // fcsr = 0: no traps enabled, rounding
                                        // field 0 (round to nearest per the
                                        // MIPS FPU specification)
        li      t7, 1                   // quadrant count starts at 1 for cos

        // if !(|x| < Ymax), i.e. |x| >= Ymax or the compare is unordered,
        // report through set_trigm_err (FP_TLOSS) and return its result
        li.d    $f16, Ymax
        abs.d   $f0, $f12
        c.olt.d $f0, $f16
        li.d    $f10, PIo4
        bc1f    cos_err

        /* reduce to [-PI/4,+PI/4] */
        c.olt.d $f0, $f10
        li.d    $f16, Xmax
        bc1t    cos1                    // in range, no reduction necessary
        c.olt.d $f0, $f16               // if |round(x/(PI/2))| > 2^26, need special
        li.d    $f18, OoPIo2
        bc1f    8f                      // argument reduction
1:
        mul.d   $f2, $f12, $f18         // round(x/(PI/2))
        cvt.w.d $f4, $f2                // ...
        cvt.d.w $f2, $f4                // ...
        /* f2 <= 2^26 */
        li.d    $f6, PIo2hi
        li.d    $f8, PIo2lo
        mul.d   $f6, $f2                // exact (26 x 26 = 52 bits)
        mul.d   $f8, $f2                // exact (27 x 26 = 53 bits)
        sub.d   $f12, $f6               // exact
        sub.d   $f12, $f8               // exact
        mfc1    t0, $f4                 // n as an integer
        addu    t7, t0                  // accumulate quadrant count
        abs.d   $f0, $f12
        c.le.d  $f0, $f10
        and     t0, t7, 1
        bc1f    1b                      // still outside [-PI/4,+PI/4]: reduce again
        and     t1, t7, 2
        bne     t0, 0, cos1             // odd quadrant: cosine kernel (sign from bit 1)
        beq     t1, 0, sin1             // quadrant 0 mod 4: sin(x)
        neg.d   $f12                    // quadrant 2 mod 4: sin(-x)
        b       sin1

8:      /* |round(x/(PI/2))| > 2^26 or x is NaN */
        /*
         * Scaled reduction: t2 is the distance of the exponent field of
         * x from 1023+25.  It is subtracted from the exponent field of
         * 2/PI and added to the exponent fields of PIo2hi and PIo2lo,
         * so the quotient again fits the exact-product scheme used at 1.
         */
        mfc1    t0, $f13                // high word of x
        li      t1, 0x7ff00000          // exponent field mask
        and     t0, t1
        subu    t2, t0, (1023+25)<<20
        beq     t0, t1, 9f              // exponent all ones
        li.d    $f2, OoPIo2
        li.d    $f6, PIo2hi
        li.d    $f8, PIo2lo
        mfc1    t3, $f3
        mfc1    t4, $f7
        mfc1    t5, $f9
        subu    t3, t2
        addu    t4, t2
        addu    t5, t2
        mtc1    t3, $f3
        mtc1    t4, $f7
        mtc1    t5, $f9
        mul.d   $f2, $f12
        cvt.w.d $f4, $f2
        cvt.d.w $f2, $f4
        mul.d   $f6, $f2                // exact (26 x 26 = 52 bits)
        mul.d   $f8, $f2                // exact (27 x 26 = 53 bits)
        sub.d   $f12, $f6               // exact
        sub.d   $f12, $f8               // exact
        abs.d   $f0, $f12
        c.lt.d  $f0, $f16               // if |round(x/(PI/2))| > 2^26, continue special
        li      t0, (1<<20)
        bc1f    8b                      // argument reduction
        bne     t2, t0, 1b              // scale was not one exponent step:
                                        // quadrant count is left unchanged
        mfc1    t0, $f4
        sll     t0, 1                   // one exponent step: each unit is two quadrants
        addu    t7, t0
        b       1b

cos_err:
        // !(|x| < Ymax): argument too large for reduction, or unordered
        ctc1    t6, $31                 // restore original fcsr
        li      a1, OP_COS              // operation code (function name index)
        jal     set_trigm_err
        b       cosret1                 // fcsr was already restored above

9:      /* exponent field all ones (x is NaN or Infinity) */
        /* NOTE(review): the range test at entry already sends such
           arguments to cos_err, so this path looks unreachable -- confirm */
        /* sub.d $f0, $f12, $f12 */
        mov.d   $f0, $f12
cosret:
        ctc1    t6, $31                 // restore original fcsr
cosret1:
        lw      ra, FSIZE-4(sp)
        addu    sp, FSIZE
        j       ra
        .end cos


        .text .text$trigm

        .globl sin
/*
 * double sin(double x)
 *
 * In:   $f12 = x
 * Out:  $f0  = result
 * Uses: t0-t7, $f0-$f18 (even registers), fcsr (restored on exit)
 *
 * Also hosts the sin1 and cos1 polynomial kernels and the sinret
 * epilogue, all of which cos branches into.
 */
        .ent sin
sin:
        .frame  sp, FSIZE, ra
        subu    sp, FSIZE
        sw      ra, FSIZE-4(sp)
        .prologue 1
        cfc1    t6, $31                 // t6 original fcsr
        ctc1    $0, $31                 // fcsr = 0: no traps enabled, rounding
                                        // field 0 (round to nearest per the
                                        // MIPS FPU specification)
        li      t7, 0                   // quadrant count starts at 0 for sin

        // if !(|x| < Ymax), i.e. |x| >= Ymax or the compare is unordered,
        // report through set_trigm_err (FP_TLOSS) and return its result
        li.d    $f16, Ymax
        abs.d   $f0, $f12
        c.olt.d $f0, $f16
        li.d    $f10, PIo4
        bc1f    sin_err

        /* reduce to [-PI/4,+PI/4] */
        c.olt.d $f0, $f10
        li.d    $f16, Xmax
        bc1t    sin1                    // in range, no reduction necessary
        c.olt.d $f0, $f16               // if |round(x/(PI/2))| > 2^26, need special
        li.d    $f18, OoPIo2
        bc1f    8f                      // argument reduction
1:
        mul.d   $f2, $f12, $f18         // round(x/(PI/2))
        cvt.w.d $f4, $f2                // ...
        cvt.d.w $f2, $f4                // ...
        /* f2 <= 2^26 */
        li.d    $f6, PIo2hi
        li.d    $f8, PIo2lo
        mul.d   $f6, $f2                // exact (26 x 26 = 52 bits)
        mul.d   $f8, $f2                // exact (27 x 26 = 53 bits)
        sub.d   $f12, $f6               // exact
        sub.d   $f12, $f8               // exact
        mfc1    t0, $f4                 // n as an integer
        addu    t7, t0                  // accumulate quadrant count
        abs.d   $f0, $f12
        c.le.d  $f0, $f10
        and     t0, t7, 1
        bc1f    1b                      // still outside [-PI/4,+PI/4]: reduce again
        and     t1, t7, 2
        bne     t0, 0, cos1             // odd quadrant: cosine kernel (sign from bit 1)
        beq     t1, 0, 2f               // quadrant 0 mod 4: sin(x)
        neg.d   $f12                    // quadrant 2 mod 4: sin(-x)
2:
sin1:
        /* compute sin(x) for x in [-PI/4,PI/4] */
        /* z = x*x, sin(x) = x + x*z*(S0+z*(S1+z*(S2+z*(S3+z*(S4+z*S5))))) */
        mul.d   $f8, $f12, $f12
        li.d    $f0, S5
        mul.d   $f0, $f8
        li.d    $f4, S4
        add.d   $f0, $f4
        mul.d   $f0, $f8
        li.d    $f6, S3
        add.d   $f0, $f6
        mul.d   $f0, $f8
        li.d    $f4, S2
        add.d   $f0, $f4
        mul.d   $f0, $f8
        li.d    $f6, S1
        add.d   $f0, $f6
        mul.d   $f0, $f8
        li.d    $f4, S0
        add.d   $f0, $f4
        mul.d   $f0, $f8
        mul.d   $f0, $f12
        add.d   $f0, $f12
        b       sinret

cos1:
        /* compute cos(x) for x in [-PI/4,PI/4] */
        /* z = x*x, c = z*z*(C0+z*(C1+z*(C2+z*(C3+z*(C4+z*C5))))) */
        mul.d   $f8, $f12, $f12
        li.d    $f0, C5
        mul.d   $f0, $f8
        li.d    $f4, C4
        add.d   $f0, $f4
        mul.d   $f0, $f8
        li.d    $f6, C3
        add.d   $f0, $f6
        mul.d   $f0, $f8
        li.d    $f4, C2
        add.d   $f0, $f4
        mul.d   $f0, $f8
        li.d    $f6, C1
        add.d   $f0, $f6
        mul.d   $f0, $f8
        li.d    $f4, C0
        add.d   $f0, $f4
        mul.d   $f0, $f8
        mul.d   $f0, $f8
        li.d    $f6, thresh
        li.d    $f16, 0.5
        c.lt.d  $f8, $f6
        mul.d   $f14, $f16, $f8         // f14 = z/2
        bc1t    4f
        /* z >= thresh, cos(x) = 0.5-((z/2-0.5)-c) */
        sub.d   $f8, $f14, $f16
        sub.d   $f8, $f0
        b       5f
4:      /* z < thresh, cos(x) = 1.0-(z/2-c) */
        li.d    $f16, one
        sub.d   $f8, $f14, $f0
5:
        and     t0, t7, 2               // bit 1 of the quadrant count picks the sign
        bne     t0, 0, 6f
        sub.d   $f0, $f16, $f8
        b       sinret
6:
        sub.d   $f0, $f8, $f16          // negated result
        b       sinret

8:      /* |round(x/(PI/2))| > 2^26 or x is NaN */
        /*
         * Scaled reduction: t2 is the distance of the exponent field of
         * x from 1023+25.  It is subtracted from the exponent field of
         * 2/PI and added to the exponent fields of PIo2hi and PIo2lo,
         * so the quotient again fits the exact-product scheme used at 1.
         */
        mfc1    t0, $f13                // high word of x
        li      t1, 0x7ff00000          // exponent field mask
        and     t0, t1
        subu    t2, t0, (1023+25)<<20
        beq     t0, t1, 9f              // exponent all ones
        li.d    $f2, OoPIo2
        li.d    $f6, PIo2hi
        li.d    $f8, PIo2lo
        mfc1    t3, $f3
        mfc1    t4, $f7
        mfc1    t5, $f9
        subu    t3, t2
        addu    t4, t2
        addu    t5, t2
        mtc1    t3, $f3
        mtc1    t4, $f7
        mtc1    t5, $f9
        mul.d   $f2, $f12
        cvt.w.d $f4, $f2
        cvt.d.w $f2, $f4
        mul.d   $f6, $f2                // exact (26 x 26 = 52 bits)
        mul.d   $f8, $f2                // exact (27 x 26 = 53 bits)
        sub.d   $f12, $f6               // exact
        sub.d   $f12, $f8               // exact
        abs.d   $f0, $f12
        c.lt.d  $f0, $f16               // if |round(x/(PI/2))| > 2^26, continue special
        li      t0, (1<<20)
        bc1f    8b                      // argument reduction
        bne     t2, t0, 1b              // scale was not one exponent step:
                                        // quadrant count is left unchanged
        mfc1    t0, $f4
        sll     t0, 1                   // one exponent step: each unit is two quadrants
        addu    t7, t0
        b       1b

sin_err:
        // !(|x| < Ymax): argument too large for reduction, or unordered
        ctc1    t6, $31                 // restore original fcsr
        li      a1, OP_SIN              // operation code (function name index)
        jal     set_trigm_err
        b       sinret1                 // fcsr was already restored above

9:      /* exponent field all ones (x is NaN or Infinity) */
        /* NOTE(review): the range test at entry already sends such
           arguments to sin_err, so this path looks unreachable -- confirm */
        /* sub.d $f0, $f12, $f12 */
        mov.d   $f0, $f12
sinret:
        ctc1    t6, $31                 // restore original fcsr
sinret1:
        lw      ra, FSIZE-4(sp)
        addu    sp, FSIZE
        j       ra
        .end sin


/* leave tan in its own section */
        .text

        .globl tan
/*
 * double tan(double x)
 *
 * In:   $f12 = x
 * Out:  $f0  = result
 * Uses: t0-t7, $f0-$f18 (even registers), fcsr (restored on exit)
 *
 * Evaluates both polynomial kernels in parallel (ss in $f2, cc in $f0)
 * and combines them according to the parity of the quadrant count.
 */
        .ent tan
tan:
        .frame  sp, FSIZE, ra
        subu    sp, FSIZE
        sw      ra, FSIZE-4(sp)
        .prologue 1
        cfc1    t6, $31                 // t6 original fcsr
        ctc1    $0, $31                 // fcsr = 0: no traps enabled, rounding
                                        // field 0 (round to nearest per the
                                        // MIPS FPU specification)
        li      t7, 0                   // quadrant count starts at 0 for tan

        // if !(|x| < Ymax), i.e. |x| >= Ymax or the compare is unordered,
        // report through set_trigm_err (FP_TLOSS) and return its result
        li.d    $f16, Ymax
        abs.d   $f0, $f12
        c.olt.d $f0, $f16
        li.d    $f10, PIo4
        bc1f    tan_err

        /* reduce to [-PI/4,+PI/4] */
        c.olt.d $f0, $f10
        li.d    $f16, Xmax
        bc1t    3f                      // in range, no reduction necessary
        c.olt.d $f0, $f16               // if |round(x/(PI/2))| > 2^26, need special
        li.d    $f18, OoPIo2
        bc1f    8f                      // argument reduction
1:
        mul.d   $f2, $f12, $f18         // round(x/(PI/2))
        cvt.w.d $f4, $f2                // ...
        cvt.d.w $f2, $f4                // ...
        /* f2 <= 2^26 */
        li.d    $f6, PIo2hi
        li.d    $f8, PIo2lo
        mul.d   $f6, $f2                // exact (26 x 26 = 52 bits)
        mul.d   $f8, $f2                // exact (27 x 26 = 53 bits)
        sub.d   $f12, $f6               // exact
        sub.d   $f12, $f8               // exact
        mfc1    t0, $f4                 // n as an integer
        addu    t7, t0                  // accumulate quadrant count
        abs.d   $f0, $f12
        c.le.d  $f0, $f10
        and     t0, t7, 1
        bc1f    1b                      // still outside [-PI/4,+PI/4]: reduce again
        beq     t0, 0, 2f
        neg.d   $f12                    // odd quadrant: work with -x
2:
3:
        /* compute sin(x) and cos(x) for x in [-PI/4,PI/4] */
        /* z = x*x */
        /* (f0) cc = z*z*(C0+z*(C1+z*(C2+z*(C3+z*(C4+z*C5))))) */
        /* (f2) ss = z*(S0+z*(S1+z*(S2+z*(S3+z*(S4+z*S5))))) */
        mul.d   $f8, $f12, $f12
        li.d    $f2, S5
        li.d    $f0, C5
        mul.d   $f2, $f8
        mul.d   $f0, $f8
        li.d    $f4, S4
        li.d    $f6, C4
        add.d   $f2, $f4
        add.d   $f0, $f6
        mul.d   $f2, $f8
        mul.d   $f0, $f8
        li.d    $f4, S3
        li.d    $f6, C3
        add.d   $f2, $f4
        add.d   $f0, $f6
        mul.d   $f2, $f8
        mul.d   $f0, $f8
        li.d    $f4, S2
        li.d    $f6, C2
        add.d   $f2, $f4
        add.d   $f0, $f6
        mul.d   $f2, $f8
        mul.d   $f0, $f8
        li.d    $f4, S1
        li.d    $f6, C1
        add.d   $f2, $f4
        add.d   $f0, $f6
        mul.d   $f2, $f8
        mul.d   $f0, $f8
        li.d    $f4, S0
        li.d    $f6, C0
        add.d   $f2, $f4
        add.d   $f0, $f6
        mul.d   $f2, $f8
        mul.d   $f0, $f8
        mul.d   $f0, $f8
        li.d    $f6, thresh
        li.d    $f16, 0.5
        c.lt.d  $f8, $f6
        mul.d   $f14, $f16, $f8         // f14 = z/2
        bc1t    4f
        /* z >= thresh, c = 0.5-((z/2-0.5)-cc) */
        sub.d   $f6, $f14, $f16
        sub.d   $f6, $f0
        b       5f
4:      /* z < thresh, c = 1.0-(z/2-cc) */
        li.d    $f16, one
        sub.d   $f6, $f14, $f0
5:      /* ss in $f2, c in $f6 */
        sub.d   $f6, $f16, $f6
        and     t0, t7, 1
        bne     t0, 0, 6f
        /* tan(x) = x + (x*(z/2-(cc-ss)))/c */
        sub.d   $f4, $f0, $f2
        sub.d   $f0, $f14, $f4
        mul.d   $f0, $f12
        div.d   $f0, $f6
        add.d   $f0, $f12
        b       tanret
6:      /* tan(x) = c/(x+x*ss) */
        mul.d   $f2, $f12
        add.d   $f2, $f12
        div.d   $f0, $f6, $f2
        b       tanret

8:      /* |round(x/(PI/2))| > 2^26 or x is NaN */
        /*
         * Scaled reduction: t2 is the distance of the exponent field of
         * x from 1023+25.  It is subtracted from the exponent field of
         * 2/PI and added to the exponent fields of PIo2hi and PIo2lo,
         * so the quotient again fits the exact-product scheme used at 1.
         */
        mfc1    t0, $f13                // high word of x
        li      t1, 0x7ff00000          // exponent field mask
        and     t0, t1
        subu    t2, t0, (1023+25)<<20
        beq     t0, t1, 9f              // exponent all ones
        li.d    $f2, OoPIo2
        li.d    $f6, PIo2hi
        li.d    $f8, PIo2lo
        mfc1    t3, $f3
        mfc1    t4, $f7
        mfc1    t5, $f9
        subu    t3, t2
        addu    t4, t2
        addu    t5, t2
        mtc1    t3, $f3
        mtc1    t4, $f7
        mtc1    t5, $f9
        mul.d   $f2, $f12
        cvt.w.d $f4, $f2
        cvt.d.w $f2, $f4
        mul.d   $f6, $f2                // exact (26 x 26 = 52 bits)
        mul.d   $f8, $f2                // exact (27 x 26 = 53 bits)
        sub.d   $f12, $f6               // exact
        sub.d   $f12, $f8               // exact
        abs.d   $f0, $f12
        c.lt.d  $f0, $f16               // if |round(x/(PI/2))| > 2^26, continue special
        li      t0, (1<<20)
        bc1f    8b                      // argument reduction
        bne     t2, t0, 1b              // scale was not one exponent step:
                                        // quadrant count is left unchanged
        mfc1    t0, $f4
        sll     t0, 1                   // one exponent step: each unit is two quadrants
        addu    t7, t0
        b       1b

tan_err:
        // !(|x| < Ymax): argument too large for reduction, or unordered
        ctc1    t6, $31                 // restore original fcsr
        li      a1, OP_TAN              // operation code (function name index)
        jal     set_trigm_err
        b       tanret1                 // REVIEW is this correct?
                                        // (same shape as cos_err and sin_err:
                                        // fcsr was already restored above)

9:      /* exponent field all ones (x is NaN or Infinity) */
        /* NOTE(review): the range test at entry already sends such
           arguments to tan_err, so this path looks unreachable -- confirm */
        sub.d   $f0, $f12, $f12
        mov.d   $f2, $f0
tanret:
        ctc1    t6, $31                 // restore original fcsr
tanret1:
        lw      ra, FSIZE-4(sp)
        addu    sp, FSIZE
        j       ra
        .end tan


        .extern _except1

        .text .text$trigm

/*
 * set_trigm_err -- common error exit for cos, sin and tan.
 *
 * In:   a1   = operation code (OP_COS, OP_SIN or OP_TAN)
 *       $f12 = the offending argument
 * Calls _except1 with
 *       a0      = FP_TLOSS | FP_I
 *       a1      = operation code, passed through unchanged
 *       a2/a3   = the argument (moved from $f12)
 *       16(sp)  = default result, a NaN produced by 0.0/0.0
 *       24(sp)  = current fcsr with the bits in mask 0xf80 inverted
 * and returns to the caller with whatever _except1 left behind.
 * Callers restore their saved fcsr before calling, so the divide below
 * runs under the original control word of the caller.
 */
        .ent set_trigm_err
set_trigm_err:
#undef  FSIZE
#define FSIZE 48
        .frame  sp, FSIZE, ra
        .mask   0x80000000, -4
        subu    sp, FSIZE
        sw      ra, FSIZE-4(sp)
        .prologue 1
        li      a0, (FP_TLOSS | FP_I)   // exception mask
        // a1 passed by caller = operation code (function name index)
        mfc1.d  a2, $f12                // arg1
        li.d    $f0, 0.0                // generate a NaN
        div.d   $f0, $f0
        s.d     $f0, 16(sp)             // default result
        cfc1    t7, $31                 // fp control/status register
        xor     t7, t7, 0xf80           // inverse exception enable bits
        sw      t7, 24(sp)              // goes on parameter stack
        jal     _except1
        lw      ra, FSIZE-4(sp)
        addu    sp, FSIZE
        j       ra
#undef  FSIZE
        .end set_trigm_err