Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

553 lines
11 KiB

/*
* |-----------------------------------------------------------|
* | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. |
* | All Rights Reserved |
* |-----------------------------------------------------------|
* | Restricted Rights Legend |
* | Use, duplication, or disclosure by the Government is |
* | subject to restrictions as set forth in |
* | subparagraph (c)(1)(ii) of the Rights in Technical |
* | Data and Computer Software Clause of DFARS 252.227-7013. |
* | MIPS Computer Systems, Inc. |
* | 950 DeGuigne Avenue |
* | Sunnyvale, California 94088-3650, USA |
* |-----------------------------------------------------------|
*/
/* $Header: trig.s,v 3000.3.1.2 91/06/10 15:18:21 karen Exp $ */
/* Algorithm from 4.3bsd */
/*
* Original fcsr is saved in t6. Do not use t6 as a temp register!
*/
#include <kxmips.h>
#include "trans.h"
#include "fpieee.h"
#define PIo4 7.8539816339744828E-1
#define OoPIo2 6.3661977236758138E-1
#define PIo2hi 1.5707963109016418
#define PIo2lo 1.5893254712295857E-8
#define Xmax 105414357.85197645
#define half 0.5
#define one 1.0
#define thresh 5.2234479296242364e-01
#define Ymax 2.98156826864790199324e8; /* 2^(53/2)*PI/2 */
#define S0 -1.6666666666666463126E-1
#define S1 8.3333333332992771264E-3
#define S2 -1.9841269816180999116E-4
#define S3 2.7557309793219876880E-6
#define S4 -2.5050225177523807003E-8
#define S5 1.5868926979889205164E-10
#define C0 4.1666666666666504759E-2
#define C1 -1.3888888888865301516E-3
#define C2 2.4801587269650015769E-5
#define C3 -2.7557304623183959811E-7
#define C4 2.0873958177697780076E-9
#define C5 -1.1250289076471311557E-11
#undef FSIZE
#define FSIZE 16
.text .text$trigm
.globl cos /* double cos(double x) */
.ent cos
cos:
.frame sp, FSIZE, ra
subu sp, FSIZE
sw ra, FSIZE-4(sp)
.prologue 1
cfc1 t6, $31 // t6 original fcsr
ctc1 $0, $31 // set round to zero, no traps
li t7, 1
// if |x| >= 2^63 generate _TLOSS and return indefinite
li.d $f16, Ymax
abs.d $f0, $f12
c.olt.d $f0, $f16
li.d $f10, PIo4
bc1f cos_err
/* reduce to [-PI/4,+PI/4] */
c.olt.d $f0, $f10
li.d $f16, Xmax
bc1t cos1 // in range, no reduction necessary
c.olt.d $f0, $f16 // if |round(x/(PI/2))| > 2^26, need special
li.d $f18, OoPIo2
bc1f 8f // argument reduction
1: mul.d $f2, $f12, $f18 // round(x/(PI/2))
cvt.w.d $f4, $f2 // ...
cvt.d.w $f2, $f4 // ...
/* f2 <= 2^26 */
li.d $f6, PIo2hi
li.d $f8, PIo2lo
mul.d $f6, $f2 // exact (26 x 26 = 52 bits)
mul.d $f8, $f2 // exact (27 x 26 = 53 bits)
sub.d $f12, $f6 // exact
sub.d $f12, $f8 // exact
mfc1 t0, $f4
addu t7, t0
abs.d $f0, $f12
c.le.d $f0, $f10
and t0, t7, 1
bc1f 1b
and t1, t7, 2
bne t0, 0, cos1
beq t1, 0, sin1
neg.d $f12
b sin1
8: /* |round(x/(PI/2))| > 2^26 or x is NaN */
mfc1 t0, $f13
li t1, 0x7ff00000
and t0, t1
subu t2, t0, (1023+25)<<20
beq t0, t1, 9f
li.d $f2, OoPIo2
li.d $f6, PIo2hi
li.d $f8, PIo2lo
mfc1 t3, $f3
mfc1 t4, $f7
mfc1 t5, $f9
subu t3, t2
addu t4, t2
addu t5, t2
mtc1 t3, $f3
mtc1 t4, $f7
mtc1 t5, $f9
mul.d $f2, $f12
cvt.w.d $f4, $f2
cvt.d.w $f2, $f4
mul.d $f6, $f2 // exact (26 x 26 = 52 bits)
mul.d $f8, $f2 // exact (27 x 26 = 53 bits)
sub.d $f12, $f6 // exact
sub.d $f12, $f8 // exact
abs.d $f0, $f12
c.lt.d $f0, $f16 // if |round(x/(PI/2))| > 2^26, continue special
li t0, (1<<20)
bc1f 8b // argument reduction
bne t2, t0, 1b
mfc1 t0, $f4
sll t0, 1
addu t7, t0
b 1b
cos_err:
// |x| >= 2^63
ctc1 t6, $31 // restore original fcsr
li a1, OP_COS // operation code (funtion name index)
jal set_trigm_err
b cosret1
9: /* x is NaN or Infinity */
/* sub.d $f0, $f12, $f12 */
mov.d $f0, $f12
cosret:
ctc1 t6, $31 // restore original fcsr
cosret1:
lw ra, FSIZE-4(sp)
addu sp, FSIZE
j ra
.end cos
.text .text$trigm
.globl sin /* double sin(double x) */
.ent sin
sin:
.frame sp, FSIZE, ra
subu sp, FSIZE
sw ra, FSIZE-4(sp)
.prologue 1
cfc1 t6, $31 // t6 original fcsr
ctc1 $0, $31 // set round to zero, no traps
li t7, 0
// if |x| >= 2^63 generate _TLOSS and return indefinite
li.d $f16, Ymax
abs.d $f0, $f12
c.olt.d $f0, $f16
li.d $f10, PIo4
bc1f sin_err
/* reduce to [-PI/4,+PI/4] */
c.olt.d $f0, $f10
li.d $f16, Xmax
bc1t sin1 // in range, no reduction necessary
c.olt.d $f0, $f16 // if |round(x/(PI/2))| > 2^26, need special
li.d $f18, OoPIo2
bc1f 8f // argument reduction
1: mul.d $f2, $f12, $f18
cvt.w.d $f4, $f2
cvt.d.w $f2, $f4
/* f2 <= 2^26 */
li.d $f6, PIo2hi
li.d $f8, PIo2lo
mul.d $f6, $f2 // exact (26 x 26 = 52 bits)
mul.d $f8, $f2 // exact (27 x 26 = 53 bits)
sub.d $f12, $f6 // exact
sub.d $f12, $f8 // exact
mfc1 t0, $f4
addu t7, t0
abs.d $f0, $f12
c.le.d $f0, $f10
and t0, t7, 1
bc1f 1b
and t1, t7, 2
bne t0, 0, cos1
beq t1, 0, 2f
neg.d $f12
2:
sin1: /* compute sin(x) for x in [-PI/4,PI/4] */
/* z = x*x, sin(x) = x + x*z*(S0+z*(S1+z*(S2+z*(S3+z*(S4+z*S5))))) */
mul.d $f8, $f12, $f12
li.d $f0, S5
mul.d $f0, $f8
li.d $f4, S4
add.d $f0, $f4
mul.d $f0, $f8
li.d $f6, S3
add.d $f0, $f6
mul.d $f0, $f8
li.d $f4, S2
add.d $f0, $f4
mul.d $f0, $f8
li.d $f6, S1
add.d $f0, $f6
mul.d $f0, $f8
li.d $f4, S0
add.d $f0, $f4
mul.d $f0, $f8
mul.d $f0, $f12
add.d $f0, $f12
b sinret
cos1: /* compute cos(x) for x in [-PI/4,PI/4] */
/* z = x*x, c = z*z*(C0+z*(C1+z*(C2+z*(C3+z*(C4+z*C5))))) */
mul.d $f8, $f12, $f12
li.d $f0, C5
mul.d $f0, $f8
li.d $f4, C4
add.d $f0, $f4
mul.d $f0, $f8
li.d $f6, C3
add.d $f0, $f6
mul.d $f0, $f8
li.d $f4, C2
add.d $f0, $f4
mul.d $f0, $f8
li.d $f6, C1
add.d $f0, $f6
mul.d $f0, $f8
li.d $f4, C0
add.d $f0, $f4
mul.d $f0, $f8
mul.d $f0, $f8
li.d $f6, thresh
li.d $f16, 0.5
c.lt.d $f8, $f6
mul.d $f14 $f16, $f8
bc1t 4f
/* z >= thresh, cos(x) = 0.5-((z/2-0.5)-c) */
sub.d $f8, $f14, $f16
sub.d $f8, $f0
b 5f
4: /* z < thresh, cos(x) = 1.0-(z/2-c) */
li.d $f16, one
sub.d $f8, $f14, $f0
5:
and t0, t7, 2
bne t0, 0, 6f
sub.d $f0, $f16, $f8
b sinret
6: sub.d $f0, $f8, $f16
b sinret
8: /* |round(x/(PI/2))| > 2^26 or x is NaN */
mfc1 t0, $f13
li t1, 0x7ff00000
and t0, t1
subu t2, t0, (1023+25)<<20
beq t0, t1, 9f
li.d $f2, OoPIo2
li.d $f6, PIo2hi
li.d $f8, PIo2lo
mfc1 t3, $f3
mfc1 t4, $f7
mfc1 t5, $f9
subu t3, t2
addu t4, t2
addu t5, t2
mtc1 t3, $f3
mtc1 t4, $f7
mtc1 t5, $f9
mul.d $f2, $f12
cvt.w.d $f4, $f2
cvt.d.w $f2, $f4
mul.d $f6, $f2 // exact (26 x 26 = 52 bits)
mul.d $f8, $f2 // exact (27 x 26 = 53 bits)
sub.d $f12, $f6 // exact
sub.d $f12, $f8 // exact
abs.d $f0, $f12
c.lt.d $f0, $f16 // if |round(x/(PI/2))| > 2^26, continue special
li t0, (1<<20)
bc1f 8b // argument reduction
bne t2, t0, 1b
mfc1 t0, $f4
sll t0, 1
addu t7, t0
b 1b
sin_err:
// |x| >= 2^63
ctc1 t6, $31 // restore original fcsr
li a1, OP_SIN // operation code (funtion name index)
jal set_trigm_err
b sinret1
9: /* x is NaN or Infinity */
/* sub.d $f0, $f12, $f12 */
mov.d $f0,$f12
sinret:
ctc1 t6, $31 // restore original fcsr
sinret1:
lw ra, FSIZE-4(sp)
addu sp, FSIZE
j ra
.end sin
/* leave tan in its own section */
.text
.globl tan /* double tan(double x) */
.ent tan
tan:
.frame sp, FSIZE, ra
subu sp, FSIZE
sw ra, FSIZE-4(sp)
.prologue 1
cfc1 t6, $31 // t6 original fcsr
ctc1 $0, $31 // set round to zero, no exceptions
li t7, 0
// if |x| >= 2^63 generate _TLOSS and return indefinite
li.d $f16, Ymax
abs.d $f0, $f12
c.olt.d $f0, $f16
li.d $f10, PIo4
bc1f tan_err
/* reduce to [-PI/4,+PI/4] */
c.olt.d $f0, $f10
li.d $f16, Xmax
bc1t 3f
c.olt.d $f0, $f16 // if |round(x/(PI/2))| > 2^26, need special
li.d $f18, OoPIo2
bc1f 8f // argument reduction
1: mul.d $f2, $f12, $f18 // round(x/(PI/2))
cvt.w.d $f4, $f2 // ...
cvt.d.w $f2, $f4 // ...
/* f2 <= 2^26 */
li.d $f6, PIo2hi
li.d $f8, PIo2lo
mul.d $f6, $f2 // exact (26 x 26 = 52 bits)
mul.d $f8, $f2 // exact (27 x 26 = 53 bits)
sub.d $f12, $f6 // exact
sub.d $f12, $f8 // exact
mfc1 t0, $f4
addu t7, t0
abs.d $f0, $f12
c.le.d $f0, $f10
and t0, t7, 1
bc1f 1b
beq t0, 0, 2f
neg.d $f12
2:
3: /* compute sin(x) and cos(x) for x in [-PI/4,PI/4] */
/* z = x*x */
/* (f0) cc = z*z*(C0+z*(C1+z*(C2+z*(C3+z*(C4+z*C5))))) */
/* (f2) ss = z*(S0+z*(S1+z*(S2+z*(S3+z*(S4+z*S5))))) */
mul.d $f8, $f12, $f12
li.d $f2, S5
li.d $f0, C5
mul.d $f2, $f8
mul.d $f0, $f8
li.d $f4, S4
li.d $f6, C4
add.d $f2, $f4
add.d $f0, $f6
mul.d $f2, $f8
mul.d $f0, $f8
li.d $f4, S3
li.d $f6, C3
add.d $f2, $f4
add.d $f0, $f6
mul.d $f2, $f8
mul.d $f0, $f8
li.d $f4, S2
li.d $f6, C2
add.d $f2, $f4
add.d $f0, $f6
mul.d $f2, $f8
mul.d $f0, $f8
li.d $f4, S1
li.d $f6, C1
add.d $f2, $f4
add.d $f0, $f6
mul.d $f2, $f8
mul.d $f0, $f8
li.d $f4, S0
li.d $f6, C0
add.d $f2, $f4
add.d $f0, $f6
mul.d $f2, $f8
mul.d $f0, $f8
mul.d $f0, $f8
li.d $f6, thresh
li.d $f16, 0.5
c.lt.d $f8, $f6
mul.d $f14 $f16, $f8
bc1t 4f
/* z >= thresh, c = 0.5-((z/2-0.5)-cc) */
sub.d $f6, $f14, $f16
sub.d $f6, $f0
b 5f
4: /* z < thresh, c = 1.0-(z/2-cc) */
li.d $f16, one
sub.d $f6, $f14, $f0
5: /* ss in $f2, c in $f6 */
sub.d $f6, $f16, $f6
and t0, t7, 1
bne t0, 0, 6f
/* tan(x) = x + (x*(z/2-(cc-ss)))/c */
sub.d $f4, $f0, $f2
sub.d $f0, $f14, $f4
mul.d $f0, $f12
div.d $f0, $f6
add.d $f0, $f12
b tanret
6: /* tan(x) = c/(x+x*ss) */
mul.d $f2, $f12
add.d $f2, $f12
div.d $f0, $f6, $f2
b tanret
8: /* |round(x/(PI/2))| > 2^26 or x is NaN */
mfc1 t0, $f13
li t1, 0x7ff00000
and t0, t1
subu t2, t0, (1023+25)<<20
beq t0, t1, 9f
li.d $f2, OoPIo2
li.d $f6, PIo2hi
li.d $f8, PIo2lo
mfc1 t3, $f3
mfc1 t4, $f7
mfc1 t5, $f9
subu t3, t2
addu t4, t2
addu t5, t2
mtc1 t3, $f3
mtc1 t4, $f7
mtc1 t5, $f9
mul.d $f2, $f12
cvt.w.d $f4, $f2
cvt.d.w $f2, $f4
mul.d $f6, $f2 // exact (26 x 26 = 52 bits)
mul.d $f8, $f2 // exact (27 x 26 = 53 bits)
sub.d $f12, $f6 // exact
sub.d $f12, $f8 // exact
abs.d $f0, $f12
c.lt.d $f0, $f16 // if |round(x/(PI/2))| > 2^26, continue special
li t0, (1<<20)
bc1f 8b /// argument reduction
bne t2, t0, 1b
mfc1 t0, $f4
sll t0, 1
addu t7, t0
b 1b
tan_err:
// |x| >= 2^63
ctc1 t6, $31 // restore original fcsr
li a1, OP_TAN // operation code (funtion name index)
jal set_trigm_err
b tanret1
// REVIEW is this correct?
9: /* x is NaN or Infinity */
sub.d $f0, $f12, $f12
mov.d $f2, $f0
tanret:
ctc1 t6, $31 // restore original fcsr
tanret1:
lw ra, FSIZE-4(sp)
addu sp, FSIZE
j ra
.end tan
.extern _except1
.text .text$trigm
.ent set_trigm_err
set_trigm_err:
#undef FSIZE
#define FSIZE 48
.frame sp, FSIZE, ra
.mask 0x80000000, -4
subu sp, FSIZE
sw ra, FSIZE-4(sp)
.prologue 1
li a0, (FP_TLOSS | FP_I) // exception mask
// a1 passed by caller = operation code (funtion name index)
mfc1.d a2, $f12 // arg1
li.d $f0, 0.0 // generate a NaN
div.d $f0, $f0
s.d $f0, 16(sp) // default result
cfc1 t7, $31 // fp control/status register
xor t7, t7, 0xf80 // inverse exception enable bits
sw t7, 24(sp) // goes on parameter stack
jal _except1
lw ra, FSIZE-4(sp)
addu sp, FSIZE
j ra
#undef FSIZE
.end set_trigm_err