Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

776 lines
14 KiB

/*
* |-----------------------------------------------------------|
* | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. |
* | All Rights Reserved |
* |-----------------------------------------------------------|
* | Restricted Rights Legend |
* | Use, duplication, or disclosure by the Government is |
* | subject to restrictions as set forth in |
* | subparagraph (c)(1)(ii) of the Rights in Technical |
* | Data and Computer Software Clause of DFARS 252.227-7013. |
* | MIPS Computer Systems, Inc. |
* | 950 DeGuigne Avenue |
* | Sunnyvale, California 94088-3650, USA |
* |-----------------------------------------------------------|
*/
/* $Header: pow.s,v 3000.4.1.15 92/02/13 10:37:44 zaineb Exp $ */
/* Algorithm from
"Table-driven Implementation of the Power Function
in IEEE Floating-Point Arithmetic", Peter Tang and Earl Killian
Coded in MIPS assembler by Earl Killian.
*/
/*
Jun-06-94 Xzero not always setting $f0 before calling _set_pow_err.
*/
.globl pow /* double pow(double x, double y) */
#include <kxmips.h>
#include <trans.h>
#include <fpieee.h>
#define OVERFLOW_EXC_BIT 0x4000
#define UNDERFLOW_EXC_BIT 0x2000
.extern _logtable
.extern _exptable
.extern _d_ind 8
#define D_IND _d_ind
.text
.ent pow
pow:
#define FSIZE 64
.frame sp, FSIZE, ra
.mask 0x80000000, -4
subu sp, FSIZE
sw ra, FSIZE-4(sp)
.prologue 1
#define X $f12
#define Xhi $f13
#define Xlo $f12
#define Y $f14
#define Yhi $f15
#define Ylo $f14
#define one $f18
.set noreorder
c.un.d X, Y /* test if either X or Y is NaN */
s.d $f12, 5*8(sp)
mfc1 t3, Yhi
mfc1 t2, Ylo
mfc1 t1, Xhi
mfc1 t0, Xlo
li.d one, 1.0
bc1t XYNaN
nop
bne t2, 0, 10f /* fast test to rule out special cases */
sll t4, t3, 12 /* that is Y != +-0.0, +-1.0, 2.0, Infinity */
bne t4, 0, 10f
sll t8, t3, 1
/* Y may be a special case */
beq t8, 0, retOne /* Y = +-0 */
li t7, 0x3ff00000
beq t3, t7, retX /* Y = 1 */
li t8, 0x40000000
beq t3, t8, retXsq /* Y = 2 */
sll t4, t3, 1
li t8, 0x7ff00000<<1
beq t4, t8, Yinfinite /* Y = +-Infinity */
nop
10:
cfc1 t6, $31 /* save rounding mode, etc. */
ctc1 $0, $31 /* set round to nearest */
bltz t1, Xnegative
li t5, 0 /* result sign */
12:
bne t0, 0, 14f /* eliminate X = 0 and X = Infinity */
li t7, 0x7ff00000
beq t1, 0, Xzero /* X = +-0 */
nop
beq t1, t7, Xinfinite /* X = Infinity */
nop
.set reorder
14: /* save registers */
s.d $f20, 0*8(sp)
s.d $f22, 1*8(sp)
s.d $f24, 2*8(sp)
/*s.d $f30, 5*8(sp)*/
abs.d $f4, Y
li.d $f6, 3.1965771613006643e18
c.lt.d $f6, $f4
li.d $f0, 0.984375
li.d $f2, 1.015625
bc1t Ybig
/* finally we've eliminated all the special cases and can get
down to computing X^Y */
/* Procedure L */
/* outputs */
#define z1 $f0
#define z2 $f2
/* save more registers */
s.d $f26, 3*8(sp)
s.d $f28, 4*8(sp)
.fmask 0x3FF00000, -FSIZE
c.lt.d $f0, X
bc1f Lnormal
c.lt.d X, $f2
bc1f Lnormal
/* Procedure Lsmall */
#define g $f6
#define f $f0
#define f1 $f2
#define F2 $f4
#define u $f8
#define u1 $f20
#define v $f10
#define q $f22
#define c1 $f16
#define c2 $f18
add.d g, X, one /* g = 1.0 / (1.0 + X) */
div.d g, one, g
sub.d f, X, one /* f = X - 1.0 */
cvt.s.d f1, f /* f1 = (float)f */
cvt.d.s f1
sub.d F2, f, f1 /* f2 = f - f1 */
add.d u, f, f /* u = 2 * f * g */
mul.d u, g
mul.d v, u, u /* v = u * u */
cvt.s.d u1, u /* u1 = (float)u */
cvt.d.s u1
/* q = u * (v * (C1 + v * (C2 + v * (C3 + v * (C4 + v * C5))))) */
li.d c2, 4.4072021372392785e-04 /* C5 */
mul.d q, v, c2
add.d q, c2
li.d c1, 2.2321412321046185e-03 /* C3 */
mul.d q, v
add.d q, c1
li.d c2, 1.2500000000155512e-02 /* C2 */
mul.d q, v
add.d q, c2
li.d c1, 8.3333333333333329e-02 /* C1 */
mul.d q, v
add.d q, c1
mul.d q, v
mul.d q, u
sub.d f, u1 /* u2 = 2 * (f - u1) */
add.d f, f
mul.d f1, u1 /* u2 = u2 - u1*f1 * u1*f2 */
mul.d F2, u1
sub.d f, f1
sub.d f, F2
mul.d z2, g, z1 /* z2 = g * u2 + q */
add.d z2, q
add.d z1, u1, z2 /* z1 = (float)(u1 + z2) */
cvt.s.d z1
cvt.d.s z1
sub.d u1, z1 /* z2 = z2 + (u1 - z1) */
add.d z2, u1
j M
#undef g
#undef f
#undef f1
#undef F2
#undef u
#undef u1
#undef v
#undef q
#undef c1
#undef c2
/* Procedure Lnormal */
#define l1 $f0
#define l2 $f2
#define F $f8
#define f $f4
#define g $f6
#define u $f20
#define m $f2
#define c0 $f22
#define d0 $f0
#define d1 $f2
#define d2 $f10
#define d3 $f16
#define d4 $f26
#define d5 $f28
#define v $f10
#define u1 $f16
#define f1 $f22
#define F2 $f24
#define u2 $f26
#define d6 $f28
#define d7 $f26
#define d8 $f6
#define d9 $f10
#define d10 $f12
#define d11 $f18
#define q $f6
#define d12 $f4
#define d13 $f8
#define d14 $f10
#define d15 $f18
#define c1 $f18
#define c2 $f12
Lnormal:
li.d d9, 3.5184372088832000e+13 /* 2^(52-7) */
srl t8, t1, 20
beq t8, 0, Xdenorm
subu t2, t8, 1023
sll t3, t2, 20
subu t1, t3
mtc1 t1, Xhi
16: /* Xdenorm returns here */
add.d d0, X, d9 /* j = rint(x * 128) */
mfc1 t1, d0
sll t1, 4
sub.d F, d0, d9 /* F = j/128 */
add.d d1, F, X /* g = 1.0 / (F + x) */
div.d g, one, d1
sub.d f, X, F /* f = x - F */
la t4, _logtable
addu t1, t4
l.d d2, 128*16+0(t4) /* log2head */
l.d d3, 128*16+8(t4) /* log2trail */
mtc1 t2, m
cvt.d.w m /* m */
mul.d l1, m, d2 /* a1 = m * log2lead */
mul.d l2, m, d3 /* a2 = m * log2trail */
l.d d4, -128*16+0(t1)
l.d d5, -128*16+8(t1)
add.d l1, d4 /* a1 = a1 + logtable[j-128] */
add.d l2, d5 /* a2 = a2 + logtable[j-128] */
add.d u, f, f /* u = 2 * f * g */
mul.d u, g
cvt.s.d u1, u /* u1 = (float)u */
cvt.d.s u1
mul.d v, u, u /* v = u * u */
mul.d c0, Y, l1 /* c = abs(Y * a1) */
abs.d c0
li.d d7, 16.0
c.lt.d c0, d7
bc1t 20f
/* c >= 16.0 */
cvt.s.d f1, f /* f1 = (float)f */
cvt.d.s f1
sub.d F2, f, f1 /* f2 = f - f1 */
mul.d u2, u1, F /* u2 = 2 * (f - u1 * F) */
sub.d u2, f, u2
add.d u2, u2
mul.d d15, u1, f1 /* u2 = u2 - u1 * f1 */
sub.d u2, d15
mul.d d6, u1, F2 /* u2 = u2 - u1 * f2 */
sub.d u2, d6
mul.d u2, g /* u2 = u2 * g */
/* q = u * (v * (A1 + v * (A2 + v * A3))) */
li.d q, 2.2321229798769144e-03 /* A3 */
li.d c2, 1.2500000000716587e-02 /* A2 */
mul.d q, v
add.d q, c2
li.d c1, 8.3333333333333329e-02 /* A1 */
mul.d q, v
add.d q, c1
j 30f
20:
/* c < 16.0 */
/* u2 = g * (2 * (f - u1 * F) - u1 * f) */
mul.d d10, u1, F
sub.d d10, f, d10
add.d d10, d10
mul.d d11, u1, f
sub.d d10, d11
mul.d u2, d10, g
li.d d8, 0.125
c.lt.d c0, d8
bc1t 25f
/* c >= 0.125 */
/* q = u * (v * (A1 + v * (A2 + v * A3))) */
li.d q, 2.2321229798769144e-03 /* A3 */
li.d c2, 1.2500000000716587e-02 /* A2 */
mul.d q, v
add.d q, c2
li.d c1, 8.3333333333333329e-02 /* A1 */
mul.d q, v
add.d q, c1
j 30f
25:
/* c < 0.125 */
/* q = u * (v * (B1 + v * B2)) */
li.d q, 1.2500055860192138e-02 /* B2 */
li.d c1, 8.3333333333008588e-02 /* B1 */
mul.d q, v
add.d q, c1
30:
mul.d q, v
mul.d q, u
add.d d12, l1, u1 /* t = a1 + u1 */
sub.d d13, l1, d12 /* a2 = a2 + ((a1 - t) + u1) */
add.d d13, u1
add.d l2, d13
add.d q, u2 /* p = u2 + q */
add.d z2, q, l2 /* z2 = p + a2 */
add.d z1, d12, z2 /* z1 = (float)(t + z2) */
cvt.s.d z1
cvt.d.s z1
sub.d d14, d12, z1 /* z2 = z2 + (t - z1) */
add.d z2, d14
#undef l1
#undef l2
#undef F
#undef f
#undef g
#undef u
#undef m
#undef c
#undef d0
#undef d1
#undef d2
#undef d3
#undef d4
#undef d5
#undef v
#undef u1
#undef f1
#undef F2
#undef u2
#undef d6
#undef d7
#undef d8
#undef d9
#undef d10
#undef d11
#undef q
#undef d12
#undef d13
#undef d14
#undef c1
#undef c2
M: /* restore registers */
l.d $f20, 0*8(sp)
l.d $f22, 1*8(sp)
l.d $f24, 2*8(sp)
l.d $f26, 3*8(sp)
l.d $f28, 4*8(sp)
/*l.d $f30, 5*8(sp)*/
/* Procedure M */
#define w1 $f6
#define w2 $f8
#define y1 $f16
#define y2 $f18
#define d0 $f12
#define d1 $f4
cvt.s.d y1, Y
cvt.d.s y1
sub.d y2, Y, y1
mul.d d0, y2, z1
mul.d d1, y2, z2
mul.d w2, y1, z2
add.d d0, d1
mul.d w1, y1, z1
add.d w2, d0
/* Procedure E */
li.d $f10, 4.6166241308446828e+01 /* Inv_L */
mul.d $f10, w1
/* Check for gross overflow or underflow. */
li.d $f16, 2000.0
neg.d $f18, $f16
c.lt.d w1, $f16
bc1f Overflow
c.lt.d w1, $f18
bc1t Underflow
cvt.w.d $f10
mfc1 t0, $f10
and t1, t0, 31 /* region */
sra t2, t0, 5 /* scale */
cvt.d.w $f12, $f10
li.d $f2, 2.1660849390173098e-02 /* L1 */
mul.d $f2, $f12
sub.d $f10, w1, $f2
add.d $f10, w2
li.d $f4, 2.3251928468788740e-12 /* L2 */
mul.d $f4, $f12
sub.d $f10, $f4
li.d $f0, 1.3888944287816253e-03 /* P5 */
li.d $f2, 8.3333703801026920e-03 /* P4 */
mul.d $f0, $f10
li.d $f4, 4.1666666666361998e-02 /* P3 */
add.d $f0, $f2
mul.d $f0, $f10
li.d $f2, 1.6666666666505991e-01 /* P2 */
add.d $f0, $f4
mul.d $f0, $f10
li.d $f4, 5.0000000000000000e-01 /* P1 */
add.d $f0, $f2
mul.d $f0, $f10
add.d $f0, $f4
mul.d $f0, $f10
mul.d $f0, $f10
add.d $f0, $f10
addu t4, t2, 1023
sll t3, t4, 20
or t3, t5
mtc1 t3, $f9
mtc1 $0, $f8
sll t1, 4
la t1, _exptable(t1)
l.d $f2, 0(t1)
l.d $f4, 8(t1)
add.d $f6, $f2, $f4
mul.d $f0, $f6
add.d $f0, $f4
.set noreorder
ctc1 t6, $31 /* restore rounding mode */
slt t7, t4, 2047
blez t4, 90f
add.d $f0, $f2 /* add in high part */
beq t7, 0, 90f
nop
mul.d $f0, $f8 /* sign and exponent */
cfc1 t0, $31 /* Check for overflow/underflow */
nop
and t1, t0, OVERFLOW_EXC_BIT
bne t1, 0, Overflow
and t2, t0, UNDERFLOW_EXC_BIT
bne t2, 0, Underflow
nop
j ret
nop
90: /* scale is outside of 2^-1022 to 2^1023 -- do it the slow way */
mfc1 t0, $f1 /* get result high word */
nop
srl t1, t0, 20
addu t1, t2 /* add scale to check for denorm */
blez t1, 92f
slt t7, t1, 2047
beq t7, 0, Overflow
sll t2, 20
addu t0, t2 /* add scale */
or t0, t5 /* add sign */
mtc1 t0, $f1 /* put back in result high word */
nop
j ret
nop
92: /* denorm result */
addu t1, 64
blez t1, Underflow
addu t2, 64
sll t2, 20
addu t0, t2
mtc1 t0, $f1
li.d $f2, 5.4210108624275222e-20 /* 2^-64 */
nop
mul.d $f0, $f2
cfc1 t0, $31 /* Check for overflow/underflow */
nop
and t1, t0, OVERFLOW_EXC_BIT
bne t1, 0, Overflow
and t2, t0, UNDERFLOW_EXC_BIT
bne t2, 0, Underflow
nop
j ret
nop
Ybig:
li.d $f2, 1.0
abs.d $f0, X
c.eq.d $f0, $f2
nop
bc1t retOne
c.lt.d $f0, $f2
bc1t Underflow
nop
/* fall through to Overflow */
Overflow:
ctc1 t6, $31 /* restore rounding mode */
94: li.d $f0, 8.9884656743115795e+307
bne t5, 0, 96f
mov.d $f0, Y
li.d $f2, 0.0
l.d $f12,5*8(sp)
c.le.d $f0, $f2
nop
bc1t Underf
nop
li t8, FP_O
j Calerr
nop
Underf:
li t8, FP_U
Calerr:
jal set_pow_err
nop
j ret
mul.d $f0, $f0
96: neg.d $f2, $f0
l.d $f12,5*8(sp)
li t8, FP_O
jal set_pow_err
mul.d $f0, $f2
j ret
nop
Underflow:
li.d $f0, 0.0
beq t5, 0, 1f
ctc1 t6, $31
nop
neg.d $f0
1:
l.d $f12,5*8(sp)
li t8, FP_U
jal set_pow_err
nop
j ret
nop
.set reorder
Xdenorm:
li.d $f0, 1.8446744073709552e+19 /* 2^64 */
mul.d X, $f0
mfc1 t0, Xhi
sll t1, t0, 1
srl t1, 21
subu t2, t1, 1023
sll t3, t2, 20
subu t0, t3
mtc1 t0, Xhi
subu t2, 64
j 16b
.set noreorder
Xnegative:
li.d $f2, 9.0071992547409920e+15 /* 2^53 */
abs.d $f0, Y
neg.d X
c.lt.d $f0, $f2
add.d $f4, $f2, $f0
mfc1 t1, Xhi
bc1f 12b /* if abs(Y) >= 2^53, then it is an even
integer */
sub.d $f4, $f2
c.eq.d $f4, $f0
li.d $f2, 4.5035996273704960e+15 /* 2^52 */
bc1t 12b /* if (abs(Y)+2^53)-2^53 = Y then it is
an even integer */
c.lt.d $f0, $f2
add.d $f4, $f2, $f0
li t5, 0x80000000 /* result is negative */
bc1f 12b /* if abs(Y) >= 2^52, then it is an integer */
sub.d $f4, $f2
c.eq.d $f4, $f0
nop
bc1t 12b /* if (abs(Y)+2^52)-2^52 = Y then it is
an integer */
nop
/* Y is not an integer */
bne t1, 0, retNaN
ctc1 t6, $31
bne t0, 0, retNaN
nop
bgez t3, 1f
mov.d $f0, X /* +0 */
div.d $f0, one, $f0 /* -Infinity */
nop
1: j ret
nop
Xzero: /* X = +-0 */
/* t3 = Yhi */
/* t2 = Ylo */
bnez t2, 1f
nop
beqz t3, retOne
nop
1:
bgtz t3, retZero
nop
div.d $f0, one, X
bnel t5, 0, 2f
neg.d $f0
2: li t8, FP_Z
jal set_pow_err
nop
j ret
nop
Xinfinite:
bgez t3, 1f
ctc1 t6, $31
nop
div.d X, one, X
1:
beq t5, 0, retX
nop
neg.d $f0, X
j ret
nop
/* below here, FCSR does not need to be restored */
XYNaN:
bne t2, 0, 1f
c.eq.d X, X
sll t8, t3, 1
beq t8, 0, retOne
1: nop
bc1f retX
mov.d $f0, Y
nop
j ret
nop
Yinfinite:
abs.d $f0, X
c.eq.d $f0, one
bc1t retNaN
c.lt.d $f0, one
nop
bc1t 1f
nop
bltz t3, retZero
nop
mov.d $f0, Y
nop
j ret
nop
1: bgez t3, retZero
nop
neg.d $f0, Y
nop
j ret
nop
.set reorder
retNaN:
li.d $f0, 0.0 // generate a NaN
div.d $f0, $f0
l.d Y, 5*8(sp) // restore the second argument
li t8, FP_I
jal set_pow_err
j ret
retXsq:
/* t1 = Xhi */
/* t0 = Xlo */
bnez t0, 1f
beqz t1, retZero /* x = +-0 */
1:
mul.d $f0, X, X
j ret
retOne:
mov.d $f0, one
j ret
retZero:
li.d $f0, 0.0
j ret
retX:
mov.d $f0, X
j ret
retooX:
div.d $f0, one, X
bne t1, 0, ret /* X = +-0 */
li t8, FP_I
l.d $f12, 5*8(sp)
jal set_pow_err
j ret
ret: lw ra, FSIZE-4(sp)
addu sp, FSIZE
j ra
.end pow
.extern _except2
.text
.set reorder
/* t8 = exception mask, $f0 = default result, $f12 = arg1, $f14 = arg2 */
.ent set_pow_err
set_pow_err:
#define FMSIZE 48
.frame sp, FMSIZE, ra
.mask 0x80000000, -4
subu sp, FMSIZE
sw ra, FMSIZE-4(sp)
.prologue 1
move $4, t8 // exception mask
li $5, OP_POW // operation code (funtion name index)
mfc1.d $6, $f12 // arg1
s.d $f14, 16(sp) // arg2
s.d $f0, 24(sp) // default result
cfc1 t7, $31 // floating point control/status register
xor t7, t7, 0xf80 // inverse exception enable bits
sw t7, 32(sp)
jal _except2
lw ra, FMSIZE-4(sp)
addu sp, FMSIZE
j ra
.end set_pow_err