mirror of https://github.com/lianthony/NT4.0
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
776 lines
14 KiB
776 lines
14 KiB
/*
|
|
* |-----------------------------------------------------------|
|
|
* | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. |
|
|
* | All Rights Reserved |
|
|
* |-----------------------------------------------------------|
|
|
* | Restricted Rights Legend |
|
|
* | Use, duplication, or disclosure by the Government is |
|
|
* | subject to restrictions as set forth in |
|
|
* | subparagraph (c)(1)(ii) of the Rights in Technical |
|
|
* | Data and Computer Software Clause of DFARS 252.227-7013. |
|
|
* | MIPS Computer Systems, Inc. |
|
|
* | 950 DeGuigne Avenue |
|
|
* | Sunnyvale, California 94088-3650, USA |
|
|
* |-----------------------------------------------------------|
|
|
*/
|
|
/* $Header: pow.s,v 3000.4.1.15 92/02/13 10:37:44 zaineb Exp $ */
|
|
|
|
/* Algorithm from
|
|
"Table-driven Implementation of the Power Function
|
|
in IEEE Floating-Point Arithmetic", Peter Tang and Earl Killian
|
|
Coded in MIPS assembler by Earl Killian.
|
|
*/
|
|
|
|
/*
|
|
Jun-06-94 Xzero not always setting $f0 before calling _set_pow_err.
|
|
*/
|
|
|
|
.globl pow /* double pow(double x, double y) */
|
|
|
|
#include <kxmips.h>
|
|
#include <trans.h>
|
|
#include <fpieee.h>
|
|
|
|
|
|
#define OVERFLOW_EXC_BIT 0x4000
|
|
#define UNDERFLOW_EXC_BIT 0x2000
|
|
|
|
.extern _logtable
|
|
.extern _exptable
|
|
|
|
.extern _d_ind 8
|
|
#define D_IND _d_ind
|
|
|
|
.text
|
|
|
|
.ent pow
|
|
pow:
|
|
#define FSIZE 64
|
|
.frame sp, FSIZE, ra
|
|
.mask 0x80000000, -4
|
|
subu sp, FSIZE
|
|
sw ra, FSIZE-4(sp)
|
|
.prologue 1
|
|
|
|
#define X $f12
|
|
#define Xhi $f13
|
|
#define Xlo $f12
|
|
#define Y $f14
|
|
#define Yhi $f15
|
|
#define Ylo $f14
|
|
|
|
#define one $f18
|
|
|
|
.set noreorder
|
|
c.un.d X, Y /* test if either X or Y is NaN */
|
|
s.d $f12, 5*8(sp)
|
|
mfc1 t3, Yhi
|
|
mfc1 t2, Ylo
|
|
mfc1 t1, Xhi
|
|
mfc1 t0, Xlo
|
|
li.d one, 1.0
|
|
bc1t XYNaN
|
|
nop
|
|
|
|
bne t2, 0, 10f /* fast test to rule out special cases */
|
|
sll t4, t3, 12 /* that is Y != +-0.0, +-1.0, 2.0, Infinity */
|
|
bne t4, 0, 10f
|
|
sll t8, t3, 1
|
|
/* Y may be a special case */
|
|
beq t8, 0, retOne /* Y = +-0 */
|
|
li t7, 0x3ff00000
|
|
beq t3, t7, retX /* Y = 1 */
|
|
li t8, 0x40000000
|
|
beq t3, t8, retXsq /* Y = 2 */
|
|
sll t4, t3, 1
|
|
li t8, 0x7ff00000<<1
|
|
beq t4, t8, Yinfinite /* Y = +-Infinity */
|
|
nop
|
|
10:
|
|
cfc1 t6, $31 /* save rounding mode, etc. */
|
|
ctc1 $0, $31 /* set round to nearest */
|
|
|
|
bltz t1, Xnegative
|
|
li t5, 0 /* result sign */
|
|
12:
|
|
bne t0, 0, 14f /* eliminate X = 0 and X = Infinity */
|
|
li t7, 0x7ff00000
|
|
beq t1, 0, Xzero /* X = +-0 */
|
|
nop
|
|
beq t1, t7, Xinfinite /* X = Infinity */
|
|
nop
|
|
|
|
.set reorder
|
|
|
|
14: /* save registers */
|
|
s.d $f20, 0*8(sp)
|
|
s.d $f22, 1*8(sp)
|
|
s.d $f24, 2*8(sp)
|
|
/*s.d $f30, 5*8(sp)*/
|
|
|
|
abs.d $f4, Y
|
|
li.d $f6, 3.1965771613006643e18
|
|
c.lt.d $f6, $f4
|
|
li.d $f0, 0.984375
|
|
li.d $f2, 1.015625
|
|
bc1t Ybig
|
|
|
|
/* finally we've eliminated all the special cases and can get
|
|
down to computing X^Y */
|
|
|
|
/* Procedure L */
|
|
/* outputs */
|
|
#define z1 $f0
|
|
#define z2 $f2
|
|
|
|
/* save more registers */
|
|
s.d $f26, 3*8(sp)
|
|
s.d $f28, 4*8(sp)
|
|
.fmask 0x3FF00000, -FSIZE
|
|
c.lt.d $f0, X
|
|
bc1f Lnormal
|
|
c.lt.d X, $f2
|
|
bc1f Lnormal
|
|
|
|
/* Procedure Lsmall */
|
|
|
|
#define g $f6
|
|
#define f $f0
|
|
#define f1 $f2
|
|
#define F2 $f4
|
|
#define u $f8
|
|
#define u1 $f20
|
|
#define v $f10
|
|
#define q $f22
|
|
#define c1 $f16
|
|
#define c2 $f18
|
|
|
|
add.d g, X, one /* g = 1.0 / (1.0 + X) */
|
|
div.d g, one, g
|
|
sub.d f, X, one /* f = X - 1.0 */
|
|
cvt.s.d f1, f /* f1 = (float)f */
|
|
cvt.d.s f1
|
|
sub.d F2, f, f1 /* f2 = f - f1 */
|
|
add.d u, f, f /* u = 2 * f * g */
|
|
mul.d u, g
|
|
mul.d v, u, u /* v = u * u */
|
|
cvt.s.d u1, u /* u1 = (float)u */
|
|
cvt.d.s u1
|
|
|
|
/* q = u * (v * (C1 + v * (C2 + v * (C3 + v * (C4 + v * C5))))) */
|
|
|
|
li.d c2, 4.4072021372392785e-04 /* C5 */
|
|
mul.d q, v, c2
|
|
add.d q, c2
|
|
li.d c1, 2.2321412321046185e-03 /* C3 */
|
|
mul.d q, v
|
|
add.d q, c1
|
|
li.d c2, 1.2500000000155512e-02 /* C2 */
|
|
mul.d q, v
|
|
add.d q, c2
|
|
li.d c1, 8.3333333333333329e-02 /* C1 */
|
|
mul.d q, v
|
|
add.d q, c1
|
|
mul.d q, v
|
|
mul.d q, u
|
|
|
|
sub.d f, u1 /* u2 = 2 * (f - u1) */
|
|
add.d f, f
|
|
mul.d f1, u1 /* u2 = u2 - u1*f1 * u1*f2 */
|
|
mul.d F2, u1
|
|
sub.d f, f1
|
|
sub.d f, F2
|
|
|
|
mul.d z2, g, z1 /* z2 = g * u2 + q */
|
|
add.d z2, q
|
|
add.d z1, u1, z2 /* z1 = (float)(u1 + z2) */
|
|
cvt.s.d z1
|
|
cvt.d.s z1
|
|
sub.d u1, z1 /* z2 = z2 + (u1 - z1) */
|
|
add.d z2, u1
|
|
|
|
j M
|
|
|
|
#undef g
|
|
#undef f
|
|
#undef f1
|
|
#undef F2
|
|
#undef u
|
|
#undef u1
|
|
#undef v
|
|
#undef q
|
|
#undef c1
|
|
#undef c2
|
|
|
|
|
|
/* Procedure Lnormal */
|
|
|
|
#define l1 $f0
|
|
#define l2 $f2
|
|
#define F $f8
|
|
#define f $f4
|
|
#define g $f6
|
|
#define u $f20
|
|
#define m $f2
|
|
#define c0 $f22
|
|
#define d0 $f0
|
|
#define d1 $f2
|
|
#define d2 $f10
|
|
#define d3 $f16
|
|
#define d4 $f26
|
|
#define d5 $f28
|
|
#define v $f10
|
|
#define u1 $f16
|
|
#define f1 $f22
|
|
#define F2 $f24
|
|
#define u2 $f26
|
|
#define d6 $f28
|
|
#define d7 $f26
|
|
#define d8 $f6
|
|
#define d9 $f10
|
|
#define d10 $f12
|
|
#define d11 $f18
|
|
#define q $f6
|
|
#define d12 $f4
|
|
#define d13 $f8
|
|
#define d14 $f10
|
|
#define d15 $f18
|
|
#define c1 $f18
|
|
#define c2 $f12
|
|
|
|
Lnormal:
|
|
li.d d9, 3.5184372088832000e+13 /* 2^(52-7) */
|
|
srl t8, t1, 20
|
|
beq t8, 0, Xdenorm
|
|
|
|
subu t2, t8, 1023
|
|
sll t3, t2, 20
|
|
subu t1, t3
|
|
mtc1 t1, Xhi
|
|
16: /* Xdenorm returns here */
|
|
add.d d0, X, d9 /* j = rint(x * 128) */
|
|
mfc1 t1, d0
|
|
sll t1, 4
|
|
sub.d F, d0, d9 /* F = j/128 */
|
|
add.d d1, F, X /* g = 1.0 / (F + x) */
|
|
div.d g, one, d1
|
|
sub.d f, X, F /* f = x - F */
|
|
la t4, _logtable
|
|
addu t1, t4
|
|
l.d d2, 128*16+0(t4) /* log2head */
|
|
l.d d3, 128*16+8(t4) /* log2trail */
|
|
mtc1 t2, m
|
|
cvt.d.w m /* m */
|
|
mul.d l1, m, d2 /* a1 = m * log2lead */
|
|
mul.d l2, m, d3 /* a2 = m * log2trail */
|
|
l.d d4, -128*16+0(t1)
|
|
l.d d5, -128*16+8(t1)
|
|
add.d l1, d4 /* a1 = a1 + logtable[j-128] */
|
|
add.d l2, d5 /* a2 = a2 + logtable[j-128] */
|
|
|
|
add.d u, f, f /* u = 2 * f * g */
|
|
mul.d u, g
|
|
cvt.s.d u1, u /* u1 = (float)u */
|
|
cvt.d.s u1
|
|
mul.d v, u, u /* v = u * u */
|
|
|
|
mul.d c0, Y, l1 /* c = abs(Y * a1) */
|
|
abs.d c0
|
|
li.d d7, 16.0
|
|
c.lt.d c0, d7
|
|
bc1t 20f
|
|
|
|
/* c >= 16.0 */
|
|
|
|
cvt.s.d f1, f /* f1 = (float)f */
|
|
cvt.d.s f1
|
|
sub.d F2, f, f1 /* f2 = f - f1 */
|
|
|
|
mul.d u2, u1, F /* u2 = 2 * (f - u1 * F) */
|
|
sub.d u2, f, u2
|
|
add.d u2, u2
|
|
mul.d d15, u1, f1 /* u2 = u2 - u1 * f1 */
|
|
sub.d u2, d15
|
|
mul.d d6, u1, F2 /* u2 = u2 - u1 * f2 */
|
|
sub.d u2, d6
|
|
mul.d u2, g /* u2 = u2 * g */
|
|
|
|
/* q = u * (v * (A1 + v * (A2 + v * A3))) */
|
|
li.d q, 2.2321229798769144e-03 /* A3 */
|
|
li.d c2, 1.2500000000716587e-02 /* A2 */
|
|
mul.d q, v
|
|
add.d q, c2
|
|
li.d c1, 8.3333333333333329e-02 /* A1 */
|
|
mul.d q, v
|
|
add.d q, c1
|
|
|
|
j 30f
|
|
|
|
20:
|
|
/* c < 16.0 */
|
|
|
|
/* u2 = g * (2 * (f - u1 * F) - u1 * f) */
|
|
mul.d d10, u1, F
|
|
sub.d d10, f, d10
|
|
add.d d10, d10
|
|
mul.d d11, u1, f
|
|
sub.d d10, d11
|
|
mul.d u2, d10, g
|
|
li.d d8, 0.125
|
|
c.lt.d c0, d8
|
|
bc1t 25f
|
|
|
|
/* c >= 0.125 */
|
|
|
|
/* q = u * (v * (A1 + v * (A2 + v * A3))) */
|
|
li.d q, 2.2321229798769144e-03 /* A3 */
|
|
li.d c2, 1.2500000000716587e-02 /* A2 */
|
|
mul.d q, v
|
|
add.d q, c2
|
|
li.d c1, 8.3333333333333329e-02 /* A1 */
|
|
mul.d q, v
|
|
add.d q, c1
|
|
j 30f
|
|
|
|
25:
|
|
/* c < 0.125 */
|
|
|
|
/* q = u * (v * (B1 + v * B2)) */
|
|
li.d q, 1.2500055860192138e-02 /* B2 */
|
|
li.d c1, 8.3333333333008588e-02 /* B1 */
|
|
mul.d q, v
|
|
add.d q, c1
|
|
|
|
30:
|
|
mul.d q, v
|
|
mul.d q, u
|
|
|
|
add.d d12, l1, u1 /* t = a1 + u1 */
|
|
sub.d d13, l1, d12 /* a2 = a2 + ((a1 - t) + u1) */
|
|
add.d d13, u1
|
|
add.d l2, d13
|
|
add.d q, u2 /* p = u2 + q */
|
|
add.d z2, q, l2 /* z2 = p + a2 */
|
|
add.d z1, d12, z2 /* z1 = (float)(t + z2) */
|
|
cvt.s.d z1
|
|
cvt.d.s z1
|
|
sub.d d14, d12, z1 /* z2 = z2 + (t - z1) */
|
|
add.d z2, d14
|
|
|
|
#undef l1
|
|
#undef l2
|
|
#undef F
|
|
#undef f
|
|
#undef g
|
|
#undef u
|
|
#undef m
|
|
#undef c
|
|
#undef d0
|
|
#undef d1
|
|
#undef d2
|
|
#undef d3
|
|
#undef d4
|
|
#undef d5
|
|
#undef v
|
|
#undef u1
|
|
#undef f1
|
|
#undef F2
|
|
#undef u2
|
|
#undef d6
|
|
#undef d7
|
|
#undef d8
|
|
#undef d9
|
|
#undef d10
|
|
#undef d11
|
|
#undef q
|
|
#undef d12
|
|
#undef d13
|
|
#undef d14
|
|
#undef c1
|
|
#undef c2
|
|
|
|
|
|
M: /* restore registers */
|
|
l.d $f20, 0*8(sp)
|
|
l.d $f22, 1*8(sp)
|
|
l.d $f24, 2*8(sp)
|
|
l.d $f26, 3*8(sp)
|
|
l.d $f28, 4*8(sp)
|
|
/*l.d $f30, 5*8(sp)*/
|
|
|
|
/* Procedure M */
|
|
|
|
#define w1 $f6
|
|
#define w2 $f8
|
|
#define y1 $f16
|
|
#define y2 $f18
|
|
#define d0 $f12
|
|
#define d1 $f4
|
|
cvt.s.d y1, Y
|
|
cvt.d.s y1
|
|
sub.d y2, Y, y1
|
|
mul.d d0, y2, z1
|
|
mul.d d1, y2, z2
|
|
mul.d w2, y1, z2
|
|
add.d d0, d1
|
|
mul.d w1, y1, z1
|
|
add.d w2, d0
|
|
|
|
/* Procedure E */
|
|
|
|
li.d $f10, 4.6166241308446828e+01 /* Inv_L */
|
|
mul.d $f10, w1
|
|
|
|
/* Check for gross overflow or underflow. */
|
|
li.d $f16, 2000.0
|
|
neg.d $f18, $f16
|
|
c.lt.d w1, $f16
|
|
bc1f Overflow
|
|
c.lt.d w1, $f18
|
|
bc1t Underflow
|
|
|
|
cvt.w.d $f10
|
|
mfc1 t0, $f10
|
|
and t1, t0, 31 /* region */
|
|
sra t2, t0, 5 /* scale */
|
|
cvt.d.w $f12, $f10
|
|
li.d $f2, 2.1660849390173098e-02 /* L1 */
|
|
mul.d $f2, $f12
|
|
sub.d $f10, w1, $f2
|
|
add.d $f10, w2
|
|
li.d $f4, 2.3251928468788740e-12 /* L2 */
|
|
mul.d $f4, $f12
|
|
sub.d $f10, $f4
|
|
|
|
li.d $f0, 1.3888944287816253e-03 /* P5 */
|
|
li.d $f2, 8.3333703801026920e-03 /* P4 */
|
|
mul.d $f0, $f10
|
|
li.d $f4, 4.1666666666361998e-02 /* P3 */
|
|
add.d $f0, $f2
|
|
mul.d $f0, $f10
|
|
li.d $f2, 1.6666666666505991e-01 /* P2 */
|
|
add.d $f0, $f4
|
|
mul.d $f0, $f10
|
|
li.d $f4, 5.0000000000000000e-01 /* P1 */
|
|
add.d $f0, $f2
|
|
mul.d $f0, $f10
|
|
add.d $f0, $f4
|
|
mul.d $f0, $f10
|
|
mul.d $f0, $f10
|
|
add.d $f0, $f10
|
|
|
|
addu t4, t2, 1023
|
|
sll t3, t4, 20
|
|
or t3, t5
|
|
mtc1 t3, $f9
|
|
mtc1 $0, $f8
|
|
|
|
sll t1, 4
|
|
la t1, _exptable(t1)
|
|
l.d $f2, 0(t1)
|
|
l.d $f4, 8(t1)
|
|
add.d $f6, $f2, $f4
|
|
|
|
mul.d $f0, $f6
|
|
add.d $f0, $f4
|
|
|
|
.set noreorder
|
|
ctc1 t6, $31 /* restore rounding mode */
|
|
slt t7, t4, 2047
|
|
blez t4, 90f
|
|
add.d $f0, $f2 /* add in high part */
|
|
beq t7, 0, 90f
|
|
nop
|
|
mul.d $f0, $f8 /* sign and exponent */
|
|
cfc1 t0, $31 /* Check for overflow/underflow */
|
|
nop
|
|
and t1, t0, OVERFLOW_EXC_BIT
|
|
bne t1, 0, Overflow
|
|
and t2, t0, UNDERFLOW_EXC_BIT
|
|
bne t2, 0, Underflow
|
|
nop
|
|
j ret
|
|
nop
|
|
|
|
90: /* scale is outside of 2^-1022 to 2^1023 -- do it the slow way */
|
|
|
|
mfc1 t0, $f1 /* get result high word */
|
|
nop
|
|
srl t1, t0, 20
|
|
addu t1, t2 /* add scale to check for denorm */
|
|
blez t1, 92f
|
|
slt t7, t1, 2047
|
|
beq t7, 0, Overflow
|
|
sll t2, 20
|
|
addu t0, t2 /* add scale */
|
|
or t0, t5 /* add sign */
|
|
mtc1 t0, $f1 /* put back in result high word */
|
|
nop
|
|
j ret
|
|
nop
|
|
92: /* denorm result */
|
|
addu t1, 64
|
|
blez t1, Underflow
|
|
addu t2, 64
|
|
sll t2, 20
|
|
addu t0, t2
|
|
mtc1 t0, $f1
|
|
li.d $f2, 5.4210108624275222e-20 /* 2^-64 */
|
|
nop
|
|
mul.d $f0, $f2
|
|
cfc1 t0, $31 /* Check for overflow/underflow */
|
|
nop
|
|
and t1, t0, OVERFLOW_EXC_BIT
|
|
bne t1, 0, Overflow
|
|
and t2, t0, UNDERFLOW_EXC_BIT
|
|
bne t2, 0, Underflow
|
|
nop
|
|
j ret
|
|
nop
|
|
|
|
Ybig:
|
|
li.d $f2, 1.0
|
|
abs.d $f0, X
|
|
c.eq.d $f0, $f2
|
|
nop
|
|
bc1t retOne
|
|
c.lt.d $f0, $f2
|
|
bc1t Underflow
|
|
nop
|
|
/* fall through to Overflow */
|
|
|
|
Overflow:
|
|
ctc1 t6, $31 /* restore rounding mode */
|
|
94: li.d $f0, 8.9884656743115795e+307
|
|
bne t5, 0, 96f
|
|
mov.d $f0, Y
|
|
li.d $f2, 0.0
|
|
l.d $f12,5*8(sp)
|
|
c.le.d $f0, $f2
|
|
nop
|
|
bc1t Underf
|
|
nop
|
|
li t8, FP_O
|
|
j Calerr
|
|
nop
|
|
Underf:
|
|
li t8, FP_U
|
|
Calerr:
|
|
jal set_pow_err
|
|
nop
|
|
j ret
|
|
mul.d $f0, $f0
|
|
|
|
96: neg.d $f2, $f0
|
|
l.d $f12,5*8(sp)
|
|
li t8, FP_O
|
|
jal set_pow_err
|
|
mul.d $f0, $f2
|
|
j ret
|
|
nop
|
|
|
|
Underflow:
|
|
li.d $f0, 0.0
|
|
beq t5, 0, 1f
|
|
ctc1 t6, $31
|
|
nop
|
|
neg.d $f0
|
|
1:
|
|
l.d $f12,5*8(sp)
|
|
li t8, FP_U
|
|
jal set_pow_err
|
|
nop
|
|
j ret
|
|
nop
|
|
|
|
.set reorder
|
|
Xdenorm:
|
|
li.d $f0, 1.8446744073709552e+19 /* 2^64 */
|
|
mul.d X, $f0
|
|
mfc1 t0, Xhi
|
|
sll t1, t0, 1
|
|
srl t1, 21
|
|
subu t2, t1, 1023
|
|
sll t3, t2, 20
|
|
subu t0, t3
|
|
mtc1 t0, Xhi
|
|
subu t2, 64
|
|
j 16b
|
|
|
|
.set noreorder
|
|
Xnegative:
|
|
li.d $f2, 9.0071992547409920e+15 /* 2^53 */
|
|
abs.d $f0, Y
|
|
neg.d X
|
|
c.lt.d $f0, $f2
|
|
add.d $f4, $f2, $f0
|
|
mfc1 t1, Xhi
|
|
bc1f 12b /* if abs(Y) >= 2^53, then it is an even
|
|
integer */
|
|
sub.d $f4, $f2
|
|
c.eq.d $f4, $f0
|
|
li.d $f2, 4.5035996273704960e+15 /* 2^52 */
|
|
bc1t 12b /* if (abs(Y)+2^53)-2^53 = Y then it is
|
|
an even integer */
|
|
c.lt.d $f0, $f2
|
|
add.d $f4, $f2, $f0
|
|
li t5, 0x80000000 /* result is negative */
|
|
bc1f 12b /* if abs(Y) >= 2^52, then it is an integer */
|
|
sub.d $f4, $f2
|
|
c.eq.d $f4, $f0
|
|
nop
|
|
bc1t 12b /* if (abs(Y)+2^52)-2^52 = Y then it is
|
|
an integer */
|
|
nop
|
|
/* Y is not an integer */
|
|
bne t1, 0, retNaN
|
|
ctc1 t6, $31
|
|
bne t0, 0, retNaN
|
|
nop
|
|
bgez t3, 1f
|
|
mov.d $f0, X /* +0 */
|
|
div.d $f0, one, $f0 /* -Infinity */
|
|
nop
|
|
1: j ret
|
|
nop
|
|
|
|
Xzero: /* X = +-0 */
|
|
/* t3 = Yhi */
|
|
/* t2 = Ylo */
|
|
bnez t2, 1f
|
|
nop
|
|
beqz t3, retOne
|
|
nop
|
|
1:
|
|
bgtz t3, retZero
|
|
nop
|
|
div.d $f0, one, X
|
|
bnel t5, 0, 2f
|
|
neg.d $f0
|
|
2: li t8, FP_Z
|
|
jal set_pow_err
|
|
nop
|
|
j ret
|
|
nop
|
|
|
|
Xinfinite:
|
|
bgez t3, 1f
|
|
ctc1 t6, $31
|
|
nop
|
|
div.d X, one, X
|
|
1:
|
|
beq t5, 0, retX
|
|
nop
|
|
neg.d $f0, X
|
|
j ret
|
|
nop
|
|
|
|
/* below here, FCSR does not need to be restored */
|
|
|
|
XYNaN:
|
|
bne t2, 0, 1f
|
|
c.eq.d X, X
|
|
sll t8, t3, 1
|
|
beq t8, 0, retOne
|
|
1: nop
|
|
bc1f retX
|
|
mov.d $f0, Y
|
|
nop
|
|
j ret
|
|
nop
|
|
|
|
Yinfinite:
|
|
abs.d $f0, X
|
|
c.eq.d $f0, one
|
|
bc1t retNaN
|
|
c.lt.d $f0, one
|
|
nop
|
|
bc1t 1f
|
|
nop
|
|
bltz t3, retZero
|
|
nop
|
|
mov.d $f0, Y
|
|
nop
|
|
j ret
|
|
nop
|
|
1: bgez t3, retZero
|
|
nop
|
|
neg.d $f0, Y
|
|
nop
|
|
j ret
|
|
nop
|
|
|
|
.set reorder
|
|
retNaN:
|
|
li.d $f0, 0.0 // generate a NaN
|
|
div.d $f0, $f0
|
|
l.d Y, 5*8(sp) // restore the second argument
|
|
li t8, FP_I
|
|
jal set_pow_err
|
|
j ret
|
|
|
|
retXsq:
|
|
/* t1 = Xhi */
|
|
/* t0 = Xlo */
|
|
bnez t0, 1f
|
|
beqz t1, retZero /* x = +-0 */
|
|
1:
|
|
mul.d $f0, X, X
|
|
j ret
|
|
|
|
retOne:
|
|
mov.d $f0, one
|
|
j ret
|
|
|
|
retZero:
|
|
li.d $f0, 0.0
|
|
j ret
|
|
|
|
retX:
|
|
mov.d $f0, X
|
|
j ret
|
|
|
|
retooX:
|
|
div.d $f0, one, X
|
|
bne t1, 0, ret /* X = +-0 */
|
|
li t8, FP_I
|
|
l.d $f12, 5*8(sp)
|
|
jal set_pow_err
|
|
j ret
|
|
|
|
ret: lw ra, FSIZE-4(sp)
|
|
addu sp, FSIZE
|
|
j ra
|
|
|
|
.end pow
|
|
|
|
|
|
.extern _except2
|
|
|
|
.text
|
|
|
|
.set reorder
|
|
|
|
/* t8 = exception mask, $f0 = default result, $f12 = arg1, $f14 = arg2 */
|
|
.ent set_pow_err
|
|
set_pow_err:
|
|
#define FMSIZE 48
|
|
.frame sp, FMSIZE, ra
|
|
.mask 0x80000000, -4
|
|
subu sp, FMSIZE
|
|
sw ra, FMSIZE-4(sp)
|
|
.prologue 1
|
|
move $4, t8 // exception mask
|
|
li $5, OP_POW // operation code (funtion name index)
|
|
mfc1.d $6, $f12 // arg1
|
|
s.d $f14, 16(sp) // arg2
|
|
s.d $f0, 24(sp) // default result
|
|
cfc1 t7, $31 // floating point control/status register
|
|
xor t7, t7, 0xf80 // inverse exception enable bits
|
|
sw t7, 32(sp)
|
|
jal _except2
|
|
lw ra, FMSIZE-4(sp)
|
|
addu sp, FMSIZE
|
|
j ra
|
|
.end set_pow_err
|
|
|