Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

387 lines
8.5 KiB

/*
* |-----------------------------------------------------------|
* | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. |
* | All Rights Reserved |
* |-----------------------------------------------------------|
* | Restricted Rights Legend |
* | Use, duplication, or disclosure by the Government is |
* | subject to restrictions as set forth in |
* | subparagraph (c)(1)(ii) of the Rights in Technical |
* | Data and Computer Software Clause of DFARS 252.227-7013. |
* | MIPS Computer Systems, Inc. |
* | 950 DeGuigne Avenue |
* | Sunnyvale, California 94088-3650, USA |
* |-----------------------------------------------------------|
*/
/* $Header: fmod.s,v 3000.3.1.6 91/10/09 11:14:56 zaineb Exp $ */
.extern _except2
.extern errno 4
#include <kxmips.h>
#include <trans.h>
#include <fpieee.h>
/* double fmod(double x, double y) */
.text .text$fmodm
.ent fmod_small
fmod_small:
        .frame  sp, 16, ra
        .mask   0x80000000, 0
/*
 * fmod_small -- remainder when y is normal but "almost subnormal".
 *
 * Entered by a branch from fmod (not a call), so ra still holds the
 * return address of fmod's caller.
 *
 * On entry:
 *   f0   = |x|
 *   f2   = |y|
 *   $f12 = original x (still intact; used below to recover its sign)
 *   t0   = sign of x (high word of x)
 *   t2   = 2047<<20
 *   t3   = caller's fcsr
 *   fcsr = round-to-zero
 *
 * Strategy: scale both x and y by 2^57, compute the remainder with the
 * shared fmod1/fmod2 loop, and unscale the result by 2^-57.
 * fmod1/fmod2 restore fcsr from t3 and apply the sign in t0 before
 * returning here.
 */
        subu    sp, 16
        sw      ra, 16(sp)              /* ra lives at the original sp */
        .prologue 1
        li.d    $f18, 1.2474001934591999e+291   /* 2^(1024-57) */
        li.d    $f16, 1.4411518807585587e+17    /* 2^57 */
        c.lt.d  $f0, $f18               /* can |x| be scaled without overflow? */
        mul.d   $f2, $f16               /* y is always scaled */
        bc1t    10f

        /* x * 2^57 would overflow */
        /* first compute with unscaled x to chop it down to size */
        li      t0, 0                   /* keep the partial remainder positive */
        bal     fmod1
        li      t4, 1
        ctc1    t4, $31                 /* fmod1 restored fcsr; go back to round-to-zero */
        /*
         * Recover the sign of x for the final pass.  The sign bit is in
         * the high word of the double, so fetch all 64 bits and shift,
         * exactly as fmod and fmod_subnormal do.  (A 32-bit mfc1 of
         * $f12 would return the low mantissa word instead.)
         */
        dmfc1   t0, $f12
        dsra    t0, t0, 32
        li.d    $f16, 1.4411518807585587e+17    /* 2^57 */
        mul.d   $f0, $f16               /* now the partial remainder can be scaled */
        bal     fmod2
        b       20f

10:     mul.d   $f0, $f16               /* scale x as well */
        bal     fmod1

20:     li.d    $f16, 6.9388939039072284e-18    /* 2^-57 */
        mul.d   $f0, $f16               /* unscale the remainder */
        lw      ra, 16(sp)
        addu    sp, 16
        j       ra
.end fmod_small
.text .text$fmodm
/*
 * double fmod(double x, double y)
 *
 * In:   $f12 = x, $f14 = y
 * Out:  $f0  = remainder, carrying the sign of x
 *
 * Special cases handled here:
 *   x or y NaN      -> returns the NaN operand            (label 70)
 *   x = +-Infinity  -> Invalid, returns NaN               (label 80)
 *   y = +-0         -> Invalid, tail-jumps to set_fmod_err (label 90)
 *   y subnormal     -> fmod_subnormal
 *   y with biased exponent <= 0x039 -> fmod_small
 *   |x| < |y|       -> returns x                          (label 30)
 *
 * Register roles in the main loop:
 *   f0 = running remainder (starts as |x|), f2 = |y|
 *   t0 = high word of x (only its sign is tested)
 *   t2 = 2047<<20 (exponent mask for a high word)
 *   t3 = caller's fcsr, restored before returning
 *   t4, t5, t8, t9, f4, f6, f8, f10, f16 = scratch
 *
 * fmod1 and fmod2 are secondary entry points used by fmod_small and
 * fmod_subnormal via bal; they expect the register state listed at
 * fmod1 and return through label 30/36.
 *
 * The code between ".set noreorder" and ".set reorder" is hand
 * scheduled: the instruction after every branch is its delay slot.
 */
.globl fmod
.ent fmod
fmod:
        .frame  sp, 0, ra
        .prologue 0
        .set    noreorder
        c.un.d  $f12, $f14              /* x NaN or y NaN? */
        dmfc1   t0, $f12                /* sign and exponent of x */
        dmfc1   t1, $f14                /* sign and exponent of y */
        dsra    t0, t0, 32              /* t0 = high word of x */
        dsra    t1, t1, 32              /* t1 = high word of y */
        bc1t    70f
        cfc1    t3, $31                 /* (delay slot) t3 = fcsr */
        abs.d   $f0, $f12               /* f0 = |x| */
        abs.d   $f2, $f14               /* f2 = |y| */
        li      t2, (2047<<20)
        c.lt.d  $f0, $f2
        and     t8, t0, t2              /* check for x = +-Infinity */
        and     t9, t1, t2              /* t9 = exponent field of y */
        bc1t    30f                     /* |x| < |y|: result is x */
        li      t4, 1                   /* (delay slot) */
        beq     t8, t2, 80f
        ctc1    t4, $31                 /* (delay slot) set round to zero mode */
        beq     t9, 0, 90f              /* y is 0 or subnormal */
        li      t8, 0x03900000          /* (delay slot) exponent field 0x039 */
        bleu    t9, t8, fmod_small      /* almost subnormals */
        nop
fmod1:  // entry from fmod_subnormal, fmod_small
        // f0 = |x|, f2 = |y|, t0 = sign of x, t2 = 2047<<20,
        // t3 = fcsr, fcsr = round-to-zero
20:     /* x > y */
        /* q = x/y (>= 1.0) */
        div.d   $f8, $f0, $f2
        /* f4 = y with low 27 bits 0 */
        dmfc1   t8, $f2
        dsra    t5, t8, 32              /* t5 = high word of y */
        dsrl    t8, t8, 27
        dsll    t8, t8, 27
        dmtc1   t8, $f4
        dmfc1   t4, $f0
        dsra    t4, t4, 32              /* t4 = high word of the remainder */
        and     t4, t2
        and     t5, t2
        subu    t4, t5                  /* exponent difference, still << 20 */
        subu    t4, (25<<20)
        bgtz    t4, 40f                 /* quotient too large for one step */
        sub.d   $f6, $f2, $f4           /* (delay slot) f6 = low 27 bits of y */
22:     /* q < 2^26 */
        cvt.w.d $f16, $f8               /* truncate */
        cvt.d.w $f8, $f16
        mul.d   $f4, $f8                /* exact (26 x 26 = 52 bits) */
        mul.d   $f6, $f8                /* exact (27 x 26 = 53 bits) */
        sub.d   $f0, $f4                /* exact */
        sub.d   $f0, $f6                /* exact */
fmod2:  /* entry from fmod_subnormal and fmod_small */
        c.lt.d  $f0, $f2
        nop
        bc1f    20b                     /* remainder still >= y: go round again */
        nop
        .set    reorder
30:     /* x < y */
        /* negate remainder if dividend was negative */
        bgez    t0, 36f
        neg.d   $f0
36:     ctc1    t3, $31                 /* restore the caller's fcsr */
        j       ra

40:     /* q >= 2^26 */
        /*
         * t4 = (exponent(x) - exponent(y) - 25) << 20, positive here.
         * Adding t4 to the high word of y (and of its high part f4)
         * raises their exponents by that amount, so this step divides
         * by the scaled divisor f10 and the quotient fits the 22: path.
         */
        // REVIEW: use t4 as 64 bit add and avoid shifts/or?
        dmfc1   t8, $f2
        dsra    t9, t8, 32              /* t9 = high word */
        dsll    t8, t8, 32
        dsrl    t8, t8, 32              /* t8 = low word, zero extended */
        addu    t9, t4
        dsll    t9, t9, 32
        or      t8, t9
        dmtc1   t8, $f10                /* f10 = y scaled up */
        dmfc1   t8, $f4
        dsra    t9, t8, 32
        dsll    t8, t8, 32
        dsrl    t8, t8, 32
        addu    t9, t4
        dsll    t9, t9, 32
        or      t8, t9
        /*
         * The scaled high part must land in $f4: that is the register
         * read by the sub.d below and by the multiply at 22.  A
         * doubleword move to the odd register $f5 is not a defined way
         * to update the $f4 double.
         */
        dmtc1   t8, $f4                 /* f4 = high part of y, scaled up */
        div.d   $f8, $f0, $f10
        sub.d   $f6, $f10, $f4          /* f6 = low part of scaled y */
        b       22b

70:     /* x NaN or y NaN */
        c.eq.d  $f12, $f12              /* true when x is not a NaN */
        bc1t    72f
        mov.d   $f0, $f12               /* x is the NaN: return it */
        j       ra
72:     mov.d   $f0, $f14               /* otherwise y is the NaN */
        j       ra

80:     /* x = +-Infinity */
        ctc1    t3, $31                 /* undo round-to-zero set in the delay slot */
        sub.d   $f0, $f12, $f12         /* raise Invalid, return NaN */
        j       ra

90:     /* y is zero or subnormal */
        // REVIEW: reduce usage to 64-bits and remove shifts
        mfc1    t8, $f14                /* low word of y */
        sll     t9, t1, 1               /* high word of y without the sign bit */
        bne     t9, 0, fmod_subnormal
        bne     t8, 0, fmod_subnormal
        /* y = +-0 */
        ctc1    t3, $31
        div.d   $f0, $f14, $f14         /* raise Invalid, return NaN */
        j       set_fmod_err
.end fmod
.text .text$fmodm
.ent fmod_subnormal
fmod_subnormal:
        .frame  sp, 16, ra
        .mask   0x80000000, 0
//
// fmod_subnormal -- remainder when y is subnormal.
//
// Entered by a branch from fmod, so ra is still the return address of
// the caller of fmod.
//
// On entry:
//   f0   = |x|
//   f2   = |y|
//   $f12 = original x
//   t0   = sign of x
//   t2   = 2047<<20
//   t3   = saved fcsr
//   fcsr = round-to-zero
//
// Both operands are scaled by 2^114, the shared fmod1/fmod2 loop
// computes the remainder, and the result is scaled back by 2^-114.
//
        subu    sp, sp, 16
        sw      ra, 16(sp)              // ra is kept at the original sp
        .prologue 1
        li.d    $f16, 2.0769187434139311e+34    // 2^114
        li.d    $f18, 8.6555775981267394e+273   // 2^(1024-114)
        c.lt.d  $f0, $f18               // is |x| below 2^(1024-114)?
        mul.d   $f2, $f2, $f16          // y * 2^114
        bc1t    1f

        // x * 2^114 would overflow: reduce the unscaled x against the
        // scaled y first, with the sign forced positive.
        li      t0, 0
        bal     fmod1
        li      t4, 1
        ctc1    t4, $31                 // back to round-to-zero after fmod1 restored fcsr
        // REVIEW: use 64-bits in t0 for sign?
        dmfc1   t0, $f12
        dsra    t0, t0, 32              // t0 = high word of x, i.e. its sign
        li.d    $f16, 2.0769187434139311e+34    // 2^114
        mul.d   $f0, $f0, $f16          // scale the partial remainder
        bal     fmod2
        b       2f

1:      mul.d   $f0, $f0, $f16          // x * 2^114
        bal     fmod1

2:      li.d    $f16, 4.8148248609680896e-35    // 2^-114
        mul.d   $f0, $f0, $f16          // undo the scaling
        lw      ra, 16(sp)
        addu    sp, sp, 16
        j       ra
.end fmod_subnormal
/* float fmodf(float x, float y) */
.weakext fmodf, __fmodf
/* float fmodf(float x, float y) */
.text .text$fmodm
/*
 * float fmodf(float x, float y)
 *
 * In:   $f12 = x, $f14 = y
 * Out:  $f0  = remainder, carrying the sign of x
 *
 * The work is done in double precision: dx = (double)x in $f0 keeps
 * its sign, dy = |(double)y| in $f2.
 *
 * Special cases:
 *   x or y NaN      -> returns the NaN operand        (label 70)
 *   x = +-Infinity  -> raises Invalid, returns NaN    (label 80)
 *   y = +-0         -> raises Invalid, returns NaN    (label 90)
 *   |x| < |y|       -> returns x                      (label 100)
 *
 * Register roles:
 *   t8 = biased exponent of dx, t9 = biased exponent of dy
 *   t3 = caller's fcsr, restored on every path that changed it
 *   t0, t2, t4, f4, f6, f10 = scratch
 */
.globl fmodf
.ent fmodf
fmodf:
        .frame  sp, 0, ra
        .prologue 0
        c.un.s  $f12, $f14              // x NaN or y NaN?
        cvt.d.s $f0, $f12               // dx = x
        cvt.d.s $f2, $f14               // dy = y
        bc1t    70f                     // branch if x or y is a NaN
        dmfc1   t8, $f0                 // all 64 bits of dx
        dmfc1   t9, $f2                 // all 64 bits of dy (dy lives in $f2)
        dsra    t8, t8, 32              // keep the high words
        dsra    t9, t9, 32
        cfc1    t3, $31                 // t3 = fcsr
        li      t2, 0x7ff
        sra     t8, 20
        sra     t9, 20
        and     t8, t2                  // t8 = xptx
        and     t9, t2                  // t9 = xpty
        beq     t8, t2, 80f             // branch if x == +/-Inf
        li.s    $f6, 0.0
        c.eq.s  $f6, $f14
        bc1t    90f                     // branch if y == +/-0.0
        abs.d   $f2, $f2                // dy = fabs(dy)
        abs.d   $f6, $f0                // $f6 = fabs(dx)
        c.lt.d  $f6, $f2
        bc1t    100f                    // branch if |x| < |y|
        and     t4, t3, 0xfffc0000
        or      t4, 1
        ctc1    t4, $31                 // set round to zero mode with traps disabled
        addi    t0, t9, 24
        bge     t8, t0, 30f             // branch if xptx >= xpty + 24

        /* compute dx = dx - floor(dx/dy)*dy */
        div.d   $f4, $f0, $f2           // $f4 = nd == dx/dy
        cvt.w.d $f4
        cvt.d.w $f4                     // $f4 = (int)nd
        mul.d   $f4, $f2                // $f4 = nd*dy
        sub.d   $f0, $f4                // dx = dx - nd*dy
        cvt.s.d $f0                     // convert result to single precision
        ctc1    t3, $31                 // restore rounding mode
        j       ra

30:
        /* scale dy up and compute dx = dx - floor(dx/(k*dy))*k*dy */
        mov.d   $f10, $f2               // dy1 = dy
        dmfc1   t2, $f10
        dsll    t2, 12                  // drop sign and exponent,
        dsrl    t2, 12                  //   keeping only the fraction bits
        subu    t0, t8, 23
        dsll    t0, 32+20               // move the new exponent into place
        or      t2, t0
        dmtc1   t2, $f10                // xptdy1 = xptx - 23
        div.d   $f6, $f0, $f10          // $f6 = dx/dy1
        cvt.w.d $f6
        cvt.d.w $f6
        mul.d   $f6, $f10               // $f6 = nd*dy1
        sub.d   $f0, $f6                // dx = dx - nd*dy1
        dmfc1   t8, $f0
        dsll    t8, 1                   // shift the sign bit out
        dsrl    t8, 32+21               // update xptx
        addi    t0, t9, 24
        bge     t8, t0, 30b             // branch if xptx >= xpty + 24
        abs.d   $f4, $f0
        c.lt.d  $f4, $f2
        bc1f    40f                     // branch if |dx| >= dy
        cvt.s.d $f0                     // convert result to single precision
        ctc1    t3, $31                 // restore rounding mode
        j       ra

40:
        div.d   $f4, $f0, $f2           // $f4 = nd == dx/dy
        cvt.w.d $f4
        cvt.d.w $f4                     // $f4 = (int)nd
        mul.d   $f4, $f2                // $f4 = nd*dy
        sub.d   $f0, $f4                // dx = dx - nd*dy
        cvt.s.d $f0                     // convert result to single precision
        ctc1    t3, $31                 // restore rounding mode
        j       ra

70:     /* x NaN or y NaN */
        c.eq.s  $f12, $f12              // true when x is not a NaN
        bc1t    72f
        mov.s   $f0, $f12               // x is the NaN: return it
        j       ra
72:
        mov.s   $f0, $f14               // otherwise y is the NaN
        j       ra

80:     /* x = +-Infinity */
        ctc1    t3, $31
        /*
         * Inf - Inf raises Invalid and leaves NaN in $f0, which is the
         * value returned.  $f0 must not be overwritten with x afterwards,
         * or the caller would get +-Infinity back instead of NaN.
         */
        sub.s   $f0, $f12, $f12         /* raise Invalid, return NaN */
        j       ra

90:     /* y == +/- 0.0 */
        ctc1    t3, $31
        div.s   $f0, $f14, $f14         /* raise Invalid, return NaN */
        j       ra

100:    /* |x| < |y| */
        mov.s   $f0, $f12               // result = x
        j       ra
.end fmodf
.text .text$fmodm
.ent set_fmod_err
set_fmod_err:
#define FSIZE 48
        .frame  sp, FSIZE, ra
        .mask   0x80000000, -4
//
// set_fmod_err -- report an fmod error through _except2.
//
// Reached by a jump from fmod when y is +-0, with
//   $f12 = x, $f14 = y, $f0 = default result (NaN from y/y).
//
// Arguments handed to _except2:
//   $4      = FP_I      exception mask
//   $5      = OP_FMOD   operation code
//   $6/$7   = arg1 (x): low word / high word
//   16(sp)  = arg2 (y)
//   24(sp)  = default result
//   32(sp)  = fcsr with the exception enable bits inverted
//
// Returns to the caller of fmod with whatever _except2 leaves behind
// (its result convention is not visible in this file).
//
        subu    sp, FSIZE
        sw      ra, FSIZE-4(sp)
        .prologue 1
        li      $4, FP_I                // exception mask
        li      $5, OP_FMOD             // operation code (function name index)
        dmfc1   $6, $f12                // arg1
        // Split arg1 into two 32-bit halves.  Arithmetic shifts keep each
        // half sign-extended in its 64-bit register, which is the form
        // 32-bit instructions expect; the low 32 bits are the same as
        // with logical shifts.
        dsra    $7, $6, 32              // $7 = high word of x
        dsll    $6, $6, 32
        dsra    $6, $6, 32              // $6 = low word of x
        s.d     $f14, 16(sp)            // arg2 (TODO: pass 0.0 as arg2???, see above)
        s.d     $f0, 24(sp)             // default result
        cfc1    t7, $31                 // floating point control/status register
        xor     t7, t7, 0xf80           // inverse exception enable bits
        sw      t7, 32(sp)
        jal     _except2
        lw      ra, FSIZE-4(sp)
        addu    sp, FSIZE
        j       ra
#undef FSIZE
.end set_fmod_err