/*
 * |-----------------------------------------------------------|
 * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc.      |
 * | All Rights Reserved                                       |
 * |-----------------------------------------------------------|
 * |          Restricted Rights Legend                         |
 * | Use, duplication, or disclosure by the Government is      |
 * | subject to restrictions as set forth in                   |
 * | subparagraph (c)(1)(ii) of the Rights in Technical        |
 * | Data and Computer Software Clause of DFARS 252.227-7013.  |
 * |         MIPS Computer Systems, Inc.                       |
 * |         950 DeGuigne Avenue                               |
 * |         Sunnyvale, California 94088-3650, USA             |
 * |-----------------------------------------------------------|
 */
/* $Header: fmod.s,v 3000.3.1.6 91/10/09 11:14:56 zaineb Exp $ */

        .extern _except2
        .extern errno 4

/*
 * NOTE(review): the header names of the three includes below are missing
 * from this copy of the file and must be restored before it can be
 * assembled.  Whatever they are, they have to supply the register aliases
 * (t0.., sp, ra) and the FP_I / OP_FMOD constants used in this file.
 */
#include
#include
#include

/*
 * Register conventions shared by fmod, fmod_small and fmod_subnormal:
 *      f0  = |x| on entry to the reduction loop, remainder on exit
 *      f2  = |y| (possibly scaled by the helpers)
 *      f12 = x, f14 = y (the incoming arguments)
 *      t0  = upper 32 bits of x (sign and exponent); only its sign is
 *            tested, to decide whether the remainder is negated
 *      t2  = 2047<<20 (exponent mask for an upper word)
 *      t3  = fcsr value saved at entry, restored before returning
 *      fcsr is set to round-to-zero while the loop runs
 */

/*----------------------------------------------------------------------
 * fmod_small -- fmod for a divisor that is "almost subnormal".
 *
 * Entered by a branch from fmod (not a call), so ra still holds the
 * return address of fmod's caller.
 *
 * Both x and y are scaled by 2^57, the remainder is computed with the
 * common loop (fmod1 / fmod2), and the result is unscaled by 2^-57.
 * When x * 2^57 would overflow, x is first reduced against the scaled y
 * without scaling x, and only the (small) remainder is scaled.
 *--------------------------------------------------------------------*/
        .text   .text$fmodm
        .ent    fmod_small
fmod_small:
        .frame  sp, 16, ra
        .mask   0x80000000, 0
        subu    sp, 16
        /* NOTE(review): this slot is just above the 16 bytes allocated   */
        /* here (it matches the .mask offset of 0); presumably it is the   */
        /* caller-provided argument home area -- confirm against the ABI.  */
        sw      ra, 16(sp)
        .prologue 1
        li.d    $f18, 1.2474001934591999e+291   /* 2^(1024-57) */
        li.d    $f16, 1.4411518807585587e+17    /* 2^57 */
        c.lt.d  $f0, $f18                       /* |x| < 2^(1024-57)? */
        mul.d   $f2, $f16                       /* f2 = |y| * 2^57 */
        bc1t    10f                             /* yes: x can be scaled directly */

        /* x * 2^57 would overflow */
        /* first compute with unscaled x to chop it down to size */
        li      t0, 0                           /* keep fmod1 from negating the partial result */
        bal     fmod1
        li      t4, 1
        ctc1    t4, $31                         /* fmod1 restored fcsr; set round-to-zero again */
        /*
         * Reload the sign of x.  The sign lives in the upper word of the
         * double, so fetch all 64 bits and shift the upper word down
         * (same sequence as fmod_subnormal); a plain mfc1 of $f12 would
         * return the low mantissa word instead.
         */
        dmfc1   t0, $f12
        dsra    t0, t0, 32
        li.d    $f16, 1.4411518807585587e+17    /* 2^57 */
        mul.d   $f0, $f16                       /* scale the partial remainder */
        bal     fmod2
        b       20f
10:
        mul.d   $f0, $f16                       /* f0 = |x| * 2^57 */
        bal     fmod1
20:
        li.d    $f16, 6.9388939039072284e-18    /* 2^-57 */
        mul.d   $f0, $f16                       /* unscale the remainder */
        lw      ra, 16(sp)
        addu    sp, 16
        j       ra
        .end    fmod_small

/*----------------------------------------------------------------------
 * double fmod(double x, double y)
 *
 * In:   f12 = x, f14 = y
 * Out:  f0  = remainder of x / y, carrying the sign of x
 *
 * Special cases handled here:
 *      x or y NaN      -> the NaN operand is returned
 *      |x| < |y|       -> |x| with the sign of x is returned
 *      x = +-Infinity  -> x - x (raises Invalid, NaN)
 *      y = +-0         -> y / y (raises Invalid, NaN), then set_fmod_err
 *      y subnormal     -> fmod_subnormal
 *      y exponent field <= 0x039 -> fmod_small
 *
 * fmod1 and fmod2 are additional entry points into the reduction loop
 * used by fmod_small and fmod_subnormal (via bal); they return through
 * the common exit at label 30.
 *
 * The first part of the routine is written with .set noreorder, so the
 * instruction after each branch is its delay slot and always executes.
 *--------------------------------------------------------------------*/
        .text   .text$fmodm
        .globl  fmod
        .ent    fmod
fmod:
        .frame  sp, 0, ra
        .prologue 0
        .set    noreorder
        c.un.d  $f12, $f14              /* x NaN or y NaN? */
        dmfc1   t0, $f12                /* sign and exponent of x */
        dmfc1   t1, $f14                /* sign and exponent of y */
        dsra    t0, t0, 32              /* t0 = upper word of x */
        dsra    t1, t1, 32              /* t1 = upper word of y */
        bc1t    70f
        cfc1    t3, $31                 /* (delay slot) t3 = fcsr */
        abs.d   $f0, $f12               /* f0 = |x| */
        abs.d   $f2, $f14               /* f2 = |y| */
        li      t2, (2047<<20)
        c.lt.d  $f0, $f2                /* |x| < |y|? */
        and     t8, t0, t2              /* check for x = +-Infinity */
        and     t9, t1, t2              /* t9 = exponent field of y */
        bc1t    30f
        li      t4, 1                   /* (delay slot) */
        beq     t8, t2, 80f
        ctc1    t4, $31                 /* (delay slot) set round to zero mode */
        beq     t9, 0, 90f              /* y is 0 or subnormal */
        li      t8, 0x03900000          /* (delay slot) */
        bleu    t9, t8, fmod_small      /* almost subnormals */
        nop

fmod1:
        /* entry from fmod_subnormal, fmod_small */
        /* f0 = |x|, f2 = |y|, t0 = sign of x, t2 = 2047<<20, */
        /* t3 = fcsr, fcsr = round-to-zero */
20:
        /* x > y */
        /* q = x/y (>= 1.0) */
        div.d   $f8, $f0, $f2
        /* f4 = y with low 27 bits 0 */
        dmfc1   t8, $f2
        dsra    t5, t8, 32              /* t5 = upper word of y */
        dsrl    t8, t8, 27
        dsll    t8, t8, 27
        dmtc1   t8, $f4
        dmfc1   t4, $f0
        dsra    t4, t4, 32              /* t4 = upper word of x */
        and     t4, t2
        and     t5, t2
        subu    t4, t5                  /* exponent(x) - exponent(y), still <<20 */
        subu    t4, (25<<20)
        bgtz    t4, 40f                 /* exponent gap above 25: q >= 2^26 */
        sub.d   $f6, $f2, $f4           /* (delay slot) f6 = low 27 bits of y */
22:
        /* q < 2^26 */
        cvt.w.d $f16, $f8               /* truncate */
        cvt.d.w $f8, $f16
        mul.d   $f4, $f8                /* exact (26 x 26 = 52 bits) */
        mul.d   $f6, $f8                /* exact (27 x 26 = 53 bits) */
        sub.d   $f0, $f4                /* exact */
        sub.d   $f0, $f6                /* exact */

fmod2:
        /* entry from fmod_subnormal and fmod_small */
        c.lt.d  $f0, $f2
        nop
        bc1f    20b                     /* remainder still >= y: reduce again */
        nop
        .set    reorder

30:
        /* x < y */
        /* negate remainder if dividend was negative */
        bgez    t0, 36f
        neg.d   $f0
36:
        ctc1    t3, $31                 /* restore the fcsr saved at entry */
        j       ra

40:
        /* q >= 2^26 */
        /* t4 = exponent gap minus 25 (<<20).  Build f10 = y and f4 = high  */
        /* part of y, both with t4 added to the exponent, so that the       */
        /* quotient x / f10 computed below fits the q < 2^26 path at 22.    */
        /* REVIEW: use t4 as 64 bit add and avoid shifts/or? */
        dmfc1   t8, $f2
        dsra    t9, t8, 32              /* t9 = upper word of y */
        dsll    t8, t8, 32
        dsrl    t8, t8, 32              /* t8 = lower word of y, zero-extended */
        addu    t9, t4                  /* bump the exponent */
        dsll    t9, t9, 32
        or      t8, t9
        dmtc1   t8, $f10                /* f10 = scaled y */
        dmfc1   t8, $f4
        dsra    t9, t8, 32
        dsll    t8, t8, 32
        dsrl    t8, t8, 32
        addu    t9, t4
        dsll    t9, t9, 32
        or      t8, t9
        /* The scaled high part must land in $f4: both the sub.d below */
        /* and the multiply at 22 read $f4.                            */
        dmtc1   t8, $f4                 /* f4 = scaled high part of y */
        div.d   $f8, $f0, $f10
        sub.d   $f6, $f10, $f4          /* f6 = scaled low part of y */
        b       22b

70:
        /* x NaN or y NaN */
        c.eq.d  $f12, $f12              /* true when x is not a NaN */
        bc1t    72f
        mov.d   $f0, $f12               /* x is the NaN: return it */
        j       ra
72:
        mov.d   $f0, $f14               /* otherwise y is the NaN */
        j       ra

80:
        /* x = +-Infinity */
        ctc1    t3, $31
        sub.d   $f0, $f12, $f12         /* raise Invalid, return NaN */
        j       ra

90:
        /* y is zero or subnormal */
        /* REVIEW: reduce usage to 64-bits and remove shifts */
        mfc1    t8, $f14                /* low word of y */
        sll     t9, t1, 1               /* upper word of y without the sign bit */
        bne     t9, 0, fmod_subnormal
        bne     t8, 0, fmod_subnormal
        /* y = +-0 */
        ctc1    t3, $31
        div.d   $f0, $f14, $f14         /* raise Invalid, return NaN */
        j       set_fmod_err
        .end    fmod

/*----------------------------------------------------------------------
 * fmod_subnormal -- fmod for a subnormal divisor.
 *
 * Entered by a branch from fmod (not a call), so ra still holds the
 * return address of fmod's caller.
 *
 * On entry:
 *      f0 = |x|, f2 = |y|, t0 = sign of x, t2 = 2047<<20,
 *      t3 = fcsr, fcsr = round-to-zero
 *
 * Scale both x and y by 2^114, compute the remainder, and unscale it.
 * When x * 2^114 would overflow, x is first reduced against the scaled
 * y without scaling x, and only the remainder is scaled.
 *--------------------------------------------------------------------*/
        .text   .text$fmodm
        .ent    fmod_subnormal
fmod_subnormal:
        .frame  sp, 16, ra
        .mask   0x80000000, 0
        subu    sp, 16
        /* NOTE(review): slot is just above the 16 bytes allocated here;  */
        /* presumably the caller-provided argument home area -- confirm.  */
        sw      ra, 16(sp)
        .prologue 1
        li.d    $f18, 8.6555775981267394e+273   /* 2^(1024-114) */
        li.d    $f16, 2.0769187434139311e+34    /* 2^114 */
        c.lt.d  $f0, $f18                       /* |x| < 2^(1024-114)? */
        mul.d   $f2, $f16                       /* f2 = |y| * 2^114 */
        bc1t    10f                             /* yes: x can be scaled directly */

        /* x * 2^114 would overflow */
        /* first compute with unscaled x to chop it down to size */
        li      t0, 0                           /* keep fmod1 from negating the partial result */
        bal     fmod1
        li      t4, 1
        ctc1    t4, $31                         /* fmod1 restored fcsr; set round-to-zero again */
        /* REVIEW: use 64-bits in t0 for sign? */
        dmfc1   t0, $f12
        dsra    t0, t0, 32                      /* t0 = upper word (sign) of x */
        li.d    $f16, 2.0769187434139311e+34    /* 2^114 */
        mul.d   $f0, $f16                       /* scale the partial remainder */
        bal     fmod2
        b       20f
10:
        mul.d   $f0, $f16                       /* f0 = |x| * 2^114 */
        bal     fmod1
20:
        li.d    $f16, 4.8148248609680896e-35    /* 2^-114 */
        mul.d   $f0, $f16                       /* unscale the remainder */
        lw      ra, 16(sp)
        addu    sp, 16
        j       ra
        .end    fmod_subnormal

/*----------------------------------------------------------------------
 * float fmodf(float x, float y)
 *
 * In:   f12 = x, f14 = y (single precision)
 * Out:  f0  = remainder of x / y (single precision)
 *
 * The work is done in double precision (dx, dy) with the fcsr set to
 * round-to-zero.  t8 / t9 hold the 11-bit exponent fields of dx / dy.
 * While xptx >= xpty + 24, dy is replaced by a copy whose exponent is
 * xptx - 23 and dx is reduced against that copy.
 *
 * Special cases:
 *      x or y NaN      -> the NaN operand is returned
 *      x = +-Infinity  -> x - x (raises Invalid, NaN)
 *      y = +-0         -> y / y (raises Invalid, NaN)
 *      |x| < |y|       -> x is returned unchanged
 *--------------------------------------------------------------------*/
        .weakext fmodf, __fmodf
        .text   .text$fmodm
        .globl  fmodf
        .ent    fmodf
fmodf:
        .frame  sp, 0, ra
        .prologue 0
        c.un.s  $f12, $f14              /* x NaN or y NaN? */
        cvt.d.s $f0, $f12               /* dx = x */
        cvt.d.s $f2, $f14               /* dy = y */
        bc1t    70f                     /* branch if x or y is a NaN */
        dmfc1   t8, $f0                 /* bits of dx */
        dmfc1   t9, $f2                 /* bits of dy (dy lives in $f2) */
        dsra    t8, t8, 32              /* keep the upper words */
        dsra    t9, t9, 32
        cfc1    t3, $31                 /* t3 = fcsr */
        li      t2, 0x7ff
        sra     t8, 20
        sra     t9, 20
        and     t8, t2                  /* t8 = xptx */
        and     t9, t2                  /* t9 = xpty */
        beq     t8, t2, 80f             /* branch if x == +/-Inf */
        li.s    $f6, 0.0
        c.eq.s  $f6, $f14
        bc1t    90f                     /* branch if y == +/-0.0 */
        abs.d   $f2, $f2                /* dy = fabs(dy) */
        abs.d   $f6, $f0                /* $f6 = fabs(dx) */
        c.lt.d  $f6, $f2
        bc1t    100f                    /* branch if |x| < |y| */
        and     t4, t3, 0xfffc0000
        or      t4, 1
        ctc1    t4, $31                 /* set round to zero mode with traps disabled */
        addi    t0, t9, 24
        bge     t8, t0, 30f             /* branch if xptx >= xpty + 24 */

        /* compute dx = dx - floor(dx/dy)*dy */
        div.d   $f4, $f0, $f2           /* $f4 = nd == dx/dy */
        cvt.w.d $f4
        cvt.d.w $f4                     /* $f4 = (int)nd */
        mul.d   $f4, $f2                /* $f4 = nd*dy */
        sub.d   $f0, $f4                /* dx = dx - nd*dy */
        cvt.s.d $f0                     /* convert result to single precision */
        ctc1    t3, $31                 /* restore rounding mode */
        j       ra

30:
        /* scale dy up and compute dx = dx - floor(dx/(k*dy))*k*dy */
        mov.d   $f10, $f2               /* dy1 = dy */
        dmfc1   t2, $f10
        dsll    t2, 12
        dsrl    t2, 12                  /* drop sign and exponent, keep the fraction */
        subu    t0, t8, 23
        dsll    t0, 32+20               /* move the new exponent into place */
        or      t2, t0
        dmtc1   t2, $f10                /* xptdy1 = xptx - 23 */
        div.d   $f6, $f0, $f10          /* $f6 = dx/dy1 */
        cvt.w.d $f6
        cvt.d.w $f6
        mul.d   $f6, $f10               /* $f6 = nd*dy1 */
        sub.d   $f0, $f6                /* dx = dx - nd*dy1 */
        dmfc1   t8, $f0
        dsll    t8, 1                   /* shift the sign bit out */
        dsrl    t8, 32+21               /* update xptx */
        addi    t0, t9, 24
        bge     t8, t0, 30b             /* branch if xptx >= xpty + 24 */
        abs.d   $f4, $f0
        c.lt.d  $f4, $f2
        bc1f    40f                     /* branch if |dx| >= dy */
        cvt.s.d $f0                     /* convert result to single precision */
        ctc1    t3, $31                 /* restore rounding mode */
        j       ra

40:
        div.d   $f4, $f0, $f2           /* $f4 = nd == dx/dy */
        cvt.w.d $f4
        cvt.d.w $f4                     /* $f4 = (int)nd */
        mul.d   $f4, $f2                /* $f4 = nd*dy */
        sub.d   $f0, $f4                /* dx = dx - nd*dy */
        cvt.s.d $f0                     /* convert result to single precision */
        ctc1    t3, $31                 /* restore rounding mode */
        j       ra

70:
        /* x NaN or y NaN */
        c.eq.s  $f12, $f12              /* true when x is not a NaN */
        bc1t    72f
        mov.s   $f0, $f12               /* x is the NaN: return it */
        j       ra
72:
        mov.s   $f0, $f14               /* otherwise y is the NaN */
        j       ra

80:
        /* x = +-Infinity */
        ctc1    t3, $31
        /* The difference itself is the return value; it must not be   */
        /* replaced by x afterwards, or +-Infinity would be returned.  */
        sub.s   $f0, $f12, $f12         /* raise Invalid, return NaN */
        j       ra

90:
        /* y == +/- 0.0 */
        ctc1    t3, $31
        div.s   $f0, $f14, $f14         /* raise Invalid, return NaN */
        j       ra

100:
        /* |x| < |y| */
        mov.s   $f0, $f12               /* result = x */
        j       ra
        .end    fmodf

/*----------------------------------------------------------------------
 * set_fmod_err -- hand the fmod(x, +-0) case to _except2.
 *
 * Reached by a jump from fmod, so ra is the return address of fmod's
 * caller.
 *
 * In:   f12 = x (arg1), f14 = y (arg2), f0 = default result
 *
 * Values prepared for _except2:
 *      $4      = FP_I (exception mask)
 *      $5      = OP_FMOD (operation code)
 *      $6, $7  = low and high 32 bits of arg1
 *      16(sp)  = arg2
 *      24(sp)  = default result
 *      32(sp)  = fcsr with bits 0xf80 inverted
 *
 * NOTE(review): the contract of _except2 (including where it leaves the
 * final result) is not visible in this file.
 *--------------------------------------------------------------------*/
        .text   .text$fmodm
        .ent    set_fmod_err
set_fmod_err:
#define FSIZE 48
        .frame  sp, FSIZE, ra
        .mask   0x80000000, -4
        subu    sp, FSIZE
        sw      ra, FSIZE-4(sp)
        .prologue 1
        li      $4, FP_I                /* exception mask */
        li      $5, OP_FMOD             /* operation code (function name index) */
        dmfc1   $6, $f12                /* arg1 */
        dsrl    $7, $6, 32              /* $7 = high 32 bits of arg1 */
        dsll    $6, $6, 32
        dsrl    $6, $6, 32              /* $6 = low 32 bits of arg1 */
        s.d     $f14, 16(sp)            /* arg2 (TODO: pass 0.0 as arg2???, see above) */
        s.d     $f0, 24(sp)             /* default result */
        cfc1    t7, $31                 /* floating point control/status register */
        xor     t7, t7, 0xf80           /* inverse exception enable bits */
        sw      t7, 32(sp)
        jal     _except2
        lw      ra, FSIZE-4(sp)
        addu    sp, FSIZE
        j       ra
#undef FSIZE
        .end    set_fmod_err