windows-nt-4.0/private/sdktools/vctools/crt/fpw32/tran/mips/fmodm.s


								/*

								 * |-----------------------------------------------------------|

								 * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc.      |

								 * | All Rights Reserved                                       |

								 * |-----------------------------------------------------------|

								 * |          Restricted Rights Legend                         |

								 * | Use, duplication, or disclosure by the Government is      |

								 * | subject to restrictions as set forth in                   |

								 * | subparagraph (c)(1)(ii) of the Rights in Technical        |

								 * | Data and Computer Software Clause of DFARS 252.227-7013.  |

								 * |         MIPS Computer Systems, Inc.                       |

								 * |         950 DeGuigne Avenue                               |

								 * |         Sunnyvale, California 94088-3650, USA             |

								 * |-----------------------------------------------------------|

								 */

								/* $Header: fmod.s,v 3000.3.1.6 91/10/09 11:14:56 zaineb Exp $ */


								.extern _except2

								.extern errno 4


								#include <kxmips.h>

								#include <trans.h>

								#include <fpieee.h>


								/* double fmod(double x, double y) */

								.text .text$fmodm

								.ent fmod_small
								/*
								 * fmod_small -- fmod continuation for a divisor that is almost subnormal.
								 *
								 * Entered by a branch (not a call) from fmod when the exponent field of
								 * y is non-zero but <= 0x039, so ra still holds fmod's return address.
								 *
								 * In:   $f0 = |x|, $f2 = |y|, $f12 = x, t0 = high word of x (sign),
								 *       t2 = 2047<<20, t3 = caller's fcsr, fcsr = round-to-zero
								 * Out:  $f0 = remainder carrying the sign of x
								 * Uses: t0, t4, $f16, $f18, plus everything fmod1/fmod2 use
								 *
								 * Both operands are scaled by 2^57, the remainder is computed by
								 * fmod1/fmod2, and the result is scaled back by 2^-57.
								 */
								fmod_small:
									.frame  sp, 16, ra
									.mask   0x80000000, 0
									subu    sp, 16
									sw      ra, 16(sp)              /* the bal instructions below overwrite ra */
									.prologue 1

									li.d    $f18, 1.2474001934591999e+291   /* 2^(1024-57) */
									li.d    $f16, 1.4411518807585587e+17    /* 2^57 */
									c.lt.d  $f0, $f18               /* is |x| small enough to scale? */
									mul.d   $f2, $f16               /* |y| *= 2^57 */
									bc1t    10f

									/* x * 2^57 would overflow */
									/* first compute with unscaled x to chop it down to size */
									li      t0, 0                   /* keep the intermediate remainder positive */
									bal     fmod1

									/* the fmod1 exit path reloaded fcsr from t3; select round-to-zero again */
									li      t4, 1
									ctc1    t4, $31

									/*
									 * Reload the real sign of x for the final pass.  The sign lives in
									 * the high word of the double, so fetch all 64 bits and shift it
									 * down, exactly as fmod and fmod_subnormal do.  (A plain mfc1 of
									 * $f12 would return the low mantissa word instead.)
									 */
									dmfc1   t0, $f12
									dsra    t0, t0, 32

									li.d    $f16, 1.4411518807585587e+17    /* 2^57 */
									mul.d   $f0, $f16               /* scale the reduced |x| */
									bal     fmod2
									b       20f

								10:     mul.d   $f0, $f16               /* |x| *= 2^57 */
									bal     fmod1

								20:     li.d    $f16, 6.9388939039072284e-18    /* 2^-57 */
									mul.d   $f0, $f16               /* unscale the remainder */
									lw      ra, 16(sp)
									addu    sp, 16
									j       ra
								.end fmod_small


								.text .text$fmodm
								.globl fmod
								.ent fmod
								/*
								 * double fmod(double x, double y)
								 *
								 * In:   $f12 = x, $f14 = y
								 * Out:  $f0  = remainder of x / y, carrying the sign of x
								 *
								 * Special cases handled here:
								 *   x or y NaN      -> returns the NaN operand (x if x is a NaN, else y)
								 *   |x| < |y|       -> returns x
								 *   x = +-Infinity  -> x - x (raises Invalid, returns NaN)
								 *   y = +-0         -> y / y (raises Invalid), then jumps to set_fmod_err
								 *   y subnormal     -> continues in fmod_subnormal
								 *   y exponent<=57  -> continues in fmod_small
								 *
								 * Register roles inside the reduction loop (fmod1/fmod2):
								 *   $f0 = current |x|, $f2 = |y|, $f4/$f6 = high/low split of the divisor,
								 *   $f8 = quotient, $f10 = scaled divisor, t0 = high word of x (sign),
								 *   t2 = 2047<<20 (exponent mask), t3 = caller's fcsr.
								 *   fcsr is set to 1 (round-to-zero) for the loop and reloaded from t3
								 *   on every exit.
								 *
								 * fmod1 and fmod2 are also entered with bal from fmod_small and
								 * fmod_subnormal; they return through the "j ra" at label 36.
								 */
								fmod:
									.frame	sp, 0, ra
									.prologue 0
								.set noreorder
									c.un.d	$f12, $f14		/* x NaN or y NaN? */
									dmfc1	t0, $f12		/* sign and exponent of x */
									dmfc1	t1, $f14		/* sign and exponent of y */
									dsra	t0, t0, 32
									dsra	t1, t1, 32
									bc1t	70f
									cfc1	t3, $31			/* (delay slot) t3 = fcsr */
									abs.d	$f0, $f12		/* f0 = |x| */
									abs.d	$f2, $f14		/* f2 = |y| */
									li	t2, (2047<<20)
									c.lt.d	$f0, $f2
									and	t8, t0, t2		/* check for x = +-Infinity */
									and	t9, t1, t2
									bc1t	30f			/* |x| < |y|: result is x */
									li	t4, 1			/* (delay slot) */
									beq	t8, t2, 80f
									ctc1	t4, $31			/* (delay slot) set round to zero mode */
									beq	t9, 0, 90f		/* y is 0 or subnormal */
									li	t8, 0x03900000		/* (delay slot) exponent field 57 */
									bleu	t9, t8, fmod_small	/* almost subnormals */
									nop

								fmod1:	// entry from fmod_subnormal, fmod_small
									// f0 = |x|, f2 = |y|, t0 = sign of x, t2 = 2047<<20,
									// t3 = fcsr, fcsr = round-to-zero

								20:     /* x > y */
									/* q = x/y (>= 1.0) */

									div.d	$f8, $f0, $f2

									/* f4 = y with low 27 bits 0 */
									dmfc1	t8, $f2
									dsra	t5, t8, 32		/* t5 = high word of y */
									dsrl	t8, t8, 27
									dsll	t8, t8, 27
									dmtc1	t8, $f4

									dmfc1	t4, $f0
									dsra	t4, t4, 32		/* t4 = high word of x */

									/* t4 = (exponent(x) - exponent(y) - 25) << 20 */
									and	t4, t2
									and	t5, t2
									subu	t4, t5
									subu	t4, (25<<20)
									bgtz	t4, 40f			/* quotient too wide for one step */
									sub.d	$f6, $f2, $f4		/* (delay slot) f6 = low 27 bits of y */

								22:     /* q < 2^26 */
									cvt.w.d $f16, $f8               /* truncate */
									cvt.d.w $f8, $f16
									mul.d   $f4, $f8                /* exact (26 x 26 = 52 bits) */
									mul.d   $f6, $f8                /* exact (27 x 26 = 53 bits) */
									sub.d   $f0, $f4                /* exact */
									sub.d   $f0, $f6                /* exact */
								fmod2:  /* entry from fmod_subnormal and fmod_small */
									c.lt.d  $f0, $f2
									nop
									bc1f    20b			/* keep reducing while |x| >= |y| */
									nop
								.set reorder

								30:     /* x < y */
									/* negate remainder if dividend was negative */
									bgez    t0, 36f
									neg.d   $f0
								36:     ctc1    t3, $31			/* restore caller's fcsr */
									j       ra

								40:     /* q >= 2^26 */
									/*
									 * Add t4 to the exponent field of both y ($f2 -> $f10) and its
									 * high part ($f4 -> $f4) so that the quotient x / $f10 drops
									 * below 2^26.  $f6 = $f10 - $f4 is then the matching low part
									 * for the multiply/subtract step at label 22.
									 */
									// REVIEW:  use t4 as 64 bit add and avoid shifts/or?
									dmfc1	t8, $f2
									dsra	t9, t8, 32		/* t9 = high word */
									dsll	t8, t8, 32
									dsrl	t8, t8, 32		/* t8 = low word, zero-extended */
									addu	t9, t4			/* bump the exponent */
									dsll	t9, t9, 32
									or	t8, t9
									dmtc1	t8, $f10		/* f10 = scaled y */

									dmfc1	t8, $f4
									dsra	t9, t8, 32
									dsll	t8, t8, 32
									dsrl	t8, t8, 32
									addu    t9, t4
									dsll	t9, t9, 32
									or	t8, t9
									/*
									 * Write the scaled high part back to $f4.  Label 22 and the
									 * sub.d below read $f4, so storing it anywhere else leaves
									 * them working on the unscaled value.
									 */
									dmtc1	t8, $f4

									div.d   $f8, $f0, $f10		/* q = x / scaled y */
									sub.d   $f6, $f10, $f4		/* f6 = scaled low part */

									b       22b

								70:     /* x NaN or y NaN */
									c.eq.d  $f12, $f12		/* true when x is not a NaN */
									bc1t    72f
									mov.d   $f0, $f12		/* x is the NaN: return it */
									j       ra
								72:     mov.d   $f0, $f14		/* otherwise y is the NaN */
									j       ra

								80:     /* x = +-Infinity */
									ctc1    t3, $31
									sub.d   $f0, $f12, $f12         /* raise Invalid, return NaN */
									j       ra

								90:     /* y is zero or subnormal */
									// REVIEW:  reduce usage to 64-bits and remove shifts
									mfc1	t8, $f14		/* low mantissa word of y */
									sll	t9, t1, 1		/* high word of y without the sign bit */
									bne	t9, 0, fmod_subnormal
									bne	t8, 0, fmod_subnormal

									/* y = +-0 */
									ctc1    t3, $31
									div.d   $f0, $f14, $f14         /* raise Invalid, return NaN */
									j       set_fmod_err
								.end fmod


								.text .text$fmodm

								.ent fmod_subnormal

								// fmod_subnormal -- fmod continuation for a subnormal divisor.
								// Entered by a branch from fmod (label 90) once y is known to have a
								// zero exponent field and a non-zero mantissa, so ra still holds
								// fmod's return address.  The result is left in $f0.
								fmod_subnormal:

									.frame  sp, 16, ra

									.mask   0x80000000, 0

									// y is subnormal

									// f0 = |x|, f2 = |y|, t0 = sign of x, t2 = 2047<<20,

									// t3 = fcsr, fcsr = round-to-zero

									// scale both x and y, compute remainder, and unscale it

									subu    sp, 16

									// ra is overwritten by the bal instructions below
									sw      ra, 16(sp)

									.prologue 1

									li.d    $f18, 8.6555775981267394e+273   /* 2^(1024-114) */

									li.d    $f16, 2.0769187434139311e+34    /* 2^114 */

									// decide whether |x| can take the 2^114 scale factor
									c.lt.d  $f0, $f18

									// |y| *= 2^114
									mul.d   $f2, $f16

									bc1t    10f

									/* x * 2^114 would overflow */

									/* first compute with unscaled x to chop it down to size */

									// t0 = 0 keeps the intermediate remainder positive
									li	t0, 0

									bal	fmod1

									// the fmod1 exit path reloaded fcsr from t3; select round-to-zero again
									li	t4, 1

									ctc1	t4, $31

									// REVIEW:  use 64-bits in t0 for sign?

									// reload the real sign of x (high word of $f12) for the final pass
									dmfc1	t0, $f12

									dsra	t0, t0, 32

									li.d	$f16, 2.0769187434139311e+34    /* 2^114 */

									// scale the reduced |x|, then finish the reduction
									mul.d	$f0, $f16

									bal	fmod2

									b	20f

								// |x| is small enough: scale it and reduce in a single pass
								10:     mul.d	$f0, $f16

									bal	fmod1

								20:     li.d	$f16, 4.8148248609680896e-35    /* 2^-114 */

									// unscale the remainder
									mul.d	$f0, $f16

									lw	ra, 16(sp)

									addu	sp, 16

									j	ra

								.end fmod_subnormal


								/* float fmodf(float x, float y) */


								.weakext  fmodf, __fmodf


								/* float fmodf(float x, float y) */


								.text .text$fmodm
								.globl fmodf
								.ent fmodf
								/*
								 * float fmodf(float x, float y)
								 *
								 * In:   $f12 = x, $f14 = y (single precision)
								 * Out:  $f0  = remainder of x / y, carrying the sign of x
								 *
								 * The operands are widened to double (dx = $f0, dy = $f2) and the
								 * remainder is computed as dx - trunc(dx/dy)*dy in round-to-zero mode.
								 * When the exponents differ by 24 or more, dy is first replaced by a
								 * copy whose exponent is (exponent of dx) - 23 so that each truncated
								 * quotient fits in a 32-bit integer; this is repeated until the
								 * exponents are close enough.
								 *
								 * Special cases:
								 *   x or y NaN      -> returns the NaN operand (x if x is a NaN, else y)
								 *   x = +-Infinity  -> x - x (raises Invalid, returns NaN)
								 *   y = +-0         -> y / y (raises Invalid, returns NaN)
								 *   |x| < |y|       -> returns x
								 *
								 * Register roles: t3 = caller's fcsr, t8 = exponent field of dx,
								 * t9 = exponent field of dy, t2 = 0x7ff mask (reused as scratch at 30).
								 */
								fmodf:
									.frame  sp, 0, ra
									.prologue 0
									c.un.s	$f12, $f14		# x NaN or y NaN?
									cvt.d.s	$f0, $f12		# dx = x
									cvt.d.s	$f2, $f14		# dy = y
									bc1t	70f			# branch if x or y is a NaN
									dmfc1	t8, $f0			# all 64 bits of dx
									dmfc1	t9, $f2			# all 64 bits of dy (dy lives in $f2)
									dsra	t8, t8, 32		# keep the high (sign/exponent) words
									dsra	t9, t9, 32
									cfc1	t3, $31			# t3 = fcsr
									li	t2, 0x7ff
									sra	t8, 20
									sra	t9, 20
									and	t8, t2			# t8 = xptx
									and	t9, t2			# t9 = xpty
									beq	t8, t2, 80f		# branch if x == +/-Inf
									li.s	$f6, 0.0
									c.eq.s	$f6, $f14
									bc1t	90f			# branch if y == +/-0.0
									abs.d	$f2, $f2		# dy = fabs(dy)
									abs.d	$f6, $f0		# $f6 = fabs(dx)
									c.lt.d	$f6, $f2
									bc1t	100f			# branch if |x| < |y|
									and	t4, t3, 0xfffc0000
									or	t4, 1
									ctc1	t4, $31			# set round to zero mode with traps disabled
									addi	t0, t9, 24
									bge	t8, t0, 30f		# branch if xptx >= xpty + 24

									/* compute dx = dx - floor(dx/dy)*dy	*/

									div.d	$f4, $f0, $f2		# $f4 = nd == dx/dy
									cvt.w.d	$f4
									cvt.d.w	$f4			# $f4 = (int)nd
									mul.d	$f4, $f2		# $f4 = nd*dy
									sub.d	$f0, $f4		# dx = dx - nd*dy

									cvt.s.d	$f0			# convert result to single precision
									ctc1	t3, $31			# restore rounding mode
									j	ra

								30:
									/* scale dy up and compute dx = dx - floor(dx/(k*dy))*k*dy	*/

									mov.d	$f10, $f2		# dy1 = dy
									dmfc1	t2, $f10
									dsll	t2, 12			# strip sign and exponent,
									dsrl	t2, 12			# keeping only the mantissa
									subu	t0, t8, 23
									dsll	t0, 32+20		# move new exponent into position
									or	t2, t0
									dmtc1	t2, $f10		# xptdy1 = xptx - 23

									div.d	$f6, $f0, $f10		# $f6 = dx/dy1
									cvt.w.d	$f6
									cvt.d.w	$f6
									mul.d	$f6, $f10		# $f6 = nd*dy1
									sub.d	$f0, $f6		# dx = dx - nd*dy1

									dmfc1	t8, $f0
									dsll	t8, 1			# drop the sign bit
									dsrl	t8, 32+21		# update xptx
									addi	t0, t9, 24
									bge	t8, t0, 30b		# branch if xptx >= xpty + 24

									abs.d	$f4, $f0
									c.lt.d	$f4, $f2
									bc1f	40f			# branch if |dx| >= dy

									cvt.s.d	$f0			# convert result to single precision
									ctc1	t3, $31			# restore rounding mode
									j	ra

								40:
									div.d	$f4, $f0, $f2		# $f4 = nd == dx/dy
									cvt.w.d	$f4
									cvt.d.w	$f4			# $f4 = (int)nd
									mul.d	$f4, $f2		# $f4 = nd*dy
									sub.d	$f0, $f4		# dx = dx - nd*dy

									cvt.s.d	$f0			# convert result to single precision
									ctc1	t3, $31			# restore rounding mode
									j	ra

								70:	/* x NaN or y NaN */

									c.eq.s  $f12, $f12		# true when x is not a NaN
									bc1t    72f
									mov.s   $f0, $f12		# x is the NaN: return it
									j	ra

								72:
									mov.s   $f0,$f14		# otherwise y is the NaN
									j	ra

								80:	/* x = +-Infinity */

									ctc1    t3, $31
									/*
									 * Leave the NaN produced here in $f0.  Copying x over it
									 * would hand +-Infinity back to the caller.
									 */
									sub.s   $f0, $f12, $f12         /* raise Invalid, return NaN */
									j	ra

								90:	/* y == +/- 0.0 */

									ctc1    t3, $31
									div.s   $f0, $f14, $f14  /* raise Invalid, return NaN */
									j	ra

								100:	/* |x| < |y| */

									mov.s	$f0, $f12		# result = x
									j	ra

								.end fmodf


								.text .text$fmodm

								.ent set_fmod_err

								// set_fmod_err -- error tail of fmod for y = +-0.
								// Reached by "j set_fmod_err" from fmod, so ra is still fmod's return
								// address.  On entry $f12 = x, $f14 = y and $f0 = the default result
								// that fmod computed.  Builds the argument list and calls _except2,
								// then returns to fmod's caller.
								// NOTE(review): the argument layout and the way _except2 hands back
								// the final result are defined by _except2 itself -- confirm against
								// its prototype.
								set_fmod_err:

								#define FSIZE 48

									.frame	sp, FSIZE, ra

									.mask	0x80000000, -4

									subu	sp, FSIZE

									// ra is overwritten by the jal below
									sw	ra, FSIZE-4(sp)

									.prologue 1

									li	$4, FP_I	// exception mask

									li	$5, OP_FMOD	// operation code (function name index)

									// split the 64-bit image of x: $7 = high word, $6 = low word
									dmfc1	$6, $f12	// arg1

									dsrl	$7, $6, 32

									dsll	$6, $6, 32

									dsrl	$6, $6, 32

									s.d	$f14, 16(sp)	// arg2 (TODO:  pass 0.0 as arg2???, see above)

									s.d	$f0, 24(sp)	// default result

									cfc1	t7, $31		// floating point control/status register

									xor	t7, t7, 0xf80	// inverse exception enable bits

									// the modified fcsr image goes in the next stack slot
									sw	t7, 32(sp)

									jal	_except2

									lw	ra, FSIZE-4(sp)

									addu	sp, FSIZE

									j	ra

								#undef FSIZE

								.end set_fmod_err