windows-nt-4.0/private/fp32/tran/mips/fmodm.s


								/*

								 * |-----------------------------------------------------------|

								 * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc.      |

								 * | All Rights Reserved                                       |

								 * |-----------------------------------------------------------|

								 * |          Restricted Rights Legend                         |

								 * | Use, duplication, or disclosure by the Government is      |

								 * | subject to restrictions as set forth in                   |

								 * | subparagraph (c)(1)(ii) of the Rights in Technical        |

								 * | Data and Computer Software Clause of DFARS 252.227-7013.  |

								 * |         MIPS Computer Systems, Inc.                       |

								 * |         950 DeGuigne Avenue                               |

								 * |         Sunnyvale, California 94088-3650, USA             |

								 * |-----------------------------------------------------------|

								 */

								/* $Header: fmod.s,v 3000.3.1.6 91/10/09 11:14:56 zaineb Exp $ */


								.extern _except2

								.extern errno 4


								#include <kxmips.h>

								#include <trans.h>

								#include <fpieee.h>


								/* double fmod(double x, double y) */

								.text .text$fmodm
								.ent fmod_small
								fmod_small:
									.frame	sp, 16, ra
									.mask	0x80000000, 0
									/*
									 * Continuation of fmod for a divisor that is "almost subnormal"
									 * (fmod branches here when the exponent field of y is small
									 * but nonzero).
									 *
									 * On entry:  f0 = |x|, f2 = |y|, t0 = sign of x,
									 *            t2 = 2047<<20, t3 = saved fcsr,
									 *            fcsr = round-to-zero.
									 * On exit:   f0 = remainder.  The shared loop in fmod applies
									 *            the sign from t0 and writes t3 back to the fcsr
									 *            before it returns here.
									 *
									 * Method: multiply both operands by 2^57, take the remainder
									 * with the shared fmod1/fmod2 loop, then multiply the result
									 * by 2^-57.
									 */
									subu	sp, 16
									sw	ra, 16(sp)		/* slot is the sp value seen on entry */
									.prologue 1

									li.d	$f16, 1.4411518807585587e+17	/* 2^57 */
									li.d	$f18, 1.2474001934591999e+291	/* 2^(1024-57) */
									mul.d	$f2, $f16		/* f2 = |y| * 2^57 */
									c.lt.d	$f0, $f18		/* can |x| be scaled too? */
									bc1f	1f

									/* |x| < 2^(1024-57): scale x and reduce in a single pass */
									mul.d	$f0, $f16
									bal	fmod1
									b	2f

								1:	/* |x| * 2^57 would overflow */
									/* pass 1: reduce the unscaled |x| by the scaled |y|; t0 is
									   cleared so that fmod1 leaves the sign alone */
									li	t0, 0
									bal	fmod1
									/* fmod1 restored the saved fcsr; select round-to-zero again
									   and fetch the real sign of x for the final pass */
									li	t4, 1
									ctc1	t4, $31
									mfc1	t0, $f13
									/* pass 2: scale the partial remainder and finish via fmod2 */
									li.d	$f16, 1.4411518807585587e+17	/* 2^57 */
									mul.d	$f0, $f16
									bal	fmod2

								2:	/* undo the scaling and return */
									li.d	$f16, 6.9388939039072284e-18	/* 2^-57 */
									mul.d	$f0, $f16
									lw	ra, 16(sp)
									addu	sp, 16
									j	ra
								.end fmod_small


								.text .text$fmodm

								/*
								 * double fmod(double x, double y)
								 *
								 * x is read from $f12/$f13 and y from $f14/$f15; the result is
								 * left in $f0/$f1.
								 *
								 * Special cases handled here:
								 *   - x or y NaN:        returns x if x is a NaN, otherwise y (70:)
								 *   - |x| < |y|:         returns x (30:)
								 *   - x = +-Infinity:    computes x - x (80:)
								 *   - y = +-0:           computes y / y, then jumps to set_fmod_err
								 *   - y subnormal:       continues in fmod_subnormal
								 *   - y exponent field <= 0x039: continues in fmod_small
								 *
								 * Otherwise the remainder is produced by repeated subtraction of
								 * trunc(x/y) * y with the fcsr set to 1 (round-to-zero).  The
								 * caller's fcsr is kept in t3 and written back on every exit that
								 * modified it.
								 *
								 * fmod1 and fmod2 are additional entry points that other
								 * procedures in this file reach with bal; they return through
								 * the "j ra" at 36:.
								 *
								 * Inside the .set noreorder region, an instruction indented by
								 * one extra space sits in the delay slot of the branch above it
								 * and executes whether or not the branch is taken.
								 */

								.globl fmod

								.ent fmod

								fmod:

									.frame	sp, 0, ra

									.prologue 0

								.set noreorder

									c.un.d	$f12, $f14		/* x NaN or y NaN? */

									mfc1	t0, $f13		/* sign and exponent of x */

									mfc1	t1, $f15		/* sign and exponent of y */

									bc1t	70f

									 cfc1	t3, $31			/* t3 = fcsr */

									abs.d	$f0, $f12		/* f0 = |x| */

									abs.d	$f2, $f14		/* f2 = |y| */

									li	t2, (2047<<20)

									c.lt.d	$f0, $f2

									and	t8, t0, t2		/* check for x = +-Infinity */

									and	t9, t1, t2

									/* |x| < |y|: the answer is x itself */

									bc1t	30f

									 li	t4, 1

									/* exponent field of x all ones: x is +-Infinity (NaN was
									   already filtered out above) */

									beq	t8, t2, 80f

									 ctc1	t4, $31			/* set round to zero mode */

									beq	t9, 0, 90f		/* y is 0 or subnormal */

									 li	t8, 0x03900000

									/* exponent field of y <= 0x039 (57): take the scaled path */

									bleu	t9, t8, fmod_small	/* almost subnormals */

									 nop


								fmod1:	/* entry from fmod_subnormal, fmod_small, and fmodf_punt */

									/* f0 = |x|, f2 = |y|, t0 = sign of x, t2 = 2047<<20,

									   t3 = fcsr, fcsr = round-to-zero */


								20:	/* x >= y: one reduction step */

									div.d	$f8, $f0, $f2		/* q = x/y (>= 1.0) */

									mfc1	t8, $f2			/* f4 = y with low 27 bits 0 */

									mfc1	t4, $f1

									mfc1	t5, $f3

									mov.d	$f4, $f2

									srl	t8, 27

									sll	t8, 27

									mtc1	t8, $f4

									/* t4 = (exponent field of x - exponent field of y - 25) << 20 */

									and	t4, t2

									and	t5, t2

									subu	t4, t5

									subu	t4, (25<<20)

									/* positive t4: the quotient is too large for the direct
									   step, go scale y first */

									bgtz	t4, 40f

									 sub.d	$f6, $f2, $f4		/* f6 = low 27 bits of y */


								22:	/* q < 2^26 */

									/* x -= trunc(q) * y, with y split as f4 + f6 so that each
									   partial product is exact */

									cvt.w.d $f16, $f8		/* truncate */

									cvt.d.w	$f8, $f16

									mul.d	$f4, $f8		/* exact (26 x 26 = 52 bits) */

									mul.d	$f6, $f8		/* exact (27 x 26 = 53 bits) */

									sub.d	$f0, $f4		/* exact */

									sub.d	$f0, $f6		/* exact */

								fmod2:	/* entry from fmod_subnormal and fmod_small */

									/* loop while the remainder in f0 is still >= |y|; the first
									   nop separates the compare from the branch that tests it,
									   the second fills the branch delay slot */

									c.lt.d	$f0, $f2

									nop

									bc1f	20b

									nop

								.set reorder


								30:	/* x < y */

									/* negate remainder if dividend was negative */

									bgez	t0, 36f

									neg.d	$f0

									/* restore the fcsr captured in t3 and return */

								36:	ctc1	t3, $31

									j	ra


								40:	/* q >= 2^26 */

									/* t4 is positive here.  Adding it to the high words of a
									   copy of y (f10) and of f4 raises their exponent fields by
									   t4>>20, so the quotient against the scaled divisor is
									   small enough for the step at 22: */

									mfc1	t8, $f3

									mfc1	t9, $f5

									mov.d	$f10, $f2

									addu	t8, t4

									addu	t9, t4

									mtc1	t8, $f11

									mtc1	t9, $f5

									/* redo the quotient and the low part with the scaled y */

									div.d	$f8, $f0, $f10

									sub.d	$f6, $f10, $f4

									b	22b


								70:	/* x NaN or y NaN */

									/* x == x fails only when x is a NaN: return x in that
									   case, otherwise y must be the NaN, so return y */

									c.eq.d	$f12, $f12

									bc1t	72f

									mov.d	$f0, $f12

									j	ra

								72:	mov.d	$f0, $f14

									j	ra


								80:	/* x = +-Infinity */

									ctc1	t3, $31

									sub.d	$f0, $f12, $f12		/* raise Invalid, return NaN */

									j	ra


								90:	/* y is zero or subnormal */

									/* the exponent field of y is zero here; y is +-0 only if
									   the remaining high-word bits (sign shifted out) and the
									   whole low word are zero as well */

									mfc1	t8, $f14

									sll	t9, t1, 1

									bne	t9, 0, fmod_subnormal

									bne	t8, 0, fmod_subnormal


									/* y = +-0 */

									ctc1	t3, $31

									div.d	$f0, $f14, $f14		/* raise Invalid, return NaN */

									/* jump (not a call): set_fmod_err returns directly to the
									   caller of fmod */

									j	set_fmod_err

								.end fmod


								.text .text$fmodm
								.ent fmod_subnormal
								fmod_subnormal:
									.frame	sp, 16, ra
									.mask	0x80000000, 0
									/*
									 * Continuation of fmod for a subnormal divisor (fmod branches
									 * here when the exponent field of y is zero but y is not 0).
									 *
									 * On entry:  f0 = |x|, f2 = |y|, t0 = sign of x,
									 *            t2 = 2047<<20, t3 = saved fcsr,
									 *            fcsr = round-to-zero.
									 * On exit:   f0 = remainder.  The shared loop in fmod applies
									 *            the sign from t0 and writes t3 back to the fcsr
									 *            before it returns here.
									 *
									 * Method: multiply both operands by 2^114, take the remainder
									 * with the shared fmod1/fmod2 loop, then multiply the result
									 * by 2^-114.
									 */
									subu	sp, 16
									sw	ra, 16(sp)		/* slot is the sp value seen on entry */
									.prologue 1

									li.d	$f16, 2.0769187434139311e+34	/* 2^114 */
									li.d	$f18, 8.6555775981267394e+273	/* 2^(1024-114) */
									mul.d	$f2, $f16		/* f2 = |y| * 2^114 */
									c.lt.d	$f0, $f18		/* can |x| be scaled too? */
									bc1f	1f

									/* |x| < 2^(1024-114): scale x and reduce in a single pass */
									mul.d	$f0, $f16
									bal	fmod1
									b	2f

								1:	/* |x| * 2^114 would overflow */
									/* pass 1: reduce the unscaled |x| by the scaled |y|; t0 is
									   cleared so that fmod1 leaves the sign alone */
									li	t0, 0
									bal	fmod1
									/* fmod1 restored the saved fcsr; select round-to-zero again
									   and fetch the real sign of x for the final pass */
									li	t4, 1
									ctc1	t4, $31
									mfc1	t0, $f13
									/* pass 2: scale the partial remainder and finish via fmod2 */
									li.d	$f16, 2.0769187434139311e+34	/* 2^114 */
									mul.d	$f0, $f16
									bal	fmod2

								2:	/* undo the scaling and return */
									li.d	$f16, 4.8148248609680896e-35	/* 2^-114 */
									mul.d	$f0, $f16
									lw	ra, 16(sp)
									addu	sp, 16
									j	ra
								.end fmod_subnormal


								/* float fmodf(float x, float y) */


								.text .text$fmodm

								/*
								 * float fmodf(float x, float y)
								 *
								 * x is read from $f12 and y from $f14; the result is left in $f0.
								 *
								 * Special cases:
								 *   - x or y NaN:      returns x if x is a NaN, otherwise y (70:)
								 *   - |x| < |y|:       returns x (30:)
								 *   - x = +-Infinity:  raises Invalid and returns NaN (80:)
								 *   - y = +-0:         raises Invalid and returns NaN (90:)
								 *   - y subnormal, or exponent gap above 23: hands over to
								 *     fmodf_punt, which finishes in double precision
								 *
								 * Otherwise the remainder is reduced with the fcsr set to 1
								 * (round-to-zero): q = trunc(x/y) is formed in single precision
								 * and x - q*y is evaluated in double precision.  The caller's
								 * fcsr is kept in t3 and written back on every exit that
								 * modified it.
								 *
								 * Inside the .set noreorder region, an instruction indented by
								 * one extra space sits in the delay slot of the branch above it.
								 */

								.globl fmodf

								.ent fmodf

								fmodf:

									.frame	sp, 0, ra

									.prologue 0

								.set noreorder

									c.un.s	$f12, $f14		/* x NaN or y NaN? */

									mfc1	t0, $f12		/* sign and exponent of x */

									mfc1	t1, $f14		/* sign and exponent of y */

									bc1t	70f

									 cfc1	t3, $31			/* t3 = fcsr */

									abs.s	$f0, $f12		/* f0 = |x| */

									abs.s	$f2, $f14		/* f2 = |y| */

									li	t2, (255<<23)

									c.lt.s	$f0, $f2

									and	t8, t0, t2		/* check for x = +-Infinity */

									and	t9, t1, t2

									bc1t	30f

									 li	t4, 1

									beq	t8, t2, 80f

									 ctc1	t4, $31			/* set round to zero mode */

									beq	t9, 0, 90f		/* y is 0 or subnormal */

									 cvt.d.s $f4, $f2		/* f4 = |y| as a double */

								.set reorder


								20:	/* x >= y: one reduction step; f0 and f2 are single here */

									div.s	$f8, $f0, $f2		/* q = x/y (>= 1.0) */

									/* t4 = (exponent field of x - exponent field of y - 23) << 23 */

									mfc1	t4, $f0

									mfc1	t5, $f2

									and	t4, t2

									and	t5, t2

									subu	t4, t5

									subu	t4, (23<<23)

									/* quotient too large for the single-precision step */

									bgtz	t4, fmodf_punt


									/* q < 2^24 */

									cvt.w.s $f16, $f8		/* truncate */

									cvt.d.w	$f8, $f16

									mul.d	$f8, $f4		/* trunc(q) * |y|, in double */

									cvt.d.s	$f0			/* widen the remainder */

									sub.d	$f0, $f8		/* f0 = x - trunc(q) * |y| */


									/* Both f0 and f4 hold doubles at this point, so the loop
									   test has to be a double compare; a single compare would
									   look only at the low mantissa words. */

									c.lt.d	$f0, $f4

									cvt.s.d	$f0			/* back to single; condition bit is kept */

									bc1f	20b


								30:	/* x < y */

									/* negate remainder if dividend was negative */

									bgez	t0, 36f

									neg.s	$f0

								36:	ctc1	t3, $31			/* restore the caller's fcsr */

									j	ra


								70:	/* x NaN or y NaN */

									/* x == x fails only when x is a NaN: return x in that
									   case, otherwise return y */

									c.eq.s	$f12, $f12

									bc1t	72f

									mov.s	$f0, $f12

									j	ra

								72:	mov.s	$f0, $f14

									j	ra


								80:	/* x = +-Infinity */

									ctc1	t3, $31

									/* Inf - Inf: the NaN stays in f0 as the return value, the
									   same as fmod does for an infinite x */

									sub.s	$f0, $f12, $f12		/* raise Invalid, return NaN */

									j	ra


								90:	/* y is zero or subnormal */

									/* exponent field of y is zero; any other bit left after
									   shifting the sign out means y is a subnormal */

									sll	t9, t1, 1

									bne	t9, 0, fmodf_punt


									/* y = +-0 */

									ctc1	t3, $31

									div.s	$f0, $f14, $f14		/* raise Invalid, return NaN */

									j	ra

								.end fmodf


								.text .text$fmodm
								.ent fmodf_punt
								fmodf_punt:
									.frame	sp, 16, ra
									.mask	0x80000000, 0
									/*
									 * Continuation of fmodf for the cases its single-precision
									 * loop does not handle (subnormal y, or an exponent gap that
									 * is too large).  The operands are widened to double and the
									 * reduction is finished by the shared fmod1 loop in fmod.
									 *
									 * On entry:  f0 = |x| (single), f2 = |y| (single),
									 *            t0 = sign of x, t3 = saved fcsr,
									 *            fcsr = round-to-zero.
									 * On exit:   f0 = remainder (single).  fmod1 applies the sign
									 *            from t0 and writes t3 back to the fcsr.
									 */
									subu	sp, 16
									sw	ra, 16(sp)		/* slot is the sp value seen on entry */
									.prologue 1

									/* fmod1 reads its operands as doubles from f0 and f2, so
									   those are the registers to widen.  (f12/f14 still hold the
									   original signed arguments and are not inputs to fmod1.) */
									cvt.d.s	$f0
									cvt.d.s	$f2
									li	t2, (2047<<20)		/* fmod1 expects the double exponent mask */
									bal	fmod1
									cvt.s.d	$f0			/* narrow the result for the float caller */

									lw	ra, 16(sp)
									addu	sp, 16
									j	ra
								.end fmodf_punt


								.text .text$fmodm
								.ent set_fmod_err
								set_fmod_err:
								#define FSIZE 48
									.frame  sp, FSIZE, ra
									.mask   0x80000000, -4
									/*
									 * Error exit for fmod (reached with "j" from the y = +-0
									 * case).  Builds the argument list for _except2 and calls it.
									 *
									 * On entry:  f12 = x, f14 = y, f0 = default result.
									 * Arguments handed to _except2:
									 *   $4          FP_I      (exception mask)
									 *   $5          OP_FMOD   (operation code)
									 *   $6/$7       x
									 *   16(sp)      y
									 *   24(sp)      default result
									 *   32(sp)      fcsr with bits 0xf80 inverted
									 * NOTE(review): the value finally returned to the caller of
									 * fmod is whatever _except2 leaves behind - confirm against
									 * the _except2 definition.
									 */
									subu    sp, FSIZE
									sw      ra, FSIZE-4(sp)
									.prologue 1

									/* stack-passed arguments */
									cfc1	t7, $31			/* floating point control/status register */
									s.d	$f14, 16(sp)		/* arg2 */
									s.d	$f0, 24(sp)		/* default result */
									xor	t7, t7, 0xf80		/* flip the exception enable bits */
									sw	t7, 32(sp)

									/* register-passed arguments */
									mfc1.d	$6, $f12		/* arg1 */
									li	$5, OP_FMOD		/* operation code (function name index) */
									li	$4, FP_I		/* exception mask */

									jal	_except2

									lw      ra, FSIZE-4(sp)
									addu    sp, FSIZE
									j	ra
								#undef FSIZE
								.end set_fmod_err