windows-server-2003/base/crts/fpw32/tran/i386/adj_fdiv.asm


								        title   adj_fdiv   - routines to compensate for incorrect Pentium FDIV

								;***

								;adj_fdiv - routines to compensate for incorrect Pentium FDIV

								;

								;   Copyright (c) 1994-2001, Microsoft Corporation. All rights reserved.

								;

								;Purpose:

								;   Workarounds to correct for broken FDIV

								;

								;Revision History:

								;

								;   12/06/94	Jamie MacCalman

								;		initial version, based on Intel fix

								;   12/09/94	Jamie MacCalman

								;		added _adj_fpremX & _safe_fdivX entry points

								;   12/13/94	Jamie MacCalman

								;		upgraded to V.3 of Intel's workarounds

								;   12/19/94	Jamie MacCalman

								;		upgraded to V.4 of Intel's workarounds

								;   12/27/94	Jamie MacCalman

								;		upgraded to V.5 (aka "V1.0") of Intel's workarounds

								;    1/13/95	Jamie MacCalman

								;		added underscores to fdivp_sti_st & fdivrp_sti_st for ANSI conformance

								;

								;  The following code is a PRELIMINARY IMPLEMENTATION of a

								;  software patch for the floating point divide instructions.

								;

								;


									include cruntime.inc

									include mrt386.inc

									include elem87.inc


								;

								;  Stack variables for divide routines.

								;


								; Offsets into the STACK_SIZE-byte scratch frame that the wrappers
								; carve out with "sub esp, STACK_SIZE".  DENOM, NUMER and DENOM_SAVE
								; are relative to esp as seen by the wrapper code and macros.

								DENOM		EQU	0		; tbyte denominator slot

								NUMER		EQU	12		; tbyte numerator slot

								; PREV_CW and PATCH_CW are only used inside fdiv_main_routine in the
								; code visible here, where esp also covers the return address; seen
								; from the wrapper frame they are therefore at offsets 24 and 28.

								PREV_CW		EQU	28		; caller's FPU control word

								PATCH_CW 	EQU	32		; temporary (patched) control word


								DENOM_SAVE	EQU	32		; tbyte copy of the original denominator
												; (wrapper-frame offset, used by the
												; fdiv_st and fdiv_sti macros)


								; DENOM and NUMER as seen from inside fdiv_main_routine (+4 for the
								; return address pushed by the call).

								MAIN_DENOM	EQU	4

								MAIN_NUMER	EQU	16


								SPILL_SIZE	EQU	12		; bytes reserved to spill one tbyte register

								MEM_OPERAND	EQU	8		; memory operand offset after "push eax"
												; (saved eax + return address)

								STACK_SIZE	EQU	44		; size of the scratch frame

								SPILL_MEM_OPERAND	EQU	20	; MEM_OPERAND + SPILL_SIZE


								; Mask for the three mantissa bits that follow the four-bit pattern,
								; applied after the high mantissa dword has been doubled (shifted
								; left by one) in fdiv_main_routine.

								ONESMASK	EQU	0e000000h


								; Exponent-field masks.  An all-ones exponent (NaN or infinity) makes
								; the memory-operand entry points use the hardware divide directly.
								; DOUBLE_NAN is applied to the high dword of an m64real.

								SINGLE_NAN	EQU	07f800000h

								DOUBLE_NAN	EQU	07ff00000h


								; Interrupt vector raised for dispatch indices that do not correspond
								; to a divide encoding (vector 6 is the x86 invalid-opcode vector).

								ILLEGAL_OPC	EQU	6


								;

								; FPREM constants

								;


								; Layout of the FPREM scratch frame.  Each slot offset is derived from
								; the previous one, so the frame is: DENOM, DENOM_SAVE, NUMER (one
								; FPREM_FLT_SIZE-byte slot each), then three dwords PREV_CW, PATCH_CW
								; and SW.

								FPREM_FLT_SIZE		EQU	12

								FPREM_DENOM			EQU 0

								FPREM_DENOM_SAVE	EQU	FPREM_DENOM + FPREM_FLT_SIZE

								FPREM_NUMER			EQU FPREM_DENOM_SAVE + FPREM_FLT_SIZE

								FPREM_PREV_CW		EQU FPREM_NUMER + FPREM_FLT_SIZE

								FPREM_PATCH_CW		EQU FPREM_PREV_CW + 4

								FPREM_SW			EQU	FPREM_PATCH_CW + 4

								FPREM_STACK_SIZE	EQU FPREM_SW + 4

								FPREM_RET_SIZE		EQU	4	; size of a near return address

								FPREM_PUSH_SIZE		EQU	4	; size of one pushed register


								; Distance from esp inside _fprem_common to the frame base: one return
								; address plus the three registers it pushes (eax, ebx, ecx).

								FPREM_MAIN_FUDGE	EQU	FPREM_RET_SIZE + FPREM_PUSH_SIZE + FPREM_PUSH_SIZE + FPREM_PUSH_SIZE


								; The same frame slots as seen from inside _fprem_common.

								FPREM_MAIN_DENOM		EQU FPREM_DENOM + FPREM_MAIN_FUDGE

								FPREM_MAIN_DENOM_SAVE	EQU	FPREM_DENOM_SAVE + FPREM_MAIN_FUDGE

								FPREM_MAIN_NUMER		EQU FPREM_NUMER + FPREM_MAIN_FUDGE

								FPREM_MAIN_PREV_CW		EQU	FPREM_PREV_CW + FPREM_MAIN_FUDGE

								FPREM_MAIN_PATCH_CW		EQU	FPREM_PATCH_CW + FPREM_MAIN_FUDGE

								FPREM_MAIN_FPREM_SW		EQU	FPREM_SW + FPREM_MAIN_FUDGE


								; Mantissa bits that have to be one for the denominator to be at risk.
								; Applied to the dword read at offset +6 of the tbyte denominator, so
								; the low word holds the top 16 mantissa bits.

								FPREM_ONESMASK	EQU     700h


								.data


								; Lookup indexed by the four mantissa bits that follow the explicit
								; integer bit.  Non-zero entries (indices 1, 4, 7, 10 and 13) mark the
								; bit patterns that can expose the divide flaw.

								fdiv_risc_table	DB	0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0

								fdiv_scale_1  	DD	03f700000h		;0.9375 (15/16), single precision

								fdiv_scale_2	DD	03f880000h		;1.0625 (17/16), single precision

								one_shl_63  	DD	05f000000h		;2^63, single precision


								; FPREM counterparts.  The 8-byte constants below are little-endian
								; double-precision values.

								fprem_risc_table 	DB 	0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0

								fprem_scale 		DB 	0, 0, 0, 0, 0, 0, 0eeh, 03fh	; 0.9375

								one_shl_64 		DB 	0, 0, 0, 0, 0, 0, 0f0h, 043h	; 2^64

								one_shr_64 		DB 	0, 0, 0, 0, 0, 0, 0f0h, 03bh	; 2^-64

								one 			DB 	0, 0, 0, 0, 0, 0, 0f0h, 03fh	; 1.0

								half 			DB 	0, 0, 0, 0, 0, 0, 0e0h, 03fh	; 0.5

								; 10-byte extended-precision value: exponent 7ffeh, mantissa
								; 0ffff000000000000h.

								big_number		DB	0, 0, 0, 0, 0, 0, 0ffh, 0ffh, 0feh, 07fh


								; Debug-only public words.  They are not referenced by the code visible
								; in this part of the file.

								ifdef	DEBUG

									public	fpcw

									public	fpsw

								fpcw	dw	0

								fpsw	dw	0

								endif


								; FPU environment image.  The fields below add up to 28 bytes, which
								; matches ENV_SIZE.  STATUS_WORD is declared as a dword, so it also
								; spans the word that would otherwise be a "reserved_2" field.
								; NOTE(review): presumably mirrors the 32-bit protected-mode
								; fnstenv/fldenv layout -- confirm against the code that uses it; it is
								; not referenced in the part of the file visible here.

								FPU_STATE	STRUC

									CONTROL_WORD	DW	?

									reserved_1	DW	?

									STATUS_WORD	DD	?

									TAG_WORD	DW	?

									reserved_3	DW	?

									IP_OFFSET	DD	?

									CS_SLCT		DW	?

									OPCODE		DW	?

									DATA_OFFSET	DD	?

									OPERAND_SLCT	DW	?

									reserved_4	DW	?

								FPU_STATE	ENDS


								ENV_SIZE	EQU	28		; total size of FPU_STATE in bytes


								; Jump table used by _adj_fdiv_r: 64 near code offsets, selected by
								; the low six bits of eax.  Every group of eight consecutive entries
								; serves one stack register (entries 0-7 for ST(0), 8-15 for ST(1),
								; ... 56-63 for ST(7)); the table is laid out four entries per line.

								dispatch_table	DD	offset FLAT:label0,  offset FLAT:label1,  offset FLAT:label2,  offset FLAT:label3
										DD	offset FLAT:label4,  offset FLAT:label5,  offset FLAT:label6,  offset FLAT:label7

										DD	offset FLAT:label8,  offset FLAT:label9,  offset FLAT:label10, offset FLAT:label11
										DD	offset FLAT:label12, offset FLAT:label13, offset FLAT:label14, offset FLAT:label15

										DD	offset FLAT:label16, offset FLAT:label17, offset FLAT:label18, offset FLAT:label19
										DD	offset FLAT:label20, offset FLAT:label21, offset FLAT:label22, offset FLAT:label23

										DD	offset FLAT:label24, offset FLAT:label25, offset FLAT:label26, offset FLAT:label27
										DD	offset FLAT:label28, offset FLAT:label29, offset FLAT:label30, offset FLAT:label31

										DD	offset FLAT:label32, offset FLAT:label33, offset FLAT:label34, offset FLAT:label35
										DD	offset FLAT:label36, offset FLAT:label37, offset FLAT:label38, offset FLAT:label39

										DD	offset FLAT:label40, offset FLAT:label41, offset FLAT:label42, offset FLAT:label43
										DD	offset FLAT:label44, offset FLAT:label45, offset FLAT:label46, offset FLAT:label47

										DD	offset FLAT:label48, offset FLAT:label49, offset FLAT:label50, offset FLAT:label51
										DD	offset FLAT:label52, offset FLAT:label53, offset FLAT:label54, offset FLAT:label55

										DD	offset FLAT:label56, offset FLAT:label57, offset FLAT:label58, offset FLAT:label59
										DD	offset FLAT:label60, offset FLAT:label61, offset FLAT:label62, offset FLAT:label63


								; One word of storage named fpcw.  DEBUG builds already define (and
								; export) fpcw in the "ifdef DEBUG" section earlier in this data
								; segment, so define it here only for non-DEBUG builds; an
								; unconditional second definition is a symbol redefinition error when
								; DEBUG is set.

								ifndef	DEBUG

								fpcw	dw	0

								endif


								CODESEG


								;

								;  PRELIMINARY VERSION for register-register divides.

								;


													; In this implementation the

													; fdiv_main_routine is called,

													; therefore all the stack frame

													; locations are adjusted for the

													; return pointer.


								; fdiv_main_routine - flaw-safe divide of two tbyte operands held in
								; the caller's scratch frame.
								;
								;   In:     [esp+MAIN_NUMER]  tbyte numerator
								;           [esp+MAIN_DENOM]  tbyte denominator
								;           (offsets include the return address of this call)
								;   Out:    numerator / denominator pushed on the FPU stack (both
								;           operands are loaded here and combined by fdivp)
								;   Clobb:  eax, flags; the PREV_CW/PATCH_CW frame slots; the DENOM
								;           slot is rewritten on the denormal-denominator path
								;
								; When the denominator mantissa matches one of the risky bit patterns,
								; both operands are scaled by the same factor before the hardware
								; divide is issued.

								fdiv_main_routine PROC	NEAR


									fld     tbyte ptr [esp+MAIN_NUMER]	; load the numerator

									fld     tbyte ptr [esp+MAIN_DENOM]	; load the denominator

								retry:


								;  The following three lines test for denormals and zeros.

								;  A denormal or zero has a 0 in the explicit digit to the left of the

								;  binary point.  Since that bit is the high bit of the word, adding

								;  it to itself will produce a carry if and only if the number is not

								;  denormal or zero.

								;

									mov 	eax, [esp+MAIN_DENOM+4]	; get mantissa bits 32-63

									add 	eax,eax			; shift the one's bit onto carry

									jnc 	denormal		; if no carry, we're denormal


								;  The following three lines test the three bits after the four bit

								;  pattern (1,4,7,a,d).  If these three bits are not all one, then

								;  the denominator cannot expose the flaw.  This condition is tested by

								;  inverting the bits and testing that all are equal to zero afterward.


									xor 	eax, ONESMASK		; invert the bits that must be ones

									test	eax, ONESMASK		; and make sure they are all ones

									jz  	scale_if_needed		; if all are one scale numbers

									fdivp	st(1), st		; use of hardware is OK.

									ret


								;

								;  Now we test the four bits for one of the five patterns.

								;

								scale_if_needed:

									shr	eax, 28			; keep first 4 bits after point

									cmp	byte ptr fdiv_risc_table[eax], 0	; check for (1,4,7,a,d)

									jnz	divide_scaled		; are in potential problem area

									fdivp	st(1), st		; use of hardware is OK.

									ret


								divide_scaled:

									mov	eax, [esp + MAIN_DENOM+8]	; test denominator exponent

									and	eax, 07fffh             ; if pseudodenormal ensure that only

									jz	invalid_denom		; invalid exception flag is set

									cmp	eax, 07fffh             ; if NaN or infinity  ensure that only

									je	invalid_denom		; invalid exception flag is set

								;

								;  The following six lines turn off exceptions and set the

								;  precision control to 80 bits.  The former is necessary to

								;  force any traps to be taken at the divide instead of the scaling

								;  code.  The latter is necessary in order to get full precision for

								;  codes with incoming 32 and 64 bit precision settings.  If

								;  it can be guaranteed that before reaching this point, the underflow

								;  exception is masked and the precision control is at 80 bits, these

								;  six lines can be omitted.

								;

									fnstcw	[esp+PREV_CW]		; save caller's control word

									mov	eax, [esp+PREV_CW]	; only the low word is the control word

									or	eax, 033fh		; mask exceptions, pc=80

									and	eax, 0f3ffh		; set rounding mode to nearest

									mov	[esp+PATCH_CW], eax

									fldcw	[esp+PATCH_CW]		; mask exceptions & pc=80


								;  The following lines check the numerator exponent before scaling.

								;  This is in order to prevent underflow when scaling the numerator,

								;  which will cause a denormal exception flag to be set when the

								;  actual divide is performed. This flag would not have been set

								;  normally. If there is a risk of underflow, the scale factor is

								;  17/16 instead of 15/16.

								;

								 	mov	eax, [esp+MAIN_NUMER+8]	; test numerator exponent

								 	and	eax, 07fffh

								 	cmp	eax, 00001h		; smallest normal exponent?

								 	je	small_numer


									fmul	fdiv_scale_1		; scale denominator by 15/16

									fxch

									fmul	fdiv_scale_1		; scale numerator by 15/16

									fxch


								;

								;  The next line restores the users control word.  If the incoming

								;  control word had the underflow exception masked and precision

								;  control set to 80 bits, this line can be omitted.

								;


									fldcw	[esp+PREV_CW]		; restore caller's control word

									fdivp	st(1), st		; use of hardware is OK.

									ret


								small_numer:

									fmul	fdiv_scale_2		; scale denominator by 17/16

									fxch

									fmul	fdiv_scale_2		; scale numerator by 17/16

									fxch


								;

								;  The next line restores the users control word.  If the incoming

								;  control word had the underflow exception masked and precision

								;  control set to 80 bits, this line can be omitted.

								;


									fldcw	[esp+PREV_CW]		; restore caller's control word

									fdivp	st(1), st		; use of hardware is OK.

									ret


								denormal:

									mov	eax, [esp+MAIN_DENOM]	; test for whole mantissa == 0

									or	eax, [esp+MAIN_DENOM+4]	; test for whole mantissa == 0

									jnz	denormal_divide_scaled	; denominator is not zero

								invalid_denom:				; zero or invalid denominator

									fdivp	st(1), st		; use of hardware is OK.

									ret


								denormal_divide_scaled:

									mov	eax, [esp + MAIN_DENOM + 8]	; get exponent

									and	eax, 07fffh		; check for zero exponent

									jnz	invalid_denom		; explicit bit clear but exponent
													; non-zero: let the hardware handle it

								;

								;  The following six lines turn off exceptions and set the

								;  precision control to 80 bits.  The former is necessary to

								;  force any traps to be taken at the divide instead of the scaling

								;  code.  The latter is necessary in order to get full precision for

								;  codes with incoming 32 and 64 bit precision settings.  If

								;  it can be guaranteed that before reaching this point, the underflow

								;  exception is masked and the precision control is at 80 bits, these

								;  six lines can be omitted.

								;


									fnstcw	[esp+PREV_CW]		; save caller's control word

									mov	eax, [esp+PREV_CW]

									or	eax, 033fh		; mask exceptions, pc=80

									and	eax, 0f3ffh		; set rounding mode to nearest

									mov	[esp+PATCH_CW], eax

									fldcw	[esp+PATCH_CW]		; mask exceptions & pc=80


									mov	eax, [esp + MAIN_NUMER +8]	; test numerator exponent

									and	eax, 07fffh		; check for denormal numerator

									je	denormal_numer

									cmp	eax, 07fffh		; NaN or infinity

									je	invalid_numer

									mov	eax, [esp + MAIN_NUMER + 4]	; get bits 32..63 of mantissa

									add	eax, eax		; shift the first bit into carry

									jnc	invalid_numer		; if there is no carry, we have an

													; invalid numer

									jmp	numer_ok


								denormal_numer:

									mov	eax, [esp + MAIN_NUMER + 4]	; get bits 32..63 of mantissa

									add	eax, eax		; shift the first bit into carry

									jc	invalid_numer		; if there is a carry, we have an

													; invalid numer


								;  Rescale the denormal denominator by 2^63, store it back in the frame
								;  and rerun the checks from the top against the rescaled value.

								numer_ok:

									fxch

									fstp	st			; pop numerator

									fld 	st			; make copy of denominator

									fmul	dword ptr[one_shl_63]	; make denominator not denormal

									fstp	tbyte ptr [esp+MAIN_DENOM]	; save modified denominator

									fld 	tbyte ptr [esp+MAIN_NUMER]	; load numerator

									fxch				; restore proper order

									fwait


								;  The next line restores the users control word.  If the incoming

								;  control word had the underflow exception masked and precision

								;  control set to 80 bits, this line can be omitted.

								;


									fldcw	[esp+PREV_CW]		; restore caller's control word

									jmp	retry			; start the whole thing over


								invalid_numer:

								;

								;  The next line restores the users control word.  If the incoming

								;  control word had the underflow exception masked and precision

								;  control set to 80 bits, this line can be omitted.

								;

									fldcw	[esp + PREV_CW]

									fdivp	st(1), st		; use of hardware is OK.

									ret


								fdiv_main_routine	ENDP


								; fdivr_st - safe replacement for FDIVR ST,ST(i): ST = ST(i) / ST.
								;   reg_index         i
								;   reg_index_minus1  i - 1 (passed explicitly)
								; Expects esp to have been lowered by STACK_SIZE already; the macro
								; adds it back.  ST(i) is reloaded from the NUMER slot afterwards so
								; only ST changes.  eax is clobbered by fdiv_main_routine.

								fdivr_st	MACRO	reg_index, reg_index_minus1

									fstp	tbyte ptr [esp+DENOM]		; ST is the denominator

								IF	reg_index_minus1 GE 1

									fxch	st(reg_index_minus1)		; bring old ST(i) to the top

								ENDIF

									fstp	tbyte ptr [esp+NUMER]		; old ST(i) is the numerator

									call	fdiv_main_routine

								IF	reg_index_minus1 GE 1

									fxch	st(reg_index_minus1)		; undo the earlier exchange

								ENDIF

									fld	tbyte ptr [esp+NUMER]		; put old ST(i) back

									fxch	st(reg_index)			; quotient to ST

									add	esp, STACK_SIZE

								ENDM


								; fdivr_sti - safe replacement for FDIVR ST(i),ST: ST(i) = ST / ST(i).
								;   reg_index         i
								;   reg_index_minus1  i - 1 (passed explicitly)
								; Expects esp to have been lowered by STACK_SIZE already; the macro
								; adds it back.  ST is reloaded from the NUMER slot afterwards.
								; eax is clobbered by fdiv_main_routine.

								fdivr_sti	MACRO	reg_index, reg_index_minus1

									fstp	tbyte ptr [esp+NUMER]		; ST is the numerator

								IF	reg_index_minus1 GE 1

									fxch	st(reg_index_minus1)		; bring old ST(i) to the top

								ENDIF

									fstp	tbyte ptr [esp+DENOM]		; old ST(i) is the denominator

									call	fdiv_main_routine

								IF	reg_index_minus1 GE 1

									fxch	st(reg_index_minus1)		; quotient into the ST(i) position

								ENDIF

									fld	tbyte ptr [esp+NUMER]		; put old ST back on top

									add	esp, STACK_SIZE

								ENDM


								; fdivrp_sti - safe replacement for FDIVRP ST(i),ST:
								; ST(i) = ST / ST(i), then pop.
								;   reg_index         i (not used in the body)
								;   reg_index_minus1  i - 1
								; Expects esp to have been lowered by STACK_SIZE already; the macro
								; adds it back.  eax is clobbered by fdiv_main_routine.

								fdivrp_sti	MACRO	reg_index, reg_index_minus1

									fstp	tbyte ptr [esp+NUMER]		; ST is the numerator

								IF	reg_index_minus1 GE 1

									fxch	st(reg_index_minus1)		; bring old ST(i) to the top

								ENDIF

									fstp	tbyte ptr [esp+DENOM]		; old ST(i) is the denominator

									call	fdiv_main_routine

								IF	reg_index_minus1 GE 1

									fxch	st(reg_index_minus1)		; quotient into its final position

								ENDIF

									add	esp, STACK_SIZE

								ENDM


								; fdiv_st - safe replacement for FDIV ST,ST(i): ST = ST / ST(i).
								;   reg_index         i
								;   reg_index_minus1  i - 1 (passed explicitly)
								; Expects esp to have been lowered by STACK_SIZE already; the macro
								; adds it back.  The denominator is stored twice because
								; fdiv_main_routine may overwrite the DENOM slot; the untouched copy
								; in DENOM_SAVE is what gets restored to ST(i).
								; eax is clobbered by fdiv_main_routine.

								fdiv_st		MACRO	reg_index, reg_index_minus1

									fstp	tbyte ptr [esp+NUMER]		; ST is the numerator

								IF	reg_index_minus1 GE 1

									fxch	st(reg_index_minus1)		; bring old ST(i) to the top

								ENDIF

									fld	st				; duplicate the denominator

									fstp	tbyte ptr [esp+DENOM]

									fstp	tbyte ptr [esp+DENOM_SAVE]	; save original denom,

									call	fdiv_main_routine

								IF	reg_index_minus1 GE 1

									fxch	st(reg_index_minus1)		; undo the earlier exchange

								ENDIF

									fld	tbyte ptr [esp+DENOM_SAVE]	; put old ST(i) back

									fxch	st(reg_index)			; quotient to ST

									add	esp, STACK_SIZE

								ENDM


								; fdiv_sti - safe replacement for FDIV ST(i),ST: ST(i) = ST(i) / ST.
								;   reg_index         i
								;   reg_index_minus1  i - 1 (passed explicitly)
								; Expects esp to have been lowered by STACK_SIZE already; the macro
								; adds it back.  The denominator (old ST) is kept in DENOM_SAVE and
								; reloaded at the end, because fdiv_main_routine may overwrite the
								; DENOM slot.  eax is clobbered by fdiv_main_routine.

								fdiv_sti	MACRO	reg_index, reg_index_minus1

									fxch	st(reg_index)			; old ST(i) to the top

									fstp	tbyte ptr [esp+NUMER]		; old ST(i) is the numerator

								IF	reg_index_minus1 GE 1

									fxch	st(reg_index_minus1)		; old ST back to the top

								ENDIF

									fld	st				; duplicate the denominator

									fstp	tbyte ptr [esp+DENOM]

									fstp	tbyte ptr [esp+DENOM_SAVE]	; save original denom,

									call	fdiv_main_routine

								IF	reg_index_minus1 GE 1

									fxch	st(reg_index_minus1)		; quotient into the ST(i) position

								ENDIF

									fld	tbyte ptr [esp+DENOM_SAVE]	; put old ST back on top

									add	esp, STACK_SIZE

								ENDM


								; fdivp_sti - safe replacement for FDIVP ST(i),ST:
								; ST(i) = ST(i) / ST, then pop.
								;   reg_index         i (not used in the body)
								;   reg_index_minus1  i - 1
								; Expects esp to have been lowered by STACK_SIZE already; the macro
								; adds it back.  eax is clobbered by fdiv_main_routine.

								fdivp_sti	MACRO	reg_index, reg_index_minus1

									fstp	tbyte ptr [esp+DENOM]		; ST is the denominator

								IF	reg_index_minus1 GE 1

									fxch	st(reg_index_minus1)		; bring old ST(i) to the top

								ENDIF

									fstp	tbyte ptr [esp+NUMER]		; old ST(i) is the numerator

									call	fdiv_main_routine

								IF	reg_index_minus1 GE 1

									fxch	st(reg_index_minus1)		; quotient into its final position

								ENDIF

									add	esp, STACK_SIZE

								ENDM


								; _adj_fdiv_r - flaw-safe register-register divide, selected by eax.
								;
								;   In:     eax   low 6 bits = 8*i + op (upper 26 bits are ignored),
								;                 where i is the ST(i) register and op selects:
								;                   0  FDIV   ST,ST(i)      4  FDIV   ST(i),ST
								;                   2  FDIVR  ST,ST(i)      5  FDIVP  ST(i),ST
								;                   1,3  raise ILLEGAL_OPC  6  FDIVR  ST(i),ST
								;                                           7  FDIVRP ST(i),ST
								;   Out:    FPU stack as the selected instruction would leave it
								;   Clobb:  eax, flags
								;
								; A scratch frame of STACK_SIZE bytes is reserved before dispatching;
								; every target releases it (the macros do so themselves).  For i = 0
								; both operands are the same register, so the hardware instruction is
								; issued directly.

									public  _adj_fdiv_r

								_adj_fdiv_r      PROC    NEAR


									sub	esp, STACK_SIZE			; added back at end of fdiv_x macros

									and eax, 0000003FH			; upper 26 bits could be anything

									jmp	dword ptr dispatch_table[eax*4]


								label0::

									fdiv	st,st(0)		; D8 F0 	FDIV	ST,ST(0)

									add	esp, STACK_SIZE

									ret

								label1::

									add	esp, STACK_SIZE

									int	ILLEGAL_OPC

								label2::

									fdivr	st,st(0)		; D8 F8		FDIVR	ST,ST(0)

									add	esp, STACK_SIZE

									ret

								label3::

									add	esp, STACK_SIZE

									int	ILLEGAL_OPC

								label4::

									fdiv 	st(0),st		; DC F8/D8 F0	FDIV	ST(0),ST

									add	esp, STACK_SIZE

									ret

								label5::

									fdivp 	st(0),st		; DE F8		FDIVP	ST(0),ST

									add	esp, STACK_SIZE

									ret

								label6::

									fdivr 	st(0),st		; DC F0/DE F0	FDIVR	ST(0),ST

									add	esp, STACK_SIZE

									ret

								label7::

									fdivrp 	st(0),st		; DE F0		FDIVRP	ST(0),ST

									add	esp, STACK_SIZE

									ret

								; ST(1)

								label8::

									fdiv_st 1,0

									ret

								label9::

									add	esp, STACK_SIZE

									int	ILLEGAL_OPC

								label10::

									fdivr_st 1,0

									ret

								label11::

									add	esp, STACK_SIZE

									int	ILLEGAL_OPC

								label12::

									fdiv_sti 1,0

									ret

								label13::

									fdivp_sti 1,0

									ret

								label14::

									fdivr_sti 1,0

									ret

								label15::

									fdivrp_sti 1,0

									ret

								; ST(2)

								label16::

									fdiv_st 2,1

									ret

								label17::

									add	esp, STACK_SIZE

									int	ILLEGAL_OPC

								label18::

									fdivr_st 2,1

									ret

								label19::

									add	esp, STACK_SIZE

									int	ILLEGAL_OPC

								label20::

									fdiv_sti 2,1

									ret

								label21::

									fdivp_sti 2,1

									ret

								label22::

									fdivr_sti 2,1

									ret

								label23::

									fdivrp_sti 2,1

									ret

								; ST(3)

								label24::

									fdiv_st 3,2

									ret

								label25::

									add	esp, STACK_SIZE

									int	ILLEGAL_OPC

								label26::

									fdivr_st 3,2

									ret

								label27::

									add	esp, STACK_SIZE

									int	ILLEGAL_OPC

								label28::

									fdiv_sti 3,2

									ret

								label29::

									fdivp_sti 3,2

									ret

								label30::

									fdivr_sti 3,2

									ret

								label31::

									fdivrp_sti 3,2

									ret

								; ST(4)

								label32::

									fdiv_st 4,3

									ret

								label33::

									add	esp, STACK_SIZE

									int	ILLEGAL_OPC

								label34::

									fdivr_st 4,3

									ret

								label35::

									add	esp, STACK_SIZE

									int	ILLEGAL_OPC

								label36::

									fdiv_sti 4,3

									ret

								label37::

									fdivp_sti 4,3

									ret

								label38::

									fdivr_sti 4,3

									ret

								label39::

									fdivrp_sti 4,3

									ret

								; ST(5)

								label40::

									fdiv_st 5,4

									ret

								label41::

									add	esp, STACK_SIZE

									int	ILLEGAL_OPC

								label42::

									fdivr_st 5,4

									ret

								label43::

									add	esp, STACK_SIZE

									int	ILLEGAL_OPC

								label44::

									fdiv_sti 5,4

									ret

								label45::

									fdivp_sti 5,4

									ret

								label46::

									fdivr_sti 5,4

									ret

								label47::

									fdivrp_sti 5,4

									ret

								; ST(6)

								label48::

									fdiv_st 6,5

									ret

								label49::

									add	esp, STACK_SIZE

									int	ILLEGAL_OPC

								label50::

									fdivr_st 6,5

									ret

								label51::

									add	esp, STACK_SIZE

									int	ILLEGAL_OPC

								label52::

									fdiv_sti 6,5

									ret

								label53::

									fdivp_sti 6,5

									ret

								label54::

									fdivr_sti 6,5

									ret

								label55::

									fdivrp_sti 6,5

									ret

								; ST(7)

								label56::

									fdiv_st 7,6

									ret

								label57::

									add	esp, STACK_SIZE

									int	ILLEGAL_OPC

								label58::

									fdivr_st 7,6

									ret

								label59::

									add	esp, STACK_SIZE

									int	ILLEGAL_OPC

								label60::

									fdiv_sti 7,6

									ret

								label61::

									fdivp_sti 7,6

									ret

								label62::

									fdivr_sti 7,6

									ret

								label63::

									fdivrp_sti 7,6

									ret

								_adj_fdiv_r      ENDP


								; _fdivp_sti_st - flaw-safe FDIVP ST(1),ST: ST(1) = ST(1) / ST, pop.
								; Reserves its own STACK_SIZE scratch frame, which the fdivp_sti macro
								; releases, so esp is unchanged on return.  Clobbers eax and flags
								; (through fdiv_main_routine).

								_fdivp_sti_st	PROC	NEAR

												; for calling from mem routines

									sub	esp, STACK_SIZE			; added back at end of fdivp_sti macro

									fdivp_sti 1, 0

									ret

								_fdivp_sti_st	ENDP


								; _fdivrp_sti_st - flaw-safe FDIVRP ST(1),ST: ST(1) = ST / ST(1), pop.
								; Reserves its own STACK_SIZE scratch frame, which the fdivrp_sti macro
								; releases, so esp is unchanged on return.  Clobbers eax and flags
								; (through fdiv_main_routine).

								_fdivrp_sti_st	PROC	NEAR

												; for calling from mem routines

									sub	esp, STACK_SIZE			; added back at end of fdivrp_sti macro

									fdivrp_sti 1, 0

									ret

								_fdivrp_sti_st	ENDP


								;;; _adj_fdiv_m32 - FDIV m32real FIX

								;;

								;;	Input : m32real divisor passed on the CPU stack (4 bytes,

								;;		removed by this routine with "ret 4"); dividend in ST

								;;

								;;	Output: Result of FDIV in ST

								;;

								;;	eax is preserved.


									PUBLIC	_adj_fdiv_m32

								_adj_fdiv_m32	PROC	NEAR


									push	eax				; save eax

									mov	eax, [esp + MEM_OPERAND]	; check for

									and	eax, SINGLE_NAN			; an all-ones exponent

									cmp	eax, SINGLE_NAN			; (NaN or infinity)

									je	memory_divide_m32		; -> plain hardware divide


									fnstsw	ax				; get status word

									and	eax, 3800h			; get top of stack

									je	spill_fpstack			; is FP stack full?

									fld	dword ptr[esp + MEM_OPERAND]	; load m32real in ST

									call	_fdivp_sti_st			; do actual divide

									pop	eax

									ret 4

								; TOP field is zero: make room for the operand by parking the user's
								; ST(1) in memory for the duration of the divide.

								spill_fpstack:

									fxch

									sub	esp, SPILL_SIZE 		; make temp space

									fstp	tbyte ptr[esp ]			; save user's ST(1)

									fld	dword ptr[esp + SPILL_MEM_OPERAND] ; load m32 real

									call	_fdivp_sti_st			; do actual divide

									fld	tbyte ptr[esp]			; restore user's ST(1)

														; (_fdivp_sti_st leaves esp unchanged)

									fxch

									add	esp, SPILL_SIZE

									pop	eax

									ret 4

								memory_divide_m32:

									fdiv	dword ptr[esp + MEM_OPERAND]	; do actual divide

									pop	eax

									ret 4


								_adj_fdiv_m32	ENDP


								;;; _adj_fdiv_m64 - FDIV m64real FIX

								;;

								;;	Input : m64real divisor passed on the CPU stack (8 bytes,

								;;		removed by this routine with "ret 8"); dividend in ST

								;;

								;;	Output: Result of FDIV in ST

								;;

								;;	eax is preserved.


									PUBLIC	_adj_fdiv_m64

								_adj_fdiv_m64	PROC	NEAR


									push	eax				; save eax

									mov	eax, [esp + MEM_OPERAND + 4]	; high dword: check for

									and	eax, DOUBLE_NAN			; an all-ones exponent

									cmp	eax, DOUBLE_NAN			; (NaN or infinity)

									je	memory_divide_m64		; -> plain hardware divide


									fnstsw	ax				; get status word

									and	eax, 3800h			; get top of stack

									je	spill_fpstack_m64		; is FP stack full?

									fld	qword ptr[esp + MEM_OPERAND]	; load m64real in ST

									call	_fdivp_sti_st			; do actual divide

									pop	eax

									ret 8

								; TOP field is zero: make room for the operand by parking the user's
								; ST(1) in memory for the duration of the divide.

								spill_fpstack_m64:

									fxch

									sub	esp, SPILL_SIZE 		; make temp space

									fstp	tbyte ptr[esp]			; save user's ST(1)

									fld	qword ptr[esp + SPILL_MEM_OPERAND] ; load m64real

									call	_fdivp_sti_st			; do actual divide

									fld	tbyte ptr[esp]			; restore user's ST(1)

														; (_fdivp_sti_st leaves esp unchanged)

									fxch

									add	esp, SPILL_SIZE

									pop	eax

									ret 8

								memory_divide_m64:

									fdiv	qword ptr[esp + MEM_OPERAND]	; do actual divide

									pop	eax

									ret 8


								_adj_fdiv_m64	ENDP


								;;; _adj_fdiv_m16i - FDIV m16int FIX

								;;

								;;	Input : m16int divisor passed on the CPU stack; the routine

								;;		reads a word and removes 4 bytes with "ret 4"

								;;		(presumably the caller pushes it in a dword slot --

								;;		confirm against the caller); dividend in ST

								;;

								;;	Output: Result of FDIV in ST

								;;

								;;	eax is preserved.


									PUBLIC	_adj_fdiv_m16i

								_adj_fdiv_m16i	PROC	NEAR

									push	eax				; save eax

									fnstsw	ax				; get status word

									and	eax, 3800h			; get top of stack

									je	spill_fpstack_m16i		; is FP stack full?

									fild	word ptr[esp + MEM_OPERAND]	; load m16int in ST

									call	_fdivp_sti_st			; do actual divide

									pop	eax

									ret 4

								; TOP field is zero: make room for the operand by parking the user's
								; ST(1) in memory for the duration of the divide.

								spill_fpstack_m16i:

									fxch

									sub	esp, SPILL_SIZE 		; make temp space

									fstp	tbyte ptr[esp ]			; save user's ST(1)

									fild	word ptr[esp + SPILL_MEM_OPERAND] ; load m16int

									call	_fdivp_sti_st			; do actual divide

									fld	tbyte ptr[esp]			; restore user's ST(1)

														; (_fdivp_sti_st leaves esp unchanged)

									fxch

									add	esp, SPILL_SIZE

									pop	eax

									ret 4


								_adj_fdiv_m16i	ENDP


								;;; _adj_fdiv_m32i - FDIV m32int FIX

								;;

								;;	Input : m32int divisor passed on the CPU stack (4 bytes,

								;;		removed by this routine with "ret 4"); dividend in ST

								;;

								;;	Output: Result of FDIV in ST

								;;

								;;	eax is preserved.


									PUBLIC	_adj_fdiv_m32i

								_adj_fdiv_m32i	PROC	NEAR

									push	eax				; save eax

									fnstsw	ax				; get status word

									and	eax, 3800h			; get top of stack

									je	spill_fpstack_m32i		; is FP stack full?

									fild	dword ptr[esp + MEM_OPERAND]	; load m32int in ST

									call	_fdivp_sti_st			; do actual divide

									pop	eax

									ret 4

								; TOP field is zero: make room for the operand by parking the user's
								; ST(1) in memory for the duration of the divide.

								spill_fpstack_m32i:

									fxch

									sub	esp, SPILL_SIZE 		; make temp space

									fstp	tbyte ptr[esp ]			; save user's ST(1)

									fild	dword ptr[esp + SPILL_MEM_OPERAND] ; load m32int

									call	_fdivp_sti_st			; do actual divide

									fld	tbyte ptr[esp]			; restore user's ST(1)

														; (_fdivp_sti_st leaves esp unchanged)

									fxch

									add	esp, SPILL_SIZE

									pop	eax

									ret 4


								_adj_fdiv_m32i	ENDP


								;;; _adj_fdivr_m32 - FDIVR m32real FIX

								;;

								;;	Input : m32real dividend passed on the CPU stack (4 bytes,

								;;		removed by this routine with "ret 4"); divisor in ST

								;;

								;;	Output: Result of FDIVR in ST

								;;

								;;	eax is preserved.


									PUBLIC	_adj_fdivr_m32

								_adj_fdivr_m32	PROC	NEAR

									push	eax				; save eax

									mov	eax, [esp + MEM_OPERAND]	; check for

									and	eax, SINGLE_NAN			; an all-ones exponent

									cmp	eax, SINGLE_NAN			; (NaN or infinity)

									je	memory_divide_m32r		; -> plain hardware divide


									fnstsw	ax				; get status word

									and	eax, 3800h			; get top of stack

									je	spill_fpstack_m32r		; is FP stack full?

									fld	dword ptr[esp + MEM_OPERAND]	; load m32real in ST

									call	_fdivrp_sti_st			; do actual divide

									pop	eax

									ret 4

								; TOP field is zero: make room for the operand by parking the user's
								; ST(1) in memory for the duration of the divide.

								spill_fpstack_m32r:

									fxch

									sub	esp, SPILL_SIZE 		; make temp space

									fstp	tbyte ptr[esp ]			; save user's ST(1)

									fld	dword ptr[esp + SPILL_MEM_OPERAND] ; load m32 real

									call	_fdivrp_sti_st			; do actual divide

									fld	tbyte ptr[esp]			; restore user's ST(1)

														; (_fdivrp_sti_st leaves esp unchanged)

									fxch

									add	esp, SPILL_SIZE

									pop	eax

									ret 4

								memory_divide_m32r:

									fdivr	dword ptr[esp + MEM_OPERAND]	; do actual divide

									pop	eax

									ret 4


								_adj_fdivr_m32	ENDP


								;;; _adj_fdivr_m64 - FDIVR m64real FIX

								;;

								;;	Input : m64real dividend passed on the CPU stack (8 bytes,

								;;		removed by this routine with "ret 8"); divisor in ST

								;;

								;;	Output: Result of FDIVR in ST

								;;

								;;	eax is preserved.


									PUBLIC	_adj_fdivr_m64

								_adj_fdivr_m64	PROC	NEAR

									push	eax				; save eax

									mov	eax, [esp + MEM_OPERAND + 4]	; high dword: check for

									and	eax, DOUBLE_NAN			; an all-ones exponent

									cmp	eax, DOUBLE_NAN			; (NaN or infinity)

									je	memory_divide_m64r		; -> plain hardware divide


									fnstsw	ax				; get status word

									and	eax, 3800h			; get top of stack

									je	spill_fpstack_m64r		; is FP stack full?

									fld	qword ptr[esp + MEM_OPERAND]	; load m64real in ST

									call	_fdivrp_sti_st			; do actual divide

									pop	eax

									ret 8

								; TOP field is zero: make room for the operand by parking the user's
								; ST(1) in memory for the duration of the divide.

								spill_fpstack_m64r:

									fxch

									sub	esp, SPILL_SIZE 		; make temp space

									fstp	tbyte ptr[esp ]			; save user's ST(1)

									fld	qword ptr[esp + SPILL_MEM_OPERAND] ; load m64real

									call	_fdivrp_sti_st			; do actual divide

									fld	tbyte ptr[esp]			; restore user's ST(1)

														; (_fdivrp_sti_st leaves esp unchanged)

									fxch

									add	esp, SPILL_SIZE

									pop	eax

									ret 8

								memory_divide_m64r:

									fdivr	qword ptr[esp + MEM_OPERAND]	; do actual divide

									pop	eax

									ret 8


								_adj_fdivr_m64	ENDP


								;;; _adj_fdivr_m16i - FDIVR m16int FIX

								;;

								;;	Input : m16int dividend passed on the CPU stack; the routine

								;;		reads a word and removes 4 bytes with "ret 4"

								;;		(presumably the caller pushes it in a dword slot --

								;;		confirm against the caller); divisor in ST

								;;

								;;	Output: Result of FDIVR in ST

								;;

								;;	eax is preserved.


									PUBLIC	_adj_fdivr_m16i

								_adj_fdivr_m16i	PROC	NEAR

									push	eax				; save eax

									fnstsw	ax				; get status word

									and	eax, 3800h			; get top of stack

									je	spill_fpstack_m16ir		; is FP stack full?

									fild	word ptr[esp + MEM_OPERAND]	; load m16int in ST

									call	_fdivrp_sti_st			; do actual divide

									pop	eax

									ret 4

								; TOP field is zero: make room for the operand by parking the user's
								; ST(1) in memory for the duration of the divide.

								spill_fpstack_m16ir:

									fxch

									sub	esp, SPILL_SIZE 		; make temp space

									fstp	tbyte ptr[esp ]			; save user's ST(1)

									fild	word ptr[esp + SPILL_MEM_OPERAND] ; load m16int

									call	_fdivrp_sti_st			; do actual divide

									fld	tbyte ptr[esp]			; restore user's ST(1)

														; (_fdivrp_sti_st leaves esp unchanged)

									fxch

									add	esp, SPILL_SIZE

									pop	eax

									ret 4


								_adj_fdivr_m16i	ENDP


								;;; _adj_fdivr_m32i - FDIVR m32int FIX

								;;

								;;	Input : m32int dividend passed on the CPU stack (4 bytes,

								;;		removed by this routine with "ret 4"); divisor in ST

								;;

								;;	Output: Result of FDIVR in ST

								;;

								;;	eax is preserved.


									PUBLIC	_adj_fdivr_m32i

								_adj_fdivr_m32i	PROC	NEAR

									push	eax				; save eax

									fnstsw	ax				; get status word

									and	eax, 3800h			; get top of stack

									je	spill_fpstack_m32ir		; is FP stack full?

									fild	dword ptr[esp + MEM_OPERAND]	; load m32int in ST

									call	_fdivrp_sti_st			; do actual divide

									pop	eax

									ret 4

								; TOP field is zero: make room for the operand by parking the user's
								; ST(1) in memory for the duration of the divide.

								spill_fpstack_m32ir:

									fxch

									sub	esp, SPILL_SIZE 		; make temp space

									fstp	tbyte ptr[esp ]			; save user's ST(1)

									fild	dword ptr[esp + SPILL_MEM_OPERAND] ; load m32int

									call	_fdivrp_sti_st			; do actual divide

									fld	tbyte ptr[esp]			; restore user's ST(1)

														; (_fdivrp_sti_st leaves esp unchanged)

									fxch

									add	esp, SPILL_SIZE

									pop	eax

									ret 4


								_adj_fdivr_m32i	ENDP


								;;; _safe_fdiv - FDIV fix

								;;

								;;	Pentium-safe version of FDIV, aka FDIVP ST(1),ST(0)

								;;

								;; 	Input : Numerator in ST(1), Denominator in ST(0)

								;;

								;;	Output: Result of FDIV in ST(0); both inputs are popped

								;;

								;;	eax is preserved (fdiv_main_routine uses it as scratch).


									PUBLIC  _safe_fdiv

								_safe_fdiv      PROC    NEAR


									push eax

									sub	esp, STACK_SIZE			; scratch frame for the operands

									fstp	tbyte ptr [esp+DENOM]		; ST(0) is the denominator

									fstp	tbyte ptr [esp+NUMER]		; old ST(1) is the numerator

									call	fdiv_main_routine		; pushes NUMER / DENOM

									add	esp, STACK_SIZE

									pop eax

									ret


								_safe_fdiv	ENDP


								;;; _safe_fdivr - FDIVR fix

								;;

								;;	Pentium-safe version of FDIVR, aka FDIVRP ST(1),ST(0)

								;;

								;; 	Input : Numerator in ST(0), Denominator in ST(1)

								;;

								;;	Output: Result of FDIVR in ST(0); both inputs are popped

								;;

								;;	eax is preserved (fdiv_main_routine uses it as scratch).


									public  _safe_fdivr

								_safe_fdivr      PROC    NEAR


									push eax

									sub	esp, STACK_SIZE			; scratch frame for the operands

									fstp	tbyte ptr [esp+NUMER]		; ST(0) is the numerator

									fstp	tbyte ptr [esp+DENOM]		; old ST(1) is the denominator

									call	fdiv_main_routine		; pushes NUMER / DENOM

									add	esp, STACK_SIZE

									pop eax

									ret


								_safe_fdivr	ENDP


								;;; _adj_fprem - FPREM FIX

								;;

								;;	Based on PRELIMINARY Intel code.


								;;; _fprem_common - FPREM worker used by _adj_fprem
								;;
								;;	Input : the numerator and a non-denormal denominator sit in the
								;;		caller's frame and are addressed here through the
								;;		FPREM_MAIN_* offsets.  edx carries two flags:
								;;		  bit 0 - operands were pre-scaled; the result is
								;;			  multiplied by one_shr_64 before returning
								;;		  bit 1 - the original denominator is already stored
								;;			  in the DENOM_SAVE slot
								;;
								;;	Output: ST(0) = remainder, ST(1) = denominator.
								;;		eax, ebx and ecx are preserved.

								_fprem_common	PROC	NEAR

									push	eax
									push	ebx
									push	ecx

									; Quick filter on the denominator's bit pattern: anything that
									; fails the mask or the table lookup goes straight to hardware.
									mov	eax, [FPREM_MAIN_DENOM+6+esp]	; exponent and high 16 bits of mantissa
									xor	eax, FPREM_ONESMASK		; invert bits that have to be one
									test	eax, FPREM_ONESMASK		; check bits that have to be one
									jnz	remainder_hardware_ok
									shr	eax, 11
									and	eax, 0fh
									cmp	byte ptr fprem_risc_table[eax], 0	; check for (1,4,7,a,d)
									jz	remainder_hardware_ok

								; The denominator has the bit pattern. Weed out the funny cases like NaNs
								; before applying the software version. Our caller guarantees that the
								; denominator is not a denormal. Here we check for:
								;	denominator	inf, NaN, unnormal
								;	numerator	inf, NaN, unnormal, denormal

									mov	eax, [FPREM_MAIN_DENOM+6+esp]	; exponent and high 16 bits of mantissa
									and	eax, 07fff0000h			; mask the exponent only
									cmp	eax, 07fff0000h			; check for INF or NaN
									je	remainder_hardware_ok

									mov	eax, [FPREM_MAIN_NUMER+6+esp]	; exponent and high 16 bits of mantissa
									and	eax, 07fff0000h			; mask the exponent only
									jz	remainder_hardware_ok		; jif numerator denormal
									cmp	eax, 07fff0000h			; check for INF or NaN
									je	remainder_hardware_ok

									; Doubling the high mantissa dword shifts the explicit integer bit
									; into CF.  An unnormal has that bit clear, so the branch has to be
									; on "no carry".  Testing ZF instead (as this code used to) is taken
									; whenever any lower mantissa bit is set, regardless of the integer
									; bit, which kept the software path below from ever being reached.
									mov	eax, [esp + FPREM_MAIN_NUMER + 4]	; high mantissa bits - numerator
									add	eax, eax			; set carry if explicit bit set
									jnc	remainder_hardware_ok		; jmp if numerator is unnormal
									mov	eax, [esp + FPREM_MAIN_DENOM + 4]	; high mantissa bits - denominator
									add	eax, eax			; set carry if explicit bit set
									jnc	remainder_hardware_ok		; jmp if denominator is unnormal

								rem_patch:
									mov	eax, [FPREM_MAIN_DENOM+8+esp]	; sign and exponent of y (denominator)
									and	eax, 07fffh			; clear sy
									add	eax, 63				; evaluate ey + 63
									mov	ebx, [FPREM_MAIN_NUMER+8+esp]	; sign and exponent of x (numerator)
									and	ebx, 07fffh			; clear sx
									sub	ebx, eax			; evaluate the exponent difference (ex - ey)
									ja	rem_large			; if ex > ey + 63, case of large arguments

								; Reduce the numerator in small safe steps until its exponent is less
								; than ey + 10, then let the hardware finish.
								rem_patch_loop:
									mov	eax, [FPREM_MAIN_DENOM+8+esp]	; sign and exponent of y (denominator)
									and	eax, 07fffh			; clear sy
									add	eax, 10				; evaluate ey + 10
									mov	ebx, [FPREM_MAIN_NUMER+8+esp]	; sign and exponent of x (numerator)
									and	ebx, 07fffh			; clear sx
									sub	ebx, eax			; evaluate the exponent difference (ex - ey)
									js	remainder_hardware_ok		; safe if ey + 10 > ex

									fld	tbyte ptr [FPREM_MAIN_NUMER+esp]	; load the numerator
									mov	eax, [FPREM_MAIN_DENOM+8+esp]	; sign and exponent of y (denominator)
									mov	ebx, [FPREM_MAIN_NUMER+8+esp]	; sign and exponent of x (numerator)
									and	ebx, 07fffh			; clear sx
									mov	ecx, ebx
									sub	ebx, eax
									and	ebx, 07h
									or	ebx, 04h			; ebx = step, always in 4..7
									sub	ecx, ebx			; ecx = ex - step
									mov	ebx, eax
									and	ebx, 08000h			; keep sy
									or	ecx, ebx			; merge the sign of y
									mov	dword ptr [FPREM_MAIN_DENOM+8+esp], ecx
									fld	tbyte ptr [FPREM_MAIN_DENOM+esp]	; load the shifted denominator
									mov	dword ptr [FPREM_MAIN_DENOM+8+esp], eax	; restore the initial denominator
									fxch
									fprem					; this rem is safe
									fstp	tbyte ptr [FPREM_MAIN_NUMER+esp]	; update the numerator
									fstp	st(0)				; pop the stack
									jmp	rem_patch_loop

								rem_large:
									test	edx, 02h			; is denominator already saved
									jnz	already_saved
									fld	tbyte ptr[esp + FPREM_MAIN_DENOM]
									fstp	tbyte ptr[esp + FPREM_MAIN_DENOM_SAVE]	; save denominator

								already_saved:
									; Save user's precision control and institute 80.  The fp ops in
									; rem_large_loop must not round to user's precision (if it is less
									; than 80) because the hardware would not have done so.  We are
									; aping the hardware here, which is all extended.

									fnstcw	[esp+FPREM_MAIN_PREV_CW]	; save caller's control word
									mov	eax, dword ptr[esp + FPREM_MAIN_PREV_CW]
									or	eax, 033fh			; mask exceptions, pc=80
									mov	[esp + FPREM_MAIN_PATCH_CW], eax
									fldcw	[esp + FPREM_MAIN_PATCH_CW]

									; Loop count = (((ex - ey) AND 3fh) OR 20h) + 1
									mov	eax, [FPREM_MAIN_DENOM+8+esp]	; sign and exponent of y (denominator)
									and	eax, 07fffh			; clear sy
									mov	ebx, [FPREM_MAIN_NUMER+8+esp]	; sign and exponent of x (numerator)
									and	ebx, 07fffh			; clear sx
									sub	ebx, eax			; evaluate the exponent difference
									and	ebx, 03fh
									or	ebx, 020h
									add	ebx, 1
									mov	ecx, ebx

									mov	eax, [FPREM_MAIN_DENOM+8+esp]	; sign and exponent of y (denominator)
									mov	ebx, [FPREM_MAIN_NUMER+8+esp]	; sign and exponent of x (numerator)
									and	ebx, 07fffh			; clear sx
									and	eax, 08000h			; keep sy
									or	ebx, eax			; merge the sign of y
									mov	dword ptr[FPREM_MAIN_DENOM+8+esp], ebx	; make ey equal to ex (scaled denominator)
									fld	tbyte ptr [FPREM_MAIN_DENOM+esp]	; load the scaled denominator
									fabs
									fld	tbyte ptr [FPREM_MAIN_NUMER+esp]	; load the numerator
									fabs

								; Compare, conditionally subtract, halve the denominator: ecx times.
								rem_large_loop:
									fcom
									fnstsw	ax
									and	eax, 00100h			; C0 set: numerator < denominator
									jnz	rem_no_sub
									fsub	st, st(1)
								rem_no_sub:
									fxch
									fmul	qword ptr half
									fxch
									sub	ecx, 1				; decrement the loop counter
									jnz	rem_large_loop

									mov	ebx, [FPREM_MAIN_NUMER+8+esp]	; sign and exponent of x (numerator)
									fstp	tbyte ptr[esp + FPREM_MAIN_NUMER]	; save result
									fstp	st				; toss modified denom
									fld	tbyte ptr[esp + FPREM_MAIN_DENOM_SAVE]
									fld	tbyte ptr[big_number]		; force C2 to be set
									fprem
									fstp	st
									fld	tbyte ptr[esp + FPREM_MAIN_NUMER]	; restore saved result

									fldcw	[esp + FPREM_MAIN_PREV_CW]	; restore caller's control word
									and	ebx, 08000h			; keep sx
									jz	rem_done
									fchs					; result takes the numerator's sign
									jmp	rem_done

								remainder_hardware_ok:
									fld	tbyte ptr [FPREM_MAIN_DENOM+esp]	; load the denominator
									fld	tbyte ptr [FPREM_MAIN_NUMER+esp]	; load the numerator
									fprem					; and finally do a remainder

								; prem_main_routine end

								rem_done:
									test	edx, 03h
									jz	rem_exit			; no flags set: nothing to undo
									fnstsw	[esp + FPREM_MAIN_FPREM_SW]	; save Q0 Q1 and Q2
									test	edx, 01h
									jz	do_not_de_scale

								; De-scale the result.  Go to pc=80 so the fmul does not round to the
								; user's precision (fprem does not round the result).
									fnstcw	[esp + FPREM_MAIN_PREV_CW]	; save callers control word
									mov	eax, [esp + FPREM_MAIN_PREV_CW]
									or	eax, 0300h			; pc = 80
									mov	[esp + FPREM_MAIN_PATCH_CW], eax
									fldcw	[esp + FPREM_MAIN_PATCH_CW]
									fmul	qword ptr one_shr_64
									fldcw	[esp + FPREM_MAIN_PREV_CW]	; restore callers CW

								do_not_de_scale:
									mov	eax, [esp + FPREM_MAIN_FPREM_SW]
									fxch
									fstp	st				; drop the working denominator
									fld	tbyte ptr[esp + FPREM_MAIN_DENOM_SAVE]	; original denominator back in ST(1)
									fxch
									and	eax, 04300h			; restore saved Q0, Q1, Q2
									sub	esp, ENV_SIZE
									fnstenv	[esp]
									and	[esp].STATUS_WORD, 0bcffh
									or	[esp].STATUS_WORD, eax
									fldenv	[esp]
									add	esp, ENV_SIZE

								rem_exit:
									pop	ecx
									pop	ebx
									pop	eax
									ret

								_fprem_common	ENDP


								    PUBLIC  _adj_fprem

								;;; _adj_fprem - Pentium-safe FPREM
								;;
								;;	Input : ST(0) = numerator, ST(1) = denominator
								;;
								;;	Output: ST(0) = remainder, ST(1) = denominator, as left by
								;;		_fprem_common (or by a plain FPREM when the denominator
								;;		is zero).
								;;
								;;	edx is preserved; it is used to pass flags to _fprem_common
								;;	(bit 0 = operands were scaled, bit 1 = denominator saved).
								;;	NOTE(review): eax is loaded below without being saved first, so
								;;	it does not survive the call - confirm that every caller treats
								;;	eax as scratch.

								_adj_fprem	PROC	NEAR
									push	edx
									sub	esp, FPREM_STACK_SIZE
									fstp	tbyte ptr [FPREM_NUMER+esp]
									fstp	tbyte ptr [FPREM_DENOM+esp]
									xor	edx, edx			; no flags yet

								; prem_main_routine begin
									mov	eax,[FPREM_DENOM+6+esp]		; exponent and high 16 bits of mantissa
									test	eax,07fff0000h			; check for denormal
									jz	fprem_denormal
									call	_fprem_common
									add	esp, FPREM_STACK_SIZE
									pop	edx
									ret

								fprem_denormal:
									fld	tbyte ptr [FPREM_DENOM+esp]	; load the denominator
									fld	tbyte ptr [FPREM_NUMER+esp]	; load the numerator
									mov	eax, [FPREM_DENOM+esp]		; test for whole mantissa == 0
									or	eax, [FPREM_DENOM+4+esp]	; test for whole mantissa == 0
									jz	remainder_hardware_ok_l		; denominator is zero

									fxch
									fstp	tbyte ptr[esp + FPREM_DENOM_SAVE]	; save org denominator
									fld	tbyte ptr[esp + FPREM_DENOM]
									fxch
									or	edx, 02h			; tell _fprem_common it is saved

								;
								; For this we need pc=80.  Also, mask exceptions so we don't take any
								; denormal operand exceptions.  It is guaranteed that the descaling
								; later on will take underflow, which is what the hardware would have done
								; on a normal fprem.
								;
									fnstcw	[FPREM_PREV_CW+esp]		; save caller's control word
									mov	eax, [FPREM_PREV_CW+esp]
									or	eax, 0033fh			; mask exceptions, pc=80
									mov	[FPREM_PATCH_CW+esp], eax
									fldcw	[FPREM_PATCH_CW+esp]		; mask exceptions & pc=80

								; The denominator is a denormal.  For most numerators, scale both numerator
								; and denominator to get rid of denormals.  Then execute the common code
								; with the flag set to indicate that the result must be de-scaled.
								; For large numerators this won't work because the scaling would cause
								; overflow.  In this case we know the numerator is large, the denominator
								; is small (denormal), so the exponent difference is also large.  This means
								; the rem_large code will be used and this code depends on the difference
								; in exponents modulo 64.  Adding 64 to the denominator's exponent
								; doesn't change the modulo 64 difference.  So we can scale the denominator
								; by 2^64, making it not denormal, and this won't affect the result.
								;
								; To start with, figure out if numerator is large

									mov	eax, [esp + FPREM_NUMER + 8]	; load numerator exponent
									and	eax, 7fffh			; isolate numerator exponent
									cmp	eax, 7fbeh			; compare Nexp to Maxexp-64
									ja	big_numer_rem_de		; jif big numerator

								; So the numerator is not large: scale both numerator and denominator

									or	edx, 1				; edx bit 0: result must be de-scaled
									fmul	qword ptr one_shl_64		; make numerator not denormal
									fstp	tbyte ptr[esp + FPREM_NUMER]
									fmul	qword ptr one_shl_64		; make denominator not denormal
									fstp	tbyte ptr[esp + FPREM_DENOM]
									jmp	scaling_done

								; The numerator is large.  Scale only the denominator, which will not
								; change the result which we know will be partial.  Set the scale flag
								; to false.
								big_numer_rem_de:
								; We must do this with pc=80 to avoid rounding to single/double.
								; In this case we do not mask exceptions so that we will take
								; denormal operand, as would the hardware.
								;
								; The PREV_CW slot already holds the caller's control word from the
								; fnstcw above.  It must not be stored again here: the live control
								; word is now the exceptions-masked one, and saving it would both
								; defeat the "do not mask" intent and make scaling_done hand the
								; masked word back to the caller.
									mov	eax, [FPREM_PREV_CW+esp]	; caller's control word
									or	eax, 00300h			; pc=80
									mov	[FPREM_PATCH_CW+esp], eax
									fldcw	[FPREM_PATCH_CW+esp]		; pc=80

									fstp	st				; Toss numerator
									fmul	qword ptr one_shl_64		; make denominator not denormal
									fstp	tbyte ptr[esp + FPREM_DENOM]

								; Restore the control word which was fiddled to scale at 80-bit precision.
								; Then call the common code.
								scaling_done:
									fldcw	[esp + FPREM_PREV_CW]		; restore callers control word
									call	_fprem_common
									add	esp, FPREM_STACK_SIZE
									pop	edx
									ret

								remainder_hardware_ok_l:
									fprem					; and finally do a remainder

									add	esp, FPREM_STACK_SIZE
									pop	edx
									ret

								_adj_fprem	ENDP


								;

								; FPREM1 code begins here

								;


								;;; _fprem1_common - FPREM1 worker used by _adj_fprem1
								;;
								;;	Input : the numerator and a non-denormal denominator sit in the
								;;		caller's frame and are addressed here through the
								;;		FPREM_MAIN_* offsets.  edx carries two flags:
								;;		  bit 0 - operands were pre-scaled; the result is
								;;			  multiplied by one_shr_64 before returning
								;;		  bit 1 - the original denominator is already stored
								;;			  in the DENOM_SAVE slot
								;;
								;;	Output: ST(0) = remainder, ST(1) = denominator.
								;;		eax, ebx and ecx are preserved.

								_fprem1_common	PROC	NEAR

									push	eax
									push	ebx
									push	ecx

									; Quick filter on the denominator's bit pattern: anything that
									; fails the mask or the table lookup goes straight to hardware.
									mov	eax, [FPREM_MAIN_DENOM+6+esp]	; exponent and high 16 bits of mantissa
									xor	eax, FPREM_ONESMASK		; invert bits that have to be one
									test	eax, FPREM_ONESMASK		; check bits that have to be one
									jnz	remainder1_hardware_ok
									shr	eax, 11
									and	eax, 0fh
									cmp	byte ptr fprem_risc_table[eax], 0	; check for (1,4,7,a,d)
									jz	remainder1_hardware_ok

								; The denominator has the bit pattern. Weed out the funny cases like NaNs
								; before applying the software version. Our caller guarantees that the
								; denominator is not a denormal. Here we check for:
								;	denominator	inf, NaN, unnormal
								;	numerator	inf, NaN, unnormal, denormal

									mov	eax, [FPREM_MAIN_DENOM+6+esp]	; exponent and high 16 bits of mantissa
									and	eax, 07fff0000h			; mask the exponent only
									cmp	eax, 07fff0000h			; check for INF or NaN
									je	remainder1_hardware_ok

									mov	eax, [FPREM_MAIN_NUMER+6+esp]	; exponent and high 16 bits of mantissa
									and	eax, 07fff0000h			; mask the exponent only
									jz	remainder1_hardware_ok		; jif numerator denormal
									cmp	eax, 07fff0000h			; check for INF or NaN
									je	remainder1_hardware_ok

									; Doubling the high mantissa dword shifts the explicit integer bit
									; into CF.  An unnormal has that bit clear, so the branch has to be
									; on "no carry".  Testing ZF instead (as this code used to) is taken
									; whenever any lower mantissa bit is set, regardless of the integer
									; bit, which kept the software path below from ever being reached.
									mov	eax, [esp + FPREM_MAIN_NUMER + 4]	; high mantissa bits - numerator
									add	eax, eax			; set carry if explicit bit set
									jnc	remainder1_hardware_ok		; jmp if numerator is unnormal
									mov	eax, [esp + FPREM_MAIN_DENOM + 4]	; high mantissa bits - denominator
									add	eax, eax			; set carry if explicit bit set
									jnc	remainder1_hardware_ok		; jmp if denominator is unnormal

								rem1_patch:
									mov	eax, [FPREM_MAIN_DENOM+8+esp]	; sign and exponent of y (denominator)
									and	eax, 07fffh			; clear sy
									add	eax, 63				; evaluate ey + 63
									mov	ebx, [FPREM_MAIN_NUMER+8+esp]	; sign and exponent of x (numerator)
									and	ebx, 07fffh			; clear sx
									sub	ebx, eax			; evaluate the exponent difference (ex - ey)
									ja	rem1_large			; if ex > ey + 63, case of large arguments

								; Reduce the numerator in small safe steps until its exponent is less
								; than ey + 10, then let the hardware finish.
								rem1_patch_loop:
									mov	eax, [FPREM_MAIN_DENOM+8+esp]	; sign and exponent of y (denominator)
									and	eax, 07fffh			; clear sy
									add	eax, 10				; evaluate ey + 10
									mov	ebx, [FPREM_MAIN_NUMER+8+esp]	; sign and exponent of x (numerator)
									and	ebx, 07fffh			; clear sx
									sub	ebx, eax			; evaluate the exponent difference (ex - ey)
									js	remainder1_hardware_ok		; safe if ey + 10 > ex

									fld	tbyte ptr [FPREM_MAIN_NUMER+esp]	; load the numerator
									mov	eax, [FPREM_MAIN_DENOM+8+esp]	; sign and exponent of y (denominator)
									mov	ebx, [FPREM_MAIN_NUMER+8+esp]	; sign and exponent of x (numerator)
									and	ebx, 07fffh			; clear sx
									mov	ecx, ebx
									sub	ebx, eax
									and	ebx, 07h
									or	ebx, 04h			; ebx = step, always in 4..7
									sub	ecx, ebx			; ecx = ex - step
									mov	ebx, eax
									and	ebx, 08000h			; keep sy
									or	ecx, ebx			; merge the sign of y
									mov	dword ptr [FPREM_MAIN_DENOM+8+esp], ecx
									fld	tbyte ptr [FPREM_MAIN_DENOM+esp]	; load the shifted denominator
									mov	dword ptr [FPREM_MAIN_DENOM+8+esp], eax	; restore the initial denominator
									fxch
									fprem					; this rem is safe
									fstp	tbyte ptr [FPREM_MAIN_NUMER+esp]	; update the numerator
									fstp	st(0)				; pop the stack
									jmp	rem1_patch_loop

								rem1_large:
									; The "denominator already saved" flag lives in edx bit 1, exactly
									; as in _fprem_common.  ebx holds the exponent difference at this
									; point, so testing it would skip or repeat the save at random.
									test	edx, 02h			; is denominator already saved
									jnz	already_saved1
									fld	tbyte ptr[esp + FPREM_MAIN_DENOM]
									fstp	tbyte ptr[esp + FPREM_MAIN_DENOM_SAVE]	; save denominator

								already_saved1:
									; Save user's precision control and institute 80.  The fp ops in
									; rem1_large_loop must not round to user's precision (if it is less
									; than 80) because the hardware would not have done so.  We are
									; aping the hardware here, which is all extended.

									fnstcw	[esp+FPREM_MAIN_PREV_CW]	; save caller's control word
									mov	eax, dword ptr[esp + FPREM_MAIN_PREV_CW]
									or	eax, 033fh			; mask exceptions, pc=80
									mov	[esp + FPREM_MAIN_PATCH_CW], eax
									fldcw	[esp + FPREM_MAIN_PATCH_CW]

									; Loop count = (((ex - ey) AND 3fh) OR 20h) + 1
									mov	eax, [FPREM_MAIN_DENOM+8+esp]	; sign and exponent of y (denominator)
									and	eax, 07fffh			; clear sy
									mov	ebx, [FPREM_MAIN_NUMER+8+esp]	; sign and exponent of x (numerator)
									and	ebx, 07fffh			; clear sx
									sub	ebx, eax			; evaluate the exponent difference
									and	ebx, 03fh
									or	ebx, 020h
									add	ebx, 1
									mov	ecx, ebx

									mov	eax, [FPREM_MAIN_DENOM+8+esp]	; sign and exponent of y (denominator)
									mov	ebx, [FPREM_MAIN_NUMER+8+esp]	; sign and exponent of x (numerator)
									and	ebx, 07fffh			; clear sx
									and	eax, 08000h			; keep sy
									or	ebx, eax			; merge the sign of y
									mov	dword ptr[FPREM_MAIN_DENOM+8+esp], ebx	; make ey equal to ex (scaled denominator)
									fld	tbyte ptr [FPREM_MAIN_DENOM+esp]	; load the scaled denominator
									fabs
									fld	tbyte ptr [FPREM_MAIN_NUMER+esp]	; load the numerator
									fabs

								; Compare, conditionally subtract, halve the denominator: ecx times.
								rem1_large_loop:
									fcom
									fnstsw	ax
									and	eax, 00100h			; C0 set: numerator < denominator
									jnz	rem1_no_sub
									fsub	st, st(1)
								rem1_no_sub:
									fxch
									fmul	qword ptr half
									fxch
									sub	ecx, 1				; decrement the loop counter
									jnz	rem1_large_loop

									mov	ebx, [FPREM_MAIN_NUMER+8+esp]	; sign and exponent of x (numerator)
									fstp	tbyte ptr[esp + FPREM_MAIN_NUMER]	; save result
									fstp	st				; toss modified denom
									fld	tbyte ptr[esp + FPREM_MAIN_DENOM_SAVE]
									fld	tbyte ptr[big_number]		; force C2 to be set
									fprem1
									fstp	st
									fld	tbyte ptr[esp + FPREM_MAIN_NUMER]	; restore saved result

									fldcw	[esp + FPREM_MAIN_PREV_CW]	; restore caller's control word
									and	ebx, 08000h			; keep sx
									jz	rem1_done
									fchs					; result takes the numerator's sign
									jmp	rem1_done

								remainder1_hardware_ok:
									fld	tbyte ptr [FPREM_MAIN_DENOM+esp]	; load the denominator
									fld	tbyte ptr [FPREM_MAIN_NUMER+esp]	; load the numerator
									fprem1					; and finally do a remainder

								; prem1_main_routine end

								rem1_done:
									test	edx, 03h
									jz	rem1_exit			; no flags set: nothing to undo
									fnstsw	[esp + FPREM_MAIN_FPREM_SW]	; save Q0 Q1 and Q2
									test	edx, 01h
									jz	do_not_de_scale1

								; De-scale the result.  Go to pc=80 so the fmul does not round to the
								; user's precision (fprem does not round the result).
									fnstcw	[esp + FPREM_MAIN_PREV_CW]	; save callers control word
									mov	eax, [esp + FPREM_MAIN_PREV_CW]
									or	eax, 0300h			; pc = 80
									mov	[esp + FPREM_MAIN_PATCH_CW], eax
									fldcw	[esp + FPREM_MAIN_PATCH_CW]
									fmul	qword ptr one_shr_64
									fldcw	[esp + FPREM_MAIN_PREV_CW]	; restore callers CW

								do_not_de_scale1:
									mov	eax, [esp + FPREM_MAIN_FPREM_SW]
									fxch
									fstp	st				; drop the working denominator
									fld	tbyte ptr[esp + FPREM_MAIN_DENOM_SAVE]	; original denominator back in ST(1)
									fxch
									and	eax, 04300h			; restore saved Q0, Q1, Q2
									sub	esp, ENV_SIZE
									fnstenv	[esp]
									and	[esp].STATUS_WORD, 0bcffh
									or	[esp].STATUS_WORD, eax
									fldenv	[esp]
									add	esp, ENV_SIZE

								rem1_exit:
									pop	ecx
									pop	ebx
									pop	eax
									ret

								_fprem1_common	ENDP


									PUBLIC	_adj_fprem1

								;;; _adj_fprem1 - Pentium-safe FPREM1
								;;
								;;	Input : ST(0) = numerator, ST(1) = denominator
								;;
								;;	Output: ST(0) = remainder, ST(1) = denominator, as left by
								;;		_fprem1_common (or by a plain FPREM1 when the
								;;		denominator is zero).
								;;
								;;	edx is preserved; it is used to pass flags to _fprem1_common
								;;	(bit 0 = operands were scaled, bit 1 = denominator saved).
								;;	NOTE(review): eax is loaded below without being saved first, so
								;;	it does not survive the call - confirm that every caller treats
								;;	eax as scratch.

								_adj_fprem1	PROC	NEAR
									push	edx
									sub	esp, FPREM_STACK_SIZE
									fstp	tbyte ptr [FPREM_NUMER+esp]
									fstp	tbyte ptr [FPREM_DENOM+esp]
									xor	edx, edx			; no flags yet

								; prem1_main_routine begin
									mov	eax,[FPREM_DENOM+6+esp]		; exponent and high 16 bits of mantissa
									test	eax,07fff0000h			; check for denormal
									jz	denormal1
									call	_fprem1_common
									add	esp, FPREM_STACK_SIZE
									pop	edx
									ret

								denormal1:
									fld	tbyte ptr [FPREM_DENOM+esp]	; load the denominator
									fld	tbyte ptr [FPREM_NUMER+esp]	; load the numerator
									mov	eax, [FPREM_DENOM+esp]		; test for whole mantissa == 0
									or	eax, [FPREM_DENOM+4+esp]	; test for whole mantissa == 0
									jz	remainder1_hardware_ok_l	; denominator is zero

									fxch
									fstp	tbyte ptr[esp + FPREM_DENOM_SAVE]	; save org denominator
									fld	tbyte ptr[esp + FPREM_DENOM]
									fxch
									or	edx, 02h			; tell _fprem1_common it is saved

								;
								; For this we need pc=80.  Also, mask exceptions so we don't take any
								; denormal operand exceptions.  It is guaranteed that the descaling
								; later on will take underflow, which is what the hardware would have done
								; on a normal fprem.
								;
									fnstcw	[FPREM_PREV_CW+esp]		; save caller's control word
									mov	eax, [FPREM_PREV_CW+esp]
									or	eax, 0033fh			; mask exceptions, pc=80
									mov	[FPREM_PATCH_CW+esp], eax
									fldcw	[FPREM_PATCH_CW+esp]		; mask exceptions & pc=80

								; The denominator is a denormal.  For most numerators, scale both numerator
								; and denominator to get rid of denormals.  Then execute the common code
								; with the flag set to indicate that the result must be de-scaled.
								; For large numerators this won't work because the scaling would cause
								; overflow.  In this case we know the numerator is large, the denominator
								; is small (denormal), so the exponent difference is also large.  This means
								; the rem1_large code will be used and this code depends on the difference
								; in exponents modulo 64.  Adding 64 to the denominator's exponent
								; doesn't change the modulo 64 difference.  So we can scale the denominator
								; by 2^64, making it not denormal, and this won't affect the result.
								;
								; To start with, figure out if numerator is large

									mov	eax, [esp + FPREM_NUMER + 8]	; load numerator exponent
									and	eax, 7fffh			; isolate numerator exponent
									cmp	eax, 7fbeh			; compare Nexp to Maxexp-64
									ja	big_numer_rem1_de		; jif big numerator

								; So the numerator is not large: scale both numerator and denominator

									or	edx, 1				; edx bit 0: result must be de-scaled
									fmul	qword ptr one_shl_64		; make numerator not denormal
									fstp	tbyte ptr[esp + FPREM_NUMER]
									fmul	qword ptr one_shl_64		; make denominator not denormal
									fstp	tbyte ptr[esp + FPREM_DENOM]
									jmp	scaling_done1

								; The numerator is large.  Scale only the denominator, which will not
								; change the result which we know will be partial.  Set the scale flag
								; to false.
								big_numer_rem1_de:
								; We must do this with pc=80 to avoid rounding to single/double.
								; In this case we do not mask exceptions so that we will take
								; denormal operand, as would the hardware.
								;
								; The PREV_CW slot already holds the caller's control word from the
								; fnstcw above.  It must not be stored again here: the live control
								; word is now the exceptions-masked one, and saving it would both
								; defeat the "do not mask" intent and make scaling_done1 hand the
								; masked word back to the caller.
									mov	eax, [FPREM_PREV_CW+esp]	; caller's control word
									or	eax, 00300h			; pc=80
									mov	[FPREM_PATCH_CW+esp], eax
									fldcw	[FPREM_PATCH_CW+esp]		; pc=80

									fstp	st				; Toss numerator
									fmul	qword ptr one_shl_64		; make denominator not denormal
									fstp	tbyte ptr[esp + FPREM_DENOM]

								; Restore the control word which was fiddled to scale at 80-bit precision.
								; Then call the common code.
								scaling_done1:
									fldcw	[esp + FPREM_PREV_CW]		; restore callers control word
									call	_fprem1_common
									add	esp, FPREM_STACK_SIZE
									pop	edx
									ret

								remainder1_hardware_ok_l:
									; This is the FPREM1 entry point, so the hardware fallback uses
									; FPREM1 like every other path of this routine.
									fprem1					; and finally do a remainder

									add	esp, FPREM_STACK_SIZE
									pop	edx
									ret

								_adj_fprem1	ENDP


									PUBLIC	_safe_fprem

								;;; _safe_fprem - alias entry point for _adj_fprem
								;;
								;;	Adds nothing of its own: operands and results are whatever
								;;	_adj_fprem takes and leaves.  Implemented as a tail jump, so
								;;	_adj_fprem returns straight to our caller.

								_safe_fprem	PROC	NEAR
									jmp	_adj_fprem			; tail call, no frame of our own
								_safe_fprem	ENDP


									PUBLIC	_safe_fprem1

								;;; _safe_fprem1 - alias entry point for _adj_fprem1
								;;
								;;	Adds nothing of its own: operands and results are whatever
								;;	_adj_fprem1 takes and leaves.  Implemented as a tail jump, so
								;;	_adj_fprem1 returns straight to our caller.

								_safe_fprem1	PROC	NEAR
									jmp	_adj_fprem1			; tail call, no frame of our own
								_safe_fprem1	ENDP


								;;; _adj_fpatan - FPATAN entry point
								;;
								;;	Dummy entry point: no software correction is applied, the
								;;	hardware FPATAN instruction is executed as is.

									PUBLIC	_adj_fpatan

								_adj_fpatan	PROC	NEAR
									fpatan					; straight hardware instruction
									ret
								_adj_fpatan	ENDP


								;;; _adj_fptan - FPTAN entry point
								;;
								;;	Dummy entry point: no software correction is applied, the
								;;	hardware FPTAN instruction is executed as is.

									PUBLIC	_adj_fptan

								_adj_fptan	PROC	NEAR
									fptan					; straight hardware instruction
									ret
								_adj_fptan	ENDP


									end