; (Web code-viewer banner text that preceded the source was removed here; the viewer reported 1659 lines, 46 KiB.)
title adj_fdiv - routines to compensate for incorrect Pentium FDIV
|
|
;***
|
|
;adj_fdiv - routines to compensate for incorrect Pentium FDIV
|
|
;
|
|
; Copyright (c) 1994-2001, Microsoft Corporation. All rights reserved.
|
|
;
|
|
;Purpose:
|
|
; Workarounds to correct for broken FDIV
|
|
;
|
|
;Revision History:
|
|
;
|
|
; 12/06/94 Jamie MacCalman
|
|
; initial version, based on Intel fix
|
|
; 12/09/94 Jamie MacCalman
|
|
; added _adj_fpremX & _safe_fdivX entry points
|
|
; 12/13/94 Jamie MacCalman
|
|
; upgraded to V.3 of Intel's workarounds
|
|
; 12/19/94 Jamie MacCalman
|
|
; upgraded to V.4 of Intel's workarounds
|
|
; 12/27/94 Jamie MacCalman
|
|
; upgraded to V.5 (aka "V1.0") of Intel's workarounds
|
|
; 1/13/95 Jamie MacCalman
|
|
; added underscores to fdivp_sti_st & fdivrp_sti_st for ANSI conformance
|
|
;
|
|
; The following code is a PRELIMINARY IMPLEMENTATION of a
|
|
; software patch for the floating point divide instructions.
|
|
;
|
|
;
|
|
|
|
|
|
include cruntime.inc
|
|
include mrt386.inc
|
|
include elem87.inc
|
|
|
|
;
|
|
; Stack variables for divide routines.
|
|
;
|
|
|
|
DENOM EQU 0
|
|
NUMER EQU 12
|
|
PREV_CW EQU 28
|
|
PATCH_CW EQU 32
|
|
|
|
DENOM_SAVE EQU 32
|
|
|
|
MAIN_DENOM EQU 4
|
|
MAIN_NUMER EQU 16
|
|
|
|
SPILL_SIZE EQU 12
|
|
MEM_OPERAND EQU 8
|
|
STACK_SIZE EQU 44
|
|
SPILL_MEM_OPERAND EQU 20
|
|
|
|
ONESMASK EQU 0e000000h
|
|
|
|
SINGLE_NAN EQU 07f800000h
|
|
DOUBLE_NAN EQU 07ff00000h
|
|
|
|
ILLEGAL_OPC EQU 6
|
|
|
|
;
|
|
; FPREM constants
|
|
;
|
|
|
|
FPREM_FLT_SIZE EQU 12
|
|
FPREM_DENOM EQU 0
|
|
FPREM_DENOM_SAVE EQU FPREM_DENOM + FPREM_FLT_SIZE
|
|
FPREM_NUMER EQU FPREM_DENOM_SAVE + FPREM_FLT_SIZE
|
|
FPREM_PREV_CW EQU FPREM_NUMER + FPREM_FLT_SIZE
|
|
FPREM_PATCH_CW EQU FPREM_PREV_CW + 4
|
|
FPREM_SW EQU FPREM_PATCH_CW + 4
|
|
FPREM_STACK_SIZE EQU FPREM_SW + 4
|
|
FPREM_RET_SIZE EQU 4
|
|
FPREM_PUSH_SIZE EQU 4
|
|
|
|
FPREM_MAIN_FUDGE EQU FPREM_RET_SIZE + FPREM_PUSH_SIZE + FPREM_PUSH_SIZE + FPREM_PUSH_SIZE
|
|
|
|
FPREM_MAIN_DENOM EQU FPREM_DENOM + FPREM_MAIN_FUDGE
|
|
FPREM_MAIN_DENOM_SAVE EQU FPREM_DENOM_SAVE + FPREM_MAIN_FUDGE
|
|
FPREM_MAIN_NUMER EQU FPREM_NUMER + FPREM_MAIN_FUDGE
|
|
FPREM_MAIN_PREV_CW EQU FPREM_PREV_CW + FPREM_MAIN_FUDGE
|
|
FPREM_MAIN_PATCH_CW EQU FPREM_PATCH_CW + FPREM_MAIN_FUDGE
|
|
FPREM_MAIN_FPREM_SW EQU FPREM_SW + FPREM_MAIN_FUDGE
|
|
|
|
FPREM_ONESMASK EQU 700h
|
|
|
|
|
|
.data
|
|
|
|
fdiv_risc_table DB 0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0
|
|
fdiv_scale_1 DD 03f700000h ;0.9375
|
|
fdiv_scale_2 DD 03f880000h ;1.0625
|
|
one_shl_63 DD 05f000000h
|
|
|
|
fprem_risc_table DB 0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0
|
|
fprem_scale DB 0, 0, 0, 0, 0, 0, 0eeh, 03fh
|
|
one_shl_64 DB 0, 0, 0, 0, 0, 0, 0f0h, 043h
|
|
one_shr_64 DB 0, 0, 0, 0, 0, 0, 0f0h, 03bh
|
|
one DB 0, 0, 0, 0, 0, 0, 0f0h, 03fh
|
|
half DB 0, 0, 0, 0, 0, 0, 0e0h, 03fh
|
|
big_number DB 0, 0, 0, 0, 0, 0, 0ffh, 0ffh, 0feh, 07fh
|
|
|
|
ifdef DEBUG
|
|
public fpcw
|
|
public fpsw
|
|
fpcw dw 0
|
|
fpsw dw 0
|
|
endif
|
|
|
|
FPU_STATE STRUC
|
|
CONTROL_WORD DW ?
|
|
reserved_1 DW ?
|
|
STATUS_WORD DD ?
|
|
TAG_WORD DW ?
|
|
reserved_3 DW ?
|
|
IP_OFFSET DD ?
|
|
CS_SLCT DW ?
|
|
OPCODE DW ?
|
|
DATA_OFFSET DD ?
|
|
OPERAND_SLCT DW ?
|
|
reserved_4 DW ?
|
|
FPU_STATE ENDS
|
|
|
|
ENV_SIZE EQU 28
|
|
|
|
|
|
|
|
dispatch_table DD offset FLAT:label0
|
|
DD offset FLAT:label1
|
|
DD offset FLAT:label2
|
|
DD offset FLAT:label3
|
|
DD offset FLAT:label4
|
|
DD offset FLAT:label5
|
|
DD offset FLAT:label6
|
|
DD offset FLAT:label7
|
|
DD offset FLAT:label8
|
|
DD offset FLAT:label9
|
|
DD offset FLAT:label10
|
|
DD offset FLAT:label11
|
|
DD offset FLAT:label12
|
|
DD offset FLAT:label13
|
|
DD offset FLAT:label14
|
|
DD offset FLAT:label15
|
|
DD offset FLAT:label16
|
|
DD offset FLAT:label17
|
|
DD offset FLAT:label18
|
|
DD offset FLAT:label19
|
|
DD offset FLAT:label20
|
|
DD offset FLAT:label21
|
|
DD offset FLAT:label22
|
|
DD offset FLAT:label23
|
|
DD offset FLAT:label24
|
|
DD offset FLAT:label25
|
|
DD offset FLAT:label26
|
|
DD offset FLAT:label27
|
|
DD offset FLAT:label28
|
|
DD offset FLAT:label29
|
|
DD offset FLAT:label30
|
|
DD offset FLAT:label31
|
|
DD offset FLAT:label32
|
|
DD offset FLAT:label33
|
|
DD offset FLAT:label34
|
|
DD offset FLAT:label35
|
|
DD offset FLAT:label36
|
|
DD offset FLAT:label37
|
|
DD offset FLAT:label38
|
|
DD offset FLAT:label39
|
|
DD offset FLAT:label40
|
|
DD offset FLAT:label41
|
|
DD offset FLAT:label42
|
|
DD offset FLAT:label43
|
|
DD offset FLAT:label44
|
|
DD offset FLAT:label45
|
|
DD offset FLAT:label46
|
|
DD offset FLAT:label47
|
|
DD offset FLAT:label48
|
|
DD offset FLAT:label49
|
|
DD offset FLAT:label50
|
|
DD offset FLAT:label51
|
|
DD offset FLAT:label52
|
|
DD offset FLAT:label53
|
|
DD offset FLAT:label54
|
|
DD offset FLAT:label55
|
|
DD offset FLAT:label56
|
|
DD offset FLAT:label57
|
|
DD offset FLAT:label58
|
|
DD offset FLAT:label59
|
|
DD offset FLAT:label60
|
|
DD offset FLAT:label61
|
|
DD offset FLAT:label62
|
|
DD offset FLAT:label63
|
|
|
|
|
|
; fpcw is already defined (and made public) inside the "ifdef DEBUG" block
; earlier in this data section.  Defining it again unconditionally makes a
; DEBUG build fail with a symbol-redefinition error, so only emit this copy
; when DEBUG is not defined.  Non-DEBUG builds are unchanged.
ifndef  DEBUG
fpcw    dw      0
endif
|
|
|
|
|
|
|
|
CODESEG
|
|
|
|
|
|
;
|
|
; PRELIMINARY VERSION for register-register divides.
|
|
;
|
|
|
|
|
|
; In this implementation the
|
|
; fdiv_main_routine is called,
|
|
; therefore all the stack frame
|
|
; locations are adjusted for the
|
|
; return pointer.
|
|
|
|
fdiv_main_routine PROC NEAR
;
; Core of the divide workaround.  Reached by CALL, so every frame offset
; used here already includes the 4-byte return address.
;
;   In:     [esp+MAIN_NUMER]  tbyte numerator
;           [esp+MAIN_DENOM]  tbyte denominator
;   Out:    both values are pushed on the FPU stack and combined with
;           "fdivp st(1), st", leaving the quotient in st(0)
;   Uses:   eax (not preserved), [esp+PREV_CW] and [esp+PATCH_CW] as scratch;
;           the denormal path also overwrites [esp+MAIN_DENOM]
;
        fld     tbyte ptr [esp + MAIN_NUMER]    ; push the numerator
        fld     tbyte ptr [esp + MAIN_DENOM]    ; push the denominator -> st(0)=denom, st(1)=numer

retry:
        ; Denormal / zero check.  Such a value has 0 in the explicit
        ; integer bit of the significand.  That bit is the top bit of the
        ; dword loaded here, so doubling the dword carries out if and only
        ; if the denominator is neither denormal nor zero.
        mov     eax, [esp + MAIN_DENOM + 4]     ; significand bits 32..63
        add     eax, eax                        ; integer bit -> CF
        jnc     denormal                        ; no carry: denormal or zero

        ; The flaw can only be exposed when the three bits that follow the
        ; four-bit pattern (1,4,7,a,d) are all ones.  Invert them and check
        ; that they all became zero.
        xor     eax, ONESMASK                   ; flip the must-be-one bits
        test    eax, ONESMASK                   ; ZF=1 when all three were ones
        jz      scale_if_needed                 ; all ones: look at the 4-bit pattern
        fdivp   st(1), st                       ; hardware divide is safe
        ret

        ; Check the four bits after the binary point against the five
        ; risky patterns.
scale_if_needed:
        shr     eax, 28                         ; eax = first four fraction bits
        cmp     byte ptr fdiv_risc_table[eax], 0
        jnz     divide_scaled                   ; pattern is one of (1,4,7,a,d)
        fdivp   st(1), st                       ; hardware divide is safe
        ret

divide_scaled:
        mov     eax, [esp + MAIN_DENOM + 8]     ; denominator sign/exponent word
        and     eax, 7FFFh                      ; exponent == 0: pseudo-denormal, let the
        jz      invalid_denom                   ;   hardware divide raise its own flags
        cmp     eax, 7FFFh                      ; exponent all ones: NaN or infinity,
        je      invalid_denom                   ;   likewise left to the hardware

        ; Mask all exceptions and select 80-bit precision and
        ; round-to-nearest while scaling, so that any trap is taken at the
        ; divide rather than in the scaling code and no precision is lost
        ; under a 32- or 64-bit precision setting.  These six instructions
        ; could be dropped only if the caller guarantees underflow is masked
        ; and precision control is already 80 bits.
        fnstcw  [esp + PREV_CW]                 ; remember caller's control word
        mov     eax, [esp + PREV_CW]
        or      eax, 033Fh                      ; mask exceptions, pc = 80 bits
        and     eax, 0F3FFh                     ; rounding = nearest (also clears the high word)
        mov     [esp + PATCH_CW], eax
        fldcw   [esp + PATCH_CW]                ; activate the temporary control word

        ; Look at the numerator exponent before scaling.  Scaling a tiny
        ; numerator down could underflow and set a denormal flag at the
        ; divide that would not otherwise have been set, so in that case
        ; scale by 17/16 instead of 15/16.
        mov     eax, [esp + MAIN_NUMER + 8]     ; numerator sign/exponent word
        and     eax, 7FFFh
        cmp     eax, 1
        je      small_numer

        fmul    fdiv_scale_1                    ; denominator *= 15/16
        fxch    st(1)
        fmul    fdiv_scale_1                    ; numerator   *= 15/16
        fxch    st(1)

        ; Restore the caller's control word before the real divide
        ; (needed unless underflow was masked and pc was already 80 bits).
        fldcw   [esp + PREV_CW]
        fdivp   st(1), st                       ; scaled operands are safe
        ret

small_numer:
        fmul    fdiv_scale_2                    ; denominator *= 17/16
        fxch    st(1)
        fmul    fdiv_scale_2                    ; numerator   *= 17/16
        fxch    st(1)

        ; Restore the caller's control word before the real divide.
        fldcw   [esp + PREV_CW]
        fdivp   st(1), st                       ; scaled operands are safe
        ret

denormal:
        mov     eax, [esp + MAIN_DENOM]         ; OR both significand dwords:
        or      eax, [esp + MAIN_DENOM + 4]     ;   zero means the whole significand is 0
        jnz     denormal_divide_scaled          ; non-zero significand
invalid_denom:                                  ; zero or invalid denominator
        fdivp   st(1), st                       ; let the hardware handle it
        ret

denormal_divide_scaled:
        mov     eax, [esp + MAIN_DENOM + 8]     ; denominator sign/exponent word
        and     eax, 7FFFh                      ; a true denormal has exponent 0
        jnz     invalid_denom                   ; otherwise hand it to the hardware

        ; Same control-word switch as in divide_scaled: mask exceptions,
        ; 80-bit precision, round to nearest.
        fnstcw  [esp + PREV_CW]                 ; remember caller's control word
        mov     eax, [esp + PREV_CW]
        or      eax, 033Fh                      ; mask exceptions, pc = 80 bits
        and     eax, 0F3FFh                     ; rounding = nearest
        mov     [esp + PATCH_CW], eax
        fldcw   [esp + PATCH_CW]

        mov     eax, [esp + MAIN_NUMER + 8]     ; numerator sign/exponent word
        and     eax, 7FFFh
        je      denormal_numer                  ; exponent 0: denormal numerator
        cmp     eax, 7FFFh                      ; exponent all ones: NaN or infinity
        je      invalid_numer
        mov     eax, [esp + MAIN_NUMER + 4]     ; numerator significand bits 32..63
        add     eax, eax                        ; integer bit -> CF
        jnc     invalid_numer                   ; non-zero exponent but integer bit clear
        jmp     numer_ok

denormal_numer:
        mov     eax, [esp + MAIN_NUMER + 4]     ; numerator significand bits 32..63
        add     eax, eax                        ; integer bit -> CF
        jc      invalid_numer                   ; zero exponent but integer bit set

numer_ok:
        fxch    st(1)
        fstp    st(0)                           ; discard the numerator
        fld     st(0)                           ; duplicate the denominator
        fmul    dword ptr [one_shl_63]          ; scale the copy out of the denormal range
        fstp    tbyte ptr [esp + MAIN_DENOM]    ; store the scaled copy over the frame slot
        fld     tbyte ptr [esp + MAIN_NUMER]    ; reload the numerator
        fxch    st(1)                           ; st(0)=denom, st(1)=numer again
        fwait

        ; Restore the caller's control word, then rerun the checks; they
        ; now read the scaled denominator from memory.
        fldcw   [esp + PREV_CW]
        jmp     retry

invalid_numer:
        ; Restore the caller's control word and let the hardware divide.
        fldcw   [esp + PREV_CW]
        fdivp   st(1), st
        ret

fdiv_main_routine ENDP
|
|
|
|
; fdivr_st i, i-1
;   Replaces ST(0) with ST(i) / ST(0); the rest of the FPU stack is left
;   as it was.  Needs a STACK_SIZE frame already reserved at esp and
;   releases it before finishing.
fdivr_st        MACRO   reg_index, reg_index_minus1
        fstp    tbyte ptr [esp + DENOM]         ;; ST(0) is the denominator
IF      reg_index_minus1 GT 0
        fxch    st(reg_index_minus1)            ;; bring the old ST(i) to the top
ENDIF
        fstp    tbyte ptr [esp + NUMER]         ;; old ST(i) is the numerator
        call    fdiv_main_routine               ;; quotient now in ST(0)
IF      reg_index_minus1 GT 0
        fxch    st(reg_index_minus1)            ;; undo the earlier exchange
ENDIF
        fld     tbyte ptr [esp + NUMER]         ;; put the old ST(i) value back
        fxch    st(reg_index)                   ;; quotient to ST(0), value to ST(i)
        add     esp, STACK_SIZE
ENDM
|
|
|
|
; fdivr_sti i, i-1
;   Replaces ST(i) with ST(0) / ST(i) and keeps ST(0).  Needs a STACK_SIZE
;   frame already reserved at esp and releases it before finishing.
fdivr_sti       MACRO   reg_index, reg_index_minus1
        fstp    tbyte ptr [esp + NUMER]         ;; ST(0) is the numerator
IF      reg_index_minus1 GT 0
        fxch    st(reg_index_minus1)            ;; bring the old ST(i) to the top
ENDIF
        fstp    tbyte ptr [esp + DENOM]         ;; old ST(i) is the denominator
        call    fdiv_main_routine               ;; quotient now in ST(0)
IF      reg_index_minus1 GT 0
        fxch    st(reg_index_minus1)            ;; move the quotient into place
ENDIF
        fld     tbyte ptr [esp + NUMER]         ;; restore the original ST(0)
        add     esp, STACK_SIZE
ENDM
|
|
|
|
; fdivrp_sti i, i-1
;   Popping form: computes ST(0) / ST(i), stores it where ST(i) was and
;   removes the old ST(0).  Needs a STACK_SIZE frame already reserved at
;   esp and releases it before finishing.
fdivrp_sti      MACRO   reg_index, reg_index_minus1
        fstp    tbyte ptr [esp + NUMER]         ;; ST(0) is the numerator
IF      reg_index_minus1 GT 0
        fxch    st(reg_index_minus1)            ;; bring the old ST(i) to the top
ENDIF
        fstp    tbyte ptr [esp + DENOM]         ;; old ST(i) is the denominator
        call    fdiv_main_routine               ;; quotient now in ST(0)
IF      reg_index_minus1 GT 0
        fxch    st(reg_index_minus1)            ;; move the quotient into place
ENDIF
        add     esp, STACK_SIZE
ENDM
|
|
|
|
; fdiv_st i, i-1
;   Replaces ST(0) with ST(0) / ST(i) and keeps ST(i).  The denominator is
;   stored twice because fdiv_main_routine may overwrite the DENOM slot;
;   the DENOM_SAVE copy is what gets reloaded.  Needs a STACK_SIZE frame
;   already reserved at esp and releases it before finishing.
fdiv_st         MACRO   reg_index, reg_index_minus1
        fstp    tbyte ptr [esp + NUMER]         ;; ST(0) is the numerator
IF      reg_index_minus1 GT 0
        fxch    st(reg_index_minus1)            ;; bring the old ST(i) to the top
ENDIF
        fld     st(0)                           ;; duplicate the denominator
        fstp    tbyte ptr [esp + DENOM]         ;; working copy
        fstp    tbyte ptr [esp + DENOM_SAVE]    ;; pristine copy of the denominator
        call    fdiv_main_routine               ;; quotient now in ST(0)
IF      reg_index_minus1 GT 0
        fxch    st(reg_index_minus1)            ;; undo the earlier exchange
ENDIF
        fld     tbyte ptr [esp + DENOM_SAVE]    ;; put the old ST(i) value back
        fxch    st(reg_index)                   ;; quotient to ST(0), value to ST(i)
        add     esp, STACK_SIZE
ENDM
|
|
|
|
; fdiv_sti i, i-1
;   Replaces ST(i) with ST(i) / ST(0) and keeps ST(0).  The denominator is
;   stored twice because fdiv_main_routine may overwrite the DENOM slot;
;   the DENOM_SAVE copy is what gets reloaded.  Needs a STACK_SIZE frame
;   already reserved at esp and releases it before finishing.
fdiv_sti        MACRO   reg_index, reg_index_minus1
        fxch    st(reg_index)                   ;; ST(i) to the top
        fstp    tbyte ptr [esp + NUMER]         ;; old ST(i) is the numerator
IF      reg_index_minus1 GT 0
        fxch    st(reg_index_minus1)            ;; bring the old ST(0) to the top
ENDIF
        fld     st(0)                           ;; duplicate the denominator
        fstp    tbyte ptr [esp + DENOM]         ;; working copy
        fstp    tbyte ptr [esp + DENOM_SAVE]    ;; pristine copy of the denominator
        call    fdiv_main_routine               ;; quotient now in ST(0)
IF      reg_index_minus1 GT 0
        fxch    st(reg_index_minus1)            ;; move the quotient into place
ENDIF
        fld     tbyte ptr [esp + DENOM_SAVE]    ;; restore the original ST(0)
        add     esp, STACK_SIZE
ENDM
|
|
|
|
; fdivp_sti i, i-1
;   Popping form: computes ST(i) / ST(0), stores it where ST(i) was and
;   removes the old ST(0).  Needs a STACK_SIZE frame already reserved at
;   esp and releases it before finishing.
fdivp_sti       MACRO   reg_index, reg_index_minus1
        fstp    tbyte ptr [esp + DENOM]         ;; ST(0) is the denominator
IF      reg_index_minus1 GT 0
        fxch    st(reg_index_minus1)            ;; bring the old ST(i) to the top
ENDIF
        fstp    tbyte ptr [esp + NUMER]         ;; old ST(i) is the numerator
        call    fdiv_main_routine               ;; quotient now in ST(0)
IF      reg_index_minus1 GT 0
        fxch    st(reg_index_minus1)            ;; move the quotient into place
ENDIF
        add     esp, STACK_SIZE
ENDM
|
|
|
|
|
|
public _adj_fdiv_r
|
|
_adj_fdiv_r PROC NEAR
|
|
|
|
sub esp, STACK_SIZE ; added back at end of fdiv_x macros
|
|
and eax, 0000003FH ; upper 26 bits could be anything
|
|
jmp dword ptr dispatch_table[eax*4]
|
|
|
|
|
|
|
|
label0::
|
|
fdiv st,st(0) ; D8 F0 FDIV ST,ST(0)
|
|
add esp, STACK_SIZE
|
|
ret
|
|
label1::
|
|
add esp, STACK_SIZE
|
|
int ILLEGAL_OPC
|
|
label2::
|
|
fdivr st,st(0) ; D8 F8 FDIVR ST,ST(0)
|
|
add esp, STACK_SIZE
|
|
ret
|
|
label3::
|
|
add esp, STACK_SIZE
|
|
int ILLEGAL_OPC
|
|
label4::
|
|
fdiv st(0),st ; DC F8/D8 F0 FDIV ST(0),ST
|
|
add esp, STACK_SIZE
|
|
ret
|
|
label5::
|
|
fdivp st(0),st ; DE F8 FDIVP ST(0),ST
|
|
add esp, STACK_SIZE
|
|
ret
|
|
label6::
|
|
fdivr st(0),st ; DC F0/DE F0 FDIVR ST(0),ST
|
|
add esp, STACK_SIZE
|
|
ret
|
|
label7::
|
|
fdivrp st(0),st ; DE F0 FDIVRP ST(0),ST
|
|
add esp, STACK_SIZE
|
|
ret
|
|
label8::
|
|
fdiv_st 1,0
|
|
ret
|
|
label9::
|
|
add esp, STACK_SIZE
|
|
int ILLEGAL_OPC
|
|
label10::
|
|
fdivr_st 1,0
|
|
ret
|
|
label11::
|
|
add esp, STACK_SIZE
|
|
int ILLEGAL_OPC
|
|
label12::
|
|
fdiv_sti 1,0
|
|
ret
|
|
label13::
|
|
fdivp_sti 1,0
|
|
ret
|
|
label14::
|
|
fdivr_sti 1,0
|
|
ret
|
|
label15::
|
|
fdivrp_sti 1,0
|
|
ret
|
|
label16::
|
|
fdiv_st 2,1
|
|
ret
|
|
label17::
|
|
add esp, STACK_SIZE
|
|
int ILLEGAL_OPC
|
|
label18::
|
|
fdivr_st 2,1
|
|
ret
|
|
label19::
|
|
add esp, STACK_SIZE
|
|
int ILLEGAL_OPC
|
|
label20::
|
|
fdiv_sti 2,1
|
|
ret
|
|
label21::
|
|
fdivp_sti 2,1
|
|
ret
|
|
label22::
|
|
fdivr_sti 2,1
|
|
ret
|
|
label23::
|
|
fdivrp_sti 2,1
|
|
ret
|
|
label24::
|
|
fdiv_st 3,2
|
|
ret
|
|
label25::
|
|
add esp, STACK_SIZE
|
|
int ILLEGAL_OPC
|
|
label26::
|
|
fdivr_st 3,2
|
|
ret
|
|
label27::
|
|
add esp, STACK_SIZE
|
|
int ILLEGAL_OPC
|
|
label28::
|
|
fdiv_sti 3,2
|
|
ret
|
|
label29::
|
|
fdivp_sti 3,2
|
|
ret
|
|
label30::
|
|
fdivr_sti 3,2
|
|
ret
|
|
label31::
|
|
fdivrp_sti 3,2
|
|
ret
|
|
label32::
|
|
fdiv_st 4,3
|
|
ret
|
|
label33::
|
|
add esp, STACK_SIZE
|
|
int ILLEGAL_OPC
|
|
label34::
|
|
fdivr_st 4,3
|
|
ret
|
|
label35::
|
|
add esp, STACK_SIZE
|
|
int ILLEGAL_OPC
|
|
label36::
|
|
fdiv_sti 4,3
|
|
ret
|
|
label37::
|
|
fdivp_sti 4,3
|
|
ret
|
|
label38::
|
|
fdivr_sti 4,3
|
|
ret
|
|
label39::
|
|
fdivrp_sti 4,3
|
|
ret
|
|
label40::
|
|
fdiv_st 5,4
|
|
ret
|
|
label41::
|
|
add esp, STACK_SIZE
|
|
int ILLEGAL_OPC
|
|
label42::
|
|
fdivr_st 5,4
|
|
ret
|
|
label43::
|
|
add esp, STACK_SIZE
|
|
int ILLEGAL_OPC
|
|
label44::
|
|
fdiv_sti 5,4
|
|
ret
|
|
label45::
|
|
fdivp_sti 5,4
|
|
ret
|
|
label46::
|
|
fdivr_sti 5,4
|
|
ret
|
|
label47::
|
|
fdivrp_sti 5,4
|
|
ret
|
|
label48::
|
|
fdiv_st 6,5
|
|
ret
|
|
label49::
|
|
add esp, STACK_SIZE
|
|
int ILLEGAL_OPC
|
|
label50::
|
|
fdivr_st 6,5
|
|
ret
|
|
label51::
|
|
add esp, STACK_SIZE
|
|
int ILLEGAL_OPC
|
|
label52::
|
|
fdiv_sti 6,5
|
|
ret
|
|
label53::
|
|
fdivp_sti 6,5
|
|
ret
|
|
label54::
|
|
fdivr_sti 6,5
|
|
ret
|
|
label55::
|
|
fdivrp_sti 6,5
|
|
ret
|
|
label56::
|
|
fdiv_st 7,6
|
|
ret
|
|
label57::
|
|
add esp, STACK_SIZE
|
|
int ILLEGAL_OPC
|
|
label58::
|
|
fdivr_st 7,6
|
|
ret
|
|
label59::
|
|
add esp, STACK_SIZE
|
|
int ILLEGAL_OPC
|
|
label60::
|
|
fdiv_sti 7,6
|
|
ret
|
|
label61::
|
|
fdivp_sti 7,6
|
|
ret
|
|
label62::
|
|
fdivr_sti 7,6
|
|
ret
|
|
label63::
|
|
fdivrp_sti 7,6
|
|
ret
|
|
_adj_fdiv_r ENDP
|
|
|
|
|
|
|
|
_fdivp_sti_st   PROC    NEAR
;
; Out-of-line FDIVP ST(1),ST used by the memory-operand entry points:
; divides ST(1) by ST(0), pops, and leaves the quotient in ST(0).
; Reserves the STACK_SIZE frame that the fdivp_sti macro releases.
;
        sub     esp, STACK_SIZE                 ; given back at the end of fdivp_sti
        fdivp_sti       1, 0
        ret
_fdivp_sti_st   ENDP
|
|
|
|
_fdivrp_sti_st  PROC    NEAR
;
; Out-of-line FDIVRP ST(1),ST used by the memory-operand entry points:
; divides ST(0) by ST(1), pops, and leaves the quotient in ST(0).
; Reserves the STACK_SIZE frame that the fdivrp_sti macro releases.
;
        sub     esp, STACK_SIZE                 ; given back at the end of fdivrp_sti
        fdivrp_sti      1, 0
        ret
_fdivrp_sti_st  ENDP
|
|
|
|
|
|
;;; _adj_fdiv_m32 - FDIV m32real FIX
|
|
;;
|
|
;; Input : Value of the m32real in the top of STACK
|
|
;;
|
|
;; Output: Result of FDIV in ST
|
|
|
|
PUBLIC _adj_fdiv_m32
|
|
_adj_fdiv_m32 PROC NEAR
|
|
|
|
push eax ; save eax
|
|
mov eax, [esp + MEM_OPERAND] ; check for
|
|
and eax, SINGLE_NAN ; NaN
|
|
cmp eax, SINGLE_NAN ;
|
|
je memory_divide_m32 ;
|
|
|
|
fnstsw ax ; get status word
|
|
and eax, 3800h ; get top of stack
|
|
je spill_fpstack ; is FP stack full?
|
|
fld dword ptr[esp + MEM_OPERAND] ; load m32real in ST
|
|
call _fdivp_sti_st ; do actual divide
|
|
pop eax
|
|
ret 4
|
|
spill_fpstack:
|
|
fxch
|
|
sub esp, SPILL_SIZE ; make temp space
|
|
fstp tbyte ptr[esp ] ; save user's ST(1)
|
|
fld dword ptr[esp + SPILL_MEM_OPERAND] ; load m32 real
|
|
call _fdivp_sti_st ; do actual divide
|
|
fld tbyte ptr[esp] ; restore user's ST(1)
|
|
;esp is adjusted by fdivrp fn
|
|
fxch
|
|
add esp, SPILL_SIZE
|
|
pop eax
|
|
ret 4
|
|
memory_divide_m32:
|
|
fdiv dword ptr[esp + MEM_OPERAND] ; do actual divide
|
|
pop eax
|
|
ret 4
|
|
|
|
_adj_fdiv_m32 ENDP
|
|
|
|
|
|
;;; _adj_fdiv_m64 - FDIV m64real FIX
|
|
;;
|
|
;; Input : Value of the m64real in the top of STACK
|
|
;;
|
|
;; Output: Result of FDIV in ST
|
|
|
|
PUBLIC _adj_fdiv_m64
|
|
_adj_fdiv_m64 PROC NEAR
|
|
|
|
push eax ; save eax
|
|
mov eax, [esp + MEM_OPERAND + 4] ; check for
|
|
and eax, DOUBLE_NAN ; NaN
|
|
cmp eax, DOUBLE_NAN ;
|
|
je memory_divide_m64 ;
|
|
|
|
fnstsw ax ; get status word
|
|
and eax, 3800h ; get top of stack
|
|
je spill_fpstack_m64 ; is FP stack full?
|
|
fld qword ptr[esp + MEM_OPERAND] ; load m64real in ST
|
|
call _fdivp_sti_st ; do actual divide
|
|
pop eax
|
|
ret 8
|
|
spill_fpstack_m64:
|
|
fxch
|
|
sub esp, SPILL_SIZE ; make temp space
|
|
fstp tbyte ptr[esp] ; save user's ST(1)
|
|
fld qword ptr[esp + SPILL_MEM_OPERAND] ; load m64real
|
|
call _fdivp_sti_st ; do actual divide
|
|
fld tbyte ptr[esp] ; restore user's ST(1)
|
|
;esp is adjusted by fdivrp fn
|
|
fxch
|
|
add esp, SPILL_SIZE
|
|
pop eax
|
|
ret 8
|
|
memory_divide_m64:
|
|
fdiv qword ptr[esp + MEM_OPERAND] ; do actual divide
|
|
pop eax
|
|
ret 8
|
|
|
|
_adj_fdiv_m64 ENDP
|
|
|
|
;;; _adj_fdiv_m16i - FDIV m16int FIX
|
|
;;
|
|
;; Input : Value of the m16int in the top of STACK
|
|
;;
|
|
;; Output: Result of FDIV in ST
|
|
|
|
PUBLIC _adj_fdiv_m16i
|
|
_adj_fdiv_m16i PROC NEAR
|
|
push eax ; save eax
|
|
fnstsw ax ; get status word
|
|
and eax, 3800h ; get top of stack
|
|
je spill_fpstack_m16i ; is FP stack full?
|
|
fild word ptr[esp + MEM_OPERAND] ; load m16int in ST
|
|
call _fdivp_sti_st ; do actual divide
|
|
pop eax
|
|
ret 4
|
|
spill_fpstack_m16i:
|
|
fxch
|
|
sub esp, SPILL_SIZE ; make temp space
|
|
fstp tbyte ptr[esp ] ; save user's ST(1)
|
|
fild word ptr[esp + SPILL_MEM_OPERAND] ; load m16int
|
|
call _fdivp_sti_st ; do actual divide
|
|
fld tbyte ptr[esp] ; restore user's ST(1)
|
|
;esp is adjusted by fdivrp fn
|
|
fxch
|
|
add esp, SPILL_SIZE
|
|
pop eax
|
|
ret 4
|
|
|
|
_adj_fdiv_m16i ENDP
|
|
|
|
;;; _adj_fdiv_m32i - FDIV m32int FIX
|
|
;;
|
|
;; Input : Value of the m32int in the top of STACK
|
|
;;
|
|
;; Output: Result of FDIV in ST
|
|
|
|
PUBLIC _adj_fdiv_m32i
|
|
_adj_fdiv_m32i PROC NEAR
|
|
push eax ; save eax
|
|
fnstsw ax ; get status word
|
|
and eax, 3800h ; get top of stack
|
|
je spill_fpstack_m32i ; is FP stack full?
|
|
fild dword ptr[esp + MEM_OPERAND] ; load m32int in ST
|
|
call _fdivp_sti_st ; do actual divide
|
|
pop eax
|
|
ret 4
|
|
spill_fpstack_m32i:
|
|
fxch
|
|
sub esp, SPILL_SIZE ; make temp space
|
|
fstp tbyte ptr[esp ] ; save user's ST(1)
|
|
fild dword ptr[esp + SPILL_MEM_OPERAND] ; load m32int
|
|
call _fdivp_sti_st ; do actual divide
|
|
fld tbyte ptr[esp] ; restore user's ST(1)
|
|
;esp is adjusted by fdivrp fn
|
|
fxch
|
|
add esp, SPILL_SIZE
|
|
pop eax
|
|
ret 4
|
|
|
|
_adj_fdiv_m32i ENDP
|
|
|
|
|
|
|
|
;;; _adj_fdivr_m32 - FDIVR m32real FIX
|
|
;;
|
|
;; Input : Value of the m32real in the top of STACK
|
|
;;
|
|
;; Output: Result of FDIVR in ST
|
|
|
|
PUBLIC _adj_fdivr_m32
|
|
_adj_fdivr_m32 PROC NEAR
|
|
push eax ; save eax
|
|
mov eax, [esp + MEM_OPERAND] ; check for
|
|
and eax, SINGLE_NAN ; NaN
|
|
cmp eax, SINGLE_NAN ;
|
|
je memory_divide_m32r ;
|
|
|
|
fnstsw ax ; get status word
|
|
and eax, 3800h ; get top of stack
|
|
je spill_fpstack_m32r ; is FP stack full?
|
|
fld dword ptr[esp + MEM_OPERAND] ; load m32real in ST
|
|
call _fdivrp_sti_st ; do actual divide
|
|
pop eax
|
|
ret 4
|
|
spill_fpstack_m32r:
|
|
fxch
|
|
sub esp, SPILL_SIZE ; make temp space
|
|
fstp tbyte ptr[esp ] ; save user's ST(1)
|
|
fld dword ptr[esp + SPILL_MEM_OPERAND] ; load m32 real
|
|
call _fdivrp_sti_st ; do actual divide
|
|
fld tbyte ptr[esp] ; restore user's ST(1)
|
|
;esp is adjusted by fdivp fn
|
|
fxch
|
|
add esp, SPILL_SIZE
|
|
pop eax
|
|
ret 4
|
|
memory_divide_m32r:
|
|
fdivr dword ptr[esp + MEM_OPERAND] ; do actual divide
|
|
pop eax
|
|
ret 4
|
|
|
|
_adj_fdivr_m32 ENDP
|
|
|
|
|
|
;;; _adj_fdivr_m64 - FDIVR m64real FIX
|
|
;;
|
|
;; Input : Value of the m64real in the top of STACK
|
|
;;
|
|
;; Output: Result of FDIVR in ST
|
|
|
|
PUBLIC _adj_fdivr_m64
|
|
_adj_fdivr_m64 PROC NEAR
|
|
push eax ; save eax
|
|
mov eax, [esp + MEM_OPERAND + 4] ; check for
|
|
and eax, DOUBLE_NAN ; NaN
|
|
cmp eax, DOUBLE_NAN ;
|
|
je memory_divide_m64r ;
|
|
|
|
fnstsw ax ; get status word
|
|
and eax, 3800h ; get top of stack
|
|
je spill_fpstack_m64r ; is FP stack full?
|
|
fld qword ptr[esp + MEM_OPERAND] ; load m64real in ST
|
|
call _fdivrp_sti_st ; do actual divide
|
|
pop eax
|
|
ret 8
|
|
spill_fpstack_m64r:
|
|
fxch
|
|
sub esp, SPILL_SIZE ; make temp space
|
|
fstp tbyte ptr[esp ] ; save user's ST(1)
|
|
fld qword ptr[esp + SPILL_MEM_OPERAND] ; load m64real
|
|
call _fdivrp_sti_st ; do actual divide
|
|
fld tbyte ptr[esp] ; restore user's ST(1)
|
|
;esp is adjusted by fdivp fn
|
|
fxch
|
|
add esp, SPILL_SIZE
|
|
pop eax
|
|
ret 8
|
|
memory_divide_m64r:
|
|
fdivr qword ptr[esp + MEM_OPERAND] ; do actual divide
|
|
pop eax
|
|
ret 8
|
|
|
|
_adj_fdivr_m64 ENDP
|
|
|
|
|
|
;;; _adj_fdivr_m16i - FDIVR m16int FIX
|
|
;;
|
|
;; Input : Value of the m16int in the top of STACK
|
|
;;
|
|
;; Output: Result of FDIVR in ST
|
|
|
|
PUBLIC _adj_fdivr_m16i
|
|
_adj_fdivr_m16i PROC NEAR
|
|
push eax ; save eax
|
|
fnstsw ax ; get status word
|
|
and eax, 3800h ; get top of stack
|
|
je spill_fpstack_m16ir ; is FP stack full?
|
|
fild word ptr[esp + MEM_OPERAND] ; load m16int in ST
|
|
call _fdivrp_sti_st ; do actual divide
|
|
pop eax
|
|
ret 4
|
|
spill_fpstack_m16ir:
|
|
fxch
|
|
sub esp, SPILL_SIZE ; make temp space
|
|
fstp tbyte ptr[esp ] ; save user's ST(1)
|
|
fild word ptr[esp + SPILL_MEM_OPERAND] ; load m16int
|
|
call _fdivrp_sti_st ; do actual divide
|
|
fld tbyte ptr[esp] ; restore user's ST(1)
|
|
;esp is adjusted by fdivp fn
|
|
fxch
|
|
add esp, SPILL_SIZE
|
|
pop eax
|
|
ret 4
|
|
|
|
_adj_fdivr_m16i ENDP
|
|
|
|
|
|
;;; _adj_fdivr_m32i - FDIVR m32int FIX
|
|
;;
|
|
;; Input : Value of the m32int in the top of STACK
|
|
;;
|
|
;; Output: Result of FDIVR in ST
|
|
|
|
PUBLIC _adj_fdivr_m32i
|
|
_adj_fdivr_m32i PROC NEAR
|
|
push eax ; save eax
|
|
fnstsw ax ; get status word
|
|
and eax, 3800h ; get top of stack
|
|
je spill_fpstack_m32ir ; is FP stack full?
|
|
fild dword ptr[esp + MEM_OPERAND] ; load m32int in ST
|
|
call _fdivrp_sti_st ; do actual divide
|
|
pop eax
|
|
ret 4
|
|
spill_fpstack_m32ir:
|
|
fxch
|
|
sub esp, SPILL_SIZE ; make temp space
|
|
fstp tbyte ptr[esp ] ; save user's ST(1)
|
|
fild dword ptr[esp + SPILL_MEM_OPERAND] ; load m32int
|
|
call _fdivrp_sti_st ; do actual divide
|
|
fld tbyte ptr[esp] ; restore user's ST(1)
|
|
;esp is adjusted by fdivp fn
|
|
fxch
|
|
add esp, SPILL_SIZE
|
|
pop eax
|
|
ret 4
|
|
|
|
_adj_fdivr_m32i ENDP
|
|
|
|
|
|
;;; _safe_fdiv - FDIV fix
|
|
;;
|
|
;; Pentium-safe version of FDIV, aka FDIVP ST(1),ST(0)
|
|
;;
|
|
;; Input : Numerator in ST(1), Denominator in ST(0)
|
|
;;
|
|
;; Output: Result of FDIV in ST(0)
|
|
|
|
|
|
PUBLIC _safe_fdiv
|
|
_safe_fdiv PROC NEAR
|
|
|
|
push eax
|
|
sub esp, STACK_SIZE
|
|
fstp tbyte ptr [esp+DENOM]
|
|
fstp tbyte ptr [esp+NUMER]
|
|
call fdiv_main_routine
|
|
add esp, STACK_SIZE
|
|
pop eax
|
|
ret
|
|
|
|
_safe_fdiv ENDP
|
|
|
|
|
|
;;; _safe_fdivr - FDIVR fix
;;
;; Pentium-safe version of FDIVR, aka FDIVRP ST(1),ST(0)
;;
;; Input : ST(0) = numerator, ST(1) = denominator
;;
;; Output: Result of FDIVR in ST(0)
;;
;; Identical to _safe_fdiv except that the two operands are stored
;; into the opposite slots of the scratch frame before
;; fdiv_main_routine is called.  EAX is saved and restored.

        PUBLIC  _safe_fdivr
_safe_fdivr     PROC    NEAR

        push    eax
        sub     esp, STACK_SIZE                 ; scratch frame for the operands

        fstp    tbyte ptr [NUMER + esp]         ; ST(0): numerator
        fstp    tbyte ptr [DENOM + esp]         ; old ST(1): denominator
        call    fdiv_main_routine

        add     esp, STACK_SIZE
        pop     eax
        ret

_safe_fdivr     ENDP
|
|
|
|
|
|
|
|
;;; _adj_fprem - FPREM FIX
|
|
;;
|
|
;; Based on PRELIMINARY Intel code.
|
|
|
|
|
|
;
; _fprem_common - shared worker behind _adj_fprem
;
; Entry : numerator and denominator are tbytes in the caller's frame,
;         addressed as FPREM_MAIN_NUMER / FPREM_MAIN_DENOM relative to
;         ESP once EAX, EBX and ECX have been pushed below.  Neither
;         operand is on the FP stack.
;         EDX bit 0 (01h) = result must be de-scaled by one_shr_64
;         EDX bit 1 (02h) = original denominator is already stored in
;                           FPREM_MAIN_DENOM_SAVE
; Exit  : ST(0) = remainder, ST(1) = denominator.
;         EAX, EBX, ECX are restored; EDX is not modified.
;
; Flow  : if the denominator does not carry the suspect bit pattern, or
;         either operand is a special value, the plain hardware FPREM
;         is used.  Otherwise the numerator is reduced in software
;         (rem_patch_loop / rem_large) before any final FPREM.
;

_fprem_common   PROC    NEAR

        push    eax
        push    ebx
        push    ecx

        ; --- does the denominator have the suspect bit pattern? ---
        mov     eax, [FPREM_MAIN_DENOM+6+esp]   ; exponent and high 16 bits of mantissa
        xor     eax, FPREM_ONESMASK             ; invert bits that have to be one
        test    eax, FPREM_ONESMASK             ; any of them was zero?
        jnz     remainder_hardware_ok
        shr     eax, 11
        and     eax, 0fh
        cmp     byte ptr fprem_risc_table[eax], 0 ; check for (1,4,7,a,d)
        jz      remainder_hardware_ok

        ; The denominator has the bit pattern.  Weed out the special
        ; cases before using the software version.  The caller
        ; guarantees the denominator is not a denormal.  Checked here:
        ;       denominator     inf, NaN, unnormal
        ;       numerator       inf, NaN, unnormal, denormal

        mov     eax, [FPREM_MAIN_DENOM+6+esp]   ; exponent and high 16 bits of mantissa
        and     eax, 07fff0000h                 ; mask the exponent only
        cmp     eax, 07fff0000h                 ; INF or NaN?
        je      remainder_hardware_ok
        mov     eax, [FPREM_MAIN_NUMER+6+esp]   ; exponent and high 16 bits of mantissa
        and     eax, 07fff0000h                 ; mask the exponent only
        jz      remainder_hardware_ok           ; numerator denormal
        cmp     eax, 07fff0000h                 ; INF or NaN?
        je      remainder_hardware_ok

        ; Unnormal test: shifting the high mantissa dword left by one
        ; moves the explicit integer bit (bit 31) into CF.  The branch
        ; must look at CF; testing ZF instead would send every operand
        ; with any other high-mantissa bit set to the hardware path.
        mov     eax, [esp + FPREM_MAIN_NUMER + 4] ; high mantissa bits - numerator
        add     eax, eax                        ; CF = explicit integer bit
        jnc     remainder_hardware_ok           ; bit clear -> numerator is unnormal
        mov     eax, [esp + FPREM_MAIN_DENOM + 4] ; high mantissa bits - denominator
        add     eax, eax                        ; CF = explicit integer bit
        jnc     remainder_hardware_ok           ; bit clear -> denominator is unnormal

rem_patch:
        mov     eax, [FPREM_MAIN_DENOM+8+esp]   ; sign and exponent of y (denominator)
        and     eax, 07fffh                     ; clear sy
        add     eax, 63                         ; evaluate ey + 63
        mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x (numerator)
        and     ebx, 07fffh                     ; clear sx
        sub     ebx, eax                        ; ex - (ey + 63)
        ja      rem_large                       ; ex > ey + 63: large-argument case

rem_patch_loop:
        mov     eax, [FPREM_MAIN_DENOM+8+esp]   ; sign and exponent of y (denominator)
        and     eax, 07fffh                     ; clear sy
        add     eax, 10                         ; evaluate ey + 10
        mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x (numerator)
        and     ebx, 07fffh                     ; clear sx
        sub     ebx, eax                        ; ex - (ey + 10)
        js      remainder_hardware_ok           ; safe if ey + 10 > ex

        ; Reduce the numerator against a copy of the denominator whose
        ; exponent is raised to ex - d, with d = ((ex - ey) & 7) | 4.
        fld     tbyte ptr [FPREM_MAIN_NUMER+esp] ; load the numerator
        mov     eax, [FPREM_MAIN_DENOM+8+esp]   ; sign and exponent of y (kept for restore)
        mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x (numerator)
        and     ebx, 07fffh                     ; clear sx
        mov     ecx, ebx                        ; ecx = ex
        sub     ebx, eax
        and     ebx, 07h
        or      ebx, 04h                        ; ebx = d
        sub     ecx, ebx                        ; ecx = ex - d
        mov     ebx, eax
        and     ebx, 08000h                     ; keep sy
        or      ecx, ebx                        ; merge the sign of y
        mov     dword ptr [FPREM_MAIN_DENOM+8+esp], ecx
        fld     tbyte ptr [FPREM_MAIN_DENOM+esp] ; load the shifted denominator
        mov     dword ptr [FPREM_MAIN_DENOM+8+esp], eax ; restore the initial denominator
        fxch
        fprem                                   ; this rem is safe
        fstp    tbyte ptr [FPREM_MAIN_NUMER+esp] ; update the numerator
        fstp    st(0)                           ; pop the shifted denominator
        jmp     rem_patch_loop

rem_large:
        test    edx, 02h                        ; is denominator already saved
        jnz     already_saved
        fld     tbyte ptr[esp + FPREM_MAIN_DENOM]
        fstp    tbyte ptr[esp + FPREM_MAIN_DENOM_SAVE] ; save denominator
already_saved:
        ; Save user's precision control and institute 80.  The fp ops in
        ; rem_large_loop must not round to user's precision (if it is
        ; less than 80) because the hardware would not have done so.

        fnstcw  [esp+FPREM_MAIN_PREV_CW]        ; save caller's control word
        mov     eax, dword ptr[esp + FPREM_MAIN_PREV_CW]
        or      eax, 033fh                      ; mask exceptions, pc=80
        mov     [esp + FPREM_MAIN_PATCH_CW], eax
        fldcw   [esp + FPREM_MAIN_PATCH_CW]

        ; Loop count: ((ex - ey) & 3fh | 20h) + 1
        mov     eax, [FPREM_MAIN_DENOM+8+esp]   ; sign and exponent of y (denominator)
        and     eax, 07fffh                     ; clear sy
        mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x (numerator)
        and     ebx, 07fffh                     ; clear sx
        sub     ebx, eax                        ; evaluate the exponent difference
        and     ebx, 03fh
        or      ebx, 020h
        add     ebx, 1
        mov     ecx, ebx                        ; ecx = loop counter

        mov     eax, [FPREM_MAIN_DENOM+8+esp]   ; sign and exponent of y (denominator)
        mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x (numerator)
        and     ebx, 07fffh                     ; clear sx
        and     eax, 08000h                     ; keep sy
        or      ebx, eax                        ; merge the sign of y
        mov     dword ptr[FPREM_MAIN_DENOM+8+esp], ebx ; make ey equal to ex (scaled denominator)
        fld     tbyte ptr [FPREM_MAIN_DENOM+esp] ; load the scaled denominator
        fabs
        fld     tbyte ptr [FPREM_MAIN_NUMER+esp] ; load the numerator
        fabs

rem_large_loop:
        ; One compare / conditional-subtract step, then halve the divisor.
        fcom
        fnstsw  ax
        and     eax, 00100h                     ; C0 set: ST < ST(1), skip the subtract
        jnz     rem_no_sub
        fsub    st, st(1)
rem_no_sub:
        fxch
        fmul    qword ptr half
        fxch
        sub     ecx, 1                          ; decrement the loop counter
        jnz     rem_large_loop

        mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x, read before the slot is overwritten
        fstp    tbyte ptr[esp + FPREM_MAIN_NUMER] ; save result
        fstp    st                              ; toss modified denom
        fld     tbyte ptr[esp + FPREM_MAIN_DENOM_SAVE]
        fld     tbyte ptr[big_number]           ; force C2 to be set
        fprem
        fstp    st
        fld     tbyte ptr[esp + FPREM_MAIN_NUMER] ; restore saved result

        fldcw   [esp + FPREM_MAIN_PREV_CW]      ; restore caller's control word
        and     ebx, 08000h                     ; keep sx
        jz      rem_done
        fchs                                    ; numerator was negative
        jmp     rem_done

remainder_hardware_ok:
        fld     tbyte ptr [FPREM_MAIN_DENOM+esp] ; load the denominator
        fld     tbyte ptr [FPREM_MAIN_NUMER+esp] ; load the numerator
        fprem                                   ; and finally do a remainder

; prem_main_routine end
rem_done:
        test    edx, 03h
        jz      rem_exit                        ; no flags from caller: nothing to fix up
        fnstsw  [esp + FPREM_MAIN_FPREM_SW]     ; save Q0 Q1 and Q2
        test    edx, 01h
        jz      do_not_de_scale

        ; De-scale the result.  Go to pc=80 so the fmul is not rounded
        ; to the user's precision (fprem does not round the result).
        fnstcw  [esp + FPREM_MAIN_PREV_CW]      ; save caller's control word
        mov     eax, [esp + FPREM_MAIN_PREV_CW]
        or      eax, 0300h                      ; pc = 80
        mov     [esp + FPREM_MAIN_PATCH_CW], eax
        fldcw   [esp + FPREM_MAIN_PATCH_CW]
        fmul    qword ptr one_shr_64
        fldcw   [esp + FPREM_MAIN_PREV_CW]      ; restore caller's CW

do_not_de_scale:
        mov     eax, [esp + FPREM_MAIN_FPREM_SW]
        fxch
        fstp    st                              ; drop the working denominator
        fld     tbyte ptr[esp + FPREM_MAIN_DENOM_SAVE] ; put the saved one back
        fxch
        and     eax, 04300h                     ; restore saved Q0, Q1, Q2
        sub     esp, ENV_SIZE
        fnstenv [esp]
        and     [esp].STATUS_WORD, 0bcffh       ; clear the same three bits
        or      [esp].STATUS_WORD, eax          ; and merge in the saved ones
        fldenv  [esp]
        add     esp, ENV_SIZE

rem_exit:
        pop     ecx
        pop     ebx
        pop     eax
        ret

_fprem_common   ENDP
|
|
|
|
|
|
|
|
|
|
;
; _adj_fprem - FPREM replacement entry point
;
; Entry : ST(0) = numerator, ST(1) = denominator.
; Exit  : operands are popped, then ST(0)/ST(1) are reloaded by
;         _fprem_common or by the zero-denominator path below.
;         EDX is preserved; EAX is used as scratch and NOT preserved.
;
; EDX carries flags into _fprem_common:
;         01h = operands were scaled by one_shl_64, de-scale the result
;         02h = original denominator saved in FPREM_DENOM_SAVE
;

        PUBLIC  _adj_fprem
_adj_fprem      PROC    NEAR

        push    edx
        sub     esp, FPREM_STACK_SIZE
        fstp    tbyte ptr [FPREM_NUMER+esp]
        fstp    tbyte ptr [FPREM_DENOM+esp]
        xor     edx, edx                        ; no flags yet

; prem_main_routine begin
        mov     eax,[FPREM_DENOM+6+esp]         ; exponent and high 16 bits of mantissa
        test    eax,07fff0000h                  ; exponent field zero?
        jz      fprem_denormal
        call    _fprem_common
        add     esp, FPREM_STACK_SIZE
        pop     edx
        ret

fprem_denormal:
        fld     tbyte ptr [FPREM_DENOM+esp]     ; load the denominator
        fld     tbyte ptr [FPREM_NUMER+esp]     ; load the numerator
        mov     eax, [FPREM_DENOM+esp]          ; test for whole mantissa == 0
        or      eax, [FPREM_DENOM+4+esp]        ; test for whole mantissa == 0
        jz      remainder_hardware_ok_l         ; denominator is zero
        fxch
        fstp    tbyte ptr[esp + FPREM_DENOM_SAVE] ; save org denominator
        fld     tbyte ptr[esp + FPREM_DENOM]
        fxch
        or      edx, 02h                        ; denominator has been saved

        ;
        ; For this we need pc=80.  Also, mask exceptions so we don't take
        ; any denormal operand exceptions.  It is guaranteed that the
        ; descaling later on will take underflow, which is what the
        ; hardware would have done on a normal fprem.
        ;
        fnstcw  [FPREM_PREV_CW+esp]             ; save caller's control word
        mov     eax, [FPREM_PREV_CW+esp]
        or      eax, 0033fh                     ; mask exceptions, pc=80
        mov     [FPREM_PATCH_CW+esp], eax
        fldcw   [FPREM_PATCH_CW+esp]            ; mask exceptions & pc=80

        ; The denominator is a denormal.  For most numerators, scale both
        ; numerator and denominator to get rid of denormals, then run the
        ; common code with the flag set so the result is de-scaled.
        ; For large numerators this won't work because the scaling would
        ; overflow.  In that case the exponent difference is large, so
        ; the rem_large code will be used, and it depends only on the
        ; exponent difference modulo 64.  Adding 64 to the denominator's
        ; exponent doesn't change that, so the denominator alone can be
        ; scaled without affecting the result.
        ;
        ; To start with, figure out if numerator is large

        mov     eax, [esp + FPREM_NUMER + 8]    ; load numerator exponent
        and     eax, 7fffh                      ; isolate numerator exponent
        cmp     eax, 7fbeh                      ; compare Nexp to Maxexp-64
        ja      big_numer_rem_de                ; jif big numerator

        ; Numerator is not large: scale both numerator and denominator

        or      edx, 1                          ; result must be de-scaled
        fmul    qword ptr one_shl_64            ; make numerator not denormal
        fstp    tbyte ptr[esp + FPREM_NUMER]
        fmul    qword ptr one_shl_64            ; make denominator not denormal
        fstp    tbyte ptr[esp + FPREM_DENOM]
        jmp     scaling_done

        ; The numerator is large.  Scale only the denominator, which will
        ; not change the result which we know will be partial.  The scale
        ; flag stays clear.
big_numer_rem_de:
        ; We must do this with pc=80 to avoid rounding to single/double.
        ; In this case we do not mask exceptions so that we will take
        ; denormal operand, as would the hardware.
        ;
        ; FPREM_PREV_CW already holds the caller's control word.  It must
        ; not be re-read with fnstcw here: the FPU is currently running
        ; with the masked pc=80 word loaded above, and storing that would
        ; make scaling_done "restore" the wrong control word.
        mov     eax, [FPREM_PREV_CW+esp]        ; caller's control word
        or      eax, 00300h                     ; pc=80, caller's exception masks
        mov     [FPREM_PATCH_CW+esp], eax
        fldcw   [FPREM_PATCH_CW+esp]            ; pc=80

        fstp    st                              ; Toss numerator
        fmul    qword ptr one_shl_64            ; make denominator not denormal
        fstp    tbyte ptr[esp + FPREM_DENOM]

        ; Restore the control word which was fiddled to scale at 80-bit
        ; precision.  Then call the common code.
scaling_done:
        fldcw   [esp + FPREM_PREV_CW]           ; restore caller's control word
        call    _fprem_common
        add     esp, FPREM_STACK_SIZE
        pop     edx
        ret

remainder_hardware_ok_l:
        fprem                                   ; and finally do a remainder

        add     esp, FPREM_STACK_SIZE
        pop     edx
        ret

_adj_fprem      ENDP
|
|
|
|
|
|
|
|
;
|
|
; FPREM1 code begins here
|
|
;
|
|
|
|
|
|
;
; _fprem1_common - shared worker behind _adj_fprem1
;
; Entry : numerator and denominator are tbytes in the caller's frame,
;         addressed as FPREM_MAIN_NUMER / FPREM_MAIN_DENOM relative to
;         ESP once EAX, EBX and ECX have been pushed below.  Neither
;         operand is on the FP stack.
;         EDX bit 0 (01h) = result must be de-scaled by one_shr_64
;         EDX bit 1 (02h) = original denominator is already stored in
;                           FPREM_MAIN_DENOM_SAVE
; Exit  : ST(0) = remainder, ST(1) = denominator.
;         EAX, EBX, ECX are restored; EDX is not modified.
;
; Same structure as _fprem_common; the final remainder steps use FPREM1.
;

_fprem1_common  PROC    NEAR

        push    eax
        push    ebx
        push    ecx

        ; --- does the denominator have the suspect bit pattern? ---
        mov     eax, [FPREM_MAIN_DENOM+6+esp]   ; exponent and high 16 bits of mantissa
        xor     eax, FPREM_ONESMASK             ; invert bits that have to be one
        test    eax, FPREM_ONESMASK             ; any of them was zero?
        jnz     remainder1_hardware_ok
        shr     eax, 11
        and     eax, 0fh
        cmp     byte ptr fprem_risc_table[eax], 0 ; check for (1,4,7,a,d)
        jz      remainder1_hardware_ok

        ; The denominator has the bit pattern.  Weed out the special
        ; cases before using the software version.  The caller
        ; guarantees the denominator is not a denormal.  Checked here:
        ;       denominator     inf, NaN, unnormal
        ;       numerator       inf, NaN, unnormal, denormal

        mov     eax, [FPREM_MAIN_DENOM+6+esp]   ; exponent and high 16 bits of mantissa
        and     eax, 07fff0000h                 ; mask the exponent only
        cmp     eax, 07fff0000h                 ; INF or NaN?
        je      remainder1_hardware_ok
        mov     eax, [FPREM_MAIN_NUMER+6+esp]   ; exponent and high 16 bits of mantissa
        and     eax, 07fff0000h                 ; mask the exponent only
        jz      remainder1_hardware_ok          ; numerator denormal
        cmp     eax, 07fff0000h                 ; INF or NaN?
        je      remainder1_hardware_ok

        ; Unnormal test: shifting the high mantissa dword left by one
        ; moves the explicit integer bit (bit 31) into CF.  The branch
        ; must look at CF; testing ZF instead would send every operand
        ; with any other high-mantissa bit set to the hardware path.
        mov     eax, [esp + FPREM_MAIN_NUMER + 4] ; high mantissa bits - numerator
        add     eax, eax                        ; CF = explicit integer bit
        jnc     remainder1_hardware_ok          ; bit clear -> numerator is unnormal
        mov     eax, [esp + FPREM_MAIN_DENOM + 4] ; high mantissa bits - denominator
        add     eax, eax                        ; CF = explicit integer bit
        jnc     remainder1_hardware_ok          ; bit clear -> denominator is unnormal

rem1_patch:
        mov     eax, [FPREM_MAIN_DENOM+8+esp]   ; sign and exponent of y (denominator)
        and     eax, 07fffh                     ; clear sy
        add     eax, 63                         ; evaluate ey + 63
        mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x (numerator)
        and     ebx, 07fffh                     ; clear sx
        sub     ebx, eax                        ; ex - (ey + 63)
        ja      rem1_large                      ; ex > ey + 63: large-argument case

rem1_patch_loop:
        mov     eax, [FPREM_MAIN_DENOM+8+esp]   ; sign and exponent of y (denominator)
        and     eax, 07fffh                     ; clear sy
        add     eax, 10                         ; evaluate ey + 10
        mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x (numerator)
        and     ebx, 07fffh                     ; clear sx
        sub     ebx, eax                        ; ex - (ey + 10)
        js      remainder1_hardware_ok          ; safe if ey + 10 > ex

        ; Reduce the numerator against a copy of the denominator whose
        ; exponent is raised to ex - d, with d = ((ex - ey) & 7) | 4.
        fld     tbyte ptr [FPREM_MAIN_NUMER+esp] ; load the numerator
        mov     eax, [FPREM_MAIN_DENOM+8+esp]   ; sign and exponent of y (kept for restore)
        mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x (numerator)
        and     ebx, 07fffh                     ; clear sx
        mov     ecx, ebx                        ; ecx = ex
        sub     ebx, eax
        and     ebx, 07h
        or      ebx, 04h                        ; ebx = d
        sub     ecx, ebx                        ; ecx = ex - d
        mov     ebx, eax
        and     ebx, 08000h                     ; keep sy
        or      ecx, ebx                        ; merge the sign of y
        mov     dword ptr [FPREM_MAIN_DENOM+8+esp], ecx
        fld     tbyte ptr [FPREM_MAIN_DENOM+esp] ; load the shifted denominator
        mov     dword ptr [FPREM_MAIN_DENOM+8+esp], eax ; restore the initial denominator
        fxch
        fprem                                   ; this rem is safe
        fstp    tbyte ptr [FPREM_MAIN_NUMER+esp] ; update the numerator
        fstp    st(0)                           ; pop the shifted denominator
        jmp     rem1_patch_loop

rem1_large:
        ; The "denominator already saved" flag is bit 1 of EDX, set by
        ; _adj_fprem1.  EBX holds the exponent difference at this point
        ; and must not be used for this test.
        test    edx, 02h                        ; is denominator already saved
        jnz     already_saved1
        fld     tbyte ptr[esp + FPREM_MAIN_DENOM]
        fstp    tbyte ptr[esp + FPREM_MAIN_DENOM_SAVE] ; save denominator
already_saved1:
        ; Save user's precision control and institute 80.  The fp ops in
        ; rem1_large_loop must not round to user's precision (if it is
        ; less than 80) because the hardware would not have done so.

        fnstcw  [esp+FPREM_MAIN_PREV_CW]        ; save caller's control word
        mov     eax, dword ptr[esp + FPREM_MAIN_PREV_CW]
        or      eax, 033fh                      ; mask exceptions, pc=80
        mov     [esp + FPREM_MAIN_PATCH_CW], eax
        fldcw   [esp + FPREM_MAIN_PATCH_CW]

        ; Loop count: ((ex - ey) & 3fh | 20h) + 1
        mov     eax, [FPREM_MAIN_DENOM+8+esp]   ; sign and exponent of y (denominator)
        and     eax, 07fffh                     ; clear sy
        mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x (numerator)
        and     ebx, 07fffh                     ; clear sx
        sub     ebx, eax                        ; evaluate the exponent difference
        and     ebx, 03fh
        or      ebx, 020h
        add     ebx, 1
        mov     ecx, ebx                        ; ecx = loop counter

        mov     eax, [FPREM_MAIN_DENOM+8+esp]   ; sign and exponent of y (denominator)
        mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x (numerator)
        and     ebx, 07fffh                     ; clear sx
        and     eax, 08000h                     ; keep sy
        or      ebx, eax                        ; merge the sign of y
        mov     dword ptr[FPREM_MAIN_DENOM+8+esp], ebx ; make ey equal to ex (scaled denominator)
        fld     tbyte ptr [FPREM_MAIN_DENOM+esp] ; load the scaled denominator
        fabs
        fld     tbyte ptr [FPREM_MAIN_NUMER+esp] ; load the numerator
        fabs

rem1_large_loop:
        ; One compare / conditional-subtract step, then halve the divisor.
        fcom
        fnstsw  ax
        and     eax, 00100h                     ; C0 set: ST < ST(1), skip the subtract
        jnz     rem1_no_sub
        fsub    st, st(1)
rem1_no_sub:
        fxch
        fmul    qword ptr half
        fxch
        sub     ecx, 1                          ; decrement the loop counter
        jnz     rem1_large_loop

        mov     ebx, [FPREM_MAIN_NUMER+8+esp]   ; sign and exponent of x, read before the slot is overwritten
        fstp    tbyte ptr[esp + FPREM_MAIN_NUMER] ; save result
        fstp    st                              ; toss modified denom
        fld     tbyte ptr[esp + FPREM_MAIN_DENOM_SAVE]
        fld     tbyte ptr[big_number]           ; force C2 to be set
        fprem1
        fstp    st
        fld     tbyte ptr[esp + FPREM_MAIN_NUMER] ; restore saved result

        fldcw   [esp + FPREM_MAIN_PREV_CW]      ; restore caller's control word
        and     ebx, 08000h                     ; keep sx
        jz      rem1_done
        fchs                                    ; numerator was negative
        jmp     rem1_done

remainder1_hardware_ok:
        fld     tbyte ptr [FPREM_MAIN_DENOM+esp] ; load the denominator
        fld     tbyte ptr [FPREM_MAIN_NUMER+esp] ; load the numerator
        fprem1                                  ; and finally do a remainder

; prem1_main_routine end
rem1_done:
        test    edx, 03h
        jz      rem1_exit                       ; no flags from caller: nothing to fix up
        fnstsw  [esp + FPREM_MAIN_FPREM_SW]     ; save Q0 Q1 and Q2
        test    edx, 01h
        jz      do_not_de_scale1

        ; De-scale the result.  Go to pc=80 so the fmul is not rounded
        ; to the user's precision (fprem does not round the result).
        fnstcw  [esp + FPREM_MAIN_PREV_CW]      ; save caller's control word
        mov     eax, [esp + FPREM_MAIN_PREV_CW]
        or      eax, 0300h                      ; pc = 80
        mov     [esp + FPREM_MAIN_PATCH_CW], eax
        fldcw   [esp + FPREM_MAIN_PATCH_CW]
        fmul    qword ptr one_shr_64
        fldcw   [esp + FPREM_MAIN_PREV_CW]      ; restore caller's CW

do_not_de_scale1:
        mov     eax, [esp + FPREM_MAIN_FPREM_SW]
        fxch
        fstp    st                              ; drop the working denominator
        fld     tbyte ptr[esp + FPREM_MAIN_DENOM_SAVE] ; put the saved one back
        fxch
        and     eax, 04300h                     ; restore saved Q0, Q1, Q2
        sub     esp, ENV_SIZE
        fnstenv [esp]
        and     [esp].STATUS_WORD, 0bcffh       ; clear the same three bits
        or      [esp].STATUS_WORD, eax          ; and merge in the saved ones
        fldenv  [esp]
        add     esp, ENV_SIZE

rem1_exit:
        pop     ecx
        pop     ebx
        pop     eax
        ret

_fprem1_common  ENDP
|
|
|
|
;
; _adj_fprem1 - FPREM1 replacement entry point
;
; Entry : ST(0) = numerator, ST(1) = denominator.
; Exit  : operands are popped, then ST(0)/ST(1) are reloaded by
;         _fprem1_common or by the zero-denominator path below.
;         EDX is preserved; EAX is used as scratch and NOT preserved.
;
; EDX carries flags into _fprem1_common:
;         01h = operands were scaled by one_shl_64, de-scale the result
;         02h = original denominator saved in FPREM_DENOM_SAVE
;

        PUBLIC  _adj_fprem1
_adj_fprem1     PROC    NEAR

        push    edx
        sub     esp, FPREM_STACK_SIZE
        fstp    tbyte ptr [FPREM_NUMER+esp]
        fstp    tbyte ptr [FPREM_DENOM+esp]
        xor     edx, edx                        ; no flags yet

; prem1_main_routine begin
        mov     eax,[FPREM_DENOM+6+esp]         ; exponent and high 16 bits of mantissa
        test    eax,07fff0000h                  ; exponent field zero?
        jz      denormal1
        call    _fprem1_common
        add     esp, FPREM_STACK_SIZE
        pop     edx
        ret

denormal1:
        fld     tbyte ptr [FPREM_DENOM+esp]     ; load the denominator
        fld     tbyte ptr [FPREM_NUMER+esp]     ; load the numerator
        mov     eax, [FPREM_DENOM+esp]          ; test for whole mantissa == 0
        or      eax, [FPREM_DENOM+4+esp]        ; test for whole mantissa == 0
        jz      remainder1_hardware_ok_l        ; denominator is zero
        fxch
        fstp    tbyte ptr[esp + FPREM_DENOM_SAVE] ; save org denominator
        fld     tbyte ptr[esp + FPREM_DENOM]
        fxch
        or      edx, 02h                        ; denominator has been saved

        ;
        ; For this we need pc=80.  Also, mask exceptions so we don't take
        ; any denormal operand exceptions.  It is guaranteed that the
        ; descaling later on will take underflow, which is what the
        ; hardware would have done on a normal fprem.
        ;
        fnstcw  [FPREM_PREV_CW+esp]             ; save caller's control word
        mov     eax, [FPREM_PREV_CW+esp]
        or      eax, 0033fh                     ; mask exceptions, pc=80
        mov     [FPREM_PATCH_CW+esp], eax
        fldcw   [FPREM_PATCH_CW+esp]            ; mask exceptions & pc=80

        ; The denominator is a denormal.  For most numerators, scale both
        ; numerator and denominator to get rid of denormals, then run the
        ; common code with the flag set so the result is de-scaled.
        ; For large numerators this won't work because the scaling would
        ; overflow.  In that case the exponent difference is large, so
        ; the rem1_large code will be used, and it depends only on the
        ; exponent difference modulo 64.  Adding 64 to the denominator's
        ; exponent doesn't change that, so the denominator alone can be
        ; scaled without affecting the result.
        ;
        ; To start with, figure out if numerator is large

        mov     eax, [esp + FPREM_NUMER + 8]    ; load numerator exponent
        and     eax, 7fffh                      ; isolate numerator exponent
        cmp     eax, 7fbeh                      ; compare Nexp to Maxexp-64
        ja      big_numer_rem1_de               ; jif big numerator

        ; Numerator is not large: scale both numerator and denominator

        or      edx, 1                          ; result must be de-scaled
        fmul    qword ptr one_shl_64            ; make numerator not denormal
        fstp    tbyte ptr[esp + FPREM_NUMER]
        fmul    qword ptr one_shl_64            ; make denominator not denormal
        fstp    tbyte ptr[esp + FPREM_DENOM]
        jmp     scaling_done1

        ; The numerator is large.  Scale only the denominator, which will
        ; not change the result which we know will be partial.  The scale
        ; flag stays clear.
big_numer_rem1_de:
        ; We must do this with pc=80 to avoid rounding to single/double.
        ; In this case we do not mask exceptions so that we will take
        ; denormal operand, as would the hardware.
        ;
        ; FPREM_PREV_CW already holds the caller's control word.  It must
        ; not be re-read with fnstcw here: the FPU is currently running
        ; with the masked pc=80 word loaded above, and storing that would
        ; make scaling_done1 "restore" the wrong control word.
        mov     eax, [FPREM_PREV_CW+esp]        ; caller's control word
        or      eax, 00300h                     ; pc=80, caller's exception masks
        mov     [FPREM_PATCH_CW+esp], eax
        fldcw   [FPREM_PATCH_CW+esp]            ; pc=80
        fstp    st                              ; Toss numerator
        fmul    qword ptr one_shl_64            ; make denominator not denormal
        fstp    tbyte ptr[esp + FPREM_DENOM]

        ; Restore the control word which was fiddled to scale at 80-bit
        ; precision.  Then call the common code.
scaling_done1:
        fldcw   [esp + FPREM_PREV_CW]           ; restore caller's control word
        call    _fprem1_common
        add     esp, FPREM_STACK_SIZE
        pop     edx
        ret

remainder1_hardware_ok_l:
        fprem1                                  ; this entry point stands in for FPREM1
        add     esp, FPREM_STACK_SIZE
        pop     edx
        ret

_adj_fprem1     ENDP
|
|
|
|
;
; _safe_fprem - alternate public name for the FPREM workaround.
; Does nothing but call _adj_fprem and return.
;

        PUBLIC  _safe_fprem
_safe_fprem     PROC    NEAR

        call    _adj_fprem                      ; all the work is done there
        ret

_safe_fprem     ENDP
|
|
|
|
;
; _safe_fprem1 - alternate public name for the FPREM1 workaround.
; Does nothing but call _adj_fprem1 and return.
;

        PUBLIC  _safe_fprem1
_safe_fprem1    PROC    NEAR

        call    _adj_fprem1                     ; all the work is done there
        ret

_safe_fprem1    ENDP
|
|
|
|
|
|
|
|
;;; _adj_fpatan - FPATAN FIX
;;
;; Dummy entry point: executes the hardware FPATAN instruction and
;; returns.  No registers or memory are touched besides what FPATAN
;; itself uses.

        PUBLIC  _adj_fpatan
_adj_fpatan     PROC    NEAR

        fpatan                                  ; plain hardware instruction
        ret

_adj_fpatan     ENDP
|
|
|
|
|
|
;;; _adj_fptan - FPTAN FIX
;;
;; Dummy entry point: executes the hardware FPTAN instruction and
;; returns.  No registers or memory are touched besides what FPTAN
;; itself uses.

        PUBLIC  _adj_fptan
_adj_fptan      PROC    NEAR

        fptan                                   ; plain hardware instruction
        ret

_adj_fptan      ENDP
|
|
|
|
|
|
end
|
|
|