Windows NT 4.0 source code leak
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

515 lines
12 KiB

// TITLE("Alpha AXP ArcTangent2")
//++
//
// Copyright (c) 1993, 1994 Digital Equipment Corporation
//
// Module Name:
//
// atan2.s
//
// Abstract:
//
// This module implements a high-performance Alpha AXP specific routine
// for IEEE double format arctangent2.
//
// Author:
//
// Andy Garside
//
// Environment:
//
// User mode.
//
// Revision History:
//
// Thomas Van Baak (tvb) 15-Feb-1994
//
// Adapted for NT.
//
//--
#include "ksalpha.h"
//
// Define DPML exception record for NT.
//
// A record of this layout is built in the stack frame (at ExRec) and its
// address is handed to __dpml_exception in v0. This file only writes the
// ErErr, ErName, ErArg0 and ErArg1 fields; the remaining fields are left
// as they are on the stack. Offsets below are in bytes from the start of
// the record; the total size is 72 bytes.
//
.struct 0
ErErr: .space 4 // +0 error code (0x0800000x values are stored by atan2)
ErCxt: .space 4 // +4 context (not written by this file)
ErPlat: .space 4 // +8 platform (not written by this file)
ErEnv: .space 4 // +12 environment (not written by this file)
ErRet: .space 4 // +16 return value pointer (not written by this file)
ErName: .space 4 // +20 function name (32-bit address of atan2Name)
ErType: .space 8 // +24 flags and fill
ErVal: .space 8 // +32 return value
ErArg0: .space 8 // +40 arg 0 (y is stored here)
ErArg1: .space 8 // +48 arg 1 (x is stored here)
ErArg2: .space 8 // +56 arg 2 (unused by atan2)
ErArg3: .space 8 // +64 arg 3 (unused by atan2)
DpmlExceptionLength:
//
// Define stack frame.
//
// Offsets are relative to sp after the prologue has allocated the frame.
// The save slots hold every register that atan2 overwrites and reloads
// at "done". FrameLength works out to 128 bytes (5 * 8 save slots, 8 for
// Temp, 72 for the exception record, 8 of padding).
//
.struct 0
SaveS0: .space 8 // saved s0 (s0 holds the exponent mask 0x7ff00000)
SaveS1: .space 8 // saved s1 (s1 holds exp(y) - exp(x))
SaveRa: .space 8 // saved return address (ra is clobbered by bsr and used as scratch)
SaveF2: .space 8 // saved f2 (f2 holds y)
SaveF3: .space 8 // saved f3 (f3 holds x)
Temp: .space 8 // scratch slot for moving a double between FP and integer registers
ExRec: .space DpmlExceptionLength // exception record
.space 8 // for 16-byte stack alignment
FrameLength:
//
// Define lower and upper 32-bit parts of 64-bit double.
//
// These are byte offsets added to the address of a double in memory.
// The routine reads Temp + HighPart to obtain the longword that carries
// the sign bit (bit 31), the exponent field (mask 0x7ff00000) and the
// upper 20 fraction bits. The low longword is read as plain Temp(sp) in
// the code below, so LowPart is not referenced by name.
//
#define LowPart 0x0
#define HighPart 0x4
//
// Define offsets into atan_t_table.
//
// Byte offsets of two double constants inside __atan_t_table, which is
// defined outside this file. ATAN_INF is the value returned (with the
// sign of y) when y is infinite or x is zero, and TWICE_ATAN_INF is the
// value returned (with the sign of y) when y is finite and x is negative
// infinity -- presumably pi/2 and pi respectively; confirm against the
// table definition.
//
#define ATAN_INF 0xf18
#define TWICE_ATAN_INF 0xf28
SBTTL("ArcTangent2")
//++
//
// double
// atan2 (
// IN double y
// IN double x
// )
//
// Routine Description:
//
// This function returns the arctangent of y/x for the given double
// arguments, using the signs of both y and x to select the quadrant.
// The result lies in the range [-pi,pi].
//
// Arguments:
//
// y (f16) - Supplies the first argument (the numerator of y/x).
//
// x (f17) - Supplies the second argument (the denominator of y/x).
//
// Return Value:
//
// The double arctangent2 result is returned as the function value in f0.
//
//--
NESTED_ENTRY(atan2, FrameLength, ra)
//
// Prologue: allocate the frame and save every register that this routine
// overwrites and later reloads at "done" (s0, s1, ra, f2, f3).
//
lda sp, -FrameLength(sp) // allocate stack frame
stq s0, SaveS0(sp)
stq s1, SaveS1(sp)
stq ra, SaveRa(sp)
stt f2, SaveF2(sp)
stt f3, SaveF3(sp)
PROLOGUE_END
//
// Register roles for the rest of the routine:
//   f2 = y, f3 = x (copies of the incoming f16/f17, which are reused as scratch)
//   s0 = 0x7ff00000, the exponent mask for the high longword of a double
//   t0 = exponent field of y, t2 = exponent field of x (both left in place,
//        i.e. still shifted into bits <30:20>)
//   t5 = class code of y, t6 = class code of x (set by class_y / class_x)
//   s1 = t0 - t2 once calc_atan2 is reached
// Doubles are moved to integer registers by storing them to Temp and
// reloading the high longword.
//
// Fast path test: if both exponent fields are neither all zeros nor all
// ones (both operands normal) go straight to calc_atan2; otherwise
// classify the operands and dispatch through Switch_table.
//
cpys f16, f16, f2 // y
ldah s0, 0x7ff0(zero) // s0 = 0x7ff00000 (exponent mask)
cpys f17, f17, f3 // x
stt f2, Temp(sp)
ldl v0, Temp + HighPart(sp) // high longword of y
and v0, s0, v0
mov v0, t0 // t0 = exponent field of y
xor t0, s0, t1
beq t1, spec_y // y exponent all ones: y is Inf or NaN
beq t0, spec_y // y exponent zero: y is zero or denormal
stt f3, Temp(sp)
ldl t2, Temp + HighPart(sp) // high longword of x
and t2, s0, t2 // t2 = exponent field of x
xor t2, s0, t1
beq t1, class_y // x exponent all ones: x is Inf or NaN
bne t2, calc_atan2 // x exponent nonzero: both operands normal
br zero, class_y // x is zero or denormal
//
// Abnormal inputs
//
// y was found to be special before x was examined, so fetch the exponent
// field of x into t2 here; it is needed later by de_o_norm and
// scale_up_denorm.
//
spec_y: stt f3, Temp(sp)
ldl t2, Temp + HighPart(sp)
and t2, s0, t2
//
// Classify y according to type
//
// Produces a class code in t5:
//   0 or 1  NaN       (the value is bit 19 of the high longword, the top
//                      fraction bit, not the sign)
//   2 + s   infinity
//   4 + s   normal
//   6 + s   denormal
//   8 + s   zero
// where s is the sign bit of y. The low bit of t5 is later tested as the
// sign of y, and t5 >> 1 is used as the row selector for Switch_table.
//
class_y:
stt f2, Temp(sp)
ldl t3, Temp + HighPart(sp) // t3 = high longword of y (sign-extended by ldl)
zapnot t3, 0xf, t1 // keep the low four bytes only
and t3, s0, t4 // t4 = exponent field
srl t1, 31, t1
and t1, 1, t1 // t1 = sign bit of y
beq t4, LL00d0 // exponent zero: zero or denormal
cmpult t4, s0, t4
beq t4, LL0098 // exponent all ones: Inf or NaN
addl t1, 4, t5 // normal: class = 4 + sign
br zero, class_x
//
// y has an all-ones exponent: distinguish infinity from NaN by testing
// whether any fraction bit is set.
//
LL0098: ldah t6, 0x10(zero)
ldl t4, Temp(sp) // low longword of y
lda t6, -1(t6) // t6 = 0x000fffff (high fraction mask)
and t3, t6, t6 // t6 = upper 20 fraction bits
stl t6, Temp + HighPart(sp) // (Temp is overwritten at class_x before it is read again)
bis t6, t4, t4 // t4 = OR of all fraction bits
srl t6, 19, t6 // bring the top fraction bit down to bit 0
beq t4, LL00c8 // fraction zero: infinity
and t6, 1, t6
mov t6, t5 // NaN: class = top fraction bit (0 or 1)
br zero, class_x
LL00c8: addl t1, 2, t5 // infinity: class = 2 + sign
br zero, class_x
//
// y has a zero exponent: denormal if any fraction bit is set, else zero.
//
LL00d0: ldl t7, Temp(sp) // low longword of y
ldah t4, 0x10(zero)
lda t4, -1(t4) // t4 = 0x000fffff
and t3, t4, t3 // t3 = upper 20 fraction bits
bis t3, t7, t7 // t7 = OR of all fraction bits
stl t3, Temp + HighPart(sp) // (Temp is overwritten at class_x before it is read again)
mov 6, t6 // assume denormal
cmoveq t7, 8, t6 // fraction zero: zero
addl t1, t6, t5 // class = 6 or 8, plus sign
//
// Classify x according to type
//
// Same scheme as class_y, producing the class code of x in t6 and using
// t4 for the sign bit of x.
//
class_x:
stt f3, Temp(sp)
ldl t3, Temp + HighPart(sp) // t3 = high longword of x
zapnot t3, 0xf, t4
and t3, s0, t1 // t1 = exponent field
srl t4, 31, t4
and t4, 1, t4 // t4 = sign bit of x
beq t1, LL0158 // exponent zero: zero or denormal
cmpult t1, s0, t1
beq t1, LL0120 // exponent all ones: Inf or NaN
addl t4, 4, t6 // normal: class = 4 + sign
br zero, switch
LL0120: ldah t1, 0x10(zero)
ldl t7, Temp(sp) // low longword of x
lda t1, -1(t1) // t1 = 0x000fffff
and t3, t1, t1 // t1 = upper 20 fraction bits
bis t1, t7, t7 // t7 = OR of all fraction bits
stl t1, Temp + HighPart(sp)
beq t7, LL0150 // fraction zero: infinity
srl t1, 19, t1
and t1, 1, t1
mov t1, t6 // NaN: class = top fraction bit (0 or 1)
br zero, switch
LL0150: addl t4, 2, t6 // infinity: class = 2 + sign
br zero, switch
LL0158: ldl a0, Temp(sp) // low longword of x
ldah t7, 0x10(zero)
lda t7, -1(t7) // t7 = 0x000fffff
and t3, t7, t3
bis t3, a0, a0 // a0 = OR of all fraction bits
stl t3, Temp + HighPart(sp)
mov 6, t1 // assume denormal
cmoveq a0, 8, t1 // fraction zero: zero
addl t4, t1, t6 // class = 6 or 8, plus sign
//
// switch on class(y) and class(x)
//
// index = 5 * (class(y) >> 1) + (class(x) >> 1), giving 0..24 for the
// class codes produced above. Each Switch_table entry is a 32-bit code
// address that is loaded with ldl and jumped to.
//
switch: sra t5, 1, a0 // a0 = y type (0 NaN, 1 Inf, 2 normal, 3 denormal, 4 zero)
sra t6, 1, t3 // t3 = x type
s4addl a0, a0, a0 // a0 = 4*a0 + a0 = 5 * y type
addl a0, t3, t3 // t3 = table index
cmpule t3, 24, t12
beq t12, cpys_y_class // index out of range: skip the table
lda t12, Switch_table
s4addl t3, t12, t12 // address of entry = table + 4 * index
ldl t12, 0(t12)
jmp zero, (t12)
//
// y is NaN (table entries 0..4): return y.
//
ret_y: cpys f2, f2, f0
br zero, done
//
// x is NaN and y is not (entries 5, 10, 15, 20): return x.
//
ret_x: cpys f3, f3, f0
br zero, done
//
// Both arguments are infinite (entry 6). Fill in the exception record
// with the function name, both arguments and error code 0x08000009, pass
// its address in v0 to __dpml_exception, and return the double found at
// the address that comes back in v0.
//
infs:
lda t1, atan2Name
stl t1, ExRec + ErName(sp)
ldah t3, 0x800(zero)
stt f2, ExRec + ErArg0(sp)
stt f3, ExRec + ErArg1(sp)
lda t3, 9(t3) // t3 = 0x08000009
stl t3, ExRec + ErErr(sp)
lda v0, ExRec(sp)
bsr ra, __dpml_exception
ldt f0, 0(v0)
br zero, done
//
// Both arguments are zero (entry 24). Same sequence as infs with error
// code 0x08000008.
//
zeros:
lda t6, atan2Name
stl t6, ExRec + ErName(sp)
ldah a0, 0x800(zero)
stt f2, ExRec + ErArg0(sp)
stt f3, ExRec + ErArg1(sp)
lda a0, 8(a0) // a0 = 0x08000008
stl a0, ExRec + ErErr(sp)
lda v0, ExRec(sp)
bsr ra, __dpml_exception
ldt f0, 0(v0)
br zero, done
//
// y is infinite with finite x, or x is zero with nonzero finite y
// (entries 7, 8, 9, 14, 19): return the ATAN_INF table constant with the
// sign of y.
//
ret_inf:
ldt f0, __atan_t_table + ATAN_INF
//
// Negate f0 when the low bit of t5 (the sign of y) is set. This label is
// also the target of table entry 12 and of the out-of-range index check;
// on those two paths f0 has not been written by this routine. Entry 12
// (both operands normal) is not reached through the table because that
// case branches to calc_atan2 before classification.
//
cpys_y_class:
blbc t5, done
cpysn f0, f0, f0
br zero, done
//
// x is infinite with finite y, or y is zero with nonzero x (entries 11,
// 16, 21, 22, 23). The low bit of t6 is the sign of x: for negative x
// return the TWICE_ATAN_INF table constant, for positive x return zero,
// in both cases with the sign of y.
//
ret_tw_inf:
blbc t6, x_pos
ldt f16, __atan_t_table + TWICE_ATAN_INF
cpys f16, f16, f0
blbc t5, done
cpysn f0, f0, f0
br zero, done
x_pos: cpys f31, f31, f16 // f16 = +0.0 (f31 reads as zero)
cpys f16, f16, f0
blbc t5, done
cpysn f0, f0, f0
br zero, done
//
// y is denormal and x is normal (entry 17). If the exponent field of x
// is below 0x435 rescale and compute; otherwise treat the quotient as an
// underflow.
// NOTE(review): the sign of x is not examined before branching to
// underflow; for negative x the expected result would be of magnitude
// TWICE_ATAN_INF rather than an underflowed quotient -- confirm how
// __dpml_exception resolves this case.
//
de_o_norm:
ldah t4, 0x4350(zero) // underflow check
cmpult t2, t4, t4
bne t4, scale_up_denorm
br zero, underflow
//
// y is normal and x is denormal (entry 13). If the exponent field of y
// is at least 0x036 the result is the ATAN_INF constant with the sign of
// y; otherwise fall through and rescale.
//
n_o_de: ldah t1, 0x360(zero) // check for const range
cmplt t0, t1, t1
beq t1, const_range
// Scale x and y up by 2^F_PRECISION and adjust exp_x and exp_y accordingly.
// With x and y scaled into the normal range, we can rejoin the main logic
// flow for computing atan(y/x)
//
// Each operand is handled according to its own exponent field:
//   nonzero (normal):  0x433 is added to the exponent field in place
//                      (0x4330 << 16 = 0x43300000 in the high longword)
//   zero (denormal):   the fraction is attached to the exponent of 2^53
//                      and 2^53 is subtracted, which normalizes it
// t0 / t2 are updated to the exponent fields of the rescaled values.
// ra, v0, f16, f17 and f0 are used as scratch here.
//
scale_up_denorm:
beq t0, LL02c0 // y is denormal
stt f2, Temp(sp)
ldl ra, Temp + HighPart(sp) // ra = high longword of y (ra is restored at done)
ldah v0, 0x4330(zero)
ldah t3, -0x7ff0(zero)
addl t0, v0, v0 // v0 = exponent field of y + 0x43300000
lda t3, -1(t3) // t3 = 0x800fffff (sign and fraction mask)
and ra, t3, t3 // keep sign and fraction of y
mov v0, t0 // t0 = new exponent field of y
bis t3, t0, t3
stl t3, Temp + HighPart(sp)
ldt f2, Temp(sp) // f2 = rescaled y
br zero, LL02e4
LL02c0: ldt f17, Two53
cpys f2, f17, f16 // f16 = 2^53 with the sign of y
cpyse f16, f2, f0 // f0 = sign/exponent of f16, fraction of y
subt f0, f16, f2 // f2 = normalized, rescaled y
stt f2, Temp(sp)
ldl t4, Temp + HighPart(sp)
and t4, s0, t4
mov t4, t0 // t0 = exponent field of rescaled y
//
// Same treatment for x.
// NOTE(review): de_o_norm sends x here with an exponent field as large
// as 0x434. 0x434 + 0x433 does not fit in the 11-bit exponent field, so
// the addl below would carry into bit 31, which is the sign position of
// the rebuilt high longword -- confirm this range is handled as intended.
//
LL02e4: beq t2, LL0318 // x is denormal
stt f3, Temp(sp)
ldl a0, Temp + HighPart(sp) // a0 = high longword of x
ldah v0, -0x7ff0(zero)
ldah ra, 0x4330(zero)
lda v0, -1(v0) // v0 = 0x800fffff (sign and fraction mask)
addl t2, ra, t2 // t2 = exponent field of x + 0x43300000
and a0, v0, v0 // keep sign and fraction of x
bis v0, t2, v0
stl v0, Temp + HighPart(sp)
ldt f3, Temp(sp) // f3 = rescaled x
br zero, calc_atan2
LL0318: ldt f17, Two53
cpys f3, f17, f0 // f0 = 2^53 with the sign of x
cpyse f0, f3, f16 // f16 = sign/exponent of f0, fraction of x
subt f16, f0, f3 // f3 = normalized, rescaled x
stt f3, Temp(sp)
ldl t1, Temp + HighPart(sp)
and t1, s0, t1
mov t1, t2 // t2 = exponent field of rescaled x
//
// OK. Calculate atan2.
//
// s1 = exponent field of y minus exponent field of x (both still shifted
// into bits <30:20>), used as a cheap estimate of the size of y/x:
//   s1 >= 0x03600000   quotient too large to matter: const_range
//   s1 <= -0x01c00000  quotient small: ident_range
//   otherwise          compute atan(y/x) by calling atan, then adjust the
//                      result in post_proc when x is not positive
// f2 and f3 are read again after the bsr, so the code relies on atan
// leaving them intact.
//
calc_atan2:
subl t0, t2, s1
ldah t4, 0x360(zero) // check for const range
ldah t5, -0x1c0(zero) // check for identity range
cmplt s1, t4, t4
cmple s1, t5, t5
beq t4, const_range
bne t5, ident_range
divt f2, f3, f16 // f16 = y / x, the argument passed to atan
bsr ra, atan
cpys f0, f0, f1 // f1 = atan(y / x)
cmptlt f31, f3, f3 // f3 = nonzero if x > 0, else zero (x is no longer needed)
cpys f1, f1, f0
fbeq f3, post_proc // x not positive: add the signed TWICE_ATAN_INF constant
br zero, done
//
// Small quotient. s1 is negative on this path, so the unsigned compares
// below order the sign-extended values the same way signed ones would.
// If x is negative and s1 is below -0x03600000 the result is the
// TWICE_ATAN_INF constant with the sign of y; otherwise go on to
// poss_under.
//
ident_range:
ldah v0, -0x360(zero) // check for possible underflow
cmpult s1, v0, v0
fbge f3, poss_under
beq v0, poss_under
ldt f10, __atan_t_table + TWICE_ATAN_INF
br zero, fix_sign
//
// If s1 is at or below -0x3fe00000 the quotient may be denormal or
// underflow: handle it in under_or_de. Otherwise use y / x itself as the
// arctangent value (no call to atan), adjusting in post_proc when x is
// not positive.
//
poss_under:
ldah t1, -0x3fe0(zero) // check for certain underflow or denorm
cmpule s1, t1, t1
bne t1, under_or_de
divt f2, f3, f1 // f1 = y / x
cmptlt f31, f3, f3 // f3 = nonzero if x > 0, else zero
fbeq f3, post_proc
cpys f1, f1, f0
br zero, done
//
// x is not positive: result = f1 + (TWICE_ATAN_INF constant with the
// sign of y).
//
post_proc:
ldt f11, __atan_t_table + TWICE_ATAN_INF
cpys f2, f11, f12
addt f1, f12, f0
br zero, done
//
// Quotient is near or below the bottom of the normal range. If s1 is
// below -0x43500000 go straight to underflow. Otherwise raise the
// exponent field of y by 0x035, divide, and lower the exponent field of
// the quotient by the same amount; if that leaves an exponent field of
// zero or less the result is not a normal number and is handed to
// underflow. s0 is overwritten here (it is reloaded at done).
//
under_or_de:
ldah t3, -0x4350(zero) // check for underflow
cmpult s1, t3, t3
bne t3, underflow
ldah t6, 0x350(zero) // fixup denorm check
cpys f2, f2, f13
stt f13, Temp(sp)
ldl t5, Temp + HighPart(sp)
addl t5, t6, t5 // add 0x03500000 to the high longword of y
stl t5, Temp + HighPart(sp)
ldt f14, Temp(sp)
divt f14, f3, f14 // f14 = (y with raised exponent) / x
stt f14, Temp(sp)
ldl a2, Temp + HighPart(sp)
and a2, s0, s0 // s0 = exponent field of the quotient
subl s0, t6, t6 // t6 = exponent field with the 0x035 removed again
ble t6, underflow
stt f14, Temp(sp)
ldl a4, Temp + HighPart(sp)
ldah a5, -0x7ff0(zero)
lda a5, -1(a5) // a5 = 0x800fffff (sign and fraction mask)
and a4, a5, a4
bis a4, t6, t6 // sign and fraction of the quotient, corrected exponent
stl t6, Temp + HighPart(sp)
ldt f0, Temp(sp)
br zero, done
//
// quotient underflows
//
// Same exception sequence as infs and zeros, with error code 0x0800000a.
//
underflow:
lda t10, atan2Name
ldah v0, 0x800(zero)
stl t10, ExRec + ErName(sp)
stt f2, ExRec + ErArg0(sp)
lda v0, 0xa(v0) // v0 = 0x0800000a
stt f3, ExRec + ErArg1(sp)
stl v0, ExRec + ErErr(sp)
lda v0, ExRec(sp)
bsr ra, __dpml_exception
ldt f0, 0(v0)
br zero, done
//
// |y| dominates |x|: the result is the ATAN_INF constant with the sign of
// y. fix_sign is also entered from ident_range with f10 holding the
// TWICE_ATAN_INF constant.
//
const_range:
ldt f10, __atan_t_table + ATAN_INF
fix_sign:
cpys f2, f10, f0 // f0 = magnitude of f10 with the sign of y
//
// Restore registers and return with result in f0.
//
done:
ldq s0, SaveS0(sp)
ldq s1, SaveS1(sp)
ldq ra, SaveRa(sp)
ldt f2, SaveF2(sp)
ldt f3, SaveF3(sp)
lda sp, FrameLength(sp) // deallocate stack frame
ret zero, (ra) // return
.end atan2
.rdata
.align 3
//
// Define floating point constants.
//
// Two53 is used by scale_up_denorm to normalize denormal operands.
// One is not referenced by the code visible in this file.
//
One: .double 1.0
Two53: .quad 0x4340000000000000 // 2^53 (9007199254740992)
//
// switch on class of y and x
//
// 25 entries of 32-bit code addresses, indexed by
//     5 * (type of y) + (type of x)
// where type is 0 = NaN, 1 = infinity, 2 = normal, 3 = denormal, 4 = zero.
// Each entry below is annotated as "index: y / x".
//
Switch_table:
.long ret_y // 0: NaN / NaN
.long ret_y // 1: NaN / infinity
.long ret_y // 2: NaN / normal
.long ret_y // 3: NaN / denormal
.long ret_y // 4: NaN / zero
.long ret_x // 5: infinity / NaN
.long infs // 6: infinity / infinity
.long ret_inf // 7: infinity / normal
.long ret_inf // 8: infinity / denormal
.long ret_inf // 9: infinity / zero
.long ret_x // 10: normal / NaN
.long ret_tw_inf // 11: normal / infinity
.long cpys_y_class // 12: normal / normal (handled before the switch by calc_atan2)
.long n_o_de // 13: normal / denormal
.long ret_inf // 14: normal / zero
.long ret_x // 15: denormal / NaN
.long ret_tw_inf // 16: denormal / infinity
.long de_o_norm // 17: denormal / normal
.long scale_up_denorm // 18: denormal / denormal
.long ret_inf // 19: denormal / zero
.long ret_x // 20: zero / NaN
.long ret_tw_inf // 21: zero / infinity
.long ret_tw_inf // 22: zero / normal
.long ret_tw_inf // 23: zero / denormal
.long zeros // 24: zero / zero
//
// Function name for dpml_exception.
//
// The address of this NUL-terminated string is stored in the ErName field
// of the exception record.
//
atan2Name:
.ascii "atan2\0"