mirror of https://github.com/lianthony/NT4.0
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
586 lines
15 KiB
586 lines
15 KiB
// TITLE("Alpha AXP Exponential")
|
|
//++
|
|
//
|
|
// Copyright (c) 1993, 1994 Digital Equipment Corporation
|
|
//
|
|
// Module Name:
|
|
//
|
|
// exp.s
|
|
//
|
|
// Abstract:
|
|
//
|
|
// This module implements a high-performance Alpha AXP specific routine
|
|
// for IEEE double format exponential.
|
|
//
|
|
// Author:
|
|
//
|
|
// Bob Hanek 30-Jun-1993
|
|
//
|
|
// Environment:
|
|
//
|
|
// User mode.
|
|
//
|
|
// Revision History:
|
|
//
|
|
// Thomas Van Baak (tvb) 6-Feb-1994
|
|
//
|
|
// Adapted for NT.
|
|
//
|
|
//--
|
|
|
|
#include "ksalpha.h"
|
|
|
|
//
// Define DPML exception record for NT.
//
// This is the record whose address exp passes to __dpml_exception in v0.
// Field offsets come from the .struct location counter; the resulting
// byte offset of each field is noted beside it.
//

        .struct 0
ErErr:  .space  4                       // +0x00  error code
ErCxt:  .space  4                       // +0x04  context
ErPlat: .space  4                       // +0x08  platform
ErEnv:  .space  4                       // +0x0c  environment
ErRet:  .space  4                       // +0x10  return value pointer
ErName: .space  4                       // +0x14  function name
ErType: .space  8                       // +0x18  flags and fill
ErVal:  .space  8                       // +0x20  return value
ErArg0: .space  8                       // +0x28  arg 0
ErArg1: .space  8                       // +0x30  arg 1
ErArg2: .space  8                       // +0x38  arg 2
ErArg3: .space  8                       // +0x40  arg 3
DpmlExceptionLength:                    // 0x48 bytes in total
|
|
|
|
//
// Define stack frame.
//
// Temp is an 8-byte scratch slot used to move values between the floating
// and integer register files. ExRec holds one DPML exception record.
// 8 + 0x48 = 0x50 bytes is already a multiple of 16, so the alignment pad
// below is empty.
//

        .struct 0
Temp:   .space  8                       // save argument
ExRec:  .space  DpmlExceptionLength     // exception record
        .space  0                       // for 16-byte stack alignment
FrameLength:
|
|
|
|
//
// Define lower and upper 32-bit parts of 64-bit double.
//
// These are byte offsets added to the address of a double in memory to
// reach each 32-bit half (for example Temp + HighPart(sp)).
//

#define LowPart     0x0
#define HighPart    0x4
|
|
|
|
SBTTL("Exponential")

//++
//
// double
// exp (
//    IN double x
//    )
//
// Routine Description:
//
//    This function returns the exponential of the given double argument.
//
//    The argument is classified with integer compares on its high 32 bits:
//
//      - high word of |x| in [0x3c900000, 0x4086232b):
//            the result is computed directly with no range check.
//      - high word of |x| below 0x3c900000:
//            1.0 is returned.
//      - exponent field all ones:
//            a NaN is returned unchanged; an infinity is reported to
//            __dpml_exception (code 0x08000020 for +inf, 0x08000021 for
//            -inf) and the value it supplies is returned.
//      - anything else:
//            arguments beyond the overflow / underflow thresholds are
//            reported to __dpml_exception (0x0800001e / 0x0800001f);
//            the remaining borderline arguments are computed and the
//            scaling by 2^m is applied with an explicit exponent check.
//
//    Core computation (label 30):
//
//        N = x * table[0x10] rounded to an integer
//        r = (x - N * table[0x00]) + N * table[0x08]
//        j = N & 0x3f,  m = N >> 6
//        result = 2^m * (T[j] + T[j] * (r + p(r)))
//
//    where T[j] is the hi + lo pair at table offset 0x50 + 16 * j and p(r)
//    is built from the four coefficients at table offsets 0x30 .. 0x48.
//
// Arguments:
//
//    x (f16) - Supplies the argument value.
//
// Return Value:
//
//    The double exponential result is returned as the function value in f0.
//
// Register notes:
//
//    t6 - holds the caller's return address for the whole routine.
//         NOTE(review): t6 is a temporary register and is live across the
//         calls to __dpml_exception; this relies on that routine leaving
//         t6 intact - confirm.
//    v0 - base of __exp_t_table on the computation path.
//    t3 - zero on the fast path; nonzero when the scaled result must be
//         range checked (label 40).
//
//--

        NESTED_ENTRY(exp, FrameLength, ra)

        lda     sp, -FrameLength(sp)    // allocate stack frame
        mov     ra, t6                  // save return address

        PROLOGUE_END

//
// do some range checks and load some constants
//

        ornot   zero, zero, t1          // t1 = all ones
        stt     f16, Temp(sp)           // spill x so its bits can be read as integers
        ldl     v0, Temp + HighPart(sp) // high 32 bits of x
        srl     t1, 33, t1              // t1 = 0x7fffffff
        ldah    t2, 0x3c90(zero)        // small_x
        zapnot  v0, 0xf, t0             // t0 = high word of x, zero-extended
        and     t0, t1, t1              // t1 = high word of |x|
        lda     v0, __exp_t_table       // v0 = table base
        ldah    t3, 0x3f6(zero)         // big_x - small_x
        subq    t1, t2, t2              // high word of |x| minus small_x
        lda     t3, 0x232b(t3)          // big_x - small_x
        ldt     f1, 0x10(v0)            // two_pow_l_over_ln2
        cmpult  t2, t3, t2              // single unsigned test: small_x <= |x| < big_x
        beq     t2, 10f                 // outside the fast range

        mov     zero, t3                // t3 = 0: no check of the scaled result
        br      zero, 30f

//
// check for possible problems
//

10:     ldah    t4, 0x4086(zero)        // big_x
        ldah    t2, 0x7ff0(zero)        // exp mask
        lda     t4, 0x232b(t4)
        cmpult  t1, t2, t3              // t3 = 1 unless the exponent field is all ones
        cmpult  t1, t4, t1              // t1 = 1 if high word of |x| < big_x
        beq     t3, 80f                 // NaN or infinity

        beq     t1, 20f                 // |x| at or above big_x

        ldt     f0, One                 // |x| below small_x: return 1.0
        br      zero, done

//
// check for under/over flow
//

20:     lda     t1, 1(zero)
        ldah    t4, 0x4086(zero)
        sll     t1, 31, t1              // t1 = 0x80000000, sign bit of the high word
        mov     t0, t3
        lda     t4, 0x2e42(t4)          // overflow_x
        cmpule  t3, t4, t4              // true only for positive x up to overflow_x
        mov     1, t3                   // t3 = 1: scaled result must be checked
        bne     t4, 30f

        cmpult  t0, t1, t1              // sign bit clear: positive x beyond overflow_x
        bne     t1, 70f

        ldq     t4, Under               // underflow_x
        cmpult  t0, t4, t0              // negative x: larger high word = larger magnitude
        beq     t0, 60f                 // at or beyond underflow_x

//
// rejoin normal path
//

30:     mult    f16, f1, f1             // x * two_pow_l_over_ln2
        ldt     f12, 0x48(v0)           // load poly coef
        ldt     f0, Two52
        ldt     f13, 0x38(v0)           // load poly coef
        ldt     f15, 0x40(v0)           // load poly coef
        cpys    f16, f0, f10            // 2^52 carrying the sign of x
        ldt     f0, 0(v0)
        addt    f1, f10, f1             // adding then subtracting 2^52 rounds
        subt    f1, f10, f1             // flt_int_N
        ldt     f10, 8(v0)
        mult    f1, f0, f0              // N * table[0x00]
        mult    f1, f10, f10            // N * table[0x08]
        subt    f16, f0, f0             // x - N * table[0x00]
        addt    f0, f10, f11            // f11 = r, the reduced argument
        mult    f11, f11, f14           // r^2
        mult    f12, f11, f12           // table[0x48] * r
        mult    f13, f11, f11           // table[0x38] * r
        ldt     f13, 0x30(v0)           // load poly coef
        mult    f14, f14, f17           // r^4
        addt    f15, f12, f12           // table[0x40] + table[0x48] * r
        addt    f13, f11, f11           // table[0x30] + table[0x38] * r
        cvttqc  f1, f15                 // N as a 64-bit integer
        mult    f17, f12, f12           // r^4 * (...)
        mult    f14, f11, f11           // r^2 * (...)
        stt     f15, Temp(sp)
        ldq     t1, Temp(sp)            // t1 = N
        and     t1, 0x3f, t0            // j = N & 63
        sll     t0, 4, t0               // 16 bytes per table entry
        addt    f11, f12, f11           // p(r)
        addl    v0, t0, v0              // v0 -> entry j (relative to offset 0x50)
        sra     t1, 6, t1               // m = N >> 6 (arithmetic)
        ldt     f17, 0x50(v0)           // powers of two
        ldt     f14, 0x58(v0)
        lda     t4, 0x3ff(t1)           // m + 0x3ff
        addt    f10, f11, f10           // N * table[0x08] + p(r)
        addt    f17, f14, f1            // sum of the two table pieces
        sll     t4, 20, t4              // position as the exponent field of a high word
        stl     t4, Temp + HighPart(sp)
        stl     zero, Temp(sp)
        ldt     f13, Temp(sp)           // f13 = 2^m built from its bit pattern
        addt    f0, f10, f0             // r + p(r)
        mult    f1, f0, f0
        addt    f14, f0, f0             // add the second table piece
        addt    f17, f0, f17            // add the first table piece: unscaled result
        bne     t3, 40f                 // borderline argument: scale with checking

        mult    f17, f13, f0            // result = unscaled * 2^m
        br      zero, done

//
// do check
//
// The unscaled result in f17 is scaled by adding to its exponent field
// directly. (m - 1) is added first so that one unsigned compare against
// 0x7fe00000 rejects both a negative and a too-large exponent; the last
// step of 1 is added only once the value is known to be in range.
//

40:     stt     f17, Temp(sp)
        ldl     v0, Temp + HighPart(sp)
        subq    t1, 1, t1               // m - 1
        sll     t1, 20, t1              // aligned with the exponent field
        zapnot  v0, 0xf, v0             // high word of the unscaled result
        ldah    t4, 0x7fe0(zero)
        addq    v0, t1, t0              // high word scaled by 2^(m - 1)
        mov     t0, t3
        cmpult  t3, t4, t4
        beq     t4, 50f                 // out of range either way

        ldah    t5, 0x10(zero)          // one more exponent step
        addq    t3, t5, t5
        stl     t5, Temp + HighPart(sp) // low word still holds f17's low bits
        ldt     f0, Temp(sp)
        br      zero, done

//
// must check for abnormals
//

50:     bgt     t1, 70f                 // positive scale: overflow

        ldah    t5, 0x350(zero)
        addq    t3, t5, t5
        blt     t5, 60f                 // still negative after adding 0x03500000

        subq    v0, t3, v0              // v0 = -((m - 1) << 20)
        stl     zero, Temp(sp)
        stl     v0, Temp + HighPart(sp)
        ldt     f15, Temp(sp)           // double with high word v0, low word 0
        addt    f17, f15, f17           // the add rounds away the bits that do not fit
        stt     f17, Temp(sp)
        cmpteq  f17, f15, f15
        fbne    f15, 60f                // nothing of the result survived

        ldl     t4, Temp + HighPart(sp)
        ldah    t3, 0x7ff0(zero)        // exponent mask
        zapnot  t4, 0xf, t4
        subq    t4, v0, v0              // take v0 back out of the high word
        stl     v0, Temp + HighPart(sp)
        and     v0, t3, v0              // exponent field of the candidate result
        ldt     f16, Temp(sp)           // f16 = candidate result
        bne     v0, retarg              // nonzero exponent field: return it

                                        // zero exponent field: fall through and
                                        // report underflow (f16 now holds the
                                        // candidate result, not the argument)

//
// underflow
//

60:     lda     t0, expName
        ldah    t5, 0x800(zero)
        stl     t0, ExRec + ErName(sp)
        stt     f16, ExRec + ErArg0(sp)
        lda     t5, 0x1f(t5)            // error code 0x0800001f
        stl     t5, ExRec + ErErr(sp)
        lda     v0, ExRec(sp)           // v0 = address of the exception record
        bsr     ra, __dpml_exception
        ldt     f0, 0(v0)               // result is read through the returned pointer
        br      zero, done

//
// overflow
//

70:     lda     t0, expName
        ldah    t3, 0x800(zero)
        stl     t0, ExRec + ErName(sp)
        stt     f16, ExRec + ErArg0(sp)
        lda     t3, 0x1e(t3)            // error code 0x0800001e
        stl     t3, ExRec + ErErr(sp)
        lda     v0, ExRec(sp)           // v0 = address of the exception record
        bsr     ra, __dpml_exception
        ldt     f0, 0(v0)               // result is read through the returned pointer
        br      zero, done

//
// nan or inf check
//
// On entry t2 = 0x7ff00000 (exp mask) and t0 = high word of x. ra is free
// to use as a scratch register because the return address lives in t6.
//

80:     stt     f16, Temp(sp)
        ldl     t1, Temp + HighPart(sp)
        ldl     ra, Temp(sp)            // low word of x
        zapnot  t1, 0xf, t3             // high word, zero-extended
        zapnot  ra, 0xf, ra
        and     t3, t2, t5
        cmpeq   t5, t2, t2              // t2 = 1 if the exponent field is all ones
        beq     t2, 90f

        zapnot  t1, 0xf, t3
        ldah    v0, 0x10(zero)          // v0 = 0x00100000
        lda     v0, -1(v0)              // v0 = 0x000fffff
        and     t3, v0, v0              // fraction bits held in the high word. t3 is
                                        // only 32 bits wide, so a wider mask would
                                        // keep the exponent bits and make every
                                        // infinity look like a NaN
        bis     v0, ra, v0              // all 52 fraction bits, ORed together
        cmpult  zero, v0, v0            // v0 = 1 if any fraction bit is set
        and     t2, v0, t2
        bne     t2, retarg              // NaN: hand the argument back

//
// call exception dispatcher for inf
//

90:     lda     t4, 1(zero)
        sll     t4, 31, t4              // sign bit of the high word
        mov     0x21, t5                // code for a negative infinity
        and     t0, t4, t0
        cmoveq  t0, 0x20, t5            // sign clear: code for a positive infinity
        ldah    t1, 0x800(zero)
        bis     t5, t1, t1              // 0x08000020 or 0x08000021
        stl     t1, ExRec + ErErr(sp)
        lda     ra, expName
        stl     ra, ExRec + ErName(sp)
        lda     v0, ExRec(sp)           // v0 = address of the exception record
        stt     f16, ExRec + ErArg0(sp)
        bsr     ra, __dpml_exception
        ldt     f16, 0(v0)              // value supplied by the dispatcher

//
// just return x
//

retarg: cpys    f16, f16, f0            // f0 = f16

//
// Return with result in f0.
//

done:
        lda     sp, FrameLength(sp)     // deallocate stack frame
        ret     zero, (t6)              // return through saved ra in t6

        .end    exp
|
|
|
|
SBTTL("Exponential Helper")

//
// exp special entry point (for sinh, cosh etc)
//
// Performs the same argument reduction and polynomial evaluation as exp,
// with no range checking and no final scaling, and hands the pieces back
// to the caller:
//
//    f16   - argument x
//    a1    - address that receives N (64-bit store), where
//            N = x * table[0x10] rounded to an integer
//    a2    - address that receives the double
//            lo + (hi + lo) * (r + p(r))
//    f0    - on return, hi
//
// hi and lo are the two pieces of table entry j = N & 0x3f, at offsets
// 0x50 and 0x58 from the entry base.
//
// The instructions are grouped by phase. Each register ends up written
// last by the same instruction as before, so the values left in the
// scratch registers are unchanged.
//

NESTED_ENTRY(__dpml_exp_special_entry_point, 0x10, ra)

        lda     sp, -0x10(sp)           // 16-byte frame; Temp(sp) is the scratch slot

        PROLOGUE_END

//
// N = round(x * table[0x10]), using the add/subtract 2^52 trick
//

        lda     t0, __exp_t_table       // t0 = table base
        ldt     f1, Two52
        cpys    f16, f1, f10            // 2^52 carrying the sign of x
        ldt     f0, 0x10(t0)
        mult    f16, f0, f0
        addt    f0, f10, f0
        subt    f0, f10, f0             // f0 = N as a double

//
// r = (x - N * table[0x00]) + N * table[0x08]
//

        ldt     f1, 0(t0)
        ldt     f10, 8(t0)
        mult    f0, f1, f1
        mult    f0, f10, f10            // kept in f10 for the final sum
        subt    f16, f1, f1             // kept in f1 for the final sum
        addt    f1, f10, f16            // f16 = r

//
// p(r) = r^2 * (table[0x30] + table[0x38] * r)
//      + r^4 * (table[0x40] + table[0x48] * r)
//

        ldt     f11, 0x48(t0)
        ldt     f12, 0x38(t0)
        ldt     f14, 0x40(t0)
        mult    f16, f16, f13           // r^2
        mult    f11, f16, f11
        mult    f12, f16, f12
        ldt     f16, 0x30(t0)           // r is no longer needed in f16
        mult    f13, f13, f15           // r^4
        addt    f14, f11, f11
        addt    f16, f12, f12
        mult    f15, f11, f11
        mult    f13, f12, f12
        addt    f12, f11, f11           // f11 = p(r)
        addt    f10, f11, f10           // N * table[0x08] + p(r)

//
// Integer N and the table entry it selects
//

        cvttqc  f0, f14                 // N as a 64-bit integer
        stt     f14, Temp(sp)
        ldq     v0, Temp(sp)            // v0 = N
        and     v0, 0x3f, t1            // j = N & 63
        sll     t1, 4, t1               // 16 bytes per table entry
        addl    t0, t1, t0
        ldt     f0, 0x50(t0)            // hi, returned in f0
        ldt     f15, 0x58(t0)           // lo

//
// Combine and hand the pieces back
//

        addt    f0, f15, f13            // hi + lo
        addt    f1, f10, f1             // r + p(r)
        mult    f13, f1, f1
        addt    f15, f1, f1
        stt     f1, 0(a2)
        stq     v0, 0(a1)

        lda     sp, 0x10(sp)
        ret     zero, (ra)

        .end    __dpml_exp_special_entry_point
|
|
|
|
.align 3
|
|
.rdata
|
|
|
|
Under: .long 0xc0874386 // underflow_x
|
|
.long 0x0
|
|
|
|
Two52: .quad 0x4330000000000000 // 2^52 (4503599627370496.0)
|
|
|
|
Mask52: .quad 0x000fffffffffffff // 52-bit mantissa mask
|
|
|
|
One: .double 1.0
|
|
|
|
//
|
|
// Function name for _dpml_exception.
|
|
//
|
|
|
|
expName:
|
|
.ascii "exp\0"
|
|
|
|
//
// exp table data
//
// Layout, as byte offsets from __exp_t_table:
//
//    0x00 .. 0x28   misc constants
//    0x30 .. 0x48   poly coefs
//    0x50 ..        64 entries of 16 bytes each, hi piece then lo piece
//
// The routines in this file read offsets 0x00, 0x08, 0x10, 0x30 .. 0x48
// and the 64 entries; offsets 0x18 .. 0x28 are not referenced here.
//

        .align  3

__exp_t_table:

//
// misc constants
//

        .quad   0x3f862e42fefa0000      // 0x00
        .quad   0xbd1cf79abc9e3b3a      // 0x08
        .quad   0x40571547652b82fe      // 0x10  two_pow_l_over_ln2
        .quad   0x0000000042b8aa3b      // 0x18
        .quad   0x3fe00001ebfbdb81      // 0x20
        .quad   0x3fc55555555551c2      // 0x28

//
// poly coefs
//

        .quad   0x3fdfffffffffe5bc      // 0x30
        .quad   0x3fc5555555556bd8      // 0x38
        .quad   0x3fa555570aa6fd1d      // 0x40
        .quad   0x3f81111111110f6f      // 0x48

//
// 2^(j/2^L) for j = 0 to 2^L - 1 in hi and lo pieces
//
// One line per j: hi piece first, lo piece second.
//

        .quad   0x3ff0000000000000, 0x0000000000000000  // j = 0
        .quad   0x3ff02c9a3e778040, 0x3d007737be56527b  // j = 1
        .quad   0x3ff059b0d3158540, 0x3d0a1d73e2a475b4  // j = 2
        .quad   0x3ff0874518759bc0, 0x3ce01186be4bb284  // j = 3
        .quad   0x3ff0b5586cf98900, 0x3ceec5317256e308  // j = 4
        .quad   0x3ff0e3ec32d3d180, 0x3d010103a1727c57  // j = 5
        .quad   0x3ff11301d0125b40, 0x3cf0a4ebbf1aed93  // j = 6
        .quad   0x3ff1429aaea92dc0, 0x3cffb34101943b25  // j = 7
        .quad   0x3ff172b83c7d5140, 0x3d0d6e6fbe462875  // j = 8
        .quad   0x3ff1a35beb6fcb40, 0x3d0a9e5b4c7b4968  // j = 9
        .quad   0x3ff1d4873168b980, 0x3d053c02dc0144c8  // j = 10
        .quad   0x3ff2063b88628cc0, 0x3cf63b8eeb029509  // j = 11
        .quad   0x3ff2387a6e756200, 0x3d0c3360fd6d8e0a  // j = 12
        .quad   0x3ff26b4565e27cc0, 0x3cfd257a673281d3  // j = 13
        .quad   0x3ff29e9df51fdec0, 0x3d009612e8afad12  // j = 14
        .quad   0x3ff2d285a6e40300, 0x3ce680123aa6da0e  // j = 15
        .quad   0x3ff306fe0a31b700, 0x3cf52de8d5a46305  // j = 16
        .quad   0x3ff33c08b26416c0, 0x3d0fa64e43086cb3  // j = 17
        .quad   0x3ff371a7373aa9c0, 0x3ce54e28aa05e8a8  // j = 18
        .quad   0x3ff3a7db34e59fc0, 0x3d0b750de494cf05  // j = 19
        .quad   0x3ff3dea64c123400, 0x3d011ada0911f09e  // j = 20
        .quad   0x3ff4160a21f72e00, 0x3d04fc2192dc79ed  // j = 21
        .quad   0x3ff44e0860618900, 0x3d068189b7a04ef8  // j = 22
        .quad   0x3ff486a2b5c13cc0, 0x3cf013c1a3b69062  // j = 23
        .quad   0x3ff4bfdad5362a00, 0x3d038ea1cbd7f621  // j = 24
        .quad   0x3ff4f9b2769d2c80, 0x3d035699ec5b4d50  // j = 25
        .quad   0x3ff5342b569d4f80, 0x3cbdf0a83c49d86a  // j = 26
        .quad   0x3ff56f4736b527c0, 0x3cfa66ecb004764e  // j = 27
        .quad   0x3ff5ab07dd485400, 0x3d04ac64980a8c8f  // j = 28
        .quad   0x3ff5e76f15ad2140, 0x3ce0dd37c9840732  // j = 29
        .quad   0x3ff6247eb03a5580, 0x3cd2c7c3e81bf4b6  // j = 30
        .quad   0x3ff6623882552200, 0x3d024893ecf14dc7  // j = 31
        .quad   0x3ff6a09e667f3bc0, 0x3ce921165f626cdd  // j = 32
        .quad   0x3ff6dfb23c651a00, 0x3d0779107165f0dd  // j = 33
        .quad   0x3ff71f75e8ec5f40, 0x3d09ee91b8797785  // j = 34
        .quad   0x3ff75feb564267c0, 0x3ce17edd35467491  // j = 35
        .quad   0x3ff7a11473eb0180, 0x3cdb5f54408fdb36  // j = 36
        .quad   0x3ff7e2f336cf4e40, 0x3d01082e815d0abc  // j = 37
        .quad   0x3ff82589994cce00, 0x3cf28acf88afab34  // j = 38
        .quad   0x3ff868d99b4492c0, 0x3d0640720ec85612  // j = 39
        .quad   0x3ff8ace5422aa0c0, 0x3cfb5ba7c55a192c  // j = 40
        .quad   0x3ff8f1ae99157700, 0x3d0b15cc13a2e397  // j = 41
        .quad   0x3ff93737b0cdc5c0, 0x3d027a280e1f92a0  // j = 42
        .quad   0x3ff97d829fde4e40, 0x3cef173d241f23d1  // j = 43
        .quad   0x3ff9c49182a3f080, 0x3cf01c7c46b071f2  // j = 44
        .quad   0x3ffa0c667b5de540, 0x3d02594d6d45c655  // j = 45
        .quad   0x3ffa5503b23e2540, 0x3cfc8b424491caf8  // j = 46
        .quad   0x3ffa9e6b5579fd80, 0x3d0fa1f5921deffa  // j = 47
        .quad   0x3ffae89f995ad380, 0x3d06af439a68bb99  // j = 48
        .quad   0x3ffb33a2b84f15c0, 0x3d0d7b5fe873deca  // j = 49
        .quad   0x3ffb7f76f2fb5e40, 0x3cdbaa9ec206ad4f  // j = 50
        .quad   0x3ffbcc1e904bc1c0, 0x3cf2247ba0f45b3d  // j = 51
        .quad   0x3ffc199bdd855280, 0x3cfc2220cb12a091  // j = 52
        .quad   0x3ffc67f12e57d140, 0x3ce694426ffa41e5  // j = 53
        .quad   0x3ffcb720dcef9040, 0x3d048a81e5e8f4a4  // j = 54
        .quad   0x3ffd072d4a078940, 0x3d0dc68791790d0a  // j = 55
        .quad   0x3ffd5818dcfba480, 0x3cdc976816bad9b8  // j = 56
        .quad   0x3ffda9e603db3280, 0x3cd5c2300696db53  // j = 57
        .quad   0x3ffdfc97337b9b40, 0x3cfeb968cac39ed2  // j = 58
        .quad   0x3ffe502ee78b3fc0, 0x3d0b139e8980a9cc  // j = 59
        .quad   0x3ffea4afa2a490c0, 0x3cf9858f73a18f5d  // j = 60
        .quad   0x3ffefa1bee615a00, 0x3d03bb8fe90d496d  // j = 61
        .quad   0x3fff50765b6e4540, 0x3c99d3e12dd8a18a  // j = 62
        .quad   0x3fffa7c1819e90c0, 0x3cf82e90a7e74b26  // j = 63

//
// End of table.
//
|