windows-nt-4.0/private/fp32/tran/alpha/atan2s.s


								//      TITLE("Alpha AXP ArcTangent2")

								//++

								//

								// Copyright (c) 1993, 1994  Digital Equipment Corporation

								//

								// Module Name:

								//

								//    atan2.s

								//

								// Abstract:

								//

								//    This module implements a high-performance Alpha AXP specific routine

								//    for IEEE double format arctangent2.

								//

								// Author:

								//

								//    Andy Garside

								//

								// Environment:

								//

								//    User mode.

								//

								// Revision History:

								//

								//    Thomas Van Baak (tvb) 15-Feb-1994

								//

								//        Adapted for NT.

								//

								//--


								#include "ksalpha.h"


								//

								// Define DPML exception record for NT.

								//


								        .struct 0                       // record offsets start at 0

								ErErr:  .space  4                       // error code            (offset  0)

								ErCxt:  .space  4                       // context               (offset  4)

								ErPlat: .space  4                       // platform              (offset  8)

								ErEnv:  .space  4                       // environment           (offset 12)

								ErRet:  .space  4                       // return value pointer  (offset 16)

								ErName: .space  4                       // function name         (offset 20)

								ErType: .space  8                       // flags and fill        (offset 24)

								ErVal:  .space  8                       // return value          (offset 32)

								ErArg0: .space  8                       // arg 0                 (offset 40)

								ErArg1: .space  8                       // arg 1                 (offset 48)

								ErArg2: .space  8                       // arg 2                 (offset 56)

								ErArg3: .space  8                       // arg 3                 (offset 64)

								DpmlExceptionLength:                    // record size = 72 bytes; this file stores only ErErr, ErName, ErArg0, ErArg1


								//

								// Define stack frame.

								//


								        .struct 0                       // frame offsets are relative to sp after allocation

								SaveS0: .space  8                       // saved s0 (s0 is used as the exponent mask)

								SaveS1: .space  8                       // saved s1 (s1 holds exp_y - exp_x)

								SaveRa: .space  8                       // saved return address (ra is also used as scratch)

								SaveF2: .space  8                       // saved f2 (f2 holds y)

								SaveF3: .space  8                       // saved f3 (f3 holds x)

								Temp:   .space  8                       // scratch quadword for moving bits between FP and integer registers

								ExRec:  .space  DpmlExceptionLength     // exception record

								        .space  8                       // for 16-byte stack alignment

								FrameLength:                            // 48 + 72 + 8 = 128 bytes


								//

								// Define lower and upper 32-bit parts of 64-bit double.

								//


								#define LowPart 0x0

								// HighPart is the longword this file loads to read the sign, exponent
								// and top 20 fraction bits of a double stored at Temp.

								#define HighPart 0x4


								//

								// Define offsets into atan_t_table.

								//

								// ATAN_INF is the magnitude returned when |y/x| is effectively infinite;
								// TWICE_ATAN_INF is the magnitude returned or added when x is negative.
								// NOTE(review): presumably pi/2 and pi; the table itself is defined
								// outside this file -- confirm against __atan_t_table.


								#define ATAN_INF        0xf18

								#define TWICE_ATAN_INF  0xf28


								        SBTTL("ArcTangent2")


								//++

								//

								// double

								// atan2 (

								//    IN double y

								//    IN double x

								//    )

								//

								// Routine Description:

								//

								//    This function returns the arctangent of the given double arguments.

								//    It returns atan(y/x) in range [-pi,pi].

								//

								// Arguments:

								//

								//    y (f16) - Supplies the argument value.

								//

								//    x (f17) - Supplies the argument value.

								//

								// Return Value:

								//

								//    The double arctangent2 result is returned as the function value in f0.

								//

								//--


								        NESTED_ENTRY(atan2, FrameLength, ra)


								        lda     sp, -FrameLength(sp)    // allocate stack frame

								        stq     s0, SaveS0(sp)          // s0/s1/ra/f2/f3 are all reloaded at done

								        stq     s1, SaveS1(sp)

								        stq     ra, SaveRa(sp)

								        stt     f2, SaveF2(sp)

								        stt     f3, SaveF3(sp)


								        PROLOGUE_END

								//
								// Fast screen: if both biased exponents are neither 0 nor 0x7ff go
								// straight to calc_atan2 with t0 = exp_y and t2 = exp_x (both still in
								// bit positions 20..30).  Anything else is classified first.
								//

								        cpys    f16, f16, f2            // f2 = y

								        ldah    s0, 0x7ff0(zero)        // s0 = 0x7ff00000, exponent mask for the high longword

								        cpys    f17, f17, f3            // f3 = x

								        stt     f2, Temp(sp)            // spill y so its bits can be read as integers

								        ldl     v0, Temp + HighPart(sp) // v0 = high longword of y

								        and     v0, s0, v0              // keep only the exponent field

								        mov     v0, t0                  // t0 = exp_y

								        xor     t0, s0, t1              // t1 == 0 iff exponent field is all ones

								        beq     t1, spec_y              // y is Inf or NaN

								        beq     t0, spec_y              // y is zero or denormal


								        stt     f3, Temp(sp)            // same test for x

								        ldl     t2, Temp + HighPart(sp)

								        and     t2, s0, t2              // t2 = exp_x

								        xor     t2, s0, t1

								        beq     t1, class_y             // x is Inf or NaN

								        bne     t2, calc_atan2          // both exponents ordinary: main path

								        br      zero, class_y           // x is zero or denormal


								//

								// Abnormal inputs

								//


								// spec_y: y had a special exponent, so exp_x has not been extracted yet.
								// Compute t2 = exp_x here, then fall into the classification of y.
								spec_y: stt     f3, Temp(sp)

								        ldl     t2, Temp + HighPart(sp)

								        and     t2, s0, t2              // t2 = exp_x


								//

								// Classify y according to type

								//
								// Result in t5:  0/1 NaN (value of fraction bit 51),
								//                2/3 infinity, 4/5 normal, 6/7 denormal, 8/9 zero.
								// For every class except NaN the low bit is the sign of y.
								//


								class_y:

								        stt     f2, Temp(sp)

								        ldl     t3, Temp + HighPart(sp) // t3 = high longword of y (sign-extended)

								        zapnot  t3, 0xf, t1             // clear the sign-extension bytes

								        and     t3, s0, t4              // t4 = exponent field

								        srl     t1, 31, t1

								        and     t1, 1, t1               // t1 = sign bit of y

								        beq     t4, LL00d0              // exponent 0: zero or denormal

								        cmpult  t4, s0, t4

								        beq     t4, LL0098              // exponent all ones: Inf or NaN

								        addl    t1, 4, t5               // normal: class 4 + sign

								        br      zero, class_x

								LL0098: ldah    t6, 0x10(zero)

								        ldl     t4, Temp(sp)            // t4 = low longword of y

								        lda     t6, -1(t6)              // t6 = 0x000fffff, fraction bits of the high longword

								        and     t3, t6, t6

								        stl     t6, Temp + HighPart(sp) // Temp is rewritten by class_x before it is read again

								        bis     t6, t4, t4              // t4 != 0 iff any fraction bit is set

								        srl     t6, 19, t6

								        beq     t4, LL00c8              // fraction all zero: infinity

								        and     t6, 1, t6               // NaN: class = top fraction bit

								        mov     t6, t5

								        br      zero, class_x

								LL00c8: addl    t1, 2, t5               // infinity: class 2 + sign

								        br      zero, class_x

								LL00d0: ldl     t7, Temp(sp)            // t7 = low longword of y

								        ldah    t4, 0x10(zero)

								        lda     t4, -1(t4)              // t4 = 0x000fffff

								        and     t3, t4, t3

								        bis     t3, t7, t7              // t7 != 0 iff any fraction bit is set

								        stl     t3, Temp + HighPart(sp) // Temp is rewritten by class_x before it is read again

								        mov     6, t6                   // assume denormal

								        cmoveq  t7, 8, t6               // fraction all zero: zero

								        addl    t1, t6, t5              // class 6 or 8, plus sign


								//

								// Classify x according to type

								//


								// Same classification as class_y, applied to x (f3).
								// Result in t6:  0/1 NaN (value of fraction bit 51),
								//                2/3 infinity, 4/5 normal, 6/7 denormal, 8/9 zero.
								// For every class except NaN the low bit is the sign of x.
								// t0, t2 and t5 are left untouched.
								class_x:

								        stt     f3, Temp(sp)

								        ldl     t3, Temp + HighPart(sp) // t3 = high longword of x (sign-extended)

								        zapnot  t3, 0xf, t4             // clear the sign-extension bytes

								        and     t3, s0, t1              // t1 = exponent field

								        srl     t4, 31, t4

								        and     t4, 1, t4               // t4 = sign bit of x

								        beq     t1, LL0158              // exponent 0: zero or denormal

								        cmpult  t1, s0, t1

								        beq     t1, LL0120              // exponent all ones: Inf or NaN

								        addl    t4, 4, t6               // normal: class 4 + sign

								        br      zero, switch

								LL0120: ldah    t1, 0x10(zero)

								        ldl     t7, Temp(sp)            // t7 = low longword of x

								        lda     t1, -1(t1)              // t1 = 0x000fffff

								        and     t3, t1, t1

								        bis     t1, t7, t7              // t7 != 0 iff any fraction bit is set

								        stl     t1, Temp + HighPart(sp) // every later reader of Temp stores to it first

								        beq     t7, LL0150              // fraction all zero: infinity

								        srl     t1, 19, t1

								        and     t1, 1, t1               // NaN: class = top fraction bit

								        mov     t1, t6

								        br      zero, switch

								LL0150: addl    t4, 2, t6               // infinity: class 2 + sign

								        br      zero, switch

								LL0158: ldl     a0, Temp(sp)            // a0 = low longword of x

								        ldah    t7, 0x10(zero)

								        lda     t7, -1(t7)              // t7 = 0x000fffff

								        and     t3, t7, t3

								        bis     t3, a0, a0              // a0 != 0 iff any fraction bit is set

								        stl     t3, Temp + HighPart(sp) // every later reader of Temp stores to it first

								        mov     6, t1                   // assume denormal

								        cmoveq  a0, 8, t1               // fraction all zero: zero

								        addl    t4, t1, t6              // class 6 or 8, plus sign


								//

								// switch on class(y) and class(x)

								//


								// Dispatch through Switch_table on index 5 * (class(y) >> 1) + (class(x) >> 1).
								// Shifting drops the sign bit, leaving 0 NaN, 1 infinity, 2 normal,
								// 3 denormal, 4 zero.  t5 and t6 keep the full classes for the handlers.
								switch: sra     t5, 1, a0               // a0 = kind of y

								        sra     t6, 1, t3               // t3 = kind of x

								        s4addl  a0, a0, a0              // a0 = 4*a0 + a0 = 5 * kind(y)

								        addl    a0, t3, t3              // t3 = table index

								        cmpule  t3, 24, t12             // the table has 25 entries

								        beq     t12, cpys_y_class       // out-of-range guard; f0 has not been set on this path


								        lda     t12, Switch_table

								        s4addl  t3, t12, t12            // entries are 4 bytes wide

								        ldl     t12, 0(t12)             // 32-bit handler address, sign-extended by ldl

								        jmp     zero, (t12)


								// ret_y: table row for y = NaN -- return y unchanged.
								ret_y:  cpys    f2, f2, f0

								        br      zero, done


								// ret_x: table column for x = NaN (y not NaN) -- return x unchanged.
								ret_x:  cpys    f3, f3, f0

								        br      zero, done


								// infs: both arguments infinite.  Fill in the exception record (name,
								// both arguments, error code 0x08000009), pass its address in v0 and
								// return the double that the returned v0 points to.
								infs:

								        lda     t1, atan2Name

								        stl     t1, ExRec + ErName(sp)  // function name pointer (stored as 32 bits)

								        ldah    t3, 0x800(zero)         // t3 = 0x08000000

								        stt     f2, ExRec + ErArg0(sp)  // arg 0 = y

								        stt     f3, ExRec + ErArg1(sp)  // arg 1 = x

								        lda     t3, 9(t3)               // t3 = 0x08000009

								        stl     t3, ExRec + ErErr(sp)

								        lda     v0, ExRec(sp)           // record address is passed in v0, not a0

								        bsr     ra, __dpml_exception

								        ldt     f0, 0(v0)               // v0 now points at the result

								        br      zero, done


								// zeros: both arguments zero.  Same sequence as infs with error code
								// 0x08000008.
								zeros:

								        lda     t6, atan2Name

								        stl     t6, ExRec + ErName(sp)

								        ldah    a0, 0x800(zero)         // a0 = 0x08000000

								        stt     f2, ExRec + ErArg0(sp)  // arg 0 = y

								        stt     f3, ExRec + ErArg1(sp)  // arg 1 = x

								        lda     a0, 8(a0)               // a0 = 0x08000008

								        stl     a0, ExRec + ErErr(sp)

								        lda     v0, ExRec(sp)           // record address is passed in v0, not a0

								        bsr     ra, __dpml_exception

								        ldt     f0, 0(v0)               // v0 now points at the result

								        br      zero, done


								//
								// Constant results that only need the sign of y applied.
								//
								//   ret_tw_inf   x >= 0 (low bit of t6 clear): magnitude 0.0
								//                x <  0 (low bit of t6 set)  : magnitude TWICE_ATAN_INF
								//   ret_inf      magnitude ATAN_INF
								//   cpys_y_class negate f0 when the low bit of t5 (sign of y) is set
								//
								// Every entry funnels into cpys_y_class, so the sign step exists once.
								//

								ret_tw_inf:

								        blbs    t6, x_neg               // x negative

								x_pos:  cpys    f31, f31, f0            // f0 = +0.0

								        br      zero, cpys_y_class

								x_neg:  ldt     f0, __atan_t_table + TWICE_ATAN_INF

								        br      zero, cpys_y_class

								ret_inf:

								        ldt     f0, __atan_t_table + ATAN_INF

								cpys_y_class:

								        blbc    t5, done                // y positive: keep the magnitude

								        cpysn   f0, f0, f0              // y negative: flip the sign

								        br      zero, done


								//
								// de_o_norm: y is denormal, x is normal.
								//
								// In:   f2 = y, f3 = x, t2 = biased exponent field of x (bits 20..30),
								//       t6 = class(x) whose low bit is the sign of x, s0 = 0x7ff00000.
								//
								// scale_up_denorm rebiases a normal operand by adding 0x433 to its
								// exponent field, so it may only be used while exp_x + 0x433 stays
								// below 0x7ff, i.e. exp_x < 0x3cc.  For larger x the sum would carry
								// into the sign bit, so those cases are finished here:
								//
								//   x < 0                : |y/x| < 2^-971, result is TWICE_ATAN_INF with
								//                          the sign of y.
								//   x > 0, exp_x >= 0x435: x >= 2^54, the quotient cannot be represented
								//                          even as a denormal -> underflow.
								//   x > 0 otherwise      : |y/x| < 2^-971, so the result is the quotient.
								//                          y is scaled by 2^1075 (exact), divided by the
								//                          unscaled x, and the scale is then removed from
								//                          the quotient's exponent.  A quotient that
								//                          would not be normal goes to underflow, as in
								//                          under_or_de.
								//
								// f2 and f3 are left unmodified so that underflow reports the caller's
								// arguments.  Scratch: t1, t3, t4, f0, f10, f16, f17.
								//

								de_o_norm:

								        ldah    t4, 0x3cc0(zero)        // first exponent that cannot be rebiased

								        cmpult  t2, t4, t4

								        bne     t4, scale_up_denorm     // safe to scale both operands

								        blbs    t6, de_o_neg_x          // x negative

								        ldah    t4, 0x4350(zero)        // underflow check

								        cmpult  t2, t4, t4

								        beq     t4, underflow           // x >= 2^54: certain underflow

								        ldt     f17, Two53

								        cpys    f2, f17, f16            // f16 = 2^53 with the sign of y

								        cpyse   f16, f2, f0             // f0 = f16's sign/exponent, y's fraction

								        subt    f0, f16, f10            // f10 = y * 2^1075

								        divt    f10, f3, f10            // f10 = (y / x) * 2^1075

								        stt     f10, Temp(sp)

								        ldl     t3, Temp + HighPart(sp) // t3 = high longword of the scaled quotient

								        ldah    t4, 0x4330(zero)        // 1075 in exponent-field units

								        and     t3, s0, t1              // t1 = exponent field of the scaled quotient

								        subl    t1, t4, t1              // remove the scale

								        ble     t1, underflow           // true quotient is denormal or zero

								        ldah    t4, -0x7ff0(zero)

								        lda     t4, -1(t4)              // t4 = sign + fraction mask (0x800fffff)

								        and     t3, t4, t3

								        bis     t3, t1, t3              // insert the unscaled exponent

								        stl     t3, Temp + HighPart(sp)

								        ldt     f0, Temp(sp)            // f0 = y / x

								        br      zero, done

								de_o_neg_x:

								        ldt     f10, __atan_t_table + TWICE_ATAN_INF

								        br      zero, fix_sign          // f0 = f10 with the sign of y


								// n_o_de: y is normal, x is denormal.  If the biased exponent of y is at
								// least 54 the result is taken from const_range; otherwise both operands
								// are scaled below.
								n_o_de: ldah    t1, 0x360(zero)         // check for const range

								        cmplt   t0, t1, t1

								        beq     t1, const_range


								// Scale x and y up by 2^F_PRECISION and adjust exp_x and exp_y accordingly.

								// With x and y scaled into the normal range, we can rejoin the main logic

								// flow for computing atan(y/x)

								//
								// In:  f2 = y, f3 = x, t0 = exp_y, t2 = exp_x (0 marks a denormal operand).
								// Out: f2, f3 rescaled; t0, t2 = their new exponent fields.
								//
								// A normal operand gets 0x433 (1075) added to its exponent field.  A
								// denormal operand is rebuilt as (2^53 carrying its fraction) - 2^53,
								// which applies the same factor.
								//
								// NOTE(review): the exponent add is not range-checked here; it relies on
								// the dispatching code to keep exp + 0x433 below 0x7ff.  Verify every
								// path that reaches this label.
								//


								scale_up_denorm:


								        beq     t0, LL02c0              // y is denormal

								        stt     f2, Temp(sp)

								        ldl     ra, Temp + HighPart(sp) // ra is scratch here; it is reloaded at done

								        ldah    v0, 0x4330(zero)

								        ldah    t3, -0x7ff0(zero)

								        addl    t0, v0, v0              // v0 = exp_y + 0x433

								        lda     t3, -1(t3)              // t3 = sign + fraction mask (0x800fffff)

								        and     ra, t3, t3

								        mov     v0, t0                  // t0 = new exp_y

								        bis     t3, t0, t3              // insert the new exponent

								        stl     t3, Temp + HighPart(sp)

								        ldt     f2, Temp(sp)            // f2 = scaled y

								        br      zero, LL02e4

								LL02c0: ldt     f17, Two53

								        cpys    f2, f17, f16            // f16 = 2^53 with the sign of y

								        cpyse   f16, f2, f0             // f0 = f16's sign/exponent, y's fraction

								        subt    f0, f16, f2             // f2 = y renormalised

								        stt     f2, Temp(sp)

								        ldl     t4, Temp + HighPart(sp)

								        and     t4, s0, t4

								        mov     t4, t0                  // t0 = new exp_y

								LL02e4: beq     t2, LL0318              // x is denormal

								        stt     f3, Temp(sp)

								        ldl     a0, Temp + HighPart(sp)

								        ldah    v0, -0x7ff0(zero)

								        ldah    ra, 0x4330(zero)

								        lda     v0, -1(v0)              // v0 = sign + fraction mask (0x800fffff)

								        addl    t2, ra, t2              // t2 = exp_x + 0x433

								        and     a0, v0, v0

								        bis     v0, t2, v0              // insert the new exponent

								        stl     v0, Temp + HighPart(sp)

								        ldt     f3, Temp(sp)            // f3 = scaled x

								        br      zero, calc_atan2

								LL0318: ldt     f17, Two53

								        cpys    f3, f17, f0             // f0 = 2^53 with the sign of x

								        cpyse   f0, f3, f16             // f16 = f0's sign/exponent, x's fraction

								        subt    f16, f0, f3             // f3 = x renormalised

								        stt     f3, Temp(sp)

								        ldl     t1, Temp + HighPart(sp)

								        and     t1, s0, t1

								        mov     t1, t2                  // t2 = new exp_x; falls into calc_atan2


								//

								//  OK. Calculate atan2.

								//


								// In: f2 = y, f3 = x (both normal), t0 = exp_y, t2 = exp_x.
								// s1 = exp_y - exp_x in exponent-field units (1 << 20 per binade):
								//   s1 >= 54   -> const_range
								//   s1 <= -28  -> ident_range
								//   otherwise  -> f0 = atan(y / x), and post_proc when x is not positive
								calc_atan2:

								        subl    t0, t2, s1              // s1 survives the call to atan (s1 is saved in this frame)

								        ldah    t4, 0x360(zero)         // check for const range

								        ldah    t5, -0x1c0(zero)        // check for identity range

								        cmplt   s1, t4, t4

								        cmple   s1, t5, t5

								        beq     t4, const_range

								        bne     t5, ident_range

								        divt    f2, f3, f16             // argument for atan

								        bsr     ra, atan

								        cpys    f0, f0, f1              // f1 = atan(y/x); post_proc reads f1

								        cmptlt  f31, f3, f3             // f3 != 0 iff x > 0 (x itself is no longer needed)

								        cpys    f1, f1, f0

								        fbeq    f3, post_proc           // x not positive: adjust by TWICE_ATAN_INF

								        br      zero, done


								// ident_range: exp_y - exp_x <= -28, so the quotient y/x is used directly
								// in place of atan(y/x).
								//   x < 0 and s1 < -54 : result is TWICE_ATAN_INF with the sign of y
								//   otherwise          : poss_under
								ident_range:

								        ldah    v0, -0x360(zero)        // check for possible underflow

								        cmpult  s1, v0, v0              // both operands are negative here, so unsigned order equals signed order

								        fbge    f3, poss_under          // x >= 0

								        beq     v0, poss_under          // x < 0 but the quotient is still significant


								        ldt     f10, __atan_t_table + TWICE_ATAN_INF

								        br      zero, fix_sign


								// poss_under: s1 <= -1022 goes to under_or_de; otherwise the quotient is
								// representable and is returned as is (x > 0) or via post_proc.
								poss_under:

								        ldah    t1, -0x3fe0(zero)       // check for certain underflow or denorm

								        cmpule  s1, t1, t1

								        bne     t1, under_or_de


								        divt    f2, f3, f1              // f1 = y / x

								        cmptlt  f31, f3, f3             // f3 != 0 iff x > 0

								        fbeq    f3, post_proc

								        cpys    f1, f1, f0

								        br      zero, done


								// post_proc: x is not positive.  f0 = f1 + TWICE_ATAN_INF carrying the
								// sign of y, where f1 is the value computed for y/x.
								post_proc:

								        ldt     f11, __atan_t_table + TWICE_ATAN_INF

								        cpys    f2, f11, f12            // f12 = constant with the sign of y

								        addt    f1, f12, f0

								        br      zero, done


								// under_or_de: s1 <= -1022, so y/x may not be a normal number.
								//   s1 < -1077 : underflow
								//   otherwise  : divide a copy of y whose exponent was raised by 53, then
								//                lower the quotient's exponent by 53 again; if that
								//                leaves no positive exponent the result is not normal
								//                and underflow is taken instead.
								// s0 is overwritten below; it is reloaded at done.
								under_or_de:

								        ldah    t3, -0x4350(zero)       // check for underflow

								        cmpult  s1, t3, t3

								        bne     t3, underflow


								        ldah    t6, 0x350(zero)         // fixup denorm check

								        cpys    f2, f2, f13

								        stt     f13, Temp(sp)

								        ldl     t5, Temp + HighPart(sp)

								        addl    t5, t6, t5              // raise the exponent of the y copy by 53

								        stl     t5, Temp + HighPart(sp)

								        ldt     f14, Temp(sp)

								        divt    f14, f3, f14            // f14 = (y * 2^53) / x

								        stt     f14, Temp(sp)

								        ldl     a2, Temp + HighPart(sp)

								        and     a2, s0, s0              // s0 = exponent field of the quotient

								        subl    s0, t6, t6              // t6 = exponent after removing the 2^53

								        ble     t6, underflow           // result would not be normal


								        stt     f14, Temp(sp)           // Temp already holds f14 from the store above

								        ldl     a4, Temp + HighPart(sp)

								        ldah    a5, -0x7ff0(zero)

								        lda     a5, -1(a5)              // a5 = sign + fraction mask (0x800fffff)

								        and     a4, a5, a4

								        bis     a4, t6, t6              // insert the corrected exponent

								        stl     t6, Temp + HighPart(sp)

								        ldt     f0, Temp(sp)            // f0 = y / x

								        br      zero, done


								//

								// quotient underflows

								//


								// underflow: fill in the exception record (name, f2 and f3 as the two
								// arguments, error code 0x0800000a), pass its address in v0 and return
								// the double that the returned v0 points to.
								// NOTE(review): when this is reached after scale_up_denorm, f2 and f3
								// hold the rescaled operands rather than the caller's values -- confirm
								// whether the handler depends on them.
								underflow:

								        lda     t10, atan2Name

								        ldah    v0, 0x800(zero)         // v0 = 0x08000000

								        stl     t10, ExRec + ErName(sp)

								        stt     f2, ExRec + ErArg0(sp)

								        lda     v0, 0xa(v0)             // v0 = 0x0800000a

								        stt     f3, ExRec + ErArg1(sp)

								        stl     v0, ExRec + ErErr(sp)

								        lda     v0, ExRec(sp)           // record address is passed in v0, not a0

								        bsr     ra, __dpml_exception

								        ldt     f0, 0(v0)               // v0 now points at the result

								        br      zero, done


								// const_range: |y| dominates |x|; the result is ATAN_INF with the sign of y.
								const_range:

								        ldt     f10, __atan_t_table + ATAN_INF


								// fix_sign: f0 = magnitude in f10 with the sign of y (f2).
								fix_sign:

								        cpys    f2, f10, f0


								//

								// Restore registers and return with result in f0.

								//


								done:

								        ldq     s0, SaveS0(sp)

								        ldq     s1, SaveS1(sp)

								        ldq     ra, SaveRa(sp)          // ra was overwritten by bsr and by scratch use

								        ldt     f2, SaveF2(sp)

								        ldt     f3, SaveF3(sp)

								        lda     sp, FrameLength(sp)     // deallocate stack frame

								        ret     zero, (ra)              // return


								        .end    atan2


								        .rdata

								        .align  3                       // 8-byte alignment for the doubles below


								//

								// Define floating point constants.

								//


								One:    .double 1.0                     // not referenced by the code in this file


								Two53:  .quad   0x4340000000000000      // 2^53 (9007199254740992); used to renormalise denormal operands


								//

								// switch on class of y and x

								//

								// 25 handler addresses, 4 bytes each, indexed by
								// 5 * kind(y) + kind(x) with kind: 0 NaN, 1 infinity, 2 normal,
								// 3 denormal, 4 zero.
								Switch_table:

								        .long   ret_y                   //  0: y NaN,    x NaN

								        .long   ret_y                   //  1: y NaN,    x infinity

								        .long   ret_y                   //  2: y NaN,    x normal

								        .long   ret_y                   //  3: y NaN,    x denormal

								        .long   ret_y                   //  4: y NaN,    x zero

								        .long   ret_x                   //  5: y inf,    x NaN

								        .long   infs                    //  6: y inf,    x infinity

								        .long   ret_inf                 //  7: y inf,    x normal

								        .long   ret_inf                 //  8: y inf,    x denormal

								        .long   ret_inf                 //  9: y inf,    x zero

								        .long   ret_x                   // 10: y normal, x NaN

								        .long   ret_tw_inf              // 11: y normal, x infinity

								        .long   cpys_y_class            // 12: y normal, x normal (the entry code sends this pair to calc_atan2 instead)

								        .long   n_o_de                  // 13: y normal, x denormal

								        .long   ret_inf                 // 14: y normal, x zero

								        .long   ret_x                   // 15: y denorm, x NaN

								        .long   ret_tw_inf              // 16: y denorm, x infinity

								        .long   de_o_norm               // 17: y denorm, x normal

								        .long   scale_up_denorm         // 18: y denorm, x denormal

								        .long   ret_inf                 // 19: y denorm, x zero

								        .long   ret_x                   // 20: y zero,   x NaN

								        .long   ret_tw_inf              // 21: y zero,   x infinity

								        .long   ret_tw_inf              // 22: y zero,   x normal

								        .long   ret_tw_inf              // 23: y zero,   x denormal

								        .long   zeros                   // 24: y zero,   x zero


								//

								// Function name for dpml_exception.

								//


								// NUL-terminated routine name; its address is stored in ErName before
								// each call to __dpml_exception.
								atan2Name:

								       .ascii  "atan2\0"