//      TITLE("Alpha AXP Sine")
//++
//
// Copyright (c) 1993, 1994  Digital Equipment Corporation
//
// Module Name:
//
//    sin.s
//
// Abstract:
//
//    This module implements a high-performance Alpha AXP specific routine
//    for IEEE double format sine.
//
// Author:
//
//    Bob Hanek 1-Oct-1991
//
// Environment:
//
//    User mode.
//
// Revision History:
//
//    Thomas Van Baak (tvb) 12-Feb-1994
//
//        Adapted for NT.
//
//--

#include "ksalpha.h"

//
// Define DPML exception record for NT.
//

        .struct 0
ErErr:  .space  4                       // error code
ErCxt:  .space  4                       // context
ErPlat: .space  4                       // platform
ErEnv:  .space  4                       // environment
ErRet:  .space  4                       // return value pointer
ErName: .space  4                       // function name
ErType: .space  8                       // flags and fill
ErVal:  .space  8                       // return value
ErArg0: .space  8                       // arg 0
ErArg1: .space  8                       // arg 1
ErArg2: .space  8                       // arg 2
ErArg3: .space  8                       // arg 3
DpmlExceptionLength:

//
// Define stack frame.
//

        .struct 0
SaveS0: .space  8                       //
SaveS1: .space  8                       //
SaveRa: .space  8                       //
SaveF2: .space  8                       //
SaveF3: .space  8                       //
SaveF4: .space  8                       //
SaveF5: .space  8                       //
SaveF6: .space  8                       //
SaveF7: .space  8                       //
SaveF8: .space  8                       //
SaveF9: .space  8                       //
Temp0:  .space  8                       //
Temp1:  .space  8                       //
Temp2:  .space  8                       //
Temp3:  .space  8                       //
ExRec:  .space  DpmlExceptionLength     // exception record
        .space  0                       // for 16-byte stack alignment
FrameLength:

//
// Define lower and upper 32-bit parts of 64-bit double.
//

#define LowPart 0x0
#define HighPart 0x4

//
// Define argument ranges.
//

#define BIG_X_HI 0x4169         // upper bound of medium argument range
#define BIG_X_LO 0x21fb
#define SMALL_X_HI 0x3e40       // lower bound of medium argument range
#define SMALL_X_LO 0x0000
#define EXP_WORD_OF_TWO_PI_HI 0x4019
#define EXP_WORD_OF_TWO_PI_LO 0x21fb

//
// Define offsets into table.
//

#define D_2_POW_K_OVER_PI_OVER_4 0x0
#define PI_OVER_4_OVER_2_POW_K_0 0x08
#define PI_OVER_4_OVER_2_POW_K_1 0x10
#define PI_OVER_4_OVER_2_POW_K_2 0x18
#define PI_OVER_4_OVER_2_POW_K_3 0x20
#define PI_OVER_4_OVER_2_POW_K_4 0x28

#define PI_OVER_2_HI 0x30
#define PI_OVER_2_LO 0x38
#define PI_HI 0x40
#define PI_LO 0x48
#define THREE_PI_OVER_2_HI 0x50
#define THREE_PI_OVER_2_LO 0x58
#define TWO_PI_HI 0x60
#define TWO_PI_LO 0x68
#define TWO_POW_K_OVER_PI_OVER_4 0x70

#define C_POLY0 0xb8
#define C_POLY1 C_POLY0 + 8
#define C_POLY2 C_POLY1 + 8
#define C_POLY3 C_POLY2 + 8
#define C_POLY4 C_POLY3 + 8

#define P_POLY0 0x78
#define P_POLY1 P_POLY0 + 8

#define Q_POLY0 0x88
#define Q_POLY1 Q_POLY0 + 8

#define S_POLY0 0x98
#define S_POLY1 S_POLY0 + 8
#define S_POLY2 S_POLY1 + 8
#define S_POLY3 S_POLY2 + 8

#define SINCOS 0xe0
#define SIN_A 0xe8
#define COS_A 0xf0

        SBTTL("Sine")

//++
//
// double
// sin (
//    IN double x
//    )
//
// Routine Description:
//
//    This function returns the sine of the given double argument.
//
// Arguments:
//
//    x (f16) - Supplies the argument value.
//
// Return Value:
//
//    The double sine result is returned as the function value in f0.
//
//--

        NESTED_ENTRY(sin, FrameLength, ra)

        lda     sp, -FrameLength(sp)    // allocate stack frame
        stq     s0, SaveS0(sp)
        stq     s1, SaveS1(sp)
        stq     ra, SaveRa(sp)
        stt     f2, SaveF2(sp)
        stt     f3, SaveF3(sp)
        stt     f4, SaveF4(sp)
        stt     f5, SaveF5(sp)
        stt     f6, SaveF6(sp)
        stt     f7, SaveF7(sp)
        stt     f8, SaveF8(sp)
        stt     f9, SaveF9(sp)

        PROLOGUE_END

        ornot   zero, zero, t0
        srl     t0, 33, t0
        ldah    t1, SMALL_X_HI(zero)
        ldah    t2, BIG_X_HI - SMALL_X_HI(zero)
        stt     f16, Temp2(sp)
        cpys    f31, f16, f2
        ldl     v0, Temp2 + HighPart(sp)
        lda     t2, BIG_X_LO - SMALL_X_LO(t2)
        cpys    f16, f16, f3
        and     v0, t0, v0      // the exponent field of the argument
        subl    v0, t1, t0      // if v0 - small <= big - small
        cmpult  t0, t2, t0      // we've got an abnormal (but not nessarily bad) argument
        beq     t0, abnormal_argument

        ldah    t2, EXP_WORD_OF_TWO_PI_HI(zero) // if (j >= EXP_WORD_OF_TWO_PI)
        lda     t2, EXP_WORD_OF_TWO_PI_LO(t2)   // we have a medium argument
        cmplt   v0, t2, t2
        beq     t2, medium_argument

//
//  small argument reduction
//
//  reduce the argument X to ( 8 * N + I ) * pi / 4 + y
//  and let the reduced argument be y' where
//      y' = X - floor( ( 8 * N + I + 1 ) / 2 ) * pi / 4
//  the low 3 bits of I are the octant
//

        lda     t0, __trig_cons
        ldt     f0, TWO_POW_K_OVER_PI_OVER_4(t0)
        mult    f2, f0, f0
        cvttqc  f0, f1
        stt     f1, Temp2(sp)
        ldl     t2, Temp2(sp)
        sra     t2, 7, t4
        cmpule  t4, 7, t12
        beq     t12, small_cos          // if octant > 7; shouldn't happen

//      dispatch on octant

        lda     t12, Switch2
        s4addl  t4, t12, t12
        ldl     t12, 0(t12)
        jmp     zero, (t12)

//
//      1st octant; compute sin(y')
//
Switch20:
        and     t2, 127, t5
        subl    t5, 27, t5
        blt     t5, small_sin

        s4subl  t5, t5, t5
        s8addl  t5, t0, t5
        ldt     f0, SINCOS(t5)
        ldt     f5, SIN_A(t5)
        ldt     f6, COS_A(t5)
        subt    f2, f0, f4
        br      zero, pos_tab_eval

small_sin:
        mult    f2, f2, f1
        ldt     f10, S_POLY3(t0)
        ldt     f0, S_POLY1(t0)
        ldt     f5, S_POLY2(t0)
        ldt     f11, S_POLY0(t0)
        mult    f10, f1, f10
        mult    f1, f1, f6
        mult    f0, f1, f0
        mult    f2, f1, f1
        addt    f10, f5, f5
        addt    f0, f11, f0
        mult    f5, f6, f5
        addt    f5, f0, f0
        mult    f1, f0, f0
        subt    f2, f0, f7
        fbge    f3, adjust_sign
        br      zero, change_sign

//
//      2nd octant; compute cos(y')
//
Switch21:
        ldt     f10, PI_OVER_2_HI(t0)
        ornot   zero, t2, t4
        and     t4, 127, t4
        ldt     f11, PI_OVER_2_LO(t0)
        subl    t4, 27, t4
        subt    f2, f10, f10
        blt     t4, pos_cos_1

        s4subl  t4, t4, t4
        s8addl  t4, t0, t4
        ldt     f6, SINCOS(t4)
        ldt     f5, COS_A(t4)
        addt    f10, f6, f6
        subt    f6, f11, f4
        ldt     f6, SIN_A(t4)
        br      zero, pos_tab_eval

//
//      3rd octant; compute cos(y')
//
Switch22:
        ldt     f1, PI_OVER_2_HI(t0)
        and     t2, 127, t3
        ldt     f11, PI_OVER_2_LO(t0)
        subl    t3, 27, t3
        subt    f2, f1, f10
        blt     t3, pos_cos_1

        s4subl  t3, t3, t3
        s8addl  t3, t0, t3
        ldt     f0, SINCOS(t3)
        ldt     f6, SIN_A(t3)
        ldt     f5, COS_A(t3)
        subt    f10, f0, f0
        subt    f11, f0, f4
        br      zero, pos_tab_eval

pos_cos_1:
        subt    f10, f11, f10
        ldt     f1, C_POLY1(t0)
        ldt     f2, C_POLY4(t0)
        ldt     f0, C_POLY2(t0)
        ldt     f4, C_POLY0(t0)
        ldt     f6, C_POLY3(t0)
        ldt     f11, One
        mult    f10, f10, f10
        mult    f10, f10, f7
        mult    f10, f1, f1
        mult    f2, f10, f2
        mult    f0, f7, f0
        addt    f1, f4, f1
        mult    f7, f10, f7
        addt    f2, f6, f2
        addt    f1, f0, f0
        mult    f7, f2, f2
        addt    f0, f2, f0
        mult    f10, f0, f0
        subt    f11, f0, f7
        fbge    f3, adjust_sign
        br      zero, change_sign

//
//      4th octant; compute -sin(y')
//
Switch23:
        ldt     f6, PI_HI(t0)
        ornot   zero, t2, t5
        and     t5, 127, t5
        ldt     f10, PI_LO(t0)
        subl    t5, 27, t5
        subt    f2, f6, f1
        blt     t5, neg_sin_1

        s4subl  t5, t5, t5
        s8addl  t5, t0, t5
        ldt     f0, SINCOS(t5)
        ldt     f5, SIN_A(t5)
        ldt     f6, COS_A(t5)
        addt    f1, f0, f0
        subt    f10, f0, f4
        br      zero, pos_tab_eval

//
//      5th octant; compute -sin(y')
//
Switch24:
        ldt     f11, PI_HI(t0)
        and     t2, 127, t4
        ldt     f10, PI_LO(t0)
        subl    t4, 27, t4
        subt    f2, f11, f1
        blt     t4, neg_sin_1

        s4subl  t4, t4, t4
        s8addl  t4, t0, t4
        ldt     f0, SINCOS(t4)
        ldt     f5, SIN_A(t4)
        ldt     f6, COS_A(t4)
        subt    f1, f0, f0
        subt    f0, f10, f4
        br      zero, neg_tab_eval

neg_sin_1:
        subt    f1, f10, f2
        ldt     f7, S_POLY3(t0)
        ldt     f0, S_POLY1(t0)
        ldt     f4, S_POLY2(t0)
        ldt     f12, S_POLY0(t0)
        mult    f2, f2, f11
        mult    f7, f11, f7
        mult    f11, f11, f6
        mult    f0, f11, f0
        mult    f2, f11, f2
        addt    f7, f4, f4
        addt    f0, f12, f0
        mult    f4, f6, f4
        addt    f4, f0, f0
        mult    f2, f0, f0
        addt    f10, f0, f0
        subt    f0, f1, f7
        fbge    f3, adjust_sign
        br      zero, change_sign

//
//      6th octant; compute -cos(y')
//
Switch25:
        ldt     f12, THREE_PI_OVER_2_HI(t0)
        ornot   zero, t2, t3
        and     t3, 127, t3
        ldt     f11, THREE_PI_OVER_2_LO(t0)
        subl    t3, 27, t3
        subt    f2, f12, f12
        blt     t3, neg_cos_1

        s4subl  t3, t3, t3
        s8addl  t3, t0, t3
        ldt     f6, SINCOS(t3)
        ldt     f5, COS_A(t3)
        addt    f12, f6, f6
        subt    f6, f11, f4
        ldt     f6, SIN_A(t3)
        br      zero, neg_tab_eval

//
//      7th octant; compute -cos(y')
//
Switch26:
        ldt     f10, THREE_PI_OVER_2_HI(t0)
        and     t2, 127, t5
        ldt     f11, THREE_PI_OVER_2_LO(t0)
        subl    t5, 27, t5
        subt    f2, f10, f12
        blt     t5, neg_cos_1

        s4subl  t5, t5, t5
        s8addl  t5, t0, t5
        ldt     f0, SINCOS(t5)
        ldt     f6, SIN_A(t5)
        ldt     f5, COS_A(t5)
        subt    f12, f0, f0
        subt    f11, f0, f4
        br      zero, neg_tab_eval

neg_cos_1:
        subt    f12, f11, f11
        ldt     f1, C_POLY1(t0)
        ldt     f2, C_POLY4(t0)
        ldt     f7, C_POLY2(t0)
        ldt     f0, C_POLY0(t0)
        ldt     f4, C_POLY3(t0)
        ldt     f6, One
        mult    f11, f11, f11
        mult    f11, f11, f10
        mult    f11, f1, f1
        mult    f2, f11, f2
        mult    f7, f10, f7
        addt    f1, f0, f0
        mult    f10, f11, f10
        addt    f2, f4, f2
        addt    f0, f7, f0
        mult    f10, f2, f2
        addt    f0, f2, f0
        mult    f11, f0, f0
        subt    f0, f6, f7
        fbge    f3, adjust_sign
        br      zero, change_sign

//
//      8th octant; compute sin(y')
//
Switch27:
        ldt     f12, TWO_PI_HI(t0)
        ornot   zero, t2, t2
        and     t2, 127, t2
        ldt     f1, TWO_PI_LO(t0)
        subl    t2, 27, t2
        subt    f2, f12, f2
        blt     t2, pos_sin_1

        s4subl  t2, t2, t2
        s8addl  t2, t0, t2
        ldt     f10, SINCOS(t2)
        ldt     f5, SIN_A(t2)
        ldt     f6, COS_A(t2)
        addt    f2, f10, f10
        subt    f1, f10, f4
        br      zero, neg_tab_eval

pos_sin_1:
        subt    f2, f1, f11
        ldt     f12, S_POLY3(t0)
        ldt     f7, S_POLY1(t0)
        ldt     f10, S_POLY2(t0)
        ldt     f6, S_POLY0(t0)
        mult    f11, f11, f0
        mult    f12, f0, f12
        mult    f0, f0, f4
        mult    f7, f0, f7
        mult    f11, f0, f0
        addt    f12, f10, f10
        addt    f7, f6, f6
        mult    f10, f4, f4
        addt    f4, f6, f4
        mult    f0, f4, f0
        addt    f1, f0, f0
        subt    f2, f0, f7
        fbge    f3, adjust_sign
        br      zero, change_sign

small_cos:
        mult    f2, f2, f12
        ldt     f10, C_POLY1(t0)
        ldt     f6, C_POLY4(t0)
        ldt     f1, C_POLY2(t0)
        ldt     f0, C_POLY0(t0)
        ldt     f5, C_POLY3(t0)
        mult    f12, f12, f11
        mult    f12, f10, f10
        mult    f6, f12, f6
        mult    f1, f11, f1
        addt    f10, f0, f0
        mult    f11, f12, f11
        ldt     f10, One
        addt    f6, f5, f5
        addt    f0, f1, f0
        mult    f11, f5, f5
        addt    f0, f5, f0
        mult    f12, f0, f0
        subt    f10, f0, f7
        fbge    f3, adjust_sign
        br      zero, change_sign

//
//      a medium argument
//
medium_argument: 
        lda     t5, __trig_cons         // reduce the argument with extra precision
        ldt     f6, D_2_POW_K_OVER_PI_OVER_4(t5)
        mult    f2, f6, f6
        cvttqc  f6, f1
        stt     f1, Temp2(sp)
        ldl     s0, Temp2(sp)
        addl    s0, 0x80, t2
        bic     t2, 0xff, t2
        stq     t2, Temp3(sp)
        ldt     f5, Temp3(sp)
        ldt     f0, PI_OVER_4_OVER_2_POW_K_0(t5)
        ldt     f10, PI_OVER_4_OVER_2_POW_K_1(t5)
        cvtqt   f5, f5
        ldt     f6, PI_OVER_4_OVER_2_POW_K_2(t5)
        mult    f5, f0, f0
        mult    f5, f10, f10
        mult    f5, f6, f6
        subt    f2, f0, f0
        subt    f0, f10, f1
        subt    f1, f0, f0
        addt    f10, f0, f0
        addt    f6, f0, f8
        subt    f1, f8, f9
        cmpteq  f9, f1, f11
        fbne    f11, evaluate

        subt    f9, f1, f1
        ldt     f12, PI_OVER_4_OVER_2_POW_K_3(t5)
        mult    f5, f12, f12
        addt    f8, f1, f1
        addt    f12, f1, f8
        subt    f9, f8, f10
        cmpteq  f10, f9, f0
        fbne    f0, evaluate

        subt    f10, f9, f9
        ldt     f6, PI_OVER_4_OVER_2_POW_K_4(t5)
        mult    f5, f6, f5
        addt    f8, f9, f8
        addt    f5, f8, f8
        subt    f10, f8, f9
        cmpteq  f9, f10, f11
        fbne    f11, evaluate

        subt    f9, f10, f10
        addt    f8, f10, f8
        br      zero, evaluate

//
//      process an abnormal argument
//      it's either very small, very big, a NaN or an Inf
//
abnormal_argument:
        cmple   v0, t1, t1
        beq     t1, big_NaN_or_Inf

        cpys    f3, f2, f0      // very small argument; simply return it.
        br      zero, done

//
//      Process big arguments or NaNs or Infs
//
big_NaN_or_Inf:
        ldah    s1, 0x7ff0(zero)        // mask is 0x7ff00000
        and     v0, s1, v0
        xor     v0, s1, v0
        beq     v0, NaN_or_Inf  // NaN or an infinity

        cpys    f2, f2, f16     // reduce the very big argument
        mov     zero, a1        // very carefully
        lda     a2, Temp1(sp)
        lda     a3, Temp0(sp)
        bsr     ra, __trig_reduce
        mov     v0, s0
        ldt     f8, Temp0(sp)
        ldt     f9, Temp1(sp)

//
//      evaluate the function
//
evaluate:
        sra     s0, 7, t2
        and     t2, 7, t2
        cmpule  t2, 7, t12
        beq     t12, pos_sin_2

        lda     t12, Switch1    // dispatch on the octant
        s4addl  t2, t12, t12
        ldl     t12, 0(t12)
        jmp     zero, (t12)

//
//      1st octant; compute sin(y')
//
Switch10:
        and     s0, 127, t4
        subl    t4, 27, t4
        blt     t4, pos_sin_2

        s4subl  t4, t4, t4
        lda     t5, __trig_cons
        s8addl  t4, t5, t4
        ldt     f6, SINCOS(t4)
        ldt     f5, SIN_A(t4)
        subt    f9, f6, f6
        subt    f6, f8, f4
        ldt     f6, COS_A(t4)
        br      zero, pos_tab_eval

//
//      2nd octant; compute cos(y')
//
Switch11:
        ornot   zero, s0, t7
        and     t7, 127, t7
        subl    t7, 27, t7
        blt     t7, pos_cos_2

        s4subl  t7, t7, t7
        lda     a0, __trig_cons
        s8addl  t7, a0, t7
        ldt     f0, SINCOS(t7)
        ldt     f6, SIN_A(t7)
        ldt     f5, COS_A(t7)
        addt    f9, f0, f0
        subt    f0, f8, f4
        br      zero, pos_tab_eval

//
//      3rd octant; compute cos(y')
//
Switch12:
        and     s0, 127, a2
        subl    a2, 27, a2
        blt     a2, pos_cos_2

        s4subl  a2, a2, a2
        lda     a3, __trig_cons
        s8addl  a2, a3, a2
        ldt     f1, SINCOS(a2)
        ldt     f6, SIN_A(a2)
        ldt     f5, COS_A(a2)
        subt    f9, f1, f1
        subt    f8, f1, f4
        br      zero, pos_tab_eval

pos_cos_2: 
        mult    f9, f9, f10
        lda     a5, __trig_cons
        ldt     f11, C_POLY1(a5)
        ldt     f12, C_POLY4(a5)
        ldt     f14, C_POLY2(a5)
        mult    f10, f10, f13
        mult    f10, f11, f11
        ldt     f15, C_POLY0(a5)
        mult    f12, f10, f12
        ldt     f16, C_POLY3(a5)
        ldt     f17, One
        mult    f14, f13, f14
        addt    f11, f15, f11
        mult    f13, f10, f13
        addt    f12, f16, f12
        addt    f11, f14, f11
        mult    f13, f12, f12
        addt    f11, f12, f11
        mult    f10, f11, f10
        subt    f17, f10, f7
        fbge    f3, adjust_sign
        br      zero, change_sign

//
//      4th octant; compute -sin(y')
//
Switch13:
        ornot   zero, s0, t8
        and     t8, 127, t8
        subl    t8, 27, t8
        blt     t8, neg_sin_2

        s4subl  t8, t8, t8
        lda     t9, __trig_cons
        s8addl  t8, t9, t8
        ldt     f18, SINCOS(t8)
        ldt     f5, SIN_A(t8)
        ldt     f6, COS_A(t8)
        addt    f9, f18, f18
        subt    f8, f18, f4
        br      zero, pos_tab_eval

//
//      5th octant; compute -sin(y')
//
Switch14:
        and     s0, 127, t11
        subl    t11, 27, t11
        blt     t11, neg_sin_2

        s4subl  t11, t11, t11
        lda     ra, __trig_cons
        s8addl  t11, ra, t11
        ldt     f19, SINCOS(t11)
        ldt     f5, SIN_A(t11)
        ldt     f6, COS_A(t11)
        subt    f9, f19, f19
        subt    f19, f8, f4
        br      zero, neg_tab_eval

neg_sin_2: 
        mult    f9, f9, f20
        lda     v0, __trig_cons
        ldt     f21, S_POLY3(v0)
        ldt     f22, S_POLY1(v0)
        ldt     f23, S_POLY2(v0)
        mult    f21, f20, f21
        ldt     f25, S_POLY0(v0)
        mult    f20, f20, f24
        mult    f22, f20, f22
        mult    f9, f20, f20
        addt    f21, f23, f21
        addt    f22, f25, f22
        mult    f21, f24, f21
        addt    f21, f22, f21
        mult    f20, f21, f20
        addt    f8, f20, f8
        subt    f8, f9, f7
        fbge    f3, adjust_sign
        br      zero, change_sign

//
//      6th octant; compute -cos(y')
//
Switch15:
        ornot   zero, s0, t0
        and     t0, 127, t0
        subl    t0, 27, t0
        blt     t0, neg_cos_2

        s4subl  t0, t0, t0
        lda     t1, __trig_cons
        s8addl  t0, t1, t0
        ldt     f26, SINCOS(t0)
        ldt     f6, SIN_A(t0)
        ldt     f5, COS_A(t0)
        addt    f9, f26, f26
        subt    f26, f8, f4
        br      zero, neg_tab_eval

//
//      7th octant; compute -cos(y')
//
Switch16:
        and     s0, 127, t3
        subl    t3, 27, t3
        blt     t3, neg_cos_2

        s4subl  t3, t3, t3
        lda     t5, __trig_cons
        s8addl  t3, t5, t3
        ldt     f27, SINCOS(t3)
        ldt     f6, SIN_A(t3)
        ldt     f5, COS_A(t3)
        subt    f9, f27, f27
        subt    f8, f27, f4
        br      zero, neg_tab_eval

neg_cos_2: 
        mult    f9, f9, f28
        lda     t6, __trig_cons
        ldt     f29, C_POLY1(t6)
        ldt     f30, C_POLY4(t6)
        ldt     f1, C_POLY2(t6)
        mult    f28, f28, f0
        mult    f28, f29, f29
        ldt     f15, C_POLY0(t6)
        mult    f30, f28, f30
        ldt     f16, C_POLY3(t6)
        ldt     f14, One
        mult    f1, f0, f1
        addt    f29, f15, f15
        mult    f0, f28, f0
        addt    f30, f16, f16
        addt    f15, f1, f1
        mult    f0, f16, f0
        addt    f1, f0, f0
        mult    f28, f0, f0
        subt    f0, f14, f7
        fbge    f3, adjust_sign
        br      zero, change_sign

//
//      8th octant; compute sin(y')
//
Switch17:
        ornot   zero, s0, s0
        and     s0, 127, s0
        subl    s0, 27, s0
        blt     s0, pos_sin_2

        s4subl  s0, s0, s0
        lda     a0, __trig_cons
        s8addl  s0, a0, s0
        ldt     f13, SINCOS(s0)
        ldt     f5, SIN_A(s0)
        ldt     f6, COS_A(s0)
        addt    f9, f13, f13
        subt    f8, f13, f4

neg_tab_eval:
        cpysn   f3, f3, f3

pos_tab_eval: 
        mult    f4, f4, f12             // evalutate p_poly & q_poly
        lda     a1, __trig_cons
        ldt     f11, P_POLY1(a1)
        ldt     f17, P_POLY0(a1)
        ldt     f10, Q_POLY1(a1)
        mult    f11, f12, f11
        ldt     f19, Q_POLY0(a1)
        mult    f4, f12, f18
        mult    f10, f12, f10
        addt    f11, f17, f11
        addt    f10, f19, f10
        mult    f18, f11, f11
        mult    f12, f10, f10
        subt    f4, f11, f4
        mult    f5, f10, f10
        mult    f6, f4, f4
        subt    f10, f4, f4
        subt    f5, f4, f7
        fbge    f3, adjust_sign
        br      zero, change_sign

pos_sin_2: 
        mult    f9, f9, f23
        lda     a3, __trig_cons
        ldt     f25, S_POLY3(a3)
        ldt     f24, S_POLY1(a3)
        ldt     f22, S_POLY2(a3)
        mult    f25, f23, f25
        ldt     f20, S_POLY0(a3)
        mult    f23, f23, f21
        mult    f24, f23, f24
        mult    f9, f23, f23
        addt    f25, f22, f22
        addt    f24, f20, f20
        mult    f22, f21, f21
        addt    f21, f20, f20
        mult    f23, f20, f20
        addt    f8, f20, f8
        subt    f9, f8, f7
        fbge    f3, adjust_sign

        br      zero, change_sign

//
// Determine if we have a NaN or an Inf
//
NaN_or_Inf:
        stt     f2, Temp2(sp)
        ldl     a4, Temp2 + HighPart(sp)
        and     a4, s1, a5
        cmpeq   a5, s1, s1
        beq     s1, NaN_or_Inf1

        ldl     t9, Temp2(sp)
        ldah    t8, 0x10(zero)  // mask = 0x000fffff
        lda     t8, -1(t8)
        and     a4, t8, a4
        bis     a4, t9, a4
        cmpult  zero, a4, a4
        and     s1, a4, s1
        bne     s1, NaN_or_Inf2

//
// report an exception
//
NaN_or_Inf1:
        lda     t10, sinName
        stl     t10, ExRec + ErName(sp)
        ldah    t12, 0x800(zero)
        stt     f2, ExRec + ErArg0(sp)
        lda     t12, 0x55(t12)
        stl     t12, ExRec + ErErr(sp)
        lda     v0, ExRec(sp)
        bsr     ra, __dpml_exception
        ldt     f2, 0(v0)

//
// return the argument
//
NaN_or_Inf2:
        cpys    f2, f2, f0
        br      zero, done

change_sign:
        cpysn   f7, f7, f7
adjust_sign:
        cpys    f7, f7, f0

//
// Restore registers and return with result in f0.
//

done:   ldq     s0, SaveS0(sp)
        ldq     s1, SaveS1(sp)
        ldq     ra, SaveRa(sp)
        ldt     f2, SaveF2(sp)
        ldt     f3, SaveF3(sp)
        ldt     f4, SaveF4(sp)
        ldt     f5, SaveF5(sp)
        ldt     f6, SaveF6(sp)
        ldt     f7, SaveF7(sp)
        ldt     f8, SaveF8(sp)
        ldt     f9, SaveF9(sp)
        lda     sp, FrameLength(sp)     // deallocate stack frame
        ret     zero, (ra)              // return

        .end    sin

        .rdata
        .align  3

//
// Define floating point constants.
//

One:    .double 1.0

//
// dispatch on octant
//

Switch1:
        .long Switch10
        .long Switch11
        .long Switch12
        .long Switch13
        .long Switch14
        .long Switch15
        .long Switch16
        .long Switch17

//
// dispatch on octant
//

Switch2:
        .long Switch20
        .long Switch21
        .long Switch22
        .long Switch23
        .long Switch24
        .long Switch25
        .long Switch26
        .long Switch27

//
// Function name for dpml_exception.
//

sinName:
        .ascii  "sin\0"