windows-server-2003/base/crts/fpw32/tran/ia64/tanh.s


								.file "tanh.s"


								// Copyright (c) 2000, 2001, Intel Corporation

								// All rights reserved.

								//

								// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,

								// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.

								//

								// WARRANTY DISCLAIMER

								//

								// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

								// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

								// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

								// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS

								// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

								// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

								// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR

								// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY

								// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING

								// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

								// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

								//

								// Intel Corporation is the author of this code, and requests that all

								// problem reports or change requests be submitted to it directly at

								// http://developer.intel.com/opensource.

								//

								// History

								//==============================================================

								// 05/30/01  Initial version


								//

								// API

								//==============================================================

								// double tanh(double)

								//

								// Overview of operation

								//==============================================================


								//


								// There are 8 paths:

								// 1. x = +/-0.0

								//    Return tanh(x) = +/-0.0

								//

								// 2. MAX_DENORMAL_ABS < |x| < 1/16

								//    Return tanh(x) = P13(x), where

								//    P13(x) = (((C13*x^2 + C11)*x^4 + (C9*x^2 + C7))*x^4 +

								//            (C5*x^2 + C3))*x^3 + x

								//

								// 3. 1/16 <= |x| < 32

								//    Return tanh(x) = sign(x)*(1 - 2 / (1 + exp(2*|x|)))

								//    Algorithm description for exp function see below

								//

								// 4. 32 <= |x| < +INF

								//    Return tanh(x) = sign(x)*(1.0 - 2^(-63)), rounded to double

								//

								// 5. x = +/-INF

								//    Return tanh(x) = sign(x)

								//

								// 6. x = [S,Q]NaN

								//   Return tanh(x) = QNaN

								//


								// 7. x is positive denormal

								//    Return tanh(x) = x - x^2

								//


								// 8. x is negative denormal

								//    Return tanh(x) = x + x^2

								//


								//==============================================================


								// Algorithm Description for exp(x) function


								//


								// Take the input x. w is "how many log2/128 in x?"

								//  w = x * 128/log2

								//  n = int(w)

								//  x = n log2/128 + r + delta


								//  n = 128M + index_1 + 2^4 index_2

								//  x = M log2 + (log2/128) index_1 + (log2/8) index_2 + r + delta


								//  exp(x) = 2^M  2^(index_1/128)  2^(index_2/8) exp(r) exp(delta)

								//       Construct 2^M

								//       Get 2^(index_1/128) from table_1;

								//       Get 2^(index_2/8)   from table_2;

								//       Calculate exp(r) by series

								//          r = x - n (log2/128)_high

								//          delta = - n (log2/128)_low

								//       Calculate exp(delta) as 1 + delta


								// Registers used

								//==============================================================

								// Floating Point registers used:

								// f8, input

								// f33 -> f75


								// General registers used:

								// r2 (scratch), r32 -> r57


								// Predicate registers used:

								// p6 -> p15


								// Assembly macros

								//==============================================================

								// Symbolic names for the registers used by tanh.  The trailing comment on
								// each alias describes the value the procedure in this file places in it.
								//
								// General registers.  r32 is not aliased: it is the destination of the
								// alloc at procedure entry.  r2 is also used directly as a scratch.
								exp_GR_rshf                   = r33 // bit pattern 0x43e8000000000000 (double 1.1b * 2^63)

								EXP_AD_TB1                    = r34 // walking pointer into tanh_data, then base of table 1

								EXP_AD_TB2                    = r35 // base of table 2 (EXP_AD_TB1 + 0x100)

								EXP_AD_P                      = r36 // address of P_4..P_1 (EXP_AD_TB1 + 0x180)

								exp_GR_N                      = r37 // significand of EXP_W_2TO56_RSH (holds N)

								exp_GR_index_1                = r38 // N & 0x0f

								exp_GR_index_2_16             = r39 // N & 0x70, i.e. index_2 * 16

								exp_GR_biased_M               = r40 // 0xffff + (N >> 7), fed to setf.exp

								exp_GR_index_1_16             = r41 // not referenced by any instruction in this file

								EXP_AD_T1                     = r42 // address of the selected table 1 entry

								EXP_AD_T2                     = r43 // address of the selected table 2 entry

								exp_GR_sig_inv_ln2            = r44 // 0xb8aa3b295c17f0bc, significand of 1/ln2

								exp_GR_17ones                 = r45 // 0x1FFFF, mask for the exponent field of getf.exp

								exp_GR_rshf_2to56             = r46 // bit pattern 0x4768000000000000 (double 1.1b * 2^(63+56))

								exp_GR_exp_2tom56             = r47 // 0xFFFF-56, biased exponent of 2^-56

								exp_Expb                      = r48 // biased exponent of x (sign bit masked off)

								exp_ExpbOf2to4                = r49 // 0x10003, biased exponent of 16

								exp_NearZeroBound             = r50 // 0xFFFB, biased exponent of 1/16

								TANH_NZ_CF                    = r51 // pointer used to load C5 and C3

								ALMOST_ONE                    = r52 // pointer to "almost one", then to "almost two"

								DATA_PTR                      = r53 // address of the @ltoff entry for tanh_data

								reg_RcMask                    = r54 // 0xC00, mask applied to the saved ar.fpsr

								reg_ArFsr                     = r55 // copy of ar.fpsr, later masked with reg_RcMask

								reg_RcDown                    = r56 // 0x400, compared against the masked ar.fpsr

								reg_RcUp                      = r57 // 0x800, compared against the masked ar.fpsr


								//==============================================================

								// Floating-point registers.  Input and result are in f8 (not aliased).
								EXP_RSHF_2TO56                = f33 // 1.1b * 2^(63+56), shift constant for W

								EXP_INV_LN2_2TO63             = f34 // 1/ln2 * 2^63 (setf.sig of the 1/ln2 significand)

								EXP_W_2TO56_RSH               = f35 // 2|x| * EXP_INV_LN2_2TO63 + EXP_RSHF_2TO56

								EXP_2TOM56                    = f36 // 2^-56

								exp_P4                        = f37 // exp polynomial coefficient P_4

								exp_P3                        = f38 // exp polynomial coefficient P_3

								exp_P2                        = f39 // exp polynomial coefficient P_2

								exp_P1                        = f40 // exp polynomial coefficient P_1

								exp_ln2_by_128_hi             = f41 // high part of ln2/128

								exp_ln2_by_128_lo             = f42 // low part of ln2/128

								EXP_RSHF                      = f43 // 1.1b * 2^63

								EXP_Nfloat                    = f44 // EXP_W_2TO56_RSH * 2^-56 - EXP_RSHF (N as a float)

								exp_r                         = f45 // 2|x| - Nfloat * ln2_by_128_hi

								exp_f                         = f46 // 1 - Nfloat * ln2_by_128_lo

								exp_rsq                       = f47 // r^2

								exp_rcube                     = f48 // r^3

								EXP_2M                        = f49 // 2^M built with setf.exp

								exp_S1                        = f50 // 2^M * T1

								exp_T1                        = f51 // table 1 entry

								exp_rP4pP3                    = f52 // r*P4 + P3

								exp_P_lo                      = f53 // r*(r*P4 + P3) + P2

								exp_P_hi                      = f54 // r^2*P1 + r

								exp_P                         = f55 // r^3*P_lo + P_hi

								exp_S                         = f56 // S1 * S2

								exp_ExppOne                   = f57 // S1*S2 + 1 + S*P; later overwritten by its reciprocal

								EXP_NORM_f8                   = f58 // 2*x, negated to 2*|x| when x < 0

								exp_S2                        = f59 // f * T2

								exp_T2                        = f60 // table 2 entry

								tanh_rcp0                     = f61 // frcpa approximation of 1/exp_ExppOne

								tanh_rcp1                     = f62 // Newton-Raphson step 1 (residual, then refined value)

								tanh_rcp2                     = f63 // Newton-Raphson step 2 (residual, then refined value)

								tanh_rcp3                     = f64 // residual of Newton-Raphson step 3

								tanh_Two                      = f65 // 2.0, or the "almost two" table entry (see p14/p15)

								tanh_C13                      = f66 // C13; reused as the polynomial accumulator

								tanh_C11                      = f67 // C11

								tanh_C9                       = f68 // C9; reused for C9*x^2 + C7

								tanh_C7                       = f69 // C7

								tanh_C5                       = f70 // C5; reused for C5*x^2 + C3

								tanh_C3                       = f71 // C3

								tanh_X4                       = f72 // x^4

								tanh_X3                       = f73 // x^3

								tanh_X2                       = f74 // x^2

								tanh_AlmostOne                = f75 // the "almost one" table entry


								// Data tables

								//==============================================================

								// Layout of tanh_data (byte offsets from the label).  The procedure walks
								// this table with hard-coded offsets and post-increments, so entries must
								// not be reordered, inserted or removed:
								//   0x000 C13           0x010 C11          0x020 C9
								//   0x030 ln2/128 hi    0x040 almost one   0x050 ln2/128 lo
								//   0x060 C7            0x070 C5           0x080 C3
								//   0x090 almost two
								//   0x0A0 table 1 (16 entries)   0x1A0 table 2 (8 entries)
								//   0x220 P_4, P_3, P_2, P_1 (8-byte doubles, loaded with ldfpd)
								// Every entry before P_4 is a 16-byte pair (64-bit significand, then
								// sign/exponent word) that the code loads with ldfe.


								.data


								.align 16


								// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************


								// double-extended 1/ln(2)

								// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88

								// 3fff b8aa 3b29 5c17 f0bc

								// For speed the significand will be loaded directly with a movl and setf.sig

								//   and the exponent will be bias+63 instead of bias+0.  Thus subsequent

								//   computations need to scale appropriately.

								// The constant 128/ln(2) is needed for the computation of w.  This is also

								//   obtained by scaling the computations.

								//

								// Two shifting constants are loaded directly with movl and setf.d.

								//   1. EXP_RSHF_2TO56 = 1.1000..00 * 2^(63-7), further scaled by 2^63 to

								//        match the scaled 1/ln2; the value actually loaded by the code is

								//        0x4768000000000000 = 1.1000..00 * 2^(63+56).

								//        This constant is added to x*1/ln2 to shift the integer part of

								//        x*128/ln2 into the rightmost bits of the significand.

								//        The result of this fma is EXP_W_2TO56_RSH.

								//   2. EXP_RSHF       = 1.1000..00 * 2^(63)

								//        This constant is subtracted from EXP_W_2TO56_RSH * 2^(-56) to give

								//        the integer part of w, n, as a floating-point number.

								//        The result of this fms is EXP_Nfloat.

								tanh_data:

								data8 0xeb69e870abeefdb0,  0x00003ff6 // C13            (offset 0x00)

								data8 0x91371aaf3611e47b,  0x0000bff8 // C11            (offset 0x10)

								data8 0xb327a4416087cf99,  0x00003ff9 // C9             (offset 0x20)

								data8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hi     (offset 0x30)

								data8 0xffffffffffffffff,  0x00003ffe // almost one     (offset 0x40)

								data8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo     (offset 0x50)

								data8 0xdd0dd0dd0dd0dd0e,  0x0000bffa // C7             (offset 0x60)

								data8 0x8888888888888889,  0x00003ffc // C5             (offset 0x70)

								data8 0xaaaaaaaaaaaaaaab,  0x0000bffd // C3             (offset 0x80)

								data8 0x8000000000000001,  0x00004000 // almost two     (offset 0x90)


								// Table 1 is 2^(index_1/128) where

								// index_1 goes from 0 to 15

								// (starts at offset 0xA0; 16 entries of 16 bytes)

								data8 0x8000000000000000 , 0x00003FFF

								data8 0x80B1ED4FD999AB6C , 0x00003FFF

								data8 0x8164D1F3BC030773 , 0x00003FFF

								data8 0x8218AF4373FC25EC , 0x00003FFF

								data8 0x82CD8698AC2BA1D7 , 0x00003FFF

								data8 0x8383594EEFB6EE37 , 0x00003FFF

								data8 0x843A28C3ACDE4046 , 0x00003FFF

								data8 0x84F1F656379C1A29 , 0x00003FFF

								data8 0x85AAC367CC487B15 , 0x00003FFF

								data8 0x8664915B923FBA04 , 0x00003FFF

								data8 0x871F61969E8D1010 , 0x00003FFF

								data8 0x87DB357FF698D792 , 0x00003FFF

								data8 0x88980E8092DA8527 , 0x00003FFF

								data8 0x8955EE03618E5FDD , 0x00003FFF

								data8 0x8A14D575496EFD9A , 0x00003FFF

								data8 0x8AD4C6452C728924 , 0x00003FFF


								// Table 2 is 2^(index_2/8) where

								// index_2 goes from 0 to 7

								// (starts at offset 0x1A0; 8 entries of 16 bytes)

								data8 0x8000000000000000 , 0x00003FFF

								data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF

								data8 0x9837F0518DB8A96F , 0x00003FFF

								data8 0xA5FED6A9B15138EA , 0x00003FFF

								data8 0xB504F333F9DE6484 , 0x00003FFF

								data8 0xC5672A115506DADD , 0x00003FFF

								data8 0xD744FCCAD69D6AF4 , 0x00003FFF

								data8 0xEAC0C6E7DD24392F , 0x00003FFF


								// Coefficients of the exp(r) polynomial, stored as 8-byte doubles
								// (starts at offset 0x220; loaded pairwise with ldfpd)

								data8 0x3f8111116da21757 //P_4

								data8 0x3fa55555d787761c //P_3

								data8 0x3fc5555555555414 //P_2

								data8 0x3fdffffffffffd6a //P_1


								// ------------------------------------------------------------------
								// double tanh(double x)
								//   In:   f8 = x
								//   Out:  f8 = tanh(x)
								//   Uses: r2, r32-r57 (frame created by the alloc below), f33-f75,
								//         p6-p15.
								// This entry section builds the constants for exp(), returns early
								// for NaN, +/-0, denormals and +/-INF, classifies |x| into
								// p12 (|x| < 1/16) and p13 (|x| >= 32), records the sign of x in
								// p10 (x < 0) / p11 (x >= 0), and starts the argument reduction for
								// exp(2*|x|).
								// ------------------------------------------------------------------
								.align 32

								.global tanh#


								.section .text

								.proc  tanh#

								.align 32

								tanh:


								{ .mlx

								      // 1 input + 25 locals; the previous ar.pfs is written to r32
								      alloc       r32=ar.pfs,1,25,0,0

								      // significand of 1/ln2

								      movl        exp_GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc

								}

								{ .mlx

								      // DATA_PTR = address of the linkage-table entry for tanh_data
								      addl        DATA_PTR = @ltoff(tanh_data), gp

								      movl        exp_GR_rshf_2to56 = 0x4768000000000000 // 1.1 * 2^(63+56)

								};;


								// Both ld8's below fetch the address of tanh_data from the linkage table:
								// EXP_AD_TB1 walks the table from its start, ALMOST_ONE is later advanced
								// to the "almost one" entry.
								// The fma below computes x*1 + x = 2*x, the argument of exp().  The
								// original note says this operation is done right at the beginning to
								// take any enabled faults and to normalize any input unnormals so that
								// SWA is not taken.

								{ .mfi

								      ld8         EXP_AD_TB1 = [DATA_PTR]

								      fclass.m    p6,p0 = f8, 0xC7 // is arg NaN or +/-0 ?

								      mov         exp_GR_17ones = 0x1FFFF

								}

								{ .mfi

								      ld8         ALMOST_ONE = [DATA_PTR]

								      fma.s1      EXP_NORM_f8 = f8, f1, f8 // 2*x

								      mov         exp_GR_exp_2tom56 = 0xFFFF-56

								};;


								// Form two constants we need

								//  1/ln2 * 2^63  to compute  w = x * 1/ln2 * 128

								//  1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand

								{ .mmf

								      // form 1/ln2 * 2^63

								      setf.sig    EXP_INV_LN2_2TO63 = exp_GR_sig_inv_ln2

								      // form const 1.1 * 2^(63+56)

								      setf.d      EXP_RSHF_2TO56 = exp_GR_rshf_2to56

								      fclass.m    p7,p0 = f8, 0x0A // is arg -denormal ?

								};;

								{ .mlx

								      // form 2^-56 for scaling Nfloat

								      setf.exp    EXP_2TOM56 = exp_GR_exp_2tom56

								      // 1.10000 2^63 for right shift

								      movl        exp_GR_rshf = 0x43e8000000000000

								}

								{ .mfb

								      nop.m       0

								      // paths 1 and 6: return x + x
								(p6)  fma.d.s0    f8 =  f8, f1, f8 // NaN or +/-0

								(p6)  br.ret.spnt b0

								};;

								{ .mfi

								      getf.exp    exp_Expb = f8

								      fclass.m    p8,p0 = f8, 0x09 // is arg +denormal ?

								      // advance to offset 0x40 of tanh_data, the "almost one" entry
								      adds        ALMOST_ONE = 0x40, ALMOST_ONE

								}

								{ .mfb

								      ldfe        tanh_C13 = [EXP_AD_TB1], 16

								      // path 8: return x*x + x
								(p7)  fma.d.s0    f8 =  f8, f8, f8 // -denormal

								(p7)  br.ret.spnt b0

								};;

								{ .mfi

								      // Form right shift const 1.100 * 2^63

								      setf.d      EXP_RSHF = exp_GR_rshf

								      fma.s1      tanh_X2 = f8, f8, f0 // x^2

								      mov         exp_ExpbOf2to4 = 0x10003 // biased exp of 16

								}

								{ .mfi

								      ldfe        tanh_C11 = [EXP_AD_TB1], 16

								      nop.f       0

								      mov         exp_NearZeroBound = 0xFFFB // biased exp of 1/16

								};;

								{ .mfi

								      ldfe        tanh_C9 = [EXP_AD_TB1], 16

								      fcmp.lt     p10, p11 = f8, f0 // is x < 0 ?

								      // keep only the 17-bit exponent field (drops the sign bit)
								      and         exp_Expb = exp_Expb, exp_GR_17ones

								};;

								{ .mfi

								      // post-increment of 32 skips the "almost one" entry so the
								      // pointer lands on ln2/128 lo
								      ldfe        exp_ln2_by_128_hi  = [EXP_AD_TB1], 32

								      fma.s1      tanh_Two = f1, f1, f1 // 1*1 + 1 = 2

								      // p13: exponent above that of 16, i.e. |x| >= 32
								      cmp.gtu     p13, p0 = exp_Expb, exp_ExpbOf2to4

								}

								{ .mfi

								      // post-increment of 80 leaves ALMOST_ONE at offset 0x90,
								      // the "almost two" entry
								      ldfe        tanh_AlmostOne = [ALMOST_ONE], 80

								      nop.f       0

								      // p9: exponent field all ones; NaN already returned via p6,
								      // so this is +/-INF
								      cmp.eq      p9, p0 = exp_Expb, exp_GR_17ones

								};;

								{ .mfi

								      ldfe        exp_ln2_by_128_lo  = [EXP_AD_TB1], 16

								      // path 7: return x - x*x
								(p8)  fnma.d.s0   f8 =  f8, f8, f8 // +denormal

								      mov         reg_RcDown = 0x400

								}

								{ .mfb

								      // p12: exponent below that of 1/16, i.e. |x| < 1/16
								      cmp.ltu     p12, p0 = exp_Expb, exp_NearZeroBound

								      nop.f       0

								(p8)  br.ret.spnt b0

								};;

								{ .mfi

								      mov         reg_ArFsr = ar.fpsr

								      // path 5: result takes the sign of x and the magnitude of 1.0
								(p9)  fmerge.s    f8 = f8,f1 // +/- inf

								      // 0x90 - 32 = offset 0x70, the C5 entry
								      adds        TANH_NZ_CF = -32, ALMOST_ONE

								}

								{ .mfb

								      ldfe        tanh_C7  = [EXP_AD_TB1], 16

									  nop.f       0

								(p9)  br.ret.spnt b0

								};;

								{ .mfi

								      nop.m       0

								      fma.s1      tanh_X4 = tanh_X2, tanh_X2, f0 // x^4

								      nop.i       0

								}

								{ .mfi

								      nop.m       0

								      fma.s1      tanh_X3 = tanh_X2, f8, f0 // x^3

								      nop.i       0

								}

								;;


								// After the C7 load, EXP_AD_TB1 is at offset 0x70 of tanh_data (the C5
								// entry); the adds of 0x30 below moves it to offset 0xA0, the beginning
								// of table 1.

								// W = X * Inv_log2_by_128, where X is EXP_NORM_f8 = 2*x

								// By adding 1.10...0*2^63 we shift and get round_int(W) in significand.

								// We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing.

								// For x < 0 (p10) the fnma negates the product, so W is formed from 2*|x|.

								.pred.rel "mutex",p11,p10

								{ .mfi

								      adds        EXP_AD_TB1 = 0x30, EXP_AD_TB1

								(p11) fma.s1      EXP_W_2TO56_RSH  = EXP_NORM_f8, EXP_INV_LN2_2TO63, EXP_RSHF_2TO56

								      mov         reg_RcMask = 0xC00

								}

								{ .mfi

								      ldfe        tanh_C5 = [TANH_NZ_CF], 16

								(p10) fnma.s1     EXP_W_2TO56_RSH  = EXP_NORM_f8, EXP_INV_LN2_2TO63, EXP_RSHF_2TO56

								      nop.i       0

								};;

								{ .mfi

								      ldfe        tanh_C3 = [TANH_NZ_CF], 16

								      // for x < 0, EXP_NORM_f8 = -(2*x) = 2*|x|
								(p10) fnma.s1     EXP_NORM_f8 = EXP_NORM_f8, f1, f0

								      // table 2 follows the 16 * 16 = 0x100 bytes of table 1
								      adds        EXP_AD_TB2 = 0x100, EXP_AD_TB1

								}

								{ .mfb

								      // P_4 follows table 1 (0x100 bytes) and table 2 (0x80 bytes)
								      adds        EXP_AD_P = 0x180, EXP_AD_TB1

								      nop.f       0

								      // path 2: |x| < 1/16 uses the polynomial
								(p12) br.cond.spnt tanh_near_zero

								};;

								{ .mfi

								      ldfpd       exp_P4, exp_P3  = [EXP_AD_P] ,16

								      nop.f       0

								      mov         reg_RcUp = 0x800

								};;


								// Nfloat = round_int(W)

								// The significand of EXP_W_2TO56_RSH contains the rounded integer part of W,

								// as a twos complement number in the lower bits (that is, it may be negative).

								// That twos complement number (called N) is put into exp_GR_N.


								// Since EXP_W_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56

								// before the shift constant 1.10000 * 2^63 is subtracted to yield EXP_Nfloat.

								// Thus, EXP_Nfloat contains the floating point version of N

								{ .mfi

								      ldfpd       exp_P2, exp_P1 = [EXP_AD_P]

								      fms.s1      EXP_Nfloat = EXP_W_2TO56_RSH, EXP_2TOM56, EXP_RSHF

								      nop.i       0

								};;


								// ------------------------------------------------------------------
								// tanh_gt32: saturated result for |x| >= 32 (p13), followed by the
								// main path 1/16 <= |x| < 32, which evaluates
								//   tanh(x) = sign(x) * (1 - 2 / (1 + exp(2*|x|))).
								// The label is not the target of any branch in this file; control
								// reaches it by falling through from the code above.
								// Note: the two f8 writes in the first bundle are predicated only on
								// the sign of x (p11/p10), not on p13, so they also execute when
								// control falls through to the main path.  f8 is not read again
								// below (the exp code uses EXP_NORM_f8) and is rewritten by the final
								// fnma/fms before the normal exit.
								// ------------------------------------------------------------------
								.pred.rel "mutex",p11,p10

								tanh_gt32:

								{ .mfi

								      // for x >= 32: f8 = AlmostOne * AlmostOne, rounded to double
								      // (the original note says the result is +1.0)

									  nop.m       0

								(p11) fma.d.s0    f8 = tanh_AlmostOne, tanh_AlmostOne, f0

									  nop.i       0

								}

								{ .mfb

									  nop.m       0

								      // for x <= -32: f8 = -(AlmostOne * AlmostOne), rounded to double
								      // (the original note says the result is -1.0)

								(p10) fnma.d.s0   f8 = tanh_AlmostOne, tanh_AlmostOne, f0

								(p13) br.ret.spnt b0

								};;


								{ .mfi

								      // exp_GR_N = significand of W, whose low bits hold N
								      getf.sig    exp_GR_N        = EXP_W_2TO56_RSH

								      nop.f       0

								      nop.i       0

								};;


								// exp_GR_index_1    = N & 0x0f  (index_1)

								// exp_GR_index_2_16 = N & 0x70  (index_2 * 16, a byte offset into table 2)

								// r2                = exp_GR_N >> 7 (true M, per the original note)

								// exp_r = 2|x| - Nfloat * ln2_by_128_hi

								// exp_f = 1 - Nfloat * ln2_by_128_lo

								// (exp_GR_index_1_16 is never written; the shladd in the next bundle
								//  scales index_1 by 16 instead.)

								{ .mfi

								      and         exp_GR_index_1 = 0x0f, exp_GR_N

								      fnma.s1     exp_r   = EXP_Nfloat, exp_ln2_by_128_hi, EXP_NORM_f8

								      shr         r2 = exp_GR_N,  0x7

								}

								{ .mfi

								      and         exp_GR_index_2_16 = 0x70, exp_GR_N

								      fnma.s1     exp_f = EXP_Nfloat, exp_ln2_by_128_lo, f1

								      nop.i       0

								};;


								// exp_GR_biased_M = 0xffff + r2 (biased exponent for 2^M)

								// EXP_AD_T1 has address of T1 = table 1 base + index_1 * 16

								// EXP_AD_T2 has address of T2 = table 2 base + index_2 * 16

								{ .mmi

								      addl        exp_GR_biased_M = 0xffff, r2

								      add         EXP_AD_T2 = EXP_AD_TB2, exp_GR_index_2_16

								      shladd      EXP_AD_T1 = exp_GR_index_1, 4, EXP_AD_TB1

								};;


								// Create Scale = 2^M and load T2

								{ .mmi

								      setf.exp    EXP_2M = exp_GR_biased_M

								      ldfe        exp_T2  = [EXP_AD_T2]

								      nop.i       0

								};;


								// Load T1; isolate the bits of the saved ar.fpsr selected by reg_RcMask
								// (0xC00).  NOTE(review): presumably the rounding-control field of status
								// field 0 -- confirm against the FPSR layout.

								{ .mfi

								      ldfe        exp_T1  = [EXP_AD_T1]

								      nop.f       0

								      and         reg_ArFsr = reg_ArFsr, reg_RcMask

								}

								;;

								// Polynomial for exp(r) - 1, evaluated as
								//   P = r^3 * (r*(r*P4 + P3) + P2) + (r^2*P1 + r)

								{ .mfi

								      nop.m       0

								      fma.s1      exp_rsq = exp_r, exp_r, f0 // r^2

								      // p14: masked fpsr bits equal reg_RcUp (0x800)
								      cmp.eq      p14, p0 = reg_ArFsr, reg_RcUp

								}

								{ .mfi

								      nop.m       0

								      fma.s1      exp_rP4pP3 = exp_r, exp_P4, exp_P3 // r*P4 + P3

								      nop.i       0

								};;

								{ .mfi

								      nop.m       0

								      fma.s1      exp_rcube = exp_r, exp_rsq, f0 // r^3

								      // p15: masked fpsr bits equal reg_RcDown (0x400)
								      cmp.eq      p15, p0 = reg_ArFsr, reg_RcDown

								}

								{ .mfi

								      nop.m       0

								      fma.s1      exp_P_lo  = exp_r, exp_rP4pP3, exp_P2 // r*(r*P4 + P3) + P2

								      nop.i       0

								};;

								{ .mfi

								      // ALMOST_ONE points at the "almost two" entry here, so under p14
								      // tanh_Two is replaced by that value.  NOTE(review): presumably
								      // this adjusts the final result for directed rounding -- confirm.
								(p14) ldfe        tanh_Two = [ALMOST_ONE], 16

								      fma.s1      exp_P_hi  = exp_rsq, exp_P1, exp_r // r^2*P1 + r

								      nop.i       0

								}

								{ .mfi

								      nop.m       0

								      fma.s1      exp_S2 = exp_f,exp_T2,f0 // S2 = f * T2

								      nop.i       0

								};;

								{ .mfi

								      nop.m       0

								      fma.s1      exp_S1 = EXP_2M,exp_T1,f0 // S1 = 2^M * T1

								      nop.i       0

								};;

								{ .mfi

								      nop.m       0

								      fma.s1      exp_P = exp_rcube, exp_P_lo, exp_P_hi // P = r^3*P_lo + P_hi

								      nop.i       0

								};;

								{ .mfi

								      nop.m       0

								      fma.s1      exp_S = exp_S1,exp_S2,f0 // S = S1 * S2

								      nop.i       0

								}

								{ .mfi

								      nop.m       0

								      fma.s1      exp_ExppOne  = exp_S1,exp_S2,f1 // S + 1

								      nop.i       0

								}

								;;

								{ .mfi

								      // same replacement of tanh_Two as under p14, taken when the
								      // masked fpsr bits equal reg_RcDown; p14 and p15 cannot both be
								      // set, so the pointer is post-incremented at most once
								(p15) ldfe        tanh_Two = [ALMOST_ONE], 16

								      // exp_ExppOne = S*P + (S + 1) = 1 + exp(2*|x|)
								      fma.s1      exp_ExppOne = exp_S, exp_P, exp_ExppOne

								      nop.i       0

								};;

								// Reciprocal of d = exp_ExppOne: frcpa seed r0 followed by three
								// Newton-Raphson iterations.  In the per-instruction notes below "x"
								// denotes d, not the function argument.

								{ .mfi

								      nop.m       0

								      frcpa.s1    tanh_rcp0, p6 = f1, exp_ExppOne

								      nop.i       0

								}

								;;

								// NR method: iteration #1

								{ .mfi

								      nop.m       0

								      fnma.s1     tanh_rcp1 = tanh_rcp0, exp_ExppOne, f1 // t = 1 - r0*x

								      nop.i       0

								};;

								{ .mfi

								      nop.m       0

								      // r1 = r0 + r0*t = r0 + r0*(1 - r0*x)

								      fma.s1      tanh_rcp1 = tanh_rcp0, tanh_rcp1, tanh_rcp0

								      nop.i       0

								};;

								// NR method: iteration #2

								{ .mfi

								      nop.m       0

								      fnma.s1     tanh_rcp2 = tanh_rcp1, exp_ExppOne, f1 // t = 1 - r1*x

								      nop.i       0

								};;

								{ .mfi

								      nop.m       0

								      // r2 = r1 + r1*t = r1 + r1*(1 - r1*x)

								      fma.s1      tanh_rcp2 = tanh_rcp1, tanh_rcp2, tanh_rcp1

								      nop.i       0

								};;

								// NR method: iteration #3

								{ .mfi

								      nop.m       0

								      fnma.s1     tanh_rcp3 = tanh_rcp2, exp_ExppOne, f1 // t = 1 - r2*x

								      nop.i       0

								};;

								{ .mfi

								      nop.m       0

								      // y = r2 + r2*t = r2 + r2*(1 - r2*x)
								      // exp_ExppOne now holds y = 1 / (1 + exp(2*|x|))

								      fma.s1      exp_ExppOne = tanh_rcp2, tanh_rcp3, tanh_rcp2

								      nop.i       0

								};;


								.pred.rel "mutex",p11,p10

								{ .mfi

								      nop.m       0

								      // x >= 0: tanh(x) = 1 - 2 / (1 + e^(2*|x|)) = 1 - y*Two

								(p11) fnma.d.s0   f8 = exp_ExppOne, tanh_Two, f1

								      nop.i       0

								}

								{ .mfb

								      nop.m       0

								      // x < 0: tanh(x) = 2 / (1 + e^(2*|x|)) - 1 = y*Two - 1

								(p10) fms.d.s0    f8 = exp_ExppOne, tanh_Two, f1

								      br.ret.sptk b0 // Normal path exit

								};;


								// Here if |x| < 1/16 (reached through the p12 branch)

								// Evaluates the odd polynomial
								//   P13(x) = (((C13*x^2 + C11)*x^4 + (C9*x^2 + C7))*x^4 +
								//             (C5*x^2 + C3))*x^3 + x
								// using tanh_X2, tanh_X3 and tanh_X4 computed before the branch.
								// tanh_C13, tanh_C9 and tanh_C5 are overwritten as accumulators.
								// Intermediate steps use status field s1; only the final fma rounds
								// to double in s0 and writes the result to f8.

								tanh_near_zero:

								{ .mfi

								      nop.m       0

								      fma.s1      tanh_C13 = tanh_C13, tanh_X2, tanh_C11 // C13*x^2 + C11

								      nop.i       0

								}

								{ .mfi

								      nop.m       0

								      fma.s1      tanh_C9 = tanh_C9, tanh_X2, tanh_C7 // C9*x^2 + C7

								      nop.i       0

								};;

								{ .mfi

								      nop.m       0

								      fma.s1      tanh_C5 = tanh_C5, tanh_X2, tanh_C3 // C5*x^2 + C3

								      nop.i       0

								};;

								{ .mfi

								      nop.m       0

								      // (C13*x^2 + C11)*x^4 + (C9*x^2 + C7)
								      fma.s1      tanh_C13 = tanh_C13, tanh_X4, tanh_C9

								      nop.i       0

								};;

								{ .mfi

								      nop.m       0

								      // (previous)*x^4 + (C5*x^2 + C3)
								      fma.s1      tanh_C13 = tanh_C13, tanh_X4, tanh_C5

								      nop.i       0

								};;

								{ .mfb

								      nop.m       0

								      // result = (previous)*x^3 + x
								      fma.d.s0    f8 = tanh_C13, tanh_X3, f8

								      br.ret.sptk b0

								};;


								.endp tanh