.file "ldexpf.s" // Copyright (c) 2000, Intel Corporation // All rights reserved. // // Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation. // // WARRANTY DISCLAIMER // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Intel Corporation is the author of this code, and requests that all // problem reports or change requests be submitted to it directly at // http://developer.intel.com/opensource. // // History //============================================================== // 2/02/00 Initial version // 4/04/00 Unwind support added // 5/22/00 rewritten to not take swa and be a little faster // 8/15/00 Bundle added after call to __libm_error_support to properly // set [the previously overwritten] GR_Parameter_RESULT. //12/07/00 Removed code that prevented call to __libm_error_support. // Stored r33 instead of f9 as Parameter 2 for call to // __libm_error_support. // // API //============================================================== // float = ldexpf (float x, int n) // input floating point f8 and integer r33 // output floating point f8 // // returns x* 2**n computed by exponent // manipulation rather than by actually performing an // exponentiation or a multiplication. // // Overview of operation //============================================================== // ldexpf: // p7 is set if x is nan, inf, zero; go to x_nan_inf_zero // sign extend r33 // norm_f8 = fnorm(f8) // get exponent of norm_f8 // add to r33 to get new exponent // p6, new_exponent > 103fe => overflow // p7, new_exponent > fbcd => underflow // setf new_exponent, merge significand, normalize, return ldexp_float_int_f9 = f10 ldexp_int_f9 = f11 ldexp_max_exp = f12 ldexp_neg_max_exp = f13 ldexp_new_f9 = f14 LDEXP_BIG = f32 LDEXP_NORM_F8 = f33 LDEXP_FFFF = f34 LDEXP_BIG_SIGN = f35 LDEXP_13FFE = f36 LDEXP_INV_BIG_SIGN = f37 // general registers used // r32 has ar.pfs // r33 has input integer ldexp_GR_signexp = r34 ldexp_GR_13FFE = r35 ldexp_GR_new_exponent = r36 ldexp_GE_FBCD = r37 ldexp_GR_17ones = r38 GR_SAVE_B0 = r39 GR_SAVE_GP = r40 GR_SAVE_PFS = r41 ldexp_GR_exponent = r42 ldexp_GR_103FE = r43 ldexp_GR_FFFF = r44 GR_Parameter_X = r45 GR_Parameter_Y = r46 GR_Parameter_RESULT = r47 ldexp_GR_tag = r48 .global ldexpf // =============================================================== // LDEXPF // =============================================================== .text .proc ldexpf .align 32 // Be sure to sign extend r33 because the // integer comes in as 32-bits ldexpf: // x NAN, INF, ZERO, +- { .mfi alloc r32=ar.pfs,1,12,4,0 fclass.m.unc p7,p0 = f8, 0xe7 //@qnan | @snan | @inf | @zero sxt4 r33 = r33 } ;; { .mfi nop.m 999 fnorm LDEXP_NORM_F8 = f8 nop.i 999 } { .mbb nop.m 999 (p7) br.cond.spnt LDEXP_X_NAN_INF_ZERO nop.b 999 } ;; // LDEXP_BIG gets a big number, enough to overflow an frcpa // but not take an architecturally mandated swa. // We construct this constant rather than load it. { .mlx mov ldexp_GR_17ones = 0x1FFFF movl ldexp_GR_FFFF = 0xffffffffffffffff } { .mfi addl ldexp_GR_13FFE = 0x13ffe, r0 nop.f 999 nop.i 999 } ;; { .mmb setf.exp LDEXP_13FFE = ldexp_GR_13FFE setf.sig LDEXP_FFFF = ldexp_GR_FFFF nop.b 999 } ;; { .mfi nop.m 999 fmerge.se LDEXP_BIG = LDEXP_13FFE, LDEXP_FFFF nop.i 999 } // Put the absolute normalized exponent in ldexp_GR_new_exponent // Assuming that the input int is in r33. // ldexp_GR_new_exponent gets the input int + the exponent of the input double { .mfi getf.exp ldexp_GR_signexp = LDEXP_NORM_F8 nop.f 999 nop.i 999 } ;; { .mii nop.m 999 nop.i 999 and ldexp_GR_exponent = ldexp_GR_signexp, ldexp_GR_17ones } ;; // HUGE // Put big number in ldexp_GR_103FE // If ldexp_GR_new_exponent is bigger than ldexp_GR_103FE // Return a big number of the same sign // double: largest double exponent is 7fe (double-biased) // 103fe (register-biased) // f11 gets the big value in f9 with the f8 sign // For single, // single: largest single exponent is fe (single-biased) // fe - 7f + ffff = 1007e { .mii add ldexp_GR_new_exponent = ldexp_GR_exponent, r33 addl ldexp_GR_103FE = 0x1007e, r0 nop.i 999 } ;; { .mfi setf.exp f12 = ldexp_GR_new_exponent nop.f 999 cmp.gt.unc p6,p0 = ldexp_GR_new_exponent, ldexp_GR_103FE } ;; { .mfb nop.m 999 (p6) fmerge.s LDEXP_BIG_SIGN = f8, LDEXP_BIG nop.b 999 } ;; { .mfi nop.m 999 (p6) fma.s f12 = LDEXP_BIG_SIGN, LDEXP_BIG, f0 (p6) mov ldexp_GR_tag = 148 } { .mib nop.m 999 nop.i 999 (p6) br.spnt LDEXP_HUGE } ;; // TINY // Put a small number in ldexp_GE_FBCD // If ldexp_GR_new_exponent is less than ldexp_GE_FBCD // Return a small number of the same sign // double: // 0xfbcd is -1074 unbiased, which is the exponent // of the smallest double denormal // single // 0xff6a is -149 unbiased which is the exponent // of the smallest single denormal // // Take the large value in f9 and put in f10 with // the sign of f8. Then take reciprocal in f11 { .mfi addl ldexp_GE_FBCD = 0xff6a, r0 nop.f 999 nop.i 999 } ;; { .mfi nop.m 999 nop.f 999 cmp.lt.unc p7,p0 = ldexp_GR_new_exponent, ldexp_GE_FBCD } ;; { .mfi nop.m 999 (p7) fmerge.s LDEXP_BIG_SIGN = f8, LDEXP_BIG nop.i 999 } ;; { .mfi nop.m 999 (p7) frcpa.s1 LDEXP_INV_BIG_SIGN,p10 = f1,LDEXP_BIG_SIGN nop.i 999 } ;; { .mfi nop.m 999 (p7) fnorm.s f12 = LDEXP_INV_BIG_SIGN (p7) mov ldexp_GR_tag = 149 } { .mib nop.m 999 nop.i 999 (p7) br.spnt LDEXP_TINY } ;; // CALCULATION // Put exponent of answer in f12 // f10 has the normalized f8 // f13 = exp(f12) sig(f10) // f14 = sign(f8) expsig(f13) { .mfi nop.m 999 fmerge.se f13 = f12,LDEXP_NORM_F8 nop.i 999 } ;; { .mfi nop.m 999 fmerge.s f14 = f8,f13 nop.i 999 } ;; { .mfb nop.m 999 fnorm.s f8 = f14 br.ret.sptk b0 } ;; LDEXP_N_NAN_INF: // Is n a NAN? { .mfi nop.m 999 (p0) fclass.m.unc p6,p0 = f9, 0xc3 //@snan | @qnan nop.i 999 } ;; { .mfi nop.m 999 (p6) fma.s f8 = f8,f9,f0 nop.i 999 } // Is n +INF? { .mfi nop.m 999 (p0) fclass.m.unc p7,p0 = f9, 0x21 //@inf | @pos nop.i 999 } ;; { .mfi nop.m 999 (p7) fma.s f8 = f8,f9,f0 nop.i 999 } // Is n -inf? { .mfi nop.m 999 fclass.m.unc p8,p9 = f9, 0x22 //@inf | @neg nop.i 999 } ;; { .mfb nop.m 999 (p8) frcpa f8,p6 = f8,f9 br.ret.sptk b0 } ;; LDEXP_X_NAN_INF_ZERO: { .mfb nop.m 999 fnorm.s f8 = f8 // quietize br.ret.sptk b0 } ;; .endp ldexpf .proc __libm_error_region __libm_error_region: LDEXP_HUGE: LDEXP_TINY: .prologue { .mfi add GR_Parameter_Y=-32,sp // Parameter 2 value nop.f 0 .save ar.pfs,GR_SAVE_PFS mov GR_SAVE_PFS=ar.pfs // Save ar.pfs } { .mfi .fframe 64 add sp=-64,sp // Create new stack nop.f 0 mov GR_SAVE_GP=gp // Save gp };; { .mmi st8 [GR_Parameter_Y] = r33,16 // STORE Parameter 2 on stack add GR_Parameter_X = 16,sp // Parameter 1 address .save b0, GR_SAVE_B0 mov GR_SAVE_B0=b0 // Save b0 };; .body { .mib stfs [GR_Parameter_X] = f8 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address nop.b 0 } { .mib stfs [GR_Parameter_Y] = f12 // STORE Parameter 3 on stack add GR_Parameter_Y = -16,GR_Parameter_Y br.call.sptk b0=__libm_error_support# // Call error handling function };; { .mmi nop.m 0 nop.m 0 add GR_Parameter_RESULT = 48,sp };; { .mmi ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack .restore add sp = 64,sp // Restore stack pointer mov b0 = GR_SAVE_B0 // Restore return address };; { .mib mov gp = GR_SAVE_GP // Restore gp mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs br.ret.sptk b0 // Return };; .endp __libm_error_region .type __libm_error_support#,@function .global __libm_error_support#