Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

365 lines
11 KiB

  1. /***
  2. *pow.c - raise to a power
  3. *
  4. * Copyright (c) 1991-2001, Microsoft Corporation. All rights reserved.
  5. *
  6. *Purpose:
  7. *
  8. *Revision History:
  9. * 8-15-91 GDP written
  10. * 12-20-91 GDP support IEEE exceptions & denormals
  11. * 1-11-92 GDP special handling of small powers
  12. * special handling of u1, u2 when cancellation occurs
  13. * 3-22-92 GDP changed handling of int exponents, pow(0, neg)
  14. * added check to avoid internal overflow due to large y
  15. * 6-23-92 GDP adjusted special return values according to NCEG spec
  16. * 02-06-95 JWM Mac merge
  17. * 02-07-95 JWM powhlp() usage restored to Intel version.
  18. * 10-07-97 RDL Added IA64.
  19. *
  20. *******************************************************************************/
  21. #include <math.h>
  22. #include <trans.h>
  23. #include <float.h>
  24. #if defined(_M_IA64)
  25. #pragma function(pow)
  26. #endif
  27. static double _reduce(double);
  28. static double const a1[18] = {
  29. 0.00000000000000000000e+000, /* dummy element */
  30. 1.00000000000000000000e+000,
  31. 9.57603280698573646910e-001,
  32. 9.17004043204671231754e-001,
  33. 8.78126080186649741555e-001,
  34. 8.40896415253714543073e-001,
  35. 8.05245165974627154042e-001,
  36. 7.71105412703970411793e-001,
  37. 7.38413072969749655712e-001,
  38. 7.07106781186547524436e-001,
  39. 6.77127773468446364133e-001,
  40. 6.48419777325504832961e-001,
  41. 6.20928906036742024317e-001,
  42. 5.94603557501360533344e-001,
  43. 5.69394317378345826849e-001,
  44. 5.45253866332628829604e-001,
  45. 5.22136891213706920173e-001,
  46. 5.00000000000000000000e-001
  47. };
  48. static double const a2[9] = {
  49. 0.00000000000000000000e+000, /* dummy element */
  50. -5.31259064517897172664e-017,
  51. 1.47993596544271355242e-017,
  52. 1.23056946577104753260e-017,
  53. -1.74014448683923461658e-017,
  54. 3.84891771232354074073e-017,
  55. 2.33103467084383453312e-017,
  56. 4.45607092891542322377e-017,
  57. 4.27717757045531499216e-017
  58. };
  59. static double const log2inv = 1.44269504088896340739e+0; // 1/log(2)
  60. static double const K = 0.44269504088896340736e+0;
  61. static double const p1 = 0.83333333333333211405e-1;
  62. static double const p2 = 0.12500000000503799174e-1;
  63. static double const p3 = 0.22321421285924258967e-2;
  64. static double const p4 = 0.43445775672163119635e-3;
  65. #define P(v) (((p4 * v + p3) * v + p2) * v + p1)
  66. static double const q1 = 0.69314718055994529629e+0;
  67. static double const q2 = 0.24022650695909537056e+0;
  68. static double const q3 = 0.55504108664085595326e-1;
  69. static double const q4 = 0.96181290595172416964e-2;
  70. static double const q5 = 0.13333541313585784703e-2;
  71. static double const q6 = 0.15400290440989764601e-3;
  72. static double const q7 = 0.14928852680595608186e-4;
  73. #define Q(w) ((((((q7 * w + q6) * w + q5) * w + q4) * w + \
  74. q3) * w + q2) * w + q1)
  75. /*
  76. * Thresholds for over/underflow that results in an adjusted value
  77. * too big/small to be represented as a double. An infinity or 0
  78. * is delivered to the trap handler instead
  79. */
  80. static _dbl const _ovfx ={SET_DBL(0x40e40000,0)}; // 16*log2(XMAX*2^IEEE_ADJ)
  81. static _dbl const _uflx ={SET_DBL(0xc0e3fc00,0)}; // 16*log2(XMIN*2^(-IEEE_ADJ))
  82. #define OVFX _ovfx.dbl
  83. #define UFLX _uflx.dbl
  84. #define INT_POW_LIMIT 128.0
  85. static double ymax = 1e20;
  86. static double _reduce(double x)
  87. {
  88. return 0.0625 * _frnd( 16.0 * x);
  89. }
  90. /***
  91. *double pow(double x, double y) - x raised to the power of y
  92. *
  93. *Purpose:
  94. * Calculate x^y
  95. * Algorithm from Cody & Waite
  96. *
  97. *Entry:
  98. *
  99. *Exit:
  100. *
  101. *Exceptions:
  102. *
  103. * All 5 IEEE exceptions may occur
  104. *
  105. *******************************************************************************/
  106. double pow(double x, double y)
  107. {
  108. uintptr_t savedcw;
  109. int m,mprim;
  110. int p,pprim;
  111. int i,iw1;
  112. int iy;
  113. int newexp;
  114. double diw1;
  115. double sign;
  116. double g,z,bigz,v,rz,result;
  117. double u1,u2,y1,y2,w,w1,w2;
  118. double savedx;
  119. /* save user fp control word */
  120. savedcw = _maskfp();
  121. savedx = x; // save original value of first argument
  122. if (_fpclass(y) & (_FPCLASS_NZ | _FPCLASS_PZ)) {
  123. RETURN(savedcw, 1.0);
  124. }
  125. /* Check for zero^y */
  126. if (_fpclass(x) & (_FPCLASS_NZ | _FPCLASS_PZ)) { /* x==0? */
  127. int type;
  128. type = _d_inttype(y);
  129. if (y < 0.0) {
  130. result = (type == _D_ODD ? _copysign(D_INF,x) : D_INF);
  131. return _except2(FP_Z,OP_POW,savedx,y,result,savedcw|ISW_ZERODIVIDE);
  132. }
  133. else if (y > 0.0) {
  134. result = (type == _D_ODD ? x : 0.0);
  135. RETURN(savedcw, result);
  136. }
  137. }
  138. /* check for infinity or NAN */
  139. if (IS_D_SPECIAL(x) || IS_D_SPECIAL(y)) {
  140. double absx = fabs(x);
  141. if (IS_D_SNAN(x) || IS_D_SNAN(y)) {
  142. return _except2(FP_I,OP_POW,savedx,y,_d_snan2(x,y),savedcw | (ISW_INVALID>>5) );
  143. }
  144. if (IS_D_QNAN(x) || IS_D_QNAN(y)){
  145. return _handle_qnan2(OP_POW,x,y,savedcw | (ISW_INVALID>>5) );
  146. }
  147. /* there is at least one infinite argument ... */
  148. if (_powhlp(x, y, &result)) { /* removed "<" 0. */
  149. return _except2(FP_I,OP_POW,savedx,y,result,savedcw | (ISW_INVALID>>5) );
  150. }
  151. RETURN(savedcw, result);
  152. }
  153. sign = 1.0;
  154. if (x < 0) {
  155. switch (_d_inttype(y)) {
  156. case _D_ODD: /* y is an odd integral value */
  157. sign = -1.0;
  158. /* NO BREAK */
  159. case _D_EVEN:
  160. x = -x;
  161. break;
  162. default: /* y is not an integral value */
  163. return _except2(FP_I,OP_POW,savedx,y,D_IND,savedcw|(ISW_INVALID>>5));
  164. }
  165. }
  166. //
  167. // This is here in order to prevent internal overflows
  168. // due to a large value of y
  169. // The following relation holds on overflow with a scaled
  170. // result out of range
  171. // (lg stands for log base 2)
  172. // |y| * |lg(x)| > MAXEXP + IEEE_ADJUST <=>
  173. // |y| > 2560 / |lg(x)|
  174. // The values of lg(x) closer to 0 are:
  175. // x lg(x)
  176. // 3fefffffffffffff (0,99...9) -1.601e-16
  177. // 3ff0000000000000 (1.0) 0.0
  178. // 3ff0000000000001 (1.00...1) 3.203e-16
  179. //
  180. // So if |y| > 2560/1.6e-16 = 1.6e19 overflow occurs
  181. // We set ymax to 1e20 in order to have a safety margin
  182. //
  183. if (ABS(y) > ymax) {
  184. if (y < 0) {
  185. y = -y;
  186. //
  187. // this may cause an underflow
  188. // there is no problem with fp sw pollution because
  189. // a FP_U exception is going to be raised anyway.
  190. //
  191. x = 1.0 / x;
  192. }
  193. if (x > 1.0) {
  194. return _except2(FP_O | FP_P,OP_POW,savedx,y,sign*D_INF,savedcw|ISW_OVERFLOW);
  195. }
  196. else if (x < 1.0){
  197. return _except2(FP_U | FP_P,OP_POW,savedx,y,sign*0.0,savedcw|ISW_UNDERFLOW);
  198. }
  199. else {
  200. RETURN(savedcw, sign*1.0);
  201. }
  202. }
  203. /* determine m, g */
  204. g = _decomp(x, &m);
  205. /* handle small integer powers
  206. * for small integer powers this is faster that Cody&Waite's
  207. * algorithm, and yields better precision
  208. * Without this piece of code there was not enough precision
  209. * to satisfy all requirements of the 'paranoia' test.
  210. * We choose INT_POW_LIMIT such that (1) no overflow or underflow
  211. * occurs while computing bigz (g is in the range
  212. * [0.5, 1.0) or (1.0, 2.0] so INT_POW_LIMIT should be less than
  213. * approximately 10^3) and (2) no extraordinary loss of precision
  214. * occurs because of repeated multiplications (this practically
  215. * restricts the maximum INT_POW_LIMIT to 128).
  216. */
  217. if (y <= INT_POW_LIMIT &&
  218. _d_inttype(x) != _D_NOINT &&
  219. _d_inttype(y) != _D_NOINT &&
  220. y > 0.0 ) {
  221. iy = (int)y;
  222. mprim = m * iy;
  223. for (bigz=1 ; iy ; iy >>= 1, g *= g) {
  224. if (iy & 0x1)
  225. bigz *= g;
  226. }
  227. newexp = _get_exp(bigz) + mprim;
  228. if (newexp > MAXEXP + IEEE_ADJUST) {
  229. return _except2(FP_O | FP_P, OP_POW, savedx, y, sign*bigz*D_INF, savedcw);
  230. }
  231. if (newexp < MINEXP - IEEE_ADJUST) {
  232. return _except2(FP_U | FP_P, OP_POW, savedx, y, sign*bigz*0.0, savedcw);
  233. }
  234. }
  235. else {
  236. /* determine p using binary search */
  237. p = 1;
  238. if (g <= a1[9])
  239. p = 9;
  240. if (g <= a1[p+4])
  241. p += 4;
  242. if (g <= a1[p+2])
  243. p += 2;
  244. /* C&W's algorithm is not very accurate when m*16-p == 1,
  245. * because there is cancellation between u1 and u2.
  246. * Handle this separately.
  247. */
  248. if (ABS(m*16-p) == 1) {
  249. u1 = log(x) * log2inv;
  250. u2 = 0.0;
  251. }
  252. else {
  253. /* determine z */
  254. z = ( (g - a1[p+1]) - a2[(p+1)/2] ) / ( g + a1[p+1] );
  255. z += z;
  256. /* determine u2 */
  257. v = z * z;
  258. rz = P(v) * v * z;
  259. rz += K * rz;
  260. u2 = (rz + z * K) + z;
  261. u1 = (m * 16 - p) * 0.0625;
  262. }
  263. /* determine w1, w2 */
  264. y1 = _reduce(y);
  265. y2 = y - y1;
  266. w = u2 * y + u1 * y2;
  267. w1 = _reduce(w);
  268. w2 = w - w1;
  269. w = w1 + u1 * y1;
  270. w1 = _reduce(w);
  271. w2 += w - w1;
  272. w = _reduce(w2);
  273. diw1 = 16 * (w1 + w); /* iw1 might overflow here, so use diw1 */
  274. w2 -= w;
  275. if (diw1 > OVFX) {
  276. return _except2(FP_O | FP_P,OP_POW,savedx,y,sign*D_INF,savedcw | ISW_OVERFLOW);
  277. }
  278. if (diw1 < UFLX) {
  279. return _except2(FP_U | FP_P,OP_POW,savedx,y,sign*0.0,savedcw | ISW_UNDERFLOW);
  280. }
  281. iw1 = (int) diw1; /* now it is safe to cast to int */
  282. /* make sure w2 <= 0 */
  283. if (w2 > 0) {
  284. iw1 += 1;
  285. w2 -= 0.0625;
  286. }
  287. /* determine mprim, pprim */
  288. i = iw1 < 0 ? 0 : 1;
  289. mprim = iw1 / 16 + i;
  290. pprim = 16 * mprim - iw1;
  291. /* determine 2^w2 */
  292. bigz = Q(w2) * w2;
  293. /* determine final result */
  294. bigz = a1[pprim + 1] + a1[pprim + 1] * bigz;
  295. newexp = _get_exp(bigz) + mprim;
  296. }
  297. if (newexp > MAXEXP) {
  298. result = sign * _set_exp(bigz, newexp - IEEE_ADJUST);
  299. return _except2(FP_O | FP_P, OP_POW, savedx, y, sign*D_INF, savedcw|ISW_OVERFLOW);
  300. }
  301. if (newexp < MINEXP) {
  302. result = sign * _set_exp(bigz, newexp + IEEE_ADJUST);
  303. return _except2(FP_U | FP_P, OP_POW, savedx, y, sign*0.0, savedcw|ISW_UNDERFLOW);
  304. }
  305. result = sign * _set_exp(bigz, newexp);
  306. RETURN_INEXACT2(OP_POW, savedx, y, result, savedcw|ISW_INEXACT);
  307. }