Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1516 lines
52 KiB

  1. /*++
  2. Copyright (c) 2000 Microsoft Corporation
  3. Module Name:
  4. bignum.h
  5. Abstract:
  6. bignum package routines and defines.
  7. --*/
  8. #include <windows.h>
  9. #include <math.h>
  10. #ifndef RADIX_BITS /* If not previously #included */
  11. #define MP_LONGEST_BITS 4096
  12. /*
  13. Multiple precision moduli can have up to
  14. MP_LONGEST_BITS bits, which is
  15. MP_LONGEST words. Some routines allow
  16. longer operands.
  17. */
  18. /*
  19. Error messages are not printed in the
  20. production version of the code.
  21. In the test version, compiled
  22. by MSVC with ENABLE_ERROR_MESSAGES
  23. listed under PREPROCESSOR DEFINITIONS
  24. in the project workspace, they are printed.
  25. */
  /*
      PRINT_ERROR_MESSAGES is 1 when error text should be printed and 0
      otherwise.  A value supplied by the build is left untouched; when
      none is supplied, it follows ENABLE_ERROR_MESSAGES.
  */
  #if !defined(PRINT_ERROR_MESSAGES)
  #  if defined(ENABLE_ERROR_MESSAGES)
  #    define PRINT_ERROR_MESSAGES 1
  #  else
  #    define PRINT_ERROR_MESSAGES 0
  #  endif
  #endif
  33. #if PRINT_ERROR_MESSAGES
  34. #include <stdio.h>
  35. #include <string.h>
  36. #include <stdlib.h>
  37. #endif
  /*
      Compiler identification.  COMPILER may be preset by the build;
      otherwise it is derived from the compiler's predefined macros.
      Compilation stops if no known compiler is recognized.
  */
  #define COMPILER_GCC 1
  #define COMPILER_VC 2

  #if !defined(COMPILER)
  #  if defined(__GNUC__)       /* GNU compiler */
  #    define COMPILER COMPILER_GCC
  #  endif
  #  if defined(_MSC_VER)       /* Microsoft Visual C compiler */
  #    define COMPILER COMPILER_VC
  #  endif
  #endif

  #if !defined(COMPILER) || COMPILER <= 0
  #error -- "Unknown COMPILER"
  #endif

  /* Human-readable name of the compiler selected above. */
  #define COMPILER_NAME ( \
        COMPILER == COMPILER_VC  ? "Microsoft Visual C++ compiler" \
      : COMPILER == COMPILER_GCC ? "GCC compiler" \
      : "Unknown compiler")
  56. /*
  57. Major Windows operating systems
  58. */
  59. #define OS_WCE 1
  60. #define OS_WNT 2
  61. #if defined(_WIN32_WCE)
  62. #define TARGET_OS OS_WCE
  63. #define assert(exp) 0 // or ASSERT(exp)
  64. // No assert.h in Windows CE
  65. #elif defined(WIN32)
  66. #define TARGET_OS OS_WNT
  67. #else
  68. #error "Unknown OS target"
  69. #endif
  70. /*
  71. List of architectures on which code has been run.
  72. The SPARC code was used only during development,
  73. and is not a deliverable.
  74. */
  75. #define TARGET_AMD64 1
  76. #define TARGET_IX86 2
  77. #define TARGET_MIPS 3
  78. #define TARGET_PPC 4
  79. #define TARGET_SPARC 5
  80. #define TARGET_IA64 6
  81. #define TARGET_ARM 7
  82. #define TARGET_SHX 8
  83. #ifndef TARGET
  84. #if defined(_M_AMD64) // AMD 64-bit
  85. #define TARGET TARGET_AMD64
  86. #endif
  87. #if defined(_M_IX86) || defined(_x86) // Intel X86 (e.g., 486, Pentium)
  88. #define TARGET TARGET_IX86
  89. #endif
  90. #if defined(_M_MRX000) || defined(_MIPS_) // MIPS 32-bit systems
  91. #define TARGET TARGET_MIPS
  92. #endif
  93. #if defined(_M_PPC) // Motorola/Macintosh Power PC
  94. #define TARGET TARGET_PPC
  95. #endif
  96. #if defined(__sparc__) // Sun SPARC
  97. #define TARGET TARGET_SPARC
  98. #endif
  99. #if defined(_M_IA64) // Intel IA-64 (e.g., Merced, McKinley)
  100. #define TARGET TARGET_IA64
  101. #endif
  102. #if defined(_ARM_)
  103. #define TARGET TARGET_ARM
  104. #endif
  105. #if defined(_SH3_) || defined(_SH4_) // Hitachi SH-3 or SH-4
  106. #define TARGET TARGET_SHX
  107. #endif
  108. #endif
  109. #if !defined(TARGET) || TARGET <= 0
  110. #error -- "Unknown TARGET"
  111. #endif
  112. #define TARGET_NAME ( \
  113. TARGET == TARGET_AMD64 ? "AMD64" \
  114. : TARGET == TARGET_IX86 ? "Intel x86 (x >= 3) and Pentium" \
  115. : TARGET == TARGET_MIPS ? "MIPS R2000/R3000" \
  116. : TARGET == TARGET_PPC ? "Macintosh/Motorola PowerPC" \
  117. : TARGET == TARGET_SPARC ? "Sun SPARC" \
  118. : TARGET == TARGET_IA64 ? "Intel IA-64" \
  119. : TARGET == TARGET_ARM ? "ARM" \
  120. : TARGET == TARGET_SHX ? "Hitachi SHx" \
  121. : "Unknown target architecture")
  122. /*
  123. USEASM_AMD64, ... specify whether to use assembly language,
  124. if it has been written for a platform.
  125. */
  /*
      USEASM defaults to 1 on the targets listed in the condition below
      (x86, MIPS, SHx) and to 0 elsewhere.  A value preset by the build
      is honored, but it must be exactly 0 or 1.
  */
  #ifndef USEASM
  #  if TARGET == TARGET_IX86 || TARGET == TARGET_MIPS || TARGET == TARGET_SHX
  #    define USEASM 1
  #  else
  #    define USEASM 0
  #  endif
  #endif

  #if !defined(USEASM) || (USEASM != 0 && USEASM != 1)
  #error "USEASM not defined"
  #endif

  /* Per-target switches: nonzero only for the target being compiled. */
  #define USEASM_AMD64 ((TARGET == TARGET_AMD64) && USEASM)
  #define USEASM_IX86  ((TARGET == TARGET_IX86)  && USEASM)
  #define USEASM_MIPS  ((TARGET == TARGET_MIPS)  && USEASM)
  #define USEASM_PPC   ((TARGET == TARGET_PPC)   && USEASM)
  #define USEASM_SPARC ((TARGET == TARGET_SPARC) && USEASM)
  #define USEASM_IA64  ((TARGET == TARGET_IA64)  && USEASM)
  #define USEASM_ARM   ((TARGET == TARGET_ARM)   && USEASM)
  #define USEASM_SHX   ((TARGET == TARGET_SHX)   && USEASM)
  148. #if USEASM_SHX
  149. void __asm(const char*, ...); // this declartion needed to allow inline of asm
  150. #endif
  151. #if COMPILER == COMPILER_VC
  152. /*
  153. Visual C recognizes _inline but not inline.
  154. */
  155. #define inline _inline
  156. #pragma intrinsic(abs, labs, memcpy)
  157. #if TARGET != TARGET_SHX
  158. #pragma intrinsic(memset)
  159. #endif
  160. #pragma warning(disable: 4146 4514)
  161. /* 4146 -- unary minus operator applied
  162. to unsigned type, result still unsigned.
  163. 4514 -- unreferenced inline function
  164. */
  165. #endif
  166. #if TARGET_OS == OS_WCE
  167. #define assert(exp) 0 // or ASSERT(exp)
  168. // No assert.h in Windows CE
  169. #define CEstatic static
  170. // Windows CE stack limited to 64K
  171. // CEstatic should be used only in
  172. // test codes and other
  173. // single-threaded, non-recursive codes.
  174. #else
  175. #define CEstatic
  176. #endif
  177. /*
  178. x86 assembly routines are declared naked,
  179. so they do their own stack management and
  180. register saving.
  181. When using a DLL on Intel platforms, all functions use
  182. the __stdcall convention, so the assembly routines use it too.
  183. To ensure they are called with the __stdcall
  184. conventions always (i.e., even when compiled under Microsoft
  185. Developer Studio), we put __stdcall explicitly in the prototypes.
  186. */
  187. #if USEASM_IX86
  188. #define Naked86 __declspec(naked)
  189. #define Stdcall86 __stdcall
  190. #else
  191. #define Naked86
  192. #define Stdcall86
  193. #endif
  194. #if (TARGET == TARGET_AMD64) || (TARGET == TARGET_IA64)
  195. #define RADIX_BITS 64
  196. #define RADIX_BYTES 8
  197. typedef signed __int64 sdigit_t;
  198. typedef unsigned __int64 digit_t;
  199. #else
  200. #define RADIX_BITS 32
  201. #define RADIX_BYTES 4
  202. typedef signed __int32 sdigit_t;
  203. typedef unsigned __int32 digit_t;
  204. #endif
  205. #define MP_LONGEST (MP_LONGEST_BITS/RADIX_BITS)
  206. #if MP_LONGEST_BITS == RADIX_BITS
  207. #define LG2_MP_LONGEST 0
  208. #elif MP_LONGEST_BITS == 2*RADIX_BITS
  209. #define LG2_MP_LONGEST 1
  210. #elif MP_LONGEST_BITS == 4*RADIX_BITS
  211. #define LG2_MP_LONGEST 2
  212. #elif MP_LONGEST_BITS == 8*RADIX_BITS
  213. #define LG2_MP_LONGEST 3
  214. #elif MP_LONGEST_BITS == 16*RADIX_BITS
  215. #define LG2_MP_LONGEST 4
  216. #elif MP_LONGEST_BITS == 32*RADIX_BITS
  217. #define LG2_MP_LONGEST 5
  218. #elif MP_LONGEST_BITS == 64*RADIX_BITS
  219. #define LG2_MP_LONGEST 6
  220. #elif MP_LONGEST_BITS == 128*RADIX_BITS
  221. #define LG2_MP_LONGEST 7
  222. #elif MP_LONGEST_BITS == 256*RADIX_BITS
  223. #define LG2_MP_LONGEST 8
  224. #else
  225. #define LG2_MP_LONGEST 0
  226. #endif
  227. #if MP_LONGEST_BITS != RADIX_BITS << LG2_MP_LONGEST
  228. #error "Unrecognized value of MP_LONGEST_BITS"
  229. #endif
  230. /*
  231. The letter 'c' following a type name identifies
  232. a const entity of that type.
  233. */
  234. typedef const char charc;
  235. typedef const digit_t digit_tc;
  236. typedef const sdigit_t sdigit_tc;
  237. typedef const int intc;
  238. typedef int BOOL; /* Same as windef.h */
  239. #ifndef TRUE
  240. #define TRUE 1
  241. #endif
  242. #ifndef FALSE
  243. #define FALSE 0
  244. #endif
  245. #define DIGIT_ZERO ((digit_t)0)
  246. #define DIGIT_ONE ((digit_t)1)
  247. #define RADIX_HALF (DIGIT_ONE << (RADIX_BITS - 1))
  248. #define RADIXM1 (-DIGIT_ONE)
  249. #define F_RADIX ((double)RADIXM1 + 1.0)
  250. #define HALF_RADIX_BITS (RADIX_BITS/2)
  251. #if (RADIX_BITS != 2*HALF_RADIX_BITS)
  252. #error -- "RADIX_BITS must be even"
  253. #endif
  254. #define RADIX_HALFMASK_BOTTOM (RADIXM1 >> HALF_RADIX_BITS)
  255. // Multiple-precision data is normally represented
  256. // in radix 2^RADIX_BITS, with RADIX_BITS bits per word.
  257. // Here ``word'' means type digit_t. RADIX_BITS
  258. // is 32 on some architectures (Intel, MIPS, PowerPC)
  259. // and 64 bits on other architectures (AMD64, IA-64).
  260. // Within Windows NT, the data type DWORD predominates.
  261. // DWORD is a 32-bit unsigned datatype on all platforms
  262. // (Intel, Alpha, MIPS, PowerPC). DWORD data can safely be
  263. // written to disk on one architecture and read back on another,
  264. // unlike digit_t.
  265. // [CAUTION -- Even DWORD is not safe when sending data to
  266. // big-endian architectures, such as Office products for the Macintosh.]
  267. typedef unsigned char BYTE;
  268. typedef unsigned long DWORD;
  269. typedef const DWORD DWORDC;
  270. #define DWORD_BITS 32
  271. #define DWORD_LEFT_BIT 0x80000000UL
  272. #if RADIX_BITS % DWORD_BITS != 0
  273. #error "RADIX_BITS not a multiple of 32"
  274. #endif
  /* Number of DWORDs that fit in one digit_t. */
  #define DWORDS_PER_DIGIT (RADIX_BITS/DWORD_BITS)

  /*
      Length conversions:
        DWORDS_TO_DIGITS(lng_dwords) -- digit_t elements needed to hold
                                        an array of lng_dwords DWORDs
                                        (rounded up).
        DIGITS_TO_DWORDS(lng_digits) -- DWORDs spanned by lng_digits
                                        digit_t elements.
        BITS_TO_DIGITS(nb)           -- digit_t elements needed to hold
                                        nb bits (rounded up).
  */
  #define DWORDS_TO_DIGITS(lng_dwords) \
      (((lng_dwords) + (DWORDS_PER_DIGIT - 1)) / DWORDS_PER_DIGIT)
  #define DIGITS_TO_DWORDS(lng_digits) (DWORDS_PER_DIGIT * (lng_digits))
  #define BITS_TO_DIGITS(nb) (((nb) + (RADIX_BITS - 1)) / RADIX_BITS)
  283. /*
  284. DOUBLE_SHIFT_LEFT(n1, n0, amt) returns
  285. n1 shifted left by amt bits,
  286. with new bits coming in from the top of n0.
  287. DOUBLE_SHIFT_RIGHT(n1, n0, amt) returns n0 shifted right
  288. by amt bits, with new bits coming from the bottom of n1.
  289. The shift counts must satisfy 0 <= amt <= RADIX_BITS - 1.
  290. The shift by RADIX_BITS - amt is done in two stages
  291. (first by 1, then by RADIX_BITS - 1 - amt),
  292. to avoid an illegal shift count of RADIX_BITS if amt = 0.
  293. DOUBLE_SHIFT_LEFT_NONZERO and DOUBLE_SHIFT_RIGHT_NONZERO
  294. are similar, but disallow a zero shift count, allowing the
  295. RADIX_BITS - amt shift to be done in one stage,
  296. DOUBLE_SHIFT_LEFT_NONZERO(n1, n0, amt) is the same as
  297. DOUBLE_SHIFT_RIGHT_NONZERO(n1, n0, RADIX_BITS - amt).
  298. TBD -- If the x86 VC compiler optimizes __int64 shifts,
  299. (6.0 SP3 does not), try to rewrite these definitions to generate
  300. SHLD and SHRD instructions..
  301. */
  /*
      Two-word shifts.  The plain forms accept 0 <= amt <= RADIX_BITS - 1
      and split the complementary shift into two steps so that a shift
      count of RADIX_BITS is never used.  The _NONZERO forms require
      amt != 0 and use a single complementary shift.
  */
  #define DOUBLE_SHIFT_LEFT(n1, n0, amt) \
      ((((n0) >> 1) >> (RADIX_BITS - 1 - (amt))) | ((n1) << (amt)))
  #define DOUBLE_SHIFT_LEFT_NONZERO(n1, n0, amt) \
      (((n0) >> (RADIX_BITS - (amt))) | ((n1) << (amt)))
  #define DOUBLE_SHIFT_RIGHT(n1, n0, amt) \
      ((((n1) << 1) << (RADIX_BITS - 1 - (amt))) | ((n0) >> (amt)))
  #define DOUBLE_SHIFT_RIGHT_NONZERO(n1, n0, amt) \
      (((n1) << (RADIX_BITS - (amt))) | ((n0) >> (amt)))
  310. #include "dblint.h"
  /* Value (0 or 1) of bit number ibit of iword; bit 0 is the rightmost bit. */
  #define digit_getbit(iword, ibit) (1 & ((iword) >> (ibit)))
  #define dword_getbit(iword, ibit) (1 & ((iword) >> (ibit)))
  313. /* Extract bit from a word.
  314. // 0 <= ibit <= RADIX_BITS - 1.
  315. // Rightmost (i.e., least significant) bit is bit 0.
  316. */
  317. /*
  318. Test whether a number is odd or even.
  319. */
  /* Parity tests on the low-order bit: each yields 1 when true, 0 when false. */
  #define IS_EVEN(n) (((n) & 1) ^ 1)
  #define IS_ODD(n) (1 & (n))
  322. /*
  323. Maximum and minimum of two arguments
  324. (no side effects in arguments)
  325. */
  /*
      Larger and smaller of two values.  An argument may be evaluated
      twice, so arguments must not have side effects.
  */
  #define MAX(x, y) ((y) < (x) ? (x) : (y))
  #define MIN(x, y) ((y) < (x) ? (y) : (x))
  333. #if 0
  334. /*
  335. If we are building a DLL, use __declspec before certain variable
  336. declarations (and out procedure names in a .def file).
  337. _PM_DLL should be #defined when compiling bignum but not the application.
  338. If we are building a static library, use normal C declarations.
  339. */
  340. #ifdef _PM_DLL
  341. #define exportable_var __declspec( dllexport )
  342. #define exportable_var_declaration __declspec (dllexport)
  343. #else
  344. #define exportable_var __declspec( dllimport )
  345. #endif
  346. #else
  347. #define exportable_var extern
  348. #define exportable_var_declaration
  349. #endif
  350. #
  351. /*
  352. Macro to return 3^i (exponentiation), for 0 <= i <= 15.
  353. Intended for use with constant argument, such as
  354. in array dimensions. The POWER3 array should
  355. be used if the argument is variable.
  356. */
  /*
      3^i as a constant expression for 0 <= i <= 15, built from the
      binary digits of i: 3^1, 3^2, 3^4 and 3^8 are multiplied in
      whenever the matching bit of i is set.
  */
  #define POWER3CON(i) ( (((i) & 8) ? 6561 : 1) * (((i) & 4) ? 81 : 1) \
                       * (((i) & 2) ? 9 : 1) * (((i) & 1) ? 3 : 1) )
  359. exportable_var DWORDC POWER3[16]; /* See mpglobals.c */
  360. /*
  361. kara.c repeatedly replaces an operand by three
  362. half-length operands and a sign. The sign has
  363. type kara_sign_t. The operands are partitioned
  364. in half until their size at most VMUL_MAX_LNG_SINGLE,
  365. and sometimes further (see padinfo_initialization in kara.c)
  366. This may require up to KARA_MAX_HALVINGS halvings,
  367. giving 3^KARA_MAX_HALVINGS outputs each with size
  368. as large as VMUL_MAX_SINGLE words. The signs
  369. array has length (3^KARA_MAX_HALVINGS - 1)/2.
  370. */
  /*
      Sign attached to a Karatsuba difference (values SIGN_PLUS and
      SIGN_MINUS; see kara.c).

      The type used to be selected by "#if TARGET == TARGET_ALPHA", but
      TARGET_ALPHA is not one of the TARGET_* codes defined in this
      header.  An undefined macro reads as 0 inside #if, so the Alpha
      arm could never be chosen for a valid TARGET, and would have been
      chosen by accident had TARGET itself been 0 or undefined.
      The sign is therefore unconditionally a single byte.
  */
  typedef unsigned char kara_sign_t;
  typedef const kara_sign_t kara_sign_tc;
  379. #define VMUL_MAX_LNG_SINGLE 12
  380. #define KARA_MAX_HALVINGS (LG2_MP_LONGEST - 2)
  381. #if KARA_MAX_HALVINGS > 15
  382. #error -- "Extend POWER3CON macro"
  383. #endif
  384. #define KARA_MAX_LNG_DIFS ((MP_LONGEST >> KARA_MAX_HALVINGS) * POWER3CON(KARA_MAX_HALVINGS))
  385. #define KARA_MAX_LNG_SIGNS ((POWER3CON(KARA_MAX_HALVINGS) - 1)/2)
  386. #define MEMORY_BANK_ALLOWANCE 1
  387. typedef struct {
  388. digit_t difs[KARA_MAX_LNG_DIFS + MEMORY_BANK_ALLOWANCE];
  389. kara_sign_t signs[KARA_MAX_LNG_SIGNS];
  390. } kara_longest_t; /* For MP_LONGEST or less */
  391. /* On the Pentium P5 and P6,
  392. the two arguments to vmulnn
  393. should lie in different memory banks
  394. (i.e., different addresses mod 32 bytes).
  395. We make the .difs arrays one digit_t entry
  396. larger than essential, in an attempt to reduce
  397. data cache conflicts. Look for the
  398. MEMORY_BANK_ALLOWANCE symbol in the source code.
  399. */
  400. #define kara_longest_NULL ((kara_longest_t*)0)
  401. typedef struct {
  402. digit_t difs[KARA_MAX_LNG_DIFS/3 + MEMORY_BANK_ALLOWANCE];
  403. kara_sign_t signs[KARA_MAX_LNG_SIGNS/3];
  404. } kara_half_longest_t; /* For MP_LONGEST/2 or less */
  405. typedef const kara_half_longest_t kara_half_longest_tc;
  406. typedef const kara_longest_t kara_longest_tc;
  407. typedef struct { /* Constants relating to padding lengths. */
  408. DWORD length;
  409. /* length = length3[0] * 2^nhalving */
  410. DWORD nhalving;
  411. DWORD length3[KARA_MAX_HALVINGS+1];
  412. /* length3[0] is 1, 2, 3, or 4 */
  413. /* length3[i] is length3[0] * 3^i */
  414. } padinfo_t;
  415. typedef const padinfo_t padinfo_tc;
  416. #define padinfo_NULL ((padinfo_t*)0)
  417. /*
  418. The reciprocal_1_t type is used when div21
  419. or divide or divide_immediate would otherwise
  420. divide by the same number repeatedly. See file divide.c.
  421. */
  422. typedef struct {
  423. digit_t multiplier;
  424. DWORD shiftamt;
  425. } reciprocal_1_t;
  426. typedef const reciprocal_1_t reciprocal_1_tc;
  427. /*
  428. mp_modulus_t struct has modulus-dependent constants
  429. used for fast reduction (typically for a fixed modulus,
  430. which will be used several times, as in modular exponentiation).
  431. These constants are initialized by function create_modulus:
  432. modulus -- Modulus used for computations. Must be nonzero.
  433. length -- Length of the modulus, without leading zeros.
  434. Operands to mod_add, mod_mul, mod_sub, ...
  435. are assumed to have this length.
  436. padinfo -- Pointer to a padinfo_t struct. For fast arithmetic,
  437. operands are padded to a length
  438. length_padded >= length (see find_padinfo in kara.c).
  439. The value of length_padded is stored in padinfo->length.
  440. The present implementation requires length_padded be either
  441. a power of 2, or 3 times a power of 2.
  442. For example, if length = 19, then length_padded = 24,
  443. and the operands are treated as 24-word
  444. operands for Karatsuba.
  445. half_padinfo -- Pointer to a padinfo_t struct for length
  446. CEIL(length/2). Used in modular_reduce to
  447. use Karatsuba multiplication on half-length operands.
  448. We denote half_length_padded = half_padinfo->length.
  449. reddir -- Equal to FROM_LEFT if reductions of
  450. products are done from the left (traditional
  451. division), and to FROM_RIGHT if reductions of
  452. products are done from the right (Montgomery reduction).
  453. When using FROM_RIGHT, the modulus must be odd.
  454. Arguments to mod_mul should be pre-scaled by
  455. RADIX^scaling_power (mod modulus).
  456. The product will be similarly scaled.
  457. scaling_power -- Equal to 2*half_length_padded when
  458. reddir = FROM_RIGHT. Undefined
  459. if reddir = FROM_LEFT.
  460. one -- Constant 1 (length length), scaled if reddir = FROM_RIGHT.
  461. When reddir = FROM_RIGHT, this is
  462. RADIX^scaling_power (mod modulus).
  463. left_multiplier_first -- The first multiplier when reducing from the
  464. left. Length length.
  465. -RADIX^(length + half_length_padded)/2^(left_reciprocal_1.shiftamt) mod modulus
  466. left_reciprocal_1 -- Reciprocal of the divisor starting at the
  467. leftmost digit (i.e., modulus[length-1]);
  468. right_reciprocal_1 -- If modulus is odd, this holds
  469. 1/modulus (mod RADIX), for use in mod_shift.
  470. Otherwise the field is zero.
  471. right_multiplier_second -- If reddir = FROM_RIGHT,
  472. then this has 1/modulus mod RADIX^(half_length_padded).
  473. right_multiplier_first -- -1/RADIX^half_length_padded mod modulus.
  474. Equal to
  475. (modulus * right_multiplier_second - 1)/RADIX^half_length_padded.
  476. left_multiplier_second -- Contains the half_length_padded*RADIX_BITS
  477. most significant bits of (high power of 2)/modulus
  478. (excluding the leading -1-). More precisely, this has
  479. FLOOR( (RADIX^(length + half_length_padded) - 1)
  480. / (modulus * 2^(left_reciprocal_1.shiftamt)) )
  481. - RADIX^(half_length_padded)
  482. See file divide.c for an explanation
  483. about how this constant is used to get accurate
  484. quotients when dividing from the left.
  485. left_multiplier_second_over2 -- Left_multiplier_second/2.
  486. */
  487. typedef enum {FROM_LEFT, FROM_RIGHT} reddir_t;
  488. typedef const reddir_t reddir_tc;
  489. typedef struct {
  490. digit_t modulus[MP_LONGEST];
  491. DWORD length; /* Length passed to create_modulus */
  492. DWORD scaling_power; /* 2*half_padinfo->length */
  493. padinfo_tc *padinfo; /* Pointer to struct containing
  494. padded length and related info */
  495. padinfo_tc *half_padinfo;
  496. /* Padinfo info for CEIL(length/2) */
  497. reddir_t reddir; /* FROM_LEFT or FROM_RIGHT */
  498. reciprocal_1_t left_reciprocal_1;
  499. digit_t right_reciprocal_1;
  500. /* 1/modulus[0] mod RADIX,
  501. if modulus is odd */
  502. kara_half_longest_t modulus_kara2[2];
  503. /*
  504. Copy of modulus.
  505. Lower half_length_padded
  506. and upper
  507. length - half_length_padded
  508. words separately passed
  509. to to_kara.
  510. */
  511. kara_half_longest_t left_multiplier_first_kara2[2];
  512. /* Remainder when dividing
  513. -RADIX^(length + half_length_padded)
  514. / 2^(left_reciprocal_1.shiftamt)
  515. by modulus.
  516. Lower and upper halves separately
  517. passed to to_kara.
  518. */
  519. kara_half_longest_t left_multiplier_second_kara;
  520. /* half_length_padded*RADIX_BITS
  521. most significant bits of (left)
  522. reciprocal of modulus,
  523. excluding the leading -1-. */
  524. digit_t left_multiplier_second_over2[MP_LONGEST/2];
  525. /* left_multiplier_second/2 */
  526. kara_half_longest_t right_multiplier_first_kara2[2];
  527. /* -1/RADIX^half_length_padded
  528. mod modulus.
  529. */
  530. digit_t right_multiplier_second[MP_LONGEST/2];
  531. kara_half_longest_t right_multiplier_second_kara;
  532. /* 1/modulus mod RADIX^(half_length_padded) */
  533. digit_t cofactor[MP_LONGEST];
  534. DWORD lng_cofactor;
  535. /*
  536. In factorization programs, this
  537. holds the cofactor after dividing
  538. modulus by any factors found.
  539. Used by gcdex_jacobi.
  540. */
  541. digit_t one[MP_LONGEST];
  542. } mp_modulus_t;
  543. typedef const mp_modulus_t mp_modulus_tc;
  544. /*
  545. The modular multiplication code and its
  546. relatives (e.g., modular_reduce, to_kara)
  547. need large amounts of temporary space
  548. during processing. All big temporaries
  549. are gathered into a modmultemp_t struct.
  550. Users of these routines can allocate the
  551. storage themselves, and pass a pointer
  552. to the temporary storage (fastest), or can pass
  553. a null pointer (modmultemp_NULL).
  554. */
  555. typedef struct {
  556. // mmul fields are for mod_mul,
  557. // mod_mul_kara, mod_mul_kara1
  558. digit_t mmul_adifs[KARA_MAX_LNG_DIFS];
  559. kara_sign_t mmul_asigns[KARA_MAX_LNG_SIGNS];
  560. digit_t mmul_bdifs[KARA_MAX_LNG_DIFS
  561. + MEMORY_BANK_ALLOWANCE];
  562. kara_sign_t mmul_bsigns[KARA_MAX_LNG_SIGNS];
  563. // mr_ fields are for modular_reduce.
  564. // The input to modular_reduce can be stored
  565. // in mr_dividend -- this will save a mp_copy call.
  566. digit_t mr_dividend[MAX(2*MP_LONGEST,
  567. 2*KARA_MAX_LNG_DIFS+1)];
  568. digit_t mr_prd1[2*MP_LONGEST];
  569. digit_t mr_prd2[2*MP_LONGEST];
  570. digit_t mr_mptemp[2*MP_LONGEST];
  571. // htk_ fields are for half_times_kara
  572. // and half_times_kara2
  573. digit_t htk_abprd[2][2*KARA_MAX_LNG_DIFS/3];
  574. kara_half_longest_t htk_ak;
  575. } modmultemp_t;
  576. /*
  577. mod_exp2000 returns statistics on what happened during the
  578. exponentiation.
  579. */
  580. typedef struct { // Statistics from mod_exp2000
  581. // This struct may grow in future versions.
  582. DWORD cnt_mod_mul_kara; // Calls to mod_mul_kara
  583. DWORD cnt_mp_copy; // Calls to mp_copy
  584. DWORD cnt_to_kara; // Calls to to_kara
  585. } mod_exp_stats_t;
  586. /*
  587. When an error is detected, variable mp_errno is set
  588. to the error number and execution continues.
  589. If the library was compiled with #define PRINT_ERROR_MESSAGES,
  590. then a message is written to file mp_errfil.
  591. The application program should occasionally check mp_errno.
  592. Except for MP_ERRNO_NO_ERROR, the error numbers are
  593. in alphabetical order by name. The routine issuing
  594. each error number is part of the name.
  595. */
  596. typedef enum {
  597. MP_ERRNO_NO_ERROR = 0,
  598. MP_ERRNO_CREATE_MODULUS_LEADING_ZERO,
  599. MP_ERRNO_CREATE_MODULUS_MONTGOMERY_EVEN,
  600. MP_ERRNO_CREATE_MODULUS_TOO_LONG,
  601. MP_ERRNO_DIGIT_JACOBI_EVEN_DENOMINATOR,
  602. MP_ERRNO_DIGIT_MOD_DIVIDE_ODD_EVEN_MODULUS,
  603. MP_ERRNO_DIGIT_MOD_DIVIDE_ODD_NONTRIVIAL_GCD,
  604. MP_ERRNO_DIGIT_MOD_DIVIDE_ODD_ZERO_DENOMINATOR,
  605. MP_ERRNO_DIGIT_NEXT_PRIME_TOO_HIGH,
  606. MP_ERRNO_DIV21_INVALID_ARGUMENT,
  607. MP_ERRNO_DIVIDE_ESTIMATION_ERROR,
  608. MP_ERRNO_DIVIDE_INVALID_LENGTHS,
  609. MP_ERRNO_DIVIDE_LEADING_ZERO,
  610. MP_ERRNO_DSA_KEY_GENERATION_INVALID_SIZES,
  611. MP_ERRNO_DSA_PRECOMPUTE_BAD_G,
  612. MP_ERRNO_DSA_PRECOMPUTE_INVALID_KEY,
  613. MP_ERRNO_DSA_PRECOMPUTE_PQ_NONPRIME,
  614. MP_ERRNO_DSA_PRECOMPUTE_WRONG_SC,
  615. MP_ERRNO_DSA_SIGNATURE_VERIFICATION_NONTRIVIAL_GCD,
  616. MP_ERRNO_FIND_BIG_PRIME_BAD_CONGRUENCE_CLASS,
  617. MP_ERRNO_FIND_BIG_PRIME_CONG_MOD_TOO_LARGE,
  618. MP_ERRNO_FIND_BIG_PRIME_CONG_TO_TOO_LARGE,
  619. MP_ERRNO_GCDEX_JACOBI_EVEN_MODULUS,
  620. MP_ERRNO_KP_TOO_SHORT,
  621. MP_ERRNO_KPDIV_ZERO_DENOMINATOR,
  622. MP_ERRNO_MOD_ADD_CARRY_NONZERO,
  623. MP_ERRNO_MOD_SHIFT_LEFT_CARRY_NONZERO,
  624. MP_ERRNO_MOD_SHIFT_RIGHT_CARRY_NONZERO,
  625. MP_ERRNO_MOD_SHIFT_RIGHT_EVEN,
  626. MP_ERRNO_MOD_SUB_BORROW_NONZERO,
  627. MP_ERRNO_MODULAR_REDUCE_BOTTOM_BITS_DIFFERENT,
  628. MP_ERRNO_MODULAR_REDUCE_TOO_LONG,
  629. MP_ERRNO_MODULAR_REDUCE_UNEXPECTED_CARRY,
  630. MP_ERRNO_MP_DECIMAL_INPUT_NONDIGIT,
  631. MP_ERRNO_MP_DECIMAL_INPUT_OVERFLOW,
  632. MP_ERRNO_MP_GCD_INTERMEDIATE_EVEN,
  633. MP_ERRNO_MP_GCD_TOO_LONG,
  634. MP_ERRNO_MP_GCDEX_INTERNAL_ERROR,
  635. MP_ERRNO_MP_GCDEX_NONZERO_REMAINDER,
  636. MP_ERRNO_MP_GCDEX_ZERO_OPERAND,
  637. MP_ERRNO_MP_SHIFT_INVALID_SHIFT_COUNT,
  638. MP_ERRNO_MP_TRAILING_ZERO_COUNT_ZERO_ARG,
  639. MP_ERRNO_MULTIPLY_LOW_INVALID_LENGTH,
  640. MP_ERRNO_NO_MEMORY, // From mp_alloc_temp
  641. MP_ERRNO_PADINFO_INITIALIZATION_BAD_CUTOFF,
  642. MP_ERRNO_RANDOM_DIGIT_INTERVAL_INVALID_PARAMETERS,
  643. MP_ERRNO_RANDOM_MOD_INVALID_PARAMETERS,
  644. MP_ERRNO_RANDOM_MOD_INVERSE_NOT_PRIME,
  645. MP_ERRNO_RANDOM_MOD_NONZERO_INVALID_PARAMETERS,
  646. MP_ERRNO_SELECT_A0B0_BAD_COFACTOR,
  647. MP_ERRNO_SELECT_A0B0_BAD_MU,
  648. MP_ERRNO_SELECT_A0B0_NON_CONSTANT_QUOTIENT,
  649. MP_ERRNO_SELECT_A0B0_NONZERO_REMAINDER,
  650. MP_ERRNO_SELECT_CURVE_BAD_FIELD_TYPE,
  651. MP_ERRNO_SELECT_D_UNSUCCESSFUL,
  652. MP_ERRNO_TO_KARA_INVALID_LENGTH,
  653. MP_ERRNO_TO_KARA2_INVALID_LENGTH,
  654. MP_ERRNO_COUNT // Number of entries above
  655. } mp_errno_t;
  656. exportable_var mp_errno_t mp_errno;
  /*
      SetMpErrno(x) records error number x; GetMpErrno() reads it back.

      With WIN32 defined, the value is stored in mp_errno and also handed
      to SetLastError, and GetMpErrno() returns whatever GetLastError
      reports.  Otherwise only the mp_errno variable is used.

      The fallback expansions are fully parenthesized so they behave as a
      single operand inside a larger expression; previously
      "SetMpErrno(e) + 1" expanded to "mp_errno = (e) + 1" and stored the
      wrong value.
  */
  #if defined(WIN32)
  #define SetMpErrno(x) SetLastError((DWORD)(mp_errno = (x)))
  #define GetMpErrno() ((mp_errno_t)GetLastError())
  #else
  #define SetMpErrno(x) (mp_errno = (x))
  #define GetMpErrno() (mp_errno)
  #endif
  664. #define inadequate_memory (GetMpErrno() == MP_ERRNO_NO_MEMORY)
  665. extern const char* mp_errno_name(const mp_errno_t);
  666. // Update table in mperrnam.c when adding new error message
  667. /*
  668. Some routines allow an argument of digit_NULL or
  669. reciprocal_1_NULL when the corresponding argument
  670. is not otherwise used. For example, the division
  671. routine allows but does not require a
  672. reciprocal structure as argument,
  673. and allows the quotient to be suppressed.
  674. */
  675. #define digit_NULL ((digit_t*)0)
  676. #define reciprocal_1_NULL ((reciprocal_1_t*)0)
  677. #define modmultemp_NULL ((modmultemp_t*)0)
  678. /*
  679. The next several #defines are used in function prototypes.
  680. */
  681. #define MP_INPUT digit_tc[]
  682. #define MP_OUTPUT digit_t[]
  683. #define MP_MODIFIED digit_t[]
  684. #define DIFS_INPUT MP_INPUT
  685. #define DIFS_OUTPUT MP_OUTPUT
  686. #define DIFS_MODIFIED MP_MODIFIED
  687. #define SIGNS_INPUT kara_sign_tc[]
  688. #define SIGNS_MODIFIED kara_sign_t[]
  689. #define SIGNS_OUTPUT kara_sign_t[]
  690. extern digit_t accumulate(MP_INPUT, digit_tc, MP_MODIFIED, DWORDC);
  691. extern digit_t Stdcall86 add_diff(MP_INPUT, DWORDC, MP_INPUT, DWORDC, MP_OUTPUT);
  692. extern DWORD add_full(MP_INPUT, DWORDC, MP_INPUT, DWORDC, MP_OUTPUT);
  693. extern digit_t Stdcall86 add_same(MP_INPUT, MP_INPUT, MP_OUTPUT, DWORDC);
  694. extern DWORD add_signed(MP_INPUT, DWORDC, MP_INPUT, DWORDC, MP_OUTPUT);
  695. extern int compare_diff(MP_INPUT, DWORDC, MP_INPUT, DWORDC);
  696. extern int compare_sum_diff(MP_INPUT, DWORDC, MP_INPUT, DWORDC, MP_INPUT, DWORDC);
  697. BOOL create_modulus(MP_INPUT, DWORDC, reddir_tc, mp_modulus_t*);
  698. extern dblint_t dblint_gcd(dblint_tc, dblint_tc);
  699. extern dblint_t dblint_ogcd(dblint_tc, dblint_tc);
  700. extern digit_t dblint_sqrt(dblint_tc);
  701. extern digit_t decumulate(MP_INPUT, digit_tc, MP_MODIFIED, DWORDC);
  702. extern DWORD digit_factor(digit_tc, digit_t[], DWORD[]);
  703. extern digit_t digit_gcd(digit_tc, digit_tc);
  704. extern int digit_jacobi(digit_tc, digit_tc);
  705. extern digit_t digit_least_prime_divisor(digit_tc);
  706. extern digit_t digit_mod_divide_odd(digit_tc, digit_tc, digit_tc);
  707. extern digit_t digit_ogcd(digit_tc, digit_tc);
  708. extern char* digit_out(digit_tc);
  709. extern digit_t digit_sqrt(digit_tc);
  710. /*
  711. digit2_aligned(array) checks that _array_ is
  712. aligned on a 2*sizeof(digit_t) boundary.
  713. Assembly code versions of the software sometimes load
  714. or store two digit_t values with one instruction.
  715. Specifically, MMX code on X86 can load or store two 32-bit
  716. digit_t values with one 64-bit MOVQ instruction.
  717. IA-64 and AMD64 code can load two 64-bit values to the floating
  718. point registers with a load pair instruction.
  719. The digit2_aligned macro checks whether its operand is
  720. appropriately aligned. The required alignment is never
  721. worse than that returned by mp_alloc_temp.
  722. */
  /*
      digit2_aligned(array) is nonzero when array lies on a
      2*sizeof(digit_t) boundary.  On targets other than those listed
      in the condition below, no such alignment is demanded and the
      macro is always TRUE.

      The address is converted to size_t rather than DWORD: DWORD is
      32 bits wide, so casting a pointer to it on the 64-bit targets
      named here (IA64, AMD64) discards the upper half of the address
      and draws a pointer-truncation diagnostic.
  */
  #if TARGET == TARGET_IX86 || TARGET == TARGET_IA64 || TARGET == TARGET_AMD64
  #define digit2_aligned(array) (((size_t)(array) & (2*sizeof(digit_t) - 1)) == 0)
  #else
  #define digit2_aligned(array) (TRUE)
  #endif
  728. extern void div21(dblint_tc, digit_tc, digit_t*, digit_t*);
  729. extern void div21_fast(dblint_tc, digit_tc,
  730. reciprocal_1_tc*, digit_t*, digit_t*);
  731. extern DWORD divide(MP_INPUT, DWORDC, MP_INPUT, DWORDC,
  732. reciprocal_1_tc*, MP_OUTPUT, MP_OUTPUT);
  733. extern DWORD divide_rounded(MP_INPUT, DWORDC, MP_INPUT, DWORDC,
  734. reciprocal_1_tc*, MP_OUTPUT, MP_OUTPUT);
  735. extern void divide_precondition_1(MP_INPUT, DWORDC, reciprocal_1_t*);
  736. extern digit_t divide_immediate(MP_INPUT, digit_tc,
  737. reciprocal_1_tc*, MP_OUTPUT, DWORDC);
  738. extern digit_t estimated_quotient_1(digit_tc, digit_tc,
  739. digit_tc, reciprocal_1_tc*);
  740. extern BOOL find_big_prime(DWORDC, MP_INPUT, DWORDC,
  741. MP_INPUT, DWORDC, MP_OUTPUT);
  742. extern padinfo_tc *find_padinfo(DWORDC);
  743. DWORD from_modular(MP_INPUT, MP_OUTPUT, mp_modulus_tc*);
  744. extern int gcdex_jacobi(MP_INPUT, mp_modulus_tc*, MP_OUTPUT, MP_OUTPUT);
  745. extern void mod_add(MP_INPUT, MP_INPUT, MP_OUTPUT, mp_modulus_tc*);
  746. extern DWORD mod_exp(MP_INPUT, MP_INPUT, DWORDC, MP_OUTPUT,
  747. mp_modulus_tc*);
  748. extern BOOL mod_exp2000(MP_INPUT, MP_INPUT, DWORDC, MP_OUTPUT,
  749. mp_modulus_tc*, mod_exp_stats_t*);
  750. extern DWORD mod_exp_immediate(MP_INPUT, digit_tc, MP_OUTPUT,
  751. mp_modulus_tc*);
  752. extern int mod_jacobi_immediate(const signed long, mp_modulus_tc*);
  753. extern void mod_Lucas(MP_INPUT, MP_INPUT, DWORDC, MP_OUTPUT,
  754. mp_modulus_tc*);
  755. extern void mod_LucasUV(MP_INPUT, MP_INPUT, MP_INPUT, DWORDC,
  756. MP_OUTPUT, MP_OUTPUT, mp_modulus_tc*);
  757. extern void mod_mul(MP_INPUT, MP_INPUT, MP_OUTPUT,
  758. mp_modulus_tc*, modmultemp_t*);
  759. extern void mod_mul_immediate(MP_INPUT, digit_tc,
  760. MP_OUTPUT, mp_modulus_tc*);
  761. extern void mod_mul_kara1(MP_INPUT, DIFS_INPUT, SIGNS_INPUT,
  762. MP_OUTPUT, mp_modulus_tc*, modmultemp_t*);
  763. extern void mod_mul_kara(DIFS_INPUT, SIGNS_INPUT,
  764. DIFS_INPUT, SIGNS_INPUT,
  765. MP_OUTPUT, mp_modulus_tc*, modmultemp_t*);
  766. extern void mod_negate(MP_INPUT, MP_OUTPUT, mp_modulus_tc*);
  767. extern void mod_shift(MP_INPUT, intc, MP_OUTPUT, mp_modulus_tc*);
  768. extern BOOL mod_sqrt(MP_INPUT, MP_OUTPUT, mp_modulus_tc*);
  769. extern void mod_sub(MP_INPUT, MP_INPUT, MP_OUTPUT, mp_modulus_tc*);
  770. extern BOOL modular_reduce(MP_INPUT, DWORDC, reddir_tc,
  771. MP_OUTPUT, mp_modulus_tc*, modmultemp_t*);
  extern void* mp_alloc_temp(DWORDC);
  /*
      Allocate_Temporaries(typename, ptr)
          Assign to ptr the result of mp_alloc_temp(sizeof(typename)),
          cast to typename*.
      Allocate_Temporaries_Multiple(nelmt, typename, ptr)
          Same, but requests room for nelmt objects of type typename.

      Each expansion is wrapped in parentheses so that it stays one
      assignment expression wherever it is used (for instance when the
      result is compared against a null pointer); ptr is parenthesized
      as well.  Use as a plain statement is unaffected.
      NOTE(review): failure behavior of mp_alloc_temp is not visible
      here -- confirm before relying on a null return.
  */
  #define Allocate_Temporaries(typename, ptr) \
          ((ptr) = (typename*)mp_alloc_temp(sizeof(typename)))
  #define Allocate_Temporaries_Multiple(nelmt, typename, ptr) \
          ((ptr) = (typename*)mp_alloc_temp((nelmt)*sizeof(typename)))
  /*
      mp_copy(src, dest, lng) -- copy lng digits from src to dest.
      Alpha and MIPS assembly builds link an external routine; all other
      builds expand to memcpy, with the byte count being
      lng digits of sizeof(digit_t) each.
  */
  #if USEASM_ALPHA || USEASM_MIPS
  extern void mp_copy(MP_INPUT, MP_OUTPUT, DWORDC);
  #else
  #define mp_copy(src, dest, lng)              \
          memcpy((void *)(dest),               \
                 (const void *)(src),          \
                 (lng)*sizeof(digit_t))
  #endif
  783. extern char* mp_decimal(MP_INPUT, DWORDC);
  784. extern long mp_decimal_input(charc*, MP_OUTPUT, DWORDC, charc**);
  785. extern char* mp_dword_decimal(DWORDC*, DWORDC);
  786. extern int mp_format(MP_MODIFIED, DWORDC,
  787. digit_tc, charc*, char*, DWORDC);
  extern void mp_free_temp(void*);
  /*
      Free_Temporaries(ptr) -- hand ptr to mp_free_temp.
      The argument is parenthesized so that the (void*) cast applies to
      the whole argument expression, not just its first operand.
  */
  #define Free_Temporaries(ptr) mp_free_temp((void*)(ptr))
  790. extern DWORD mp_gcd(MP_INPUT, DWORDC, MP_INPUT, DWORDC, MP_OUTPUT);
  791. extern DWORD mp_gcdex(MP_INPUT, DWORDC, MP_INPUT, DWORDC,
  792. MP_OUTPUT, MP_OUTPUT, MP_OUTPUT, MP_OUTPUT);
  793. extern void mp_initialization(void);
  794. extern void mp_longshift(MP_INPUT, intc, MP_OUTPUT, DWORDC);
  795. extern void Stdcall86 mp_mul22s(digit_tc[4], MP_MODIFIED, MP_MODIFIED, DWORDC, sdigit_t[2]);
  796. extern void Stdcall86 mp_mul22u(digit_tc[4], MP_MODIFIED, MP_MODIFIED, DWORDC, digit_t[2]);
  797. extern DWORD mp_remove2(MP_MODIFIED, DWORDC);
  798. extern digit_t mp_shift(MP_INPUT, intc, MP_OUTPUT, DWORDC);
  799. extern DWORD mp_significant_bit_count(MP_INPUT, DWORDC);
  800. extern BOOL mp_sqrt(MP_INPUT, MP_OUTPUT, DWORDC);
  801. extern DWORD mp_trailing_zero_count(MP_INPUT, DWORDC);
  802. extern void mul_kara(DIFS_INPUT, SIGNS_INPUT,
  803. DIFS_INPUT, SIGNS_INPUT,
  804. MP_OUTPUT, padinfo_tc*);
  805. extern void mul_kara_know_low(DIFS_INPUT, SIGNS_INPUT,
  806. DIFS_INPUT, SIGNS_INPUT,
  807. MP_INPUT, MP_OUTPUT,
  808. padinfo_tc*);
  809. extern void mul_kara_squaring(MP_INPUT, DWORDC,
  810. DIFS_MODIFIED, SIGNS_MODIFIED,
  811. MP_OUTPUT, padinfo_tc*,
  812. modmultemp_t*);
  813. extern void multiply(MP_INPUT, DWORDC, MP_INPUT, DWORDC, MP_OUTPUT);
  814. extern digit_t multiply_immediate(MP_INPUT, digit_tc, MP_OUTPUT, DWORDC);
  815. extern void Stdcall86 multiply_low(MP_INPUT, MP_INPUT, MP_OUTPUT, DWORDC);
  816. extern DWORD multiply_signed(MP_INPUT, DWORDC, MP_INPUT, DWORDC, MP_OUTPUT);
  817. extern DWORD multiply_signed_immediate(MP_INPUT, DWORDC,
  818. signed long, MP_OUTPUT);
  /*
      Number of entries in the sieve array handed to next_prime.
      Windows CE builds use the smaller table.  The value is required
      to be a multiple of 3, which is enforced at compile time below.
  */
  #if TARGET_OS != OS_WCE
  #define PRIME_SIEVE_LENGTH 3000
  #else
  #define PRIME_SIEVE_LENGTH 300
  #endif
  #if (PRIME_SIEVE_LENGTH % 3) != 0
  #error "PRIME_SIEVE_LENGTH must be a multiple of 3"
  #endif
  828. extern digit_t next_prime(
  829. digit_tc pstart,
  830. digit_t *lpsievbeg,
  831. digit_t sieve[PRIME_SIEVE_LENGTH],
  832. digit_t *lpmax_sieved_squared
  833. );
  834. extern void padinfo_initialization(DWORDC);
  835. extern BOOL probable_prime(MP_INPUT, DWORDC, MP_INPUT, DWORDC, DWORDC);
  836. extern BOOL remove_small_primes(MP_INPUT, DWORDC, digit_tc,
  837. digit_t[], DWORD[], DWORD*,
  838. MP_OUTPUT, DWORD*);
  839. #if USEASM_IX86
  840. #define SIGNIFICANT_BIT_COUNT_DEFINED 1
  841. #define UNIFORM_SIGNIFICANT_BIT_COUNT 1
  842. #pragma warning(disable : 4035) /* No return value */
  843. static inline DWORD significant_bit_count(digit_tc pattern)
  844. {
  845. _asm {
  846. mov eax,pattern ; Nonzero pattern
  847. bsr eax,eax ; eax = index of leftmost nonzero bit
  848. ; BSR is slow on Pentium
  849. ; but fast on Pentium Pro
  850. inc eax ; Add one to get significant bit count
  851. }
  852. }
  853. #pragma warning(default : 4035)
  854. #elif USEASM_ALPHA
  855. #define SIGNIFICANT_BIT_COUNT_DEFINED 1
  856. #define UNIFORM_SIGNIFICANT_BIT_COUNT 1
  857. extern const BYTE half_byte_significant_bit_count[128]; /* See mpmisc.c */
  858. /*
  859. The Alpha code uses the CMPBGE instruction to
  860. identify which bytes are nonzero. The most significant
  861. bit must occur within the leftmost nonzero byte.
  862. We use the CMPBGE output to identify which byte that is.
  863. After we extract that byte, we identify its most significant bit.
  864. */
  865. static inline DWORD significant_bit_count(digit_tc pattern)
  866. {
  867. DWORDC zero_byte_pattern = __asm("cmpbge zero, %0, v0", pattern);
  868. DWORDC byte_offset_plus_1
  869. = 8*half_byte_significant_bit_count[127 - (zero_byte_pattern >> 1)] + 1;
  870. return byte_offset_plus_1
  871. + half_byte_significant_bit_count[pattern >> byte_offset_plus_1];
  872. }
  873. #else
  874. #define SIGNIFICANT_BIT_COUNT_DEFINED 0
  875. #define UNIFORM_SIGNIFICANT_BIT_COUNT 0
  876. /* Algorithm faster for larger inputs. See mpmisc.c */
  877. extern DWORD significant_bit_count(digit_tc);
  878. #endif
  879. extern digit_t Stdcall86 sub_diff(MP_INPUT, DWORDC, MP_INPUT, DWORDC, MP_OUTPUT);
  880. extern digit_t Stdcall86 sub_same(MP_INPUT, MP_INPUT, MP_OUTPUT, DWORDC);
  881. #define sub_signed(a, lnga, b, lngb, c) add_signed(a, lnga, b, -(lngb), c)
  882. extern BOOL test_primality(MP_INPUT, DWORDC);
  883. extern BOOL test_primality_check_low(MP_INPUT, DWORDC);
  884. extern BOOL get_prime(MP_OUTPUT, DWORDC);
  885. extern BOOL get_generator(DWORD*, DWORD*, DWORDC);
  886. extern void to_kara(MP_INPUT, DWORDC, DIFS_OUTPUT, SIGNS_OUTPUT,
  887. padinfo_tc*);
  888. extern BOOL to_modular(MP_INPUT, DWORDC, MP_OUTPUT, mp_modulus_tc*);
  889. // The following functions are indexed indirectly via pointers.
  890. // Also see GF2_get_funcs in field.h.
  891. #if TARGET == TARGET_IX86
  892. exportable_var BOOL MMX_available; /* See mpglobal.c */
  893. #endif
  894. typedef void Stdcall86 vmul_t(DIFS_INPUT, DIFS_INPUT, DIFS_OUTPUT, DWORDC);
  895. exportable_var vmul_t *vmulnn[VMUL_MAX_LNG_SINGLE];
  896. /* Addresses for 1 x 1 to 12 x 12 products */
  897. /* Defined at end of vmul.c */
  898. #if PRINT_ERROR_MESSAGES
  899. extern void mp_display(FILE*, charc*, MP_INPUT, DWORDC);
  900. exportable_var FILE* mp_errfil; /* Set to stdout in mp_global.c */
  901. extern void mp_print_allocation_statistics(FILE*);
  902. #endif /* PRINT_ERROR_MESSAGES */
  903. /****************************************************************************/
  904. static inline digit_t add_immediate(digit_tc a[],
  905. digit_tc iadd,
  906. digit_t b[],
  907. DWORDC lng)
  908. /*
  909. Compute b = a + iadd, where iadd has length 1.
  910. Both a and b have length lng.
  911. Function value is carry out of leftmost digit in b.
  912. */
  913. {
  914. if (lng == 0) {
  915. return iadd;
  916. } else if (a == b && b[0] <= RADIXM1 - iadd) {
  917. b[0] += iadd;
  918. return 0;
  919. } else {
  920. return add_diff(a, lng, &iadd, 1, b);
  921. }
  922. }
  923. /***************************************************************************/
  924. static inline int compare_immediate(digit_tc a[],
  925. digit_tc ivalue,
  926. DWORDC lng)
  927. /*
  928. Compare a multiple-precision number to a scalar.
  929. */
  930. {
  931. return compare_diff(a, lng, &ivalue, 1);
  932. }
  933. /****************************************************************************/
  934. #if USEASM_MIPS
  935. extern int compare_same(MP_INPUT, MP_INPUT, DWORDC);
  936. #else
  937. static inline int compare_same(digit_tc a[],
  938. digit_tc b[],
  939. DWORDC lng)
  940. /*
  941. Compare two multiple precision numbers a and b each of length lng.
  942. Function value is the sign of a - b, namely
  943. +1 if a > b
  944. 0 if a = b
  945. -1 if a < b
  946. */
  947. #if USEASM_IX86
  948. #pragma warning(disable : 4035) /* No return value */
  949. {
  950. /*
  951. We could use REPE CMPSD,
  952. but REPE is slow (4 cycles)
  953. on the Pentium. Plus we
  954. would need std and cld
  955. to adjust the direction flag.
  956. We anticipate that most loops
  957. will have either 1 or 2 iterations,
  958. and use RISC instructions.
  959. */
  960. _asm {
  961. mov eax,lng
  962. mov esi,a
  963. mov edi,b
  964. label1:
  965. test eax,eax
  966. jz label2 ; If nothing left, exit with eax = 0
  967. mov ecx,[esi+4*eax-4] ;
  968. mov edx,[edi+4*eax-4]
  969. dec eax ; Decrement remaining loop count
  970. cmp ecx,edx ; Test a[i] - b[i]
  971. je label1
  972. sbb eax,eax ; eax = 0 if a > b, -1 if a < b
  973. or eax,1 ; eax = 1 if a > b, -1 if a < b
  974. label2:
  975. }
  976. }
  977. #pragma warning(default : 4035)
  978. #else
  979. {
  980. DWORD i;
  981. for (i = lng-1; i != -1; i--) {
  982. if (a[i] != b[i]) return (a[i] > b[i] ? +1 : -1);
  983. }
  984. return 0;
  985. } /* compare_same */
  986. #endif
  987. #endif
  988. /****************************************************************************/
  /*
      mp_clear(dest, lng) -- zero a multiple-precision number of lng digits.
      Alpha and MIPS assembly builds link an external routine; all other
      builds expand to memset.  The inline loop is compiled out and is
      kept only as the reference form.
  */
  #if USEASM_ALPHA || USEASM_MIPS
  extern void mp_clear(MP_OUTPUT, DWORDC);
  #elif 0
  static inline void mp_clear(digit_t a[], DWORDC lnga)
  {
      DWORD remaining = lnga;

      while (remaining != 0) {
          a[--remaining] = 0;
      }
  }
  #else
  #define mp_clear(dest, lng) \
          (void)memset((void *)(dest), 0, (lng)*sizeof(digit_t))
  #endif
  1004. /****************************************************************************/
  1005. #if USEASM_ALPHA || USEASM_MIPS
  1006. extern void mp_extend(MP_INPUT, DWORDC, MP_OUTPUT, DWORDC);
  1007. // See alpha.s
  1008. #else
  1009. static inline void mp_extend(digit_tc a[],
  1010. DWORDC lnga,
  1011. digit_t b[],
  1012. DWORDC lngb)
  1013. /*
  1014. Copy a to b, while changing its length from
  1015. lnga to lngb (zero fill). Require lngb >= lnga.
  1016. */
  1017. {
  1018. mp_copy(a, b, lnga);
  1019. mp_clear(b + lnga, lngb - lnga);
  1020. }
  1021. #endif
  1022. /****************************************************************************/
  1023. static inline digit_t mp_getbit(digit_tc a[],
  1024. DWORDC ibit)
  1025. /* Extract bit of multiple precision number */
  1026. {
  1027. return digit_getbit(a[ibit/RADIX_BITS], ibit % RADIX_BITS);
  1028. }
  1029. /******************************************************************************/
  1030. static inline int mp_jacobi_wrt_immediate(digit_tc numer[],
  1031. DWORD lnumer,
  1032. digit_tc denom)
  1033. // Return jacobi(numer, denom), where denom is single precision
  1034. {
  1035. digit_tc rem = divide_immediate(numer, denom,
  1036. reciprocal_1_NULL,
  1037. digit_NULL, lnumer);
  1038. return digit_jacobi(rem, denom);
  1039. } /* mp_jacobi_wrt_immediate */
  1040. /****************************************************************************/
  1041. static inline void mp_setbit(digit_t a[],
  1042. DWORDC ibit,
  1043. digit_tc new_value)
  1044. /*
  1045. Set a bit to 0 or 1,
  1046. when the number is viewed as a bit array.
  1047. */
  1048. {
  1049. DWORDC j = ibit / RADIX_BITS;
  1050. DWORDC ishift = ibit % RADIX_BITS;
  1051. digit_tc mask1 = (DIGIT_ONE & new_value) << ishift;
  1052. digit_tc mask2 = (DIGIT_ONE & ~new_value) << ishift;
  1053. a[j] = (a[j] & ~mask2) | mask1;
  1054. } // end mp_setbit
  1055. /****************************************************************************/
  #if MEMORY_BANK_ALLOWANCE != 0
  static inline digit_t* Preferred_Memory_Bank(digit_t  *new_array,
                                               digit_tc *old_array)
  /*
      Memory bank conflicts are avoided when the (input) arguments to
      the vmulxx assembly routines start on distinct memory banks
      (squarings excepted).  With MEMORY_BANK_ALLOWANCE > 0, new_array
      is expected to carry MEMORY_BANK_ALLOWANCE spare entries at its
      end, so this routine may return new_array + 1 instead of
      new_array: it does so when the element distance between the two
      arrays is even.

      CAUTION -- This routine does non-portable pointer manipulations
      (it subtracts pointers into different arrays).
  */
  {
      if (((old_array - new_array) & 1) == 0) {
          return new_array + 1;
      }
      return new_array;
  }
  #else
  #define Preferred_Memory_Bank(new_array, old_array) new_array
  #endif
  1076. /****************************************************************************/
  1077. static inline void set_immediate(digit_t a[],
  1078. digit_tc ivalue,
  1079. DWORDC lnga)
  1080. {
  1081. a[0] = ivalue;
  1082. mp_clear(a + 1, lnga - 1);
  1083. }
  1084. /****************************************************************************/
  1085. static inline DWORD set_immediate_signed(digit_t a[],
  1086. signed long ivalue)
  1087. {
  1088. a[0] = labs(ivalue);
  1089. return (ivalue > 0) - (ivalue < 0); /* Sign of result -- -1, 0, +1 */
  1090. }
  1091. /****************************************************************************/
  1092. #if USEASM_MIPS
  1093. extern DWORD significant_digit_count(MP_INPUT, DWORDC);
  1094. #else
  1095. static inline DWORD significant_digit_count(digit_tc a[],
  1096. DWORDC lng)
  1097. /*
  1098. Return the number of significant digits in a.
  1099. Function value is zero precisely when a == 0.
  1100. */
  1101. #if USEASM_IX86
  1102. #pragma warning(disable : 4035) /* No return value */
  1103. {
  1104. /*
  1105. We could use REPE SCASD,
  1106. but the REPE overhead is
  1107. four cycles/compare on the Pentium.
  1108. We would also need sld and cld.
  1109. It is shorter to use RISC instructions.
  1110. We anticipate that the leading term a[lng-1]
  1111. will usually be nonzero.
  1112. */
  1113. _asm {
  1114. mov eax,lng
  1115. mov edx,a
  1116. label1:
  1117. test eax,eax
  1118. jz label2 ; If nothing left in number, return 0
  1119. mov ecx,[edx+4*eax-4]
  1120. dec eax
  1121. test ecx,ecx ; Test leading digit
  1122. jz label1
  1123. inc eax ; Nonzero element found; return old eax
  1124. label2:
  1125. }
  1126. }
  1127. #pragma warning(default : 4035)
  1128. #else
  1129. {
  1130. DWORD i = lng;
  1131. while (i != 0 && a[i-1] == 0) i--;
  1132. return i;
  1133. } /* significant_digit_count */
  1134. #endif
  1135. #endif
  1136. #define all_zero(a, lng) (significant_digit_count(a, lng) == 0)
  1137. /****************************************************************************/
  1138. static inline digit_t sub_immediate(digit_tc a[],
  1139. digit_tc isub,
  1140. digit_t b[],
  1141. DWORDC lng)
  1142. /*
  1143. Compute b = a - isub, where isub has length 1.
  1144. Both a and b have length lng.
  1145. Function value is borrow out of leftmost digit in b.
  1146. */
  1147. {
  1148. return (lng == 0 ? isub : sub_diff(a, lng, &isub, 1, b));
  1149. }
  1150. /****************************************************************************/
  1151. #if USEASM_IX86
  1152. #define TRAILING_ZERO_COUNT_DEFINED 1
  1153. static inline DWORD trailing_zero_count(digit_tc d)
  1154. #pragma warning(disable : 4035) /* No return value */
  1155. {
  1156. _asm {
  1157. mov eax,d
  1158. bsf eax,eax ; eax = index of rightmost nonzero bit
  1159. ; BSF is slow on Pentium,
  1160. ; but fast on Pentium Pro.
  1161. }
  1162. }
  1163. #pragma warning(default : 4035)
  1164. #elif UNIFORM_SIGNIFICANT_BIT_COUNT
  1165. #define TRAILING_ZERO_COUNT_DEFINED 1
  1166. static inline DWORD trailing_zero_count(digit_tc d)
  1167. /*
  1168. Given a nonzero integer d, this routine computes
  1169. the largest integer n such that 2^n divides d.
  1170. If d = 2^n * (2k + 1), then
  1171. d = k *2^(n+1) + 2^n
  1172. -d = (-1-k)*2^(n+1) + 2^n
  1173. The integers k and -1 - k are one's complements of
  1174. each other, so d & (-d) = 2^n. Once we determine
  1175. 2^n from d, we can get n via significant_bit_count.
  1176. */
  1177. {
  1178. return significant_bit_count(d & (-d)) - 1;
  1179. } /* trailing_zero_count */
  1180. #else
  1181. #define TRAILING_ZERO_COUNT_DEFINED 0
  1182. extern DWORD trailing_zero_count(digit_tc); /* See mpmisc.c */
  1183. #endif
  1184. /****************************************************************************/
  1185. static inline void digits_to_dwords(digit_tc pdigit[],
  1186. DWORD pdword[],
  1187. DWORDC lng_dwords)
  1188. {
  1189. #if DWORDS_PER_DIGIT == 1
  1190. mp_copy(pdigit, (digit_t*)pdword, lng_dwords);
  1191. #elif DWORDS_PER_DIGIT == 2
  1192. DWORDC lng_half = lng_dwords >> 1;
  1193. DWORD i;
  1194. if (IS_ODD(lng_dwords)) {
  1195. pdword[lng_dwords-1] = (DWORD)pdigit[lng_half];
  1196. }
  1197. for (i = 0; i != lng_half; i++) {
  1198. digit_tc dig = pdigit[i];
  1199. pdword[2*i ] = (DWORD)dig;
  1200. pdword[2*i + 1] = (DWORD)(dig >> DWORD_BITS);
  1201. }
  1202. #else
  1203. #error "Unexpected DWORDS_PER_DIGIT"
  1204. #endif
  1205. } /* digits_to_dwords */
  1206. /****************************************************************************/
  1207. static inline void dwords_to_digits(DWORDC pdword[],
  1208. digit_t pdigit[],
  1209. DWORDC lng_dwords)
  1210. {
  1211. #if DWORDS_PER_DIGIT == 1
  1212. mp_copy((digit_t*)pdword, pdigit, lng_dwords);
  1213. #elif DWORDS_PER_DIGIT == 2
  1214. DWORDC lng_half = lng_dwords >> 1;
  1215. DWORD i;
  1216. if (IS_ODD(lng_dwords)) {
  1217. pdigit[lng_half] = (digit_t)pdword[lng_dwords - 1]; // Zero fill
  1218. }
  1219. for (i = 0; i != lng_half; i++) {
  1220. pdigit[i] = ((digit_t)pdword[2*i+1] << DWORD_BITS)
  1221. | (digit_t)pdword[2*i];
  1222. }
  1223. #else
  1224. #error "Unexpected DWORDS_PER_DIGIT"
  1225. #endif
  1226. } /* dwords_to_digits */
  1227. #endif // RADIX_BITS