windows-xp/Source/XPSP1/NT/ds/published/inc/bignum.w


								/*++


								Copyright (c) 2000 Microsoft Corporation


								Module Name:


								    bignum.h


								Abstract:


								    bignum package routines and defines.


								--*/


								#include <windows.h>

								#include <math.h>


								#ifndef RADIX_BITS              /* If not previously #included */


								#define MP_LONGEST_BITS  2048

								                        /*

								                           Multiple precision moduli can have up to

								                           MP_LONGEST_BITS bits, which is

								                           MP_LONGEST words.  Some routines allow

								                           longer operands.

								                        */


								                        /*

								                           Error messages are not printed in the

								                           production version of the code.

								                           In the test version, compiled

								                           by MSCV with ENABLE_ERROR_MESSAGES

								                           listed under PREPROCESSOR DEFINITIONS

								                           in the project workspace, they are printed,

								                        */


								#ifndef PRINT_ERROR_MESSAGES

								    #ifdef ENABLE_ERROR_MESSAGES

								        #define PRINT_ERROR_MESSAGES 1

								    #else

								        #define PRINT_ERROR_MESSAGES 0

								    #endif

								#endif


								#if PRINT_ERROR_MESSAGES

								#include <stdio.h>

								#include <string.h>

								#include <stdlib.h>

								#endif


								#define COMPILER_GCC 1

								#define COMPILER_VC  2


								#ifndef COMPILER

								    #ifdef __GNUC__

								        #define COMPILER COMPILER_GCC

								                        /* GNU compiler */

								    #endif

								    #ifdef _MSC_VER

								        #define COMPILER COMPILER_VC

								    #endif              /* Microsoft Visual C compiler */

								#endif


								#if !defined(COMPILER) || COMPILER <= 0

								    #error -- "Unknown COMPILER"

								#endif


								#define COMPILER_NAME  ( \

								        COMPILER == COMPILER_GCC ? "GCC compiler" \

								      : COMPILER == COMPILER_VC  ? "Microsoft Visual C++ compiler" \

								      : "Unknown compiler")

								/*

								        Major Windows operating systems

								*/


								#define OS_WCE 1

								#define OS_WNT 2


								#if defined(_WIN32_WCE)

								    #define TARGET_OS OS_WCE

								    #define assert(exp) 0 // or ASSERT(exp)

								                 // No assert.h in Windows CE

								#elif defined(WIN32)

								    #define TARGET_OS OS_WNT

								#else

								    #error "Unknown OS target"

								#endif


								/*

								        List of architectures on which code has been run.

								        The SPARC code was used only during development,

								        and is not a deliverable.

								*/


								#define TARGET_AMD64 1

								#define TARGET_IX86 2

								#define TARGET_MIPS 3

								#define TARGET_PPC 4

								#define TARGET_SPARC 5

								#define TARGET_IA64 6

								#define TARGET_ARM 7

								#define TARGET_SHX 8


								#ifndef TARGET

								    #if defined(_M_AMD64)               // AMD 64-bit

								        #define TARGET TARGET_AMD64

								    #endif

								    #if defined(_M_IX86) || defined(_x86)  // Intel X86 (e.g., 486, Pentium)

								        #define TARGET TARGET_IX86

								    #endif

								    #if defined(_M_MRX000) || defined(_MIPS_) // MIPS 32-bit systems

								        #define TARGET TARGET_MIPS

								    #endif

								    #if defined(_M_PPC)       // Motorola/Macintosh Power PC

								        #define TARGET TARGET_PPC

								    #endif

								    #if defined(__sparc__)    // Sun SPARC

								        #define TARGET TARGET_SPARC

								    #endif

								    #if defined(_M_IA64)      // Intel IA-64 (e.g., Merced, McKinley)

								        #define TARGET TARGET_IA64

								    #endif

								    #if defined(_ARM_)

								        #define TARGET TARGET_ARM

								    #endif

								    #if defined(_SH3_) || defined(_SH4_)   // Hitachi SH-3 or SH-4

								        #define TARGET TARGET_SHX

								    #endif

								#endif


								#if !defined(TARGET) || TARGET <= 0

								    #error -- "Unknown TARGET"

								#endif


								#define TARGET_NAME ( \

								          TARGET == TARGET_AMD64   ? "AMD64" \

								        : TARGET == TARGET_IX86    ? "Intel x86 (x >= 3) and Pentium" \

								        : TARGET == TARGET_MIPS    ? "MIPS R2000/R3000" \

								        : TARGET == TARGET_PPC     ? "Macintosh/Motorola PowerPC" \

								        : TARGET == TARGET_SPARC   ? "Sun SPARC" \

								        : TARGET == TARGET_IA64    ? "Intel IA-64" \

								        : TARGET == TARGET_ARM     ? "ARM" \

								        : TARGET == TARGET_SHX     ? "Hitachi SHx" \

								        : "Unknown target architecture")


								/*

								        USEASM_AMD64, ... specify whether to use assembly language,

								        if it has been written for a platform.

								*/

								#ifndef USEASM

								    #if TARGET == TARGET_IX86

								        #define USEASM 1

								    #elif TARGET == TARGET_MIPS

								        #define USEASM 1

								    #elif TARGET == TARGET_SHX

								        #define USEASM 1

								    #else

								        #define USEASM 0

								    #endif

								#endif


								#if !defined(USEASM) || (USEASM != 0 && USEASM != 1)

								    #error "USEASM not defined"

								#endif


								#define USEASM_AMD64    (USEASM && TARGET == TARGET_AMD64)

								#define USEASM_IX86     (USEASM && TARGET == TARGET_IX86)

								#define USEASM_MIPS     (USEASM && TARGET == TARGET_MIPS)

								#define USEASM_PPC      (USEASM && TARGET == TARGET_PPC)

								#define USEASM_SPARC    (USEASM && TARGET == TARGET_SPARC)

								#define USEASM_IA64     (USEASM && TARGET == TARGET_IA64)

								#define USEASM_ARM      (USEASM && TARGET == TARGET_ARM)

								#define USEASM_SHX      (USEASM && TARGET == TARGET_SHX)


								#if USEASM_SHX

								void __asm(const char*, ...);    // this declartion needed to allow inline of asm

								#endif


								#if COMPILER == COMPILER_VC

								        /*

								           Visual C recognizes _inline but not inline.

								        */

								    #define inline _inline


								    #pragma intrinsic(abs, labs, memcpy)


								    #if TARGET != TARGET_SHX

								    #pragma intrinsic(memset)

								    #endif


								    #pragma warning(disable: 4146 4514)

								         /* 4146 -- unary minus operator applied

								            to unsigned type, result still unsigned.

								            4514 -- unreferenced inline function

								          */

								#endif


								#if TARGET_OS == OS_WCE

								    #define assert(exp) 0 // or ASSERT(exp)

								                 // No assert.h in Windows CE

								    #define CEstatic static

								                 // Windows CE stack limited to 64K

								                 // CEstatic should be used only in

								                 // test codes and other

								                 // single-threaded, non-recursive. codes.

								#else

								    #define CEstatic

								#endif


								/*

								        x86 assembly routines are declared naked,

								        so they do their own stack management and

								        register saving.


								        When using a DLL on Intel platforms, all functions use

								        the __stdcall convention, so the assembly routines use it too.

								        To ensure they are called with the __stdcall

								        conventions always (i.e., even when compiled under Microsoft

								        Developer Studio), we put __stdcall explicitly in the prototypes.

								*/


								#if USEASM_IX86

								    #define Naked86 __declspec(naked)

								    #define Stdcall86 __stdcall

								#else

								    #define Naked86

								    #define Stdcall86

								#endif


								#if (TARGET == TARGET_AMD64) || (TARGET == TARGET_IA64)

								    #define RADIX_BITS 64

								    #define RADIX_BYTES 8

								    typedef   signed __int64 sdigit_t;

								    typedef unsigned __int64  digit_t;

								#else

								    #define RADIX_BITS 32

								    #define RADIX_BYTES 4

								    typedef   signed __int32 sdigit_t;

								    typedef unsigned __int32 digit_t;

								#endif


								#define MP_LONGEST (MP_LONGEST_BITS/RADIX_BITS)


								#if MP_LONGEST_BITS == RADIX_BITS

								    #define LG2_MP_LONGEST 0

								#elif MP_LONGEST_BITS == 2*RADIX_BITS

								    #define LG2_MP_LONGEST 1

								#elif MP_LONGEST_BITS == 4*RADIX_BITS

								    #define LG2_MP_LONGEST 2

								#elif MP_LONGEST_BITS == 8*RADIX_BITS

								    #define LG2_MP_LONGEST 3

								#elif MP_LONGEST_BITS == 16*RADIX_BITS

								    #define LG2_MP_LONGEST 4

								#elif MP_LONGEST_BITS == 32*RADIX_BITS

								    #define LG2_MP_LONGEST 5

								#elif MP_LONGEST_BITS == 64*RADIX_BITS

								    #define LG2_MP_LONGEST 6

								#elif MP_LONGEST_BITS == 128*RADIX_BITS

								    #define LG2_MP_LONGEST 7

								#elif MP_LONGEST_BITS == 256*RADIX_BITS

								    #define LG2_MP_LONGEST 8

								#else

								    #define LG2_MP_LONGEST 0

								#endif


								#if MP_LONGEST_BITS != RADIX_BITS << LG2_MP_LONGEST

								    #error "Unrecognized value of MP_LONGEST_BITS"

								#endif


								/*

								        The letter 'c' following a type name identifies

								        a const entity of that type.

								*/

								typedef const char charc;

								typedef const  digit_t  digit_tc;

								typedef const sdigit_t sdigit_tc;

								typedef const int intc;


								typedef int BOOL;       /* Same as windef.h */

								#ifndef TRUE

								#define TRUE 1

								#endif

								#ifndef FALSE

								#define FALSE 0

								#endif


								#define DIGIT_ZERO ((digit_t)0)

								#define DIGIT_ONE  ((digit_t)1)

								#define RADIX_HALF (DIGIT_ONE << (RADIX_BITS - 1))

								#define RADIXM1 (-DIGIT_ONE)

								#define F_RADIX ((double)RADIXM1 + 1.0)


								#define HALF_RADIX_BITS (RADIX_BITS/2)

								#if (RADIX_BITS != 2*HALF_RADIX_BITS)

								    #error -- "RADIX_BITS must be even"

								#endif

								#define RADIX_HALFMASK_BOTTOM (RADIXM1 >> HALF_RADIX_BITS)


								//          Multiple-precision data is normally represented

								//      in radix 2^RADIX_BITS, with RADIX_BITS bits per word.

								//      Here ``word'' means type digit_t.  RADIX_BITS

								//      is 32 on some architectures (Intel, MIPS, PowerPC)

								//      and 64 bits on other architectures (Alpha).


								//          Within Windows NT, the data type DWORD predominates.

								//      DWORD is a 32-bit unsigned datatype on all platforms

								//      (Intel, Alpha, MIPS, PowerPC).  DWORD data can safely be

								//      written to disk on one architecture and read back on another,

								//      unlike digit_t.


								//      [CAUTION -- Even DWORD is not safe when sending data to

								//      big-endian architectures, such as Office products for the Macintosh.]


								typedef unsigned char BYTE;

								typedef unsigned long DWORD;

								typedef const DWORD DWORDC;


								#define DWORD_BITS 32

								#define DWORD_LEFT_BIT 0x80000000UL


								#if RADIX_BITS % DWORD_BITS != 0

								    #error "RADIX_BITS not a multiple of 32"

								#endif


								#define DWORDS_PER_DIGIT (RADIX_BITS/DWORD_BITS)


								//      DWORDS_TO_DIGITS(lng_dwords) computes the number of digit_t

								//      elements required to store an array with -lng_dwords- DWORDs.

								//      DIGITS_TO_DWORDS converts in the opposite direction.


								#define DWORDS_TO_DIGITS(lng_dwords) \

								                ( ((lng_dwords) + DWORDS_PER_DIGIT - 1)/DWORDS_PER_DIGIT)


								#define DIGITS_TO_DWORDS(lng_digits) ((lng_digits) * DWORDS_PER_DIGIT)


								#define BITS_TO_DIGITS(nb) (((nb) + RADIX_BITS - 1)/RADIX_BITS)


								/*

								        DOUBLE_SHIFT_LEFT(n1, n0, amt) returns

								        n1 shifted left by amt bits,

								        with new bits coming in from the top of n0.


								        DOUBLE_SHIFT_RIGHT(n1, n0, amt) returns n0 shifted right

								        by amt bits, with new bits coming from the bottom of n1.


								        The shift counts must satisfy 0 <= amt <= RADIX_BITS - 1.

								        The shift by    RADIX_BITS - amt   is done in two stages

								        (first by 1, then by RADIX_BITS - 1 - amt),

								        to avoid an illegal shift count of RADIX_BITS if amt = 0.


								        DOUBLE_SHIFT_LEFT_NONZERO and DOUBLE_SHIFT_RIGHT_NONZERO

								        are similar, but disallow a zero shift count, allowing the

								        RADIX_BITS - amt shift to be done in one stage,

								        DOUBLE_SHIFT_LEFT_NONZERO(n1, n0, amt) is the same as

								        DOUBLE_SHIFT_RIGHT_NONZERO(n1, n0, RADIX_BITS - amt).


								        TBD -- If the x86 VC compiler optimizes __int64 shifts,

								        (6.0 SP3 does not), try to rewrite these definitions to generate

								        SHLD and SHRD instructions..

								*/


								#define DOUBLE_SHIFT_LEFT(n1, n0, amt)  \

								        (((n1) << (amt)) | (((n0) >> 1) >> (RADIX_BITS - 1 - (amt))))


								#define DOUBLE_SHIFT_LEFT_NONZERO(n1, n0, amt)  \

								        (((n1) << (amt)) | ((n0) >> (RADIX_BITS - (amt))))


								#define DOUBLE_SHIFT_RIGHT(n1, n0, amt)  \

								        (((n0) >> (amt)) | (((n1) << 1) << (RADIX_BITS - 1 - (amt))))


								#define DOUBLE_SHIFT_RIGHT_NONZERO(n1, n0, amt)  \

								        (((n0) >> (amt)) | ((n1) << (RADIX_BITS - (amt))))


								#include "dblint.h"


								#define digit_getbit(iword, ibit) (((iword) >> (ibit)) & 1)

								#define dword_getbit(iword, ibit) digit_getbit(iword, ibit)

								                        /* Extract bit from a word.

								                        // 0 <= ibit <= RADIX_BITS - 1.

								                        // Rightmost (i.e., least significant) bit is bit 0.

								                        */


								/*

								        Test whether a number is odd or even.

								*/

								#define IS_EVEN(n) (~(n) & 1)

								#define IS_ODD(n) ((n) & 1)


								/*

								        Maximum and minimum of two arguments

								        (no side effects in arguments)

								*/


								#if 0

								    #define MAX _max

								    #define MIN _min

								#else

								    #define MAX(x, y) ((x) > (y) ? (x) : (y))

								    #define MIN(x, y) ((x) > (y) ? (y) : (x))

								#endif


								#if 0

								/*

								        If we are building a DLL, use __declspec before certain variable

								        declarations (and out procedure names in a .def file).

								        _PM_DLL should be #defined when compiling bignum but not the application.


								        If we are building a static library, use normal C declarations.

								*/

								    #ifdef _PM_DLL

								        #define exportable_var __declspec( dllexport )

								        #define exportable_var_declaration __declspec (dllexport)

								    #else

								        #define exportable_var __declspec( dllimport )

								    #endif

								#else

								    #define exportable_var extern

								    #define exportable_var_declaration

								#endif

								#


								/*

								        Macro to return 3^i (exponentiation), for 0 <= i <= 15.

								        Intended for use with constant argument, such as

								        in array dimensions.  The POWER3 array should

								        be used if the argument is variable.

								*/


								#define POWER3CON(i) (   ((i) & 1 ?  3 : 1)  *  ((i) & 2 ?    9 : 1) \

								                       * ((i) & 4 ? 81 : 1)  *  ((i) & 8 ? 6561 : 1) )


								exportable_var DWORDC POWER3[16];   /* See mpglobals.c */

								/*

								        kara.c repeatedly replaces an operand by three

								        half-length operands and a sign.  The sign has

								        type kara_sign_t.  The operands are partitioned

								        in half until their size at most VMUL_MAX_LNG_SINGLE,

								        and sometimes further (see padinfo_initialization in kara.c)

								        This may require up to KARA_MAX_HALVINGS halvings,

								        giving 3^KARA_MAX_HALVINGS outputs each with size

								        as large as VMUL_MAX_SINGLE words.  The signs

								        array has length (3^KARA_MAX_HALVINGS - 1)/2.

								*/

								#if TARGET == TARGET_ALPHA

								    typedef int kara_sign_t;

								                        /* Try to avoid char data on Alpha */

								#else

								    typedef unsigned char kara_sign_t;

								                        /* Values SIGN_PLUS, SIGN_MINUS.  See kara.c. */

								#endif


								typedef const kara_sign_t kara_sign_tc;

								#define VMUL_MAX_LNG_SINGLE 12

								#define KARA_MAX_HALVINGS (LG2_MP_LONGEST - 2)

								#if KARA_MAX_HALVINGS > 15

								    #error -- "Extend POWER3CON macro"

								#endif

								#define KARA_MAX_LNG_DIFS ((MP_LONGEST >> KARA_MAX_HALVINGS) * POWER3CON(KARA_MAX_HALVINGS))

								#define KARA_MAX_LNG_SIGNS ((POWER3CON(KARA_MAX_HALVINGS) - 1)/2)

								#define MEMORY_BANK_ALLOWANCE 1


								typedef struct  {

								                  digit_t     difs[KARA_MAX_LNG_DIFS + MEMORY_BANK_ALLOWANCE];

								                  kara_sign_t signs[KARA_MAX_LNG_SIGNS];

								                } kara_longest_t;   /* For MP_LONGEST or less */

								                       /* On the Pentium P5 and P6,

								                          the two arguments to vmulnn

								                          should lie in different memory banks

								                          (i.e., different addresses mod 32 bytes).

								                          We make the .difs arrays one digit_t entry

								                          larger than essential, in an attempt to reduce

								                          data cache conflicts.  Look for the

								                          MEMORY_BANK_ALLOWANCE symbol in the source code.

								                        */


								#define kara_longest_NULL ((kara_longest_t*)0)

								typedef struct  {

								                  digit_t     difs[KARA_MAX_LNG_DIFS/3 + MEMORY_BANK_ALLOWANCE];

								                  kara_sign_t signs[KARA_MAX_LNG_SIGNS/3];

								                } kara_half_longest_t;  /* For MP_LONGEST/2 or less */


								typedef const kara_half_longest_t kara_half_longest_tc;

								typedef const kara_longest_t      kara_longest_tc;


								typedef struct {      /* Constants relating to padding lengths. */

								                  DWORD  length;

								                                /* length = length3[0] * 2^nhalving */

								                  DWORD  nhalving;

								                  DWORD  length3[KARA_MAX_HALVINGS+1];

								                                /* length3[0] is 1, 2, 3, or 4 */

								                                /* length3[i] is length3[0] * 3^i */

								               } padinfo_t;


								typedef const padinfo_t padinfo_tc;

								#define padinfo_NULL ((padinfo_t*)0)


								/*

								        The reciprocal_1_t type is used when div21

								        or divide or divide_immediate would otherwise

								        divide by the same number repeatedly.  See file divide.c.

								*/


								typedef struct {

								                digit_t multiplier;

								                DWORD   shiftamt;

								               } reciprocal_1_t;


								typedef const reciprocal_1_t reciprocal_1_tc;


								/*

								        mp_modulus_t struct has modulus-dependent constants

								        used for fast reduction (typically for a fixed modulus,

								        which will be used several times, as in modular exponentiation).

								        These constants are initialized by function create_modulus:


								        modulus -- Modulus used for computations.  Must be nonzero.


								        length  -- Length of the modulus, without leading zeros.

								                   Operands to mod_add, mod_mul, mod_sub, ...

								                   are assumed to have this length.


								        padinfo -- Pointer to a padinfo_t struct.  For fast arithmetic,

								                   operands are padded to a length

								                   length_padded >= length (see find_padinfo in kara.c).

								                   The value of length_padded is stored in padinfo->length.

								                   The present implementation requires length_padded be either

								                   a power of 2, or 3 times a power of 2.

								                   For example, if length = 19, then length_padded = 24,

								                   and the operands are treated as 24-word

								                   operands for Karatsuba.


								        half_padinfo -- Pointer to a padinfo_t struct for length

								                   CEIL(length/2).  Used in modular_reduce to

								                   use Karatsuba multiplication on half-length operands.

								                   We denote half_length_padded = half_padinfo->length.


								        reddir  -- Equal to FROM_LEFT if reductions of

								                   products are done from the left (traditional

								                   division), and to FROM_RIGHT if reductions of

								                   products are done from the right (Montgomery reduction).


								                   When using FROM_RIGHT, the modulus must be odd.

								                   Arguments to mod_mul should be pre-scaled by

								                   RADIX^scaling_power (mod modulus).

								                   The product will be similarly scaled.


								        scaling_power --  Equal to 2*half_length_padded when

								                   reddir = FROM_RIGHT.  Undefined

								                   if reddir = FROM_LEFT.


								        one --     Constant 1 (length length), scaled if reddir = FROM_RIGHT.

								                   When reddir = FROM_RIGHT, this is

								                   RADIX^scaling_power (mod modulus).


								        left_multiplier_first -- The first multiplier when reducing from the

								                   left.  Length length.


								-RADIX^(length + half_length_padded)/2^(left_reciprocal_1.shiftamt) mod modulus


								        left_reciprocal_1 -- Reciprocal of the divisor starting at the

								                   leftmost digit (i.e., modulus[length-1]);


								        right_reciprocal_1 -- If modulus is odd, this holds

								                   1/modulus (mod RADIX), for use in mod_shift.

								                   Otherwise the field is zero.


								        right_multiplier_second -- If reddir = FROM_RIGHT,

								                   then this has 1/modulus mod RADIX^(half_length_padded).


								        right_multiplier_first --   -1/RADIX^half_length_padded mod modulus.

								                   Equal to


								        left_multiplier_second -- Contains the half_length_padded*RADIX_BITS


								            (modulus * right_multiplier_second - 1)/RADIX^half_length_padded.

								                   most significant bits of (high power of 2)/modulus

								                   (excluding the leading -1-).  More precisely, this has


								        RADIX^(length + half_length_padded) - 1

								 FLOOR( --------------------------------------- ) - RADIX^(half_length_padded)

								        modulus * 2^(left_reciprocal_1.shiftamt)


								                   See file divide.c for an explanation

								                   about how this constant is used to get accurate

								                   quotients when dividing from the left.


								        left_multiplier_second_over2 -- Left_multiplier_second/2.

								*/


								typedef enum {FROM_LEFT, FROM_RIGHT} reddir_t;

								typedef const reddir_t reddir_tc;


								typedef struct {

								                  digit_t   modulus[MP_LONGEST];

								                  DWORD     length;      /* Length passed to create_modulus */

								                  DWORD     scaling_power; /* 2*half_padinfo->length */

								                  padinfo_tc *padinfo;   /* Pointer to struct containing

								                                            padded length and related info */

								                  padinfo_tc *half_padinfo;

								                                         /* Padinfo info for CEIL(length/2) */

								                  reddir_t  reddir;      /* FROM_LEFT or FROM_RIGHT */

								                  reciprocal_1_t  left_reciprocal_1;

								                  digit_t  right_reciprocal_1;

								                                        /* 1/modulus[0] mod RADIX,

								                                           if modulus is odd */


								                  kara_half_longest_t modulus_kara2[2];

								                                        /*

								                                            Copy of modulus.


								                                            Lower half_length_padded

								                                            and upper

								                                            length - half_length_padded

								                                            words separately passed

								                                            to to_kara.

								                                         */

								                  kara_half_longest_t left_multiplier_first_kara2[2];

								                                /* Remainder when dividing

								                                     -RADIX^(length + half_length_padded)

								                                   / 2^(left_reciprocal_1.shiftamt)

								                                   by modulus.


								                                   Lower and upper halvves separately

								                                   passed to to_kara.

								                                */


								                  kara_half_longest_t left_multiplier_second_kara;

								                                /* half_length_padded*RADIX_BITS

								                                   most significant bits of (left)

								                                   reciprocal of modulus,

								                                   excluding the leading -1-. */


								                  digit_t  left_multiplier_second_over2[MP_LONGEST/2];

								                                /* left_multiplier_second/2 */

								                  kara_half_longest_t right_multiplier_first_kara2[2];

								                                        /*  -1/RADIX^half_length_padded

								                                            mod modulus.

								                                        */

								                  digit_t             right_multiplier_second[MP_LONGEST/2];

								                  kara_half_longest_t right_multiplier_second_kara;

								                              /* 1/modulus mod RADIX^(half_length_padded) */

								                  digit_t  cofactor[MP_LONGEST];

								                  DWORD    lng_cofactor;

								                             /*

								                                In factorization programs, this

								                                holds the cofactor after dividing

								                                modulus by any factors found.

								                                Used by gcdex_jacobi.

								                             */

								                  digit_t  one[MP_LONGEST];

								                } mp_modulus_t;


								typedef const mp_modulus_t mp_modulus_tc;

								/*

								       The modular multiplication code and its

								       relatives (e.g., modular_reduce, to_kara)

								       need large amounts of temporary space

								       during processing.  All big temporaries

								       are gathered into a modmultemp_t struct.

								       Users of these routines can allocate the

								       storage themselves, and pass a pointer

								       to the temporary storage (fastest), or can pass

								       a null pointer (modmultemp_NULL).


								*/

								typedef struct {

								                   // mmul fields are for mod_mul,

								                   // mod_mul_kara, mod_mul_kara1


								        digit_t             mmul_adifs[KARA_MAX_LNG_DIFS];

								        kara_sign_t         mmul_asigns[KARA_MAX_LNG_SIGNS];

								        digit_t             mmul_bdifs[KARA_MAX_LNG_DIFS

								                                       + MEMORY_BANK_ALLOWANCE];

								        kara_sign_t         mmul_bsigns[KARA_MAX_LNG_SIGNS];


								                   // mr_ fields are for modular_reduce.

								                   // The input to modular_reduce can be stored

								                   // in mr_dividend -- this will save a mp_copy call.


								        digit_t             mr_dividend[MAX(2*MP_LONGEST,

								                                            2*KARA_MAX_LNG_DIFS+1)];


								        digit_t             mr_prd1[2*MP_LONGEST];

								        digit_t             mr_prd2[2*MP_LONGEST];

								        digit_t             mr_mptemp[2*MP_LONGEST];


								                   // htk_ fields are for half_times_kara

								                   // and half_times_kara2


								        digit_t             htk_abprd[2][2*KARA_MAX_LNG_DIFS/3];

								        kara_half_longest_t htk_ak;

								        } modmultemp_t;


								/*

								        mod_exp2000 returns statistics on what happened during the

								        exponentiation.

								*/


								typedef struct {              // Statistics from mod_exp2000

								                              // This struct may grow in future versions.

								        DWORD cnt_mod_mul_kara;      // Calls to mod_mul_kara

								        DWORD cnt_mp_copy;           // Calls to mp_copy

								        DWORD cnt_to_kara;           // Calls to to_kara

								    } mod_exp_stats_t;


								/*

								        When an error is detected, variable mp_errno is set

								        to the error number and execution continues.

								        If the library was compiled with #define PRINT_ERROR_MESSAGES,

								        then a message is written to file mp_errfil.


								        The application program should occasionally check mp_errno.


								        Except for MP_ERRNO_NO_ERROR, the error numbers are

								        in alphabetical order by name.  The routine issuing

								        each error number is part of the name.

								*/


								typedef enum {

								        MP_ERRNO_NO_ERROR = 0,

								        MP_ERRNO_CREATE_MODULUS_LEADING_ZERO,

								        MP_ERRNO_CREATE_MODULUS_MONTGOMERY_EVEN,

								        MP_ERRNO_CREATE_MODULUS_TOO_LONG,

								        MP_ERRNO_DIGIT_JACOBI_EVEN_DENOMINATOR,

								        MP_ERRNO_DIGIT_MOD_DIVIDE_ODD_EVEN_MODULUS,

								        MP_ERRNO_DIGIT_MOD_DIVIDE_ODD_NONTRIVIAL_GCD,

								        MP_ERRNO_DIGIT_MOD_DIVIDE_ODD_ZERO_DENOMINATOR,

								        MP_ERRNO_DIGIT_NEXT_PRIME_TOO_HIGH,

								        MP_ERRNO_DIV21_INVALID_ARGUMENT,

								        MP_ERRNO_DIVIDE_ESTIMATION_ERROR,

								        MP_ERRNO_DIVIDE_INVALID_LENGTHS,

								        MP_ERRNO_DIVIDE_LEADING_ZERO,

								        MP_ERRNO_DSA_KEY_GENERATION_INVALID_SIZES,

								        MP_ERRNO_DSA_PRECOMPUTE_BAD_G,

								        MP_ERRNO_DSA_PRECOMPUTE_INVALID_KEY,

								        MP_ERRNO_DSA_PRECOMPUTE_PQ_NONPRIME,

								        MP_ERRNO_DSA_PRECOMPUTE_WRONG_SC,

								        MP_ERRNO_DSA_SIGNATURE_VERIFICATION_NONTRIVIAL_GCD,

								        MP_ERRNO_FIND_BIG_PRIME_BAD_CONGRUENCE_CLASS,

								        MP_ERRNO_FIND_BIG_PRIME_CONG_MOD_TOO_LARGE,

								        MP_ERRNO_FIND_BIG_PRIME_CONG_TO_TOO_LARGE,

								        MP_ERRNO_GCDEX_JACOBI_EVEN_MODULUS,

								        MP_ERRNO_KP_TOO_SHORT,

								        MP_ERRNO_KPDIV_ZERO_DENOMINATOR,

								        MP_ERRNO_MOD_ADD_CARRY_NONZERO,

								        MP_ERRNO_MOD_SHIFT_LEFT_CARRY_NONZERO,

								        MP_ERRNO_MOD_SHIFT_RIGHT_CARRY_NONZERO,

								        MP_ERRNO_MOD_SHIFT_RIGHT_EVEN,

								        MP_ERRNO_MOD_SUB_BORROW_NONZERO,

								        MP_ERRNO_MODULAR_REDUCE_BOTTOM_BITS_DIFFERENT,

								        MP_ERRNO_MODULAR_REDUCE_TOO_LONG,

								        MP_ERRNO_MODULAR_REDUCE_UNEXPECTED_CARRY,

								        MP_ERRNO_MP_DECIMAL_INPUT_NONDIGIT,

								        MP_ERRNO_MP_DECIMAL_INPUT_OVERFLOW,

								        MP_ERRNO_MP_GCD_INTERMEDIATE_EVEN,

								        MP_ERRNO_MP_GCD_TOO_LONG,

								        MP_ERRNO_MP_GCDEX_INTERNAL_ERROR,

								        MP_ERRNO_MP_GCDEX_NONZERO_REMAINDER,

								        MP_ERRNO_MP_GCDEX_ZERO_OPERAND,

								        MP_ERRNO_MP_SHIFT_INVALID_SHIFT_COUNT,

								        MP_ERRNO_MP_TRAILING_ZERO_COUNT_ZERO_ARG,

								        MP_ERRNO_MULTIPLY_LOW_INVALID_LENGTH,

								        MP_ERRNO_NO_MEMORY,      // From mp_alloc_temp

								        MP_ERRNO_PADINFO_INITIALIZATION_BAD_CUTOFF,

								        MP_ERRNO_RANDOM_DIGIT_INTERVAL_INVALID_PARAMETERS,

								        MP_ERRNO_RANDOM_MOD_INVALID_PARAMETERS,

								        MP_ERRNO_RANDOM_MOD_INVERSE_NOT_PRIME,

								        MP_ERRNO_RANDOM_MOD_NONZERO_INVALID_PARAMETERS,

								        MP_ERRNO_SELECT_A0B0_BAD_COFACTOR,

								        MP_ERRNO_SELECT_A0B0_BAD_MU,

								        MP_ERRNO_SELECT_A0B0_NON_CONSTANT_QUOTIENT,

								        MP_ERRNO_SELECT_A0B0_NONZERO_REMAINDER,

								        MP_ERRNO_SELECT_CURVE_BAD_FIELD_TYPE,

								        MP_ERRNO_SELECT_D_UNSUCCESSFUL,

								        MP_ERRNO_TO_KARA_INVALID_LENGTH,

								        MP_ERRNO_TO_KARA2_INVALID_LENGTH,

								        MP_ERRNO_COUNT      // Number of entries above

								    } mp_errno_t;


								exportable_var mp_errno_t mp_errno;


								#if defined(WIN32)

								#define SetMpErrno(x) SetLastError((DWORD)(mp_errno = (x)))

								#define GetMpErrno()  ((mp_errno_t)GetLastError())

								#else

								#define SetMpErrno(x) mp_errno = (x)

								#define GetMpErrno()  mp_errno

								#endif


								#define inadequate_memory (GetMpErrno() == MP_ERRNO_NO_MEMORY)

								extern const char* mp_errno_name(const mp_errno_t);

								       // Update table in mperrnam.c when adding new error message


								/*

								        Some routine allow an argument of digit_NULL or

								        reciprocal_1_NULL when the corresponding argument

								        is not otherwise used.  For example, the division

								        routine allows but does not require a

								        reciprocal structure as argument,

								        and allows the quotient to be suppressed.

								*/


								#define digit_NULL ((digit_t*)0)

								#define reciprocal_1_NULL ((reciprocal_1_t*)0)

								#define modmultemp_NULL ((modmultemp_t*)0)


								/*

								        The next several #defines are used in function prototypes.

								*/


								#define MP_INPUT      digit_tc[]

								#define MP_OUTPUT     digit_t[]

								#define MP_MODIFIED   digit_t[]

								#define DIFS_INPUT    MP_INPUT

								#define DIFS_OUTPUT   MP_OUTPUT

								#define DIFS_MODIFIED MP_MODIFIED

								#define SIGNS_INPUT   kara_sign_tc[]

								#define SIGNS_MODIFIED kara_sign_t[]

								#define SIGNS_OUTPUT  kara_sign_t[]


								extern digit_t accumulate(MP_INPUT, digit_tc, MP_MODIFIED, DWORDC);


								extern digit_t Stdcall86 add_diff(MP_INPUT, DWORDC, MP_INPUT, DWORDC, MP_OUTPUT);


								extern DWORD add_full(MP_INPUT, DWORDC, MP_INPUT, DWORDC, MP_OUTPUT);


								extern digit_t Stdcall86 add_same(MP_INPUT, MP_INPUT, MP_OUTPUT, DWORDC);


								extern DWORD add_signed(MP_INPUT, DWORDC, MP_INPUT, DWORDC, MP_OUTPUT);


								extern int compare_diff(MP_INPUT, DWORDC, MP_INPUT, DWORDC);


								extern int compare_sum_diff(MP_INPUT, DWORDC, MP_INPUT, DWORDC, MP_INPUT, DWORDC);


								BOOL create_modulus(MP_INPUT, DWORDC, reddir_tc, mp_modulus_t*);


								extern dblint_t dblint_gcd(dblint_tc, dblint_tc);


								extern dblint_t dblint_ogcd(dblint_tc, dblint_tc);


								extern digit_t dblint_sqrt(dblint_tc);


								extern digit_t decumulate(MP_INPUT, digit_tc, MP_MODIFIED, DWORDC);


								extern DWORD digit_factor(digit_tc, digit_t[], DWORD[]);


								extern digit_t digit_gcd(digit_tc, digit_tc);


								extern int digit_jacobi(digit_tc, digit_tc);


								extern digit_t digit_least_prime_divisor(digit_tc);


								extern digit_t digit_mod_divide_odd(digit_tc, digit_tc, digit_tc);


								extern digit_t digit_ogcd(digit_tc, digit_tc);


								extern char* digit_out(digit_tc);


								extern digit_t digit_sqrt(digit_tc);


								/*

								       digit2_aligned(array) checks that _array_ is

								       aligned on a 2*sizeof(digit_t) boundary.


								       Assembly code versions of the software sometimes load

								       or store two digit_t values with one instruction.

								       Specifically, MMX code on X86 can load or store two 32-bit

								       digit_t values with one 64-bit MOVQ instruction.

								       IA-64 and AMD64 code can load two 64-bit values to the floating

								       point registers with a load pair instruction.


								       The digit2_aligned macro checks whether its operand is

								       appropriately aligned.  The required alignment is never

								       worse than that returned by mp_alloc_temp.


								*/

								#if TARGET == TARGET_IX86 || TARGET == TARGET_IA64 || TARGET == TARGET_AMD64

								#define digit2_aligned(array) (((DWORD)(array) & (2*sizeof(digit_t) - 1)) == 0)

								#else

								#define digit2_aligned(array) (TRUE)

								#endif


								extern void div21(dblint_tc, digit_tc, digit_t*, digit_t*);


								extern void div21_fast(dblint_tc, digit_tc,

								                       reciprocal_1_tc*, digit_t*, digit_t*);


								extern DWORD divide(MP_INPUT, DWORDC, MP_INPUT, DWORDC,

								                       reciprocal_1_tc*, MP_OUTPUT, MP_OUTPUT);


								extern DWORD divide_rounded(MP_INPUT, DWORDC, MP_INPUT, DWORDC,

								                       reciprocal_1_tc*, MP_OUTPUT, MP_OUTPUT);


								extern void divide_precondition_1(MP_INPUT, DWORDC, reciprocal_1_t*);


								extern digit_t divide_immediate(MP_INPUT, digit_tc,

								                         reciprocal_1_tc*, MP_OUTPUT, DWORDC);


								extern digit_t estimated_quotient_1(digit_tc, digit_tc,

								                                    digit_tc, reciprocal_1_tc*);


								extern BOOL find_big_prime(DWORDC, MP_INPUT, DWORDC,

								                           MP_INPUT, DWORDC, MP_OUTPUT);


								extern padinfo_tc *find_padinfo(DWORDC);


								DWORD from_modular(MP_INPUT, MP_OUTPUT, mp_modulus_tc*);


								extern int gcdex_jacobi(MP_INPUT, mp_modulus_tc*, MP_OUTPUT, MP_OUTPUT);


								extern void mod_add(MP_INPUT, MP_INPUT, MP_OUTPUT, mp_modulus_tc*);


								extern DWORD mod_exp(MP_INPUT, MP_INPUT, DWORDC, MP_OUTPUT,

								                        mp_modulus_tc*);


								extern BOOL mod_exp2000(MP_INPUT, MP_INPUT, DWORDC, MP_OUTPUT,

								                        mp_modulus_tc*, mod_exp_stats_t*);


								extern DWORD mod_exp_immediate(MP_INPUT, digit_tc, MP_OUTPUT,

								                                  mp_modulus_tc*);


								extern int mod_jacobi_immediate(const signed long, mp_modulus_tc*);


								extern void mod_Lucas(MP_INPUT, MP_INPUT, DWORDC, MP_OUTPUT,

								                      mp_modulus_tc*);


								extern void mod_LucasUV(MP_INPUT, MP_INPUT, MP_INPUT, DWORDC,

								                        MP_OUTPUT, MP_OUTPUT, mp_modulus_tc*);


								extern void mod_mul(MP_INPUT, MP_INPUT, MP_OUTPUT,

								                    mp_modulus_tc*, modmultemp_t*);


								extern void mod_mul_immediate(MP_INPUT, digit_tc,

								                              MP_OUTPUT, mp_modulus_tc*);


								extern void mod_mul_kara1(MP_INPUT, DIFS_INPUT, SIGNS_INPUT,

								                          MP_OUTPUT, mp_modulus_tc*, modmultemp_t*);


								extern void mod_mul_kara(DIFS_INPUT, SIGNS_INPUT,

								                         DIFS_INPUT, SIGNS_INPUT,

								                         MP_OUTPUT, mp_modulus_tc*, modmultemp_t*);


								extern void mod_negate(MP_INPUT, MP_OUTPUT, mp_modulus_tc*);


								extern void mod_shift(MP_INPUT, intc, MP_OUTPUT, mp_modulus_tc*);


								extern BOOL mod_sqrt(MP_INPUT, MP_OUTPUT, mp_modulus_tc*);


								extern void mod_sub(MP_INPUT, MP_INPUT, MP_OUTPUT, mp_modulus_tc*);


								extern BOOL modular_reduce(MP_INPUT, DWORDC, reddir_tc,

								                           MP_OUTPUT, mp_modulus_tc*, modmultemp_t*);


								extern void* mp_alloc_temp(DWORDC);

								#define Allocate_Temporaries(typename, ptr) \

								               ptr = (typename*)mp_alloc_temp(sizeof(typename))


								#define Allocate_Temporaries_Multiple(nelmt, typename, ptr) \

								               ptr = (typename*)mp_alloc_temp((nelmt)*sizeof(typename))


								#if USEASM_ALPHA || USEASM_MIPS

								extern void mp_copy(MP_INPUT, MP_OUTPUT, DWORDC);

								#else

								#define mp_copy(src, dest, lng) \

								            memcpy((void *)(dest), (const void *)(src), (lng)*sizeof(digit_t))

								#endif


								extern char* mp_decimal(MP_INPUT, DWORDC);


								extern long mp_decimal_input(charc*, MP_OUTPUT, DWORDC, charc**);


								extern char* mp_dword_decimal(DWORDC*, DWORDC);


								extern int mp_format(MP_MODIFIED, DWORDC,

								                     digit_tc, charc*, char*, DWORDC);


								extern void mp_free_temp(void*);

								#define Free_Temporaries(ptr)    mp_free_temp((void*)ptr)


								extern DWORD mp_gcd(MP_INPUT, DWORDC, MP_INPUT, DWORDC, MP_OUTPUT);


								extern DWORD mp_gcdex(MP_INPUT, DWORDC, MP_INPUT, DWORDC,

								                      MP_OUTPUT, MP_OUTPUT, MP_OUTPUT, MP_OUTPUT);


								extern void mp_initialization(void);


								extern void mp_longshift(MP_INPUT, intc, MP_OUTPUT, DWORDC);


								extern void Stdcall86 mp_mul22s(digit_tc[4], MP_MODIFIED, MP_MODIFIED, DWORDC, sdigit_t[2]);


								extern void Stdcall86 mp_mul22u(digit_tc[4], MP_MODIFIED, MP_MODIFIED, DWORDC,  digit_t[2]);


								extern DWORD mp_remove2(MP_MODIFIED, DWORDC);


								extern digit_t mp_shift(MP_INPUT, intc, MP_OUTPUT, DWORDC);


								extern DWORD mp_significant_bit_count(MP_INPUT, DWORDC);


								extern BOOL mp_sqrt(MP_INPUT, MP_OUTPUT, DWORDC);


								extern DWORD mp_trailing_zero_count(MP_INPUT, DWORDC);


								extern void mul_kara(DIFS_INPUT, SIGNS_INPUT,

								                     DIFS_INPUT, SIGNS_INPUT,

								                     MP_OUTPUT,  padinfo_tc*);


								extern void mul_kara_know_low(DIFS_INPUT, SIGNS_INPUT,

								                              DIFS_INPUT, SIGNS_INPUT,

								                              MP_INPUT, MP_OUTPUT,

								                              padinfo_tc*);


								extern void mul_kara_squaring(MP_INPUT, DWORDC,

								                              DIFS_MODIFIED, SIGNS_MODIFIED,

								                              MP_OUTPUT, padinfo_tc*,

								                              modmultemp_t*);


								extern void multiply(MP_INPUT, DWORDC, MP_INPUT, DWORDC, MP_OUTPUT);


								extern digit_t multiply_immediate(MP_INPUT, digit_tc, MP_OUTPUT, DWORDC);


								extern void Stdcall86 multiply_low(MP_INPUT, MP_INPUT, MP_OUTPUT, DWORDC);


								extern DWORD multiply_signed(MP_INPUT, DWORDC, MP_INPUT, DWORDC, MP_OUTPUT);


								extern DWORD multiply_signed_immediate(MP_INPUT, DWORDC,

								                                       signed long, MP_OUTPUT);

								#if TARGET_OS == OS_WCE

								    #define PRIME_SIEVE_LENGTH 300

								#else

								    #define PRIME_SIEVE_LENGTH 3000

								#endif

								          // Must be multiple of 3

								#if PRIME_SIEVE_LENGTH % 3 != 0

								    #error "PRIME_SIEVE_LENGTH must be a multiple of 3"

								#endif


								extern digit_t next_prime(

								    digit_tc pstart,

								    digit_t *lpsievbeg,

								    digit_t sieve[PRIME_SIEVE_LENGTH],

								    digit_t *lpmax_sieved_squared

								    );


								extern void padinfo_initialization(DWORDC);


								extern BOOL probable_prime(MP_INPUT, DWORDC, MP_INPUT, DWORDC, DWORDC);


								extern BOOL remove_small_primes(MP_INPUT, DWORDC, digit_tc,

								                                digit_t[], DWORD[], DWORD*,

								                                MP_OUTPUT, DWORD*);


								#if USEASM_IX86

								    #define SIGNIFICANT_BIT_COUNT_DEFINED 1

								    #define UNIFORM_SIGNIFICANT_BIT_COUNT 1

								    #pragma warning(disable : 4035)      /* No return value */

								    static inline DWORD significant_bit_count(digit_tc pattern)

								    {

								    _asm {

								            mov  eax,pattern        ; Nonzero pattern

								            bsr  eax,eax            ; eax = index of leftmost nonzero bit

								                                         ; BSR is slow on Pentium

								                                         ; but fast on Pentium Pro

								            inc  eax                ; Add one to get significant bit count

								         }

								    }

								    #pragma warning(default : 4035)

								#elif USEASM_ALPHA

								    #define SIGNIFICANT_BIT_COUNT_DEFINED 1

								    #define UNIFORM_SIGNIFICANT_BIT_COUNT 1

								    extern const BYTE half_byte_significant_bit_count[128];  /* See mpmisc.c */

								            /*

								                The Alpha code uses the CMPBGE instruction to

								                identify which bytes are nonzero.  The most significant

								                bit must occur within the leftmost nonzero byte.

								                We use the CMPBGE output to identify which byte that is.

								                After we extract that byte, we identify its most significant bit.

								            */

								    static inline DWORD significant_bit_count(digit_tc pattern)

								    {

								        DWORDC zero_byte_pattern = __asm("cmpbge  zero, %0, v0", pattern);


								        DWORDC byte_offset_plus_1

								                = 8*half_byte_significant_bit_count[127 - (zero_byte_pattern >> 1)] + 1;


								        return byte_offset_plus_1

								                + half_byte_significant_bit_count[pattern >> byte_offset_plus_1];

								    }

								#else

								    #define SIGNIFICANT_BIT_COUNT_DEFINED 0

								    #define UNIFORM_SIGNIFICANT_BIT_COUNT 0

								           /* Algorithm faster for larger inputs.  See mpmisc.c */

								    extern DWORD significant_bit_count(digit_tc);

								#endif


								extern digit_t Stdcall86 sub_diff(MP_INPUT, DWORDC, MP_INPUT, DWORDC, MP_OUTPUT);


								extern digit_t Stdcall86 sub_same(MP_INPUT, MP_INPUT, MP_OUTPUT, DWORDC);


								#define sub_signed(a, lnga, b, lngb, c) add_signed(a, lnga, b, -(lngb), c)


								extern BOOL test_primality(MP_INPUT, DWORDC);


								extern BOOL test_primality_check_low(MP_INPUT, DWORDC);


								extern BOOL get_prime(MP_OUTPUT, DWORDC);


								extern BOOL get_generator(DWORD*, DWORD*, DWORDC);


								extern void to_kara(MP_INPUT, DWORDC, DIFS_OUTPUT, SIGNS_OUTPUT,

								                    padinfo_tc*);


								extern BOOL to_modular(MP_INPUT, DWORDC, MP_OUTPUT, mp_modulus_tc*);


								//     The following functions are indexed indirectly via pointers.

								//     Also see GF2_get_funcs in field.h.


								#if TARGET == TARGET_IX86

								exportable_var BOOL  MMX_available;   /* See mpglobal.c */

								#endif


								typedef void Stdcall86 vmul_t(DIFS_INPUT, DIFS_INPUT, DIFS_OUTPUT, DWORDC);


								exportable_var vmul_t *vmulnn[VMUL_MAX_LNG_SINGLE];

								                /* Addresses for 1 x 1 to 12 x 12 products */

								                /* Defined at end of vmul.c */


								#if PRINT_ERROR_MESSAGES

								    extern void mp_display(FILE*, charc*, MP_INPUT, DWORDC);

								    exportable_var FILE* mp_errfil;       /* Set to stdout in mp_global.c */

								    extern void mp_print_allocation_statistics(FILE*);

								#endif /* PRINT_ERROR_MESSAGES */


								/****************************************************************************/

								static inline digit_t add_immediate(digit_tc  a[],

								                                    digit_tc iadd,

								                                    digit_t   b[],

								                                    DWORDC    lng)

								/*

								        Compute b = a + iadd, where iadd has length 1.

								        Both a and b have length lng.

								        Function value is carry out of leftmost digit in b.

								*/

								{

								    if (lng == 0) {

								        return iadd;

								    } else if (a == b && b[0] <= RADIXM1 - iadd) {

								        b[0] += iadd;

								        return 0;

								    } else {

								        return add_diff(a, lng, &iadd, 1, b);

								    }

								}

								/***************************************************************************/

								static inline int compare_immediate(digit_tc  a[],

								                                    digit_tc  ivalue,

								                                    DWORDC    lng)

								/*

								        Compare a multiple-precision number to a scalar.

								*/

								{

								    return compare_diff(a, lng, &ivalue, 1);

								}

								/****************************************************************************/

								#if USEASM_MIPS

								extern int compare_same(MP_INPUT, MP_INPUT, DWORDC);

								#else

								static inline int compare_same(digit_tc  a[],

								                               digit_tc  b[],

								                               DWORDC    lng)

								/*

								        Compare two multiple precision numbers a and b each of length lng.

								        Function value is the sign of a - b, namely


								                          +1 if a > b

								                           0 if a = b

								                          -1 if a < b

								*/

								#if USEASM_IX86

								    #pragma warning(disable : 4035)      /* No return value */

								{

								                    /*

								                            We could use REPE CMPSD,

								                            but REPE is slow (4 cycles)

								                            on the Pentium.  Plus we

								                            would need std and cld

								                            to adjust the direction flag.

								                            We anticipate that most loops

								                            will have either 1 or 2 iterations,

								                            and use RISC instructions.

								                    */


								    _asm {

								        mov  eax,lng

								        mov  esi,a

								        mov  edi,b

								     label1:

								        test eax,eax

								        jz   label2              ; If nothing left, exit with eax = 0


								        mov  ecx,[esi+4*eax-4]   ;

								        mov  edx,[edi+4*eax-4]


								        dec  eax                 ; Decrement remaining loop count

								        cmp  ecx,edx             ; Test a[i] - b[i]


								        je   label1


								        sbb  eax,eax             ; eax = 0 if a > b,   -1 if a < b

								        or   eax,1               ; eax = 1 if a > b,   -1 if a < b

								     label2:

								    }

								}

								    #pragma warning(default : 4035)

								#else

								{

								    DWORD i;

								    for (i = lng-1; i != -1; i--) {

								        if (a[i] != b[i]) return (a[i] > b[i] ? +1 : -1);

								    }

								    return 0;

								}  /* compare_same */

								#endif

								#endif

								/****************************************************************************/

								#if USEASM_ALPHA || USEASM_MIPS

								    extern void mp_clear(MP_OUTPUT, DWORDC);

								#elif 0

								static inline void mp_clear(digit_t a[],

								                            DWORDC  lnga)

								/*

								        Zero a multiple-precision number.

								*/

								{

								    DWORD i;

								    for (i = 0; i != lnga; i++) a[i] = 0;

								}

								#else

								#define mp_clear(dest, lng) (void)memset((void *)(dest), 0, (lng)*sizeof(digit_t))

								#endif

								/****************************************************************************/

								#if USEASM_ALPHA || USEASM_MIPS

								    extern void mp_extend(MP_INPUT, DWORDC, MP_OUTPUT, DWORDC);

								        // See alpha.s

								#else

								static inline void mp_extend(digit_tc  a[],

								                             DWORDC    lnga,

								                             digit_t   b[],

								                             DWORDC    lngb)

								/*

								        Copy a to b, while changing its length from

								        lnga to lngb (zero fill).  Require lngb >= lnga.

								*/

								{

								    mp_copy(a, b, lnga);

								    mp_clear(b + lnga, lngb - lnga);

								}

								#endif

								/****************************************************************************/

								static inline digit_t mp_getbit(digit_tc a[],

								                                DWORDC ibit)

								                /* Extract bit of multiple precision number */

								{

								    return digit_getbit(a[ibit/RADIX_BITS],  ibit % RADIX_BITS);

								}


								/******************************************************************************/

								static inline int mp_jacobi_wrt_immediate(digit_tc  numer[],

								                                          DWORD     lnumer,

								                                          digit_tc  denom)

								// Return jacobi(numer, denom), where denom is single precision

								{

								   digit_tc rem = divide_immediate(numer, denom,

								                                   reciprocal_1_NULL,

								                                   digit_NULL, lnumer);

								   return digit_jacobi(rem, denom);

								} /* mp_jacobi_wrt_immediate */

								/****************************************************************************/

								static inline void mp_setbit(digit_t   a[],

								                             DWORDC    ibit,

								                             digit_tc  new_value)

								/*

								        Set a bit to 0 or 1,

								        when the number is viewed as a bit array.

								*/


								{

								    DWORDC j       = ibit / RADIX_BITS;

								    DWORDC ishift  = ibit % RADIX_BITS;


								    digit_tc mask1 = (DIGIT_ONE &  new_value) << ishift;

								    digit_tc mask2 = (DIGIT_ONE & ~new_value) << ishift;


								    a[j] = (a[j] & ~mask2) | mask1;

								} // end mp_setbit

								/****************************************************************************/

								#if MEMORY_BANK_ALLOWANCE == 0

								#define Preferred_Memory_Bank(new_array, old_array) new_array

								#else

								static inline digit_t* Preferred_Memory_Bank(digit_t  *new_array,

								                                             digit_tc *old_array)

								/*

								        To avoid memory bank conflicts, it is desirable

								        that (input) arguments to vmulxx assembly routines start

								        on distinct memory banks, when not doing a squaring.

								        If MEMORY_BANK_ALLOWANCE > 0,

								        then new_array should have MEMORY_BANK_ALLOWANCE

								        extra entries at the end.  We return either

								        new_array or new_array + 1, whichever ensures the

								        addresses are distinct.


								        CAUTION -- This routine does non-portable pointer manipulations.

								*/

								{

								    return new_array + (1 & ~(old_array - new_array));

								}

								#endif

								/****************************************************************************/

								static inline void set_immediate(digit_t  a[],

								                                 digit_tc ivalue,

								                                 DWORDC   lnga)

								{

								   a[0] = ivalue;

								   mp_clear(a + 1, lnga - 1);

								}

								/****************************************************************************/

								static inline DWORD set_immediate_signed(digit_t     a[],

								                                         signed long ivalue)

								{

								    a[0] = labs(ivalue);

								    return (ivalue > 0) - (ivalue < 0);     /* Sign of result  -- -1, 0, +1  */

								}

								/****************************************************************************/


								#if USEASM_MIPS

								extern DWORD significant_digit_count(MP_INPUT, DWORDC);

								#else

								static inline DWORD significant_digit_count(digit_tc  a[],

								                                            DWORDC    lng)

								/*

								        Return the number of significant digits in a.

								        Function value is zero precisely when a == 0.

								*/

								#if USEASM_IX86

								    #pragma warning(disable : 4035)      /* No return value */

								{

								                /*

								                   We could use REPE SCASD,

								                   but the REPE overhead is

								                   four cycles/compare on the Pentium.

								                   We would also need sld and cld.

								                   It is shorter to use RISC instructions.

								                   We anticipate that the leading term a[lng-1]

								                   will usually be nonzero.

								                */


								    _asm {

								        mov  eax,lng

								        mov  edx,a

								     label1:

								        test eax,eax

								        jz   label2             ; If nothing left in number, return 0


								        mov  ecx,[edx+4*eax-4]

								        dec  eax


								        test ecx,ecx            ; Test leading digit

								        jz   label1


								        inc  eax                ; Nonzero element found; return old eax

								     label2:

								    }

								}

								    #pragma warning(default : 4035)

								#else

								{

								    DWORD i = lng;


								    while (i != 0 && a[i-1] == 0) i--;

								    return i;

								}  /* significant_digit_count */

								#endif

								#endif

								#define all_zero(a, lng) (significant_digit_count(a, lng) == 0)

								/****************************************************************************/

								static inline digit_t sub_immediate(digit_tc  a[],

								                                    digit_tc  isub,

								                                    digit_t   b[],

								                                    DWORDC    lng)

								/*

								        Compute b = a - isub, where isub has length 1.

								        Both a and b have length lng.

								        Function value is borrow out of leftmost digit in b.

								*/

								{

								    return (lng == 0 ? isub : sub_diff(a, lng, &isub, 1, b));

								}

								/****************************************************************************/

								#if USEASM_IX86

								#define TRAILING_ZERO_COUNT_DEFINED 1

								static inline DWORD trailing_zero_count(digit_tc d)

								    #pragma warning(disable : 4035)      /* No return value */

								{

								    _asm {

								            mov  eax,d

								            bsf  eax,eax            ; eax = index of rightmost nonzero bit

								                                    ; BSF is slow on Pentium,

								                                    ; but fast on Pentium Pro.

								         }


								}

								    #pragma warning(default : 4035)

								#elif UNIFORM_SIGNIFICANT_BIT_COUNT

								#define TRAILING_ZERO_COUNT_DEFINED 1

								static inline DWORD trailing_zero_count(digit_tc d)

								/*

								        Given a nonzero integer d, this routine computes

								        the largest integer n such that 2^n divides d.


								        If d = 2^n * (2k + 1), then


								                        d =     k *2^(n+1) + 2^n

								                       -d = (-1-k)*2^(n+1) + 2^n


								        The integers k and -1 - k are one's complements of

								        each other, so d & (-d) = 2^n.  Once we determine

								        2^n from d, we can get n via significant_bit_count.

								*/

								{

								    return significant_bit_count(d & (-d)) - 1;

								}  /* trailing_zero_count */

								#else

								#define TRAILING_ZERO_COUNT_DEFINED 0

								extern DWORD trailing_zero_count(digit_tc);   /* See mpmisc.c */

								#endif

								/****************************************************************************/

								static inline void digits_to_dwords(digit_tc  pdigit[],

								                                    DWORD     pdword[],

								                                    DWORDC    lng_dwords)

								{

								#if DWORDS_PER_DIGIT == 1

								    mp_copy(pdigit, (digit_t*)pdword, lng_dwords);

								#elif DWORDS_PER_DIGIT == 2

								    DWORDC lng_half = lng_dwords >> 1;

								    DWORD i;


								    if (IS_ODD(lng_dwords)) {

								        pdword[lng_dwords-1] = (DWORD)pdigit[lng_half];

								    }

								    for (i = 0; i != lng_half; i++) {

								        digit_tc dig = pdigit[i];

								        pdword[2*i    ] = (DWORD)dig;

								        pdword[2*i + 1] = (DWORD)(dig >> DWORD_BITS);

								    }

								#else

								    #error "Unexpected DWORDS_PER_DIGIT"

								#endif

								}  /* digits_to_dwords */

								/****************************************************************************/

								static inline void dwords_to_digits(DWORDC  pdword[],

								                                    digit_t pdigit[],

								                                    DWORDC  lng_dwords)

								{

								#if DWORDS_PER_DIGIT == 1

								    mp_copy((digit_t*)pdword, pdigit, lng_dwords);

								#elif DWORDS_PER_DIGIT == 2

								    DWORDC lng_half = lng_dwords >> 1;

								    DWORD i;


								    if (IS_ODD(lng_dwords)) {

								        pdigit[lng_half] = (digit_t)pdword[lng_dwords - 1];  // Zero fill

								    }

								    for (i = 0; i != lng_half; i++) {

								        pdigit[i] =    ((digit_t)pdword[2*i+1] << DWORD_BITS)

								                     |  (digit_t)pdword[2*i];

								    }

								#else

								    #error "Unexpected DWORDS_PER_DIGIT"

								#endif

								}  /* dwords_to_digits */


								#endif // RADIX_BITS