|
|
// integer.cpp - written and placed in the public domain by Wei Dai
// contains public domain code contributed by Alister Lee and Leonard Janke
#include "pch.h"
#ifndef CRYPTOPP_IMPORTS
#include "integer.h"
#include "modarith.h"
#include "nbtheory.h"
#include "asn.h"
#include "oids.h"
#include "words.h"
#include "algparam.h"
#include "pubkey.h" // for P1363_KDF2
#include "sha.h"
#include "cpu.h"
#include <iostream>
#if _MSC_VER >= 1400
#include <intrin.h>
#endif
#ifdef __DECCXX
#include <c_asm.h>
#endif
#ifdef CRYPTOPP_MSVC6_NO_PP
#pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
#endif
#define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
NAMESPACE_BEGIN(CryptoPP)
bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt) { if (valueType != typeid(Integer)) return false; *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt); return true; }
inline static int Compare(const word *A, const word *B, size_t N) { while (N--) if (A[N] > B[N]) return 1; else if (A[N] < B[N]) return -1;
return 0; }
inline static int Increment(word *A, size_t N, word B=1) { assert(N); word t = A[0]; A[0] = t+B; if (A[0] >= t) return 0; for (unsigned i=1; i<N; i++) if (++A[i]) return 0; return 1; }
inline static int Decrement(word *A, size_t N, word B=1) { assert(N); word t = A[0]; A[0] = t-B; if (A[0] <= t) return 0; for (unsigned i=1; i<N; i++) if (A[i]--) return 0; return 1; }
static void TwosComplement(word *A, size_t N) { Decrement(A, N); for (unsigned i=0; i<N; i++) A[i] = ~A[i]; }
static word AtomicInverseModPower2(word A) { assert(A%2==1);
word R=A%8;
for (unsigned i=3; i<WORD_BITS; i*=2) R = R*(2-R*A);
assert(R*A==1); return R; }
// ********************************************************
#if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
#define Declare2Words(x) word x##0, x##1;
#define AssignWord(a, b) a##0 = b; a##1 = 0;
#define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
#define LowWord(a) a##0
#define HighWord(a) a##1
#ifdef _MSC_VER
#define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
#ifndef __INTEL_COMPILER
#define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
#endif
#elif defined(__DECCXX)
#define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
#elif defined(__x86_64__)
#if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
// Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
#define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
#else
#define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
#define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
#define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
#define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
#define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
#define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
#endif
#endif
#define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
#ifndef Double3Words
#define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
#endif
#ifndef Acc2WordsBy2
#define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
#endif
#define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
#define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
#define GetCarry(u) u##1
#define GetBorrow(u) u##1
#else
#define Declare2Words(x) dword x;
#if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
#define MultiplyWords(p, a, b) p = __emulu(a, b);
#else
#define MultiplyWords(p, a, b) p = (dword)a*b;
#endif
#define AssignWord(a, b) a = b;
#define Add2WordsBy1(a, b, c) a = b + c;
#define Acc2WordsBy2(a, b) a += b;
#define LowWord(a) word(a)
#define HighWord(a) word(a>>WORD_BITS)
#define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
#define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
#define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
#define GetCarry(u) HighWord(u)
#define GetBorrow(u) word(u>>(WORD_BITS*2-1))
#endif
#ifndef MulAcc
#define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
#endif
#ifndef Acc2WordsBy1
#define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
#endif
#ifndef Acc3WordsBy2
#define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
#endif
class DWord { public: DWord() {}
#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
explicit DWord(word low) { m_whole = low; } #else
explicit DWord(word low) { m_halfs.low = low; m_halfs.high = 0; } #endif
DWord(word low, word high) { m_halfs.low = low; m_halfs.high = high; }
static DWord Multiply(word a, word b) { DWord r; #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
r.m_whole = (dword)a * b; #elif defined(MultiplyWordsLoHi)
MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b); #endif
return r; }
static DWord MultiplyAndAdd(word a, word b, word c) { DWord r = Multiply(a, b); return r += c; }
DWord & operator+=(word a) { #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
m_whole = m_whole + a; #else
m_halfs.low += a; m_halfs.high += (m_halfs.low < a); #endif
return *this; }
DWord operator+(word a) { DWord r; #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
r.m_whole = m_whole + a; #else
r.m_halfs.low = m_halfs.low + a; r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a); #endif
return r; }
DWord operator-(DWord a) { DWord r; #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
r.m_whole = m_whole - a.m_whole; #else
r.m_halfs.low = m_halfs.low - a.m_halfs.low; r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low); #endif
return r; }
DWord operator-(word a) { DWord r; #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
r.m_whole = m_whole - a; #else
r.m_halfs.low = m_halfs.low - a; r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low); #endif
return r; }
// returns quotient, which must fit in a word
word operator/(word divisor);
word operator%(word a);
bool operator!() const { #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
return !m_whole; #else
return !m_halfs.high && !m_halfs.low; #endif
}
word GetLowHalf() const {return m_halfs.low;} word GetHighHalf() const {return m_halfs.high;} word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
private: union { #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
dword m_whole; #endif
struct { #ifdef IS_LITTLE_ENDIAN
word low; word high; #else
word high; word low; #endif
} m_halfs; }; };
class Word { public: Word() {}
Word(word value) { m_whole = value; }
Word(hword low, hword high) { m_whole = low | (word(high) << (WORD_BITS/2)); }
static Word Multiply(hword a, hword b) { Word r; r.m_whole = (word)a * b; return r; }
Word operator-(Word a) { Word r; r.m_whole = m_whole - a.m_whole; return r; }
Word operator-(hword a) { Word r; r.m_whole = m_whole - a; return r; }
// returns quotient, which must fit in a word
hword operator/(hword divisor) { return hword(m_whole / divisor); }
bool operator!() const { return !m_whole; }
word GetWhole() const {return m_whole;} hword GetLowHalf() const {return hword(m_whole);} hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));} hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));} private: word m_whole; };
// do a 3 word by 2 word divide, returns quotient and leaves remainder in A
template <class S, class D> S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL) { // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
// estimate the quotient: do a 2 S by 1 S divide
S Q; if (S(B1+1) == 0) Q = A[2]; else if (B1 > 0) Q = D(A[1], A[2]) / S(B1+1); else Q = D(A[0], A[1]) / B0;
// now subtract Q*B from A
D p = D::Multiply(B0, Q); D u = (D) A[0] - p.GetLowHalf(); A[0] = u.GetLowHalf(); u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q); A[1] = u.GetLowHalf(); A[2] += u.GetHighHalf();
// Q <= actual quotient, so fix it
while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0)) { u = (D) A[0] - B0; A[0] = u.GetLowHalf(); u = (D) A[1] - B1 - u.GetHighHalfAsBorrow(); A[1] = u.GetLowHalf(); A[2] += u.GetHighHalf(); Q++; assert(Q); // shouldn't overflow
}
return Q; }
// do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
template <class S, class D> inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B) { if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
return D(Ah.GetLowHalf(), Ah.GetHighHalf()); else { S Q[2]; T[0] = Al.GetLowHalf(); T[1] = Al.GetHighHalf(); T[2] = Ah.GetLowHalf(); T[3] = Ah.GetHighHalf(); Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf()); Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf()); return D(Q[0], Q[1]); } }
// returns quotient, which must fit in a word
inline word DWord::operator/(word a) { #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
return word(m_whole / a); #else
hword r[4]; return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole(); #endif
}
inline word DWord::operator%(word a) { #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
return word(m_whole % a); #else
if (a < (word(1) << (WORD_BITS/2))) { hword h = hword(a); word r = m_halfs.high % h; r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h; return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h); } else { hword r[4]; DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a); return Word(r[0], r[1]).GetWhole(); } #endif
}
// ********************************************************
// use some tricks to share assembly code between MSVC and GCC
#if defined(__GNUC__)
#define AddPrologue \
int result; \ __asm__ __volatile__ \ ( \ ".intel_syntax noprefix;" #define AddEpilogue \
".att_syntax prefix;" \ : "=a" (result)\ : "d" (C), "a" (A), "D" (B), "c" (N) \ : "%esi", "memory", "cc" \ );\ return result; #define MulPrologue \
__asm__ __volatile__ \ ( \ ".intel_syntax noprefix;" \ AS1( push ebx) \ AS2( mov ebx, edx) #define MulEpilogue \
AS1( pop ebx) \ ".att_syntax prefix;" \ : \ : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \ : "%esi", "memory", "cc" \ ); #define SquPrologue MulPrologue
#define SquEpilogue \
AS1( pop ebx) \ ".att_syntax prefix;" \ : \ : "d" (s_maskLow16), "c" (C), "a" (A) \ : "%esi", "%edi", "memory", "cc" \ ); #define TopPrologue MulPrologue
#define TopEpilogue \
AS1( pop ebx) \ ".att_syntax prefix;" \ : \ : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \ : "memory", "cc" \ ); #else
#define AddPrologue \
__asm push edi \ __asm push esi \ __asm mov eax, [esp+12] \ __asm mov edi, [esp+16] #define AddEpilogue \
__asm pop esi \ __asm pop edi \ __asm ret 8 #if _MSC_VER < 1300
#define SaveEBX __asm push ebx
#define RestoreEBX __asm pop ebx
#else
#define SaveEBX
#define RestoreEBX
#endif
#define SquPrologue \
AS2( mov eax, A) \ AS2( mov ecx, C) \ SaveEBX \ AS2( lea ebx, s_maskLow16) #define MulPrologue \
AS2( mov eax, A) \ AS2( mov edi, B) \ AS2( mov ecx, C) \ SaveEBX \ AS2( lea ebx, s_maskLow16) #define TopPrologue \
AS2( mov eax, A) \ AS2( mov edi, B) \ AS2( mov ecx, C) \ AS2( mov esi, L) \ SaveEBX \ AS2( lea ebx, s_maskLow16) #define SquEpilogue RestoreEBX
#define MulEpilogue RestoreEBX
#define TopEpilogue RestoreEBX
#endif
#ifdef CRYPTOPP_X64_MASM_AVAILABLE
extern "C" { int Baseline_Add(size_t N, word *C, const word *A, const word *B); int Baseline_Sub(size_t N, word *C, const word *A, const word *B); } #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
int Baseline_Add(size_t N, word *C, const word *A, const word *B) { word result; __asm__ __volatile__ ( ".intel_syntax;" AS1( neg %1) ASJ( jz, 1, f) AS2( mov %0,[%3+8*%1]) AS2( add %0,[%4+8*%1]) AS2( mov [%2+8*%1],%0) ASL(0) AS2( mov %0,[%3+8*%1+8]) AS2( adc %0,[%4+8*%1+8]) AS2( mov [%2+8*%1+8],%0) AS2( lea %1,[%1+2]) ASJ( jrcxz, 1, f) AS2( mov %0,[%3+8*%1]) AS2( adc %0,[%4+8*%1]) AS2( mov [%2+8*%1],%0) ASJ( jmp, 0, b) ASL(1) AS2( mov %0, 0) AS2( adc %0, %0) ".att_syntax;" : "=&r" (result), "+c" (N) : "r" (C+N), "r" (A+N), "r" (B+N) : "memory", "cc" ); return (int)result; }
int Baseline_Sub(size_t N, word *C, const word *A, const word *B) { word result; __asm__ __volatile__ ( ".intel_syntax;" AS1( neg %1) ASJ( jz, 1, f) AS2( mov %0,[%3+8*%1]) AS2( sub %0,[%4+8*%1]) AS2( mov [%2+8*%1],%0) ASL(0) AS2( mov %0,[%3+8*%1+8]) AS2( sbb %0,[%4+8*%1+8]) AS2( mov [%2+8*%1+8],%0) AS2( lea %1,[%1+2]) ASJ( jrcxz, 1, f) AS2( mov %0,[%3+8*%1]) AS2( sbb %0,[%4+8*%1]) AS2( mov [%2+8*%1],%0) ASJ( jmp, 0, b) ASL(1) AS2( mov %0, 0) AS2( adc %0, %0) ".att_syntax;" : "=&r" (result), "+c" (N) : "r" (C+N), "r" (A+N), "r" (B+N) : "memory", "cc" ); return (int)result; } #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B) { AddPrologue
// now: eax = A, edi = B, edx = C, ecx = N
AS2( lea eax, [eax+4*ecx]) AS2( lea edi, [edi+4*ecx]) AS2( lea edx, [edx+4*ecx])
AS1( neg ecx) // ecx is negative index
AS2( test ecx, 2) // this clears carry flag
ASJ( jz, 0, f) AS2( sub ecx, 2) ASJ( jmp, 1, f)
ASL(0) ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
AS2( mov esi,[eax+4*ecx]) AS2( adc esi,[edi+4*ecx]) AS2( mov [edx+4*ecx],esi) AS2( mov esi,[eax+4*ecx+4]) AS2( adc esi,[edi+4*ecx+4]) AS2( mov [edx+4*ecx+4],esi) ASL(1) AS2( mov esi,[eax+4*ecx+8]) AS2( adc esi,[edi+4*ecx+8]) AS2( mov [edx+4*ecx+8],esi) AS2( mov esi,[eax+4*ecx+12]) AS2( adc esi,[edi+4*ecx+12]) AS2( mov [edx+4*ecx+12],esi)
AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
ASJ( jmp, 0, b)
ASL(2) AS2( mov eax, 0) AS1( setc al) // store carry into eax (return result register)
AddEpilogue }
CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B) { AddPrologue
// now: eax = A, edi = B, edx = C, ecx = N
AS2( lea eax, [eax+4*ecx]) AS2( lea edi, [edi+4*ecx]) AS2( lea edx, [edx+4*ecx])
AS1( neg ecx) // ecx is negative index
AS2( test ecx, 2) // this clears carry flag
ASJ( jz, 0, f) AS2( sub ecx, 2) ASJ( jmp, 1, f)
ASL(0) ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
AS2( mov esi,[eax+4*ecx]) AS2( sbb esi,[edi+4*ecx]) AS2( mov [edx+4*ecx],esi) AS2( mov esi,[eax+4*ecx+4]) AS2( sbb esi,[edi+4*ecx+4]) AS2( mov [edx+4*ecx+4],esi) ASL(1) AS2( mov esi,[eax+4*ecx+8]) AS2( sbb esi,[edi+4*ecx+8]) AS2( mov [edx+4*ecx+8],esi) AS2( mov esi,[eax+4*ecx+12]) AS2( sbb esi,[edi+4*ecx+12]) AS2( mov [edx+4*ecx+12],esi)
AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
ASJ( jmp, 0, b)
ASL(2) AS2( mov eax, 0) AS1( setc al) // store carry into eax (return result register)
AddEpilogue }
#if CRYPTOPP_INTEGER_SSE2
CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B) { AddPrologue
// now: eax = A, edi = B, edx = C, ecx = N
AS2( lea eax, [eax+4*ecx]) AS2( lea edi, [edi+4*ecx]) AS2( lea edx, [edx+4*ecx])
AS1( neg ecx) // ecx is negative index
AS2( pxor mm2, mm2) ASJ( jz, 2, f) AS2( test ecx, 2) // this clears carry flag
ASJ( jz, 0, f) AS2( sub ecx, 2) ASJ( jmp, 1, f)
ASL(0) AS2( movd mm0, DWORD PTR [eax+4*ecx]) AS2( movd mm1, DWORD PTR [edi+4*ecx]) AS2( paddq mm0, mm1) AS2( paddq mm2, mm0) AS2( movd DWORD PTR [edx+4*ecx], mm2) AS2( psrlq mm2, 32)
AS2( movd mm0, DWORD PTR [eax+4*ecx+4]) AS2( movd mm1, DWORD PTR [edi+4*ecx+4]) AS2( paddq mm0, mm1) AS2( paddq mm2, mm0) AS2( movd DWORD PTR [edx+4*ecx+4], mm2) AS2( psrlq mm2, 32)
ASL(1) AS2( movd mm0, DWORD PTR [eax+4*ecx+8]) AS2( movd mm1, DWORD PTR [edi+4*ecx+8]) AS2( paddq mm0, mm1) AS2( paddq mm2, mm0) AS2( movd DWORD PTR [edx+4*ecx+8], mm2) AS2( psrlq mm2, 32)
AS2( movd mm0, DWORD PTR [eax+4*ecx+12]) AS2( movd mm1, DWORD PTR [edi+4*ecx+12]) AS2( paddq mm0, mm1) AS2( paddq mm2, mm0) AS2( movd DWORD PTR [edx+4*ecx+12], mm2) AS2( psrlq mm2, 32)
AS2( add ecx, 4) ASJ( jnz, 0, b)
ASL(2) AS2( movd eax, mm2) AS1( emms)
AddEpilogue } CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B) { AddPrologue
// now: eax = A, edi = B, edx = C, ecx = N
AS2( lea eax, [eax+4*ecx]) AS2( lea edi, [edi+4*ecx]) AS2( lea edx, [edx+4*ecx])
AS1( neg ecx) // ecx is negative index
AS2( pxor mm2, mm2) ASJ( jz, 2, f) AS2( test ecx, 2) // this clears carry flag
ASJ( jz, 0, f) AS2( sub ecx, 2) ASJ( jmp, 1, f)
ASL(0) AS2( movd mm0, DWORD PTR [eax+4*ecx]) AS2( movd mm1, DWORD PTR [edi+4*ecx]) AS2( psubq mm0, mm1) AS2( psubq mm0, mm2) AS2( movd DWORD PTR [edx+4*ecx], mm0) AS2( psrlq mm0, 63)
AS2( movd mm2, DWORD PTR [eax+4*ecx+4]) AS2( movd mm1, DWORD PTR [edi+4*ecx+4]) AS2( psubq mm2, mm1) AS2( psubq mm2, mm0) AS2( movd DWORD PTR [edx+4*ecx+4], mm2) AS2( psrlq mm2, 63)
ASL(1) AS2( movd mm0, DWORD PTR [eax+4*ecx+8]) AS2( movd mm1, DWORD PTR [edi+4*ecx+8]) AS2( psubq mm0, mm1) AS2( psubq mm0, mm2) AS2( movd DWORD PTR [edx+4*ecx+8], mm0) AS2( psrlq mm0, 63)
AS2( movd mm2, DWORD PTR [eax+4*ecx+12]) AS2( movd mm1, DWORD PTR [edi+4*ecx+12]) AS2( psubq mm2, mm1) AS2( psubq mm2, mm0) AS2( movd DWORD PTR [edx+4*ecx+12], mm2) AS2( psrlq mm2, 63)
AS2( add ecx, 4) ASJ( jnz, 0, b)
ASL(2) AS2( movd eax, mm2) AS1( emms)
AddEpilogue } #endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
#else
int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B) { assert (N%2 == 0);
Declare2Words(u); AssignWord(u, 0); for (size_t i=0; i<N; i+=2) { AddWithCarry(u, A[i], B[i]); C[i] = LowWord(u); AddWithCarry(u, A[i+1], B[i+1]); C[i+1] = LowWord(u); } return int(GetCarry(u)); }
int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B) { assert (N%2 == 0);
Declare2Words(u); AssignWord(u, 0); for (size_t i=0; i<N; i+=2) { SubtractWithBorrow(u, A[i], B[i]); C[i] = LowWord(u); SubtractWithBorrow(u, A[i+1], B[i+1]); C[i+1] = LowWord(u); } return int(GetBorrow(u)); } #endif
static word LinearMultiply(word *C, const word *A, word B, size_t N) { word carry=0; for(unsigned i=0; i<N; i++) { Declare2Words(p); MultiplyWords(p, A[i], B); Acc2WordsBy1(p, carry); C[i] = LowWord(p); carry = HighWord(p); } return carry; }
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
#define Mul_2 \
Mul_Begin(2) \ Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ Mul_End(1, 1)
#define Mul_4 \
Mul_Begin(4) \ Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \ Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \ Mul_End(5, 3)
#define Mul_8 \
Mul_Begin(8) \ Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \ Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \ Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \ Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \ Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \ Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \ Mul_End(13, 7)
#define Mul_16 \
Mul_Begin(16) \ Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \ Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \ Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \ Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \ Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \ Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \ Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \ Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \ Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \ Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \ Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \ Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \ Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \ Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \ Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \ Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \ Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \ Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \ Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \ Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \ Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \ Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \ Mul_End(29, 15)
#define Squ_2 \
Squ_Begin(2) \ Squ_End(2)
#define Squ_4 \
Squ_Begin(4) \ Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \ Squ_SaveAcc(4, 2, 3) Squ_NonDiag \ Squ_End(4)
#define Squ_8 \
Squ_Begin(8) \ Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \ Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \ Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \ Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \ Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \ Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \ Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \ Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \ Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \ Squ_SaveAcc(12, 6, 7) Squ_NonDiag \ Squ_End(8)
#define Squ_16 \
Squ_Begin(16) \ Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \ Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \ Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \ Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \ Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \ Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \ Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \ Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \ Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \ Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \ Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \ Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \ Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \ Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \ Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \ Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \ Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \ Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \ Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \ Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \ Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \ Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \ Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \ Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \ Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \ Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \ Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \ Squ_SaveAcc(28, 14, 15) Squ_NonDiag \ Squ_End(16)
#define Bot_2 \
Mul_Begin(2) \ Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \ Bot_End(2)
#define Bot_4 \
Mul_Begin(4) \ Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \ Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \ Bot_End(4)
#define Bot_8 \
Mul_Begin(8) \ Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \ Bot_End(8)
#define Bot_16 \
Mul_Begin(16) \ Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \ Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \ Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \ Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \ Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \ Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \ Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \ Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \ Bot_End(16) #endif
#if 0
#define Mul_Begin(n) \
Declare2Words(p) \ Declare2Words(c) \ Declare2Words(d) \ MultiplyWords(p, A[0], B[0]) \ AssignWord(c, LowWord(p)) \ AssignWord(d, HighWord(p))
#define Mul_Acc(i, j) \
MultiplyWords(p, A[i], B[j]) \ Acc2WordsBy1(c, LowWord(p)) \ Acc2WordsBy1(d, HighWord(p))
#define Mul_SaveAcc(k, i, j) \
R[k] = LowWord(c); \ Add2WordsBy1(c, d, HighWord(c)) \ MultiplyWords(p, A[i], B[j]) \ AssignWord(d, HighWord(p)) \ Acc2WordsBy1(c, LowWord(p))
#define Mul_End(n) \
R[2*n-3] = LowWord(c); \ Acc2WordsBy1(d, HighWord(c)) \ MultiplyWords(p, A[n-1], B[n-1])\ Acc2WordsBy2(d, p) \ R[2*n-2] = LowWord(d); \ R[2*n-1] = HighWord(d);
#define Bot_SaveAcc(k, i, j) \
R[k] = LowWord(c); \ word e = LowWord(d) + HighWord(c); \ e += A[i] * B[j];
#define Bot_Acc(i, j) \
e += A[i] * B[j];
#define Bot_End(n) \
R[n-1] = e; #else
#define Mul_Begin(n) \
Declare2Words(p) \ word c; \ Declare2Words(d) \ MultiplyWords(p, A[0], B[0]) \ c = LowWord(p); \ AssignWord(d, HighWord(p))
#define Mul_Acc(i, j) \
MulAcc(c, d, A[i], B[j])
#define Mul_SaveAcc(k, i, j) \
R[k] = c; \ c = LowWord(d); \ AssignWord(d, HighWord(d)) \ MulAcc(c, d, A[i], B[j])
#define Mul_End(k, i) \
R[k] = c; \ MultiplyWords(p, A[i], B[i]) \ Acc2WordsBy2(p, d) \ R[k+1] = LowWord(p); \ R[k+2] = HighWord(p);
#define Bot_SaveAcc(k, i, j) \
R[k] = c; \ c = LowWord(d); \ c += A[i] * B[j];
#define Bot_Acc(i, j) \
c += A[i] * B[j];
#define Bot_End(n) \
R[n-1] = c; #endif
#define Squ_Begin(n) \
Declare2Words(p) \ word c; \ Declare2Words(d) \ Declare2Words(e) \ MultiplyWords(p, A[0], A[0]) \ R[0] = LowWord(p); \ AssignWord(e, HighWord(p)) \ MultiplyWords(p, A[0], A[1]) \ c = LowWord(p); \ AssignWord(d, HighWord(p)) \ Squ_NonDiag \
#define Squ_NonDiag \
Double3Words(c, d)
#define Squ_SaveAcc(k, i, j) \
Acc3WordsBy2(c, d, e) \ R[k] = c; \ MultiplyWords(p, A[i], A[j]) \ c = LowWord(p); \ AssignWord(d, HighWord(p)) \
#define Squ_Acc(i, j) \
MulAcc(c, d, A[i], A[j])
#define Squ_Diag(i) \
Squ_NonDiag \ MulAcc(c, d, A[i], A[i])
#define Squ_End(n) \
Acc3WordsBy2(c, d, e) \ R[2*n-3] = c; \ MultiplyWords(p, A[n-1], A[n-1])\ Acc2WordsBy2(p, e) \ R[2*n-2] = LowWord(p); \ R[2*n-1] = HighWord(p);
void Baseline_Multiply2(word *R, const word *A, const word *B) { Mul_2 }
void Baseline_Multiply4(word *R, const word *A, const word *B) { Mul_4 }
void Baseline_Multiply8(word *R, const word *A, const word *B) { Mul_8 }
void Baseline_Square2(word *R, const word *A) { Squ_2 }
void Baseline_Square4(word *R, const word *A) { Squ_4 }
void Baseline_Square8(word *R, const word *A) { Squ_8 }
void Baseline_MultiplyBottom2(word *R, const word *A, const word *B) { Bot_2 }
void Baseline_MultiplyBottom4(word *R, const word *A, const word *B) { Bot_4 }
void Baseline_MultiplyBottom8(word *R, const word *A, const word *B) { Bot_8 }
#define Top_Begin(n) \
Declare2Words(p) \ word c; \ Declare2Words(d) \ MultiplyWords(p, A[0], B[n-2]);\ AssignWord(d, HighWord(p));
#define Top_Acc(i, j) \
MultiplyWords(p, A[i], B[j]);\ Acc2WordsBy1(d, HighWord(p));
#define Top_SaveAcc0(i, j) \
c = LowWord(d); \ AssignWord(d, HighWord(d)) \ MulAcc(c, d, A[i], B[j])
#define Top_SaveAcc1(i, j) \
c = L<c; \ Acc2WordsBy1(d, c); \ c = LowWord(d); \ AssignWord(d, HighWord(d)) \ MulAcc(c, d, A[i], B[j])
void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L) { word T[4]; Baseline_Multiply2(T, A, B); R[0] = T[2]; R[1] = T[3]; }
void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L) { Top_Begin(4) Top_Acc(1, 1) Top_Acc(2, 0) \ Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \ Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \ Mul_End(1, 3) }
void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L) { Top_Begin(8) Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \ Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \ Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \ Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \ Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \ Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \ Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \ Mul_End(5, 7) }
#if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
void Baseline_Multiply16(word *R, const word *A, const word *B) { Mul_16 }
void Baseline_Square16(word *R, const word *A) { Squ_16 }
void Baseline_MultiplyBottom16(word *R, const word *A, const word *B) { Bot_16 }
void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L) { Top_Begin(16) Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \ Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \ Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \ Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \ Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \ Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \ Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \ Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \ Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \ Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \ Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \ Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \ Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \ Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \ Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \ Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \ Mul_End(13, 15) } #endif
// ********************************************************
#if CRYPTOPP_INTEGER_SSE2
CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
#undef Mul_Begin
#undef Mul_Acc
#undef Top_Begin
#undef Top_Acc
#undef Squ_Acc
#undef Squ_NonDiag
#undef Squ_Diag
#undef Squ_SaveAcc
#undef Squ_Begin
#undef Mul_SaveAcc
#undef Bot_Acc
#undef Bot_SaveAcc
#undef Bot_End
#undef Squ_End
#undef Mul_End
#define SSE2_FinalSave(k) \
AS2( psllq xmm5, 16) \ AS2( paddq xmm4, xmm5) \ AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
#define SSE2_SaveShift(k) \
AS2( movq xmm0, xmm6) \ AS2( punpckhqdq xmm6, xmm0) \ AS2( movq xmm1, xmm7) \ AS2( punpckhqdq xmm7, xmm1) \ AS2( paddd xmm6, xmm0) \ AS2( pslldq xmm6, 4) \ AS2( paddd xmm7, xmm1) \ AS2( paddd xmm4, xmm6) \ AS2( pslldq xmm7, 4) \ AS2( movq xmm6, xmm4) \ AS2( paddd xmm5, xmm7) \ AS2( movq xmm7, xmm5) \ AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \ AS2( psrlq xmm6, 16) \ AS2( paddq xmm6, xmm7) \ AS2( punpckhqdq xmm4, xmm0) \ AS2( punpckhqdq xmm5, xmm0) \ AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \ AS2( psrlq xmm6, 3*16) \ AS2( paddd xmm4, xmm6) \
#define Squ_SSE2_SaveShift(k) \
AS2( movq xmm0, xmm6) \ AS2( punpckhqdq xmm6, xmm0) \ AS2( movq xmm1, xmm7) \ AS2( punpckhqdq xmm7, xmm1) \ AS2( paddd xmm6, xmm0) \ AS2( pslldq xmm6, 4) \ AS2( paddd xmm7, xmm1) \ AS2( paddd xmm4, xmm6) \ AS2( pslldq xmm7, 4) \ AS2( movhlps xmm6, xmm4) \ AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \ AS2( paddd xmm5, xmm7) \ AS2( movhps QWORD PTR [esp+12], xmm5)\ AS2( psrlq xmm4, 16) \ AS2( paddq xmm4, xmm5) \ AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \ AS2( psrlq xmm4, 3*16) \ AS2( paddd xmm4, xmm6) \ AS2( movq QWORD PTR [esp+4], xmm4)\
#define SSE2_FirstMultiply(i) \
AS2( movdqa xmm7, [esi+(i)*16])\ AS2( movdqa xmm5, [edi-(i)*16])\ AS2( pmuludq xmm5, xmm7) \ AS2( movdqa xmm4, [ebx])\ AS2( movdqa xmm6, xmm4) \ AS2( pand xmm4, xmm5) \ AS2( psrld xmm5, 16) \ AS2( pmuludq xmm7, [edx-(i)*16])\ AS2( pand xmm6, xmm7) \ AS2( psrld xmm7, 16)
#define Squ_Begin(n) \
SquPrologue \ AS2( mov esi, esp)\ AS2( and esp, 0xfffffff0)\ AS2( lea edi, [esp-32*n])\ AS2( sub esp, 32*n+16)\ AS1( push esi)\ AS2( mov esi, edi) \ AS2( xor edx, edx) \ ASL(1) \ ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \ ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \ AS2( movdqa [edi+2*edx], xmm0) \ AS2( psrlq xmm0, 32) \ AS2( movdqa [edi+2*edx+16], xmm0) \ AS2( movdqa [edi+16*n+2*edx], xmm1) \ AS2( psrlq xmm1, 32) \ AS2( movdqa [edi+16*n+2*edx+16], xmm1) \ AS2( add edx, 16) \ AS2( cmp edx, 8*(n)) \ ASJ( jne, 1, b) \ AS2( lea edx, [edi+16*n])\ SSE2_FirstMultiply(0) \
#define Squ_Acc(i) \
ASL(LSqu##i) \ AS2( movdqa xmm1, [esi+(i)*16]) \ AS2( movdqa xmm0, [edi-(i)*16]) \ AS2( movdqa xmm2, [ebx]) \ AS2( pmuludq xmm0, xmm1) \ AS2( pmuludq xmm1, [edx-(i)*16]) \ AS2( movdqa xmm3, xmm2) \ AS2( pand xmm2, xmm0) \ AS2( psrld xmm0, 16) \ AS2( paddd xmm4, xmm2) \ AS2( paddd xmm5, xmm0) \ AS2( pand xmm3, xmm1) \ AS2( psrld xmm1, 16) \ AS2( paddd xmm6, xmm3) \ AS2( paddd xmm7, xmm1) \
#define Squ_Acc1(i)
#define Squ_Acc2(i) ASC(call, LSqu##i)
#define Squ_Acc3(i) Squ_Acc2(i)
#define Squ_Acc4(i) Squ_Acc2(i)
#define Squ_Acc5(i) Squ_Acc2(i)
#define Squ_Acc6(i) Squ_Acc2(i)
#define Squ_Acc7(i) Squ_Acc2(i)
#define Squ_Acc8(i) Squ_Acc2(i)
#define SSE2_End(E, n) \
SSE2_SaveShift(2*(n)-3) \ AS2( movdqa xmm7, [esi+16]) \ AS2( movdqa xmm0, [edi]) \ AS2( pmuludq xmm0, xmm7) \ AS2( movdqa xmm2, [ebx]) \ AS2( pmuludq xmm7, [edx]) \ AS2( movdqa xmm6, xmm2) \ AS2( pand xmm2, xmm0) \ AS2( psrld xmm0, 16) \ AS2( paddd xmm4, xmm2) \ AS2( paddd xmm5, xmm0) \ AS2( pand xmm6, xmm7) \ AS2( psrld xmm7, 16) \ SSE2_SaveShift(2*(n)-2) \ SSE2_FinalSave(2*(n)-1) \ AS1( pop esp)\ E
#define Squ_End(n) SSE2_End(SquEpilogue, n)
#define Mul_End(n) SSE2_End(MulEpilogue, n)
#define Top_End(n) SSE2_End(TopEpilogue, n)
#define Squ_Column1(k, i) \
Squ_SSE2_SaveShift(k) \ AS2( add esi, 16) \ SSE2_FirstMultiply(1)\ Squ_Acc##i(i) \ AS2( paddd xmm4, xmm4) \ AS2( paddd xmm5, xmm5) \ AS2( movdqa xmm3, [esi]) \ AS2( movq xmm1, QWORD PTR [esi+8]) \ AS2( pmuludq xmm1, xmm3) \ AS2( pmuludq xmm3, xmm3) \ AS2( movdqa xmm0, [ebx])\ AS2( movdqa xmm2, xmm0) \ AS2( pand xmm0, xmm1) \ AS2( psrld xmm1, 16) \ AS2( paddd xmm6, xmm0) \ AS2( paddd xmm7, xmm1) \ AS2( pand xmm2, xmm3) \ AS2( psrld xmm3, 16) \ AS2( paddd xmm6, xmm6) \ AS2( paddd xmm7, xmm7) \ AS2( paddd xmm4, xmm2) \ AS2( paddd xmm5, xmm3) \ AS2( movq xmm0, QWORD PTR [esp+4])\ AS2( movq xmm1, QWORD PTR [esp+12])\ AS2( paddd xmm4, xmm0)\ AS2( paddd xmm5, xmm1)\
#define Squ_Column0(k, i) \
Squ_SSE2_SaveShift(k) \ AS2( add edi, 16) \ AS2( add edx, 16) \ SSE2_FirstMultiply(1)\ Squ_Acc##i(i) \ AS2( paddd xmm6, xmm6) \ AS2( paddd xmm7, xmm7) \ AS2( paddd xmm4, xmm4) \ AS2( paddd xmm5, xmm5) \ AS2( movq xmm0, QWORD PTR [esp+4])\ AS2( movq xmm1, QWORD PTR [esp+12])\ AS2( paddd xmm4, xmm0)\ AS2( paddd xmm5, xmm1)\
#define SSE2_MulAdd45 \
AS2( movdqa xmm7, [esi]) \ AS2( movdqa xmm0, [edi]) \ AS2( pmuludq xmm0, xmm7) \ AS2( movdqa xmm2, [ebx]) \ AS2( pmuludq xmm7, [edx]) \ AS2( movdqa xmm6, xmm2) \ AS2( pand xmm2, xmm0) \ AS2( psrld xmm0, 16) \ AS2( paddd xmm4, xmm2) \ AS2( paddd xmm5, xmm0) \ AS2( pand xmm6, xmm7) \ AS2( psrld xmm7, 16)
#define Mul_Begin(n) \
MulPrologue \ AS2( mov esi, esp)\ AS2( and esp, 0xfffffff0)\ AS2( sub esp, 48*n+16)\ AS1( push esi)\ AS2( xor edx, edx) \ ASL(1) \ ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \ ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \ ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \ AS2( movdqa [esp+20+2*edx], xmm0) \ AS2( psrlq xmm0, 32) \ AS2( movdqa [esp+20+2*edx+16], xmm0) \ AS2( movdqa [esp+20+16*n+2*edx], xmm1) \ AS2( psrlq xmm1, 32) \ AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \ AS2( movdqa [esp+20+32*n+2*edx], xmm2) \ AS2( psrlq xmm2, 32) \ AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \ AS2( add edx, 16) \ AS2( cmp edx, 8*(n)) \ ASJ( jne, 1, b) \ AS2( lea edi, [esp+20])\ AS2( lea edx, [esp+20+16*n])\ AS2( lea esi, [esp+20+32*n])\ SSE2_FirstMultiply(0) \
#define Mul_Acc(i) \
ASL(LMul##i) \ AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \ AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \ AS2( movdqa xmm2, [ebx]) \ AS2( pmuludq xmm0, xmm1) \ AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \ AS2( movdqa xmm3, xmm2) \ AS2( pand xmm2, xmm0) \ AS2( psrld xmm0, 16) \ AS2( paddd xmm4, xmm2) \ AS2( paddd xmm5, xmm0) \ AS2( pand xmm3, xmm1) \ AS2( psrld xmm1, 16) \ AS2( paddd xmm6, xmm3) \ AS2( paddd xmm7, xmm1) \
#define Mul_Acc1(i)
#define Mul_Acc2(i) ASC(call, LMul##i)
#define Mul_Acc3(i) Mul_Acc2(i)
#define Mul_Acc4(i) Mul_Acc2(i)
#define Mul_Acc5(i) Mul_Acc2(i)
#define Mul_Acc6(i) Mul_Acc2(i)
#define Mul_Acc7(i) Mul_Acc2(i)
#define Mul_Acc8(i) Mul_Acc2(i)
#define Mul_Acc9(i) Mul_Acc2(i)
#define Mul_Acc10(i) Mul_Acc2(i)
#define Mul_Acc11(i) Mul_Acc2(i)
#define Mul_Acc12(i) Mul_Acc2(i)
#define Mul_Acc13(i) Mul_Acc2(i)
#define Mul_Acc14(i) Mul_Acc2(i)
#define Mul_Acc15(i) Mul_Acc2(i)
#define Mul_Acc16(i) Mul_Acc2(i)
#define Mul_Column1(k, i) \
SSE2_SaveShift(k) \ AS2( add esi, 16) \ SSE2_MulAdd45\ Mul_Acc##i(i) \
#define Mul_Column0(k, i) \
SSE2_SaveShift(k) \ AS2( add edi, 16) \ AS2( add edx, 16) \ SSE2_MulAdd45\ Mul_Acc##i(i) \
#define Bot_Acc(i) \
AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \ AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \ AS2( pmuludq xmm0, xmm1) \ AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \ AS2( paddq xmm4, xmm0) \ AS2( paddd xmm6, xmm1)
#define Bot_SaveAcc(k) \
SSE2_SaveShift(k) \ AS2( add edi, 16) \ AS2( add edx, 16) \ AS2( movdqa xmm6, [esi]) \ AS2( movdqa xmm0, [edi]) \ AS2( pmuludq xmm0, xmm6) \ AS2( paddq xmm4, xmm0) \ AS2( psllq xmm5, 16) \ AS2( paddq xmm4, xmm5) \ AS2( pmuludq xmm6, [edx])
#define Bot_End(n) \
AS2( movhlps xmm7, xmm6) \ AS2( paddd xmm6, xmm7) \ AS2( psllq xmm6, 32) \ AS2( paddd xmm4, xmm6) \ AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \ AS1( pop esp)\ MulEpilogue
#define Top_Begin(n) \
TopPrologue \ AS2( mov edx, esp)\ AS2( and esp, 0xfffffff0)\ AS2( sub esp, 48*n+16)\ AS1( push edx)\ AS2( xor edx, edx) \ ASL(1) \ ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \ ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \ ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \ AS2( movdqa [esp+20+2*edx], xmm0) \ AS2( psrlq xmm0, 32) \ AS2( movdqa [esp+20+2*edx+16], xmm0) \ AS2( movdqa [esp+20+16*n+2*edx], xmm1) \ AS2( psrlq xmm1, 32) \ AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \ AS2( movdqa [esp+20+32*n+2*edx], xmm2) \ AS2( psrlq xmm2, 32) \ AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \ AS2( add edx, 16) \ AS2( cmp edx, 8*(n)) \ ASJ( jne, 1, b) \ AS2( mov eax, esi) \ AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\ AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\ AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\ AS2( pxor xmm4, xmm4)\ AS2( pxor xmm5, xmm5)
#define Top_Acc(i) \
AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \ AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \ AS2( psrlq xmm0, 48) \ AS2( paddd xmm5, xmm0)\
#define Top_Column0(i) \
AS2( psllq xmm5, 32) \ AS2( add edi, 16) \ AS2( add edx, 16) \ SSE2_MulAdd45\ Mul_Acc##i(i) \
#define Top_Column1(i) \
SSE2_SaveShift(0) \ AS2( add esi, 16) \ SSE2_MulAdd45\ Mul_Acc##i(i) \ AS2( shr eax, 16) \ AS2( movd xmm0, eax)\ AS2( movd xmm1, [ecx+4])\ AS2( psrld xmm1, 16)\ AS2( pcmpgtd xmm1, xmm0)\ AS2( psrld xmm1, 31)\ AS2( paddd xmm4, xmm1)\
void SSE2_Square4(word *C, const word *A) { Squ_Begin(2) Squ_Column0(0, 1) Squ_End(2) }
void SSE2_Square8(word *C, const word *A) { Squ_Begin(4) #ifndef __GNUC__
ASJ( jmp, 0, f) Squ_Acc(2) AS1( ret) ASL(0) #endif
Squ_Column0(0, 1) Squ_Column1(1, 1) Squ_Column0(2, 2) Squ_Column1(3, 1) Squ_Column0(4, 1) Squ_End(4) }
void SSE2_Square16(word *C, const word *A) { Squ_Begin(8) #ifndef __GNUC__
ASJ( jmp, 0, f) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2) AS1( ret) ASL(0) #endif
Squ_Column0(0, 1) Squ_Column1(1, 1) Squ_Column0(2, 2) Squ_Column1(3, 2) Squ_Column0(4, 3) Squ_Column1(5, 3) Squ_Column0(6, 4) Squ_Column1(7, 3) Squ_Column0(8, 3) Squ_Column1(9, 2) Squ_Column0(10, 2) Squ_Column1(11, 1) Squ_Column0(12, 1) Squ_End(8) }
void SSE2_Square32(word *C, const word *A) { Squ_Begin(16) ASJ( jmp, 0, f) Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2) AS1( ret) ASL(0) Squ_Column0(0, 1) Squ_Column1(1, 1) Squ_Column0(2, 2) Squ_Column1(3, 2) Squ_Column0(4, 3) Squ_Column1(5, 3) Squ_Column0(6, 4) Squ_Column1(7, 4) Squ_Column0(8, 5) Squ_Column1(9, 5) Squ_Column0(10, 6) Squ_Column1(11, 6) Squ_Column0(12, 7) Squ_Column1(13, 7) Squ_Column0(14, 8) Squ_Column1(15, 7) Squ_Column0(16, 7) Squ_Column1(17, 6) Squ_Column0(18, 6) Squ_Column1(19, 5) Squ_Column0(20, 5) Squ_Column1(21, 4) Squ_Column0(22, 4) Squ_Column1(23, 3) Squ_Column0(24, 3) Squ_Column1(25, 2) Squ_Column0(26, 2) Squ_Column1(27, 1) Squ_Column0(28, 1) Squ_End(16) }
void SSE2_Multiply4(word *C, const word *A, const word *B) { Mul_Begin(2) #ifndef __GNUC__
ASJ( jmp, 0, f) Mul_Acc(2) AS1( ret) ASL(0) #endif
Mul_Column0(0, 2) Mul_End(2) }
void SSE2_Multiply8(word *C, const word *A, const word *B) { Mul_Begin(4) #ifndef __GNUC__
ASJ( jmp, 0, f) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) AS1( ret) ASL(0) #endif
Mul_Column0(0, 2) Mul_Column1(1, 3) Mul_Column0(2, 4) Mul_Column1(3, 3) Mul_Column0(4, 2) Mul_End(4) }
void SSE2_Multiply16(word *C, const word *A, const word *B) { Mul_Begin(8) #ifndef __GNUC__
ASJ( jmp, 0, f) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) AS1( ret) ASL(0) #endif
Mul_Column0(0, 2) Mul_Column1(1, 3) Mul_Column0(2, 4) Mul_Column1(3, 5) Mul_Column0(4, 6) Mul_Column1(5, 7) Mul_Column0(6, 8) Mul_Column1(7, 7) Mul_Column0(8, 6) Mul_Column1(9, 5) Mul_Column0(10, 4) Mul_Column1(11, 3) Mul_Column0(12, 2) Mul_End(8) }
void SSE2_Multiply32(word *C, const word *A, const word *B) { Mul_Begin(16) ASJ( jmp, 0, f) Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) AS1( ret) ASL(0) Mul_Column0(0, 2) Mul_Column1(1, 3) Mul_Column0(2, 4) Mul_Column1(3, 5) Mul_Column0(4, 6) Mul_Column1(5, 7) Mul_Column0(6, 8) Mul_Column1(7, 9) Mul_Column0(8, 10) Mul_Column1(9, 11) Mul_Column0(10, 12) Mul_Column1(11, 13) Mul_Column0(12, 14) Mul_Column1(13, 15) Mul_Column0(14, 16) Mul_Column1(15, 15) Mul_Column0(16, 14) Mul_Column1(17, 13) Mul_Column0(18, 12) Mul_Column1(19, 11) Mul_Column0(20, 10) Mul_Column1(21, 9) Mul_Column0(22, 8) Mul_Column1(23, 7) Mul_Column0(24, 6) Mul_Column1(25, 5) Mul_Column0(26, 4) Mul_Column1(27, 3) Mul_Column0(28, 2) Mul_End(16) }
void SSE2_MultiplyBottom4(word *C, const word *A, const word *B) { Mul_Begin(2) Bot_SaveAcc(0) Bot_Acc(2) Bot_End(2) }
void SSE2_MultiplyBottom8(word *C, const word *A, const word *B) { Mul_Begin(4) #ifndef __GNUC__
ASJ( jmp, 0, f) Mul_Acc(3) Mul_Acc(2) AS1( ret) ASL(0) #endif
Mul_Column0(0, 2) Mul_Column1(1, 3) Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2) Bot_End(4) }
void SSE2_MultiplyBottom16(word *C, const word *A, const word *B) { Mul_Begin(8) #ifndef __GNUC__
ASJ( jmp, 0, f) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) AS1( ret) ASL(0) #endif
Mul_Column0(0, 2) Mul_Column1(1, 3) Mul_Column0(2, 4) Mul_Column1(3, 5) Mul_Column0(4, 6) Mul_Column1(5, 7) Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2) Bot_End(8) }
void SSE2_MultiplyBottom32(word *C, const word *A, const word *B) { Mul_Begin(16) #ifndef __GNUC__
ASJ( jmp, 0, f) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) AS1( ret) ASL(0) #endif
Mul_Column0(0, 2) Mul_Column1(1, 3) Mul_Column0(2, 4) Mul_Column1(3, 5) Mul_Column0(4, 6) Mul_Column1(5, 7) Mul_Column0(6, 8) Mul_Column1(7, 9) Mul_Column0(8, 10) Mul_Column1(9, 11) Mul_Column0(10, 12) Mul_Column1(11, 13) Mul_Column0(12, 14) Mul_Column1(13, 15) Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2) Bot_End(16) }
void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L) { Top_Begin(4) Top_Acc(3) Top_Acc(2) Top_Acc(1) #ifndef __GNUC__
ASJ( jmp, 0, f) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) AS1( ret) ASL(0) #endif
Top_Column0(4) Top_Column1(3) Mul_Column0(0, 2) Top_End(2) }
void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L) { Top_Begin(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1) #ifndef __GNUC__
ASJ( jmp, 0, f) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) AS1( ret) ASL(0) #endif
Top_Column0(8) Top_Column1(7) Mul_Column0(0, 6) Mul_Column1(1, 5) Mul_Column0(2, 4) Mul_Column1(3, 3) Mul_Column0(4, 2) Top_End(4) }
void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L) { Top_Begin(16) Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1) #ifndef __GNUC__
ASJ( jmp, 0, f) Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) AS1( ret) ASL(0) #endif
Top_Column0(16) Top_Column1(15) Mul_Column0(0, 14) Mul_Column1(1, 13) Mul_Column0(2, 12) Mul_Column1(3, 11) Mul_Column0(4, 10) Mul_Column1(5, 9) Mul_Column0(6, 8) Mul_Column1(7, 7) Mul_Column0(8, 6) Mul_Column1(9, 5) Mul_Column0(10, 4) Mul_Column1(11, 3) Mul_Column0(12, 2) Top_End(8) }
#endif // #if CRYPTOPP_INTEGER_SSE2
// ********************************************************
typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B); typedef void (* PMul)(word *C, const word *A, const word *B); typedef void (* PSqu)(word *C, const word *A); typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
#if CRYPTOPP_INTEGER_SSE2
static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub; static size_t s_recursionLimit = 8; #else
static const size_t s_recursionLimit = 16; #endif
static PMul s_pMul[9], s_pBot[9]; static PSqu s_pSqu[9]; static PMulTop s_pTop[9];
static void SetFunctionPointers() { s_pMul[0] = &Baseline_Multiply2; s_pBot[0] = &Baseline_MultiplyBottom2; s_pSqu[0] = &Baseline_Square2; s_pTop[0] = &Baseline_MultiplyTop2; s_pTop[1] = &Baseline_MultiplyTop4;
#if CRYPTOPP_INTEGER_SSE2
if (HasSSE2()) { #if _MSC_VER != 1200 || defined(NDEBUG)
if (IsP4()) { s_pAdd = &SSE2_Add; s_pSub = &SSE2_Sub; } #endif
s_recursionLimit = 32;
s_pMul[1] = &SSE2_Multiply4; s_pMul[2] = &SSE2_Multiply8; s_pMul[4] = &SSE2_Multiply16; s_pMul[8] = &SSE2_Multiply32;
s_pBot[1] = &SSE2_MultiplyBottom4; s_pBot[2] = &SSE2_MultiplyBottom8; s_pBot[4] = &SSE2_MultiplyBottom16; s_pBot[8] = &SSE2_MultiplyBottom32;
s_pSqu[1] = &SSE2_Square4; s_pSqu[2] = &SSE2_Square8; s_pSqu[4] = &SSE2_Square16; s_pSqu[8] = &SSE2_Square32;
s_pTop[2] = &SSE2_MultiplyTop8; s_pTop[4] = &SSE2_MultiplyTop16; s_pTop[8] = &SSE2_MultiplyTop32; } else #endif
{ s_pMul[1] = &Baseline_Multiply4; s_pMul[2] = &Baseline_Multiply8;
s_pBot[1] = &Baseline_MultiplyBottom4; s_pBot[2] = &Baseline_MultiplyBottom8;
s_pSqu[1] = &Baseline_Square4; s_pSqu[2] = &Baseline_Square8;
s_pTop[2] = &Baseline_MultiplyTop8;
#if !CRYPTOPP_INTEGER_SSE2
s_pMul[4] = &Baseline_Multiply16; s_pBot[4] = &Baseline_MultiplyBottom16; s_pSqu[4] = &Baseline_Square16; s_pTop[4] = &Baseline_MultiplyTop16; #endif
} }
inline int Add(word *C, const word *A, const word *B, size_t N) { #if CRYPTOPP_INTEGER_SSE2
return s_pAdd(N, C, A, B); #else
return Baseline_Add(N, C, A, B); #endif
}
inline int Subtract(word *C, const word *A, const word *B, size_t N) { #if CRYPTOPP_INTEGER_SSE2
return s_pSub(N, C, A, B); #else
return Baseline_Sub(N, C, A, B); #endif
}
// ********************************************************
#define A0 A
#define A1 (A+N2)
#define B0 B
#define B1 (B+N2)
#define T0 T
#define T1 (T+N2)
#define T2 (T+N)
#define T3 (T+N+N2)
#define R0 R
#define R1 (R+N2)
#define R2 (R+N)
#define R3 (R+N+N2)
// R[2*N] - result = A*B
// T[2*N] - temporary work space
// A[N] --- multiplier
// B[N] --- multiplicant
void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N) { assert(N>=2 && N%2==0);
if (N <= s_recursionLimit) s_pMul[N/4](R, A, B); else { const size_t N2 = N/2;
size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2; Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2; Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
RecursiveMultiply(R2, T2, A1, B1, N2); RecursiveMultiply(T0, T2, R0, R1, N2); RecursiveMultiply(R0, T2, A0, B0, N2);
// now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
int c2 = Add(R2, R2, R1, N2); int c3 = c2; c2 += Add(R1, R2, R0, N2); c3 += Add(R2, R2, R3, N2);
if (AN2 == BN2) c3 -= Subtract(R1, R1, T0, N); else c3 += Add(R1, R1, T0, N);
c3 += Increment(R2, N2, c2); assert (c3 >= 0 && c3 <= 2); Increment(R3, N2, c3); } }
// R[2*N] - result = A*A
// T[2*N] - temporary work space
// A[N] --- number to be squared
void RecursiveSquare(word *R, word *T, const word *A, size_t N) { assert(N && N%2==0);
if (N <= s_recursionLimit) s_pSqu[N/4](R, A); else { const size_t N2 = N/2;
RecursiveSquare(R0, T2, A0, N2); RecursiveSquare(R2, T2, A1, N2); RecursiveMultiply(T0, T2, A0, A1, N2);
int carry = Add(R1, R1, T0, N); carry += Add(R1, R1, T0, N); Increment(R3, N2, carry); } }
// R[N] - bottom half of A*B
// T[3*N/2] - temporary work space
// A[N] - multiplier
// B[N] - multiplicant
void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N) { assert(N>=2 && N%2==0);
if (N <= s_recursionLimit) s_pBot[N/4](R, A, B); else { const size_t N2 = N/2;
RecursiveMultiply(R, T, A0, B0, N2); RecursiveMultiplyBottom(T0, T1, A1, B0, N2); Add(R1, R1, T0, N2); RecursiveMultiplyBottom(T0, T1, A0, B1, N2); Add(R1, R1, T0, N2); } }
// R[N] --- upper half of A*B
// T[2*N] - temporary work space
// L[N] --- lower half of A*B
// A[N] --- multiplier
// B[N] --- multiplicant
void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N) { assert(N>=2 && N%2==0);
if (N <= s_recursionLimit) s_pTop[N/4](R, A, B, L[N-1]); else { const size_t N2 = N/2;
size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2; Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2; Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
RecursiveMultiply(T0, T2, R0, R1, N2); RecursiveMultiply(R0, T2, A1, B1, N2);
// now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
int t, c3; int c2 = Subtract(T2, L+N2, L, N2);
if (AN2 == BN2) { c2 -= Add(T2, T2, T0, N2); t = (Compare(T2, R0, N2) == -1); c3 = t - Subtract(T2, T2, T1, N2); } else { c2 += Subtract(T2, T2, T0, N2); t = (Compare(T2, R0, N2) == -1); c3 = t + Add(T2, T2, T1, N2); }
c2 += t; if (c2 >= 0) c3 += Increment(T2, N2, c2); else c3 -= Decrement(T2, N2, -c2); c3 += Add(R0, T2, R1, N2);
assert (c3 >= 0 && c3 <= 2); Increment(R1, N2, c3); } }
inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N) { RecursiveMultiply(R, T, A, B, N); }
inline void Square(word *R, word *T, const word *A, size_t N) { RecursiveSquare(R, T, A, N); }
inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N) { RecursiveMultiplyBottom(R, T, A, B, N); }
// R[NA+NB] - result = A*B
// T[NA+NB] - temporary work space
// A[NA] ---- multiplier
// B[NB] ---- multiplicant
void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB) { if (NA == NB) { if (A == B) Square(R, T, A, NA); else Multiply(R, T, A, B, NA);
return; }
if (NA > NB) { std::swap(A, B); std::swap(NA, NB); }
assert(NB % NA == 0);
if (NA==2 && !A[1]) { switch (A[0]) { case 0: SetWords(R, 0, NB+2); return; case 1: CopyWords(R, B, NB); R[NB] = R[NB+1] = 0; return; default: R[NB] = LinearMultiply(R, B, A[0], NB); R[NB+1] = 0; return; } }
size_t i; if ((NB/NA)%2 == 0) { Multiply(R, T, A, B, NA); CopyWords(T+2*NA, R+NA, NA);
for (i=2*NA; i<NB; i+=2*NA) Multiply(T+NA+i, T, A, B+i, NA); for (i=NA; i<NB; i+=2*NA) Multiply(R+i, T, A, B+i, NA); } else { for (i=0; i<NB; i+=2*NA) Multiply(R+i, T, A, B+i, NA); for (i=NA; i<NB; i+=2*NA) Multiply(T+NA+i, T, A, B+i, NA); }
if (Add(R+NA, R+NA, T+2*NA, NB-NA)) Increment(R+NB, NA); }
// R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
// T[3*N/2] - temporary work space
// A[N] ----- an odd number as input
void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N) { if (N==2) { T[0] = AtomicInverseModPower2(A[0]); T[1] = 0; s_pBot[0](T+2, T, A); TwosComplement(T+2, 2); Increment(T+2, 2, 2); s_pBot[0](R, T, T+2); } else { const size_t N2 = N/2; RecursiveInverseModPower2(R0, T0, A0, N2); T0[0] = 1; SetWords(T0+1, 0, N2-1); MultiplyTop(R1, T1, T0, R0, A0, N2); MultiplyBottom(T0, T1, R0, A1, N2); Add(T0, R1, T0, N2); TwosComplement(T0, N2); MultiplyBottom(R1, T1, R0, T0, N2); } }
// R[N] --- result = X/(2**(WORD_BITS*N)) mod M
// T[3*N] - temporary work space
// X[2*N] - number to be reduced
// M[N] --- modulus
// U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N) { #if 1
MultiplyBottom(R, T, X, U, N); MultiplyTop(T, T+N, X, R, M, N); word borrow = Subtract(T, X+N, T, N); // defend against timing attack by doing this Add even when not needed
word carry = Add(T+N, T, M, N); assert(carry | !borrow); CopyWords(R, T + ((0-borrow) & N), N); #elif 0
const word u = 0-U[0]; Declare2Words(p) for (size_t i=0; i<N; i++) { const word t = u * X[i]; word c = 0; for (size_t j=0; j<N; j+=2) { MultiplyWords(p, t, M[j]); Acc2WordsBy1(p, X[i+j]); Acc2WordsBy1(p, c); X[i+j] = LowWord(p); c = HighWord(p); MultiplyWords(p, t, M[j+1]); Acc2WordsBy1(p, X[i+j+1]); Acc2WordsBy1(p, c); X[i+j+1] = LowWord(p); c = HighWord(p); }
if (Increment(X+N+i, N-i, c)) while (!Subtract(X+N, X+N, M, N)) {} }
memcpy(R, X+N, N*WORD_SIZE); #else
__m64 u = _mm_cvtsi32_si64(0-U[0]), p; for (size_t i=0; i<N; i++) { __m64 t = _mm_cvtsi32_si64(X[i]); t = _mm_mul_su32(t, u); __m64 c = _mm_setzero_si64(); for (size_t j=0; j<N; j+=2) { p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j])); p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j])); c = _mm_add_si64(c, p); X[i+j] = _mm_cvtsi64_si32(c); c = _mm_srli_si64(c, 32); p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1])); p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1])); c = _mm_add_si64(c, p); X[i+j+1] = _mm_cvtsi64_si32(c); c = _mm_srli_si64(c, 32); }
if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c))) while (!Subtract(X+N, X+N, M, N)) {} }
memcpy(R, X+N, N*WORD_SIZE); _mm_empty(); #endif
}
// R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
// T[2*N] - temporary work space
// X[2*N] - number to be reduced
// M[N] --- modulus
// U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
// V[N] --- 2**(WORD_BITS*3*N/2) mod M
void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N) { assert(N%2==0 && N>=4);
#define M0 M
#define M1 (M+N2)
#define V0 V
#define V1 (V+N2)
#define X0 X
#define X1 (X+N2)
#define X2 (X+N)
#define X3 (X+N+N2)
const size_t N2 = N/2; Multiply(T0, T2, V0, X3, N2); int c2 = Add(T0, T0, X0, N); MultiplyBottom(T3, T2, T0, U, N2); MultiplyTop(T2, R, T0, T3, M0, N2); c2 -= Subtract(T2, T1, T2, N2); Multiply(T0, R, T3, M1, N2); c2 -= Subtract(T0, T2, T0, N2); int c3 = -(int)Subtract(T1, X2, T1, N2); Multiply(R0, T2, V1, X3, N2); c3 += Add(R, R, T, N);
if (c2>0) c3 += Increment(R1, N2); else if (c2<0) c3 -= Decrement(R1, N2, -c2);
assert(c3>=-1 && c3<=1); if (c3>0) Subtract(R, R, M, N); else if (c3<0) Add(R, R, M, N);
#undef M0
#undef M1
#undef V0
#undef V1
#undef X0
#undef X1
#undef X2
#undef X3
}
#undef A0
#undef A1
#undef B0
#undef B1
#undef T0
#undef T1
#undef T2
#undef T3
#undef R0
#undef R1
#undef R2
#undef R3
/*
// do a 3 word by 2 word divide, returns quotient and leaves remainder in A
static word SubatomicDivide(word *A, word B0, word B1) { // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
// estimate the quotient: do a 2 word by 1 word divide
word Q; if (B1+1 == 0) Q = A[2]; else Q = DWord(A[1], A[2]).DividedBy(B1+1);
// now subtract Q*B from A
DWord p = DWord::Multiply(B0, Q); DWord u = (DWord) A[0] - p.GetLowHalf(); A[0] = u.GetLowHalf(); u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q); A[1] = u.GetLowHalf(); A[2] += u.GetHighHalf();
// Q <= actual quotient, so fix it
while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0)) { u = (DWord) A[0] - B0; A[0] = u.GetLowHalf(); u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow(); A[1] = u.GetLowHalf(); A[2] += u.GetHighHalf(); Q++; assert(Q); // shouldn't overflow
}
return Q; }
// do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
static inline void AtomicDivide(word *Q, const word *A, const word *B) { if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
{ Q[0] = A[2]; Q[1] = A[3]; } else { word T[4]; T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3]; Q[1] = SubatomicDivide(T+1, B[0], B[1]); Q[0] = SubatomicDivide(T, B[0], B[1]);
#ifndef NDEBUG
// multiply quotient and divisor and add remainder, make sure it equals dividend
assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0]))); word P[4]; LowLevel::Multiply2(P, Q, B); Add(P, P, T, 4); assert(memcmp(P, A, 4*WORD_SIZE)==0); #endif
} } */
static inline void AtomicDivide(word *Q, const word *A, const word *B) { word T[4]; DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1])); Q[0] = q.GetLowHalf(); Q[1] = q.GetHighHalf();
#ifndef NDEBUG
if (B[0] || B[1]) { // multiply quotient and divisor and add remainder, make sure it equals dividend
assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0]))); word P[4]; s_pMul[0](P, Q, B); Add(P, P, T, 4); assert(memcmp(P, A, 4*WORD_SIZE)==0); } #endif
}
// for use by Divide(), corrects the underestimated quotient {Q1,Q0}
static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N) { assert(N && N%2==0);
AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
word borrow = Subtract(R, R, T, N+2); assert(!borrow && !R[N+1]);
while (R[N] || Compare(R, B, N) >= 0) { R[N] -= Subtract(R, R, B, N); Q[1] += (++Q[0]==0); assert(Q[0] || Q[1]); // no overflow
} }
// R[NB] -------- remainder = A%B
// Q[NA-NB+2] --- quotient = A/B
// T[NA+3*(NB+2)] - temp work space
// A[NA] -------- dividend
// B[NB] -------- divisor
void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB) { assert(NA && NB && NA%2==0 && NB%2==0); assert(B[NB-1] || B[NB-2]); assert(NB <= NA);
// set up temporary work space
word *const TA=T; word *const TB=T+NA+2; word *const TP=T+NA+2+NB;
// copy B into TB and normalize it so that TB has highest bit set to 1
unsigned shiftWords = (B[NB-1]==0); TB[0] = TB[NB-1] = 0; CopyWords(TB+shiftWords, B, NB-shiftWords); unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]); assert(shiftBits < WORD_BITS); ShiftWordsLeftByBits(TB, NB, shiftBits);
// copy A into TA and normalize it
TA[0] = TA[NA] = TA[NA+1] = 0; CopyWords(TA+shiftWords, A, NA); ShiftWordsLeftByBits(TA, NA+2, shiftBits);
if (TA[NA+1]==0 && TA[NA] <= 1) { Q[NA-NB+1] = Q[NA-NB] = 0; while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0) { TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB); ++Q[NA-NB]; } } else { NA+=2; assert(Compare(TA+NA-NB, TB, NB) < 0); }
word BT[2]; BT[0] = TB[NB-2] + 1; BT[1] = TB[NB-1] + (BT[0]==0);
// start reducing TA mod TB, 2 words at a time
for (size_t i=NA-2; i>=NB; i-=2) { AtomicDivide(Q+i-NB, TA+i-2, BT); CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB); }
// copy TA into R, and denormalize it
CopyWords(R, TA+shiftWords, NB); ShiftWordsRightByBits(R, NB, shiftBits); }
static inline size_t EvenWordCount(const word *X, size_t N) { while (N && X[N-2]==0 && X[N-1]==0) N-=2; return N; }
// return k
// R[N] --- result = A^(-1) * 2^k mod M
// T[4*N] - temporary work space
// A[NA] -- number to take inverse of
// M[N] --- modulus
unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N) { assert(NA<=N && N && N%2==0);
word *b = T; word *c = T+N; word *f = T+2*N; word *g = T+3*N; size_t bcLen=2, fgLen=EvenWordCount(M, N); unsigned int k=0; bool s=false;
SetWords(T, 0, 3*N); b[0]=1; CopyWords(f, A, NA); CopyWords(g, M, N);
while (1) { word t=f[0]; while (!t) { if (EvenWordCount(f, fgLen)==0) { SetWords(R, 0, N); return 0; }
ShiftWordsRightByWords(f, fgLen, 1); bcLen += 2 * (c[bcLen-1] != 0); assert(bcLen <= N); ShiftWordsLeftByWords(c, bcLen, 1); k+=WORD_BITS; t=f[0]; }
unsigned int i = TrailingZeros(t); t >>= i; k += i;
if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0) { if (s) Subtract(R, M, b, N); else CopyWords(R, b, N); return k; }
ShiftWordsRightByBits(f, fgLen, i); t = ShiftWordsLeftByBits(c, bcLen, i); c[bcLen] += t; bcLen += 2 * (t!=0); assert(bcLen <= N);
bool swap = Compare(f, g, fgLen)==-1; ConditionalSwapPointers(swap, f, g); ConditionalSwapPointers(swap, b, c); s ^= swap;
fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
Subtract(f, f, g, fgLen); t = Add(b, b, c, bcLen); b[bcLen] += t; bcLen += 2*t; assert(bcLen <= N); } }
// R[N] - result = A/(2^k) mod M
// A[N] - input
// M[N] - modulus
void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N) { CopyWords(R, A, N);
while (k--) { if (R[0]%2==0) ShiftWordsRightByBits(R, N, 1); else { word carry = Add(R, R, M, N); ShiftWordsRightByBits(R, N, 1); R[N-1] += carry<<(WORD_BITS-1); } } }
// R[N] - result = A*(2^k) mod M
// A[N] - input
// M[N] - modulus
void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N) { CopyWords(R, A, N);
while (k--) if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0) Subtract(R, R, M, N); }
// ******************************************************************
InitializeInteger::InitializeInteger() { if (!g_pAssignIntToInteger) { SetFunctionPointers(); g_pAssignIntToInteger = AssignIntToInteger; } }
static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
static inline size_t RoundupSize(size_t n) { if (n<=8) return RoundupSizeTable[n]; else if (n<=16) return 16; else if (n<=32) return 32; else if (n<=64) return 64; else return size_t(1) << BitPrecision(n-1); }
Integer::Integer() : reg(2), sign(POSITIVE) { reg[0] = reg[1] = 0; }
Integer::Integer(const Integer& t) : reg(RoundupSize(t.WordCount())), sign(t.sign) { CopyWords(reg, t.reg, reg.size()); }
Integer::Integer(Sign s, lword value) : reg(2), sign(s) { reg[0] = word(value); reg[1] = word(SafeRightShift<WORD_BITS>(value)); }
Integer::Integer(signed long value) : reg(2) { if (value >= 0) sign = POSITIVE; else { sign = NEGATIVE; value = -value; } reg[0] = word(value); reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value)); }
Integer::Integer(Sign s, word high, word low) : reg(2), sign(s) { reg[0] = low; reg[1] = high; }
bool Integer::IsConvertableToLong() const { if (ByteCount() > sizeof(long)) return false;
unsigned long value = (unsigned long)reg[0]; value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
if (sign==POSITIVE) return (signed long)value >= 0; else return -(signed long)value < 0; }
signed long Integer::ConvertToLong() const { assert(IsConvertableToLong());
unsigned long value = (unsigned long)reg[0]; value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]); return sign==POSITIVE ? value : -(signed long)value; }
Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s) { Decode(encodedInteger, byteCount, s); }
Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s) { Decode(encodedInteger, byteCount, s); }
Integer::Integer(BufferedTransformation &bt) { BERDecode(bt); }
Integer::Integer(RandomNumberGenerator &rng, size_t bitcount) { Randomize(rng, bitcount); }
Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod) { if (!Randomize(rng, min, max, rnType, equiv, mod)) throw Integer::RandomNumberNotFound(); }
Integer Integer::Power2(size_t e) { Integer r((word)0, BitsToWords(e+1)); r.SetBit(e); return r; }
template <long i> struct NewInteger { Integer * operator()() const { return new Integer(i); } };
const Integer &Integer::Zero() { return Singleton<Integer>().Ref(); }
const Integer &Integer::One() { return Singleton<Integer, NewInteger<1> >().Ref(); }
const Integer &Integer::Two() { return Singleton<Integer, NewInteger<2> >().Ref(); }
bool Integer::operator!() const { return IsNegative() ? false : (reg[0]==0 && WordCount()==0); }
Integer& Integer::operator=(const Integer& t) { if (this != &t) { if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0) reg.New(RoundupSize(t.WordCount())); CopyWords(reg, t.reg, reg.size()); sign = t.sign; } return *this; }
bool Integer::GetBit(size_t n) const { if (n/WORD_BITS >= reg.size()) return 0; else return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1); }
void Integer::SetBit(size_t n, bool value) { if (value) { reg.CleanGrow(RoundupSize(BitsToWords(n+1))); reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS)); } else { if (n/WORD_BITS < reg.size()) reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS)); } }
byte Integer::GetByte(size_t n) const { if (n/WORD_SIZE >= reg.size()) return 0; else return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8)); }
void Integer::SetByte(size_t n, byte value) { reg.CleanGrow(RoundupSize(BytesToWords(n+1))); reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE)); reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE)); }
lword Integer::GetBits(size_t i, size_t n) const { lword v = 0; assert(n <= sizeof(v)*8); for (unsigned int j=0; j<n; j++) v |= lword(GetBit(i+j)) << j; return v; }
Integer Integer::operator-() const { Integer result(*this); result.Negate(); return result; }
Integer Integer::AbsoluteValue() const { Integer result(*this); result.sign = POSITIVE; return result; }
void Integer::swap(Integer &a) { reg.swap(a.reg); std::swap(sign, a.sign); }
Integer::Integer(word value, size_t length) : reg(RoundupSize(length)), sign(POSITIVE) { reg[0] = value; SetWords(reg+1, 0, reg.size()-1); }
template <class T> static Integer StringToInteger(const T *str) { int radix; // GCC workaround
// std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
unsigned int length; for (length = 0; str[length] != 0; length++) {}
Integer v;
if (length == 0) return v;
switch (str[length-1]) { case 'h': case 'H': radix=16; break; case 'o': case 'O': radix=8; break; case 'b': case 'B': radix=2; break; default: radix=10; }
if (length > 2 && str[0] == '0' && str[1] == 'x') radix = 16;
for (unsigned i=0; i<length; i++) { int digit;
if (str[i] >= '0' && str[i] <= '9') digit = str[i] - '0'; else if (str[i] >= 'A' && str[i] <= 'F') digit = str[i] - 'A' + 10; else if (str[i] >= 'a' && str[i] <= 'f') digit = str[i] - 'a' + 10; else digit = radix;
if (digit < radix) { v *= radix; v += digit; } }
if (str[0] == '-') v.Negate();
return v; }
Integer::Integer(const char *str) : reg(2), sign(POSITIVE) { *this = StringToInteger(str); }
Integer::Integer(const wchar_t *str) : reg(2), sign(POSITIVE) { *this = StringToInteger(str); }
unsigned int Integer::WordCount() const { return (unsigned int)CountWords(reg, reg.size()); }
unsigned int Integer::ByteCount() const { unsigned wordCount = WordCount(); if (wordCount) return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]); else return 0; }
unsigned int Integer::BitCount() const { unsigned wordCount = WordCount(); if (wordCount) return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]); else return 0; }
void Integer::Decode(const byte *input, size_t inputLen, Signedness s) { StringStore store(input, inputLen); Decode(store, inputLen, s); }
void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s) { assert(bt.MaxRetrievable() >= inputLen);
byte b; bt.Peek(b); sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff)) { bt.Skip(1); inputLen--; bt.Peek(b); }
reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
for (size_t i=inputLen; i > 0; i--) { bt.Get(b); reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8; }
if (sign == NEGATIVE) { for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++) reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8; TwosComplement(reg, reg.size()); } }
size_t Integer::MinEncodedSize(Signedness signedness) const { unsigned int outputLen = STDMAX(1U, ByteCount()); if (signedness == UNSIGNED) return outputLen; if (NotNegative() && (GetByte(outputLen-1) & 0x80)) outputLen++; if (IsNegative() && *this < -Power2(outputLen*8-1)) outputLen++; return outputLen; }
void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const { ArraySink sink(output, outputLen); Encode(sink, outputLen, signedness); }
void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const { if (signedness == UNSIGNED || NotNegative()) { for (size_t i=outputLen; i > 0; i--) bt.Put(GetByte(i-1)); } else { // take two's complement of *this
Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this; temp.Encode(bt, outputLen, UNSIGNED); } }
void Integer::DEREncode(BufferedTransformation &bt) const { DERGeneralEncoder enc(bt, INTEGER); Encode(enc, MinEncodedSize(SIGNED), SIGNED); enc.MessageEnd(); }
void Integer::BERDecode(const byte *input, size_t len) { StringStore store(input, len); BERDecode(store); }
void Integer::BERDecode(BufferedTransformation &bt) { BERGeneralDecoder dec(bt, INTEGER); if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength()) BERDecodeError(); Decode(dec, (size_t)dec.RemainingLength(), SIGNED); dec.MessageEnd(); }
void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const { DERGeneralEncoder enc(bt, OCTET_STRING); Encode(enc, length); enc.MessageEnd(); }
void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length) { BERGeneralDecoder dec(bt, OCTET_STRING); if (!dec.IsDefiniteLength() || dec.RemainingLength() != length) BERDecodeError(); Decode(dec, length); dec.MessageEnd(); }
size_t Integer::OpenPGPEncode(byte *output, size_t len) const { ArraySink sink(output, len); return OpenPGPEncode(sink); }
size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const { word16 bitCount = BitCount(); bt.PutWord16(bitCount); size_t byteCount = BitsToBytes(bitCount); Encode(bt, byteCount); return 2 + byteCount; }
void Integer::OpenPGPDecode(const byte *input, size_t len) { StringStore store(input, len); OpenPGPDecode(store); }
void Integer::OpenPGPDecode(BufferedTransformation &bt) { word16 bitCount; if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount)) throw OpenPGPDecodeErr(); Decode(bt, BitsToBytes(bitCount)); }
void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits) { const size_t nbytes = nbits/8 + 1; SecByteBlock buf(nbytes); rng.GenerateBlock(buf, nbytes); if (nbytes) buf[0] = (byte)Crop(buf[0], nbits % 8); Decode(buf, nbytes, UNSIGNED); }
void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max) { if (min > max) throw InvalidArgument("Integer: Min must be no greater than Max");
Integer range = max - min; const unsigned int nbits = range.BitCount();
do { Randomize(rng, nbits); } while (*this > range);
*this += min; }
bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod) { return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod)); }
class KDF2_RNG : public RandomNumberGenerator { public: KDF2_RNG(const byte *seed, size_t seedSize) : m_counter(0), m_counterAndSeed(seedSize + 4) { memcpy(m_counterAndSeed + 4, seed, seedSize); }
void GenerateBlock(byte *output, size_t size) { PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter); ++m_counter; P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0); }
private: word32 m_counter; SecByteBlock m_counterAndSeed; };
bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms) { Integer min = params.GetValueWithDefault("Min", Integer::Zero()); Integer max; if (!params.GetValue("Max", max)) { int bitLength; if (params.GetIntValue("BitLength", bitLength)) max = Integer::Power2(bitLength); else throw InvalidArgument("Integer: missing Max argument"); } if (min > max) throw InvalidArgument("Integer: Min must be no greater than Max");
Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero()); Integer mod = params.GetValueWithDefault("Mod", Integer::One());
if (equiv.IsNegative() || equiv >= mod) throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
member_ptr<KDF2_RNG> kdf2Rng; ConstByteArrayParameter seed; if (params.GetValue(Name::Seed(), seed)) { ByteQueue bq; DERSequenceEncoder seq(bq); min.DEREncode(seq); max.DEREncode(seq); equiv.DEREncode(seq); mod.DEREncode(seq); DEREncodeUnsigned(seq, rnType); DEREncodeOctetString(seq, seed.begin(), seed.size()); seq.MessageEnd();
SecByteBlock finalSeed((size_t)bq.MaxRetrievable()); bq.Get(finalSeed, finalSeed.size()); kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size())); } RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
switch (rnType) { case ANY: if (mod == One()) Randomize(rng, min, max); else { Integer min1 = min + (equiv-min)%mod; if (max < min1) return false; Randomize(rng, Zero(), (max - min1) / mod); *this *= mod; *this += min1; } return true;
case PRIME: { const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
int i; i = 0; while (1) { if (++i==16) { // check if there are any suitable primes in [min, max]
Integer first = min; if (FirstPrime(first, max, equiv, mod, pSelector)) { // if there is only one suitable prime, we're done
*this = first; if (!FirstPrime(first, max, equiv, mod, pSelector)) return true; } else return false; }
Randomize(rng, min, max); if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector)) return true; } }
default: throw InvalidArgument("Integer: invalid RandomNumberType argument"); } }
std::istream& operator>>(std::istream& in, Integer &a) { char c; unsigned int length = 0; SecBlock<char> str(length + 16);
std::ws(in);
do { in.read(&c, 1); str[length++] = c; if (length >= str.size()) str.Grow(length + 16); } while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
if (in.gcount()) in.putback(c); str[length-1] = '\0'; a = Integer(str);
return in; }
std::ostream& operator<<(std::ostream& out, const Integer &a) { // Get relevant conversion specifications from ostream.
long f = out.flags() & std::ios::basefield; // Get base digits.
int base, block; char suffix; switch(f) { case std::ios::oct : base = 8; block = 8; suffix = 'o'; break; case std::ios::hex : base = 16; block = 4; suffix = 'h'; break; default : base = 10; block = 3; suffix = '.'; }
Integer temp1=a, temp2; if (a.IsNegative()) { out << '-'; temp1.Negate(); }
if (!a) out << '0';
static const char upper[]="0123456789ABCDEF"; static const char lower[]="0123456789abcdef";
const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower; unsigned i=0; SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
while (!!temp1) { word digit; Integer::Divide(digit, temp2, temp1, base); s[i++]=vec[digit]; temp1.swap(temp2); }
while (i--) { out << s[i]; // if (i && !(i%block))
// out << ",";
} return out << suffix; }
Integer& Integer::operator++() { if (NotNegative()) { if (Increment(reg, reg.size())) { reg.CleanGrow(2*reg.size()); reg[reg.size()/2]=1; } } else { word borrow = Decrement(reg, reg.size()); assert(!borrow); if (WordCount()==0) *this = Zero(); } return *this; }
Integer& Integer::operator--() { if (IsNegative()) { if (Increment(reg, reg.size())) { reg.CleanGrow(2*reg.size()); reg[reg.size()/2]=1; } } else { if (Decrement(reg, reg.size())) *this = -One(); } return *this; }
void PositiveAdd(Integer &sum, const Integer &a, const Integer& b) { int carry; if (a.reg.size() == b.reg.size()) carry = Add(sum.reg, a.reg, b.reg, a.reg.size()); else if (a.reg.size() > b.reg.size()) { carry = Add(sum.reg, a.reg, b.reg, b.reg.size()); CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size()); carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry); } else { carry = Add(sum.reg, a.reg, b.reg, a.reg.size()); CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size()); carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry); }
if (carry) { sum.reg.CleanGrow(2*sum.reg.size()); sum.reg[sum.reg.size()/2] = 1; } sum.sign = Integer::POSITIVE; }
void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b) { unsigned aSize = a.WordCount(); aSize += aSize%2; unsigned bSize = b.WordCount(); bSize += bSize%2;
if (aSize == bSize) { if (Compare(a.reg, b.reg, aSize) >= 0) { Subtract(diff.reg, a.reg, b.reg, aSize); diff.sign = Integer::POSITIVE; } else { Subtract(diff.reg, b.reg, a.reg, aSize); diff.sign = Integer::NEGATIVE; } } else if (aSize > bSize) { word borrow = Subtract(diff.reg, a.reg, b.reg, bSize); CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize); borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow); assert(!borrow); diff.sign = Integer::POSITIVE; } else { word borrow = Subtract(diff.reg, b.reg, a.reg, aSize); CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize); borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow); assert(!borrow); diff.sign = Integer::NEGATIVE; } }
// MSVC .NET 2003 workaround
template <class T> inline const T& STDMAX2(const T& a, const T& b) { return a < b ? b : a; }
Integer Integer::Plus(const Integer& b) const { Integer sum((word)0, STDMAX2(reg.size(), b.reg.size())); if (NotNegative()) { if (b.NotNegative()) PositiveAdd(sum, *this, b); else PositiveSubtract(sum, *this, b); } else { if (b.NotNegative()) PositiveSubtract(sum, b, *this); else { PositiveAdd(sum, *this, b); sum.sign = Integer::NEGATIVE; } } return sum; }
Integer& Integer::operator+=(const Integer& t) { reg.CleanGrow(t.reg.size()); if (NotNegative()) { if (t.NotNegative()) PositiveAdd(*this, *this, t); else PositiveSubtract(*this, *this, t); } else { if (t.NotNegative()) PositiveSubtract(*this, t, *this); else { PositiveAdd(*this, *this, t); sign = Integer::NEGATIVE; } } return *this; }
Integer Integer::Minus(const Integer& b) const { Integer diff((word)0, STDMAX2(reg.size(), b.reg.size())); if (NotNegative()) { if (b.NotNegative()) PositiveSubtract(diff, *this, b); else PositiveAdd(diff, *this, b); } else { if (b.NotNegative()) { PositiveAdd(diff, *this, b); diff.sign = Integer::NEGATIVE; } else PositiveSubtract(diff, b, *this); } return diff; }
Integer& Integer::operator-=(const Integer& t) { reg.CleanGrow(t.reg.size()); if (NotNegative()) { if (t.NotNegative()) PositiveSubtract(*this, *this, t); else PositiveAdd(*this, *this, t); } else { if (t.NotNegative()) { PositiveAdd(*this, *this, t); sign = Integer::NEGATIVE; } else PositiveSubtract(*this, t, *this); } return *this; }
Integer& Integer::operator<<=(size_t n) { const size_t wordCount = WordCount(); const size_t shiftWords = n / WORD_BITS; const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n))); ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords); ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits); return *this; }
Integer& Integer::operator>>=(size_t n) { const size_t wordCount = WordCount(); const size_t shiftWords = n / WORD_BITS; const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
ShiftWordsRightByWords(reg, wordCount, shiftWords); if (wordCount > shiftWords) ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits); if (IsNegative() && WordCount()==0) // avoid -0
*this = Zero(); return *this; }
void PositiveMultiply(Integer &product, const Integer &a, const Integer &b) { size_t aSize = RoundupSize(a.WordCount()); size_t bSize = RoundupSize(b.WordCount());
product.reg.CleanNew(RoundupSize(aSize+bSize)); product.sign = Integer::POSITIVE;
IntegerSecBlock workspace(aSize + bSize); AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize); }
void Multiply(Integer &product, const Integer &a, const Integer &b) { PositiveMultiply(product, a, b);
if (a.NotNegative() != b.NotNegative()) product.Negate(); }
Integer Integer::Times(const Integer &b) const { Integer product; Multiply(product, *this, b); return product; }
/*
void PositiveDivide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor) { remainder.reg.CleanNew(divisor.reg.size()); remainder.sign = Integer::POSITIVE; quotient.reg.New(0); quotient.sign = Integer::POSITIVE; unsigned i=dividend.BitCount(); while (i--) { word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1); remainder.reg[0] |= dividend[i]; if (overflow || remainder >= divisor) { Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size()); quotient.SetBit(i); } } } */
void PositiveDivide(Integer &remainder, Integer "ient, const Integer &a, const Integer &b) { unsigned aSize = a.WordCount(); unsigned bSize = b.WordCount();
if (!bSize) throw Integer::DivideByZero();
if (aSize < bSize) { remainder = a; remainder.sign = Integer::POSITIVE; quotient = Integer::Zero(); return; }
aSize += aSize%2; // round up to next even number
bSize += bSize%2;
remainder.reg.CleanNew(RoundupSize(bSize)); remainder.sign = Integer::POSITIVE; quotient.reg.CleanNew(RoundupSize(aSize-bSize+2)); quotient.sign = Integer::POSITIVE;
IntegerSecBlock T(aSize+3*(bSize+2)); Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize); }
void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor) { PositiveDivide(remainder, quotient, dividend, divisor);
if (dividend.IsNegative()) { quotient.Negate(); if (remainder.NotZero()) { --quotient; remainder = divisor.AbsoluteValue() - remainder; } }
if (divisor.IsNegative()) quotient.Negate(); }
void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n) { q = a; q >>= n;
const size_t wordCount = BitsToWords(n); if (wordCount <= a.WordCount()) { r.reg.resize(RoundupSize(wordCount)); CopyWords(r.reg, a.reg, wordCount); SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount); if (n % WORD_BITS != 0) r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS)); } else { r.reg.resize(RoundupSize(a.WordCount())); CopyWords(r.reg, a.reg, r.reg.size()); } r.sign = POSITIVE;
if (a.IsNegative() && r.NotZero()) { --q; r = Power2(n) - r; } }
Integer Integer::DividedBy(const Integer &b) const { Integer remainder, quotient; Integer::Divide(remainder, quotient, *this, b); return quotient; }
Integer Integer::Modulo(const Integer &b) const { Integer remainder, quotient; Integer::Divide(remainder, quotient, *this, b); return remainder; }
void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor) { if (!divisor) throw Integer::DivideByZero();
assert(divisor);
if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
{ quotient = dividend >> (BitPrecision(divisor)-1); remainder = dividend.reg[0] & (divisor-1); return; }
unsigned int i = dividend.WordCount(); quotient.reg.CleanNew(RoundupSize(i)); remainder = 0; while (i--) { quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor; remainder = DWord(dividend.reg[i], remainder) % divisor; }
if (dividend.NotNegative()) quotient.sign = POSITIVE; else { quotient.sign = NEGATIVE; if (remainder) { --quotient; remainder = divisor - remainder; } } }
Integer Integer::DividedBy(word b) const { word remainder; Integer quotient; Integer::Divide(remainder, quotient, *this, b); return quotient; }
word Integer::Modulo(word divisor) const { if (!divisor) throw Integer::DivideByZero();
assert(divisor);
word remainder;
if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
remainder = reg[0] & (divisor-1); else { unsigned int i = WordCount();
if (divisor <= 5) { DWord sum(0, 0); while (i--) sum += reg[i]; remainder = sum % divisor; } else { remainder = 0; while (i--) remainder = DWord(reg[i], remainder) % divisor; } }
if (IsNegative() && remainder) remainder = divisor - remainder;
return remainder; }
void Integer::Negate() { if (!!(*this)) // don't flip sign if *this==0
sign = Sign(1-sign); }
int Integer::PositiveCompare(const Integer& t) const { unsigned size = WordCount(), tSize = t.WordCount();
if (size == tSize) return CryptoPP::Compare(reg, t.reg, size); else return size > tSize ? 1 : -1; }
int Integer::Compare(const Integer& t) const { if (NotNegative()) { if (t.NotNegative()) return PositiveCompare(t); else return 1; } else { if (t.NotNegative()) return -1; else return -PositiveCompare(t); } }
Integer Integer::SquareRoot() const { if (!IsPositive()) return Zero();
// overestimate square root
Integer x, y = Power2((BitCount()+1)/2); assert(y*y >= *this);
do { x = y; y = (x + *this/x) >> 1; } while (y<x);
return x; }
bool Integer::IsSquare() const { Integer r = SquareRoot(); return *this == r.Squared(); }
bool Integer::IsUnit() const { return (WordCount() == 1) && (reg[0] == 1); }
Integer Integer::MultiplicativeInverse() const { return IsUnit() ? *this : Zero(); }
Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m) { return x*y%m; }
Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m) { ModularArithmetic mr(m); return mr.Exponentiate(x, e); }
Integer Integer::Gcd(const Integer &a, const Integer &b) { return EuclideanDomainOf<Integer>().Gcd(a, b); }
Integer Integer::InverseMod(const Integer &m) const { assert(m.NotNegative());
if (IsNegative()) return Modulo(m).InverseMod(m);
if (m.IsEven()) { if (!m || IsEven()) return Zero(); // no inverse
if (*this == One()) return One();
Integer u = m.Modulo(*this).InverseMod(*this); return !u ? Zero() : (m*(*this-u)+1)/(*this); }
SecBlock<word> T(m.reg.size() * 4); Integer r((word)0, m.reg.size()); unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size()); DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size()); return r; }
word Integer::InverseMod(word mod) const { word g0 = mod, g1 = *this % mod; word v0 = 0, v1 = 1; word y;
while (g1) { if (g1 == 1) return v1; y = g0 / g1; g0 = g0 % g1; v0 += y * v1;
if (!g0) break; if (g0 == 1) return mod-v0; y = g1 / g0; g1 = g1 % g0; v1 += y * v0; } return 0; }
// ********************************************************
ModularArithmetic::ModularArithmetic(BufferedTransformation &bt) { BERSequenceDecoder seq(bt); OID oid(seq); if (oid != ASN1::prime_field()) BERDecodeError(); m_modulus.BERDecode(seq); seq.MessageEnd(); m_result.reg.resize(m_modulus.reg.size()); }
void ModularArithmetic::DEREncode(BufferedTransformation &bt) const { DERSequenceEncoder seq(bt); ASN1::prime_field().DEREncode(seq); m_modulus.DEREncode(seq); seq.MessageEnd(); }
void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const { a.DEREncodeAsOctetString(out, MaxElementByteLength()); }
void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const { a.BERDecodeAsOctetString(in, MaxElementByteLength()); }
const Integer& ModularArithmetic::Half(const Integer &a) const { if (a.reg.size()==m_modulus.reg.size()) { CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size()); return m_result; } else return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1)); }
const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const { if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) { if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size()) || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0) { CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size()); } return m_result; } else { m_result1 = a+b; if (m_result1 >= m_modulus) m_result1 -= m_modulus; return m_result1; } }
Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const { if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) { if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size()) || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0) { CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size()); } } else { a+=b; if (a>=m_modulus) a-=m_modulus; }
return a; }
const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const { if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) { if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size())) CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size()); return m_result; } else { m_result1 = a-b; if (m_result1.IsNegative()) m_result1 += m_modulus; return m_result1; } }
Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const { if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size()) { if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size())) CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size()); } else { a-=b; if (a.IsNegative()) a+=m_modulus; }
return a; }
const Integer& ModularArithmetic::Inverse(const Integer &a) const { if (!a) return a;
CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size()); if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size())) Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
return m_result; }
Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const { if (m_modulus.IsOdd()) { MontgomeryRepresentation dr(m_modulus); return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2)); } else return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2); }
void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const { if (m_modulus.IsOdd()) { MontgomeryRepresentation dr(m_modulus); dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount); for (unsigned int i=0; i<exponentsCount; i++) results[i] = dr.ConvertOut(results[i]); } else AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount); }
MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m) // modulus must be odd
: ModularArithmetic(m), m_u((word)0, m_modulus.reg.size()), m_workspace(5*m_modulus.reg.size()) { if (!m_modulus.IsOdd()) throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size()); }
const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const { word *const T = m_workspace.begin(); word *const R = m_result.reg.begin(); const size_t N = m_modulus.reg.size(); assert(a.reg.size()<=N && b.reg.size()<=N);
AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size()); SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size()); MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); return m_result; }
const Integer& MontgomeryRepresentation::Square(const Integer &a) const { word *const T = m_workspace.begin(); word *const R = m_result.reg.begin(); const size_t N = m_modulus.reg.size(); assert(a.reg.size()<=N);
CryptoPP::Square(T, T+2*N, a.reg, a.reg.size()); SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size()); MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); return m_result; }
Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const { word *const T = m_workspace.begin(); word *const R = m_result.reg.begin(); const size_t N = m_modulus.reg.size(); assert(a.reg.size()<=N);
CopyWords(T, a.reg, a.reg.size()); SetWords(T+a.reg.size(), 0, 2*N-a.reg.size()); MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); return m_result; }
const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const { // return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
word *const T = m_workspace.begin(); word *const R = m_result.reg.begin(); const size_t N = m_modulus.reg.size(); assert(a.reg.size()<=N);
CopyWords(T, a.reg, a.reg.size()); SetWords(T+a.reg.size(), 0, 2*N-a.reg.size()); MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N); unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
// cout << "k=" << k << " N*32=" << 32*N << endl;
if (k>N*WORD_BITS) DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N); else MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
return m_result; }
NAMESPACE_END
#endif
|