Counter Strike : Global Offensive Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

4235 lines
104 KiB

  1. // integer.cpp - written and placed in the public domain by Wei Dai
  2. // contains public domain code contributed by Alister Lee and Leonard Janke
  3. #include "pch.h"
  4. #ifndef CRYPTOPP_IMPORTS
  5. #include "integer.h"
  6. #include "modarith.h"
  7. #include "nbtheory.h"
  8. #include "asn.h"
  9. #include "oids.h"
  10. #include "words.h"
  11. #include "algparam.h"
  12. #include "pubkey.h" // for P1363_KDF2
  13. #include "sha.h"
  14. #include "cpu.h"
  15. #include <iostream>
  16. #if _MSC_VER >= 1400
  17. #include <intrin.h>
  18. #endif
  19. #ifdef __DECCXX
  20. #include <c_asm.h>
  21. #endif
  22. #ifdef CRYPTOPP_MSVC6_NO_PP
  23. #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
  24. #endif
  25. #define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
  26. NAMESPACE_BEGIN(CryptoPP)
  27. bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
  28. {
  29. if (valueType != typeid(Integer))
  30. return false;
  31. *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
  32. return true;
  33. }
  34. inline static int Compare(const word *A, const word *B, size_t N)
  35. {
  36. while (N--)
  37. if (A[N] > B[N])
  38. return 1;
  39. else if (A[N] < B[N])
  40. return -1;
  41. return 0;
  42. }
  43. inline static int Increment(word *A, size_t N, word B=1)
  44. {
  45. assert(N);
  46. word t = A[0];
  47. A[0] = t+B;
  48. if (A[0] >= t)
  49. return 0;
  50. for (unsigned i=1; i<N; i++)
  51. if (++A[i])
  52. return 0;
  53. return 1;
  54. }
  55. inline static int Decrement(word *A, size_t N, word B=1)
  56. {
  57. assert(N);
  58. word t = A[0];
  59. A[0] = t-B;
  60. if (A[0] <= t)
  61. return 0;
  62. for (unsigned i=1; i<N; i++)
  63. if (A[i]--)
  64. return 0;
  65. return 1;
  66. }
  67. static void TwosComplement(word *A, size_t N)
  68. {
  69. Decrement(A, N);
  70. for (unsigned i=0; i<N; i++)
  71. A[i] = ~A[i];
  72. }
  73. static word AtomicInverseModPower2(word A)
  74. {
  75. assert(A%2==1);
  76. word R=A%8;
  77. for (unsigned i=3; i<WORD_BITS; i*=2)
  78. R = R*(2-R*A);
  79. assert(R*A==1);
  80. return R;
  81. }
  82. // ********************************************************
  // Two-word ("double word") arithmetic helpers, written as macros so that
  // each configuration can plug in its own implementation.
  //
  // First branch: a double word named x is held in two separate `word`
  // variables x0 (low half, see LowWord) and x1 (high half, see HighWord).
  // It is selected when no native dword type is available, or on x86-64 when
  // a 128-bit word type is available.
  83. #if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) || (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
  84. #define Declare2Words(x) word x##0, x##1;
  85. #define AssignWord(a, b) a##0 = b; a##1 = 0;
  86. #define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
  87. #define LowWord(a) a##0
  88. #define HighWord(a) a##1
  // Compiler-specific word x word -> (low, high) multiply and, where
  // available, multi-word shift/accumulate helpers.
  89. #ifdef _MSC_VER
  90. #define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
  91. #ifndef __INTEL_COMPILER
  92. #define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
  93. #endif
  94. #elif defined(__DECCXX)
  95. #define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
  96. #elif defined(__x86_64__)
  97. #if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
  98. // Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
  99. #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
  100. #else
  // GCC-style extended asm versions. Note that MulAcc writes the product to
  // variables named p0 and p1, which must exist at the expansion site.
  101. #define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
  102. #define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
  103. #define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
  104. #define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
  105. #define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
  106. #define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
  107. #endif
  108. #endif
  // Generic pieces of the split representation; the #ifndef guards keep any
  // compiler-specific definition made above.
  109. #define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
  110. #ifndef Double3Words
  111. #define Double3Words(c, d) d##1 = 2*d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2*d##0 + (c>>(WORD_BITS-1)); c *= 2;
  112. #endif
  113. #ifndef Acc2WordsBy2
  114. #define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
  115. #endif
  // In this representation the high half (u1) doubles as the running
  // carry/borrow between successive AddWithCarry/SubtractWithBorrow steps.
  116. #define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
  117. #define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
  118. #define GetCarry(u) u##1
  119. #define GetBorrow(u) u##1
  // Second branch: a double word is a single variable of the native `dword` type.
  120. #else
  121. #define Declare2Words(x) dword x;
  122. #if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER)
  123. #define MultiplyWords(p, a, b) p = __emulu(a, b);
  124. #else
  125. #define MultiplyWords(p, a, b) p = (dword)a*b;
  126. #endif
  127. #define AssignWord(a, b) a = b;
  128. #define Add2WordsBy1(a, b, c) a = b + c;
  129. #define Acc2WordsBy2(a, b) a += b;
  130. #define LowWord(a) word(a)
  131. #define HighWord(a) word(a>>WORD_BITS)
  132. #define Double3Words(c, d) d = 2*d + (c>>(WORD_BITS-1)); c *= 2;
  // The carry is the high word of the previous sum; the borrow is the top
  // bit of the previous difference.
  133. #define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
  134. #define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
  135. #define GetCarry(u) HighWord(u)
  136. #define GetBorrow(u) word(u>>(WORD_BITS*2-1))
  137. #endif
  // Defaults shared by both branches, used only when nothing above defined
  // the macro. The generic MulAcc multiplies into a double word named `p`,
  // which must be declared at the expansion site.
  138. #ifndef MulAcc
  139. #define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
  140. #endif
  141. #ifndef Acc2WordsBy1
  142. #define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
  143. #endif
  144. #ifndef Acc3WordsBy2
  145. #define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
  146. #endif
  147. class DWord
  148. {
  149. public:
  150. DWord() {}
  151. #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
  152. explicit DWord(word low)
  153. {
  154. m_whole = low;
  155. }
  156. #else
  157. explicit DWord(word low)
  158. {
  159. m_halfs.low = low;
  160. m_halfs.high = 0;
  161. }
  162. #endif
  163. DWord(word low, word high)
  164. {
  165. m_halfs.low = low;
  166. m_halfs.high = high;
  167. }
  168. static DWord Multiply(word a, word b)
  169. {
  170. DWord r;
  171. #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
  172. r.m_whole = (dword)a * b;
  173. #elif defined(MultiplyWordsLoHi)
  174. MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
  175. #endif
  176. return r;
  177. }
  178. static DWord MultiplyAndAdd(word a, word b, word c)
  179. {
  180. DWord r = Multiply(a, b);
  181. return r += c;
  182. }
  183. DWord & operator+=(word a)
  184. {
  185. #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
  186. m_whole = m_whole + a;
  187. #else
  188. m_halfs.low += a;
  189. m_halfs.high += (m_halfs.low < a);
  190. #endif
  191. return *this;
  192. }
  193. DWord operator+(word a)
  194. {
  195. DWord r;
  196. #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
  197. r.m_whole = m_whole + a;
  198. #else
  199. r.m_halfs.low = m_halfs.low + a;
  200. r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
  201. #endif
  202. return r;
  203. }
  204. DWord operator-(DWord a)
  205. {
  206. DWord r;
  207. #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
  208. r.m_whole = m_whole - a.m_whole;
  209. #else
  210. r.m_halfs.low = m_halfs.low - a.m_halfs.low;
  211. r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
  212. #endif
  213. return r;
  214. }
  215. DWord operator-(word a)
  216. {
  217. DWord r;
  218. #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
  219. r.m_whole = m_whole - a;
  220. #else
  221. r.m_halfs.low = m_halfs.low - a;
  222. r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
  223. #endif
  224. return r;
  225. }
  226. // returns quotient, which must fit in a word
  227. word operator/(word divisor);
  228. word operator%(word a);
  229. bool operator!() const
  230. {
  231. #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
  232. return !m_whole;
  233. #else
  234. return !m_halfs.high && !m_halfs.low;
  235. #endif
  236. }
  237. word GetLowHalf() const {return m_halfs.low;}
  238. word GetHighHalf() const {return m_halfs.high;}
  239. word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
  240. private:
  241. union
  242. {
  243. #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
  244. dword m_whole;
  245. #endif
  246. struct
  247. {
  248. #ifdef IS_LITTLE_ENDIAN
  249. word low;
  250. word high;
  251. #else
  252. word high;
  253. word low;
  254. #endif
  255. } m_halfs;
  256. };
  257. };
  258. class Word
  259. {
  260. public:
  261. Word() {}
  262. Word(word value)
  263. {
  264. m_whole = value;
  265. }
  266. Word(hword low, hword high)
  267. {
  268. m_whole = low | (word(high) << (WORD_BITS/2));
  269. }
  270. static Word Multiply(hword a, hword b)
  271. {
  272. Word r;
  273. r.m_whole = (word)a * b;
  274. return r;
  275. }
  276. Word operator-(Word a)
  277. {
  278. Word r;
  279. r.m_whole = m_whole - a.m_whole;
  280. return r;
  281. }
  282. Word operator-(hword a)
  283. {
  284. Word r;
  285. r.m_whole = m_whole - a;
  286. return r;
  287. }
  288. // returns quotient, which must fit in a word
  289. hword operator/(hword divisor)
  290. {
  291. return hword(m_whole / divisor);
  292. }
  293. bool operator!() const
  294. {
  295. return !m_whole;
  296. }
  297. word GetWhole() const {return m_whole;}
  298. hword GetLowHalf() const {return hword(m_whole);}
  299. hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
  300. hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
  301. private:
  302. word m_whole;
  303. };
  // Divides the three-word value {A[2],A[1],A[0]} by the two-word value
  // {B1,B0}. Returns the one-word quotient and leaves the remainder in A
  // (A[2] ends up zero). S is the single-word type and D the matching
  // double-word type; `dummy` is unused.
  // Precondition: {A[2],A[1]} < {B1,B0}, so the quotient fits in one S.
  template <class S, class D>
  S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
  {
      assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));

      // Estimate the quotient with a two-word by one-word divide. Dividing by
      // B1+1 can only under-estimate, which the correction loop below repairs.
      const S divisorTop = S(B1+1);
      S quotient;
      if (divisorTop == 0)
          quotient = A[2];
      else if (B1 > 0)
          quotient = D(A[1], A[2]) / divisorTop;
      else
          quotient = D(A[0], A[1]) / B0;

      // Subtract quotient*B from A, one word at a time with borrow.
      D product = D::Multiply(B0, quotient);
      D diff = (D) A[0] - product.GetLowHalf();
      A[0] = diff.GetLowHalf();
      diff = (D) A[1] - product.GetHighHalf() - diff.GetHighHalfAsBorrow() - D::Multiply(B1, quotient);
      A[1] = diff.GetLowHalf();
      A[2] += diff.GetHighHalf();

      // The estimate never exceeds the true quotient; keep subtracting B while
      // the remainder is still at least B.
      for (;;)
      {
          const bool remainderTooBig = A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0);
          if (!remainderTooBig)
              break;

          diff = (D) A[0] - B0;
          A[0] = diff.GetLowHalf();
          diff = (D) A[1] - B1 - diff.GetHighHalfAsBorrow();
          A[1] = diff.GetLowHalf();
          A[2] += diff.GetHighHalf();
          quotient++;
          assert(quotient); // shouldn't overflow
      }

      return quotient;
  }
  338. // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
  339. template <class S, class D>
  340. inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
  341. {
  342. if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
  343. return D(Ah.GetLowHalf(), Ah.GetHighHalf());
  344. else
  345. {
  346. S Q[2];
  347. T[0] = Al.GetLowHalf();
  348. T[1] = Al.GetHighHalf();
  349. T[2] = Ah.GetLowHalf();
  350. T[3] = Ah.GetHighHalf();
  351. Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
  352. Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
  353. return D(Q[0], Q[1]);
  354. }
  355. }
  356. // returns quotient, which must fit in a word
  357. inline word DWord::operator/(word a)
  358. {
  359. #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
  360. return word(m_whole / a);
  361. #else
  362. hword r[4];
  363. return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
  364. #endif
  365. }
  366. inline word DWord::operator%(word a)
  367. {
  368. #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
  369. return word(m_whole % a);
  370. #else
  371. if (a < (word(1) << (WORD_BITS/2)))
  372. {
  373. hword h = hword(a);
  374. word r = m_halfs.high % h;
  375. r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
  376. return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
  377. }
  378. else
  379. {
  380. hword r[4];
  381. DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
  382. return Word(r[0], r[1]).GetWhole();
  383. }
  384. #endif
  385. }
  386. // ********************************************************
  387. // use some tricks to share assembly code between MSVC and GCC
  // GCC flavour: each assembly body is a single extended-asm statement
  // written in Intel syntax. A *Prologue macro opens the statement and the
  // matching *Epilogue macro switches back to AT&T syntax, supplies the
  // operand constraints and closes it.
  388. #if defined(__GNUC__)
  // AddPrologue intentionally ends inside the still-open "asm (" so the
  // function body can append its instruction strings; AddEpilogue closes it.
  // Register binding: edx = C, eax = A, edi = B, ecx = N; result comes back in eax.
  389. #define AddPrologue \
  390. int result; \
  391. __asm__ __volatile__ \
  392. ( \
  393. ".intel_syntax noprefix;"
  394. #define AddEpilogue \
  395. ".att_syntax prefix;" \
  396. : "=a" (result)\
  397. : "d" (C), "a" (A), "D" (B), "c" (N) \
  398. : "%esi", "memory", "cc" \
  399. );\
  400. return result;
  // MulPrologue saves ebx and copies edx into it; the epilogues below bind
  // edx to s_maskLow16, so ebx holds that value inside the body.
  401. #define MulPrologue \
  402. __asm__ __volatile__ \
  403. ( \
  404. ".intel_syntax noprefix;" \
  405. AS1( push ebx) \
  406. AS2( mov ebx, edx)
  407. #define MulEpilogue \
  408. AS1( pop ebx) \
  409. ".att_syntax prefix;" \
  410. : \
  411. : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
  412. : "%esi", "memory", "cc" \
  413. );
  // Squaring has no B operand, so edi is listed as clobbered instead of bound.
  414. #define SquPrologue MulPrologue
  415. #define SquEpilogue \
  416. AS1( pop ebx) \
  417. ".att_syntax prefix;" \
  418. : \
  419. : "d" (s_maskLow16), "c" (C), "a" (A) \
  420. : "%esi", "%edi", "memory", "cc" \
  421. );
  // The "Top" variant additionally binds esi to L.
  422. #define TopPrologue MulPrologue
  423. #define TopEpilogue \
  424. AS1( pop ebx) \
  425. ".att_syntax prefix;" \
  426. : \
  427. : "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
  428. : "memory", "cc" \
  429. );
  // Non-GCC flavour (MSVC-style __asm). AddPrologue/AddEpilogue are used by
  // the naked fastcall Add/Sub routines: they save edi/esi, load eax and edi
  // from the stack (A and B, per the comments at the use sites) and return
  // with "ret 8".
  430. #else
  431. #define AddPrologue \
  432. __asm push edi \
  433. __asm push esi \
  434. __asm mov eax, [esp+12] \
  435. __asm mov edi, [esp+16]
  436. #define AddEpilogue \
  437. __asm pop esi \
  438. __asm pop edi \
  439. __asm ret 8
  // ebx is saved and restored by hand only for _MSC_VER < 1300.
  440. #if _MSC_VER < 1300
  441. #define SaveEBX __asm push ebx
  442. #define RestoreEBX __asm pop ebx
  443. #else
  444. #define SaveEBX
  445. #define RestoreEBX
  446. #endif
  // These prologues load the same registers the GCC constraints bind:
  // eax = A, edi = B, ecx = C, esi = L, ebx = address of s_maskLow16.
  447. #define SquPrologue \
  448. AS2( mov eax, A) \
  449. AS2( mov ecx, C) \
  450. SaveEBX \
  451. AS2( lea ebx, s_maskLow16)
  452. #define MulPrologue \
  453. AS2( mov eax, A) \
  454. AS2( mov edi, B) \
  455. AS2( mov ecx, C) \
  456. SaveEBX \
  457. AS2( lea ebx, s_maskLow16)
  458. #define TopPrologue \
  459. AS2( mov eax, A) \
  460. AS2( mov edi, B) \
  461. AS2( mov ecx, C) \
  462. AS2( mov esi, L) \
  463. SaveEBX \
  464. AS2( lea ebx, s_maskLow16)
  465. #define SquEpilogue RestoreEBX
  466. #define MulEpilogue RestoreEBX
  467. #define TopEpilogue RestoreEBX
  468. #endif
  469. #ifdef CRYPTOPP_X64_MASM_AVAILABLE
  // C-linkage declarations of the N-word add/subtract primitives for the
  // CRYPTOPP_X64_MASM_AVAILABLE configuration. The definitions are not in
  // this file section (presumably a separate MASM object - confirm).
  470. extern "C" {
  471. int Baseline_Add(size_t N, word *C, const word *A, const word *B);
  472. int Baseline_Sub(size_t N, word *C, const word *A, const word *B);
  473. }
  474. #elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
  // x86-64 GCC inline-asm version: C[0..N) = A[0..N) + B[0..N), returning the
  // final carry flag as an int.
  // The asm receives the end pointers C+N, A+N and B+N (%2, %3, %4) and
  // negates N (%1, bound to rcx), so the words are addressed with a negative
  // index that counts up to zero.
  // NOTE(review): the loop steps the index by two words, so N is presumably
  // expected to be even, as the portable fallback asserts - confirm.
  475. int Baseline_Add(size_t N, word *C, const word *A, const word *B)
  476. {
  477. word result;
  478. __asm__ __volatile__
  479. (
  480. ".intel_syntax;"
  481. AS1( neg %1)
  482. ASJ( jz, 1, f)
  // first word: plain add, no incoming carry
  483. AS2( mov %0,[%3+8*%1])
  484. AS2( add %0,[%4+8*%1])
  485. AS2( mov [%2+8*%1],%0)
  486. ASL(0)
  487. AS2( mov %0,[%3+8*%1+8])
  488. AS2( adc %0,[%4+8*%1+8])
  489. AS2( mov [%2+8*%1+8],%0)
  // lea advances the index without touching the carry flag
  490. AS2( lea %1,[%1+2])
  491. ASJ( jrcxz, 1, f)
  492. AS2( mov %0,[%3+8*%1])
  493. AS2( adc %0,[%4+8*%1])
  494. AS2( mov [%2+8*%1],%0)
  495. ASJ( jmp, 0, b)
  496. ASL(1)
  // result = 0 + 0 + carry flag
  497. AS2( mov %0, 0)
  498. AS2( adc %0, %0)
  499. ".att_syntax;"
  500. : "=&r" (result), "+c" (N)
  501. : "r" (C+N), "r" (A+N), "r" (B+N)
  502. : "memory", "cc"
  503. );
  504. return (int)result;
  505. }
  // x86-64 GCC inline-asm version: C[0..N) = A[0..N) - B[0..N), returning the
  // final borrow (the carry flag after the last sbb) as an int.
  // Same structure as the x86-64 Baseline_Add: end pointers C+N, A+N and B+N
  // (%2, %3, %4) and a negated N (%1, bound to rcx) used as a negative index.
  // NOTE(review): the loop steps the index by two words, so N is presumably
  // expected to be even, as the portable fallback asserts - confirm.
  506. int Baseline_Sub(size_t N, word *C, const word *A, const word *B)
  507. {
  508. word result;
  509. __asm__ __volatile__
  510. (
  511. ".intel_syntax;"
  512. AS1( neg %1)
  513. ASJ( jz, 1, f)
  // first word: plain sub, no incoming borrow
  514. AS2( mov %0,[%3+8*%1])
  515. AS2( sub %0,[%4+8*%1])
  516. AS2( mov [%2+8*%1],%0)
  517. ASL(0)
  518. AS2( mov %0,[%3+8*%1+8])
  519. AS2( sbb %0,[%4+8*%1+8])
  520. AS2( mov [%2+8*%1+8],%0)
  // lea advances the index without touching the carry flag
  521. AS2( lea %1,[%1+2])
  522. ASJ( jrcxz, 1, f)
  523. AS2( mov %0,[%3+8*%1])
  524. AS2( sbb %0,[%4+8*%1])
  525. AS2( mov [%2+8*%1],%0)
  526. ASJ( jmp, 0, b)
  527. ASL(1)
  // result = 0 + 0 + carry flag (the borrow)
  528. AS2( mov %0, 0)
  529. AS2( adc %0, %0)
  530. ".att_syntax;"
  531. : "=&r" (result), "+c" (N)
  532. : "r" (C+N), "r" (A+N), "r" (B+N)
  533. : "memory", "cc"
  534. );
  535. return (int)result;
  536. }
  537. #elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
  // 32-bit x86 assembly version: C[0..N) = A[0..N) + B[0..N), returning the
  // final carry in eax.
  // The three pointers are advanced to the end of their arrays and ecx is
  // negated, so words are addressed with a negative index counting up to zero.
  // The main loop handles four words per pass; when bit 1 of the negated
  // index is set, the code first enters at label 1 to handle two words.
  // NOTE(review): words are always handled in pairs, so N is presumably
  // expected to be even - confirm.
  538. CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
  539. {
  540. AddPrologue
  541. // now: eax = A, edi = B, edx = C, ecx = N
  542. AS2( lea eax, [eax+4*ecx])
  543. AS2( lea edi, [edi+4*ecx])
  544. AS2( lea edx, [edx+4*ecx])
  545. AS1( neg ecx) // ecx is negative index
  546. AS2( test ecx, 2) // this clears carry flag
  547. ASJ( jz, 0, f)
  548. AS2( sub ecx, 2)
  549. ASJ( jmp, 1, f)
  550. ASL(0)
  551. ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
  552. AS2( mov esi,[eax+4*ecx])
  553. AS2( adc esi,[edi+4*ecx])
  554. AS2( mov [edx+4*ecx],esi)
  555. AS2( mov esi,[eax+4*ecx+4])
  556. AS2( adc esi,[edi+4*ecx+4])
  557. AS2( mov [edx+4*ecx+4],esi)
  558. ASL(1)
  559. AS2( mov esi,[eax+4*ecx+8])
  560. AS2( adc esi,[edi+4*ecx+8])
  561. AS2( mov [edx+4*ecx+8],esi)
  562. AS2( mov esi,[eax+4*ecx+12])
  563. AS2( adc esi,[edi+4*ecx+12])
  564. AS2( mov [edx+4*ecx+12],esi)
  565. AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
  566. ASJ( jmp, 0, b)
  567. ASL(2)
  568. AS2( mov eax, 0)
  569. AS1( setc al) // store carry into eax (return result register)
  570. AddEpilogue
  571. }
  // 32-bit x86 assembly version: C[0..N) = A[0..N) - B[0..N), returning the
  // final borrow (the carry flag after the last sbb) in eax.
  // Structure is identical to the x86 Baseline_Add with sbb in place of adc:
  // end pointers plus a negative index in ecx, four words per loop pass, and
  // an entry at label 1 when bit 1 of the negated index is set.
  // NOTE(review): words are always handled in pairs, so N is presumably
  // expected to be even - confirm.
  572. CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
  573. {
  574. AddPrologue
  575. // now: eax = A, edi = B, edx = C, ecx = N
  576. AS2( lea eax, [eax+4*ecx])
  577. AS2( lea edi, [edi+4*ecx])
  578. AS2( lea edx, [edx+4*ecx])
  579. AS1( neg ecx) // ecx is negative index
  580. AS2( test ecx, 2) // this clears carry flag
  581. ASJ( jz, 0, f)
  582. AS2( sub ecx, 2)
  583. ASJ( jmp, 1, f)
  584. ASL(0)
  585. ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
  586. AS2( mov esi,[eax+4*ecx])
  587. AS2( sbb esi,[edi+4*ecx])
  588. AS2( mov [edx+4*ecx],esi)
  589. AS2( mov esi,[eax+4*ecx+4])
  590. AS2( sbb esi,[edi+4*ecx+4])
  591. AS2( mov [edx+4*ecx+4],esi)
  592. ASL(1)
  593. AS2( mov esi,[eax+4*ecx+8])
  594. AS2( sbb esi,[edi+4*ecx+8])
  595. AS2( mov [edx+4*ecx+8],esi)
  596. AS2( mov esi,[eax+4*ecx+12])
  597. AS2( sbb esi,[edi+4*ecx+12])
  598. AS2( mov [edx+4*ecx+12],esi)
  599. AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
  600. ASJ( jmp, 0, b)
  601. ASL(2)
  602. AS2( mov eax, 0)
  603. AS1( setc al) // store carry into eax (return result register)
  604. AddEpilogue
  605. }
  606. #if CRYPTOPP_INTEGER_SSE2
  // Variant of the x86 Baseline_Add that does the arithmetic in the 64-bit
  // MMX registers with paddq instead of the adc chain.
  // mm2 is the running accumulator: each step adds the two 32-bit operands
  // into it, stores its low 32 bits to C, then shifts it right by 32 so only
  // the carry is left for the next word. The final carry is returned in eax,
  // and emms is issued before returning.
  // N == 0 jumps straight to label 2 and returns 0.
  607. CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word *C, const word *A, const word *B)
  608. {
  609. AddPrologue
  610. // now: eax = A, edi = B, edx = C, ecx = N
  611. AS2( lea eax, [eax+4*ecx])
  612. AS2( lea edi, [edi+4*ecx])
  613. AS2( lea edx, [edx+4*ecx])
  614. AS1( neg ecx) // ecx is negative index
  615. AS2( pxor mm2, mm2)
  616. ASJ( jz, 2, f)
  617. AS2( test ecx, 2) // this clears carry flag
  618. ASJ( jz, 0, f)
  619. AS2( sub ecx, 2)
  620. ASJ( jmp, 1, f)
  621. ASL(0)
  622. AS2( movd mm0, DWORD PTR [eax+4*ecx])
  623. AS2( movd mm1, DWORD PTR [edi+4*ecx])
  624. AS2( paddq mm0, mm1)
  625. AS2( paddq mm2, mm0)
  626. AS2( movd DWORD PTR [edx+4*ecx], mm2)
  627. AS2( psrlq mm2, 32)
  628. AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
  629. AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
  630. AS2( paddq mm0, mm1)
  631. AS2( paddq mm2, mm0)
  632. AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
  633. AS2( psrlq mm2, 32)
  634. ASL(1)
  635. AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
  636. AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
  637. AS2( paddq mm0, mm1)
  638. AS2( paddq mm2, mm0)
  639. AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
  640. AS2( psrlq mm2, 32)
  641. AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
  642. AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
  643. AS2( paddq mm0, mm1)
  644. AS2( paddq mm2, mm0)
  645. AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
  646. AS2( psrlq mm2, 32)
  647. AS2( add ecx, 4)
  648. ASJ( jnz, 0, b)
  649. ASL(2)
  650. AS2( movd eax, mm2)
  651. AS1( emms)
  652. AddEpilogue
  653. }
  // Variant of the x86 Baseline_Sub that does the arithmetic in the 64-bit
  // MMX registers with psubq instead of the sbb chain.
  // Each step computes A word - B word - incoming borrow as a 64-bit value,
  // stores its low 32 bits to C, then shifts right by 63 so the top (sign)
  // bit becomes the borrow for the next word. The borrow alternates between
  // mm2 and mm0 from one word to the next. The final borrow is returned in
  // eax, and emms is issued before returning.
  // N == 0 jumps straight to label 2 and returns 0.
  654. CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word *C, const word *A, const word *B)
  655. {
  656. AddPrologue
  657. // now: eax = A, edi = B, edx = C, ecx = N
  658. AS2( lea eax, [eax+4*ecx])
  659. AS2( lea edi, [edi+4*ecx])
  660. AS2( lea edx, [edx+4*ecx])
  661. AS1( neg ecx) // ecx is negative index
  662. AS2( pxor mm2, mm2)
  663. ASJ( jz, 2, f)
  664. AS2( test ecx, 2) // this clears carry flag
  665. ASJ( jz, 0, f)
  666. AS2( sub ecx, 2)
  667. ASJ( jmp, 1, f)
  668. ASL(0)
  669. AS2( movd mm0, DWORD PTR [eax+4*ecx])
  670. AS2( movd mm1, DWORD PTR [edi+4*ecx])
  671. AS2( psubq mm0, mm1)
  672. AS2( psubq mm0, mm2)
  673. AS2( movd DWORD PTR [edx+4*ecx], mm0)
  674. AS2( psrlq mm0, 63)
  675. AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
  676. AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
  677. AS2( psubq mm2, mm1)
  678. AS2( psubq mm2, mm0)
  679. AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
  680. AS2( psrlq mm2, 63)
  681. ASL(1)
  682. AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
  683. AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
  684. AS2( psubq mm0, mm1)
  685. AS2( psubq mm0, mm2)
  686. AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
  687. AS2( psrlq mm0, 63)
  688. AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
  689. AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
  690. AS2( psubq mm2, mm1)
  691. AS2( psubq mm2, mm0)
  692. AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
  693. AS2( psrlq mm2, 63)
  694. AS2( add ecx, 4)
  695. ASJ( jnz, 0, b)
  696. ASL(2)
  697. AS2( movd eax, mm2)
  698. AS1( emms)
  699. AddEpilogue
  700. }
  701. #endif // #if CRYPTOPP_INTEGER_SSE2
  702. #else
  703. int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word *C, const word *A, const word *B)
  704. {
  705. assert (N%2 == 0);
  706. Declare2Words(u);
  707. AssignWord(u, 0);
  708. for (size_t i=0; i<N; i+=2)
  709. {
  710. AddWithCarry(u, A[i], B[i]);
  711. C[i] = LowWord(u);
  712. AddWithCarry(u, A[i+1], B[i+1]);
  713. C[i+1] = LowWord(u);
  714. }
  715. return int(GetCarry(u));
  716. }
  717. int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word *C, const word *A, const word *B)
  718. {
  719. assert (N%2 == 0);
  720. Declare2Words(u);
  721. AssignWord(u, 0);
  722. for (size_t i=0; i<N; i+=2)
  723. {
  724. SubtractWithBorrow(u, A[i], B[i]);
  725. C[i] = LowWord(u);
  726. SubtractWithBorrow(u, A[i+1], B[i+1]);
  727. C[i+1] = LowWord(u);
  728. }
  729. return int(GetBorrow(u));
  730. }
  731. #endif
  732. static word LinearMultiply(word *C, const word *A, word B, size_t N)
  733. {
  734. word carry=0;
  735. for(unsigned i=0; i<N; i++)
  736. {
  737. Declare2Words(p);
  738. MultiplyWords(p, A[i], B);
  739. Acc2WordsBy1(p, carry);
  740. C[i] = LowWord(p);
  741. carry = HighWord(p);
  742. }
  743. return carry;
  744. }
  745. #ifndef CRYPTOPP_DOXYGEN_PROCESSING
  // Operation sequence for the 2-word multiply, expressed with the
  // Mul_Begin / Mul_SaveAcc / Mul_Acc / Mul_End helper macros (defined
  // outside this file section).
  746. #define Mul_2 \
  747. Mul_Begin(2) \
  748. Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
  749. Mul_End(1, 1)
  // Operation sequence for the 4-word multiply. In each row the index pair
  // of Mul_SaveAcc(k, i, j) and of every following Mul_Acc(i, j) has the same
  // sum k+1 - presumably one column of the schoolbook product per row
  // (confirm against the helper macro definitions).
  750. #define Mul_4 \
  751. Mul_Begin(4) \
  752. Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
  753. Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
  754. Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
  755. Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
  756. Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
  757. Mul_End(5, 3)
  // Operation sequence for the 8-word multiply. In each row the index pair
  // of Mul_SaveAcc(k, i, j) and of every following Mul_Acc(i, j) has the same
  // sum k+1 - presumably one column of the schoolbook product per row
  // (confirm against the helper macro definitions).
  758. #define Mul_8 \
  759. Mul_Begin(8) \
  760. Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
  761. Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
  762. Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
  763. Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
  764. Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
  765. Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
  766. Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
  767. Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
  768. Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
  769. Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
  770. Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
  771. Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
  772. Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
  773. Mul_End(13, 7)
  // Operation sequence for the 16-word multiply. In each row the index pair
  // of Mul_SaveAcc(k, i, j) and of every following Mul_Acc(i, j) has the same
  // sum k+1 - presumably one column of the schoolbook product per row
  // (confirm against the helper macro definitions).
  774. #define Mul_16 \
  775. Mul_Begin(16) \
  776. Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
  777. Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
  778. Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
  779. Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
  780. Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
  781. Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
  782. Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
  783. Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
  784. Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
  785. Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
  786. Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
  787. Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
  788. Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
  789. Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
  790. Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
  791. Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
  792. Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
  793. Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
  794. Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
  795. Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
  796. Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
  797. Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
  798. Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
  799. Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
  800. Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
  801. Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
  802. Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
  803. Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
  804. Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
  805. Mul_End(29, 15)
  // Operation sequence for the 2-word squaring: only Squ_Begin and Squ_End
  // (helper macros defined outside this file section).
  806. #define Squ_2 \
  807. Squ_Begin(2) \
  808. Squ_End(2)
  // Operation sequence for the 4-word squaring. Rows alternate between
  // ending in Squ_Diag(n) and Squ_NonDiag; within a row the index pairs of
  // Squ_SaveAcc(k, i, j) and Squ_Acc(i, j) share the sum k+1 and list each
  // unordered pair once (presumably the helpers double the cross terms -
  // confirm against their definitions).
  809. #define Squ_4 \
  810. Squ_Begin(4) \
  811. Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
  812. Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
  813. Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
  814. Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
  815. Squ_End(4)
  // Operation sequence for the 8-word squaring. Rows alternate between
  // ending in Squ_Diag(n) and Squ_NonDiag; within a row the index pairs of
  // Squ_SaveAcc(k, i, j) and Squ_Acc(i, j) share the sum k+1 and list each
  // unordered pair once (presumably the helpers double the cross terms -
  // confirm against their definitions).
  816. #define Squ_8 \
  817. Squ_Begin(8) \
  818. Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
  819. Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
  820. Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
  821. Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
  822. Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
  823. Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
  824. Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
  825. Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
  826. Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
  827. Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
  828. Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
  829. Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
  830. Squ_End(8)
  // Operation sequence for the 16-word squaring. Rows alternate between
  // ending in Squ_Diag(n) and Squ_NonDiag; within a row the index pairs of
  // Squ_SaveAcc(k, i, j) and Squ_Acc(i, j) share the sum k+1 and list each
  // unordered pair once (presumably the helpers double the cross terms -
  // confirm against their definitions).
  831. #define Squ_16 \
  832. Squ_Begin(16) \
  833. Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
  834. Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
  835. Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
  836. Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
  837. Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
  838. Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
  839. Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
  840. Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
  841. Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
  842. Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
  843. Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
  844. Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
  845. Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
  846. Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
  847. Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
  848. Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
  849. Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
  850. Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
  851. Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
  852. Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
  853. Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
  854. Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
  855. Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
  856. Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
  857. Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
  858. Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
  859. Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
  860. Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
  861. Squ_End(16)
  // Bot_2: schedule for the low 2 words of a 2x2-word product.
  // Mul_Begin starts with A[0]*B[0]; Bot_SaveAcc(0, 0, 1) stores R[0] and the
  // Bot_* steps add only the low word of each remaining product before
  // Bot_End(2) stores R[1].
  862. #define Bot_2 \
  863. Mul_Begin(2) \
  864. Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
  865. Bot_End(2)
  // Bot_4: schedule for the low 4 words of a 4x4-word product.
  // Rows using Mul_SaveAcc/Mul_Acc keep the carry into the next column; the last
  // row uses Bot_SaveAcc/Bot_Acc because only the low word of column 3 is needed
  // before Bot_End(4) stores R[3].
  // The second row lists its terms in ascending A-index order, (0,2) (1,1) (2,0),
  // so it reads the same way as the matching rows of Bot_8 and Bot_16.  The set
  // of products accumulated into the column is unchanged.
  #define Bot_4 \
      Mul_Begin(4) \
      Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
      Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
      Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
      Bot_End(4)
  // Bot_8: schedule for the low 8 words of an 8x8-word product.
  // Row k stores R[k] and accumulates every A[i]*B[j] with i + j == k + 1.
  // The final row switches to Bot_SaveAcc/Bot_Acc (low words only) and
  // Bot_End(8) stores R[7].
  872. #define Bot_8 \
  873. Mul_Begin(8) \
  874. Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
  875. Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
  876. Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
  877. Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
  878. Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
  879. Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
  880. Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
  881. Bot_End(8)
  // Bot_16: schedule for the low 16 words of a 16x16-word product.
  // Same layout as Bot_8: rows 0..13 use Mul_SaveAcc/Mul_Acc, the last row
  // uses Bot_SaveAcc/Bot_Acc (low words only), and Bot_End(16) stores R[15].
  882. #define Bot_16 \
  883. Mul_Begin(16) \
  884. Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
  885. Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
  886. Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
  887. Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
  888. Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
  889. Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
  890. Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
  891. Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
  892. Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
  893. Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
  894. Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
  895. Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
  896. Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
  897. Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
  898. Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
  899. Bot_End(16)
  900. #endif
  // Word-level primitives used by the Mul_N / Bot_N schedules (and by the
  // Baseline_MultiplyTop* bodies).  They operate on the local names R, A and B
  // of the function they are expanded in.
  //
  // The "#if 0" branch is a disabled alternative that keeps the running column
  // sum c as a double word; note its Mul_End takes a single argument (n),
  // unlike the active Mul_End(k, i) below.
  901. #if 0
  // Disabled: start the product with A[0]*B[0]; c = low word, d = high word.
  902. #define Mul_Begin(n) \
  903. Declare2Words(p) \
  904. Declare2Words(c) \
  905. Declare2Words(d) \
  906. MultiplyWords(p, A[0], B[0]) \
  907. AssignWord(c, LowWord(p)) \
  908. AssignWord(d, HighWord(p))
  // Disabled: add the low word of A[i]*B[j] to c and its high word to d.
  909. #define Mul_Acc(i, j) \
  910. MultiplyWords(p, A[i], B[j]) \
  911. Acc2WordsBy1(c, LowWord(p)) \
  912. Acc2WordsBy1(d, HighWord(p))
  // Disabled: store R[k], carry into the next column and start it with A[i]*B[j].
  913. #define Mul_SaveAcc(k, i, j) \
  914. R[k] = LowWord(c); \
  915. Add2WordsBy1(c, d, HighWord(c)) \
  916. MultiplyWords(p, A[i], B[j]) \
  917. AssignWord(d, HighWord(p)) \
  918. Acc2WordsBy1(c, LowWord(p))
  // Disabled: store R[2n-3], add A[n-1]*B[n-1] and store the top two words.
  919. #define Mul_End(n) \
  920. R[2*n-3] = LowWord(c); \
  921. Acc2WordsBy1(d, HighWord(c)) \
  922. MultiplyWords(p, A[n-1], B[n-1])\
  923. Acc2WordsBy2(d, p) \
  924. R[2*n-2] = LowWord(d); \
  925. R[2*n-1] = HighWord(d);
  // Disabled: store R[k]; e collects only single-word (truncated) products.
  926. #define Bot_SaveAcc(k, i, j) \
  927. R[k] = LowWord(c); \
  928. word e = LowWord(d) + HighWord(c); \
  929. e += A[i] * B[j];
  930. #define Bot_Acc(i, j) \
  931. e += A[i] * B[j];
  932. #define Bot_End(n) \
  933. R[n-1] = e;
  // Active definitions: c is a single word, d a double word holding the carry.
  934. #else
  // Declare p, c, d and load them from A[0]*B[0] (c = low word, d = high word).
  935. #define Mul_Begin(n) \
  936. Declare2Words(p) \
  937. word c; \
  938. Declare2Words(d) \
  939. MultiplyWords(p, A[0], B[0]) \
  940. c = LowWord(p); \
  941. AssignWord(d, HighWord(p))
  // Accumulate A[i]*B[j] into (c, d) through MulAcc (defined outside this chunk).
  942. #define Mul_Acc(i, j) \
  943. MulAcc(c, d, A[i], B[j])
  // Store the finished column in R[k], shift the carry down (c = low word of d,
  // d = high word of d) and start the next column with A[i]*B[j].
  944. #define Mul_SaveAcc(k, i, j) \
  945. R[k] = c; \
  946. c = LowWord(d); \
  947. AssignWord(d, HighWord(d)) \
  948. MulAcc(c, d, A[i], B[j])
  // Store R[k], add the carry d to A[i]*B[i] and store the result in R[k+1], R[k+2].
  949. #define Mul_End(k, i) \
  950. R[k] = c; \
  951. MultiplyWords(p, A[i], B[i]) \
  952. Acc2WordsBy2(p, d) \
  953. R[k+1] = LowWord(p); \
  954. R[k+2] = HighWord(p);
  // Store R[k]; from here on only single-word (truncated) products are added to c.
  955. #define Bot_SaveAcc(k, i, j) \
  956. R[k] = c; \
  957. c = LowWord(d); \
  958. c += A[i] * B[j];
  // Add the truncated product A[i]*B[j] to c.
  959. #define Bot_Acc(i, j) \
  960. c += A[i] * B[j];
  // Store the final low word in R[n-1].
  961. #define Bot_End(n) \
  962. R[n-1] = c;
  963. #endif
  // Word-level primitives used by the Squ_N squaring schedules.  They operate on
  // the local names R and A of the function they are expanded in and share the
  // locals p, c, d and e declared by Squ_Begin.
  //
  // The last line of each macro must NOT end in a backslash: a trailing
  // continuation splices the following #define into the macro body whenever no
  // blank line separates them.

  // Declare the accumulators, store R[0] = low word of A[0]*A[0], keep its high
  // word in e, and start the first cross-product column with A[0]*A[1]
  // (doubled by Squ_NonDiag).
  #define Squ_Begin(n) \
      Declare2Words(p) \
      word c; \
      Declare2Words(d) \
      Declare2Words(e) \
      MultiplyWords(p, A[0], A[0]) \
      R[0] = LowWord(p); \
      AssignWord(e, HighWord(p)) \
      MultiplyWords(p, A[0], A[1]) \
      c = LowWord(p); \
      AssignWord(d, HighWord(p)) \
      Squ_NonDiag

  // Double the accumulated cross products (c, d); Double3Words is defined
  // outside this chunk.
  #define Squ_NonDiag \
      Double3Words(c, d)

  // Fold e into (c, d), store the finished column in R[k] and start the next
  // column with A[i]*A[j].
  #define Squ_SaveAcc(k, i, j) \
      Acc3WordsBy2(c, d, e) \
      R[k] = c; \
      MultiplyWords(p, A[i], A[j]) \
      c = LowWord(p); \
      AssignWord(d, HighWord(p))

  // Accumulate the cross product A[i]*A[j] into (c, d).
  #define Squ_Acc(i, j) \
      MulAcc(c, d, A[i], A[j])

  // Double the cross products first, then add the (undoubled) square A[i]*A[i].
  #define Squ_Diag(i) \
      Squ_NonDiag \
      MulAcc(c, d, A[i], A[i])

  // Store R[2n-3], add the carry e to A[n-1]*A[n-1] and store the top two words.
  // n is parenthesised so an expression argument cannot change the indexing.
  #define Squ_End(n) \
      Acc3WordsBy2(c, d, e) \
      R[2*(n)-3] = c; \
      MultiplyWords(p, A[(n)-1], A[(n)-1]) \
      Acc2WordsBy2(p, e) \
      R[2*(n)-2] = LowWord(p); \
      R[2*(n)-1] = HighWord(p);
  // Portable multiply: the whole body is the Mul_2 schedule macro, which reads
  // the parameters A and B and writes R by name.
  // NOTE(review): Mul_2 is defined outside this chunk; presumably R receives the
  // 4-word product of the 2-word inputs - confirm against the macro.
  996. void Baseline_Multiply2(word *R, const word *A, const word *B)
  997. {
  998. Mul_2
  999. }
  // Portable multiply: the whole body is the Mul_4 schedule macro, which reads
  // the parameters A and B and writes R by name.
  // NOTE(review): Mul_4 is defined outside this chunk; presumably R receives the
  // 8-word product of the 4-word inputs - confirm against the macro.
  1000. void Baseline_Multiply4(word *R, const word *A, const word *B)
  1001. {
  1002. Mul_4
  1003. }
  // Portable multiply: the whole body is the Mul_8 schedule macro, which reads
  // the parameters A and B and writes R by name.
  // NOTE(review): Mul_8 is defined outside this chunk; presumably R receives the
  // 16-word product of the 8-word inputs - confirm against the macro.
  1004. void Baseline_Multiply8(word *R, const word *A, const word *B)
  1005. {
  1006. Mul_8
  1007. }
  // Portable squaring: the whole body is the Squ_2 schedule macro, which reads
  // the parameter A and writes R by name (Squ_Begin stores R[0], Squ_End(2)
  // stores R[1]..R[3]).
  1008. void Baseline_Square2(word *R, const word *A)
  1009. {
  1010. Squ_2
  1011. }
  // Portable squaring of a 4-word operand: the body is the Squ_4 schedule, which
  // stores R[0] (Squ_Begin), R[1]..R[4] (Squ_SaveAcc rows) and R[5]..R[7]
  // (Squ_End(4)), i.e. 8 result words.
  1012. void Baseline_Square4(word *R, const word *A)
  1013. {
  1014. Squ_4
  1015. }
  // Portable squaring of an 8-word operand: the body is the Squ_8 schedule, which
  // stores R[0] (Squ_Begin), R[1]..R[12] (Squ_SaveAcc rows) and R[13]..R[15]
  // (Squ_End(8)), i.e. 16 result words.
  1016. void Baseline_Square8(word *R, const word *A)
  1017. {
  1018. Squ_8
  1019. }
  // Portable low-half multiply: the body is the Bot_2 schedule, which stores only
  // R[0] and R[1] (the two low words of A*B).
  1020. void Baseline_MultiplyBottom2(word *R, const word *A, const word *B)
  1021. {
  1022. Bot_2
  1023. }
  // Portable low-half multiply: the body is the Bot_4 schedule, which stores only
  // R[0]..R[3] (the four low words of A*B).
  1024. void Baseline_MultiplyBottom4(word *R, const word *A, const word *B)
  1025. {
  1026. Bot_4
  1027. }
  // Portable low-half multiply: the body is the Bot_8 schedule, which stores only
  // R[0]..R[7] (the eight low words of A*B).
  1028. void Baseline_MultiplyBottom8(word *R, const word *A, const word *B)
  1029. {
  1030. Bot_8
  1031. }
  // Word-level primitives for the Baseline_MultiplyTop* bodies.  They operate on
  // the local names A, B and L of the function they are expanded in, and share
  // the locals p, c and d declared by Top_Begin.

  // Declare p, c, d and seed d with the high word of A[0]*B[n-2].
  // n is parenthesised so an expression argument cannot change the index.
  #define Top_Begin(n) \
      Declare2Words(p) \
      word c; \
      Declare2Words(d) \
      MultiplyWords(p, A[0], B[(n)-2]); \
      AssignWord(d, HighWord(p));

  // Add only the high word of A[i]*B[j] to d; the low word is discarded.
  #define Top_Acc(i, j) \
      MultiplyWords(p, A[i], B[j]); \
      Acc2WordsBy1(d, HighWord(p));

  // Shift the carry down (c = low word of d, d = high word of d) and start the
  // next column with A[i]*B[j].  Nothing is stored to R here.
  #define Top_SaveAcc0(i, j) \
      c = LowWord(d); \
      AssignWord(d, HighWord(d)) \
      MulAcc(c, d, A[i], B[j])

  // Replace c by the flag (L < c), i.e. 1 or 0, add that flag to d, then shift
  // the carry down and start the next column with A[i]*B[j].
  #define Top_SaveAcc1(i, j) \
      c = (L < c); \
      Acc2WordsBy1(d, c); \
      c = LowWord(d); \
      AssignWord(d, HighWord(d)) \
      MulAcc(c, d, A[i], B[j])
  1051. void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L)
  1052. {
  1053. word T[4];
  1054. Baseline_Multiply2(T, A, B);
  1055. R[0] = T[2];
  1056. R[1] = T[3];
  1057. }
  1058. void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L)
  1059. {
  1060. Top_Begin(4)
  1061. Top_Acc(1, 1) Top_Acc(2, 0) \
  1062. Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
  1063. Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
  1064. Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
  1065. Mul_End(1, 3)
  1066. }
  1067. void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L)
  1068. {
  1069. Top_Begin(8)
  1070. Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
  1071. Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
  1072. Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
  1073. Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
  1074. Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
  1075. Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
  1076. Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
  1077. Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
  1078. Mul_End(5, 7)
  1079. }
  1080. #if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
  // Portable multiply: the whole body is the Mul_16 schedule macro, which reads
  // the parameters A and B and writes R by name.  Only compiled when
  // CRYPTOPP_INTEGER_SSE2 is 0 (see the enclosing #if).
  // NOTE(review): Mul_16 is defined outside this chunk; presumably R receives the
  // 32-word product of the 16-word inputs - confirm against the macro.
  1081. void Baseline_Multiply16(word *R, const word *A, const word *B)
  1082. {
  1083. Mul_16
  1084. }
  // Portable squaring of a 16-word operand: the body is the Squ_16 schedule,
  // which stores R[0] (Squ_Begin), R[1]..R[28] (Squ_SaveAcc rows) and
  // R[29]..R[31] (Squ_End(16)).  Only compiled when CRYPTOPP_INTEGER_SSE2 is 0.
  1085. void Baseline_Square16(word *R, const word *A)
  1086. {
  1087. Squ_16
  1088. }
  // Portable low-half multiply: the body is the Bot_16 schedule, which stores
  // only R[0]..R[15].  Only compiled when CRYPTOPP_INTEGER_SSE2 is 0.
  1089. void Baseline_MultiplyBottom16(word *R, const word *A, const word *B)
  1090. {
  1091. Bot_16
  1092. }
  1093. void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L)
  1094. {
  1095. Top_Begin(16)
  1096. Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
  1097. Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
  1098. Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
  1099. Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
  1100. Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
  1101. Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
  1102. Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
  1103. Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
  1104. Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
  1105. Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
  1106. Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
  1107. Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
  1108. Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
  1109. Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
  1110. Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
  1111. Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
  1112. Mul_End(13, 15)
  1113. }
  1114. #endif
  1115. // ********************************************************
  1116. #if CRYPTOPP_INTEGER_SSE2
  // 16-byte-aligned constant: four 32-bit lanes, each 0x0000ffff.
  // NOTE(review): presumably the mask the SSE2 macros below load through [ebx]
  // to split 32-bit lanes into low/high 16-bit parts; the code that points ebx
  // at it is not in this chunk - confirm.
  1117. CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
  // Drop the word-level (baseline) definitions of these helper macros so the
  // SSE2 assembly versions below can reuse the same names.
  1118. #undef Mul_Begin
  1119. #undef Mul_Acc
  1120. #undef Top_Begin
  1121. #undef Top_Acc
  1122. #undef Squ_Acc
  1123. #undef Squ_NonDiag
  1124. #undef Squ_Diag
  1125. #undef Squ_SaveAcc
  1126. #undef Squ_Begin
  1127. #undef Mul_SaveAcc
  1128. #undef Bot_Acc
  1129. #undef Bot_SaveAcc
  1130. #undef Bot_End
  1131. #undef Squ_End
  1132. #undef Mul_End
  // SSE2_FinalSave(k): shift xmm5 left by 16 bits, add it into xmm4 (64-bit add)
  // and store the low quadword of xmm4 at [ecx+8*k].
  // NOTE(review): ecx presumably holds the result pointer set up by the
  // prologue macros, which are defined outside this chunk.
  1133. #define SSE2_FinalSave(k) \
  1134. AS2( psllq xmm5, 16) \
  1135. AS2( paddq xmm4, xmm5) \
  1136. AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
  // Column-finishing steps for the SSE2 schedules.  Both macros fold the
  // xmm6/xmm7 accumulators into xmm4/xmm5, store one finished 64-bit result at
  // [ecx+8*k] (a 32-bit store followed by a 64-bit store at offset +2), and
  // leave the remaining carry in the accumulator registers.
  //
  // The last line of each macro must NOT end in a backslash: a trailing
  // continuation splices the following #define into the macro body whenever no
  // blank line separates them.  The instruction sequences themselves are
  // unchanged.

  // SSE2_SaveShift(k): variant used by the Mul_/Bot_/Top_ columns; the carry
  // stays in xmm4/xmm5.
  #define SSE2_SaveShift(k) \
      AS2( movq xmm0, xmm6) \
      AS2( punpckhqdq xmm6, xmm0) \
      AS2( movq xmm1, xmm7) \
      AS2( punpckhqdq xmm7, xmm1) \
      AS2( paddd xmm6, xmm0) \
      AS2( pslldq xmm6, 4) \
      AS2( paddd xmm7, xmm1) \
      AS2( paddd xmm4, xmm6) \
      AS2( pslldq xmm7, 4) \
      AS2( movq xmm6, xmm4) \
      AS2( paddd xmm5, xmm7) \
      AS2( movq xmm7, xmm5) \
      AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
      AS2( psrlq xmm6, 16) \
      AS2( paddq xmm6, xmm7) \
      AS2( punpckhqdq xmm4, xmm0) \
      AS2( punpckhqdq xmm5, xmm0) \
      AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
      AS2( psrlq xmm6, 3*16) \
      AS2( paddd xmm4, xmm6)

  // Squ_SSE2_SaveShift(k): variant used by the squaring columns; the carry is
  // spilled to the stack slots [esp+4] and [esp+12], which Squ_Column0/1 reload
  // after doubling the cross products.
  #define Squ_SSE2_SaveShift(k) \
      AS2( movq xmm0, xmm6) \
      AS2( punpckhqdq xmm6, xmm0) \
      AS2( movq xmm1, xmm7) \
      AS2( punpckhqdq xmm7, xmm1) \
      AS2( paddd xmm6, xmm0) \
      AS2( pslldq xmm6, 4) \
      AS2( paddd xmm7, xmm1) \
      AS2( paddd xmm4, xmm6) \
      AS2( pslldq xmm7, 4) \
      AS2( movhlps xmm6, xmm4) \
      AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
      AS2( paddd xmm5, xmm7) \
      AS2( movhps QWORD PTR [esp+12], xmm5)\
      AS2( psrlq xmm4, 16) \
      AS2( paddq xmm4, xmm5) \
      AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
      AS2( psrlq xmm4, 3*16) \
      AS2( paddd xmm4, xmm6) \
      AS2( movq QWORD PTR [esp+4], xmm4)
  // SSE2_FirstMultiply(i): first product pair of a column.  Multiplies
  // [esi+i*16] by [edi-i*16] and by [edx-i*16] (pmuludq), then splits each
  // product with the mask loaded from [ebx]: masked parts go to xmm4 / xmm6,
  // parts shifted right by 16 go to xmm5 / xmm7.  All four accumulators are
  // overwritten, not added to.
  1178. #define SSE2_FirstMultiply(i) \
  1179. AS2( movdqa xmm7, [esi+(i)*16])\
  1180. AS2( movdqa xmm5, [edi-(i)*16])\
  1181. AS2( pmuludq xmm5, xmm7) \
  1182. AS2( movdqa xmm4, [ebx])\
  1183. AS2( movdqa xmm6, xmm4) \
  1184. AS2( pand xmm4, xmm5) \
  1185. AS2( psrld xmm5, 16) \
  1186. AS2( pmuludq xmm7, [edx-(i)*16])\
  1187. AS2( pand xmm6, xmm7) \
  1188. AS2( psrld xmm7, 16)
  // Squ_Begin(n), SSE2 version: set-up for the squaring functions.
  //  - saves esp in esi, aligns esp down to 16 bytes, reserves 32*n+16 bytes and
  //    pushes the saved esp so that SSE2_End can restore it with "pop esp";
  //  - the loop at label 1 reads the operand at [eax+edx] 16 bytes at a time and
  //    writes two shuffled copies (pshufd 3,1,2,0 and 2,0,3,1), each also shifted
  //    right by 32 bits, into the scratch area at edi and edi+16*n;
  //  - leaves esi = edi = first table, edx = second table, and performs the
  //    first multiply of column 0.
  // NOTE(review): eax presumably holds the operand pointer A, loaded by
  // SquPrologue (defined outside this chunk).
  //
  // The last line must NOT end in a backslash: a trailing continuation splices
  // the following #define into this macro whenever no blank line separates them.
  // The instruction sequence itself is unchanged.
  #define Squ_Begin(n) \
      SquPrologue \
      AS2( mov esi, esp)\
      AS2( and esp, 0xfffffff0)\
      AS2( lea edi, [esp-32*n])\
      AS2( sub esp, 32*n+16)\
      AS1( push esi)\
      AS2( mov esi, edi) \
      AS2( xor edx, edx) \
      ASL(1) \
      ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
      ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
      AS2( movdqa [edi+2*edx], xmm0) \
      AS2( psrlq xmm0, 32) \
      AS2( movdqa [edi+2*edx+16], xmm0) \
      AS2( movdqa [edi+16*n+2*edx], xmm1) \
      AS2( psrlq xmm1, 32) \
      AS2( movdqa [edi+16*n+2*edx+16], xmm1) \
      AS2( add edx, 16) \
      AS2( cmp edx, 8*(n)) \
      ASJ( jne, 1, b) \
      AS2( lea edx, [edi+16*n])\
      SSE2_FirstMultiply(0)
  // Squ_Acc(i): labelled accumulation step LSqu<i>.  Multiplies [esi+i*16] by
  // [edi-i*16] and by [edx-i*16], splits each product with the mask from [ebx]
  // and adds the parts into xmm4/xmm5 and xmm6/xmm7.  Because Squ_Acc(i+1) is
  // emitted directly before Squ_Acc(i), entering at LSqu<i> runs steps i..2.
  //
  // The last line must NOT end in a backslash: a trailing continuation splices
  // the following #define into this macro whenever no blank line separates them.
  // The instruction sequence itself is unchanged.
  #define Squ_Acc(i) \
      ASL(LSqu##i) \
      AS2( movdqa xmm1, [esi+(i)*16]) \
      AS2( movdqa xmm0, [edi-(i)*16]) \
      AS2( movdqa xmm2, [ebx]) \
      AS2( pmuludq xmm0, xmm1) \
      AS2( pmuludq xmm1, [edx-(i)*16]) \
      AS2( movdqa xmm3, xmm2) \
      AS2( pand xmm2, xmm0) \
      AS2( psrld xmm0, 16) \
      AS2( paddd xmm4, xmm2) \
      AS2( paddd xmm5, xmm0) \
      AS2( pand xmm3, xmm1) \
      AS2( psrld xmm1, 16) \
      AS2( paddd xmm6, xmm3) \
      AS2( paddd xmm7, xmm1)

  // Squ_Acc<N>(i): what a column emits for its extra products.  N == 1 needs
  // nothing beyond the first multiply; N >= 2 calls into the shared LSqu<i>
  // chain emitted by Squ_Acc above.
  #define Squ_Acc1(i)
  #define Squ_Acc2(i) ASC(call, LSqu##i)
  #define Squ_Acc3(i) Squ_Acc2(i)
  #define Squ_Acc4(i) Squ_Acc2(i)
  #define Squ_Acc5(i) Squ_Acc2(i)
  #define Squ_Acc6(i) Squ_Acc2(i)
  #define Squ_Acc7(i) Squ_Acc2(i)
  #define Squ_Acc8(i) Squ_Acc2(i)
  // SSE2_End(E, n): common tail of the SSE2 multiply/square/top routines.
  // Stores result index 2n-3 (SSE2_SaveShift), performs one last multiply of
  // [esi+16] by [edi] and [edx], stores index 2n-2, stores the final quadword
  // at index 2n-1 (SSE2_FinalSave), restores the stack pointer pushed by the
  // *_Begin macro ("pop esp") and then expands the epilogue macro E.
  1236. #define SSE2_End(E, n) \
  1237. SSE2_SaveShift(2*(n)-3) \
  1238. AS2( movdqa xmm7, [esi+16]) \
  1239. AS2( movdqa xmm0, [edi]) \
  1240. AS2( pmuludq xmm0, xmm7) \
  1241. AS2( movdqa xmm2, [ebx]) \
  1242. AS2( pmuludq xmm7, [edx]) \
  1243. AS2( movdqa xmm6, xmm2) \
  1244. AS2( pand xmm2, xmm0) \
  1245. AS2( psrld xmm0, 16) \
  1246. AS2( paddd xmm4, xmm2) \
  1247. AS2( paddd xmm5, xmm0) \
  1248. AS2( pand xmm6, xmm7) \
  1249. AS2( psrld xmm7, 16) \
  1250. SSE2_SaveShift(2*(n)-2) \
  1251. SSE2_FinalSave(2*(n)-1) \
  1252. AS1( pop esp)\
  1253. E
  // The three tails differ only in the epilogue macro they expand
  // (SquEpilogue / MulEpilogue / TopEpilogue, defined outside this chunk).
  1254. #define Squ_End(n) SSE2_End(SquEpilogue, n)
  1255. #define Mul_End(n) SSE2_End(MulEpilogue, n)
  1256. #define Top_End(n) SSE2_End(TopEpilogue, n)
  // Column steps for the SSE2 squaring routines.  Both store result index k
  // (Squ_SSE2_SaveShift), advance the table pointers, accumulate the column's
  // cross products (SSE2_FirstMultiply(1) plus Squ_Acc<i>), double them with
  // paddd reg, reg, and finally add back the carry spilled at [esp+4] and
  // [esp+12].
  //
  // The last line of each macro must NOT end in a backslash: a trailing
  // continuation splices the following #define into the macro body whenever no
  // blank line separates them.  The instruction sequences themselves are
  // unchanged.

  // Squ_Column1(k, i): advances esi; besides the cross products it also adds the
  // products of [esi] with itself and with the quadword at [esi+8].
  #define Squ_Column1(k, i) \
      Squ_SSE2_SaveShift(k) \
      AS2( add esi, 16) \
      SSE2_FirstMultiply(1)\
      Squ_Acc##i(i) \
      AS2( paddd xmm4, xmm4) \
      AS2( paddd xmm5, xmm5) \
      AS2( movdqa xmm3, [esi]) \
      AS2( movq xmm1, QWORD PTR [esi+8]) \
      AS2( pmuludq xmm1, xmm3) \
      AS2( pmuludq xmm3, xmm3) \
      AS2( movdqa xmm0, [ebx])\
      AS2( movdqa xmm2, xmm0) \
      AS2( pand xmm0, xmm1) \
      AS2( psrld xmm1, 16) \
      AS2( paddd xmm6, xmm0) \
      AS2( paddd xmm7, xmm1) \
      AS2( pand xmm2, xmm3) \
      AS2( psrld xmm3, 16) \
      AS2( paddd xmm6, xmm6) \
      AS2( paddd xmm7, xmm7) \
      AS2( paddd xmm4, xmm2) \
      AS2( paddd xmm5, xmm3) \
      AS2( movq xmm0, QWORD PTR [esp+4])\
      AS2( movq xmm1, QWORD PTR [esp+12])\
      AS2( paddd xmm4, xmm0)\
      AS2( paddd xmm5, xmm1)

  // Squ_Column0(k, i): advances edi and edx; cross products only.
  #define Squ_Column0(k, i) \
      Squ_SSE2_SaveShift(k) \
      AS2( add edi, 16) \
      AS2( add edx, 16) \
      SSE2_FirstMultiply(1)\
      Squ_Acc##i(i) \
      AS2( paddd xmm6, xmm6) \
      AS2( paddd xmm7, xmm7) \
      AS2( paddd xmm4, xmm4) \
      AS2( paddd xmm5, xmm5) \
      AS2( movq xmm0, QWORD PTR [esp+4])\
      AS2( movq xmm1, QWORD PTR [esp+12])\
      AS2( paddd xmm4, xmm0)\
      AS2( paddd xmm5, xmm1)
  // SSE2_MulAdd45: first product pair of a multiply column.  Multiplies [esi]
  // by [edi] and by [edx]; the parts of the first product are ADDED to the
  // carry already in xmm4/xmm5, while xmm6/xmm7 are overwritten with the parts
  // of the second product.
  1298. #define SSE2_MulAdd45 \
  1299. AS2( movdqa xmm7, [esi]) \
  1300. AS2( movdqa xmm0, [edi]) \
  1301. AS2( pmuludq xmm0, xmm7) \
  1302. AS2( movdqa xmm2, [ebx]) \
  1303. AS2( pmuludq xmm7, [edx]) \
  1304. AS2( movdqa xmm6, xmm2) \
  1305. AS2( pand xmm2, xmm0) \
  1306. AS2( psrld xmm0, 16) \
  1307. AS2( paddd xmm4, xmm2) \
  1308. AS2( paddd xmm5, xmm0) \
  1309. AS2( pand xmm6, xmm7) \
  1310. AS2( psrld xmm7, 16)
  // Mul_Begin(n), SSE2 version: set-up for the multiply and multiply-bottom
  // functions.
  //  - saves esp in esi, aligns esp down to 16 bytes, reserves 48*n+16 bytes and
  //    pushes the saved esp so that the tail can restore it with "pop esp";
  //  - the loop at label 1 reads [eax+edx] and [edi+edx] 16 bytes at a time and
  //    writes three shuffled tables (each entry also shifted right by 32 bits)
  //    at esp+20, esp+20+16*n and esp+20+32*n;
  //  - points edi, edx and esi at the three tables and performs the first
  //    multiply of column 0.
  // NOTE(review): eax and edi presumably hold the operand pointers on entry,
  // loaded by MulPrologue (defined outside this chunk).
  //
  // The last line must NOT end in a backslash: a trailing continuation splices
  // the following #define into this macro whenever no blank line separates them.
  // The instruction sequence itself is unchanged.
  #define Mul_Begin(n) \
      MulPrologue \
      AS2( mov esi, esp)\
      AS2( and esp, 0xfffffff0)\
      AS2( sub esp, 48*n+16)\
      AS1( push esi)\
      AS2( xor edx, edx) \
      ASL(1) \
      ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
      ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
      ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
      AS2( movdqa [esp+20+2*edx], xmm0) \
      AS2( psrlq xmm0, 32) \
      AS2( movdqa [esp+20+2*edx+16], xmm0) \
      AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
      AS2( psrlq xmm1, 32) \
      AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
      AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
      AS2( psrlq xmm2, 32) \
      AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
      AS2( add edx, 16) \
      AS2( cmp edx, 8*(n)) \
      ASJ( jne, 1, b) \
      AS2( lea edi, [esp+20])\
      AS2( lea edx, [esp+20+16*n])\
      AS2( lea esi, [esp+20+32*n])\
      SSE2_FirstMultiply(0)
  // Mul_Acc(i): labelled accumulation step LMul<i>.  The table offset
  // i/2*(1-(i-2*(i/2))*2)*16 evaluates to +(i/2)*16 for even i and -(i/2)*16 for
  // odd i; it is added to esi and subtracted from edi and edx.  The two products
  // are split with the mask from [ebx] and added into xmm4/xmm5 and xmm6/xmm7.
  // Because Mul_Acc(i+1) is emitted directly before Mul_Acc(i), entering at
  // LMul<i> runs steps i..2.
  //
  // The last line must NOT end in a backslash: a trailing continuation splices
  // the following #define into this macro whenever no blank line separates them.
  // The instruction sequence itself is unchanged.
  #define Mul_Acc(i) \
      ASL(LMul##i) \
      AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
      AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
      AS2( movdqa xmm2, [ebx]) \
      AS2( pmuludq xmm0, xmm1) \
      AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
      AS2( movdqa xmm3, xmm2) \
      AS2( pand xmm2, xmm0) \
      AS2( psrld xmm0, 16) \
      AS2( paddd xmm4, xmm2) \
      AS2( paddd xmm5, xmm0) \
      AS2( pand xmm3, xmm1) \
      AS2( psrld xmm1, 16) \
      AS2( paddd xmm6, xmm3) \
      AS2( paddd xmm7, xmm1)

  // Mul_Acc<N>(i): what a column emits for its extra products.  N == 1 needs
  // nothing beyond the first multiply; N >= 2 calls into the shared LMul<i>
  // chain emitted by Mul_Acc above.
  #define Mul_Acc1(i)
  #define Mul_Acc2(i) ASC(call, LMul##i)
  #define Mul_Acc3(i) Mul_Acc2(i)
  #define Mul_Acc4(i) Mul_Acc2(i)
  #define Mul_Acc5(i) Mul_Acc2(i)
  #define Mul_Acc6(i) Mul_Acc2(i)
  #define Mul_Acc7(i) Mul_Acc2(i)
  #define Mul_Acc8(i) Mul_Acc2(i)
  #define Mul_Acc9(i) Mul_Acc2(i)
  #define Mul_Acc10(i) Mul_Acc2(i)
  #define Mul_Acc11(i) Mul_Acc2(i)
  #define Mul_Acc12(i) Mul_Acc2(i)
  #define Mul_Acc13(i) Mul_Acc2(i)
  #define Mul_Acc14(i) Mul_Acc2(i)
  #define Mul_Acc15(i) Mul_Acc2(i)
  #define Mul_Acc16(i) Mul_Acc2(i)
  // Column steps for the SSE2 multiply routines.  Both store result index k
  // (SSE2_SaveShift), advance the table pointers, then accumulate the column's
  // products (SSE2_MulAdd45 plus Mul_Acc<i>).
  //
  // The last line of each macro must NOT end in a backslash: a trailing
  // continuation splices the following #define into the macro body whenever no
  // blank line separates them.  The expansions themselves are unchanged.

  // Mul_Column1(k, i): advances esi by one 16-byte table entry.
  #define Mul_Column1(k, i) \
      SSE2_SaveShift(k) \
      AS2( add esi, 16) \
      SSE2_MulAdd45\
      Mul_Acc##i(i)

  // Mul_Column0(k, i): advances edi and edx by one 16-byte table entry.
  #define Mul_Column0(k, i) \
      SSE2_SaveShift(k) \
      AS2( add edi, 16) \
      AS2( add edx, 16) \
      SSE2_MulAdd45\
      Mul_Acc##i(i)
  // SSE2 steps for the last column of the multiply-bottom routines.
  //
  // Bot_Acc(i): multiply the table entries at the same offsets Mul_Acc(i) uses,
  // but add the raw products straight into xmm4 (paddq) and xmm6 (paddd)
  // without splitting them with the mask at [ebx].
  1381. #define Bot_Acc(i) \
  1382. AS2( movdqa xmm1, [esi+i/2*(1-(i-2*(i/2))*2)*16]) \
  1383. AS2( movdqa xmm0, [edi-i/2*(1-(i-2*(i/2))*2)*16]) \
  1384. AS2( pmuludq xmm0, xmm1) \
  1385. AS2( pmuludq xmm1, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
  1386. AS2( paddq xmm4, xmm0) \
  1387. AS2( paddd xmm6, xmm1)
  // Bot_SaveAcc(k): store result index k, advance edi/edx, start the last
  // column with [esi]*[edi] added to xmm4, merge xmm5 (shifted left by 16) into
  // xmm4, and leave [esi]*[edx] in xmm6.
  1388. #define Bot_SaveAcc(k) \
  1389. SSE2_SaveShift(k) \
  1390. AS2( add edi, 16) \
  1391. AS2( add edx, 16) \
  1392. AS2( movdqa xmm6, [esi]) \
  1393. AS2( movdqa xmm0, [edi]) \
  1394. AS2( pmuludq xmm0, xmm6) \
  1395. AS2( paddq xmm4, xmm0) \
  1396. AS2( psllq xmm5, 16) \
  1397. AS2( paddq xmm4, xmm5) \
  1398. AS2( pmuludq xmm6, [edx])
  // Bot_End(n): add the two halves of xmm6 together, shift the sum left by 32
  // bits, add it into xmm4, store the quadword at [ecx+8*(n-1)], restore the
  // stack pointer and expand MulEpilogue.
  1399. #define Bot_End(n) \
  1400. AS2( movhlps xmm7, xmm6) \
  1401. AS2( paddd xmm6, xmm7) \
  1402. AS2( psllq xmm6, 32) \
  1403. AS2( paddd xmm4, xmm6) \
  1404. AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
  1405. AS1( pop esp)\
  1406. MulEpilogue
  // Top_Begin(n), SSE2 version: set-up for the multiply-top functions.
  // Same stack reservation and table-building loop as Mul_Begin, except that
  // the old esp is saved through edx.  Afterwards esi is copied to eax,
  // edi/edx/esi are pointed at entry (n/2-1) of the three tables (the middle of
  // each table rather than its start), and xmm4/xmm5 are cleared.
  // NOTE(review): the value moved from esi to eax is presumably the L argument,
  // left in esi by TopPrologue (defined outside this chunk) - confirm.
  1407. #define Top_Begin(n) \
  1408. TopPrologue \
  1409. AS2( mov edx, esp)\
  1410. AS2( and esp, 0xfffffff0)\
  1411. AS2( sub esp, 48*n+16)\
  1412. AS1( push edx)\
  1413. AS2( xor edx, edx) \
  1414. ASL(1) \
  1415. ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
  1416. ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
  1417. ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
  1418. AS2( movdqa [esp+20+2*edx], xmm0) \
  1419. AS2( psrlq xmm0, 32) \
  1420. AS2( movdqa [esp+20+2*edx+16], xmm0) \
  1421. AS2( movdqa [esp+20+16*n+2*edx], xmm1) \
  1422. AS2( psrlq xmm1, 32) \
  1423. AS2( movdqa [esp+20+16*n+2*edx+16], xmm1) \
  1424. AS2( movdqa [esp+20+32*n+2*edx], xmm2) \
  1425. AS2( psrlq xmm2, 32) \
  1426. AS2( movdqa [esp+20+32*n+2*edx+16], xmm2) \
  1427. AS2( add edx, 16) \
  1428. AS2( cmp edx, 8*(n)) \
  1429. ASJ( jne, 1, b) \
  1430. AS2( mov eax, esi) \
  1431. AS2( lea edi, [esp+20+00*n+16*(n/2-1)])\
  1432. AS2( lea edx, [esp+20+16*n+16*(n/2-1)])\
  1433. AS2( lea esi, [esp+20+32*n+16*(n/2-1)])\
  1434. AS2( pxor xmm4, xmm4)\
  1435. AS2( pxor xmm5, xmm5)
  // Steps specific to the SSE2 multiply-top routines.
  //
  // The last line of each macro must NOT end in a backslash: a trailing
  // continuation splices the following line (the next #define, or the function
  // that follows Top_Column1) into the macro body whenever no blank line
  // separates them.  The instruction sequences themselves are unchanged.

  // Top_Acc(i): multiply the quadword at [esi+offset+8] by [edx-offset], keep
  // only the top 16 bits of the 64-bit product (psrlq 48) and add them to xmm5.
  #define Top_Acc(i) \
      AS2( movq xmm0, QWORD PTR [esi+i/2*(1-(i-2*(i/2))*2)*16+8]) \
      AS2( pmuludq xmm0, [edx-i/2*(1-(i-2*(i/2))*2)*16]) \
      AS2( psrlq xmm0, 48) \
      AS2( paddd xmm5, xmm0)

  // Top_Column0(i): shift xmm5 left by 32 bits, advance edi/edx and accumulate a
  // full column.  Nothing is stored: no SaveShift precedes the multiply.
  #define Top_Column0(i) \
      AS2( psllq xmm5, 32) \
      AS2( add edi, 16) \
      AS2( add edx, 16) \
      SSE2_MulAdd45\
      Mul_Acc##i(i)

  // Top_Column1(i): write the previous column to result slot 0 (later
  // overwritten by the first Mul_Column0(0, ...)), advance esi, accumulate the
  // next column, then compare the upper 16 bits of the dword just written at
  // [ecx+4] with the upper 16 bits of eax (pcmpgtd) and add the resulting 0/1
  // flag to xmm4.
  #define Top_Column1(i) \
      SSE2_SaveShift(0) \
      AS2( add esi, 16) \
      SSE2_MulAdd45\
      Mul_Acc##i(i) \
      AS2( shr eax, 16) \
      AS2( movd xmm0, eax)\
      AS2( movd xmm1, [ecx+4])\
      AS2( psrld xmm1, 16)\
      AS2( pcmpgtd xmm1, xmm0)\
      AS2( psrld xmm1, 31)\
      AS2( paddd xmm4, xmm1)
  // SSE2 squaring, smallest size.  Squ_Begin(2) builds the operand tables,
  // Squ_Column0(0, 1) stores result slot 0 (one slot = 8 bytes at [ecx+8*k]) and
  // accumulates the next column, and Squ_End(2) stores slots 1..3 and restores
  // the stack.  No LSqu call targets are needed because only Squ_Acc1 (empty)
  // is used.  C and A are accessed from the assembly via registers set up by
  // SquPrologue (defined outside this chunk).
  1459. void SSE2_Square4(word *C, const word *A)
  1460. {
  1461. Squ_Begin(2)
  1462. Squ_Column0(0, 1)
  1463. Squ_End(2)
  1464. }
  // SSE2 squaring with Squ_Begin(4): column steps store result slots 0..4 and
  // Squ_End(4) stores slots 5..7.
  // On non-GCC compilers the shared accumulation step LSqu2 is emitted here,
  // skipped by a forward jump to label 0 and terminated by "ret", so that
  // Squ_Column0(2, 2) can reach it through ASC(call, LSqu2).
  // NOTE(review): under __GNUC__ the LSqu2 label is not emitted in this
  // function; presumably the copy emitted unconditionally in SSE2_Square32 is
  // used - confirm.
  1465. void SSE2_Square8(word *C, const word *A)
  1466. {
  1467. Squ_Begin(4)
  1468. #ifndef __GNUC__
  1469. ASJ( jmp, 0, f)
  1470. Squ_Acc(2)
  1471. AS1( ret) ASL(0)
  1472. #endif
  1473. Squ_Column0(0, 1)
  1474. Squ_Column1(1, 1)
  1475. Squ_Column0(2, 2)
  1476. Squ_Column1(3, 1)
  1477. Squ_Column0(4, 1)
  1478. Squ_End(4)
  1479. }
  // SSE2 squaring with Squ_Begin(8): column steps store result slots 0..12 and
  // Squ_End(8) stores slots 13..15.  The second argument of each column step
  // (1,1,2,2,3,3,4,3,3,2,2,1,1) selects how many accumulation steps it runs.
  // On non-GCC compilers the LSqu4..LSqu2 chain is emitted here behind a
  // forward jump and ends in "ret", serving as the call target for Squ_Acc<N>.
  // NOTE(review): under __GNUC__ the labels are not emitted in this function;
  // presumably those from SSE2_Square32 are used - confirm.
  1480. void SSE2_Square16(word *C, const word *A)
  1481. {
  1482. Squ_Begin(8)
  1483. #ifndef __GNUC__
  1484. ASJ( jmp, 0, f)
  1485. Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
  1486. AS1( ret) ASL(0)
  1487. #endif
  1488. Squ_Column0(0, 1)
  1489. Squ_Column1(1, 1)
  1490. Squ_Column0(2, 2)
  1491. Squ_Column1(3, 2)
  1492. Squ_Column0(4, 3)
  1493. Squ_Column1(5, 3)
  1494. Squ_Column0(6, 4)
  1495. Squ_Column1(7, 3)
  1496. Squ_Column0(8, 3)
  1497. Squ_Column1(9, 2)
  1498. Squ_Column0(10, 2)
  1499. Squ_Column1(11, 1)
  1500. Squ_Column0(12, 1)
  1501. Squ_End(8)
  1502. }
  // SSE2 squaring with Squ_Begin(16): column steps store result slots 0..28 and
  // Squ_End(16) stores slots 29..31.  The per-column step count rises from 1 to
  // 8 and falls back to 1.
  // Unlike the smaller squaring functions, the LSqu8..LSqu2 chain is emitted
  // here on every compiler (there is no #ifndef __GNUC__ guard); it sits behind
  // a forward jump to label 0 and ends in "ret".
  1503. void SSE2_Square32(word *C, const word *A)
  1504. {
  1505. Squ_Begin(16)
  1506. ASJ( jmp, 0, f)
  1507. Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
  1508. AS1( ret) ASL(0)
  1509. Squ_Column0(0, 1)
  1510. Squ_Column1(1, 1)
  1511. Squ_Column0(2, 2)
  1512. Squ_Column1(3, 2)
  1513. Squ_Column0(4, 3)
  1514. Squ_Column1(5, 3)
  1515. Squ_Column0(6, 4)
  1516. Squ_Column1(7, 4)
  1517. Squ_Column0(8, 5)
  1518. Squ_Column1(9, 5)
  1519. Squ_Column0(10, 6)
  1520. Squ_Column1(11, 6)
  1521. Squ_Column0(12, 7)
  1522. Squ_Column1(13, 7)
  1523. Squ_Column0(14, 8)
  1524. Squ_Column1(15, 7)
  1525. Squ_Column0(16, 7)
  1526. Squ_Column1(17, 6)
  1527. Squ_Column0(18, 6)
  1528. Squ_Column1(19, 5)
  1529. Squ_Column0(20, 5)
  1530. Squ_Column1(21, 4)
  1531. Squ_Column0(22, 4)
  1532. Squ_Column1(23, 3)
  1533. Squ_Column0(24, 3)
  1534. Squ_Column1(25, 2)
  1535. Squ_Column0(26, 2)
  1536. Squ_Column1(27, 1)
  1537. Squ_Column0(28, 1)
  1538. Squ_End(16)
  1539. }
  // SSE2 multiply, smallest size.  Mul_Begin(2) builds the operand tables,
  // Mul_Column0(0, 2) stores result slot 0 (one slot = 8 bytes at [ecx+8*k]) and
  // accumulates the next column, and Mul_End(2) stores slots 1..3 and restores
  // the stack.
  // On non-GCC compilers the LMul2 step is emitted here behind a forward jump
  // and ends in "ret", serving as the call target of Mul_Acc2.
  // NOTE(review): under __GNUC__ the label is not emitted in this function;
  // presumably the copy from SSE2_Multiply32 is used - confirm.
  1540. void SSE2_Multiply4(word *C, const word *A, const word *B)
  1541. {
  1542. Mul_Begin(2)
  1543. #ifndef __GNUC__
  1544. ASJ( jmp, 0, f)
  1545. Mul_Acc(2)
  1546. AS1( ret) ASL(0)
  1547. #endif
  1548. Mul_Column0(0, 2)
  1549. Mul_End(2)
  1550. }
  // SSE2 multiply with Mul_Begin(4): column steps store result slots 0..4 and
  // Mul_End(4) stores slots 5..7.  The second argument of each column step
  // (2,3,4,3,2) selects the entry point into the LMul chain.
  // On non-GCC compilers the LMul4..LMul2 chain is emitted here behind a
  // forward jump and ends in "ret".
  // NOTE(review): under __GNUC__ the labels are not emitted in this function;
  // presumably those from SSE2_Multiply32 are used - confirm.
  1551. void SSE2_Multiply8(word *C, const word *A, const word *B)
  1552. {
  1553. Mul_Begin(4)
  1554. #ifndef __GNUC__
  1555. ASJ( jmp, 0, f)
  1556. Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
  1557. AS1( ret) ASL(0)
  1558. #endif
  1559. Mul_Column0(0, 2)
  1560. Mul_Column1(1, 3)
  1561. Mul_Column0(2, 4)
  1562. Mul_Column1(3, 3)
  1563. Mul_Column0(4, 2)
  1564. Mul_End(4)
  1565. }
  // SSE2 multiply with Mul_Begin(8): column steps store result slots 0..12 and
  // Mul_End(8) stores slots 13..15.  The LMul entry point rises from 2 to 8 and
  // falls back to 2.
  // On non-GCC compilers the LMul8..LMul2 chain is emitted here behind a
  // forward jump and ends in "ret".
  // NOTE(review): under __GNUC__ the labels are not emitted in this function;
  // presumably those from SSE2_Multiply32 are used - confirm.
  1566. void SSE2_Multiply16(word *C, const word *A, const word *B)
  1567. {
  1568. Mul_Begin(8)
  1569. #ifndef __GNUC__
  1570. ASJ( jmp, 0, f)
  1571. Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
  1572. AS1( ret) ASL(0)
  1573. #endif
  1574. Mul_Column0(0, 2)
  1575. Mul_Column1(1, 3)
  1576. Mul_Column0(2, 4)
  1577. Mul_Column1(3, 5)
  1578. Mul_Column0(4, 6)
  1579. Mul_Column1(5, 7)
  1580. Mul_Column0(6, 8)
  1581. Mul_Column1(7, 7)
  1582. Mul_Column0(8, 6)
  1583. Mul_Column1(9, 5)
  1584. Mul_Column0(10, 4)
  1585. Mul_Column1(11, 3)
  1586. Mul_Column0(12, 2)
  1587. Mul_End(8)
  1588. }
  // SSE2 multiply with Mul_Begin(16): column steps store result slots 0..28 and
  // Mul_End(16) stores slots 29..31.  The LMul entry point rises from 2 to 16
  // and falls back to 2.
  // Unlike the smaller multiply functions, the LMul16..LMul2 chain is emitted
  // here on every compiler (there is no #ifndef __GNUC__ guard); it sits behind
  // a forward jump to label 0 and ends in "ret".
  1589. void SSE2_Multiply32(word *C, const word *A, const word *B)
  1590. {
  1591. Mul_Begin(16)
  1592. ASJ( jmp, 0, f)
  1593. Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
  1594. AS1( ret) ASL(0)
  1595. Mul_Column0(0, 2)
  1596. Mul_Column1(1, 3)
  1597. Mul_Column0(2, 4)
  1598. Mul_Column1(3, 5)
  1599. Mul_Column0(4, 6)
  1600. Mul_Column1(5, 7)
  1601. Mul_Column0(6, 8)
  1602. Mul_Column1(7, 9)
  1603. Mul_Column0(8, 10)
  1604. Mul_Column1(9, 11)
  1605. Mul_Column0(10, 12)
  1606. Mul_Column1(11, 13)
  1607. Mul_Column0(12, 14)
  1608. Mul_Column1(13, 15)
  1609. Mul_Column0(14, 16)
  1610. Mul_Column1(15, 15)
  1611. Mul_Column0(16, 14)
  1612. Mul_Column1(17, 13)
  1613. Mul_Column0(18, 12)
  1614. Mul_Column1(19, 11)
  1615. Mul_Column0(20, 10)
  1616. Mul_Column1(21, 9)
  1617. Mul_Column0(22, 8)
  1618. Mul_Column1(23, 7)
  1619. Mul_Column0(24, 6)
  1620. Mul_Column1(25, 5)
  1621. Mul_Column0(26, 4)
  1622. Mul_Column1(27, 3)
  1623. Mul_Column0(28, 2)
  1624. Mul_End(16)
  1625. }
  // SSE2 low-half multiply, smallest size.  After Mul_Begin(2), Bot_SaveAcc(0)
  // stores result slot 0 (8 bytes at [ecx]), Bot_Acc(2) adds the remaining
  // products of the last column, and Bot_End(2) stores slot 1 and restores the
  // stack.  No LMul call targets are needed here.
  1626. void SSE2_MultiplyBottom4(word *C, const word *A, const word *B)
  1627. {
  1628. Mul_Begin(2)
  1629. Bot_SaveAcc(0) Bot_Acc(2)
  1630. Bot_End(2)
  1631. }
  // SSE2 low-half multiply with Mul_Begin(4): two full column steps store
  // result slots 0 and 1, Bot_SaveAcc(2) stores slot 2, the Bot_Acc steps
  // finish the last column and Bot_End(4) stores slot 3.
  // On non-GCC compilers the LMul3..LMul2 chain is emitted here behind a
  // forward jump and ends in "ret".
  // NOTE(review): under __GNUC__ the labels are not emitted in this function;
  // presumably those from SSE2_Multiply32 are used - confirm.
  1632. void SSE2_MultiplyBottom8(word *C, const word *A, const word *B)
  1633. {
  1634. Mul_Begin(4)
  1635. #ifndef __GNUC__
  1636. ASJ( jmp, 0, f)
  1637. Mul_Acc(3) Mul_Acc(2)
  1638. AS1( ret) ASL(0)
  1639. #endif
  1640. Mul_Column0(0, 2)
  1641. Mul_Column1(1, 3)
  1642. Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
  1643. Bot_End(4)
  1644. }
  // SSE2 low-half multiply with Mul_Begin(8): six full column steps store
  // result slots 0..5, Bot_SaveAcc(6) stores slot 6, the Bot_Acc steps finish
  // the last column and Bot_End(8) stores slot 7.
  // On non-GCC compilers the LMul7..LMul2 chain is emitted here behind a
  // forward jump and ends in "ret".
  // NOTE(review): under __GNUC__ the labels are not emitted in this function;
  // presumably those from SSE2_Multiply32 are used - confirm.
  1645. void SSE2_MultiplyBottom16(word *C, const word *A, const word *B)
  1646. {
  1647. Mul_Begin(8)
  1648. #ifndef __GNUC__
  1649. ASJ( jmp, 0, f)
  1650. Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
  1651. AS1( ret) ASL(0)
  1652. #endif
  1653. Mul_Column0(0, 2)
  1654. Mul_Column1(1, 3)
  1655. Mul_Column0(2, 4)
  1656. Mul_Column1(3, 5)
  1657. Mul_Column0(4, 6)
  1658. Mul_Column1(5, 7)
  1659. Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
  1660. Bot_End(8)
  1661. }
  // SSE2 low-half multiply with Mul_Begin(16): fourteen full column steps
  // store result slots 0..13, Bot_SaveAcc(14) stores slot 14, the Bot_Acc steps
  // finish the last column and Bot_End(16) stores slot 15.
  // On non-GCC compilers the LMul15..LMul2 chain is emitted here behind a
  // forward jump and ends in "ret".
  // NOTE(review): under __GNUC__ the labels are not emitted in this function;
  // presumably those from SSE2_Multiply32 are used - confirm.
  1662. void SSE2_MultiplyBottom32(word *C, const word *A, const word *B)
  1663. {
  1664. Mul_Begin(16)
  1665. #ifndef __GNUC__
  1666. ASJ( jmp, 0, f)
  1667. Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
  1668. AS1( ret) ASL(0)
  1669. #endif
  1670. Mul_Column0(0, 2)
  1671. Mul_Column1(1, 3)
  1672. Mul_Column0(2, 4)
  1673. Mul_Column1(3, 5)
  1674. Mul_Column0(4, 6)
  1675. Mul_Column1(5, 7)
  1676. Mul_Column0(6, 8)
  1677. Mul_Column1(7, 9)
  1678. Mul_Column0(8, 10)
  1679. Mul_Column1(9, 11)
  1680. Mul_Column0(10, 12)
  1681. Mul_Column1(11, 13)
  1682. Mul_Column0(12, 14)
  1683. Mul_Column1(13, 15)
  1684. Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
  1685. Bot_End(16)
  1686. }
  // SSE2 routine producing the upper half of an 8-word by 8-word product into C.
  // Installed as s_pTop[2] by SetFunctionPointers() when HasSSE2() is true.
  // MultiplyTop() calls it as s_pTop[N/4](R, A, B, L[N-1]), so the parameter L is the
  // most significant word of the already-known lower half of A*B.
  1687. void SSE2_MultiplyTop8(word *C, const word *A, const word *B, word L)
  1688. {
  1689. Top_Begin(4)
  1690. Top_Acc(3) Top_Acc(2) Top_Acc(1)
  // Non-GCC builds only: jump over an inline run of Mul_Acc steps that ends in `ret`.
  // NOTE(review): presumably a local subroutine used by the column macros -- confirm.
  1691. #ifndef __GNUC__
  1692. ASJ( jmp, 0, f)
  1693. Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
  1694. AS1( ret) ASL(0)
  1695. #endif
  1696. Top_Column0(4)
  1697. Top_Column1(3)
  1698. Mul_Column0(0, 2)
  1699. Top_End(2)
  1700. }
  // SSE2 routine producing the upper half of a 16-word by 16-word product into C.
  // Installed as s_pTop[4] by SetFunctionPointers() when HasSSE2() is true.
  // MultiplyTop() calls it as s_pTop[N/4](R, A, B, L[N-1]), so the parameter L is the
  // most significant word of the already-known lower half of A*B.
  1701. void SSE2_MultiplyTop16(word *C, const word *A, const word *B, word L)
  1702. {
  1703. Top_Begin(8)
  1704. Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
  // Non-GCC builds only: jump over an inline run of Mul_Acc steps that ends in `ret`.
  // NOTE(review): presumably a local subroutine used by the column macros -- confirm.
  1705. #ifndef __GNUC__
  1706. ASJ( jmp, 0, f)
  1707. Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
  1708. AS1( ret) ASL(0)
  1709. #endif
  1710. Top_Column0(8)
  1711. Top_Column1(7)
  // Remaining columns: the second argument shrinks by one per column.
  1712. Mul_Column0(0, 6)
  1713. Mul_Column1(1, 5)
  1714. Mul_Column0(2, 4)
  1715. Mul_Column1(3, 3)
  1716. Mul_Column0(4, 2)
  1717. Top_End(4)
  1718. }
  // SSE2 routine producing the upper half of a 32-word by 32-word product into C.
  // Installed as s_pTop[8] by SetFunctionPointers() when HasSSE2() is true.
  // MultiplyTop() calls it as s_pTop[N/4](R, A, B, L[N-1]), so the parameter L is the
  // most significant word of the already-known lower half of A*B.
  1719. void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L)
  1720. {
  1721. Top_Begin(16)
  1722. Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
  // Non-GCC builds only: jump over an inline run of Mul_Acc steps that ends in `ret`.
  // NOTE(review): presumably a local subroutine used by the column macros -- confirm.
  1723. #ifndef __GNUC__
  1724. ASJ( jmp, 0, f)
  1725. Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
  1726. AS1( ret) ASL(0)
  1727. #endif
  1728. Top_Column0(16)
  1729. Top_Column1(15)
  // Remaining columns: the second argument shrinks by one per column.
  1730. Mul_Column0(0, 14)
  1731. Mul_Column1(1, 13)
  1732. Mul_Column0(2, 12)
  1733. Mul_Column1(3, 11)
  1734. Mul_Column0(4, 10)
  1735. Mul_Column1(5, 9)
  1736. Mul_Column0(6, 8)
  1737. Mul_Column1(7, 7)
  1738. Mul_Column0(8, 6)
  1739. Mul_Column1(9, 5)
  1740. Mul_Column0(10, 4)
  1741. Mul_Column1(11, 3)
  1742. Mul_Column0(12, 2)
  1743. Top_End(8)
  1744. }
  1745. #endif // #if CRYPTOPP_INTEGER_SSE2
  1746. // ********************************************************
  // Function-pointer types for the word-array primitives selected at start-up.
  // PAdd: N-word add/subtract returning an int (callers use it as carry/borrow).
  1747. typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);
  // PMul: fixed-size multiply (also used for the "bottom half" routines).
  1748. typedef void (* PMul)(word *C, const word *A, const word *B);
  // PSqu: fixed-size square.
  1749. typedef void (* PSqu)(word *C, const word *A);
  // PMulTop: fixed-size "top half" multiply; L is one word of the known low half.
  1750. typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);
  // With SSE2 support compiled in, add/subtract and the recursion cut-off are chosen
  // at run time (SetFunctionPointers may raise the limit to 32); otherwise the limit
  // is a compile-time constant of 16.
  1751. #if CRYPTOPP_INTEGER_SSE2
  1752. static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
  1753. static size_t s_recursionLimit = 8;
  1754. #else
  1755. static const size_t s_recursionLimit = 16;
  1756. #endif
  // Dispatch tables indexed by N/4 (N = operand size in words): only slots
  // 0, 1, 2, 4 and 8 are ever assigned, for N = 2, 4, 8, 16 and 32.
  1757. static PMul s_pMul[9], s_pBot[9];
  1758. static PSqu s_pSqu[9];
  1759. static PMulTop s_pTop[9];
  // Fills the s_pMul/s_pBot/s_pSqu/s_pTop dispatch tables (indexed by N/4) and, in SSE2
  // builds, selects the add/subtract routines and the recursion limit based on the
  // CPU features reported by HasSSE2()/IsP4().
  1760. static void SetFunctionPointers()
  1761. {
  // Entries that always use the baseline code: the 2-word routines and the 4-word
  // "top" routine (no SSE2 replacement for s_pTop[1] is assigned below).
  1762. s_pMul[0] = &Baseline_Multiply2;
  1763. s_pBot[0] = &Baseline_MultiplyBottom2;
  1764. s_pSqu[0] = &Baseline_Square2;
  1765. s_pTop[0] = &Baseline_MultiplyTop2;
  1766. s_pTop[1] = &Baseline_MultiplyTop4;
  1767. #if CRYPTOPP_INTEGER_SSE2
  1768. if (HasSSE2())
  1769. {
  // SSE2 add/subtract are only selected when IsP4() reports true; the whole
  // selection is compiled out when _MSC_VER == 1200 without NDEBUG.
  1770. #if _MSC_VER != 1200 || defined(NDEBUG)
  1771. if (IsP4())
  1772. {
  1773. s_pAdd = &SSE2_Add;
  1774. s_pSub = &SSE2_Sub;
  1775. }
  1776. #endif
  // With SSE2 the recursive routines stop splitting at 32 words, so table
  // slots up to index 8 (N == 32) must be populated.
  1777. s_recursionLimit = 32;
  1778. s_pMul[1] = &SSE2_Multiply4;
  1779. s_pMul[2] = &SSE2_Multiply8;
  1780. s_pMul[4] = &SSE2_Multiply16;
  1781. s_pMul[8] = &SSE2_Multiply32;
  1782. s_pBot[1] = &SSE2_MultiplyBottom4;
  1783. s_pBot[2] = &SSE2_MultiplyBottom8;
  1784. s_pBot[4] = &SSE2_MultiplyBottom16;
  1785. s_pBot[8] = &SSE2_MultiplyBottom32;
  1786. s_pSqu[1] = &SSE2_Square4;
  1787. s_pSqu[2] = &SSE2_Square8;
  1788. s_pSqu[4] = &SSE2_Square16;
  1789. s_pSqu[8] = &SSE2_Square32;
  1790. s_pTop[2] = &SSE2_MultiplyTop8;
  1791. s_pTop[4] = &SSE2_MultiplyTop16;
  1792. s_pTop[8] = &SSE2_MultiplyTop32;
  1793. }
  1794. else
  1795. #endif
  1796. {
  // Baseline path: taken when SSE2 is not compiled in, or compiled in but not
  // available at run time.
  1797. s_pMul[1] = &Baseline_Multiply4;
  1798. s_pMul[2] = &Baseline_Multiply8;
  1799. s_pBot[1] = &Baseline_MultiplyBottom4;
  1800. s_pBot[2] = &Baseline_MultiplyBottom8;
  1801. s_pSqu[1] = &Baseline_Square4;
  1802. s_pSqu[2] = &Baseline_Square8;
  1803. s_pTop[2] = &Baseline_MultiplyTop8;
  // The 16-word baseline routines are only installed in non-SSE2 builds, where
  // s_recursionLimit is 16; in an SSE2 build without SSE2 hardware the limit
  // stays at its initial value of 8, so slot 4 is never dispatched to.
  1804. #if !CRYPTOPP_INTEGER_SSE2
  1805. s_pMul[4] = &Baseline_Multiply16;
  1806. s_pBot[4] = &Baseline_MultiplyBottom16;
  1807. s_pSqu[4] = &Baseline_Square16;
  1808. s_pTop[4] = &Baseline_MultiplyTop16;
  1809. #endif
  1810. }
  1811. }
  1812. inline int Add(word *C, const word *A, const word *B, size_t N)
  1813. {
  1814. #if CRYPTOPP_INTEGER_SSE2
  1815. return s_pAdd(N, C, A, B);
  1816. #else
  1817. return Baseline_Add(N, C, A, B);
  1818. #endif
  1819. }
  1820. inline int Subtract(word *C, const word *A, const word *B, size_t N)
  1821. {
  1822. #if CRYPTOPP_INTEGER_SSE2
  1823. return s_pSub(N, C, A, B);
  1824. #else
  1825. return Baseline_Sub(N, C, A, B);
  1826. #endif
  1827. }
  1828. // ********************************************************
  // Shorthand for the halves/quarters of the arrays used by the recursive routines
  // below. They expand in the caller's scope and rely on local variables named
  // A, B, T, R, N and N2 (N2 is N/2 wherever these are used in this file).
  //   X0 = first N2 words, X1 = words starting at N2,
  //   X2 = words starting at N, X3 = words starting at N+N2.
  1829. #define A0 A
  1830. #define A1 (A+N2)
  1831. #define B0 B
  1832. #define B1 (B+N2)
  1833. #define T0 T
  1834. #define T1 (T+N2)
  1835. #define T2 (T+N)
  1836. #define T3 (T+N+N2)
  1837. #define R0 R
  1838. #define R1 (R+N2)
  1839. #define R2 (R+N)
  1840. #define R3 (R+N+N2)
  1841. // R[2*N] - result = A*B
  1842. // T[2*N] - temporary work space
  1843. // A[N] --- multiplier
  1844. // B[N] --- multiplicand
  // Sizes up to s_recursionLimit are handled directly by the routine in s_pMul[N/4].
  // Larger sizes are split into halves and assembled from three half-size products
  // (A1*B1, A0*B0 and the product of the half differences), Karatsuba-style.
  1845. void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
  1846. {
  1847. assert(N>=2 && N%2==0);
  1848. if (N <= s_recursionLimit)
  1849. s_pMul[N/4](R, A, B);
  1850. else
  1851. {
  1852. const size_t N2 = N/2;
  // AN2/BN2 are the offsets of the half that is subtracted FROM: the larger half
  // is always the minuend, so R0 = |A0-A1| and R1 = |B0-B1| without a borrow.
  1853. size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
  1854. Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
  1855. size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
  1856. Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
  // Order matters: R0/R1 hold the differences until they have been multiplied
  // into T0, and only then is R0..R1 overwritten with A0*B0.
  1857. RecursiveMultiply(R2, T2, A1, B1, N2);
  1858. RecursiveMultiply(T0, T2, R0, R1, N2);
  1859. RecursiveMultiply(R0, T2, A0, B0, N2);
  1860. // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
  // Add A0*B0 + A1*B1 into the middle words R1..R2, tracking carries out of R1
  // (c2) and out of R2 (c3).
  1861. int c2 = Add(R2, R2, R1, N2);
  1862. int c3 = c2;
  1863. c2 += Add(R1, R2, R0, N2);
  1864. c3 += Add(R2, R2, R3, N2);
  // T holds the magnitude of the difference product; when both differences were
  // taken in the same direction (AN2 == BN2) the signed term is negative, so it
  // is subtracted, otherwise it is added.
  1865. if (AN2 == BN2)
  1866. c3 -= Subtract(R1, R1, T0, N);
  1867. else
  1868. c3 += Add(R1, R1, T0, N);
  // Propagate the pending carries into the upper words.
  1869. c3 += Increment(R2, N2, c2);
  1870. assert (c3 >= 0 && c3 <= 2);
  1871. Increment(R3, N2, c3);
  1872. }
  1873. }
  1874. // R[2*N] - result = A*A
  1875. // T[2*N] - temporary work space
  1876. // A[N] --- number to be squared
  // Sizes up to s_recursionLimit are handled directly by the routine in s_pSqu[N/4].
  // Larger sizes use A*A = A0*A0 + 2*A0*A1*2^(N2 words) + A1*A1*2^(N words).
  1877. void RecursiveSquare(word *R, word *T, const word *A, size_t N)
  1878. {
  1879. assert(N && N%2==0);
  1880. if (N <= s_recursionLimit)
  1881. s_pSqu[N/4](R, A);
  1882. else
  1883. {
  1884. const size_t N2 = N/2;
  1885. RecursiveSquare(R0, T2, A0, N2);
  1886. RecursiveSquare(R2, T2, A1, N2);
  1887. RecursiveMultiply(T0, T2, A0, A1, N2);
  // The cross product A0*A1 is added twice at offset N2; the accumulated carry
  // is pushed into the top quarter.
  1888. int carry = Add(R1, R1, T0, N);
  1889. carry += Add(R1, R1, T0, N);
  1890. Increment(R3, N2, carry);
  1891. }
  1892. }
  1893. // R[N] - bottom half of A*B
  1894. // T[3*N/2] - temporary work space
  1895. // A[N] - multiplier
  1896. // B[N] - multiplicand
  // Sizes up to s_recursionLimit are handled directly by the routine in s_pBot[N/4].
  // Larger sizes compute the full product A0*B0 (N words) and add the low halves of
  // the two cross products A1*B0 and A0*B1 into the upper half of R; carries out of
  // R are discarded because only the bottom N words are wanted.
  1897. void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
  1898. {
  1899. assert(N>=2 && N%2==0);
  1900. if (N <= s_recursionLimit)
  1901. s_pBot[N/4](R, A, B);
  1902. else
  1903. {
  1904. const size_t N2 = N/2;
  1905. RecursiveMultiply(R, T, A0, B0, N2);
  1906. RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
  1907. Add(R1, R1, T0, N2);
  1908. RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
  1909. Add(R1, R1, T0, N2);
  1910. }
  1911. }
  1912. // R[N] --- upper half of A*B
  1913. // T[2*N] - temporary work space
  1914. // L[N] --- lower half of A*B
  1915. // A[N] --- multiplier
  1916. // B[N] --- multiplicand
  // The caller must already know the lower half L of the product. Sizes up to
  // s_recursionLimit are handled by s_pTop[N/4], which only needs the top word of L.
  // Larger sizes compute A1*B1 and the product of the half differences, then use L
  // to reconstruct the carries that flow from the lower half into the upper half.
  1917. void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
  1918. {
  1919. assert(N>=2 && N%2==0);
  1920. if (N <= s_recursionLimit)
  1921. s_pTop[N/4](R, A, B, L[N-1]);
  1922. else
  1923. {
  1924. const size_t N2 = N/2;
  // As in RecursiveMultiply: the larger half is the minuend, so R0 and R1 receive
  // non-negative half differences, and AN2/BN2 record which direction was used.
  1925. size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
  1926. Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
  1927. size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
  1928. Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
  1929. RecursiveMultiply(T0, T2, R0, R1, N2);
  1930. RecursiveMultiply(R0, T2, A1, B1, N2);
  1931. // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1
  // t, c2 and c3 are small signed carry/borrow counters; T2 is used as an N2-word
  // scratch accumulator seeded from the two halves of L.
  1932. int t, c3;
  1933. int c2 = Subtract(T2, L+N2, L, N2);
  // Same-direction differences: the signed cross term is negative, so the roles
  // of Add and Subtract are swapped relative to the else branch.
  1934. if (AN2 == BN2)
  1935. {
  1936. c2 -= Add(T2, T2, T0, N2);
  1937. t = (Compare(T2, R0, N2) == -1);
  1938. c3 = t - Subtract(T2, T2, T1, N2);
  1939. }
  1940. else
  1941. {
  1942. c2 += Subtract(T2, T2, T0, N2);
  1943. t = (Compare(T2, R0, N2) == -1);
  1944. c3 = t + Add(T2, T2, T1, N2);
  1945. }
  1946. c2 += t;
  // c2 may be negative here, so it is applied as an increment or a decrement.
  1947. if (c2 >= 0)
  1948. c3 += Increment(T2, N2, c2);
  1949. else
  1950. c3 -= Decrement(T2, N2, -c2);
  1951. c3 += Add(R0, T2, R1, N2);
  1952. assert (c3 >= 0 && c3 <= 2);
  1953. Increment(R1, N2, c3);
  1954. }
  1955. }
  1956. inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
  1957. {
  1958. RecursiveMultiply(R, T, A, B, N);
  1959. }
  1960. inline void Square(word *R, word *T, const word *A, size_t N)
  1961. {
  1962. RecursiveSquare(R, T, A, N);
  1963. }
  1964. inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
  1965. {
  1966. RecursiveMultiplyBottom(R, T, A, B, N);
  1967. }
  1968. // R[NA+NB] - result = A*B
  1969. // T[NA+NB] - temporary work space
  1970. // A[NA] ---- multiplier
  1971. // B[NB] ---- multiplicand
  // Multiplies operands of different lengths. After the optional swap A is the shorter
  // operand and NB must be a multiple of NA; B is processed in NA-word chunks.
  1972. void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
  1973. {
  // Equal lengths: plain multiply, or square when both operands are the same array.
  1974. if (NA == NB)
  1975. {
  1976. if (A == B)
  1977. Square(R, T, A, NA);
  1978. else
  1979. Multiply(R, T, A, B, NA);
  1980. return;
  1981. }
  // Make A the shorter operand (the parameters are local copies, so swapping is safe).
  1982. if (NA > NB)
  1983. {
  1984. std::swap(A, B);
  1985. std::swap(NA, NB);
  1986. }
  1987. assert(NB % NA == 0);
  // Fast paths when A is effectively a single word (two words with a zero high word).
  1988. if (NA==2 && !A[1])
  1989. {
  1990. switch (A[0])
  1991. {
  1992. case 0:
  1993. SetWords(R, 0, NB+2);
  1994. return;
  1995. case 1:
  1996. CopyWords(R, B, NB);
  1997. R[NB] = R[NB+1] = 0;
  1998. return;
  1999. default:
  2000. R[NB] = LinearMultiply(R, B, A[0], NB);
  2001. R[NB+1] = 0;
  2002. return;
  2003. }
  2004. }
  // General case: each A * (chunk of B) is 2*NA words wide, so adjacent partial
  // products overlap by NA words. Alternate chunks are written to R and to T
  // (at T+NA+i, i.e. T+2*NA corresponds to R+NA) so they never overlap in place;
  // the final Add merges the two interleaved sets. Which set starts in R depends
  // on whether the number of chunks is even or odd.
  2005. size_t i;
  2006. if ((NB/NA)%2 == 0)
  2007. {
  2008. Multiply(R, T, A, B, NA);
  2009. CopyWords(T+2*NA, R+NA, NA);
  2010. for (i=2*NA; i<NB; i+=2*NA)
  2011. Multiply(T+NA+i, T, A, B+i, NA);
  2012. for (i=NA; i<NB; i+=2*NA)
  2013. Multiply(R+i, T, A, B+i, NA);
  2014. }
  2015. else
  2016. {
  2017. for (i=0; i<NB; i+=2*NA)
  2018. Multiply(R+i, T, A, B+i, NA);
  2019. for (i=NA; i<NB; i+=2*NA)
  2020. Multiply(T+NA+i, T, A, B+i, NA);
  2021. }
  // Merge the partial products held in T and carry into the top NA words.
  2022. if (Add(R+NA, R+NA, T+2*NA, NB-NA))
  2023. Increment(R+NB, NA);
  2024. }
  2025. // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
  2026. // T[3*N/2] - temporary work space
  2027. // A[N] ----- an odd number as input
  // Doubles the precision of the inverse at each recursion level: the inverse of the
  // low half is computed first and then extended to the full N words.
  2028. void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
  2029. {
  2030. if (N==2)
  2031. {
  // Base case: start from the one-word inverse x of A[0] and compute
  // R = x * (2 - x*A) on two words (the two's complement plus 2 forms 2 - x*A).
  2032. T[0] = AtomicInverseModPower2(A[0]);
  2033. T[1] = 0;
  2034. s_pBot[0](T+2, T, A);
  2035. TwosComplement(T+2, 2);
  2036. Increment(T+2, 2, 2);
  2037. s_pBot[0](R, T, T+2);
  2038. }
  2039. else
  2040. {
  2041. const size_t N2 = N/2;
  2042. RecursiveInverseModPower2(R0, T0, A0, N2);
  // R0*A0 has low half equal to 1 (R0 is the inverse of A0 modulo the half
  // size), so T0 is set to 1 and passed as the known low half to MultiplyTop.
  2043. T0[0] = 1;
  2044. SetWords(T0+1, 0, N2-1);
  2045. MultiplyTop(R1, T1, T0, R0, A0, N2);
  2046. MultiplyBottom(T0, T1, R0, A1, N2);
  2047. Add(T0, R1, T0, N2);
  2048. TwosComplement(T0, N2);
  // Upper half of the result = R0 * -(top(R0*A0) + bottom(R0*A1)), low N2 words.
  2049. MultiplyBottom(R1, T1, R0, T0, N2);
  2050. }
  2051. }
  2052. // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
  2053. // T[3*N] - temporary work space
  2054. // X[2*N] - number to be reduced
  2055. // M[N] --- modulus
  2056. // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
  // Only the first preprocessor branch (#if 1) is compiled. The #elif 0 and #else
  // branches are disabled word-by-word variants (plain C and MMX/SSE2 intrinsics)
  // that additionally modify X in place.
  2057. void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N)
  2058. {
  2059. #if 1
  // R = low(X*U); T = top(R*M), using X's low half as the known low half of R*M;
  // then T = X_high - T, with borrow recording whether the result went negative.
  2060. MultiplyBottom(R, T, X, U, N);
  2061. MultiplyTop(T, T+N, X, R, M, N);
  2062. word borrow = Subtract(T, X+N, T, N);
  2063. // defend against timing attack by doing this Add even when not needed
  2064. word carry = Add(T+N, T, M, N);
  2065. assert(carry | !borrow);
  // Branch-free select: (0-borrow) is all ones when borrow == 1, so the copy
  // source is T+N (the corrected value) on borrow and T otherwise.
  2066. CopyWords(R, T + ((0-borrow) & N), N);
  2067. #elif 0
  2068. const word u = 0-U[0];
  2069. Declare2Words(p)
  2070. for (size_t i=0; i<N; i++)
  2071. {
  2072. const word t = u * X[i];
  2073. word c = 0;
  2074. for (size_t j=0; j<N; j+=2)
  2075. {
  2076. MultiplyWords(p, t, M[j]);
  2077. Acc2WordsBy1(p, X[i+j]);
  2078. Acc2WordsBy1(p, c);
  2079. X[i+j] = LowWord(p);
  2080. c = HighWord(p);
  2081. MultiplyWords(p, t, M[j+1]);
  2082. Acc2WordsBy1(p, X[i+j+1]);
  2083. Acc2WordsBy1(p, c);
  2084. X[i+j+1] = LowWord(p);
  2085. c = HighWord(p);
  2086. }
  2087. if (Increment(X+N+i, N-i, c))
  2088. while (!Subtract(X+N, X+N, M, N)) {}
  2089. }
  2090. memcpy(R, X+N, N*WORD_SIZE);
  2091. #else
  2092. __m64 u = _mm_cvtsi32_si64(0-U[0]), p;
  2093. for (size_t i=0; i<N; i++)
  2094. {
  2095. __m64 t = _mm_cvtsi32_si64(X[i]);
  2096. t = _mm_mul_su32(t, u);
  2097. __m64 c = _mm_setzero_si64();
  2098. for (size_t j=0; j<N; j+=2)
  2099. {
  2100. p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
  2101. p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
  2102. c = _mm_add_si64(c, p);
  2103. X[i+j] = _mm_cvtsi64_si32(c);
  2104. c = _mm_srli_si64(c, 32);
  2105. p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
  2106. p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
  2107. c = _mm_add_si64(c, p);
  2108. X[i+j+1] = _mm_cvtsi64_si32(c);
  2109. c = _mm_srli_si64(c, 32);
  2110. }
  2111. if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
  2112. while (!Subtract(X+N, X+N, M, N)) {}
  2113. }
  2114. memcpy(R, X+N, N*WORD_SIZE);
  2115. _mm_empty();
  2116. #endif
  2117. }
  2118. // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
  2119. // T[2*N] - temporary work space
  2120. // X[2*N] - number to be reduced
  2121. // M[N] --- modulus
  2122. // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
  2123. // V[N] --- 2**(WORD_BITS*3*N/2) mod M
  // Works on half-size (N2 = N/2 word) pieces. The local M0/M1, V0/V1 and X0..X3
  // macros name those pieces and are undefined again at the end of the function;
  // T0..T3 and R0/R1 are the file-level macros and also depend on the local N2.
  2124. void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
  2125. {
  2126. assert(N%2==0 && N>=4);
  2127. #define M0 M
  2128. #define M1 (M+N2)
  2129. #define V0 V
  2130. #define V1 (V+N2)
  2131. #define X0 X
  2132. #define X1 (X+N2)
  2133. #define X2 (X+N)
  2134. #define X3 (X+N+N2)
  2135. const size_t N2 = N/2;
  // c2 and c3 are signed carry/borrow counters for the middle and top parts; they
  // are folded back into R at the end.
  2136. Multiply(T0, T2, V0, X3, N2);
  2137. int c2 = Add(T0, T0, X0, N);
  2138. MultiplyBottom(T3, T2, T0, U, N2);
  2139. MultiplyTop(T2, R, T0, T3, M0, N2);
  2140. c2 -= Subtract(T2, T1, T2, N2);
  2141. Multiply(T0, R, T3, M1, N2);
  2142. c2 -= Subtract(T0, T2, T0, N2);
  2143. int c3 = -(int)Subtract(T1, X2, T1, N2);
  2144. Multiply(R0, T2, V1, X3, N2);
  2145. c3 += Add(R, R, T, N);
  // Apply the middle carry/borrow to the upper half of R.
  2146. if (c2>0)
  2147. c3 += Increment(R1, N2);
  2148. else if (c2<0)
  2149. c3 -= Decrement(R1, N2, -c2);
  // A net carry or borrow of one is corrected by a single subtraction or addition of M.
  2150. assert(c3>=-1 && c3<=1);
  2151. if (c3>0)
  2152. Subtract(R, R, M, N);
  2153. else if (c3<0)
  2154. Add(R, R, M, N);
  2155. #undef M0
  2156. #undef M1
  2157. #undef V0
  2158. #undef V1
  2159. #undef X0
  2160. #undef X1
  2161. #undef X2
  2162. #undef X3
  2163. }
  // End of the recursive multiply/reduce routines: remove the half-array shorthand
  // macros so the short names A0..R3 do not leak into the rest of the file.
  2164. #undef A0
  2165. #undef A1
  2166. #undef B0
  2167. #undef B1
  2168. #undef T0
  2169. #undef T1
  2170. #undef T2
  2171. #undef T3
  2172. #undef R0
  2173. #undef R1
  2174. #undef R2
  2175. #undef R3
  2176. /*
  2177. // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
  2178. static word SubatomicDivide(word *A, word B0, word B1)
  2179. {
  2180. // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
  2181. assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
  2182. // estimate the quotient: do a 2 word by 1 word divide
  2183. word Q;
  2184. if (B1+1 == 0)
  2185. Q = A[2];
  2186. else
  2187. Q = DWord(A[1], A[2]).DividedBy(B1+1);
  2188. // now subtract Q*B from A
  2189. DWord p = DWord::Multiply(B0, Q);
  2190. DWord u = (DWord) A[0] - p.GetLowHalf();
  2191. A[0] = u.GetLowHalf();
  2192. u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
  2193. A[1] = u.GetLowHalf();
  2194. A[2] += u.GetHighHalf();
  2195. // Q <= actual quotient, so fix it
  2196. while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
  2197. {
  2198. u = (DWord) A[0] - B0;
  2199. A[0] = u.GetLowHalf();
  2200. u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
  2201. A[1] = u.GetLowHalf();
  2202. A[2] += u.GetHighHalf();
  2203. Q++;
  2204. assert(Q); // shouldn't overflow
  2205. }
  2206. return Q;
  2207. }
  2208. // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
  2209. static inline void AtomicDivide(word *Q, const word *A, const word *B)
  2210. {
  2211. if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
  2212. {
  2213. Q[0] = A[2];
  2214. Q[1] = A[3];
  2215. }
  2216. else
  2217. {
  2218. word T[4];
  2219. T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
  2220. Q[1] = SubatomicDivide(T+1, B[0], B[1]);
  2221. Q[0] = SubatomicDivide(T, B[0], B[1]);
  2222. #ifndef NDEBUG
  2223. // multiply quotient and divisor and add remainder, make sure it equals dividend
  2224. assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
  2225. word P[4];
  2226. LowLevel::Multiply2(P, Q, B);
  2227. Add(P, P, T, 4);
  2228. assert(memcmp(P, A, 4*WORD_SIZE)==0);
  2229. #endif
  2230. }
  2231. }
  2232. */
  // Divides the 4-word value A[0..3] by the 2-word value B[0..1] via
  // DivideFourWordsByTwo and stores the 2-word quotient in Q[0..1]. T receives the
  // helper's first output, which the debug check below treats as the remainder.
  // Debug builds verify Q*B + T == A, but only when B is non-zero.
  // NOTE(review): the B == 0 case is skipped by the check; the commented-out legacy
  // version treated a zero divisor as 2**(2*WORD_BITS) -- confirm the helper does too.
  2233. static inline void AtomicDivide(word *Q, const word *A, const word *B)
  2234. {
  2235. word T[4];
  2236. DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
  2237. Q[0] = q.GetLowHalf();
  2238. Q[1] = q.GetHighHalf();
  2239. #ifndef NDEBUG
  2240. if (B[0] || B[1])
  2241. {
  2242. // multiply quotient and divisor and add remainder, make sure it equals dividend
  2243. assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
  2244. word P[4];
  2245. s_pMul[0](P, Q, B);
  2246. Add(P, P, T, 4);
  2247. assert(memcmp(P, A, 4*WORD_SIZE)==0);
  2248. }
  2249. #endif
  2250. }
  2251. // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
  // R[N+2] - partial remainder, updated in place to R - Q*B
  // T ------ work space; T[0..N+1] receives Q*B, T+N+2 is scratch for the multiply
  // Q[2] --- two-word quotient estimate, incremented until the remainder is below B
  // B[N] --- divisor
  2252. static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
  2253. {
  2254. assert(N && N%2==0);
  2255. AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
  2256. word borrow = Subtract(R, R, T, N+2);
  // The estimate never exceeds the true quotient, so the subtraction cannot borrow.
  2257. assert(!borrow && !R[N+1]);
  // While the remainder is still >= B, subtract B once more and bump the
  // two-word quotient (with carry from Q[0] into Q[1]).
  2258. while (R[N] || Compare(R, B, N) >= 0)
  2259. {
  2260. R[N] -= Subtract(R, R, B, N);
  2261. Q[1] += (++Q[0]==0);
  2262. assert(Q[0] || Q[1]); // no overflow
  2263. }
  2264. }
  2265. // R[NB] -------- remainder = A%B
  2266. // Q[NA-NB+2] --- quotient = A/B
  2267. // T[NA+3*(NB+2)] - temp work space
  2268. // A[NA] -------- dividend
  2269. // B[NB] -------- divisor
  // Schoolbook long division on normalized copies of A and B, producing two quotient
  // words per step. NA and NB must be even, NB <= NA, and at least one of the top two
  // words of B must be non-zero.
  2270. void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
  2271. {
  2272. assert(NA && NB && NA%2==0 && NB%2==0);
  2273. assert(B[NB-1] || B[NB-2]);
  2274. assert(NB <= NA);
  2275. // set up temporary work space
  // Layout of T: TA[NA+2] (shifted dividend), TB[NB] (shifted divisor), TP (scratch
  // handed to CorrectQuotientEstimate).
  2276. word *const TA=T;
  2277. word *const TB=T+NA+2;
  2278. word *const TP=T+NA+2+NB;
  2279. // copy B into TB and normalize it so that TB has highest bit set to 1
  // If the top word of B is zero, B is shifted up by one whole word first
  // (shiftWords == 1), then by shiftBits bits.
  2280. unsigned shiftWords = (B[NB-1]==0);
  2281. TB[0] = TB[NB-1] = 0;
  2282. CopyWords(TB+shiftWords, B, NB-shiftWords);
  2283. unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
  2284. assert(shiftBits < WORD_BITS);
  2285. ShiftWordsLeftByBits(TB, NB, shiftBits);
  2286. // copy A into TA and normalize it
  // A gets the same word and bit shift as B; two extra top words absorb the overflow.
  2287. TA[0] = TA[NA] = TA[NA+1] = 0;
  2288. CopyWords(TA+shiftWords, A, NA);
  2289. ShiftWordsLeftByBits(TA, NA+2, shiftBits);
  // If the shift spilled at most one bit-pattern of 0 or 1 into TA[NA], handle the
  // top quotient words by repeated subtraction; otherwise extend the dividend by
  // two words so the main loop also produces the top quotient words.
  2290. if (TA[NA+1]==0 && TA[NA] <= 1)
  2291. {
  2292. Q[NA-NB+1] = Q[NA-NB] = 0;
  2293. while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
  2294. {
  2295. TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
  2296. ++Q[NA-NB];
  2297. }
  2298. }
  2299. else
  2300. {
  2301. NA+=2;
  2302. assert(Compare(TA+NA-NB, TB, NB) < 0);
  2303. }
  // BT = top two words of the normalized divisor plus one (with carry). Dividing by
  // this slightly larger value makes each two-word quotient estimate an
  // underestimate, which CorrectQuotientEstimate then fixes up.
  2304. word BT[2];
  2305. BT[0] = TB[NB-2] + 1;
  2306. BT[1] = TB[NB-1] + (BT[0]==0);
  2307. // start reducing TA mod TB, 2 words at a time
  2308. for (size_t i=NA-2; i>=NB; i-=2)
  2309. {
  2310. AtomicDivide(Q+i-NB, TA+i-2, BT);
  2311. CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
  2312. }
  2313. // copy TA into R, and denormalize it
  2314. CopyWords(R, TA+shiftWords, NB);
  2315. ShiftWordsRightByBits(R, NB, shiftBits);
  2316. }
  2317. static inline size_t EvenWordCount(const word *X, size_t N)
  2318. {
  2319. while (N && X[N-2]==0 && X[N-1]==0)
  2320. N-=2;
  2321. return N;
  2322. }
  2323. // return k
  2324. // R[N] --- result = A^(-1) * 2^k mod M
  2325. // T[4*N] - temporary work space
  2326. // A[NA] -- number to take inverse of
  2327. // M[N] --- modulus
  // If f (initially A) becomes zero, R is set to zero and 0 is returned.
  // T is split into four N-word arrays: b (starts at 1), c (starts at 0), f (copy of
  // A) and g (copy of M). bcLen and fgLen track the even number of words currently
  // in use by b/c and f/g so the inner operations touch as few words as possible.
  2328. unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
  2329. {
  2330. assert(NA<=N && N && N%2==0);
  2331. word *b = T;
  2332. word *c = T+N;
  2333. word *f = T+2*N;
  2334. word *g = T+3*N;
  2335. size_t bcLen=2, fgLen=EvenWordCount(M, N);
  2336. unsigned int k=0;
  // s records whether b/c (and f/g) have been swapped an odd number of times; it
  // decides at the end whether b or M-b is returned.
  2337. bool s=false;
  // Zeroes b, c and f; g is fully overwritten by the copy of M below.
  2338. SetWords(T, 0, 3*N);
  2339. b[0]=1;
  2340. CopyWords(f, A, NA);
  2341. CopyWords(g, M, N);
  2342. while (1)
  2343. {
  // Strip whole zero words from the bottom of f, shifting c up to match and
  // adding WORD_BITS to k for each word.
  2344. word t=f[0];
  2345. while (!t)
  2346. {
  2347. if (EvenWordCount(f, fgLen)==0)
  2348. {
  2349. SetWords(R, 0, N);
  2350. return 0;
  2351. }
  2352. ShiftWordsRightByWords(f, fgLen, 1);
  2353. bcLen += 2 * (c[bcLen-1] != 0);
  2354. assert(bcLen <= N);
  2355. ShiftWordsLeftByWords(c, bcLen, 1);
  2356. k+=WORD_BITS;
  2357. t=f[0];
  2358. }
  // Strip the remaining trailing zero bits of f.
  2359. unsigned int i = TrailingZeros(t);
  2360. t >>= i;
  2361. k += i;
  // f == 1 after the shift: finished. The result is b, or M - b when the
  // roles were swapped an odd number of times.
  2362. if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
  2363. {
  2364. if (s)
  2365. Subtract(R, M, b, N);
  2366. else
  2367. CopyWords(R, b, N);
  2368. return k;
  2369. }
  2370. ShiftWordsRightByBits(f, fgLen, i);
  2371. t = ShiftWordsLeftByBits(c, bcLen, i);
  2372. c[bcLen] += t;
  2373. bcLen += 2 * (t!=0);
  2374. assert(bcLen <= N);
  // Keep f >= g by swapping the pointer pairs (f,g) and (b,c) together, then
  // perform f -= g and b += c.
  2375. bool swap = Compare(f, g, fgLen)==-1;
  2376. ConditionalSwapPointers(swap, f, g);
  2377. ConditionalSwapPointers(swap, b, c);
  2378. s ^= swap;
  // Shrink the active length when the top pair of f is zero.
  2379. fgLen -= 2 * !(f[fgLen-2] | f[fgLen-1]);
  2380. Subtract(f, f, g, fgLen);
  2381. t = Add(b, b, c, bcLen);
  2382. b[bcLen] += t;
  2383. bcLen += 2*t;
  2384. assert(bcLen <= N);
  2385. }
  2386. }
  2387. // R[N] - result = A/(2^k) mod M
  2388. // A[N] - input
  2389. // M[N] - modulus
  2390. void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
  2391. {
  2392. CopyWords(R, A, N);
  2393. while (k--)
  2394. {
  2395. if (R[0]%2==0)
  2396. ShiftWordsRightByBits(R, N, 1);
  2397. else
  2398. {
  2399. word carry = Add(R, R, M, N);
  2400. ShiftWordsRightByBits(R, N, 1);
  2401. R[N-1] += carry<<(WORD_BITS-1);
  2402. }
  2403. }
  2404. }
  2405. // R[N] - result = A*(2^k) mod M
  2406. // A[N] - input
  2407. // M[N] - modulus
  2408. void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
  2409. {
  2410. CopyWords(R, A, N);
  2411. while (k--)
  2412. if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
  2413. Subtract(R, R, M, N);
  2414. }
  2415. // ******************************************************************
  2416. InitializeInteger::InitializeInteger()
  2417. {
  2418. if (!g_pAssignIntToInteger)
  2419. {
  2420. SetFunctionPointers();
  2421. g_pAssignIntToInteger = AssignIntToInteger;
  2422. }
  2423. }
  2424. static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
  2425. static inline size_t RoundupSize(size_t n)
  2426. {
  2427. if (n<=8)
  2428. return RoundupSizeTable[n];
  2429. else if (n<=16)
  2430. return 16;
  2431. else if (n<=32)
  2432. return 32;
  2433. else if (n<=64)
  2434. return 64;
  2435. else return size_t(1) << BitPrecision(n-1);
  2436. }
  2437. Integer::Integer()
  2438. : reg(2), sign(POSITIVE)
  2439. {
  2440. reg[0] = reg[1] = 0;
  2441. }
  2442. Integer::Integer(const Integer& t)
  2443. : reg(RoundupSize(t.WordCount())), sign(t.sign)
  2444. {
  2445. CopyWords(reg, t.reg, reg.size());
  2446. }
  2447. Integer::Integer(Sign s, lword value)
  2448. : reg(2), sign(s)
  2449. {
  2450. reg[0] = word(value);
  2451. reg[1] = word(SafeRightShift<WORD_BITS>(value));
  2452. }
  2453. Integer::Integer(signed long value)
  2454. : reg(2)
  2455. {
  2456. if (value >= 0)
  2457. sign = POSITIVE;
  2458. else
  2459. {
  2460. sign = NEGATIVE;
  2461. value = -value;
  2462. }
  2463. reg[0] = word(value);
  2464. reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
  2465. }
  2466. Integer::Integer(Sign s, word high, word low)
  2467. : reg(2), sign(s)
  2468. {
  2469. reg[0] = low;
  2470. reg[1] = high;
  2471. }
  2472. bool Integer::IsConvertableToLong() const
  2473. {
  2474. if (ByteCount() > sizeof(long))
  2475. return false;
  2476. unsigned long value = (unsigned long)reg[0];
  2477. value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
  2478. if (sign==POSITIVE)
  2479. return (signed long)value >= 0;
  2480. else
  2481. return -(signed long)value < 0;
  2482. }
  2483. signed long Integer::ConvertToLong() const
  2484. {
  2485. assert(IsConvertableToLong());
  2486. unsigned long value = (unsigned long)reg[0];
  2487. value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
  2488. return sign==POSITIVE ? value : -(signed long)value;
  2489. }
  2490. Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s)
  2491. {
  2492. Decode(encodedInteger, byteCount, s);
  2493. }
  2494. Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s)
  2495. {
  2496. Decode(encodedInteger, byteCount, s);
  2497. }
  2498. Integer::Integer(BufferedTransformation &bt)
  2499. {
  2500. BERDecode(bt);
  2501. }
  2502. Integer::Integer(RandomNumberGenerator &rng, size_t bitcount)
  2503. {
  2504. Randomize(rng, bitcount);
  2505. }
  2506. Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
  2507. {
  2508. if (!Randomize(rng, min, max, rnType, equiv, mod))
  2509. throw Integer::RandomNumberNotFound();
  2510. }
  2511. Integer Integer::Power2(size_t e)
  2512. {
  2513. Integer r((word)0, BitsToWords(e+1));
  2514. r.SetBit(e);
  2515. return r;
  2516. }
  2517. template <long i>
  2518. struct NewInteger
  2519. {
  2520. Integer * operator()() const
  2521. {
  2522. return new Integer(i);
  2523. }
  2524. };
  2525. const Integer &Integer::Zero()
  2526. {
  2527. return Singleton<Integer>().Ref();
  2528. }
  2529. const Integer &Integer::One()
  2530. {
  2531. return Singleton<Integer, NewInteger<1> >().Ref();
  2532. }
  2533. const Integer &Integer::Two()
  2534. {
  2535. return Singleton<Integer, NewInteger<2> >().Ref();
  2536. }
  2537. bool Integer::operator!() const
  2538. {
  2539. return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
  2540. }
  2541. Integer& Integer::operator=(const Integer& t)
  2542. {
  2543. if (this != &t)
  2544. {
  2545. if (reg.size() != t.reg.size() || t.reg[t.reg.size()/2] == 0)
  2546. reg.New(RoundupSize(t.WordCount()));
  2547. CopyWords(reg, t.reg, reg.size());
  2548. sign = t.sign;
  2549. }
  2550. return *this;
  2551. }
  2552. bool Integer::GetBit(size_t n) const
  2553. {
  2554. if (n/WORD_BITS >= reg.size())
  2555. return 0;
  2556. else
  2557. return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
  2558. }
  2559. void Integer::SetBit(size_t n, bool value)
  2560. {
  2561. if (value)
  2562. {
  2563. reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
  2564. reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
  2565. }
  2566. else
  2567. {
  2568. if (n/WORD_BITS < reg.size())
  2569. reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
  2570. }
  2571. }
  2572. byte Integer::GetByte(size_t n) const
  2573. {
  2574. if (n/WORD_SIZE >= reg.size())
  2575. return 0;
  2576. else
  2577. return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
  2578. }
  2579. void Integer::SetByte(size_t n, byte value)
  2580. {
  2581. reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
  2582. reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
  2583. reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
  2584. }
  2585. lword Integer::GetBits(size_t i, size_t n) const
  2586. {
  2587. lword v = 0;
  2588. assert(n <= sizeof(v)*8);
  2589. for (unsigned int j=0; j<n; j++)
  2590. v |= lword(GetBit(i+j)) << j;
  2591. return v;
  2592. }
  2593. Integer Integer::operator-() const
  2594. {
  2595. Integer result(*this);
  2596. result.Negate();
  2597. return result;
  2598. }
  2599. Integer Integer::AbsoluteValue() const
  2600. {
  2601. Integer result(*this);
  2602. result.sign = POSITIVE;
  2603. return result;
  2604. }
  2605. void Integer::swap(Integer &a)
  2606. {
  2607. reg.swap(a.reg);
  2608. std::swap(sign, a.sign);
  2609. }
  2610. Integer::Integer(word value, size_t length)
  2611. : reg(RoundupSize(length)), sign(POSITIVE)
  2612. {
  2613. reg[0] = value;
  2614. SetWords(reg+1, 0, reg.size()-1);
  2615. }
  2616. template <class T>
  2617. static Integer StringToInteger(const T *str)
  2618. {
  2619. int radix;
  2620. // GCC workaround
  2621. // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
  2622. unsigned int length;
  2623. for (length = 0; str[length] != 0; length++) {}
  2624. Integer v;
  2625. if (length == 0)
  2626. return v;
  2627. switch (str[length-1])
  2628. {
  2629. case 'h':
  2630. case 'H':
  2631. radix=16;
  2632. break;
  2633. case 'o':
  2634. case 'O':
  2635. radix=8;
  2636. break;
  2637. case 'b':
  2638. case 'B':
  2639. radix=2;
  2640. break;
  2641. default:
  2642. radix=10;
  2643. }
  2644. if (length > 2 && str[0] == '0' && str[1] == 'x')
  2645. radix = 16;
  2646. for (unsigned i=0; i<length; i++)
  2647. {
  2648. int digit;
  2649. if (str[i] >= '0' && str[i] <= '9')
  2650. digit = str[i] - '0';
  2651. else if (str[i] >= 'A' && str[i] <= 'F')
  2652. digit = str[i] - 'A' + 10;
  2653. else if (str[i] >= 'a' && str[i] <= 'f')
  2654. digit = str[i] - 'a' + 10;
  2655. else
  2656. digit = radix;
  2657. if (digit < radix)
  2658. {
  2659. v *= radix;
  2660. v += digit;
  2661. }
  2662. }
  2663. if (str[0] == '-')
  2664. v.Negate();
  2665. return v;
  2666. }
  2667. Integer::Integer(const char *str)
  2668. : reg(2), sign(POSITIVE)
  2669. {
  2670. *this = StringToInteger(str);
  2671. }
  2672. Integer::Integer(const wchar_t *str)
  2673. : reg(2), sign(POSITIVE)
  2674. {
  2675. *this = StringToInteger(str);
  2676. }
  2677. unsigned int Integer::WordCount() const
  2678. {
  2679. return (unsigned int)CountWords(reg, reg.size());
  2680. }
  2681. unsigned int Integer::ByteCount() const
  2682. {
  2683. unsigned wordCount = WordCount();
  2684. if (wordCount)
  2685. return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
  2686. else
  2687. return 0;
  2688. }
  2689. unsigned int Integer::BitCount() const
  2690. {
  2691. unsigned wordCount = WordCount();
  2692. if (wordCount)
  2693. return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
  2694. else
  2695. return 0;
  2696. }
  2697. void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
  2698. {
  2699. StringStore store(input, inputLen);
  2700. Decode(store, inputLen, s);
  2701. }
  2702. void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s)
  2703. {
  2704. assert(bt.MaxRetrievable() >= inputLen);
  2705. byte b;
  2706. bt.Peek(b);
  2707. sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
  2708. while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
  2709. {
  2710. bt.Skip(1);
  2711. inputLen--;
  2712. bt.Peek(b);
  2713. }
  2714. reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
  2715. for (size_t i=inputLen; i > 0; i--)
  2716. {
  2717. bt.Get(b);
  2718. reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
  2719. }
  2720. if (sign == NEGATIVE)
  2721. {
  2722. for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
  2723. reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
  2724. TwosComplement(reg, reg.size());
  2725. }
  2726. }
  2727. size_t Integer::MinEncodedSize(Signedness signedness) const
  2728. {
  2729. unsigned int outputLen = STDMAX(1U, ByteCount());
  2730. if (signedness == UNSIGNED)
  2731. return outputLen;
  2732. if (NotNegative() && (GetByte(outputLen-1) & 0x80))
  2733. outputLen++;
  2734. if (IsNegative() && *this < -Power2(outputLen*8-1))
  2735. outputLen++;
  2736. return outputLen;
  2737. }
  2738. void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
  2739. {
  2740. ArraySink sink(output, outputLen);
  2741. Encode(sink, outputLen, signedness);
  2742. }
  2743. void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
  2744. {
  2745. if (signedness == UNSIGNED || NotNegative())
  2746. {
  2747. for (size_t i=outputLen; i > 0; i--)
  2748. bt.Put(GetByte(i-1));
  2749. }
  2750. else
  2751. {
  2752. // take two's complement of *this
  2753. Integer temp = Integer::Power2(8*STDMAX((size_t)ByteCount(), outputLen)) + *this;
  2754. temp.Encode(bt, outputLen, UNSIGNED);
  2755. }
  2756. }
  2757. void Integer::DEREncode(BufferedTransformation &bt) const
  2758. {
  2759. DERGeneralEncoder enc(bt, INTEGER);
  2760. Encode(enc, MinEncodedSize(SIGNED), SIGNED);
  2761. enc.MessageEnd();
  2762. }
  2763. void Integer::BERDecode(const byte *input, size_t len)
  2764. {
  2765. StringStore store(input, len);
  2766. BERDecode(store);
  2767. }
  2768. void Integer::BERDecode(BufferedTransformation &bt)
  2769. {
  2770. BERGeneralDecoder dec(bt, INTEGER);
  2771. if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
  2772. BERDecodeError();
  2773. Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
  2774. dec.MessageEnd();
  2775. }
  2776. void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
  2777. {
  2778. DERGeneralEncoder enc(bt, OCTET_STRING);
  2779. Encode(enc, length);
  2780. enc.MessageEnd();
  2781. }
  2782. void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
  2783. {
  2784. BERGeneralDecoder dec(bt, OCTET_STRING);
  2785. if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
  2786. BERDecodeError();
  2787. Decode(dec, length);
  2788. dec.MessageEnd();
  2789. }
  2790. size_t Integer::OpenPGPEncode(byte *output, size_t len) const
  2791. {
  2792. ArraySink sink(output, len);
  2793. return OpenPGPEncode(sink);
  2794. }
  2795. size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const
  2796. {
  2797. word16 bitCount = BitCount();
  2798. bt.PutWord16(bitCount);
  2799. size_t byteCount = BitsToBytes(bitCount);
  2800. Encode(bt, byteCount);
  2801. return 2 + byteCount;
  2802. }
  2803. void Integer::OpenPGPDecode(const byte *input, size_t len)
  2804. {
  2805. StringStore store(input, len);
  2806. OpenPGPDecode(store);
  2807. }
  2808. void Integer::OpenPGPDecode(BufferedTransformation &bt)
  2809. {
  2810. word16 bitCount;
  2811. if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
  2812. throw OpenPGPDecodeErr();
  2813. Decode(bt, BitsToBytes(bitCount));
  2814. }
  2815. void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits)
  2816. {
  2817. const size_t nbytes = nbits/8 + 1;
  2818. SecByteBlock buf(nbytes);
  2819. rng.GenerateBlock(buf, nbytes);
  2820. if (nbytes)
  2821. buf[0] = (byte)Crop(buf[0], nbits % 8);
  2822. Decode(buf, nbytes, UNSIGNED);
  2823. }
  2824. void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
  2825. {
  2826. if (min > max)
  2827. throw InvalidArgument("Integer: Min must be no greater than Max");
  2828. Integer range = max - min;
  2829. const unsigned int nbits = range.BitCount();
  2830. do
  2831. {
  2832. Randomize(rng, nbits);
  2833. }
  2834. while (*this > range);
  2835. *this += min;
  2836. }
  2837. bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
  2838. {
  2839. return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
  2840. }
  2841. class KDF2_RNG : public RandomNumberGenerator
  2842. {
  2843. public:
  2844. KDF2_RNG(const byte *seed, size_t seedSize)
  2845. : m_counter(0), m_counterAndSeed(seedSize + 4)
  2846. {
  2847. memcpy(m_counterAndSeed + 4, seed, seedSize);
  2848. }
  2849. void GenerateBlock(byte *output, size_t size)
  2850. {
  2851. PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
  2852. ++m_counter;
  2853. P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
  2854. }
  2855. private:
  2856. word32 m_counter;
  2857. SecByteBlock m_counterAndSeed;
  2858. };
  2859. bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
  2860. {
  2861. Integer min = params.GetValueWithDefault("Min", Integer::Zero());
  2862. Integer max;
  2863. if (!params.GetValue("Max", max))
  2864. {
  2865. int bitLength;
  2866. if (params.GetIntValue("BitLength", bitLength))
  2867. max = Integer::Power2(bitLength);
  2868. else
  2869. throw InvalidArgument("Integer: missing Max argument");
  2870. }
  2871. if (min > max)
  2872. throw InvalidArgument("Integer: Min must be no greater than Max");
  2873. Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
  2874. Integer mod = params.GetValueWithDefault("Mod", Integer::One());
  2875. if (equiv.IsNegative() || equiv >= mod)
  2876. throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
  2877. Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
  2878. member_ptr<KDF2_RNG> kdf2Rng;
  2879. ConstByteArrayParameter seed;
  2880. if (params.GetValue(Name::Seed(), seed))
  2881. {
  2882. ByteQueue bq;
  2883. DERSequenceEncoder seq(bq);
  2884. min.DEREncode(seq);
  2885. max.DEREncode(seq);
  2886. equiv.DEREncode(seq);
  2887. mod.DEREncode(seq);
  2888. DEREncodeUnsigned(seq, rnType);
  2889. DEREncodeOctetString(seq, seed.begin(), seed.size());
  2890. seq.MessageEnd();
  2891. SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
  2892. bq.Get(finalSeed, finalSeed.size());
  2893. kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
  2894. }
  2895. RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
  2896. switch (rnType)
  2897. {
  2898. case ANY:
  2899. if (mod == One())
  2900. Randomize(rng, min, max);
  2901. else
  2902. {
  2903. Integer min1 = min + (equiv-min)%mod;
  2904. if (max < min1)
  2905. return false;
  2906. Randomize(rng, Zero(), (max - min1) / mod);
  2907. *this *= mod;
  2908. *this += min1;
  2909. }
  2910. return true;
  2911. case PRIME:
  2912. {
  2913. const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
  2914. int i;
  2915. i = 0;
  2916. while (1)
  2917. {
  2918. if (++i==16)
  2919. {
  2920. // check if there are any suitable primes in [min, max]
  2921. Integer first = min;
  2922. if (FirstPrime(first, max, equiv, mod, pSelector))
  2923. {
  2924. // if there is only one suitable prime, we're done
  2925. *this = first;
  2926. if (!FirstPrime(first, max, equiv, mod, pSelector))
  2927. return true;
  2928. }
  2929. else
  2930. return false;
  2931. }
  2932. Randomize(rng, min, max);
  2933. if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
  2934. return true;
  2935. }
  2936. }
  2937. default:
  2938. throw InvalidArgument("Integer: invalid RandomNumberType argument");
  2939. }
  2940. }
  2941. std::istream& operator>>(std::istream& in, Integer &a)
  2942. {
  2943. char c;
  2944. unsigned int length = 0;
  2945. SecBlock<char> str(length + 16);
  2946. std::ws(in);
  2947. do
  2948. {
  2949. in.read(&c, 1);
  2950. str[length++] = c;
  2951. if (length >= str.size())
  2952. str.Grow(length + 16);
  2953. }
  2954. while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
  2955. if (in.gcount())
  2956. in.putback(c);
  2957. str[length-1] = '\0';
  2958. a = Integer(str);
  2959. return in;
  2960. }
  2961. std::ostream& operator<<(std::ostream& out, const Integer &a)
  2962. {
  2963. // Get relevant conversion specifications from ostream.
  2964. long f = out.flags() & std::ios::basefield; // Get base digits.
  2965. int base, block;
  2966. char suffix;
  2967. switch(f)
  2968. {
  2969. case std::ios::oct :
  2970. base = 8;
  2971. block = 8;
  2972. suffix = 'o';
  2973. break;
  2974. case std::ios::hex :
  2975. base = 16;
  2976. block = 4;
  2977. suffix = 'h';
  2978. break;
  2979. default :
  2980. base = 10;
  2981. block = 3;
  2982. suffix = '.';
  2983. }
  2984. Integer temp1=a, temp2;
  2985. if (a.IsNegative())
  2986. {
  2987. out << '-';
  2988. temp1.Negate();
  2989. }
  2990. if (!a)
  2991. out << '0';
  2992. static const char upper[]="0123456789ABCDEF";
  2993. static const char lower[]="0123456789abcdef";
  2994. const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
  2995. unsigned i=0;
  2996. SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
  2997. while (!!temp1)
  2998. {
  2999. word digit;
  3000. Integer::Divide(digit, temp2, temp1, base);
  3001. s[i++]=vec[digit];
  3002. temp1.swap(temp2);
  3003. }
  3004. while (i--)
  3005. {
  3006. out << s[i];
  3007. // if (i && !(i%block))
  3008. // out << ",";
  3009. }
  3010. return out << suffix;
  3011. }
  3012. Integer& Integer::operator++()
  3013. {
  3014. if (NotNegative())
  3015. {
  3016. if (Increment(reg, reg.size()))
  3017. {
  3018. reg.CleanGrow(2*reg.size());
  3019. reg[reg.size()/2]=1;
  3020. }
  3021. }
  3022. else
  3023. {
  3024. word borrow = Decrement(reg, reg.size());
  3025. assert(!borrow);
  3026. if (WordCount()==0)
  3027. *this = Zero();
  3028. }
  3029. return *this;
  3030. }
  3031. Integer& Integer::operator--()
  3032. {
  3033. if (IsNegative())
  3034. {
  3035. if (Increment(reg, reg.size()))
  3036. {
  3037. reg.CleanGrow(2*reg.size());
  3038. reg[reg.size()/2]=1;
  3039. }
  3040. }
  3041. else
  3042. {
  3043. if (Decrement(reg, reg.size()))
  3044. *this = -One();
  3045. }
  3046. return *this;
  3047. }
  3048. void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
  3049. {
  3050. int carry;
  3051. if (a.reg.size() == b.reg.size())
  3052. carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
  3053. else if (a.reg.size() > b.reg.size())
  3054. {
  3055. carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
  3056. CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
  3057. carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
  3058. }
  3059. else
  3060. {
  3061. carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
  3062. CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
  3063. carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
  3064. }
  3065. if (carry)
  3066. {
  3067. sum.reg.CleanGrow(2*sum.reg.size());
  3068. sum.reg[sum.reg.size()/2] = 1;
  3069. }
  3070. sum.sign = Integer::POSITIVE;
  3071. }
  3072. void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
  3073. {
  3074. unsigned aSize = a.WordCount();
  3075. aSize += aSize%2;
  3076. unsigned bSize = b.WordCount();
  3077. bSize += bSize%2;
  3078. if (aSize == bSize)
  3079. {
  3080. if (Compare(a.reg, b.reg, aSize) >= 0)
  3081. {
  3082. Subtract(diff.reg, a.reg, b.reg, aSize);
  3083. diff.sign = Integer::POSITIVE;
  3084. }
  3085. else
  3086. {
  3087. Subtract(diff.reg, b.reg, a.reg, aSize);
  3088. diff.sign = Integer::NEGATIVE;
  3089. }
  3090. }
  3091. else if (aSize > bSize)
  3092. {
  3093. word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
  3094. CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
  3095. borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
  3096. assert(!borrow);
  3097. diff.sign = Integer::POSITIVE;
  3098. }
  3099. else
  3100. {
  3101. word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
  3102. CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
  3103. borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
  3104. assert(!borrow);
  3105. diff.sign = Integer::NEGATIVE;
  3106. }
  3107. }
  3108. // MSVC .NET 2003 workaround
  3109. template <class T> inline const T& STDMAX2(const T& a, const T& b)
  3110. {
  3111. return a < b ? b : a;
  3112. }
  3113. Integer Integer::Plus(const Integer& b) const
  3114. {
  3115. Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
  3116. if (NotNegative())
  3117. {
  3118. if (b.NotNegative())
  3119. PositiveAdd(sum, *this, b);
  3120. else
  3121. PositiveSubtract(sum, *this, b);
  3122. }
  3123. else
  3124. {
  3125. if (b.NotNegative())
  3126. PositiveSubtract(sum, b, *this);
  3127. else
  3128. {
  3129. PositiveAdd(sum, *this, b);
  3130. sum.sign = Integer::NEGATIVE;
  3131. }
  3132. }
  3133. return sum;
  3134. }
  3135. Integer& Integer::operator+=(const Integer& t)
  3136. {
  3137. reg.CleanGrow(t.reg.size());
  3138. if (NotNegative())
  3139. {
  3140. if (t.NotNegative())
  3141. PositiveAdd(*this, *this, t);
  3142. else
  3143. PositiveSubtract(*this, *this, t);
  3144. }
  3145. else
  3146. {
  3147. if (t.NotNegative())
  3148. PositiveSubtract(*this, t, *this);
  3149. else
  3150. {
  3151. PositiveAdd(*this, *this, t);
  3152. sign = Integer::NEGATIVE;
  3153. }
  3154. }
  3155. return *this;
  3156. }
  3157. Integer Integer::Minus(const Integer& b) const
  3158. {
  3159. Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
  3160. if (NotNegative())
  3161. {
  3162. if (b.NotNegative())
  3163. PositiveSubtract(diff, *this, b);
  3164. else
  3165. PositiveAdd(diff, *this, b);
  3166. }
  3167. else
  3168. {
  3169. if (b.NotNegative())
  3170. {
  3171. PositiveAdd(diff, *this, b);
  3172. diff.sign = Integer::NEGATIVE;
  3173. }
  3174. else
  3175. PositiveSubtract(diff, b, *this);
  3176. }
  3177. return diff;
  3178. }
  3179. Integer& Integer::operator-=(const Integer& t)
  3180. {
  3181. reg.CleanGrow(t.reg.size());
  3182. if (NotNegative())
  3183. {
  3184. if (t.NotNegative())
  3185. PositiveSubtract(*this, *this, t);
  3186. else
  3187. PositiveAdd(*this, *this, t);
  3188. }
  3189. else
  3190. {
  3191. if (t.NotNegative())
  3192. {
  3193. PositiveAdd(*this, *this, t);
  3194. sign = Integer::NEGATIVE;
  3195. }
  3196. else
  3197. PositiveSubtract(*this, t, *this);
  3198. }
  3199. return *this;
  3200. }
  3201. Integer& Integer::operator<<=(size_t n)
  3202. {
  3203. const size_t wordCount = WordCount();
  3204. const size_t shiftWords = n / WORD_BITS;
  3205. const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
  3206. reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
  3207. ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
  3208. ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
  3209. return *this;
  3210. }
  3211. Integer& Integer::operator>>=(size_t n)
  3212. {
  3213. const size_t wordCount = WordCount();
  3214. const size_t shiftWords = n / WORD_BITS;
  3215. const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
  3216. ShiftWordsRightByWords(reg, wordCount, shiftWords);
  3217. if (wordCount > shiftWords)
  3218. ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
  3219. if (IsNegative() && WordCount()==0) // avoid -0
  3220. *this = Zero();
  3221. return *this;
  3222. }
  3223. void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
  3224. {
  3225. size_t aSize = RoundupSize(a.WordCount());
  3226. size_t bSize = RoundupSize(b.WordCount());
  3227. product.reg.CleanNew(RoundupSize(aSize+bSize));
  3228. product.sign = Integer::POSITIVE;
  3229. IntegerSecBlock workspace(aSize + bSize);
  3230. AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
  3231. }
  3232. void Multiply(Integer &product, const Integer &a, const Integer &b)
  3233. {
  3234. PositiveMultiply(product, a, b);
  3235. if (a.NotNegative() != b.NotNegative())
  3236. product.Negate();
  3237. }
  3238. Integer Integer::Times(const Integer &b) const
  3239. {
  3240. Integer product;
  3241. Multiply(product, *this, b);
  3242. return product;
  3243. }
  3244. /*
  3245. void PositiveDivide(Integer &remainder, Integer &quotient,
  3246. const Integer &dividend, const Integer &divisor)
  3247. {
  3248. remainder.reg.CleanNew(divisor.reg.size());
  3249. remainder.sign = Integer::POSITIVE;
  3250. quotient.reg.New(0);
  3251. quotient.sign = Integer::POSITIVE;
  3252. unsigned i=dividend.BitCount();
  3253. while (i--)
  3254. {
  3255. word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
  3256. remainder.reg[0] |= dividend[i];
  3257. if (overflow || remainder >= divisor)
  3258. {
  3259. Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
  3260. quotient.SetBit(i);
  3261. }
  3262. }
  3263. }
  3264. */
  3265. void PositiveDivide(Integer &remainder, Integer &quotient,
  3266. const Integer &a, const Integer &b)
  3267. {
  3268. unsigned aSize = a.WordCount();
  3269. unsigned bSize = b.WordCount();
  3270. if (!bSize)
  3271. throw Integer::DivideByZero();
  3272. if (aSize < bSize)
  3273. {
  3274. remainder = a;
  3275. remainder.sign = Integer::POSITIVE;
  3276. quotient = Integer::Zero();
  3277. return;
  3278. }
  3279. aSize += aSize%2; // round up to next even number
  3280. bSize += bSize%2;
  3281. remainder.reg.CleanNew(RoundupSize(bSize));
  3282. remainder.sign = Integer::POSITIVE;
  3283. quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
  3284. quotient.sign = Integer::POSITIVE;
  3285. IntegerSecBlock T(aSize+3*(bSize+2));
  3286. Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
  3287. }
  3288. void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
  3289. {
  3290. PositiveDivide(remainder, quotient, dividend, divisor);
  3291. if (dividend.IsNegative())
  3292. {
  3293. quotient.Negate();
  3294. if (remainder.NotZero())
  3295. {
  3296. --quotient;
  3297. remainder = divisor.AbsoluteValue() - remainder;
  3298. }
  3299. }
  3300. if (divisor.IsNegative())
  3301. quotient.Negate();
  3302. }
  3303. void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
  3304. {
  3305. q = a;
  3306. q >>= n;
  3307. const size_t wordCount = BitsToWords(n);
  3308. if (wordCount <= a.WordCount())
  3309. {
  3310. r.reg.resize(RoundupSize(wordCount));
  3311. CopyWords(r.reg, a.reg, wordCount);
  3312. SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
  3313. if (n % WORD_BITS != 0)
  3314. r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
  3315. }
  3316. else
  3317. {
  3318. r.reg.resize(RoundupSize(a.WordCount()));
  3319. CopyWords(r.reg, a.reg, r.reg.size());
  3320. }
  3321. r.sign = POSITIVE;
  3322. if (a.IsNegative() && r.NotZero())
  3323. {
  3324. --q;
  3325. r = Power2(n) - r;
  3326. }
  3327. }
  3328. Integer Integer::DividedBy(const Integer &b) const
  3329. {
  3330. Integer remainder, quotient;
  3331. Integer::Divide(remainder, quotient, *this, b);
  3332. return quotient;
  3333. }
  3334. Integer Integer::Modulo(const Integer &b) const
  3335. {
  3336. Integer remainder, quotient;
  3337. Integer::Divide(remainder, quotient, *this, b);
  3338. return remainder;
  3339. }
  3340. void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
  3341. {
  3342. if (!divisor)
  3343. throw Integer::DivideByZero();
  3344. assert(divisor);
  3345. if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
  3346. {
  3347. quotient = dividend >> (BitPrecision(divisor)-1);
  3348. remainder = dividend.reg[0] & (divisor-1);
  3349. return;
  3350. }
  3351. unsigned int i = dividend.WordCount();
  3352. quotient.reg.CleanNew(RoundupSize(i));
  3353. remainder = 0;
  3354. while (i--)
  3355. {
  3356. quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
  3357. remainder = DWord(dividend.reg[i], remainder) % divisor;
  3358. }
  3359. if (dividend.NotNegative())
  3360. quotient.sign = POSITIVE;
  3361. else
  3362. {
  3363. quotient.sign = NEGATIVE;
  3364. if (remainder)
  3365. {
  3366. --quotient;
  3367. remainder = divisor - remainder;
  3368. }
  3369. }
  3370. }
  3371. Integer Integer::DividedBy(word b) const
  3372. {
  3373. word remainder;
  3374. Integer quotient;
  3375. Integer::Divide(remainder, quotient, *this, b);
  3376. return quotient;
  3377. }
  3378. word Integer::Modulo(word divisor) const
  3379. {
  3380. if (!divisor)
  3381. throw Integer::DivideByZero();
  3382. assert(divisor);
  3383. word remainder;
  3384. if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
  3385. remainder = reg[0] & (divisor-1);
  3386. else
  3387. {
  3388. unsigned int i = WordCount();
  3389. if (divisor <= 5)
  3390. {
  3391. DWord sum(0, 0);
  3392. while (i--)
  3393. sum += reg[i];
  3394. remainder = sum % divisor;
  3395. }
  3396. else
  3397. {
  3398. remainder = 0;
  3399. while (i--)
  3400. remainder = DWord(reg[i], remainder) % divisor;
  3401. }
  3402. }
  3403. if (IsNegative() && remainder)
  3404. remainder = divisor - remainder;
  3405. return remainder;
  3406. }
  3407. void Integer::Negate()
  3408. {
  3409. if (!!(*this)) // don't flip sign if *this==0
  3410. sign = Sign(1-sign);
  3411. }
  3412. int Integer::PositiveCompare(const Integer& t) const
  3413. {
  3414. unsigned size = WordCount(), tSize = t.WordCount();
  3415. if (size == tSize)
  3416. return CryptoPP::Compare(reg, t.reg, size);
  3417. else
  3418. return size > tSize ? 1 : -1;
  3419. }
  3420. int Integer::Compare(const Integer& t) const
  3421. {
  3422. if (NotNegative())
  3423. {
  3424. if (t.NotNegative())
  3425. return PositiveCompare(t);
  3426. else
  3427. return 1;
  3428. }
  3429. else
  3430. {
  3431. if (t.NotNegative())
  3432. return -1;
  3433. else
  3434. return -PositiveCompare(t);
  3435. }
  3436. }
  3437. Integer Integer::SquareRoot() const
  3438. {
  3439. if (!IsPositive())
  3440. return Zero();
  3441. // overestimate square root
  3442. Integer x, y = Power2((BitCount()+1)/2);
  3443. assert(y*y >= *this);
  3444. do
  3445. {
  3446. x = y;
  3447. y = (x + *this/x) >> 1;
  3448. } while (y<x);
  3449. return x;
  3450. }
  3451. bool Integer::IsSquare() const
  3452. {
  3453. Integer r = SquareRoot();
  3454. return *this == r.Squared();
  3455. }
  3456. bool Integer::IsUnit() const
  3457. {
  3458. return (WordCount() == 1) && (reg[0] == 1);
  3459. }
  3460. Integer Integer::MultiplicativeInverse() const
  3461. {
  3462. return IsUnit() ? *this : Zero();
  3463. }
  3464. Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
  3465. {
  3466. return x*y%m;
  3467. }
  3468. Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
  3469. {
  3470. ModularArithmetic mr(m);
  3471. return mr.Exponentiate(x, e);
  3472. }
  3473. Integer Integer::Gcd(const Integer &a, const Integer &b)
  3474. {
  3475. return EuclideanDomainOf<Integer>().Gcd(a, b);
  3476. }
  3477. Integer Integer::InverseMod(const Integer &m) const
  3478. {
  3479. assert(m.NotNegative());
  3480. if (IsNegative())
  3481. return Modulo(m).InverseMod(m);
  3482. if (m.IsEven())
  3483. {
  3484. if (!m || IsEven())
  3485. return Zero(); // no inverse
  3486. if (*this == One())
  3487. return One();
  3488. Integer u = m.Modulo(*this).InverseMod(*this);
  3489. return !u ? Zero() : (m*(*this-u)+1)/(*this);
  3490. }
  3491. SecBlock<word> T(m.reg.size() * 4);
  3492. Integer r((word)0, m.reg.size());
  3493. unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
  3494. DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
  3495. return r;
  3496. }
  3497. word Integer::InverseMod(word mod) const
  3498. {
  3499. word g0 = mod, g1 = *this % mod;
  3500. word v0 = 0, v1 = 1;
  3501. word y;
  3502. while (g1)
  3503. {
  3504. if (g1 == 1)
  3505. return v1;
  3506. y = g0 / g1;
  3507. g0 = g0 % g1;
  3508. v0 += y * v1;
  3509. if (!g0)
  3510. break;
  3511. if (g0 == 1)
  3512. return mod-v0;
  3513. y = g1 / g0;
  3514. g1 = g1 % g0;
  3515. v1 += y * v0;
  3516. }
  3517. return 0;
  3518. }
  3519. // ********************************************************
  3520. ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
  3521. {
  3522. BERSequenceDecoder seq(bt);
  3523. OID oid(seq);
  3524. if (oid != ASN1::prime_field())
  3525. BERDecodeError();
  3526. m_modulus.BERDecode(seq);
  3527. seq.MessageEnd();
  3528. m_result.reg.resize(m_modulus.reg.size());
  3529. }
  3530. void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
  3531. {
  3532. DERSequenceEncoder seq(bt);
  3533. ASN1::prime_field().DEREncode(seq);
  3534. m_modulus.DEREncode(seq);
  3535. seq.MessageEnd();
  3536. }
  3537. void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
  3538. {
  3539. a.DEREncodeAsOctetString(out, MaxElementByteLength());
  3540. }
  3541. void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
  3542. {
  3543. a.BERDecodeAsOctetString(in, MaxElementByteLength());
  3544. }
  3545. const Integer& ModularArithmetic::Half(const Integer &a) const
  3546. {
  3547. if (a.reg.size()==m_modulus.reg.size())
  3548. {
  3549. CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
  3550. return m_result;
  3551. }
  3552. else
  3553. return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
  3554. }
  3555. const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
  3556. {
  3557. if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
  3558. {
  3559. if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
  3560. || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
  3561. {
  3562. CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
  3563. }
  3564. return m_result;
  3565. }
  3566. else
  3567. {
  3568. m_result1 = a+b;
  3569. if (m_result1 >= m_modulus)
  3570. m_result1 -= m_modulus;
  3571. return m_result1;
  3572. }
  3573. }
  3574. Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
  3575. {
  3576. if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
  3577. {
  3578. if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
  3579. || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
  3580. {
  3581. CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
  3582. }
  3583. }
  3584. else
  3585. {
  3586. a+=b;
  3587. if (a>=m_modulus)
  3588. a-=m_modulus;
  3589. }
  3590. return a;
  3591. }
  3592. const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
  3593. {
  3594. if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
  3595. {
  3596. if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
  3597. CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
  3598. return m_result;
  3599. }
  3600. else
  3601. {
  3602. m_result1 = a-b;
  3603. if (m_result1.IsNegative())
  3604. m_result1 += m_modulus;
  3605. return m_result1;
  3606. }
  3607. }
  3608. Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
  3609. {
  3610. if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
  3611. {
  3612. if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
  3613. CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
  3614. }
  3615. else
  3616. {
  3617. a-=b;
  3618. if (a.IsNegative())
  3619. a+=m_modulus;
  3620. }
  3621. return a;
  3622. }
  3623. const Integer& ModularArithmetic::Inverse(const Integer &a) const
  3624. {
  3625. if (!a)
  3626. return a;
  3627. CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
  3628. if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
  3629. Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
  3630. return m_result;
  3631. }
  3632. Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
  3633. {
  3634. if (m_modulus.IsOdd())
  3635. {
  3636. MontgomeryRepresentation dr(m_modulus);
  3637. return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
  3638. }
  3639. else
  3640. return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
  3641. }
  3642. void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
  3643. {
  3644. if (m_modulus.IsOdd())
  3645. {
  3646. MontgomeryRepresentation dr(m_modulus);
  3647. dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
  3648. for (unsigned int i=0; i<exponentsCount; i++)
  3649. results[i] = dr.ConvertOut(results[i]);
  3650. }
  3651. else
  3652. AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
  3653. }
  3654. MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m) // modulus must be odd
  3655. : ModularArithmetic(m),
  3656. m_u((word)0, m_modulus.reg.size()),
  3657. m_workspace(5*m_modulus.reg.size())
  3658. {
  3659. if (!m_modulus.IsOdd())
  3660. throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
  3661. RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
  3662. }
  3663. const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
  3664. {
  3665. word *const T = m_workspace.begin();
  3666. word *const R = m_result.reg.begin();
  3667. const size_t N = m_modulus.reg.size();
  3668. assert(a.reg.size()<=N && b.reg.size()<=N);
  3669. AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
  3670. SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
  3671. MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
  3672. return m_result;
  3673. }
  3674. const Integer& MontgomeryRepresentation::Square(const Integer &a) const
  3675. {
  3676. word *const T = m_workspace.begin();
  3677. word *const R = m_result.reg.begin();
  3678. const size_t N = m_modulus.reg.size();
  3679. assert(a.reg.size()<=N);
  3680. CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
  3681. SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
  3682. MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
  3683. return m_result;
  3684. }
  3685. Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
  3686. {
  3687. word *const T = m_workspace.begin();
  3688. word *const R = m_result.reg.begin();
  3689. const size_t N = m_modulus.reg.size();
  3690. assert(a.reg.size()<=N);
  3691. CopyWords(T, a.reg, a.reg.size());
  3692. SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
  3693. MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
  3694. return m_result;
  3695. }
  3696. const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
  3697. {
  3698. // return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
  3699. word *const T = m_workspace.begin();
  3700. word *const R = m_result.reg.begin();
  3701. const size_t N = m_modulus.reg.size();
  3702. assert(a.reg.size()<=N);
  3703. CopyWords(T, a.reg, a.reg.size());
  3704. SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
  3705. MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
  3706. unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
  3707. // cout << "k=" << k << " N*32=" << 32*N << endl;
  3708. if (k>N*WORD_BITS)
  3709. DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
  3710. else
  3711. MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
  3712. return m_result;
  3713. }
  3714. NAMESPACE_END
  3715. #endif