Team Fortress 2 Source Code as of 22/4/2020
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

184 lines
4.3 KiB

  1. /* AesOpt.c -- Intel's AES
  2. 2013-11-12 : Igor Pavlov : Public domain */
  3. #include "Precomp.h"
  4. #include "CpuArch.h"
  5. #ifdef MY_CPU_X86_OR_AMD64
  6. #if _MSC_VER >= 1500
  7. #define USE_INTEL_AES
  8. #endif
  9. #endif
  10. #ifdef USE_INTEL_AES
  11. #include <wmmintrin.h>
  12. void MY_FAST_CALL AesCbc_Encode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
  13. {
  14. __m128i m = *p;
  15. for (; numBlocks != 0; numBlocks--, data++)
  16. {
  17. UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
  18. const __m128i *w = p + 3;
  19. m = _mm_xor_si128(m, *data);
  20. m = _mm_xor_si128(m, p[2]);
  21. do
  22. {
  23. m = _mm_aesenc_si128(m, w[0]);
  24. m = _mm_aesenc_si128(m, w[1]);
  25. w += 2;
  26. }
  27. while (--numRounds2 != 0);
  28. m = _mm_aesenc_si128(m, w[0]);
  29. m = _mm_aesenclast_si128(m, w[1]);
  30. *data = m;
  31. }
  32. *p = m;
  33. }
  34. #define NUM_WAYS 3
  35. #define AES_OP_W(op, n) { \
  36. const __m128i t = w[n]; \
  37. m0 = op(m0, t); \
  38. m1 = op(m1, t); \
  39. m2 = op(m2, t); \
  40. }
  41. #define AES_DEC(n) AES_OP_W(_mm_aesdec_si128, n)
  42. #define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n)
  43. #define AES_ENC(n) AES_OP_W(_mm_aesenc_si128, n)
  44. #define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n)
  45. void MY_FAST_CALL AesCbc_Decode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
  46. {
  47. __m128i iv = *p;
  48. for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
  49. {
  50. UInt32 numRounds2 = *(const UInt32 *)(p + 1);
  51. const __m128i *w = p + numRounds2 * 2;
  52. __m128i m0, m1, m2;
  53. {
  54. const __m128i t = w[2];
  55. m0 = _mm_xor_si128(t, data[0]);
  56. m1 = _mm_xor_si128(t, data[1]);
  57. m2 = _mm_xor_si128(t, data[2]);
  58. }
  59. numRounds2--;
  60. do
  61. {
  62. AES_DEC(1)
  63. AES_DEC(0)
  64. w -= 2;
  65. }
  66. while (--numRounds2 != 0);
  67. AES_DEC(1)
  68. AES_DEC_LAST(0)
  69. {
  70. __m128i t;
  71. t = _mm_xor_si128(m0, iv); iv = data[0]; data[0] = t;
  72. t = _mm_xor_si128(m1, iv); iv = data[1]; data[1] = t;
  73. t = _mm_xor_si128(m2, iv); iv = data[2]; data[2] = t;
  74. }
  75. }
  76. for (; numBlocks != 0; numBlocks--, data++)
  77. {
  78. UInt32 numRounds2 = *(const UInt32 *)(p + 1);
  79. const __m128i *w = p + numRounds2 * 2;
  80. __m128i m = _mm_xor_si128(w[2], *data);
  81. numRounds2--;
  82. do
  83. {
  84. m = _mm_aesdec_si128(m, w[1]);
  85. m = _mm_aesdec_si128(m, w[0]);
  86. w -= 2;
  87. }
  88. while (--numRounds2 != 0);
  89. m = _mm_aesdec_si128(m, w[1]);
  90. m = _mm_aesdeclast_si128(m, w[0]);
  91. m = _mm_xor_si128(m, iv);
  92. iv = *data;
  93. *data = m;
  94. }
  95. *p = iv;
  96. }
  97. void MY_FAST_CALL AesCtr_Code_Intel(__m128i *p, __m128i *data, size_t numBlocks)
  98. {
  99. __m128i ctr = *p;
  100. __m128i one;
  101. one.m128i_u64[0] = 1;
  102. one.m128i_u64[1] = 0;
  103. for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
  104. {
  105. UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
  106. const __m128i *w = p;
  107. __m128i m0, m1, m2;
  108. {
  109. const __m128i t = w[2];
  110. ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t);
  111. ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t);
  112. ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t);
  113. }
  114. w += 3;
  115. do
  116. {
  117. AES_ENC(0)
  118. AES_ENC(1)
  119. w += 2;
  120. }
  121. while (--numRounds2 != 0);
  122. AES_ENC(0)
  123. AES_ENC_LAST(1)
  124. data[0] = _mm_xor_si128(data[0], m0);
  125. data[1] = _mm_xor_si128(data[1], m1);
  126. data[2] = _mm_xor_si128(data[2], m2);
  127. }
  128. for (; numBlocks != 0; numBlocks--, data++)
  129. {
  130. UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
  131. const __m128i *w = p;
  132. __m128i m;
  133. ctr = _mm_add_epi64(ctr, one);
  134. m = _mm_xor_si128(ctr, p[2]);
  135. w += 3;
  136. do
  137. {
  138. m = _mm_aesenc_si128(m, w[0]);
  139. m = _mm_aesenc_si128(m, w[1]);
  140. w += 2;
  141. }
  142. while (--numRounds2 != 0);
  143. m = _mm_aesenc_si128(m, w[0]);
  144. m = _mm_aesenclast_si128(m, w[1]);
  145. *data = _mm_xor_si128(*data, m);
  146. }
  147. *p = ctr;
  148. }
  149. #else
  150. void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks);
  151. void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks);
  152. void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks);
  153. void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
  154. {
  155. AesCbc_Encode(p, data, numBlocks);
  156. }
  157. void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
  158. {
  159. AesCbc_Decode(p, data, numBlocks);
  160. }
  161. void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks)
  162. {
  163. AesCtr_Code(p, data, numBlocks);
  164. }
  165. #endif