Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

271 lines
8.2 KiB

  1. // TITLE("Compute Checksum")
  2. //++
  3. //
  4. // Copyright (c) Microsoft Corporation. All rights reserved.
  5. //
  6. // Module Name:
  7. //
  8. // xsum.s
  9. //
  10. // Abstract:
  11. //
  12. // This module implements a function to compute the checksum of a buffer.
  13. //
  14. // Author:
  15. //
  16. // John Vert (jvert) 11-Jul-1994
  17. //
  18. // Environment:
  19. //
  20. // Revision History:
  21. //
  22. //--
  23. #include "ksalpha.h"
  24. SBTTL("Compute Checksum")
  25. //++
  26. //
  27. // ULONG
  28. // tcpxsum (
  29. // IN ULONG Checksum,
  30. // IN PUSHORT Source,
  31. // IN ULONG Length
  32. // )
  33. //
  34. // Routine Description:
  35. //
  36. // This function computes the checksum of the specified buffer.
  37. //
  38. // Arguments:
  39. //
  40. // Checksum (a0) - Supplies the initial checksum value.
  41. //
  42. // Source (a1) - Supplies a pointer to the checksum buffer
  43. //
  44. // Length (a2) - Supplies the length of the buffer in bytes.
  45. //
  46. // Return Value:
  47. //
  48. // The computed checksum is returned as the function value.
  49. //
  50. //--
  51. LEAF_ENTRY(tcpxsum)
  52. zap a0, 0xf0, a0 // zero bytes 4-7: keep only the low 32 bits of the initial checksum
  53. bis a1, zero, t6 // t6 = original Source; its low bit is tested at 60: to decide on the byte swap
  54. bis zero, zero, v0 // v0 = running 64-bit sum of the buffer (carries are added back in), starts at zero
  55. //
  56. // Check if the buffer is quadword aligned.
  57. //
  58. // If the buffer is not quadword aligned, then add the leading bytes to the
  59. // checksum. a2 is used as a byte count from here on.
  60. //
  61. ldq_u t0, 0(a1) // get containing quadword of first part. NOTE(review): this load is issued before any length check
  62. blbc a1, 10f // low address bit clear: buffer is already word aligned
  63. beq a2, 65f // if zero bytes, don't do anything
  64. extbl t0, a1, t1 // get leading byte (the single byte at the odd start address)
  65. sll t1, 8, v0 // place it in byte 1; the byte swap at 60: moves it back to byte 0
  66. addq a1, 1, a1 // increment buffer to first full word
  67. subq a2, 1, a2 // decrement byte count
  68. 10:
  69. and a1, 6, t2 // t2 = byte offset inside the quadword (a1 is even at this point)
  70. beq t2, 20f // if eq, quadword aligned
  71. extql t0, t2, t0 // shift the bytes from a1 upward down to byte 0, zero filling the top
  72. and a1, 7, t3 // this and the next two instructions: t3 = 8 - (a1 & 7), bytes up to the next quadword boundary
  73. subq zero, t3, t3
  74. addq t3, 8, t3
  75. addq a1, 8, a1 // advance buffer address to next qword
  76. bic a1, 7, a1 // round down so a1 is quadword aligned
  77. subq a2, t3, t2 // t2 = bytes remaining after this partial quadword
  78. blt t2, 55f // if ltz, buffer ends inside this quadword: 55: masks t0 down to a2 bytes
  79. addq v0, t0, v0 // add bytes to partial checksum
  80. cmpult v0, t0, t1 // generate carry (sum wrapped if it is below the addend)
  81. addq t1, v0, v0 // add carry back into checksum
  82. bis t2, zero, a2 // reduce count of bytes to checksum
  83. beq t2, 60f // if eq, no more bytes
  84. 20:
  85. //
  86. // Compute the checksum in 64-byte blocks. a1 is quadword aligned here.
  87. //
  88. bic a2, 7, t4 // t4 = bytes held in whole quadwords; the residual (a2 & 7) is handled at 40:
  89. beq t4, 40f // if eq, no quadwords to checksum
  90. subq zero, t4, t2 // compute negative of byte count
  91. and t2, 15 << 2, t3 // t3 = (-t4) mod 64, bytes missing from a full first block (bit 2 is always clear: t4 is a multiple of 8)
  92. ldq t0, 0(a1) // get first quadword to checksum
  93. beq t3, 35f // if eq, byte count is a multiple of 64: go straight to the block loop
  94. subq a1, t3, a1 // bias buffer address so the vector's fixed displacements hit the real data
  95. bic t4, 64-1, t4 // t4 = bytes in the full 64-byte blocks that follow the partial first pass
  96. lda t2, 30f // get base address of code vector
  97. addl t3, t3, t3 // each 8 data bytes skipped = one 4-instruction step (16 code bytes), so offset = 2 * t3
  98. addq t3, t2, t2 // compute code vector offset
  99. bis t0, zero, t1 // the entry step may consume t0 or t1, so both hold the first quadword
  100. jmp (t2) // dispatch
  101. 30:
  102. //
  103. // The following code vector computes the checksum of a 64-byte block.
  104. // It is entered at a computed step, so every step must stay exactly four instructions and in this order.
  105. .set noreorder
  106. ldq t1, 8(a1) // step 0: load next quadword into t1
  107. addq v0, t0, v0 // add current quadword (t0)
  108. cmpult v0, t0, t2 // t2 = 1 if the add wrapped
  109. addq v0, t2, v0 // add the carry back in
  110. ldq t0, 16(a1) // step 1: load next into t0, consume t1
  111. addq v0, t1, v0
  112. cmpult v0, t1, t2
  113. addq v0, t2, v0
  114. ldq t1, 24(a1) // step 2: consume t0
  115. addq v0, t0, v0
  116. cmpult v0, t0, t2
  117. addq v0, t2, v0
  118. ldq t0, 32(a1) // step 3: consume t1
  119. addq v0, t1, v0
  120. cmpult v0, t1, t2
  121. addq v0, t2, v0
  122. ldq t1, 40(a1) // step 4: consume t0
  123. addq v0, t0, v0
  124. cmpult v0, t0, t2
  125. addq v0, t2, v0
  126. ldq t0, 48(a1) // step 5: consume t1
  127. addq v0, t1, v0
  128. cmpult v0, t1, t2
  129. addq v0, t2, v0
  130. ldq t1, 56(a1) // step 6: consume t0, load the last quadword of the block
  131. addq v0, t0, v0
  132. cmpult v0, t0, t2
  133. addq v0, t2, v0
  134. addq a1, 64, a1 // step 7: no load, advance a1 past this block and consume t1
  135. addq v0, t1, v0
  136. cmpult v0, t1, t2
  137. addq v0, t2, v0
  138. .set reorder
  139. beq t4, 40f // if zero, no full 64-byte blocks remain
  140. 35:
  141. ldq t0, 0(a1) // first quadword of this 64-byte block
  142. //
  143. // The following loop is allowed to be reordered by the assembler for
  144. // optimal scheduling. Nothing branches into the middle of it.
  145. //
  146. subq t4, 64, t4 // reduce byte count by one 64-byte block
  147. ldq t1, 8(a1) // same eight load/add/carry steps as the vector above
  148. addq v0, t0, v0
  149. cmpult v0, t0, t2
  150. addq v0, t2, v0
  151. ldq t0, 16(a1)
  152. addq v0, t1, v0
  153. cmpult v0, t1, t2
  154. addq v0, t2, v0
  155. ldq t1, 24(a1)
  156. addq v0, t0, v0
  157. cmpult v0, t0, t2
  158. addq v0, t2, v0
  159. ldq t0, 32(a1)
  160. addq v0, t1, v0
  161. cmpult v0, t1, t2
  162. addq v0, t2, v0
  163. ldq t1, 40(a1)
  164. addq v0, t0, v0
  165. cmpult v0, t0, t2
  166. addq v0, t2, v0
  167. ldq t0, 48(a1)
  168. addq v0, t1, v0
  169. cmpult v0, t1, t2
  170. addq v0, t2, v0
  171. ldq t1, 56(a1)
  172. addq v0, t0, v0
  173. cmpult v0, t0, t2
  174. addq v0, t2, v0
  175. addq a1, 64, a1 // advance to the next 64-byte block
  176. addq v0, t1, v0
  177. cmpult v0, t1, t2
  178. addq v0, t2, v0
  179. bne t4, 35b // if ne zero, more 64-byte blocks to sum
  180. 40:
  181. //
  182. // Check for any remaining bytes.
  183. //
  184. and a2, 7, a2 // isolate residual bytes
  185. beq a2, 60f // if eq, no residual bytes
  186. 50:
  187. //
  188. // Checksum remaining bytes.
  189. //
  190. // The technique we use here is to load the final quadword, then
  191. // zero out the bytes that are not included. Label 55: is also entered from the
  192. // alignment code above, with t0 already extracted and a2 = bytes that remain.
  193. ldq t0, 0(a1) // get quadword surrounding remainder
  194. 55:
  195. ornot zero, zero, t1 // t1 = all ones
  196. sll t1, a2, t2 // low 8 bits now have a 1 for every byte position >= a2
  197. zap t0, t2, t0 // zero out bytes past end of buffer
  198. addq v0, t0, v0 // add quadword to partial checksum
  199. cmpult v0, t0, t1 // generate carry
  200. addq t1, v0, v0 // add carry back into checksum
  201. 60:
  202. //
  203. // Byte swap the 64-bit checksum if the start of the buffer was not word aligned
  204. //
  205. blbc t6, 65f // original Source was even: no swap needed
  206. zap v0, 0xAA, t0 // isolate even bytes
  207. sll t0, 8, t0 // shift even bytes into odd positions
  208. srl v0, 8, t1 // shift odd bytes into even positions
  209. zap t1, 0xAA, t1 // isolate odd bytes (they now sit in the even positions)
  210. bis t0, t1, v0 // merge bytes back together
  211. 65:
  212. //
  213. // add computed checksum to original checksum, and fold the 64-bit
  214. // result down to 16 bits.
  215. //
  216. addq v0, a0, v0 // add computed checksum to original
  217. cmpult v0, a0, t0 // generate carry
  218. addq v0, t0, v0 // add carry back into checksum
  219. //
  220. // swap the longwords in order to sum two longwords and their carry in one add.
  221. //
  222. sll v0, 32, t0 // shift low longword into high
  223. srl v0, 32, t1 // shift high longword into low
  224. bis t1, t0, t5 // merge back together
  225. addq v0, t5, t0 // produce sum + carry in high longword
  226. srl t0, 32, t1 // shift back down to low half
  227. //
  228. // swap the words in order to sum two words and their carry in one add
  229. //
  230. sll t1, 16, t2 // shift low word up into bits 16-31 (high word moves to bits 32-47)
  231. srl t1, 16, t3 // shift high word down into bits 0-15
  232. bis t2, t3, t4 // merge back together
  233. addq t4, t1, t2 // produce sum and carry in bits 16-31
  234. extwl t2, 2, v0 // extract result: the 16-bit word at byte offset 2, zero extended
  235. ret zero, (ra) // return
  236. .end tcpxsum