Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

231 lines
8.0 KiB

  1. title "Compute Checksum"
  2. ;++
  3. ;
  4. ; Copyright (c) Microsoft Corporation. All rights reserved.
  5. ;
  6. ; Module Name:
  7. ;
  8. ; xsum.amd
  9. ;
  10. ; Abstract:
  11. ;
  12. ; This module implements the platform specific function to compute the
  13. ; checksum of a buffer.
  14. ;
  15. ; Author:
  16. ;
  17. ; David N. Cutler (davec) 6-Jul-2000
  18. ;
  19. ; Environment:
  20. ;
  21. ; Any mode.
  22. ;
  23. ;--
  24. include ksamd64.inc
  25. ;++
  26. ;
  27. ; ULONG
  28. ; tcpxsum(
  29. ; IN ULONG Checksum,
  30. ; IN PUCHAR Source,
  31. ; IN ULONG Length
  32. ; )
  33. ;
  34. ; Routine Description:
  35. ;
  36. ; This function computes the checksum of the specified buffer and combines
  37. ; the computed checksum with the specified checksum.
  38. ;
  39. ; Arguments:
  40. ;
  41. ; Checksum (ecx) - Supplies the initial checksum value, in 16-bit form,
  42. ; with the high word set to 0.
  43. ;
  44. ; Source (rdx) - Supplies a pointer to the checksum buffer.
  45. ;
  46. ; Length (r8d) - Supplies the length of the buffer in bytes.
  47. ;
  48. ; Return Value:
  49. ;
  50. ; The computed checksum, in 16-bit form, with the high word set to 0.
  51. ;
  52. ;--
  53. NESTED_ENTRY tcpxsum, _TEXT$00
  54. push_reg rbx ; save nonvolatile register (bx holds the input checksum below)
  55. END_PROLOGUE
  ;
  ; Register usage in this routine:
  ;
  ;   rax - running checksum accumulator
  ;   bx  - caller's initial checksum, added in at "combine"
  ;   ecx - bytes remaining, then (from word_aligned on) words remaining
  ;   r10 - current read position in the buffer
  ;   r11 - original buffer address, kept for the odd-address test at "done"
  ;
  56. mov r11, rdx ; save initial buffer address
  57. mov bx, cx ; save initial checksum (low 16 bits only)
  58. mov r10, rdx ; set checksum buffer address
  59. mov ecx, r8d ; set buffer length
  60. xor eax, eax ; clear computed checksum
  61. test ecx, ecx ; test if any bytes to checksum
  62. jz combine ; if z, no bytes to checksum; result is the input checksum
  63. ;
  64. ; If the checksum buffer is not word aligned, then add the first byte of
  65. ; the buffer to the checksum.
  66. ;
  67. ; N.B. First buffer address check is done using rdx rather than r10 so
  68. ; the register ah can be used.
  ;
  ; (An instruction that names ah cannot also carry the REX prefix
  ; that addressing through r10 requires.)
  ;
  ; The byte is placed in ah, i.e. the high byte of the low word. All
  ; following words are then read from an even address, and the byte
  ; lanes are put right again by the "ror ax, 8" at the end of the routine.
  69. ;
  70. test dl, 1 ; test if buffer word aligned
  71. jz short word_aligned ; if z, buffer word aligned
  72. mov ah, [rdx] ; get first byte of checksum
  73. inc r10 ; increment buffer address
  74. dec ecx ; decrement number of bytes
  75. jz done ; if z set, no more bytes
  76. ;
  77. ; If the buffer is not an even number of bytes, then add the last byte of
  78. ; the buffer to the checksum.
  ;
  ; After the shift ecx is the word count, so [r10][rcx * 2] addresses the
  ; one byte left over past the last whole word.
  79. ;
  80. word_aligned: ;
  81. shr ecx, 1 ; convert to word count; carry = odd byte left over
  82. jnc short word_count ; if nc, even number of bytes
  83. mov al, [r10][rcx * 2] ; initialize the computed checksum with the last byte
  84. jz done ; if z set, no more bytes (zf is still from the shr; mov leaves flags alone)
  85. ;
  86. ; If the buffer is not quadword aligned, then add words to the checksum until
  87. ; the buffer is quadword aligned.
  ;
  ; r10 is word aligned here, so bits 1 and 2 (mask 6) are the only bits
  ; that can keep it from being quadword aligned.
  88. ;
  89. word_count: ;
  90. test r10b, 6 ; test if buffer quadword aligned
  91. jz short qword_aligned ; if z, buffer quadword aligned
  92. qword_align: ;
  93. add ax, [r10] ; add next word of checksum
  94. adc eax, 0 ; propagate carry (end-around carry back into the low word)
  95. add r10, 2 ; increment buffer address
  96. dec ecx ; decrement number of words
  97. jz done ; if z, no more words
  98. test r10b, 6 ; test if buffer qword aligned
  99. jnz short qword_align ; if nz, buffer not qword aligned
  100. ;
  101. ; Compute checksum in large blocks of qwords.
  ;
  ; Each pass of checksum_loop adds 16 quadwords (128 bytes). When the
  ; quadword count is not a multiple of 16, the first pass is a partial
  ; one: r10 is moved back by the number of quadwords to skip and control
  ; jumps into the middle of the 16 adc instructions, so that the first
  ; adc executed reads the quadword at the original r10.
  102. ;
  103. qword_aligned: ;
  104. mov edx, ecx ; copy number of words remaining
  105. shr edx, 2 ; compute number of quadwords
  106. jz residual_words ; if z, no quadwords to checksum
  107. mov r8d, edx ; compute number of loop iterations
  108. shr r8d, 4 ; (16 quadwords per full iteration)
  109. and edx, 16 - 1 ; isolate partial loop iteration
  110. jz short checksum_loop ; if z, no partial loop iteration (and leaves carry clear)
  111. sub rdx, 16 ; compute negative loop top offset
  112. lea r10, [r10][rdx * 8] ; bias initial buffer address
  113. neg rdx ; compute positive loop top offset (sets carry)
  114. add r8d, 1 ; increment loop iteration count (clears carry again for the first adc)
  115. ;
  116. ; ASSEMBLER WORKAROUND - when fixed, remove the following data
  117. ; byte
  ;
  ; NOTE(review): 04ch is a REX prefix byte with the W and R bits set;
  ; presumably it supplies the prefix the assembler fails to emit for
  ; the "lea r9" that follows - confirm against the assembler in use.
  118. ;
  119. db 04ch
  120. lea r9, checksum_start ; get address of checksum array
  121. lea r9, [r9][rdx * 4] ; compute initial iteration address (4 bytes per adc instruction)
  122. jmp r9 ; start checksum
  123. ;
  124. ; Checksum quadwords.
  125. ;
  126. ; N.B. This loop is entered with carry clear.
  ;
  ; The carry flag is live from one adc to the next and across loop
  ; iterations; the loop bookkeeping below uses only lea and dec, neither
  ; of which modifies carry. The prefetches are skipped on a partial
  ; first pass because the computed jump lands past them.
  127. ;
  128. align 16
  129. checksum_loop: ;
  130. prefetchnta 0[r10] ; prefetch start of 128-byte block
  131. prefetchnta 120[r10] ; prefetch end of 128-byte block
  132. ;
  133. ; N.B. The first 16 of following instructions are exactly 4 bytes long.
  ;
  ; The computed jump above relies on this fixed size; the .errnz after
  ; the last adc checks it at assembly time.
  134. ;
  135. checksum_start:
  136. ; adc rax, 0[r10] ; Compute checksum
  ;
  ; Bytes: 49 = REX prefix (W and B set), 13 = adc r64, r/m64,
  ; 42 = modrm (disp8 form, reg = rax, base = r10), 00 = disp8 of zero.
  ; The explicit zero displacement keeps this entry 4 bytes long like
  ; the other fifteen.
  137. ;
  138. db 049h ; Manually encode the 4-byte
  139. db 013h ; version of the instruction
  140. db 042h ;
  141. db 000h ; adc rax, 0[r10]
  142. adc rax, 8[r10] ;
  143. adc rax, 16[r10] ;
  144. adc rax, 24[r10] ;
  145. adc rax, 32[r10] ;
  146. adc rax, 40[r10] ;
  147. adc rax, 48[r10] ;
  148. adc rax, 56[r10] ;
  149. adc rax, 64[r10] ;
  150. adc rax, 72[r10] ;
  151. adc rax, 80[r10] ;
  152. adc rax, 88[r10] ;
  153. adc rax, 96[r10] ;
  154. adc rax, 104[r10] ;
  155. adc rax, 112[r10] ;
  156. adc rax, 120[r10] ;
  ;
  ; Assembly-time check: the 16 instructions above must total 64 bytes.
  ;
  157. .errnz (($ - checksum_start) - (4 * 16))
  158. lea r10, 128[r10] ; update source address (lea leaves carry untouched)
  159. dec r8d ; decrement loop count (dec leaves carry untouched)
  160. jnz short checksum_loop ; if nz, more iterations
  161. adc rax, 0 ; propagate last carry
  162. ;
  163. ; Compute checksum of residual words.
  ;
  ; ecx still holds the word count from before the quadword loop, so its
  ; low two bits give the 0-3 words left after the last whole quadword.
  164. ;
  165. residual_words: ;
  166. and ecx, 3 ; isolate residual words
  167. jz short done ; if z, no residual words
  168. add_word: ;
  169. add ax, [r10] ; add word to checksum
  170. adc ax, 0 ; propagate carry (end-around, within the low word)
  171. add r10, 2 ; increment buffer address
  172. dec ecx ; decrement word count
  173. jnz short add_word ; if nz, more words remaining
  174. ;
  175. ; Fold the computed checksum to 32-bits and then to 16-bits.
  ;
  ; Each fold adds the value to a copy of itself rotated by half its
  ; width. The upper half of the result is then high + low plus the
  ; carry out of the lower half, and the shift extracts it.
  ;
  ; If the buffer started at an odd address (low bit of r11), every byte
  ; was summed in the opposite byte lane, so the two result bytes are
  ; swapped before combining.
  176. ;
  177. done: ;
  178. mov rcx, rax ; fold the checksum to 32-bits
  179. ror rcx, 32 ; swap high and low dwords
  180. add rax, rcx ; produce sum + carry in high 32-bits
  181. shr rax, 32 ; extract 32-bit checksum
  182. mov ecx, eax ; fold the checksum to 16-bits
  183. ror ecx, 16 ; swap high and low words
  184. add eax, ecx ; produce sum + carry in high 16-bits
  185. shr eax, 16 ; extract 16-bit check sum
  186. test r11b, 1 ; test if buffer word aligned (original address)
  187. jz short combine ; if z set, buffer word aligned
  188. ror ax, 8 ; swap checksum bytes
  189. ;
  190. ; Combine the input checksum with the computed checksum.
  ;
  ; The high word of eax is zero on every path that reaches this label,
  ; so the adc leaves the 16-bit result with the high word still zero.
  191. ;
  192. combine: ;
  193. add ax, bx ; combine checksums
  194. adc eax, 0 ; add carry to low 16-bits
  195. pop rbx ; restore nonvolatile register
  196. retq ; return
  197. NESTED_END tcpxsum, _TEXT$00
  198. end