Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

223 lines
8.1 KiB

  1. title "Compute Checksum"
  2. ;++
  3. ;
  4. ; Copyright (c) Microsoft Corporation. All rights reserved.
  5. ;
  6. ; Module Name:
  7. ;
  8. ; xsum.amd
  9. ;
  10. ; Abstract:
  11. ;
  12. ; This module implements the platform specific function to compute the
  13. ; checksum of a buffer.
  14. ;
  15. ; Author:
  16. ;
  17. ; David N. Cutler (davec) 6-Jul-2000
  18. ;
  19. ; Environment:
  20. ;
  21. ; Any mode.
  22. ;
  23. ;--
  24. include ksamd64.inc
  25. ;++
  26. ;
  27. ; ULONG
  28. ; tcpxsum(
  29. ; IN ULONG Checksum,
  30. ; IN PUCHAR Source,
  31. ; IN ULONG Length
  32. ; )
  33. ;
  34. ; Routine Description:
  35. ;
  36. ; This function computes the checksum of the specified buffer and combines
  37. ; the computed checksum with the specified checksum.
  38. ;
  39. ; Arguments:
  40. ;
  41. ; Checksum (ecx) - Supplies the initial checksum value, in 16-bit form,
  42. ; with the high word set to 0.
  43. ;
  44. ; Source (rdx) - Supplies a pointer to the checksum buffer.
  45. ;
  46. ; Length (r8d) - Supplies the length of the buffer in bytes.
  47. ;
  48. ; Return Value:
  49. ;
  50. ; The computed checksum, in 16-bit form, with the high word set to 0.
  51. ;
  52. ;--
  53. NESTED_ENTRY tcpxsum, _TEXT$00
  54. push_reg rbx ; save nonvolatile rbx (bx holds the input checksum below)
  55. END_PROLOGUE
  56. mov r11, rdx ; r11 = original buffer address, its low bit is retested at done
  57. mov bx, cx ; bx = low 16 bits of the input checksum, combined at the end
  58. mov r10, rdx ; r10 = running buffer pointer
  59. mov ecx, r8d ; ecx = remaining byte count (32-bit write zero-extends rcx)
  60. xor eax, eax ; rax = 0, the running checksum accumulator
  61. test ecx, ecx ; test if any bytes to checksum
  62. jz combine ; if z, no bytes to checksum, eax is 0 going into combine
  63. ;
  64. ; If the checksum buffer is not word aligned, then load the first byte of
  65. ; the buffer into ah. The result is byte swapped at done in that case.
  66. ;
  67. ; N.B. First buffer address check is done using rdx rather than r10 so
  68. ; the register ah can be used (ah cannot be combined with a REX register).
  69. ;
  70. test dl, 1 ; low address bit set means the buffer starts on an odd byte
  71. jz short word_aligned ; if z, buffer word aligned
  72. mov ah, [rdx] ; ah = first byte of buffer (rest of eax is still zero)
  73. inc r10 ; step past the byte just consumed, r10 is now word aligned
  74. dec ecx ; one fewer byte remaining
  75. jz done ; if z set, that was the only byte
  76. ;
  77. ; If the remaining byte count is odd, then load the last byte of the
  78. ; buffer into al. The shift leaves ecx holding the count of whole words.
  79. ;
  80. word_aligned: ;
  81. shr ecx, 1 ; ecx = word count, CF = odd trailing byte, ZF = no whole words
  82. jnc short word_count ; if nc, even number of bytes
  83. mov al, [r10][rcx * 2] ; al = trailing byte, mov leaves the shr flags intact
  84. jz done ; if z set (still from the shr), no whole words remain
  85. ;
  86. ; If the buffer is not quadword aligned, then add words to the checksum until
  87. ; the buffer is quadword aligned or the word count reaches zero.
  88. ;
  89. word_count: ;
  90. test r10b, 6 ; address bits 1-2 clear means quadword aligned (bit 0 is already 0)
  91. jz short qword_aligned ; if z, buffer quadword aligned
  92. qword_align: ;
  93. add ax, [r10] ; add next word into the low 16 bits of the checksum
  94. adc eax, 0 ; end-around carry, add the carry out back in
  95. add r10, 2 ; advance buffer address by one word
  96. dec ecx ; decrement number of words
  97. jz done ; if z, no more words
  98. test r10b, 6 ; test if buffer qword aligned
  99. jnz short qword_align ; if nz, buffer not qword aligned
  100. ;
  101. ; Compute checksum in large blocks of qwords (up to 16 qwords per loop pass).
  102. ;
  103. qword_aligned: ;
  104. mov edx, ecx ; copy number of words remaining
  105. shr edx, 2 ; edx = number of quadwords (4 words each)
  106. jz residual_words ; if z, no quadwords to checksum
  107. mov r8d, edx ; r8d = number of full 16-quadword loop iterations
  108. shr r8d, 4 ; (quadword count / 16)
  109. and edx, 16 - 1 ; edx = quadwords in the partial iteration, and leaves CF clear
  110. jz short checksum_loop ; if z, no partial loop iteration, enter at the loop top
  111. sub rdx, 16 ; rdx = -(16 - partial count), negative loop top offset
  112. lea r10, [r10][rdx * 8] ; back r10 up 8 bytes per skipped adc so later offsets hit the data
  113. neg rdx ; rdx = 16 - partial count = number of adc slots to skip
  114. add r8d, 1 ; count the partial pass, add (unlike inc) rewrites CF, leaving it clear
  115. lea r9, checksum_start ; r9 = address of the first adc slot
  116. lea r9, [r9][rdx * 4] ; skip rdx slots of 4 bytes each
  117. jmp r9 ; enter the unrolled block part-way through
  118. ;
  119. ; Checksum quadwords, 16 per iteration, chaining the carry through adc.
  120. ;
  121. ; N.B. This loop is entered with carry clear.
  122. ;
  123. align 16
  124. checksum_loop: ;
  125. prefetchnta 0[r10] ; prefetch start of 128-byte block
  126. prefetchnta 120[r10] ; prefetch end of 128-byte block
  127. ; N.B. The first 16 of the following instructions are exactly 4 bytes long.
  128. ; The computed jump above indexes into them in 4-byte steps, and the
  129. ; .errnz below checks their total size.
  130. checksum_start:
  131. ; adc rax, 0[r10] ; Compute checksum (hand-encoded in the four bytes below)
  132. ; NOTE(review): presumably the assembler would drop the zero displacement and emit a shorter form.
  133. db 049h ; Manually encoded 4-byte form: REX prefix (W and B bits)
  134. db 013h ; adc r64, r/m64 opcode
  135. db 042h ; ModRM byte: rax, [r10 + disp8]
  136. db 000h ; disp8 = 0, giving adc rax, 0[r10]
  137. adc rax, 8[r10] ;
  138. adc rax, 16[r10] ;
  139. adc rax, 24[r10] ;
  140. adc rax, 32[r10] ;
  141. adc rax, 40[r10] ;
  142. adc rax, 48[r10] ;
  143. adc rax, 56[r10] ;
  144. adc rax, 64[r10] ;
  145. adc rax, 72[r10] ;
  146. adc rax, 80[r10] ;
  147. adc rax, 88[r10] ;
  148. adc rax, 96[r10] ;
  149. adc rax, 104[r10] ;
  150. adc rax, 112[r10] ;
  151. adc rax, 120[r10] ;
  152. .errnz (($ - checksum_start) - (4 * 16))
  153. lea r10, 128[r10] ; update source address, lea leaves the carry flag untouched
  154. dec r8d ; decrement loop count, dec does not modify the carry flag
  155. jnz short checksum_loop ; if nz, more iterations
  156. adc rax, 0 ; propagate last carry
  157. ;
  158. ; Compute checksum of residual words (at most 3 remain after the quadwords).
  159. ;
  160. residual_words: ;
  161. and ecx, 3 ; isolate residual words (word count modulo 4)
  162. jz short done ; if z, no residual words
  163. add_word: ;
  164. add ax, [r10] ; add word to the low 16 bits of the checksum
  165. adc ax, 0 ; end-around carry within the low 16 bits
  166. add r10, 2 ; increment buffer address
  167. dec ecx ; decrement word count
  168. jnz short add_word ; if nz, more words remaining
  169. ;
  170. ; Fold the computed checksum to 32-bits and then to 16-bits. If the buffer
  171. ; started on an odd address, the bytes of the folded result are swapped.
  172. done: ;
  173. mov rcx, rax ; fold the checksum to 32-bits
  174. ror rcx, 32 ; swap high and low dwords
  175. add rax, rcx ; high dword = high + low + carry out of the low dword add
  176. shr rax, 32 ; extract 32-bit checksum
  177. mov ecx, eax ; fold the checksum to 16-bits
  178. ror ecx, 16 ; swap high and low words
  179. add eax, ecx ; high word = high + low + carry out of the low word add
  180. shr eax, 16 ; extract 16-bit checksum, high word of eax is now 0
  181. test r11b, 1 ; test if the original buffer address was word aligned
  182. jz short combine ; if z set, buffer word aligned
  183. ror ax, 8 ; swap checksum bytes, the odd start put each byte in the other half of its word
  184. ;
  185. ; Combine the input checksum with the computed checksum.
  186. ;
  187. combine: ;
  188. add ax, bx ; add the caller's initial checksum to the low 16 bits
  189. adc eax, 0 ; end-around carry into the low 16 bits
  190. pop rbx ; restore nonvolatile register
  191. retq ; return with the checksum in eax
  192. NESTED_END tcpxsum, _TEXT$00
  193. end