Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

683 lines
22 KiB

  1. title "Compute Checksum"
  2. ;/*++
  3. ;
  4. ; Copyright (c) Microsoft Corporation. All rights reserved.
  5. ;
  6. ; Module Name:
  7. ;
  8. ; xsum.x86
  9. ;
  10. ; Abstract:
  11. ;
  12. ; This module implements a function to compute the checksum of a buffer.
  13. ;
  14. ; Author:
  15. ;
  16. ; David N. Cutler (davec) 27-Jan-1992
  17. ;
  18. ; Revision History:
  19. ;
  20. ; Who When What
  21. ; -------- -------- ----------------------------------------------
  22. ; mikeab 01-22-94 Pentium optimization
  23. ;
  24. ; Environment:
  25. ;
  26. ; Any mode.
  27. ;
  28. ; Revision History:
  29. ;
  30. ;--*/
  31. LOOP_UNROLLING_BITS equ 5 ; log2 of the number of dwords summed per pass of tcpxsum's unrolled loop
  32. LOOP_UNROLLING equ (1 SHL LOOP_UNROLLING_BITS) ; dwords per unrolled pass (1 SHL 5 = 32)
  33. .386 ; assemble for the 80386 instruction set
  34. .model small,c
  35. assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
  36. assume fs:nothing,gs:nothing
  37. .xlist ; suppress listing output while the include files are read
  38. include callconv.inc ; presumably supplies cPublicProc / stdRET / stdENDP -- confirm
  39. include ks386.inc
  40. .list ; resume listing output
  41. .code
  42. ;++
  43. ;
  44. ; ULONG
  45. ; tcpxsum(
  46. ; IN ULONG cksum,
  47. ; IN PUCHAR buf,
  48. ; IN ULONG len
  49. ; )
  50. ;
  51. ; Routine Description:
  52. ;
  53. ; This function computes the checksum of the specified buffer.
  54. ;
  55. ; Arguments:
  56. ;
  57. ; cksum - Supplies the initial checksum value, in 16-bit form,
  58. ; with the high word set to 0.
  59. ;
  60. ; buf - Supplies a pointer to the buffer to checksum.
  61. ;
  62. ; len - Supplies the length of the buffer in bytes.
  63. ;
  64. ; Return Value:
  65. ;
  66. ; The checksum of the buffer, folded to 16 bits and added (with end-around
  67. ; carry) to the initial checksum, is returned as the function value.
  68. ;
  69. ;--
  70. cksum equ 12 ; stack offset to initial checksum (from ESP after tcpxsum's two register pushes)
  71. buf equ 16 ; stack offset to source address
  72. len equ 20 ; stack offset to length in bytes
  ; Relay jumps placed just ahead of tcpxsum. The "jz short" branches near the
  ; top of the routine target these labels, and each one forwards with an
  ; unconditional jmp to the matching "label proc" location further down.
  73. to_checksum_last_word: ; reached when there are no whole dwords to sum
  74. jmp checksum_last_word
  75. to_checksum_done: ; reached when no bytes remain to be summed
  76. jmp checksum_done
  77. to_checksum_dword_loop_done: ; reached when exactly one dword was preloaded
  78. jmp checksum_dword_loop_done
  ; Register use in tcpxsum:
  ;   eax - running checksum accumulator
  ;   ecx - remaining count: bytes, then words, then dwords, then unrolled passes
  ;   edx - most recently loaded word/dword, consumed by the following ADD/ADC
  ;   esi - current source address
  ;   ebx - dword count of the first (partial) pass; reloaded with buf at the end
  79. cPublicProc tcpxsum,3
  80. push ebx ; save nonvolatile register
  81. push esi ; save nonvolatile register
  82. mov ecx,[esp + len] ; get length in bytes
  83. sub eax,eax ; clear computed checksum
  84. test ecx,ecx ; any bytes to checksum at all?
  85. jz short to_checksum_done ; no bytes to checksum
  86. ;
  87. ; If the checksum buffer is not word aligned, then load the first byte of
  88. ; the buffer into AH of the computed checksum.
  89. ;
  90. mov esi,[esp + buf] ; get source address
  91. sub edx,edx ; set up to load word into EDX below
  92. test esi,1 ; check if buffer word aligned
  93. jz short checksum_word_aligned ; if zf, buffer word aligned
  94. mov ah,[esi] ; get first byte (we know we'll have
  95. ; to swap at the end)
  96. inc esi ; increment buffer address
  97. dec ecx ; decrement number of bytes
  98. jz short to_checksum_done ; if zf set, no more bytes
  99. ;
  100. ; If the buffer is not an even number of bytes, then initialize
  101. ; the computed checksum with the last byte of the buffer.
  102. ;
  103. checksum_word_aligned: ;
  104. shr ecx,1 ; convert to word count (CF = odd byte left over, ZF = no whole words)
  105. jnc short checksum_start ; if nc, even number of bytes
  106. mov al,[esi+ecx*2] ; initialize the computed checksum with the last byte (MOV leaves the SHR flags intact)
  107. jz short to_checksum_done ; if zf set, no more bytes
  108. ;
  109. ; Compute checksum in large blocks of dwords, with one partial word up front if
  110. ; necessary to get dword alignment, and another partial word at the end if
  111. ; needed.
  112. ;
  113. ;
  114. ; Compute checksum on the leading word, if that's necessary to get dword
  115. ; alignment.
  116. ;
  117. checksum_start: ;
  118. test esi,02h ; check if source dword aligned
  119. jz short checksum_dword_aligned ; source is already dword aligned
  120. mov dx,[esi] ; get first word to checksum
  121. add esi,2 ; update source address
  122. add eax,edx ; update partial checksum
  123. ; (no carry is possible, because EAX
  124. ; and EDX are both 16-bit values)
  125. dec ecx ; count off this word (zero case gets
  126. ; picked up below)
  127. ;
  128. ; Checksum as many words as possible by processing a dword at a time.
  129. ;
  130. checksum_dword_aligned:
  131. push ecx ; so we can tell if there's a trailing
  132. ; word later
  133. shr ecx,1 ; # of dwords to checksum
  134. jz short to_checksum_last_word ; no dwords to checksum
  135. mov edx,[esi] ; preload the first dword
  136. add esi,4 ; point to the next dword
  137. dec ecx ; count off the dword we just loaded
  138. jz short to_checksum_dword_loop_done
  139. ; skip the loop if that was the only
  140. ; dword
  141. mov ebx,ecx ; EBX = # of dwords left to checksum
  142. add ecx,LOOP_UNROLLING-1 ; round up loop count
  143. shr ecx,LOOP_UNROLLING_BITS ; convert from dword count to unrolled
  144. ; loop count
  145. and ebx,LOOP_UNROLLING-1 ; # of partial dwords to do in first
  146. ; loop
  147. jz short checksum_dword_loop ; special-case when no partial loop,
  148. ; because fixup below doesn't work
  149. ; in that case (carry flag is
  150. ; cleared at this point, as required
  151. ; at loop entry)
  152. lea esi,[esi+ebx*4-(LOOP_UNROLLING*4)]
  153. ; adjust buffer pointer back to
  154. ; compensate for hardwired displacement
  155. ; at loop entry point
  156. ; ***doesn't change carry flag***
  157. jmp loop_entry[ebx*4] ; enter the loop to do the first,
  158. ; partial iteration, after which we can
  159. ; just do full LOOP_UNROLLING-dword (64-word) blocks
  160. ; ***doesn't change carry flag***
  161. checksum_dword_loop: ; top of one full unrolled pass
  ; DEFLAB pastes its two arguments together into a label definition; the REPT
  ; below uses it to emit loop_entry_0, loop_entry_4, ... in front of each
  ; ADC/MOV pair so the jump table can enter the pass part-way through.
  ; Each pair adds the previously loaded dword (plus carry) and loads the next.
  162. DEFLAB macro pre,suf
  163. pre&suf:
  164. endm
  165. TEMP=0
  166. REPT LOOP_UNROLLING
  167. deflab loop_entry_,%TEMP
  168. adc eax,edx
  169. mov edx,[esi + TEMP]
  170. TEMP=TEMP+4
  171. ENDM
  172. checksum_dword_loop_end:
  173. lea esi,[esi + LOOP_UNROLLING * 4] ; update source address
  174. ; ***doesn't change carry flag***
  175. dec ecx ; count off unrolled loop iteration
  176. ; ***doesn't change carry flag***
  177. jnz checksum_dword_loop ; do more blocks
  178. checksum_dword_loop_done label proc
  179. adc eax,edx ; finish dword checksum
  180. mov edx,0 ; prepare to load trailing word (MOV keeps the carry from the ADC above)
  181. adc eax,edx ; fold in that final carry (EDX is 0)
  182. ;
  183. ; Compute checksum on the trailing word, if there is one.
  184. ; High word of EDX = 0 at this point
  185. ; Bit 0 of the word count saved on the stack tells whether a trailing word remains
  186. ;
  187. checksum_last_word label proc ; "proc" so not scoped to function
  188. pop ecx ; get back word count
  189. test ecx,1 ; is there a trailing word?
  190. jz short checksum_done ; no trailing word
  191. add ax,[esi] ; add in the trailing word
  192. adc eax,0 ; add the carry out of the 16-bit add back in
  193. checksum_done label proc ; "proc" so not scoped to function
  194. mov ecx,eax ; fold the checksum to 16 bits
  195. ror ecx,16 ; ECX = EAX with its two halves exchanged
  196. add eax,ecx ; high word of EAX = low half + high half + carry out of the low-half add
  197. mov ebx,[esp + buf] ; reload the original buffer address
  198. shr eax,16 ; keep only the folded 16-bit sum, in AX
  199. test ebx,1 ; check if buffer word aligned
  200. jz short checksum_combine ; if zf set, buffer word aligned
  201. ror ax,8 ; byte aligned--swap bytes back
  202. checksum_combine label proc ; "proc" so not scoped to function
  203. add ax,word ptr [esp + cksum] ; combine checksums
  204. pop esi ; restore nonvolatile register
  205. adc eax,0 ; add the carry from the combine (POP does not alter flags)
  206. pop ebx ; restore nonvolatile register
  207. stdRET tcpxsum
  ; REFLAB emits one dword holding the address of the label formed from its two
  ; arguments. The table below is indexed by the partial-pass dword count in
  ; EBX: entry 0 is a placeholder (the EBX = 0 case branches straight to
  ; checksum_dword_loop), and entry k is loop_entry_<(LOOP_UNROLLING - k) * 4>.
  208. REFLAB macro pre,suf
  209. dd pre&suf
  210. endm
  211. align 4
  212. loop_entry label dword
  213. dd 0
  214. TEMP=LOOP_UNROLLING*4
  215. REPT LOOP_UNROLLING-1
  216. TEMP=TEMP-4
  217. reflab loop_entry_,%TEMP
  218. ENDM
  219. stdENDP tcpxsum
  220. ifndef NO_XMMI
  221. LOOP_UNROLLING_BITS_XMMI equ 4 ; log2 of the number of dwords summed per pass of tcpxsum_xmmi's unrolled loop
  222. LOOP_UNROLLING_XMMI equ (1 SHL LOOP_UNROLLING_BITS_XMMI) ; dwords per unrolled pass (1 SHL 4 = 16, i.e. 64 bytes)
  223. ;VRSTEST EQU 0 ; (left commented out; defining VRSTEST assembles the comparison wrapper below)
  224. ifdef VRSTEST
  225. ;
  226. ; Test tcpxsum_xmmi for correctness.
  ; When VRSTEST is defined this wrapper takes the public name tcpxsum_xmmi and
  ; the real routine is assembled as tcpxsum_xmmi1. The wrapper calls tcpxsum
  ; and tcpxsum_xmmi1 with the same three arguments, in an order selected by
  ; one bit of the time-stamp counter, and compares the two results. On a
  ; mismatch it pushes the second result and spins on "int 3".
  227. tcksum equ 8[ebp] ; frame offset to initial checksum
  228. tbuf equ 12[ebp] ; frame offset to source address
  229. tlen equ 16[ebp] ; frame offset to length in bytes
  230. align
  231. cPublicProc tcpxsum_xmmi,3
  232. ;int 3
  233. push ebp
  234. mov ebp, esp ; EBP frame so the arguments can be re-pushed for each call
  235. push ebx
  236. push esi
  237. mov ebx, offset tcpxsum ; EBX = routine called first
  238. mov esi, offset tcpxsum_xmmi1 ; ESI = routine called second
  239. ; Get a "random" number
  240. .586p
  241. rdtsc
  242. .386p
  243. and eax, 10H ; keep bit 4 of the low dword of the counter
  244. jz old_then_new
  245. ; Swap which routine is called first
  246. push ebx
  247. mov ebx, esi
  248. pop esi
  249. old_then_new:
  250. ; Call the first routine
  251. push tlen
  252. push tbuf
  253. push tcksum
  254. call ebx
  255. ; Save the answer
  256. push eax
  257. ; Call the second routine
  258. push tlen
  259. push tbuf
  260. push tcksum
  261. call esi
  262. ; Check the answer
  263. cmp eax, [esp] ; second result vs. the saved first result
  264. jnz different_xsum
  265. ; Same answer, we are done
  266. pop eax ; discard the saved copy (equal to EAX)
  267. pop esi
  268. pop ebx
  269. pop ebp
  270. stdRET tcpxsum_xmmi
  271. align
  272. ; Different answers, need to debug the problem
  273. different_xsum:
  274. ; Get both checksums onto the stack
  275. push eax ; the first result is already on the stack; this adds the second
  276. ; ... and bugcheck
  277. ;EXTRNP _KeBugCheck,1,IMPORT
  278. ;stdCall _KeBugCheck, <0>
  279. again:
  280. int 3 ; breakpoint; the loop never returns to the caller
  281. jmp again
  282. stdENDP tcpxsum_xmmi
  283. endif
  284. ;++
  285. ;
  286. ; ULONG
  287. ; tcpxsum_xmmi(
  288. ; IN ULONG cksum,
  289. ; IN PUCHAR buf,
  290. ; IN ULONG len
  291. ; )
  292. ;
  293. ; Routine Description:
  294. ;
  295. ; This function computes the checksum of the specified buffer.
  296. ; It uses the processor's prefetch instruction.
  297. ;
  298. ; Arguments:
  299. ;
  300. ; cksum - Supplies the initial checksum value, in 16-bit form,
  301. ; with the high word set to 0.
  302. ;
  303. ; buf - Supplies a pointer to the buffer to checksum.
  304. ;
  305. ; len - Supplies the length of the buffer in bytes.
  306. ;
  307. ; Return Value:
  308. ;
  309. ; The checksum of the buffer, folded to 16 bits and added (with end-around
  310. ; carry) to the initial checksum, is returned as the function value.
  311. ;
  312. ;--
  313. cksum equ 12 ; stack offset to initial checksum (from ESP after the routine's two register pushes)
  314. buf equ 16 ; stack offset to source address
  315. len equ 20 ; stack offset to length in bytes
  ; Relay jumps placed just ahead of the prefetching checksum routine. The
  ; "jz short" branches near the top of that routine target these labels, and
  ; each one forwards with an unconditional jmp to the matching "label proc"
  ; location further down.
  316. to_checksum_last_word_xmmi: ; reached when there are no whole dwords to sum
  317. jmp checksum_last_word_xmmi
  318. to_checksum_done_xmmi: ; reached when no bytes remain to be summed
  319. jmp checksum_done_xmmi
  320. to_checksum_dword_loop_done_xmmi: ; reached when exactly one dword was preloaded
  321. jmp checksum_dword_loop_done_xmmi
  ; Prefetching variant of the checksum routine. It is assembled under the name
  ; tcpxsum_xmmi1 when VRSTEST is defined and tcpxsum_xmmi otherwise.
  ; Register use:
  ;   eax - running checksum accumulator
  ;   ecx - remaining count: bytes, then words, then dwords, then unrolled passes
  ;   edx - most recently loaded word/dword, consumed by the following ADD/ADC
  ;   esi - current source address
  ;   ebx - dword count of the first (partial) pass; reloaded with buf at the end
  322. ifdef VRSTEST
  323. cPublicProc tcpxsum_xmmi1,3
  324. else
  325. cPublicProc tcpxsum_xmmi,3
  326. endif
  327. push ebx ; save nonvolatile register
  328. push esi ; save nonvolatile register
  329. mov ecx,[esp + len] ; get length in bytes
  330. sub eax,eax ; clear computed checksum
  331. test ecx,ecx ; any bytes to checksum at all?
  332. jz short to_checksum_done_xmmi ; no bytes to checksum
  333. ;
  334. ; If the checksum buffer is not word aligned, then load the first byte of
  335. ; the buffer into AH of the computed checksum.
  336. ;
  337. mov esi,[esp + buf] ; get source address
  338. sub edx,edx ; set up to load word into EDX below
  339. test esi,1 ; check if buffer word aligned
  340. jz short checksum_word_aligned ; if zf, buffer word aligned
  341. mov ah,[esi] ; get first byte (we know we'll have
  342. ; to swap at the end)
  343. inc esi ; increment buffer address
  344. dec ecx ; decrement number of bytes
  345. jz short to_checksum_done_xmmi ; if zf set, no more bytes
  346. ;
  347. ; If the buffer is not an even number of bytes, then initialize
  348. ; the computed checksum with the last byte of the buffer.
  349. ;
  350. checksum_word_aligned: ;
  351. shr ecx,1 ; convert to word count (CF = odd byte left over, ZF = no whole words)
  352. jnc short checksum_start ; if nc, even number of bytes
  353. mov al,[esi+ecx*2] ; initialize the computed checksum with the last byte (MOV leaves the SHR flags intact)
  354. jz short to_checksum_done_xmmi ; if zf set, no more bytes
  355. ;
  356. ; Compute checksum in large blocks of dwords, with one partial word up front if
  357. ; necessary to get dword alignment, and another partial word at the end if
  358. ; needed.
  359. ;
  360. ;
  361. ; Compute checksum on the leading word, if that's necessary to get dword
  362. ; alignment.
  363. ;
  364. checksum_start: ;
  365. test esi,02h ; check if source dword aligned
  366. jz short checksum_dword_aligned ; source is already dword aligned
  367. mov dx,[esi] ; get first word to checksum
  368. add esi,2 ; update source address
  369. add eax,edx ; update partial checksum
  370. ; (no carry is possible, because EAX
  371. ; and EDX are both 16-bit values)
  372. dec ecx ; count off this word (zero case gets
  373. ; picked up below)
  374. ;
  375. ; Checksum as many words as possible by processing a dword at a time.
  376. ;
  377. checksum_dword_aligned:
  378. push ecx ; so we can tell if there's a trailing
  379. ; word later
  380. shr ecx,1 ; # of dwords to checksum
  381. jz short to_checksum_last_word_xmmi ; no dwords to checksum
  382. mov edx,[esi] ; preload the first dword
  383. add esi,4 ; point to the next dword
  384. dec ecx ; count off the dword we just loaded
  385. jz short to_checksum_dword_loop_done_xmmi
  386. ; skip the loop if that was the only
  387. ; dword
  388. mov ebx,ecx ; EBX = # of dwords left to checksum
  389. add ecx,LOOP_UNROLLING_XMMI-1 ; round up loop count
  390. shr ecx,LOOP_UNROLLING_BITS_XMMI ; convert from dword count to unrolled
  391. ; loop count
  392. and ebx,LOOP_UNROLLING_XMMI-1 ; # of partial dwords to do in first
  393. ; loop
  394. jz short checksum_dword_loop ; special-case when no partial loop,
  395. ; because fixup below doesn't work
  396. ; in that case (carry flag is
  397. ; cleared at this point, as required
  398. ; at loop entry)
  399. lea esi,[esi+ebx*4-(LOOP_UNROLLING_XMMI*4)]
  400. ; adjust buffer pointer back to
  401. ; compensate for hardwired displacement
  402. ; at loop entry point
  403. ; ***doesn't change carry flag***
  404. jmp xmmi_loop_entry[ebx*4] ; enter the loop to do the first,
  405. ; partial iteration, after which we can
  406. ; just do full LOOP_UNROLLING_XMMI-dword (32-word) blocks
  407. ; ***doesn't change carry flag***
  408. checksum_dword_loop: ; top of one full unrolled pass
  ; The four hand-encoded instructions below are each 0F 18 /0 with ModRM 46h,
  ; i.e. "prefetchnta byte ptr [esi+disp8]", for displacements 00h/20h/40h/60h.
  ; A partial first pass entered through xmmi_loop_entry skips them, because
  ; the entry labels come after this point.
  409. ; prefetch the 32-byte cache line from [esi+0]
  410. db 0fH
  411. db 18H
  412. db 46H
  413. db 00H
  414. ; prefetch the 32-byte cache line from [esi+20h]
  415. db 0fH
  416. db 18H
  417. db 46H
  418. db 20H
  419. ; prefetch the 32-byte cache line from [esi+40h]
  420. db 0fH
  421. db 18H
  422. db 46H
  423. db 40H
  424. ; prefetch the 32-byte cache line from [esi+60h]
  425. db 0fH
  426. db 18H
  427. db 46H
  428. db 60H
  ; DEFLAB pastes its two arguments together into a label definition; the REPT
  ; below uses it to emit xmmi_loop_entry_0, xmmi_loop_entry_4, ... in front of
  ; each ADC/MOV pair so the jump table can enter the pass part-way through.
  ; Each pair adds the previously loaded dword (plus carry) and loads the next.
  429. DEFLAB macro pre,suf
  430. pre&suf:
  431. endm
  432. TEMP=0
  433. REPT LOOP_UNROLLING_XMMI
  434. deflab xmmi_loop_entry_,%TEMP
  435. adc eax,edx
  436. mov edx,[esi + TEMP]
  437. TEMP=TEMP+4
  438. ENDM
  439. checksum_dword_loop_end:
  440. lea esi,[esi + LOOP_UNROLLING_XMMI * 4] ; update source address
  441. ; ***doesn't change carry flag***
  442. dec ecx ; count off unrolled loop iteration
  443. ; ***doesn't change carry flag***
  444. jnz checksum_dword_loop ; do more blocks
  445. checksum_dword_loop_done_xmmi label proc
  446. adc eax,edx ; finish dword checksum
  447. mov edx,0 ; prepare to load trailing word (MOV keeps the carry from the ADC above)
  448. adc eax,edx ; fold in that final carry (EDX is 0)
  449. ;
  450. ; Compute checksum on the trailing word, if there is one.
  451. ; High word of EDX = 0 at this point
  452. ; Bit 0 of the word count saved on the stack tells whether a trailing word remains
  453. ;
  454. checksum_last_word_xmmi label proc ; "proc" so not scoped to function
  455. pop ecx ; get back word count
  456. test ecx,1 ; is there a trailing word?
  457. jz short checksum_done_xmmi; no trailing word
  458. add ax,[esi] ; add in the trailing word
  459. adc eax,0 ; add the carry out of the 16-bit add back in
  460. checksum_done_xmmi label proc ; "proc" so not scoped to function
  461. mov ecx,eax ; fold the checksum to 16 bits
  462. ror ecx,16 ; ECX = EAX with its two halves exchanged
  463. add eax,ecx ; high word of EAX = low half + high half + carry out of the low-half add
  464. mov ebx,[esp + buf] ; reload the original buffer address
  465. shr eax,16 ; keep only the folded 16-bit sum, in AX
  466. test ebx,1 ; check if buffer word aligned
  467. jz short checksum_combine_xmmi ; if zf set, buffer word aligned
  468. ror ax,8 ; byte aligned--swap bytes back
  469. checksum_combine_xmmi label proc ; "proc" so not scoped to function
  470. add ax,word ptr [esp + cksum] ; combine checksums
  471. pop esi ; restore nonvolatile register
  472. adc eax,0 ; add the carry from the combine (POP does not alter flags)
  473. pop ebx ; restore nonvolatile register
  474. stdRET tcpxsum ; NOTE(review): names tcpxsum, not this procedure; both declare 3 arguments -- confirm against stdRET in callconv.inc
  ; REFLAB emits one dword holding the address of the label formed from its two
  ; arguments. The table below is indexed by the partial-pass dword count in
  ; EBX: entry 0 is a placeholder (the EBX = 0 case branches straight to
  ; checksum_dword_loop), and entry k is
  ; xmmi_loop_entry_<(LOOP_UNROLLING_XMMI - k) * 4>.
  475. REFLAB macro pre,suf
  476. dd pre&suf
  477. endm
  478. align 4
  479. xmmi_loop_entry label dword
  480. dd 0
  481. TEMP=LOOP_UNROLLING_XMMI*4
  482. REPT LOOP_UNROLLING_XMMI-1
  483. TEMP=TEMP-4
  484. reflab xmmi_loop_entry_,%TEMP
  485. ENDM
  486. ifdef VRSTEST
  487. stdENDP tcpxsum_xmmi1
  488. else
  489. stdENDP tcpxsum_xmmi
  490. endif
  491. endif ; NO_XMMI
  492. ifndef NO_OLD_FLUSHSLIST
  493. ;++
  494. ;
  495. ; PSINGLE_LIST_ENTRY
  496. ; FASTCALL
  497. ; InterlockedFlushSList (
  498. ; IN PSINGLE_LIST_ENTRY ListHead
  499. ; )
  500. ;
  501. ; Routine Description:
  502. ;
  503. ; This function removes the entire list from a sequenced singly
  504. ; linked list so that access to the list is synchronized in an MP system.
  505. ; If there are no entries in the list, then a value of NULL is returned.
  506. ; Otherwise, the address of the entry at the top of the list is removed
  507. ; and returned as the function value and the list header is set to point
  508. ; to NULL.
  509. ;
  510. ; N.B. The cmpxchg8b instruction is only supported on some processors.
  511. ; If the host processor does not support this instruction, then
  512. ; the following code is patched to contain a jump to the normal
  513. ; pop entry code which has a compatible calling sequence and data
  514. ; structure.
  515. ;
  516. ; Arguments:
  517. ;
  518. ; (ecx) = ListHead - Supplies a pointer to the sequenced listhead from
  519. ; which the list is to be flushed.
  520. ;
  521. ; Return Value:
  522. ;
  523. ; The address of the entire current list, or NULL if the list is
  524. ; empty.
  525. ;
  526. ;--
  527. cPublicProc InterlockedFlushSList, 1
  528. ;
  529. ; Save nonvolatile registers and read the listhead sequence number followed
  530. ; by the listhead next link.
  531. ;
  532. ; N.B. These two dwords MUST be read exactly in this order.
  533. ;
  ; NOTE(review): the routine header describes a FASTCALL entry with
  ; (ecx) = ListHead, but the code below saves the incoming ECX and loads the
  ; listhead address from the first stack argument -- confirm which is intended.
  ; The header also says this code can be patched at run time, so the exact
  ; instruction encodings here may matter -- confirm before changing any of them.
  534. push ecx ; preserve ECX (restored after Efls20)
  535. push ebx ; save nonvolatile registers
  536. push ebp ;
  537. mov ecx, [esp+16] ; listhead address: the dword above the three saved registers and the return address
  538. mov ebp, ecx ; save listhead address
  539. mov edx, [ebp] + 4 ; get current sequence number
  540. mov eax, [ebp] + 0 ; get current next link
  541. ;
  542. ; N.B. The following code is the retry code should the compare
  543. ; part of the compare exchange operation fail
  544. ;
  545. ; If the list is empty, then there is nothing that can be removed.
  546. ;
  547. Efls10: or eax, eax ; check if list is empty
  548. jz short Efls20 ; if z set, list is empty
  549. mov ecx, 0 ; clear sequence number and depth
  550. mov ebx, 0 ; clear successor entry pointer
  551. .586
  ; cmpxchg8b: if EDX:EAX equals the qword at [ebp], store ECX:EBX (zero) there
  ; and set ZF; otherwise load the current qword into EDX:EAX and clear ZF.
  552. ifndef NT_UP
  553. lock cmpxchg8b qword ptr [ebp] ; compare and exchange
  554. else
  555. cmpxchg8b qword ptr [ebp] ; compare and exchange
  556. endif
  557. .386
  558. jnz short Efls10 ; if z clear, exchange failed
  559. ;
  560. ; Restore nonvolatile registers and return result.
  561. ;
  562. Efls20: pop ebp ; restore nonvolatile registers
  563. pop ebx ;
  564. pop ecx ; restore the caller's ECX; EAX holds the old next link (0 if the list was empty)
  565. stdRET InterlockedFlushSList
  566. stdENDP InterlockedFlushSList
  567. endif ; NO_OLD_FLUSHSLIST
  568. end