Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

699 lines
23 KiB

  1. title "Compute Checksum"
  2. ;/*++
  3. ;
  4. ; Copyright (c) Microsoft Corporation. All rights reserved.
  5. ;
  6. ; Module Name:
  7. ;
  8. ; xsum.x86
  9. ;
  10. ; Abstract:
  11. ;
  12. ; This module implements a function to compute the checksum of a buffer.
  13. ;
  14. ; Author:
  15. ;
  16. ; David N. Cutler (davec) 27-Jan-1992
  17. ;
  18. ; Revision History:
  19. ;
  20. ; Who When What
  21. ; -------- -------- ----------------------------------------------
  22. ; mikeab 01-22-94 Pentium optimization
  23. ;
  24. ; Environment:
  25. ;
  26. ; Any mode.
  27. ;
  28. ; Revision History:
  29. ;
  30. ;--*/
  31. LOOP_UNROLLING_BITS equ 5 ; log2 of the dwords handled per pass of tcpxsum's unrolled loop
  32. LOOP_UNROLLING equ (1 SHL LOOP_UNROLLING_BITS) ; number of adc/mov steps generated by REPT in tcpxsum
  33. .386
  34. .model small,c
  35. assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
  36. assume fs:nothing,gs:nothing
  37. .xlist ; listing off while the include files are read
  38. include callconv.inc ; presumably supplies cPublicProc/stdRET/stdENDP used below -- confirm
  39. include ks386.inc
  40. .list ; listing back on
  41. .code
  42. ;++
  43. ;
  44. ; ULONG
  45. ; tcpxsum(
  46. ; IN ULONG cksum,
  47. ; IN PUCHAR buf,
  48. ; IN ULONG len
  49. ; )
  50. ;
  51. ; Routine Description:
  52. ;
  53. ; This function computes the checksum of the specified buffer.
  54. ;
  55. ; Arguments:
  56. ;
  57. ; cksum - Supplies the initial checksum value, in 16-bit form,
  58. ; with the high word set to 0.
  59. ;
  60. ; buf - Supplies a pointer to the buffer to be checksummed.
  61. ;
  62. ; len - Supplies the length of the buffer in bytes.
  63. ;
  64. ; Return Value:
  65. ;
  66. ; The sum of the buffer, folded to 16 bits with end-around carry and then
  67. ; added (again with end-around carry) to the low word of cksum, is returned in EAX.
  68. ;
  69. ;--
  70. cksum equ 12 ; stack offset to initial checksum (ESP-relative, after the two pushes in the prolog)
  71. buf equ 16 ; stack offset to source address
  72. len equ 20 ; stack offset to length in bytes
  73. to_checksum_last_word: ; relay: target of a "jz short" in the body, forwards to the real label
  74. jmp checksum_last_word
  75. to_checksum_done: ; relay: target of the "jz short" early exits in the body
  76. jmp checksum_done
  77. to_checksum_dword_loop_done: ; relay: target of a "jz short" in the body
  78. jmp checksum_dword_loop_done
  79. cPublicProc tcpxsum,3
  80. ; FPO = 0 dwords locals allocated in prolog
  81. ; 3 dword parameters
  82. ; 2 bytes in prolog
  83. ; 2 registers saved
  84. ; 0 EBP is not used
  85. ; 0 frame type = FPO
  86. .FPO (0,3,2,2,0,0)
  87. push ebx ; save nonvolatile register
  88. push esi ; save nonvolatile register
  89. mov ecx,[esp + len] ; get length in bytes
  90. sub eax,eax ; clear computed checksum
  91. test ecx,ecx ; any bytes to checksum at all?
  92. jz short to_checksum_done ; no bytes to checksum
  93. ;
  94. ; If the checksum buffer is not word aligned, then start the computed
  95. ; checksum with the first byte of the buffer, placed in AH.
  96. ;
  97. mov esi,[esp + buf] ; get source address
  98. sub edx,edx ; set up to load word into EDX below
  99. test esi,1 ; check if buffer word aligned
  100. jz short checksum_word_aligned ; if zf, buffer word aligned
  101. mov ah,[esi] ; get first byte (we know we'll have
  102. ; to swap at the end)
  103. inc esi ; increment buffer address
  104. dec ecx ; decrement number of bytes
  105. jz short to_checksum_done ; if zf set, no more bytes
  106. ;
  107. ; If the buffer is not an even number of bytes, then initialize
  108. ; the low byte of the computed checksum with the last byte of the buffer.
  109. ;
  110. checksum_word_aligned: ;
  111. shr ecx,1 ; convert to word count
  112. jnc short checksum_start ; if nc, even number of bytes
  113. mov al,[esi+ecx*2] ; initialize the computed checksum
  114. jz short to_checksum_done ; if zf set, no more bytes (ZF is still from the shr; mov leaves flags alone)
  115. ;
  116. ; Compute checksum in large blocks of dwords, with one partial word up front if
  117. ; necessary to get dword alignment, and another partial word at the end if
  118. ; needed.
  119. ;
  120. ;
  121. ; Compute checksum on the leading word, if that's necessary to get dword
  122. ; alignment.
  123. ;
  124. checksum_start: ;
  125. test esi,02h ; check if source dword aligned
  126. jz short checksum_dword_aligned ; source is already dword aligned
  127. mov dx,[esi] ; get first word to checksum
  128. add esi,2 ; update source address
  129. add eax,edx ; update partial checksum
  130. ; (no carry is possible, because EAX
  131. ; and EDX are both 16-bit values)
  132. dec ecx ; count off this word (zero case gets
  133. ; picked up below)
  134. ;
  135. ; Checksum as many words as possible by processing a dword at a time.
  136. ;
  137. checksum_dword_aligned:
  138. push ecx ; so we can tell if there's a trailing
  139. ; word later
  140. shr ecx,1 ; # of dwords to checksum
  141. jz short to_checksum_last_word ; no dwords to checksum
  142. mov edx,[esi] ; preload the first dword
  143. add esi,4 ; point to the next dword
  144. dec ecx ; count off the dword we just loaded
  145. jz short to_checksum_dword_loop_done
  146. ; skip the loop if that was the only
  147. ; dword
  148. mov ebx,ecx ; EBX = # of dwords left to checksum
  149. add ecx,LOOP_UNROLLING-1 ; round up loop count
  150. shr ecx,LOOP_UNROLLING_BITS ; convert from dword count to unrolled
  151. ; loop count
  152. and ebx,LOOP_UNROLLING-1 ; # of partial dwords to do in first
  153. ; loop
  154. jz short checksum_dword_loop ; special-case when no partial loop,
  155. ; because fixup below doesn't work
  156. ; in that case (carry flag is
  157. ; cleared at this point, as required
  158. ; at loop entry)
  159. lea esi,[esi+ebx*4-(LOOP_UNROLLING*4)]
  160. ; adjust buffer pointer back to
  161. ; compensate for hardwired displacement
  162. ; at loop entry point
  163. ; ***doesn't change carry flag***
  164. jmp loop_entry[ebx*4] ; enter the loop to do the first,
  165. ; partial iteration, after which we can
  166. ; just do 64-word blocks
  167. ; ***doesn't change carry flag***
  168. checksum_dword_loop:
  169. DEFLAB macro pre,suf ; emits one label whose name is the two arguments joined
  170. pre&suf:
  171. endm
  172. TEMP=0
  173. REPT LOOP_UNROLLING
  174. deflab loop_entry_,%TEMP
  175. adc eax,edx ; add the previously loaded dword plus the running carry
  176. mov edx,[esi + TEMP] ; load the next dword (mov leaves the carry flag alone)
  177. TEMP=TEMP+4
  178. ENDM
  179. checksum_dword_loop_end:
  180. lea esi,[esi + LOOP_UNROLLING * 4] ; update source address
  181. ; ***doesn't change carry flag***
  182. dec ecx ; count off unrolled loop iteration
  183. ; ***doesn't change carry flag***
  184. jnz checksum_dword_loop ; do more blocks
  185. checksum_dword_loop_done label proc
  186. adc eax,edx ; finish dword checksum
  187. mov edx,0 ; zero EDX with mov so the carry from the adc above is preserved
  188. adc eax,edx ; add that last carry into the sum
  189. ;
  190. ; Compute checksum on the trailing word, if there is one.
  191. ; High word of EDX = 0 at this point
  192. ; Bit 0 of the word count saved on the stack tells whether there is a trailing word
  193. ;
  194. checksum_last_word label proc ; "proc" so not scoped to function
  195. pop ecx ; get back word count
  196. test ecx,1 ; is there a trailing word?
  197. jz short checksum_done ; no trailing word
  198. add ax,[esi] ; add in the trailing word
  199. adc eax,0 ; add back the carry out of the 16-bit add
  200. checksum_done label proc ; "proc" so not scoped to function
  201. mov ecx,eax ; fold the checksum to 16 bits
  202. ror ecx,16 ; ECX = the sum with its two halves swapped
  203. add eax,ecx ; high word of EAX = low half + high half + carry from the low-half add
  204. mov ebx,[esp + buf] ; reload the original buffer address
  205. shr eax,16 ; bring the folded sum down; high word of EAX is now 0
  206. test ebx,1 ; check if buffer word aligned
  207. jz short checksum_combine ; if zf set, buffer word aligned
  208. ror ax,8 ; byte aligned--swap bytes back
  209. checksum_combine label proc ; "proc" so not scoped to function
  210. add ax,word ptr [esp + cksum] ; combine checksums
  211. pop esi ; restore nonvolatile register
  212. adc eax,0 ; add back the carry from the combine (pop does not alter flags)
  213. pop ebx ; restore nonvolatile register
  214. stdRET tcpxsum
  215. REFLAB macro pre,suf ; emits one dword holding the address of the joined label name
  216. dd pre&suf
  217. endm
  218. align 4
  219. loop_entry label dword ; jump table indexed by the partial-dword count in EBX
  220. dd 0 ; slot 0 is not used: the EBX == 0 case branches straight to checksum_dword_loop
  221. TEMP=LOOP_UNROLLING*4
  222. REPT LOOP_UNROLLING-1 ; slot k holds the entry label that leaves k adc/mov steps to run
  223. TEMP=TEMP-4
  224. reflab loop_entry_,%TEMP
  225. ENDM
  226. stdENDP tcpxsum
  227. ifndef NO_XMMI
  228. LOOP_UNROLLING_BITS_XMMI equ 4 ; log2 of the dwords handled per pass of the tcpxsum_xmmi unrolled loop
  229. LOOP_UNROLLING_XMMI equ (1 SHL LOOP_UNROLLING_BITS_XMMI) ; number of adc/mov steps generated by REPT in tcpxsum_xmmi
  230. ;VRSTEST EQU 0
  231. ifdef VRSTEST
  232. ;
  233. ; Test tcpxsum_xmmi for correctness: under VRSTEST this wrapper takes the tcpxsum_xmmi name, calls tcpxsum and tcpxsum_xmmi1 with the caller's three arguments, and stops on int 3 if the two results differ.
  234. tcksum equ 8[ebp] ; EBP-relative location of the initial checksum argument
  235. tbuf equ 12[ebp] ; EBP-relative location of the source address argument
  236. tlen equ 16[ebp] ; EBP-relative location of the length argument (bytes, as passed on to tcpxsum)
  237. align
  238. cPublicProc tcpxsum_xmmi,3
  239. ;int 3
  240. push ebp
  241. mov ebp, esp ; EBP frame so the arguments stay addressable while ESP moves
  242. push ebx
  243. push esi
  244. mov ebx, offset tcpxsum ; EBX = routine called first
  245. mov esi, offset tcpxsum_xmmi1 ; ESI = routine called second
  246. ; Get a "random" number
  247. .586p
  248. rdtsc
  249. .386p
  250. and eax, 10H ; bit 4 of the low dword of the time-stamp counter picks the call order
  251. jz old_then_new
  252. ; Swap which routine is called first
  253. push ebx
  254. mov ebx, esi
  255. pop esi
  256. old_then_new:
  257. ; Call the first routine
  258. push tlen
  259. push tbuf
  260. push tcksum
  261. call ebx
  262. ; Save the answer
  263. push eax
  264. ; Call the second routine
  265. push tlen
  266. push tbuf
  267. push tcksum
  268. call esi
  269. ; Check the answer (no stack cleanup follows the calls, so the saved first answer is expected at [esp])
  270. cmp eax, [esp]
  271. jnz different_xsum
  272. ; Same answer, we are done
  273. pop eax ; discard the saved answer; it equals the value already in EAX
  274. pop esi
  275. pop ebx
  276. pop ebp
  277. stdRET tcpxsum_xmmi
  278. align
  279. ; Different answers, need to debug the problem
  280. different_xsum:
  281. ; Get both checksums onto the stack
  282. push eax
  283. ; ... and stop on a breakpoint forever (the KeBugCheck call below is commented out)
  284. ;EXTRNP _KeBugCheck,1,IMPORT
  285. ;stdCall _KeBugCheck, <0>
  286. again:
  287. int 3
  288. jmp again
  289. stdENDP tcpxsum_xmmi
  290. endif
  291. ;++
  292. ;
  293. ; ULONG
  294. ; tcpxsum_xmmi(
  295. ; IN ULONG cksum,
  296. ; IN PUCHAR buf,
  297. ; IN ULONG len
  298. ; )
  299. ;
  300. ; Routine Description:
  301. ;
  302. ; This function computes the checksum of the specified buffer.
  303. ; It issues four hand-encoded prefetch instructions at the top of each full pass of the unrolled loop.
  304. ;
  305. ; Arguments:
  306. ;
  307. ; cksum - Supplies the initial checksum value, in 16-bit form,
  308. ; with the high word set to 0.
  309. ;
  310. ; buf - Supplies a pointer to the buffer to be checksummed.
  311. ;
  312. ; len - Supplies the length of the buffer in bytes.
  313. ;
  314. ; Return Value:
  315. ;
  316. ; The sum of the buffer, folded to 16 bits with end-around carry and then
  317. ; added (again with end-around carry) to the low word of cksum, is returned in EAX.
  318. ;
  319. ;--
  320. cksum equ 12 ; stack offset to initial checksum (ESP-relative, after the two pushes in the prolog)
  321. buf equ 16 ; stack offset to source address
  322. len equ 20 ; stack offset to length in bytes
  323. to_checksum_last_word_xmmi: ; relay: target of a "jz short" in the body, forwards to the real label
  324. jmp checksum_last_word_xmmi
  325. to_checksum_done_xmmi: ; relay: target of the "jz short" early exits in the body
  326. jmp checksum_done_xmmi
  327. to_checksum_dword_loop_done_xmmi: ; relay: target of a "jz short" in the body
  328. jmp checksum_dword_loop_done_xmmi
  329. ifdef VRSTEST
  330. cPublicProc tcpxsum_xmmi1,3
  331. else
  332. cPublicProc tcpxsum_xmmi,3
  333. endif
  334. ; FPO = 0 dwords locals allocated in prolog
  335. ; 3 dword parameters
  336. ; 2 bytes in prolog
  337. ; 2 registers saved
  338. ; 0 EBP is not used
  339. ; 0 frame type = FPO
  340. .FPO (0,3,2,2,0,0)
  341. push ebx ; save nonvolatile register
  342. push esi ; save nonvolatile register
  343. mov ecx,[esp + len] ; get length in bytes
  344. sub eax,eax ; clear computed checksum
  345. test ecx,ecx ; any bytes to checksum at all?
  346. jz short to_checksum_done_xmmi ; no bytes to checksum
  347. ;
  348. ; If the checksum buffer is not word aligned, then start the computed
  349. ; checksum with the first byte of the buffer, placed in AH.
  350. ;
  351. mov esi,[esp + buf] ; get source address
  352. sub edx,edx ; set up to load word into EDX below
  353. test esi,1 ; check if buffer word aligned
  354. jz short checksum_word_aligned ; if zf, buffer word aligned
  355. mov ah,[esi] ; get first byte (we know we'll have
  356. ; to swap at the end)
  357. inc esi ; increment buffer address
  358. dec ecx ; decrement number of bytes
  359. jz short to_checksum_done_xmmi ; if zf set, no more bytes
  360. ;
  361. ; If the buffer is not an even number of bytes, then initialize
  362. ; the low byte of the computed checksum with the last byte of the buffer.
  363. ;
  364. checksum_word_aligned: ;
  365. shr ecx,1 ; convert to word count
  366. jnc short checksum_start ; if nc, even number of bytes
  367. mov al,[esi+ecx*2] ; initialize the computed checksum
  368. jz short to_checksum_done_xmmi ; if zf set, no more bytes (ZF is still from the shr; mov leaves flags alone)
  369. ;
  370. ; Compute checksum in large blocks of dwords, with one partial word up front if
  371. ; necessary to get dword alignment, and another partial word at the end if
  372. ; needed.
  373. ;
  374. ;
  375. ; Compute checksum on the leading word, if that's necessary to get dword
  376. ; alignment.
  377. ;
  378. checksum_start: ;
  379. test esi,02h ; check if source dword aligned
  380. jz short checksum_dword_aligned ; source is already dword aligned
  381. mov dx,[esi] ; get first word to checksum
  382. add esi,2 ; update source address
  383. add eax,edx ; update partial checksum
  384. ; (no carry is possible, because EAX
  385. ; and EDX are both 16-bit values)
  386. dec ecx ; count off this word (zero case gets
  387. ; picked up below)
  388. ;
  389. ; Checksum as many words as possible by processing a dword at a time.
  390. ;
  391. checksum_dword_aligned:
  392. push ecx ; so we can tell if there's a trailing
  393. ; word later
  394. shr ecx,1 ; # of dwords to checksum
  395. jz short to_checksum_last_word_xmmi ; no dwords to checksum
  396. mov edx,[esi] ; preload the first dword
  397. add esi,4 ; point to the next dword
  398. dec ecx ; count off the dword we just loaded
  399. jz short to_checksum_dword_loop_done_xmmi
  400. ; skip the loop if that was the only
  401. ; dword
  402. mov ebx,ecx ; EBX = # of dwords left to checksum
  403. add ecx,LOOP_UNROLLING_XMMI-1 ; round up loop count
  404. shr ecx,LOOP_UNROLLING_BITS_XMMI ; convert from dword count to unrolled
  405. ; loop count
  406. and ebx,LOOP_UNROLLING_XMMI-1 ; # of partial dwords to do in first
  407. ; loop
  408. jz short checksum_dword_loop ; special-case when no partial loop,
  409. ; because fixup below doesn't work
  410. ; in that case (carry flag is
  411. ; cleared at this point, as required
  412. ; at loop entry)
  413. lea esi,[esi+ebx*4-(LOOP_UNROLLING_XMMI*4)]
  414. ; adjust buffer pointer back to
  415. ; compensate for hardwired displacement
  416. ; at loop entry point
  417. ; ***doesn't change carry flag***
  418. jmp xmmi_loop_entry[ebx*4] ; enter the loop to do the first,
  419. ; partial iteration, after which we can
  420. ; just do full LOOP_UNROLLING_XMMI-dword blocks
  421. ; ***doesn't change carry flag***
  422. checksum_dword_loop:
  423. ; prefetch the 32-byte cache line from [esi+0]
  424. db 0fH
  425. db 18H
  426. db 46H
  427. db 00H
  428. ; prefetch the 32-byte cache line from [esi+20h]
  429. db 0fH
  430. db 18H
  431. db 46H
  432. db 20H
  433. ; prefetch the 32-byte cache line from [esi+40h]
  434. db 0fH
  435. db 18H
  436. db 46H
  437. db 40H
  438. ; prefetch the 32-byte cache line from [esi+60h]
  439. db 0fH
  440. db 18H
  441. db 46H
  442. db 60H
  443. DEFLAB macro pre,suf ; emits one label whose name is the two arguments joined
  444. pre&suf:
  445. endm
  446. TEMP=0
  447. REPT LOOP_UNROLLING_XMMI
  448. deflab xmmi_loop_entry_,%TEMP
  449. adc eax,edx ; add the previously loaded dword plus the running carry
  450. mov edx,[esi + TEMP] ; load the next dword (mov leaves the carry flag alone)
  451. TEMP=TEMP+4
  452. ENDM
  453. checksum_dword_loop_end:
  454. lea esi,[esi + LOOP_UNROLLING_XMMI * 4] ; update source address
  455. ; ***doesn't change carry flag***
  456. dec ecx ; count off unrolled loop iteration
  457. ; ***doesn't change carry flag***
  458. jnz checksum_dword_loop ; do more blocks
  459. checksum_dword_loop_done_xmmi label proc
  460. adc eax,edx ; finish dword checksum
  461. mov edx,0 ; zero EDX with mov so the carry from the adc above is preserved
  462. adc eax,edx ; add that last carry into the sum
  463. ;
  464. ; Compute checksum on the trailing word, if there is one.
  465. ; High word of EDX = 0 at this point
  466. ; Bit 0 of the word count saved on the stack tells whether there is a trailing word
  467. ;
  468. checksum_last_word_xmmi label proc ; "proc" so not scoped to function
  469. pop ecx ; get back word count
  470. test ecx,1 ; is there a trailing word?
  471. jz short checksum_done_xmmi ; no trailing word
  472. add ax,[esi] ; add in the trailing word
  473. adc eax,0 ; add back the carry out of the 16-bit add
  474. checksum_done_xmmi label proc ; "proc" so not scoped to function
  475. mov ecx,eax ; fold the checksum to 16 bits
  476. ror ecx,16 ; ECX = the sum with its two halves swapped
  477. add eax,ecx ; high word of EAX = low half + high half + carry from the low-half add
  478. mov ebx,[esp + buf] ; reload the original buffer address
  479. shr eax,16 ; bring the folded sum down; high word of EAX is now 0
  480. test ebx,1 ; check if buffer word aligned
  481. jz short checksum_combine_xmmi ; if zf set, buffer word aligned
  482. ror ax,8 ; byte aligned--swap bytes back
  483. checksum_combine_xmmi label proc ; "proc" so not scoped to function
  484. add ax,word ptr [esp + cksum] ; combine checksums
  485. pop esi ; restore nonvolatile register
  486. adc eax,0 ; add back the carry from the combine (pop does not alter flags)
  487. pop ebx ; restore nonvolatile register
ifdef VRSTEST
stdRET tcpxsum_xmmi1 ; return through this procedure's own name, matching cPublicProc/stdENDP
else
  488. stdRET tcpxsum_xmmi ; return through this procedure's own name, matching cPublicProc/stdENDP
endif
  489. REFLAB macro pre,suf ; emits one dword holding the address of the joined label name
  490. dd pre&suf
  491. endm
  492. align 4
  493. xmmi_loop_entry label dword ; jump table indexed by the partial-dword count in EBX
  494. dd 0 ; slot 0 is not used: the EBX == 0 case branches straight to checksum_dword_loop
  495. TEMP=LOOP_UNROLLING_XMMI*4
  496. REPT LOOP_UNROLLING_XMMI-1 ; slot k holds the entry label that leaves k adc/mov steps to run
  497. TEMP=TEMP-4
  498. reflab xmmi_loop_entry_,%TEMP
  499. ENDM
  500. ifdef VRSTEST
  501. stdENDP tcpxsum_xmmi1
  502. else
  503. stdENDP tcpxsum_xmmi
  504. endif
  505. endif ; NO_XMMI
  506. ifndef NO_OLD_FLUSHSLIST
  507. ;++
  508. ;
  509. ; PSINGLE_LIST_ENTRY
  510. ; FASTCALL (NOTE(review): the body is declared with cPublicProc and loads ListHead from the stack, not from ECX -- confirm the intended convention)
  511. ; InterlockedFlushSList (
  512. ; IN PSINGLE_LIST_ENTRY ListHead
  513. ; )
  514. ;
  515. ; Routine Description:
  516. ;
  517. ; This function removes the entire list from a sequenced singly
  518. ; linked list so that access to the list is synchronized in an MP system.
  519. ; If there are no entries in the list, then a value of NULL is returned.
  520. ; Otherwise, the address of the entry at the top of the list is removed
  521. ; and returned as the function value and the list header is set to point
  522. ; to NULL.
  523. ;
  524. ; N.B. The cmpxchg8b instruction is only supported on some processors.
  525. ; If the host processor does not support this instruction, then
  526. ; the following code is patched to contain a jump to the normal
  527. ; pop entry code which has a compatible calling sequence and data
  528. ; structure.
  529. ;
  530. ; Arguments:
  531. ;
  532. ; ListHead - Supplies a pointer to the sequenced listhead from
  533. ; which the list is to be flushed. The code below reads it from [esp+16] after its three pushes.
  534. ;
  535. ; Return Value:
  536. ;
  537. ; The address of the entire current list, or NULL if the list is
  538. ; empty.
  539. ;
  540. ;--
  541. cPublicProc InterlockedFlushSList, 1
  542. ;
  543. ; Save nonvolatile registers and read the listhead sequence number followed
  544. ; by the listhead next link.
  545. ;
  546. ; N.B. These two dwords MUST be read exactly in this order.
  547. ;
  548. push ecx ; preserve the caller's ECX (restored before returning)
  549. push ebx ; save nonvolatile registers
  550. push ebp ;
  551. mov ecx, [esp+16] ; ListHead: three pushes (12 bytes) plus the return address (4 bytes)
  552. mov ebp, ecx ; save listhead address
  553. mov edx, [ebp] + 4 ; get current sequence number
  554. mov eax, [ebp] + 0 ; get current next link
  555. ;
  556. ; N.B. The following code is the retry code should the compare
  557. ; part of the compare exchange operation fail
  558. ;
  559. ; If the list is empty, then there is nothing that can be removed.
  560. ;
  561. Efls10: or eax, eax ; check if list is empty
  562. jz short Efls20 ; if z set, list is empty
  563. mov ecx, 0 ; clear sequence number and depth (ECX = high dword of the replacement value)
  564. mov ebx, 0 ; clear successor entry pointer (EBX = low dword of the replacement value)
  565. .586
  566. ifndef NT_UP
  567. lock cmpxchg8b qword ptr [ebp] ; compare and exchange: if [ebp] == EDX:EAX store ECX:EBX, else reload EDX:EAX from [ebp]
  568. else
  569. cmpxchg8b qword ptr [ebp] ; compare and exchange (no lock prefix when NT_UP is defined)
  570. endif
  571. .386
  572. jnz short Efls10 ; if z clear, exchange failed; retry with the values just reloaded into EDX:EAX
  573. ;
  574. ; Restore nonvolatile registers and return result.
  575. ;
  576. Efls20: pop ebp ; restore nonvolatile registers
  577. pop ebx ;
  578. pop ecx ; restore the caller's ECX
  579. stdRET InterlockedFlushSList
  580. stdENDP InterlockedFlushSList
  581. endif ; NO_OLD_FLUSHSLIST
  582. end