Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

677 lines
22 KiB

  1. title "Compute Checksum"
  2. ;/*++
  3. ;
  4. ; Copyright (c) 1992 Microsoft Corporation
  5. ;
  6. ; Module Name:
  7. ;
  8. ; cksy.asm
  9. ;
  10. ; Abstract:
  11. ;
  12. ; This module implements a function to compute the checksum of a buffer.
  13. ;
  14. ; Author:
  15. ;
  16. ; David N. Cutler (davec) 27-Jan-1992
  17. ;
  18. ; Revision History:
  19. ;
  20. ; Who When What
  21. ; -------- -------- ----------------------------------------------
  22. ; mikeab 01-22-94 Pentium optimization
  23. ;
  24. ; Environment:
  25. ;
  26. ; Any mode.
  27. ;
  28. ; Revision History:
  29. ;
  30. ;--*/
  31. LOOP_UNROLLING_BITS equ 5 ; log2 of the tcpxsum unroll factor
  32. LOOP_UNROLLING equ (1 SHL LOOP_UNROLLING_BITS) ; dwords handled per pass of the unrolled tcpxsum loop (32)
  33. .386
  34. .model small,c
  35. assume cs:FLAT,ds:FLAT,es:FLAT,ss:FLAT
  36. assume fs:nothing,gs:nothing
  37. .xlist ; listing off while the include files are read
  38. include callconv.inc
  39. include ks386.inc
  40. .list ; listing back on
  41. .code
  42. ;++
  43. ;
  44. ; ULONG
  45. ; tcpxsum(
  46. ; IN ULONG cksum,
  47. ; IN PUCHAR buf,
  48. ; IN ULONG len
  49. ; )
  50. ;
  51. ; Routine Description:
  52. ;
  53. ; This function computes the checksum of the specified buffer.
  54. ;
  55. ; Arguments:
  56. ;
  57. ; cksum - Supplies the initial checksum value, in 16-bit form,
  58. ; with the high word set to 0.
  59. ;
  60. ; buf - Supplies a pointer to the buffer to checksum.
  61. ;
  62. ; len - Supplies the length of the buffer in bytes.
  63. ;
  64. ; Return Value:
  65. ;
  66. ; The checksum of the buffer, folded to 16 bits and combined with the
  67. ; initial checksum using end-around carry, is returned as the function value.
  68. ;
  69. ;--
  70. cksum equ 12 ; stack offset to initial checksum (valid after the two pushes below)
  71. buf equ 16 ; stack offset to source address
  72. len equ 20 ; stack offset to length in bytes
  73. to_checksum_last_word: ; trampolines: targets of the "jz short" exits in tcpxsum
  74. jmp checksum_last_word ; forward to the real label
  75. to_checksum_done:
  76. jmp checksum_done ; forward to the real label
  77. to_checksum_dword_loop_done:
  78. jmp checksum_dword_loop_done ; forward to the real label
  79. cPublicProc tcpxsum,3
  ;
  ; Register roles in this routine:
  ;   EAX = running checksum (32-bit accumulator, folded to 16 bits at the end)
  ;   EDX = dword that has been loaded but not yet added
  ;   ESI = current source address
  ;   ECX = remaining count (bytes, then words, then dwords, then loop passes)
  ;   EBX = number of dwords in the first, partial pass; scratch at the end
  ;
  80. push ebx ; save nonvolatile register
  81. push esi ; save nonvolatile register
  82. mov ecx,[esp + len] ; get length in bytes
  83. sub eax,eax ; clear computed checksum
  84. test ecx,ecx ; any bytes to checksum at all?
  85. jz short to_checksum_done ; no bytes to checksum
  86. ;
  87. ; If the checksum buffer is not word aligned, then put the first byte of
  88. ; the buffer into the high byte of the computed checksum.
  89. ;
  90. mov esi,[esp + buf] ; get source address
  91. sub edx,edx ; set up to load word into EDX below
  92. test esi,1 ; check if buffer word aligned
  93. jz short checksum_word_aligned ; if zf, buffer word aligned
  94. mov ah,[esi] ; get first byte (we know we'll have
  95. ; to swap at the end)
  96. inc esi ; increment buffer address
  97. dec ecx ; decrement number of bytes
  98. jz short to_checksum_done ; if zf set, no more bytes
  99. ;
  100. ; If the buffer is not an even number of bytes, then initialize
  101. ; the computed checksum with the last byte of the buffer.
  102. ;
  103. checksum_word_aligned: ; ESI is word aligned here, ECX = bytes left
  104. shr ecx,1 ; convert to word count
  105. jnc short checksum_start ; if nc, even number of bytes
  106. mov al,[esi+ecx*2] ; initialize the computed checksum
  ; (mov leaves the flags from the shr intact)
  107. jz short to_checksum_done ; if zf set (by the shr), no whole words
  108. ;
  109. ; Compute checksum in large blocks of dwords, with one partial word up front if
  110. ; necessary to get dword alignment, and another partial word at the end if
  111. ; needed.
  112. ;
  113. ;
  114. ; Compute checksum on the leading word, if that's necessary to get dword
  115. ; alignment.
  116. ;
  117. checksum_start: ; ECX = number of whole words left
  118. test esi,02h ; check if source dword aligned
  119. jz short checksum_dword_aligned ; source is already dword aligned
  120. mov dx,[esi] ; get first word to checksum
  121. add esi,2 ; update source address
  122. add eax,edx ; update partial checksum
  123. ; (no carry is possible, because EAX
  124. ; and EDX are both 16-bit values)
  125. dec ecx ; count off this word (zero case gets
  126. ; picked up below)
  127. ;
  128. ; Checksum as many words as possible by processing a dword at a time.
  129. ;
  130. checksum_dword_aligned:
  131. push ecx ; so we can tell if there's a trailing
  132. ; word later
  133. shr ecx,1 ; # of dwords to checksum
  134. jz short to_checksum_last_word ; no dwords to checksum
  135. mov edx,[esi] ; preload the first dword
  136. add esi,4 ; point to the next dword
  137. dec ecx ; count off the dword we just loaded
  138. jz short to_checksum_dword_loop_done
  139. ; skip the loop if that was the only
  140. ; dword
  141. mov ebx,ecx ; EBX = # of dwords left to checksum
  142. add ecx,LOOP_UNROLLING-1 ; round up loop count
  143. shr ecx,LOOP_UNROLLING_BITS ; convert from dword count to unrolled
  144. ; loop count
  145. and ebx,LOOP_UNROLLING-1 ; # of partial dwords to do in first
  146. ; loop
  147. jz short checksum_dword_loop ; special-case when no partial loop,
  148. ; because fixup below doesn't work
  149. ; in that case (carry flag is
  150. ; cleared at this point, as required
  151. ; at loop entry)
  152. lea esi,[esi+ebx*4-(LOOP_UNROLLING*4)]
  153. ; adjust buffer pointer back to
  154. ; compensate for hardwired displacement
  155. ; at loop entry point
  156. ; ***doesn't change carry flag***
  157. jmp loop_entry[ebx*4] ; enter the loop to do the first,
  158. ; partial iteration, after which we can
  159. ; just do 64-word blocks
  160. ; ***doesn't change carry flag***
  161. checksum_dword_loop: ; top of the unrolled loop; CF is carried from pass to pass
  162. DEFLAB macro pre,suf
  163. pre&suf:
  164. endm
  165. TEMP=0 ; byte displacement of the dword loaded by each unrolled step
  166. REPT LOOP_UNROLLING
  167. deflab loop_entry_,%TEMP
  168. adc eax,edx
  169. mov edx,[esi + TEMP]
  170. TEMP=TEMP+4
  171. ENDM
  172. checksum_dword_loop_end:
  173. lea esi,[esi + LOOP_UNROLLING * 4] ; update source address
  174. ; ***doesn't change carry flag***
  175. dec ecx ; count off unrolled loop iteration
  176. ; ***doesn't change carry flag***
  177. jnz checksum_dword_loop ; do more blocks
  178. checksum_dword_loop_done label proc ; the last dword loaded is still pending in EDX
  179. adc eax,edx ; finish dword checksum
  180. mov edx,0 ; zero EDX with mov so the carry from the adc survives
  181. adc eax,edx ; add that final carry back into the sum
  182. ;
  183. ; Compute checksum on the trailing word, if there is one.
  184. ; High word of EDX = 0 at this point (EDX is not used below)
  185. ; Bit 0 of the word count saved on the stack says whether there is one
  186. ;
  187. checksum_last_word label proc ; "proc" so not scoped to function
  188. pop ecx ; get back word count
  189. test ecx,1 ; is there a trailing word?
  190. jz short checksum_done ; no trailing word
  191. add ax,[esi] ; add in the trailing word
  192. adc eax,0 ; end-around carry from the 16-bit add
  193. checksum_done label proc ; "proc" so not scoped to function
  194. mov ecx,eax ; fold the checksum to 16 bits
  195. ror ecx,16 ; ECX = EAX with its two halves swapped
  196. add eax,ecx ; high word of EAX = low + high + carry out of the low-word add
  197. mov ebx,[esp + buf] ; reload the original buffer address (ESI has moved)
  198. shr eax,16 ; keep that 16-bit sum in AX
  199. test ebx,1 ; check if buffer word aligned
  200. jz short checksum_combine ; if zf set, buffer word aligned
  201. ror ax,8 ; byte aligned--swap bytes back
  202. checksum_combine label proc ; "proc" so not scoped to function
  203. add ax,word ptr [esp + cksum] ; combine checksums
  204. pop esi ; restore nonvolatile register
  205. adc eax,0 ; end-around carry from the 16-bit add (pop leaves CF alone)
  206. pop ebx ; restore nonvolatile register
  207. stdRET tcpxsum
  208. REFLAB macro pre,suf
  209. dd pre&suf
  210. endm
  211. align 4
  212. loop_entry label dword ; jump table indexed by the dword count of the first, partial pass
  213. dd 0 ; index 0 is never used: that case branches straight to checksum_dword_loop
  214. TEMP=LOOP_UNROLLING*4
  215. REPT LOOP_UNROLLING-1
  216. TEMP=TEMP-4
  217. reflab loop_entry_,%TEMP
  218. ENDM
  219. stdENDP tcpxsum
  220. LOOP_UNROLLING_BITS_XMMI equ 4 ; log2 of the tcpxsum_xmmi unroll factor
  221. LOOP_UNROLLING_XMMI equ (1 SHL LOOP_UNROLLING_BITS_XMMI) ; dwords handled per pass of the unrolled tcpxsum_xmmi loop (16)
  222. ;VRSTEST EQU 0
  223. ifdef VRSTEST
  224. ;
  225. ; Self-check wrapper, built only when VRSTEST is defined.  It runs tcpxsum
  ; and tcpxsum_xmmi1 on the same three arguments, in an order chosen from a
  ; bit of the time-stamp counter, and stops at a breakpoint if the two
  ; results differ.  On agreement the common result is returned in EAX.
  226. tcksum equ 8[ebp] ; caller's initial checksum argument
  227. tbuf equ 12[ebp] ; caller's source address argument
  228. tlen equ 16[ebp] ; caller's length argument (passed through unchanged)
  229. align
  230. cPublicProc tcpxsum_xmmi,3
  231. ;int 3
  232. push ebp ; build a frame so the arguments can be
  233. mov ebp, esp ; re-pushed through the equates above
  234. push ebx ; EBX/ESI hold the two routine addresses
  235. push esi
  236. mov ebx, offset tcpxsum ; EBX = routine to call first
  237. mov esi, offset tcpxsum_xmmi1 ; ESI = routine to call second
  238. ; Use the time-stamp counter as a cheap coin flip
  239. .586p
  240. rdtsc
  241. .386p
  242. and eax, 10H ; look at one bit of the low dword
  243. jz run_pair ; bit clear: keep the order set up above
  244. ; Bit set: the other routine goes first
  245. xchg ebx, esi
  248. run_pair:
  249. ; First routine, with a fresh copy of the caller's arguments
  250. push tlen
  251. push tbuf
  252. push tcksum
  253. call ebx
  254. ; Keep its result on the stack for the comparison
  255. push eax
  256. ; Second routine, same arguments
  257. push tlen
  258. push tbuf
  259. push tcksum
  260. call esi
  261. ; Compare the second result with the saved first one
  262. cmp eax, [esp]
  263. jnz xsum_mismatch
  264. ; Results agree: drop the saved copy and unwind
  265. pop eax
  266. pop esi
  267. pop ebx
  268. pop ebp
  269. stdRET tcpxsum_xmmi
  270. align
  271. ; Results differ: leave both values on the stack for inspection
  272. xsum_mismatch:
  273. ; second result goes on top of the saved first result
  274. push eax
  275. ; a bugcheck call was once used here and is left commented out
  276. ;EXTRNP _KeBugCheck,1,IMPORT
  277. ;stdCall _KeBugCheck, <0>
  278. halt_here:
  279. int 3 ; break, and break again if continued
  280. jmp halt_here
  281. stdENDP tcpxsum_xmmi
  282. endif
  283. ;++
  284. ;
  285. ; ULONG
  286. ; tcpxsum_xmmi(
  287. ; IN ULONG cksum,
  288. ; IN PUCHAR buf,
  289. ; IN ULONG len
  290. ; )
  291. ;
  292. ; Routine Description:
  293. ;
  294. ; This function computes the checksum of the specified buffer.
  295. ; It uses Processor's prefetch instruction.
  296. ;
  297. ; Arguments:
  298. ;
  299. ; cksum - Supplies the initial checksum value, in 16-bit form,
  300. ; with the high word set to 0.
  301. ;
  302. ; buf - Supplies a pointer to the buffer to checksum.
  303. ;
  304. ; len - Supplies the length of the buffer in bytes.
  305. ;
  306. ; Return Value:
  307. ;
  308. ; The checksum of the buffer, folded to 16 bits and combined with the
  309. ; initial checksum using end-around carry, is returned as the function value.
  310. ;
  311. ;--
  312. cksum equ 12 ; stack offset to initial checksum (valid after the two pushes below)
  313. buf equ 16 ; stack offset to source address
  314. len equ 20 ; stack offset to length in bytes
  315. to_checksum_last_word_xmmi: ; trampolines: targets of the "jz short" exits below
  316. jmp checksum_last_word_xmmi ; forward to the real label
  317. to_checksum_done_xmmi:
  318. jmp checksum_done_xmmi ; forward to the real label
  319. to_checksum_dword_loop_done_xmmi:
  320. jmp checksum_dword_loop_done_xmmi ; forward to the real label
  321. ifdef VRSTEST
  322. cPublicProc tcpxsum_xmmi1,3
  323. else
  324. cPublicProc tcpxsum_xmmi,3
  325. endif
  ;
  ; Register roles in this routine:
  ;   EAX = running checksum (32-bit accumulator, folded to 16 bits at the end)
  ;   EDX = dword that has been loaded but not yet added
  ;   ESI = current source address
  ;   ECX = remaining count (bytes, then words, then dwords, then loop passes)
  ;   EBX = number of dwords in the first, partial pass; scratch at the end
  ;
  326. push ebx ; save nonvolatile register
  327. push esi ; save nonvolatile register
  328. mov ecx,[esp + len] ; get length in bytes
  329. sub eax,eax ; clear computed checksum
  330. test ecx,ecx ; any bytes to checksum at all?
  331. jz short to_checksum_done_xmmi ; no bytes to checksum
  332. ;
  333. ; If the checksum buffer is not word aligned, then put the first byte of
  334. ; the buffer into the high byte of the computed checksum.
  335. ;
  336. mov esi,[esp + buf] ; get source address
  337. sub edx,edx ; set up to load word into EDX below
  338. test esi,1 ; check if buffer word aligned
  339. jz short checksum_word_aligned ; if zf, buffer word aligned
  340. mov ah,[esi] ; get first byte (we know we'll have
  341. ; to swap at the end)
  342. inc esi ; increment buffer address
  343. dec ecx ; decrement number of bytes
  344. jz short to_checksum_done_xmmi ; if zf set, no more bytes
  345. ;
  346. ; If the buffer is not an even number of bytes, then initialize
  347. ; the computed checksum with the last byte of the buffer.
  348. ;
  349. checksum_word_aligned: ; ESI is word aligned here, ECX = bytes left
  350. shr ecx,1 ; convert to word count
  351. jnc short checksum_start ; if nc, even number of bytes
  352. mov al,[esi+ecx*2] ; initialize the computed checksum
  ; (mov leaves the flags from the shr intact)
  353. jz short to_checksum_done_xmmi ; if zf set (by the shr), no whole words
  354. ;
  355. ; Compute checksum in large blocks of dwords, with one partial word up front if
  356. ; necessary to get dword alignment, and another partial word at the end if
  357. ; needed.
  358. ;
  359. ;
  360. ; Compute checksum on the leading word, if that's necessary to get dword
  361. ; alignment.
  362. ;
  363. checksum_start: ; ECX = number of whole words left
  364. test esi,02h ; check if source dword aligned
  365. jz short checksum_dword_aligned ; source is already dword aligned
  366. mov dx,[esi] ; get first word to checksum
  367. add esi,2 ; update source address
  368. add eax,edx ; update partial checksum
  369. ; (no carry is possible, because EAX
  370. ; and EDX are both 16-bit values)
  371. dec ecx ; count off this word (zero case gets
  372. ; picked up below)
  373. ;
  374. ; Checksum as many words as possible by processing a dword at a time.
  375. ;
  376. checksum_dword_aligned:
  377. push ecx ; so we can tell if there's a trailing
  378. ; word later
  379. shr ecx,1 ; # of dwords to checksum
  380. jz short to_checksum_last_word_xmmi ; no dwords to checksum
  381. mov edx,[esi] ; preload the first dword
  382. add esi,4 ; point to the next dword
  383. dec ecx ; count off the dword we just loaded
  384. jz short to_checksum_dword_loop_done_xmmi
  385. ; skip the loop if that was the only
  386. ; dword
  387. mov ebx,ecx ; EBX = # of dwords left to checksum
  388. add ecx,LOOP_UNROLLING_XMMI-1 ; round up loop count
  389. shr ecx,LOOP_UNROLLING_BITS_XMMI ; convert from dword count to unrolled
  390. ; loop count
  391. and ebx,LOOP_UNROLLING_XMMI-1 ; # of partial dwords to do in first
  392. ; loop
  393. jz short checksum_dword_loop ; special-case when no partial loop,
  394. ; because fixup below doesn't work
  395. ; in that case (carry flag is
  396. ; cleared at this point, as required
  397. ; at loop entry)
  398. lea esi,[esi+ebx*4-(LOOP_UNROLLING_XMMI*4)]
  399. ; adjust buffer pointer back to
  400. ; compensate for hardwired displacement
  401. ; at loop entry point
  402. ; ***doesn't change carry flag***
  403. jmp xmmi_loop_entry[ebx*4] ; enter the loop to do the first,
  404. ; partial iteration (which skips the
  405. ; prefetches), after which we can just
  406. ; do full 16-dword blocks
  ; ***doesn't change carry flag***
  407. checksum_dword_loop: ; top of the unrolled loop; CF is carried from pass to pass
  ;
  ; The four prefetches are emitted as raw bytes.  0F 18 /0 with ModRM 46h
  ; encodes PREFETCHNTA [esi+disp8]; the fourth byte is the displacement.
  ; NOTE(review): presumably hand-encoded because the assembler in use did
  ; not accept the mnemonic -- confirm.  Prefetch does not modify the flags,
  ; so the carry chain is undisturbed.
  ;
  408. ; prefetch the 32-byte cache line from [esi+0]
  409. db 0fH
  410. db 18H
  411. db 46H
  412. db 00H
  413. ; prefetch the 32-byte cache line from [esi+20h]
  414. db 0fH
  415. db 18H
  416. db 46H
  417. db 20H
  418. ; prefetch the 32-byte cache line from [esi+40h]
  419. db 0fH
  420. db 18H
  421. db 46H
  422. db 40H
  423. ; prefetch the 32-byte cache line from [esi+60h]
  424. db 0fH
  425. db 18H
  426. db 46H
  427. db 60H
  428. DEFLAB macro pre,suf
  429. pre&suf:
  430. endm
  431. TEMP=0 ; byte displacement of the dword loaded by each unrolled step
  432. REPT LOOP_UNROLLING_XMMI
  433. deflab xmmi_loop_entry_,%TEMP
  434. adc eax,edx
  435. mov edx,[esi + TEMP]
  436. TEMP=TEMP+4
  437. ENDM
  438. checksum_dword_loop_end:
  439. lea esi,[esi + LOOP_UNROLLING_XMMI * 4] ; update source address
  440. ; ***doesn't change carry flag***
  441. dec ecx ; count off unrolled loop iteration
  442. ; ***doesn't change carry flag***
  443. jnz checksum_dword_loop ; do more blocks
  444. checksum_dword_loop_done_xmmi label proc ; the last dword loaded is still pending in EDX
  445. adc eax,edx ; finish dword checksum
  446. mov edx,0 ; zero EDX with mov so the carry from the adc survives
  447. adc eax,edx ; add that final carry back into the sum
  448. ;
  449. ; Compute checksum on the trailing word, if there is one.
  450. ; High word of EDX = 0 at this point (EDX is not used below)
  451. ; Bit 0 of the word count saved on the stack says whether there is one
  452. ;
  453. checksum_last_word_xmmi label proc ; "proc" so not scoped to function
  454. pop ecx ; get back word count
  455. test ecx,1 ; is there a trailing word?
  456. jz short checksum_done_xmmi; no trailing word
  457. add ax,[esi] ; add in the trailing word
  458. adc eax,0 ; end-around carry from the 16-bit add
  459. checksum_done_xmmi label proc ; "proc" so not scoped to function
  460. mov ecx,eax ; fold the checksum to 16 bits
  461. ror ecx,16 ; ECX = EAX with its two halves swapped
  462. add eax,ecx ; high word of EAX = low + high + carry out of the low-word add
  463. mov ebx,[esp + buf] ; reload the original buffer address (ESI has moved)
  464. shr eax,16 ; keep that 16-bit sum in AX
  465. test ebx,1 ; check if buffer word aligned
  466. jz short checksum_combine_xmmi ; if zf set, buffer word aligned
  467. ror ax,8 ; byte aligned--swap bytes back
  468. checksum_combine_xmmi label proc ; "proc" so not scoped to function
  469. add ax,word ptr [esp + cksum] ; combine checksums
  470. pop esi ; restore nonvolatile register
  471. adc eax,0 ; end-around carry from the 16-bit add (pop leaves CF alone)
  472. pop ebx ; restore nonvolatile register
  ;
  ; Return naming the enclosing procedure, selected the same way as the
  ; cPublicProc/stdENDP pair (the original named tcpxsum here).
  ;
  ifdef VRSTEST
  stdRET tcpxsum_xmmi1
  else
  473. stdRET tcpxsum_xmmi
  endif
  474. REFLAB macro pre,suf
  475. dd pre&suf
  476. endm
  477. align 4
  478. xmmi_loop_entry label dword ; jump table indexed by the dword count of the first, partial pass
  479. dd 0 ; index 0 is never used: that case branches straight to checksum_dword_loop
  480. TEMP=LOOP_UNROLLING_XMMI*4
  481. REPT LOOP_UNROLLING_XMMI-1
  482. TEMP=TEMP-4
  483. reflab xmmi_loop_entry_,%TEMP
  484. ENDM
  485. ifdef VRSTEST
  486. stdENDP tcpxsum_xmmi1
  487. else
  488. stdENDP tcpxsum_xmmi
  489. endif
  490. ;++
  491. ;
  492. ; PSINGLE_LIST_ENTRY
  493. ; (declared with cPublicProc; the argument is read from the stack)
  494. ; InterlockedFlushSList (
  495. ; IN PSINGLE_LIST_ENTRY ListHead
  496. ; )
  497. ;
  498. ; Routine Description:
  499. ;
  500. ; This function removes the entire list from a sequenced singly
  501. ; linked list so that access to the list is synchronized in an MP system.
  502. ; If there are no entries in the list, then a value of NULL is returned.
  503. ; Otherwise, the address of the entry at the top of the list is removed
  504. ; and returned as the function value and the list header is set to point
  505. ; to NULL.
  506. ;
  507. ; N.B. The cmpxchg8b instruction is only supported on some processors.
  508. ; If the host processor does not support this instruction, then
  509. ; the following code is patched to contain a jump to the normal
  510. ; pop entry code which has a compatible calling sequence and data
  511. ; structure.
  512. ;
  513. ; Arguments:
  514. ;
  515. ; ListHead - Supplies a pointer to the sequenced listhead from
  516. ; which the list is to be flushed. It is loaded from the stack, not from ecx.
  517. ;
  518. ; Return Value:
  519. ;
  520. ; The address of the entire current list, or NULL if the list is
  521. ; empty.
  522. ;
  523. ;--
  524. cPublicProc InterlockedFlushSList, 1
  525. ;
  526. ; Preserve ECX, EBX and EBP, then take a snapshot of the list header:
  527. ; the dword at offset 4 (sequence number) first, the dword at offset 0
  528. ; (next link) second.
  529. ; N.B. The two dwords are deliberately read in that order.
  530. ;
  531. push ecx ; ECX is handed back unchanged on return
  532. push ebx ; EBX and EBP are nonvolatile
  533. push ebp ;
  534. mov ebp, [esp+16] ; EBP = ListHead (first stack argument, above 3 pushes + return address)
  536. mov edx, [ebp+4] ; EDX = current sequence number dword
  537. mov eax, [ebp] ; EAX = current next link
  538. ;
  539. ; Retry point.  EDX:EAX always holds the header value the exchange will
  540. ; compare against; a failed cmpxchg8b reloads it with the current header.
  541. ;
  542. ; An empty list (next link of zero) is returned as NULL without writing.
  543. ;
  544. flush_retry: test eax, eax ; empty list?
  545. jz short flush_exit ; yes, return NULL in EAX
  547. mov ebx, 0 ; replacement next link = NULL
  546. mov ecx, 0 ; replacement sequence/depth dword = 0
  548. .586
  549. ifndef NT_UP
  550. lock cmpxchg8b qword ptr [ebp] ; swap in ECX:EBX if the header still equals EDX:EAX
  551. else
  552. cmpxchg8b qword ptr [ebp] ; swap in ECX:EBX if the header still equals EDX:EAX
  553. endif
  554. .386
  555. jnz short flush_retry ; header changed underneath us, try again
  556. ;
  557. ; Common exit: EAX holds the old list head (or NULL).
  558. ;
  559. flush_exit: pop ebp ; restore saved registers
  560. pop ebx ;
  561. pop ecx
  562. stdRET InterlockedFlushSList
  563. stdENDP InterlockedFlushSList
  564. end
  565.