Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1327 lines
28 KiB

  1. title "User Mode Zero and Move Memory functions"
  2. ;++
  3. ;
  4. ; Copyright (c) 1989 Microsoft Corporation
  5. ;
  6. ; Module Name:
  7. ;
  8. ; movemem.asm
  9. ;
  10. ; Abstract:
  11. ;
  12. ; This module implements functions to zero and copy blocks of memory
  13. ;
  14. ;
  15. ; Author:
  16. ;
  17. ; Steven R. Wood (stevewo) 25-May-1990
  18. ;
  19. ; Environment:
  20. ;
  21. ; User mode only.
  22. ;
  23. ; Revision History:
  24. ;
  25. ;--
  26. .386p
  27. .xlist
  28. include ks386.inc
  29. include callconv.inc ; calling convention macros
  30. .list
  if DBG
  ;
  ; Checked-build (DBG) data.
  ;
  ; _RtlpZeroCount / _RtlpZeroBytes are public statistics counters; in this
  ; module they are updated by RtlMoveMemory (call count and total byte count).
  ;
  _DATA SEGMENT DWORD PUBLIC 'DATA'
          public  _RtlpZeroCount
          public  _RtlpZeroBytes
  _RtlpZeroCount  dd      0
  _RtlpZeroBytes  dd      0
  ifndef BLDR_KERNEL_RUNTIME
  ;
  ; Format strings handed to DbgPrint by the alignment checks below.
  ;
  ; N.B. MASM does not interpret C escape sequences inside quoted strings,
  ;      so the line terminator is emitted as an explicit 0ah byte rather
  ;      than as the two literal characters '\' and 'n'.
  ;
  _MsgUnalignedPtr   db 'RTL: RtlCompare/FillMemoryUlong called with unaligned pointer (%x)',0ah,0
  _MsgUnalignedCount db 'RTL: RtlCompare/FillMemoryUlong called with unaligned count (%x)',0ah,0
  endif
  _DATA ENDS
  ifndef BLDR_KERNEL_RUNTIME
  ;
  ; External references used only by the checked-build alignment checks.
  ;
  ifdef NTOS_KERNEL_RUNTIME
          extrn   _KdDebuggerEnabled:BYTE
  endif
          EXTRNP  _DbgBreakPoint,0
          extrn   _DbgPrint:near
  endif
  endif
  ;
  ; Granularity used by the zero/fill and move routines: the bulk of each
  ; operation is done in units of (1 SHL xxx_LOG2) bytes, and the MASK value
  ; extracts the leftover byte count.
  ;
  ZERO_MEMORY_ALIGNMENT_LOG2 = 2
  ZERO_MEMORY_ALIGNMENT = 1 SHL ZERO_MEMORY_ALIGNMENT_LOG2
  ZERO_MEMORY_ALIGNMENT_MASK = ZERO_MEMORY_ALIGNMENT - 1
  MEMORY_ALIGNMENT_LOG2 = 2
  MEMORY_ALIGNMENT = 1 SHL MEMORY_ALIGNMENT_LOG2
  MEMORY_ALIGNMENT_MASK = MEMORY_ALIGNMENT - 1
  ;
  ; Entry point alignment applied in front of functions in this module.
  ;
  CODE_ALIGNMENT macro
          align   16
          endm
  ;
  ; Everything that follows is assembled into the flat code segment.
  ;
  _TEXT$00 SEGMENT PARA PUBLIC 'CODE'
          ASSUME  DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
          page    , 132
  subttl "RtlCompareMemory"
  ;++
  ;
  ; ULONG
  ; RtlCompareMemory (
  ;    IN PVOID Source1,
  ;    IN PVOID Source2,
  ;    IN ULONG Length
  ;    )
  ;
  ; Routine Description:
  ;
  ;    Walks two buffers from their first byte and reports how many leading
  ;    bytes are identical. The comparison is done a dword at a time, then
  ;    byte by byte for the final (Length mod 4) bytes.
  ;
  ; Arguments:
  ;
  ;    Source1 (esp+4 on entry) - First buffer to compare.
  ;
  ;    Source2 (esp+8 on entry) - Second buffer to compare.
  ;
  ;    Length (esp+12 on entry) - Number of bytes to compare.
  ;
  ; Return Value:
  ;
  ;    eax = number of leading bytes that compared equal. This equals Length
  ;    when the two buffers are identical over the whole range.
  ;
  ;--
  ;
  ; Argument offsets as seen after esi and edi have been pushed.
  ;
  RcmSource1 equ [esp+12]
  RcmSource2 equ [esp+16]
  RcmLength equ [esp+20]
          CODE_ALIGNMENT
  cPublicProc _RtlCompareMemory,3
  cPublicFpo 3,0
          push    esi                     ; preserve non-volatile registers
          push    edi
          cld                             ; string ops walk upward
          mov     edi,RcmSource2          ; (edi) -> second buffer
          mov     esi,RcmSource1          ; (esi) -> first buffer
  ;
  ; Phase 1: compare whole dwords.
  ;
          mov     ecx,RcmLength
          shr     ecx,2                   ; (ecx) = dword count
          jz      rcmTail                 ; shorter than one dword
          repe    cmpsd
          jne     rcmDwordDiff            ; a dword differed, locate the byte
  ;
  ; Phase 2: compare the 0..3 bytes left over after the last whole dword.
  ;
  rcmTail:
          mov     ecx,RcmLength
          and     ecx,3                   ; (ecx) = Length mod 4
          jz      rcmEqual                ; no leftover bytes, everything matched
          repe    cmpsb
          jne     rcmByteDiff             ; a leftover byte differed
  ;
  ; Every byte matched: the answer is simply Length.
  ;
  rcmEqual:
          mov     eax,RcmLength
          pop     edi
          pop     esi
          stdRET  _RtlCompareMemory
  ;
  ; A dword differed. esi/edi have already stepped past it, so rewind one
  ; dword and rescan it bytewise. The count of 5 exceeds the 4 bytes in the
  ; dword, so the scan is ended by the mismatch and never by ecx reaching 0.
  ;
  rcmDwordDiff:
          sub     esi,4
          sub     edi,4
          mov     ecx,5
          repe    cmpsb
  ;
  ; esi is one past the first differing byte, so (esi - 1) - Source1 is the
  ; number of bytes that matched.
  ;
  rcmByteDiff:
          lea     eax,[esi-1]
          sub     eax,RcmSource1
          pop     edi
          pop     esi
          stdRET  _RtlCompareMemory
  stdENDP _RtlCompareMemory
  subttl "RtlCompareMemoryUlong"
  ;++
  ;
  ; ULONG
  ; RtlCompareMemoryUlong (
  ;    IN PVOID Source,
  ;    IN ULONG Length,
  ;    IN ULONG Pattern
  ;    )
  ;
  ; Routine Description:
  ;
  ;    Scans a buffer one dword at a time and reports how many bytes at the
  ;    start of the buffer are equal to the repeated 32-bit Pattern. The low
  ;    two bits of Length are ignored (only whole dwords are examined).
  ;
  ; Arguments:
  ;
  ;    Source - Buffer to scan. Checked builds report (and optionally break
  ;       on) a pointer that is not 4-byte aligned.
  ;
  ;    Length - Size of the buffer in bytes.
  ;
  ;    Pattern - 32-bit value each dword is compared against.
  ;
  ; Return Value:
  ;
  ;    eax = number of bytes (always a multiple of 4) from the start of the
  ;    buffer that matched Pattern.
  ;
  ;--
  ;
  ; Argument offsets as seen after edi has been pushed.
  ;
  EcmlSource equ [esp + 4 + 4]
  EcmlLength equ [esp + 4 + 8]
  EcmlPattern equ [esp + 4 + 12]
  ; end of arguments
          CODE_ALIGNMENT
  cPublicProc _RtlCompareMemoryUlong ,3
  ;
  ; Save the non-volatile register that we will use, without the benefit of
  ; a frame pointer. No exception handling in this routine.
  ;
          push    edi
  ;
  ; Register usage for REPE SCASD:
  ;
  ;   edi -> next dword to examine
  ;   ecx =  number of dwords left to examine
  ;   eax =  pattern each dword is compared with
  ;
          mov     edi,EcmlSource
  if DBG
  ifndef BLDR_KERNEL_RUNTIME
  ;
  ; Checked build: complain about a misaligned source pointer, and break in
  ; if a debugger is present. edi is non-volatile and survives the calls;
  ; ecx and eax are only loaded afterwards.
  ;
          test    edi,3
          jz      @F
          push    edi
          push    offset FLAT:_MsgUnalignedPtr
          call    _DbgPrint
          add     esp, 2 * 4
  ifdef NTOS_KERNEL_RUNTIME
          cmp     _KdDebuggerEnabled,0
  else
          mov     eax,fs:[PcTeb]
          mov     eax,[eax].TebPeb
          cmp     byte ptr [eax].PebBeingDebugged,0
  endif
          je      @F
          call    _DbgBreakPoint@0
  @@:
  endif
  endif
          mov     ecx,EcmlLength
          mov     eax,EcmlPattern
          cld                             ; scan upward, like RtlCompareMemory
          shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2
          jz      short rcmuDone          ; no whole dwords: zero bytes matched
  ;
  ; Scan while dwords equal the pattern. If the scan stopped on a mismatch,
  ; edi has stepped past the offending dword, so back up over it.
  ;
          repe    scasd
          je      short rcmuDone          ; ran out of dwords, all matched
          sub     edi,4
  rcmuDone:
          sub     edi,EcmlSource          ; bytes matched = end - start
          mov     eax,edi
          pop     edi
          stdRET  _RtlCompareMemoryUlong
  stdENDP _RtlCompareMemoryUlong
  subttl "RtlFillMemory"
  ;++
  ;
  ; VOID
  ; RtlFillMemory (
  ;    IN PVOID Destination,
  ;    IN ULONG Length,
  ;    IN UCHAR Fill
  ;    )
  ;
  ; Routine Description:
  ;
  ;    Stores the byte Fill into every byte of the destination buffer. Whole
  ;    dwords are written first, then the remaining 0..3 bytes.
  ;
  ; Arguments:
  ;
  ;    Destination - Start of the buffer to fill.
  ;
  ;    Length - Number of bytes to fill.
  ;
  ;    Fill - Byte value written to each location.
  ;
  ; Return Value:
  ;
  ;    None.
  ;
  ;--
  ;
  ; Argument offsets as seen after edi has been pushed.
  ;
  EfmDestination equ [esp + 4 + 4]
  EfmLength equ [esp + 4 + 8]
  EfmFill equ byte ptr [esp + 4 + 12]
  ; end of arguments
          CODE_ALIGNMENT
  cPublicProc _RtlFillMemory ,3
  cPublicFpo 3,1
  ;
  ; Only edi needs preserving; no frame pointer is set up.
  ;
          push    edi
  ;
  ; Register usage:
  ;
  ;   eax =  fill byte replicated into all four byte lanes
  ;   edi -> next location to fill
  ;   ecx =  dword count for REP STOSD, then byte count for REP STOSB
  ;   edx =  trailing byte count (Length mod 4)
  ;
          movzx   eax,EfmFill
          imul    eax,eax,01010101h       ; 0x000000ff -> 0xffffffff pattern
          mov     edx,EfmLength
          mov     edi,EfmDestination
          mov     ecx,edx
          and     edx,ZERO_MEMORY_ALIGNMENT_MASK
          shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2
          cld
  ;
  ; Fill the whole dwords (a zero count makes this a no-op).
  ;
          rep     stosd
  ;
  ; Fill the trailing bytes, if any, then restore and return.
  ;
          test    edx,edx
          jz      short rfmDone
          mov     ecx,edx
          rep     stosb
  rfmDone:
          pop     edi
          stdRET  _RtlFillMemory
  stdENDP _RtlFillMemory
  subttl "RtlFillMemoryUlonglong"
  ;++
  ;
  ; VOID
  ; RtlFillMemoryUlonglong (
  ;    IN PVOID Destination,
  ;    IN ULONG Length,
  ;    IN ULONGLONG Fill
  ;    )
  ;
  ; Routine Description:
  ;
  ;    Fills memory with a 64-bit value. The first 8 bytes are stored
  ;    directly and the rest of the buffer is produced by an overlapping
  ;    forward REP MOVSD that ripples those 8 bytes through the destination.
  ;    The low order two bits of Length are ignored.
  ;
  ;    If Length describes fewer than two dwords (Length < 8) nothing is
  ;    written; previously that case underflowed the dword count.
  ;
  ; Arguments:
  ;
  ;    Destination - Start of the buffer to fill. Checked builds report a
  ;       pointer that is not 4-byte aligned.
  ;
  ;    Length - Number of bytes to fill. Checked builds report a Length that
  ;       is not a multiple of 8.
  ;
  ;    Fill - 64-bit value to fill with; occupies two argument dwords, low
  ;       half first.
  ;
  ; Return Value:
  ;
  ;    None.
  ;
  ;--
  ;
  ; Argument offsets as seen after esi and edi have been pushed
  ; ([esp+8] is the return address).
  ;
  EfmlDestination equ [esp + 0ch]
  EfmlLength equ [esp + 10h]
  EfmlFillLow equ [esp + 14h]
  EfmlFillHigh equ [esp + 18h]
  ; end of arguments
          CODE_ALIGNMENT
  cPublicProc _RtlFillMemoryUlonglong ,4
  cPublicFpo 4,1
  ;
  ; Save the non-volatile registers that we will use, without the benefit of
  ; a frame pointer. No exception handling in this routine.
  ;
          push    esi
          push    edi
  ;
  ; Register usage for the ripple copy:
  ;
  ;   esi -> start of destination (source of the ripple)
  ;   edi -> destination + 8 (target of the ripple)
  ;   ecx =  number of dwords still to produce
  ;   eax =  scratch for the two halves of the fill value
  ;
          mov     esi,EfmlDestination     ; Destination pointer
  if DBG
  ifndef BLDR_KERNEL_RUNTIME
  ;
  ; Checked build: report a byte count that is not a multiple of 8 ...
  ;
          mov     ecx,EfmlLength          ; # of bytes
          test    ecx,7
          jz      @F
          push    ecx
          push    offset FLAT:_MsgUnalignedCount
          call    _DbgPrint
          add     esp, 2 * 4
  ifdef NTOS_KERNEL_RUNTIME
          cmp     _KdDebuggerEnabled,0
  else
          mov     eax,fs:[PcTeb]
          mov     eax,[eax].TebPeb
          cmp     byte ptr [eax].PebBeingDebugged,0
  endif
          je      @F
          call    _DbgBreakPoint@0
  @@:
  ;
  ; ... and a destination pointer that is not dword aligned. esi is
  ; non-volatile, so it survives the calls made here.
  ;
          test    esi,3
          jz      @F
          push    esi
          push    offset FLAT:_MsgUnalignedPtr
          call    _DbgPrint
          add     esp, 2 * 4
  ifdef NTOS_KERNEL_RUNTIME
          cmp     _KdDebuggerEnabled,0
  else
          mov     eax,fs:[PcTeb]
          mov     eax,[eax].TebPeb
          cmp     byte ptr [eax].PebBeingDebugged,0
  endif
          je      @F
          call    _DbgBreakPoint@0
  @@:
  endif
  endif
  ;
  ; Load the length only now: ecx is volatile across the debug calls above,
  ; so any value loaded earlier cannot be trusted here.
  ;
          mov     ecx,EfmlLength          ; # of bytes
          cld                             ; ripple copy must run forward
          shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2 ; convert bytes to dwords
          cmp     ecx,2                   ; room for one full 64-bit value?
          jb      short rfmqDone          ; no - nothing to fill
          sub     ecx,2                   ; first two dwords stored by hand
          mov     eax,EfmlFillLow         ; get low portion of the fill arg
          mov     [esi],eax               ; fill 1st lowpart
          mov     eax,EfmlFillHigh        ; get high portion of the fill arg
          mov     [esi+04],eax            ; fill 1st highpart
          lea     edi,[esi+08]            ; initialize the dest pointer
          rep     movsd                   ; ripple the rest
  rfmqDone:
          pop     edi
          pop     esi
          stdRET  _RtlFillMemoryUlonglong
  stdENDP _RtlFillMemoryUlonglong
  subttl "RtlFillMemoryUlong"
  ;++
  ;
  ; VOID
  ; RtlFillMemoryUlong (
  ;    IN PVOID Destination,
  ;    IN ULONG Length,
  ;    IN ULONG Fill
  ;    )
  ;
  ; Routine Description:
  ;
  ;    This function fills memory with a 32-bit value. The Destination pointer
  ;    must be aligned on a 4 byte boundary and the low order two bits of the
  ;    Length parameter are ignored (only whole dwords are written).
  ;
  ; Arguments:
  ;
  ;    Destination - Supplies a pointer to the memory to fill. Checked builds
  ;       report (and optionally break on) a pointer that is not 4-byte
  ;       aligned.
  ;
  ;    Length - Supplies the Length, in bytes, of the memory to be filled.
  ;
  ;    Fill - Supplies the 32-bit value to fill memory with.
  ;
  ; Return Value:
  ;
  ;    None.
  ;
  ;--
  ;
  ; Argument offsets as seen after edi has been pushed.
  ;
  EfmlDestination equ [esp + 4 + 4]
  EfmlLength equ [esp + 4 + 8]
  EfmlFill equ [esp + 4 + 12]
  ; end of arguments
          CODE_ALIGNMENT
  cPublicProc _RtlFillMemoryUlong ,3
  cPublicFpo 3,1
  ;
  ; Save the non-volatile register that we will use, without the benefit of
  ; a frame pointer. No exception handling in this routine.
  ;
          push    edi
  ;
  ; Register usage for REP STOSD:
  ;
  ;   edi -> memory to fill
  ;   ecx =  number of 32-bit words to fill
  ;   eax =  value to store in destination
  ;
          mov     edi,EfmlDestination
  if DBG
  ifndef BLDR_KERNEL_RUNTIME
  ;
  ; Checked build: complain about a misaligned destination pointer, and
  ; break in if a debugger is present. edi is non-volatile and survives the
  ; calls; ecx and eax are only loaded afterwards.
  ;
          test    edi,3
          jz      @F
          push    edi
          push    offset FLAT:_MsgUnalignedPtr
          call    _DbgPrint
          add     esp, 2 * 4
  ifdef NTOS_KERNEL_RUNTIME
          cmp     _KdDebuggerEnabled,0
  else
          mov     eax,fs:[PcTeb]
          mov     eax,[eax].TebPeb
          cmp     byte ptr [eax].PebBeingDebugged,0
  endif
          je      @F
          call    _DbgBreakPoint@0
  @@:
  endif
  endif
          mov     ecx,EfmlLength
          mov     eax,EfmlFill
          cld                             ; fill upward, like RtlFillMemory
          shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2
  ;
  ; Store the dwords. A zero count makes REP STOSD a no-op.
  ;
          rep     stosd
          pop     edi
          stdRET  _RtlFillMemoryUlong
  stdENDP _RtlFillMemoryUlong
  subttl "RtlZeroMemory"
  ;++
  ;
  ; VOID
  ; RtlZeroMemory (
  ;    IN PVOID Destination,
  ;    IN ULONG Length
  ;    )
  ;
  ; Routine Description:
  ;
  ;    Clears a buffer to zero. Whole dwords are cleared first, then the
  ;    remaining 0..3 bytes.
  ;
  ; Arguments:
  ;
  ;    Destination - Start of the buffer to clear.
  ;
  ;    Length - Number of bytes to clear.
  ;
  ; Return Value:
  ;
  ;    None.
  ;
  ;--
  ;
  ; Argument offsets as seen after edi has been pushed.
  ;
  EzmDestination equ [esp + 4 + 4]
  EzmLength equ [esp + 4 + 8]
  ; end of arguments
          CODE_ALIGNMENT
  cPublicProc _RtlZeroMemory ,2
  cPublicFpo 2,1
  ;
  ; Only edi needs preserving; no frame pointer is set up.
  ;
          push    edi
  ;
  ; Register usage:
  ;
  ;   eax =  zero (the value stored)
  ;   edi -> next location to clear
  ;   ecx =  dword count for REP STOSD, then byte count for REP STOSB
  ;   edx =  trailing byte count (Length mod 4)
  ;
          xor     eax,eax
          mov     edx,EzmLength
          mov     edi,EzmDestination
          mov     ecx,edx
          and     edx,ZERO_MEMORY_ALIGNMENT_MASK
          shr     ecx,ZERO_MEMORY_ALIGNMENT_LOG2
          cld
  ;
  ; Clear the whole dwords (a zero count makes this a no-op).
  ;
          rep     stosd
  ;
  ; Clear the trailing bytes, if any, then restore and return.
  ;
          test    edx,edx
          jz      short rzmDone
          mov     ecx,edx
          rep     stosb
  rzmDone:
          pop     edi
          stdRET  _RtlZeroMemory
  stdENDP _RtlZeroMemory
  548. page , 132
  549. subttl "RtlMoveMemory"
  550. ;++
  551. ;
  552. ; VOID
  553. ; RtlMoveMemory (
  554. ; IN PVOID Destination,
  555. ; IN PVOID Source OPTIONAL,
  556. ; IN ULONG Length
  557. ; )
  558. ;
  559. ; Routine Description:
  560. ;
  561. ; This function moves memory either forward or backward, aligned or
  562. ; unaligned, in 4-byte blocks, followed by any remaining bytes.
  563. ;
  564. ; Arguments:
  565. ;
  566. ; Destination - Supplies a pointer to the destination of the move.
  567. ;
  568. ; Source - Supplies a pointer to the memory to move.
  569. ;
  570. ; Length - Supplies the Length, in bytes, of the memory to be moved.
  571. ;
  572. ; Return Value:
  573. ;
  574. ; None.
  575. ;
  576. ;--
  577. ; Definitions of arguments
  578. ; (TOS) = Return address
  579. EmmDestination equ [esp + 8 + 4]
  580. EmmSource equ [esp + 8 + 8]
  581. EmmLength equ [esp + 8 + 12]
  582. ; End of arguments
  583. CODE_ALIGNMENT
  584. cPublicProc _RtlMoveMemory ,3
  585. cPublicFpo 3,2
  586. ;
  587. ; Save the non-volatile registers that we will use, without the benefit of
  588. ; a frame pointer. No exception handling in this routine.
  589. ;
  590. push esi
  591. push edi
  592. ;
  593. ; Setup the registers for using REP MOVS instruction to move memory.
  594. ;
  595. ; esi -> memory to move (NULL implies the destination will be zeroed)
  596. ; edi -> destination of move
  597. ; ecx = number of 32-bit words to move
  598. ; edx = number of extra 8-bit bytes to move at the end (0 - 3)
  599. ; direction flag is clear for auto-increment
  600. ;
  601. mov esi,EmmSource
  602. mov edi,EmmDestination
  603. mov ecx,EmmLength
  604. if DBG
  605. inc _RtlpZeroCount
  606. add _RtlpZeroBytes,ecx
  607. endif
  608. cld
  609. cmp esi,edi ; Special case if Source > Destination
  610. jbe overlap
  611. nooverlap:
  612. mov edx,ecx
  613. and edx,MEMORY_ALIGNMENT_MASK
  614. shr ecx,MEMORY_ALIGNMENT_LOG2
  615. ;
  616. ; If number of 32-bit words to move is non-zero, then do it.
  617. ;
  618. rep movsd
  619. ;
  620. ; If number of extra 8-bit bytes to move is non-zero, then do it. In either
  621. ; case restore non-volatile registers and return.
  622. ;
  623. or ecx,edx
  624. jnz @F
  625. pop edi
  626. pop esi
  627. stdRET _RtlMoveMemory
  628. @@:
  629. rep movsb
  630. movedone:
  631. pop edi
  632. pop esi
  633. stdRET _RtlMoveMemory
  634. ;
  635. ; Here to handle special case when Source > Destination and therefore is a
  636. ; potential overlapping move. If Source == Destination, then nothing to do.
  637. ; Otherwise, increment the Source and Destination pointers by Length and do
  638. ; the move backwards, a byte at a time.
  639. ;
  640. overlap:
  641. je movedone
  642. mov eax,edi
  643. sub eax,esi
  644. cmp ecx,eax
  645. jbe nooverlap
  646. std
  647. add esi,ecx
  648. add edi,ecx
  649. dec esi
  650. dec edi
  651. rep movsb
  652. cld
  653. jmp short movedone
  654. stdENDP _RtlMoveMemory
  subttl "RtlCopyMemoryNonTemporal"
  ;
  ; Register Definitions (for instruction macros).
  ;
  ; These are the register numbers placed in the r/m field of the ModR/M
  ; byte that the macros below assemble by hand.
  ;
  rEAX equ 0
  rECX equ 1
  rEDX equ 2
  rEBX equ 3
  rESP equ 4
  rEBP equ 5
  rESI equ 6
  rEDI equ 7
  ;
  ; Copy granularities: 64-byte blocks first, then dwords.
  ;
  MEMORY_ALIGNMENT_MASK0 = 63
  MEMORY_ALIGNMENT_LOG2_0 = 6
  MEMORY_ALIGNMENT_MASK1 = 3
  MEMORY_ALIGNMENT_LOG2_1 = 2
  ;
  ; The instructions below are emitted as raw bytes.
  ;
  ; N.B. None of the memory-operand macros emit a SIB byte, so rESP is not
  ;      usable as GeneralReg, and rEBP is not usable with the zero
  ;      displacement form.
  ;
  sfence macro
          db      0FH, 0AEH, 0F8H
          endm
  ;
  ; prefetchnta [GeneralReg + Offset]
  ;
  ; The compact encoding carries an 8-bit displacement which the processor
  ; sign-extends, so it can only express -128..127. Emitting e.g. 128 as a
  ; single byte would address [GeneralReg - 128]. For offsets outside that
  ; range this macro therefore emits the 32-bit displacement encoding.
  ;
  prefetchnta_short macro GeneralReg, Offset
  if ((Offset) GT 127) OR ((Offset) LT -128)
          db      0FH, 018H, 080h + GeneralReg
          dd      Offset
  else
          db      0FH, 018H, 040H + GeneralReg, Offset
  endif
          endm
  ;
  ; prefetchnta [GeneralReg + Offset], always with a 32-bit displacement.
  ;
  prefetchnta_long macro GeneralReg, Offset
          db      0FH, 018H, 080h + GeneralReg
          dd      Offset
          endm
  ;
  ; movnti [GeneralReg + Offset], eax      (8-bit displacement, -128..127)
  ;
  movnti_eax macro GeneralReg, Offset
          db      0FH, 0C3H, 040H + GeneralReg, Offset
          endm
  ;
  ; movnti [GeneralReg], eax
  ;
  movnti_eax_0_disp macro GeneralReg
          db      0FH, 0C3H, 000H + GeneralReg
          endm
  ;
  ; movnti [GeneralReg + Offset], ebx      (8-bit displacement, -128..127)
  ;
  movnti_ebx macro GeneralReg, Offset
          db      0FH, 0C3H, 058H + GeneralReg, Offset
          endm
  ;
  ; Copy 64 bytes (one cache line) from [esi] to [edi] with movnti, two
  ; dwords at a time through eax and ebx. esi and edi are not advanced;
  ; eax and ebx are destroyed.
  ;
  movnticopy64bytes macro
          mov     eax, [esi]
          mov     ebx, [esi + 4]
          movnti_eax_0_disp rEDI
          movnti_ebx rEDI, 4
          mov     eax, [esi + 8]
          mov     ebx, [esi + 12]
          movnti_eax rEDI, 8
          movnti_ebx rEDI, 12
          mov     eax, [esi + 16]
          mov     ebx, [esi + 20]
          movnti_eax rEDI, 16
          movnti_ebx rEDI, 20
          mov     eax, [esi + 24]
          mov     ebx, [esi + 28]
          movnti_eax rEDI, 24
          movnti_ebx rEDI, 28
          mov     eax, [esi + 32]
          mov     ebx, [esi + 36]
          movnti_eax rEDI, 32
          movnti_ebx rEDI, 36
          mov     eax, [esi + 40]
          mov     ebx, [esi + 44]
          movnti_eax rEDI, 40
          movnti_ebx rEDI, 44
          mov     eax, [esi + 48]
          mov     ebx, [esi + 52]
          movnti_eax rEDI, 48
          movnti_ebx rEDI, 52
          mov     eax, [esi + 56]
          mov     ebx, [esi + 60]
          movnti_eax rEDI, 56
          movnti_ebx rEDI, 60
          endm
  ;++
  ;
  ; VOID
  ; RtlCopyMemoryNonTemporal(
  ;    IN PVOID Destination,
  ;    IN PVOID Source,
  ;    IN ULONG Length
  ;    )
  ;
  ; Routine Description:
  ;
  ;    This function copies from one buffer to another (the buffers must not
  ;    overlap) using nontemporal stores that do not pollute the cache.
  ;    The source is prefetched ahead of the copy, 64 bytes at a time.
  ;
  ;    N.B. The first dword of Source is read unconditionally, even when
  ;         Length is zero.
  ;
  ; Arguments:
  ;
  ;    Destination - Supplies a pointer to the destination of the move.
  ;
  ;    Source - Supplies a pointer to the memory to move.
  ;
  ;    Length - Supplies the Length, in bytes, of the memory to be moved.
  ;
  ; Return Value:
  ;
  ;    None.
  ;
  ;--
  cPublicProc _RtlCopyMemoryNonTemporal ,3
  ; Definitions of arguments
  ; ([ebp] = saved ebp, [ebp+4] = return address)
  CPNDestination equ [ebp + 4 + 4]
  CPNSource equ [ebp + 4 + 8]
  CPNLength equ [ebp + 4 + 12]
          push    ebp
          mov     ebp, esp
          push    esi
          push    edi
          push    ebx                     ; used by movnticopy64bytes
          mov     esi, CPNSource
          mov     edi, CPNDestination
          mov     ecx, CPNLength
  ;
  ; Before prefetching we must guarantee the TLB is valid.
  ;
          mov     eax, [esi]
          cld
  ;
  ; Split the length: (edx) = number of 64-byte blocks, (ecx) = leftover
  ; bytes. Fewer than 64 bytes goes straight to the dword/byte copy.
  ;
          mov     edx, ecx
          and     ecx, MEMORY_ALIGNMENT_MASK0
          shr     edx, MEMORY_ALIGNMENT_LOG2_0
          je      Copy4
  ;
  ; Enter the unrolled sequence at the point that leaves exactly (edx)
  ; blocks to copy, priming the prefetcher on the way.
  ;
  ; N.B. Offsets of 128 and above do not fit the sign-extended 8-bit
  ;      displacement of the short prefetch encoding (128 would address
  ;      [esi-128]), so the 32-bit displacement form is used for them.
  ;
          dec     edx
          je      copy64
          prefetchnta_long rESI, 128
          dec     edx
          je      copy128
          prefetchnta_long rESI, 192
          dec     edx
          je      copy192
  copyLoop:
          prefetchnta_long rESI, 256
          movnticopy64bytes
          lea     esi, [esi + 64]
          lea     edi, [edi + 64]
          dec     edx
          jnz     copyLoop
  copy192:
          movnticopy64bytes
          lea     esi, [esi + 64]
          lea     edi, [edi + 64]
  copy128:
          movnticopy64bytes
          lea     esi, [esi + 64]
          lea     edi, [edi + 64]
  copy64:
          movnticopy64bytes
          or      ecx, ecx                ; anything less than 64 to do?
          jz      ExitRoutine
  ;
  ; esi has not been advanced past the block just copied yet, so the
  ; leftover bytes start at esi + 64; prefetch that line.
  ;
          prefetchnta_short rESI, 64
  ;
  ;Update pointer for last copy
  ;
          lea     esi, [esi + 64]
          lea     edi, [edi + 64]
  ;
  ;Handle extra bytes here in 32 bit chunks and then 8-bit bytes
  ;
  Copy4:
          mov     edx, ecx
          and     ecx, MEMORY_ALIGNMENT_MASK1
          shr     edx, MEMORY_ALIGNMENT_LOG2_1
  ;
  ; If the number of 32-bit words to move is non-zero, then do it
  ; (the flags tested here are those left by the shr above).
  ;
          jz      RemainingBytes
  Copy4Loop:
          mov     eax, [esi]
          movnti_eax_0_disp rEDI
          lea     esi, [esi+4]
          lea     edi, [edi+4]
          dec     edx
          jnz     Copy4Loop
  RemainingBytes:
          or      ecx, ecx
          jz      ExitRoutine
          rep     movsb
  ExitRoutine:
          sfence                          ;Make all stores globally visible
          pop     ebx
          pop     edi
          pop     esi
          pop     ebp
          stdRET  _RtlCopyMemoryNonTemporal
  stdENDP _RtlCopyMemoryNonTemporal
  ;++
  ;
  ; VOID
  ; RtlPrefetchCopyMemory(
  ;    IN PVOID Destination,
  ;    IN PVOID Source,
  ;    IN ULONG Length
  ;    )
  ;
  ; Routine Description:
  ;
  ;    This function copies from one buffer to another (the buffers must not
  ;    overlap) with ordinary stores, prefetching the source up to 256 bytes
  ;    ahead in 64-byte steps.
  ;
  ;    N.B. The first dword of Source is read unconditionally, even when
  ;         Length is zero.
  ;
  ; Arguments:
  ;
  ;    Destination - Supplies a pointer to the destination of the move.
  ;
  ;    Source - Supplies a pointer to the memory to move.
  ;
  ;    Length - Supplies the Length, in bytes, of the memory to be moved.
  ;
  ; Return Value:
  ;
  ;    None.
  ;
  ;--
  cPublicProc _RtlPrefetchCopyMemory,3
  ;
  ; The CPNxxx argument definitions are relative to ebp
  ; ([ebp] = saved ebp, [ebp+4] = return address).
  ;
          push    ebp
          mov     ebp, esp
          push    esi
          push    edi
          push    ebx                     ; saved and restored; not otherwise used here
          mov     esi, CPNSource
          mov     edi, CPNDestination
          mov     ecx, CPNLength
  ;
  ; Before prefetching we must guarantee the TLB is valid.
  ;
          mov     eax, [esi]
          cld
  ;
  ; Split the length: (edx) = number of 64-byte blocks, (ecx) = leftover
  ; bytes. Fewer than 64 bytes goes straight to the byte copy.
  ;
          mov     edx, ecx
          and     ecx, MEMORY_ALIGNMENT_MASK0
          shr     edx, MEMORY_ALIGNMENT_LOG2_0
          je      short pcmCopy4
  ;
  ; The leftover count is parked on the stack because ecx is needed as the
  ; REP count for each block. push does not disturb the flags set by dec.
  ;
  ; N.B. Offsets of 128 and above do not fit the sign-extended 8-bit
  ;      displacement of the short prefetch encoding (128 would address
  ;      [esi-128]), so the 32-bit displacement form is used for them.
  ;
          dec     edx
          push    ecx
          je      short pcmcopy64
          prefetchnta_long rESI, 128
          dec     edx
          je      short pcmcopy128
          prefetchnta_long rESI, 192
          dec     edx
          je      short pcmcopy192
  pcmcopyLoop:
          prefetchnta_long rESI, 256
          mov     ecx, 16
          rep     movsd
          dec     edx
          jnz     short pcmcopyLoop
  pcmcopy192:
          mov     ecx, 16
          rep     movsd
  pcmcopy128:
          mov     ecx, 16
          rep     movsd
  pcmcopy64:
          mov     ecx, 16
          rep     movsd
          pop     ecx                     ; (ecx) = leftover byte count
          or      ecx, ecx                ; anything less than 64 to do?
          jz      short pcmExitRoutine
          prefetchnta_short rESI, 0       ; esi already points at the leftover bytes
  ;
  ; Copy last part byte by byte.
  ;
  pcmCopy4:
          or      ecx, ecx
          jz      short pcmExitRoutine
          rep     movsb
  pcmExitRoutine:
          pop     ebx
          pop     edi
          pop     esi
          pop     ebp
          stdRET  _RtlPrefetchCopyMemory
  stdENDP _RtlPrefetchCopyMemory
  ;++
  ;
  ; VOID
  ; RtlPrefetchCopyMemory32(
  ;    IN PVOID Destination,
  ;    IN PVOID Source,
  ;    IN ULONG Length
  ;    )
  ;
  ; Routine Description:
  ;
  ;    This function copies from one buffer to another (the buffers must not
  ;    overlap) with ordinary stores, prefetching the source ahead of the
  ;    copy. It differs from RtlPrefetchCopyMemory in issuing two prefetches,
  ;    32 bytes apart, for each 64-byte block.
  ;
  ;    N.B. The first dword of Source is read unconditionally, even when
  ;         Length is zero.
  ;
  ; Arguments:
  ;
  ;    Destination - Supplies a pointer to the destination of the move.
  ;
  ;    Source - Supplies a pointer to the memory to move.
  ;
  ;    Length - Supplies the Length, in bytes, of the memory to be moved.
  ;
  ; Return Value:
  ;
  ;    None.
  ;
  ;--
  cPublicProc _RtlPrefetchCopyMemory32,3
  ;
  ; The CPNxxx argument definitions are relative to ebp
  ; ([ebp] = saved ebp, [ebp+4] = return address).
  ;
          push    ebp
          mov     ebp, esp
          push    esi
          push    edi
          push    ebx                     ; saved and restored; not otherwise used here
          mov     esi, CPNSource
          mov     edi, CPNDestination
          mov     ecx, CPNLength
  ;
  ; Before prefetching we must guarantee the TLB is valid.
  ;
          mov     eax, [esi]
          cld
  ;
  ; Split the length: (edx) = number of 64-byte blocks, (ecx) = leftover
  ; bytes. Fewer than 64 bytes goes straight to the byte copy.
  ;
          mov     edx, ecx
          and     ecx, MEMORY_ALIGNMENT_MASK0
          shr     edx, MEMORY_ALIGNMENT_LOG2_0
          je      short pcm32Copy4
  ;
  ; The leftover count is parked on the stack because ecx is needed as the
  ; REP count for each block. Neither the prefetch nor the push disturbs
  ; the flags set by dec.
  ;
  ; N.B. Offsets of 128 and above do not fit the sign-extended 8-bit
  ;      displacement of the short prefetch encoding (128 would address
  ;      [esi-128]), so the 32-bit displacement form is used for them.
  ;      The second prefetch of the third block is at 224 (192 + 32),
  ;      continuing the 32-byte stride of the surrounding prefetches.
  ;
          dec     edx
          prefetchnta_short rESI, 32
          push    ecx
          je      short pcm32copy64
          prefetchnta_long rESI, 128
          prefetchnta_long rESI, 160
          dec     edx
          je      short pcm32copy128
          prefetchnta_long rESI, 192
          prefetchnta_long rESI, 224
          dec     edx
          je      short pcm32copy192
  pcm32copyLoop:
          prefetchnta_long rESI, 256
          prefetchnta_long rESI, 288
          mov     ecx, 16
          rep     movsd
          dec     edx
          jnz     short pcm32copyLoop
  pcm32copy192:
          mov     ecx, 16
          rep     movsd
  pcm32copy128:
          mov     ecx, 16
          rep     movsd
  pcm32copy64:
          mov     ecx, 16
          rep     movsd
          pop     ecx                     ; (ecx) = leftover byte count
          or      ecx, ecx                ; anything less than 64 to do?
          jz      short pcm32ExitRoutine
          prefetchnta_short rESI, 0       ; esi already points at the leftover bytes
  ;
  ; Copy last part byte by byte.
  ;
  pcm32Copy4:
          or      ecx, ecx
          jz      short pcm32ExitRoutine
          rep     movsb
  pcm32ExitRoutine:
          pop     ebx
          pop     edi
          pop     esi
          pop     ebp
          stdRET  _RtlPrefetchCopyMemory32
  stdENDP _RtlPrefetchCopyMemory32
  subttl "RtlPrefetchMemoryNonTemporal"
  ;++
  ;
  ; VOID
  ; FASTCALL
  ; RtlPrefetchMemoryNonTemporal(
  ;    IN PVOID Source,
  ;    IN SIZE_T Length
  ;    )
  ;
  ; Routine Description:
  ;
  ;    Issues a prefetchnta for the memory starting at Source, stepping
  ;    through Length bytes one prefetch granule at a time. The granule
  ;    size is read from KePrefetchNTAGranularity.
  ;
  ;    Only assembled for the kernel runtime (NTOS_KERNEL_RUNTIME defined
  ;    and BLDR_KERNEL_RUNTIME not defined).
  ;
  ; Arguments:
  ;
  ;    Source (ecx) - Address of the first byte to prefetch.
  ;
  ;    Length (edx) - Number of bytes to cover.
  ;
  ; Return Value:
  ;
  ;    None.
  ;
  ;--
  ifndef BLDR_KERNEL_RUNTIME
  ifdef NTOS_KERNEL_RUNTIME
          extrn   _KePrefetchNTAGranularity:DWORD
  cPublicFastCall RtlPrefetchMemoryNonTemporal ,2
  ;
  ; As assembled, the routine returns immediately. The leading ret is
  ; patched out at boot time on processors that support the prefetch
  ; instruction, which lets execution fall into the loop below.
  ;
          ret                             ; patched out at boot.
          mov     eax, _KePrefetchNTAGranularity ; (eax) = bytes covered per prefetch
  rpmnLoop:
          prefetchnta_short rECX, 0       ; prefetch the current granule
          add     ecx, eax                ; advance to the next granule
          sub     edx, eax                ; account for the bytes covered
          ja      short rpmnLoop          ; continue while bytes remain
          fstRET  RtlPrefetchMemoryNonTemporal
  fstENDP RtlPrefetchMemoryNonTemporal
  endif
  endif
  _TEXT$00 ends
          end