Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

284 lines
6.1 KiB

  1. title "Hal Copy using Movnti"
  2. ;++
  3. ;
  4. ;Copyright (c) 2000 Microsoft Corporation
  5. ;
  6. ;Module Name:
  7. ;
  8. ; ixmovnti.asm
  9. ;
  10. ;Abstract:
  11. ;
  12. ; HAL routine that uses movnti instruction to copy buffer
  13. ; similar to RtlMovememory but does not support backwards and
  14. ; overlapped move
  15. ; Based on a previously tested fast copy by Jim crossland.
  16. ;Author:
  17. ; Gautham chinya
  18. ; Intel Corp
  19. ;
  20. ;Revision History:
  21. ;
  22. ;--
  23. .386p
  24. .xlist
  25. include callconv.inc ; calling convention macros
  26. include mac386.inc
  27. .list
  ;
  ; Register numbers for the hand-encoded instruction macros below.
  ; Each macro adds one of these values to the base of the byte that
  ; follows the two opcode bytes (the ModR/M byte) to choose the base
  ; register of the memory operand.
  ;
  rEAX    equ     0
  rECX    equ     1
  rEDX    equ     2
  rEBX    equ     3
  rESP    equ     4
  rEBP    equ     5
  rESI    equ     6
  rEDI    equ     7

  ;
  ; Copy granularities used by HalpMovntiCopyBuffer.  Each mask is
  ; derived from its shift count so that the two cannot drift apart.
  ;
  ;   *_0 : 64-byte blocks (Length SHR 6 blocks, Length AND 63 left over)
  ;   *_1 : 4-byte dwords  (rest   SHR 2 dwords, rest   AND 3  left over)
  ;
  MEMORY_ALIGNMENT_LOG2_0 = 6
  MEMORY_ALIGNMENT_MASK0  = (1 SHL MEMORY_ALIGNMENT_LOG2_0) - 1
  MEMORY_ALIGNMENT_LOG2_1 = 2
  MEMORY_ALIGNMENT_MASK1  = (1 SHL MEMORY_ALIGNMENT_LOG2_1) - 1
  ;
  ; sfence - emits the three bytes 0F AE F8, the encoding of the SFENCE
  ; (store fence) instruction.  The instruction is spelled out as data,
  ; presumably because the assembler selected by .386p does not accept
  ; the mnemonic.  Takes no arguments and touches no registers.
  ;
  sfence  macro
          db      0Fh, 0AEh               ; two-byte opcode
          db      0F8h                    ; ModR/M byte selecting SFENCE
          endm
  ;
  ; prefetchnta_short GeneralReg, Offset
  ;
  ;   Emits PREFETCHNTA [GeneralReg + Offset] as raw bytes.
  ;
  ;   GeneralReg - one of the rExx register numbers (base register).
  ;   Offset     - constant byte displacement from the base register.
  ;
  ;   The compact form of the instruction carries an 8-bit displacement
  ;   that the processor sign-extends, so it can only express offsets in
  ;   the range -128..127.  Emitting, say, 128 as a single byte would
  ;   really prefetch [GeneralReg - 128].  Offsets outside that range are
  ;   therefore emitted with the 32-bit displacement form instead, which
  ;   keeps the prefetch address equal to what the caller asked for.
  ;   Offsets that do fit produce exactly the same bytes as before.
  ;
  ;   NOTE: rESP cannot be used as GeneralReg; that register number in
  ;   the ModR/M byte requests a SIB byte, which is not emitted here.
  ;
  prefetchnta_short macro GeneralReg, Offset
          if ((Offset) ge -128) and ((Offset) le 127)
          db      0FH, 018H, 040H + GeneralReg, Offset    ; mod=01: disp8
          else
          db      0FH, 018H, 080H + GeneralReg            ; mod=10: disp32
          dd      Offset
          endif
          endm
  ;
  ; prefetchnta_long GeneralReg, Offset
  ;
  ;   Emits PREFETCHNTA [GeneralReg + Offset] as raw bytes, using the
  ;   form with a full 32-bit displacement.
  ;
  ;   GeneralReg - one of the rExx register numbers (base register).
  ;   Offset     - constant displacement, stored as a dword.
  ;
  ;   NOTE: rESP cannot be used as GeneralReg; that register number in
  ;   the ModR/M byte requests a SIB byte, which is not emitted here.
  ;
  prefetchnta_long macro GeneralReg, Offset
          db      0Fh, 18h                        ; opcode
          db      (2 SHL 6) + GeneralReg          ; ModR/M: mod=10, /0, base
          dd      Offset                          ; disp32
          endm
  ;
  ; movnti_eax GeneralReg, Offset
  ;
  ;   Emits MOVNTI [GeneralReg + Offset], eax as raw bytes (non-temporal
  ;   dword store of eax).
  ;
  ;   GeneralReg - one of the rExx register numbers (base register).
  ;   Offset     - constant displacement, emitted as a single byte.  The
  ;                processor sign-extends it, so keep it within -128..127.
  ;
  ;   NOTE: rESP cannot be used as GeneralReg; that register number in
  ;   the ModR/M byte requests a SIB byte, which is not emitted here.
  ;
  movnti_eax macro GeneralReg, Offset
          db      0Fh, 0C3h                                       ; opcode
          db      (1 SHL 6) + (rEAX SHL 3) + GeneralReg, Offset   ; ModR/M, disp8
          endm
  ;
  ; movnti_eax_0_disp GeneralReg
  ;
  ;   Emits MOVNTI [GeneralReg], eax as raw bytes (non-temporal dword
  ;   store of eax with no displacement byte).
  ;
  ;   GeneralReg - one of the rExx register numbers (base register).
  ;
  ;   NOTE: rESP and rEBP cannot be used as GeneralReg.  Without a
  ;   displacement those register numbers mean "SIB byte follows" and
  ;   "32-bit absolute address follows" respectively, and neither is
  ;   emitted here.
  ;
  movnti_eax_0_disp macro GeneralReg
          db      0Fh, 0C3h                       ; opcode
          db      (rEAX SHL 3) + GeneralReg       ; ModR/M: mod=00, reg=eax
          endm
  ;
  ; movnti_ebx GeneralReg, Offset
  ;
  ;   Emits MOVNTI [GeneralReg + Offset], ebx as raw bytes (non-temporal
  ;   dword store of ebx).
  ;
  ;   GeneralReg - one of the rExx register numbers (base register).
  ;   Offset     - constant displacement, emitted as a single byte.  The
  ;                processor sign-extends it, so keep it within -128..127.
  ;
  ;   NOTE: rESP cannot be used as GeneralReg; that register number in
  ;   the ModR/M byte requests a SIB byte, which is not emitted here.
  ;
  movnti_ebx macro GeneralReg, Offset
          db      0Fh, 0C3h                                       ; opcode
          db      (1 SHL 6) + (rEBX SHL 3) + GeneralReg, Offset   ; ModR/M, disp8
          endm
  ;
  ; movnticopy64bytes
  ;
  ;   Copies 64 bytes (one cache line) from [esi] to [edi] with movnti.
  ;   The data travels through eax and ebx, eight bytes at a time: two
  ;   dword loads followed by two non-temporal dword stores.
  ;
  ;   On entry:  esi = source address, edi = destination address.
  ;   On exit:   esi and edi are unchanged; eax and ebx are destroyed.
  ;
  movnticopy64bytes macro
  ;
  ; Bytes 0..7.  The store of the first dword uses the encoding without
  ; a displacement byte.
  ;
          mov     eax, [esi]
          mov     ebx, [esi + 4]
          movnti_eax_0_disp rEDI
          movnti_ebx rEDI, 4
  ;
  ; Bytes 8..63, in seven identical 8-byte steps.
  ;
          irp     Disp, <8, 16, 24, 32, 40, 48, 56>
          mov     eax, [esi + Disp]
          mov     ebx, [esi + Disp + 4]
          movnti_eax rEDI, Disp
          movnti_ebx rEDI, Disp+4
          endm
          endm
  101. _TEXT$03 SEGMENT DWORD PUBLIC 'CODE'
  102. ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
  103. page ,132
  104. subttl "HalpMovntiCopyBuffer"
  ;++
  ;
  ; VOID
  ; HalpMovntiCopyBuffer(
  ;     IN PVOID Destination,
  ;     IN PVOID Source,
  ;     IN ULONG Length
  ;     )
  ;
  ; Routine Description:
  ;
  ;     Copies Length bytes from Source to Destination, front to back.
  ;     Whole 64-byte blocks and then whole dwords are written with
  ;     movnti (non-temporal stores); any final 1..3 bytes are moved
  ;     with rep movsb.  An sfence is issued before returning so that
  ;     the stores are globally visible.
  ;
  ;     The copy always runs forward, so it must not be used for
  ;     overlapping buffers that need a backward move.
  ;
  ; Arguments:
  ;
  ;     Destination - Supplies a pointer to the destination of the move.
  ;
  ;     Source - Supplies a pointer to the memory to move.
  ;
  ;     Length - Supplies the length, in bytes, of the memory to be
  ;         moved.  A length of zero copies nothing and does not touch
  ;         either buffer.
  ;
  ; Return Value:
  ;
  ;     None.
  ;
  ; Register usage:
  ;
  ;     esi - current source address
  ;     edi - current destination address
  ;     ecx - bytes left over after the current granularity
  ;     edx - count of 64-byte blocks, then count of dwords
  ;     eax, ebx - data in flight
  ;
  ;     ebp, esi, edi and ebx are saved and restored; eax, ecx and edx
  ;     are destroyed.
  ;
  ;--
  cPublicProc _HalpMovntiCopyBuffer ,3

  ; Definitions of arguments
  ; (TOS) = Return address

  EmmDestination  equ     [ebp + 4 + 4]
  EmmSource       equ     [ebp + 4 + 8]
  EmmLength       equ     [ebp + 4 + 12]

          push    ebp
          mov     ebp, esp
          push    esi
          push    edi
          push    ebx

          mov     esi, EmmSource
          mov     edi, EmmDestination
          mov     ecx, EmmLength
          cld                             ; rep movsb below must run forward

  ;
  ; Nothing to copy: leave without dereferencing Source.
  ;
          test    ecx, ecx
          jz      ExitRoutine

  ;
  ; Before prefetching we must guarantee the TLB is valid.  A single
  ; byte is read (not a dword) so that a 1..3 byte request never reads
  ; beyond the end of the source buffer.
  ;
          mov     al, [esi]

  ;
  ; Split Length: edx = number of whole 64-byte blocks,
  ;               ecx = bytes left over (0..63).
  ;
          mov     edx, ecx
          and     ecx, MEMORY_ALIGNMENT_MASK0
          shr     edx, MEMORY_ALIGNMENT_LOG2_0
          jz      Copy4                   ; less than 64 bytes in total

  ;
  ; Dispatch on the block count.  One, two or three blocks go straight
  ; to the matching entry of the unrolled tail below; four or more run
  ; the loop first and then fall through the tail for the last three.
  ;
  ; The look-ahead prefetches of +128 and +192 must use the 32-bit
  ; displacement form: an 8-bit displacement is sign-extended by the
  ; processor, so 128 and 192 would turn into -128 and -64 and the
  ; prefetch would land in front of the source buffer.
  ;
          dec     edx
          jz      copy64
          prefetchnta_long rESI, 128
          dec     edx
          jz      copy128
          prefetchnta_long rESI, 192
          dec     edx
          jz      copy192

  copyLoop:
          prefetchnta_long rESI, 256      ; stay four lines ahead
          movnticopy64bytes
          lea     esi, [esi + 64]
          lea     edi, [edi + 64]
          dec     edx
          jnz     copyLoop

  copy192:
          movnticopy64bytes
          lea     esi, [esi + 64]
          lea     edi, [edi + 64]

  copy128:
          movnticopy64bytes
          lea     esi, [esi + 64]
          lea     edi, [edi + 64]

  copy64:
          movnticopy64bytes
          test    ecx, ecx                ; anything less than 64 to do?
          jz      ExitRoutine

  ;
  ; The left-over bytes start right after the block just copied, so
  ; that is the line to prefetch (the block at [esi] has already been
  ; read in full).  Then step both pointers past the block.
  ;
          prefetchnta_short rESI, 64
          lea     esi, [esi + 64]
          lea     edi, [edi + 64]

  ;
  ; Handle the left-over bytes in 32-bit chunks and then single bytes:
  ; edx = number of whole dwords, ecx = trailing bytes (0..3).
  ;
  Copy4:
          mov     edx, ecx
          and     ecx, MEMORY_ALIGNMENT_MASK1
          shr     edx, MEMORY_ALIGNMENT_LOG2_1

  ;
  ; If the number of 32-bit words to move is non-zero, then do it
  ;
          jz      RemainingBytes

  Copy4Loop:
          mov     eax, [esi]
          movnti_eax_0_disp rEDI
          lea     esi, [esi + 4]
          lea     edi, [edi + 4]
          dec     edx
          jnz     Copy4Loop

  RemainingBytes:
          test    ecx, ecx
          jz      ExitRoutine
          rep     movsb                   ; last 1..3 bytes

  ExitRoutine:
          sfence                          ; Make all stores globally visible
          pop     ebx
          pop     edi
          pop     esi
          pop     ebp
          stdRET  _HalpMovntiCopyBuffer

  stdENDP _HalpMovntiCopyBuffer
  221. _TEXT$03 ends
  222. end