Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

290 lines
6.5 KiB

  1. title "Hal Copy using Movnti"
  2. ;++
  3. ;
  4. ;Copyright (c) 2000 Microsoft Corporation
  5. ;
  6. ;Module Name:
  7. ;
  8. ; ixmovnti.asm
  9. ;
  10. ;Abstract:
  11. ;
  12. ; HAL routine that uses the movnti instruction to copy a buffer,
  13. ; similar to RtlMoveMemory, but it does not support backward or
  14. ; overlapping moves.
  15. ; Based on a previously tested fast copy by Jim Crossland.
  16. ;Author:
  17. ; Gautham chinya
  18. ; Intel Corp
  19. ;
  20. ;Revision History:
  21. ;
  22. ;--
  23. .386p
  24. .xlist
  25. include callconv.inc ; calling convention macros
  26. include mac386.inc
  27. .list
  ;
  ; x86 register numbers. The instruction macros in this file add one of
  ; these to a ModRM base value to select the base register of the
  ; memory operand.
  ;
  rEAX    equ     0
  rECX    equ     1
  rEDX    equ     2
  rEBX    equ     3
  rESP    equ     4
  rEBP    equ     5
  rESI    equ     6
  rEDI    equ     7

  ;
  ; Block sizes used by the copy routine, expressed as a shift count and
  ; the matching remainder mask:
  ;   level 0 - 64-byte blocks (shift 6, mask 63)
  ;   level 1 - 4-byte dwords  (shift 2, mask 3)
  ;
  MEMORY_ALIGNMENT_LOG2_0 = 6
  MEMORY_ALIGNMENT_MASK0  = (1 SHL MEMORY_ALIGNMENT_LOG2_0) - 1
  MEMORY_ALIGNMENT_LOG2_1 = 2
  MEMORY_ALIGNMENT_MASK1  = (1 SHL MEMORY_ALIGNMENT_LOG2_1) - 1
  ;
  ; sfence
  ;
  ; Emits the three opcode bytes of the SFENCE instruction (0F AE F8).
  ; NOTE(review): presumably hand-encoded because the assembler in use had
  ; no mnemonic for it - confirm before replacing with the real mnemonic.
  ;
  sfence  macro
          db      0Fh, 0AEh, 0F8h
          endm
  ;
  ; prefetchnta_short BaseReg, Offset
  ;
  ; Emits PREFETCHNTA [BaseReg + Offset] (opcode 0F 18 /0).
  ;
  ; BaseReg - one of the rExx register numbers (added to the ModRM byte).
  ;           rESP is not usable here: that r/m value selects a SIB byte.
  ; Offset  - byte displacement from BaseReg.
  ;
  ; The compact encoding stores Offset in a single displacement byte that
  ; the processor sign-extends, so it can only express -128..127.  A value
  ; such as 128 or 192 placed in that byte would address BaseReg-128 or
  ; BaseReg-64.  To keep every caller correct, an Offset outside the signed
  ; 8-bit range is emitted with the 32-bit displacement form instead (three
  ; bytes longer, same instruction).
  ;
  prefetchnta_short macro BaseReg, Offset
          if ((Offset) GE -128) AND ((Offset) LE 127)
          db      0Fh, 018h, 040h + BaseReg, Offset
          else
          db      0Fh, 018h, 080h + BaseReg
          dd      Offset
          endif
          endm
  ;
  ; prefetchnta_long BaseReg, Disp32
  ;
  ; Emits PREFETCHNTA [BaseReg + Disp32] (opcode 0F 18 /0) using the
  ; ModRM mod=10 form, i.e. with a full 32-bit displacement.
  ;
  ; BaseReg - one of the rExx register numbers.
  ; Disp32  - byte displacement from BaseReg, stored as a dword.
  ;
  prefetchnta_long macro BaseReg, Disp32
          db      0Fh, 018h, (2 SHL 6) + BaseReg
          dd      Disp32
          endm
  ;
  ; movnti_eax BaseReg, Disp8
  ;
  ; Emits MOVNTI [BaseReg + Disp8], eax (opcode 0F C3 /r) with ModRM
  ; mod=01 (one displacement byte) and reg=eax.
  ;
  ; BaseReg - one of the rExx register numbers.
  ; Disp8   - byte displacement; it is stored in a single byte that the
  ;           processor sign-extends, so keep it within -128..127.
  ;
  movnti_eax macro BaseReg, Disp8
          db      0Fh, 0C3h, (1 SHL 6) + (rEAX SHL 3) + BaseReg, Disp8
          endm
  ;
  ; movnti_eax_0_disp BaseReg
  ;
  ; Emits MOVNTI [BaseReg], eax (opcode 0F C3 /r) with ModRM mod=00,
  ; i.e. no displacement byte, and reg=eax.
  ;
  ; BaseReg - one of the rExx register numbers.  With mod=00 the r/m
  ;           values for rESP and rEBP do not mean "[esp]" / "[ebp]"
  ;           (they select a SIB byte and a bare disp32 respectively),
  ;           so this macro must not be used with those two registers.
  ;
  movnti_eax_0_disp macro BaseReg
          db      0Fh, 0C3h, (rEAX SHL 3) + BaseReg
          endm
  ;
  ; movnti_ebx BaseReg, Disp8
  ;
  ; Emits MOVNTI [BaseReg + Disp8], ebx (opcode 0F C3 /r) with ModRM
  ; mod=01 (one displacement byte) and reg=ebx.
  ;
  ; BaseReg - one of the rExx register numbers.
  ; Disp8   - byte displacement; it is stored in a single byte that the
  ;           processor sign-extends, so keep it within -128..127.
  ;
  movnti_ebx macro BaseReg, Disp8
          db      0Fh, 0C3h, (1 SHL 6) + (rEBX SHL 3) + BaseReg, Disp8
          endm
  ;
  ; movnticopy64bytes
  ;
  ; Copies 64 bytes (one cache line) from [esi] to [edi] as sixteen dword
  ; loads paired with movnti stores, alternating between eax and ebx as
  ; the data register.
  ;
  ; In:     esi = source, edi = destination
  ; Out:    esi and edi are NOT advanced; the caller steps them.
  ; Clobb:  eax, ebx
  ;
  movnticopy64bytes macro
          ;
          ; First qword: the store through eax uses the no-displacement
          ; encoding.
          ;
          mov     eax, [esi]
          mov     ebx, [esi + 4]
          movnti_eax_0_disp rEDI
          movnti_ebx rEDI, 4
          ;
          ; Remaining seven qwords, one per displacement in the list.
          ;
          irp     Disp, <8, 16, 24, 32, 40, 48, 56>
          mov     eax, [esi + Disp]
          mov     ebx, [esi + Disp + 4]
          movnti_eax rEDI, Disp
          movnti_ebx rEDI, Disp + 4
          endm
          endm
  101. _TEXT$03 SEGMENT DWORD PUBLIC 'CODE'
  102. ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
  103. page ,132
  104. subttl "HalpMovntiCopyBuffer"
  ;++
  ;
  ; VOID
  ; HalpMovntiCopyBuffer(
  ;     IN PVOID Destination,
  ;     IN PVOID Source,
  ;     IN ULONG Length
  ;     )
  ;
  ; Routine Description:
  ;
  ;     Copies Length bytes forward from Source to Destination using
  ;     non-temporal (movnti) stores:
  ;
  ;       1. whole 64-byte blocks via movnticopy64bytes, prefetching the
  ;          source ahead of the copy;
  ;       2. the remaining whole dwords, one movnti each;
  ;       3. the last 0-3 bytes with rep movsb.
  ;
  ;     Requests shorter than 4 bytes go straight to step 3.  An sfence is
  ;     issued before returning on every path.
  ;
  ;     The copy always runs from low to high addresses, so overlapping
  ;     buffers that need a backward move are not supported.
  ;
  ; Arguments:
  ;
  ;     Destination - Supplies a pointer to the destination of the move.
  ;
  ;     Source - Supplies a pointer to the memory to move.
  ;
  ;     Length - Supplies the length, in bytes, of the memory to be moved.
  ;              Treated as an unsigned value.
  ;
  ; Return Value:
  ;
  ;     None.
  ;
  ; Registers:
  ;
  ;     esi, edi, ebx and ebp are saved and restored here.  eax, ecx and
  ;     edx are used as scratch and are not preserved.
  ;
  ;--
  cPublicProc _HalpMovntiCopyBuffer ,3

  ;
  ; Argument locations once the ebp frame below is set up:
  ; [ebp] = saved ebp, [ebp + 4] = return address, arguments follow.
  ;
  EmmDestination  equ     [ebp + 4 + 4]
  EmmSource       equ     [ebp + 4 + 8]
  EmmLength       equ     [ebp + 4 + 12]

          push    ebp
          mov     ebp, esp
          push    esi
          push    edi
          push    ebx                     ; ebx is a data register of movnticopy64bytes

          mov     esi, EmmSource
          mov     edi, EmmDestination
          mov     ecx, EmmLength

  ;
  ; Clear the direction flag before any branch, so that the rep movsb
  ; tail copies forward on the short-request path as well.
  ;
          cld

  ;
  ; Fewer than 4 bytes: not even one dword for movnti, copy bytewise.
  ; Length is a ULONG, so the comparison must be unsigned.
  ;
          cmp     ecx, 4
          jb      RemainingBytes

  ;
  ; Before prefetching we must guarantee the TLB is valid, so touch the
  ; first source dword.
  ;
          mov     eax, [esi]

  ;
  ; ecx = Length mod 64 (bytes left after the 64-byte blocks)
  ; edx = Length / 64   (number of 64-byte blocks)
  ; The je below tests the zero flag produced by the shr.
  ;
          mov     edx, ecx
          and     ecx, MEMORY_ALIGNMENT_MASK0
          shr     edx, MEMORY_ALIGNMENT_LOG2_0
          je      Copy4                   ; no complete 64-byte block

  ;
  ; Dispatch on the block count.  The last three blocks are copied by the
  ; straight-line code at copy192/copy128/copy64; only blocks beyond those
  ; go through copyLoop.  The 128 and 192 byte prefetch distances do not
  ; fit a sign-extended 8-bit displacement, so the 32-bit form is required.
  ;
          dec     edx
          je      copy64                  ; exactly one block
          prefetchnta_long rESI, 128
          dec     edx
          je      copy128                 ; exactly two blocks
          prefetchnta_long rESI, 192
          dec     edx
          je      copy192                 ; exactly three blocks

  ;
  ; edx = block count - 3 here, and is non-zero.
  ;
  copyLoop:
          prefetchnta_long rESI, 256
          movnticopy64bytes
          lea     esi, [esi + 64]
          lea     edi, [edi + 64]
          dec     edx
          jnz     copyLoop

  copy192:
          movnticopy64bytes
          lea     esi, [esi + 64]
          lea     edi, [edi + 64]

  copy128:
          movnticopy64bytes
          lea     esi, [esi + 64]
          lea     edi, [edi + 64]

  copy64:
          movnticopy64bytes
          test    ecx, ecx                ; anything less than 64 left to do?
          jz      ExitRoutine

  ;
  ; Step past the block just copied, then prefetch the start of the tail.
  ; lea leaves the flags alone; nothing below depends on them anyway.
  ;
          lea     esi, [esi + 64]
          lea     edi, [edi + 64]
          prefetchnta_short rESI, 0

  ;
  ; Handle the remaining bytes in 32-bit chunks and then single bytes.
  ; edx = number of whole dwords, ecx = trailing byte count (0-3).
  ;
  Copy4:
          mov     edx, ecx
          and     ecx, MEMORY_ALIGNMENT_MASK1
          shr     edx, MEMORY_ALIGNMENT_LOG2_1

  ;
  ; If the number of 32-bit words to move is non-zero, then do it.
  ;
          jz      RemainingBytes

  Copy4Loop:
          mov     eax, [esi]
          movnti_eax_0_disp rEDI
          lea     esi, [esi + 4]
          lea     edi, [edi + 4]
          dec     edx
          jnz     Copy4Loop

  RemainingBytes:
          test    ecx, ecx
          jz      ExitRoutine
          rep movsb

  ExitRoutine:
          sfence                          ; make all stores globally visible
          pop     ebx
          pop     edi
          pop     esi
          pop     ebp
          stdRET  _HalpMovntiCopyBuffer

  stdENDP _HalpMovntiCopyBuffer
  227. _TEXT$03 ends
  228. end