Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

353 lines
8.4 KiB

  1. title "Zero memory pages using fastest means available"
  2. ;++
  3. ;
  4. ; Copyright (c) 1998 Microsoft Corporation
  5. ;
  6. ; Module Name:
  7. ;
  8. ; zero.asm
  9. ;
  10. ; Abstract:
  11. ;
  12. ; Zero memory pages using the fastest means available.
  13. ;
  14. ; Author:
  15. ;
  16. ; Peter Johnston (peterj) 20-Jun-1998.
  17. ; Critical sections of Katmai code adapted from in-line
  18. ; assembly version by Shiv Kaushik of Intel Corp.
  19. ;
  20. ; Environment:
  21. ;
  22. ; x86
  23. ;
  24. ; Revision History:
  25. ;
  26. ;--
  27. .386p
  28. .xlist
  29. include ks386.inc
  30. include callconv.inc
  31. include mac386.inc
  32. .list
  ;
  ; Register Definitions (for instruction macros).
  ;
  ; Each equate is the 3-bit number that selects a general register in
  ; the r/m and SIB-base fields of an x86 instruction encoding.  The
  ; db-based instruction macros in this module add these values into
  ; the ModRM and SIB bytes they emit, so the values must not change.
  ;

  rEAX            equ     0
  rECX            equ     1
  rEDX            equ     2
  rEBX            equ     3
  rESP            equ     4
  rEBP            equ     5
  rESI            equ     6
  rEDI            equ     7
  ;
  ; Define SIMD instructions used in this module.
  ;
  ; Every macro below emits its instruction as raw bytes with db.
  ; XMM register arguments are plain register numbers (0 selects xmm0);
  ; general register arguments are the rEAX..rEDI equates.  Arguments
  ; are parenthesized where they are used so that an expression passed
  ; as an argument is evaluated as a unit before it is scaled or added.
  ;

  if 0

  ; these remain for reference only.  In theory the stuff following
  ; should generate the right code.

  xorps_xmm0_xmm0 macro
          db      0FH, 057H, 0C0H
          endm

  movntps_edx     macro   Offset
          db      0FH, 02BH, 042H, Offset
          endm

  movaps_esp_xmm0 macro
          db      0FH, 029H, 004H, 024H
          endm

  movaps_xmm0_esp macro
          db      0FH, 028H, 004H, 024H
          endm

  endif

  ;
  ; xorps XMMReg1, XMMReg2
  ;
  ;   Register-to-register form (ModRM mod = 11):
  ;   XMMReg1 = XMMReg1 xor XMMReg2.
  ;

  xorps   macro   XMMReg1, XMMReg2
          db      0FH, 057H, 0C0H + ((XMMReg1) * 8) + (XMMReg2)
          endm

  ;
  ; movntps GeneralReg, Offset, XMMReg
  ;
  ;   Non-temporal store of XMMReg to [GeneralReg + Offset], encoded
  ;   with a one byte displacement (ModRM mod = 01).  Offset is emitted
  ;   as a single byte.  GeneralReg must not be rESP: that r/m value
  ;   requires a SIB byte, which this macro does not emit.
  ;
  ;   The body previously spelled the third parameter "XmmReg", which
  ;   only matched the declared "XMMReg" when the assembler folds case.
  ;

  movntps macro   GeneralReg, Offset, XMMReg
          db      0FH, 02BH, 040H + ((XMMReg) * 8) + (GeneralReg), (Offset)
          endm

  ;
  ; sfence
  ;
  ;   Store fence; takes no operands.
  ;

  sfence  macro
          db      0FH, 0AEH, 0F8H
          endm

  ;
  ; movaps_load XMMReg, GeneralReg
  ; movaps_store GeneralReg, XMMReg
  ;
  ;   Aligned 16 byte load from / store to [GeneralReg].  Both are
  ;   encoded with ModRM mod = 00, r/m = 4 (SIB follows) and a SIB byte
  ;   of "no index, base = GeneralReg", which is what allows rESP to be
  ;   used as the base.  rEBP is not usable: with mod = 00 that base
  ;   value selects a 32 bit displacement instead of a register.
  ;   Argument order follows destination, source.
  ;

  movaps_load     macro   XMMReg, GeneralReg
          db      0FH, 028H, ((XMMReg) * 8) + 4, (4 * 8) + (GeneralReg)
          endm

  movaps_store    macro   GeneralReg, XMMReg
          db      0FH, 029H, ((XMMReg) * 8) + 4, (4 * 8) + (GeneralReg)
          endm
  ;
  ; NPX Save and Restore
  ;
  ; fxsave and fxrstor share the opcode bytes 0F AE and are told apart
  ; by the reg field of the ModRM byte (0 for fxsave, 1 for fxrstor).
  ; Both macros emit ModRM mod = 00, i.e. the save area is addressed
  ; as [Register] with no displacement.  Register is one of the
  ; rEAX..rEDI equates; rESP and rEBP do not encode a plain [reg]
  ; operand in this form and must not be used.
  ;

  fxsave  macro   Register
          db      0FH, 0AEH, (0 * 8) + Register
          endm

  fxrstor macro   Register
          db      0FH, 0AEH, (1 * 8) + Register
          endm
  87. _TEXT SEGMENT DWORD PUBLIC 'CODE'
  88. ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
  89. ;++
  90. ;
  91. ; VOID
  92. ; KeZeroPage(
  93. ; PageBase
  94. ; )
  95. ;
  96. ; Routine Description:
  97. ;
  98. ; KeZeroPage is really just a function pointer that points at
  99. ; either KiZeroPage or KiXMMIZeroPage depending on whether or
  100. ; not XMMI instructions are available.
  101. ;
  102. ; Arguments:
  103. ;
  104. ; (ecx) PageBase Base address of page to be zeroed.
  105. ;
  106. ;
  107. ; Return Value:
  108. ;
  109. ;--
  page    ,132
  subttl  "KiXMMIZeroPageNoSave - Use XMMI to zero memory (XMMI owned)"
  ;++
  ;
  ; VOID
  ; KiXMMIZeroPageNoSave (
  ;     IN PVOID PageBase
  ;     )
  ;
  ; Routine Description:
  ;
  ;     Use XMMI to zero a page of memory 16 bytes at a time while
  ;     at the same time minimizing cache pollution.
  ;
  ;     Note: The XMMI register set belongs to this thread.  It is neither
  ;     saved nor restored by this procedure.
  ;
  ; Arguments:
  ;
  ;     (ecx) PageBase  Virtual address of the base of the page to be zeroed.
  ;
  ; Return Value:
  ;
  ;     None.
  ;
  ; Registers modified:
  ;
  ;     eax, ecx, xmm0 and the flags.  Unless SFENCE_IS_NOT_BUSTED is
  ;     defined, the dword at [esp-4] is also exchanged with eax.
  ;
  ;--

  ;
  ; Bytes zeroed per loop iteration.  The loop body below is unrolled
  ; by hand into four 16 byte stores, so it covers exactly 64 bytes;
  ; if this value is changed the stores must be changed to match.
  ;

  INNER_LOOP_BYTES equ    64

  cPublicFastCall KiXMMIZeroPageNoSave,1
  cPublicFpo 0, 1

          xorps   0, 0                            ; zero xmm0 (128 bits)
          mov     eax, PAGE_SIZE/INNER_LOOP_BYTES ; Number of Iterations
  inner:
          movntps rECX, 0, 0                      ; store bytes  0 - 15
          movntps rECX, 16, 0                     ;             16 - 31
          movntps rECX, 32, 0                     ;             32 - 47
          movntps rECX, 48, 0                     ;             48 - 63

          ; Advance by the same constant the iteration count was derived
          ; from, so stride and count cannot drift apart.

          add     ecx, INNER_LOOP_BYTES           ; increment base
          dec     eax                             ; decrement loop count
          jnz     short inner

          ; Force all stores to complete before any other
          ; stores from this processor.

          sfence

  ifndef SFENCE_IS_NOT_BUSTED

          ; ERRATA the next uncached write to this processor's apic
          ; may fail unless the store pipes have drained.  sfence by
          ; itself is not enough.  Force drainage now by doing an
          ; interlocked exchange.
          ;
          ; The dword just below the stack pointer is used only as a
          ; scratch target for the locked cycle; the value read back
          ; into eax is not used.

          xchg    [esp-4], eax

  endif

          fstRET  KiXMMIZeroPageNoSave

  fstENDP KiXMMIZeroPageNoSave
  page    ,132
  subttl  "KiXMMIZeroPage - Use XMMI to zero memory"
  ;++
  ;
  ; VOID
  ; KiXMMIZeroPage (
  ;     IN PVOID PageBase
  ;     )
  ;
  ; Routine Description:
  ;
  ;     Zero a page with XMMI when the caller does not own the XMMI
  ;     register set.  This is a wrapper around KiXMMIZeroPageNoSave:
  ;     it makes sure the NPX/XMMI state for the current thread is
  ;     loaded on the processor, preserves xmm0 in an aligned stack
  ;     slot across the call, and restores it afterwards.
  ;
  ;     If floating point is explicitly disabled for the thread (CR0_EM
  ;     set in the saved Cr0NpxState), control is transferred to
  ;     KiZeroPage instead.
  ;
  ;     ecx is not written anywhere in this routine, so PageBase reaches
  ;     whichever routine does the zeroing unchanged.
  ;
  ; Arguments:
  ;
  ;     (ecx) PageBase  Virtual address of the base of the page to be zeroed.
  ;
  ; Return Value:
  ;
  ;     None.
  ;
  ;--

  cPublicFastCall KiXMMIZeroPage,1
  cPublicFpo 0, 2

          ;
          ; Register roles until kxzp80:
          ;   eax - PCR[PcInitialStack]; addressed as the NPX save area
          ;         of the current thread
          ;   edx - current thread
          ;   ebx - scratch (saved and restored)
          ;   ebp - stack pointer as it was before the aligned slot was
          ;         carved out
          ;

          mov     eax, PCR[PcInitialStack]
          mov     edx, PCR[PcPrcbData+PbCurrentThread]

          push    ebp
          push    ebx
          mov     ebp, esp                        ; remember unaligned esp
          sub     esp, 16                         ; room for xmm0 ...
          and     esp, 0FFFFFFF0H                 ; ... on a 16 byte boundary

          cli                                     ; no context switch from here

          test    [eax].FpCr0NpxState, CR0_EM     ; FP explicitly disabled?
          jnz     short kxzp90                    ; yes, zero the old way

          cmp     byte ptr [edx].ThNpxState, NPX_STATE_LOADED
          je      short kxzp80                    ; state already loaded

          ;
          ; The NPX state of this thread is not loaded.  Mark it loaded
          ; now; it will be by the time context switching is reenabled.
          ;

          mov     byte ptr [edx].ThNpxState, NPX_STATE_LOADED

          ;
          ; Clear MP, TS and EM in CR0 so FP/XMMI instructions can be
          ; executed.
          ;

          mov     ebx, cr0
          and     ebx, NOT (CR0_MP+CR0_TS+CR0_EM)
          mov     cr0, ebx                        ; enable NPX

  ifdef NT_UP

          ;
          ; On a UP machine the state on the processor may belong to
          ; another thread, in which case it has to be saved into that
          ; thread's save area before it is replaced.
          ;

          mov     ebx, PCR[PcPrcbData+PbNpxThread] ; owner of NPX state
          or      ebx, ebx                        ; no owner?
          jz      short kxzp70                    ; nothing to save

          mov     byte ptr [ebx].ThNpxState, NPX_STATE_NOT_LOADED
          mov     ebx, [ebx].ThInitialStack       ; owner's save area is
          sub     ebx, NPX_FRAME_LENGTH           ; below its initial stack
          fxsave  rEBX                            ; save NPX

  kxzp70:

  endif

          ;
          ; Load the NPX context of this thread.  If this thread is
          ; switched away from, the state will be saved into this same
          ; save area again, so it must hold the thread's real context
          ; rather than be destroyed.
          ;

          fxrstor rEAX
          mov     PCR[PcPrcbData+PbNpxThread], edx ; this thread owns NPX

  kxzp80:
          sti                                     ; context switching is ok

          movaps_store rESP, 0                    ; save xmm0 in the slot
          fstCall KiXMMIZeroPageNoSave            ; zero the page
          movaps_load 0, rESP                     ; restore xmm0

          ;
          ; Discard the aligned slot, restore non-volatiles and return.
          ;

          mov     esp, ebp
          pop     ebx
          pop     ebp
          fstRET  KiXMMIZeroPage

          ;
          ; FP is explicitly disabled for this thread (probably a VDM
          ; thread).  Undo the stack set-up and jump into KiZeroPage to
          ; do the work the old fashioned way.
          ;

  kxzp90:
          sti
          mov     esp, ebp
          pop     ebx
          pop     ebp
          jmp     short @KiZeroPage@4

  fstENDP KiXMMIZeroPage
  page    ,132
  subttl  "KiZeroPage - Available to all X86 processors"
  ;++
  ;
  ; KiZeroPage(
  ;     PVOID PageBase
  ;     )
  ;
  ; Routine Description:
  ;
  ;     Generic Zero Page routine, used on processors that don't have
  ;     a more efficient way to zero large blocks of memory.
  ;     (Same as RtlZeroMemory).
  ;
  ;     The page is filled with PAGE_SIZE/4 dword stores by a single
  ;     rep stosd.
  ;
  ; Arguments:
  ;
  ;     (ecx) PageBase  Base address of page to be zeroed.
  ;
  ; Return Value:
  ;
  ;     None.
  ;
  ; Registers modified:
  ;
  ;     eax, ecx and the flags.  edi is used but preserved.
  ;
  ; NOTE(review): the direction flag is not touched here, so the
  ; routine relies on it already being clear on entry -- confirm
  ; against callers.
  ;
  ;--

  cPublicFastCall KiZeroPage,1
  cPublicFpo 0, 0

          push    edi                             ; edi is non-volatile

          mov     edi, ecx                        ; edi = destination
          mov     ecx, PAGE_SIZE/4                ; ecx = dwords to store
          xor     eax, eax                        ; eax = fill value (0)

          rep stosd                               ; store eax, ecx times at edi

          pop     edi                             ; restore edi and return
          fstRET  KiZeroPage

  fstENDP KiZeroPage
  277. _TEXT ends
  278. end