Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

369 lines
9.5 KiB

  1. title "Zero memory pages using fastest means available"
  2. ;++
  3. ;
  4. ; Copyright (c) 1998 Microsoft Corporation
  5. ;
  6. ; Module Name:
  7. ;
  8. ; zero.asm
  9. ;
  10. ; Abstract:
  11. ;
  12. ; Zero memory pages using the fastest means available.
  13. ;
  14. ; Author:
  15. ;
  16. ; Peter Johnston (peterj) 20-Jun-1998.
  17. ; Critical sections of Katmai code adapted from in-line
  18. ; assembly version by Shiv Kaushik of Intel Corp.
  19. ;
  20. ; Environment:
  21. ;
  22. ; x86
  23. ;
  24. ; Revision History:
  25. ;
  26. ;--
  27. .386p
  28. .xlist
  29. include ks386.inc
  30. include callconv.inc
  31. include mac386.inc
  32. .list
  33. ;
  34. ; Register Definitions (for instruction macros).
  35. ;
  ; General register numbers, written as the 3-bit value that the
  ; hand-assembled instruction macros in this module add into the
  ; ModRM / SIB bytes they emit.
  rEAX    equ     000b
  rECX    equ     001b
  rEDX    equ     010b
  rEBX    equ     011b
  rESP    equ     100b
  rEBP    equ     101b
  rESI    equ     110b
  rEDI    equ     111b
  44. ;
  45. ; Define SIMD instructions used in this module.
  46. ;
  ; Disabled (if 0) fixed-operand encodings, kept as a byte-level
  ; reference for the parameterised macros defined after this block.
  ; Nothing between "if 0" and "endif" is assembled.
  47. if 0
  48. ; these remain for reference only. In theory the stuff following
  49. ; should generate the right code.
  ; xorps xmm0, xmm0: ModRM 0C0H = mod 11, reg 0, r/m 0.
  50. xorps_xmm0_xmm0 macro
  51. db 0FH, 057H, 0C0H
  52. endm
  ; movntps [edx+Offset], xmm0: ModRM 042H = mod 01 (8-bit displacement),
  ; reg 0, r/m 2 (edx); Offset is emitted as the displacement byte.
  53. movntps_edx macro Offset
  54. db 0FH, 02BH, 042H, Offset
  55. endm
  ; movaps [esp], xmm0: ModRM 004H selects a SIB byte; SIB 024H = no index,
  ; base esp.
  56. movaps_esp_xmm0 macro
  57. db 0FH, 029H, 004H, 024H
  58. endm
  ; movaps xmm0, [esp]: same ModRM/SIB as above with the load opcode (028H).
  59. movaps_xmm0_esp macro
  60. db 0FH, 028H, 004H, 024H
  61. endm
  62. endif
  ;
  ; The macros below hand-assemble the SIMD instructions this module needs
  ; by emitting their opcode bytes with db. Register arguments are plain
  ; numbers: 0-7 for an XMM register, one of the rEAX..rEDI equates for a
  ; general register.
  ;

  ;
  ; xorps XMMReg1, XMMReg2
  ;   0F 57 /r with a register-direct ModRM byte (mod = 11):
  ;   reg field = XMMReg1 (destination), r/m field = XMMReg2 (source).
  ;
  xorps macro XMMReg1, XMMReg2
          db      0FH, 057H, 0C0H + (XMMReg1 * 8) + XMMReg2
  endm

  ;
  ; movntps GeneralReg, Offset, XMMReg
  ;   0F 2B /r, non-temporal store of XMMReg to [GeneralReg + Offset].
  ;   ModRM uses mod = 01, so Offset is emitted as a single signed
  ;   displacement byte. No SIB byte is generated, so GeneralReg must not
  ;   be rESP (r/m = 100b would be read as "SIB follows").
  ;
  ;   The parameter is now referenced with the same spelling it is declared
  ;   with (XMMReg); the previous body used "XmmReg", which only resolves
  ;   when the assembler treats symbol names as case-insensitive.
  ;
  movntps macro GeneralReg, Offset, XMMReg
          db      0FH, 02BH, 040H + (XMMReg * 8) + GeneralReg, Offset
  endm

  ;
  ; sfence
  ;   0F AE F8, store fence.
  ;
  sfence macro
          db      0FH, 0AEH, 0F8H
  endm

  ;
  ; movaps_load XMMReg, GeneralReg
  ;   0F 28 /r, aligned load of XMMReg from [GeneralReg].
  ;   ModRM = mod 00, reg = XMMReg, r/m = 100b (SIB follows);
  ;   SIB   = scale 0, index 100b (none), base = GeneralReg.
  ;   With mod = 00 a base of rEBP would be decoded as a 32-bit displacement
  ;   with no base register, so GeneralReg must not be rEBP.
  ;
  movaps_load macro XMMReg, GeneralReg
          db      0FH, 028H, (XMMReg * 8) + 4, (4 * 8) + GeneralReg
  endm

  ;
  ; movaps_store GeneralReg, XMMReg
  ;   0F 29 /r, aligned store of XMMReg to [GeneralReg]. Same ModRM/SIB
  ;   layout and the same rEBP restriction as movaps_load.
  ;
  movaps_store macro GeneralReg, XMMReg
          db      0FH, 029H, (XMMReg * 8) + 4, (4 * 8) + GeneralReg
  endm
  78. ;
  79. ; NPX Save and Restore
  80. ;
  ; fxsave Register
  ;   Emits 0F AE /0 with ModRM mod = 00: the opcode-extension field is 0
  ;   and the r/m field is Register, i.e. the operand is [Register].
  fxsave macro Register
          db      0FH, 0AEH, (0 * 8) + Register
  endm

  ; fxrstor Register
  ;   Emits 0F AE /1 with ModRM mod = 00: the opcode-extension field is 1
  ;   and the r/m field is Register, i.e. the operand is [Register].
  fxrstor macro Register
          db      0FH, 0AEH, (1 * 8) + Register
  endm
  87. _TEXT SEGMENT DWORD PUBLIC 'CODE'
  88. ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
  89. ;++
  90. ;
  91. ; VOID
  92. ; KeZeroPages (
  93. ; IN PVOID PageBase,
  94. ; IN SIZE_T NumberOfBytes
  95. ; )
  96. ;
  97. ; Routine Description:
  98. ;
  99. ; KeZeroPages is really just a function pointer that points at
  100. ; either KiZeroPages or KiXMMIZeroPages depending on whether or
  101. ; not XMMI instructions are available.
  102. ;
  103. ; Arguments:
  104. ;
  105. ; (ecx) PageBase Base address of pages to be zeroed.
  106. ;
  107. ; (edx) NumberOfBytes Number of bytes to be zeroed. Always a PAGE_SIZE multiple.
  108. ;
  109. ;
  110. ; Return Value:
  111. ;
  112. ;--
  113. page ,132
  114. subttl "KiXMMIZeroPagesNoSave - Use XMMI to zero memory (XMMI owned)"
  115. ;++
  116. ;
  117. ; VOID
  118. ; KiXMMIZeroPagesNoSave (
  119. ; IN PVOID PageBase,
  120. ; IN SIZE_T NumberOfBytes
  121. ; )
  122. ;
  123. ; Routine Description:
  124. ;
  125. ; Use XMMI to zero a page of memory 16 bytes at a time while
  126. ; at the same time minimizing cache pollution.
  127. ;
  128. ; Note: The XMMI register set belongs to this thread. It is neither
  129. ; saved nor restored by this procedure.
  130. ;
  131. ; Arguments:
  132. ;
  133. ; (ecx) PageBase Virtual address of the base of the page to be zeroed.
  134. ;
  135. ; (edx) NumberOfBytes Number of bytes to be zeroed. Always a PAGE_SIZE multiple.
  136. ;
  137. ; Return Value:
  138. ;
  139. ; None.
  140. ;
  141. ;--
  ;
  ; Each pass of the store loop clears INNER_LOOP_BYTES bytes (four 16-byte
  ; non-temporal stores). The byte count is turned into a pass count by
  ; shifting right by INNER_LOOP_SHIFT, so the two constants must be kept
  ; in step: (1 SHL INNER_LOOP_SHIFT) == INNER_LOOP_BYTES.
  ;
  INNER_LOOP_BYTES equ 64
  INNER_LOOP_SHIFT equ 6

  ;
  ; On entry (fastcall):
  ;   ecx = PageBase       first byte to zero
  ;   edx = NumberOfBytes  byte count
  ;
  ; Modifies ecx, edx, xmm0 and flags. xmm0 is not preserved; the caller
  ; must own the XMM register set.
  ;
  cPublicFastCall KiXMMIZeroPagesNoSave,2
  cPublicFpo 0, 1

          xorps   0, 0                        ; xmm0 = 0 (128 bits)
          shr     edx, INNER_LOOP_SHIFT       ; edx = number of passes

          ; A count below INNER_LOOP_BYTES (including 0) shifts down to
          ; zero passes. Without this check the dec/jnz below would wrap
          ; and run 2^32 passes, so leave without storing anything.
          jz      short kxzn90

  inner:
          movntps rECX, 0, 0                  ; bytes  0 - 15
          movntps rECX, 16, 0                 ;       16 - 31
          movntps rECX, 32, 0                 ;       32 - 47
          movntps rECX, 48, 0                 ;       48 - 63
          add     ecx, INNER_LOOP_BYTES       ; advance to the next chunk
          dec     edx                         ; one pass fewer to go
          jnz     short inner

          ; Force all stores to complete before any other
          ; stores from this processor.
          sfence

  ifndef SFENCE_IS_NOT_BUSTED
          ; ERRATA the next uncached write to this processor's APIC
          ; may fail unless the store pipes have drained. sfence by
          ; itself is not enough. Force drainage now by doing an
          ; interlocked exchange. (edx is zero here and is not needed
          ; again, so it serves as the exchange operand.)
          xchg    [esp-4], edx
  endif

  kxzn90:
          fstRET  KiXMMIZeroPagesNoSave
  fstENDP KiXMMIZeroPagesNoSave
  168. page ,132
  169. subttl "KiXMMIZeroPages - Use XMMI to zero memory"
  170. ;++
  171. ;
  172. ; VOID
  173. ; KiXMMIZeroPages (
  174. ; IN PVOID PageBase,
  175. ; IN SIZE_T NumberOfBytes
  176. ; )
  177. ;
  178. ; Routine Description:
  179. ;
  180. ; Use XMMI to zero a page of memory 16 bytes at a time. This
  181. ; routine is a wrapper around KiXMMIZeroPagesNoSave. In this
  182. ; case we don't have the luxury of not saving/restoring context.
  183. ;
  184. ; Arguments:
  185. ;
  186. ; (ecx) PageBase Virtual address of the base of the page to be zeroed.
  187. ;
  188. ; (edx) NumberOfBytes Number of bytes to be zeroed. Always a PAGE_SIZE multiple.
  189. ;
  190. ; Return Value:
  191. ;
  192. ; None.
  193. ;
  194. ;--
  ; KiXMMIZeroPages: make sure this thread's NPX/XMM context is loaded on
  ; the processor, save xmm0 on the stack, call KiXMMIZeroPagesNoSave and
  ; restore xmm0. If FP is disabled for the thread (CR0_EM set in its saved
  ; Cr0NpxState) the work is handed to KiZeroPages instead.
  ;
  ; ecx (PageBase) and edx (NumberOfBytes) are not touched before the call
  ; or the tail jump, so they reach the worker routine unchanged.
  ; eax is used as scratch; ebx and ebp are saved and restored.
  195. cPublicFastCall KiXMMIZeroPages,2
  196. cPublicFpo 0, 2
  197. push ebp
  198. push ebx
  ; ebx = current thread, eax = address of its NPX save area
  ; (ThInitialStack - NPX_FRAME_LENGTH).
  199. mov ebx, PCR[PcPrcbData+PbCurrentThread]
  200. mov eax, [ebx].ThInitialStack
  201. sub eax, NPX_FRAME_LENGTH
  ; ebp remembers the incoming esp so the aligned scratch slot carved out
  ; below can be released with a single "mov esp, ebp".
  202. mov ebp, esp ; save stack pointer
  203. sub esp, 16 ; reserve space for xmm0
  204. and esp, 0FFFFFFF0H ; 16 byte aligned (movaps requires it)
  ; Interrupts stay off from here until kxzp80 / kxzp90 while the NPX
  ; ownership state is examined and changed.
  205. cli ; don't context switch
  206. test [eax].FpCr0NpxState, CR0_EM ; if FP explicitly disabled
  207. jnz short kxzp90 ; do it the old way
  208. cmp byte ptr [ebx].ThNpxState, NPX_STATE_LOADED
  209. je short kxzp80 ; jump if NPX state is already loaded
  210. ; NPX state is not loaded on this thread, it will be by
  211. ; the time we reenable context switching.
  212. mov byte ptr [ebx].ThNpxState, NPX_STATE_LOADED
  213. ; enable use of FP instructions
  ; (ebx is reused as scratch from here on; the thread pointer is
  ; re-read from the PCR further down when it is needed again.)
  214. mov ebx, cr0
  215. and ebx, NOT (CR0_MP+CR0_TS+CR0_EM)
  216. mov cr0, ebx ; enable NPX
  217. ifdef NT_UP
  218. ; if this is a UP machine, the state might be loaded for
  219. ; another thread in which case it needs to be saved.
  220. mov ebx, PCR[PcPrcbData+PbNpxThread]; Owner of NPX state
  221. or ebx, ebx ; NULL?
  222. jz short @f ; yes, skip save.
  223. mov byte ptr [ebx].ThNpxState, NPX_STATE_NOT_LOADED
  224. mov ebx, [ebx].ThInitialStack ; get address of save
  225. sub ebx, NPX_FRAME_LENGTH ; area.
  226. fxsave rEBX ; save NPX state into the previous owner's save area
  227. @@:
  228. endif
  229. ; Now load the NPX context for this thread. This is because
  230. ; if we switch away from this thread it will get saved again
  231. ; in this save area and destroying it would be bad.
  ; eax still holds this thread's save area address computed on entry.
  232. fxrstor rEAX
  ; Record the current thread as the owner of the loaded NPX state.
  233. mov eax, PCR[PcPrcbData+PbCurrentThread]
  234. mov PCR[PcPrcbData+PbNpxThread], eax
  235. kxzp80:
  236. sti ; reenable context switching
  ; esp is the 16-byte aligned slot reserved on entry.
  237. movaps_store rESP, 0 ; save xmm0
  238. fstCall KiXMMIZeroPagesNoSave ; zero the page
  239. movaps_load 0, rESP ; restore xmm0
  240. ; restore stack pointer, non-volatiles and return
  241. mov esp, ebp
  242. pop ebx
  243. pop ebp
  244. fstRET KiXMMIZeroPages
  245. ; FP is explicitly disabled for this thread (probably a VDM
  246. ; thread). Restore stack pointer, non-volatiles and jump into
  247. ; KiZeroPages to do the work the old fashioned way.
  ; The stack is back to its entry state, so KiZeroPages returns
  ; directly to our caller.
  248. kxzp90:
  249. sti
  250. mov esp, ebp
  251. pop ebx
  252. pop ebp
  253. jmp short @KiZeroPages@8
  254. fstENDP KiXMMIZeroPages
  255. page ,132
  256. subttl "KiZeroPages - Available to all X86 processors"
  257. ;++
  258. ;
  259. ; KiZeroPages(
  260. ; PVOID PageBase,
  261. ; IN SIZE_T NumberOfBytes
  262. ; )
  263. ;
  264. ; Routine Description:
  265. ;
  266. ; Generic Zero Page routine, used on processors that don't have
  267. ; a more efficient way to zero large blocks of memory.
  268. ; (Same as RtlZeroMemory).
  269. ;
  270. ; Arguments:
  271. ;
  272. ; (ecx) PageBase Base address of page to be zeroed.
  273. ;
  274. ; (edx) NumberOfBytes Number of bytes to be zeroed. Always a PAGE_SIZE multiple.
  275. ;
  276. ; Return Value:
  277. ;
  278. ; None.
  279. ;
  280. ;--
  ;
  ; On entry (fastcall):
  ;   ecx = PageBase       first byte to zero
  ;   edx = NumberOfBytes  byte count; only whole dwords are written
  ;
  ; Modifies eax, ecx and flags. edi is saved and restored.
  ; NOTE(review): rep stosd advances upward only while the direction flag
  ; is clear; that is assumed here, not established - confirm.
  ;
  cPublicFastCall KiZeroPages,2
  cPublicFpo 0, 0

          push    edi                         ; edi is non-volatile

          mov     edi, ecx                    ; edi = destination
          mov     ecx, edx                    ; ecx = byte count
          xor     eax, eax                    ; eax = fill value (0)
          shr     ecx, 2                      ; bytes -> dwords

          rep stosd                           ; write ecx zero dwords at [edi]

          pop     edi
          fstRET  KiZeroPages
  fstENDP KiZeroPages
  293. _TEXT ends
  294. end