Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

643 lines
21 KiB

  1. title "Memory functions"
  2. ;++
  3. ;
  4. ; Copyright (c) 2000 Microsoft Corporation
  5. ;
  6. ; Module Name:
  7. ;
  8. ; movemem.asm
  9. ;
  10. ; Abstract:
  11. ;
  12. ; This module implements functions to fill, copy, and compare blocks of
  13. ; memory.
  14. ;
  15. ; Author:
  16. ;
  17. ; David N. Cutler (davec) 6-Jul-2000
  18. ;
  19. ; Environment:
  20. ;
  21. ; Any mode.
  22. ;
  23. ;--
  24. include ksamd64.inc
  25. altentry RtlCopyMemoryAlternate
  26. subttl "Compare Memory"
  27. ;++
  28. ;
  29. ; SIZE_T
  30. ; RtlCompareMemory (
  31. ; IN PVOID Source1,
  32. ; IN PVOID Source2,
  33. ; IN SIZE_T Length
  34. ; )
  35. ;
  36. ; Routine Description:
  37. ;
  38. ; This function compares two unaligned blocks of memory and returns the
  39. ; number of leading bytes that compared equal.
  40. ;
  41. ; Arguments:
  42. ;
  43. ; Source1 (rcx) - Supplies a pointer to the first block of memory to
  44. ; compare.
  45. ;
  46. ; Source2 (rdx) - Supplies a pointer to the second block of memory to
  47. ; compare.
  48. ;
  49. ; Length (r8) - Supplies the length, in bytes, of the memory to be
  50. ; compared.
  51. ;
  52. ; Return Value:
  53. ;
  54. ; The number of bytes that compared equal is returned as the function
  55. ; value. If all bytes compared equal, then the length of the original
  56. ; block of memory is returned.
  57. ;
  58. ;--
  59. NESTED_ENTRY RtlCompareMemory, _TEXT$00
  60. push_reg rdi ; save nonvolatile registers
  61. push_reg rsi ; (rsi and rdi are the implicit cmps operands)
  62. END_PROLOGUE
  63. mov rsi, rcx ; set address of first string
  64. mov rdi, rdx ; set address of second string
  65. xor edx, ecx ; low address bits differ if alignments are incompatible
  66. and edx, 07h ; isolate the difference within a quadword
  67. jnz short RlCM50 ; if nz, incompatible alignment
  68. cmp r8, 8 ; check if length to align
  69. jb short RlCM50 ; if b, insufficient alignment length
  70. ;
  71. ; Buffer alignment is compatible and there are enough bytes for alignment.
  72. ;
  73. mov r9, rdi ; save starting Source2 address, subtracted at RlCM40 to form the result
  74. neg ecx ; compute alignment length
  75. and ecx, 07h ; ecx = bytes to next 8-byte boundary, 32-bit write also clears upper rcx
  76. jz short RlCM10 ; if z, buffers already aligned
  77. sub r8, rcx ; reduce count by align length
  78. repe cmpsb ; compare bytes to alignment
  79. jnz short RlCM30 ; if nz, not all bytes matched
  80. RlCM10: mov rcx, r8 ; rcx = bytes remaining
  81. and rcx, -8 ; check if any quadwords to compare
  82. jz short RlCM20 ; if z, no quadwords to compare (rcx is zero here)
  83. sub r8, rcx ; reduce length by compare count
  84. shr rcx, 3 ; compute number of quadwords
  85. repe cmpsq ; compare quadwords
  86. jz short RlCM20 ; if z, all quadwords compared (rcx is zero here)
  87. inc rcx ; count the mismatching quadword as still to be compared
  88. sub rsi, 8 ; back up source address to the mismatching quadword
  89. sub rdi, 8 ; back up destination address to the mismatching quadword
  90. shl rcx, 3 ; compute uncompared bytes
  91. RlCM20: add r8, rcx ; compute residual bytes to compare
  92. jz short RlCM40 ; if z, all bytes compared equal
  93. mov rcx, r8 ; set remaining bytes to compare
  94. repe cmpsb ; compare bytes, locating the first differing byte
  95. jz short RlCM40 ; if z, all bytes compared equal
  96. RlCM30: dec rdi ; cmpsb advanced past the differing byte, exclude it
  97. RlCM40: sub rdi, r9 ; compute number of bytes matched
  98. mov rax, rdi ; return number of bytes matched
  99. pop rsi ; restore nonvolatile register
  100. pop rdi ; restore nonvolatile register
  101. ret ; return
  102. ;
  103. ; Buffer alignment is incompatible or there are fewer than 8 bytes to compare.
  104. ;
  105. RlCM50: test r8, r8 ; test if any bytes to compare
  106. jz short RlCM60 ; if z, no bytes to compare
  107. mov rcx, r8 ; set number of bytes to compare
  108. repe cmpsb ; compare bytes
  109. jz short RlCM60 ; if z, all bytes compared equal
  110. inc rcx ; add the differing byte back into the remaining count
  111. sub r8, rcx ; compute number of bytes matched
  112. RlCM60: mov rax, r8 ; return number of bytes matched
  113. pop rsi ; restore nonvolatile register
  114. pop rdi ; restore nonvolatile register
  115. ret ; return
  116. NESTED_END RtlCompareMemory, _TEXT$00
  117. subttl "Compare Memory 32-bits"
  118. ;++
  119. ;
  120. ; SIZE_T
  121. ; RtlCompareMemoryUlong (
  122. ; IN PVOID Source,
  123. ; IN SIZE_T Length,
  124. ; IN ULONG Pattern
  125. ; )
  126. ;
  127. ; Routine Description:
  128. ;
  129. ; Scans a dword aligned block of memory, one 32-bit value at a time, and
  130. ; reports how many leading bytes match the supplied 32-bit pattern.
  131. ;
  132. ; N.B. Length is converted to a dword count by a right shift, so its low
  133. ; two bits are discarded and never examined.
  134. ;
  135. ; Arguments:
  136. ;
  137. ; Source (rcx) - Supplies a pointer to the block of memory to scan.
  138. ;
  139. ; Length (rdx) - Supplies the length, in bytes, of the block to scan.
  140. ;
  141. ; Pattern (r8d) - Supplies the 32-bit value each dword is compared with.
  142. ;
  143. ; Return Value:
  144. ;
  145. ; The number of bytes (a multiple of four) that precede the first dword
  146. ; differing from Pattern. If every dword matches, the length rounded
  147. ; down to a multiple of four is returned.
  148. ;
  149. ;--
  150. NESTED_ENTRY RtlCompareMemoryUlong, _TEXT$00
  151. push_reg rdi ; rdi is nonvolatile and is the implicit scasd pointer
  152. END_PROLOGUE
  153. mov rdi, rcx ; rdi = scan pointer
  154. mov eax, r8d ; eax = pattern scasd compares against (mov leaves flags alone)
  155. shr rdx, 2 ; rdx = dword count, low two length bits dropped
  156. jz short RlCU20 ; if z, nothing to scan, result is zero
  157. mov rcx, rdx ; rcx = dwords left to scan
  158. repe scasd ; scan while dwords equal the pattern
  159. je short RlCU20 ; if e, every dword matched
  160. not rcx ; rcx = -(dwords left + 1), the +1 is the mismatching dword
  161. add rdx, rcx ; rdx = dwords matched before the mismatch
  162. RlCU20: lea rax, [rdx*4] ; convert matched dwords to bytes
  163. pop rdi ; restore nonvolatile register
  164. ret ; return
  165. NESTED_END RtlCompareMemoryUlong, _TEXT$00
  166. subttl "Copy Memory"
  167. ;++
  168. ;
  169. ; VOID
  170. ; RtlCopyMemory (
  171. ; OUT VOID UNALIGNED *Destination,
  172. ; IN CONST VOID UNALIGNED * Sources,
  173. ; IN SIZE_T Length
  174. ; )
  175. ;
  176. ; Routine Description:
  177. ;
  178. ; This function copies a nonoverlapping block from one unaligned buffer to another.
  179. ; The widest of 8, 4, 2 or 1 byte moves that both addresses can be aligned for is used.
  180. ; Arguments:
  181. ;
  182. ; Destination (rcx) - Supplies a pointer to the destination buffer.
  183. ;
  184. ; Sources (rdx) - Supplies a pointer to the source buffer.
  185. ;
  186. ; Length (r8) - Supplies the length, in bytes, of the copy operation.
  187. ;
  188. ; Return Value:
  189. ;
  190. ; None.
  191. ;
  192. ;--
  193. NESTED_ENTRY RtlCopyMemory, _TEXT$00
  194. push_reg rdi ; save nonvolatile registers
  195. push_reg rsi ; (rsi and rdi are the implicit movs operands)
  196. END_PROLOGUE
  197. ALTERNATE_ENTRY RtlCopyMemoryAlternate ; RtlMoveMemory jumps here after pushing rdi and rsi itself
  198. mov rdi, rcx ; set destination address
  199. mov rsi, rdx ; set source address
  200. ;
  201. ; Check for quadword alignment compatibility.
  202. ;
  203. xor edx, ecx ; low address bits differ if alignments are incompatible
  204. and edx, 07h ; isolate the difference within a quadword
  205. jnz short RlCP40 ; if nz, incompatible alignment
  206. cmp r8, 8 ; check if 8 bytes to move
  207. jb short RlCP20 ; if b, less than 8 bytes to move
  208. ;
  209. ; Buffer alignment is compatible and there are enough bytes for alignment.
  210. ;
  211. neg ecx ; compute alignment length
  212. and ecx, 07h ; ecx = bytes to next 8-byte boundary, 32-bit write also clears upper rcx
  213. jz short RlCP10 ; if z, buffers already aligned
  214. sub r8, rcx ; reduce count by align length
  215. rep movsb ; move bytes to alignment
  216. ;
  217. ; Move 8-byte blocks.
  218. ;
  219. RlCP10: mov rcx, r8 ; compute number of 8-byte blocks
  220. and rcx, -8 ; rcx = byte count covered by whole 8-byte blocks
  221. jz short RlCP20 ; if z, no 8-byte blocks
  222. sub r8, rcx ; subtract 8-byte blocks from count
  223. shr rcx, 3 ; compute number of 8-byte blocks
  224. rep movsq ; move 8-byte blocks
  225. ;
  226. ; Move residual bytes. All alignment cases finish here.
  227. ;
  228. RlCP20: test r8, r8 ; test if any bytes to move
  229. jz short RlCP30 ; if z, no bytes to move
  230. mov rcx, r8 ; set remaining bytes to move
  231. rep movsb ; move bytes to destination
  232. RlCP30: pop rsi ; restore nonvolatile registers
  233. pop rdi ; restore nonvolatile register
  234. ret ; return
  235. ;
  236. ; The source and destination are not quadword alignment compatible.
  237. ;
  238. ; Check for doubleword alignment compatibility.
  239. ;
  240. RlCP40: and edx, 03h ; check if compatible alignment (edx still holds the address difference bits)
  241. jnz short RlCP60 ; if nz, incompatible alignment
  242. cmp r8, 4 ; check if 4 bytes to move
  243. jb short RlCP20 ; if b, less than 4 bytes to move
  244. ;
  245. ; Buffer alignment is compatible and there are enough bytes for alignment.
  246. ;
  247. neg ecx ; compute alignment length
  248. and ecx, 03h ; ecx = bytes to next 4-byte boundary
  249. jz short RlCP50 ; if z, buffers already aligned
  250. sub r8, rcx ; reduce count by align length
  251. rep movsb ; move bytes to alignment
  252. ;
  253. ; Move 4-byte blocks.
  254. ;
  255. RlCP50: mov rcx, r8 ; compute number of 4-byte blocks
  256. and rcx, -4 ; rcx = byte count covered by whole 4-byte blocks
  257. jz short RlCP20 ; if z, no 4-byte blocks
  258. sub r8, rcx ; subtract 4-byte blocks from count
  259. shr rcx, 2 ; compute number of 4-byte blocks
  260. rep movsd ; move 4-byte blocks
  261. jmp short RlCP20 ; finish in common code
  262. ;
  263. ; The source and destination are not doubleword alignment compatible.
  264. ;
  265. ; Check for word alignment compatibility.
  266. ;
  267. RlCP60: and edx, 01h ; check if compatible alignment
  268. jnz short RlCP20 ; if nz, incompatible alignment, copy everything bytewise
  269. cmp r8, 2 ; check if 2 bytes to move
  270. jb short RlCP20 ; if b, less than 2 bytes to move
  271. ;
  272. ; Buffer alignment is compatible and there are enough bytes for alignment.
  273. ;
  274. neg ecx ; compute alignment length
  275. and ecx, 01h ; ecx = bytes to next 2-byte boundary
  276. jz short RlCP70 ; if z, buffers already aligned
  277. sub r8, rcx ; reduce count by align length
  278. rep movsb ; move bytes to alignment
  279. ;
  280. ; Move 2-byte blocks.
  281. ;
  282. RlCP70: mov rcx, r8 ; compute number of 2-byte blocks
  283. and rcx, -2 ; rcx = byte count covered by whole 2-byte blocks
  284. jz short RlCP20 ; if z, no 2-byte blocks
  285. sub r8, rcx ; subtract 2-byte blocks from count
  286. shr rcx, 1 ; compute number of 2-byte blocks
  287. rep movsw ; move 2-byte blocks
  288. jmp short RlCP20 ; finish in common code
  289. NESTED_END RtlCopyMemory, _TEXT$00
  290. subttl "Copy Memory NonTemporal"
  291. ;++
  292. ;
  293. ; VOID
  294. ; RtlCopyMemoryNonTemporal (
  295. ; OUT VOID UNALIGNED *Destination,
  296. ; IN CONST VOID UNALIGNED * Sources,
  297. ; IN SIZE_T Length
  298. ; )
  299. ;
  300. ; Routine Description:
  301. ;
  302. ; This function copies a nonoverlapping block from one buffer to another using
  303. ; nontemporal moves that do not pollute the cache.
  304. ;
  305. ; Arguments:
  306. ;
  307. ; Destination (rcx) - Supplies a pointer to the destination buffer.
  308. ;
  309. ; Sources (rdx) - Supplies a pointer to the source buffer.
  310. ;
  311. ; Length (r8) - Supplies the length, in bytes, of the copy operation.
  312. ;
  313. ; Return Value:
  314. ;
  315. ; None.
  316. ;
  317. ;--
  318. NESTED_ENTRY RtlCopyMemoryNonTemporal, _TEXT$00
  319. push_reg rdi ; save nonvolatile registers
  320. push_reg rsi ; (rsi and rdi are the implicit movs operands)
  321. END_PROLOGUE
  322. mov rdi, rcx ; set destination address
  323. mov rsi, rdx ; set source address
  324. cmp r8, 16 ; check if 16 bytes to move
  325. jb RlNT50 ; if b, less than 16 bytes to move
  326. ;
  327. ; Align the destination to a 16-byte boundary (required by movntdq stores).
  328. ;
  329. neg ecx ; compute alignment length
  330. and ecx, 0fh ; ecx = bytes to next 16-byte boundary, 32-bit write also clears upper rcx
  331. jz short RlNT10 ; if z, destination already aligned
  332. sub r8, rcx ; reduce count by align length
  333. rep movsb ; move bytes to alignment
  334. ;
  335. ; Move 64-byte blocks.
  336. ;
  337. RlNT10: mov rax, r8 ; compute number of 64-byte blocks
  338. and rax, -64 ; rax = byte count covered by whole 64-byte blocks
  339. jz short RlNT30 ; if z, no 64-byte blocks to move
  340. sub r8, rax ; subtract 64-byte blocks from count
  341. RlNT20: prefetchnta 0[rsi] ; prefetch start of source block
  342. prefetchnta 63[rsi] ; prefetch end of source block
  343. movdqu xmm0, [rsi] ; load 64-byte block with unaligned loads, source may be misaligned
  344. movdqu xmm1, 16[rsi] ; second 16 bytes
  345. movdqu xmm2, 32[rsi] ; third 16 bytes
  346. movdqu xmm3, 48[rsi] ; fourth 16 bytes
  347. movntdq [rdi], xmm0 ; store 64-byte block with nontemporal stores
  348. movntdq 16[rdi], xmm1 ; second 16 bytes
  349. movntdq 32[rdi], xmm2 ; third 16 bytes
  350. movntdq 48[rdi], xmm3 ; fourth 16 bytes
  351. add rdi, 64 ; advance destination address
  352. add rsi, 64 ; advance source address
  353. sub rax, 64 ; subtract number of bytes moved
  354. jnz short RlNT20 ; if nz, more 64-byte blocks to move
  355. ;
  356. ; Move 16-byte blocks.
  357. ;
  358. RlNT30: mov rax, r8 ; compute number of 16-byte blocks
  359. and rax, -16 ; rax = byte count covered by whole 16-byte blocks
  360. jz short RlNT50 ; if z, no 16-byte blocks
  361. sub r8, rax ; subtract 16-byte blocks from count
  362. RlNT40: movdqu xmm0, [rsi] ; move 16-byte block
  363. movntdq [rdi], xmm0 ; nontemporal store to the aligned destination
  364. add rdi, 16 ; advance destination address
  365. add rsi, 16 ; advance source address
  366. sub rax, 16 ; subtract number of bytes moved
  367. jnz short RlNT40 ; if nz, more 16-byte blocks to move
  368. ;
  369. ; Move residual bytes.
  370. ;
  371. RlNT50: test r8, r8 ; test if any bytes to move
  372. jz short RlNT60 ; if z, no bytes to move
  373. mov rcx, r8 ; set residual bytes to move
  374. rep movsb ; move residual bytes
  375. RlNT60: sfence ; make sure all stores complete
  376. pop rsi ; restore nonvolatile registers
  377. pop rdi ; restore nonvolatile register
  378. ret ; return
  379. NESTED_END RtlCopyMemoryNonTemporal, _TEXT$00
  380. subttl "Fill Memory"
  381. ;++
  382. ;
  383. ; VOID
  384. ; RtlFillMemory (
  385. ; IN VOID UNALIGNED *Destination,
  386. ; IN SIZE_T Length,
  387. ; IN UCHAR Fill
  388. ; )
  389. ;
  390. ; Routine Description:
  391. ;
  392. ; This function fills a block of unaligned memory with a specified byte value.
  393. ; RtlZeroMemory jumps to this entry with r8 set to zero.
  394. ; Arguments:
  395. ;
  396. ; Destination (rcx) - Supplies a pointer to the memory to fill.
  397. ;
  398. ; Length (rdx) - Supplies the length, in bytes, of the memory to fill.
  399. ;
  400. ; Fill (r8d) - Supplies the value to fill memory with. Only the low byte is used.
  401. ;
  402. ; Return Value:
  403. ;
  404. ; None.
  405. ;
  406. ;--
  407. NESTED_ENTRY RtlFillMemory, _TEXT$00
  408. push_reg rdi ; save nonvolatile register (rdi is the implicit stos pointer)
  409. END_PROLOGUE
  410. mov rdi, rcx ; set destination address
  411. mov eax, r8d ; set fill pattern, al is what stosb stores
  412. cmp rdx, 8 ; check if 8 bytes to fill
  413. jb short RlFM20 ; if b, less than 8 bytes to fill
  414. ;
  415. ; Fill alignment bytes.
  416. ;
  417. neg ecx ; compute alignment length
  418. and ecx, 07h ; ecx = bytes to next 8-byte boundary, 32-bit write also clears upper rcx
  419. jz short RlFM10 ; if z, buffers already aligned
  420. sub rdx, rcx ; reduce count by align length
  421. rep stosb ; fill bytes to alignment
  422. ;
  423. ; Fill 8-byte blocks.
  424. ;
  425. RlFM10: mov rcx, rdx ; compute number of 8-byte blocks
  426. and rcx, -8 ; rcx = byte count covered by whole 8-byte blocks
  427. jz short RlFM20 ; if z, no 8-byte blocks
  428. sub rdx, rcx ; subtract 8-byte blocks from count
  429. shr rcx, 3 ; compute number of 8-byte blocks
  430. mov ah, al ; replicate pattern to dword
  431. shl eax, 16 ; pattern word moves to bits 16..31, upper half of rax is cleared
  432. mov al, r8b ; rebuild the low word from the fill byte
  433. mov ah, al ; eax = fill byte repeated four times
  434. mov r9, rax ; replicate dword to quadword
  435. shl rax, 32 ; move the dword pattern into the upper half
  436. or rax, r9 ; rax = fill byte repeated eight times
  437. rep stosq ; fill 8-byte blocks
  438. ;
  439. ; Fill residual bytes.
  440. ;
  441. RlFM20: test rdx, rdx ; test if any bytes to fill
  442. jz short RlFM30 ; if z, no bytes to fill
  443. mov rcx, rdx ; set remaining bytes to fill
  444. rep stosb ; fill residual bytes
  445. RlFM30: pop rdi ; restore nonvolatile register
  446. ret ; return
  447. NESTED_END RtlFillMemory, _TEXT$00
  448. subttl "Move Memory"
  449. ;++
  450. ;
  451. ; VOID
  452. ; RtlMoveMemory (
  453. ; OUT VOID UNALIGNED *Destination,
  454. ; IN CONST VOID UNALIGNED * Sources,
  455. ; IN SIZE_T Length
  456. ; )
  457. ;
  458. ; Routine Description:
  459. ;
  460. ; Copies Length bytes between two unaligned buffers that are allowed to
  461. ; overlap. A descending byte copy is used when the destination starts
  462. ; above the source start and not beyond its last byte. Every other case
  463. ; is handed to the ascending copy at RtlCopyMemoryAlternate.
  464. ;
  465. ; Arguments:
  466. ;
  467. ; Destination (rcx) - Supplies a pointer to the destination buffer.
  468. ;
  469. ; Sources (rdx) - Supplies a pointer to the source buffer.
  470. ;
  471. ; Length (r8) - Supplies the length, in bytes, of the copy operation.
  472. ;
  473. ; Return Value:
  474. ;
  475. ; None.
  476. ;
  477. ;--
  478. NESTED_ENTRY RtlMoveMemory, _TEXT$00
  479. push_reg rdi ; same prologue as RtlCopyMemory, whose
  480. push_reg rsi ; alternate entry pops these registers
  481. END_PROLOGUE
  482. cmp rcx, rdx ; destination at or below source start
  483. jbe RtlCopyMemoryAlternate ; if be, ascending copy is safe
  484. lea rsi, [rdx + r8 - 1] ; rsi = address of last source byte
  485. cmp rcx, rsi ; destination beyond last source byte
  486. ja RtlCopyMemoryAlternate ; if a, buffers do not overlap
  487. ; Destination starts inside the source, so copy from the last byte down.
  488. lea rdi, [rcx + r8 - 1] ; rdi = address of last destination byte
  489. mov rcx, r8 ; rcx = number of bytes to move
  490. std ; movsb now decrements rsi and rdi
  491. rep movsb ; move bytes from high addresses to low
  492. cld ; put the direction flag back to forward
  493. pop rsi ; restore nonvolatile registers
  494. pop rdi ;
  495. ret ; return
  496. NESTED_END RtlMoveMemory, _TEXT$00
  497. subttl "Prefetch Memory NonTemporal"
  498. ;++
  499. ;
  500. ; VOID
  501. ; RtlPrefetchMemoryNonTemporal (
  502. ; IN CONST PVOID Source,
  503. ; IN SIZE_T Length
  504. ; )
  505. ;
  506. ; Routine Description:
  507. ;
  508. ; Issues a nontemporal prefetch hint (prefetchnta) for every 64-byte line
  509. ; that overlaps the Length bytes starting at Source, including the last
  510. ; line when Source is not 64-byte aligned. A zero Length prefetches nothing.
  511. ;
  512. ; Arguments:
  513. ; Source (rcx) - Supplies a pointer to the memory to be prefetched.
  514. ; Length (rdx) - Supplies the length, in bytes, of the operation.
  515. ;
  516. ; Return Value: None.
  517. ;--
  518. LEAF_ENTRY RtlPrefetchMemoryNonTemporal, _TEXT$00
  519. test rdx, rdx ; test if any bytes to prefetch
  520. jz short RlPF20 ; if z, nothing to prefetch
  521. add rdx, rcx ; rdx = first address past the block (assumes Source + Length does not wrap)
  522. and rcx, -64 ; round start down to its 64-byte line so no trailing line is skipped
  523. RlPF10: prefetchnta 0[rcx] ; prefetch line
  524. add rcx, 64 ; advance to next line
  525. cmp rcx, rdx ; check if next line starts before the end of the block
  526. jb short RlPF10 ; if b, more lines to prefetch
  527. RlPF20: ret ; return
  528. LEAF_END RtlPrefetchMemoryNonTemporal, _TEXT$00
  529. subttl "Zero Memory"
  530. ;++
  531. ;
  532. ; VOID
  533. ; RtlZeroMemory (
  534. ; IN VOID UNALIGNED *Destination,
  535. ; IN SIZE_T Length
  536. ; )
  537. ;
  538. ; Routine Description:
  539. ;
  540. ; Zeroes a block of unaligned memory by jumping to RtlFillMemory with a fill value of zero.
  541. ;
  542. ; Arguments:
  543. ;
  544. ; Destination (rcx) - Supplies a pointer to the memory to zero.
  545. ;
  546. ; Length (rdx) - Supplies the length, in bytes, of the memory to zero.
  547. ;
  548. ; Return Value:
  549. ;
  550. ; None.
  551. ;
  552. ;--
  553. LEAF_ENTRY RtlZeroMemory, _TEXT$00
  554. xor r8d, r8d ; fill value = 0, the 32-bit write clears all of r8
  555. jmp RtlFillMemory ; rcx and rdx already hold Destination and Length
  556. LEAF_END RtlZeroMemory, _TEXT$00
  557. end