Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

507 lines
11 KiB

  1. //
  2. // Module Name:
  3. //
  4. // fillmem.s
  5. //
  6. // Abstract:
  7. //
  8. // This module implements functions to move, zero, and fill blocks
  9. // of memory. If the memory is aligned, then these functions are
  10. // very efficient.
  11. //
  12. // Author:
  13. //
  14. //
  15. // Environment:
  16. //
  17. // User or Kernel mode.
  18. //
  19. //--
  20. #include "ksia64.h"
  21. //++
  22. //
  23. // VOID
  24. // RtlFillMemory (
  25. // IN PVOID destination,
  26. // IN SIZE_T length,
  27. // IN UCHAR fill
  28. // )
  29. //
  30. // Routine Description:
  31. //
  32. // This function fills memory by first aligning the destination address to
  33. // a qword boundary, and then filling 8-byte blocks, followed by any
  34. // remaining bytes. Lengths of 7 or less are filled one byte at a time.
  35. //
  36. // Arguments:
  37. //
  38. // destination (a0) - Supplies a pointer to the memory to fill.
  39. //
  40. // length (a1) - Supplies the length, in bytes, of the memory to be filled.
  41. //
  42. // fill (a2) - Supplies the fill byte; only the low 8 bits are used.
  43. //
  44. // N.B. The alternate entry memset expects the length and fill arguments
  45. // to be reversed. It also returns the Destination pointer.
  46. //
  47. // Return Value:
  48. //
  49. // None. (v0 is nevertheless loaded with the destination pointer.)
  50. //
  51. //--
  52. LEAF_ENTRY(RtlFillMemory)
  53. lfetch.excl [a0] // prefetch the first line with the exclusive hint
  54. mov t0 = a0 // t0 = running store pointer
  55. add t4 = 64, a0 // t4 = prefetch pointer, 64 bytes ahead
  56. cmp.eq pt0 = zero, a1 // length == 0 ?
  57. add t1 = -1, a0 // t1 = destination - 1, complemented below
  58. zxt1 a2 = a2 // keep only the low byte of fill
  59. cmp.ge pt1 = 7, a1 // pt1 = (length <= 7)
  60. mov v0 = a0 // v0 = destination
  61. (pt0) br.ret.spnt brp // return if length is zero
  62. ;;
  63. //
  64. // Align address on qword boundary by determining the number of bytes
  65. // before the next qword boundary by performing an AND operation on
  66. // the 2's complement of the address with a mask value of 0x7.
  67. //
  68. lfetch.excl [t4], 64 // prefetch, then advance t4 by 64
  69. andcm t1 = 7, t1 // t1 = 7 & ~(destination - 1) = # bytes before qword boundary
  70. (pt1) br.cond.spnt TailSet // 1 <= length <= 7, br to TailSet
  71. ;;
  72. cmp.eq pt2 = zero, t1 // skip HeadSet if t1 is zero
  73. mux1 t2 = a2, @brcst // t2 = all 8 bytes = [fill]
  74. sub a1 = a1, t1 // a1 = adjusted length
  75. ;;
  76. lfetch.excl [t4], 64
  77. (pt2) br.cond.sptk SkipHeadSet
  78. //
  79. // Fill the leading bytes until t1 is equal to zero
  80. //
  81. HeadSet:
  82. st1 [t0] = a2, 1 // store one fill byte, advance t0
  83. add t1 = -1, t1
  84. ;;
  85. cmp.ne pt0 = zero, t1 // pt0 = more leading bytes remain
  86. (pt0) br.cond.sptk HeadSet
  87. //
  88. // now the address is qword aligned;
  89. // use the qword loops if the remaining length is at least 16;
  90. // else skip to SkipQwordSet
  91. //
  92. SkipHeadSet:
  93. cmp.gt pt1 = 16, a1 // pt1 = (remaining < 16)
  94. add t4 = 64, t0 // restart the prefetch pointer 64 bytes past t0
  95. cmp.le pt2 = 8, a1 // pt2 = (remaining >= 8), consumed at SkipQwordSet
  96. add t3 = 8, t0 // t3 = second store pointer, 8 bytes past t0
  97. cmp.gt pt3 = 64, a1 // pt3 = (remaining < 64)
  98. (pt1) br.cond.spnt SkipQwordSet
  99. ;;
  100. lfetch.excl [t4], 64
  101. (pt3) br.cond.spnt QwordSet // too short for the 64-byte unrolled loop
  102. nop.m 0
  103. nop.m 0
  104. nop.i 0
  105. UnrolledQwordSet: // 64 bytes per pass: t0 and t3 each store every other qword
  106. st8 [t0] = t2, 16
  107. st8 [t3] = t2, 16
  108. add a1 = -64, a1 // account for the 64 bytes stored by this pass
  109. ;;
  110. st8 [t0] = t2, 16
  111. st8 [t3] = t2, 16
  112. cmp.le pt0 = 64, a1 // pt0 = another full 64-byte pass remains
  113. ;;
  114. st8 [t0] = t2, 16
  115. st8 [t3] = t2, 16
  116. cmp.le pt2 = 8, a1 // pt2 = (remaining >= 8), consumed at SkipQwordSet
  117. ;;
  118. st8 [t0] = t2, 16
  119. nop.f 0
  120. cmp.gt pt1 = 16, a1 // pt1 = (remaining < 16)
  121. st8 [t3] = t2, 16
  122. (pt0) br.cond.sptk UnrolledQwordSet
  123. (pt1) br.cond.spnt SkipQwordSet // otherwise fall into QwordSet with remaining >= 16
  124. ;;
  125. //
  126. // fill 16 bytes at a time until the remaining length is less than 16
  127. //
  128. QwordSet:
  129. st8 [t0] = t2, 16
  130. st8 [t3] = t2, 16
  131. add a1 = -16, a1
  132. ;;
  133. cmp.le pt0 = 16, a1 // pt0 = another 16-byte pass remains
  134. cmp.le pt2 = 8, a1 // pt2 = (remaining >= 8)
  135. (pt0) br.cond.sptk QwordSet
  136. ;;
  137. SkipQwordSet:
  138. (pt2) st8 [t0] = t2, 8 // one more qword if at least 8 bytes remain
  139. (pt2) add a1 = -8, a1
  140. ;;
  141. cmp.eq pt3 = zero, a1 // return now if length equals 0
  142. (pt3) br.ret.sptk brp
  143. ;;
  144. //
  145. // fill the remaining bytes one at a time
  146. //
  147. TailSet:
  148. st1 [t0] = a2, 1 // store one fill byte, advance t0
  149. add a1 = -1, a1
  150. nop.i 0
  151. ;;
  152. cmp.ne pt0, pt3 = 0, a1 // pt0 = more bytes remain, pt3 = done
  153. (pt0) br.cond.dptk TailSet
  154. (pt3) br.ret.dpnt brp
  155. ;;
  156. LEAF_EXIT(RtlFillMemory)
  157. //++
  158. //
  159. // VOID
  160. // RtlFillMemoryUlong (
  161. // IN PVOID Destination,
  162. // IN SIZE_T Length,
  163. // IN ULONG Pattern
  164. // )
  165. //
  166. // Routine Description:
  167. //
  168. // This function fills memory with the specified longword pattern
  169. // 4 bytes at a time.
  170. //
  171. // N.B. This routine assumes that the destination address is aligned
  172. // on a longword boundary and that the length is an even multiple
  173. // of longwords. Any trailing Length mod 4 bytes are not written.
  174. //
  175. // Arguments:
  176. //
  177. // Destination (a0) - Supplies a pointer to the memory to fill.
  178. //
  179. // Length (a1) - Supplies the length, in bytes, of the memory to be filled.
  180. //
  181. // Pattern (a2) - Supplies the fill pattern.
  182. //
  183. // Return Value:
  184. //
  185. // None.
  186. //
  187. //--
  188. LEAF_ENTRY(RtlFillMemoryUlong)
  189. .prologue
  190. .save ar.lc, t22
  191. mov t22 = ar.lc // save the caller's loop counter
  192. shr.u a1 = a1, 2 // a1 = Length / 4 = longword count; keeps all 64 bits of the SIZE_T length
  193. ;;
  194. PROLOGUE_END
  195. cmp.eq pt0, pt1 = zero, a1 // pt0 = nothing to fill, pt1 = at least one longword
  196. add a1 = -1, a1 // br.cloop runs the body ar.lc + 1 times
  197. ;;
  198. nop.m 0
  199. (pt1) mov ar.lc = a1
  200. (pt0) br.ret.spnt brp // ar.lc was not modified on this path
  201. ;;
  202. Rfmu10:
  203. st4 [a0] = a2, 4 // store the pattern, advance the destination by 4
  204. br.cloop.dptk.few Rfmu10
  205. ;;
  206. nop.m 0
  207. mov ar.lc = t22 // restore the caller's loop counter
  208. br.ret.sptk brp
  209. LEAF_EXIT(RtlFillMemoryUlong)
  210. //++
  211. //
  212. // VOID
  213. // RtlFillMemoryUlonglong (
  214. // IN PVOID Destination,
  215. // IN SIZE_T Length,
  216. // IN ULONGLONG Pattern
  217. // )
  218. //
  219. // Routine Description:
  220. //
  221. // This function fills memory with the specified quadword pattern
  222. // 8 bytes at a time.
  223. //
  224. // N.B. This routine assumes that the destination address is aligned
  225. // on a quadword boundary and that the length is an even multiple
  226. // of quadwords. Any trailing Length mod 8 bytes are not written.
  227. //
  228. // Arguments:
  229. //
  230. // Destination (a0) - Supplies a pointer to the memory to fill.
  231. //
  232. // Length (a1) - Supplies the length, in bytes, of the memory to be filled.
  233. //
  234. // Pattern (a2) - Supplies the 64-bit fill pattern.
  235. //
  236. // Return Value:
  237. //
  238. // None.
  239. //
  240. //--
  241. LEAF_ENTRY(RtlFillMemoryUlonglong)
  242. .prologue
  243. .save ar.lc, t22
  244. mov t22 = ar.lc // save the caller's loop counter
  245. shr.u a1 = a1, 3 // a1 = Length / 8 = quadword count; keeps all 64 bits of the SIZE_T length
  246. ;;
  247. PROLOGUE_END
  248. cmp.eq pt0, pt1 = zero, a1 // pt0 = nothing to fill, pt1 = at least one quadword
  249. add a1 = -1, a1 // br.cloop runs the body ar.lc + 1 times
  250. ;;
  251. nop.m 0
  252. (pt1) mov ar.lc = a1
  253. (pt0) br.ret.spnt brp // ar.lc was not modified on this path
  254. ;;
  255. Rfmul10:
  256. st8 [a0] = a2, 8 // store the pattern, advance the destination by 8
  257. br.cloop.dptk.few Rfmul10
  258. ;;
  259. nop.m 0
  260. mov ar.lc = t22 // restore the caller's loop counter
  261. br.ret.sptk brp
  262. ;;
  263. LEAF_EXIT(RtlFillMemoryUlonglong)
  264. //++
  265. //
  266. // VOID
  267. // RtlZeroMemory (
  268. // IN PVOID Destination,
  269. // IN SIZE_T Length
  270. // )
  271. //
  272. // Routine Description:
  273. //
  274. // This function simply sets up the fill value (out2) and branches
  275. // directly to RtlFillMemory, which then returns to this routine's caller.
  276. //
  277. // Arguments:
  278. //
  279. // Destination (a0) - Supplies a pointer to the memory to zero.
  280. //
  281. // Length (a1) - Supplies the length, in bytes, of the memory to be zeroed.
  282. //
  283. // Return Value:
  284. //
  285. // None.
  286. //
  287. //--
  288. LEAF_ENTRY(RtlZeroMemory)
  289. alloc t22 = ar.pfs, 0, 0, 3, 0 // frame of 0 inputs, 0 locals, 3 outputs so out2 is writable; presumably out0/out1 then alias the incoming a0/a1 - confirm against ksia64.h
  290. mov out2 = 0 // fill value = 0
  291. br RtlFillMemory // plain branch, not a call: brp still holds our caller's return address
  292. LEAF_EXIT(RtlZeroMemory)
  293. //++
  294. // RtlCompareMemory - compares the bytes at [a0] with the bytes at [a1],
  295. // for at most a2 bytes, and returns in v0 the number of leading bytes
  296. // that are equal (0 when a2 is zero). Stops at the first mismatch.
  297. //--
  298. LEAF_ENTRY(RtlCompareMemory)
  299. cmp.eq pt0 = 0, a2 // pt0 = (length == 0)
  300. mov v0 = 0 // v0 = number of matching bytes so far
  301. (pt0) br.ret.spnt.many brp // nothing to compare, return 0
  302. ;;
  303. add t2 = -1, a2 // t2 = length - 1 = value of v0 when the last byte is being compared
  304. Rcmp10:
  305. ld1 t0 = [a0], 1 // next byte from each buffer, post-increment both pointers
  306. ld1 t1 = [a1], 1
  307. ;;
  308. cmp4.eq pt2 = t0, t1 // pt2 = the two bytes match
  309. ;;
  310. (pt2) cmp.ne.unc pt1 = v0, t2 // pt1 = match and more bytes remain; .unc clears pt1 on a mismatch
  311. (pt2) add v0 = 1, v0 // count the matching byte
  312. (pt1) br.cond.dptk.few Rcmp10
  313. br.ret.sptk.many brp // mismatch or all bytes compared; v0 = match count
  314. LEAF_EXIT(RtlCompareMemory)
  315. //++
  316. //
  317. // VOID
  318. // RtlCopyIa64FloatRegisterContext (
  319. // PFLOAT128 Destination,
  320. // PFLOAT128 Source,
  321. // ULONGLONG Length
  322. // )
  323. //
  324. // Routine Description:
  325. //
  326. // This routine copies floating point context from one place to
  327. // another. It assumes both the source and the destination are
  328. // 16-byte aligned and the buffer contains only memory image of
  329. // floating point registers. Note that Length must be greater
  330. // than 0 and a multiple of 32; a Length below 32 copies nothing.
  331. //
  332. // Arguments:
  333. //
  334. // a0 - Destination
  335. // a1 - Source
  336. // a2 - Length
  337. //
  338. // Return Value:
  339. //
  340. // None.
  341. //
  342. //--
  343. NESTED_ENTRY(RtlCopyIa64FloatRegisterContext)
  344. .prologue
  345. .save ar.lc, t22
  346. mov t22 = ar.lc // save the caller's loop counter
  347. shr t0 = a2, 5 // t0 = Length / 32 = number of 32-byte (two-register) chunks
  348. ;;
  349. cmp.gtu pt0, pt1 = 32, a2 // pt0 = (Length < 32, unsigned): nothing to copy
  350. add t0 = -1, t0 // br.cloop runs the body ar.lc + 1 times
  351. add t1 = 16, a1 // t1 = second source pointer, 16 bytes past Source
  352. ;;
  353. PROLOGUE_END
  354. #if DBG
  355. and t4 = 0x1f, a2 // DBG only: break if Length is not a multiple of 32
  356. ;;
  357. cmp.ne pt2 = 0, t4
  358. ;;
  359. (pt2) break.i BREAKPOINT_STOP
  360. #endif
  361. add t2 = 16, a0 // t2 = second destination pointer, 16 bytes past Destination
  362. (pt1) mov ar.lc = t0
  363. (pt0) br.ret.spnt brp // ar.lc was not modified on this path
  364. Rcf10:
  365. ldf.fill ft0 = [a1], 32 // load two 16-byte register images, each pointer steps by 32
  366. ldf.fill ft1 = [t1], 32
  367. nop.i 0
  368. ;;
  369. stf.spill [a0] = ft0, 32 // store them to the destination in the same spill format
  370. stf.spill [t2] = ft1, 32
  371. br.cloop.dptk Rcf10
  372. ;;
  373. nop.m 0
  374. mov ar.lc = t22 // restore the caller's loop counter
  375. br.ret.sptk brp
  376. ;;
  377. NESTED_EXIT(RtlCopyIa64FloatRegisterContext)
  378. //++
  379. //
  380. // VOID
  381. // RtlPrefetchMemoryNonTemporal (
  382. // IN PVOID Source,
  383. // IN SIZE_T Length
  384. // )
  385. //
  386. // Routine Description:
  387. //
  388. // This routine prefetches memory at Source, for Length bytes into
  389. // the closest cache to the processor.
  390. //
  391. // N.B. Currently this code assumes a line size of 32 bytes. At
  392. // some stage it should be altered to determine and use the processor's
  393. // actual line size.
  394. //
  395. // Arguments:
  396. //
  397. // a0 - Source
  398. // a1 - Length
  399. //
  400. // Return Value:
  401. //
  402. // None.
  403. //
  404. //--
  405. LEAF_ENTRY(RtlPrefetchMemoryNonTemporal)
  406. .prologue
  407. lfetch.nta [a0], 32 // get first line coming
  408. .save ar.lc, t0
  409. mov.i t0 = ar.lc // save loop counter
  410. shr a1 = a1, 5 // determine loop count = Length / 32 (arithmetic shift)
  411. ;;
  412. .body
  413. add t2 = -1, a1 // subtract out already fetched line
  414. cmp.lt pt0, pt1 = 2, a1 // pt0 = (line count > 2), pt1 = (line count <= 2). NOTE(review): a count of 2 returns with only the first line fetched - confirm intent
  415. ;;
  416. (pt0) mov ar.lc = t2 // set loop count. NOTE(review): br.cloop runs the body ar.lc + 1 times, so count lines are fetched on top of the first - confirm intent
  417. (pt1) br.ret.spnt.few brp // return without looping; ar.lc was not modified on this path
  418. ;;
  419. Rpmnt10:
  420. lfetch.nta [a0], 32 // fetch next line
  421. br.cloop.dptk.many Rpmnt10 // loop while more lines to fetch
  422. ;;
  423. mov ar.lc = t0 // restore loop counter
  424. br.ret.sptk.many brp // return
  425. LEAF_EXIT(RtlPrefetchMemoryNonTemporal)