Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

216 lines
4.4 KiB

  1. // memset.s: function to set a number of bytes to a char value - McKinley version
  2. // Copyright (C) 2002 Intel Corporation.
  3. //
  4. // The information and source code contained herein is the exclusive property
  5. // of Intel Corporation and may not be disclosed, examined, or
  6. // reproduced in whole or in part without explicit written authorization from
  7. // the Company.
  8. // Author: Vadim Paretsky
  9. // Date: February, 2002
  10. //
  11. .section .text
  12. .proc memset#
  13. .global memset#
  14. .align 64
  //
  // memset: fill r34 bytes starting at address r32 with the low byte of r33.
  //
  // Register roles, as used by the code below:
  //   r32      current 8-byte-aligned store pointer (initially the destination)
  //   r33      fill value (only its low byte is stored)
  //   r34      bytes still to be filled
  //   r8       copy of the original r32, left untouched until return
  //            (presumably the return value per the IA-64 convention - confirm)
  //   r16      original r32 + original r34, i.e. one past the last byte to fill
  //   r25      fill byte replicated into all 8 bytes (mux1 @brcst)
  //   r31      second store pointer, kept 8 bytes ahead of r32
  //   r14,r26  look-ahead store pointers (0x80-byte stride)
  //   r20,r21  byte count at the top of / after the current 0x80-byte loop pass
  //   r27-r30  look-ahead thresholds 0x88, 0x108, 0x188, 0x208
  //   r17,r18  addresses for the trailing 2-byte and 4-byte stores
  //
  // Flow: return at once for a count <= 0, then store single bytes until the
  // pointer is 8-byte aligned (Align_Loop), then either the unrolled 0x80-byte
  // loop (count >= 0x30) or the short path (Aligned_Short), which also writes
  // the final 1/2/4-byte pieces backwards from the end address.
  //
  // NOTE(review): the count comparisons use signed forms (cmp.ge, cmp.gt,
  // cmp.le), so a count with the top bit set takes the "<= 0" early return.
  //
  15. memset:
  // r21 = misalignment of the destination (low 3 address bits)
  // r25 = fill byte broadcast to every byte of the 64-bit register
  // r16 = end address (destination + count)
  16. { .mii
  17. and r21 = 7, r32
  18. mux1 r25 = r33, @brcst
  19. add r16 = r32, r34
  20. } { .mmb
  // p9 = (count <= 0): nothing to store, return with r8 = destination
  21. cmp.ge p9 = 0, r34
  22. mov r8 = r32
  23. (p9) br.ret.spnt b0
  24. ;;
  25. }
  26. // Load the look-ahead thresholds and, if needed, align on an 8-byte boundary
  27. { .mmi
  // Each threshold is a look-ahead offset (0x80, 0x100, 0x180) plus the 8
  // bytes that the look-ahead st8 writes.
  28. mov r27 = 0x88
  29. mov r28 = 0x108
  30. mov r29 = 0x188
  31. } { .mmb
  32. nop.m 0
  33. cmp.ne p15 = 0, r21 // p15 = low 3 address bits are non-zero (unaligned)
  34. (p15) br.cond.dpnt Align_Loop
  35. ;;
  36. }
  // Reached with r32 8-byte aligned and r34 > 0, either by falling through
  // or by branching back from Align_Loop.
  37. Is_Aligned:
  38. { .mmi
  // r14 = first look-ahead address, r31 = second word of the current pair
  39. add r14 = 0x80, r32
  40. mov r30 = 0x208
  41. add r31 = 8, r32
  42. } { .mmb
  // p7  = count >= 0x88 (the word at +0x80 lies inside the buffer)
  // p10 = count <  0x30 (too short for the loop, use the short path)
  43. cmp.ge p7 = r34, r27
  44. cmp.gt p10 = 0x30, r34
  45. (p10) br.cond.dpnt Aligned_Short
  46. ;;
  47. }
  48. // Counts of 0x30 (48) bytes or more go through the loop
  // Loop set-up: store word 0 of the first block and issue up to three
  // look-ahead stores at +0x80, +0x100 and +0x180, each guarded by the
  // matching threshold. These look-ahead words are the first word of each
  // later 0x80-byte block; the loop itself never stores that word through r32
  // in its first step (it only advances r32), so they are required for
  // correctness. (Presumably they also touch upcoming cache lines early -
  // not established by this code.)
  49. Aligned_Long:
  50. { .mmi
  51. st8 [r32] = r25
  // post-increment leaves r14 at +0x100 for the (p8) store below
  52. (p7) st8 [r14] = r25,0x80
  53. mov r20 = r34
  54. } { .mmi
  55. add r26 = 0x180, r32
  56. cmp.ge p8 = r34, r28
  57. cmp.ge p9 = r34, r29
  58. ;;
  59. } { .mmi
  60. (p8) st8 [r14] = r25
  // post-increment leaves r26 at +0x200 for the first loop pass
  61. (p9) st8 [r26] = r25, 0x80
  // p10 = count >= 0x208: the first loop pass may store at +0x200
  62. cmp.ge p10 = r34, r30
  63. ;;
  64. }
  65. .align 64
  // One pass fills 0x80 bytes in eight 0x10-byte steps. r20 holds the byte
  // count at the top of the pass. Each step's compare sets the store predicate
  // (p14/p15) for the next step and p12 to leave for Aligned_Short when fewer
  // than the next step's bytes remain; r34 drops by 0x10 per step so it is
  // correct on every exit.
  66. Long_loop:
  67. { .mmi
  // Step 1: look-ahead store 0x200 ahead of this block (if in range) and
  // the second word of the block; word 0 was stored earlier.
  68. (p10) st8 [r26] = r25, 0x80
  69. st8 [r31] = r25, 0x10
  70. cmp.le p15,p12 = 0x20, r20
  71. } { .mmb
  72. add r32 = 0x10, r32
  73. add r34 = -0x10, r34
  74. (p12) br.cond.dpnt Aligned_Short
  75. ;;
  76. } { .mmi
  // Step 2: bytes 0x10-0x1f of the block
  77. (p15) st8 [r32] = r25, 0x10
  78. (p15) st8 [r31] = r25, 0x10
  79. cmp.le p14,p12 = 0x30, r20
  80. } { .mmb
  81. nop.m 0
  82. add r34 = -0x10, r34
  83. (p12) br.cond.dpnt Aligned_Short
  84. ;;
  85. } { .mmi
  // Step 3: bytes 0x20-0x2f
  86. (p14) st8 [r32] = r25, 0x10
  87. (p14) st8 [r31] = r25, 0x10
  88. cmp.le p15,p12 = 0x40, r20
  89. } { .mmb
  90. nop.m 0
  91. add r34 = -0x10, r34
  92. (p12) br.cond.dpnt Aligned_Short
  93. ;;
  94. } { .mmi
  // Step 4: bytes 0x30-0x3f
  95. (p15) st8 [r32] = r25, 0x10
  96. (p15) st8 [r31] = r25, 0x10
  97. cmp.le p14,p12 = 0x50, r20
  98. } { .mmb
  99. nop.m 0
  100. add r34 = -0x10, r34
  101. (p12) br.cond.dpnt Aligned_Short
  102. ;;
  103. } { .mmi
  // Step 5: bytes 0x40-0x4f
  104. (p14) st8 [r32] = r25, 0x10
  105. (p14) st8 [r31] = r25, 0x10
  106. cmp.le p15,p12 = 0x60, r20
  107. } { .mmb
  108. nop.m 0
  109. add r34 = -0x10, r34
  110. (p12) br.cond.dpnt Aligned_Short
  111. ;;
  112. } { .mmi
  // Step 6: bytes 0x50-0x5f
  113. (p15) st8 [r32] = r25, 0x10
  114. (p15) st8 [r31] = r25, 0x10
  115. cmp.le p14,p12 = 0x70, r20
  116. } { .mmb
  // r21 = count that will remain after this whole 0x80-byte pass
  117. add r21 = -0x80, r20
  118. add r34 = -0x10, r34
  119. (p12) br.cond.dpnt Aligned_Short
  120. ;;
  121. } { .mmi
  // Step 7: bytes 0x60-0x6f
  122. (p14) st8 [r32] = r25, 0x10
  123. (p14) st8 [r31] = r25, 0x10
  124. cmp.le p15,p12 = 0x80, r20
  125. } { .mmb
  // p10 for the next pass: look-ahead store only if count-after-pass >= 0x208
  126. cmp.ge p10 = r21, r30
  127. add r34 = -0x10, r34
  128. (p12) br.cond.dpnt Aligned_Short
  129. ;;
  130. } { .mmi
  // Step 8: bytes 0x70-0x7f
  131. (p15) st8 [r32] = r25, 0x10
  132. (p15) st8 [r31] = r25, 0x10
  133. add r34 = -0x10, r34
  134. } { .mmb
  // Start another pass while at least 0x30 bytes remain; otherwise fall
  // through to the short path.
  135. mov r20 = r21
  136. cmp.le p13 = 0x30, r21
  137. (p13) br.sptk.many Long_loop
  138. ;;
  139. }
  140. //
  141. // Do partial word stores
  142. //
  // On every path into this label r34 is below 0x30. Whole 8-byte words are
  // stored forward through r32/r31; the final 1-, 2- and 4-byte pieces
  // (bits 0, 1 and 2 of r34) are stored backwards from the end address r16.
  143. Aligned_Short:
  144. { .mmi
  // r27 is reused here: bit 1 of the remaining count
  145. and r27 = 2, r34
  146. nop.m 0
  147. tbit.nz p6 = r34, 0 // p6 = bit 0 set: a trailing single byte is needed
  148. } { .mmb
  // p11 = at least 0x10 bytes remain; p10 = nothing remains, return
  149. cmp.le p11 = 0x10, r34
  150. cmp.eq p10 = 0, r34
  151. (p10) br.ret.dpnt b0
  152. ;;
  153. } { .mmi
  // First 16-byte pair if remaining >= 0x10
  154. (p11) st8 [r32] = r25, 0x10
  155. (p11) st8 [r31] = r25, 0x10
  156. cmp.le p12 = 0x20, r34
  157. } { .mmi
  // Provisional addresses of the trailing 2-byte and 4-byte stores,
  // adjusted below for the smaller pieces that follow them.
  158. add r17 = -2, r16
  159. add r18 = -4, r16
  160. tbit.nz p9 = r34, 3 // p9 = bit 3 set: one more single 8-byte store
  161. ;;
  162. } { .mmi
  // Second 16-byte pair if remaining >= 0x20
  163. (p12) st8 [r32] = r25, 0x10
  164. (p12) st8 [r31] = r25, 0x10
  165. nop.i 0
  166. } { .mmi
  // A trailing byte sits at end-1, so the 4-byte store moves down by 1 and
  // r16 becomes the address of that byte.
  167. (p6) add r18 = -1, r18
  168. (p6) add r16 = -1, r16
  // p7 = bit 1 set: a trailing 2-byte store is needed
  169. cmp.ne p7 = 0, r27
  170. ;;
  171. } { .mmi
  172. (p9) st8 [r32] = r25
  173. (p6) st1 [r16] = r25
  174. tbit.nz p8 = r34, 2 // p8 = bit 2 set: a trailing 4-byte store is needed
  175. } { .mmi
  // The 4-byte store sits below the 2-byte piece; the 2-byte store sits
  // below the single byte.
  176. (p7) add r18 = -2, r18
  177. (p6) add r17 = -1, r17
  178. nop.i 0
  179. ;;
  180. } { .mmb
  181. (p8) st4 [r18] = r25
  182. (p7) st2 [r17] = r25
  183. br.ret.sptk.many b0
  184. ;;
  185. }
  186. .align 64
  187. // Align the input pointer to an 8-byte boundary
  // Entered with r21 = r32 & 7 (non-zero). Each pass stores one byte, advances
  // r32, increments r21 and decrements r34.
  188. Align_Loop:
  189. { .mmi
  190. st1 [r32] = r33,1
  191. add r21 = 1, r21
  192. add r34 = -1, r34
  193. ;;
  194. } { .mmi
  // p10 = still short of the 8-byte boundary; p15 = count exhausted
  195. cmp.gt p10 = 8, r21
  196. cmp.eq p15 = 0, r34
  197. nop.i 0
  198. } { .bbb
  // Return if the count ran out, loop if still unaligned, otherwise
  // continue with the aligned code.
  199. (p15) br.ret.dpnt b0
  200. (p10) br.cond.sptk Align_Loop
  201. br.cond.sptk Is_Aligned
  202. ;;
  203. }
  204. .endp memset#
  205. // End