Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

208 lines
6.6 KiB

  1. /* *************************************************************************
  2. ** INTEL Corporation Proprietary Information
  3. **
  4. ** This listing is supplied under the terms of a license
  5. ** agreement with INTEL Corporation and may not be copied
  6. ** nor disclosed except in accordance with the terms of
  7. ** that agreement.
  8. **
  9. ** Copyright (c) 1995, 1996 Intel Corporation.
  10. ** All Rights Reserved.
  11. **
  12. ** *************************************************************************
  13. */
  14. // $Author: AKASAI $
  15. // $Date: 15 Mar 1996 08:48:06 $
  16. // $Archive: S:\h26x\src\dec\d1blkcpy.cpv $
  17. // $Header: S:\h26x\src\dec\d1blkcpy.cpv 1.0 15 Mar 1996 08:48:06 AKASAI $
  18. // $Log: S:\h26x\src\dec\d1blkcpy.cpv $
  19. //
  20. // Rev 1.0 15 Mar 1996 08:48:06 AKASAI
  21. // Initial revision.
  22. //
  23. // Rev 1.3 31 Jan 1996 13:15:14 RMCKENZX
  24. // Rewrote file to avoid bank conflicts. Fully unrolled the loop.
  25. // Module now really will execute in 52 cycles if the cache is hot.
  26. //
  27. // Rev 1.2 22 Dec 1995 13:51:06 KMILLS
  28. // added new copyright notice
  29. //
  30. // Rev 1.1 25 Sep 1995 09:03:22 CZHU
  31. // Added comments on cycle counts
  32. //
  33. // Rev 1.0 11 Sep 1995 16:52:26 CZHU
  34. // Initial revision.
  35. //
  36. //------------------------------------------------------------------------------
  37. //------------------------------------------------------------------------------
  38. //
  39. // BlockCopy copies an 8x8 block of bytes. It reads the reference in BYTES and
  40. // writes DWORDS. BYTES are read to avoid data alignment problems, because the motion compensated previous (reference) address may be unaligned.
  41. //
  42. // Input U32 uSrcBlock (Motion Compensated address of reference, read as U8 *)
  43. // Output U32 uDstBlock (address of Output buffer, written as DWORDS)
  44. //
  45. // Registers used:
  46. // eax source address
  47. // ebx temp (target of the loads that heat the output cache; value is never used)
  48. // ecx, edx accumulators
  49. // edi destination address
  50. // esi PITCH
  51. //
  52. // Assumption: reference and output both use PITCH as the byte stride between rows
  53. //
  54. // Cycle count: 52 cycles if the cache is hot (see the Rev 1.3 log entry above)
  55. //
  56. //------------------------------------------------------------------------------
  57. #include "precomp.h"
  58. #pragma code_seg("IACODE2") // emit the following function into the code section named "IACODE2"
  //
  // BlockCopy: copies an 8-row by 8-byte block from uSrcBlock to uDstBlock.
  //
  // Parameters (read directly off the stack; the function is naked, so the
  // compiler generates no prologue or epilogue):
  //   uDstBlock - address of the destination block. Each row is written with
  //               two DWORD stores (bytes 0-3, then bytes 4-7).
  //               NOTE(review): presumably expected to be DWORD aligned - confirm.
  //   uSrcBlock - address of the source block. Each row is read one byte at a
  //               time and packed into ecx (bytes 0-3) and edx (bytes 4-7).
  // Both addresses advance by PITCH bytes per row. PITCH is not defined in
  // this file - presumably it comes from precomp.h; confirm.
  //
  // Returns nothing. edi, esi and ebx are saved on entry and restored on
  // exit; eax, ecx and edx are overwritten. The final `ret` removes no
  // arguments from the stack, so the caller must clean up.
  // NOTE(review): that looks like cdecl - confirm against the project's
  // default calling convention.
  //
  // NOTE(review): the instruction order appears to be hand-scheduled (the
  // revision history mentions avoiding bank conflicts), and on several rows
  // the first load of the next row is issued between the two stores of the
  // current row. Do not reorder without re-checking correctness and timing.
  //
  59. __declspec(naked) // no compiler prologue/epilogue: the asm below saves registers and returns itself
  60. void BlockCopy (U32 uDstBlock, U32 uSrcBlock)
  61. {
  62. __asm {
  63. mov eax, [esp+8] // eax gets Base addr of uSrcBlock (2nd argument; [esp] is the return address)
  64. push edi // save the registers this routine overwrites and must restore
  65. push esi // the pushes separate the load of eax from its first use as an address: avoid Address Generation Interlocks
  66. push ebx
  67. mov cl, 2[eax] // ref[0][2]
  68. mov edi, [esp+16] // edi gets Base addr of uDstBlock (1st argument; offset is 4 + 12 for the three pushes)
  69. mov ch, 3[eax] // ref[0][3]
  70. mov dh, 7[eax] // ref[0][7]
  71. shl ecx, 16 // move bytes 2,3 into bits 16-31 of ecx; the old upper bits are shifted out
  72. mov dl, 6[eax] // ref[0][6]
  73. shl edx, 16 // move bytes 6,7 into bits 16-31 of edx
  74. mov ebx, [edi] // heat output cache (ebx is never read; the load only touches destination row 0)
  75. mov esi, PITCH // esi = byte distance between rows, used for both source and destination
  76. mov cl, 0[eax] // ref[0][0]
  77. mov dh, 5[eax] // ref[0][5]
  78. mov ch, 1[eax] // ref[0][1]
  79. mov dl, 4[eax] // ref[0][4]
  80. add eax, esi // eax -> source row 1
  81. mov 0[edi], ecx // row 0, bytes 0-3
  82. mov 4[edi], edx // row 0, bytes 4-7
  83. mov cl, 2[eax] // ref[1][2]
  84. mov dh, 7[eax] // ref[1][7]
  85. mov ch, 3[eax] // ref[1][3]
  86. add edi, esi // edi -> destination row 1
  87. shl ecx, 16 // bytes 2,3 -> high word of ecx
  88. mov dl, 6[eax] // ref[1][6]
  89. shl edx, 16 // bytes 6,7 -> high word of edx
  90. mov ebx, [edi] // heat output cache (destination row 1)
  91. mov cl, 0[eax] // ref[1][0]
  92. mov dh, 5[eax] // ref[1][5]
  93. mov ch, 1[eax] // ref[1][1]
  94. mov dl, 4[eax] // ref[1][4]
  95. add eax, esi // eax -> source row 2
  96. mov 0[edi], ecx // row 1, bytes 0-3
  97. mov cl, 2[eax] // ref[2][2] (ecx is free again: row 1 bytes 0-3 were just stored)
  98. mov 4[edi], edx // row 1, bytes 4-7
  99. mov ch, 3[eax] // ref[2][3]
  100. add edi, esi // edi -> destination row 2
  101. shl ecx, 16 // bytes 2,3 -> high word of ecx
  102. mov dh, 7[eax] // ref[2][7]
  103. mov dl, 6[eax] // ref[2][6]
  104. mov ebx, [edi] // heat output cache (destination row 2)
  105. shl edx, 16 // bytes 6,7 -> high word of edx
  106. mov cl, 0[eax] // ref[2][0]
  107. mov dh, 5[eax] // ref[2][5]
  108. mov ch, 1[eax] // ref[2][1]
  109. mov dl, 4[eax] // ref[2][4]
  110. add eax, esi // eax -> source row 3
  111. mov 0[edi], ecx // row 2, bytes 0-3
  112. mov 4[edi], edx // row 2, bytes 4-7
  113. mov cl, 2[eax] // ref[3][2]
  114. mov dh, 7[eax] // ref[3][7]
  115. mov ch, 3[eax] // ref[3][3]
  116. add edi, esi // edi -> destination row 3
  117. shl ecx, 16 // bytes 2,3 -> high word of ecx
  118. mov dl, 6[eax] // ref[3][6]
  119. shl edx, 16 // bytes 6,7 -> high word of edx
  120. mov ebx, [edi] // heat output cache (destination row 3)
  121. mov cl, 0[eax] // ref[3][0]
  122. mov dh, 5[eax] // ref[3][5]
  123. mov ch, 1[eax] // ref[3][1]
  124. mov dl, 4[eax] // ref[3][4]
  125. add eax, esi // eax -> source row 4
  126. mov 0[edi], ecx // row 3, bytes 0-3
  127. mov cl, 2[eax] // ref[4][2] (ecx is free again: row 3 bytes 0-3 were just stored)
  128. mov 4[edi],edx // row 3, bytes 4-7
  129. mov ch, 3[eax] // ref[4][3]
  130. add edi, esi // edi -> destination row 4
  131. shl ecx, 16 // bytes 2,3 -> high word of ecx
  132. mov dh, 7[eax] // ref[4][7]
  133. mov dl, 6[eax] // ref[4][6]
  134. mov ebx, [edi] // heat output cache (destination row 4)
  135. shl edx, 16 // bytes 6,7 -> high word of edx
  136. mov cl, 0[eax] // ref[4][0]
  137. mov dh, 5[eax] // ref[4][5]
  138. mov ch, 1[eax] // ref[4][1]
  139. mov dl, 4[eax] // ref[4][4]
  140. add eax, esi // eax -> source row 5
  141. mov 0[edi], ecx // row 4, bytes 0-3
  142. mov 4[edi], edx // row 4, bytes 4-7
  143. mov cl, 2[eax] // ref[5][2]
  144. mov dh, 7[eax] // ref[5][7]
  145. mov ch, 3[eax] // ref[5][3]
  146. add edi, esi // edi -> destination row 5
  147. shl ecx, 16 // bytes 2,3 -> high word of ecx
  148. mov dl, 6[eax] // ref[5][6]
  149. shl edx, 16 // bytes 6,7 -> high word of edx
  150. mov ebx, [edi] // heat output cache (destination row 5)
  151. mov cl, 0[eax] // ref[5][0]
  152. mov dh, 5[eax] // ref[5][5]
  153. mov ch, 1[eax] // ref[5][1]
  154. mov dl, 4[eax] // ref[5][4]
  155. add eax, esi // eax -> source row 6
  156. mov 0[edi], ecx // row 5, bytes 0-3
  157. mov cl, 2[eax] // ref[6][2] (ecx is free again: row 5 bytes 0-3 were just stored)
  158. mov 4[edi], edx // row 5, bytes 4-7
  159. mov ch, 3[eax] // ref[6][3]
  160. add edi, esi // edi -> destination row 6
  161. shl ecx, 16 // bytes 2,3 -> high word of ecx
  162. mov dh, 7[eax] // ref[6][7]
  163. mov dl, 6[eax] // ref[6][6]
  164. mov ebx, [edi] // heat output cache (destination row 6)
  165. shl edx, 16 // bytes 6,7 -> high word of edx
  166. mov cl, 0[eax] // ref[6][0]
  167. mov dh, 5[eax] // ref[6][5]
  168. mov ch, 1[eax] // ref[6][1]
  169. mov dl, 4[eax] // ref[6][4]
  170. add eax, esi // eax -> source row 7 (last row)
  171. mov 0[edi], ecx // row 6, bytes 0-3
  172. mov 4[edi], edx // row 6, bytes 4-7
  173. mov cl, 2[eax] // ref[7][2]
  174. mov dh, 7[eax] // ref[7][7]
  175. mov ch, 3[eax] // ref[7][3]
  176. add edi, esi // edi -> destination row 7 (last row)
  177. shl ecx, 16 // bytes 2,3 -> high word of ecx
  178. mov dl, 6[eax] // ref[7][6]
  179. shl edx, 16 // bytes 6,7 -> high word of edx
  180. mov ebx, [edi] // heat output cache (destination row 7)
  181. mov cl, 0[eax] // ref[7][0]
  182. mov dh, 5[eax] // ref[7][5]
  183. mov ch, 1[eax] // ref[7][1]
  184. mov dl, 4[eax] // ref[7][4]
  185. mov 0[edi], ecx // row 7, bytes 0-3
  186. mov 4[edi], edx // row 7, bytes 4-7
  187. pop ebx // restore saved registers in reverse order of the pushes
  188. pop esi
  189. pop edi
  190. ret // explicit return (naked function); no arguments are removed from the stack here
  191. } // end of asm BlockCopy
  192. }
  193. #pragma code_seg() // no section name given: ends the "IACODE2" placement for code that follows