Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

151 lines
4.3 KiB

  1. /* *************************************************************************
  2. ** INTEL Corporation Proprietary Information
  3. **
  4. ** This listing is supplied under the terms of a license
  5. ** agreement with INTEL Corporation and may not be copied
  6. ** nor disclosed except in accordance with the terms of
  7. ** that agreement.
  8. **
  9. ** Copyright (c) 1995, 1996 Intel Corporation.
  10. ** All Rights Reserved.
  11. **
  12. ** *************************************************************************
  13. */
  14. // $Author: AGUPTA2 $
  15. // $Date: 08 Mar 1996 16:46:34 $
  16. // $Archive: S:\h26x\src\dec\dxblkcpy.cpv $
  17. // $Header: S:\h26x\src\dec\dxblkcpy.cpv 1.4 08 Mar 1996 16:46:34 AGUPTA2 $
  18. // $Log: S:\h26x\src\dec\dxblkcpy.cpv $
  19. //
  20. // Rev 1.4 08 Mar 1996 16:46:34 AGUPTA2
  21. // Rewritten to reduce code size by avoiding 32-bit displacements. Added
  22. // pragma code_seg. May need to optimize for misaligned case.
  23. //
  24. //
  25. // Rev 1.3 31 Jan 1996 13:15:14 RMCKENZX
  26. // Rewrote file to avoid bank conflicts. Fully unrolled the loop.
  27. // Module now really will execute in 52 cycles if the cache is hot.
  28. //
  29. // Rev 1.2 22 Dec 1995 13:51:06 KMILLS
  30. // added new copyright notice
  31. //
  32. // Rev 1.1 25 Sep 1995 09:03:22 CZHU
  33. // Added comments on cycle counts
  34. //
  35. // Rev 1.0 11 Sep 1995 16:52:26 CZHU
  36. // Initial revision.
  37. //
  38. //------------------------------------------------------------------------------
  39. //------------------------------------------------------------------------------
  40. //
  41. // Note:
  42. // - BlockCopy reads and writes in DWORDS.
  43. // - The __fastcall convention is used.
  44. // - Code re-written to minimize code size.
  45. // - We assume the output frame to NOT be in cache.
  46. // - The constants DXPITCH and U32 are #defined in this file; the only include is precomp.h.
  47. //
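  48. // Registers used:
  49. // eax, ebx accumulators (first / second DWORD of the current row)
  50. // ecx destination address
  51. // edx source address
  52. // ebp DXPITCH (byte step between rows)
  53. // edi scratch target for the output-cache heating reads (value unused)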
  54. //
  55. // Pentium cycle count (input cache hot, output cache cold):
  56. // 33 + 8*(cache miss time) input aligned
  57. // 81 + 8*(cache miss time) input mis-aligned
  58. //
  59. //------------------------------------------------------------------------------
  60. #include "precomp.h"
  61. #define U32 unsigned long
  62. // Already defined in precomp.h
  63. #define DXPITCH 384
  64. #pragma code_seg("IACODE2")
  65. /*
  66. * BlockCopy: copies 8 rows of 8 bytes (two DWORDs per row) from the address in uSrcBlock to the address in uDstBlock; both addresses step by DXPITCH bytes per row.
  67. * Notes: the parameter uDstBlock is in ecx and uSrcBlock is in edx (__fastcall). The function is naked, so the asm below saves/restores edi, ebx, ebp and issues the ret itself.
  68. */
  69. __declspec(naked) // no compiler-generated prologue/epilogue; see the hand-written pushes, pops and ret below
  70. void __fastcall BlockCopy (U32 uDstBlock, U32 uSrcBlock)
  71. {
  72. __asm {
  73. push edi // saved: used below only as a throw-away load target
  74. push ebx // saved: holds the second DWORD of each row
  75. push ebp // saved: holds the row step
  76. mov ebp, DXPITCH // ebp = byte distance between consecutive rows (source and destination alike)
  77. // row 0: load 8 source bytes, step the source, touch the destination, then store
  78. mov eax, [edx] // first DWORD of the source row
  79. mov ebx, [edx+4] // second DWORD of the source row
  80. add edx, ebp // source -> row 1
  81. mov edi, [ecx] // heat output cache: dummy read of the destination, edi is never used
  82. mov [ecx], eax
  83. mov [ecx+4], ebx
  84. // row 1 (same pattern; the add below first moves the destination on from row 0)
  85. add ecx, ebp // destination -> row 1
  86. mov eax, [edx]
  87. mov ebx, [edx+4]
  88. add edx, ebp // source -> row 2
  89. mov edi, [ecx] // heat output cache
  90. mov [ecx], eax
  91. mov [ecx+4], ebx
  92. add ecx, ebp // destination -> row 2
  93. // row 2 (destination was already advanced by the add above)
  94. mov eax, [edx]
  95. mov ebx, [edx+4]
  96. add edx, ebp // source -> row 3
  97. mov edi, [ecx] // heat output cache
  98. mov [ecx], eax
  99. mov [ecx+4], ebx
  100. // row 3
  101. add ecx, ebp // destination -> row 3
  102. mov eax, [edx]
  103. mov ebx, [edx+4]
  104. add edx, ebp // source -> row 4
  105. mov edi, [ecx] // heat output cache
  106. mov [ecx], eax
  107. mov [ecx+4], ebx
  108. add ecx, ebp // destination -> row 4
  109. // row 4 (destination was already advanced by the add above)
  110. mov eax, [edx]
  111. mov ebx, [edx+4]
  112. add edx, ebp // source -> row 5
  113. mov edi, [ecx] // heat output cache
  114. mov [ecx], eax
  115. mov [ecx+4], ebx
  116. // row 5
  117. add ecx, ebp // destination -> row 5
  118. mov eax, [edx]
  119. mov ebx, [edx+4]
  120. add edx, ebp // source -> row 6
  121. mov edi, [ecx] // heat output cache
  122. mov [ecx], eax
  123. mov [ecx+4], ebx
  124. add ecx, ebp // destination -> row 6
  125. // row 6 (destination was already advanced by the add above)
  126. mov eax, [edx]
  127. mov ebx, [edx+4]
  128. add edx, ebp // source -> row 7 (last advance of the source)
  129. mov edi, [ecx] // heat output cache
  130. mov [ecx], eax
  131. mov [ecx+4], ebx
  132. // row 7 (last row: neither pointer is advanced after it)
  133. add ecx, ebp // destination -> row 7 (last use of ebp as the row step)
  134. pop ebp // restore caller's ebp; the row step is not needed past this point
  135. mov eax, [edx]
  136. mov ebx, [edx+4]
  137. mov edi, [ecx] // heat output cache
  138. mov [ecx], eax
  139. mov [ecx+4], ebx
  140. pop ebx // restore caller's ebx
  141. pop edi // restore caller's edi (pops mirror the pushes in reverse order)
  142. ret // plain ret: both arguments arrived in ecx/edx, so no stack arguments to remove
  143. } // end of asm
  144. }
  145. #pragma code_seg()