Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

163 lines
7.0 KiB

  1. /* *************************************************************************
  2. ** INTEL Corporation Proprietary Information
  3. **
  4. ** This listing is supplied under the terms of a license
  5. ** agreement with INTEL Corporation and may not be copied
  6. ** nor disclosed except in accordance with the terms of
  7. ** that agreement.
  8. **
  9. ** Copyright (c) 1995 Intel Corporation.
  10. ** All Rights Reserved.
  11. **
  12. ** *************************************************************************
  13. */
  14. // $Author: KLILLEVO $
  15. // $Date: 30 Aug 1996 08:39:58 $
  16. // $Archive: S:\h26x\src\dec\dxblkadd.cpv $
  17. // $Header: S:\h26x\src\dec\dxblkadd.cpv 1.7 30 Aug 1996 08:39:58 KLILLEVO $
  18. // $Log: S:\h26x\src\dec\dxblkadd.cpv $
  19. //
  20. // Rev 1.7 30 Aug 1996 08:39:58 KLILLEVO
  21. // added C version of block edge filter, and changed the bias in
  22. // ClampTbl[] from 128 to CLAMP_BIAS (defined to 128)
  23. // The C version of the block edge filter takes up way too much CPU time
  24. // relative to the rest of the decode time (4 ms for QCIF and 16 ms
  25. // for CIF on a P120, so this needs to be coded in assembly)
  26. //
  27. // Rev 1.6 17 Jul 1996 15:33:56 AGUPTA2
  28. // Increased the size of clamping table ClampTbl to 128+256+128.
  29. //
  30. // Rev 1.5 08 Mar 1996 16:46:32 AGUPTA2
  31. // Moved the ClampTbl to be common between this module and IDCT. Reduced
  32. // the size of the ClampTbl from 256+256+256 to 64+256+64. IDCT INTER coeffs
  33. // are biased by 1024 and is taken care of when accessing ClampTbl. Added
  34. // pragma code_seg to place the rtn in Pass 2 code segment.
  35. //
  36. //
  37. // Rev 1.4 22 Dec 1995 13:52:16 KMILLS
  38. //
  39. // added new copyright notice
  40. //
  41. // Rev 1.3 25 Sep 1995 09:03:36 CZHU
  42. // Added comments on cycle counts
  43. //
  44. // Rev 1.2 13 Sep 1995 08:46:44 AKASAI
  45. // Set loopcounter back to 8. Intermediate is 8x8 of DWORDS so TEMPPITCH4
  46. // should be 32 not 64.
  47. //
  48. // Rev 1.1 12 Sep 1995 18:19:20 CZHU
  49. //
  50. // Changed loop from 8 to 7 to start with.
  51. //
  52. // Rev 1.0 11 Sep 1995 16:52:20 CZHU
  53. // Initial revision.
  54. // -------------------------------------------------------------------------
  55. // This routine performs a block (8x8) addition.
  56. // output = clamp[reference + current]
  57. //
  58. // Input I32 *current (output of FMIDCT)
  59. // U8 *reference (Motion Compensated address of reference)
  60. // U8 *output (Output buffer)
  61. //
  62. // Assumption: reference and output use PITCH
  63. // current has some other pitch
  64. // -------------------------------------------------------------------------
  65. #include "precomp.h"
  // Byte distance between consecutive rows of the residual ("current") block
  // read by BlockAdd: 8 DWORDs per row = 32 bytes.
  66. #define TEMPPITCH4 32
  // NOTE(review): no matching `#pragma data_seg()` reset is visible in this
  // part of the file - confirm that leaving IADATA2 selected is intended.
  67. #pragma data_seg("IADATA2")
  // BlockAdd addresses its stack local relative to esp, because ebp is reused
  // as a data pointer inside the routine.
  68. #define FRAMEPOINTER esp
  // Stack slot that holds the row counter while ecx is used for pixel data.
  // The expansion is unparenthesized; it is only used inside [ ] below.
  69. #define L_LOOPCOUNTER FRAMEPOINTER + 0 // 4 byte
  // Number of bytes of local storage reserved by `sub esp,LOCALSIZE`.
  70. #define LOCALSIZE 4 // keep aligned
  // The routine that follows is placed in the IACODE2 code section (the
  // revision log calls this the "Pass 2 code segment").
  71. #pragma code_seg("IACODE2")
  // BlockAdd: adds an 8x8 block of 32-bit residuals to an 8x8 block of 8-bit
  // reference pixels and stores the clamped 8-bit sums to the destination.
  //
  //   uResidual - address of the residual ("current") block; read as 8 DWORDs
  //               per row, rows TEMPPITCH4 bytes apart
  //   uRefBlock - address of the reference block; read as 8 bytes per row,
  //               rows PITCH bytes apart
  //   uDstBlock - address of the output block; written as 8 bytes per row
  //               (two DWORD stores), rows PITCH bytes apart
  //
  // Each sum is turned into a byte by the lookup
  // ClampTbl[CLAMP_BIAS + sum - 1024]; the revision log above attributes the
  // 1024 to the bias carried by the IDCT INTER coefficients. ClampTbl,
  // CLAMP_BIAS and PITCH are not defined in this file (presumably they come
  // from precomp.h - TODO confirm).
  //
  // The function is naked, so the prologue and epilogue below are written by
  // hand: ebp, esi, edi and ebx are saved and restored; eax, ecx and edx are
  // overwritten; the plain `ret` leaves the arguments on the stack.
  72. __declspec(naked)
  73. void BlockAdd (U32 uResidual, U32 uRefBlock,U32 uDstBlock)
  74. {
  75. __asm {
  76. push ebp // save callers frame pointer
  77. mov ebp,esp // make parameters accessible
  78. push esi // assumed preserved
  79. push edi
  80. push ebx
  81. sub esp,LOCALSIZE // reserve local storage
  // All three parameters are loaded while ebp is still the frame pointer.
  // uResidual is loaded last because that load overwrites ebp itself.
  82. mov edi, uDstBlock ;// edi gets Base addr of OutputBuffer
  // ecx = number of rows to process
  83. mov ecx, 8
  84. mov esi, uRefBlock; ;// esi gets Base addr of Reference (8-bit pixels)
  85. mov ebp, uResidual ;// ebp gets Base addr of Current (32-bit residuals); no named parameter is referenced after this
  // The value loaded into ebx here is never used (ebx is reloaded at the top
  // of the loop); the load only touches the first output row.
  86. mov ebx, [edi] ;// pre-fetch output
  // eax must enter the loop with its upper 24 bits clear (see `mov al` below).
  87. xor eax, eax
  88. // Cycle counts: 26 x 8=208 without cache miss
  89. // czhu, 9/25/95
  90. loop_for_i:
  // One iteration produces one row of 8 output pixels. Instructions belonging
  // to different pixels are interleaved, so the statement order must be kept.
  // The tags in the existing comments map to pixels as follows:
  //   1) -> pixel 2   2) -> pixel 3   3) -> pixel 0   4) -> pixel 1
  //   4+1) -> pixel 6 4+2) -> pixel 7 4+3) -> pixel 4 4+4) -> pixel 5
  // ecx is reused for pixel data, so the row counter is parked on the stack.
  91. mov [L_LOOPCOUNTER], ecx ; save loop counter in temporary
  92. mov ebx, [ebp+8] ; 1) fetch current[i+2]
  // Pattern used for every pixel: zero a register, load the reference byte
  // into its low 8 bits, then add the 32-bit residual.
  93. mov al, BYTE PTR[esi+2] ; 1) fetch ref[i+2]
  94. xor ecx, ecx ; 2)
  95. mov cl, BYTE PTR[esi+3] ; 2) fetch ref[i+3]
  96. mov edx, [ebp+12] ; 2) fetch current[i+3]
  97. add eax, ebx ; 1) result2 = ref[i+2] + current[i+2]
  98. xor ebx, ebx ; 3)
  99. add ecx, edx ; 2) result3= ref[i+3] + current[i+3]
  100. mov bl, BYTE PTR[esi+0] ; 3) fetch ref[i]
  // edx collects the packed output: dl/dh first receive pixels 2 and 3, the
  // shift by 16 moves them to bytes 2 and 3, then dl/dh take pixels 0 and 1.
  101. mov dl, ClampTbl[CLAMP_BIAS+eax-1024] ; 1) fetch clamp[result2]
  102. mov eax, [ebp+0] ; 3) fetch current[i]
  103. add ebx, eax ; 3) result0 = ref[i] + current[i]
  104. xor eax, eax ; 4)
  105. mov dh, ClampTbl[CLAMP_BIAS+ecx-1024] ; 2) fetch clamp[result3]
  106. mov al, [esi+1] ; 4) fetch ref[i+1]
  107. shl edx, 16 ; move 1st 2 results to high word
  108. mov ecx, [ebp+4] ; 4) fetch current[i+1]
  109. mov dl, ClampTbl[CLAMP_BIAS+ebx-1024] ; 3) fetch clamp[result0]
  110. add eax, ecx ; 4) result1 = ref[i+1] + current[i+1]
  111. xor ecx, ecx ; 4+1)
  112. mov ebx, [ebp+24] ; 4+1) fetch current[i+6]
  113. mov dh, ClampTbl[CLAMP_BIAS+eax-1024] ; 4) fetch clamp[result1]
  114. mov cl, BYTE PTR[esi+6] ; 4+1) fetch ref[i+6]
  // edx now holds pixels 0..3 in byte order; write them to output bytes 0..3.
  115. mov [edi], edx ; store 4 output pixels
  // Second half of the row: the same scheme for pixels 4..7.
  116. xor eax, eax ; 4+2)
  117. mov al, BYTE PTR[esi+7] ; 4+2) fetch ref[i+7]
  118. mov edx, [ebp+28] ; 4+2) fetch current[i+7]
  119. add ecx, ebx ; 4+1) result6 = ref[i+6] + current[i+6]
  120. xor ebx, ebx ; 4+3)
  121. add eax, edx ; 4+2) result7= ref[i+7] + current[i+7]
  122. mov bl, BYTE PTR[esi+4] ; 4+3) fetch ref[i+4]
  123. mov dl, ClampTbl[CLAMP_BIAS+ecx-1024] ; 4+1) fetch clamp[result6]
  124. mov ecx, [ebp+16] ; 4+3) fetch current[i+4]
  125. add ebx, ecx ; 4+3) result4 = ref[i+4] + current[i+4]
  126. xor ecx, ecx ; 4+4)
  127. mov dh, ClampTbl[CLAMP_BIAS+eax-1024] ; 4+2) fetch clamp[result7]
  128. mov cl, [esi+5] ; 4+4) fetch ref[i+5]
  129. shl edx, 16 ; move 3rd 2 results to high word
  130. mov eax, [ebp+20] ; 4+4) fetch current[i+5]
  131. add ecx, eax ; 4+4) result5 = ref[i+5] + current[i+5]
  // The reference pointer advances here, before the last two table lookups;
  // those lookups no longer read through esi.
  132. add esi, PITCH ; Update address of next line
  133. mov dl, ClampTbl[CLAMP_BIAS+ebx-1024] ; 4+3) fetch clamp[result4]
  134. add ebp, TEMPPITCH4 ; Update address of current to next line
  135. mov dh, ClampTbl[CLAMP_BIAS+ecx-1024] ; 4+4) fetch clamp[result5]
  136. mov ecx, [L_LOOPCOUNTER] ; get loop counter
  // edx now holds pixels 4..7; write them to output bytes 4..7.
  137. mov [edi+4], edx ; store 4 output pixels
  138. add edi, PITCH ; Update address of output to next line
  // Clear eax again so the `mov al` at the top of the next row starts clean.
  139. xor eax, eax ; 1)
  // dec sets the zero flag tested by jnz; the mov in between leaves the flags
  // unchanged.
  140. dec ecx
  // NOTE(review): after the eighth row edi equals uDstBlock + 8*PITCH, so this
  // load reads 4 bytes from the row following the block. The value is
  // discarded, but the address must be readable - confirm the output buffer
  // guarantees that for blocks on the last row.
  141. mov ebx, [edi] ; pre-fetch output
  142. jnz loop_for_i
  // Epilogue: release the local slot and restore registers in reverse order
  // of the prologue pushes.
  143. add esp,LOCALSIZE // free locals
  144. pop ebx
  145. pop edi
  146. pop esi
  147. pop ebp
  148. ret
  149. } //end of asm
  150. }
  // No section name given: ends the IACODE2 placement selected before BlockAdd.
  151. #pragma code_seg()