Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

144 lines
6.1 KiB

  1. /* *************************************************************************
  2. ** INTEL Corporation Proprietary Information
  3. **
  4. ** This listing is supplied under the terms of a license
  5. ** agreement with INTEL Corporation and may not be copied
  6. ** nor disclosed except in accordance with the terms of
  7. ** that agreement.
  8. **
  9. ** Copyright (c) 1995, 1996 Intel Corporation.
  10. ** All Rights Reserved.
  11. **
  12. ** *************************************************************************
  13. */
  14. //////////////////////////////////////////////////////////////////////////
  15. // $Author: AKASAI $
  16. // $Date: 18 Mar 1996 10:47:48 $
  17. // $Archive: S:\h26x\src\dec\d1addsp.cpv $
  18. // $Header: S:\h26x\src\dec\d1addsp.cpv 1.1 18 Mar 1996 10:47:48 AKASAI $
  19. // $Log: S:\h26x\src\dec\d1addsp.cpv $
  20. //
  21. // Rev 1.1 18 Mar 1996 10:47:48 AKASAI
  22. // Deleted ClampTblSpecial so now uses common table ClipPixIntra.
  23. // Added pragma code_seg("IACODE2").
  24. //
  25. // Rev 1.0 01 Nov 1995 13:37:58 AKASAI
  26. // Initial revision.
  27. //
  28. // -------------------------------------------------------------------------
  29. // ROUTINE NAME: BlockAddSpecial
  30. // FILE NAME: d1addsp.cpp
  31. //
  32. // This routine performs an 8x8 block addition.
  33. // output = clamp[reference + current]
  34. //
  35. // Input I32 *current (output of FMIDCT)
  36. // U8 *reference (Motion Compensated address of reference)
  37. // U8 *output (Output buffer)
  38. //
  39. // Assumption: reference uses 8 as pitch, output uses PITCH,
  40. // current has some other pitch, TEMPPITCH4
  41. //
  42. // Registers used: eax, ebx, ecx, edx, esi, edi, ebp
  43. //
  44. // -------------------------------------------------------------------------
  45. #include "precomp.h"
  46. #define TEMPPITCH4 32 // byte stride between rows of the residual; added to ebp once per row in BlockAddSpecial
  47. extern U8 ClipPixIntra[]; // clamp lookup table; this file indexes it as ClipPixIntra[1024 + (ref + residual)]
  48. #define FRAMEPOINTER esp // locals in BlockAddSpecial are addressed relative to esp
  49. #define L_LOOPCOUNTER FRAMEPOINTER + 0 // 4-byte local slot holding the row counter while ecx is used as scratch
  50. #define LOCALSIZE 4 // bytes of local storage reserved on the stack; keep aligned
  51. #pragma code_seg("IACODE2")
  52. __declspec(naked) // the asm body below does its own register saves, stack adjustment and ret
  53. void BlockAddSpecial (U32 uResidual, U32 uRefBlock,U32 uDstBlock) // all three arguments are used as addresses: residual (dwords), reference (bytes), output (bytes)
  54. {
  55. __asm {
  56. push ebp ;// save caller's frame pointer
  57. mov ebp,esp ;// make parameters accessible
  58. push esi ;// assumed preserved
  59. push edi
  60. push ebx
  61. sub esp,LOCALSIZE ;// reserve local storage
  62. mov esi, uRefBlock; ;// esi gets base addr of reference block (read one byte per pixel)
  63. mov edi, uDstBlock ;// edi gets Base addr of OutputBuffer
  64. mov ebp, uResidual ;// ebp gets base addr of residual/current (read one dword per pixel); loaded last because this overwrites the frame pointer set up above
  65. mov ecx, 8 // row counter: the loop below runs 8 times
  66. xor eax, eax // eax must be zero on loop entry so that "mov al, ..." yields a zero-extended byte
  67. // Cycle count: 26 x 8 = 208 without cache misses
  68. // czhu, 9/25/95
  69. ALIGN 4
  70. loop_for_i:
  71. mov [L_LOOPCOUNTER], ecx ; save loop counter in temporary
  72. mov ebx, [ebp+8] ; 1) fetch current[i+2]
  73. mov al, BYTE PTR[esi+2] ; 1) fetch ref[i+2]
  74. xor ecx, ecx ; 2)
  75. mov cl, BYTE PTR[esi+3] ; 2) fetch ref[i+3]
  76. mov edx, [ebp+12] ; 2) fetch current[i+3]
  77. add eax, ebx ; 1) result2 = ref[i+2] + current[i+2]
  78. xor ebx, ebx ; 3)
  79. add ecx, edx ; 2) result3= ref[i+3] + current[i+3]
  80. mov bl, BYTE PTR[esi] ; 3) fetch ref[i]
  81. mov dl, ClipPixIntra[1024+eax] ; 1) fetch clamp[result2]
  82. mov eax, [ebp] ; 3) fetch current[i]
  83. add ebx, eax ; 3) result0 = ref[i] + current[i]
  84. xor eax, eax ; 4)
  85. mov dh, ClipPixIntra[1024+ecx] ; 2) fetch clamp[result3]
  86. mov al, [esi+1] ; 4) fetch ref[i+1]
  87. shl edx, 16 ; move 1st 2 results to high word // i.e. clamp[result2] and clamp[result3] become bytes 2 and 3 of edx
  88. mov ecx, [ebp+4] ; 4) fetch current[i+1]
  89. mov dl, ClipPixIntra[1024+ebx] ; 3) fetch clamp[result0]
  90. add eax, ecx ; 4) result1 = ref[i+1] + current[i+1]
  91. xor ecx, ecx ; 4+1)
  92. mov ebx, [ebp+24] ; 4+1) fetch current[i+6]
  93. mov dh, ClipPixIntra[1024+eax] ; 4) fetch clamp[result1]
  94. mov cl, BYTE PTR[esi+6] ; 4+1) fetch ref[i+6]
  95. mov [edi], edx ; store 4 output pixels // pixels 0..3 of this row, lowest byte = pixel 0
  96. xor eax, eax ; 4+2)
  97. mov al, BYTE PTR[esi+7] ; 4+2) fetch ref[i+7]
  98. mov edx, [ebp+28] ; 4+2) fetch current[i+7]
  99. add ecx, ebx ; 4+1) result6 = ref[i+6] + current[i+6]
  100. xor ebx, ebx ; 4+3)
  101. add eax, edx ; 4+2) result7= ref[i+7] + current[i+7]
  102. mov bl, BYTE PTR[esi+4] ; 4+3) fetch ref[i+4]
  103. mov dl, ClipPixIntra[1024+ecx] ; 4+1) fetch clamp[result6]
  104. mov ecx, [ebp+16] ; 4+3) fetch current[i+4]
  105. add ebx, ecx ; 4+3) result4 = ref[i+4] + current[i+4]
  106. xor ecx, ecx ; 4+4)
  107. mov dh, ClipPixIntra[1024+eax] ; 4+2) fetch clamp[result7]
  108. mov cl, [esi+5] ; 4+4) fetch ref[i+5]
  109. shl edx, 16 ; move 3rd 2 results to high word // i.e. clamp[result6] and clamp[result7] become bytes 2 and 3 of edx
  110. mov eax, [ebp+20] ; 4+4) fetch current[i+5]
  111. add ecx, eax ; 4+4) result5 = ref[i+5] + current[i+5]
  112. add esi, 8 ; Update address of next line // reference rows are 8 bytes apart
  113. mov dl, ClipPixIntra[1024+ebx] ; 4+3) fetch clamp[result4]
  114. add ebp, TEMPPITCH4 ; Update address of current to next line
  115. mov dh, ClipPixIntra[1024+ecx] ; 4+4) fetch clamp[result5]
  116. mov ecx, [L_LOOPCOUNTER] ; get loop counter
  117. mov [edi+4], edx ; store 4 output pixels // pixels 4..7 of this row
  118. add edi, PITCH ; Update address of output to next line
  119. xor eax, eax ; 1) // re-zero eax for the next iteration's byte load into al
  120. dec ecx
  121. jnz loop_for_i
  122. add esp,LOCALSIZE // free locals
  123. pop ebx
  124. pop edi
  125. pop esi
  126. pop ebp
  127. ret
  128. } //end of asm, BlockAddSpecial
  129. } // End of BlockAddSpecial
  130. #pragma code_seg() // no section name given; per MSVC docs this resets code placement to the default section