Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

188 lines
5.1 KiB

  1. ;*************************************************************************
  2. ;** INTEL Corporation Proprietary Information
  3. ;**
  4. ;** This listing is supplied under the terms of a license
  5. ;** agreement with INTEL Corporation and may not be copied
  6. ;** nor disclosed except in accordance with the terms of
  7. ;** that agreement.
  8. ;**
  9. ;** Copyright (c) 1995 Intel Corporation.
  10. ;** All Rights Reserved.
  11. ;**
  12. ;*************************************************************************
  .486
  .Model FLAT, C

  APP_32BIT equ 1

  .CODE

  IFDEF SLF_WORK_AROUND

  ;-------------------------------------------------------------------------
  ; Constants used by EncUVLoopFilter
  ;-------------------------------------------------------------------------
  UVLF_OUT_PITCH  equ 384                   ; output pitch is hard coded (bytes)
  UVLF_TMP_PITCH  equ 16                    ; one temp row = 8 words
  UVLF_ROUND_4    equ 00020002h             ; +2 in each packed word, before >>2
  UVLF_ROUND_16   equ 00080008h             ; +8 in each packed word, before >>4
  UVLF_BYTE_MASK  equ 00ff00ffh             ; low byte of each packed word

  ;-------------------------------------------------------------------------
  ; EncUVLoopFilter(in8x8, out8x8, pitch)
  ;
  ; Filters one 8x8 block of bytes with a separable 1-2-1 kernel.
  ;
  ;   in8x8   address of the top-left input byte
  ;   out8x8  address of the top-left output byte
  ;   pitch   distance in bytes between input rows
  ;
  ; The output row distance is NOT taken from `pitch`; it is fixed at
  ; UVLF_OUT_PITCH (384).
  ;
  ; Pass 1 (horizontal), per input row, 8-bit in -> 16-bit out:
  ;     t[0] = 4*p[0]
  ;     t[k] = p[k-1] + 2*p[k] + p[k+1]          k = 1..6
  ;     t[7] = 4*p[7]
  ; All 8 rows are written to a 128-byte stack buffer before pass 2 starts.
  ;
  ; Pass 2 (vertical), per column, 16-bit in -> 8-bit out:
  ;     out[0] = (T[0] + 2) >> 2
  ;     out[r] = (T[r-1] + 2*T[r] + T[r+1] + 8) >> 4     r = 1..6
  ;     out[7] = (T[7] + 2) >> 2
  ; Two adjacent columns are handled per iteration as two 16-bit lanes of
  ; one 32-bit register. Lane sums never exceed 4088, so nothing carries
  ; from the low lane into the high lane; bits shifted down from the high
  ; lane are removed by UVLF_BYTE_MASK.
  ;
  ; No return value is set; eax, ecx and edx are left holding scratch data.
  ;-------------------------------------------------------------------------
  EncUVLoopFilter PROC C PUBLIC USES esi edi ebx ebp in8x8:DWORD, out8x8:DWORD, pitch:DWORD
          LOCAL   hpass[32]:DWORD, iter_left:DWORD

  ; ---------------- pass 1: horizontal, bytes -> words -------------------
          mov     esi, in8x8                ; esi = current input row
          lea     edi, hpass                ; edi = current temp row
          mov     iter_left, 8              ; 8 rows

  hpass_row:
          ; A three-register window (previous, current, next pixel) slides
          ; along the row; the registers swap roles each step.
          movzx   eax, byte ptr [esi]       ; eax = p0
          movzx   ebx, byte ptr [esi+1]     ; ebx = p1
          lea     edx, [eax*4]
          mov     [edi], dx                 ; t0 = 4*p0

          movzx   ecx, byte ptr [esi+2]     ; ecx = p2
          lea     edx, [eax+ebx*2]
          add     edx, ecx
          mov     [edi+2], dx               ; t1 = p0 + 2*p1 + p2

          movzx   eax, byte ptr [esi+3]     ; eax = p3
          lea     edx, [ebx+ecx*2]
          add     edx, eax
          mov     [edi+4], dx               ; t2 = p1 + 2*p2 + p3

          movzx   ebx, byte ptr [esi+4]     ; ebx = p4
          lea     edx, [ecx+eax*2]
          add     edx, ebx
          mov     [edi+6], dx               ; t3 = p2 + 2*p3 + p4

          movzx   ecx, byte ptr [esi+5]     ; ecx = p5
          lea     edx, [eax+ebx*2]
          add     edx, ecx
          mov     [edi+8], dx               ; t4 = p3 + 2*p4 + p5

          movzx   eax, byte ptr [esi+6]     ; eax = p6
          lea     edx, [ebx+ecx*2]
          add     edx, eax
          mov     [edi+10], dx              ; t5 = p4 + 2*p5 + p6

          movzx   ebx, byte ptr [esi+7]     ; ebx = p7
          lea     edx, [ecx+eax*2]
          add     edx, ebx
          mov     [edi+12], dx              ; t6 = p5 + 2*p6 + p7

          shl     ebx, 2
          mov     [edi+14], bx              ; t7 = 4*p7

          add     edi, UVLF_TMP_PITCH       ; next temp row
          add     esi, pitch                ; next input row
          dec     iter_left
          jnz     hpass_row

  ; ---------------- pass 2: vertical, words -> bytes ---------------------
          lea     esi, hpass                ; esi = current column pair in temp
          mov     edi, out8x8               ; edi = current column pair in output
          mov     iter_left, 4              ; 4 column pairs

  vpass_cols:
          mov     eax, [esi]                                ; eax = T0
          mov     ebx, [esi+1*UVLF_TMP_PITCH]               ; ebx = T1

          ; row 0: (T0 + 2) >> 2
          mov     edx, eax
          add     edx, UVLF_ROUND_4
          shr     edx, 2
          and     edx, UVLF_BYTE_MASK
          mov     [edi], dl                                 ; even column
          shr     edx, 16
          mov     [edi+1], dl                               ; odd column

          ; row 1: (T0 + 2*T1 + T2 + 8) >> 4
          mov     ecx, [esi+2*UVLF_TMP_PITCH]               ; ecx = T2
          lea     edx, [eax+ebx*2]
          add     edx, ecx
          add     edx, UVLF_ROUND_16
          shr     edx, 4
          and     edx, UVLF_BYTE_MASK
          mov     [edi+1*UVLF_OUT_PITCH], dl
          shr     edx, 16
          mov     [edi+1*UVLF_OUT_PITCH+1], dl

          ; row 2: (T1 + 2*T2 + T3 + 8) >> 4
          mov     eax, [esi+3*UVLF_TMP_PITCH]               ; eax = T3
          lea     edx, [ebx+ecx*2]
          add     edx, eax
          add     edx, UVLF_ROUND_16
          shr     edx, 4
          and     edx, UVLF_BYTE_MASK
          mov     [edi+2*UVLF_OUT_PITCH], dl
          shr     edx, 16
          mov     [edi+2*UVLF_OUT_PITCH+1], dl

          ; row 3: (T2 + 2*T3 + T4 + 8) >> 4
          mov     ebx, [esi+4*UVLF_TMP_PITCH]               ; ebx = T4
          lea     edx, [ecx+eax*2]
          add     edx, ebx
          add     edx, UVLF_ROUND_16
          shr     edx, 4
          and     edx, UVLF_BYTE_MASK
          mov     [edi+3*UVLF_OUT_PITCH], dl
          shr     edx, 16
          mov     [edi+3*UVLF_OUT_PITCH+1], dl

          ; row 4: (T3 + 2*T4 + T5 + 8) >> 4
          mov     ecx, [esi+5*UVLF_TMP_PITCH]               ; ecx = T5
          lea     edx, [eax+ebx*2]
          add     edx, ecx
          add     edx, UVLF_ROUND_16
          shr     edx, 4
          and     edx, UVLF_BYTE_MASK
          mov     [edi+4*UVLF_OUT_PITCH], dl
          shr     edx, 16
          mov     [edi+4*UVLF_OUT_PITCH+1], dl

          ; row 5: (T4 + 2*T5 + T6 + 8) >> 4
          mov     eax, [esi+6*UVLF_TMP_PITCH]               ; eax = T6
          lea     edx, [ebx+ecx*2]
          add     edx, eax
          add     edx, UVLF_ROUND_16
          shr     edx, 4
          and     edx, UVLF_BYTE_MASK
          mov     [edi+5*UVLF_OUT_PITCH], dl
          shr     edx, 16
          mov     [edi+5*UVLF_OUT_PITCH+1], dl

          ; row 6: (T5 + 2*T6 + T7 + 8) >> 4
          mov     ebx, [esi+7*UVLF_TMP_PITCH]               ; ebx = T7
          lea     edx, [ecx+eax*2]
          add     edx, ebx
          add     edx, UVLF_ROUND_16
          shr     edx, 4
          and     edx, UVLF_BYTE_MASK
          mov     [edi+6*UVLF_OUT_PITCH], dl
          shr     edx, 16
          mov     [edi+6*UVLF_OUT_PITCH+1], dl

          ; row 7: (T7 + 2) >> 2
          add     ebx, UVLF_ROUND_4
          shr     ebx, 2
          and     ebx, UVLF_BYTE_MASK
          mov     [edi+7*UVLF_OUT_PITCH], bl
          shr     ebx, 16
          mov     [edi+7*UVLF_OUT_PITCH+1], bl

          add     esi, 4                    ; next pair of 16-bit columns
          add     edi, 2                    ; next pair of output bytes
          dec     iter_left
          jnz     vpass_cols

          ret
  EncUVLoopFilter EndP

  ENDIF

  END