Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

198 lines
5.2 KiB

  1. ;*************************************************************************
  2. ;** INTEL Corporation Proprietary Information
  3. ;**
  4. ;** This listing is supplied under the terms of a license
  5. ;** agreement with INTEL Corporation and may not be copied
  6. ;** nor disclosed except in accordance with the terms of
  7. ;** that agreement.
  8. ;**
  9. ;** Copyright (c) 1995, 1996 Intel Corporation.
  10. ;** All Rights Reserved.
  11. ;**
  12. ;*************************************************************************
  13. .486 ; enable the 80486 instruction set
  14. .Model FLAT, C ; flat memory model, C language type for PROC names/arguments
  15. APP_32BIT equ 1 ; build-time symbol; not referenced in the visible part of this file
  16. ;.CODE (disabled: the code is placed in the IACODE2 segment declared below)
  17. IACODE2 SEGMENT PARA USE32 PUBLIC 'CODE' ; declare attributes: paragraph aligned, 32-bit, public, class 'CODE'
  18. IACODE2 ENDS ; close the attribute declaration
  19. IACODE2 SEGMENT ; reopen the segment; everything up to the matching ENDS is assembled into it
  20. LoopFilter PROC C PUBLIC USES esi edi ebx ebp in8x8:DWORD, out8x8:DWORD, pitch:DWORD
  21. LOCAL filt_temp[32]:DWORD, loop_count:DWORD
  ; ----------------------------------------------------------------------
  ; LoopFilter - separable 1-2-1 smoothing of one 8x8 block of 8-bit pixels.
  ;   in8x8  : address of the first input pixel; rows are 'pitch' bytes apart
  ;   out8x8 : address of the output block; 8 rows of 8 bytes, 8 bytes apart
  ;   pitch  : byte distance between consecutive input rows
  ; Pass 1 (do_row) filters every row horizontally into filt_temp as 16-bit
  ;   words: out[i] = p[i-1] + 2*p[i] + p[i+1] for i = 1..6, while the edge
  ;   pixels are only scaled (p0<<2, p7<<2) so every word has a gain of 4.
  ; Pass 2 (row0) filters vertically, two columns per iteration (two 16-bit
  ;   words packed in one dword): rows 1..6 are (r[j-1] + 2*r[j] + r[j+1] + 8) >> 4,
  ;   rows 0 and 7 are (r + 2) >> 2.
  ; filt_temp is 8 rows x 8 words = 128 bytes (32 dwords).
  ; eax, ecx and edx are used as scratch; no return value is set up.
  ; The instruction order is interleaved on purpose (loads and stores sit
  ; between dependent adds), so the comments name the value held in each
  ; register rather than following a fixed a/b/c naming.
  ; ----------------------------------------------------------------------
  22. ; **************************************************
  23. ; output pitch is hard coded to 8 (original note: for uFilterBBuffer)
  24. ; input pitch is taken from the 'pitch' parameter (original note: 384)
  25. ; **************************************************
  26. mov esi,in8x8 ; esi = current input row
  27. ; mov edi,out8x8 ; for debug
  28. lea edi,filt_temp ; use temporary storage
  29. mov loop_count,8 ; 8 input rows to filter
  30. xor eax,eax ; clear eax so the byte load into al below yields a zero-extended p0
  31. ; filter 8x8 block horizontally
  32. ; input is 8-bit, output is 16-bit temporary storage
  33. ALIGN 4
  34. do_row:
  35. ; pixel 0: edge pixel, only scaled by 4
  36. mov al,byte ptr [esi] ; eax = p0 (upper bits are already zero)
  37. xor ebx,ebx ; clear ebx for the p1 byte load
  38. mov edx,eax ; edx = p0
  39. xor ecx,ecx ; clear ecx for the p2 byte load
  40. shl edx,2 ; edx = p0<<2
  41. ; load p1..p3, build out1 and out2
  42. mov bl,byte ptr [esi+1] ; ebx = p1
  43. mov [edi],dx ; out0 = p0<<2
  44. add eax,ebx ; eax = p0+p1
  45. mov cl,byte ptr [esi+2] ; ecx = p2
  46. ; (edx is free again after the out0 store)
  47. xor edx,edx ; clear edx for the p3 byte load
  48. add ebx,ecx ; ebx = p1+p2
  49. mov dl,byte ptr [esi+3] ; edx = p3
  50. add eax,ebx ; eax = p0 + 2*p1 + p2
  51. add ecx,edx ; ecx = p2+p3
  52. mov [edi+2],ax ; out1 = p0 + 2*p1 + p2
  53. add ebx,ecx ; ebx = p1 + 2*p2 + p3
  54. ; load p4, p5, build out3 and out4
  55. mov [edi+4],bx ; out2 = p1 + 2*p2 + p3
  56. xor eax,eax ; clear eax for the p4 byte load
  57. mov al,byte ptr [esi+4] ; eax = p4
  58. ; (ebx is free again after the out2 store)
  59. xor ebx,ebx ; clear ebx for the p5 byte load
  60. add edx,eax ; edx = p3+p4
  61. mov bl,byte ptr [esi+5] ; ebx = p5
  62. add ecx,edx ; ecx = p2 + 2*p3 + p4
  63. add eax,ebx ; eax = p4+p5
  64. mov [edi+6],cx ; out3 = p2 + 2*p3 + p4
  65. add edx,eax ; edx = p3 + 2*p4 + p5
  66. ; load p6, p7, build out5, out6 and the scaled edge out7
  67. mov [edi+8],dx ; out4 = p3 + 2*p4 + p5
  68. xor ecx,ecx ; clear ecx for the p6 byte load
  69. mov cl,byte ptr [esi+6] ; ecx = p6
  70. ; (edx is free again after the out4 store)
  71. xor edx,edx ; clear edx for the p7 byte load
  72. add ebx,ecx ; ebx = p5+p6
  73. mov dl,byte ptr [esi+7] ; edx = p7
  74. add eax,ebx ; eax = p4 + 2*p5 + p6
  75. add ecx,edx ; ecx = p6+p7
  76. shl edx,2 ; edx = p7<<2
  77. add ebx,ecx ; ebx = p5 + 2*p6 + p7
  78. mov [edi+10],ax ; out5 = p4 + 2*p5 + p6
  79. ; store the last two words and advance to the next row
  80. xor eax,eax ; clear eax for the p0 byte load of the next iteration
  81. mov [edi+12],bx ; out6 = p5 + 2*p6 + p7
  82. mov ecx,loop_count ; ecx = rows remaining
  83. mov [edi+14],dx ; out7 = p7<<2
  84. mov ebx,pitch ; ebx = input row stride in bytes
  85. add edi,16 ; next temp row (8 words = 16 bytes)
  86. add esi,ebx ; inc input ptr
  87. dec ecx ; sets ZF for the jnz below
  88. mov loop_count,ecx ; mov leaves the flags from dec untouched
  89. jnz do_row ; loop until all 8 rows are done
  90. ; filter 8x8 block vertically
  91. ; input is 16-bit from temporary storage, output is 8-bit
  92. lea esi,filt_temp ; esi = first pair of columns in the temp block
  93. mov edi,out8x8 ; edi = output block
  94. mov loop_count,4 ; loop counter: 4 iterations x 2 columns
  95. ; each dword read from filt_temp holds two adjacent columns as 16-bit words,
  96. ; so one iteration below produces two output columns for all 8 rows
  97. ALIGN 4
  98. row0:
  99. mov eax,[esi] ; eax = r0 (row 0, two columns)
  100. ; output row 0 (scaled edge) and start of output row 1
  101. mov ebx,[esi+16] ; ebx = r1
  102. mov edx,eax ; edx = r0
  103. add eax,ebx ; eax = r0+r1
  104. add edx,00020002h ; r0 + 2 in each 16-bit half (rounding for >>2)
  105. mov ecx,[esi+32] ; ecx = r2
  106. shr edx,2 ; (r0+2)>>2, removes the gain of 4 from pass 1
  107. add ebx,ecx ; ebx = r1+r2
  108. and edx,00ff00ffh ; keep 8 bits per half; drops bits shifted in from the upper half
  109. add eax,ebx ; eax = r0 + 2*r1 + r2
  110. mov [edi],dl ; output row 0, column 0
  111. add eax,00080008h ; + 8 in each half (rounding for >>4)
  112. shr edx,16 ; move the column 1 result into dl
  113. shr eax,4 ; divide by 16 (gain 4 from each pass)
  114. mov [edi+1],dl ; output row 0, column 1
  115. ; output row 1, sums for output row 2
  116. mov edx,[esi+48] ; edx = r3
  117. and eax,00ff00ffh ; keep 8 bits per half
  118. add ecx,edx ; ecx = r2+r3
  119. mov [edi+8],al ; output row 1, column 0
  120. shr eax,16 ; column 1 result into al
  121. add ebx,ecx ; ebx = r1 + 2*r2 + r3
  122. mov [edi+9],al ; output row 1, column 1
  123. add ebx,00080008h ; round
  124. shr ebx,4 ; divide by 16
  125. ; output row 2, sums for output row 3
  126. mov eax,[esi+64] ; eax = r4
  127. and ebx,00ff00ffh ; keep 8 bits per half
  128. add edx,eax ; edx = r3+r4
  129. mov [edi+16],bl ; output row 2, column 0
  130. add ecx,edx ; ecx = r2 + 2*r3 + r4
  131. shr ebx,16 ; column 1 result into bl
  132. add ecx,00080008h ; round
  133. shr ecx,4 ; divide by 16
  134. mov [edi+17],bl ; output row 2, column 1
  135. and ecx,00ff00ffh ; keep 8 bits per half
  136. ; output row 3, sums for output row 4
  137. mov ebx,[esi+80] ; ebx = r5
  138. mov [edi+24],cl ; output row 3, column 0
  139. add eax,ebx ; eax = r4+r5
  140. shr ecx,16 ; column 1 result into cl
  141. add edx,eax ; edx = r3 + 2*r4 + r5
  142. mov [edi+25],cl ; output row 3, column 1
  143. add edx,00080008h ; round
  144. shr edx,4 ; divide by 16
  145. ; output row 4, sums for output row 5
  146. mov ecx,[esi+96] ; ecx = r6
  147. and edx,00ff00ffh ; keep 8 bits per half
  148. add ebx,ecx ; ebx = r5+r6
  149. mov [edi+32],dl ; output row 4, column 0
  150. add eax,ebx ; eax = r4 + 2*r5 + r6
  151. shr edx,16 ; column 1 result into dl
  152. add eax,00080008h ; round
  153. shr eax,4 ; divide by 16
  154. mov [edi+33],dl ; output row 4, column 1
  155. and eax,00ff00ffh ; keep 8 bits per half
  156. ; output row 5, sums for output row 6
  157. mov edx,[esi+112] ; edx = r7
  158. mov [edi+40],al ; output row 5, column 0
  159. add ecx,edx ; ecx = r6+r7
  160. shr eax,16 ; column 1 result into al
  161. ; output row 6 (filtered) and output row 7 (scaled edge)
  162. add edx,00020002h ; r7 + 2 in each half (rounding for >>2)
  163. shr edx,2 ; (r7+2)>>2, removes the gain of 4 from pass 1
  164. mov [edi+41],al ; output row 5, column 1
  165. add ebx,ecx ; ebx = r5 + 2*r6 + r7
  166. and edx,00ff00ffh ; keep 8 bits per half
  167. add ebx,00080008h ; round
  168. mov [edi+56],dl ; output row 7, column 0
  169. shr ebx,4 ; divide by 16
  170. mov ecx,loop_count ; ecx = column pairs remaining
  171. shr edx,16 ; column 1 result into dl
  172. and ebx,00ff00ffh ; keep 8 bits per half
  173. mov [edi+48],bl ; output row 6, column 0
  174. mov [edi+57],dl ; output row 7, column 1
  175. shr ebx,16 ; column 1 result into bl
  176. add esi,4 ; inc input ptr: next pair of 16-bit columns
  177. mov [edi+49],bl ; output row 6, column 1
  178. add edi,2 ; next two output columns
  179. dec ecx ; sets ZF for the jnz below
  180. mov loop_count,ecx ; mov leaves the flags from dec untouched
  181. jnz row0 ; loop until all 4 column pairs are done
  182. ret ; NOTE(review): relies on the assembler-generated epilogue for the USES/LOCAL frame
  183. LoopFilter EndP
  184. IACODE2 ENDS ; close the code segment that contains LoopFilter
  185. END ; end of the assembly module