Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

257 lines
8.7 KiB

  1. ;/* *************************************************************************
  2. ;** INTEL Corporation Proprietary Information
  3. ;**
  4. ;** This listing is supplied under the terms of a license
  5. ;** agreement with INTEL Corporation and may not be copied
  6. ;** nor disclosed except in accordance with the terms of
  7. ;** that agreement.
  8. ;**
  9. ;** Copyright (c) 1995 Intel Corporation.
  10. ;** All Rights Reserved.
  11. ;**
  12. ;** $Header: R:\h26x\h26x\src\enc\exmqrle.asv 1.11 18 Oct 1996 16:57:20 BNICKERS $
  13. ;**
  14. ;** $Log: R:\h26x\h26x\src\enc\exmqrle.asv $
  15. ;//
  16. ;// Rev 1.11 18 Oct 1996 16:57:20 BNICKERS
  17. ;// Fixes for EMV
  18. ;//
  19. ;// Rev 1.10 10 Oct 1996 16:42:32 BNICKERS
  20. ;// Initial debugging of Extended Motion Vectors.
  21. ;//
  22. ;// Rev 1.9 22 Jul 1996 15:23:28 BNICKERS
  23. ;// Reduce code size. Implement H261 spatial filter.
  24. ;//
  25. ;// Rev 1.8 02 May 1996 12:00:50 BNICKERS
  26. ;// Initial integration of B Frame ME, MMX version.
  27. ;//
  28. ;// Rev 1.7 10 Apr 1996 13:14:16 BNICKERS
  29. ;// No change.
  30. ;//
  31. ;// Rev 1.6 26 Mar 1996 12:00:26 BNICKERS
  32. ;// Did some tuning for MMx encode.
  33. ;//
  34. ;// Rev 1.5 20 Mar 1996 15:26:58 KLILLEVO
  35. ;// changed quantization to match IA quantization
  36. ;//
  37. ;// Rev 1.4 15 Mar 1996 15:52:06 BECHOLS
  38. ;//
  39. ;// Completed Monolithic - Brian
  40. ;//
  41. ;// Rev 1.3 27 Feb 1996 08:28:04 KLILLEVO
  42. ;// now saves ebx in order not to crash in release build
  43. ;//
  44. ;// Rev 1.2 22 Feb 1996 18:38:38 BECHOLS
  45. ;//
  46. ;// Rescaled the quantization constants, Intra DC scaling, and accounted
  47. ;// for the inter bias that frame differencing performs.
  48. ;//
  49. ;// Rev 1.1 25 Jan 1996 08:20:32 BECHOLS
  50. ;// Changed the zigzag path to match the output of the MMx Forward DCT.
  51. ;//
  52. ;// Rev 1.0 17 Oct 1995 13:35:10 AGUPTA2
  53. ;// Initial revision.
  54. ;** *************************************************************************
  55. ;*/
  56. ;/* MMxQuantRLE: This function performs quantization on a block of coefficients
  57. ;** and produces (run,level,sign) triples. (These triples are VLC-encoded by
  58. ;** another routine.) 'run' is an unsigned byte integer, 'level' is an
  59. ;** unsigned byte integer, and 'sign' is a signed byte integer with
  60. ;** a value of either 0 or -1. Since 'level' is an unsigned byte
  61. ;** integer, it needs to be clamped to the right range for AC coefficients
  62. ;** outside this routine.
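  63. ;** Arguments (the code reads these through the symbols named in parentheses, which are not defined in this file):
  64. ;** CoeffStr: Starting address of coefficient stream (Coeffs); each coeff is a
  65. ;** signed 16-bit value and stored in an 8X8 matrix
  66. ;** CodeStr: Starting address of code stream (CodeStreamCursor); i.e. starting address for code
  67. ;** stream triples
  68. ;** QP: Quantizer value 1..31 (used through QPDiv2 and Recip2QPToUse)
  69. ;** IntraFlag: Odd for INTRA and even for INTER (the code compares StashBlockType for equality with INTRA)
  70. ;** Returns:
  71. ;** Ending code stream address, written back to CodeStreamCursor; ebx is 1 if the block is empty and 0 otherwise
  72. ;** Dependencies:
  73. ;** Clamping of 'level' must be done by the caller.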
  74. ;*/
  75. .xlist ; turn assembler listing off while the include files are read
  76. include e3inst.inc
  77. include memmodel.inc
  78. include iammx.inc ; presumably supplies the MMX mnemonics/macros used below (e.g. movdt) -- TODO confirm
  79. include exEDTQ.inc
  80. include e3mbad.inc
  81. .list ; turn assembler listing back on
  82. .CODE EDTQ ; open the code segment named EDTQ
  83. PUBLIC MMxQuantRLE ; make the entry label visible to other modules
  84. StackOffset TEXTEQU <8> ; text macro expanding to 8; not referenced in this file, presumably consumed by the include files' definitions of stack-based variables -- TODO confirm
  85. CONST_384 TEXTEQU <ebp> ; text alias for ebp; not referenced in this file (ebp is listed as reserved in the register-usage notes below)
  ; ---------------------------------------------------------------------------
  ; MMxQuantRLE
  ;
  ; Quantizes the block of 16-bit coefficients at Coeffs in place, then walks
  ; the quantized bytes in the order given by the NextZigZagCoeff table and
  ; writes 3-byte (run, level, sign) triples at CodeStreamCursor.
  ;
  ; Inputs (symbols defined outside this file):
  ;   Coeffs            coefficient block; overwritten with level/sign bytes
  ;   CodeStreamCursor  address at which the first triple is written
  ;   StashBlockType    block type; compared for equality with INTRA
  ;   QPDiv2            loaded into mm6 for non-INTRA blocks (dead-zone bias)
  ;   Recip2QPToUse     loaded into mm7 (multiplier used with pmulhw)
  ;   NextZigZagCoeff   byte table: index of the next coefficient; 0 ends the scan
  ;
  ; Outputs:
  ;   CodeStreamCursor  updated to just past the last byte written
  ;   ebx               1 if the block is reported empty, 0 otherwise
  ;   mm6               every byte 0FEh on return; mm7 all ones on return
  ;                     (NOTE(review): presumably constants the caller relies on -- confirm)
  ;
  ; Clobbers: eax, ebx, ecx, esi, edi, mm0-mm7, flags.  edx and ebp are not touched.
  ;
  ; Instruction tags such as ";00A" name the coefficient group (00, 04, 10, 14,
  ; 20, 24; "0*"/"1*" for a packed pair of groups) and the pipeline step:
  ; A load, B copy, C extract sign, D 1's complement, E absolute value,
  ; S subtract bias, F quantize.
  ; ---------------------------------------------------------------------------
  86. MMxQuantRLE:
  87. lea esi, Coeffs+128 ; esi = Coeffs+128; the quantize loop indexes it with a negative eax
  88. mov edi, CodeStreamCursor ; edi = output cursor for triples
  89. mov bl, StashBlockType
  90. mov eax, -128 ; loop induction variable, counts up to 0 in steps of 32
  91. cmp bl, INTRA ; flags are consumed by the je below (mov/pxor leave them intact)
  92. mov ebx, Coeffs+C00 ; ebx = dword at C00 of the block, fetched before it is overwritten; low word is used as DC
  93. pxor mm6, mm6 ; clear mm6 (no bias for INTRA)
  94. je @f ; INTRA: keep mm6 == 0
  95. movdt mm6,QPDiv2 ; non-INTRA: load QP/2 bias; replicated across mm6 by the punpckldq below
  96. @@:
  97. ; Register usage:
  98. ; esi -- base addr of coefficients; the order expected is the same as produced
  99. ; by Fast DCT
  100. ; edi -- RLE stream cursor
  101. ; edx -- Reserved. MacroBlockActionStream cursor, perturbed by block offsets.
  102. ; eax -- Loop induction variable.
  103. ; ebx -- DC
  104. ; ebp -- Reserved. PITCH
  105. ; mm7 -- Reciprocal of quantization level.
  106. movdt mm7, Recip2QPToUse
  107. punpckldq mm6, mm6 ; copy low dword of mm6 into its high dword
  108. movq mm0, C00[esi+eax] ;00A Load 4 coeffs
  109. punpckldq mm7, mm7 ; 4 words of Recip2QP
  110. movq mm2, C04[esi+eax] ;04A
  111. movq mm1, mm0 ;00B Copy
  112. psraw mm0, 15 ;00C Extract sign
  113. movq mm3, mm2 ;04B
  ; Loop entry invariant: mm0 = sign words and mm1 = raw values of group 00,
  ; mm2 = mm3 = raw values of group 04 (for the current eax).
  114. QuantCoeffs:
  115. psraw mm3, 15 ;04C
  116. pxor mm1, mm0 ;00D 1's complement
  117. pxor mm2, mm3 ;04D
  118. psubsw mm1, mm0 ;00E Absolute value
  119. psubsw mm2, mm3 ;04E
  120. psubusw mm1, mm6 ;00S Subtract QP/2 in case of inter
  121. pmulhw mm1, mm7 ;00F Quantize
  122. psubusw mm2, mm6 ;04S
  123. movq mm4, C10[esi+eax] ;10A
  124. pmulhw mm2, mm7 ;04F
  125. movq mm5, mm4 ;10B
  126. packsswb mm0, mm3 ;0*A Sign for 8 coeffs
  127. movq mm3, C14[esi+eax] ;14A
  128. psraw mm4, 15 ;10C
  129. packsswb mm1, mm2 ;0*C Quantized 8 coeffs
  130. movq mm2, mm3 ;14B
  131. psraw mm2, 15 ;14C
  132. pxor mm5, mm4 ;10D
  133. pxor mm3, mm2 ;14D
  134. psubsw mm5, mm4 ;10E
  135. psubsw mm3, mm2 ;14E
  136. psubusw mm5, mm6 ;10S
  137. pmulhw mm5, mm7 ;10F
  138. psubusw mm3, mm6 ;14S
  139. movq C04[esi+eax], mm0 ;0*B Save sign
  140. pmulhw mm3, mm7 ;14F
  141. movq mm0, C20[esi+eax] ;20A Pre-load for the next iteration. NOTE(review): on the last pass this presumably reads just past the block; the value is not used
  142. packsswb mm4, mm2 ;1*A
  143. movq C00[esi+eax], mm1 ;0*D Save quantized 8 coeffs
  144. movq mm1, mm0 ;20B
  145. movq C14[esi+eax], mm4 ;1*B
  146. packsswb mm5, mm3 ;1*C
  147. movq mm2, C24[esi+eax] ;24A
  148. psraw mm0, 15 ;20C
  149. movq C10[esi+eax], mm5 ;1*D
  150. movq mm3, mm2 ;24B
  151. add eax,32 ; advance to the next pair of groups
  152. jne QuantCoeffs ; loop until eax reaches 0
  153. pcmpeqb mm7,mm7 ; mm7 = all ones; not used again in this routine
  154. mov cl, StashBlockType
  155. cmp cl, INTRA ; flags are consumed by the jne below (the two mov's leave them intact)
  156. mov cl, [edi] ; Get output line into cache.
  157. mov cl, [edi+32] ; Get output line into cache.
  158. jne RunValSignINTER00
  ; INTRA: emit the DC triple first, then scan starting at index Q01.
  159. RunValSignINTRAC00:
  160. mov ecx, ebx ; ecx = dword saved from C00 at entry
  161. sub esi, 128 ; esi = Coeffs (start of the level/sign bytes)
  162. shl ecx, 16 ; keep only the low 16 bits ...
  163. mov PB [edi], 0H ; Run-length
  164. shr ecx, 20 ; 8-bit unsigned INTRA-DC value (... and shift them right by 4)
  165. jnz @f
  166. mov cl, 1 ; a DC level of 0 is replaced by 1
  167. @@:
  168. mov [edi+1], cl ; DC
  169. xor ecx, ecx
  170. mov [edi+2], al ; sign of DC (eax is 0 here: the quantize loop exits when eax reaches 0)
  171. xor ebx, ebx ; clear upper bits so ebx can be added to edi in the scan loop
  172. mov bl, [esi+Q01] ; level of the first coefficient scanned after DC
  173. mov cl, Q01 ; Index to Zigzag table.
  174. add edi, 3 ; step past the DC triple
  175. jmp QuantizeFirstACCoeff
  ; Non-INTRA: scan starts at index 0 with the byte at Q00.
  176. RunValSignINTER00:
  177. xor ecx, ecx ; Index to Zigzag table.
  178. xor ebx, ebx ; clear upper bits so ebx can be added to edi in the scan loop
  179. mov bl, [esi+Q00-128] ; esi still points at Coeffs+128 here
  180. sub esi, 128 ; esi = Coeffs (start of the level/sign bytes)
  181. QuantizeFirstACCoeff:
  182. xor al, al ; Zero run counter
  ; Scan loop (branch-free per coefficient): the triple is always written at
  ; edi, but edi only advances by 3 when the level is non-zero, so a zero level
  ; is simply overwritten by the next coefficient.
  ; al = current zero run, bl = level of the current coefficient, ecx = its index.
  183. QuantizeNextCoeff:
  184. mov [edi+1], bl ; Store quantized value.
  185. add bl,255 ; CF == 1 iff did not quantize to zero.
  186. sbb bl,bl ; bl == 0xFF iff did not quant to zero.
  187. mov ah, [esi+ecx+8] ; Fetch sign.
  188. mov [edi],al ; Store zero run counter.
  189. or al,bl ; Zero cnt == -1 iff did not quant to zero.
  190. inc al ; Increment zero count.
  191. mov cl,NextZigZagCoeff[ecx]
  192. and bl,3 ; bl == 3 iff did not quant to 0, else 0.
  193. mov [edi+2],ah ; Store sign.
  194. add edi,ebx ; Inc output ptr iff did not quant to zero.
  195. mov bl,[esi+ecx] ; Fetch next quantized coeff.
  196. test cl,cl ; More coeffs to do?
  197. jne QuantizeNextCoeff
  ; Post-processing: drop trailing triples with run >= 16 and level == 1, then
  ; either report the block empty or append a terminator triple.
  198. QuantDone:
  199. mov ebx,CodeStreamCursor
  200. mov al,StashBlockType
  201. sub ebx,edi ; ebx = start - end (0 iff nothing was emitted)
  202. je ReturnBlockEmptyFlag ; nothing emitted: ebx == 0, becomes 1 at the inc below
  203. mov ah,[edi-3] ; run of the last emitted triple
  204. cmp ah,16
  205. jl @f ; signed compare; NOTE(review): assumes run values stay below 128
  206. mov ah,[edi-2] ; level of the last emitted triple
  207. cmp ah,1
  208. jne @f
  209. sub edi,3 ; discard the last triple
  210. jmp QuantDone ; and re-examine the new last triple
  211. @@:
  212. add ebx,3 ; ebx == 0 iff exactly one triple remains
  213. xor al,INTRA ; al == 0 iff block type is INTRA
  214. or al,bl ; zero iff INTRA and only one triple (the DC) remains
  215. je ReturnBlockEmptyFlag ; reported as empty; no terminator is written
  216. mov ebx,-1 ; Set to -1
  217. mov [edi],bl ; write 0FFh into the run byte of a terminator triple
  218. add edi,3 ; step past the terminator
  219. ReturnBlockEmptyFlag:
  220. mov CodeStreamCursor, edi ; publish the updated cursor
  221. pcmpeqb mm6,mm6 ; mm6 = all ones
  222. inc ebx ; 0 if block not empty; 1 if block empty.
  223. paddb mm6,mm6 ; mm6 = 0FEh in every byte
  224. ret
  225. END