windows-server-2003/enduser/netmeeting/av/codecs/intel/h263/d3mblk.cpp


								/* *************************************************************************

								**    INTEL Corporation Proprietary Information

								**

								**    This listing is supplied under the terms of a license

								**    agreement with INTEL Corporation and may not be copied

								**    nor disclosed except in accordance with the terms of

								**    that agreement.

								**

								**    Copyright (c) 1995 Intel Corporation.

								**    All Rights Reserved.

								**

								** *************************************************************************

								*/

								// $Author:   JMCVEIGH  $

								// $Date:   21 Jan 1997 08:53:16  $

								// $Archive:   S:\h26x\src\dec\d3mblk.cpv  $

								// $Header:   S:\h26x\src\dec\d3mblk.cpv   1.60   21 Jan 1997 08:53:16   JMCVEIGH  $

								// $Log:   S:\h26x\src\dec\d3mblk.cpv  $

								//

								//    Rev 1.60   21 Jan 1997 08:53:16   JMCVEIGH

								// Before we calculated the interpolated index for MC prior to

								// clipping for UMV. We might then reference outside of the 16 pel

								// wide padded border. Moved calculation of interp_index to after

								// UMV clipping.

								//

								//    Rev 1.59   16 Dec 1996 17:45:26   JMCVEIGH

								// Proper motion vector decoding and prediction for forward prediction

								// in B portion of improved PB-frame.

								//

								//    Rev 1.58   09 Dec 1996 15:54:10   GMLIM

								//

								// Added a debug message in H263BBlockPrediction() for the case where

								// TR == TR_Prev.  Set iTRD = 256 to avoid divide by 0.

								//

								//    Rev 1.57   27 Sep 1996 17:29:24   KLILLEVO

								//

								// added clipping of extended motion vectors for MMX

								//

								//    Rev 1.56   26 Sep 1996 13:56:52   KLILLEVO

								//

								// fixed a totally bogus version of the extended motion vectors

								//

								//    Rev 1.55   26 Sep 1996 11:32:16   KLILLEVO

								// extended motion vectors now work for AP on the P54C chip

								//

								//    Rev 1.54   25 Sep 1996 08:05:32   KLILLEVO

								// initial extended motion vectors support

								// does not work for AP yet

								//

								//    Rev 1.53   09 Jul 1996 16:46:00   AGUPTA2

								// MMX code now clears DC value for INTRA blocks and adds it back during

								// ClipAndMove; this is to solve overflow problem.

								//

								//    Rev 1.52   29 May 1996 10:18:36   AGUPTA2

								// MMX need not be defd to use MMX decoder.

								//

								//    Rev 1.51   04 Apr 1996 11:06:16   AGUPTA2

								// Added calls to MMX_BlockCopy().

								//

								//    Rev 1.50   01 Apr 1996 13:05:28   RMCKENZX

								// Added MMx functionality for Advance Prediction and PB Frames.

								//

								//    Rev 1.49   22 Mar 1996 17:50:30   AGUPTA2

								// MMX support.  MMX support is included only if MMX defined. MMX is

								// not defined by default so that we do not impact IA code size.

								//

								//    Rev 1.48   08 Mar 1996 16:46:22   AGUPTA2

								// Added pragmas code_seg and data_seg to place code and data in appropriate

								// segments.  Created a function table of interpolation rtns.; interpolation

								// rtns. are now called thru this function table.  Commented out the clipping of

								// MV code.  It is not needed now and it needs to be re-written to be more

								// efficient.

								//

								//

								//    Rev 1.47   23 Feb 1996 09:46:54   KLILLEVO

								// fixed decoding of Unrestricted Motion Vector mode

								//

								//    Rev 1.46   29 Jan 1996 17:50:48   RMCKENZX

								// Reorganized logic in H263IDCTandMC for AP, optimizing the changes

								// made for revision 1.42 and simplifying logic for determining iNext[i].

								// Also corrected omission for UMV decoding in H263BBlockPrediction.

								//

								//    Rev 1.0   29 Jan 1996 12:44:00   RMCKENZX

								// Initial revision.

								//

								//    Rev 1.45   24 Jan 1996 13:22:06   BNICKERS

								// Turn OBMC back on.

								//

								//    Rev 1.44   16 Jan 1996 11:46:22   RMCKENZX

								// Added support for UMV -- to correctly decode B-block

								// motion vectors when UMV is on

								//

								//    Rev 1.43   15 Jan 1996 14:34:32   BNICKERS

								//

								// Temporarily turn off OBMC until encoder can be changed to do it too.

								//

								//    Rev 1.42   12 Jan 1996 16:29:48   BNICKERS

								//

								// Correct OBMC to be spec compliant when neighbor is Intra coded.

								//

								//    Rev 1.41   06 Jan 1996 18:36:58   RMCKENZX

								// Simplified rounding logic for chroma motion vector computation

								// using MUCH smaller tables (at the cost of a shift, add, and mask

								// per vector).

								//

								//    Rev 1.40   05 Jan 1996 15:59:12   RMCKENZX

								//

								// fixed bug in decoding forward b-frame motion vectors

								// so that they will stay within the legal ranges.

								// re-organized the BBlockPredict function - using only

								// one test for 4 motion vectors and a unified call to

								// do the backward prediction for both lumina and chroma blocks.

								//

								//    Rev 1.39   21 Dec 1995 17:05:24   TRGARDOS

								// Added comments about discrepancy with H.263 spec.

								//

								//    Rev 1.38   21 Dec 1995 13:24:28   RMCKENZX

								// Fixed bug on pRefL, re-architected IDCTandMC

								//

								//    Rev 1.37   18 Dec 1995 12:46:34   RMCKENZX

								// added copyright notice

								//

								//    Rev 1.36   16 Dec 1995 20:34:04   RHAZRA

								//

								// Changed declaration of pRefX to U32

								//

								//    Rev 1.35   15 Dec 1995 13:53:32   RHAZRA

								//

								// AP cleanup

								//

								//    Rev 1.34   15 Dec 1995 10:51:38   RHAZRA

								//

								// Changed reference block addresses in AP

								//

								//    Rev 1.33   14 Dec 1995 17:04:16   RHAZRA

								//

								// Cleanup in the if-then-else structure in the OBMC part

								//

								//    Rev 1.32   13 Dec 1995 22:11:56   RHAZRA

								// AP cleanup

								//

								//    Rev 1.31   13 Dec 1995 10:59:26   RHAZRA

								// More AP+PB fixes

								//

								//    Rev 1.29   11 Dec 1995 11:33:12   RHAZRA

								// 12-10-95 changes: added AP stuff

								//

								//    Rev 1.28   09 Dec 1995 17:31:22   RMCKENZX

								// Gutted and re-built file to support decoder re-architecture.

								// New modules are:

								// H263IDCTandMC

								// H263BFrameIDCTandBiMC

								// H263BBlockPrediction

								// This module now contains code to support the second pass of the decoder.

								//

								//    Rev 1.27   23 Oct 1995 13:28:42   CZHU

								// Use the right quant for B blocks and call BlockAdd for type 3/4 too

								//

								//    Rev 1.26   17 Oct 1995 17:18:24   CZHU

								// Fixed the bug in decoding PB block CBPC

								//

								//    Rev 1.25   13 Oct 1995 16:06:20   CZHU

								// First version that supports PB frames. Display B or P frames under

								// VfW for now.

								//

								//    Rev 1.24   11 Oct 1995 17:46:28   CZHU

								// Fixed bitstream bugs

								//

								//    Rev 1.23   11 Oct 1995 13:26:00   CZHU

								// Added code to support PB frame

								//

								//    Rev 1.22   27 Sep 1995 16:24:14   TRGARDOS

								//

								// Added debug print statements.

								//

								//    Rev 1.21   26 Sep 1995 15:33:52   CZHU

								//

								// Adjusted buffers used for MB for inter frame motion compensation

								//

								//    Rev 1.20   19 Sep 1995 10:37:04   CZHU

								//

								// Cleaning up

								//

								//    Rev 1.19   15 Sep 1995 09:39:34   CZHU

								//

								// Update both GOB Quant and Picture Quant after DQUANT

								//

								//    Rev 1.18   14 Sep 1995 10:11:48   CZHU

								// Fixed bugs updating Quant for the picture

								//

								//    Rev 1.17   13 Sep 1995 11:57:08   CZHU

								//

								// Fixed bugs in calling Chroma BlockAdd parameters.

								//

								//    Rev 1.16   12 Sep 1995 18:18:40   CZHU

								// Call BlockAdd finally.

								//

								//    Rev 1.15   12 Sep 1995 11:12:38   CZHU

								// Call blockCopy for MB that is not coded.

								//

								//    Rev 1.14   11 Sep 1995 16:43:26   CZHU

								// Changed interface to DecodeBlock. Added interface calls to BlockCopy and Bl

								//

								//    Rev 1.13   11 Sep 1995 14:30:12   CZHU

								// MVs decoding.

								//

								//    Rev 1.12   08 Sep 1995 11:48:12   CZHU

								// Added support for Delta frames, also fixed early bugs regarding INTER CBPY

								//

								//    Rev 1.11   25 Aug 1995 09:16:32   DBRUCKS

								// add ifdef DEBUG_MBLK

								//

								//    Rev 1.10   23 Aug 1995 19:12:02   AKASAI

								// Fixed gNewTAB_CBPY table building.  Was using 8 as mask instead of 0xf.

								//

								//    Rev 1.9   18 Aug 1995 15:03:22   CZHU

								//

								// Output more error message when DecodeBlock returns error.

								//

								//    Rev 1.8   16 Aug 1995 14:26:54   CZHU

								//

								// Changed DWORD adjustment back to byte oriented reading.

								//

								//    Rev 1.7   15 Aug 1995 09:54:18   DBRUCKS

								// improve stuffing handling and add debug msg

								//

								//    Rev 1.6   14 Aug 1995 18:00:40   DBRUCKS

								// add chroma parsing

								//

								//    Rev 1.5   11 Aug 1995 17:47:58   DBRUCKS

								// cleanup

								//

								//    Rev 1.4   11 Aug 1995 16:12:28   DBRUCKS

								// add ptr check to MB data

								//

								//    Rev 1.3   11 Aug 1995 15:10:58   DBRUCKS

								// finish INTRA mb header parsing and callblock

								//

								//    Rev 1.2   03 Aug 1995 14:30:26   CZHU

								// Take block level operations out to d3block.cpp

								//

								//    Rev 1.1   02 Aug 1995 10:21:12   CZHU

								// Added asm codes for VLD of TCOEFF, inverse quantization, run-length decode.

								//

								//    Rev 1.0   31 Jul 1995 13:00:08   DBRUCKS

								// Initial revision.

								//

								//    Rev 1.2   31 Jul 1995 11:45:42   CZHU

								// changed the parameter list

								//

								//    Rev 1.1   28 Jul 1995 16:25:52   CZHU

								//

								// Added per block decoding framework.

								//

								//    Rev 1.0   28 Jul 1995 15:20:16   CZHU

								// Initial revision.


								//Block level decoding for H.26x decoder


								#include "precomp.h"


								// Routines with C linkage that are defined outside this file.
								//   H263OBMC         - called by AdvancePredict as
								//                      (current, left, right, above, below, destination).
								//   H263BiMotionComp - not called in the part of the file reviewed;
								//                      presumably used by the B-frame code -- TODO confirm.
								extern "C" {

								    void H263BiMotionComp(U32, U32, I32, I32, I32);

								    void H263OBMC(U32, U32, U32, U32, U32, U32);

								}


								// MMX counterparts, only declared when USE_MMX is defined.
								//   MMX_AdvancePredict - called by H263IDCTandMC as
								//                        (block action entry, iNext[4], output buffer,
								//                         X clip table, Y clip table).
								#ifdef USE_MMX // { USE_MMX

								extern "C" {

									void MMX_AdvancePredict(T_BlkAction FAR *, int *, U8 *, I8 *, I8 *);

									void MMX_BiMotionComp(U32, U32, I32, I32, I32);

								}

								#endif // } USE_MMX


								// Forward declaration of the non-MMX overlapped prediction routine defined
								// later in this file.  The unnamed arguments are, in order, iEdgeFlag,
								// iBlock and bUnrestrictedMotionVectors.
								void AdvancePredict(T_BlkAction FAR *fpBlockAction, int *iNext, U8 *pDst, int, int, BOOL);


								#pragma data_seg("IARDATA2")

								// Rounding table with four entries.  It is not referenced in the part of
								// the file reviewed; presumably the single-vector analogue of
								// SixteenthPelRound below -- TODO confirm against its users.
								char QuarterPelRound[] =

								    {0, 1, 0, 0};

								// Rounding correction indexed by the low four bits of the sum of the four
								// luma vector components of a macroblock.  H263BBlockPrediction forms a
								// chroma vector component as (sum >> 3) + SixteenthPelRound[sum & 0x0f].
								char SixteenthPelRound[] =

								    {0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1};

								// Interpolation routines, called as (source reference, destination) and
								// indexed by interp_index = ((mvy & 0x1) << 1) | (mvx & 0x1).
								// Entry 0 is NULL: callers use the reference block directly when
								// interp_index is 0 and never call through this slot.
								void (*Interpolate_Table[4])(U32, U32) =

								    {NULL,

								     Interpolate_Half_Int,

								     Interpolate_Int_Half,

								     Interpolate_Half_Half};

								// MMX versions of the same table, selected at run time when
								// DC->bMMXDecoder is set.  Indexed the same way as Interpolate_Table.
								#ifdef USE_MMX // { USE_MMX

								void (_fastcall *  MMX_Interpolate_Table[4])(U32, U32) =

								    {NULL,

								     MMX_Interpolate_Half_Int,

								     MMX_Interpolate_Half_Int == 0 ? 0 : MMX_Interpolate_Int_Half,

								     MMX_Interpolate_Half_Half};

								#endif // } USE_MMX


								// Identity clip table: entry k holds k - 64, so the values run from -64
								// to 63.  H263IDCTandMC passes it to MMX_AdvancePredict as the default
								// X and Y table when no edge clipping applies.
								// NOTE(review): presumably indexed by (vector component + 64) inside
								// MMX_AdvancePredict -- confirm there.
								I8 i8EMVClipTbl_NoClip[128] = {

								-64,-63,-62,-61,-60,-59,-58,-57,

								-56,-55,-54,-53,-52,-51,-50,-49,

								-48,-47,-46,-45,-44,-43,-42,-41,

								-40,-39,-38,-37,-36,-35,-34,-33,

								-32,-31,-30,-29,-28,-27,-26,-25,

								-24,-23,-22,-21,-20,-19,-18,-17,

								-16,-15,-14,-13,-12,-11,-10, -9,

								 -8, -7, -6, -5, -4, -3, -2, -1,

								  0,  1,  2,  3,  4,  5,  6,  7,

								  8,  9, 10, 11, 12, 13, 14, 15,

								 16, 17, 18, 19, 20, 21, 22, 23,

								 24, 25, 26, 27, 28, 29, 30, 31,

								 32, 33, 34, 35, 36, 37, 38, 39,

								 40, 41, 42, 43, 44, 45, 46, 47,

								 48, 49, 50, 51, 52, 53, 54, 55,

								 56, 57, 58, 59, 60, 61, 62, 63,

								};

								// Upper-saturating clip table: identical to i8EMVClipTbl_NoClip for the
								// values -64..31, while every entry from 32 upwards holds 32.
								// With unrestricted motion vectors on, H263IDCTandMC selects it for Y at
								// BOTTOM_EDGE and for X at RIGHT_EDGE.
								I8 i8EMVClipTbl_HiClip[128] = {

								-64,-63,-62,-61,-60,-59,-58,-57,

								-56,-55,-54,-53,-52,-51,-50,-49,

								-48,-47,-46,-45,-44,-43,-42,-41,

								-40,-39,-38,-37,-36,-35,-34,-33,

								-32,-31,-30,-29,-28,-27,-26,-25,

								-24,-23,-22,-21,-20,-19,-18,-17,

								-16,-15,-14,-13,-12,-11,-10, -9,

								 -8, -7, -6, -5, -4, -3, -2, -1,

								  0,  1,  2,  3,  4,  5,  6,  7,

								  8,  9, 10, 11, 12, 13, 14, 15,

								 16, 17, 18, 19, 20, 21, 22, 23,

								 24, 25, 26, 27, 28, 29, 30, 31,

								 32, 32, 32, 32, 32, 32, 32, 32,

								 32, 32, 32, 32, 32, 32, 32, 32,

								 32, 32, 32, 32, 32, 32, 32, 32,

								 32, 32, 32, 32, 32, 32, 32, 32,

								};

								// Lower-saturating clip table: every entry below -32 holds -32, and the
								// values -32..63 match i8EMVClipTbl_NoClip.
								// With unrestricted motion vectors on, H263IDCTandMC selects it for Y at
								// TOP_EDGE and for X at LEFT_EDGE.
								I8 i8EMVClipTbl_LoClip[128] = {

								-32,-32,-32,-32,-32,-32,-32,-32,

								-32,-32,-32,-32,-32,-32,-32,-32,

								-32,-32,-32,-32,-32,-32,-32,-32,

								-32,-32,-32,-32,-32,-32,-32,-32,

								-32,-31,-30,-29,-28,-27,-26,-25,

								-24,-23,-22,-21,-20,-19,-18,-17,

								-16,-15,-14,-13,-12,-11,-10, -9,

								 -8, -7, -6, -5, -4, -3, -2, -1,

								  0,  1,  2,  3,  4,  5,  6,  7,

								  8,  9, 10, 11, 12, 13, 14, 15,

								 16, 17, 18, 19, 20, 21, 22, 23,

								 24, 25, 26, 27, 28, 29, 30, 31,

								 32, 33, 34, 35, 36, 37, 38, 39,

								 40, 41, 42, 43, 44, 45, 46, 47,

								 48, 49, 50, 51, 52, 53, 54, 55,

								 56, 57, 58, 59, 60, 61, 62, 63,

								};


								#pragma data_seg(".data")


								#pragma code_seg("IACODE2")

								// Clamp a motion vector in place for a block that lies on a picture edge.
								//
								//   mvx, mvy  vector components, updated in place
								//   EdgeFlag  combination of LEFT_EDGE / RIGHT_EDGE / TOP_EDGE / BOTTOM_EDGE
								//   BlockNo   block index; blocks 0..3 are limited to +/-32, all others
								//             to +/-16
								//
								// A component is only limited in the direction of an edge the block
								// actually touches.  Kept as a function rather than a macro to save
								// code space.
								void UmvOnEdgeClipMotionVectors2(I32 *mvx, I32 *mvy, int EdgeFlag, int BlockNo)
								{
								    const int iLimit = (BlockNo < 4) ? 32 : 16;

								    // horizontal component
								    if ((EdgeFlag & LEFT_EDGE) && (*mvx < -iLimit))
								        *mvx = -iLimit;
								    if ((EdgeFlag & RIGHT_EDGE) && (*mvx > iLimit))
								        *mvx = iLimit;

								    // vertical component
								    if ((EdgeFlag & TOP_EDGE) && (*mvy < -iLimit))
								        *mvy = -iLimit;
								    if ((EdgeFlag & BOTTOM_EDGE) && (*mvy > iLimit))
								        *mvy = iLimit;
								}

								#pragma code_seg()


								/*****************************************************************************

								 *

								 *  H263IDCTandMC

								 *

								 *  Inverse Discrete Cosine Transform and

								 *  Motion Compensation for each block

								 *

								 */


								#pragma code_seg("IACODE2")

								// Reconstruct one block of a macroblock: motion compensation followed by
								// the inverse transform for inter blocks, inverse transform only for
								// intra blocks.
								//
								//   DC              decoder catalog; supplies the mode flags and the scratch
								//                   area at (U32)DC + DC->uMBBuffer
								//   fpBlockAction   block action entries; entry [iBlock] is reconstructed
								//   iBlock          block index; blocks 0..3 use overlapped prediction
								//                   when advanced prediction is on, higher ones never do
								//   iMBNum          1 means no left neighbour, DC->iNumberOfMBsPerGOB
								//                   means no right neighbour (overlapped prediction only)
								//   iGOBNum         1 means no neighbour above (overlapped prediction only)
								//   pN              *pN < 65 selects the inter path (0 = no coefficients),
								//                   *pN > 65 the intra path; 65 is asserted against
								//   pRUN_INVERSE_Q  coefficient data handed to the IDCT routines
								//   fpMBInfo        not referenced by this function
								//   iEdgeFlag       *_EDGE bits used when clipping unrestricted vectors
								//
								// The result is written to fpBlockAction[iBlock].pCurBlock.
								void H263IDCTandMC(
								    T_H263DecoderCatalog FAR *DC,
								    T_BlkAction FAR          *fpBlockAction,
								    int                       iBlock,
								    int                       iMBNum,     // AP-NEW
								    int                       iGOBNum, // AP-NEW
								    U32                      *pN,
								    T_IQ_INDEX               *pRUN_INVERSE_Q,
								    T_MBInfo                 *fpMBInfo,      // AP-NEW
								    int                       iEdgeFlag
								)
								{
								    T_BlkAction FAR *pThisBlk = fpBlockAction + iBlock;
								    I32 pRef;                // address of the prediction block
								    U32 pRefTmp;             // address of the displaced reference block
								    I32 mvx, mvy;
								    int interp_index;
								    int iNext[4];            // Left-Right-Above-Below
								    int i;

								    ASSERT(*pN != 65);

								    //------------------------------------------------------------------
								    // Intra block (*pN >= 65): inverse transform only.
								    //------------------------------------------------------------------
								    if (*pN >= 65)
								    {
								#ifdef USE_MMX // { USE_MMX
								        if (DC->bMMXDecoder)
								        {
								            // The DC term is taken out before the IDCT and handed to
								            // MMX_ClipAndMove separately.
								            U32 ScaledDC = pRUN_INVERSE_Q->dInverseQuant;

								            pRUN_INVERSE_Q->dInverseQuant = 0;
								            MMX_DecodeBlock_IDCT(
								                (U32)pRUN_INVERSE_Q,
								                *pN - 65,                        // No. of coeffs
								                (U32) DC + DC->uMBBuffer + BLOCK_BUFFER_OFFSET);
								            MMX_ClipAndMove(
								                (U32) DC + DC->uMBBuffer + BLOCK_BUFFER_OFFSET,
								                pThisBlk->pCurBlock,
								                (U32)ScaledDC);
								        }
								        else
								#endif // } USE_MMX
								        {
								            DecodeBlock_IDCT(
								                (U32)pRUN_INVERSE_Q,
								                *pN,
								                pThisBlk->pCurBlock,             // INTRA transform output
								                (U32) DC + DC->uMBBuffer + BLOCK_BUFFER_OFFSET);
								        }
								        return;
								    }

								    //------------------------------------------------------------------
								    // Inter block: build the prediction first; pRef ends up pointing
								    // at it.
								    //------------------------------------------------------------------
								    pRef = (U32) DC + DC->uMBBuffer;
								    mvx  = pThisBlk->i8MVx2;
								    mvy  = pThisBlk->i8MVy2;

								    // Clip motion vectors pointing outside active image area
								    if (DC->bUnrestrictedMotionVectors)
								        UmvOnEdgeClipMotionVectors2(&mvx, &mvy, iEdgeFlag, iBlock);

								    pRefTmp = pThisBlk->pRefBlock +
								              (I32) (mvx >> 1) +
								              PITCH * (I32) (mvy >> 1);

								    // Must be derived from the vector AFTER UMV clipping
								    interp_index = ((mvy & 0x1) << 1) | (mvx & 0x1);

								    if (DC->bAdvancedPrediction && (iBlock <= 3))
								    {
								        // Overlapped block motion compensation (luma blocks in AP mode)
								        ASSERT (DC->bAdvancedPrediction);
								        ASSERT ( (iBlock <= 3) );

								        // iNext[] holds the block-action offset of each neighbour,
								        // or 0 when the neighbour does not exist.

								        // Left & Right
								        if (iBlock & 1)
								        {   // blocks 1 or 3, on right
								            iNext[0] = -1;
								            iNext[1] = (iMBNum == DC->iNumberOfMBsPerGOB) ? 0 : 5;
								        }
								        else
								        {   // blocks 0 or 2, on left
								            iNext[0] = (iMBNum == 1) ? 0 : -5;
								            iNext[1] = 1;
								        }

								        // Above & Below
								        if (iBlock > 1)
								        {   // blocks 2 or 3, on bottom
								            iNext[2] = -2;
								            iNext[3] = 0;
								        }
								        else
								        {   // blocks 0 or 1, on top
								            iNext[2] = (iGOBNum == 1) ? 0 : (-6*DC->iNumberOfMBsPerGOB + 2);
								            iNext[3] = 2;
								        }

								        // With PB frames off, an INTRA neighbour is treated as absent
								        // so that this block's own vector is used in its place.
								        if (!DC->bPBFrame)
								        {
								            for (i = 0; i < 4; i++)
								            {
								                if (iNext[i] == 0)
								                    continue;
								                // block types:  0=INTRA_DC, 1=INTRA, 2=INTER, 3=EMPTY, 4=ERROR
								                if (pThisBlk[iNext[i]].u8BlkType < 2)
								                    iNext[i] = 0;
								            }
								        }

								        // Overlapped prediction; output goes to pRef
								#ifdef USE_MMX // { USE_MMX
								        if (DC->bMMXDecoder)
								        {
								            I8 *pClipX = &i8EMVClipTbl_NoClip[0];
								            I8 *pClipY = &i8EMVClipTbl_NoClip[0];

								            if (DC->bUnrestrictedMotionVectors)
								            {
								                if (iEdgeFlag & TOP_EDGE)
								                    pClipY = &i8EMVClipTbl_LoClip[0];
								                else if (iEdgeFlag & BOTTOM_EDGE)
								                    pClipY = &i8EMVClipTbl_HiClip[0];

								                if (iEdgeFlag & LEFT_EDGE)
								                    pClipX = &i8EMVClipTbl_LoClip[0];
								                else if (iEdgeFlag & RIGHT_EDGE)
								                    pClipX = &i8EMVClipTbl_HiClip[0];
								            }
								            MMX_AdvancePredict(pThisBlk, iNext, (U8*)pRef, pClipX, pClipY);
								        }
								        else
								#endif // } USE_MMX
								        {
								            AdvancePredict(pThisBlk, iNext, (U8*)pRef, iEdgeFlag, iBlock, DC->bUnrestrictedMotionVectors);
								        }
								    }
								    else
								    {
								        // Plain prediction: chroma blocks, or luma outside AP mode
								        if (!interp_index)
								        {
								            // No interpolation needed: predict straight from the reference
								            pRef = pRefTmp;
								        }
								        else
								        {
								#ifdef USE_MMX // { USE_MMX
								            if (DC->bMMXDecoder)
								                (*MMX_Interpolate_Table[interp_index])(pRefTmp, pRef);
								            else
								#endif // } USE_MMX
								                (*Interpolate_Table[interp_index])(pRefTmp, pRef);
								        }
								    }

								    //------------------------------------------------------------------
								    // Combine the prediction with the residual, if there is one.
								    //------------------------------------------------------------------
								    if (*pN == 0)
								    {
								        // No transform coefficients: copy the prediction as it is
								#ifdef USE_MMX // { USE_MMX
								        if (DC->bMMXDecoder)
								            MMX_BlockCopy(
								                pThisBlk->pCurBlock,                 // destination
								                pRef);                               // prediction
								        else
								#endif // } USE_MMX
								            BlockCopy(
								                pThisBlk->pCurBlock,                 // destination
								                pRef);                               // prediction
								    }
								    else
								    {
								        // 0 < *pN < 65: the residual is produced at
								        // DC + DC->uMBBuffer + BLOCK_BUFFER_OFFSET and then added to
								        // the prediction.
								#ifdef USE_MMX // { USE_MMX
								        if (DC->bMMXDecoder)
								        {
								            MMX_DecodeBlock_IDCT(
								                (U32)pRUN_INVERSE_Q,
								                *pN,
								                (U32) DC + DC->uMBBuffer + BLOCK_BUFFER_OFFSET); // inter output
								            MMX_BlockAdd(
								                (U32) DC + DC->uMBBuffer + BLOCK_BUFFER_OFFSET,  // residual
								                pRef,                                            // prediction
								                pThisBlk->pCurBlock);                            // destination
								        }
								        else
								#endif // } USE_MMX
								        {
								            DecodeBlock_IDCT(
								                (U32)pRUN_INVERSE_Q,
								                *pN,
								                pThisBlk->pCurBlock,                             // not used here
								                (U32) DC + DC->uMBBuffer + BLOCK_BUFFER_OFFSET); // inter output
								            BlockAdd(
								                (U32) DC + DC->uMBBuffer + BLOCK_BUFFER_OFFSET,  // residual
								                pRef,                                            // prediction
								                pThisBlk->pCurBlock);                            // destination
								        }
								    }
								}

								//  End IDCTandMC

								////////////////////////////////////////////////////////////////////////////////

								#pragma code_seg()


								/*****************************************************************************

								 *

								 *  AdvancePredict

								 *

								 *  Motion Compensation for Advance Prediction

								 *    This module is only called in the non-MMx case.

								 *    In the MMx case, MMX_AdvancePredict is called instead.

								 *

								 ****************************************************************************/


								#pragma code_seg("IACODE2")

								void AdvancePredict(

								    T_BlkAction FAR          *fpBlockAction,

								    int                      *iNext,

								    U8                       *pDst,

									int                      iEdgeFlag,

									int                      iBlock,

									BOOL                     bUnrestrictedMotionVectors

								)

								{


								    U32 pRefC, pRefN[4];    // Left-Right-Above-Below

								    I32 mvx, mvy;

								    U32 pRefTmp;

								    int i;

									int interp_index;


									mvx = fpBlockAction->i8MVx2;

									mvy = fpBlockAction->i8MVy2;


									// Clip motion vectors pointing outside active image area

									if (bUnrestrictedMotionVectors)

									{

										UmvOnEdgeClipMotionVectors2(&mvx,&mvy,iEdgeFlag,iBlock);

									}


									interp_index = ((mvy & 0x1)<<1) | (mvx & 0x1);


									pRefTmp = fpBlockAction->pRefBlock +

									        (I32) (mvx >> 1) +

									        PITCH * (I32) (mvy >> 1);


									pRefC    = (U32) pDst +  8;

									pRefN[0] = (U32) pDst + 16;

									pRefN[1] = (U32) pDst + 24;

									pRefN[2] = (U32) pDst + 32;

									pRefN[3] = (U32) pDst + 40;


									// Current block

									if (interp_index)

										(*Interpolate_Table[interp_index])(pRefTmp, pRefC);

									else

										pRefC = pRefTmp;


								        //  Compute and apply motion vectors

								        //  Prediction is placed at pRefN[i]

								        for (i=0; i<4; i++) {


											if (iNext[i]) {


												// Get the motion vector components.

												// Note that for macroblocks that were not coded, THESE MUST BE 0!

												// (Which is what H263InitializeBlockActionStream sets them to.)

												mvx = fpBlockAction[iNext[i]].i8MVx2;

												mvy = fpBlockAction[iNext[i]].i8MVy2;


												// Clip motion vectors pointing outside active image area

												if (bUnrestrictedMotionVectors)

												{

													UmvOnEdgeClipMotionVectors2(&mvx,&mvy,iEdgeFlag,iBlock);

												}


									            // apply motion vector to get reference block at pRefN[i]

									            pRefTmp = fpBlockAction->pRefBlock +

									                        (I32) (mvx >> 1) +

									                        PITCH * (I32) (mvy >> 1);


												// do interpolation if needed

												interp_index = ((mvy & 0x1)<<1) | (mvx & 0x1);

												if (interp_index)

													(*Interpolate_Table[interp_index])(pRefTmp, pRefN[i]);

												else

													pRefN[i] = pRefTmp;


											}  // end if (iNext[i])

											else { // use this block's reference

												pRefN[i] = pRefC;

											} // end if (iNext[i] && ...) ... else ...


										}  // end for (i=0; i<4; i++) {}


										// Now do overlapped motion compensation.

										H263OBMC(pRefC, pRefN[0], pRefN[1], pRefN[2], pRefN[3], (U32)pDst);


								}

								//  End AdvancePredict

								////////////////////////////////////////////////////////////////////////////////

								#pragma code_seg()


								/*****************************************************************************

								 *

								 *  BBlockPrediction

								 *

								 *  Compute the predictions from the "forward" and "backward" motion vectors.

								 *

								 ****************************************************************************/

								#pragma code_seg("IACODE2")

								void H263BBlockPrediction(

								    T_H263DecoderCatalog FAR *DC,

								    T_BlkAction FAR          *fpBlockAction,

								    U32                       pRef[],

								    T_MBInfo FAR             *fpMBInfo,

								    int                       iEdgeFlag

								)

								{

								    //find out the MVf and MVb first from TR


								  	I32 mv_f_x[6], mv_b_x[6], mv_f_y[6], mv_b_y[6];

								    I32 mvx_expectation, mvy_expectation;

								    I32 iTRD, iTRB;

								    I32 i;

								    U32 pRefTmp;


								    int mvfx, mvbx, mvfy, mvby;


									FX_ENTRY("H263BBlockPrediction")


								    iTRB = DC->uBFrameTempRef;

								    iTRD = DC->uTempRef - DC->uTempRefPrev;


								    if (!iTRD)

								    {

										DEBUGMSG(ZONE_DECODE_DETAILS, ("%s: Warning: given TR == last TR, set TRD = 256\r\n", _fx_));

								        iTRD = 256;

								    }

								    else

								    if (iTRD < 0)

								        iTRD += 256;


								    // final MVD for P blocks is in

								    //    fpBlockAction[0].i8MVx2,... and fpBlockAction[3].i8MVx2, and

								    //    fpBlockAction[0].i8MVy2,... and fpBlockAction[3].i8MVy2.


								    // check for 4 motion vectors per macroblock

								    //  TODO can motion vector calculation be done in the first pass

								    if (fpMBInfo->i8MBType == 2)

								    {  // yep, we got 4 of 'em


								#ifdef H263P

										// If H.263+, we can have 8x8 MV's if the deblocking filter

										// was selected.

								        ASSERT(DC->bAdvancedPrediction || DC->bDeblockingFilter);

								#else

								        ASSERT(DC->bAdvancedPrediction);

								#endif


								        // Do luma vectors first

								        for (i=0; i<4; i++)

								        {

								#ifdef H263P

											// If we are using improved PB-frame mode (H.263+) and the B-block

											// was signalled to be predicted in the forward direction only,

											// the motion vector contained in MVDB is the actual forward MV -

											// no prediction is used.

											if (DC->bImprovedPBFrames == TRUE &&

												fpMBInfo->bForwardPredOnly == TRUE)

											{

												// Zero-out the expectation (the motion vector prediction)

												mvx_expectation = 0;

												mvy_expectation = 0;

											}

											else

								#endif

											{

												// compute forward expectation

												mvx_expectation = ( iTRB * (I32)fpBlockAction[i].i8MVx2 / iTRD );

												mvy_expectation = ( iTRB * (I32)fpBlockAction[i].i8MVy2 / iTRD );

											}


								            // add in differential

								            mv_f_x[i] = mvx_expectation + fpMBInfo->i8MVDBx2;

								            mv_f_y[i] = mvy_expectation + fpMBInfo->i8MVDBy2;


								            // check to see if the differential carried us too far

								            if (DC->bUnrestrictedMotionVectors)

								            {

								                if (mvx_expectation > 32)

								                {

								                    if (mv_f_x[i] > 63) mv_f_x[i] -=64;

								                }

								                else if (mvx_expectation < -31)

								                {

								                    if (mv_f_x[i] < -63) mv_f_x[i] +=64;

								                } // always use "first column" when expectation lies in [-31, +32]


								                if (mvy_expectation > 32)

								                {

								                    if (mv_f_y[i] > 63) mv_f_y[i] -=64;

								                }

								                else if (mvy_expectation < -31)

								                {

								                    if (mv_f_y[i] < -63) mv_f_y[i] +=64;

								                }

								            }

								            else  // UMV off

								            {

								                if (mv_f_x[i] >= 32) mv_f_x[i] -= 64;

								                else if (mv_f_x[i] < -32) mv_f_x[i] += 64;


								                if (mv_f_y[i] >= 32) mv_f_y[i] -= 64;

								                else if (mv_f_y[i] < -32) mv_f_y[i] += 64;

								            } // end if (UMV) ... else ...


								            // Do backwards motion vectors

											// Backward vectors are not required if using improved PB-frame mode

											// and the B-block uses only forward prediction. We will keep the calculation

											// of mv_b_{x,y} here since it doesn't harm anything.

								            //  TODO

								            if (fpMBInfo->i8MVDBx2)

								                mv_b_x[i] = mv_f_x[i] - fpBlockAction[i].i8MVx2;

								            else

								                mv_b_x[i] = ( (iTRB - iTRD) * (I32)fpBlockAction[i].i8MVx2 / iTRD );

								            if (fpMBInfo->i8MVDBy2)

								                mv_b_y[i] = mv_f_y[i] - fpBlockAction[i].i8MVy2;

								            else

								                mv_b_y[i] = ( (iTRB - iTRD) * (I32)fpBlockAction[i].i8MVy2 / iTRD );


								        }  // end for(i=0; i<4; i++){}


								        // Now do the chromas

								        //   first get average times 4

								        for (i=0, mvfx=mvbx=mvfy=mvby=0; i<4; i++)

								        {

								            mvfx += mv_f_x[i];

								            mvfy += mv_f_y[i];

								            mvbx += mv_b_x[i];

								            mvby += mv_b_y[i];

								        }


								        //   now interpolate

								        mv_f_x[4] = mv_f_x[5] = (mvfx >> 3) + SixteenthPelRound[mvfx & 0x0f];

								        mv_f_y[4] = mv_f_y[5] = (mvfy >> 3) + SixteenthPelRound[mvfy & 0x0f];

								        mv_b_x[4] = mv_b_x[5] = (mvbx >> 3) + SixteenthPelRound[mvbx & 0x0f];

								        mv_b_y[4] = mv_b_y[5] = (mvby >> 3) + SixteenthPelRound[mvby & 0x0f];


								    }

								    else  // only 1 motion vector for this macroblock

								    {


								#ifdef H263P

										// If we are using improved PB-frame mode (H.263+) and the B-block

										// was signalled to be predicted in the forward direction only,

										// the motion vector contained in MVDB is the actual forward MV -

										// no prediction is used.

										if (DC->bImprovedPBFrames == TRUE &&

											fpMBInfo->bForwardPredOnly == TRUE)

										{

											// Zero-out the expectation (the motion vector prediction)

											mvx_expectation = 0;

											mvy_expectation = 0;

										}

										else

								#endif

										{

											// compute forward expectation

											mvx_expectation = ( iTRB * (I32)fpBlockAction[0].i8MVx2 / iTRD );

											mvy_expectation = ( iTRB * (I32)fpBlockAction[0].i8MVy2 / iTRD );

										}


								        // add in differential

								        mv_f_x[0] = mvx_expectation + fpMBInfo->i8MVDBx2;

								        mv_f_y[0] = mvy_expectation + fpMBInfo->i8MVDBy2;


								        // check to see if the differential carried us too far

								        // TODO: Clipping of motion vector needs to happen when decoder needs

								        //       to interoperate

								        if (DC->bUnrestrictedMotionVectors)

								        {

								            if (mvx_expectation > 32)

								            {

								                if (mv_f_x[0] > 63) mv_f_x[0] -=64;

								            }

								            else if (mvx_expectation < -31)

								            {

								                if (mv_f_x[0] < -63) mv_f_x[0] +=64;

								            } // always use "first column" when expectation lies in [-31, +32]


								            if (mvy_expectation > 32)

								            {

								                if (mv_f_y[0] > 63) mv_f_y[0] -=64;

								            }

								            else if (mvy_expectation < -31)

								            {

								                if (mv_f_y[0] < -63) mv_f_y[0] +=64;

								            }

								        }

								        else // UMV off, decode normally

								        {

								            if (mv_f_x[0] >= 32) mv_f_x[0] -= 64;

								            else if (mv_f_x[0] < -32) mv_f_x[0] += 64;


								            if (mv_f_y[0] >= 32) mv_f_y[0] -= 64;

								            else if (mv_f_y[0] < -32) mv_f_y[0] += 64;

								        } // finished decoding


								        // copy for other 3 motion vectors

								        mv_f_x[1] = mv_f_x[2] = mv_f_x[3] = mv_f_x[0];

								        mv_f_y[1] = mv_f_y[2] = mv_f_y[3] = mv_f_y[0];


								        // do backwards motion vectors

										// Backward vectors are not required if using improved PB-frame mode

										// and the B-block uses only forward prediction. We will keep the calculation

										// of mv_b_{x,y} here since it doesn't harm anything.

								        // TODO

								        if (fpMBInfo->i8MVDBx2)

								            mv_b_x[0] = mv_f_x[0] - fpBlockAction[0].i8MVx2;

								        else

								            mv_b_x[0] = ( (iTRB - iTRD) * (I32)fpBlockAction[0].i8MVx2 / iTRD );


								        if (fpMBInfo->i8MVDBy2)

								            mv_b_y[0] = mv_f_y[0] - fpBlockAction[0].i8MVy2;

								        else

								            mv_b_y[0] = ( (iTRB - iTRD) * (I32)fpBlockAction[0].i8MVy2 / iTRD );


								        // copy for other 3 motion vectors

								        mv_b_x[1] = mv_b_x[2] = mv_b_x[3] = mv_b_x[0];

								        mv_b_y[1] = mv_b_y[2] = mv_b_y[3] = mv_b_y[0];


								        // interpolate for chroma

								        mv_f_x[4] = mv_f_x[5] = (mv_f_x[0] >> 1) + QuarterPelRound[mv_f_x[0] & 0x03];

								        mv_f_y[4] = mv_f_y[5] = (mv_f_y[0] >> 1) + QuarterPelRound[mv_f_y[0] & 0x03];

								        mv_b_x[4] = mv_b_x[5] = (mv_b_x[0] >> 1) + QuarterPelRound[mv_b_x[0] & 0x03];

								        mv_b_y[4] = mv_b_y[5] = (mv_b_y[0] >> 1) + QuarterPelRound[mv_b_y[0] & 0x03];


								    }  // end else 1 motion vector per macroblock


								    // Prediction from Previous decoder P frames, referenced by RefBlock

								    // Note: The previously decoded P blocks are in RefBlock,

								    //       the just-decoded P blocks are in CurBlock, and

								    //       the target B blocks are in BBlock.


								    // translate MV into address of reference blocks.

								    pRefTmp = (U32) DC + DC->uMBBuffer;

								    for (i=0; i<6; i++)

								    {

								        pRef[i] =  pRefTmp;

								        pRefTmp += 8;

								    }


								    // Do the forward predictions

								    for (i=0; i<6; i++)

								    {

								        int interp_index;


										// in UMV mode: clip MVs pointing outside 16 pels wide edge

										if (DC->bUnrestrictedMotionVectors)

										{

											UmvOnEdgeClipMotionVectors2(&mv_f_x[i],&mv_f_y[i], iEdgeFlag, i);

											// no need to clip backward vectors

										}


								        // Put forward predictions at addresses pRef[0], ..., pRef[5].

								        pRefTmp = fpBlockAction[i].pRefBlock + (I32)(mv_f_x[i]>>1) +

								                  PITCH * (I32)(mv_f_y[i]>>1);

								        // TODO

								        interp_index = ((mv_f_y[i] & 0x1)<<1) | (mv_f_x[i] & 0x1);

								        if (interp_index)

								        {

								#ifdef USE_MMX // { USE_MMX

								            if (DC->bMMXDecoder)

								                (*MMX_Interpolate_Table[interp_index])(pRefTmp, pRef[i]);

								            else

								                (*Interpolate_Table[interp_index])(pRefTmp, pRef[i]);

								#else // }{ USE_MMX

								                (*Interpolate_Table[interp_index])(pRefTmp, pRef[i]);

								#endif // } USE_MMX

								        }

								        else

								        {

								#ifdef USE_MMX // { USE_MMX

								            if (DC->bMMXDecoder)

								                MMX_BlockCopy(

								                    pRef[i],     // destination

								                    pRefTmp);    // prediction

								            else

								                BlockCopy(pRef[i], pRefTmp);

								#else // }{ USE_MMX

								                BlockCopy(pRef[i], pRefTmp);

								#endif // } USE_MMX

								        }


								#ifdef H263P

										// If we are using improved PB-frame mode (H.263+) and the B-block

										// was signalled to be predicted in the forward direction only,

										// we do not adjust with the backward prediction from the future.

										if (DC->bImprovedPBFrames == FALSE ||

											fpMBInfo->bForwardPredOnly == FALSE)

								#endif

										{

								#ifdef USE_MMX // { USE_MMX

								        if (DC->bMMXDecoder)

								    	    // adjust with backward prediction from the future

								    	    MMX_BiMotionComp(

								                pRef[i],

								                fpBlockAction[i].pCurBlock,

								                (I32) mv_b_x[i],

								                (I32) mv_b_y[i],

								                i);

								        else

								    	    // adjust with backward prediction from the future

								    	H263BiMotionComp(

								            pRef[i],

								            fpBlockAction[i].pCurBlock,

								            (I32) mv_b_x[i],

								            (I32) mv_b_y[i],

								            i);

								#else // }{ USE_MMX

								    	    // adjust with backward prediction from the future

								    	H263BiMotionComp(

								            pRef[i],

								            fpBlockAction[i].pCurBlock,

								            (I32) mv_b_x[i],

								            (I32) mv_b_y[i],

								            i);

								#endif // } USE_MMX

										}


								    } // end for (i=0; i<6; i++) {}

								}

								#pragma code_seg()


								/*****************************************************************************

								 *

								 *  H263BFrameIDCTandBiMC

								 *

								 *  B Frame IDCT and

								 *  Bi-directional MC for B blocks

								 */


								#pragma code_seg("IACODE2")

								void H263BFrameIDCTandBiMC(
								    T_H263DecoderCatalog FAR *DC,              // decoder catalog; bMMXDecoder and uMBBuffer are read
								    T_BlkAction FAR          *fpBlockAction,   // block actions; [iBlock].pBBlock is the destination
								    int                       iBlock,          // index of the block being reconstructed
								    U32                      *pN,              // *pN is the coefficient count (0 => nothing coded)
								    T_IQ_INDEX               *pRUN_INVERSE_Q,  // handed to the IDCT routine as its input
								    U32                      *pRef             // pRef[iBlock] is the prediction for this block
								)
								{
								    // Reconstructs one B block into fpBlockAction[iBlock].pBBlock:
								    //   - no coefficients: the prediction is copied straight to the output;
								    //   - otherwise: the residual is inverse transformed into the scratch
								    //     buffer and then added to the prediction.
								    // When USE_MMX is compiled in and DC->bMMXDecoder is set, the MMX
								    // routines are called; in every other case the plain ones are.

								    ASSERT(*pN < 65);

								    if (*pN == 0)
								    {
								        // No transform coefficients for this block,
								        // so the prediction itself is the result.
								#ifdef USE_MMX // { USE_MMX
								        if (DC->bMMXDecoder)
								        {
								            MMX_BlockCopy(
								                fpBlockAction[iBlock].pBBlock,   // destination
								                pRef[iBlock]);                   // prediction
								            return;
								        }
								#endif // } USE_MMX
								        BlockCopy(
								            fpBlockAction[iBlock].pBBlock,       // destination
								            pRef[iBlock]);                       // prediction
								        return;
								    }

								    // The residual (inter IDCT output) lands at
								    // DC + DC->uMBBuffer + BLOCK_BUFFER_OFFSET.
								    U32 uResidual = (U32) DC + DC->uMBBuffer + BLOCK_BUFFER_OFFSET;

								#ifdef USE_MMX // { USE_MMX
								    if (DC->bMMXDecoder)
								    {
								        MMX_DecodeBlock_IDCT(
								            (U32)pRUN_INVERSE_Q,
								            *pN,
								            uResidual);                          // inter output

								        MMX_BlockAdd(
								            uResidual,                           // transform output
								            pRef[iBlock],                        // prediction
								            fpBlockAction[iBlock].pBBlock);      // destination
								        return;
								    }
								#endif // } USE_MMX

								    // Get the residual block.
								    DecodeBlock_IDCT(
								        (U32)pRUN_INVERSE_Q,
								        *pN,
								        fpBlockAction[iBlock].pBBlock,           // intra output slot, not used here
								        uResidual);                              // inter output

								    // Add the residual to the reference block.
								    BlockAdd(
								        uResidual,                               // transform output
								        pRef[iBlock],                            // prediction
								        fpBlockAction[iBlock].pBBlock);          // destination
								}

								#pragma code_seg()