/* *************************************************************************
**    INTEL Corporation Proprietary Information
**
**    This listing is supplied under the terms of a license
**    agreement with INTEL Corporation and may not be copied
**    nor disclosed except in accordance with the terms of
**    that agreement.
**
**    Copyright (c) 1995 Intel Corporation.
**    All Rights Reserved.
**
** *************************************************************************
*/

#include "precomp.h"

#ifdef H263P // {

//
// For the P5 versions, the strategy is to compute the Y value for an odd RGB value
// followed by computing the Y value for the corresponding even RGB value. The registers
// are then set with the proper values to compute U and V values for the even RGB
// value. This avoids repeating the shifting and masking needed to extract the Red,
// Green and Blue components.
//

/*****************************************************************************
 *
 *  H26X_BGR32toYUV12()
 *
 *  Convert from BGR32 to YUV12 (YCrCb 4:2:0) and copy to destination memory
 *  with the row pitch given by the pitch parameter. Each input pixel is one
 *  32-bit word: blue in bits 0-7, green in bits 8-15, red in bits 16-23.
 *  Bits 24-31 are never read. Only the top seven bits of each component are
 *  used, as an index into the BYUV / GYUV / RYUV lookup tables.
 *
 *  Table entries are 8 bytes apart and have two fields:
 *    .YU  luma contribution in bits 8-15, U contribution in bits 24-31
 *    .V   V contribution in bits 8-15
 *  (layout as consumed by the code below; the tables are defined elsewhere).
 *
 */

#if 0 // { 0
//
// Reference C implementation, compiled out. It relies on the macros
// C_RGB_COLOR_CONVERT_INIT, C_WIDTH_FILL and C_HEIGHT_FILL, which are defined
// elsewhere and presumably declare and initialise the locals used here
// (i, j, k, t, tm, pnext, pyprev, pyspace, pynext, LumaIters, mark, stretch,
// BackTwoLines, byte_ypitch_adj, byte_uvpitch_adj) - confirm against the header.
//
void C_H26X_BGR32toYUV12(
	LPBITMAPINFOHEADER	lpbiInput,
	WORD OutputWidth,
	WORD OutputHeight,
	U8 *lpInput,
	U8 *YPlane,
	U8 *UPlane,
	U8 *VPlane,
	const int pitch)
{
	int tm1, tm2;
	int t1, t2, t3, t4;

	C_RGB_COLOR_CONVERT_INIT

	// This assignment statement is here simply to avoid a warning message.
	t = t;

	for ( j = 0; j < LumaIters; j++) {

		for (k = 0; k < mark; k++) {
			// Four pixels per iteration: four Y bytes, and on even rows
			// two U bytes and two V bytes (taken from pixels 0 and 2).
			for (i = OutputWidth; i > 0; i-=4, YPlane+=4) {
				tm1 = *pnext++;
				t1 = (BYUV[(tm1>>1)&0x7F].YU +
				      GYUV[(tm1>>9)&0x7F].YU +
				      RYUV[(tm1>>17)&0x7F].YU);
				tm = *pnext++;
				t2 = (BYUV[(tm>>1)&0x7F].YU +
				      GYUV[(tm>>9)&0x7F].YU +
				      RYUV[(tm>>17)&0x7F].YU);
				tm2 = *pnext++;
				t3 = (BYUV[(tm2>>1)&0x7F].YU +
				      GYUV[(tm2>>9)&0x7F].YU +
				      RYUV[(tm2>>17)&0x7F].YU);
				tm = *pnext++;
				t4 = (BYUV[(tm>>1)&0x7F].YU +
				      GYUV[(tm>>9)&0x7F].YU +
				      RYUV[(tm>>17)&0x7F].YU);
				*(U32 *)YPlane =
					(((t1+0x800)>>8)&0xFF)     |
					((t2+0x800)&0xFF00)        |
					(((t3+0x800)<<8)&0xFF0000) |
					(((t4+0x800)<<16)&0xFF000000);
				if (0 == (k&1)) {
					// The second U sample comes from the third pixel (t3),
					// the same pixel the V sample below (tm2) and the P5
					// assembler version use.
					*(U16 *)UPlane =
						((t1+0x40000000)>>24) |
						(((t3+0x40000000)>>16)&0xFF00);
					t1 = (BYUV[(tm1>>1)&0x7F].V +
					      GYUV[(tm1>>9)&0x7F].V +
					      RYUV[(tm1>>17)&0x7F].V);
					t2 = (BYUV[(tm2>>1)&0x7F].V +
					      GYUV[(tm2>>9)&0x7F].V +
					      RYUV[(tm2>>17)&0x7F].V);
					*(U16 *)VPlane =
						((t1+0x4000)>>8) |
						((t2+0x4000)&0xFF00);
					UPlane += 2;
					VPlane += 2;
				}
			}
			// The next two cases are mutually exclusive.
			// If there is a width_diff there cannot be a stretch and
			// if there is a stretch, there cannot be a width_diff.
			C_WIDTH_FILL
			if (stretch && (0 == k) && j) {
				// Fill the skipped luma row with the average of the rows
				// above and below it, four bytes at a time.
				for (i = OutputWidth; i > 0; i -= 8) {
					tm = ((*pyprev++ & 0xFEFEFEFE) >> 1);
					tm += ((*pynext++ & 0xFEFEFEFE) >> 1);
					*pyspace++ = tm;
					tm = ((*pyprev++ & 0xFEFEFEFE) >> 1);
					tm += ((*pynext++ & 0xFEFEFEFE) >> 1);
					*pyspace++ = tm;
				}
			}
			pnext += BackTwoLines;
			YPlane += byte_ypitch_adj;
			// Increment after even lines.
			if(0 == (k&1)) {
				UPlane += byte_uvpitch_adj;
				VPlane += byte_uvpitch_adj;
			}
		} // end of for k
		if (stretch) {
			pyprev = (U32 *)(YPlane - pitch);
			pyspace = (U32 *)YPlane;
			pynext = (U32 *)(YPlane += pitch);
		}
	} // end of for j
	// The next two cases are mutually exclusive.
	// If there is a height_diff there cannot be a stretch and
	// if there is a stretch, there cannot be a height_diff.
	C_HEIGHT_FILL
	if (stretch) {
		for (i = OutputWidth; i > 0; i -= 4) {
			*pyspace++ = *pyprev++;
		}
	}
} // end of C_H26X_BGR32toYUV12()
#endif // } 0

/*****************************************************************************
 *
 *  P5_H26X_BGR32toYUV12()
 *
 *  Hand-scheduled assembler version of the conversion described above.
 *
 *  lpbiInput     bitmap header of the source; biWidth, biHeight and
 *                biBitCount are read.
 *  OutputWidth   width of the output planes in pixels.
 *  OutputHeight  height of the output luma plane in rows.
 *  lpInput       first byte of the source pixels. Conversion starts at the
 *                last source row used and steps back one row per output row.
 *  YPlane        destination luma plane.
 *  UPlane        destination U plane (written on even rows only).
 *  VPlane        destination V plane (written on even rows only).
 *  pitch         byte distance between rows, used for all three planes.
 *
 *  Two modes, selected by comparing lpbiInput->biHeight with OutputHeight:
 *    - biHeight > OutputHeight ("stretch"): the source is cropped and every
 *      group of 11 converted rows is followed by one extra luma row, which is
 *      later filled by averaging its neighbours (the very last one is a copy
 *      of the row above it).
 *    - otherwise: rows and columns are padded out to the next multiple of 16
 *      by replicating the last pixel / last row.
 *
 *  Preconditions implied by the loop exits below, which test for a count of
 *  exactly zero: OutputWidth must be a multiple of 4 (of 8 in stretch mode),
 *  and in the non-stretch mode height_diff must be even.
 *
 *  The function is naked: it builds its own frame, addresses everything
 *  relative to esp and uses ebp as a general-purpose register. The YPlane,
 *  UPlane and VPlane argument slots on the stack are reused as working
 *  storage.
 *
 */
__declspec(naked)
void P5_H26X_BGR32toYUV12(
	LPBITMAPINFOHEADER	lpbiInput,
	WORD OutputWidth,
	WORD OutputHeight,
	U8 *lpInput,
	U8 *YPlane,
	U8 *UPlane,
	U8 *VPlane,
	const int pitch)
{
	// Permanent (callee-save) registers - ebx, esi, edi, ebp
	// Temporary (caller-save) registers - eax, ecx, edx
	//
	// Stack frame layout
	//	| pitch				|  +136
	//	| VPlane			|  +132
	//	| UPlane			|  +128
	//	| YPlane			|  +124
	//	| lpInput			|  +120
	//	| OutputHeight		|  +116
	//	| OutputWidth		|  +112
	//	| lpbiInput			|  +108
	//	----------------------------
	//	| return addr		|  +104
	//	| saved ebp			|  +100
	//	| saved ebx			|  + 96
	//	| saved esi			|  + 92
	//	| saved edi			|  + 88

	//	| output_width		|  + 84
	//	| pyprev			|  + 80
	//	| pyspace			|  + 76
	//	| pynext			|  + 72
	//	| puvprev			|  + 68
	//	| puvspace			|  + 64
	//	| i					|  + 60
	//	| j					|  + 56
	//	| k					|  + 52
	//	| BackTwoLines		|  + 48
	//	| widthx16			|  + 44
	//	| heightx16			|  + 40
	//	| width_diff		|  + 36
	//	| height_diff		|  + 32
	//	| width_adj			|  + 28
	//	| height_adj		|  + 24
	//	| stretch			|  + 20
	//	| aspect			|  + 16
	//	| LumaIters			|  + 12
	//	| mark				|  +  8
	//	| byte_ypitch_adj	|  +  4
	//	| byte_uvpitch_adj	|  +  0

#define LOCALSIZE			 88

#define PITCH_PARM			136
#define VPLANE				132
#define UPLANE				128
#define YPLANE				124
#define LP_INPUT			120
#define OUTPUT_HEIGHT_WORD	116
#define OUTPUT_WIDTH_WORD	112
#define LPBI_INPUT			108

#define OUTPUT_WIDTH		 84
#define PYPREV				 80
#define PYSPACE				 76
#define PYNEXT				 72
#define PUVPREV				 68
#define PUVSPACE			 64
#define LOOP_I				 60
#define LOOP_J				 56
#define LOOP_K				 52
#define BACK_TWO_LINES		 48
#define WIDTHX16			 44
#define HEIGHTX16			 40
#define WIDTH_DIFF			 36
#define HEIGHT_DIFF			 32
#define WIDTH_ADJ			 28
#define HEIGHT_ADJ			 24
#define STRETCH				 20
#define ASPECT				 16
#define LUMA_ITERS			 12
#define MARK				  8
#define BYTE_YPITCH_ADJ		  4
#define BYTE_UVPITCH_ADJ	  0

	_asm {

	push	ebp
	push	ebx
	push	esi
	push	edi
	sub		esp, LOCALSIZE

	// int width_diff = 0
	// int height_diff = 0
	// int width_adj = 0
	// int height_adj = 0
	// int stretch = 0
	// int aspect = 0

	xor		eax, eax
	mov		[esp + WIDTH_DIFF], eax
	mov		[esp + HEIGHT_DIFF], eax
	mov		[esp + WIDTH_ADJ], eax
	mov		[esp + HEIGHT_ADJ], eax
	mov		[esp + STRETCH], eax
	mov		[esp + ASPECT], eax

	// int LumaIters = 1

	inc		eax
	mov		[esp + LUMA_ITERS], eax

	// int mark = OutputHeight
	// int output_width = OutputWidth
	// int byte_ypitch_adj = pitch - OutputWidth
	// int byte_uvpitch_adj = pitch - (OutputWidth >> 1)

	// The WORD arguments are zero-extended by clearing ebx first and then
	// loading only its low 16 bits.
	xor		ebx, ebx
	mov		bx, [esp + OUTPUT_HEIGHT_WORD]
	mov		[esp + MARK], ebx
	mov		bx, [esp + OUTPUT_WIDTH_WORD]
	mov		[esp + OUTPUT_WIDTH], ebx
	mov		ecx, [esp + PITCH_PARM]
	mov		edx, ecx
	sub		ecx, ebx
	mov		[esp + BYTE_YPITCH_ADJ], ecx
	shr		ebx, 1
	sub		edx, ebx
	mov		[esp + BYTE_UVPITCH_ADJ], edx

	// if (lpbiInput->biHeight > OutputHeight)

	mov		ebx, [esp + LPBI_INPUT]
	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biHeight
	xor		edx, edx
	mov		dx, [esp + OUTPUT_HEIGHT_WORD]
	cmp		ecx, edx
	jle		Lno_stretch

	// for (LumaIters = 0, i = OutputHeight; i > 0; i -= 48) LumaIters += 4

	xor		ecx, ecx
Lrepeat48:
	lea		ecx, [ecx + 4]
	sub		edx, 48
	// Signed "greater than" mirrors the i > 0 test of the loop above. An
	// exact-zero test would not stop here when OutputHeight is not a
	// multiple of 48, because the count would step past zero.
	jg		Lrepeat48
	mov		[esp + LUMA_ITERS], ecx

	// aspect = LumaIters

	mov		[esp + ASPECT], ecx

	// width_adj = (lpbiInput->biWidth - OutputWidth) >> 1
	// width_adj *= lpbiInput->biBitCount
	// width_adj >>= 3

	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
	mov		edx, [esp + OUTPUT_WIDTH]
	sub		ecx, edx
	shr		ecx, 1
	xor		edx, edx
	mov		dx, (LPBITMAPINFOHEADER)[ebx].biBitCount
	imul	ecx, edx
	shr		ecx, 3
	mov		[esp + WIDTH_ADJ], ecx

	// height_adj = (lpbiInput->biHeight - (OutputHeight - aspect)) >> 1

	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biHeight
	xor		edx, edx
	mov		dx, [esp + OUTPUT_HEIGHT_WORD]
	sub		ecx, edx
	add		ecx, [esp + ASPECT]
	shr		ecx, 1
	mov		[esp + HEIGHT_ADJ], ecx

	// stretch = 1
	// mark = 11

	mov		ecx, 1
	mov		edx, 11
	mov		[esp + STRETCH], ecx
	mov		[esp + MARK], edx
	jmp		Lif_done

Lno_stretch:

	// widthx16 = (lpbiInput->biWidth + 0xF) & ~0xF
	// width_diff = widthx16 - OutputWidth

	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
	add		ecx, 00FH
	and		ecx, 0FFFFFFF0H
	mov		[esp + WIDTHX16], ecx
	mov		edx, [esp + OUTPUT_WIDTH]
	sub		ecx, edx
	mov		[esp + WIDTH_DIFF], ecx

	// byte_ypitch_adj -= width_diff

	mov		edx, [esp + BYTE_YPITCH_ADJ]
	sub		edx, ecx
	mov		[esp + BYTE_YPITCH_ADJ], edx

	// byte_uvpitch_adj -= (width_diff >> 1)

	mov		edx, [esp + BYTE_UVPITCH_ADJ]
	shr		ecx, 1
	sub		edx, ecx
	mov		[esp + BYTE_UVPITCH_ADJ], edx

	// heightx16 = (lpbiInput->biHeight + 0xF) & ~0xF
	// height_diff = heightx16 - OutputHeight

	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biHeight
	add		ecx, 00FH
	and		ecx, 0FFFFFFF0H
	mov		[esp + HEIGHTX16], ecx
	xor		edx, edx
	mov		dx, [esp + OUTPUT_HEIGHT_WORD]
	sub		ecx, edx
	mov		[esp + HEIGHT_DIFF], ecx

Lif_done:

	// BackTwoLines = -(lpbiInput->biWidth + OutputWidth);
	// BackTwoLines *= lpbiInput->biBitCount
	// BackTwoLines >>= 3

	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
	mov		edx, [esp + OUTPUT_WIDTH]
	add		ecx, edx
	neg		ecx
	xor		edx, edx
	mov		dx, (LPBITMAPINFOHEADER)[ebx].biBitCount
	imul	ecx, edx
	sar		ecx, 3					// arithmetic shift: the value is negative
	mov		[esp + BACK_TWO_LINES], ecx

	// pnext =	(U32 *)(lpInput +
	//				(((lpbiInput->biWidth * lpbiInput->biBitCount) >> 3)) *
	//					((OutputHeight - aspect - 1) + height_adj)) +
	//				width_adj)
	// assign (esi, pnext)

	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
	xor		edx, edx
	mov		dx, (LPBITMAPINFOHEADER)[ebx].biBitCount
	imul	ecx, edx
	shr		ecx, 3
	xor		edx, edx
	mov		dx, [esp + OUTPUT_HEIGHT_WORD]
	sub		edx, [esp + ASPECT]
	dec		edx
	add		edx, [esp + HEIGHT_ADJ]
	imul	ecx, edx
	add		ecx, [esp + WIDTH_ADJ]
	add		ecx, [esp + LP_INPUT]
	mov		esi, ecx

	// assign (edi, YPlane)

	mov		edi, [esp + YPLANE]

	// for (j = 0; j < LumaIters; j++)

	xor		eax, eax
	mov		[esp + LOOP_J], eax

	// for (k = 0; k < mark; k++)

L4:
	xor		eax, eax
	mov		[esp + LOOP_K], eax

	// for (i = OutputWidth; i > 0; i -= 4, pnext += 16)

L5:
	mov		eax, [esp + OUTPUT_WIDTH]
	mov		[esp + LOOP_I], eax

	// This jump is here to make sure the following loop starts in the U pipe

	jmp		L6

	// In the loops below, the numbered comments group instructions in twos,
	// presumably the pairs meant to issue together in the Pentium U and V
	// pipes. Keep the ordering intact when editing.

L6:
	//  ---------------------
	//  |    | R1 | G1 | B1 | pnext[0]
	//  ---------------------
	//  |    | R2 | G2 | B2 | pnext[1]
	//  ---------------------
	//  |    | R3 | G3 | B3 | pnext[2]
	//  ---------------------
	//  |    | R4 | G4 | B4 | pnext[3]
	//  ---------------------

	// t0 = pnext[0]
	// t1 = pnext[1]
	// t = ( BYUV[(t1>> 1)&0x7F].YU +
	//       GYUV[(t1>> 9)&0x7F].YU +
	//       RYUV[(t1>>17)&0x7F].YU )
	// *(YPlane+1) = ((t>>8)+8)
	// t = ( BYUV[(t0>> 1)&0x7F].YU +
	//       GYUV[(t0>> 9)&0x7F].YU +
	//       RYUV[(t0>>17)&0x7F].YU )
	// *YPlane = ((t>>8)+8)
	// assign(eax: B2,Y1,Y2,U)
	// assign(ebx: B1,V)
	// assign(ecx: G2,G1)
	// assign(edx: R2,R1)
	// assign(ebp: B1)

	// 1
	mov		ebx, [esi]
	mov		ecx, [esi + 4]
	// 2
	mov		eax, ecx
	mov		edx, ecx
	// 3
	shr		eax, 1
	and		ecx, 0xFE00
	// 4
	shr		ecx, 9
	and		eax, 0x7F
	// 5
	shr		edx, 17
	nop
	// 6
	mov		eax, [BYUV+eax*8].YU
	and		edx, 0x7F
	// 7
	add		eax, [GYUV+ecx*8].YU
	mov		ecx, ebx
	// 8
	add		eax, [RYUV+edx*8].YU
	mov		edx, ebx
	// 9
	shr		ebx, 1
	add		eax, 0x800				// +8 applied before the shift by 8
	// 10
	sar		eax, 8
	and		ecx, 0xFE00
	// 11
	shr		ecx, 9
	and		ebx, 0x7F
	// 12
	shr		edx, 17
	mov		[edi + 1], al
	// 13
	mov		eax, [BYUV+ebx*8].YU
	and		edx, 0x7F
	// 14
	add		eax, [GYUV+ecx*8].YU
	mov		ebp, ebx
	// 15
	add		eax, [RYUV+edx*8].YU
	nop
	// 16
	sar		eax, 8
	mov		ebx, [esp + LOOP_K]
	// 17
	add		eax, 8
	and		ebx, 1
	// 18
	// The mov does not alter flags, so the jnz tests the result of
	// "and ebx, 1": chroma is skipped on odd rows.
	mov		[edi], al
	jnz		L9

	// At this point, ebp: B1, ecx: G1, edx: R1
	// t0 = pnext[0]
	// *UPlane++   = ((t>>24)+64)
	// t   = ( RYUV[(t0>>17)&0x7F].V +
	//         GYUV[(t0>> 9)&0x7F].V +
	//         BYUV[(t0>> 1)&0x7F].V )
	// *VPlane++ = ((t>>8)+64)

	// 19
	mov		ebx, [RYUV+edx*8].V
	mov		edx, [esp + UPLANE]
	// 20
	sar		eax, 16					// eax was already shifted by 8 for Y
	add		ebx, [GYUV+ecx*8].V
	// 21
	add		eax, 64
	add		ebx, [BYUV+ebp*8].V
	// 22
	mov		[edx], al
	inc		edx
	// 23
	mov		[esp + UPLANE], edx
	mov		edx, [esp + VPLANE]
	// 24
	sar		ebx, 8
	inc		edx
	// 25
	add		ebx, 64
	mov		[esp + VPLANE], edx
	// 26
	mov		[edx - 1], bl
	nop

L9:
	//  ---------------------
	//  |    | R1 | G1 | B1 | pnext[0]
	//  ---------------------
	//  |    | R2 | G2 | B2 | pnext[1]
	//  ---------------------
	//  |    | R3 | G3 | B3 | pnext[2]
	//  ---------------------
	//  |    | R4 | G4 | B4 | pnext[3]
	//  ---------------------

	// t2 = pnext[2]
	// t3 = pnext[3]
	// t = ( BYUV[(t3>> 1)&0x7F].YU +
	//       GYUV[(t3>> 9)&0x7F].YU +
	//       RYUV[(t3>>17)&0x7F].YU )
	// *(YPlane+3) = ((t>>8)+8)
	// t = ( BYUV[(t2>> 1)&0x7F].YU +
	//       GYUV[(t2>> 9)&0x7F].YU +
	//       RYUV[(t2>>17)&0x7F].YU )
	// *(YPlane+2) = ((t>>8)+8)
	// YPlane += 4
	// assign(eax: B4,Y3,Y4,U)
	// assign(ebx: B3,V)
	// assign(ecx: G4,G3)
	// assign(edx: R4,R3)
	// assign(ebp: B3)

	// 27
	mov		ebx, [esi + 8]
	mov		ecx, [esi + 12]
	// 28
	mov		eax, ecx
	mov		edx, ecx
	// 29
	shr		eax, 1
	and		ecx, 0xFE00
	// 30
	shr		ecx, 9
	and		eax, 0x7F
	// 31
	shr		edx, 17
	nop
	// 32
	mov		eax, [BYUV+eax*8].YU
	and		edx, 0x7F
	// 33
	add		eax, [GYUV+ecx*8].YU
	mov		ecx, ebx
	// 34
	add		eax, [RYUV+edx*8].YU
	mov		edx, ebx
	// 35
	shr		ebx, 1
	add		eax, 0x800
	// 36
	sar		eax, 8
	and		ebx, 0x7F
	// 37
	shr		ecx, 9
	mov		[edi + 3], al
	// 38
	shr		edx, 17
	and		ecx, 0x7F
	// 39
	mov		eax, [BYUV+ebx*8].YU
	and		edx, 0x7F
	// 40
	add		eax, [GYUV+ecx*8].YU
	mov		ebp, ebx
	// 41
	add		eax, [RYUV+edx*8].YU
	nop
	// 42
	sar		eax, 8
	mov		ebx, [esp + LOOP_K]
	// 43
	add		eax, 8
	and		ebx, 1
	// 44
	mov		[edi + 2], al
	jnz		L16

	// At this point, ebp: B3, ecx: G3, edx: R3
	// t2 = pnext[2]
	// *UPlane++   = ((t>>24)+64)
	// t   = ( RYUV[(t2>>17)&0x7F].V +
	//         GYUV[(t2>> 9)&0x7F].V +
	//         BYUV[(t2>> 1)&0x7F].V )
	// *VPlane++ = ((t>>8)+64)

	// 45
	mov		ebx, [RYUV+edx*8].V
	mov		edx, [esp + UPLANE]
	// 46
	sar		eax, 16
	add		ebx, [GYUV+ecx*8].V
	// 47
	add		eax, 64
	add		ebx, [BYUV+ebp*8].V
	// 48
	mov		[edx], al
	inc		edx
	// 49
	mov		[esp + UPLANE], edx
	mov		edx, [esp + VPLANE]
	// 50
	sar		ebx, 8
	inc		edx
	// 51
	add		ebx, 64
	mov		[esp + VPLANE], edx
	// 52
	mov		[edx - 1], bl
	nop

L16:
	// 53
	mov		eax, [esp + LOOP_I]
	lea		esi, [esi + 16]
	// 54
	sub		eax, 4
	lea		edi, [edi + 4]
	// 55
	// Exits only when the count reaches exactly zero, so OutputWidth must
	// be a multiple of 4.
	mov		[esp + LOOP_I], eax
	jnz		L6

	// Assembler version of C_WIDTH_FILL
	// if (width_diff)
	mov		eax, [esp + WIDTH_DIFF]
	mov		edx, eax
	test	eax, eax
	jz		Lno_width_diff

	// Replicate the last luma byte of the row into all four bytes of ecx.
	// tm = (*(YPlane-1)) << 24
	// tm |= (tm>>8) | (tm>>16) | (tm>>24)
	mov		bl, [edi - 1]
	shl		ebx, 24
	mov		ecx, ebx
	shr		ebx, 8
	or		ecx, ebx
	shr		ebx, 8
	or		ecx, ebx
	shr		ebx, 8
	or		ecx, ebx

	// *(U32 *)YPlane = tm
	mov		[edi], ecx

	// if ((width_diff-4) > 0)
	sub		eax, 4
	jz		Lupdate_YPlane

	// *(U32 *)(YPlane + 4) = tm
	mov		[edi + 4], ecx
	sub		eax, 4

	// if ((width_diff-8) > 0)
	jz		Lupdate_YPlane

	// *(U32 *)(YPlane + 8) = tm
	mov		[edi + 8], ecx

Lupdate_YPlane:
	// YPlane += width_diff
	lea		edi, [edi + edx]

	// if (0 == (k&1))
	mov		eax, [esp + LOOP_K]
	test	eax, 1
	jnz		Lno_width_diff

	// t8u = *(UPlane-1)
	// t8v = *(VPlane-1)
	// *UPlane++ = t8u
	// *UPlane++ = t8u
	// *VPlane++ = t8v
	// *VPlane++ = t8v
	mov		ebp, edx
	mov		eax, [esp + UPLANE]
	mov		ebx, [esp + VPLANE]
	mov		cl, [eax - 1]
	mov		ch, [ebx - 1]
	mov		[eax], cl
	mov		[eax + 1], cl
	mov		[ebx], ch
	mov		[ebx + 1], ch

	// if ((width_diff-4) > 0)
	sub		ebp, 4
	jz		Lupdate_UVPlane

	// *UPlane++ = t8u
	// *UPlane++ = t8u
	// *VPlane++ = t8v
	// *VPlane++ = t8v
	mov		[eax + 2], cl
	mov		[eax + 3], cl
	mov		[ebx + 2], ch
	mov		[ebx + 3], ch

	// if ((width_diff-8) > 0)
	sub		ebp, 4
	jz		Lupdate_UVPlane

	// *UPlane++ = t8u
	// *UPlane++ = t8u
	// *VPlane++ = t8v
	// *VPlane++ = t8v
	mov		[eax + 4], cl
	mov		[eax + 5], cl
	mov		[ebx + 4], ch
	mov		[ebx + 5], ch

Lupdate_UVPlane:
	// UPlane += width_diff >> 1
	// VPlane += width_diff >> 1
	shr		edx, 1
	lea		eax, [eax + edx]
	mov		[esp + UPLANE], eax
	lea		ebx, [ebx + edx]
	mov		[esp + VPLANE], ebx

Lno_width_diff:

	// if (stretch && (0 == k) && j)
	mov		eax, [esp + STRETCH]
	test	eax, eax
	jz		L21
	mov		eax, [esp + LOOP_K]
	test	eax, eax
	jnz		L21
	mov		eax, [esp + LOOP_J]
	test	eax, eax
	jz		L21

	// spill YPlane ptr (the YPlane argument slot is used as scratch)
	mov		[esp + YPLANE], edi
	nop

	// for (i = OutputWidth; i > 0; i -= 8)
	// assign (ebx, pyprev)
	// assign (ecx, t)
	// assign (edx, pynext)
	// assign (edi, pyspace)
	// assign (ebp, i)

	// make sure offsets are such that there are no bank conflicts here
	mov		ebx, [esp + PYPREV]
	mov		edi, [esp + PYSPACE]
	mov		edx, [esp + PYNEXT]
	mov		ebp, [esp + OUTPUT_WIDTH]

	// t = (*pyprev++ & 0xFEFEFEFE) >> 1
	// t += (*pynext++ & 0xFEFEFEFE) >> 1
	// *pyspace++ = t
	// t = (*pyprev++ & 0xFEFEFEFE) >> 1
	// t += (*pynext++ & 0xFEFEFEFE) >> 1
	// *pyspace++ = t

L22:
	// 1
	mov		eax, [ebx]
	lea		ebx, [ebx + 4]
	// 2
	mov		ecx, [edx]
	lea		edx, [edx + 4]
	// 3
	shr		ecx, 1
	and		eax, 0xFEFEFEFE
	// 4
	shr		eax, 1
	and		ecx, 0x7F7F7F7F
	// 5
	add		eax, ecx
	mov		ecx, [ebx]
	// 6
	shr		ecx, 1
	mov		[edi], eax
	// 7
	mov		eax, [edx]
	and		ecx, 0x7F7F7F7F
	// 8
	shr		eax, 1
	lea		edi, [edi + 4]
	// 9
	and		eax, 0x7F7F7F7F
	lea		ebx, [ebx + 4]
	// 10
	lea		edx, [edx + 4]
	add		eax, ecx
	// 11
	mov		[edi], eax
	lea		edi, [edi + 4]
	// 12
	// Exits only on an exact zero count: OutputWidth must be a multiple
	// of 8 when stretching.
	sub		ebp, 8
	jnz		L22

	// kill (ebx, pyprev)
	// kill (ecx, t)
	// kill (edx, pynext)
	// kill (edi, pyspace)
	// kill (ebp, i)

	// restore YPlane
	mov		edi, [esp + YPLANE]

	// pnext += BackTwoLines
L21:
	add		esi, [esp + BACK_TWO_LINES]

	// YPlane += byte_ypitch_adj;
	add		edi, [esp + BYTE_YPITCH_ADJ]

	// if(0 == (k&1))
	mov		eax, [esp + LOOP_K]
	and		eax, 1
	jnz		L23

	// UPlane += byte_uvpitch_adj;
	// VPlane += byte_uvpitch_adj;
	mov		eax, [esp + BYTE_UVPITCH_ADJ]
	add		[esp + UPLANE], eax
	add		[esp + VPLANE], eax

L23:
	inc		DWORD PTR [esp + LOOP_K]
	mov		eax, [esp + LOOP_K]
	cmp		eax, [esp + MARK]
	jl		L5

	// if (stretch)
	cmp		DWORD PTR [esp + STRETCH], 0
	je		L24

	// pyprev = YPlane - pitch
	mov		eax, edi
	sub		eax, [esp + PITCH_PARM]
	mov		[esp + PYPREV], eax

	// pyspace = YPlane
	mov		[esp + PYSPACE], edi

	// pynext = (YPlane += pitch)
	add		edi, [esp + PITCH_PARM]
	mov		[esp + PYNEXT], edi

L24:
	inc		DWORD PTR [esp + LOOP_J]
	mov		eax, [esp + LOOP_J]
	cmp		eax, [esp + LUMA_ITERS]
	jl		L4

	// kill (esi, pnext)
	// kill (edi, YPlane)

	// ASM version of C_HEIGHT_FILL
	// if (height_diff)
	mov		eax, [esp + HEIGHT_DIFF]
	test	eax, eax
	jz		Lno_height_diff

	// pyspace = (U32 *)YPlane
	mov		esi, edi

	// pyprev =  (U32 *)(YPlane - pitch)
	sub		esi, [esp + PITCH_PARM]

	// for (j = height_diff; j > 0; j--)
Lheight_yfill_loop:
	mov		ebx, [esp + WIDTHX16]

	// for (i = widthx16; i>0; i -=4)
Lheight_yfill_row:
	// *pyspace++ = *pyprev++
	mov		ecx, [esi]
	lea		esi, [esi + 4]
	mov		[edi], ecx
	lea		edi, [edi + 4]
	sub		ebx, 4
	jnz		Lheight_yfill_row

	// pyspace += word_ypitch_adj
	// pyprev  += word_ypitch_adj
	// (esi/edi hold byte addresses here, so the byte adjustment is added)
	add		esi, [esp + BYTE_YPITCH_ADJ]
	add		edi, [esp + BYTE_YPITCH_ADJ]
	dec		eax
	jnz		Lheight_yfill_loop

	mov		eax, [esp + HEIGHT_DIFF]
	mov		edi, [esp + UPLANE]

	// puvspace = (U32 *)UPlane
	mov		esi, edi

	// puvprev =  (U32 *)(UPlane - pitch)
	sub		esi, [esp + PITCH_PARM]

	// for (j = height_diff; j > 0; j -= 2)
	// (exits only on an exact zero count: height_diff must be even)
Lheight_ufill_loop:
	mov		ebx, [esp + WIDTHX16]

	// for (i = widthx16; i>0; i -= 8)
Lheight_ufill_row:
	// *puvspace++ = *puvprev++
	mov		ecx, [esi]
	mov		[edi], ecx
	lea		esi, [esi + 4]
	lea		edi, [edi + 4]
	sub		ebx, 8
	jnz		Lheight_ufill_row

	// puvspace += word_uvpitch_adj
	// puvprev  += word_uvpitch_adj
	add		esi, [esp + BYTE_UVPITCH_ADJ]
	add		edi, [esp + BYTE_UVPITCH_ADJ]
	sub		eax, 2
	jnz		Lheight_ufill_loop

	mov		eax, [esp + HEIGHT_DIFF]
	mov		edi, [esp + VPLANE]

	// puvspace = (U32 *)VPlane
	mov		esi, edi

	// puvprev =  (U32 *)(VPlane - pitch)
	sub		esi, [esp + PITCH_PARM]

	// for (j = height_diff; j > 0; j -= 2)
Lheight_vfill_loop:
	mov		ebx, [esp + WIDTHX16]

	// for (i = widthx16; i>0; i -= 8)
Lheight_vfill_row:
	// *puvspace++ = *puvprev++
	mov		ecx, [esi]
	mov		[edi], ecx
	lea		esi, [esi + 4]
	lea		edi, [edi + 4]
	sub		ebx, 8
	jnz		Lheight_vfill_row

	// puvspace += word_uvpitch_adj
	// puvprev  += word_uvpitch_adj
	add		esi, [esp + BYTE_UVPITCH_ADJ]
	add		edi, [esp + BYTE_UVPITCH_ADJ]
	sub		eax, 2
	jnz		Lheight_vfill_loop

Lno_height_diff:

	// if (stretch)
	// pyprev is loaded before the test; it is only dereferenced when
	// stretch is set, which is also the only case in which it was written.
	mov		esi, [esp + PYPREV]
	cmp		DWORD PTR [esp + STRETCH], 0
	je		L26

	// for (i = OutputWidth; i > 0; i -= 4)
	// assign (esi, pyprev)
	// assign (edi, pyspace)
	// assign (ebp, i)
	mov		ebp, [esp + OUTPUT_WIDTH]
	mov		edi, [esp + PYSPACE]

L25:
	// *pyspace++ = *pyprev++
	mov		ecx, [esi]
	lea		esi, [esi + 4]
	mov		[edi], ecx
	lea		edi, [edi + 4]
	sub		ebp, 4
	jnz		L25

	// kill (esi, pyprev)
	// kill (edi, pyspace)
	// kill (ebp, i)

L26:
	add		esp, LOCALSIZE
	pop		edi
	pop		esi
	pop		ebx
	pop		ebp
	ret

	}
}

#undef LOCALSIZE

#undef PITCH_PARM
#undef VPLANE
#undef UPLANE
#undef YPLANE
#undef LP_INPUT
#undef OUTPUT_HEIGHT_WORD
#undef OUTPUT_WIDTH_WORD
#undef LPBI_INPUT

#undef OUTPUT_WIDTH
#undef PYPREV
#undef PYSPACE
#undef PYNEXT
#undef PUVPREV
#undef PUVSPACE
#undef LOOP_I
#undef LOOP_J
#undef LOOP_K
#undef BACK_TWO_LINES
#undef WIDTHX16
#undef HEIGHTX16
#undef WIDTH_DIFF
#undef HEIGHT_DIFF
#undef WIDTH_ADJ
#undef HEIGHT_ADJ
#undef STRETCH
#undef ASPECT
#undef LUMA_ITERS
#undef MARK
#undef BYTE_YPITCH_ADJ
#undef BYTE_UVPITCH_ADJ

#endif // } H263P