/* *************************************************************************
** INTEL Corporation Proprietary Information
**
** This listing is supplied under the terms of a license
** agreement with INTEL Corporation and may not be copied
** nor disclosed except in accordance with the terms of
** that agreement.
**
** Copyright (c) 1995 Intel Corporation.
** All Rights Reserved.
**
** *************************************************************************
*/

#include "precomp.h"

#if defined(H263P) || defined(USE_BILINEAR_MSH26X) // {

//
// For the P5 versions, the strategy is to compute the Y value for an odd RGB value
// followed by computing the Y value for the corresponding even RGB value. The registers
// are then set with the proper values to compute U and V values for the even RGB
// value. This avoids repeating the shifting and masking needed to extract the Red,
// Green and Blue components.
//
// Only the 555 version of RGB16 input color conversion is provided. To generate
// other versions, use the following table.
//
//	number	shift		mask
//			B, G, R
//	------	-----------	----------------
//	 555	2, 3, 8		0x7C, 0x7C, 0x7C
//	 664	3, 3, 9		0x78, 0x7E, 0x7E
//	 565	2, 4, 9		0x7C, 0x7E, 0x7C
//	 655	2, 3, 9		0x7C, 0x7C, 0x7E
//
// Only 555 falls under BI_RGB. The others are specified using the
// BI_BITFIELDS compression specification. For BI_BITFIELDS, call
// Build16bitModeID to get the actual bitfield number. This routine requires the
// three array elements in the bmiColors field of a BITMAPINFO object.
//
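//
// The sketch below is an added illustration only (it is not part of the
// build and the helper names are made up). It spells out what the shift/mask
// pairs in the table above do: each pair turns one color component of a
// packed 16-bit pixel into a 7-bit, 4-byte-aligned index for the
// BYUV/GYUV/RYUV lookup tables, which are then summed as
// BYUV[ib].YU + GYUV[ig].YU + RYUV[ir].YU in the code below.
//
#if 0 // { illustration only
// 555 layout: B in bits 0-4, G in bits 5-9, R in bits 10-14.
static void rgb16_555_to_indices(unsigned short pix,
	unsigned *ib, unsigned *ig, unsigned *ir)
{
	*ib = ((unsigned)pix << 2) & 0x7C;	// B: shift 2, mask 0x7C
	*ig = ((unsigned)pix >> 3) & 0x7C;	// G: shift 3, mask 0x7C
	*ir = ((unsigned)pix >> 8) & 0x7C;	// R: shift 8, mask 0x7C
}

// 565 layout: B in bits 0-4, G in bits 5-10, R in bits 11-15.
static void rgb16_565_to_indices(unsigned short pix,
	unsigned *ib, unsigned *ig, unsigned *ir)
{
	*ib = ((unsigned)pix << 2) & 0x7C;	// B: shift 2, mask 0x7C
	*ig = ((unsigned)pix >> 4) & 0x7E;	// G: shift 4, mask 0x7E
	*ir = ((unsigned)pix >> 9) & 0x7C;	// R: shift 9, mask 0x7C
}
#endif // } illustration only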
/*****************************************************************************
 *
 *  H26X_BGR16555toYUV12()
 *
 *  Convert from BGR16 (555) to YUV12 (YCrCb 4:2:0) and copy to destination
 *  memory with pitch defined by the pitch argument. The input data is stored
 *  in the order B,G,R,B,G,R...
 *
 */

#if 0 // { 0
void C_H26X_BGR16555toYUV12(
	LPBITMAPINFOHEADER lpbiInput,
	WORD OutputWidth,
	WORD OutputHeight,
	U8 *lpInput,
	U8 *YPlane,
	U8 *UPlane,
	U8 *VPlane,
	const int pitch)
{
	int t1, t2;
	int tm1, tm2;

	C_RGB_COLOR_CONVERT_INIT

	for (j = 0; j < LumaIters; j++) {

		for (k = 0; k < mark; k++) {

			for (i = OutputWidth; i > 0; i -= 4, YPlane += 4) {
				tm1 = *pnext++;
				t1 = (BYUV[(tm1<<2)&0x7C].YU +
					  GYUV[(tm1>>3)&0x7C].YU +
					  RYUV[(tm1>>8)&0x7C].YU);
				*(YPlane) = (U8)((t1>>SHIFT_WIDTH)+8);
				t = (BYUV[(tm1>>14)&0x7C].YU +
					 GYUV[(tm1>>19)&0x7C].YU +
					 RYUV[(tm1>>24)&0x7C].YU);
				*(YPlane+1) = (U8)((t>>SHIFT_WIDTH)+8);
				tm2 = *pnext++;
				t2 = (BYUV[(tm2<<2)&0x7C].YU +
					  GYUV[(tm2>>3)&0x7C].YU +
					  RYUV[(tm2>>8)&0x7C].YU);
				*(YPlane+2) = (U8)((t2>>SHIFT_WIDTH)+8);
				t = (BYUV[(tm2>>14)&0x7C].YU +
					 GYUV[(tm2>>19)&0x7C].YU +
					 RYUV[(tm2>>24)&0x7C].YU);
				*(YPlane+3) = (U8)((t>>SHIFT_WIDTH)+8);

				if (0 == (k&1)) {
					*(U16 *)UPlane = ((t1+0x40000000)>>24) |
									 (((t2+0x40000000)>>16)&0xFF00);
					t1 = (RYUV[(tm1>>8)&0x7C].V +
						  GYUV[(tm1>>3)&0x7C].V +
						  BYUV[(tm1<<2)&0x7C].V);
					t2 = (RYUV[(tm2>>8)&0x7C].V +
						  GYUV[(tm2>>3)&0x7C].V +
						  BYUV[(tm2<<2)&0x7C].V);
					*(U16 *)VPlane = ((t1+0x4000)>>8) |
									 ((t2+0x4000)&0xFF00);
					UPlane += 2;
					VPlane += 2;
				}
			}

			// The next two cases are mutually exclusive.
			// If there is a width_diff there cannot be a stretch and
			// if there is a stretch, there cannot be a width_diff.
			C_WIDTH_FILL

			if (stretch && (0 == k) && j) {
				for (i = OutputWidth; i > 0; i -= 8) {
					tm = ((*pyprev++ & 0xFEFEFEFE) >> 1);
					tm += ((*pynext++ & 0xFEFEFEFE) >> 1);
					*pyspace++ = tm;
					tm = ((*pyprev++ & 0xFEFEFEFE) >> 1);
					tm += ((*pynext++ & 0xFEFEFEFE) >> 1);
					*pyspace++ = tm;
				}
			}

			pnext += BackTwoLines;
			YPlane += byte_ypitch_adj;

			// Increment after even lines.
			if (0 == (k&1)) {
				UPlane += byte_uvpitch_adj;
				VPlane += byte_uvpitch_adj;
			}
		} // end of for k

		if (stretch) {
			pyprev = (U32 *)(YPlane - pitch);
			pyspace = (U32 *)YPlane;
			pynext = (U32 *)(YPlane += pitch);
		}
	} // end of for j

	// The next two cases are mutually exclusive.
	// If there is a height_diff there cannot be a stretch and
	// if there is a stretch, there cannot be a height_diff.
	C_HEIGHT_FILL

	if (stretch) {
		for (i = OutputWidth; i > 0; i -= 4) {
			*pyspace++ = *pyprev++;
		}
	}

} // end of C_H26X_BGR16555toYUV12

#endif // } 0
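//
// Note (added, illustration only - not part of the build): the stretch path
// above fills in a skipped luma row by averaging the rows above and below it
// four bytes at a time. Masking with 0xFEFEFEFE before the shift drops the
// low bit of every byte so that no bit shifts across a byte boundary,
// letting one 32-bit operation average four pixels:
//
#if 0 // { illustration only
static unsigned long average4(unsigned long above, unsigned long below)
{
	// Per-byte (a >> 1) + (b >> 1), with the inter-byte carry bits removed.
	return ((above & 0xFEFEFEFE) >> 1) + ((below & 0xFEFEFEFE) >> 1);
}
#endif // } illustration only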
__declspec(naked)
void P5_H26X_BGR16555toYUV12(
	LPBITMAPINFOHEADER lpbiInput,
	WORD OutputWidth,
	WORD OutputHeight,
	U8 *lpInput,
	U8 *YPlane,
	U8 *UPlane,
	U8 *VPlane,
	const int pitch)
{
// Permanent (callee-save) registers - ebx, esi, edi, ebp
// Temporary (caller-save) registers - eax, ecx, edx
//
// Stack frame layout
//	| pitch             | +136
//	| VPlane            | +132
//	| UPlane            | +128
//	| YPlane            | +124
//	| lpInput           | +120
//	| OutputHeight      | +116
//	| OutputWidth       | +112
//	| lpbiInput         | +108
//	----------------------------
//	| return addr       | +104
//	| saved ebp         | +100
//	| saved ebx         | + 96
//	| saved esi         | + 92
//	| saved edi         | + 88
//	| output_width      | + 84
//	| pyprev            | + 80
//	| pyspace            | + 76
//	| pynext            | + 72
//	| puvprev           | + 68
//	| puvspace          | + 64
//	| i                 | + 60
//	| j                 | + 56
//	| k                 | + 52
//	| BackTwoLines      | + 48
//	| widthx16          | + 44
//	| heightx16         | + 40
//	| width_diff        | + 36
//	| height_diff       | + 32
//	| width_adj         | + 28
//	| height_adj        | + 24
//	| stretch           | + 20
//	| aspect            | + 16
//	| LumaIters         | + 12
//	| mark              | +  8
//	| byte_ypitch_adj   | +  4
//	| byte_uvpitch_adj  | +  0

#define LOCALSIZE			 88

#define PITCH_PARM			136
#define VPLANE				132
#define UPLANE				128
#define YPLANE				124
#define LP_INPUT			120
#define OUTPUT_HEIGHT_WORD	116
#define OUTPUT_WIDTH_WORD	112
#define LPBI_INPUT			108

#define OUTPUT_WIDTH		 84
#define PYPREV				 80
#define PYSPACE				 76
#define PYNEXT				 72
#define PUVPREV				 68
#define PUVSPACE			 64
#define LOOP_I				 60
#define LOOP_J				 56
#define LOOP_K				 52
#define BACK_TWO_LINES		 48
#define WIDTHX16			 44
#define HEIGHTX16			 40
#define WIDTH_DIFF			 36
#define HEIGHT_DIFF			 32
#define WIDTH_ADJ			 28
#define HEIGHT_ADJ			 24
#define STRETCH				 20
#define ASPECT				 16
#define LUMA_ITERS			 12
#define MARK				  8
#define BYTE_YPITCH_ADJ		  4
#define BYTE_UVPITCH_ADJ	  0

	_asm {

	push	ebp
	push	ebx
	push	esi
	push	edi
	sub		esp, LOCALSIZE

// int width_diff = 0
// int height_diff = 0
// int width_adj = 0
// int height_adj = 0
// int stretch = 0
// int aspect = 0
	xor		eax, eax
	mov		[esp + WIDTH_DIFF], eax
	mov		[esp + HEIGHT_DIFF], eax
	mov		[esp + WIDTH_ADJ], eax
	mov		[esp + HEIGHT_ADJ], eax
	mov		[esp + STRETCH], eax
	mov		[esp + ASPECT], eax

// int LumaIters = 1
	inc		eax
	mov		[esp + LUMA_ITERS], eax

// int mark = OutputHeight
// int output_width = OutputWidth
// int byte_ypitch_adj = pitch - OutputWidth
// int byte_uvpitch_adj = pitch - (OutputWidth >> 1)
	xor		ebx, ebx
	mov		bx, [esp + OUTPUT_HEIGHT_WORD]
	mov		[esp + MARK], ebx
	mov		bx, [esp + OUTPUT_WIDTH_WORD]
	mov		[esp + OUTPUT_WIDTH], ebx
	mov		ecx, [esp + PITCH_PARM]
	mov		edx, ecx
	sub		ecx, ebx
	mov		[esp + BYTE_YPITCH_ADJ], ecx
	shr		ebx, 1
	sub		edx, ebx
	mov		[esp + BYTE_UVPITCH_ADJ], edx

// if (lpbiInput->biHeight > OutputHeight)
	mov		ebx, [esp + LPBI_INPUT]
	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biHeight
	xor		edx, edx
	mov		dx, [esp + OUTPUT_HEIGHT_WORD]
	cmp		ecx, edx
	jle		Lno_stretch

// for (LumaIters = 0, i = OutputHeight; i > 0; i -= 48) LumaIters += 4
	xor		ecx, ecx
Lrepeat48:
	lea		ecx, [ecx + 4]
	sub		edx, 48
	jnz		Lrepeat48
	mov		[esp + LUMA_ITERS], ecx

// aspect = LumaIters
	mov		[esp + ASPECT], ecx

// width_adj = (lpbiInput->biWidth - OutputWidth) >> 1
// width_adj *= lpbiInput->biBitCount
// width_adj >>= 3
	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
	mov		edx, [esp + OUTPUT_WIDTH]
	sub		ecx, edx
	shr		ecx, 1
	xor		edx, edx
	mov		dx, (LPBITMAPINFOHEADER)[ebx].biBitCount
	imul	ecx, edx
	shr		ecx, 3
	mov		[esp + WIDTH_ADJ], ecx

// height_adj = (lpbiInput->biHeight - (OutputHeight - aspect)) >> 1
	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biHeight
	xor		edx, edx
	mov		dx, [esp + OUTPUT_HEIGHT_WORD]
	sub		ecx, edx
	add		ecx, [esp + ASPECT]
	shr		ecx, 1
	mov		[esp + HEIGHT_ADJ], ecx

// stretch = 1
// mark = 11
	mov		ecx, 1
	mov		edx, 11
	mov		[esp + STRETCH], ecx
	mov		[esp + MARK], edx
	jmp		Lif_done

Lno_stretch:
// widthx16 = (lpbiInput->biWidth + 0xF) & ~0xF
// width_diff = widthx16 - OutputWidth
	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
	add		ecx, 00FH
	and		ecx, 0FFFFFFF0H
	mov		[esp + WIDTHX16], ecx
	mov		edx, [esp + OUTPUT_WIDTH]
	sub		ecx, edx
	mov		[esp + WIDTH_DIFF], ecx

// byte_ypitch_adj -= width_diff
	mov		edx, [esp + BYTE_YPITCH_ADJ]
	sub		edx, ecx
	mov		[esp + BYTE_YPITCH_ADJ], edx

// byte_uvpitch_adj -= (width_diff >> 1)
	mov		edx, [esp + BYTE_UVPITCH_ADJ]
	shr		ecx, 1
	sub		edx, ecx
	mov		[esp + BYTE_UVPITCH_ADJ], edx

// heightx16 = (lpbiInput->biHeight + 0xF) & ~0xF
// height_diff = heightx16 - OutputHeight
	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biHeight
	add		ecx, 00FH
	and		ecx, 0FFFFFFF0H
	mov		[esp + HEIGHTX16], ecx
	xor		edx, edx
	mov		dx, [esp + OUTPUT_HEIGHT_WORD]
	sub		ecx, edx
	mov		[esp + HEIGHT_DIFF], ecx

Lif_done:
// BackTwoLines = -(lpbiInput->biWidth + OutputWidth);
// BackTwoLines *= lpbiInput->biBitCount
// BackTwoLines >>= 3
	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
	mov		edx, [esp + OUTPUT_WIDTH]
	add		ecx, edx
	neg		ecx
	xor		edx, edx
	mov		dx, (LPBITMAPINFOHEADER)[ebx].biBitCount
	imul	ecx, edx
	sar		ecx, 3
	mov		[esp + BACK_TWO_LINES], ecx

// pnext = (U32 *)(lpInput +
//			(((lpbiInput->biWidth * lpbiInput->biBitCount) >> 3) *
//			 ((OutputHeight - aspect - 1) + height_adj)) +
//			width_adj)
// assign (esi, pnext)
	mov		ecx, (LPBITMAPINFOHEADER)[ebx].biWidth
	xor		edx, edx
	mov		dx, (LPBITMAPINFOHEADER)[ebx].biBitCount
	imul	ecx, edx
	shr		ecx, 3
	xor		edx, edx
	mov		dx, [esp + OUTPUT_HEIGHT_WORD]
	sub		edx, [esp + ASPECT]
	dec		edx
	add		edx, [esp + HEIGHT_ADJ]
	imul	ecx, edx
	add		ecx, [esp + WIDTH_ADJ]
	add		ecx, [esp + LP_INPUT]
	mov		esi, ecx

// assign (edi, YPlane)
	mov		edi, [esp + YPLANE]

// for (j = 0; j < LumaIters; j++)
	xor		eax, eax
	mov		[esp + LOOP_J], eax

// for (k = 0; k < mark; k++)
L4:
	xor		eax, eax
	mov		[esp + LOOP_K], eax

// for (i = OutputWidth; i > 0; i -= 4, pnext += 2)
L5:
	mov		eax, [esp + OUTPUT_WIDTH]

// This jump is here to make sure the following loop starts on the U pipe
	jmp		L6
L6:
// tm1 = pnext[0]
// t = ( BYUV[(tm1>>14)&0x7C].YU +
//		 GYUV[(tm1>>19)&0x7C].YU +
//		 RYUV[(tm1>>24)&0x7C].YU )
// *(YPlane+1) = (U8)((t>>8)+8)
// t1 = ( BYUV[(tm1<< 2)&0x7C].YU +
//		  GYUV[(tm1>> 3)&0x7C].YU +
//		  RYUV[(tm1>> 8)&0x7C].YU )
// *YPlane = (U8)((t1>>8)+8)
// assign(eax: B2/Y1/Y2/U)
// assign(ebx: B1/V)
// assign(ecx: G2/G1)
// assign(edx: R2/R1)
// assign(ebp: B1)
// 1
	mov		ebx, [esi]
	mov		[esp + LOOP_I], eax
// 2
	mov		eax, ebx
	mov		ecx, ebx
// 3
	shr		eax, 14
	mov		edx, ebx
// 4
	shr		ecx, 19
	and		eax, 0x7C
// 5
	shr		edx, 24
	and		ecx, 0x7C
// 6
	mov		eax, [BYUV+eax*8].YU
	and		edx, 0x7C
// 7
	add		eax, [GYUV+ecx*8].YU
	mov		ecx, ebx
// 8
	add		eax, [RYUV+edx*8].YU
	mov		edx, ebx
// 9
	sar		eax, 8
	and		ebx, 0x1F
// 10
	shl		ebx, 2
	add		eax, 8
// 11
	shr		ecx, 3
	mov		[edi + 1], al
// 12
	shr		edx, 8
	and		ecx, 0x7C
// 13
	mov		eax, [BYUV+ebx*8].YU
	and		edx, 0x7C
// 14
	add		eax, [GYUV+ecx*8].YU
	mov		ebp, ebx
// 15
	add		eax, [RYUV+edx*8].YU
	lea		edi, [edi + 4]
// 16
	sar		eax, 8
	mov		ebx, [esp + LOOP_K]
// 17
	add		eax, 8
	and		ebx, 1
// 18
	mov		[edi - 4], al
	jnz		L9a

// At this point, ebp: B1, ecx: G1, edx: R1
// *UPlane++ = (U8)((t1>>24)+64)
// t = ( RYUV[(tm1>> 8)&0x7C].V +
//		 GYUV[(tm1>> 3)&0x7C].V +
//		 BYUV[(tm1<< 2)&0x7C].V )
// *VPlane++ = (U8)((t>>8)+64)
// 19
	mov		ebx, [RYUV+edx*8].V
	mov		edx, [esp + UPLANE]
// 20
	sar		eax, 16
	add		ebx, [GYUV+ecx*8].V
// 21
	add		eax, 64
	add		ebx, [BYUV+ebp*8].V
// 22
	mov		[edx], al
	inc		edx
// 23
	mov		[esp + UPLANE], edx
	mov		edx, [esp + VPLANE]
// 24
	sar		ebx, 8
	inc		edx
// 25
	add		ebx, 64
	mov		[esp + VPLANE], edx
// 26
	mov		[edx - 1], bl
	nop

L9a:
// tm2 = pnext[1]
// t = ( BYUV[(tm2>>14)&0x7C].YU +
//		 GYUV[(tm2>>19)&0x7C].YU +
//		 RYUV[(tm2>>24)&0x7C].YU )
// *(YPlane+3) = (U8)((t>>8)+8)
// t2 = ( BYUV[(tm2<< 2)&0x7C].YU +
//		  GYUV[(tm2>> 3)&0x7C].YU +
//		  RYUV[(tm2>> 8)&0x7C].YU )
// *(YPlane+2) = (U8)((t2>>8)+8)
// YPlane += 4
// assign(eax: B2/Y1/Y2/U)
// assign(ebx: B1/V)
// assign(ecx: G2/G1)
// assign(edx: R2/R1)
// assign(ebp: B1)
// 27
	mov		eax, [esi + 4]
	lea		esi, [esi + 8]
// 28
	mov		ebx, eax
	mov		ecx, eax
// 29
	shr		eax, 14
	mov		edx, ebx
// 30
	shr		ecx, 19
	and		eax, 0x7C
// 31
	shr		edx, 24
	and		ecx, 0x7C
// 32
	mov		eax, [BYUV+eax*8].YU
	and		edx, 0x7C
// 33
	add		eax, [GYUV+ecx*8].YU
	mov		ecx, ebx
// 34
	add		eax, [RYUV+edx*8].YU
	mov		edx, ebx
// 35
	sar		eax, 8
	and		ebx, 0x1F
// 36
	shl		ebx, 2
	add		eax, 8
// 37
	shr		ecx, 3
	mov		[edi - 1], al
// 38
	shr		edx, 8
	and		ecx, 0x7C
// 39
	mov		eax, [BYUV+ebx*8].YU
	and		edx, 0x7C
// 40
	add		eax, [GYUV+ecx*8].YU
	mov		ebp, ebx
// 41
	add		eax, [RYUV+edx*8].YU
	nop
// 42
	sar		eax, 8
	mov		ebx, [esp + LOOP_K]
// 43
	add		eax, 8
	and		ebx, 1
// 44
	mov		[edi - 2], al
	jnz		L9

// At this point, ebp: B1, ecx: G1, edx: R1
// *UPlane++ = (U8)((t2>>24)+64)
// t = ( RYUV[(tm2>> 8)&0x7C].V +
//		 GYUV[(tm2>> 3)&0x7C].V +
//		 BYUV[(tm2<< 2)&0x7C].V )
// *VPlane++ = (U8)((t>>8)+64)
// 45
	mov		ebx, [RYUV+edx*8].V
	mov		edx, [esp + UPLANE]
// 46
	sar		eax, 16
	add		ebx, [GYUV+ecx*8].V
// 47
	add		eax, 64
	add		ebx, [BYUV+ebp*8].V
// 48
	mov		[edx], al
	inc		edx
// 49
	mov		[esp + UPLANE], edx
	mov		edx, [esp + VPLANE]
// 50
	sar		ebx, 8
	inc		edx
// 51
	add		ebx, 64
	mov		[esp + VPLANE], edx
// 52
	mov		[edx - 1], bl
	nop

L9:
// 53
	mov		eax, [esp + LOOP_I]
	nop
// 54
	sub		eax, 4
	jnz		L6

// Assembler version of C_WIDTH_FILL
// if (width_diff)
	mov		eax, [esp + WIDTH_DIFF]
	mov		edx, eax
	test	eax, eax
	jz		Lno_width_diff

// tm = (*(YPlane-1)) << 24
// tm |= (tm>>8) | (tm>>16) | (tm>>24)
	mov		bl, [edi - 1]
	shl		ebx, 24
	mov		ecx, ebx
	shr		ebx, 8
	or		ecx, ebx
	shr		ebx, 8
	or		ecx, ebx
	shr		ebx, 8
	or		ecx, ebx

// *(U32 *)YPlane = tm
	mov		[edi], ecx

// if ((width_diff-4) > 0)
	sub		eax, 4
	jz		Lupdate_YPlane

// *(U32 *)(YPlane + 4) = tm
	mov		[edi + 4], ecx
	sub		eax, 4

// if ((width_diff-8) > 0)
	jz		Lupdate_YPlane

// *(U32 *)(YPlane + 8) = tm
	mov		[edi + 8], ecx

Lupdate_YPlane:
// YPlane += width_diff
	lea		edi, [edi + edx]

// if (0 == (k&1))
	mov		eax, [esp + LOOP_K]
	test	eax, 1
	jnz		Lno_width_diff

// t8u = *(UPlane-1)
// t8v = *(VPlane-1)
// *UPlane++ = t8u
// *UPlane++ = t8u
// *VPlane++ = t8v
// *VPlane++ = t8v
	mov		ebp, edx
	mov		eax, [esp + UPLANE]
	mov		ebx, [esp + VPLANE]
	mov		cl, [eax - 1]
	mov		ch, [ebx - 1]
	mov		[eax], cl
	mov		[eax + 1], cl
	mov		[ebx], ch
	mov		[ebx + 1], ch

// if ((width_diff-4) > 0)
	sub		ebp, 4
	jz		Lupdate_UVPlane

// *UPlane++ = t8u
// *UPlane++ = t8u
// *VPlane++ = t8v
// *VPlane++ = t8v
	mov		[eax + 2], cl
	mov		[eax + 3], cl
	mov		[ebx + 2], ch
	mov		[ebx + 3], ch

// if ((width_diff-8) > 0)
	sub		ebp, 4
	jz		Lupdate_UVPlane

// *UPlane++ = t8u
// *UPlane++ = t8u
// *VPlane++ = t8v
// *VPlane++ = t8v
	mov		[eax + 4], cl
	mov		[eax + 5], cl
	mov		[ebx + 4], ch
	mov		[ebx + 5], ch

Lupdate_UVPlane:
	shr		edx, 1
	lea		eax, [eax + edx]
	mov		[esp + UPLANE], eax
	lea		ebx, [ebx + edx]
	mov		[esp + VPLANE], ebx

Lno_width_diff:
// if (stretch && (0 == k) && j)
	mov		eax, [esp + STRETCH]
	test	eax, eax
	jz		L14
	mov		eax, [esp + LOOP_K]
	test	eax, eax
	jnz		L14
	mov		eax, [esp + LOOP_J]
	test	eax, eax
	jz		L14

// spill YPlane ptr
	mov		[esp + YPLANE], edi
	nop

// for (i = OutputWidth; i > 0; i -= 8)
// assign (ebx, pyprev)
// assign (ecx, t)
// assign (edx, pynext)
// assign (edi, pyspace)
// assign (ebp, i)
// make sure offsets are such that there are no bank conflicts here
	mov		ebx, [esp + PYPREV]
	mov		edi, [esp + PYSPACE]
	mov		edx, [esp + PYNEXT]
	mov		ebp, [esp + OUTPUT_WIDTH]

// t = (*pyprev++ & 0xFEFEFEFE) >> 1
// t += (*pynext++ & 0xFEFEFEFE) >> 1
// *pyspace++ = t
// t = (*pyprev++ & 0xFEFEFEFE) >> 1
// t += (*pynext++ & 0xFEFEFEFE) >> 1
// *pyspace++ = t
L15:
// 1
	mov		eax, [ebx]
	lea		ebx, [ebx + 4]
// 2
	mov		ecx, [edx]
	lea		edx, [edx + 4]
// 3
	shr		ecx, 1
	and		eax, 0xFEFEFEFE
// 4
	shr		eax, 1
	and		ecx, 0x7F7F7F7F
// 5
	add		eax, ecx
	mov		ecx, [ebx]
// 6
	shr		ecx, 1
	mov		[edi], eax
// 7
	mov		eax, [edx]
	and		ecx, 0x7F7F7F7F
// 8
	shr		eax, 1
	lea		edi, [edi + 4]
// 9
	and		eax, 0x7F7F7F7F
	lea		ebx, [ebx + 4]
// 10
	lea		edx, [edx + 4]
	add		eax, ecx
// 11
	mov		[edi], eax
	lea		edi, [edi + 4]
// 12
	sub		ebp, 8
	jnz		L15

// kill (ebx, pyprev)
// kill (ecx, t)
// kill (edx, pynext)
// kill (edi, pyspace)
// kill (ebp, i)
// restore YPlane
	mov		edi, [esp + YPLANE]

// pnext += BackTwoLines
L14:
	add		esi, [esp + BACK_TWO_LINES]

// YPlane += byte_ypitch_adj;
	add		edi, [esp + BYTE_YPITCH_ADJ]

// if (0 == (k&1))
	mov		eax, [esp + LOOP_K]
	and		eax, 1
	jnz		L16

// UPlane += byte_uvpitch_adj;
// VPlane += byte_uvpitch_adj;
	mov		eax, [esp + BYTE_UVPITCH_ADJ]
	add		[esp + UPLANE], eax
	add		[esp + VPLANE], eax

L16:
	inc		DWORD PTR [esp + LOOP_K]
	mov		eax, [esp + LOOP_K]
	cmp		eax, [esp + MARK]
	jl		L5

// if (stretch)
	cmp		DWORD PTR [esp + STRETCH], 0
	je		L17

// pyprev = YPlane - pitch
	mov		eax, edi
	sub		eax, [esp + PITCH_PARM]
	mov		[esp + PYPREV], eax

// pyspace = YPlane
	mov		[esp + PYSPACE], edi

// pynext = (YPlane += pitch)
	add		edi, [esp + PITCH_PARM]
	mov		[esp + PYNEXT], edi

L17:
	inc		DWORD PTR [esp + LOOP_J]
	mov		eax, [esp + LOOP_J]
	cmp		eax, [esp + LUMA_ITERS]
	jl		L4

// kill (esi, pnext)
// kill (edi, YPlane)

// ASM version of C_HEIGHT_FILL
// if (height_diff)
	mov		eax, [esp + HEIGHT_DIFF]
	test	eax, eax
	jz		Lno_height_diff

// pyspace = (U32 *)YPlane
	mov		esi, edi
// pyprev = (U32 *)(YPlane - pitch)
	sub		esi, [esp + PITCH_PARM]

// for (j = height_diff; j > 0; j--)
Lheight_yfill_loop:
	mov		ebx, [esp + WIDTHX16]

// for (i = widthx16; i > 0; i -= 4)
Lheight_yfill_row:
// *pyspace++ = *pyprev++
	mov		ecx, [esi]
	lea		esi, [esi + 4]
	mov		[edi], ecx
	lea		edi, [edi + 4]
	sub		ebx, 4
	jnz		Lheight_yfill_row

// pyspace += word_ypitch_adj
// pyprev += word_ypitch_adj
	add		esi, [esp + BYTE_YPITCH_ADJ]
	add		edi, [esp + BYTE_YPITCH_ADJ]
	dec		eax
	jnz		Lheight_yfill_loop

	mov		eax, [esp + HEIGHT_DIFF]
	mov		edi, [esp + UPLANE]
// puvspace = (U32 *)UPlane
	mov		esi, edi
// puvprev = (U32 *)(UPlane - pitch)
	sub		esi, [esp + PITCH_PARM]

// for (j = height_diff; j > 0; j -= 2)
Lheight_ufill_loop:
	mov		ebx, [esp + WIDTHX16]

// for (i = widthx16; i > 0; i -= 8)
Lheight_ufill_row:
// *puvspace++ = *puvprev++
	mov		ecx, [esi]
	mov		[edi], ecx
	lea		esi, [esi + 4]
	lea		edi, [edi + 4]
	sub		ebx, 8
	jnz		Lheight_ufill_row

// puvspace += word_uvpitch_adj
// puvprev += word_uvpitch_adj
	add		esi, [esp + BYTE_UVPITCH_ADJ]
	add		edi, [esp + BYTE_UVPITCH_ADJ]
	sub		eax, 2
	jnz		Lheight_ufill_loop

	mov		eax, [esp + HEIGHT_DIFF]
	mov		edi, [esp + VPLANE]
// puvspace = (U32 *)VPlane
	mov		esi, edi
// puvprev = (U32 *)(VPlane - pitch)
	sub		esi, [esp + PITCH_PARM]

// for (j = height_diff; j > 0; j -= 2)
Lheight_vfill_loop:
	mov		ebx, [esp + WIDTHX16]

// for (i = widthx16; i > 0; i -= 8)
Lheight_vfill_row:
// *puvspace++ = *puvprev++
	mov		ecx, [esi]
	mov		[edi], ecx
	lea		esi, [esi + 4]
	lea		edi, [edi + 4]
	sub		ebx, 8
	jnz		Lheight_vfill_row

// puvspace += word_uvpitch_adj
// puvprev += word_uvpitch_adj
	add		esi, [esp + BYTE_UVPITCH_ADJ]
	add		edi, [esp + BYTE_UVPITCH_ADJ]
	sub		eax, 2
	jnz		Lheight_vfill_loop

Lno_height_diff:
// if (stretch)
	mov		esi, [esp + PYPREV]
	cmp		DWORD PTR [esp + STRETCH], 0
	je		L19

// for (i = OutputWidth; i > 0; i -= 4)
// assign (esi, pyprev)
// assign (edi, pyspace)
// assign (ebp, i)
	mov		ebp, [esp + OUTPUT_WIDTH]
	mov		edi, [esp + PYSPACE]

L18:
	mov		ecx, [esi]
	lea		esi, [esi + 4]
	mov		[edi], ecx
	lea		edi, [edi + 4]
	sub		ebp, 4
	jnz		L18

// kill (esi, pyprev)
// kill (edi, pyspace)
// kill (ebp, i)

L19:
	add		esp, LOCALSIZE

	pop		edi
	pop		esi
	pop		ebx
	pop		ebp

	ret

	}
}

#undef LOCALSIZE

#undef PITCH_PARM
#undef VPLANE
#undef UPLANE
#undef YPLANE
#undef LP_INPUT
#undef OUTPUT_HEIGHT_WORD
#undef OUTPUT_WIDTH_WORD
#undef LPBI_INPUT

#undef OUTPUT_WIDTH
#undef PYPREV
#undef PYSPACE
#undef PYNEXT
#undef PUVPREV
#undef PUVSPACE
#undef LOOP_I
#undef LOOP_J
#undef LOOP_K
#undef BACK_TWO_LINES
#undef WIDTHX16
#undef HEIGHTX16
#undef WIDTH_DIFF
#undef HEIGHT_DIFF
#undef WIDTH_ADJ
#undef HEIGHT_ADJ
#undef STRETCH
#undef ASPECT
#undef LUMA_ITERS
#undef MARK
#undef BYTE_YPITCH_ADJ
#undef BYTE_UVPITCH_ADJ

#endif // } H263P