//===== Copyright 1996-2005, Valve Corporation, All rights reserved. ======// // // Purpose: // // $NoKeywords: $ // //===========================================================================// #ifndef PIXELWRITER_H #define PIXELWRITER_H #ifdef _WIN32 #pragma once #endif #if defined( _WIN32 ) || defined( _PS3 ) #define FORCEINLINE_PIXEL FORCEINLINE #elif POSIX #define FORCEINLINE_PIXEL inline #else #error "implement me" #endif // This flag allows us to write to formats we we don't support direct pixel access // (like DXT1) without spewing errors. The only actions that are available for // these formats are direct access to the bitstream. #define ALLOW_UNSUPPORTED_FORMATS 1 #include "bitmap/imageformat.h" #include "tier0/dbg.h" #include "mathlib/compressed_vector.h" #include "mathlib/ssemath.h" #include "mathlib/vector4d.h" #include "cache_hints.h" //----------------------------------------------------------------------------- // Color writing class //----------------------------------------------------------------------------- class CPixelWriter { public: FORCEINLINE void SetPixelMemory( ImageFormat format, void* pMemory, int stride ); FORCEINLINE void *GetPixelMemory() { return m_pBase; } // this is no longer used: #if 0 // defined( _X360 ) // set after SetPixelMemory() FORCEINLINE void ActivateByteSwapping( bool bSwap ); #endif FORCEINLINE void Seek( int x, int y ); FORCEINLINE void* SkipBytes( int n ) RESTRICT; FORCEINLINE void SkipPixels( int n ); FORCEINLINE void WritePixel( int r, int g, int b, int a = 255 ); FORCEINLINE void WritePixelNoAdvance( int r, int g, int b, int a = 255 ); FORCEINLINE void WritePixelSigned( int r, int g, int b, int a = 255 ); FORCEINLINE void WritePixelNoAdvanceSigned( int r, int g, int b, int a = 255 ); FORCEINLINE void ReadPixelNoAdvance( int &r, int &g, int &b, int &a ); // Floating point formats FORCEINLINE void WritePixelNoAdvanceF( float r, float g, float b, float a = 1.0f ); FORCEINLINE void WritePixelF( float r, float g, float b, float a = 1.0f ); FORCEINLINE void WriteManyPixelF( const float * RESTRICT pSrc, const int num ); // write a contiguous stream of 4-floats. // SIMD formats FORCEINLINE void WritePixel( FLTX4 rgba ) RESTRICT; FORCEINLINE void WritePixelNoAdvance( FLTX4 rgba ) RESTRICT; #if defined ( _X360 ) || defined ( _PS3 ) // here are some explicit formats so we can avoid the switch: FORCEINLINE void WritePixelNoAdvance_RGBA8888( FLTX4 rgba ); FORCEINLINE void WritePixelNoAdvance_BGRA8888( FLTX4 rgba ); // as above, but with m_pBits passed in to avoid a LHS FORCEINLINE void WritePixelNoAdvance_BGRA8888( FLTX4 rgba, void *pBits ) RESTRICT; // for writing entire SIMD registers at once when they have // already been packed, and when m_pBits is vector-aligned // (which is a requirement for write-combined memory) // offset is added to m_pBits (saving you from the obligatory // LHS of a SkipBytes) FORCEINLINE void WriteFourPixelsExplicitLocation_BGRA8888( FLTX4 rgba, int offset ); FORCEINLINE void WritePixelNoAdvance_RGBA16161616( FLTX4 rgba ); #endif FORCEINLINE void WritePixelNoAdvance16F( float r, float g, float b, float a ); FORCEINLINE unsigned char GetPixelSize() { return m_Size; } FORCEINLINE unsigned short GetBytesPerRow() { return m_BytesPerRow; } FORCEINLINE bool IsUsingFloatFormat() const; FORCEINLINE bool IsUsing16BitFloatFormat() const; // We allow "unsupported" formats only if you are writing directly into the bitstream FORCEINLINE bool IsUsingSupportedFormat() const; FORCEINLINE unsigned char *GetCurrentPixel() { return m_pBits; } private: // helper functions for some explicit combinations of flags and sizes -- lets us // do some conversions on the GPRs using bitshifts rather than a round trip to the // FPU and a LHS. FORCEINLINE void WriteManyPixelTo16BitF( const float * RESTRICT pSrc, int num ) RESTRICT; // write a contiguous stream of 4-floats. // FORCEINLINE void WriteManyPixelTo32BitF( const float * RESTRICT pSrc, const int num ); // write a contiguous stream of 4-floats. FORCEINLINE void AssertFormatIsSupported( ImageFormat format ) const; enum { PIXELWRITER_USING_FLOAT_FORMAT = 0x01, PIXELWRITER_USING_16BIT_FLOAT_FORMAT = 0x02, PIXELWRITER_SWAPBYTES = 0x04, PIXELWRITER_USING_UNSUPPORTED_FORMAT = 0x08, }; unsigned char* m_pBase; unsigned char* m_pBits; unsigned short m_BytesPerRow; unsigned char m_Size; unsigned char m_nFlags; signed short m_RShift; signed short m_GShift; signed short m_BShift; signed short m_AShift; unsigned int m_RMask; unsigned int m_GMask; unsigned int m_BMask; unsigned int m_AMask; #if defined ( _X360 ) || defined ( _PS3 ) ImageFormat m_Format; public: inline const ImageFormat &GetFormat() { return m_Format; } private: #endif }; FORCEINLINE_PIXEL bool CPixelWriter::IsUsingFloatFormat() const { return (m_nFlags & PIXELWRITER_USING_FLOAT_FORMAT) != 0; } FORCEINLINE_PIXEL bool CPixelWriter::IsUsing16BitFloatFormat() const { return (m_nFlags & PIXELWRITER_USING_16BIT_FLOAT_FORMAT) != 0; } FORCEINLINE_PIXEL bool CPixelWriter::IsUsingSupportedFormat() const { return (m_nFlags & PIXELWRITER_USING_UNSUPPORTED_FORMAT) == 0; } FORCEINLINE_PIXEL void CPixelWriter::SetPixelMemory( ImageFormat format, void* pMemory, int stride ) { m_pBits = (unsigned char*)pMemory; m_pBase = m_pBits; m_BytesPerRow = (unsigned short)stride; m_nFlags = 0; #if defined ( _X360 ) || defined ( _PS3 ) m_Format = format; #endif switch ( format ) { case IMAGE_FORMAT_R32F: // NOTE! : the low order bits are first in this naming convention. m_Size = 4; m_RShift = 0; m_GShift = 0; m_BShift = 0; m_AShift = 0; m_RMask = 0xFFFFFFFF; m_GMask = 0x0; m_BMask = 0x0; m_AMask = 0x0; m_nFlags |= PIXELWRITER_USING_FLOAT_FORMAT; break; case IMAGE_FORMAT_RGBA32323232F: m_Size = 16; m_RShift = 0; m_GShift = 32; m_BShift = 64; m_AShift = 96; m_RMask = 0xFFFFFFFF; m_GMask = 0xFFFFFFFF; m_BMask = 0xFFFFFFFF; m_AMask = 0xFFFFFFFF; m_nFlags |= PIXELWRITER_USING_FLOAT_FORMAT; break; case IMAGE_FORMAT_RGBA16161616F: m_Size = 8; m_RShift = 0; m_GShift = 16; m_BShift = 32; m_AShift = 48; m_RMask = 0xFFFF; m_GMask = 0xFFFF; m_BMask = 0xFFFF; m_AMask = 0xFFFF; m_nFlags |= PIXELWRITER_USING_FLOAT_FORMAT | PIXELWRITER_USING_16BIT_FLOAT_FORMAT; break; case IMAGE_FORMAT_RGBA8888: #if defined( _X360 ) case IMAGE_FORMAT_LINEAR_RGBA8888: #endif m_Size = 4; m_RShift = 0; m_GShift = 8; m_BShift = 16; m_AShift = 24; m_RMask = 0xFF; m_GMask = 0xFF; m_BMask = 0xFF; m_AMask = 0xFF; break; case IMAGE_FORMAT_BGRA1010102: // NOTE! : the low order bits are first in this naming convention. m_Size = 4; m_RShift = 20; m_GShift = 10; m_BShift = 0; m_AShift = 30; m_RMask = 0x3FF; m_GMask = 0x3FF; m_BMask = 0x3FF; m_AMask = 0x03; break; case IMAGE_FORMAT_BGRA8888: // NOTE! : the low order bits are first in this naming convention. #if defined( _X360 ) case IMAGE_FORMAT_LINEAR_BGRA8888: #endif m_Size = 4; m_RShift = 16; m_GShift = 8; m_BShift = 0; m_AShift = 24; m_RMask = 0xFF; m_GMask = 0xFF; m_BMask = 0xFF; m_AMask = 0xFF; break; case IMAGE_FORMAT_BGRX8888: #if defined( _X360 ) case IMAGE_FORMAT_LINEAR_BGRX8888: #endif m_Size = 4; m_RShift = 16; m_GShift = 8; m_BShift = 0; m_AShift = 24; m_RMask = 0xFF; m_GMask = 0xFF; m_BMask = 0xFF; m_AMask = 0x00; break; case IMAGE_FORMAT_BGRA4444: m_Size = 2; m_RShift = 4; m_GShift = 0; m_BShift = -4; m_AShift = 8; m_RMask = 0xF0; m_GMask = 0xF0; m_BMask = 0xF0; m_AMask = 0xF0; break; case IMAGE_FORMAT_BGR888: m_Size = 3; m_RShift = 16; m_GShift = 8; m_BShift = 0; m_AShift = 0; m_RMask = 0xFF; m_GMask = 0xFF; m_BMask = 0xFF; m_AMask = 0x00; break; case IMAGE_FORMAT_BGR565: m_Size = 2; m_RShift = 8; m_GShift = 3; m_BShift = -3; m_AShift = 0; m_RMask = 0xF8; m_GMask = 0xFC; m_BMask = 0xF8; m_AMask = 0x00; break; case IMAGE_FORMAT_BGRA5551: case IMAGE_FORMAT_BGRX5551: m_Size = 2; m_RShift = 7; m_GShift = 2; m_BShift = -3; m_AShift = 8; m_RMask = 0xF8; m_GMask = 0xF8; m_BMask = 0xF8; m_AMask = 0x80; break; // GR - alpha format for HDR support case IMAGE_FORMAT_A8: #if defined( _X360 ) case IMAGE_FORMAT_LINEAR_A8: #endif m_Size = 1; m_RShift = 0; m_GShift = 0; m_BShift = 0; m_AShift = 0; m_RMask = 0x00; m_GMask = 0x00; m_BMask = 0x00; m_AMask = 0xFF; break; case IMAGE_FORMAT_UVWQ8888: m_Size = 4; m_RShift = 0; m_GShift = 8; m_BShift = 16; m_AShift = 24; m_RMask = 0xFF; m_GMask = 0xFF; m_BMask = 0xFF; m_AMask = 0xFF; break; case IMAGE_FORMAT_RGBA16161616: #if defined( _X360 ) case IMAGE_FORMAT_LINEAR_RGBA16161616: #endif m_Size = 8; if ( !IsX360() ) { m_RShift = 0; m_GShift = 16; m_BShift = 32; m_AShift = 48; } else { m_RShift = 48; m_GShift = 32; m_BShift = 16; m_AShift = 0; } m_RMask = 0xFFFF; m_GMask = 0xFFFF; m_BMask = 0xFFFF; m_AMask = 0xFFFF; break; case IMAGE_FORMAT_I8: #if defined( _X360 ) case IMAGE_FORMAT_LINEAR_I8: #endif // whatever goes into R is considered the intensity. m_Size = 1; m_RShift = 0; m_GShift = 0; m_BShift = 0; m_AShift = 0; m_RMask = 0xFF; m_GMask = 0x00; m_BMask = 0x00; m_AMask = 0x00; break; // FIXME: Add more color formats as need arises default: { #if ALLOW_UNSUPPORTED_FORMATS m_nFlags |= PIXELWRITER_USING_UNSUPPORTED_FORMAT; #else // ALLOW_UNSUPPORTED_FORMATS static bool format_error_printed[NUM_IMAGE_FORMATS]; if ( !format_error_printed[format] ) { Assert( 0 ); Msg( "CPixelWriter::SetPixelMemory: Unsupported image format %i\n", format ); format_error_printed[format] = true; } #endif // ALLOW_UNSUPPORTED_FORMATS m_Size = 0; // set to zero so that we don't stomp memory for formats that we don't understand. m_RShift = 0; m_GShift = 0; m_BShift = 0; m_AShift = 0; m_RMask = 0xFF; m_GMask = 0x00; m_BMask = 0x00; m_AMask = 0x00; } break; } } #if 0 // defined( _X360 ) FORCEINLINE void CPixelWriter::ActivateByteSwapping( bool bSwap ) { // X360TBD: Who is trying to use this? // Purposely not hooked up because PixelWriter has been ported to read/write native pixels only Assert( 0 ); if ( bSwap && !(m_nFlags & PIXELWRITER_SWAPBYTES ) ) { m_nFlags |= PIXELWRITER_SWAPBYTES; // only tested with 4 byte formats Assert( m_Size == 4 ); } else if ( !bSwap && (m_nFlags & PIXELWRITER_SWAPBYTES ) ) { m_nFlags &= ~PIXELWRITER_SWAPBYTES; } else { // same state return; } // swap the shifts m_RShift = 24-m_RShift; m_GShift = 24-m_GShift; m_BShift = 24-m_BShift; m_AShift = 24-m_AShift; } #endif //----------------------------------------------------------------------------- // Sets where we're writing to //----------------------------------------------------------------------------- FORCEINLINE_PIXEL void CPixelWriter::Seek( int x, int y ) { Assert( IsUsingSupportedFormat() ); m_pBits = m_pBase + y * m_BytesPerRow + x * m_Size; } //----------------------------------------------------------------------------- // Skips n bytes: //----------------------------------------------------------------------------- FORCEINLINE_PIXEL void* CPixelWriter::SkipBytes( int n ) RESTRICT { m_pBits += n; return m_pBits; } //----------------------------------------------------------------------------- // Skips n pixels: //----------------------------------------------------------------------------- FORCEINLINE_PIXEL void CPixelWriter::SkipPixels( int n ) { Assert( IsUsingSupportedFormat() ); SkipBytes( n * m_Size ); } //----------------------------------------------------------------------------- // Writes a pixel without advancing the index PC ONLY //----------------------------------------------------------------------------- FORCEINLINE_PIXEL void CPixelWriter::WritePixelNoAdvanceF( float r, float g, float b, float a ) { Assert( IsUsingSupportedFormat() ); Assert( IsUsingFloatFormat() ); // X360TBD: Not ported Assert( IsPC() || IsPS3() ); if (PIXELWRITER_USING_16BIT_FLOAT_FORMAT & m_nFlags) { WritePixelNoAdvance16F( r,g,b,a ); } else { // fp32 int pBuf[4] = { 0, 0, 0, 0 }; pBuf[ m_RShift >> 5 ] |= (FloatBits(r) & m_RMask) << ( m_RShift & 0x1F ); pBuf[ m_GShift >> 5 ] |= (FloatBits(g) & m_GMask) << ( m_GShift & 0x1F ); pBuf[ m_BShift >> 5 ] |= (FloatBits(b) & m_BMask) << ( m_BShift & 0x1F ); pBuf[ m_AShift >> 5 ] |= (FloatBits(a) & m_AMask) << ( m_AShift & 0x1F ); memcpy( m_pBits, pBuf, m_Size ); } } FORCEINLINE void CPixelWriter::WritePixelNoAdvance16F( float r, float g, float b, float a ) { if ( IsPS3() ) { // we know what the values of shift and mask are going to be because // of the format, so we can elide them and write directly float16 *fp16 = reinterpret_cast(m_pBits); fp16[0].SetFloat( r ); fp16[1].SetFloat( g ); fp16[2].SetFloat( b ); fp16[3].SetFloat( a ); } else { float16 fp16[4]; fp16[0].SetFloat( r ); fp16[1].SetFloat( g ); fp16[2].SetFloat( b ); fp16[3].SetFloat( a ); // fp16 unsigned short pBuf[4] = { 0, 0, 0, 0 }; pBuf[ m_RShift >> 4 ] |= (fp16[0].GetBits() & m_RMask) << ( m_RShift & 0xF ); pBuf[ m_GShift >> 4 ] |= (fp16[1].GetBits() & m_GMask) << ( m_GShift & 0xF ); pBuf[ m_BShift >> 4 ] |= (fp16[2].GetBits() & m_BMask) << ( m_BShift & 0xF ); pBuf[ m_AShift >> 4 ] |= (fp16[3].GetBits() & m_AMask) << ( m_AShift & 0xF ); memcpy( m_pBits, pBuf, m_Size ); } } //----------------------------------------------------------------------------- // Writes a lot of pixels, efficiently //----------------------------------------------------------------------------- FORCEINLINE_PIXEL void CPixelWriter::WriteManyPixelTo16BitF( const float * RESTRICT pSrc, int num ) RESTRICT { Assert( IsUsingSupportedFormat() ); const static int SIZE = 4*sizeof(unsigned short); // known precondition const static int MASK = 0xFFFF; // another known precondition: m_RShift == 0 && m_GShift == 16 && m_BShift == 32 && m_AShift == 48 unsigned char *pBits = m_pBits; // compiler actually fails to hoist this onto a register properly otherwise. for ( int i = 0; num; --num, ++i ) { /* // this actually slowed things down, for whatever perverse reason. // every cache line boundary, prefetch the next in bloc, so long as we've at least 128 bytes left to go. // the destination is in noncacheable memory. if ( (num > 32) && ( (reinterpret_cast(pSrc) & 127) == 0 ) ) { PREFETCH_128( pSrc, 128 ); } */ float16 * RESTRICT pOut = reinterpret_cast< float16 * >(pBits); pOut[0].SetFloat( pSrc[0] ); pOut[1].SetFloat( pSrc[1] ); pOut[2].SetFloat( pSrc[2] ); pOut[3].SetFloat( pSrc[3] ); /* pAck[i+0].SetFloat( pSrc[0] ); pAck[i+1].SetFloat( pSrc[1] ); pAck[i+2].SetFloat( pSrc[2] ); pAck[i+3].SetFloat( pSrc[3] ); */ pSrc += 4; pBits += SIZE; } m_pBits = pBits; } //----------------------------------------------------------------------------- // Writes a pixel, advances the write index //----------------------------------------------------------------------------- FORCEINLINE_PIXEL void CPixelWriter::WritePixelF( float r, float g, float b, float a ) { WritePixelNoAdvanceF(r, g, b, a); m_pBits += m_Size; } //----------------------------------------------------------------------------- // Writes an array of pixels, advancing the write index. // the input data is required to be a contiguous stream of Vector4Ds // (ie, each pixel consists of four consecutive floats, and the data is // consecutive in memory) //----------------------------------------------------------------------------- FORCEINLINE_PIXEL void CPixelWriter::WriteManyPixelF( const float * RESTRICT pSrc, const int num ) { Assert( IsUsingSupportedFormat() ); Assert( IsUsingFloatFormat() ); // X360TBD: Not ported Assert( IsPC() || IsPS3() ); if ( m_Size == 4*sizeof(unsigned short) && (PIXELWRITER_USING_16BIT_FLOAT_FORMAT & m_nFlags) ) { Assert( m_RShift == 0 && m_GShift == 16 && m_BShift == 32 && m_AShift == 48 ); WriteManyPixelTo16BitF( pSrc, num ); } /* else if ( m_Size == 4*sizeof(int) && !(PIXELWRITER_USING_16BIT_FLOAT_FORMAT & m_nFlags) ) { WriteManyPixelTo32BitF( pSrc, num ); } */ else for ( const float * const sentinel = pSrc + ( num * 4 ); pSrc < sentinel; pSrc += 4 ) // naive general case { WritePixelF( pSrc[0], pSrc[1], pSrc[2], pSrc[3] ); } } //----------------------------------------------------------------------------- // Writes a pixel, advances the write index //----------------------------------------------------------------------------- FORCEINLINE_PIXEL void CPixelWriter::WritePixel( int r, int g, int b, int a ) { WritePixelNoAdvance(r,g,b,a); m_pBits += m_Size; } //----------------------------------------------------------------------------- // Writes a pixel, advances the write index //----------------------------------------------------------------------------- FORCEINLINE_PIXEL void CPixelWriter::WritePixelSigned( int r, int g, int b, int a ) { WritePixelNoAdvanceSigned(r,g,b,a); m_pBits += m_Size; } //----------------------------------------------------------------------------- // Writes a pixel without advancing the index //----------------------------------------------------------------------------- FORCEINLINE_PIXEL void CPixelWriter::WritePixelNoAdvance( int r, int g, int b, int a ) { Assert( IsUsingSupportedFormat() ); Assert( !IsUsingFloatFormat() ); if ( m_Size <= 0 ) { return; } if ( m_Size < 5 ) { unsigned int val = (r & m_RMask) << m_RShift; val |= (g & m_GMask) << m_GShift; val |= (m_BShift > 0) ? ((b & m_BMask) << m_BShift) : ((b & m_BMask) >> -m_BShift); val |= (a & m_AMask) << m_AShift; switch( m_Size ) { default: Assert( 0 ); return; case 1: { m_pBits[0] = (unsigned char)((val & 0xff)); return; } case 2: { ((unsigned short *)m_pBits)[0] = (unsigned short)((val & 0xffff)); return; } case 3: { if ( IsPC() || IsPS3() || !IsX360() ) { ((unsigned short *)m_pBits)[0] = (unsigned short)((val & 0xffff)); m_pBits[2] = (unsigned char)((val >> 16) & 0xff); } else { m_pBits[0] = (unsigned char)(((val >> 16) & 0xff)); m_pBits[1] = (unsigned char)(((val >> 8 ) & 0xff)); m_pBits[2] = (unsigned char)(val & 0xff); } return; } case 4: { ((unsigned int *)m_pBits)[0] = val; return; } } } else // RGBA32323232 or RGBA16161616 -- PC only. { AssertMsgOnce(!IsX360(), "Unsupported lightmap format used in WritePixelNoAdvance(). This is a severe performance fault.\n"); // AssertMsg(!IsX360(), "Unsupported lightmap format used in WritePixelNoAdvance(). This is a severe performance fault.\n"); int64 val = ( ( int64 )(r & m_RMask) ) << m_RShift; val |= ( ( int64 )(g & m_GMask) ) << m_GShift; val |= (m_BShift > 0) ? ((( int64 )( b & m_BMask)) << m_BShift) : (((int64)( b & m_BMask)) >> -m_BShift); val |= ( ( int64 )(a & m_AMask) ) << m_AShift; switch( m_Size ) { case 6: { if ( IsPC() || IsPS3() || !IsX360() ) { ((unsigned int *)m_pBits)[0] = val & 0xffffffff; ((unsigned short *)m_pBits)[2] = (unsigned short)( ( val >> 32 ) & 0xffff ); } else { ((unsigned int *)m_pBits)[0] = (val >> 16) & 0xffffffff; ((unsigned short *)m_pBits)[2] = (unsigned short)( val & 0xffff ); } return; } case 8: { if ( IsPC() || IsPS3() || !IsX360() ) { ((unsigned int *)m_pBits)[0] = val & 0xffffffff; ((unsigned int *)m_pBits)[1] = ( val >> 32 ) & 0xffffffff; } else { ((unsigned int *)m_pBits)[0] = ( val >> 32 ) & 0xffffffff; ((unsigned int *)m_pBits)[1] = val & 0xffffffff; } return; } default: Assert( 0 ); return; } } } #ifdef _X360 // There isn't a PC port of these because of the many varied // pixel formats the PC deals with. If you write SSE versions // of all the various necessary packers, then this can be made // to work on PC. //----------------------------------------------------------------------------- // Writes a pixel, advances the write index //----------------------------------------------------------------------------- FORCEINLINE_PIXEL void CPixelWriter::WritePixel( FLTX4 rgba ) RESTRICT { WritePixelNoAdvance(rgba); m_pBits += m_Size; } //----------------------------------------------------------------------------- // Writes a pixel without advancing the index // rgba are four float values, each on the range 0..255 (though they may leak // fractionally over 255 due to numerical errors earlier) //----------------------------------------------------------------------------- FORCEINLINE_PIXEL void CPixelWriter::WritePixelNoAdvance( FLTX4 rgba ) RESTRICT { Assert( IsUsingSupportedFormat() ); Assert( !IsUsingFloatFormat() ); switch (m_Size) { case 0: return; case 4: { AssertMsg((reinterpret_cast(m_pBits) & 0x03) == 0,"Unaligned m_pBits in WritePixelNoAdvance!"); switch ( m_Format ) { // note: format names are low-order-byte first. case IMAGE_FORMAT_RGBA8888: case IMAGE_FORMAT_LINEAR_RGBA8888: WritePixelNoAdvance_RGBA8888(rgba); break; case IMAGE_FORMAT_BGRA8888: // NOTE! : the low order bits are first in this naming convention. case IMAGE_FORMAT_LINEAR_BGRA8888: WritePixelNoAdvance_BGRA8888(rgba); break; default: AssertMsg1(false, "Unknown four-byte pixel format %d in lightmap write.\n", m_Format); } break; } case 8: { switch ( m_Format ) { // note: format names are low-order-byte first. case IMAGE_FORMAT_RGBA16161616: case IMAGE_FORMAT_LINEAR_RGBA16161616: WritePixelNoAdvance_RGBA16161616(rgba); break; default: AssertMsg1(false, "Unknown eight-byte pixel format %d in lightmap write.\n", m_Format); } break; } default: AssertMsg1(false, "WritePixelNoAdvance on unsupported 360 %d-byte format\n", m_Size); break; } } // here are some explicit formats so we can avoid the switch: FORCEINLINE void CPixelWriter::WritePixelNoAdvance_RGBA8888( FLTX4 rgba ) { // it's easier to do tiered convert-saturates here // than the d3d color convertor op // first permute const static fltx4 permReverse = XMVectorPermuteControl(3,2,1,0); fltx4 N = XMVectorPermute(rgba, rgba, permReverse); N = __vctuxs(N, 0); // convert to unsigned fixed point 0 w/ saturate N = __vpkuwus(N, N); // convert to halfword saturate N = __vpkuhus(N, N); // convert to byte saturate N = __vspltw(N, 0); // splat w-word to all four __stvewx(N, m_pBits, 0); // store whatever word happens to be aligned with m_pBits to that word } FORCEINLINE void CPixelWriter::WritePixelNoAdvance_BGRA8888( FLTX4 rgba ) { WritePixelNoAdvance_BGRA8888( rgba, m_pBits ); } FORCEINLINE void CPixelWriter::WritePixelNoAdvance_RGBA16161616( FLTX4 rgba ) { // input is in 0..16 range. //Multiply by 4096 to get into 0..65536 range static const fltx4 vMult = { 4096.0f, 4096.0f, 4096.0f, 65536.0f }; rgba = XMVectorMultiply( rgba, vMult ); XMStoreUShort4( (XMUSHORT4*)m_pBits, rgba ); } FORCEINLINE void CPixelWriter::WritePixelNoAdvance_BGRA8888( FLTX4 rgba, void * RESTRICT pBits ) RESTRICT { // this happens to be in an order such that we can use the handy builtin packing op // clamp to 0..255 (coz it might have leaked over) static const fltx4 vTwoFiftyFive = {255.0f, 255.0f, 255.0f, 255.0f}; fltx4 N = MinSIMD(vTwoFiftyFive, rgba); // the magic number such that when mul-accummulated against rbga, // gets us a representation 3.0 + (r)*2^-22 -- puts the bits at // the bottom of the float static CONST XMVECTOR PackScale = { (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22))}; // 255.0f / (FLOAT)(1 << 22) static const XMVECTOR Three = {3.0f, 3.0f, 3.0f, 3.0f}; N = __vmaddfp(N, PackScale, Three); N = __vpkd3d(N, N, VPACK_D3DCOLOR, VPACK_32, 3); // pack to X word N = __vspltw(N, 0); // splat X // this is a nasty thing to work around the April XDK bug in __stvewx { void * RESTRICT copyOfPBits = pBits; __stvewx(N, copyOfPBits, 0); } } // for writing entire SIMD registers at once FORCEINLINE void CPixelWriter::WriteFourPixelsExplicitLocation_BGRA8888 ( FLTX4 rgba, int offset ) { Assert( (reinterpret_cast(m_pBits) & 15) == 0 ); // assert alignment XMStoreVector4A( m_pBits + offset , rgba ); } #elif defined ( _PS3 ) // There isn't a PC port of these because of the many varied // pixel formats the PC deals with. If you write SSE versions // of all the various necessary packers, then this can be made // to work on PC. //----------------------------------------------------------------------------- // Writes a pixel, advances the write index //----------------------------------------------------------------------------- FORCEINLINE_PIXEL void CPixelWriter::WritePixel( FLTX4 rgba ) RESTRICT { WritePixelNoAdvance(rgba); m_pBits += m_Size; } //----------------------------------------------------------------------------- // Writes a pixel without advancing the index // rgba are four float values, each on the range 0..255 (though they may leak // fractionally over 255 due to numerical errors earlier) //----------------------------------------------------------------------------- FORCEINLINE_PIXEL void CPixelWriter::WritePixelNoAdvance( FLTX4 rgba ) RESTRICT { Assert( IsUsingSupportedFormat() ); Assert( !IsUsingFloatFormat() ); switch (m_Size) { case 0: return; case 4: { AssertMsg((reinterpret_cast(m_pBits) & 0x03) == 0,"Unaligned m_pBits in WritePixelNoAdvance!"); switch ( m_Format ) { // note: format names are low-order-byte first. case IMAGE_FORMAT_RGBA8888: case IMAGE_FORMAT_LINEAR_RGBA8888: WritePixelNoAdvance_RGBA8888(rgba); break; case IMAGE_FORMAT_BGRA8888: // NOTE! : the low order bits are first in this naming convention. //EAPS3 case IMAGE_FORMAT_LINEAR_BGRA8888: WritePixelNoAdvance_BGRA8888(rgba); break; default: AssertMsg1(false, "Unknown four-byte pixel format %d in lightmap write.\n", m_Format); } break; } default: AssertMsg1(false, "WritePixelNoAdvance on unsupported 360 %d-byte format\n", m_Size); break; } } // here are some explicit formats so we can avoid the switch: FORCEINLINE void CPixelWriter::WritePixelNoAdvance_RGBA8888( FLTX4 rgba ) { // it's easier to do tiered convert-saturates here // than the d3d color convertor op // first permute fltx4 N = vec_perm(rgba, rgba, _VEC_SWIZZLE_WZYX); vector unsigned int N_ui = vec_ctu(N, 0); // convert to unsigned fixed point 0 w/ saturate vector unsigned short N_us = vec_packsu(N_ui, N_ui); // convert to halfword saturate vector unsigned char N_uc = vec_packsu(N_us, N_us); // convert to byte saturate // don't need to do this, should already be unpacked to all elements in the same way // N = vec_splat((fltx4)N_uc, 0); // splat w-word to all four // vec_ste(N, 0, m_pBits); // store whatever word happens to be aligned with m_pBits to that word vec_ste((vec_uint4)N_uc, 0, (unsigned int *)m_pBits); // store whatever word happens to be aligned with m_pBits to that word } FORCEINLINE void CPixelWriter::WritePixelNoAdvance_BGRA8888( FLTX4 rgba ) { WritePixelNoAdvance_BGRA8888( rgba, m_pBits ); } FORCEINLINE void CPixelWriter::WritePixelNoAdvance_BGRA8888( FLTX4 rgba, void * RESTRICT pBits ) RESTRICT { fltx4 N; vector unsigned int N_ui = vec_ctu(rgba, 0); // convert to unsigned fixed point 0 w/ saturate vector unsigned short N_us = vec_packsu(N_ui, N_ui); // convert to halfword saturate vector unsigned char N_uc = vec_packsu(N_us, N_us); // convert to byte saturate // N = vec_splat((fltx4)N_uc, 0); // splat w-word to all four // vec_ste(N, 0, (float*)pBits); // store whatever word happens to be aligned with m_pBits to that word vec_ste((vec_uint4)N_uc, 0, (unsigned int*)pBits); // store whatever word happens to be aligned with m_pBits to that word } // for writing entire SIMD registers at once FORCEINLINE void CPixelWriter::WriteFourPixelsExplicitLocation_BGRA8888 ( FLTX4 rgba, int offset ) { Assert( (reinterpret_cast(m_pBits) & 15) == 0 && offset == 0 ); // assert alignment // XMStoreVector4A( m_pBits + offset , rgba ); vec_st( rgba, offset, (float*)m_pBits ); } #endif //----------------------------------------------------------------------------- // Writes a signed pixel without advancing the index //----------------------------------------------------------------------------- FORCEINLINE_PIXEL void CPixelWriter::WritePixelNoAdvanceSigned( int r, int g, int b, int a ) { Assert( IsUsingSupportedFormat() ); Assert( !IsUsingFloatFormat() ); if ( m_Size <= 0 ) { return; } if ( m_Size < 5 ) { int val = (r & m_RMask) << m_RShift; val |= (g & m_GMask) << m_GShift; val |= (m_BShift > 0) ? ((b & m_BMask) << m_BShift) : ((b & m_BMask) >> -m_BShift); val |= (a & m_AMask) << m_AShift; signed char *pSignedBits = (signed char *)m_pBits; if ( IsPC() || IsPS3() || !IsX360() ) { switch ( m_Size ) { case 4: pSignedBits[3] = (signed char)((val >> 24) & 0xff); // fall through intentionally. case 3: pSignedBits[2] = (signed char)((val >> 16) & 0xff); // fall through intentionally. case 2: pSignedBits[1] = (signed char)((val >> 8) & 0xff); // fall through intentionally. case 1: pSignedBits[0] = (signed char)((val & 0xff)); // fall through intentionally. return; } } else { switch ( m_Size ) { case 4: pSignedBits[0] = (signed char)((val >> 24) & 0xff); pSignedBits[1] = (signed char)((val >> 16) & 0xff); pSignedBits[2] = (signed char)((val >> 8) & 0xff); pSignedBits[3] = (signed char)(val & 0xff); break; case 3: pSignedBits[0] = (signed char)((val >> 16) & 0xff); pSignedBits[1] = (signed char)((val >> 8) & 0xff); pSignedBits[2] = (signed char)(val & 0xff); break; case 2: pSignedBits[0] = (signed char)((val >> 8) & 0xff); pSignedBits[1] = (signed char)(val & 0xff); break; case 1: pSignedBits[0] = (signed char)(val & 0xff); break; } } } else { int64 val = ( ( int64 )(r & m_RMask) ) << m_RShift; val |= ( ( int64 )(g & m_GMask) ) << m_GShift; val |= (m_BShift > 0) ? ((( int64 )( b & m_BMask)) << m_BShift) : (((int64)( b & m_BMask)) >> -m_BShift); val |= ( ( int64 )(a & m_AMask) ) << m_AShift; signed char *pSignedBits = ( signed char * )m_pBits; if ( IsPC() || IsPS3() || !IsX360() ) { switch( m_Size ) { case 8: pSignedBits[7] = (signed char)((val >> 56) & 0xff); pSignedBits[6] = (signed char)((val >> 48) & 0xff); // fall through intentionally. case 6: pSignedBits[5] = (signed char)((val >> 40) & 0xff); pSignedBits[4] = (signed char)((val >> 32) & 0xff); // fall through intentionally. case 4: pSignedBits[3] = (signed char)((val >> 24) & 0xff); // fall through intentionally. case 3: pSignedBits[2] = (signed char)((val >> 16) & 0xff); // fall through intentionally. case 2: pSignedBits[1] = (signed char)((val >> 8) & 0xff); // fall through intentionally. case 1: pSignedBits[0] = (signed char)((val & 0xff)); break; default: Assert( 0 ); return; } } else { switch( m_Size ) { case 8: pSignedBits[0] = (signed char)((val >> 56) & 0xff); pSignedBits[1] = (signed char)((val >> 48) & 0xff); pSignedBits[2] = (signed char)((val >> 40) & 0xff); pSignedBits[3] = (signed char)((val >> 32) & 0xff); pSignedBits[4] = (signed char)((val >> 24) & 0xff); pSignedBits[5] = (signed char)((val >> 16) & 0xff); pSignedBits[6] = (signed char)((val >> 8) & 0xff); pSignedBits[7] = (signed char)(val & 0xff); break; case 6: pSignedBits[0] = (signed char)((val >> 40) & 0xff); pSignedBits[1] = (signed char)((val >> 32) & 0xff); pSignedBits[2] = (signed char)((val >> 24) & 0xff); pSignedBits[3] = (signed char)((val >> 16) & 0xff); pSignedBits[4] = (signed char)((val >> 8) & 0xff); pSignedBits[5] = (signed char)(val & 0xff); break; case 4: pSignedBits[0] = (signed char)((val >> 24) & 0xff); pSignedBits[1] = (signed char)((val >> 16) & 0xff); pSignedBits[2] = (signed char)((val >> 8) & 0xff); pSignedBits[3] = (signed char)(val & 0xff); break; case 3: pSignedBits[0] = (signed char)((val >> 16) & 0xff); pSignedBits[1] = (signed char)((val >> 8) & 0xff); pSignedBits[2] = (signed char)(val & 0xff); break; case 2: pSignedBits[0] = (signed char)((val >> 8) & 0xff); pSignedBits[1] = (signed char)(val & 0xff); break; case 1: pSignedBits[0] = (signed char)(val & 0xff); break; default: Assert( 0 ); return; } } } } FORCEINLINE_PIXEL void CPixelWriter::ReadPixelNoAdvance( int &r, int &g, int &b, int &a ) { Assert( IsUsingSupportedFormat() ); Assert( !IsUsingFloatFormat() ); int val = m_pBits[0]; if ( m_Size > 1 ) { if ( IsPC() || IsPS3() || !IsX360() ) { val |= (int)m_pBits[1] << 8; if ( m_Size > 2 ) { val |= (int)m_pBits[2] << 16; if ( m_Size > 3 ) { val |= (int)m_pBits[3] << 24; } } } else { val <<= 8; val |= (int)m_pBits[1]; if ( m_Size > 2 ) { val <<= 8; val |= (int)m_pBits[2]; if ( m_Size > 3 ) { val <<= 8; val |= (int)m_pBits[3]; } } } } r = (val>>m_RShift) & m_RMask; g = (val>>m_GShift) & m_GMask; b = (val>>m_BShift) & m_BMask; a = (val>>m_AShift) & m_AMask; } #endif // PIXELWRITER_H;