Counter Strike : Global Offensive Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1180 lines
33 KiB

  1. //===== Copyright 1996-2005, Valve Corporation, All rights reserved. ======//
  2. //
  3. // Purpose:
  4. //
  5. // $NoKeywords: $
  6. //
  7. //===========================================================================//
  8. #ifndef PIXELWRITER_H
  9. #define PIXELWRITER_H
  10. #ifdef _WIN32
  11. #pragma once
  12. #endif
  13. #if defined( _WIN32 ) || defined( _PS3 )
  14. #define FORCEINLINE_PIXEL FORCEINLINE
  15. #elif POSIX
  16. #define FORCEINLINE_PIXEL inline
  17. #else
  18. #error "implement me"
  19. #endif
  // When this flag is non-zero, CPixelWriter may be pointed at formats for which
  // direct pixel access is not supported (like DXT1) without spewing errors.
  // The only actions that are available for such formats are direct access to
  // the bitstream.
  #define ALLOW_UNSUPPORTED_FORMATS 1
  24. #include "bitmap/imageformat.h"
  25. #include "tier0/dbg.h"
  26. #include "mathlib/compressed_vector.h"
  27. #include "mathlib/ssemath.h"
  28. #include "mathlib/vector4d.h"
  29. #include "cache_hints.h"
  30. //-----------------------------------------------------------------------------
  31. // Color writing class
  32. //-----------------------------------------------------------------------------
  33. class CPixelWriter
  34. {
  35. public:
  36. FORCEINLINE void SetPixelMemory( ImageFormat format, void* pMemory, int stride );
  37. FORCEINLINE void *GetPixelMemory() { return m_pBase; }
  38. // this is no longer used:
  39. #if 0 // defined( _X360 )
  40. // set after SetPixelMemory()
  41. FORCEINLINE void ActivateByteSwapping( bool bSwap );
  42. #endif
  43. FORCEINLINE void Seek( int x, int y );
  44. FORCEINLINE void* SkipBytes( int n ) RESTRICT;
  45. FORCEINLINE void SkipPixels( int n );
  46. FORCEINLINE void WritePixel( int r, int g, int b, int a = 255 );
  47. FORCEINLINE void WritePixelNoAdvance( int r, int g, int b, int a = 255 );
  48. FORCEINLINE void WritePixelSigned( int r, int g, int b, int a = 255 );
  49. FORCEINLINE void WritePixelNoAdvanceSigned( int r, int g, int b, int a = 255 );
  50. FORCEINLINE void ReadPixelNoAdvance( int &r, int &g, int &b, int &a );
  51. // Floating point formats
  52. FORCEINLINE void WritePixelNoAdvanceF( float r, float g, float b, float a = 1.0f );
  53. FORCEINLINE void WritePixelF( float r, float g, float b, float a = 1.0f );
  54. FORCEINLINE void WriteManyPixelF( const float * RESTRICT pSrc, const int num ); // write a contiguous stream of 4-floats.
  55. // SIMD formats
  56. FORCEINLINE void WritePixel( FLTX4 rgba ) RESTRICT;
  57. FORCEINLINE void WritePixelNoAdvance( FLTX4 rgba ) RESTRICT;
  58. #if defined ( _X360 ) || defined ( _PS3 )
  59. // here are some explicit formats so we can avoid the switch:
  60. FORCEINLINE void WritePixelNoAdvance_RGBA8888( FLTX4 rgba );
  61. FORCEINLINE void WritePixelNoAdvance_BGRA8888( FLTX4 rgba );
  62. // as above, but with m_pBits passed in to avoid a LHS
  63. FORCEINLINE void WritePixelNoAdvance_BGRA8888( FLTX4 rgba, void *pBits ) RESTRICT;
  64. // for writing entire SIMD registers at once when they have
  65. // already been packed, and when m_pBits is vector-aligned
  66. // (which is a requirement for write-combined memory)
  67. // offset is added to m_pBits (saving you from the obligatory
  68. // LHS of a SkipBytes)
  69. FORCEINLINE void WriteFourPixelsExplicitLocation_BGRA8888( FLTX4 rgba, int offset );
  70. FORCEINLINE void WritePixelNoAdvance_RGBA16161616( FLTX4 rgba );
  71. #endif
  72. FORCEINLINE void WritePixelNoAdvance16F( float r, float g, float b, float a );
  73. FORCEINLINE unsigned char GetPixelSize() { return m_Size; }
  74. FORCEINLINE unsigned short GetBytesPerRow() { return m_BytesPerRow; }
  75. FORCEINLINE bool IsUsingFloatFormat() const;
  76. FORCEINLINE bool IsUsing16BitFloatFormat() const;
  77. // We allow "unsupported" formats only if you are writing directly into the bitstream
  78. FORCEINLINE bool IsUsingSupportedFormat() const;
  79. FORCEINLINE unsigned char *GetCurrentPixel() { return m_pBits; }
  80. private:
  81. // helper functions for some explicit combinations of flags and sizes -- lets us
  82. // do some conversions on the GPRs using bitshifts rather than a round trip to the
  83. // FPU and a LHS.
  84. FORCEINLINE void WriteManyPixelTo16BitF( const float * RESTRICT pSrc, int num ) RESTRICT; // write a contiguous stream of 4-floats.
  85. // FORCEINLINE void WriteManyPixelTo32BitF( const float * RESTRICT pSrc, const int num ); // write a contiguous stream of 4-floats.
  86. FORCEINLINE void AssertFormatIsSupported( ImageFormat format ) const;
  87. enum
  88. {
  89. PIXELWRITER_USING_FLOAT_FORMAT = 0x01,
  90. PIXELWRITER_USING_16BIT_FLOAT_FORMAT = 0x02,
  91. PIXELWRITER_SWAPBYTES = 0x04,
  92. PIXELWRITER_USING_UNSUPPORTED_FORMAT = 0x08,
  93. };
  94. unsigned char* m_pBase;
  95. unsigned char* m_pBits;
  96. unsigned short m_BytesPerRow;
  97. unsigned char m_Size;
  98. unsigned char m_nFlags;
  99. signed short m_RShift;
  100. signed short m_GShift;
  101. signed short m_BShift;
  102. signed short m_AShift;
  103. unsigned int m_RMask;
  104. unsigned int m_GMask;
  105. unsigned int m_BMask;
  106. unsigned int m_AMask;
  107. #if defined ( _X360 ) || defined ( _PS3 )
  108. ImageFormat m_Format;
  109. public:
  110. inline const ImageFormat &GetFormat() { return m_Format; }
  111. private:
  112. #endif
  113. };
  114. FORCEINLINE_PIXEL bool CPixelWriter::IsUsingFloatFormat() const
  115. {
  116. return (m_nFlags & PIXELWRITER_USING_FLOAT_FORMAT) != 0;
  117. }
  118. FORCEINLINE_PIXEL bool CPixelWriter::IsUsing16BitFloatFormat() const
  119. {
  120. return (m_nFlags & PIXELWRITER_USING_16BIT_FLOAT_FORMAT) != 0;
  121. }
  122. FORCEINLINE_PIXEL bool CPixelWriter::IsUsingSupportedFormat() const
  123. {
  124. return (m_nFlags & PIXELWRITER_USING_UNSUPPORTED_FORMAT) == 0;
  125. }
  126. FORCEINLINE_PIXEL void CPixelWriter::SetPixelMemory( ImageFormat format, void* pMemory, int stride )
  127. {
  128. m_pBits = (unsigned char*)pMemory;
  129. m_pBase = m_pBits;
  130. m_BytesPerRow = (unsigned short)stride;
  131. m_nFlags = 0;
  132. #if defined ( _X360 ) || defined ( _PS3 )
  133. m_Format = format;
  134. #endif
  135. switch ( format )
  136. {
  137. case IMAGE_FORMAT_R32F: // NOTE! : the low order bits are first in this naming convention.
  138. m_Size = 4;
  139. m_RShift = 0;
  140. m_GShift = 0;
  141. m_BShift = 0;
  142. m_AShift = 0;
  143. m_RMask = 0xFFFFFFFF;
  144. m_GMask = 0x0;
  145. m_BMask = 0x0;
  146. m_AMask = 0x0;
  147. m_nFlags |= PIXELWRITER_USING_FLOAT_FORMAT;
  148. break;
  149. case IMAGE_FORMAT_RGBA32323232F:
  150. m_Size = 16;
  151. m_RShift = 0;
  152. m_GShift = 32;
  153. m_BShift = 64;
  154. m_AShift = 96;
  155. m_RMask = 0xFFFFFFFF;
  156. m_GMask = 0xFFFFFFFF;
  157. m_BMask = 0xFFFFFFFF;
  158. m_AMask = 0xFFFFFFFF;
  159. m_nFlags |= PIXELWRITER_USING_FLOAT_FORMAT;
  160. break;
  161. case IMAGE_FORMAT_RGBA16161616F:
  162. m_Size = 8;
  163. m_RShift = 0;
  164. m_GShift = 16;
  165. m_BShift = 32;
  166. m_AShift = 48;
  167. m_RMask = 0xFFFF;
  168. m_GMask = 0xFFFF;
  169. m_BMask = 0xFFFF;
  170. m_AMask = 0xFFFF;
  171. m_nFlags |= PIXELWRITER_USING_FLOAT_FORMAT | PIXELWRITER_USING_16BIT_FLOAT_FORMAT;
  172. break;
  173. case IMAGE_FORMAT_RGBA8888:
  174. #if defined( _X360 )
  175. case IMAGE_FORMAT_LINEAR_RGBA8888:
  176. #endif
  177. m_Size = 4;
  178. m_RShift = 0;
  179. m_GShift = 8;
  180. m_BShift = 16;
  181. m_AShift = 24;
  182. m_RMask = 0xFF;
  183. m_GMask = 0xFF;
  184. m_BMask = 0xFF;
  185. m_AMask = 0xFF;
  186. break;
  187. case IMAGE_FORMAT_BGRA1010102: // NOTE! : the low order bits are first in this naming convention.
  188. m_Size = 4;
  189. m_RShift = 20;
  190. m_GShift = 10;
  191. m_BShift = 0;
  192. m_AShift = 30;
  193. m_RMask = 0x3FF;
  194. m_GMask = 0x3FF;
  195. m_BMask = 0x3FF;
  196. m_AMask = 0x03;
  197. break;
  198. case IMAGE_FORMAT_BGRA8888: // NOTE! : the low order bits are first in this naming convention.
  199. #if defined( _X360 )
  200. case IMAGE_FORMAT_LINEAR_BGRA8888:
  201. #endif
  202. m_Size = 4;
  203. m_RShift = 16;
  204. m_GShift = 8;
  205. m_BShift = 0;
  206. m_AShift = 24;
  207. m_RMask = 0xFF;
  208. m_GMask = 0xFF;
  209. m_BMask = 0xFF;
  210. m_AMask = 0xFF;
  211. break;
  212. case IMAGE_FORMAT_BGRX8888:
  213. #if defined( _X360 )
  214. case IMAGE_FORMAT_LINEAR_BGRX8888:
  215. #endif
  216. m_Size = 4;
  217. m_RShift = 16;
  218. m_GShift = 8;
  219. m_BShift = 0;
  220. m_AShift = 24;
  221. m_RMask = 0xFF;
  222. m_GMask = 0xFF;
  223. m_BMask = 0xFF;
  224. m_AMask = 0x00;
  225. break;
  226. case IMAGE_FORMAT_BGRA4444:
  227. m_Size = 2;
  228. m_RShift = 4;
  229. m_GShift = 0;
  230. m_BShift = -4;
  231. m_AShift = 8;
  232. m_RMask = 0xF0;
  233. m_GMask = 0xF0;
  234. m_BMask = 0xF0;
  235. m_AMask = 0xF0;
  236. break;
  237. case IMAGE_FORMAT_BGR888:
  238. m_Size = 3;
  239. m_RShift = 16;
  240. m_GShift = 8;
  241. m_BShift = 0;
  242. m_AShift = 0;
  243. m_RMask = 0xFF;
  244. m_GMask = 0xFF;
  245. m_BMask = 0xFF;
  246. m_AMask = 0x00;
  247. break;
  248. case IMAGE_FORMAT_BGR565:
  249. m_Size = 2;
  250. m_RShift = 8;
  251. m_GShift = 3;
  252. m_BShift = -3;
  253. m_AShift = 0;
  254. m_RMask = 0xF8;
  255. m_GMask = 0xFC;
  256. m_BMask = 0xF8;
  257. m_AMask = 0x00;
  258. break;
  259. case IMAGE_FORMAT_BGRA5551:
  260. case IMAGE_FORMAT_BGRX5551:
  261. m_Size = 2;
  262. m_RShift = 7;
  263. m_GShift = 2;
  264. m_BShift = -3;
  265. m_AShift = 8;
  266. m_RMask = 0xF8;
  267. m_GMask = 0xF8;
  268. m_BMask = 0xF8;
  269. m_AMask = 0x80;
  270. break;
  271. // GR - alpha format for HDR support
  272. case IMAGE_FORMAT_A8:
  273. #if defined( _X360 )
  274. case IMAGE_FORMAT_LINEAR_A8:
  275. #endif
  276. m_Size = 1;
  277. m_RShift = 0;
  278. m_GShift = 0;
  279. m_BShift = 0;
  280. m_AShift = 0;
  281. m_RMask = 0x00;
  282. m_GMask = 0x00;
  283. m_BMask = 0x00;
  284. m_AMask = 0xFF;
  285. break;
  286. case IMAGE_FORMAT_UVWQ8888:
  287. m_Size = 4;
  288. m_RShift = 0;
  289. m_GShift = 8;
  290. m_BShift = 16;
  291. m_AShift = 24;
  292. m_RMask = 0xFF;
  293. m_GMask = 0xFF;
  294. m_BMask = 0xFF;
  295. m_AMask = 0xFF;
  296. break;
  297. case IMAGE_FORMAT_RGBA16161616:
  298. #if defined( _X360 )
  299. case IMAGE_FORMAT_LINEAR_RGBA16161616:
  300. #endif
  301. m_Size = 8;
  302. if ( !IsX360() )
  303. {
  304. m_RShift = 0;
  305. m_GShift = 16;
  306. m_BShift = 32;
  307. m_AShift = 48;
  308. }
  309. else
  310. {
  311. m_RShift = 48;
  312. m_GShift = 32;
  313. m_BShift = 16;
  314. m_AShift = 0;
  315. }
  316. m_RMask = 0xFFFF;
  317. m_GMask = 0xFFFF;
  318. m_BMask = 0xFFFF;
  319. m_AMask = 0xFFFF;
  320. break;
  321. case IMAGE_FORMAT_I8:
  322. #if defined( _X360 )
  323. case IMAGE_FORMAT_LINEAR_I8:
  324. #endif
  325. // whatever goes into R is considered the intensity.
  326. m_Size = 1;
  327. m_RShift = 0;
  328. m_GShift = 0;
  329. m_BShift = 0;
  330. m_AShift = 0;
  331. m_RMask = 0xFF;
  332. m_GMask = 0x00;
  333. m_BMask = 0x00;
  334. m_AMask = 0x00;
  335. break;
  336. // FIXME: Add more color formats as need arises
  337. default:
  338. {
  339. #if ALLOW_UNSUPPORTED_FORMATS
  340. m_nFlags |= PIXELWRITER_USING_UNSUPPORTED_FORMAT;
  341. #else // ALLOW_UNSUPPORTED_FORMATS
  342. static bool format_error_printed[NUM_IMAGE_FORMATS];
  343. if ( !format_error_printed[format] )
  344. {
  345. Assert( 0 );
  346. Msg( "CPixelWriter::SetPixelMemory: Unsupported image format %i\n", format );
  347. format_error_printed[format] = true;
  348. }
  349. #endif // ALLOW_UNSUPPORTED_FORMATS
  350. m_Size = 0; // set to zero so that we don't stomp memory for formats that we don't understand.
  351. m_RShift = 0;
  352. m_GShift = 0;
  353. m_BShift = 0;
  354. m_AShift = 0;
  355. m_RMask = 0xFF;
  356. m_GMask = 0x00;
  357. m_BMask = 0x00;
  358. m_AMask = 0x00;
  359. }
  360. break;
  361. }
  362. }
  #if 0 // defined( _X360 )
  // Compiled out. Toggles the PIXELWRITER_SWAPBYTES flag and mirrors the channel
  // shifts within a 32-bit word whenever the flag actually changes state.
  FORCEINLINE void CPixelWriter::ActivateByteSwapping( bool bSwap )
  {
      // X360TBD: Who is trying to use this?
      // Purposely not hooked up because PixelWriter has been ported to read/write native pixels only
      Assert( 0 );

      const bool bAlreadySwapping = ( m_nFlags & PIXELWRITER_SWAPBYTES ) != 0;
      if ( bSwap == bAlreadySwapping )
      {
          // same state
          return;
      }

      if ( bSwap )
      {
          m_nFlags |= PIXELWRITER_SWAPBYTES;
          // only tested with 4 byte formats
          Assert( m_Size == 4 );
      }
      else
      {
          m_nFlags &= ~PIXELWRITER_SWAPBYTES;
      }

      // swap the shifts
      m_RShift = 24-m_RShift;
      m_GShift = 24-m_GShift;
      m_BShift = 24-m_BShift;
      m_AShift = 24-m_AShift;
  }
  #endif
  391. //-----------------------------------------------------------------------------
  392. // Sets where we're writing to
  393. //-----------------------------------------------------------------------------
  394. FORCEINLINE_PIXEL void CPixelWriter::Seek( int x, int y )
  395. {
  396. Assert( IsUsingSupportedFormat() );
  397. m_pBits = m_pBase + y * m_BytesPerRow + x * m_Size;
  398. }
  399. //-----------------------------------------------------------------------------
  400. // Skips n bytes:
  401. //-----------------------------------------------------------------------------
  402. FORCEINLINE_PIXEL void* CPixelWriter::SkipBytes( int n ) RESTRICT
  403. {
  404. m_pBits += n;
  405. return m_pBits;
  406. }
  407. //-----------------------------------------------------------------------------
  408. // Skips n pixels:
  409. //-----------------------------------------------------------------------------
  410. FORCEINLINE_PIXEL void CPixelWriter::SkipPixels( int n )
  411. {
  412. Assert( IsUsingSupportedFormat() );
  413. SkipBytes( n * m_Size );
  414. }
  415. //-----------------------------------------------------------------------------
  416. // Writes a floating-point pixel without advancing the index (PC and PS3 only)
  417. //-----------------------------------------------------------------------------
  418. FORCEINLINE_PIXEL void CPixelWriter::WritePixelNoAdvanceF( float r, float g, float b, float a )
  419. {
  420. Assert( IsUsingSupportedFormat() );
  421. Assert( IsUsingFloatFormat() );
  422. // X360TBD: Not ported
  423. Assert( IsPC() || IsPS3() );
  424. if (PIXELWRITER_USING_16BIT_FLOAT_FORMAT & m_nFlags)
  425. {
  426. WritePixelNoAdvance16F( r,g,b,a );
  427. }
  428. else
  429. {
  430. // fp32
  431. int pBuf[4] = { 0, 0, 0, 0 };
  432. pBuf[ m_RShift >> 5 ] |= (FloatBits(r) & m_RMask) << ( m_RShift & 0x1F );
  433. pBuf[ m_GShift >> 5 ] |= (FloatBits(g) & m_GMask) << ( m_GShift & 0x1F );
  434. pBuf[ m_BShift >> 5 ] |= (FloatBits(b) & m_BMask) << ( m_BShift & 0x1F );
  435. pBuf[ m_AShift >> 5 ] |= (FloatBits(a) & m_AMask) << ( m_AShift & 0x1F );
  436. memcpy( m_pBits, pBuf, m_Size );
  437. }
  438. }
  439. FORCEINLINE void CPixelWriter::WritePixelNoAdvance16F( float r, float g, float b, float a )
  440. {
  441. if ( IsPS3() )
  442. {
  443. // we know what the values of shift and mask are going to be because
  444. // of the format, so we can elide them and write directly
  445. float16 *fp16 = reinterpret_cast<float16 *>(m_pBits);
  446. fp16[0].SetFloat( r );
  447. fp16[1].SetFloat( g );
  448. fp16[2].SetFloat( b );
  449. fp16[3].SetFloat( a );
  450. }
  451. else
  452. {
  453. float16 fp16[4];
  454. fp16[0].SetFloat( r );
  455. fp16[1].SetFloat( g );
  456. fp16[2].SetFloat( b );
  457. fp16[3].SetFloat( a );
  458. // fp16
  459. unsigned short pBuf[4] = { 0, 0, 0, 0 };
  460. pBuf[ m_RShift >> 4 ] |= (fp16[0].GetBits() & m_RMask) << ( m_RShift & 0xF );
  461. pBuf[ m_GShift >> 4 ] |= (fp16[1].GetBits() & m_GMask) << ( m_GShift & 0xF );
  462. pBuf[ m_BShift >> 4 ] |= (fp16[2].GetBits() & m_BMask) << ( m_BShift & 0xF );
  463. pBuf[ m_AShift >> 4 ] |= (fp16[3].GetBits() & m_AMask) << ( m_AShift & 0xF );
  464. memcpy( m_pBits, pBuf, m_Size );
  465. }
  466. }
  467. //-----------------------------------------------------------------------------
  468. // Writes a lot of pixels, efficiently
  469. //-----------------------------------------------------------------------------
  470. FORCEINLINE_PIXEL void CPixelWriter::WriteManyPixelTo16BitF( const float * RESTRICT pSrc, int num ) RESTRICT
  471. {
  472. Assert( IsUsingSupportedFormat() );
  473. const static int SIZE = 4*sizeof(unsigned short); // known precondition
  474. const static int MASK = 0xFFFF;
  475. // another known precondition: m_RShift == 0 && m_GShift == 16 && m_BShift == 32 && m_AShift == 48
  476. unsigned char *pBits = m_pBits; // compiler actually fails to hoist this onto a register properly otherwise.
  477. for ( int i = 0; num; --num, ++i )
  478. {
  479. /* // this actually slowed things down, for whatever perverse reason.
  480. // every cache line boundary, prefetch the next in bloc, so long as we've at least 128 bytes left to go.
  481. // the destination is in noncacheable memory.
  482. if ( (num > 32) && ( (reinterpret_cast<unsigned int>(pSrc) & 127) == 0 ) )
  483. {
  484. PREFETCH_128( pSrc, 128 );
  485. }
  486. */
  487. float16 * RESTRICT pOut = reinterpret_cast< float16 * >(pBits);
  488. pOut[0].SetFloat( pSrc[0] );
  489. pOut[1].SetFloat( pSrc[1] );
  490. pOut[2].SetFloat( pSrc[2] );
  491. pOut[3].SetFloat( pSrc[3] );
  492. /*
  493. pAck[i+0].SetFloat( pSrc[0] );
  494. pAck[i+1].SetFloat( pSrc[1] );
  495. pAck[i+2].SetFloat( pSrc[2] );
  496. pAck[i+3].SetFloat( pSrc[3] );
  497. */
  498. pSrc += 4;
  499. pBits += SIZE;
  500. }
  501. m_pBits = pBits;
  502. }
  503. //-----------------------------------------------------------------------------
  504. // Writes a floating-point pixel, advances the write index
  505. //-----------------------------------------------------------------------------
  506. FORCEINLINE_PIXEL void CPixelWriter::WritePixelF( float r, float g, float b, float a )
  507. {
  508. WritePixelNoAdvanceF(r, g, b, a);
  509. m_pBits += m_Size;
  510. }
  511. //-----------------------------------------------------------------------------
  512. // Writes an array of pixels, advancing the write index.
  513. // the input data is required to be a contiguous stream of Vector4Ds
  514. // (ie, each pixel consists of four consecutive floats, and the data is
  515. // consecutive in memory)
  516. //-----------------------------------------------------------------------------
  517. FORCEINLINE_PIXEL void CPixelWriter::WriteManyPixelF( const float * RESTRICT pSrc, const int num )
  518. {
  519. Assert( IsUsingSupportedFormat() );
  520. Assert( IsUsingFloatFormat() );
  521. // X360TBD: Not ported
  522. Assert( IsPC() || IsPS3() );
  523. if ( m_Size == 4*sizeof(unsigned short) && (PIXELWRITER_USING_16BIT_FLOAT_FORMAT & m_nFlags) )
  524. {
  525. Assert( m_RShift == 0 && m_GShift == 16 && m_BShift == 32 && m_AShift == 48 );
  526. WriteManyPixelTo16BitF( pSrc, num );
  527. }
  528. /*
  529. else if ( m_Size == 4*sizeof(int) && !(PIXELWRITER_USING_16BIT_FLOAT_FORMAT & m_nFlags) )
  530. {
  531. WriteManyPixelTo32BitF( pSrc, num );
  532. }
  533. */
  534. else for ( const float * const sentinel = pSrc + ( num * 4 ); pSrc < sentinel; pSrc += 4 ) // naive general case
  535. {
  536. WritePixelF( pSrc[0], pSrc[1], pSrc[2], pSrc[3] );
  537. }
  538. }
  539. //-----------------------------------------------------------------------------
  540. // Writes a pixel, advances the write index
  541. //-----------------------------------------------------------------------------
  542. FORCEINLINE_PIXEL void CPixelWriter::WritePixel( int r, int g, int b, int a )
  543. {
  544. WritePixelNoAdvance(r,g,b,a);
  545. m_pBits += m_Size;
  546. }
  547. //-----------------------------------------------------------------------------
  548. // Writes a signed pixel, advances the write index
  549. //-----------------------------------------------------------------------------
  550. FORCEINLINE_PIXEL void CPixelWriter::WritePixelSigned( int r, int g, int b, int a )
  551. {
  552. WritePixelNoAdvanceSigned(r,g,b,a);
  553. m_pBits += m_Size;
  554. }
  555. //-----------------------------------------------------------------------------
  556. // Writes a pixel without advancing the index
  557. //-----------------------------------------------------------------------------
  558. FORCEINLINE_PIXEL void CPixelWriter::WritePixelNoAdvance( int r, int g, int b, int a )
  559. {
  560. Assert( IsUsingSupportedFormat() );
  561. Assert( !IsUsingFloatFormat() );
  562. if ( m_Size <= 0 )
  563. {
  564. return;
  565. }
  566. if ( m_Size < 5 )
  567. {
  568. unsigned int val = (r & m_RMask) << m_RShift;
  569. val |= (g & m_GMask) << m_GShift;
  570. val |= (m_BShift > 0) ? ((b & m_BMask) << m_BShift) : ((b & m_BMask) >> -m_BShift);
  571. val |= (a & m_AMask) << m_AShift;
  572. switch( m_Size )
  573. {
  574. default:
  575. Assert( 0 );
  576. return;
  577. case 1:
  578. {
  579. m_pBits[0] = (unsigned char)((val & 0xff));
  580. return;
  581. }
  582. case 2:
  583. {
  584. ((unsigned short *)m_pBits)[0] = (unsigned short)((val & 0xffff));
  585. return;
  586. }
  587. case 3:
  588. {
  589. if ( IsPC() || IsPS3() || !IsX360() )
  590. {
  591. ((unsigned short *)m_pBits)[0] = (unsigned short)((val & 0xffff));
  592. m_pBits[2] = (unsigned char)((val >> 16) & 0xff);
  593. }
  594. else
  595. {
  596. m_pBits[0] = (unsigned char)(((val >> 16) & 0xff));
  597. m_pBits[1] = (unsigned char)(((val >> 8 ) & 0xff));
  598. m_pBits[2] = (unsigned char)(val & 0xff);
  599. }
  600. return;
  601. }
  602. case 4:
  603. {
  604. ((unsigned int *)m_pBits)[0] = val;
  605. return;
  606. }
  607. }
  608. }
  609. else // RGBA32323232 or RGBA16161616 -- PC only.
  610. {
  611. AssertMsgOnce(!IsX360(), "Unsupported lightmap format used in WritePixelNoAdvance(). This is a severe performance fault.\n");
  612. // AssertMsg(!IsX360(), "Unsupported lightmap format used in WritePixelNoAdvance(). This is a severe performance fault.\n");
  613. int64 val = ( ( int64 )(r & m_RMask) ) << m_RShift;
  614. val |= ( ( int64 )(g & m_GMask) ) << m_GShift;
  615. val |= (m_BShift > 0) ? ((( int64 )( b & m_BMask)) << m_BShift) : (((int64)( b & m_BMask)) >> -m_BShift);
  616. val |= ( ( int64 )(a & m_AMask) ) << m_AShift;
  617. switch( m_Size )
  618. {
  619. case 6:
  620. {
  621. if ( IsPC() || IsPS3() || !IsX360() )
  622. {
  623. ((unsigned int *)m_pBits)[0] = val & 0xffffffff;
  624. ((unsigned short *)m_pBits)[2] = (unsigned short)( ( val >> 32 ) & 0xffff );
  625. }
  626. else
  627. {
  628. ((unsigned int *)m_pBits)[0] = (val >> 16) & 0xffffffff;
  629. ((unsigned short *)m_pBits)[2] = (unsigned short)( val & 0xffff );
  630. }
  631. return;
  632. }
  633. case 8:
  634. {
  635. if ( IsPC() || IsPS3() || !IsX360() )
  636. {
  637. ((unsigned int *)m_pBits)[0] = val & 0xffffffff;
  638. ((unsigned int *)m_pBits)[1] = ( val >> 32 ) & 0xffffffff;
  639. }
  640. else
  641. {
  642. ((unsigned int *)m_pBits)[0] = ( val >> 32 ) & 0xffffffff;
  643. ((unsigned int *)m_pBits)[1] = val & 0xffffffff;
  644. }
  645. return;
  646. }
  647. default:
  648. Assert( 0 );
  649. return;
  650. }
  651. }
  652. }
  653. #ifdef _X360
  654. // There isn't a PC port of these because of the many varied
  655. // pixel formats the PC deals with. If you write SSE versions
  656. // of all the various necessary packers, then this can be made
  657. // to work on PC.
  658. //-----------------------------------------------------------------------------
  659. // Writes a pixel, advances the write index
  660. //-----------------------------------------------------------------------------
  661. FORCEINLINE_PIXEL void CPixelWriter::WritePixel( FLTX4 rgba ) RESTRICT
  662. {
  663. WritePixelNoAdvance(rgba);
  664. m_pBits += m_Size;
  665. }
  666. //-----------------------------------------------------------------------------
  667. // Writes a pixel without advancing the index
  668. // rgba are four float values, each on the range 0..255 (though they may leak
  669. // fractionally over 255 due to numerical errors earlier)
  670. //-----------------------------------------------------------------------------
  671. FORCEINLINE_PIXEL void CPixelWriter::WritePixelNoAdvance( FLTX4 rgba ) RESTRICT
  672. {
  673. Assert( IsUsingSupportedFormat() );
  674. Assert( !IsUsingFloatFormat() );
  675. switch (m_Size)
  676. {
  677. case 0:
  678. return;
  679. case 4:
  680. {
  681. AssertMsg((reinterpret_cast<unsigned int>(m_pBits) & 0x03) == 0,"Unaligned m_pBits in WritePixelNoAdvance!");
  682. switch ( m_Format )
  683. {
  684. // note: format names are low-order-byte first.
  685. case IMAGE_FORMAT_RGBA8888:
  686. case IMAGE_FORMAT_LINEAR_RGBA8888:
  687. WritePixelNoAdvance_RGBA8888(rgba);
  688. break;
  689. case IMAGE_FORMAT_BGRA8888: // NOTE! : the low order bits are first in this naming convention.
  690. case IMAGE_FORMAT_LINEAR_BGRA8888:
  691. WritePixelNoAdvance_BGRA8888(rgba);
  692. break;
  693. default:
  694. AssertMsg1(false, "Unknown four-byte pixel format %d in lightmap write.\n", m_Format);
  695. }
  696. break;
  697. }
  698. case 8:
  699. {
  700. switch ( m_Format )
  701. {
  702. // note: format names are low-order-byte first.
  703. case IMAGE_FORMAT_RGBA16161616:
  704. case IMAGE_FORMAT_LINEAR_RGBA16161616:
  705. WritePixelNoAdvance_RGBA16161616(rgba);
  706. break;
  707. default:
  708. AssertMsg1(false, "Unknown eight-byte pixel format %d in lightmap write.\n", m_Format);
  709. }
  710. break;
  711. }
  712. default:
  713. AssertMsg1(false, "WritePixelNoAdvance on unsupported 360 %d-byte format\n", m_Size);
  714. break;
  715. }
  716. }
  717. // here are some explicit formats so we can avoid the switch:
  718. FORCEINLINE void CPixelWriter::WritePixelNoAdvance_RGBA8888( FLTX4 rgba )
  719. {
  720. // it's easier to do tiered convert-saturates here
  721. // than the d3d color convertor op
  722. // first permute
  723. const static fltx4 permReverse = XMVectorPermuteControl(3,2,1,0);
  724. fltx4 N = XMVectorPermute(rgba, rgba, permReverse);
  725. N = __vctuxs(N, 0); // convert to unsigned fixed point 0 w/ saturate
  726. N = __vpkuwus(N, N); // convert to halfword saturate
  727. N = __vpkuhus(N, N); // convert to byte saturate
  728. N = __vspltw(N, 0); // splat w-word to all four
  729. __stvewx(N, m_pBits, 0); // store whatever word happens to be aligned with m_pBits to that word
  730. }
  731. FORCEINLINE void CPixelWriter::WritePixelNoAdvance_BGRA8888( FLTX4 rgba )
  732. {
  733. WritePixelNoAdvance_BGRA8888( rgba, m_pBits );
  734. }
  735. FORCEINLINE void CPixelWriter::WritePixelNoAdvance_RGBA16161616( FLTX4 rgba )
  736. {
  737. // input is in 0..16 range.
  738. //Multiply by 4096 to get into 0..65536 range
  739. static const fltx4 vMult = { 4096.0f, 4096.0f, 4096.0f, 65536.0f };
  740. rgba = XMVectorMultiply( rgba, vMult );
  741. XMStoreUShort4( (XMUSHORT4*)m_pBits, rgba );
  742. }
  743. FORCEINLINE void CPixelWriter::WritePixelNoAdvance_BGRA8888( FLTX4 rgba, void * RESTRICT pBits ) RESTRICT
  744. {
  745. // this happens to be in an order such that we can use the handy builtin packing op
  746. // clamp to 0..255 (coz it might have leaked over)
  747. static const fltx4 vTwoFiftyFive = {255.0f, 255.0f, 255.0f, 255.0f};
  748. fltx4 N = MinSIMD(vTwoFiftyFive, rgba);
  749. // the magic number such that when mul-accummulated against rbga,
  750. // gets us a representation 3.0 + (r)*2^-22 -- puts the bits at
  751. // the bottom of the float
  752. static CONST XMVECTOR PackScale = { (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22))}; // 255.0f / (FLOAT)(1 << 22)
  753. static const XMVECTOR Three = {3.0f, 3.0f, 3.0f, 3.0f};
  754. N = __vmaddfp(N, PackScale, Three);
  755. N = __vpkd3d(N, N, VPACK_D3DCOLOR, VPACK_32, 3); // pack to X word
  756. N = __vspltw(N, 0); // splat X
  757. // this is a nasty thing to work around the April XDK bug in __stvewx
  758. {
  759. void * RESTRICT copyOfPBits = pBits;
  760. __stvewx(N, copyOfPBits, 0);
  761. }
  762. }
  763. // for writing entire SIMD registers at once
  764. FORCEINLINE void CPixelWriter::WriteFourPixelsExplicitLocation_BGRA8888 ( FLTX4 rgba, int offset )
  765. {
  766. Assert( (reinterpret_cast<unsigned int>(m_pBits) & 15) == 0 ); // assert alignment
  767. XMStoreVector4A( m_pBits + offset , rgba );
  768. }
  769. #elif defined ( _PS3 )
  770. // There isn't a PC port of these because of the many varied
  771. // pixel formats the PC deals with. If you write SSE versions
  772. // of all the various necessary packers, then this can be made
  773. // to work on PC.
  774. //-----------------------------------------------------------------------------
  775. // Writes a pixel, advances the write index
  776. //-----------------------------------------------------------------------------
  777. FORCEINLINE_PIXEL void CPixelWriter::WritePixel( FLTX4 rgba ) RESTRICT
  778. {
  779. WritePixelNoAdvance(rgba);
  780. m_pBits += m_Size;
  781. }
  782. //-----------------------------------------------------------------------------
  783. // Writes a pixel without advancing the index
  784. // rgba are four float values, each on the range 0..255 (though they may leak
  785. // fractionally over 255 due to numerical errors earlier)
  786. //-----------------------------------------------------------------------------
  787. FORCEINLINE_PIXEL void CPixelWriter::WritePixelNoAdvance( FLTX4 rgba ) RESTRICT
  788. {
  789. Assert( IsUsingSupportedFormat() );
  790. Assert( !IsUsingFloatFormat() );
  791. switch (m_Size)
  792. {
  793. case 0:
  794. return;
  795. case 4:
  796. {
  797. AssertMsg((reinterpret_cast<unsigned int>(m_pBits) & 0x03) == 0,"Unaligned m_pBits in WritePixelNoAdvance!");
  798. switch ( m_Format )
  799. {
  800. // note: format names are low-order-byte first.
  801. case IMAGE_FORMAT_RGBA8888:
  802. case IMAGE_FORMAT_LINEAR_RGBA8888:
  803. WritePixelNoAdvance_RGBA8888(rgba);
  804. break;
  805. case IMAGE_FORMAT_BGRA8888: // NOTE! : the low order bits are first in this naming convention.
  806. //EAPS3 case IMAGE_FORMAT_LINEAR_BGRA8888:
  807. WritePixelNoAdvance_BGRA8888(rgba);
  808. break;
  809. default:
  810. AssertMsg1(false, "Unknown four-byte pixel format %d in lightmap write.\n", m_Format);
  811. }
  812. break;
  813. }
  814. default:
  815. AssertMsg1(false, "WritePixelNoAdvance on unsupported 360 %d-byte format\n", m_Size);
  816. break;
  817. }
  818. }
  819. // here are some explicit formats so we can avoid the switch:
  820. FORCEINLINE void CPixelWriter::WritePixelNoAdvance_RGBA8888( FLTX4 rgba )
  821. {
  822. // it's easier to do tiered convert-saturates here
  823. // than the d3d color convertor op
  824. // first permute
  825. fltx4 N = vec_perm(rgba, rgba, _VEC_SWIZZLE_WZYX);
  826. vector unsigned int N_ui = vec_ctu(N, 0); // convert to unsigned fixed point 0 w/ saturate
  827. vector unsigned short N_us = vec_packsu(N_ui, N_ui); // convert to halfword saturate
  828. vector unsigned char N_uc = vec_packsu(N_us, N_us); // convert to byte saturate
  829. // don't need to do this, should already be unpacked to all elements in the same way
  830. // N = vec_splat((fltx4)N_uc, 0); // splat w-word to all four
  831. // vec_ste(N, 0, m_pBits); // store whatever word happens to be aligned with m_pBits to that word
  832. vec_ste((vec_uint4)N_uc, 0, (unsigned int *)m_pBits); // store whatever word happens to be aligned with m_pBits to that word
  833. }
  834. FORCEINLINE void CPixelWriter::WritePixelNoAdvance_BGRA8888( FLTX4 rgba )
  835. {
  836. WritePixelNoAdvance_BGRA8888( rgba, m_pBits );
  837. }
  838. FORCEINLINE void CPixelWriter::WritePixelNoAdvance_BGRA8888( FLTX4 rgba, void * RESTRICT pBits ) RESTRICT
  839. {
  840. fltx4 N;
  841. vector unsigned int N_ui = vec_ctu(rgba, 0); // convert to unsigned fixed point 0 w/ saturate
  842. vector unsigned short N_us = vec_packsu(N_ui, N_ui); // convert to halfword saturate
  843. vector unsigned char N_uc = vec_packsu(N_us, N_us); // convert to byte saturate
  844. // N = vec_splat((fltx4)N_uc, 0); // splat w-word to all four
  845. // vec_ste(N, 0, (float*)pBits); // store whatever word happens to be aligned with m_pBits to that word
  846. vec_ste((vec_uint4)N_uc, 0, (unsigned int*)pBits); // store whatever word happens to be aligned with m_pBits to that word
  847. }
  848. // for writing entire SIMD registers at once
  849. FORCEINLINE void CPixelWriter::WriteFourPixelsExplicitLocation_BGRA8888 ( FLTX4 rgba, int offset )
  850. {
  851. Assert( (reinterpret_cast<unsigned int>(m_pBits) & 15) == 0 && offset == 0 ); // assert alignment
  852. // XMStoreVector4A( m_pBits + offset , rgba );
  853. vec_st( rgba, offset, (float*)m_pBits );
  854. }
  855. #endif
  856. //-----------------------------------------------------------------------------
  857. // Writes a signed pixel without advancing the index
  858. //-----------------------------------------------------------------------------
  859. FORCEINLINE_PIXEL void CPixelWriter::WritePixelNoAdvanceSigned( int r, int g, int b, int a )
  860. {
  861. Assert( IsUsingSupportedFormat() );
  862. Assert( !IsUsingFloatFormat() );
  863. if ( m_Size <= 0 )
  864. {
  865. return;
  866. }
  867. if ( m_Size < 5 )
  868. {
  869. int val = (r & m_RMask) << m_RShift;
  870. val |= (g & m_GMask) << m_GShift;
  871. val |= (m_BShift > 0) ? ((b & m_BMask) << m_BShift) : ((b & m_BMask) >> -m_BShift);
  872. val |= (a & m_AMask) << m_AShift;
  873. signed char *pSignedBits = (signed char *)m_pBits;
  874. if ( IsPC() || IsPS3() || !IsX360() )
  875. {
  876. switch ( m_Size )
  877. {
  878. case 4:
  879. pSignedBits[3] = (signed char)((val >> 24) & 0xff);
  880. // fall through intentionally.
  881. case 3:
  882. pSignedBits[2] = (signed char)((val >> 16) & 0xff);
  883. // fall through intentionally.
  884. case 2:
  885. pSignedBits[1] = (signed char)((val >> 8) & 0xff);
  886. // fall through intentionally.
  887. case 1:
  888. pSignedBits[0] = (signed char)((val & 0xff));
  889. // fall through intentionally.
  890. return;
  891. }
  892. }
  893. else
  894. {
  895. switch ( m_Size )
  896. {
  897. case 4:
  898. pSignedBits[0] = (signed char)((val >> 24) & 0xff);
  899. pSignedBits[1] = (signed char)((val >> 16) & 0xff);
  900. pSignedBits[2] = (signed char)((val >> 8) & 0xff);
  901. pSignedBits[3] = (signed char)(val & 0xff);
  902. break;
  903. case 3:
  904. pSignedBits[0] = (signed char)((val >> 16) & 0xff);
  905. pSignedBits[1] = (signed char)((val >> 8) & 0xff);
  906. pSignedBits[2] = (signed char)(val & 0xff);
  907. break;
  908. case 2:
  909. pSignedBits[0] = (signed char)((val >> 8) & 0xff);
  910. pSignedBits[1] = (signed char)(val & 0xff);
  911. break;
  912. case 1:
  913. pSignedBits[0] = (signed char)(val & 0xff);
  914. break;
  915. }
  916. }
  917. }
  918. else
  919. {
  920. int64 val = ( ( int64 )(r & m_RMask) ) << m_RShift;
  921. val |= ( ( int64 )(g & m_GMask) ) << m_GShift;
  922. val |= (m_BShift > 0) ? ((( int64 )( b & m_BMask)) << m_BShift) : (((int64)( b & m_BMask)) >> -m_BShift);
  923. val |= ( ( int64 )(a & m_AMask) ) << m_AShift;
  924. signed char *pSignedBits = ( signed char * )m_pBits;
  925. if ( IsPC() || IsPS3() || !IsX360() )
  926. {
  927. switch( m_Size )
  928. {
  929. case 8:
  930. pSignedBits[7] = (signed char)((val >> 56) & 0xff);
  931. pSignedBits[6] = (signed char)((val >> 48) & 0xff);
  932. // fall through intentionally.
  933. case 6:
  934. pSignedBits[5] = (signed char)((val >> 40) & 0xff);
  935. pSignedBits[4] = (signed char)((val >> 32) & 0xff);
  936. // fall through intentionally.
  937. case 4:
  938. pSignedBits[3] = (signed char)((val >> 24) & 0xff);
  939. // fall through intentionally.
  940. case 3:
  941. pSignedBits[2] = (signed char)((val >> 16) & 0xff);
  942. // fall through intentionally.
  943. case 2:
  944. pSignedBits[1] = (signed char)((val >> 8) & 0xff);
  945. // fall through intentionally.
  946. case 1:
  947. pSignedBits[0] = (signed char)((val & 0xff));
  948. break;
  949. default:
  950. Assert( 0 );
  951. return;
  952. }
  953. }
  954. else
  955. {
  956. switch( m_Size )
  957. {
  958. case 8:
  959. pSignedBits[0] = (signed char)((val >> 56) & 0xff);
  960. pSignedBits[1] = (signed char)((val >> 48) & 0xff);
  961. pSignedBits[2] = (signed char)((val >> 40) & 0xff);
  962. pSignedBits[3] = (signed char)((val >> 32) & 0xff);
  963. pSignedBits[4] = (signed char)((val >> 24) & 0xff);
  964. pSignedBits[5] = (signed char)((val >> 16) & 0xff);
  965. pSignedBits[6] = (signed char)((val >> 8) & 0xff);
  966. pSignedBits[7] = (signed char)(val & 0xff);
  967. break;
  968. case 6:
  969. pSignedBits[0] = (signed char)((val >> 40) & 0xff);
  970. pSignedBits[1] = (signed char)((val >> 32) & 0xff);
  971. pSignedBits[2] = (signed char)((val >> 24) & 0xff);
  972. pSignedBits[3] = (signed char)((val >> 16) & 0xff);
  973. pSignedBits[4] = (signed char)((val >> 8) & 0xff);
  974. pSignedBits[5] = (signed char)(val & 0xff);
  975. break;
  976. case 4:
  977. pSignedBits[0] = (signed char)((val >> 24) & 0xff);
  978. pSignedBits[1] = (signed char)((val >> 16) & 0xff);
  979. pSignedBits[2] = (signed char)((val >> 8) & 0xff);
  980. pSignedBits[3] = (signed char)(val & 0xff);
  981. break;
  982. case 3:
  983. pSignedBits[0] = (signed char)((val >> 16) & 0xff);
  984. pSignedBits[1] = (signed char)((val >> 8) & 0xff);
  985. pSignedBits[2] = (signed char)(val & 0xff);
  986. break;
  987. case 2:
  988. pSignedBits[0] = (signed char)((val >> 8) & 0xff);
  989. pSignedBits[1] = (signed char)(val & 0xff);
  990. break;
  991. case 1:
  992. pSignedBits[0] = (signed char)(val & 0xff);
  993. break;
  994. default:
  995. Assert( 0 );
  996. return;
  997. }
  998. }
  999. }
  1000. }
  1001. FORCEINLINE_PIXEL void CPixelWriter::ReadPixelNoAdvance( int &r, int &g, int &b, int &a )
  1002. {
  1003. Assert( IsUsingSupportedFormat() );
  1004. Assert( !IsUsingFloatFormat() );
  1005. int val = m_pBits[0];
  1006. if ( m_Size > 1 )
  1007. {
  1008. if ( IsPC() || IsPS3() || !IsX360() )
  1009. {
  1010. val |= (int)m_pBits[1] << 8;
  1011. if ( m_Size > 2 )
  1012. {
  1013. val |= (int)m_pBits[2] << 16;
  1014. if ( m_Size > 3 )
  1015. {
  1016. val |= (int)m_pBits[3] << 24;
  1017. }
  1018. }
  1019. }
  1020. else
  1021. {
  1022. val <<= 8;
  1023. val |= (int)m_pBits[1];
  1024. if ( m_Size > 2 )
  1025. {
  1026. val <<= 8;
  1027. val |= (int)m_pBits[2];
  1028. if ( m_Size > 3 )
  1029. {
  1030. val <<= 8;
  1031. val |= (int)m_pBits[3];
  1032. }
  1033. }
  1034. }
  1035. }
  1036. r = (val>>m_RShift) & m_RMask;
  1037. g = (val>>m_GShift) & m_GMask;
  1038. b = (val>>m_BShift) & m_BMask;
  1039. a = (val>>m_AShift) & m_AMask;
  1040. }
  1041. #endif // PIXELWRITER_H;