Team Fortress 2 Source Code as on 22/4/2020
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

883 lines
23 KiB

  1. //========= Copyright Valve Corporation, All rights reserved. ============//
  2. //
  3. // Purpose:
  4. //
  5. // $NoKeywords: $
  6. //
  7. //===========================================================================//
  8. #ifndef PIXELWRITER_H
  9. #define PIXELWRITER_H
  10. #ifdef _WIN32
  11. #pragma once
  12. #endif
  13. #ifdef _WIN32
  14. #define FORCEINLINE_PIXEL FORCEINLINE
  15. #elif POSIX
  16. #define FORCEINLINE_PIXEL inline
  17. #else
  18. #error "implement me"
  19. #endif
  20. #include "bitmap/imageformat.h"
  21. #include "tier0/dbg.h"
  22. #include "mathlib/compressed_vector.h"
  23. #include "mathlib/ssemath.h"
  24. //-----------------------------------------------------------------------------
  25. // Color writing class
  26. //-----------------------------------------------------------------------------
  27. class CPixelWriter
  28. {
  29. public:
  30. FORCEINLINE void SetPixelMemory( ImageFormat format, void* pMemory, int stride );
  31. FORCEINLINE void *GetPixelMemory() { return m_pBase; }
  32. // this is no longer used:
  33. #if 0 // defined( _X360 )
  34. // set after SetPixelMemory()
  35. FORCEINLINE void ActivateByteSwapping( bool bSwap );
  36. #endif
  37. FORCEINLINE void Seek( int x, int y );
  38. FORCEINLINE void* SkipBytes( int n );
  39. FORCEINLINE void SkipPixels( int n );
  40. FORCEINLINE void WritePixel( int r, int g, int b, int a = 255 );
  41. FORCEINLINE void WritePixelNoAdvance( int r, int g, int b, int a = 255 );
  42. FORCEINLINE void WritePixelSigned( int r, int g, int b, int a = 255 );
  43. FORCEINLINE void WritePixelNoAdvanceSigned( int r, int g, int b, int a = 255 );
  44. FORCEINLINE void ReadPixelNoAdvance( int &r, int &g, int &b, int &a );
  45. // Floating point formats
  46. FORCEINLINE void WritePixelNoAdvanceF( float r, float g, float b, float a = 1.0f );
  47. FORCEINLINE void WritePixelF( float r, float g, float b, float a = 1.0f );
  48. // SIMD formats
  49. FORCEINLINE void WritePixel( FLTX4 rgba );
  50. FORCEINLINE void WritePixelNoAdvance( FLTX4 rgba );
  51. #ifdef _X360
  52. // here are some explicit formats so we can avoid the switch:
  53. FORCEINLINE void WritePixelNoAdvance_RGBA8888( FLTX4 rgba );
  54. FORCEINLINE void WritePixelNoAdvance_BGRA8888( FLTX4 rgba );
  55. // as above, but with m_pBits passed in to avoid a LHS
  56. FORCEINLINE void WritePixelNoAdvance_BGRA8888( FLTX4 rgba, void *pBits );
  57. // for writing entire SIMD registers at once when they have
  58. // already been packed, and when m_pBits is vector-aligned
  59. // (which is a requirement for write-combined memory)
  60. // offset is added to m_pBits (saving you from the obligatory
  61. // LHS of a SkipBytes)
  62. FORCEINLINE void WriteFourPixelsExplicitLocation_BGRA8888( FLTX4 rgba, int offset );
  63. #endif
  64. FORCEINLINE unsigned char GetPixelSize() { return m_Size; }
  65. FORCEINLINE bool IsUsingFloatFormat() const;
  66. FORCEINLINE unsigned char *GetCurrentPixel() { return m_pBits; }
  67. private:
  68. enum
  69. {
  70. PIXELWRITER_USING_FLOAT_FORMAT = 0x01,
  71. PIXELWRITER_USING_16BIT_FLOAT_FORMAT = 0x02,
  72. PIXELWRITER_SWAPBYTES = 0x04,
  73. };
  74. unsigned char* m_pBase;
  75. unsigned char* m_pBits;
  76. unsigned short m_BytesPerRow;
  77. unsigned char m_Size;
  78. unsigned char m_nFlags;
  79. signed short m_RShift;
  80. signed short m_GShift;
  81. signed short m_BShift;
  82. signed short m_AShift;
  83. unsigned int m_RMask;
  84. unsigned int m_GMask;
  85. unsigned int m_BMask;
  86. unsigned int m_AMask;
  87. #ifdef _X360
  88. ImageFormat m_Format;
  89. public:
  90. inline const ImageFormat &GetFormat() { return m_Format; }
  91. private:
  92. #endif
  93. };
  94. FORCEINLINE_PIXEL bool CPixelWriter::IsUsingFloatFormat() const
  95. {
  96. return (m_nFlags & PIXELWRITER_USING_FLOAT_FORMAT) != 0;
  97. }
  98. FORCEINLINE_PIXEL void CPixelWriter::SetPixelMemory( ImageFormat format, void* pMemory, int stride )
  99. {
  100. m_pBits = (unsigned char*)pMemory;
  101. m_pBase = m_pBits;
  102. m_BytesPerRow = (unsigned short)stride;
  103. m_nFlags = 0;
  104. #ifdef _X360
  105. m_Format = format;
  106. #endif
  107. switch ( format )
  108. {
  109. case IMAGE_FORMAT_R32F: // NOTE! : the low order bits are first in this naming convention.
  110. m_Size = 4;
  111. m_RShift = 0;
  112. m_GShift = 0;
  113. m_BShift = 0;
  114. m_AShift = 0;
  115. m_RMask = 0xFFFFFFFF;
  116. m_GMask = 0x0;
  117. m_BMask = 0x0;
  118. m_AMask = 0x0;
  119. m_nFlags |= PIXELWRITER_USING_FLOAT_FORMAT;
  120. break;
  121. case IMAGE_FORMAT_RGBA32323232F:
  122. m_Size = 16;
  123. m_RShift = 0;
  124. m_GShift = 32;
  125. m_BShift = 64;
  126. m_AShift = 96;
  127. m_RMask = 0xFFFFFFFF;
  128. m_GMask = 0xFFFFFFFF;
  129. m_BMask = 0xFFFFFFFF;
  130. m_AMask = 0xFFFFFFFF;
  131. m_nFlags |= PIXELWRITER_USING_FLOAT_FORMAT;
  132. break;
  133. case IMAGE_FORMAT_RGBA16161616F:
  134. m_Size = 8;
  135. m_RShift = 0;
  136. m_GShift = 16;
  137. m_BShift = 32;
  138. m_AShift = 48;
  139. m_RMask = 0xFFFF;
  140. m_GMask = 0xFFFF;
  141. m_BMask = 0xFFFF;
  142. m_AMask = 0xFFFF;
  143. m_nFlags |= PIXELWRITER_USING_FLOAT_FORMAT | PIXELWRITER_USING_16BIT_FLOAT_FORMAT;
  144. break;
  145. case IMAGE_FORMAT_RGBA8888:
  146. #if defined( _X360 )
  147. case IMAGE_FORMAT_LINEAR_RGBA8888:
  148. #endif
  149. m_Size = 4;
  150. m_RShift = 0;
  151. m_GShift = 8;
  152. m_BShift = 16;
  153. m_AShift = 24;
  154. m_RMask = 0xFF;
  155. m_GMask = 0xFF;
  156. m_BMask = 0xFF;
  157. m_AMask = 0xFF;
  158. break;
  159. case IMAGE_FORMAT_BGRA8888: // NOTE! : the low order bits are first in this naming convention.
  160. #if defined( _X360 )
  161. case IMAGE_FORMAT_LINEAR_BGRA8888:
  162. #endif
  163. m_Size = 4;
  164. m_RShift = 16;
  165. m_GShift = 8;
  166. m_BShift = 0;
  167. m_AShift = 24;
  168. m_RMask = 0xFF;
  169. m_GMask = 0xFF;
  170. m_BMask = 0xFF;
  171. m_AMask = 0xFF;
  172. break;
  173. case IMAGE_FORMAT_BGRX8888:
  174. #if defined( _X360 )
  175. case IMAGE_FORMAT_LINEAR_BGRX8888:
  176. #endif
  177. m_Size = 4;
  178. m_RShift = 16;
  179. m_GShift = 8;
  180. m_BShift = 0;
  181. m_AShift = 24;
  182. m_RMask = 0xFF;
  183. m_GMask = 0xFF;
  184. m_BMask = 0xFF;
  185. m_AMask = 0x00;
  186. break;
  187. case IMAGE_FORMAT_BGRA4444:
  188. m_Size = 2;
  189. m_RShift = 4;
  190. m_GShift = 0;
  191. m_BShift = -4;
  192. m_AShift = 8;
  193. m_RMask = 0xF0;
  194. m_GMask = 0xF0;
  195. m_BMask = 0xF0;
  196. m_AMask = 0xF0;
  197. break;
  198. case IMAGE_FORMAT_BGR888:
  199. m_Size = 3;
  200. m_RShift = 16;
  201. m_GShift = 8;
  202. m_BShift = 0;
  203. m_AShift = 0;
  204. m_RMask = 0xFF;
  205. m_GMask = 0xFF;
  206. m_BMask = 0xFF;
  207. m_AMask = 0x00;
  208. break;
  209. case IMAGE_FORMAT_BGR565:
  210. m_Size = 2;
  211. m_RShift = 8;
  212. m_GShift = 3;
  213. m_BShift = -3;
  214. m_AShift = 0;
  215. m_RMask = 0xF8;
  216. m_GMask = 0xFC;
  217. m_BMask = 0xF8;
  218. m_AMask = 0x00;
  219. break;
  220. case IMAGE_FORMAT_BGRA5551:
  221. case IMAGE_FORMAT_BGRX5551:
  222. m_Size = 2;
  223. m_RShift = 7;
  224. m_GShift = 2;
  225. m_BShift = -3;
  226. m_AShift = 8;
  227. m_RMask = 0xF8;
  228. m_GMask = 0xF8;
  229. m_BMask = 0xF8;
  230. m_AMask = 0x80;
  231. break;
  232. // GR - alpha format for HDR support
  233. case IMAGE_FORMAT_A8:
  234. m_Size = 1;
  235. m_RShift = 0;
  236. m_GShift = 0;
  237. m_BShift = 0;
  238. m_AShift = 0;
  239. m_RMask = 0x00;
  240. m_GMask = 0x00;
  241. m_BMask = 0x00;
  242. m_AMask = 0xFF;
  243. break;
  244. case IMAGE_FORMAT_UVWQ8888:
  245. m_Size = 4;
  246. m_RShift = 0;
  247. m_GShift = 8;
  248. m_BShift = 16;
  249. m_AShift = 24;
  250. m_RMask = 0xFF;
  251. m_GMask = 0xFF;
  252. m_BMask = 0xFF;
  253. m_AMask = 0xFF;
  254. break;
  255. case IMAGE_FORMAT_RGBA16161616:
  256. #if defined( _X360 )
  257. case IMAGE_FORMAT_LINEAR_RGBA16161616:
  258. #endif
  259. m_Size = 8;
  260. if ( !IsX360() )
  261. {
  262. m_RShift = 0;
  263. m_GShift = 16;
  264. m_BShift = 32;
  265. m_AShift = 48;
  266. }
  267. else
  268. {
  269. m_RShift = 48;
  270. m_GShift = 32;
  271. m_BShift = 16;
  272. m_AShift = 0;
  273. }
  274. m_RMask = 0xFFFF;
  275. m_GMask = 0xFFFF;
  276. m_BMask = 0xFFFF;
  277. m_AMask = 0xFFFF;
  278. break;
  279. case IMAGE_FORMAT_I8:
  280. // whatever goes into R is considered the intensity.
  281. m_Size = 1;
  282. m_RShift = 0;
  283. m_GShift = 0;
  284. m_BShift = 0;
  285. m_AShift = 0;
  286. m_RMask = 0xFF;
  287. m_GMask = 0x00;
  288. m_BMask = 0x00;
  289. m_AMask = 0x00;
  290. break;
  291. // FIXME: Add more color formats as need arises
  292. default:
  293. {
  294. static bool format_error_printed[NUM_IMAGE_FORMATS];
  295. if ( !format_error_printed[format] )
  296. {
  297. Assert( 0 );
  298. Msg( "CPixelWriter::SetPixelMemory: Unsupported image format %i\n", format );
  299. format_error_printed[format] = true;
  300. }
  301. m_Size = 0; // set to zero so that we don't stomp memory for formats that we don't understand.
  302. m_RShift = 0;
  303. m_GShift = 0;
  304. m_BShift = 0;
  305. m_AShift = 0;
  306. m_RMask = 0x00;
  307. m_GMask = 0x00;
  308. m_BMask = 0x00;
  309. m_AMask = 0x00;
  310. }
  311. break;
  312. }
  313. }
  314. #if 0 // defined( _X360 )
  315. FORCEINLINE void CPixelWriter::ActivateByteSwapping( bool bSwap )
  316. {
  317. // X360TBD: Who is trying to use this?
  318. // Purposely not hooked up because PixelWriter has been ported to read/write native pixels only
  319. Assert( 0 );
  320. if ( bSwap && !(m_nFlags & PIXELWRITER_SWAPBYTES ) )
  321. {
  322. m_nFlags |= PIXELWRITER_SWAPBYTES;
  323. // only tested with 4 byte formats
  324. Assert( m_Size == 4 );
  325. }
  326. else if ( !bSwap && (m_nFlags & PIXELWRITER_SWAPBYTES ) )
  327. {
  328. m_nFlags &= ~PIXELWRITER_SWAPBYTES;
  329. }
  330. else
  331. {
  332. // same state
  333. return;
  334. }
  335. // swap the shifts
  336. m_RShift = 24-m_RShift;
  337. m_GShift = 24-m_GShift;
  338. m_BShift = 24-m_BShift;
  339. m_AShift = 24-m_AShift;
  340. }
  341. #endif
  342. //-----------------------------------------------------------------------------
  343. // Sets where we're writing to
  344. //-----------------------------------------------------------------------------
  345. FORCEINLINE_PIXEL void CPixelWriter::Seek( int x, int y )
  346. {
  347. m_pBits = m_pBase + y * m_BytesPerRow + x * m_Size;
  348. }
  349. //-----------------------------------------------------------------------------
  350. // Skips n bytes:
  351. //-----------------------------------------------------------------------------
  352. FORCEINLINE_PIXEL void* CPixelWriter::SkipBytes( int n ) RESTRICT
  353. {
  354. m_pBits += n;
  355. return m_pBits;
  356. }
  357. //-----------------------------------------------------------------------------
  358. // Skips n pixels:
  359. //-----------------------------------------------------------------------------
  360. FORCEINLINE_PIXEL void CPixelWriter::SkipPixels( int n )
  361. {
  362. SkipBytes( n * m_Size );
  363. }
  364. //-----------------------------------------------------------------------------
  365. // Writes a pixel without advancing the index PC ONLY
  366. //-----------------------------------------------------------------------------
  367. FORCEINLINE_PIXEL void CPixelWriter::WritePixelNoAdvanceF( float r, float g, float b, float a )
  368. {
  369. Assert( IsUsingFloatFormat() );
  370. // X360TBD: Not ported
  371. Assert( IsPC() );
  372. if (PIXELWRITER_USING_16BIT_FLOAT_FORMAT & m_nFlags)
  373. {
  374. float16 fp16[4];
  375. fp16[0].SetFloat( r );
  376. fp16[1].SetFloat( g );
  377. fp16[2].SetFloat( b );
  378. fp16[3].SetFloat( a );
  379. // fp16
  380. unsigned short pBuf[4] = { 0, 0, 0, 0 };
  381. pBuf[ m_RShift >> 4 ] |= (fp16[0].GetBits() & m_RMask) << ( m_RShift & 0xF );
  382. pBuf[ m_GShift >> 4 ] |= (fp16[1].GetBits() & m_GMask) << ( m_GShift & 0xF );
  383. pBuf[ m_BShift >> 4 ] |= (fp16[2].GetBits() & m_BMask) << ( m_BShift & 0xF );
  384. pBuf[ m_AShift >> 4 ] |= (fp16[3].GetBits() & m_AMask) << ( m_AShift & 0xF );
  385. memcpy( m_pBits, pBuf, m_Size );
  386. }
  387. else
  388. {
  389. // fp32
  390. int pBuf[4] = { 0, 0, 0, 0 };
  391. pBuf[ m_RShift >> 5 ] |= (FloatBits(r) & m_RMask) << ( m_RShift & 0x1F );
  392. pBuf[ m_GShift >> 5 ] |= (FloatBits(g) & m_GMask) << ( m_GShift & 0x1F );
  393. pBuf[ m_BShift >> 5 ] |= (FloatBits(b) & m_BMask) << ( m_BShift & 0x1F );
  394. pBuf[ m_AShift >> 5 ] |= (FloatBits(a) & m_AMask) << ( m_AShift & 0x1F );
  395. memcpy( m_pBits, pBuf, m_Size );
  396. }
  397. }
  398. //-----------------------------------------------------------------------------
  399. // Writes a pixel, advances the write index
  400. //-----------------------------------------------------------------------------
  401. FORCEINLINE_PIXEL void CPixelWriter::WritePixelF( float r, float g, float b, float a )
  402. {
  403. WritePixelNoAdvanceF(r, g, b, a);
  404. m_pBits += m_Size;
  405. }
  406. //-----------------------------------------------------------------------------
  407. // Writes a pixel, advances the write index
  408. //-----------------------------------------------------------------------------
  409. FORCEINLINE_PIXEL void CPixelWriter::WritePixel( int r, int g, int b, int a )
  410. {
  411. WritePixelNoAdvance(r,g,b,a);
  412. m_pBits += m_Size;
  413. }
  414. //-----------------------------------------------------------------------------
  415. // Writes a pixel, advances the write index
  416. //-----------------------------------------------------------------------------
  417. FORCEINLINE_PIXEL void CPixelWriter::WritePixelSigned( int r, int g, int b, int a )
  418. {
  419. WritePixelNoAdvanceSigned(r,g,b,a);
  420. m_pBits += m_Size;
  421. }
  422. //-----------------------------------------------------------------------------
  423. // Writes a pixel without advancing the index
  424. //-----------------------------------------------------------------------------
  425. FORCEINLINE_PIXEL void CPixelWriter::WritePixelNoAdvance( int r, int g, int b, int a )
  426. {
  427. Assert( !IsUsingFloatFormat() );
  428. if ( m_Size <= 0 )
  429. {
  430. return;
  431. }
  432. if ( m_Size < 5 )
  433. {
  434. unsigned int val = (r & m_RMask) << m_RShift;
  435. val |= (g & m_GMask) << m_GShift;
  436. val |= (m_BShift > 0) ? ((b & m_BMask) << m_BShift) : ((b & m_BMask) >> -m_BShift);
  437. val |= (a & m_AMask) << m_AShift;
  438. switch( m_Size )
  439. {
  440. default:
  441. Assert( 0 );
  442. return;
  443. case 1:
  444. {
  445. m_pBits[0] = (unsigned char)((val & 0xff));
  446. return;
  447. }
  448. case 2:
  449. {
  450. ((unsigned short *)m_pBits)[0] = (unsigned short)((val & 0xffff));
  451. return;
  452. }
  453. case 3:
  454. {
  455. if ( IsPC() || !IsX360() )
  456. {
  457. ((unsigned short *)m_pBits)[0] = (unsigned short)((val & 0xffff));
  458. m_pBits[2] = (unsigned char)((val >> 16) & 0xff);
  459. }
  460. else
  461. {
  462. m_pBits[0] = (unsigned char)(((val >> 16) & 0xff));
  463. m_pBits[1] = (unsigned char)(((val >> 8 ) & 0xff));
  464. m_pBits[2] = (unsigned char)(val & 0xff);
  465. }
  466. return;
  467. }
  468. case 4:
  469. {
  470. ((unsigned int *)m_pBits)[0] = val;
  471. return;
  472. }
  473. }
  474. }
  475. else // RGBA32323232 or RGBA16161616 -- PC only.
  476. {
  477. AssertMsg(!IsX360(), "Unsupported lightmap format used in WritePixelNoAdvance(). This is a severe performance fault.\n");
  478. int64 val = ( ( int64 )(r & m_RMask) ) << m_RShift;
  479. val |= ( ( int64 )(g & m_GMask) ) << m_GShift;
  480. val |= (m_BShift > 0) ? ((( int64 )( b & m_BMask)) << m_BShift) : (((int64)( b & m_BMask)) >> -m_BShift);
  481. val |= ( ( int64 )(a & m_AMask) ) << m_AShift;
  482. switch( m_Size )
  483. {
  484. case 6:
  485. {
  486. if ( IsPC() || !IsX360() )
  487. {
  488. ((unsigned int *)m_pBits)[0] = val & 0xffffffff;
  489. ((unsigned short *)m_pBits)[2] = (unsigned short)( ( val >> 32 ) & 0xffff );
  490. }
  491. else
  492. {
  493. ((unsigned int *)m_pBits)[0] = (val >> 16) & 0xffffffff;
  494. ((unsigned short *)m_pBits)[2] = (unsigned short)( val & 0xffff );
  495. }
  496. return;
  497. }
  498. case 8:
  499. {
  500. if ( IsPC() || !IsX360() )
  501. {
  502. ((unsigned int *)m_pBits)[0] = val & 0xffffffff;
  503. ((unsigned int *)m_pBits)[1] = ( val >> 32 ) & 0xffffffff;
  504. }
  505. else
  506. {
  507. ((unsigned int *)m_pBits)[0] = ( val >> 32 ) & 0xffffffff;
  508. ((unsigned int *)m_pBits)[1] = val & 0xffffffff;
  509. }
  510. return;
  511. }
  512. default:
  513. Assert( 0 );
  514. return;
  515. }
  516. }
  517. }
  518. #ifdef _X360
  519. // There isn't a PC port of these because of the many varied
  520. // pixel formats the PC deals with. If you write SSE versions
  521. // of all the various necessary packers, then this can be made
  522. // to work on PC.
  523. //-----------------------------------------------------------------------------
  524. // Writes a pixel, advances the write index
  525. //-----------------------------------------------------------------------------
  526. FORCEINLINE_PIXEL void CPixelWriter::WritePixel( FLTX4 rgba ) RESTRICT
  527. {
  528. WritePixelNoAdvance(rgba);
  529. m_pBits += m_Size;
  530. }
  531. //-----------------------------------------------------------------------------
  532. // Writes a pixel without advancing the index
  533. // rgba are four float values, each on the range 0..255 (though they may leak
  534. // fractionally over 255 due to numerical errors earlier)
  535. //-----------------------------------------------------------------------------
  536. FORCEINLINE_PIXEL void CPixelWriter::WritePixelNoAdvance( FLTX4 rgba ) RESTRICT
  537. {
  538. Assert( !IsUsingFloatFormat() );
  539. switch (m_Size)
  540. {
  541. case 0:
  542. return;
  543. case 4:
  544. {
  545. AssertMsg((reinterpret_cast<unsigned int>(m_pBits) & 0x03) == 0,"Unaligned m_pBits in WritePixelNoAdvance!");
  546. switch ( m_Format )
  547. {
  548. // note: format names are low-order-byte first.
  549. case IMAGE_FORMAT_RGBA8888:
  550. case IMAGE_FORMAT_LINEAR_RGBA8888:
  551. WritePixelNoAdvance_RGBA8888(rgba);
  552. break;
  553. case IMAGE_FORMAT_BGRA8888: // NOTE! : the low order bits are first in this naming convention.
  554. case IMAGE_FORMAT_LINEAR_BGRA8888:
  555. WritePixelNoAdvance_BGRA8888(rgba);
  556. break;
  557. default:
  558. AssertMsg1(false, "Unknown four-byte pixel format %d in lightmap write.\n", m_Format);
  559. }
  560. break;
  561. }
  562. default:
  563. AssertMsg1(false, "WritePixelNoAdvance on unsupported 360 %d-byte format\n", m_Size);
  564. break;
  565. }
  566. }
  567. // here are some explicit formats so we can avoid the switch:
  568. FORCEINLINE void CPixelWriter::WritePixelNoAdvance_RGBA8888( FLTX4 rgba )
  569. {
  570. // it's easier to do tiered convert-saturates here
  571. // than the d3d color convertor op
  572. // first permute
  573. const static fltx4 permReverse = XMVectorPermuteControl(3,2,1,0);
  574. fltx4 N = XMVectorPermute(rgba, rgba, permReverse);
  575. N = __vctuxs(N, 0); // convert to unsigned fixed point 0 w/ saturate
  576. N = __vpkuwus(N, N); // convert to halfword saturate
  577. N = __vpkuhus(N, N); // convert to byte saturate
  578. N = __vspltw(N, 0); // splat w-word to all four
  579. __stvewx(N, m_pBits, 0); // store whatever word happens to be aligned with m_pBits to that word
  580. }
  581. FORCEINLINE void CPixelWriter::WritePixelNoAdvance_BGRA8888( FLTX4 rgba )
  582. {
  583. WritePixelNoAdvance_BGRA8888( rgba, m_pBits );
  584. }
  585. FORCEINLINE void CPixelWriter::WritePixelNoAdvance_BGRA8888( FLTX4 rgba, void * RESTRICT pBits ) RESTRICT
  586. {
  587. // this happens to be in an order such that we can use the handy builtin packing op
  588. // clamp to 0..255 (coz it might have leaked over)
  589. static const fltx4 vTwoFiftyFive = {255.0f, 255.0f, 255.0f, 255.0f};
  590. fltx4 N = MinSIMD(vTwoFiftyFive, rgba);
  591. // the magic number such that when mul-accummulated against rbga,
  592. // gets us a representation 3.0 + (r)*2^-22 -- puts the bits at
  593. // the bottom of the float
  594. static CONST XMVECTOR PackScale = { (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22))}; // 255.0f / (FLOAT)(1 << 22)
  595. static const XMVECTOR Three = {3.0f, 3.0f, 3.0f, 3.0f};
  596. N = __vmaddfp(N, PackScale, Three);
  597. N = __vpkd3d(N, N, VPACK_D3DCOLOR, VPACK_32, 3); // pack to X word
  598. N = __vspltw(N, 0); // splat X
  599. // this is a nasty thing to work around the April XDK bug in __stvewx
  600. {
  601. void * RESTRICT copyOfPBits = pBits;
  602. __stvewx(N, copyOfPBits, 0);
  603. }
  604. }
  605. // for writing entire SIMD registers at once
  606. FORCEINLINE void CPixelWriter::WriteFourPixelsExplicitLocation_BGRA8888 ( FLTX4 rgba, int offset )
  607. {
  608. Assert( (reinterpret_cast<unsigned int>(m_pBits) & 15) == 0 ); // assert alignment
  609. XMStoreVector4A( m_pBits + offset , rgba );
  610. }
  611. #endif
  612. //-----------------------------------------------------------------------------
  613. // Writes a signed pixel without advancing the index
  614. //-----------------------------------------------------------------------------
  615. FORCEINLINE_PIXEL void CPixelWriter::WritePixelNoAdvanceSigned( int r, int g, int b, int a )
  616. {
  617. Assert( !IsUsingFloatFormat() );
  618. if ( m_Size <= 0 )
  619. {
  620. return;
  621. }
  622. if ( m_Size < 5 )
  623. {
  624. int val = (r & m_RMask) << m_RShift;
  625. val |= (g & m_GMask) << m_GShift;
  626. val |= (m_BShift > 0) ? ((b & m_BMask) << m_BShift) : ((b & m_BMask) >> -m_BShift);
  627. val |= (a & m_AMask) << m_AShift;
  628. signed char *pSignedBits = (signed char *)m_pBits;
  629. if ( IsPC() || !IsX360() )
  630. {
  631. switch ( m_Size )
  632. {
  633. case 4:
  634. pSignedBits[3] = (signed char)((val >> 24) & 0xff);
  635. // fall through intentionally.
  636. case 3:
  637. pSignedBits[2] = (signed char)((val >> 16) & 0xff);
  638. // fall through intentionally.
  639. case 2:
  640. pSignedBits[1] = (signed char)((val >> 8) & 0xff);
  641. // fall through intentionally.
  642. case 1:
  643. pSignedBits[0] = (signed char)((val & 0xff));
  644. // fall through intentionally.
  645. return;
  646. }
  647. }
  648. else
  649. {
  650. switch ( m_Size )
  651. {
  652. case 4:
  653. pSignedBits[0] = (signed char)((val >> 24) & 0xff);
  654. pSignedBits[1] = (signed char)((val >> 16) & 0xff);
  655. pSignedBits[2] = (signed char)((val >> 8) & 0xff);
  656. pSignedBits[3] = (signed char)(val & 0xff);
  657. break;
  658. case 3:
  659. pSignedBits[0] = (signed char)((val >> 16) & 0xff);
  660. pSignedBits[1] = (signed char)((val >> 8) & 0xff);
  661. pSignedBits[2] = (signed char)(val & 0xff);
  662. break;
  663. case 2:
  664. pSignedBits[0] = (signed char)((val >> 8) & 0xff);
  665. pSignedBits[1] = (signed char)(val & 0xff);
  666. break;
  667. case 1:
  668. pSignedBits[0] = (signed char)(val & 0xff);
  669. break;
  670. }
  671. }
  672. }
  673. else
  674. {
  675. int64 val = ( ( int64 )(r & m_RMask) ) << m_RShift;
  676. val |= ( ( int64 )(g & m_GMask) ) << m_GShift;
  677. val |= (m_BShift > 0) ? ((( int64 )( b & m_BMask)) << m_BShift) : (((int64)( b & m_BMask)) >> -m_BShift);
  678. val |= ( ( int64 )(a & m_AMask) ) << m_AShift;
  679. signed char *pSignedBits = ( signed char * )m_pBits;
  680. if ( IsPC() || !IsX360() )
  681. {
  682. switch( m_Size )
  683. {
  684. case 8:
  685. pSignedBits[7] = (signed char)((val >> 56) & 0xff);
  686. pSignedBits[6] = (signed char)((val >> 48) & 0xff);
  687. // fall through intentionally.
  688. case 6:
  689. pSignedBits[5] = (signed char)((val >> 40) & 0xff);
  690. pSignedBits[4] = (signed char)((val >> 32) & 0xff);
  691. // fall through intentionally.
  692. case 4:
  693. pSignedBits[3] = (signed char)((val >> 24) & 0xff);
  694. // fall through intentionally.
  695. case 3:
  696. pSignedBits[2] = (signed char)((val >> 16) & 0xff);
  697. // fall through intentionally.
  698. case 2:
  699. pSignedBits[1] = (signed char)((val >> 8) & 0xff);
  700. // fall through intentionally.
  701. case 1:
  702. pSignedBits[0] = (signed char)((val & 0xff));
  703. break;
  704. default:
  705. Assert( 0 );
  706. return;
  707. }
  708. }
  709. else
  710. {
  711. switch( m_Size )
  712. {
  713. case 8:
  714. pSignedBits[0] = (signed char)((val >> 56) & 0xff);
  715. pSignedBits[1] = (signed char)((val >> 48) & 0xff);
  716. pSignedBits[2] = (signed char)((val >> 40) & 0xff);
  717. pSignedBits[3] = (signed char)((val >> 32) & 0xff);
  718. pSignedBits[4] = (signed char)((val >> 24) & 0xff);
  719. pSignedBits[5] = (signed char)((val >> 16) & 0xff);
  720. pSignedBits[6] = (signed char)((val >> 8) & 0xff);
  721. pSignedBits[7] = (signed char)(val & 0xff);
  722. break;
  723. case 6:
  724. pSignedBits[0] = (signed char)((val >> 40) & 0xff);
  725. pSignedBits[1] = (signed char)((val >> 32) & 0xff);
  726. pSignedBits[2] = (signed char)((val >> 24) & 0xff);
  727. pSignedBits[3] = (signed char)((val >> 16) & 0xff);
  728. pSignedBits[4] = (signed char)((val >> 8) & 0xff);
  729. pSignedBits[5] = (signed char)(val & 0xff);
  730. break;
  731. case 4:
  732. pSignedBits[0] = (signed char)((val >> 24) & 0xff);
  733. pSignedBits[1] = (signed char)((val >> 16) & 0xff);
  734. pSignedBits[2] = (signed char)((val >> 8) & 0xff);
  735. pSignedBits[3] = (signed char)(val & 0xff);
  736. break;
  737. case 3:
  738. pSignedBits[0] = (signed char)((val >> 16) & 0xff);
  739. pSignedBits[1] = (signed char)((val >> 8) & 0xff);
  740. pSignedBits[2] = (signed char)(val & 0xff);
  741. break;
  742. case 2:
  743. pSignedBits[0] = (signed char)((val >> 8) & 0xff);
  744. pSignedBits[1] = (signed char)(val & 0xff);
  745. break;
  746. case 1:
  747. pSignedBits[0] = (signed char)(val & 0xff);
  748. break;
  749. default:
  750. Assert( 0 );
  751. return;
  752. }
  753. }
  754. }
  755. }
  756. FORCEINLINE_PIXEL void CPixelWriter::ReadPixelNoAdvance( int &r, int &g, int &b, int &a )
  757. {
  758. Assert( !IsUsingFloatFormat() );
  759. int val = m_pBits[0];
  760. if ( m_Size > 1 )
  761. {
  762. if ( IsPC() || !IsX360() )
  763. {
  764. val |= (int)m_pBits[1] << 8;
  765. if ( m_Size > 2 )
  766. {
  767. val |= (int)m_pBits[2] << 16;
  768. if ( m_Size > 3 )
  769. {
  770. val |= (int)m_pBits[3] << 24;
  771. }
  772. }
  773. }
  774. else
  775. {
  776. val <<= 8;
  777. val |= (int)m_pBits[1];
  778. if ( m_Size > 2 )
  779. {
  780. val <<= 8;
  781. val |= (int)m_pBits[2];
  782. if ( m_Size > 3 )
  783. {
  784. val <<= 8;
  785. val |= (int)m_pBits[3];
  786. }
  787. }
  788. }
  789. }
  790. r = (val>>m_RShift) & m_RMask;
  791. g = (val>>m_GShift) & m_GMask;
  792. b = (val>>m_BShift) & m_BMask;
  793. a = (val>>m_AShift) & m_AMask;
  794. }
  795. #endif // PIXELWRITER_H;