Counter Strike : Global Offensive Source Code
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

56 lines
2.4 KiB

  1. #ifndef TIER0_CACHE_HINTS_HDR
  2. #define TIER0_CACHE_HINTS_HDR
  3. #if defined(_X360)
  // Xbox 360 cache hints. Each macro hands the byte offset and the base pointer
  // to the compiler intrinsic as two separate arguments, offset first, and
  // expands to a braced statement block.
  #define PREFETCH_128( POINTER, OFFSET ) \
      { \
          __dcbt( ( OFFSET ), ( POINTER ) ); \
      }
  #define PREZERO_128( POINTER, OFFSET ) \
      { \
          __dcbz128( ( OFFSET ), ( POINTER ) ); \
      }
  6. #elif defined( _PS3 ) && !defined( SPU )
  // PS3 (non-SPU) cache hints. The target address is computed up front as
  // (char *)POINTER + (size_t)OFFSET and passed to the intrinsic as a single
  // pointer argument. Each macro expands to a braced statement block.
  #define PREFETCH_128( POINTER, OFFSET ) \
      { \
          __dcbt( ( char * )( POINTER ) + ( size_t )( OFFSET ) ); \
      }
  #define PREZERO_128( POINTER, OFFSET ) \
      { \
          __dcbz( ( char * )( POINTER ) + ( size_t )( OFFSET ) ); \
      }
  9. #elif defined(WIN32)
  // NOTE: In every case tested so far, using this prefetch on PC has actually been
  // slower. Changing it to really prefetch 128 bytes (tested on a PC with 64-byte
  // cache lines) makes it slower still. Improving performance with explicit
  // prefetching is much harder on PC; the suggestion is to lay your data out
  // linearly and let the hardware prefetcher do the work for you. If you still
  // want to prefetch, do it in 64-byte chunks with this macro, which issues
  // _mm_prefetch with the _MM_HINT_T0 hint on the address POINTER + OFFSET bytes.
  #define PREFETCH_64_PC( POINTER, OFFSET ) \
      { \
          _mm_prefetch( ( const char * )( POINTER ) + ( OFFSET ), _MM_HINT_T0 ); \
      }
  // Deliberately a no-op on PC: defining it has not improved the performance of
  // any existing use. Expands to an empty block, so neither argument is evaluated.
  #define PREFETCH_128( POINTER, OFFSET ) \
      { \
      }
  // The concept of zeroing a cache line does not exist the same way on PC, so the
  // behavior is simulated: the 128-byte-aligned block of memory that contains the
  // address POINTER + OFFSET is filled with zeros.
  //   POINTER - base address; converted with a C-style cast, so pointers to any
  //             object type (including pointers to const) are accepted.
  //   OFFSET  - byte offset added to POINTER before rounding down to 128.
  // Each argument is evaluated exactly once. The expansion is a braced block, not
  // a do/while(0), so existing call sites keep compiling unchanged.
  // The including file must already declare uintptr_t and memset.
  #define PREZERO_128( POINTER, OFFSET ) \
      { \
          /* no double underscore in the name: such identifiers are reserved */ \
          uintptr_t prezeroLineAddr_ = ( uintptr_t )( ( char * )( POINTER ) + ( OFFSET ) ); \
          /* unsigned mask clears the low 7 bits, i.e. rounds down to a multiple of 128 */ \
          prezeroLineAddr_ &= ~( uintptr_t )127; \
          memset( ( void * )prezeroLineAddr_, 0, 128 ); \
      }
  24. #else
  // Remaining platforms: prefetching is a no-op here as well. Expands to an
  // empty block, so neither argument is evaluated.
  #define PREFETCH_128( POINTER, OFFSET ) \
      { \
      }
  // Software stand-in for zeroing a cache line: the 128-byte-aligned block of
  // memory that contains the address POINTER + OFFSET is filled with zeros.
  //   POINTER - base address; converted with a C-style cast, so pointers to any
  //             object type (including pointers to const) are accepted.
  //   OFFSET  - byte offset added to POINTER before rounding down to 128.
  // Each argument is evaluated exactly once. The expansion is a braced block, not
  // a do/while(0), so existing call sites keep compiling unchanged.
  // The including file must already declare uintptr_t and memset.
  #define PREZERO_128( POINTER, OFFSET ) \
      { \
          /* no double underscore in the name: such identifiers are reserved */ \
          uintptr_t prezeroLineAddr_ = ( uintptr_t )( ( char * )( POINTER ) + ( OFFSET ) ); \
          /* unsigned mask clears the low 7 bits, i.e. rounds down to a multiple of 128 */ \
          prezeroLineAddr_ &= ~( uintptr_t )127; \
          memset( ( void * )prezeroLineAddr_, 0, 128 ); \
      }
  33. #endif
  // Legacy spelling of PREFETCH_128, retained for backward compatibility until
  // a massive search and replace over the existing uses has been done.
  #define PREFETCH_CACHE_LINE PREFETCH_128
  // Reported cache-line size: 128. This is not correct on PC, but it has no
  // side effects related to the prefetch/prezero macros defined above.
  #define CACHE_LINE_SIZE 128
  38. #ifdef IVP_VECTOR_INCLUDED
  39. template<class T>
  40. inline void UnsafePrefetchLastElementOf(IVP_U_Vector<T>&array)
  41. {
  42. PREFETCH_128(array.element_at(array.len()-1),0);
  43. }
  44. template<class T>
  45. inline void PrefetchLastElementOf(IVP_U_Vector<T>&array)
  46. {
  47. if(array.len() > 0)
  48. PREFETCH_128(array.element_at(array.len()-1),0);
  49. }
  50. #endif
  51. #endif