You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
57 lines
2.4 KiB
57 lines
2.4 KiB
#ifndef TIER0_CACHE_HINTS_HDR
|
|
#define TIER0_CACHE_HINTS_HDR
|
|
|
|
// Cache-hint macros.
//
//   PREFETCH_128(POINTER, OFFSET) - hint that the memory at POINTER + OFFSET is about to be used.
//   PREZERO_128(POINTER, OFFSET)  - zero the cache line at POINTER + OFFSET.
//
// Every macro expands to a braced block, not a do/while(0) statement. Writing
// `if (x) PREFETCH_128(p, 0); else ...` therefore does not compile; wrap the call in braces.
#if defined(_X360)
// Xbox 360: forward directly to the compiler intrinsics (note the offset-first argument order).
#define PREFETCH_128(POINTER,OFFSET) { __dcbt((OFFSET), (POINTER)); }
#define PREZERO_128(POINTER, OFFSET) { __dcbz128((OFFSET), (POINTER)); }
#elif defined( _PS3 ) && !defined( SPU )
// PS3 (PPU only): these intrinsics take a single, already-offset address.
#define PREFETCH_128(POINTER,OFFSET) { __dcbt( ( char * )( POINTER ) + ( size_t )( OFFSET ) ); }
#define PREZERO_128(POINTER,OFFSET) { __dcbz( ( char * )( POINTER ) + ( size_t )( OFFSET ) ); }
#else
// PC (WIN32) and every other platform share the fallbacks below.
#if defined(WIN32)
// NOTE: In every case tested so far, using this prefetch on PC is actually slower. Changing it to
// really prefetch 128 bytes (tested on a PC with 64-byte cache lines) makes it even slower.
// It is much more difficult to improve performance with prefetch on the PC. Prefer making your data
// linear and letting the hardware prefetcher do the work. Otherwise you can prefetch in 64-byte
// chunks with this:
#define PREFETCH_64_PC(POINTER,OFFSET) { _mm_prefetch((const char*)(POINTER) + (OFFSET), _MM_HINT_T0); }
#endif

// Left empty because defining it does not improve performance in any existing case.
// The arguments are NOT evaluated on these platforms.
#define PREFETCH_128(POINTER,OFFSET) { /* Nothing to do here */ }

// The concept of zeroing a cache line does not exist the same way here, so simulate it:
// round POINTER + OFFSET down to a 128-byte boundary and clear the 128 bytes starting there.
// This can clear bytes that lie *before* POINTER + OFFSET when that address is not 128-byte aligned.
// The local name avoids the reserved double-underscore form.
#define PREZERO_128(POINTER,OFFSET) \
	{ \
		intptr_t prezero128LineAddr_ = (intptr_t)((char *)(POINTER) + (OFFSET)); \
		prezero128LineAddr_ &= -128; \
		memset((void*)prezero128LineAddr_, 0, 128); \
	}
#endif
|
|
|
|
// Backward-compatibility alias: older code spells the prefetch macro PREFETCH_CACHE_LINE.
// Keep it until every use has been renamed to PREFETCH_128.
#define PREFETCH_CACHE_LINE PREFETCH_128

// Nominal cache-line size, in bytes, matching the 128-byte granularity of the macros above.
// This does not describe real PC hardware (the PC note above was measured with 64-byte lines),
// but the macros above do not depend on it, so the mismatch has no side effects on them.
#define CACHE_LINE_SIZE 128
|
|
|
|
#ifdef IVP_VECTOR_INCLUDED
// Prefetch helpers for IVP_U_Vector. Only compiled when IVP_VECTOR_INCLUDED is defined.

// Passes the value returned by array.element_at(array.len()-1) to PREFETCH_128 with offset 0.
// "Unsafe" because the length is not checked: with an empty vector this evaluates
// element_at(-1) on platforms where PREFETCH_128 expands to real code.
// Use PrefetchLastElementOf unless the vector is known to be non-empty.
template<class T>
inline void UnsafePrefetchLastElementOf(IVP_U_Vector<T>&array)
{
	// PREFETCH_128 expands to an empty block on some platforms; keep the parameter "used" there.
	(void)array;
	PREFETCH_128(array.element_at(array.len()-1),0);
}

// Same as UnsafePrefetchLastElementOf, but does nothing when array.len() is not positive.
template<class T>
inline void PrefetchLastElementOf(IVP_U_Vector<T>&array)
{
	// Braces keep the macro expansion unambiguous as the body of the if.
	if(array.len() > 0)
	{
		PREFETCH_128(array.element_at(array.len()-1),0);
	}
}
#endif
|
|
|
|
#endif
|