Counter Strike : Global Offensive Source Code
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

925 lines
24 KiB

  1. //===== Copyright (c) 1996-2005, Valve Corporation, All rights reserved. ======//
  2. //
  3. // Purpose:
  4. //
  5. // $NoKeywords: $
  6. //=============================================================================//
  7. #include "pch_tier0.h"
  8. #if defined(_WIN32) && !defined(_X360)
  9. #define WINDOWS_LEAN_AND_MEAN
  10. #include <windows.h>
  11. #include "cputopology.h"
  12. #elif defined( PLATFORM_OSX )
  13. #include <sys/sysctl.h>
  14. #endif
  15. #ifndef _PS3
  16. #include "tier0_strtools.h"
  17. #endif
  18. //#include "tier1/strtools.h" // this is included for the definition of V_isspace()
  19. #ifdef PLATFORM_WINDOWS_PC
  20. #include <intrin.h>
  21. #endif
  22. // NOTE: This has to be the last file included!
  23. #include "tier0/memdbgon.h"
  24. const tchar* GetProcessorVendorId();
  25. const tchar* GetProcessorBrand();
  // Raw output of one CPUID invocation: the four result registers.
  // The fields are 32 bits wide (the width of the registers CPUID writes) so
  // that sizeof() on a field is 4 and the whole struct is 16 bytes. Code in
  // this file memcpy()s individual fields into character buffers and overlays
  // arrays of this struct with a char array; with 'unsigned long' fields that
  // layout was wrong on targets where long is 64 bits.
  struct CpuIdResult_t
  {
      unsigned int eax;
      unsigned int ebx;
      unsigned int ecx;
      unsigned int edx;

      // Zeroes all four registers.
      void Reset()
      {
          eax = ebx = ecx = edx = 0;
      }
  };
  37. static bool cpuid( unsigned long function, CpuIdResult_t &out )
  38. {
  39. #if defined( _X360 ) || defined( _PS3 )
  40. return false;
  41. #elif defined(GNUC)
  42. unsigned long out_eax,out_ebx,out_ecx,out_edx;
  43. #ifdef PLATFORM_64BITS
  44. asm("mov %%rbx, %%rsi\n\t"
  45. "cpuid\n\t"
  46. "xchg %%rsi, %%rbx"
  47. : "=a" (out_eax),
  48. "=S" (out_ebx),
  49. "=c" (out_ecx),
  50. "=d" (out_edx)
  51. : "a" (function)
  52. );
  53. #else
  54. asm("mov %%ebx, %%esi\n\t"
  55. "cpuid\n\t"
  56. "xchg %%esi, %%ebx"
  57. : "=a" (out_eax),
  58. "=S" (out_ebx),
  59. "=c" (out_ecx),
  60. "=d" (out_edx)
  61. : "a" (function)
  62. );
  63. #endif
  64. out.eax = out_eax;
  65. out.ebx = out_ebx;
  66. out.ecx = out_ecx;
  67. out.edx = out_edx;
  68. return true;
  69. #elif defined(_WIN64)
  70. int pCPUInfo[4];
  71. __cpuid( pCPUInfo, (int)function );
  72. out.eax = pCPUInfo[0];
  73. out.ebx = pCPUInfo[1];
  74. out.ecx = pCPUInfo[2];
  75. out.edx = pCPUInfo[3];
  76. return true;
  77. #else
  78. bool retval = true;
  79. unsigned long out_eax = 0, out_ebx = 0, out_ecx = 0, out_edx = 0;
  80. _asm pushad;
  81. __try
  82. {
  83. _asm
  84. {
  85. xor edx, edx // Clue the compiler that EDX & others is about to be used.
  86. xor ecx, ecx
  87. xor ebx, ebx // <Sergiy> Note: if I don't zero these out, cpuid sometimes won't work, I didn't find out why yet
  88. mov eax, function // set up CPUID to return processor version and features
  89. // 0 = vendor string, 1 = version info, 2 = cache info
  90. cpuid // code bytes = 0fh, 0a2h
  91. mov out_eax, eax // features returned in eax
  92. mov out_ebx, ebx // features returned in ebx
  93. mov out_ecx, ecx // features returned in ecx
  94. mov out_edx, edx // features returned in edx
  95. }
  96. }
  97. __except(EXCEPTION_EXECUTE_HANDLER)
  98. {
  99. retval = false;
  100. }
  101. out.eax = out_eax;
  102. out.ebx = out_ebx;
  103. out.ecx = out_ecx;
  104. out.edx = out_edx;
  105. _asm popad
  106. return retval;
  107. #endif
  108. }
  109. static bool cpuidex( unsigned long function, unsigned long subfunction, CpuIdResult_t &out )
  110. {
  111. #if defined( _X360 ) || defined( _PS3 )
  112. return false;
  113. #elif defined(GNUC)
  114. unsigned long out_eax, out_ebx, out_ecx, out_edx;
  115. asm( "mov %%ebx, %%esi\n\t"
  116. "cpuid\n\t"
  117. "xchg %%esi, %%ebx"
  118. : "=a" ( out_eax ),
  119. "=S" ( out_ebx ),
  120. "=c" ( out_ecx ),
  121. "=d" ( out_edx )
  122. : "a" ( function ),
  123. "c" ( subfunction )
  124. );
  125. out.eax = out_eax;
  126. out.ebx = out_ebx;
  127. out.ecx = out_ecx;
  128. out.edx = out_edx;
  129. return true;
  130. #elif defined(_WIN64)
  131. int pCPUInfo[ 4 ];
  132. __cpuidex( pCPUInfo, ( int )function, ( int )subfunction );
  133. out.eax = pCPUInfo[ 0 ];
  134. out.ebx = pCPUInfo[ 1 ];
  135. out.ecx = pCPUInfo[ 2 ];
  136. out.edx = pCPUInfo[ 3 ];
  137. return false;
  138. #else
  139. bool retval = true;
  140. unsigned long out_eax = 0, out_ebx = 0, out_ecx = 0, out_edx = 0;
  141. _asm pushad;
  142. __try
  143. {
  144. _asm
  145. {
  146. xor edx, edx // Clue the compiler that EDX & others is about to be used.
  147. mov ecx, subfunction
  148. xor ebx, ebx // <Sergiy> Note: if I don't zero these out, cpuid sometimes won't work, I didn't find out why yet
  149. mov eax, function // set up CPUID to return processor version and features
  150. // 0 = vendor string, 1 = version info, 2 = cache info
  151. cpuid // code bytes = 0fh, 0a2h
  152. mov out_eax, eax // features returned in eax
  153. mov out_ebx, ebx // features returned in ebx
  154. mov out_ecx, ecx // features returned in ecx
  155. mov out_edx, edx // features returned in edx
  156. }
  157. }
  158. __except ( EXCEPTION_EXECUTE_HANDLER )
  159. {
  160. retval = false;
  161. }
  162. out.eax = out_eax;
  163. out.ebx = out_ebx;
  164. out.ecx = out_ecx;
  165. out.edx = out_edx;
  166. _asm popad
  167. return retval;
  168. #endif
  169. }
  170. static CpuIdResult_t cpuid( unsigned long function )
  171. {
  172. CpuIdResult_t out;
  173. if ( !cpuid( function, out ) )
  174. {
  175. out.Reset();
  176. }
  177. return out;
  178. }
  179. static CpuIdResult_t cpuidex( unsigned long function, unsigned long subfunction )
  180. {
  181. CpuIdResult_t out;
  182. if ( !cpuidex( function, subfunction, out ) )
  183. {
  184. out.Reset();
  185. }
  186. return out;
  187. }
  188. //-----------------------------------------------------------------------------
  189. // Purpose: This is a bit of a hack because it appears
  190. // Output : Returns true on success, false on failure.
  191. //-----------------------------------------------------------------------------
  192. static bool IsWin98OrOlder()
  193. {
  194. #if defined( _X360 ) || defined( _PS3 ) || defined( POSIX )
  195. return false;
  196. #else
  197. bool retval = false;
  198. OSVERSIONINFOEX osvi;
  199. ZeroMemory(&osvi, sizeof(OSVERSIONINFOEX));
  200. osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFOEX);
  201. BOOL bOsVersionInfoEx = GetVersionEx ((OSVERSIONINFO *) &osvi);
  202. if( !bOsVersionInfoEx )
  203. {
  204. // If OSVERSIONINFOEX doesn't work, try OSVERSIONINFO.
  205. osvi.dwOSVersionInfoSize = sizeof (OSVERSIONINFO);
  206. if ( !GetVersionEx ( (OSVERSIONINFO *) &osvi) )
  207. {
  208. Error( _T("IsWin98OrOlder: Unable to get OS version information") );
  209. }
  210. }
  211. switch (osvi.dwPlatformId)
  212. {
  213. case VER_PLATFORM_WIN32_NT:
  214. // NT, XP, Win2K, etc. all OK for SSE
  215. break;
  216. case VER_PLATFORM_WIN32_WINDOWS:
  217. // Win95, 98, Me can't do SSE
  218. retval = true;
  219. break;
  220. case VER_PLATFORM_WIN32s:
  221. // Can't really run this way I don't think...
  222. retval = true;
  223. break;
  224. default:
  225. break;
  226. }
  227. return retval;
  228. #endif
  229. }
  230. static bool CheckSSETechnology(void)
  231. {
  232. #if defined( _X360 ) || defined( _PS3 )
  233. return true;
  234. #else
  235. if ( IsWin98OrOlder() )
  236. {
  237. return false;
  238. }
  239. return ( cpuid( 1 ).edx & 0x2000000L ) != 0;
  240. #endif
  241. }
  242. static bool CheckSSE2Technology(void)
  243. {
  244. #if defined( _X360 ) || defined( _PS3 )
  245. return false;
  246. #else
  247. return ( cpuid( 1 ).edx & 0x04000000 ) != 0;
  248. #endif
  249. }
  250. bool CheckSSE3Technology(void)
  251. {
  252. #if defined( _X360 ) || defined( _PS3 )
  253. return false;
  254. #else
  255. return ( cpuid( 1 ).ecx & 0x00000001 ) != 0; // bit 1 of ECX
  256. #endif
  257. }
  258. bool CheckSSSE3Technology(void)
  259. {
  260. #if defined( _X360 ) || defined( _PS3 )
  261. return false;
  262. #else
  263. // SSSE 3 is implemented by both Intel and AMD
  264. // detection is done the same way for both vendors
  265. return ( cpuid( 1 ).ecx & ( 1 << 9 ) ) != 0; // bit 9 of ECX
  266. #endif
  267. }
  268. bool CheckSSE41Technology(void)
  269. {
  270. #if defined( _X360 ) || defined( _PS3 )
  271. return false;
  272. #else
  273. // SSE 4.1 is implemented by both Intel and AMD
  274. // detection is done the same way for both vendors
  275. return ( cpuid( 1 ).ecx & ( 1 << 19 ) ) != 0; // bit 19 of ECX
  276. #endif
  277. }
  278. bool CheckSSE42Technology(void)
  279. {
  280. #if defined( _X360 ) || defined( _PS3 )
  281. return false;
  282. #else
  283. // SSE4.2 is an Intel-only feature
  284. const char *pchVendor = GetProcessorVendorId();
  285. if ( 0 != V_tier0_stricmp( pchVendor, "GenuineIntel" ) )
  286. return false;
  287. return ( cpuid( 1 ).ecx & ( 1 << 20 ) ) != 0; // bit 20 of ECX
  288. #endif
  289. }
  290. bool CheckSSE4aTechnology( void )
  291. {
  292. #if defined( _X360 ) || defined( _PS3 )
  293. return false;
  294. #else
  295. // SSE 4a is an AMD-only feature
  296. const char *pchVendor = GetProcessorVendorId();
  297. if ( 0 != V_tier0_stricmp( pchVendor, "AuthenticAMD" ) )
  298. return false;
  299. return ( cpuid( 1 ).ecx & ( 1 << 6 ) ) != 0; // bit 6 of ECX
  300. #endif
  301. }
  302. static bool Check3DNowTechnology(void)
  303. {
  304. #if defined( _X360 ) || defined( _PS3 )
  305. return false;
  306. #else
  307. if ( cpuid( 0x80000000 ).eax > 0x80000000L )
  308. {
  309. return ( cpuid( 0x80000001 ).eax & ( 1 << 31 ) ) != 0;
  310. }
  311. return false;
  312. #endif
  313. }
  314. static bool CheckCMOVTechnology()
  315. {
  316. #if defined( _X360 ) || defined( _PS3 )
  317. return false;
  318. #else
  319. return ( cpuid( 1 ).edx & ( 1 << 15 ) ) != 0;
  320. #endif
  321. }
  322. static bool CheckFCMOVTechnology(void)
  323. {
  324. #if defined( _X360 ) || defined( _PS3 )
  325. return false;
  326. #else
  327. return ( cpuid( 1 ).edx & ( 1 << 16 ) ) != 0;
  328. #endif
  329. }
  330. static bool CheckRDTSCTechnology(void)
  331. {
  332. #if defined( _X360 ) || defined( _PS3 )
  333. return false;
  334. #else
  335. return ( cpuid( 1 ).edx & 0x10 ) != 0;
  336. #endif
  337. }
  338. static tchar s_CpuVendorID[ 13 ] = "unknown";
  339. bool s_bCpuVendorIdInitialized = false;
  340. union CpuBrand_t
  341. {
  342. CpuIdResult_t cpuid[ 3 ];
  343. char name[ 49 ];
  344. };
  345. CpuBrand_t s_CpuBrand;
  346. bool s_bCpuBrandInitialized = false;
  347. // Return the Processor's vendor identification string, or "Generic_x86" if it doesn't exist on this CPU
  348. const tchar* GetProcessorVendorId()
  349. {
  350. #if defined( _X360 ) || defined( _PS3 )
  351. return "PPC";
  352. #else
  353. if ( s_bCpuVendorIdInitialized )
  354. {
  355. return s_CpuVendorID;
  356. }
  357. s_bCpuVendorIdInitialized = true;
  358. CpuIdResult_t cpuid0 = cpuid( 0 );
  359. memset( s_CpuVendorID, 0, sizeof(s_CpuVendorID) );
  360. if ( !cpuid0.eax )
  361. {
  362. // weird...
  363. if ( IsPC() )
  364. {
  365. _tcscpy( s_CpuVendorID, _T( "Generic_x86" ) );
  366. }
  367. else if ( IsX360() )
  368. {
  369. _tcscpy( s_CpuVendorID, _T( "PowerPC" ) );
  370. }
  371. }
  372. else
  373. {
  374. memcpy( s_CpuVendorID + 0, &( cpuid0.ebx ), sizeof( cpuid0.ebx ) );
  375. memcpy( s_CpuVendorID + 4, &( cpuid0.edx ), sizeof( cpuid0.edx ) );
  376. memcpy( s_CpuVendorID + 8, &( cpuid0.ecx ), sizeof( cpuid0.ecx ) );
  377. }
  378. return s_CpuVendorID;
  379. #endif
  380. }
  381. const tchar* GetProcessorBrand()
  382. {
  383. #if defined( _X360 )
  384. return "Xenon";
  385. #elif defined( _PS3 )
  386. return "Cell Broadband Engine";
  387. #else
  388. if ( s_bCpuBrandInitialized )
  389. {
  390. return s_CpuBrand.name;
  391. }
  392. s_bCpuBrandInitialized = true;
  393. memset( &s_CpuBrand, 0, sizeof( s_CpuBrand ) );
  394. const char *pchVendor = GetProcessorVendorId();
  395. if ( 0 == V_tier0_stricmp( pchVendor, "GenuineIntel" ) )
  396. {
  397. // Intel brand string
  398. if ( cpuid( 0x80000000 ).eax >= 0x80000004 )
  399. {
  400. s_CpuBrand.cpuid[ 0 ] = cpuid( 0x80000002 );
  401. s_CpuBrand.cpuid[ 1 ] = cpuid( 0x80000003 );
  402. s_CpuBrand.cpuid[ 2 ] = cpuid( 0x80000004 );
  403. }
  404. }
  405. return s_CpuBrand.name;
  406. #endif
  407. }
  408. // Returns non-zero if Hyper-Threading Technology is supported on the processors and zero if not.
  409. // If it's supported, it does not mean that it's been enabled. So we test another flag to see if it's enabled
  410. // See Intel Processor Identification and the CPUID instruction Application Note 485
  411. // http://www.intel.com/Assets/PDF/appnote/241618.pdf
  412. static bool HTSupported(void)
  413. {
  414. #if ( defined( _X360 ) || defined( _PS3 ) )
  415. // not entirtely sure about the semantic of HT support, it being an intel name
  416. // are we asking about HW threads or HT?
  417. return true;
  418. #else
  419. enum {
  420. HT_BIT = 0x10000000, // EDX[28] - Bit 28 set indicates Hyper-Threading Technology is supported in hardware.
  421. FAMILY_ID = 0x0f00, // EAX[11:8] - Bit 11 thru 8 contains family processor id
  422. EXT_FAMILY_ID = 0x0f00000, // EAX[23:20] - Bit 23 thru 20 contains extended family processor id
  423. FAMILY_ID_386 = 0x0300,
  424. FAMILY_ID_486 = 0x0400, // EAX[8:12] - 486, 487 and overdrive
  425. FAMILY_ID_PENTIUM = 0x0500, // Pentium, Pentium OverDrive 60 - 200
  426. FAMILY_ID_PENTIUM_PRO = 0x0600,// P Pro, P II, P III, P M, Celeron M, Core Duo, Core Solo, Core2 Duo, Core2 Extreme, P D, Xeon model F,
  427. // also 45-nm : Intel Atom, Core i7, Xeon MP ; see Intel Processor Identification and the CPUID instruction pg 20,21
  428. FAMILY_ID_EXTENDED = 0x0F00 // P IV, Xeon, Celeron D, P D,
  429. };
  430. // this works on both newer AMD and Intel CPUs
  431. CpuIdResult_t cpuid1 = cpuid( 1 );
  432. // <Sergiy> Previously, we detected P4 specifically; now, we detect GenuineIntel with HT enabled in general
  433. // if (((cpuid1.eax & FAMILY_ID) == FAMILY_ID_EXTENDED) || (cpuid1.eax & EXT_FAMILY_ID))
  434. // Check to see if this is an Intel Processor with HT or CMT capability , and if HT/CMT is enabled
  435. // ddk: This codef is actually correct: see example code at software.intel.com/en-us/articles/multi-core-detect/
  436. return ( cpuid1.edx & HT_BIT ) != 0 && // Genuine Intel Processor with Hyper-Threading Technology implemented
  437. ( ( cpuid1.ebx >> 16 ) & 0xFF ) > 1; // Hyper-Threading OR Core Multi-Processing has been enabled
  438. #endif
  439. }
  440. // Returns the number of logical processors per physical processors.
  441. static uint8 LogicalProcessorsPerPackage(void)
  442. {
  443. #if defined( _X360 )
  444. return 2;
  445. #else
  446. // EBX[23:16] indicate number of logical processors per package
  447. const unsigned NUM_LOGICAL_BITS = 0x00FF0000;
  448. if ( !HTSupported() )
  449. return 1;
  450. return ( uint8 )( ( cpuid( 1 ).ebx & NUM_LOGICAL_BITS ) >> 16 );
  451. #endif
  452. }
  453. #if defined(POSIX)
  454. // Move this declaration out of the CalculateClockSpeed() function because
  455. // otherwise clang warns that it is non-obvious whether it is a variable
  456. // or a function declaration: [-Wvexing-parse]
  457. uint64 CalculateCPUFreq(); // from cpu_linux.cpp
  458. #endif
  459. // Measure the processor clock speed by sampling the cycle count, waiting
  460. // for some fraction of a second, then measuring the elapsed number of cycles.
  461. static int64 CalculateClockSpeed()
  462. {
  463. #if defined( _X360 ) || defined(_PS3)
  464. // Xbox360 and PS3 have the same clock speed and share a lot of characteristics on PPU
  465. return 3200000000LL;
  466. #else
  467. #if defined( _WIN32 )
  468. LARGE_INTEGER waitTime, startCount, curCount;
  469. CCycleCount start, end;
  470. // Take 1/32 of a second for the measurement.
  471. QueryPerformanceFrequency( &waitTime );
  472. int scale = 5;
  473. waitTime.QuadPart >>= scale;
  474. QueryPerformanceCounter( &startCount );
  475. start.Sample();
  476. do
  477. {
  478. QueryPerformanceCounter( &curCount );
  479. }
  480. while ( curCount.QuadPart - startCount.QuadPart < waitTime.QuadPart );
  481. end.Sample();
  482. return (end.m_Int64 - start.m_Int64) << scale;
  483. #elif defined(POSIX)
  484. int64 freq =(int64)CalculateCPUFreq();
  485. if ( freq == 0 ) // couldn't calculate clock speed
  486. {
  487. Error( "Unable to determine CPU Frequency\n" );
  488. }
  489. return freq;
  490. #else
  491. #error "Please implement Clock Speed function for this platform"
  492. #endif
  493. #endif
  494. }
  495. static CPUInformation s_cpuInformation;
  496. struct IntelCacheDesc_t
  497. {
  498. uint8 nDesc;
  499. uint16 nCacheSize;
  500. };
  501. static IntelCacheDesc_t s_IntelL1DataCacheDesc[] = {
  502. { 0xA, 8 },
  503. { 0xC, 16 },
  504. { 0xD, 16 },
  505. { 0x2C, 32 },
  506. { 0x30, 32 },
  507. { 0x60, 16 },
  508. { 0x66, 8 },
  509. { 0x67, 16 },
  510. { 0x68, 32 }
  511. };
  512. static IntelCacheDesc_t s_IntelL2DataCacheDesc[] =
  513. {
  514. { 0x21, 256 },
  515. { 0x39, 128 },
  516. { 0x3a, 192 },
  517. { 0x3b, 128 },
  518. { 0x3c, 256 },
  519. { 0x3D, 384 },
  520. { 0x3E, 512 },
  521. { 0x41, 128 },
  522. { 0x42, 256 },
  523. { 0x43, 512 },
  524. { 0x44, 1024 },
  525. { 0x45, 2048 },
  526. { 0x48, 3 * 1024 },
  527. { 0x4e, 6 * 1024 },
  528. { 0x78, 1024 },
  529. { 0x79, 128 },
  530. { 0x7a, 256 },
  531. { 0x7b, 512 },
  532. { 0x7c, 1024 },
  533. { 0x7d, 2048 },
  534. { 0x7f, 512 },
  535. { 0x82, 256 },
  536. { 0x83, 512 },
  537. { 0x84, 1024 },
  538. { 0x85, 2048 },
  539. { 0x86, 512 },
  540. { 0x87, 1024 }
  541. };
  542. static IntelCacheDesc_t s_IntelL3DataCacheDesc[] = {
  543. { 0x22, 512 },
  544. { 0x23, 1024 },
  545. { 0x25, 2 * 1024 },
  546. { 0x29, 4 * 1024 },
  547. { 0x46, 4 * 1024 },
  548. { 0x47, 8 * 1024 },
  549. // { 49,
  550. { 0x4a, 6 * 1024 },
  551. { 0x4b, 8 * 1024 },
  552. { 0x4c, 12 * 1024 },
  553. { 0x4d, 16 * 1014 },
  554. { 0xD0, 512 },
  555. { 0xD1, 1024 },
  556. { 0xD2, 2048 },
  557. { 0xD6, 1024 },
  558. { 0xD7, 2048 },
  559. { 0xD8, 4096 },
  560. { 0xDC, 1536 },
  561. { 0xDD, 3 * 1024 },
  562. { 0xDE, 6 * 1024 },
  563. { 0xE2, 2048 },
  564. { 0xE3, 4096 },
  565. { 0xE4, 8 * 1024 },
  566. { 0xEA, 12 * 1024 },
  567. { 0xEB, 18 * 1024 },
  568. { 0xEC, 24 * 1024 }
  569. };
  570. static void FindIntelCacheDesc( uint8 nDesc, const IntelCacheDesc_t *pDesc, int nDescCount, uint32 &nCache, uint32 &nCacheDesc )
  571. {
  572. for ( int i = 0; i < nDescCount; ++i )
  573. {
  574. if ( pDesc->nDesc == nDesc )
  575. {
  576. nCache = pDesc->nCacheSize;
  577. nCacheDesc = nDesc;
  578. break;
  579. }
  580. }
  581. }
  582. // see "Output of the CPUID instruction" from Intel, page 26
  583. static void InterpretIntelCacheDescriptors( uint32 nPackedDesc )
  584. {
  585. if ( nPackedDesc & 0x80000000 )
  586. {
  587. return; // this is a wrong descriptor
  588. }
  589. for ( int i = 0; i < 4; ++i )
  590. {
  591. FindIntelCacheDesc( nPackedDesc & 0xFF, s_IntelL1DataCacheDesc, ARRAYSIZE( s_IntelL1DataCacheDesc ), s_cpuInformation.m_nL1CacheSizeKb, s_cpuInformation.m_nL1CacheDesc );
  592. FindIntelCacheDesc( nPackedDesc & 0xFF, s_IntelL2DataCacheDesc, ARRAYSIZE( s_IntelL2DataCacheDesc ), s_cpuInformation.m_nL2CacheSizeKb, s_cpuInformation.m_nL2CacheDesc );
  593. FindIntelCacheDesc( nPackedDesc & 0xFF, s_IntelL3DataCacheDesc, ARRAYSIZE( s_IntelL3DataCacheDesc ), s_cpuInformation.m_nL3CacheSizeKb, s_cpuInformation.m_nL3CacheDesc );
  594. nPackedDesc >>= 8;
  595. }
  596. }
  597. const CPUInformation& GetCPUInformation()
  598. {
  599. CPUInformation &pi = s_cpuInformation;
  600. // Has the structure already been initialized and filled out?
  601. if ( pi.m_Size == sizeof(pi) )
  602. return pi;
  603. // Redundant, but just in case the user somehow messes with the size.
  604. memset(&pi, 0x0, sizeof(pi));
  605. // Fill out the structure, and return it:
  606. pi.m_Size = sizeof(pi);
  607. // Grab the processor frequency:
  608. pi.m_Speed = CalculateClockSpeed();
  609. // Get the logical and physical processor counts:
  610. pi.m_nLogicalProcessors = LogicalProcessorsPerPackage();
  611. bool bAuthenticAMD = ( 0 == V_tier0_stricmp( GetProcessorVendorId(), "AuthenticAMD" ) );
  612. bool bGenuineIntel = !bAuthenticAMD && ( 0 == V_tier0_stricmp( GetProcessorVendorId(), "GenuineIntel" ) );
  613. #if defined( _X360 )
  614. pi.m_nPhysicalProcessors = 3;
  615. pi.m_nLogicalProcessors = 6;
  616. #elif defined( _PS3 )
  617. pi.m_nPhysicalProcessors = 1;
  618. pi.m_nLogicalProcessors = 2;
  619. #elif defined(_WIN32) && !defined( _X360 )
  620. SYSTEM_INFO si;
  621. ZeroMemory( &si, sizeof(si) );
  622. GetSystemInfo( &si );
  623. // Sergiy: fixing: si.dwNumberOfProcessors is the number of logical processors according to experiments on i7, P4 and a DirectX sample (Aug'09)
  624. // this is contrary to MSDN documentation on GetSystemInfo()
  625. //
  626. pi.m_nLogicalProcessors = si.dwNumberOfProcessors;
  627. if ( bAuthenticAMD )
  628. {
  629. // quick fix for AMD Phenom: it reports 3 logical cores and 4 physical cores;
  630. // no AMD CPUs by the end of 2009 have HT, so we'll override HT detection here
  631. pi.m_nPhysicalProcessors = pi.m_nLogicalProcessors;
  632. }
  633. else
  634. {
  635. CpuTopology topo;
  636. pi.m_nPhysicalProcessors = topo.NumberOfSystemCores();
  637. }
  638. // Make sure I always report at least one, when running WinXP with the /ONECPU switch,
  639. // it likes to report 0 processors for some reason.
  640. if ( pi.m_nPhysicalProcessors == 0 && pi.m_nLogicalProcessors == 0 )
  641. {
  642. Assert( !"Sergiy: apparently I didn't fix some CPU detection code completely. Let me know and I'll do my best to fix it soon." );
  643. pi.m_nPhysicalProcessors = 1;
  644. pi.m_nLogicalProcessors = 1;
  645. }
  646. #elif defined(LINUX)
  647. pi.m_nLogicalProcessors = 0;
  648. pi.m_nPhysicalProcessors = 0;
  649. const int k_cMaxProcessors = 256;
  650. bool rgbProcessors[k_cMaxProcessors];
  651. memset( rgbProcessors, 0, sizeof( rgbProcessors ) );
  652. int cMaxCoreId = 0;
  653. FILE *fpCpuInfo = fopen( "/proc/cpuinfo", "r" );
  654. if ( fpCpuInfo )
  655. {
  656. char rgchLine[256];
  657. while ( fgets( rgchLine, sizeof( rgchLine ), fpCpuInfo ) )
  658. {
  659. if ( !strncasecmp( rgchLine, "processor", strlen( "processor" ) ) )
  660. {
  661. pi.m_nLogicalProcessors++;
  662. }
  663. if ( !strncasecmp( rgchLine, "core id", strlen( "core id" ) ) )
  664. {
  665. char *pchValue = strchr( rgchLine, ':' );
  666. cMaxCoreId = MAX( cMaxCoreId, atoi( pchValue + 1 ) );
  667. }
  668. if ( !strncasecmp( rgchLine, "physical id", strlen( "physical id" ) ) )
  669. {
  670. // it seems (based on survey data) that we can see
  671. // processor N (N > 0) when it's the only processor in
  672. // the system. so keep track of each processor
  673. char *pchValue = strchr( rgchLine, ':' );
  674. int cPhysicalId = atoi( pchValue + 1 );
  675. if ( cPhysicalId < k_cMaxProcessors )
  676. rgbProcessors[cPhysicalId] = true;
  677. }
  678. /* this code will tell us how many physical chips are in the machine, but we want
  679. core count, so for the moment, each processor counts as both logical and physical.
  680. if ( !strncasecmp( rgchLine, "physical id ", strlen( "physical id " ) ) )
  681. {
  682. char *pchValue = strchr( rgchLine, ':' );
  683. pi.m_nPhysicalProcessors = MAX( pi.m_nPhysicalProcessors, atol( pchValue ) );
  684. }
  685. */
  686. }
  687. fclose( fpCpuInfo );
  688. for ( int i = 0; i < k_cMaxProcessors; i++ )
  689. if ( rgbProcessors[i] )
  690. pi.m_nPhysicalProcessors++;
  691. pi.m_nPhysicalProcessors *= ( cMaxCoreId + 1 );
  692. }
  693. else
  694. {
  695. pi.m_nLogicalProcessors = 1;
  696. pi.m_nPhysicalProcessors = 1;
  697. Assert( !"couldn't read cpu information from /proc/cpuinfo" );
  698. }
  699. #elif defined(OSX)
  700. int num_phys_cpu = 1, num_log_cpu = 1;
  701. size_t len = sizeof(num_phys_cpu);
  702. sysctlbyname( "hw.physicalcpu", &num_phys_cpu, &len, NULL, 0 );
  703. sysctlbyname( "hw.logicalcpu", &num_log_cpu, &len, NULL, 0 );
  704. pi.m_nPhysicalProcessors = num_phys_cpu;
  705. pi.m_nLogicalProcessors = num_log_cpu;
  706. #endif
  707. CpuIdResult_t cpuid0 = cpuid( 0 );
  708. if ( cpuid0.eax >= 1 )
  709. {
  710. CpuIdResult_t cpuid1 = cpuid( 1 );
  711. uint bFPU = cpuid1.edx & 1; // this should always be on on anything we support
  712. // Determine Processor Features:
  713. pi.m_bRDTSC = ( cpuid1.edx >> 4 ) & 1;
  714. pi.m_bCMOV = ( cpuid1.edx >> 15 ) & 1;
  715. pi.m_bFCMOV = ( pi.m_bCMOV && bFPU ) ? 1 : 0;
  716. pi.m_bMMX = ( cpuid1.edx >> 23 ) & 1;
  717. pi.m_bSSE = ( cpuid1.edx >> 25 ) & 1;
  718. pi.m_bSSE2 = ( cpuid1.edx >> 26 ) & 1;
  719. pi.m_bSSE3 = cpuid1.ecx & 1;
  720. pi.m_bSSSE3 = ( cpuid1.ecx >> 9 ) & 1;;
  721. pi.m_bSSE4a = CheckSSE4aTechnology();
  722. pi.m_bSSE41 = ( cpuid1.ecx >> 19 ) & 1;
  723. pi.m_bSSE42 = ( cpuid1.ecx >> 20 ) & 1;
  724. pi.m_b3DNow = Check3DNowTechnology();
  725. pi.m_bAVX = ( cpuid1.ecx >> 28 ) & 1;
  726. pi.m_szProcessorID = ( tchar* )GetProcessorVendorId();
  727. pi.m_szProcessorBrand = ( tchar* )GetProcessorBrand();
  728. pi.m_bHT = ( pi.m_nPhysicalProcessors < pi.m_nLogicalProcessors ); //HTSupported();
  729. pi.m_nModel = cpuid1.eax; // full CPU model info
  730. pi.m_nFeatures[ 0 ] = cpuid1.edx; // x87+ features
  731. pi.m_nFeatures[ 1 ] = cpuid1.ecx; // sse3+ features
  732. pi.m_nFeatures[ 2 ] = cpuid1.ebx; // some additional features
  733. if ( bGenuineIntel )
  734. {
  735. if ( cpuid0.eax >= 4 )
  736. {
  737. // we have CPUID.4, use it to find all the cache parameters
  738. const uint nCachesToQuery = 4; // leve 0 is not used
  739. uint nCacheSizeKiB[ nCachesToQuery ];
  740. for ( uint i = 0; i < nCachesToQuery; ++i )
  741. {
  742. nCacheSizeKiB[ i ] = 0;
  743. }
  744. for ( unsigned long nSub = 0; nSub < 1024 ; ++nSub )
  745. {
  746. CpuIdResult_t cpuid4 = cpuidex( 4, nSub );
  747. uint nCacheType = cpuid4.eax & 0x1F;
  748. if ( nCacheType == 0 )
  749. {
  750. // no more caches
  751. break;
  752. }
  753. if ( nCacheType & 1 )
  754. {
  755. // this cache includes data cache: it's either data or unified. Instuction cache type is 2
  756. uint nCacheLevel = ( cpuid4.eax >> 5 ) & 7;
  757. if ( nCacheLevel < nCachesToQuery )
  758. {
  759. uint nCacheWays = 1 + ( ( cpuid4.ebx >> 22 ) & 0x3F );
  760. uint nCachePartitions = 1 + ( ( cpuid4.ebx >> 12 ) & 0x3F );
  761. uint nCacheLineSize = 1 + ( cpuid4.ebx & 0xFF );
  762. uint nCacheSets = 1 + cpuid4.ecx;
  763. uint nCacheSizeBytes = nCacheWays * nCachePartitions * nCacheLineSize * nCacheSets;
  764. nCacheSizeKiB[ nCacheLevel ] = nCacheSizeBytes >> 10;
  765. }
  766. }
  767. }
  768. pi.m_nL1CacheSizeKb = nCacheSizeKiB[ 1 ];
  769. pi.m_nL2CacheSizeKb = nCacheSizeKiB[ 2 ];
  770. pi.m_nL3CacheSizeKb = nCacheSizeKiB[ 3 ];
  771. }
  772. else if ( cpuid0.eax >= 2 )
  773. {
  774. // get the cache
  775. CpuIdResult_t cpuid2 = cpuid( 2 );
  776. for ( int i = ( cpuid2.eax & 0xFF ); i-- > 0; )
  777. {
  778. InterpretIntelCacheDescriptors( cpuid2.eax & ~0xFF );
  779. InterpretIntelCacheDescriptors( cpuid2.ebx );
  780. InterpretIntelCacheDescriptors( cpuid2.ecx );
  781. InterpretIntelCacheDescriptors( cpuid2.edx );
  782. cpuid2 = cpuid( 2 ); // read the next
  783. }
  784. }
  785. }
  786. }
  787. CpuIdResult_t cpuid0ex = cpuid( 0x80000000 );
  788. if ( bAuthenticAMD )
  789. {
  790. if ( cpuid0ex.eax >= 0x80000005 )
  791. {
  792. CpuIdResult_t cpuid5ex = cpuid( 0x80000005 );
  793. pi.m_nL1CacheSizeKb = cpuid5ex.ecx >> 24;
  794. pi.m_nL1CacheDesc = cpuid5ex.ecx & 0xFFFFFF;
  795. }
  796. if ( cpuid0ex.eax >= 0x80000006 )
  797. {
  798. CpuIdResult_t cpuid6ex = cpuid( 0x80000006 );
  799. pi.m_nL2CacheSizeKb = cpuid6ex.ecx >> 16;
  800. pi.m_nL2CacheDesc = cpuid6ex.ecx & 0xFFFF;
  801. pi.m_nL3CacheSizeKb = ( cpuid6ex.edx >> 18 ) * 512;
  802. pi.m_nL3CacheDesc = cpuid6ex.edx & 0xFFFF;
  803. }
  804. }
  805. else if ( bGenuineIntel )
  806. {
  807. if ( cpuid0ex.eax >= 0x80000006 )
  808. {
  809. // make sure we got the L2 cache info right
  810. pi.m_nL2CacheSizeKb = ( cpuid( 0x80000006 ).ecx >> 16 );
  811. }
  812. }
  813. return pi;
  814. }