Team Fortress 2 Source Code as on 22/4/2020
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1752 lines
45 KiB

  1. //===== Copyright 1996-2005, Valve Corporation, All rights reserved. ======//
  2. //
  3. // Purpose: Real-Time Hierarchical Profiling
  4. //
  5. // $NoKeywords: $
  6. //===========================================================================//
  7. #include "pch_tier0.h"
  8. #include "tier0/memalloc.h"
  9. #include "tier0/valve_off.h"
  10. #if defined(_WIN32) && !defined(_X360)
  11. #define WIN_32_LEAN_AND_MEAN
  12. #include <windows.h>
  13. #endif
  14. #include <assert.h>
  15. #ifdef _WIN32
  16. #pragma warning(disable:4073)
  17. #pragma init_seg( lib )
  18. #endif
  19. #pragma warning(push, 1)
  20. #pragma warning(disable:4786)
  21. #pragma warning(disable:4530)
  22. #include <map>
  23. #include <vector>
  24. #include <algorithm>
  25. #pragma warning(pop)
  26. #include "tier0/valve_on.h"
  27. #include "tier0/vprof.h"
  28. #include "tier0/l2cache.h"
  29. #include "strtools.h"
  30. #ifdef _X360
  31. #include "xbox/xbox_console.h"
  32. #elif defined(_PS3)
  33. #include "ps3/ps3_console.h"
  34. #else // NOT _X360:
  35. #include "tier0/memdbgon.h"
  36. #endif
  37. // NOTE: Explicitly and intentionally using STL in here to not generate any
  38. // cyclical dependencies between the low-level debug library and the higher
  39. // level data structures (toml 01-27-03)
  40. using namespace std;
  41. #ifdef VPROF_ENABLED
  42. #if defined(_X360) && !defined(_CERT) // enable PIX CPU trace:
  43. #include "tracerecording.h"
  44. #pragma comment( lib, "tracerecording.lib" )
  45. #pragma comment( lib, "xbdm.lib" )
  46. #endif
  //-----------------------------------------------------------------------------
  // Global flag; only its storage is defined here. It is not read or written in
  // the visible part of this file -- presumably set when a spike is detected (TODO confirm).
  bool g_VProfSignalSpike;
  //-----------------------------------------------------------------------------
  // The global profile instance that the node code in this file consults
  // (UsePME(), CPU trace mode, budget groups, ...).
  CVProfile g_VProfCurrentProfile;
  // Definition of the CVProfNode static counter, starting at zero.
  // NOTE(review): presumably used to hand out per-node unique IDs -- confirm in the header.
  int CVProfNode::s_iCurrentUniqueNodeID = 0;
  52. CVProfNode::~CVProfNode()
  53. {
  54. #if !defined( _WIN32 ) && !defined( POSIX )
  55. delete m_pChild;
  56. delete m_pSibling;
  57. #endif
  58. }
  59. CVProfNode *CVProfNode::GetSubNode( const tchar *pszName, int detailLevel, const tchar *pBudgetGroupName, int budgetFlags )
  60. {
  61. // Try to find this sub node
  62. CVProfNode * child = m_pChild;
  63. while ( child )
  64. {
  65. if ( child->m_pszName == pszName )
  66. {
  67. return child;
  68. }
  69. child = child->m_pSibling;
  70. }
  71. // We didn't find it, so add it
  72. CVProfNode * node = new CVProfNode( pszName, detailLevel, this, pBudgetGroupName, budgetFlags );
  73. node->m_pSibling = m_pChild;
  74. m_pChild = node;
  75. return node;
  76. }
  77. CVProfNode *CVProfNode::GetSubNode( const tchar *pszName, int detailLevel, const tchar *pBudgetGroupName )
  78. {
  79. return GetSubNode( pszName, detailLevel, pBudgetGroupName, BUDGETFLAG_OTHER );
  80. }
  81. //-------------------------------------
  82. void CVProfNode::EnterScope()
  83. {
  84. m_nCurFrameCalls++;
  85. if ( m_nRecursions++ == 0 )
  86. {
  87. m_Timer.Start();
  88. #ifndef _X360
  89. if ( g_VProfCurrentProfile.UsePME() )
  90. {
  91. m_L2Cache.Start();
  92. }
  93. #else // 360 code:
  94. if ( g_VProfCurrentProfile.UsePME() || ((m_iBitFlags & kRecordL2) != 0) )
  95. {
  96. m_PMCData.Start();
  97. }
  98. if ( (m_iBitFlags & kCPUTrace) != 0)
  99. {
  100. // this node is to be recorded. Which recording mode are we in?
  101. switch ( g_VProfCurrentProfile.GetCPUTraceMode() )
  102. {
  103. case CVProfile::kFirstHitNode:
  104. case CVProfile::kAllNodesInFrame_Recording:
  105. case CVProfile::kAllNodesInFrame_RecordingMultiFrame:
  106. // we are presently recording.
  107. if ( !XTraceStartRecording( g_VProfCurrentProfile.GetCPUTraceFilename() ) )
  108. {
  109. Msg( "XTraceStartRecording failed, error code %d\n", GetLastError() );
  110. }
  111. default:
  112. // no default.
  113. break;
  114. }
  115. }
  116. #endif
  117. #ifdef VPROF_VTUNE_GROUP
  118. g_VProfCurrentProfile.PushGroup( m_BudgetGroupID );
  119. #endif
  120. }
  121. }
  122. //-------------------------------------
  // Called on exit from this node's scope. Decrements the nesting depth; when
  // the outermost exit is reached (and at least one call was counted this
  // frame) the timer is stopped and its duration, plus any cache/PMC counts,
  // are added to the current-frame totals. On 360 this also stops a CPU trace
  // if this node is flagged for one.
  // Returns true when the nesting depth is back to zero.
  bool CVProfNode::ExitScope()
  {
      if ( --m_nRecursions == 0 && m_nCurFrameCalls != 0 )
      {
          m_Timer.End();
          m_CurFrameTime += m_Timer.GetDuration();
  #ifndef _X360
          if ( g_VProfCurrentProfile.UsePME() )
          {
              m_L2Cache.End();
              m_iCurL2CacheMiss += m_L2Cache.GetL2CacheMisses();
          }
  #else // 360 code:
          if ( g_VProfCurrentProfile.UsePME() || ((m_iBitFlags & kRecordL2) != 0) )
          {
              m_PMCData.End();
              m_iCurL2CacheMiss += m_PMCData.GetL2CacheMisses();
              m_iCurLoadHitStores += m_PMCData.GetLHS();
          }
          if ( (m_iBitFlags & kCPUTrace) != 0 )
          {
              // this node is enabled to be recorded. What mode are we in?
              switch ( g_VProfCurrentProfile.GetCPUTraceMode() )
              {
              case CVProfile::kFirstHitNode:
                  {
                      // one-off recording. stop now.
                      if ( XTraceStopRecording() )
                      {
                          Msg( "CPU trace finished.\n" );
                          if ( g_VProfCurrentProfile.TraceCompleteEvent() )
                          {
                              // signal VXConsole that trace is completed
                              XBX_rTraceComplete();
                          }
                      }
                      // don't trace again next frame, overwriting the file.
                      g_VProfCurrentProfile.SetCPUTraceEnabled( CVProfile::kDisabled );
                      break;
                  }
              case CVProfile::kAllNodesInFrame_Recording:
              case CVProfile::kAllNodesInFrame_RecordingMultiFrame:
                  {
                      // Stop the recording that EnterScope() started for this node.
                      if ( XTraceStopRecording() )
                      {
                          // Multi-frame mode reports the elapsed time and file name; single-frame just announces completion.
                          if ( g_VProfCurrentProfile.GetCPUTraceMode() == CVProfile::kAllNodesInFrame_RecordingMultiFrame )
                          {
                              Msg( "%.3f msec in %s\n", m_CurFrameTime.GetMillisecondsF(), g_VProfCurrentProfile.GetCPUTraceFilename() );
                          }
                          else
                          {
                              Msg( "CPU trace finished.\n" );
                          }
                      }
                      // Spew time info for file to allow figuring it out later
                      g_VProfCurrentProfile.LatchMultiFrame( m_CurFrameTime.GetLongCycles() );
  #if 0 // This doesn't want to work on the xbox360-- MoveFile not available or file still being put down to disk?
                      // NOTE(review): disabled code. If it is ever re-enabled: flMsecs is not declared in
                      // this function, strrchr() may return NULL so the test should be on p rather than *p,
                      // and the strncat() size arguments should be the space remaining, not sizeof( fn ).
                      char suffix[ 32 ];
                      _snprintf( suffix, sizeof( suffix ), "_%.3f_msecs", flMsecs );
                      char fn[ 512 ];
                      strncpy( fn, g_VProfCurrentProfile.GetCPUTraceFilename(), sizeof( fn ) );
                      char *p = strrchr( fn, '.' );
                      if ( *p )
                      {
                          *p = 0;
                      }
                      strncat( fn, suffix, sizeof( fn ) );
                      strncat( fn, ".pix2", sizeof( fn ) );
                      BOOL bSuccess = MoveFile( g_VProfCurrentProfile.GetCPUTraceFilename(), fn );
                      if ( !bSuccess )
                      {
                          DWORD eCode = GetLastError();
                          Msg( "Error %d\n", eCode );
                      }
  #endif
                      // we're still recording until the frame is done.
                      // but, increment the index.
                      g_VProfCurrentProfile.IncrementMultiTraceIndex();
                      break;
                  }
              }
              // g_VProfCurrentProfile.IsCPUTraceEnabled() &&
          }
  #endif
  #ifdef VPROF_VTUNE_GROUP
          g_VProfCurrentProfile.PopGroup();
  #endif
      }
      // True once every nested EnterScope() has been matched by an ExitScope()
      return ( m_nRecursions == 0 );
  }
  214. //-------------------------------------
  215. void CVProfNode::Pause()
  216. {
  217. if ( m_nRecursions > 0 )
  218. {
  219. m_Timer.End();
  220. m_CurFrameTime += m_Timer.GetDuration();
  221. #ifndef _X360
  222. if ( g_VProfCurrentProfile.UsePME() )
  223. {
  224. m_L2Cache.End();
  225. m_iCurL2CacheMiss += m_L2Cache.GetL2CacheMisses();
  226. }
  227. #else // 360 code:
  228. if ( g_VProfCurrentProfile.UsePME() || ((m_iBitFlags & kRecordL2) != 0) )
  229. {
  230. m_PMCData.End();
  231. m_iCurL2CacheMiss += m_PMCData.GetL2CacheMisses();
  232. m_iCurLoadHitStores += m_PMCData.GetLHS();
  233. }
  234. #endif
  235. }
  236. if ( m_pChild )
  237. {
  238. m_pChild->Pause();
  239. }
  240. if ( m_pSibling )
  241. {
  242. m_pSibling->Pause();
  243. }
  244. }
  245. //-------------------------------------
  246. void CVProfNode::Resume()
  247. {
  248. if ( m_nRecursions > 0 )
  249. {
  250. m_Timer.Start();
  251. #ifndef _X360
  252. if ( g_VProfCurrentProfile.UsePME() )
  253. {
  254. m_L2Cache.Start();
  255. }
  256. #else
  257. if ( g_VProfCurrentProfile.UsePME() || ((m_iBitFlags & kRecordL2) != 0) )
  258. {
  259. m_PMCData.Start();
  260. }
  261. #endif
  262. }
  263. if ( m_pChild )
  264. {
  265. m_pChild->Resume();
  266. }
  267. if ( m_pSibling )
  268. {
  269. m_pSibling->Resume();
  270. }
  271. }
  272. //-------------------------------------
  273. void CVProfNode::Reset()
  274. {
  275. m_nPrevFrameCalls = 0;
  276. m_PrevFrameTime.Init();
  277. m_nCurFrameCalls = 0;
  278. m_CurFrameTime.Init();
  279. m_nTotalCalls = 0;
  280. m_TotalTime.Init();
  281. m_PeakTime.Init();
  282. m_iPrevL2CacheMiss = 0;
  283. m_iCurL2CacheMiss = 0;
  284. m_iTotalL2CacheMiss = 0;
  285. #ifdef _X360
  286. m_iPrevLoadHitStores = 0;
  287. m_iCurLoadHitStores = 0;
  288. m_iTotalLoadHitStores = 0;
  289. #endif
  290. if ( m_pChild )
  291. {
  292. m_pChild->Reset();
  293. }
  294. if ( m_pSibling )
  295. {
  296. m_pSibling->Reset();
  297. }
  298. }
  299. //-------------------------------------
  300. void CVProfNode::MarkFrame()
  301. {
  302. m_nPrevFrameCalls = m_nCurFrameCalls;
  303. m_PrevFrameTime = m_CurFrameTime;
  304. m_iPrevL2CacheMiss = m_iCurL2CacheMiss;
  305. #ifdef _X360
  306. m_iPrevLoadHitStores = m_iCurLoadHitStores;
  307. #endif
  308. m_nTotalCalls += m_nCurFrameCalls;
  309. m_TotalTime += m_CurFrameTime;
  310. if ( m_PeakTime.IsLessThan( m_CurFrameTime ) )
  311. {
  312. m_PeakTime = m_CurFrameTime;
  313. }
  314. m_CurFrameTime.Init();
  315. m_nCurFrameCalls = 0;
  316. m_iTotalL2CacheMiss += m_iCurL2CacheMiss;
  317. m_iCurL2CacheMiss = 0;
  318. #ifdef _X360
  319. m_iTotalLoadHitStores += m_iCurLoadHitStores;
  320. m_iCurLoadHitStores = 0;
  321. #endif
  322. if ( m_pChild )
  323. {
  324. m_pChild->MarkFrame();
  325. }
  326. if ( m_pSibling )
  327. {
  328. m_pSibling->MarkFrame();
  329. }
  330. }
  331. //-------------------------------------
  332. void CVProfNode::ResetPeak()
  333. {
  334. m_PeakTime.Init();
  335. if ( m_pChild )
  336. {
  337. m_pChild->ResetPeak();
  338. }
  339. if ( m_pSibling )
  340. {
  341. m_pSibling->ResetPeak();
  342. }
  343. }
  344. void CVProfNode::SetCurFrameTime( unsigned long milliseconds )
  345. {
  346. m_CurFrameTime.Init( (float)milliseconds );
  347. }
  #ifdef DBGFLAG_VALIDATE
  //-----------------------------------------------------------------------------
  // Purpose: Validation pass over this node: pushes a "CVProfNode" scope on the
  //          validator, validates the L2 cache member, then the sibling and
  //          child nodes, and pops the scope again.
  // Input:   validator -	Our global validator object
  //          pchName -		Our name (typically a member var in our container)
  //-----------------------------------------------------------------------------
  void CVProfNode::Validate( CValidator &validator, tchar *pchName )
  {
      validator.Push( _T("CVProfNode"), this, pchName );

      m_L2Cache.Validate( validator, _T("m_L2Cache") );

      if ( m_pSibling != NULL )
      {
          m_pSibling->Validate( validator, _T("m_pSibling") );
      }
      if ( m_pChild != NULL )
      {
          m_pChild->Validate( validator, _T("m_pChild") );
      }

      validator.Pop( );
  }
  #endif // DBGFLAG_VALIDATE
  366. //-----------------------------------------------------------------------------
  // Per-scope-name aggregate built by CVProfile::SumTimes(): all nodes sharing
  // a name pointer are folded into one entry. Member order matters -- entries
  // are created with a positional aggregate initializer.
  struct TimeSums_t
  {
      const tchar *pszProfileScope;   // node name (taken from CVProfNode::GetName())
      unsigned calls;                 // summed GetTotalCalls()
      double time;                    // summed GetTotalTime()
      double timeLessChildren;        // summed GetTotalTimeLessChildren()
      double peak;                    // largest GetPeakTime() seen among the folded nodes
  };
  375. static bool TimeCompare( const TimeSums_t &lhs, const TimeSums_t &rhs )
  376. {
  377. return ( lhs.time > rhs.time );
  378. }
  379. static bool TimeLessChildrenCompare( const TimeSums_t &lhs, const TimeSums_t &rhs )
  380. {
  381. return ( lhs.timeLessChildren > rhs.timeLessChildren );
  382. }
  383. static bool PeakCompare( const TimeSums_t &lhs, const TimeSums_t &rhs )
  384. {
  385. return ( lhs.peak > rhs.peak );
  386. }
  387. static bool AverageTimeCompare( const TimeSums_t &lhs, const TimeSums_t &rhs )
  388. {
  389. double avgLhs = ( lhs.calls ) ? lhs.time / (double)lhs.calls : 0.0;
  390. double avgRhs = ( rhs.calls ) ? rhs.time / (double)rhs.calls : 0.0;
  391. return ( avgLhs > avgRhs );
  392. }
  393. static bool AverageTimeLessChildrenCompare( const TimeSums_t &lhs, const TimeSums_t &rhs )
  394. {
  395. double avgLhs = ( lhs.calls ) ? lhs.timeLessChildren / (double)lhs.calls : 0.0;
  396. double avgRhs = ( rhs.calls ) ? rhs.timeLessChildren / (double)rhs.calls : 0.0;
  397. return ( avgLhs > avgRhs );
  398. }
  399. static bool PeakOverAverageCompare( const TimeSums_t &lhs, const TimeSums_t &rhs )
  400. {
  401. double avgLhs = ( lhs.calls ) ? lhs.time / (double)lhs.calls : 0.0;
  402. double avgRhs = ( rhs.calls ) ? rhs.time / (double)rhs.calls : 0.0;
  403. double lhsPoA = ( avgLhs != 0 ) ? lhs.peak / avgLhs : 0.0;
  404. double rhsPoA = ( avgRhs != 0 ) ? rhs.peak / avgRhs : 0.0;
  405. return ( lhsPoA > rhsPoA );
  406. }
  // Working state shared by SumTimes() / DumpNodes() / DumpSorted().
  // Per-node time excluding children, filled in by SumTimes() and looked up by DumpNodes().
  map<CVProfNode *, double> g_TimesLessChildren;
  // NOTE(review): not referenced in the visible part of this file -- purpose unconfirmed.
  int g_TotalFrames;
  // Maps a scope-name pointer to the index of its entry in g_TimeSums.
  map<const tchar *, uintp> g_TimeSumsMap;
  // One aggregated entry per distinct scope-name pointer.
  vector<TimeSums_t> g_TimeSums;
  // Node at which summing/dumping starts; NULL while no start node is latched.
  CVProfNode * g_pStartNode;
  // Name of the node to start summing from (compared by string in SumTimes()).
  const tchar * g_pszSumNode;
  413. //-------------------------------------
  414. void CVProfile::SumTimes( CVProfNode *pNode, int budgetGroupID )
  415. {
  416. if ( !pNode )
  417. return; // this generally only happens on a failed FindNode()
  418. bool bSetStartNode;
  419. if ( !g_pStartNode && _tcscmp( pNode->GetName(), g_pszSumNode ) == 0 )
  420. {
  421. g_pStartNode = pNode;
  422. bSetStartNode = true;
  423. }
  424. else
  425. bSetStartNode = false;
  426. if ( GetRoot() != pNode )
  427. {
  428. if ( g_pStartNode && pNode->GetTotalCalls() > 0 && ( budgetGroupID == -1 || pNode->GetBudgetGroupID() == budgetGroupID ) )
  429. {
  430. double timeLessChildren = pNode->GetTotalTimeLessChildren();
  431. g_TimesLessChildren.insert( make_pair( pNode, timeLessChildren ) );
  432. map<const tchar *, uintp>::iterator iter;
  433. iter = g_TimeSumsMap.find( pNode->GetName() ); // intenionally using address of string rather than string compare (toml 01-27-03)
  434. if ( iter == g_TimeSumsMap.end() )
  435. {
  436. TimeSums_t timeSums = { pNode->GetName(), pNode->GetTotalCalls(), pNode->GetTotalTime(), timeLessChildren, pNode->GetPeakTime() };
  437. g_TimeSumsMap.insert( make_pair( pNode->GetName(), g_TimeSums.size() ) );
  438. g_TimeSums.push_back( timeSums );
  439. }
  440. else
  441. {
  442. TimeSums_t &timeSums = g_TimeSums[iter->second];
  443. timeSums.calls += pNode->GetTotalCalls();
  444. timeSums.time += pNode->GetTotalTime();
  445. timeSums.timeLessChildren += timeLessChildren;
  446. if ( pNode->GetPeakTime() > timeSums.peak )
  447. timeSums.peak = pNode->GetPeakTime();
  448. }
  449. }
  450. if( ( !g_pStartNode || pNode != g_pStartNode ) && pNode->GetSibling() )
  451. {
  452. SumTimes( pNode->GetSibling(), budgetGroupID );
  453. }
  454. }
  455. if( pNode->GetChild() )
  456. {
  457. SumTimes( pNode->GetChild(), budgetGroupID );
  458. }
  459. if ( bSetStartNode )
  460. g_pStartNode = NULL;
  461. }
  462. //-------------------------------------
  463. CVProfNode *CVProfile::FindNode( CVProfNode *pStartNode, const tchar *pszNode )
  464. {
  465. if ( _tcscmp( pStartNode->GetName(), pszNode ) != 0 )
  466. {
  467. CVProfNode *pFoundNode = NULL;
  468. if ( pStartNode->GetSibling() )
  469. {
  470. pFoundNode = FindNode( pStartNode->GetSibling(), pszNode );
  471. }
  472. if ( !pFoundNode && pStartNode->GetChild() )
  473. {
  474. pFoundNode = FindNode( pStartNode->GetChild(), pszNode );
  475. }
  476. return pFoundNode;
  477. }
  478. return pStartNode;
  479. }
  480. //-------------------------------------
  481. #ifdef _X360
  482. void CVProfile::PMCDisableAllNodes(CVProfNode *pStartNode)
  483. {
  484. if (pStartNode == NULL)
  485. {
  486. pStartNode = GetRoot();
  487. }
  488. pStartNode->EnableL2andLHS(false);
  489. if ( pStartNode->GetSibling() )
  490. {
  491. PMCDisableAllNodes(pStartNode->GetSibling());
  492. }
  493. if ( pStartNode->GetChild() )
  494. {
  495. PMCDisableAllNodes(pStartNode->GetChild());
  496. }
  497. }
  498. // recursively set l2/lhs recording state for a node and all children AND SIBLINGS
  499. static void PMCRecursiveL2Set(CVProfNode *pNode, bool enableState)
  500. {
  501. if ( pNode )
  502. {
  503. pNode->EnableL2andLHS(enableState);
  504. if ( pNode->GetSibling() )
  505. {
  506. PMCRecursiveL2Set( pNode->GetSibling(), enableState );
  507. }
  508. if ( pNode->GetChild() )
  509. {
  510. PMCRecursiveL2Set( pNode->GetChild(), enableState );
  511. }
  512. }
  513. }
  514. bool CVProfile::PMCEnableL2Upon(const tchar *pszNodeName, bool bRecursive)
  515. {
  516. // PMCDisableAllNodes();
  517. CVProfNode *pNode = FindNode( GetRoot(), pszNodeName );
  518. if (pNode)
  519. {
  520. pNode->EnableL2andLHS(true);
  521. if (bRecursive)
  522. {
  523. PMCRecursiveL2Set(pNode->GetChild(), true);
  524. }
  525. return true;
  526. }
  527. else
  528. {
  529. return false;
  530. }
  531. }
  532. bool CVProfile::PMCDisableL2Upon(const tchar *pszNodeName, bool bRecursive)
  533. {
  534. // PMCDisableAllNodes();
  535. CVProfNode *pNode = FindNode( GetRoot(), pszNodeName );
  536. if ( pNode )
  537. {
  538. pNode->EnableL2andLHS( false );
  539. if ( bRecursive )
  540. {
  541. PMCRecursiveL2Set( pNode->GetChild(), false );
  542. }
  543. return true;
  544. }
  545. else
  546. {
  547. return false;
  548. }
  549. }
  550. static void DumpEnabledPMCNodesInner(CVProfNode* pNode)
  551. {
  552. if (!pNode)
  553. return;
  554. if (pNode->IsL2andLHSEnabled())
  555. {
  556. Msg( _T("\t%s\n"), pNode->GetName() );
  557. }
  558. // depth first printing clearer
  559. if ( pNode->GetChild() )
  560. {
  561. DumpEnabledPMCNodesInner(pNode->GetChild());
  562. }
  563. if ( pNode->GetSibling() )
  564. {
  565. DumpEnabledPMCNodesInner(pNode->GetChild());
  566. }
  567. }
  568. void CVProfile::DumpEnabledPMCNodes( void )
  569. {
  570. Msg( _T("Nodes enabled for PMC counters:\n") );
  571. CVProfNode *pNode = GetRoot();
  572. DumpEnabledPMCNodesInner( pNode );
  573. Msg( _T("(end)\n") );
  574. }
  575. CVProfNode *CVProfile::CPUTraceGetEnabledNode(CVProfNode *pStartNode)
  576. {
  577. if (!pStartNode)
  578. {
  579. pStartNode = GetRoot();
  580. }
  581. if ( (pStartNode->m_iBitFlags & CVProfNode::kCPUTrace) != 0 )
  582. {
  583. return pStartNode;
  584. }
  585. if (pStartNode->GetSibling())
  586. {
  587. CVProfNode *retval = CPUTraceGetEnabledNode(pStartNode->GetSibling());
  588. if (retval)
  589. return retval;
  590. }
  591. if (pStartNode->GetChild())
  592. {
  593. CVProfNode *retval = CPUTraceGetEnabledNode(pStartNode->GetChild());
  594. if (retval)
  595. return retval;
  596. }
  597. return NULL;
  598. }
  599. const char *CVProfile::SetCPUTraceFilename( const char *filename )
  600. {
  601. strncpy( m_CPUTraceFilename, filename, sizeof( m_CPUTraceFilename ) );
  602. return GetCPUTraceFilename();
  603. }
  604. /// Returns a pointer to an internal static, so you don't need to
  605. /// make temporary char buffers for this to write into. What of it?
  606. /// You're not hanging on to that pointer. That would be foolish.
  607. const char *CVProfile::GetCPUTraceFilename()
  608. {
  609. static char retBuf[256];
  610. switch ( m_iCPUTraceEnabled )
  611. {
  612. case kAllNodesInFrame_WaitingForMark:
  613. case kAllNodesInFrame_Recording:
  614. _snprintf( retBuf, sizeof( retBuf ), "e:\\%.128s%.4d.pix2", m_CPUTraceFilename, m_iSuccessiveTraceIndex );
  615. break;
  616. case kAllNodesInFrame_WaitingForMarkMultiFrame:
  617. case kAllNodesInFrame_RecordingMultiFrame:
  618. _snprintf( retBuf, sizeof( retBuf ), "e:\\%.128s_%.4d_%.4d.pix2", m_CPUTraceFilename, m_nFrameCount, m_iSuccessiveTraceIndex );
  619. break;
  620. default:
  621. _snprintf( retBuf, sizeof( retBuf ), "e:\\%.128s.pix2", m_CPUTraceFilename );
  622. }
  623. return retBuf;
  624. }
  625. bool CVProfile::TraceCompleteEvent( void )
  626. {
  627. return m_bTraceCompleteEvent;
  628. }
  629. CVProfNode *CVProfile::CPUTraceEnableForNode(const tchar *pszNodeName)
  630. {
  631. // disable whatever may be enabled already (we can only trace one node at a time)
  632. CPUTraceDisableAllNodes();
  633. CVProfNode *which = FindNode(GetRoot(), pszNodeName);
  634. if (which)
  635. {
  636. which->m_iBitFlags |= CVProfNode::kCPUTrace;
  637. return which;
  638. }
  639. else
  640. return NULL;
  641. }
  642. void CVProfile::CPUTraceDisableAllNodes(CVProfNode *pStartNode)
  643. {
  644. if (!pStartNode)
  645. {
  646. pStartNode = GetRoot();
  647. }
  648. pStartNode->m_iBitFlags &= ~CVProfNode::kCPUTrace;
  649. if (pStartNode->GetSibling())
  650. {
  651. CPUTraceDisableAllNodes(pStartNode->GetSibling());
  652. }
  653. if (pStartNode->GetChild())
  654. {
  655. CPUTraceDisableAllNodes(pStartNode->GetChild());
  656. }
  657. }
  658. #endif
  659. //-------------------------------------
  660. void CVProfile::SumTimes( const tchar *pszStartNode, int budgetGroupID )
  661. {
  662. if ( GetRoot()->GetChild() )
  663. {
  664. if ( pszStartNode == NULL )
  665. g_pStartNode = GetRoot();
  666. else
  667. g_pStartNode = NULL;
  668. g_pszSumNode = pszStartNode;
  669. SumTimes( GetRoot(), budgetGroupID );
  670. g_pStartNode = NULL;
  671. }
  672. }
  673. //-------------------------------------
  674. void CVProfile::DumpNodes( CVProfNode *pNode, int indent, bool bAverageAndCountOnly )
  675. {
  676. if ( !pNode )
  677. return; // this generally only happens on a failed FindNode()
  678. bool fIsRoot = ( pNode == GetRoot() );
  679. if ( fIsRoot || pNode == g_pStartNode )
  680. {
  681. if( bAverageAndCountOnly )
  682. {
  683. Msg( _T(" Avg Time/Frame (ms)\n") );
  684. Msg( _T("[ func+child func ] Count\n") );
  685. Msg( _T(" ---------- --------- --------\n") );
  686. }
  687. else
  688. {
  689. Msg( _T(" Sum (ms) Avg Time/Frame (ms) Avg Time/Call (ms)\n") );
  690. Msg( _T("[ func+child func ] [ func+child func ] [ func+child func ] Count Peak\n") );
  691. Msg( _T(" ---------- --------- ---------- ------ ---------- ------ -------- ------\n") );
  692. }
  693. }
  694. if ( !fIsRoot )
  695. {
  696. map<CVProfNode *, double>::iterator iterTimeLessChildren = g_TimesLessChildren.find( pNode );
  697. double dNodeTime = 0;
  698. if(iterTimeLessChildren != g_TimesLessChildren.end())
  699. dNodeTime = iterTimeLessChildren->second;
  700. if( bAverageAndCountOnly )
  701. {
  702. Msg( _T(" %10.3f %9.2f %8d"),
  703. ( pNode->GetTotalCalls() > 0 ) ? pNode->GetTotalTime() / (double)NumFramesSampled() : 0,
  704. ( pNode->GetTotalCalls() > 0 ) ? dNodeTime / (double)NumFramesSampled() : 0,
  705. pNode->GetTotalCalls() );
  706. }
  707. else
  708. {
  709. Msg( _T(" %10.3f %9.2f %10.3f %6.2f %10.3f %6.2f %8d %6.2f"),
  710. pNode->GetTotalTime(), dNodeTime,
  711. ( pNode->GetTotalCalls() > 0 ) ? pNode->GetTotalTime() / (double)NumFramesSampled() : 0,
  712. ( pNode->GetTotalCalls() > 0 ) ? dNodeTime / (double)NumFramesSampled() : 0,
  713. ( pNode->GetTotalCalls() > 0 ) ? pNode->GetTotalTime() / (double)pNode->GetTotalCalls() : 0,
  714. ( pNode->GetTotalCalls() > 0 ) ? dNodeTime / (double)pNode->GetTotalCalls() : 0,
  715. pNode->GetTotalCalls(), pNode->GetPeakTime() );
  716. }
  717. Msg( _T(" ") );
  718. for ( int i = 1; i < indent; i++ )
  719. {
  720. Msg( _T("| ") );
  721. }
  722. Msg( _T("%s\n"), pNode->GetName() );
  723. }
  724. if( pNode->GetChild() )
  725. {
  726. DumpNodes( pNode->GetChild(), indent + 1, bAverageAndCountOnly );
  727. }
  728. if( !( fIsRoot || pNode == g_pStartNode ) && pNode->GetSibling() )
  729. {
  730. DumpNodes( pNode->GetSibling(), indent, bAverageAndCountOnly );
  731. }
  732. }
  733. //-------------------------------------
  734. #if defined( VPROF_VXCONSOLE_EXISTS )
  735. static void CalcBudgetGroupTimes_Recursive( CVProfNode *pNode, unsigned int *groupTimes, int numGroups, float flScale )
  736. {
  737. int groupID;
  738. CVProfNode *nodePtr;
  739. groupID = pNode->GetBudgetGroupID();
  740. if ( groupID >= numGroups )
  741. {
  742. return;
  743. }
  744. groupTimes[groupID] += flScale*pNode->GetPrevTimeLessChildren();
  745. nodePtr = pNode->GetSibling();
  746. if ( nodePtr )
  747. {
  748. CalcBudgetGroupTimes_Recursive( nodePtr, groupTimes, numGroups, flScale );
  749. }
  750. nodePtr = pNode->GetChild();
  751. if ( nodePtr )
  752. {
  753. CalcBudgetGroupTimes_Recursive( nodePtr, groupTimes, numGroups, flScale );
  754. }
  755. }
  756. static void CalcBudgetGroupL2CacheMisses_Recursive( CVProfNode *pNode, unsigned int *groupTimes, int numGroups, float flScale )
  757. {
  758. int groupID;
  759. CVProfNode *nodePtr;
  760. groupID = pNode->GetBudgetGroupID();
  761. if ( groupID >= numGroups )
  762. {
  763. return;
  764. }
  765. groupTimes[groupID] += flScale*pNode->GetPrevL2CacheMissLessChildren();
  766. nodePtr = pNode->GetSibling();
  767. if ( nodePtr )
  768. {
  769. CalcBudgetGroupL2CacheMisses_Recursive( nodePtr, groupTimes, numGroups, flScale );
  770. }
  771. nodePtr = pNode->GetChild();
  772. if ( nodePtr )
  773. {
  774. CalcBudgetGroupL2CacheMisses_Recursive( nodePtr, groupTimes, numGroups, flScale );
  775. }
  776. }
  777. static void CalcBudgetGroupLHS_Recursive( CVProfNode *pNode, unsigned int *groupTimes, int numGroups, float flScale )
  778. {
  779. int groupID;
  780. CVProfNode *nodePtr;
  781. groupID = pNode->GetBudgetGroupID();
  782. if ( groupID >= numGroups )
  783. {
  784. return;
  785. }
  786. groupTimes[groupID] += flScale*pNode->GetPrevLoadHitStoreLessChildren();
  787. nodePtr = pNode->GetSibling();
  788. if ( nodePtr )
  789. {
  790. CalcBudgetGroupLHS_Recursive( nodePtr, groupTimes, numGroups, flScale );
  791. }
  792. nodePtr = pNode->GetChild();
  793. if ( nodePtr )
  794. {
  795. CalcBudgetGroupLHS_Recursive( nodePtr, groupTimes, numGroups, flScale );
  796. }
  797. }
  798. void CVProfile::VXConsoleReportMode( VXConsoleReportMode_t mode )
  799. {
  800. m_ReportMode = mode;
  801. }
  802. void CVProfile::VXConsoleReportScale( VXConsoleReportMode_t mode, float flScale )
  803. {
  804. m_pReportScale[mode] = flScale;
  805. }
  806. //-----------------------------------------------------------------------------
  807. // Send the all the counter attributes once to VXConsole at profiling start
  808. //-----------------------------------------------------------------------------
  809. void CVProfile::VXProfileStart()
  810. {
  811. const char *names[XBX_MAX_PROFILE_COUNTERS];
  812. COLORREF colors[XBX_MAX_PROFILE_COUNTERS];
  813. int numGroups;
  814. int counterGroup;
  815. const char *pGroupName;
  816. int i;
  817. int r,g,b,a;
  818. // vprof system must be running
  819. if ( m_enabled <= 0 || !m_UpdateMode )
  820. {
  821. return;
  822. }
  823. if ( m_UpdateMode & VPROF_UPDATE_BUDGET )
  824. {
  825. // update budget profiling
  826. numGroups = g_VProfCurrentProfile.GetNumBudgetGroups();
  827. if ( numGroups > XBX_MAX_PROFILE_COUNTERS )
  828. {
  829. numGroups = XBX_MAX_PROFILE_COUNTERS;
  830. }
  831. for ( i=0; i<numGroups; i++ )
  832. {
  833. names[i] = g_VProfCurrentProfile.GetBudgetGroupName( i );
  834. g_VProfCurrentProfile.GetBudgetGroupColor( i, r, g, b, a );
  835. colors[i] = XMAKECOLOR( r, g, b );
  836. }
  837. // send all the profile attributes
  838. XBX_rSetProfileAttributes( "cpu", numGroups, names, colors );
  839. }
  840. if ( m_UpdateMode & (VPROF_UPDATE_TEXTURE_GLOBAL|VPROF_UPDATE_TEXTURE_PERFRAME) )
  841. {
  842. // update texture profiling
  843. numGroups = 0;
  844. counterGroup = (m_UpdateMode & VPROF_UPDATE_TEXTURE_GLOBAL) ? COUNTER_GROUP_TEXTURE_GLOBAL : COUNTER_GROUP_TEXTURE_PER_FRAME;
  845. for ( i=0; i<g_VProfCurrentProfile.GetNumCounters(); i++ )
  846. {
  847. if ( g_VProfCurrentProfile.GetCounterGroup( i ) == counterGroup )
  848. {
  849. // strip undesired prefix
  850. pGroupName = g_VProfCurrentProfile.GetCounterName( i );
  851. if ( !stricmp( pGroupName, "texgroup_frame_" ) )
  852. {
  853. pGroupName += 15;
  854. }
  855. else if ( !stricmp( pGroupName, "texgroup_global_" ) )
  856. {
  857. pGroupName += 16;
  858. }
  859. names[numGroups] = pGroupName;
  860. g_VProfCurrentProfile.GetBudgetGroupColor( numGroups, r, g, b, a );
  861. colors[numGroups] = XMAKECOLOR( r, g, b );
  862. numGroups++;
  863. if ( numGroups == XBX_MAX_PROFILE_COUNTERS )
  864. {
  865. break;
  866. }
  867. }
  868. }
  869. // send all the profile attributes
  870. XBX_rSetProfileAttributes( "texture", numGroups, names, colors );
  871. }
  872. }
  873. //-----------------------------------------------------------------------------
  874. // Send the counters to VXConsole
  875. //-----------------------------------------------------------------------------
  876. void CVProfile::VXProfileUpdate()
  877. {
  878. int i;
  879. int counterGroup;
  880. int numGroups;
  881. unsigned int groupData[XBX_MAX_PROFILE_COUNTERS];
  882. // vprof system must be running
  883. if ( m_enabled <= 0 || !m_UpdateMode )
  884. {
  885. return;
  886. }
  887. if ( m_UpdateMode & VPROF_UPDATE_BUDGET )
  888. {
  889. // send the cpu counters
  890. numGroups = g_VProfCurrentProfile.GetNumBudgetGroups();
  891. if ( numGroups > XBX_MAX_PROFILE_COUNTERS )
  892. {
  893. numGroups = XBX_MAX_PROFILE_COUNTERS;
  894. }
  895. memset( groupData, 0, numGroups * sizeof( unsigned int ) );
  896. CVProfNode *pNode = g_VProfCurrentProfile.GetRoot();
  897. if ( pNode && pNode->GetChild() )
  898. {
  899. switch ( m_ReportMode )
  900. {
  901. default:
  902. case VXCONSOLE_REPORT_TIME:
  903. CalcBudgetGroupTimes_Recursive( pNode->GetChild(), groupData, numGroups, m_pReportScale[VXCONSOLE_REPORT_TIME] );
  904. break;
  905. case VXCONSOLE_REPORT_L2CACHE_MISSES:
  906. CalcBudgetGroupL2CacheMisses_Recursive( pNode->GetChild(), groupData, numGroups, m_pReportScale[VXCONSOLE_REPORT_L2CACHE_MISSES] );
  907. break;
  908. case VXCONSOLE_REPORT_LOAD_HIT_STORE:
  909. CalcBudgetGroupLHS_Recursive( pNode->GetChild(), groupData, numGroups, m_pReportScale[VXCONSOLE_REPORT_LOAD_HIT_STORE] );
  910. break;
  911. }
  912. }
  913. XBX_rSetProfileData( "cpu", numGroups, groupData );
  914. }
  915. if ( m_UpdateMode & ( VPROF_UPDATE_TEXTURE_GLOBAL|VPROF_UPDATE_TEXTURE_PERFRAME ) )
  916. {
  917. // send the texture counters
  918. numGroups = 0;
  919. counterGroup = ( m_UpdateMode & VPROF_UPDATE_TEXTURE_GLOBAL ) ? COUNTER_GROUP_TEXTURE_GLOBAL : COUNTER_GROUP_TEXTURE_PER_FRAME;
  920. for ( i = 0; i < g_VProfCurrentProfile.GetNumCounters(); i++ )
  921. {
  922. if ( g_VProfCurrentProfile.GetCounterGroup( i ) == counterGroup )
  923. {
  924. // get the size in bytes
  925. groupData[numGroups++] = g_VProfCurrentProfile.GetCounterValue( i );
  926. if ( numGroups == XBX_MAX_PROFILE_COUNTERS )
  927. {
  928. break;
  929. }
  930. }
  931. }
  932. XBX_rSetProfileData( "texture", numGroups, groupData );
  933. }
  934. }
  935. void CVProfile::VXEnableUpdateMode( int event, bool bEnable )
  936. {
  937. // enable or disable the updating of specified events
  938. if ( bEnable )
  939. {
  940. m_UpdateMode |= event;
  941. }
  942. else
  943. {
  944. m_UpdateMode &= ~event;
  945. }
  946. // force a resend of possibly affected attributes
  947. VXProfileStart();
  948. }
  949. #define MAX_VPROF_NODES_IN_LIST 4096
  950. static void VXBuildNodeList_r( CVProfNode *pNode, xVProfNodeItem_t *pNodeList, int *pNumNodes )
  951. {
  952. if ( !pNode )
  953. {
  954. return;
  955. }
  956. if ( *pNumNodes >= MAX_VPROF_NODES_IN_LIST )
  957. {
  958. // list full
  959. return;
  960. }
  961. // add to list
  962. pNodeList[*pNumNodes].pName = (const char *)pNode->GetName();
  963. pNodeList[*pNumNodes].pBudgetGroupName = g_VProfCurrentProfile.GetBudgetGroupName( pNode->GetBudgetGroupID() );
  964. int r, g, b, a;
  965. g_VProfCurrentProfile.GetBudgetGroupColor( pNode->GetBudgetGroupID(), r, g, b, a );
  966. pNodeList[*pNumNodes].budgetGroupColor = XMAKECOLOR( r, g, b );
  967. pNodeList[*pNumNodes].totalCalls = pNode->GetTotalCalls();
  968. pNodeList[*pNumNodes].inclusiveTime = pNode->GetTotalTime();
  969. pNodeList[*pNumNodes].exclusiveTime = pNode->GetTotalTimeLessChildren();
  970. (*pNumNodes)++;
  971. CVProfNode *nodePtr = pNode->GetSibling();
  972. if ( nodePtr )
  973. {
  974. VXBuildNodeList_r( nodePtr, pNodeList, pNumNodes );
  975. }
  976. nodePtr = pNode->GetChild();
  977. if ( nodePtr )
  978. {
  979. VXBuildNodeList_r( nodePtr, pNodeList, pNumNodes );
  980. }
  981. }
  982. void CVProfile::VXSendNodes( void )
  983. {
  984. Pause();
  985. xVProfNodeItem_t *pNodeList = (xVProfNodeItem_t *)stackalloc( MAX_VPROF_NODES_IN_LIST * sizeof(xVProfNodeItem_t) );
  986. int numNodes = 0;
  987. VXBuildNodeList_r( GetRoot(), pNodeList, &numNodes );
  988. // send to vxconsole
  989. XBX_rVProfNodeList( numNodes, pNodeList );
  990. Resume();
  991. }
  992. #endif
  993. //-------------------------------------
  994. static void DumpSorted( const tchar *pszHeading, double totalTime, bool (*pfnSort)( const TimeSums_t &, const TimeSums_t & ), int maxLen = 999999 )
  995. {
  996. unsigned i;
  997. vector<TimeSums_t> sortedSums;
  998. sortedSums = g_TimeSums;
  999. sort( sortedSums.begin(), sortedSums.end(), pfnSort );
  1000. Msg( _T("%s\n"), pszHeading);
  1001. Msg( _T(" Scope Calls Calls/Frame Time+Child Pct Time Pct Avg/Frame Avg/Call Avg-NoChild Peak\n"));
  1002. Msg( _T(" ---------------------------------------------------- ----------- ----------- ----------- ------ ----------- ------ ----------- ----------- ----------- -----------\n"));
  1003. for ( i = 0; i < sortedSums.size() && i < (unsigned)maxLen; i++ )
  1004. {
  1005. double avg = ( sortedSums[i].calls ) ? sortedSums[i].time / (double)sortedSums[i].calls : 0.0;
  1006. double avgLessChildren = ( sortedSums[i].calls ) ? sortedSums[i].timeLessChildren / (double)sortedSums[i].calls : 0.0;
  1007. Msg( _T(" %52.52s%12d%12.3f%12.3f%7.2f%12.3f%7.2f%12.3f%12.3f%12.3f%12.3f\n"),
  1008. sortedSums[i].pszProfileScope,
  1009. sortedSums[i].calls,
  1010. (float)sortedSums[i].calls / (float)g_TotalFrames,
  1011. sortedSums[i].time,
  1012. min( ( sortedSums[i].time / totalTime ) * 100.0, 100.0 ),
  1013. sortedSums[i].timeLessChildren,
  1014. min( ( sortedSums[i].timeLessChildren / totalTime ) * 100.0, 100.0 ),
  1015. sortedSums[i].time / (float)g_TotalFrames,
  1016. avg,
  1017. avgLessChildren,
  1018. sortedSums[i].peak );
  1019. }
  1020. }
  1021. #if defined( _X360 )
  1022. // Dump information on all nodes with PMC recording
  1023. static void DumpPMC( CVProfNode *pNode, bool &bPrintHeader, uint64 L2thresh = 1, uint64 LHSthresh = 1 )
  1024. {
  1025. if (!pNode) return;
  1026. uint64 l2 = pNode->GetL2CacheMisses();
  1027. uint64 lhs = pNode->GetLoadHitStores();
  1028. if ( l2 > L2thresh &&
  1029. lhs > LHSthresh )
  1030. {
  1031. // met threshold.
  1032. if (bPrintHeader)
  1033. {
  1034. // print header
  1035. Msg( _T("-- 360 PMC information --\n") );
  1036. Msg( _T("Scope L2/call L2/frame LHS/call LHS/frame\n") );
  1037. Msg( _T("---------------------------------------------------- --------- --------- --------- ---------\n") );
  1038. bPrintHeader = false;
  1039. }
  1040. // print
  1041. float calls = pNode->GetTotalCalls();
  1042. float frames = g_TotalFrames;
  1043. Msg( _T("%52.52s %9.2f %9.2f %9.2f %9.2f\n"), pNode->GetName(), l2/calls, l2/frames, lhs/calls, lhs/frames );
  1044. }
  1045. if ( pNode->GetSibling() )
  1046. {
  1047. DumpPMC( pNode->GetSibling(), bPrintHeader, L2thresh, LHSthresh );
  1048. }
  1049. if ( pNode->GetChild() )
  1050. {
  1051. DumpPMC( pNode->GetChild(), bPrintHeader, L2thresh, LHSthresh );
  1052. }
  1053. }
  1054. #endif
  1055. //-------------------------------------
  1056. void CVProfile::OutputReport( int type, const tchar *pszStartNode, int budgetGroupID )
  1057. {
  1058. Msg( _T("******** BEGIN VPROF REPORT ********\n"));
  1059. #ifdef _MSC_VER
  1060. #if (_MSC_VER < 1300)
  1061. Msg( _T(" (note: this report exceeds the output capacity of MSVC debug window. Use console window or console log.) \n"));
  1062. #endif
  1063. #endif
  1064. g_TotalFrames = max( NumFramesSampled() - 1, 1 );
  1065. if ( NumFramesSampled() == 0 || GetTotalTimeSampled() == 0)
  1066. Msg( _T("No samples\n") );
  1067. else
  1068. {
  1069. if ( type & VPRT_SUMMARY )
  1070. {
  1071. Msg( _T("-- Summary --\n") );
  1072. Msg( _T("%d frames sampled for %.2f seconds\n"), g_TotalFrames, GetTotalTimeSampled() / 1000.0 );
  1073. Msg( _T("Average %.2f fps, %.2f ms per frame\n"), 1000.0 / ( GetTotalTimeSampled() / g_TotalFrames ), GetTotalTimeSampled() / g_TotalFrames );
  1074. Msg( _T("Peak %.2f ms frame\n"), GetPeakFrameTime() );
  1075. double timeAccountedFor = 100.0 - ( m_Root.GetTotalTimeLessChildren() / m_Root.GetTotalTime() );
  1076. Msg( _T("%.0f pct of time accounted for\n"), min( 100.0, timeAccountedFor ) );
  1077. Msg( _T("\n") );
  1078. }
  1079. if ( pszStartNode == NULL )
  1080. {
  1081. pszStartNode = GetRoot()->GetName();
  1082. }
  1083. SumTimes( pszStartNode, budgetGroupID );
  1084. // Dump the hierarchy
  1085. if ( type & VPRT_HIERARCHY )
  1086. {
  1087. Msg( _T("-- Hierarchical Call Graph --\n"));
  1088. if ( pszStartNode == NULL )
  1089. g_pStartNode = NULL;
  1090. else
  1091. g_pStartNode = FindNode( GetRoot(), pszStartNode );
  1092. DumpNodes( (!g_pStartNode) ? GetRoot() : g_pStartNode, 0, false );
  1093. Msg( _T("\n") );
  1094. }
  1095. if ( type & VPRT_HIERARCHY_TIME_PER_FRAME_AND_COUNT_ONLY )
  1096. {
  1097. Msg( _T("-- Hierarchical Call Graph --\n"));
  1098. if ( pszStartNode == NULL )
  1099. g_pStartNode = NULL;
  1100. else
  1101. g_pStartNode = FindNode( GetRoot(), pszStartNode );
  1102. DumpNodes( (!g_pStartNode) ? GetRoot() : g_pStartNode, 0, true );
  1103. Msg( _T("\n") );
  1104. }
  1105. int maxLen = ( type & VPRT_LIST_TOP_ITEMS_ONLY ) ? 25 : 999999;
  1106. if ( type & VPRT_LIST_BY_TIME )
  1107. {
  1108. DumpSorted( _T("-- Profile scopes sorted by time (including children) --"), GetTotalTimeSampled(), TimeCompare, maxLen );
  1109. Msg( _T("\n") );
  1110. }
  1111. if ( type & VPRT_LIST_BY_TIME_LESS_CHILDREN )
  1112. {
  1113. DumpSorted( _T("-- Profile scopes sorted by time (without children) --"), GetTotalTimeSampled(), TimeLessChildrenCompare, maxLen );
  1114. Msg( _T("\n") );
  1115. }
  1116. if ( type & VPRT_LIST_BY_AVG_TIME )
  1117. {
  1118. DumpSorted( _T("-- Profile scopes sorted by average time (including children) --"), GetTotalTimeSampled(), AverageTimeCompare, maxLen );
  1119. Msg( _T("\n") );
  1120. }
  1121. if ( type & VPRT_LIST_BY_AVG_TIME_LESS_CHILDREN )
  1122. {
  1123. DumpSorted( _T("-- Profile scopes sorted by average time (without children) --"), GetTotalTimeSampled(), AverageTimeLessChildrenCompare, maxLen );
  1124. Msg( _T("\n") );
  1125. }
  1126. if ( type & VPRT_LIST_BY_PEAK_TIME )
  1127. {
  1128. DumpSorted( _T("-- Profile scopes sorted by peak --"), GetTotalTimeSampled(), PeakCompare, maxLen);
  1129. Msg( _T("\n") );
  1130. }
  1131. if ( type & VPRT_LIST_BY_PEAK_OVER_AVERAGE )
  1132. {
  1133. DumpSorted( _T("-- Profile scopes sorted by peak over average (including children) --"), GetTotalTimeSampled(), PeakOverAverageCompare, maxLen );
  1134. Msg( _T("\n") );
  1135. }
  1136. // TODO: Functions by time less children
  1137. // TODO: Functions by time averages
  1138. // TODO: Functions by peak
  1139. // TODO: Peak deviation from average
  1140. g_TimesLessChildren.clear();
  1141. g_TimeSumsMap.clear();
  1142. g_TimeSums.clear();
  1143. #ifdef _X360
  1144. bool bPrintedHeader = true;
  1145. DumpPMC( FindNode( GetRoot(), pszStartNode ), bPrintedHeader );
  1146. #endif
  1147. }
  1148. Msg( _T("******** END VPROF REPORT ********\n"));
  1149. }
  1150. //=============================================================================
  1151. CVProfile::CVProfile()
  1152. : m_Root( _T("Root"), 0, NULL, VPROF_BUDGETGROUP_OTHER_UNACCOUNTED, 0 ),
  1153. m_pCurNode( &m_Root ),
  1154. m_nFrames( 0 ),
  1155. m_enabled( 0 ), // don't change this. if m_enabled is anything but zero coming out of this constructor, vprof will break.
  1156. m_pausedEnabledDepth( 0 ),
  1157. m_fAtRoot( true )
  1158. {
  1159. #ifdef VPROF_VTUNE_GROUP
  1160. m_GroupIDStackDepth = 1;
  1161. m_GroupIDStack[0] = 0; // VPROF_BUDGETGROUP_OTHER_UNACCOUNTED
  1162. #endif
  1163. m_TargetThreadId = ThreadGetCurrentId();
  1164. // Go ahead and allocate 32 slots for budget group names
  1165. MEM_ALLOC_CREDIT();
  1166. m_pBudgetGroups = new CVProfile::CBudgetGroup[32];
  1167. m_nBudgetGroupNames = 0;
  1168. m_nBudgetGroupNamesAllocated = 32;
  1169. // Add these here so that they will always be in the same order.
  1170. // VPROF_BUDGETGROUP_OTHER_UNACCOUNTED has to be FIRST!!!!
  1171. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_OTHER_UNACCOUNTED, BUDGETFLAG_OTHER | BUDGETFLAG_SERVER );
  1172. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_WORLD_RENDERING, BUDGETFLAG_CLIENT );
  1173. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_DISPLACEMENT_RENDERING, BUDGETFLAG_CLIENT );
  1174. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_GAME, BUDGETFLAG_OTHER | BUDGETFLAG_SERVER );
  1175. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_PLAYER, BUDGETFLAG_OTHER | BUDGETFLAG_SERVER );
  1176. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_NPCS, BUDGETFLAG_OTHER | BUDGETFLAG_SERVER );
  1177. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_SERVER_ANIM, BUDGETFLAG_OTHER | BUDGETFLAG_SERVER );
  1178. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_CLIENT_ANIMATION, BUDGETFLAG_CLIENT );
  1179. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_PHYSICS, BUDGETFLAG_OTHER | BUDGETFLAG_SERVER );
  1180. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_STATICPROP_RENDERING, BUDGETFLAG_CLIENT );
  1181. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_MODEL_RENDERING, BUDGETFLAG_CLIENT );
  1182. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_MODEL_FAST_PATH_RENDERING,BUDGETFLAG_CLIENT );
  1183. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_LIGHTCACHE, BUDGETFLAG_CLIENT );
  1184. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_BRUSHMODEL_RENDERING, BUDGETFLAG_CLIENT );
  1185. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_SHADOW_RENDERING, BUDGETFLAG_CLIENT );
  1186. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_DETAILPROP_RENDERING, BUDGETFLAG_CLIENT );
  1187. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_PARTICLE_RENDERING, BUDGETFLAG_CLIENT );
  1188. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_ROPES, BUDGETFLAG_CLIENT );
  1189. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_DLIGHT_RENDERING, BUDGETFLAG_CLIENT );
  1190. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_OTHER_NETWORKING, BUDGETFLAG_OTHER | BUDGETFLAG_SERVER );
  1191. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_OTHER_SOUND, BUDGETFLAG_OTHER | BUDGETFLAG_SERVER );
  1192. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_OTHER_VGUI, BUDGETFLAG_OTHER | BUDGETFLAG_SERVER );
  1193. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_OTHER_FILESYSTEM, BUDGETFLAG_OTHER | BUDGETFLAG_SERVER );
  1194. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_PREDICTION, BUDGETFLAG_CLIENT );
  1195. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_INTERPOLATION, BUDGETFLAG_CLIENT );
  1196. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_SWAP_BUFFERS, BUDGETFLAG_CLIENT );
  1197. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_OCCLUSION, BUDGETFLAG_CLIENT );
  1198. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_OVERLAYS, BUDGETFLAG_CLIENT );
  1199. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_TOOLS, BUDGETFLAG_OTHER | BUDGETFLAG_CLIENT );
  1200. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_TEXTURE_CACHE, BUDGETFLAG_CLIENT );
  1201. BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_REPLAY, BUDGETFLAG_SERVER );
  1202. // BudgetGroupNameToBudgetGroupID( VPROF_BUDGETGROUP_DISP_HULLTRACES );
  1203. m_bPMEInit = false;
  1204. m_bPMEEnabled = false;
  1205. #ifdef VPROF_VXCONSOLE_EXISTS
  1206. m_bTraceCompleteEvent = false;
  1207. m_iSuccessiveTraceIndex = 0;
  1208. m_ReportMode = VXCONSOLE_REPORT_TIME;
  1209. m_pReportScale[VXCONSOLE_REPORT_TIME] = 1000.0f;
  1210. m_pReportScale[VXCONSOLE_REPORT_L2CACHE_MISSES] = 1.0f;
  1211. m_pReportScale[VXCONSOLE_REPORT_LOAD_HIT_STORE] = 0.1f;
  1212. m_nFrameCount = 0;
  1213. m_nFramesRemaining = 1;
  1214. m_WorstCycles = 0;
  1215. m_WorstTraceFilename[ 0 ] = 0;
  1216. m_UpdateMode = 0;
  1217. #endif
  1218. #ifdef _X360
  1219. m_iCPUTraceEnabled = kDisabled;
  1220. #endif
  1221. }
  1222. CVProfile::~CVProfile()
  1223. {
  1224. Term();
  1225. }
  1226. void CVProfile::FreeNodes_R( CVProfNode *pNode )
  1227. {
  1228. CVProfNode *pNext;
  1229. for ( CVProfNode *pChild = pNode->GetChild(); pChild; pChild = pNext )
  1230. {
  1231. pNext = pChild->GetSibling();
  1232. FreeNodes_R( pChild );
  1233. }
  1234. if ( pNode == GetRoot() )
  1235. {
  1236. pNode->m_pChild = NULL;
  1237. }
  1238. else
  1239. {
  1240. delete pNode;
  1241. }
  1242. }
  1243. void CVProfile::Term()
  1244. {
  1245. int i;
  1246. for( i = 0; i < m_nBudgetGroupNames; i++ )
  1247. {
  1248. delete [] m_pBudgetGroups[i].m_pName;
  1249. }
  1250. delete m_pBudgetGroups;
  1251. m_nBudgetGroupNames = m_nBudgetGroupNamesAllocated = 0;
  1252. m_pBudgetGroups = NULL;
  1253. int n;
  1254. for( n = 0; n < m_NumCounters; n++ )
  1255. {
  1256. delete [] m_CounterNames[n];
  1257. m_CounterNames[n] = NULL;
  1258. }
  1259. m_NumCounters = 0;
  1260. // Free the nodes.
  1261. if ( GetRoot() )
  1262. {
  1263. FreeNodes_R( GetRoot() );
  1264. }
  1265. }
  1266. #define COLORMIN 160
  1267. #define COLORMAX 255
  1268. static int g_ColorLookup[4] =
  1269. {
  1270. COLORMIN,
  1271. COLORMAX,
  1272. COLORMIN+(COLORMAX-COLORMIN)/3,
  1273. COLORMIN+((COLORMAX-COLORMIN)*2)/3,
  1274. };
  1275. #define GET_BIT( val, bitnum ) ( ( val >> bitnum ) & 0x1 )
  1276. void CVProfile::GetBudgetGroupColor( int budgetGroupID, int &r, int &g, int &b, int &a )
  1277. {
  1278. budgetGroupID = budgetGroupID % ( 1 << 6 );
  1279. int index;
  1280. index = GET_BIT( budgetGroupID, 0 ) | ( GET_BIT( budgetGroupID, 5 ) << 1 );
  1281. r = g_ColorLookup[index];
  1282. index = GET_BIT( budgetGroupID, 1 ) | ( GET_BIT( budgetGroupID, 4 ) << 1 );
  1283. g = g_ColorLookup[index];
  1284. index = GET_BIT( budgetGroupID, 2 ) | ( GET_BIT( budgetGroupID, 3 ) << 1 );
  1285. b = g_ColorLookup[index];
  1286. a = 255;
  1287. }
  1288. // return -1 if it doesn't exist.
  1289. int CVProfile::FindBudgetGroupName( const tchar *pBudgetGroupName )
  1290. {
  1291. int i;
  1292. for( i = 0; i < m_nBudgetGroupNames; i++ )
  1293. {
  1294. if( _tcsicmp( pBudgetGroupName, m_pBudgetGroups[i].m_pName ) == 0 )
  1295. {
  1296. return i;
  1297. }
  1298. }
  1299. return -1;
  1300. }
  1301. int CVProfile::AddBudgetGroupName( const tchar *pBudgetGroupName, int budgetFlags )
  1302. {
  1303. MEM_ALLOC_CREDIT();
  1304. tchar *pNewString = new tchar[ _tcslen( pBudgetGroupName ) + 1 ];
  1305. _tcscpy( pNewString, pBudgetGroupName );
  1306. if( m_nBudgetGroupNames + 1 > m_nBudgetGroupNamesAllocated )
  1307. {
  1308. m_nBudgetGroupNamesAllocated *= 2;
  1309. m_nBudgetGroupNamesAllocated = max( m_nBudgetGroupNames + 6, m_nBudgetGroupNamesAllocated );
  1310. CBudgetGroup *pNew = new CBudgetGroup[ m_nBudgetGroupNamesAllocated ];
  1311. for ( int i=0; i < m_nBudgetGroupNames; i++ )
  1312. pNew[i] = m_pBudgetGroups[i];
  1313. delete [] m_pBudgetGroups;
  1314. m_pBudgetGroups = pNew;
  1315. }
  1316. m_pBudgetGroups[m_nBudgetGroupNames].m_pName = pNewString;
  1317. m_pBudgetGroups[m_nBudgetGroupNames].m_BudgetFlags = budgetFlags;
  1318. m_nBudgetGroupNames++;
  1319. if( m_pNumBudgetGroupsChangedCallBack )
  1320. {
  1321. (*m_pNumBudgetGroupsChangedCallBack)();
  1322. }
  1323. #if defined( VPROF_VXCONSOLE_EXISTS )
  1324. // re-start with all the known budgets
  1325. VXProfileStart();
  1326. #endif
  1327. return m_nBudgetGroupNames - 1;
  1328. }
  1329. int CVProfile::BudgetGroupNameToBudgetGroupID( const tchar *pBudgetGroupName, int budgetFlagsToORIn )
  1330. {
  1331. int budgetGroupID = FindBudgetGroupName( pBudgetGroupName );
  1332. if( budgetGroupID == -1 )
  1333. {
  1334. budgetGroupID = AddBudgetGroupName( pBudgetGroupName, budgetFlagsToORIn );
  1335. }
  1336. else
  1337. {
  1338. m_pBudgetGroups[budgetGroupID].m_BudgetFlags |= budgetFlagsToORIn;
  1339. }
  1340. return budgetGroupID;
  1341. }
  1342. int CVProfile::BudgetGroupNameToBudgetGroupID( const tchar *pBudgetGroupName )
  1343. {
  1344. return BudgetGroupNameToBudgetGroupID( pBudgetGroupName, BUDGETFLAG_OTHER );
  1345. }
  1346. int CVProfile::GetNumBudgetGroups( void )
  1347. {
  1348. return m_nBudgetGroupNames;
  1349. }
  1350. void CVProfile::RegisterNumBudgetGroupsChangedCallBack( void (*pCallBack)(void) )
  1351. {
  1352. m_pNumBudgetGroupsChangedCallBack = pCallBack;
  1353. }
  1354. void CVProfile::HideBudgetGroup( int budgetGroupID, bool bHide )
  1355. {
  1356. if( budgetGroupID != -1 )
  1357. {
  1358. if ( bHide )
  1359. m_pBudgetGroups[budgetGroupID].m_BudgetFlags |= BUDGETFLAG_HIDDEN;
  1360. else
  1361. m_pBudgetGroups[budgetGroupID].m_BudgetFlags &= ~BUDGETFLAG_HIDDEN;
  1362. }
  1363. }
  1364. int *CVProfile::FindOrCreateCounter( const tchar *pName, CounterGroup_t eCounterGroup )
  1365. {
  1366. Assert( m_NumCounters+1 < MAXCOUNTERS );
  1367. if ( m_NumCounters + 1 >= MAXCOUNTERS || !InTargetThread() )
  1368. {
  1369. static int dummy;
  1370. return &dummy;
  1371. }
  1372. int i;
  1373. for( i = 0; i < m_NumCounters; i++ )
  1374. {
  1375. if( _tcsicmp( m_CounterNames[i], pName ) == 0 )
  1376. {
  1377. // found it!
  1378. return &m_Counters[i];
  1379. }
  1380. }
  1381. // NOTE: These get freed in ~CVProfile.
  1382. MEM_ALLOC_CREDIT();
  1383. tchar *pNewName = new tchar[_tcslen( pName ) + 1];
  1384. _tcscpy( pNewName, pName );
  1385. m_Counters[m_NumCounters] = 0;
  1386. m_CounterGroups[m_NumCounters] = (char)eCounterGroup;
  1387. m_CounterNames[m_NumCounters++] = pNewName;
  1388. return &m_Counters[m_NumCounters-1];
  1389. }
  1390. void CVProfile::ResetCounters( CounterGroup_t eCounterGroup )
  1391. {
  1392. int i;
  1393. for( i = 0; i < m_NumCounters; i++ )
  1394. {
  1395. if ( m_CounterGroups[i] == eCounterGroup )
  1396. m_Counters[i] = 0;
  1397. }
  1398. }
  1399. int CVProfile::GetNumCounters() const
  1400. {
  1401. return m_NumCounters;
  1402. }
  1403. const tchar *CVProfile::GetCounterName( int index ) const
  1404. {
  1405. Assert( index >= 0 && index < m_NumCounters );
  1406. return m_CounterNames[index];
  1407. }
  1408. int CVProfile::GetCounterValue( int index ) const
  1409. {
  1410. Assert( index >= 0 && index < m_NumCounters );
  1411. return m_Counters[index];
  1412. }
  1413. const tchar *CVProfile::GetCounterNameAndValue( int index, int &val ) const
  1414. {
  1415. Assert( index >= 0 && index < m_NumCounters );
  1416. val = m_Counters[index];
  1417. return m_CounterNames[index];
  1418. }
  1419. CounterGroup_t CVProfile::GetCounterGroup( int index ) const
  1420. {
  1421. Assert( index >= 0 && index < m_NumCounters );
  1422. return (CounterGroup_t)m_CounterGroups[index];
  1423. }
  1424. #ifdef _X360
  1425. void CVProfile::LatchMultiFrame( int64 cycles )
  1426. {
  1427. if ( cycles > m_WorstCycles )
  1428. {
  1429. strncpy( m_WorstTraceFilename, GetCPUTraceFilename(), sizeof( m_WorstTraceFilename ) );
  1430. m_WorstCycles = cycles;
  1431. }
  1432. }
  1433. void CVProfile::SpewWorstMultiFrame()
  1434. {
  1435. CCycleCount cc( m_WorstCycles );
  1436. Msg( "%s == %.3f msec\n", m_WorstTraceFilename, cc.GetMillisecondsF() );
  1437. }
  1438. #endif
  1439. #ifdef DBGFLAG_VALIDATE
  1440. #ifdef _WIN64
  1441. #error the below is presumably broken on 64 bit
  1442. #endif // _WIN64
  1443. const int k_cSTLMapAllocOffset = 4;
  1444. #define GET_INTERNAL_MAP_ALLOC_PTR( pMap ) \
  1445. ( * ( (void **) ( ( ( byte * ) ( pMap ) ) + k_cSTLMapAllocOffset ) ) )
  1446. //-----------------------------------------------------------------------------
  1447. // Purpose: Ensure that all of our internal structures are consistent, and
  1448. // account for all memory that we've allocated.
  1449. // Input: validator - Our global validator object
  1450. // pchName - Our name (typically a member var in our container)
  1451. //-----------------------------------------------------------------------------
  1452. void CVProfile::Validate( CValidator &validator, tchar *pchName )
  1453. {
  1454. validator.Push( _T("CVProfile"), this, pchName );
  1455. m_Root.Validate( validator, _T("m_Root") );
  1456. for ( int iBudgetGroup=0; iBudgetGroup < m_nBudgetGroupNames; iBudgetGroup++ )
  1457. validator.ClaimMemory( m_pBudgetGroups[iBudgetGroup].m_pName );
  1458. validator.ClaimMemory( m_pBudgetGroups );
  1459. // The std template map class allocates memory internally, but offer no way to get
  1460. // access to their pointer. Since this is for debug purposes only and the
  1461. // std template classes don't change, just look at the well-known offset. This
  1462. // is arguably sick and wrong, kind of like marrying a squirrel.
  1463. validator.ClaimMemory( GET_INTERNAL_MAP_ALLOC_PTR( &g_TimesLessChildren ) );
  1464. validator.ClaimMemory( GET_INTERNAL_MAP_ALLOC_PTR( &g_TimeSumsMap ) );
  1465. validator.Pop( );
  1466. }
  1467. #endif // DBGFLAG_VALIDATE
  1468. #endif