//========= Copyright © 1996-2004, Valve LLC, All rights reserved. ============ // // Purpose: // // $NoKeywords: $ //============================================================================= #ifndef GC_JOBMGR_H #define GC_JOBMGR_H #ifdef _WIN32 #pragma once #endif #include "tier0/fasttimer.h" #include "tier1/utlpriorityqueue.h" #include "tier1/utlhashmaplarge.h" #include "tier1/utlpair.h" #include "job.h" #include "workthreadpool.h" class GCConVar; #include "tier0/memdbgon.h" namespace GCSDK { #if defined(_DEBUG) // this is restricted to debug builds due to the performance cost // that could be changed by removing the expensive sm_listAllJobs.Find() command #define DEBUG_JOB_LIST #endif // defined(_DEBUG) struct JobStats_t { uint m_cJobsCurrent; uint m_cJobsTotal; uint m_cJobsFailed; uint64 m_cJobsTimedOut; // # of jobs timed out ever double m_flSumJobTimeMicrosec; uint64 m_unMaxJobTimeMicrosec; uint m_cTimeslices; JobStats_t() { memset( this, 0, sizeof(JobStats_t) ); } }; struct JobStatsBucket_t { JobStatsBucket_t() { memset( this, 0, sizeof(JobStatsBucket_t) ); } char m_rgchName[64]; uint64 m_cCompletes; uint64 m_u64RunTimeMax; uint64 m_cTimeoutNetMsg; uint64 m_cLongInterYieldTime; uint64 m_cLocksAttempted; uint64 m_cLocksWaitedFor; uint64 m_cLocksFailed; uint64 m_cLocksLongHeld; uint64 m_cLocksLongWait; uint64 m_cWaitTimeout; uint64 m_u64JobDuration; uint64 m_cJobsPaused; uint64 m_cJobsFailed; uint64 m_u64RunTime; // use by ListJobs uint64 m_cPauseReasonNetworkMsg; uint64 m_cPauseReasonSleepForTime; uint64 m_cPauseReasonWaitingForLock; uint64 m_cPauseReasonYield; uint64 m_cPauseReasonSQL; uint64 m_cPauseReasonWorkItem; }; enum EJobProfileAction { k_EJobProfileAction_ErrorReport = 0, k_EJobProfileAction_Start = 1, k_EJobProfileAction_Stop = 2, k_EJobProfileAction_Dump = 3, k_EJobProfileAction_Clear = 4, }; enum EJobProfileSortOrder { k_EJobProfileSortOrder_Alpha = 0, k_EJobProfileSortOrder_Count = 1, k_EJobProfileSortOrder_TotalRuntime = 2, }; //the various msg contexts that exist for the GC enum EGCMsgContext { k_eGCMsgContext_None = 0, // This message cannot be called by anyone. This is intended only to be used by messages that are temporarily disabled but left in the codebase for some reason k_eGCMsgContext_Client = (1<<0), // This message is expected to arrive from a player/client k_eGCMsgContext_UnsecuredServer = (1<<1), // This message should come from a dedicated server, but one that is not securely managed and should not be trusted k_eGCMsgContext_TrustedServer = (1<<2), // This message should come from a dedicated server, but one that is securely managed k_eGCMsgContext_System = (1<<3), // This message should come from the GCH system k_eGCMsgContext_OtherGC = (1<<4), // This message should arrive from another sub GC or the master GC k_eGCMsgContext_AllServers = k_eGCMsgContext_UnsecuredServer | k_eGCMsgContext_TrustedServer, // a message from any server k_eGCMsgContext_All = k_eGCMsgContext_Client | k_eGCMsgContext_TrustedServer | k_eGCMsgContext_UnsecuredServer | k_eGCMsgContext_System | k_eGCMsgContext_OtherGC }; struct JobProfileStats_t { int m_iJobProfileSort; CUtlHashMapLarge< uint32, JobStatsBucket_t > *pmapStatsBucket; }; //----------------------------------------------------------------------------- // Purpose: This keeps track of all jobs that belong to a given hub. // It's primarily used for routing incoming messages to jobs. //----------------------------------------------------------------------------- class CJobMgr { public: // Constructors & destructors CJobMgr(); ~CJobMgr(); // gets the next available job ID JobID_t GetNewJobID(); // Set the thread count for the internal thread pool(s) void SetThreadPoolSize( uint cThreads ); // Run any sleeping jobs who's wakeup time has arrived and check for timeouts bool BFrameFuncRunSleepingJobs( CLimitTimer &limitTimer ); // Run any yielding jobs, even low priority ones bool BFrameFuncRunYieldingJobs( CLimitTimer &limitTimer ); // Route this message to an existing Job, or create a new one if that JobID does not exist. This takes the context that the message // is being created within so that it can validate that the message is being called from the right context. If the context doesn't overlap, // it will fail to create the job bool BRouteMsgToJob( void *pParent, IMsgNetPacket *pNetPacket, const JobMsgInfo_t &jobMsgInfo, EGCMsgContext nCreateContext = k_eGCMsgContext_All ); // Adds a new Job to the mgr and generates a JobID for it. void InsertJob( CJob &job ); // Removes a Job from the mgr (the caller is still responsible for freeing it) void RemoveJob( CJob &job ); //called by a job that has just been started to place itself on the yield queue instead of running void AddDelayedJobToYieldList( CJob &job ); #ifdef GC //given a message, this will determine if there is any custom routing associated with the message. It will return a function pointer //if custom routing is found, or NULL otherwise JobRoutingFunc_t GetRoutingForMsg( IMsgNetPacket *pNetPacket ); JobRoutingFunc_t GetRoutingForMsg( MsgType_t eMsg ); // resumes the specified job if it is, in fact, waiting for a SQL query to return bool BResumeSQLJob( JobID_t jobID ); // yields waiting for a query response bool BYieldingRunQuery( CJob &job, CGCSQLQueryGroup *pQueryGroup, ESchemaCatalog eSchemaCatalog ); //stats collected for SQL queries that run long struct LongSQLStats_t { //the string for this query CUtlString m_sQuery; //total amount of time spent in over limit queries uint32 m_nTotalTimeMS; uint32 m_nNumQueries; //the parameters for the longest running query uint32 m_nMaxQueryMS; CUtlString m_sMaxQueryParams; //a collection of N parameters that also exceeded the time limit static const uint32 knMaxParams = 8; CUtlVectorFixed< CUtlString, knMaxParams > m_Params; }; //access to the mapfor all the long running queries typedef CUtlHashMapLarge< const char*, LongSQLStats_t*, CaseSensitiveStrEquals, MurmurHash3ConstCharPtr > TLongSQLMap; TLongSQLMap& GetLongSQLMap() { return m_LongSqlMap; } // SQL profiling enum ESQLProfileSort { k_ESQLProfileSortTotalTime, k_ESQLProfileSortTotalCount, k_ESQLProfileSortAvgTime, k_ESQLProfileSortName }; void StartSQLProfiling(); void StopSQLProfiling(); void DumpSQLProfile( ESQLProfileSort eSort ); uint32 GetNumSQLQueriesInFlight() const { return m_mapSQLQueriesInFlight.Count(); } #endif // returns true if we're running any jobs of the specified name // slow to call if lots of jobs are running, should only be used by tests bool BIsJobRunning( const char *pchJobName ); // passes a network msg directly to the specified job void PassMsgToJob( CJob &job, IMsgNetPacket *pNetPacket, const JobMsgInfo_t &jobMsgInfo ); // Enter an advisory do-not-yield scope with line-and-file string for debugging void PushDoNotYield( CJob &job, const char *pchFileAndLine ); // Exit the innermost do-not-yield scope which matches this job void PopDoNotYield( CJob &job ); // yields until a network message is received bool BYieldingWaitForMsg( CJob &job ); // yields for a set amount of time bool BYieldingWaitTime( CJob &job, uint32 cMicrosecondsToSleep ); // simple yield until Run() called again bool BYield( CJob &job ); // Yield only if job manager decides we need to bool BYieldIfNeeded( CJob &job, bool *pbYielded ); // Thread pool work item bool BYieldingWaitForWorkItem( CJob &job, const char *pszWorkItemName = NULL ); bool BRouteWorkItemCompleted( JobID_t jobID, bool bWorkItemCanceled ) { return BRouteWorkItemCompletedInternal( jobID, bWorkItemCanceled, /* bShouldExist */ true, /* bResumeImmediately */ true ); } bool BRouteWorkItemCompletedIfExists( JobID_t jobID, bool bWorkItemCanceled ) { return BRouteWorkItemCompletedInternal( jobID, bWorkItemCanceled, /* bShouldExist */ false, /* bResumeImmediately */ true ); } bool BRouteWorkItemCompletedDelayed( JobID_t jobID, bool bWorkItemCanceled ) { return BRouteWorkItemCompletedInternal( jobID, bWorkItemCanceled, /* bShouldExist */ true, /* bResumeImmediately */ false ); } bool BRouteWorkItemCompletedIfExistsDelayed( JobID_t jobID, bool bWorkItemCanceled ) { return BRouteWorkItemCompletedInternal( jobID, bWorkItemCanceled, /* bShouldExist */ false, /* bResumeImmediately */ false ); } void AddThreadedJobWorkItem( CWorkItem *pWorkItem ); void StopWorkThreads() { m_WorkThreadPool.StopWorkThreads(); } static int ProfileSortFunc( void *pCtx, const int *lhs, const int *rhs ); void ProfileJobs( EJobProfileAction ejobProfileAction, EJobProfileSortOrder iSortOrder = k_EJobProfileSortOrder_Alpha ); int DumpJobSummary(); void DumpJob( JobID_t jobID, int nPrintLocksMax = 20 ) const; int CountJobs() const; // counts currently active jobs void CheckThreadID(); // make sure we are still in the correct thread int CountYieldingJobs() const { return m_ListJobsYieldingRegPri.Count(); } // counts jobs currently in a yielding state bool HasOutstandingThreadPoolWorkItems(); void SetIsShuttingDown( bool bIsShuttingDown ) { m_bIsShuttingDown = bIsShuttingDown; } bool GetIsShuttingDown() const { return m_bIsShuttingDown; } void *GetMainMemoryDebugInfo() { return g_memMainDebugInfo.Base(); } // wakes up a job that was waiting on a lock void WakeupLockedJob( CJob &job ); // returns true if there is a job active with the specified ID bool BJobExists( JobID_t jobID ) const; // returns a job CJob *GetPJob( JobID_t jobID ); const CJob *GetPJob( JobID_t jobID ) const; JobStats_t& GetJobStats() { return m_JobStats; } // Access work thread pool directly CWorkThreadPool *AccessWorkThreadPool() { return &m_WorkThreadPool; } // Debug helpers // dumps a list of all running jobs across ALL job managers void DumpJobs( const char *pszJobName, int nMax, int nPrintLocksMax = 1 ) const; // cause a debug break in the given job static void DebugJob( int iJob ); //sets a whitelist of messages that should be exempt from context filtering. This is meant only to be a utility during development to address //messages that were misclassified until a fix can be deployed void SetMsgContextWhitelist( const CUtlVector< MsgType_t >& msgList ) { m_MsgContextWhitelist = msgList; } private: //given a message, this will find the appropriate job type structure for handling this message const JobType_t* GetJobInfoForMsg( IMsgNetPacket *pNetPacket ) const; bool BRouteWorkItemCompletedInternal( JobID_t jobID, bool bWorkItemCanceled, bool bShouldExist, bool bResumeImmediately ); // Create a new job for this message bool BLaunchJobFromNetworkMsg( void *pParent, const JobMsgInfo_t &jobMsgInfo, IMsgNetPacket *pNetPacket, EGCMsgContext nCreateContext ); // Internal add to yield list (looks at priority) void AddToYieldList( CJob &job ); // Get an IJob given a job ID and pause reason bool BGetIJob( JobID_t jobID, EJobPauseReason eJobPauseReason, bool bShouldExist, int *pIJob ); // Map containing all of our jobs CUtlHashMapLarge m_MapJob; // jobs simply waiting until the next Run() struct JobYielding_t { JobID_t m_JobID; uint m_nIteration; }; CUtlLinkedList m_ListJobsYieldingRegPri; bool BResumeYieldingJobs( CLimitTimer &limitTimer ); bool BResumeYieldingJobsFromList( CUtlLinkedList &listJobsYielding, uint nCurrentIteration, CLimitTimer &limitTimer ); uint m_nCurrentYieldIterationRegPri; // jobs waiting on a timer struct JobSleeping_t { JobID_t m_JobID; CJobTime m_SWakeupTime; CJobTime m_STimeTouched; }; CUtlPriorityQueue m_QueueJobSleeping; bool BResumeSleepingJobs( CLimitTimer &limitTimer ); static bool JobSleepingLessFunc( JobSleeping_t const &lhs, JobSleeping_t const &rhs ); // timeout list of jobs, ordered from oldest to newest struct JobTimeout_t { JobID_t m_JobID; CJobTime m_STimePaused; CJobTime m_STimeTouched; uint32 m_cHeartbeatsBeforeTimeout; }; CUtlLinkedList m_ListJobTimeouts; CUtlHashMapLarge< JobID_t, int > m_MapJobTimeoutsIndexByJobID; void PauseJob( CJob &job, EJobPauseReason eJobPauseReason, const char *pszPauseResourceName = NULL ); void CheckForJobTimeouts( CLimitTimer &limitTimer ); void TimeoutJob( CJob &job ); bool m_bJobTimedOut; // thread pool usage, for running job functions in other threads CWorkThreadPool m_WorkThreadPool; void AccumulateStatsofJob( CJob &job ); void RecordOrphanedMessage( MsgType_t eMsg, JobID_t jobIDTarget ); // stats info JobStats_t m_JobStats; // static job registration static void RegisterJobType( const JobType_t *pJobType ); friend void Job_RegisterJobType( const JobType_t *pJobType ); JobID_t m_unNextJobID; uint m_unFrameFuncThreadID; // the thread is JobMgr is working in bool m_bProfiling; bool m_bIsShuttingDown; int m_cErrorsToReport; CUtlHashMapLarge< uint32, JobStatsBucket_t > m_mapStatsBucket; CUtlHashMapLarge< MsgType_t, int > m_mapOrphanMessages; CUtlMemory g_memMainDebugInfo; //a white list of messages to ignore context issues with CUtlVector< MsgType_t > m_MsgContextWhitelist; #ifdef GC // sql profiling bool m_bSQLProfiling; CFastTimer m_sqlTimer; // long SQL utilities TLongSQLMap m_LongSqlMap; struct PendingSQLJob_t { uint64 m_nStartMicrosec; CGCSQLQueryGroup *m_pQueryGroup; int32 m_iBucket; }; //utility function that given a pending SQL query will handle displaying it out to the console along with all of its parameters. Useful //for debugging and monitoring void DisplayPendingSQLJob( const PendingSQLJob_t& sqlJob ); struct SQLProfileBucket_t { uint64 m_nTotalMicrosec; uint32 m_unCount; uint32 m_unMaxMS; }; CUtlHashMapLarge m_mapSQLQueriesInFlight; CUtlDict m_dictSQLBuckets; struct SQLProfileCtx_t { ESQLProfileSort m_eSort; CUtlDict *pdictBuckets; }; static int SQLProfileSortFunc( void *pCtx, const int *lhs, const int *rhs ); #endif #ifdef DEBUG_JOB_LIST // static job debug list static CUtlLinkedList sm_listAllJobs; #endif // A stack of do not yield guards so we can print the right warning if they're nested // (Note that we also track jobid since it is not an error to start another job which // executes and immediately yields back to the parent, even if the parent can't yield) CUtlVector< CUtlPair< JobID_t, const char * > > m_stackDoNotYieldGuards; }; //----------------------------------------------------------------------------- // Purpose: passthrough function just so the CJob internal data can be kept private //----------------------------------------------------------------------------- inline void Job_RegisterJobType( const JobType_t *pJobType ) { CJobMgr::RegisterJobType( pJobType ); } //----------------------------------------------------------------------------- // Purpose: passthrough function just so the CJob internal data can be kept private //----------------------------------------------------------------------------- inline void Job_SetJobType( CJob &job, const JobType_t *pJobType ) { job.m_pJobType = pJobType; } //----------------------------------------------------------------------------- // Purpose: job registration macro //----------------------------------------------------------------------------- #define GC_REG_JOB_FULL( parentclass, jobclass, jobname, msg, contexts, routingfn, consolevar ) \ GCSDK::CJob *CreateJob_##jobclass( parentclass *pParent, void * pvStartParam ); \ static const GCSDK::JobType_t g_JobType_##jobclass = { jobname, (GCSDK::MsgType_t)msg, contexts, (GCSDK::JobCreationFunc_t)CreateJob_##jobclass, routingfn, consolevar }; \ GCSDK::CJob *CreateJob_##jobclass( parentclass *pParent, void * pvStartParam ) \ { \ GCSDK::CJob *job = GCSDK::CJob::AllocateJob( pParent ); \ if ( job ) \ { \ Job_SetJobType( *job, &g_JobType_##jobclass ); \ if ( pvStartParam ) job->SetStartParam( pvStartParam ); \ } \ else \ { \ AssertMsg( job, "CJob::AllocateJob<" #jobclass "> returned NULL!" ); \ } \ return job; \ } \ static class CRegJob_##jobclass \ { \ public: CRegJob_##jobclass() \ { \ Job_RegisterJobType( &g_JobType_##jobclass ); \ } \ } g_RegJob_##jobclass; #define GC_REG_JOB( parentclass, jobclass, jobname, msg ) \ GC_REG_JOB_FULL( parentclass, jobclass, jobname, msg, GCSDK::k_eGCMsgContext_All, NULL, NULL ) } // namespace GCSDK #include "tier0/memdbgoff.h" #endif // GC_JOBMGR_H