Team Fortress 2 Source Code as on 22/4/2020
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

602 lines
16 KiB

  1. //========= Copyright Valve Corporation, All rights reserved. ============//
  2. //
  3. // Purpose:
  4. //
  5. //=============================================================================
  6. #include "vmpi.h"
  7. #include "vmpi_distribute_work.h"
  8. #include "tier0/platform.h"
  9. #include "tier0/dbg.h"
  10. #include "utlvector.h"
  11. #include "utllinkedlist.h"
  12. #include "vmpi_dispatch.h"
  13. #include "pacifier.h"
  14. #include "vstdlib/random.h"
  15. #include "mathlib/mathlib.h"
  16. #include "threadhelpers.h"
  17. #include "threads.h"
  18. #include "tier1/strtools.h"
  19. #include "tier1/utlmap.h"
  20. #include "tier1/smartptr.h"
  21. #include "tier0/icommandline.h"
  22. #include "cmdlib.h"
  23. #include "vmpi_distribute_tracker.h"
  24. #include "vmpi_distribute_work_internal.h"
  25. #define DW_SUBPACKETID_WU_ASSIGNMENT (VMPI_DISTRIBUTE_WORK_EXTRA_SUBPACKET_BASE+0)
  26. static int s_numWusToDeal = -1;
  27. void VMPI_SetWorkUnitsPartitionSize( int numWusToDeal )
  28. {
  29. s_numWusToDeal = numWusToDeal;
  30. }
  31. class CWorkUnitInfo
  32. {
  33. public:
  34. WUIndexType m_iWorkUnit;
  35. };
  36. class CWULookupInfo
  37. {
  38. public:
  39. CWULookupInfo() : m_iWUInfo( -1 ), m_iPartition( -222222 ), m_iPartitionListIndex( -1 ) {}
  40. public:
  41. int m_iWUInfo; // Index into m_WUInfo.
  42. int m_iPartition; // Which partition it's in.
  43. int m_iPartitionListIndex; // Index into its partition's m_WUs.
  44. };
  45. class CPartitionInfo
  46. {
  47. public:
  48. typedef CUtlLinkedList< WUIndexType, int > PartitionWUs;
  49. public:
  50. int m_iPartition; // Index into m_Partitions.
  51. int m_iWorker; // Who owns this partition?
  52. PartitionWUs m_WUs; // Which WUs are in this partition?
  53. };
  54. // Work units tracker to track consecutive finished blocks
  55. class CWorkUnitsTracker
  56. {
  57. public:
  58. CWorkUnitsTracker() {}
  59. public:
  60. // Initializes the unit tracker to receive numUnits in future
  61. void PrepareForWorkUnits( uint64 numUnits );
  62. // Signals that a work unit has been finished
  63. // returns a zero-based index of the next pending work unit
  64. // up to which the task list has been processed fully now
  65. // because the received work unit filled the gap or was the next pending work unit.
  66. // returns 0 to indicate that this work unit is a "faster processed future work unit".
  67. uint64 WorkUnitFinished( uint64 iWorkUnit );
  68. public:
  69. enum WUInfo { kNone, kTrigger, kDone };
  70. CVisibleWindowVector< uint8 > m_arrInfo;
  71. };
  72. void CWorkUnitsTracker::PrepareForWorkUnits( uint64 numUnits )
  73. {
  74. m_arrInfo.Reset( numUnits + 1 );
  75. if ( numUnits )
  76. {
  77. m_arrInfo.ExpandWindow( 2ull, kNone );
  78. m_arrInfo.Get( 0ull ) = kTrigger;
  79. }
  80. }
  81. uint64 CWorkUnitsTracker::WorkUnitFinished( uint64 iWorkUnit )
  82. {
  83. uint64 uiResult = uint64( 0 );
  84. if ( iWorkUnit >= m_arrInfo.FirstPossibleIndex() && iWorkUnit < m_arrInfo.PastPossibleIndex() )
  85. {
  86. // Need to access the element
  87. m_arrInfo.ExpandWindow( iWorkUnit + 1, kNone );
  88. // Set it done
  89. uint8 &rchThere = m_arrInfo.Get( iWorkUnit ), chThere = rchThere;
  90. rchThere = kDone;
  91. // Should we trigger?
  92. if ( kTrigger == chThere )
  93. {
  94. // Go along all "done" work units and trigger the last found one
  95. while ( ( ( ++ iWorkUnit ) < m_arrInfo.PastVisibleIndex() ) &&
  96. ( kDone == m_arrInfo.Get( iWorkUnit ) ) )
  97. continue;
  98. m_arrInfo.Get( iWorkUnit ) = kTrigger;
  99. m_arrInfo.ShrinkWindow( iWorkUnit - 1 );
  100. uiResult = iWorkUnit;
  101. }
  102. else if( iWorkUnit == m_arrInfo.FirstPossibleIndex() )
  103. {
  104. // Go along all "done" work units and shrink including the last found one
  105. while ( ( ( ++ iWorkUnit ) < m_arrInfo.PastVisibleIndex() ) &&
  106. ( kDone == m_arrInfo.Get( iWorkUnit ) ) )
  107. continue;
  108. m_arrInfo.ShrinkWindow( iWorkUnit - 1 );
  109. }
  110. }
  111. return uiResult;
  112. }
  113. CWorkUnitsTracker g_MasterWorkUnitsTracker;
  114. static bool CompareSoonestWorkUnitSets( CPartitionInfo::PartitionWUs * const &x, CPartitionInfo::PartitionWUs * const &y )
  115. {
  116. // Compare by fourth/second/first job in the partitions
  117. WUIndexType missing = ~WUIndexType(0);
  118. WUIndexType jobsX[4] = { missing, missing, missing, missing };
  119. WUIndexType jobsY[4] = { missing, missing, missing, missing };
  120. int counter = 0;
  121. counter = 0;
  122. FOR_EACH_LL( (*x), i )
  123. {
  124. jobsX[ counter ++ ] = (*x)[i];
  125. if ( counter >= 4 )
  126. break;
  127. }
  128. counter = 0;
  129. FOR_EACH_LL( (*y), i )
  130. {
  131. jobsY[ counter ++ ] = (*y)[i];
  132. if ( counter >= 4 )
  133. break;
  134. }
  135. // Compare
  136. if ( jobsX[3] != jobsY[3] )
  137. return ( jobsX[3] < jobsY[3] );
  138. if ( jobsX[1] != jobsY[1] )
  139. return ( jobsX[1] < jobsY[1] );
  140. return jobsX[0] < jobsY[0];
  141. }
  142. class CDistributor_DefaultMaster : public IWorkUnitDistributorMaster
  143. {
  144. public:
  145. virtual void Release()
  146. {
  147. delete this;
  148. }
  149. virtual void DistributeWork_Master( CDSInfo *pInfo )
  150. {
  151. m_pInfo = pInfo;
  152. g_MasterWorkUnitsTracker.PrepareForWorkUnits( m_pInfo->m_nWorkUnits );
  153. m_WULookup.Reset( pInfo->m_nWorkUnits );
  154. while ( m_WULookup.FirstPossibleIndex() < m_WULookup.PastPossibleIndex() )
  155. {
  156. VMPI_DispatchNextMessage( 200 );
  157. VMPITracker_HandleDebugKeypresses();
  158. if ( g_pDistributeWorkCallbacks && g_pDistributeWorkCallbacks->Update() )
  159. break;
  160. }
  161. }
  162. virtual void OnWorkerReady( int iSource )
  163. {
  164. AssignWUsToWorker( iSource );
  165. }
  166. virtual bool HandleWorkUnitResults( WUIndexType iWorkUnit )
  167. {
  168. CWULookupInfo *pLookup = NULL;
  169. if ( iWorkUnit >= m_WULookup.FirstPossibleIndex() && iWorkUnit < m_WULookup.PastVisibleIndex() )
  170. pLookup = &m_WULookup.Get( iWorkUnit );
  171. if ( !pLookup || pLookup->m_iWUInfo == -1 )
  172. return false;
  173. // Mark this WU finished and remove it from the list of pending WUs.
  174. m_WUInfo.Remove( pLookup->m_iWUInfo );
  175. pLookup->m_iWUInfo = -1;
  176. // Get rid of the WU from its partition.
  177. int iPartition = pLookup->m_iPartition;
  178. CPartitionInfo *pPartition = m_Partitions[iPartition];
  179. pPartition->m_WUs.Remove( pLookup->m_iPartitionListIndex );
  180. // Shrink the window of the lookup work units
  181. if ( iWorkUnit == m_WULookup.FirstPossibleIndex() )
  182. {
  183. WUIndexType kwu = iWorkUnit;
  184. for ( WUIndexType kwuEnd = m_WULookup.PastVisibleIndex(); kwu < kwuEnd; ++ kwu )
  185. {
  186. if ( -1 != m_WULookup.Get( kwu ).m_iWUInfo && kwu > iWorkUnit )
  187. break;
  188. }
  189. m_WULookup.ShrinkWindow( kwu - 1 );
  190. }
  191. // Give the worker some new work if need be.
  192. if ( pPartition->m_WUs.Count() == 0 )
  193. {
  194. int iPartitionWorker = pPartition->m_iWorker;
  195. delete pPartition;
  196. m_Partitions.Remove( iPartition );
  197. // If there are any more WUs remaining, give the worker from this partition some more of them.
  198. if ( m_WULookup.FirstPossibleIndex() < m_WULookup.PastPossibleIndex() )
  199. {
  200. AssignWUsToWorker( iPartitionWorker );
  201. }
  202. }
  203. uint64 iDoneWorkUnits = g_MasterWorkUnitsTracker.WorkUnitFinished( iWorkUnit );
  204. if ( iDoneWorkUnits && g_pDistributeWorkCallbacks )
  205. {
  206. g_pDistributeWorkCallbacks->OnWorkUnitsCompleted( iDoneWorkUnits );
  207. }
  208. return true;
  209. }
  210. virtual void DisconnectHandler( int workerID )
  211. {
  212. int iPartitionLookup = FindPartitionByWorker( workerID );
  213. if ( iPartitionLookup != -1 )
  214. {
  215. // Mark this guy's partition as unowned so another worker can get it.
  216. CPartitionInfo *pPartition = m_Partitions[iPartitionLookup];
  217. pPartition->m_iWorker = -1;
  218. }
  219. }
  220. CPartitionInfo* AddPartition( int iWorker )
  221. {
  222. CPartitionInfo *pNew = new CPartitionInfo;
  223. pNew->m_iPartition = m_Partitions.AddToTail( pNew );
  224. pNew->m_iWorker = iWorker;
  225. return pNew;
  226. }
  227. bool SplitWUsPartition( CPartitionInfo *pPartitionLarge,
  228. CPartitionInfo **ppFirstHalf, CPartitionInfo **ppSecondHalf,
  229. int iFirstHalfWorker, int iSecondHalfWorker )
  230. {
  231. int nCount = pPartitionLarge->m_WUs.Count();
  232. if ( nCount > 1 ) // Allocate the partitions for the two workers
  233. {
  234. *ppFirstHalf = AddPartition( iFirstHalfWorker );
  235. *ppSecondHalf = AddPartition( iSecondHalfWorker );
  236. }
  237. else // Specially transfer a partition with too few work units
  238. {
  239. *ppFirstHalf = NULL;
  240. *ppSecondHalf = AddPartition( iSecondHalfWorker );
  241. }
  242. // Prepare for transfer
  243. CPartitionInfo *arrNewParts[2] = { *ppFirstHalf ? *ppFirstHalf : *ppSecondHalf, *ppSecondHalf };
  244. // Transfer the work units:
  245. // alternate first/second halves
  246. // don't put more than "half deal units" tasks into the second half
  247. // e.g. { 1, 2, 3, 4 }
  248. // becomes: 1st half { 1, 2 }, 2nd half { 3, 4 }
  249. for ( int k = 0; k < nCount; ++ k )
  250. {
  251. int iHead = pPartitionLarge->m_WUs.Head();
  252. WUIndexType iWU = pPartitionLarge->m_WUs[ iHead ];
  253. pPartitionLarge->m_WUs.Remove( iHead );
  254. /*
  255. int nHalf = !!( ( k % 2 ) || ( k >= nCount - 1 ) );
  256. if ( k == 5 ) // no more than 2 jobs to branch off
  257. arrNewParts[ 1 ] = arrNewParts[ 0 ];
  258. */
  259. int nHalf = !( k < nCount/2 );
  260. CPartitionInfo *pTo = arrNewParts[ nHalf ];
  261. CWULookupInfo &li = m_WULookup.Get( iWU );
  262. li.m_iPartition = pTo->m_iPartition;
  263. li.m_iPartitionListIndex = pTo->m_WUs.AddToTail( iWU );
  264. }
  265. // LogPartitionsWorkUnits( pInfo );
  266. return true;
  267. }
  268. void AssignWUsToWorker( int iWorker )
  269. {
  270. // Get rid of this worker's old partition.
  271. int iPrevious = FindPartitionByWorker( iWorker );
  272. if ( iPrevious != -1 )
  273. {
  274. delete m_Partitions[iPrevious];
  275. m_Partitions.Remove( iPrevious );
  276. }
  277. if ( g_iVMPIVerboseLevel >= 1 )
  278. Msg( "A" );
  279. CVisibleWindowVector< CWULookupInfo > &vlkup = m_WULookup;
  280. if ( CommandLine()->FindParm( "-mpi_NoScheduler" ) )
  281. {
  282. Warning( "\n\n-mpi_NoScheduler found: Warning - this should only be used for testing and with 1 worker!\n\n" );
  283. vlkup.ExpandWindow( m_pInfo->m_nWorkUnits );
  284. CPartitionInfo *pPartition = AddPartition( iWorker );
  285. for ( int i=0; i < m_pInfo->m_nWorkUnits; i++ )
  286. {
  287. CWorkUnitInfo info;
  288. info.m_iWorkUnit = i;
  289. CWULookupInfo &li = vlkup.Get( i );
  290. li.m_iPartition = pPartition->m_iPartition;
  291. li.m_iPartitionListIndex = pPartition->m_WUs.AddToTail( i );
  292. li.m_iWUInfo = m_WUInfo.AddToTail( info );
  293. }
  294. SendPartitionToWorker( pPartition, iWorker );
  295. return;
  296. }
  297. // Any partitions abandoned by workers?
  298. int iAbandonedPartition = FindPartitionByWorker( -1 );
  299. if ( -1 != iAbandonedPartition )
  300. {
  301. CPartitionInfo *pPartition = m_Partitions[ iAbandonedPartition ];
  302. pPartition->m_iWorker = iWorker;
  303. SendPartitionToWorker( pPartition, iWorker );
  304. }
  305. // Any absolutely untouched partitions yet?
  306. else if ( vlkup.PastVisibleIndex() < vlkup.PastPossibleIndex() )
  307. {
  308. // Figure out how many WUs to include in a batch
  309. int numWusToDeal = s_numWusToDeal;
  310. if ( numWusToDeal <= 0 )
  311. {
  312. uint64 uiFraction = vlkup.PastPossibleIndex() / g_nMaxWorkerCount;
  313. Assert( uiFraction < INT_MAX/2 );
  314. numWusToDeal = int( uiFraction );
  315. if ( numWusToDeal <= 0 )
  316. numWusToDeal = 8;
  317. }
  318. // Allocate room for upcoming work units lookup
  319. WUIndexType iBegin = vlkup.PastVisibleIndex();
  320. WUIndexType iEnd = min( iBegin + g_nMaxWorkerCount * numWusToDeal, vlkup.PastPossibleIndex() );
  321. vlkup.ExpandWindow( iEnd - 1 );
  322. // Allocate a partition
  323. size_t numPartitions = min( ( size_t )(iEnd - iBegin), ( size_t )g_nMaxWorkerCount );
  324. CArrayAutoPtr< CPartitionInfo * > spArrPartitions( new CPartitionInfo* [ numPartitions ] );
  325. CPartitionInfo **arrPartitions = spArrPartitions.Get();
  326. arrPartitions[0] = AddPartition( iWorker );
  327. for ( size_t k = 1; k < numPartitions; ++ k )
  328. arrPartitions[k] = AddPartition( -1 );
  329. // Assign upcoming work units to the partitions.
  330. for ( WUIndexType i = iBegin ; i < iEnd; ++ i )
  331. {
  332. CWorkUnitInfo info;
  333. info.m_iWorkUnit = i;
  334. CPartitionInfo *pPartition = arrPartitions[ size_t( (i - iBegin) % numPartitions ) ];
  335. CWULookupInfo &li = vlkup.Get( i );
  336. li.m_iPartition = pPartition->m_iPartition;
  337. li.m_iPartitionListIndex = pPartition->m_WUs.AddToTail( i );
  338. li.m_iWUInfo = m_WUInfo.AddToTail( info );
  339. }
  340. // Now send this guy the WU list in his partition.
  341. SendPartitionToWorker( arrPartitions[0], iWorker );
  342. }
  343. // Split one of the last partitions to finish sooner
  344. else
  345. {
  346. // Find a partition to split.
  347. int iPartToSplit = FindSoonestPartition();
  348. if ( iPartToSplit >= 0 )
  349. {
  350. CPartitionInfo *pPartition = m_Partitions[ iPartToSplit ];
  351. CPartitionInfo *pOldHalf = NULL, *pNewHalf = NULL;
  352. int iOldWorker = pPartition->m_iWorker, iNewWorker = iWorker;
  353. if ( SplitWUsPartition( pPartition, &pOldHalf, &pNewHalf, iOldWorker, iNewWorker ) )
  354. {
  355. if ( pOldHalf )
  356. SendPartitionToWorker( pOldHalf, iOldWorker );
  357. if ( pNewHalf )
  358. SendPartitionToWorker( pNewHalf, iNewWorker );
  359. // Delete the partition that got split
  360. Assert( pPartition->m_WUs.Count() == 0 );
  361. delete pPartition;
  362. m_Partitions.Remove( iPartToSplit );
  363. }
  364. }
  365. }
  366. }
  367. int FindSoonestPartition()
  368. {
  369. CUtlLinkedList < CPartitionInfo *, int > &lst = m_Partitions;
  370. // Sorted partitions
  371. CUtlMap< CPartitionInfo::PartitionWUs *, int > sortedPartitions ( CompareSoonestWorkUnitSets );
  372. sortedPartitions.EnsureCapacity( lst.Count() );
  373. FOR_EACH_LL( lst, i )
  374. {
  375. sortedPartitions.Insert( &lst[i]->m_WUs, i );
  376. }
  377. if ( sortedPartitions.Count() )
  378. {
  379. return sortedPartitions.Element( sortedPartitions.FirstInorder() );
  380. }
  381. return lst.Head();
  382. }
  383. int FindPartitionByWorker( int iWorker )
  384. {
  385. FOR_EACH_LL( m_Partitions, i )
  386. {
  387. if ( m_Partitions[i]->m_iWorker == iWorker )
  388. return i;
  389. }
  390. return -1;
  391. }
  392. void SendPartitionToWorker( CPartitionInfo *pPartition, int iWorker )
  393. {
  394. // Stuff the next nWUs work units into the buffer.
  395. MessageBuffer mb;
  396. PrepareDistributeWorkHeader( &mb, DW_SUBPACKETID_WU_ASSIGNMENT );
  397. FOR_EACH_LL( pPartition->m_WUs, i )
  398. {
  399. WUIndexType iWU = pPartition->m_WUs[i];
  400. mb.write( &iWU, sizeof( iWU ) );
  401. VMPITracker_WorkUnitSentToWorker( ( int ) iWU, iWorker );
  402. }
  403. VMPI_SendData( mb.data, mb.getLen(), iWorker );
  404. }
  405. virtual bool HandlePacket( MessageBuffer *pBuf, int iSource, bool bIgnoreContents )
  406. {
  407. return false;
  408. }
  409. private:
  410. CDSInfo *m_pInfo;
  411. CUtlLinkedList<CPartitionInfo*,int> m_Partitions;
  412. CVisibleWindowVector<CWULookupInfo> m_WULookup; // Map work unit index to CWorkUnitInfo.
  413. CUtlLinkedList<CWorkUnitInfo,int> m_WUInfo; // Sorted with most elegible WU at the head.
  414. };
  415. class CDistributor_DefaultWorker : public IWorkUnitDistributorWorker
  416. {
  417. public:
  418. virtual void Release()
  419. {
  420. delete this;
  421. }
  422. virtual void Init( CDSInfo *pInfo )
  423. {
  424. }
  425. virtual bool GetNextWorkUnit( WUIndexType *pWUIndex )
  426. {
  427. CCriticalSectionLock csLock( &m_CS );
  428. csLock.Lock();
  429. // NOTE: this is called from INSIDE worker threads.
  430. if ( m_WorkUnits.Count() == 0 )
  431. {
  432. return false;
  433. }
  434. else
  435. {
  436. *pWUIndex = m_WorkUnits[ m_WorkUnits.Head() ];
  437. m_WorkUnits.Remove( m_WorkUnits.Head() );
  438. return true;
  439. }
  440. }
  441. virtual void NoteLocalWorkUnitCompleted( WUIndexType iWU )
  442. {
  443. }
  444. virtual bool HandlePacket( MessageBuffer *pBuf, int iSource, bool bIgnoreContents )
  445. {
  446. if ( pBuf->data[1] == DW_SUBPACKETID_WU_ASSIGNMENT )
  447. {
  448. // If the message wasn't even related to the current DistributeWork() call we're on, ignore it.
  449. if ( bIgnoreContents )
  450. return true;
  451. if ( ((pBuf->getLen() - pBuf->getOffset()) % sizeof( WUIndexType )) != 0 )
  452. {
  453. Error( "DistributeWork: invalid work units packet from master" );
  454. }
  455. // Parse out the work unit indices.
  456. CCriticalSectionLock csLock( &m_CS );
  457. csLock.Lock();
  458. m_WorkUnits.Purge();
  459. int nIndices = (pBuf->getLen() - pBuf->getOffset()) / sizeof( WUIndexType );
  460. for ( int i=0; i < nIndices; i++ )
  461. {
  462. WUIndexType iWU;
  463. pBuf->read( &iWU, sizeof( iWU ) );
  464. // Add the index to the list.
  465. m_WorkUnits.AddToTail( iWU );
  466. }
  467. csLock.Unlock();
  468. return true;
  469. }
  470. else
  471. {
  472. return false;
  473. }
  474. }
  475. // Threads eat up the list of WUs in here.
  476. CCriticalSection m_CS;
  477. CUtlLinkedList<WUIndexType, int> m_WorkUnits; // A list of work units assigned to this worker
  478. };
  479. IWorkUnitDistributorMaster* CreateWUDistributor_DefaultMaster()
  480. {
  481. return new CDistributor_DefaultMaster;
  482. }
  483. IWorkUnitDistributorWorker* CreateWUDistributor_DefaultWorker()
  484. {
  485. return new CDistributor_DefaultWorker;
  486. }