Team Fortress 2 Source Code as on 22/4/2020
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

374 lines
11 KiB

  1. //========= Copyright Valve Corporation, All rights reserved. ============//
  2. //
  3. // Purpose:
  4. //
  5. //=============================================================================//
  6. #include <windows.h>
  7. #include <dbghelp.h>
  8. #include "vmpi.h"
  9. #include "cmdlib.h"
  10. #include "vmpi_tools_shared.h"
  11. #include "tier1/strtools.h"
  12. #include "mpi_stats.h"
  13. #include "iphelpers.h"
  14. #include "tier0/minidump.h"
  15. // ----------------------------------------------------------------------------- //
  16. // Globals.
  17. // ----------------------------------------------------------------------------- //
// Set to true by SharedDispatch once a VMPI_SUBPACKETID_DIRECTORIES packet has filled in gamedir/qdir.
static bool g_bReceivedDirectoryInfo = false; // Have we gotten the qdir info yet?
// Set to true by SharedDispatch once a VMPI_SUBPACKETID_DBINFO packet has filled in g_DBInfo/g_JobPrimaryID.
static bool g_bReceivedDBInfo = false;
// Copy of the CDBInfo carried by the last DBINFO packet; handed out by RecvDBInfo.
static CDBInfo g_DBInfo;
// Job primary ID carried by the last DBINFO packet; handed out by RecvDBInfo.
static unsigned long g_JobPrimaryID;
static int g_nDisconnects = 0; // Tracks how many remote processes have disconnected ungracefully.
  23. // ----------------------------------------------------------------------------- //
  24. // Shared dispatch code.
  25. // ----------------------------------------------------------------------------- //
  26. bool SharedDispatch( MessageBuffer *pBuf, int iSource, int iPacketID )
  27. {
  28. char *pInPos = &pBuf->data[2];
  29. switch ( pBuf->data[1] )
  30. {
  31. case VMPI_SUBPACKETID_DIRECTORIES:
  32. {
  33. Q_strncpy( gamedir, pInPos, sizeof( gamedir ) );
  34. pInPos += strlen( pInPos ) + 1;
  35. Q_strncpy( qdir, pInPos, sizeof( qdir ) );
  36. g_bReceivedDirectoryInfo = true;
  37. }
  38. return true;
  39. case VMPI_SUBPACKETID_DBINFO:
  40. {
  41. g_DBInfo = *((CDBInfo*)pInPos);
  42. pInPos += sizeof( CDBInfo );
  43. g_JobPrimaryID = *((unsigned long*)pInPos);
  44. g_bReceivedDBInfo = true;
  45. }
  46. return true;
  47. case VMPI_SUBPACKETID_CRASH:
  48. {
  49. char const chCrashInfoType = *pInPos;
  50. pInPos += 2;
  51. switch ( chCrashInfoType )
  52. {
  53. case 't':
  54. Warning( "\nWorker '%s' dead: %s\n", VMPI_GetMachineName( iSource ), pInPos );
  55. break;
  56. case 'f':
  57. {
  58. int iFileSize = * reinterpret_cast< int const * >( pInPos );
  59. pInPos += sizeof( iFileSize );
  60. // Temp folder
  61. char const *szFolder = NULL;
  62. if ( !szFolder ) szFolder = getenv( "TEMP" );
  63. if ( !szFolder ) szFolder = getenv( "TMP" );
  64. if ( !szFolder ) szFolder = "c:";
  65. // Base module name
  66. char chModuleName[_MAX_PATH], *pModuleName = chModuleName;
  67. ::GetModuleFileName( NULL, chModuleName, sizeof( chModuleName ) / sizeof( chModuleName[0] ) );
  68. if ( char *pch = strrchr( chModuleName, '.' ) )
  69. *pch = 0;
  70. if ( char *pch = strrchr( chModuleName, '\\' ) )
  71. *pch = 0, pModuleName = pch + 1;
  72. // Current time
  73. time_t currTime = ::time( NULL );
  74. struct tm * pTime = ::localtime( &currTime );
  75. // Number of minidumps this run
  76. static int s_numMiniDumps = 0;
  77. ++ s_numMiniDumps;
  78. // Prepare the filename
  79. char chSaveFileName[ 2 * _MAX_PATH ] = { 0 };
  80. sprintf( chSaveFileName, "%s\\vmpi_%s_on_%s_%d%.2d%2d%.2d%.2d%.2d_%d.mdmp",
  81. szFolder,
  82. pModuleName,
  83. VMPI_GetMachineName( iSource ),
  84. pTime->tm_year + 1900, /* Year less 2000 */
  85. pTime->tm_mon + 1, /* month (0 - 11 : 0 = January) */
  86. pTime->tm_mday, /* day of month (1 - 31) */
  87. pTime->tm_hour, /* hour (0 - 23) */
  88. pTime->tm_min, /* minutes (0 - 59) */
  89. pTime->tm_sec, /* seconds (0 - 59) */
  90. s_numMiniDumps
  91. );
  92. if ( FILE *fDump = fopen( chSaveFileName, "wb" ) )
  93. {
  94. fwrite( pInPos, 1, iFileSize, fDump );
  95. fclose( fDump );
  96. Warning( "\nSaved worker crash minidump '%s', size %d byte(s).\n",
  97. chSaveFileName, iFileSize );
  98. }
  99. else
  100. {
  101. Warning( "\nReceived worker crash minidump size %d byte(s), failed to save.\n", iFileSize );
  102. }
  103. }
  104. break;
  105. }
  106. }
  107. return true;
  108. }
  109. return false;
  110. }
  111. CDispatchReg g_SharedDispatchReg( VMPI_SHARED_PACKET_ID, SharedDispatch );
  112. // ----------------------------------------------------------------------------- //
  113. // Module interfaces.
  114. // ----------------------------------------------------------------------------- //
  115. void SendQDirInfo()
  116. {
  117. char cPacketID[2] = { VMPI_SHARED_PACKET_ID, VMPI_SUBPACKETID_DIRECTORIES };
  118. MessageBuffer mb;
  119. mb.write( cPacketID, 2 );
  120. mb.write( gamedir, strlen( gamedir ) + 1 );
  121. mb.write( qdir, strlen( qdir ) + 1 );
  122. VMPI_SendData( mb.data, mb.getLen(), VMPI_PERSISTENT );
  123. }
  124. void RecvQDirInfo()
  125. {
  126. while ( !g_bReceivedDirectoryInfo )
  127. VMPI_DispatchNextMessage();
  128. }
  129. void SendDBInfo( const CDBInfo *pInfo, unsigned long jobPrimaryID )
  130. {
  131. char cPacketInfo[2] = { VMPI_SHARED_PACKET_ID, VMPI_SUBPACKETID_DBINFO };
  132. const void *pChunks[] = { cPacketInfo, pInfo, &jobPrimaryID };
  133. int chunkLengths[] = { 2, sizeof( CDBInfo ), sizeof( jobPrimaryID ) };
  134. VMPI_SendChunks( pChunks, chunkLengths, ARRAYSIZE( pChunks ), VMPI_PERSISTENT );
  135. }
  136. void RecvDBInfo( CDBInfo *pInfo, unsigned long *pJobPrimaryID )
  137. {
  138. while ( !g_bReceivedDBInfo )
  139. VMPI_DispatchNextMessage();
  140. *pInfo = g_DBInfo;
  141. *pJobPrimaryID = g_JobPrimaryID;
  142. }
  143. // If the file is successfully opened, read and sent returns the size of the file in bytes
  144. // otherwise returns 0 and nothing is sent
  145. int VMPI_SendFileChunk( const void *pvChunkPrefix, int lenPrefix, tchar const *ptchFileName )
  146. {
  147. HANDLE hFile = NULL;
  148. HANDLE hMapping = NULL;
  149. void const *pvMappedData = NULL;
  150. int iResult = 0;
  151. hFile = ::CreateFile( ptchFileName, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL );
  152. if ( !hFile || ( hFile == INVALID_HANDLE_VALUE ) )
  153. goto done;
  154. hMapping = ::CreateFileMapping( hFile, NULL, PAGE_READONLY, 0, 0, NULL );
  155. if ( !hMapping || ( hMapping == INVALID_HANDLE_VALUE ) )
  156. goto done;
  157. pvMappedData = ::MapViewOfFile( hMapping, FILE_MAP_READ, 0, 0, 0 );
  158. if ( !pvMappedData )
  159. goto done;
  160. int iMappedFileSize = ::GetFileSize( hFile, NULL );
  161. if ( INVALID_FILE_SIZE == iMappedFileSize )
  162. goto done;
  163. // Send the data over VMPI
  164. if ( VMPI_Send3Chunks(
  165. pvChunkPrefix, lenPrefix,
  166. &iMappedFileSize, sizeof( iMappedFileSize ),
  167. pvMappedData, iMappedFileSize,
  168. VMPI_MASTER_ID ) )
  169. iResult = iMappedFileSize;
  170. // Fall-through for cleanup code to execute
  171. done:
  172. if ( pvMappedData )
  173. ::UnmapViewOfFile( pvMappedData );
  174. if ( hMapping && ( hMapping != INVALID_HANDLE_VALUE ) )
  175. ::CloseHandle( hMapping );
  176. if ( hFile && ( hFile != INVALID_HANDLE_VALUE ) )
  177. ::CloseHandle( hFile );
  178. return iResult;
  179. }
  180. void VMPI_HandleCrash( const char *pMessage, void *pvExceptionInfo, bool bAssert )
  181. {
  182. static LONG crashHandlerCount = 0;
  183. if ( InterlockedIncrement( &crashHandlerCount ) == 1 )
  184. {
  185. Msg( "\nFAILURE: '%s' (assert: %d)\n", pMessage, bAssert );
  186. // Send a message to the master.
  187. char crashMsg[4] = { VMPI_SHARED_PACKET_ID, VMPI_SUBPACKETID_CRASH, 't', ':' };
  188. VMPI_Send2Chunks(
  189. crashMsg,
  190. sizeof( crashMsg ),
  191. pMessage,
  192. strlen( pMessage ) + 1,
  193. VMPI_MASTER_ID );
  194. // Now attempt to create a minidump with the given exception information
  195. if ( pvExceptionInfo )
  196. {
  197. struct _EXCEPTION_POINTERS *pvExPointers = ( struct _EXCEPTION_POINTERS * ) pvExceptionInfo;
  198. tchar tchMinidumpFileName[_MAX_PATH] = { 0 };
  199. bool bSucceededWritingMinidump = WriteMiniDumpUsingExceptionInfo(
  200. pvExPointers->ExceptionRecord->ExceptionCode,
  201. pvExPointers,
  202. ( MINIDUMP_TYPE )( MiniDumpWithDataSegs | MiniDumpWithIndirectlyReferencedMemory | MiniDumpWithProcessThreadData ),
  203. // ( MINIDUMP_TYPE )( MiniDumpWithDataSegs | MiniDumpWithFullMemory | MiniDumpWithHandleData | MiniDumpWithUnloadedModules | MiniDumpWithIndirectlyReferencedMemory | MiniDumpWithProcessThreadData | MiniDumpWithPrivateReadWriteMemory ),
  204. // ( MINIDUMP_TYPE )( MiniDumpNormal ),
  205. tchMinidumpFileName );
  206. if ( bSucceededWritingMinidump )
  207. {
  208. crashMsg[2] = 'f';
  209. VMPI_SendFileChunk( crashMsg, sizeof( crashMsg ), tchMinidumpFileName );
  210. ::DeleteFile( tchMinidumpFileName );
  211. }
  212. }
  213. // Let the messages go out.
  214. Sleep( 500 );
  215. }
  216. InterlockedDecrement( &crashHandlerCount );
  217. }
  218. // This is called if we crash inside our crash handler. It just terminates the process immediately.
  219. LONG __stdcall VMPI_SecondExceptionFilter( struct _EXCEPTION_POINTERS *ExceptionInfo )
  220. {
  221. TerminateProcess( GetCurrentProcess(), 2 );
  222. return EXCEPTION_EXECUTE_HANDLER; // (never gets here anyway)
  223. }
  224. void VMPI_ExceptionFilter( unsigned long uCode, void *pvExceptionInfo )
  225. {
  226. // This is called if we crash inside our crash handler. It just terminates the process immediately.
  227. SetUnhandledExceptionFilter( VMPI_SecondExceptionFilter );
  228. //DWORD code = ExceptionInfo->ExceptionRecord->ExceptionCode;
  229. #define ERR_RECORD( name ) { name, #name }
  230. struct
  231. {
  232. int code;
  233. char *pReason;
  234. } errors[] =
  235. {
  236. ERR_RECORD( EXCEPTION_ACCESS_VIOLATION ),
  237. ERR_RECORD( EXCEPTION_ARRAY_BOUNDS_EXCEEDED ),
  238. ERR_RECORD( EXCEPTION_BREAKPOINT ),
  239. ERR_RECORD( EXCEPTION_DATATYPE_MISALIGNMENT ),
  240. ERR_RECORD( EXCEPTION_FLT_DENORMAL_OPERAND ),
  241. ERR_RECORD( EXCEPTION_FLT_DIVIDE_BY_ZERO ),
  242. ERR_RECORD( EXCEPTION_FLT_INEXACT_RESULT ),
  243. ERR_RECORD( EXCEPTION_FLT_INVALID_OPERATION ),
  244. ERR_RECORD( EXCEPTION_FLT_OVERFLOW ),
  245. ERR_RECORD( EXCEPTION_FLT_STACK_CHECK ),
  246. ERR_RECORD( EXCEPTION_FLT_UNDERFLOW ),
  247. ERR_RECORD( EXCEPTION_ILLEGAL_INSTRUCTION ),
  248. ERR_RECORD( EXCEPTION_IN_PAGE_ERROR ),
  249. ERR_RECORD( EXCEPTION_INT_DIVIDE_BY_ZERO ),
  250. ERR_RECORD( EXCEPTION_INT_OVERFLOW ),
  251. ERR_RECORD( EXCEPTION_INVALID_DISPOSITION ),
  252. ERR_RECORD( EXCEPTION_NONCONTINUABLE_EXCEPTION ),
  253. ERR_RECORD( EXCEPTION_PRIV_INSTRUCTION ),
  254. ERR_RECORD( EXCEPTION_SINGLE_STEP ),
  255. ERR_RECORD( EXCEPTION_STACK_OVERFLOW ),
  256. ERR_RECORD( EXCEPTION_ACCESS_VIOLATION ),
  257. };
  258. int nErrors = sizeof( errors ) / sizeof( errors[0] );
  259. int i=0;
  260. char *pchReason = NULL;
  261. char chUnknownBuffer[32];
  262. for ( i; ( i < nErrors ) && !pchReason; i++ )
  263. {
  264. if ( errors[i].code == uCode )
  265. pchReason = errors[i].pReason;
  266. }
  267. if ( i == nErrors )
  268. {
  269. sprintf( chUnknownBuffer, "Error code 0x%08X", uCode );
  270. pchReason = chUnknownBuffer;
  271. }
  272. VMPI_HandleCrash( pchReason, pvExceptionInfo, true );
  273. TerminateProcess( GetCurrentProcess(), 1 );
  274. }
  275. void HandleMPIDisconnect( int procID, const char *pReason )
  276. {
  277. int nLiveWorkers = VMPI_GetCurrentNumberOfConnections() - g_nDisconnects - 1;
  278. // We ran into the size limit before and it wasn't readily apparent that the size limit had
  279. // been breached, so make sure to show errors about invalid packet sizes..
  280. bool bOldSuppress = g_bSuppressPrintfOutput;
  281. g_bSuppressPrintfOutput = ( Q_stristr( pReason, "invalid packet size" ) == 0 );
  282. Warning( "\n\n--- WARNING: lost connection to '%s' (%s).\n", VMPI_GetMachineName( procID ), pReason );
  283. if ( g_bMPIMaster )
  284. {
  285. Warning( "%d workers remain.\n\n", nLiveWorkers );
  286. ++g_nDisconnects;
  287. /*
  288. if ( VMPI_GetCurrentNumberOfConnections() - g_nDisconnects <= 1 )
  289. {
  290. Error( "All machines disconnected!" );
  291. }
  292. */
  293. }
  294. else
  295. {
  296. VMPI_HandleAutoRestart();
  297. Error( "Worker quitting." );
  298. }
  299. g_bSuppressPrintfOutput = bOldSuppress;
  300. }