Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1507 lines
44 KiB

  1. /*++
  2. Copyright (c) 1996 Microsoft Corporation
  3. Module Name:
  4. send.c
  5. Abstract:
  6. Domain Name System (DNS) Library
  7. Send response routines.
  8. Author:
  9. Jim Gilroy (jamesg) October, 1996
  10. Revision History:
  11. --*/
  12. #include "dnsincs.h"
  // Source of the transaction ID placed in outgoing DNS queries: Dns_QueryLib
  // passes `gwTransactionId++` to DnsWriteQuestionToBuffer_UTF8, so each query
  // built by this module takes the next value (the WORD wraps on overflow).
  // NOTE(review): that increment is a plain `++` on this shared global with no
  // synchronization visible in this file - confirm whether concurrent queries
  // need an interlocked increment.
  13. WORD gwTransactionId = 1;
  14. VOID
  15. DnsCompletion(
  16. PVOID pvContext,
  17. DWORD cbWritten,
  18. DWORD dwCompletionStatus,
  19. OVERLAPPED * lpo
  20. )
  21. {
  22. BOOL WasProcessed = TRUE;
  23. CAsyncDns *pCC = (CAsyncDns *) pvContext;
  24. _ASSERT(pCC);
  25. _ASSERT(pCC->IsValid());
  26. //
  27. // if we could not process a command, or we were
  28. // told to destroy this object, close the connection.
  29. //
  30. WasProcessed = pCC->ProcessClient(cbWritten, dwCompletionStatus, lpo);
  31. }
  32. void DeleteDnsRec(PSMTPDNS_RECS pDnsRec)
  33. {
  34. DWORD Loop = 0;
  35. PLIST_ENTRY pEntry = NULL;
  36. PMXIPLIST_ENTRY pQEntry = NULL;
  37. if(pDnsRec == NULL)
  38. {
  39. return;
  40. }
  41. while (pDnsRec->DnsArray[Loop] != NULL)
  42. {
  43. if(pDnsRec->DnsArray[Loop]->DnsName[0])
  44. {
  45. while(!IsListEmpty(&pDnsRec->DnsArray[Loop]->IpListHead))
  46. {
  47. pEntry = RemoveHeadList (&pDnsRec->DnsArray[Loop]->IpListHead);
  48. pQEntry = CONTAINING_RECORD( pEntry, MXIPLIST_ENTRY, ListEntry);
  49. delete pQEntry;
  50. }
  51. delete pDnsRec->DnsArray[Loop];
  52. }
  53. Loop++;
  54. }
  55. if(pDnsRec)
  56. {
  57. delete pDnsRec;
  58. pDnsRec = NULL;
  59. }
  60. }
  61. CAsyncDns::CAsyncDns(void)
  62. {
  63. m_signature = DNS_CONNECTION_SIGNATURE_VALID; // signature on object for sanity check
  64. m_cPendingIoCount = 0;
  65. m_cThreadCount = 0;
  66. m_cbReceived = 0;
  67. m_BytesToRead = 0;
  68. m_dwIpServer = 0;
  69. m_dwFlags = 0;
  70. m_fUdp = TRUE;
  71. m_FirstRead = TRUE;
  72. m_pMsgRecv = NULL;
  73. m_pMsgRecvBuf = NULL;
  74. m_pMsgSend = NULL;
  75. m_pMsgSendBuf = NULL;
  76. m_cbSendBufSize = 0;
  77. m_pAtqContext = NULL;
  78. m_HostName [0] = '\0';
  79. m_pTcpRegIpList = NULL;
  80. m_fIsGlobalDnsList = FALSE;
  81. }
  82. CAsyncDns::~CAsyncDns(void)
  83. {
  84. PATQ_CONTEXT pAtqContext = NULL;
  85. TraceFunctEnterEx((LPARAM)this, "CAsyncDns::~CAsyncDns");
  86. //
  87. // If we failed to connect to a DNS server, the following code attempts to
  88. // mark that DNS server down and fire off a query to another DNS server that
  89. // is marked UP.
  90. //
  91. if(m_pMsgSend)
  92. {
  93. delete [] m_pMsgSendBuf;
  94. m_pMsgSend = NULL;
  95. m_pMsgSendBuf = NULL;
  96. }
  97. if(m_pMsgRecv)
  98. {
  99. delete [] m_pMsgRecvBuf;
  100. m_pMsgRecv = NULL;
  101. m_pMsgRecvBuf = NULL;
  102. }
  103. //release the context from Atq
  104. pAtqContext = (PATQ_CONTEXT)InterlockedExchangePointer( (PVOID *)&m_pAtqContext, NULL);
  105. if ( pAtqContext != NULL )
  106. {
  107. AtqFreeContext( pAtqContext, TRUE );
  108. }
  109. m_signature = DNS_CONNECTION_SIGNATURE_FREE; // signature on object for sanity check
  110. }
  111. BOOL CAsyncDns::ReadFile(
  112. IN LPVOID pBuffer,
  113. IN DWORD cbSize /* = MAX_READ_BUFF_SIZE */
  114. )
  115. {
  116. BOOL fRet = TRUE;
  117. _ASSERT(pBuffer != NULL);
  118. _ASSERT(cbSize > 0);
  119. ZeroMemory(&m_ReadOverlapped, sizeof(m_ReadOverlapped));
  120. m_ReadOverlapped.LastIoState = DNS_READIO;
  121. IncPendingIoCount();
  122. fRet = AtqReadFile(m_pAtqContext, // Atq context
  123. pBuffer, // Buffer
  124. cbSize, // BytesToRead
  125. (OVERLAPPED *)&m_ReadOverlapped) ;
  126. if(!fRet)
  127. {
  128. DisconnectClient();
  129. DecPendingIoCount();
  130. }
  131. return fRet;
  132. }
  133. BOOL CAsyncDns::WriteFile(
  134. IN LPVOID pBuffer,
  135. IN DWORD cbSize /* = MAX_READ_BUFF_SIZE */
  136. )
  137. {
  138. BOOL fRet = TRUE;
  139. _ASSERT(pBuffer != NULL);
  140. _ASSERT(cbSize > 0);
  141. ZeroMemory(&m_WriteOverlapped, sizeof(m_WriteOverlapped));
  142. m_WriteOverlapped.LastIoState = DNS_WRITEIO;
  143. IncPendingIoCount();
  144. fRet = AtqWriteFile(m_pAtqContext, // Atq context
  145. pBuffer, // Buffer
  146. cbSize, // BytesToRead
  147. (OVERLAPPED *) &m_WriteOverlapped) ;
  148. if(!fRet)
  149. {
  150. DisconnectClient();
  151. DecPendingIoCount();
  152. }
  153. return fRet;
  154. }
  155. DNS_STATUS
  156. CAsyncDns::SendPacket(void)
  157. {
  158. return 0;
  159. }
  160. //
  161. // Public send routines
  162. //
  163. DNS_STATUS
  164. CAsyncDns::Dns_Send(
  165. )
  166. /*++
  167. Routine Description:
  168. Send a DNS packet.
  169. This is the generic send routine used for ANY send of a DNS message.
  170. It assumes nothing about the message type, but does assume:
  171. - pCurrent points at byte following end of desired data
  172. - RR count bytes are in HOST byte order
  173. Arguments:
  174. pMsg - message info for message to send
  175. Return Value:
  176. TRUE if successful.
  177. FALSE on send error.
  178. --*/
  179. {
  180. INT err = 0;
  181. BOOL fRet = TRUE;
  182. TraceFunctEnterEx((LPARAM) this, "CAsyncDns::Dns_Send");
  183. DebugTrace((LPARAM) this, "Sending DNS request for %s", m_HostName);
  184. fRet = WriteFile(m_pMsgSendBuf, (DWORD) m_cbSendBufSize);
  185. if(!fRet)
  186. {
  187. err = GetLastError();
  188. }
  189. return( (DNS_STATUS)err );
  190. } // Dns_Send
  191. //-----------------------------------------------------------------------------------
  192. // Description:
  193. // Kicks off an async query to DNS.
  194. //
  195. // Arguments:
  196. // IN pszQuestionName - Name to query for.
  197. //
  198. // IN wQuestionType - Record type to query for.
  199. //
  200. // IN dwFlags - DNS configuration flags for SMTP. Currently these dictate
  201. // what transport is used to talk to DNS (TCP/UDP). They are:
  202. //
  203. // DNS_FLAGS_NONE - Use UDP initially. If that fails, or if the
  204. // reply is truncated requery using TCP.
  205. //
  206. // DNS_FLAGS_TCP_ONLY - Use TCP only.
  207. //
  208. // DNS_FLAGS_UDP_ONLY - Use UDP only.
  209. //
  210. // IN MyFQDN - FQDN of this machine (for MX record sorting)
  211. //
  212. // IN fUdp - Should UDP or TCP be used for this query? When dwFlags is
  213. // DNS_FLAGS_NONE the initial query is UDP, and the retry query, if the
  214. // response was truncated, is TCP. Depending on whether we're retrying
  215. // this flag should be set appropriately by the caller.
  216. //
  217. // Returns:
  218. // ERROR_SUCCESS if an async query was pended
  219. // Win32 error if an error occurred and an async query was not pended. All
  220. // errors from this function are retryable (as opposed NDR'ing the message)
  221. // so the message is re-queued if an error occurred.
  222. //-----------------------------------------------------------------------------------
  223. DNS_STATUS
  224. CAsyncDns::Dns_QueryLib(
  225. IN DNS_NAME pszQuestionName,
  226. IN WORD wQuestionType,
  227. IN DWORD dwFlags,
  228. IN BOOL fUdp,
  229. IN CDnsServerList *pTcpRegIpList,
  230. IN BOOL fIsGlobalDnsList)
  231. {
  232. DNS_STATUS status = ERROR_NOT_ENOUGH_MEMORY;
  233. TraceFunctEnterEx((LPARAM) this, "CAsyncDns::Dns_QueryLib");
  234. _ASSERT(pTcpRegIpList);
  235. DNS_LOG_ASYNC_QUERY(
  236. pszQuestionName,
  237. wQuestionType,
  238. dwFlags,
  239. fUdp,
  240. pTcpRegIpList);
  241. m_dwFlags = dwFlags;
  242. m_fUdp = fUdp;
  243. m_pTcpRegIpList = pTcpRegIpList;
  244. m_fIsGlobalDnsList = fIsGlobalDnsList;
  245. lstrcpyn(m_HostName, pszQuestionName, sizeof(m_HostName));
  246. //
  247. // build send packet
  248. //
  249. m_pMsgSendBuf = new BYTE[DNS_TCP_DEFAULT_PACKET_LENGTH ];
  250. if( NULL == m_pMsgSendBuf )
  251. {
  252. TraceFunctLeaveEx((LPARAM) this);
  253. return (DNS_STATUS) ERROR_NOT_ENOUGH_MEMORY;
  254. }
  255. DWORD dwBufSize = DNS_TCP_DEFAULT_PACKET_LENGTH ;
  256. if( !m_fUdp )
  257. {
  258. m_pMsgSend = (PDNS_MESSAGE_BUFFER)(m_pMsgSendBuf+2);
  259. dwBufSize -= 2;
  260. }
  261. else
  262. {
  263. m_pMsgSend = (PDNS_MESSAGE_BUFFER)(m_pMsgSendBuf);
  264. }
  265. if( !DnsWriteQuestionToBuffer_UTF8 ( m_pMsgSend,
  266. &dwBufSize,
  267. pszQuestionName,
  268. wQuestionType,
  269. gwTransactionId++,
  270. !( dwFlags & DNS_QUERY_NO_RECURSION ) ) )
  271. {
  272. DNS_PRINTF_ERR("Unable to create query message.\n");
  273. ErrorTrace((LPARAM) this, "Unable to create DNS query for %s", pszQuestionName);
  274. TraceFunctLeaveEx((LPARAM) this);
  275. return ERROR_NOT_ENOUGH_MEMORY;
  276. }
  277. m_cbSendBufSize = (WORD) dwBufSize;
  278. if( !m_fUdp )
  279. {
  280. *((u_short*)m_pMsgSendBuf) = htons((WORD)dwBufSize );
  281. m_cbSendBufSize += 2;
  282. }
  283. if (m_pMsgSend)
  284. {
  285. status = DnsSendRecord();
  286. }
  287. else
  288. {
  289. status = ERROR_INVALID_NAME;
  290. }
  291. TraceFunctLeaveEx((LPARAM) this);
  292. return status;
  293. }
  294. void CAsyncDns::DisconnectClient(void)
  295. {
  296. SOCKET hSocket;
  297. hSocket = (SOCKET)InterlockedExchangePointer( (PVOID *)&m_DnsSocket, (PVOID) INVALID_SOCKET );
  298. if ( hSocket != INVALID_SOCKET )
  299. {
  300. if ( QueryAtqContext() != NULL )
  301. {
  302. AtqCloseSocket(QueryAtqContext() , TRUE);
  303. }
  304. }
  305. }
  306. //
  307. // TCP routines
  308. //
  309. DNS_STATUS
  310. CAsyncDns::Dns_OpenTcpConnectionAndSend()
  311. /*++
  312. Routine Description:
  313. Connect via TCP or UDP to a DNS server. The server list is held
  314. in a global variable read from the registry.
  315. Arguments:
  316. None
  317. Return Value:
  318. ERROR_SUCCESS on success
  319. Win32 error on failure
  320. --*/
  321. {
  322. INT err = 0;
  323. DWORD dwErrServList = ERROR_SUCCESS;
  324. BOOL fThrottle = FALSE;
  325. TraceFunctEnterEx((LPARAM) this, "CAsyncDns::Dns_OpenTcpConnectionAndSend");
  326. //
  327. // setup a TCP socket
  328. // - INADDR_ANY -- let stack select source IP
  329. //
  330. if(!m_fUdp)
  331. {
  332. m_DnsSocket = Dns_CreateSocket(SOCK_STREAM);
  333. BOOL fRet = FALSE;
  334. //Alway enable linger so sockets that connect to the server.
  335. //This will send a hard close to the server which will cause
  336. //the servers TCP/IP socket table to be flushed very early.
  337. //We should see very few, if any, sockets in the TIME_WAIT
  338. //state
  339. struct linger Linger;
  340. Linger.l_onoff = 1;
  341. Linger.l_linger = 0;
  342. err = setsockopt(m_DnsSocket, SOL_SOCKET, SO_LINGER, (const char FAR *)&Linger, sizeof(Linger));
  343. }
  344. else
  345. {
  346. m_DnsSocket = Dns_CreateSocket(SOCK_DGRAM);
  347. }
  348. if ( m_DnsSocket == INVALID_SOCKET )
  349. {
  350. err = WSAGetLastError();
  351. if ( !err )
  352. {
  353. err = WSAENOTSOCK;
  354. }
  355. ErrorTrace((LPARAM) this, "Received error %d opening a socket to DNS server", err);
  356. return( err );
  357. }
  358. m_RemoteAddress.sin_family = AF_INET;
  359. m_RemoteAddress.sin_port = DNS_PORT_NET_ORDER;
  360. //
  361. // Passing in fThrottle enables functionality in CTcpRegIpList to limit the
  362. // number of connections to servers on PROBATION (see ResetTimeoutServers...).
  363. // Throttling is disabled if Failover is disabled, because the tracking for
  364. // throttling is protocol (TCP/UDP) specific.
  365. //
  366. fThrottle = !FailoverDisabled();
  367. //
  368. // Get a working DNS server from the set of servers for this machine and
  369. // connect to it. The CTcpRegIpList has logic to keep track of the state
  370. // of DNS servers (UP or DOWN) and logic to retry DOWN DNS servers.
  371. //
  372. dwErrServList = GetDnsList()->GetWorkingServerIp(&m_dwIpServer, fThrottle);
  373. while(ERROR_SUCCESS == dwErrServList)
  374. {
  375. DNS_PRINTF_DBG("Connecting to DNS server %s over %s.\n",
  376. inet_ntoa(*((in_addr *)(&m_dwIpServer))), IsUdp() ? "UDP/IP" : "TCP/IP");
  377. m_RemoteAddress.sin_addr.s_addr = m_dwIpServer;
  378. err = connect(m_DnsSocket, (struct sockaddr *) &m_RemoteAddress, sizeof(SOCKADDR_IN));
  379. if ( !err )
  380. {
  381. DNS_PRINTF_MSG("Connected to DNS %s over %s.\n",
  382. inet_ntoa(*((in_addr *)(&m_dwIpServer))), IsUdp() ? "UDP/IP" : "TCP/IP");
  383. break;
  384. }
  385. else
  386. {
  387. DNS_PRINTF_ERR("Failed WinSock connect() to %s over %s, Winsock err - %d.\n",
  388. inet_ntoa(*((in_addr *)(&m_dwIpServer))), IsUdp() ? "UDP/IP" : "TCP/IP",
  389. WSAGetLastError());
  390. if(FailoverDisabled())
  391. break;
  392. GetDnsList()->MarkDown(m_dwIpServer, err, IsUdp());
  393. dwErrServList = GetDnsList()->GetWorkingServerIp(&m_dwIpServer, fThrottle);
  394. continue;
  395. }
  396. }
  397. if(!FailoverDisabled() &&
  398. (DNS_ERROR_NO_DNS_SERVERS == dwErrServList || ERROR_RETRY == dwErrServList))
  399. {
  400. //
  401. // If no servers are UP, just try a DOWN server. We must not simply
  402. // exit and ack the queue into retry in this situation. Consider the
  403. // case where all servers are DOWN. If we rely exclusively on GetWorking-
  404. // ServerIp(), then we will never try DNS till the retry time for the
  405. // DNS servers expires. Even if the admin kicks the queues, they will
  406. // go right back into retry because GetWorkingServerIp() will fail.
  407. //
  408. // Instead, if everything is DOWN, we will try SOMETHING by calling
  409. // GetAnyServerIp().
  410. //
  411. // -- If this fails, and ProcessClient gets the error ProcessClient
  412. // will try to failover to another DNS server. For this it calls
  413. // GetWorkingServerIp() which will fail, and the connection is acked
  414. // retry. Note that ProcessClient must not use GetAnyServerIp. If it
  415. // uses this function we are in danger of continuously looping trying
  416. // to spin connections to GetAnyServerIp.
  417. //
  418. // -- If the connection should fail in the connect below (for TCP/IP)
  419. // the failover logic is straightforward. We will simply ack the queue
  420. // to retry right away.
  421. //
  422. dwErrServList = GetDnsList()->GetAnyServerIp(&m_dwIpServer);
  423. if(DNS_ERROR_NO_DNS_SERVERS == dwErrServList)
  424. {
  425. // No configured servers error: this can happen if the serverlist
  426. // was deleted underneath us. Just fail the connection for now.
  427. DNS_PRINTF_ERR("No DNS servers available to query.\n");
  428. err = DNS_ERROR_NO_DNS_SERVERS;
  429. ErrorTrace((LPARAM) this, "No DNS servers. Error - %d", dwErrServList);
  430. return err;
  431. }
  432. m_RemoteAddress.sin_addr.s_addr = m_dwIpServer;
  433. err = connect(m_DnsSocket, (struct sockaddr *) &m_RemoteAddress, sizeof(SOCKADDR_IN));
  434. }
  435. _ASSERT(ERROR_SUCCESS == dwErrServList);
  436. //
  437. // We have a connection to DNS
  438. //
  439. if(ERROR_SUCCESS == err)
  440. {
  441. // Re-associate the handle to the ATQ
  442. // Call ATQ to associate the handle
  443. if (!AtqAddAsyncHandle(
  444. &m_pAtqContext,
  445. NULL,
  446. (LPVOID) this,
  447. DnsCompletion,
  448. 30, // ATQ_TIMEOUT_INTERVAL
  449. (HANDLE) m_DnsSocket))
  450. {
  451. return GetLastError();
  452. }
  453. //
  454. // send desired packet
  455. //
  456. err = Dns_Send();
  457. }
  458. else
  459. {
  460. DNS_PRINTF_DBG("Unable to open a connection to a DNS server.\n");
  461. if(m_DnsSocket != INVALID_SOCKET)
  462. {
  463. closesocket(m_DnsSocket);
  464. m_DnsSocket = INVALID_SOCKET;
  465. }
  466. }
  467. return( (DNS_STATUS)err );
  468. } // Dns_OpenTcpConnectionAndSend
  469. BOOL CAsyncDns::ProcessReadIO(IN DWORD InputBufferLen,
  470. IN DWORD dwCompletionStatus,
  471. IN OUT OVERLAPPED * lpo)
  472. {
  473. BOOL fRet = TRUE;
  474. DWORD DataSize = 0;
  475. DNS_STATUS DnsStatus = 0;
  476. PDNS_RECORD pRecordList = NULL;
  477. WORD wMessageLength = 0;
  478. TraceFunctEnterEx((LPARAM) this, "BOOL CAsyncDns::ProcessReadIO");
  479. //add up the number of bytes we received thus far
  480. m_cbReceived += InputBufferLen;
  481. //
  482. // read atleast 2 bytes
  483. //
  484. if(!m_fUdp && m_FirstRead && ( m_cbReceived < 2 ) )
  485. {
  486. fRet = ReadFile(&m_pMsgRecvBuf[m_cbReceived],DNS_TCP_DEFAULT_PACKET_LENGTH-1 );
  487. return fRet;
  488. }
  489. //
  490. // get the size of the message
  491. //
  492. if(!m_fUdp && m_FirstRead && (m_cbReceived >= 2))
  493. {
  494. DataSize = ntohs(*(u_short *)m_pMsgRecvBuf);
  495. //
  496. // add 2 bytes for the field which specifies the length of data
  497. //
  498. m_BytesToRead = DataSize + 2;
  499. m_FirstRead = FALSE;
  500. }
  501. //
  502. // pend another read if we have n't read enough
  503. //
  504. if(!m_fUdp && (m_cbReceived < m_BytesToRead))
  505. {
  506. DWORD cbMoreToRead = m_BytesToRead - m_cbReceived;
  507. if(m_cbReceived + m_BytesToRead >= DNS_TCP_DEFAULT_PACKET_LENGTH)
  508. {
  509. ErrorTrace((LPARAM)this,
  510. "Size field in DNS packet is corrupt - %08x: ",
  511. DataSize);
  512. DNS_PRINTF_ERR("Reply packet from DNS server is corrupt.\n");
  513. TraceFunctLeaveEx((LPARAM)this);
  514. return FALSE;
  515. }
  516. fRet = ReadFile(&m_pMsgRecvBuf[m_cbReceived], cbMoreToRead);
  517. }
  518. else
  519. {
  520. if( !m_fUdp )
  521. {
  522. //
  523. // message length is 2 bytes less to take care of the msg length
  524. // field.
  525. //
  526. //m_pMsgRecv->MessageLength = (WORD) m_cbReceived - 2;
  527. m_pMsgRecv = (PDNS_MESSAGE_BUFFER)(m_pMsgRecvBuf+2);
  528. }
  529. else
  530. {
  531. //m_pMsgRecv->MessageLength = (WORD) m_cbReceived;
  532. m_pMsgRecv = (PDNS_MESSAGE_BUFFER)m_pMsgRecvBuf;
  533. }
  534. SWAP_COUNT_BYTES(&m_pMsgRecv->MessageHead);
  535. //
  536. // We queried over UDP and the reply from DNS was truncated because the response
  537. // was longer than the UDP packet size. We requery DNS using TCP unless SMTP is
  538. // configured to use UDP only. RetryAsyncDnsQuery sets the members of this CAsyncDns
  539. // object appropriately depending on whether if fails or succeeds. After calling
  540. // RetryAsyncDnsQuery, this object must be deleted.
  541. //
  542. if(IsUdp() && !(m_dwFlags & DNS_FLAGS_UDP_ONLY) && m_pMsgRecv->MessageHead.Truncation)
  543. {
  544. //
  545. // Abort if we queried on TCP and got a truncated response. This is an illegal
  546. // response from DNS. If we don't abort we could loop forever.
  547. //
  548. if(m_dwFlags & DNS_FLAGS_TCP_ONLY)
  549. {
  550. DNS_PRINTF_ERR("Unexpected response. Reply packet had "
  551. "truncation bit set, though query was over TCP/IP.\n");
  552. _ASSERT(0 && "Shouldn't have truncated reply over TCP");
  553. return FALSE;
  554. }
  555. DNS_PRINTF_MSG("Truncated UDP response. Retrying query over TCP.\n");
  556. DebugTrace((LPARAM) this, "Truncated reply - reissuing query using TCP");
  557. RetryAsyncDnsQuery(FALSE); // FALSE == Do not use UDP
  558. return FALSE;
  559. }
  560. wMessageLength = (WORD)( m_fUdp ? ( m_cbReceived ) : ( m_cbReceived - 2 ));
  561. DnsStatus = DnsExtractRecordsFromMessage_UTF8(m_pMsgRecv,
  562. wMessageLength, &pRecordList);
  563. DNS_LOG_RESPONSE(DnsStatus, pRecordList, (PBYTE)m_pMsgRecv, wMessageLength);
  564. DnsProcessReply(DnsStatus, pRecordList);
  565. DnsRecordListFree(pRecordList, TRUE);
  566. }
  567. TraceFunctLeaveEx((LPARAM) this);
  568. return fRet;
  569. }
  570. BOOL CAsyncDns::ProcessClient (IN DWORD InputBufferLen,
  571. IN DWORD dwCompletionStatus,
  572. IN OUT OVERLAPPED * lpo)
  573. {
  574. BOOL RetStatus = FALSE;
  575. DWORD dwDnsTransportError = ERROR_SUCCESS;
  576. TraceFunctEnterEx((LPARAM) this, "CAsyncDns::ProcessClient()");
  577. IncThreadCount();
  578. //if lpo == NULL, then we timed out. Send an appropriate message
  579. //then close the connection
  580. if( (lpo == NULL) && (dwCompletionStatus == ERROR_SEM_TIMEOUT))
  581. {
  582. dwDnsTransportError = ERROR_SEM_TIMEOUT;
  583. //
  584. // fake a pending IO as we'll dec the overall count in the
  585. // exit processing of this routine needs to happen before
  586. // DisconnectClient else completing threads could tear us down
  587. //
  588. IncPendingIoCount();
  589. DNS_PRINTF_ERR("Timeout waiting for DNS server response.\n");
  590. DebugTrace( (LPARAM)this, "Async DNS client timed out");
  591. DisconnectClient();
  592. }
  593. else if((InputBufferLen == 0) || (dwCompletionStatus != NO_ERROR))
  594. {
  595. dwDnsTransportError = ERROR_RETRY;
  596. DebugTrace((LPARAM) this,
  597. "CAsyncDns::ProcessClient: InputBufferLen = %d dwCompletionStatus = %d"
  598. " - Closing connection", InputBufferLen, dwCompletionStatus);
  599. DNS_PRINTF_ERR("Connection dropped by DNS server - Win32 error %d.\n",
  600. dwCompletionStatus);
  601. DisconnectClient();
  602. }
  603. else if (lpo == (OVERLAPPED *) &m_ReadOverlapped)
  604. {
  605. if(m_DnsSocket == INVALID_SOCKET && InputBufferLen > 0)
  606. {
  607. //
  608. // This is to firewall against an ATQ bug where we callback with an
  609. // nonzero InputBufferLen after the ATQ disconnect. We shouldn't be
  610. // doing further processing after this point.
  611. //
  612. ErrorTrace((LPARAM)this, "Connection already closed, callback should not occur");
  613. }
  614. else
  615. {
  616. //A client based async IO completed
  617. DNS_PRINTF_DBG("Response received from DNS server.\n");
  618. RetStatus = ProcessReadIO(InputBufferLen, dwCompletionStatus, lpo);
  619. if(!FailoverDisabled())
  620. GetDnsList()->ResetServerOnConnect(m_RemoteAddress.sin_addr.s_addr);
  621. }
  622. }
  623. else if(lpo == (OVERLAPPED *) &m_WriteOverlapped)
  624. {
  625. RetStatus = ReadFile(m_pMsgRecvBuf, DNS_TCP_DEFAULT_PACKET_LENGTH);
  626. if(!RetStatus)
  627. {
  628. DNS_PRINTF_ERR("Network error on connection to DNS server.\n");
  629. ErrorTrace((LPARAM) this, "ReadFile failed");
  630. dwDnsTransportError = ERROR_RETRY;
  631. }
  632. }
  633. DebugTrace((LPARAM)this,"ASYNC DNS - Pending IOs: %d", m_cPendingIoCount);
  634. // Do NOT Touch the member variables past this POINT!
  635. // This object may be deleted!
  636. //
  637. // decrement the overall pending IO count for this session
  638. // tracing and ASSERTs if we're going down.
  639. //
  640. DecThreadCount();
  641. if (DecPendingIoCount() == 0)
  642. {
  643. DisconnectClient();
  644. DebugTrace((LPARAM)this,"ASYNC DNS - Pending IOs: %d", m_cPendingIoCount);
  645. DebugTrace((LPARAM)this,"ASYNC DNS - Thread count: %d", m_cThreadCount);
  646. if(ERROR_SUCCESS != dwDnsTransportError && !FailoverDisabled())
  647. {
  648. GetDnsList()->MarkDown(QueryDnsServer(), dwDnsTransportError, IsUdp());
  649. RetryAsyncDnsQuery(IsUdp());
  650. }
  651. delete this;
  652. }
  653. return TRUE;
  654. }
  655. DNS_STATUS
  656. CAsyncDns::DnsSendRecord()
  657. /*++
  658. Routine Description:
  659. Send message, receive response.
  660. Arguments:
  661. aipDnsServers -- specific DNS servers to query;
  662. OPTIONAL, if specified overrides normal list associated with machine
  663. Return Value:
  664. ERROR_SUCCESS if successful.
  665. Error code on failure.
  666. --*/
  667. {
  668. DNS_STATUS status = 0;
  669. m_pMsgRecvBuf = (BYTE*) new BYTE[DNS_TCP_DEFAULT_PACKET_LENGTH];
  670. if(m_pMsgRecvBuf == NULL)
  671. {
  672. return( DNS_ERROR_NO_MEMORY );
  673. }
  674. status = Dns_OpenTcpConnectionAndSend();
  675. return( status );
  676. }
  677. SOCKET
  678. CAsyncDns::Dns_CreateSocket(
  679. IN INT SockType
  680. )
  681. /*++
  682. Routine Description:
  683. Create socket.
  684. Arguments:
  685. SockType -- SOCK_DGRAM or SOCK_STREAM
  686. Return Value:
  687. socket if successful.
  688. Otherwise INVALID_SOCKET.
  689. --*/
  690. {
  691. SOCKET s;
  692. //
  693. // create socket
  694. //
  695. s = socket( AF_INET, SockType, 0 );
  696. if ( s == INVALID_SOCKET )
  697. {
  698. return INVALID_SOCKET;
  699. }
  700. return s;
  701. }
  702. //-----------------------------------------------------------------------------
  703. // Description:
  704. // Constructor and Destructor for class to maintain a list of IP addresses
  705. // (for DNS servers) and their state (UP or DOWN). The IP addresses are
  706. // held in an IP_ARRAY.
  707. //-----------------------------------------------------------------------------
  708. CDnsServerList::CDnsServerList()
  709. {
  710. m_IpListPtr = NULL;
  711. //
  712. // Shortcut to quickly figure out how many servers are down. This keeps track
  713. // of how many servers are marked up currently. Used in ResetServersIfNeeded
  714. // primarily to avoid checking the state of all servers in the usual case when
  715. // all servers are up.
  716. //
  717. m_cUpServers = 0;
  718. m_prgdwFailureTick = NULL;
  719. m_prgServerState = NULL;
  720. m_prgdwFailureCount = NULL;
  721. m_prgdwConnections = NULL;
  722. m_dwSig = TCP_REG_LIST_SIGNATURE;
  723. }
  724. CDnsServerList::~CDnsServerList()
  725. {
  726. if(m_IpListPtr)
  727. delete [] m_IpListPtr;
  728. if(m_prgdwFailureTick)
  729. delete [] m_prgdwFailureTick;
  730. if(m_prgServerState)
  731. delete [] m_prgServerState;
  732. if(m_prgdwFailureCount)
  733. delete [] m_prgdwFailureCount;
  734. if(m_prgdwConnections)
  735. delete [] m_prgdwConnections;
  736. m_IpListPtr = NULL;
  737. m_prgdwFailureTick = NULL;
  738. m_prgServerState = NULL;
  739. m_prgdwFailureCount = NULL;
  740. m_prgdwConnections = NULL;
  741. }
  742. //-----------------------------------------------------------------------------
  743. // Description:
  744. // Copies the the IP address list to m_IpListPtr by allocating a new block
  745. // of memory. If this fails due to out of memory, there's little we can do
  746. // so we just NULL out the server list and return FALSE indicating error.
  747. //
  748. // Arguments:
  749. // IpPtr - Ptr to IP_ARRAY of servers, this can be NULL in which case
  750. // we assume that there are no servers. On shutdown, the SMTP code
  751. // calls this with NULL.
  752. //
  753. // This argument is copied.
  754. //
  755. // Returns:
  756. // TRUE if the update succeeded.
  757. // FALSE if it failed.
  758. //-----------------------------------------------------------------------------
  759. BOOL CDnsServerList::Update(PIP_ARRAY IpPtr)
  760. {
  761. BOOL fFatalError = FALSE;
  762. BOOL fRet = FALSE;
  763. DWORD cbIpArraySize = 0;
  764. TraceFunctEnterEx((LPARAM) this, "CDnsServerList::Update");
  765. m_sl.ExclusiveLock();
  766. if(m_IpListPtr) {
  767. delete [] m_IpListPtr;
  768. m_IpListPtr = NULL;
  769. }
  770. if(m_prgdwFailureTick) {
  771. delete [] m_prgdwFailureTick;
  772. m_prgdwFailureTick = NULL;
  773. }
  774. if(m_prgServerState) {
  775. delete [] m_prgServerState;
  776. m_prgServerState = NULL;
  777. }
  778. if(m_prgdwConnections) {
  779. delete [] m_prgdwConnections;
  780. m_prgdwConnections = NULL;
  781. }
  782. // Note: IpPtr can be NULL
  783. if(IpPtr == NULL) {
  784. m_IpListPtr = NULL;
  785. m_cUpServers = 0;
  786. goto Exit;
  787. }
  788. // Copy the IpPtr
  789. cbIpArraySize = sizeof(IP_ARRAY) +
  790. sizeof(IP_ADDRESS) * (IpPtr->cAddrCount - 1);
  791. m_IpListPtr = (PIP_ARRAY)(new BYTE[cbIpArraySize]);
  792. if(!m_IpListPtr) {
  793. fFatalError = TRUE;
  794. goto Exit;
  795. }
  796. CopyMemory(m_IpListPtr, IpPtr, cbIpArraySize);
  797. m_cUpServers = IpPtr->cAddrCount;
  798. m_prgdwFailureTick = new DWORD[m_cUpServers];
  799. m_prgServerState = new SERVER_STATE[m_cUpServers];
  800. m_prgdwFailureCount = new DWORD[m_cUpServers];
  801. m_prgdwConnections = new DWORD[m_cUpServers];
  802. if(!m_prgdwFailureTick ||
  803. !m_prgServerState ||
  804. !m_prgdwFailureCount ||
  805. !m_prgdwConnections)
  806. {
  807. ErrorTrace((LPARAM) this, "Out of memory initializing DNS server list");
  808. fFatalError = TRUE;
  809. goto Exit;
  810. }
  811. for(int i = 0; i < m_cUpServers; i++) {
  812. m_prgdwFailureTick[i] = 0;
  813. m_prgServerState[i] = DNS_STATE_UP;
  814. m_prgdwFailureCount[i] = 0;
  815. m_prgdwConnections[i] = 0;
  816. }
  817. fRet = TRUE;
  818. Exit:
  819. if(fFatalError) {
  820. if(m_prgServerState) {
  821. delete [] m_prgServerState;
  822. m_prgServerState = NULL;
  823. }
  824. if(m_prgdwFailureTick) {
  825. delete [] m_prgdwFailureTick;
  826. m_prgdwFailureTick = NULL;
  827. }
  828. if(m_IpListPtr) {
  829. delete [] m_IpListPtr;
  830. m_IpListPtr = NULL;
  831. }
  832. if(m_prgdwFailureCount) {
  833. delete [] m_prgdwFailureCount;
  834. m_prgdwFailureCount = NULL;
  835. }
  836. if(m_prgdwConnections) {
  837. delete [] m_prgdwConnections;
  838. m_prgdwConnections = NULL;
  839. }
  840. m_cUpServers = 0;
  841. }
  842. m_sl.ExclusiveUnlock();
  843. TraceFunctLeaveEx((LPARAM) this);
  844. return fRet;
  845. }
  846. //-----------------------------------------------------------------------------
  847. // Description:
  848. // Checks to see if the DNS serverlist has changed, and calls update only
  849. // if it has. This allows us to preserve the failure-counts and state
  850. // information if the serverlist has not changed.
  851. // Arguments:
  852. // IN PIP_ARRAY pipServers - (Possibly) new server-list
  853. // Returns:
  854. // TRUE if UpdateIfChanged was successful (does NOT indicate if list was
  855. // changed.
  856. // FALSE if we hit a failure during the update.
  857. //-----------------------------------------------------------------------------
  858. BOOL CDnsServerList::UpdateIfChanged(
  859. PIP_ARRAY pipServers)
  860. {
  861. BOOL fUpdate = FALSE;
  862. BOOL fRet = TRUE;
  863. TraceFunctEnterEx((LPARAM) this, "CDnsServerList::UpdateIfChanged");
  864. m_sl.ShareLock();
  865. if(!m_IpListPtr && !pipServers) {
  866. // Both NULL, no update needed
  867. fUpdate = FALSE;
  868. } else if(!m_IpListPtr || !pipServers) {
  869. // If one is NULL but not the other, the update is needed
  870. fUpdate = TRUE;
  871. } else {
  872. // Both are non-NULL
  873. if(m_IpListPtr->cAddrCount != pipServers->cAddrCount) {
  874. // First check if the server count is different
  875. fUpdate = TRUE;
  876. } else {
  877. // If the servercount is identical, we can do a memcmp of the serverlist
  878. fUpdate = !!memcmp(m_IpListPtr->aipAddrs, pipServers->aipAddrs,
  879. sizeof(IP_ADDRESS) * m_IpListPtr->cAddrCount);
  880. }
  881. }
  882. m_sl.ShareUnlock();
  883. if(fUpdate) {
  884. DebugTrace((LPARAM)this, "Updating serverlist");
  885. TraceFunctLeaveEx((LPARAM)this);
  886. return Update(pipServers);
  887. }
  888. TraceFunctLeaveEx((LPARAM)this);
  889. return TRUE;
  890. }
  891. //-----------------------------------------------------------------------------
  892. // Description:
  893. // Creates a copy of m_IpListPtr and returns it to the caller. Note that
  894. // we cannot simply return m_IpListPtr, since that could change, so we
  895. // must return a copy of the list.
  896. // Arguments:
  897. // OUT PIP_ARRAY *ppipArray - The allocated copy is returned through this
  898. // Returns;
  899. // TRUE if a copy could be made successfully
  900. // FALSE if an error occurred (out of memory allocating copy).
  901. // Notes:
  902. // Caller must de-allocate copy by calling delete (MSVCRT heap).
  903. //-----------------------------------------------------------------------------
  904. BOOL CDnsServerList::CopyList(
  905. PIP_ARRAY *ppipArray)
  906. {
  907. BOOL fRet = FALSE;
  908. ULONG cbArraySize = 0;
  909. TraceFunctEnterEx((LPARAM)this, "CDnsServerList::CopyList");
  910. *ppipArray = NULL;
  911. m_sl.ShareLock();
  912. if(!m_IpListPtr || m_IpListPtr->cAddrCount == 0) {
  913. fRet = FALSE;
  914. goto Exit;
  915. }
  916. cbArraySize =
  917. sizeof(IP_ARRAY) +
  918. sizeof(IP_ADDRESS) * (m_IpListPtr->cAddrCount - 1);
  919. *ppipArray = (PIP_ARRAY) new BYTE[cbArraySize];
  920. if(!*ppipArray) {
  921. fRet = FALSE;
  922. goto Exit;
  923. }
  924. CopyMemory(*ppipArray, m_IpListPtr, cbArraySize);
  925. fRet = TRUE;
  926. Exit:
  927. m_sl.ShareUnlock();
  928. TraceFunctLeaveEx((LPARAM)this);
  929. return fRet;
  930. }
  931. //-----------------------------------------------------------------------------
  932. // Description:
  933. // Return the IP address of a server known to be UP. This function also
  934. // checks to see if any servers currently marked DOWN should be reset to
  935. // the UP state again (based on a retry interval).
  936. // Arguments:
  937. // DWORD *pdwIpServer - Sets the DWORD pointed to, to the IP address of
  938. // a server in the UP state.
  939. // BOOL fThrottle - Connections to a failing server are restricted. We do
  940. // not want to spin off hundreds of async DNS queries to a server
  941. // that may actually be unreachable or down. If a server is
  942. // suspiciously non-responsive, we will want to spin off a limited
  943. // number of connections to it. If all of them fail we will mark the
  944. // connection as DOWN, and if one of them succeeds, we will mark the
  945. // server UP. The number of connections to a server is throttled if
  946. // it is in the DNS_STATUS_PROBATION state. ResetTimeoutServers...
  947. // sets this state.
  948. // Returns:
  949. // ERROR_SUCCESS - If a DNS server in the UP state was found
  950. // ERROR_RETRY - If all DNS servers are currently DOWN or in PROBATION
  951. // and the MAX number of allowed connections for PROBATION servers
  952. // has been reached.
  953. // DNS_ERROR_NO_DNS_SERVERS - If no DNS servers are configured
  954. //-----------------------------------------------------------------------------
  955. DWORD CDnsServerList::GetWorkingServerIp(DWORD *pdwIpServer, BOOL fThrottle)
  956. {
  957. DWORD dwErr = ERROR_RETRY;
  958. int iServer = 0;
  959. _ASSERT(pdwIpServer != NULL);
  960. *pdwIpServer = INADDR_NONE;
  961. // Check if any servers were down and bring them up if the timeout has expired
  962. ResetTimeoutServersIfNeeded();
  963. m_sl.ShareLock();
  964. if(m_IpListPtr == NULL || m_IpListPtr->cAddrCount == 0) {
  965. dwErr = DNS_ERROR_NO_DNS_SERVERS;
  966. goto Exit;
  967. }
  968. if(m_cUpServers == 0) {
  969. dwErr = ERROR_RETRY;
  970. goto Exit;
  971. }
  972. for(iServer = 0; iServer < (int)m_IpListPtr->cAddrCount; iServer++) {
  973. if(m_prgServerState[iServer] != DNS_STATE_DOWN) {
  974. if(fThrottle && !AllowConnection(iServer))
  975. continue;
  976. dwErr = ERROR_SUCCESS;
  977. *pdwIpServer = m_IpListPtr->aipAddrs[iServer];
  978. break;
  979. }
  980. }
  981. Exit:
  982. m_sl.ShareUnlock();
  983. return dwErr;
  984. }
  985. //-----------------------------------------------------------------------------
  986. // Description:
  987. // Marks a server in the list as down and sets the next retry time for
  988. // that server. The next retry time is calculated modulo MAX_TICK_COUNT.
  989. // Arguments:
  990. // dwIp -- IP address of server to mark as DOWN
  991. // dwErr -- Error from DNS or network
  992. // fUdp -- TRUE if protocol used was UDP, FALSE if TCP
  993. //-----------------------------------------------------------------------------
  994. void CDnsServerList::MarkDown(
  995. DWORD dwIp,
  996. DWORD dwErr,
  997. BOOL fUdp)
  998. {
  999. int iServer = 0;
  1000. DWORD cUpServers = 0;
  1001. //
  1002. // Set to TRUE only when a server is actually marked DOWN. For instance,
  1003. // we've failed < ErrorsBeforeFailover() times, there's no need to
  1004. // log an event in MarkDown.
  1005. //
  1006. BOOL fLogEvent = FALSE;
  1007. TraceFunctEnterEx((LPARAM) this, "CDnsServerList::MarkDown");
  1008. m_sl.ExclusiveLock();
  1009. DNS_PRINTF_DBG("Marking DNS server %s as down.\n",
  1010. inet_ntoa(*((in_addr *)(&dwIp))));
  1011. if(m_IpListPtr == NULL || m_IpListPtr->cAddrCount == 0 || m_cUpServers == 0)
  1012. goto Exit;
  1013. // Find the server to mark as down among all the UP servers
  1014. for(iServer = 0; iServer < (int)m_IpListPtr->cAddrCount; iServer++) {
  1015. if(m_IpListPtr->aipAddrs[iServer] == dwIp)
  1016. break;
  1017. }
  1018. if(iServer >= (int)m_IpListPtr->cAddrCount ||
  1019. m_prgServerState[iServer] == DNS_STATE_DOWN)
  1020. goto Exit;
  1021. //
  1022. // A DNS server is not marked down till it has failed a number of times
  1023. // consecutively. This protects against occasional errors from DNS servers
  1024. // which can occur under heavy load. Even if 0.5% of connections have
  1025. // errors from DNS - on a heavily stressed server, with say 100 DNS queries
  1026. // per minute, we would end up with a server going down every 2 mins.
  1027. //
  1028. m_prgdwFailureCount[iServer]++;
  1029. if(m_prgdwConnections[iServer] > 0)
  1030. m_prgdwConnections[iServer]--;
  1031. if(m_prgdwFailureCount[iServer] < ErrorsBeforeFailover()) {
  1032. ErrorTrace((LPARAM) this,
  1033. "%d consecutive errors connecting to server %08x, error=%d",
  1034. m_prgdwFailureCount[iServer], dwIp, dwErr);
  1035. goto Exit;
  1036. }
  1037. // Mark server down
  1038. m_prgServerState[iServer] = DNS_STATE_DOWN;
  1039. m_prgdwConnections[iServer] = 0;
  1040. _ASSERT(m_cUpServers > 0);
  1041. m_cUpServers--;
  1042. m_prgdwFailureTick[iServer] = GetTickCount();
  1043. fLogEvent = TRUE;
  1044. Exit:
  1045. cUpServers = m_cUpServers;
  1046. m_sl.ExclusiveUnlock();
  1047. // Log events outside the ExclusiveLock()
  1048. if(fLogEvent)
  1049. LogServerDown(dwIp, fUdp, dwErr, cUpServers);
  1050. TraceFunctLeaveEx((LPARAM) this);
  1051. return;
  1052. }
  1053. //-----------------------------------------------------------------------------
  1054. // Description:
  1055. // If a server has been failing, we keep track of the number of
  1056. // consecutive failures in m_prgdwFailureCount. This function is called
  1057. // when we successfully connect to the server and we want to reset the
  1058. // failure count.
  1059. // Arguments:
  1060. // dwIp - IP Address of server to reset failure count for
  1061. // Note:
  1062. // This function is called for every successful query so it needs to be
  1063. // kept simple and quick especially in the usual case - when there is no
  1064. // Reset to be done.
  1065. //-----------------------------------------------------------------------------
  1066. void CDnsServerList::ResetServerOnConnect(DWORD dwIp)
  1067. {
  1068. int iServer = 0;
  1069. BOOL fShareLock = TRUE;
  1070. TraceFunctEnterEx((LPARAM) this, "CDnsServerList::ResetServerOnConnect");
  1071. m_sl.ShareLock();
  1072. if(!m_IpListPtr || m_IpListPtr->cAddrCount == 0)
  1073. goto Exit;
  1074. // Find the server to reset
  1075. for(iServer = 0;
  1076. iServer < (int)m_IpListPtr->cAddrCount &&
  1077. dwIp != m_IpListPtr->aipAddrs[iServer];
  1078. iServer++);
  1079. if(iServer >= (int)m_IpListPtr->cAddrCount)
  1080. goto Exit;
  1081. // Nothing to do if the specified server is UP and has a zero failure count
  1082. if(!m_prgdwFailureCount[iServer] && m_prgServerState[iServer] == DNS_STATE_UP)
  1083. goto Exit;
  1084. m_sl.ShareUnlock();
  1085. m_sl.ExclusiveLock();
  1086. fShareLock = FALSE;
  1087. // Re-verify that we still have something to do after ShareUnlock->ExclusiveLock
  1088. if(!m_prgdwFailureCount[iServer] && m_prgServerState[iServer] == DNS_STATE_UP)
  1089. goto Exit;
  1090. DebugTrace((LPARAM) this,
  1091. "Resetting server %08x, State=%d, Failure count=%d, Connection count=%d",
  1092. dwIp, m_prgServerState[iServer], m_prgdwFailureCount[iServer],
  1093. m_prgdwConnections[iServer]);
  1094. // If server was in the state DOWN/PROBATION, bring it UP
  1095. if(m_prgServerState[iServer] != DNS_STATE_UP) {
  1096. // Servers on PROBATION are already UP, so no need to inc UpServers
  1097. if(m_prgServerState[iServer] == DNS_STATE_DOWN)
  1098. m_cUpServers++;
  1099. m_prgServerState[iServer] = DNS_STATE_UP;
  1100. m_prgdwFailureTick[iServer] = 0;
  1101. _ASSERT(m_cUpServers <= (int)m_IpListPtr->cAddrCount);
  1102. }
  1103. // Clear all failures
  1104. m_prgdwFailureCount[iServer] = 0;
  1105. m_prgdwConnections[iServer] = 0;
  1106. Exit:
  1107. if(fShareLock)
  1108. m_sl.ShareUnlock();
  1109. else
  1110. m_sl.ExclusiveUnlock();
  1111. TraceFunctLeaveEx((LPARAM) this);
  1112. }
  1113. //-----------------------------------------------------------------------------
  1114. // Description:
  1115. // Checks if any servers are DOWN, and if the retry time has expired for
  1116. // those servers. If so those servers will be brought up and marked in the
  1117. // PROBATION state. We do not want to transition servers that were DOWN
  1118. // directly to UP, because we are still not sure whether or not these
  1119. // servers are really responding. While in the PROBATION state, we allow
  1120. // only a limited number of connections to a server, so as not to cause
  1121. // all remote-queues to choke up trying to connect to a possibly non-
  1122. // functional server. If one of these connections succeeds, the server
  1123. // will be marked back UP and all remote-queues will be able to use this
  1124. // server again. If all the (limited number of) connections fail, the
  1125. // server will go from the PROBATION state to DOWN again.
  1126. // Arguments:
  1127. // None.
  1128. // Returns:
  1129. // Nothing.
  1130. //-----------------------------------------------------------------------------
  1131. void CDnsServerList::ResetTimeoutServersIfNeeded()
  1132. {
  1133. int iServer = 0;
  1134. DWORD dwElapsedTicks = 0;
  1135. DWORD dwCurrentTick = 0;
  1136. //
  1137. // Quick check - if all servers are up (usual case) or there are no configured
  1138. // servers, there's nothing for us to do.
  1139. //
  1140. m_sl.ShareLock();
  1141. if(m_IpListPtr == NULL || m_IpListPtr->cAddrCount == 0 || m_cUpServers == m_IpListPtr->cAddrCount) {
  1142. m_sl.ShareUnlock();
  1143. return;
  1144. }
  1145. m_sl.ShareUnlock();
  1146. // Some servers are down... figure out which need to be brought up
  1147. m_sl.ExclusiveLock();
  1148. // Re-check that no one modified the list while we didn't have the sharelock
  1149. if(m_IpListPtr == NULL || m_IpListPtr->cAddrCount == 0 || m_cUpServers == m_IpListPtr->cAddrCount) {
  1150. m_sl.ExclusiveUnlock();
  1151. return;
  1152. }
  1153. dwCurrentTick = GetTickCount();
  1154. for(iServer = 0; iServer < (int)m_IpListPtr->cAddrCount; iServer++) {
  1155. if(m_prgServerState[iServer] != DNS_STATE_DOWN)
  1156. continue;
  1157. //
  1158. // Note: This also takes care of the special case where dwCurrentTick occurs
  1159. // after the wraparound and m_prgdwFailureTick occurs before the wraparound.
  1160. // This is because, in that case, the elapsed time is:
  1161. //
  1162. // time since wraparound + time before wraparound that failure occurred - 1
  1163. // (-1 is because it's 0 time to transition from MAX_TICK_VALUE to 0)
  1164. //
  1165. // = dwCurrentTick + (MAX_TICK_VALUE - m_prgdwFailureTick[iServer]) - 1
  1166. //
  1167. // Since MAX_TICK_VALUE == -1
  1168. //
  1169. // = dwCurrentTick + (-1 - m_prgdwFailureTick[iServer]) - 1
  1170. // = dwCurrentTick - m_prgdwFailureTick[iServer]
  1171. //
  1172. dwElapsedTicks = dwCurrentTick - m_prgdwFailureTick[iServer];
  1173. #define TICKS_TILL_RETRY 10 * 60 * 1000 // 10 minutes
  1174. if(dwElapsedTicks > TICKS_TILL_RETRY) {
  1175. m_prgServerState[iServer] = DNS_STATE_PROBATION;
  1176. m_prgdwFailureTick[iServer] = 0;
  1177. m_prgdwConnections[iServer] = 0;
  1178. m_cUpServers++;
  1179. _ASSERT(m_cUpServers <= (int)m_IpListPtr->cAddrCount);
  1180. }
  1181. }
  1182. m_sl.ExclusiveUnlock();
  1183. }