Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1507 lines
44 KiB

/*++
Copyright (c) 1996 Microsoft Corporation
Module Name:
send.c
Abstract:
Domain Name System (DNS) Library
Send response routines.
Author:
Jim Gilroy (jamesg) October, 1996
Revision History:
--*/
#include "dnsincs.h"
// Source of the transaction ID written into each outgoing DNS question.
// CAsyncDns::Dns_QueryLib post-increments it every time it builds a query;
// no synchronization around that increment is visible in this file.
WORD gwTransactionId = 1;
//-----------------------------------------------------------------------------
// Description:
//      I/O completion callback for an async DNS connection. This is the
//      routine handed to AtqAddAsyncHandle by
//      CAsyncDns::Dns_OpenTcpConnectionAndSend; it forwards the completion to
//      the CAsyncDns object that owns the connection.
// Arguments:
//      pvContext - The CAsyncDns object registered with the ATQ context
//      cbWritten - Byte count reported for the completed I/O
//      dwCompletionStatus - Completion status reported for the I/O
//      lpo - OVERLAPPED of the completed I/O
// Notes:
//      ProcessClient may "delete this" once the last pending I/O has been
//      accounted for, so the object must not be touched after the call.
//      ProcessClient's return value carries no information (it always returns
//      TRUE), which is why it is not examined here.
//-----------------------------------------------------------------------------
VOID
DnsCompletion(
    PVOID pvContext,
    DWORD cbWritten,
    DWORD dwCompletionStatus,
    OVERLAPPED * lpo
    )
{
    CAsyncDns *pCC = (CAsyncDns *) pvContext;

    _ASSERT(pCC);
    _ASSERT(pCC->IsValid());

    pCC->ProcessClient(cbWritten, dwCompletionStatus, lpo);
}
//-----------------------------------------------------------------------------
// Description:
//      Frees an SMTPDNS_RECS structure: for every entry of the NULL-terminated
//      DnsArray that has a non-empty DnsName, the entries queued on its
//      IpListHead are removed and deleted, then the array entry itself is
//      deleted. Finally the structure is deleted.
// Arguments:
//      pDnsRec - Structure to free; NULL is accepted and ignored.
// Notes:
//      NOTE(review): array entries whose DnsName is empty are skipped and are
//      not deleted here - confirm that such entries never own memory.
//-----------------------------------------------------------------------------
void DeleteDnsRec(PSMTPDNS_RECS pDnsRec)
{
    if(NULL == pDnsRec)
        return;

    for(DWORD iHost = 0; pDnsRec->DnsArray[iHost] != NULL; iHost++)
    {
        if(!pDnsRec->DnsArray[iHost]->DnsName[0])
            continue;

        // Drain the per-host IP list before releasing the host entry itself
        PLIST_ENTRY pListHead = &pDnsRec->DnsArray[iHost]->IpListHead;
        while(!IsListEmpty(pListHead))
        {
            PLIST_ENTRY pLink = RemoveHeadList(pListHead);
            PMXIPLIST_ENTRY pIpEntry = CONTAINING_RECORD(pLink, MXIPLIST_ENTRY, ListEntry);
            delete pIpEntry;
        }

        delete pDnsRec->DnsArray[iHost];
    }

    delete pDnsRec;
}
//-----------------------------------------------------------------------------
// Description:
//      Puts a CAsyncDns object into its idle state: no buffers, no ATQ
//      context, no socket, UDP selected and the "first read" flag set.
// Notes:
//      m_DnsSocket is explicitly set to INVALID_SOCKET. DisconnectClient()
//      decides whether there is a socket to close by comparing m_DnsSocket
//      with INVALID_SOCKET, so the member must never hold an indeterminate
//      value, even if no socket was ever created for this object.
//-----------------------------------------------------------------------------
CAsyncDns::CAsyncDns(void)
{
    m_signature = DNS_CONNECTION_SIGNATURE_VALID; // signature on object for sanity check

    m_cPendingIoCount = 0;
    m_cThreadCount = 0;
    m_cbReceived = 0;
    m_BytesToRead = 0;
    m_dwIpServer = 0;
    m_DnsSocket = INVALID_SOCKET;
    m_dwFlags = 0;
    m_fUdp = TRUE;
    m_FirstRead = TRUE;

    m_pMsgRecv = NULL;
    m_pMsgRecvBuf = NULL;
    m_pMsgSend = NULL;
    m_pMsgSendBuf = NULL;
    m_cbSendBufSize = 0;

    m_pAtqContext = NULL;
    m_HostName [0] = '\0';
    m_pTcpRegIpList = NULL;
    m_fIsGlobalDnsList = FALSE;
}
//-----------------------------------------------------------------------------
// Description:
//      Releases the send and receive packet buffers, frees the ATQ context
//      (if one is still attached) and stamps the object with the FREE
//      signature.
// Notes:
//      The buffers are released based on the buffer pointers themselves.
//      m_pMsgRecv is only assigned in ProcessReadIO once a complete reply has
//      been received, whereas m_pMsgRecvBuf is allocated in DnsSendRecord
//      before anything is sent; keying the release on m_pMsgRecv would leak
//      the receive buffer of every query that times out or fails.
//      delete [] on a NULL pointer is a no-op.
//-----------------------------------------------------------------------------
CAsyncDns::~CAsyncDns(void)
{
    PATQ_CONTEXT pAtqContext = NULL;

    TraceFunctEnterEx((LPARAM)this, "CAsyncDns::~CAsyncDns");

    // m_pMsgSend / m_pMsgRecv point into the buffers, they own nothing
    delete [] m_pMsgSendBuf;
    m_pMsgSendBuf = NULL;
    m_pMsgSend = NULL;

    delete [] m_pMsgRecvBuf;
    m_pMsgRecvBuf = NULL;
    m_pMsgRecv = NULL;

    //release the context from Atq
    pAtqContext = (PATQ_CONTEXT)InterlockedExchangePointer( (PVOID *)&m_pAtqContext, NULL);
    if ( pAtqContext != NULL )
    {
        AtqFreeContext( pAtqContext, TRUE );
    }

    m_signature = DNS_CONNECTION_SIGNATURE_FREE; // signature on object for sanity check

    TraceFunctLeaveEx((LPARAM)this);
}
//-----------------------------------------------------------------------------
// Description:
//      Pends an asynchronous read on the ATQ context of this connection. The
//      pending-I/O count is incremented before the read is issued; if the
//      read cannot be issued the connection is disconnected and the count is
//      dropped again.
// Arguments:
//      pBuffer - Buffer that receives the data
//      cbSize - Number of bytes to read
// Returns:
//      The result of AtqReadFile. On FALSE, the thread's last-error value is
//      the one that was current when AtqReadFile returned; it is saved and
//      restored around the cleanup so that the cleanup calls cannot overwrite
//      it (same contract as CAsyncDns::WriteFile).
//-----------------------------------------------------------------------------
BOOL CAsyncDns::ReadFile(
    IN LPVOID pBuffer,
    IN DWORD cbSize /* = MAX_READ_BUFF_SIZE */
    )
{
    BOOL fRet = TRUE;
    DWORD dwErr = NO_ERROR;

    _ASSERT(pBuffer != NULL);
    _ASSERT(cbSize > 0);

    ZeroMemory(&m_ReadOverlapped, sizeof(m_ReadOverlapped));
    m_ReadOverlapped.LastIoState = DNS_READIO;

    // Account for the I/O before issuing it; it may complete immediately
    IncPendingIoCount();

    fRet = AtqReadFile(m_pAtqContext,                   // Atq context
                       pBuffer,                         // Buffer
                       cbSize,                          // BytesToRead
                       (OVERLAPPED *)&m_ReadOverlapped);
    if(!fRet)
    {
        dwErr = GetLastError();
        DisconnectClient();
        DecPendingIoCount();
        SetLastError(dwErr);
    }

    return fRet;
}
//-----------------------------------------------------------------------------
// Description:
//      Pends an asynchronous write on the ATQ context of this connection. The
//      pending-I/O count is incremented before the write is issued; if the
//      write cannot be issued the connection is disconnected and the count is
//      dropped again.
// Arguments:
//      pBuffer - Data to send
//      cbSize - Number of bytes to send
// Returns:
//      The result of AtqWriteFile. On FALSE, the thread's last-error value is
//      the one that was current when AtqWriteFile returned: Dns_Send calls
//      GetLastError() right after a failed WriteFile, so the value is saved
//      and restored around the cleanup calls, which could otherwise
//      overwrite it.
//-----------------------------------------------------------------------------
BOOL CAsyncDns::WriteFile(
    IN LPVOID pBuffer,
    IN DWORD cbSize /* = MAX_READ_BUFF_SIZE */
    )
{
    BOOL fRet = TRUE;
    DWORD dwErr = NO_ERROR;

    _ASSERT(pBuffer != NULL);
    _ASSERT(cbSize > 0);

    ZeroMemory(&m_WriteOverlapped, sizeof(m_WriteOverlapped));
    m_WriteOverlapped.LastIoState = DNS_WRITEIO;

    // Account for the I/O before issuing it; it may complete immediately
    IncPendingIoCount();

    fRet = AtqWriteFile(m_pAtqContext,                    // Atq context
                        pBuffer,                          // Buffer
                        cbSize,                           // BytesToWrite
                        (OVERLAPPED *) &m_WriteOverlapped);
    if(!fRet)
    {
        dwErr = GetLastError();
        DisconnectClient();
        DecPendingIoCount();
        SetLastError(dwErr);
    }

    return fRet;
}
// Placeholder send routine: it performs no I/O and unconditionally reports
// a status of 0.
DNS_STATUS
CAsyncDns::SendPacket(void)
{
    DNS_STATUS status = 0;

    return status;
}
//
// Public send routines
//
DNS_STATUS
CAsyncDns::Dns_Send(
    )
/*++
Routine Description:
    Send the DNS packet that Dns_QueryLib built.
    The first m_cbSendBufSize bytes of m_pMsgSendBuf are written to the
    connection with an asynchronous write (CAsyncDns::WriteFile).
Arguments:
    None. The packet is taken from m_pMsgSendBuf / m_cbSendBufSize.
Return Value:
    0 (ERROR_SUCCESS) if the write was pended.
    A non-zero Win32 error if the write could not be issued. If WriteFile
    fails but no last-error value is available, ERROR_RETRY is returned so
    that a failed send is never reported as success.
--*/
{
    INT err = 0;

    TraceFunctEnterEx((LPARAM) this, "CAsyncDns::Dns_Send");

    DebugTrace((LPARAM) this, "Sending DNS request for %s", m_HostName);

    if(!WriteFile(m_pMsgSendBuf, (DWORD) m_cbSendBufSize))
    {
        err = GetLastError();
        if(!err)
        {
            // The write failed; never let that look like ERROR_SUCCESS
            err = ERROR_RETRY;
        }
    }

    TraceFunctLeaveEx((LPARAM) this);
    return( (DNS_STATUS)err );
} // Dns_Send
//-----------------------------------------------------------------------------------
// Description:
//      Starts an async DNS query: records the query parameters on this object,
//      builds the question packet in a freshly allocated send buffer and hands
//      off to DnsSendRecord() to connect and send.
//
// Arguments:
//      IN pszQuestionName - Name to query for. Also copied to m_HostName.
//
//      IN wQuestionType - Record type to query for.
//
//      IN dwFlags - DNS configuration flags for SMTP, stored in m_dwFlags.
//          These dictate what transport is used to talk to DNS (TCP/UDP):
//
//              DNS_FLAGS_NONE - Use UDP initially. If that fails, or if the
//                  reply is truncated requery using TCP.
//
//              DNS_FLAGS_TCP_ONLY - Use TCP only.
//
//              DNS_FLAGS_UDP_ONLY - Use UDP only.
//
//          The question asks for recursion unless DNS_QUERY_NO_RECURSION is
//          set in dwFlags.
//
//      IN fUdp - TRUE to send this particular query over UDP, FALSE for TCP.
//          For TCP the packet is preceded by a 2 byte length field in network
//          byte order.
//
//      IN pTcpRegIpList - DNS server list, stored in m_pTcpRegIpList. Must not
//          be NULL (asserted).
//
//      IN fIsGlobalDnsList - Stored in m_fIsGlobalDnsList.
//
// Returns:
//      The status of DnsSendRecord() if the packet could be built.
//      ERROR_NOT_ENOUGH_MEMORY if the send buffer could not be allocated or
//      the question could not be written to it.
//-----------------------------------------------------------------------------------
DNS_STATUS
CAsyncDns::Dns_QueryLib(
    IN DNS_NAME pszQuestionName,
    IN WORD wQuestionType,
    IN DWORD dwFlags,
    IN BOOL fUdp,
    IN CDnsServerList *pTcpRegIpList,
    IN BOOL fIsGlobalDnsList)
{
    DNS_STATUS status = ERROR_NOT_ENOUGH_MEMORY;
    DWORD cbQuestion = DNS_TCP_DEFAULT_PACKET_LENGTH;

    TraceFunctEnterEx((LPARAM) this, "CAsyncDns::Dns_QueryLib");

    _ASSERT(pTcpRegIpList);

    DNS_LOG_ASYNC_QUERY(
        pszQuestionName,
        wQuestionType,
        dwFlags,
        fUdp,
        pTcpRegIpList);

    // Remember how this query is being made
    m_dwFlags = dwFlags;
    m_fUdp = fUdp;
    m_pTcpRegIpList = pTcpRegIpList;
    m_fIsGlobalDnsList = fIsGlobalDnsList;
    lstrcpyn(m_HostName, pszQuestionName, sizeof(m_HostName));

    //
    // build send packet
    //
    m_pMsgSendBuf = new BYTE[DNS_TCP_DEFAULT_PACKET_LENGTH ];
    if( m_pMsgSendBuf == NULL )
    {
        TraceFunctLeaveEx((LPARAM) this);
        return (DNS_STATUS) ERROR_NOT_ENOUGH_MEMORY;
    }

    if( m_fUdp )
    {
        // UDP: the DNS message starts at the beginning of the buffer
        m_pMsgSend = (PDNS_MESSAGE_BUFFER)(m_pMsgSendBuf);
    }
    else
    {
        // TCP: the first 2 bytes are reserved for the message length
        m_pMsgSend = (PDNS_MESSAGE_BUFFER)(m_pMsgSendBuf+2);
        cbQuestion -= 2;
    }

    if( !DnsWriteQuestionToBuffer_UTF8 ( m_pMsgSend,
                                         &cbQuestion,
                                         pszQuestionName,
                                         wQuestionType,
                                         gwTransactionId++,
                                         !( dwFlags & DNS_QUERY_NO_RECURSION ) ) )
    {
        DNS_PRINTF_ERR("Unable to create query message.\n");
        ErrorTrace((LPARAM) this, "Unable to create DNS query for %s", pszQuestionName);
        TraceFunctLeaveEx((LPARAM) this);
        return ERROR_NOT_ENOUGH_MEMORY;
    }

    // cbQuestion is used below as the length of the question that was written
    m_cbSendBufSize = (WORD) cbQuestion;

    if( !m_fUdp )
    {
        // Fill in the TCP length prefix and count it in the bytes to send
        *((u_short*)m_pMsgSendBuf) = htons((WORD)cbQuestion );
        m_cbSendBufSize += 2;
    }

    if (!m_pMsgSend)
    {
        status = ERROR_INVALID_NAME;
    }
    else
    {
        status = DnsSendRecord();
    }

    TraceFunctLeaveEx((LPARAM) this);
    return status;
}
void CAsyncDns::DisconnectClient(void)
{
SOCKET hSocket;
hSocket = (SOCKET)InterlockedExchangePointer( (PVOID *)&m_DnsSocket, (PVOID) INVALID_SOCKET );
if ( hSocket != INVALID_SOCKET )
{
if ( QueryAtqContext() != NULL )
{
AtqCloseSocket(QueryAtqContext() , TRUE);
}
}
}
//
// TCP routines
//
DNS_STATUS
CAsyncDns::Dns_OpenTcpConnectionAndSend()
/*++
Routine Description:
    Create a socket (TCP or UDP, depending on m_fUdp), connect it to a DNS
    server taken from the server list returned by GetDnsList(), associate
    the socket with ATQ and send the query built by Dns_QueryLib.
Arguments:
    None
Return Value:
    ERROR_SUCCESS on success
    Win32/Winsock error on failure:
        - the Winsock error if the socket could not be created or connected
        - the server list error (e.g. DNS_ERROR_NO_DNS_SERVERS) if no server
          could be obtained from the server list
        - GetLastError() if AtqAddAsyncHandle failed
        - the status of Dns_Send() otherwise
Notes:
    On every failure that occurs before the socket is handed to ATQ, the
    socket is closed here and m_DnsSocket is reset to INVALID_SOCKET.
    NOTE(review): if AtqAddAsyncHandle fails the socket is left untouched
    (as before); whether ATQ releases it together with the context cannot be
    determined from this file.
--*/
{
    INT err = 0;
    DWORD dwErrServList = ERROR_SUCCESS;
    BOOL fThrottle = FALSE;

    TraceFunctEnterEx((LPARAM) this, "CAsyncDns::Dns_OpenTcpConnectionAndSend");

    //
    // setup the socket
    // - INADDR_ANY -- let stack select source IP
    //
    m_DnsSocket = Dns_CreateSocket(m_fUdp ? SOCK_DGRAM : SOCK_STREAM);
    if ( m_DnsSocket == INVALID_SOCKET )
    {
        err = WSAGetLastError();
        if ( !err )
        {
            err = WSAENOTSOCK;
        }
        ErrorTrace((LPARAM) this, "Received error %d opening a socket to DNS server", err);
        TraceFunctLeaveEx((LPARAM) this);
        return( err );
    }

    if(!m_fUdp)
    {
        //Alway enable linger so sockets that connect to the server.
        //This will send a hard close to the server which will cause
        //the servers TCP/IP socket table to be flushed very early.
        //We should see very few, if any, sockets in the TIME_WAIT
        //state
        //
        //This is best effort (the result is deliberately ignored), and it
        //is only attempted once the socket is known to be valid.
        struct linger Linger;

        Linger.l_onoff = 1;
        Linger.l_linger = 0;
        setsockopt(m_DnsSocket, SOL_SOCKET, SO_LINGER, (const char FAR *)&Linger, sizeof(Linger));
    }

    m_RemoteAddress.sin_family = AF_INET;
    m_RemoteAddress.sin_port = DNS_PORT_NET_ORDER;

    //
    // Passing in fThrottle enables functionality in CTcpRegIpList to limit the
    // number of connections to servers on PROBATION (see ResetTimeoutServers...).
    // Throttling is disabled if Failover is disabled, because the tracking for
    // throttling is protocol (TCP/UDP) specific.
    //
    fThrottle = !FailoverDisabled();

    //
    // Get a working DNS server from the set of servers for this machine and
    // connect to it. The CTcpRegIpList has logic to keep track of the state
    // of DNS servers (UP or DOWN) and logic to retry DOWN DNS servers.
    //
    dwErrServList = GetDnsList()->GetWorkingServerIp(&m_dwIpServer, fThrottle);

    while(ERROR_SUCCESS == dwErrServList)
    {
        DNS_PRINTF_DBG("Connecting to DNS server %s over %s.\n",
            inet_ntoa(*((in_addr *)(&m_dwIpServer))), IsUdp() ? "UDP/IP" : "TCP/IP");

        m_RemoteAddress.sin_addr.s_addr = m_dwIpServer;
        err = connect(m_DnsSocket, (struct sockaddr *) &m_RemoteAddress, sizeof(SOCKADDR_IN));
        if ( !err )
        {
            DNS_PRINTF_MSG("Connected to DNS %s over %s.\n",
                inet_ntoa(*((in_addr *)(&m_dwIpServer))), IsUdp() ? "UDP/IP" : "TCP/IP");
            break;
        }

        //
        // connect() only returns SOCKET_ERROR; fetch the real Winsock error so
        // that it, rather than -1, is logged, passed to MarkDown and returned.
        //
        err = WSAGetLastError();
        if ( !err )
        {
            err = SOCKET_ERROR;
        }

        DNS_PRINTF_ERR("Failed WinSock connect() to %s over %s, Winsock err - %d.\n",
            inet_ntoa(*((in_addr *)(&m_dwIpServer))), IsUdp() ? "UDP/IP" : "TCP/IP",
            err);

        if(FailoverDisabled())
            break;

        GetDnsList()->MarkDown(m_dwIpServer, err, IsUdp());
        dwErrServList = GetDnsList()->GetWorkingServerIp(&m_dwIpServer, fThrottle);
    }

    if(!FailoverDisabled() &&
        (DNS_ERROR_NO_DNS_SERVERS == dwErrServList || ERROR_RETRY == dwErrServList))
    {
        //
        // If no servers are UP, just try a DOWN server. We must not simply
        // exit and ack the queue into retry in this situation. Consider the
        // case where all servers are DOWN. If we rely exclusively on GetWorking-
        // ServerIp(), then we will never try DNS till the retry time for the
        // DNS servers expires. Even if the admin kicks the queues, they will
        // go right back into retry because GetWorkingServerIp() will fail.
        //
        // Instead, if everything is DOWN, we will try SOMETHING by calling
        // GetAnyServerIp().
        //
        // -- If this fails, and ProcessClient gets the error ProcessClient
        // will try to failover to another DNS server. For this it calls
        // GetWorkingServerIp() which will fail, and the connection is acked
        // retry. Note that ProcessClient must not use GetAnyServerIp. If it
        // uses this function we are in danger of continuously looping trying
        // to spin connections to GetAnyServerIp.
        //
        // -- If the connection should fail in the connect below (for TCP/IP)
        // the failover logic is straightforward. We will simply ack the queue
        // to retry right away.
        //
        dwErrServList = GetDnsList()->GetAnyServerIp(&m_dwIpServer);
        if(ERROR_SUCCESS == dwErrServList)
        {
            m_RemoteAddress.sin_addr.s_addr = m_dwIpServer;
            err = connect(m_DnsSocket, (struct sockaddr *) &m_RemoteAddress, sizeof(SOCKADDR_IN));
            if ( err )
            {
                err = WSAGetLastError();
                if ( !err )
                {
                    err = SOCKET_ERROR;
                }
            }
        }
    }

    if(ERROR_SUCCESS != dwErrServList)
    {
        //
        // No server could be obtained: either none is configured (this can
        // happen if the serverlist was deleted underneath us) or, with
        // failover disabled, GetWorkingServerIp failed. There is nothing to
        // connect to, so fail the connection instead of carrying on with an
        // unconnected socket.
        //
        DNS_PRINTF_ERR("No DNS servers available to query.\n");
        ErrorTrace((LPARAM) this, "No DNS servers. Error - %d", dwErrServList);

        closesocket(m_DnsSocket);
        m_DnsSocket = INVALID_SOCKET;

        TraceFunctLeaveEx((LPARAM) this);
        return( (DNS_STATUS)dwErrServList );
    }

    if(ERROR_SUCCESS != err)
    {
        DNS_PRINTF_DBG("Unable to open a connection to a DNS server.\n");

        if(m_DnsSocket != INVALID_SOCKET)
        {
            closesocket(m_DnsSocket);
            m_DnsSocket = INVALID_SOCKET;
        }

        TraceFunctLeaveEx((LPARAM) this);
        return( (DNS_STATUS)err );
    }

    //
    // We have a connection to DNS
    //

    // Re-associate the handle to the ATQ
    // Call ATQ to associate the handle
    if (!AtqAddAsyncHandle(
            &m_pAtqContext,
            NULL,
            (LPVOID) this,
            DnsCompletion,
            30, // ATQ_TIMEOUT_INTERVAL
            (HANDLE) m_DnsSocket))
    {
        // Read the error before tracing so that tracing cannot disturb it
        err = GetLastError();
        TraceFunctLeaveEx((LPARAM) this);
        return( (DNS_STATUS)err );
    }

    //
    // send desired packet
    //
    err = Dns_Send();

    TraceFunctLeaveEx((LPARAM) this);
    return( (DNS_STATUS)err );
} // Dns_OpenTcpConnectionAndSend
//-----------------------------------------------------------------------------
// Description:
//      Handles the completion of a read from the DNS server. For TCP the
//      reply is preceded by a 2 byte length field (network byte order) and may
//      arrive in several pieces, so reads are re-pended until the whole
//      message is in m_pMsgRecvBuf. Once the message is complete (always the
//      case after one read for UDP) it is parsed and passed to
//      DnsProcessReply. A truncated UDP reply is re-queried over TCP unless
//      DNS_FLAGS_UDP_ONLY is set.
// Arguments:
//      InputBufferLen - Number of bytes delivered by the completed read
//      dwCompletionStatus - Not used by this function
//      lpo - Not used by this function
// Returns:
//      The result of ReadFile if another read was pended.
//      FALSE if the reply's length field does not fit the receive buffer, or
//      if the reply was truncated (in which case RetryAsyncDnsQuery has been
//      called, unless the flags say TCP only).
//      TRUE if the reply was processed.
// Notes:
//      The length check compares the announced message size (m_BytesToRead,
//      which includes the 2 byte length field) with the size of the receive
//      buffer. Reads are issued at offset m_cbReceived for
//      m_BytesToRead - m_cbReceived bytes, so they never extend beyond
//      m_BytesToRead; adding m_cbReceived to the bound would reject valid
//      replies merely because they arrive in several pieces.
//-----------------------------------------------------------------------------
BOOL CAsyncDns::ProcessReadIO(IN DWORD InputBufferLen,
                              IN DWORD dwCompletionStatus,
                              IN OUT OVERLAPPED * lpo)
{
    BOOL fRet = TRUE;
    DWORD DataSize = 0;
    DNS_STATUS DnsStatus = 0;
    PDNS_RECORD pRecordList = NULL;
    WORD wMessageLength = 0;

    TraceFunctEnterEx((LPARAM) this, "BOOL CAsyncDns::ProcessReadIO");

    //add up the number of bytes we received thus far
    m_cbReceived += InputBufferLen;

    //
    // read atleast 2 bytes
    //
    if(!m_fUdp && m_FirstRead && ( m_cbReceived < 2 ) )
    {
        // Continue behind what was received, staying inside the buffer
        fRet = ReadFile(&m_pMsgRecvBuf[m_cbReceived],
                        DNS_TCP_DEFAULT_PACKET_LENGTH - m_cbReceived);
        TraceFunctLeaveEx((LPARAM) this);
        return fRet;
    }

    //
    // get the size of the message
    //
    if(!m_fUdp && m_FirstRead && (m_cbReceived >= 2))
    {
        DataSize = ntohs(*(u_short *)m_pMsgRecvBuf);

        //
        // add 2 bytes for the field which specifies the length of data
        //
        m_BytesToRead = DataSize + 2;
        m_FirstRead = FALSE;
    }

    //
    // pend another read if we have n't read enough
    //
    if(!m_fUdp && (m_cbReceived < m_BytesToRead))
    {
        DWORD cbMoreToRead = m_BytesToRead - m_cbReceived;

        if(m_BytesToRead > DNS_TCP_DEFAULT_PACKET_LENGTH)
        {
            ErrorTrace((LPARAM)this,
                "Size field in DNS packet is corrupt - %08x: ",
                DataSize);
            DNS_PRINTF_ERR("Reply packet from DNS server is corrupt.\n");
            TraceFunctLeaveEx((LPARAM)this);
            return FALSE;
        }

        fRet = ReadFile(&m_pMsgRecvBuf[m_cbReceived], cbMoreToRead);
    }
    else
    {
        if( !m_fUdp )
        {
            //
            // message length is 2 bytes less to take care of the msg length
            // field.
            //
            //m_pMsgRecv->MessageLength = (WORD) m_cbReceived - 2;
            m_pMsgRecv = (PDNS_MESSAGE_BUFFER)(m_pMsgRecvBuf+2);
        }
        else
        {
            //m_pMsgRecv->MessageLength = (WORD) m_cbReceived;
            m_pMsgRecv = (PDNS_MESSAGE_BUFFER)m_pMsgRecvBuf;
        }

        SWAP_COUNT_BYTES(&m_pMsgRecv->MessageHead);

        //
        // We queried over UDP and the reply from DNS was truncated because the response
        // was longer than the UDP packet size. We requery DNS using TCP unless SMTP is
        // configured to use UDP only. RetryAsyncDnsQuery sets the members of this CAsyncDns
        // object appropriately depending on whether if fails or succeeds. After calling
        // RetryAsyncDnsQuery, this object must be deleted.
        //
        if(IsUdp() && !(m_dwFlags & DNS_FLAGS_UDP_ONLY) && m_pMsgRecv->MessageHead.Truncation)
        {
            //
            // Abort if we queried on TCP and got a truncated response. This is an illegal
            // response from DNS. If we don't abort we could loop forever.
            //
            if(m_dwFlags & DNS_FLAGS_TCP_ONLY)
            {
                DNS_PRINTF_ERR("Unexpected response. Reply packet had "
                    "truncation bit set, though query was over TCP/IP.\n");
                _ASSERT(0 && "Shouldn't have truncated reply over TCP");
                TraceFunctLeaveEx((LPARAM) this);
                return FALSE;
            }

            DNS_PRINTF_MSG("Truncated UDP response. Retrying query over TCP.\n");
            DebugTrace((LPARAM) this, "Truncated reply - reissuing query using TCP");
            RetryAsyncDnsQuery(FALSE); // FALSE == Do not use UDP
            TraceFunctLeaveEx((LPARAM) this);
            return FALSE;
        }

        // For TCP the 2 byte length field is not part of the DNS message
        wMessageLength = (WORD)( m_fUdp ? ( m_cbReceived ) : ( m_cbReceived - 2 ));

        DnsStatus = DnsExtractRecordsFromMessage_UTF8(m_pMsgRecv,
            wMessageLength, &pRecordList);

        DNS_LOG_RESPONSE(DnsStatus, pRecordList, (PBYTE)m_pMsgRecv, wMessageLength);

        DnsProcessReply(DnsStatus, pRecordList);
        DnsRecordListFree(pRecordList, TRUE);
    }

    TraceFunctLeaveEx((LPARAM) this);
    return fRet;
}
//-----------------------------------------------------------------------------
// Description:
//      Handles one completion for this connection (it is called from
//      DnsCompletion). Depending on the completion it disconnects (timeout,
//      or zero bytes / error status), processes received data
//      (read completion) or pends the first read (write completion). It
//      then drops one pending I/O; when that was the last one the connection
//      is disconnected, a failover query is started if a transport error was
//      recorded and failover is enabled, and the object deletes itself.
// Arguments:
//      InputBufferLen - Byte count of the completed I/O
//      dwCompletionStatus - Completion status; ERROR_SEM_TIMEOUT together
//          with a NULL lpo is treated as a timeout
//      lpo - Identifies the completed I/O: it is compared with the addresses
//          of m_ReadOverlapped and m_WriteOverlapped
// Returns:
//      Always TRUE.
// Notes:
//      The object may have been deleted by the time this function returns.
//-----------------------------------------------------------------------------
BOOL CAsyncDns::ProcessClient (IN DWORD InputBufferLen,
IN DWORD dwCompletionStatus,
IN OUT OVERLAPPED * lpo)
{
// RetStatus only records the result of the read handling below; it does not
// influence the value returned by this function.
BOOL RetStatus = FALSE;
// Set to a non-success value when the exchange with the DNS server failed;
// drives the MarkDown/retry decision at the end of the function.
DWORD dwDnsTransportError = ERROR_SUCCESS;
TraceFunctEnterEx((LPARAM) this, "CAsyncDns::ProcessClient()");
IncThreadCount();
//if lpo == NULL, then we timed out. Send an appropriate message
//then close the connection
if( (lpo == NULL) && (dwCompletionStatus == ERROR_SEM_TIMEOUT))
{
dwDnsTransportError = ERROR_SEM_TIMEOUT;
//
// fake a pending IO as we'll dec the overall count in the
// exit processing of this routine needs to happen before
// DisconnectClient else completing threads could tear us down
//
IncPendingIoCount();
DNS_PRINTF_ERR("Timeout waiting for DNS server response.\n");
DebugTrace( (LPARAM)this, "Async DNS client timed out");
DisconnectClient();
}
else if((InputBufferLen == 0) || (dwCompletionStatus != NO_ERROR))
{
// Nothing was transferred or the I/O failed: record a retryable transport
// error and close the connection.
dwDnsTransportError = ERROR_RETRY;
DebugTrace((LPARAM) this,
"CAsyncDns::ProcessClient: InputBufferLen = %d dwCompletionStatus = %d"
" - Closing connection", InputBufferLen, dwCompletionStatus);
DNS_PRINTF_ERR("Connection dropped by DNS server - Win32 error %d.\n",
dwCompletionStatus);
DisconnectClient();
}
else if (lpo == (OVERLAPPED *) &m_ReadOverlapped)
{
if(m_DnsSocket == INVALID_SOCKET && InputBufferLen > 0)
{
//
// This is to firewall against an ATQ bug where we callback with an
// nonzero InputBufferLen after the ATQ disconnect. We shouldn't be
// doing further processing after this point.
//
ErrorTrace((LPARAM)this, "Connection already closed, callback should not occur");
}
else
{
//A client based async IO completed
DNS_PRINTF_DBG("Response received from DNS server.\n");
RetStatus = ProcessReadIO(InputBufferLen, dwCompletionStatus, lpo);
// Data arrived from the server, so clear its failure bookkeeping. This is
// done whatever ProcessReadIO returned.
if(!FailoverDisabled())
GetDnsList()->ResetServerOnConnect(m_RemoteAddress.sin_addr.s_addr);
}
}
else if(lpo == (OVERLAPPED *) &m_WriteOverlapped)
{
// The query has been written: pend the read for the reply.
RetStatus = ReadFile(m_pMsgRecvBuf, DNS_TCP_DEFAULT_PACKET_LENGTH);
if(!RetStatus)
{
DNS_PRINTF_ERR("Network error on connection to DNS server.\n");
ErrorTrace((LPARAM) this, "ReadFile failed");
dwDnsTransportError = ERROR_RETRY;
}
}
DebugTrace((LPARAM)this,"ASYNC DNS - Pending IOs: %d", m_cPendingIoCount);
// Do NOT Touch the member variables past this POINT!
// This object may be deleted!
//
// decrement the overall pending IO count for this session
// tracing and ASSERTs if we're going down.
//
DecThreadCount();
// Members are still used inside the block below: it only runs for the call
// that dropped the pending I/O count to zero, and that call is the one that
// deletes the object.
if (DecPendingIoCount() == 0)
{
DisconnectClient();
DebugTrace((LPARAM)this,"ASYNC DNS - Pending IOs: %d", m_cPendingIoCount);
DebugTrace((LPARAM)this,"ASYNC DNS - Thread count: %d", m_cThreadCount);
// On a transport error, count the failure against the server that was
// queried and re-issue the query with the same transport.
if(ERROR_SUCCESS != dwDnsTransportError && !FailoverDisabled())
{
GetDnsList()->MarkDown(QueryDnsServer(), dwDnsTransportError, IsUdp());
RetryAsyncDnsQuery(IsUdp());
}
delete this;
}
return TRUE;
}
DNS_STATUS
CAsyncDns::DnsSendRecord()
/*++
Routine Description:
    Allocate the buffer that will receive the DNS reply
    (DNS_TCP_DEFAULT_PACKET_LENGTH bytes), then connect to a DNS server and
    send the query through Dns_OpenTcpConnectionAndSend().
Arguments:
    None
Return Value:
    DNS_ERROR_NO_MEMORY if the receive buffer could not be allocated.
    Otherwise the status of Dns_OpenTcpConnectionAndSend().
--*/
{
    m_pMsgRecvBuf = (BYTE*) new BYTE[DNS_TCP_DEFAULT_PACKET_LENGTH];
    if(NULL == m_pMsgRecvBuf)
    {
        return( DNS_ERROR_NO_MEMORY );
    }

    return( Dns_OpenTcpConnectionAndSend() );
}
SOCKET
CAsyncDns::Dns_CreateSocket(
    IN INT SockType
    )
/*++
Routine Description:
    Create an AF_INET socket of the requested type (protocol 0).
Arguments:
    SockType -- SOCK_DGRAM or SOCK_STREAM
Return Value:
    The value returned by socket(): the new socket if successful,
    otherwise INVALID_SOCKET.
--*/
{
    // socket() itself reports failure as INVALID_SOCKET, so its result can be
    // passed straight through.
    return socket( AF_INET, SockType, 0 );
}
//-----------------------------------------------------------------------------
// Description:
// Constructor and Destructor for class to maintain a list of IP addresses
// (for DNS servers) and their state (UP or DOWN). The IP addresses are
// held in an IP_ARRAY.
//-----------------------------------------------------------------------------
CDnsServerList::CDnsServerList()
{
    m_dwSig = TCP_REG_LIST_SIGNATURE;

    // No servers are known until Update() is called
    m_IpListPtr = NULL;

    // Per-server bookkeeping arrays, allocated by Update()
    m_prgServerState = NULL;
    m_prgdwFailureTick = NULL;
    m_prgdwFailureCount = NULL;
    m_prgdwConnections = NULL;

    //
    // Number of servers that are currently not marked down. It is a shortcut:
    // comparing it with the number of configured servers tells, without
    // looking at the state of each server, whether any server is down (the
    // usual case being that all servers are up).
    //
    m_cUpServers = 0;
}
//
// Releases the server list and the per-server bookkeeping arrays.
// delete [] accepts a NULL pointer, so no individual checks are needed.
//
CDnsServerList::~CDnsServerList()
{
    delete [] m_IpListPtr;
    m_IpListPtr = NULL;

    delete [] m_prgdwFailureTick;
    m_prgdwFailureTick = NULL;

    delete [] m_prgServerState;
    m_prgServerState = NULL;

    delete [] m_prgdwFailureCount;
    m_prgdwFailureCount = NULL;

    delete [] m_prgdwConnections;
    m_prgdwConnections = NULL;
}
//-----------------------------------------------------------------------------
// Description:
//      Replaces the server list with a copy of IpPtr and resets the state of
//      every server: all servers are UP with no failures and no connections.
//      The old list and ALL of the old per-server arrays (including the
//      failure counts) are released first. If an allocation fails there is
//      little we can do, so the server list is left empty and FALSE is
//      returned.
//
// Arguments:
//      IpPtr - Ptr to IP_ARRAY of servers, this can be NULL in which case
//          we assume that there are no servers. On shutdown, the SMTP code
//          calls this with NULL.
//
//          This argument is copied.
//
// Returns:
//      TRUE if the list was replaced by a copy of IpPtr.
//      FALSE if an allocation failed. FALSE is also returned when IpPtr is
//      NULL (the list is simply emptied in that case).
//
// Notes:
//      Runs under the exclusive lock.
//      NOTE(review): the list copy is allocated as a BYTE array but released
//      with "delete []" through a PIP_ARRAY, here and in the destructor.
//-----------------------------------------------------------------------------
BOOL CDnsServerList::Update(PIP_ARRAY IpPtr)
{
    BOOL fFatalError = FALSE;
    BOOL fRet = FALSE;
    DWORD cbIpArraySize = 0;

    TraceFunctEnterEx((LPARAM) this, "CDnsServerList::Update");

    m_sl.ExclusiveLock();

    //
    // Throw away the current list together with every per-server array.
    // (delete [] on a NULL pointer is a no-op.)
    //
    delete [] m_IpListPtr;
    m_IpListPtr = NULL;

    delete [] m_prgdwFailureTick;
    m_prgdwFailureTick = NULL;

    delete [] m_prgServerState;
    m_prgServerState = NULL;

    delete [] m_prgdwFailureCount;
    m_prgdwFailureCount = NULL;

    delete [] m_prgdwConnections;
    m_prgdwConnections = NULL;

    // Note: IpPtr can be NULL
    if(IpPtr == NULL) {
        m_cUpServers = 0;
        goto Exit;
    }

    // Copy the IpPtr
    cbIpArraySize = sizeof(IP_ARRAY) +
        sizeof(IP_ADDRESS) * (IpPtr->cAddrCount - 1);

    m_IpListPtr = (PIP_ARRAY)(new BYTE[cbIpArraySize]);
    if(!m_IpListPtr) {
        fFatalError = TRUE;
        goto Exit;
    }

    CopyMemory(m_IpListPtr, IpPtr, cbIpArraySize);

    // Every server starts out UP
    m_cUpServers = IpPtr->cAddrCount;

    m_prgdwFailureTick = new DWORD[m_cUpServers];
    m_prgServerState = new SERVER_STATE[m_cUpServers];
    m_prgdwFailureCount = new DWORD[m_cUpServers];
    m_prgdwConnections = new DWORD[m_cUpServers];

    if(!m_prgdwFailureTick ||
       !m_prgServerState ||
       !m_prgdwFailureCount ||
       !m_prgdwConnections)
    {
        ErrorTrace((LPARAM) this, "Out of memory initializing DNS server list");
        fFatalError = TRUE;
        goto Exit;
    }

    for(int i = 0; i < m_cUpServers; i++) {
        m_prgdwFailureTick[i] = 0;
        m_prgServerState[i] = DNS_STATE_UP;
        m_prgdwFailureCount[i] = 0;
        m_prgdwConnections[i] = 0;
    }

    fRet = TRUE;

Exit:
    if(fFatalError) {
        // Leave a consistent, empty list behind
        delete [] m_prgServerState;
        m_prgServerState = NULL;

        delete [] m_prgdwFailureTick;
        m_prgdwFailureTick = NULL;

        delete [] m_IpListPtr;
        m_IpListPtr = NULL;

        delete [] m_prgdwFailureCount;
        m_prgdwFailureCount = NULL;

        delete [] m_prgdwConnections;
        m_prgdwConnections = NULL;

        m_cUpServers = 0;
    }

    m_sl.ExclusiveUnlock();
    TraceFunctLeaveEx((LPARAM) this);
    return fRet;
}
//-----------------------------------------------------------------------------
// Description:
//      Compares the given server list with the current one (under the share
//      lock) and calls Update() only if they differ. Leaving an unchanged
//      list alone preserves the failure counts and state information of the
//      servers.
// Arguments:
//      IN PIP_ARRAY pipServers - (Possibly) new server-list, may be NULL
// Returns:
//      TRUE if the lists are identical (nothing is done).
//      Otherwise the result of Update(pipServers).
// Notes:
//      The share lock is released before Update() is called.
//-----------------------------------------------------------------------------
BOOL CDnsServerList::UpdateIfChanged(
    PIP_ARRAY pipServers)
{
    BOOL fListDiffers = FALSE;

    TraceFunctEnterEx((LPARAM) this, "CDnsServerList::UpdateIfChanged");

    m_sl.ShareLock();

    if(m_IpListPtr && pipServers) {
        // Two real lists: they differ if the server counts differ or, the
        // counts being identical, if the addresses do not compare equal
        if(m_IpListPtr->cAddrCount != pipServers->cAddrCount) {
            fListDiffers = TRUE;
        } else {
            fListDiffers = (0 != memcmp(m_IpListPtr->aipAddrs, pipServers->aipAddrs,
                                sizeof(IP_ADDRESS) * m_IpListPtr->cAddrCount));
        }
    } else {
        // At least one list is NULL: they differ unless both are NULL
        fListDiffers = (m_IpListPtr || pipServers) ? TRUE : FALSE;
    }

    m_sl.ShareUnlock();

    if(!fListDiffers) {
        TraceFunctLeaveEx((LPARAM)this);
        return TRUE;
    }

    DebugTrace((LPARAM)this, "Updating serverlist");
    TraceFunctLeaveEx((LPARAM)this);
    return Update(pipServers);
}
//-----------------------------------------------------------------------------
// Description:
//      Hands the caller a private copy of the server list. The list itself
//      cannot be returned because it can be replaced at any time by Update().
// Arguments:
//      OUT PIP_ARRAY *ppipArray - Receives the allocated copy. Set to NULL
//          if no copy is returned.
// Returns:
//      TRUE if a copy was made.
//      FALSE if there is no list, the list is empty, or the copy could not be
//      allocated.
// Notes:
//      The caller owns the copy and must release it.
//      NOTE(review): the copy is allocated with "new BYTE[]" - confirm that
//      callers release it accordingly.
//-----------------------------------------------------------------------------
BOOL CDnsServerList::CopyList(
    PIP_ARRAY *ppipArray)
{
    BOOL fCopied = FALSE;
    ULONG cbList = 0;

    TraceFunctEnterEx((LPARAM)this, "CDnsServerList::CopyList");

    *ppipArray = NULL;

    m_sl.ShareLock();

    if(m_IpListPtr && m_IpListPtr->cAddrCount != 0) {
        // IP_ARRAY already has room for one address
        cbList =
            sizeof(IP_ARRAY) +
            sizeof(IP_ADDRESS) * (m_IpListPtr->cAddrCount - 1);

        *ppipArray = (PIP_ARRAY) new BYTE[cbList];
        if(*ppipArray) {
            CopyMemory(*ppipArray, m_IpListPtr, cbList);
            fCopied = TRUE;
        }
    }

    m_sl.ShareUnlock();
    TraceFunctLeaveEx((LPARAM)this);
    return fCopied;
}
//-----------------------------------------------------------------------------
// Description:
//      Returns the IP address of the first server in the list that is not
//      marked DOWN. Before looking, ResetTimeoutServersIfNeeded() is called
//      so that servers that have been DOWN for long enough get another
//      chance.
// Arguments:
//      DWORD *pdwIpServer - Receives the IP address of the selected server.
//          Set to INADDR_NONE if no server is returned.
//      BOOL fThrottle - If TRUE, a server is only selected if
//          AllowConnection() agrees. The idea is not to spin off hundreds of
//          async DNS queries to a server that may actually be unreachable:
//          connections to a server in the PROBATION state (set by
//          ResetTimeoutServersIfNeeded) are limited.
// Returns:
//      ERROR_SUCCESS - If a server was selected
//      ERROR_RETRY - If no server is up, or every server that is not DOWN
//          was refused by the throttle
//      DNS_ERROR_NO_DNS_SERVERS - If no DNS servers are configured
//-----------------------------------------------------------------------------
DWORD CDnsServerList::GetWorkingServerIp(DWORD *pdwIpServer, BOOL fThrottle)
{
    DWORD dwResult = ERROR_RETRY;

    _ASSERT(pdwIpServer != NULL);

    *pdwIpServer = INADDR_NONE;

    // Check if any servers were down and bring them up if the timeout has expired
    ResetTimeoutServersIfNeeded();

    m_sl.ShareLock();

    if(m_IpListPtr == NULL || m_IpListPtr->cAddrCount == 0) {
        dwResult = DNS_ERROR_NO_DNS_SERVERS;
    } else if(m_cUpServers != 0) {
        for(int iCandidate = 0; iCandidate < (int)m_IpListPtr->cAddrCount; iCandidate++) {
            if(m_prgServerState[iCandidate] == DNS_STATE_DOWN)
                continue;

            if(fThrottle && !AllowConnection(iCandidate))
                continue;

            *pdwIpServer = m_IpListPtr->aipAddrs[iCandidate];
            dwResult = ERROR_SUCCESS;
            break;
        }
    }

    m_sl.ShareUnlock();
    return dwResult;
}
//-----------------------------------------------------------------------------
// Description:
//      Records a failure for a server. The server is only marked DOWN once
//      its failure count has reached ErrorsBeforeFailover(); when that
//      happens the time of the failure is remembered (GetTickCount) so that
//      the server can be retried later, and the event is logged.
// Arguments:
//      dwIp -- IP address of server to mark as DOWN
//      dwErr -- Error from DNS or network
//      fUdp -- TRUE if protocol used was UDP, FALSE if TCP
// Notes:
//      Nothing happens if there is no server list, if no server is up, if
//      dwIp is not in the list or if the server is already DOWN.
//-----------------------------------------------------------------------------
void CDnsServerList::MarkDown(
    DWORD dwIp,
    DWORD dwErr,
    BOOL fUdp)
{
    int iFailed = 0;
    DWORD cRemainingUp = 0;

    //
    // Becomes TRUE only when the server is actually marked DOWN. While the
    // server has failed < ErrorsBeforeFailover() times there is no event to
    // log.
    //
    BOOL fMarkedDown = FALSE;

    TraceFunctEnterEx((LPARAM) this, "CDnsServerList::MarkDown");

    m_sl.ExclusiveLock();

    DNS_PRINTF_DBG("Marking DNS server %s as down.\n",
        inet_ntoa(*((in_addr *)(&dwIp))));

    if(m_IpListPtr != NULL && m_IpListPtr->cAddrCount != 0 && m_cUpServers != 0) {

        // Locate the server in the list
        while(iFailed < (int)m_IpListPtr->cAddrCount &&
              m_IpListPtr->aipAddrs[iFailed] != dwIp) {
            iFailed++;
        }

        if(iFailed < (int)m_IpListPtr->cAddrCount &&
           m_prgServerState[iFailed] != DNS_STATE_DOWN) {

            //
            // A DNS server is not marked down till it has failed a number of
            // times consecutively. This protects against occasional errors
            // from DNS servers which can occur under heavy load. Even if 0.5%
            // of connections have errors from DNS - on a heavily stressed
            // server, with say 100 DNS queries per minute, we would end up
            // with a server going down every 2 mins.
            //
            m_prgdwFailureCount[iFailed]++;

            if(m_prgdwConnections[iFailed] > 0)
                m_prgdwConnections[iFailed]--;

            if(m_prgdwFailureCount[iFailed] < ErrorsBeforeFailover()) {
                ErrorTrace((LPARAM) this,
                    "%d consecutive errors connecting to server %08x, error=%d",
                    m_prgdwFailureCount[iFailed], dwIp, dwErr);
            } else {
                // Mark server down
                m_prgServerState[iFailed] = DNS_STATE_DOWN;
                m_prgdwConnections[iFailed] = 0;
                _ASSERT(m_cUpServers > 0);
                m_cUpServers--;
                m_prgdwFailureTick[iFailed] = GetTickCount();
                fMarkedDown = TRUE;
            }
        }
    }

    cRemainingUp = m_cUpServers;
    m_sl.ExclusiveUnlock();

    // Log events outside the ExclusiveLock()
    if(fMarkedDown)
        LogServerDown(dwIp, fUdp, dwErr, cRemainingUp);

    TraceFunctLeaveEx((LPARAM) this);
    return;
}
//-----------------------------------------------------------------------------
// Description:
//      If a server has been failing, we keep track of the number of
//      consecutive failures in m_prgdwFailureCount. This function is called
//      when we successfully connect to the server and we want to reset the
//      failure count. A server that was DOWN or on PROBATION is brought UP.
// Arguments:
//      dwIp - IP Address of server to reset failure count for
// Note:
//      This function is called for every successful query so it needs to be
//      kept simple and quick especially in the usual case - when there is no
//      Reset to be done. That case is handled entirely under the share lock.
//
//      The share lock has to be released before the exclusive lock can be
//      taken. In between, Update() may replace (or empty) the server list, so
//      once the exclusive lock is held the list is validated and the server
//      is looked up again; the index found under the share lock must not be
//      reused.
//-----------------------------------------------------------------------------
void CDnsServerList::ResetServerOnConnect(DWORD dwIp)
{
    int iServer = 0;
    BOOL fShareLock = TRUE;

    TraceFunctEnterEx((LPARAM) this, "CDnsServerList::ResetServerOnConnect");

    m_sl.ShareLock();

    if(!m_IpListPtr || m_IpListPtr->cAddrCount == 0)
        goto Exit;

    // Find the server to reset
    for(iServer = 0;
        iServer < (int)m_IpListPtr->cAddrCount &&
            dwIp != m_IpListPtr->aipAddrs[iServer];
        iServer++);

    if(iServer >= (int)m_IpListPtr->cAddrCount)
        goto Exit;

    // Nothing to do if the specified server is UP and has a zero failure count
    if(!m_prgdwFailureCount[iServer] && m_prgServerState[iServer] == DNS_STATE_UP)
        goto Exit;

    m_sl.ShareUnlock();
    m_sl.ExclusiveLock();
    fShareLock = FALSE;

    // The list may have been replaced while no lock was held: start over
    if(!m_IpListPtr || m_IpListPtr->cAddrCount == 0)
        goto Exit;

    for(iServer = 0;
        iServer < (int)m_IpListPtr->cAddrCount &&
            dwIp != m_IpListPtr->aipAddrs[iServer];
        iServer++);

    if(iServer >= (int)m_IpListPtr->cAddrCount)
        goto Exit;

    // Re-verify that we still have something to do after ShareUnlock->ExclusiveLock
    if(!m_prgdwFailureCount[iServer] && m_prgServerState[iServer] == DNS_STATE_UP)
        goto Exit;

    DebugTrace((LPARAM) this,
        "Resetting server %08x, State=%d, Failure count=%d, Connection count=%d",
        dwIp, m_prgServerState[iServer], m_prgdwFailureCount[iServer],
        m_prgdwConnections[iServer]);

    // If server was in the state DOWN/PROBATION, bring it UP
    if(m_prgServerState[iServer] != DNS_STATE_UP) {

        // Servers on PROBATION are already UP, so no need to inc UpServers
        if(m_prgServerState[iServer] == DNS_STATE_DOWN)
            m_cUpServers++;

        m_prgServerState[iServer] = DNS_STATE_UP;
        m_prgdwFailureTick[iServer] = 0;
        _ASSERT(m_cUpServers <= (int)m_IpListPtr->cAddrCount);
    }

    // Clear all failures
    m_prgdwFailureCount[iServer] = 0;
    m_prgdwConnections[iServer] = 0;

Exit:
    if(fShareLock)
        m_sl.ShareUnlock();
    else
        m_sl.ExclusiveUnlock();

    TraceFunctLeaveEx((LPARAM) this);
}
//-----------------------------------------------------------------------------
// Description:
// Checks if any servers are DOWN, and if the retry time has expired for
// those servers. If so those servers will be brought up and marked in the
// PROBATION state. We do not want to transition servers that were DOWN
// directly to UP, because we are still not sure whether or not these
// servers are really responding. While in the PROBATION state, we allow
// only a limited number of connections to a server, so as not to cause
// all remote-queues to choke up trying to connect to a possibly non-
// functional server. If one of these connections succeeds, the server
// will be marked back UP and all remote-queues will be able to use this
// server again. If all the (limited number of) connections fail, the
// server will go from the PROBATION state to DOWN again.
// Arguments:
// None.
// Returns:
// Nothing.
//-----------------------------------------------------------------------------
void CDnsServerList::ResetTimeoutServersIfNeeded()
{
int iServer = 0;
DWORD dwElapsedTicks = 0;
DWORD dwCurrentTick = 0;
//
// Quick check - if all servers are up (usual case) or there are no configured
// servers, there's nothing for us to do.
//
m_sl.ShareLock();
if(m_IpListPtr == NULL || m_IpListPtr->cAddrCount == 0 || m_cUpServers == m_IpListPtr->cAddrCount) {
m_sl.ShareUnlock();
return;
}
m_sl.ShareUnlock();
// Some servers are down... figure out which need to be brought up
m_sl.ExclusiveLock();
// Re-check that no one modified the list while we didn't have the sharelock
if(m_IpListPtr == NULL || m_IpListPtr->cAddrCount == 0 || m_cUpServers == m_IpListPtr->cAddrCount) {
m_sl.ExclusiveUnlock();
return;
}
dwCurrentTick = GetTickCount();
for(iServer = 0; iServer < (int)m_IpListPtr->cAddrCount; iServer++) {
if(m_prgServerState[iServer] != DNS_STATE_DOWN)
continue;
//
// Note: This also takes care of the special case where dwCurrentTick occurs
// after the wraparound and m_prgdwFailureTick occurs before the wraparound.
// This is because, in that case, the elapsed time is:
//
// time since wraparound + time before wraparound that failure occurred - 1
// (-1 is because it's 0 time to transition from MAX_TICK_VALUE to 0)
//
// = dwCurrentTick + (MAX_TICK_VALUE - m_prgdwFailureTick[iServer]) - 1
//
// Since MAX_TICK_VALUE == -1
//
// = dwCurrentTick + (-1 - m_prgdwFailureTick[iServer]) - 1
// = dwCurrentTick - m_prgdwFailureTick[iServer]
//
dwElapsedTicks = dwCurrentTick - m_prgdwFailureTick[iServer];
#define TICKS_TILL_RETRY 10 * 60 * 1000 // 10 minutes
if(dwElapsedTicks > TICKS_TILL_RETRY) {
m_prgServerState[iServer] = DNS_STATE_PROBATION;
m_prgdwFailureTick[iServer] = 0;
m_prgdwConnections[iServer] = 0;
m_cUpServers++;
_ASSERT(m_cUpServers <= (int)m_IpListPtr->cAddrCount);
}
}
m_sl.ExclusiveUnlock();
}