/* SVMHANDLER.CPP (c) copyright 1998 Microsoft Corp Contains the class encapsulating the Support Vector Machine used to do on the fly spam detection Robert Rounthwaite (RobertRo@microsoft.com) */ #include #include "junkeng.h" #include "junkutil.h" #include "parsestm.h" #include #include #include class CBodyBuff { private: enum { CB_BODYBUFF_MAX = 4096 }; enum { BBF_CLEAR = 0x00000000, BBF_SET = 0x00000001, BBF_ALPHA = 0x00000002, BBF_NUM = 0x00000004, BBF_SPACE = 0x00000008, BBF_MASK = 0x0000000F }; private: IStream * m_pIStream; ULONG m_cbStream; ULONG m_ibStream; BYTE m_rgbBuff[CB_BODYBUFF_MAX]; ULONG m_cbBuffTotal; BYTE * m_pbBuffCurr; DWORD m_dwFlagsCurr; BYTE * m_pbBuffGood; BYTE * m_pbBuffPrev; DWORD m_dwFlagsPrev; public: CBodyBuff() : m_pIStream(NULL), m_cbStream(0), m_ibStream(0), m_cbBuffTotal(0), m_pbBuffCurr(m_rgbBuff), m_dwFlagsCurr(BBF_CLEAR), m_pbBuffGood(m_rgbBuff), m_pbBuffPrev(NULL), m_dwFlagsPrev(BBF_CLEAR) {} ~CBodyBuff() {SafeRelease(m_pIStream);} HRESULT HrInit(DWORD dwFlags, IStream * pIStream); HRESULT HrGetCurrChar(CHAR * pchNext); BOOL FDoMatch(FEATURECOMP * pfcomp); HRESULT HrMoveNext(VOID) { m_pbBuffPrev = m_pbBuffCurr; m_dwFlagsPrev = m_dwFlagsCurr; m_pbBuffCurr = (BYTE *) CharNext((LPSTR) m_pbBuffCurr); m_dwFlagsCurr = BBF_CLEAR; return S_OK; } private: HRESULT _HrFillBuffer(VOID); }; static const LPSTR szCountFeatureComp = "FeatureComponentCount = "; static const LPSTR szDefaultThresh = "dThresh = "; static const LPSTR szMostThresh = "mThresh = "; static const LPSTR szLeastThresh = "lThresh = "; static const LPSTR szThresh = "Threshold = "; static const LPSTR szNumberofDim = "NumDim = "; #ifdef DEBUG static const LPSTR STR_REG_PATH_FLAT = "Software\\Microsoft\\Outlook Express"; static const LPSTR szJunkMailPrefix = "JUNKMAIL"; static const LPSTR szJunkMailLog = "JUNKMAIL.LOG"; static const LPSTR LOG_TAGLINE = "Calculating Junk Mail for message: %s"; static const LPSTR LOG_FIRSTNAME = "User's First Name: %s"; static const LPSTR LOG_LASTNAME = "User's Last Name: %s"; static const LPSTR LOG_COMPANYNAME = "User's Company Name: %s"; static const LPSTR LOG_BODY = "Body contains: %s"; static const LPSTR LOG_SUBJECT = "Subject contains: %s"; static const LPSTR LOG_TO = "To line contains: %s"; static const LPSTR LOG_FROM = "From line contains: %s"; static const LPSTR LOG_FINAL = "Junk Mail percentage: %0.1d.%0.6d\r\n"; #endif // DEBUG BOOL FReadDouble(LPSTR pszLine, LPSTR pszToken, DOUBLE * pdblVal); #ifdef DEBUG VOID PrintToLogFile(ILogFile * pILogFile, LPSTR pszTmpl, LPSTR pszArg); #endif // DEBUG HRESULT CBodyBuff::HrInit(DWORD dwFlags, IStream * pIStream) { HRESULT hr = S_OK; // Check incoming params if (NULL == pIStream) { hr = E_INVALIDARG; goto exit; } // Set the stream m_pIStream = pIStream; m_pIStream->AddRef(); // Get the stream size hr = HrGetStreamSize(m_pIStream, &m_cbStream); if (FAILED(hr)) { goto exit; } // Reset the stream to the beginning hr = HrRewindStream(m_pIStream); if (FAILED(hr)) { goto exit; } // Start from the beginning m_ibStream = 0; exit: return hr; } HRESULT CBodyBuff::HrGetCurrChar(CHAR * pchNext) { HRESULT hr = S_OK; // Check incoming params Assert(NULL != pchNext); // Do we need to get any more characters? if (m_pbBuffCurr >= m_pbBuffGood) { // If we couldn't get any more characters if (S_OK != _HrFillBuffer()) { hr = E_FAIL; goto exit; } } // Get the current char *pchNext = *m_pbBuffCurr; hr = S_OK; exit: return hr; } BOOL CBodyBuff::FDoMatch(FEATURECOMP * pfcomp) { BOOL fRet = FALSE; BYTE * pbSearch = NULL; ULONG cchSearch = 0; LPSTR pszMatch = NULL; DWORD dwFlags = 0; // Check incoming params Assert(NULL != pfcomp); Assert(NULL != pfcomp->pszFeature); Assert(0 != pfcomp->cchFeature); // Set up some locals cchSearch = pfcomp->cchFeature; // Do we need more characters for the match? // Include the character after the string, just in case // we have a match and need to check the character after // the string for a word break if ((cchSearch + 1) > (ULONG) (m_pbBuffGood - m_pbBuffCurr)) { // Get more characters // If this fails, we still might be good, since // we might just have enough characters to do the // full match at the end of the stream. (VOID) _HrFillBuffer(); // Could we get enough? if (cchSearch > (ULONG) (m_pbBuffGood - m_pbBuffCurr)) { // No Match fRet = FALSE; goto exit; } } // Do match pbSearch = m_pbBuffCurr; pszMatch = pfcomp->pszFeature; while (0 != cchSearch--) { if (*(pszMatch++) != *(pbSearch++)) { // No Match fRet = FALSE; goto exit; } } // Validate the match // Do we need to figure out if it starts with a word break? if (0 != (pfcomp->dwFlags & CT_START_SET)) { dwFlags = pfcomp->dwFlags; } else { Assert(CT_END_SET != (dwFlags & CT_END_SET)); dwFlags = m_dwFlagsCurr; } Assert(CT_START_SET == BBF_SET); Assert(CT_START_ALPHA == BBF_ALPHA); fRet = FMatchToken((NULL == m_pbBuffPrev), ((m_ibStream >= m_cbStream) && ((m_pbBuffCurr + pfcomp->cchFeature) >= m_pbBuffGood)), (LPCSTR) m_pbBuffPrev, &m_dwFlagsPrev, pfcomp->pszFeature, pfcomp->cchFeature, &dwFlags, (LPCSTR) (m_pbBuffCurr + pfcomp->cchFeature)); // Save the changed flags pfcomp->dwFlags = dwFlags; // Cache the current character's state m_dwFlagsCurr = (dwFlags & BBF_MASK); exit: return fRet; } HRESULT CBodyBuff::_HrFillBuffer(VOID) { HRESULT hr = S_OK; LONG cbExtra = 0; ULONG cbRead = 0; ULONG cbToRead = 0; // If there isn't any more of the stream to grab if (m_ibStream >= m_cbStream) { hr = S_FALSE; goto exit; } // If this is the first time through, save nothing if (NULL == m_pbBuffPrev) { cbExtra = 0; } else { // How much space should I save? cbExtra = (ULONG) (m_cbBuffTotal - (m_pbBuffPrev - m_rgbBuff)); Assert(cbExtra > 0); // Save the unused data MoveMemory(m_rgbBuff, m_pbBuffPrev, (int)min(cbExtra, sizeof(m_rgbBuff))); // Reset the current pointer m_pbBuffCurr = m_rgbBuff + (m_pbBuffCurr - m_pbBuffPrev); // Reset the previous pointer m_pbBuffPrev = m_rgbBuff; } // Read in more data cbToRead = (int)min(CB_BODYBUFF_MAX - cbExtra - 1, (LONG) (m_cbStream - m_ibStream)); hr = m_pIStream->Read(m_rgbBuff + cbExtra, cbToRead, &cbRead); if ((FAILED(hr)) || (0 == cbRead)) { // End of stream hr = S_FALSE; } else { hr = S_OK; } // Track the number of bytes read m_ibStream += cbRead; // Set the total buffer size m_cbBuffTotal = cbExtra + cbRead; // Terminate the buffer, just in case m_rgbBuff[m_cbBuffTotal] = '\0'; // Uppercase the buffer m_pbBuffGood = m_rgbBuff + CharUpperBuff((CHAR *) m_rgbBuff, m_cbBuffTotal); exit: return hr; } HRESULT CJunkFilter::_HrBuildBodyList(USHORT cBodyItems) { HRESULT hr = S_OK; USHORT usIndex = 0; FEATURECOMP * pfcomp = NULL; USHORT iBodyList = 0; // Check incoming params if (0 == cBodyItems) { hr = E_INVALIDARG; goto exit; } Assert(USHRT_MAX > cBodyItems); // Make sure the old items are freed SafeMemFree(m_pblistBodyList); m_cblistBodyList = 0; // Initialize the list ZeroMemory(m_rgiBodyList, sizeof(m_rgiBodyList)); // Allocate space to hold all of the items hr = HrAlloc((VOID **) &m_pblistBodyList, sizeof(*m_pblistBodyList) * (cBodyItems + 1)); if (FAILED(hr)) { goto exit; } // Initialize the body list ZeroMemory(m_pblistBodyList, sizeof(*m_pblistBodyList) * (cBodyItems + 1)); // For each feature for (usIndex = 0, iBodyList = 1, pfcomp = m_rgfeaturecomps; usIndex < m_cFeatureComps; usIndex++, pfcomp++) { // If it's a body feature if (locBody == pfcomp->loc) { // Initialize it m_pblistBodyList[iBodyList].usItem = usIndex; // Add it to the list m_pblistBodyList[iBodyList].iNext = m_rgiBodyList[(UCHAR) (pfcomp->pszFeature[0])]; m_rgiBodyList[(UCHAR) (pfcomp->pszFeature[0])] = iBodyList; // Move to the next body item iBodyList++; } } // Save the number of items m_cblistBodyList = cBodyItems + 1; // Set the return value hr = S_OK; exit: return hr; } ///////////////////////////////////////////////////////////////////////////// // _FReadSVMOutput // // Read the SVM output from a file (".LKO file") ///////////////////////////////////////////////////////////////////////////// HRESULT CJunkFilter::_HrReadSVMOutput(LPCSTR pszFileName) { HRESULT hr = S_OK; CParseStream parsestm; ULONG ulIndex = 0; LPSTR pszBuff = NULL; ULONG cchBuff = 0; LPSTR pszDummy = NULL; LPSTR pszDefThresh = NULL; ULONG cFeatureComponents = 0; LPSTR pszFeature = NULL; ULONG ulFeatureComp = 0; USHORT cBodyItems = 0; FEATURECOMP * pfeaturecomp = NULL; if ((NULL == pszFileName) || ('\0' == *pszFileName)) { hr = E_INVALIDARG; goto exit; } // Get the parse stream hr = parsestm.HrSetFile(0, pszFileName); if (FAILED(hr)) { goto exit; } // skip first two lines for (ulIndex = 0; ulIndex < 3; ulIndex++) { SafeMemFree(pszBuff); hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff); if (FAILED(hr)) { goto exit; } } // parse 3rd line: only care about CC and DD if (FALSE == FReadDouble(pszBuff, "cc = ", &m_dblCC)) { hr = E_FAIL; goto exit; } if (FALSE == FReadDouble(pszBuff, "dd = ", &m_dblDD)) { hr = E_FAIL; goto exit; } SafeMemFree(pszBuff); hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff); if (FAILED(hr)) { goto exit; } if (FALSE == FReadDouble(pszBuff, szDefaultThresh, &m_dblDefaultThresh)) { m_dblDefaultThresh = THRESH_DEFAULT; } if (0 == m_dblSpamCutoff) { m_dblSpamCutoff = m_dblDefaultThresh; } if (FALSE == FReadDouble(pszBuff, szThresh, &m_dblThresh)) { hr = E_FAIL; goto exit; } SafeMemFree(pszBuff); hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff); if (FAILED(hr)) { goto exit; } if (FALSE == FReadDouble(pszBuff, szMostThresh, &m_dblMostThresh)) { m_dblMostThresh = THRESH_MOST; } if (FALSE == FReadDouble(pszBuff, szLeastThresh, &m_dblLeastThresh)) { m_dblLeastThresh = THRESH_LEAST; } SafeMemFree(pszBuff); hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff); if (FAILED(hr)) { goto exit; } m_cFeatures = StrToInt(pszBuff + lstrlen(szNumberofDim)); if (0 == m_cFeatures) { hr = E_FAIL; goto exit; } // We only support up to USHRT_MAX features if (m_cFeatures >= USHRT_MAX) { hr = E_OUTOFMEMORY; goto exit; } SafeMemFree(pszBuff); hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff); if (FAILED(hr)) { goto exit; } pszDummy = StrStr(pszBuff, szCountFeatureComp); if (NULL != pszDummy) { pszDummy += lstrlen(szCountFeatureComp); cFeatureComponents = StrToInt(pszDummy); } if (cFeatureComponents < m_cFeatures) { cFeatureComponents = m_cFeatures * 2; } while (0 != lstrcmp(pszBuff, "Weights")) { SafeMemFree(pszBuff); hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff); if (FAILED(hr)) { goto exit; } } SafeMemFree(m_rgdblSVMWeights); hr = HrAlloc((void **) &m_rgdblSVMWeights, sizeof(*m_rgdblSVMWeights) * m_cFeatures); if (FAILED(hr)) { goto exit; } SafeMemFree(m_rgulFeatureStatus); hr = HrAlloc((void **) &m_rgulFeatureStatus, sizeof(*m_rgulFeatureStatus) * m_cFeatures); if (FAILED(hr)) { goto exit; } FillMemory(m_rgulFeatureStatus, sizeof(*m_rgulFeatureStatus) * m_cFeatures, -1); SafeMemFree(m_rgfeaturecomps); hr = HrAlloc((void **) &m_rgfeaturecomps, sizeof(*m_rgfeaturecomps) * cFeatureComponents); if (FAILED(hr)) { goto exit; } // Initialize the features ZeroMemory(m_rgfeaturecomps, sizeof(*m_rgfeaturecomps) * cFeatureComponents); for (ulIndex = 0; ulIndex < m_cFeatures; ulIndex++) { UINT uiLoc; USHORT cbStr; boolop bop; BOOL fContinue; BOOL fNegative; SafeMemFree(pszBuff); hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff); if (FAILED(hr)) { goto exit; } // read the SVM weight pszDummy = pszBuff; fNegative = ('-' == *pszDummy); pszDummy++; m_rgdblSVMWeights[ulIndex] = StrToDbl(pszDummy, &pszDummy); if (FALSE != fNegative) { m_rgdblSVMWeights[ulIndex] *= -1; } pszDummy++; // skip the separator bop = boolopOr; fContinue = false; do { pfeaturecomp = &m_rgfeaturecomps[ulFeatureComp++]; // Skip over white space UlStripWhitespace(pszDummy, TRUE, FALSE, NULL); // Location (or "special") uiLoc = StrToInt(pszDummy); pszDummy = StrStr(pszDummy, ":"); // skip the separator pszDummy++; pfeaturecomp->loc = (FeatureLocation)uiLoc; pfeaturecomp->ulFeature = ulIndex; pfeaturecomp->bop = bop; if (locBody == pfeaturecomp->loc) { cBodyItems++; } if (uiLoc == 5) { UINT uiRuleNumber = StrToInt(pszDummy); pszDummy += StrSpn(pszDummy, "0123456789"); pfeaturecomp->ulRuleNum = uiRuleNumber; } else { cbStr = (USHORT) StrToInt(pszDummy); pszDummy = StrStr(pszDummy, ":"); pszDummy++; // We only support strings up to USHRT_MAX if (cbStr >= USHRT_MAX) { hr = E_OUTOFMEMORY; goto exit; } hr = HrAlloc((void **) &pszFeature, sizeof(*pszFeature) * (cbStr + 1)); if (FAILED(hr)) { goto exit; } StrCpyN(pszFeature, pszDummy, cbStr + 1); pszDummy += cbStr; if ('\0' != *pszDummy) { pszDummy++; // skip the separator } pszFeature[cbStr] = '\0'; Assert(cbStr == strlen(pszFeature)); // Save off the string pfeaturecomp->pszFeature = pszFeature; pszFeature = NULL; pfeaturecomp->cchFeature = cbStr; } UlStripWhitespace(pszDummy, TRUE, FALSE, NULL); switch(*pszDummy) { case '|': bop = boolopOr; fContinue = TRUE; break; case '&': bop = boolopAnd; fContinue = TRUE; break; default: fContinue = FALSE; break; } pszDummy++; } while (fContinue); } m_cFeatureComps = ulFeatureComp; // Build up body items... hr = _HrBuildBodyList(cBodyItems); if (FAILED(hr)) { goto exit; } hr = S_OK; exit: SafeMemFree(pszFeature); SafeMemFree(pszBuff); return hr; } ///////////////////////////////////////////////////////////////////////////// // _FInvokeSpecialRule // // Invokes the special rule that is this FEATURECOMP. // Returns the state of the feature. ///////////////////////////////////////////////////////////////////////////// BOOL CJunkFilter::_FInvokeSpecialRule(UINT iRuleNum) { BOOL fRet = FALSE; SYSTEMTIME stSent; CHAR rgchYear[6]; ULONG cbSize = 0; DWORD dwDummy = 0; switch (iRuleNum) { case 1: fRet = FStreamStringSearch(m_pIStmBody, &dwDummy, m_pszFirstName, m_cchFirstName, 0); break; case 2: fRet = FStreamStringSearch(m_pIStmBody, &dwDummy, m_pszLastName, m_cchLastName, 0); break; case 3: fRet = FStreamStringSearch(m_pIStmBody, &dwDummy, m_pszCompanyName, m_cchCompanyName, 0); break; case 4: // year message received if (FALSE == FTimeEmpty(&m_ftMessageSent)) { // Convert to system time so we can get the year SideAssert(FALSE != FileTimeToSystemTime(&m_ftMessageSent, &stSent)); wnsprintf(rgchYear, ARRAYSIZE(rgchYear), "%d", stSent.wYear); dwDummy = CT_START_SET | CT_START_NUM | CT_END_SET | CT_END_NUM; fRet = FStreamStringSearch(m_pIStmBody, &dwDummy, rgchYear, lstrlen(rgchYear), SSF_CASESENSITIVE); } break; case 5: // message received in the wee hours (>= 7pm or <6am if (FALSE == FTimeEmpty(&m_ftMessageSent)) { // Convert to system time so we can get the year SideAssert(FALSE != FileTimeToSystemTime(&m_ftMessageSent, &stSent)); fRet = (stSent.wHour >= (7 + 12)) || (stSent.wHour < 6); } break; case 6: // message received on weekend if (FALSE == FTimeEmpty(&m_ftMessageSent)) { // Convert to system time so we can get the year SideAssert(FALSE != FileTimeToSystemTime(&m_ftMessageSent, &stSent)); fRet = ((0 == stSent.wDayOfWeek) || (6 == stSent.wDayOfWeek)); } break; case 14: fRet = m_fRule14; // set in _HandleCaseSensitiveSpecialRules() break; case 15: fRet = FSpecialFeatureNonAlphaStm(m_pIStmBody); break; case 16: fRet = m_fDirectMessage; break; case 17: fRet = m_fRule17; // set in _HandleCaseSensitiveSpecialRules() break; case 18: fRet = FSpecialFeatureNonAlpha(m_pszSubject); break; case 19: fRet = ((NULL == m_pszTo) || ('\0' == *m_pszTo)); break; case 20: fRet = m_fHasAttach; break; case 40: fRet = (m_cbBody >= 125); break; case 41: fRet = (m_cbBody >= 250); break; case 42: fRet = (m_cbBody >= 500); break; case 43: fRet = (m_cbBody >= 1000); break; case 44: fRet = (m_cbBody >= 2000); break; case 45: fRet = (m_cbBody >= 4000); break; case 46: fRet = (m_cbBody >= 8000); break; case 47: fRet = (m_cbBody >= 16000); break; default: AssertSz(FALSE, "unsupported special feature"); break; } return fRet; } ///////////////////////////////////////////////////////////////////////////// // _HandleCaseSensitiveSpecialRules // // Called from _EvaluateFeatureComponents(). // Some special rules are case sensitive, so if they're present, we'll // evaluate them before we make the texts uppercase and cache the result // for when they are actually used. ///////////////////////////////////////////////////////////////////////////// VOID CJunkFilter::_HandleCaseSensitiveSpecialRules() { ULONG ulIndex = 0; for (ulIndex = 0; ulIndex < m_cFeatureComps; ulIndex++) { if (m_rgfeaturecomps[ulIndex].loc == locSpecial) { switch (m_rgfeaturecomps[ulIndex].ulRuleNum) { case 14: m_fRule14 = FSpecialFeatureUpperCaseWordsStm(m_pIStmBody); break; case 17: m_fRule17 = FSpecialFeatureUpperCaseWords(m_pszSubject); break; default: break; } } } return; } VOID CJunkFilter::_EvaluateBodyFeatures(VOID) { CBodyBuff buffBody; CHAR chMatch = '\0'; ULONG ulIndex = 0; FEATURECOMP * pfcomp = NULL; USHORT iBodyList = 0; // Check to see if we have work to do if (NULL == m_pIStmBody) { goto exit; } // Set the stream into the buffer if (FAILED(buffBody.HrInit(0, m_pIStmBody))) { goto exit; } // Initialize all the body features to no found for (iBodyList = 1; iBodyList < m_cblistBodyList; iBodyList++) { // Set it to not found m_rgfeaturecomps[m_pblistBodyList[iBodyList].usItem].fPresent = FALSE; } // While we have more bytes to read for (; S_OK == buffBody.HrGetCurrChar(&chMatch); buffBody.HrMoveNext()) { // Search for a match through the feature list for (iBodyList = m_rgiBodyList[(UCHAR) chMatch]; 0 != iBodyList; iBodyList = m_pblistBodyList[iBodyList].iNext) { pfcomp = &(m_rgfeaturecomps[m_pblistBodyList[iBodyList].usItem]); // If we have a body item and it hasn't been found yet if (FALSE == pfcomp->fPresent) { // Could this item be a possible match??? Assert(NULL != pfcomp->pszFeature); // Try to do the comparison pfcomp->fPresent = buffBody.FDoMatch(pfcomp); } } } exit: return; } ///////////////////////////////////////////////////////////////////////////// // _EvaluateFeatureComponents // // Evaluates all of the feature components. Sets fPresent in each component // to true if the feature is present, false otherwise ///////////////////////////////////////////////////////////////////////////// VOID CJunkFilter::_EvaluateFeatureComponents(VOID) { ULONG ulIndex = 0; FEATURECOMP * pfcomp = NULL; _HandleCaseSensitiveSpecialRules(); if (NULL != m_pszFrom) { CharUpperBuff(m_pszFrom, lstrlen(m_pszFrom)); } if (NULL != m_pszTo) { CharUpperBuff(m_pszTo, lstrlen(m_pszTo)); } if (NULL != m_pszSubject) { CharUpperBuff(m_pszSubject, lstrlen(m_pszSubject)); } for (ulIndex = 0; ulIndex < m_cFeatureComps; ulIndex++) { pfcomp = &m_rgfeaturecomps[ulIndex]; switch(pfcomp->loc) { case locNil: Assert(locNil != pfcomp->loc); pfcomp->fPresent = FALSE; break; case locSubj: pfcomp->fPresent = FWordPresent(m_pszSubject, &(pfcomp->dwFlags), pfcomp->pszFeature, pfcomp->cchFeature, NULL); break; case locFrom: pfcomp->fPresent = FWordPresent(m_pszFrom, &(pfcomp->dwFlags), pfcomp->pszFeature, pfcomp->cchFeature, NULL); break; case locTo: pfcomp->fPresent = FWordPresent(m_pszTo, &(pfcomp->dwFlags), pfcomp->pszFeature, pfcomp->cchFeature, NULL); break; case locSpecial: pfcomp->fPresent = _FInvokeSpecialRule(pfcomp->ulRuleNum); break; } } } ///////////////////////////////////////////////////////////////////////////// // ProcessFeatureComponentPresence // // Processes the presence (or absence) of the individual feature components, // setting the feature status of each feature (which may me made up of // multiple feature components). ///////////////////////////////////////////////////////////////////////////// VOID CJunkFilter::_ProcessFeatureComponentPresence(VOID) { ULONG ulIndex = 0; FEATURECOMP * pfcomp = NULL; ULONG ulFeature = 0; for (ulIndex = 0; ulIndex < m_cFeatureComps; ulIndex++) { pfcomp = &m_rgfeaturecomps[ulIndex]; ulFeature = pfcomp->ulFeature; if (-1 == m_rgulFeatureStatus[ulFeature]) // first feature of this feature { if (FALSE != pfcomp->fPresent) { m_rgulFeatureStatus[ulFeature] = 1; } else { m_rgulFeatureStatus[ulFeature] = 0; } } else { switch (pfcomp->bop) { case boolopOr: if (pfcomp->fPresent) { m_rgulFeatureStatus[ulFeature] = 1; } break; case boolopAnd: if (!pfcomp->fPresent) { m_rgulFeatureStatus[ulFeature] = 0; } break; default: Assert(FALSE); break; } } } } ///////////////////////////////////////////////////////////////////////////// // _DblDoSVMCalc // // Does the actual support vector machine calculation. // Returns the probability that the message is spam ///////////////////////////////////////////////////////////////////////////// DOUBLE CJunkFilter::_DblDoSVMCalc(VOID) { DOUBLE dblAccum; DOUBLE dblResult; ULONG ulIndex = 0; dblAccum = 0.0; for (ulIndex = 0; ulIndex < m_cFeatures; ulIndex++) { if (m_rgulFeatureStatus[ulIndex] == 1) { dblAccum += m_rgdblSVMWeights[ulIndex]; #ifdef DEBUG if (NULL != m_pILogFile) { _PrintFeatureToLog(ulIndex); } #endif // DEBUG } else if (m_rgulFeatureStatus[ulIndex] != 0) { AssertSz(FALSE, "What happened here!"); } } // Apply threshold; dblAccum -= m_dblThresh; // Apply sigmoid dblResult = (1 / (1 + exp((m_dblCC * dblAccum) + m_dblDD))); return dblResult; } ///////////////////////////////////////////////////////////////////////////// // BCalculateSpamProb // // Calculates the probability that the current message is spam. // Returns the probability (0 to 1) that the message is spam in prSpamProb // the boolean return is determined by comparing to the spam cutoff ///////////////////////////////////////////////////////////////////////////// BOOL CJunkFilter::FCalculateSpamProb(LPSTR pszFrom, LPSTR pszTo, LPSTR pszSubject, IStream * pIStmBody, BOOL fDirectMessage, BOOL fHasAttach, FILETIME * pftMessageSent, DOUBLE * pdblSpamProb, BOOL * pfIsSpam) { #ifdef DEBUG CHAR rgchBuff[1024]; DWORD dwVal = 0; #endif // DEBUG m_pszFrom = pszFrom; m_pszTo = pszTo; m_pszSubject = pszSubject; m_pIStmBody = pIStmBody; m_fDirectMessage = fDirectMessage; m_fHasAttach = fHasAttach; m_ftMessageSent = *pftMessageSent; // Set the size of the body if ((NULL == m_pIStmBody) || (FAILED(HrGetStreamSize(m_pIStmBody, &m_cbBody)))) { m_cbBody = 0; } #ifdef DEBUG // Get the logfile if we need it if (NULL == m_pILogFile) { _HrCreateLogFile(); } if (NULL != m_pILogFile) { PrintToLogFile(m_pILogFile, LOG_TAGLINE, pszSubject); PrintToLogFile(m_pILogFile, LOG_FIRSTNAME, m_pszFirstName); PrintToLogFile(m_pILogFile, LOG_LASTNAME, m_pszLastName); PrintToLogFile(m_pILogFile, LOG_COMPANYNAME, m_pszCompanyName); } #endif // DEBUG _EvaluateBodyFeatures(); _EvaluateFeatureComponents(); _ProcessFeatureComponentPresence(); *pdblSpamProb = _DblDoSVMCalc(); #ifdef DEBUG if (NULL != m_pILogFile) { dwVal = ( DWORD ) ((*pdblSpamProb * 1000000) + 0.5); wnsprintf(rgchBuff, ARRAYSIZE(rgchBuff), LOG_FINAL, dwVal / 1000000, dwVal % 1000000); m_pILogFile->WriteLog(LOGFILE_DB, rgchBuff); m_pILogFile->WriteLog(LOGFILE_DB, ""); } #endif // DEBUG *pfIsSpam = (*pdblSpamProb > m_dblSpamCutoff); return TRUE; } ///////////////////////////////////////////////////////////////////////////// // BReadDefaultSpamCutoff // // Reads the default spam cutoff without parsing entire file // Use GetDefaultSpamCutoff if using HrSetSVMDataLocation; // static member function ///////////////////////////////////////////////////////////////////////////// HRESULT CJunkFilter::HrReadDefaultSpamCutoff(LPSTR pszFullPath, DOUBLE * pdblDefCutoff) { HRESULT hr = S_OK; CParseStream parsestm; LPSTR pszBuff = NULL; ULONG cchBuff = 0; LPSTR pszDefThresh = NULL; ULONG ulIndex = 0; LPSTR pszDummy = NULL; if ((NULL == pszFullPath) || ('\0' == *pszFullPath) || (NULL == pdblDefCutoff)) { hr = E_INVALIDARG; goto exit; } // Get the parse stream hr = parsestm.HrSetFile(0, pszFullPath); if (FAILED(hr)) { goto exit; } // skip first three lines for (ulIndex = 0; ulIndex < 4; ulIndex++) { SafeMemFree(pszBuff); hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff); if (FAILED(hr)) { goto exit; } } // Find the default threshold pszDefThresh = StrStr(pszBuff, ::szDefaultThresh); if (NULL == pszDefThresh) { hr = E_FAIL; goto exit; } // Grab the value pszDefThresh += lstrlen(::szDefaultThresh); *pdblDefCutoff = StrToDbl(pszDefThresh, &pszDummy); // Set the proper return value hr = S_OK; exit: SafeMemFree(pszBuff); return hr; } ///////////////////////////////////////////////////////////////////////////// // Constructor/destructor // ///////////////////////////////////////////////////////////////////////////// CJunkFilter::CJunkFilter() : m_cRef(0), m_pszFirstName(NULL), m_cchFirstName(0), m_pszLastName(NULL), m_cchLastName(0), m_pszCompanyName(NULL), m_cchCompanyName(0), m_pblistBodyList(NULL), m_cblistBodyList(0), m_rgfeaturecomps(NULL), m_rgdblSVMWeights(NULL), m_dblCC(0), m_dblDD(0), m_dblThresh(-1), m_dblDefaultThresh(-1), m_dblMostThresh(0), m_dblLeastThresh(0), m_cFeatures(0), m_cFeatureComps(0), m_rgulFeatureStatus(0), m_pszLOCPath(NULL), m_dblSpamCutoff(0), m_pszFrom(NULL), m_pszTo(NULL), m_pszSubject(NULL), m_pIStmBody(NULL), m_cbBody(0), m_fDirectMessage(FALSE), m_fHasAttach(FALSE), m_fRule14(FALSE), m_fRule17(FALSE) { ZeroMemory(m_rgiBodyList, sizeof(m_rgiBodyList)); ZeroMemory(&m_ftMessageSent, sizeof(m_ftMessageSent)); InitializeCriticalSection(&m_cs); #ifdef DEBUG m_fJunkMailLogInit = FALSE; m_pILogFile = NULL; #endif // DEBUG } CJunkFilter::~CJunkFilter() { ULONG ulIndex = 0; SafeMemFree(m_pszFirstName); SafeMemFree(m_pszLastName); SafeMemFree(m_pszCompanyName); #ifdef DEBUG SafeRelease(m_pILogFile); #endif // DEBUG for (ulIndex = 0; ulIndex < m_cFeatureComps; ulIndex++) { if ((locNil != m_rgfeaturecomps[ulIndex].loc) && (locSpecial != m_rgfeaturecomps[ulIndex].loc)) { SafeMemFree(m_rgfeaturecomps[ulIndex].pszFeature); } } SafeMemFree(m_pblistBodyList); m_cblistBodyList = 0; ZeroMemory(m_rgiBodyList, sizeof(m_rgiBodyList)); SafeMemFree(m_rgdblSVMWeights); SafeMemFree(m_rgulFeatureStatus); SafeMemFree(m_rgfeaturecomps); DeleteCriticalSection(&m_cs); } STDMETHODIMP_(ULONG) CJunkFilter::AddRef() { return ::InterlockedIncrement(&m_cRef); } STDMETHODIMP_(ULONG) CJunkFilter::Release() { LONG cRef = 0; cRef = ::InterlockedDecrement(&m_cRef); if (0 == cRef) { delete this; return cRef; } return cRef; } STDMETHODIMP CJunkFilter::QueryInterface(REFIID riid, void ** ppvObject) { HRESULT hr = S_OK; // Check the incoming params if (NULL == ppvObject) { hr = E_INVALIDARG; goto exit; } // Initialize outgoing param *ppvObject = NULL; if ((riid == IID_IUnknown) || (riid == IID_IOEJunkFilter)) { *ppvObject = static_cast(this); } else { hr = E_NOINTERFACE; goto exit; } reinterpret_cast(*ppvObject)->AddRef(); hr = S_OK; exit: return hr; } STDMETHODIMP CJunkFilter::SetIdentity(LPCSTR pszFirstName, LPCSTR pszLastName, LPCSTR pszCompanyName) { HRESULT hr = S_OK; //Set the new first name SafeMemFree(m_pszFirstName); m_cchFirstName = 0; if (NULL != pszFirstName) { m_pszFirstName = PszDupA(pszFirstName); if (NULL == m_pszFirstName) { hr = E_OUTOFMEMORY; goto exit; } m_cchFirstName = CharUpperBuff(m_pszFirstName, lstrlen(m_pszFirstName)); } // Set the new last name SafeMemFree(m_pszLastName); m_cchLastName = 0; if (NULL != pszLastName) { m_pszLastName = PszDupA(pszLastName); if (NULL == m_pszLastName) { hr = E_OUTOFMEMORY; goto exit; } m_cchLastName = CharUpperBuff(m_pszLastName, lstrlen(m_pszLastName)); } // Set the new company name SafeMemFree(m_pszCompanyName); m_cchCompanyName = 0; if (NULL != pszCompanyName) { m_pszCompanyName = PszDupA(pszCompanyName); if (NULL == m_pszCompanyName) { hr = E_OUTOFMEMORY; goto exit; } m_cchCompanyName = CharUpperBuff(m_pszCompanyName, lstrlen(m_pszCompanyName)); } hr = S_OK; exit: return hr; } STDMETHODIMP CJunkFilter::LoadDataFile(LPCSTR pszFilePath) { HRESULT hr = S_OK; if ((NULL == pszFilePath) || ('\0' == *pszFilePath)) { hr = E_INVALIDARG; goto exit; } hr = _HrReadSVMOutput(pszFilePath); if (FAILED(hr)) { AssertSz(FALSE, "Unable to successfully read filter params"); goto exit; } // Set the proper return value hr = S_OK; exit: return hr; } STDMETHODIMP CJunkFilter::SetSpamThresh(ULONG ulThresh) { HRESULT hr = S_OK; switch (ulThresh) { case STF_USE_MOST: m_dblSpamCutoff = m_dblMostThresh; break; case STF_USE_MORE: m_dblSpamCutoff = m_dblDefaultThresh + ((m_dblMostThresh - m_dblDefaultThresh) / 2); break; case STF_USE_DEFAULT: m_dblSpamCutoff = m_dblDefaultThresh; break; case STF_USE_LESS: m_dblSpamCutoff = m_dblDefaultThresh - ((m_dblDefaultThresh - m_dblLeastThresh) / 2); break; case STF_USE_LEAST: m_dblSpamCutoff = m_dblLeastThresh; break; default: hr = E_INVALIDARG; goto exit; } hr = S_OK; exit: return hr; } STDMETHODIMP CJunkFilter::GetSpamThresh(ULONG * pulThresh) { HRESULT hr = S_OK; ULONG ulThresh = 0; // Check the incoming params if (NULL == pulThresh) { hr = E_INVALIDARG; goto exit; } // Initialize outgoing params if (m_dblDefaultThresh == m_dblSpamCutoff) { ulThresh = STF_USE_DEFAULT; } else if (m_dblMostThresh == m_dblSpamCutoff) { ulThresh = STF_USE_MOST; } else if (m_dblLeastThresh == m_dblSpamCutoff) { ulThresh = STF_USE_LEAST; } else if (m_dblSpamCutoff > m_dblDefaultThresh) { ulThresh = STF_USE_MORE; } else { ulThresh = STF_USE_LESS; } hr = S_OK; exit: return hr; } STDMETHODIMP CJunkFilter::GetDefaultSpamThresh(DOUBLE * pdblThresh) { HRESULT hr = S_OK; // Check the incoming params if (NULL == pdblThresh) { hr = E_INVALIDARG; goto exit; } // Initialize outgoing params *pdblThresh = m_dblDefaultThresh * 100.0; hr = S_OK; exit: return hr; } STDMETHODIMP CJunkFilter::CalcJunkProb(DWORD dwFlags, IMimePropertySet * pIMPropSet, IMimeMessage * pIMMsg, double * pdblProb) { HRESULT hr = S_OK; BOOL fSpam = FALSE; PROPVARIANT propvar = {0}; DWORD dwFlagsMsg = 0; FILETIME ftMsgSent = {0}; LPSTR pszFrom = NULL; LPSTR pszTo = NULL; LPSTR pszSubject = NULL; IStream * pIStmBody = NULL; IStream * pIStmHtml = NULL; BOOL fSentToMe = FALSE; BOOL fHasAttachments = FALSE; if ((NULL == pIMPropSet) || (NULL == pIMMsg)) { hr = E_INVALIDARG; goto exit; } // Get Message Flags if (SUCCEEDED(pIMMsg->GetFlags(&dwFlagsMsg))) { fHasAttachments = (0 != (dwFlagsMsg & IMF_ATTACHMENTS)); } // Was the message sent to me fSentToMe = (0 != (dwFlags & CJPF_SENT_TO_ME)); // Get the from field propvar.vt = VT_LPSTR; hr = pIMPropSet->GetProp(PIDTOSTR(PID_HDR_FROM), NOFLAGS, &propvar); if (SUCCEEDED(hr)) { pszFrom = propvar.pszVal; } // Get the To field propvar.vt = VT_LPSTR; hr = pIMPropSet->GetProp(PIDTOSTR(PID_HDR_TO), NOFLAGS, &propvar); if (SUCCEEDED(hr)) { pszTo = propvar.pszVal; } // Try to Get the Plain Text Stream if (FAILED(pIMMsg->GetTextBody(TXT_PLAIN, IET_DECODED, &pIStmBody, NULL))) { // Try to get the text version from the HTML stream if ((FAILED(pIMMsg->GetTextBody(TXT_HTML, IET_DECODED, &pIStmHtml, NULL))) || (FAILED(HrConvertHTMLToPlainText(pIStmHtml, &pIStmBody)))) { pIStmBody = NULL; } } // Get the Subject field propvar.vt = VT_LPSTR; hr = pIMPropSet->GetProp(PIDTOSTR(PID_HDR_SUBJECT), NOFLAGS, &propvar); if (SUCCEEDED(hr)) { pszSubject = propvar.pszVal; } // Is this a direct message // When was the message sent? propvar.vt = VT_FILETIME; hr = pIMPropSet->GetProp(PIDTOSTR(PID_ATT_SENTTIME), 0, &propvar); if (SUCCEEDED(hr)) { ftMsgSent = propvar.filetime; } FillMemory(m_rgulFeatureStatus, sizeof(*m_rgulFeatureStatus) * m_cFeatures, -1); if (FALSE == FCalculateSpamProb(pszFrom, pszTo, pszSubject, pIStmBody, fSentToMe, fHasAttachments, &ftMsgSent, pdblProb, &fSpam)) { hr = E_FAIL; goto exit; } hr = (FALSE != fSpam) ? S_OK : S_FALSE; exit: SafeRelease(pIStmHtml); SafeRelease(pIStmBody); SafeMemFree(pszSubject); SafeMemFree(pszTo); SafeMemFree(pszFrom); return hr; } /////////////////////////////////////////////////////////////////////////////// // // HrCreateJunkFilter // // This creates a junk filter. // // ppIRule - pointer to return the junk filter // // Returns: S_OK, on success // E_OUTOFMEMORY, if can't create the Junk Filter object // /////////////////////////////////////////////////////////////////////////////// HRESULT WINAPI HrCreateJunkFilter(DWORD dwFlags, IOEJunkFilter ** ppIJunkFilter) { CJunkFilter * pJunk = NULL; HRESULT hr = S_OK; // Check the incoming params if (NULL == ppIJunkFilter) { hr = E_INVALIDARG; goto exit; } // Initialize outgoing params *ppIJunkFilter = NULL; // Create the rules manager object pJunk = new CJunkFilter; if (NULL == pJunk) { hr = E_OUTOFMEMORY; goto exit; } // Get the rules manager interface hr = pJunk->QueryInterface(IID_IOEJunkFilter, (void **) ppIJunkFilter); if (FAILED(hr)) { goto exit; } pJunk = NULL; // Set the proper return value hr = S_OK; exit: if (NULL != pJunk) { delete pJunk; } return hr; } BOOL FReadDouble(LPSTR pszLine, LPSTR pszToken, DOUBLE * pdblVal) { BOOL fRet = FALSE; LPSTR pszVal = NULL; BOOL fNegative = FALSE; // Search for token pszVal = StrStr(pszLine, pszToken); // If token isn't found then bail if (NULL == pszVal) { fRet = FALSE; goto exit; } // Skip over the token pszVal += lstrlen(pszToken); // Check to see if the value is negative if ('-' == *pszVal) { fNegative = TRUE; pszVal++; } // Read in value *pdblVal = StrToDbl(pszVal, &pszVal); // Negate the value if neccessary if (FALSE != fNegative) { *pdblVal *= -1; } fRet = TRUE; exit: return fRet; } #ifdef DEBUG static const LPSTR LOG_SPECIAL_BODY_FIRSTNAME = "Special: Body contains the First Name"; static const LPSTR LOG_SPECIAL_BODY_LASTNAME = "Special: Body contains the Last Name"; static const LPSTR LOG_SPECIAL_BODY_COMPANYNAME = "Special: Body contains the Company Name"; static const LPSTR LOG_SPECIAL_BODY_YEARRECVD = "Special: Body contains the year message received"; static const LPSTR LOG_SPECIAL_SENTTIME_WEEHRS = "Special: Sent time was between 7PM and 6AM"; static const LPSTR LOG_SPECIAL_SENTTIME_WKEND = "Special: Sent time was on the weekend (Sat or Sun)"; static const LPSTR LOG_SPECIAL_BODY_25PCTUPCWDS = "Special: Body contains 25% uppercase words out of the first 50 words"; static const LPSTR LOG_SPECIAL_BODY_8PCTNONALPHA = "Special: Body contains 8% non-alpha characters out of the first 200 characters"; static const LPSTR LOG_SPECIAL_SENT_DIRECT = "Special: Sent directly to user"; static const LPSTR LOG_SPECIAL_SUBJECT_25PCTUPCWDS = "Special: Subject contains 25% uppercase words out of the first 50 words"; static const LPSTR LOG_SPECIAL_SUBJECT_8PCTNONALPHA = "Special: Subject contains 8% non-alpha characters out of the first 200 characters"; static const LPSTR LOG_SPECIAL_TO_EMPTY = "Special: To line is empty"; static const LPSTR LOG_SPECIAL_HASATTACH = "Special: Message has an attachment"; static const LPSTR LOG_SPECIAL_BODY_GT125B = "Special: Body is greater than 125 Bytes"; static const LPSTR LOG_SPECIAL_BODY_GT250B = "Special: Body is greater than 250 Bytes"; static const LPSTR LOG_SPECIAL_BODY_GT500B = "Special: Body is greater than 500 Bytes"; static const LPSTR LOG_SPECIAL_BODY_GT1000B = "Special: Body is greater than 1000 Bytes"; static const LPSTR LOG_SPECIAL_BODY_GT2000B = "Special: Body is greater than 2000 Bytes"; static const LPSTR LOG_SPECIAL_BODY_GT4000B = "Special: Body is greater than 4000 Bytes"; static const LPSTR LOG_SPECIAL_BODY_GT8000B = "Special: Body is greater than 8000 Bytes"; static const LPSTR LOG_SPECIAL_BODY_GT16000B = "Special: Body is greater than 16000 Bytes"; VOID CJunkFilter::_PrintSpecialFeatureToLog(UINT iRuleNum) { Assert(NULL != m_pILogFile); switch (iRuleNum) { case 1: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_FIRSTNAME); break; case 2: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_LASTNAME); break; case 3: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_COMPANYNAME); break; case 4: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_YEARRECVD); break; case 5: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_SENTTIME_WEEHRS); break; case 6: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_SENTTIME_WKEND); break; case 14: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_25PCTUPCWDS); break; case 15: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_8PCTNONALPHA); break; case 16: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_SENT_DIRECT); break; case 17: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_SUBJECT_25PCTUPCWDS); break; case 18: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_SUBJECT_8PCTNONALPHA); break; case 19: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_TO_EMPTY); break; case 20: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_HASATTACH); break; case 40: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_GT125B); break; case 41: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_GT250B); break; case 42: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_GT500B); break; case 43: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_GT1000B); break; case 44: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_GT2000B); break; case 45: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_GT4000B); break; case 46: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_GT8000B); break; case 47: m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_GT16000B); break; default: AssertSz(FALSE, "unsupported special feature"); break; } return; } VOID CJunkFilter::_PrintFeatureToLog(ULONG ulIndex) { LPSTR pszBuff = NULL; LPSTR pszTag = NULL; // Figure out which tag line to use switch (m_rgfeaturecomps[ulIndex].loc) { case locNil: goto exit; break; case locBody: pszTag = LOG_BODY; break; case locSubj: pszTag = LOG_SUBJECT; break; case locFrom: pszTag = LOG_FROM; break; case locTo: pszTag = LOG_TO; break; case locSpecial: _PrintSpecialFeatureToLog(m_rgfeaturecomps[ulIndex].ulRuleNum); goto exit; break; } // Write out the feature to the log PrintToLogFile(m_pILogFile, pszTag, m_rgfeaturecomps[ulIndex].pszFeature); exit: SafeMemFree(pszBuff); return; } HRESULT CJunkFilter::_HrCreateLogFile(VOID) { HRESULT hr = S_OK; LPSTR pszLogFile = NULL; ULONG cbData = 0; ILogFile * pILogFile = NULL; DWORD dwData = 0; if (FALSE != m_fJunkMailLogInit) { hr = S_FALSE; goto exit; } m_fJunkMailLogInit = TRUE; // Get the size of the path to Outlook Express cbData = sizeof(dwData); if ((ERROR_SUCCESS != SHGetValue(HKEY_LOCAL_MACHINE, STR_REG_PATH_FLAT, "JunkMailLog", NULL, (BYTE *) &dwData, &cbData)) || (0 == dwData)) { hr = S_FALSE; goto exit; } // Get the size of the path to Outlook Express if (ERROR_SUCCESS != SHGetValue(HKEY_LOCAL_MACHINE, STR_REG_PATH_FLAT, "InstallRoot", NULL, NULL, &cbData)) { hr = E_FAIL; goto exit; } // How much room do we need to build up the path cbData += lstrlen(szJunkMailLog) + 2; // Allocate space to hold the path hr = HrAlloc((VOID **) &pszLogFile, cbData); if (FAILED(hr)) { goto exit; } // Get the path to Outlook Express ULONG cbBuffer = cbData; if (ERROR_SUCCESS != SHGetValue(HKEY_LOCAL_MACHINE, STR_REG_PATH_FLAT, "InstallRoot", NULL, (BYTE *) pszLogFile, &cbBuffer)) { hr = E_FAIL; goto exit; } // Build up the path to the Junk DLL if ('\\' != pszLogFile[lstrlen(pszLogFile)]) { StrCatBuff(pszLogFile, "\\", cbData); } StrCatBuff(pszLogFile, szJunkMailLog, cbData); hr = CreateLogFile(g_hInst, pszLogFile, szJunkMailPrefix, DONT_TRUNCATE, &pILogFile, FILE_SHARE_READ | FILE_SHARE_WRITE); if (FAILED(hr)) { goto exit; } SafeRelease(m_pILogFile); m_pILogFile = pILogFile; hr = S_OK; exit: SafeMemFree(pszLogFile); return hr; } VOID PrintToLogFile(ILogFile * pILogFile, LPSTR pszTmpl, LPSTR pszArg) { LPSTR pszBuff = NULL; ULONG cchBuff = 0; Assert(NULL != pILogFile); Assert(NULL != pszTmpl); if (NULL == pszArg) { pszArg = ""; } // Figure out the size of the resulting buffer cchBuff = lstrlen(pszTmpl) + lstrlen(pszArg) + 2; // Allocate the needed space if (FAILED(HrAlloc((VOID **) &pszBuff, cchBuff * sizeof(*pszBuff)))) { goto exit; } // Create the output string wnsprintf(pszBuff, cchBuff, pszTmpl, pszArg); // Print the buffer to the log file pILogFile->WriteLog(LOGFILE_DB, pszBuff); exit: SafeMemFree(pszBuff); return; } #endif // DEBUG