windows-server-2003/inetcore/outlookexpress/oejunk/junkeng.cpp

/*

  SVMHANDLER.CPP
  (c) copyright 1998 Microsoft Corp

  Contains the class encapsulating the Support Vector Machine used to do on the fly spam detection

  Robert Rounthwaite (RobertRo@microsoft.com)

*/

#include <pch.hxx>
#include "junkeng.h"
#include "junkutil.h"
#include "parsestm.h"
#include <iert.h>
#include <math.h>
#include <limits.h>

class CBodyBuff
{
    private:
        enum
        {
            CB_BODYBUFF_MAX = 4096
        };

        enum
        {
            BBF_CLEAR           = 0x00000000,
            BBF_SET             = 0x00000001,
            BBF_ALPHA           = 0x00000002,
            BBF_NUM             = 0x00000004,
            BBF_SPACE           = 0x00000008,
            BBF_MASK            = 0x0000000F
        };

    private:
        IStream *   m_pIStream;
        ULONG       m_cbStream;
        ULONG       m_ibStream;
        BYTE        m_rgbBuff[CB_BODYBUFF_MAX];
        ULONG       m_cbBuffTotal;
        BYTE *      m_pbBuffCurr;
        DWORD       m_dwFlagsCurr;
        BYTE *      m_pbBuffGood;
        BYTE *      m_pbBuffPrev;
        DWORD       m_dwFlagsPrev;

    public:
        CBodyBuff() : m_pIStream(NULL), m_cbStream(0), m_ibStream(0),
                        m_cbBuffTotal(0), m_pbBuffCurr(m_rgbBuff), m_dwFlagsCurr(BBF_CLEAR),
                        m_pbBuffGood(m_rgbBuff), m_pbBuffPrev(NULL), m_dwFlagsPrev(BBF_CLEAR) {}
        ~CBodyBuff() {SafeRelease(m_pIStream);}

        HRESULT HrInit(DWORD dwFlags, IStream * pIStream);
        HRESULT HrGetCurrChar(CHAR * pchNext);
        BOOL FDoMatch(FEATURECOMP * pfcomp);

        HRESULT HrMoveNext(VOID)
        {
            m_pbBuffPrev = m_pbBuffCurr;
            m_dwFlagsPrev = m_dwFlagsCurr;
            
            m_pbBuffCurr = (BYTE *) CharNext((LPSTR) m_pbBuffCurr);
            m_dwFlagsCurr = BBF_CLEAR;
            
            return S_OK;
        }

    private:
        HRESULT _HrFillBuffer(VOID);
};

static const LPSTR szCountFeatureComp       = "FeatureComponentCount = ";
static const LPSTR szDefaultThresh          = "dThresh =  ";
static const LPSTR szMostThresh             = "mThresh =  ";
static const LPSTR szLeastThresh            = "lThresh =  ";
static const LPSTR szThresh                 = "Threshold =  ";
static const LPSTR szNumberofDim            = "NumDim = ";

#ifdef DEBUG
static const LPSTR STR_REG_PATH_FLAT        = "Software\\Microsoft\\Outlook Express";
static const LPSTR szJunkMailPrefix         = "JUNKMAIL";
static const LPSTR szJunkMailLog            = "JUNKMAIL.LOG";

static const LPSTR LOG_TAGLINE              = "Calculating Junk Mail for message: %s";
static const LPSTR LOG_FIRSTNAME            = "User's First Name: %s";
static const LPSTR LOG_LASTNAME             = "User's Last Name: %s";
static const LPSTR LOG_COMPANYNAME          = "User's Company Name: %s";
static const LPSTR LOG_BODY                 = "Body contains: %s";
static const LPSTR LOG_SUBJECT              = "Subject contains: %s";
static const LPSTR LOG_TO                   = "To line contains: %s";
static const LPSTR LOG_FROM                 = "From line contains: %s";
static const LPSTR LOG_FINAL                = "Junk Mail percentage: %0.1d.%0.6d\r\n";
#endif  // DEBUG

BOOL FReadDouble(LPSTR pszLine, LPSTR pszToken, DOUBLE * pdblVal);
#ifdef DEBUG
VOID PrintToLogFile(ILogFile * pILogFile, LPSTR pszTmpl, LPSTR pszArg);
#endif  // DEBUG

HRESULT CBodyBuff::HrInit(DWORD dwFlags, IStream * pIStream)
{
    HRESULT     hr = S_OK;

    // Check incoming params
    if (NULL == pIStream)
    {
        hr = E_INVALIDARG;
        goto exit;
    }

    // Set the stream
    m_pIStream = pIStream;
    m_pIStream->AddRef();

    // Get the stream size
    hr = HrGetStreamSize(m_pIStream, &m_cbStream);
    if (FAILED(hr))
    {
        goto exit;
    }

    // Reset the stream to the beginning
    hr = HrRewindStream(m_pIStream);
    if (FAILED(hr))
    {
        goto exit;
    }

    // Start from the beginning
    m_ibStream = 0;
    
exit:
    return hr;
}

HRESULT CBodyBuff::HrGetCurrChar(CHAR * pchNext)
{
    HRESULT     hr = S_OK;

    // Check incoming params
    Assert(NULL != pchNext);

    // Do we need to get any more characters?
    if (m_pbBuffCurr >= m_pbBuffGood)
    {
        // If we couldn't get any more characters
        if (S_OK != _HrFillBuffer())
        {
            hr = E_FAIL;
            goto exit;
        }
    }
    
    // Get the current char
    *pchNext = *m_pbBuffCurr;

    hr = S_OK;
    
exit:
    return hr;
}

BOOL CBodyBuff::FDoMatch(FEATURECOMP * pfcomp)
{
    BOOL        fRet = FALSE;
    BYTE *      pbSearch = NULL;
    ULONG       cchSearch = 0;
    LPSTR       pszMatch = NULL;
    DWORD       dwFlags = 0;

    // Check incoming params
    Assert(NULL != pfcomp);
    Assert(NULL != pfcomp->pszFeature);
    Assert(0 != pfcomp->cchFeature);

    // Set up some locals
    cchSearch = pfcomp->cchFeature;

    // Do we need more characters for the match?
    
    // Include the character after the string, just in case
    // we have a match and need to check the character after
    // the string for a word break
    if ((cchSearch + 1) > (ULONG) (m_pbBuffGood - m_pbBuffCurr))
    {
        // Get more characters

        // If this fails, we still might be good, since
        // we might just have enough characters to do the
        // full match at the end of the stream.
        (VOID) _HrFillBuffer();   
        
        // Could we get enough?
        if (cchSearch > (ULONG) (m_pbBuffGood - m_pbBuffCurr))
        {
            // No Match
            fRet = FALSE;
            goto exit;
        }
    }
    
    // Do match
    pbSearch = m_pbBuffCurr;
    pszMatch = pfcomp->pszFeature;
    while (0 != cchSearch--)
    {
        if (*(pszMatch++) != *(pbSearch++))
        {
            // No Match
            fRet = FALSE;
            goto exit;
        }
    }
                    
    // Validate the match

    // Do we need to figure out if it starts with a word break?
    if (0 != (pfcomp->dwFlags & CT_START_SET))
    {
        dwFlags = pfcomp->dwFlags;
    }
    else
    {
        Assert(CT_END_SET != (dwFlags & CT_END_SET));
        dwFlags = m_dwFlagsCurr;
    }
    
    Assert(CT_START_SET == BBF_SET);
    Assert(CT_START_ALPHA == BBF_ALPHA);
    fRet = FMatchToken((NULL == m_pbBuffPrev),
                        ((m_ibStream >= m_cbStream) && ((m_pbBuffCurr + pfcomp->cchFeature) >= m_pbBuffGood)),
                        (LPCSTR) m_pbBuffPrev, &m_dwFlagsPrev, pfcomp->pszFeature,
                        pfcomp->cchFeature, &dwFlags, (LPCSTR) (m_pbBuffCurr + pfcomp->cchFeature));

    // Save the changed flags
    pfcomp->dwFlags = dwFlags;

    // Cache the current character's state
    m_dwFlagsCurr = (dwFlags & BBF_MASK);
    
exit:
    return fRet;
}

HRESULT CBodyBuff::_HrFillBuffer(VOID)
{
    HRESULT     hr = S_OK;
    LONG        cbExtra = 0;
    ULONG       cbRead = 0;
    ULONG       cbToRead = 0;

    // If there isn't any more of the stream to grab
    if (m_ibStream >= m_cbStream)
    {
        hr = S_FALSE;
        goto exit;
    }
    
    // If this is the first time through, save nothing
    if (NULL == m_pbBuffPrev)
    {
        cbExtra = 0;
    }
    else
    {
        // How much space should I save?
        cbExtra = (ULONG) (m_cbBuffTotal - (m_pbBuffPrev - m_rgbBuff));
        Assert(cbExtra > 0);
        
        // Save the unused data
        MoveMemory(m_rgbBuff, m_pbBuffPrev, (int)min(cbExtra, sizeof(m_rgbBuff)));
        
        // Reset the current pointer
        m_pbBuffCurr = m_rgbBuff + (m_pbBuffCurr - m_pbBuffPrev);

        // Reset the previous pointer
        m_pbBuffPrev = m_rgbBuff;    
    }

    // Read in more data
    cbToRead = (int)min(CB_BODYBUFF_MAX - cbExtra - 1, (LONG) (m_cbStream - m_ibStream));
    hr = m_pIStream->Read(m_rgbBuff + cbExtra, cbToRead, &cbRead);
    if ((FAILED(hr)) || (0 == cbRead))
    {
        // End of stream
        hr = S_FALSE;
    }
    else
    {
        hr = S_OK;
    }

    // Track the number of bytes read
    m_ibStream += cbRead;
    
    // Set the total buffer size
    m_cbBuffTotal = cbExtra + cbRead;

    // Terminate the buffer, just in case
    m_rgbBuff[m_cbBuffTotal] = '\0';
    
    // Uppercase the buffer
    m_pbBuffGood = m_rgbBuff + CharUpperBuff((CHAR *) m_rgbBuff, m_cbBuffTotal);
        
exit:
    return hr;
}

HRESULT CJunkFilter::_HrBuildBodyList(USHORT cBodyItems)
{
    HRESULT             hr = S_OK;
    USHORT              usIndex = 0;
    FEATURECOMP *       pfcomp = NULL;
    USHORT              iBodyList = 0;

    // Check incoming params
    if (0 == cBodyItems)
    {
        hr = E_INVALIDARG;
        goto exit;
    }

    Assert(USHRT_MAX > cBodyItems);
    
    // Make sure the old items are freed
    SafeMemFree(m_pblistBodyList);        
    m_cblistBodyList = 0;

    // Initialize the list
    ZeroMemory(m_rgiBodyList, sizeof(m_rgiBodyList));

    // Allocate space to hold all of the items
    hr = HrAlloc((VOID **) &m_pblistBodyList, sizeof(*m_pblistBodyList) * (cBodyItems + 1));
    if (FAILED(hr))
    {
        goto exit;
    }

    // Initialize the body list
    ZeroMemory(m_pblistBodyList, sizeof(*m_pblistBodyList) * (cBodyItems + 1));
    
    // For each feature
    for (usIndex = 0, iBodyList = 1, pfcomp = m_rgfeaturecomps; usIndex < m_cFeatureComps; usIndex++, pfcomp++)
    {
        // If it's a body feature
        if (locBody == pfcomp->loc)
        {
            // Initialize it
            m_pblistBodyList[iBodyList].usItem = usIndex;
            
            // Add it to the list
            m_pblistBodyList[iBodyList].iNext = m_rgiBodyList[(UCHAR) (pfcomp->pszFeature[0])];
            m_rgiBodyList[(UCHAR) (pfcomp->pszFeature[0])] = iBodyList;

            // Move to the next body item
            iBodyList++;
        }
    }

    // Save the number of items
    m_cblistBodyList = cBodyItems + 1;
    
    // Set the return value
    hr = S_OK;
    
exit:
    return hr;
}

/////////////////////////////////////////////////////////////////////////////
// _FReadSVMOutput
//
// Read the SVM output from a file (".LKO file")
/////////////////////////////////////////////////////////////////////////////
HRESULT CJunkFilter::_HrReadSVMOutput(LPCSTR pszFileName)
{
    HRESULT         hr = S_OK;
    CParseStream    parsestm;
    ULONG           ulIndex = 0;
    LPSTR           pszBuff = NULL;
    ULONG           cchBuff = 0;
    LPSTR           pszDummy = NULL;
    LPSTR           pszDefThresh = NULL;
    ULONG           cFeatureComponents = 0;
    LPSTR           pszFeature = NULL;
    ULONG           ulFeatureComp = 0;
    USHORT          cBodyItems = 0;
    FEATURECOMP *   pfeaturecomp = NULL;

    if ((NULL == pszFileName) || ('\0' == *pszFileName))
    {
        hr = E_INVALIDARG;
        goto exit;
    }
    
    // Get the parse stream
    hr = parsestm.HrSetFile(0, pszFileName);
    if (FAILED(hr))
    {
        goto exit;
    }
    
    // skip first two lines
    for (ulIndex = 0; ulIndex < 3; ulIndex++)
    {
        SafeMemFree(pszBuff);
        hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff);
        if (FAILED(hr))
        {
            goto exit;
        }
    }

    // parse 3rd line: only care about CC and DD
    if (FALSE == FReadDouble(pszBuff, "cc =  ", &m_dblCC))
    {
        hr = E_FAIL;
        goto exit;
    }
    
    if (FALSE == FReadDouble(pszBuff, "dd =  ", &m_dblDD))
    {
        hr = E_FAIL;
        goto exit;
    }
    
    SafeMemFree(pszBuff);
    hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff);
    if (FAILED(hr))
    {
        goto exit;
    }
    
    if (FALSE == FReadDouble(pszBuff, szDefaultThresh, &m_dblDefaultThresh))
    {
        m_dblDefaultThresh = THRESH_DEFAULT;
    }

    if (0 == m_dblSpamCutoff)
    {
        m_dblSpamCutoff = m_dblDefaultThresh;
    }
    
    if (FALSE == FReadDouble(pszBuff, szThresh, &m_dblThresh))
    {
        hr = E_FAIL;
        goto exit;
    }
        
    SafeMemFree(pszBuff);
    hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff);
    if (FAILED(hr))
    {
        goto exit;
    }
    
    if (FALSE == FReadDouble(pszBuff, szMostThresh, &m_dblMostThresh))
    {
        m_dblMostThresh = THRESH_MOST;
    }

    if (FALSE == FReadDouble(pszBuff, szLeastThresh, &m_dblLeastThresh))
    {
        m_dblLeastThresh = THRESH_LEAST;
    }

    SafeMemFree(pszBuff);
    hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff);
    if (FAILED(hr))
    {
        goto exit;
    }
    
    m_cFeatures = StrToInt(pszBuff + lstrlen(szNumberofDim));
    if (0 == m_cFeatures)
    {
        hr = E_FAIL;
        goto exit;
    }

    // We only support up to USHRT_MAX features
    if (m_cFeatures >= USHRT_MAX)
    {
        hr = E_OUTOFMEMORY;
        goto exit;
    }

    SafeMemFree(pszBuff);
    hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff);
    if (FAILED(hr))
    {
        goto exit;
    }
    
    pszDummy = StrStr(pszBuff, szCountFeatureComp);
    if (NULL != pszDummy)
    {
        pszDummy += lstrlen(szCountFeatureComp);
        cFeatureComponents = StrToInt(pszDummy);
    }

    if (cFeatureComponents < m_cFeatures)
    {
        cFeatureComponents = m_cFeatures * 2;
    }
    
    while (0 != lstrcmp(pszBuff, "Weights"))
    {
        SafeMemFree(pszBuff);
        hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff);
        if (FAILED(hr))
        {
            goto exit;
        }
    }

    SafeMemFree(m_rgdblSVMWeights);
    hr = HrAlloc((void **) &m_rgdblSVMWeights, sizeof(*m_rgdblSVMWeights) * m_cFeatures);
    if (FAILED(hr))
    {
        goto exit;
    }
    
    SafeMemFree(m_rgulFeatureStatus);
    hr = HrAlloc((void **) &m_rgulFeatureStatus, sizeof(*m_rgulFeatureStatus) * m_cFeatures);
    if (FAILED(hr))
    {
        goto exit;
    }
    FillMemory(m_rgulFeatureStatus, sizeof(*m_rgulFeatureStatus) * m_cFeatures, -1);
    
    SafeMemFree(m_rgfeaturecomps);
    hr = HrAlloc((void **) &m_rgfeaturecomps, sizeof(*m_rgfeaturecomps) * cFeatureComponents);
    if (FAILED(hr))
    {
        goto exit;
    }

    // Initialize the features
    ZeroMemory(m_rgfeaturecomps, sizeof(*m_rgfeaturecomps) * cFeatureComponents);
    
    for (ulIndex = 0; ulIndex < m_cFeatures; ulIndex++)
    {
        UINT    uiLoc;
        USHORT  cbStr;
        boolop  bop;
        BOOL    fContinue;
        BOOL    fNegative;
        
        SafeMemFree(pszBuff);
        hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff);
        if (FAILED(hr))
        {
            goto exit;
        }
        
        // read the SVM weight
        pszDummy = pszBuff;
        fNegative = ('-' == *pszDummy);
        pszDummy++;
        
        m_rgdblSVMWeights[ulIndex] = StrToDbl(pszDummy, &pszDummy);

        if (FALSE != fNegative)
        {
            m_rgdblSVMWeights[ulIndex] *= -1;
        }
        
        pszDummy++; // skip the separator
        bop = boolopOr;
        fContinue = false;
        do
        {
            pfeaturecomp = &m_rgfeaturecomps[ulFeatureComp++];
            
            // Skip over white space
            UlStripWhitespace(pszDummy, TRUE, FALSE, NULL);
            
            // Location (or "special")
            uiLoc = StrToInt(pszDummy);
            pszDummy = StrStr(pszDummy, ":"); // skip the separator
            pszDummy++;

            pfeaturecomp->loc = (FeatureLocation)uiLoc;
            pfeaturecomp->ulFeature = ulIndex;
            pfeaturecomp->bop = bop;

            if (locBody == pfeaturecomp->loc)
            {
                cBodyItems++;
            }
            
            if (uiLoc == 5)
            {
                UINT uiRuleNumber = StrToInt(pszDummy);
                pszDummy += StrSpn(pszDummy, "0123456789");

                pfeaturecomp->ulRuleNum = uiRuleNumber;
            }
            else
			{
                cbStr  = (USHORT) StrToInt(pszDummy);
                pszDummy = StrStr(pszDummy, ":");
                pszDummy++;

                // We only support strings up to USHRT_MAX
                if (cbStr >= USHRT_MAX)
                {
                    hr = E_OUTOFMEMORY;
                    goto exit;
                }
                
                hr = HrAlloc((void **) &pszFeature, sizeof(*pszFeature) * (cbStr + 1));
                if (FAILED(hr))
                {
                    goto exit;
                }
                
                StrCpyN(pszFeature, pszDummy, cbStr + 1);
                pszDummy += cbStr;
                if ('\0' != *pszDummy)
                {
                    pszDummy++; // skip the separator
                }
                
                pszFeature[cbStr] = '\0';
                Assert(cbStr == strlen(pszFeature));

                // Save off the string
                pfeaturecomp->pszFeature = pszFeature;
                pszFeature = NULL;
                pfeaturecomp->cchFeature = cbStr;
            }
            
            UlStripWhitespace(pszDummy, TRUE, FALSE, NULL);
            
            switch(*pszDummy)
            {
              case '|':  
                bop = boolopOr;
                fContinue = TRUE;
                break;
                
              case '&':  
                bop = boolopAnd;
                fContinue = TRUE;
                break;
                
              default: 
                fContinue = FALSE;
                break;
            }
            
            pszDummy++;
        }
        while (fContinue);
    }
    
    m_cFeatureComps = ulFeatureComp;

    // Build up body items...
    hr = _HrBuildBodyList(cBodyItems);
    if (FAILED(hr))
    {
        goto exit;
    }

    hr = S_OK;
    
exit:
    SafeMemFree(pszFeature);
    SafeMemFree(pszBuff);
    return hr;
}

/////////////////////////////////////////////////////////////////////////////
// _FInvokeSpecialRule
//
// Invokes the special rule that is this FEATURECOMP. 
// Returns the state of the feature.
/////////////////////////////////////////////////////////////////////////////
BOOL CJunkFilter::_FInvokeSpecialRule(UINT iRuleNum)
{
    BOOL        fRet = FALSE;
    SYSTEMTIME  stSent;
    CHAR        rgchYear[6];
    ULONG       cbSize = 0;
    DWORD       dwDummy = 0;
    
    switch (iRuleNum)
    {
        case 1:
            fRet = FStreamStringSearch(m_pIStmBody, &dwDummy, m_pszFirstName, m_cchFirstName, 0);
            break;
            
        case 2: 
            fRet = FStreamStringSearch(m_pIStmBody, &dwDummy, m_pszLastName, m_cchLastName, 0);
            break;
            
        case 3:
            fRet = FStreamStringSearch(m_pIStmBody, &dwDummy, m_pszCompanyName, m_cchCompanyName, 0);
            break;
            
        case 4: 
            // year message received
            if (FALSE == FTimeEmpty(&m_ftMessageSent))
            {
                // Convert to system time so we can get the year
                SideAssert(FALSE != FileTimeToSystemTime(&m_ftMessageSent, &stSent));

                
                wnsprintf(rgchYear, ARRAYSIZE(rgchYear), "%d", stSent.wYear);
                dwDummy = CT_START_SET | CT_START_NUM | CT_END_SET | CT_END_NUM;
                fRet = FStreamStringSearch(m_pIStmBody, &dwDummy, rgchYear, lstrlen(rgchYear), SSF_CASESENSITIVE);
            }
            break;
            
        case 5:
            // message received in the wee hours (>= 7pm or <6am
            if (FALSE == FTimeEmpty(&m_ftMessageSent))
            {
                // Convert to system time so we can get the year
                SideAssert(FALSE != FileTimeToSystemTime(&m_ftMessageSent, &stSent));
                
                fRet = (stSent.wHour >= (7 + 12)) || (stSent.wHour < 6);
            }
            break;
            
        case 6:
            // message received on weekend
            if (FALSE == FTimeEmpty(&m_ftMessageSent))
            {
                // Convert to system time so we can get the year
                SideAssert(FALSE != FileTimeToSystemTime(&m_ftMessageSent, &stSent));
                
                fRet = ((0 == stSent.wDayOfWeek) || (6 == stSent.wDayOfWeek));
            }
            break;
            
        case 14:
            fRet = m_fRule14; // set in _HandleCaseSensitiveSpecialRules()
            break;
            
        case 15:
            fRet = FSpecialFeatureNonAlphaStm(m_pIStmBody);
            break;
            
        case 16:
            fRet = m_fDirectMessage;
            break;
            
        case 17:
            fRet = m_fRule17; // set in _HandleCaseSensitiveSpecialRules()
            break;
            
        case 18:
            fRet = FSpecialFeatureNonAlpha(m_pszSubject);
            break;
            
        case 19:
            fRet = ((NULL == m_pszTo) || ('\0' == *m_pszTo));
            break;
            
        case 20:
            fRet = m_fHasAttach;
            break;

        case 40:
            fRet = (m_cbBody >= 125);
            break;
            
        case 41:
            fRet = (m_cbBody >= 250);
            break;
            
        case 42:
            fRet = (m_cbBody >= 500);
            break;
            
        case 43:
            fRet = (m_cbBody >= 1000);
            break;
            
        case 44:
            fRet = (m_cbBody >= 2000);
            break;
            
        case 45:
            fRet = (m_cbBody >= 4000);
            break;
            
        case 46:
            fRet = (m_cbBody >= 8000);
            break;
            
        case 47:
            fRet = (m_cbBody >= 16000);
            break;
            
        default:
            AssertSz(FALSE, "unsupported special feature");
            break;
    }
    
    return fRet;
}


/////////////////////////////////////////////////////////////////////////////
// _HandleCaseSensitiveSpecialRules
//
// Called from _EvaluateFeatureComponents().
// Some special rules are case sensitive, so if they're present, we'll 
// evaluate them before we make the texts uppercase and cache the result
// for when they are actually used.
/////////////////////////////////////////////////////////////////////////////
VOID CJunkFilter::_HandleCaseSensitiveSpecialRules()
{
    ULONG   ulIndex = 0;
    
    for (ulIndex = 0; ulIndex < m_cFeatureComps; ulIndex++)
    {
        if (m_rgfeaturecomps[ulIndex].loc == locSpecial)
        {
            switch (m_rgfeaturecomps[ulIndex].ulRuleNum)
            {
              case 14:
                m_fRule14 = FSpecialFeatureUpperCaseWordsStm(m_pIStmBody);
                break;
                
              case 17:
                m_fRule17 = FSpecialFeatureUpperCaseWords(m_pszSubject);
                break;
                
              default: 
                break;
            }
        }
    }
    
    return;
}

VOID CJunkFilter::_EvaluateBodyFeatures(VOID)
{
    CBodyBuff           buffBody;
    CHAR                chMatch = '\0';
    ULONG               ulIndex = 0;
    FEATURECOMP *       pfcomp = NULL;
    USHORT              iBodyList = 0;
    
    // Check to see if we have work to do
    if (NULL == m_pIStmBody)
    {
        goto exit;
    }

    // Set the stream into the buffer
    if (FAILED(buffBody.HrInit(0, m_pIStmBody)))
    {
        goto exit;
    }

    // Initialize all the body features to no found
    for (iBodyList = 1; iBodyList < m_cblistBodyList; iBodyList++)
    {
        // Set it to not found
        m_rgfeaturecomps[m_pblistBodyList[iBodyList].usItem].fPresent = FALSE;
    }
    
    // While we have more bytes to read
    for (; S_OK == buffBody.HrGetCurrChar(&chMatch); buffBody.HrMoveNext())
    {
        // Search for a match through the feature list
        for (iBodyList = m_rgiBodyList[(UCHAR) chMatch]; 0 != iBodyList; iBodyList = m_pblistBodyList[iBodyList].iNext)
        {
            pfcomp = &(m_rgfeaturecomps[m_pblistBodyList[iBodyList].usItem]);
            
            // If we have a body item and it hasn't been found yet
            if (FALSE == pfcomp->fPresent)
            {
                // Could this item be a possible match???
                Assert(NULL != pfcomp->pszFeature);
                
                // Try to do the comparison
                pfcomp->fPresent = buffBody.FDoMatch(pfcomp);
            }
        }
    }

exit:
    return;
}

/////////////////////////////////////////////////////////////////////////////
// _EvaluateFeatureComponents
//
// Evaluates all of the feature components. Sets fPresent in each component
// to true if the feature is present, false otherwise
/////////////////////////////////////////////////////////////////////////////
VOID CJunkFilter::_EvaluateFeatureComponents(VOID)
{
    ULONG           ulIndex = 0;
    FEATURECOMP *   pfcomp = NULL;
    
    _HandleCaseSensitiveSpecialRules();

    if (NULL != m_pszFrom)
    {
        CharUpperBuff(m_pszFrom, lstrlen(m_pszFrom));
    }
    if (NULL != m_pszTo)
    {
        CharUpperBuff(m_pszTo, lstrlen(m_pszTo));
    }
    if (NULL != m_pszSubject)
    {
        CharUpperBuff(m_pszSubject, lstrlen(m_pszSubject));
    }

    for (ulIndex = 0; ulIndex < m_cFeatureComps; ulIndex++)
    {
        pfcomp = &m_rgfeaturecomps[ulIndex];
        
        switch(pfcomp->loc)
        {
          case locNil:
            Assert(locNil != pfcomp->loc);
            pfcomp->fPresent = FALSE;
            break;

          case locSubj:
            pfcomp->fPresent = FWordPresent(m_pszSubject, &(pfcomp->dwFlags), pfcomp->pszFeature, pfcomp->cchFeature, NULL);
            break;
            
          case locFrom:
            pfcomp->fPresent = FWordPresent(m_pszFrom, &(pfcomp->dwFlags), pfcomp->pszFeature, pfcomp->cchFeature, NULL);
            break;
            
          case locTo:
            pfcomp->fPresent = FWordPresent(m_pszTo, &(pfcomp->dwFlags), pfcomp->pszFeature, pfcomp->cchFeature, NULL);
            break;
            
          case locSpecial:
            pfcomp->fPresent = _FInvokeSpecialRule(pfcomp->ulRuleNum);
            break;
        }
    }
}

/////////////////////////////////////////////////////////////////////////////
// ProcessFeatureComponentPresence
//
// Processes the presence (or absence) of the individual feature components,
// setting the feature status of each feature (which may me made up of
// multiple feature components).
/////////////////////////////////////////////////////////////////////////////
VOID CJunkFilter::_ProcessFeatureComponentPresence(VOID)
{
    ULONG               ulIndex = 0;
    FEATURECOMP *       pfcomp = NULL;
    ULONG               ulFeature = 0;
    
    for (ulIndex = 0; ulIndex < m_cFeatureComps; ulIndex++)
    {
        pfcomp = &m_rgfeaturecomps[ulIndex];
        ulFeature = pfcomp->ulFeature;
        
        if (-1 == m_rgulFeatureStatus[ulFeature]) // first feature of this feature
        {
            if (FALSE != pfcomp->fPresent)
            {
                m_rgulFeatureStatus[ulFeature] = 1;
            }
            else
            {
                m_rgulFeatureStatus[ulFeature] = 0;
            }
        }
        else
        {
            switch (pfcomp->bop)
            {
              case boolopOr:
                if (pfcomp->fPresent)
                {
                    m_rgulFeatureStatus[ulFeature] = 1;
                }
                break;
                
              case boolopAnd:
                if (!pfcomp->fPresent)
                {
                    m_rgulFeatureStatus[ulFeature] = 0;
                }
                break;
                
              default:
                Assert(FALSE);
                break;
            }

        }
    }
}

/////////////////////////////////////////////////////////////////////////////
// _DblDoSVMCalc
//
// Does the actual support vector machine calculation.
// Returns the probability that the message is spam
/////////////////////////////////////////////////////////////////////////////
DOUBLE CJunkFilter::_DblDoSVMCalc(VOID)
{
    DOUBLE  dblAccum;
    DOUBLE  dblResult;
    ULONG   ulIndex = 0;

    dblAccum = 0.0;
    
    for (ulIndex = 0; ulIndex < m_cFeatures; ulIndex++)
    {
        if (m_rgulFeatureStatus[ulIndex] == 1)
        {
            dblAccum += m_rgdblSVMWeights[ulIndex];
#ifdef DEBUG    
            if (NULL != m_pILogFile)
            {
                _PrintFeatureToLog(ulIndex);
            }
#endif  // DEBUG
        }
        else if (m_rgulFeatureStatus[ulIndex] != 0)
        {
            AssertSz(FALSE, "What happened here!");
        }
    }
    
    // Apply threshold;
    dblAccum -= m_dblThresh;

    // Apply sigmoid
    dblResult = (1 / (1 + exp((m_dblCC * dblAccum) + m_dblDD)));

    return dblResult;
}

/////////////////////////////////////////////////////////////////////////////
// BCalculateSpamProb
//
// Calculates the probability that the current message is spam.
// Returns the probability (0 to 1) that the message is spam in prSpamProb
// the boolean return is determined by comparing to the spam cutoff
/////////////////////////////////////////////////////////////////////////////
BOOL CJunkFilter::FCalculateSpamProb(LPSTR pszFrom, LPSTR pszTo, LPSTR pszSubject, IStream * pIStmBody,
                            BOOL fDirectMessage, BOOL fHasAttach, FILETIME * pftMessageSent,
                            DOUBLE * pdblSpamProb, BOOL * pfIsSpam)
{
#ifdef DEBUG
    CHAR    rgchBuff[1024];
    DWORD   dwVal = 0;
#endif  // DEBUG
    
    m_pszFrom = pszFrom;
    m_pszTo = pszTo;        
    m_pszSubject = pszSubject;   
    m_pIStmBody = pIStmBody;      
    m_fDirectMessage = fDirectMessage;
    m_fHasAttach = fHasAttach;
    m_ftMessageSent = *pftMessageSent;

    // Set the size of the body
    if ((NULL == m_pIStmBody) || (FAILED(HrGetStreamSize(m_pIStmBody, &m_cbBody))))
    {
        m_cbBody = 0;
    }

#ifdef DEBUG
    // Get the logfile if we need it
    if (NULL == m_pILogFile)
    {
        _HrCreateLogFile();
    }

    if (NULL != m_pILogFile)
    {
        PrintToLogFile(m_pILogFile, LOG_TAGLINE, pszSubject);

        PrintToLogFile(m_pILogFile, LOG_FIRSTNAME, m_pszFirstName);
        
        PrintToLogFile(m_pILogFile, LOG_LASTNAME, m_pszLastName);
        
        PrintToLogFile(m_pILogFile, LOG_COMPANYNAME, m_pszCompanyName);
    }
#endif  // DEBUG
    
    _EvaluateBodyFeatures();
    _EvaluateFeatureComponents();
    _ProcessFeatureComponentPresence();

    *pdblSpamProb = _DblDoSVMCalc();
    
#ifdef DEBUG
    if (NULL != m_pILogFile)
    {
        dwVal = ( DWORD ) ((*pdblSpamProb * 1000000) + 0.5);
        
        wnsprintf(rgchBuff, ARRAYSIZE(rgchBuff), LOG_FINAL, dwVal / 1000000, dwVal % 1000000);
        
        m_pILogFile->WriteLog(LOGFILE_DB, rgchBuff);
        m_pILogFile->WriteLog(LOGFILE_DB, "");
    }
#endif  // DEBUG
    
    *pfIsSpam = (*pdblSpamProb > m_dblSpamCutoff);

    return TRUE;
}

/////////////////////////////////////////////////////////////////////////////
// BReadDefaultSpamCutoff
//
// Reads the default spam cutoff without parsing entire file
// Use GetDefaultSpamCutoff if using HrSetSVMDataLocation;
// static member function
/////////////////////////////////////////////////////////////////////////////
HRESULT CJunkFilter::HrReadDefaultSpamCutoff(LPSTR pszFullPath, DOUBLE * pdblDefCutoff)
{
    HRESULT         hr = S_OK;
    CParseStream    parsestm;
    LPSTR           pszBuff = NULL;
    ULONG           cchBuff = 0;
    LPSTR           pszDefThresh = NULL;
    ULONG           ulIndex = 0;
    LPSTR           pszDummy = NULL;
    
    if ((NULL == pszFullPath) || ('\0' == *pszFullPath) || (NULL == pdblDefCutoff))
    {
        hr = E_INVALIDARG;
        goto exit;
    }
    
    // Get the parse stream
    hr = parsestm.HrSetFile(0, pszFullPath);
    if (FAILED(hr))
    {
        goto exit;
    }
    
    // skip first three lines
    for (ulIndex = 0; ulIndex < 4; ulIndex++)
    {
        SafeMemFree(pszBuff);
        hr = parsestm.HrGetLine(0, &pszBuff, &cchBuff);
        if (FAILED(hr))
        {
            goto exit;
        }
    }

    // Find the default threshold
    pszDefThresh = StrStr(pszBuff, ::szDefaultThresh);
    if (NULL == pszDefThresh)
    {
        hr = E_FAIL;
        goto exit;
    }

    // Grab the value
    pszDefThresh += lstrlen(::szDefaultThresh);
    *pdblDefCutoff = StrToDbl(pszDefThresh, &pszDummy);

    // Set the proper return value
    hr = S_OK;
    
exit:
    SafeMemFree(pszBuff);
    return hr;
}

/////////////////////////////////////////////////////////////////////////////
// Constructor/destructor
//
/////////////////////////////////////////////////////////////////////////////
CJunkFilter::CJunkFilter() : m_cRef(0), m_pszFirstName(NULL), m_cchFirstName(0), m_pszLastName(NULL),
                m_cchLastName(0), m_pszCompanyName(NULL), m_cchCompanyName(0), m_pblistBodyList(NULL),
                m_cblistBodyList(0), m_rgfeaturecomps(NULL), m_rgdblSVMWeights(NULL), m_dblCC(0), m_dblDD(0),
                m_dblThresh(-1), m_dblDefaultThresh(-1), m_dblMostThresh(0), m_dblLeastThresh(0), m_cFeatures(0),
                m_cFeatureComps(0), m_rgulFeatureStatus(0),
                m_pszLOCPath(NULL), m_dblSpamCutoff(0), m_pszFrom(NULL), m_pszTo(NULL), m_pszSubject(NULL),
                m_pIStmBody(NULL), m_cbBody(0), m_fDirectMessage(FALSE), m_fHasAttach(FALSE),
                m_fRule14(FALSE), m_fRule17(FALSE)
{
    ZeroMemory(m_rgiBodyList, sizeof(m_rgiBodyList));
    
    ZeroMemory(&m_ftMessageSent, sizeof(m_ftMessageSent));
    InitializeCriticalSection(&m_cs);
#ifdef DEBUG
    m_fJunkMailLogInit = FALSE;
    m_pILogFile = NULL;
#endif  // DEBUG
}

CJunkFilter::~CJunkFilter()
{
    ULONG       ulIndex = 0;
    
    SafeMemFree(m_pszFirstName);
    SafeMemFree(m_pszLastName);
    SafeMemFree(m_pszCompanyName);
#ifdef DEBUG
    SafeRelease(m_pILogFile);
#endif  // DEBUG

    for (ulIndex = 0; ulIndex < m_cFeatureComps; ulIndex++)
    {
        if ((locNil != m_rgfeaturecomps[ulIndex].loc) && (locSpecial != m_rgfeaturecomps[ulIndex].loc))
        {
            SafeMemFree(m_rgfeaturecomps[ulIndex].pszFeature);
        }
    }

    SafeMemFree(m_pblistBodyList);
    m_cblistBodyList = 0;
    ZeroMemory(m_rgiBodyList, sizeof(m_rgiBodyList));
    
    SafeMemFree(m_rgdblSVMWeights);
    SafeMemFree(m_rgulFeatureStatus);
    SafeMemFree(m_rgfeaturecomps);
    
    DeleteCriticalSection(&m_cs);
}

STDMETHODIMP_(ULONG) CJunkFilter::AddRef()
{
    return ::InterlockedIncrement(&m_cRef);
}

STDMETHODIMP_(ULONG) CJunkFilter::Release()
{
    LONG    cRef = 0;

    cRef = ::InterlockedDecrement(&m_cRef);
    if (0 == cRef)
    {
        delete this;
        return cRef;
    }

    return cRef;
}

STDMETHODIMP CJunkFilter::QueryInterface(REFIID riid, void ** ppvObject)
{
    HRESULT hr = S_OK;

    // Check the incoming params
    if (NULL == ppvObject)
    {
        hr = E_INVALIDARG;
        goto exit;
    }

    // Initialize outgoing param
    *ppvObject = NULL;
    
    if ((riid == IID_IUnknown) || (riid == IID_IOEJunkFilter))
    {
        *ppvObject = static_cast<IOEJunkFilter *>(this);
    }
    else
    {
        hr = E_NOINTERFACE;
        goto exit;
    }

    reinterpret_cast<IUnknown *>(*ppvObject)->AddRef();

    hr = S_OK;
    
exit:
    return hr;
}

STDMETHODIMP CJunkFilter::SetIdentity(LPCSTR pszFirstName, LPCSTR pszLastName, LPCSTR pszCompanyName)
{
    HRESULT     hr = S_OK;

    //Set the new first name
    SafeMemFree(m_pszFirstName);
    m_cchFirstName = 0;
    if (NULL != pszFirstName)
    {
        m_pszFirstName = PszDupA(pszFirstName);
        if (NULL == m_pszFirstName)
        {
            hr = E_OUTOFMEMORY;
            goto exit;
        }

        m_cchFirstName = CharUpperBuff(m_pszFirstName, lstrlen(m_pszFirstName));
    }
    
    // Set the new last name
    SafeMemFree(m_pszLastName);
    m_cchLastName = 0;
    if (NULL != pszLastName)
    {
        m_pszLastName = PszDupA(pszLastName);
        if (NULL == m_pszLastName)
        {
            hr = E_OUTOFMEMORY;
            goto exit;
        }

        m_cchLastName = CharUpperBuff(m_pszLastName, lstrlen(m_pszLastName));
    }
    
    // Set the new company name
    SafeMemFree(m_pszCompanyName);
    m_cchCompanyName = 0;
    if (NULL != pszCompanyName)
    {
        m_pszCompanyName = PszDupA(pszCompanyName);
        if (NULL == m_pszCompanyName)
        {
            hr = E_OUTOFMEMORY;
            goto exit;
        }

        m_cchCompanyName = CharUpperBuff(m_pszCompanyName, lstrlen(m_pszCompanyName));
    }

    hr = S_OK;
    
exit:
    return hr;
}

STDMETHODIMP CJunkFilter::LoadDataFile(LPCSTR pszFilePath)
{
    HRESULT     hr = S_OK;

    if ((NULL == pszFilePath) || ('\0' == *pszFilePath))
    {
        hr = E_INVALIDARG;
        goto exit;
    }
    
    hr = _HrReadSVMOutput(pszFilePath);
    if (FAILED(hr))
    {
        AssertSz(FALSE, "Unable to successfully read filter params");
        goto exit;
    }
        
    // Set the proper return value
    hr = S_OK;
    
exit:    
    return hr;
}

STDMETHODIMP CJunkFilter::SetSpamThresh(ULONG ulThresh)
{
    HRESULT hr = S_OK;

    switch (ulThresh)
    {
        case STF_USE_MOST:
            m_dblSpamCutoff = m_dblMostThresh;
            break;
            
        case STF_USE_MORE:
            m_dblSpamCutoff = m_dblDefaultThresh + ((m_dblMostThresh - m_dblDefaultThresh) / 2);
            break;
            
        case STF_USE_DEFAULT:
            m_dblSpamCutoff = m_dblDefaultThresh;
            break;
                
        case STF_USE_LESS:
            m_dblSpamCutoff = m_dblDefaultThresh - ((m_dblDefaultThresh - m_dblLeastThresh) / 2);
            break;
            
        case STF_USE_LEAST:
            m_dblSpamCutoff = m_dblLeastThresh;
            break;
            
        default:
            hr = E_INVALIDARG;
            goto exit;
    }
    
    hr = S_OK;
    
exit:
    return hr;
}

STDMETHODIMP CJunkFilter::GetSpamThresh(ULONG * pulThresh)
{
    HRESULT hr = S_OK;
    ULONG   ulThresh = 0;

    // Check the incoming params
    if (NULL == pulThresh)
    {
        hr = E_INVALIDARG;
        goto exit;
    }

    // Initialize outgoing params
    if (m_dblDefaultThresh == m_dblSpamCutoff)
    {
        ulThresh = STF_USE_DEFAULT;
    }
    else if (m_dblMostThresh == m_dblSpamCutoff)
    {
        ulThresh = STF_USE_MOST;
    }
    else if (m_dblLeastThresh == m_dblSpamCutoff)
    {
        ulThresh = STF_USE_LEAST;
    }
    else if (m_dblSpamCutoff > m_dblDefaultThresh)
    {
        ulThresh = STF_USE_MORE;
    }
    else
    {
        ulThresh = STF_USE_LESS;
    }
        
    hr = S_OK;
    
exit:
    return hr;
}

STDMETHODIMP CJunkFilter::GetDefaultSpamThresh(DOUBLE * pdblThresh)
{
    HRESULT hr = S_OK;

    // Check the incoming params
    if (NULL == pdblThresh)
    {
        hr = E_INVALIDARG;
        goto exit;
    }

    // Initialize outgoing params
    *pdblThresh = m_dblDefaultThresh * 100.0;
        
    hr = S_OK;
    
exit:
    return hr;
}

STDMETHODIMP CJunkFilter::CalcJunkProb(DWORD dwFlags, IMimePropertySet * pIMPropSet, IMimeMessage * pIMMsg, double * pdblProb)
{
    HRESULT             hr = S_OK;
    BOOL                fSpam = FALSE;
    PROPVARIANT         propvar = {0};
    DWORD               dwFlagsMsg = 0;
    FILETIME            ftMsgSent = {0};
    LPSTR               pszFrom = NULL;
    LPSTR               pszTo = NULL;
    LPSTR               pszSubject = NULL;
    IStream *           pIStmBody = NULL;
    IStream *           pIStmHtml = NULL;
    BOOL                fSentToMe = FALSE;
    BOOL                fHasAttachments = FALSE;

    if ((NULL == pIMPropSet) || (NULL == pIMMsg))
    {
        hr = E_INVALIDARG;
        goto exit;
    }

    // Get Message Flags
    if (SUCCEEDED(pIMMsg->GetFlags(&dwFlagsMsg)))
    {
        fHasAttachments = (0 != (dwFlagsMsg & IMF_ATTACHMENTS));
    }

    // Was the message sent to me
    fSentToMe = (0 != (dwFlags & CJPF_SENT_TO_ME));
    
    // Get the from field
    propvar.vt = VT_LPSTR;
    hr = pIMPropSet->GetProp(PIDTOSTR(PID_HDR_FROM), NOFLAGS, &propvar);
    if (SUCCEEDED(hr))
    {
        pszFrom = propvar.pszVal;
    }
    
    // Get the To field
    propvar.vt = VT_LPSTR;
    hr = pIMPropSet->GetProp(PIDTOSTR(PID_HDR_TO), NOFLAGS, &propvar);
    if (SUCCEEDED(hr))
    {
        pszTo = propvar.pszVal;
    }
    
    // Try to Get the Plain Text Stream
    if (FAILED(pIMMsg->GetTextBody(TXT_PLAIN, IET_DECODED, &pIStmBody, NULL)))
    {
        // Try to get the text version from the HTML stream
        if ((FAILED(pIMMsg->GetTextBody(TXT_HTML, IET_DECODED, &pIStmHtml, NULL))) ||
                (FAILED(HrConvertHTMLToPlainText(pIStmHtml, &pIStmBody))))
        {
            pIStmBody = NULL;
        }
    }

    // Get the Subject field
    propvar.vt = VT_LPSTR;
    hr = pIMPropSet->GetProp(PIDTOSTR(PID_HDR_SUBJECT), NOFLAGS, &propvar);
    if (SUCCEEDED(hr))
    {
        pszSubject = propvar.pszVal;
    }
    
    // Is this a direct message

    // When was the message sent?
    propvar.vt = VT_FILETIME;
    hr = pIMPropSet->GetProp(PIDTOSTR(PID_ATT_SENTTIME), 0, &propvar);
    if (SUCCEEDED(hr))
    {
        ftMsgSent = propvar.filetime;
    }
    
    FillMemory(m_rgulFeatureStatus, sizeof(*m_rgulFeatureStatus) * m_cFeatures, -1);
    
    if (FALSE == FCalculateSpamProb(pszFrom, pszTo, pszSubject, pIStmBody,
                            fSentToMe, fHasAttachments, &ftMsgSent,
                            pdblProb, &fSpam))
    {
        hr = E_FAIL;
        goto exit;
    }

    hr = (FALSE != fSpam) ? S_OK : S_FALSE;
    
exit:
    SafeRelease(pIStmHtml);
    SafeRelease(pIStmBody);
    SafeMemFree(pszSubject);
    SafeMemFree(pszTo);
    SafeMemFree(pszFrom);
    return hr;
}

///////////////////////////////////////////////////////////////////////////////
//
//  HrCreateJunkFilter
//
//  This creates a junk filter.
//
//  ppIRule - pointer to return the junk filter
//
//  Returns:    S_OK, on success
//              E_OUTOFMEMORY, if can't create the Junk Filter object
//
///////////////////////////////////////////////////////////////////////////////
HRESULT WINAPI HrCreateJunkFilter(DWORD dwFlags, IOEJunkFilter ** ppIJunkFilter)
{
    CJunkFilter *   pJunk = NULL;
    HRESULT         hr = S_OK;

    // Check the incoming params
    if (NULL == ppIJunkFilter)
    {
        hr = E_INVALIDARG;
        goto exit;
    }

    // Initialize outgoing params
    *ppIJunkFilter = NULL;

    // Create the rules manager object
    pJunk = new CJunkFilter;
    if (NULL == pJunk)
    {
        hr = E_OUTOFMEMORY;
        goto exit;
    }

    // Get the rules manager interface
    hr = pJunk->QueryInterface(IID_IOEJunkFilter, (void **) ppIJunkFilter);
    if (FAILED(hr))
    {
        goto exit;
    }

    pJunk = NULL;
    
    // Set the proper return value
    hr = S_OK;
    
exit:
    if (NULL != pJunk)
    {
        delete pJunk;
    }
    
    return hr;
}

BOOL FReadDouble(LPSTR pszLine, LPSTR pszToken, DOUBLE * pdblVal)
{
    BOOL    fRet = FALSE;
    LPSTR   pszVal = NULL;
    BOOL    fNegative = FALSE;
    
    // Search for token
    pszVal = StrStr(pszLine, pszToken);

    // If token isn't found then bail
    if (NULL == pszVal)
    {
        fRet = FALSE;
        goto exit;
    }

    // Skip over the token
    pszVal += lstrlen(pszToken);
    
    // Check to see if the value is negative
    if ('-' == *pszVal)
    {
        fNegative = TRUE;
        pszVal++;
    }

    // Read in value
    *pdblVal = StrToDbl(pszVal, &pszVal);

    // Negate the value if neccessary
    if (FALSE != fNegative)
    {
        *pdblVal *= -1;
    }

    fRet = TRUE;
    
exit:
    return fRet;
}

#ifdef DEBUG
static const LPSTR LOG_SPECIAL_BODY_FIRSTNAME       = "Special: Body contains the First Name";
static const LPSTR LOG_SPECIAL_BODY_LASTNAME        = "Special: Body contains the Last Name";
static const LPSTR LOG_SPECIAL_BODY_COMPANYNAME     = "Special: Body contains the Company Name";
static const LPSTR LOG_SPECIAL_BODY_YEARRECVD       = "Special: Body contains the year message received";
static const LPSTR LOG_SPECIAL_SENTTIME_WEEHRS      = "Special: Sent time was between 7PM and 6AM";
static const LPSTR LOG_SPECIAL_SENTTIME_WKEND       = "Special: Sent time was on the weekend (Sat or Sun)";
static const LPSTR LOG_SPECIAL_BODY_25PCTUPCWDS     = "Special: Body contains 25% uppercase words out of the first 50 words";
static const LPSTR LOG_SPECIAL_BODY_8PCTNONALPHA    = "Special: Body contains 8% non-alpha characters out of the first 200 characters";
static const LPSTR LOG_SPECIAL_SENT_DIRECT          = "Special: Sent directly to user";
static const LPSTR LOG_SPECIAL_SUBJECT_25PCTUPCWDS  = "Special: Subject contains 25% uppercase words out of the first 50 words";
static const LPSTR LOG_SPECIAL_SUBJECT_8PCTNONALPHA = "Special: Subject contains 8% non-alpha characters out of the first 200 characters";
static const LPSTR LOG_SPECIAL_TO_EMPTY             = "Special: To line is empty";
static const LPSTR LOG_SPECIAL_HASATTACH            = "Special: Message has an attachment";
static const LPSTR LOG_SPECIAL_BODY_GT125B          = "Special: Body is greater than 125 Bytes";
static const LPSTR LOG_SPECIAL_BODY_GT250B          = "Special: Body is greater than 250 Bytes";
static const LPSTR LOG_SPECIAL_BODY_GT500B          = "Special: Body is greater than 500 Bytes";
static const LPSTR LOG_SPECIAL_BODY_GT1000B         = "Special: Body is greater than 1000 Bytes";
static const LPSTR LOG_SPECIAL_BODY_GT2000B         = "Special: Body is greater than 2000 Bytes";
static const LPSTR LOG_SPECIAL_BODY_GT4000B         = "Special: Body is greater than 4000 Bytes";
static const LPSTR LOG_SPECIAL_BODY_GT8000B         = "Special: Body is greater than 8000 Bytes";
static const LPSTR LOG_SPECIAL_BODY_GT16000B        = "Special: Body is greater than 16000 Bytes";

VOID CJunkFilter::_PrintSpecialFeatureToLog(UINT iRuleNum)
{
    Assert(NULL != m_pILogFile);
    
    switch (iRuleNum)
    {
        case 1:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_FIRSTNAME);
            break;
            
        case 2: 
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_LASTNAME);
            break;
            
        case 3:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_COMPANYNAME);
            break;
            
        case 4: 
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_YEARRECVD);
            break;
            
        case 5:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_SENTTIME_WEEHRS);
            break;
            
        case 6:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_SENTTIME_WKEND);
            break;
            
        case 14:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_25PCTUPCWDS);
            break;
            
        case 15:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_8PCTNONALPHA);
            break;
            
        case 16:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_SENT_DIRECT);
            break;
            
        case 17:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_SUBJECT_25PCTUPCWDS);
            break;
            
        case 18:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_SUBJECT_8PCTNONALPHA);
            break;
            
        case 19:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_TO_EMPTY);
            break;
            
        case 20:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_HASATTACH);
            break;

        case 40:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_GT125B);
            break;
            
        case 41:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_GT250B);
            break;
            
        case 42:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_GT500B);
            break;
            
        case 43:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_GT1000B);
            break;
            
        case 44:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_GT2000B);
            break;
            
        case 45:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_GT4000B);
            break;
            
        case 46:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_GT8000B);
            break;
            
        case 47:
            m_pILogFile->WriteLog(LOGFILE_DB, LOG_SPECIAL_BODY_GT16000B);
            break;
            
        default:
            AssertSz(FALSE, "unsupported special feature");
            break;
    }

    return;
}

VOID CJunkFilter::_PrintFeatureToLog(ULONG ulIndex)
{
    LPSTR   pszBuff = NULL;
    LPSTR   pszTag = NULL;

    // Figure out which tag line to use
    switch (m_rgfeaturecomps[ulIndex].loc)
    {
        case locNil:
            goto exit;
            break;
            
        case locBody:
            pszTag = LOG_BODY;
            break;
            
        case locSubj:
            pszTag = LOG_SUBJECT;
            break;
            
        case locFrom:
            pszTag = LOG_FROM;
            break;
            
        case locTo:
            pszTag = LOG_TO;
            break;

        case locSpecial:
            _PrintSpecialFeatureToLog(m_rgfeaturecomps[ulIndex].ulRuleNum);
            goto exit;
            break;
    }

    // Write out the feature to the log
    PrintToLogFile(m_pILogFile, pszTag, m_rgfeaturecomps[ulIndex].pszFeature);
    
exit:
    SafeMemFree(pszBuff);
    return;
}

HRESULT CJunkFilter::_HrCreateLogFile(VOID)
{
    HRESULT     hr = S_OK;
    LPSTR       pszLogFile = NULL;
    ULONG       cbData = 0;
    ILogFile *  pILogFile = NULL;
    DWORD       dwData = 0;

    if (FALSE != m_fJunkMailLogInit)
    {
        hr = S_FALSE;
        goto exit;
    }

    m_fJunkMailLogInit = TRUE;
    
    // Get the size of the path to Outlook Express
    cbData = sizeof(dwData);
    if ((ERROR_SUCCESS != SHGetValue(HKEY_LOCAL_MACHINE, STR_REG_PATH_FLAT, "JunkMailLog", NULL, (BYTE *) &dwData, &cbData)) ||
            (0 == dwData))
    {
        hr = S_FALSE;
        goto exit;
    }

    // Get the size of the path to Outlook Express
    if (ERROR_SUCCESS != SHGetValue(HKEY_LOCAL_MACHINE, STR_REG_PATH_FLAT, "InstallRoot", NULL, NULL, &cbData))
    {
        hr = E_FAIL;
        goto exit;
    }

    // How much room do we need to build up the path
    cbData += lstrlen(szJunkMailLog) + 2;

    // Allocate space to hold the path
    hr = HrAlloc((VOID **) &pszLogFile, cbData);
    if (FAILED(hr))
    {
        goto exit;
    }

    // Get the path to Outlook Express
    ULONG cbBuffer = cbData;
    if (ERROR_SUCCESS != SHGetValue(HKEY_LOCAL_MACHINE, STR_REG_PATH_FLAT, "InstallRoot", NULL, (BYTE *) pszLogFile, &cbBuffer))
    {
        hr = E_FAIL;
        goto exit;
    }

    // Build up the path to the Junk DLL
    if ('\\' != pszLogFile[lstrlen(pszLogFile)])
    {
        StrCatBuff(pszLogFile, "\\", cbData);
    }
    StrCatBuff(pszLogFile, szJunkMailLog, cbData);
    
    hr = CreateLogFile(g_hInst, pszLogFile, szJunkMailPrefix, DONT_TRUNCATE, &pILogFile, FILE_SHARE_READ | FILE_SHARE_WRITE);
    if (FAILED(hr))
    {
        goto exit;
    }

    SafeRelease(m_pILogFile);
    m_pILogFile = pILogFile;
    
    hr = S_OK;
    
exit:
    SafeMemFree(pszLogFile);
    return hr;
}

VOID PrintToLogFile(ILogFile * pILogFile, LPSTR pszTmpl, LPSTR pszArg)
{
    LPSTR   pszBuff = NULL;
    ULONG   cchBuff = 0;
    
    Assert(NULL != pILogFile);
    Assert(NULL != pszTmpl);

    if (NULL == pszArg)
    {
        pszArg = "";
    }
    
    // Figure out the size of the resulting buffer
    cchBuff = lstrlen(pszTmpl) + lstrlen(pszArg) + 2;

    // Allocate the needed space
    if (FAILED(HrAlloc((VOID **) &pszBuff, cchBuff * sizeof(*pszBuff))))
    {
        goto exit;
    }

    // Create the output string
    wnsprintf(pszBuff, cchBuff, pszTmpl, pszArg);

    // Print the buffer to the log file
    pILogFile->WriteLog(LOGFILE_DB, pszBuff);

exit:
    SafeMemFree(pszBuff);
    return;
}
#endif  // DEBUG