//+---------------------------------------------------------------------------
//
//  Microsoft Windows
//  Copyright (C) Microsoft Corporation, 1992 - 1996.
//
//  File:       datasnif.cxx
//
//  Contents:   Stream Mime type checking (attempts to guess the MIME type
//              of a buffer by simple pattern matching).
//
//  Classes:    CContentAnalyzer
//
//  Functions:  private:
//                CContentAnalyzer::SampleData
//                CContentAnalyzer::IsBMP
//                CContentAnalyzer::GetDataFormat
//                CContentAnalyzer::FormatAgreesWithData
//                CContentAnalyzer::MatchDWordAtOffset
//                CContentAnalyzer::FindAppFromExt
//                CContentAnalyzer::CheckTextHeaders
//                CContentAnalyzer::CheckBinaryHeaders
//
//              public:
//                CContentAnalyzer::FindMimeFromData
//                ::FindMimeFromData
//
//
//  History:    05-25-96   AdriaanC (Adriaan Canter) Created
//              07-16-96   AdriaanC (Adriaan Canter) Modified
//              08-06-96   AdriaanC (Adriaan Canter) Modified
//              08-14-96   AdriaanC (Adriaan Canter) Modified
//
//----------------------------------------------------------------------------

#include <trans.h>
#include "datasnif.hxx"
#include <shlwapip.h>
#ifdef UNIX
#include <mainwin.h>
#endif

PerfDbgTag(tagDataSniff, "Urlmon", "Log DataSniff", DEB_DATA);

// Max no. bytes to look at
#define SAMPLE_SIZE 256

// Registry key fragments for app/fileext associations.
// FindAppFromExt appends one of the two "...Command" suffixes to the file
// type name read from the extension's key under HKEY_CLASSES_ROOT, giving a
// path such as "txtfile\Shell\Open\Command".
#define szApplicationRegistryKey "\\Shell\\Open\\Command"
#define szApplicationRegistryKey2 "\\Shell\\Connect To\\Command"
#define szMimeRegistryKey        "MIME\\Database\\Content Type\\"

// Magic header words.
// MatchDWordAtOffset assembles four buffer bytes most-significant byte
// first, so each constant reads left to right as the bytes found in the
// data. ASCII renderings of the values:
//   AU_SUN_MAGIC ".snd"   AU_SUN_INV_MAGIC "dns."
//   AU_DEC_MAGIC ".sd\0"  AU_DEC_INV_MAGIC "\0ds."
//   AIFF_MAGIC   "FORM"   AIFF_INV_MAGIC   "MROF"
//   RIFF_MAGIC   "RIFF"   AVI_MAGIC "AVI " WAV_MAGIC "WAVE"
//   EMF_MAGIC_2  " EMF"   (checked at offset 40 by CheckBinaryHeaders)
// AIFF_MAGIC_MORE_1/2 are multi-character literals whose numeric value is
// implementation-defined.
// NOTE(review): they only match "AIFF"/"AIFC" in the data if the compiler
// packs the first character into the high byte - confirm for each target
// compiler.
// JPEG_MAGIC_1/2 are single bytes compared against _pBuf[0] and _pBuf[1].
#define AU_SUN_MAGIC                    0x2e736e64
#define AU_SUN_INV_MAGIC                0x646e732e
#define AU_DEC_MAGIC                    0x2e736400
#define AU_DEC_INV_MAGIC                0x0064732e
#define AIFF_MAGIC                      0x464f524d
#define AIFF_INV_MAGIC                  0x4d524f46
#define AIFF_MAGIC_MORE_1               'AIFF'
#define AIFF_MAGIC_MORE_2               'AIFC'
#define RIFF_MAGIC                      0x52494646
#define AVI_MAGIC                       0x41564920
#define WAV_MAGIC                       0x57415645
#define JAVA_MAGIC                      0xcafebabe
#define MPEG_MAGIC                      0x000001b3
#define MPEG_MAGIC_2                    0x000001ba
#define EMF_MAGIC_1                     0x01000000
#define EMF_MAGIC_2                     0x20454d46
#define WMF_MAGIC                       0xd7cdc69a
#define JPEG_MAGIC_1                    0xFF
#define JPEG_MAGIC_2                    0xD8

// Magic header text.
// Most of these are compared against the start of the sample buffer by
// CheckTextHeaders / CheckBinaryHeaders. The exceptions are scanned for
// anywhere in the sample by SampleData:
//   - vszBinHexMagic is compared at p+1 after a 'c' has been seen at p,
//     i.e. it matches the text "converted with BinHex".
//   - vszXbmMagic1 is compared after a '#', vszXbmMagic2 and vszXbmMagic3
//     after a '_' ("#define", "_width", "_bits").
// Octal escapes: vszPngMagic starts with byte 0x89 and contains 0x1A;
// vszCompressMagic is 0x1F 0x9D; vszGzipMagic is 0x1F 0x8B.
// NOTE(review): vszBase64Magic is the literal "begin", which looks like a
// uuencode header rather than base64 - confirm the intended format.
CHAR vszRichTextMagic[] =                "{\\rtf";
CHAR vszPostscriptMagic[] =              "%!";
CHAR vszBinHexMagic[] =                  "onverted with BinHex";
CHAR vszBase64Magic[] =                  "begin";
CHAR vszGif87Magic[] =                   "GIF87";
CHAR vszGif89Magic[] =                   "GIF89";
CHAR vszTiffMagic[] =                    "MM";
CHAR vszBmpMagic[] =                     "BM";
CHAR vszZipMagic[] =                     "PK";
CHAR vszExeMagic[] =                     "MZ";
CHAR vszPngMagic[] =                     "\211PNG\r\n\032\n";
CHAR vszCompressMagic[] =                "\037\235";
CHAR vszGzipMagic[] =                    "\037\213";
CHAR vszXbmMagic1[] =                    "define";
CHAR vszXbmMagic2[] =                    "width";
CHAR vszXbmMagic3[] =                    "bits";
CHAR vszPdfMagic[] =                     "%PDF";
CHAR vszJGMagic[] =                      "JG";
CHAR vszMIDMagic[] =                     "MThd";

// null MIME type.
// GetDataFormat treats a suggested type equal to this string (compared
// case-insensitively) the same as a NULL pointer: DATAFORMAT_AMBIGUOUS.
WCHAR vwzNULL[] =                        L"(null)";

// 7 bit MIME Types
// NOTE(review): presumably these are the types expected to carry text
// (7-bit) data - confirm against the media type table consulted through
// FindMediaTypeFormat.
WCHAR vwzTextPlain[] =                   L"text/plain";
WCHAR vwzTextRichText[] =                L"text/richtext";
WCHAR vwzImageXBitmap[] =                L"image/x-xbitmap";
WCHAR vwzApplicationPostscript[] =       L"application/postscript";
WCHAR vwzApplicationBase64[] =           L"application/base64";
WCHAR vwzApplicationMacBinhex[] =        L"application/macbinhex40";
WCHAR vwzApplicationPdf[] =              L"application/pdf";
WCHAR vwzApplicationCDF[] =              L"application/x-cdf";
WCHAR vwzApplicationNETCDF[] =           L"application/x-netcdf";
WCHAR vwzmultipartmixedreplace[] =       L"multipart/x-mixed-replace";
WCHAR vwzmultipartmixed[] =              L"multipart/mixed";
WCHAR vwzTextScriptlet[] =               L"text/scriptlet";
WCHAR vwzTextComponent[] =               L"text/x-component";
WCHAR vwzTextXML[] =                     L"text/xml";
WCHAR vwzApplicationHTA[] =              L"application/hta";

// 8 bit MIME types
// NOTE(review): presumably the types expected to carry binary data -
// confirm as above.
WCHAR vwzAudioAiff[] =                   L"audio/x-aiff";
WCHAR vwzAudioBasic[] =                  L"audio/basic";
WCHAR vwzAudioWav[] =                    L"audio/wav";
WCHAR vwzAudioMID[] =                    L"audio/mid";
WCHAR vwzImageGif[] =                    L"image/gif";
WCHAR vwzImagePJpeg[] =                  L"image/pjpeg";
WCHAR vwzImageJpeg[] =                   L"image/jpeg";
WCHAR vwzImageTiff[] =                   L"image/tiff";
WCHAR vwzImagePng[] =                    L"image/x-png";
WCHAR vwzImageBmp[] =                    L"image/bmp";
WCHAR vwzImageJG[] =                     L"image/x-jg";
WCHAR vwzImageEmf[] =                    L"image/x-emf";
WCHAR vwzImageWmf[] =                    L"image/x-wmf";
WCHAR vwzVideoAvi[] =                    L"video/avi";
WCHAR vwzVideoMpeg[] =                   L"video/mpeg";
WCHAR vwzApplicationCompressed[] =       L"application/x-compressed";
WCHAR vwzApplicationZipCompressed[] =    L"application/x-zip-compressed";
WCHAR vwzApplicationGzipCompressed[] =   L"application/x-gzip-compressed";
WCHAR vwzApplicationJava[] =             L"application/java";
WCHAR vwzApplicationMSDownload[] =       L"application/x-msdownload";

// 7 or 8 bit MIME types
WCHAR vwzTextHTML[] =                    L"text/html";
WCHAR vwzApplicationOctetStream[] =      L"application/octet-stream";




//+---------------------------------------------------------------------------
//
//  Method:     CContentAnalyzer::SampleData
//
//  Synopsis:   Walks the sample buffer (_pBuf, all but the last of its
//              _cbSample bytes) one byte at a time. It classifies each
//              byte into the _cbNL/_cbCR/_cbFF/_cbText/_cbCtrl/_cbHigh
//              counters and looks for textual signatures, setting
//              _fFoundXML, _fFoundTextScriptlet, _fFoundHTML, _fFoundCDF,
//              _fFoundXBitMap or _fFoundMacBinhex when one is seen.
//
//  Arguments:  (void)
//
//  Returns:    (void)
//
//  History:    5-25-96   AdriaanC (Adriaan Canter) Created
//
//  Notes:      Every signature except XML stops the scan as soon as it is
//              found, so the character counters only cover the bytes
//              visited before that point.
//
//              The _fFound* flags are only ever set here, never cleared.
//
//              NOTE(review): the string compares look ahead of p by up to
//              nine bytes, and *(p+5) is read after an "?XML" match.
//              Presumably the caller NUL-terminates the sample so these
//              reads stop inside the buffer - confirm.
//
//----------------------------------------------------------------------------
void CContentAnalyzer::SampleData()
{
    DEBUG_ENTER((DBG_TRANS,
                None,
                "CContentAnalyzer::SampleData",
                "this=%#x",
                this
                ));
                
    // XBM detection is a three step sequence: "#define", then "_width",
    // then "_bits". These remember how far the sequence has progressed.
    BOOL fFoundFirstXBitMapTag = FALSE;
    BOOL fFoundSecondXBitMapTag = FALSE;
    // TRUE only while the current byte is in the range 32..127.
    BOOL fFoundAsciiChar = FALSE;

    // Raised by 50 for every "weak" HTML hint (short tags, comment end).
    int nHTMLConfidence = 0;

    unsigned char *p = (unsigned char*) _pBuf;

    _cbNL = _cbCR = _cbFF = _cbText = _cbCtrl = _cbHigh = 0;

    // Count incidence of character types.
    // The final byte of the sample (index _cbSample - 1) is never visited.
    for (int i = 0; i < _cbSample - 1; i++)
    {
        fFoundAsciiChar = FALSE;

        if (*p == '\n')           // new line
        {
            _cbNL++;
        }
        else if (*p == '\r')      // carriage return
        {
            _cbCR++;
        }
        else if (*p == '\f')      // form feed
        {
            _cbFF++;
        }
        else if (*p == '\t')      // tab
        {
            // counted as text, but does not trigger the signature checks
            _cbText++;
        }
        else if (*p < 32)         // control character
        {
            _cbCtrl++;
        }
        else if (*p >= 32 && *p < 128)        // regular text
        {
            _cbText++;
            fFoundAsciiChar = TRUE;
        }
        else                      // extended text
        {
            _cbHigh++;
        }

        // Signature checks only run on bytes in the 32..127 range.
        if (fFoundAsciiChar)
        {
            // check for html
            if (*p == '<')
            {
                // "<?XML" followed by ':', ' ' or a tab.
                if (!StrCmpNIC((char*) p+1, "?XML", sizeof("?XML") - 1) &&
                    (
                        (*(p+5) == ':') || 
                        (*(p+5) == ' ') || 
                        (*(p+5) == '\t')) )
                {
                    _fFoundXML = TRUE;
                    // don't break : for CDF
                }
                

                if (!StrCmpNIC((char*) p+1, "SCRIPTLET", sizeof("SCRIPTLET") - 1))
                {
                    _fFoundTextScriptlet = TRUE;
                    break;
                }

                // Strong hints: any one of these tags is enough to call
                // the data HTML. The compares are prefix matches, so
                // e.g. "<PRE" also matches "<PREFIX".
                if (!StrCmpNIC((char*) p+1, "HTML", sizeof("HTML") - 1)
                    || !StrCmpNIC((char*) p+1, "HEAD", sizeof("HEAD") - 1)
                    || !StrCmpNIC((char*) p+1, "TITLE", sizeof("TITLE") - 1)
                    || !StrCmpNIC((char*) p+1, "BODY", sizeof("BODY") - 1)
                    || !StrCmpNIC((char*) p+1, "SCRIPT", sizeof("SCRIPT") - 1)
                    || !StrCmpNIC((char*) p+1, "A HREF", sizeof("A HREF") - 1)
                    || !StrCmpNIC((char*) p+1, "PRE", sizeof("PRE") - 1)
                    || !StrCmpNIC((char*) p+1, "IMG", sizeof("IMG") - 1)
                    || !StrCmpNIC((char*) p+1, "PLAINTEXT", sizeof("PLAINTEXT") - 1)
                    || !StrCmpNIC((char*) p+1, "TABLE", sizeof("TABLE") - 1))
                {
                    _fFoundHTML = TRUE;
                    break;
                }
                // Weak hints: short tags that only add confidence.
                else if (   !StrCmpNIC((char*) p+1, "HR", sizeof("HR") - 1)
                         || !StrCmpNIC((char*) p+1, "A", sizeof("A") - 1)
                         || !StrCmpNIC((char*) p+1, "/A", sizeof("/A") - 1)
                         || !StrCmpNIC((char*) p+1, "B", sizeof("B") - 1)
                         || !StrCmpNIC((char*) p+1, "/B", sizeof("/B") - 1)
                         || !StrCmpNIC((char*) p+1, "P", sizeof("P") - 1)
                         || !StrCmpNIC((char*) p+1, "/P", sizeof("/P") - 1)
                         || !StrCmpNIC((char*) p+1, "!--", sizeof("!--") - 1)
                        )
                {
                    //
                    // In order for this branch to identify this is HTML 
                    // We have to make sure:
                    //      1. some HTML control char exists
                    //      2. We've scanned the whole data block
                    //      3. 2/3 of the data should be text
                    //
                    // NOTE(review): the loop runs while i < _cbSample - 1,
                    // so the test "i == _cbSample - 1" below can never be
                    // true inside the loop. As written, this branch
                    // accumulates confidence but never sets _fFoundHTML.
                    //
                     
                    nHTMLConfidence += 50;
                    if (    nHTMLConfidence >= 100
                        &&  i == _cbSample - 1 
                        &&  _cbText >= ((_cbSample * 2) / 3)
                       )
                    {
                        _fFoundHTML = TRUE;
                        break;
                    }
                }
                // "<CHANNEL" marks a CDF; it is checked even when a weak
                // hint above has already matched at this position.
                if (!StrCmpNIC((char*) p+1, "CHANNEL", sizeof("CHANNEL") - 1))
                {
                    _fFoundCDF = TRUE;
                    break;
                }
        
            
            }
            else if (!StrCmpNIC((char*) p, "-->", sizeof("-->") - 1))
            {
                // comment begin
                // I really want to make sure that most of the 
                // char are printable 
                // potential issue: International code page?
                //
                // NOTE(review): the string matched is "-->", which ends an
                // HTML comment. The same unreachable "i == _cbSample - 1"
                // test applies here. This site uses '>' for the text ratio
                // where the branch above uses '>='.
                nHTMLConfidence += 50;
                if (   (nHTMLConfidence >= 100) 
                    && (i == _cbSample - 1 )
                    && (_cbText > (_cbSample * 2 /3) )
                   )
                {
                    _fFoundHTML = TRUE;
                    break;
                }
            }
            // check for xbitmap
            else if (*p == '#')
            {
                // step 1: "#define"
                if (!StrCmpNC((char*) p+1, vszXbmMagic1, sizeof(vszXbmMagic1) - 1))
                    fFoundFirstXBitMapTag = TRUE;
            }
            else if (*p == '_' && fFoundSecondXBitMapTag)
            {
                // step 3: "_bits" after "_width" has been seen. This test
                // comes before step 2 so that, once step 2 has succeeded,
                // every later '_' is only compared against "bits".
                if (!StrCmpNC((char*) p+1, vszXbmMagic3, sizeof(vszXbmMagic3) - 1))
                {
                    _fFoundXBitMap = TRUE;
                    break;
                }
            }
            else if (*p == '_' && fFoundFirstXBitMapTag)
            {
                // step 2: "_width" after "#define" has been seen
                if (!StrCmpNC((char*) p+1, vszXbmMagic2, sizeof(vszXbmMagic2) - 1))
                    fFoundSecondXBitMapTag = TRUE;
            }

            // MacBinhex
            else if (*p == 'c')
            {
                // 'c' + "onverted with BinHex" (case sensitive)
                if (!StrCmpNC((char*) p+1, vszBinHexMagic, sizeof(vszBinHexMagic) - 1))
                {
                    _fFoundMacBinhex = TRUE;
                    break;
                }
            }

        }
        p++;
    }

    DEBUG_LEAVE(0);
}

//+---------------------------------------------------------------------------
//
//  Method:     CContentAnalyzer::IsBMP
//
//  Synopsis:   Decides whether the sample buffer starts with a Windows
//              bitmap file header: the sample must be at least
//              sizeof(BITMAPFILEHEADER) bytes, begin with "BM", and have
//              both reserved header fields set to zero.
//
//  Arguments:  (void)
//
//  Returns:    BOOL - TRUE if all of the checks above pass.
//
//  History:    5-25-96   AdriaanC (Adriaan Canter) Created
//
//  Notes:      The length check is done first, and the function returns
//              as soon as a check fails, so no header field is read from
//              beyond the end of a short sample.
//
//----------------------------------------------------------------------------
BOOL CContentAnalyzer::IsBMP()
{
    DEBUG_ENTER((DBG_TRANS,
                Bool,
                "CContentAnalyzer::IsBMP",
                "this=%#x",
                this
                ));

    BITMAPFILEHEADER UNALIGNED *pBMFileHdr;

    // Sample size needs to be big enough to hold the whole file header.
    // The signed comparison also rejects a negative sample size.
    if (_cbSample < (int) sizeof(BITMAPFILEHEADER))
    {
        DEBUG_LEAVE(FALSE);
        return FALSE;
    }

    // Check header signature ("BM").
    if (StrCmpNC(_pBuf, vszBmpMagic, sizeof(vszBmpMagic) - 1))
    {
        DEBUG_LEAVE(FALSE);
        return FALSE;
    }

    pBMFileHdr = (BITMAPFILEHEADER*)(_pBuf);

#ifdef UNIX

    /* Use 14 on Unix, because we want the size without the padding
     * done on Unix. sizeof(BITMAPFILEHEADER) = 16 on Unix with padding
     */
    #define UNIX_BITMAP_HEADER_SIZE 14
    BITMAPFILEHEADER bmFileHeader;

    // Prefer the unpacked copy when the helper succeeds; otherwise fall
    // back to reading the fields in place.
    if(MwReadBITMAPFILEHEADER((LPBYTE)_pBuf, UNIX_BITMAP_HEADER_SIZE, &bmFileHeader))
        pBMFileHdr = &bmFileHeader;

#endif /* UNIX */

    // The reserved fields must be set to 0
    BOOL bRetVal = (pBMFileHdr->bfReserved1 == 0 && pBMFileHdr->bfReserved2 == 0);

    DEBUG_LEAVE(bRetVal);
    return bRetVal;
}

//+---------------------------------------------------------------------------
//
//  Method:     CContentAnalyzer::GetDataFormat
//
//  Synopsis:   Maps a MIME type string to its data format by asking
//              FindMediaTypeFormat.
//
//  Arguments:  (LPCWSTR wzMimeType) - MIME type to look up; may be NULL.
//
//  Returns:    DWORD data format:
//                DATAFORMAT_AMBIGUOUS if wzMimeType is NULL or equals
//                                     "(null)" (case-insensitive),
//                the format reported by FindMediaTypeFormat when it
//                                     returns S_OK,
//                DATAFORMAT_UNKNOWN   otherwise.
//
//  History:    7-21-96   AdriaanC (Adriaan Canter) Created
//
//  Notes:
//
//----------------------------------------------------------------------------
DWORD CContentAnalyzer::GetDataFormat(LPCWSTR wzMimeType)
{
    DEBUG_ENTER((DBG_TRANS,
                Dword,
                "CContentAnalyzer::GetDataFormat",
                "this=%#x, %.80wq",
                this, wzMimeType
                ));

    DWORD dwResult;

    if (wzMimeType == NULL || _wcsicmp(wzMimeType, vwzNULL) == 0)
    {
        // Nothing usable was suggested.
        dwResult = DATAFORMAT_AMBIGUOUS;
    }
    else
    {
        CLIPFORMAT cfIgnored;       // required out-parameter, not used here
        DATAFORMAT eLookedUp;

        HRESULT hrLookup = FindMediaTypeFormat(wzMimeType, &cfIgnored,
                                               (DWORD *)&eLookedUp);
        if (hrLookup == S_OK)
        {
            dwResult = eLookedUp;
        }
        else
        {
            dwResult = DATAFORMAT_UNKNOWN;
        }
    }

    DEBUG_LEAVE(dwResult);
    return dwResult;
}

//+---------------------------------------------------------------------------
//
//  Method:     CContentAnalyzer::FormatAgreesWithData
//
//  Synopsis:   Tells whether a data format is compatible with the
//              text/binary verdict held in _fBinary.
//
//  Arguments:  (DWORD dwFormat) - one of the DATAFORMAT_* values
//
//  Returns:    BOOL - TRUE when dwFormat is DATAFORMAT_TEXT and _fBinary
//              is FALSE, when dwFormat is DATAFORMAT_BINARY and _fBinary
//              is TRUE, or when dwFormat is DATAFORMAT_TEXTORBINARY.
//              FALSE in every other case.
//
//  History:    8-14-96   AdriaanC (Adriaan Canter) Created
//
//  Notes:
//
//----------------------------------------------------------------------------
BOOL CContentAnalyzer::FormatAgreesWithData(DWORD dwFormat)
{
    DEBUG_ENTER((DBG_TRANS,
                Bool,
                "CContentAnalyzer::FormatAgreesWithData",
                "this=%#x, %#x",
                this, dwFormat
                ));

    // Each acceptable combination is named separately, then combined.
    const BOOL fTextOnText     = (dwFormat == DATAFORMAT_TEXT)   && (_fBinary == FALSE);
    const BOOL fBinaryOnBinary = (dwFormat == DATAFORMAT_BINARY) && (_fBinary == TRUE);
    const BOOL fEitherAllowed  = (dwFormat == DATAFORMAT_TEXTORBINARY);

    const BOOL fAgrees = (fTextOnText || fBinaryOnBinary || fEitherAllowed) ? TRUE : FALSE;

    DEBUG_LEAVE(fAgrees);
    return fAgrees;
}


//+---------------------------------------------------------------------------
//
//  Method:     CContentAnalyzer::MatchDWordAtOffset
//
//  Synopsis:   Determines if a given magic word is found at
//              the specified offset.
//
//  Arguments:  (DWORD magic, int offset)
//                magic  - value to compare against; its most significant
//                         byte corresponds to the byte at 'offset'.
//                offset - byte offset into the sample buffer.
//
//  Returns:    BOOL - TRUE if the four bytes at 'offset', read most
//              significant byte first, equal 'magic'. FALSE if they
//              differ, if 'offset' is negative, or if fewer than four
//              bytes of sample are available at 'offset'.
//
//  History:    5-25-96   AdriaanC (Adriaan Canter) Created
//
//  Notes:
//
//----------------------------------------------------------------------------
BOOL CContentAnalyzer::MatchDWordAtOffset(DWORD magic, int offset)
{
    DEBUG_ENTER((DBG_TRANS,
                Bool,
                "CContentAnalyzer::MatchDWordAtOffset",
                "this=%#x, %#x, %d",
                this, magic, offset
                ));

    const int cbWord = (int) sizeof(DWORD);

    // Range check. It is written as a subtraction so a very large offset
    // cannot overflow, and it rejects negative offsets, which would read
    // in front of the buffer.
    if (offset < 0 || _cbSample < cbWord || offset > _cbSample - cbWord)
    {
        DEBUG_LEAVE(FALSE);
        return FALSE;
    }

    const unsigned char* p = (const unsigned char*) _pBuf + offset;

    // Widen each byte before shifting so the top byte is never shifted
    // into the sign bit of a promoted int.
    DWORD dwWord = ((DWORD) p[0] << 24)
                 | ((DWORD) p[1] << 16)
                 | ((DWORD) p[2] << 8)
                 |  (DWORD) p[3];

    BOOL bRetVal = (magic == dwWord);

    DEBUG_LEAVE(bRetVal);
    return bRetVal;
}


//+---------------------------------------------------------------------------
//
//  Function:   DataSniffQueryDefaultValue (file local helper)
//
//  Synopsis:   Reads the default (unnamed) value of an open registry key
//              into a caller supplied buffer and makes sure the result is
//              NUL-terminated, which RegQueryValueEx does not guarantee.
//
//  Arguments:  (HKEY hKey, LPSTR pszValue, DWORD cbValue (buffer size))
//
//  Returns:    BOOL (TRUE if the query succeeded)
//
//----------------------------------------------------------------------------
static BOOL DataSniffQueryDefaultValue(HKEY hKey, LPSTR pszValue, DWORD cbValue)
{
    DWORD dwType = 0;
    DWORD cbLen = cbValue;

    if (RegQueryValueEx(hKey, NULL, NULL, &dwType,
        (LPBYTE)pszValue, &cbLen) != ERROR_SUCCESS)
    {
        return FALSE;
    }

    if (pszValue && cbValue > 0)
    {
        // If the data filled the buffer completely without a terminator,
        // the last character is sacrificed for the terminator.
        if (cbLen >= cbValue)
        {
            cbLen = cbValue - 1;
        }
        pszValue[cbLen] = '\0';
    }

    return TRUE;
}

//+---------------------------------------------------------------------------
//
//  Method:     CContentAnalyzer::FindAppFromExt
//
//  Synopsis:   Determines an associated application from
//              a given file extension
//
//  Arguments:  (LPSTR pszExt, LPSTR pszCommand (command line),
//               DWORD cbCommand (size of pszCommand in bytes))
//
//  Returns:    BOOL (Associated Application is found or not)
//
//  History:    7-15-96   AdriaanC (Adriaan Canter) Created
//
//  Notes:      Lookup sequence under HKEY_CLASSES_ROOT:
//                1. default value of <pszExt> gives the file type name
//                   (txtfile, htmlfile, ...).
//                2. default value of <type>\Shell\Open\Command gives
//                   the command line.
//                3. only if the key in step 2 cannot be opened, try
//                   <type>\Shell\Connect To\Command (used by SmartTerm).
//
//              The command line is read from the opened "Command" key.
//              Reading it from the extension key would just return the
//              file type name again.
//
//----------------------------------------------------------------------------
BOOL CContentAnalyzer::FindAppFromExt(LPSTR pszExt, LPSTR pszCommand, DWORD cbCommand)
{
    DEBUG_ENTER((DBG_TRANS,
                Bool,
                "CContentAnalyzer::FindAppFromExt",
                "this=%#x, %.80q, %.80q, %d",
                this, pszExt, pszCommand, cbCommand
                ));

    // Sub-keys to try, in order of preference.
    static const CHAR* const rgszCommandKeys[] =
    {
        szApplicationRegistryKey,
        szApplicationRegistryKey2
    };
    const int cCommandKeys = (int) (sizeof(rgszCommandKeys) / sizeof(rgszCommandKeys[0]));

    // BUGBUG - Is there a max registry path length?
    CHAR szRegPath[MAX_PATH];
    BOOL fReturn = FALSE;
    HKEY hMimeKey = NULL;

    // Should be a file extension
    TransAssert((pszExt[0] == '.'));

    // Open key on extension
    if (RegOpenKeyEx(HKEY_CLASSES_ROOT, pszExt, 0,
        KEY_QUERY_VALUE, &hMimeKey) == ERROR_SUCCESS)
    {
        // Find file type (txtfile, htmlfile, etc) .
        // These currently utilize a null key.
        if (DataSniffQueryDefaultValue(hMimeKey, szRegPath, sizeof(szRegPath)))
        {
            // Length of the bare type name. The buffer is cut back to
            // this length before each suffix is appended, so the type
            // does not have to be queried again for the second attempt.
            const size_t cchType = strlen(szRegPath);

            for (int iKey = 0; iKey < cCommandKeys; iKey++)
            {
                HKEY hAppKey = NULL;

                // szRegPath should now look similar to
                // "txtfile\Shell\Open\Command". Open key on szRegPath
                szRegPath[cchType] = '\0';
                strncat(szRegPath, rgszCommandKeys[iKey], MAX_PATH - cchType - 1);

                if (RegOpenKeyEx(HKEY_CLASSES_ROOT, szRegPath, 0,
                    KEY_QUERY_VALUE, &hAppKey) == ERROR_SUCCESS)
                {
                    // Find the application command line - again, null key.
                    fReturn = DataSniffQueryDefaultValue(hAppKey, pszCommand, cbCommand);
                    RegCloseKey(hAppKey);

                    // The next sub-key is only a fallback for a key that
                    // does not exist, not for a failed value query.
                    break;
                }
            }
        }
        RegCloseKey(hMimeKey);
    }

    DEBUG_LEAVE(fReturn);
    return fReturn;
}
//+---------------------------------------------------------------------------
//
//  Method:     CContentAnalyzer::CheckTextHeaders
//
//  Synopsis:   Compares the start of the sample buffer against the known
//              text-format signatures (PDF, Postscript, rich text, base64)
//              and records the matching MIME type in _wzMimeType.
//
//  Arguments:  void
//
//  Returns:    BOOL - TRUE if a signature matched and _wzMimeType was
//              set; FALSE if nothing matched (_wzMimeType is untouched).
//
//  History:    7-23-96   AdriaanC (Adriaan Canter) Created
//
//  Notes:      Signatures are tried in table order and the first match
//              wins. "%PDF" must therefore stay ahead of "%!".
//
//----------------------------------------------------------------------------
BOOL CContentAnalyzer::CheckTextHeaders()
{
    DEBUG_ENTER((DBG_TRANS,
                Bool,
                "CContentAnalyzer::CheckTextHeaders",
                "this=%#x",
                this
                ));

    // One row per recognised signature: the leading bytes to look for,
    // how many of them to compare, and the MIME type they imply.
    struct TextSignature
    {
        const CHAR* pszMagic;
        int         cchMagic;
        WCHAR*      pwzMimeType;
    };

    static const TextSignature rgSignatures[] =
    {
        // application/pdf (Acrobat)
        { vszPdfMagic,        (int) (sizeof(vszPdfMagic) - 1),        vwzApplicationPdf        },
        // application/Postscript
        { vszPostscriptMagic, (int) (sizeof(vszPostscriptMagic) - 1), vwzApplicationPostscript },
        // text/richtext
        { vszRichTextMagic,   (int) (sizeof(vszRichTextMagic) - 1),   vwzTextRichText          },
        // application/base64
        { vszBase64Magic,     (int) (sizeof(vszBase64Magic) - 1),     vwzApplicationBase64     },
    };
    const int cSignatures = (int) (sizeof(rgSignatures) / sizeof(rgSignatures[0]));

    BOOL fMatched = FALSE;

    for (int iSig = 0; iSig < cSignatures; iSig++)
    {
        const TextSignature& sig = rgSignatures[iSig];

        if (StrCmpNC(_pBuf, sig.pszMagic, sig.cchMagic) == 0)
        {
            _wzMimeType = sig.pwzMimeType;
            fMatched = TRUE;
            break;
        }
    }

    // No match: the MIME type is deliberately left alone and the caller
    // decides what to assume.

    DEBUG_LEAVE(fMatched);
    return fMatched;

}

//+---------------------------------------------------------------------------
//
//  Method:     CContentAnalyzer::CheckBinaryHeaders
//
//  Synopsis:   Compares the start of the sample buffer against the known
//              binary-format signatures and records the matching MIME
//              type in _wzMimeType.
//
//  Arguments:  void
//
//  Returns:    BOOL - TRUE if a signature matched and _wzMimeType was
//              set; FALSE if nothing matched (_wzMimeType is untouched).
//
//  History:    7-23-96   AdriaanC (Adriaan Canter) Created
//
//  Notes:      Checks are tried in the order written and the first match
//              wins. The order matters where signatures share a prefix,
//              e.g. RIFF/WAVE is tested before RIFF/AVI.
//
//              Tests that index _pBuf directly are guarded by _cbSample
//              so they never read past the sample.
//
//----------------------------------------------------------------------------
BOOL CContentAnalyzer::CheckBinaryHeaders()
{
    DEBUG_ENTER((DBG_TRANS,
                Bool,
                "CContentAnalyzer::CheckBinaryHeaders",
                "this=%#x",
                this
                ));

    BOOL bRet = TRUE;

    // image/gif ("GIF87" / "GIF89", compared case-insensitively)
    if (!StrCmpNIC(_pBuf, vszGif87Magic, sizeof(vszGif87Magic) - 1)
        || !StrCmpNIC(_pBuf, vszGif89Magic, sizeof(vszGif89Magic) - 1))
    {
        _wzMimeType = vwzImageGif;
    }

    // image/pjpeg (first two bytes are 0xFF 0xD8)
    else if (_cbSample >= 2
        && (BYTE)_pBuf[0] == JPEG_MAGIC_1
        && (BYTE)_pBuf[1] == JPEG_MAGIC_2)
    {
        _wzMimeType = vwzImagePJpeg;
    }

    // image/bmp
    else if (IsBMP())
    {
        _wzMimeType = vwzImageBmp;
    }

    // audio/wav ("RIFF" at 0, "WAVE" at 8)
    else if (MatchDWordAtOffset(RIFF_MAGIC, 0)
        && MatchDWordAtOffset(WAV_MAGIC, 8))
    {
        _wzMimeType = vwzAudioWav;
    }

    // audio/basic (.au files)
    else if (MatchDWordAtOffset(AU_DEC_MAGIC, 0)
           || MatchDWordAtOffset(AU_SUN_MAGIC, 0)
           || MatchDWordAtOffset(AU_DEC_INV_MAGIC, 0)
           || MatchDWordAtOffset(AU_SUN_INV_MAGIC, 0))
    {
       _wzMimeType = vwzAudioBasic;
    }

    // image/tiff
    // Whole-string compare: only matches when "MM" is followed by a \0.
    else if (!StrCmpC(_pBuf, vszTiffMagic))
    {
        _wzMimeType = vwzImageTiff;
    }

    // application/x-msdownload ("MZ")
    else if (!StrCmpNC(_pBuf, vszExeMagic, sizeof(vszExeMagic) - 1))
    {
        _wzMimeType = vwzApplicationMSDownload;
    }

    // image/x-png
    else if (!StrCmpNC(_pBuf, vszPngMagic, sizeof(vszPngMagic) - 1))
    {
        _wzMimeType = vwzImagePng;
    }

    // image/x-jg ("JG", byte 2 in the range 3..31, byte 4 zero)
    else if (_cbSample >= 5
        && !StrCmpNC(_pBuf, vszJGMagic, sizeof(vszJGMagic) - 1)
        && (int) _pBuf[2] >= 3
        && (int) _pBuf[2] <= 31
        && _pBuf[4] == 0)
    {
        _wzMimeType = vwzImageJG;
    }

    // audio/x-aiff
    else if (MatchDWordAtOffset(AIFF_INV_MAGIC, 0))
    {
       _wzMimeType = vwzAudioAiff;
    }

    else if (MatchDWordAtOffset(AIFF_MAGIC, 0) &&
             ( MatchDWordAtOffset(AIFF_MAGIC_MORE_1, 8) ||
               MatchDWordAtOffset(AIFF_MAGIC_MORE_2, 8) ) )
    {
        //
        // according to DaveMay, the correct AIFF format would be:
        // 'FORM....AIFF' or 'FORM....AIFC'
        // Only check for 'FORM' is incorrect because .sc2 has the
        // same sig
        //
       _wzMimeType = vwzAudioAiff;
    }

    // video/avi (or video/x-msvideo): "RIFF" at 0, "AVI " at 8
    else if (MatchDWordAtOffset(RIFF_MAGIC, 0)
        && MatchDWordAtOffset(AVI_MAGIC, 8))
    {
        _wzMimeType = vwzVideoAvi;
    }

    // video/mpeg
    else if (MatchDWordAtOffset(MPEG_MAGIC, 0)
            || MatchDWordAtOffset(MPEG_MAGIC_2, 0) )
    {
        _wzMimeType = vwzVideoMpeg;
    }

    // image/x-emf
    else if (MatchDWordAtOffset(EMF_MAGIC_1, 0)
        && MatchDWordAtOffset(EMF_MAGIC_2, 40))
    {
        _wzMimeType = vwzImageEmf;
    }

    // image/x-wmf
    else if (MatchDWordAtOffset(WMF_MAGIC, 0))
    {
        _wzMimeType = vwzImageWmf;
    }

    // application/java
    else if (MatchDWordAtOffset(JAVA_MAGIC, 0))
    {
        _wzMimeType = vwzApplicationJava;
    }

    // application/x-zip-compressed ("PK")
    else if (!StrCmpNC(_pBuf, vszZipMagic, sizeof(vszZipMagic) - 1))
    {
        _wzMimeType = vwzApplicationZipCompressed;
    }

    // application/x-compress
    else if (!StrCmpNC(_pBuf, vszCompressMagic, sizeof(vszCompressMagic) - 1))
    {
        _wzMimeType = vwzApplicationCompressed;
    }

    // application/x-gzip
    else if (!StrCmpNC(_pBuf, vszGzipMagic, sizeof(vszGzipMagic) - 1))
    {
        _wzMimeType = vwzApplicationGzipCompressed;
    }

    // audio/mid
    // Whole-string compare: only matches when "MThd" is followed by a \0.
    else if (!StrCmpC(_pBuf, vszMIDMagic))
    {
        _wzMimeType = vwzAudioMID;
    }

    // application/pdf (Acrobat)
    else if (!StrCmpNC(_pBuf, vszPdfMagic, sizeof(vszPdfMagic) - 1))
    {
        _wzMimeType = vwzApplicationPdf;
    }

    // don't know what it is.
    else
    {
        //_wzMimeType = vwzApplicationOctetStream;
        bRet = FALSE;
    }

    DEBUG_LEAVE(bRet);
    return bRet;
}



//+---------------------------------------------------------------------------
//
//  Method:     CContentAnalyzer::FindMimeFromData
//
//  Synopsis:   Attempts to guess the MIME type of a buffer, using the
//              server-suggested MIME type and the file name as hints.
//
//  Arguments:  [wzFileName]          -- file name (or URL when
//                                       FMFD_URLASFILENAME is set); may be
//                                       NULL
//              [pBuf]                -- data to examine; may be NULL
//              [cbSample]            -- number of bytes available in pBuf
//              [wzSuggestedMimeType] -- MIME type proposed by the server;
//                                       may be NULL
//              [grfFlags]            -- FMFD_* flags, saved in _grfFlags
//
//  Returns:    LPCWSTR (the MIME type guessed). The pointer is whatever
//              _wzMimeType ends up referring to: wzSuggestedMimeType
//              itself, _wzMimeTypeFromExt, or one of the vwz* constants.
//
//  History:    5-25-96   AdriaanC (Adriaan Canter) Created
//
//  Notes:      - Any ";..." suffix on wzSuggestedMimeType is cut off IN
//                PLACE, i.e. the caller's string is written to even though
//                it is passed as LPCWSTR.
//              - At most SAMPLE_SIZE bytes of pBuf are examined. The last
//                byte of the examined range is temporarily overwritten
//                with '\0' and restored before returning through the
//                'exit' label.
//
//----------------------------------------------------------------------------
LPCWSTR CContentAnalyzer::FindMimeFromData(LPCWSTR wzFileName, char* pBuf,
    int cbSample, LPCWSTR wzSuggestedMimeType, DWORD grfFlags)
{
    DEBUG_ENTER((DBG_TRANS,
                Pointer,
                "CContentAnalyzer::FindMimeFromData",
                "this=%#x, %.80wq, %.80q, %d, %.80wq, %#x",
                this, wzFileName, pBuf, cbSample, wzSuggestedMimeType, grfFlags
                ));
                
    BOOL fSampledData = FALSE;
    BOOL fFoundMimeTypeFromExt = FALSE;

    CHAR* szFileExt = 0;
    CHAR szFileName[MAX_PATH];
    CHAR szMimeTypeFromExt[SZMIMESIZE_MAX];
    CHAR szCommand[MAX_PATH];
    CHAR cLastByte;

    DWORD dwExtMimeTypeDataFormat;
    DWORD dwSuggestedMimeTypeDataFormat;
    DWORD dwMimeTypeDataFormat;
    DWORD cbCommand = MAX_PATH;
    BOOL  fExtensionChecked = FALSE;

    _grfFlags = grfFlags;

    // BUGBUG - we can use this information for DBCS.
    // Remove any info appended to the suggested mime type
    // such as charset information. This is identified by ';'
    // NOTE: this truncates the caller's string in place.

    if (wzSuggestedMimeType)
    {
        WCHAR* wptr = wcsstr(wzSuggestedMimeType, L";");
        if (wptr)
        {
            *wptr = L'\0';
        }
    }

    // Check to see if the server is suggesting an unknown mime type.
    // The NULL check keeps the string compares below from being handed
    // a NULL pointer when no mime type was suggested at all.
    dwSuggestedMimeTypeDataFormat = GetDataFormat(wzSuggestedMimeType);
    if (   wzSuggestedMimeType
        && dwSuggestedMimeTypeDataFormat == DATAFORMAT_UNKNOWN)
    {
        // server push returns "multipart" content type 
        // this is not the real mimetype, so we have to sniff 
        // to find out the truth 
        if(    wcsicmp(wzSuggestedMimeType, vwzmultipartmixed)
            && wcsicmp(wzSuggestedMimeType, vwzmultipartmixedreplace) )
        {
            // If so, return the suggested mime type.
            _wzMimeType = (WCHAR*) wzSuggestedMimeType;

            DEBUG_LEAVE(_wzMimeType);
            return _wzMimeType;
        }
    }

/*****
    // check if we got an extension and extension mime
    // matches the suggested mime - only for text/plain
    if (    wzSuggestedMimeType 
        &&  wzFileName
        && !wcscmp(wzSuggestedMimeType,vwzTextPlain))
         
    {
        fExtensionChecked = TRUE;
        fFoundMimeTypeFromExt = FindMimeFromExt(
                                        wzFileName,
                                        szFileName,
                                        szMimeTypeFromExt,
                                        &dwExtMimeTypeDataFormat,
                                        &szFileExt
                                        );

        // If there is a mime type associated with the file
        // extension then return it.
        if (   fFoundMimeTypeFromExt
            && (dwExtMimeTypeDataFormat == dwSuggestedMimeTypeDataFormat)
            && !wcscmp(wzSuggestedMimeType,_wzMimeTypeFromExt)
            )
        {
            // If so, return the suggested mime type.
            _wzMimeType = (WCHAR*) wzSuggestedMimeType;
            return _wzMimeType;
        }
    }
*****/

    // Not enough data to tell anything
    if (!pBuf || cbSample <= 0)
    {
        _wzMimeType = (WCHAR*) wzSuggestedMimeType;

        DEBUG_LEAVE(_wzMimeType);
        return _wzMimeType;
    }

    // Look at no more than SAMPLE_SIZE bytes of the buffer.
    _pBuf = pBuf;
    _cbSample = (cbSample <= SAMPLE_SIZE) ? cbSample : SAMPLE_SIZE;

    // Save off last character. Null terminate the buffer.
    // From here on every return path must go through 'exit' so that
    // the saved byte is put back.
    cLastByte = _pBuf[_cbSample - 1];
    _pBuf[_cbSample - 1] = '\0';


    // Common cases first - check the server indicated mime type
    // for text/html, image/gif or image/[p]jpeg.
    if (   wzSuggestedMimeType
        && !StrCmpICW(wzSuggestedMimeType, vwzTextHTML))
    {
        // Sample the data. This routine also checks for the following
        // mime types which require extended scanning through the buffer:
        // text/html, image/x-xbitmap, application/macbinhex
        SampleData();
        fSampledData = TRUE;

        if (_fFoundHTML)
        {
            _wzMimeType = vwzTextHTML;
            goto exit;
        }
    }

    // image/gif
    else if (wzSuggestedMimeType
        && !wcsicmp(wzSuggestedMimeType, vwzImageGif))
    {
        if (!StrCmpNIC(_pBuf, vszGif87Magic, sizeof(vszGif87Magic) - 1)
           || !StrCmpNIC(_pBuf, vszGif89Magic, sizeof(vszGif89Magic) - 1))
        {
            _wzMimeType = vwzImageGif;
            goto exit;
        }
    }

    // image/jpeg or image/pjpeg
    // (a verified JPEG is always reported as vwzImagePJpeg)
    else if (wzSuggestedMimeType
        && (!wcsicmp(wzSuggestedMimeType, vwzImagePJpeg)
        || !wcsicmp(wzSuggestedMimeType, vwzImageJpeg)))
    {
        if ((BYTE)_pBuf[0] == JPEG_MAGIC_1 && (BYTE)_pBuf[1] == JPEG_MAGIC_2)
        {
            _wzMimeType = vwzImagePJpeg;
            goto exit;
        }
    }


    //
    // ********************** BEGIN HACK ******************************* 
    //
    // we will remove this once tridents defined the unique signature
    // for .hta and .htc format
    //
    // DanpoZ (98.08.12) - refer to IE5 SUPERHOT bug 35478
    //
    if (wzFileName )
    {

        CHAR* szExt;
        CHAR szFile[MAX_PATH];
        W2A(wzFileName, szFile, MAX_PATH);

        if( grfFlags & FMFD_URLASFILENAME )
        {
            //
            // remove the security context '\1' and replace it with '\0'
            // but only do this when we are using URL to replace the filename
            //
            CHAR* pch = StrChr(szFile, '\1');
            if (pch)
            {
                *pch = '\0';
            }
        }

        // The compares below are prefix compares: any extension that
        // starts with ".hta" or ".htc" takes this path.
        szExt = FindFileExtension(szFile);
        if( szExt && 
            ( !StrCmpNIC(szExt, ".hta", sizeof(".hta") - 1) ||
              !StrCmpNIC(szExt, ".htc", sizeof(".htc") - 1)  ) )
        {
            fExtensionChecked = TRUE;
            fFoundMimeTypeFromExt = FindMimeFromExt(
                                            wzFileName,
                                            szFileName,
                                            szMimeTypeFromExt,
                                            &dwExtMimeTypeDataFormat,
                                            &szFileExt
                                            );

            // If there is a mime type associated with the file
            // extension then return it.
            if (fFoundMimeTypeFromExt)
            {
                _wzMimeType = _wzMimeTypeFromExt;
                goto exit;
            }
        }
    }
    //
    // ********************** END HACK ********************************* 
    //

    // One of the following is true:

    // 1) The server indicated a common mime type (html, gif or jpeg),
    //    however, verification failed.
    // 2) The server indicated an ambiguous mime type or
    //    a known, but uncommon mime type.

    // If not done so already, sample the data.
    if (!fSampledData)
    {
        SampleData();
        fSampledData = TRUE;
    }

    // Return any mime type that was positively
    // identified during the data sampling
    if( _fFoundCDF )
    {
        _wzMimeType = vwzApplicationCDF;
        goto exit;
    }
    else if( _fFoundXML)
    {
        _wzMimeType = vwzTextXML;
        goto exit;
    }
    else if (_fFoundHTML)
    {
        _wzMimeType = vwzTextHTML;
        goto exit;
    }
    else if (_fFoundXBitMap)
    {
        _wzMimeType = vwzImageXBitmap;
        goto exit;
    }
    else if (_fFoundMacBinhex)
    {
        _wzMimeType = vwzApplicationMacBinhex;
        goto exit;
    }
    else if( _fFoundTextScriptlet )
    {
        _wzMimeType = vwzTextScriptlet;
        goto exit;
    }

    // None of the sampled signatures matched (each of those cases has
    // already exited above, including application/x-cdf). If the server
    // said application/x-netcdf, keep that type instead of running the
    // header checks below.
    if(    wzSuggestedMimeType
        && !wcsicmp(wzSuggestedMimeType, vwzApplicationNETCDF) 
      ) 
    {
        _wzMimeType = vwzApplicationNETCDF; 
        goto exit;
    }
    

    // Decide if buffer is primarily text or binary. Conduct
    // pattern matching to determine a mime type depending on the
    // finding. The header checks matching the finding are tried first,
    // then the other kind, then a default for the finding.
    if (!_cbCtrl || _cbText + _cbFF >= 16 * (_cbCtrl + _cbHigh))
    {
        _fBinary = FALSE;
        if( !CheckTextHeaders() )
        {
            if( !CheckBinaryHeaders() )
            {
                _wzMimeType = vwzTextPlain;
            }
        }
    }
    else
    {
        _fBinary = TRUE;
        if( !CheckBinaryHeaders() )
        {
            if( !CheckTextHeaders() )
            {
                _wzMimeType = vwzApplicationOctetStream;
            }
        }
    }

    // Determine format of the mime type from data
    dwMimeTypeDataFormat = GetDataFormat(_wzMimeType);

    // If the format of the mime type found from examining the data
    // is not ambiguous, then return this mime type.
    if (dwMimeTypeDataFormat != DATAFORMAT_AMBIGUOUS)
    {
        goto exit;
    }

    // Examination of data is inconclusive.
    else
    {
        // If the suggested mime type is not ambiguous and does
        // not conflict with the data format then return it.
        // A missing (NULL) suggestion never replaces the type
        // already found from the data.
        if (   wzSuggestedMimeType
            && dwSuggestedMimeTypeDataFormat != DATAFORMAT_AMBIGUOUS
            && FormatAgreesWithData(dwSuggestedMimeTypeDataFormat))
        {
            _wzMimeType = (WCHAR*) wzSuggestedMimeType;
            goto exit;
        }

        // Otherwise, attempt to obtain a mime type from any
        // file extension. If none is found, but an application
        // is registered for the file extension, return
        // application/octet-stream.


        // If there is a file extension, find any
        // associated mime type.
        if (wzFileName && !fExtensionChecked)
        {
            fExtensionChecked = TRUE;

            fFoundMimeTypeFromExt = FindMimeFromExt(
                                            wzFileName,
                                            szFileName,
                                            szMimeTypeFromExt,
                                            &dwExtMimeTypeDataFormat,
                                            &szFileExt
                                            );
        }

        // A mime type is associated with the file extension. It is
        // used only when its data format is unknown; for any other
        // format the mime type found from the data is kept. Either
        // way we are done.
        if (fFoundMimeTypeFromExt)
        {
            if (dwExtMimeTypeDataFormat == DATAFORMAT_UNKNOWN)
            {
                _wzMimeType = _wzMimeTypeFromExt;
            }
            goto exit;
        }

        // Otherwise, check to see if there is an associated application.
        if (szFileExt && FindAppFromExt(szFileExt, szCommand, cbCommand))
        {
            // Found an associated application.
            _wzMimeType = vwzApplicationOctetStream;
            goto exit;
        }

        // No suggested mime type, no mime type from file extension
        // and no registered application found. Fall through and return
        // mime type found from the data
    }


    exit:
        // Replace the null termination with
        // the original character.
        _pBuf[_cbSample - 1] = cLastByte;


        DEBUG_LEAVE(_wzMimeType);
        return _wzMimeType;
}

//+---------------------------------------------------------------------------
//
//  Method:     CContentAnalyzer::FindMimeFromExt
//
//  Synopsis:   Converts the wide file name to ANSI, locates its file
//              extension and asks GetMimeFromExt for the mime type
//              associated with that extension.
//
//  Arguments:  [wzFileName] -- wide file name to examine
//              [szFileName] -- out: receives the ANSI copy of wzFileName
//                              (converted with a limit of MAX_PATH)
//              [szMimeTypeFromExt] -- out: receives the ANSI mime type
//                              when one is found
//              [pdwExtMimeTypeDataFormat] -- out: data format of the mime
//                              type found; written only on success
//              [ppszFileExt] -- optional out: receives the extension
//                              returned by FindFileExtension for
//                              szFileName; written only when an
//                              extension exists
//
//  Returns:    TRUE if a mime type was found for the extension (it is then
//              also stored, as a wide string, in _wzMimeTypeFromExt);
//              FALSE otherwise.
//
//  History:    5-25-96   AdriaanC (Adriaan Canter)
//              1-28-1997   JohannP (Johann Posch)   made separate function
//
//  Notes:
//
//----------------------------------------------------------------------------
BOOL CContentAnalyzer::FindMimeFromExt(
                        LPCWSTR wzFileName,
                        CHAR *szFileName,
                        CHAR *szMimeTypeFromExt,
                        DWORD *pdwExtMimeTypeDataFormat,
                        CHAR  **ppszFileExt)
{
    DEBUG_ENTER((DBG_TRANS,
                Bool,
                "CContentAnalyzer::FindMimeFromExt",
                "this=%#x, %.80wq, %.80q, %.80q, %#x, %#x",
                this, wzFileName, szFileName, szMimeTypeFromExt, pdwExtMimeTypeDataFormat, ppszFileExt
                ));
                
    BOOL fHaveMimeType = FALSE;
    UrlMkAssert((wzFileName && szFileName && pdwExtMimeTypeDataFormat));
    DWORD cbMimeType = SZMIMESIZE_MAX;
    CHAR* pszExt = 0;

    // Work on an ANSI copy of the name; the extension is located
    // within that copy.
    W2A(wzFileName, szFileName, MAX_PATH);
    pszExt = FindFileExtension(szFileName);

    if (pszExt)
    {
        // Look up the mime type associated with the extension.
        if (GetMimeFromExt(pszExt, szMimeTypeFromExt, &cbMimeType)
                == ERROR_SUCCESS)
        {
            // Keep a wide copy in the member buffer and classify it.
            A2W(szMimeTypeFromExt, _wzMimeTypeFromExt, SZMIMESIZE_MAX);
            *pdwExtMimeTypeDataFormat = GetDataFormat(_wzMimeTypeFromExt);
            fHaveMimeType = TRUE;
        }

        // Hand the extension back whether or not a mime type was found.
        if (ppszFileExt)
        {
            *ppszFileExt = pszExt;
        }
    }

    DEBUG_LEAVE(fHaveMimeType);
    return fHaveMimeType;
}