windows-server-2003/inetcore/urlmon/trans/datasnif.cxx


								//+---------------------------------------------------------------------------

								//

								//  Microsoft Windows

								//  Copyright (C) Microsoft Corporation, 1992 - 1996.

								//

								//  File:       datasnif.cxx

								//

								//  Contents:   Stream Mime type checking (attempts to guess the MIME type

								//              of a buffer by simple pattern matching).

								//

								//  Classes:    CContentAnalyzer

								//

								//  Functions:  private:

								//                CContentAnalyzer::SampleData

								//                CContentAnalyzer::IsBMP

								//                CContentAnalyzer::GetDataFormat

								//                CContentAnalyzer::FormatAgreesWithData

								//                CContentAnalyzer::MatchDWordAtOffset

								//                CContentAnalyzer::FindAppFromExt

								//                CContentAnalyzer::CheckTextHeaders

								//                CContentAnalyzer::CheckBinaryHeaders

								//

								//              public:

								//                CContentAnalyzer::FindMimeFromData

								//                ::FindMimeFromData

								//

								//

								//  History:    05-25-96   AdriaanC (Adriaan Canter) Created

								//              07-16-96   AdriaanC (Adriaan Canter) Modified

								//              08-06-96   AdriaanC (Adriaan Canter) Modified

								//              08-14-96   AdriaanC (Adriaan Canter) Modified

								//

								//----------------------------------------------------------------------------


								#include <trans.h>

								#include "datasnif.hxx"

								#include <shlwapip.h>

								#ifdef UNIX

								#include <mainwin.h>

								#endif


								// Declares the tagDataSniff debug/perf tag for this file. PerfDbgTag is

								// defined elsewhere; presumably it gates the "Log DataSniff" output

								// under the "Urlmon" category - confirm against the macro definition.

								PerfDbgTag(tagDataSniff, "Urlmon", "Log DataSniff", DEB_DATA);


								// Max no. bytes to look at

								#define SAMPLE_SIZE 256


								// Registry Key for app/fileext associations.

								// The first two are suffixes that FindAppFromExt appends to a file-type

								// name read from HKEY_CLASSES_ROOT\<.ext> (e.g. "txtfile" becomes

								// "txtfile\Shell\Open\Command"). The "Connect To" variant is the

								// fallback tried when the "Open" key cannot be opened.

								#define szApplicationRegistryKey "\\Shell\\Open\\Command"

								#define szApplicationRegistryKey2 "\\Shell\\Connect To\\Command"

								// Registry path prefix of the MIME content-type database. Not referenced

								// in the visible part of this file.

								#define szMimeRegistryKey        "MIME\\Database\\Content Type\\"


								// Magic header words

								//

								// The 32-bit values are compared by MatchDWordAtOffset, which assembles

								// four consecutive buffer bytes most-significant-byte first. Each

								// constant therefore reads left to right as the bytes found in the

								// file, e.g. 0x52494646 is the byte sequence 'R' 'I' 'F' 'F'.

								// The *_INV_* variants are the same bytes in reverse order.

								// ".snd"

								#define AU_SUN_MAGIC                    0x2e736e64

								// "dns."

								#define AU_SUN_INV_MAGIC                0x646e732e

								// ".sd\0"

								#define AU_DEC_MAGIC                    0x2e736400

								// "\0ds."

								#define AU_DEC_INV_MAGIC                0x0064732e

								// "FORM"

								#define AIFF_MAGIC                      0x464f524d

								// "MROF"

								#define AIFF_INV_MAGIC                  0x4d524f46

								// Form-type tags expected at offset 8 after "FORM". These are

								// multi-character literals, whose numeric value is

								// implementation-defined.

								// NOTE(review): assumes the compiler packs 'AIFF' as 0x41494646 - confirm.

								#define AIFF_MAGIC_MORE_1               'AIFF'

								#define AIFF_MAGIC_MORE_2               'AIFC'

								// "RIFF" container signature, followed at offset 8 by "AVI " or "WAVE"

								#define RIFF_MAGIC                      0x52494646

								#define AVI_MAGIC                       0x41564920

								#define WAV_MAGIC                       0x57415645

								// Java class file signature

								#define JAVA_MAGIC                      0xcafebabe

								// MPEG start codes accepted at offset 0

								#define MPEG_MAGIC                      0x000001b3

								#define MPEG_MAGIC_2                    0x000001ba

								// Enhanced metafile: EMF_MAGIC_1 at offset 0 and " EMF" at offset 40

								#define EMF_MAGIC_1                     0x01000000

								#define EMF_MAGIC_2                     0x20454d46

								// Windows metafile signature checked at offset 0

								#define WMF_MAGIC                       0xd7cdc69a

								// JPEG is matched byte-wise: the first two bytes must be FF D8

								#define JPEG_MAGIC_1                    0xFF

								#define JPEG_MAGIC_2                    0xD8


								// Magic header text

								//

								// Byte-string signatures. Most are compared against the start of the

								// sample buffer with an explicit length of sizeof(x) - 1, i.e. without

								// the terminating NUL.

								CHAR vszRichTextMagic[] =                "{\\rtf";

								CHAR vszPostscriptMagic[] =              "%!";

								// The leading 'c' of "converted with BinHex" is matched separately by

								// SampleData, which then compares this remainder at the next byte.

								CHAR vszBinHexMagic[] =                  "onverted with BinHex";

								// CheckTextHeaders maps a buffer starting with "begin" to

								// application/base64.

								CHAR vszBase64Magic[] =                  "begin";

								CHAR vszGif87Magic[] =                   "GIF87";

								CHAR vszGif89Magic[] =                   "GIF89";

								// Compared as a whole string, so "MM" must be followed by a NUL byte

								CHAR vszTiffMagic[] =                    "MM";

								CHAR vszBmpMagic[] =                     "BM";

								CHAR vszZipMagic[] =                     "PK";

								CHAR vszExeMagic[] =                     "MZ";

								// Octal escapes: \211 is 0x89 and \032 is 0x1A

								CHAR vszPngMagic[] =                     "\211PNG\r\n\032\n";

								// 0x1F 0x9D

								CHAR vszCompressMagic[] =                "\037\235";

								// 0x1F 0x8B

								CHAR vszGzipMagic[] =                    "\037\213";

								// X bitmap markers. SampleData looks for them in this order, each one

								// after its own lead character: '#' + "define", '_' + "width",

								// '_' + "bits".

								CHAR vszXbmMagic1[] =                    "define";

								CHAR vszXbmMagic2[] =                    "width";

								CHAR vszXbmMagic3[] =                    "bits";

								CHAR vszPdfMagic[] =                     "%PDF";

								CHAR vszJGMagic[] =                      "JG";

								// Compared as a whole string, like vszTiffMagic

								CHAR vszMIDMagic[] =                     "MThd";


								// null MIME type

								// GetDataFormat treats a proposed MIME type equal to this string

								// (compared case-insensitively) as DATAFORMAT_AMBIGUOUS.

								WCHAR vwzNULL[] =                        L"(null)";


								// 7 bit MIME Types

								// NOTE(review): the 7-bit / 8-bit grouping presumably mirrors the

								// text / binary split used by the analyzer - confirm against the

								// media-type table consulted by FindMediaTypeFormat.

								WCHAR vwzTextPlain[] =                   L"text/plain";

								WCHAR vwzTextRichText[] =                L"text/richtext";

								WCHAR vwzImageXBitmap[] =                L"image/x-xbitmap";

								WCHAR vwzApplicationPostscript[] =       L"application/postscript";

								WCHAR vwzApplicationBase64[] =           L"application/base64";

								WCHAR vwzApplicationMacBinhex[] =        L"application/macbinhex40";

								WCHAR vwzApplicationPdf[] =              L"application/pdf";

								WCHAR vwzApplicationCDF[] =              L"application/x-cdf";

								WCHAR vwzApplicationNETCDF[] =           L"application/x-netcdf";

								WCHAR vwzmultipartmixedreplace[] =       L"multipart/x-mixed-replace";

								WCHAR vwzmultipartmixed[] =              L"multipart/mixed";

								WCHAR vwzTextScriptlet[] =               L"text/scriptlet";

								WCHAR vwzTextComponent[] =               L"text/x-component";

								WCHAR vwzTextXML[] =                     L"text/xml";

								WCHAR vwzApplicationHTA[] =              L"application/hta";


								// 8 bit MIME types

								WCHAR vwzAudioAiff[] =                   L"audio/x-aiff";

								WCHAR vwzAudioBasic[] =                  L"audio/basic";

								WCHAR vwzAudioWav[] =                    L"audio/wav";

								WCHAR vwzAudioMID[] =                    L"audio/mid";

								WCHAR vwzImageGif[] =                    L"image/gif";

								// Assigned by CheckBinaryHeaders when the buffer starts with FF D8

								WCHAR vwzImagePJpeg[] =                  L"image/pjpeg";

								WCHAR vwzImageJpeg[] =                   L"image/jpeg";

								WCHAR vwzImageTiff[] =                   L"image/tiff";

								WCHAR vwzImagePng[] =                    L"image/x-png";

								WCHAR vwzImageBmp[] =                    L"image/bmp";

								WCHAR vwzImageJG[] =                     L"image/x-jg";

								WCHAR vwzImageEmf[] =                    L"image/x-emf";

								WCHAR vwzImageWmf[] =                    L"image/x-wmf";

								WCHAR vwzVideoAvi[] =                    L"video/avi";

								WCHAR vwzVideoMpeg[] =                   L"video/mpeg";

								WCHAR vwzApplicationCompressed[] =       L"application/x-compressed";

								WCHAR vwzApplicationZipCompressed[] =    L"application/x-zip-compressed";

								WCHAR vwzApplicationGzipCompressed[] =   L"application/x-gzip-compressed";

								WCHAR vwzApplicationJava[] =             L"application/java";

								// Assigned by CheckBinaryHeaders when the buffer starts with "MZ"

								WCHAR vwzApplicationMSDownload[] =       L"application/x-msdownload";


								// 7 or 8 bit MIME types

								WCHAR vwzTextHTML[] =                    L"text/html";

								WCHAR vwzApplicationOctetStream[] =      L"application/octet-stream";


								//+---------------------------------------------------------------------------

								//

								//  Method:     CContentAnalyzer::SampleData

								//

								//  Synopsis:   Walks the sample buffer (_pBuf) one byte at a time and

								//              (a) tallies each byte into one of the character-class

								//                  counters _cbNL, _cbCR, _cbFF, _cbText, _cbCtrl,

								//                  _cbHigh, and

								//              (b) looks for textual markers that identify a format,

								//                  setting _fFoundXML, _fFoundTextScriptlet,

								//                  _fFoundHTML, _fFoundCDF, _fFoundXBitMap or

								//                  _fFoundMacBinhex.

								//

								//  Arguments:  (void)

								//

								//  Returns:    (void) - results are left in the members listed above.

								//

								//  History:    5-25-96   AdriaanC (Adriaan Canter) Created

								//

								//  Notes:      Only the counters are reset here; the _fFound* flags are

								//              set but never cleared by this method.

								//

								//              The scan stops (break) as soon as a decisive marker is

								//              found, so the counters then cover only the bytes up to

								//              and including the marker's first byte.

								//

								//              The loop visits indices 0 .. _cbSample - 2; the last

								//              sampled byte is not classified.

								//

								//              NOTE(review): the marker compares look ahead of p (up to

								//              sizeof(vszBinHexMagic) bytes, and *(p+5) for XML) with no

								//              check against _cbSample. This presumably relies on the

								//              buffer being NUL-terminated or over-allocated - confirm

								//              with the code that fills _pBuf.

								//

								//----------------------------------------------------------------------------

								void CContentAnalyzer::SampleData()

								{

								    DEBUG_ENTER((DBG_TRANS,

								                None,

								                "CContentAnalyzer::SampleData",

								                "this=%#x",

								                this

								                ));


								    // XBM detection is a three step sequence: "#define", then "_width",

								    // then "_bits". These two flags remember the first two steps.

								    BOOL fFoundFirstXBitMapTag = FALSE;

								    BOOL fFoundSecondXBitMapTag = FALSE;

								    // TRUE when the current byte is in the range 32..127

								    BOOL fFoundAsciiChar = FALSE;


								    // Accumulates 50 per weak HTML hint (see below)

								    int nHTMLConfidence = 0;


								    unsigned char *p = (unsigned char*) _pBuf;


								    _cbNL = _cbCR = _cbFF = _cbText = _cbCtrl = _cbHigh = 0;


								    // Count incidence of character types.

								    for (int i = 0; i < _cbSample - 1; i++)

								    {

								        fFoundAsciiChar = FALSE;


								        if (*p == '\n')           // new line

								        {

								            _cbNL++;

								        }

								        else if (*p == '\r')      // carriage return

								        {

								            _cbCR++;

								        }

								        else if (*p == '\f')      // form feed

								        {

								            _cbFF++;

								        }

								        else if (*p == '\t')      // tab

								        {

								            // tabs are counted as text, not as control characters

								            _cbText++;

								        }

								        else if (*p < 32)         // control character

								        {

								            _cbCtrl++;

								        }

								        else if (*p >= 32 && *p < 128)        // regular text

								        {

								            // note: 127 (DEL) falls in this range and counts as text

								            _cbText++;

								            fFoundAsciiChar = TRUE;

								        }

								        else                      // extended text

								        {

								            _cbHigh++;

								        }


								        // Marker detection only runs on bytes in the range 32..127.

								        if (fFoundAsciiChar)

								        {

								            // check for html

								            if (*p == '<')

								            {

								                // "<?XML" followed by ':', ' ' or tab.

								                // StrCmpNIC is declared elsewhere; presumably a

								                // case-insensitive compare of the first n characters.

								                if (!StrCmpNIC((char*) p+1, "?XML", sizeof("?XML") - 1) &&

								                    (

								                        (*(p+5) == ':') ||

								                        (*(p+5) == ' ') ||

								                        (*(p+5) == '\t')) )

								                {

								                    _fFoundXML = TRUE;

								                    // don't break : for CDF

								                    // (scanning continues so a later <CHANNEL tag can

								                    // still set _fFoundCDF)

								                }


								                if (!StrCmpNIC((char*) p+1, "SCRIPTLET", sizeof("SCRIPTLET") - 1))

								                {

								                    _fFoundTextScriptlet = TRUE;

								                    break;

								                }


								                // Strong hints: any one of these tags is taken as HTML

								                // immediately. "<SCRIPTLET" never reaches the "SCRIPT"

								                // test because it breaks out above.

								                if (!StrCmpNIC((char*) p+1, "HTML", sizeof("HTML") - 1)

								                    || !StrCmpNIC((char*) p+1, "HEAD", sizeof("HEAD") - 1)

								                    || !StrCmpNIC((char*) p+1, "TITLE", sizeof("TITLE") - 1)

								                    || !StrCmpNIC((char*) p+1, "BODY", sizeof("BODY") - 1)

								                    || !StrCmpNIC((char*) p+1, "SCRIPT", sizeof("SCRIPT") - 1)

								                    || !StrCmpNIC((char*) p+1, "A HREF", sizeof("A HREF") - 1)

								                    || !StrCmpNIC((char*) p+1, "PRE", sizeof("PRE") - 1)

								                    || !StrCmpNIC((char*) p+1, "IMG", sizeof("IMG") - 1)

								                    || !StrCmpNIC((char*) p+1, "PLAINTEXT", sizeof("PLAINTEXT") - 1)

								                    || !StrCmpNIC((char*) p+1, "TABLE", sizeof("TABLE") - 1))

								                {

								                    _fFoundHTML = TRUE;

								                    break;

								                }

								                // Weak hints: these are prefix compares of length 1-3,

								                // so e.g. "A" is satisfied by any tag starting with 'A'

								                // (assuming StrCmpNIC compares only the first n chars).

								                else if (   !StrCmpNIC((char*) p+1, "HR", sizeof("HR") - 1)

								                         || !StrCmpNIC((char*) p+1, "A", sizeof("A") - 1)

								                         || !StrCmpNIC((char*) p+1, "/A", sizeof("/A") - 1)

								                         || !StrCmpNIC((char*) p+1, "B", sizeof("B") - 1)

								                         || !StrCmpNIC((char*) p+1, "/B", sizeof("/B") - 1)

								                         || !StrCmpNIC((char*) p+1, "P", sizeof("P") - 1)

								                         || !StrCmpNIC((char*) p+1, "/P", sizeof("/P") - 1)

								                         || !StrCmpNIC((char*) p+1, "!--", sizeof("!--") - 1)

								                        )

								                {

								                    //

								                    // In order for this branch to identify this is HTML

								                    // We have to make sure:

								                    //      1. some HTML control char exists

								                    //      2. We've scanned the whole data block

								                    //      3. 2/3 of the data should be text

								                    //

								                    // NOTE(review): condition 2 is tested as

								                    // i == _cbSample - 1, but the loop runs only while

								                    // i < _cbSample - 1, so this test cannot succeed

								                    // and weak hints never set _fFoundHTML here.

								                    // Confirm whether that is intended before changing.

								                    //


								                    nHTMLConfidence += 50;

								                    if (    nHTMLConfidence >= 100

								                        &&  i == _cbSample - 1

								                        &&  _cbText >= ((_cbSample * 2) / 3)

								                       )

								                    {

								                        _fFoundHTML = TRUE;

								                        break;

								                    }

								                }

								                // Channel Definition Format: "<CHANNEL"

								                if (!StrCmpNIC((char*) p+1, "CHANNEL", sizeof("CHANNEL") - 1))

								                {

								                    _fFoundCDF = TRUE;

								                    break;

								                }


								            }

								            else if (!StrCmpNIC((char*) p, "-->", sizeof("-->") - 1))

								            {

								                // "-->" is the closing marker of an HTML comment;

								                // it is treated as another weak HTML hint.

								                // I really want to make sure that most of the

								                // char are printable

								                // potential issue: International code page?

								                // NOTE(review): same unreachable i == _cbSample - 1

								                // test as in the weak-tag branch above. This branch

								                // also uses > where that one uses >= for the 2/3 test.

								                nHTMLConfidence += 50;

								                if (   (nHTMLConfidence >= 100)

								                    && (i == _cbSample - 1 )

								                    && (_cbText > (_cbSample * 2 /3) )

								                   )

								                {

								                    _fFoundHTML = TRUE;

								                    break;

								                }

								            }

								            // check for xbitmap

								            // step 1: "#define"

								            else if (*p == '#')

								            {

								                if (!StrCmpNC((char*) p+1, vszXbmMagic1, sizeof(vszXbmMagic1) - 1))

								                    fFoundFirstXBitMapTag = TRUE;

								            }

								            // step 3: "_bits", only once "_width" has been seen. This

								            // branch is tested before step 2 so that after step 2

								            // every '_' is checked against "bits" only.

								            else if (*p == '_' && fFoundSecondXBitMapTag)

								            {

								                if (!StrCmpNC((char*) p+1, vszXbmMagic3, sizeof(vszXbmMagic3) - 1))

								                {

								                    _fFoundXBitMap = TRUE;

								                    break;

								                }

								            }

								            // step 2: "_width", only once "#define" has been seen

								            else if (*p == '_' && fFoundFirstXBitMapTag)

								            {

								                if (!StrCmpNC((char*) p+1, vszXbmMagic2, sizeof(vszXbmMagic2) - 1))

								                    fFoundSecondXBitMapTag = TRUE;

								            }


								            // MacBinhex

								            // 'c' followed by "onverted with BinHex" (case-sensitive,

								            // assuming StrCmpNC is the case-sensitive counterpart of

								            // StrCmpNIC)

								            else if (*p == 'c')

								            {

								                if (!StrCmpNC((char*) p+1, vszBinHexMagic, sizeof(vszBinHexMagic) - 1))

								                {

								                    _fFoundMacBinhex = TRUE;

								                    break;

								                }

								            }


								        }

								        p++;

								    }


								    DEBUG_LEAVE(0);

								}


								//+---------------------------------------------------------------------------
								//
								//  Method:     CContentAnalyzer::IsBMP
								//
								//  Synopsis:   Decides whether the sample buffer starts with a plausible
								//              bitmap file header: the "BM" signature followed by a
								//              BITMAPFILEHEADER whose two reserved fields are zero.
								//
								//  Arguments:  (void)
								//
								//  Returns:    BOOL - TRUE if the sample looks like a .bmp file,
								//              FALSE otherwise (including when the sample is too short
								//              to hold a complete BITMAPFILEHEADER).
								//
								//  History:    5-25-96   AdriaanC (Adriaan Canter) Created
								//
								//  Notes:      The size and signature checks return before the header
								//              is dereferenced. Previously they only recorded FALSE and
								//              fell through, so the reserved fields were read even when
								//              the sample was shorter than the header.
								//
								//----------------------------------------------------------------------------
								BOOL CContentAnalyzer::IsBMP()
								{
								    DEBUG_ENTER((DBG_TRANS,
								                Bool,
								                "CContentAnalyzer::IsBMP",
								                "this=%#x",
								                this
								                ));

								    BOOL bRetVal = FALSE;
								    BITMAPFILEHEADER UNALIGNED *pBMFileHdr;

								    // Reject before touching the buffer contents:
								    //   - fewer than 2 bytes cannot hold the signature,
								    //   - the sample must be big enough for the whole file header,
								    //   - the header must start with "BM".
								    // The checks are evaluated left to right, so the signature is only
								    // read once the size checks have passed.
								    if (_cbSample < 2
								        || _cbSample < sizeof(BITMAPFILEHEADER)
								        || StrCmpNC(_pBuf, vszBmpMagic, sizeof(vszBmpMagic) - 1))
								    {
								        DEBUG_LEAVE(bRetVal);
								        return bRetVal;
								    }

								    pBMFileHdr = (BITMAPFILEHEADER*)(_pBuf);

								#ifdef UNIX

								    /* Use 14 on Unix, because we want the size without the padding
								     * done on Unix. sizeof(BITMAPFILEHEADER) = 16 on Unix with padding
								     */
								    #define UNIX_BITMAP_HEADER_SIZE 14
								    BITMAPFILEHEADER bmFileHeader;

								    // If the helper can unpack the on-disk layout, inspect the unpacked
								    // copy; otherwise fall back to reading the buffer in place.
								    if(MwReadBITMAPFILEHEADER((LPBYTE)_pBuf, UNIX_BITMAP_HEADER_SIZE, &bmFileHeader))
								        pBMFileHdr = &bmFileHeader;

								#endif /* UNIX */

								    // The reserved fields must be set to 0
								    if (pBMFileHdr->bfReserved1 == 0 && pBMFileHdr->bfReserved2 == 0)
								    {
								        bRetVal = TRUE;
								    }

								    DEBUG_LEAVE(bRetVal);
								    return bRetVal;
								}


								//+---------------------------------------------------------------------------
								//
								//  Method:     CContentAnalyzer::GetDataFormat
								//
								//  Synopsis:   Maps a MIME type string to its data format class by
								//              asking FindMediaTypeFormat.
								//
								//  Arguments:  (LPCWSTR wzMimeType) - MIME type to classify; may be NULL.
								//
								//  Returns:    DWORD data format:
								//                DATAFORMAT_AMBIGUOUS  wzMimeType is NULL or equals
								//                                      "(null)" (case-insensitive)
								//                DATAFORMAT_UNKNOWN    FindMediaTypeFormat did not
								//                                      return S_OK
								//                otherwise the format FindMediaTypeFormat reported.
								//
								//  History:    7-21-96   AdriaanC (Adriaan Canter) Created
								//
								//  Notes:      The clipboard format that FindMediaTypeFormat also
								//              returns is not used.
								//
								//----------------------------------------------------------------------------
								DWORD CContentAnalyzer::GetDataFormat(LPCWSTR wzMimeType)
								{
								    DEBUG_ENTER((DBG_TRANS,
								                Dword,
								                "CContentAnalyzer::GetDataFormat",
								                "this=%#x, %.80wq",
								                this, wzMimeType
								                ));

								    // A missing or "(null)" type carries no information at all.
								    DWORD dwResult = DATAFORMAT_AMBIGUOUS;

								    if (wzMimeType && _wcsicmp(wzMimeType, vwzNULL) != 0)
								    {
								        CLIPFORMAT cfIgnored;
								        DATAFORMAT dfLookedUp;

								        HRESULT hrLookup = FindMediaTypeFormat(wzMimeType, &cfIgnored, (DWORD *)&dfLookedUp);

								        // Only trust the out-parameter on an exact S_OK.
								        if (hrLookup == S_OK)
								        {
								            dwResult = dfLookedUp;
								        }
								        else
								        {
								            dwResult = DATAFORMAT_UNKNOWN;
								        }
								    }

								    DEBUG_LEAVE(dwResult);
								    return dwResult;
								}


								//+---------------------------------------------------------------------------
								//
								//  Method:     CContentAnalyzer::FormatAgreesWithData
								//
								//  Synopsis:   Checks a data format class against the text/binary
								//              verdict stored in _fBinary.
								//
								//  Arguments:  (DWORD dwFormat) - one of the DATAFORMAT_* values.
								//
								//  Returns:    BOOL - TRUE when
								//                dwFormat is DATAFORMAT_TEXT and _fBinary is FALSE, or
								//                dwFormat is DATAFORMAT_BINARY and _fBinary is TRUE, or
								//                dwFormat is DATAFORMAT_TEXTORBINARY;
								//              FALSE for every other combination.
								//
								//  History:    8-14-96   AdriaanC (Adriaan Canter) Created
								//
								//  Notes:      _fBinary is compared against TRUE exactly, not merely
								//              tested for non-zero.
								//
								//----------------------------------------------------------------------------
								BOOL CContentAnalyzer::FormatAgreesWithData(DWORD dwFormat)
								{
								    DEBUG_ENTER((DBG_TRANS,
								                Bool,
								                "CContentAnalyzer::FormatAgreesWithData",
								                "this=%#x, %#x",
								                this, dwFormat
								                ));

								    const BOOL fTextOnText     = (dwFormat == DATAFORMAT_TEXT)   && (_fBinary == FALSE);
								    const BOOL fBinaryOnBinary = (dwFormat == DATAFORMAT_BINARY) && (_fBinary == TRUE);
								    const BOOL fAcceptsEither  = (dwFormat == DATAFORMAT_TEXTORBINARY);

								    BOOL fAgrees = FALSE;
								    if (fTextOnText || fBinaryOnBinary || fAcceptsEither)
								    {
								        fAgrees = TRUE;
								    }

								    DEBUG_LEAVE(fAgrees);
								    return fAgrees;
								}


								//+---------------------------------------------------------------------------
								//
								//  Method:     CContentAnalyzer::MatchDWordAtOffset
								//
								//  Synopsis:   Determines if a given magic word is found at
								//              the specified offset.
								//
								//  Arguments:  (DWORD magic)  - expected value; its most significant
								//                               byte corresponds to the byte at offset.
								//              (int offset)   - byte offset into the sample buffer.
								//
								//  Returns:    BOOL - TRUE if the four bytes at offset, read
								//              most-significant-byte first, equal magic. FALSE if they
								//              differ, if offset is negative, or if fewer than four
								//              bytes are available at offset.
								//
								//  History:    5-25-96   AdriaanC (Adriaan Canter) Created
								//
								//  Notes:      The bounds test is written as offset > _cbSample - 4
								//              rather than _cbSample < offset + 4, so a very large
								//              offset cannot overflow the addition.
								//
								//----------------------------------------------------------------------------
								BOOL CContentAnalyzer::MatchDWordAtOffset(DWORD magic, int offset)
								{
								    DEBUG_ENTER((DBG_TRANS,
								                Bool,
								                "CContentAnalyzer::MatchDWordAtOffset",
								                "this=%#x, %#x, %d",
								                this, magic, offset
								                ));

								    const int cbWord = (int) sizeof(DWORD);

								    // All four bytes must lie inside [0, _cbSample). A negative offset
								    // used to slip past the size test and index in front of the buffer.
								    if (offset < 0 || _cbSample < cbWord || offset > _cbSample - cbWord)
								    {
								        DEBUG_LEAVE(FALSE);
								        return FALSE;
								    }

								    const unsigned char* p = (const unsigned char*) _pBuf;

								    // Widen each byte to DWORD before shifting. Shifting the promoted
								    // (signed) int left by 24 would overflow for bytes >= 0x80.
								    DWORD dwWord = ((DWORD) p[offset]     << 24)
								                 | ((DWORD) p[offset + 1] << 16)
								                 | ((DWORD) p[offset + 2] << 8)
								                 |  (DWORD) p[offset + 3];

								    BOOL bRetVal = (magic == dwWord) ? TRUE : FALSE;

								    DEBUG_LEAVE(bRetVal);
								    return bRetVal;
								}


								//+---------------------------------------------------------------------------
								//
								//  Function:   QueryDefaultRegString
								//
								//  Synopsis:   Reads the default (unnamed) value of an open registry key
								//              into a caller-supplied buffer and makes sure the result
								//              is NUL-terminated.
								//
								//  Arguments:  (HKEY hKey)     - key opened with KEY_QUERY_VALUE
								//              (LPSTR pszOut)  - receives the value; may be NULL, in
								//                                which case only the query is performed
								//              (DWORD cbOut)   - size of pszOut in bytes
								//
								//  Returns:    BOOL - TRUE if RegQueryValueEx returned ERROR_SUCCESS.
								//
								//  Notes:      The registry does not guarantee that string data was
								//              stored with a terminator, so one is written here. If the
								//              data fills the buffer completely, the last byte is
								//              overwritten with the terminator.
								//
								//----------------------------------------------------------------------------
								static BOOL QueryDefaultRegString(HKEY hKey, LPSTR pszOut, DWORD cbOut)
								{
								    DWORD dwType = 0;
								    DWORD cbData = cbOut;

								    if (RegQueryValueEx(hKey, NULL, NULL, &dwType,
								        (LPBYTE)pszOut, &cbData) != ERROR_SUCCESS)
								    {
								        return FALSE;
								    }

								    if (pszOut && cbOut > 0)
								    {
								        // cbData is the number of bytes actually stored (<= cbOut).
								        pszOut[(cbData < cbOut) ? cbData : cbOut - 1] = '\0';
								    }

								    return TRUE;
								}

								//+---------------------------------------------------------------------------
								//
								//  Method:     CContentAnalyzer::FindAppFromExt
								//
								//  Synopsis:   Determines an associated application from
								//              a given file extension:
								//                1. reads the file type name stored as the default
								//                   value of HKEY_CLASSES_ROOT\<pszExt>
								//                   (e.g. ".txt" -> "txtfile");
								//                2. opens <type>\Shell\Open\Command or, if that key
								//                   cannot be opened, <type>\Shell\Connect To\Command;
								//                3. copies that key's default value (the command line)
								//                   into pszCommand.
								//
								//  Arguments:  (LPSTR pszExt)      - file extension including the dot
								//              (LPSTR pszCommand)  - receives the command line
								//              (DWORD cbCommand)   - size of pszCommand in bytes
								//
								//  Returns:    BOOL (Associated Application is found or not)
								//
								//  History:    7-15-96   AdriaanC (Adriaan Canter) Created
								//
								//  Notes:      The fallback key is only tried when the first key cannot
								//              be opened, not when its default value cannot be read.
								//
								//              The command line is read from the command key itself.
								//              Earlier code queried the extension key again at this
								//              point and so returned the file type name instead.
								//
								//----------------------------------------------------------------------------
								BOOL CContentAnalyzer::FindAppFromExt(LPSTR pszExt, LPSTR pszCommand, DWORD cbCommand)
								{
								    DEBUG_ENTER((DBG_TRANS,
								                Bool,
								                "CContentAnalyzer::FindAppFromExt",
								                "this=%#x, %.80q, %.80q, %d",
								                this, pszExt, pszCommand, cbCommand
								                ));

								    // Command sub-keys in order of preference. The second one is
								    // used by SmartTerm.
								    static const CHAR * const rgszCommandKeys[] =
								    {
								        szApplicationRegistryKey,
								        szApplicationRegistryKey2
								    };
								    const int cCommandKeys = (int) (sizeof(rgszCommandKeys) / sizeof(rgszCommandKeys[0]));

								    // BUGBUG - Is there a max registry path length?
								    CHAR szRegPath[MAX_PATH];
								    BOOL fReturn = FALSE;
								    HKEY hMimeKey = NULL;

								    if (!pszExt)
								    {
								        DEBUG_LEAVE(FALSE);
								        return FALSE;
								    }

								    // Should be a file extension
								    TransAssert((pszExt[0] == '.'));

								    // Open key on extension
								    if (RegOpenKeyEx(HKEY_CLASSES_ROOT, pszExt, 0,
								        KEY_QUERY_VALUE, &hMimeKey) == ERROR_SUCCESS)
								    {
								        // Find file type (txtfile, htmlfile, etc) .
								        // These currently utilize a null key.
								        if (QueryDefaultRegString(hMimeKey, szRegPath, sizeof(szRegPath)))
								        {
								            // Remember where the type name ends so each candidate
								            // suffix can be appended to the same prefix.
								            const size_t cchType = strlen(szRegPath);

								            for (int iKey = 0; iKey < cCommandKeys; iKey++)
								            {
								                const CHAR *pszSuffix = rgszCommandKeys[iKey];

								                // Skip a path that does not fit rather than open a
								                // silently truncated (and therefore different) key.
								                if (cchType + strlen(pszSuffix) >= sizeof(szRegPath))
								                {
								                    continue;
								                }

								                // szRegPath should now look similar to
								                // "txtfile\Shell\Open\Command".
								                szRegPath[cchType] = '\0';
								                strncat(szRegPath, pszSuffix, sizeof(szRegPath) - cchType - 1);

								                HKEY hAppKey = NULL;
								                if (RegOpenKeyEx(HKEY_CLASSES_ROOT, szRegPath, 0,
								                    KEY_QUERY_VALUE, &hAppKey) == ERROR_SUCCESS)
								                {
								                    // Find the application command line - again, null key.
								                    fReturn = QueryDefaultRegString(hAppKey, pszCommand, cbCommand);
								                    RegCloseKey(hAppKey);

								                    // The first key that opens decides the result.
								                    break;
								                }
								            }
								        }
								        RegCloseKey(hMimeKey);
								    }

								    DEBUG_LEAVE(fReturn);
								    return fReturn;
								}

								//+---------------------------------------------------------------------------
								//
								//  Method:     CContentAnalyzer::CheckTextHeaders
								//
								//  Synopsis:   Compares the start of the sample buffer against the known
								//              text-format signatures and, on a match, stores the
								//              corresponding MIME type in _wzMimeType.
								//
								//  Arguments:  void
								//
								//  Returns:    BOOL - TRUE if a signature matched and _wzMimeType was
								//              set; FALSE otherwise (_wzMimeType is left untouched).
								//
								//  History:    7-23-96   AdriaanC (Adriaan Canter) Created
								//
								//  Notes:      Signatures are tried in this order, first match wins:
								//                "%PDF"   application/pdf (Acrobat)
								//                "%!"     application/postscript
								//                "{\rtf"  text/richtext
								//                "begin"  application/base64
								//              No fallback to text/plain is applied here.
								//
								//----------------------------------------------------------------------------
								BOOL CContentAnalyzer::CheckTextHeaders()
								{
								    DEBUG_ENTER((DBG_TRANS,
								                Bool,
								                "CContentAnalyzer::CheckTextHeaders",
								                "this=%#x",
								                this
								                ));

								    // Signature table; the order matters because "%PDF" must be
								    // probed ahead of the other '%'-prefixed signature "%!".
								    static const struct
								    {
								        CHAR  *pszMagic;    // signature bytes
								        int    cchMagic;    // length without the terminating NUL
								        WCHAR *pwzMime;     // MIME type reported on a match
								    }
								    rgTextSignatures[] =
								    {
								        { vszPdfMagic,        (int) (sizeof(vszPdfMagic) - 1),        vwzApplicationPdf        },
								        { vszPostscriptMagic, (int) (sizeof(vszPostscriptMagic) - 1), vwzApplicationPostscript },
								        { vszRichTextMagic,   (int) (sizeof(vszRichTextMagic) - 1),   vwzTextRichText          },
								        { vszBase64Magic,     (int) (sizeof(vszBase64Magic) - 1),     vwzApplicationBase64     }
								    };
								    const int cSignatures = (int) (sizeof(rgTextSignatures) / sizeof(rgTextSignatures[0]));

								    BOOL bRet = FALSE;

								    for (int iSig = 0; iSig < cSignatures; iSig++)
								    {
								        if (StrCmpNC(_pBuf, rgTextSignatures[iSig].pszMagic,
								                     rgTextSignatures[iSig].cchMagic) == 0)
								        {
								            _wzMimeType = rgTextSignatures[iSig].pwzMime;
								            bRet = TRUE;
								            break;
								        }
								    }

								    DEBUG_LEAVE(bRet);
								    return bRet;

								}


								//+---------------------------------------------------------------------------

								//

								//  Method:     CContentAnalyzer::CheckBinaryHeaders

								//

								//  Synopsis:

								//

								//

								//  Arguments:  void

								//

								//  Returns:    void

								//

								//  History:    7-23-96   AdriaanC (Adriaan Canter) Created

								//

								//  Notes:

								//

								//----------------------------------------------------------------------------

								BOOL CContentAnalyzer::CheckBinaryHeaders()

								{

								    DEBUG_ENTER((DBG_TRANS,

								                Bool,

								                "CContentAnalyzer::CheckBinaryHeaders",

								                "this=%#x",

								                this

								                ));


								    BOOL bRet = TRUE;

								    // image/gif

								    if (!StrCmpNIC(_pBuf, vszGif87Magic, sizeof(vszGif87Magic) - 1)

								        || !StrCmpNIC(_pBuf, vszGif89Magic, sizeof(vszGif89Magic) - 1))

								    {

								        _wzMimeType = vwzImageGif;

								    }


								    // image/pjpeg

								    else if ((BYTE)_pBuf[0] == JPEG_MAGIC_1 && (BYTE)_pBuf[1] == JPEG_MAGIC_2)

								    {

								        _wzMimeType = vwzImagePJpeg;

								    }


								    // img/bmp

								    else if (IsBMP())

								    {

								        _wzMimeType = vwzImageBmp;

								    }


								    // audio/wav

								    else if (MatchDWordAtOffset(RIFF_MAGIC, 0)

								        && MatchDWordAtOffset(WAV_MAGIC, 8))

								    {

								        _wzMimeType = vwzAudioWav;

								    }


								    // audio/basic (.au files)

								    else if (MatchDWordAtOffset(AU_DEC_MAGIC, 0)

								           || MatchDWordAtOffset(AU_SUN_MAGIC, 0)

								           || MatchDWordAtOffset(AU_DEC_INV_MAGIC, 0)

								           || MatchDWordAtOffset(AU_SUN_INV_MAGIC, 0))

								    {

								       _wzMimeType = vwzAudioBasic;

								    }


								    // image/tiff

								    else if (!StrCmpC(_pBuf, vszTiffMagic)) // "MM" followed by a \0

								    {

								        _wzMimeType = vwzImageTiff;

								    }


								    // application/x-msdownload

								    else if (!StrCmpNC(_pBuf, vszExeMagic, sizeof(vszExeMagic) - 1))

								    {

								        _wzMimeType = vwzApplicationMSDownload;

								    }


								    // image/x-png

								    else if (!StrCmpNC(_pBuf, vszPngMagic, sizeof(vszPngMagic) - 1))

								    {

								        _wzMimeType = vwzImagePng;

								    }


								    // image/x-jg

								    else if (!StrCmpNC(_pBuf, vszJGMagic, sizeof(vszJGMagic) - 1)

								        && (int) _pBuf[2] >= 3

								        && (int) _pBuf[2] <= 31

								        && _pBuf[4] == 0)

								    {

								        _wzMimeType = vwzImageJG;

								    }


								    // audio/x-aiff

								    else if (MatchDWordAtOffset(AIFF_INV_MAGIC, 0))

								    {

								       _wzMimeType = vwzAudioAiff;

								    }


								    else if (MatchDWordAtOffset(AIFF_MAGIC, 0) &&

								             ( MatchDWordAtOffset(AIFF_MAGIC_MORE_1, 8) ||

								               MatchDWordAtOffset(AIFF_MAGIC_MORE_2, 8) ) )

								    {

								        //

								        // according to DaveMay, the correct AIFF format would be:

								        // 'FORM....AIFF' or 'FORM....AIFC'

								        // Only check for 'FORM' is incorrect because .sc2 has the

								        // same sig

								        //

								       _wzMimeType = vwzAudioAiff;

								    }


								    // video/avi (or video/x-msvedio)

								    else if (MatchDWordAtOffset(RIFF_MAGIC, 0)

								        && MatchDWordAtOffset(AVI_MAGIC, 8))

								    {

								        _wzMimeType = vwzVideoAvi;

								    }


								    // video/mpeg

								    else if (MatchDWordAtOffset(MPEG_MAGIC, 0)

								            || MatchDWordAtOffset(MPEG_MAGIC_2, 0) )

								    {

								        _wzMimeType = vwzVideoMpeg;

								    }


								    // image/x-emf

								    else if (MatchDWordAtOffset(EMF_MAGIC_1, 0)

								        && MatchDWordAtOffset(EMF_MAGIC_2, 40))

								    {

								        _wzMimeType = vwzImageEmf;

								    }


								    // image/x-wmf

								    else if (MatchDWordAtOffset(WMF_MAGIC, 0))

								    {

								        _wzMimeType = vwzImageWmf;

								    }


								    // application/java

								    else if (MatchDWordAtOffset(JAVA_MAGIC, 0))

								    {

								        _wzMimeType = vwzApplicationJava;

								    }


								    // application/x-zip-compressed

								    else if (!StrCmpNC(_pBuf, vszZipMagic, sizeof(vszZipMagic) - 1))

								    {

								        _wzMimeType = vwzApplicationZipCompressed;

								    }


								    // application/x-compress

								    else if (!StrCmpNC(_pBuf, vszCompressMagic, sizeof(vszCompressMagic) - 1))

								    {

								        _wzMimeType = vwzApplicationCompressed;

								    }


								    // application/x-gzip

								    else if (!StrCmpNC(_pBuf, vszGzipMagic, sizeof(vszGzipMagic) - 1))

								    {

								        _wzMimeType = vwzApplicationGzipCompressed;

								    }


								    // application/x-zip-compressed

								    else if (!StrCmpNC(_pBuf, vszZipMagic, sizeof(vszZipMagic) - 1))

								    {

								        _wzMimeType = vwzApplicationZipCompressed;

								    }


								    // audio/mid

								    else if (!StrCmpC(_pBuf, vszMIDMagic))

								    {

								        _wzMimeType = vwzAudioMID;

								    }


								    // application/pdf (Acrobat)

								    else if (!StrCmpNC(_pBuf, vszPdfMagic, sizeof(vszPdfMagic) - 1))

								    {

								        _wzMimeType = vwzApplicationPdf;

								    }


								    // don't know what it is.

								    else

								    {

								        //_wzMimeType = vwzApplicationOctetStream;

								        bRet = FALSE;

								    }


								    DEBUG_LEAVE(bRet);

								    return bRet;

								}


								//+---------------------------------------------------------------------------
								//
								//  Method:     CContentAnalyzer::FindMimeFromData
								//
								//  Synopsis:   Attempts to guess the MIME type of a data buffer, using the
								//              suggested MIME type and the file name (extension) as hints.
								//
								//  Arguments:  [wzFileName]          -- file name, or a URL when grfFlags has
								//                                       FMFD_URLASFILENAME set; may be NULL
								//              [pBuf]                -- data to examine; may be NULL. The
								//                                       last sampled byte is temporarily
								//                                       overwritten with '\0' and restored
								//                                       before the function returns.
								//              [cbSample]            -- number of bytes in pBuf; at most
								//                                       SAMPLE_SIZE bytes are examined
								//              [wzSuggestedMimeType] -- MIME type suggested by the server;
								//                                       may be NULL. Anything from the
								//                                       first ';' on (e.g. charset info)
								//                                       is cut off IN PLACE.
								//              [grfFlags]            -- FMFD_* flags, saved in _grfFlags
								//
								//  Returns:    LPCWSTR (the MIME type guessed). This is either
								//              wzSuggestedMimeType itself (possibly NULL when there is
								//              no data to examine), _wzMimeTypeFromExt, or whatever the
								//              sampling / header checks stored in _wzMimeType.
								//
								//  History:    5-25-96   AdriaanC (Adriaan Canter) Created
								//
								//  Notes:      The two early returns happen before the buffer is touched;
								//              every later path leaves through the 'exit' label, which
								//              puts the saved last byte back.
								//
								//----------------------------------------------------------------------------
								LPCWSTR CContentAnalyzer::FindMimeFromData(LPCWSTR wzFileName, char* pBuf,
								    int cbSample, LPCWSTR wzSuggestedMimeType, DWORD grfFlags)
								{
								    DEBUG_ENTER((DBG_TRANS,
								                Pointer,
								                "CContentAnalyzer::FindMimeFromData",
								                "this=%#x, %.80wq, %.80q, %d, %.80wq, %#x",
								                this, wzFileName, pBuf, cbSample, wzSuggestedMimeType, grfFlags
								                ));

								    // NOTE: all initialized locals are declared here, ahead of the first
								    // 'goto exit', so that no jump bypasses an initialization.
								    BOOL fSampledData = FALSE;
								    BOOL fFoundMimeTypeFromExt = FALSE;
								    BOOL fExtensionChecked = FALSE;

								    CHAR* szFileExt = 0;
								    CHAR szFileName[MAX_PATH];
								    CHAR szMimeTypeFromExt[SZMIMESIZE_MAX];
								    CHAR szCommand[MAX_PATH];
								    CHAR cLastByte;

								    DWORD dwExtMimeTypeDataFormat;
								    DWORD dwSuggestedMimeTypeDataFormat;
								    DWORD dwMimeTypeDataFormat;
								    DWORD cbCommand = MAX_PATH;

								    _grfFlags = grfFlags;

								    // BUGBUG - we can use this information for DBCS.
								    // Remove any info appended to the suggested mime type
								    // such as charset information. This is identified by ';'
								    // Note that this writes through the caller's (nominally const) string.
								    if (wzSuggestedMimeType)
								    {
								        WCHAR* wptr = wcsstr(wzSuggestedMimeType, L";");
								        if (wptr)
								        {
								            *wptr = L'\0';
								        }
								    }

								    // Check to see if the server is suggesting an unknown mime type.
								    //
								    // server push returns "multipart" content type
								    // this is not the real mimetype, so we have to sniff
								    // to find out the truth
								    //
								    // The NULL test keeps wcsicmp from being handed a NULL string should
								    // GetDataFormat classify a missing suggestion as DATAFORMAT_UNKNOWN.
								    dwSuggestedMimeTypeDataFormat = GetDataFormat(wzSuggestedMimeType);
								    if (   wzSuggestedMimeType
								        && dwSuggestedMimeTypeDataFormat == DATAFORMAT_UNKNOWN
								        && wcsicmp(wzSuggestedMimeType, vwzmultipartmixed)
								        && wcsicmp(wzSuggestedMimeType, vwzmultipartmixedreplace) )
								    {
								        // Unknown and not multipart: return the suggested mime type.
								        _wzMimeType = (WCHAR*) wzSuggestedMimeType;

								        DEBUG_LEAVE(_wzMimeType);
								        return _wzMimeType;
								    }

								/*****
								    // check if we got an extension and extension mime
								    // matches the suggested mime - only for text/plain
								    if (    wzSuggestedMimeType
								        &&  wzFileName
								        && !wcscmp(wzSuggestedMimeType,vwzTextPlain))

								    {
								        fExtensionChecked = TRUE;
								        fFoundMimeTypeFromExt = FindMimeFromExt(
								                                        wzFileName,
								                                        szFileName,
								                                        szMimeTypeFromExt,
								                                        &dwExtMimeTypeDataFormat,
								                                        &szFileExt
								                                        );

								        // If there is a mime type associated with the file
								        // extension then return it.
								        if (   fFoundMimeTypeFromExt
								            && (dwExtMimeTypeDataFormat == dwSuggestedMimeTypeDataFormat)
								            && !wcscmp(wzSuggestedMimeType,_wzMimeTypeFromExt)
								            )
								        {
								            // If so, return the suggested mime type.
								            _wzMimeType = (WCHAR*) wzSuggestedMimeType;
								            return _wzMimeType;
								        }
								    }
								*****/

								    // Not enough data to tell anything
								    if (!pBuf || cbSample <= 0)
								    {
								        _wzMimeType = (WCHAR*) wzSuggestedMimeType;

								        DEBUG_LEAVE(_wzMimeType);
								        return _wzMimeType;
								    }

								    _pBuf = pBuf;
								    _cbSample = (cbSample <= SAMPLE_SIZE) ? cbSample : SAMPLE_SIZE;

								    // Save off last character. Null terminate the buffer.
								    // (_cbSample >= 1 here because cbSample > 0 was checked above.)
								    cLastByte = _pBuf[_cbSample - 1];
								    _pBuf[_cbSample - 1] = '\0';

								    // Common cases first - check the server indicated mime type
								    // for text/html, image/gif or image/[p]jpeg.
								    if (   wzSuggestedMimeType
								        && !StrCmpICW(wzSuggestedMimeType, vwzTextHTML))
								    {
								        // Sample the data. This routine also checks for the following
								        // mime types which require extended scanning through the buffer:
								        // text/html, image/x-xbitmap, application/macbinhex
								        SampleData();
								        fSampledData = TRUE;

								        if (_fFoundHTML)
								        {
								            _wzMimeType = vwzTextHTML;
								            goto exit;
								        }
								    }

								    // image/gif
								    else if (wzSuggestedMimeType
								        && !wcsicmp(wzSuggestedMimeType, vwzImageGif))
								    {
								        if (!StrCmpNIC(_pBuf, vszGif87Magic, sizeof(vszGif87Magic) - 1)
								           || !StrCmpNIC(_pBuf, vszGif89Magic, sizeof(vszGif89Magic) - 1))
								        {
								            _wzMimeType = vwzImageGif;
								            goto exit;
								        }
								    }

								    // image/jpeg or image/pjpeg
								    // (a verified JPEG is always reported as image/pjpeg)
								    else if (wzSuggestedMimeType
								        && (!wcsicmp(wzSuggestedMimeType, vwzImagePJpeg)
								        || !wcsicmp(wzSuggestedMimeType, vwzImageJpeg)))
								    {
								        if ((BYTE)_pBuf[0] == JPEG_MAGIC_1 && (BYTE)_pBuf[1] == JPEG_MAGIC_2)
								        {
								            _wzMimeType = vwzImagePJpeg;
								            goto exit;
								        }
								    }

								    //
								    // ********************** BEGIN HACK *******************************
								    //
								    // we will remove this once trident has defined a unique signature
								    // for the .hta and .htc formats
								    //
								    // DanpoZ (98.08.12) - refer to IE5 SUPERHOT bug 35478
								    //
								    if (wzFileName)
								    {
								        CHAR* szExt;
								        CHAR szFile[MAX_PATH];

								        // Keep szFile a valid C string even if the conversion fails or
								        // truncates (wzFileName may be a URL longer than MAX_PATH).
								        // NOTE(review): W2A is defined outside this file section; this
								        // presumes it may leave the buffer unterminated on failure, as
								        // WideCharToMultiByte does - confirm against its definition.
								        szFile[0] = '\0';
								        W2A(wzFileName, szFile, MAX_PATH);
								        szFile[MAX_PATH - 1] = '\0';

								        if( grfFlags & FMFD_URLASFILENAME )
								        {
								            //
								            // remove the security context '\1' and replace it with '\0'
								            // but only do this when we are using URL to replace the filename
								            //
								            CHAR* pch = StrChr(szFile, '\1');
								            if (pch)
								            {
								                *pch = '\0';
								            }
								        }

								        szExt = FindFileExtension(szFile);
								        if( szExt &&
								            ( !StrCmpNIC(szExt, ".hta", sizeof(".hta") - 1) ||
								              !StrCmpNIC(szExt, ".htc", sizeof(".htc") - 1)  ) )
								        {
								            fExtensionChecked = TRUE;
								            fFoundMimeTypeFromExt = FindMimeFromExt(
								                                            wzFileName,
								                                            szFileName,
								                                            szMimeTypeFromExt,
								                                            &dwExtMimeTypeDataFormat,
								                                            &szFileExt
								                                            );

								            // If there is a mime type associated with the file
								            // extension then return it.
								            if (fFoundMimeTypeFromExt)
								            {
								                _wzMimeType = _wzMimeTypeFromExt;
								                goto exit;
								            }
								        }
								    }
								    //
								    // ********************** END HACK *********************************
								    //

								    // One of the following is true:

								    // 1) The server indicated a common mime type (html, gif or jpeg),
								    //    however, verification failed.
								    // 2) The server indicated an ambiguous mime type or
								    //    a known, but uncommon mime type.

								    // If not done so already, sample the data.
								    if (!fSampledData)
								    {
								        SampleData();
								        fSampledData = TRUE;
								    }

								    // Return any mime type that was positively
								    // identified during the data sampling
								    if( _fFoundCDF )
								    {
								        _wzMimeType = vwzApplicationCDF;
								        goto exit;
								    }
								    else if( _fFoundXML)
								    {
								        _wzMimeType = vwzTextXML;
								        goto exit;
								    }
								    else if (_fFoundHTML)
								    {
								        _wzMimeType = vwzTextHTML;
								        goto exit;
								    }
								    else if (_fFoundXBitMap)
								    {
								        _wzMimeType = vwzImageXBitmap;
								        goto exit;
								    }
								    else if (_fFoundMacBinhex)
								    {
								        _wzMimeType = vwzApplicationMacBinhex;
								        goto exit;
								    }
								    else if( _fFoundTextScriptlet )
								    {
								        _wzMimeType = vwzTextScriptlet;
								        goto exit;
								    }

								    // A suggested application/x-netcdf is only ever replaced by
								    // application/x-cdf, and that case (_fFoundCDF) has already left
								    // through 'exit' above - so here the suggestion is kept as is.
								    if(    wzSuggestedMimeType
								        && !wcsicmp(wzSuggestedMimeType, vwzApplicationNETCDF)
								      )
								    {
								        _wzMimeType = vwzApplicationNETCDF;
								        goto exit;
								    }

								    // Decide if buffer is primarily text or binary. Conduct
								    // pattern matching to determine a mime type depending on the
								    // finding. The more likely family of headers is tried first, the
								    // other one second, and only then the generic fallback is used.
								    if (!_cbCtrl || _cbText + _cbFF >= 16 * (_cbCtrl + _cbHigh))
								    {
								        _fBinary = FALSE;
								        if( !CheckTextHeaders() && !CheckBinaryHeaders() )
								        {
								            _wzMimeType = vwzTextPlain;
								        }
								    }
								    else
								    {
								        _fBinary = TRUE;
								        if( !CheckBinaryHeaders() && !CheckTextHeaders() )
								        {
								            _wzMimeType = vwzApplicationOctetStream;
								        }
								    }

								    // Determine format of the mime type from data
								    dwMimeTypeDataFormat = GetDataFormat(_wzMimeType);

								    // If the format of the mime type found from examining the data
								    // is not ambiguous, then return this mime type.
								    if (dwMimeTypeDataFormat != DATAFORMAT_AMBIGUOUS)
								    {
								        goto exit;
								    }

								    // Examination of data is inconclusive.
								    else
								    {
								        // If the suggested mime type is not ambiguous and does
								        // not conflict with the data format then return it.
								        if (dwSuggestedMimeTypeDataFormat != DATAFORMAT_AMBIGUOUS
								            && FormatAgreesWithData(dwSuggestedMimeTypeDataFormat))
								        {
								            _wzMimeType = (WCHAR*) wzSuggestedMimeType;
								            goto exit;
								        }

								        // Otherwise, attempt to obtain a mime type from any
								        // file extension. If none is found, but an application
								        // is registered for the file extension, return
								        // application/octet-stream.

								        // If there is a file extension, find any
								        // associated mime type (unless the .hta/.htc hack above
								        // already did the lookup).
								        if (wzFileName && !fExtensionChecked)
								        {
								            fExtensionChecked = TRUE;

								            fFoundMimeTypeFromExt = FindMimeFromExt(
								                                            wzFileName,
								                                            szFileName,
								                                            szMimeTypeFromExt,
								                                            &dwExtMimeTypeDataFormat,
								                                            &szFileExt
								                                            );
								        }

								        // The extension has an associated mime type. It is only used
								        // when its data format is DATAFORMAT_UNKNOWN; otherwise the
								        // mime type found from the data is returned.
								        if (fFoundMimeTypeFromExt)
								        {
								            if (dwExtMimeTypeDataFormat == DATAFORMAT_UNKNOWN)
								            {
								                _wzMimeType = _wzMimeTypeFromExt;
								            }
								            goto exit;
								        }

								        // Otherwise, check to see if there is an associated application.
								        if (szFileExt && FindAppFromExt(szFileExt, szCommand, cbCommand))
								        {
								            // Found an associated application.
								            _wzMimeType = vwzApplicationOctetStream;
								            goto exit;
								        }

								        // No suggested mime type, no mime type from file extension
								        // and no registered application found. Fall through and return
								        // mime type found from the data
								    }

								    exit:
								        // Replace the null termination with
								        // the original character.
								        _pBuf[_cbSample - 1] = cLastByte;

								        DEBUG_LEAVE(_wzMimeType);
								        return _wzMimeType;
								}


								//+---------------------------------------------------------------------------
								//
								//  Method:     CContentAnalyzer::FindMimeFromExt
								//
								//  Synopsis:   Converts the file name to ANSI, locates its extension and
								//              asks GetMimeFromExt for the MIME type associated with that
								//              extension. On success the wide-character MIME type is left
								//              in _wzMimeTypeFromExt.
								//
								//  Arguments:  [wzFileName] -- file name to take the extension from
								//              [szFileName] -- caller buffer of MAX_PATH chars that
								//                              receives the ANSI file name
								//              [szMimeTypeFromExt] -- caller buffer (SZMIMESIZE_MAX is
								//                              passed as its size) that receives the
								//                              ANSI MIME type
								//              [pdwExtMimeTypeDataFormat] -- receives the data format of
								//                              the MIME type; written only on success
								//              [ppszFileExt] -- optional; receives a pointer to the
								//                              extension INSIDE szFileName whenever an
								//                              extension was found, even if no MIME
								//                              type is associated with it
								//
								//  Returns:    TRUE if a MIME type was found for the extension,
								//              FALSE otherwise (including missing required arguments).
								//
								//  History:    5-25-96   AdriaanC (Adriaan Canter)
								//              1-28-1997   JohannP (Johann Posch)   made separate function
								//
								//  Notes:
								//
								//----------------------------------------------------------------------------
								BOOL CContentAnalyzer::FindMimeFromExt(
								                        LPCWSTR wzFileName,
								                        CHAR *szFileName,
								                        CHAR *szMimeTypeFromExt,
								                        DWORD *pdwExtMimeTypeDataFormat,
								                        CHAR  **ppszFileExt)
								{
								    DEBUG_ENTER((DBG_TRANS,
								                Bool,
								                "CContentAnalyzer::FindMimeFromExt",
								                "this=%#x, %.80wq, %.80q, %.80q, %#x, %#x",
								                this, wzFileName, szFileName, szMimeTypeFromExt, pdwExtMimeTypeDataFormat, ppszFileExt
								                ));

								    BOOL fFoundMimeTypeFromExt = FALSE;
								    UrlMkAssert((wzFileName && szFileName && pdwExtMimeTypeDataFormat));

								    // The assert above documents the contract; this guard makes sure a
								    // build without asserts does not go on to use the NULL pointers.
								    if (wzFileName && szFileName && pdwExtMimeTypeDataFormat)
								    {
								        DWORD dwMimeLen = SZMIMESIZE_MAX;
								        CHAR* szFileExt = 0;

								        // Convert the name to ANSI. The buffer is kept a valid C string
								        // even if the conversion fails or truncates an overlong name.
								        // NOTE(review): W2A is defined outside this file section; this
								        // presumes it may leave the buffer unterminated on failure, as
								        // WideCharToMultiByte does - confirm against its definition.
								        szFileName[0] = '\0';
								        W2A(wzFileName, szFileName, MAX_PATH);
								        szFileName[MAX_PATH - 1] = '\0';

								        // If there is a file extension, find any
								        // associated mime type.
								        szFileExt = FindFileExtension(szFileName);
								        if (szFileExt)
								        {
								            if (GetMimeFromExt(szFileExt, szMimeTypeFromExt, &dwMimeLen)
								                    == ERROR_SUCCESS)
								            {
								                fFoundMimeTypeFromExt = TRUE;
								                A2W(szMimeTypeFromExt, _wzMimeTypeFromExt, SZMIMESIZE_MAX);
								                *pdwExtMimeTypeDataFormat = GetDataFormat(_wzMimeTypeFromExt);
								            }

								            // Hand the extension back regardless of the lookup result;
								            // the pointer refers into the caller's szFileName buffer.
								            if (ppszFileExt)
								            {
								                *ppszFileExt = szFileExt;
								            }
								        }
								    }

								    DEBUG_LEAVE(fFoundMimeTypeFromExt);
								    return fFoundMimeTypeFromExt;
								}