|
|
//+---------------------------------------------------------------------------
//
// Microsoft Windows
// Copyright (C) Microsoft Corporation, 1992 - 1996.
//
// File: datasnif.cxx
//
// Contents: Stream Mime type checking (attempts to guess the MIME type
// of a buffer by simple pattern matching).
//
// Classes: CContentAnalyzer
//
// Functions: private:
// CContentAnalyzer::SampleData
// CContentAnalyzer::IsBMP
// CContentAnalyzer::GetDataFormat
// CContentAnalyzer::FormatAgreesWithData
// CContentAnalyzer::MatchDWordAtOffset
// CContentAnalyzer::FindAppFromExt
// CContentAnalyzer::CheckTextHeaders
// CContentAnalyzer::CheckBinaryHeaders
//
// public:
// CContentAnalyzer::FindMimeFromData
// ::FindMimeFromData
//
//
// History: 05-25-96 AdriaanC (Adriaan Canter) Created
// 07-16-96 AdriaanC (Adriaan Canter) Modified
// 08-06-96 AdriaanC (Adriaan Canter) Modified
// 08-14-96 AdriaanC (Adriaan Canter) Modified
//
//----------------------------------------------------------------------------
#include <trans.h>
#include "datasnif.hxx"
#include <shlwapip.h>
#ifdef UNIX
#include <mainwin.h>
#endif
// Debug/perf logging tag declared for this file.
// NOTE(review): PerfDbgTag and DEB_DATA are defined elsewhere, and
// tagDataSniff is not referenced by any code visible in this file.
PerfDbgTag(tagDataSniff, "Urlmon", "Log DataSniff", DEB_DATA);
// Max no. bytes to look at.  FindMimeFromData clamps _cbSample to this
// value, so no sniffing routine examines more than this many bytes.
#define SAMPLE_SIZE 256
// Registry key suffixes for app/fileext associations.  FindAppFromExt
// appends these to the file-type name read from HKEY_CLASSES_ROOT\<.ext>.
#define szApplicationRegistryKey "\\Shell\\Open\\Command"
#define szApplicationRegistryKey2 "\\Shell\\Connect To\\Command"
// NOTE(review): not referenced by any code visible in this file.
#define szMimeRegistryKey "MIME\\Database\\Content Type\\"
// Magic header words.  MatchDWordAtOffset assembles four buffer bytes with
// the first byte most significant, so each constant reads left to right as
// the byte sequence expected in the data.
// ".snd"
#define AU_SUN_MAGIC 0x2e736e64
// "dns." (".snd" byte-reversed)
#define AU_SUN_INV_MAGIC 0x646e732e
// ".sd\0"
#define AU_DEC_MAGIC 0x2e736400
// "\0ds." (".sd\0" byte-reversed)
#define AU_DEC_INV_MAGIC 0x0064732e
// "FORM" - only accepted as AIFF together with AIFF_MAGIC_MORE_1/2 at offset 8
#define AIFF_MAGIC 0x464f524d
// "MROF" ("FORM" byte-reversed) - accepted as AIFF on its own
#define AIFF_INV_MAGIC 0x4d524f46
// Form-type words expected at offset 8 after "FORM".
// NOTE(review): these are multi-character literals; their numeric value is
// implementation-defined, so the match depends on the compiler packing the
// first character into the most significant byte.
#define AIFF_MAGIC_MORE_1 'AIFF'
#define AIFF_MAGIC_MORE_2 'AIFC'
// "RIFF" - container signature checked at offset 0 for both WAV and AVI
#define RIFF_MAGIC 0x52494646
// "AVI " - checked at offset 8 after "RIFF"
#define AVI_MAGIC 0x41564920
// "WAVE" - checked at offset 8 after "RIFF"
#define WAV_MAGIC 0x57415645
// Bytes CA FE BA BE - mapped to application/java
#define JAVA_MAGIC 0xcafebabe
// Bytes 00 00 01 B3 and 00 00 01 BA - either one maps to video/mpeg
#define MPEG_MAGIC 0x000001b3
#define MPEG_MAGIC_2 0x000001ba
// Bytes 01 00 00 00 at offset 0, combined with EMF_MAGIC_2 at offset 40
#define EMF_MAGIC_1 0x01000000
// " EMF" - checked at offset 40
#define EMF_MAGIC_2 0x20454d46
// Bytes D7 CD C6 9A - mapped to image/x-wmf
#define WMF_MAGIC 0xd7cdc69a
// First two bytes (FF D8) of a JPEG stream, compared byte by byte
#define JPEG_MAGIC_1 0xFF
#define JPEG_MAGIC_2 0xD8
// Magic header text
CHAR vszRichTextMagic[] = "{\\rtf"; CHAR vszPostscriptMagic[] = "%!"; CHAR vszBinHexMagic[] = "onverted with BinHex"; CHAR vszBase64Magic[] = "begin"; CHAR vszGif87Magic[] = "GIF87"; CHAR vszGif89Magic[] = "GIF89"; CHAR vszTiffMagic[] = "MM"; CHAR vszBmpMagic[] = "BM"; CHAR vszZipMagic[] = "PK"; CHAR vszExeMagic[] = "MZ"; CHAR vszPngMagic[] = "\211PNG\r\n\032\n"; CHAR vszCompressMagic[] = "\037\235"; CHAR vszGzipMagic[] = "\037\213"; CHAR vszXbmMagic1[] = "define"; CHAR vszXbmMagic2[] = "width"; CHAR vszXbmMagic3[] = "bits"; CHAR vszPdfMagic[] = "%PDF"; CHAR vszJGMagic[] = "JG"; CHAR vszMIDMagic[] = "MThd";
// null MIME type
WCHAR vwzNULL[] = L"(null)";
// 7 bit MIME Types
WCHAR vwzTextPlain[] = L"text/plain"; WCHAR vwzTextRichText[] = L"text/richtext"; WCHAR vwzImageXBitmap[] = L"image/x-xbitmap"; WCHAR vwzApplicationPostscript[] = L"application/postscript"; WCHAR vwzApplicationBase64[] = L"application/base64"; WCHAR vwzApplicationMacBinhex[] = L"application/macbinhex40"; WCHAR vwzApplicationPdf[] = L"application/pdf"; WCHAR vwzApplicationCDF[] = L"application/x-cdf"; WCHAR vwzApplicationNETCDF[] = L"application/x-netcdf"; WCHAR vwzmultipartmixedreplace[] = L"multipart/x-mixed-replace"; WCHAR vwzmultipartmixed[] = L"multipart/mixed"; WCHAR vwzTextScriptlet[] = L"text/scriptlet"; WCHAR vwzTextComponent[] = L"text/x-component"; WCHAR vwzTextXML[] = L"text/xml"; WCHAR vwzApplicationHTA[] = L"application/hta";
// 8 bit MIME types
WCHAR vwzAudioAiff[] = L"audio/x-aiff"; WCHAR vwzAudioBasic[] = L"audio/basic"; WCHAR vwzAudioWav[] = L"audio/wav"; WCHAR vwzAudioMID[] = L"audio/mid"; WCHAR vwzImageGif[] = L"image/gif"; WCHAR vwzImagePJpeg[] = L"image/pjpeg"; WCHAR vwzImageJpeg[] = L"image/jpeg"; WCHAR vwzImageTiff[] = L"image/tiff"; WCHAR vwzImagePng[] = L"image/x-png"; WCHAR vwzImageBmp[] = L"image/bmp"; WCHAR vwzImageJG[] = L"image/x-jg"; WCHAR vwzImageEmf[] = L"image/x-emf"; WCHAR vwzImageWmf[] = L"image/x-wmf"; WCHAR vwzVideoAvi[] = L"video/avi"; WCHAR vwzVideoMpeg[] = L"video/mpeg"; WCHAR vwzApplicationCompressed[] = L"application/x-compressed"; WCHAR vwzApplicationZipCompressed[] = L"application/x-zip-compressed"; WCHAR vwzApplicationGzipCompressed[] = L"application/x-gzip-compressed"; WCHAR vwzApplicationJava[] = L"application/java"; WCHAR vwzApplicationMSDownload[] = L"application/x-msdownload";
// 7 or 8 bit MIME types
WCHAR vwzTextHTML[] = L"text/html"; WCHAR vwzApplicationOctetStream[] = L"application/octet-stream";
//+---------------------------------------------------------------------------
//
// Method: CContentAnalyzer::SampleData
//
// Synopsis:
//
// Arguments: (void)
//
// Returns: (void)
//
// History: 5-25-96 AdriaanC (Adriaan Canter) Created
//
// Notes:
//
//----------------------------------------------------------------------------
void CContentAnalyzer::SampleData() { DEBUG_ENTER((DBG_TRANS, None, "CContentAnalyzer::SampleData", "this=%#x", this )); BOOL fFoundFirstXBitMapTag = FALSE; BOOL fFoundSecondXBitMapTag = FALSE; BOOL fFoundAsciiChar = FALSE;
int nHTMLConfidence = 0;
unsigned char *p = (unsigned char*) _pBuf;
_cbNL = _cbCR = _cbFF = _cbText = _cbCtrl = _cbHigh = 0;
// Count incidence of character types.
for (int i = 0; i < _cbSample - 1; i++) { fFoundAsciiChar = FALSE;
if (*p == '\n') // new line
{ _cbNL++; } else if (*p == '\r') // carriage return
{ _cbCR++; } else if (*p == '\f') // form feed
{ _cbFF++; } else if (*p == '\t') // tab
{ _cbText++; } else if (*p < 32) // control character
{ _cbCtrl++; } else if (*p >= 32 && *p < 128) // regular text
{ _cbText++; fFoundAsciiChar = TRUE; } else // extended text
{ _cbHigh++; }
if (fFoundAsciiChar) { // check for html
if (*p == '<') { if (!StrCmpNIC((char*) p+1, "?XML", sizeof("?XML") - 1) && ( (*(p+5) == ':') || (*(p+5) == ' ') || (*(p+5) == '\t')) ) { _fFoundXML = TRUE; // don't break : for CDF
}
if (!StrCmpNIC((char*) p+1, "SCRIPTLET", sizeof("SCRIPTLET") - 1)) { _fFoundTextScriptlet = TRUE; break; }
if (!StrCmpNIC((char*) p+1, "HTML", sizeof("HTML") - 1) || !StrCmpNIC((char*) p+1, "HEAD", sizeof("HEAD") - 1) || !StrCmpNIC((char*) p+1, "TITLE", sizeof("TITLE") - 1) || !StrCmpNIC((char*) p+1, "BODY", sizeof("BODY") - 1) || !StrCmpNIC((char*) p+1, "SCRIPT", sizeof("SCRIPT") - 1) || !StrCmpNIC((char*) p+1, "A HREF", sizeof("A HREF") - 1) || !StrCmpNIC((char*) p+1, "PRE", sizeof("PRE") - 1) || !StrCmpNIC((char*) p+1, "IMG", sizeof("IMG") - 1) || !StrCmpNIC((char*) p+1, "PLAINTEXT", sizeof("PLAINTEXT") - 1) || !StrCmpNIC((char*) p+1, "TABLE", sizeof("TABLE") - 1)) { _fFoundHTML = TRUE; break; } else if ( !StrCmpNIC((char*) p+1, "HR", sizeof("HR") - 1) || !StrCmpNIC((char*) p+1, "A", sizeof("A") - 1) || !StrCmpNIC((char*) p+1, "/A", sizeof("/A") - 1) || !StrCmpNIC((char*) p+1, "B", sizeof("B") - 1) || !StrCmpNIC((char*) p+1, "/B", sizeof("/B") - 1) || !StrCmpNIC((char*) p+1, "P", sizeof("P") - 1) || !StrCmpNIC((char*) p+1, "/P", sizeof("/P") - 1) || !StrCmpNIC((char*) p+1, "!--", sizeof("!--") - 1) ) { //
// In order for this branch to identify this is HTML
// We have to make sure:
// 1. some HTML control char exists
// 2. We've scanned the whole data block
// 3. 2/3 of the data should be text
//
nHTMLConfidence += 50; if ( nHTMLConfidence >= 100 && i == _cbSample - 1 && _cbText >= ((_cbSample * 2) / 3) ) { _fFoundHTML = TRUE; break; } } if (!StrCmpNIC((char*) p+1, "CHANNEL", sizeof("CHANNEL") - 1)) { _fFoundCDF = TRUE; break; } } else if (!StrCmpNIC((char*) p, "-->", sizeof("-->") - 1)) { // comment begin
// I really want to make sure that most of the
// char are printable
// potential issue: International code page?
nHTMLConfidence += 50; if ( (nHTMLConfidence >= 100) && (i == _cbSample - 1 ) && (_cbText > (_cbSample * 2 /3) ) ) { _fFoundHTML = TRUE; break; } } // check for xbitmap
else if (*p == '#') { if (!StrCmpNC((char*) p+1, vszXbmMagic1, sizeof(vszXbmMagic1) - 1)) fFoundFirstXBitMapTag = TRUE; } else if (*p == '_' && fFoundSecondXBitMapTag) { if (!StrCmpNC((char*) p+1, vszXbmMagic3, sizeof(vszXbmMagic3) - 1)) { _fFoundXBitMap = TRUE; break; } } else if (*p == '_' && fFoundFirstXBitMapTag) { if (!StrCmpNC((char*) p+1, vszXbmMagic2, sizeof(vszXbmMagic2) - 1)) fFoundSecondXBitMapTag = TRUE; }
// MacBinhex
else if (*p == 'c') { if (!StrCmpNC((char*) p+1, vszBinHexMagic, sizeof(vszBinHexMagic) - 1)) { _fFoundMacBinhex = TRUE; break; } }
} p++; }
DEBUG_LEAVE(0); }
//+---------------------------------------------------------------------------
//
// Method: CContentAnalyzer::IsBMP
//
// Synopsis:
//
// Arguments: (void)
//
// Returns: BOOL
//
// History: 5-25-96 AdriaanC (Adriaan Canter) Created
//
// Notes:
//
//----------------------------------------------------------------------------
// Reports whether the sample looks like a Windows bitmap file: it must be
// at least sizeof(BITMAPFILEHEADER) bytes long, start with "BM", and have
// both reserved header fields equal to zero.
//
// The header is dereferenced only after the length check has passed.
// Previously the reserved fields were read even when the sample was
// shorter than the header, which read past the end of the sampled data.
BOOL CContentAnalyzer::IsBMP()
{
    DEBUG_ENTER((DBG_TRANS,
                Bool,
                "CContentAnalyzer::IsBMP",
                "this=%#x",
                this
                ));

    BOOL bRetVal = FALSE;

    // Sample size needs to be big enough, and the header must start "BM".
    // The size is compared as a signed value so a non-positive _cbSample
    // can never pass the check.
    if (_cbSample >= (int) sizeof(BITMAPFILEHEADER)
        && !StrCmpNC(_pBuf, vszBmpMagic, sizeof(vszBmpMagic) - 1))
    {
        BITMAPFILEHEADER UNALIGNED *pBMFileHdr = (BITMAPFILEHEADER*)(_pBuf);

#ifdef UNIX
        /* Use 14 on Unix, because we want the size without the padding
         * done on Unix. sizeof(BITMAPFILEHEADER) = 16 on Unix with padding */
#define UNIX_BITMAP_HEADER_SIZE 14
        BITMAPFILEHEADER bmFileHeader;
        if (MwReadBITMAPFILEHEADER((LPBYTE)_pBuf, UNIX_BITMAP_HEADER_SIZE, &bmFileHeader))
        {
            pBMFileHdr = &bmFileHeader;
        }
#endif /* UNIX */

        // The reserved fields must be set to 0
        if (pBMFileHdr->bfReserved1 == 0 && pBMFileHdr->bfReserved2 == 0)
        {
            bRetVal = TRUE;
        }
    }

    DEBUG_LEAVE(bRetVal);
    return bRetVal;
}
//+---------------------------------------------------------------------------
//
// Method: CContentAnalyzer::GetDataFormat
//
// Synopsis:
//
// Arguments: (WCHAR* wzMimeType)
//
// Returns: DWORD dwDataFormat (a DATAFORMAT_* value)
//
// History: 7-21-96 AdriaanC (Adriaan Canter) Created
//
// Notes:
//
//----------------------------------------------------------------------------
// Classifies a MIME type string as a DATAFORMAT_* value.
//   - NULL, or the literal "(null)" (case-insensitive): DATAFORMAT_AMBIGUOUS.
//   - FindMediaTypeFormat returns S_OK: the format it reports.
//   - anything else: DATAFORMAT_UNKNOWN.
DWORD CContentAnalyzer::GetDataFormat(LPCWSTR wzMimeType)
{
    DEBUG_ENTER((DBG_TRANS,
                Dword,
                "CContentAnalyzer::GetDataFormat",
                "this=%#x, %.80wq",
                this, wzMimeType
                ));

    DWORD dwResult = DATAFORMAT_AMBIGUOUS;

    if (wzMimeType && _wcsicmp(wzMimeType, vwzNULL))
    {
        CLIPFORMAT cfFormat;            // required by the lookup, not used here
        DATAFORMAT dwDataFormat;

        HRESULT hr = FindMediaTypeFormat(wzMimeType, &cfFormat, (DWORD *)&dwDataFormat);

        if (hr == S_OK)
        {
            dwResult = dwDataFormat;
        }
        else
        {
            dwResult = DATAFORMAT_UNKNOWN;
        }
    }

    DEBUG_LEAVE(dwResult);
    return dwResult;
}
//+---------------------------------------------------------------------------
//
// Method: CContentAnalyzer::FormatAgreesWithData
//
// Synopsis:
//
// Arguments: (DWORD dwFormat)
//
// Returns: BOOL
//
// History: 8-14-96 AdriaanC (Adriaan Canter) Created
//
// Notes:
//
//----------------------------------------------------------------------------
// Returns TRUE when dwFormat is compatible with the text/binary verdict
// held in _fBinary: DATAFORMAT_TEXT needs _fBinary == FALSE,
// DATAFORMAT_BINARY needs _fBinary == TRUE, and DATAFORMAT_TEXTORBINARY
// always agrees.  Any other value yields FALSE.
BOOL CContentAnalyzer::FormatAgreesWithData(DWORD dwFormat)
{
    DEBUG_ENTER((DBG_TRANS,
                Bool,
                "CContentAnalyzer::FormatAgreesWithData",
                "this=%#x, %#x",
                this, dwFormat
                ));

    const BOOL fTextOnText     = (dwFormat == DATAFORMAT_TEXT)   && (_fBinary == FALSE);
    const BOOL fBinaryOnBinary = (dwFormat == DATAFORMAT_BINARY) && (_fBinary == TRUE);
    const BOOL fAcceptsEither  = (dwFormat == DATAFORMAT_TEXTORBINARY);

    BOOL fAgrees = FALSE;
    if (fTextOnText || fBinaryOnBinary || fAcceptsEither)
    {
        fAgrees = TRUE;
    }

    DEBUG_LEAVE(fAgrees);
    return fAgrees;
}
//+---------------------------------------------------------------------------
//
// Method: CContentAnalyzer::MatchDWordAtOffset
//
// Synopsis: Determines if a given magic word is found at
// the specified offset.
//
// Arguments: (DWORD magic, int offset)
//
// Returns: BOOL
//
// History: 5-25-96 AdriaanC (Adriaan Canter) Created
//
// Notes:
//
//----------------------------------------------------------------------------
// Returns TRUE when the four sample bytes starting at 'offset', combined
// with the first byte most significant, equal 'magic'.  Returns FALSE when
// the four bytes do not lie entirely inside [0, _cbSample).
//
// Two fixes relative to the earlier version: a negative offset is now
// rejected instead of indexing before the buffer, and each byte is widened
// to DWORD before shifting, so a first byte >= 0x80 no longer shifts a
// signed int into its sign bit.
BOOL CContentAnalyzer::MatchDWordAtOffset(DWORD magic, int offset)
{
    DEBUG_ENTER((DBG_TRANS,
                Bool,
                "CContentAnalyzer::MatchDWordAtOffset",
                "this=%#x, %#x, %d",
                this, magic, offset
                ));

    BOOL bRetVal = FALSE;
    const unsigned char* p = (const unsigned char*) _pBuf;

    // Written as a subtraction on the sample size so that a very large
    // offset cannot overflow the comparison.
    if (offset >= 0 && offset <= _cbSample - (int) sizeof(DWORD))
    {
        const DWORD dwWord = ((DWORD) p[offset]     << 24)
                           | ((DWORD) p[offset + 1] << 16)
                           | ((DWORD) p[offset + 2] <<  8)
                           |  (DWORD) p[offset + 3];

        if (magic == dwWord)
        {
            bRetVal = TRUE;
        }
    }

    DEBUG_LEAVE(bRetVal);
    return bRetVal;
}
//+---------------------------------------------------------------------------
//
// Method: CContentAnalyzer::FindAppFromExt
//
// Synopsis: Determines an associated application from
// a given file extension
//
// Arguments: (LPSTR pszExt, LPSTR pszCommand (command line), DWORD cbCommand (size of pszCommand in bytes))
//
// Returns: BOOL (Associated Application is found or not)
//
// History: 7-15-96 AdriaanC (Adriaan Canter) Created
//
// Notes:
//
//----------------------------------------------------------------------------
// Looks up the application registered for a file extension.
//
//   pszExt      extension including the leading '.', used as a key name
//               under HKEY_CLASSES_ROOT.
//   pszCommand  receives the default value of the command key.
//   cbCommand   size of pszCommand in bytes.
//
// The default value of HKCR\<pszExt> names a file type (e.g. "txtfile").
// The command is then read from HKCR\<filetype>\Shell\Open\Command, or,
// only when that key cannot be opened, from
// HKCR\<filetype>\Shell\Connect To\Command (used by SmartTerm).
//
// Returns TRUE when a command key was opened and its default value was
// read into pszCommand; FALSE otherwise.
//
// Fixes relative to the earlier version:
//   - the command is read from the opened command key; it used to be
//     re-read from the extension key, so pszCommand received the file type
//     name rather than a command line;
//   - the file type string is explicitly NUL-terminated before strlen /
//     strncat, since registry strings may be stored without a terminator;
//   - the fallback path reuses the one MAX_PATH buffer instead of a heap
//     copy that was filled using the caller's cbCommand as its size.
BOOL CContentAnalyzer::FindAppFromExt(LPSTR pszExt, LPSTR pszCommand, DWORD cbCommand)
{
    DEBUG_ENTER((DBG_TRANS,
                Bool,
                "CContentAnalyzer::FindAppFromExt",
                "this=%#x, %.80q, %.80q, %d",
                this, pszExt, pszCommand, cbCommand
                ));

    BOOL  fReturn = FALSE;
    HKEY  hExtKey = NULL;
    DWORD dwType  = 0;
    DWORD cbLen   = 0;
    CHAR  szRegPath[MAX_PATH];

    // Should be a file extension
    TransAssert((pszExt[0] == '.'));

    // Open key on extension
    if (RegOpenKeyEx(HKEY_CLASSES_ROOT, pszExt, 0, KEY_QUERY_VALUE, &hExtKey) == ERROR_SUCCESS)
    {
        // Find file type (txtfile, htmlfile, etc) from the unnamed value.
        // One byte is held back so the result can always be terminated.
        cbLen = MAX_PATH - 1;
        if (RegQueryValueEx(hExtKey, NULL, NULL, &dwType, (LPBYTE)szRegPath, &cbLen) == ERROR_SUCCESS)
        {
            szRegPath[cbLen] = '\0';
            const size_t cchFileType = strlen(szRegPath);

            HKEY hAppKey = NULL;
            LONG lOpen;

            // szRegPath should now look similar to
            // "txtfile\Shell\Open\Command". Open key on szRegPath
            strncat(szRegPath, szApplicationRegistryKey, MAX_PATH - cchFileType - 1);
            lOpen = RegOpenKeyEx(HKEY_CLASSES_ROOT, szRegPath, 0, KEY_QUERY_VALUE, &hAppKey);

            if (lOpen != ERROR_SUCCESS)
            {
                // check "Shell\Connect To\Command" key - used by SmartTerm.
                // Cut the buffer back to the file type and append the
                // alternative suffix in place.
                szRegPath[cchFileType] = '\0';
                strncat(szRegPath, szApplicationRegistryKey2, MAX_PATH - cchFileType - 1);

                hAppKey = NULL;
                lOpen = RegOpenKeyEx(HKEY_CLASSES_ROOT, szRegPath, 0, KEY_QUERY_VALUE, &hAppKey);
            }

            if (lOpen == ERROR_SUCCESS)
            {
                // Find the application command line - again, unnamed value.
                cbLen = cbCommand;
                if (RegQueryValueEx(hAppKey, NULL, NULL, &dwType, (LPBYTE)pszCommand, &cbLen) == ERROR_SUCCESS)
                {
                    // Terminate the result when there is room for it.
                    if (pszCommand != NULL && cbLen < cbCommand)
                    {
                        pszCommand[cbLen] = '\0';
                    }

                    // Success
                    fReturn = TRUE;
                }

                RegCloseKey(hAppKey);
            }
        }

        RegCloseKey(hExtKey);
    }

    DEBUG_LEAVE(fReturn);
    return fReturn;
}
//+---------------------------------------------------------------------------
//
// Method: CContentAnalyzer::CheckTextHeaders
//
// Synopsis:
//
//
// Arguments: void
//
// Returns: BOOL (TRUE if a text signature matched and _wzMimeType was set)
//
// History: 7-23-96 AdriaanC (Adriaan Canter) Created
//
// Notes:
//
//----------------------------------------------------------------------------
// Compares the start of the sample against the known text-format
// signatures, in priority order.  On the first match _wzMimeType is set to
// the corresponding MIME string and TRUE is returned.  With no match
// _wzMimeType is left untouched and FALSE is returned, so the caller
// decides on a default.
BOOL CContentAnalyzer::CheckTextHeaders()
{
    DEBUG_ENTER((DBG_TRANS,
                Bool,
                "CContentAnalyzer::CheckTextHeaders",
                "this=%#x",
                this
                ));

    // cchMagic is the signature length without its terminating NUL.
    static const struct
    {
        CHAR*  pszMagic;
        int    cchMagic;
        WCHAR* pwzMimeType;
    }
    rgTextSignatures[] =
    {
        // application/pdf (Acrobat)
        { vszPdfMagic,        (int)(sizeof(vszPdfMagic) - 1),        vwzApplicationPdf        },
        // application/Postscript
        { vszPostscriptMagic, (int)(sizeof(vszPostscriptMagic) - 1), vwzApplicationPostscript },
        // text/richtext
        { vszRichTextMagic,   (int)(sizeof(vszRichTextMagic) - 1),   vwzTextRichText          },
        // application/base64
        { vszBase64Magic,     (int)(sizeof(vszBase64Magic) - 1),     vwzApplicationBase64     },
    };

    const int cSignatures = (int)(sizeof(rgTextSignatures) / sizeof(rgTextSignatures[0]));

    BOOL bRet = FALSE;

    for (int iSig = 0; iSig < cSignatures; iSig++)
    {
        if (!StrCmpNC(_pBuf, rgTextSignatures[iSig].pszMagic, rgTextSignatures[iSig].cchMagic))
        {
            _wzMimeType = rgTextSignatures[iSig].pwzMimeType;
            bRet = TRUE;
            break;
        }
    }

    // No matches - the caller may assume plain text.

    DEBUG_LEAVE(bRet);
    return bRet;
}
//+---------------------------------------------------------------------------
//
// Method: CContentAnalyzer::CheckBinaryHeaders
//
// Synopsis:
//
//
// Arguments: void
//
// Returns: BOOL (TRUE if a binary signature matched and _wzMimeType was set)
//
// History: 7-23-96 AdriaanC (Adriaan Canter) Created
//
// Notes:
//
//----------------------------------------------------------------------------
// Compares the start of the sample against the known binary-format
// signatures, in the order listed below.  On the first match _wzMimeType is
// set to the corresponding MIME string and TRUE is returned.  With no match
// _wzMimeType is left untouched and FALSE is returned.
//
// The string compares rely on the caller having stored '\0' in the last
// sample byte (FindMimeFromData does this before calling).
//
// Changes relative to the earlier version:
//   - the image/x-jg test reads _pBuf[4] only when the sample holds at
//     least five bytes (it could read one byte past a four-byte sample);
//   - a second, unreachable "PK" (zip) test was removed.
BOOL CContentAnalyzer::CheckBinaryHeaders()
{
    DEBUG_ENTER((DBG_TRANS,
                Bool,
                "CContentAnalyzer::CheckBinaryHeaders",
                "this=%#x",
                this
                ));

    BOOL bRet = TRUE;

    // image/gif
    if (!StrCmpNIC(_pBuf, vszGif87Magic, sizeof(vszGif87Magic) - 1)
        || !StrCmpNIC(_pBuf, vszGif89Magic, sizeof(vszGif89Magic) - 1))
    {
        _wzMimeType = vwzImageGif;
    }
    // image/pjpeg
    else if ((BYTE)_pBuf[0] == JPEG_MAGIC_1 && (BYTE)_pBuf[1] == JPEG_MAGIC_2)
    {
        _wzMimeType = vwzImagePJpeg;
    }
    // image/bmp
    else if (IsBMP())
    {
        _wzMimeType = vwzImageBmp;
    }
    // audio/wav
    else if (MatchDWordAtOffset(RIFF_MAGIC, 0)
             && MatchDWordAtOffset(WAV_MAGIC, 8))
    {
        _wzMimeType = vwzAudioWav;
    }
    // audio/basic (.au files)
    else if (MatchDWordAtOffset(AU_DEC_MAGIC, 0)
             || MatchDWordAtOffset(AU_SUN_MAGIC, 0)
             || MatchDWordAtOffset(AU_DEC_INV_MAGIC, 0)
             || MatchDWordAtOffset(AU_SUN_INV_MAGIC, 0))
    {
        _wzMimeType = vwzAudioBasic;
    }
    // image/tiff
    else if (!StrCmpC(_pBuf, vszTiffMagic))     // "MM" followed by a \0
    {
        _wzMimeType = vwzImageTiff;
    }
    // application/x-msdownload
    else if (!StrCmpNC(_pBuf, vszExeMagic, sizeof(vszExeMagic) - 1))
    {
        _wzMimeType = vwzApplicationMSDownload;
    }
    // image/x-png
    else if (!StrCmpNC(_pBuf, vszPngMagic, sizeof(vszPngMagic) - 1))
    {
        _wzMimeType = vwzImagePng;
    }
    // image/x-jg: "JG", a third byte in 3..31, and a zero fifth byte
    else if (!StrCmpNC(_pBuf, vszJGMagic, sizeof(vszJGMagic) - 1)
             && (int) _pBuf[2] >= 3
             && (int) _pBuf[2] <= 31
             && _cbSample > 4
             && _pBuf[4] == 0)
    {
        _wzMimeType = vwzImageJG;
    }
    // audio/x-aiff
    else if (MatchDWordAtOffset(AIFF_INV_MAGIC, 0))
    {
        _wzMimeType = vwzAudioAiff;
    }
    else if (MatchDWordAtOffset(AIFF_MAGIC, 0)
             && (MatchDWordAtOffset(AIFF_MAGIC_MORE_1, 8)
                 || MatchDWordAtOffset(AIFF_MAGIC_MORE_2, 8)))
    {
        //
        // according to DaveMay, the correct AIFF format would be:
        // 'FORM....AIFF' or 'FORM....AIFC'
        // Only check for 'FORM' is incorrect because .sc2 has the
        // same sig
        //
        _wzMimeType = vwzAudioAiff;
    }
    // video/avi (or video/x-msvideo)
    else if (MatchDWordAtOffset(RIFF_MAGIC, 0)
             && MatchDWordAtOffset(AVI_MAGIC, 8))
    {
        _wzMimeType = vwzVideoAvi;
    }
    // video/mpeg
    else if (MatchDWordAtOffset(MPEG_MAGIC, 0)
             || MatchDWordAtOffset(MPEG_MAGIC_2, 0))
    {
        _wzMimeType = vwzVideoMpeg;
    }
    // image/x-emf
    else if (MatchDWordAtOffset(EMF_MAGIC_1, 0)
             && MatchDWordAtOffset(EMF_MAGIC_2, 40))
    {
        _wzMimeType = vwzImageEmf;
    }
    // image/x-wmf
    else if (MatchDWordAtOffset(WMF_MAGIC, 0))
    {
        _wzMimeType = vwzImageWmf;
    }
    // application/java
    else if (MatchDWordAtOffset(JAVA_MAGIC, 0))
    {
        _wzMimeType = vwzApplicationJava;
    }
    // application/x-zip-compressed
    else if (!StrCmpNC(_pBuf, vszZipMagic, sizeof(vszZipMagic) - 1))
    {
        _wzMimeType = vwzApplicationZipCompressed;
    }
    // application/x-compressed
    else if (!StrCmpNC(_pBuf, vszCompressMagic, sizeof(vszCompressMagic) - 1))
    {
        _wzMimeType = vwzApplicationCompressed;
    }
    // application/x-gzip-compressed
    else if (!StrCmpNC(_pBuf, vszGzipMagic, sizeof(vszGzipMagic) - 1))
    {
        _wzMimeType = vwzApplicationGzipCompressed;
    }
    // audio/mid: "MThd" followed by a \0
    else if (!StrCmpC(_pBuf, vszMIDMagic))
    {
        _wzMimeType = vwzAudioMID;
    }
    // application/pdf (Acrobat)
    else if (!StrCmpNC(_pBuf, vszPdfMagic, sizeof(vszPdfMagic) - 1))
    {
        _wzMimeType = vwzApplicationPdf;
    }
    // don't know what it is.
    else
    {
        //_wzMimeType = vwzApplicationOctetStream;
        bRet = FALSE;
    }

    DEBUG_LEAVE(bRet);
    return bRet;
}
//+---------------------------------------------------------------------------
//
// Method: CContentAnalyzer::FindMimeFromData
//
// Synopsis: Attempts to guess MIME type from buffer
//
//
// Arguments: wzFileName, pBuf, cbSample, wzSuggestedMimeType, grfFlags
//
// Returns: LPCWSTR (the MIME type guessed)
//
// History: 5-25-96 AdriaanC (Adriaan Canter) Created
//
// Notes:
//
//----------------------------------------------------------------------------
// Chooses a MIME type for a data buffer, storing it in _wzMimeType and
// returning it.  Decision order:
//
//   1. Anything from ';' onwards is cut off wzSuggestedMimeType (in place).
//   2. A suggested type whose format is DATAFORMAT_UNKNOWN is returned
//      unchanged, unless it is multipart/mixed or multipart/x-mixed-replace.
//   3. With no buffer (NULL or cbSample <= 0) the suggested type is returned.
//   4. A suggested text/html, image/gif or image/[p]jpeg is returned as soon
//      as the data confirms it (jpeg is reported as image/pjpeg).
//   5. A file name ending in .hta / .htc returns the MIME type registered
//      for that extension, when one is found.
//   6. A type positively identified by SampleData wins, checked in the
//      order CDF, XML, HTML, X bitmap, Mac BinHex, scriptlet.
//   7. A suggested application/x-netcdf is kept.
//   8. The buffer is judged text or binary from the SampleData counters and
//      matched against the header signatures, defaulting to text/plain or
//      application/octet-stream.
//   9. If that result is DATAFORMAT_AMBIGUOUS: prefer the suggested type
//      when it is not ambiguous and agrees with the text/binary verdict,
//      then the extension's MIME type, then application/octet-stream when
//      an application is registered for the extension.
//
// Side effects: at most SAMPLE_SIZE bytes are examined.  While sniffing,
// the last examined byte of pBuf is replaced by '\0' and is restored on
// every path that passes through "exit".
LPCWSTR CContentAnalyzer::FindMimeFromData(LPCWSTR wzFileName, char* pBuf, int cbSample, LPCWSTR wzSuggestedMimeType, DWORD grfFlags)
{
    DEBUG_ENTER((DBG_TRANS,
                Pointer,
                "CContentAnalyzer::FindMimeFromData",
                "this=%#x, %.80wq, %.80q, %d, %.80wq, %#x",
                this, wzFileName, pBuf, cbSample, wzSuggestedMimeType, grfFlags
                ));

    // Every function-scope local is declared here, ahead of the first
    // "goto exit", so that no jump bypasses an initialisation.
    BOOL  fSampledData = FALSE;
    BOOL  fFoundMimeTypeFromExt = FALSE;

    CHAR* szFileExt = 0;
    CHAR  szFileName[MAX_PATH];
    CHAR  szMimeTypeFromExt[SZMIMESIZE_MAX];
    CHAR  szCommand[MAX_PATH];
    CHAR  cLastByte;

    DWORD dwMimeLen = SZMIMESIZE_MAX;
    DWORD dwExtMimeTypeDataFormat;
    DWORD dwSuggestedMimeTypeDataFormat;
    DWORD dwMimeTypeDataFormat;
    DWORD cbCommand = MAX_PATH;
    BOOL  fExtensionChecked = FALSE;

    _grfFlags = grfFlags;

    // BUGBUG - we can use this information for DBCS.
    // Remove any info appended to the suggested mime type
    // such as charset information. This is identified by ';'
    // (the caller's string is truncated in place).
    if (wzSuggestedMimeType)
    {
        WCHAR* wptr = wcsstr(wzSuggestedMimeType, L";");
        if (wptr)
        {
            *wptr = L'\0';
        }
    }

    // Check to see if the server is suggesting an unknown mime type
    dwSuggestedMimeTypeDataFormat = GetDataFormat(wzSuggestedMimeType);
    if (dwSuggestedMimeTypeDataFormat == DATAFORMAT_UNKNOWN)
    {
        // server push returns "multipart" content type
        // this is not the real mimetype, so we have to sniff
        // to find out the truth
        if (wcsicmp(wzSuggestedMimeType, vwzmultipartmixed)
            && wcsicmp(wzSuggestedMimeType, vwzmultipartmixedreplace))
        {
            // If so, return the suggested mime type.
            _wzMimeType = (WCHAR*) wzSuggestedMimeType;

            DEBUG_LEAVE(_wzMimeType);
            return _wzMimeType;
        }
    }

    /*****
    // check if we got an extension and extension mime
    // matches the suggested mime - only for text/plain
    if ( wzSuggestedMimeType && wzFileName && !wcscmp(wzSuggestedMimeType,vwzTextPlain))
    {
        fExtensionChecked = TRUE;
        fFoundMimeTypeFromExt = FindMimeFromExt(
                                    wzFileName,
                                    szFileName,
                                    szMimeTypeFromExt,
                                    &dwExtMimeTypeDataFormat,
                                    &szFileExt );

        // If there is a mime type associated with the file
        // extension then return it.
        if ( fFoundMimeTypeFromExt
             && (dwExtMimeTypeDataFormat == dwSuggestedMimeTypeDataFormat)
             && !wcscmp(wzSuggestedMimeType,_wzMimeTypeFromExt) )
        {
            // If so, return the suggested mime type.
            _wzMimeType = (WCHAR*) wzSuggestedMimeType;
            return _wzMimeType;
        }
    }
    *****/

    // Not enough data to tell anything
    if (!pBuf || cbSample <= 0)
    {
        _wzMimeType = (WCHAR*) wzSuggestedMimeType;

        DEBUG_LEAVE(_wzMimeType);
        return _wzMimeType;
    }

    // Look at no more than SAMPLE_SIZE bytes.
    _pBuf = pBuf;
    if (cbSample <= SAMPLE_SIZE)
    {
        _cbSample = cbSample;
    }
    else
    {
        _cbSample = SAMPLE_SIZE;
    }

    // Save off last character. Null terminate the buffer.
    cLastByte = _pBuf[_cbSample - 1];
    _pBuf[_cbSample - 1] = '\0';

    // Common cases first - check the server indicated mime type
    // for text/html, image/gif or image/[p]jpeg.
    if (wzSuggestedMimeType && !StrCmpICW(wzSuggestedMimeType, vwzTextHTML))
    {
        // Sample the data. This routine also checks for the following
        // mime types which require extended scanning through the buffer:
        // text/html, image/x-xbitmap, application/macbinhex
        SampleData();
        fSampledData = TRUE;

        if (_fFoundHTML)
        {
            _wzMimeType = vwzTextHTML;
            goto exit;
        }
    }
    // image/gif
    else if (wzSuggestedMimeType && !wcsicmp(wzSuggestedMimeType, vwzImageGif))
    {
        if (!StrCmpNIC(_pBuf, vszGif87Magic, sizeof(vszGif87Magic) - 1)
            || !StrCmpNIC(_pBuf, vszGif89Magic, sizeof(vszGif89Magic) - 1))
        {
            _wzMimeType = vwzImageGif;
            goto exit;
        }
    }
    // image/jpeg or image/pjpeg
    else if (wzSuggestedMimeType
             && (!wcsicmp(wzSuggestedMimeType, vwzImagePJpeg)
                 || !wcsicmp(wzSuggestedMimeType, vwzImageJpeg)))
    {
        if ((BYTE)_pBuf[0] == JPEG_MAGIC_1 && (BYTE)_pBuf[1] == JPEG_MAGIC_2)
        {
            _wzMimeType = vwzImagePJpeg;
            goto exit;
        }
    }

    //
    // ********************** BEGIN HACK *******************************
    //
    // we will remove this once tridents defined the unique signature
    // for .hta and .htc format
    //
    // DanpoZ (98.08.12) - refer to IE5 SUPERHOT bug 35478
    //
    if (wzFileName)
    {
        CHAR* szExt;
        CHAR  szFile[MAX_PATH];
        W2A(wzFileName, szFile, MAX_PATH);

        if (grfFlags & FMFD_URLASFILENAME)
        {
            //
            // remove the security context '\1' and replace it with '\0'
            // but only do this when we are using URL to replace the filename
            //
            CHAR* pch = StrChr(szFile, '\1');
            if (pch)
            {
                *pch = '\0';
            }
        }

        szExt = FindFileExtension(szFile);
        if (szExt
            && (!StrCmpNIC(szExt, ".hta", sizeof(".hta") - 1)
                || !StrCmpNIC(szExt, ".htc", sizeof(".htc") - 1)))
        {
            // The lookup itself is given the original wzFileName, not the
            // trimmed local copy used for the extension test above.
            fExtensionChecked = TRUE;
            fFoundMimeTypeFromExt = FindMimeFromExt(
                                        wzFileName,
                                        szFileName,
                                        szMimeTypeFromExt,
                                        &dwExtMimeTypeDataFormat,
                                        &szFileExt );

            // If there is a mime type associated with the file
            // extension then return it.
            if (fFoundMimeTypeFromExt)
            {
                _wzMimeType = _wzMimeTypeFromExt;
                goto exit;
            }
        }
    }
    //
    // ********************** END HACK *********************************
    //

    // One of the following is true:
    // 1) The server indicated a common mime type (html, gif or jpeg),
    //    however, verification failed.
    // 2) The server indicated an ambiguous mime type or
    //    a known, but uncommon mime type.

    // If not done so already, sample the data.
    if (!fSampledData)
    {
        SampleData();
        fSampledData = TRUE;
    }

    // Return any mime type that was positively
    // identified during the data sampling.  The order of these tests is
    // the priority between the flags.
    if (_fFoundCDF)
    {
        _wzMimeType = vwzApplicationCDF;
        goto exit;
    }
    if (_fFoundXML)
    {
        _wzMimeType = vwzTextXML;
        goto exit;
    }
    if (_fFoundHTML)
    {
        _wzMimeType = vwzTextHTML;
        goto exit;
    }
    if (_fFoundXBitMap)
    {
        _wzMimeType = vwzImageXBitmap;
        goto exit;
    }
    if (_fFoundMacBinhex)
    {
        _wzMimeType = vwzApplicationMacBinhex;
        goto exit;
    }
    if (_fFoundTextScriptlet)
    {
        _wzMimeType = vwzTextScriptlet;
        goto exit;
    }

    if (!_fFoundCDF
        && wzSuggestedMimeType
        && !wcsicmp(wzSuggestedMimeType, vwzApplicationNETCDF))
    {
        // only overwrite application/x-netcdf with application/x-cdf
        _wzMimeType = vwzApplicationNETCDF;
        goto exit;
    }

    // Decide if buffer is primarily text or binary. Conduct
    // pattern matching to determine a mime type depending on the
    // finding.  The signature set matching the verdict is tried first.
    if (!_cbCtrl || _cbText + _cbFF >= 16 * (_cbCtrl + _cbHigh))
    {
        _fBinary = FALSE;
        if (!CheckTextHeaders() && !CheckBinaryHeaders())
        {
            _wzMimeType = vwzTextPlain;
        }
    }
    else
    {
        _fBinary = TRUE;
        if (!CheckBinaryHeaders() && !CheckTextHeaders())
        {
            _wzMimeType = vwzApplicationOctetStream;
        }
    }

    // Determine format of the mime type from data
    dwMimeTypeDataFormat = GetDataFormat(_wzMimeType);

    // If the format of the mime type found from examining the data
    // is not ambiguous, then return this mime type.  Otherwise the
    // examination of data is inconclusive and the fallbacks below apply.
    if (dwMimeTypeDataFormat == DATAFORMAT_AMBIGUOUS)
    {
        // If the suggested mime type is not ambiguous and does
        // not conflict with the data format then return it.
        if (dwSuggestedMimeTypeDataFormat != DATAFORMAT_AMBIGUOUS
            && FormatAgreesWithData(dwSuggestedMimeTypeDataFormat))
        {
            _wzMimeType = (WCHAR*) wzSuggestedMimeType;
            goto exit;
        }

        // Otherwise, attempt to obtain a mime type from any
        // file extension. If none is found, but an application
        // is registered for the file extension, return
        // application/octet-stream.

        // If there is a file extension, find any
        // associated mime type.
        if (wzFileName && !fExtensionChecked)
        {
            fExtensionChecked = TRUE;

            fFoundMimeTypeFromExt = FindMimeFromExt(
                                        wzFileName,
                                        szFileName,
                                        szMimeTypeFromExt,
                                        &dwExtMimeTypeDataFormat,
                                        &szFileExt );
        }

        // If there is a mime type associated with the file extension, use
        // it only when its format is DATAFORMAT_UNKNOWN; in every other
        // case the type found from the data is returned.
        if (fFoundMimeTypeFromExt)
        {
            if (dwExtMimeTypeDataFormat == DATAFORMAT_UNKNOWN)
            {
                _wzMimeType = _wzMimeTypeFromExt;
            }
            goto exit;
        }

        // Otherwise, check to see if there is an associated application.
        if (szFileExt && FindAppFromExt(szFileExt, szCommand, cbCommand))
        {
            // Found an associated application.
            _wzMimeType = vwzApplicationOctetStream;
            goto exit;
        }

        // No suggested mime type, no mime type from file extension
        // and no registered application found. Fall through and return
        // mime type found from the data
    }

exit:
    // Replace the null termination with
    // the original character.
    _pBuf[_cbSample - 1] = cLastByte;

    DEBUG_LEAVE(_wzMimeType);
    return _wzMimeType;
}
//+---------------------------------------------------------------------------
//
// Method: CContentAnalyzer::FindMimeFromExt
//
// Synopsis:
//
// Arguments: [wzFileName] --
// [szFileName] --
// [szMimeTypeFromExt] --
// [pdwExtMimeTypeDataFormat] --
//
// Returns:
//
// History: 5-25-96 AdriaanC (Adriaan Canter)
// 1-28-1997 JohannP (Johann Posch) made separate function
//
// Notes:
//
//----------------------------------------------------------------------------
// Converts wzFileName into szFileName, locates its extension and asks
// GetMimeFromExt for the MIME type registered for it.
//
// When a MIME type is found it is written to szMimeTypeFromExt, converted
// into _wzMimeTypeFromExt, and its data format is stored through
// pdwExtMimeTypeDataFormat; the function then returns TRUE.
//
// When an extension exists and ppszFileExt is non-NULL, *ppszFileExt is set
// to the extension whether or not a MIME type was found.  The extension
// pointer comes from FindFileExtension(szFileName).
BOOL CContentAnalyzer::FindMimeFromExt(
                            LPCWSTR wzFileName,
                            CHAR *szFileName,
                            CHAR *szMimeTypeFromExt,
                            DWORD *pdwExtMimeTypeDataFormat,
                            CHAR **ppszFileExt)
{
    DEBUG_ENTER((DBG_TRANS,
                Bool,
                "CContentAnalyzer::FindMimeFromExt",
                "this=%#x, %.80wq, %.80q, %.80q, %#x, %#x",
                this, wzFileName, szFileName, szMimeTypeFromExt, pdwExtMimeTypeDataFormat, ppszFileExt
                ));

    UrlMkAssert((wzFileName && szFileName && pdwExtMimeTypeDataFormat));

    BOOL  fFound = FALSE;
    DWORD cbMime = SZMIMESIZE_MAX;

    // If there is a file extension, find any
    // associated mime type.
    W2A(wzFileName, szFileName, MAX_PATH);
    CHAR* pszExt = FindFileExtension(szFileName);

    if (pszExt)
    {
        if (GetMimeFromExt(pszExt, szMimeTypeFromExt, &cbMime) == ERROR_SUCCESS)
        {
            fFound = TRUE;
            A2W(szMimeTypeFromExt, _wzMimeTypeFromExt, SZMIMESIZE_MAX);
            *pdwExtMimeTypeDataFormat = GetDataFormat(_wzMimeTypeFromExt);
        }

        if (ppszFileExt)
        {
            *ppszFileExt = pszExt;
        }
    }

    DEBUG_LEAVE(fFound);
    return fFound;
}
|