Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

2569 lines
76 KiB

//+-------------------------------------------------------------------------
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// PROGRAM: lrtest.cxx
//
// Test program for invoking language resources including wordbreakers
// and stemmers. Also invokes filters.
//
// PLATFORM: Windows
//
//--------------------------------------------------------------------------
#ifndef UNICODE
#define UNICODE
#endif
#define _OLE32_
#include <windows.h>
#include <oleext.h>
#include <psapi.h>
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include <eh.h>
#include <ntquery.h>
#include <filterr.h>
#include <cierror.h>
#include <indexsrv.h>
#include "minici.hxx"
#define USE_FAKE_COM
//
// These are undocumented Indexing Service functions, but they're needed
// to load filters and not crash, and to load the plain text filter.
//
// Signature of the shutdown entry point: no arguments, no result.
typedef void (__stdcall * PFnCIShutdown)( void );
// Signature of the plain text filter loader: takes a file path and returns
// an IFilter through ppIFilter.
typedef HRESULT (__stdcall * PFnLoadTextFilter)( WCHAR const * pwcPath,
IFilter ** ppIFilter );
// Both pointers start out null. NOTE(review): they are presumably resolved
// at startup by code outside this part of the file -- confirm before
// calling through them unconditionally.
PFnCIShutdown g_pCIShutdown = 0;
PFnLoadTextFilter g_pLoadTextFilter = 0;
// If this is non-zero, it's a file handle to which output is streamed
// (out() and outstr() write to the console when it is zero).
FILE * g_fpOut = 0;
// If TRUE, strings from wordbreakers and stemmers are dumped in hex
BOOL g_fDumpAsHex = FALSE;
// Interface used to hand a file to a filter. NOTE(review): presumably
// selected by the /f, /fs and /ft switches described in Usage() -- confirm
// against the argument parsing code.
enum enumFilterLoadMechanism
{
eIPersistFile,
eIPersistStream,
eIPersistStorage
};
//+-------------------------------------------------------------------------
//
// Function: out
//
// Synopsis: Like printf, only will send output to the output file if
// specified, or just to the console. Appends a carriage
// return / line feed to the text.
//
// Arguments: [pwcFormat] -- printf-style wide-character format string
// [...] -- Variable arguments
//
// Returns: count of characters emitted.
//
//--------------------------------------------------------------------------
int out( const WCHAR * pwcFormat, ... )
{
    va_list args;
    va_start( args, pwcFormat );

    int cwcEmitted;

    if ( 0 == g_fpOut )
    {
        // Console output: the CRT supplies the line-ending translation.
        cwcEmitted = vwprintf( pwcFormat, args );
        cwcEmitted += wprintf( L"\n" );
    }
    else
    {
        // Writing to the output file is done in binary mode so the output
        // can be Unicode.  No "\n" -> "\r\n" translation happens in that
        // mode, so the carriage return is written explicitly.
        cwcEmitted = vfwprintf( g_fpOut, pwcFormat, args );
        cwcEmitted += fwprintf( g_fpOut, L"\r\n" );
    }

    va_end( args );
    return cwcEmitted;
} //out
//+-------------------------------------------------------------------------
//
// Function: outstr
//
// Synopsis: Like printf, only will send output to the output file if
// specified, or just to the console.
//
// Arguments: [pwcFormat] -- printf-style wide-character format string
// [...] -- Variable arguments
//
// Returns: count of characters emitted.
//
//--------------------------------------------------------------------------
int outstr( const WCHAR * pwcFormat, ... )
{
    va_list args;
    va_start( args, pwcFormat );

    // Same destination rule as out(), but no line ending is appended.
    const int cwcEmitted = ( 0 == g_fpOut )
                               ? vwprintf( pwcFormat, args )
                               : vfwprintf( g_fpOut, pwcFormat, args );

    va_end( args );
    return cwcEmitted;
} //outstr
//+-------------------------------------------------------------------------
//
// Function: Usage
//
// Synopsis: Displays usage information about the application, then exits.
//
//--------------------------------------------------------------------------
void Usage()
{
    // The help text, one entry per printed line.  None of the lines contain
    // a '%', so printing them through "%s" yields exactly the same output.
    static const char * const s_apszLines[] =
    {
        "usage: lrtest [/d] [/b] [/f] [/q] [/s] [/x:#] /c:clsid [/o:file] [/i:file] [text]\n",
        "\n",
        " Language Resource test program\n",
        "\n",
        " arguments:\n",
        " /b Load the wordbreaker (can't be used with /s or /f)\n",
        " /c: CLSID of the wordbreaker or stemmer to load\n",
        " /d Dumps output strings in hex as well as strings\n",
        " /f Load the filter (can't be used with /b or /s)\n",
        " If /c isn't specified, use Indexing Service's LoadIFilter\n",
        " /fs Same as /f, but uses IPersistStream, not IPersistFile\n",
        " /ft Same as /f, but uses IPersistStorage, not IPersistFile\n",
        " /i: Path of an input file, if [text] isn't specified\n",
        " /m: Optional path of the dll to load. Overrides COM CLSID lookup\n",
        " /n No status information. Used with /f, only displays filter output\n",
        " /o: Path of an output file. If not specified, console is used\n",
        " /q If wordbreaking, do so for query instead of indexing\n",
        " /s Load the stemmer (can't be used with /b or /f)\n",
        " /t No text information; just chunks. Used with /f\n",
        " /x:# Maximum token size, default is 100\n",
        " text Text to wordbreak or stem, if /i: isn't specified\n",
        "\n",
        " examples:\n",
        " lrtest /b /c:{369647e0-17b0-11ce-9950-00aa004bbb1f} \"Alice's restaurant\"\n",
        " lrtest /b /q /c:{369647e0-17b0-11ce-9950-00aa004bbb1f} \"data-base\"\n",
        " lrtest /b /c:{369647e0-17b0-11ce-9950-00aa004bbb1f} /i:foo.doc\n",
        " lrtest /b /c:{369647e0-17b0-11ce-9950-00aa004bbb1f} /m:wb.dll /i:foo.doc\n",
        " lrtest /d /s /c:{eeed4c20-7f1b-11ce-be57-00aa0051fe20} peach /o:output.txt\n",
        " lrtest /f /c:{f07f3920-7b8c-11cf-9be8-00aa004b9986} /i:foo.doc\n",
        " lrtest /f /i:foo.doc\n",
        " lrtest /fs /i:foo.doc\n",
        "\n",
    };

    const unsigned cLines = sizeof( s_apszLines ) / sizeof( s_apszLines[ 0 ] );

    for ( unsigned iLine = 0; iLine < cLines; iLine++ )
        printf( "%s", s_apszLines[ iLine ] );

    // Usage is only shown for bad invocations, so exit with a failure code.
    exit( 1 );
} //Usage
//+-------------------------------------------------------------------------
//
// Function: GetModuleOfAddress
//
// Synopsis: Returns the module handle of a given address or 0
//
// Arguments: [pAddress] -- Address in one of the modules loaded
//
//--------------------------------------------------------------------------
HMODULE GetModuleOfAddress( void * pAddress )
{
    HANDLE hProcess = GetCurrentProcess();

    // First pass: ask how many bytes of module handles the process has.
    DWORD cbNeeded = 0;
    if ( !EnumProcessModules( hProcess, 0, 0, &cbNeeded ) )
        return 0;

    ULONG cModules = cbNeeded / sizeof( HMODULE );
    if ( 0 == cModules )
        return 0;

    XPtr<HMODULE> aModules( cModules );
    if ( aModules.IsNull() )
        return 0;

    // Second pass: retrieve the handles themselves.
    const DWORD cbBuffer = cModules * sizeof( HMODULE );
    if ( !EnumProcessModules( hProcess, aModules.Get(), cbBuffer, &cbNeeded ) )
        return 0;

    // The module list can change between the two calls.  Only look at the
    // entries that were actually written into the buffer.
    const ULONG cReturned = cbNeeded / sizeof( HMODULE );
    if ( cReturned < cModules )
        cModules = cReturned;

    BYTE * const pbAddress = (BYTE *) pAddress;

    for ( ULONG i = 0; i < cModules; i++ )
    {
        // Skip modules whose information can't be read; otherwise the
        // range test below would use uninitialized data.
        MODULEINFO mi;
        if ( !GetModuleInformation( hProcess, aModules[ i ], &mi, sizeof( mi ) ) )
            continue;

        BYTE * const pbBase = (BYTE *) mi.lpBaseOfDll;

        if ( ( pbAddress >= pbBase ) &&
             ( pbAddress < ( pbBase + mi.SizeOfImage ) ) )
            return aModules[ i ];
    }

    // The address isn't inside any module that could be examined.
    return 0;
} //GetModuleOfAddress
//+-------------------------------------------------------------------------
//
// Function: DumpStringAsHex
//
// Synopsis: Emits a string in hex format. Useful for East Asian languages.
//
//--------------------------------------------------------------------------
void DumpStringAsHex( WCHAR const * pwc, ULONG cwc )
{
    // Hex dumps are opt-in; do nothing unless the global flag is set.
    if ( !g_fDumpAsHex )
        return;

    ULONG iChar = 0;
    while ( iChar < cwc )
    {
        // Separate the values with single spaces (none before the first).
        if ( iChar > 0 )
            outstr( L" " );

        outstr( L"%#x", pwc[ iChar ] );
        iChar++;
    }

    // Terminate the line.
    out( L"" );
} //DumpStringAsHex
//+---------------------------------------------------------------------------
//
// Class: CIStream
//
// Purpose: Wraps a file with an IStream.
//
//----------------------------------------------------------------------------
class CIStream : public IStream
{
public:
    // Starts with one reference, owned by the creator, and no file open.
    CIStream() : _cRef( 1 ),
                 _hFile( INVALID_HANDLE_VALUE ),
                 _lOffset( 0 ),
                 _cbData( 0 )
    {
    }

    ~CIStream()
    {
        Free();
    }

    // Closes the file if one is open.  Safe to call repeatedly.
    void Free()
    {
        if ( INVALID_HANDLE_VALUE != _hFile )
        {
            CloseHandle( _hFile );
            _hFile = INVALID_HANDLE_VALUE;
        }
    }

    // Opens pwcFile for reading, replacing any file already open, and
    // positions the stream at its start.  Returns a failure HRESULT if the
    // file can't be opened, its size can't be read, or it is too large for
    // the 32-bit signed offsets this class uses.
    HRESULT Open( WCHAR const * pwcFile )
    {
        Free();
        _lOffset = 0;
        _cbData = 0;

        _hFile = CreateFile( pwcFile,
                             GENERIC_READ,
                             FILE_SHARE_READ | FILE_SHARE_WRITE |
                                 FILE_SHARE_DELETE,
                             0,
                             OPEN_EXISTING,
                             FILE_ATTRIBUTE_NORMAL,
                             0 );
        if ( INVALID_HANDLE_VALUE == _hFile )
        {
            DWORD dwErr = GetLastError();
            return HRESULT_FROM_WIN32( dwErr );
        }

        DWORD cbHigh = 0;
        DWORD cbLow = GetFileSize( _hFile, &cbHigh );
        if ( INVALID_FILE_SIZE == cbLow )
        {
            // INVALID_FILE_SIZE is also a legal low DWORD; the last error
            // tells the two cases apart.
            DWORD dwErr = GetLastError();
            if ( NO_ERROR != dwErr )
            {
                Free();
                return HRESULT_FROM_WIN32( dwErr );
            }
        }

        if ( ( 0 != cbHigh ) || ( cbLow > (DWORD) LONG_MAX ) )
        {
            Free();
            return HRESULT_FROM_WIN32( ERROR_FILE_TOO_LARGE );
        }

        _cbData = (LONG) cbLow;
        return S_OK;
    }

    HRESULT STDMETHODCALLTYPE QueryInterface( REFIID riid, void ** ppvObj )
    {
        if ( 0 == ppvObj )
            return E_INVALIDARG;

        *ppvObj = 0;

        if ( IID_IStream == riid )
            *ppvObj = (IStream *) this;
        else if ( IID_IUnknown == riid )
            *ppvObj = (IUnknown *) this;
        else
            return E_NOINTERFACE;

        AddRef();
        return S_OK;
    }

    ULONG STDMETHODCALLTYPE AddRef()
    {
        return InterlockedIncrement( &_cRef );
    }

    // Deletes the object when the last reference goes away.
    ULONG STDMETHODCALLTYPE Release()
    {
        unsigned long uTmp = InterlockedDecrement( &_cRef );
        if ( 0 == uTmp )
            delete this;
        return uTmp;
    }

    // Reads up to cb bytes at the current stream position and advances the
    // position by the number of bytes read.  pcbRead is optional.
    HRESULT STDMETHODCALLTYPE Read(
        void * pv,
        ULONG cb,
        ULONG * pcbRead )
    {
        if ( 0 != pcbRead )
            *pcbRead = 0;

        if ( 0 == pv )
            return STG_E_INVALIDPOINTER;

        DWORD dwOff = SetFilePointer( _hFile,
                                      _lOffset,
                                      0,
                                      FILE_BEGIN );
        if ( INVALID_SET_FILE_POINTER == dwOff )
        {
            DWORD dwErr = GetLastError();
            return HRESULT_FROM_WIN32( dwErr );
        }

        // ReadFile needs a real byte-count pointer for synchronous reads,
        // so never hand it the caller's possibly-null pcbRead.
        DWORD cbRead = 0;
        if ( !ReadFile( _hFile, pv, cb, &cbRead, 0 ) )
        {
            DWORD dwErr = GetLastError();
            return HRESULT_FROM_WIN32( dwErr );
        }

        // Without this, successive reads would return the same bytes.
        _lOffset += (LONG) cbRead;

        if ( 0 != pcbRead )
            *pcbRead = cbRead;

        return S_OK;
    }

    // The stream is read-only.
    HRESULT STDMETHODCALLTYPE Write(
        VOID const * pv,
        ULONG cb,
        ULONG * pcbWritten )
    {
        return E_NOTIMPL;
    }

    // Moves the stream position.  Only the low 32 bits of the displacement
    // are honoured (interpreted as signed), matching the 32-bit offsets
    // used by this class.  A move that would land before the start of the
    // stream or beyond LONG_MAX fails with STG_E_SEEKERROR and leaves the
    // position unchanged.  The resulting position is reported through
    // plibNewPosition when it is non-null, even on failure.
    HRESULT STDMETHODCALLTYPE Seek(
        LARGE_INTEGER dlibMoveIn,
        DWORD dwOrigin,
        ULARGE_INTEGER * plibNewPosition )
    {
        HRESULT hr = S_OK;
        const LONG dlibMove = (LONG) dlibMoveIn.LowPart;
        LONGLONG llBase = 0;

        switch ( dwOrigin )
        {
            case STREAM_SEEK_SET:
                llBase = 0;
                break;
            case STREAM_SEEK_CUR:
                llBase = _lOffset;
                break;
            case STREAM_SEEK_END:
                llBase = _cbData;
                break;
            default:
                hr = STG_E_SEEKERROR;
                break;
        }

        if ( SUCCEEDED( hr ) )
        {
            // 64-bit arithmetic so the range test itself can't overflow.
            const LONGLONG llNew = llBase + dlibMove;

            if ( ( llNew < 0 ) || ( llNew > LONG_MAX ) )
                hr = STG_E_SEEKERROR;
            else
                _lOffset = (LONG) llNew;
        }

        if ( 0 != plibNewPosition )
            ULISet32( *plibNewPosition, _lOffset );

        return hr;
    }

    HRESULT STDMETHODCALLTYPE SetSize( ULARGE_INTEGER cb )
    {
        return E_NOTIMPL;
    }

    HRESULT STDMETHODCALLTYPE CopyTo(
        IStream * pstm,
        ULARGE_INTEGER cb,
        ULARGE_INTEGER * pcbRead,
        ULARGE_INTEGER * pcbWritten )
    {
        return E_NOTIMPL;
    }

    // Nothing is ever written, so there is nothing to commit or revert.
    HRESULT STDMETHODCALLTYPE Commit( DWORD grfCommitFlags )
    {
        return S_OK;
    }

    HRESULT STDMETHODCALLTYPE Revert()
    {
        return S_OK;
    }

    HRESULT STDMETHODCALLTYPE LockRegion(
        ULARGE_INTEGER libOffset,
        ULARGE_INTEGER cb,
        DWORD dwLockType )
    {
        return STG_E_INVALIDFUNCTION;
    }

    HRESULT STDMETHODCALLTYPE UnlockRegion(
        ULARGE_INTEGER libOffset,
        ULARGE_INTEGER cb,
        DWORD dwLockType )
    {
        return STG_E_INVALIDFUNCTION;
    }

    // Reports only the type, size and access mode; every other STATSTG
    // field is zeroed.  statflag is ignored.
    HRESULT STDMETHODCALLTYPE Stat(
        STATSTG * pstatstg,
        DWORD statflag )
    {
        if ( 0 == pstatstg )
            return STG_E_INVALIDPOINTER;

        memset( pstatstg, 0, sizeof( STATSTG ) );
        pstatstg->type = STGTY_STREAM;
        pstatstg->cbSize.QuadPart = _cbData;
        pstatstg->grfMode = STGM_READ;
        return S_OK;
    }

    HRESULT STDMETHODCALLTYPE Clone( IStream ** ppstm )
    {
        return E_NOTIMPL;
    }

private:
    LONG _cRef;      // COM reference count
    HANDLE _hFile;   // Open file, or INVALID_HANDLE_VALUE
    LONG _lOffset;   // Current stream position, in bytes from the start
    LONG _cbData;    // File size captured by Open()
};
//+---------------------------------------------------------------------------
//
// Class: CPlainTextSource
//
// Purpose: Takes a simple buffer and provides a TEXT_SOURCE for it, which
// can be passed to wordbreakers.
//
//----------------------------------------------------------------------------
class CPlainTextSource : public TEXT_SOURCE
{
public:
    // Exposes the caller's buffer directly; nothing is copied, so the
    // text must outlive this object.
    CPlainTextSource(
        WCHAR const * pwcText,
        ULONG cwc )
    {
        // The whole text is available up front: [0, cwc).
        pfnFillTextBuffer = PlainFillBuf;
        awcBuffer = pwcText;
        iEnd = cwc;
        iCur = 0;
    }

    // There is never any more text beyond the initial buffer.
    static HRESULT __stdcall PlainFillBuf( TEXT_SOURCE * pTextSource )
    {
        return WBREAK_E_END_OF_TEXT;
    }
};
//+---------------------------------------------------------------------------
//
// Class: CFilterTextSource
//
// Purpose: Takes an IFilter and provides a TEXT_SOURCE for it, which
// can be passed to wordbreakers.
//
//----------------------------------------------------------------------------
#pragma warning(disable: 4512)
class CFilterTextSource : public TEXT_SOURCE
{
public:
    // Binds to the filter, fetches its first chunk and pre-loads the
    // buffer with that chunk's text.  The filter must outlive this object.
    CFilterTextSource( IFilter & filter ) :
        _filter( filter ),
        _hr( S_OK )
    {
        awcBuffer = _awcBuffer;
        iCur = 0;
        iEnd = 0;
        pfnFillTextBuffer = FilterFillBuf;

        // Get the first chunk
        _hr = _filter.GetChunk( &_Stat );

        // Get text for the chunk
        FillBuf();
    }

    // TEXT_SOURCE callback: forwards to the owning object's FillBuf().
    static HRESULT __stdcall FilterFillBuf( TEXT_SOURCE * pTextSource )
    {
        CFilterTextSource & This = * (CFilterTextSource *) pTextSource;
        return This.FillBuf();
    }

private:
    // Slides unconsumed text to the front of the buffer and appends more
    // text from the filter, moving on to later text chunks as needed.
    // Returns S_OK or FILTER_S_LAST_TEXT when text was added,
    // WBREAK_E_END_OF_TEXT when the filter has no more chunks, or the
    // failure reported by the filter.
    HRESULT FillBuf()
    {
        // Never continue past an error condition except FILTER_E_NO_MORE_TEXT
        if ( FAILED( _hr ) && _hr != FILTER_E_NO_MORE_TEXT )
            return _hr;

        if ( iCur > iEnd )
        {
            out( L"TEXT_SOURCE iCur (%#x) > iEnd (%#x), this is incorrect\n",
                 iCur, iEnd );
            _hr = E_INVALIDARG;
            return _hr;
        }

        // Move any existing text to beginning of buffer.
        ULONG ccLeftOver = iEnd - iCur;
        if ( ccLeftOver > 0 )
            MoveMemory( _awcBuffer,
                        &_awcBuffer[iCur],
                        ccLeftOver * sizeof( WCHAR ) );

        iCur = 0;
        iEnd = ccLeftOver;

        ULONG ccRead = BufferWChars() - ccLeftOver;

        // Stop topping up the buffer once fewer than this many characters
        // of free space remain.
        const ULONG BUFFER_SLOP = 10;

        //
        // Get some more text. If *previous* call to GetText returned
        // FILTER_S_LAST_TEXT, or FILTER_E_NO_MORE_TEXT then don't even
        // bother trying.
        //
        if ( FILTER_S_LAST_TEXT == _hr || FILTER_E_NO_MORE_TEXT == _hr )
            _hr = FILTER_E_NO_MORE_TEXT;
        else
        {
            _hr = _filter.GetText( &ccRead,
                                   &_awcBuffer[ccLeftOver] );
            if ( SUCCEEDED( _hr ) )
            {
                iEnd += ccRead;
                ccLeftOver += ccRead;
                ccRead = BufferWChars() - ccLeftOver;

                while ( ( S_OK == _hr ) && ( ccRead > BUFFER_SLOP ) )
                {
                    // Attempt to fill in as much of buffer as possible
                    _hr = _filter.GetText( &ccRead,
                                           &_awcBuffer[ccLeftOver] );
                    if ( SUCCEEDED( _hr ) )
                    {
                        iEnd += ccRead;
                        ccLeftOver += ccRead;
                        ccRead = BufferWChars() - ccLeftOver;
                    }
                }

                //
                // Either return FILTER_S_LAST_TEXT or return S_OK because we
                // have succeeded in adding text to the buffer.
                //
                if ( FILTER_S_LAST_TEXT == _hr )
                    return FILTER_S_LAST_TEXT;

                return S_OK;
            }

            if ( ( FILTER_E_NO_MORE_TEXT != _hr ) &&
                 ( FILTER_E_NO_TEXT != _hr ) )
            {
                // Weird failure, hence return, else goto next chunk
                return _hr;
            }
        }

        // Go to next chunk, if necessary.
        while ( ( FILTER_E_NO_MORE_TEXT == _hr ) ||
                ( FILTER_E_NO_TEXT == _hr ) )
        {
            _hr = _filter.GetChunk( &_Stat );

            if ( FILTER_E_END_OF_CHUNKS == _hr )
                return WBREAK_E_END_OF_TEXT;

            if ( FILTER_E_PARTIALLY_FILTERED == _hr )
                return WBREAK_E_END_OF_TEXT;

            if ( FAILED( _hr ) )
                return( _hr );

            //
            // Skip over value chunks -- note that search products don't do
            // this. They convert VT_LPSTR, VT_BSTR, and VT_LPWSTR to
            // Unicode strings for the wordbreaker.
            //
            if ( CHUNK_TEXT != _Stat.flags )
            {
                // GetChunk succeeded, so _hr holds a success code here and
                // a bare continue would fail the loop test and fall out
                // with no text added.  Flag the chunk as text-less so the
                // loop really does advance to the next chunk.
                _hr = FILTER_E_NO_TEXT;
                continue;
            }

            ccRead = BufferWChars() - ccLeftOver;
            _hr = _filter.GetText( &ccRead,
                                   &_awcBuffer[ccLeftOver] );
            if ( SUCCEEDED( _hr ) )
            {
                iEnd += ccRead;
                ccLeftOver += ccRead;
                ccRead = BufferWChars() - ccLeftOver;

                while ( ( S_OK == _hr ) && ( ccRead > BUFFER_SLOP ) )
                {
                    // Attempt to fill in as much of buffer as possible
                    _hr = _filter.GetText( &ccRead,
                                           &_awcBuffer[ccLeftOver] );
                    if ( SUCCEEDED( _hr ) )
                    {
                        iEnd += ccRead;
                        ccLeftOver += ccRead;
                        ccRead = BufferWChars() - ccLeftOver;
                    }
                }

                //
                // Either return FILTER_S_LAST_TEXT or return S_OK because we
                // have succeeded in adding text to the buffer.
                //
                if ( FILTER_S_LAST_TEXT == _hr )
                    return FILTER_S_LAST_TEXT;

                return S_OK;
            }
        }

        if ( FAILED( _hr ) )
            return _hr;

        if ( 0 == ccRead )
            return WBREAK_E_END_OF_TEXT;

        return S_OK;
    } //FillBuf

    // Capacity of the text buffer, in characters.
    ULONG BufferWChars() const
    {
        return ArraySize( _awcBuffer );
    }

    IFilter & _filter;          // Source of chunks and text
    HRESULT _hr;                // Result of the most recent filter call
    STAT_CHUNK _Stat;           // Description of the current chunk
    WCHAR _awcBuffer[ 1024 ];   // Backing store exposed through awcBuffer
};
//+---------------------------------------------------------------------------
//
// Class: CWordFormSink
//
// Purpose: Sample stemmer sink -- just prints the results.
//
//----------------------------------------------------------------------------
class CWordFormSink : public IWordFormSink
{
public:
    CWordFormSink() {}

    // Hands back this object for every interface requested; riid is not
    // examined.
    HRESULT STDMETHODCALLTYPE QueryInterface(
        REFIID riid,
        void ** ppvObject )
    {
        *ppvObject = this;
        return S_OK;
    }

    // The object is not reference counted; both calls always report 1.
    ULONG STDMETHODCALLTYPE AddRef() { return 1; }
    ULONG STDMETHODCALLTYPE Release() { return 1; }

    // Prints an alternate word form produced by the stemmer.
    HRESULT STDMETHODCALLTYPE PutAltWord(
        WCHAR const * pwcBuf,
        ULONG cwc )
    {
        return Report( L"IWordFormSink::PutAltWord: cwc %d, '%.*ws'",
                       pwcBuf,
                       cwc );
    }

    // Prints a word form produced by the stemmer.
    HRESULT STDMETHODCALLTYPE PutWord (
        WCHAR const * pwcBuf,
        ULONG cwc )
    {
        return Report( L"IWordFormSink::PutWord: cwc %d, '%.*ws'",
                       pwcBuf,
                       cwc );
    }

private:
    // Writes one line using the given format (count, precision, text),
    // follows it with the optional hex dump, and always succeeds.
    static HRESULT Report(
        WCHAR const * pwcFormat,
        WCHAR const * pwcWord,
        ULONG cwcWord )
    {
        out( pwcFormat, cwcWord, cwcWord, pwcWord );
        DumpStringAsHex( pwcWord, cwcWord );
        return S_OK;
    }
};
//+---------------------------------------------------------------------------
//
// Class: CWordSink
//
// Purpose: Sample word sink -- just prints the results.
//
//----------------------------------------------------------------------------
class CWordSink : public IWordSink
{
public:
    CWordSink() {}

    // Hands back this object for every interface requested; riid is not
    // examined.
    HRESULT STDMETHODCALLTYPE QueryInterface(
        REFIID riid,
        void ** ppvObject )
    {
        *ppvObject = this;
        return S_OK;
    }

    // The object is not reference counted; both calls always report 1.
    ULONG STDMETHODCALLTYPE AddRef() { return 1; }
    ULONG STDMETHODCALLTYPE Release() { return 1; }

    // Prints a word emitted by the wordbreaker along with its source span.
    HRESULT STDMETHODCALLTYPE PutWord(
        ULONG cwc,
        WCHAR const * pwcBuf,
        ULONG cwcSrcLen,
        ULONG cwcSrcPos )
    {
        return Report( L"IWordSink::PutWord: cwcSrcLen %d, cwcSrcPos %d, cwc %d, '%.*ws'",
                       cwc, pwcBuf, cwcSrcLen, cwcSrcPos );
    }

    // Prints an alternate word emitted by the wordbreaker.
    HRESULT STDMETHODCALLTYPE PutAltWord(
        ULONG cwc,
        WCHAR const * pwcBuf,
        ULONG cwcSrcLen,
        ULONG cwcSrcPos )
    {
        return Report( L"IWordSink::PutAltWord: cwcSrcLen %d, cwcSrcPos %d, cwc %d, '%.*ws'",
                       cwc, pwcBuf, cwcSrcLen, cwcSrcPos );
    }

    HRESULT STDMETHODCALLTYPE StartAltPhrase()
    {
        out( L"IWordSink::StartAltPhrase" );
        return S_OK;
    }

    HRESULT STDMETHODCALLTYPE EndAltPhrase()
    {
        out( L"IWordSink::EndAltPhrase" );
        return S_OK;
    }

    // Prints the numeric break type and a readable name for it.
    HRESULT STDMETHODCALLTYPE PutBreak( WORDREP_BREAK_TYPE wbt )
    {
        WCHAR const * pwcName;

        switch ( wbt )
        {
            case WORDREP_BREAK_EOW:
                pwcName = L"end of word";
                break;
            case WORDREP_BREAK_EOS:
                pwcName = L"end of sentence";
                break;
            case WORDREP_BREAK_EOP:
                pwcName = L"end of paragraph";
                break;
            case WORDREP_BREAK_EOC:
                pwcName = L"end of chapter";
                break;
            default:
                pwcName = L"invalid break type";
                break;
        }

        out( L"IWordSink::PutBreak, type (%d) %ws",
             wbt,
             pwcName );
        return S_OK;
    }

private:
    // Writes one line using the given format (source length, source
    // position, count, precision, text), follows it with the optional hex
    // dump, and always succeeds.
    static HRESULT Report(
        WCHAR const * pwcFormat,
        ULONG cwcWord,
        WCHAR const * pwcWord,
        ULONG cwcSourceLen,
        ULONG cwcSourcePos )
    {
        out( pwcFormat, cwcSourceLen, cwcSourcePos, cwcWord, cwcWord, pwcWord );
        DumpStringAsHex( pwcWord, cwcWord );
        return S_OK;
    }
};
//+---------------------------------------------------------------------------
//
// Class: CPhraseSink
//
// Purpose: Sample phrase sink -- just prints the results.
//
//----------------------------------------------------------------------------
class CPhraseSink: public IPhraseSink
{
public:
    CPhraseSink() {}

    // Hands back this object for every interface requested.
    HRESULT STDMETHODCALLTYPE QueryInterface(
        REFIID riid,
        void ** ppvObject )
    {
        // Assume the caller is well-behaved
        void * const pvSelf = this;
        *ppvObject = pvSelf;
        return S_OK;
    }

    // The object is not reference counted; both calls always report 1.
    ULONG STDMETHODCALLTYPE AddRef() { return 1; }
    ULONG STDMETHODCALLTYPE Release() { return 1; }

    // Only notes that the call happened; the arguments are not printed.
    HRESULT STDMETHODCALLTYPE PutSmallPhrase(
        const WCHAR * pwcNoun,
        ULONG cwcNoun,
        const WCHAR * pwcModifier,
        ULONG cwcModifier,
        ULONG ulAttachmentType )
    {
        out( L"IPhraseSink::PutSmallPhrase" );
        return S_OK;
    }

    // Prints the phrase, followed by the optional hex dump.
    HRESULT STDMETHODCALLTYPE PutPhrase(
        WCHAR const * pwcPhrase,
        ULONG cwcPhrase )
    {
        const ULONG cwcShown = cwcPhrase;

        out( L"IPhraseSink::PutPhrase: cwcPhrase %d, '%.*ws'",
             cwcShown, cwcShown, pwcPhrase );
        DumpStringAsHex( pwcPhrase, cwcShown );
        return S_OK;
    }
};
//+---------------------------------------------------------------------------
//
// Function: GetVersionKey
//
// Purpose: Displays a particular version key
//
// Arguments: [pbInfo] -- The version information
// [pwcLang] -- The language of the string requested
// [pwcKey] -- Key name to retrieve
//
// Returns: TRUE if a value was found, FALSE otherwise
//
//----------------------------------------------------------------------------
BOOL GetVersionKey(
    BYTE * pbInfo,
    WCHAR const * pwcLang,
    WCHAR const * pwcKey )
{
    if ( ( 0 == pbInfo ) || ( 0 == pwcLang ) || ( 0 == pwcKey ) )
        return FALSE;

    // Build "\StringFileInfo\<lang>\<key>".  The formatting is bounded so
    // an unexpectedly long language or key can't overrun the buffer; a
    // sub-block name that doesn't fit is treated as "not found".
    WCHAR awcKey[ 128 ];
    const int cwcMax = (int) ArraySize( awcKey ) - 1;
    const int cwc = _snwprintf( awcKey,
                                cwcMax,
                                L"\\StringFileInfo\\%ws\\%ws",
                                pwcLang,
                                pwcKey );
    if ( ( cwc < 0 ) || ( cwc > cwcMax ) )
        return FALSE;

    // _snwprintf doesn't terminate when the output exactly fills the count.
    awcKey[ cwc ] = 0;

    WCHAR * pwcResult = 0;
    UINT cb = 0;

    // A successful query with a zero length means no value is available.
    if ( VerQueryValue( pbInfo,
                        awcKey,
                        (PVOID *) &pwcResult,
                        &cb ) &&
         ( 0 != cb ) &&
         ( 0 != pwcResult ) )
    {
        out( L" %ws: '%ws'", pwcKey, pwcResult );
        return TRUE;
    }

    return FALSE;
} //GetVersionKey
//+---------------------------------------------------------------------------
//
// Function: OutputFiletime
//
// Purpose: Displays a filetime
//
// Arguments: [pwcHeader] -- Prefix to print before the filetime
// [ft] -- Filetime to print, in UTC originally
//
//----------------------------------------------------------------------------
void OutputFiletime( WCHAR const * pwcHeader, FILETIME & ft )
{
    // Convert UTC -> local -> broken-down time.  If either step fails the
    // SYSTEMTIME is not filled in, so report that instead of printing
    // whatever happens to be on the stack.
    FILETIME ftLocal;
    SYSTEMTIME st;

    if ( !FileTimeToLocalFileTime( &ft, &ftLocal ) ||
         !FileTimeToSystemTime( &ftLocal, &st ) )
    {
        out( L"%ws: (invalid time)", pwcHeader );
        return;
    }

    // Switch from a 24-hour to a 12-hour clock with an a/p suffix.
    BOOL pm = st.wHour >= 12;

    if ( st.wHour > 12 )
        st.wHour -= 12;
    else if ( 0 == st.wHour )
        st.wHour = 12;

    out( L"%ws: %2d-%02d-%04d %2d:%02d%wc",
         pwcHeader,
         (DWORD) st.wMonth,
         (DWORD) st.wDay,
         (DWORD) st.wYear,
         (DWORD) st.wHour,
         (DWORD) st.wMinute,
         pm ? L'p' : L'a' );
} //OutputFiletime
//+---------------------------------------------------------------------------
//
// Function: DisplayModuleInformation
//
// Purpose: Displays information about a module -- dates and version
//
// Arguments: [hMod] -- Module handle
//
//----------------------------------------------------------------------------
// Prints the path, version number, file times and selected version strings
// of a loaded module.  Returns S_OK on success, or a failure HRESULT if the
// module path or its version resource can't be read.
//
// Each failure path captures GetLastError() exactly once, before printing,
// because printing may change the thread's last-error value.
HRESULT DisplayModuleInformation( HINSTANCE hMod )
{
    WCHAR awcDllPath[ MAX_PATH ];
    DWORD cwcCopied = GetModuleFileName( hMod,
                                         awcDllPath,
                                         ArraySize( awcDllPath ) );
    if ( 0 == cwcCopied )
    {
        DWORD dwErr = GetLastError();
        return ( NO_ERROR != dwErr ) ? HRESULT_FROM_WIN32( dwErr ) : E_FAIL;
    }

    // Guarantee termination even if the path was truncated.
    awcDllPath[ ArraySize( awcDllPath ) - 1 ] = 0;

    out( L"dll loaded: %ws", awcDllPath );

    DWORD dwHandle;
    DWORD cbVersionInfo = GetFileVersionInfoSize( awcDllPath, &dwHandle );
    if ( 0 == cbVersionInfo )
    {
        DWORD dwErr = GetLastError();
        printf( "can't get dll version information size, error %d\n",
                dwErr );
        return ( NO_ERROR != dwErr ) ? HRESULT_FROM_WIN32( dwErr ) : E_FAIL;
    }

    XPtr<BYTE> xVersionInfo( cbVersionInfo );
    if ( xVersionInfo.IsNull() )
        return E_OUTOFMEMORY;

    BOOL fOK = GetFileVersionInfo( awcDllPath,
                                   0,
                                   cbVersionInfo,
                                   xVersionInfo.Get() );
    if ( !fOK )
    {
        DWORD dwErr = GetLastError();
        printf( "unable to retrieve version information, error %d\n",
                dwErr );
        return ( NO_ERROR != dwErr ) ? HRESULT_FROM_WIN32( dwErr ) : E_FAIL;
    }

    // Get the DLL version number
    void * pvValue = 0;
    UINT cbValue = 0;
    fOK = VerQueryValue( xVersionInfo.Get(),
                         L"\\",
                         &pvValue,
                         &cbValue );

    // The root block must hold a complete VS_FIXEDFILEINFO before any of
    // its fields are read.
    if ( !fOK || ( 0 == pvValue ) || ( cbValue < sizeof( VS_FIXEDFILEINFO ) ) )
    {
        DWORD dwErr = GetLastError();
        printf( "can't retrieve version root value, error %d\n",
                dwErr );
        return ( NO_ERROR != dwErr ) ? HRESULT_FROM_WIN32( dwErr ) : E_FAIL;
    }

    VS_FIXEDFILEINFO & ffi = * (VS_FIXEDFILEINFO *) pvValue;

    out( L" dll version %u.%u.%u.%u",
         HIWORD( ffi.dwFileVersionMS ),
         LOWORD( ffi.dwFileVersionMS ),
         HIWORD( ffi.dwFileVersionLS ),
         LOWORD( ffi.dwFileVersionLS ) );

    if ( 0 != ffi.dwFileDateLS && 0 != ffi.dwFileDateMS )
    {
        FILETIME ft;
        ft.dwLowDateTime = ffi.dwFileDateLS;
        ft.dwHighDateTime = ffi.dwFileDateMS;
        OutputFiletime( L" version creation date: ", ft );
    }

    // File times are best-effort: silently skipped if the file can't be
    // opened or queried.
    HANDLE h = CreateFile( awcDllPath,
                           FILE_GENERIC_READ,
                           FILE_SHARE_READ | FILE_SHARE_DELETE,
                           0,
                           OPEN_EXISTING,
                           0,
                           0 );
    if ( INVALID_HANDLE_VALUE != h )
    {
        FILETIME ftCreate, ftLastWrite;
        fOK = GetFileTime( h, &ftCreate, 0, &ftLastWrite );
        if ( fOK )
        {
            OutputFiletime( L" file create time", ftCreate );
            OutputFiletime( L" file last write time", ftLastWrite );
        }
        CloseHandle( h );
    }

    //
    // Get the language string. Not every dll stores it correctly, so fall
    // back on English locales known to work for some special cases.
    //
    WCHAR awcLang[9];   // 8 hex digits plus the terminator
    awcLang[0] = 0;

    DWORD * pdwLang = 0;
    UINT cb = 0;

    if ( VerQueryValue( xVersionInfo.Get(),
                        L"VarFileInfo\\Translation",
                        (PVOID *) &pdwLang,
                        &cb ) &&
         ( cb >= 4 ) )
    {
        // Language id first, then code page -- 4 hex digits each.
        wsprintf( awcLang,
                  L"%04x%04x",
                  LOWORD( *pdwLang ),
                  HIWORD( *pdwLang ) );
    }

    if ( 0 == awcLang[0] )
    {
        // Try English Unicode
        wcscpy( awcLang, L"040904B0" );
        if ( !GetVersionKey( xVersionInfo.Get(),
                             awcLang,
                             L"FileVersion" ) )
        {
            // Try English
            wcscpy( awcLang, L"040904E4" );
            if ( !GetVersionKey( xVersionInfo.Get(),
                                 awcLang,
                                 L"FileVersion" ) )
            {
                // Try English null codepage
                wcscpy( awcLang, L"04090000" );
                if ( !GetVersionKey( xVersionInfo.Get(),
                                     awcLang,
                                     L"FileVersion" ) )
                    awcLang[0] = 0;
            }
        }
    }
    else
    {
        GetVersionKey( xVersionInfo.Get(), awcLang, L"FileVersion" );
    }

    // Display additional version information if we found the language
    if ( 0 != awcLang[0] )
    {
        GetVersionKey( xVersionInfo.Get(), awcLang, L"FileDescription" );
        GetVersionKey( xVersionInfo.Get(), awcLang, L"CompanyName" );
        GetVersionKey( xVersionInfo.Get(), awcLang, L"ProductName" );
    }

    return S_OK;
} //DisplayModuleInformation
//+---------------------------------------------------------------------------
//
// Function: CreateFromModule
//
// Purpose: Creates a COM object given a dll
//
// Arguments: [clsid] -- Class ID of the object to load
// [iid] -- Interface ID requested
// [ppvObject] -- Returns the object created
// [pwcModule] -- Dll to load
// [fShowStatusInfo] -- TRUE to print status information
//
// Returns: HRESULT, S_OK if successful
//
//----------------------------------------------------------------------------
// Loads pwcModule, obtains the class factory for clsid through the dll's
// DllGetClassObject export, and creates an instance exposing iid.
// Returns S_OK with the object in *ppvObject, or a failure HRESULT.
HRESULT CreateFromModule(
    REFIID clsid,
    REFIID iid,
    void ** ppvObject,
    WCHAR const * pwcModule,
    BOOL fShowStatusInfo = TRUE )
{
    // Note: the module handle will be leaked. It's OK for a test program.
    HMODULE hMod = LoadLibrary( pwcModule );
    if ( 0 == hMod )
    {
        DWORD dwErr = GetLastError();
        return ( NO_ERROR != dwErr ) ? HRESULT_FROM_WIN32( dwErr ) : E_FAIL;
    }

    // Display information about the module -- ignore errors
    if ( fShowStatusInfo )
        DisplayModuleInformation( hMod );

    LPFNGETCLASSOBJECT pfn = (LPFNGETCLASSOBJECT)
                             GetProcAddress( hMod, "DllGetClassObject" );
    if ( 0 == pfn )
    {
        // Capture the error once, before printf has a chance to change
        // it, so the message and the returned HRESULT agree.
        DWORD dwErr = GetLastError();
        printf( "can't get DllGetClassObject: %d\n", dwErr );
        return ( NO_ERROR != dwErr ) ? HRESULT_FROM_WIN32( dwErr ) : E_FAIL;
    }

    XInterface<IClassFactory> xClassFactory;
    HRESULT hr = pfn( clsid,
                      IID_IClassFactory,
                      xClassFactory.GetQIPointer() );
    if ( FAILED( hr ) )
    {
        printf( "can't instantiate the class factory: %#x\n", hr );
        return hr;
    }

    return xClassFactory->CreateInstance( 0, iid, ppvObject );
} //CreateFromModule
//+---------------------------------------------------------------------------
//
// Function: FakeCoCreateInstance
//
// Purpose: Creates a COM object
//
// Arguments: [clsid] -- Class ID of the object to load
// [iid] -- Interface ID requested
// [ppvObject] -- Returns the object created
// [fShowStatusInfo] -- TRUE to print status information
//
// Returns: HRESULT, S_OK if successful
//
// Needed because some wordbreakers register as single-threaded. Search
// products require multi-threaded because marshalling across apartments
// doesn't work and because it's too inefficient, especially on
// multi-processor machines.
//
//----------------------------------------------------------------------------
// Looks up the in-process server registered for clsid under
// HKEY_CLASSES_ROOT\CLSID\{...}\InprocServer32 and creates the object
// directly from that dll via CreateFromModule, bypassing COM's own
// activation.  Returns S_OK with the object in *ppvObject, or a failure
// HRESULT.
HRESULT FakeCoCreateInstance(
    REFIID clsid,
    REFIID iid,
    void ** ppvObject,
    BOOL fShowStatusInfo = TRUE )
{
    WCHAR awcCLSID[ 40 ];
    if ( 0 == StringFromGUID2( clsid, awcCLSID, ArraySize( awcCLSID ) ) )
        return E_FAIL;

    // Bounded formatting of the registry key name.
    WCHAR awcKey[ 200 ];
    const int cwcKeyMax = (int) ArraySize( awcKey ) - 1;
    const int cwcKey = _snwprintf( awcKey,
                                   cwcKeyMax,
                                   L"CLSID\\%ws\\InprocServer32",
                                   awcCLSID );
    if ( ( cwcKey < 0 ) || ( cwcKey > cwcKeyMax ) )
        return E_FAIL;
    awcKey[ cwcKey ] = 0;

    HKEY hKey;
    DWORD dwErr = RegOpenKey( HKEY_CLASSES_ROOT, awcKey, &hKey );
    if ( NO_ERROR != dwErr )
        return HRESULT_FROM_WIN32( dwErr );

    // Read the default value.  Registry strings aren't guaranteed to be
    // null terminated, so keep one character in reserve and terminate the
    // result explicitly.
    WCHAR awcRaw[ MAX_PATH + 1 ];
    DWORD dwType = REG_NONE;
    DWORD cbData = sizeof( awcRaw ) - sizeof( WCHAR );
    dwErr = RegQueryValueEx( hKey,
                             L"",
                             0,
                             &dwType,
                             (LPBYTE) awcRaw,
                             &cbData );
    RegCloseKey( hKey );

    if ( NO_ERROR != dwErr )
        return HRESULT_FROM_WIN32( dwErr );

    // Anything other than a string can't be a dll path.
    if ( ( REG_SZ != dwType ) && ( REG_EXPAND_SZ != dwType ) )
        return HRESULT_FROM_WIN32( ERROR_INVALID_DATA );

    awcRaw[ cbData / sizeof( WCHAR ) ] = 0;

    WCHAR awcExpanded[ MAX_PATH + 1 ];
    WCHAR const * pwcDll = awcRaw;

    // Servers are often registered with paths like
    // %SystemRoot%\system32\foo.dll; expand those before loading.
    if ( REG_EXPAND_SZ == dwType )
    {
        DWORD cwc = ExpandEnvironmentStrings( awcRaw,
                                              awcExpanded,
                                              ArraySize( awcExpanded ) );
        if ( 0 == cwc )
        {
            dwErr = GetLastError();
            return ( NO_ERROR != dwErr ) ? HRESULT_FROM_WIN32( dwErr ) : E_FAIL;
        }

        // A result larger than the buffer means the path didn't fit.
        if ( cwc > ArraySize( awcExpanded ) )
            return HRESULT_FROM_WIN32( ERROR_INSUFFICIENT_BUFFER );

        pwcDll = awcExpanded;
    }

    return CreateFromModule( clsid, iid, ppvObject, pwcDll, fShowStatusInfo );
} //FakeCoCreateInstance
//+---------------------------------------------------------------------------
//
// Function: Stem
//
// Purpose: Stems the input text using the specified stemmer
//
// Arguments: [pwcText] -- The text to be stemmed
// [pwcModule] -- Optional module name to override COM lookup.
// [clsid] -- Class ID of the stemmer to use
// [cwcMaxToken] -- Maximum token size for the stemmer
//
//----------------------------------------------------------------------------
// Creates the stemmer identified by clsid (from pwcModule when given,
// otherwise through the registry or COM), initializes it, prints its
// license information and, when pwcText is non-null, prints every word
// form generated for that text.  Returns S_OK or the first fatal failure.
HRESULT Stem(
    WCHAR const * pwcText,
    WCHAR const * pwcModule,
    CLSID & clsid,
    ULONG cwcMaxToken )
{
    XInterface<IStemmer> xStemmer;
    HRESULT hr = S_OK;

    if ( 0 == pwcModule )
    {
#ifdef USE_FAKE_COM
        hr = FakeCoCreateInstance( clsid,
                                   IID_IStemmer,
                                   xStemmer.GetQIPointer() );
#else
        hr = CoCreateInstance( clsid,
                               0,
                               CLSCTX_INPROC_SERVER,
                               IID_IStemmer,
                               xStemmer.GetQIPointer() );
#endif
    }
    else
    {
        // An explicit dll overrides the CLSID lookup.
        hr = CreateFromModule( clsid,
                               IID_IStemmer,
                               xStemmer.GetQIPointer(),
                               pwcModule );
    }

    if ( FAILED( hr ) )
    {
        printf( "can't CoCreateInstance the stemmer: %#x\n", hr );
        return hr;
    }

    BOOL fNeedsLicense = FALSE;
    hr = xStemmer->Init( cwcMaxToken, &fNeedsLicense );
    if ( FAILED( hr ) )
    {
        printf( "can't Init() in the stemmer: %#x\n", hr );
        return hr;
    }

    out( L"Stemmer requires license: %ws", fNeedsLicense ? L"Yes" : L"No" );

    // A missing license string is reported but is not fatal.
    const WCHAR * pwcsLicenseText = 0;
    hr = xStemmer->GetLicenseToUse( &pwcsLicenseText );
    if ( SUCCEEDED( hr ) )
        out( L"Stemmer license: '%ws'", pwcsLicenseText );
    else
        out( L"can't GetLicenseToUse() in the stemmer: %#x\n", hr );

    // Nothing to stem.
    if ( 0 == pwcText )
        return S_OK;

    CWordFormSink formSink;

    out( L"Original text: '%ws'", pwcText );

    const ULONG cwcText = (ULONG) wcslen( pwcText );
    hr = xStemmer->GenerateWordForms( pwcText, cwcText, &formSink );
    if ( FAILED( hr ) )
    {
        printf( "can't GenerateWordForms() in the stemmer: %#x\n", hr );
        return hr;
    }

    return S_OK;
} //Stem
//+---------------------------------------------------------------------------
//
// Function: WordBreak
//
// Purpose: Wordbreaks the input text or file
//
// Arguments: [fQuery] -- TRUE if query time FALSE if index time
// [pwcText] -- The text to be wordbroken.
// [pwcInputFile] -- Filename to be wordbroken if pwcText is 0
// [pwcModule] -- Optional module name to override COM lookup.
// [clsid] -- Class ID of the wordbreaker to use
// [cwcMaxToken] -- Maximum token size for the wordbreaker
//
//----------------------------------------------------------------------------
// Creates the wordbreaker identified by clsid (from pwcModule when given,
// otherwise through the registry or COM), initializes it for query or
// index time, prints its license information and then breaks either
// pwcText or, when pwcText is null, the filtered contents of pwcInputFile.
// Words and phrases are printed by the sample sinks.  Returns S_OK or the
// first failure.
HRESULT WordBreak(
    BOOL fQuery,
    WCHAR const * pwcText,
    WCHAR const * pwcInputFile,
    WCHAR const * pwcModule,
    CLSID & clsid,
    ULONG cwcMaxToken )
{
    XInterface<IWordBreaker> xWordBreaker;
    HRESULT hr = S_OK;

    if ( 0 != pwcModule )
    {
        hr = CreateFromModule( clsid,
                               IID_IWordBreaker,
                               xWordBreaker.GetQIPointer(),
                               pwcModule );
    }
    else
    {
#ifdef USE_FAKE_COM
        hr = FakeCoCreateInstance( clsid,
                                   IID_IWordBreaker,
                                   xWordBreaker.GetQIPointer() );
#else
        hr = CoCreateInstance( clsid,
                               0,
                               CLSCTX_INPROC_SERVER,
                               IID_IWordBreaker,
                               xWordBreaker.GetQIPointer() );
#endif
    }

    if ( FAILED( hr ) )
    {
        printf( "can't CoCreateInstance the wordbreaker: %#x\n", hr );
        return hr;
    }

    BOOL fLicense = FALSE;
    hr = xWordBreaker->Init( fQuery, cwcMaxToken, &fLicense );
    if ( FAILED( hr ) )
    {
        printf( "can't Init() in the wordbreaker: %#x\n", hr );
        return hr;
    }

    out( L"Wordbreaker requires license: %ws", fLicense ? L"Yes" : L"No" );

    const WCHAR *pwcsLicense = 0;
    hr = xWordBreaker->GetLicenseToUse( &pwcsLicense );
    if ( FAILED( hr ) )
    {
        printf( "can't GetLicenseToUse() in the wordbreaker: %#x\n", hr );
        return hr;
    }

    out( L"Wordbreaker license: '%ws'", pwcsLicense );

    CWordSink wordSink;
    CPhraseSink phraseSink;

    if ( 0 != pwcText )
    {
        out( L"Original text: '%ws'", pwcText );

        CPlainTextSource textSource( pwcText, (ULONG) wcslen( pwcText ) );
        hr = xWordBreaker->BreakText( &textSource, &wordSink, &phraseSink );
        if ( FAILED( hr ) )
        {
            printf( "can't BreakText() in the wordbreaker: %#x\n", hr );
            return hr;
        }
    }
    else
    {
        // With no literal text there has to be a file to read.
        if ( 0 == pwcInputFile )
        {
            printf( "no text or input file specified\n" );
            return E_INVALIDARG;
        }

        out( L"Wordbreaking text from file %ws", pwcInputFile );

        // Load the Indexing Service filter (should be fine for testing).
        XInterface<IFilter> xIFilter;
        hr = LoadIFilter( pwcInputFile, 0, xIFilter.GetQIPointer() );
        if ( FAILED( hr ) )
        {
            // Fall back on the plain text filter.
            printf( "Can't load filter, error %#x. Trying text filter.\n",
                    hr );

            // The fallback entry point is resolved at run time and may be
            // unavailable; calling through a null pointer would crash, so
            // report the original load failure instead.
            if ( 0 != g_pLoadTextFilter )
                hr = g_pLoadTextFilter( pwcInputFile, xIFilter.GetPPointer() );

            if ( FAILED( hr ) )
            {
                printf( "can't load filter, error %#x\n", hr );
                return hr;
            }
        }

        // Initialize the filter
        ULONG ulFlags = 0;
        hr = xIFilter->Init( IFILTER_INIT_CANON_PARAGRAPHS |
                                 IFILTER_INIT_CANON_HYPHENS |
                                 IFILTER_INIT_APPLY_INDEX_ATTRIBUTES,
                             0,
                             0,
                             &ulFlags );
        if ( FAILED( hr ) )
        {
            printf( "can't initialize filter, error %#x\n", hr );
            return hr;
        }

        CFilterTextSource textSource( xIFilter.GetReference() );
        hr = xWordBreaker->BreakText( &textSource, &wordSink, &phraseSink );
        if ( FAILED( hr ) )
        {
            printf( "can't BreakText() in the wordbreaker: %#x\n", hr );
            return hr;
        }
    }

    return S_OK;
} //WordBreak
//+-------------------------------------------------------------------------
//
// Function: Render
//
// Synopsis: Prints an item in a safearray
//
// Arguments: [vt] - type of the element
// [pv] - pointer to the item
//
//--------------------------------------------------------------------------
void PrintSafeArray( VARTYPE vt, LPSAFEARRAY pa );

// Prints the value that pv points at, interpreting it according to vt.
// Arrays are handed to PrintSafeArray; types without a case below are
// printed as their numeric type code.  No line ending is written.
void Render( VARTYPE vt, void * pv )
{
    // Arrays: strip the array bit and print the elements.
    if ( 0 != ( VT_ARRAY & vt ) )
    {
        SAFEARRAY * pArray = *(SAFEARRAY **) pv;
        PrintSafeArray( (VARTYPE) ( vt & ~VT_ARRAY ), pArray );
        return;
    }

    switch ( vt )
    {
        // Unsigned integers
        case VT_UI1:
            outstr( L"%u", (unsigned) *(BYTE *)pv );
            break;

        case VT_UI2:
            outstr( L"%u", (unsigned) *(USHORT *)pv );
            break;

        case VT_UI4:
        case VT_UINT:
            outstr( L"%u", (unsigned) *(ULONG *)pv );
            break;

        case VT_UI8:
            outstr( L"%I64u", *(unsigned __int64 *)pv );
            break;

        // Signed integers
        case VT_I1:
            outstr( L"%d", (int) *(CHAR *)pv );
            break;

        case VT_I2:
            outstr( L"%d", (int) *(SHORT *)pv );
            break;

        case VT_I4:
        case VT_ERROR:
        case VT_INT:
            outstr( L"%d", *(LONG *)pv );
            break;

        case VT_I8:
            outstr( L"%I64d", *(__int64 *)pv );
            break;

        // Floating point, and types printed as floating point
        case VT_R4:
            outstr( L"%f", *(float *)pv );
            break;

        case VT_R8:
            outstr( L"%lf", *(double *)pv );
            break;

        case VT_DECIMAL:
        {
            // Nothing is printed if the conversion fails.
            double dblValue;
            if ( SUCCEEDED( VarR8FromDec( (DECIMAL *) pv, &dblValue ) ) )
                outstr( L"%lf", dblValue );
            break;
        }

        case VT_CY:
        {
            // Nothing is printed if the conversion fails.
            double dblValue;
            if ( SUCCEEDED( VarR8FromCy( * (CY *) pv, &dblValue ) ) )
                outstr( L"%lf", dblValue );
            break;
        }

        case VT_BOOL:
            if ( *(VARIANT_BOOL *)pv )
                outstr( L"TRUE" );
            else
                outstr( L"FALSE" );
            break;

        case VT_BSTR:
            outstr( L"%ws", *(BSTR *) pv );
            break;

        case VT_VARIANT:
        {
            // Unwrap the variant and print its payload.
            PROPVARIANT * pVariant = (PROPVARIANT *) pv;
            Render( pVariant->vt, & pVariant->lVal );
            break;
        }

        case VT_DATE:
        {
            SYSTEMTIME stValue;
            if ( !VariantTimeToSystemTime( *(DATE *)pv, &stValue ) )
                break;

            // Switch from a 24-hour to a 12-hour clock with an a/p suffix.
            const BOOL fAfternoon = stValue.wHour >= 12;

            if ( stValue.wHour > 12 )
                stValue.wHour -= 12;
            else if ( 0 == stValue.wHour )
                stValue.wHour = 12;

            outstr( L"%2d-%02d-%04d %2d:%02d%wc",
                    (DWORD) stValue.wMonth,
                    (DWORD) stValue.wDay,
                    (DWORD) stValue.wYear,
                    (DWORD) stValue.wHour,
                    (DWORD) stValue.wMinute,
                    fAfternoon ? L'p' : L'a' );
            break;
        }

        // Nothing to show
        case VT_EMPTY:
        case VT_NULL:
            break;

        default:
            outstr( L"(vt 0x%x)", (int) vt );
            break;
    }
} //Render
//+-------------------------------------------------------------------------
//
//  Function:   PrintSafeArray
//
//  Synopsis:   Walks every element of a safearray, rendering each one.
//              Each dimension is wrapped in braces and elements along
//              the last dimension are separated by commas.
//
//  Arguments:  [vt] - type of elements in the safearray
//              [pa] - pointer to the safearray
//
//--------------------------------------------------------------------------

void PrintSafeArray( VARTYPE vt, LPSAFEARRAY pa )
{
    UINT const cDims = SafeArrayGetDim( pa );

    if ( 0 == cDims )
        return;

    // Current index plus the lower and upper bound for each dimension

    XPtr<LONG> xIndex( cDims );
    XPtr<LONG> xLower( cDims );
    XPtr<LONG> xUpper( cDims );

    for ( UINT d = 0; d < cDims; d++ )
    {
        if ( FAILED( SafeArrayGetLBound( pa, d + 1, &xLower[d] ) ) )
            return;

        xIndex[ d ] = xLower[ d ];

        if ( FAILED( SafeArrayGetUBound( pa, d + 1, &xUpper[d] ) ) )
            return;

        outstr( L"{" );
    }

    UINT const iInner = cDims - 1;

    for (;;)
    {
        // Separate items along the innermost dimension

        if ( xIndex[ iInner ] != xLower[ iInner ] )
            outstr( L"," );

        void * pvItem;

        if ( FAILED( SafeArrayPtrOfIndex( pa, xIndex.Get(), &pvItem ) ) )
            return;

        Render( vt, pvItem );

        // Advance like an odometer: every dimension already at its upper
        // bound is closed and reset, then the next outer one is bumped.

        ULONG cReopen = 0;
        LONG d = iInner;

        while ( !( xIndex[ d ] < xUpper[ d ] ) )
        {
            outstr( L"}" );

            // The outermost dimension wrapped -- every item was printed

            if ( 0 == d )
                return;

            cReopen++;
            xIndex[ d ] = xLower[ d ];
            d--;
        }

        xIndex[ d ] = 1 + xIndex[ d ];

        // Reopen each dimension that was closed above

        for ( ; cReopen > 0; cReopen-- )
            outstr( L"{" );
    }
} //PrintSafeArray
//+-------------------------------------------------------------------------
//
// Function: PrintVectorItems
//
// Synopsis: Prints items in a PROPVARIANT vector
//
// Arguments: [pVal] - The array of values
// [cVals] - The count of values
// [pcFmt] - The format string
//
//--------------------------------------------------------------------------
template<class T> void PrintVectorItems(
T * pVal,
ULONG cVals,
WCHAR const * pwcFmt )
{
outstr( L"{ " );
for( ULONG iVal = 0; iVal < cVals; iVal++ )
{
if ( 0 != iVal )
outstr( L"," );
outstr( pwcFmt, *pVal++ );
}
outstr( L" }" );
} //PrintVectorItems
//+-------------------------------------------------------------------------
//
//  Function:   DisplayValue
//
//  Synopsis:   Displays a PROPVARIANT value.  Limited formatting is done:
//              common scalar types, blobs, dates, vectors of simple types
//              and safearrays are formatted; anything else is shown by
//              its type code.
//
//  Arguments:  [pVar] - The value to display.  A null pointer is shown
//                       as "NULL".
//
//--------------------------------------------------------------------------

void DisplayValue( PROPVARIANT const * pVar )
{
    if ( 0 == pVar )
    {
        outstr( L"NULL" );
        return;
    }

    // Display the most typical variant types

    PROPVARIANT const & v = *pVar;

    switch ( v.vt )
    {
        case VT_EMPTY : break;
        case VT_NULL : break;
        case VT_I4 : outstr( L"%10d", v.lVal ); break;
        case VT_UI1 : outstr( L"%10d", v.bVal ); break;
        case VT_I2 : outstr( L"%10d", v.iVal ); break;
        case VT_R4 : outstr( L"%10f", v.fltVal ); break;
        case VT_R8 : outstr( L"%10lf", v.dblVal ); break;
        case VT_BOOL : outstr( v.boolVal ? L"TRUE" : L"FALSE" ); break;
        case VT_I1 : outstr( L"%10d", v.cVal ); break;
        case VT_UI2 : outstr( L"%10u", v.uiVal ); break;
        case VT_UI4 : outstr( L"%10u", v.ulVal ); break;
        case VT_INT : outstr( L"%10d", v.lVal ); break;
        case VT_UINT : outstr( L"%10u", v.ulVal ); break;

        // Pass the 64-bit integer itself through the varargs call, not
        // the LARGE_INTEGER / ULARGE_INTEGER union that wraps it.

        case VT_I8 : outstr( L"%20I64d", v.hVal.QuadPart ); break;
        case VT_UI8 : outstr( L"%20I64u", v.uhVal.QuadPart ); break;
        case VT_ERROR : outstr( L"%#x", v.scode ); break;
        case VT_LPSTR : outstr( L"%S", v.pszVal ); break;
        case VT_LPWSTR : outstr( L"%ws", v.pwszVal ); break;
        case VT_BSTR : outstr( L"%ws", v.bstrVal ); break;
        case VT_BLOB :
        {
            outstr( L"blob cb %u ", v.blob.cbSize );
            for ( unsigned x = 0; x < v.blob.cbSize; x++ )
                outstr( L" %#x ", v.blob.pBlobData[x] );
            break;
        }
        case VT_CY:
        {
            // Nothing is printed if the conversion fails

            double dbl;
            HRESULT hr = VarR8FromCy( v.cyVal, &dbl );
            if ( SUCCEEDED( hr ) )
                outstr( L"%lf", dbl );
            break;
        }
        case VT_DECIMAL :
        {
            double dbl;
            HRESULT hr = VarR8FromDec( (DECIMAL *) &v.decVal, &dbl );
            if ( SUCCEEDED( hr ) )
                outstr( L"%lf", dbl );
            break;
        }
        case VT_FILETIME :
        case VT_DATE :
        {
            SYSTEMTIME st;
            ZeroMemory( &st, sizeof st );

            if ( VT_DATE == v.vt )
            {
                BOOL fOK = VariantTimeToSystemTime( v.date, &st );
                if ( !fOK )
                    break;
            }
            else
            {
                // Both conversions must succeed; otherwise st is still
                // zeroed and would print as a meaningless date.

                FILETIME ft;
                BOOL fOK = FileTimeToLocalFileTime( &v.filetime, &ft );
                if ( fOK )
                    fOK = FileTimeToSystemTime( &ft, &st );
                if ( !fOK )
                    break;
            }

            // Convert the 24-hour clock to 12-hour with an a/p suffix

            BOOL pm = st.wHour >= 12;
            if ( st.wHour > 12 )
                st.wHour -= 12;
            else if ( 0 == st.wHour )
                st.wHour = 12;

            outstr( L"%2d-%02d-%04d %2d:%02d%wc",
                    (DWORD) st.wMonth,
                    (DWORD) st.wDay,
                    (DWORD) st.wYear,
                    (DWORD) st.wHour,
                    (DWORD) st.wMinute,
                    pm ? L'p' : L'a' );
            break;
        }
        case VT_VECTOR | VT_I1:
            PrintVectorItems( v.cac.pElems, v.cac.cElems, L"%d" ); break;
        case VT_VECTOR | VT_I2:
            PrintVectorItems( v.cai.pElems, v.cai.cElems, L"%d" ); break;
        case VT_VECTOR | VT_I4:
            PrintVectorItems( v.cal.pElems, v.cal.cElems, L"%d" ); break;
        case VT_VECTOR | VT_I8:
            // View the elements as plain 64-bit integers so each one
            // matches the %I64d format when passed through varargs.
            PrintVectorItems( (LONGLONG *) v.cah.pElems, v.cah.cElems, L"%I64d" ); break;
        case VT_VECTOR | VT_UI1:
            PrintVectorItems( v.caub.pElems, v.caub.cElems, L"%u" ); break;
        case VT_VECTOR | VT_UI2:
            PrintVectorItems( v.caui.pElems, v.caui.cElems, L"%u" ); break;
        case VT_VECTOR | VT_UI4:
            PrintVectorItems( v.caul.pElems, v.caul.cElems, L"%u" ); break;
        case VT_VECTOR | VT_ERROR:
            PrintVectorItems( v.cascode.pElems, v.cascode.cElems, L"%#x" ); break;
        case VT_VECTOR | VT_UI8:
            PrintVectorItems( (ULONGLONG *) v.cauh.pElems, v.cauh.cElems, L"%I64u" ); break;
        case VT_VECTOR | VT_BSTR:
            PrintVectorItems( v.cabstr.pElems, v.cabstr.cElems, L"%ws" ); break;
        case VT_VECTOR | VT_LPSTR:
            PrintVectorItems( v.calpstr.pElems, v.calpstr.cElems, L"%S" ); break;
        case VT_VECTOR | VT_LPWSTR:
            PrintVectorItems( v.calpwstr.pElems, v.calpwstr.cElems, L"%ws" ); break;
        case VT_VECTOR | VT_R4:
            PrintVectorItems( v.caflt.pElems, v.caflt.cElems, L"%f" ); break;
        case VT_VECTOR | VT_R8:
            PrintVectorItems( v.cadbl.pElems, v.cadbl.cElems, L"%lf" ); break;
        default :
        {
            // Safearrays of any element type are handled generically

            if ( VT_ARRAY & v.vt )
                PrintSafeArray( (VARTYPE) ( v.vt - VT_ARRAY ), v.parray );
            else
                outstr( L"vt 0x%05x", v.vt );
            break;
        }
    }
} //DisplayValue
//+---------------------------------------------------------------------------
//
//  Function:   Filter
//
//  Purpose:    Invokes an IFilter on a file: loads the filter, binds the
//              file to it through the requested IPersist* interface,
//              initializes it, dumps every chunk, then releases the
//              filter and verifies the file can be opened exclusively.
//
//  Arguments:  [pwcInputFile]    -- Filename to be filtered
//              [filterLoad]      -- How to load the file into the filter.
//              [pwcModule]       -- Optional module name to override COM lookup.
//              [pCLSID]          -- Optional class ID of the filter to use.
//                                   Required if pwcModule is specified.
//              [fShowStatusInfo] -- TRUE to get other information
//                                   FALSE for only output from the filter
//              [fGetText]        -- TRUE to retrieve text, FALSE to skip it
//
//  Returns:    S_OK if filtering ran and the file was released afterwards,
//              otherwise the failing HRESULT.  A GetChunk() failure other
//              than end-of-chunks is reported and ends the chunk loop, but
//              does not by itself fail the function.
//
//----------------------------------------------------------------------------

HRESULT Filter(
    WCHAR const *           pwcInputFile,
    enumFilterLoadMechanism filterLoad,
    WCHAR const *           pwcModule,
    CLSID *                 pCLSID,
    BOOL                    fShowStatusInfo,
    BOOL                    fGetText )
{
    XInterface<IFilter> xFilter;
    HRESULT hr = S_OK;

    if ( 0 != pwcModule )
    {
        // If the DLL is specified, use it

        if ( fShowStatusInfo )
            out( L"loading filter based on module name" );

        hr = CreateFromModule( *pCLSID,
                               IID_IFilter,
                               xFilter.GetQIPointer(),
                               pwcModule,
                               fShowStatusInfo );
    }
    else if ( 0 != pCLSID )
    {
        // If we just have a CLSID and no module, use it

        if ( fShowStatusInfo )
            out( L"loading filter based on CLSID and the registry" );

#ifdef USE_FAKE_COM
        hr = FakeCoCreateInstance( *pCLSID,
                                   IID_IFilter,
                                   xFilter.GetQIPointer(),
                                   fShowStatusInfo );
#else
        hr = CoCreateInstance( *pCLSID,
                               0,
                               CLSCTX_INPROC_SERVER,
                               IID_IFilter,
                               xFilter.GetQIPointer() );
#endif
    }
    else
    {
        // Use Indexing Service to load the filter

        if ( fShowStatusInfo )
            out( L"loading filter based on Indexing Service's LoadIFilter()" );

        hr = LoadIFilter( pwcInputFile, 0, xFilter.GetQIPointer() );

        if ( SUCCEEDED( hr ) && fShowStatusInfo )
        {
            // Dereference the VTable to get a pointer into the DLL

            HMODULE hMod = GetModuleOfAddress( * (void **) xFilter.GetPointer() );

            if ( 0 != hMod )
                DisplayModuleInformation( hMod );
        }
    }

    if ( FAILED( hr ) )
    {
        printf( "can't load the filter: %#x\n", hr );
        return hr;
    }

    // Does the filter support IPersistStorage?

    XInterface<IStorage> xStorage;
    XInterface<IPersistStorage> xPersistStorage;
    hr = xFilter->QueryInterface( IID_IPersistStorage,
                                  xPersistStorage.GetQIPointer() );
    if ( FAILED( hr ) )
    {
        if ( fShowStatusInfo )
            out( L" filter doesn't support IPersistStorage, error %#x", hr );

        // Only fatal if this is the load mechanism that was asked for

        if ( eIPersistStorage == filterLoad )
            return hr;
    }
    else
    {
        if ( fShowStatusInfo )
            out( L" filter supports IPersistStorage" );

        if ( eIPersistStorage == filterLoad )
        {
            if ( fShowStatusInfo )
                out( L" loading via IPersistStorage" );

            hr = StgOpenStorage( pwcInputFile,
                                 0,
                                 STGM_READ | STGM_SHARE_DENY_WRITE,
                                 0,
                                 0,
                                 xStorage.GetPPointer() );
            if ( FAILED( hr ) )
            {
                printf( "can't open the file into a storage %#x\n", hr );
                return hr;
            }

            hr = xPersistStorage->Load( xStorage.GetPointer() );
            if ( FAILED( hr ) )
            {
                printf( "can't Load() the storage into the filter %#x\n", hr );
                return hr;
            }
        }
    }

    xPersistStorage.Free();

    // Does the filter support IPersistStream?

    XInterface<CIStream> xStream;
    XInterface<IPersistStream> xPersistStream;
    hr = xFilter->QueryInterface( IID_IPersistStream,
                                  xPersistStream.GetQIPointer() );
    if ( FAILED( hr ) )
    {
        if ( fShowStatusInfo )
            out( L" filter doesn't support IPersistStream, error %#x", hr );

        if ( eIPersistStream == filterLoad )
            return hr;
    }
    else
    {
        if ( fShowStatusInfo )
            out( L" filter supports IPersistStream" );

        if ( eIPersistStream == filterLoad )
        {
            if ( fShowStatusInfo )
                out( L" loading via IPersistStream" );

            xStream.Set( new CIStream() );
            hr = xStream->Open( pwcInputFile );
            if ( FAILED( hr ) )
            {
                printf( "can't open the file into a stream %#x\n", hr );
                return hr;
            }

            hr = xPersistStream->Load( xStream.GetPointer() );
            if ( FAILED( hr ) )
            {
                printf( "can't Load() the stream into the filter %#x\n", hr );
                return hr;
            }
        }
    }

    xPersistStream.Free();

    // Does the filter support IPersistFile?

    XInterface<IPersistFile> xPersistFile;
    hr = xFilter->QueryInterface( IID_IPersistFile,
                                  xPersistFile.GetQIPointer() );
    if ( FAILED( hr ) )
    {
        if ( fShowStatusInfo )
            out( L"filter doesn't support IPersistFile, error %#x\n", hr );

        if ( eIPersistFile == filterLoad )
            return hr;
    }
    else
    {
        if ( fShowStatusInfo )
            out( L" filter supports IPersistFile" );

        if ( eIPersistFile == filterLoad )
        {
            if ( fShowStatusInfo )
                out( L" loading via IPersistFile" );

            hr = xPersistFile->Load( pwcInputFile,
                                     STGM_READ | STGM_SHARE_DENY_NONE );
            if ( FAILED( hr ) )
            {
                printf( "can't Load() the file into the filter %#x\n", hr );
                return hr;
            }
        }
    }

    xPersistFile.Free();

    // Initialize the IFilter

    ULONG ulFlags = 0;
    hr = xFilter->Init( IFILTER_INIT_CANON_PARAGRAPHS |
                        IFILTER_INIT_HARD_LINE_BREAKS |
                        IFILTER_INIT_CANON_HYPHENS |
                        IFILTER_INIT_CANON_SPACES |
                        IFILTER_INIT_INDEXING_ONLY |
                        IFILTER_INIT_APPLY_INDEX_ATTRIBUTES,
                        0,
                        0,
                        &ulFlags );
    if ( FAILED( hr ) )
    {
        printf( "can't Init() the filter, error %#x\n", hr );
        return hr;
    }

    if ( fShowStatusInfo )
        out( L" flags returned from IFilter::Init(): %#x", ulFlags );

    // Pull all the data out of the filter

    BOOL fText;
    STAT_CHUNK StatChunk;
    StatChunk.attribute.psProperty.ulKind = PRSPEC_PROPID;

    do
    {
        const ULONG cwcMaxBuffer = 1024;

        // One extra element: GetText() may fill all cwcMaxBuffer
        // characters and a terminator is appended after them below.

        WCHAR awcBuffer[ cwcMaxBuffer + 1 ];

        hr = xFilter->GetChunk( &StatChunk );

        // Unavailable embeddings and links are skipped, not fatal

        if ( FILTER_E_EMBEDDING_UNAVAILABLE == hr )
        {
            if ( fShowStatusInfo )
                out( L"[-- encountered an embedding for which no filter is available --]" );
            continue;
        }

        if ( FILTER_E_LINK_UNAVAILABLE == hr )
        {
            if ( fShowStatusInfo )
                out( L"[-- encountered a link for which no filter is available --]" );
            continue;
        }

        if ( FAILED( hr ) && hr != FILTER_E_END_OF_CHUNKS )
        {
            out( L"GetChunk returned error %#x", hr );
            break;
        }

        if ( FILTER_E_END_OF_CHUNKS == hr )
            break;

        fText = ( CHUNK_TEXT == StatChunk.flags );

        // Display information about the chunk

        if ( fShowStatusInfo )
        {
            out( L"" );
            out( L"----------------------------------------------------------------------" );
            outstr( L" attribute: %08lx-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
                    StatChunk.attribute.guidPropSet.Data1,
                    StatChunk.attribute.guidPropSet.Data2,
                    StatChunk.attribute.guidPropSet.Data3,
                    StatChunk.attribute.guidPropSet.Data4[0],
                    StatChunk.attribute.guidPropSet.Data4[1],
                    StatChunk.attribute.guidPropSet.Data4[2],
                    StatChunk.attribute.guidPropSet.Data4[3],
                    StatChunk.attribute.guidPropSet.Data4[4],
                    StatChunk.attribute.guidPropSet.Data4[5],
                    StatChunk.attribute.guidPropSet.Data4[6],
                    StatChunk.attribute.guidPropSet.Data4[7] );

            if ( StatChunk.attribute.psProperty.ulKind == PRSPEC_PROPID )
                out( L" %d (%#x)",
                     StatChunk.attribute.psProperty.propid,
                     StatChunk.attribute.psProperty.propid );
            else
                out( L" \"%ws\"", StatChunk.attribute.psProperty.lpwstr );

            out( L" idChunk: %d (%#x)", StatChunk.idChunk, StatChunk.idChunk );
            outstr( L" breakType: %d (%#x)", StatChunk.breakType, StatChunk.breakType );

            switch ( StatChunk.breakType )
            {
                case CHUNK_NO_BREAK: out( L" (no break) " ); break;
                case CHUNK_EOW: out( L" (end of word) " ); break;
                case CHUNK_EOS: out( L" (end of sentence) " ); break;
                case CHUNK_EOP: out( L" (end of paragraph) " ); break;
                case CHUNK_EOC: out( L" (end of chapter) " ); break;
                default : out( L" (unknown break type) " ); break;
            }

            outstr( L" flags: %d (%#x)", StatChunk.flags, StatChunk.flags );
            if ( CHUNK_TEXT & StatChunk.flags )
                out( L" (text) " );
            if ( CHUNK_VALUE & StatChunk.flags )
                out( L" (value) " );

            out( L" locale: %d (%#x)", StatChunk.locale, StatChunk.locale );
            out( L" idChunkSource: %d (%#x)",
                 StatChunk.idChunkSource,
                 StatChunk.idChunkSource );
            out( L" cwcStartSource: %d (%#x)",
                 StatChunk.cwcStartSource,
                 StatChunk.cwcStartSource );
            out( L" cwcLenSource: %d (%#x)",
                 StatChunk.cwcLenSource,
                 StatChunk.cwcLenSource );
            out( L" ------------------------------------------" );
        }

        if ( !fGetText )
            continue;

        // Retrieve all the data in the chunk

        do
        {
            if ( fText )
            {
                ULONG cwcBuffer = cwcMaxBuffer;
                hr = xFilter->GetText( &cwcBuffer, awcBuffer );

                if ( FAILED( hr ) && ( FILTER_E_NO_MORE_TEXT != hr ) )
                {
                    out( L"error %#x from GetText\n", hr );
                    return hr;
                }

                if ( FILTER_E_NO_MORE_TEXT == hr )
                    break;

                // Safe even when cwcBuffer == cwcMaxBuffer; see the
                // declaration of awcBuffer.

                awcBuffer[cwcBuffer] = 0;
                out( L"%ws", awcBuffer );

                if ( g_fDumpAsHex )
                {
                    out( L"<--------> %d WCHARs in hex <-------->", cwcBuffer );
                    DumpStringAsHex( awcBuffer, cwcBuffer );
                }
            }
            else
            {
                PROPVARIANT * pPropValue = 0;
                hr = xFilter->GetValue( &pPropValue );

                if ( FAILED( hr ) )
                {
                    if ( ( FILTER_E_NO_MORE_VALUES == hr ) ||
                         ( FILTER_E_NO_VALUES == hr ) )
                        break;

                    out( L"GetValue failed, error %#x\n", hr );
                    return hr;
                }

                // A misbehaving filter can succeed without returning a
                // value; don't read the type through a null pointer.
                // DisplayValue() itself prints "NULL" for that case.

                if ( fShowStatusInfo && ( 0 != pPropValue ) )
                    out( L"[-- variant type %d (%#x) --]", pPropValue->vt, pPropValue->vt );

                DisplayValue( pPropValue );
                out( L"" );

                if ( 0 != pPropValue )
                {
                    PropVariantClear( pPropValue );
                    CoTaskMemFree( pPropValue );
                    pPropValue = 0;
                }
            }
        } while( TRUE ); // data in a chunk
    } while( TRUE ); // for each chunk

    if ( fShowStatusInfo )
    {
        out( L"" );
        out( L"======================================================================" );
        out( L"Filtering completed" );
    }

    // Release everything that could be holding the file open

    xStream.Free();
    xStorage.Free();
    xFilter.Free();

    // Now see if the file handle is still being locked by the filter

    HANDLE hFile = CreateFile( pwcInputFile,
                               GENERIC_READ,
                               0, //no sharing
                               0,
                               OPEN_EXISTING,
                               FILE_ATTRIBUTE_NORMAL,
                               0 );

    if ( INVALID_HANDLE_VALUE == hFile )
    {
        // Capture the error first; the output call below may change it

        DWORD const dwError = GetLastError();

        out( L"Filter didn't release file; can't open %ws, error %#x\n", pwcInputFile, dwError );
        return HRESULT_FROM_WIN32( dwError );
    }

    out( L"Filter closed file properly when released\n" );
    CloseHandle( hFile );

    return S_OK;
} //Filter
//+-------------------------------------------------------------------------
//
//  Function:   GetQueryFunctions
//
//  Synopsis:   Loads needed undocumented functions from query.dll and
//              stores them in g_pCIShutdown and g_pLoadTextFilter.
//
//  Returns:    The module handle or 0 on failure.  On failure the module
//              is not left loaded and neither global points into it.
//
//--------------------------------------------------------------------------

HINSTANCE GetQueryFunctions()
{
    HINSTANCE h = LoadLibrary( L"query.dll" );

    if ( 0 == h )
        return 0;

    // CIShutdown is exported under its decorated C++ name, which differs
    // between 32-bit and 64-bit builds.

#ifdef _WIN64
    char const * pcCIShutdown = "?CIShutdown@@YAXXZ";
#else
    char const * pcCIShutdown = "?CIShutdown@@YGXXZ";
#endif

    g_pCIShutdown = (PFnCIShutdown) GetProcAddress( h, pcCIShutdown );

    if ( 0 == g_pCIShutdown )
    {
        printf( "can't get CIShutdown function address\n" );
        FreeLibrary( h );
        return 0;
    }

    g_pLoadTextFilter = (PFnLoadTextFilter)
                        GetProcAddress( h, "LoadTextFilter" );

    if ( 0 == g_pLoadTextFilter )
    {
        printf( "can't get LoadTextFilter function address\n" );

        // Don't leave a pointer into the module that's being unloaded

        g_pCIShutdown = 0;
        FreeLibrary( h );
        return 0;
    }

    return h;
} //GetQueryFunctions
//+-------------------------------------------------------------------------
//
//  Function:   ExceptionFilter
//
//  Synopsis:   Displays information about the exception: its code and
//              address, the kind of access for access violations, the
//              x86 registers when available, and the module containing
//              the faulting address.
//
//  Arguments:  [pep] -- Exception pointers
//
//  Returns:    EXCEPTION_EXECUTE_HANDLER
//
//--------------------------------------------------------------------------

int ExceptionFilter( EXCEPTION_POINTERS * pep )
{
    printf( "fatal exception caught\n" );

    EXCEPTION_RECORD & r = * ( pep->ExceptionRecord );
    printf( " exception code: %#x\n", r.ExceptionCode );
    printf( " exception address %#p\n", r.ExceptionAddress );

    if ( ( EXCEPTION_ACCESS_VIOLATION == r.ExceptionCode ) &&
         ( r.NumberParameters >= 2 ) )
    {
        // The first parameter is the access type: 0 is a read, 1 is a
        // write, and 8 is a data execution prevention (execute) fault.
        // The second parameter is the address that was accessed.

        WCHAR const * pwcAccess = L"write";

        if ( 0 == r.ExceptionInformation[0] )
            pwcAccess = L"read";
        else if ( 8 == r.ExceptionInformation[0] )
            pwcAccess = L"execute";

        printf( " attempted %ws at address %#p\n",
                pwcAccess,
                (void *) r.ExceptionInformation[1] );
    }

#ifdef _X86_
    CONTEXT & c = * (CONTEXT *) (pep->ContextRecord );

    // Only print the register groups the context record actually holds

    if ( 0 != ( c.ContextFlags & CONTEXT_INTEGER ) )
    {
        printf( " eax: %#x\n", c.Eax );
        printf( " ebx: %#x\n", c.Ebx );
        printf( " ecx: %#x\n", c.Ecx );
        printf( " edx: %#x\n", c.Edx );
        printf( " edi: %#x\n", c.Edi );
        printf( " esi: %#x\n", c.Esi );
    }

    if ( 0 != ( c.ContextFlags & CONTEXT_CONTROL ) )
    {
        printf( " ebp: %#x\n", c.Ebp );
        printf( " eip: %#x\n", c.Eip );
        printf( " esp: %#x\n", c.Esp );
    }
#endif // _X86_

    // Attempt to get the module name where the exception happened

    HMODULE hMod = GetModuleOfAddress( r.ExceptionAddress );

    if ( 0 != hMod )
    {
        WCHAR awcPath[ MAX_PATH ];
        DWORD cwc= GetModuleFileName( hMod,
                                      awcPath,
                                      ArraySize( awcPath ) );

        // Force termination in case the path was truncated

        awcPath[ ArraySize( awcPath ) - 1 ] = 0;

        if ( 0 != cwc )
            printf( " exception in module %ws\n", awcPath );
    }

    return EXCEPTION_EXECUTE_HANDLER;
} //ExceptionFilter
//+-------------------------------------------------------------------------
//
//  Function:   wmain
//
//  Synopsis:   Main entrypoint for the program.  Parses the command line,
//              validates the combination of options, then runs exactly
//              one of the stemmer, wordbreaker or filter tests inside an
//              exception handler.
//
//  Arguments:  [argc] -- Count of command-line arguments
//              [argv] -- The command-line arguments
//
//  Returns:    Application return code: 0 when the selected test ran to
//              completion.  Setup failures exit with 1 and a fatal
//              exception exits with -1.
//
//--------------------------------------------------------------------------

extern "C" int __cdecl wmain( int argc, WCHAR * argv[] )
{
    // Parse the command-line arguments

    BOOL fWordBreak = FALSE;
    BOOL fQuery = FALSE;
    BOOL fStem = FALSE;
    BOOL fFilter = FALSE;
    BOOL fGetText = TRUE;
    BOOL fShowStatusInfo = TRUE;
    enumFilterLoadMechanism filterLoad = eIPersistFile;
    WCHAR const * pwcModule = 0;
    WCHAR const * pwcInputFile = 0;
    WCHAR const * pwcOutputFile = 0;
    WCHAR *pwcText = 0;
    WCHAR const * pwcCLSID = 0;
    ULONG cwcMaxToken = 100;

    for ( int i = 1; i < argc; i++ )
    {
        if ( L'-' == argv[i][0] || L'/' == argv[i][0] )
        {
            WCHAR wc = towupper( argv[i][1] );

            // Options that take a value use the form /X:value; only the
            // plain switches may omit the colon.  A bare "-" or "/" is
            // rejected up front so argv[i][2] is never read past the
            // string's terminator.

            if ( 0 == wc ||
                 ( ':' != argv[i][2] &&
                   'B' != wc &&
                   'D' != wc &&
                   'F' != wc &&
                   'T' != wc &&
                   'N' != wc &&
                   'Q' != wc &&
                   'S' != wc ) )
                Usage();

            if ( 'C' == wc )
                pwcCLSID = argv[i] + 3;
            else if ( 'D' == wc )
                g_fDumpAsHex = TRUE;
            else if ( 'I' == wc )
            {
                if ( 0 != pwcText )
                    Usage();

                pwcInputFile = argv[i] + 3;
            }
            else if ( 'M' == wc )
                pwcModule = argv[i] + 3;
            else if ( 'N' == wc )
                fShowStatusInfo = FALSE;
            else if ( 'O' == wc )
                pwcOutputFile = argv[i] + 3;
            else if ( 'S' == wc )
                fStem = TRUE;
            else if ( 'T' == wc )
                fGetText = FALSE;
            else if ( 'B' == wc )
                fWordBreak = TRUE;
            else if ( 'F' == wc )
            {
                // An optional letter after F picks the load mechanism

                fFilter = TRUE;
                WCHAR wcNext = towupper( argv[i][2] );

                if ( L'S' == wcNext )
                    filterLoad = eIPersistStream;
                else if ( L'T' == wcNext )
                    filterLoad = eIPersistStorage;
                else if ( 0 != wcNext )
                    Usage();
            }
            else if ( 'Q' == wc )
                fQuery = TRUE;
            else if ( 'X' == wc )
                cwcMaxToken = _wtoi( argv[i] + 3 );
            else
                Usage();
        }
        else if ( 0 != pwcText || 0 != pwcInputFile )
            Usage();
        else
            pwcText = argv[i];
    }

    // We have to either wordbreak, stem, or filter

    if ( ( fWordBreak + fStem + fFilter ) != 1 )
        Usage();

    // We need the classid of the wordbreaker or stemmer to load

    if ( ( fWordBreak || fStem ) && ( 0 == pwcCLSID ) )
        Usage();

    // If we're loading by module, we need a CLSID

    if ( ( 0 != pwcModule ) && ( 0 == pwcCLSID ) )
        Usage();

    // Need input text or an input file to wordbreak

    if ( fWordBreak && ( 0 == pwcText ) && ( 0 == pwcInputFile ) )
        Usage();

    // Need input text to stem

    if ( fStem && ( 0 == pwcText ) )
        Usage();

    // Need input file to filter

    if ( fFilter && ( 0 == pwcInputFile ) )
        Usage();

    CLSID clsid;

    if ( 0 != pwcCLSID )
    {
        HRESULT hr = CLSIDFromString( (LPOLESTR) pwcCLSID, &clsid );
        if ( FAILED( hr ) )
        {
            printf( "can't convert CLSID string to a CLSID: %#x\n", hr );
            exit( 1 );
        }
    }

    // Get the full path of the input file, if specified

    WCHAR awcPath[MAX_PATH];

    if ( 0 != pwcInputFile )
    {
        // _wfullpath returns 0 on failure (for example when the result
        // doesn't fit), leaving the buffer contents unusable.

        if ( 0 == _wfullpath( awcPath, pwcInputFile, MAX_PATH ) )
        {
            printf( "can't get the full path of input file '%ws'\n", pwcInputFile );
            exit( 1 );
        }

        pwcInputFile = awcPath;
    }

    // Get the full path of the output file, if specified, then open it

    WCHAR awcOutputPath[MAX_PATH];

    if ( 0 != pwcOutputFile )
    {
        if ( 0 == _wfullpath( awcOutputPath, pwcOutputFile, MAX_PATH ) )
        {
            printf( "can't get the full path of output file '%ws'\n", pwcOutputFile );
            exit( 1 );
        }

        pwcOutputFile = awcOutputPath;

        g_fpOut = _wfopen( pwcOutputFile, L"wb" );

        if ( 0 == g_fpOut )
        {
            printf( "unable to open output file '%ws'\n", pwcOutputFile );
            exit( 1 );
        }

        // Start the file with a Unicode byte-order mark

        const WCHAR awcUnicodeHeader[] = { 0xfeff, 0x0000 };
        fwprintf( g_fpOut, awcUnicodeHeader );
    }

    // Initialize COM multi-threaded, just like search products do

    HRESULT hr = CoInitializeEx( 0, COINIT_MULTITHREADED );

    if ( FAILED( hr ) )
    {
        printf( "can't initialize com: %#x\n", hr );
        exit( 1 );
    }

    // Load query.dll private exports

    HINSTANCE hQuery = GetQueryFunctions();

    if ( 0 == hQuery )
    {
        printf( "can't load needed functions from query.dll\n" );
        exit( 1 );
    }

    // Do the work

    __try
    {
        if ( fStem )
            Stem( pwcText,
                  pwcModule,
                  clsid,
                  cwcMaxToken );

        if ( fWordBreak )
            WordBreak( fQuery,
                       pwcText,
                       pwcInputFile,
                       pwcModule,
                       clsid,
                       cwcMaxToken );

        if ( fFilter )
            Filter( pwcInputFile,
                    filterLoad,
                    pwcModule,
                    ( 0 == pwcCLSID ) ? 0 : &clsid,
                    fShowStatusInfo,
                    fGetText );
    }
    __except( ExceptionFilter( GetExceptionInformation() ) )
    {
        printf( "fatal exception code %#x\n", GetExceptionCode() );
        exit( -1 );
    }

    // Shut down query.dll's filter loading code so it won't AV on exit.

    g_pCIShutdown();

    FreeLibrary( hQuery );

    CoUninitialize();

    if ( 0 != g_fpOut )
    {
        fclose( g_fpOut );
        g_fpOut = 0;
    }

    return 0;
} //wmain