//+------------------------------------------------------------------------- // // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A // PARTICULAR PURPOSE. // // Copyright (c) Microsoft Corporation. All rights reserved. // // PROGRAM: lrtest.cxx // // Test program for invoking language resources including wordbreakers // and stemmers. Also invokes filters. // // PLATFORM: Windows // //-------------------------------------------------------------------------- #ifndef UNICODE #define UNICODE #endif #define _OLE32_ #include #include #include #include #include #include #include #include #include #include #include #include "minici.hxx" #define USE_FAKE_COM // // These are undocumented Indexing Service functions, but they're needed // to load filters and not crash, and to load the plain text filter. // typedef void (__stdcall * PFnCIShutdown)( void ); typedef HRESULT (__stdcall * PFnLoadTextFilter)( WCHAR const * pwcPath, IFilter ** ppIFilter ); PFnCIShutdown g_pCIShutdown = 0; PFnLoadTextFilter g_pLoadTextFilter = 0; // If this is non-zero, it's a file handle to which output is streamed FILE * g_fpOut = 0; // If TRUE, strings from wordbreakers and stemmers are dumped in hex BOOL g_fDumpAsHex = FALSE; enum enumFilterLoadMechanism { eIPersistFile, eIPersistStream, eIPersistStorage }; //+------------------------------------------------------------------------- // // Function: out // // Synopsis: Like printf, only will send output to the output file if // specified, or just to the console. Appends a carriage // return / line feed to the text. // // Arguments: [pwcFormat] -- Characters whose type information is checked // [...] -- Variable arguments // // Returns: count of characters emitted. // //-------------------------------------------------------------------------- int out( const WCHAR * pwcFormat, ... ) { va_list arglist; va_start( arglist, pwcFormat ); // Writing to the output file is done in binary mode so the output can be // Unicode. The side-effect is that "\n" isn't translated into "\r\n" // automatically, so it has to be explicit. int i; if ( 0 != g_fpOut ) { i = vfwprintf( g_fpOut, pwcFormat, arglist ); i += fwprintf( g_fpOut, L"\r\n" ); } else { i = vwprintf( pwcFormat, arglist ); i += wprintf( L"\n" ); } va_end( arglist ); return i; } //out //+------------------------------------------------------------------------- // // Function: outstr // // Synopsis: Like printf, only will send output to the output file if // specified, or just to the console. // // Arguments: [pwcFormat] -- Characters whose type information is checked // [...] -- Variable arguments // // Returns: count of characters emitted. // //-------------------------------------------------------------------------- int outstr( const WCHAR * pwcFormat, ... ) { va_list arglist; va_start( arglist, pwcFormat ); int i; if ( 0 != g_fpOut ) i = vfwprintf( g_fpOut, pwcFormat, arglist ); else i = vwprintf( pwcFormat, arglist ); va_end( arglist ); return i; } //outstr //+------------------------------------------------------------------------- // // Function: Usage // // Synopsis: Displays usage information about the application, then exits. // //-------------------------------------------------------------------------- void Usage() { printf( "usage: lrtest [/d] [/b] [/f] [/q] [/s] [/x:#] /c:clsid [/o:file] [/i:file] [text]\n" ); printf( "\n" ); printf( " Language Resource test program\n" ); printf( "\n" ); printf( " arguments:\n" ); printf( " /b Load the wordbreaker (can't be used with /s or /f)\n" ); printf( " /c: CLSID of the wordbreaker or stemmer to load\n" ); printf( " /d Dumps output strings in hex as well as strings\n" ); printf( " /f Load the filter (can't be used with /b or /s)\n" ); printf( " If /c isn't specified, use Indexing Service's LoadIFilter\n" ); printf( " /fs Same as /f, but uses IPersistStream, not IPersistFile\n" ); printf( " /ft Same as /f, but uses IPersistStorage, not IPersistFile\n" ); printf( " /i: Path of an input file, if [text] isn't specified\n" ); printf( " /m: Optional path of the dll to load. Overrides COM CLSID lookup\n" ); printf( " /n No status information. Used with /f, only displays filter output\n" ); printf( " /o: Path of an output file. If not specified, console is used\n" ); printf( " /q If wordbreaking, do so for query instead of indexing\n" ); printf( " /s Load the stemmer (can't be used with /b or /f)\n" ); printf( " /t No text information; just chunks. Used with /f\n" ); printf( " /x:# Maximum token size, default is 100\n" ); printf( " text Text to wordbreak or stem, if /i: isn't specified\n" ); printf( "\n" ); printf( " examples:\n" ); printf( " lrtest /b /c:{369647e0-17b0-11ce-9950-00aa004bbb1f} \"Alice's restaurant\"\n" ); printf( " lrtest /b /q /c:{369647e0-17b0-11ce-9950-00aa004bbb1f} \"data-base\"\n" ); printf( " lrtest /b /c:{369647e0-17b0-11ce-9950-00aa004bbb1f} /i:foo.doc\n" ); printf( " lrtest /b /c:{369647e0-17b0-11ce-9950-00aa004bbb1f} /m:wb.dll /i:foo.doc\n" ); printf( " lrtest /d /s /c:{eeed4c20-7f1b-11ce-be57-00aa0051fe20} peach /o:output.txt\n" ); printf( " lrtest /f /c:{f07f3920-7b8c-11cf-9be8-00aa004b9986} /i:foo.doc\n" ); printf( " lrtest /f /i:foo.doc\n" ); printf( " lrtest /fs /i:foo.doc\n" ); printf( "\n" ); exit( 1 ); } //Usage //+------------------------------------------------------------------------- // // Function: GetModuleOfAddress // // Synopsis: Returns the module handle of a given address or 0 // // Arguments: [pAddress] -- Address in one of the modules loaded // //-------------------------------------------------------------------------- HMODULE GetModuleOfAddress( void * pAddress ) { DWORD cbNeeded; BOOL fOK = EnumProcessModules( GetCurrentProcess(), 0, 0, &cbNeeded ); if ( fOK ) { ULONG cModules = cbNeeded / sizeof HMODULE; XPtr aModules( cModules ); fOK = EnumProcessModules( GetCurrentProcess(), aModules.Get(), cbNeeded, &cbNeeded ); if ( fOK ) { for ( ULONG i = 0; i < cModules; i++ ) { MODULEINFO mi; GetModuleInformation( GetCurrentProcess(), aModules[ i ], &mi, sizeof mi ); if ( ( pAddress >= mi.lpBaseOfDll ) && ( pAddress < ( (BYTE *) mi.lpBaseOfDll + mi.SizeOfImage ) ) ) { return aModules[i]; } } } } return 0; } //GetModuleOfAddress //+------------------------------------------------------------------------- // // Function: DumpStringAsHex // // Synopsis: Emits a string in hex format. Useful for East Asian languages. // //-------------------------------------------------------------------------- void DumpStringAsHex( WCHAR const * pwc, ULONG cwc ) { if ( g_fDumpAsHex ) { for ( ULONG i = 0; i < cwc; i++ ) { if ( 0 != i ) outstr( L" " ); outstr( L"%#x", pwc[ i ] ); } out( L"" ); } } //DumpStringAsHex //+--------------------------------------------------------------------------- // // Class: CIStream // // Purpose: Wraps a file with an IStream. // //---------------------------------------------------------------------------- class CIStream : public IStream { public: CIStream() : _hFile( INVALID_HANDLE_VALUE ), _cRef( 1 ), _lOffset( 0 ), _cbData( 0 ) { } ~CIStream() { Free(); } void Free() { if ( INVALID_HANDLE_VALUE != _hFile ) { CloseHandle( _hFile ); _hFile = INVALID_HANDLE_VALUE; } } HRESULT Open( WCHAR const * pwcFile ) { Free(); _hFile = CreateFile( pwcFile, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0 ); if ( INVALID_HANDLE_VALUE == _hFile ) return HRESULT_FROM_WIN32( GetLastError() ); _cbData = GetFileSize( _hFile, 0 ); return S_OK; } HRESULT STDMETHODCALLTYPE QueryInterface( REFIID riid, void ** ppvObj ) { if ( 0 == ppvObj ) return E_INVALIDARG; *ppvObj = 0; if ( IID_IStream == riid ) *ppvObj = (IStream *) this; else if ( IID_IUnknown == riid ) *ppvObj = (IUnknown *) this; else return E_NOINTERFACE; AddRef(); return S_OK; } ULONG STDMETHODCALLTYPE AddRef() { return InterlockedIncrement( &_cRef ); } ULONG STDMETHODCALLTYPE Release() { unsigned long uTmp = InterlockedDecrement( &_cRef ); if ( 0 == uTmp ) delete this; return uTmp; } HRESULT STDMETHODCALLTYPE Read( void * pv, ULONG cb, ULONG * pcbRead ) { DWORD dwOff = SetFilePointer( _hFile, _lOffset, 0, FILE_BEGIN ); if ( INVALID_SET_FILE_POINTER == dwOff ) return HRESULT_FROM_WIN32( GetLastError() ); BOOL f = ReadFile( _hFile, pv, cb, pcbRead, 0 ); if ( !f ) return HRESULT_FROM_WIN32( GetLastError() ); return S_OK; } HRESULT STDMETHODCALLTYPE Write( VOID const * pv, ULONG cb, ULONG * pcbWritten ) { return E_NOTIMPL; } HRESULT STDMETHODCALLTYPE Seek( LARGE_INTEGER dlibMoveIn, DWORD dwOrigin, ULARGE_INTEGER * plibNewPosition ) { HRESULT hr = S_OK; LONG dlibMove = dlibMoveIn.LowPart; ULONG cbNewPos = dlibMove; switch(dwOrigin) { case STREAM_SEEK_SET: if (dlibMove >= 0) _lOffset = dlibMove; else hr = STG_E_SEEKERROR; break; case STREAM_SEEK_CUR: if (!(dlibMove < 0 && ( -dlibMove > _lOffset))) _lOffset += (ULONG) dlibMove; else hr = STG_E_SEEKERROR; break; case STREAM_SEEK_END: if (!(dlibMove < 0 )) _lOffset = _cbData + dlibMove; else hr = STG_E_SEEKERROR; break; default: hr = STG_E_SEEKERROR; } if ( 0 != plibNewPosition ) ULISet32(*plibNewPosition, _lOffset); return hr; } HRESULT STDMETHODCALLTYPE SetSize( ULARGE_INTEGER cb ) { return E_NOTIMPL; } HRESULT STDMETHODCALLTYPE CopyTo( IStream * pstm, ULARGE_INTEGER cb, ULARGE_INTEGER * pcbRead, ULARGE_INTEGER * pcbWritten ) { return E_NOTIMPL; } HRESULT STDMETHODCALLTYPE Commit( DWORD grfCommitFlags ) { return S_OK; } HRESULT STDMETHODCALLTYPE Revert() { return S_OK; } HRESULT STDMETHODCALLTYPE LockRegion( ULARGE_INTEGER libOffset, ULARGE_INTEGER cb, DWORD dwLockType ) { return STG_E_INVALIDFUNCTION; } HRESULT STDMETHODCALLTYPE UnlockRegion( ULARGE_INTEGER libOffset, ULARGE_INTEGER cb, DWORD dwLockType) { return STG_E_INVALIDFUNCTION; } HRESULT STDMETHODCALLTYPE Stat( STATSTG * pstatstg, DWORD statflag ) { memset( pstatstg, 0, sizeof STATSTG ); pstatstg->type = STGTY_STREAM; pstatstg->cbSize.QuadPart = _cbData; pstatstg->grfMode = STGM_READ; return S_OK; } HRESULT STDMETHODCALLTYPE Clone( IStream ** ppstm ) { return E_NOTIMPL; } private: LONG _cRef; HANDLE _hFile; LONG _lOffset; LONG _cbData; }; //+--------------------------------------------------------------------------- // // Class: CPlainTextSource // // Purpose: Takes a simple buffer and provides a TEXT_SOURCE for it, which // can be passed to wordbreakers. // //---------------------------------------------------------------------------- class CPlainTextSource : public TEXT_SOURCE { public: CPlainTextSource( WCHAR const * pwcText, ULONG cwc ) { awcBuffer = pwcText; iCur = 0; iEnd = cwc; pfnFillTextBuffer = PlainFillBuf; } static HRESULT __stdcall PlainFillBuf( TEXT_SOURCE * pTextSource ) { return WBREAK_E_END_OF_TEXT; } }; //+--------------------------------------------------------------------------- // // Class: CFilterTextSource // // Purpose: Takes an IFilter and provides a TEXT_SOURCE for it, which // can be passed to wordbreakers. // //---------------------------------------------------------------------------- #pragma warning(disable: 4512) class CFilterTextSource : public TEXT_SOURCE { public: CFilterTextSource( IFilter & filter ) : _filter( filter ), _hr( S_OK ) { awcBuffer = _awcBuffer; iCur = 0; iEnd = 0; pfnFillTextBuffer = FilterFillBuf; // Get the first chunk _hr = _filter.GetChunk( &_Stat ); // Get text for the chunk FillBuf(); } static HRESULT __stdcall FilterFillBuf( TEXT_SOURCE * pTextSource ) { CFilterTextSource & This = * (CFilterTextSource *) pTextSource; return This.FillBuf(); } private: HRESULT FillBuf() { // Never continue past an error condition except FILTER_E_NO_MORE_TEXT if ( FAILED( _hr ) && _hr != FILTER_E_NO_MORE_TEXT ) return _hr; if ( iCur > iEnd ) { out( L"TEXT_SOURCE iCur (%#x) > iEnd (%#x), this is incorrect\n", iCur, iEnd ); _hr = E_INVALIDARG; return _hr; } // Move any existing text to beginning of buffer. ULONG ccLeftOver = iEnd - iCur; if ( ccLeftOver > 0 ) MoveMemory( _awcBuffer, &_awcBuffer[iCur], ccLeftOver * sizeof WCHAR ); iCur = 0; iEnd = ccLeftOver; ULONG ccRead = BufferWChars() - ccLeftOver; const ULONG BUFFER_SLOP = 10; // // Get some more text. If *previous* call to GetText returned // FILTER_S_LAST_TEXT, or FILTER_E_NO_MORE_TEXT then don't even // bother trying. // if ( FILTER_S_LAST_TEXT == _hr || FILTER_E_NO_MORE_TEXT == _hr ) _hr = FILTER_E_NO_MORE_TEXT; else { _hr = _filter.GetText( &ccRead, &_awcBuffer[ccLeftOver] ); if ( SUCCEEDED( _hr ) ) { iEnd += ccRead; ccLeftOver += ccRead; ccRead = BufferWChars() - ccLeftOver; while ( ( S_OK == _hr ) && ( ccRead > BUFFER_SLOP ) ) { // Attempt to fill in as much of buffer as possible _hr = _filter.GetText( &ccRead, &_awcBuffer[ccLeftOver] ); if ( SUCCEEDED( _hr ) ) { iEnd += ccRead; ccLeftOver += ccRead; ccRead = BufferWChars() - ccLeftOver; } } // // Either return FILTER_S_LAST_TEXT or return S_OK because we // have succeeded in adding text to the buffer. // if ( FILTER_S_LAST_TEXT == _hr ) return FILTER_S_LAST_TEXT; return S_OK; } if ( ( FILTER_E_NO_MORE_TEXT != _hr ) && ( FILTER_E_NO_TEXT != _hr ) ) { // Weird failure, hence return, else goto next chunk return _hr; } } // Go to next chunk, if necessary. while ( ( FILTER_E_NO_MORE_TEXT == _hr ) || ( FILTER_E_NO_TEXT == _hr ) ) { _hr = _filter.GetChunk( &_Stat ); if ( FILTER_E_END_OF_CHUNKS == _hr ) return WBREAK_E_END_OF_TEXT; if ( FILTER_E_PARTIALLY_FILTERED == _hr ) return WBREAK_E_END_OF_TEXT; if ( FAILED( _hr ) ) return( _hr ); // // Skip over value chunks -- note that search products don't do // this. They convert VT_LPSTR, VT_BSTR, and VT_LPWSTR to // Unicode strings for the wordbreaker. // if ( CHUNK_TEXT != _Stat.flags ) continue; ccRead = BufferWChars() - ccLeftOver; _hr = _filter.GetText( &ccRead, &_awcBuffer[ccLeftOver] ); if ( SUCCEEDED( _hr ) ) { iEnd += ccRead; ccLeftOver += ccRead; ccRead = BufferWChars() - ccLeftOver; while ( ( S_OK == _hr ) && ( ccRead > BUFFER_SLOP ) ) { // Attempt to fill in as much of buffer as possible _hr = _filter.GetText( &ccRead, &_awcBuffer[ccLeftOver] ); if ( SUCCEEDED( _hr ) ) { iEnd += ccRead; ccLeftOver += ccRead; ccRead = BufferWChars() - ccLeftOver; } } // // Either return FILTER_S_LAST_TEXT or return S_OK because we // have succeeded in adding text to the buffer. // if ( FILTER_S_LAST_TEXT == _hr ) return FILTER_S_LAST_TEXT; return S_OK; } } if ( FAILED( _hr ) ) return _hr; if ( 0 == ccRead ) return WBREAK_E_END_OF_TEXT; return S_OK; } //FillBuf ULONG BufferWChars() const { return ArraySize( _awcBuffer ); } IFilter & _filter; HRESULT _hr; STAT_CHUNK _Stat; WCHAR _awcBuffer[ 1024 ]; }; //+--------------------------------------------------------------------------- // // Class: CWordFormSink // // Purpose: Sample stemmer sink -- just prints the results. // //---------------------------------------------------------------------------- class CWordFormSink : public IWordFormSink { public: CWordFormSink() {} HRESULT STDMETHODCALLTYPE QueryInterface( REFIID riid, void ** ppvObject ) { *ppvObject = this; return S_OK; } ULONG STDMETHODCALLTYPE AddRef() { return 1; } ULONG STDMETHODCALLTYPE Release() { return 1; } HRESULT STDMETHODCALLTYPE PutAltWord( WCHAR const * pwcBuf, ULONG cwc ) { out( L"IWordFormSink::PutAltWord: cwc %d, '%.*ws'", cwc, cwc, pwcBuf ); DumpStringAsHex( pwcBuf, cwc ); return S_OK; } HRESULT STDMETHODCALLTYPE PutWord ( WCHAR const * pwcBuf, ULONG cwc ) { out( L"IWordFormSink::PutWord: cwc %d, '%.*ws'", cwc, cwc, pwcBuf ); DumpStringAsHex( pwcBuf, cwc ); return S_OK; } }; //+--------------------------------------------------------------------------- // // Class: CWordSink // // Purpose: Sample word sink -- just prints the results. // //---------------------------------------------------------------------------- class CWordSink : public IWordSink { public: CWordSink() {} HRESULT STDMETHODCALLTYPE QueryInterface( REFIID riid, void ** ppvObject ) { *ppvObject = this; return S_OK; } ULONG STDMETHODCALLTYPE AddRef() { return 1; } ULONG STDMETHODCALLTYPE Release() { return 1; } HRESULT STDMETHODCALLTYPE PutWord( ULONG cwc, WCHAR const * pwcBuf, ULONG cwcSrcLen, ULONG cwcSrcPos ) { out( L"IWordSink::PutWord: cwcSrcLen %d, cwcSrcPos %d, cwc %d, '%.*ws'", cwcSrcLen, cwcSrcPos, cwc, cwc, pwcBuf ); DumpStringAsHex( pwcBuf, cwc ); return S_OK; } HRESULT STDMETHODCALLTYPE PutAltWord( ULONG cwc, WCHAR const * pwcBuf, ULONG cwcSrcLen, ULONG cwcSrcPos ) { out( L"IWordSink::PutAltWord: cwcSrcLen %d, cwcSrcPos %d, cwc %d, '%.*ws'", cwcSrcLen, cwcSrcPos, cwc, cwc, pwcBuf ); DumpStringAsHex( pwcBuf, cwc ); return S_OK; } HRESULT STDMETHODCALLTYPE StartAltPhrase() { out( L"IWordSink::StartAltPhrase" ); return S_OK; } HRESULT STDMETHODCALLTYPE EndAltPhrase() { out( L"IWordSink::EndAltPhrase" ); return S_OK; } HRESULT STDMETHODCALLTYPE PutBreak( WORDREP_BREAK_TYPE wbt ) { out( L"IWordSink::PutBreak, type (%d) %ws", wbt, ( WORDREP_BREAK_EOW == wbt ) ? L"end of word" : ( WORDREP_BREAK_EOS == wbt ) ? L"end of sentence" : ( WORDREP_BREAK_EOP == wbt ) ? L"end of paragraph" : ( WORDREP_BREAK_EOC == wbt ) ? L"end of chapter" : L"invalid break type" ); return S_OK; } }; //+--------------------------------------------------------------------------- // // Class: CPhraseSink // // Purpose: Sample phrase sink -- just prints the results. // //---------------------------------------------------------------------------- class CPhraseSink: public IPhraseSink { public: CPhraseSink() {} HRESULT STDMETHODCALLTYPE QueryInterface( REFIID riid, void ** ppvObject ) { // Assume the caller is well-behaved *ppvObject = this; return S_OK; } ULONG STDMETHODCALLTYPE AddRef() { return 1; } ULONG STDMETHODCALLTYPE Release() { return 1; } HRESULT STDMETHODCALLTYPE PutSmallPhrase( const WCHAR * pwcNoun, ULONG cwcNoun, const WCHAR * pwcModifier, ULONG cwcModifier, ULONG ulAttachmentType ) { out( L"IPhraseSink::PutSmallPhrase" ); return S_OK; } HRESULT STDMETHODCALLTYPE PutPhrase( WCHAR const * pwcPhrase, ULONG cwcPhrase ) { out( L"IPhraseSink::PutPhrase: cwcPhrase %d, '%.*ws'", cwcPhrase, cwcPhrase, pwcPhrase ); DumpStringAsHex( pwcPhrase, cwcPhrase ); return S_OK; } }; //+--------------------------------------------------------------------------- // // Function: GetVersionKey // // Purpose: Displays a particular version key // // Arguments: [pbInfo] -- The version inforomation // [pwcLang] -- The language of the string requested // [pwcKey] -- Key name to retrieve // // Returns: TRUE if a value was found, FALSE otherwise // //---------------------------------------------------------------------------- BOOL GetVersionKey( BYTE * pbInfo, WCHAR const * pwcLang, WCHAR const * pwcKey ) { WCHAR awcKey[ 128 ]; wsprintf( awcKey, L"\\StringFileInfo\\%ws\\%ws", pwcLang, pwcKey ); WCHAR * pwcResult = 0; UINT cb = 0; if ( VerQueryValue( pbInfo, awcKey, (PVOID *) &pwcResult, &cb ) ) { out( L" %ws: '%ws'", pwcKey, pwcResult ); return TRUE; } return FALSE; } //GetVersionKey //+--------------------------------------------------------------------------- // // Function: OutputFiletime // // Purpose: Displays a filetime // // Arguments: [pwcHeader] -- Prefix to print before the filetime // [ft] -- Filetime to print, in UTC originally // //---------------------------------------------------------------------------- void OutputFiletime( WCHAR const * pwcHeader, FILETIME & ft ) { FILETIME ftLocal; FileTimeToLocalFileTime( &ft, &ftLocal ); SYSTEMTIME st; FileTimeToSystemTime( &ftLocal, &st ); BOOL pm = st.wHour >= 12; if ( st.wHour > 12 ) st.wHour -= 12; else if ( 0 == st.wHour ) st.wHour = 12; out( L"%ws: %2d-%02d-%04d %2d:%02d%wc", pwcHeader, (DWORD) st.wMonth, (DWORD) st.wDay, (DWORD) st.wYear, (DWORD) st.wHour, (DWORD) st.wMinute, pm ? L'p' : L'a' ); } //OutputFiletime //+--------------------------------------------------------------------------- // // Function: DisplayModuleInformation // // Purpose: Displays information about a module -- dates and version // // Arguments: [hMod] -- Module handle // //---------------------------------------------------------------------------- HRESULT DisplayModuleInformation( HINSTANCE hMod ) { WCHAR awcDllPath[ MAX_PATH ]; DWORD cwcCopied = GetModuleFileName( hMod, awcDllPath, ArraySize( awcDllPath ) ); awcDllPath[ ArraySize( awcDllPath ) - 1 ] = 0; if ( 0 == cwcCopied ) return HRESULT_FROM_WIN32( GetLastError() ); out( L"dll loaded: %ws", awcDllPath ); DWORD dwHandle; DWORD cbVersionInfo = GetFileVersionInfoSize( awcDllPath, &dwHandle ); if ( 0 == cbVersionInfo ) { printf( "can't get dll version information size, error %d\n", GetLastError() ); return HRESULT_FROM_WIN32( GetLastError() ); } XPtr xVersionInfo( cbVersionInfo ); if ( xVersionInfo.IsNull() ) return E_OUTOFMEMORY; BOOL fOK = GetFileVersionInfo( awcDllPath, 0, cbVersionInfo, xVersionInfo.Get() ); if ( !fOK ) { printf( "unable to retrieve version information, error %d\n", GetLastError() ); return HRESULT_FROM_WIN32( GetLastError() ); } // Get the DLL version number void * pvValue = 0; UINT cbValue = 0; fOK = VerQueryValue( xVersionInfo.Get(), L"\\", &pvValue, &cbValue ); if ( !fOK || ( 0 == cbValue ) ) { printf( "can't retrieve version root value, error %d\n", GetLastError() ); return HRESULT_FROM_WIN32( GetLastError() ); } VS_FIXEDFILEINFO & ffi = * (VS_FIXEDFILEINFO *) pvValue; out( L" dll version %u.%u.%u.%u", HIWORD( ffi.dwFileVersionMS ), LOWORD( ffi.dwFileVersionMS ), HIWORD( ffi.dwFileVersionLS ), LOWORD( ffi.dwFileVersionLS ) ); if ( ( cbValue >= sizeof VS_FIXEDFILEINFO ) && ( 0 != ffi.dwFileDateLS && 0 != ffi.dwFileDateMS ) ) { FILETIME ft; ft.dwLowDateTime = ffi.dwFileDateLS; ft.dwHighDateTime = ffi.dwFileDateMS; OutputFiletime( L" version creation date: ", ft ); } HANDLE h = CreateFile( awcDllPath, FILE_GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_DELETE, 0, OPEN_EXISTING, 0, 0 ); if ( INVALID_HANDLE_VALUE != h ) { FILETIME ftCreate, ftLastWrite; fOK = GetFileTime( h, &ftCreate, 0, &ftLastWrite ); if ( fOK ) { OutputFiletime( L" file create time", ftCreate ); OutputFiletime( L" file last write time", ftLastWrite ); } CloseHandle( h ); } // // Get the language string. Not every dll stores it correctly, so fall // back on English locales known to work for some special cases. // WCHAR awcLang[9]; awcLang[0] = 0; DWORD * pdwLang; UINT cb; if ( VerQueryValue( xVersionInfo.Get(), L"VarFileInfo\\Translation", (PVOID *) &pdwLang, &cb ) && ( cb >= 4 ) ) { wsprintf( awcLang, L"%04x%04x", LOWORD( *pdwLang ), HIWORD( *pdwLang ) ); } if ( 0 == awcLang[0] ) { // Try English Unicode wcscpy( awcLang, L"040904B0" ); if ( !GetVersionKey( xVersionInfo.Get(), awcLang, L"FileVersion" ) ) { // Try English wcscpy( awcLang, L"040904E4" ); if ( !GetVersionKey( xVersionInfo.Get(), awcLang, L"FileVersion" ) ) { // Try English null codepage wcscpy( awcLang, L"04090000" ); if ( !GetVersionKey( xVersionInfo.Get(), awcLang, L"FileVersion" ) ) awcLang[0] = 0; } } } else { GetVersionKey( xVersionInfo.Get(), awcLang, L"FileVersion" ); } // Display additional version information if we found the language if ( 0 != awcLang[0] ) { GetVersionKey( xVersionInfo.Get(), awcLang, L"FileDescription" ); GetVersionKey( xVersionInfo.Get(), awcLang, L"CompanyName" ); GetVersionKey( xVersionInfo.Get(), awcLang, L"ProductName" ); } return S_OK; } //DisplayModuleInformation //+--------------------------------------------------------------------------- // // Function: CreateFromModule // // Purpose: Creates a COM object given a dll // // Arguments: [clsid] -- Class ID of the object to load // [iid] -- Interface ID requested // [ppvObject] -- Returns the object created // [pwcModule] -- Dll to load // [fShowStatusInfo] -- TRUE to print status information // // Returns: HRESULT, S_OK if successful // //---------------------------------------------------------------------------- HRESULT CreateFromModule( REFIID clsid, REFIID iid, void ** ppvObject, WCHAR const * pwcModule, BOOL fShowStatusInfo = TRUE ) { // Note: the module handle will be leaked. It's OK for a test program. HMODULE hMod = LoadLibrary( pwcModule ); if ( 0 == hMod ) return HRESULT_FROM_WIN32( GetLastError() ); // Display information about the module -- ignore errors if ( fShowStatusInfo ) DisplayModuleInformation( hMod ); LPFNGETCLASSOBJECT pfn = (LPFNGETCLASSOBJECT) GetProcAddress( hMod, "DllGetClassObject" ); if ( 0 == pfn ) { printf( "can't get DllGetClassObject: %d\n", GetLastError() ); return HRESULT_FROM_WIN32( GetLastError() ); } XInterface xClassFactory; HRESULT hr = pfn( clsid, IID_IClassFactory, xClassFactory.GetQIPointer() ); if ( FAILED( hr ) ) { printf( "can't instantiate the class factory: %#x\n", hr ); return hr; } return xClassFactory->CreateInstance( 0, iid, ppvObject ); } //CreateFromModule //+--------------------------------------------------------------------------- // // Function: FakeCoCreateInstance // // Purpose: Creates a COM object // // Arguments: [clsid] -- Class ID of the object to load // [iid] -- Interface ID requested // [ppvObject] -- Returns the object created // [fShowStatusInfo] -- TRUE to print status information // // Returns: HRESULT, S_OK if successful // // Needed because some wordbreakers register as single-threaded. Search // products require multi-threaded because marshalling across apartments // doesn't work and because it's too inefficient, especially on // multi-processor machines. // //---------------------------------------------------------------------------- HRESULT FakeCoCreateInstance( REFIID clsid, REFIID iid, void ** ppvObject, BOOL fShowStatusInfo = TRUE ) { WCHAR awcCLSID[ 40 ]; StringFromGUID2( clsid, awcCLSID, ArraySize( awcCLSID ) ); WCHAR awcKey[200]; swprintf( awcKey, L"CLSID\\%ws\\InprocServer32", awcCLSID ); HKEY hKey; DWORD dwErr = RegOpenKey( HKEY_CLASSES_ROOT, awcKey, &hKey ); if ( NO_ERROR != dwErr ) return HRESULT_FROM_WIN32( dwErr ); WCHAR awcDll[MAX_PATH + 1]; DWORD dwType; DWORD dwSize = sizeof awcDll; dwErr = RegQueryValueEx( hKey, L"", 0, &dwType, (LPBYTE) awcDll, &dwSize ); RegCloseKey( hKey ); if ( 0 != dwErr ) return HRESULT_FROM_WIN32( dwErr ); return CreateFromModule( clsid, iid, ppvObject, awcDll, fShowStatusInfo ); } //FakeCoCreateInstance //+--------------------------------------------------------------------------- // // Function: Stem // // Purpose: Stems the input text using the specified stemmer // // Arguments: [pwcText] -- The text to be stemmed // [clsid] -- Class ID of the stemmer to use // [pwcModule] -- Optional module name to override COM lookup. // [cwcMaxToken] -- Maximum token size for the stemmer // //---------------------------------------------------------------------------- HRESULT Stem( WCHAR const * pwcText, WCHAR const * pwcModule, CLSID & clsid, ULONG cwcMaxToken ) { XInterface xStemmer; HRESULT hr = S_OK; if ( 0 != pwcModule ) { hr = CreateFromModule( clsid, IID_IStemmer, xStemmer.GetQIPointer(), pwcModule ); } else { #ifdef USE_FAKE_COM hr = FakeCoCreateInstance( clsid, IID_IStemmer, xStemmer.GetQIPointer() ); #else hr = CoCreateInstance( clsid, 0, CLSCTX_INPROC_SERVER, IID_IStemmer, xStemmer.GetQIPointer() ); #endif } if ( FAILED( hr ) ) { printf( "can't CoCreateInstance the stemmer: %#x\n", hr ); return hr; } BOOL fLicense = FALSE; hr = xStemmer->Init( cwcMaxToken, &fLicense ); if ( FAILED( hr ) ) { printf( "can't Init() in the stemmer: %#x\n", hr ); return hr; } out( L"Stemmer requires license: %ws", fLicense ? L"Yes" : L"No" ); const WCHAR *pwcsLicense = 0; hr = xStemmer->GetLicenseToUse( &pwcsLicense ); if ( FAILED( hr ) ) out( L"can't GetLicenseToUse() in the stemmer: %#x\n", hr ); else out( L"Stemmer license: '%ws'", pwcsLicense ); CWordFormSink sink; if ( 0 != pwcText ) { out( L"Original text: '%ws'", pwcText ); hr = xStemmer->GenerateWordForms( pwcText, (ULONG) wcslen( pwcText ), &sink ); if ( FAILED( hr ) ) { printf( "can't GenerateWordForms() in the stemmer: %#x\n", hr ); return hr; } } return S_OK; } //Stem //+--------------------------------------------------------------------------- // // Function: WordBreak // // Purpose: Wordbreaks the input text or file // // Arguments: [fQuery] -- TRUE if query time FALSE if index time // [pwcText] -- The text to be wordbroken. // [pwcInputFile] -- Filename to be wordbroken if pwcText is 0 // [pwcModule] -- Optional module name to override COM lookup. // [clsid] -- Class ID of the wordbreaker to use // [cwcMaxToken] -- Maximum token size for the wordbreaker // //---------------------------------------------------------------------------- HRESULT WordBreak( BOOL fQuery, WCHAR const * pwcText, WCHAR const * pwcInputFile, WCHAR const * pwcModule, CLSID & clsid, ULONG cwcMaxToken ) { XInterface xWordBreaker; HRESULT hr = S_OK; if ( 0 != pwcModule ) { hr = CreateFromModule( clsid, IID_IWordBreaker, xWordBreaker.GetQIPointer(), pwcModule ); } else { #ifdef USE_FAKE_COM hr = FakeCoCreateInstance( clsid, IID_IWordBreaker, xWordBreaker.GetQIPointer() ); #else hr = CoCreateInstance( clsid, 0, CLSCTX_INPROC_SERVER, IID_IWordBreaker, xWordBreaker.GetQIPointer() ); #endif } if ( FAILED( hr ) ) { printf( "can't CoCreateInstance the wordbreaker: %#x\n", hr ); return hr; } BOOL fLicense = FALSE; hr = xWordBreaker->Init( fQuery, cwcMaxToken, &fLicense ); if ( FAILED( hr ) ) { printf( "can't Init() in the wordbreaker: %#x\n", hr ); return hr; } out( L"Wordbreaker requires license: %ws", fLicense ? L"Yes" : L"No" ); const WCHAR *pwcsLicense = 0; hr = xWordBreaker->GetLicenseToUse( &pwcsLicense ); if ( FAILED( hr ) ) { printf( "can't GetLicenseToUse() in the wordbreaker: %#x\n", hr ); return hr; } out( L"Wordbreaker license: '%ws'", pwcsLicense ); CWordSink wordSink; CPhraseSink phraseSink; if ( 0 != pwcText ) { out( L"Original text: '%ws'", pwcText ); CPlainTextSource textSource( pwcText, (ULONG) wcslen( pwcText ) ); hr = xWordBreaker->BreakText( &textSource, &wordSink, &phraseSink ); if ( FAILED( hr ) ) { printf( "can't BreakText() in the wordbreaker: %#x\n", hr ); return hr; } } else { out( L"Wordbreaking text from file %ws", pwcInputFile ); // Load the Indexing Service filter (should be fine for testing). XInterface xIFilter; hr = LoadIFilter( pwcInputFile, 0, xIFilter.GetQIPointer() ); if ( FAILED( hr ) ) { // Fall back on the plain text filter. printf( "Can't load filter, error %#x. Trying text filter.\n", hr ); hr = g_pLoadTextFilter( pwcInputFile, xIFilter.GetPPointer() ); if ( FAILED( hr ) ) { printf( "can't load filter, error %#x\n", hr ); return hr; } } // Initialize the filter ULONG ulFlags = 0; hr = xIFilter->Init( IFILTER_INIT_CANON_PARAGRAPHS | IFILTER_INIT_CANON_HYPHENS | IFILTER_INIT_APPLY_INDEX_ATTRIBUTES, 0, 0, &ulFlags ); if ( FAILED( hr ) ) { printf( "can't initialize filter, error %#x\n", hr ); return hr; } CFilterTextSource textSource( xIFilter.GetReference() ); hr = xWordBreaker->BreakText( &textSource, &wordSink, &phraseSink ); if ( FAILED( hr ) ) { printf( "can't BreakText() in the wordbreaker: %#x\n", hr ); return hr; } } return S_OK; } //WordBreak //+------------------------------------------------------------------------- // // Function: Render // // Synopsis: Prints an item in a safearray // // Arguments: [vt] - type of the element // [pa] - pointer to the item // //-------------------------------------------------------------------------- void PrintSafeArray( VARTYPE vt, LPSAFEARRAY pa ); void Render( VARTYPE vt, void * pv ) { if ( VT_ARRAY & vt ) { PrintSafeArray( (VARTYPE) (vt - VT_ARRAY), *(SAFEARRAY **) pv ); return; } switch ( vt ) { case VT_UI1: outstr( L"%u", (unsigned) *(BYTE *)pv ); break; case VT_I1: outstr( L"%d", (int) *(CHAR *)pv ); break; case VT_UI2: outstr( L"%u", (unsigned) *(USHORT *)pv ); break; case VT_I2: outstr( L"%d", (int) *(SHORT *)pv ); break; case VT_UI4: case VT_UINT: outstr( L"%u", (unsigned) *(ULONG *)pv ); break; case VT_I4: case VT_ERROR: case VT_INT: outstr( L"%d", *(LONG *)pv ); break; case VT_UI8: outstr( L"%I64u", *(unsigned __int64 *)pv ); break; case VT_I8: outstr( L"%I64d", *(__int64 *)pv ); break; case VT_R4: outstr( L"%f", *(float *)pv ); break; case VT_R8: outstr( L"%lf", *(double *)pv ); break; case VT_DECIMAL: { double dbl; HRESULT hr = VarR8FromDec( (DECIMAL *) pv, &dbl ); if ( SUCCEEDED( hr ) ) outstr( L"%lf", dbl ); break; } case VT_CY: { double dbl; HRESULT hr = VarR8FromCy( * (CY *) pv, &dbl ); if ( SUCCEEDED( hr ) ) outstr( L"%lf", dbl ); break; } case VT_BOOL: outstr( *(VARIANT_BOOL *)pv ? L"TRUE" : L"FALSE" ); break; case VT_BSTR: outstr( L"%ws", *(BSTR *) pv ); break; case VT_VARIANT: { PROPVARIANT * pVar = (PROPVARIANT *) pv; Render( pVar->vt, & pVar->lVal ); break; } case VT_DATE: { SYSTEMTIME st; BOOL fOK = VariantTimeToSystemTime( *(DATE *)pv, &st ); if ( !fOK ) break; BOOL pm = st.wHour >= 12; if ( st.wHour > 12 ) st.wHour -= 12; else if ( 0 == st.wHour ) st.wHour = 12; outstr( L"%2d-%02d-%04d %2d:%02d%wc", (DWORD) st.wMonth, (DWORD) st.wDay, (DWORD) st.wYear, (DWORD) st.wHour, (DWORD) st.wMinute, pm ? L'p' : L'a' ); break; } case VT_EMPTY: case VT_NULL: break; default : { outstr( L"(vt 0x%x)", (int) vt ); break; } } } //Render //+------------------------------------------------------------------------- // // Function: PrintSafeArray // // Synopsis: Prints items in a safearray // // Arguments: [vt] - type of elements in the safearray // [pa] - pointer to the safearray // //-------------------------------------------------------------------------- void PrintSafeArray( VARTYPE vt, LPSAFEARRAY pa ) { // Get the dimensions of the array UINT cDim = SafeArrayGetDim( pa ); if ( 0 == cDim ) return; XPtr xDim( cDim ); XPtr xLo( cDim ); XPtr xUp( cDim ); for ( UINT iDim = 0; iDim < cDim; iDim++ ) { HRESULT hr = SafeArrayGetLBound( pa, iDim + 1, &xLo[iDim] ); if ( FAILED( hr ) ) return; xDim[ iDim ] = xLo[ iDim ]; hr = SafeArrayGetUBound( pa, iDim + 1, &xUp[iDim] ); if ( FAILED( hr ) ) return; outstr( L"{" ); } // slog through the array UINT iLastDim = cDim - 1; BOOL fDone = FALSE; while ( !fDone ) { // inter-element formatting if ( xDim[ iLastDim ] != xLo[ iLastDim ] ) outstr( L"," ); // Get the element and render it void *pv; HRESULT hr = SafeArrayPtrOfIndex( pa, xDim.Get(), &pv ); if ( FAILED( hr ) ) return; Render( vt, pv ); // Move to the next element and carry if necessary ULONG cOpen = 0; for ( LONG iDim = iLastDim; iDim >= 0; iDim-- ) { if ( xDim[ iDim ] < xUp[ iDim ] ) { xDim[ iDim ] = 1 + xDim[ iDim ]; break; } outstr( L"}" ); if ( 0 == iDim ) fDone = TRUE; else { cOpen++; xDim[ iDim ] = xLo[ iDim ]; } } for ( ULONG i = 0; !fDone && i < cOpen; i++ ) outstr( L"{" ); } } //PrintSafeArray //+------------------------------------------------------------------------- // // Function: PrintVectorItems // // Synopsis: Prints items in a PROPVARIANT vector // // Arguments: [pVal] - The array of values // [cVals] - The count of values // [pcFmt] - The format string // //-------------------------------------------------------------------------- template void PrintVectorItems( T * pVal, ULONG cVals, WCHAR const * pwcFmt ) { outstr( L"{ " ); for( ULONG iVal = 0; iVal < cVals; iVal++ ) { if ( 0 != iVal ) outstr( L"," ); outstr( pwcFmt, *pVal++ ); } outstr( L" }" ); } //PrintVectorItems //+------------------------------------------------------------------------- // // Function: DisplayValue // // Synopsis: Displays a PROPVARIANT value. Limited formatting is done. // // Arguments: [pVar] - The value to display // //-------------------------------------------------------------------------- void DisplayValue( PROPVARIANT const * pVar ) { if ( 0 == pVar ) { outstr( L"NULL" ); return; } // Display the most typical variant types PROPVARIANT const & v = *pVar; switch ( v.vt ) { case VT_EMPTY : break; case VT_NULL : break; case VT_I4 : outstr( L"%10d", v.lVal ); break; case VT_UI1 : outstr( L"%10d", v.bVal ); break; case VT_I2 : outstr( L"%10d", v.iVal ); break; case VT_R4 : outstr( L"%10f", v.fltVal ); break; case VT_R8 : outstr( L"%10lf", v.dblVal ); break; case VT_BOOL : outstr( v.boolVal ? L"TRUE" : L"FALSE" ); break; case VT_I1 : outstr( L"%10d", v.cVal ); break; case VT_UI2 : outstr( L"%10u", v.uiVal ); break; case VT_UI4 : outstr( L"%10u", v.ulVal ); break; case VT_INT : outstr( L"%10d", v.lVal ); break; case VT_UINT : outstr( L"%10u", v.ulVal ); break; case VT_I8 : outstr( L"%20I64d", v.hVal ); break; case VT_UI8 : outstr( L"%20I64u", v.hVal ); break; case VT_ERROR : outstr( L"%#x", v.scode ); break; case VT_LPSTR : outstr( L"%S", v.pszVal ); break; case VT_LPWSTR : outstr( L"%ws", v.pwszVal ); break; case VT_BSTR : outstr( L"%ws", v.bstrVal ); break; case VT_BLOB : { outstr( L"blob cb %u ", v.blob.cbSize ); for ( unsigned x = 0; x < v.blob.cbSize; x++ ) outstr( L" %#x ", v.blob.pBlobData[x] ); break; } case VT_CY: { double dbl; HRESULT hr = VarR8FromCy( v.cyVal, &dbl ); if ( SUCCEEDED( hr ) ) outstr( L"%lf", dbl ); break; } case VT_DECIMAL : { double dbl; HRESULT hr = VarR8FromDec( (DECIMAL *) &v.decVal, &dbl ); if ( SUCCEEDED( hr ) ) outstr( L"%lf", dbl ); break; } case VT_FILETIME : case VT_DATE : { SYSTEMTIME st; ZeroMemory( &st, sizeof st ); if ( VT_DATE == v.vt ) { BOOL fOK = VariantTimeToSystemTime( v.date, &st ); if ( !fOK ) break; } else { FILETIME ft; BOOL fOK = FileTimeToLocalFileTime( &v.filetime, &ft ); if ( fOK ) FileTimeToSystemTime( &ft, &st ); if ( !fOK ) break; } BOOL pm = st.wHour >= 12; if ( st.wHour > 12 ) st.wHour -= 12; else if ( 0 == st.wHour ) st.wHour = 12; outstr( L"%2d-%02d-%04d %2d:%02d%wc", (DWORD) st.wMonth, (DWORD) st.wDay, (DWORD) st.wYear, (DWORD) st.wHour, (DWORD) st.wMinute, pm ? L'p' : L'a' ); break; } case VT_VECTOR | VT_I1: PrintVectorItems( v.cac.pElems, v.cac.cElems, L"%d" ); break; case VT_VECTOR | VT_I2: PrintVectorItems( v.cai.pElems, v.cai.cElems, L"%d" ); break; case VT_VECTOR | VT_I4: PrintVectorItems( v.cal.pElems, v.cal.cElems, L"%d" ); break; case VT_VECTOR | VT_I8: PrintVectorItems( v.cah.pElems, v.cah.cElems, L"%I64d" ); break; case VT_VECTOR | VT_UI1: PrintVectorItems( v.caub.pElems, v.caub.cElems, L"%u" ); break; case VT_VECTOR | VT_UI2: PrintVectorItems( v.caui.pElems, v.caui.cElems, L"%u" ); break; case VT_VECTOR | VT_UI4: PrintVectorItems( v.caul.pElems, v.caul.cElems, L"%u" ); break; case VT_VECTOR | VT_ERROR: PrintVectorItems( v.cascode.pElems, v.cascode.cElems, L"%#x" ); break; case VT_VECTOR | VT_UI8: PrintVectorItems( v.cauh.pElems, v.cauh.cElems, L"%I64u" ); break; case VT_VECTOR | VT_BSTR: PrintVectorItems( v.cabstr.pElems, v.cabstr.cElems, L"%ws" ); break; case VT_VECTOR | VT_LPSTR: PrintVectorItems( v.calpstr.pElems, v.calpstr.cElems, L"%S" ); break; case VT_VECTOR | VT_LPWSTR: PrintVectorItems( v.calpwstr.pElems, v.calpwstr.cElems, L"%ws" ); break; case VT_VECTOR | VT_R4: PrintVectorItems( v.caflt.pElems, v.caflt.cElems, L"%f" ); break; case VT_VECTOR | VT_R8: PrintVectorItems( v.cadbl.pElems, v.cadbl.cElems, L"%lf" ); break; default : { if ( VT_ARRAY & v.vt ) PrintSafeArray( (VARTYPE) ( v.vt - VT_ARRAY ), v.parray ); else outstr( L"vt 0x%05x", v.vt ); break; } } } //DisplayValue //+--------------------------------------------------------------------------- // // Function: Filter // // Purpose: Invokes an IFilter on a file // // Arguments: [pwcInputFile] -- Filename to be filtered // [filterLoad] -- How to load the file into the filter. // [pwcModule] -- Optional module name to override COM lookup. // [pCLSID] -- Optional class ID of the filter to use. // Required if pwcModule is specified. // [fShowStatusInfo] -- TRUE to get other information // FALSE for only output from the filter // [fGetText] -- TRUE to retrieve text, FALSE to skip it // //---------------------------------------------------------------------------- HRESULT Filter( WCHAR const * pwcInputFile, enumFilterLoadMechanism filterLoad, WCHAR const * pwcModule, CLSID * pCLSID, BOOL fShowStatusInfo, BOOL fGetText ) { XInterface xFilter; HRESULT hr = S_OK; if ( 0 != pwcModule ) { // If the DLL is specified, use it if ( fShowStatusInfo ) out( L"loading filter based on module name" ); hr = CreateFromModule( *pCLSID, IID_IFilter, xFilter.GetQIPointer(), pwcModule, fShowStatusInfo ); } else if ( 0 != pCLSID ) { // If we just have a CLSID and no module, use it if ( fShowStatusInfo ) out( L"loading filter based on CLSID and the registry" ); #ifdef USE_FAKE_COM hr = FakeCoCreateInstance( *pCLSID, IID_IFilter, xFilter.GetQIPointer(), fShowStatusInfo ); #else hr = CoCreateInstance( *pCLSID, 0, CLSCTX_INPROC_SERVER, IID_IFilter, xFilter.GetQIPointer() ); #endif } else { // Use Indexing Service to load the filter if ( fShowStatusInfo ) out( L"loading filter based on Indexing Service's LoadIFilter()" ); hr = LoadIFilter( pwcInputFile, 0, xFilter.GetQIPointer() ); if ( SUCCEEDED( hr ) && fShowStatusInfo ) { // Dereference the VTable to get a pointer into the DLL HMODULE hMod = GetModuleOfAddress( * (void **) xFilter.GetPointer() ); if ( 0 != hMod ) DisplayModuleInformation( hMod ); } } if ( FAILED( hr ) ) { printf( "can't load the filter: %#x\n", hr ); return hr; } // Does the filter support IPersistStorage? XInterface xStorage; XInterface xPersistStorage; hr = xFilter->QueryInterface( IID_IPersistStorage, xPersistStorage.GetQIPointer() ); if ( FAILED( hr ) ) { if ( fShowStatusInfo ) out( L" filter doesn't support IPersistStorage, error %#x", hr ); if ( eIPersistStorage == filterLoad ) return hr; } else { if ( fShowStatusInfo ) out( L" filter supports IPersistStorage" ); if ( eIPersistStorage == filterLoad ) { if ( fShowStatusInfo ) out( L" loading via IPersistStorage" ); hr = StgOpenStorage( pwcInputFile, 0, STGM_READ | STGM_SHARE_DENY_WRITE, 0, 0, xStorage.GetPPointer() ); if ( FAILED( hr ) ) { printf( "can't open the file into a storage %#x\n", hr ); return hr; } hr = xPersistStorage->Load( xStorage.GetPointer() ); if ( FAILED( hr ) ) { printf( "can't Load() the storage into the filter %#x\n", hr ); return hr; } } } xPersistStorage.Free(); // Does the filter support IPersistStream? XInterface xStream; XInterface xPersistStream; hr = xFilter->QueryInterface( IID_IPersistStream, xPersistStream.GetQIPointer() ); if ( FAILED( hr ) ) { if ( fShowStatusInfo ) out( L" filter doesn't support IPersistStream, error %#x", hr ); if ( eIPersistStream == filterLoad ) return hr; } else { if ( fShowStatusInfo ) out( L" filter supports IPersistStream" ); if ( eIPersistStream == filterLoad ) { if ( fShowStatusInfo ) out( L" loading via IPersistStream" ); xStream.Set( new CIStream() ); hr = xStream->Open( pwcInputFile ); if ( FAILED( hr ) ) { printf( "can't open the file into a stream %#x\n", hr ); return hr; } hr = xPersistStream->Load( xStream.GetPointer() ); if ( FAILED( hr ) ) { printf( "can't Load() the stream into the filter %#x\n", hr ); return hr; } } } xPersistStream.Free(); // Does the filter support IPersistFile? XInterface xPersistFile; hr = xFilter->QueryInterface( IID_IPersistFile, xPersistFile.GetQIPointer() ); if ( FAILED( hr ) ) { if ( fShowStatusInfo ) out( L"filter doesn't support IPersistFile, error %#x\n", hr ); if ( eIPersistFile == filterLoad ) return hr; } else { if ( fShowStatusInfo ) out( L" filter supports IPersistFile" ); if ( eIPersistFile == filterLoad ) { if ( fShowStatusInfo ) out( L" loading via IPersistFile" ); hr = xPersistFile->Load( pwcInputFile, STGM_READ | STGM_SHARE_DENY_NONE ); if ( FAILED( hr ) ) { printf( "can't Load() the file into the filter %#x\n", hr ); return hr; } } } xPersistFile.Free(); // Initailize the IFilter ULONG ulFlags = 0; hr = xFilter->Init( IFILTER_INIT_CANON_PARAGRAPHS | IFILTER_INIT_HARD_LINE_BREAKS | IFILTER_INIT_CANON_HYPHENS | IFILTER_INIT_CANON_SPACES | IFILTER_INIT_INDEXING_ONLY | IFILTER_INIT_APPLY_INDEX_ATTRIBUTES, 0, 0, &ulFlags ); if ( FAILED( hr ) ) { printf( "can't Init() the filter, error %#x\n", hr ); return hr; } if ( fShowStatusInfo ) out( L" flags returned from IFilter::Init(): %#x", ulFlags ); // Pull all the data out of the filter BOOL fText; STAT_CHUNK StatChunk; StatChunk.attribute.psProperty.ulKind = PRSPEC_PROPID; do { const ULONG cwcMaxBuffer = 1024; WCHAR awcBuffer[ cwcMaxBuffer ]; hr = xFilter->GetChunk( &StatChunk ); if ( FILTER_E_EMBEDDING_UNAVAILABLE == hr ) { if ( fShowStatusInfo ) out( L"[-- encountered an embedding for which no filter is available --]" ); continue; } if ( FILTER_E_LINK_UNAVAILABLE == hr ) { if ( fShowStatusInfo ) out( L"[-- encountered a link for which no filter is available --]" ); continue; } if ( FAILED( hr ) && hr != FILTER_E_END_OF_CHUNKS ) { out( L"GetChunk returned error %#x", hr ); break; } if ( FILTER_E_END_OF_CHUNKS == hr ) break; fText = ( CHUNK_TEXT == StatChunk.flags ); // Display information about the chunk if ( fShowStatusInfo ) { out( L"" ); out( L"----------------------------------------------------------------------" ); outstr( L" attribute: %08lx-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", StatChunk.attribute.guidPropSet.Data1, StatChunk.attribute.guidPropSet.Data2, StatChunk.attribute.guidPropSet.Data3, StatChunk.attribute.guidPropSet.Data4[0], StatChunk.attribute.guidPropSet.Data4[1], StatChunk.attribute.guidPropSet.Data4[2], StatChunk.attribute.guidPropSet.Data4[3], StatChunk.attribute.guidPropSet.Data4[4], StatChunk.attribute.guidPropSet.Data4[5], StatChunk.attribute.guidPropSet.Data4[6], StatChunk.attribute.guidPropSet.Data4[7] ); if ( StatChunk.attribute.psProperty.ulKind == PRSPEC_PROPID ) out( L" %d (%#x)", StatChunk.attribute.psProperty.propid, StatChunk.attribute.psProperty.propid ); else out( L" \"%ws\"", StatChunk.attribute.psProperty.lpwstr ); out( L" idChunk: %d (%#x)", StatChunk.idChunk, StatChunk.idChunk ); outstr( L" breakType: %d (%#x)", StatChunk.breakType, StatChunk.breakType ); switch ( StatChunk.breakType ) { case CHUNK_NO_BREAK: out( L" (no break) " ); break; case CHUNK_EOW: out( L" (end of word) " ); break; case CHUNK_EOS: out( L" (end of sentence) " ); break; case CHUNK_EOP: out( L" (end of paragraph) " ); break; case CHUNK_EOC: out( L" (end of chapter) " ); break; default : out( L" (unknown break type) " ); break; } outstr( L" flags: %d (%#x)", StatChunk.flags, StatChunk.flags ); if ( CHUNK_TEXT & StatChunk.flags ) out( L" (text) " ); if ( CHUNK_VALUE & StatChunk.flags ) out( L" (value) " ); out( L" locale: %d (%#x)", StatChunk.locale, StatChunk.locale ); out( L" idChunkSource: %d (%#x)", StatChunk.idChunkSource, StatChunk.idChunkSource ); out( L" cwcStartSource: %d (%#x)", StatChunk.cwcStartSource, StatChunk.cwcStartSource ); out( L" cwcLenSource: %d (%#x)", StatChunk.cwcLenSource, StatChunk.cwcLenSource ); out( L" ------------------------------------------" ); } if ( !fGetText ) continue; // Retrieve all the data in the chunk do { if ( fText ) { ULONG cwcBuffer = cwcMaxBuffer; hr = xFilter->GetText( &cwcBuffer, awcBuffer ); if ( FAILED( hr ) && ( FILTER_E_NO_MORE_TEXT != hr ) ) { out( L"error %#x from GetText\n", hr ); return hr; } if ( FILTER_E_NO_MORE_TEXT == hr ) break; awcBuffer[cwcBuffer] = 0; out( L"%ws", awcBuffer ); if ( g_fDumpAsHex ) { out( L"<--------> %d WCHARs in hex <-------->", cwcBuffer ); DumpStringAsHex( awcBuffer, cwcBuffer ); } } else { PROPVARIANT * pPropValue = 0; hr = xFilter->GetValue( &pPropValue ); if ( FAILED( hr ) ) { if ( ( FILTER_E_NO_MORE_VALUES == hr ) || ( FILTER_E_NO_VALUES == hr ) ) break; out( L"GetValue failed, error %#x\n", hr ); return hr; } if ( fShowStatusInfo ) out( L"[-- variant type %d (%#x) --]", pPropValue->vt, pPropValue->vt ); DisplayValue( pPropValue ); out( L"" ); if ( 0 != pPropValue ) { PropVariantClear( pPropValue ); CoTaskMemFree( pPropValue ); pPropValue = 0; } } } while( TRUE ); // data in a chunk } while( TRUE ); // for each chunk if ( fShowStatusInfo ) { out( L"" ); out( L"======================================================================" ); out( L"Filtering completed" ); } xStream.Free(); xStorage.Free(); xFilter.Free(); // Now see if the file handle is still being locked by the filter HANDLE hFile = CreateFile( pwcInputFile, GENERIC_READ, 0, //no sharing 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0 ); if ( INVALID_HANDLE_VALUE == hFile ) { out( L"Filter didn't release file; can't open %ws, error %#x\n", pwcInputFile, GetLastError() ); return HRESULT_FROM_WIN32( GetLastError() ); } out( L"Filter closed file properly when released\n" ); CloseHandle( hFile ); return S_OK; } //Filter //+------------------------------------------------------------------------- // // Function: GetQueryFunctions // // Synopsis: Loads needed undocumented functions from query.dll. // // Returns: The module handle or 0 on failure. // //-------------------------------------------------------------------------- HINSTANCE GetQueryFunctions() { HINSTANCE h = LoadLibrary( L"query.dll" ); if ( 0 != h ) { #ifdef _WIN64 char const * pcCIShutdown = "?CIShutdown@@YAXXZ"; #else char const * pcCIShutdown = "?CIShutdown@@YGXXZ"; #endif g_pCIShutdown = (PFnCIShutdown) GetProcAddress( h, pcCIShutdown ); if ( 0 == g_pCIShutdown ) { printf( "can't get CIShutdown function address\n" ); FreeLibrary( h ); return 0; } g_pLoadTextFilter = (PFnLoadTextFilter) GetProcAddress( h, "LoadTextFilter" ); if ( 0 == g_pLoadTextFilter ) { printf( "can't get LoadTextFilter function address\n" ); FreeLibrary( h ); return 0; } } return h; } //GetQueryFunctions //+------------------------------------------------------------------------- // // Function: ExceptionFilter // // Synopsis: Displays information about the exception // // Arguments: [pep] -- Exception pointers // // Returns: EXCEPTION_EXECUTE_HANDLER // //-------------------------------------------------------------------------- int ExceptionFilter( EXCEPTION_POINTERS * pep ) { printf( "fatal exception caught\n" ); EXCEPTION_RECORD & r = * ( pep->ExceptionRecord ); printf( " exception code: %#x\n", r.ExceptionCode ); printf( " exception address %#p\n", r.ExceptionAddress ); if ( ( EXCEPTION_ACCESS_VIOLATION == r.ExceptionCode ) && ( r.NumberParameters >= 2 ) ) { printf( " attempted %ws at address %#p\n", ( 0 == r.ExceptionInformation[0] ) ? L"read" : L"write", (void *) r.ExceptionInformation[1] ); } #ifdef _X86_ CONTEXT & c = * (CONTEXT *) (pep->ContextRecord ); if ( 0 != ( c.ContextFlags & CONTEXT_INTEGER ) ) { printf( " eax: %#x\n", c.Eax ); printf( " ebx: %#x\n", c.Ebx ); printf( " ecx: %#x\n", c.Ecx ); printf( " edx: %#x\n", c.Edx ); printf( " edi: %#x\n", c.Edi ); printf( " esi: %#x\n", c.Esi ); } if ( 0 != ( c.ContextFlags & CONTEXT_CONTROL ) ) { printf( " ebp: %#x\n", c.Ebp ); printf( " eip: %#x\n", c.Eip ); printf( " esp: %#x\n", c.Esp ); } #endif // _X86_ // Attempt to get the module name where the exception happened HMODULE hMod = GetModuleOfAddress( r.ExceptionAddress ); if ( 0 != hMod ) { WCHAR awcPath[ MAX_PATH ]; DWORD cwc= GetModuleFileName( hMod, awcPath, ArraySize( awcPath ) ); awcPath[ ArraySize( awcPath ) - 1 ] = 0; if ( 0 != cwc ) printf( " exception in module %ws\n", awcPath ); } return EXCEPTION_EXECUTE_HANDLER; } //ExceptionFilter //+------------------------------------------------------------------------- // // Function: wmain // // Synopsis: Main entrypoint for the program // // Arguments: [argc] -- Count of command-line arguments // [argv] -- The command-line arguments // // Returns: Application return code // //-------------------------------------------------------------------------- extern "C" int __cdecl wmain( int argc, WCHAR * argv[] ) { // Parse the command-line arguments BOOL fWordBreak = FALSE; BOOL fQuery = FALSE; BOOL fStem = FALSE; BOOL fFilter = FALSE; BOOL fGetText = TRUE; BOOL fShowStatusInfo = TRUE; enumFilterLoadMechanism filterLoad = eIPersistFile; WCHAR const * pwcModule = 0; WCHAR const * pwcInputFile = 0; WCHAR const * pwcOutputFile = 0; WCHAR *pwcText = 0; WCHAR const * pwcCLSID = 0; ULONG cwcMaxToken = 100; for ( int i = 1; i < argc; i++ ) { if ( L'-' == argv[i][0] || L'/' == argv[i][0] ) { WCHAR wc = towupper( argv[i][1] ); if ( ':' != argv[i][2] && 'B' != wc && 'D' != wc && 'F' != wc && 'T' != wc && 'N' != wc && 'Q' != wc && 'S' != wc ) Usage(); if ( 'C' == wc ) pwcCLSID = argv[i] + 3; else if ( 'D' == wc ) g_fDumpAsHex = TRUE; else if ( 'I' == wc ) { if ( 0 != pwcText ) Usage(); pwcInputFile = argv[i] + 3; } else if ( 'M' == wc ) pwcModule = argv[i] + 3; else if ( 'N' == wc ) fShowStatusInfo = FALSE; else if ( 'O' == wc ) pwcOutputFile = argv[i] + 3; else if ( 'S' == wc ) fStem = TRUE; else if ( 'T' == wc ) fGetText = FALSE; else if ( 'B' == wc ) fWordBreak = TRUE; else if ( 'F' == wc ) { fFilter = TRUE; WCHAR wcNext = towupper( argv[i][2] ); if ( L'S' == wcNext ) filterLoad = eIPersistStream; else if ( L'T' == wcNext ) filterLoad = eIPersistStorage; else if ( 0 != wcNext ) Usage(); } else if ( 'Q' == wc ) fQuery = TRUE; else if ( 'X' == wc ) cwcMaxToken = _wtoi( argv[i] + 3 ); else Usage(); } else if ( 0 != pwcText || 0 != pwcInputFile ) Usage(); else pwcText = argv[i]; } // We have to either wordbreak, stem, or filter if ( ( fWordBreak + fStem + fFilter ) != 1 ) Usage(); // We need the classid of the wordbreaker or stemmer to load if ( ( fWordBreak || fStem ) && ( 0 == pwcCLSID ) ) Usage(); // If we're loading by module, we need a CLSID if ( ( 0 != pwcModule ) && ( 0 == pwcCLSID ) ) Usage(); // Need input text or an input file to wordbreak if ( fWordBreak && ( 0 == pwcText ) && ( 0 == pwcInputFile ) ) Usage(); // Need input text to stem if ( fStem && ( 0 == pwcText ) ) Usage(); // Need input file to filter if ( fFilter && ( 0 == pwcInputFile ) ) Usage(); CLSID clsid; if ( 0 != pwcCLSID ) { HRESULT hr = CLSIDFromString( (LPOLESTR) pwcCLSID, &clsid ); if ( FAILED( hr ) ) { printf( "can't convert CLSID string to a CLSID: %#x\n", hr ); exit( 1 ); } } // Get the full path of the input file, if specified WCHAR awcPath[MAX_PATH]; if ( 0 != pwcInputFile ) { _wfullpath( awcPath, pwcInputFile, MAX_PATH ); pwcInputFile = awcPath; } // Get the full path of the output file, if specified, then open it WCHAR awcOutputPath[MAX_PATH]; if ( 0 != pwcOutputFile ) { _wfullpath( awcOutputPath, pwcOutputFile, MAX_PATH ); pwcOutputFile = awcOutputPath; g_fpOut = _wfopen( pwcOutputFile, L"wb" ); if ( 0 == g_fpOut ) { printf( "unable to open output file '%ws'\n", pwcOutputFile ); exit( 1 ); } const WCHAR awcUnicodeHeader[] = { 0xfeff, 0x0000 }; fwprintf( g_fpOut, awcUnicodeHeader ); } // Initialize COM multi-threaded, just like search products do HRESULT hr = CoInitializeEx( 0, COINIT_MULTITHREADED ); if ( FAILED( hr ) ) { printf( "can't initialize com: %#x\n", hr ); exit( 1 ); } // Load query.dll private exports HINSTANCE hQuery = GetQueryFunctions(); if ( 0 == hQuery ) { printf( "can't load needed functions from query.dll\n" ); exit( 1 ); } // Do the work __try { if ( fStem ) Stem( pwcText, pwcModule, clsid, cwcMaxToken ); if ( fWordBreak ) WordBreak( fQuery, pwcText, pwcInputFile, pwcModule, clsid, cwcMaxToken ); if ( fFilter ) Filter( pwcInputFile, filterLoad, pwcModule, ( 0 == pwcCLSID ) ? 0 : &clsid, fShowStatusInfo, fGetText ); } __except( ExceptionFilter( GetExceptionInformation() ) ) { printf( "fatal exception code %#x\n", GetExceptionCode() ); exit( -1 ); } // Shut down query.dll's filter loading code so it won't AV on exit. g_pCIShutdown(); FreeLibrary( hQuery ); CoUninitialize(); if ( 0 != g_fpOut ) { fclose( g_fpOut ); g_fpOut = 0; } return 0; } //wmain