You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2569 lines
76 KiB
2569 lines
76 KiB
//+-------------------------------------------------------------------------
|
|
//
|
|
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
|
|
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
|
|
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
|
|
// PARTICULAR PURPOSE.
|
|
//
|
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
//
|
|
// PROGRAM: lrtest.cxx
|
|
//
|
|
// Test program for invoking language resources including wordbreakers
|
|
// and stemmers. Also invokes filters.
|
|
//
|
|
// PLATFORM: Windows
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
#ifndef UNICODE
|
|
#define UNICODE
|
|
#endif
|
|
|
|
#define _OLE32_
|
|
|
|
#include <windows.h>
|
|
#include <oleext.h>
|
|
#include <psapi.h>
|
|
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <limits.h>
|
|
#include <eh.h>
|
|
|
|
#include <ntquery.h>
|
|
#include <filterr.h>
|
|
#include <cierror.h>
|
|
#include <indexsrv.h>
|
|
|
|
#include "minici.hxx"
|
|
|
|
#define USE_FAKE_COM
|
|
|
|
//
|
|
// These are undocumented Indexing Service functions, but they're needed
|
|
// to load filters and not crash, and to load the plain text filter.
|
|
//
|
|
|
|
typedef void (__stdcall * PFnCIShutdown)( void );
|
|
typedef HRESULT (__stdcall * PFnLoadTextFilter)( WCHAR const * pwcPath,
|
|
IFilter ** ppIFilter );
|
|
|
|
PFnCIShutdown g_pCIShutdown = 0;
|
|
PFnLoadTextFilter g_pLoadTextFilter = 0;
|
|
|
|
// If this is non-zero, it's a file handle to which output is streamed
|
|
|
|
FILE * g_fpOut = 0;
|
|
|
|
// If TRUE, strings from wordbreakers and stemmers are dumped in hex
|
|
|
|
BOOL g_fDumpAsHex = FALSE;
|
|
|
|
enum enumFilterLoadMechanism
|
|
{
|
|
eIPersistFile,
|
|
eIPersistStream,
|
|
eIPersistStorage
|
|
};
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Function: out
|
|
//
|
|
// Synopsis: Like printf, only will send output to the output file if
|
|
// specified, or just to the console. Appends a carriage
|
|
// return / line feed to the text.
|
|
//
|
|
// Arguments: [pwcFormat] -- printf-style format string
|
|
// [...] -- Variable arguments
|
|
//
|
|
// Returns: count of characters emitted.
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
int out( const WCHAR * pwcFormat, ... )
|
|
{
|
|
va_list arglist;
|
|
va_start( arglist, pwcFormat );
|
|
|
|
// Writing to the output file is done in binary mode so the output can be
|
|
// Unicode. The side-effect is that "\n" isn't translated into "\r\n"
|
|
// automatically, so it has to be explicit.
|
|
|
|
int i;
|
|
|
|
if ( 0 != g_fpOut )
|
|
{
|
|
i = vfwprintf( g_fpOut, pwcFormat, arglist );
|
|
i += fwprintf( g_fpOut, L"\r\n" );
|
|
}
|
|
else
|
|
{
|
|
i = vwprintf( pwcFormat, arglist );
|
|
i += wprintf( L"\n" );
|
|
}
|
|
|
|
va_end( arglist );
|
|
return i;
|
|
} //out
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Function: outstr
|
|
//
|
|
// Synopsis: Like printf, only will send output to the output file if
|
|
// specified, or just to the console.
|
|
//
|
|
// Arguments: [pwcFormat] -- printf-style format string
|
|
// [...] -- Variable arguments
|
|
//
|
|
// Returns: count of characters emitted.
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
int outstr( const WCHAR * pwcFormat, ... )
|
|
{
|
|
va_list arglist;
|
|
va_start( arglist, pwcFormat );
|
|
|
|
int i;
|
|
|
|
if ( 0 != g_fpOut )
|
|
i = vfwprintf( g_fpOut, pwcFormat, arglist );
|
|
else
|
|
i = vwprintf( pwcFormat, arglist );
|
|
|
|
va_end( arglist );
|
|
return i;
|
|
} //outstr
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Function: Usage
|
|
//
|
|
// Synopsis: Displays usage information about the application, then exits.
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
void Usage()
{
    // The help text, one entry per printed line, emitted in order.

    static const char * const s_apszHelp[] =
    {
        "usage: lrtest [/d] [/b] [/f] [/q] [/s] [/x:#] /c:clsid [/o:file] [/i:file] [text]\n",
        "\n",
        " Language Resource test program\n",
        "\n",
        " arguments:\n",
        " /b Load the wordbreaker (can't be used with /s or /f)\n",
        " /c: CLSID of the wordbreaker or stemmer to load\n",
        " /d Dumps output strings in hex as well as strings\n",
        " /f Load the filter (can't be used with /b or /s)\n",
        " If /c isn't specified, use Indexing Service's LoadIFilter\n",
        " /fs Same as /f, but uses IPersistStream, not IPersistFile\n",
        " /ft Same as /f, but uses IPersistStorage, not IPersistFile\n",
        " /i: Path of an input file, if [text] isn't specified\n",
        " /m: Optional path of the dll to load. Overrides COM CLSID lookup\n",
        " /n No status information. Used with /f, only displays filter output\n",
        " /o: Path of an output file. If not specified, console is used\n",
        " /q If wordbreaking, do so for query instead of indexing\n",
        " /s Load the stemmer (can't be used with /b or /f)\n",
        " /t No text information; just chunks. Used with /f\n",
        " /x:# Maximum token size, default is 100\n",
        " text Text to wordbreak or stem, if /i: isn't specified\n",
        "\n",
        " examples:\n",
        " lrtest /b /c:{369647e0-17b0-11ce-9950-00aa004bbb1f} \"Alice's restaurant\"\n",
        " lrtest /b /q /c:{369647e0-17b0-11ce-9950-00aa004bbb1f} \"data-base\"\n",
        " lrtest /b /c:{369647e0-17b0-11ce-9950-00aa004bbb1f} /i:foo.doc\n",
        " lrtest /b /c:{369647e0-17b0-11ce-9950-00aa004bbb1f} /m:wb.dll /i:foo.doc\n",
        " lrtest /d /s /c:{eeed4c20-7f1b-11ce-be57-00aa0051fe20} peach /o:output.txt\n",
        " lrtest /f /c:{f07f3920-7b8c-11cf-9be8-00aa004b9986} /i:foo.doc\n",
        " lrtest /f /i:foo.doc\n",
        " lrtest /fs /i:foo.doc\n",
        "\n"
    };

    const size_t cLines = sizeof( s_apszHelp ) / sizeof( s_apszHelp[ 0 ] );

    for ( size_t iLine = 0; iLine < cLines; iLine++ )
        printf( "%s", s_apszHelp[ iLine ] );

    // Usage never returns to the caller

    exit( 1 );
} //Usage
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Function: GetModuleOfAddress
|
|
//
|
|
// Synopsis: Returns the module handle of a given address or 0
|
|
//
|
|
// Arguments: [pAddress] -- Address in one of the modules loaded
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
HMODULE GetModuleOfAddress( void * pAddress )
{
    HANDLE const hProcess = GetCurrentProcess();

    // First pass: find out how many bytes the module handle array needs.

    DWORD cbNeeded = 0;
    if ( !EnumProcessModules( hProcess, 0, 0, &cbNeeded ) )
        return 0;

    ULONG cModules = cbNeeded / sizeof( HMODULE );
    if ( 0 == cModules )
        return 0;

    XPtr<HMODULE> aModules( cModules );
    if ( aModules.IsNull() )
        return 0;

    // Second pass: retrieve the handles themselves.

    DWORD const cbBuffer = (DWORD) ( cModules * sizeof( HMODULE ) );
    if ( !EnumProcessModules( hProcess, aModules.Get(), cbBuffer, &cbNeeded ) )
        return 0;

    // The module list can shrink between the two passes. Only the first
    // cbNeeded bytes were filled in, so don't look past them.

    ULONG const cReturned = cbNeeded / sizeof( HMODULE );
    if ( cReturned < cModules )
        cModules = cReturned;

    BYTE * const pbAddress = (BYTE *) pAddress;

    for ( ULONG i = 0; i < cModules; i++ )
    {
        MODULEINFO mi;

        // Skip modules whose information can't be read; mi is not valid
        // in that case.

        if ( !GetModuleInformation( hProcess, aModules[ i ], &mi, sizeof( mi ) ) )
            continue;

        BYTE * const pbBase = (BYTE *) mi.lpBaseOfDll;

        if ( ( pbAddress >= pbBase ) &&
             ( pbAddress < ( pbBase + mi.SizeOfImage ) ) )
        {
            return aModules[ i ];
        }
    }

    // The address isn't inside any loaded module

    return 0;
} //GetModuleOfAddress
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Function: DumpStringAsHex
|
|
//
|
|
// Synopsis: Emits a string in hex format. Useful for East Asian languages.
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
void DumpStringAsHex( WCHAR const * pwc, ULONG cwc )
{
    // Hex dumping is opt-in

    if ( !g_fDumpAsHex )
        return;

    WCHAR const * const pwcEnd = pwc + cwc;

    for ( WCHAR const * pwcCur = pwc; pwcCur != pwcEnd; ++pwcCur )
    {
        // Separate the values with a single space

        if ( pwcCur != pwc )
            outstr( L" " );

        outstr( L"%#x", *pwcCur );
    }

    // Terminate the line

    out( L"" );
} //DumpStringAsHex
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Class: CIStream
|
|
//
|
|
// Purpose: Wraps a file with an IStream.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
class CIStream : public IStream
|
|
{
|
|
public:
|
|
CIStream() : _hFile( INVALID_HANDLE_VALUE ),
|
|
_cRef( 1 ),
|
|
_lOffset( 0 ),
|
|
_cbData( 0 )
|
|
{
|
|
}
|
|
|
|
~CIStream()
|
|
{
|
|
Free();
|
|
}
|
|
|
|
void Free()
|
|
{
|
|
if ( INVALID_HANDLE_VALUE != _hFile )
|
|
{
|
|
CloseHandle( _hFile );
|
|
_hFile = INVALID_HANDLE_VALUE;
|
|
}
|
|
}
|
|
|
|
HRESULT Open( WCHAR const * pwcFile )
|
|
{
|
|
Free();
|
|
|
|
_hFile = CreateFile( pwcFile,
|
|
GENERIC_READ,
|
|
FILE_SHARE_READ | FILE_SHARE_WRITE |
|
|
FILE_SHARE_DELETE,
|
|
0,
|
|
OPEN_EXISTING,
|
|
FILE_ATTRIBUTE_NORMAL,
|
|
0 );
|
|
|
|
if ( INVALID_HANDLE_VALUE == _hFile )
|
|
return HRESULT_FROM_WIN32( GetLastError() );
|
|
|
|
_cbData = GetFileSize( _hFile, 0 );
|
|
|
|
return S_OK;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE QueryInterface( REFIID riid, void ** ppvObj )
|
|
{
|
|
if ( 0 == ppvObj )
|
|
return E_INVALIDARG;
|
|
|
|
*ppvObj = 0;
|
|
|
|
if ( IID_IStream == riid )
|
|
*ppvObj = (IStream *) this;
|
|
else if ( IID_IUnknown == riid )
|
|
*ppvObj = (IUnknown *) this;
|
|
else
|
|
return E_NOINTERFACE;
|
|
|
|
AddRef();
|
|
return S_OK;
|
|
}
|
|
|
|
ULONG STDMETHODCALLTYPE AddRef()
|
|
{
|
|
return InterlockedIncrement( &_cRef );
|
|
}
|
|
|
|
ULONG STDMETHODCALLTYPE Release()
|
|
{
|
|
unsigned long uTmp = InterlockedDecrement( &_cRef );
|
|
|
|
if ( 0 == uTmp )
|
|
delete this;
|
|
|
|
return uTmp;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE Read(
|
|
void * pv,
|
|
ULONG cb,
|
|
ULONG * pcbRead )
|
|
{
|
|
DWORD dwOff = SetFilePointer( _hFile,
|
|
_lOffset,
|
|
0,
|
|
FILE_BEGIN );
|
|
|
|
if ( INVALID_SET_FILE_POINTER == dwOff )
|
|
return HRESULT_FROM_WIN32( GetLastError() );
|
|
|
|
BOOL f = ReadFile( _hFile,
|
|
pv,
|
|
cb,
|
|
pcbRead,
|
|
0 );
|
|
|
|
if ( !f )
|
|
return HRESULT_FROM_WIN32( GetLastError() );
|
|
|
|
return S_OK;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE Write(
|
|
VOID const * pv,
|
|
ULONG cb,
|
|
ULONG * pcbWritten )
|
|
{
|
|
return E_NOTIMPL;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE Seek(
|
|
LARGE_INTEGER dlibMoveIn,
|
|
DWORD dwOrigin,
|
|
ULARGE_INTEGER * plibNewPosition )
|
|
{
|
|
HRESULT hr = S_OK;
|
|
LONG dlibMove = dlibMoveIn.LowPart;
|
|
ULONG cbNewPos = dlibMove;
|
|
|
|
switch(dwOrigin)
|
|
{
|
|
case STREAM_SEEK_SET:
|
|
if (dlibMove >= 0)
|
|
_lOffset = dlibMove;
|
|
else
|
|
hr = STG_E_SEEKERROR;
|
|
break;
|
|
case STREAM_SEEK_CUR:
|
|
if (!(dlibMove < 0 && ( -dlibMove > _lOffset)))
|
|
_lOffset += (ULONG) dlibMove;
|
|
else
|
|
hr = STG_E_SEEKERROR;
|
|
break;
|
|
case STREAM_SEEK_END:
|
|
if (!(dlibMove < 0 ))
|
|
_lOffset = _cbData + dlibMove;
|
|
else
|
|
hr = STG_E_SEEKERROR;
|
|
break;
|
|
default:
|
|
hr = STG_E_SEEKERROR;
|
|
}
|
|
|
|
if ( 0 != plibNewPosition )
|
|
ULISet32(*plibNewPosition, _lOffset);
|
|
|
|
return hr;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE SetSize( ULARGE_INTEGER cb )
|
|
{
|
|
return E_NOTIMPL;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE CopyTo(
|
|
IStream * pstm,
|
|
ULARGE_INTEGER cb,
|
|
ULARGE_INTEGER * pcbRead,
|
|
ULARGE_INTEGER * pcbWritten )
|
|
{
|
|
return E_NOTIMPL;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE Commit( DWORD grfCommitFlags )
|
|
{
|
|
return S_OK;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE Revert()
|
|
{
|
|
return S_OK;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE LockRegion(
|
|
ULARGE_INTEGER libOffset,
|
|
ULARGE_INTEGER cb,
|
|
DWORD dwLockType )
|
|
{
|
|
return STG_E_INVALIDFUNCTION;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE UnlockRegion(
|
|
ULARGE_INTEGER libOffset,
|
|
ULARGE_INTEGER cb,
|
|
DWORD dwLockType)
|
|
{
|
|
return STG_E_INVALIDFUNCTION;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE Stat(
|
|
STATSTG * pstatstg,
|
|
DWORD statflag )
|
|
{
|
|
memset( pstatstg, 0, sizeof STATSTG );
|
|
pstatstg->type = STGTY_STREAM;
|
|
pstatstg->cbSize.QuadPart = _cbData;
|
|
pstatstg->grfMode = STGM_READ;
|
|
return S_OK;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE Clone( IStream ** ppstm )
|
|
{
|
|
return E_NOTIMPL;
|
|
}
|
|
|
|
private:
|
|
|
|
LONG _cRef;
|
|
HANDLE _hFile;
|
|
LONG _lOffset;
|
|
LONG _cbData;
|
|
};
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Class: CPlainTextSource
|
|
//
|
|
// Purpose: Takes a simple buffer and provides a TEXT_SOURCE for it, which
|
|
// can be passed to wordbreakers.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
class CPlainTextSource : public TEXT_SOURCE
|
|
{
|
|
public:
|
|
CPlainTextSource(
|
|
WCHAR const * pwcText,
|
|
ULONG cwc )
|
|
{
|
|
awcBuffer = pwcText;
|
|
iCur = 0;
|
|
iEnd = cwc;
|
|
pfnFillTextBuffer = PlainFillBuf;
|
|
}
|
|
|
|
static HRESULT __stdcall PlainFillBuf( TEXT_SOURCE * pTextSource )
|
|
{
|
|
return WBREAK_E_END_OF_TEXT;
|
|
}
|
|
};
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Class: CFilterTextSource
|
|
//
|
|
// Purpose: Takes an IFilter and provides a TEXT_SOURCE for it, which
|
|
// can be passed to wordbreakers.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
#pragma warning(disable: 4512)
|
|
|
|
class CFilterTextSource : public TEXT_SOURCE
|
|
{
|
|
public:
|
|
CFilterTextSource( IFilter & filter ) :
|
|
_filter( filter ),
|
|
_hr( S_OK )
|
|
{
|
|
awcBuffer = _awcBuffer;
|
|
iCur = 0;
|
|
iEnd = 0;
|
|
pfnFillTextBuffer = FilterFillBuf;
|
|
|
|
// Get the first chunk
|
|
|
|
_hr = _filter.GetChunk( &_Stat );
|
|
|
|
// Get text for the chunk
|
|
|
|
FillBuf();
|
|
}
|
|
|
|
static HRESULT __stdcall FilterFillBuf( TEXT_SOURCE * pTextSource )
|
|
{
|
|
CFilterTextSource & This = * (CFilterTextSource *) pTextSource;
|
|
return This.FillBuf();
|
|
}
|
|
|
|
private:
|
|
HRESULT FillBuf()
|
|
{
|
|
// Never continue past an error condition except FILTER_E_NO_MORE_TEXT
|
|
|
|
if ( FAILED( _hr ) && _hr != FILTER_E_NO_MORE_TEXT )
|
|
return _hr;
|
|
|
|
if ( iCur > iEnd )
|
|
{
|
|
out( L"TEXT_SOURCE iCur (%#x) > iEnd (%#x), this is incorrect\n",
|
|
iCur, iEnd );
|
|
_hr = E_INVALIDARG;
|
|
return _hr;
|
|
}
|
|
|
|
// Move any existing text to beginning of buffer.
|
|
|
|
ULONG ccLeftOver = iEnd - iCur;
|
|
if ( ccLeftOver > 0 )
|
|
MoveMemory( _awcBuffer,
|
|
&_awcBuffer[iCur],
|
|
ccLeftOver * sizeof WCHAR );
|
|
|
|
iCur = 0;
|
|
iEnd = ccLeftOver;
|
|
ULONG ccRead = BufferWChars() - ccLeftOver;
|
|
const ULONG BUFFER_SLOP = 10;
|
|
|
|
//
|
|
// Get some more text. If *previous* call to GetText returned
|
|
// FILTER_S_LAST_TEXT, or FILTER_E_NO_MORE_TEXT then don't even
|
|
// bother trying.
|
|
//
|
|
|
|
if ( FILTER_S_LAST_TEXT == _hr || FILTER_E_NO_MORE_TEXT == _hr )
|
|
_hr = FILTER_E_NO_MORE_TEXT;
|
|
else
|
|
{
|
|
_hr = _filter.GetText( &ccRead,
|
|
&_awcBuffer[ccLeftOver] );
|
|
if ( SUCCEEDED( _hr ) )
|
|
{
|
|
iEnd += ccRead;
|
|
ccLeftOver += ccRead;
|
|
ccRead = BufferWChars() - ccLeftOver;
|
|
|
|
while ( ( S_OK == _hr ) && ( ccRead > BUFFER_SLOP ) )
|
|
{
|
|
// Attempt to fill in as much of buffer as possible
|
|
|
|
_hr = _filter.GetText( &ccRead,
|
|
&_awcBuffer[ccLeftOver] );
|
|
if ( SUCCEEDED( _hr ) )
|
|
{
|
|
iEnd += ccRead;
|
|
ccLeftOver += ccRead;
|
|
ccRead = BufferWChars() - ccLeftOver;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Either return FILTER_S_LAST_TEXT or return S_OK because we
|
|
// have succeeded in adding text to the buffer.
|
|
//
|
|
|
|
if ( FILTER_S_LAST_TEXT == _hr )
|
|
return FILTER_S_LAST_TEXT;
|
|
|
|
return S_OK;
|
|
}
|
|
|
|
if ( ( FILTER_E_NO_MORE_TEXT != _hr ) &&
|
|
( FILTER_E_NO_TEXT != _hr ) )
|
|
{
|
|
// Weird failure, hence return, else goto next chunk
|
|
|
|
return _hr;
|
|
}
|
|
}
|
|
|
|
// Go to next chunk, if necessary.
|
|
|
|
while ( ( FILTER_E_NO_MORE_TEXT == _hr ) ||
|
|
( FILTER_E_NO_TEXT == _hr ) )
|
|
{
|
|
_hr = _filter.GetChunk( &_Stat );
|
|
|
|
if ( FILTER_E_END_OF_CHUNKS == _hr )
|
|
return WBREAK_E_END_OF_TEXT;
|
|
|
|
if ( FILTER_E_PARTIALLY_FILTERED == _hr )
|
|
return WBREAK_E_END_OF_TEXT;
|
|
|
|
if ( FAILED( _hr ) )
|
|
return( _hr );
|
|
|
|
//
|
|
// Skip over value chunks -- note that search products don't do
|
|
// this. They convert VT_LPSTR, VT_BSTR, and VT_LPWSTR to
|
|
// Unicode strings for the wordbreaker.
|
|
//
|
|
|
|
if ( CHUNK_TEXT != _Stat.flags )
|
|
continue;
|
|
|
|
ccRead = BufferWChars() - ccLeftOver;
|
|
_hr = _filter.GetText( &ccRead,
|
|
&_awcBuffer[ccLeftOver] );
|
|
if ( SUCCEEDED( _hr ) )
|
|
{
|
|
iEnd += ccRead;
|
|
ccLeftOver += ccRead;
|
|
ccRead = BufferWChars() - ccLeftOver;
|
|
|
|
while ( ( S_OK == _hr ) && ( ccRead > BUFFER_SLOP ) )
|
|
{
|
|
// Attempt to fill in as much of buffer as possible
|
|
|
|
_hr = _filter.GetText( &ccRead,
|
|
&_awcBuffer[ccLeftOver] );
|
|
if ( SUCCEEDED( _hr ) )
|
|
{
|
|
iEnd += ccRead;
|
|
ccLeftOver += ccRead;
|
|
ccRead = BufferWChars() - ccLeftOver;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Either return FILTER_S_LAST_TEXT or return S_OK because we
|
|
// have succeeded in adding text to the buffer.
|
|
//
|
|
if ( FILTER_S_LAST_TEXT == _hr )
|
|
return FILTER_S_LAST_TEXT;
|
|
|
|
return S_OK;
|
|
}
|
|
}
|
|
|
|
if ( FAILED( _hr ) )
|
|
return _hr;
|
|
|
|
if ( 0 == ccRead )
|
|
return WBREAK_E_END_OF_TEXT;
|
|
|
|
return S_OK;
|
|
} //FillBuf
|
|
|
|
ULONG BufferWChars() const
|
|
{
|
|
return ArraySize( _awcBuffer );
|
|
}
|
|
|
|
IFilter & _filter;
|
|
HRESULT _hr;
|
|
STAT_CHUNK _Stat;
|
|
WCHAR _awcBuffer[ 1024 ];
|
|
};
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Class: CWordFormSink
|
|
//
|
|
// Purpose: Sample stemmer sink -- just prints the results.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
class CWordFormSink : public IWordFormSink
|
|
{
|
|
public:
|
|
CWordFormSink() {}
|
|
|
|
HRESULT STDMETHODCALLTYPE QueryInterface(
|
|
REFIID riid,
|
|
void ** ppvObject )
|
|
{
|
|
*ppvObject = this;
|
|
return S_OK;
|
|
}
|
|
|
|
ULONG STDMETHODCALLTYPE AddRef() { return 1; }
|
|
|
|
ULONG STDMETHODCALLTYPE Release() { return 1; }
|
|
|
|
HRESULT STDMETHODCALLTYPE PutAltWord(
|
|
WCHAR const * pwcBuf,
|
|
ULONG cwc )
|
|
{
|
|
out( L"IWordFormSink::PutAltWord: cwc %d, '%.*ws'", cwc, cwc, pwcBuf );
|
|
DumpStringAsHex( pwcBuf, cwc );
|
|
return S_OK;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE PutWord (
|
|
WCHAR const * pwcBuf,
|
|
ULONG cwc )
|
|
{
|
|
out( L"IWordFormSink::PutWord: cwc %d, '%.*ws'", cwc, cwc, pwcBuf );
|
|
DumpStringAsHex( pwcBuf, cwc );
|
|
return S_OK;
|
|
}
|
|
};
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Class: CWordSink
|
|
//
|
|
// Purpose: Sample word sink -- just prints the results.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
class CWordSink : public IWordSink
|
|
{
|
|
public:
|
|
CWordSink() {}
|
|
|
|
HRESULT STDMETHODCALLTYPE QueryInterface(
|
|
REFIID riid,
|
|
void ** ppvObject )
|
|
{
|
|
*ppvObject = this;
|
|
return S_OK;
|
|
}
|
|
|
|
ULONG STDMETHODCALLTYPE AddRef() { return 1; }
|
|
|
|
ULONG STDMETHODCALLTYPE Release() { return 1; }
|
|
|
|
HRESULT STDMETHODCALLTYPE PutWord(
|
|
ULONG cwc,
|
|
WCHAR const * pwcBuf,
|
|
ULONG cwcSrcLen,
|
|
ULONG cwcSrcPos )
|
|
{
|
|
out( L"IWordSink::PutWord: cwcSrcLen %d, cwcSrcPos %d, cwc %d, '%.*ws'",
|
|
cwcSrcLen, cwcSrcPos, cwc, cwc, pwcBuf );
|
|
DumpStringAsHex( pwcBuf, cwc );
|
|
return S_OK;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE PutAltWord(
|
|
ULONG cwc,
|
|
WCHAR const * pwcBuf,
|
|
ULONG cwcSrcLen,
|
|
ULONG cwcSrcPos )
|
|
{
|
|
out( L"IWordSink::PutAltWord: cwcSrcLen %d, cwcSrcPos %d, cwc %d, '%.*ws'",
|
|
cwcSrcLen, cwcSrcPos, cwc, cwc, pwcBuf );
|
|
DumpStringAsHex( pwcBuf, cwc );
|
|
return S_OK;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE StartAltPhrase()
|
|
{
|
|
out( L"IWordSink::StartAltPhrase" );
|
|
return S_OK;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE EndAltPhrase()
|
|
{
|
|
out( L"IWordSink::EndAltPhrase" );
|
|
return S_OK;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE PutBreak( WORDREP_BREAK_TYPE wbt )
|
|
{
|
|
out( L"IWordSink::PutBreak, type (%d) %ws",
|
|
wbt,
|
|
( WORDREP_BREAK_EOW == wbt ) ? L"end of word" :
|
|
( WORDREP_BREAK_EOS == wbt ) ? L"end of sentence" :
|
|
( WORDREP_BREAK_EOP == wbt ) ? L"end of paragraph" :
|
|
( WORDREP_BREAK_EOC == wbt ) ? L"end of chapter" :
|
|
L"invalid break type" );
|
|
return S_OK;
|
|
}
|
|
};
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Class: CPhraseSink
|
|
//
|
|
// Purpose: Sample phrase sink -- just prints the results.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
class CPhraseSink: public IPhraseSink
|
|
{
|
|
public:
|
|
CPhraseSink() {}
|
|
|
|
HRESULT STDMETHODCALLTYPE QueryInterface(
|
|
REFIID riid,
|
|
void ** ppvObject )
|
|
{
|
|
// Assume the caller is well-behaved
|
|
|
|
*ppvObject = this;
|
|
return S_OK;
|
|
}
|
|
|
|
ULONG STDMETHODCALLTYPE AddRef() { return 1; }
|
|
|
|
ULONG STDMETHODCALLTYPE Release() { return 1; }
|
|
|
|
HRESULT STDMETHODCALLTYPE PutSmallPhrase(
|
|
const WCHAR * pwcNoun,
|
|
ULONG cwcNoun,
|
|
const WCHAR * pwcModifier,
|
|
ULONG cwcModifier,
|
|
ULONG ulAttachmentType )
|
|
{
|
|
out( L"IPhraseSink::PutSmallPhrase" );
|
|
return S_OK;
|
|
}
|
|
|
|
HRESULT STDMETHODCALLTYPE PutPhrase(
|
|
WCHAR const * pwcPhrase,
|
|
ULONG cwcPhrase )
|
|
{
|
|
out( L"IPhraseSink::PutPhrase: cwcPhrase %d, '%.*ws'",
|
|
cwcPhrase, cwcPhrase, pwcPhrase );
|
|
DumpStringAsHex( pwcPhrase, cwcPhrase );
|
|
return S_OK;
|
|
}
|
|
};
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Function: GetVersionKey
|
|
//
|
|
// Purpose: Displays a particular version key
|
|
//
|
|
// Arguments: [pbInfo] -- The version information
|
|
// [pwcLang] -- The language of the string requested
|
|
// [pwcKey] -- Key name to retrieve
|
|
//
|
|
// Returns: TRUE if a value was found, FALSE otherwise
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
BOOL GetVersionKey(
    BYTE *        pbInfo,
    WCHAR const * pwcLang,
    WCHAR const * pwcKey )
{
    // Build the "\StringFileInfo\<lang>\<key>" query path. The formatting is
    // bounded so an over-long language or key can't overrun the buffer; a
    // path that doesn't fit (including its terminator) is treated as
    // "not found".

    WCHAR awcKey[ 128 ];
    int const cwcKey = _snwprintf( awcKey,
                                   ArraySize( awcKey ),
                                   L"\\StringFileInfo\\%ws\\%ws",
                                   pwcLang,
                                   pwcKey );

    if ( ( cwcKey < 0 ) || ( cwcKey >= (int) ArraySize( awcKey ) ) )
        return FALSE;

    WCHAR * pwcResult = 0;
    UINT cwcResult = 0;

    if ( !VerQueryValue( pbInfo,
                         awcKey,
                         (PVOID *) &pwcResult,
                         &cwcResult ) )
        return FALSE;

    // A zero length means no value is available for this name

    if ( ( 0 == pwcResult ) || ( 0 == cwcResult ) )
        return FALSE;

    out( L" %ws: '%ws'", pwcKey, pwcResult );
    return TRUE;
} //GetVersionKey
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Function: OutputFiletime
|
|
//
|
|
// Purpose: Displays a filetime
|
|
//
|
|
// Arguments: [pwcHeader] -- Prefix to print before the filetime
|
|
// [ft] -- Filetime to print, in UTC originally
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
void OutputFiletime( WCHAR const * pwcHeader, FILETIME & ft )
{
    // Convert UTC -> local -> broken-down time. If either conversion fails
    // the SYSTEMTIME is not valid, so say so instead of printing it.

    FILETIME ftLocal;
    SYSTEMTIME st;

    if ( !FileTimeToLocalFileTime( &ft, &ftLocal ) ||
         !FileTimeToSystemTime( &ftLocal, &st ) )
    {
        out( L"%ws: (invalid time)", pwcHeader );
        return;
    }

    // Switch to a 12-hour clock: 0 is shown as 12a and 13..23 as 1p..11p

    BOOL const pm = st.wHour >= 12;

    if ( st.wHour > 12 )
        st.wHour -= 12;
    else if ( 0 == st.wHour )
        st.wHour = 12;

    out( L"%ws: %2d-%02d-%04d %2d:%02d%wc",
         pwcHeader,
         (DWORD) st.wMonth,
         (DWORD) st.wDay,
         (DWORD) st.wYear,
         (DWORD) st.wHour,
         (DWORD) st.wMinute,
         pm ? L'p' : L'a' );
} //OutputFiletime
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Function: DisplayModuleInformation
|
|
//
|
|
// Purpose: Displays information about a module -- dates and version
|
|
//
|
|
// Arguments: [hMod] -- Module handle
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
HRESULT DisplayModuleInformation( HINSTANCE hMod )
{
    // Every failure path below captures GetLastError() once, before
    // printing, because the print itself may change the last-error value.
    // When no error code is available E_FAIL is returned, so a failure is
    // never reported as S_OK.

    WCHAR awcDllPath[ MAX_PATH ];
    DWORD cwcCopied = GetModuleFileName( hMod,
                                         awcDllPath,
                                         ArraySize( awcDllPath ) );
    if ( 0 == cwcCopied )
    {
        DWORD const dwErr = GetLastError();
        return ( NO_ERROR != dwErr ) ? HRESULT_FROM_WIN32( dwErr ) : E_FAIL;
    }

    // Make sure the path is terminated even if it was truncated

    awcDllPath[ ArraySize( awcDllPath ) - 1 ] = 0;

    out( L"dll loaded: %ws", awcDllPath );

    DWORD dwHandle;
    DWORD cbVersionInfo = GetFileVersionInfoSize( awcDllPath, &dwHandle );
    if ( 0 == cbVersionInfo )
    {
        DWORD const dwErr = GetLastError();
        printf( "can't get dll version information size, error %d\n", dwErr );
        return ( NO_ERROR != dwErr ) ? HRESULT_FROM_WIN32( dwErr ) : E_FAIL;
    }

    XPtr<BYTE> xVersionInfo( cbVersionInfo );
    if ( xVersionInfo.IsNull() )
        return E_OUTOFMEMORY;

    BOOL fOK = GetFileVersionInfo( awcDllPath,
                                   0,
                                   cbVersionInfo,
                                   xVersionInfo.Get() );
    if ( !fOK )
    {
        DWORD const dwErr = GetLastError();
        printf( "unable to retrieve version information, error %d\n", dwErr );
        return ( NO_ERROR != dwErr ) ? HRESULT_FROM_WIN32( dwErr ) : E_FAIL;
    }

    // Get the DLL version number

    void * pvValue = 0;
    UINT cbValue = 0;

    fOK = VerQueryValue( xVersionInfo.Get(),
                         L"\\",
                         &pvValue,
                         &cbValue );

    // The root block must be large enough to hold a VS_FIXEDFILEINFO
    // before any of its fields are read.

    if ( !fOK || ( 0 == pvValue ) || ( cbValue < sizeof( VS_FIXEDFILEINFO ) ) )
    {
        DWORD const dwErr = GetLastError();
        printf( "can't retrieve version root value, error %d\n", dwErr );
        return ( NO_ERROR != dwErr ) ? HRESULT_FROM_WIN32( dwErr ) : E_FAIL;
    }

    VS_FIXEDFILEINFO & ffi = * (VS_FIXEDFILEINFO *) pvValue;

    out( L" dll version %u.%u.%u.%u",
         HIWORD( ffi.dwFileVersionMS ),
         LOWORD( ffi.dwFileVersionMS ),
         HIWORD( ffi.dwFileVersionLS ),
         LOWORD( ffi.dwFileVersionLS ) );

    if ( 0 != ffi.dwFileDateLS && 0 != ffi.dwFileDateMS )
    {
        FILETIME ft;
        ft.dwLowDateTime = ffi.dwFileDateLS;
        ft.dwHighDateTime = ffi.dwFileDateMS;

        // OutputFiletime appends ": " after the header itself

        OutputFiletime( L" version creation date", ft );
    }

    // File times come from the file itself; failures here are not fatal

    HANDLE h = CreateFile( awcDllPath,
                           FILE_GENERIC_READ,
                           FILE_SHARE_READ | FILE_SHARE_DELETE,
                           0,
                           OPEN_EXISTING,
                           0,
                           0 );
    if ( INVALID_HANDLE_VALUE != h )
    {
        FILETIME ftCreate, ftLastWrite;
        fOK = GetFileTime( h, &ftCreate, 0, &ftLastWrite );
        if ( fOK )
        {
            OutputFiletime( L" file create time", ftCreate );
            OutputFiletime( L" file last write time", ftLastWrite );
        }

        CloseHandle( h );
    }

    //
    // Get the language string. Not every dll stores it correctly, so fall
    // back on English locales known to work for some special cases.
    //

    WCHAR awcLang[9];
    awcLang[0] = 0;

    DWORD * pdwLang = 0;
    UINT cb = 0;

    if ( VerQueryValue( xVersionInfo.Get(),
                        L"VarFileInfo\\Translation",
                        (PVOID *) &pdwLang,
                        &cb ) &&
         ( 0 != pdwLang ) &&
         ( cb >= 4 ) )
    {
        // Eight hex digits plus the terminator exactly fill awcLang

        wsprintf( awcLang,
                  L"%04x%04x",
                  LOWORD( *pdwLang ),
                  HIWORD( *pdwLang ) );
    }

    if ( 0 == awcLang[0] )
    {
        // No translation entry: try English Unicode, then English, then
        // English with a null codepage, stopping at the first one that has
        // a FileVersion string.

        static WCHAR const * const s_apwcFallback[] =
        {
            L"040904B0",
            L"040904E4",
            L"04090000"
        };

        BOOL fFound = FALSE;

        for ( ULONG i = 0;
              !fFound && ( i < ArraySize( s_apwcFallback ) );
              i++ )
        {
            wcscpy( awcLang, s_apwcFallback[ i ] );
            fFound = GetVersionKey( xVersionInfo.Get(),
                                    awcLang,
                                    L"FileVersion" );
        }

        if ( !fFound )
            awcLang[0] = 0;
    }
    else
    {
        GetVersionKey( xVersionInfo.Get(), awcLang, L"FileVersion" );
    }

    // Display additional version information if we found the language

    if ( 0 != awcLang[0] )
    {
        GetVersionKey( xVersionInfo.Get(), awcLang, L"FileDescription" );
        GetVersionKey( xVersionInfo.Get(), awcLang, L"CompanyName" );
        GetVersionKey( xVersionInfo.Get(), awcLang, L"ProductName" );
    }

    return S_OK;
} //DisplayModuleInformation
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Function: CreateFromModule
|
|
//
|
|
// Purpose: Creates a COM object given a dll
|
|
//
|
|
// Arguments: [clsid] -- Class ID of the object to load
|
|
// [iid] -- Interface ID requested
|
|
// [ppvObject] -- Returns the object created
|
|
// [pwcModule] -- Dll to load
|
|
// [fShowStatusInfo] -- TRUE to print status information
|
|
//
|
|
// Returns: HRESULT, S_OK if successful
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
HRESULT CreateFromModule(
    REFIID        clsid,
    REFIID        iid,
    void **       ppvObject,
    WCHAR const * pwcModule,
    BOOL          fShowStatusInfo = TRUE )
{
    // Note: the module handle will be leaked. It's OK for a test program.

    HMODULE hMod = LoadLibrary( pwcModule );
    if ( 0 == hMod )
    {
        DWORD const dwErr = GetLastError();
        return ( NO_ERROR != dwErr ) ? HRESULT_FROM_WIN32( dwErr ) : E_FAIL;
    }

    // Display information about the module -- ignore errors

    if ( fShowStatusInfo )
        DisplayModuleInformation( hMod );

    LPFNGETCLASSOBJECT pfn = (LPFNGETCLASSOBJECT)
                             GetProcAddress( hMod, "DllGetClassObject" );
    if ( 0 == pfn )
    {
        // Capture the error once, before printf has a chance to change it.
        // Re-reading it afterwards could turn this failure into S_OK.

        DWORD const dwErr = GetLastError();
        printf( "can't get DllGetClassObject: %d\n", dwErr );
        return ( NO_ERROR != dwErr ) ? HRESULT_FROM_WIN32( dwErr ) : E_FAIL;
    }

    // Ask the dll for the class factory, then have it create the object

    XInterface<IClassFactory> xClassFactory;
    HRESULT hr = pfn( clsid,
                      IID_IClassFactory,
                      xClassFactory.GetQIPointer() );
    if ( FAILED( hr ) )
    {
        printf( "can't instantiate the class factory: %#x\n", hr );
        return hr;
    }

    return xClassFactory->CreateInstance( 0, iid, ppvObject );
} //CreateFromModule
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Function: FakeCoCreateInstance
|
|
//
|
|
// Purpose: Creates a COM object
|
|
//
|
|
// Arguments: [clsid] -- Class ID of the object to load
|
|
// [iid] -- Interface ID requested
|
|
// [ppvObject] -- Returns the object created
|
|
// [fShowStatusInfo] -- TRUE to print status information
|
|
//
|
|
// Returns: HRESULT, S_OK if successful
|
|
//
|
|
// Needed because some wordbreakers register as single-threaded. Search
|
|
// products require multi-threaded because marshalling across apartments
|
|
// doesn't work and because it's too inefficient, especially on
|
|
// multi-processor machines.
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
HRESULT FakeCoCreateInstance(
    REFIID  clsid,
    REFIID  iid,
    void ** ppvObject,
    BOOL    fShowStatusInfo = TRUE )
{
    // Build the registry path "CLSID\{...}\InprocServer32"

    WCHAR awcCLSID[ 40 ];
    if ( 0 == StringFromGUID2( clsid, awcCLSID, ArraySize( awcCLSID ) ) )
        return E_FAIL;

    WCHAR awcKey[ 200 ];
    int const cwcKey = _snwprintf( awcKey,
                                   ArraySize( awcKey ),
                                   L"CLSID\\%ws\\InprocServer32",
                                   awcCLSID );
    if ( ( cwcKey < 0 ) || ( cwcKey >= (int) ArraySize( awcKey ) ) )
        return E_FAIL;

    HKEY hKey;
    DWORD dwErr = RegOpenKeyEx( HKEY_CLASSES_ROOT,
                                awcKey,
                                0,
                                KEY_QUERY_VALUE,
                                &hKey );
    if ( NO_ERROR != dwErr )
        return HRESULT_FROM_WIN32( dwErr );

    // Read the default value: the path of the in-process server dll.
    // Registry strings aren't guaranteed to be null-terminated, so leave
    // room for a terminator and add one afterwards.

    WCHAR awcDll[ MAX_PATH + 1 ];
    DWORD dwType = REG_NONE;
    DWORD cbDll = sizeof( awcDll ) - sizeof( WCHAR );
    dwErr = RegQueryValueEx( hKey,
                             L"",
                             0,
                             &dwType,
                             (LPBYTE) awcDll,
                             &cbDll );
    RegCloseKey( hKey );
    if ( NO_ERROR != dwErr )
        return HRESULT_FROM_WIN32( dwErr );

    if ( ( REG_SZ != dwType ) && ( REG_EXPAND_SZ != dwType ) )
        return HRESULT_FROM_WIN32( ERROR_INVALID_DATA );

    awcDll[ cbDll / sizeof( WCHAR ) ] = 0;

    // A REG_EXPAND_SZ path may contain environment variable references,
    // which LoadLibrary won't resolve on its own.

    if ( REG_EXPAND_SZ == dwType )
    {
        WCHAR awcExpanded[ MAX_PATH + 1 ];
        DWORD const cwcExpanded = ExpandEnvironmentStrings( awcDll,
                                                            awcExpanded,
                                                            ArraySize( awcExpanded ) );
        if ( 0 == cwcExpanded )
        {
            dwErr = GetLastError();
            return ( NO_ERROR != dwErr ) ? HRESULT_FROM_WIN32( dwErr ) : E_FAIL;
        }

        // A result larger than the buffer means the path was not stored

        if ( cwcExpanded > ArraySize( awcExpanded ) )
            return HRESULT_FROM_WIN32( ERROR_INSUFFICIENT_BUFFER );

        return CreateFromModule( clsid, iid, ppvObject, awcExpanded, fShowStatusInfo );
    }

    return CreateFromModule( clsid, iid, ppvObject, awcDll, fShowStatusInfo );
} //FakeCoCreateInstance
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Function: Stem
|
|
//
|
|
// Purpose: Stems the input text using the specified stemmer
|
|
//
|
|
// Arguments: [pwcText] -- The text to be stemmed
|
|
// [clsid] -- Class ID of the stemmer to use
|
|
// [pwcModule] -- Optional module name to override COM lookup.
|
|
// [cwcMaxToken] -- Maximum token size for the stemmer
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
HRESULT Stem(
|
|
WCHAR const * pwcText,
|
|
WCHAR const * pwcModule,
|
|
CLSID & clsid,
|
|
ULONG cwcMaxToken )
|
|
{
|
|
XInterface<IStemmer> xStemmer;
|
|
HRESULT hr = S_OK;
|
|
|
|
if ( 0 != pwcModule )
|
|
{
|
|
hr = CreateFromModule( clsid,
|
|
IID_IStemmer,
|
|
xStemmer.GetQIPointer(),
|
|
pwcModule );
|
|
}
|
|
else
|
|
{
|
|
#ifdef USE_FAKE_COM
|
|
hr = FakeCoCreateInstance( clsid,
|
|
IID_IStemmer,
|
|
xStemmer.GetQIPointer() );
|
|
#else
|
|
hr = CoCreateInstance( clsid,
|
|
0,
|
|
CLSCTX_INPROC_SERVER,
|
|
IID_IStemmer,
|
|
xStemmer.GetQIPointer() );
|
|
#endif
|
|
}
|
|
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't CoCreateInstance the stemmer: %#x\n", hr );
|
|
return hr;
|
|
}
|
|
|
|
BOOL fLicense = FALSE;
|
|
hr = xStemmer->Init( cwcMaxToken, &fLicense );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't Init() in the stemmer: %#x\n", hr );
|
|
return hr;
|
|
}
|
|
|
|
out( L"Stemmer requires license: %ws", fLicense ? L"Yes" : L"No" );
|
|
|
|
const WCHAR *pwcsLicense = 0;
|
|
hr = xStemmer->GetLicenseToUse( &pwcsLicense );
|
|
if ( FAILED( hr ) )
|
|
out( L"can't GetLicenseToUse() in the stemmer: %#x\n", hr );
|
|
else
|
|
out( L"Stemmer license: '%ws'", pwcsLicense );
|
|
|
|
CWordFormSink sink;
|
|
|
|
if ( 0 != pwcText )
|
|
{
|
|
out( L"Original text: '%ws'", pwcText );
|
|
hr = xStemmer->GenerateWordForms( pwcText, (ULONG) wcslen( pwcText ), &sink );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't GenerateWordForms() in the stemmer: %#x\n", hr );
|
|
return hr;
|
|
}
|
|
}
|
|
|
|
return S_OK;
|
|
} //Stem
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Function: WordBreak
|
|
//
|
|
// Purpose: Wordbreaks the input text or file
|
|
//
|
|
// Arguments: [fQuery] -- TRUE if query time FALSE if index time
|
|
// [pwcText] -- The text to be wordbroken.
|
|
// [pwcInputFile] -- Filename to be wordbroken if pwcText is 0
|
|
// [pwcModule] -- Optional module name to override COM lookup.
|
|
// [clsid] -- Class ID of the wordbreaker to use
|
|
// [cwcMaxToken] -- Maximum token size for the wordbreaker
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
HRESULT WordBreak(
|
|
BOOL fQuery,
|
|
WCHAR const * pwcText,
|
|
WCHAR const * pwcInputFile,
|
|
WCHAR const * pwcModule,
|
|
CLSID & clsid,
|
|
ULONG cwcMaxToken )
|
|
{
|
|
XInterface<IWordBreaker> xWordBreaker;
|
|
HRESULT hr = S_OK;
|
|
|
|
if ( 0 != pwcModule )
|
|
{
|
|
hr = CreateFromModule( clsid,
|
|
IID_IWordBreaker,
|
|
xWordBreaker.GetQIPointer(),
|
|
pwcModule );
|
|
}
|
|
else
|
|
{
|
|
#ifdef USE_FAKE_COM
|
|
hr = FakeCoCreateInstance( clsid,
|
|
IID_IWordBreaker,
|
|
xWordBreaker.GetQIPointer() );
|
|
#else
|
|
hr = CoCreateInstance( clsid,
|
|
0,
|
|
CLSCTX_INPROC_SERVER,
|
|
IID_IWordBreaker,
|
|
xWordBreaker.GetQIPointer() );
|
|
#endif
|
|
}
|
|
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't CoCreateInstance the wordbreaker: %#x\n", hr );
|
|
return hr;
|
|
}
|
|
|
|
BOOL fLicense = FALSE;
|
|
hr = xWordBreaker->Init( fQuery, cwcMaxToken, &fLicense );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't Init() in the wordbreaker: %#x\n", hr );
|
|
return hr;
|
|
}
|
|
|
|
out( L"Wordbreaker requires license: %ws", fLicense ? L"Yes" : L"No" );
|
|
|
|
const WCHAR *pwcsLicense = 0;
|
|
hr = xWordBreaker->GetLicenseToUse( &pwcsLicense );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't GetLicenseToUse() in the wordbreaker: %#x\n", hr );
|
|
return hr;
|
|
}
|
|
|
|
out( L"Wordbreaker license: '%ws'", pwcsLicense );
|
|
|
|
CWordSink wordSink;
|
|
CPhraseSink phraseSink;
|
|
|
|
if ( 0 != pwcText )
|
|
{
|
|
out( L"Original text: '%ws'", pwcText );
|
|
|
|
CPlainTextSource textSource( pwcText, (ULONG) wcslen( pwcText ) );
|
|
|
|
hr = xWordBreaker->BreakText( &textSource, &wordSink, &phraseSink );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't BreakText() in the wordbreaker: %#x\n", hr );
|
|
return hr;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
out( L"Wordbreaking text from file %ws", pwcInputFile );
|
|
|
|
// Load the Indexing Service filter (should be fine for testing).
|
|
|
|
XInterface<IFilter> xIFilter;
|
|
hr = LoadIFilter( pwcInputFile, 0, xIFilter.GetQIPointer() );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
// Fall back on the plain text filter.
|
|
|
|
printf( "Can't load filter, error %#x. Trying text filter.\n",
|
|
hr );
|
|
|
|
hr = g_pLoadTextFilter( pwcInputFile, xIFilter.GetPPointer() );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't load filter, error %#x\n", hr );
|
|
return hr;
|
|
}
|
|
}
|
|
|
|
// Initialize the filter
|
|
|
|
ULONG ulFlags = 0;
|
|
hr = xIFilter->Init( IFILTER_INIT_CANON_PARAGRAPHS |
|
|
IFILTER_INIT_CANON_HYPHENS |
|
|
IFILTER_INIT_APPLY_INDEX_ATTRIBUTES,
|
|
0,
|
|
0,
|
|
&ulFlags );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't initialize filter, error %#x\n", hr );
|
|
return hr;
|
|
}
|
|
|
|
CFilterTextSource textSource( xIFilter.GetReference() );
|
|
|
|
hr = xWordBreaker->BreakText( &textSource, &wordSink, &phraseSink );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't BreakText() in the wordbreaker: %#x\n", hr );
|
|
return hr;
|
|
}
|
|
}
|
|
|
|
return S_OK;
|
|
} //WordBreak
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Function: Render
|
|
//
|
|
// Synopsis: Prints an item in a safearray
|
|
//
|
|
// Arguments: [vt] - type of the element
|
|
// [pa] - pointer to the item
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
void PrintSafeArray( VARTYPE vt, LPSAFEARRAY pa );
|
|
|
|
void Render( VARTYPE vt, void * pv )
|
|
{
|
|
if ( VT_ARRAY & vt )
|
|
{
|
|
PrintSafeArray( (VARTYPE) (vt - VT_ARRAY), *(SAFEARRAY **) pv );
|
|
return;
|
|
}
|
|
|
|
switch ( vt )
|
|
{
|
|
case VT_UI1: outstr( L"%u", (unsigned) *(BYTE *)pv ); break;
|
|
case VT_I1: outstr( L"%d", (int) *(CHAR *)pv ); break;
|
|
case VT_UI2: outstr( L"%u", (unsigned) *(USHORT *)pv ); break;
|
|
case VT_I2: outstr( L"%d", (int) *(SHORT *)pv ); break;
|
|
case VT_UI4:
|
|
case VT_UINT: outstr( L"%u", (unsigned) *(ULONG *)pv ); break;
|
|
case VT_I4:
|
|
case VT_ERROR:
|
|
case VT_INT: outstr( L"%d", *(LONG *)pv ); break;
|
|
case VT_UI8: outstr( L"%I64u", *(unsigned __int64 *)pv ); break;
|
|
case VT_I8: outstr( L"%I64d", *(__int64 *)pv ); break;
|
|
case VT_R4: outstr( L"%f", *(float *)pv ); break;
|
|
case VT_R8: outstr( L"%lf", *(double *)pv ); break;
|
|
case VT_DECIMAL:
|
|
{
|
|
double dbl;
|
|
HRESULT hr = VarR8FromDec( (DECIMAL *) pv, &dbl );
|
|
if ( SUCCEEDED( hr ) )
|
|
outstr( L"%lf", dbl );
|
|
break;
|
|
}
|
|
case VT_CY:
|
|
{
|
|
double dbl;
|
|
HRESULT hr = VarR8FromCy( * (CY *) pv, &dbl );
|
|
if ( SUCCEEDED( hr ) )
|
|
outstr( L"%lf", dbl );
|
|
break;
|
|
}
|
|
case VT_BOOL: outstr( *(VARIANT_BOOL *)pv ? L"TRUE" : L"FALSE" ); break;
|
|
case VT_BSTR: outstr( L"%ws", *(BSTR *) pv ); break;
|
|
case VT_VARIANT:
|
|
{
|
|
PROPVARIANT * pVar = (PROPVARIANT *) pv;
|
|
Render( pVar->vt, & pVar->lVal );
|
|
break;
|
|
}
|
|
case VT_DATE:
|
|
{
|
|
SYSTEMTIME st;
|
|
BOOL fOK = VariantTimeToSystemTime( *(DATE *)pv, &st );
|
|
|
|
if ( !fOK )
|
|
break;
|
|
|
|
BOOL pm = st.wHour >= 12;
|
|
|
|
if ( st.wHour > 12 )
|
|
st.wHour -= 12;
|
|
else if ( 0 == st.wHour )
|
|
st.wHour = 12;
|
|
|
|
outstr( L"%2d-%02d-%04d %2d:%02d%wc",
|
|
(DWORD) st.wMonth,
|
|
(DWORD) st.wDay,
|
|
(DWORD) st.wYear,
|
|
(DWORD) st.wHour,
|
|
(DWORD) st.wMinute,
|
|
pm ? L'p' : L'a' );
|
|
break;
|
|
}
|
|
case VT_EMPTY:
|
|
case VT_NULL:
|
|
break;
|
|
default :
|
|
{
|
|
outstr( L"(vt 0x%x)", (int) vt );
|
|
break;
|
|
}
|
|
}
|
|
} //Render
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Function: PrintSafeArray
|
|
//
|
|
// Synopsis: Prints items in a safearray
|
|
//
|
|
// Arguments: [vt] - type of elements in the safearray
|
|
// [pa] - pointer to the safearray
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
void PrintSafeArray( VARTYPE vt, LPSAFEARRAY pa )
|
|
{
|
|
// Get the dimensions of the array
|
|
|
|
UINT cDim = SafeArrayGetDim( pa );
|
|
if ( 0 == cDim )
|
|
return;
|
|
|
|
XPtr<LONG> xDim( cDim );
|
|
XPtr<LONG> xLo( cDim );
|
|
XPtr<LONG> xUp( cDim );
|
|
|
|
for ( UINT iDim = 0; iDim < cDim; iDim++ )
|
|
{
|
|
HRESULT hr = SafeArrayGetLBound( pa, iDim + 1, &xLo[iDim] );
|
|
if ( FAILED( hr ) )
|
|
return;
|
|
|
|
xDim[ iDim ] = xLo[ iDim ];
|
|
|
|
hr = SafeArrayGetUBound( pa, iDim + 1, &xUp[iDim] );
|
|
if ( FAILED( hr ) )
|
|
return;
|
|
|
|
outstr( L"{" );
|
|
}
|
|
|
|
// slog through the array
|
|
|
|
UINT iLastDim = cDim - 1;
|
|
BOOL fDone = FALSE;
|
|
|
|
while ( !fDone )
|
|
{
|
|
// inter-element formatting
|
|
|
|
if ( xDim[ iLastDim ] != xLo[ iLastDim ] )
|
|
outstr( L"," );
|
|
|
|
// Get the element and render it
|
|
|
|
void *pv;
|
|
HRESULT hr = SafeArrayPtrOfIndex( pa, xDim.Get(), &pv );
|
|
if ( FAILED( hr ) )
|
|
return;
|
|
|
|
Render( vt, pv );
|
|
|
|
// Move to the next element and carry if necessary
|
|
|
|
ULONG cOpen = 0;
|
|
|
|
for ( LONG iDim = iLastDim; iDim >= 0; iDim-- )
|
|
{
|
|
if ( xDim[ iDim ] < xUp[ iDim ] )
|
|
{
|
|
xDim[ iDim ] = 1 + xDim[ iDim ];
|
|
break;
|
|
}
|
|
|
|
outstr( L"}" );
|
|
|
|
if ( 0 == iDim )
|
|
fDone = TRUE;
|
|
else
|
|
{
|
|
cOpen++;
|
|
xDim[ iDim ] = xLo[ iDim ];
|
|
}
|
|
}
|
|
|
|
for ( ULONG i = 0; !fDone && i < cOpen; i++ )
|
|
outstr( L"{" );
|
|
}
|
|
} //PrintSafeArray
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Function: PrintVectorItems
|
|
//
|
|
// Synopsis: Prints items in a PROPVARIANT vector
|
|
//
|
|
// Arguments: [pVal] - The array of values
|
|
// [cVals] - The count of values
|
|
// [pcFmt] - The format string
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
template<class T> void PrintVectorItems(
|
|
T * pVal,
|
|
ULONG cVals,
|
|
WCHAR const * pwcFmt )
|
|
{
|
|
outstr( L"{ " );
|
|
|
|
for( ULONG iVal = 0; iVal < cVals; iVal++ )
|
|
{
|
|
if ( 0 != iVal )
|
|
outstr( L"," );
|
|
outstr( pwcFmt, *pVal++ );
|
|
}
|
|
|
|
outstr( L" }" );
|
|
} //PrintVectorItems
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Function: DisplayValue
|
|
//
|
|
// Synopsis: Displays a PROPVARIANT value. Limited formatting is done.
|
|
//
|
|
// Arguments: [pVar] - The value to display
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
void DisplayValue( PROPVARIANT const * pVar )
|
|
{
|
|
if ( 0 == pVar )
|
|
{
|
|
outstr( L"NULL" );
|
|
return;
|
|
}
|
|
|
|
// Display the most typical variant types
|
|
|
|
PROPVARIANT const & v = *pVar;
|
|
|
|
switch ( v.vt )
|
|
{
|
|
case VT_EMPTY : break;
|
|
case VT_NULL : break;
|
|
case VT_I4 : outstr( L"%10d", v.lVal ); break;
|
|
case VT_UI1 : outstr( L"%10d", v.bVal ); break;
|
|
case VT_I2 : outstr( L"%10d", v.iVal ); break;
|
|
case VT_R4 : outstr( L"%10f", v.fltVal ); break;
|
|
case VT_R8 : outstr( L"%10lf", v.dblVal ); break;
|
|
case VT_BOOL : outstr( v.boolVal ? L"TRUE" : L"FALSE" ); break;
|
|
case VT_I1 : outstr( L"%10d", v.cVal ); break;
|
|
case VT_UI2 : outstr( L"%10u", v.uiVal ); break;
|
|
case VT_UI4 : outstr( L"%10u", v.ulVal ); break;
|
|
case VT_INT : outstr( L"%10d", v.lVal ); break;
|
|
case VT_UINT : outstr( L"%10u", v.ulVal ); break;
|
|
case VT_I8 : outstr( L"%20I64d", v.hVal ); break;
|
|
case VT_UI8 : outstr( L"%20I64u", v.hVal ); break;
|
|
case VT_ERROR : outstr( L"%#x", v.scode ); break;
|
|
case VT_LPSTR : outstr( L"%S", v.pszVal ); break;
|
|
case VT_LPWSTR : outstr( L"%ws", v.pwszVal ); break;
|
|
case VT_BSTR : outstr( L"%ws", v.bstrVal ); break;
|
|
case VT_BLOB :
|
|
{
|
|
outstr( L"blob cb %u ", v.blob.cbSize );
|
|
for ( unsigned x = 0; x < v.blob.cbSize; x++ )
|
|
outstr( L" %#x ", v.blob.pBlobData[x] );
|
|
break;
|
|
}
|
|
case VT_CY:
|
|
{
|
|
double dbl;
|
|
HRESULT hr = VarR8FromCy( v.cyVal, &dbl );
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
outstr( L"%lf", dbl );
|
|
break;
|
|
}
|
|
case VT_DECIMAL :
|
|
{
|
|
double dbl;
|
|
HRESULT hr = VarR8FromDec( (DECIMAL *) &v.decVal, &dbl );
|
|
|
|
if ( SUCCEEDED( hr ) )
|
|
outstr( L"%lf", dbl );
|
|
break;
|
|
}
|
|
case VT_FILETIME :
|
|
case VT_DATE :
|
|
{
|
|
SYSTEMTIME st;
|
|
ZeroMemory( &st, sizeof st );
|
|
|
|
if ( VT_DATE == v.vt )
|
|
{
|
|
BOOL fOK = VariantTimeToSystemTime( v.date, &st );
|
|
|
|
if ( !fOK )
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
FILETIME ft;
|
|
BOOL fOK = FileTimeToLocalFileTime( &v.filetime, &ft );
|
|
|
|
if ( fOK )
|
|
FileTimeToSystemTime( &ft, &st );
|
|
|
|
if ( !fOK )
|
|
break;
|
|
}
|
|
|
|
BOOL pm = st.wHour >= 12;
|
|
|
|
if ( st.wHour > 12 )
|
|
st.wHour -= 12;
|
|
else if ( 0 == st.wHour )
|
|
st.wHour = 12;
|
|
|
|
outstr( L"%2d-%02d-%04d %2d:%02d%wc",
|
|
(DWORD) st.wMonth,
|
|
(DWORD) st.wDay,
|
|
(DWORD) st.wYear,
|
|
(DWORD) st.wHour,
|
|
(DWORD) st.wMinute,
|
|
pm ? L'p' : L'a' );
|
|
break;
|
|
}
|
|
case VT_VECTOR | VT_I1:
|
|
PrintVectorItems( v.cac.pElems, v.cac.cElems, L"%d" ); break;
|
|
case VT_VECTOR | VT_I2:
|
|
PrintVectorItems( v.cai.pElems, v.cai.cElems, L"%d" ); break;
|
|
case VT_VECTOR | VT_I4:
|
|
PrintVectorItems( v.cal.pElems, v.cal.cElems, L"%d" ); break;
|
|
case VT_VECTOR | VT_I8:
|
|
PrintVectorItems( v.cah.pElems, v.cah.cElems, L"%I64d" ); break;
|
|
case VT_VECTOR | VT_UI1:
|
|
PrintVectorItems( v.caub.pElems, v.caub.cElems, L"%u" ); break;
|
|
case VT_VECTOR | VT_UI2:
|
|
PrintVectorItems( v.caui.pElems, v.caui.cElems, L"%u" ); break;
|
|
case VT_VECTOR | VT_UI4:
|
|
PrintVectorItems( v.caul.pElems, v.caul.cElems, L"%u" ); break;
|
|
case VT_VECTOR | VT_ERROR:
|
|
PrintVectorItems( v.cascode.pElems, v.cascode.cElems, L"%#x" ); break;
|
|
case VT_VECTOR | VT_UI8:
|
|
PrintVectorItems( v.cauh.pElems, v.cauh.cElems, L"%I64u" ); break;
|
|
case VT_VECTOR | VT_BSTR:
|
|
PrintVectorItems( v.cabstr.pElems, v.cabstr.cElems, L"%ws" ); break;
|
|
case VT_VECTOR | VT_LPSTR:
|
|
PrintVectorItems( v.calpstr.pElems, v.calpstr.cElems, L"%S" ); break;
|
|
case VT_VECTOR | VT_LPWSTR:
|
|
PrintVectorItems( v.calpwstr.pElems, v.calpwstr.cElems, L"%ws" ); break;
|
|
case VT_VECTOR | VT_R4:
|
|
PrintVectorItems( v.caflt.pElems, v.caflt.cElems, L"%f" ); break;
|
|
case VT_VECTOR | VT_R8:
|
|
PrintVectorItems( v.cadbl.pElems, v.cadbl.cElems, L"%lf" ); break;
|
|
default :
|
|
{
|
|
if ( VT_ARRAY & v.vt )
|
|
PrintSafeArray( (VARTYPE) ( v.vt - VT_ARRAY ), v.parray );
|
|
else
|
|
outstr( L"vt 0x%05x", v.vt );
|
|
break;
|
|
}
|
|
}
|
|
} //DisplayValue
|
|
|
|
//+---------------------------------------------------------------------------
|
|
//
|
|
// Function: Filter
|
|
//
|
|
// Purpose: Invokes an IFilter on a file
|
|
//
|
|
// Arguments: [pwcInputFile] -- Filename to be filtered
|
|
// [filterLoad] -- How to load the file into the filter.
|
|
// [pwcModule] -- Optional module name to override COM lookup.
|
|
// [pCLSID] -- Optional class ID of the filter to use.
|
|
// Required if pwcModule is specified.
|
|
// [fShowStatusInfo] -- TRUE to get other information
|
|
// FALSE for only output from the filter
|
|
// [fGetText] -- TRUE to retrieve text, FALSE to skip it
|
|
//
|
|
//----------------------------------------------------------------------------
|
|
|
|
HRESULT Filter(
|
|
WCHAR const * pwcInputFile,
|
|
enumFilterLoadMechanism filterLoad,
|
|
WCHAR const * pwcModule,
|
|
CLSID * pCLSID,
|
|
BOOL fShowStatusInfo,
|
|
BOOL fGetText )
|
|
{
|
|
XInterface<IFilter> xFilter;
|
|
HRESULT hr = S_OK;
|
|
|
|
if ( 0 != pwcModule )
|
|
{
|
|
// If the DLL is specified, use it
|
|
|
|
if ( fShowStatusInfo )
|
|
out( L"loading filter based on module name" );
|
|
|
|
hr = CreateFromModule( *pCLSID,
|
|
IID_IFilter,
|
|
xFilter.GetQIPointer(),
|
|
pwcModule,
|
|
fShowStatusInfo );
|
|
}
|
|
else if ( 0 != pCLSID )
|
|
{
|
|
// If we just have a CLSID and no module, use it
|
|
|
|
if ( fShowStatusInfo )
|
|
out( L"loading filter based on CLSID and the registry" );
|
|
|
|
#ifdef USE_FAKE_COM
|
|
hr = FakeCoCreateInstance( *pCLSID,
|
|
IID_IFilter,
|
|
xFilter.GetQIPointer(),
|
|
fShowStatusInfo );
|
|
#else
|
|
hr = CoCreateInstance( *pCLSID,
|
|
0,
|
|
CLSCTX_INPROC_SERVER,
|
|
IID_IFilter,
|
|
xFilter.GetQIPointer() );
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
// Use Indexing Service to load the filter
|
|
|
|
if ( fShowStatusInfo )
|
|
out( L"loading filter based on Indexing Service's LoadIFilter()" );
|
|
|
|
hr = LoadIFilter( pwcInputFile, 0, xFilter.GetQIPointer() );
|
|
|
|
if ( SUCCEEDED( hr ) && fShowStatusInfo )
|
|
{
|
|
// Dereference the VTable to get a pointer into the DLL
|
|
|
|
HMODULE hMod = GetModuleOfAddress( * (void **) xFilter.GetPointer() );
|
|
|
|
if ( 0 != hMod )
|
|
DisplayModuleInformation( hMod );
|
|
}
|
|
}
|
|
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't load the filter: %#x\n", hr );
|
|
return hr;
|
|
}
|
|
|
|
// Does the filter support IPersistStorage?
|
|
|
|
XInterface<IStorage> xStorage;
|
|
XInterface<IPersistStorage> xPersistStorage;
|
|
hr = xFilter->QueryInterface( IID_IPersistStorage,
|
|
xPersistStorage.GetQIPointer() );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
if ( fShowStatusInfo )
|
|
out( L" filter doesn't support IPersistStorage, error %#x", hr );
|
|
if ( eIPersistStorage == filterLoad )
|
|
return hr;
|
|
}
|
|
else
|
|
{
|
|
if ( fShowStatusInfo )
|
|
out( L" filter supports IPersistStorage" );
|
|
|
|
if ( eIPersistStorage == filterLoad )
|
|
{
|
|
if ( fShowStatusInfo )
|
|
out( L" loading via IPersistStorage" );
|
|
|
|
hr = StgOpenStorage( pwcInputFile,
|
|
0,
|
|
STGM_READ | STGM_SHARE_DENY_WRITE,
|
|
0,
|
|
0,
|
|
xStorage.GetPPointer() );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't open the file into a storage %#x\n", hr );
|
|
return hr;
|
|
}
|
|
|
|
hr = xPersistStorage->Load( xStorage.GetPointer() );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't Load() the storage into the filter %#x\n", hr );
|
|
return hr;
|
|
}
|
|
}
|
|
}
|
|
|
|
xPersistStorage.Free();
|
|
|
|
// Does the filter support IPersistStream?
|
|
|
|
XInterface<CIStream> xStream;
|
|
XInterface<IPersistStream> xPersistStream;
|
|
hr = xFilter->QueryInterface( IID_IPersistStream,
|
|
xPersistStream.GetQIPointer() );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
if ( fShowStatusInfo )
|
|
out( L" filter doesn't support IPersistStream, error %#x", hr );
|
|
if ( eIPersistStream == filterLoad )
|
|
return hr;
|
|
}
|
|
else
|
|
{
|
|
if ( fShowStatusInfo )
|
|
out( L" filter supports IPersistStream" );
|
|
|
|
if ( eIPersistStream == filterLoad )
|
|
{
|
|
if ( fShowStatusInfo )
|
|
out( L" loading via IPersistStream" );
|
|
xStream.Set( new CIStream() );
|
|
hr = xStream->Open( pwcInputFile );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't open the file into a stream %#x\n", hr );
|
|
return hr;
|
|
}
|
|
|
|
hr = xPersistStream->Load( xStream.GetPointer() );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't Load() the stream into the filter %#x\n", hr );
|
|
return hr;
|
|
}
|
|
}
|
|
}
|
|
|
|
xPersistStream.Free();
|
|
|
|
// Does the filter support IPersistFile?
|
|
|
|
XInterface<IPersistFile> xPersistFile;
|
|
hr = xFilter->QueryInterface( IID_IPersistFile,
|
|
xPersistFile.GetQIPointer() );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
if ( fShowStatusInfo )
|
|
out( L"filter doesn't support IPersistFile, error %#x\n", hr );
|
|
if ( eIPersistFile == filterLoad )
|
|
return hr;
|
|
}
|
|
else
|
|
{
|
|
if ( fShowStatusInfo )
|
|
out( L" filter supports IPersistFile" );
|
|
|
|
if ( eIPersistFile == filterLoad )
|
|
{
|
|
if ( fShowStatusInfo )
|
|
out( L" loading via IPersistFile" );
|
|
|
|
hr = xPersistFile->Load( pwcInputFile,
|
|
STGM_READ | STGM_SHARE_DENY_NONE );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't Load() the file into the filter %#x\n", hr );
|
|
return hr;
|
|
}
|
|
}
|
|
}
|
|
|
|
xPersistFile.Free();
|
|
|
|
// Initailize the IFilter
|
|
|
|
ULONG ulFlags = 0;
|
|
hr = xFilter->Init( IFILTER_INIT_CANON_PARAGRAPHS |
|
|
IFILTER_INIT_HARD_LINE_BREAKS |
|
|
IFILTER_INIT_CANON_HYPHENS |
|
|
IFILTER_INIT_CANON_SPACES |
|
|
IFILTER_INIT_INDEXING_ONLY |
|
|
IFILTER_INIT_APPLY_INDEX_ATTRIBUTES,
|
|
0,
|
|
0,
|
|
&ulFlags );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't Init() the filter, error %#x\n", hr );
|
|
return hr;
|
|
}
|
|
|
|
if ( fShowStatusInfo )
|
|
out( L" flags returned from IFilter::Init(): %#x", ulFlags );
|
|
|
|
// Pull all the data out of the filter
|
|
|
|
BOOL fText;
|
|
STAT_CHUNK StatChunk;
|
|
StatChunk.attribute.psProperty.ulKind = PRSPEC_PROPID;
|
|
|
|
do
|
|
{
|
|
const ULONG cwcMaxBuffer = 1024;
|
|
WCHAR awcBuffer[ cwcMaxBuffer ];
|
|
|
|
hr = xFilter->GetChunk( &StatChunk );
|
|
if ( FILTER_E_EMBEDDING_UNAVAILABLE == hr )
|
|
{
|
|
if ( fShowStatusInfo )
|
|
out( L"[-- encountered an embedding for which no filter is available --]" );
|
|
continue;
|
|
}
|
|
|
|
if ( FILTER_E_LINK_UNAVAILABLE == hr )
|
|
{
|
|
if ( fShowStatusInfo )
|
|
out( L"[-- encountered a link for which no filter is available --]" );
|
|
continue;
|
|
}
|
|
|
|
if ( FAILED( hr ) && hr != FILTER_E_END_OF_CHUNKS )
|
|
{
|
|
out( L"GetChunk returned error %#x", hr );
|
|
break;
|
|
}
|
|
|
|
if ( FILTER_E_END_OF_CHUNKS == hr )
|
|
break;
|
|
|
|
fText = ( CHUNK_TEXT == StatChunk.flags );
|
|
|
|
// Display information about the chunk
|
|
|
|
if ( fShowStatusInfo )
|
|
{
|
|
out( L"" );
|
|
out( L"----------------------------------------------------------------------" );
|
|
|
|
outstr( L" attribute: %08lx-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
|
|
StatChunk.attribute.guidPropSet.Data1,
|
|
StatChunk.attribute.guidPropSet.Data2,
|
|
StatChunk.attribute.guidPropSet.Data3,
|
|
StatChunk.attribute.guidPropSet.Data4[0],
|
|
StatChunk.attribute.guidPropSet.Data4[1],
|
|
StatChunk.attribute.guidPropSet.Data4[2],
|
|
StatChunk.attribute.guidPropSet.Data4[3],
|
|
StatChunk.attribute.guidPropSet.Data4[4],
|
|
StatChunk.attribute.guidPropSet.Data4[5],
|
|
StatChunk.attribute.guidPropSet.Data4[6],
|
|
StatChunk.attribute.guidPropSet.Data4[7] );
|
|
|
|
if ( StatChunk.attribute.psProperty.ulKind == PRSPEC_PROPID )
|
|
out( L" %d (%#x)",
|
|
StatChunk.attribute.psProperty.propid,
|
|
StatChunk.attribute.psProperty.propid );
|
|
else
|
|
out( L" \"%ws\"", StatChunk.attribute.psProperty.lpwstr );
|
|
|
|
out( L" idChunk: %d (%#x)", StatChunk.idChunk, StatChunk.idChunk );
|
|
outstr( L" breakType: %d (%#x)", StatChunk.breakType, StatChunk.breakType );
|
|
|
|
switch ( StatChunk.breakType )
|
|
{
|
|
case CHUNK_NO_BREAK: out( L" (no break) " ); break;
|
|
case CHUNK_EOW: out( L" (end of word) " ); break;
|
|
case CHUNK_EOS: out( L" (end of sentence) " ); break;
|
|
case CHUNK_EOP: out( L" (end of paragraph) " ); break;
|
|
case CHUNK_EOC: out( L" (end of chapter) " ); break;
|
|
default : out( L" (unknown break type) " ); break;
|
|
}
|
|
|
|
outstr( L" flags: %d (%#x)", StatChunk.flags, StatChunk.flags );
|
|
|
|
if ( CHUNK_TEXT & StatChunk.flags )
|
|
out( L" (text) " );
|
|
|
|
if ( CHUNK_VALUE & StatChunk.flags )
|
|
out( L" (value) " );
|
|
|
|
out( L" locale: %d (%#x)", StatChunk.locale, StatChunk.locale );
|
|
out( L" idChunkSource: %d (%#x)",
|
|
StatChunk.idChunkSource,
|
|
StatChunk.idChunkSource );
|
|
out( L" cwcStartSource: %d (%#x)",
|
|
StatChunk.cwcStartSource,
|
|
StatChunk.cwcStartSource );
|
|
out( L" cwcLenSource: %d (%#x)",
|
|
StatChunk.cwcLenSource,
|
|
StatChunk.cwcLenSource );
|
|
out( L" ------------------------------------------" );
|
|
}
|
|
|
|
if ( !fGetText )
|
|
continue;
|
|
|
|
// Retrieve all the data in the chunk
|
|
|
|
do
|
|
{
|
|
if ( fText )
|
|
{
|
|
ULONG cwcBuffer = cwcMaxBuffer;
|
|
hr = xFilter->GetText( &cwcBuffer, awcBuffer );
|
|
if ( FAILED( hr ) && ( FILTER_E_NO_MORE_TEXT != hr ) )
|
|
{
|
|
out( L"error %#x from GetText\n", hr );
|
|
return hr;
|
|
}
|
|
|
|
if ( FILTER_E_NO_MORE_TEXT == hr )
|
|
break;
|
|
|
|
awcBuffer[cwcBuffer] = 0;
|
|
out( L"%ws", awcBuffer );
|
|
|
|
if ( g_fDumpAsHex )
|
|
{
|
|
out( L"<--------> %d WCHARs in hex <-------->", cwcBuffer );
|
|
DumpStringAsHex( awcBuffer, cwcBuffer );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
PROPVARIANT * pPropValue = 0;
|
|
hr = xFilter->GetValue( &pPropValue );
|
|
|
|
if ( FAILED( hr ) )
|
|
{
|
|
if ( ( FILTER_E_NO_MORE_VALUES == hr ) ||
|
|
( FILTER_E_NO_VALUES == hr ) )
|
|
break;
|
|
|
|
out( L"GetValue failed, error %#x\n", hr );
|
|
return hr;
|
|
}
|
|
|
|
if ( fShowStatusInfo )
|
|
out( L"[-- variant type %d (%#x) --]", pPropValue->vt, pPropValue->vt );
|
|
|
|
DisplayValue( pPropValue );
|
|
out( L"" );
|
|
|
|
if ( 0 != pPropValue )
|
|
{
|
|
PropVariantClear( pPropValue );
|
|
CoTaskMemFree( pPropValue );
|
|
pPropValue = 0;
|
|
}
|
|
}
|
|
} while( TRUE ); // data in a chunk
|
|
} while( TRUE ); // for each chunk
|
|
|
|
if ( fShowStatusInfo )
|
|
{
|
|
out( L"" );
|
|
out( L"======================================================================" );
|
|
out( L"Filtering completed" );
|
|
}
|
|
|
|
xStream.Free();
|
|
xStorage.Free();
|
|
xFilter.Free();
|
|
|
|
// Now see if the file handle is still being locked by the filter
|
|
|
|
HANDLE hFile = CreateFile( pwcInputFile,
|
|
GENERIC_READ,
|
|
0, //no sharing
|
|
0,
|
|
OPEN_EXISTING,
|
|
FILE_ATTRIBUTE_NORMAL,
|
|
0 );
|
|
|
|
if ( INVALID_HANDLE_VALUE == hFile )
|
|
{
|
|
out( L"Filter didn't release file; can't open %ws, error %#x\n", pwcInputFile, GetLastError() );
|
|
return HRESULT_FROM_WIN32( GetLastError() );
|
|
}
|
|
|
|
out( L"Filter closed file properly when released\n" );
|
|
|
|
CloseHandle( hFile );
|
|
|
|
return S_OK;
|
|
} //Filter
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Function: GetQueryFunctions
|
|
//
|
|
// Synopsis: Loads needed undocumented functions from query.dll.
|
|
//
|
|
// Returns: The module handle or 0 on failure.
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
HINSTANCE GetQueryFunctions()
|
|
{
|
|
HINSTANCE h = LoadLibrary( L"query.dll" );
|
|
|
|
if ( 0 != h )
|
|
{
|
|
#ifdef _WIN64
|
|
char const * pcCIShutdown = "?CIShutdown@@YAXXZ";
|
|
#else
|
|
char const * pcCIShutdown = "?CIShutdown@@YGXXZ";
|
|
#endif
|
|
|
|
g_pCIShutdown = (PFnCIShutdown) GetProcAddress( h, pcCIShutdown );
|
|
if ( 0 == g_pCIShutdown )
|
|
{
|
|
printf( "can't get CIShutdown function address\n" );
|
|
FreeLibrary( h );
|
|
return 0;
|
|
}
|
|
|
|
g_pLoadTextFilter = (PFnLoadTextFilter)
|
|
GetProcAddress( h, "LoadTextFilter" );
|
|
|
|
if ( 0 == g_pLoadTextFilter )
|
|
{
|
|
printf( "can't get LoadTextFilter function address\n" );
|
|
FreeLibrary( h );
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
return h;
|
|
} //GetQueryFunctions
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Function: ExceptionFilter
|
|
//
|
|
// Synopsis: Displays information about the exception
|
|
//
|
|
// Arguments: [pep] -- Exception pointers
|
|
//
|
|
// Returns: EXCEPTION_EXECUTE_HANDLER
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
int ExceptionFilter( EXCEPTION_POINTERS * pep )
|
|
{
|
|
printf( "fatal exception caught\n" );
|
|
|
|
EXCEPTION_RECORD & r = * ( pep->ExceptionRecord );
|
|
|
|
printf( " exception code: %#x\n", r.ExceptionCode );
|
|
printf( " exception address %#p\n", r.ExceptionAddress );
|
|
|
|
if ( ( EXCEPTION_ACCESS_VIOLATION == r.ExceptionCode ) &&
|
|
( r.NumberParameters >= 2 ) )
|
|
{
|
|
printf( " attempted %ws at address %#p\n",
|
|
( 0 == r.ExceptionInformation[0] ) ?
|
|
L"read" : L"write",
|
|
(void *) r.ExceptionInformation[1] );
|
|
}
|
|
|
|
#ifdef _X86_
|
|
|
|
CONTEXT & c = * (CONTEXT *) (pep->ContextRecord );
|
|
|
|
if ( 0 != ( c.ContextFlags & CONTEXT_INTEGER ) )
|
|
{
|
|
printf( " eax: %#x\n", c.Eax );
|
|
printf( " ebx: %#x\n", c.Ebx );
|
|
printf( " ecx: %#x\n", c.Ecx );
|
|
printf( " edx: %#x\n", c.Edx );
|
|
printf( " edi: %#x\n", c.Edi );
|
|
printf( " esi: %#x\n", c.Esi );
|
|
}
|
|
|
|
if ( 0 != ( c.ContextFlags & CONTEXT_CONTROL ) )
|
|
{
|
|
printf( " ebp: %#x\n", c.Ebp );
|
|
printf( " eip: %#x\n", c.Eip );
|
|
printf( " esp: %#x\n", c.Esp );
|
|
}
|
|
|
|
#endif // _X86_
|
|
|
|
// Attempt to get the module name where the exception happened
|
|
|
|
HMODULE hMod = GetModuleOfAddress( r.ExceptionAddress );
|
|
|
|
if ( 0 != hMod )
|
|
{
|
|
WCHAR awcPath[ MAX_PATH ];
|
|
DWORD cwc= GetModuleFileName( hMod,
|
|
awcPath,
|
|
ArraySize( awcPath ) );
|
|
awcPath[ ArraySize( awcPath ) - 1 ] = 0;
|
|
if ( 0 != cwc )
|
|
printf( " exception in module %ws\n", awcPath );
|
|
}
|
|
|
|
return EXCEPTION_EXECUTE_HANDLER;
|
|
} //ExceptionFilter
|
|
|
|
//+-------------------------------------------------------------------------
|
|
//
|
|
// Function: wmain
|
|
//
|
|
// Synopsis: Main entrypoint for the program
|
|
//
|
|
// Arguments: [argc] -- Count of command-line arguments
|
|
// [argv] -- The command-line arguments
|
|
//
|
|
// Returns: Application return code
|
|
//
|
|
//--------------------------------------------------------------------------
|
|
|
|
extern "C" int __cdecl wmain( int argc, WCHAR * argv[] )
|
|
{
|
|
// Parse the command-line arguments
|
|
|
|
BOOL fWordBreak = FALSE;
|
|
BOOL fQuery = FALSE;
|
|
BOOL fStem = FALSE;
|
|
BOOL fFilter = FALSE;
|
|
BOOL fGetText = TRUE;
|
|
BOOL fShowStatusInfo = TRUE;
|
|
enumFilterLoadMechanism filterLoad = eIPersistFile;
|
|
WCHAR const * pwcModule = 0;
|
|
WCHAR const * pwcInputFile = 0;
|
|
WCHAR const * pwcOutputFile = 0;
|
|
WCHAR *pwcText = 0;
|
|
WCHAR const * pwcCLSID = 0;
|
|
ULONG cwcMaxToken = 100;
|
|
|
|
for ( int i = 1; i < argc; i++ )
|
|
{
|
|
if ( L'-' == argv[i][0] || L'/' == argv[i][0] )
|
|
{
|
|
WCHAR wc = towupper( argv[i][1] );
|
|
|
|
if ( ':' != argv[i][2] &&
|
|
'B' != wc &&
|
|
'D' != wc &&
|
|
'F' != wc &&
|
|
'T' != wc &&
|
|
'N' != wc &&
|
|
'Q' != wc &&
|
|
'S' != wc )
|
|
Usage();
|
|
|
|
if ( 'C' == wc )
|
|
pwcCLSID = argv[i] + 3;
|
|
else if ( 'D' == wc )
|
|
g_fDumpAsHex = TRUE;
|
|
else if ( 'I' == wc )
|
|
{
|
|
if ( 0 != pwcText )
|
|
Usage();
|
|
|
|
pwcInputFile = argv[i] + 3;
|
|
}
|
|
else if ( 'M' == wc )
|
|
pwcModule = argv[i] + 3;
|
|
else if ( 'N' == wc )
|
|
fShowStatusInfo = FALSE;
|
|
else if ( 'O' == wc )
|
|
pwcOutputFile = argv[i] + 3;
|
|
else if ( 'S' == wc )
|
|
fStem = TRUE;
|
|
else if ( 'T' == wc )
|
|
fGetText = FALSE;
|
|
else if ( 'B' == wc )
|
|
fWordBreak = TRUE;
|
|
else if ( 'F' == wc )
|
|
{
|
|
fFilter = TRUE;
|
|
|
|
WCHAR wcNext = towupper( argv[i][2] );
|
|
|
|
if ( L'S' == wcNext )
|
|
filterLoad = eIPersistStream;
|
|
else if ( L'T' == wcNext )
|
|
filterLoad = eIPersistStorage;
|
|
else if ( 0 != wcNext )
|
|
Usage();
|
|
}
|
|
else if ( 'Q' == wc )
|
|
fQuery = TRUE;
|
|
else if ( 'X' == wc )
|
|
cwcMaxToken = _wtoi( argv[i] + 3 );
|
|
else
|
|
Usage();
|
|
}
|
|
else if ( 0 != pwcText || 0 != pwcInputFile )
|
|
Usage();
|
|
else
|
|
pwcText = argv[i];
|
|
}
|
|
|
|
// We have to either wordbreak, stem, or filter
|
|
|
|
if ( ( fWordBreak + fStem + fFilter ) != 1 )
|
|
Usage();
|
|
|
|
// We need the classid of the wordbreaker or stemmer to load
|
|
|
|
if ( ( fWordBreak || fStem ) && ( 0 == pwcCLSID ) )
|
|
Usage();
|
|
|
|
// If we're loading by module, we need a CLSID
|
|
|
|
if ( ( 0 != pwcModule ) && ( 0 == pwcCLSID ) )
|
|
Usage();
|
|
|
|
// Need input text or an input file to wordbreak
|
|
|
|
if ( fWordBreak && ( 0 == pwcText ) && ( 0 == pwcInputFile ) )
|
|
Usage();
|
|
|
|
// Need input text to stem
|
|
|
|
if ( fStem && ( 0 == pwcText ) )
|
|
Usage();
|
|
|
|
// Need input file to filter
|
|
|
|
if ( fFilter && ( 0 == pwcInputFile ) )
|
|
Usage();
|
|
|
|
CLSID clsid;
|
|
if ( 0 != pwcCLSID )
|
|
{
|
|
HRESULT hr = CLSIDFromString( (LPOLESTR) pwcCLSID, &clsid );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't convert CLSID string to a CLSID: %#x\n", hr );
|
|
exit( 1 );
|
|
}
|
|
}
|
|
|
|
// Get the full path of the input file, if specified
|
|
|
|
WCHAR awcPath[MAX_PATH];
|
|
if ( 0 != pwcInputFile )
|
|
{
|
|
_wfullpath( awcPath, pwcInputFile, MAX_PATH );
|
|
pwcInputFile = awcPath;
|
|
}
|
|
|
|
// Get the full path of the output file, if specified, then open it
|
|
|
|
WCHAR awcOutputPath[MAX_PATH];
|
|
if ( 0 != pwcOutputFile )
|
|
{
|
|
_wfullpath( awcOutputPath, pwcOutputFile, MAX_PATH );
|
|
pwcOutputFile = awcOutputPath;
|
|
|
|
g_fpOut = _wfopen( pwcOutputFile, L"wb" );
|
|
if ( 0 == g_fpOut )
|
|
{
|
|
printf( "unable to open output file '%ws'\n", pwcOutputFile );
|
|
exit( 1 );
|
|
}
|
|
|
|
const WCHAR awcUnicodeHeader[] = { 0xfeff, 0x0000 };
|
|
fwprintf( g_fpOut, awcUnicodeHeader );
|
|
}
|
|
|
|
// Initialize COM multi-threaded, just like search products do
|
|
|
|
HRESULT hr = CoInitializeEx( 0, COINIT_MULTITHREADED );
|
|
if ( FAILED( hr ) )
|
|
{
|
|
printf( "can't initialize com: %#x\n", hr );
|
|
exit( 1 );
|
|
}
|
|
|
|
// Load query.dll private exports
|
|
|
|
HINSTANCE hQuery = GetQueryFunctions();
|
|
if ( 0 == hQuery )
|
|
{
|
|
printf( "can't load needed functions from query.dll\n" );
|
|
exit( 1 );
|
|
}
|
|
|
|
// Do the work
|
|
|
|
__try
|
|
{
|
|
if ( fStem )
|
|
Stem( pwcText,
|
|
pwcModule,
|
|
clsid,
|
|
cwcMaxToken );
|
|
|
|
if ( fWordBreak )
|
|
WordBreak( fQuery,
|
|
pwcText,
|
|
pwcInputFile,
|
|
pwcModule,
|
|
clsid,
|
|
cwcMaxToken );
|
|
|
|
if ( fFilter )
|
|
Filter( pwcInputFile,
|
|
filterLoad,
|
|
pwcModule,
|
|
( 0 == pwcCLSID ) ? 0 : &clsid,
|
|
fShowStatusInfo,
|
|
fGetText );
|
|
}
|
|
__except( ExceptionFilter( GetExceptionInformation() ) )
|
|
{
|
|
printf( "fatal exception code %#x\n", GetExceptionCode() );
|
|
|
|
exit( -1 );
|
|
}
|
|
|
|
// Shut down query.dll's filter loading code so it won't AV on exit.
|
|
|
|
g_pCIShutdown();
|
|
|
|
FreeLibrary( hQuery );
|
|
|
|
CoUninitialize();
|
|
|
|
if ( 0 != g_fpOut )
|
|
{
|
|
fclose( g_fpOut );
|
|
g_fpOut = 0;
|
|
}
|
|
|
|
return 0;
|
|
} //wmain
|
|
|