Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2569 lines
76 KiB

  1. //+-------------------------------------------------------------------------
  2. //
  3. // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
  4. // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
  5. // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
  6. // PARTICULAR PURPOSE.
  7. //
  8. // Copyright (c) Microsoft Corporation. All rights reserved.
  9. //
  10. // PROGRAM: lrtest.cxx
  11. //
  12. // Test program for invoking language resources including wordbreakers
  13. // and stemmers. Also invokes filters.
  14. //
  15. // PLATFORM: Windows
  16. //
  17. //--------------------------------------------------------------------------
  18. #ifndef UNICODE
  19. #define UNICODE
  20. #endif
  21. #define _OLE32_
  22. #include <windows.h>
  23. #include <oleext.h>
  24. #include <psapi.h>
  25. #include <stdlib.h>
  26. #include <stdio.h>
  27. #include <limits.h>
  28. #include <eh.h>
  29. #include <ntquery.h>
  30. #include <filterr.h>
  31. #include <cierror.h>
  32. #include <indexsrv.h>
  33. #include "minici.hxx"
  34. #define USE_FAKE_COM
  35. //
  36. // These are undocumented Indexing Service functions, but they're needed
  37. // to load filters and not crash, and to load the plain text filter.
  38. //
  39. typedef void (__stdcall * PFnCIShutdown)( void );
  40. typedef HRESULT (__stdcall * PFnLoadTextFilter)( WCHAR const * pwcPath,
  41. IFilter ** ppIFilter );
  42. PFnCIShutdown g_pCIShutdown = 0;
  43. PFnLoadTextFilter g_pLoadTextFilter = 0;
  44. // If this is non-zero, it's a file handle to which output is streamed
  45. FILE * g_fpOut = 0;
  46. // If TRUE, strings from wordbreakers and stemmers are dumped in hex
  47. BOOL g_fDumpAsHex = FALSE;
  // Selects which persistence interface is used when loading a filter.
  enum enumFilterLoadMechanism
  {
      eIPersistFile    = 0,
      eIPersistStream  = 1,
      eIPersistStorage = 2
  };
  54. //+-------------------------------------------------------------------------
  55. //
  56. // Function: out
  57. //
  58. // Synopsis: Like printf, only will send output to the output file if
  59. // specified, or just to the console. Appends a carriage
  60. // return / line feed to the text.
  61. //
  62. // Arguments: [pwcFormat] -- printf-style format string describing the arguments
  63. // [...] -- Variable arguments
  64. //
  65. // Returns: count of characters emitted.
  66. //
  67. //--------------------------------------------------------------------------
  68. int out( const WCHAR * pwcFormat, ... )
  69. {
  70. va_list arglist;
  71. va_start( arglist, pwcFormat );
  72. // Writing to the output file is done in binary mode so the output can be
  73. // Unicode. The side-effect is that "\n" isn't translated into "\r\n"
  74. // automatically, so it has to be explicit.
  75. int i;
  76. if ( 0 != g_fpOut )
  77. {
  78. i = vfwprintf( g_fpOut, pwcFormat, arglist );
  79. i += fwprintf( g_fpOut, L"\r\n" );
  80. }
  81. else
  82. {
  83. i = vwprintf( pwcFormat, arglist );
  84. i += wprintf( L"\n" );
  85. }
  86. va_end( arglist );
  87. return i;
  88. } //out
  89. //+-------------------------------------------------------------------------
  90. //
  91. // Function: outstr
  92. //
  93. // Synopsis: Like printf, only will send output to the output file if
  94. // specified, or just to the console.
  95. //
  96. // Arguments: [pwcFormat] -- printf-style format string describing the arguments
  97. // [...] -- Variable arguments
  98. //
  99. // Returns: count of characters emitted.
  100. //
  101. //--------------------------------------------------------------------------
  102. int outstr( const WCHAR * pwcFormat, ... )
  103. {
  104. va_list arglist;
  105. va_start( arglist, pwcFormat );
  106. int i;
  107. if ( 0 != g_fpOut )
  108. i = vfwprintf( g_fpOut, pwcFormat, arglist );
  109. else
  110. i = vwprintf( pwcFormat, arglist );
  111. va_end( arglist );
  112. return i;
  113. } //outstr
  114. //+-------------------------------------------------------------------------
  115. //
  116. // Function: Usage
  117. //
  118. // Synopsis: Displays usage information about the application, then exits.
  119. //
  120. //--------------------------------------------------------------------------
  void Usage()
  {
      // The help text, one entry per output line, emitted verbatim in order.
      static const char * const s_apszLines[] =
      {
          "usage: lrtest [/d] [/b] [/f] [/q] [/s] [/x:#] /c:clsid [/o:file] [/i:file] [text]\n",
          "\n",
          " Language Resource test program\n",
          "\n",
          " arguments:\n",
          " /b Load the wordbreaker (can't be used with /s or /f)\n",
          " /c: CLSID of the wordbreaker or stemmer to load\n",
          " /d Dumps output strings in hex as well as strings\n",
          " /f Load the filter (can't be used with /b or /s)\n",
          " If /c isn't specified, use Indexing Service's LoadIFilter\n",
          " /fs Same as /f, but uses IPersistStream, not IPersistFile\n",
          " /ft Same as /f, but uses IPersistStorage, not IPersistFile\n",
          " /i: Path of an input file, if [text] isn't specified\n",
          " /m: Optional path of the dll to load. Overrides COM CLSID lookup\n",
          " /n No status information. Used with /f, only displays filter output\n",
          " /o: Path of an output file. If not specified, console is used\n",
          " /q If wordbreaking, do so for query instead of indexing\n",
          " /s Load the stemmer (can't be used with /b or /f)\n",
          " /t No text information; just chunks. Used with /f\n",
          " /x:# Maximum token size, default is 100\n",
          " text Text to wordbreak or stem, if /i: isn't specified\n",
          "\n",
          " examples:\n",
          " lrtest /b /c:{369647e0-17b0-11ce-9950-00aa004bbb1f} \"Alice's restaurant\"\n",
          " lrtest /b /q /c:{369647e0-17b0-11ce-9950-00aa004bbb1f} \"data-base\"\n",
          " lrtest /b /c:{369647e0-17b0-11ce-9950-00aa004bbb1f} /i:foo.doc\n",
          " lrtest /b /c:{369647e0-17b0-11ce-9950-00aa004bbb1f} /m:wb.dll /i:foo.doc\n",
          " lrtest /d /s /c:{eeed4c20-7f1b-11ce-be57-00aa0051fe20} peach /o:output.txt\n",
          " lrtest /f /c:{f07f3920-7b8c-11cf-9be8-00aa004b9986} /i:foo.doc\n",
          " lrtest /f /i:foo.doc\n",
          " lrtest /fs /i:foo.doc\n",
          "\n"
      };

      const size_t cLines = sizeof( s_apszLines ) / sizeof( s_apszLines[ 0 ] );

      for ( size_t iLine = 0; iLine < cLines; iLine++ )
          printf( "%s", s_apszLines[ iLine ] );

      // Usage never returns to its caller.
      exit( 1 );
  } //Usage
  157. //+-------------------------------------------------------------------------
  158. //
  159. // Function: GetModuleOfAddress
  160. //
  161. // Synopsis: Returns the module handle of a given address or 0
  162. //
  163. // Arguments: [pAddress] -- Address in one of the modules loaded
  164. //
  165. //--------------------------------------------------------------------------
  166. HMODULE GetModuleOfAddress( void * pAddress )
  167. {
  168. DWORD cbNeeded;
  169. BOOL fOK = EnumProcessModules( GetCurrentProcess(),
  170. 0,
  171. 0,
  172. &cbNeeded );
  173. if ( fOK )
  174. {
  175. ULONG cModules = cbNeeded / sizeof HMODULE;
  176. XPtr<HMODULE> aModules( cModules );
  177. fOK = EnumProcessModules( GetCurrentProcess(),
  178. aModules.Get(),
  179. cbNeeded,
  180. &cbNeeded );
  181. if ( fOK )
  182. {
  183. for ( ULONG i = 0; i < cModules; i++ )
  184. {
  185. MODULEINFO mi;
  186. GetModuleInformation( GetCurrentProcess(),
  187. aModules[ i ],
  188. &mi,
  189. sizeof mi );
  190. if ( ( pAddress >= mi.lpBaseOfDll ) &&
  191. ( pAddress < ( (BYTE *) mi.lpBaseOfDll + mi.SizeOfImage ) ) )
  192. {
  193. return aModules[i];
  194. }
  195. }
  196. }
  197. }
  198. return 0;
  199. } //GetModuleOfAddress
  200. //+-------------------------------------------------------------------------
  201. //
  202. // Function: DumpStringAsHex
  203. //
  204. // Synopsis: Emits a string in hex format. Useful for East Asian languages.
  205. //
  206. //--------------------------------------------------------------------------
  207. void DumpStringAsHex( WCHAR const * pwc, ULONG cwc )
  208. {
  209. if ( g_fDumpAsHex )
  210. {
  211. for ( ULONG i = 0; i < cwc; i++ )
  212. {
  213. if ( 0 != i )
  214. outstr( L" " );
  215. outstr( L"%#x", pwc[ i ] );
  216. }
  217. out( L"" );
  218. }
  219. } //DumpStringAsHex
  220. //+---------------------------------------------------------------------------
  221. //
  222. // Class: CIStream
  223. //
  224. // Purpose: Wraps a file with an IStream.
  225. //
  226. //----------------------------------------------------------------------------
  227. class CIStream : public IStream
  228. {
  229. public:
  230. CIStream() : _hFile( INVALID_HANDLE_VALUE ),
  231. _cRef( 1 ),
  232. _lOffset( 0 ),
  233. _cbData( 0 )
  234. {
  235. }
  236. ~CIStream()
  237. {
  238. Free();
  239. }
  240. void Free()
  241. {
  242. if ( INVALID_HANDLE_VALUE != _hFile )
  243. {
  244. CloseHandle( _hFile );
  245. _hFile = INVALID_HANDLE_VALUE;
  246. }
  247. }
  248. HRESULT Open( WCHAR const * pwcFile )
  249. {
  250. Free();
  251. _hFile = CreateFile( pwcFile,
  252. GENERIC_READ,
  253. FILE_SHARE_READ | FILE_SHARE_WRITE |
  254. FILE_SHARE_DELETE,
  255. 0,
  256. OPEN_EXISTING,
  257. FILE_ATTRIBUTE_NORMAL,
  258. 0 );
  259. if ( INVALID_HANDLE_VALUE == _hFile )
  260. return HRESULT_FROM_WIN32( GetLastError() );
  261. _cbData = GetFileSize( _hFile, 0 );
  262. return S_OK;
  263. }
  264. HRESULT STDMETHODCALLTYPE QueryInterface( REFIID riid, void ** ppvObj )
  265. {
  266. if ( 0 == ppvObj )
  267. return E_INVALIDARG;
  268. *ppvObj = 0;
  269. if ( IID_IStream == riid )
  270. *ppvObj = (IStream *) this;
  271. else if ( IID_IUnknown == riid )
  272. *ppvObj = (IUnknown *) this;
  273. else
  274. return E_NOINTERFACE;
  275. AddRef();
  276. return S_OK;
  277. }
  278. ULONG STDMETHODCALLTYPE AddRef()
  279. {
  280. return InterlockedIncrement( &_cRef );
  281. }
  282. ULONG STDMETHODCALLTYPE Release()
  283. {
  284. unsigned long uTmp = InterlockedDecrement( &_cRef );
  285. if ( 0 == uTmp )
  286. delete this;
  287. return uTmp;
  288. }
  289. HRESULT STDMETHODCALLTYPE Read(
  290. void * pv,
  291. ULONG cb,
  292. ULONG * pcbRead )
  293. {
  294. DWORD dwOff = SetFilePointer( _hFile,
  295. _lOffset,
  296. 0,
  297. FILE_BEGIN );
  298. if ( INVALID_SET_FILE_POINTER == dwOff )
  299. return HRESULT_FROM_WIN32( GetLastError() );
  300. BOOL f = ReadFile( _hFile,
  301. pv,
  302. cb,
  303. pcbRead,
  304. 0 );
  305. if ( !f )
  306. return HRESULT_FROM_WIN32( GetLastError() );
  307. return S_OK;
  308. }
  309. HRESULT STDMETHODCALLTYPE Write(
  310. VOID const * pv,
  311. ULONG cb,
  312. ULONG * pcbWritten )
  313. {
  314. return E_NOTIMPL;
  315. }
  316. HRESULT STDMETHODCALLTYPE Seek(
  317. LARGE_INTEGER dlibMoveIn,
  318. DWORD dwOrigin,
  319. ULARGE_INTEGER * plibNewPosition )
  320. {
  321. HRESULT hr = S_OK;
  322. LONG dlibMove = dlibMoveIn.LowPart;
  323. ULONG cbNewPos = dlibMove;
  324. switch(dwOrigin)
  325. {
  326. case STREAM_SEEK_SET:
  327. if (dlibMove >= 0)
  328. _lOffset = dlibMove;
  329. else
  330. hr = STG_E_SEEKERROR;
  331. break;
  332. case STREAM_SEEK_CUR:
  333. if (!(dlibMove < 0 && ( -dlibMove > _lOffset)))
  334. _lOffset += (ULONG) dlibMove;
  335. else
  336. hr = STG_E_SEEKERROR;
  337. break;
  338. case STREAM_SEEK_END:
  339. if (!(dlibMove < 0 ))
  340. _lOffset = _cbData + dlibMove;
  341. else
  342. hr = STG_E_SEEKERROR;
  343. break;
  344. default:
  345. hr = STG_E_SEEKERROR;
  346. }
  347. if ( 0 != plibNewPosition )
  348. ULISet32(*plibNewPosition, _lOffset);
  349. return hr;
  350. }
  351. HRESULT STDMETHODCALLTYPE SetSize( ULARGE_INTEGER cb )
  352. {
  353. return E_NOTIMPL;
  354. }
  355. HRESULT STDMETHODCALLTYPE CopyTo(
  356. IStream * pstm,
  357. ULARGE_INTEGER cb,
  358. ULARGE_INTEGER * pcbRead,
  359. ULARGE_INTEGER * pcbWritten )
  360. {
  361. return E_NOTIMPL;
  362. }
  363. HRESULT STDMETHODCALLTYPE Commit( DWORD grfCommitFlags )
  364. {
  365. return S_OK;
  366. }
  367. HRESULT STDMETHODCALLTYPE Revert()
  368. {
  369. return S_OK;
  370. }
  371. HRESULT STDMETHODCALLTYPE LockRegion(
  372. ULARGE_INTEGER libOffset,
  373. ULARGE_INTEGER cb,
  374. DWORD dwLockType )
  375. {
  376. return STG_E_INVALIDFUNCTION;
  377. }
  378. HRESULT STDMETHODCALLTYPE UnlockRegion(
  379. ULARGE_INTEGER libOffset,
  380. ULARGE_INTEGER cb,
  381. DWORD dwLockType)
  382. {
  383. return STG_E_INVALIDFUNCTION;
  384. }
  385. HRESULT STDMETHODCALLTYPE Stat(
  386. STATSTG * pstatstg,
  387. DWORD statflag )
  388. {
  389. memset( pstatstg, 0, sizeof STATSTG );
  390. pstatstg->type = STGTY_STREAM;
  391. pstatstg->cbSize.QuadPart = _cbData;
  392. pstatstg->grfMode = STGM_READ;
  393. return S_OK;
  394. }
  395. HRESULT STDMETHODCALLTYPE Clone( IStream ** ppstm )
  396. {
  397. return E_NOTIMPL;
  398. }
  399. private:
  400. LONG _cRef;
  401. HANDLE _hFile;
  402. LONG _lOffset;
  403. LONG _cbData;
  404. };
  405. //+---------------------------------------------------------------------------
  406. //
  407. // Class: CPlainTextSource
  408. //
  409. // Purpose: Takes a simple buffer and provides a TEXT_SOURCE for it, which
  410. // can be passed to wordbreakers.
  411. //
  412. //----------------------------------------------------------------------------
  413. class CPlainTextSource : public TEXT_SOURCE
  414. {
  415. public:
  416. CPlainTextSource(
  417. WCHAR const * pwcText,
  418. ULONG cwc )
  419. {
  420. awcBuffer = pwcText;
  421. iCur = 0;
  422. iEnd = cwc;
  423. pfnFillTextBuffer = PlainFillBuf;
  424. }
  425. static HRESULT __stdcall PlainFillBuf( TEXT_SOURCE * pTextSource )
  426. {
  427. return WBREAK_E_END_OF_TEXT;
  428. }
  429. };
  430. //+---------------------------------------------------------------------------
  431. //
  432. // Class: CFilterTextSource
  433. //
  434. // Purpose: Takes an IFilter and provides a TEXT_SOURCE for it, which
  435. // can be passed to wordbreakers.
  436. //
  437. //----------------------------------------------------------------------------
  438. #pragma warning(disable: 4512)
  439. class CFilterTextSource : public TEXT_SOURCE
  440. {
  441. public:
  442. CFilterTextSource( IFilter & filter ) :
  443. _filter( filter ),
  444. _hr( S_OK )
  445. {
  446. awcBuffer = _awcBuffer;
  447. iCur = 0;
  448. iEnd = 0;
  449. pfnFillTextBuffer = FilterFillBuf;
  450. // Get the first chunk
  451. _hr = _filter.GetChunk( &_Stat );
  452. // Get text for the chunk
  453. FillBuf();
  454. }
  455. static HRESULT __stdcall FilterFillBuf( TEXT_SOURCE * pTextSource )
  456. {
  457. CFilterTextSource & This = * (CFilterTextSource *) pTextSource;
  458. return This.FillBuf();
  459. }
  460. private:
  461. HRESULT FillBuf()
  462. {
  463. // Never continue past an error condition except FILTER_E_NO_MORE_TEXT
  464. if ( FAILED( _hr ) && _hr != FILTER_E_NO_MORE_TEXT )
  465. return _hr;
  466. if ( iCur > iEnd )
  467. {
  468. out( L"TEXT_SOURCE iCur (%#x) > iEnd (%#x), this is incorrect\n",
  469. iCur, iEnd );
  470. _hr = E_INVALIDARG;
  471. return _hr;
  472. }
  473. // Move any existing text to beginning of buffer.
  474. ULONG ccLeftOver = iEnd - iCur;
  475. if ( ccLeftOver > 0 )
  476. MoveMemory( _awcBuffer,
  477. &_awcBuffer[iCur],
  478. ccLeftOver * sizeof WCHAR );
  479. iCur = 0;
  480. iEnd = ccLeftOver;
  481. ULONG ccRead = BufferWChars() - ccLeftOver;
  482. const ULONG BUFFER_SLOP = 10;
  483. //
  484. // Get some more text. If *previous* call to GetText returned
  485. // FILTER_S_LAST_TEXT, or FILTER_E_NO_MORE_TEXT then don't even
  486. // bother trying.
  487. //
  488. if ( FILTER_S_LAST_TEXT == _hr || FILTER_E_NO_MORE_TEXT == _hr )
  489. _hr = FILTER_E_NO_MORE_TEXT;
  490. else
  491. {
  492. _hr = _filter.GetText( &ccRead,
  493. &_awcBuffer[ccLeftOver] );
  494. if ( SUCCEEDED( _hr ) )
  495. {
  496. iEnd += ccRead;
  497. ccLeftOver += ccRead;
  498. ccRead = BufferWChars() - ccLeftOver;
  499. while ( ( S_OK == _hr ) && ( ccRead > BUFFER_SLOP ) )
  500. {
  501. // Attempt to fill in as much of buffer as possible
  502. _hr = _filter.GetText( &ccRead,
  503. &_awcBuffer[ccLeftOver] );
  504. if ( SUCCEEDED( _hr ) )
  505. {
  506. iEnd += ccRead;
  507. ccLeftOver += ccRead;
  508. ccRead = BufferWChars() - ccLeftOver;
  509. }
  510. }
  511. //
  512. // Either return FILTER_S_LAST_TEXT or return S_OK because we
  513. // have succeeded in adding text to the buffer.
  514. //
  515. if ( FILTER_S_LAST_TEXT == _hr )
  516. return FILTER_S_LAST_TEXT;
  517. return S_OK;
  518. }
  519. if ( ( FILTER_E_NO_MORE_TEXT != _hr ) &&
  520. ( FILTER_E_NO_TEXT != _hr ) )
  521. {
  522. // Weird failure, hence return, else goto next chunk
  523. return _hr;
  524. }
  525. }
  526. // Go to next chunk, if necessary.
  527. while ( ( FILTER_E_NO_MORE_TEXT == _hr ) ||
  528. ( FILTER_E_NO_TEXT == _hr ) )
  529. {
  530. _hr = _filter.GetChunk( &_Stat );
  531. if ( FILTER_E_END_OF_CHUNKS == _hr )
  532. return WBREAK_E_END_OF_TEXT;
  533. if ( FILTER_E_PARTIALLY_FILTERED == _hr )
  534. return WBREAK_E_END_OF_TEXT;
  535. if ( FAILED( _hr ) )
  536. return( _hr );
  537. //
  538. // Skip over value chunks -- note that search products don't do
  539. // this. They convert VT_LPSTR, VT_BSTR, and VT_LPWSTR to
  540. // Unicode strings for the wordbreaker.
  541. //
  542. if ( CHUNK_TEXT != _Stat.flags )
  543. continue;
  544. ccRead = BufferWChars() - ccLeftOver;
  545. _hr = _filter.GetText( &ccRead,
  546. &_awcBuffer[ccLeftOver] );
  547. if ( SUCCEEDED( _hr ) )
  548. {
  549. iEnd += ccRead;
  550. ccLeftOver += ccRead;
  551. ccRead = BufferWChars() - ccLeftOver;
  552. while ( ( S_OK == _hr ) && ( ccRead > BUFFER_SLOP ) )
  553. {
  554. // Attempt to fill in as much of buffer as possible
  555. _hr = _filter.GetText( &ccRead,
  556. &_awcBuffer[ccLeftOver] );
  557. if ( SUCCEEDED( _hr ) )
  558. {
  559. iEnd += ccRead;
  560. ccLeftOver += ccRead;
  561. ccRead = BufferWChars() - ccLeftOver;
  562. }
  563. }
  564. //
  565. // Either return FILTER_S_LAST_TEXT or return S_OK because we
  566. // have succeeded in adding text to the buffer.
  567. //
  568. if ( FILTER_S_LAST_TEXT == _hr )
  569. return FILTER_S_LAST_TEXT;
  570. return S_OK;
  571. }
  572. }
  573. if ( FAILED( _hr ) )
  574. return _hr;
  575. if ( 0 == ccRead )
  576. return WBREAK_E_END_OF_TEXT;
  577. return S_OK;
  578. } //FillBuf
  579. ULONG BufferWChars() const
  580. {
  581. return ArraySize( _awcBuffer );
  582. }
  583. IFilter & _filter;
  584. HRESULT _hr;
  585. STAT_CHUNK _Stat;
  586. WCHAR _awcBuffer[ 1024 ];
  587. };
  588. //+---------------------------------------------------------------------------
  589. //
  590. // Class: CWordFormSink
  591. //
  592. // Purpose: Sample stemmer sink -- just prints the results.
  593. //
  594. //----------------------------------------------------------------------------
  595. class CWordFormSink : public IWordFormSink
  596. {
  597. public:
  598. CWordFormSink() {}
  599. HRESULT STDMETHODCALLTYPE QueryInterface(
  600. REFIID riid,
  601. void ** ppvObject )
  602. {
  603. *ppvObject = this;
  604. return S_OK;
  605. }
  606. ULONG STDMETHODCALLTYPE AddRef() { return 1; }
  607. ULONG STDMETHODCALLTYPE Release() { return 1; }
  608. HRESULT STDMETHODCALLTYPE PutAltWord(
  609. WCHAR const * pwcBuf,
  610. ULONG cwc )
  611. {
  612. out( L"IWordFormSink::PutAltWord: cwc %d, '%.*ws'", cwc, cwc, pwcBuf );
  613. DumpStringAsHex( pwcBuf, cwc );
  614. return S_OK;
  615. }
  616. HRESULT STDMETHODCALLTYPE PutWord (
  617. WCHAR const * pwcBuf,
  618. ULONG cwc )
  619. {
  620. out( L"IWordFormSink::PutWord: cwc %d, '%.*ws'", cwc, cwc, pwcBuf );
  621. DumpStringAsHex( pwcBuf, cwc );
  622. return S_OK;
  623. }
  624. };
  625. //+---------------------------------------------------------------------------
  626. //
  627. // Class: CWordSink
  628. //
  629. // Purpose: Sample word sink -- just prints the results.
  630. //
  631. //----------------------------------------------------------------------------
  632. class CWordSink : public IWordSink
  633. {
  634. public:
  635. CWordSink() {}
  636. HRESULT STDMETHODCALLTYPE QueryInterface(
  637. REFIID riid,
  638. void ** ppvObject )
  639. {
  640. *ppvObject = this;
  641. return S_OK;
  642. }
  643. ULONG STDMETHODCALLTYPE AddRef() { return 1; }
  644. ULONG STDMETHODCALLTYPE Release() { return 1; }
  645. HRESULT STDMETHODCALLTYPE PutWord(
  646. ULONG cwc,
  647. WCHAR const * pwcBuf,
  648. ULONG cwcSrcLen,
  649. ULONG cwcSrcPos )
  650. {
  651. out( L"IWordSink::PutWord: cwcSrcLen %d, cwcSrcPos %d, cwc %d, '%.*ws'",
  652. cwcSrcLen, cwcSrcPos, cwc, cwc, pwcBuf );
  653. DumpStringAsHex( pwcBuf, cwc );
  654. return S_OK;
  655. }
  656. HRESULT STDMETHODCALLTYPE PutAltWord(
  657. ULONG cwc,
  658. WCHAR const * pwcBuf,
  659. ULONG cwcSrcLen,
  660. ULONG cwcSrcPos )
  661. {
  662. out( L"IWordSink::PutAltWord: cwcSrcLen %d, cwcSrcPos %d, cwc %d, '%.*ws'",
  663. cwcSrcLen, cwcSrcPos, cwc, cwc, pwcBuf );
  664. DumpStringAsHex( pwcBuf, cwc );
  665. return S_OK;
  666. }
  667. HRESULT STDMETHODCALLTYPE StartAltPhrase()
  668. {
  669. out( L"IWordSink::StartAltPhrase" );
  670. return S_OK;
  671. }
  672. HRESULT STDMETHODCALLTYPE EndAltPhrase()
  673. {
  674. out( L"IWordSink::EndAltPhrase" );
  675. return S_OK;
  676. }
  677. HRESULT STDMETHODCALLTYPE PutBreak( WORDREP_BREAK_TYPE wbt )
  678. {
  679. out( L"IWordSink::PutBreak, type (%d) %ws",
  680. wbt,
  681. ( WORDREP_BREAK_EOW == wbt ) ? L"end of word" :
  682. ( WORDREP_BREAK_EOS == wbt ) ? L"end of sentence" :
  683. ( WORDREP_BREAK_EOP == wbt ) ? L"end of paragraph" :
  684. ( WORDREP_BREAK_EOC == wbt ) ? L"end of chapter" :
  685. L"invalid break type" );
  686. return S_OK;
  687. }
  688. };
  689. //+---------------------------------------------------------------------------
  690. //
  691. // Class: CPhraseSink
  692. //
  693. // Purpose: Sample phrase sink -- just prints the results.
  694. //
  695. //----------------------------------------------------------------------------
  696. class CPhraseSink: public IPhraseSink
  697. {
  698. public:
  699. CPhraseSink() {}
  700. HRESULT STDMETHODCALLTYPE QueryInterface(
  701. REFIID riid,
  702. void ** ppvObject )
  703. {
  704. // Assume the caller is well-behaved
  705. *ppvObject = this;
  706. return S_OK;
  707. }
  708. ULONG STDMETHODCALLTYPE AddRef() { return 1; }
  709. ULONG STDMETHODCALLTYPE Release() { return 1; }
  710. HRESULT STDMETHODCALLTYPE PutSmallPhrase(
  711. const WCHAR * pwcNoun,
  712. ULONG cwcNoun,
  713. const WCHAR * pwcModifier,
  714. ULONG cwcModifier,
  715. ULONG ulAttachmentType )
  716. {
  717. out( L"IPhraseSink::PutSmallPhrase" );
  718. return S_OK;
  719. }
  720. HRESULT STDMETHODCALLTYPE PutPhrase(
  721. WCHAR const * pwcPhrase,
  722. ULONG cwcPhrase )
  723. {
  724. out( L"IPhraseSink::PutPhrase: cwcPhrase %d, '%.*ws'",
  725. cwcPhrase, cwcPhrase, pwcPhrase );
  726. DumpStringAsHex( pwcPhrase, cwcPhrase );
  727. return S_OK;
  728. }
  729. };
  730. //+---------------------------------------------------------------------------
  731. //
  732. // Function: GetVersionKey
  733. //
  734. // Purpose: Displays a particular version key
  735. //
  736. // Arguments: [pbInfo] -- The version information
  737. // [pwcLang] -- The language of the string requested
  738. // [pwcKey] -- Key name to retrieve
  739. //
  740. // Returns: TRUE if a value was found, FALSE otherwise
  741. //
  742. //----------------------------------------------------------------------------
  743. BOOL GetVersionKey(
  744. BYTE * pbInfo,
  745. WCHAR const * pwcLang,
  746. WCHAR const * pwcKey )
  747. {
  748. WCHAR awcKey[ 128 ];
  749. wsprintf( awcKey, L"\\StringFileInfo\\%ws\\%ws", pwcLang, pwcKey );
  750. WCHAR * pwcResult = 0;
  751. UINT cb = 0;
  752. if ( VerQueryValue( pbInfo,
  753. awcKey,
  754. (PVOID *) &pwcResult,
  755. &cb ) )
  756. {
  757. out( L" %ws: '%ws'", pwcKey, pwcResult );
  758. return TRUE;
  759. }
  760. return FALSE;
  761. } //GetVersionKey
  762. //+---------------------------------------------------------------------------
  763. //
  764. // Function: OutputFiletime
  765. //
  766. // Purpose: Displays a filetime
  767. //
  768. // Arguments: [pwcHeader] -- Prefix to print before the filetime
  769. // [ft] -- Filetime to print, in UTC originally
  770. //
  771. //----------------------------------------------------------------------------
  772. void OutputFiletime( WCHAR const * pwcHeader, FILETIME & ft )
  773. {
  774. FILETIME ftLocal;
  775. FileTimeToLocalFileTime( &ft, &ftLocal );
  776. SYSTEMTIME st;
  777. FileTimeToSystemTime( &ftLocal, &st );
  778. BOOL pm = st.wHour >= 12;
  779. if ( st.wHour > 12 )
  780. st.wHour -= 12;
  781. else if ( 0 == st.wHour )
  782. st.wHour = 12;
  783. out( L"%ws: %2d-%02d-%04d %2d:%02d%wc",
  784. pwcHeader,
  785. (DWORD) st.wMonth,
  786. (DWORD) st.wDay,
  787. (DWORD) st.wYear,
  788. (DWORD) st.wHour,
  789. (DWORD) st.wMinute,
  790. pm ? L'p' : L'a' );
  791. } //OutputFiletime
  792. //+---------------------------------------------------------------------------
  793. //
  794. // Function: DisplayModuleInformation
  795. //
  796. // Purpose: Displays information about a module -- dates and version
  797. //
  798. // Arguments: [hMod] -- Module handle
  799. //
  800. //----------------------------------------------------------------------------
  801. HRESULT DisplayModuleInformation( HINSTANCE hMod )
  802. {
  803. WCHAR awcDllPath[ MAX_PATH ];
  804. DWORD cwcCopied = GetModuleFileName( hMod,
  805. awcDllPath,
  806. ArraySize( awcDllPath ) );
  807. awcDllPath[ ArraySize( awcDllPath ) - 1 ] = 0;
  808. if ( 0 == cwcCopied )
  809. return HRESULT_FROM_WIN32( GetLastError() );
  810. out( L"dll loaded: %ws", awcDllPath );
  811. DWORD dwHandle;
  812. DWORD cbVersionInfo = GetFileVersionInfoSize( awcDllPath, &dwHandle );
  813. if ( 0 == cbVersionInfo )
  814. {
  815. printf( "can't get dll version information size, error %d\n",
  816. GetLastError() );
  817. return HRESULT_FROM_WIN32( GetLastError() );
  818. }
  819. XPtr<BYTE> xVersionInfo( cbVersionInfo );
  820. if ( xVersionInfo.IsNull() )
  821. return E_OUTOFMEMORY;
  822. BOOL fOK = GetFileVersionInfo( awcDllPath,
  823. 0,
  824. cbVersionInfo,
  825. xVersionInfo.Get() );
  826. if ( !fOK )
  827. {
  828. printf( "unable to retrieve version information, error %d\n",
  829. GetLastError() );
  830. return HRESULT_FROM_WIN32( GetLastError() );
  831. }
  832. // Get the DLL version number
  833. void * pvValue = 0;
  834. UINT cbValue = 0;
  835. fOK = VerQueryValue( xVersionInfo.Get(),
  836. L"\\",
  837. &pvValue,
  838. &cbValue );
  839. if ( !fOK || ( 0 == cbValue ) )
  840. {
  841. printf( "can't retrieve version root value, error %d\n",
  842. GetLastError() );
  843. return HRESULT_FROM_WIN32( GetLastError() );
  844. }
  845. VS_FIXEDFILEINFO & ffi = * (VS_FIXEDFILEINFO *) pvValue;
  846. out( L" dll version %u.%u.%u.%u",
  847. HIWORD( ffi.dwFileVersionMS ),
  848. LOWORD( ffi.dwFileVersionMS ),
  849. HIWORD( ffi.dwFileVersionLS ),
  850. LOWORD( ffi.dwFileVersionLS ) );
  851. if ( ( cbValue >= sizeof VS_FIXEDFILEINFO ) &&
  852. ( 0 != ffi.dwFileDateLS && 0 != ffi.dwFileDateMS ) )
  853. {
  854. FILETIME ft;
  855. ft.dwLowDateTime = ffi.dwFileDateLS;
  856. ft.dwHighDateTime = ffi.dwFileDateMS;
  857. OutputFiletime( L" version creation date: ", ft );
  858. }
  859. HANDLE h = CreateFile( awcDllPath,
  860. FILE_GENERIC_READ,
  861. FILE_SHARE_READ | FILE_SHARE_DELETE,
  862. 0,
  863. OPEN_EXISTING,
  864. 0,
  865. 0 );
  866. if ( INVALID_HANDLE_VALUE != h )
  867. {
  868. FILETIME ftCreate, ftLastWrite;
  869. fOK = GetFileTime( h, &ftCreate, 0, &ftLastWrite );
  870. if ( fOK )
  871. {
  872. OutputFiletime( L" file create time", ftCreate );
  873. OutputFiletime( L" file last write time", ftLastWrite );
  874. }
  875. CloseHandle( h );
  876. }
  877. //
  878. // Get the language string. Not every dll stores it correctly, so fall
  879. // back on English locales known to work for some special cases.
  880. //
  881. WCHAR awcLang[9];
  882. awcLang[0] = 0;
  883. DWORD * pdwLang;
  884. UINT cb;
  885. if ( VerQueryValue( xVersionInfo.Get(),
  886. L"VarFileInfo\\Translation",
  887. (PVOID *) &pdwLang,
  888. &cb ) &&
  889. ( cb >= 4 ) )
  890. {
  891. wsprintf( awcLang,
  892. L"%04x%04x",
  893. LOWORD( *pdwLang ),
  894. HIWORD( *pdwLang ) );
  895. }
  896. if ( 0 == awcLang[0] )
  897. {
  898. // Try English Unicode
  899. wcscpy( awcLang, L"040904B0" );
  900. if ( !GetVersionKey( xVersionInfo.Get(),
  901. awcLang,
  902. L"FileVersion" ) )
  903. {
  904. // Try English
  905. wcscpy( awcLang, L"040904E4" );
  906. if ( !GetVersionKey( xVersionInfo.Get(),
  907. awcLang,
  908. L"FileVersion" ) )
  909. {
  910. // Try English null codepage
  911. wcscpy( awcLang, L"04090000" );
  912. if ( !GetVersionKey( xVersionInfo.Get(),
  913. awcLang,
  914. L"FileVersion" ) )
  915. awcLang[0] = 0;
  916. }
  917. }
  918. }
  919. else
  920. {
  921. GetVersionKey( xVersionInfo.Get(), awcLang, L"FileVersion" );
  922. }
  923. // Display additional version information if we found the language
  924. if ( 0 != awcLang[0] )
  925. {
  926. GetVersionKey( xVersionInfo.Get(), awcLang, L"FileDescription" );
  927. GetVersionKey( xVersionInfo.Get(), awcLang, L"CompanyName" );
  928. GetVersionKey( xVersionInfo.Get(), awcLang, L"ProductName" );
  929. }
  930. return S_OK;
  931. } //DisplayModuleInformation
  932. //+---------------------------------------------------------------------------
  933. //
  934. // Function: CreateFromModule
  935. //
  936. // Purpose: Creates a COM object given a dll
  937. //
  938. // Arguments: [clsid] -- Class ID of the object to load
  939. // [iid] -- Interface ID requested
  940. // [ppvObject] -- Returns the object created
  941. // [pwcModule] -- Dll to load
  942. // [fShowStatusInfo] -- TRUE to print status information
  943. //
  944. // Returns: HRESULT, S_OK if successful
  945. //
  946. //----------------------------------------------------------------------------
  947. HRESULT CreateFromModule(
  948. REFIID clsid,
  949. REFIID iid,
  950. void ** ppvObject,
  951. WCHAR const * pwcModule,
  952. BOOL fShowStatusInfo = TRUE )
  953. {
  954. // Note: the module handle will be leaked. It's OK for a test program.
  955. HMODULE hMod = LoadLibrary( pwcModule );
  956. if ( 0 == hMod )
  957. return HRESULT_FROM_WIN32( GetLastError() );
  958. // Display information about the module -- ignore errors
  959. if ( fShowStatusInfo )
  960. DisplayModuleInformation( hMod );
  961. LPFNGETCLASSOBJECT pfn = (LPFNGETCLASSOBJECT)
  962. GetProcAddress( hMod, "DllGetClassObject" );
  963. if ( 0 == pfn )
  964. {
  965. printf( "can't get DllGetClassObject: %d\n", GetLastError() );
  966. return HRESULT_FROM_WIN32( GetLastError() );
  967. }
  968. XInterface<IClassFactory> xClassFactory;
  969. HRESULT hr = pfn( clsid,
  970. IID_IClassFactory,
  971. xClassFactory.GetQIPointer() );
  972. if ( FAILED( hr ) )
  973. {
  974. printf( "can't instantiate the class factory: %#x\n", hr );
  975. return hr;
  976. }
  977. return xClassFactory->CreateInstance( 0, iid, ppvObject );
  978. } //CreateFromModule
  979. //+---------------------------------------------------------------------------
  980. //
  981. // Function: FakeCoCreateInstance
  982. //
  983. // Purpose: Creates a COM object
  984. //
  985. // Arguments: [clsid] -- Class ID of the object to load
  986. // [iid] -- Interface ID requested
  987. // [ppvObject] -- Returns the object created
  988. // [fShowStatusInfo] -- TRUE to print status information
  989. //
  990. // Returns: HRESULT, S_OK if successful
  991. //
  992. // Needed because some wordbreakers register as single-threaded. Search
  993. // products require multi-threaded because marshalling across apartments
  994. // doesn't work and because it's too inefficient, especially on
  995. // multi-processor machines.
  996. //
  997. //----------------------------------------------------------------------------
  998. HRESULT FakeCoCreateInstance(
  999. REFIID clsid,
  1000. REFIID iid,
  1001. void ** ppvObject,
  1002. BOOL fShowStatusInfo = TRUE )
  1003. {
  1004. WCHAR awcCLSID[ 40 ];
  1005. StringFromGUID2( clsid, awcCLSID, ArraySize( awcCLSID ) );
  1006. WCHAR awcKey[200];
  1007. swprintf( awcKey, L"CLSID\\%ws\\InprocServer32", awcCLSID );
  1008. HKEY hKey;
  1009. DWORD dwErr = RegOpenKey( HKEY_CLASSES_ROOT, awcKey, &hKey );
  1010. if ( NO_ERROR != dwErr )
  1011. return HRESULT_FROM_WIN32( dwErr );
  1012. WCHAR awcDll[MAX_PATH + 1];
  1013. DWORD dwType;
  1014. DWORD dwSize = sizeof awcDll;
  1015. dwErr = RegQueryValueEx( hKey,
  1016. L"",
  1017. 0,
  1018. &dwType,
  1019. (LPBYTE) awcDll,
  1020. &dwSize );
  1021. RegCloseKey( hKey );
  1022. if ( 0 != dwErr )
  1023. return HRESULT_FROM_WIN32( dwErr );
  1024. return CreateFromModule( clsid, iid, ppvObject, awcDll, fShowStatusInfo );
  1025. } //FakeCoCreateInstance
  1026. //+---------------------------------------------------------------------------
  1027. //
  1028. // Function: Stem
  1029. //
  1030. // Purpose: Stems the input text using the specified stemmer
  1031. //
  1032. // Arguments: [pwcText] -- The text to be stemmed
  1033. // [clsid] -- Class ID of the stemmer to use
  1034. // [pwcModule] -- Optional module name to override COM lookup.
  1035. // [cwcMaxToken] -- Maximum token size for the stemmer
  1036. //
  1037. //----------------------------------------------------------------------------
  1038. HRESULT Stem(
  1039. WCHAR const * pwcText,
  1040. WCHAR const * pwcModule,
  1041. CLSID & clsid,
  1042. ULONG cwcMaxToken )
  1043. {
  1044. XInterface<IStemmer> xStemmer;
  1045. HRESULT hr = S_OK;
  1046. if ( 0 != pwcModule )
  1047. {
  1048. hr = CreateFromModule( clsid,
  1049. IID_IStemmer,
  1050. xStemmer.GetQIPointer(),
  1051. pwcModule );
  1052. }
  1053. else
  1054. {
  1055. #ifdef USE_FAKE_COM
  1056. hr = FakeCoCreateInstance( clsid,
  1057. IID_IStemmer,
  1058. xStemmer.GetQIPointer() );
  1059. #else
  1060. hr = CoCreateInstance( clsid,
  1061. 0,
  1062. CLSCTX_INPROC_SERVER,
  1063. IID_IStemmer,
  1064. xStemmer.GetQIPointer() );
  1065. #endif
  1066. }
  1067. if ( FAILED( hr ) )
  1068. {
  1069. printf( "can't CoCreateInstance the stemmer: %#x\n", hr );
  1070. return hr;
  1071. }
  1072. BOOL fLicense = FALSE;
  1073. hr = xStemmer->Init( cwcMaxToken, &fLicense );
  1074. if ( FAILED( hr ) )
  1075. {
  1076. printf( "can't Init() in the stemmer: %#x\n", hr );
  1077. return hr;
  1078. }
  1079. out( L"Stemmer requires license: %ws", fLicense ? L"Yes" : L"No" );
  1080. const WCHAR *pwcsLicense = 0;
  1081. hr = xStemmer->GetLicenseToUse( &pwcsLicense );
  1082. if ( FAILED( hr ) )
  1083. out( L"can't GetLicenseToUse() in the stemmer: %#x\n", hr );
  1084. else
  1085. out( L"Stemmer license: '%ws'", pwcsLicense );
  1086. CWordFormSink sink;
  1087. if ( 0 != pwcText )
  1088. {
  1089. out( L"Original text: '%ws'", pwcText );
  1090. hr = xStemmer->GenerateWordForms( pwcText, (ULONG) wcslen( pwcText ), &sink );
  1091. if ( FAILED( hr ) )
  1092. {
  1093. printf( "can't GenerateWordForms() in the stemmer: %#x\n", hr );
  1094. return hr;
  1095. }
  1096. }
  1097. return S_OK;
  1098. } //Stem
  1099. //+---------------------------------------------------------------------------
  1100. //
  1101. // Function: WordBreak
  1102. //
  1103. // Purpose: Wordbreaks the input text or file
  1104. //
  1105. // Arguments: [fQuery] -- TRUE if query time FALSE if index time
  1106. // [pwcText] -- The text to be wordbroken.
  1107. // [pwcInputFile] -- Filename to be wordbroken if pwcText is 0
  1108. // [pwcModule] -- Optional module name to override COM lookup.
  1109. // [clsid] -- Class ID of the wordbreaker to use
  1110. // [cwcMaxToken] -- Maximum token size for the wordbreaker
  1111. //
  1112. //----------------------------------------------------------------------------
  1113. HRESULT WordBreak(
  1114. BOOL fQuery,
  1115. WCHAR const * pwcText,
  1116. WCHAR const * pwcInputFile,
  1117. WCHAR const * pwcModule,
  1118. CLSID & clsid,
  1119. ULONG cwcMaxToken )
  1120. {
  1121. XInterface<IWordBreaker> xWordBreaker;
  1122. HRESULT hr = S_OK;
  1123. if ( 0 != pwcModule )
  1124. {
  1125. hr = CreateFromModule( clsid,
  1126. IID_IWordBreaker,
  1127. xWordBreaker.GetQIPointer(),
  1128. pwcModule );
  1129. }
  1130. else
  1131. {
  1132. #ifdef USE_FAKE_COM
  1133. hr = FakeCoCreateInstance( clsid,
  1134. IID_IWordBreaker,
  1135. xWordBreaker.GetQIPointer() );
  1136. #else
  1137. hr = CoCreateInstance( clsid,
  1138. 0,
  1139. CLSCTX_INPROC_SERVER,
  1140. IID_IWordBreaker,
  1141. xWordBreaker.GetQIPointer() );
  1142. #endif
  1143. }
  1144. if ( FAILED( hr ) )
  1145. {
  1146. printf( "can't CoCreateInstance the wordbreaker: %#x\n", hr );
  1147. return hr;
  1148. }
  1149. BOOL fLicense = FALSE;
  1150. hr = xWordBreaker->Init( fQuery, cwcMaxToken, &fLicense );
  1151. if ( FAILED( hr ) )
  1152. {
  1153. printf( "can't Init() in the wordbreaker: %#x\n", hr );
  1154. return hr;
  1155. }
  1156. out( L"Wordbreaker requires license: %ws", fLicense ? L"Yes" : L"No" );
  1157. const WCHAR *pwcsLicense = 0;
  1158. hr = xWordBreaker->GetLicenseToUse( &pwcsLicense );
  1159. if ( FAILED( hr ) )
  1160. {
  1161. printf( "can't GetLicenseToUse() in the wordbreaker: %#x\n", hr );
  1162. return hr;
  1163. }
  1164. out( L"Wordbreaker license: '%ws'", pwcsLicense );
  1165. CWordSink wordSink;
  1166. CPhraseSink phraseSink;
  1167. if ( 0 != pwcText )
  1168. {
  1169. out( L"Original text: '%ws'", pwcText );
  1170. CPlainTextSource textSource( pwcText, (ULONG) wcslen( pwcText ) );
  1171. hr = xWordBreaker->BreakText( &textSource, &wordSink, &phraseSink );
  1172. if ( FAILED( hr ) )
  1173. {
  1174. printf( "can't BreakText() in the wordbreaker: %#x\n", hr );
  1175. return hr;
  1176. }
  1177. }
  1178. else
  1179. {
  1180. out( L"Wordbreaking text from file %ws", pwcInputFile );
  1181. // Load the Indexing Service filter (should be fine for testing).
  1182. XInterface<IFilter> xIFilter;
  1183. hr = LoadIFilter( pwcInputFile, 0, xIFilter.GetQIPointer() );
  1184. if ( FAILED( hr ) )
  1185. {
  1186. // Fall back on the plain text filter.
  1187. printf( "Can't load filter, error %#x. Trying text filter.\n",
  1188. hr );
  1189. hr = g_pLoadTextFilter( pwcInputFile, xIFilter.GetPPointer() );
  1190. if ( FAILED( hr ) )
  1191. {
  1192. printf( "can't load filter, error %#x\n", hr );
  1193. return hr;
  1194. }
  1195. }
  1196. // Initialize the filter
  1197. ULONG ulFlags = 0;
  1198. hr = xIFilter->Init( IFILTER_INIT_CANON_PARAGRAPHS |
  1199. IFILTER_INIT_CANON_HYPHENS |
  1200. IFILTER_INIT_APPLY_INDEX_ATTRIBUTES,
  1201. 0,
  1202. 0,
  1203. &ulFlags );
  1204. if ( FAILED( hr ) )
  1205. {
  1206. printf( "can't initialize filter, error %#x\n", hr );
  1207. return hr;
  1208. }
  1209. CFilterTextSource textSource( xIFilter.GetReference() );
  1210. hr = xWordBreaker->BreakText( &textSource, &wordSink, &phraseSink );
  1211. if ( FAILED( hr ) )
  1212. {
  1213. printf( "can't BreakText() in the wordbreaker: %#x\n", hr );
  1214. return hr;
  1215. }
  1216. }
  1217. return S_OK;
  1218. } //WordBreak
  1219. //+-------------------------------------------------------------------------
  1220. //
  1221. // Function: Render
  1222. //
  1223. // Synopsis: Prints an item in a safearray
  1224. //
  1225. // Arguments: [vt] - type of the element
  1226. // [pa] - pointer to the item
  1227. //
  1228. //--------------------------------------------------------------------------
  1229. void PrintSafeArray( VARTYPE vt, LPSAFEARRAY pa );
  1230. void Render( VARTYPE vt, void * pv )
  1231. {
  1232. if ( VT_ARRAY & vt )
  1233. {
  1234. PrintSafeArray( (VARTYPE) (vt - VT_ARRAY), *(SAFEARRAY **) pv );
  1235. return;
  1236. }
  1237. switch ( vt )
  1238. {
  1239. case VT_UI1: outstr( L"%u", (unsigned) *(BYTE *)pv ); break;
  1240. case VT_I1: outstr( L"%d", (int) *(CHAR *)pv ); break;
  1241. case VT_UI2: outstr( L"%u", (unsigned) *(USHORT *)pv ); break;
  1242. case VT_I2: outstr( L"%d", (int) *(SHORT *)pv ); break;
  1243. case VT_UI4:
  1244. case VT_UINT: outstr( L"%u", (unsigned) *(ULONG *)pv ); break;
  1245. case VT_I4:
  1246. case VT_ERROR:
  1247. case VT_INT: outstr( L"%d", *(LONG *)pv ); break;
  1248. case VT_UI8: outstr( L"%I64u", *(unsigned __int64 *)pv ); break;
  1249. case VT_I8: outstr( L"%I64d", *(__int64 *)pv ); break;
  1250. case VT_R4: outstr( L"%f", *(float *)pv ); break;
  1251. case VT_R8: outstr( L"%lf", *(double *)pv ); break;
  1252. case VT_DECIMAL:
  1253. {
  1254. double dbl;
  1255. HRESULT hr = VarR8FromDec( (DECIMAL *) pv, &dbl );
  1256. if ( SUCCEEDED( hr ) )
  1257. outstr( L"%lf", dbl );
  1258. break;
  1259. }
  1260. case VT_CY:
  1261. {
  1262. double dbl;
  1263. HRESULT hr = VarR8FromCy( * (CY *) pv, &dbl );
  1264. if ( SUCCEEDED( hr ) )
  1265. outstr( L"%lf", dbl );
  1266. break;
  1267. }
  1268. case VT_BOOL: outstr( *(VARIANT_BOOL *)pv ? L"TRUE" : L"FALSE" ); break;
  1269. case VT_BSTR: outstr( L"%ws", *(BSTR *) pv ); break;
  1270. case VT_VARIANT:
  1271. {
  1272. PROPVARIANT * pVar = (PROPVARIANT *) pv;
  1273. Render( pVar->vt, & pVar->lVal );
  1274. break;
  1275. }
  1276. case VT_DATE:
  1277. {
  1278. SYSTEMTIME st;
  1279. BOOL fOK = VariantTimeToSystemTime( *(DATE *)pv, &st );
  1280. if ( !fOK )
  1281. break;
  1282. BOOL pm = st.wHour >= 12;
  1283. if ( st.wHour > 12 )
  1284. st.wHour -= 12;
  1285. else if ( 0 == st.wHour )
  1286. st.wHour = 12;
  1287. outstr( L"%2d-%02d-%04d %2d:%02d%wc",
  1288. (DWORD) st.wMonth,
  1289. (DWORD) st.wDay,
  1290. (DWORD) st.wYear,
  1291. (DWORD) st.wHour,
  1292. (DWORD) st.wMinute,
  1293. pm ? L'p' : L'a' );
  1294. break;
  1295. }
  1296. case VT_EMPTY:
  1297. case VT_NULL:
  1298. break;
  1299. default :
  1300. {
  1301. outstr( L"(vt 0x%x)", (int) vt );
  1302. break;
  1303. }
  1304. }
  1305. } //Render
  1306. //+-------------------------------------------------------------------------
  1307. //
  1308. // Function: PrintSafeArray
  1309. //
  1310. // Synopsis: Prints items in a safearray
  1311. //
  1312. // Arguments: [vt] - type of elements in the safearray
  1313. // [pa] - pointer to the safearray
  1314. //
  1315. //--------------------------------------------------------------------------
  1316. void PrintSafeArray( VARTYPE vt, LPSAFEARRAY pa )
  1317. {
  1318. // Get the dimensions of the array
  1319. UINT cDim = SafeArrayGetDim( pa );
  1320. if ( 0 == cDim )
  1321. return;
  1322. XPtr<LONG> xDim( cDim );
  1323. XPtr<LONG> xLo( cDim );
  1324. XPtr<LONG> xUp( cDim );
  1325. for ( UINT iDim = 0; iDim < cDim; iDim++ )
  1326. {
  1327. HRESULT hr = SafeArrayGetLBound( pa, iDim + 1, &xLo[iDim] );
  1328. if ( FAILED( hr ) )
  1329. return;
  1330. xDim[ iDim ] = xLo[ iDim ];
  1331. hr = SafeArrayGetUBound( pa, iDim + 1, &xUp[iDim] );
  1332. if ( FAILED( hr ) )
  1333. return;
  1334. outstr( L"{" );
  1335. }
  1336. // slog through the array
  1337. UINT iLastDim = cDim - 1;
  1338. BOOL fDone = FALSE;
  1339. while ( !fDone )
  1340. {
  1341. // inter-element formatting
  1342. if ( xDim[ iLastDim ] != xLo[ iLastDim ] )
  1343. outstr( L"," );
  1344. // Get the element and render it
  1345. void *pv;
  1346. HRESULT hr = SafeArrayPtrOfIndex( pa, xDim.Get(), &pv );
  1347. if ( FAILED( hr ) )
  1348. return;
  1349. Render( vt, pv );
  1350. // Move to the next element and carry if necessary
  1351. ULONG cOpen = 0;
  1352. for ( LONG iDim = iLastDim; iDim >= 0; iDim-- )
  1353. {
  1354. if ( xDim[ iDim ] < xUp[ iDim ] )
  1355. {
  1356. xDim[ iDim ] = 1 + xDim[ iDim ];
  1357. break;
  1358. }
  1359. outstr( L"}" );
  1360. if ( 0 == iDim )
  1361. fDone = TRUE;
  1362. else
  1363. {
  1364. cOpen++;
  1365. xDim[ iDim ] = xLo[ iDim ];
  1366. }
  1367. }
  1368. for ( ULONG i = 0; !fDone && i < cOpen; i++ )
  1369. outstr( L"{" );
  1370. }
  1371. } //PrintSafeArray
  1372. //+-------------------------------------------------------------------------
  1373. //
  1374. // Function: PrintVectorItems
  1375. //
  1376. // Synopsis: Prints items in a PROPVARIANT vector
  1377. //
  1378. // Arguments: [pVal] - The array of values
  1379. // [cVals] - The count of values
  1380. // [pcFmt] - The format string
  1381. //
  1382. //--------------------------------------------------------------------------
  1383. template<class T> void PrintVectorItems(
  1384. T * pVal,
  1385. ULONG cVals,
  1386. WCHAR const * pwcFmt )
  1387. {
  1388. outstr( L"{ " );
  1389. for( ULONG iVal = 0; iVal < cVals; iVal++ )
  1390. {
  1391. if ( 0 != iVal )
  1392. outstr( L"," );
  1393. outstr( pwcFmt, *pVal++ );
  1394. }
  1395. outstr( L" }" );
  1396. } //PrintVectorItems
  1397. //+-------------------------------------------------------------------------
  1398. //
  1399. // Function: DisplayValue
  1400. //
  1401. // Synopsis: Displays a PROPVARIANT value. Limited formatting is done.
  1402. //
  1403. // Arguments: [pVar] - The value to display
  1404. //
  1405. //--------------------------------------------------------------------------
  1406. void DisplayValue( PROPVARIANT const * pVar )
  1407. {
  1408. if ( 0 == pVar )
  1409. {
  1410. outstr( L"NULL" );
  1411. return;
  1412. }
  1413. // Display the most typical variant types
  1414. PROPVARIANT const & v = *pVar;
  1415. switch ( v.vt )
  1416. {
  1417. case VT_EMPTY : break;
  1418. case VT_NULL : break;
  1419. case VT_I4 : outstr( L"%10d", v.lVal ); break;
  1420. case VT_UI1 : outstr( L"%10d", v.bVal ); break;
  1421. case VT_I2 : outstr( L"%10d", v.iVal ); break;
  1422. case VT_R4 : outstr( L"%10f", v.fltVal ); break;
  1423. case VT_R8 : outstr( L"%10lf", v.dblVal ); break;
  1424. case VT_BOOL : outstr( v.boolVal ? L"TRUE" : L"FALSE" ); break;
  1425. case VT_I1 : outstr( L"%10d", v.cVal ); break;
  1426. case VT_UI2 : outstr( L"%10u", v.uiVal ); break;
  1427. case VT_UI4 : outstr( L"%10u", v.ulVal ); break;
  1428. case VT_INT : outstr( L"%10d", v.lVal ); break;
  1429. case VT_UINT : outstr( L"%10u", v.ulVal ); break;
  1430. case VT_I8 : outstr( L"%20I64d", v.hVal ); break;
  1431. case VT_UI8 : outstr( L"%20I64u", v.hVal ); break;
  1432. case VT_ERROR : outstr( L"%#x", v.scode ); break;
  1433. case VT_LPSTR : outstr( L"%S", v.pszVal ); break;
  1434. case VT_LPWSTR : outstr( L"%ws", v.pwszVal ); break;
  1435. case VT_BSTR : outstr( L"%ws", v.bstrVal ); break;
  1436. case VT_BLOB :
  1437. {
  1438. outstr( L"blob cb %u ", v.blob.cbSize );
  1439. for ( unsigned x = 0; x < v.blob.cbSize; x++ )
  1440. outstr( L" %#x ", v.blob.pBlobData[x] );
  1441. break;
  1442. }
  1443. case VT_CY:
  1444. {
  1445. double dbl;
  1446. HRESULT hr = VarR8FromCy( v.cyVal, &dbl );
  1447. if ( SUCCEEDED( hr ) )
  1448. outstr( L"%lf", dbl );
  1449. break;
  1450. }
  1451. case VT_DECIMAL :
  1452. {
  1453. double dbl;
  1454. HRESULT hr = VarR8FromDec( (DECIMAL *) &v.decVal, &dbl );
  1455. if ( SUCCEEDED( hr ) )
  1456. outstr( L"%lf", dbl );
  1457. break;
  1458. }
  1459. case VT_FILETIME :
  1460. case VT_DATE :
  1461. {
  1462. SYSTEMTIME st;
  1463. ZeroMemory( &st, sizeof st );
  1464. if ( VT_DATE == v.vt )
  1465. {
  1466. BOOL fOK = VariantTimeToSystemTime( v.date, &st );
  1467. if ( !fOK )
  1468. break;
  1469. }
  1470. else
  1471. {
  1472. FILETIME ft;
  1473. BOOL fOK = FileTimeToLocalFileTime( &v.filetime, &ft );
  1474. if ( fOK )
  1475. FileTimeToSystemTime( &ft, &st );
  1476. if ( !fOK )
  1477. break;
  1478. }
  1479. BOOL pm = st.wHour >= 12;
  1480. if ( st.wHour > 12 )
  1481. st.wHour -= 12;
  1482. else if ( 0 == st.wHour )
  1483. st.wHour = 12;
  1484. outstr( L"%2d-%02d-%04d %2d:%02d%wc",
  1485. (DWORD) st.wMonth,
  1486. (DWORD) st.wDay,
  1487. (DWORD) st.wYear,
  1488. (DWORD) st.wHour,
  1489. (DWORD) st.wMinute,
  1490. pm ? L'p' : L'a' );
  1491. break;
  1492. }
  1493. case VT_VECTOR | VT_I1:
  1494. PrintVectorItems( v.cac.pElems, v.cac.cElems, L"%d" ); break;
  1495. case VT_VECTOR | VT_I2:
  1496. PrintVectorItems( v.cai.pElems, v.cai.cElems, L"%d" ); break;
  1497. case VT_VECTOR | VT_I4:
  1498. PrintVectorItems( v.cal.pElems, v.cal.cElems, L"%d" ); break;
  1499. case VT_VECTOR | VT_I8:
  1500. PrintVectorItems( v.cah.pElems, v.cah.cElems, L"%I64d" ); break;
  1501. case VT_VECTOR | VT_UI1:
  1502. PrintVectorItems( v.caub.pElems, v.caub.cElems, L"%u" ); break;
  1503. case VT_VECTOR | VT_UI2:
  1504. PrintVectorItems( v.caui.pElems, v.caui.cElems, L"%u" ); break;
  1505. case VT_VECTOR | VT_UI4:
  1506. PrintVectorItems( v.caul.pElems, v.caul.cElems, L"%u" ); break;
  1507. case VT_VECTOR | VT_ERROR:
  1508. PrintVectorItems( v.cascode.pElems, v.cascode.cElems, L"%#x" ); break;
  1509. case VT_VECTOR | VT_UI8:
  1510. PrintVectorItems( v.cauh.pElems, v.cauh.cElems, L"%I64u" ); break;
  1511. case VT_VECTOR | VT_BSTR:
  1512. PrintVectorItems( v.cabstr.pElems, v.cabstr.cElems, L"%ws" ); break;
  1513. case VT_VECTOR | VT_LPSTR:
  1514. PrintVectorItems( v.calpstr.pElems, v.calpstr.cElems, L"%S" ); break;
  1515. case VT_VECTOR | VT_LPWSTR:
  1516. PrintVectorItems( v.calpwstr.pElems, v.calpwstr.cElems, L"%ws" ); break;
  1517. case VT_VECTOR | VT_R4:
  1518. PrintVectorItems( v.caflt.pElems, v.caflt.cElems, L"%f" ); break;
  1519. case VT_VECTOR | VT_R8:
  1520. PrintVectorItems( v.cadbl.pElems, v.cadbl.cElems, L"%lf" ); break;
  1521. default :
  1522. {
  1523. if ( VT_ARRAY & v.vt )
  1524. PrintSafeArray( (VARTYPE) ( v.vt - VT_ARRAY ), v.parray );
  1525. else
  1526. outstr( L"vt 0x%05x", v.vt );
  1527. break;
  1528. }
  1529. }
  1530. } //DisplayValue
  1531. //+---------------------------------------------------------------------------
  1532. //
  1533. // Function: Filter
  1534. //
  1535. // Purpose: Invokes an IFilter on a file
  1536. //
  1537. // Arguments: [pwcInputFile] -- Filename to be filtered
  1538. // [filterLoad] -- How to load the file into the filter.
  1539. // [pwcModule] -- Optional module name to override COM lookup.
  1540. // [pCLSID] -- Optional class ID of the filter to use.
  1541. // Required if pwcModule is specified.
  1542. // [fShowStatusInfo] -- TRUE to get other information
  1543. // FALSE for only output from the filter
  1544. // [fGetText] -- TRUE to retrieve text, FALSE to skip it
  1545. //
  1546. //----------------------------------------------------------------------------
  1547. HRESULT Filter(
  1548. WCHAR const * pwcInputFile,
  1549. enumFilterLoadMechanism filterLoad,
  1550. WCHAR const * pwcModule,
  1551. CLSID * pCLSID,
  1552. BOOL fShowStatusInfo,
  1553. BOOL fGetText )
  1554. {
  1555. XInterface<IFilter> xFilter;
  1556. HRESULT hr = S_OK;
  1557. if ( 0 != pwcModule )
  1558. {
  1559. // If the DLL is specified, use it
  1560. if ( fShowStatusInfo )
  1561. out( L"loading filter based on module name" );
  1562. hr = CreateFromModule( *pCLSID,
  1563. IID_IFilter,
  1564. xFilter.GetQIPointer(),
  1565. pwcModule,
  1566. fShowStatusInfo );
  1567. }
  1568. else if ( 0 != pCLSID )
  1569. {
  1570. // If we just have a CLSID and no module, use it
  1571. if ( fShowStatusInfo )
  1572. out( L"loading filter based on CLSID and the registry" );
  1573. #ifdef USE_FAKE_COM
  1574. hr = FakeCoCreateInstance( *pCLSID,
  1575. IID_IFilter,
  1576. xFilter.GetQIPointer(),
  1577. fShowStatusInfo );
  1578. #else
  1579. hr = CoCreateInstance( *pCLSID,
  1580. 0,
  1581. CLSCTX_INPROC_SERVER,
  1582. IID_IFilter,
  1583. xFilter.GetQIPointer() );
  1584. #endif
  1585. }
  1586. else
  1587. {
  1588. // Use Indexing Service to load the filter
  1589. if ( fShowStatusInfo )
  1590. out( L"loading filter based on Indexing Service's LoadIFilter()" );
  1591. hr = LoadIFilter( pwcInputFile, 0, xFilter.GetQIPointer() );
  1592. if ( SUCCEEDED( hr ) && fShowStatusInfo )
  1593. {
  1594. // Dereference the VTable to get a pointer into the DLL
  1595. HMODULE hMod = GetModuleOfAddress( * (void **) xFilter.GetPointer() );
  1596. if ( 0 != hMod )
  1597. DisplayModuleInformation( hMod );
  1598. }
  1599. }
  1600. if ( FAILED( hr ) )
  1601. {
  1602. printf( "can't load the filter: %#x\n", hr );
  1603. return hr;
  1604. }
  1605. // Does the filter support IPersistStorage?
  1606. XInterface<IStorage> xStorage;
  1607. XInterface<IPersistStorage> xPersistStorage;
  1608. hr = xFilter->QueryInterface( IID_IPersistStorage,
  1609. xPersistStorage.GetQIPointer() );
  1610. if ( FAILED( hr ) )
  1611. {
  1612. if ( fShowStatusInfo )
  1613. out( L" filter doesn't support IPersistStorage, error %#x", hr );
  1614. if ( eIPersistStorage == filterLoad )
  1615. return hr;
  1616. }
  1617. else
  1618. {
  1619. if ( fShowStatusInfo )
  1620. out( L" filter supports IPersistStorage" );
  1621. if ( eIPersistStorage == filterLoad )
  1622. {
  1623. if ( fShowStatusInfo )
  1624. out( L" loading via IPersistStorage" );
  1625. hr = StgOpenStorage( pwcInputFile,
  1626. 0,
  1627. STGM_READ | STGM_SHARE_DENY_WRITE,
  1628. 0,
  1629. 0,
  1630. xStorage.GetPPointer() );
  1631. if ( FAILED( hr ) )
  1632. {
  1633. printf( "can't open the file into a storage %#x\n", hr );
  1634. return hr;
  1635. }
  1636. hr = xPersistStorage->Load( xStorage.GetPointer() );
  1637. if ( FAILED( hr ) )
  1638. {
  1639. printf( "can't Load() the storage into the filter %#x\n", hr );
  1640. return hr;
  1641. }
  1642. }
  1643. }
  1644. xPersistStorage.Free();
  1645. // Does the filter support IPersistStream?
  1646. XInterface<CIStream> xStream;
  1647. XInterface<IPersistStream> xPersistStream;
  1648. hr = xFilter->QueryInterface( IID_IPersistStream,
  1649. xPersistStream.GetQIPointer() );
  1650. if ( FAILED( hr ) )
  1651. {
  1652. if ( fShowStatusInfo )
  1653. out( L" filter doesn't support IPersistStream, error %#x", hr );
  1654. if ( eIPersistStream == filterLoad )
  1655. return hr;
  1656. }
  1657. else
  1658. {
  1659. if ( fShowStatusInfo )
  1660. out( L" filter supports IPersistStream" );
  1661. if ( eIPersistStream == filterLoad )
  1662. {
  1663. if ( fShowStatusInfo )
  1664. out( L" loading via IPersistStream" );
  1665. xStream.Set( new CIStream() );
  1666. hr = xStream->Open( pwcInputFile );
  1667. if ( FAILED( hr ) )
  1668. {
  1669. printf( "can't open the file into a stream %#x\n", hr );
  1670. return hr;
  1671. }
  1672. hr = xPersistStream->Load( xStream.GetPointer() );
  1673. if ( FAILED( hr ) )
  1674. {
  1675. printf( "can't Load() the stream into the filter %#x\n", hr );
  1676. return hr;
  1677. }
  1678. }
  1679. }
  1680. xPersistStream.Free();
  1681. // Does the filter support IPersistFile?
  1682. XInterface<IPersistFile> xPersistFile;
  1683. hr = xFilter->QueryInterface( IID_IPersistFile,
  1684. xPersistFile.GetQIPointer() );
  1685. if ( FAILED( hr ) )
  1686. {
  1687. if ( fShowStatusInfo )
  1688. out( L"filter doesn't support IPersistFile, error %#x\n", hr );
  1689. if ( eIPersistFile == filterLoad )
  1690. return hr;
  1691. }
  1692. else
  1693. {
  1694. if ( fShowStatusInfo )
  1695. out( L" filter supports IPersistFile" );
  1696. if ( eIPersistFile == filterLoad )
  1697. {
  1698. if ( fShowStatusInfo )
  1699. out( L" loading via IPersistFile" );
  1700. hr = xPersistFile->Load( pwcInputFile,
  1701. STGM_READ | STGM_SHARE_DENY_NONE );
  1702. if ( FAILED( hr ) )
  1703. {
  1704. printf( "can't Load() the file into the filter %#x\n", hr );
  1705. return hr;
  1706. }
  1707. }
  1708. }
  1709. xPersistFile.Free();
  1710. // Initailize the IFilter
  1711. ULONG ulFlags = 0;
  1712. hr = xFilter->Init( IFILTER_INIT_CANON_PARAGRAPHS |
  1713. IFILTER_INIT_HARD_LINE_BREAKS |
  1714. IFILTER_INIT_CANON_HYPHENS |
  1715. IFILTER_INIT_CANON_SPACES |
  1716. IFILTER_INIT_INDEXING_ONLY |
  1717. IFILTER_INIT_APPLY_INDEX_ATTRIBUTES,
  1718. 0,
  1719. 0,
  1720. &ulFlags );
  1721. if ( FAILED( hr ) )
  1722. {
  1723. printf( "can't Init() the filter, error %#x\n", hr );
  1724. return hr;
  1725. }
  1726. if ( fShowStatusInfo )
  1727. out( L" flags returned from IFilter::Init(): %#x", ulFlags );
  1728. // Pull all the data out of the filter
  1729. BOOL fText;
  1730. STAT_CHUNK StatChunk;
  1731. StatChunk.attribute.psProperty.ulKind = PRSPEC_PROPID;
  1732. do
  1733. {
  1734. const ULONG cwcMaxBuffer = 1024;
  1735. WCHAR awcBuffer[ cwcMaxBuffer ];
  1736. hr = xFilter->GetChunk( &StatChunk );
  1737. if ( FILTER_E_EMBEDDING_UNAVAILABLE == hr )
  1738. {
  1739. if ( fShowStatusInfo )
  1740. out( L"[-- encountered an embedding for which no filter is available --]" );
  1741. continue;
  1742. }
  1743. if ( FILTER_E_LINK_UNAVAILABLE == hr )
  1744. {
  1745. if ( fShowStatusInfo )
  1746. out( L"[-- encountered a link for which no filter is available --]" );
  1747. continue;
  1748. }
  1749. if ( FAILED( hr ) && hr != FILTER_E_END_OF_CHUNKS )
  1750. {
  1751. out( L"GetChunk returned error %#x", hr );
  1752. break;
  1753. }
  1754. if ( FILTER_E_END_OF_CHUNKS == hr )
  1755. break;
  1756. fText = ( CHUNK_TEXT == StatChunk.flags );
  1757. // Display information about the chunk
  1758. if ( fShowStatusInfo )
  1759. {
  1760. out( L"" );
  1761. out( L"----------------------------------------------------------------------" );
  1762. outstr( L" attribute: %08lx-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
  1763. StatChunk.attribute.guidPropSet.Data1,
  1764. StatChunk.attribute.guidPropSet.Data2,
  1765. StatChunk.attribute.guidPropSet.Data3,
  1766. StatChunk.attribute.guidPropSet.Data4[0],
  1767. StatChunk.attribute.guidPropSet.Data4[1],
  1768. StatChunk.attribute.guidPropSet.Data4[2],
  1769. StatChunk.attribute.guidPropSet.Data4[3],
  1770. StatChunk.attribute.guidPropSet.Data4[4],
  1771. StatChunk.attribute.guidPropSet.Data4[5],
  1772. StatChunk.attribute.guidPropSet.Data4[6],
  1773. StatChunk.attribute.guidPropSet.Data4[7] );
  1774. if ( StatChunk.attribute.psProperty.ulKind == PRSPEC_PROPID )
  1775. out( L" %d (%#x)",
  1776. StatChunk.attribute.psProperty.propid,
  1777. StatChunk.attribute.psProperty.propid );
  1778. else
  1779. out( L" \"%ws\"", StatChunk.attribute.psProperty.lpwstr );
  1780. out( L" idChunk: %d (%#x)", StatChunk.idChunk, StatChunk.idChunk );
  1781. outstr( L" breakType: %d (%#x)", StatChunk.breakType, StatChunk.breakType );
  1782. switch ( StatChunk.breakType )
  1783. {
  1784. case CHUNK_NO_BREAK: out( L" (no break) " ); break;
  1785. case CHUNK_EOW: out( L" (end of word) " ); break;
  1786. case CHUNK_EOS: out( L" (end of sentence) " ); break;
  1787. case CHUNK_EOP: out( L" (end of paragraph) " ); break;
  1788. case CHUNK_EOC: out( L" (end of chapter) " ); break;
  1789. default : out( L" (unknown break type) " ); break;
  1790. }
  1791. outstr( L" flags: %d (%#x)", StatChunk.flags, StatChunk.flags );
  1792. if ( CHUNK_TEXT & StatChunk.flags )
  1793. out( L" (text) " );
  1794. if ( CHUNK_VALUE & StatChunk.flags )
  1795. out( L" (value) " );
  1796. out( L" locale: %d (%#x)", StatChunk.locale, StatChunk.locale );
  1797. out( L" idChunkSource: %d (%#x)",
  1798. StatChunk.idChunkSource,
  1799. StatChunk.idChunkSource );
  1800. out( L" cwcStartSource: %d (%#x)",
  1801. StatChunk.cwcStartSource,
  1802. StatChunk.cwcStartSource );
  1803. out( L" cwcLenSource: %d (%#x)",
  1804. StatChunk.cwcLenSource,
  1805. StatChunk.cwcLenSource );
  1806. out( L" ------------------------------------------" );
  1807. }
  1808. if ( !fGetText )
  1809. continue;
  1810. // Retrieve all the data in the chunk
  1811. do
  1812. {
  1813. if ( fText )
  1814. {
  1815. ULONG cwcBuffer = cwcMaxBuffer;
  1816. hr = xFilter->GetText( &cwcBuffer, awcBuffer );
  1817. if ( FAILED( hr ) && ( FILTER_E_NO_MORE_TEXT != hr ) )
  1818. {
  1819. out( L"error %#x from GetText\n", hr );
  1820. return hr;
  1821. }
  1822. if ( FILTER_E_NO_MORE_TEXT == hr )
  1823. break;
  1824. awcBuffer[cwcBuffer] = 0;
  1825. out( L"%ws", awcBuffer );
  1826. if ( g_fDumpAsHex )
  1827. {
  1828. out( L"<--------> %d WCHARs in hex <-------->", cwcBuffer );
  1829. DumpStringAsHex( awcBuffer, cwcBuffer );
  1830. }
  1831. }
  1832. else
  1833. {
  1834. PROPVARIANT * pPropValue = 0;
  1835. hr = xFilter->GetValue( &pPropValue );
  1836. if ( FAILED( hr ) )
  1837. {
  1838. if ( ( FILTER_E_NO_MORE_VALUES == hr ) ||
  1839. ( FILTER_E_NO_VALUES == hr ) )
  1840. break;
  1841. out( L"GetValue failed, error %#x\n", hr );
  1842. return hr;
  1843. }
  1844. if ( fShowStatusInfo )
  1845. out( L"[-- variant type %d (%#x) --]", pPropValue->vt, pPropValue->vt );
  1846. DisplayValue( pPropValue );
  1847. out( L"" );
  1848. if ( 0 != pPropValue )
  1849. {
  1850. PropVariantClear( pPropValue );
  1851. CoTaskMemFree( pPropValue );
  1852. pPropValue = 0;
  1853. }
  1854. }
  1855. } while( TRUE ); // data in a chunk
  1856. } while( TRUE ); // for each chunk
  1857. if ( fShowStatusInfo )
  1858. {
  1859. out( L"" );
  1860. out( L"======================================================================" );
  1861. out( L"Filtering completed" );
  1862. }
  1863. xStream.Free();
  1864. xStorage.Free();
  1865. xFilter.Free();
  1866. // Now see if the file handle is still being locked by the filter
  1867. HANDLE hFile = CreateFile( pwcInputFile,
  1868. GENERIC_READ,
  1869. 0, //no sharing
  1870. 0,
  1871. OPEN_EXISTING,
  1872. FILE_ATTRIBUTE_NORMAL,
  1873. 0 );
  1874. if ( INVALID_HANDLE_VALUE == hFile )
  1875. {
  1876. out( L"Filter didn't release file; can't open %ws, error %#x\n", pwcInputFile, GetLastError() );
  1877. return HRESULT_FROM_WIN32( GetLastError() );
  1878. }
  1879. out( L"Filter closed file properly when released\n" );
  1880. CloseHandle( hFile );
  1881. return S_OK;
  1882. } //Filter
  1883. //+-------------------------------------------------------------------------
  1884. //
  1885. // Function: GetQueryFunctions
  1886. //
  1887. // Synopsis: Loads needed undocumented functions from query.dll.
  1888. //
  1889. // Returns: The module handle or 0 on failure.
  1890. //
  1891. //--------------------------------------------------------------------------
  1892. HINSTANCE GetQueryFunctions()
  1893. {
  1894. HINSTANCE h = LoadLibrary( L"query.dll" );
  1895. if ( 0 != h )
  1896. {
  1897. #ifdef _WIN64
  1898. char const * pcCIShutdown = "?CIShutdown@@YAXXZ";
  1899. #else
  1900. char const * pcCIShutdown = "?CIShutdown@@YGXXZ";
  1901. #endif
  1902. g_pCIShutdown = (PFnCIShutdown) GetProcAddress( h, pcCIShutdown );
  1903. if ( 0 == g_pCIShutdown )
  1904. {
  1905. printf( "can't get CIShutdown function address\n" );
  1906. FreeLibrary( h );
  1907. return 0;
  1908. }
  1909. g_pLoadTextFilter = (PFnLoadTextFilter)
  1910. GetProcAddress( h, "LoadTextFilter" );
  1911. if ( 0 == g_pLoadTextFilter )
  1912. {
  1913. printf( "can't get LoadTextFilter function address\n" );
  1914. FreeLibrary( h );
  1915. return 0;
  1916. }
  1917. }
  1918. return h;
  1919. } //GetQueryFunctions
  1920. //+-------------------------------------------------------------------------
  1921. //
  1922. // Function: ExceptionFilter
  1923. //
  1924. // Synopsis: Displays information about the exception
  1925. //
  1926. // Arguments: [pep] -- Exception pointers
  1927. //
  1928. // Returns: EXCEPTION_EXECUTE_HANDLER
  1929. //
  1930. //--------------------------------------------------------------------------
  1931. int ExceptionFilter( EXCEPTION_POINTERS * pep )
  1932. {
  1933. printf( "fatal exception caught\n" );
  1934. EXCEPTION_RECORD & r = * ( pep->ExceptionRecord );
  1935. printf( " exception code: %#x\n", r.ExceptionCode );
  1936. printf( " exception address %#p\n", r.ExceptionAddress );
  1937. if ( ( EXCEPTION_ACCESS_VIOLATION == r.ExceptionCode ) &&
  1938. ( r.NumberParameters >= 2 ) )
  1939. {
  1940. printf( " attempted %ws at address %#p\n",
  1941. ( 0 == r.ExceptionInformation[0] ) ?
  1942. L"read" : L"write",
  1943. (void *) r.ExceptionInformation[1] );
  1944. }
  1945. #ifdef _X86_
  1946. CONTEXT & c = * (CONTEXT *) (pep->ContextRecord );
  1947. if ( 0 != ( c.ContextFlags & CONTEXT_INTEGER ) )
  1948. {
  1949. printf( " eax: %#x\n", c.Eax );
  1950. printf( " ebx: %#x\n", c.Ebx );
  1951. printf( " ecx: %#x\n", c.Ecx );
  1952. printf( " edx: %#x\n", c.Edx );
  1953. printf( " edi: %#x\n", c.Edi );
  1954. printf( " esi: %#x\n", c.Esi );
  1955. }
  1956. if ( 0 != ( c.ContextFlags & CONTEXT_CONTROL ) )
  1957. {
  1958. printf( " ebp: %#x\n", c.Ebp );
  1959. printf( " eip: %#x\n", c.Eip );
  1960. printf( " esp: %#x\n", c.Esp );
  1961. }
  1962. #endif // _X86_
  1963. // Attempt to get the module name where the exception happened
  1964. HMODULE hMod = GetModuleOfAddress( r.ExceptionAddress );
  1965. if ( 0 != hMod )
  1966. {
  1967. WCHAR awcPath[ MAX_PATH ];
  1968. DWORD cwc= GetModuleFileName( hMod,
  1969. awcPath,
  1970. ArraySize( awcPath ) );
  1971. awcPath[ ArraySize( awcPath ) - 1 ] = 0;
  1972. if ( 0 != cwc )
  1973. printf( " exception in module %ws\n", awcPath );
  1974. }
  1975. return EXCEPTION_EXECUTE_HANDLER;
  1976. } //ExceptionFilter
  1977. //+-------------------------------------------------------------------------
  1978. //
  1979. // Function: wmain
  1980. //
  1981. // Synopsis: Main entrypoint for the program
  1982. //
  1983. // Arguments: [argc] -- Count of command-line arguments
  1984. // [argv] -- The command-line arguments
  1985. //
  1986. // Returns: Application return code
  1987. //
  1988. //--------------------------------------------------------------------------
  1989. extern "C" int __cdecl wmain( int argc, WCHAR * argv[] )
  1990. {
  1991. // Parse the command-line arguments
  1992. BOOL fWordBreak = FALSE;
  1993. BOOL fQuery = FALSE;
  1994. BOOL fStem = FALSE;
  1995. BOOL fFilter = FALSE;
  1996. BOOL fGetText = TRUE;
  1997. BOOL fShowStatusInfo = TRUE;
  1998. enumFilterLoadMechanism filterLoad = eIPersistFile;
  1999. WCHAR const * pwcModule = 0;
  2000. WCHAR const * pwcInputFile = 0;
  2001. WCHAR const * pwcOutputFile = 0;
  2002. WCHAR *pwcText = 0;
  2003. WCHAR const * pwcCLSID = 0;
  2004. ULONG cwcMaxToken = 100;
  2005. for ( int i = 1; i < argc; i++ )
  2006. {
  2007. if ( L'-' == argv[i][0] || L'/' == argv[i][0] )
  2008. {
  2009. WCHAR wc = towupper( argv[i][1] );
  2010. if ( ':' != argv[i][2] &&
  2011. 'B' != wc &&
  2012. 'D' != wc &&
  2013. 'F' != wc &&
  2014. 'T' != wc &&
  2015. 'N' != wc &&
  2016. 'Q' != wc &&
  2017. 'S' != wc )
  2018. Usage();
  2019. if ( 'C' == wc )
  2020. pwcCLSID = argv[i] + 3;
  2021. else if ( 'D' == wc )
  2022. g_fDumpAsHex = TRUE;
  2023. else if ( 'I' == wc )
  2024. {
  2025. if ( 0 != pwcText )
  2026. Usage();
  2027. pwcInputFile = argv[i] + 3;
  2028. }
  2029. else if ( 'M' == wc )
  2030. pwcModule = argv[i] + 3;
  2031. else if ( 'N' == wc )
  2032. fShowStatusInfo = FALSE;
  2033. else if ( 'O' == wc )
  2034. pwcOutputFile = argv[i] + 3;
  2035. else if ( 'S' == wc )
  2036. fStem = TRUE;
  2037. else if ( 'T' == wc )
  2038. fGetText = FALSE;
  2039. else if ( 'B' == wc )
  2040. fWordBreak = TRUE;
  2041. else if ( 'F' == wc )
  2042. {
  2043. fFilter = TRUE;
  2044. WCHAR wcNext = towupper( argv[i][2] );
  2045. if ( L'S' == wcNext )
  2046. filterLoad = eIPersistStream;
  2047. else if ( L'T' == wcNext )
  2048. filterLoad = eIPersistStorage;
  2049. else if ( 0 != wcNext )
  2050. Usage();
  2051. }
  2052. else if ( 'Q' == wc )
  2053. fQuery = TRUE;
  2054. else if ( 'X' == wc )
  2055. cwcMaxToken = _wtoi( argv[i] + 3 );
  2056. else
  2057. Usage();
  2058. }
  2059. else if ( 0 != pwcText || 0 != pwcInputFile )
  2060. Usage();
  2061. else
  2062. pwcText = argv[i];
  2063. }
  2064. // We have to either wordbreak, stem, or filter
  2065. if ( ( fWordBreak + fStem + fFilter ) != 1 )
  2066. Usage();
  2067. // We need the classid of the wordbreaker or stemmer to load
  2068. if ( ( fWordBreak || fStem ) && ( 0 == pwcCLSID ) )
  2069. Usage();
  2070. // If we're loading by module, we need a CLSID
  2071. if ( ( 0 != pwcModule ) && ( 0 == pwcCLSID ) )
  2072. Usage();
  2073. // Need input text or an input file to wordbreak
  2074. if ( fWordBreak && ( 0 == pwcText ) && ( 0 == pwcInputFile ) )
  2075. Usage();
  2076. // Need input text to stem
  2077. if ( fStem && ( 0 == pwcText ) )
  2078. Usage();
  2079. // Need input file to filter
  2080. if ( fFilter && ( 0 == pwcInputFile ) )
  2081. Usage();
  2082. CLSID clsid;
  2083. if ( 0 != pwcCLSID )
  2084. {
  2085. HRESULT hr = CLSIDFromString( (LPOLESTR) pwcCLSID, &clsid );
  2086. if ( FAILED( hr ) )
  2087. {
  2088. printf( "can't convert CLSID string to a CLSID: %#x\n", hr );
  2089. exit( 1 );
  2090. }
  2091. }
  2092. // Get the full path of the input file, if specified
  2093. WCHAR awcPath[MAX_PATH];
  2094. if ( 0 != pwcInputFile )
  2095. {
  2096. _wfullpath( awcPath, pwcInputFile, MAX_PATH );
  2097. pwcInputFile = awcPath;
  2098. }
  2099. // Get the full path of the output file, if specified, then open it
  2100. WCHAR awcOutputPath[MAX_PATH];
  2101. if ( 0 != pwcOutputFile )
  2102. {
  2103. _wfullpath( awcOutputPath, pwcOutputFile, MAX_PATH );
  2104. pwcOutputFile = awcOutputPath;
  2105. g_fpOut = _wfopen( pwcOutputFile, L"wb" );
  2106. if ( 0 == g_fpOut )
  2107. {
  2108. printf( "unable to open output file '%ws'\n", pwcOutputFile );
  2109. exit( 1 );
  2110. }
  2111. const WCHAR awcUnicodeHeader[] = { 0xfeff, 0x0000 };
  2112. fwprintf( g_fpOut, awcUnicodeHeader );
  2113. }
  2114. // Initialize COM multi-threaded, just like search products do
  2115. HRESULT hr = CoInitializeEx( 0, COINIT_MULTITHREADED );
  2116. if ( FAILED( hr ) )
  2117. {
  2118. printf( "can't initialize com: %#x\n", hr );
  2119. exit( 1 );
  2120. }
  2121. // Load query.dll private exports
  2122. HINSTANCE hQuery = GetQueryFunctions();
  2123. if ( 0 == hQuery )
  2124. {
  2125. printf( "can't load needed functions from query.dll\n" );
  2126. exit( 1 );
  2127. }
  2128. // Do the work
  2129. __try
  2130. {
  2131. if ( fStem )
  2132. Stem( pwcText,
  2133. pwcModule,
  2134. clsid,
  2135. cwcMaxToken );
  2136. if ( fWordBreak )
  2137. WordBreak( fQuery,
  2138. pwcText,
  2139. pwcInputFile,
  2140. pwcModule,
  2141. clsid,
  2142. cwcMaxToken );
  2143. if ( fFilter )
  2144. Filter( pwcInputFile,
  2145. filterLoad,
  2146. pwcModule,
  2147. ( 0 == pwcCLSID ) ? 0 : &clsid,
  2148. fShowStatusInfo,
  2149. fGetText );
  2150. }
  2151. __except( ExceptionFilter( GetExceptionInformation() ) )
  2152. {
  2153. printf( "fatal exception code %#x\n", GetExceptionCode() );
  2154. exit( -1 );
  2155. }
  2156. // Shut down query.dll's filter loading code so it won't AV on exit.
  2157. g_pCIShutdown();
  2158. FreeLibrary( hQuery );
  2159. CoUninitialize();
  2160. if ( 0 != g_fpOut )
  2161. {
  2162. fclose( g_fpOut );
  2163. g_fpOut = 0;
  2164. }
  2165. return 0;
  2166. } //wmain