Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

903 lines
28 KiB

  1. /*************************************************************************
  2. * *
  3. * STOP.C *
  4. * *
  5. * Copyright (C) Microsoft Corporation 1990-1994 *
  6. * All Rights reserved. *
  7. * *
  8. **************************************************************************
  9. * *
  10. * Module Intent *
  11. * Stop list indexing and retrieval *
  12. * *
  13. **************************************************************************
  14. * *
  15. * Written By : Binh Nguyen *
  16. * Current Owner: Binh Nguyen *
  17. * *
  18. *************************************************************************/
  19. #include <mvopsys.h>
  20. #include <orkin.h>
  21. #include <mem.h>
  22. #include <memory.h>
  23. #include <io.h>
  24. #include <mvsearch.h>
  25. #include "common.h"
  26. #ifdef _DEBUG
  27. static BYTE NEAR s_aszModule[] = __FILE__; /* Used by error return functions.*/
  28. #endif
  29. #define cbSTOP_BUF ((CB)512) // Number of bytes read at a time
  30. // from the stop-word file.
  31. /*************************************************************************
  32. *
  33. * API FUNCTIONS
  34. * Those functions should be exported in a .DEF file
  35. *************************************************************************/
  36. PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListAddWord(LPSIPB, LST);
  37. PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListIndexLoad (HFPB, LPSIPB, LSZ);
  38. PUBLIC LPSIPB EXPORT_API FAR PASCAL MVStopListInitiate(WORD, PHRESULT);
  39. PUBLIC void EXPORT_API FAR PASCAL MVStopListDispose(LPSIPB);
  40. PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListLoad(HFPB, LPSIPB, LSZ,
  41. BREAKER_FUNC, LPV);
  42. PUBLIC HRESULT EXPORT_API PASCAL FAR MVStopFileBuild (HFPB, LPSIPB, LSZ);
  43. PUBLIC LPCHAIN EXPORT_API FAR PASCAL MVStopListFind(_LPSIPB lpsipb, LST lstWord);
  44. /*************************************************************************
  45. *
  46. * INTERNAL PRIVATE FUNCTIONS
  47. * All of them should be declared near
  48. *************************************************************************/
  49. PRIVATE WORD NEAR PASCAL GetHashKey (WORD, LST);
  50. /*************************************************************************
  51. *
  52. * INTERNAL PUBLIC FUNCTIONS
  53. * All of them should be declared far, and included in some include file
  54. *************************************************************************/
  55. PUBLIC HRESULT FAR PASCAL FStopCallback(LST, LST, LFO, LPV);
  56. /*************************************************************************
  57. * @doc API RETRIEVAL
  58. *
  59. * @func LPSIPB FAR PASCAL | MVStopListInitiate |
  60. * Create and initiate a stop-word information structure
  61. *
  62. * @parm PHRESULT | phr |
  63. * Pointer to error buffer.
  64. *
  65. * @parm WORD | wTabSize |
  66. * Table size in DWORD. The process of stop word checking will
  67. * be faster with larger values of dwTabSize.
  68. *
  69. * @rdesc the pointer to the stop-list structure if succeeded,
  70. * NULL if failed. The error buffer will contain descriptions about
  71. * the cause of the failure
  72. *************************************************************************/
  73. PUBLIC LPSIPB EXPORT_API FAR PASCAL MVStopListInitiate(WORD wTabSize,
  74. PHRESULT phr)
  75. {
  76. _LPSIPB lpsipb;
  77. if (wTabSize < HASH_SIZE)
  78. wTabSize = HASH_SIZE;
  79. /* Allocate a StopInfo structure */
  80. if ((lpsipb = (_LPSIPB)GLOBALLOCKEDSTRUCTMEMALLOC(sizeof(SIPB) +
  81. wTabSize * sizeof(LPB))) == NULL)
  82. {
  83. exit00:
  84. SetErrCode(phr, E_OUTOFMEMORY);
  85. return NULL;
  86. }
  87. lpsipb->HashTab = (LPCHAIN FAR *)((LPB)lpsipb + sizeof(SIPB));
  88. /* Allocate a word block buffer */
  89. if ((lpsipb->lpBlkMgr = BlockInitiate (WORDBUF_SIZE, 0, 0, 0)) == NULL)
  90. {
  91. GlobalLockedStructMemFree((LPV)lpsipb);
  92. goto exit00;
  93. }
  94. lpsipb->wTabSize = wTabSize; /* Size of hash table */
  95. lpsipb->lpfnStopListLookup = MVStopListLookup;
  96. return (LPSIPB)lpsipb;
  97. }
  98. /*************************************************************************
  99. * @doc API RETRIEVAL
  100. *
  101. * @func HRESULT FAR PASCAL | MVStopListAddWord |
  102. * Add a word to a stop list
  103. *
  104. * @parm LPSIPB | lpsipb |
  105. * Pointer to stop-word information structure
  106. *
  107. * @parm LST | lstWord |
  108. * Pointer to 2-byte length preceded Pascal word to be added
  109. * into the stop-word list
  110. *
  111. * @rdesc S_OK if succeeded
  112. *************************************************************************/
  113. PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListAddWord(_LPSIPB lpsipb, LST lstWord)
  114. {
  115. WORD wHash;
  116. LPCHAIN lpChain;
  117. WORD wByteUsed;
  118. // Sanity check
  119. if (lpsipb == NULL || lstWord == NULL)
  120. return(E_INVALIDARG);
  121. /* Look for the word. If it is already there then just
  122. * return S_OK, don't add it into the list
  123. */
  124. if (lpChain = MVStopListFind (lpsipb, lstWord))
  125. {
  126. // Don't add if already there.
  127. lpChain->dwCount++;
  128. return S_OK;
  129. }
  130. wByteUsed = *(LPUW)lstWord + 2;
  131. #ifndef _32BIT
  132. if (lpsipb->cbTextUsed + wByteUsed > MAX_STOPWORD_BUFSIZE) {
  133. /* There are too many stop words */
  134. return ERR_TOOMANYSTOPS;
  135. }
  136. #endif
  137. lpsipb->cbTextUsed += wByteUsed ;
  138. /* Copy the word into the word buffer block */
  139. if ((lpChain = (LPCHAIN)BlockCopy (lpsipb->lpBlkMgr, lstWord, wByteUsed,
  140. sizeof(CHAIN) - 1)) == NULL)
  141. return E_OUTOFMEMORY;
  142. lpChain->dwCount = 0;
  143. /* Compute hash key */
  144. wHash = GetHashKey(lpsipb->wTabSize, lstWord);
  145. /* Add the word to the hash table */
  146. CH_NEXT(lpChain) = lpsipb->HashTab[wHash];
  147. lpsipb->HashTab[wHash] = lpChain;
  148. return S_OK; // Function worked.
  149. }
  150. /*************************************************************************
  151. * @doc API RETRIEVAL
  152. *
  153. * @func void FAR PASCAL | MVStopListDispose |
  154. * Frees memory associated with a stop list.
  155. *
  156. * @parm LPSIPB | lpsipb |
  157. * Pointer to stop-word information structure
  158. *************************************************************************/
  159. PUBLIC void EXPORT_API FAR PASCAL MVStopListDispose (_LPSIPB lpsipb)
  160. {
  161. if (lpsipb == NULL)
  162. return;
  163. /* Free the word buffer */
  164. BlockFree(lpsipb->lpBlkMgr);
  165. /* Free the stop info structure */
  166. GlobalLockedStructMemFree((LPV)lpsipb);
  167. }
  168. /*************************************************************************
  169. * @doc API RETRIEVAL
  170. *
  171. * @func HRESULT FAR PASCAL | MVStopListIndexLoad |
  172. * Read a stop-word list stored in the subfile/dos file.
  173. *
  174. * @parm HFPB | hfpb |
  175. * Handle to input file. Can be mvfs subfile or separate dos file.
  176. *
  177. * @parm LPSIPB | lpsipb |
  178. * Pointer to stop-word information structure
  179. *
  180. * @parm LPIDX | lpidx |
  181. * Pointer to index structure
  182. *
  183. * @parm LSZ | lszWordBreaker |
  184. * Word breaker to be used
  185. *
  186. * @rdesc S_OK if succeeded, other errors if failed.
  187. *************************************************************************/
  188. /*
  189. The strings are stored in the file in a sequence of pascal strings
  190. */
  191. PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListIndexLoad (HFPB hfpbSysFile,
  192. _LPSIPB lpsipb, LSZ lszStopFile)
  193. {
  194. BYTE argbInBuf[CB_STOP_BUF];
  195. FILEOFFSET lfo;
  196. FILEOFFSET foStart;
  197. HFPB hfpbSubFile;
  198. BOOL fOpenedFile;
  199. HRESULT fRet = S_OK;
  200. WORD cbRead;
  201. int fLast;
  202. LPSTOP lpStopHdr;
  203. LPB lpWord;
  204. WORD wOffsetInBuf;
  205. WORD wLen;
  206. ERRB errb;
  207. /* Sanity check */
  208. if (lpsipb == NULL)
  209. return SetErrCode (NULL, E_INVALIDARG);
  210. /* Open the subfile */
  211. if ((fOpenedFile =
  212. FsTypeFromHfpb(hfpbSubFile = hfpbSysFile) != FS_SUBFILE) &&
  213. (hfpbSubFile = FileOpen
  214. (hfpbSysFile, lszStopFile, hfpbSysFile ? FS_SUBFILE : REGULAR_FILE,
  215. READ, &errb)) == NULL)
  216. {
  217. return errb;
  218. }
  219. // If we didn't open the file, we need to find out where the file seek
  220. // pointer is initially so that we only seek relative to that starting
  221. // position (i.e. the caller owns the part of the file that comes before).
  222. foStart = (fOpenedFile ? MakeFo(0,0) :
  223. FileSeek (hfpbSubFile, MakeFo (0, 0), wFSSeekCur, &fRet));
  224. /* Read and check the file validity */
  225. if (FAILED(fRet) ||
  226. (cbRead = (WORD)FileSeekRead
  227. (hfpbSubFile, (LPV)(lpStopHdr = (LPSTOP)argbInBuf),
  228. FoAddFo(foStart, MakeFo(0, 0)), sizeof(STOP_HDR), &fRet))
  229. != sizeof(STOP_HDR))
  230. {
  231. exit01:
  232. // Close file only if we were the one's who opened it.
  233. if (fOpenedFile)
  234. (void)FileClose(hfpbSubFile); // Return value not checked
  235. // because the file is open
  236. // for read-only.
  237. return fRet;
  238. }
  239. /* MAC codes. They will be eliminated through optimization */
  240. lpStopHdr->FileStamp = SWAPWORD(lpStopHdr->FileStamp);
  241. lpStopHdr->version = SWAPWORD(lpStopHdr->version);
  242. lpStopHdr->dwFileSize = SWAPLONG(lpStopHdr->dwFileSize);
  243. if (lpStopHdr->FileStamp != STOP_STAMP ||
  244. lpStopHdr->version != VERCURRENT)
  245. {
  246. fRet = SetErrCode(&errb, E_BADVERSION);
  247. goto exit01;
  248. }
  249. /* Start at the beginning of the buffer */
  250. wOffsetInBuf = 0;
  251. for (lfo = FoAddFo(foStart, MakeFo(STOP_HDR_SIZE, 0));;)
  252. {
  253. LPB lpbCur;
  254. WORD cbReadOurs = 0;
  255. if ((cbRead = (WORD)FileSeekRead(hfpbSubFile,
  256. lpbCur = ((LPB)argbInBuf + wOffsetInBuf), lfo,
  257. CB_STOP_BUF - wOffsetInBuf, &errb)) == cbIO_ERROR)
  258. {
  259. SetErrCode(&errb, fRet = E_FILEREAD);
  260. goto exit01;
  261. }
  262. lfo = FoAddDw(lfo, (DWORD)cbRead);
  263. while (cbRead - cbReadOurs++ >= sizeof(WORD))
  264. {
  265. if (*((WORD UNALIGNED * UNALIGNED)lpbCur) == 0)
  266. {
  267. FILEOFFSET foCur;
  268. // Get our current seek position.
  269. foCur = FileSeek (hfpbSubFile, MakeFo (0, 0), wFSSeekCur, &fRet);
  270. // We already advanced cbReadOurs by one in the loop
  271. // condition; advance it by one more to account for
  272. // the second byte of the NULL word. Then we move
  273. // the seek pointer back by the difference so that we
  274. // don't leave it past the end of our data.
  275. FileSeek (hfpbSubFile,
  276. FoSubFo(foCur, MakeFo(cbRead - ++cbReadOurs, 0)),
  277. wFSSeekSet, &fRet);
  278. ITASSERT(SUCCEEDED(fRet));
  279. cbRead = cbReadOurs;
  280. fLast = TRUE;
  281. }
  282. else
  283. lpbCur++;
  284. }
  285. cbRead += wOffsetInBuf; // Catch what's left from previous scan
  286. wOffsetInBuf = 0;
  287. /* Add the word into the stop word list */
  288. for (lpWord = argbInBuf; cbRead > 0;)
  289. {
  290. /* If the whole word has been read in, just add it to the
  291. stop list, else we have to "reconstruct" it
  292. */
  293. // erinfox: we have to byte-swap on Mac
  294. *(WORD UNALIGNED * UNALIGNED)lpWord = SWAPWORD(*(WORD UNALIGNED * UNALIGNED)lpWord);
  295. wLen = *(LPUW)(lpWord) + 2;
  296. if (wLen <= cbRead)
  297. {
  298. /* Everything fits */
  299. if ((fRet = MVStopListAddWord(lpsipb, lpWord)) != S_OK)
  300. goto exit01;
  301. cbRead -= wLen;
  302. lpWord += wLen; /* Move to next word */
  303. }
  304. else
  305. {
  306. /* Copy the word to the beginning of the buffer */
  307. MEMCPY(argbInBuf, lpWord, cbRead);
  308. wOffsetInBuf = cbRead;
  309. break;
  310. }
  311. }
  312. if (fLast)
  313. break;
  314. }
  315. fRet = S_OK; // Succeeded
  316. goto exit01;
  317. }
  318. /*************************************************************************
  319. * @doc API INDEX RETRIEVAL
  320. *
  321. * @func HRESULT FAR PASCAL | MVStopListLoad |
  322. * Read a stop-word list from an external file. The file must have
  323. * only one stop word per line, or else there is potential loss
  324. * of stop words.
  325. *
  326. * @parm LPSIPB | lpsipb |
  327. * Pointer to stop-word information structure
  328. *
  329. * @parm LSZ | lszStopFile |
  330. * Stop word filename. This is a simple ASCII text file
  331. *
  332. * @parm BREAKER_FUNC | lpfnBreakFunc |
  333. * Word breaker to be used
  334. *
  335. * @parm PHRESULT | phr |
  336. * Pointer to error buffer.
  337. *
  338. * @rdesc S_OK if succeeded, other errors failed.
  339. *************************************************************************/
  340. PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListLoad(HFPB hfpbIn, _LPSIPB lpsipb,
  341. LSZ lszStopFile, BREAKER_FUNC lpfnBreakFunc,
  342. LPCHARTAB lpCharTab)
  343. {
  344. BYTE argbInBuf[cbSTOP_BUF]; // IO buffer
  345. HFPB hfpb; // File handle
  346. BOOL fOpenedFile;
  347. _LPIBI lpibi; // Pointer to internal breaker info
  348. HANDLE hbi; // Handle to internal brekaer info
  349. HRESULT fRet; // Returned value
  350. BRK_PARMS brkParms; // Breaker parameters structure
  351. LPB lpStart; // Beginning of strings to be parsed
  352. LPB lpEnd; // End of strings to be parsed
  353. WORD wStrLength; // Bytes in string
  354. CB cbTobeRead; // Bytes to be read
  355. CB cbRead; // Bytes actually read
  356. int fLast; // TRUE if this is the last read
  357. int fGetWord; // TRUE if we get a whole word
  358. /* Sanity check */
  359. if (lpsipb == NULL || (lszStopFile == NULL && hfpbIn == NULL)
  360. || lpfnBreakFunc == NULL)
  361. return E_INVALIDARG;
  362. if ((fOpenedFile = FsTypeFromHfpb(hfpb = hfpbIn) != FS_SUBFILE) &&
  363. (hfpb = (HANDLE)FileOpen
  364. (hfpbIn, lszStopFile, hfpbIn ? FS_SUBFILE : REGULAR_FILE,
  365. READ, &fRet)) == 0)
  366. {
  367. return (fRet);
  368. }
  369. /* Allocate a breaker info block */
  370. if ((hbi = _GLOBALALLOC(DLLGMEM_ZEROINIT, (LCB)sizeof(IBI))) == NULL)
  371. {
  372. return E_OUTOFMEMORY;
  373. }
  374. lpibi = (_LPIBI)_GLOBALLOCK(hbi);
  375. /* Initialize variables */
  376. brkParms.lcbBufOffset = 0L;
  377. brkParms.lpInternalBreakInfo = lpibi;
  378. brkParms.lpvUser = lpsipb;
  379. brkParms.lpfnOutWord = (FWORDCB)FStopCallback;
  380. brkParms.lpStopInfoBlock = NULL;
  381. brkParms.lpCharTab = lpCharTab;
  382. cbTobeRead = cbSTOP_BUF; // Read in a buffer whole
  383. lpStart = lpEnd = (LPB)argbInBuf; // Start & End of string
  384. fGetWord = FALSE; // We didn't get any word yet
  385. wStrLength = 0;
  386. /* The idea is to break the file into sequences of lines, and pass
  387. * each line to the word breaker. The assumption made is that we
  388. * should only have one word per line, since various type breakers
  389. * can only handle one word a type.
  390. */
  391. for (;;)
  392. {
  393. cbRead = (WORD)FileRead(hfpb, lpEnd, cbTobeRead, &fRet);
  394. if (FAILED(fRet))
  395. {
  396. exit01:
  397. /* Free breaker info block */
  398. _GLOBALUNLOCK(hbi);
  399. _GLOBALFREE(hbi);
  400. /* Close the file */
  401. if (fOpenedFile)
  402. FileClose(hfpb);
  403. return fRet;
  404. }
  405. else
  406. fLast = (cbRead != cbTobeRead);
  407. lpEnd = lpStart;
  408. cbRead += wStrLength; // Get what left in buffer
  409. wStrLength = 0;
  410. while (cbRead != (CB)-1)
  411. {
  412. /* Break the buffer into lines */
  413. if (*lpEnd == '\r' || *lpEnd == '\n' || !cbRead)
  414. {
  415. if (wStrLength)
  416. {
  417. /* Process the word we got */
  418. brkParms.lpbBuf = lpStart;
  419. brkParms.cbBufCount = wStrLength;
  420. if ((fRet = (*lpfnBreakFunc)((LPBRK_PARMS)&brkParms))
  421. != S_OK)
  422. goto exit01;
  423. /* Flush the breaker buffer */
  424. brkParms.lpbBuf = NULL;
  425. brkParms.cbBufCount = 0;
  426. if ((fRet = (*lpfnBreakFunc)((LPBRK_PARMS)&brkParms))
  427. != S_OK)
  428. goto exit01;
  429. wStrLength = 0;
  430. }
  431. }
  432. else
  433. {
  434. /* Update the pointer to the new word */
  435. if (wStrLength == 0)
  436. lpStart = lpEnd;
  437. wStrLength++; // Increase string's length
  438. }
  439. cbRead--;
  440. lpEnd++;
  441. }
  442. if (fLast)
  443. break;
  444. /* Now copy the partial string to the beginning of the buffer */
  445. MEMCPY(argbInBuf, lpStart, wStrLength);
  446. lpEnd = (lpStart = argbInBuf) + wStrLength;
  447. cbTobeRead = cbSTOP_BUF - wStrLength; // Read in a buffer whole
  448. }
  449. if (wStrLength)
  450. {
  451. /* Flush the breaker buffer */
  452. brkParms.lpbBuf = NULL;
  453. brkParms.cbBufCount = 0;
  454. if ((fRet = (*lpfnBreakFunc)((LPBRK_PARMS)&brkParms)) != S_OK)
  455. goto exit01;
  456. }
  457. fRet = S_OK; // Succeeded
  458. goto exit01;
  459. }
  460. /*************************************************************************
  461. * @doc INTERNAL
  462. *
  463. * @func WORD NEAR PASCAL | GetHashKey |
  464. * Compute the hash key of a string. This key is used for indexing
  465. * into the stop word hash table
  466. *
  467. * @parm LST | lstWord |
  468. * Pointer to a 2-byte length preceded Pascal-type string
  469. *
  470. * @rdesc
  471. * Return the index into the stop words hash table
  472. *************************************************************************/
  473. PRIVATE WORD NEAR PASCAL GetHashKey (WORD hashSize, LST lstWord)
  474. {
  475. register unsigned int wHash;
  476. register unsigned int nLength;
  477. wHash = 0;
  478. nLength = *(LPUW)lstWord;
  479. lstWord += sizeof(WORD);
  480. for (; nLength; nLength--)
  481. {
  482. wHash = (wHash << 1) | (wHash >> 15);
  483. wHash ^= *lstWord++;
  484. }
  485. wHash %= hashSize;
  486. return ((WORD)wHash);
  487. }
  488. /*************************************************************************
  489. * @doc API RETRIEVAL INDEX
  490. *
  491. * @func LPCHAIN FAR PASCAL | MVStopListFind |
  492. * This looks for a word (lstWord) in a stop-word (lpsipb)
  493. *
  494. * @parm LPSIPB | lpsipb |
  495. * Pointer to stop-word list structure
  496. *
  497. * @parm LST | lstWord |
  498. * Pointer to string to be looked for
  499. *
  500. * @rdesc Pointer to the node if found, NULL otherwise
  501. *************************************************************************/
  502. PUBLIC LPCHAIN EXPORT_API FAR PASCAL MVStopListFind(_LPSIPB lpsipb, LST lstWord)
  503. {
  504. WORD wHash; // Hash key
  505. LPCHAIN lpChain; // Pointer to the word chain
  506. // Sanity check
  507. if (lpsipb == NULL || lstWord == NULL)
  508. return(NULL);
  509. /* Compute hash key */
  510. wHash = GetHashKey(lpsipb->wTabSize, lstWord);
  511. lpChain = lpsipb->HashTab[wHash];
  512. while (lpChain)
  513. {
  514. if (!StringDiff2 (&CH_WORD(lpChain), lstWord))
  515. return (lpChain);
  516. lpChain = CH_NEXT(lpChain);
  517. }
  518. return (NULL);
  519. }
  520. /*************************************************************************
  521. * @doc API RETRIEVAL INDEX
  522. *
  523. * @func HRESULT FAR PASCAL | MVStopListLookup |
  524. * This looks for a word (lstWord) in a stop-word (lpsipb)
  525. *
  526. * @parm LPSIPB | lpsipb |
  527. * Pointer to stop-word list structure
  528. *
  529. * @parm LST | lstWord |
  530. * Pointer to string to be looked for
  531. *
  532. * @rdesc S_OK if found, E_FAIL if not, or other errors
  533. *************************************************************************/
  534. PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListLookup(_LPSIPB lpsipb, LST lstWord)
  535. {
  536. WORD wHash; // Hash key
  537. LPCHAIN lpChain; // Pointer to the word chain
  538. // Sanity check
  539. if (lpsipb == NULL || lstWord == NULL)
  540. return(E_INVALIDARG);
  541. /* Compute hash key */
  542. wHash = GetHashKey(lpsipb->wTabSize, lstWord);
  543. lpChain = lpsipb->HashTab[wHash];
  544. while (lpChain)
  545. {
  546. if (!StringDiff2 (&CH_WORD(lpChain), lstWord))
  547. return (S_OK);
  548. lpChain = CH_NEXT(lpChain);
  549. }
  550. return (E_FAIL);
  551. }
  552. /*************************************************************************
  553. * @doc API INDEX
  554. *
  555. * @func HRESULT PASCAL FAR | MVStopFileBuild |
  556. * Incorporate the stop word list into the system file
  557. *
  558. * @parm HFPB | hpfbSysFile |
  559. * If non-zero, handle to an opened system file.
  560. *
  561. * @parm LPSIPB | lpsipb |
  562. * Pointer to stop-word information structure
  563. *
  564. * @parm LSZ | lszFilename |
  565. * If hpfbSysFile is non-zero, this is the name of the stop's subfile
  566. * else this is a regular DOS file
  567. *
  568. * @rdesc S_OK if succeeded, E_FAIL if tehre is nothing to build
  569. * or other errors
  570. *************************************************************************/
  571. PUBLIC HRESULT EXPORT_API PASCAL FAR MVStopFileBuild (HFPB hfpbSysFile,
  572. _LPSIPB lpsipb, LSZ lszFilename)
  573. {
  574. HFPB hfpbStop; // Pointer to final index file info.
  575. HRESULT fRet = S_OK;
  576. STOP_HDR Stop_hdr;
  577. HFPB hfpb = 0;
  578. BOOL fCreatedFile;
  579. BYTE Dummy[STOP_HDR_SIZE]; // Dummy buffer to write 0
  580. int i;
  581. LPCHAIN lpChain;
  582. LST lstWord;
  583. WORD wLen;
  584. CB cbByteLeft;
  585. GHANDLE hBuf;
  586. LPB lpbBuf;
  587. LPB lpbStart;
  588. LPB lpbLimit;
  589. ERRB errb;
  590. FILEOFFSET fo;
  591. FILEOFFSET foStart;
  592. /* Sanity check */
  593. if (lpsipb == NULL || (lszFilename == NULL && hfpbSysFile == NULL))
  594. return E_INVALIDARG;
  595. if (lpsipb->cbTextUsed == 0)
  596. return E_FAIL; /* Nothing to build */
  597. if ((fCreatedFile =
  598. FsTypeFromHfpb(hfpbStop = hfpbSysFile) != FS_SUBFILE) &&
  599. (hfpbStop = FileCreate(hfpbSysFile, lszFilename,
  600. hfpbSysFile ? FS_SUBFILE: REGULAR_FILE, &errb)) == 0)
  601. return errb;
  602. // If we didn't open the file, we need to find out where the file seek
  603. // pointer is initially so that we only seek relative to that starting
  604. // position (i.e. the caller owns the part of the file that comes before).
  605. foStart = (fCreatedFile ? MakeFo(0,0) :
  606. FileSeek (hfpbStop, MakeFo (0, 0), wFSSeekCur, &fRet));
  607. if (FAILED(fRet))
  608. goto exit01;
  609. /* Write out the stop file header */
  610. Stop_hdr.FileStamp = STOP_STAMP;
  611. Stop_hdr.version = VERCURRENT;
  612. Stop_hdr.dwFileSize = lpsipb->cbTextUsed;
  613. MEMSET(Dummy, 0, STOP_HDR_SIZE);
  614. /* Write all zeroes to the header area, which is larger than the
  615. * STOP_HDR structure.
  616. */
  617. if (FileSeekWrite (hfpbStop, Dummy, FoAddFo(foStart, MakeFo (0, 0)),
  618. STOP_HDR_SIZE, &errb) != STOP_HDR_SIZE)
  619. {
  620. fRet = errb;
  621. exit01:
  622. if (fCreatedFile)
  623. FileClose (hfpbStop);
  624. return(fRet);
  625. }
  626. if (FileSeekWrite (hfpbStop, &Stop_hdr, FoAddFo(foStart, MakeFo (0, 0)),
  627. sizeof (STOP_HDR), &errb) != sizeof (STOP_HDR))
  628. {
  629. fRet = errb;
  630. goto exit01;
  631. }
  632. /* Allocate a buffer to flush the data */
  633. if ((hBuf = _GLOBALALLOC (DLLGMEM, cbByteLeft = CB_HUGE_BUF)) == NULL)
  634. {
  635. SetErrCode (&errb, fRet = E_OUTOFMEMORY);
  636. goto exit01;
  637. }
  638. lpbBuf = lpbStart = (LPB)_GLOBALLOCK(hBuf);
  639. lpbLimit = lpbStart + CB_HUGE_BUF - CB_MAX_WORD_LEN;
  640. /* Seek the file to the correct offset */
  641. fo = FoAddFo(foStart, MakeFo (STOP_HDR_SIZE, 0));
  642. if (!FoEquals (FileSeek (hfpbStop, fo, 0, &errb), fo))
  643. {
  644. fRet = E_FILESEEK;
  645. exit02:
  646. _GLOBALUNLOCK(hBuf);
  647. _GLOBALFREE(hBuf);
  648. goto exit01;
  649. }
  650. /* Write out the buffer */
  651. for (i = lpsipb->wTabSize - 1; i >= 0; i--)
  652. {
  653. for (lpChain = lpsipb->HashTab[i]; lpChain;
  654. lpChain = CH_NEXT(lpChain))
  655. {
  656. lstWord = &CH_WORD (lpChain);
  657. MEMCPY (lpbBuf, lstWord, wLen = *(WORD FAR *)lstWord + 2);
  658. lpbBuf += wLen;
  659. if (lpbBuf >= lpbLimit)
  660. {
  661. /* No more room, just flush the buffer */
  662. FileWrite(hfpbStop, lpbStart, (DWORD)(lpbBuf - lpbStart), &errb);
  663. if ((fRet = errb) != S_OK)
  664. goto exit02;
  665. lpbBuf = lpbStart;
  666. }
  667. }
  668. }
  669. /* Flush the buffer */
  670. FileWrite (hfpbStop, lpbStart, (DWORD)(lpbBuf - lpbStart), &errb);
  671. if ((fRet = errb) == S_OK)
  672. {
  673. /* Write a trailing 0 word (i.e. a NULL st) to mark
  674. * the end of the word list.
  675. */
  676. *((WORD *)lpbStart) = 0;
  677. FileWrite (hfpbStop, lpbStart, sizeof(WORD), &errb);
  678. fRet = errb;
  679. }
  680. goto exit02;
  681. }
  682. PUBLIC HRESULT FAR PASCAL FStopCallback(
  683. LST lstRawWord,
  684. LST lstNormWord,
  685. LFO lfoWordOffset,
  686. _LPSIPB lpsipb)
  687. {
  688. return MVStopListAddWord(lpsipb, lstNormWord);
  689. }
  690. /*************************************************************************
  691. * @doc API RETRIEVAL
  692. *
  693. * @func HRESULT FAR PASCAL | MVStopListEnumWords |
  694. * Enumerate the words in a stop list, getting a pointer to each.
  695. *
  696. * @parm LPSIPB | lpsipb |
  697. * Pointer to stop-word information structure
  698. *
  699. * @parm LST* | plstWord |
  700. * Indirect Pointer to 2-byte length preceded Pascal word that is
  701. * the next word identified by *pdwWordInfo and *ppvWordInfo.
  702. *
  703. * @parm LONG* | plWordInfo |
  704. * Pointer to information used to determine what the next word is
  705. * in the stop word list. Passing -1 along with NULL for *ppvWordInfo
  706. * means start at the beginning. On exit, this contains an appropriate
  707. * value that can be passed in again to get the next word, provided
  708. * that no intervening calls have been made to MVStopListAddWord.
  709. *
  710. * @parm LPVOID* | ppvWordInfo |
  711. * Indirect pointer to information used to determine what the next word is
  712. * in the stop word list. Passing NULL along with -1 for *plWordInfo
  713. * means start at the beginning. On exit, this contains an appropriate
  714. * value that can be passed in again to get the next word, provided
  715. * that no intervening calls have been made to MVStopListAddWord.
  716. *
  717. * @rdesc S_OK if succeeded
  718. * @rdesc E_OUTOFRANGE if there are no more words in the stop list.
  719. *************************************************************************/
  720. PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListEnumWords(_LPSIPB lpsipb,
  721. LST *plstWord, LONG *plWordInfo, LPVOID *ppvWordInfo)
  722. {
  723. LPCHAIN lpchain = NULL;
  724. LONG iHashChain;
  725. if (lpsipb == NULL || plstWord == NULL ||
  726. plWordInfo == NULL || ppvWordInfo == NULL)
  727. return (SetErrReturn(E_POINTER));
  728. iHashChain = *plWordInfo;
  729. // If after the last call to us, we were left sitting on a hash chain
  730. // element, just advance to the next one (which may be NULL).
  731. if ((lpchain = (LPCHAIN) *ppvWordInfo) != NULL)
  732. lpchain = CH_NEXT(lpchain);
  733. // If we're now sitting on a NULL hash chain (initial condition or we
  734. // reached the end of a previous chain), we need to find the beginning
  735. // of the next chain in the hash table.
  736. while (iHashChain < lpsipb->wTabSize - 1 && lpchain == NULL)
  737. lpchain = lpsipb->HashTab[++iHashChain];
  738. if (iHashChain >= lpsipb->wTabSize - 1 && lpchain == NULL)
  739. return (SetErrReturn(E_OUTOFRANGE));
  740. *plstWord = &CH_WORD(lpchain);
  741. *ppvWordInfo = (LPVOID)lpchain;
  742. *plWordInfo = iHashChain;
  743. return (S_OK);
  744. }
  745. /*************************************************************************
  746. * @doc API RETRIEVAL
  747. *
  748. * @func HRESULT FAR PASCAL | MVStopListFindWordPtr |
  749. * Find a word in the stop list and return a pointer to it.
  750. *
  751. * @parm LPSIPB | lpsipb |
  752. * Pointer to stop-word information structure
  753. *
  754. * @parm LST | lstWord |
  755. * Pointer to a 2-byte length preceded Pascal
  756. * string containing the word to find.
  757. *
  758. * @parm LST* | plstWordInList |
  759. * On exit, indirect pointer to 2-byte length preceded Pascal
  760. * string for the word that was found.
  761. *
  762. * @rdesc S_OK if succeeded
  763. * @rdesc E_NOTFOUND if the word isn't in the stop list
  764. *************************************************************************/
  765. PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListFindWordPtr(_LPSIPB lpsipb,
  766. LST lstWord, LST *plstWordInList)
  767. {
  768. HRESULT hr = S_OK;
  769. LPCHAIN lpchain;
  770. if ((lpchain = MVStopListFind(lpsipb, lstWord)) != NULL)
  771. *(LST UNALIGNED * UNALIGNED)plstWordInList = &CH_WORD(lpchain);
  772. else
  773. hr = E_NOTFOUND;
  774. return (hr);
  775. }