Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

210 lines
5.7 KiB

  1. /////////////////////////////////////////////////////////////////////////////
  2. //
  3. // Copyright (C) 1997 - 1998, Microsoft Corporation. All Rights Reserved.
  4. //
  5. // BSDict.h :
  6. //
  7. // Owner : ChaeSeong Lim, HET MSCH RND (e-mail:[email protected])
  8. //
  9. // History : 1996/Mar
  10. /////////////////////////////////////////////////////////////////////////////
  11. #ifndef __DOUBLEBSDICT_H__
  12. #define __DOUBLEBSDICT_H__
  13. #if !defined (_UNICODE) && !defined (_MBCS)
  14. #error _UNICODE or _MBCS is required.
  15. #endif
  16. // Maximum number of per-length dictionaries in the silsa dict (currently 9).
  17. #define MAX_LENGTH_DICT 9 // Keep in sync with the word hash size in hash.h
  18. // Historical note: a 20-byte buffer can hold an 18-byte (9-character) word.
  19. // NOTE(review): the value below is 2048, not 20 -- presumably the buffer was enlarged later; confirm.
  // MAX_BUFFER_SIZE is the element count of the static BYTE lpBuffer in CDoubleFileBSDict.
  20. #define MAX_BUFFER_SIZE 2048
  21. /////////////////////////////////////////////////////////////////////////////
  22. // _IndexHeader is used as the index header.
  23. struct _IndexHeader {
  24. // 16 bytes
  25. BYTE wordLen;
  26. BYTE reserved;
  27. UINT indexSize, blockSize;
  28. WORD numOfBlocks;
  29. UINT numberOfWords;
  30. _IndexHeader() {
  31. wordLen = 0; indexSize = blockSize = 0; numOfBlocks = 0;
  32. numberOfWords = 0; reserved = 0;
  33. }
  34. _IndexHeader(BYTE _wordLen, UINT _blockSize, UINT _indexSize) {
  35. wordLen = _wordLen;
  36. indexSize = _indexSize;
  37. //content word size(bytes) + pumsa(2) + index(2) + numOfWords(8);
  38. blockSize = _blockSize;
  39. numOfBlocks = 0;
  40. numberOfWords = 0; reserved = 0;
  41. }
  42. };
  // Header size constant for the silsa dictionary. It is not referenced anywhere
  // in this header; presumably the number of bytes reserved for the dictionary
  // file header -- confirm at the use sites.
  43. #define SILSA_DICT_HEADER_SIZE 1024
  44. //#define COPYRIGHT_STR "Copyright (C) 1996 Hangul Engineering Team. Microsoft Korea(MSCH). All rights reserved.\nVer 2.0 1996/3"
  45. struct _DictHeader {
  46. //char COPYRIGHT_HEADER[150];
  47. WORD numOfLenDict;
  48. DWORD iBlock;
  49. _DictHeader() {
  50. numOfLenDict=0; iBlock=0;
  51. //memset(COPYRIGHT_HEADER, '\0', sizeof(COPYRIGHT_HEADER));
  52. //strcpy(COPYRIGHT_HEADER, COPYRIGHT_STR);
  53. //COPYRIGHT_HEADER[strlen(COPYRIGHT_HEADER)+1] = '\032';
  54. //numOfLenDict=0; iBlock=0;
  55. }
  56. };
  57. //#define DICT_HEADER_SIZE 16
  58. //#define INDEX_HEADER_SIZE 20
  59. /////////////////////////////////////////////////////////////////////////////
  60. /////////////////////////////////////////////////////////////////////////////
  61. /////////////////////////////////////////////////////////////////////////////
  62. // CDoubleBSDict
  63. class CDoubleBSDict {
  64. public:
  65. // Constructor
  66. // m_wordLen denote number of real two byte word. not byte length
  67. CDoubleBSDict() {
  68. m_pIndexHeader = new _IndexHeader;
  69. hIndex = 0;
  70. }
  71. CDoubleBSDict(int wordLen, int blockSize) {
  72. m_pIndexHeader = new _IndexHeader((BYTE)wordLen, blockSize,
  73. (wordLen << 1) + 2 + sizeof(WORD)*2); // index size
  74. hIndex = 0;
  75. }
  76. // Attributes
  77. // Operations
  78. //virtual void Delete(const _TCHAR *key) = 0;
  79. //virtual BOOL Find(const _TCHAR *) = 0;
  80. int GetNumOfBlocks() { return m_pIndexHeader->numOfBlocks; }
  81. int GetBlockSize() { return m_pIndexHeader->blockSize; }
  82. int GetIndexSize() { return m_pIndexHeader->indexSize; }
  83. int GetWordLen() { return m_pIndexHeader->wordLen; }
  84. int GetWordByteLen() { return ((m_pIndexHeader->wordLen)<<1); }
  85. // Implementations
  86. protected:
  87. _IndexHeader *m_pIndexHeader;
  88. HGLOBAL hIndex;
  89. public:
  90. // Destructor
  91. ~CDoubleBSDict() {
  92. if (m_pIndexHeader) delete m_pIndexHeader;
  93. if (hIndex) GlobalFree(hIndex);
  94. }
  95. };
  96. /////////////////////////////////////////////////////////////////////////////
  97. // CDoubleMemBSDict class
  98. class CDoubleMemBSDict : public CDoubleBSDict {
  99. public:
  100. // Constructor
  101. CDoubleMemBSDict() { hBlocks = 0; }
  102. CDoubleMemBSDict(int wordSize, int blockSize)
  103. : CDoubleBSDict(wordSize, blockSize) { hBlocks = 0; }
  104. // Attributes
  105. // Operations
  106. void BuildFromTextFile(LPCTSTR lpfilename);
  107. DWORD WriteIndex(HANDLE hOut);
  108. DWORD WriteBlocks(HANDLE hOut);
  109. //void Delete(const _TCHAR *key);
  110. // Implementations
  111. protected:
  112. HGLOBAL hBlocks;
  113. HANDLE hInput;
  114. UINT m_maxWordsInBlock;
  115. int m_readPerOnce;
  116. int ReadWord(BYTE *contentWord, int *pumsa);
  117. void ReadBlock(int blockNumber, int *readNum, int *readUniQue);
  118. BOOL AllocIndexNBlock();
  119. private:
  120. public:
  121. // Destructor
  122. ~CDoubleMemBSDict();
  123. };
  124. class BlockCache;
  125. /////////////////////////////////////////////////////////////////////////////
  126. // CDoubleFileBSDict class
  127. class CDoubleFileBSDict : public CDoubleBSDict {
  128. public:
  129. // Constructor
  130. CDoubleFileBSDict() : CDoubleBSDict() { }
  131. CDoubleFileBSDict(int wordSize, int blockSize)
  132. : CDoubleBSDict(wordSize, blockSize) { }
  133. // Attributes
  134. // Operations
  135. void LoadIndex(HANDLE hInput);
  136. int FindWord(HANDLE hDict, DWORD fpBlock, LPCTSTR lpWord);
  137. // Implementations
  138. protected:
  139. void LoadIndexHeader(HANDLE hInput);
  140. int FindIndex(LPCTSTR lpWord, int left, int right, BYTE *pumsa);
  141. int FindBlock(LPCTSTR lpWord, int left, int right);
  142. int Comp(LPCTSTR lpMiddle, LPCTSTR lpWord);
  143. BYTE *lpIndex;
  144. private:
  145. static BYTE lpBuffer[MAX_BUFFER_SIZE];
  146. static BYTE *lpCurIndex;
  147. public:
  148. // Destructor
  149. ~CDoubleFileBSDict() { }
  150. };
  151. /////////////////////////////////////////////////////////////////////////////
  152. // CDoubleFileBSDict class inline function
  153. inline
  154. int CDoubleFileBSDict::Comp(LPCTSTR lpMiddle, LPCTSTR lpWord )
  155. {
  156. #ifdef _MBCS
  157. for (int i=0; i<GetWordByteLen(); i++) {
  158. #elif _UNICODE
  159. for (int i=0; i<GetWordLen(); i++) {
  160. #endif
  161. int test = *(lpMiddle+i) - *(lpWord+i);
  162. if (test<0) return -1;
  163. else
  164. if (test>0) return 1;
  165. }
  166. return 0;
  167. }
  168. #endif // !__DOUBLEBSDICT_H__