Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

334 lines
12 KiB

  1. /******************************************************************************\
  2. * FILE: locale.h
  3. *
  4. * Public structures and functions library that are used to access the
  5. * localization information.
  6. *
  7. * There are two major pieces of this. The first deals with stuff available
  8. * to the runtime of shipped products (e.g. the recognizer). The second is
  9. * stuff needed at train time that we do not want in the shipped product
  10. * (usually for size reasons).
  11. \******************************************************************************/
  12. #if !defined (__HWX_LOCALE__) // Include guard: skip the body if already included
  13. #define __HWX_LOCALE__
  14. #ifdef __cplusplus
  15. extern "C" { // Give the declarations below C linkage when compiled as C++
  16. #endif
  17. /******************************************************************************\
  18. * Stuff for the product runtime, also used by all the other code.
  19. \******************************************************************************/
  20. //
  21. // Constants
  22. //
  23. // Masks to extract the baseline and height fields from a BLINE_HEIGHT value.
  24. #define LOCBH_BASE_MASK ((BYTE)0x0F) // Baseline field: the low four bits
  25. #define LOCBH_HEIGHT_MASK ((BYTE)0xF0) // Height field: the high four bits
  26. //
  27. // Structures and types
  28. //
  29. // Define the types needed for ClassBigrams and BaseLine Height.
  30. typedef unsigned char CODEPOINT_CLASS; // Class code of a code point
  31. typedef unsigned char BLINE_HEIGHT; // Packed baseline/height code; see the LOCBH_*_MASK values
  32. // CodePointClass Header describes:
  33. // - how the CodePointClass codes for a certain subrange are stored (Array vs ExceptionList)
  34. // - The size of the Array/(Exception Lists)
  35. // - Offset to the Array/the 1st Exception List
  36. typedef struct tagCODEPOINT_CLASS_HEADER
  37. {
  38. WORD iFlags; // Bit15 indicates the format of the data
  39. // 0:Array (Full array is supplied)
  40. // 1:Exception (A default and exception list(s) are supplied)
  41. // if Bit15=0
  42. // Bits 0-14 Number of Array Entries (documented as up to 32768; NOTE(review): 15 bits hold at most 32767)
  43. // if Bit15=1
  44. // Bits 8-14 Number of Exceptions
  45. // Bits 1-8 Default BigramClass code (NOTE(review): overlaps Bits 8-14 as written -- confirm the exact bit ranges)
  46. WORD iOffset; // Offset in bytes in the ArrayBuffer (Bit15=0) or ExceptionsBuffer (Bit15=1)
  47. }CODEPOINT_CLASS_HEADER;
  48. // BLHeight Header describes:
  49. // - how the BLHeight codes for a certain subrange are stored (Array vs ExceptionList)
  50. // - The size of the Array/(Exception Lists)
  51. // - Offset to the Array/the 1st Exception List
  52. typedef struct tagBLINE_HEIGHT_HEADER
  53. {
  54. WORD iFlags; // Bit15 indicates the format of the data
  55. // 0:Array (Full array is supplied)
  56. // 1:Exception (A default and exception list(s) are supplied)
  57. // if Bit15=0
  58. // Bits 1-14 Number of Array Entries (documented as up to 32768; NOTE(review): CODEPOINT_CLASS_HEADER says Bits 0-14 -- confirm)
  59. // if Bit15=1
  60. // Bits 8-14 Number of exceptions
  61. // Bits 1-8 Default BLHeight code (NOTE(review): overlaps Bits 8-14 as written -- confirm the exact bit ranges)
  62. WORD iOffset; // Offset in bytes in the ArrayBuffer (Bit15=0) or ExceptionsBuffer (Bit15=1)
  63. }BLINE_HEIGHT_HEADER;
  64. // CodePointClass Exception describes one Class Bigram exception list.
  65. typedef struct tagCODEPOINT_CLASS_EXCEPTION
  66. {
  67. CODEPOINT_CLASS clCode; // The CodePointClass Code for this list
  68. BYTE cNumEntries; // Number of entries in the exception list
  69. wchar_t wDenseCode[1]; // An array of CodePoint indices in the subrange; declared with one element (presumably cNumEntries entries are stored -- TODO confirm)
  70. }CODEPOINT_CLASS_EXCEPTION;
  71. // BLineHgt Exception describes one BLineHgt exception list.
  72. typedef struct tagBLINE_HEIGHT_EXCEPTION
  73. {
  74. BLINE_HEIGHT blhCode; // The BLineHgt Code for this list
  75. BYTE cNumEntries; // Number of entries in the exception list
  76. wchar_t wDenseCode[1]; // An array of CodePoint indices in the subrange; declared with one element (presumably cNumEntries entries are stored -- TODO confirm)
  77. }BLINE_HEIGHT_EXCEPTION;
  78. // Range specification for ALC subranges of Dense coding of the character set.
  79. // Giving both the first code and the number of codes is redundant, but we
  80. // would waste the space anyway to keep DWORD alignment, and it makes some
  81. // coding easier. Note that the ALC bits for the first range are zero because
  82. // you need to use the ALC table for individual ALC values for each code in
  83. // the range.
  84. typedef struct tagLOCRUN_ALC_SUBRANGE {
  85. WORD iFirstCode; // First code in range.
  86. WORD cCodesInRange; // Number of codes in this range.
  87. ALC alcRange; // The ALC bits for all code points in this range.
  88. CODEPOINT_CLASS_HEADER clHeader; // Class Bigram header for this range
  89. BLINE_HEIGHT_HEADER blhHeader; // BaseLine/Height header for this range
  90. } LOCRUN_ALC_SUBRANGE;
  91. // Defines to make it easy to have pointers to folding sets.
  92. #define LOCRUN_FOLD_MAX_ALTERNATES 8 // Max alternates in the folding table
  93. typedef WORD LOCRUN_FOLDING_SET[LOCRUN_FOLD_MAX_ALTERNATES]; // One folding set; element 0 is what LocRunFolded2Dense yields
  94. // Structure giving access to a loaded copy of the localization runtime tables.
  95. // NOTE: The arrays pointed to by pClassExcpts and pBLineHgtExcpts contain
  96. // word data that is only byte aligned, and can cause data misalignment faults
  97. // in CE. Search for the string "UNALIGNED" in locrun.c to see how this is worked
  98. // around.
  99. typedef struct tagLOCRUN_INFO {
  100. DWORD adwSignature [3]; // A signature computed from the loc info
  101. // DWORD0: Date/Time Stamp (time_t)
  102. // DWORD1: XOR of the ALC values for all CPs
  103. // DWORD2 HIWORD: XOR of all valid CPs
  104. // LOWORD: XOR of all BaseLine/Height info
  105. WORD cCodePoints; // Number of supported code points.
  106. BYTE cALCSubranges; // Number of subranges defined
  107. // across Dense coding
  108. BYTE cFoldingSets; // Number of folding sets defined
  109. WORD cClassesArraySize; // Size in bytes of classes arrays
  110. WORD cClassesExcptSize; // Size in bytes of classes exception lists
  111. WORD cBLineHgtArraySize; // Size in bytes of BLineHgt arrays
  112. WORD cBLineHgtExcptSize; // Size in bytes of BLineHgt exception lists
  113. wchar_t *pDense2Unicode; // Map from Dense coding to Unicode.
  114. LOCRUN_ALC_SUBRANGE *pALCSubranges; // Subranges of Dense coding, and
  115. // their ALC values.
  116. ALC *pSubrange0ALC; // The ALC values for the first
  117. // subrange
  118. LOCRUN_FOLDING_SET *pFoldingSets; // List of folding sets
  119. ALC *pFoldingSetsALC; // The merged ALCs for the folded
  120. // characters.
  121. CODEPOINT_CLASS *pClasses; // Array of Codepoint classes for all subranges
  122. BYTE *pClassExcpts; // Classes exception lists for all subranges (byte aligned; see NOTE above)
  123. BLINE_HEIGHT *pBLineHgtCodes; // Array of BLineHgt codes for all subranges
  124. BYTE *pBLineHgtExcpts; // BLineHgt exception lists for all subranges (byte aligned; see NOTE above)
  125. void *pLoadInfo1; // Handles needed to unload the data
  126. void *pLoadInfo2;
  127. void *pLoadInfo3;
  128. } LOCRUN_INFO;
  129. //
  130. // Macros to access the runtime localization information
  131. //
  132. // Is value a valid Dense code? True when dch is below cCodePoints.
  133. #define LocRunIsDenseCode(pLocRunInfo,dch) \
  134. ((dch) < (pLocRunInfo)->cCodePoints)
  135. // Is value a valid folded code? Folded codes are placed directly after
  136. // the Dense codes. Note: the macro expands both arguments more than once.
  137. #define LocRunIsFoldedCode(pLocRunInfo,dch) ( \
  138. ((pLocRunInfo)->cCodePoints <= (dch)) && \
  139. ((dch) < ((pLocRunInfo)->cCodePoints + (pLocRunInfo)->cFoldingSets)) \
  140. )
  141. // Convert Dense code to Unicode. Plain table lookup; the macro does no range check.
  142. #define LocRunDense2Unicode(pLocRunInfo,dch) \
  143. ((pLocRunInfo)->pDense2Unicode[dch])
  144. // Get pointer (cast to wchar_t *) to the folding set for a folded code. The macro does no range check.
  145. #define LocRunFolded2FoldingSet(pLocRunInfo,fch) ( \
  146. (wchar_t *)(pLocRunInfo)->pFoldingSets[(fch) - (pLocRunInfo)->cCodePoints] \
  147. )
  148. // Convert Folded code to Dense code: yields element 0 of the folding set. The macro does no range check.
  149. #define LocRunFolded2Dense(pLocRunInfo,fch) ( \
  150. (pLocRunInfo)->pFoldingSets[(fch) - (pLocRunInfo)->cCodePoints][0] \
  151. )
  152. // Get ALC from either dense or folded code. Note: both arguments are expanded more than once.
  153. #define LocRun2ALC(pLocRunInfo, dch) ( \
  154. LocRunIsFoldedCode(pLocRunInfo,dch) \
  155. ? LocRunFolded2ALC(pLocRunInfo, dch) \
  156. : LocRunDense2ALC(pLocRunInfo, dch) \
  157. )
  158. // Value returned when GetDensecodeClass (presumably LocRunDensecode2Class -- confirm) is unable to determine the class
  159. #define LOC_RUN_NO_CLASS 0xFE
  160. // Value returned when LocRunDense2BLineHgt is unable to determine the BLineHgt
  161. #define LOC_RUN_NO_BLINEHGT 0xFE
  162. // Value returned by LocRunUnicode2Dense and LocTrainUnicode2Dense when there is
  163. // no dense code for the supplied Unicode character.
  164. // JRB: Should really rename this LOC_RUN_NO_DENSE_CODE
  165. #define LOC_TRAIN_NO_DENSE_CODE L'\xFFFF'
  166. //
  167. //Functions to access the runtime localization information
  168. //
  169. // Load runtime localization information from the file at pPath into *pLocRunInfo.
  170. BOOL LocRunLoadFile(LOCRUN_INFO *pLocRunInfo, wchar_t *pPath);
  171. // Unload runtime localization information that was loaded from a file.
  172. BOOL LocRunUnloadFile(LOCRUN_INFO *pLocRunInfo);
  173. // Load runtime localization information from a resource.
  174. // Note: there is no need to unload resources.
  175. BOOL LocRunLoadRes(
  176. LOCRUN_INFO *pLocRunInfo,
  177. HINSTANCE hInst,
  178. int nResID,
  179. int nType
  180. );
  181. // Load runtime localization information from an image already loaded into
  182. // memory.
  183. BOOL LocRunLoadPointer(LOCRUN_INFO *pLocRunInfo, void *pData);
  184. // Write a properly formatted binary file containing the runtime localization
  185. // information.
  186. #ifndef HWX_PRODUCT // Hide this when compiling product
  187. BOOL LocRunWriteFile(LOCRUN_INFO *pLocRunInfo, FILE *pFile);
  188. #endif // !HWX_PRODUCT
  189. // Get ALC value for a Dense coded character.
  190. ALC LocRunDense2ALC(LOCRUN_INFO *pLocRunInfo, wchar_t dch);
  191. // Get ALC value for a folded character.
  192. ALC LocRunFolded2ALC(LOCRUN_INFO *pLocRunInfo, wchar_t dch);
  193. // Convert Dense code to Folded code.
  194. wchar_t LocRunDense2Folded(LOCRUN_INFO *pLocRunInfo, wchar_t dch);
  195. // Convert from Unicode to Dense coding. If there is no dense code for the
  196. // Unicode value, 0xFFFF (LOC_TRAIN_NO_DENSE_CODE) is returned.
  197. // WARNING: this is expensive. For code outside the runtime recognizer, use
  198. // the LocTrainUnicode2Dense macro. For the recognizer, you have to use
  199. // this, but use it as little as possible.
  200. wchar_t LocRunUnicode2Dense(LOCRUN_INFO *pLocRunInfo, wchar_t wch);
  201. // Get the BLineHgt code for a specific dense code (LOC_RUN_NO_BLINEHGT if it cannot be determined).
  202. BLINE_HEIGHT
  203. LocRunDense2BLineHgt(LOCRUN_INFO *pLocRunInfo, wchar_t dch);
  204. // Convert a dense coded character to its character class.
  205. CODEPOINT_CLASS
  206. LocRunDensecode2Class(LOCRUN_INFO *pLocRunInfo, wchar_t dch);
  207. /******************************************************************************\
  208. * Stuff for the training programs, not to be used by product code.
  209. \******************************************************************************/
  210. #ifndef HWX_PRODUCT // Hide this when compiling product
  211. //
  212. // Structures and types
  213. //
  214. // Information on min and max stroke counts for a code point.
  215. typedef struct tagSTROKE_COUNT_INFO {
  216. BYTE minStrokes; // Min legal strokes for char (LOC_TRAIN_UNKNOWN_MIN_STROKE_COUNT when not known).
  217. BYTE maxStrokes; // Max legal strokes for char (LOC_TRAIN_UNKNOWN_MAX_STROKE_COUNT when not known).
  218. } STROKE_COUNT_INFO;
  219. // Structure giving access to a loaded copy of the localization training time
  220. // tables.
  221. typedef struct tagLOCTRAIN_INFO {
  222. // Conversion from Unicode to dense.
  223. WORD cCodePoints; // Number of supported code points.
  224. wchar_t *pUnicode2Dense; // Map from Unicode to Dense coding.
  225. // Stroke count info per character. Indexed by dense code.
  226. WORD cStrokeCountInfo; // Number of entries in the array.
  227. STROKE_COUNT_INFO *pStrokeCountInfo; // Stroke count info array.
  228. void *pLoadInfo1; // Handles needed to unload the
  229. void *pLoadInfo2; // data.
  230. void *pLoadInfo3;
  231. } LOCTRAIN_INFO;
  232. //
  233. // Constants
  234. //
  235. // Values to set the min and max stroke counts to when they are not known.
  236. #define LOC_TRAIN_UNKNOWN_MIN_STROKE_COUNT 0x00 // For STROKE_COUNT_INFO.minStrokes
  237. #define LOC_TRAIN_UNKNOWN_MAX_STROKE_COUNT 0xFF // For STROKE_COUNT_INFO.maxStrokes
  238. //
  239. // Macros to access the train time localization information
  240. //
  241. // Convert Unicode to Dense code. Plain table lookup; the macro does no range check.
  242. #define LocTrainUnicode2Dense(pLocTrainInfo,wch) \
  243. ((pLocTrainInfo)->pUnicode2Dense[wch])
  244. //
  245. //Functions to access the train time localization information
  246. //
  247. // Load train time localization information from the file at pPath.
  248. BOOL LocTrainLoadFile(LOCRUN_INFO *pLocRunInfo, LOCTRAIN_INFO *pLocTrainInfo, wchar_t *pPath);
  249. // Unload train time localization information that was loaded from a file.
  250. BOOL LocTrainUnloadFile(LOCTRAIN_INFO *pLocTrainInfo);
  251. // Load train time localization information from a resource.
  252. BOOL LocTrainLoadRes(
  253. LOCRUN_INFO *pLocRunInfo, LOCTRAIN_INFO *pLocTrainInfo, HINSTANCE hInst, int nResID, int nType
  254. );
  255. // Load train time localization information from an image already loaded into
  256. // memory.
  257. BOOL LocTrainLoadPointer(LOCRUN_INFO *pLocRunInfo, LOCTRAIN_INFO *pLocTrainInfo, void *pData);
  258. // Write a properly formatted binary file containing the train time
  259. // localization information.
  260. BOOL LocTrainWriteFile(LOCRUN_INFO *pLocRunInfo, LOCTRAIN_INFO *pLocTrainInfo, FILE *pFile);
  261. // Check whether cStrokes is a valid stroke count for the character. Takes a dense code (dch).
  262. BOOL LocTrainValidStrokeCount(
  263. LOCTRAIN_INFO *pLocTrainInfo, wchar_t dch, int cStrokes
  264. );
  265. #endif // !HWX_PRODUCT
  266. #ifdef __cplusplus
  267. } // extern "C"
  268. #endif // __cplusplus
  269. #endif // !defined (__HWX_LOCALE__)