Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

170 lines
5.8 KiB

  1. /******************************************************************************\
  2. * FILE: bigram.h
  3. *
  4. * Public structures and library functions that are used to access the
  5. * bigram information.
  6. *
  7. * Note that the code to create the binary file is in mktable, not in the
  8. * common library.
  9. \******************************************************************************/
  10. #ifndef __INCLUDE_BIGRAM
  11. #define __INCLUDE_BIGRAM
  12. #ifdef __cplusplus
  13. extern "C" {
  14. #endif
  15. /************************************************************************************************\
  16. * Public interface to bigram data.
  17. \************************************************************************************************/
  18. //
  19. // Structures and types
  20. //
  21. // One entry of the bigram secondary table: the second character of a bigram and its probability score.
  22. typedef struct tagBIGRAM_CHAR_PROB {
  23. wchar_t dch; // Second char of the bigram, as a dense coded value.
  24. WORD prob; // Probability stored as a score: -10 * log2(probability).
  25. } BIGRAM_CHAR_PROB;
  26. // Structure giving access to a loaded copy of the bigram tables: entry counts, table pointers and load handles.
  27. typedef struct tagBIGRAM_INFO {
  28. WORD cInitialCodes; // Number of entries in the initial code table.
  29. WORD cRareCodes; // Number of entries in the rare (initial) code table.
  30. WORD cSecondaryTable; // Number of entries in the secondary table.
  31. WORD *pInitialOffsets; // Pointer to offsets indexed by initial codes (indexes into the secondary table, per the file format notes).
  32. WORD *pRareOffsets; // Pointer to offsets indexed by rare (initial) codes (indexes into the secondary table, per the file format notes).
  33. BIGRAM_CHAR_PROB *pSecondaryTable; // Pointer to the secondary table of (char, prob) entries.
  34. void *pLoadInfo1; // Handles needed to unload the data (pLoadInfo1 through pLoadInfo3).
  35. void *pLoadInfo2; // Opaque load handle; see pLoadInfo1.
  36. void *pLoadInfo3; // Opaque load handle; see pLoadInfo1.
  37. } BIGRAM_INFO;
  38. //
  39. // Functions.
  40. //
  41. // Load bigram information from a file. NOTE(review): whether pPath names the file itself or its directory is not visible here -- confirm against the implementation.
  42. BOOL BigramLoadFile(LOCRUN_INFO *pLocRunInfo, BIGRAM_INFO *pBigramInfo, wchar_t *pPath);
  43. // Unload bigram information that was loaded from a file.
  44. BOOL BigramUnloadFile(BIGRAM_INFO *pBigramInfo);
  45. // Load bigram information from a resource (presumably located via hInst, nResID and nType -- confirm).
  46. // Note: resources do not need to be unloaded.
  47. BOOL BigramLoadRes(
  48. LOCRUN_INFO *pLocRunInfo,
  49. BIGRAM_INFO *pBigramInfo,
  50. HINSTANCE hInst,
  51. int nResID,
  52. int nType
  53. );
  54. // Load bigram information from an image already loaded into
  55. // memory (pData points at that image).
  56. BOOL BigramLoadPointer(LOCRUN_INFO *pLocRunInfo, BIGRAM_INFO *pBigramInfo, void *pData);
  57. // Get the bigram probability, returned as a transition cost, for the selected pair of
  58. // characters (dchPrev, dchCur). Both characters must be passed in as dense coded values.
  59. FLOAT BigramTransitionCost(
  60. LOCRUN_INFO *pLocRunInfo,
  61. BIGRAM_INFO *pBigramInfo,
  62. wchar_t dchPrev,
  63. wchar_t dchCur
  64. );
  65. /************************************************************************************************\
  66. * Stuff to access binary bigram file, only used by common and mktable.
  67. \************************************************************************************************/
  68. // The format for the bigram file is:
  69. // Header:
  70. // DWORD File type indicator.
  71. // DWORD Size of header.
  72. // BYTE Lowest version of this code that can read this file.
  73. // BYTE Version of this code that wrote this file.
  74. // wchar_t[4] Locale ID (3 characters plus null).
  75. // DWORD * 3 Locale signature
  76. // WORD Number of entries in initial code table.
  77. // WORD Number of entries in rare table.
  78. // WORD Number of entries in secondary table.
  79. // DWORD * 2 Reserved for future use.
  80. // Initial code table:
  81. // WORD Index to second table for dense code 0.
  82. // WORD Index to second table for dense code 1.
  83. // .
  84. // .
  85. // .
  86. // WORD Index to second table for dense code N.
  87. // Rare (initial) code table:
  88. // WORD Index to second table for first class code.
  89. // WORD Index to second table for second class code.
  90. // .
  91. // .
  92. // .
  93. // WORD Index to second table for Nth class code.
  94. // WORD Index to end of table, so we can check range of last code.
  95. // Secondary table:
  96. // BIGRAM_CHAR_PROB Character and probability.
  97. // .
  98. // .
  99. // .
  100. // BIGRAM_CHAR_PROB Character and probability.
  101. //
  102. // NOTE: all characters are stored as dense coded values, and values with the type byte set
  103. // to 0xFF are summary codes for groups of codes.
  104. //
  105. // Constants
  106. //
  107. // Magic key that identifies bigram files (the expected value of the header's fileType field).
  108. #define BIGRAM_FILE_TYPE 0x879AB8DF
  109. // Version information for the file (presumably what minFileVer / curFileVer in the header are checked against -- confirm).
  110. #define BIGRAM_MIN_FILE_VERSION 0 // First version of the code that can read this file.
  111. #define BIGRAM_CUR_FILE_VERSION 0 // Current version of the code.
  112. #define BIGRAM_OLD_FILE_VERSION 0 // Oldest file version this code can read.
  113. // Character class code values.
  114. // Note that BIGRAM_MAX_CLASS_CODES is just an upper limit; there may not actually be
  115. // that many classes.
  116. #define BIGRAM_FIRST_CLASS_CODE 0xFF00 // Value to start class codes at.
  117. #define BIGRAM_MAX_CLASS_CODES 0x20 // Upper limit on the number of class codes.
  118. #define BIGRAM_LAST_CLASS_CODE (BIGRAM_FIRST_CLASS_CODE + BIGRAM_MAX_CLASS_CODES - 1)
  119. // Highest class code that can be returned (0xFF1F with the values above).
  120. // Probability to use if no bigram entry is found (presumably in the same score units as BIGRAM_CHAR_PROB.prob -- confirm).
  121. #define BIGRAM_DEFAULT_PROB 255 // TODO(JRB): figure out a good value for this.
  122. //
  123. // Structures and types
  124. //
  125. // Structure to hold the file header; fields appear in the order listed in the file format description.
  126. typedef struct tagBIGRAM_HEADER {
  127. DWORD fileType; // This should always be set to BIGRAM_FILE_TYPE.
  128. DWORD headerSize; // Size of the header.
  129. BYTE minFileVer; // Earliest version of the code that can read this file.
  130. BYTE curFileVer; // Version of the code that wrote the file.
  131. wchar_t locale[4]; // Locale ID string (3 characters plus null).
  132. DWORD adwSignature[3]; // Locale signature.
  133. WORD cInitialCodes; // Number of entries in the initial code table.
  134. WORD cRareCodes; // Number of entries in the rare table.
  135. WORD cSecondaryTable;// Number of entries in the secondary table.
  136. DWORD reserved[2]; // Reserved for future use.
  137. } BIGRAM_HEADER;
  138. //
  139. // Functions
  140. //
  141. // Convert a dense coded character to its character class (presumably a code in BIGRAM_FIRST_CLASS_CODE..BIGRAM_LAST_CLASS_CODE -- confirm).
  142. wchar_t BigramDense2Class(LOCRUN_INFO *pLocRunInfo, wchar_t dch);
  143. #ifdef __cplusplus
  144. }
  145. #endif
  146. #endif // __INCLUDE_BIGRAM