Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

136 lines
4.5 KiB

  1. /******************************************************************************\
  2. * FILE: unigram.h
  3. *
  4. * Public structures and library functions that are used to access the
  5. * unigram information.
  6. *
  7. * Note that the code to create the binary file is in mkuni, not in the
  8. * common library.
  9. \******************************************************************************/
  10. #ifdef __cplusplus
  11. extern "C" {
  12. #endif
  13. /************************************************************************************************\
  14. * Public interface to unigram data.
  15. \************************************************************************************************/
  16. //
  17. // Structures and types
  18. //
  19. // Structure giving access to a loaded copy of the unigram tables. We store the
  20. // frequencies as scores that are -10 * log2(prob).
  21. // Note we do a hack to keep the score values in one byte. We subtract an
  22. // offset from the values. Values that overflow that range are truncated to fit.
  23. typedef struct tagUNIGRAM_INFO {
  24. WORD cScores; // Number of entries in score table.
  25. WORD iRareScore; // Frequency of items not in freq. table.
  26. BYTE iOffset; // Offset to add to scores.
  27. BYTE spare[3]; // keep alignment.
  28. BYTE *pScores; // Pointer to scores.
  29. void *pLoadInfo1; // Handles needed to unload the data
  30. void *pLoadInfo2;
  31. void *pLoadInfo3;
  32. } UNIGRAM_INFO;
  33. //
  34. // Functions.
  35. //
  36. // Load unigram information from a file.
  37. BOOL UnigramLoadFile(LOCRUN_INFO *pLocRunInfo, UNIGRAM_INFO *pUnigramInfo, wchar_t *pPath);
  38. // Unload runtime localization information that was loaded from a file.
  39. BOOL UnigramUnloadFile(UNIGRAM_INFO *pUnigramInfo);
  40. // Load unigram information from a resource.
  41. // Note, don't need to unload resources.
  42. BOOL UnigramLoadRes(
  43. LOCRUN_INFO *pLocRunInfo,
  44. UNIGRAM_INFO *pUnigramInfo,
  45. HINSTANCE hInst,
  46. int nResID,
  47. int nType
  48. );
  49. // Load runtime localization information from an image already loaded into
  50. // memory.
  51. BOOL UnigramLoadPointer(LOCRUN_INFO *pLocRunInfo, UNIGRAM_INFO *pUnigramInfo, void *pData);
  52. // Get unigram probability for a character. Character must be passed in as
  53. // dense coded value. Warning: value returned as log2(prob)/10. I don't know
  54. // why, but this is what the old code did!
  55. float UnigramCost(
  56. UNIGRAM_INFO *pUnigramInfo,
  57. wchar_t dch
  58. );
  59. #ifdef ZTRAIN
  60. // Takes a character (possibly folded) and returns the probability of that
  61. // character occurring.
  62. float UnigramCostFolded(LOCRUN_INFO *pLocRunInfo, UNIGRAM_INFO *pUnigramInfo, wchar_t wFold);
  63. #endif
  64. /************************************************************************************************\
  65. * Stuff to access binary unigram file, only used by common and mktable.
  66. \************************************************************************************************/
  67. // The format for the unigram file is:
  68. // Header:
  69. // DWORD File type indicator.
  70. // DWORD Size of header.
  71. // BYTE Lowest version of this code that can read this file.
  72. // BYTE Version of this code that wrote this file.
  73. // wchar_t[4] Locale ID (3 characters plus null).
  74. // DWORD * 3 Locale signature
  75. // WORD Number of entries in frequency table.
  76. // WORD Frequency of items not in freq. table.
  77. // BYTE * 2 Score offset (iOffset), then one byte reserved for future use.
  78. // DWORD * 2 Reserved for future use.
  79. // Frequency table:
  80. // BYTE Frequency for dense code 0.
  81. // BYTE Frequency for dense code 1.
  82. // .
  83. // .
  84. // .
  85. // BYTE Frequency for dense code N.
  86. //
  87. // NOTE: Frequencies are stored as -10 * log2(prob)
  88. //
  89. // Constants
  90. //
  91. // Magic key the identifies the Local Runtime files
  92. #define UNIGRAM_FILE_TYPE 0xFD8BA978
  93. // Version information for file.
  94. #define UNIGRAM_MIN_FILE_VERSION 0 // First version of code that can read this file
  95. #define UNIGRAM_CUR_FILE_VERSION 0 // Current version of code.
  96. #define UNIGRAM_OLD_FILE_VERSION 0 // Oldest file version this code can read.
  97. //
  98. // Structures and types
  99. //
  100. // Structure to hold file header.
  101. typedef struct tagUNIGRAM_HEADER {
  102. DWORD fileType; // This should always be set to UNIGRAM_FILE_TYPE.
  103. DWORD headerSize; // Size of the header.
  104. BYTE minFileVer; // Earliest version of code that can read this file
  105. BYTE curFileVer; // Current version of code that wrote the file.
  106. wchar_t locale[4]; // Locale ID string.
  107. DWORD adwSignature [3]; // Locale signature
  108. WORD cScores; // Number of entries in score table.
  109. WORD iRareScore; // Frequency of items not in freq. table.
  110. BYTE iOffset;
  111. BYTE reserved1;
  112. DWORD reserved2[2];
  113. } UNIGRAM_HEADER;
  114. #ifdef __cplusplus
  115. }
  116. #endif