Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

308 lines
7.5 KiB

  1. //+-------------------------------------------------------------------------
  2. //
  3. // Copyright (C) 1991, Microsoft Corporation.
  4. //
  5. // File: XlatChar.cxx
  6. //
  7. // Contents: Character translation class.
  8. //
  9. // Classes: CXlatChar
  10. //
  11. // History: 02-13-92 KyleP Created
  12. // 03-11-97 arunk Modified for regex lib
  13. //--------------------------------------------------------------------------
  14. #include <stdlib.h>
  15. #include <search.h>
  16. // Local includes:
  17. #include <XlatChar.hxx>
  18. #define TOUPPER towupper
  19. //+-------------------------------------------------------------------------
  20. //
  21. // Member: CXlatChar::CXlatChar, public
  22. //
  23. // Synopsis: Initializes character mapping (no char classes).
  24. //
  25. // Arguments: [fCaseSens] -- true if case sensitive mapping.
  26. //
  27. // History: 20-Jan-92 KyleP Created
  28. // 02-Jul-92 KyleP Added case sensitivity
  29. //
  30. //--------------------------------------------------------------------------
  31. CXlatChar::CXlatChar( bool fCaseSens )
  32. : _cAllocation( 31 ),
  33. _cRange( 1 ),
  34. _iPrevRange( 0 ),
  35. _fCaseSens( fCaseSens )
  36. {
  37. _pwcRangeEnd = new WCHAR [ _cAllocation ];
  38. *_pwcRangeEnd = (WCHAR)-1; // Largest possible character.
  39. }
  40. //+-------------------------------------------------------------------------
  41. //
  42. // Member: CXlatChar::CXlatChar, public
  43. //
  44. // Synopsis: Copy constructor
  45. //
  46. // Arguments: [src] -- Source
  47. //
  48. // History: 13-Jul-95 KyleP Created
  49. //
  50. //--------------------------------------------------------------------------
  51. CXlatChar::CXlatChar( CXlatChar const & src )
  52. : _cRange( src._cRange ),
  53. _cAllocation( src._cAllocation ),
  54. _iPrevRange( src._iPrevRange ),
  55. _fCaseSens( src._fCaseSens )
  56. {
  57. _pwcRangeEnd = new WCHAR [ _cAllocation ];
  58. RtlCopyMemory( _pwcRangeEnd, src._pwcRangeEnd, _cAllocation*sizeof(_pwcRangeEnd[0]) );
  59. }
  60. //+-------------------------------------------------------------------------
  61. //
  62. // Member: CXlatChar::AddRange, public
  63. //
  64. // Synopsis: Adds range as a new equivalance class.
  65. //
  66. // Arguments: [wcStart] -- Start of range.
  67. // [wcEnd] -- End of range.
  68. //
  69. // History: 20-Jan-92 KyleP Created
  70. // 02-Jul-92 KyleP Added case sensitivity
  71. //
  72. //--------------------------------------------------------------------------
  73. void CXlatChar::AddRange( WCHAR wcStart, WCHAR wcEnd )
  74. {
  75. if ( !_fCaseSens )
  76. {
  77. wcStart = TOUPPER( wcStart );
  78. wcEnd = TOUPPER( wcEnd );
  79. }
  80. //
  81. // Make sure there's room for start and end of range in array.
  82. //
  83. if ( _cAllocation - _cRange < 2 )
  84. _Realloc();
  85. _pwcRangeEnd[_cRange++] = wcStart - 1;
  86. _pwcRangeEnd[_cRange++] = wcEnd;
  87. }
  88. //+-------------------------------------------------------------------------
  89. //
  90. // Member: CXlatChar::Translate, public
  91. //
  92. // Synopsis: Maps character to its equivalence class.
  93. //
  94. // Arguments: [wc] -- Character to map.
  95. //
  96. // Returns: The equivalence class of character.
  97. //
  98. // Algorithm: Binary search array until the correct bin is found.
  99. //
  100. // History: 20-Jan-92 KyleP Created
  101. // 02-Jul-92 KyleP Added case sensitivity
  102. //
  103. //--------------------------------------------------------------------------
  104. UINT CXlatChar::Translate( WCHAR wc )
  105. {
  106. if ( !_fCaseSens )
  107. wc = TOUPPER( wc );
  108. if ( wc == '.' )
  109. return( symDot );
  110. UINT i = _cAllocation / 2;
  111. UINT step = (_cAllocation + 3) / 4;
  112. WCHAR wcCurrent = _pwcRangeEnd[i];
  113. while ( step != 0 )
  114. {
  115. if ( wcCurrent == wc )
  116. {
  117. break;
  118. }
  119. else if( wcCurrent < wc )
  120. {
  121. i += step;
  122. }
  123. else
  124. {
  125. i -= step;
  126. }
  127. step = step / 2;
  128. wcCurrent = _pwcRangeEnd[i];
  129. }
  130. //
  131. // If we can't go anywhere, then either i or i + 1 is correct.
  132. //
  133. if ( wcCurrent < wc )
  134. {
  135. i++;
  136. }
  137. return( i + 1 + cSpecialCharClasses );
  138. }
  139. //+-------------------------------------------------------------------------
  140. //
  141. // Member: CXlatChar::TranslateRange, public
  142. //
  143. // Synopsis: Iterator mapping character range to set of equivalence
  144. // classes.
  145. //
  146. // Arguments: [wcStart] -- Start of range.
  147. // [wcEnd] -- End of range.
  148. //
  149. // Returns: If [wcStart] is 0 then the next class in the most
  150. // recently specified range is returned. Otherwise the
  151. // first class in the new range is returned.
  152. //
  153. // History: 20-Jan-92 KyleP Created
  154. // 02-Jul-92 KyleP Added case sensitivity
  155. //
  156. //--------------------------------------------------------------------------
  157. UINT CXlatChar::TranslateRange( WCHAR wcStart, WCHAR wcEnd )
  158. {
  159. if ( !_fCaseSens )
  160. {
  161. wcStart = TOUPPER( wcStart );
  162. wcEnd = TOUPPER( wcEnd );
  163. }
  164. if ( wcStart > wcEnd )
  165. {
  166. throw ERROR_INVALID_PARAMETER;
  167. }
  168. if ( wcStart != 0 )
  169. {
  170. _iPrevRange = Translate( wcStart );
  171. }
  172. else
  173. {
  174. if ( _iPrevRange - cSpecialCharClasses >= _cRange )
  175. {
  176. return( 0 );
  177. }
  178. else
  179. {
  180. if ( _pwcRangeEnd[_iPrevRange-1-cSpecialCharClasses] >= wcEnd )
  181. {
  182. _iPrevRange = 0;
  183. }
  184. else
  185. {
  186. _iPrevRange++;
  187. }
  188. }
  189. }
  190. return( _iPrevRange );
  191. }
  192. //+-------------------------------------------------------------------------
  193. //
  194. // Member: CXlatChar::Prepare, public
  195. //
  196. // Synopsis: Prepares class for translation.
  197. //
  198. // Requires: All equivalance classes must be added before prepare is
  199. // called.
  200. //
  201. // History: 20-Jan-92 KyleP Created
  202. //
  203. //--------------------------------------------------------------------------
  204. int _cdecl CompareFn( void const * Elt1, void const * Elt2 )
  205. {
  206. return( *(WCHAR *)Elt1 - *(WCHAR *)Elt2 );
  207. }
  208. //+-------------------------------------------------------------------------
  209. //
  210. // Member: CXlatChar::Prepare, public
  211. //
  212. // Synopsis: Called after ranges added to prepare for searching.
  213. //
  214. // History: 20-Jan-92 KyleP Created
  215. //
  216. //--------------------------------------------------------------------------
  217. void CXlatChar::Prepare()
  218. {
  219. //
  220. // Sort and then remove duplicates from the array.
  221. //
  222. qsort( _pwcRangeEnd, _cRange, sizeof( *_pwcRangeEnd ), CompareFn );
  223. UINT iGood, iCurrent;
  224. for ( iGood = 0, iCurrent = 1; iCurrent < _cRange; iCurrent++ )
  225. {
  226. if ( _pwcRangeEnd[iGood] != _pwcRangeEnd[iCurrent] )
  227. {
  228. _pwcRangeEnd[++iGood] = _pwcRangeEnd[iCurrent];
  229. }
  230. }
  231. _cRange = iGood + 1;
  232. //
  233. // Make all the extra entries at the end look like the maximum
  234. // possible character so the binary search works.
  235. //
  236. memset( _pwcRangeEnd + _cRange,
  237. 0xFF,
  238. (_cAllocation - _cRange) * sizeof( WCHAR ) );
  239. }
  240. //+-------------------------------------------------------------------------
  241. //
  242. // Member: CXlatChar::_Realloc, private
  243. //
  244. // Synopsis: Grows the character array.
  245. //
  246. // History: 20-Jan-92 KyleP Created
  247. //
  248. //--------------------------------------------------------------------------
  249. void CXlatChar::_Realloc()
  250. {
  251. WCHAR * oldRangeEnd = _pwcRangeEnd;
  252. UINT oldcAllocation = _cAllocation;
  253. _cAllocation = (_cAllocation + 1) * 2 - 1;
  254. _pwcRangeEnd = new WCHAR [ _cAllocation ];
  255. memcpy( _pwcRangeEnd, oldRangeEnd, oldcAllocation * sizeof( WCHAR ) );
  256. delete oldRangeEnd;
  257. }