Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

358 lines
9.2 KiB

  1. //+-------------------------------------------------------------------------
  2. //
  3. // Microsoft Windows
  4. // Copyright (C) Microsoft Corporation, 1992-2000.
  5. //
  6. // File: XlatChar.cxx
  7. //
  8. // Contents: Character translation class.
  9. //
  10. // Classes: CXlatChar
  11. //
  12. // History: 02-13-92 KyleP Created
  13. //
  14. //--------------------------------------------------------------------------
  15. #include <pch.cxx>
  16. #pragma hdrstop
  17. // Local includes:
  18. #include <xlatchar.hxx>
  19. inline WCHAR CiToUpper( WCHAR c )
  20. {
  21. return ( c < L'a' ) ? c : ( c <= L'z' ) ? ( c - (L'a' - L'A') ) :
  22. RtlUpcaseUnicodeChar( c );
  23. } //CiToUpper
  24. #define TOUPPER CiToUpper
  25. //+-------------------------------------------------------------------------
  26. //
  27. // Member: CXlatChar::CXlatChar, public
  28. //
  29. // Synopsis: Initializes character mapping (no char classes).
  30. //
  31. // Arguments: [fCaseSens] -- TRUE if case sensitive mapping.
  32. //
  33. // History: 20-Jan-92 KyleP Created
  34. // 02-Jul-92 KyleP Added case sensitivity
  35. //
  36. //--------------------------------------------------------------------------
  37. CXlatChar::CXlatChar( BOOLEAN fCaseSens )
  38. : _cAllocation( 31 ),
  39. _cRange( 1 ),
  40. _iPrevRange( 0 ),
  41. _fCaseSens( fCaseSens )
  42. {
  43. _pwcRangeEnd = new WCHAR [ _cAllocation ];
  44. *_pwcRangeEnd = (WCHAR)-1; // Largest possible character.
  45. #if (CIDBG == 1)
  46. _fPrepared = FALSE;
  47. #endif
  48. }
  49. //+-------------------------------------------------------------------------
  50. //
  51. // Member: CXlatChar::CXlatChar, public
  52. //
  53. // Synopsis: Copy constructor
  54. //
  55. // Arguments: [src] -- Source
  56. //
  57. // History: 13-Jul-95 KyleP Created
  58. //
  59. //--------------------------------------------------------------------------
  60. CXlatChar::CXlatChar( CXlatChar const & src )
  61. : _cRange( src._cRange ),
  62. _cAllocation( src._cAllocation ),
  63. _iPrevRange( src._iPrevRange ),
  64. _fCaseSens( src._fCaseSens )
  65. #if (CIDBG == 1)
  66. ,_fPrepared( src._fPrepared )
  67. #endif
  68. {
  69. _pwcRangeEnd = new WCHAR [ _cAllocation ];
  70. RtlCopyMemory( _pwcRangeEnd, src._pwcRangeEnd, _cAllocation*sizeof(_pwcRangeEnd[0]) );
  71. }
  72. //+-------------------------------------------------------------------------
  73. //
  74. // Member: CXlatChar::AddRange, public
  75. //
  76. // Synopsis: Adds range as a new equivalance class.
  77. //
  78. // Arguments: [wcStart] -- Start of range.
  79. // [wcEnd] -- End of range.
  80. //
  81. // History: 20-Jan-92 KyleP Created
  82. // 02-Jul-92 KyleP Added case sensitivity
  83. //
  84. //--------------------------------------------------------------------------
  85. void CXlatChar::AddRange( WCHAR wcStart, WCHAR wcEnd )
  86. {
  87. vqAssert( !_fPrepared );
  88. if ( !_fCaseSens )
  89. {
  90. wcStart = TOUPPER( wcStart );
  91. wcEnd = TOUPPER( wcEnd );
  92. }
  93. //
  94. // Make sure there's room for start and end of range in array.
  95. //
  96. if ( _cAllocation - _cRange < 2 )
  97. _Realloc();
  98. _pwcRangeEnd[_cRange++] = wcStart - 1;
  99. _pwcRangeEnd[_cRange++] = wcEnd;
  100. }
  101. //+-------------------------------------------------------------------------
  102. //
  103. // Member: CXlatChar::Translate, public
  104. //
  105. // Synopsis: Maps character to its equivalence class.
  106. //
  107. // Arguments: [wc] -- Character to map.
  108. //
  109. // Returns: The equivalence class of character.
  110. //
  111. // Algorithm: Binary search array until the correct bin is found.
  112. //
  113. // History: 20-Jan-92 KyleP Created
  114. // 02-Jul-92 KyleP Added case sensitivity
  115. //
  116. //--------------------------------------------------------------------------
  117. UINT CXlatChar::Translate( WCHAR wc ) const
  118. {
  119. vqAssert( _fPrepared );
  120. if ( !_fCaseSens )
  121. wc = TOUPPER( wc );
  122. if ( wc == '.' )
  123. return( symDot );
  124. UINT i = _cAllocation / 2;
  125. UINT step = (_cAllocation + 3) / 4;
  126. WCHAR wcCurrent = _pwcRangeEnd[i];
  127. while ( step != 0 )
  128. {
  129. if ( wcCurrent == wc )
  130. break;
  131. if ( wcCurrent < wc )
  132. i += step;
  133. else
  134. i -= step;
  135. step = step / 2;
  136. wcCurrent = _pwcRangeEnd[i];
  137. }
  138. //
  139. // If we can't go anywhere, then either i or i + 1 is correct.
  140. //
  141. if ( wcCurrent < wc )
  142. i++;
  143. return( i + 1 + cSpecialCharClasses );
  144. }
  145. //+-------------------------------------------------------------------------
  146. //
  147. // Member: CXlatChar::TranslateRange, public
  148. //
  149. // Synopsis: Iterator mapping character range to set of equivalence
  150. // classes.
  151. //
  152. // Arguments: [wcStart] -- Start of range.
  153. // [wcEnd] -- End of range.
  154. //
  155. // Returns: If [wcStart] is 0 then the next class in the most
  156. // recently specified range is returned. Otherwise the
  157. // first class in the new range is returned.
  158. //
  159. // History: 20-Jan-92 KyleP Created
  160. // 02-Jul-92 KyleP Added case sensitivity
  161. //
  162. //--------------------------------------------------------------------------
  163. UINT CXlatChar::TranslateRange( WCHAR wcStart, WCHAR wcEnd )
  164. {
  165. if ( !_fCaseSens )
  166. {
  167. if ( 0 != wcStart )
  168. wcStart = TOUPPER( wcStart );
  169. wcEnd = TOUPPER( wcEnd );
  170. }
  171. if ( wcStart > wcEnd )
  172. {
  173. vqDebugOut(( DEB_ERROR, "Invalid regex.\n" ));
  174. THROW( CException( STATUS_INVALID_PARAMETER ) );
  175. }
  176. if ( wcStart != 0 )
  177. {
  178. _iPrevRange = Translate( wcStart );
  179. vqDebugOut(( DEB_REGEX, "First range = " ));
  180. }
  181. else
  182. {
  183. if ( _iPrevRange - cSpecialCharClasses >= _cRange )
  184. return 0;
  185. if ( _pwcRangeEnd[_iPrevRange-1-cSpecialCharClasses] >= wcEnd )
  186. _iPrevRange = 0;
  187. else
  188. _iPrevRange++;
  189. vqDebugOut(( DEB_REGEX, "Secondary range = " ));
  190. }
  191. vqDebugOut(( DEB_REGEX | DEB_NOCOMPNAME, "%u\n", _iPrevRange ));
  192. return _iPrevRange;
  193. }
  194. //+-------------------------------------------------------------------------
  195. //
  196. // Member: CXlatChar::Prepare, public
  197. //
  198. // Synopsis: Prepares class for translation.
  199. //
  200. // Requires: All equivalance classes must be added before prepare is
  201. // called.
  202. //
  203. // History: 20-Jan-92 KyleP Created
  204. //
  205. //--------------------------------------------------------------------------
  206. int _cdecl CompareFn( void const * Elt1, void const * Elt2 )
  207. {
  208. return( *(WCHAR *)Elt1 - *(WCHAR *)Elt2 );
  209. }
  210. //+-------------------------------------------------------------------------
  211. //
  212. // Member: CXlatChar::Prepare, public
  213. //
  214. // Synopsis: Called after ranges added to prepare for searching.
  215. //
  216. // History: 20-Jan-92 KyleP Created
  217. //
  218. //--------------------------------------------------------------------------
  219. void CXlatChar::Prepare()
  220. {
  221. //
  222. // Sort and then remove duplicates from the array.
  223. //
  224. qsort( _pwcRangeEnd, _cRange, sizeof( *_pwcRangeEnd ), CompareFn );
  225. UINT iGood, iCurrent;
  226. for ( iGood = 0, iCurrent = 1; iCurrent < _cRange; iCurrent++ )
  227. {
  228. if ( _pwcRangeEnd[iGood] != _pwcRangeEnd[iCurrent] )
  229. {
  230. _pwcRangeEnd[++iGood] = _pwcRangeEnd[iCurrent];
  231. }
  232. }
  233. _cRange = iGood + 1;
  234. //
  235. // Make all the extra entries at the end look like the maximum
  236. // possible character so the binary search works.
  237. //
  238. memset( _pwcRangeEnd + _cRange,
  239. 0xFF,
  240. (_cAllocation - _cRange) * sizeof( WCHAR ) );
  241. #if (CIDBG == 1)
  242. _fPrepared = TRUE;
  243. #endif
  244. }
  245. //+-------------------------------------------------------------------------
  246. //
  247. // Member: CXlatChar::_Realloc, private
  248. //
  249. // Synopsis: Grows the character array.
  250. //
  251. // History: 20-Jan-92 KyleP Created
  252. //
  253. //--------------------------------------------------------------------------
  254. void CXlatChar::_Realloc()
  255. {
  256. WCHAR * oldRangeEnd = _pwcRangeEnd;
  257. UINT oldcAllocation = _cAllocation;
  258. _cAllocation = (_cAllocation + 1) * 2 - 1;
  259. _pwcRangeEnd = new WCHAR [ _cAllocation ];
  260. memcpy( _pwcRangeEnd, oldRangeEnd, oldcAllocation * sizeof( WCHAR ) );
  261. delete oldRangeEnd;
  262. }
  //
  // Debug methods
  //
  #if (CIDBG == 1)

  //+-------------------------------------------------------------------------
  //
  //  Member:     CXlatChar::Display, public
  //
  //  Synopsis:   Debug-only dump (DEB_REGEX) of the special symbol classes
  //              followed by one line per stored range: class number,
  //              start and end.  A range is printed as quoted characters
  //              only when both its start and its end lie in ' '..'~';
  //              otherwise both are printed as numbers.
  //
  //--------------------------------------------------------------------------
  void CXlatChar::Display() const
  {
      vqDebugOut(( DEB_REGEX, "Range\tStart - End\n"
                              "-----\t----- ---\n" ));

      vqDebugOut(( DEB_REGEX, "%s\t\t%u\n", "Any", symAny ));
      vqDebugOut(( DEB_REGEX, "%s\t\t%u\n", "<bol>", symBeginLine ));
      vqDebugOut(( DEB_REGEX, "%s\t\t%u\n", "<eol>", symEndLine ));
      vqDebugOut(( DEB_REGEX, "%s\t%u\n", "Invalid", symInvalid ));
      vqDebugOut(( DEB_REGEX, "%s\t%u\n", "Epsilon", symEpsilon ));
      vqDebugOut(( DEB_REGEX, "%s\t%u\n", "Dot (.)", symDot ));

      // The first range is always reported with a start of 1.
      vqDebugOut(( DEB_REGEX, "%u\t1\t%u\n",
                   1+cSpecialCharClasses,
                   _pwcRangeEnd[0] ));

      for ( UINT iRange = 1; iRange < _cRange; ++iRange )
      {
          // Each later range starts one past the end of its predecessor.
          BOOLEAN const fPrintable = _pwcRangeEnd[iRange-1]+1 >= ' ' &&
                                     _pwcRangeEnd[iRange-1]+1 <= '~' &&
                                     _pwcRangeEnd[iRange] >= ' ' &&
                                     _pwcRangeEnd[iRange] <= '~';

          if ( fPrintable )
          {
              vqDebugOut(( DEB_REGEX,
                           "%u\t\"%c\"\t\"%c\"\n",
                           iRange+1+cSpecialCharClasses,
                           _pwcRangeEnd[iRange-1]+1,
                           _pwcRangeEnd[iRange] ));
          }
          else
          {
              vqDebugOut(( DEB_REGEX,
                           "%u\t%u\t%u\n",
                           iRange+1+cSpecialCharClasses,
                           _pwcRangeEnd[iRange-1]+1,
                           _pwcRangeEnd[iRange] ));
          }
      }
  }

  #endif // (CIDBG == 1)