Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1063 lines
23 KiB

  1. /*++
  2. Copyright (c) 1997-2001 Microsoft Corporation
  3. Module Name:
  4. utf8.c
  5. Abstract:
  6. Domain Name System (DNS) Library
  7. UTF8 to\from unicode and ANSI conversions
  8. The UTF8\unicode routines are similar to the generic ones floating
  9. around the NT group, but a heck of a lot cleaner and more robust,
  10. including catching the invalid UTF8 string case on the utf8 to unicode
  11. conversion.
  12. The UTF8\ANSI routines are optimized for the 99% case where all the
  13. characters are <128 and no conversion is actually required.
  14. Author:
  15. Jim Gilroy (jamesg) March 1997
  16. Revision History:
  17. --*/
  18. #include "local.h"
  //
  //  Macros to simplify UTF8 conversions
  //
  //  Lead byte markers carry the sequence length in their high bits;
  //  every following byte of a sequence is a trail byte.
  //

  #define UTF8_1ST_OF_2       0xc0        // 110x xxxx -- lead byte of two byte sequence
  #define UTF8_1ST_OF_3       0xe0        // 1110 xxxx -- lead byte of three byte sequence
  #define UTF8_1ST_OF_4       0xf0        // 1111 xxxx -- lead byte of four byte sequence
  #define UTF8_TRAIL          0x80        // 10xx xxxx -- trail byte

  //  largest unicode character representable in two byte UTF8

  #define UTF8_2_MAX          0x07ff

  //  single bit tests -- result is non-zero when the bit is set

  #define BIT7(ch)            ((ch) & 0x80)
  #define BIT6(ch)            ((ch) & 0x40)
  #define BIT5(ch)            ((ch) & 0x20)
  #define BIT4(ch)            ((ch) & 0x10)
  #define BIT3(ch)            ((ch) & 0x08)

  //  payload extraction masks

  #define LOW6BITS(ch)        ((ch) & 0x3f)
  #define LOW5BITS(ch)        ((ch) & 0x1f)
  #define LOW4BITS(ch)        ((ch) & 0x0f)

  //  high byte of a wide character, left in place (not shifted down)

  #define HIGHBYTE(wch)       ((wch) & 0xff00)

  //
  //  Surrogate pair support
  //
  //  Two unicode characters (a high surrogate followed by a low
  //  surrogate) may be linked to form a surrogate pair.  A pair
  //  travels in UTF8 as a single four byte sequence rather than as
  //  two three byte sequences (six bytes), which is why the converters
  //  below need special handling for it.
  //

  #define HIGH_SURROGATE_START    0xd800
  #define HIGH_SURROGATE_END      0xdbff
  #define LOW_SURROGATE_START     0xdc00
  #define LOW_SURROGATE_END       0xdfff

  //
  //  Max "normal conversion" size:  make space for MAX_PATH,
  //  which covers all valid DNS names and strings.
  //

  #define TEMP_BUFFER_LENGTH  (2*MAX_PATH)
  54. DNS_STATUS
  55. _fastcall
  56. Dns_ValidateUtf8Byte(
  57. IN BYTE chUtf8,
  58. IN OUT PDWORD pdwTrailCount
  59. )
  60. /*++
  61. Routine Description:
  62. Verifies that byte is valid UTF8 byte.
  63. Arguments:
  64. Return Value:
  65. ERROR_SUCCESS -- if valid UTF8 given trail count
  66. ERROR_INVALID_DATA -- if invalid
  67. --*/
  68. {
  69. DWORD trailCount = *pdwTrailCount;
  70. DNSDBG( TRACE, ( "Dns_ValidateUtf8Byte()\n" ));
  71. //
  72. // if ASCII byte, only requirement is no trail count
  73. //
  74. if ( (UCHAR)chUtf8 < 0x80 )
  75. {
  76. if ( trailCount == 0 )
  77. {
  78. return( ERROR_SUCCESS );
  79. }
  80. return( ERROR_INVALID_DATA );
  81. }
  82. //
  83. // trail byte
  84. // - must be in multi-byte set
  85. //
  86. if ( BIT6(chUtf8) == 0 )
  87. {
  88. if ( trailCount == 0 )
  89. {
  90. return( ERROR_INVALID_DATA );
  91. }
  92. --trailCount;
  93. }
  94. //
  95. // multi-byte lead byte
  96. // - must NOT be in existing multi-byte set
  97. // - verify valid lead byte
  98. else
  99. {
  100. if ( trailCount != 0 )
  101. {
  102. return( ERROR_INVALID_DATA );
  103. }
  104. // first of two bytes (110xxxxx)
  105. if ( BIT5(chUtf8) == 0 )
  106. {
  107. trailCount = 1;
  108. }
  109. // first of three bytes (1110xxxx)
  110. else if ( BIT4(chUtf8) == 0 )
  111. {
  112. trailCount = 2;
  113. }
  114. // first of four bytes (surrogate character) (11110xxx)
  115. else if ( BIT3(chUtf8) == 0 )
  116. {
  117. trailCount = 3;
  118. }
  119. else
  120. {
  121. return( ERROR_INVALID_DATA );
  122. }
  123. }
  124. // reset caller's trail count
  125. *pdwTrailCount = trailCount;
  126. return( ERROR_SUCCESS );
  127. }
  128. //
  129. // UTF8 to unicode conversions
  130. //
  131. // For some reason UTF8 is not supported in Win9x.
  132. // AND the implementation itself is not careful about
  133. // validating UTF8.
  134. //
  135. DWORD
  136. _fastcall
  137. Dns_UnicodeToUtf8(
  138. IN PWCHAR pwUnicode,
  139. IN DWORD cchUnicode,
  140. OUT PCHAR pchResult,
  141. IN DWORD cchResult
  142. )
  143. /*++
  144. Routine Description:
  145. Convert unicode characters to UTF8.
  146. Result is NULL terminated if sufficient space in result
  147. buffer is available.
  148. Arguments:
  149. pwUnicode -- ptr to start of unicode buffer
  150. cchUnicode -- length of unicode buffer
  151. pchResult -- ptr to start of result buffer for UTF8 chars
  152. cchResult -- length of result buffer
  153. Return Value:
  154. Count of UTF8 characters in result, if successful.
  155. 0 on error. GetLastError() has error code.
  156. --*/
  157. {
  158. WCHAR wch; // current unicode character being converted
  159. DWORD lengthUtf8 = 0; // length of UTF8 result string
  160. WORD lowSurrogate;
  161. DWORD surrogateDword;
  162. DNSDBG( TRACE, (
  163. "Dns_UnicodeToUtf8( %.*S )\n",
  164. cchUnicode,
  165. pwUnicode ));
  166. //
  167. // loop converting unicode chars until run out or error
  168. //
  169. while ( cchUnicode-- )
  170. {
  171. wch = *pwUnicode++;
  172. //
  173. // ASCII character (7 bits or less) -- converts to directly
  174. //
  175. if ( wch < 0x80 )
  176. {
  177. lengthUtf8++;
  178. if ( pchResult )
  179. {
  180. if ( lengthUtf8 >= cchResult )
  181. {
  182. goto OutOfBuffer;
  183. }
  184. *pchResult++ = (CHAR)wch;
  185. }
  186. continue;
  187. }
  188. //
  189. // wide character less than 0x07ff (11bits) converts to two bytes
  190. // - upper 5 bits in first byte
  191. // - lower 6 bits in secondar byte
  192. //
  193. else if ( wch <= UTF8_2_MAX )
  194. {
  195. lengthUtf8 += 2;
  196. if ( pchResult )
  197. {
  198. if ( lengthUtf8 >= cchResult )
  199. {
  200. goto OutOfBuffer;
  201. }
  202. *pchResult++ = UTF8_1ST_OF_2 | wch >> 6;
  203. *pchResult++ = UTF8_TRAIL | LOW6BITS( (UCHAR)wch );
  204. }
  205. continue;
  206. }
  207. //
  208. // surrogate pair
  209. // - if have high surrogate followed by low surrogate then
  210. // process as surrogate pair
  211. // - otherwise treat character as ordinary unicode "three-byte"
  212. // character, by falling through to below
  213. //
  214. else if ( wch >= HIGH_SURROGATE_START &&
  215. wch <= HIGH_SURROGATE_END &&
  216. cchUnicode &&
  217. (lowSurrogate = *pwUnicode) &&
  218. lowSurrogate >= LOW_SURROGATE_START &&
  219. lowSurrogate <= LOW_SURROGATE_END )
  220. {
  221. // have a surrogate pair
  222. // - suck up next unicode character (low surrogate of pair)
  223. // - make full DWORD surrogate pair
  224. // - then lay out four UTF8 bytes
  225. // 1st of four, then three trail bytes
  226. // 0x1111xxxx
  227. // 0x10xxxxxx
  228. // 0x10xxxxxx
  229. // 0x10xxxxxx
  230. DNSDBG( TRACE, (
  231. "Have surrogate pair %hx : %hx\n",
  232. wch,
  233. lowSurrogate ));
  234. pwUnicode++;
  235. cchUnicode--;
  236. lengthUtf8 += 4;
  237. if ( pchResult )
  238. {
  239. if ( lengthUtf8 >= cchResult )
  240. {
  241. goto OutOfBuffer;
  242. }
  243. surrogateDword = (((wch-0xD800) << 10) + (lowSurrogate - 0xDC00) + 0x10000);
  244. *pchResult++ = UTF8_1ST_OF_4 | (UCHAR) (surrogateDword >> 18);
  245. *pchResult++ = UTF8_TRAIL | (UCHAR) LOW6BITS(surrogateDword >> 12);
  246. *pchResult++ = UTF8_TRAIL | (UCHAR) LOW6BITS(surrogateDword >> 6);
  247. *pchResult++ = UTF8_TRAIL | (UCHAR) LOW6BITS(surrogateDword);
  248. DNSDBG( TRACE, (
  249. "Converted surrogate -- DWORD = %08x\n"
  250. "\tconverted %x %x %x %x\n",
  251. surrogateDword,
  252. (UCHAR) *(pchResult-3),
  253. (UCHAR) *(pchResult-2),
  254. (UCHAR) *(pchResult-1),
  255. (UCHAR) *pchResult ));
  256. }
  257. }
  258. //
  259. // wide character (non-zero in top 5 bits) converts to three bytes
  260. // - top 4 bits in first byte
  261. // - middle 6 bits in second byte
  262. // - low 6 bits in third byte
  263. //
  264. else
  265. {
  266. lengthUtf8 += 3;
  267. if ( pchResult )
  268. {
  269. if ( lengthUtf8 >= cchResult )
  270. {
  271. goto OutOfBuffer;
  272. }
  273. *pchResult++ = UTF8_1ST_OF_3 | (wch >> 12);
  274. *pchResult++ = UTF8_TRAIL | LOW6BITS( wch >> 6 );
  275. *pchResult++ = UTF8_TRAIL | LOW6BITS( wch );
  276. }
  277. }
  278. }
  279. //
  280. // NULL terminate buffer
  281. // return UTF8 character count
  282. //
  283. if ( pchResult && lengthUtf8 < cchResult )
  284. {
  285. *pchResult = 0;
  286. }
  287. return( lengthUtf8 );
  288. OutOfBuffer:
  289. SetLastError( ERROR_INSUFFICIENT_BUFFER );
  290. return( 0 );
  291. }
  292. DWORD
  293. _fastcall
  294. Dns_Utf8ToUnicode(
  295. IN PCHAR pchUtf8,
  296. IN DWORD cchUtf8,
  297. OUT PWCHAR pwResult,
  298. IN DWORD cwResult
  299. )
  300. /*++
  301. Routine Description:
  302. Convert UTF8 characters to unicode.
  303. Result is NULL terminated if sufficient space in result
  304. buffer is available.
  305. Arguments:
  306. pwResult -- ptr to start of result buffer for unicode chars
  307. cwResult -- length of result buffer in WCHAR
  308. pwUtf8 -- ptr to start of UTF8 buffer
  309. cchUtf8 -- length of UTF8 buffer
  310. Return Value:
  311. Count of unicode characters in result, if successful.
  312. 0 on error. GetLastError() has error code.
  313. --*/
  314. {
  315. CHAR ch; // current UTF8 character
  316. WCHAR wch; // current unicode character
  317. DWORD trailCount = 0; // count of UTF8 trail bytes to follow
  318. DWORD lengthUnicode = 0; // length of unicode result string
  319. BOOL bsurrogatePair = FALSE;
  320. DWORD surrogateDword;
  321. //
  322. // loop converting UTF8 chars until run out or error
  323. //
  324. while ( cchUtf8-- )
  325. {
  326. ch = *pchUtf8++;
  327. //
  328. // ASCII character -- just copy
  329. //
  330. if ( BIT7(ch) == 0 )
  331. {
  332. lengthUnicode++;
  333. if ( pwResult )
  334. {
  335. if ( lengthUnicode >= cwResult )
  336. {
  337. goto OutOfBuffer;
  338. }
  339. *pwResult++ = (WCHAR)ch;
  340. }
  341. continue;
  342. }
  343. //
  344. // UTF8 trail byte
  345. // - if not expected, error
  346. // - otherwise shift unicode character 6 bits and
  347. // copy in lower six bits of UTF8
  348. // - if last UTF8 byte, copy result to unicode string
  349. //
  350. else if ( BIT6(ch) == 0 )
  351. {
  352. if ( trailCount == 0 )
  353. {
  354. goto InvalidUtf8;
  355. }
  356. if ( !bsurrogatePair )
  357. {
  358. wch <<= 6;
  359. wch |= LOW6BITS( ch );
  360. if ( --trailCount == 0 )
  361. {
  362. lengthUnicode++;
  363. if ( pwResult )
  364. {
  365. if ( lengthUnicode >= cwResult )
  366. {
  367. goto OutOfBuffer;
  368. }
  369. *pwResult++ = wch;
  370. }
  371. }
  372. continue;
  373. }
  374. // surrogate pair
  375. // - same as above EXCEPT build two unicode chars
  376. // from surrogateDword
  377. else
  378. {
  379. surrogateDword <<= 6;
  380. surrogateDword |= LOW6BITS( ch );
  381. if ( --trailCount == 0 )
  382. {
  383. lengthUnicode += 2;
  384. if ( pwResult )
  385. {
  386. if ( lengthUnicode >= cwResult )
  387. {
  388. goto OutOfBuffer;
  389. }
  390. surrogateDword -= 0x10000;
  391. *pwResult++ = (WCHAR) ((surrogateDword >> 10) + HIGH_SURROGATE_START);
  392. *pwResult++ = (WCHAR) ((surrogateDword & 0x3ff) + LOW_SURROGATE_START);
  393. }
  394. bsurrogatePair = FALSE;
  395. }
  396. }
  397. }
  398. //
  399. // UTF8 lead byte
  400. // - if currently in extension, error
  401. else
  402. {
  403. if ( trailCount != 0 )
  404. {
  405. goto InvalidUtf8;
  406. }
  407. // first of two byte character (110xxxxx)
  408. if ( BIT5(ch) == 0 )
  409. {
  410. trailCount = 1;
  411. wch = LOW5BITS(ch);
  412. continue;
  413. }
  414. // first of three byte character (1110xxxx)
  415. else if ( BIT4(ch) == 0 )
  416. {
  417. trailCount = 2;
  418. wch = LOW4BITS(ch);
  419. continue;
  420. }
  421. // first of four byte surrogate pair (11110xxx)
  422. else if ( BIT3(ch) == 0 )
  423. {
  424. trailCount = 3;
  425. surrogateDword = LOW4BITS(ch);
  426. bsurrogatePair = TRUE;
  427. }
  428. else
  429. {
  430. goto InvalidUtf8;
  431. }
  432. }
  433. }
  434. // catch if hit end in the middle of UTF8 multi-byte character
  435. if ( trailCount )
  436. {
  437. goto InvalidUtf8;
  438. }
  439. //
  440. // NULL terminate buffer
  441. // return the number of Unicode characters written.
  442. //
  443. if ( pwResult && lengthUnicode < cwResult )
  444. {
  445. *pwResult = 0;
  446. }
  447. return( lengthUnicode );
  448. OutOfBuffer:
  449. SetLastError( ERROR_INSUFFICIENT_BUFFER );
  450. return( 0 );
  451. InvalidUtf8:
  452. SetLastError( ERROR_INVALID_DATA );
  453. return( 0 );
  454. }
  455. //
  456. // UTF8 \ ANSI conversions
  457. //
  458. DWORD
  459. Dns_Utf8ToOrFromAnsi(
  460. OUT PCHAR pchResult,
  461. IN DWORD cchResult,
  462. IN PCHAR pchIn,
  463. IN DWORD cchIn,
  464. IN DNS_CHARSET InCharSet,
  465. IN DNS_CHARSET OutCharSet
  466. )
  467. /*++
  468. Routine Description:
  469. Convert UTF8 characters to ANSI or vice versa.
  470. Note: this function appears to call string functions (string.c)
  471. which call back to it. However, this calls those functions
  472. ONLY for conversions to\from unicode which do NOT call back
  473. to these functions. Ultimately need to check if LCMapString
  474. can handle these issues.
  475. Arguments:
  476. pchResult -- ptr to start of result buffer for ansi chars
  477. cchResult -- length of result buffer
  478. pchIn -- ptr to start of input string
  479. cchIn -- length of input string
  480. InCharSet -- char set of input string (DnsCharSetAnsi or DnsCharSetUtf8)
  481. OutCharSet -- char set for result string (DnsCharSetUtf8 or DnsCharSetAnsi)
  482. Return Value:
  483. Count of bytes in result (including terminating NULL).
  484. 0 on error. GetLastError() has error code.
  485. --*/
  486. {
  487. DWORD unicodeLength;
  488. DWORD resultLength;
  489. CHAR tempBuffer[ TEMP_BUFFER_LENGTH ];
  490. PCHAR ptemp = tempBuffer;
  491. DNS_STATUS status;
  492. DNSDBG( TRACE, (
  493. "Dns_Utf8ToOrFromAnsi()\n"
  494. "\tbuffer = %p\n"
  495. "\tbuf length = %d\n"
  496. "\tpchString = %p (%*s)\n"
  497. "\tcchString = %d\n"
  498. "\tCharSetIn = %d\n"
  499. "\tCharSetOut = %d\n",
  500. pchResult,
  501. cchResult,
  502. pchIn,
  503. cchIn, pchIn,
  504. cchIn,
  505. InCharSet,
  506. OutCharSet ));
  507. //
  508. // validate charsets
  509. //
  510. ASSERT( InCharSet != OutCharSet );
  511. ASSERT( InCharSet == DnsCharSetAnsi || InCharSet == DnsCharSetUtf8 );
  512. ASSERT( OutCharSet == DnsCharSetAnsi || OutCharSet == DnsCharSetUtf8 );
  513. //
  514. // if length not given, calculate
  515. //
  516. if ( cchIn == 0 )
  517. {
  518. cchIn = strlen( pchIn );
  519. }
  520. //
  521. // string completely ASCII
  522. // - simple memcopy suffices
  523. // - note result must have terminating NULL
  524. //
  525. if ( Dns_IsStringAsciiEx(
  526. pchIn,
  527. cchIn ) )
  528. {
  529. if ( !pchResult )
  530. {
  531. return( cchIn + 1 );
  532. }
  533. if ( cchResult <= cchIn )
  534. {
  535. status = ERROR_INSUFFICIENT_BUFFER;
  536. goto Failed;
  537. }
  538. memcpy(
  539. pchResult,
  540. pchIn,
  541. cchIn );
  542. pchResult[ cchIn ] = 0;
  543. return( cchIn+1 );
  544. }
  545. //
  546. // non-ASCII
  547. // - convert to unicode, then to result character set
  548. //
  549. // DCR_PERF: LCMapStringA() might be able to handle all this
  550. // haven't figured out how yet
  551. //
  552. unicodeLength = Dns_GetBufferLengthForStringCopy(
  553. pchIn,
  554. cchIn,
  555. InCharSet,
  556. DnsCharSetUnicode
  557. );
  558. if ( unicodeLength > TEMP_BUFFER_LENGTH )
  559. {
  560. // can't use static buffer, must allocate
  561. ptemp = Dns_StringCopyAllocate(
  562. pchIn,
  563. cchIn,
  564. InCharSet,
  565. DnsCharSetUnicode
  566. );
  567. if ( !ptemp )
  568. {
  569. status = ERROR_INVALID_DATA;
  570. goto Failed;
  571. }
  572. }
  573. else
  574. {
  575. if ( unicodeLength == 0 )
  576. {
  577. status = ERROR_INVALID_DATA;
  578. goto Failed;
  579. }
  580. // copy into temporary buffer
  581. resultLength = Dns_StringCopy(
  582. ptemp,
  583. NULL, // adequate buffer length
  584. pchIn,
  585. cchIn,
  586. InCharSet,
  587. DnsCharSetUnicode
  588. );
  589. if ( !resultLength )
  590. {
  591. status = ERROR_INVALID_DATA;
  592. goto Failed;
  593. }
  594. ASSERT( resultLength == unicodeLength );
  595. }
  596. //
  597. // conversion to result char set
  598. // - if have result buffer, convert into it
  599. // - should have at least ONE two byte character
  600. // otherwise should have taken fast path above
  601. //
  602. if ( pchResult )
  603. {
  604. resultLength = Dns_StringCopy(
  605. pchResult,
  606. & cchResult, // result buffer length
  607. ptemp,
  608. 0,
  609. DnsCharSetUnicode,
  610. OutCharSet
  611. );
  612. if ( resultLength == 0 )
  613. {
  614. status = ERROR_INSUFFICIENT_BUFFER;
  615. goto Failed;
  616. }
  617. ASSERT( resultLength <= cchResult );
  618. ASSERT( pchResult[resultLength-1] == 0 );
  619. ASSERT( resultLength >= unicodeLength/2 );
  620. }
  621. else
  622. {
  623. resultLength = Dns_GetBufferLengthForStringCopy(
  624. ptemp,
  625. 0,
  626. DnsCharSetUnicode,
  627. OutCharSet
  628. );
  629. ASSERT( resultLength >= unicodeLength/2 );
  630. }
  631. //
  632. // final mapping from unicode to result character set
  633. //
  634. if ( ptemp != tempBuffer )
  635. {
  636. FREE_HEAP( ptemp );
  637. }
  638. return( resultLength );
  639. Failed:
  640. SetLastError( status );
  641. if ( ptemp != tempBuffer )
  642. {
  643. FREE_HEAP( ptemp );
  644. }
  645. return( 0 );
  646. }
  647. DWORD
  648. Dns_AnsiToUtf8(
  649. IN PCHAR pchAnsi,
  650. IN DWORD cchAnsi,
  651. OUT PCHAR pchResult,
  652. IN DWORD cchResult
  653. )
  654. /*++
  655. Routine Description:
  656. Convert ANSI characters to UTF8.
  657. Arguments:
  658. pchAnsi -- ptr to start of ansi buffer
  659. cchAnsi -- length of ansi buffer
  660. pchResult -- ptr to start of result buffer for UTF8 chars
  661. cchResult -- length of result buffer
  662. Return Value:
  663. Count of UTF8 characters in result, if successful.
  664. 0 on error. GetLastError() has error code.
  665. --*/
  666. {
  667. return Dns_Utf8ToOrFromAnsi(
  668. pchResult, // result buffer
  669. cchResult,
  670. pchAnsi, // in string
  671. cchAnsi,
  672. DnsCharSetAnsi, // ANSI in
  673. DnsCharSetUtf8 // UTF8 out
  674. );
  675. }
  676. DWORD
  677. Dns_Utf8ToAnsi(
  678. IN PCHAR pchUtf8,
  679. IN DWORD cchUtf8,
  680. OUT PCHAR pchResult,
  681. IN DWORD cchResult
  682. )
  683. /*++
  684. Routine Description:
  685. Convert UTF8 characters to ANSI.
  686. Arguments:
  687. pchResult -- ptr to start of result buffer for ansi chars
  688. cchResult -- length of result buffer
  689. pwUtf8 -- ptr to start of UTF8 buffer
  690. cchUtf8 -- length of UTF8 buffer
  691. Return Value:
  692. Count of ansi characters in result, if successful.
  693. 0 on error. GetLastError() has error code.
  694. --*/
  695. {
  696. return Dns_Utf8ToOrFromAnsi(
  697. pchResult, // result buffer
  698. cchResult,
  699. pchUtf8, // in string
  700. cchUtf8,
  701. DnsCharSetUtf8, // UTF8 in
  702. DnsCharSetAnsi // ANSI out
  703. );
  704. }
  705. BOOL
  706. _fastcall
  707. Dns_IsStringAscii(
  708. IN LPSTR pszString
  709. )
  710. /*++
  711. Routine Description:
  712. Check if string is ASCII.
  713. This is equivalent to saying
  714. - is ANSI string already in UTF8
  715. or
  716. - is UTF8 string already in ANSI
  717. This allows you to optimize for the 99% case where just
  718. passing ASCII strings.
  719. Arguments:
  720. pszString -- ANSI or UTF8 string to check for ASCIIhood
  721. Return Value:
  722. TRUE if string is all ASCII (characters all < 128)
  723. FALSE if non-ASCII characters.
  724. --*/
  725. {
  726. register UCHAR ch;
  727. //
  728. // loop through until hit non-ASCII character
  729. //
  730. while ( ch = (UCHAR) *pszString++ )
  731. {
  732. if ( ch < 0x80 )
  733. {
  734. continue;
  735. }
  736. return( FALSE );
  737. }
  738. return( TRUE );
  739. }
  740. BOOL
  741. _fastcall
  742. Dns_IsStringAsciiEx(
  743. IN PCHAR pchString,
  744. IN DWORD cchString
  745. )
  746. /*++
  747. Routine Description:
  748. Check if ANSI (or UTF8) string is ASCII.
  749. This is equivalent to saying
  750. - is ANSI string already in UTF8
  751. or
  752. - is UTF8 string already in ANSI
  753. This allows you to optimize for the 99% case where just
  754. passing ASCII strings.
  755. Arguments:
  756. pchString -- ptr to start of ansi buffer
  757. cchString -- length of ansi buffer
  758. Return Value:
  759. TRUE if string is all ASCII (characters all < 128)
  760. FALSE if non-ASCII characters.
  761. --*/
  762. {
  763. //
  764. // loop through until hit non-ASCII character
  765. //
  766. while ( cchString-- )
  767. {
  768. if ( (UCHAR)*pchString++ < 0x80 )
  769. {
  770. continue;
  771. }
  772. return( FALSE );
  773. }
  774. return( TRUE );
  775. }
  776. BOOL
  777. _fastcall
  778. Dns_IsWideStringAscii(
  779. IN PWCHAR pwszString
  780. )
  781. /*++
  782. Routine Description:
  783. Check if unicode string is ASCII.
  784. This means all characters < 128.
  785. Strings without extended characters need NOT be downcased
  786. on the wire. This allows us to optimize for the 99% case
  787. where just passing ASCII strings.
  788. Arguments:
  789. pwszString -- ptr to unicode string
  790. Return Value:
  791. TRUE if string is all ASCII (characters all < 128)
  792. FALSE if non-ASCII characters.
  793. --*/
  794. {
  795. register USHORT ch;
  796. //
  797. // loop through until hit non-ASCII character
  798. //
  799. while ( ch = (USHORT) *pwszString++ )
  800. {
  801. if ( ch < 0x80 )
  802. {
  803. continue;
  804. }
  805. return( FALSE );
  806. }
  807. return( TRUE );
  808. }
  809. //
  810. // End utf8.c
  811. //