Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

746 lines
22 KiB

  1. /* Copyright (C) Boris Nikolaus, Germany, 1996-1997. All rights reserved. */
  2. /* Copyright (C) Microsoft Corporation, 1997-1998. All rights reserved. */
  3. #include "precomp.h"
  4. #ifdef ENABLE_BER
  5. extern ASN1int32_t _WideCharToUTF8(WCHAR *, ASN1int32_t, ASN1char_t *, ASN1int32_t);
  6. extern ASN1int32_t _UTF8ToWideChar(ASN1char_t *, ASN1int32_t, WCHAR *, ASN1int32_t);
  7. int ASN1BEREncUTF8String(ASN1encoding_t enc, ASN1uint32_t tag, ASN1uint32_t length, WCHAR *value)
  8. {
  9. if (value && length)
  10. {
  11. // first, get the size of the dest UTF8 string
  12. ASN1int32_t cbStrSize = _WideCharToUTF8(value, length, NULL, 0);
  13. if (cbStrSize)
  14. {
  15. ASN1char_t *psz = (ASN1char_t *) EncMemAlloc(enc, cbStrSize);
  16. if (psz)
  17. {
  18. int rc;
  19. ASN1int32_t cbStrSize2 = _WideCharToUTF8(value, length, psz, cbStrSize);
  20. EncAssert(enc, cbStrSize2);
  21. EncAssert(enc, cbStrSize == cbStrSize2);
  22. rc = ASN1BEREncOctetString(enc, tag, cbStrSize2, psz);
  23. EncMemFree(enc, psz);
  24. return rc;
  25. }
  26. }
  27. else
  28. {
  29. ASN1EncSetError(enc, ASN1_ERR_UTF8);
  30. }
  31. }
  32. else
  33. {
  34. return ASN1BEREncOctetString(enc, tag, 0, NULL);
  35. }
  36. return 0;
  37. }
  38. int ASN1BERDecUTF8String(ASN1decoding_t dec, ASN1uint32_t tag, ASN1wstring_t *val)
  39. {
  40. ASN1octetstring_t ostr;
  41. if (ASN1BERDecOctetString(dec, tag, &ostr))
  42. {
  43. if (ostr.length)
  44. {
  45. ASN1int32_t cchWideChar = _UTF8ToWideChar(ostr.value, ostr.length, NULL, 0);
  46. if (cchWideChar)
  47. {
  48. val->value = (WCHAR *) DecMemAlloc(dec, sizeof(WCHAR) * cchWideChar);
  49. if (val->value)
  50. {
  51. val->length = _UTF8ToWideChar(ostr.value, ostr.length, val->value, cchWideChar);
  52. DecAssert(dec, val->length);
  53. DecAssert(dec, cchWideChar == (ASN1int32_t) val->length);
  54. ASN1octetstring_free(&ostr);
  55. return 1;
  56. }
  57. }
  58. else
  59. {
  60. ASN1DecSetError(dec, ASN1_ERR_UTF8);
  61. }
  62. ASN1octetstring_free(&ostr);
  63. }
  64. else
  65. {
  66. val->length = 0;
  67. val->value = NULL;
  68. return 1;
  69. }
  70. }
  71. return 0;
  72. }
  73. #if 1
  74. //
  75. // Constant Declarations.
  76. //
  77. #define ASCII 0x007f
  78. #define SHIFT_IN '+' // beginning of a shift sequence
  79. #define SHIFT_OUT '-' // end of a shift sequence
  80. #define UTF8_2_MAX 0x07ff // max UTF8 2-byte sequence (32 * 64 = 2048)
  81. #define UTF8_1ST_OF_2 0xc0 // 110x xxxx
  82. #define UTF8_1ST_OF_3 0xe0 // 1110 xxxx
  83. #define UTF8_1ST_OF_4 0xf0 // 1111 xxxx
  84. #define UTF8_TRAIL 0x80 // 10xx xxxx
  85. #define HIGHER_6_BIT(u) ((u) >> 12)
  86. #define MIDDLE_6_BIT(u) (((u) & 0x0fc0) >> 6)
  87. #define LOWER_6_BIT(u) ((u) & 0x003f)
  88. #define BIT7(a) ((a) & 0x80)
  89. #define BIT6(a) ((a) & 0x40)
  90. #define HIGH_SURROGATE_START 0xd800
  91. #define HIGH_SURROGATE_END 0xdbff
  92. #define LOW_SURROGATE_START 0xdc00
  93. #define LOW_SURROGATE_END 0xdfff
  94. ////////////////////////////////////////////////////////////////////////////
  95. //
  96. // UTF8ToUnicode
  97. //
  98. // Maps a UTF-8 character string to its wide character string counterpart.
  99. //
  100. // 02-06-96 JulieB Created.
  101. ////////////////////////////////////////////////////////////////////////////
  102. ASN1int32_t _UTF8ToWideChar
  103. (
  104. /* in */ ASN1char_t *lpSrcStr,
  105. /* in */ ASN1int32_t cchSrc,
  106. /* out */ WCHAR *lpDestStr,
  107. /* in */ ASN1int32_t cchDest
  108. )
  109. {
  110. int nTB = 0; // # trail bytes to follow
  111. int cchWC = 0; // # of Unicode code points generated
  112. LPCSTR pUTF8 = lpSrcStr;
  113. DWORD dwSurrogateChar; // Full surrogate char
  114. BOOL bSurrogatePair = FALSE; // Indicate we'r collecting a surrogate pair
  115. char UTF8;
  116. while ((cchSrc--) && ((cchDest == 0) || (cchWC < cchDest)))
  117. {
  118. //
  119. // See if there are any trail bytes.
  120. //
  121. if (BIT7(*pUTF8) == 0)
  122. {
  123. //
  124. // Found ASCII.
  125. //
  126. if (cchDest)
  127. {
  128. lpDestStr[cchWC] = (WCHAR)*pUTF8;
  129. }
  130. bSurrogatePair = FALSE;
  131. cchWC++;
  132. }
  133. else if (BIT6(*pUTF8) == 0)
  134. {
  135. //
  136. // Found a trail byte.
  137. // Note : Ignore the trail byte if there was no lead byte.
  138. //
  139. if (nTB != 0)
  140. {
  141. //
  142. // Decrement the trail byte counter.
  143. //
  144. nTB--;
  145. if (bSurrogatePair)
  146. {
  147. dwSurrogateChar <<= 6;
  148. dwSurrogateChar |= LOWER_6_BIT(*pUTF8);
  149. if (nTB == 0)
  150. {
  151. if (cchDest)
  152. {
  153. if ((cchWC + 1) < cchDest)
  154. {
  155. lpDestStr[cchWC] = (WCHAR)
  156. (((dwSurrogateChar - 0x10000) >> 10) + HIGH_SURROGATE_START);
  157. lpDestStr[cchWC+1] = (WCHAR)
  158. ((dwSurrogateChar - 0x10000)%0x400 + LOW_SURROGATE_START);
  159. }
  160. }
  161. cchWC += 2;
  162. bSurrogatePair = FALSE;
  163. }
  164. }
  165. else
  166. {
  167. //
  168. // Make room for the trail byte and add the trail byte
  169. // value.
  170. //
  171. if (cchDest)
  172. {
  173. lpDestStr[cchWC] <<= 6;
  174. lpDestStr[cchWC] |= LOWER_6_BIT(*pUTF8);
  175. }
  176. if (nTB == 0)
  177. {
  178. //
  179. // End of sequence. Advance the output counter.
  180. //
  181. cchWC++;
  182. }
  183. }
  184. }
  185. else
  186. {
  187. // error - not expecting a trail byte
  188. bSurrogatePair = FALSE;
  189. }
  190. }
  191. else
  192. {
  193. //
  194. // Found a lead byte.
  195. //
  196. if (nTB > 0)
  197. {
  198. //
  199. // Error - previous sequence not finished.
  200. //
  201. nTB = 0;
  202. bSurrogatePair = FALSE;
  203. cchWC++;
  204. }
  205. else
  206. {
  207. //
  208. // Calculate the number of bytes to follow.
  209. // Look for the first 0 from left to right.
  210. //
  211. UTF8 = *pUTF8;
  212. while (BIT7(UTF8) != 0)
  213. {
  214. UTF8 <<= 1;
  215. nTB++;
  216. }
  217. //
  218. // If this is a surrogate unicode pair
  219. //
  220. if (nTB == 4)
  221. {
  222. dwSurrogateChar = UTF8 >> nTB;
  223. bSurrogatePair = TRUE;
  224. }
  225. //
  226. // Store the value from the first byte and decrement
  227. // the number of bytes to follow.
  228. //
  229. if (cchDest)
  230. {
  231. lpDestStr[cchWC] = UTF8 >> nTB;
  232. }
  233. nTB--;
  234. }
  235. }
  236. pUTF8++;
  237. }
  238. //
  239. // Make sure the destination buffer was large enough.
  240. //
  241. if (cchDest && (cchSrc >= 0))
  242. {
  243. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  244. return (0);
  245. }
  246. //
  247. // Return the number of Unicode characters written.
  248. //
  249. return (cchWC);
  250. }
  251. ////////////////////////////////////////////////////////////////////////////
  252. //
  253. // UnicodeToUTF8
  254. //
  255. // Maps a Unicode character string to its UTF-8 string counterpart.
  256. //
  257. // 02-06-96 JulieB Created.
  258. ////////////////////////////////////////////////////////////////////////////
  259. ASN1int32_t _WideCharToUTF8
  260. (
  261. /* in */ WCHAR *lpSrcStr,
  262. /* in */ ASN1int32_t cchSrc,
  263. /* out */ ASN1char_t *lpDestStr,
  264. /* in */ ASN1int32_t cchDest
  265. )
  266. {
  267. LPCWSTR lpWC = lpSrcStr;
  268. int cchU8 = 0; // # of UTF8 chars generated
  269. DWORD dwSurrogateChar;
  270. WCHAR wchHighSurrogate = 0;
  271. BOOL bHandled;
  272. while ((cchSrc--) && ((cchDest == 0) || (cchU8 < cchDest)))
  273. {
  274. bHandled = FALSE;
  275. //
  276. // Check if high surrogate is available
  277. //
  278. if ((*lpWC >= HIGH_SURROGATE_START) && (*lpWC <= HIGH_SURROGATE_END))
  279. {
  280. if (cchDest)
  281. {
  282. // Another high surrogate, then treat the 1st as normal
  283. // Unicode character.
  284. if (wchHighSurrogate)
  285. {
  286. if ((cchU8 + 2) < cchDest)
  287. {
  288. lpDestStr[cchU8++] = UTF8_1ST_OF_3 | HIGHER_6_BIT(wchHighSurrogate);
  289. lpDestStr[cchU8++] = UTF8_TRAIL | MIDDLE_6_BIT(wchHighSurrogate);
  290. lpDestStr[cchU8++] = UTF8_TRAIL | LOWER_6_BIT(wchHighSurrogate);
  291. }
  292. else
  293. {
  294. // not enough buffer
  295. cchSrc++;
  296. break;
  297. }
  298. }
  299. }
  300. else
  301. {
  302. cchU8 += 3;
  303. }
  304. wchHighSurrogate = *lpWC;
  305. bHandled = TRUE;
  306. }
  307. if (!bHandled && wchHighSurrogate)
  308. {
  309. if ((*lpWC >= LOW_SURROGATE_START) && (*lpWC <= LOW_SURROGATE_END))
  310. {
  311. // wheee, valid surrogate pairs
  312. if (cchDest)
  313. {
  314. if ((cchU8 + 3) < cchDest)
  315. {
  316. dwSurrogateChar = (((wchHighSurrogate-0xD800) << 10) + (*lpWC - 0xDC00) + 0x10000);
  317. lpDestStr[cchU8++] = (UTF8_1ST_OF_4 |
  318. (unsigned char)(dwSurrogateChar >> 18)); // 3 bits from 1st byte
  319. lpDestStr[cchU8++] = (UTF8_TRAIL |
  320. (unsigned char)((dwSurrogateChar >> 12) & 0x3f)); // 6 bits from 2nd byte
  321. lpDestStr[cchU8++] = (UTF8_TRAIL |
  322. (unsigned char)((dwSurrogateChar >> 6) & 0x3f)); // 6 bits from 3rd byte
  323. lpDestStr[cchU8++] = (UTF8_TRAIL |
  324. (unsigned char)(0x3f & dwSurrogateChar)); // 6 bits from 4th byte
  325. }
  326. else
  327. {
  328. // not enough buffer
  329. cchSrc++;
  330. break;
  331. }
  332. }
  333. else
  334. {
  335. // we already counted 3 previously (in high surrogate)
  336. cchU8 += 1;
  337. }
  338. bHandled = TRUE;
  339. }
  340. else
  341. {
  342. // Bad Surrogate pair : ERROR
  343. // Just process wchHighSurrogate , and the code below will
  344. // process the current code point
  345. if (cchDest)
  346. {
  347. if ((cchU8 + 2) < cchDest)
  348. {
  349. lpDestStr[cchU8++] = UTF8_1ST_OF_3 | HIGHER_6_BIT(wchHighSurrogate);
  350. lpDestStr[cchU8++] = UTF8_TRAIL | MIDDLE_6_BIT(wchHighSurrogate);
  351. lpDestStr[cchU8++] = UTF8_TRAIL | LOWER_6_BIT(wchHighSurrogate);
  352. }
  353. else
  354. {
  355. // not enough buffer
  356. cchSrc++;
  357. break;
  358. }
  359. }
  360. }
  361. wchHighSurrogate = 0;
  362. }
  363. if (!bHandled)
  364. {
  365. if (*lpWC <= ASCII)
  366. {
  367. //
  368. // Found ASCII.
  369. //
  370. if (cchDest)
  371. {
  372. lpDestStr[cchU8] = (char)*lpWC;
  373. }
  374. cchU8++;
  375. }
  376. else if (*lpWC <= UTF8_2_MAX)
  377. {
  378. //
  379. // Found 2 byte sequence if < 0x07ff (11 bits).
  380. //
  381. if (cchDest)
  382. {
  383. if ((cchU8 + 1) < cchDest)
  384. {
  385. //
  386. // Use upper 5 bits in first byte.
  387. // Use lower 6 bits in second byte.
  388. //
  389. lpDestStr[cchU8++] = UTF8_1ST_OF_2 | (*lpWC >> 6);
  390. lpDestStr[cchU8++] = UTF8_TRAIL | LOWER_6_BIT(*lpWC);
  391. }
  392. else
  393. {
  394. //
  395. // Error - buffer too small.
  396. //
  397. cchSrc++;
  398. break;
  399. }
  400. }
  401. else
  402. {
  403. cchU8 += 2;
  404. }
  405. }
  406. else
  407. {
  408. //
  409. // Found 3 byte sequence.
  410. //
  411. if (cchDest)
  412. {
  413. if ((cchU8 + 2) < cchDest)
  414. {
  415. //
  416. // Use upper 4 bits in first byte.
  417. // Use middle 6 bits in second byte.
  418. // Use lower 6 bits in third byte.
  419. //
  420. lpDestStr[cchU8++] = UTF8_1ST_OF_3 | HIGHER_6_BIT(*lpWC);
  421. lpDestStr[cchU8++] = UTF8_TRAIL | MIDDLE_6_BIT(*lpWC);
  422. lpDestStr[cchU8++] = UTF8_TRAIL | LOWER_6_BIT(*lpWC);
  423. }
  424. else
  425. {
  426. //
  427. // Error - buffer too small.
  428. //
  429. cchSrc++;
  430. break;
  431. }
  432. }
  433. else
  434. {
  435. cchU8 += 3;
  436. }
  437. }
  438. }
  439. lpWC++;
  440. }
  441. //
  442. // If the last character was a high surrogate, then handle it as a normal
  443. // unicode character.
  444. //
  445. if ((cchSrc < 0) && (wchHighSurrogate != 0))
  446. {
  447. if (cchDest)
  448. {
  449. if ((cchU8 + 2) < cchDest)
  450. {
  451. lpDestStr[cchU8++] = UTF8_1ST_OF_3 | HIGHER_6_BIT(wchHighSurrogate);
  452. lpDestStr[cchU8++] = UTF8_TRAIL | MIDDLE_6_BIT(wchHighSurrogate);
  453. lpDestStr[cchU8++] = UTF8_TRAIL | LOWER_6_BIT(wchHighSurrogate);
  454. }
  455. else
  456. {
  457. cchSrc++;
  458. }
  459. }
  460. }
  461. //
  462. // Make sure the destination buffer was large enough.
  463. //
  464. if (cchDest && (cchSrc >= 0))
  465. {
  466. SetLastError(ERROR_INSUFFICIENT_BUFFER);
  467. return (0);
  468. }
  469. //
  470. // Return the number of UTF-8 characters written.
  471. //
  472. return (cchU8);
  473. }
  474. #else
  475. //+-------------------------------------------------------------------------
  476. //
  477. // Microsoft Windows
  478. //
  479. // Copyright (C) Microsoft Corporation, 1995 - 1997
  480. //
  481. // File: utf8.cpp
  482. //
  483. // Contents: WideChar to/from UTF8 APIs
  484. //
  485. // Functions: WideCharToUTF8
  486. // UTF8ToWideChar
  487. //
  488. // History: 19-Feb-97 philh created
  489. //--------------------------------------------------------------------------
  490. //+-------------------------------------------------------------------------
  491. // Maps a wide-character (Unicode) string to a new UTF-8 encoded character
  492. // string.
  493. //
  494. // The wide characters are mapped as follows:
  495. //
  496. // Start End Bits UTF-8 Characters
  497. // ------ ------ ---- --------------------------------
  498. // 0x0000 0x007F 7 0x0xxxxxxx
  499. // 0x0080 0x07FF 11 0x110xxxxx 0x10xxxxxx
  500. // 0x0800 0xFFFF 16 0x1110xxxx 0x10xxxxxx 0x10xxxxxx
  501. //
  502. // The parameter and return value semantics are the same as for the
  503. // Win32 API, WideCharToMultiByte.
  504. //
  505. // Note, starting with NT 4.0, WideCharToMultiByte supports CP_UTF8. CP_UTF8
  506. // isn't supported on Win95.
  507. //--------------------------------------------------------------------------
  508. ASN1int32_t _WideCharToUTF8
  509. (
  510. /* in */ WCHAR *lpWideCharStr,
  511. /* in */ ASN1int32_t cchWideChar,
  512. /* out */ ASN1char_t *lpUTF8Str,
  513. /* in */ ASN1int32_t cchUTF8
  514. )
  515. {
  516. if (cchUTF8 >= 0)
  517. {
  518. ASN1int32_t cchRemainUTF8 = cchUTF8;
  519. if (cchWideChar < 0)
  520. {
  521. cchWideChar = My_lstrlenW(lpWideCharStr) + 1;
  522. }
  523. while (cchWideChar--)
  524. {
  525. WCHAR wch = *lpWideCharStr++;
  526. if (wch <= 0x7F)
  527. {
  528. // 7 bits
  529. cchRemainUTF8--;
  530. if (cchRemainUTF8 >= 0)
  531. {
  532. *lpUTF8Str++ = (ASN1char_t) wch;
  533. }
  534. }
  535. else
  536. if (wch <= 0x7FF)
  537. {
  538. // 11 bits
  539. cchRemainUTF8 -= 2;
  540. if (cchRemainUTF8 >= 0)
  541. {
  542. *lpUTF8Str++ = (ASN1char_t) (0xC0 | ((wch >> 6) & 0x1F));
  543. *lpUTF8Str++ = (ASN1char_t) (0x80 | (wch & 0x3F));
  544. }
  545. }
  546. else
  547. {
  548. // 16 bits
  549. cchRemainUTF8 -= 3;
  550. if (cchRemainUTF8 >= 0)
  551. {
  552. *lpUTF8Str++ = (ASN1char_t) (0xE0 | ((wch >> 12) & 0x0F));
  553. *lpUTF8Str++ = (ASN1char_t) (0x80 | ((wch >> 6) & 0x3F));
  554. *lpUTF8Str++ = (ASN1char_t) (0x80 | (wch & 0x3F));
  555. }
  556. }
  557. }
  558. if (cchRemainUTF8 >= 0)
  559. {
  560. return (cchUTF8 - cchRemainUTF8);
  561. }
  562. else
  563. if (cchUTF8 == 0)
  564. {
  565. return (-cchRemainUTF8);
  566. }
  567. }
  568. return 0;
  569. }
  570. //+-------------------------------------------------------------------------
  571. // Maps a UTF-8 encoded character string to a new wide-character (Unicode)
  572. // string.
  573. //
  574. // See CertWideCharToUTF8 for how the UTF-8 characters are mapped to wide
  575. // characters.
  576. //
  577. // The parameter and return value semantics are the same as for the
  578. // Win32 API, MultiByteToWideChar.
  579. //
  580. // If the UTF-8 characters don't contain the expected high order bits,
  581. // ERROR_INVALID_PARAMETER is set and 0 is returned.
  582. //
  583. // Note, starting with NT 4.0, MultiByteToWideChar supports CP_UTF8. CP_UTF8
  584. // isn't supported on Win95.
  585. //--------------------------------------------------------------------------
  586. ASN1int32_t _UTF8ToWideChar
  587. (
  588. /* in */ ASN1char_t *lpUTF8Str,
  589. /* in */ ASN1int32_t cchUTF8,
  590. /* out */ WCHAR *lpWideCharStr,
  591. /* in */ ASN1int32_t cchWideChar
  592. )
  593. {
  594. if (cchWideChar >= 0)
  595. {
  596. ASN1int32_t cchRemainWideChar = cchWideChar;
  597. if (cchUTF8 < 0)
  598. {
  599. cchUTF8 = My_lstrlenA(lpUTF8Str) + 1;
  600. }
  601. while (cchUTF8--)
  602. {
  603. ASN1char_t ch = *lpUTF8Str++;
  604. WCHAR wch;
  605. ASN1char_t ch2, ch3;
  606. if (0 == (ch & 0x80))
  607. {
  608. // 7 bits, 1 byte
  609. wch = (WCHAR) ch;
  610. }
  611. else
  612. if (0xC0 == (ch & 0xE0))
  613. {
  614. // 11 bits, 2 bytes
  615. if (--cchUTF8 >= 0)
  616. {
  617. ch2 = *lpUTF8Str++;
  618. if (0x80 == (ch2 & 0xC0))
  619. {
  620. wch = (((WCHAR) ch & 0x1F) << 6) |
  621. ((WCHAR) ch2 & 0x3F);
  622. }
  623. else
  624. {
  625. goto MyExit;
  626. }
  627. }
  628. else
  629. {
  630. goto MyExit;
  631. }
  632. }
  633. else
  634. if (0xE0 == (ch & 0xF0))
  635. {
  636. // 16 bits, 3 bytes
  637. cchUTF8 -= 2;
  638. if (cchUTF8 >= 0)
  639. {
  640. ch2 = *lpUTF8Str++;
  641. ch3 = *lpUTF8Str++;
  642. if (0x80 == (ch2 & 0xC0) && 0x80 == (ch3 & 0xC0))
  643. {
  644. wch = (((WCHAR) ch & 0x0F) << 12) |
  645. (((WCHAR) ch2 & 0x3F) << 6) |
  646. ((WCHAR) ch3 & 0x3F);
  647. }
  648. else
  649. {
  650. goto MyExit;
  651. }
  652. }
  653. else
  654. {
  655. goto MyExit;
  656. }
  657. }
  658. else
  659. {
  660. goto MyExit;
  661. }
  662. if (--cchRemainWideChar >= 0)
  663. {
  664. *lpWideCharStr++ = wch;
  665. }
  666. }
  667. if (cchRemainWideChar >= 0)
  668. {
  669. return (cchWideChar - cchRemainWideChar);
  670. }
  671. else
  672. if (cchWideChar == 0)
  673. {
  674. return (-cchRemainWideChar);
  675. }
  676. }
  677. MyExit:
  678. return 0;
  679. }
  680. #endif // 1
  681. #endif // ENABLE_BER