Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1527 lines
36 KiB

  1. /* File: D:\WACKER\tdll\tchar.c (Created: 08-Dec-1993)
  2. *
  3. * Copyright 1994 by Hilgraeve Inc. -- Monroe, MI
  4. * All rights reserved
  5. *
  6. * $Revision: 19 $
  7. * $Date: 7/08/02 6:49p $
  8. */
  9. #include <windows.h>
  10. #pragma hdrstop
  11. #include <tchar.h>
  12. #include "stdtyp.h"
  13. #include "tdll.h"
  14. #include "assert.h"
  15. #include "htchar.h"
  16. #include "mc.h"
  17. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
  18. * FUNCTION:
  19. * TCHAR_Fill
  20. *
  21. * DESCRIPTION:
  22. * Fills a TCHAR string with the specified TCHAR.
  23. *
  24. * ARGUMENTS:
  25. * dest - string to fill.
  26. * c - character to fill string with.
  27. * size_t - number of TCHAR units to copy.
  28. *
  29. * RETURNS:
  30. * pointer to string.
  31. *
  32. */
  33. TCHAR *TCHAR_Fill(TCHAR *dest, TCHAR c, size_t count)
  34. {
  35. #if defined(UNICODE)
  36. int i;
  37. for (i = 0 ; i < count ; ++i)
  38. dest[i] = c;
  39. return dest;
  40. #else
  41. return (TCHAR *)memset(dest, c, count);
  42. #endif
  43. }
  44. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
  45. * FUNCTION:
  46. * TCHAR_Trim
  47. *
  48. * DESCRIPTION:
  49. * This function is called to clean up user input. It strips all white
  50. * space from the front and rear of a string. Sometimes nothing is left.
  51. *
  52. * NOTE: This won't work on strings > 512 bytes
  53. *
  54. * ARGUEMENTS:
  55. * pszStr -- the string to trim
  56. *
  57. * RETURNS:
  58. * pointer to the string
  59. */
  60. TCHAR *TCHAR_Trim(TCHAR *pszStr)
  61. {
  62. int nExit;
  63. TCHAR *pszPtr;
  64. TCHAR *pszLast;
  65. TCHAR acBuf[512];
  66. /* Skip the leading white space */
  67. for (nExit = FALSE, pszPtr = pszStr; nExit == FALSE; )
  68. {
  69. switch (*pszPtr)
  70. {
  71. /* Anything here is considered white space */
  72. case 0x20:
  73. case 0x9:
  74. case 0xA:
  75. case 0xB:
  76. case 0xC:
  77. case 0xD:
  78. pszPtr += 1; /* Skip the white space */
  79. break;
  80. default:
  81. nExit = TRUE;
  82. break;
  83. }
  84. }
  85. if ((unsigned int)lstrlen(pszPtr) > sizeof(acBuf))
  86. {
  87. return NULL;
  88. }
  89. lstrcpy(acBuf, pszPtr);
  90. /* Find the last non white space character */
  91. pszPtr = pszLast = acBuf;
  92. while (*pszPtr != TEXT('\0'))
  93. {
  94. switch (*pszPtr)
  95. {
  96. /* Anything here is considered white space */
  97. case 0x20:
  98. case 0x9:
  99. case 0xA:
  100. case 0xB:
  101. case 0xC:
  102. case 0xD:
  103. break;
  104. default:
  105. pszLast = pszPtr;
  106. break;
  107. }
  108. pszPtr += 1;
  109. }
  110. pszLast += 1;
  111. *pszLast = TEXT('\0');
  112. lstrcpy(pszStr, acBuf);
  113. return pszStr;
  114. }
  #if 0 // Thought I needed this but I didn't. May be useful someday however.
  /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
   * FUNCTION:
   *  TCHAR_Trunc
   *
   * DESCRIPTION:
   *  Removes trailing white space (space, tab, LF, VT, FF, CR) from a
   *  null-terminated string by writing a new terminator after the last
   *  non-white character.  A string that is entirely white space becomes
   *  the empty string.
   *
   * ARGUMENTS:
   *  psz - string of characters (null terminated), modified in place.
   *
   * RETURNS:
   *  Length of the truncated string in TCHARs (0 when psz is NULL).
   *
   */
  int TCHAR_Trunc(const LPTSTR psz)
  {
      int i;
      int fDone = FALSE;

      if (psz == NULL)
      {
          return 0;
      }

      /* 'i' is the candidate length; psz[i - 1] is the character under
       * test.  Index 0 is examined too, so a single visible character
       * followed by white space is still truncated correctly. */
      for (i = lstrlen(psz); i > 0 && !fDone; )
      {
          switch (psz[i - 1])
          {
          /* Whitespace characters */
          case TEXT(' '):
          case TEXT('\t'):
          case TEXT('\n'):
          case TEXT('\v'):
          case TEXT('\f'):
          case TEXT('\r'):
              --i;
              break;

          default:
              fDone = TRUE;
              break;
          }
      }

      psz[i] = TEXT('\0');
      return i;
  }
  #endif
  153. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  154. * FUNCTION:
  155. * StrCharNext
  156. *
  157. * DESCRIPTION:
  158. *
  159. * PARAMETERS:
  160. *
  161. * RETURNS:
  162. */
  163. LPTSTR StrCharNext(LPCTSTR pszStr)
  164. {
  165. LPTSTR pszRet = (LPTSTR)NULL;
  166. if (pszStr != (LPTSTR)NULL)
  167. {
  168. #if defined(CHAR_MIXED)
  169. /* Could be done with 'IsDBCSLeadByte' etc. */
  170. pszRet = CharNextExA(0, pszStr, 0);
  171. #else
  172. pszRet = (LPTSTR)pszStr + 1;
  173. #endif
  174. }
  175. return pszRet;
  176. }
  177. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  178. * FUNCTION:
  179. * StrCharPrev
  180. *
  181. * DESCRIPTION:
  182. *
  183. * PARAMETERS:
  184. *
  185. * RETURNS:
  186. */
  187. LPTSTR StrCharPrev(LPCTSTR pszStart, LPCTSTR pszStr)
  188. {
  189. LPTSTR pszRet = (LPTSTR)NULL;
  190. if ((pszStart != (LPTSTR)NULL) && (pszStr != (LPTSTR)NULL))
  191. {
  192. #if defined(CHAR_MIXED)
  193. pszRet = CharPrev(pszStart, pszStr);
  194. #else
  195. if (pszStr > pszStart)
  196. pszRet = (LPTSTR)pszStr - 1;
  197. else
  198. pszRet = (LPTSTR)pszStart;
  199. #endif
  200. }
  201. return pszRet;
  202. }
  203. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  204. * FUNCTION:
  205. * StrCharLast
  206. *
  207. * DESCRIPTION:
  208. * Returns a pointer to the last character in a string
  209. *
  210. * PARAMETERS:
  211. *
  212. * RETURNS:
  213. */
  214. LPTSTR StrCharLast(LPCTSTR pszStr)
  215. {
  216. LPTSTR pszRet = (LPTSTR)NULL;
  217. if (pszStr != (LPTSTR)NULL)
  218. {
  219. #if defined(CHAR_MIXED)
  220. while (*pszStr != TEXT('\0'))
  221. {
  222. pszRet = (LPTSTR)pszStr;
  223. pszStr = CharNextExA(0, pszStr, 0);
  224. }
  225. #else
  226. /* It might be possible to use 'strlen' here. Then again... */
  227. // pszRet = pszStr + StrCharGetByteCount(pszStr) - 1;
  228. pszRet = (LPTSTR)pszStr + lstrlen(pszStr) - 1;
  229. #endif
  230. }
  231. return pszRet;
  232. }
  233. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  234. * FUNCTION:
  235. * StrCharEnd
  236. *
  237. * DESCRIPTION:
  238. * Returns a pointer to the NULL terminating a string
  239. *
  240. * PARAMETERS:
  241. *
  242. * RETURNS:
  243. */
  244. LPTSTR StrCharEnd(LPCTSTR pszStr)
  245. {
  246. if (pszStr != (LPTSTR)NULL)
  247. {
  248. #if defined(CHAR_MIXED)
  249. while (*pszStr != TEXT('\0'))
  250. {
  251. pszStr = StrCharNext(pszStr);
  252. pszStr += 1;
  253. }
  254. #else
  255. pszStr = pszStr + lstrlen(pszStr);
  256. #endif
  257. }
  258. return (LPTSTR)pszStr;
  259. }
  260. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  261. * FUNCTION:
  262. * StrCharFindFirst
  263. *
  264. * DESCRIPTION:
  265. *
  266. * PARAMETERS:
  267. *
  268. * RETURNS:
  269. */
  270. LPTSTR StrCharFindFirst(LPCTSTR pszStr, int nChar)
  271. {
  272. #if defined(CHAR_MIXED)
  273. WORD *pszW;
  274. #endif
  275. if (pszStr != (LPTSTR)NULL)
  276. {
  277. #if defined(CHAR_MIXED)
  278. while (*pszStr != TEXT('\0'))
  279. {
  280. /*
  281. * NOTE: this may not work for UNICODE
  282. */
  283. if (nChar > 0xFF)
  284. {
  285. /* Two byte character */
  286. if (IsDBCSLeadByte(*pszStr))
  287. {
  288. pszW = (WORD *)pszStr;
  289. if (*pszW == (WORD)nChar)
  290. return (LPTSTR)pszStr;
  291. }
  292. }
  293. else
  294. {
  295. /* Single byte character */
  296. if (*pszStr == (TCHAR)nChar)
  297. return (LPTSTR)pszStr;
  298. }
  299. pszStr = CharNextExA(0, pszStr, 0);
  300. }
  301. #else
  302. while (pszStr && (*pszStr != TEXT('\0')) )
  303. {
  304. if (*pszStr == (TCHAR)nChar)
  305. return (LPTSTR)pszStr;
  306. pszStr = StrCharNext(pszStr);
  307. }
  308. #endif
  309. }
  310. return (LPTSTR)NULL;
  311. }
  312. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  313. * FUNCTION:
  314. * StrCharFindLast
  315. *
  316. * DESCRIPTION:
  317. *
  318. * PARAMETERS:
  319. *
  320. * RETURNS:
  321. */
  322. LPTSTR StrCharFindLast(LPCTSTR pszStr, int nChar)
  323. {
  324. LPTSTR pszRet = (LPTSTR)NULL;
  325. #if defined(CHAR_MIXED)
  326. WORD *pszW;
  327. #else
  328. LPTSTR pszEnd;
  329. #endif
  330. if (pszStr != (LPTSTR)NULL)
  331. {
  332. #if defined(CHAR_MIXED)
  333. while (*pszStr != TEXT('\0'))
  334. {
  335. /*
  336. * NOTE: this may not work for UNICODE
  337. */
  338. if (nChar > 0xFF)
  339. {
  340. /* Two byte character */
  341. if (IsDBCSLeadByte(*pszStr))
  342. {
  343. pszW = (WORD *)pszStr;
  344. if (*pszW == (WORD)nChar)
  345. pszRet = (LPTSTR)pszStr;
  346. }
  347. }
  348. else
  349. {
  350. /* Single byte character */
  351. if (*pszStr == (TCHAR)nChar)
  352. pszRet = (LPTSTR)pszStr;
  353. }
  354. pszStr = CharNextExA(0, pszStr, 0);
  355. }
  356. #else
  357. pszEnd = StrCharLast(pszStr);
  358. while (pszEnd && (pszEnd > pszStr) )
  359. {
  360. if (*pszEnd == (TCHAR)nChar)
  361. {
  362. pszRet = (LPTSTR)pszEnd;
  363. break;
  364. }
  365. pszEnd = StrCharPrev(pszStr, pszEnd);
  366. }
  367. #endif
  368. }
  369. return pszRet;
  370. }
  371. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  372. * FUNCTION:
  373. * StrCharGetStrLength
  374. *
  375. * DESCRIPTION:
  376. * This function returns the number of characters in a string. A two byte
  377. * character counts as one.
  378. *
  379. * PARAMETERS:
  380. *
  381. * RETURNS:
  382. */
  383. int StrCharGetStrLength(LPCTSTR pszStr)
  384. {
  385. int nRet = 0;
  386. #if DEADWOOD
  387. #if defined(CHAR_MIXED)
  388. if (pszStr != (LPTSTR)NULL)
  389. {
  390. while (*pszStr != TEXT('\0'))
  391. {
  392. nRet++;
  393. pszStr = CharNextExA(0, pszStr, 0);
  394. }
  395. }
  396. #else
  397. if (pszStr != (LPTSTR)NULL)
  398. {
  399. nRet = lstrlen(pszStr);
  400. }
  401. #endif
  402. #else // DEADWOOD
  403. if (pszStr != (LPTSTR)NULL)
  404. {
  405. nRet = lstrlen(pszStr);
  406. }
  407. #endif // DEADWOOD
  408. return nRet;
  409. }
  410. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  411. * FUNCTION:
  412. * StrCharGetByteCount
  413. *
  414. * DESCRIPTION:
  415. * This function returns the number of bytes in a string. A two byte char
  416. * counts as two.
  417. *
  418. * PARAMETERS:
  419. *
  420. * RETURNS:
  421. */
  422. int StrCharGetByteCount(LPCTSTR pszStr)
  423. {
  424. int nRet = 0;
  425. #if defined(CHAR_MIXED)
  426. LPCTSTR pszFoo;
  427. if (pszStr != (LPTSTR)NULL)
  428. {
  429. pszFoo = pszStr;
  430. while (*pszFoo != TEXT('\0'))
  431. {
  432. pszFoo = CharNextExA(0, pszFoo, 0);
  433. }
  434. nRet = (int)(pszFoo - pszStr);
  435. }
  436. #else
  437. if (pszStr != (LPTSTR)NULL)
  438. {
  439. nRet = lstrlen(pszStr);
  440. }
  441. #endif
  442. return nRet;
  443. }
  444. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  445. * FUNCTION:
  446. * StrCharCopy
  447. *
  448. * DESCRIPTION:
  449. *
  450. * PARAMETERS:
  451. *
  452. * RETURNS:
  453. */
  454. LPTSTR StrCharCopy(LPTSTR pszDst, LPCTSTR pszSrc)
  455. {
  456. return lstrcpy(pszDst, pszSrc);
  457. }
  458. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  459. * FUNCTION:
  460. * StrCharCat
  461. *
  462. * DESCRIPTION:
  463. *
  464. * PARAMETERS:
  465. *
  466. * RETURNS:
  467. */
  468. LPTSTR StrCharCat(LPTSTR pszDst, LPCTSTR pszSrc)
  469. {
  470. return lstrcat(pszDst, pszSrc);
  471. }
  472. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  473. * FUNCTION:
  474. * StrCharCmp
  475. *
  476. * DESCRIPTION:
  477. *
  478. * PARAMETERS:
  479. *
  480. * RETURNS:
  481. */
  482. int StrCharCmp(LPCTSTR pszA, LPCTSTR pszB)
  483. {
  484. return lstrcmp(pszA, pszB);
  485. }
  486. int StrCharCmpi(LPCTSTR pszA, LPCTSTR pszB)
  487. {
  488. return lstrcmpi(pszA, pszB);
  489. }
  490. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  491. * FUNCTION:
  492. * StrCharCmpN
  493. *
  494. * DESCRIPTION:
  495. *
  496. * PARAMETERS:
  497. *
  498. * RETURNS:
  499. */
  500. int StrCharCmpN(LPCTSTR pszA, LPCTSTR pszB, size_t iLen)
  501. {
  502. return _tcsncmp(pszA, pszB, iLen);
  503. }
  504. int StrCharCmpiN(LPCTSTR pszA, LPCTSTR pszB, size_t iLen)
  505. {
  506. return _tcsnicmp(pszA, pszB, iLen);
  507. }
  508. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  509. * FUNCTION:
  510. * StrCharStrStr
  511. *
  512. * DESCRIPTION:
  513. *
  514. * PARAMETERS:
  515. *
  516. * RETURNS:
  517. */
  518. LPTSTR StrCharStrStr(LPCTSTR pszA, LPCTSTR pszB)
  519. {
  520. LPTSTR pszRet = (LPTSTR)NULL;
  521. int nSize;
  522. int nRemaining;
  523. LPTSTR pszPtr;
  524. /*
  525. * We need to write a version of 'strstr' that will work.
  526. * Do we really know what the problems are ?
  527. */
  528. nSize = StrCharGetByteCount(pszB);
  529. pszPtr = (LPTSTR)pszA;
  530. while ((nRemaining = StrCharGetByteCount(pszPtr)) >= nSize)
  531. {
  532. if (memcmp(pszPtr, pszB, (size_t)nSize) == 0)
  533. return pszPtr;
  534. pszPtr = StrCharNext(pszPtr);
  535. }
  536. return pszRet;
  537. }
  538. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
  539. * FUNCTION:
  540. * StrCharCopyN
  541. *
  542. * DESCRIPTION:
  543. * Basically do a lstrcpy of n bytes, with one exception, we make sure that
  544. * the copied string does not end in a lead-byte of a double-byte character.
  545. *
  546. * ARGUMENTS:
  547. * pszDst - pointer to the copy target string.
  548. * pszSrc - pointer to the copy source string.
  549. * iLen - the maximum number of TCHARs to copy. Like strcpyn, the
  550. string may not be null terminated if the buffer is exceeded.
  551. *
  552. * RETURNS:
  553. * 0=error, else pszDst
  554. *
  555. */
  556. LPTSTR StrCharCopyN(LPTSTR pszDst, LPCTSTR pszSrc, int iLen)
  557. {
  558. int i = 0;
  559. int iCounter = iLen * sizeof(TCHAR); // Use a temporary character counter.
  560. LPCTSTR psz = pszSrc;
  561. if (pszDst == 0 || pszSrc == 0 || iLen == 0 || iCounter == 0)
  562. return 0;
  563. while (1)
  564. {
  565. i = (int)(StrCharNext(psz) - psz);
  566. iCounter -= i;
  567. if (iCounter <= 0)
  568. break;
  569. if (*psz == TEXT('\0'))
  570. {
  571. //
  572. // Since StrCharNext() will return the pointer to the
  573. // terminating null character if at the end of the string,
  574. // so just increment to the next address location so we
  575. // have the correct number of bytes to copy (excluding
  576. // the terminating NULL character). We NULL terminate
  577. // the string at the end of this function, so we don't
  578. // have to copy the NULL character. REV: 12/28/2000.
  579. //
  580. psz += 1; // still need to increment
  581. break;
  582. }
  583. psz += i;
  584. }
  585. //
  586. // Make sure we don't overwrite memory. REV: 12/28/2000.
  587. //
  588. i = min((LONG)((psz - pszSrc) + sizeof(TCHAR)), iLen * (int)sizeof(TCHAR));
  589. MemCopy(pszDst, pszSrc, i);
  590. //
  591. // Make sure the string is null terminated. REV: 12/28/2000.
  592. //
  593. pszDst[(i / sizeof(TCHAR)) - 1] = TEXT('\0');
  594. return pszDst;
  595. }
  596. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  597. * FUNCTION:
  598. * StrCharPBrk
  599. *
  600. * DESCRIPTION:
  601. * Searches a string for the first occurrence of a character contained in a
  602. * specified buffer. This search does not include the null terminator.
  603. *
  604. * PARAMETERS:
  605. * pszStr - Address of the string to be searched.
  606. * pszSet - Address of a null-terminated character buffer that contains the
  607. * characters for which to search.
  608. *
  609. * RETURNS:
  610. * Returns the address in psz of the first occurrence of a character
  611. * contained in the buffer at pszSet, or NULL if no match is found.
  612. */
  613. LPTSTR StrCharPBrk(LPCTSTR pszStr, LPCTSTR pszSet)
  614. {
  615. LPCTSTR psz = pszSet;
  616. LPTSTR pszRetVal = NULL;
  617. while (*psz != TEXT('\0'))
  618. {
  619. if ((pszRetVal = StrCharFindFirst(pszStr, (int)(*psz))) != NULL)
  620. break;
  621. psz++;
  622. }
  623. return pszRetVal;
  624. }
  625. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  626. * FUNCTION:
  627. * CnvrtMBCStoECHAR
  628. *
  629. * DESCRIPTION:
  630. * Converts a DBCS (mixed byte) string into an ECHAR (double byte) string.
  631. *
  632. * PARAMETERS:
  633. * tchrSource - Source String
  634. * ulDestSize - Length of Destination String in Bytes
  635. * echrDest - Destination String
  636. * ulSourceSize - Length of Destination String in Bytes
  637. *
  638. * RETURNS:
  639. * 0 - Success
  640. * 1 - Error
  641. */
  642. int CnvrtMBCStoECHAR(ECHAR * echrDest, const unsigned long ulDestSize, const TCHAR * const tchrSource, const unsigned long ulSourceSize)
  643. {
  644. ULONG ulLoop = 0;
  645. ULONG ulDestCount = 0;
  646. ULONG ulDestEChars = ulDestSize / sizeof(ECHAR);
  647. BOOL fLeadByteFound = FALSE;
  648. if ((echrDest == NULL) || (tchrSource == NULL))
  649. {
  650. assert(FALSE);
  651. return TRUE;
  652. }
  653. // Make sure that the destination string is big enough to handle to source string
  654. if (ulDestEChars < ulSourceSize)
  655. {
  656. assert(FALSE);
  657. return 1;
  658. }
  659. #if defined(CHAR_MIXED)
  660. // because we do a strcpy in the NARROW version of this function,
  661. // and we want the behavior to be the save between the two. We
  662. // clear out the string, just like strcpy does
  663. memset(echrDest, 0, ulDestSize);
  664. for (ulLoop = 0; ulLoop < ulSourceSize; ulLoop++)
  665. {
  666. if ((IsDBCSLeadByte(tchrSource[ulLoop])) && (!fLeadByteFound))
  667. // If we found a lead byte, and the last one was not a lead
  668. // byte. We load the byte into the top half of the ECHAR
  669. {
  670. echrDest[ulDestCount] = (tchrSource[ulLoop] & 0x00FF);
  671. echrDest[ulDestCount] = (ECHAR)(echrDest[ulDestCount] << 8);
  672. fLeadByteFound = TRUE;
  673. }
  674. else if (fLeadByteFound)
  675. {
  676. // If the last byte was a lead byte, we or it into the
  677. // bottom half of the ECHAR
  678. echrDest[ulDestCount] |= (tchrSource[ulLoop] & 0x00FF);
  679. fLeadByteFound = FALSE;
  680. ulDestCount++;
  681. }
  682. else
  683. {
  684. // Otherwise we load the byte into the bottom half of the
  685. // ECHAR and clear the top half.
  686. echrDest[ulDestCount] = (tchrSource[ulLoop] & 0x00FF);
  687. ulDestCount++;
  688. }
  689. }
  690. #else
  691. // ECHAR is only a byte, so do a straight string copy.
  692. if (ulSourceSize)
  693. MemCopy(echrDest, tchrSource, ulSourceSize);
  694. #endif
  695. return 0;
  696. }
  697. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  698. * FUNCTION:
  699. * CnvrtECHARtoMBCS
  700. *
  701. * DESCRIPTION:
  702. * Converts an ECHAR (double byte) string into a DBCS (mixed byte) string.
  703. *
  704. * PARAMETERS:
  705. * echrSource - Source String
  706. * ulDestSize - Length of Destination String in Bytes
  707. * tchrDest - Destination String
  708. *
  709. * RETURNS:
  710. * Number of bytes in the converted string
  711. * 1 - Error
  712. */
  713. int CnvrtECHARtoMBCS(TCHAR * tchrDest, const unsigned long ulDestSize, const ECHAR * const echrSource, const unsigned long ulSourceSize)
  714. {
  715. ULONG ulLoop = 0;
  716. ULONG ulDestCount = 0;
  717. ULONG ulSourceEChars = ulSourceSize / sizeof(ECHAR);
  718. #if defined(INCL_VTUTF8)
  719. extern BOOL DoUTF8;
  720. #endif
  721. if ((tchrDest == NULL) || (echrSource == NULL))
  722. {
  723. assert(FALSE);
  724. return TRUE;
  725. }
  726. #if defined(CHAR_MIXED)
  727. // because we do a strcpy in the NARROW version of this function,
  728. // and we want the behavior to be the save between the two. We
  729. // clear out the string, just like strcpy does
  730. memset(tchrDest, 0, ulDestSize);
  731. // We can't do a strlen of an ECHAR string, so we loop
  732. // until we hit NULL or we are over the size of the destination.
  733. while ((ulLoop < ulSourceEChars) && (ulDestCount <= ulDestSize))
  734. {
  735. if (echrSource[ulLoop] & 0xFF00)
  736. // Lead byte in this character, load the lead byte into one
  737. // TCHAR and the lower byte into a second TCHAR.
  738. {
  739. tchrDest[ulDestCount] = (TCHAR)((echrSource[ulLoop] & 0xFF00) >> 8);
  740. ulDestCount++;
  741. tchrDest[ulDestCount] = (TCHAR)(echrSource[ulLoop] & 0x00FF);
  742. }
  743. else
  744. // No lead byte in this ECHAR, just load the lower half into
  745. // the TCHAR.
  746. {
  747. tchrDest[ulDestCount] = (TCHAR)(echrSource[ulLoop] & 0x00FF);
  748. }
  749. ulDestCount++;
  750. ulLoop++;
  751. if(ulDestCount > ulDestSize)
  752. assert(FALSE);
  753. }
  754. return ulDestCount;
  755. #else
  756. // ECHAR is only a byte, so do a straight string copy.
  757. if (ulSourceSize)
  758. MemCopy(tchrDest, echrSource, ulSourceSize);
  759. return ulSourceSize;
  760. #endif
  761. }
  762. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  763. * FUNCTION:
  764. * StrCharGetEcharLen
  765. *
  766. * DESCRIPTION:
  767. *
  768. * PARAMETERS:
  769. *
  770. * RETURNS:
  771. */
  772. int StrCharGetEcharLen(const ECHAR * const pszA)
  773. {
  774. int nReturn = 0;
  775. if (pszA == NULL)
  776. {
  777. assert(FALSE);
  778. return nReturn;
  779. }
  780. #if defined(CHAR_MIXED)
  781. while (pszA[nReturn] != ETEXT('\0'))
  782. {
  783. nReturn++;
  784. }
  785. #else
  786. nReturn = strlen(pszA);
  787. #endif
  788. return nReturn;
  789. }
  790. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  791. * FUNCTION:
  792. * StrCharGetEcharByteCount
  793. *
  794. * DESCRIPTION:
  795. *
  796. * PARAMETERS:
  797. *
  798. * RETURNS:
  799. */
  800. int StrCharGetEcharByteCount(const ECHAR * const pszA)
  801. {
  802. #if defined(CHAR_MIXED)
  803. int nLoop = 0;
  804. #endif
  805. if (pszA == NULL)
  806. {
  807. assert(FALSE);
  808. return 0;
  809. }
  810. #if defined(CHAR_MIXED)
  811. while (pszA[nLoop] != 0)
  812. {
  813. nLoop++;
  814. }
  815. nLoop *= (int)sizeof(ECHAR);
  816. return nLoop;
  817. #else
  818. return (int)strlen(pszA);
  819. #endif
  820. }
  821. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
  822. * FUNCTION:
  823. * StrCharCmpEtoT
  824. *
  825. * DESCRIPTION:
  826. *
  827. * PARAMETERS:
  828. *
  829. * RETURNS:
  830. */
  831. int StrCharCmpEtoT(const ECHAR * const pszA, const TCHAR * const pszB)
  832. {
  833. #if defined(CHAR_MIXED)
  834. TCHAR *tpszA = NULL;
  835. int nLenA = StrCharGetEcharLen(pszA);
  836. tpszA = (TCHAR *)malloc((unsigned int)nLenA * sizeof(ECHAR));
  837. if (tpszA == NULL)
  838. {
  839. assert(FALSE);
  840. return 0;
  841. }
  842. CnvrtECHARtoMBCS(tpszA, (unsigned long)(nLenA * (int)sizeof(ECHAR)), pszA, StrCharGetEcharByteCount(pszA));
  843. return StrCharCmp(tpszA, pszB);
  844. #else
  845. return strcmp(pszA, pszB);
  846. #endif
  847. }
  848. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
  849. * FUNCTION:
  850. * ECHAR_Fill
  851. *
  852. * DESCRIPTION:
  853. * Fills a ECHAR string with the specified ECHAR.
  854. *
  855. * ARGUMENTS:
  856. * dest - string to fill.
  857. * c - character to fill string with.
  858. * size_t - number of ECHAR units to copy.
  859. *
  860. * RETURNS:
  861. * pointer to string.
  862. *
  863. */
  864. ECHAR *ECHAR_Fill(ECHAR *dest, ECHAR c, size_t count)
  865. {
  866. #if defined(CHAR_NARROW)
  867. return (TCHAR *)memset(dest, c, count);
  868. #else
  869. unsigned int i;
  870. if (dest == NULL)
  871. {
  872. assert(FALSE);
  873. return 0;
  874. }
  875. for (i = 0 ; i < count ; ++i)
  876. dest[i] = c;
  877. return dest;
  878. #endif
  879. }
  880. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
  881. * FUNCTION:
  882. * ECHAR_Fill
  883. *
  884. * DESCRIPTION:
  885. *
  886. *
  887. * ARGUMENTS:
  888. * pszDest - string to fill.
  889. * cchDest - number of TCHAR units to copy.
  890. * eChar - character to fill string with.
  891. *
  892. * RETURNS:
  893. * pointer to string.
  894. *
  895. */
  896. int CnvrtECHARtoTCHAR(LPTSTR pszDest, int cchDest, ECHAR eChar)
  897. {
  898. #if defined(CHAR_NARROW)
  899. pszDest[0] = eChar;
  900. pszDest[1] = ETEXT('\0');
  901. #else
  902. memset(pszDest, 0, cchDest*sizeof(*pszDest));
  903. // This is the only place where we convert a single ECHAR to TCHAR's
  904. // so as of right now we will not make this into a function.
  905. if (eChar & 0xFF00)
  906. // Lead byte in this character, load the lead byte into one
  907. // TCHAR and the lower byte into a second TCHAR.
  908. {
  909. if (cchDest >= 2)
  910. {
  911. pszDest[0] = (TCHAR)((eChar & 0xFF00) >> 8);
  912. pszDest[1] = (TCHAR)(eChar & 0x00FF);
  913. }
  914. else
  915. {
  916. return 1;
  917. }
  918. }
  919. else
  920. // No lead byte in this ECHAR, just load the lower half into
  921. // the TCHAR.
  922. {
  923. pszDest[0] = (TCHAR)(eChar & 0x00FF);
  924. }
  925. #endif
  926. return 0;
  927. }
  928. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
  929. * FUNCTION:
  930. * isDBCSChar
  931. *
  932. * DESCRIPTION:
  933. * Determines if the Character is Double Byte or not
  934. *
  935. *
  936. * ARGUMENTS:
  937. * c - character to test.
  938. *
  939. * RETURNS:
  940. * int TRUE - if DBCS
  941. * FALSE - if SBCS
  942. *
  943. */
  944. int isDBCSChar(unsigned int Char)
  945. {
  946. int rtn = 0;
  947. #if defined(CHAR_NARROW)
  948. rtn = 0;
  949. #else
  950. ECHAR ech = 0;
  951. char ch;
  952. if (Char == 0)
  953. {
  954. // assert(FALSE);
  955. return FALSE;
  956. }
  957. ech = ETEXT(Char);
  958. if (ech & 0xFF00)
  959. {
  960. ch = (char)(ech >> 8);
  961. if (IsDBCSLeadByte(ch))
  962. {
  963. rtn = 1;
  964. }
  965. }
  966. #endif
  967. return rtn;
  968. }
  969. /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
  970. * FUNCTION:
  971. * StrCharStripDBCSString
  972. *
  973. * DESCRIPTION:
  974. * Strips out Left/Right pairs of wide characters and leaves a single wide character
  975. * in it's place
  976. *
  977. *
  978. * ARGUMENTS:
  979. * aech - String to be stripped
  980. *
  981. * RETURNS:
  982. * int - number of characters striped out of string
  983. *
  984. */
  985. int StrCharStripDBCSString(ECHAR *aechDest, const long lDestSize,
  986. ECHAR *aechSource)
  987. {
  988. int nCount = 0;
  989. #if !defined(CHAR_NARROW)
  990. ECHAR *pechTmpS;
  991. ECHAR *pechTmpD;
  992. long j;
  993. long lDLen = lDestSize / sizeof(ECHAR);;
  994. if ((aechSource == NULL) || (aechDest == NULL))
  995. {
  996. assert(FALSE);
  997. return nCount;
  998. }
  999. pechTmpS = aechSource;
  1000. pechTmpD = aechDest;
  1001. for (j = 0; (*pechTmpS != '\0') && (j < lDLen); j++)
  1002. {
  1003. *pechTmpD = *pechTmpS;
  1004. if ((isDBCSChar(*pechTmpS)) && (*(pechTmpS + 1) != '\0'))
  1005. {
  1006. if (*pechTmpS == *(pechTmpS + 1))
  1007. {
  1008. pechTmpS++;
  1009. nCount++;
  1010. }
  1011. }
  1012. pechTmpS++;
  1013. pechTmpD++;
  1014. }
  1015. *pechTmpD = ETEXT('\0');
  1016. #endif
  1017. return nCount;
  1018. }
  1019. #if defined(DEADWOOD)
  1020. #if defined(INCL_VTUTF8)
  1021. //******************************************************************************
  1022. // Function: TranslateUTF8ToDBCS
  1023. //
  1024. // Description:
  1025. // This function will convert a UTF-8 character to a DBCS character. If the
  1026. // character passed is not a full description of a UTF-8 character, then the
  1027. // character is appended to the UNICODE buffer.
  1028. // Arguments:
  1029. // IncomingByte
  1030. // pUTF8Buffer
  1031. // iUTF8BufferLength
  1032. // pUNICODEBuffer
  1033. // iUNICODEBufferLength
  1034. // pDBCSBuffer
  1035. // iDBCSBufferLength
  1036. //
  1037. // Returns:
  1038. //
  1039. // Throws:
  1040. //
  1041. // Author: Ron E. Vorndam, 03/06/2001
  1042. //
  1043. BOOLEAN TranslateUTF8ToDBCS(UCHAR IncomingByte,
  1044. UCHAR *pUTF8Buffer,
  1045. int iUTF8BufferLength,
  1046. WCHAR *pUNICODEBuffer,
  1047. int iUNICODEBufferLength,
  1048. TCHAR *pDBCSBuffer,
  1049. int iDBCSBufferLength)
  1050. {
  1051. BOOLEAN bReturn = FALSE;
  1052. int iLength = 0;
  1053. if (pUTF8Buffer != NULL && iUTF8BufferLength > 0 &&
  1054. pUNICODEBuffer != NULL && iUNICODEBufferLength > 0 &&
  1055. pDBCSBuffer != NULL && iDBCSBufferLength > 0)
  1056. {
  1057. //
  1058. // Translate from UTF8 to UNICODE.
  1059. //
  1060. if (TranslateUtf8ToUnicode(IncomingByte,
  1061. pUTF8Buffer,
  1062. pUNICODEBuffer) == TRUE)
  1063. {
  1064. //
  1065. // Now Translate the UNICODE to DBCS characters.
  1066. //
  1067. iLength = WideCharToMultiByte(CP_OEMCP,
  1068. //iLength = WideCharToMultiByte(CP_ACP,
  1069. 0, //WC_COMPOSITECHECK | WC_SEPCHARS,
  1070. pUNICODEBuffer, -1,
  1071. NULL, 0, NULL, NULL );
  1072. if (iLength > 0 && iDBCSBufferLength >= iLength)
  1073. {
  1074. WideCharToMultiByte(CP_OEMCP,
  1075. //WideCharToMultiByte(CP_ACP,
  1076. 0, //WC_COMPOSITECHECK | WC_SEPCHARS,
  1077. pUNICODEBuffer, -1,
  1078. pDBCSBuffer, iLength, NULL, NULL);
  1079. if (iLength > 0)
  1080. {
  1081. bReturn = TRUE;
  1082. }
  1083. }
  1084. else
  1085. {
  1086. //
  1087. // Return an error and report the number of bytes required to
  1088. // make the data conversion.
  1089. //
  1090. iDBCSBufferLength = iLength * -1;
  1091. }
  1092. }
  1093. }
  1094. return bReturn;
  1095. }
  1096. BOOLEAN TranslateDBCSToUTF8(const TCHAR *pDBCSBuffer,
  1097. int iDBCSBufferLength,
  1098. WCHAR *pUNICODEBuffer,
  1099. int iUNICODEBufferLength,
  1100. UCHAR *pUTF8Buffer,
  1101. int iUTF8BufferLength)
  1102. {
  1103. BOOLEAN bReturn = FALSE;
  1104. int iLength = 0;
  1105. //iLength = WideCharToMultiByte(CP_OEMCP,
  1106. iLength = MultiByteToWideChar(CP_ACP,
  1107. 0, //WC_COMPOSITECHECK | WC_SEPCHARS,
  1108. pDBCSBuffer, -1,
  1109. pUNICODEBuffer, iLength);
  1110. if (iLength > 0 && iDBCSBufferLength > 0)
  1111. {
  1112. if (pUNICODEBuffer != NULL && iUNICODEBufferLength >= iLength)
  1113. {
  1114. //
  1115. // Translate the DBCS to UNICODE characters.
  1116. //
  1117. //WideCharToMultiByte(CP_OEMCP,
  1118. MultiByteToWideChar(CP_ACP,
  1119. 0, //MB_COMPOSITE,
  1120. pDBCSBuffer, -1,
  1121. pUNICODEBuffer, iLength);
  1122. if (iLength > 0 && iLength <= iUTF8BufferLength)
  1123. {
  1124. //
  1125. // Translate from UNICODE to UTF8.
  1126. //
  1127. bReturn = TranslateUnicodeToUtf8(pUNICODEBuffer,
  1128. pUTF8Buffer);
  1129. }
  1130. }
  1131. }
  1132. return bReturn;
  1133. }
  1134. //
  1135. // The following functions are from code obtained directly from
  1136. // Microsoft for converting Unicode to UTF-8 and UTF-8 to unicode
  1137. // buffers. REV: 03/02/2001
  1138. //
  1139. BOOLEAN TranslateUnicodeToUtf8(PCWSTR SourceBuffer,
  1140. UCHAR *DestinationBuffer)
  1141. /*++
  1142. Routine Description:
  1143. translates a unicode buffer into a UTF8 version.
  1144. Arguments:
  1145. SourceBuffer - unicode buffer to be translated.
  1146. DestinationBuffer - receives UTF8 version of same buffer.
  1147. Return Value:
  1148. TRUE - We successfully translated the Unicode value into its
  1149. corresponding UTF8 encoding.
  1150. FALSE - The translation failed.
  1151. --*/
  1152. {
  1153. ULONG Count = 0;
  1154. //
  1155. // convert into UTF8 for actual transmission
  1156. //
  1157. // UTF-8 encodes 2-byte Unicode characters as follows:
  1158. // If the first nine bits are zero (00000000 0xxxxxxx), encode it as one byte 0xxxxxxx
  1159. // If the first five bits are zero (00000yyy yyxxxxxx), encode it as two bytes 110yyyyy 10xxxxxx
  1160. // Otherwise (zzzzyyyy yyxxxxxx), encode it as three bytes 1110zzzz 10yyyyyy 10xxxxxx
  1161. //
  1162. DestinationBuffer[Count] = (UCHAR)'\0';
  1163. while (*SourceBuffer) {
  1164. if( (*SourceBuffer & 0xFF80) == 0 ) {
  1165. //
  1166. // if the top 9 bits are zero, then just
  1167. // encode as 1 byte. (ASCII passes through unchanged).
  1168. //
  1169. DestinationBuffer[Count++] = (UCHAR)(*SourceBuffer & 0x7F);
  1170. } else if( (*SourceBuffer & 0xF800) == 0 ) {
  1171. //
  1172. // if the top 5 bits are zero, then encode as 2 bytes
  1173. //
  1174. DestinationBuffer[Count++] = (UCHAR)((*SourceBuffer >> 6) & 0x1F) | 0xC0;
  1175. DestinationBuffer[Count++] = (UCHAR)(*SourceBuffer & 0xBF) | 0x80;
  1176. } else {
  1177. //
  1178. // encode as 3 bytes
  1179. //
  1180. DestinationBuffer[Count++] = (UCHAR)((*SourceBuffer >> 12) & 0xF) | 0xE0;
  1181. DestinationBuffer[Count++] = (UCHAR)((*SourceBuffer >> 6) & 0x3F) | 0x80;
  1182. DestinationBuffer[Count++] = (UCHAR)(*SourceBuffer & 0xBF) | 0x80;
  1183. }
  1184. SourceBuffer += 1;
  1185. }
  1186. DestinationBuffer[Count] = (UCHAR)'\0';
  1187. return(TRUE);
  1188. }
  1189. BOOLEAN TranslateUtf8ToUnicode(UCHAR IncomingByte,
  1190. UCHAR *ExistingUtf8Buffer,
  1191. WCHAR *DestinationUnicodeVal)
  1192. /*++
  1193. Routine Description:
  1194. Takes IncomingByte and concatenates it onto ExistingUtf8Buffer.
  1195. Then attempts to decode the new contents of ExistingUtf8Buffer.
  1196. Arguments:
  1197. IncomingByte - New character to be appended onto
  1198. ExistingUtf8Buffer.
  1199. ExistingUtf8Buffer - running buffer containing incomplete UTF8
  1200. encoded unicode value. When it gets full,
  1201. we'll decode the value and return the
  1202. corresponding Unicode value.
  1203. Note that if we *do* detect a completed UTF8
  1204. buffer and actually do a decode and return a
  1205. Unicode value, then we will zero-fill the
  1206. contents of ExistingUtf8Buffer.
  1207. DestinationUnicodeVal - receives Unicode version of the UTF8 buffer.
  1208. Note that if we do *not* detect a completed
  1209. UTF8 buffer and thus can not return any data
  1210. in DestinationUnicodeValue, then we will
  1211. zero-fill the contents of DestinationUnicodeVal.
  1212. Return Value:
  1213. TRUE - We received a terminating character for our UTF8 buffer and will
  1214. return a decoded Unicode value in DestinationUnicode.
  1215. FALSE - We haven't yet received a terminating character for our UTF8
  1216. buffer.
  1217. --*/
  1218. {
  1219. // ULONG Count = 0;
  1220. ULONG i = 0;
  1221. BOOLEAN ReturnValue = FALSE;
  1222. //
  1223. // Insert our byte into ExistingUtf8Buffer.
  1224. //
  1225. i = 0;
  1226. do {
  1227. if( ExistingUtf8Buffer[i] == 0 ) {
  1228. ExistingUtf8Buffer[i] = IncomingByte;
  1229. break;
  1230. }
  1231. i++;
  1232. } while( i < 3 );
  1233. //
  1234. // If we didn't get to actually insert our IncomingByte,
  1235. // then someone sent us a fully-qualified UTF8 buffer.
  1236. // This means we're about to drop IncomingByte.
  1237. //
  1238. // Drop the zero-th byte, shift everything over by one
  1239. // and insert our new character.
  1240. //
  1241. // This implies that we should *never* need to zero out
  1242. // the contents of ExistingUtf8Buffer unless we detect
  1243. // a completed UTF8 packet. Otherwise, assume one of
  1244. // these cases:
  1245. // 1. We started listening mid-stream, so we caught the
  1246. // last half of a UTF8 packet. In this case, we'll
  1247. // end up shifting the contents of ExistingUtf8Buffer
  1248. // until we detect a proper UTF8 start byte in the zero-th
  1249. // position.
  1250. // 2. We got some garbage character, which would invalidate
  1251. // a UTF8 packet. By using the logic below, we would
  1252. // end up disregarding that packet and waiting for
  1253. // the next UTF8 packet to come in.
  1254. if( i >= 3 ) {
  1255. ExistingUtf8Buffer[0] = ExistingUtf8Buffer[1];
  1256. ExistingUtf8Buffer[1] = ExistingUtf8Buffer[2];
  1257. ExistingUtf8Buffer[2] = IncomingByte;
  1258. }
  1259. //
  1260. // Attempt to convert the UTF8 buffer
  1261. //
  1262. // UTF8 decodes to Unicode in the following fashion:
  1263. // If the high-order bit is 0 in the first byte:
  1264. // 0xxxxxxx yyyyyyyy zzzzzzzz decodes to a Unicode value of 00000000 0xxxxxxx
  1265. //
  1266. // If the high-order 3 bits in the first byte == 6:
  1267. // 110xxxxx 10yyyyyy zzzzzzzz decodes to a Unicode value of 00000xxx xxyyyyyy
  1268. //
  1269. // If the high-order 3 bits in the first byte == 7:
  1270. // 1110xxxx 10yyyyyy 10zzzzzz decodes to a Unicode value of xxxxyyyy yyzzzzzz
  1271. //
  1272. if( (ExistingUtf8Buffer[0] & 0x80) == 0 ) {
  1273. //
  1274. // First case described above. Just return the first byte
  1275. // of our UTF8 buffer.
  1276. //
  1277. *DestinationUnicodeVal = (WCHAR)(ExistingUtf8Buffer[0]);
  1278. //
  1279. // We used 1 byte. Discard that byte and shift everything
  1280. // in our buffer over by 1.
  1281. //
  1282. ExistingUtf8Buffer[0] = ExistingUtf8Buffer[1];
  1283. ExistingUtf8Buffer[1] = ExistingUtf8Buffer[2];
  1284. ExistingUtf8Buffer[2] = 0;
  1285. ReturnValue = TRUE;
  1286. } else if( (ExistingUtf8Buffer[0] & 0xE0) == 0xC0 ) {
  1287. //
  1288. // Second case described above. Decode the first 2 bytes of
  1289. // of our UTF8 buffer.
  1290. //
  1291. if( (ExistingUtf8Buffer[1] & 0xC0) == 0x80 ) {
  1292. // upper byte: 00000xxx
  1293. *DestinationUnicodeVal = ((ExistingUtf8Buffer[0] >> 2) & 0x07);
  1294. *DestinationUnicodeVal = *DestinationUnicodeVal << 8;
  1295. // high bits of lower byte: xx000000
  1296. *DestinationUnicodeVal |= ((ExistingUtf8Buffer[0] & 0x03) << 6);
  1297. // low bits of lower byte: 00yyyyyy
  1298. *DestinationUnicodeVal |= (ExistingUtf8Buffer[1] & 0x3F);
  1299. //
  1300. // We used 2 bytes. Discard those bytes and shift everything
  1301. // in our buffer over by 2.
  1302. //
  1303. ExistingUtf8Buffer[0] = ExistingUtf8Buffer[2];
  1304. ExistingUtf8Buffer[1] = 0;
  1305. ExistingUtf8Buffer[2] = 0;
  1306. ReturnValue = TRUE;
  1307. }
  1308. } else if( (ExistingUtf8Buffer[0] & 0xF0) == 0xE0 ) {
  1309. //
  1310. // Third case described above. Decode the all 3 bytes of
  1311. // of our UTF8 buffer.
  1312. //
  1313. if( (ExistingUtf8Buffer[1] & 0xC0) == 0x80 ) {
  1314. if( (ExistingUtf8Buffer[2] & 0xC0) == 0x80 ) {
  1315. // upper byte: xxxx0000
  1316. *DestinationUnicodeVal = ((ExistingUtf8Buffer[0] << 4) & 0xF0);
  1317. // upper byte: 0000yyyy
  1318. *DestinationUnicodeVal |= ((ExistingUtf8Buffer[1] >> 2) & 0x0F);
  1319. *DestinationUnicodeVal = *DestinationUnicodeVal << 8;
  1320. // lower byte: yy000000
  1321. *DestinationUnicodeVal |= ((ExistingUtf8Buffer[1] << 6) & 0xC0);
  1322. // lower byte: 00zzzzzz
  1323. *DestinationUnicodeVal |= (ExistingUtf8Buffer[2] & 0x3F);
  1324. //
  1325. // We used all 3 bytes. Zero out the buffer.
  1326. //
  1327. ExistingUtf8Buffer[0] = 0;
  1328. ExistingUtf8Buffer[1] = 0;
  1329. ExistingUtf8Buffer[2] = 0;
  1330. ReturnValue = TRUE;
  1331. }
  1332. }
  1333. }
  1334. return ReturnValue;
  1335. }
  1336. #endif //INCL_VTUTF8
  1337. #endif // defined(DEADWOOD)