Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1251 lines
29 KiB

  1. // This is a part of the Active Template Library.
  2. // Copyright (C) 1996-2001 Microsoft Corporation
  3. // All rights reserved.
  4. //
  5. // This source code is only intended as a supplement to the
  6. // Active Template Library Reference and related
  7. // electronic documentation provided with the library.
  8. // See these sources for detailed information regarding the
  9. // Active Template Library product.
  10. #ifndef __ATLENC_H__
  11. #define __ATLENC_H__
  12. #pragma once
  13. #include <atlbase.h>
  14. #include <stdio.h>
  15. namespace ATL {
  16. //Not including CRLFs
  17. //NOTE: For BASE64 and UUENCODE, this actually
  18. //represents the amount of unencoded characters
  19. //per line
  20. #define ATLSMTP_MAX_QP_LINE_LENGTH 76
  21. #define ATLSMTP_MAX_BASE64_LINE_LENGTH 57
  22. #define ATLSMTP_MAX_UUENCODE_LINE_LENGTH 45
  23. //=======================================================================
  24. // Base64Encode/Base64Decode
  25. // compliant with RFC 2045
  26. //=======================================================================
  27. //
  28. #define ATL_BASE64_FLAG_NONE 0
  29. #define ATL_BASE64_FLAG_NOPAD 1
  30. #define ATL_BASE64_FLAG_NOCRLF 2
  31. inline int Base64EncodeGetRequiredLength(int nSrcLen, DWORD dwFlags=ATL_BASE64_FLAG_NONE) throw()
  32. {
  33. int nRet = nSrcLen*4/3;
  34. if ((dwFlags & ATL_BASE64_FLAG_NOPAD) == 0)
  35. nRet += nSrcLen % 3;
  36. int nCRLFs = nRet / 76;
  37. int nOnLastLine = nRet % 76;
  38. if (nOnLastLine)
  39. {
  40. nCRLFs++;
  41. if (nOnLastLine % 4)
  42. nRet += 4-(nOnLastLine % 4);
  43. }
  44. nCRLFs *= 2;
  45. if ((dwFlags & ATL_BASE64_FLAG_NOCRLF) == 0)
  46. nRet += nCRLFs;
  47. return nRet+1;
  48. }
  49. inline int Base64DecodeGetRequiredLength(int nSrcLen) throw()
  50. {
  51. return nSrcLen;
  52. }
  53. inline BOOL Base64Encode(
  54. const BYTE *pbSrcData,
  55. int nSrcLen,
  56. LPSTR szDest,
  57. int *pnDestLen,
  58. DWORD dwFlags=ATL_BASE64_FLAG_NONE) throw()
  59. {
  60. static const char s_chBase64EncodingTable[64] = {
  61. 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q',
  62. 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
  63. 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y',
  64. 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' };
  65. if (!pbSrcData || !szDest || !pnDestLen)
  66. {
  67. return FALSE;
  68. }
  69. ATLASSERT(*pnDestLen >= Base64EncodeGetRequiredLength(nSrcLen, dwFlags));
  70. int nWritten( 0 );
  71. int nLen1( (nSrcLen/3)*4 );
  72. int nLen2( nLen1/76 );
  73. int nLen3( 19 );
  74. for (int i=0; i<=nLen2; i++)
  75. {
  76. if (i==nLen2)
  77. nLen3 = (nLen1%76)/4;
  78. for (int j=0; j<nLen3; j++)
  79. {
  80. DWORD dwCurr(0);
  81. for (int n=0; n<3; n++)
  82. {
  83. dwCurr |= *pbSrcData++;
  84. dwCurr <<= 8;
  85. }
  86. for (int k=0; k<4; k++)
  87. {
  88. BYTE b = (BYTE)(dwCurr>>26);
  89. *szDest++ = s_chBase64EncodingTable[b];
  90. dwCurr <<= 6;
  91. }
  92. }
  93. nWritten+= nLen3*4;
  94. if ((dwFlags & ATL_BASE64_FLAG_NOCRLF)==0)
  95. {
  96. *szDest++ = '\r';
  97. *szDest++ = '\n';
  98. nWritten+= 2;
  99. }
  100. }
  101. nLen2 = nSrcLen%3 ? nSrcLen%3 + 1 : 0;
  102. if (nLen2)
  103. {
  104. if ((dwFlags & ATL_BASE64_FLAG_NOCRLF)==0)
  105. {
  106. szDest-= 2;
  107. nWritten-= 2;
  108. }
  109. DWORD dwCurr(0);
  110. for (int n=0; n<3; n++)
  111. {
  112. if (n<(nSrcLen%3))
  113. dwCurr |= *pbSrcData++;
  114. dwCurr <<= 8;
  115. }
  116. for (int k=0; k<nLen2; k++)
  117. {
  118. BYTE b = (BYTE)(dwCurr>>26);
  119. *szDest++ = s_chBase64EncodingTable[b];
  120. dwCurr <<= 6;
  121. }
  122. nWritten+= nLen2;
  123. if ((dwFlags & ATL_BASE64_FLAG_NOPAD)==0)
  124. {
  125. nLen3 = nLen2 ? 4-nLen2 : 0;
  126. for (int j=0; j<nLen3; j++)
  127. {
  128. *szDest++ = '=';
  129. }
  130. nWritten+= nLen3;
  131. }
  132. if ((dwFlags & ATL_BASE64_FLAG_NOCRLF)==0)
  133. {
  134. *szDest++ = '\r';
  135. *szDest++ = '\n';
  136. nWritten+= 2;
  137. }
  138. }
  139. *pnDestLen = nWritten;
  140. return TRUE;
  141. }
  142. inline int DecodeBase64Char(unsigned int ch) throw()
  143. {
  144. // returns -1 if the character is invalid
  145. // or should be skipped
  146. // otherwise, returns the 6-bit code for the character
  147. // from the encoding table
  148. if (ch >= 'A' && ch <= 'Z')
  149. return ch - 'A' + 0; // 0 range starts at 'A'
  150. if (ch >= 'a' && ch <= 'z')
  151. return ch - 'a' + 26; // 26 range starts at 'a'
  152. if (ch >= '0' && ch <= '9')
  153. return ch - '0' + 52; // 52 range starts at '0'
  154. if (ch == '+')
  155. return 62;
  156. if (ch == '/')
  157. return 63;
  158. return -1;
  159. }
  160. inline BOOL Base64Decode(LPCSTR szSrc, int nSrcLen, BYTE *pbDest, int *pnDestLen) throw()
  161. {
  162. // walk the source buffer
  163. // each four character sequence is converted to 3 bytes
  164. // CRLFs and =, and any characters not in the encoding table
  165. // are skiped
  166. if (!szSrc || !pbDest || !pnDestLen)
  167. {
  168. return FALSE;
  169. }
  170. LPCSTR szSrcEnd = szSrc + nSrcLen;
  171. int nWritten = 0;
  172. while (szSrc < szSrcEnd)
  173. {
  174. DWORD dwCurr = 0;
  175. int i;
  176. int nBits = 0;
  177. for (i=0; i<4; i++)
  178. {
  179. if (szSrc >= szSrcEnd)
  180. break;
  181. int nCh = DecodeBase64Char(*szSrc);
  182. szSrc++;
  183. if (nCh == -1)
  184. {
  185. // skip this char
  186. i--;
  187. continue;
  188. }
  189. dwCurr <<= 6;
  190. dwCurr |= nCh;
  191. nBits += 6;
  192. }
  193. // dwCurr has the 3 bytes to write to the output buffer
  194. // left to right
  195. dwCurr <<= 24-nBits;
  196. for (i=0; i<nBits/8; i++)
  197. {
  198. *pbDest = (BYTE) ((dwCurr & 0x00ff0000) >> 16);
  199. dwCurr <<= 8;
  200. pbDest++;
  201. nWritten++;
  202. }
  203. }
  204. *pnDestLen = nWritten;
  205. return TRUE;
  206. }
  207. //=======================================================================
  208. // UUEncode/UUDecode
  209. // compliant with POSIX P1003.2b/D11
  210. //=======================================================================
  211. //
  212. //Flag to determine whether or not we should encode the header
  213. #define ATLSMTP_UUENCODE_HEADER 1
  214. //Flag to determine whether or not we should encode the end
  215. #define ATLSMTP_UUENCODE_END 2
  216. //Flag to determine whether or not we should do data stuffing
  217. #define ATLSMTP_UUENCODE_DOT 4
  218. //The the (rough) required length of the uuencoded stream based
  219. //on input of length nSrcLen
  220. inline int UUEncodeGetRequiredLength(int nSrcLen) throw()
  221. {
  222. int nRet = nSrcLen*4/3;
  223. nRet += 3*(nSrcLen/ATLSMTP_MAX_UUENCODE_LINE_LENGTH);
  224. nRet += 12+_MAX_FNAME;
  225. nRet += 8;
  226. return nRet;
  227. }
  228. //Get the decode required length
  229. inline int UUDecodeGetRequiredLength(int nSrcLen) throw()
  230. {
  231. return nSrcLen;
  232. }
  233. //encode a chunk of data
  234. inline BOOL UUEncode(
  235. const BYTE* pbSrcData,
  236. int nSrcLen,
  237. LPSTR szDest,
  238. int* pnDestLen,
  239. LPCTSTR lpszFile = _T("file"),
  240. DWORD dwFlags = 0) throw()
  241. {
  242. //The UUencode character set
  243. static const char s_chUUEncodeChars[64] = {
  244. '`','!','"','#','$','%','&','\'','(',')','*','+',',',
  245. '-','.','/','0','1','2','3','4','5','6','7','8','9',
  246. ':',';','<','=','>','?','@','A','B','C','D','E','F',
  247. 'G','H','I','J','K','L','M','N','O','P','Q','R','S',
  248. 'T','U','V','W','X','Y','Z','[','\\',']','^','_'
  249. };
  250. if (!pbSrcData || !szDest || !pnDestLen)
  251. {
  252. return FALSE;
  253. }
  254. ATLASSERT(*pnDestLen >= UUEncodeGetRequiredLength(nSrcLen));
  255. BYTE ch1 = 0, ch2 = 0, ch3 = 0;
  256. int nTotal = 0, nCurr = 0, nWritten = 0, nCnt = 0;
  257. //if ATL_UUENCODE_HEADER
  258. //header
  259. if (dwFlags & ATLSMTP_UUENCODE_HEADER)
  260. {
  261. //default permission is 666
  262. nWritten = sprintf(szDest, "begin 666 %s\r\n", (LPCSTR)(CT2CAEX<MAX_PATH+1>( lpszFile )));
  263. szDest += nWritten;
  264. }
  265. //while we haven't reached the end of the data
  266. while (nTotal < nSrcLen)
  267. {
  268. //If the amount of data is greater than MAX_UUENCODE_LINE_LENGTH
  269. //cut off at MAX_UUENCODE_LINE_LENGTH
  270. if (nSrcLen-nTotal >= ATLSMTP_MAX_UUENCODE_LINE_LENGTH)
  271. nCurr = ATLSMTP_MAX_UUENCODE_LINE_LENGTH;
  272. else
  273. nCurr = nSrcLen-nTotal+1;
  274. nCnt = 1;
  275. if (nCurr < ATLSMTP_MAX_UUENCODE_LINE_LENGTH)
  276. *szDest = (char)(nCurr+31);
  277. else
  278. *szDest = (char)(nCurr+32);
  279. nWritten++;
  280. //if we need to stuff an extra dot (e.g. when we are sending via SMTP), do it
  281. if ((dwFlags & ATLSMTP_UUENCODE_DOT) && *szDest == '.')
  282. {
  283. *(++szDest) = '.';
  284. nWritten++;
  285. }
  286. szDest++;
  287. while (nCnt < nCurr)
  288. {
  289. //Set to 0 in the uuencoding alphabet
  290. ch1 = ch2 = ch3 = ' ';
  291. ch1 = *pbSrcData++;
  292. nCnt++;
  293. nTotal++;
  294. if (nTotal < nSrcLen)
  295. {
  296. ch2 = *pbSrcData++;
  297. nCnt++;
  298. nTotal++;
  299. }
  300. if (nTotal < nSrcLen)
  301. {
  302. ch3 = *pbSrcData++;
  303. nCnt++;
  304. nTotal++;
  305. }
  306. //encode the first 6 bits of ch1
  307. *szDest++ = s_chUUEncodeChars[(ch1 >> 2) & 0x3F];
  308. //encode the last 2 bits of ch1 and the first 4 bits of ch2
  309. *szDest++ = s_chUUEncodeChars[((ch1 << 4) & 0x30) | ((ch2 >> 4) & 0x0F)];
  310. //encode the last 4 bits of ch2 and the first 2 bits of ch3
  311. *szDest++ = s_chUUEncodeChars[((ch2 << 2) & 0x3C) | ((ch3 >> 6) & 0x03)];
  312. //encode the last 6 bits of ch3
  313. *szDest++ = s_chUUEncodeChars[ch3 & 0x3F];
  314. nWritten += 4;
  315. }
  316. //output a CRLF
  317. *szDest++ = '\r';
  318. *szDest++ = '\n';
  319. nWritten += 2;
  320. }
  321. //if we need to encode the end, do it
  322. if (dwFlags & ATLSMTP_UUENCODE_END)
  323. {
  324. *szDest++ = '`';
  325. *szDest++ = '\r';
  326. *szDest++ = '\n';
  327. nWritten += 3;
  328. nWritten += sprintf(szDest, "end\r\n");
  329. }
  330. *pnDestLen = nWritten;
  331. return TRUE;
  332. }
  333. inline BOOL UUDecode(
  334. BYTE* pbSrcData,
  335. int nSrcLen,
  336. BYTE* pbDest,
  337. int* pnDestLen,
  338. BYTE* szFileName,
  339. int* pnFileNameLength,
  340. int* pnPermissions,
  341. DWORD dwFlags = 0) throw()
  342. {
  343. if (!pbSrcData || !pbDest || !szFileName ||
  344. !pnFileNameLength || !pnPermissions || !pnDestLen)
  345. {
  346. return FALSE;
  347. }
  348. int i = 0, j = 0;
  349. int nLineLen = 0;
  350. char ch;
  351. int nRead = 0, nWritten = 0;
  352. char tmpBuf[256];
  353. //get the file name
  354. //eat the begin statement
  355. while (*pbSrcData != 'b')
  356. {
  357. ATLASSERT( nRead < nSrcLen );
  358. pbSrcData++;
  359. nRead++;
  360. }
  361. pbSrcData--;
  362. while ((ch = *pbSrcData) != ' ')
  363. {
  364. ATLASSERT( nRead < nSrcLen );
  365. ATLASSERT( i < 256 );
  366. pbSrcData++;
  367. tmpBuf[i++] = ch;
  368. nRead++;
  369. }
  370. nRead++;
  371. //uuencode block must start with a begin
  372. if (strncmp(tmpBuf, "begin", 5))
  373. {
  374. return FALSE;
  375. }
  376. while((ch = *pbSrcData) == ' ')
  377. {
  378. ATLASSERT( nRead < nSrcLen );
  379. pbSrcData++;
  380. nRead++;
  381. }
  382. //get the permissions
  383. i = 0;
  384. pbSrcData--;
  385. while ((ch = *pbSrcData++) != ' ')
  386. {
  387. ATLASSERT( nRead < nSrcLen );
  388. ATLASSERT( i < 256 );
  389. tmpBuf[i++] = ch;
  390. nRead++;
  391. }
  392. *pnPermissions = atoi(tmpBuf);
  393. nRead++;
  394. //get the filename
  395. i = 0;
  396. while (((ch = *pbSrcData++) != '\r') && ch != '\n' && i < *pnFileNameLength)
  397. {
  398. ATLASSERT( nRead < nSrcLen );
  399. *szFileName = ch;
  400. szFileName++;
  401. nRead++;
  402. i++;
  403. }
  404. *pnFileNameLength = i;
  405. nRead++;
  406. char chars[4];
  407. while (nRead < nSrcLen)
  408. {
  409. for (j = 0; j < 4; j++)
  410. {
  411. if (nRead < nSrcLen)
  412. {
  413. chars[j] = *pbSrcData++;
  414. nRead++;
  415. // if the character is a carriage return, skip the next '\n' and continue
  416. if (chars[j] == '\r')
  417. {
  418. nLineLen = 0;
  419. pbSrcData++;
  420. nRead++;
  421. j--;
  422. continue;
  423. }
  424. //if the character is a line-feed, skip it
  425. if (chars[j] == '\n')
  426. {
  427. nLineLen = 0;
  428. j--;
  429. continue;
  430. }
  431. //if we're at the beginning of a line, or it is an invalid character
  432. if (nLineLen == 0 || chars[j] < 31 || chars[j] > 96)
  433. {
  434. //if we're at the 'end'
  435. if (chars[j] == 'e')
  436. {
  437. //set the rest of the array to ' ' and break
  438. for (int k = j; k < 4; k++)
  439. {
  440. chars[k] = ' ';
  441. nWritten--;
  442. }
  443. nWritten++;
  444. nRead = nSrcLen+1;
  445. break;
  446. }
  447. if ((dwFlags & ATLSMTP_UUENCODE_DOT) && nLineLen == 0 && chars[j] == '.')
  448. {
  449. if ((nRead+1) < nSrcLen)
  450. {
  451. pbSrcData++;
  452. chars[j] = *pbSrcData++;
  453. nRead++;
  454. }
  455. else
  456. {
  457. return FALSE;
  458. }
  459. }
  460. else
  461. {
  462. j--;
  463. }
  464. nLineLen++;
  465. continue;
  466. }
  467. }
  468. else
  469. {
  470. chars[j] = ' ';
  471. }
  472. }
  473. if (nWritten < (*pnDestLen-3))
  474. {
  475. //decode the characters
  476. *pbDest++ = (BYTE)((((chars[0] - ' ') & 0x3F) << 2) | (((chars[1] - ' ') & 0x3F) >> 4));
  477. *pbDest++ = (BYTE)((((chars[1] - ' ') & 0x3F) << 4) | (((chars[2] - ' ') & 0x3F) >> 2));
  478. *pbDest++ = (BYTE)((((chars[2] - ' ') & 0x3F) << 6) | ((chars[3] - ' ') & 0x3F));
  479. nWritten += 3;
  480. continue;
  481. }
  482. break;
  483. }
  484. *pnDestLen = nWritten;
  485. return TRUE;
  486. }
  487. //=======================================================================
  488. // Quoted Printable encode/decode
  489. // compliant with RFC 2045
  490. //=======================================================================
  491. //
  492. inline int QPEncodeGetRequiredLength(int nSrcLen) throw()
  493. {
  494. int nRet = 3*((3*nSrcLen)/(ATLSMTP_MAX_QP_LINE_LENGTH-8));
  495. nRet += 3*nSrcLen;
  496. nRet += 3;
  497. return nRet;
  498. }
  499. inline int QPDecodeGetRequiredLength(int nSrcLen) throw()
  500. {
  501. return nSrcLen;
  502. }
  503. #define ATLSMTP_QPENCODE_DOT 1
  504. #define ATLSMTP_QPENCODE_TRAILING_SOFT 2
  505. inline BOOL QPEncode(BYTE* pbSrcData, int nSrcLen, LPSTR szDest, int* pnDestLen, DWORD dwFlags = 0) throw()
  506. {
  507. //The hexadecimal character set
  508. static const char s_chHexChars[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  509. 'A', 'B', 'C', 'D', 'E', 'F'};
  510. if (!pbSrcData || !szDest || !pnDestLen)
  511. {
  512. return FALSE;
  513. }
  514. ATLASSERT(*pnDestLen >= QPEncodeGetRequiredLength(nSrcLen));
  515. int nRead = 0, nWritten = 0, nLineLen = 0;
  516. char ch;
  517. while (nRead < nSrcLen)
  518. {
  519. ch = *pbSrcData++;
  520. nRead++;
  521. if (nLineLen == 0 && ch == '.' && (dwFlags & ATLSMTP_QPENCODE_DOT))
  522. {
  523. *szDest++ = '.';
  524. nWritten++;
  525. nLineLen++;
  526. }
  527. if ((ch > 32 && ch < 61) || (ch > 61 && ch < 127))
  528. {
  529. *szDest++ = ch;
  530. nWritten++;
  531. nLineLen++;
  532. }
  533. else if ((ch == ' ' || ch == '\t') && (nLineLen < (ATLSMTP_MAX_QP_LINE_LENGTH-12)))
  534. {
  535. *szDest++ = ch;
  536. nWritten++;
  537. nLineLen++;
  538. }
  539. else
  540. {
  541. *szDest++ = '=';
  542. *szDest++ = s_chHexChars[(ch >> 4) & 0x0F];
  543. *szDest++ = s_chHexChars[ch & 0x0F];
  544. nWritten += 3;
  545. nLineLen += 3;
  546. }
  547. if (nLineLen >= (ATLSMTP_MAX_QP_LINE_LENGTH-11))
  548. {
  549. *szDest++ = '=';
  550. *szDest++ = '\r';
  551. *szDest++ = '\n';
  552. nLineLen = 0;
  553. nWritten += 3;
  554. }
  555. }
  556. if (dwFlags & ATLSMTP_QPENCODE_TRAILING_SOFT)
  557. {
  558. *szDest++ = '=';
  559. *szDest++ = '\r';
  560. *szDest++ = '\n';
  561. nWritten += 3;
  562. }
  563. *pnDestLen = nWritten;
  564. return TRUE;
  565. }
  566. inline BOOL QPDecode(BYTE* pbSrcData, int nSrcLen, LPSTR szDest, int* pnDestLen, DWORD dwFlags = 0) throw()
  567. {
  568. if (!pbSrcData || !szDest || !pnDestLen)
  569. {
  570. return FALSE;
  571. }
  572. int nRead = 0, nWritten = 0, nLineLen = -1;
  573. char ch;
  574. while (nRead <= nSrcLen)
  575. {
  576. ch = *pbSrcData++;
  577. nRead++;
  578. nLineLen++;
  579. if (ch == '=')
  580. {
  581. //if the next character is a digit or a character, convert
  582. if (nRead < nSrcLen && (isdigit(*pbSrcData) || isalpha(*pbSrcData)))
  583. {
  584. char szBuf[5];
  585. szBuf[0] = *pbSrcData++;
  586. szBuf[1] = *pbSrcData++;
  587. szBuf[2] = '\0';
  588. char* tmp = '\0';
  589. *szDest++ = (BYTE)strtoul(szBuf, &tmp, 16);
  590. nWritten++;
  591. nRead += 2;
  592. continue;
  593. }
  594. //if the next character is a carriage return or line break, eat it
  595. if (nRead < nSrcLen && *pbSrcData == '\r' && (nRead+1 < nSrcLen) && *(pbSrcData+1)=='\n')
  596. {
  597. pbSrcData++;
  598. nRead++;
  599. nLineLen = -1;
  600. continue;
  601. }
  602. return FALSE;
  603. }
  604. if (ch == '\r' || ch == '\n')
  605. {
  606. nLineLen = -1;
  607. continue;
  608. }
  609. if ((dwFlags & ATLSMTP_QPENCODE_DOT) && ch == '.' && nLineLen == 0)
  610. {
  611. continue;
  612. }
  613. *szDest++ = ch;
  614. nWritten++;
  615. }
  616. *pnDestLen = nWritten-1;
  617. return TRUE;
  618. }
  619. //=======================================================================
  620. // Q and B encoding (for encoding MIME header information)
  621. // compliant with RFC 2047
  622. //=======================================================================
  623. inline int IsExtendedChar(char ch) throw()
  624. {
  625. return ((ch > 126 || ch < 32) && ch != '\t' && ch != '\n' && ch != '\r');
  626. }
  627. inline int GetExtendedChars(LPCSTR szSrc, int nSrcLen) throw()
  628. {
  629. ATLASSERT( szSrc );
  630. int nChars(0);
  631. for (int i=0; i<nSrcLen; i++)
  632. {
  633. if (IsExtendedChar(*szSrc++))
  634. nChars++;
  635. }
  636. return nChars;
  637. }
  638. #ifndef ATL_MAX_ENC_CHARSET_LENGTH
  639. #define ATL_MAX_ENC_CHARSET_LENGTH 50
  640. #endif
  641. //Get the required length to hold this encoding based on nSrcLen
  642. inline int QEncodeGetRequiredLength(int nSrcLen, int nCharsetLen) throw()
  643. {
  644. return QPEncodeGetRequiredLength(nSrcLen)+7+nCharsetLen;
  645. }
  646. //QEncode pbSrcData with the charset specified by pszCharSet
  647. inline BOOL QEncode(
  648. BYTE* pbSrcData,
  649. int nSrcLen,
  650. LPSTR szDest,
  651. int* pnDestLen,
  652. LPCSTR pszCharSet,
  653. int* pnNumEncoded = NULL) throw()
  654. {
  655. //The hexadecimal character set
  656. static const char s_chHexChars[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  657. 'A', 'B', 'C', 'D', 'E', 'F'};
  658. if (!pbSrcData || !szDest || !pszCharSet || !pnDestLen)
  659. {
  660. return FALSE;
  661. }
  662. ATLASSERT(*pnDestLen >= QEncodeGetRequiredLength(nSrcLen, ATL_MAX_ENC_CHARSET_LENGTH));
  663. int nRead = 0, nWritten = 0, nEncCnt = 0;
  664. char ch;
  665. *szDest++ = '=';
  666. *szDest++ = '?';
  667. nWritten = 2;
  668. //output the charset
  669. while (*pszCharSet != '\0')
  670. {
  671. *szDest++ = *pszCharSet++;
  672. nWritten++;
  673. }
  674. *szDest++ = '?';
  675. *szDest++ = 'Q';
  676. *szDest++ = '?';
  677. nWritten += 3;
  678. while (nRead < nSrcLen)
  679. {
  680. ch = *pbSrcData++;
  681. nRead++;
  682. if (((ch > 32 && ch < 61) || (ch > 61 && ch < 127)) && ch != '?' && ch != '_')
  683. {
  684. *szDest++ = ch;
  685. nWritten++;
  686. continue;
  687. }
  688. //otherwise it is an unprintable/unsafe character
  689. *szDest++ = '=';
  690. *szDest++ = s_chHexChars[(ch >> 4) & 0x0F];
  691. *szDest++ = s_chHexChars[ch & 0x0F];
  692. if (ch < 32 || ch > 126)
  693. nEncCnt++;
  694. nWritten += 3;
  695. }
  696. *szDest++ = '?';
  697. *szDest++ = '=';
  698. *szDest = 0;
  699. nWritten += 2;
  700. *pnDestLen = nWritten;
  701. if (pnNumEncoded)
  702. *pnNumEncoded = nEncCnt;
  703. return TRUE;
  704. }
  705. //Get the required length to hold this encoding based on nSrcLen
  706. inline int BEncodeGetRequiredLength(int nSrcLen, int nCharsetLen) throw()
  707. {
  708. return Base64EncodeGetRequiredLength(nSrcLen)+7+nCharsetLen;
  709. }
  710. //BEncode pbSrcData with the charset specified by pszCharSet
  711. inline BOOL BEncode(BYTE* pbSrcData, int nSrcLen, LPSTR szDest, int* pnDestLen, LPCSTR pszCharSet) throw()
  712. {
  713. if (!pbSrcData || !szDest || !pszCharSet || !pnDestLen)
  714. {
  715. return FALSE;
  716. }
  717. ATLASSERT(*pnDestLen >= BEncodeGetRequiredLength(nSrcLen, ATL_MAX_ENC_CHARSET_LENGTH));
  718. int nWritten = 0;
  719. *szDest++ = '=';
  720. *szDest++ = '?';
  721. nWritten = 2;
  722. //output the charset
  723. while (*pszCharSet != '\0')
  724. {
  725. *szDest++ = *pszCharSet++;
  726. nWritten++;
  727. }
  728. *szDest++ = '?';
  729. *szDest++ = 'B';
  730. *szDest++ = '?';
  731. nWritten += 3;
  732. BOOL bRet = Base64Encode(pbSrcData, nSrcLen, szDest, pnDestLen, ATL_BASE64_FLAG_NOCRLF);
  733. if (!bRet)
  734. return FALSE;
  735. szDest += *pnDestLen;
  736. *szDest++ = '?';
  737. *szDest++ = '=';
  738. *szDest = 0;
  739. nWritten += 2;
  740. *pnDestLen += nWritten;
  741. return TRUE;
  742. }
  743. //=======================================================================
  744. // AtlUnicodeToUTF8
  745. //
  746. // Support for converting UNICODE strings to UTF8
  747. // (WideCharToMultiByte does not support UTF8 in Win98)
  748. //
  749. // This function is from the SDK implementation of
  750. // WideCharToMultiByte with the CP_UTF8 codepage
  751. //
  752. //=======================================================================
  753. //
  754. #define ATL_ASCII 0x007f
  755. #define ATL_UTF8_2_MAX 0x07ff // max UTF8 2-byte sequence (32 * 64 = 2048)
  756. #define ATL_UTF8_1ST_OF_2 0xc0 // 110x xxxx
  757. #define ATL_UTF8_1ST_OF_3 0xe0 // 1110 xxxx
  758. #define ATL_UTF8_1ST_OF_4 0xf0 // 1111 xxxx
  759. #define ATL_UTF8_TRAIL 0x80 // 10xx xxxx
  760. #define ATL_HIGHER_6_BIT(u) ((u) >> 12)
  761. #define ATL_MIDDLE_6_BIT(u) (((u) & 0x0fc0) >> 6)
  762. #define ATL_LOWER_6_BIT(u) ((u) & 0x003f)
  763. #define ATL_HIGH_SURROGATE_START 0xd800
  764. #define ATL_HIGH_SURROGATE_END 0xdbff
  765. #define ATL_LOW_SURROGATE_START 0xdc00
  766. #define ATL_LOW_SURROGATE_END 0xdfff
  767. ATL_NOINLINE inline
  768. int AtlUnicodeToUTF8(
  769. LPCWSTR wszSrc,
  770. int nSrc,
  771. LPSTR szDest,
  772. int nDest)
  773. {
  774. LPCWSTR pwszSrc = wszSrc;
  775. int nU8 = 0; // # of UTF8 chars generated
  776. DWORD dwSurrogateChar;
  777. WCHAR wchHighSurrogate = 0;
  778. BOOL bHandled;
  779. while ((nSrc--) && ((nDest == 0) || (nU8 < nDest)))
  780. {
  781. bHandled = FALSE;
  782. // Check if high surrogate is available
  783. if ((*pwszSrc >= ATL_HIGH_SURROGATE_START) && (*pwszSrc <= ATL_HIGH_SURROGATE_END))
  784. {
  785. if (nDest)
  786. {
  787. // Another high surrogate, then treat the 1st as normal Unicode character.
  788. if (wchHighSurrogate)
  789. {
  790. if ((nU8 + 2) < nDest)
  791. {
  792. szDest[nU8++] = (char)(ATL_UTF8_1ST_OF_3 | ATL_HIGHER_6_BIT(wchHighSurrogate));
  793. szDest[nU8++] = (char)(ATL_UTF8_TRAIL | ATL_MIDDLE_6_BIT(wchHighSurrogate));
  794. szDest[nU8++] = (char)(ATL_UTF8_TRAIL | ATL_LOWER_6_BIT(wchHighSurrogate));
  795. }
  796. else
  797. {
  798. // not enough buffer
  799. nSrc++;
  800. break;
  801. }
  802. }
  803. }
  804. else
  805. {
  806. nU8 += 3;
  807. }
  808. wchHighSurrogate = *pwszSrc;
  809. bHandled = TRUE;
  810. }
  811. if (!bHandled && wchHighSurrogate)
  812. {
  813. if ((*pwszSrc >= ATL_LOW_SURROGATE_START) && (*pwszSrc <= ATL_LOW_SURROGATE_END))
  814. {
  815. // valid surrogate pairs
  816. if (nDest)
  817. {
  818. if ((nU8 + 3) < nDest)
  819. {
  820. dwSurrogateChar = (((wchHighSurrogate-0xD800) << 10) + (*pwszSrc - 0xDC00) + 0x10000);
  821. szDest[nU8++] = (ATL_UTF8_1ST_OF_4 |
  822. (unsigned char)(dwSurrogateChar >> 18)); // 3 bits from 1st byte
  823. szDest[nU8++] = (ATL_UTF8_TRAIL |
  824. (unsigned char)((dwSurrogateChar >> 12) & 0x3f)); // 6 bits from 2nd byte
  825. szDest[nU8++] = (ATL_UTF8_TRAIL |
  826. (unsigned char)((dwSurrogateChar >> 6) & 0x3f)); // 6 bits from 3rd byte
  827. szDest[nU8++] = (ATL_UTF8_TRAIL |
  828. (unsigned char)(0x3f & dwSurrogateChar)); // 6 bits from 4th byte
  829. }
  830. else
  831. {
  832. // not enough buffer
  833. nSrc++;
  834. break;
  835. }
  836. }
  837. else
  838. {
  839. // we already counted 3 previously (in high surrogate)
  840. nU8 += 1;
  841. }
  842. bHandled = TRUE;
  843. }
  844. else
  845. {
  846. // Bad Surrogate pair : ERROR
  847. // Just process wchHighSurrogate , and the code below will
  848. // process the current code point
  849. if (nDest)
  850. {
  851. if ((nU8 + 2) < nDest)
  852. {
  853. szDest[nU8++] = (char)(ATL_UTF8_1ST_OF_3 | ATL_HIGHER_6_BIT(wchHighSurrogate));
  854. szDest[nU8++] = (char)(ATL_UTF8_TRAIL | ATL_MIDDLE_6_BIT(wchHighSurrogate));
  855. szDest[nU8++] = (char)(ATL_UTF8_TRAIL | ATL_LOWER_6_BIT(wchHighSurrogate));
  856. }
  857. else
  858. {
  859. // not enough buffer
  860. nSrc++;
  861. break;
  862. }
  863. }
  864. }
  865. wchHighSurrogate = 0;
  866. }
  867. if (!bHandled)
  868. {
  869. if (*pwszSrc <= ATL_ASCII)
  870. {
  871. // Found ASCII.
  872. if (nDest)
  873. {
  874. szDest[nU8] = (char)*pwszSrc;
  875. }
  876. nU8++;
  877. }
  878. else if (*pwszSrc <= ATL_UTF8_2_MAX)
  879. {
  880. // Found 2 byte sequence if < 0x07ff (11 bits).
  881. if (nDest)
  882. {
  883. if ((nU8 + 1) < nDest)
  884. {
  885. // Use upper 5 bits in first byte.
  886. // Use lower 6 bits in second byte.
  887. szDest[nU8++] = (char)(ATL_UTF8_1ST_OF_2 | (*pwszSrc >> 6));
  888. szDest[nU8++] = (char)(ATL_UTF8_TRAIL | ATL_LOWER_6_BIT(*pwszSrc));
  889. }
  890. else
  891. {
  892. // Error - buffer too small.
  893. nSrc++;
  894. break;
  895. }
  896. }
  897. else
  898. {
  899. nU8 += 2;
  900. }
  901. }
  902. else
  903. {
  904. // Found 3 byte sequence.
  905. if (nDest)
  906. {
  907. if ((nU8 + 2) < nDest)
  908. {
  909. // Use upper 4 bits in first byte.
  910. // Use middle 6 bits in second byte.
  911. // Use lower 6 bits in third byte.
  912. szDest[nU8++] = (char)(ATL_UTF8_1ST_OF_3 | ATL_HIGHER_6_BIT(*pwszSrc));
  913. szDest[nU8++] = (char)(ATL_UTF8_TRAIL | ATL_MIDDLE_6_BIT(*pwszSrc));
  914. szDest[nU8++] = (char)(ATL_UTF8_TRAIL | ATL_LOWER_6_BIT(*pwszSrc));
  915. }
  916. else
  917. {
  918. // Error - buffer too small.
  919. nSrc++;
  920. break;
  921. }
  922. }
  923. else
  924. {
  925. nU8 += 3;
  926. }
  927. }
  928. }
  929. pwszSrc++;
  930. }
  931. // If the last character was a high surrogate, then handle it as a normal unicode character.
  932. if ((nSrc < 0) && (wchHighSurrogate != 0))
  933. {
  934. if (nDest)
  935. {
  936. if ((nU8 + 2) < nDest)
  937. {
  938. szDest[nU8++] = (char)(ATL_UTF8_1ST_OF_3 | ATL_HIGHER_6_BIT(wchHighSurrogate));
  939. szDest[nU8++] = (char)(ATL_UTF8_TRAIL | ATL_MIDDLE_6_BIT(wchHighSurrogate));
  940. szDest[nU8++] = (char)(ATL_UTF8_TRAIL | ATL_LOWER_6_BIT(wchHighSurrogate));
  941. }
  942. else
  943. {
  944. nSrc++;
  945. }
  946. }
  947. }
  948. // Make sure the destination buffer was large enough.
  949. if (nDest && (nSrc >= 0))
  950. {
  951. return 0;
  952. }
  953. // Return the number of UTF-8 characters written.
  954. return nU8;
  955. }
  956. //=======================================================================
  957. // EscapeHTML, EscapeXML
  958. //
  959. // Support for escaping strings for use in HTML and XML documents
  960. //=======================================================================
  961. //
  962. #define ATL_ESC_FLAG_NONE 0
  963. #define ATL_ESC_FLAG_ATTR 1 // escape for attribute values
  964. #define ATL_ESC_FLAG_HTML 2 // escape for HTML -- special case of XML escaping
  965. inline int EscapeXML(const char *szIn, int nSrcLen, char *szEsc, int nDestLen, DWORD dwFlags = ATL_ESC_FLAG_NONE) throw()
  966. {
  967. ATLASSERT( szIn != NULL );
  968. int nCnt(0);
  969. int nCurrLen(nDestLen);
  970. int nInc(0);
  971. while (nSrcLen--)
  972. {
  973. switch (*szIn)
  974. {
  975. case '<': case '>':
  976. if ((szEsc != NULL) && (3 < nCurrLen))
  977. {
  978. *szEsc++ = '&';
  979. *szEsc++ = (*szIn=='<' ? 'l' : 'g');
  980. *szEsc++ = 't';
  981. *szEsc++ = ';';
  982. }
  983. nInc = 4;
  984. break;
  985. case '&':
  986. if ((szEsc != NULL) && (4 < nCurrLen))
  987. {
  988. memcpy(szEsc, "&amp;", 5);
  989. szEsc+= 5;
  990. }
  991. nInc = 5;
  992. break;
  993. case '\'': case '\"': // escaping for attribute values
  994. if ((dwFlags & ATL_ESC_FLAG_ATTR) && (*szIn == '\"' || (dwFlags & ATL_ESC_FLAG_HTML)==0))
  995. {
  996. if ((szEsc != NULL) && (5 < nCurrLen))
  997. {
  998. memcpy(szEsc, (*szIn == '\'' ? "&apos;" : "&quot;"), 6);
  999. szEsc+= 6;
  1000. }
  1001. nInc = 6;
  1002. break;
  1003. }
  1004. // fall through
  1005. default:
  1006. if (((unsigned char)*szIn) > 31 || *szIn == '\r' || *szIn == '\n' || *szIn == '\t')
  1007. {
  1008. if (szEsc && 0 < nCurrLen)
  1009. {
  1010. *szEsc++ = *szIn;
  1011. }
  1012. nInc = 1;
  1013. }
  1014. else
  1015. {
  1016. if ((szEsc != NULL) && (5 < nCurrLen))
  1017. {
  1018. char szHex[7];
  1019. sprintf(szHex, "&#x%2X;", (unsigned char)*szIn);
  1020. memcpy(szEsc, szHex, 6);
  1021. szEsc+= 6;
  1022. }
  1023. nInc = 6;
  1024. }
  1025. }
  1026. nCurrLen -= nInc;
  1027. nCnt+= nInc;
  1028. szIn++;
  1029. }
  1030. if ((szEsc != NULL) && (nCurrLen < 0))
  1031. {
  1032. return 0;
  1033. }
  1034. return nCnt;
  1035. }
  1036. // wide-char version
  1037. inline int EscapeXML(const wchar_t *szIn, int nSrcLen, wchar_t *szEsc, int nDestLen, DWORD dwFlags = ATL_ESC_FLAG_NONE) throw()
  1038. {
  1039. ATLASSERT( szIn != NULL );
  1040. int nCnt(0);
  1041. int nCurrLen(nDestLen);
  1042. int nInc(0);
  1043. while (nSrcLen--)
  1044. {
  1045. switch (*szIn)
  1046. {
  1047. case L'<': case L'>':
  1048. if ((szEsc != NULL) && (3 < nCurrLen))
  1049. {
  1050. *szEsc++ = L'&';
  1051. *szEsc++ = (*szIn==L'<' ? L'l' : L'g');
  1052. *szEsc++ = L't';
  1053. *szEsc++ = L';';
  1054. }
  1055. nInc = 4;
  1056. break;
  1057. case L'&':
  1058. if ((szEsc != NULL) && (4 < nCurrLen))
  1059. {
  1060. memcpy(szEsc, L"&amp;", 5*sizeof(wchar_t));
  1061. szEsc+= 5;
  1062. }
  1063. nInc = 5;
  1064. break;
  1065. case L'\'': case L'\"': // escaping for attribute values
  1066. if ((dwFlags & ATL_ESC_FLAG_ATTR) && (*szIn == L'\"' || (dwFlags & ATL_ESC_FLAG_HTML)==0))
  1067. {
  1068. if ((szEsc != NULL) && (5 < nCurrLen))
  1069. {
  1070. memcpy(szEsc, (*szIn == L'\'' ? L"&apos;" : L"&quot;"), 6*sizeof(wchar_t));
  1071. szEsc+= 6;
  1072. }
  1073. nInc = 6;
  1074. break;
  1075. }
  1076. // fall through
  1077. default:
  1078. if ((*szIn < 0x0020) || (*szIn > 0x007E))
  1079. {
  1080. if ((szEsc != NULL) && (8 < nCurrLen))
  1081. {
  1082. wchar_t szHex[9];
  1083. wsprintfW(szHex, L"&#x%04X;", *szIn);
  1084. memcpy(szEsc, szHex, 8*sizeof(wchar_t));
  1085. szEsc+= 8;
  1086. }
  1087. nInc = 8;
  1088. }
  1089. else
  1090. {
  1091. if ((szEsc != NULL) && (0 < nCurrLen))
  1092. {
  1093. *szEsc++ = *szIn;
  1094. }
  1095. nInc = 1;
  1096. }
  1097. }
  1098. nCurrLen -= nInc;
  1099. nCnt+= nInc;
  1100. szIn++;
  1101. }
  1102. if ((szEsc != NULL) && (nCurrLen < 0))
  1103. {
  1104. return 0;
  1105. }
  1106. return nCnt;
  1107. }
  1108. inline int EscapeHTML(const char *szIn, int nSrcLen, char *szEsc, int nDestLen, DWORD dwFlags = ATL_ESC_FLAG_NONE) throw()
  1109. {
  1110. return EscapeXML(szIn, nSrcLen, szEsc, nDestLen, dwFlags | ATL_ESC_FLAG_HTML);
  1111. }
  1112. } // namespace ATL
  1113. #endif // __ATLENC_H__