Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

560 lines
21 KiB

  1. //-----------------------------------------------------------------------------
  2. //
  3. //
  4. // File: dsn_utf7.cpp
  5. //
  6. // Description:
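  7. //      Implementation of CUTF7ConversionContext, which converts UNICODE
  7. //      resource strings to UTF7 (RFC1642), optionally as an RFC1522
  7. //      encoded subject header.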
  8. // Author: Mike Swafford (MikeSwa)
  9. //
  10. // History:
  11. // 10/20/98 - MikeSwa Created
  12. //
  13. // Copyright (C) 1998 Microsoft Corporation
  14. //
  15. //-----------------------------------------------------------------------------
  16. #include "precomp.h"
  17. //---[ CUTF7ConversionContext::chNeedsEncoding ]--------------------------------
  18. //
  19. //
  20. // Description:
  21. // Determines if a character needs to be encoded... returns it's ASCII
  22. // equivalent if not.
  23. // Parameters:
  24. // wch Wide character to check
  25. // Returns:
  26. // 0, if the character needs encoding
  27. // The ASCII equivalent if not.
  28. // History:
  29. // 10/23/98 - MikeSwa Created
  30. //
  31. //-----------------------------------------------------------------------------
  32. CHAR CUTF7ConversionContext::chNeedsEncoding(WCHAR wch)
  33. {
  34. CHAR ch = 0;
  35. //First look for characters that are a straight ASCII conversion for all
  36. //cases. This is Set D and Set O in the RFC1642
  37. if (((L'a' <= wch) && (L'z' >= wch)) ||
  38. ((L'A' <= wch) && (L'Z' >= wch)) ||
  39. ((L'0' <= wch) && (L'9' >= wch)) ||
  40. ((L'!'<= wch) && (L'*' >= wch)) ||
  41. ((L',' <= wch) && (L'/' >= wch)) ||
  42. ((L';' <= wch) && (L'@' >= wch)) ||
  43. ((L']' <= wch) && (L'`' >= wch)) ||
  44. ((L'{' <= wch) && (L'}' >= wch)) ||
  45. (L' ' == wch) || (L'\t' == wch) ||
  46. (L'[' == wch))
  47. {
  48. ch = (CHAR) wch & 0x00FF;
  49. }
  50. //Check things are not converted for content, but are for headers
  51. else if (!(UTF7_ENCODING_RFC1522_SUBJECT & m_dwCurrentState))
  52. {
  53. //Handle whitespace
  54. if ((L'\r' == wch) || (L'\n' == wch))
  55. ch = (CHAR) wch & 0x00FF;
  56. }
  57. //NOTE - We not not want to handle UNICODE <LINE SEPARATOR> (0x2028)
  58. //and <PARAGRAPH SEPARATOR> (0x2029)... which should ideally be
  59. //converted to CRLF. We will consider this a mal-formed resource. ASSERT
  60. //in Debug and encode as UNICODE on retail.
  61. _ASSERT((0x2028 != wch) && "Malformed Resource String");
  62. _ASSERT((0x2029 != wch) && "Malformed Resource String");
  63. return ch;
  64. }
  65. //---[ UTF7ConversionContext::CUTF7ConversionContext ]-------------------------
  66. //
  67. //
  68. // Description:
  69. // Constuctor for UTF7ConversionContext object
  70. // Parameters:
  71. // IN fIsRFC1522Subject TRUE if we need to worry about converting
  72. // to an RFC1522 Subject (defaults to FALSE)
  73. // Returns:
  74. // -
  75. // History:
  76. // 10/20/98 - MikeSwa Created
  77. //
  78. //-----------------------------------------------------------------------------
  79. CUTF7ConversionContext::CUTF7ConversionContext(BOOL fIsRFC1522Subject)
  80. {
  81. m_dwSignature = UTF7_CONTEXT_SIG;
  82. m_dwCurrentState = UTF7_INITIAL_STATE;
  83. if (fIsRFC1522Subject)
  84. m_dwCurrentState |= UTF7_ENCODING_RFC1522_SUBJECT;
  85. m_cBytesSinceCRLF = 0;
  86. }
  87. //---[ <function> ]------------------------------------------------------------
  88. //
  89. //
  90. // Description:
  91. // Writes a single character to the output buffer... used by
  92. // fConvertBuffer. Also updates relevant member vars/
  93. // Parameters:
  94. // IN ch Character to write
  95. // IN OUT ppbBuffer Buffer to write it to
  96. // IN OUT pcbWritten Running total of bytes written
  97. // Returns:
  98. // -
  99. // History:
  100. // 10/26/98 - MikeSwa Created
  101. //
  102. //-----------------------------------------------------------------------------
  103. inline void CUTF7ConversionContext::WriteChar(IN CHAR ch,
  104. IN OUT BYTE ** ppbBuffer,
  105. IN OUT DWORD *pcbWritten)
  106. {
  107. _ASSERT(ppbBuffer);
  108. _ASSERT(*ppbBuffer);
  109. _ASSERT(pcbWritten);
  110. **ppbBuffer = (BYTE) ch;
  111. (*ppbBuffer)++;
  112. (*pcbWritten)++;
  113. m_cBytesSinceCRLF++;
  114. if (UTF7_ENCODING_RFC1522_SUBJECT & m_dwCurrentState)
  115. _ASSERT(UTF7_RFC1522_MAX_LENGTH >= m_cBytesSinceCRLF);
  116. }
  117. //---[ CUTF7ConversionContext::fWriteString ]----------------------------------
  118. //
  119. //
  120. // Description:
  121. // Used by fConvertBuffer to write a string to the outputt buffer.
  122. // Updates m_cBytesSinceCRLF in the process.
  123. // Parameters:
  124. // IN szString String to write
  125. // IN cbString Size of string
  126. // IN cbBuffer Total size of output buffer
  127. // IN OUT ppbBuffer Buffer to write it to
  128. // IN OUT pcbWritten Running total of bytes written
  129. // Returns:
  130. //
  131. // History:
  132. // 10/26/98 - MikeSwa Created
  133. //
  134. //-----------------------------------------------------------------------------
  135. inline BOOL CUTF7ConversionContext::fWriteString(IN LPSTR szString, IN DWORD cbString,
  136. IN DWORD cbBuffer,
  137. IN OUT BYTE ** ppbBuffer,
  138. IN OUT DWORD *pcbWritten)
  139. {
  140. _ASSERT(szString);
  141. _ASSERT(ppbBuffer);
  142. _ASSERT(*ppbBuffer);
  143. _ASSERT(pcbWritten);
  144. if (cbString > (cbBuffer - *pcbWritten))
  145. return FALSE; //There is not enough room to write our buffer
  146. memcpy(*ppbBuffer, szString, cbString);
  147. (*ppbBuffer) += cbString;
  148. (*pcbWritten) += cbString;
  149. m_cBytesSinceCRLF += cbString;
  150. if (UTF7_ENCODING_RFC1522_SUBJECT & m_dwCurrentState)
  151. _ASSERT(UTF7_RFC1522_MAX_LENGTH >= m_cBytesSinceCRLF);
  152. return TRUE;
  153. }
  154. //---[ CUTF7ConversionContext::fSubjectNeedsEncodin ]--------------------------
  155. //
  156. //
  157. // Description:
  158. // Determines if a subject needs to be UTF7 encoded... or can be
  159. // transmitted as is.
  160. // Parameters:
  161. // IN pbInputBuffer Pointer to UNICODE string buffer
  162. // IN cbInputBuffer Size (in bytes) of string buffer
  163. // Returns:
  164. // TRUE if we need to encode the buffer
  165. // FALSE if we do not
  166. // History:
  167. // 10/26/98 - MikeSwa Created
  168. //
  169. //-----------------------------------------------------------------------------
  170. BOOL CUTF7ConversionContext::fSubjectNeedsEncoding(IN BYTE *pbBuffer,
  171. IN DWORD cbBuffer)
  172. {
  173. LPWSTR wszBuffer = (LPWSTR) pbBuffer;
  174. LPWSTR wszBufferEnd = (LPWSTR) (pbBuffer + cbBuffer);
  175. WCHAR wch = L'\0';
  176. while (wszBuffer < wszBufferEnd)
  177. {
  178. wch = *wszBuffer;
  179. if ((127 < wch) || (L'\r' == wch) || (L'\n' == wch))
  180. {
  181. //Encountered a non-valid char... must encode
  182. return TRUE;
  183. }
  184. wszBuffer++;
  185. }
  186. return FALSE;
  187. }
  188. //---[ UTF7ConversionContext::fConvertBufferTo7BitASCII ]----------------------
  189. //
  190. //
  191. // Description:
  192. // Converts a buffer that is UNICODE contianing only 7bit ASCII characters
  193. // to an ASCII buffer.
  194. // Parameters:
  195. // IN pbInputBuffer Pointer to UNICODE string buffer
  196. // IN cbInputBuffer Size (in bytes) of string buffer
  197. // IN pbOutputBuffer Buffer to write data to
  198. // IN cbOutputBuffer Size of buffer to write data to
  199. // OUT pcbWritten # of bytes written to output bufferbuffer
  200. // OUT pcbRead # of bytes read from Input buffer
  201. // Returns:
  202. // TRUE if entire input buffer was processed
  203. // FALSE if buffer needs to be processe some more
  204. // History:
  205. // 10/26/98 - MikeSwa Created
  206. //
  207. //-----------------------------------------------------------------------------
  208. BOOL CUTF7ConversionContext::fConvertBufferTo7BitASCII(
  209. IN PBYTE pbInputBuffer,
  210. IN DWORD cbInputBuffer,
  211. IN PBYTE pbOutputBuffer,
  212. IN DWORD cbOutputBuffer,
  213. OUT DWORD *pcbWritten,
  214. OUT DWORD *pcbRead)
  215. {
  216. LPWSTR wszBuffer = (LPWSTR) pbInputBuffer;
  217. LPWSTR wszBufferEnd = (LPWSTR) (pbInputBuffer + cbInputBuffer);
  218. WCHAR wch = L'\0';
  219. BYTE *pbCurrentOut = pbOutputBuffer;
  220. _ASSERT(pbCurrentOut);
  221. while ((*pcbWritten < cbOutputBuffer) && (wszBuffer < wszBufferEnd))
  222. {
  223. _ASSERT(!(0xFF80 & *wszBuffer)); //must be only 7-bit
  224. WriteChar((CHAR) *wszBuffer, &pbCurrentOut, pcbWritten);
  225. wszBuffer++;
  226. *pcbRead += sizeof(WCHAR);
  227. }
  228. return (wszBuffer == wszBufferEnd);
  229. }
  230. //---[ CUTF7ConversionContext::fUTF7EncodeBuffer ]------------------------------
  231. //
  232. //
  233. // Description:
  234. // Converts buffer to UTF7 Encoding
  235. //
  236. // This function implements the main state machine for UTF7 encoding. It
  237. // handles encoding of both RFC1522 subject encoding as well as regular
  238. // UTF7 content-encoding.
  239. // Parameters:
  240. // IN pbInputBuffer Pointer to UNICODE string buffer
  241. // IN cbInputBuffer Size (in bytes) of string buffer
  242. // IN pbOutputBuffer Buffer to write data to
  243. // IN cbOutputBuffer Size of buffer to write data to
  244. // OUT pcbWritten # of bytes written to output bufferbuffer
  245. // OUT pcbRead # of bytes read from Input buffer
  246. // Returns:
  247. // TRUE if entire input buffer was processed
  248. // FALSE if buffer needs to be processe some more
  249. // History:
  250. // 10/26/98 - MikeSwa Created
  251. //
  252. //-----------------------------------------------------------------------------
  253. BOOL CUTF7ConversionContext::fUTF7EncodeBuffer(
  254. IN PBYTE pbInputBuffer,
  255. IN DWORD cbInputBuffer,
  256. IN PBYTE pbOutputBuffer,
  257. IN DWORD cbOutputBuffer,
  258. OUT DWORD *pcbWritten,
  259. OUT DWORD *pcbRead)
  260. {
  261. LPWSTR wszBuffer = (LPWSTR) pbInputBuffer;
  262. WCHAR wch = L'\0';
  263. CHAR ch = '\0';
  264. BYTE *pbCurrentOut = pbOutputBuffer;
  265. BOOL fDone = FALSE;
  266. //Use loop to make sure we never exceed our buffers
  267. while (*pcbWritten < cbOutputBuffer)
  268. {
  269. //See if we need to handle any state that does not require reading
  270. //from the input buffer.
  271. if (UTF7_FLUSH_BUFFERS & m_dwCurrentState)
  272. {
  273. //We have converted characters buffered up... we need to write them
  274. //to the output buffer
  275. if (!m_Base64Stream.fNextValidChar(&ch))
  276. {
  277. //Nothing left to write
  278. m_dwCurrentState ^= UTF7_FLUSH_BUFFERS;
  279. continue;
  280. }
  281. WriteChar(ch, &pbCurrentOut, pcbWritten);
  282. }
  283. else if (UTF7_RFC1522_CHARSET_PENDING & m_dwCurrentState)
  284. {
  285. //We need to start with the =?charset?Q?+ stuff
  286. if (!fWriteString(UTF7_RFC1522_ENCODE_START,
  287. sizeof(UTF7_RFC1522_ENCODE_START)-sizeof(CHAR),
  288. cbOutputBuffer, &pbCurrentOut, pcbWritten))
  289. {
  290. return FALSE;
  291. }
  292. m_dwCurrentState ^= UTF7_RFC1522_CHARSET_PENDING;
  293. m_dwCurrentState |= (UTF7_ENCODING_WORD | UTF7_RFC1522_CURRENTLY_ENCODING);
  294. }
  295. else if (UTF7_WORD_CLOSING_PENDING & m_dwCurrentState)
  296. {
  297. //Need to write closing '-'
  298. m_dwCurrentState ^= UTF7_WORD_CLOSING_PENDING;
  299. WriteChar(UTF7_STOP_STREAM_CHAR, &pbCurrentOut, pcbWritten);
  300. }
  301. else if (UTF7_RFC1522_CLOSING_PENDING & m_dwCurrentState)
  302. {
  303. if (!fWriteString(UTF7_RFC1522_ENCODE_STOP,
  304. sizeof(UTF7_RFC1522_ENCODE_STOP)-sizeof(CHAR),
  305. cbOutputBuffer, &pbCurrentOut, pcbWritten))
  306. {
  307. return FALSE;
  308. }
  309. m_dwCurrentState ^= (UTF7_RFC1522_CLOSING_PENDING | UTF7_FOLD_HEADER_PENDING);
  310. }
  311. else if (UTF7_FOLD_HEADER_PENDING & m_dwCurrentState)
  312. {
  313. if (*pcbRead >= cbInputBuffer) //there is no more text to read.. we don't need to wrap
  314. {
  315. fDone = TRUE;
  316. m_dwCurrentState ^= UTF7_FOLD_HEADER_PENDING;
  317. break;
  318. }
  319. m_cBytesSinceCRLF = 0; //We're writing a CRLF now
  320. if (!fWriteString(UTF7_RFC1522_PHRASE_SEPARATOR,
  321. sizeof(UTF7_RFC1522_PHRASE_SEPARATOR)-sizeof(CHAR),
  322. cbOutputBuffer, &pbCurrentOut, pcbWritten))
  323. {
  324. return FALSE;
  325. }
  326. m_cBytesSinceCRLF = sizeof(CHAR);//set count to leading tab
  327. m_dwCurrentState ^= UTF7_FOLD_HEADER_PENDING;
  328. }
  329. else if (*pcbRead >= cbInputBuffer)
  330. {
  331. //We have read our entire input buffer... now we need to handle
  332. //any sort of cleanup.
  333. if (m_Base64Stream.fTerminateStream(TRUE))
  334. {
  335. _ASSERT(UTF7_ENCODING_WORD & m_dwCurrentState);
  336. m_dwCurrentState |= UTF7_FLUSH_BUFFERS;
  337. }
  338. else if (UTF7_ENCODING_WORD & m_dwCurrentState)
  339. {
  340. //We have already written everything to output.. but we
  341. //still need to write the close of the stream
  342. _ASSERT(!(UTF7_WORD_CLOSING_PENDING & m_dwCurrentState));
  343. m_dwCurrentState ^= (UTF7_ENCODING_WORD | UTF7_WORD_CLOSING_PENDING);
  344. }
  345. else if (UTF7_RFC1522_CURRENTLY_ENCODING & m_dwCurrentState)
  346. {
  347. //Need to write closing ?=
  348. m_dwCurrentState |= UTF7_RFC1522_CLOSING_PENDING;
  349. }
  350. else
  351. {
  352. fDone = TRUE;
  353. break; //We're done
  354. }
  355. }
  356. else //need to process more of the input buffer
  357. {
  358. wch = *wszBuffer;
  359. ch = chNeedsEncoding(wch);
  360. //Are we at the end of a RFC1522 phrase? (ch will be 0)
  361. if ((UTF7_RFC1522_CURRENTLY_ENCODING & m_dwCurrentState) &&
  362. !ch && iswspace(wch))
  363. {
  364. //reset state
  365. if (UTF7_ENCODING_WORD & m_dwCurrentState)
  366. m_dwCurrentState |= UTF7_WORD_CLOSING_PENDING; //need to write -
  367. m_dwCurrentState |= UTF7_RFC1522_CLOSING_PENDING;
  368. m_dwCurrentState &= ~(UTF7_ENCODING_WORD |
  369. UTF7_RFC1522_CURRENTLY_ENCODING);
  370. //eat up any extra whitespace
  371. do
  372. {
  373. wszBuffer++;
  374. *pcbRead += sizeof(WCHAR);
  375. if (*pcbRead >= cbInputBuffer)
  376. break;
  377. wch = *wszBuffer;
  378. } while (iswspace(wch));
  379. }
  380. else if (UTF7_ENCODING_WORD & m_dwCurrentState)
  381. {
  382. if (ch) //we need to stop encoding
  383. {
  384. m_Base64Stream.fTerminateStream(TRUE);
  385. _ASSERT(!(UTF7_WORD_CLOSING_PENDING & m_dwCurrentState));
  386. m_dwCurrentState ^= (UTF7_ENCODING_WORD | UTF7_WORD_CLOSING_PENDING | UTF7_FLUSH_BUFFERS);
  387. }
  388. else if (!m_Base64Stream.fProcessWideChar(wch))
  389. {
  390. //flush our buffers and then continue on as we were
  391. m_dwCurrentState |= UTF7_FLUSH_BUFFERS;
  392. }
  393. else
  394. {
  395. //The write worked...
  396. wszBuffer++;
  397. *pcbRead += sizeof(WCHAR);
  398. }
  399. }
  400. else if (!ch)
  401. {
  402. //we need to start encoding
  403. if ((UTF7_ENCODING_RFC1522_SUBJECT & m_dwCurrentState) &&
  404. !(UTF7_RFC1522_CURRENTLY_ENCODING & m_dwCurrentState))
  405. {
  406. //We need to start with the =?charset?Q?+ stuff
  407. m_dwCurrentState |= UTF7_RFC1522_CHARSET_PENDING;
  408. }
  409. else
  410. {
  411. //We are either not encoding RFC1522... or are already
  412. //in the middle of a RFC1522 encoded phrase.. in this case
  413. //we only need to write the '+'
  414. WriteChar(UTF7_START_STREAM_CHAR, &pbCurrentOut, pcbWritten);
  415. m_dwCurrentState |= UTF7_ENCODING_WORD;
  416. }
  417. }
  418. else
  419. {
  420. //
  421. // NOTE: It is not clear why we do not close out the UTF7 word
  422. // i.e. why we do not go into the UTF7 word closing pending state
  423. // like we do when we encounter an iswspace char. This means that
  424. // when a string is <jpn-char><0x0020><jpn-char> we will encode it
  425. // as =?charset?Q?+stuff, while if it is <jpn-char><0x3000><jpn-char>
  426. // where 0x3000 == Japanese whitespace, we will encode as:
  427. // =?charset?Q?+stuff<CRLF>=?charset?Q?+stuff.
  428. //
  429. // If this is "fixed" in the future (i.e. we start closing out UTF7
  430. // encodings when we encounter 0x0020) be aware that we rely on the
  431. // current (non-closing) functionality for HrWriteModifiedUnicodeString.
  432. //
  433. //we are not encoding... and character can be written normally
  434. WriteChar(ch, &pbCurrentOut, pcbWritten);
  435. wszBuffer++;
  436. *pcbRead += sizeof(WCHAR);
  437. //if it was a space... and we are doing headers... lets fold
  438. //the header
  439. if ((UTF7_ENCODING_RFC1522_SUBJECT & m_dwCurrentState)
  440. && isspace((UCHAR)ch))
  441. {
  442. //eat up any extra whitespace
  443. while (iswspace(*wszBuffer))
  444. {
  445. wszBuffer++;
  446. *pcbRead += sizeof(WCHAR);
  447. if (*pcbRead >= cbInputBuffer)
  448. break;
  449. }
  450. m_dwCurrentState |= UTF7_FOLD_HEADER_PENDING;
  451. }
  452. }
  453. }
  454. }
  455. return fDone;
  456. }
  457. //---[ CUTF7ConversionContext::fConvertBuffer ]--------------------------------
  458. //
  459. //
  460. // Description:
  461. // Converts UNICODE string to UTF7
  462. // Parameters:
  463. // IN fASCII TRUE if buffer is ASCII
  464. // IN pbInputBuffer Pointer to UNICODE string buffer
  465. // IN cbInputBuffer Size (in bytes) of string buffer
  466. // IN pbOutputBuffer Buffer to write data to
  467. // IN cbOutputBuffer Size of buffer to write data to
  468. // OUT pcbWritten # of bytes written to output bufferbuffer
  469. // OUT pcbRead # of bytes read from Input buffer
  470. // Returns:
  471. // TRUE if entire input buffer was processed
  472. // FALSE if buffer needs to be processe some more
  473. // History:
  474. // 10/21/98 - MikeSwa Created
  475. //
  476. //-----------------------------------------------------------------------------
  477. BOOL CUTF7ConversionContext::fConvertBuffer(
  478. IN BOOL fASCII,
  479. IN PBYTE pbInputBuffer,
  480. IN DWORD cbInputBuffer,
  481. IN PBYTE pbOutputBuffer,
  482. IN DWORD cbOutputBuffer,
  483. OUT DWORD *pcbWritten,
  484. OUT DWORD *pcbRead)
  485. {
  486. _ASSERT(pcbWritten);
  487. _ASSERT(pcbRead);
  488. _ASSERT(pbInputBuffer);
  489. _ASSERT(pbOutputBuffer);
  490. //Let the default implementation handle straight ASCII
  491. if (fASCII)
  492. {
  493. return CDefaultResourceConversionContext::fConvertBuffer(fASCII,
  494. pbInputBuffer, cbInputBuffer, pbOutputBuffer, cbOutputBuffer,
  495. pcbWritten, pcbRead);
  496. }
  497. //Now we know it is UNICODE... cbInputBuffer should be a multiple of sizeof(WCHAR)
  498. _ASSERT(0 == (cbInputBuffer % sizeof(WCHAR)));
  499. //If we are encoding the subject, and we haven't classified it yet,
  500. //we need to check to see if it needs encoding
  501. if (UTF7_ENCODING_RFC1522_SUBJECT & m_dwCurrentState &&
  502. !((UTF7_SOME_INVALID_RFC822_CHARS | UFT7_ALL_VALID_RFC822_CHARS) &
  503. m_dwCurrentState))
  504. {
  505. if (fSubjectNeedsEncoding(pbInputBuffer, cbInputBuffer))
  506. m_dwCurrentState |= UTF7_SOME_INVALID_RFC822_CHARS;
  507. else
  508. m_dwCurrentState |= UFT7_ALL_VALID_RFC822_CHARS;
  509. }
  510. *pcbWritten = 0;
  511. *pcbRead = 0;
  512. if (UFT7_ALL_VALID_RFC822_CHARS & m_dwCurrentState)
  513. {
  514. return fConvertBufferTo7BitASCII(pbInputBuffer, cbInputBuffer, pbOutputBuffer,
  515. cbOutputBuffer, pcbWritten, pcbRead);
  516. }
  517. else //we must convert
  518. {
  519. return fUTF7EncodeBuffer(pbInputBuffer, cbInputBuffer, pbOutputBuffer,
  520. cbOutputBuffer, pcbWritten, pcbRead);
  521. }
  522. }