Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

412 lines
13 KiB

  1. // ============================================================================
  2. // Internet Character Set Conversion: Input from UTF-7
  3. // ============================================================================
  4. #include "private.h"
  5. #include "fechrcnv.h"
  6. #include "utf7obj.h"
  7. //+-----------------------------------------------------------------------
  8. //
  9. // Function: IsBase64
  10. //
  11. // Synopsis: We use the following table to quickly determine if we have
  12. // a valid base64 character.
  13. //
  14. //------------------------------------------------------------------------
  15. static UCHAR g_aBase64[256] =
  16. {
  17. /* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F, */
  18. /* 00-0f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  19. /* 10-1f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  20. /* 20-2f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63,
  21. /* 30-3f */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 255, 255, 255,
  22. /* 40-4f */ 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
  23. /* 50-5f */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255,
  24. /* 60-6f */ 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  25. /* 70-7f */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255,
  26. /* 80-8f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  27. /* 90-9f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  28. /* a0-af */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  29. /* b0-bf */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  30. /* c0-cf */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  31. /* d0-df */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  32. /* e0-ef */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  33. /* f0-ff */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
  34. };
  35. // Direct encoded ASCII table
  36. static UCHAR g_aDirectChar[128] =
  37. {
  38. /* 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F, */
  39. /* 00-0f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 72, 73, 255, 255, 74, 255, 255,
  40. /* 10-1f */ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
  41. /* 20-2f */ 71, 255, 255, 255, 255, 255, 255, 62, 63, 64, 255, 255, 65, 66, 67, 68,
  42. /* 30-3f */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 69, 255, 255, 255, 255, 70,
  43. /* 40-4f */ 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
  44. /* 50-5f */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255,
  45. /* 60-6f */ 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  46. /* 70-7f */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255,
  47. };
  48. // Base64 byte value table
  49. static UCHAR g_aInvBase64[] = { "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" };
  50. static inline BOOL
  51. IsBase64(UCHAR t )
  52. {
  53. return g_aBase64[t] < 64;
  54. }
  55. /******************************************************************************
  56. ************************** C O N S T R U C T O R **************************
  57. ******************************************************************************/
  58. CInccUTF7In::CInccUTF7In(UINT uCodePage, int nCodeSet) : CINetCodeConverter(uCodePage, nCodeSet)
  59. {
  60. Reset(); // initialization
  61. return ;
  62. }
  63. /******************************************************************************
  64. ******************************* R E S E T *********************************
  65. ******************************************************************************/
  66. void CInccUTF7In::Reset()
  67. {
  68. m_pfnConv = ConvMain;
  69. m_pfnCleanUp = CleanUpMain;
  70. m_fUTF7Mode = FALSE ;
  71. m_nBitCount = 0 ;
  72. m_tcUnicode = 0 ;
  73. m_nOutCount = 0 ;
  74. return ;
  75. }
  76. /******************************************************************************
  77. ************************* C O N V E R T C H A R *************************
  78. ******************************************************************************/
  79. HRESULT CInccUTF7In::ConvertChar(UCHAR tc, int cchSrc)
  80. {
  81. BOOL fDone = (this->*m_pfnConv)(tc);
  82. if (fDone)
  83. return S_OK;
  84. else
  85. return E_FAIL;
  86. }
  87. /******************************************************************************
  88. ***************************** C L E A N U P *****************************
  89. ******************************************************************************/
  90. BOOL CInccUTF7In::CleanUp()
  91. {
  92. return (this->*m_pfnCleanUp)();
  93. }
  94. /******************************************************************************
  95. **************************** C O N V M A I N ****************************
  96. ******************************************************************************/
  97. BOOL CInccUTF7In::ConvMain(UCHAR tc)
  98. {
  99. BOOL fDone = TRUE;
  100. // are we in UTF-7 mode ?
  101. if (m_fUTF7Mode )
  102. {
  103. if ( IsBase64(tc) )
  104. {
  105. UCHAR t64, outc ;
  106. LONG tcUnicode ;
  107. // save the Base64 value and update bit count
  108. t64 = g_aBase64[tc] ;
  109. m_tcUnicode = m_tcUnicode << 6 | t64 ;
  110. m_nBitCount += 6 ;
  111. // see if we accumulate enough bits
  112. if ( m_nBitCount >= 16 )
  113. {
  114. // get higher 16 bits data from buffer
  115. tcUnicode = m_tcUnicode >> ( m_nBitCount - 16 ) ;
  116. // output one Unicode char
  117. outc = (UCHAR) tcUnicode ;
  118. Output( outc );
  119. outc = (UCHAR) ( tcUnicode >> 8 ) ;
  120. fDone = Output( outc );
  121. // update output char count
  122. m_nOutCount ++ ;
  123. m_nBitCount -= 16 ;
  124. }
  125. }
  126. // not a Base64 char, reset UTF-7 mode
  127. else
  128. {
  129. // special case +- decodes to +
  130. if ( tc == '-' && m_nOutCount == 0 && m_nBitCount == 0 )
  131. {
  132. Output('+');
  133. fDone=Output(0);
  134. }
  135. // absorb shiht-out char '-', otherwise output char
  136. else if ( tc != '-')
  137. {
  138. Output(tc);
  139. fDone=Output(0);
  140. }
  141. // reset variables and UTF7Mode
  142. m_fUTF7Mode = FALSE ;
  143. m_nBitCount = 0 ;
  144. m_tcUnicode = 0 ;
  145. m_nOutCount = 0 ;
  146. }
  147. }
  148. // is it a UTF-7 shift-in char ?
  149. else if ( tc == '+' )
  150. {
  151. m_fUTF7Mode = TRUE ;
  152. m_nBitCount = 0 ;
  153. m_tcUnicode = 0 ;
  154. m_nOutCount = 0 ;
  155. }
  156. else
  157. // convert ASCII directly to Unicode if it is not in UFT-7 mode
  158. {
  159. Output(tc);
  160. fDone = Output(0);
  161. }
  162. return fDone;
  163. }
  164. /******************************************************************************
  165. ************************ C L E A N U P M A I N ************************
  166. ******************************************************************************/
  167. BOOL CInccUTF7In::CleanUpMain()
  168. {
  169. return TRUE;
  170. }
  171. int CInccUTF7In::GetUnconvertBytes()
  172. {
  173. return 0 ;
  174. }
  175. DWORD CInccUTF7In::GetConvertMode()
  176. {
  177. DWORD dwMode ;
  178. if ( m_fUTF7Mode )
  179. {
  180. dwMode = ( m_tcUnicode & 0xffff ) | ( m_nBitCount << 16 ) ;
  181. if ( dwMode == 0 )
  182. dwMode = 1L ; // it is ok, since bitcount is 0
  183. }
  184. else
  185. dwMode = 0 ;
  186. return dwMode;
  187. }
  188. void CInccUTF7In::SetConvertMode(DWORD mode)
  189. {
  190. Reset(); // initialization
  191. if (mode)
  192. {
  193. m_fUTF7Mode = TRUE ;
  194. m_tcUnicode = ( mode & 0x7fff );
  195. m_nBitCount = ( mode >> 16 ) & 0xffff ;
  196. }
  197. else
  198. m_fUTF7Mode = FALSE ;
  199. }
  200. // ============================================================================
  201. // Internet Character Set Conversion: Output to UTF-7
  202. // ============================================================================
  203. /******************************************************************************
  204. ************************** C O N S T R U C T O R **************************
  205. ******************************************************************************/
  206. CInccUTF7Out::CInccUTF7Out(UINT uCodePage, int nCodeSet) : CINetCodeConverter(uCodePage, nCodeSet)
  207. {
  208. Reset(); // initialization
  209. return ;
  210. }
  211. /******************************************************************************
  212. ******************************* R E S E T *********************************
  213. ******************************************************************************/
  214. void CInccUTF7Out::Reset()
  215. {
  216. m_fDoubleByte = FALSE;
  217. m_fUTF7Mode = FALSE ;
  218. m_nBitCount = 0 ;
  219. m_tcUnicode = 0 ;
  220. return;
  221. }
  222. HRESULT CInccUTF7Out::ConvertChar(UCHAR tc, int cchSrc)
  223. {
  224. BOOL fDone = TRUE;
  225. WORD uc ;
  226. // 2nd byte of Unicode
  227. if (m_fDoubleByte )
  228. {
  229. BOOL bNeedShift ;
  230. // compose the 16 bits char
  231. uc = ( (WORD) tc << 8 | m_tcFirstByte ) ;
  232. // check whether the char can be direct encoded ?
  233. bNeedShift = uc > 0x7f ? TRUE : g_aDirectChar[(UCHAR)uc] == 255 ;
  234. if ( bNeedShift && m_fUTF7Mode == FALSE)
  235. {
  236. // output Shift-in char to change to UTF-7 Mode
  237. fDone = Output('+');
  238. // handle special case '+-'
  239. if ( uc == '+' ) // single byte "+"
  240. {
  241. fDone=Output('-');
  242. }
  243. else
  244. m_fUTF7Mode = TRUE ;
  245. }
  246. if (m_fUTF7Mode)
  247. {
  248. LONG tcUnicode ;
  249. UCHAR t64 ;
  250. int pad_bits ;
  251. // either write the char to the bit buffer
  252. // or pad bit buffer out to a full base64 char
  253. if (bNeedShift)
  254. {
  255. m_tcUnicode = m_tcUnicode << 16 | uc ;
  256. m_nBitCount += 16 ;
  257. }
  258. // pad bit buffer out to a full base64 char
  259. else if (m_nBitCount % 6 )
  260. {
  261. pad_bits = 6 - (m_nBitCount % 6 ) ;
  262. // get to next 6 multiple, pad these bits with 0
  263. m_tcUnicode = m_tcUnicode << pad_bits ;
  264. m_nBitCount += pad_bits ;
  265. }
  266. // flush out as many full base64 char as possible
  267. while ( m_nBitCount >= 6 && fDone )
  268. {
  269. tcUnicode = ( m_tcUnicode >> ( m_nBitCount - 6 ) );
  270. t64 = (UCHAR) ( tcUnicode & 0x3f ) ;
  271. fDone = Output(g_aInvBase64[t64]);
  272. m_nBitCount -= 6 ;
  273. }
  274. if (!bNeedShift)
  275. {
  276. // output Shift-out char
  277. fDone = Output('-');
  278. m_fUTF7Mode = FALSE ;
  279. m_nBitCount = 0 ;
  280. m_tcUnicode = 0 ;
  281. }
  282. }
  283. // the character can be directly encoded as ASCII
  284. if (!bNeedShift)
  285. {
  286. fDone = Output(m_tcFirstByte);
  287. }
  288. m_fDoubleByte = FALSE ;
  289. }
  290. // 1st byte of Unicode
  291. else
  292. {
  293. m_tcFirstByte = tc ;
  294. m_fDoubleByte = TRUE ;
  295. }
  296. if (fDone)
  297. return S_OK;
  298. else
  299. return E_FAIL;
  300. }
  301. /******************************************************************************
  302. ***************************** C L E A N U P *****************************
  303. ******************************************************************************/
  304. BOOL CInccUTF7Out::CleanUp()
  305. {
  306. BOOL fDone = TRUE;
  307. if (m_fUTF7Mode)
  308. {
  309. UCHAR t64 ;
  310. LONG tcUnicode ;
  311. int pad_bits ;
  312. // pad bit buffer out to a full base64 char
  313. if (m_nBitCount % 6 )
  314. {
  315. pad_bits = 6 - (m_nBitCount % 6 ) ;
  316. // get to next 6 multiple, pad these bits with 0
  317. m_tcUnicode = m_tcUnicode << pad_bits ;
  318. m_nBitCount += pad_bits ;
  319. }
  320. // flush out as many full base64 char as possible
  321. while ( m_nBitCount >= 6 && fDone )
  322. {
  323. tcUnicode = ( m_tcUnicode >> ( m_nBitCount - 6 ) );
  324. t64 = (UCHAR) ( tcUnicode & 0x3f ) ;
  325. fDone = Output(g_aInvBase64[t64]);
  326. m_nBitCount -= 6 ;
  327. }
  328. {
  329. // output Shift-out char
  330. fDone = Output('-');
  331. m_fUTF7Mode = FALSE ;
  332. m_nBitCount = 0 ;
  333. m_tcUnicode = 0 ;
  334. }
  335. }
  336. return fDone;
  337. }
  338. int CInccUTF7Out::GetUnconvertBytes()
  339. {
  340. return m_fDoubleByte ? 1 : 0 ;
  341. }
  342. DWORD CInccUTF7Out::GetConvertMode()
  343. {
  344. return 0 ;
  345. }
  346. void CInccUTF7Out::SetConvertMode(DWORD mode)
  347. {
  348. Reset(); // initialization
  349. return ;
  350. }