Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

370 lines
9.3 KiB

  1. #include "stdafx.h"
  2. #include "Msg.h"
  3. #include "ConvEng.h"
  4. #include "TextFile.h"
  5. #ifdef RTF_SUPPORT
  6. #include "RtfParser.h"
  7. #endif
  8. BOOL ConvertTextFile(
  9. PBYTE pbySource,
  10. DWORD dwFileSize,
  11. PBYTE pbyTarget,
  12. DWORD dwTargetSize,
  13. BOOL fAnsiToUnicode,
  14. PINT pnTargetFileSize)
  15. {
  16. BOOL fRet = FALSE;
  17. if (!fAnsiToUnicode && *((PWORD)pbySource) != 0xFEFF) {
  18. MsgNotUnicodeTextSourceFile();
  19. return FALSE;
  20. }
  21. if (fAnsiToUnicode && *((PWORD)pbySource) == 0xFEFF) {
  22. MsgNotAnsiTextSourceFile();
  23. return FALSE;
  24. }
  25. if (fAnsiToUnicode) {
  26. PWCH pwchTarget = (PWCH)pbyTarget;
  27. // Put Unicode text file flag
  28. *pwchTarget = 0xFEFF;
  29. *pnTargetFileSize = 1;
  30. // Null file
  31. if (!dwFileSize) {
  32. fRet = TRUE;
  33. goto Exit;
  34. }
  35. // Convert
  36. *pnTargetFileSize += AnsiStrToUnicodeStr(pbySource, dwFileSize,
  37. pwchTarget+1, dwTargetSize-2);
  38. *pnTargetFileSize *= sizeof(WCHAR);
  39. } else {
  40. // Check and skip Uncode text file flag
  41. if (dwFileSize < 2) {
  42. goto Exit;
  43. }
  44. PWCH pwchData = (PWCH)pbySource;
  45. if (*pwchData != 0xFEFF) {
  46. goto Exit;
  47. }
  48. pwchData++;
  49. // Null file w/ Unicode flag only
  50. if (dwFileSize == 2) {
  51. fRet = TRUE;
  52. goto Exit;
  53. }
  54. // Convert
  55. *pnTargetFileSize = UnicodeStrToAnsiStr(pwchData,
  56. dwFileSize/sizeof(WCHAR) - 1, (PCHAR)pbyTarget, dwTargetSize);
  57. }
  58. if (*pnTargetFileSize) {
  59. fRet = TRUE;
  60. }
  61. Exit:
  62. return fRet;
  63. }
  64. BOOL ConvertHtmlFile(
  65. PBYTE pbySource,
  66. DWORD dwFileSize,
  67. PBYTE pbyTarget,
  68. DWORD dwTargetSize,
  69. BOOL fAnsiToUnicode,
  70. PINT pnTargetFileSize)
  71. {
  72. BOOL fRet = FALSE;
  73. if (!ConvertTextFile(pbySource, dwFileSize, pbyTarget,
  74. // Reserve the last space to explicitly assign zero to the last
  75. // character in the buffer
  76. dwTargetSize - (fAnsiToUnicode ? sizeof(WCHAR):sizeof(char)),
  77. fAnsiToUnicode, pnTargetFileSize)) {
  78. return FALSE;
  79. }
  80. // Change charset
  81. if (fAnsiToUnicode) {
  82. const WCHAR* const wszUnicodeCharset = L"charset=unicode";
  83. WCHAR *pwch1, *pwch2;
  84. int nLengthIncrease;
  85. *((PWCH)(pbyTarget+*pnTargetFileSize)) = 0;
  86. pwch1 = wcsstr((PWCH)pbyTarget, L"charset=");
  87. if (!pwch1) {
  88. // Some Html file may haven't code page flag,
  89. // We skip charset replace step for this kind of files
  90. fRet = TRUE;
  91. goto Exit;
  92. }
  93. pwch2 = wcschr(pwch1, L'\"');
  94. if (!pwch2 || (pwch2 - pwch1 >= 20)) {
  95. goto Exit;
  96. }
  97. nLengthIncrease = (int)(wcslen(wszUnicodeCharset) - (pwch2 - pwch1));
  98. if (*pnTargetFileSize + nLengthIncrease*sizeof(WCHAR) > dwTargetSize) {
  99. goto Exit;
  100. }
  101. MoveMemory(pwch2 + nLengthIncrease, pwch2,
  102. pbyTarget + *pnTargetFileSize - (PBYTE)pwch2);
  103. CopyMemory(pwch1, wszUnicodeCharset, wcslen(wszUnicodeCharset)*sizeof(WCHAR));
  104. *pnTargetFileSize += nLengthIncrease*sizeof(WCHAR);
  105. fRet = TRUE;
  106. } else {
  107. const CHAR* const szGBCharset = "charset=gb18030";
  108. CHAR *pch1, *pch2;
  109. int nLengthIncrease;
  110. *((PCHAR)(pbyTarget+*pnTargetFileSize)) = 0;
  111. pch1 = strstr((PCHAR)pbyTarget, "charset=");
  112. if (!pch1) {
  113. // Some Html file may haven't code page flag,
  114. // We skip charset replace step for this kind of files
  115. fRet = TRUE;
  116. goto Exit;
  117. }
  118. pch2 = strchr(pch1, '\"');
  119. if (!pch2 || (pch2 - pch1 >= 20)) {
  120. goto Exit;
  121. }
  122. nLengthIncrease = (int)(strlen(szGBCharset) - (pch2 - pch1));
  123. if (*pnTargetFileSize + nLengthIncrease > (int)dwTargetSize) {
  124. goto Exit;
  125. }
  126. MoveMemory(pch2 + nLengthIncrease, pch2,
  127. (PCHAR)pbyTarget + *pnTargetFileSize - pch2);
  128. CopyMemory(pch1, szGBCharset, strlen(szGBCharset)*sizeof(char));
  129. *pnTargetFileSize += nLengthIncrease*sizeof(char);
  130. fRet = TRUE;
  131. }
  132. Exit:
  133. return fRet;
  134. }
  #ifdef XML_SUPPORT
  // ConvertXmlFile
  //
  // Converts an XML file image with ConvertTextFile and then rewrites the
  // value of the encoding="..." attribute inside the "<?xml ... ?>"
  // declaration so that it names the target encoding: "UTF-16" for
  // ANSI -> Unicode, "GB18030" for Unicode -> ANSI.
  //
  //   pbySource        - source file image
  //   dwFileSize       - size of the source image, in bytes
  //   pbyTarget        - buffer that receives the converted text
  //   dwTargetSize     - size of pbyTarget, in bytes
  //   fAnsiToUnicode   - TRUE: ANSI -> Unicode, FALSE: Unicode -> ANSI
  //   pnTargetFileSize - receives the size of the converted text, in bytes
  //
  // Returns TRUE when the text was converted and the declaration either has
  // no "encoding=" attribute or its value was replaced. Returns FALSE when
  // the text conversion fails, no complete "<?xml ... ?>" declaration is
  // found, the attribute value is not quoted with ' or ", the closing quote
  // is missing or 20 or more characters away, or the replacement would not
  // fit in the target buffer.
  BOOL ConvertXmlFile(
      PBYTE pbySource,
      DWORD dwFileSize,
      PBYTE pbyTarget,
      DWORD dwTargetSize,
      BOOL  fAnsiToUnicode,
      PINT  pnTargetFileSize)
  {
      // One character of the buffer is reserved so the converted text can be
      // zero-terminated for the string searches below.
      const DWORD cbTerminator =
          fAnsiToUnicode ? (DWORD)sizeof(WCHAR) : (DWORD)sizeof(char);

      // Without this check the subtraction below would wrap around and hand
      // ConvertTextFile a huge bogus buffer size.
      if (dwTargetSize < cbTerminator) {
          return FALSE;
      }

      // The value is used as a buffer offset below, so it must be defined
      // even if the text conversion returns without storing a size.
      *pnTargetFileSize = 0;

      if (!ConvertTextFile(pbySource, dwFileSize, pbyTarget,
              dwTargetSize - cbTerminator, fAnsiToUnicode, pnTargetFileSize)) {
          return FALSE;
      }

      // The terminator is written at this offset; refuse anything outside
      // the space handed to ConvertTextFile.
      if (*pnTargetFileSize < 0 ||
          (DWORD)*pnTargetFileSize > dwTargetSize - cbTerminator) {
          return FALSE;
      }

      // Change charset
      if (fAnsiToUnicode) {
          const WCHAR* const wszUnicodeCharset = L"UTF-16";
          const int cchNewName = (int)wcslen(wszUnicodeCharset);
          WCHAR *pwchDecl, *pwchDeclEnd, *pwchName;
          WCHAR *pwchClose = NULL;
          WCHAR wchQuote = 0;
          int cchIncrease, nNewSize;

          *((PWCH)(pbyTarget + *pnTargetFileSize)) = 0;

          pwchDecl = wcsstr((PWCH)pbyTarget, L"<?xml");
          if (!pwchDecl) {
              return FALSE;
          }
          pwchDeclEnd = wcsstr(pwchDecl, L"?>");
          if (!pwchDeclEnd) {
              return FALSE;
          }

          // Temporarily terminate the text at "?>" so the searches cannot
          // leave the declaration; the '?' is put back right after them.
          *pwchDeclEnd = 0;
          pwchName = wcsstr(pwchDecl, L"encoding=");
          if (pwchName) {
              pwchName += wcslen(L"encoding=");
              wchQuote = *pwchName++;     // opening quote, skipped
              if (wchQuote == L'\"' || wchQuote == L'\'') {
                  pwchClose = wcschr(pwchName, wchQuote);
              }
          }
          *pwchDeclEnd = L'?';

          if (!pwchName) {
              // Some XML files have no encoding attribute;
              // the replacement is skipped for them.
              return TRUE;
          }
          if (!pwchClose || (pwchClose - pwchName >= 20)) {
              return FALSE;
          }

          // May be negative when the old name is longer than the new one;
          // all size arithmetic is therefore done in signed int.
          cchIncrease = cchNewName - (int)(pwchClose - pwchName);
          nNewSize = *pnTargetFileSize + cchIncrease * (int)sizeof(WCHAR);
          if (nNewSize < 0 || (DWORD)nNewSize > dwTargetSize) {
              return FALSE;
          }

          // Shift everything from the closing quote onwards, then overwrite
          // the old encoding name with the new one.
          MoveMemory(pwchClose + cchIncrease, pwchClose,
              pbyTarget + *pnTargetFileSize - (PBYTE)pwchClose);
          CopyMemory(pwchName, wszUnicodeCharset, cchNewName * sizeof(WCHAR));
          *pnTargetFileSize = nNewSize;
      } else {
          const char* const szGBCharset = "GB18030";
          const int cchNewName = (int)strlen(szGBCharset);
          char *pchDecl, *pchDeclEnd, *pchName;
          char *pchClose = NULL;
          CHAR chQuote = 0;
          int cchIncrease, nNewSize;

          *((PCH)(pbyTarget + *pnTargetFileSize)) = 0;

          pchDecl = strstr((char*)pbyTarget, "<?xml");
          if (!pchDecl) {
              return FALSE;
          }
          pchDeclEnd = strstr(pchDecl, "?>");
          if (!pchDeclEnd) {
              return FALSE;
          }

          // Temporarily terminate the text at "?>" so the searches cannot
          // leave the declaration; the '?' is put back right after them.
          *pchDeclEnd = 0;
          pchName = strstr(pchDecl, "encoding=");
          if (pchName) {
              pchName += strlen("encoding=");
              chQuote = *pchName++;       // opening quote, skipped
              if (chQuote == '\"' || chQuote == '\'') {
                  pchClose = strchr(pchName, chQuote);
              }
          }
          *pchDeclEnd = '?';

          if (!pchName) {
              // Some XML files have no encoding attribute;
              // the replacement is skipped for them.
              return TRUE;
          }
          if (!pchClose || (pchClose - pchName >= 20)) {
              return FALSE;
          }

          // May be negative when the old name is longer than the new one.
          cchIncrease = cchNewName - (int)(pchClose - pchName);
          nNewSize = *pnTargetFileSize + cchIncrease;
          if (nNewSize < 0 || (DWORD)nNewSize > dwTargetSize) {
              return FALSE;
          }

          // Shift everything from the closing quote onwards, then overwrite
          // the old encoding name with the new one.
          MoveMemory(pchClose + cchIncrease, pchClose,
              pbyTarget + *pnTargetFileSize - (PBYTE)pchClose);
          CopyMemory(pchName, szGBCharset, cchNewName);
          *pnTargetFileSize = nNewSize;
      }

      return TRUE;
  }
  #endif
  #ifdef RTF_SUPPORT
  // ConvertRtfFile
  //
  // Runs a CRtfParser over an RTF file image and stores the size it reports
  // in *pnTargetFileSize.
  //
  //   pBuf             - read buffer holding the source file image
  //   dwSize           - size of the source image, in bytes
  //   pWrite           - write buffer handed to the parser
  //   dwWriteSize      - size of pWrite, in bytes
  //   fAnsiToUnicode   - not used by this function
  //   pnTargetFileSize - receives the result reported by the parser
  //
  // The source is accepted only if the parser recognises it as RTF and
  // reports version 1 and code page 936. Returns TRUE on success; on failure
  // a message is shown and FALSE is returned.
  BOOL ConvertRtfFile(
      PBYTE pBuf,             // Read buf
      DWORD dwSize,           // File size
      PBYTE pWrite,           // Write buf
      DWORD dwWriteSize,
      BOOL  fAnsiToUnicode,
      PINT  pnTargetFileSize)
  {
      CRtfParser* pcParser;
      DWORD dwVersion;
      DWORD dwCodepage;
      DWORD dwOutLimit;
      BOOL  fRet = FALSE;

      // The parser is given an output budget of three times the source size,
      // but never more than the write buffer really holds, and only when the
      // multiplication does not overflow a DWORD.
      // NOTE(review): assumes the fourth constructor argument is the
      // capacity of pWrite in bytes - confirm against CRtfParser.
      dwOutLimit = dwWriteSize;
      if (dwSize <= ((DWORD)-1) / 3 && dwSize * 3 < dwOutLimit) {
          dwOutLimit = dwSize * 3;
      }

      pcParser = new CRtfParser(pBuf, dwSize, pWrite, dwOutLimit);
      if (!pcParser) {
          MsgOverflow();
          goto gotoExit;
      }

      if (!pcParser->fRTFFile()) {
          MsgNotRtfSourceFile();
          goto gotoExit;
      }

      if (ecOK != pcParser->GetVersion(&dwVersion) ||
          dwVersion != 1) {
          MsgNotRtfSourceFile();
          goto gotoExit;
      }

      if (ecOK != pcParser->GetCodepage(&dwCodepage) ||
          dwCodepage != 936) {
          MsgNotRtfSourceFile();
          goto gotoExit;
      }

      // Explain WordID by corresponding word text
      if (ecOK != pcParser->Do()) {
          MsgNotRtfSourceFile();
          goto gotoExit;
      }

      pcParser->GetResult((PDWORD)pnTargetFileSize);
      fRet = TRUE;

  gotoExit:
      // delete on a null pointer is a no-op, so no guard is needed.
      delete pcParser;
      return fRet;
  }
  #endif