Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1015 lines
28 KiB

  /*++
  Copyright (c) 2000-2001, Microsoft Corporation All rights reserved.
  Module Name:
      helpapis.c
  Abstract:
      Helper routines defined in this file:
      SP_GetFmtValueW
      SP_PutNumberW
      SP_ReverseW
      GB18030Helper
      CaseHelper
      CompareHelper
      GetOpenSaveFileHelper
      FindReplaceTextHelper
      RtlIsTextUnicode
  Revision History:
      17 Mar 2001 v-michka Created.
  --*/
  16. #include "precomp.h"
  17. // forward declare, since win9xu.c has no header
  18. int __stdcall GodotCompareStringW(LCID Locale, DWORD dwCmpFlags,
  19. LPCWSTR lpString1, int cchCount1,
  20. LPCWSTR lpString2, int cchCount2);
  /*-------------------------------
      SP_GetFmtValueW

      Adapted from wsprintf.c.

      Parses a run of decimal digits (L'0'..L'9') starting at lpch.

      lpch - NUL-terminated wide string positioned at the candidate digits.
      lpw  - receives the parsed value; 0 when lpch does not start with a
             digit. Values too large for an int saturate at the largest
             positive int instead of overflowing.

      Returns the address of the first non-digit character.
  -------------------------------*/
  LPCWSTR SP_GetFmtValueW(LPCWSTR lpch, int *lpw)
  {
      /* Largest positive int, computed locally so no extra header is needed */
      const int maxValue = (int)(((unsigned int)~0u) >> 1);
      int ii = 0;

      /* It might not work for some locales or digit sets */
      while (*lpch >= L'0' && *lpch <= L'9') {
          int digit = (int)(*lpch - L'0');

          /* ii * 10 + digit would exceed maxValue: clamp rather than overflow */
          if (ii > (maxValue - digit) / 10)
              ii = maxValue;
          else
              ii = ii * 10 + digit;
          lpch++;
      }
      *lpw = ii;

      /*
       * return the address of the first non-digit character
       */
      return lpch;
  }
  /*-------------------------------
      SP_PutNumberW

      Adapted from wsprintf.c.

      Writes the digits of n in the given radix to lpstr, least
      significant digit first (the output is not reversed here and is
      not NUL-terminated).

      lpstr     - destination buffer with room for at least limit characters.
      n         - value to format.
      limit     - maximum number of characters to write.
      radix     - numeric base; values below 2 are rejected.
      uppercase - nonzero to use 'A'.. for digits above 9, zero for 'a'..

      Returns the number of characters written (0 when limit <= 0 or
      radix < 2).
  -------------------------------*/
  int SP_PutNumberW(LPWSTR lpstr, ULONG64 n, int limit, DWORD radix, int uppercase)
  {
      DWORD mod;
      int count = 0;
      int alphaAdjust;

      /* radix 0 would divide by zero and radix 1 never reduces n */
      if (radix < 2)
          return 0;

      /* It might not work for some locales or digit sets */
      /* Distance from just past '9' to the first letter digit */
      if (uppercase)
          alphaAdjust = 'A' - '0' - 10;
      else
          alphaAdjust = 'a' - '0' - 10;

      if (count < limit) {
          do {
              mod = (DWORD)(n % radix);
              n /= radix;
              mod += '0';
              if (mod > '9')
                  mod += alphaAdjust;
              *lpstr++ = (WCHAR)mod;
              count++;
          } while ((count < limit) && n);
      }
      return count;
  }
  /*-------------------------------
      SP_ReverseW

      Adapted from wsprintf.c.

      Reverses, in place, the characters in the inclusive range
      [lpFirst, lpLast]. Nothing is done when lpLast is not above lpFirst.
  -------------------------------*/
  void SP_ReverseW(LPWSTR lpFirst, LPWSTR lpLast)
  {
      /* Swap the outermost pair and walk both ends toward the middle */
      for (; lpFirst < lpLast; ++lpFirst, --lpLast) {
          WCHAR tail = *lpLast;

          *lpLast = *lpFirst;
          *lpFirst = tail;
      }
  }
  79. #define NLS_CP_DLL_PROC_NAME "NlsDllCodePageTranslation"
  80. // Must dynamically link to "NlsDllCodePageTranslation"
  81. // because the DLL may not be on the machine
  82. typedef BOOL (__stdcall *PFNgb) (DWORD, DWORD, LPSTR, int, LPWSTR, int, LPCPINFO);
  83. static PFNgb s_pfnGB;
  84. /*-------------------------------
  85. GB18030Helper
  86. Provider for our GB18030 support
  87. -------------------------------*/
  88. DWORD GB18030Helper(DWORD cpg, DWORD dw, LPSTR lpMB, int cchMB, LPWSTR lpWC, int cchWC, LPCPINFO lpCPI)
  89. {
  90. HMODULE hmod = 0;
  91. if (s_pfnGB == NULL)
  92. s_pfnGB = (PFNgb)GetProcAddress(GetGB18030Handle(), NLS_CP_DLL_PROC_NAME);
  93. if (s_pfnGB)
  94. return(s_pfnGB(cpg, dw, lpMB, cchMB, lpWC, cchWC, lpCPI));
  95. else
  96. {
  97. SetLastError(ERROR_INVALID_PARAMETER);
  98. return(0);
  99. }
  100. }
  101. /*-------------------------------
  102. CaseHelper
  103. -------------------------------*/
  104. void CaseHelper(LPWSTR pchBuff, DWORD cch, BOOL fUpper)
  105. {
  106. LPSTR pszA;
  107. int cb;
  108. if(!pchBuff || !cch || !*pchBuff)
  109. return;
  110. if (-1 == cch)
  111. cch = gwcslen(pchBuff);
  112. cb = 1 + g_mcs * cch;
  113. pszA = GodotHeapAlloc(cb+1);
  114. if(pszA==NULL || cb==0)
  115. {
  116. // Not much else we can do here, so bail
  117. SetLastError(ERROR_OUTOFMEMORY);
  118. }
  119. else
  120. {
  121. *(pszA + cb) = '\0';
  122. WideCharToMultiByte(g_acp, 0, pchBuff, cch, pszA, cch, NULL, NULL);
  123. if(fUpper)
  124. CharUpperA(pszA);
  125. else
  126. CharLowerA(pszA);
  127. MultiByteToWideChar(g_acp, 0, pszA, cch, pchBuff, cch);
  128. GodotHeapFree(pszA);
  129. }
  130. return;
  131. }
  132. /*-------------------------------
  133. CompareHelper
  134. -------------------------------*/
  135. int CompareHelper(LPCWSTR lpsz1, LPCWSTR lpsz2, BOOL fCaseSensitive)
  136. {
  137. int RetVal;
  138. DWORD dwCmpFlags = (fCaseSensitive ? NORM_IGNORECASE : 0);
  139. RetVal=GodotCompareStringW(LOCALE_USER_DEFAULT, dwCmpFlags, lpsz1, -1, lpsz2, -1);
  140. if(RetVal==0)
  141. {
  142. //
  143. // The caller is not expecting failure. Try the system
  144. // default locale id.
  145. //
  146. RetVal=GodotCompareStringW(LOCALE_SYSTEM_DEFAULT, dwCmpFlags, lpsz1, -1, lpsz2, -1);
  147. }
  148. if(RetVal==0)
  149. {
  150. if (lpsz1 && lpsz2)
  151. {
  152. //
  153. // The caller is not expecting failure. We've never had a
  154. // failure indicator before. Note we can still fail here in
  155. // an out of memory situation; what else can we do, though?
  156. //
  157. LPSTR sz1, sz2;
  158. ALLOCRETURN ar1, ar2;
  159. RetVal = 0;
  160. sz1 = NULL;
  161. ar1 = GodotToAcpOnHeap(lpsz1, &sz1);
  162. if(ar1 != arFailed)
  163. {
  164. sz2 = NULL;
  165. ar2 = GodotToAcpOnHeap(lpsz2, &sz2);
  166. if(ar2 != arFailed)
  167. {
  168. if(sz1 && sz2)
  169. {
  170. if(fCaseSensitive)
  171. RetVal = lstrcmpA(sz1, sz2);
  172. else
  173. RetVal = lstrcmpiA(sz1, sz2);
  174. }
  175. if(ar2==arAlloc)
  176. GodotHeapFree(sz2);
  177. }
  178. if(ar1==arAlloc)
  179. GodotHeapFree(sz1);
  180. }
  181. return(RetVal);
  182. }
  183. else if (lpsz1)
  184. {
  185. return (1);
  186. }
  187. else if (lpsz2)
  188. {
  189. return (-1);
  190. }
  191. else
  192. {
  193. return (0);
  194. }
  195. }
  196. return(RetVal - 2);
  197. }
  198. /*-------------------------------
  199. GetOpenSaveFileHelper
  200. Since 95% of the FileOpen and FileSave code is identical,
  201. we use one shared function for both
  202. -------------------------------*/
  203. BOOL GetOpenSaveFileHelper(LPOPENFILENAMEW lpofn, BOOL fOpenFile)
  204. {
  205. // Begin locals
  206. BOOL RetVal;
  207. BOOL fFailedHook = FALSE;
  208. OPENFILENAMEA ofnA;
  209. LPGODOTTLSINFO lpgti;
  210. // Do not use sizeof(OPENFILENAMEA) since that will give us the
  211. // Windows 2000 structure, which would be BAAAAD (Win9x will choke
  212. // on the lStructSize).
  213. ZeroMemory(&ofnA, OPENFILENAME_SIZE_VERSION_400A);
  214. ofnA.lStructSize = OPENFILENAME_SIZE_VERSION_400A;
  215. if(!(lpgti = GetThreadInfoSafe(TRUE)))
  216. {
  217. SetLastError(ERROR_OUTOFMEMORY);
  218. return(FALSE);
  219. }
  220. // Do the hook handling
  221. if(lpofn->Flags & OFN_EXPLORER)
  222. {
  223. if(fOpenFile)
  224. {
  225. if(lpgti->pfnGetOpenFileName)
  226. fFailedHook = TRUE;
  227. else if((lpofn->Flags & OFN_ENABLEHOOK) && lpofn->lpfnHook)
  228. {
  229. lpgti->pfnGetOpenFileName = lpofn->lpfnHook;
  230. ofnA.lpfnHook = &OFNHookProc;
  231. }
  232. }
  233. else
  234. {
  235. if(lpgti->pfnGetSaveFileName)
  236. fFailedHook = TRUE;
  237. else if((lpofn->Flags & OFN_ENABLEHOOK) && lpofn->lpfnHook)
  238. {
  239. lpgti->pfnGetSaveFileName = lpofn->lpfnHook;
  240. ofnA.lpfnHook = &OFNHookProcSave;
  241. }
  242. }
  243. }
  244. else
  245. {
  246. if(fOpenFile)
  247. {
  248. if(lpgti->pfnGetOpenFileNameOldStyle)
  249. fFailedHook = TRUE;
  250. else if((lpofn->Flags & OFN_ENABLEHOOK) && lpofn->lpfnHook)
  251. {
  252. lpgti->pfnGetOpenFileNameOldStyle = lpofn->lpfnHook;
  253. ofnA.lpfnHook = &OFNHookProcOldStyle;
  254. }
  255. }
  256. else
  257. {
  258. if(lpgti->pfnGetSaveFileNameOldStyle)
  259. fFailedHook = TRUE;
  260. else if((lpofn->Flags & OFN_ENABLEHOOK) && lpofn->lpfnHook)
  261. {
  262. lpgti->pfnGetSaveFileNameOldStyle = lpofn->lpfnHook;
  263. ofnA.lpfnHook = &OFNHookProcOldStyleSave;
  264. }
  265. }
  266. }
  267. // Check to see if we tripped over any of the hooks
  268. if(fFailedHook)
  269. {
  270. SetLastError(ERROR_INVALID_FILTER_PROC);
  271. return(FALSE);
  272. }
  273. if((lpofn->Flags & OFN_ENABLETEMPLATEHANDLE) || (lpofn->Flags & OFN_ENABLETEMPLATE))
  274. ofnA.hInstance = lpofn->hInstance;
  275. if(FSTRING_VALID(lpofn->lpstrFilter))
  276. {
  277. size_t lpstrFilterLen = cchUnicodeMultiSz(lpofn->lpstrFilter);
  278. _STACKALLOC(lpstrFilterLen*g_mcs, ofnA.lpstrFilter);
  279. WideCharToMultiByte(g_acp, 0, lpofn->lpstrFilter, lpstrFilterLen, (LPSTR)ofnA.lpstrFilter, lpstrFilterLen * g_mcs, NULL, NULL);
  280. }
  281. if ((lpofn->nMaxCustFilter > 0) && FSTRING_VALID(lpofn->lpstrCustomFilter))
  282. {
  283. ofnA.nMaxCustFilter = (lpofn->nMaxCustFilter*g_mcs);
  284. _STACKALLOC(ofnA.nMaxCustFilter, ofnA.lpstrCustomFilter);
  285. WideCharToMultiByte(g_acp, 0, lpofn->lpstrCustomFilter, lpofn->nMaxCustFilter, ofnA.lpstrCustomFilter, ofnA.nMaxCustFilter, NULL, NULL);
  286. ofnA.nFilterIndex = lpofn->nFilterIndex;
  287. }
  288. ofnA.nMaxFile = (lpofn->nMaxFile*g_mcs);
  289. _STACKALLOC(ofnA.nMaxFile, ofnA.lpstrFile);
  290. WideCharToMultiByte(g_acp, 0, lpofn->lpstrFile, lpofn->nMaxFile, ofnA.lpstrFile, ofnA.nMaxFile, NULL, NULL);
  291. if ((lpofn->nMaxFileTitle > 0) && (FSTRING_VALID(lpofn->lpstrFileTitle)))
  292. {
  293. ofnA.nMaxFileTitle = (lpofn->nMaxFileTitle*g_mcs);
  294. _STACKALLOC(ofnA.nMaxFileTitle, ofnA.lpstrFileTitle);
  295. WideCharToMultiByte(g_acp, 0, lpofn->lpstrFileTitle, lpofn->nMaxFileTitle, ofnA.lpstrFileTitle, ofnA.nMaxFileTitle, NULL, NULL);
  296. }
  297. if(FSTRING_VALID(ofnA.lpstrFile))
  298. {
  299. // nFileOffset and nFileExtension are to provide info about the
  300. // file name and extension location in lpstrFile, but there is
  301. // no reasonable way to get it from the return so we just recalc
  302. CHAR driveA[_MAX_DRIVE];
  303. CHAR dirA[_MAX_DIR];
  304. CHAR fileA[_MAX_FNAME];
  305. gsplitpath(ofnA.lpstrFile, driveA, dirA, fileA, NULL);
  306. ofnA.nFileOffset = (lstrlenA(driveA) + lstrlenA(dirA));
  307. ofnA.nFileExtension = ofnA.nFileOffset + lstrlenA(fileA);
  308. }
  309. GODOT_TO_ACP_STACKALLOC(lpofn->lpstrInitialDir, ofnA.lpstrInitialDir);
  310. GODOT_TO_ACP_STACKALLOC(lpofn->lpstrTitle, ofnA.lpstrTitle);
  311. GODOT_TO_ACP_STACKALLOC(lpofn->lpstrDefExt, ofnA.lpstrDefExt);
  312. ofnA.lCustData = lpofn->lCustData;
  313. ofnA.hwndOwner = lpofn->hwndOwner;
  314. ofnA.Flags = lpofn->Flags;
  315. if(lpofn->Flags & OFN_ENABLETEMPLATE)
  316. {
  317. GODOT_TO_ACP_STACKALLOC(lpofn->lpTemplateName, ofnA.lpTemplateName);
  318. }
  319. // Call the 'A' version of the API, then clear the hook
  320. // if we are in the "Explorer" style FileOpen dlg.
  321. if(fOpenFile)
  322. {
  323. INIT_WINDOW_SNIFF(lpgti->hHook);
  324. RetVal=GetOpenFileNameA(&ofnA);
  325. TERM_WINDOW_SNIFF(lpgti->hHook);
  326. if(lpofn->Flags & OFN_ENABLEHOOK)
  327. {
  328. if(lpofn->Flags & OFN_EXPLORER)
  329. lpgti->pfnGetOpenFileName = NULL;
  330. else
  331. lpgti->pfnGetOpenFileNameOldStyle = NULL;
  332. }
  333. }
  334. else
  335. {
  336. INIT_WINDOW_SNIFF(lpgti->hHook);
  337. RetVal=GetSaveFileNameA(&ofnA);
  338. TERM_WINDOW_SNIFF(lpgti->hHook);
  339. if(lpofn->Flags & OFN_ENABLEHOOK)
  340. {
  341. if(lpofn->Flags & OFN_EXPLORER)
  342. lpgti->pfnGetSaveFileName = NULL;
  343. else
  344. lpgti->pfnGetSaveFileNameOldStyle = NULL;
  345. }
  346. }
  347. // Begin postcall
  348. if(RetVal)
  349. {
  350. if ((ofnA.lpstrCustomFilter) && (lpofn->lpstrCustomFilter))
  351. {
  352. MultiByteToWideChar(g_acp, 0, ofnA.lpstrCustomFilter, ofnA.nMaxCustFilter, lpofn->lpstrCustomFilter, lpofn->nMaxCustFilter);
  353. lpofn->nFilterIndex = ofnA.nFilterIndex;
  354. }
  355. MultiByteToWideChar(g_acp, 0, ofnA.lpstrFile, ofnA.nMaxFile, lpofn->lpstrFile, lpofn->nMaxFile);
  356. if((ofnA.lpstrFileTitle) && (lpofn->lpstrFileTitle))
  357. MultiByteToWideChar(g_acp, 0, ofnA.lpstrFileTitle, ofnA.nMaxFileTitle, lpofn->lpstrFileTitle, lpofn->nMaxFileTitle);
  358. lpofn->Flags = ofnA.Flags;
  359. {
  360. // nFileOffset and nFileExtension are to provide info about the
  361. // file name and extension location in lpstrFile, but there is
  362. // no reasonable way to get it from the return so we just recalc
  363. WCHAR drive[_MAX_DRIVE];
  364. WCHAR dir[_MAX_DIR];
  365. WCHAR file[_MAX_FNAME];
  366. gwsplitpath(lpofn->lpstrFile, drive, dir, file, NULL);
  367. lpofn->nFileOffset = (gwcslen(drive) + gwcslen(dir));
  368. lpofn->nFileExtension = lpofn->nFileOffset + gwcslen(file);
  369. }
  370. }
  371. else if (lpofn->lpstrFile)
  372. {
  373. // There was a file, but there was no room in the buffer.
  374. // According to the docs, if buffer too small first 2 bytes
  375. // are the required size
  376. memcpy(lpofn->lpstrFile, ofnA.lpstrFile, sizeof(short));
  377. }
  378. // Finished
  379. return RetVal;
  380. }
  381. /*-------------------------------
  382. FindReplaceTextHelper
  383. Since 95% of the FindText and ReplaceText code is identical,
  384. we use one shared function for both
  385. -------------------------------*/
  386. HWND FindReplaceTextHelper(LPFINDREPLACEW lpfr, BOOL fFind)
  387. {
  388. HWND RetVal;
  389. LPGODOTTLSINFO lpgti;
  390. LPFINDREPLACEA lpfra;
  391. BOOL fTemplate = ((lpfr->Flags & FR_ENABLETEMPLATE) && (lpfr->lpTemplateName));
  392. size_t cchTemplateName = (fTemplate, gwcslen(lpfr->lpTemplateName) , 0);
  393. LPWSTR lpszBuffer;
  394. size_t cchBuffer;
  395. // If we cannot get out TLS info, then we cannot proceed
  396. if(!(lpgti = GetThreadInfoSafe(TRUE)))
  397. {
  398. SetLastError(ERROR_OUTOFMEMORY);
  399. return(0);
  400. }
  401. if((fFind && lpgti->pfnFindText) ||
  402. (!fFind && lpgti->pfnReplaceText))
  403. {
  404. SetLastError(ERROR_INVALID_FILTER_PROC);
  405. return(0);
  406. }
  407. // Easier than copying, casting!
  408. lpfra = (LPFINDREPLACEA)lpfr;
  409. // First, get the max temp buffer size and allocate it
  410. cchBuffer = lpfr->wFindWhatLen ;
  411. if (!fFind && (lpfr->wReplaceWithLen > cchBuffer))
  412. cchBuffer = lpfr->wReplaceWithLen;
  413. if (fTemplate && (cchTemplateName > cchBuffer))
  414. cchBuffer = cchTemplateName;
  415. lpszBuffer = GodotHeapAlloc(cchBuffer * g_mcs);
  416. if(!lpszBuffer)
  417. {
  418. SetLastError(ERROR_OUTOFMEMORY);
  419. return(0);
  420. }
  421. // Handle the Find string
  422. gwcscpy(lpszBuffer, lpfr->lpstrFindWhat);
  423. WideCharToMultiByte(g_acp, 0,
  424. lpfr->lpstrFindWhat, lpfr->wFindWhatLen,
  425. lpfra->lpstrFindWhat, lpfra->wFindWhatLen,
  426. NULL, NULL);
  427. if(!fFind)
  428. {
  429. //This is the replace dlg, so handle the replacement
  430. gwcscpy(lpszBuffer, lpfr->lpstrReplaceWith);
  431. WideCharToMultiByte(g_acp, 0,
  432. lpfr->lpstrReplaceWith, lpfr->wReplaceWithLen,
  433. lpfra->lpstrReplaceWith, lpfra->wReplaceWithLen,
  434. NULL, NULL);
  435. }
  436. if(fTemplate)
  437. {
  438. // They have specified a template, so it must be converted/copied
  439. gwcscpy(lpszBuffer, lpfr->lpTemplateName);
  440. WideCharToMultiByte(g_acp, 0,
  441. lpfr->lpTemplateName, cchTemplateName,
  442. (LPSTR)lpfra->lpTemplateName, cchTemplateName * g_mcs,
  443. NULL, NULL);
  444. }
  445. GodotHeapFree(lpszBuffer);
  446. // Since we are always setting the hook, the flags must always be munged
  447. lpfra->Flags |= FR_ENABLEHOOK;
  448. // Now, lets set the hook, cache the caller hook if there is
  449. // one, cache the memory, and call the API. Which vars we use
  450. // here depend on which API they are calling
  451. if(fFind)
  452. {
  453. if((lpfr->Flags & FR_ENABLEHOOK) && (lpfr->lpfnHook))
  454. lpgti->pfnFindText = lpfr->lpfnHook;
  455. lpfra->lpfnHook = &FRHookProcFind;
  456. lpgti->lpfrwFind = lpfr;
  457. INIT_WINDOW_SNIFF(lpgti->hHook);
  458. RetVal=FindTextA(lpfra);
  459. TERM_WINDOW_SNIFF(lpgti->hHook);
  460. if(RetVal==0)
  461. lpgti->lpfrwFind = NULL;
  462. }
  463. else
  464. {
  465. if((lpfr->Flags & FR_ENABLEHOOK) && (lpfr->lpfnHook))
  466. lpgti->pfnReplaceText = lpfr->lpfnHook;
  467. lpfra->lpfnHook = &FRHookProcReplace;
  468. lpgti->lpfrwReplace = lpfr;
  469. INIT_WINDOW_SNIFF(lpgti->hHook);
  470. RetVal=ReplaceTextA(lpfra);
  471. TERM_WINDOW_SNIFF(lpgti->hHook);
  472. if(RetVal==0)
  473. lpgti->lpfrwReplace = NULL;
  474. }
  475. // It makes no sense to copy things out since there is nothing they
  476. // would do with the structure. So don't do anything!
  477. // Finished
  478. return RetVal;
  479. }
  480. //
  481. //
  482. //
  483. //
  484. // Stolen from map.c, in the BASE depot
  485. //
  486. //
  487. //
  488. //
  489. #define UNICODE_FFFF 0xFFFF
  490. #define REVERSE_BYTE_ORDER_MARK 0xFFFE
  491. #define BYTE_ORDER_MARK 0xFEFF
  492. #define PARAGRAPH_SEPARATOR 0x2029
  493. #define LINE_SEPARATOR 0x2028
  494. #define UNICODE_TAB 0x0009
  495. #define UNICODE_LF 0x000A
  496. #define UNICODE_CR 0x000D
  497. #define UNICODE_SPACE 0x0020
  498. #define UNICODE_CJK_SPACE 0x3000
  499. #define UNICODE_R_TAB 0x0900
  500. #define UNICODE_R_LF 0x0A00
  501. #define UNICODE_R_CR 0x0D00
  502. #define UNICODE_R_SPACE 0x2000
  503. #define UNICODE_R_CJK_SPACE 0x0030 /* Ambiguous - same as ASCII '0' */
  504. #define ASCII_CRLF 0x0A0D
  505. #define __max(a,b) (((a) > (b)) ? (a) : (b))
  506. #define __min(a,b) (((a) < (b)) ? (a) : (b))
  507. BOOL RtlIsTextUnicode(PVOID Buffer, ULONG Size, PULONG Result)
  508. /*++
  509. Routine Description:
  510. IsTextUnicode performs a series of inexpensive heuristic checks
  511. on a buffer in order to verify that it contains Unicode data.
  512. [[ need to fix this section, see at the end ]]
  513. Found Return Result
  514. BOM TRUE BOM
  515. RBOM FALSE RBOM
  516. FFFF FALSE Binary
  517. NULL FALSE Binary
  518. null TRUE null bytes
  519. ASCII_CRLF FALSE CRLF
  520. UNICODE_TAB etc. TRUE Zero Ext Controls
  521. UNICODE_TAB_R FALSE Reversed Controls
  522. UNICODE_ZW etc. TRUE Unicode specials
  523. 1/3 as little variation in hi-byte as in lo byte: TRUE Correl
  524. 3/1 or worse " FALSE AntiCorrel
  525. Arguments:
  526. Buffer - pointer to buffer containing text to examine.
  527. Size - size of buffer in bytes. At most 256 characters in this will
  528. be examined. If the size is less than the size of a unicode
  529. character, then this function returns FALSE.
  530. Result - optional pointer to a flag word that contains additional information
  531. about the reason for the return value. If specified, this value on
  532. input is a mask that is used to limit the factors this routine uses
  533. to make its decision. On output, this flag word is set to contain
  534. those flags that were used to make its decision.
  535. Return Value:
  536. Boolean value that is TRUE if Buffer contains unicode characters.
  537. --*/
  538. {
  539. UNALIGNED WCHAR *lpBuff = Buffer;
  540. PUCHAR lpb = Buffer;
  541. ULONG iBOM = 0;
  542. ULONG iCR = 0;
  543. ULONG iLF = 0;
  544. ULONG iTAB = 0;
  545. ULONG iSPACE = 0;
  546. ULONG iCJK_SPACE = 0;
  547. ULONG iFFFF = 0;
  548. ULONG iPS = 0;
  549. ULONG iLS = 0;
  550. ULONG iRBOM = 0;
  551. ULONG iR_CR = 0;
  552. ULONG iR_LF = 0;
  553. ULONG iR_TAB = 0;
  554. ULONG iR_SPACE = 0;
  555. ULONG iNull = 0;
  556. ULONG iUNULL = 0;
  557. ULONG iCRLF = 0;
  558. ULONG iTmp;
  559. ULONG LastLo = 0;
  560. ULONG LastHi = 0;
  561. ULONG iHi = 0;
  562. ULONG iLo = 0;
  563. ULONG HiDiff = 0;
  564. ULONG LoDiff = 0;
  565. ULONG cLeadByte = 0;
  566. ULONG cWeird = 0;
  567. ULONG iResult = 0;
  568. ULONG iMaxTmp = __min(256, Size / sizeof(WCHAR));
  569. //
  570. // Special case when the size is less than or equal to 2.
  571. // Make sure we don't have a character followed by a null byte.
  572. //
  573. if ((Size < 2) ||
  574. ((Size == 2) && (lpBuff[0] != 0) && (lpb[1] == 0)))
  575. {
  576. if (Result)
  577. {
  578. *Result = IS_TEXT_UNICODE_ASCII16 | IS_TEXT_UNICODE_CONTROLS;
  579. }
  580. return (FALSE);
  581. }
  582. else if ((Size > 2) && ((Size / sizeof(WCHAR)) <= 256))
  583. {
  584. //
  585. // If the Size passed in is an even number, we don't want to
  586. // use the last WCHAR because it will contain the final null
  587. // byte.
  588. //
  589. if (((Size % sizeof(WCHAR)) == 0) &&
  590. ((lpBuff[iMaxTmp - 1] & 0xff00) == 0))
  591. {
  592. iMaxTmp--;
  593. }
  594. }
  595. //
  596. // Check at most 256 wide characters, collect various statistics.
  597. //
  598. for (iTmp = 0; iTmp < iMaxTmp; iTmp++)
  599. {
  600. switch (lpBuff[iTmp])
  601. {
  602. case BYTE_ORDER_MARK:
  603. iBOM++;
  604. break;
  605. case PARAGRAPH_SEPARATOR:
  606. iPS++;
  607. break;
  608. case LINE_SEPARATOR:
  609. iLS++;
  610. break;
  611. case UNICODE_LF:
  612. iLF++;
  613. break;
  614. case UNICODE_TAB:
  615. iTAB++;
  616. break;
  617. case UNICODE_SPACE:
  618. iSPACE++;
  619. break;
  620. case UNICODE_CJK_SPACE:
  621. iCJK_SPACE++;
  622. break;
  623. case UNICODE_CR:
  624. iCR++;
  625. break;
  626. //
  627. // The following codes are expected to show up in
  628. // byte reversed files.
  629. //
  630. case REVERSE_BYTE_ORDER_MARK:
  631. iRBOM++;
  632. break;
  633. case UNICODE_R_LF:
  634. iR_LF++;
  635. break;
  636. case UNICODE_R_TAB:
  637. iR_TAB++;
  638. break;
  639. case UNICODE_R_CR:
  640. iR_CR++;
  641. break;
  642. case UNICODE_R_SPACE:
  643. iR_SPACE++;
  644. break;
  645. //
  646. // The following codes are illegal and should never occur.
  647. //
  648. case UNICODE_FFFF:
  649. iFFFF++;
  650. break;
  651. case UNICODE_NULL:
  652. iUNULL++;
  653. break;
  654. //
  655. // The following is not currently a Unicode character
  656. // but is expected to show up accidentally when reading
  657. // in ASCII files which use CRLF on a little endian machine.
  658. //
  659. case ASCII_CRLF:
  660. iCRLF++;
  661. break; /* little endian */
  662. }
  663. //
  664. // Collect statistics on the fluctuations of high bytes
  665. // versus low bytes.
  666. //
  667. iHi = HIBYTE(lpBuff[iTmp]);
  668. iLo = LOBYTE(lpBuff[iTmp]);
  669. //
  670. // Count cr/lf and lf/cr that cross two words.
  671. //
  672. if ((iLo == '\r' && LastHi == '\n') ||
  673. (iLo == '\n' && LastHi == '\r'))
  674. {
  675. cWeird++;
  676. }
  677. iNull += (iHi ? 0 : 1) + (iLo ? 0 : 1); /* count Null bytes */
  678. HiDiff += __max(iHi, LastHi) - __min(LastHi, iHi);
  679. LoDiff += __max(iLo, LastLo) - __min(LastLo, iLo);
  680. LastLo = iLo;
  681. LastHi = iHi;
  682. }
  683. //
  684. // Count cr/lf and lf/cr that cross two words.
  685. //
  686. if ((iLo == '\r' && LastHi == '\n') ||
  687. (iLo == '\n' && LastHi == '\r'))
  688. {
  689. cWeird++;
  690. }
  691. if (iHi == '\0') /* don't count the last null */
  692. iNull--;
  693. if (iHi == 26) /* count ^Z at end as weird */
  694. cWeird++;
  695. /*
  696. iMaxTmp = __min(256 * sizeof(WCHAR), Size);
  697. if (g_mcs > 1)
  698. {
  699. for (iTmp = 0; iTmp < iMaxTmp; iTmp++)
  700. {
  701. if (NlsLeadByteInfo[lpb[iTmp]])
  702. {
  703. cLeadByte++;
  704. iTmp++; // should check for trailing-byte range
  705. }
  706. }
  707. }
  708. */
  709. //
  710. // Sift through the statistical evidence.
  711. //
  712. if (LoDiff < 127 && HiDiff == 0)
  713. {
  714. iResult |= IS_TEXT_UNICODE_ASCII16; /* likely 16-bit ASCII */
  715. }
  716. if (HiDiff && LoDiff == 0)
  717. {
  718. iResult |= IS_TEXT_UNICODE_REVERSE_ASCII16; /* reverse 16-bit ASCII */
  719. }
  720. //
  721. // Use leadbyte info to weight statistics.
  722. //
  723. if (!(g_mcs > 1) || cLeadByte == 0 ||
  724. !Result || !(*Result & IS_TEXT_UNICODE_DBCS_LEADBYTE))
  725. {
  726. iHi = 3;
  727. }
  728. else
  729. {
  730. //
  731. // A ratio of cLeadByte:cb of 1:2 ==> dbcs
  732. // Very crude - should have a nice eq.
  733. //
  734. iHi = __min(256, Size / sizeof(WCHAR)) / 2;
  735. if (cLeadByte < (iHi - 1) / 3)
  736. {
  737. iHi = 3;
  738. }
  739. else if (cLeadByte < (2 * (iHi - 1)) / 3)
  740. {
  741. iHi = 2;
  742. }
  743. else
  744. {
  745. iHi = 1;
  746. }
  747. iResult |= IS_TEXT_UNICODE_DBCS_LEADBYTE;
  748. }
  749. if (iHi * HiDiff < LoDiff)
  750. {
  751. iResult |= IS_TEXT_UNICODE_STATISTICS;
  752. }
  753. if (iHi * LoDiff < HiDiff)
  754. {
  755. iResult |= IS_TEXT_UNICODE_REVERSE_STATISTICS;
  756. }
  757. //
  758. // Any control codes widened to 16 bits? Any Unicode character
  759. // which contain one byte in the control code range?
  760. //
  761. if (iCR + iLF + iTAB + iSPACE + iCJK_SPACE /*+iPS+iLS*/)
  762. {
  763. iResult |= IS_TEXT_UNICODE_CONTROLS;
  764. }
  765. if (iR_LF + iR_CR + iR_TAB + iR_SPACE)
  766. {
  767. iResult |= IS_TEXT_UNICODE_REVERSE_CONTROLS;
  768. }
  769. //
  770. // Any characters that are illegal for Unicode?
  771. //
  772. if ((iRBOM + iFFFF + iUNULL + iCRLF) != 0 ||
  773. (cWeird != 0 && cWeird >= iMaxTmp/40))
  774. {
  775. iResult |= IS_TEXT_UNICODE_ILLEGAL_CHARS;
  776. }
  777. //
  778. // Odd buffer length cannot be Unicode.
  779. //
  780. if (Size & 1)
  781. {
  782. iResult |= IS_TEXT_UNICODE_ODD_LENGTH;
  783. }
  784. //
  785. // Any NULL bytes? (Illegal in ANSI)
  786. //
  787. if (iNull)
  788. {
  789. iResult |= IS_TEXT_UNICODE_NULL_BYTES;
  790. }
  791. //
  792. // POSITIVE evidence, BOM or RBOM used as signature.
  793. //
  794. if (*lpBuff == BYTE_ORDER_MARK)
  795. {
  796. iResult |= IS_TEXT_UNICODE_SIGNATURE;
  797. }
  798. else if (*lpBuff == REVERSE_BYTE_ORDER_MARK)
  799. {
  800. iResult |= IS_TEXT_UNICODE_REVERSE_SIGNATURE;
  801. }
  802. //
  803. // Limit to desired categories if requested.
  804. //
  805. if (Result)
  806. {
  807. iResult &= *Result;
  808. *Result = iResult;
  809. }
  810. //
  811. // There are four separate conclusions:
  812. //
  813. // 1: The file APPEARS to be Unicode AU
  814. // 2: The file CANNOT be Unicode CU
  815. // 3: The file CANNOT be ANSI CA
  816. //
  817. //
  818. // This gives the following possible results
  819. //
  820. // CU
  821. // + -
  822. //
  823. // AU AU
  824. // + - + -
  825. // -------- --------
  826. // CA +| 0 0 2 3
  827. // |
  828. // -| 1 1 4 5
  829. //
  830. //
  831. // Note that there are only 6 really different cases, not 8.
  832. //
  833. // 0 - This must be a binary file
  834. // 1 - ANSI file
  835. // 2 - Unicode file (High probability)
  836. // 3 - Unicode file (more than 50% chance)
  837. // 5 - No evidence for Unicode (ANSI is default)
  838. //
  839. // The whole thing is more complicated if we allow the assumption
  840. // of reverse polarity input. At this point we have a simplistic
  841. // model: some of the reverse Unicode evidence is very strong,
  842. // we ignore most weak evidence except statistics. If this kind of
  843. // strong evidence is found together with Unicode evidence, it means
  844. // its likely NOT Text at all. Furthermore if a REVERSE_BYTE_ORDER_MARK
  845. // is found, it precludes normal Unicode. If both byte order marks are
  846. // found it's not Unicode.
  847. //
  848. //
  849. // Unicode signature : uncontested signature outweighs reverse evidence.
  850. //
  851. if ((iResult & IS_TEXT_UNICODE_SIGNATURE) &&
  852. !(iResult & (IS_TEXT_UNICODE_NOT_UNICODE_MASK&(~IS_TEXT_UNICODE_DBCS_LEADBYTE))))
  853. {
  854. return (TRUE);
  855. }
  856. //
  857. // If we have conflicting evidence, it's not Unicode.
  858. //
  859. if (iResult & IS_TEXT_UNICODE_REVERSE_MASK)
  860. {
  861. return (FALSE);
  862. }
  863. //
  864. // Statistical and other results (cases 2 and 3).
  865. //
  866. if (!(iResult & IS_TEXT_UNICODE_NOT_UNICODE_MASK) &&
  867. ((iResult & IS_TEXT_UNICODE_NOT_ASCII_MASK) ||
  868. (iResult & IS_TEXT_UNICODE_UNICODE_MASK)))
  869. {
  870. return (TRUE);
  871. }
  872. return (FALSE);
  873. }