Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

122 lines
5.4 KiB

  1. /*****************************************************************************\
  2. FILE: encoding.h
  3. DESCRIPTION:
  4. Handles strings received from the internet by detecting whether they are
  5. UTF-8 or DBCS encoded and determining which code page was used.
  6. \*****************************************************************************/
  7. #ifndef _STRENCODE_H
  8. #define _STRENCODE_H
  9. #include <mlang.h>
  10. // Turned off until MLANG can successfully detect short strings.
  11. // TODO: We also need to fix ftpfolder when it doesn't have a pidl
  12. // but still needs the site's CWireEncoding state
  13. //#define FEATURE_CP_AUTODETECT
  14. // FTP_FIND_DATA is different than WIN32_FIND_DATA because
  15. // the .cFileName is in WIRECHAR instead of CHAR
  16. #define FTP_FIND_DATA WIN32_FIND_DATAA
  17. #define LPFTP_FIND_DATA LPWIN32_FIND_DATAA
  18. // WIRESTR stands for WireBites which could be DBCS/MBCS or UTF-8
  19. #define WIRECHAR CHAR
  20. #define LPCWIRESTR LPCSTR
  21. #define LPWIRESTR LPSTR
  22. BOOL SHIsUTF8Encoded(LPCWIRESTR pszIsUTF8);
  23. /*****************************************************************************\
  24. CLASS: CMultiLanguageCache
  25. DESCRIPTION:
  26. We can't cache the IMultiLanguage2 * across threads, but we do need to
  27. cache it when we are in a loop because we don't want to keep calling
  28. CoCreateInstance.
  29. \*****************************************************************************/
  30. class CMultiLanguageCache
  31. {
  32. public:
  33. CMultiLanguageCache(void) {m_pml2 = NULL;};
  34. ~CMultiLanguageCache(void) {ATOMICRELEASE(m_pml2);};
  35. IMultiLanguage2 * GetIMultiLanguage2(void) {EVAL(SUCCEEDED(_Init())); return m_pml2;};
  36. private:
  37. // Private member variables
  38. IMultiLanguage2 * m_pml2;
  39. // Private member functions
  40. HRESULT _Init(void);
  41. };
  42. // dwFlags for WireBytesToUnicode() & UnicodeToWireBytes()
  // Bit flags accepted in the dwFlags argument of
  // CWireEncoding::WireBytesToUnicode() and CWireEncoding::UnicodeToWireBytes().
  #define WIREENC_NONE                0x00000000  // No options.
  #define WIREENC_USE_UTF8            0x00000001  // UnicodeToWireBytes() only: prefer UTF-8 because this is a new file.
  #define WIREENC_IMPROVE_ACCURACY    0x00000002  // WireBytesToUnicode() only: detect the accuracy.

  // The confidence level we want a code page guess to reach.
  #define DETECT_CONFIDENCE           75
  47. /*****************************************************************************\
  48. CLASS: CWireEncoding
  49. DESCRIPTION:
  50. 2.1.1 No Data Loss Support (UTF-8)
  51. Server: The server is required to support the FEAT FTP command (rfc2389 http://www.cis.ohio-state.edu/htbin/rfc/rfc2389.html) and the "utf8" feature (http://w3.hethmon.com/ftpext/drafts/draft-ietf-ftpext-intl-ftp-04.txt). If the client sends the server the "utf8" command, the server then needs to accept and return UTF-8 encoded filenames. It's not known when IIS will support this but it won't be supported in the version that ships with Windows 2000.
  52. Network Client (wininet): Wininet needs to respect the unicode filepaths in the FtpGetFileEx() and FtpPutFileEx() APIs. This won't be supported in IE 5.
  53. UI Client (msieftp): It's necessary to see if the server supports the "utf8" command via the FEAT command. If the command is supported, it should be sent to the server and all future strings will be UTF-8 encoded. This should be supported in IE 5 if there is enough time in the schedule.
  54. 2.1.0 Data Loss Backward Compat (DBCS)
  55. MSIEFTP will support DBCS if and only if the code page on the client matches the code page of the server and of all FTP directories and filenames used. Future versions may attempt to sniff the code page.
  56. IMultiLanguage2::DetectCodepage(MLDETECTCP_8BIT, 0, psz, NULL, &DetectEncodingInfo, ARRAYSIZE(DetectEncodingInfo))
  57. MLDETECTCP_8BIT, MLDETECTCP_DBCS, MLCONVCHARF_AUTODETECT
  58. DetectEncodingInfo.nCodePage (IMultiLanguage2::DetectCodepage)
  59. CP_1252: This is english/french/german and the most common.
  60. CP_JPN_SJ: Most common Japanese
  61. CP_CYRILLIC_AUTO = 51251L,
  62. CP_GREEK_AUTO = 51253L,
  63. CP_ARABIC_AUTO = 51256L,
  64. CP_1251 = 1251L: Lucian
  65. \*****************************************************************************/
  66. class CWireEncoding
  67. {
  68. public:
  69. CWireEncoding(void);
  70. ~CWireEncoding(void);
  71. HRESULT WireBytesToUnicode(CMultiLanguageCache * pmlc, LPCWIRESTR pwStr, DWORD dwFlags, LPWSTR pwzDest, DWORD cchSize);
  72. HRESULT UnicodeToWireBytes(CMultiLanguageCache * pmlc, LPCWSTR pwzStr, DWORD dwFlags, LPWIRESTR pwbDest, DWORD cchSize);
  73. HRESULT ReSetCodePages(CMultiLanguageCache * pmlc, CFtpPidlList * pFtpPidlList);
  74. HRESULT CreateFtpItemID(CMultiLanguageCache * pmlc, LPFTP_FIND_DATA pwfd, LPITEMIDLIST * ppidl);
  75. HRESULT ChangeFtpItemIDName(CMultiLanguageCache * pmlc, LPCITEMIDLIST pidlBefore, LPCWSTR pwzNewName, BOOL fUTF8, LPITEMIDLIST * ppidlAfter);
  76. UINT GetCodePage(void) {return m_uiCodePage;};
  77. INT GetConfidence(void) {return m_nConfidence;};
  78. BOOL IsUTF8Supported(void) {return m_fUseUTF8;};
  79. void SetUTF8Support(BOOL fIsUTF8Supported) {m_fUseUTF8 = fIsUTF8Supported;};
  80. private:
  81. // Private member variables
  82. INT m_nConfidence; // How accurate is our guess at m_uiCodePage.
  83. UINT m_uiCodePage; // The code page we guess this to be.
  84. DWORD m_dwMode; // State used by IMultiLanguage2's ::ConvertStringFromUnicode
  85. BOOL m_fUseUTF8; //
  86. // Private member functions
  87. void _ImproveAccuracy(CMultiLanguageCache * pmlc, LPCWIRESTR pwStr, BOOL fUpdateCP, UINT * puiCodePath);
  88. };
  89. #endif // _STRENCODE_H