Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

351 lines
8.3 KiB

  1. /////////////////////////////////////////////////////////////////////////////
  2. //
  3. // common.h
  4. //
  5. // Created by JOEM 02-2000
  6. // Copyright (C) 2000 Microsoft Corporation
  7. // All Rights Reserved
  8. //
  9. //////////////////////////////////////////////////////////// JOEM 02-2000 //
  10. #ifndef _COMMON_H_
  11. #define _COMMON_H_
  12. #include <spddkhlp.h>
  13. #include <spcollec.h>
  14. #include <stdio.h>
  15. #ifdef _WIN32
  16. #include <wchar.h>
  17. #include <windows.h>
  18. #else
  19. #define OutputDebugStringW puts
  20. #endif
  // Line-length constant; it is not referenced anywhere else in this header.
  #define MAX_LINE 256

  // Narrow-character (char) helpers.
  //
  // SkipWhiteSpace / SkipNonWhiteSpace advance the pointer variable passed as
  // ptr (it is modified in place) past a run of whitespace / non-whitespace,
  // stopping at the terminating NUL.  StringUpperCase / StringLowerCase convert
  // a NUL-terminated string in place.
  //
  // Each character is cast to unsigned char before it reaches the <ctype.h>
  // functions: handing them a negative value (any byte >= 0x80 when char is
  // signed) is undefined behavior.  The scratch pointer inside the case macros
  // has a trailing underscore so it cannot capture a caller variable named str.
  #define SkipWhiteSpace(ptr) while (*(ptr) && isspace((unsigned char)*(ptr))) {(ptr)++;}
  #define SkipNonWhiteSpace(ptr) while (*(ptr) && !isspace((unsigned char)*(ptr))) {(ptr)++;}
  #define StringUpperCase(ptr) { char8* pszCur_ = (ptr); while (*pszCur_) {*pszCur_ = (char)toupper ((unsigned char)*pszCur_); pszCur_++;} }
  #define StringLowerCase(ptr) { char8* pszCur_ = (ptr); while (*pszCur_) {*pszCur_ = (char)tolower ((unsigned char)*pszCur_); pszCur_++;} }

  // Wide-character (WCHAR) counterparts of the four helpers above.
  #define WSkipWhiteSpace(ptr) while (*(ptr) && iswspace(*(ptr))) {(ptr)++;}
  #define WSkipNonWhiteSpace(ptr) while (*(ptr) && !iswspace(*(ptr))) {(ptr)++;}
  #define WStringUpperCase(ptr) { WCHAR* pwszCur_ = (ptr); while (*pwszCur_) {*pwszCur_ = towupper (*pwszCur_); pwszCur_++;} }
  #define WStringLowerCase(ptr) { WCHAR* pwszCur_ = (ptr); while (*pwszCur_) {*pwszCur_ = towlower (*pwszCur_); pwszCur_++;} }
  // Punctuation policy passed to RegularizeText: a non-zero value makes it
  // run RemovePunctuation over the word list before reassembling the text.
  enum PUNC
  {
      KEEP_PUNCTUATION,       // == 0
      REMOVE_PUNCTUATION      // == 1
  };
  35. //////////////////////////////////////////////////////////////////////
  36. //
  37. // CDynStrArray is a helper class for a hash that contains an array
  38. // of strings for each hash value
  39. //
  40. /////////////////////////////////////////////////////// JOEM 7-2000 //
  41. class CDynStr
  42. {
  43. public:
  44. CDynStr() {}
  45. CDynStr(const WCHAR* sz) { dstr = sz; }
  46. CDynStr(const CSpDynamicString& inDstr) { dstr = inDstr; }
  47. ~CDynStr() { dstr.Clear(); }
  48. public:
  49. CSpDynamicString dstr;
  50. };
  51. class CDynStrArray : public IUnknown
  52. {
  53. public:
  54. CDynStrArray() {}
  55. ~CDynStrArray()
  56. {
  57. for ( int i=0; i<m_aDstr.GetSize(); i++ )
  58. {
  59. m_aDstr[i].dstr.Clear();
  60. }
  61. }
  62. STDMETHOD(QueryInterface)(const IID& iid, void** ppv) { return S_OK; }
  63. STDMETHOD_(ULONG, AddRef)() { return 0; }
  64. STDMETHOD_(ULONG, Release)() { delete this; return 0; }
  65. public:
  66. CSPArray<CDynStr,CDynStr> m_aDstr;
  67. };
  68. //////////////////////////////////////////////////////////////////////
  69. // CountWords
  70. //
  71. /////////////////////////////////////////////////////// JOEM 7-2000 //
  72. inline USHORT CountWords (const WCHAR* pszText)
  73. {
  74. int wordNum = 0;
  75. SPDBG_ASSERT (pszText);
  76. while (*pszText)
  77. {
  78. WSkipWhiteSpace(pszText);
  79. if (!*pszText)
  80. {
  81. break;
  82. }
  83. WSkipNonWhiteSpace(pszText);
  84. wordNum++;
  85. }
  86. return (USHORT) wordNum;
  87. }
  88. //////////////////////////////////////////////////////////////////////
  89. // SplitWords
  90. //
  91. // Text will be modified, wordList will be allocated with
  92. // wordCount WCHAR*
  93. //
  94. /////////////////////////////////////////////////////// JOEM 7-2000 //
  95. inline HRESULT SplitWords (WCHAR* text, WCHAR*** wordList, USHORT* wordCount)
  96. {
  97. SPDBG_FUNC( "SplitWords" );
  98. HRESULT hr = S_OK;
  99. ULONG i = 0;
  100. SPDBG_ASSERT (text);
  101. SPDBG_ASSERT (wordList);
  102. SPDBG_ASSERT (wordCount);
  103. *wordCount = CountWords (text);
  104. *wordList = (WCHAR**) calloc (*wordCount, sizeof(**wordList));
  105. if ( !*wordList )
  106. {
  107. hr = E_OUTOFMEMORY;
  108. }
  109. if ( SUCCEEDED(hr) )
  110. {
  111. for (i=0; i<*wordCount; i++)
  112. {
  113. WSkipWhiteSpace (text);
  114. (*wordList)[i] = text;
  115. WSkipNonWhiteSpace (text);
  116. *text++ = L'\0';
  117. }
  118. }
  119. SPDBG_REPORT_ON_FAIL( hr );
  120. return hr;
  121. }
  122. //////////////////////////////////////////////////////////////////////
  123. // RemovePunctuation
  124. //
  125. // Text will be modified
  126. //
  127. /////////////////////////////////////////////////////// JOEM 8-2000 //
  128. inline HRESULT RemovePunctuation (WCHAR** wordList, USHORT* wordCount)
  129. {
  130. SPDBG_FUNC( "RemovePunctuation" );
  131. HRESULT hr = S_OK;
  132. WCHAR* pszWord = NULL;
  133. WCHAR* psz = NULL;
  134. USHORT i = 0;
  135. USHORT nextItem = 0;
  136. USHORT numSkipped = 0;
  137. SPDBG_ASSERT (wordList);
  138. SPDBG_ASSERT (wordCount);
  139. for ( i=0; i<*wordCount; i++ )
  140. {
  141. // If the first char is ' or " or ` then get rid of it
  142. if ( !wcscspn(wordList[i], L"\"'`") )
  143. {
  144. psz = wordList[i]+1;
  145. wcscpy(wordList[i], psz);
  146. psz = NULL;
  147. }
  148. // If the last char is ' or " or ` or one of these .,;:?! then get rid of it.
  149. // psz points to the last char
  150. psz = wordList[i] + wcslen(wordList[i]) - 1;
  151. if ( !wcscspn(psz, L"\"'`.,;:?!") )
  152. {
  153. psz[0] = L'\0';
  154. }
  155. }
  156. // reposition the list items, skipping empty strings
  157. for ( i=0; i<*wordCount; i++ )
  158. {
  159. if ( !wcslen(wordList[i]) )
  160. {
  161. nextItem = i+1;
  162. while ( nextItem < *wordCount && !wcslen(wordList[nextItem]) )
  163. {
  164. nextItem++;
  165. }
  166. if ( nextItem < *wordCount )
  167. {
  168. wordList[i] = wordList[nextItem];
  169. wordList[nextItem] = L"";
  170. }
  171. else
  172. {
  173. break; // out of items
  174. }
  175. }
  176. }
  177. *wordCount = i;
  178. SPDBG_REPORT_ON_FAIL( hr );
  179. return hr;
  180. }
  181. //////////////////////////////////////////////////////////////////////
  182. // AssembleText
  183. //
  184. // ppszText will be allocated.
  185. //
  186. /////////////////////////////////////////////////////// JOEM 7-2000 //
  187. inline HRESULT AssembleText(const int iStartWord, const int iEndWord, WCHAR** ppszWordList, WCHAR** ppszText)
  188. {
  189. SPDBG_FUNC( "AssembleText" );
  190. HRESULT hr = S_OK;
  191. int i = 0;
  192. int iStrLen = 0;
  193. for ( i=iStartWord; i<=iEndWord; i++ )
  194. {
  195. iStrLen += wcslen(ppszWordList[i]) + 1;
  196. }
  197. if ( iStrLen )
  198. {
  199. *ppszText = new WCHAR[iStrLen];
  200. if ( !*ppszText )
  201. {
  202. hr = E_OUTOFMEMORY;
  203. }
  204. else
  205. {
  206. (*ppszText)[0] = L'\0';
  207. for ( i=iStartWord; i<=iEndWord; i++ )
  208. {
  209. wcscat(*ppszText, ppszWordList[i]);
  210. if ( i < iEndWord )
  211. {
  212. wcscat(*ppszText, L" ");
  213. }
  214. }
  215. WStringUpperCase(*ppszText);
  216. }
  217. }
  218. SPDBG_REPORT_ON_FAIL( hr );
  219. return hr;
  220. }
  221. //////////////////////////////////////////////////////////////////////
  222. // RegularizeText
  223. //
  224. // Regularizes whitespace, optionally removing punctuation.
  225. //
  226. /////////////////////////////////////////////////////// JOEM 7-2000 //
  227. inline HRESULT RegularizeText(WCHAR* pszText, PUNC removePunc)
  228. {
  229. SPDBG_FUNC( "RegularizeText" );
  230. HRESULT hr = S_OK;
  231. WCHAR** wordList = NULL;
  232. WCHAR* pszNewText = NULL;
  233. USHORT wordCount = 0;
  234. if ( !pszText )
  235. {
  236. hr = E_INVALIDARG;
  237. }
  238. if ( SUCCEEDED(hr) )
  239. {
  240. hr = SplitWords (pszText, &wordList, &wordCount);
  241. }
  242. if ( SUCCEEDED(hr) && removePunc )
  243. {
  244. hr = RemovePunctuation(wordList, &wordCount);
  245. }
  246. if ( SUCCEEDED(hr) )
  247. {
  248. hr = AssembleText(0, wordCount-1, wordList, &pszNewText);
  249. }
  250. if ( SUCCEEDED(hr) && pszNewText )
  251. {
  252. WStringUpperCase(pszNewText);
  253. wcscpy(pszText, pszNewText);
  254. }
  255. if ( pszNewText )
  256. {
  257. delete [] pszNewText;
  258. pszNewText = NULL;
  259. }
  260. if ( wordList )
  261. {
  262. free(wordList);
  263. }
  264. SPDBG_REPORT_ON_FAIL( hr );
  265. return hr;
  266. }
  267. //////////////////////////////////////////////////////////////////////
  268. // FindUnicodeControlChar
  269. //
  270. /////////////////////////////////////////////////////// JOEM 7-2000 //
  271. inline WCHAR* FindUnicodeControlChar (WCHAR* pszText)
  272. {
  273. ULONG i = 0;
  274. while ( i<wcslen(pszText) && !iswcntrl(pszText[i]) )
  275. {
  276. i++;
  277. }
  278. if ( i == wcslen(pszText) )
  279. {
  280. return NULL;
  281. }
  282. else
  283. {
  284. return &pszText[i];
  285. }
  286. }
  287. //////////////////////////////////////////////////////////////////////
  288. // FileExist
  289. //
  290. //
  291. /////////////////////////////////////////////////////// JOEM 3-2000 //
  292. inline bool FileExist(const WCHAR *pszName)
  293. {
  294. SPDBG_FUNC( "FileExist" );
  295. FILE* fp;
  296. if ( !pszName || !wcslen(pszName) || ( (fp = _wfopen(pszName, L"r")) == NULL ) )
  297. {
  298. return false;
  299. }
  300. fclose (fp);
  301. return true;
  302. }
  303. #endif