Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1013 lines
23 KiB

  1. //////////////////////////////////////////////////////////////////////
  2. // File: stressTest.cpp
  3. //
  4. // Copyright (c) 2001 Microsoft Corporation. All Rights Reserved.
  5. //
  6. // Purpose:
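  7. // Stress test that crawls dictionary-generated URLs asynchronously with WinHTTP; worker threads dequeue Url objects from an I/O completion port.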
  8. //
  9. // History:
  10. // 05/24/2001 pmidge Created
  11. //
  12. //////////////////////////////////////////////////////////////////////
  13. #include "crawler.h"
  14. LPSTR g_szStressTestName = "AsyncWebCrawler";
  15. LPWSTR g_szDictPath = L"http://mildew/stress/xmldict/5000.xml";
  16. HINTERNET g_hSession = NULL;
  17. PXMLDICT g_pDictionary = NULL;
  18. HANDLE g_hIOCP = NULL;
  19. HANDLE g_evtMoreUrls = NULL;
  20. HANDLE g_evtQuit = NULL;
  21. HANDLE g_arThreads[WORKER_THREADS];
  22. LONG g_lRefCount = 0L;
  23. LONG g_lUrlObjsAlloc = 0L;
  24. LONG g_lUrlObjsFreed = 0L;
  25. DWORD WINAPI WorkerThread(LPVOID pv);
  // Notification mask passed to every WinHttpSetStatusCallback() call in this
  // file; each flag has a matching case in MyStatusCallback().
  #define CALLBACK_FLAGS ( WINHTTP_CALLBACK_STATUS_SENDREQUEST_COMPLETE | \
                           WINHTTP_CALLBACK_STATUS_REDIRECT             | \
                           WINHTTP_CALLBACK_STATUS_REQUEST_ERROR        | \
                           WINHTTP_CALLBACK_STATUS_HEADERS_AVAILABLE    | \
                           WINHTTP_CALLBACK_STATUS_DATA_AVAILABLE       | \
                           WINHTTP_CALLBACK_STATUS_READ_COMPLETE        | \
                           WINHTTP_CALLBACK_STATUS_HANDLE_CLOSING       | \
                           WINHTTP_CALLBACK_STATUS_CONNECTION_CLOSED )
  34. void
  35. AddRef(void)
  36. {
  37. InterlockedIncrement(&g_lRefCount);
  38. }
  39. void
  40. Release(void)
  41. {
  42. InterlockedDecrement(&g_lRefCount);
  43. if( g_lRefCount == 0 )
  44. SetEvent(g_evtQuit);
  45. }
  46. class Url
  47. {
  48. public:
  49. Url(LPSTR host, LPSTR object, USHORT port)
  50. {
  51. this->host = __ansitowide(host);
  52. this->object = __ansitowide(object);
  53. this->port = port;
  54. connect = NULL;
  55. request = NULL;
  56. bytes = 0L;
  57. read = 0L;
  58. closed = FALSE;
  59. buffer = NULL;
  60. qda = FALSE;
  61. pending = FALSE;
  62. InterlockedIncrement(&g_lUrlObjsAlloc);
  63. }
  64. ~Url()
  65. {
  66. if( !HandlesClosed() )
  67. {
  68. if( connect )
  69. {
  70. WinHttpSetStatusCallback(
  71. connect,
  72. NULL,
  73. CALLBACK_FLAGS,
  74. NULL
  75. );
  76. }
  77. if( request )
  78. {
  79. WinHttpSetStatusCallback(
  80. request,
  81. NULL,
  82. CALLBACK_FLAGS,
  83. NULL
  84. );
  85. }
  86. CloseHandles();
  87. }
  88. if( connect )
  89. Release();
  90. if( request )
  91. Release();
  92. delete [] host;
  93. delete [] object;
  94. if( buffer )
  95. delete [] buffer;
  96. InterlockedIncrement(&g_lUrlObjsFreed);
  97. }
  98. public:
  99. LPWSTR Host(void) { return host; }
  100. LPWSTR Object(void) { return object; }
  101. USHORT Port(void) { return port; }
  102. void Connect(HINTERNET hConnect) { AddRef(); connect = hConnect; }
  103. void Request(HINTERNET hRequest) { AddRef(); request = hRequest; }
  104. HINTERNET Connect(void) { return connect; }
  105. HINTERNET Request(void) { return request; }
  106. void Read(DWORD cbData);
  107. void CloseHandles(void) { closed=TRUE; WinHttpCloseHandle(request); WinHttpCloseHandle(connect); }
  108. BOOL HandlesClosed(void) { return closed; }
  109. BOOL IsConnect(HINTERNET hInternet) { return (hInternet == connect); }
  110. BOOL IsPending(void) { return pending; }
  111. private:
  112. LPWSTR host;
  113. LPWSTR object;
  114. USHORT port;
  115. HINTERNET connect;
  116. HINTERNET request;
  117. LPBYTE buffer;
  118. DWORD bytes;
  119. DWORD read;
  120. BOOL qda;
  121. BOOL pending;
  122. BOOL closed;
  123. };
  124. typedef class Url URL;
  125. typedef class Url* PURL;
  126. BOOL Initialize(void);
  127. void Cleanup(void);
  128. BOOL NavigateAsync(PURL pUrl);
  129. void DumpHeaders(PURL pUrl);
  130. DWORD GetContentLength(PURL pUrl);
  131. // main function
  132. BOOL
  133. WinHttp_StressTest()
  134. {
  135. BOOL bContinueStress = TRUE;
  136. BOOL bContinue = TRUE;
  137. BSTR bsWord = NULL;
  138. PURL pUrl = NULL;
  139. CHAR url[MAX_PATH];
  140. if( !Initialize() )
  141. {
  142. LogText("[tid=%#0.8x] failed to initialize, exiting", GetCurrentThreadId());
  143. bContinueStress = FALSE;
  144. goto exit;
  145. }
  146. while( bContinue && !IsTimeToExitStress() )
  147. {
  148. LogText("[tid=%#0.8x] processing urls...", GetCurrentThreadId());
  149. for(int n=0; n < 100; n++)
  150. {
  151. if( bsWord = g_pDictionary->GetWord() )
  152. {
  153. wsprintf(url, "www.%S.com", bsWord);
  154. if( pUrl = new URL(url, "/", 80) )
  155. PostQueuedCompletionStatus(g_hIOCP, 0L, (ULONG_PTR) pUrl, NULL);
  156. /*
  157. if( pUrl = new URL(url, "/", 443) )
  158. PostQueuedCompletionStatus(g_hIOCP, 0L, (ULONG_PTR) pUrl, NULL);
  159. */
  160. SysFreeString(bsWord);
  161. }
  162. else
  163. {
  164. LogText("[tid=%#0.8x] urls exhausted, signaling workers to exit", GetCurrentThreadId());
  165. //bContinueStress = FALSE; // DEBUGONLY
  166. bContinue = FALSE;
  167. break;
  168. }
  169. }
  170. WaitForSingleObject(g_evtMoreUrls, 120000);
  171. LogText("[tid=%#0.8x] url object stats: alloc=%d; freed=%d", GetCurrentThreadId(), g_lUrlObjsAlloc, g_lUrlObjsFreed);
  172. }
  173. //
  174. // post quit messages and wait
  175. //
  176. LogText("[tid=%#0.8x] waiting for threads to exit...", GetCurrentThreadId());
  177. for(int n=0; n < WORKER_THREADS; n++)
  178. {
  179. PostQueuedCompletionStatus(g_hIOCP, 0L, CK_QUIT_THREAD, NULL);
  180. }
  181. WaitForMultipleObjects(WORKER_THREADS, g_arThreads, TRUE, INFINITE);
  182. for(int n=0; n < WORKER_THREADS; n++)
  183. {
  184. CloseHandle(g_arThreads[n]);
  185. g_arThreads[n] = NULL;
  186. }
  187. while( g_lRefCount > 0 )
  188. {
  189. LogText("[tid=%#0.8x] waiting for %d internet handles...", GetCurrentThreadId(), g_lRefCount);
  190. WaitForSingleObject(g_evtQuit, 5000);
  191. }
  192. exit:
  193. Cleanup();
  194. LogText("[tid=%#0.8x] final url object stats: alloc=%d; freed=%d", GetCurrentThreadId(), g_lUrlObjsAlloc, g_lUrlObjsFreed);
  195. return bContinueStress;
  196. }
  197. DWORD
  198. WINAPI
  199. WorkerThread(LPVOID pv)
  200. {
  201. DWORD bytes = 0L;
  202. ULONG_PTR key = 0L;
  203. LPOVERLAPPED lpo = NULL;
  204. PURL pUrl = NULL;
  205. BOOL bQuit = FALSE;
  206. while( !bQuit )
  207. {
  208. if( !GetQueuedCompletionStatus(g_hIOCP, &bytes, &key, &lpo, 7000) )
  209. {
  210. if( GetLastError() == WAIT_TIMEOUT )
  211. {
  212. SetEvent(g_evtMoreUrls);
  213. continue;
  214. }
  215. }
  216. else
  217. {
  218. switch( key )
  219. {
  220. case CK_QUIT_THREAD :
  221. {
  222. bQuit = TRUE;
  223. }
  224. break;
  225. case NULL :
  226. {
  227. LogText("[tid=%#0.8x] ERROR! NULL pUrl dequeued!", GetCurrentThreadId());
  228. }
  229. break;
  230. default :
  231. {
  232. pUrl = (PURL) key;
  233. NavigateAsync(pUrl);
  234. }
  235. break;
  236. }
  237. }
  238. }
  239. LogText("[tid=%#0.8x] exiting", GetCurrentThreadId());
  240. return 1L;
  241. }
  242. BOOL
  243. NavigateAsync(PURL pUrl)
  244. {
  245. BOOL bRet = FALSE;
  246. DWORD dwError = ERROR_SUCCESS;
  247. HINTERNET hConnect = NULL;
  248. HINTERNET hRequest = NULL;
  249. LPCWSTR arAcceptTypes[] = {L"*/*",L"image/*",L"text/*",NULL};
  250. //-------------------------------------------------------------------------------------
  251. // open connect handle
  252. //-------------------------------------------------------------------------------------
  253. hConnect = WinHttpConnect(
  254. g_hSession,
  255. pUrl->Host(),
  256. pUrl->Port(),
  257. 0L
  258. );
  259. if( hConnect )
  260. {
  261. pUrl->Connect(hConnect);
  262. }
  263. else
  264. {
  265. dwError = GetLastError();
  266. LogText(
  267. "[tid=%#0.8x] WinHttpConnect failed for servername %S, error %d [%s]",
  268. GetCurrentThreadId(),
  269. pUrl->Host(),
  270. dwError,
  271. MapErrorToString(dwError)
  272. );
  273. goto quit;
  274. }
  275. //-------------------------------------------------------------------------------------
  276. // set the callback
  277. //-------------------------------------------------------------------------------------
  278. WinHttpSetStatusCallback(
  279. pUrl->Connect(),
  280. MyStatusCallback,
  281. CALLBACK_FLAGS,
  282. NULL
  283. );
  284. //-------------------------------------------------------------------------------------
  285. // open request handle
  286. //-------------------------------------------------------------------------------------
  287. hRequest = WinHttpOpenRequest(
  288. pUrl->Connect(),
  289. L"GET",
  290. pUrl->Object(),
  291. NULL,
  292. NULL,
  293. arAcceptTypes,
  294. ((pUrl->Port() == 80) ? 0L : WINHTTP_FLAG_SECURE)
  295. );
  296. if( hRequest )
  297. {
  298. pUrl->Request(hRequest);
  299. }
  300. else
  301. {
  302. dwError = GetLastError();
  303. LogText(
  304. "[tid=%#0.8x] WinHttpOpenRequest failed for %S, error %d [%s]",
  305. GetCurrentThreadId(),
  306. pUrl->Object(),
  307. dwError,
  308. MapErrorToString(dwError)
  309. );
  310. goto quit;
  311. }
  312. //-------------------------------------------------------------------------------------
  313. // send the request - this is the first opportunity for a call to go async
  314. //-------------------------------------------------------------------------------------
  315. if( !WinHttpSendRequest(pUrl->Request(), NULL, 0L, NULL, 0L, 0L, (DWORD_PTR) pUrl) )
  316. {
  317. dwError = GetLastError();
  318. if( dwError == ERROR_IO_PENDING )
  319. {
  320. #if 0
  321. LogText(
  322. "[tid=%#0.8x; con=%#0.8x; req=%#0.8x] %s://%S%S request went async...",
  323. GetCurrentThreadId(),
  324. hConnect,
  325. hRequest,
  326. ((pUrl->Port() == 80) ? "http" : "https"),
  327. pUrl->Host(),
  328. pUrl->Object()
  329. );
  330. #endif
  331. }
  332. else
  333. {
  334. LogText(
  335. "[tid=%#0.8x; con=%#0.8x; req=%#0.8x] %s://%S%S request failed: %d [%s]!",
  336. GetCurrentThreadId(),
  337. hConnect,
  338. hRequest,
  339. ((pUrl->Port() == 80) ? "http" : "https"),
  340. pUrl->Host(),
  341. pUrl->Object(),
  342. dwError,
  343. MapErrorToString(dwError)
  344. );
  345. goto quit;
  346. }
  347. }
  348. else
  349. {
  350. LogText("[tid=%#0.8x] ERROR! WinHttpSendRequest returned TRUE in async mode!!!", GetCurrentThreadId());
  351. goto quit;
  352. }
  353. //-------------------------------------------------------------------------------------
  354. // if we get here, we've succeeded in our mission, set exit code to true
  355. //-------------------------------------------------------------------------------------
  356. bRet = TRUE;
  357. quit:
  358. //-------------------------------------------------------------------------------------
  359. // handle errors and exit
  360. //-------------------------------------------------------------------------------------
  361. if( !bRet )
  362. delete pUrl;
  363. return bRet;
  364. }
  365. VOID
  366. CALLBACK
  367. MyStatusCallback(
  368. HINTERNET hInternet,
  369. DWORD_PTR dwContext,
  370. DWORD dwInternetStatus,
  371. LPVOID lpvStatusInformation,
  372. DWORD dwStatusInformationLength
  373. )
  374. {
  375. PURL pUrl = (PURL) dwContext;
  376. #if 0
  377. LogText(
  378. "[tid=%#0.8x; con=%#0.8x; req=%#0.8x] %s://%S%S in %s",
  379. GetCurrentThreadId(),
  380. pUrl->Connect(),
  381. pUrl->Request(),
  382. ((pUrl->Port() == 80) ? "http" : "https"),
  383. pUrl->Host(),
  384. pUrl->Object(),
  385. MapCallbackToString(dwInternetStatus)
  386. );
  387. #endif
  388. switch(dwInternetStatus)
  389. {
  390. case WINHTTP_CALLBACK_STATUS_SENDREQUEST_COMPLETE :
  391. {
  392. //
  393. // a WHSR call is completing
  394. //
  395. WinHttpReceiveResponse(pUrl->Request(), NULL);
  396. }
  397. break;
  398. case WINHTTP_CALLBACK_STATUS_REDIRECT :
  399. {
  400. pUrl->CloseHandles();
  401. }
  402. break;
  403. case WINHTTP_CALLBACK_STATUS_HEADERS_AVAILABLE :
  404. {
  405. //
  406. // a WHRR call is completing
  407. //
  408. pUrl->Read(GetContentLength(pUrl));
  409. //pUrl->Read(0);
  410. }
  411. break;
  412. case WINHTTP_CALLBACK_STATUS_DATA_AVAILABLE :
  413. {
  414. //
  415. // a WHQDA call is completing
  416. //
  417. pUrl->Read(dwStatusInformationLength);
  418. }
  419. break;
  420. case WINHTTP_CALLBACK_STATUS_READ_COMPLETE :
  421. {
  422. //
  423. // a WHRD call is completing
  424. //
  425. #if 0
  426. DataDump((LPBYTE) lpvStatusInformation, dwStatusInformationLength);
  427. #endif
  428. pUrl->Read(dwStatusInformationLength);
  429. }
  430. break;
  431. case WINHTTP_CALLBACK_STATUS_CONNECTION_CLOSED :
  432. {
  433. if( !pUrl->IsPending() )
  434. {
  435. if( !pUrl->HandlesClosed() )
  436. {
  437. pUrl->CloseHandles();
  438. }
  439. }
  440. }
  441. break;
  442. case WINHTTP_CALLBACK_STATUS_HANDLE_CLOSING :
  443. {
  444. //
  445. // we're done with this particular URL
  446. //
  447. if( pUrl->IsConnect(hInternet) )
  448. delete pUrl;
  449. }
  450. break;
  451. case WINHTTP_CALLBACK_STATUS_REQUEST_ERROR :
  452. {
  453. #if 0
  454. WINHTTP_ASYNC_RESULT* pwar = (WINHTTP_ASYNC_RESULT*) lpvStatusInformation;
  455. LogText(
  456. "[tid=%#0.8x; hInternet=%#0.8x] async api error: dwResult=%d; dwError=%s",
  457. GetCurrentThreadId(),
  458. hInternet,
  459. pwar->dwResult,
  460. MapAsyncErrorToString(pwar->dwError)
  461. );
  462. #endif
  463. pUrl->CloseHandles();
  464. }
  465. break;
  466. }
  467. }
  468. void
  469. Url::Read(DWORD cbData)
  470. {
  471. BOOL bSuccess = FALSE;
  472. DWORD dwError = ERROR_SUCCESS;
  473. query_data:
  474. // if a read is pending, we know that we're handling a READ_COMPLETE callback
  475. if( !pending )
  476. {
  477. // if we haven't recently called WHQDA, do so and handle errors
  478. if( !qda && !(bytes = cbData) )
  479. {
  480. bSuccess = WinHttpQueryDataAvailable(request, &bytes);
  481. dwError = GetLastError();
  482. if( !bSuccess )
  483. {
  484. if( dwError != ERROR_IO_PENDING )
  485. {
  486. CloseHandles();
  487. }
  488. else
  489. {
  490. qda = TRUE;
  491. }
  492. return;
  493. }
  494. }
  495. // we got here, so there must be some data to read, reset the QDA flag and read data.
  496. qda = FALSE;
  497. buffer = new BYTE[bytes];
  498. bSuccess = WinHttpReadData(request, (LPVOID) buffer, bytes, &read);
  499. dwError = GetLastError();
  500. if( bSuccess && (read == 0) )
  501. {
  502. CloseHandles();
  503. }
  504. else
  505. {
  506. if( dwError == ERROR_IO_PENDING )
  507. {
  508. pending = TRUE;
  509. }
  510. else
  511. {
  512. CloseHandles();
  513. }
  514. }
  515. }
  516. else
  517. {
  518. // an async read has completed, did we read anything? if not, close handles and return,
  519. // otherwise free the old buffer and reset our internal state. then, to keep things
  520. // rolling, loop back up and call WHQDA.
  521. if( cbData == 0 )
  522. {
  523. pending = FALSE;
  524. CloseHandles();
  525. }
  526. else
  527. {
  528. delete [] buffer;
  529. buffer = NULL;
  530. bytes = 0;
  531. read = 0;
  532. cbData = 0;
  533. pending = FALSE;
  534. goto query_data;
  535. }
  536. }
  537. }
  538. DWORD
  539. GetContentLength(PURL pUrl)
  540. {
  541. DWORD dwCL = 0L;
  542. DWORD cbData = sizeof(DWORD);
  543. WinHttpQueryHeaders(
  544. pUrl->Request(),
  545. WINHTTP_QUERY_CONTENT_LENGTH + WINHTTP_QUERY_FLAG_NUMBER,
  546. NULL,
  547. &dwCL,
  548. &cbData,
  549. NULL
  550. );
  551. SetLastError(0);
  552. return dwCL;
  553. }
  554. BOOL
  555. Initialize(void)
  556. {
  557. BOOL bRet = FALSE;
  558. DWORD dwError = 0L;
  559. if( FAILED(CoInitializeEx(NULL, COINIT_MULTITHREADED)) )
  560. {
  561. LogText("failed to initialize COM");
  562. goto exit;
  563. }
  564. //-------------------------------------------------------------------------------------
  565. // open dictionary file
  566. //-------------------------------------------------------------------------------------
  567. if( !g_pDictionary )
  568. {
  569. g_pDictionary = new XMLDICT(g_szDictPath);
  570. if( !g_pDictionary )
  571. {
  572. goto exit;
  573. }
  574. if( g_pDictionary->IsLoaded() )
  575. {
  576. LogText("dictionary loaded.");
  577. }
  578. else
  579. {
  580. goto exit;
  581. }
  582. }
  583. //-------------------------------------------------------------------------------------
  584. // create completion port
  585. //-------------------------------------------------------------------------------------
  586. g_hIOCP = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0L, MAX_CONCURRENT);
  587. if( !g_hIOCP )
  588. {
  589. dwError = GetLastError();
  590. LogText("failed to open completion port, error %d [%s]", dwError, MapErrorToString(dwError));
  591. goto exit;
  592. }
  593. //-------------------------------------------------------------------------------------
  594. // create worker threads
  595. //-------------------------------------------------------------------------------------
  596. for(int n=0; n < WORKER_THREADS; n++)
  597. {
  598. g_arThreads[n] = CreateThread(
  599. NULL,
  600. 0L,
  601. WorkerThread,
  602. NULL,
  603. 0L,
  604. NULL
  605. );
  606. if( !g_arThreads[n] )
  607. {
  608. dwError = GetLastError();
  609. LogText("failed to create thread %d, error %d [%s]", n, dwError, MapErrorToString(dwError));
  610. goto exit;
  611. }
  612. }
  613. //-------------------------------------------------------------------------------------
  614. // create 'no more urls' event
  615. //-------------------------------------------------------------------------------------
  616. g_evtMoreUrls = CreateEvent(NULL, FALSE, FALSE, NULL);
  617. if( !g_evtMoreUrls )
  618. {
  619. dwError = GetLastError();
  620. LogText("failed to create url event, error %d [%s]", dwError, MapErrorToString(dwError));
  621. goto exit;
  622. }
  623. //-------------------------------------------------------------------------------------
  624. // create 'all requests complete' event
  625. //-------------------------------------------------------------------------------------
  626. g_evtQuit = CreateEvent(NULL, FALSE, FALSE, NULL);
  627. if( !g_evtQuit )
  628. {
  629. dwError = GetLastError();
  630. LogText("failed to create quit event, error %d [%s]", dwError, MapErrorToString(dwError));
  631. goto exit;
  632. }
  633. //-------------------------------------------------------------------------------------
  634. // open shared session handle
  635. //-------------------------------------------------------------------------------------
  636. g_hSession = WinHttpOpen(
  637. L"foo",
  638. WINHTTP_ACCESS_TYPE_NAMED_PROXY,
  639. L"itgproxy",
  640. L"<local>",
  641. WINHTTP_FLAG_ASYNC
  642. );
  643. if( !g_hSession )
  644. {
  645. dwError = GetLastError();
  646. LogText("failed to open winhttp, error %d [%s]", dwError, MapErrorToString(dwError));
  647. goto exit;
  648. }
  649. //-------------------------------------------------------------------------------------
  650. // set global timeouts
  651. //-------------------------------------------------------------------------------------
  652. bRet = WinHttpSetTimeouts(
  653. g_hSession,
  654. 60000, // resolve
  655. 10000, // connect
  656. 5000, // send
  657. 5000 // receive
  658. );
  659. if( !bRet )
  660. {
  661. dwError = GetLastError();
  662. LogText("failed to set timeouts, error %d [%s]", dwError, MapErrorToString(dwError));
  663. }
  664. exit:
  665. return bRet;
  666. }
  667. void
  668. Cleanup(void)
  669. {
  670. if( g_pDictionary )
  671. {
  672. //g_pDictionary->Reset();
  673. delete g_pDictionary;
  674. g_pDictionary = NULL;
  675. }
  676. if( g_hIOCP != NULL )
  677. {
  678. CloseHandle(g_hIOCP);
  679. g_hIOCP = NULL;
  680. }
  681. if( g_evtMoreUrls != NULL )
  682. {
  683. CloseHandle(g_evtMoreUrls);
  684. g_evtMoreUrls = NULL;
  685. }
  686. if( g_evtQuit != NULL )
  687. {
  688. CloseHandle(g_evtQuit);
  689. g_evtQuit = NULL;
  690. }
  691. if( g_hSession )
  692. {
  693. WinHttpCloseHandle(g_hSession);
  694. g_hSession = NULL;
  695. }
  696. CoUninitialize();
  697. }
  698. XMLDict::XMLDict(LPWSTR dictname)
  699. {
  700. HRESULT hr = S_OK;
  701. BSTR tag = NULL;
  702. VARIANT_BOOL bSuccess = VARIANT_FALSE;
  703. VARIANT doc;
  704. LogText("loading dictionary...");
  705. hr = CoCreateInstance(
  706. CLSID_DOMDocument,
  707. NULL,
  708. CLSCTX_INPROC_SERVER,
  709. IID_IXMLDOMDocument,
  710. (void**) &pDoc
  711. );
  712. if( SUCCEEDED(hr) )
  713. {
  714. hr = pDoc->put_async(bSuccess);
  715. VariantInit(&doc);
  716. V_VT(&doc) = VT_BSTR;
  717. V_BSTR(&doc) = SysAllocString(dictname);
  718. hr = pDoc->load(doc, &bSuccess);
  719. if( FAILED(hr) || (bSuccess == VARIANT_FALSE) )
  720. {
  721. LogText("failed to load xml dictionary");
  722. goto quit;
  723. }
  724. hr = pDoc->get_documentElement(&pRoot);
  725. if( FAILED(hr) )
  726. {
  727. LogText("couldn\'t find root node!");
  728. goto quit;
  729. }
  730. tag = SysAllocString(L"keyphrase");
  731. hr = pDoc->getElementsByTagName(tag, &pList);
  732. if( FAILED(hr) )
  733. {
  734. LogText("couldn\'t find any words!");
  735. goto quit;
  736. }
  737. hr = pList->get_length(&lWords);
  738. if( FAILED(hr) )
  739. {
  740. LogText("couldn\'t determine the number of words in the list!");
  741. }
  742. szPattern = SysAllocString(L"string");
  743. lCurrentWord = 0L;
  744. }
  745. quit:
  746. VariantClear(&doc);
  747. if( tag )
  748. {
  749. SysFreeString(tag);
  750. }
  751. }
  752. XMLDict::~XMLDict()
  753. {
  754. LogText("unloading dictionary...");
  755. if( szPattern )
  756. {
  757. SysFreeString(szPattern);
  758. }
  759. if( pList )
  760. {
  761. pList->Release();
  762. }
  763. if( pRoot )
  764. {
  765. pRoot->Release();
  766. }
  767. if( pDoc )
  768. {
  769. pDoc->Release();
  770. }
  771. }
  772. BOOL
  773. XMLDict::IsLoaded(void)
  774. {
  775. LONG state = 0L;
  776. if( pDoc )
  777. {
  778. pDoc->get_readyState(&state);
  779. }
  780. else
  781. {
  782. return state;
  783. }
  784. return (state == 4);
  785. }
  786. BSTR
  787. XMLDict::GetWord(void)
  788. {
  789. HRESULT hr = S_OK;
  790. IXMLDOMNode* pEntry = NULL;
  791. IXMLDOMNode* pWord = NULL;
  792. BSTR bsWord = NULL;
  793. do_over:
  794. hr = pList->get_item(lCurrentWord, &pEntry);
  795. if( FAILED(hr) || !pEntry )
  796. goto quit;
  797. ++lCurrentWord;
  798. hr = pEntry->selectSingleNode(szPattern, &pWord);
  799. if( FAILED(hr) || !pWord )
  800. goto quit;
  801. hr = pWord->get_text(&bsWord);
  802. if( FAILED(hr) )
  803. goto quit;
  804. // some of the words in the dictionary have apostrophes. urls can't have
  805. // apostrophes, so we strip them out.
  806. if( wcschr(bsWord, L'\'') )
  807. {
  808. SysFreeString(bsWord);
  809. bsWord = NULL;
  810. pEntry->Release();
  811. pWord->Release();
  812. goto do_over;
  813. }
  814. quit:
  815. if( pEntry )
  816. pEntry->Release();
  817. if( pWord )
  818. pWord->Release();
  819. return bsWord;
  820. }