Leaked source code of Windows Server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1062 lines
29 KiB

  1. /*
  2. * X P R S . C P P
  3. *
  4. * XML push model parsing
  5. *
  6. * Copyright 1986-1997 Microsoft Corporation, All Rights Reserved
  7. */
  8. #include "_xmllib.h"
  9. #define IID_INodeFactory __uuidof(INodeFactory)
  10. #define IID_IXMLParser __uuidof(IXMLParser)
  11. // Debugging: Node types -----------------------------------------------------
  12. //
// Fallback name returned by the Pwsz*() lookup helpers in this file when a
// value has no entry in the corresponding debug name table (always the case
// in non-DBG builds, where the tables are compiled out).
//
DEC_CONST WCHAR gc_wszUnknown[] = L"UNKNOWN";
  14. #define WszNodeType(_t) {_t,L#_t}
  15. typedef struct _NodeTypeMap {
  16. DWORD dwType;
  17. LPCWSTR wszType;
  18. } NTM;
  19. #ifdef DBG
  20. const NTM gc_mpnt[] = {
  21. #pragma warning(disable:4245) // signed/unsigned conversion
  22. WszNodeType(XML_ELEMENT),
  23. WszNodeType(XML_ATTRIBUTE),
  24. WszNodeType(XML_PI),
  25. WszNodeType(XML_XMLDECL),
  26. WszNodeType(XML_DOCTYPE),
  27. WszNodeType(XML_DTDATTRIBUTE),
  28. WszNodeType(XML_ENTITYDECL),
  29. WszNodeType(XML_ELEMENTDECL),
  30. WszNodeType(XML_ATTLISTDECL),
  31. WszNodeType(XML_NOTATION),
  32. WszNodeType(XML_GROUP),
  33. WszNodeType(XML_INCLUDESECT),
  34. WszNodeType(XML_PCDATA),
  35. WszNodeType(XML_CDATA),
  36. WszNodeType(XML_IGNORESECT),
  37. WszNodeType(XML_COMMENT),
  38. WszNodeType(XML_ENTITYREF),
  39. WszNodeType(XML_WHITESPACE),
  40. WszNodeType(XML_NAME),
  41. WszNodeType(XML_NMTOKEN),
  42. WszNodeType(XML_STRING),
  43. WszNodeType(XML_PEREF),
  44. WszNodeType(XML_MODEL),
  45. WszNodeType(XML_ATTDEF),
  46. WszNodeType(XML_ATTTYPE),
  47. WszNodeType(XML_ATTPRESENCE),
  48. WszNodeType(XML_DTDSUBSET),
  49. WszNodeType(XML_LASTNODETYPE)
  50. #pragma warning(default:4245) // signed/unsigned conversion
  51. };
  52. #endif // DBG
  53. inline LPCWSTR
  54. PwszNodeType (DWORD dwType)
  55. {
  56. #ifdef DBG
  57. for (UINT i = 0; i < CElems(gc_mpnt); i++)
  58. if (gc_mpnt[i].dwType == dwType)
  59. return gc_mpnt[i].wszType;
  60. #endif // DBG
  61. return gc_wszUnknown;
  62. }
  63. // Debugging: Sub-node Types -------------------------------------------------
  64. //
  65. #ifdef DBG
  66. const NTM gc_mpsnt[] = {
  67. #pragma warning(disable:4245) // signed/unsigned conversion
  68. WszNodeType(0),
  69. WszNodeType(XML_VERSION),
  70. WszNodeType(XML_ENCODING),
  71. WszNodeType(XML_STANDALONE),
  72. WszNodeType(XML_NS),
  73. WszNodeType(XML_XMLSPACE),
  74. WszNodeType(XML_XMLLANG),
  75. WszNodeType(XML_SYSTEM),
  76. WszNodeType(XML_PUBLIC),
  77. WszNodeType(XML_NDATA),
  78. WszNodeType(XML_AT_CDATA),
  79. WszNodeType(XML_AT_ID),
  80. WszNodeType(XML_AT_IDREF),
  81. WszNodeType(XML_AT_IDREFS),
  82. WszNodeType(XML_AT_ENTITY),
  83. WszNodeType(XML_AT_ENTITIES),
  84. WszNodeType(XML_AT_NMTOKEN),
  85. WszNodeType(XML_AT_NMTOKENS),
  86. WszNodeType(XML_AT_NOTATION),
  87. WszNodeType(XML_AT_REQUIRED),
  88. WszNodeType(XML_AT_IMPLIED),
  89. WszNodeType(XML_AT_FIXED),
  90. WszNodeType(XML_PENTITYDECL),
  91. WszNodeType(XML_EMPTY),
  92. WszNodeType(XML_ANY),
  93. WszNodeType(XML_MIXED),
  94. WszNodeType(XML_SEQUENCE),
  95. WszNodeType(XML_CHOICE),
  96. WszNodeType(XML_STAR),
  97. WszNodeType(XML_PLUS),
  98. WszNodeType(XML_QUESTIONMARK),
  99. WszNodeType(XML_LASTSUBNODETYPE)
  100. #pragma warning(default:4245) // signed/unsigned conversion
  101. };
  102. #endif // DBG
  103. inline LPCWSTR
  104. PwszSubnodeType (DWORD dwType)
  105. {
  106. #ifdef DBG
  107. for (UINT i = 0; i < CElems(gc_mpsnt); i++)
  108. if (gc_mpsnt[i].dwType == dwType)
  109. return gc_mpsnt[i].wszType;
  110. #endif // DBG
  111. return gc_wszUnknown;
  112. }
  113. // Debugging: Events ---------------------------------------------------------
  114. //
  115. #ifdef DBG
  116. const NTM gc_mpevt[] = {
  117. #pragma warning(disable:4245) // signed/unsigned conversion
  118. WszNodeType(XMLNF_STARTDOCUMENT),
  119. WszNodeType(XMLNF_STARTDTD),
  120. WszNodeType(XMLNF_ENDDTD),
  121. WszNodeType(XMLNF_STARTDTDSUBSET),
  122. WszNodeType(XMLNF_ENDDTDSUBSET),
  123. WszNodeType(XMLNF_ENDPROLOG),
  124. WszNodeType(XMLNF_STARTENTITY),
  125. WszNodeType(XMLNF_ENDENTITY),
  126. WszNodeType(XMLNF_ENDDOCUMENT),
  127. WszNodeType(XMLNF_DATAAVAILABLE)
  128. #pragma warning(default:4245) // signed/unsigned conversion
  129. };
  130. #endif // DBG
  131. inline LPCWSTR
  132. PwszEvent (DWORD dwType)
  133. {
  134. #ifdef DBG
  135. for (UINT i = 0; i < CElems(gc_mpevt); i++)
  136. if (gc_mpevt[i].dwType == dwType)
  137. return gc_mpevt[i].wszType;
  138. #endif // DBG
  139. return gc_wszUnknown;
  140. }
  141. // Error codes ---------------------------------------------------------------
  142. //
  143. #ifdef DBG
  144. const NTM gc_mpec[] = {
  145. #pragma warning(disable:4245) // signed/unsigned conversion
  146. WszNodeType(XML_E_ENDOFINPUT),
  147. WszNodeType(XML_E_UNCLOSEDPI),
  148. WszNodeType(XML_E_MISSINGEQUALS),
  149. WszNodeType(XML_E_UNCLOSEDSTARTTAG),
  150. WszNodeType(XML_E_UNCLOSEDENDTAG),
  151. WszNodeType(XML_E_UNCLOSEDSTRING),
  152. WszNodeType(XML_E_MISSINGQUOTE),
  153. WszNodeType(XML_E_COMMENTSYNTAX),
  154. WszNodeType(XML_E_UNCLOSEDCOMMENT),
  155. WszNodeType(XML_E_BADSTARTNAMECHAR),
  156. WszNodeType(XML_E_BADNAMECHAR),
  157. WszNodeType(XML_E_UNCLOSEDDECL),
  158. WszNodeType(XML_E_BADCHARINSTRING),
  159. WszNodeType(XML_E_XMLDECLSYNTAX),
  160. WszNodeType(XML_E_BADCHARDATA),
  161. WszNodeType(XML_E_UNCLOSEDMARKUPDECL),
  162. WszNodeType(XML_E_UNCLOSEDCDATA),
  163. WszNodeType(XML_E_MISSINGWHITESPACE),
  164. WszNodeType(XML_E_BADDECLNAME),
  165. WszNodeType(XML_E_BADEXTERNALID),
  166. WszNodeType(XML_E_EXPECTINGTAGEND),
  167. WszNodeType(XML_E_BADCHARINDTD),
  168. WszNodeType(XML_E_BADELEMENTINDTD),
  169. WszNodeType(XML_E_BADCHARINDECL),
  170. WszNodeType(XML_E_MISSINGSEMICOLON),
  171. WszNodeType(XML_E_BADCHARINENTREF),
  172. WszNodeType(XML_E_UNBALANCEDPAREN),
  173. WszNodeType(XML_E_EXPECTINGOPENBRACKET),
  174. WszNodeType(XML_E_BADENDCONDSECT),
  175. WszNodeType(XML_E_RESERVEDNAMESPACE),
  176. WszNodeType(XML_E_INTERNALERROR),
  177. WszNodeType(XML_E_EXPECTING_VERSION),
  178. WszNodeType(XML_E_EXPECTING_ENCODING),
  179. WszNodeType(XML_E_EXPECTING_NAME),
  180. WszNodeType(XML_E_UNEXPECTED_WHITESPACE),
  181. WszNodeType(XML_E_UNEXPECTED_ATTRIBUTE),
  182. WszNodeType(XML_E_SUSPENDED),
  183. WszNodeType(XML_E_STOPPED),
  184. WszNodeType(XML_E_UNEXPECTEDENDTAG),
  185. WszNodeType(XML_E_ENDTAGMISMATCH),
  186. WszNodeType(XML_E_UNCLOSEDTAG),
  187. WszNodeType(XML_E_DUPLICATEATTRIBUTE),
  188. WszNodeType(XML_E_MULTIPLEROOTS),
  189. WszNodeType(XML_E_INVALIDATROOTLEVEL),
  190. WszNodeType(XML_E_BADXMLDECL),
  191. WszNodeType(XML_E_INVALIDENCODING),
  192. WszNodeType(XML_E_INVALIDSWITCH),
  193. WszNodeType(XML_E_MISSINGROOT),
  194. WszNodeType(XML_E_INCOMPLETE_ENCODING),
  195. WszNodeType(XML_E_EXPECTING_NDATA),
  196. WszNodeType(XML_E_INVALID_MODEL),
  197. WszNodeType(XML_E_BADCHARINMIXEDMODEL),
  198. WszNodeType(XML_E_MISSING_STAR),
  199. WszNodeType(XML_E_BADCHARINMODEL),
  200. WszNodeType(XML_E_MISSING_PAREN),
  201. WszNodeType(XML_E_INVALID_TYPE),
  202. WszNodeType(XML_E_INVALIDXMLSPACE),
  203. WszNodeType(XML_E_MULTI_ATTR_VALUE),
  204. WszNodeType(XML_E_INVALID_PRESENCE),
  205. WszNodeType(XML_E_BADCHARINENUMERATION),
  206. WszNodeType(XML_E_UNEXPECTEDEOF),
  207. WszNodeType(XML_E_BADPEREFINSUBSET),
  208. WszNodeType(XML_E_BADXMLCASE),
  209. WszNodeType(XML_E_CONDSECTINSUBSET),
  210. WszNodeType(XML_E_CDATAINVALID),
  211. WszNodeType(XML_E_INVALID_STANDALONE),
  212. WszNodeType(XML_E_PE_NESTING),
  213. WszNodeType(XML_E_UNEXPECTED_STANDALONE),
  214. WszNodeType(XML_E_DOCTYPE_IN_DTD),
  215. WszNodeType(XML_E_INVALID_CDATACLOSINGTAG),
  216. WszNodeType(XML_E_PIDECLSYNTAX),
  217. WszNodeType(XML_E_EXPECTINGCLOSEQUOTE),
  218. WszNodeType(XML_E_DTDELEMENT_OUTSIDE_DTD),
  219. WszNodeType(XML_E_DUPLICATEDOCTYPE),
  220. WszNodeType(XML_E_MISSING_ENTITY),
  221. WszNodeType(XML_E_ENTITYREF_INNAME),
  222. WszNodeType(XML_E_DOCTYPE_OUTSIDE_PROLOG),
  223. WszNodeType(XML_E_INVALID_VERSION),
  224. WszNodeType(XML_E_MULTIPLE_COLONS),
  225. WszNodeType(XML_E_INVALID_DECIMAL),
  226. WszNodeType(XML_E_INVALID_HEXIDECIMAL),
  227. WszNodeType(XML_E_INVALID_UNICODE),
  228. WszNodeType(XML_E_RESOURCE),
  229. WszNodeType(XML_E_LASTERROR)
  230. #pragma warning(default:4245) // signed/unsigned conversion
  231. };
  232. #endif // DBG
  233. inline LPCWSTR
  234. PwszErrorCode (SCODE sc)
  235. {
  236. #ifdef DBG
  237. for (UINT i = 0; i < CElems(gc_mpec); i++)
  238. if (gc_mpec[i].dwType == static_cast<DWORD>(sc))
  239. return gc_mpec[i].wszType;
  240. #endif // DBG
  241. return gc_wszUnknown;
  242. }
  243. void __fastcall
  244. XmlTraceNodeInfo (const XML_NODE_INFO * pNodeInfo)
  245. {
  246. #ifdef DBG
  247. CStackBuffer<WCHAR,MAX_PATH> pwsz(CbSizeWsz(pNodeInfo->ulLen));
  248. if (NULL != pwsz.get())
  249. {
  250. wcsncpy(pwsz.get(), pNodeInfo->pwcText, pNodeInfo->ulLen);
  251. pwsz[pNodeInfo->ulLen] = 0;
  252. }
  253. else
  254. {
  255. XmlTrace ("XML: WARNING: not enough memory to trace\n");
  256. return;
  257. }
  258. // _XML_NODE_INFO
  259. //
  260. // typedef struct _XML_NODE_INFO {
  261. //
  262. // DWORD dwType;
  263. // DWORD dwSubType;
  264. // BOOL fTerminal;
  265. // WCHAR __RPC_FAR *pwcText;
  266. // ULONG ulLen;
  267. // ULONG ulNsPrefixLen;
  268. // PVOID pNode;
  269. // PVOID pReserved;
  270. //
  271. // } XML_NODE_INFO;
  272. //
  273. XmlTrace ("- pNodeInfo:\n"
  274. "-- dwSize: %ld bytes\n"
  275. "-- dwType: %ws (0x%08X)\n"
  276. "-- dwSubType: %ws (0x%08X)\n"
  277. "-- fTerminal: %ld\n"
  278. "-- pwcText: '%ws'\n"
  279. "-- ulLen: %ld (0x%08X)\n"
  280. "-- ulNsPrefixLen: %ld (0x%08X)\n"
  281. "-- pNode: 0x%08X\n"
  282. "-- pReserved: 0x%08X\n",
  283. pNodeInfo->dwSize,
  284. PwszNodeType(pNodeInfo->dwType), pNodeInfo->dwType,
  285. PwszSubnodeType(pNodeInfo->dwSubType), pNodeInfo->dwSubType,
  286. static_cast<DWORD>(pNodeInfo->fTerminal),
  287. pwsz.get(),
  288. pNodeInfo->ulLen, pNodeInfo->ulLen,
  289. pNodeInfo->ulNsPrefixLen, pNodeInfo->ulNsPrefixLen,
  290. pNodeInfo->pNode,
  291. pNodeInfo->pReserved);
  292. #endif // DBG
  293. }
  294. void __fastcall
  295. XmlTraceCountedNodeInfo (const USHORT cNumRecs, XML_NODE_INFO **apNodeInfo)
  296. {
  297. #ifdef DBG
  298. for (USHORT iNi = 0; iNi < cNumRecs; iNi++)
  299. XmlTraceNodeInfo (*apNodeInfo++);
  300. #endif // DBG
  301. }
  302. // EXO class statics ---------------------------------------------------------
  303. //
// Interface table for CNodeFactory.  The only interface mapped here is
// IXMLNodeFactory.
//
BEGIN_INTERFACE_TABLE(CNodeFactory)
INTERFACE_MAP(CNodeFactory, IXMLNodeFactory)
END_INTERFACE_TABLE(CNodeFactory);
// NOTE(review): presumably emits the EXO class statics for CNodeFactory with
// EXO as its base -- confirm against the EXO macro definitions.
//
EXO_GLOBAL_DATA_DECL(CNodeFactory, EXO);
  308. // class CNodeFactory --------------------------------------------------------
  309. //
  310. HRESULT STDMETHODCALLTYPE CNodeFactory::NotifyEvent(
  311. /* [in] */ IXMLNodeSource __RPC_FAR*,
  312. /* [in] */ XML_NODEFACTORY_EVENT iEvt)
  313. {
  314. XmlTrace ("Xml: INodeFactory::NotifyEvent() called\n");
  315. XmlTrace ("- iEvt: %ws (0x%08X)\n", PwszEvent(iEvt), iEvt);
  316. switch (iEvt)
  317. {
  318. case XMLNF_STARTDOCUMENT:
  319. // Take note that we have started processing a document
  320. //
  321. m_state = ST_PROLOGUE;
  322. break;
  323. case XMLNF_ENDPROLOG:
  324. // Take note that we have completed prologue processing
  325. // and are now processing the document body.
  326. //
  327. Assert (m_state == ST_PROLOGUE);
  328. m_state = ST_INDOC;
  329. break;
  330. case XMLNF_ENDDOCUMENT:
  331. // The state should be an error or document state
  332. //
  333. m_state = ST_NODOC;
  334. break;
  335. case XMLNF_DATAAVAILABLE:
  336. // More data got pushed to the XMLParser. There is no
  337. // specific action for us, but we shouldn't fail this
  338. // either.
  339. //
  340. break;
  341. case XMLNF_STARTDTD:
  342. case XMLNF_ENDDTD:
  343. case XMLNF_STARTDTDSUBSET:
  344. case XMLNF_ENDDTDSUBSET:
  345. case XMLNF_STARTENTITY:
  346. case XMLNF_ENDENTITY:
  347. default:
  348. // Unhandled notications
  349. //
  350. return E_DAV_XML_PARSE_ERROR;
  351. }
  352. return S_OK;
  353. }
  354. HRESULT STDMETHODCALLTYPE CNodeFactory::BeginChildren(
  355. /* [in] */ IXMLNodeSource __RPC_FAR*,
  356. /* [in] */ XML_NODE_INFO __RPC_FAR *pNodeInfo)
  357. {
  358. XmlTrace ("Xml: INodeFactory::BeginChildren() called\n");
  359. XmlTraceNodeInfo (pNodeInfo);
  360. // There should be no required action in our parsing
  361. // mechanism here.
  362. //
  363. return S_OK;
  364. }
  365. HRESULT STDMETHODCALLTYPE CNodeFactory::EndChildren(
  366. /* [in] */ IXMLNodeSource __RPC_FAR*,
  367. /* [in] */ BOOL fEmpty,
  368. /* [in] */ XML_NODE_INFO __RPC_FAR *pNodeInfo)
  369. {
  370. XmlTrace ("Xml: INodeFactory::EndChildren() called\n");
  371. XmlTrace ("- fEmtpy: %ld\n", static_cast<DWORD>(fEmpty));
  372. XmlTraceNodeInfo (pNodeInfo);
  373. SCODE sc = S_OK;
  374. if (ST_INDOC == m_state)
  375. {
  376. // If the node was being handled by a subclass, then
  377. // pass the ::EndChildren along to the subclass.
  378. //
  379. if (m_cUnhandled == 0)
  380. {
  381. sc = ScCompleteChildren (fEmpty,
  382. pNodeInfo->dwType,
  383. pNodeInfo->pwcText,
  384. pNodeInfo->ulLen);
  385. if (FAILED (sc))
  386. goto ret;
  387. }
  388. else
  389. {
  390. // Otherwise pop the unhandled count
  391. //
  392. PopUnhandled();
  393. }
  394. }
  395. ret:
  396. // If there was a scope context, leave the scope.
  397. //
  398. if (pNodeInfo->pNode)
  399. {
  400. // A ref added when we handed the object to the
  401. // XMLParser. Reclaim that ref and release the
  402. // object.
  403. //
  404. auto_ref_ptr<CXmlnsScope> pscope;
  405. pscope.take_ownership(reinterpret_cast<CXmlnsScope*>(pNodeInfo->pNode));
  406. pscope->LeaveScope(this);
  407. pNodeInfo->pNode = NULL;
  408. }
  409. return sc;
  410. }
  411. HRESULT STDMETHODCALLTYPE CNodeFactory::Error(
  412. /* [in] */ IXMLNodeSource __RPC_FAR*,
  413. /* [in] */ HRESULT hrErrorCode,
  414. /* [in] */ USHORT cNumRecs,
  415. /* [in] */ XML_NODE_INFO __RPC_FAR *__RPC_FAR *apNodeInfo)
  416. {
  417. XmlTrace ("Xml: INodeFactory::Error() called\n");
  418. XmlTrace ("- hrErrorCode: %ws (0x%08X)\n"
  419. "- cNumRecs: %hd\n",
  420. PwszErrorCode(hrErrorCode), hrErrorCode,
  421. cNumRecs);
  422. // Argh...
  423. //
  424. // MSXML currently has a bug where if the error occurs whilst
  425. // processing the root -- ie. a non-xml document, then ::Error()
  426. // is called with a cNumRecs of 1 and a null apNodeInfo. Oops.
  427. //
  428. if (NULL == apNodeInfo)
  429. return S_OK;
  430. // Argh...
  431. //
  432. // There was an error in the XML somewhere. I don't know if
  433. // this is info that would ever help the client.
  434. //
  435. XmlTraceCountedNodeInfo (cNumRecs, apNodeInfo);
  436. m_hrParserError = hrErrorCode;
  437. m_state = ST_XMLERROR;
  438. for (; cNumRecs--; apNodeInfo++)
  439. {
  440. // If there was a scope context, leave the scope.
  441. //
  442. if ((*apNodeInfo)->pNode)
  443. {
  444. // A ref added when we handed the object to the
  445. // XMLParser. Reclaim that ref and release the
  446. // object.
  447. //
  448. auto_ref_ptr<CXmlnsScope> pscope;
  449. pscope.take_ownership(reinterpret_cast<CXmlnsScope*>((*apNodeInfo)->pNode));
  450. pscope->LeaveScope(this);
  451. (*apNodeInfo)->pNode = NULL;
  452. }
  453. }
  454. return S_OK;
  455. }
  456. HRESULT STDMETHODCALLTYPE CNodeFactory::CreateNode(
  457. /* [in] */ IXMLNodeSource __RPC_FAR*,
  458. /* [in] */ PVOID pNodeParent,
  459. /* [in] */ USHORT cNumRecs,
  460. /* [in] */ XML_NODE_INFO __RPC_FAR **apNodeInfo)
  461. {
  462. XmlTrace ("Xml: INodeFactory::CreateNode() called\n");
  463. XmlTrace ("- pNodeParent: 0x%08X\n"
  464. "- cNumRecs: %hd\n",
  465. pNodeParent,
  466. cNumRecs);
  467. XmlTraceCountedNodeInfo (cNumRecs, apNodeInfo);
  468. auto_ref_ptr<CNmspc> pnsLocal;
  469. auto_ref_ptr<CXmlnsScope> pxmlnss;
  470. LPCWSTR pwcNamespaceAttributeDefault = NULL;
  471. LPCWSTR pwcNamespace = NULL;
  472. SCODE sc = S_OK;
  473. ULONG ulNsPrefiLenAttributeDefault = 0;
  474. USHORT iNi;
  475. // We really do not care much about anything in the
  476. // prologue.
  477. //
  478. if (ST_INDOC != m_state)
  479. goto ret;
  480. // The processing for ::CreateNode() really is a two pass
  481. // mechanism for all the nodes being created. First, the
  482. // list of nodes are scanned for namespaces and they are
  483. // added to the cache. This is required because namespace
  484. // definitions for this node's scope can apear anywhere in
  485. // the list of attributes.
  486. //
  487. // Once all the namespaces have been processed, the subclass
  488. // is called for each node -- with the expanded names for
  489. // both XML_ELEMENTS and XML_ATTRIBUTES
  490. //
  491. for (iNi = 0; iNi < cNumRecs; iNi++)
  492. {
  493. if (XML_NS == apNodeInfo[iNi]->dwSubType)
  494. {
  495. // This should always be the case. The enumeration
  496. // that defines the subtypes picks up where the node
  497. // types left off.
  498. //
  499. Assert (XML_ATTRIBUTE == apNodeInfo[iNi]->dwType);
  500. //
  501. // However, handle this case -- just in case...
  502. //
  503. if (XML_ATTRIBUTE != apNodeInfo[iNi]->dwType)
  504. continue;
  505. // Since we are about to create some namespaces that
  506. // are scoped by this node, create a scoping object
  507. // and set it into the node info.
  508. //
  509. // When we hand this back to those wacky XML guys, we
  510. // need to keep our reference so the object lives beyond
  511. // the current instance. It gets cleaned up in response
  512. // to ::Error() or ::EndChildren() calls.
  513. //
  514. if (NULL == pxmlnss.get())
  515. {
  516. pxmlnss.take_ownership(new CXmlnsScope);
  517. if (NULL == pxmlnss.get())
  518. {
  519. sc = E_OUTOFMEMORY;
  520. goto ret;
  521. }
  522. }
  523. // Ok, we have a namespace, and need to construct and
  524. // cache it.
  525. //
  526. // If this is a default namespace -- ie. one that does
  527. // not have an alias associated with its use -- then
  528. // the length of the namespace prefix should be zero.
  529. //
  530. auto_ref_ptr<CNmspc> pns;
  531. pns.take_ownership(new CNmspc());
  532. if (NULL == pns.get())
  533. {
  534. sc = E_OUTOFMEMORY;
  535. goto ret;
  536. }
  537. if (0 == apNodeInfo[iNi]->ulNsPrefixLen)
  538. {
  539. // Set the empty alias
  540. //
  541. Assert (CchConstString(gc_wszXmlns) == apNodeInfo[iNi]->ulLen);
  542. Assert (!wcsncmp (apNodeInfo[iNi]->pwcText, gc_wszXmlns, CchConstString(gc_wszXmlns)));
  543. sc = pns->ScSetAlias (apNodeInfo[iNi]->pwcText, 0);
  544. if (FAILED (sc))
  545. goto ret;
  546. }
  547. else
  548. {
  549. UINT cch = apNodeInfo[iNi]->ulLen - apNodeInfo[iNi]->ulNsPrefixLen - 1;
  550. LPCWSTR pwsz = apNodeInfo[iNi]->pwcText + apNodeInfo[iNi]->ulLen - cch;
  551. // The alias for this namespace is the text following
  552. // the single colon in the namespace decl.
  553. //
  554. Assert (CchConstString(gc_wszXmlns) < apNodeInfo[iNi]->ulLen);
  555. Assert (!wcsncmp (apNodeInfo[iNi]->pwcText, gc_wszXmlns, CchConstString(gc_wszXmlns)));
  556. Assert (L':' == *(apNodeInfo[iNi]->pwcText + CchConstString(gc_wszXmlns)));
  557. sc = pns->ScSetAlias (pwsz, cch);
  558. if (FAILED (sc))
  559. goto ret;
  560. }
  561. // Now assemble the href. The href is defined by the next N
  562. // consecutive nodes of type XML_PCDATA.
  563. //
  564. while (++iNi < cNumRecs)
  565. {
  566. if (XML_PCDATA != apNodeInfo[iNi]->dwType)
  567. break;
  568. if (-1 == m_sbValue.Append(apNodeInfo[iNi]->ulLen * sizeof(WCHAR),
  569. apNodeInfo[iNi]->pwcText))
  570. {
  571. sc = E_OUTOFMEMORY;
  572. goto ret;
  573. }
  574. }
  575. // At this point, we have hit the end of this current namespace
  576. // declaration and can set the href into the namespace.
  577. //
  578. sc = pns->ScSetHref (m_sbValue.PContents(), m_sbValue.CchSize());
  579. if (FAILED (sc))
  580. goto ret;
  581. m_sbValue.Reset();
  582. // The namespace has been completed, so we should cache it
  583. // at this point; and clear the namespace in construction.
  584. //
  585. Assert (pns.get());
  586. Assert (pns->PszHref() && pns->PszAlias());
  587. CachePersisted (pns);
  588. // Make sure the scoping for this namespace is handled.
  589. //
  590. Assert (pxmlnss.get());
  591. pxmlnss->ScopeNamespace (pns.get());
  592. // Ok, if we simply move on to the next node, then we will skip the
  593. // node that brought us out of the namespace processing.
  594. //
  595. iNi--;
  596. }
  597. }
  598. // Now that we have all the namespaces taken care of, call the subclass
  599. // for each of the nodes.
  600. //
  601. for (iNi = 0; iNi < cNumRecs; iNi++)
  602. {
  603. LPCWSTR pwcText = apNodeInfo[iNi]->pwcText;
  604. ULONG ulLen = apNodeInfo[iNi]->ulLen;
  605. ULONG ulNsPrefixLen = apNodeInfo[iNi]->ulNsPrefixLen;
  606. switch (apNodeInfo[iNi]->dwType)
  607. {
  608. case XML_ATTRIBUTE:
  609. case XML_ELEMENT:
  610. // For both XML_ELEMENTs and XML_ATTRIBUTEs, we want to
  611. // do the namespace translation and hand the subclass the
  612. // fully qualified name! The only exception to this would
  613. // be for the special node and element subtypes.
  614. //
  615. if (0 == apNodeInfo[iNi]->dwSubType)
  616. {
  617. // For attributes, if there was no translation, then we
  618. // want to use this node's namespace for defaulting the
  619. // attribute namespaces.
  620. //
  621. if ((XML_ATTRIBUTE == apNodeInfo[iNi]->dwType) &&
  622. (0 == apNodeInfo[iNi]->ulNsPrefixLen))
  623. {
  624. pwcNamespace = pwcNamespaceAttributeDefault;
  625. ulNsPrefixLen = ulNsPrefiLenAttributeDefault;
  626. }
  627. else
  628. {
  629. // Otherwise try and translate...
  630. //
  631. sc = TranslateToken (&pwcText,
  632. &ulLen,
  633. &pwcNamespace,
  634. &ulNsPrefixLen);
  635. }
  636. // For elements, if there was no translation and there
  637. // is a current default namespace declaired for this xml
  638. // this document, this is invalid xml.
  639. //
  640. Assert (!FAILED (sc));
  641. if (S_FALSE == sc)
  642. {
  643. XmlTrace ("Xml: element has no valid namespace\n");
  644. sc = E_DAV_XML_PARSE_ERROR;
  645. goto ret;
  646. }
  647. // Check for an empty property name. An empty property name
  648. // is invalid. ulLen is the size of the property name with
  649. // the prefix stripped. ***
  650. //
  651. if (0 == ulLen)
  652. {
  653. XmlTrace("Xml: property has noname\n");
  654. sc = E_DAV_XML_PARSE_ERROR;
  655. goto ret;
  656. }
  657. }
  658. // Handle empty tags here -- ie. all namespace!
  659. //
  660. if (0 == apNodeInfo[iNi]->ulLen)
  661. {
  662. XmlTrace ("Xml: element has no valid tag\n");
  663. sc = E_DAV_XML_PARSE_ERROR;
  664. }
  665. // If this is the first node in the list, then set the defaults
  666. //
  667. if (0 == iNi)
  668. {
  669. pwcNamespaceAttributeDefault = pwcNamespace;
  670. ulNsPrefiLenAttributeDefault = ulNsPrefixLen;
  671. }
  672. /* !!! FALL THROUGH !!! */
  673. case XML_PI:
  674. case XML_XMLDECL:
  675. case XML_DOCTYPE:
  676. case XML_DTDATTRIBUTE:
  677. case XML_ENTITYDECL:
  678. case XML_ELEMENTDECL:
  679. case XML_ATTLISTDECL:
  680. case XML_NOTATION:
  681. case XML_GROUP:
  682. case XML_INCLUDESECT:
  683. case XML_PCDATA:
  684. case XML_CDATA:
  685. case XML_IGNORESECT:
  686. case XML_COMMENT:
  687. case XML_ENTITYREF:
  688. case XML_WHITESPACE:
  689. case XML_NAME:
  690. case XML_NMTOKEN:
  691. case XML_STRING:
  692. case XML_PEREF:
  693. case XML_MODEL:
  694. case XML_ATTDEF:
  695. case XML_ATTTYPE:
  696. case XML_ATTPRESENCE:
  697. case XML_DTDSUBSET:
  698. default:
  699. {
  700. // If we are currently in a state where the subclass has chosen
  701. // not to handle a node (and subsequently its children), then we
  702. // do not want to even bother the subclass.
  703. //
  704. //$ REVIEW:
  705. //
  706. // We do not cut this off earlier such that we can process and
  707. // know the namespaces of the unhandled nodes. Otherwise we cannot
  708. // do any XML validation of the unhandled nodes.
  709. //
  710. if (0 == m_cUnhandled)
  711. {
  712. // Call the subclass
  713. // Note that we don't need to call subclass if the it's a XML_NS node,
  714. // because we've handled all the namespaces.
  715. //
  716. Assert (pwcNamespace ||
  717. (0 == apNodeInfo[iNi]->ulNsPrefixLen) ||
  718. (apNodeInfo[iNi]->dwSubType == XML_NS));
  719. // If we see a sub type of XML_NS, this must be a XML_ATTRIBUTE
  720. //
  721. Assert ((apNodeInfo[iNi]->dwSubType != XML_NS) ||
  722. (apNodeInfo[iNi]->dwType == XML_ATTRIBUTE));
  723. sc = ScHandleNode (apNodeInfo[iNi]->dwType,
  724. apNodeInfo[iNi]->dwSubType,
  725. apNodeInfo[iNi]->fTerminal,
  726. pwcText,
  727. ulLen,
  728. ulNsPrefixLen,
  729. pwcNamespace,
  730. apNodeInfo[iNi]->ulNsPrefixLen);
  731. if (FAILED (sc))
  732. goto ret;
  733. }
  734. // Watch for UNHANDLED nodes. Any node that ends up not being handled
  735. // pushes us into a state where all we do is continue processing the
  736. // XML stream until our unhandled count (which is really a depth) goesbd
  737. // back to zero. A subclass tells us something was unhandled by passing
  738. // back S_FALSE;
  739. //
  740. if (S_FALSE == sc)
  741. {
  742. // Any type that results in an EndChildren() call
  743. // needs to add to the unhandled depth.
  744. //
  745. //$ WORKAROUND: There is a bug in the XML parser where it is
  746. // giving us a non-terminal PCDATA! Work around that here!
  747. //
  748. if (!apNodeInfo[iNi]->fTerminal && (XML_PCDATA != apNodeInfo[iNi]->dwType))
  749. {
  750. // We should only get non-terminal node info structures
  751. // as the first in the list or as attributes!
  752. //
  753. Assert ((0 == iNi) || (XML_ATTRIBUTE == apNodeInfo[iNi]->dwType));
  754. PushUnhandled ();
  755. }
  756. }
  757. // For most attributes we expect the attribute to be followed by
  758. // XML_PCDATA element, but we want to allow the empty namespace
  759. // definition xmlns:a="". This has no data element, so we need
  760. // to adjust the state for this case.
  761. //
  762. if ((ST_INDOC == m_state) && (XML_NS == apNodeInfo[iNi]->dwSubType))
  763. {
  764. // If we have sub type XML_NS, we know that it has to be of type
  765. // XML_ATTTRIBUTE.
  766. //
  767. Assert (XML_ATTRIBUTE == apNodeInfo[iNi]->dwType);
  768. // If there are no more records or if the next element is not of
  769. // type XML_PCDATA, we know we hit an empty namespace declaration.
  770. //
  771. if ((iNi == cNumRecs - 1) ||
  772. (XML_PCDATA != apNodeInfo[iNi + 1]->dwType))
  773. {
  774. m_state = ST_INATTRDATA;
  775. }
  776. }
  777. // If we just processed an attribute, then we had better transition
  778. // into the right state for processing its value.
  779. //
  780. switch (m_state)
  781. {
  782. case ST_INDOC:
  783. if (XML_ATTRIBUTE == apNodeInfo[iNi]->dwType)
  784. {
  785. //$ REVIEW: if this is the last node, that means that the
  786. // attribute value is empty. Don't transition...
  787. //
  788. if (iNi < (cNumRecs - 1))
  789. {
  790. // Remember that we have started processing an attribute.
  791. // We need to do this so that we can call the subclass to
  792. // tell them that the attribute is completed.
  793. //
  794. m_state = ST_INATTR;
  795. }
  796. //
  797. //$ REVIEW: end.
  798. }
  799. break;
  800. case ST_INATTR:
  801. // We better not get anything other than PCDATA when dealing
  802. // with attributes, otherwise it is an error.
  803. //
  804. if (XML_PCDATA == apNodeInfo[iNi]->dwType)
  805. {
  806. // We also need to close the attribute off if this is
  807. // the last node in the list, so we should fall through
  808. // below to handle the termination case.
  809. //
  810. m_state = ST_INATTRDATA;
  811. }
  812. else
  813. {
  814. // We better not get anything other than PCDATA
  815. // when dealing with attributes, otherwise it
  816. // is an error.
  817. //
  818. XmlTrace ("Xml: got something other than PC_DATA\n");
  819. sc = E_DAV_XML_PARSE_ERROR;
  820. goto ret;
  821. }
  822. /* !!! FALL THROUGH !!! */
  823. case ST_INATTRDATA:
  824. // The next node is anything but PC_DATA or this is the
  825. // last node in the list, then we need to close the current
  826. // attribute.
  827. //
  828. if ((iNi == cNumRecs - 1) ||
  829. (XML_PCDATA != apNodeInfo[iNi + 1]->dwType))
  830. {
  831. m_state = ST_INDOC;
  832. // Now that all the bits that define the node are handled
  833. // by the subclass, we can pass on the end of the attributes.
  834. //
  835. // If the subclass is handling the current context, pass
  836. // the call along
  837. //
  838. if (0 == m_cUnhandled)
  839. {
  840. sc = ScCompleteAttribute ();
  841. if (FAILED (sc))
  842. goto ret;
  843. }
  844. else
  845. {
  846. // Don't call the subclass but certainly pop the
  847. // unhandled state
  848. //
  849. PopUnhandled();
  850. }
  851. }
  852. break;
  853. }
  854. break;
  855. }
  856. }
  857. }
  858. // Complete the CreateNode() call
  859. //
  860. Assert (0 != cNumRecs);
  861. sc = ScCompleteCreateNode (apNodeInfo[0]->dwType);
  862. if (FAILED (sc))
  863. goto ret;
  864. // Assert that in a completely successful call, we are still
  865. // in the ST_INDOC state.
  866. //
  867. Assert (ST_INDOC == m_state);
  868. // Make sure that any scoping that needed to happen, happens
  869. //
  870. Assert ((NULL == pxmlnss.get()) || (0 != cNumRecs));
  871. apNodeInfo[0]->pNode = pxmlnss.relinquish();
  872. ret:
  873. return sc;
  874. }
  875. // ScNewXMLParser() ----------------------------------------------------------
  876. //
  877. SCODE
  878. ScNewXMLParser (CNodeFactory * pnf, IStream * pstm, IXMLParser ** ppxprsRet)
  879. {
  880. auto_ref_ptr<IXMLParser> pxprsNew;
  881. SCODE sc = S_OK;
  882. //$ IMPORTANT: we are trusting that IIS has initialized co
  883. // for us. We have been told by the powers that be that
  884. // we shall not init co.
  885. //
  886. // Grab an instance of the XML parser
  887. //
  888. sc = CoCreateInstance (CLSID_XMLParser,
  889. NULL,
  890. CLSCTX_INPROC_SERVER,
  891. IID_IXMLParser,
  892. reinterpret_cast<LPVOID*>(pxprsNew.load()));
  893. //
  894. //$ IMPORTANT: end
  895. if (FAILED (sc))
  896. goto ret;
  897. // Set the input to the parser
  898. //
  899. sc = pxprsNew->SetInput (pstm);
  900. if (FAILED (sc))
  901. goto ret;
  902. // Initialize the node factory
  903. //
  904. sc = pnf->ScInit();
  905. if (FAILED (sc))
  906. goto ret;
  907. // Push our node factory
  908. //
  909. sc = pxprsNew->SetFactory (pnf);
  910. if (FAILED (sc))
  911. goto ret;
  912. // Set some flags that are fairly useful
  913. //
  914. sc = pxprsNew->SetFlags (XMLFLAG_SHORTENDTAGS | XMLFLAG_NOWHITESPACE);
  915. if (FAILED (sc))
  916. goto ret;
  917. // Pass back the instantiated parser
  918. //
  919. *ppxprsRet = pxprsNew.relinquish();
  920. ret:
  921. return sc;
  922. }
  923. // ScParseXML() --------------------------------------------------------------
  924. //
  925. SCODE
  926. ScParseXML (IXMLParser* pxprs, CNodeFactory * pnf)
  927. {
  928. // Note Run() can return E_PENDING when I/O is pending
  929. // on the stream which we are parsing.
  930. //
  931. SCODE sc = pxprs->Run (-1);
  932. if (FAILED (sc) && pnf->FParserError(sc))
  933. sc = E_DAV_XML_PARSE_ERROR;
  934. return sc;
  935. }