/*
 *	X N O D E . C P P
 *
 *	XML emitter processing
 *
 *	Copyright 1986-1997 Microsoft Corporation, All Rights Reserved
 */

#include "_xmllib.h"

//	NOTE(review): the names of the two system headers were lost when this
//	file was extracted.  <string.h> and <stdio.h> are what strlen() and
//	sprintf() below require -- confirm against the original.
//
#include <string.h>
#include <stdio.h>

//	class CXNode - Emitting ---------------------------------------------------
//
//	Our own version of WideCharToMultiByte(CP_UTF8, ...)
//
//	UTF-8 multi-byte encoding.  See Appendix A.2 of the Unicode book for
//	more info.
//
//		Unicode value		1st byte	2nd byte	3rd byte
//		000000000xxxxxxx	0xxxxxxx
//		00000yyyyyxxxxxx	110yyyyy	10xxxxxx
//		zzzzyyyyyyxxxxxx	1110zzzz	10yyyyyy	10xxxxxx
//
//	Parameters:
//		wch	the single UTF-16 code unit to encode
//		pb	destination buffer
//		pib	[in]  index in pb at which to write the first byte
//			[out] index of the LAST byte written (not one past it), so
//			      callers advance it by one before the next call
//
//	The caller must guarantee room for three bytes starting at pb[*pib].
//
//	Limitation: each code unit is encoded on its own, so a surrogate pair
//	is emitted as two three-byte sequences rather than one four-byte
//	sequence.
//
inline VOID
WideCharToUTF8Chars (WCHAR wch, BYTE * pb, UINT * pib)
{
	Assert (pb);
	Assert (pib);

	UINT ib = *pib;

	if (wch < 0x80)
	{
		//	single-byte: 0xxxxxxx
		//
		pb[ib] = static_cast<BYTE>(wch);
	}
	else if (wch < 0x800)
	{
		//	two-byte: 110xxxxx 10xxxxxx
		//
		//	The callers allocate two extra bytes, so there is room at
		//	the tail of the buffer for the overflow.
		//
		pb[ib++] = static_cast<BYTE>((wch >> 6) | 0xC0);
		pb[ib] = static_cast<BYTE>((wch & 0x3F) | 0x80);
	}
	else
	{
		//	three-byte: 1110xxxx 10xxxxxx 10xxxxxx
		//
		//	The callers allocate two extra bytes, so there is room at
		//	the tail of the buffer for the overflow.
		//
		pb[ib++] = static_cast<BYTE>((wch >> 12) | 0xE0);
		pb[ib++] = static_cast<BYTE>(((wch >> 6) & 0x3F) | 0x80);
		pb[ib] = static_cast<BYTE>((wch & 0x3F) | 0x80);
	}

	*pib = ib;
}

//	FIsEscapedControlChar() ---------------------------------------------------
//
//	TRUE for the bytes that the emitter writes as a numeric character
//	reference ("&#xNN;"): everything below 0x20 except tab (0x9), line
//	feed (0xA) and carriage return (0xD).
//
//	The first comparison is made on the unsigned value so that bytes with
//	the high bit set (UTF-8 lead/trail bytes) are never treated as control
//	characters when CHAR is signed.
//
static inline BOOL
FIsEscapedControlChar (CHAR ch)
{
	return (0x9 > static_cast<BYTE>(ch))
		|| (0xB == ch)
		|| (0xC == ch)
		|| ((0x20 > ch) && (0xD < ch));
}

//	CXNode::ScAddUnicodeResponseBytes() ---------------------------------------
//
//	Converts cch UTF-16 code units from pcwsz to UTF-8 and appends the
//	bytes, unescaped, to the xml body via m_pxb->ScAddTextBytes().  The
//	conversion is done in chunks of at most CB_XMLBODYPART_SIZE bytes.
//
//	Returns E_OUTOFMEMORY if the conversion buffer cannot be obtained,
//	otherwise the first failure from ScAddTextBytes(), otherwise S_OK.
//
SCODE
CXNode::ScAddUnicodeResponseBytes (
	/* [in] */ UINT cch,
	/* [in] */ LPCWSTR pcwsz)
{
	SCODE sc = S_OK;

	//	We need a buffer that is at least 3 bytes long for the odd
	//	occurrence of a single unicode char with significant bits above
	//	0x7f -- hence the two bytes of slack.
	//
	UINT cb = min (cch + 2, CB_XMLBODYPART_SIZE);

	UINT ib;
	UINT iwch;

	//	NOTE(review): the template arguments of CStackBuffer were lost in
	//	extraction; <BYTE,512> is a reconstruction -- confirm the element
	//	type and the initial stack size against the original.
	//
	CStackBuffer<BYTE,512> pb;

	if (NULL == pb.resize(cb))
	{
		sc = E_OUTOFMEMORY;
		goto ret;
	}

	for (iwch = 0; iwch < cch; )
	{
		//	Fill the buffer.  Stopping at cb-2 leaves room for a full
		//	three-byte sequence starting at the last accepted index.
		//	WideCharToUTF8Chars() leaves ib on the last byte written;
		//	the loop increment steps past it.
		//
		for (ib = 0; (ib < cb-2) && (iwch < cch); ib++, iwch++)
		{
			WideCharToUTF8Chars (pcwsz[iwch], pb.get(), &ib);
		}

		//	Add the bytes
		//
		Assert (ib <= cb);
		sc = m_pxb->ScAddTextBytes (ib, reinterpret_cast<LPCSTR>(pb.get()));
		if (FAILED(sc))
			goto ret;
	}

ret:

	return sc;
}

//	CXNode::ScAddEscapedValueBytes() ------------------------------------------
//
//	Appends cch bytes of element content from psz to the xml body,
//	escaping as it goes.
//
//	Character Range
//	[2]     Char ::= #x9
//	               | #xA
//	               | #xD
//	               | [#x20-#xD7FF]
//	               | [#xE000-#xFFFD]
//	               | [#x10000-#x10FFFF]
//
//	/* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
//
//	Characters escaped in values:
//
//		&	-- escaped as &amp;   (gc_szAmp)
//		<	-- escaped as &lt;    (gc_szLessThan)
//		>	-- escaped as &gt;    (gc_szGreaterThan)
//		control characters other than tab/LF/CR -- escaped as &#xNN;
//
//	Runs of unescaped bytes are emitted with a single ScAddTextBytes()
//	call, broken up once a run reaches CB_XMLBODYPART_SIZE bytes.
//
SCODE
CXNode::ScAddEscapedValueBytes (UINT cch, LPCSTR psz)
{
	SCODE sc = S_OK;
	const CHAR* pch;
	const CHAR* pchLast;

	//	pchLast marks the start of the bytes that have been scanned but
	//	not yet emitted.
	//
	for (pchLast = pch = psz; pch < psz + cch; pch++)
	{
		CHAR rgch[10];
		LPCSTR pszEscape = NULL;
		UINT cchEscape = 0;

		if ('&' == *pch)
		{
			pszEscape = gc_szAmp;
			cchEscape = CchConstString(gc_szAmp);
		}
		else if ('<' == *pch)
		{
			pszEscape = gc_szLessThan;
			cchEscape = CchConstString(gc_szLessThan);
		}
		else if ('>' == *pch)
		{
			pszEscape = gc_szGreaterThan;
			cchEscape = CchConstString(gc_szGreaterThan);
		}
		else if (FIsEscapedControlChar (*pch))
		{
			//	Build the numeric reference.  The value is below 0x20,
			//	so it always formats as exactly two hex digits; the cast
			//	keeps the argument non-negative regardless of the
			//	signedness of CHAR.
			//
			sprintf (rgch, "&#x%02X;", static_cast<BYTE>(*pch));
			Assert (strlen(rgch) == CchConstString("&#x00;"));
			pszEscape = rgch;
			cchEscape = CchConstString("&#x00;");
		}
		else if (pch - pchLast + 1 >= CB_XMLBODYPART_SIZE)
		{
			//	Break up if the bodypart gets too big: emit the pending
			//	run including the current byte.
			//
			sc = m_pxb->ScAddTextBytes (static_cast<UINT>(pch - pchLast + 1), pchLast);
			if (FAILED(sc))
				goto ret;

			pchLast = pch + 1;
			continue;
		}
		else
		{
			//	Ordinary byte -- just keep scanning
			//
			continue;
		}

		//	Add the bytes up to this position
		//
		sc = m_pxb->ScAddTextBytes (static_cast<UINT>(pch - pchLast), pchLast);
		if (FAILED(sc))
			goto ret;

		//	Add the escape sequence
		//
		sc = m_pxb->ScAddTextBytes (cchEscape, pszEscape);
		if (FAILED(sc))
			goto ret;

		//	Update pchLast to account for what has been emitted
		//
		pchLast = pch + 1;
	}

	//	Add any remaining bytes
	//
	sc = m_pxb->ScAddTextBytes (static_cast<UINT>(pch - pchLast), pchLast);
	if (FAILED(sc))
		goto ret;

ret:

	return sc;
}

//	CXNode::ScAddEscapedAttributeBytes() --------------------------------------
//
//	Appends cch bytes of attribute value from psz to the xml body,
//	escaping as it goes.
//
//	Characters escaped in attribute values:
//
//		&	-- escaped as &amp;   (gc_szAmp)
//		"	-- escaped as &quot;  (gc_szQuote)
//		<	-- escaped as &lt;    (gc_szLessThan)
//		control characters other than tab/LF/CR -- escaped as &#xNN;
//
//	'<' is escaped because the XML AttValue production does not allow a
//	literal '<' inside an attribute value.
//
SCODE
CXNode::ScAddEscapedAttributeBytes (UINT cch, LPCSTR psz)
{
	SCODE sc = S_OK;
	const CHAR* pch;
	const CHAR* pchLast;

	//	pchLast marks the start of the bytes that have been scanned but
	//	not yet emitted.
	//
	for (pchLast = pch = psz; pch < psz + cch; pch++)
	{
		CHAR rgch[10];
		LPCSTR pszEscape = NULL;
		UINT cchEscape = 0;

		if ('&' == *pch)
		{
			pszEscape = gc_szAmp;
			cchEscape = CchConstString(gc_szAmp);
		}
		else if ('"' == *pch)
		{
			pszEscape = gc_szQuote;
			cchEscape = CchConstString(gc_szQuote);
		}
		else if ('<' == *pch)
		{
			pszEscape = gc_szLessThan;
			cchEscape = CchConstString(gc_szLessThan);
		}
		else if (FIsEscapedControlChar (*pch))
		{
			//	Build the numeric reference.  The value is below 0x20,
			//	so it always formats as exactly two hex digits; the cast
			//	keeps the argument non-negative regardless of the
			//	signedness of CHAR.
			//
			sprintf (rgch, "&#x%02X;", static_cast<BYTE>(*pch));
			Assert (strlen(rgch) == CchConstString("&#x00;"));
			pszEscape = rgch;
			cchEscape = CchConstString("&#x00;");
		}
		else
		{
			//	Ordinary byte -- just keep scanning
			//
			continue;
		}

		//	Add the bytes up to this position
		//
		sc = m_pxb->ScAddTextBytes (static_cast<UINT>(pch - pchLast), pchLast);
		if (FAILED(sc))
			goto ret;

		//	Add the escape sequence
		//
		sc = m_pxb->ScAddTextBytes (cchEscape, pszEscape);
		if (FAILED(sc))
			goto ret;

		//	Update pchLast to account for what has been emitted
		//
		pchLast = pch + 1;
	}

	//	Add any remaining bytes
	//
	sc = m_pxb->ScAddTextBytes (static_cast<UINT>(pch - pchLast), pchLast);
	if (FAILED(sc))
		goto ret;

ret:

	return sc;
}

//	class CXNode - Construction -----------------------------------------------
//
//	CXNode::ScWriteTagName()
//
//	Writes "alias:tag" (or just "tag" when the node has no namespace or
//	the namespace has an empty alias) to the xml body.  The tag must have
//	been set by ScSetTag() first.
//
SCODE
CXNode::ScWriteTagName ()
{
	SCODE sc = S_OK;

	//	If there is a namespace associated with this node,
	//	when writing out the tag name, add the alias and a
	//	separator to the data stream.
	//
	if (m_pns.get() && m_pns->CchAlias())
	{
		//	Add the alias
		//
		sc = ScAddUnicodeResponseBytes (m_pns->CchAlias(), m_pns->PszAlias());
		if (FAILED(sc))
			goto ret;

		//	Add in the separator
		//
		sc = m_pxb->ScAddTextBytes(1, &gc_chColon);
		if (FAILED(sc))
			goto ret;
	}

	//	Write the tag
	//
	Assert (m_pwszTagEscaped.get());
	sc = ScAddUnicodeResponseBytes (m_cchTagEscaped, m_pwszTagEscaped.get());
	if (FAILED(sc))
		goto ret;

ret:

	return sc;
}

//	CXNode::ScSetTag() --------------------------------------------------------
//
//	Resolves the namespace of the tag (for element and attribute nodes),
//	escapes and records the local name, and writes the start of the node
//	("<" for elements, followed by the tag name) to the xml body.
//
//	Parameters:
//		pmsr	emitter used to look up / add namespaces
//		cchTag	length of pwszTag in characters
//		pwszTag	the full tag, namespace included
//
//	Returns E_DAV_INVALID_PROPERTY_NAME when the name left after the
//	namespace is empty, E_OUTOFMEMORY on allocation failure, or the first
//	failure reported by a helper.  On any of these failures before the
//	escaped tag is recorded, m_pwszTagEscaped is left unset.
//
SCODE
CXNode::ScSetTag (CXMLEmitter* pmsr, UINT cchTag, LPCWSTR pwszTag)
{
	LPCWSTR pwszName = pwszTag;
	SCODE sc = S_OK;
	BOOL fAddNmspc = FALSE;
	UINT cch = 0;
	UINT cchName = 0;
	UINT cchTagEscaped = 64;

	//	NOTE(review): the template argument of auto_heap_ptr and the
	//	target type of the ExAlloc() casts below were lost in extraction;
	//	WCHAR / LPWSTR are reconstructions.
	//
	auto_heap_ptr<WCHAR> pwszTagEscaped;

	//	Namespace nodes do not have a namespace associated with them,
	//	so don't even bother looking...
	//
	switch (m_xnt)
	{
		case XN_ELEMENT:
		case XN_ATTRIBUTE:

			//	See if a namespace applies to this tag
			//
			cch = CchNmspcFromTag (cchTag, pwszTag, &pwszName);
			if (0 == cch)
			{
				m_fHasEmptyNamespace = TRUE;
			}
			else
			{
				//	Find the namespace to use
				//
				sc = pmsr->ScFindNmspc (pwszTag, cch, m_pns);
				if (FAILED (sc))
					goto ret;

				//	If a new namespace is added in the local namespace
				//	cache, make sure we emit it in the node
				//
				//$NOTE: this is how we handle pilot namespaces; this is
				//$NOTE: NOT the normal way of handling namespaces.  All
				//$NOTE: common namespaces should be preloaded.
				//
				fAddNmspc = (sc == S_FALSE);

				//	We should have preloaded all namespaces.  The pilot
				//	namespace is handled here to avoid emitting invalid
				//	xml.  But we should look into the reason why the pilot
				//	namespace comes up, so assert here.
				//
				//	Note that this assert should be removed if we decide
				//	we want to leave uncommon namespaces not preloaded and
				//	expect them to be treated as pilot namespaces.
				//
				AssertSz(!fAddNmspc, "Pilot namespace found, safe to ingore,"
									 "but please raid against HTTP-DAV");
			}
			break;

		case XN_NAMESPACE:
			break;
	}

	//	Record the new tag and/or its length
	//
	//	NOTE: the item that goes into the tag cache is the name
	//	of the property with the namespace stripped off.  This is
	//	important to know when doing searches in the tag cache.
	//
	cchName = static_cast<UINT>(pwszTag + cchTag - pwszName);
	if (0 == cchName)
	{
		//	We really need to have a tag that has a value.  Empty
		//	tags produce invalid XML.
		//
		sc = E_DAV_INVALID_PROPERTY_NAME;
		goto ret;
	}

	sc = CXAtomCache::ScCacheAtom (&pwszName, cchName);
	if (FAILED (sc))
		goto ret;

	//	ScSetTag shouldn't have been called for this node.
	//
	Assert (!m_pwszTagEscaped.get());

	//	Allocate buffer for the property tag.
	//
	pwszTagEscaped = static_cast<LPWSTR>(ExAlloc(CbSizeWsz(cchTagEscaped)));
	if (!pwszTagEscaped.get())
	{
		sc = E_OUTOFMEMORY;
		goto ret;
	}

	//	Escape the tag name as required.
	//
	//	If we have an empty namespace, we need to impose additional
	//	restrictions on the first character of the property name because
	//	it will be the first character of the xml node, and the first
	//	character of an xml node can only be a letter or an underscore
	//	(numbers, etc. are not allowed).
	//
	//	Note: This will disallow an xml node <123> because it is invalid
	//	xml, but it will ALLOW the same name behind a namespace alias even
	//	though that is also invalid.  This is by design because most xml
	//	parsers will handle this appropriately, and it makes more sense to
	//	clients.
	//	NOTE(review): the example node name in the original sentence was
	//	lost in extraction; "behind a namespace alias" is a reconstruction.
	//
	sc = ScEscapePropertyName (pwszName,
							   cchName,
							   pwszTagEscaped.get(),
							   &cchTagEscaped,
							   m_fHasEmptyNamespace);
	if (S_FALSE == sc)
	{
		//	The buffer was too small; cchTagEscaped now holds the size
		//	to retry with.
		//
		pwszTagEscaped.clear();
		pwszTagEscaped = static_cast<LPWSTR>(ExAlloc(CbSizeWsz(cchTagEscaped)));
		if (!pwszTagEscaped.get())
		{
			sc = E_OUTOFMEMORY;
			goto ret;
		}

		sc = ScEscapePropertyName (pwszName,
								   cchName,
								   pwszTagEscaped.get(),
								   &cchTagEscaped,
								   m_fHasEmptyNamespace);
		Assert (S_OK == sc);
	}

	//	Do not install a buffer the escaper failed to fill; leaving
	//	m_pwszTagEscaped unset lets ScDone() skip this node cleanly.
	//
	if (FAILED(sc))
		goto ret;

	m_pwszTagEscaped = pwszTagEscaped.relinquish();
	m_cchTagEscaped = cchTagEscaped;

	//	Start a new node if XN_ELEMENT
	//
	if (m_xnt == XN_ELEMENT)
	{
		sc = m_pxb->ScAddTextBytes (1, "<");
		if (FAILED(sc))
			goto ret;
	}

	sc = ScWriteTagName();
	if (FAILED(sc))
		goto ret;

	if (fAddNmspc)
	{
		//	Add the namespace attribute in the node if necessary
		//
		sc = pmsr->ScAddNmspc (m_pns, this);
		if (FAILED(sc))
			goto ret;

		//	Save the emitter which can be used later to remove the
		//	temporary nmspc
		//
		m_pmsr = pmsr;
	}

ret:

	return sc;
}

//	CXNode::ScDone() ----------------------------------------------------------
//
//	Finishes the node.  For an element this writes either the complete
//	closing tag (when the node was opened with ">") or the short "/>"
//	form.  Attribute and namespace nodes write nothing here.  Any pilot
//	namespace recorded by ScSetTag() is removed from the emitter.
//
//	On failure the node is NOT marked done and the pilot namespace is not
//	removed.
//
SCODE
CXNode::ScDone ()
{
	SCODE sc = S_OK;

	//	This method should never be called twice
	//
	Assert (!m_fDone);

	switch (m_xnt)
	{
		case XN_ELEMENT:

			if (!m_pwszTagEscaped.get())
			{
				//$	RAID: 85824: When an invalid property name is unpacked,
				//	ScSetTag will fail with E_DAV_INVALID_PROPERTY_NAME.
				//
				//	Usually, the client will fail when it sees any error
				//	from CXNode methods, but in this case it may choose to
				//	continue and ignore this node completely.
				//
				//	For us, it's safe not to emit anything when no tag
				//	name is available.
				//
				break;
				//
				//$RAID: 85824
			}

			if (m_fNodeOpen)
			{
				//	Node is open, so emit a complete closing node
				//
				//	NOTE(review): this sequence was truncated in
				//	extraction ("</" through the following "->" was
				//	stripped); reconstructed as "</", tag name, ">".
				//
				sc = m_pxb->ScAddTextBytes (2, "</");
				if (FAILED(sc))
					goto ret;

				sc = ScWriteTagName();
				if (FAILED(sc))
					goto ret;

				sc = m_pxb->ScAddTextBytes (1, ">");
				if (FAILED(sc))
					goto ret;
			}
			else
			{
				//	Close directly
				//
				sc = m_pxb->ScAddTextBytes (2, "/>");
				if (FAILED(sc))
					goto ret;
			}
			break;

		case XN_NAMESPACE:

			//	Namespace nodes should not have a namespace associated
			//	with them.
			//
			Assert (NULL == m_pns.get());
			//
			//	Otherwise treat it as an attribute -- and fall through

		case XN_ATTRIBUTE:

			Assert (m_pwszTagEscaped.get());
			break;
	}

	//	Remove the pilot namespace from global cache
	//
	if (m_pmsr)
		m_pmsr->RemovePersisted(m_pns);

	m_fDone = TRUE;

ret:

	return sc;
}

//	CXNode::ScSetFormatedXML() (UTF-8 bytes) ----------------------------------
//
//	Appends cch bytes of already-formatted xml from pszValue as the
//	content of this element, with no escaping.  Writes the ">" that ends
//	the start tag first if that has not been done yet.
//
SCODE
CXNode::ScSetFormatedXML (LPCSTR pszValue, UINT cch)
{
	SCODE sc = S_OK;

	Assert (m_xnt == XN_ELEMENT);

	if (!m_fNodeOpen)
	{
		//	We must have written the tag name
		//
		Assert (m_pwszTagEscaped.get());

		//	Now that we are adding a value to the element node,
		//	we should write the node open
		//
		sc = m_pxb->ScAddTextBytes (1, ">");
		if (FAILED(sc))
			goto ret;

		m_fNodeOpen = TRUE;
	}

	//	Add the value directly
	//
	sc = m_pxb->ScAddTextBytes (cch, pszValue);
	if (FAILED(sc))
		goto ret;

ret:

	return sc;
}

//	CXNode::ScSetFormatedXML() (unicode) --------------------------------------
//
//	Same as the byte version, but the already-formatted xml is supplied
//	as cch wide characters and is converted to UTF-8 on the way out.
//
SCODE
CXNode::ScSetFormatedXML (LPCWSTR pwszValue, UINT cch)
{
	SCODE sc = S_OK;

	Assert (m_xnt == XN_ELEMENT);

	if (!m_fNodeOpen)
	{
		//	We must have written the tag name
		//
		Assert (m_pwszTagEscaped.get());

		//	Now that we are adding a value to the element node,
		//	we should write the node open
		//
		sc = m_pxb->ScAddTextBytes (1, ">");
		if (FAILED(sc))
			goto ret;

		m_fNodeOpen = TRUE;
	}

	//	Add the value directly
	//
	sc = ScAddUnicodeResponseBytes (cch, pwszValue);
	if (FAILED(sc))
		goto ret;

ret:

	return sc;
}

//	CXNode::ScSetUTF8Value() --------------------------------------------------
//
//	Writes cch bytes from pszValue as the value of this node, escaped
//	according to the node type: element content goes through
//	ScAddEscapedValueBytes() (opening the node with ">" first if needed),
//	attribute and namespace values go through
//	ScAddEscapedAttributeBytes().
//
SCODE
CXNode::ScSetUTF8Value (LPCSTR pszValue, UINT cch)
{
	SCODE sc = S_OK;

	switch (m_xnt)
	{
		case XN_ELEMENT:

			if (!m_fNodeOpen)
			{
				//	We must have written the tag name
				//
				Assert (m_pwszTagEscaped.get());

				//	Now that we are adding a value to the element node,
				//	we should write the node open
				//
				sc = m_pxb->ScAddTextBytes (1, ">");
				if (FAILED(sc))
					goto ret;

				m_fNodeOpen = TRUE;
			}

			//	Write the value
			//
			sc = ScAddEscapedValueBytes (cch, pszValue);
			if (FAILED(sc))
				goto ret;
			break;

		case XN_NAMESPACE:
		case XN_ATTRIBUTE:

			//	Write the value directly
			//
			sc = ScAddEscapedAttributeBytes (cch, pszValue);
			if (FAILED(sc))
				goto ret;
			break;
	}

ret:

	return sc;
}

//	CXNode::ScSetValue() (multi-byte) -----------------------------------------
//
//	Sets the node value from cch bytes of text in the system ANSI code
//	page (GetACP()).  The text is converted to unicode and handed to the
//	unicode ScSetValue().
//
//	Returns E_OUTOFMEMORY if the conversion buffer cannot be obtained.
//
SCODE
CXNode::ScSetValue (LPCSTR pszValue, UINT cch)
{
	//	Ok, against all better judgement, we need to take this
	//	multi-byte string and convert it to unicode before doing
	//	any UTF8 processing on it.
	//
	//	Translations from multibyte to unicode can never grow in
	//	character counts, so we are relatively safe allocating this
	//	on the stack.
	//
	UINT cchUnicode;

	//	NOTE(review): the template arguments of CStackBuffer were lost in
	//	extraction; <WCHAR,128> is a reconstruction -- confirm the initial
	//	stack size against the original.
	//
	CStackBuffer<WCHAR,128> pwsz;

	if (NULL == pwsz.resize(CbSizeWsz(cch)))
		return E_OUTOFMEMORY;

	cchUnicode = MultiByteToWideChar (GetACP(),
									  0,
									  pszValue,
									  cch,
									  pwsz.get(),
									  cch + 1);

	//	Terminate the string.  A failed conversion yields a count of
	//	zero, which results in an empty value being set.
	//
	Assert ((0 == cchUnicode) || (0 != *(pwsz.get() + cchUnicode - 1)));
	*(pwsz.get() + cchUnicode) = 0;

	//	Set the value
	//
	return ScSetValue (pwsz.get(), cchUnicode);
}

//	CXNode::ScSetValue() (unicode) --------------------------------------------
//
//	Sets the node value from cch wide characters.  The text is converted
//	to UTF-8 in chunks of at most CB_XMLBODYPART_SIZE bytes, and each
//	chunk is written through ScSetUTF8Value(), which applies the escaping
//	appropriate to the node type.
//
//	Returns E_OUTOFMEMORY if the conversion buffer cannot be obtained,
//	otherwise the first failure from ScSetUTF8Value(), otherwise S_OK.
//
SCODE
CXNode::ScSetValue (LPCWSTR pcwsz, UINT cch)
{
	SCODE sc = S_OK;

	//	We need a buffer that is at least 3 bytes long for the odd
	//	occurrence of a single unicode char with significant bits above
	//	0x7f -- hence the two bytes of slack.
	//
	UINT cb = min (cch + 2, CB_XMLBODYPART_SIZE);

	UINT ib;
	UINT iwch;

	//	NOTE(review): the template arguments of CStackBuffer were lost in
	//	extraction; <BYTE,512> is a reconstruction -- confirm the element
	//	type and the initial stack size against the original.
	//
	CStackBuffer<BYTE,512> pb;

	if (NULL == pb.resize(cb))
	{
		sc = E_OUTOFMEMORY;
		goto ret;
	}

	for (iwch = 0; iwch < cch; )
	{
		//	Fill the buffer.  Stopping at cb-2 leaves room for a full
		//	three-byte sequence starting at the last accepted index.
		//	WideCharToUTF8Chars() leaves ib on the last byte written;
		//	the loop increment steps past it.
		//
		for (ib = 0; (ib < cb-2) && (iwch < cch); ib++, iwch++)
		{
			WideCharToUTF8Chars (pcwsz[iwch], pb.get(), &ib);
		}

		//	Add the bytes
		//
		Assert (ib <= cb);
		sc = ScSetUTF8Value (reinterpret_cast<LPCSTR>(pb.get()), ib);
		if (FAILED(sc))
			goto ret;
	}

ret:

	return sc;
}

//	CXNode::ScGetChildNode() --------------------------------------------------
//
//	Creates a child node of the given type that writes to the same xml
//	body as this node and passes it back through ppxnChild.  The caller
//	receives the reference given up by the local auto_ref_ptr.
//
//	Requesting an element child closes this node's start tag with ">" if
//	that has not been done yet.
//
SCODE
CXNode::ScGetChildNode (XNT xntType, CXNode **ppxnChild)
{
	SCODE sc = S_OK;

	//	NOTE(review): the template argument of auto_ref_ptr was lost in
	//	extraction; CXNode is a reconstruction.
	//
	auto_ref_ptr<CXNode> pxn;

	Assert (ppxnChild);

	if (XN_ELEMENT == xntType)
	{
		//	Now that a new element child node is being added, this node
		//	is done opening, i.e. it closes with ">" instead of "/>"
		//
		if (!m_fNodeOpen)
		{
			sc = m_pxb->ScAddTextBytes (1, ">");
			if (FAILED(sc))
				goto ret;

			//	Then this node is an open node
			//
			m_fNodeOpen = TRUE;
		}
	}
	else
	{
		Assert ((XN_ATTRIBUTE == xntType) || (XN_NAMESPACE == xntType));
	}

	//	Create the child node
	//
	pxn.take_ownership (new CXNode(xntType, m_pxb));
	if (!pxn.get())
	{
		sc = E_OUTOFMEMORY;
		goto ret;
	}

	//	Pass back
	//
	*ppxnChild = pxn.relinquish();

ret:

	return sc;
}