You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
838 lines
18 KiB
838 lines
18 KiB
* X N O D E . C P P
* XML emitter processing
* Copyright 1986-1997 Microsoft Corporation, All Rights Reserved
#include "_xmllib.h"
#include <string.h>
#include <stdio.h>
// class CXNode - Emitting ---------------------------------------------------
// Our own version of WideCharToMultiByte(CP_UTF8, ...)
// UTF-8 multi-byte encoding. See Appendix A.2 of the Unicode book for
// more info.
// Unicode value       1st byte    2nd byte    3rd byte
// 000000000xxxxxxx    0xxxxxxx
// 00000yyyyyxxxxxx    110yyyyy    10xxxxxx
// zzzzyyyyyyxxxxxx    1110zzzz    10yyyyyy    10xxxxxx
//	WideCharToUTF8Chars()
//
//	Encodes the single wide character wch as one, two or three UTF-8
//	bytes, storing them in pb starting at index *pib.
//
//	Parameters:
//		wch		character to encode
//		pb		destination buffer; must have room for up to three
//				bytes starting at *pib (no bounds check is made here)
//		pib		[in]  index of the first byte to write
//				[out] index of the LAST byte written, not one past it
//
//	Each call sees exactly one WCHAR, so a surrogate pair is never
//	combined into a single four-byte sequence.
//
VOID WideCharToUTF8Chars (WCHAR wch, BYTE * pb, UINT * pib)
{
	Assert (pb);
	Assert (pib);

	UINT ib = *pib;

	//	single-byte: 0xxxxxxx
	//
	if (wch < 0x80)
	{
		pb[ib] = static_cast<BYTE>(wch);
	}
	//	two-byte: 110xxxxx 10xxxxxx
	//
	else if (wch < 0x800)
	{
		//	Because we alloc'd two extra-bytes,
		//	we know there is room at the tail of
		//	the buffer for the overflow...
		//
		pb[ib++] = static_cast<BYTE>((wch >> 6) | 0xC0);
		pb[ib] = static_cast<BYTE>((wch & 0x3F) | 0x80);
	}
	//	three-byte: 1110xxxx 10xxxxxx 10xxxxxx
	//
	else
	{
		//	Because we alloc'd two extra-bytes,
		//	we know there is room at the tail of
		//	the buffer for the overflow...
		//
		pb[ib++] = static_cast<BYTE>((wch >> 12) | 0xE0);
		pb[ib++] = static_cast<BYTE>(((wch >> 6) & 0x3F) | 0x80);
		pb[ib] = static_cast<BYTE>((wch & 0x3F) | 0x80);
	}

	//	Hand back the index of the last byte written; the caller's own
	//	increment moves past it.
	//
	*pib = ib;
}
// ScAddUnicodeResponseBytes()
//
// Converts cch wide characters starting at pcwsz to UTF-8 with
// WideCharToUTF8Chars() and passes the resulting bytes to
// m_pxb->ScAddTextBytes(). The conversion goes through a staging
// buffer of cb bytes, where cb is capped at CB_XMLBODYPART_SIZE, so
// long input is converted and emitted in several pieces.
//
// Returns sc, which starts as S_OK and is overwritten by the result of
// ScAddTextBytes().
//
// NOTE(review): the brace lines, the "ret" label targeted by the gotos,
// and whatever statement sets sc when resize() fails are not visible in
// this view -- confirm them against the full file before changing the
// control flow.
CXNode::ScAddUnicodeResponseBytes (
/* [in] */ UINT cch,
/* [in] */ LPCWSTR pcwsz)
SCODE sc = S_OK;
// Argh! We need to have a buffer to fill that is
// at least 3 bytes long for the odd occurrence of a
// single unicode char with significant bits above
// 0x7f.
UINT cb = min (cch + 2, CB_XMLBODYPART_SIZE);
// We really can handle zero bytes being sloughed into
// the buffer.
UINT ib;
UINT iwch;
CStackBuffer<BYTE,512> pb;
if (NULL == pb.resize(cb))
goto ret;
// iwch walks the input and is only advanced by the inner loop.
for (iwch = 0; iwch < cch; )
// Fill the staging buffer. The fill stops two bytes short of cb
// because WideCharToUTF8Chars() may write up to three bytes for one
// character; it leaves ib on the last byte written, and the ib++
// below steps past that byte.
for (ib = 0;
(ib < cb-2) && (iwch < cch);
ib++, iwch++)
WideCharToUTF8Chars (pcwsz[iwch], pb.get(), &ib);
// Add the bytes
Assert (ib <= cb);
sc = m_pxb->ScAddTextBytes (ib, reinterpret_cast<LPSTR>(pb.get()));
if (FAILED(sc))
goto ret;
return sc;
CXNode::ScAddEscapedValueBytes (UINT cch, LPCSTR psz)
SCODE sc = S_OK;
const CHAR* pch;
const CHAR* pchLast;
for (pchLast = pch = psz; pch < psz + cch; pch++)
// Character Range
// [2] Char ::= #x9
// | #xA
// | #xD
// | [#x20-#xD7FF]
// | [#xE000-#xFFFD]
// | [#x10000-#x10FFFF]
// /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
// Valid characters also escaped in values:
// & -- escaped as &
// < -- excaped as <
// > -- excaped as >
if ('&' == *pch)
// Add the bytes up to this position
sc = m_pxb->ScAddTextBytes (static_cast<UINT>(pch - pchLast), pchLast);
if (FAILED(sc))
goto ret;
// Add the escape sequence
sc = m_pxb->ScAddTextBytes (CchConstString(gc_szAmp), gc_szAmp);
if (FAILED(sc))
goto ret;
// Update pchLast to account for what has been emitted
pchLast = pch + 1;
else if ('<' == *pch)
// Add the bytes up to this position
sc = m_pxb->ScAddTextBytes (static_cast<UINT>(pch - pchLast), pchLast);
if (FAILED(sc))
goto ret;
// Add the escape sequence
sc = m_pxb->ScAddTextBytes (CchConstString(gc_szLessThan), gc_szLessThan);
if (FAILED(sc))
goto ret;
// Update pchLast to account for what has been emitted
pchLast = pch + 1;
else if ('>' == *pch)
// Add the bytes up to this position
sc = m_pxb->ScAddTextBytes (static_cast<UINT>(pch - pchLast), pchLast);
if (FAILED(sc))
goto ret;
// Add the escape sequence
sc = m_pxb->ScAddTextBytes (CchConstString(gc_szGreaterThan), gc_szGreaterThan);
if (FAILED(sc))
goto ret;
// Update pchLast to account for what has been emitted
pchLast = pch + 1;
else if ( (0x9 > static_cast<BYTE>(*pch))
|| (0xB == *pch)
|| (0xC == *pch)
|| ((0x20 > *pch) && (0xD < *pch)))
char rgch[10];
// Add the bytes up to this position
sc = m_pxb->ScAddTextBytes (static_cast<UINT>(pch - pchLast), pchLast);
if (FAILED(sc))
goto ret;
// Add the escape sequence...
sprintf (rgch, "&#x%02X;", *pch);
Assert (strlen(rgch) == CchConstString("�"));
sc = m_pxb->ScAddTextBytes (CchConstString("�"), rgch);
if (FAILED(sc))
goto ret;
pchLast = pch + 1;
else if (pch - pchLast + 1 >= CB_XMLBODYPART_SIZE)
// Break up if the bodyparts gets too big
sc = m_pxb->ScAddTextBytes (static_cast<UINT>(pch - pchLast + 1), pchLast);
if (FAILED(sc))
goto ret;
pchLast = pch + 1;
// Add any remaining bytes
sc = m_pxb->ScAddTextBytes (static_cast<UINT>(pch - pchLast), pchLast);
if (FAILED(sc))
goto ret;
return sc;
CXNode::ScAddEscapedAttributeBytes (UINT cch, LPCSTR psz)
SCODE sc = S_OK;
const CHAR* pch;
const CHAR* pchLast;
for (pchLast = pch = psz; pch < psz + cch; pch++)
// Characters escaped in values:
// & -- escaped as &
// " -- excaped as "
if ('&' == *pch)
// Add the bytes up to this position
sc = m_pxb->ScAddTextBytes (static_cast<UINT>(pch - pchLast), pchLast);
if (FAILED(sc))
goto ret;
// Add the escape sequence
sc = m_pxb->ScAddTextBytes (CchConstString(gc_szAmp), gc_szAmp);
if (FAILED(sc))
goto ret;
// Update pchLast to account for what has been emitted
pchLast = pch + 1;
else if ('"' == *pch)
// Add the bytes up to this position
sc = m_pxb->ScAddTextBytes (static_cast<UINT>(pch - pchLast), pchLast);
if (FAILED(sc))
goto ret;
// Add the escape sequence
sc = m_pxb->ScAddTextBytes (CchConstString(gc_szQuote), gc_szQuote);
if (FAILED(sc))
goto ret;
// Update pchLast to account for what has been emitted
pchLast = pch + 1;
else if ((0x9 > static_cast<BYTE>(*pch))
|| (0xB == *pch)
|| (0xC == *pch)
|| ((0x20 > *pch) && (0xD < *pch)))
char rgch[10];
// Add the bytes up to this position
sc = m_pxb->ScAddTextBytes (static_cast<UINT>(pch - pchLast), pchLast);
if (FAILED(sc))
goto ret;
// Add the escape sequence...
sprintf (rgch, "&#x%02X;", *pch);
Assert (strlen(rgch) == CchConstString("�"));
sc = m_pxb->ScAddTextBytes (CchConstString("�"), rgch);
if (FAILED(sc))
goto ret;
pchLast = pch + 1;
// Add any remaining bytes
sc = m_pxb->ScAddTextBytes (static_cast<UINT>(pch - pchLast), pchLast);
if (FAILED(sc))
goto ret;
return sc;
// class CXNode - Construction -----------------------------------------------
// ScWriteTagName()
//
// Writes this node's tag name to the output: the namespace alias and a
// colon when a namespace with a non-empty alias is attached, followed by
// the escaped tag held in m_pwszTagEscaped (m_cchTagEscaped characters).
//
// Returns sc, which carries the result of the last write call.
//
// NOTE(review): brace lines and the "ret" label targeted by the gotos
// are not visible in this view.
CXNode::ScWriteTagName ()
SCODE sc = S_OK;
// If there is a namespace associated with this node,
// when writing out the tag name, add the alias and a
// separator to the data stream.
if (m_pns.get() && m_pns->CchAlias())
// Add the alias
sc = ScAddUnicodeResponseBytes (m_pns->CchAlias(), m_pns->PszAlias());
if (FAILED(sc))
goto ret;
// Add in the separator
sc = m_pxb->ScAddTextBytes(1, &gc_chColon);
if (FAILED(sc))
goto ret;
// Write the tag
Assert (m_pwszTagEscaped.get());
sc = ScAddUnicodeResponseBytes (m_cchTagEscaped, m_pwszTagEscaped.get());
if (FAILED(sc))
goto ret;
return sc;
// ScSetTag()
//
// Establishes the tag for this node from pwszTag (cchTag characters):
// looks up the namespace through pmsr, caches the namespace-stripped
// name as an atom, stores an escaped copy of the name in
// m_pwszTagEscaped / m_cchTagEscaped, and, for XN_ELEMENT nodes, writes
// the opening "<" and tag name to the output.
//
// Parameters:
//   pmsr     emitter used for ScFindNmspc() / ScAddNmspc(); stored in
//            m_pmsr when a namespace had to be added for this node
//   cchTag   length of pwszTag in characters
//   pwszTag  full tag, possibly including a namespace portion
//
// Returns sc.
//
// NOTE(review): the brace lines, the case labels under "switch (m_xnt)",
// the "ret" label, and the statements that assign an error code before
// several of the bare "goto ret" lines (empty name, failed allocation)
// are not visible in this view -- confirm against the full file.
CXNode::ScSetTag (CXMLEmitter* pmsr, UINT cchTag, LPCWSTR pwszTag)
LPCWSTR pwszName = pwszTag;
SCODE sc = S_OK;
BOOL fAddNmspc = FALSE;
UINT cch = 0;
UINT cchName = 0;
// Initial size, in characters, for the escaped-tag buffer. It is passed
// by address to ScEscapePropertyName() and reused for the re-allocation
// when that call returns S_FALSE.
UINT cchTagEscaped = 64;
auto_heap_ptr<WCHAR> pwszTagEscaped;
// Namespace nodes do not have a namespace associated with them,
// so don't even bother looking...
switch (m_xnt)
// See if a namespace applies to this tag
cch = CchNmspcFromTag (cchTag, pwszTag, &pwszName);
if (0 == cch)
m_fHasEmptyNamespace = TRUE;
// Find the namespace to use
sc = pmsr->ScFindNmspc (pwszTag, cch, m_pns);
if (FAILED (sc))
goto ret;
// If a new namespace is added in the local namespace
// cache, make sure we emit it in the node
//$NOTE: this is how we handle pilot namespace, this
//$NOTE: is NOT the normal way of handling namespaces. All
//$NOTE: common namespaces should be preloaded.
fAddNmspc = (sc == S_FALSE);
// We should have preloaded all namespaces. The pilot
// namespace is handled here to avoid emitting invalid
// xml. But we should look into the reason why the pilot
// namespace comes up. so assert here.
// Note that this assert should be removed if we decide
// we want to leave uncommon namespaces not preloaded and
// expect them to be treated as pilot namespaces.
AssertSz(!fAddNmspc, "Pilot namespace found, safe to ingore,"
"but please raid against HTTP-DAV");
// Record the new tag and\or its length
// NOTE: the item that goes into the tag cache is the name
// of the property with the namespace stripped off. This is
// important to know when doing searches in the tag cache.
cchName = static_cast<UINT>(pwszTag + cchTag - pwszName);
if (0 == cchName )
// We really need to have a tag that has a value. Empty
// tags produce invalid XML.
goto ret;
sc = CXAtomCache::ScCacheAtom (&pwszName, cchName);
if (FAILED (sc))
goto ret;
// ScSetTag shouldn't have been called for this node.
Assert (!m_pwszTagEscaped.get());
// Allocate buffer for the property tag.
pwszTagEscaped = static_cast<WCHAR*>(ExAlloc(CbSizeWsz(cchTagEscaped)));
if (!pwszTagEscaped.get())
goto ret;
// Escape the tag name as required.
// If we have an empty namespace, we need to impose additional
// restrictions on the first character of the property name because
// it will be the first character of the xml node, and the first
// character of an xml node can only be a letter or an underscore
// (numbers, etc. are not allowed).
// Note: This will disallow an xml node <123> because it is invalid
// xml, but it will ALLOW the xml node <a:123> even though this is
// also invalid. This is by design because most xml parsers will handle
// this appropriately, and it makes more sense to clients.
sc = ScEscapePropertyName (pwszName, cchName, pwszTagEscaped.get(), &cchTagEscaped, m_fHasEmptyNamespace);
// S_FALSE triggers a second attempt with a freshly allocated buffer
// sized from the updated cchTagEscaped; that attempt is asserted to
// return S_OK.
if (S_FALSE == sc)
pwszTagEscaped = static_cast<WCHAR*>(ExAlloc(CbSizeWsz(cchTagEscaped)));
if (!pwszTagEscaped.get())
goto ret;
sc = ScEscapePropertyName (pwszName, cchName, pwszTagEscaped.get(), &cchTagEscaped, m_fHasEmptyNamespace);
Assert (S_OK == sc);
// Hand the escaped buffer over to the node.
m_pwszTagEscaped = pwszTagEscaped.relinquish();
m_cchTagEscaped = cchTagEscaped;
// Start a new node if XN_ELEMENT
if (m_xnt == XN_ELEMENT)
sc = m_pxb->ScAddTextBytes (1, "<");
if (FAILED(sc))
goto ret;
sc = ScWriteTagName();
if (FAILED(sc))
goto ret;
if (fAddNmspc)
// Add the namespace attribute in the node if necessary
sc = pmsr->ScAddNmspc (m_pns, this);
if (FAILED(sc))
goto ret;
// Save the emitter which can be used later to remove the temporary nmspc
m_pmsr = pmsr;
return sc;
// ScDone()
//
// Finishes this node. For a node that has a tag, the visible code
// writes either a full closing tag ("</" + tag name + ">") when
// m_fNodeOpen is set, or the short form "/>" otherwise. m_fDone is set
// before returning, and the method asserts it has not been called
// before.
//
// Returns sc, which carries the result of the last write call.
//
// NOTE(review): the brace lines, the case labels under "switch (m_xnt)",
// the "else" separating the two closing forms, the statement guarded by
// "if (m_pmsr)", and the "ret" label are not visible in this view --
// confirm against the full file before relying on the exact branch
// structure.
CXNode::ScDone ()
SCODE sc = S_OK;
// This method should never be called twice
Assert (!m_fDone);
switch (m_xnt)
if (!m_pwszTagEscaped.get())
//$ RAID: 85824: When an invalid property name is unpacked,
// ScSetTag will fail with E_DAV_INVALID_PROPERTY_NAME.
// Usually, the client will fail when it sees any error
// from CXNode methods, but in this case it may choose to
// continue and ignore this node completely.
// For us, it's safe not to emit anything when no tag name
// is available.
//$RAID: 85824
if (m_fNodeOpen)
// Node is open, so emit a complete closing node
// </tag>
sc = m_pxb->ScAddTextBytes (2, "</");
if (FAILED(sc))
goto ret;
// Add tag
sc = ScWriteTagName();
if (FAILED(sc))
goto ret;
// closing
sc = m_pxb->ScAddTextBytes (1, ">");
if (FAILED(sc))
goto ret;
// Close directly
sc = m_pxb->ScAddTextBytes (2, "/>");
if (FAILED(sc))
goto ret;
// Namespace nodes, should not have a namespace associated with
// them.
Assert (NULL == m_pns.get());
// Otherwise treat it as an attribute -- and fall through
Assert (m_pwszTagEscaped.get());
// Remove the pilot namespace from global cache
if (m_pmsr)
m_fDone = TRUE;
return sc;
// ScSetFormatedXML() -- narrow-string overload
//
// Appends cch bytes from pszValue to this element exactly as given; no
// escaping is applied. If the start tag has not been closed yet
// (m_fNodeOpen is FALSE), a ">" is written first and the node is marked
// open. Only valid for XN_ELEMENT nodes (asserted).
//
// Returns sc, which carries the result of the last ScAddTextBytes() call.
//
// NOTE(review): brace lines and the "ret" label targeted by the gotos
// are not visible in this view.
CXNode::ScSetFormatedXML (LPCSTR pszValue, UINT cch)
SCODE sc = S_OK;
Assert (m_xnt == XN_ELEMENT);
if (!m_fNodeOpen)
// We must have written the tag name
Assert (m_pwszTagEscaped.get());
// Now that we are adding value to the element node
// We should write the node open
sc = m_pxb->ScAddTextBytes (1, ">");
if (FAILED(sc))
goto ret;
m_fNodeOpen = TRUE;
// Add the value directly
sc = m_pxb->ScAddTextBytes (cch, pszValue);
if (FAILED(sc))
goto ret;
return sc;
// ScSetFormatedXML() -- wide-string overload
//
// Appends cch wide characters from pwszValue to this element through
// ScAddUnicodeResponseBytes(), which converts them to UTF-8; no XML
// escaping is applied here. If the start tag has not been closed yet
// (m_fNodeOpen is FALSE), a ">" is written first and the node is marked
// open. Only valid for XN_ELEMENT nodes (asserted).
//
// Returns sc, which carries the result of the last write call.
//
// NOTE(review): brace lines and the "ret" label targeted by the gotos
// are not visible in this view.
CXNode::ScSetFormatedXML (LPCWSTR pwszValue, UINT cch)
SCODE sc = S_OK;
Assert (m_xnt == XN_ELEMENT);
if (!m_fNodeOpen)
// We must have written the tag name
Assert (m_pwszTagEscaped.get());
// Now that we are adding value to the element node
// We should write the node open
sc = m_pxb->ScAddTextBytes (1, ">");
if (FAILED(sc))
goto ret;
m_fNodeOpen = TRUE;
// Add the value directly
sc = ScAddUnicodeResponseBytes (cch, pwszValue);
if (FAILED(sc))
goto ret;
return sc;
// ScSetUTF8Value()
//
// Writes cch bytes of already-UTF-8 text from pszValue as this node's
// value, escaping it on the way out. Two paths are visible, selected by
// m_xnt:
//   - one closes the start tag with ">" if the node is not yet open,
//     marks it open, and emits the text via ScAddEscapedValueBytes();
//   - the other emits the text via ScAddEscapedAttributeBytes().
//
// Returns sc, which carries the result of the last write call.
//
// NOTE(review): the case labels and break statements under
// "switch (m_xnt)", the brace lines and the "ret" label are not visible
// in this view, so which node types take which path must be confirmed
// against the full file (presumably elements take the first path).
CXNode::ScSetUTF8Value (LPCSTR pszValue, UINT cch)
SCODE sc = S_OK;
switch (m_xnt)
if (!m_fNodeOpen)
// We must have written the tag name
Assert (m_pwszTagEscaped.get());
// Now that we are adding value to the element node
// We should write the node open
sc = m_pxb->ScAddTextBytes (1, ">");
if (FAILED(sc))
goto ret;
m_fNodeOpen = TRUE;
// Write the value
sc = ScAddEscapedValueBytes (cch, pszValue);
if (FAILED(sc))
goto ret;
// Write the value directly
sc = ScAddEscapedAttributeBytes (cch, pszValue);
if (FAILED(sc))
goto ret;
return sc;
// ScSetValue() -- multi-byte overload
//
// Converts the multi-byte string pszValue (cch bytes) to wide characters
// with MultiByteToWideChar() using the code page returned by GetACP(),
// null-terminates the result, and forwards it to the LPCWSTR overload
// of ScSetValue().
//
// Returns the result of the LPCWSTR overload.
//
// NOTE(review): the statement executed when resize() fails and the
// middle arguments of the MultiByteToWideChar() call are not visible in
// this view -- confirm against the full file.
CXNode::ScSetValue (LPCSTR pszValue, UINT cch)
// Ok, against all better judgement, we need to take this
// multi-byte string and convert it to unicode before doing
// any UTF8 processing on it.
// Translations from multibyte to unicode, can never grow in
// character counts, so we are relatively safe allocating this
// on the stack.
UINT cchUnicode;
CStackBuffer<WCHAR,512> pwsz;
if (NULL == pwsz.resize(CbSizeWsz(cch)))
cchUnicode = MultiByteToWideChar (GetACP(),
cch + 1);
// Terminate the string
// The assert checks that the converted text does not already end in a
// null character.
Assert ((0 == cchUnicode) || (0 != *(pwsz.get() + cchUnicode - 1)));
*(pwsz.get() + cchUnicode) = 0;
// Set the value
return ScSetValue (pwsz.get(), cchUnicode);
// ScSetValue() -- wide-string overload
//
// Converts cch wide characters starting at pcwsz to UTF-8 with
// WideCharToUTF8Chars() and passes the resulting bytes to
// ScSetUTF8Value(), which applies XML escaping. The conversion goes
// through a staging buffer of cb bytes, where cb is capped at
// CB_XMLBODYPART_SIZE, so long input is converted and emitted in
// several pieces.
//
// Returns sc, which starts as S_OK and is overwritten by the result of
// ScSetUTF8Value().
//
// NOTE(review): the brace lines, the "ret" label targeted by the gotos,
// and whatever statement sets sc when resize() fails are not visible in
// this view -- confirm them against the full file before changing the
// control flow.
CXNode::ScSetValue (LPCWSTR pcwsz, UINT cch)
SCODE sc = S_OK;
// Argh! We need to have a buffer to fill that is
// at least 3 bytes long for the odd occurrence of a
// single unicode char with significant bits above
// 0x7f.
// Note that when the value
UINT cb = min (cch + 2, CB_XMLBODYPART_SIZE);
// We really can handle zero bytes being sloughed into
// the buffer.
UINT ib;
UINT iwch;
CStackBuffer<BYTE,512> pb;
if (NULL == pb.resize(cb))
goto ret;
// iwch walks the input and is only advanced by the inner loop.
for (iwch = 0; iwch < cch; )
// Fill the staging buffer. The fill stops two bytes short of cb
// because WideCharToUTF8Chars() may write up to three bytes for one
// character; it leaves ib on the last byte written, and the ib++
// below steps past that byte.
for (ib = 0; (ib < cb-2) && (iwch < cch); ib++, iwch++)
WideCharToUTF8Chars (pcwsz[iwch], pb.get(), &ib);
// Add the bytes
Assert (ib <= cb);
sc = ScSetUTF8Value (reinterpret_cast<LPSTR>(pb.get()), ib);
if (FAILED(sc))
goto ret;
return sc;
// ScGetChildNode()
//
// Creates a new CXNode of type xntType that shares this node's m_pxb
// and returns it through *ppxnChild. When the child is an XN_ELEMENT
// and this node's start tag has not been closed yet, a ">" is written
// and this node is marked open, so it will later be closed with a full
// end tag rather than "/>".
//
// Parameters:
//   xntType    type of the child node to create
//   ppxnChild  receives the new node, released from the local
//              auto_ref_ptr with relinquish(); must not be NULL
//              (asserted)
//
// Returns sc.
//
// NOTE(review): the brace lines, the "else" that presumably guards the
// attribute/namespace assertion, the statement that sets sc when the
// allocation fails, and the "ret" label are not visible in this view --
// confirm against the full file.
CXNode::ScGetChildNode (XNT xntType, CXNode **ppxnChild)
SCODE sc = S_OK;
auto_ref_ptr<CXNode> pxn;
Assert (ppxnChild);
if (XN_ELEMENT == xntType)
// Now that new element child node is added, then this node is done open.
// i.e close by ">", instead of "/>"
if (!m_fNodeOpen)
sc = m_pxb->ScAddTextBytes (1, ">");
if (FAILED(sc))
goto ret;
// Then this node is an open node
m_fNodeOpen = TRUE;
Assert ((XN_ATTRIBUTE == xntType) || (XN_NAMESPACE == xntType));
// Create the child node
pxn.take_ownership (new CXNode(xntType, m_pxb));
if (!pxn.get())
goto ret;
// Pass back
*ppxnChild = pxn.relinquish();
return sc;