You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
806 lines
21 KiB
806 lines
21 KiB
#pragma once
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
typedef unsigned char BYTE;
|
|
typedef BYTE *PBYTE;
|
|
|
|
|
|
|
|
//
|
|
// These "raw tokens" are the stuff that comes out of the
|
|
// base tokenization engine. Special characters are given
|
|
// names, a 'special character' being one that is called out
|
|
// anywhere in the XML spec as having a meaning other than
|
|
// text.
|
|
//
|
|
typedef enum
|
|
{
|
|
NTXML_RAWTOKEN_ERROR,
|
|
NTXML_RAWTOKEN_DASH,
|
|
NTXML_RAWTOKEN_DOT,
|
|
NTXML_RAWTOKEN_END_OF_STREAM,
|
|
NTXML_RAWTOKEN_EQUALS,
|
|
NTXML_RAWTOKEN_FORWARDSLASH,
|
|
NTXML_RAWTOKEN_GT,
|
|
NTXML_RAWTOKEN_LT,
|
|
NTXML_RAWTOKEN_QUESTIONMARK,
|
|
NTXML_RAWTOKEN_QUOTE,
|
|
NTXML_RAWTOKEN_DOUBLEQUOTE,
|
|
NTXML_RAWTOKEN_START_OF_STREAM,
|
|
NTXML_RAWTOKEN_TEXT,
|
|
NTXML_RAWTOKEN_WHITESPACE,
|
|
NTXML_RAWTOKEN_OPENBRACKET,
|
|
NTXML_RAWTOKEN_CLOSEBRACKET,
|
|
NTXML_RAWTOKEN_BANG,
|
|
NTXML_RAWTOKEN_OPENCURLY,
|
|
NTXML_RAWTOKEN_CLOSECURLY,
|
|
NTXML_RAWTOKEN_COLON,
|
|
NTXML_RAWTOKEN_SEMICOLON,
|
|
NTXML_RAWTOKEN_UNDERSCORE,
|
|
NTXML_RAWTOKEN_AMPERSTAND,
|
|
NTXML_RAWTOKEN_POUNDSIGN
|
|
} NTXML_RAW_TOKEN;
|
|
|
|
|
|
typedef enum {
|
|
|
|
XMLEF_UNKNOWN = 0,
|
|
XMLEF_UCS_4_LE,
|
|
XMLEF_UCS_4_BE,
|
|
XMLEF_UTF_16_LE,
|
|
XMLEF_UTF_16_BE,
|
|
XMLEF_UTF_8_OR_ASCII
|
|
|
|
} XML_ENCODING_FAMILY;
|
|
|
|
|
|
typedef struct _XML_EXTENT {
|
|
PVOID pvData; // Pointer into the original XML document
|
|
SIZE_T cbData; // Byte count from the extent base
|
|
XML_ENCODING_FAMILY Encoding; // Encoding family for faster decoding
|
|
ULONG ulCharacters; // Character count in this extent
|
|
}
|
|
XML_EXTENT, *PXML_EXTENT;
|
|
|
|
typedef const struct _XML_EXTENT * PCXML_EXTENT;
|
|
|
|
|
|
//
|
|
// Clients of the raw tokenizer should provide a "next character"
|
|
// functionality. This way, the tokenization engine doesn't need
|
|
// to know anything about how to get the next thing out of a pvoid
|
|
// blob of data, allowing for compressed streams, multiple encodings,
|
|
// etc.
|
|
//
|
|
typedef ULONG (__fastcall *NTXMLRAWNEXTCHARACTER)(
|
|
struct _XML_RAWTOKENIZATION_STATE* pContext
|
|
);
|
|
|
|
typedef struct _XML_SPECIAL_STRING {
|
|
//
|
|
// UNICODE representation of the string
|
|
//
|
|
WCHAR *wszStringText;
|
|
SIZE_T cchwszStringText;
|
|
}
|
|
XML_SPECIAL_STRING, *PXML_SPECIAL_STRING;
|
|
|
|
typedef const struct _XML_SPECIAL_STRING *PCXML_SPECIAL_STRING;
|
|
|
|
#define EMPTY_SPECIAL_STRING { NULL, 0 }
|
|
#define MAKE_SPECIAL_STRING(str) { L##str, NUMBER_OF(L##str) - 1 }
|
|
|
|
|
|
|
|
extern XML_SPECIAL_STRING xss_CDATA;
|
|
extern XML_SPECIAL_STRING xss_xml;
|
|
extern XML_SPECIAL_STRING xss_encoding;
|
|
extern XML_SPECIAL_STRING xss_standalone;
|
|
extern XML_SPECIAL_STRING xss_version;
|
|
|
|
//
|
|
// A 'raw' token is more or less a run of bytes in the XML that is given
|
|
// a name. The low-level tokenizer returns these as it runs, and assumes
|
|
// that the higher-level tokenizer knows how to turn groups of these into
|
|
// productions, and from there the lexer knows how to turn groups of the
|
|
// real tokens into meaning.
|
|
//
|
|
typedef struct _XML_RAW_TOKEN
|
|
{
|
|
//
|
|
// This is the 'name' of this token, so that we can easily switch on
|
|
// it in upper-level layers.
|
|
//
|
|
NTXML_RAW_TOKEN TokenName;
|
|
|
|
//
|
|
// Pointer and length of the extent
|
|
//
|
|
XML_EXTENT Run;
|
|
}
|
|
XML_RAW_TOKEN, *PXML_RAW_TOKEN;
|
|
|
|
//
|
|
// This is the base tokenization state blob necessary to keep tokenizing
|
|
// between calls. See member descriptions for more details.
|
|
//
|
|
typedef struct _XML_RAWTOKENIZATION_STATE
|
|
{
|
|
|
|
//
|
|
// PVOID and length of the original XML document
|
|
//
|
|
XML_EXTENT OriginalDocument;
|
|
|
|
//
|
|
// Pointer to the 'end' of the document.
|
|
//
|
|
PVOID pvDocumentEnd;
|
|
|
|
//
|
|
// Pointer into the XML data that represents where we are at the moment
|
|
// in tokenization. Will not be moved by the raw tokenizer - you must
|
|
// use the NtRawXmlAdvanceCursor (or related) to move the cursor along
|
|
// the data stream. Hence, calling the tokenizer twice in a row will
|
|
// get you the same token.
|
|
//
|
|
PVOID pvCursor;
|
|
|
|
//
|
|
// The function that this tokenization run is using for getting the
|
|
// next WCHAR out of the PVOID pointed to by pvCursor. If this member
|
|
// is NULL, you get a bit of default functionality that knows about
|
|
// UNICODE, little-endianness, and UTF8.
|
|
//
|
|
NTXMLRAWNEXTCHARACTER pfnNextChar;
|
|
|
|
//
|
|
// The encoding family can be detected from the first bytes in the
|
|
// incoming stream. They are classified according to the XML spec,
|
|
// which defaults to UTF-8.
|
|
//
|
|
XML_ENCODING_FAMILY EncodingFamily;
|
|
|
|
//
|
|
// When the upper-level tokenizer detects the "encoding" statement
|
|
// in the <?xml ...?> declaration, it should set this member to the
|
|
// code page that was found. Noticably, this will start out as
|
|
// zero on initialization. A smart "next character" function will
|
|
// do some default operation to continue working even if this is
|
|
// unset.
|
|
//
|
|
ULONG DetectedCodePage;
|
|
|
|
XML_RAW_TOKEN LastTokenCache;
|
|
PVOID pvLastCursor;
|
|
|
|
//
|
|
// How many bytes were in the last thing?
|
|
//
|
|
SIZE_T cbBytesInLastRawToken;
|
|
|
|
//
|
|
// Result of the next-character call
|
|
//
|
|
NTSTATUS NextCharacterResult;
|
|
|
|
//
|
|
// Default character size, set by the initializer that determines the
|
|
// encoding.
|
|
//
|
|
SIZE_T DefaultCharacterSize;
|
|
}
|
|
XML_RAWTOKENIZATION_STATE, *PXML_RAWTOKENIZATION_STATE;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
// Simple interface out to the Real World. This allocator should be
|
|
// replaced (eventually) with calls directly into the proper
|
|
// allocator (HeapAlloc/ExAllocatePoolWithTag) in production code.
|
|
//
|
|
typedef NTSTATUS (*NTXML_ALLOCATOR)(
|
|
SIZE_T ulBytes,
|
|
PVOID *ppvAllocated,
|
|
PVOID pvAllocationContext);
|
|
|
|
//
|
|
// Frees memory allocated with the corresponding NTXML_ALLOCATOR
|
|
// call.
|
|
//
|
|
typedef NTSTATUS (*NTXML_DEALLOCATOR)(PVOID pvAllocated, PVOID pvContext);
|
|
|
|
|
|
/*++
|
|
|
|
Normal operation would go like this:
|
|
|
|
<?xml version="1.0"? encoding="UTF-8" standalone="yes"?>
|
|
<!-- commentary -->
|
|
<?bonk foo?>
|
|
<ham>
|
|
<frooby:cheese hot="yes"/>
|
|
</ham>
|
|
|
|
XTLS_STREAM_START
|
|
XTLS_XMLDECL {XTSS_XMLDECL_OPEN "<?xml" }
|
|
XTLS_XMLDECL {XTSS_XMLDECL_VERSION "version" }
|
|
XTLS_XMLDECL {XTSS_XMLDECL_EQUALS "=" }
|
|
XTLS_XMLDECL {XTSS_XMLDECL_VALUE "1.0" }
|
|
XTLS_XMLDECL {XTSS_XMLDECL_ENCODING "encoding" }
|
|
XTLS_XMLDECL {XTSS_XMLDECL_EQUALS "=" }
|
|
XTLS_XMLDECL {XTSS_XMLDECL_VALUE "UTF-8" }
|
|
XTLS_XMLDECL {XTSS_XMLDECL_STANDALONE "standalone" }
|
|
XTLS_XMLDECL {XTSS_XMLDECL_EQUALS "=" }
|
|
XTLS_XMLDECL {XTSS_XMLDECL_VALUE "yes" }
|
|
XTLS_XMLDECL {XTSS_XMLDECL_CLOSE "?>" }
|
|
XTLS_COMMENT {XTSS_COMMENT_OPEN "<!--" }
|
|
XTLS_COMMENT {XTSS_COMMENT_CONTENT " commentary " }
|
|
XTLS_COMMENT {XTSS_COMMENT_CLOSE "-->" }
|
|
XTLS_PROCESSING_INSTRUCTION {XTSS_PI_OPEN "<?" }
|
|
XTLS_PROCESSING_INSTRUCTION {XTSS_PI_NAME "bonk" }
|
|
XTLS_PROCESSING_INSTRUCTION {XTSS_PI_CONTENT "foo" }
|
|
XTLS_PROCESSING_INSTRUCTION {XTSS_PI_CLOSE "?>" }
|
|
XTLS_FLOATINGDATA {XTSS_FD_WHITESPACE "\n" }
|
|
XTLS_ELEMENT {XTSS_ELEMENT_OPEN "<" }
|
|
XTLS_ELEMENT {XTSS_ELEMENT_NAME "ham" }
|
|
XTLS_ELEMENT {XTSS_ELEMENT_CLOSE ">" }
|
|
XTLS_FLOATINGDATA {XTSS_FLOATINGDATA "\n " }
|
|
XTLS_ELEMENT {XTSS_ELEMENT_OPEN "<" }
|
|
XTLS_ELEMENT {XTSS_ELEMENT_NAMESPACE "frooby" }
|
|
XTLS_ELEMENT {XTSS_ELEMENT_NAME "cheese" }
|
|
XTLS_ELEMENT {XTSS_ELEMENT_VALUENAME "hot" }
|
|
XTLS_ELEMENT {XTSS_ELEMENT_VALUE "yes" }
|
|
XTLS_ELEMENT {XTSS_ELEMENT_EMPTYCLOSE "/>" }
|
|
XTLS_FLOATINGDATA {XTSS_FLOATINGDATA "\n" }
|
|
XTLS_ELEMENT {XTSS_ELEMENT_CLOSETAG "</" }
|
|
XTLS_ELEMENT {XTSS_ELEMENT_NAME "ham" }
|
|
XTLS_ELEMENT {XTSS_ELEMENT_CLOSE ">" }
|
|
XTLS_STREAM_END
|
|
|
|
--*/
|
|
|
|
|
|
typedef enum {
|
|
|
|
XTSS_ERRONEOUS,
|
|
|
|
|
|
//
|
|
// In the middle of "nowhere" - the hyperspace between elements
|
|
//
|
|
XTSS_STREAM_HYPERSPACE,
|
|
|
|
//
|
|
// At the start of the input stream
|
|
//
|
|
XTSS_STREAM_START,
|
|
|
|
//
|
|
// At the end of the input stream
|
|
//
|
|
XTSS_STREAM_END,
|
|
|
|
|
|
////////////////////////////////////////////
|
|
//
|
|
// ELEMENT STATES
|
|
//
|
|
////////////////////////////////////////////
|
|
|
|
//
|
|
// Meaning: An element tag was found.
|
|
//
|
|
// Rawtoken: NTXML_RAWTOKEN_LT
|
|
//
|
|
XTSS_ELEMENT_OPEN,
|
|
|
|
//
|
|
// Meaning: A run of text was found that could represent a name.
|
|
// This is basically all the text found between the opening
|
|
// element tag and some illegal values.
|
|
//
|
|
// Rawtoken: A run of any of the following:
|
|
// NTXML_RAWTOKEN_TEXT
|
|
// NTXML_RAWTOKEN_DOT
|
|
// NTXML_RAWTOKEN_COLON
|
|
// NTXML_RAWTOKEN_UNDERSCORE
|
|
// NTXML_RAWTOKEN_DASH
|
|
// The name ends when something else appears.
|
|
//
|
|
XTSS_ELEMENT_NAME,
|
|
|
|
|
|
//
|
|
// Found the xmlns part of <foo xmlns:bar=
|
|
//
|
|
XTSS_ELEMENT_XMLNS,
|
|
|
|
//
|
|
// Found <foo xmlns=
|
|
//
|
|
XTSS_ELEMENT_XMLNS_DEFAULT,
|
|
|
|
//
|
|
// Found the 'a' in <foo xml:a=
|
|
//
|
|
XTSS_ELEMENT_XMLNS_ALIAS,
|
|
|
|
//
|
|
// Found the colon between xmlns and the alias
|
|
//
|
|
XTSS_ELEMENT_XMLNS_COLON,
|
|
|
|
//
|
|
// Found the equals sign between xmlns and the value
|
|
//
|
|
XTSS_ELEMENT_XMLNS_EQUALS,
|
|
|
|
XTSS_ELEMENT_XMLNS_VALUE_OPEN,
|
|
XTSS_ELEMENT_XMLNS_VALUE_CLOSE,
|
|
XTSS_ELEMENT_XMLNS_VALUE,
|
|
|
|
//
|
|
// This is the prefix for an element name, if present
|
|
//
|
|
XTSS_ELEMENT_NAME_NS_PREFIX,
|
|
|
|
//
|
|
// This is the colon after an element name ns prefix
|
|
//
|
|
XTSS_ELEMENT_NAME_NS_COLON,
|
|
|
|
//
|
|
// This is the prefix on an attribute name for a namespace
|
|
//
|
|
XTSS_ELEMENT_ATTRIBUTE_NAME_NS_PREFIX,
|
|
|
|
//
|
|
// This is the colon after an element attribute name namespace prefix
|
|
//
|
|
XTSS_ELEMENT_ATTRIBUTE_NAME_NS_COLON,
|
|
|
|
//
|
|
// Meaning: A close of a tag (>) was found
|
|
//
|
|
// Rawtoken: NTXML_RAWTOKEN_GT
|
|
//
|
|
XTSS_ELEMENT_CLOSE,
|
|
|
|
//
|
|
// Meaning: An empty-tag (/>) was found
|
|
//
|
|
// Rawtoken: NTXML_RAWTOKEN_FORWARDSLASH NTXML_RAWTOKEN_GT
|
|
//
|
|
XTSS_ELEMENT_CLOSE_EMPTY,
|
|
|
|
//
|
|
// Meaning: An attribute name was found
|
|
//
|
|
// Rawtoken: See rules for XTSS_ELEMENT_NAME
|
|
//
|
|
XTSS_ELEMENT_ATTRIBUTE_NAME,
|
|
|
|
//
|
|
// Meaning: An equals sign was found in an element
|
|
//
|
|
// Rawtoken: NTXML_RAWTOKEN_EQUALS
|
|
//
|
|
XTSS_ELEMENT_ATTRIBUTE_EQUALS,
|
|
|
|
//
|
|
// Meaning: The quote (start or end) of an element-attribute value
|
|
// was found.
|
|
//
|
|
// Rawtokne; NTXML_RAWTOKEN_QUOTE
|
|
//
|
|
XTSS_ELEMENT_ATTRIBUTE_QUOTE,
|
|
|
|
//
|
|
// Meaning: Element attribute value data was found after a
|
|
// quote of some variety.
|
|
//
|
|
// Rawtoken: A run of any thing that's not the following:
|
|
// NTXML_RAWTOKEN_LT
|
|
// NTXML_RAWTOKEN_QUOTE (unless this quote is not the same
|
|
// as the quote in
|
|
// XTSS_ELEMENT_ATTRIBUTE_QUOTE)
|
|
//
|
|
// N.B.: See special rules on handling entities in text.
|
|
//
|
|
XTSS_ELEMENT_ATTRIBUTE_VALUE,
|
|
XTSS_ELEMENT_ATTRIBUTE_OPEN,
|
|
XTSS_ELEMENT_ATTRIBUTE_CLOSE,
|
|
|
|
//
|
|
// Meaning: Whitespace was found in the element tag at this point
|
|
//
|
|
// Rawtoken: NTXML_RAWTOKEN_WHITESPACE
|
|
//
|
|
XTSS_ELEMENT_WHITESPACE,
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////
|
|
//
|
|
// END ELEMENT SPECIFIC STATES
|
|
//
|
|
////////////////////////////////////////////
|
|
|
|
//
|
|
// Meaning: The start of an "end element" was found
|
|
//
|
|
// Rawtoken: NTXML_RAWTOKEN_LT NTXML_RAWTOKEN_FORWARDSLASH
|
|
//
|
|
XTSS_ENDELEMENT_OPEN,
|
|
|
|
//
|
|
// Meaning: The name of an end element was found
|
|
//
|
|
// Rawtoken: See rules for XTSS_ELEMENT_NAME
|
|
//
|
|
XTSS_ENDELEMENT_NAME,
|
|
|
|
//
|
|
// Meaning: We're in the whitespace portion of the end element
|
|
//
|
|
// Rawtoken: NTXML_RAWTOKEN_WHITESPACE
|
|
//
|
|
XTSS_ENDELEMENT_WHITESPACE,
|
|
|
|
//
|
|
// Meaning: The close of an endelement tag was found
|
|
//
|
|
// Rawtoken: NTXML_RAWTOKEN_GT
|
|
//
|
|
XTSS_ENDELEMENT_CLOSE,
|
|
|
|
//
|
|
// Namespace prefix on the endelement name
|
|
//
|
|
XTSS_ENDELEMENT_NS_PREFIX,
|
|
|
|
//
|
|
// Colon after the namespace prefix in the endelement tag
|
|
//
|
|
XTSS_ENDELEMENT_NS_COLON,
|
|
|
|
|
|
|
|
////////////////////////////////////////////
|
|
//
|
|
// XML PROCESSING INSTRUCTION STATES
|
|
//
|
|
////////////////////////////////////////////
|
|
|
|
//
|
|
// Meaning: The start of an xml processing instruction was found
|
|
//
|
|
// Rawtokens: NTXML_RAWTOKEN_LT NTXML_RAWTOKEN_QUESTIONMARK
|
|
//
|
|
XTSS_PI_OPEN,
|
|
|
|
//
|
|
// Meaning: The end of an XML processing instruction was found
|
|
//
|
|
// Rawtokens: NTXML_RAWTOKEN_QUESTIONMARK NTXML_RAWTOKEN_GT
|
|
//
|
|
XTSS_PI_CLOSE,
|
|
|
|
//
|
|
// Meaning: The processing instruction name was found
|
|
//
|
|
// Rawtokens: A nonempty stream of tokens identifying a name. See the
|
|
// rules for XTSS_ELEMENT_NAME for details.
|
|
//
|
|
XTSS_PI_TARGET,
|
|
|
|
//
|
|
// Meaning: Some processing instruction metadata was found.
|
|
//
|
|
// Rawtokens: Anything except the sequence
|
|
// NTXML_RAWTOKEN_QUESTIONMARK NTXML_RAWTOKEN_GT
|
|
//
|
|
XTSS_PI_VALUE,
|
|
|
|
//
|
|
// Meaning: Whitespace between the target and the value was found
|
|
//
|
|
// Rawtokens: NTXML_RAWTOKEN_WHITESPACE
|
|
//
|
|
XTSS_PI_WHITESPACE,
|
|
|
|
|
|
|
|
////////////////////////////////////////////
|
|
//
|
|
// XML PROCESSING INSTRUCTION STATES
|
|
//
|
|
////////////////////////////////////////////
|
|
|
|
//
|
|
// Meaning: Start of a comment block
|
|
//
|
|
// Rawtokens: NTXML_RAWTOKEN_LT NTXML_RAWTOKEN_BANG NTXML_RAWTOKEN_DASH NTXML_RAWTOKEN_DASH
|
|
//
|
|
XTSS_COMMENT_OPEN,
|
|
|
|
//
|
|
// Meaning: Commentary data, should be ignored by a good processor
|
|
//
|
|
// Rawtokens: Anything except the sequence:
|
|
// NTXML_RAWTOKEN_DASH NTXML_RAWTOKEN_DASH
|
|
//
|
|
XTSS_COMMENT_COMMENTARY,
|
|
|
|
//
|
|
// Meaning: Comment close tag
|
|
//
|
|
// Rawtokens: NTXML_RAWTOKEN_DASH NTXML_RAWTOKEN_DASH NTXML_RAWTOKEN_GT
|
|
//
|
|
XTSS_COMMENT_CLOSE,
|
|
|
|
|
|
////////////////////////////////////////////
|
|
//
|
|
// XML PROCESSING INSTRUCTION STATES
|
|
//
|
|
////////////////////////////////////////////
|
|
|
|
//
|
|
// Meaning: Opening of a CDATA block
|
|
//
|
|
// Rawtokens: NTXML_RAWTOKEN_LT
|
|
// NTXML_RAWTOKEN_BRACE
|
|
// NTXML_RAWTOKEN_BANG
|
|
// NTXML_RAWTOKEN_TEXT (CDATA)
|
|
// NTXML_RAWTOKEN_BRACE
|
|
//
|
|
XTSS_CDATA_OPEN,
|
|
|
|
//
|
|
// Meaning: Unparseable CDATA stuff
|
|
//
|
|
// Rawtokens: Anything except the sequence
|
|
// NTXML_RAWTOKEN_BRACE
|
|
// NTXML_RAWTOKEN_BRACE
|
|
// NTXML_RAWTOKEN_GT
|
|
//
|
|
XTSS_CDATA_CDATA,
|
|
|
|
//
|
|
// Meaning: End of a CDATA block
|
|
//
|
|
XTSS_CDATA_CLOSE,
|
|
|
|
|
|
////////////////////////////////////////////
|
|
//
|
|
// XMLDECL (<?xml) states
|
|
//
|
|
////////////////////////////////////////////
|
|
|
|
XTSS_XMLDECL_OPEN,
|
|
XTSS_XMLDECL_CLOSE,
|
|
XTSS_XMLDECL_WHITESPACE,
|
|
XTSS_XMLDECL_EQUALS,
|
|
XTSS_XMLDECL_ENCODING,
|
|
XTSS_XMLDECL_STANDALONE,
|
|
XTSS_XMLDECL_VERSION,
|
|
XTSS_XMLDECL_VALUE_OPEN,
|
|
XTSS_XMLDECL_VALUE,
|
|
XTSS_XMLDECL_VALUE_CLOSE,
|
|
|
|
|
|
|
|
} XML_TOKENIZATION_SPECIFIC_STATE;
|
|
|
|
|
|
//
|
|
// Another, similar XML token structure for the 'cooked' XML bits.
|
|
//
|
|
typedef struct _XML_TOKEN {
|
|
|
|
//
|
|
// Pointer and length of the data in the token
|
|
//
|
|
XML_EXTENT Run;
|
|
|
|
//
|
|
// What state are we in at the moment
|
|
//
|
|
XML_TOKENIZATION_SPECIFIC_STATE State;
|
|
|
|
//
|
|
// Was there an error gathering up this state?
|
|
//
|
|
BOOLEAN fError;
|
|
|
|
}
|
|
XML_TOKEN, *PXML_TOKEN;
|
|
|
|
typedef const struct _XML_TOKEN *PCXML_TOKEN;
|
|
|
|
typedef enum {
|
|
XML_STRING_COMPARE_EQUALS = 0,
|
|
XML_STRING_COMPARE_GT = 1,
|
|
XML_STRING_COMPARE_LT = -1
|
|
}
|
|
XML_STRING_COMPARE;
|
|
|
|
|
|
|
|
//
|
|
// This function knows how to compare a pvoid and a length against
|
|
// a 7-bit ascii string
|
|
//
|
|
typedef NTSTATUS (*NTXMLSPECIALSTRINGCOMPARE)(
|
|
struct _XML_TOKENIZATION_STATE *pState,
|
|
const struct _XML_EXTENT *pRawToken,
|
|
const struct _XML_SPECIAL_STRING *pSpecialString,
|
|
XML_STRING_COMPARE *pfResult
|
|
);
|
|
|
|
|
|
|
|
//
|
|
// Compare two extents
|
|
//
|
|
typedef NTSTATUS (*NTXMLCOMPARESTRINGS)(
|
|
struct _XML_TOKENIZATION_STATE *TokenizationState,
|
|
PXML_EXTENT pLeft,
|
|
PXML_EXTENT pRight,
|
|
XML_STRING_COMPARE *pfEquivalent);
|
|
|
|
|
|
typedef NTSTATUS (*RTLXMLCALLBACK)(
|
|
PVOID pvCallbackContext,
|
|
struct _XML_TOKENIZATION_STATE *State,
|
|
PCXML_TOKEN Token,
|
|
PBOOLEAN StopTokenization
|
|
);
|
|
|
|
|
|
//
|
|
// Now let's address the 'cooked' tokenization
|
|
// methodology.
|
|
//
|
|
typedef struct _XML_TOKENIZATION_STATE {
|
|
|
|
//
|
|
// Core tokenization state data
|
|
//
|
|
XML_RAWTOKENIZATION_STATE RawTokenState;
|
|
|
|
//
|
|
// State values
|
|
//
|
|
XML_TOKENIZATION_SPECIFIC_STATE PreviousState;
|
|
|
|
//
|
|
// Scratch pad for holding tokens
|
|
//
|
|
XML_RAW_TOKEN RawTokenScratch[20];
|
|
|
|
//
|
|
// Ways to compare two strings
|
|
//
|
|
NTXMLCOMPARESTRINGS pfnCompareStrings;
|
|
|
|
//
|
|
// Compare an extent against a 'magic' string
|
|
//
|
|
NTXMLSPECIALSTRINGCOMPARE pfnCompareSpecialString;
|
|
|
|
//
|
|
// Scratch space for the opening quote rawtoken name, if we're in
|
|
// a quoted string (ie: attribute value, etc.)
|
|
//
|
|
NTXML_RAW_TOKEN QuoteTemp;
|
|
|
|
//
|
|
// Callback
|
|
//
|
|
PVOID prgXmlTokenCallbackContext;
|
|
RTLXMLCALLBACK prgXmlTokenCallback;
|
|
|
|
}
|
|
XML_TOKENIZATION_STATE, *PXML_TOKENIZATION_STATE;
|
|
|
|
|
|
|
|
NTSTATUS
|
|
RtlXmlAdvanceTokenization(
|
|
PXML_TOKENIZATION_STATE pState,
|
|
PXML_TOKEN pToken
|
|
);
|
|
|
|
|
|
|
|
NTSTATUS
|
|
RtlXmlDetermineStreamEncoding(
|
|
PXML_TOKENIZATION_STATE pState,
|
|
PSIZE_T pulBytesOfEncoding,
|
|
PXML_EXTENT EncodingName
|
|
);
|
|
|
|
|
|
NTSTATUS
|
|
RtlXmlInitializeTokenization(
|
|
PXML_TOKENIZATION_STATE pState,
|
|
PVOID pvData,
|
|
SIZE_T cbData,
|
|
NTXMLRAWNEXTCHARACTER pfnNextCharacter,
|
|
NTXMLSPECIALSTRINGCOMPARE pfnSpecialStringComparison,
|
|
NTXMLCOMPARESTRINGS pfnNormalStringComparison
|
|
);
|
|
|
|
NTSTATUS
|
|
RtlXmlCloneRawTokenizationState(
|
|
const PXML_RAWTOKENIZATION_STATE pStartState,
|
|
PXML_RAWTOKENIZATION_STATE pTargetState
|
|
);
|
|
|
|
|
|
NTSTATUS
|
|
RtlXmlCloneTokenizationState(
|
|
const PXML_TOKENIZATION_STATE pStartState,
|
|
PXML_TOKENIZATION_STATE pTargetState
|
|
);
|
|
|
|
|
|
NTSTATUS
|
|
RtlXmlNextToken(
|
|
PXML_TOKENIZATION_STATE pState,
|
|
PXML_TOKEN pToken,
|
|
BOOLEAN fAdvanceState
|
|
);
|
|
|
|
|
|
NTSTATUS
|
|
RtlXmlCopyStringOut(
|
|
PXML_TOKENIZATION_STATE pState,
|
|
PXML_EXTENT pExtent,
|
|
PWSTR pwszTarget,
|
|
SIZE_T *pCchResult
|
|
);
|
|
|
|
NTSTATUS
|
|
RtlXmlDefaultCompareStrings(
|
|
PXML_TOKENIZATION_STATE pState,
|
|
PCXML_EXTENT pLeft,
|
|
PCXML_EXTENT pRight,
|
|
XML_STRING_COMPARE *pfEqual
|
|
);
|
|
|
|
|
|
NTSTATUS
|
|
RtlXmlIsExtentWhitespace(
|
|
PXML_TOKENIZATION_STATE pState,
|
|
PCXML_EXTENT Run,
|
|
PBOOLEAN pfIsWhitespace
|
|
);
|
|
|
|
NTXML_RAW_TOKEN FORCEINLINE FASTCALL
|
|
_RtlpDecodeCharacter(ULONG ulCharacter);
|
|
|
|
|
|
#define STATUS_NTXML_INVALID_FORMAT (0xc0100000)
|
|
|
|
|
|
#ifndef NUMBER_OF
|
|
#define NUMBER_OF(q) (sizeof(q)/sizeof((q)[0]))
|
|
#endif
|
|
|
|
#ifdef __cplusplus
|
|
};
|
|
#endif
|
|
|