mirror of https://github.com/lianthony/NT4.0
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
201 lines
6.0 KiB
201 lines
6.0 KiB
/*
   This file was derived from the libwww code, version 2.15, from CERN.
   A number of modifications have been made by Spyglass.

   [email protected]
 */
|
|
|
|
/* SGML parse and stream definition for libwww

   SGML AND STRUCTURED STREAMS

   The SGML parser is a state machine. It is called for every character
   of the input stream. The DTD data structure contains pointers
   to functions which are called to implement the actual effect of the
   text read. When these functions are called, the attribute structures pointed to by the
   DTD are valid, and the function is passed a pointer to the current tag structure, and an
   "element stack" which represents the state of nesting within SGML elements.

   The following aspects are from Dan Connolly's suggestions: Binary search, Structured
   object scheme basically, SGML content enum type.

   (c) Copyright CERN 1991 - See Copyright.html

 */
|
|
#ifndef SGML_H
|
|
#define SGML_H
|
|
|
|
/*
 * SGML content types
 *
 * Each enumerator names one way the content of an element can be treated.
 * The enumerators are implicitly numbered from 0 in the order listed; do not
 * reorder them.
 */
typedef enum _SGMLContent
{
    SGML_EMPTY,     /* no content */
    SGML_LITERAL,   /* character data. Recognized exact close tag only.
                       Old www server compatibility only! Not SGML */
    SGML_CDATA,     /* character data. recognize </ only */
    SGML_RCDATA,    /* replaceable character data. recognize </ and &ref; */
    SGML_MIXED,     /* elements and parsed character data. recognize all markup */
    SGML_ELEMENT,   /* any data found will be returned as an error */
    SGML_NEST       /* handles unusual nesting */
}
SGMLContent;
|
|
|
|
/*
 * Tag classification: a tag is either a header tag or something else.
 * HTTAG_OTHER is 0 and HTTAG_HEADER is 1 (implicit numbering).
 */
typedef enum _HTTagClass
{
    HTTAG_OTHER,    /* any tag that is not a header tag */
    HTTAG_HEADER    /* header tag */
}
HTTagClass;
|
|
|
|
/*
 * Description of one attribute that an element may carry.
 * Only the attribute's name is recorded.
 */
typedef struct
{
    char *name;     /* The (constant) name of the attribute */
    /* Could put type info in here */
}
attr;
|
|
|
|
|
|
/* A tag structure describes an SGML element.
|
|
** -----------------------------------------
|
|
**
|
|
**
|
|
** name is the string which comes after the tag opener "<".
|
|
**
|
|
** attributes points to a zero-terminated array
|
|
** of attribute names.
|
|
**
|
|
** litteral determines how the SGML engine parses the charaters
|
|
** within the element. If set, tag openers are ignored
|
|
** except for that which opens a matching closing tag.
|
|
**
|
|
*/
|
|
typedef struct _tag HTTag;
|
|
struct _tag
|
|
{
|
|
char *name; /* The name of the tag */
|
|
attr *attributes; /* The list of acceptable attributes */
|
|
int number_of_attributes; /* Number of possible attributes */
|
|
SGMLContent contents; /* End only on end tag @@ */
|
|
HTTagClass tagclass; /* currently now - header vs non header */
|
|
};
|
|
|
|
|
|
|
|
|
|
/* DTD Information
|
|
** ---------------
|
|
**
|
|
** Not the whole DTD, but all this parser usues of it.
|
|
*/
|
|
typedef struct
|
|
{
|
|
HTTag *tags; /* Must be in strcmp order by name */
|
|
int number_of_tags;
|
|
CONST char **entity_names; /* Must be in strcmp order by name */
|
|
CONST char **entity_values; /* Must be in same order as entity_names */
|
|
int number_of_entities;
|
|
}
|
|
SGML_dtd;
|
|
|
|
/* Size limits for entity names and per-element attribute lists. */
#define MAX_ENTITY_LEN 6        /* Number of characters in longest entity name */
#define MAX_ATTRIBUTES 20       /* Max number of attributes per element */
|
|
|
|
/*      SGML context passed to parsers
**
**  Opaque handle: struct _HTSGMLContext is only forward-declared here,
**  so code including this header cannot look inside it.
*/
typedef struct _HTSGMLContext *HTSGMLContext;   /* Hidden */
|
|
|
|
|
|
/*__________________________________________________________________________
|
|
*/
|
|
|
|
/*

Structured Object definition

   A structured object is something which can reasonably be represented in SGML. I'll
   rephrase that. A structured object is an ordered tree-structured arrangement of data
   which is representable as text. The SGML parser outputs to a Structured object. A
   Structured object can output its contents to another Structured Object. It's a kind of
   typed stream. The architecture is largely Dan Connolly's. Elements and entities are
   passed to the sob (structured object) by number, implying a knowledge of the DTD.
   Knowledge of the SGML syntax is not here, though.

   Superclass: HTStream

   The creation methods will vary on the type of Structured Object. Maybe the callerData is
   enough info to pass along.

 */
typedef struct _HTStructured HTStructured;
|
|
|
|
typedef struct _HTStructuredClass
|
|
{
|
|
|
|
char *name; /* Just for diagnostics */
|
|
void (*free) (HTStructured * me);
|
|
void (*abort) (HTStructured * me, HTError e);
|
|
void (*put_character) (HTStructured * me, char ch);
|
|
void (*put_string) (HTStructured * me, CONST char *str);
|
|
void (*write) (HTStructured * me, CONST char *str, int len);
|
|
void (*start_element) (HTStructured * me, int element_number, CONST BOOL * attribute_present, CONST char **attribute_value);
|
|
void (*end_element) (HTStructured * me, int element_number);
|
|
void (*put_entity) (HTStructured * me, int entity_number);
|
|
|
|
/* This function is called to pass along the source code as it's read.
|
|
add_source may be NULL, in which case the function just isn't called. */
|
|
void (*add_source)(HTStructured *me, CONST char *str, int len);
|
|
|
|
/* Indicates that an arbitrary-sized block of data has been processed.
|
|
The intent is to provide a way for the sob to periodically update
|
|
what it's doing. block_done may be NULL, in which case it won't be called. */
|
|
void (*block_done)(HTStructured *me);
|
|
|
|
/* This function is called to obtain the CS_Stream for source code as it's read.
|
|
get_source may be NULL, in which case the function just isn't called. */
|
|
LPVOID (*get_source)(HTStructured *me);
|
|
}
|
|
HTStructuredClass;
|
|
|
|
/*
|
|
Find a Tag by Name
|
|
|
|
Returns a pointer to the tag within the DTD.
|
|
*/
|
|
extern HTTag *SGMLFindTag(CONST SGML_dtd * dtd, CONST char *string);
|
|
|
|
|
|
/*
|
|
** Create an SGML parser
|
|
** On entry,
|
|
** dtd must point to a DTD structure as defined above
|
|
** callbacks must point to user routines.
|
|
** callData is returned in callbacks transparently.
|
|
** On exit,
|
|
** The default tag starter has been processed.
|
|
*/
|
|
|
|
|
|
extern HTStream *SGML_new(struct Mwin *tw, CONST SGML_dtd * dtd, HTStructured * target, HTRequest *request);
|
|
|
|
/* Stream class object for the SGML parser; only declared in this header.
   NOTE(review): presumably the class of the streams returned by SGML_new - confirm. */
extern HTStreamClass SGMLParser;
|
|
|
|
|
|
#endif /* SGML_H */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
*/
|