mirror of https://github.com/lianthony/NT4.0
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1521 lines
36 KiB
1521 lines
36 KiB
/*
|
|
This file was derived from the libwww code, version 2.15, from CERN.
|
|
A number of modifications have been made by Spyglass.
|
|
|
|
[email protected]
|
|
*/
|
|
/* General SGML Parser code SGML.c
|
|
** ========================
|
|
**
|
|
** This module implements an HTStream object. To parse an
|
|
** SGML file, create this object which is a parser. The object
|
|
** is (currently) created by being passed a DTD structure,
|
|
** and a target HTStructured object at which to throw the parsed stuff.
|
|
**
|
|
** 6 Feb 93 Binary searches used. Interface modified.
|
|
*/
|
|
#include "all.h"
|
|
|
|
/* INVALID is this module's "no such attribute" marker; discard any earlier
** definition of the name before installing ours. */
#ifdef INVALID
#undef INVALID
#endif
#define INVALID (-1)

/* Longest quoted attribute value the parser will accumulate; further
** characters of an over-long value are dropped. */
#define MAX_ATTR_LENGTH (2*MAX_URL_STRING)
|
|
|
|
/* Element Stack
|
|
** -------------
|
|
** This allows us to return down the stack reselcting styles.
|
|
** As we return, attribute values will be garbage in general.
|
|
*/
|
|
typedef struct _HTElement HTElement;
|
|
struct _HTElement
|
|
{
|
|
HTElement *next; /* Previously nested element or 0 */
|
|
HTTag *tag; /* The tag at this level */
|
|
};
|
|
|
|
|
|
/* Internal Context Data Structure
|
|
** -------------------------------
|
|
*/
|
|
struct _HTStream
|
|
{
|
|
|
|
CONST HTStreamClass *isa; /* inherited from HTStream */
|
|
|
|
CONST SGML_dtd *dtd;
|
|
HTStructuredClass *actions; /* target class */
|
|
HTStructured *target; /* target object */
|
|
|
|
int expected_length;
|
|
int bytes_processed;
|
|
HTTag *current_tag;
|
|
int current_attribute_number;
|
|
HTChunk *string;
|
|
HTElement *element_stack;
|
|
int extra; /* extra info for specific states */
|
|
enum sgml_state
|
|
{
|
|
S_text, S_literal, S_tag, S_tag_gap,
|
|
S_attr, S_attr_gap, S_equals, S_value,
|
|
S_ero, S_cro,
|
|
#ifdef ISO_2022_JP
|
|
S_esc, S_dollar, S_paren, S_nonascii_text,
|
|
#endif
|
|
S_squoted, S_dquoted, S_invalueent,
|
|
S_end, S_entity, S_junk_tag,
|
|
S_bang, S_bangdash, S_comment, S_commentdash, S_commentdashdash,
|
|
S_commentdashdashbang, S_EOF
|
|
}
|
|
state, old_state;
|
|
enum CR_LF_State
|
|
{
|
|
CRLF_None,
|
|
CR_Seen,
|
|
LF_Seen
|
|
}
|
|
crlf_state;
|
|
#ifdef CALLERDATA
|
|
void *callerData;
|
|
#endif
|
|
BOOL present[MAX_ATTRIBUTES]; /* Flags: attribute is present? */
|
|
char *value[MAX_ATTRIBUTES]; /* malloc'd strings or NULL if none */
|
|
|
|
struct Mwin *tw;
|
|
HTRequest *request;
|
|
FILE * fpDc;
|
|
char * pszDcFile;
|
|
HTFormat format_inDc;
|
|
BOOL fDCache;
|
|
BOOL fInRecovery;
|
|
int cbRecovery; /* in recovery, this is charpos in cs_stream */
|
|
struct CharStream *cs_source;
|
|
};
|
|
|
|
|
|
#define PUTC(ch) ((*context->actions->put_character)(context->target, ch))
|
|
|
|
PRIVATE BOOL SGML_character(HTStream * context, char c);
|
|
|
|
|
|
/* Lookup Attribute Name
|
|
*
|
|
* ----------------
|
|
*/
|
|
/* PUBLIC CONST char * SGML_default = ""; ?? */
|
|
PRIVATE int find_attribute_name(HTStream * context, const char *s)
|
|
{
|
|
HTTag *tag = context->current_tag;
|
|
attr *attributes = tag->attributes;
|
|
int high, low, i, diff; /* Binary search for attribute name */
|
|
|
|
for (low = 0, high = tag->number_of_attributes;
|
|
high > low;
|
|
diff < 0 ? (low = i + 1) : (high = i))
|
|
{
|
|
i = (low + (high - low) / 2);
|
|
diff = strcasecomp(attributes[i].name, s);
|
|
XX_DMsg(DBG_SGML, ("Checking %s\n", attributes[i].name));
|
|
if (diff == 0) return i;
|
|
|
|
} /* for */
|
|
|
|
return INVALID; /* Invalid */
|
|
}
|
|
|
|
/* Handle Attribute
|
|
*
|
|
* ----------------
|
|
*/
|
|
/* PUBLIC CONST char * SGML_default = ""; ?? */
|
|
|
|
PRIVATE void handle_attribute_name(HTStream * context, const char *s)
|
|
{
|
|
|
|
int i; /* Binary search for attribute name */
|
|
|
|
XX_DMsg(DBG_SGML, ("Handling attribute %s for tag %s\n", s, context->current_tag->name));
|
|
|
|
i = find_attribute_name(context, s);
|
|
if (i != INVALID)
|
|
{
|
|
context->current_attribute_number = i;
|
|
context->present[i] = YES;
|
|
if (context->value[i])
|
|
{
|
|
GTR_FREE(context->value[i]);
|
|
context->value[i] = NULL;
|
|
}
|
|
}
|
|
#ifdef XX_DEBUG
|
|
else
|
|
{ /* for */
|
|
XX_DMsg(DBG_SGML, ("Unknown attribute %s for tag %s\n", s, context->current_tag->name));
|
|
}
|
|
#endif
|
|
context->current_attribute_number = i;
|
|
}
|
|
|
|
|
|
/* Handle attribute value
|
|
** ----------------------
|
|
*/
|
|
PRIVATE void handle_attribute_value(HTStream * context, const char *s)
|
|
{
|
|
if (context->current_attribute_number != INVALID)
|
|
{
|
|
if (context->value[context->current_attribute_number]) /* TODO is this needed? */
|
|
{
|
|
GTR_FREE(context->value[context->current_attribute_number]);
|
|
}
|
|
context->value[context->current_attribute_number] = GTR_strdup(s);
|
|
}
|
|
else
|
|
{
|
|
XX_DMsg(DBG_SGML, ("SGML: Attribute value %s ignored\n", s));
|
|
}
|
|
context->current_attribute_number = INVALID; /* can't have two assignments! */
|
|
}
|
|
|
|
#ifdef FEATURE_INTL
|
|
PRIVATE void start_element(HTStream * context);
|
|
PRIVATE void end_element(HTStream * context, HTTag * old_tag);
|
|
#endif
|
|
/* Handle entity
|
|
** -------------
|
|
**
|
|
** On entry,
|
|
** s contains the entity name zero terminated
|
|
** Returns TRUE if context contains exactly an entity.
|
|
** If it contains no entity or trailing source, returns FALSE
|
|
*/
|
|
PRIVATE BOOL handle_entity(HTStream * context)
|
|
{
|
|
|
|
CONST char **entities = context->dtd->entity_names;
|
|
char *s = context->string->data;
|
|
int cbSLen = strlen(s);
|
|
int cbLen = cbSLen > MAX_ENTITY_LEN ? MAX_ENTITY_LEN : cbSLen;
|
|
int cbPrefix;
|
|
int high, low, i, diff;
|
|
char szEntity[MAX_ENTITY_LEN+1];
|
|
enum entitySpellings
|
|
{
|
|
esUntouched,
|
|
esLower,
|
|
esUpper,
|
|
esFirstUpper,
|
|
esFirst2Upper,
|
|
esDone
|
|
} curSpelling;
|
|
|
|
curSpelling = esUntouched;
|
|
while (curSpelling != esDone)
|
|
{
|
|
strncpy(szEntity, s, MAX_ENTITY_LEN);
|
|
switch (curSpelling)
|
|
{
|
|
case esLower:
|
|
for (i = 0; i < cbLen; i++)
|
|
szEntity[i] = tolower(szEntity[i]);
|
|
break;
|
|
case esUpper:
|
|
for (i = 0; i < cbLen; i++)
|
|
szEntity[i] = toupper(szEntity[i]);
|
|
break;
|
|
case esFirstUpper:
|
|
// NOTE, this will still work if cbLen < 2, but be wasted effort
|
|
// this is low frequency (like never) so why waste code
|
|
szEntity[0] = toupper(szEntity[0]);
|
|
for (i = 1; i < cbLen; i++)
|
|
szEntity[i] = tolower(szEntity[i]);
|
|
break;
|
|
case esFirst2Upper:
|
|
// NOTE, this will still work if cbLen < 3, but be wasted effort
|
|
// this is low frequency (like never) so why waste code
|
|
szEntity[0] = toupper(szEntity[0]);
|
|
szEntity[1] = toupper(szEntity[1]);
|
|
for (i = 2; i < cbLen; i++)
|
|
szEntity[i] = tolower(szEntity[i]);
|
|
break;
|
|
}
|
|
cbPrefix = cbLen;
|
|
while (cbPrefix)
|
|
{
|
|
szEntity[cbPrefix] = '\0';
|
|
for (low = 0, high = context->dtd->number_of_entities;
|
|
high > low;
|
|
diff < 0 ? (low = i + 1) : (high = i))
|
|
{ /* Binary search */
|
|
i = (low + (high - low) / 2);
|
|
diff = strcmp(entities[i], szEntity); /* Case sensitive! */
|
|
if (diff == 0)
|
|
{ /* success: found it */
|
|
#ifdef FEATURE_INTL
|
|
// CharacterEntity character conflict with DBCS character.
|
|
// So it's forced changing to ANSI font.
|
|
//
|
|
HTTag *t, *save;
|
|
BOOL fNeedsChangeFont=FALSE;
|
|
|
|
if(context->tw->w3doc
|
|
&& GETMIMECP(context->tw->w3doc) != 1252)
|
|
{
|
|
save = context->current_tag;
|
|
if(fNeedsChangeFont = ((UCHAR)*(context->dtd->entity_values[i]) >= (UCHAR)'\200'))
|
|
{
|
|
t = SGMLFindTag(context->dtd, "ENTITY");
|
|
context->current_tag = t;
|
|
start_element(context);
|
|
}
|
|
}
|
|
#endif
|
|
(*context->actions->put_entity) (context->target, i);
|
|
#ifdef FEATURE_INTL
|
|
if(fNeedsChangeFont){
|
|
end_element(context, context->current_tag);
|
|
context->current_tag = save;
|
|
}
|
|
#endif
|
|
if (cbPrefix == cbSLen) return TRUE;
|
|
goto exitWhile;
|
|
}
|
|
}
|
|
cbPrefix--;
|
|
}
|
|
curSpelling++;
|
|
}
|
|
|
|
PUTC('&');
|
|
/* If entity string not found, display as text */
|
|
XX_DMsg(DBG_SGML, ("SGML: Unknown entity %s\n", s));
|
|
|
|
exitWhile:
|
|
|
|
{
|
|
CONST char *p;
|
|
for (p = s + cbPrefix; *p; p++)
|
|
{
|
|
PUTC(*p);
|
|
}
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
/* Handle an entity inside an attribute value. If the parameter
|
|
points to an entity, it replaces it with the fixed up value. */
|
|
PRIVATE BOOL SGML_HandleValueEntity(HTChunk *string, const SGML_dtd *dtd, int index)
|
|
{
|
|
char buf[256];
|
|
int value;
|
|
|
|
XX_Assert((string->data[index] == '&'), ("Entity didn't start with '&'!"));
|
|
|
|
if (string->size <= index || string->size > index + 254)
|
|
{
|
|
/* There's no way this is a valid entity! */
|
|
return FALSE;
|
|
}
|
|
|
|
memcpy(buf, string->data + index, string->size - index);
|
|
buf[string->size - index] = '\0';
|
|
|
|
if (buf[0] == '#')
|
|
{
|
|
if (string->size <= index + 1)
|
|
{
|
|
/* There was no number given! */
|
|
return FALSE;
|
|
}
|
|
else
|
|
{
|
|
value = atoi(buf + 1);
|
|
if (value)
|
|
{
|
|
string->data[index] = (char) value;
|
|
string->size = index + 1;
|
|
}
|
|
else
|
|
{
|
|
return FALSE;
|
|
}
|
|
}
|
|
return TRUE;
|
|
}
|
|
else
|
|
{
|
|
CONST char **entities = dtd->entity_names;
|
|
CONST char *s;
|
|
int high, low, i, diff;
|
|
BOOL bFound;
|
|
|
|
s = buf + 1;
|
|
bFound = FALSE;
|
|
for (low = 0, high = dtd->number_of_entities;
|
|
high > low;
|
|
diff < 0 ? (low = i + 1) : (high = i))
|
|
{ /* Binary serach */
|
|
i = (low + (high - low) / 2);
|
|
diff = strcmp(entities[i], s); /* Csse sensitive! */
|
|
if (diff == 0)
|
|
{
|
|
strcpy(string->data + index, dtd->entity_values[i]);
|
|
string->size = index + strlen(dtd->entity_values[i]);
|
|
bFound = TRUE;
|
|
break;
|
|
}
|
|
}
|
|
if (!bFound)
|
|
{
|
|
XX_DMsg(DBG_SGML, ("SGML: Unknown entity %s\n", s));
|
|
}
|
|
|
|
return bFound;
|
|
}
|
|
}
|
|
|
|
/* End element
|
|
** -----------
|
|
*/
|
|
PRIVATE void end_element(HTStream * context, HTTag * old_tag)
|
|
{
|
|
HTElement *N;
|
|
HTTag *t;
|
|
BOOL bHeader;
|
|
|
|
/*
|
|
TODO when we have strict HTML flag and reporting,
|
|
put some warnings in appropriate places here.
|
|
*/
|
|
XX_DMsg(DBG_SGML, ("SGML: End </%s>\n", old_tag->name));
|
|
if (old_tag->contents == SGML_EMPTY)
|
|
{
|
|
XX_DMsg(DBG_SGML, ("SGML: Illegal end tag </%s> found.\n",
|
|
old_tag->name));
|
|
return;
|
|
}
|
|
if (old_tag->contents == SGML_NEST)
|
|
{
|
|
(*context->actions->end_element) (context->target, old_tag - context->dtd->tags);
|
|
return;
|
|
}
|
|
|
|
/* Make sure that the tag exists in the stack at all, so that we
|
|
don't close all open tags */
|
|
bHeader = old_tag->tagclass == HTTAG_HEADER;
|
|
for (N = context->element_stack; N; N = N->next)
|
|
{
|
|
if (old_tag == N->tag ||
|
|
(old_tag->tagclass == N->tag->tagclass && bHeader))
|
|
break;
|
|
}
|
|
if (!N)
|
|
{
|
|
XX_DMsg(DBG_SGML, ("SGML: Found </%s> with no <%s>.\n", old_tag->name, old_tag->name));
|
|
return;
|
|
}
|
|
|
|
while (context->element_stack)
|
|
{ /* Loop is error path only */
|
|
N = context->element_stack;
|
|
t = N->tag;
|
|
if (old_tag != t &&
|
|
(! (old_tag->tagclass == t->tagclass && bHeader)))
|
|
{ /* Mismatch: syntax error */
|
|
|
|
if (context->element_stack->next)
|
|
{ /* This is not the last level */
|
|
XX_DMsg(DBG_SGML,
|
|
("SGML: Found </%s> when expecting </%s>. </%s> assumed.\n",
|
|
old_tag->name, t->name, t->name));
|
|
}
|
|
else
|
|
{ /* last level */
|
|
/* This should never happen, since it should have been caught before
|
|
we entered this loop. */
|
|
XX_DMsg(DBG_SGML,
|
|
("SGML: Found </%s> when expecting </%s>. </%s> Ignored.\n",
|
|
old_tag->name, t->name, old_tag->name));
|
|
return; /* Ignore */
|
|
}
|
|
}
|
|
|
|
context->element_stack = N->next; /* Remove from stack */
|
|
GTR_FREE(N);
|
|
(*context->actions->end_element) (context->target,
|
|
t - context->dtd->tags);
|
|
if (old_tag == t ||
|
|
(old_tag->tagclass == t->tagclass && bHeader))
|
|
return; /* Correct sequence */
|
|
|
|
/* Syntax error path only */
|
|
}
|
|
XX_DMsg(DBG_SGML,
|
|
("SGML: Extra end tag </%s> found and ignored.\n", old_tag->name));
|
|
}
|
|
|
|
|
|
/* Start a element
|
|
*/
|
|
PRIVATE void start_element(HTStream * context)
|
|
{
|
|
HTTag *new_tag = context->current_tag;
|
|
|
|
XX_DMsg(DBG_SGML, ("SGML: Start <%s>\n", new_tag->name));
|
|
|
|
(*context->actions->start_element) (
|
|
context->target,
|
|
new_tag - context->dtd->tags,
|
|
context->present,
|
|
(CONST char **) context->value); /* coerce type for think c */
|
|
if (new_tag->contents != SGML_EMPTY && new_tag->contents != SGML_NEST)
|
|
{ /* i.e. tag not empty */
|
|
HTElement *N = (HTElement *) GTR_MALLOC(sizeof(HTElement));
|
|
if (N)
|
|
{
|
|
N->next = context->element_stack;
|
|
N->tag = new_tag;
|
|
context->element_stack = N;
|
|
}
|
|
else
|
|
{
|
|
/* TODO */
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/* Find Tag in DTD tag list
|
|
** ------------------------
|
|
**
|
|
** On entry,
|
|
** dtd points to dtd structire including valid tag list
|
|
** string points to name of tag in question
|
|
**
|
|
** On exit,
|
|
** returns:
|
|
** NULL tag not found
|
|
** else address of tag structure in dtd
|
|
*/
|
|
PUBLIC HTTag *SGMLFindTag(CONST SGML_dtd * dtd, CONST char *string)
|
|
{
|
|
int high, low, i, diff;
|
|
for (low = 0, high = dtd->number_of_tags;
|
|
high > low;
|
|
diff < 0 ? (low = i + 1) : (high = i))
|
|
{ /* Binary serach */
|
|
i = (low + (high - low) / 2);
|
|
diff = strcasecomp(dtd->tags[i].name, string); /* Case insensitive */
|
|
if (diff == 0)
|
|
{ /* success: found it */
|
|
return &dtd->tags[i];
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/*________________________________________________________________________
|
|
** Public Methods
|
|
*/
|
|
|
|
|
|
/* Could check that we are back to bottom of stack! @@ */
|
|
|
|
PRIVATE void SGML_free(HTStream * context, DCACHETIME dctExpires, DCACHETIME dctLastModif)
|
|
{
|
|
int cnt;
|
|
char *pool;
|
|
int cbLength;
|
|
int cbChar;
|
|
int cbPosSync;
|
|
int depth;
|
|
|
|
// Attempt to recovery from unterminated comments, which are quite common, due to
|
|
// Netscape's loose attitude. Unsafe HTML!
|
|
|
|
while (context->state == S_comment || context->state == S_commentdash || context->state == S_commentdashdash)
|
|
{
|
|
if (context->cs_source == NULL) break;
|
|
pool = CS_GetPool(context->cs_source);
|
|
cbLength = CS_GetLength(context->cs_source);
|
|
if (cbLength == 0) break;
|
|
|
|
depth = 0;
|
|
cbPosSync = -1;
|
|
context->fInRecovery = TRUE;
|
|
for (cbChar = context->extra; cbChar < cbLength; cbChar++)
|
|
{
|
|
switch (pool[cbChar])
|
|
{
|
|
case '>':
|
|
depth--;
|
|
if (depth < 0) cbPosSync = cbChar;
|
|
break;
|
|
case '<':
|
|
depth++;
|
|
if (cbPosSync >= 0) goto breakFor;
|
|
break;
|
|
}
|
|
}
|
|
breakFor:
|
|
if (cbPosSync < 0) break;
|
|
context->state = S_text;
|
|
for (cbChar = cbPosSync + 1; cbChar < cbLength; cbChar++)
|
|
{
|
|
if (pool[cbChar] != LF)
|
|
{
|
|
context->cbRecovery = cbChar;
|
|
SGML_character(context, pool[cbChar]);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (context->fDCache)
|
|
UpdateStreamDCache(context, dctExpires, dctLastModif, /*fAbort=*/FALSE, context->tw);
|
|
|
|
while (context->element_stack)
|
|
{ /* Make sure, that all tags are gone */
|
|
HTElement *ptr = context->element_stack;
|
|
|
|
XX_DMsg(DBG_SGML, ("SGML: Non-matched tag found: <%s>\n",
|
|
context->element_stack->tag->name));
|
|
context->element_stack = ptr->next;
|
|
GTR_FREE(ptr);
|
|
}
|
|
(*context->actions->free) (context->target);
|
|
HTChunkFree(context->string);
|
|
for (cnt = 0; cnt < MAX_ATTRIBUTES; cnt++) /* Leak fix Henrik 18/02-94 */
|
|
if (context->value[cnt])
|
|
GTR_FREE(context->value[cnt]);
|
|
|
|
/* Show the transfer as being completed, even if the expected length didn't match the number
|
|
of bytes we actually got. */
|
|
if (context->expected_length)
|
|
WAIT_SetTherm(context->tw, context->expected_length);
|
|
GTR_FREE(context);
|
|
}
|
|
|
|
/*	Abort the parser
**	----------------
**	Abandons the stream: flags the disk cache entry (if any) as aborted,
**	discards all open elements, passes the error `e' on to the target's
**	abort method and releases everything the context owns, including the
**	context itself.
*/
PRIVATE void SGML_abort(HTStream * context, HTError e)
{
	int iAttr;
	DCACHETIME dct={0,0};

	if (context->fDCache)
		UpdateStreamDCache(context, dct, dct, /*fAbort=*/TRUE, context->tw);

	/* Make sure, that all tags are gone */
	while (context->element_stack)
	{
		HTElement *top = context->element_stack;

		XX_DMsg(DBG_SGML, ("SGML: Non-matched tag found: <%s>\n",
						   context->element_stack->tag->name));
		context->element_stack = top->next;
		GTR_FREE(top);
	}

	(*context->actions->abort) (context->target, e);
	HTChunkFree(context->string);

	/* Leak fix Henrik 18/02-94 */
	for (iAttr = 0; iAttr < MAX_ATTRIBUTES; iAttr++)
	{
		if (context->value[iAttr])
			GTR_FREE(context->value[iAttr]);
	}

	GTR_FREE(context);
}
|
|
|
|
|
|
/*	Read and write user callback handle
**	-----------------------------------
**
**	The callbacks from the SGML parser have an SGML context parameter.
**	These two accessors let the caller attach a pointer of his own to a
**	particular SGML context and get it back later.  The parser stores the
**	pointer without interpreting it.
*/

#ifdef CALLERDATA
/* Return the pointer last stored with SGML_setCallerData(). */
PUBLIC void *SGML_callerData(HTStream * context)
{
	return context->callerData;
}

/* Remember `data' in the context for later retrieval. */
PUBLIC void SGML_setCallerData(HTStream * context, void *data)
{
	context->callerData = data;
}
#endif
|
|
|
|
/*
|
|
* SGML_patchurl
|
|
* -------------
|
|
*
|
|
* called when we terminate quoted attribute value prematurely by detecting
|
|
* a '>'. this indicates mismatched quotes and the url field of the tag
|
|
* has likely consumed the following attribute name. we attempt to delete
|
|
* that from the end of the src URL.
|
|
*
|
|
* case 1: <img src="foo alt="bar">
|
|
* case 2: <img src="foo" alt="bar>
|
|
* case 3: <img src="foo ismap>
|
|
* case 4: <img src="foo>
|
|
*
|
|
* we look for ...{whitespace}+ATTRIBUTENAME{whitespace}*[=], where
|
|
* ATTRIBUTENAME is one of those valid for img tag
|
|
*/
|
|
PRIVATE void SGML_patchurl(HTStream *context)
|
|
{
|
|
char *p;
|
|
char *wsp;
|
|
char *attr;
|
|
char chSave;
|
|
int attrId;
|
|
int urlIdx;
|
|
|
|
XX_DMsg(DBG_SGML, ("SGML: patch <%s>\n", context->current_tag));
|
|
switch (context->current_tag - context->dtd->tags)
|
|
{
|
|
case HTML_A:
|
|
urlIdx = HTML_A_HREF;
|
|
break;
|
|
case HTML_IMG:
|
|
urlIdx = HTML_IMG_SRC;
|
|
break;
|
|
case HTML_BGSOUND:
|
|
urlIdx = HTML_BGSOUND_SRC;
|
|
break;
|
|
case HTML_BODY:
|
|
urlIdx = HTML_BODY_BACKGROUND;
|
|
break;
|
|
case HTML_FETCH:
|
|
urlIdx = HTML_FETCH_SRC;
|
|
break;
|
|
case HTML_FORM:
|
|
urlIdx = HTML_FORM_ACTION;
|
|
break;
|
|
case HTML_INPUT:
|
|
urlIdx = HTML_INPUT_SRC;
|
|
break;
|
|
case HTML_ISINDEX:
|
|
urlIdx = HTML_ISINDEX_ACTION;
|
|
break;
|
|
default:
|
|
return;
|
|
}
|
|
if (context->present[urlIdx] && (wsp = context->value[urlIdx]))
|
|
{
|
|
while (*wsp && !WHITE(*wsp))
|
|
wsp++;
|
|
if (!*wsp) return;
|
|
attr = wsp;
|
|
while (*attr && WHITE(*attr))
|
|
attr++;
|
|
if (!*attr) return;
|
|
p = attr;
|
|
while (*p && (!WHITE(*p)) && *p != '=')
|
|
p++;
|
|
chSave = *p;
|
|
*p = '\0';
|
|
attrId = find_attribute_name(context, attr);
|
|
*p = chSave;
|
|
if (attrId != INVALID)
|
|
*wsp = '\0';
|
|
}
|
|
}
|
|
|
|
|
|
// NOTE this procedure is not win32 thread safe (it has a static)
|
|
PRIVATE BOOL SGML_character(HTStream * context, char c)
|
|
{
|
|
BOOL bStatus;
|
|
CONST SGML_dtd *dtd = context->dtd;
|
|
HTChunk *string = context->string;
|
|
static int reentrant = 0;
|
|
static const char szCRLF[3] = "\015\012";
|
|
|
|
if (reentrant++ == 0)
|
|
{
|
|
if (!context->fInRecovery)
|
|
{
|
|
context->bytes_processed++;
|
|
if (context->expected_length)
|
|
WAIT_SetTherm(context->tw, context->bytes_processed);
|
|
|
|
if (context->actions->add_source)
|
|
{
|
|
switch (c)
|
|
{
|
|
case LF:
|
|
switch (context->crlf_state)
|
|
{
|
|
case CRLF_None:
|
|
case LF_Seen:
|
|
context->actions->add_source(context->target, szCRLF, 2);
|
|
context->crlf_state = LF_Seen;
|
|
break;
|
|
case CR_Seen:
|
|
context->crlf_state = CRLF_None;
|
|
break;
|
|
}
|
|
break;
|
|
case CR:
|
|
switch (context->crlf_state)
|
|
{
|
|
case CR_Seen:
|
|
case CRLF_None:
|
|
context->actions->add_source(context->target, szCRLF, 2);
|
|
context->crlf_state = CR_Seen;
|
|
break;
|
|
case LF_Seen:
|
|
context->crlf_state = CRLF_None;
|
|
break;
|
|
}
|
|
break;
|
|
default:
|
|
context->actions->add_source(context->target, &c, 1);
|
|
context->crlf_state = CRLF_None;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (c == 0)
|
|
{
|
|
if (context->state != S_EOF) context->extra = context->state;
|
|
context->state = S_EOF;
|
|
}
|
|
|
|
restate:
|
|
switch (context->state)
|
|
{
|
|
case S_EOF:
|
|
if (c != 0)
|
|
{
|
|
context->state = context->extra;
|
|
goto restate;
|
|
}
|
|
break;
|
|
case S_text:
|
|
#ifdef ISO_2022_JP
|
|
if (c == '\033')
|
|
{
|
|
context->state = S_esc;
|
|
PUTC(c);
|
|
break;
|
|
}
|
|
#endif /* ISO_2022_JP */
|
|
if (c == '&' && (!context->element_stack || (
|
|
context->element_stack->tag &&
|
|
(context->element_stack->tag->contents == SGML_MIXED
|
|
||context->element_stack->tag->contents == SGML_LITERAL
|
|
|| context->element_stack->tag->contents ==
|
|
SGML_RCDATA)
|
|
)))
|
|
{
|
|
string->size = 0;
|
|
context->state = S_ero;
|
|
|
|
}
|
|
else if (c == '<')
|
|
{
|
|
string->size = 0;
|
|
context->state = (context->element_stack &&
|
|
context->element_stack->tag &&
|
|
context->element_stack->tag->contents == SGML_LITERAL) ?
|
|
S_literal : S_tag;
|
|
}
|
|
else
|
|
PUTC(c);
|
|
break;
|
|
|
|
#ifdef ISO_2022_JP
|
|
case S_esc:
|
|
if (c == '$')
|
|
{
|
|
context->state = S_dollar;
|
|
}
|
|
else if (c == '(')
|
|
{
|
|
context->state = S_paren;
|
|
}
|
|
else
|
|
{
|
|
context->state = S_text;
|
|
}
|
|
PUTC(c);
|
|
break;
|
|
case S_dollar:
|
|
if (c == '@' || c == 'B')
|
|
{
|
|
context->state = S_nonascii_text;
|
|
}
|
|
else
|
|
{
|
|
context->state = S_text;
|
|
}
|
|
PUTC(c);
|
|
break;
|
|
case S_paren:
|
|
if (c == 'B' || c == 'J')
|
|
{
|
|
context->state = S_text;
|
|
}
|
|
else
|
|
{
|
|
context->state = S_text;
|
|
}
|
|
PUTC(c);
|
|
break;
|
|
case S_nonascii_text:
|
|
if (c == '\033')
|
|
{
|
|
context->state = S_esc;
|
|
PUTC(c);
|
|
}
|
|
else
|
|
{
|
|
PUTC(c);
|
|
}
|
|
break;
|
|
#endif /* ISO_2022_JP */
|
|
|
|
/* In literal mode, waits only for specific end tag!
|
|
** Only foir compatibility with old servers.
|
|
*/
|
|
case S_literal:
|
|
HTChunkPutc(string, c);
|
|
if (TOUPPER(c) != ((string->size == 1) ? '/'
|
|
: context->element_stack->tag->name[string->size - 2]))
|
|
{
|
|
int i;
|
|
|
|
/* If complete match, end literal */
|
|
if ((c == '>') && (!context->element_stack->tag->name[string->size - 2]))
|
|
{
|
|
end_element(context, context->element_stack->tag);
|
|
string->size = 0;
|
|
context->current_attribute_number = INVALID;
|
|
context->state = S_text;
|
|
break;
|
|
} /* If Mismatch: recover string. */
|
|
PUTC('<');
|
|
for (i = 0; i < string->size; i++) /* recover */
|
|
PUTC(string->data[i]);
|
|
context->state = S_text;
|
|
}
|
|
|
|
break;
|
|
|
|
/* Character reference or Entity
|
|
*/
|
|
case S_ero:
|
|
if (c == '#')
|
|
{
|
|
context->state = S_cro; /* &# is Char Ref Open */
|
|
break;
|
|
}
|
|
context->state = S_entity; /* Fall through! */
|
|
|
|
/* Handle Entities
|
|
*/
|
|
case S_entity:
|
|
if (isalnum(c))
|
|
HTChunkPutc(string, c);
|
|
else
|
|
{
|
|
HTChunkTerminate(string);
|
|
bStatus = handle_entity(context);
|
|
context->state = S_text;
|
|
|
|
#ifdef FEATURE_INTL
|
|
// If c == '<', we have to handle this character reentrantly.
|
|
if (!bStatus
|
|
|| (context->tw->w3doc
|
|
&& IsFECodePage(GETMIMECP(context->tw->w3doc))
|
|
&& c == '<'))
|
|
#else
|
|
if (!bStatus)
|
|
#endif
|
|
{
|
|
/* Call ourselves reentrantly to handle the terminator
|
|
of an illegal entity */
|
|
SGML_character(context, c);
|
|
}
|
|
}
|
|
break;
|
|
|
|
/* Character reference
|
|
*/
|
|
case S_cro:
|
|
if (isalnum(c))
|
|
HTChunkPutc(string, c); /* accumulate a character NUMBER */
|
|
else
|
|
{
|
|
int value;
|
|
HTChunkTerminate(string);
|
|
value = atoi(string->data);
|
|
if (value)
|
|
{
|
|
PUTC((char) value);
|
|
}
|
|
context->state = S_text;
|
|
if ( c == '<' ) // go ahead and handle start tag, even though no delimeter used
|
|
{
|
|
/* Call ourselves reentrantly to handle the terminator */
|
|
SGML_character(context, c);
|
|
}
|
|
}
|
|
break;
|
|
|
|
/* Tag
|
|
*/
|
|
case S_tag: /* new tag */
|
|
if (isalnum(c))
|
|
HTChunkPutc(string, c);
|
|
else if (c == '!')
|
|
context->state = S_bang;
|
|
else
|
|
{ /* End of tag name */
|
|
HTTag *t;
|
|
if (c == '/')
|
|
{
|
|
context->state = S_end;
|
|
break;
|
|
}
|
|
HTChunkTerminate(string);
|
|
|
|
t = SGMLFindTag(dtd, string->data);
|
|
if (!t)
|
|
{
|
|
XX_DMsg(DBG_SGML, ("SGML: *** Unknown element %s\n",
|
|
string->data));
|
|
context->state = (c == '>') ? S_text : S_junk_tag;
|
|
break;
|
|
}
|
|
context->current_tag = t;
|
|
|
|
/* Clear out attributes
|
|
*/
|
|
|
|
{
|
|
int i;
|
|
for (i = 0; i < context->current_tag->number_of_attributes; i++)
|
|
context->present[i] = NO;
|
|
}
|
|
string->size = 0;
|
|
context->current_attribute_number = INVALID;
|
|
|
|
if (c == '>')
|
|
{
|
|
if (context->current_tag->name)
|
|
start_element(context);
|
|
context->state = S_text;
|
|
}
|
|
else
|
|
{
|
|
context->state = S_tag_gap;
|
|
}
|
|
}
|
|
break;
|
|
|
|
|
|
case S_tag_gap: /* Expecting attribute or > */
|
|
if (WHITE(c))
|
|
break; /* Gap between attributes */
|
|
if (c == '>')
|
|
{ /* End of tag */
|
|
if (context->current_tag->name)
|
|
start_element(context);
|
|
context->state = S_text;
|
|
break;
|
|
}
|
|
HTChunkPutc(string, c);
|
|
context->state = S_attr; /* Get attribute */
|
|
break;
|
|
|
|
/* accumulating value */
|
|
case S_attr:
|
|
if (WHITE(c) || (c == '>') || (c == '='))
|
|
{ /* End of word */
|
|
HTChunkTerminate(string);
|
|
handle_attribute_name(context, string->data);
|
|
string->size = 0;
|
|
if (c == '>')
|
|
{ /* End of tag */
|
|
if (context->current_tag->name)
|
|
start_element(context);
|
|
context->state = S_text;
|
|
break;
|
|
}
|
|
context->state = (c == '=' ? S_equals : S_attr_gap);
|
|
}
|
|
else
|
|
{
|
|
HTChunkPutc(string, c);
|
|
}
|
|
break;
|
|
|
|
case S_attr_gap: /* Expecting attribute or = or > */
|
|
if (WHITE(c))
|
|
break; /* Gap after attribute */
|
|
if (c == '>')
|
|
{ /* End of tag */
|
|
if (context->current_tag->name)
|
|
start_element(context);
|
|
context->state = S_text;
|
|
break;
|
|
}
|
|
else if (c == '=')
|
|
{
|
|
context->state = S_equals;
|
|
break;
|
|
}
|
|
HTChunkPutc(string, c);
|
|
context->state = S_attr; /* Get next attribute */
|
|
break;
|
|
|
|
case S_equals: /* After attr = */
|
|
if (WHITE(c))
|
|
break; /* Before attribute value */
|
|
if (c == '>')
|
|
{ /* End of tag */
|
|
XX_DMsg(DBG_SGML, ("SGML: found = but no value\n"));
|
|
if (context->current_tag->name)
|
|
start_element(context);
|
|
context->state = S_text;
|
|
break;
|
|
|
|
}
|
|
else if (c == '\'')
|
|
{
|
|
context->state = S_squoted;
|
|
break;
|
|
|
|
}
|
|
else if (c == '"')
|
|
{
|
|
context->state = S_dquoted;
|
|
break;
|
|
}
|
|
HTChunkPutc(string, c);
|
|
context->state = S_value;
|
|
break;
|
|
|
|
case S_value:
|
|
if (WHITE(c) || (c == '>'))
|
|
{ /* End of word */
|
|
HTChunkTerminate(string);
|
|
handle_attribute_value(context, string->data);
|
|
string->size = 0;
|
|
if (c == '>')
|
|
{ /* End of tag */
|
|
if (context->current_tag->name)
|
|
start_element(context);
|
|
context->state = S_text;
|
|
break;
|
|
}
|
|
else
|
|
context->state = S_tag_gap;
|
|
}
|
|
else if (c == '&')
|
|
{
|
|
/* Entities inside attribute values are not handled
|
|
identically to ones in regular markup, mostly for ease
|
|
of coding. The 2.0 spec requires that entities end in
|
|
a semicolon. Here we are a little more lenient and
|
|
assume as a terminator anything other than an alpha-
|
|
numeric character or a '#'. */
|
|
context->extra = string->size; /* Save where entity starts */
|
|
HTChunkPutc(string, c);
|
|
context->old_state = context->state;
|
|
context->state = S_invalueent;
|
|
}
|
|
else
|
|
{
|
|
HTChunkPutc(string, c);
|
|
}
|
|
break;
|
|
|
|
case S_squoted: /* Quoted attribute value */
|
|
case S_dquoted: /* Quoted attribute value */
|
|
// CMF: many htmls expect end of tag to end quoted string. CF: HTML Spec
|
|
// 3.4.3: Attributes
|
|
if ((context->state == S_squoted && c == '\'') ||
|
|
(context->state == S_dquoted && c == '"') ||
|
|
(c == '>'))
|
|
{
|
|
/* End of attribute value */
|
|
HTChunkTerminate(string);
|
|
handle_attribute_value(context, string->data);
|
|
string->size = 0;
|
|
context->state = S_tag_gap;
|
|
if (c == '>')
|
|
{
|
|
/* Patch up src attribute if present to deal with mismatched
|
|
quotes */
|
|
SGML_patchurl(context);
|
|
/* Call ourselves reentrantly to handle the terminator
|
|
of an illegal entity */
|
|
SGML_character(context, c);
|
|
}
|
|
}
|
|
else if (string->size < MAX_ATTR_LENGTH)
|
|
{
|
|
/* We restrict an attribute value to a finite length,
|
|
since if it goes longer than that it's probably an
|
|
error anyway. */
|
|
if (c == '&')
|
|
{
|
|
/* Entities inside attribute values are not handled
|
|
identically to ones in regular markup, mostly for ease
|
|
of coding. The 2.0 spec requires that entities end in
|
|
a semicolon. Here we are a little more lenient and
|
|
assume as a terminator anything other than an alpha-
|
|
numeric character or a '#'. */
|
|
context->extra = string->size; /* Save where entity starts */
|
|
HTChunkPutc(string, c);
|
|
context->old_state = context->state;
|
|
context->state = S_invalueent;
|
|
}
|
|
else
|
|
{
|
|
HTChunkPutc(string, c);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case S_invalueent: /* Accumulating entity in an attribute value */
|
|
if (isalnum(c) || c == '#')
|
|
{
|
|
if (string->size < MAX_ATTR_LENGTH)
|
|
{
|
|
/* We restrict an attribute value to a finite length,
|
|
since if it goes longer than that it's probably an
|
|
error anyway. */
|
|
HTChunkPutc(string, c);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
bStatus = SGML_HandleValueEntity(string, context->dtd, context->extra);
|
|
context->state = context->old_state;
|
|
if (!bStatus)
|
|
{
|
|
/* Call ourselves reentrantly to handle the terminator
|
|
of an illegal entity */
|
|
SGML_character(context, c);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case S_end: /* </ */
|
|
if (isalnum(c))
|
|
HTChunkPutc(string, c);
|
|
else
|
|
{ /* End of end tag name */
|
|
HTTag *t;
|
|
HTChunkTerminate(string);
|
|
if (!*string->data)
|
|
{ /* Empty end tag */
|
|
if (context->element_stack)
|
|
t = context->element_stack->tag;
|
|
else
|
|
t = NULL;
|
|
}
|
|
else
|
|
{
|
|
t = SGMLFindTag(dtd, string->data);
|
|
}
|
|
if (!t)
|
|
{
|
|
XX_DMsg(DBG_SGML, ("Unknown end tag </%s>\n", string->data ? string->data : ""));
|
|
}
|
|
else
|
|
{
|
|
context->current_tag = t;
|
|
end_element(context, context->current_tag);
|
|
}
|
|
|
|
string->size = 0;
|
|
context->current_attribute_number = INVALID;
|
|
if (c != '>')
|
|
{
|
|
context->state = S_junk_tag;
|
|
}
|
|
else
|
|
{
|
|
context->state = S_text;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case S_junk_tag:
|
|
if (c == '>')
|
|
{
|
|
context->state = S_text;
|
|
}
|
|
break;
|
|
|
|
case S_bang:
|
|
/* We've gotten "<!" */
|
|
if (c == '-')
|
|
context->state = S_bangdash;
|
|
else if (c == '>')
|
|
{
|
|
XX_DMsg(DBG_SGML, ("Malformed comment: '<!>'\n"));
|
|
context->state = S_text;
|
|
}
|
|
else
|
|
{
|
|
XX_DMsg(DBG_SGML, ("Malformed comment: '<!%c...'\n", c));
|
|
context->state = S_junk_tag;
|
|
}
|
|
break;
|
|
|
|
case S_bangdash:
|
|
/* We've gotten "<!-" */
|
|
if (c == '-')
|
|
{
|
|
context->state = S_comment;
|
|
context->extra = context->fInRecovery ?
|
|
context->cbRecovery : (context->cs_source ? CS_GetLength(context->cs_source) : 0);
|
|
}
|
|
else if (c == '>')
|
|
{
|
|
XX_DMsg(DBG_SGML, ("Malformed comment: '<!->'\n"));
|
|
context->state = S_text;
|
|
}
|
|
else
|
|
{
|
|
XX_DMsg(DBG_SGML, ("Malformed comment: '<!-%c...'\n", c));
|
|
context->state = S_junk_tag;
|
|
}
|
|
break;
|
|
|
|
case S_comment:
|
|
/* We've gotten "<!--", and are looking for "-->" */
|
|
if (c == '-')
|
|
context->state = S_commentdash;
|
|
break;
|
|
|
|
case S_commentdash:
|
|
if (c == '-')
|
|
context->state = S_commentdashdash;
|
|
else if (c == '>') // Relaxed rule: allow "->" to end a comment
|
|
context->state = S_text;
|
|
else
|
|
context->state = S_comment;
|
|
break;
|
|
|
|
case S_commentdashdash:
|
|
if ( c == '!' )
|
|
context->state = S_commentdashdashbang;
|
|
else if (c == '>')
|
|
context->state = S_text;
|
|
else if (c != '-')
|
|
context->state = S_comment;
|
|
break;
|
|
|
|
case S_commentdashdashbang:
|
|
if (c == '>')
|
|
context->state = S_text;
|
|
else
|
|
context->state = S_comment;
|
|
break;
|
|
} /* switch on context->state */
|
|
reentrant--;
|
|
return TRUE;
|
|
} /* SGML_character */
|
|
|
|
|
|
/*	Parse a zero terminated string
**	------------------------------
**	Feeds every character of `str' to SGML_character().  With FEATURE_INTL,
**	when the document's MIME character set has a conversion (iChrCnv) set,
**	the string is first run through EncodeMBCSString() and the converted
**	copy is parsed and then freed.
**
**	Always returns TRUE.
*/
PRIVATE BOOL SGML_string(HTStream * context, CONST char *str)
{
	CONST char *p;
#ifdef FEATURE_INTL
	CONST char *pPCChar;
	int len = -1;

	if(context->tw && context->tw->w3doc && aMimeCharSet[context->tw->w3doc->iMimeCharSet].iChrCnv)
	{
		MIMECSETTBL *pMime = aMimeCharSet + context->tw->w3doc->iMimeCharSet;

		pPCChar = EncodeMBCSString(str, &len, pMime);
		p = pPCChar;
		while (*p)
		{
			SGML_character(context, *p);
			p++;
		}
		GTR_FREE((UCHAR *)pPCChar);
		return TRUE;
	}
#endif
	p = str;
	while (*p)
	{
		SGML_character(context, *p);
		p++;
	}
	/* Technically this should check the return values from SGML_character()
	   but as of now that function always returns TRUE so I don't bother. */
	return TRUE;
}
|
|
|
|
|
|
/*	Feed a counted block of bytes to the parser
**	-------------------------------------------
**
** On entry,
**	context		the parser stream.
**	str, l		the block and its length in bytes.
**	fDCache		when TRUE and the stream class supplies a write_dcache
**			routine, the raw (unconverted) block is passed to that
**			routine before parsing.
**
**	The bytes are then handed one at a time to SGML_character().  When
**	FEATURE_INTL is compiled in and the document's MIME character set
**	has its iChrCnv entry set, the block is first converted with
**	EncodeMBCSString() and the converted copy (with the length that
**	call reports back) is parsed and released instead.  Finally the
**	target's block_done routine, if it has one, is called.
**
**	Always returns TRUE; the results of SGML_character() are ignored
**	because that function currently always returns TRUE.
*/
PRIVATE BOOL SGML_write(HTStream * context, CONST char *str, int l, BOOL fDCache)
{
	CONST char *cur;
	CONST char *limit;

	if (fDCache && context->isa->write_dcache)
	{
		(context->isa->write_dcache)(context, str, l);
	}

#ifdef FEATURE_INTL
	if (context->tw && context->tw->w3doc && aMimeCharSet[context->tw->w3doc->iMimeCharSet].iChrCnv)
	{
		MIMECSETTBL *pMime = &aMimeCharSet[context->tw->w3doc->iMimeCharSet];
		int len = l;		/* in: source length, out: converted length */
		CONST char *converted = EncodeMBCSString(str, &len, pMime);

		limit = converted + len;
		cur = converted;
		while (cur < limit)
		{
			SGML_character(context, *cur);
			cur++;
		}
		GTR_FREE((UCHAR *) converted);
	}
	else
#endif
	{
		limit = str + l;
		cur = str;
		while (cur < limit)
		{
			SGML_character(context, *cur);
			cur++;
		}
	}

	if (context->actions->block_done)
	{
		/* Notify the target stream that we've done a block's worth. */
		(*context->actions->block_done)(context->target);
	}
	return TRUE;
}
|
|
|
|
|
|
PRIVATE int SGML_init(struct Mwin *tw, int nState, void **ppInfo)
|
|
{
|
|
struct Params_InitStream *pParams;
|
|
|
|
pParams = (struct Params_InitStream *) *ppInfo;
|
|
|
|
switch (nState)
|
|
{
|
|
case STATE_INIT:
|
|
pParams->request = HTRequest_validate(pParams->request);
|
|
if (pParams->request == NULL)
|
|
{
|
|
*pParams->pResult = -1;
|
|
return STATE_DONE;
|
|
}
|
|
pParams->me->fDCache = pParams->fDCache;
|
|
if (pParams->fDCache)
|
|
{
|
|
#ifdef FEATURE_INTL
|
|
SetFileDCache( tw->w3doc,
|
|
pParams->request->destination->szActualURL,
|
|
pParams->request->content_encoding,
|
|
&pParams->me->fpDc,
|
|
&pParams->me->pszDcFile,
|
|
pParams->atomMIMEType);
|
|
#else
|
|
SetFileDCache( pParams->request->destination->szActualURL,
|
|
pParams->request->content_encoding,
|
|
&pParams->me->fpDc,
|
|
&pParams->me->pszDcFile,
|
|
pParams->atomMIMEType);
|
|
#endif
|
|
pParams->me->format_inDc = pParams->atomMIMEType;
|
|
}
|
|
else
|
|
{
|
|
pParams->me->fpDc = NULL;
|
|
pParams->me->pszDcFile = NULL;
|
|
}
|
|
|
|
*pParams->pResult = 1;
|
|
return STATE_DONE;
|
|
|
|
case STATE_ABORT:
|
|
pParams->request = HTRequest_validate(pParams->request);
|
|
if (pParams->fDCache)
|
|
{
|
|
AbortFileDCache(&pParams->me->fpDc, &pParams->me->pszDcFile);
|
|
pParams->me->fDCache = FALSE; // So SGML_free knows.
|
|
}
|
|
*pParams->pResult = -1;
|
|
return STATE_DONE;
|
|
}
|
|
}
|
|
|
|
/*	Pass a block of document data to the disk cache
**	-----------------------------------------------
**
**	After AssertDiskCacheEnabled(), forwards str/cb to CbWriteDCache()
**	together with the stream's cache file fields and window.  Does
**	nothing further when the stream has no cache file (fpDc is NULL).
*/
PRIVATE void SGML_write_dcache(HTStream *me, CONST char *str, int cb)
{
	AssertDiskCacheEnabled();

	if (!me->fpDc)
	{
		return;
	}

	CbWriteDCache(str, 1, cb, &me->fpDc, &me->pszDcFile, NULL, 0, me->tw);
}
|
|
|
|
/*_______________________________________________________________________
|
|
*/
|
|
|
|
/* Structured Object Class
|
|
** -----------------------
|
|
*/
|
|
/* Class table for the SGML parser stream.  Entries are positional; the
   slot descriptions below are inferred from the routines installed in
   them and should be confirmed against the HTStreamClass declaration. */
PUBLIC HTStreamClass SGMLParser =
{
	"SGMLParser",		/* class name */

	/* TODO: These strings should really be dependent on the DTD */
	NULL,			/* presumably the two status strings that SGML_new() */
	NULL,			/* loads through HTLoadStatusStrings() -- confirm */

	SGML_init,		/* initialization state handler */
	SGML_free,
	SGML_abort,
	SGML_character,		/* one character at a time */
	SGML_string,		/* NUL-terminated string */
	SGML_write,		/* counted block */
	NULL,			/* slot not provided by this class */
	SGML_write_dcache	/* write_dcache: used by SGML_write() */
};
|
|
|
|
/* Create SGML Engine
|
|
** ------------------
|
|
**
|
|
** On entry,
|
|
** dtd represents the DTD, along with
|
|
** actions is the sink for the data as a set of routines.
|
|
**
|
|
*/
|
|
|
|
PUBLIC HTStream *SGML_new(struct Mwin *tw, CONST SGML_dtd * dtd, HTStructured * target, HTRequest *request)
|
|
{
|
|
int i;
|
|
HTStream *context = (HTStream *) GTR_MALLOC(sizeof(*context));
|
|
|
|
HTLoadStatusStrings(&SGMLParser,RES_STRING_SGML_NO,RES_STRING_SGML_YES);
|
|
if (!context)
|
|
{
|
|
ERR_SimpleError(tw, errLowMemory, RES_STRING_LOADDOC1);
|
|
return NULL;
|
|
}
|
|
context->expected_length = request->content_length;
|
|
|
|
if (context->expected_length)
|
|
WAIT_SetRange(tw, 0, 100, context->expected_length);
|
|
context->bytes_processed = 0;
|
|
|
|
context->isa = &SGMLParser;
|
|
context->string = HTChunkCreate(128); /* Grow by this much */
|
|
context->dtd = dtd;
|
|
context->target = target;
|
|
context->actions = (HTStructuredClass *) (((HTStream *) target)->isa);
|
|
/* Ugh: no OO */
|
|
context->state = S_text;
|
|
context->crlf_state = CRLF_None;
|
|
context->element_stack = 0; /* empty */
|
|
#ifdef CALLERDATA
|
|
context->callerData = (void *) callerData;
|
|
#endif
|
|
for (i = 0; i < MAX_ATTRIBUTES; i++)
|
|
context->value[i] = 0;
|
|
context->tw = tw;
|
|
context->request = request;
|
|
context->fInRecovery = FALSE;
|
|
context->cs_source = (struct CharStream *) (context->actions->get_source ? ((*context->actions->get_source)(context->target)) : NULL);
|
|
return context;
|
|
}
|