mirror of https://github.com/tongzx/nt5src
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
627 lines
18 KiB
627 lines
18 KiB
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <assert.h>
|
|
#include <string.h>
|
|
|
|
#include "extract.h"
|
|
#include "tags.h"
|
|
|
|
|
|
/* True when c is a blank, tab, or newline.  c is evaluated up to three
 * times, so do not pass an argument with side effects. */
#define SPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
|
|
|
|
/* Output routine error messages */
|
|
/* fprintf format used when a write to the output file fails; the single %s
 * receives the source file name.  Has external linkage -- presumably shared
 * with other files; confirm before changing its type. */
char errOut[] = "%s: Error writing to file.\n";
|
|
|
|
/* Standard templated error messages */
|
|
/* fprintf format used by PrintError: file name, line number, message text */
static char errmsg[] = "%s (%u): %s\n";
|
|
|
|
|
|
/* File private functions
|
|
*/
|
|
/* Shared block scanner called by GetFirstBlock and GetNextBlock; defined
 * further down in this file. */
static WORD CommonGetBlock(NPSourceFile sf, PSTR p);
|
|
|
|
|
|
/*
|
|
* @doc EXTRACT
|
|
*
|
|
* @api void | OutputTag | Print a tag name to the output file.
|
|
*
|
|
* @parm NPSourceFile | sf | Specifies the source file buffer block.
|
|
*
|
|
* @parm WORD | wBlock | Specifies the current outerlevel block type.
|
|
*
|
|
* @parm WORD | wTag | Gives the index of the tag to print.
|
|
*
|
|
* @comm Prints the innerlevel block tag specified by <p wTag>, as
|
|
* determined from the global tag array. The output tag printed is
|
|
* affected by the current outerlevel block type, so that different
|
|
* outerlevel blocks will generate different output tags for the same
|
|
* input tag.
|
|
*
|
|
* The tag text is followed by a tab character. No output will occur if
|
|
* the global fNoOutput flag is True.
|
|
*
|
|
*/
|
|
/*
 * OutputTag: writes the TAG prefix character, the tag text found at
 * DocTags[wBlock][wTag], and a trailing tab to fpOutput.  Nothing is
 * written when fNoOutput is set.  A failed write is reported against the
 * source file name of sf and ends the program with exit code 4.
 */
void OutputTag(NPSourceFile sf, WORD wBlock, WORD wTag)
{
    if (fNoOutput)
        return;

    /* Validate the table lookup before anything reaches the output file */
    assert(wBlock < NUM_LEVELS);
    assert(DocTags[wBlock][wTag] != NULL);

    /*
     * Output text, and if error occurs, exit() for now. HACK!
     *
     * fputs() only promises a non-negative value on success, so failure
     * must be detected by comparing against EOF; testing for "non-zero"
     * aborts on C libraries that return a positive value on success.
     */
    if (putc(TAG, fpOutput) == EOF ||
        fputs(DocTags[wBlock][wTag], fpOutput) == EOF ||
        putc('\t', fpOutput) == EOF) {
        fprintf(stderr, errOut, sf->fileEntry->filename);
        exit(4);
    }
}
|
|
|
|
/*
|
|
* @doc EXTRACT
|
|
* @api void | OutputTagText | Print a tag to the output file, where
|
|
* the tag is specified by an immediate string.
|
|
*
|
|
* @parm NPSourceFile | sf | Specifies the source file buffer block.
|
|
* @parm PSTR | szTag | Specifies the tag to output.
|
|
*
|
|
* @comm Prints tag <p szTag> to the output file. The tag text is
|
|
* followed by a tab character. <p szTag> should not include the tag
|
|
* prefix character (ie the '@') sign, as this is printed automatically.
|
|
* No output will occur if the global flag fNoOutput is set.
|
|
*
|
|
*/
|
|
/*
 * OutputTagText: writes the TAG prefix character, the caller-supplied tag
 * text szTag, and a trailing tab to fpOutput.  Nothing is written when
 * fNoOutput is set.  A failed write is reported against the source file
 * name of sf and ends the program with exit code 4.
 */
void OutputTagText(NPSourceFile sf, PSTR szTag)
{
    if (fNoOutput)
        return;

    /*
     * Output text, and if error occurs, exit() for now. HACK!
     *
     * Compare against EOF rather than testing for non-zero: fputs() may
     * legitimately return any non-negative value on success.
     */
    if (putc(TAG, fpOutput) == EOF ||
        fputs(szTag, fpOutput) == EOF ||
        putc('\t', fpOutput) == EOF) {
        fprintf(stderr, errOut, sf->fileEntry->filename);
        exit(4);
    }
}
|
|
|
|
|
|
/*
|
|
* @doc EXTRACT
|
|
* @api void | OutputRegion | Print the text between the point and
|
|
* the mark, inclusive.
|
|
*
|
|
* @parm NPSourceFile | sf | Specifies the source file buffer block,
|
|
* containing the output buffer, point, and mark.
|
|
* @parm char | chPost | Specifies character with which to output
|
|
* after outputting the text region. Usually a newline. This character
|
|
* is ignored if NULL.
|
|
*
|
|
* @comm Prints the region given by the pt and mark fields of <p sf>.
|
|
* The text printed is inclusive from the point to the mark. The
|
|
* <p chPost> character is appended to the output if <p chPost> is
|
|
* non-NULL (useful for printing newlines or tabs).
|
|
*
|
|
* If a write error occurs, an error message is printed to stderr and
|
|
* the program exited.
|
|
*
|
|
* No output occurs if the global fNoOutput flag is TRUE.
|
|
*
|
|
*/
|
|
/*
 * OutputRegion: writes the text from sf->pt up to and including the
 * character at sf->mark to fpOutput, followed by chPost when chPost is
 * non-zero.  Nothing is written when fNoOutput is set.  A failed write is
 * reported against the source file name of sf and ends the program with
 * exit code 4.
 */
void OutputRegion(NPSourceFile sf, char chPost)
{
    char chSaved = '\0';    /* character overwritten by the temporary NUL */
    int  fClipped = 0;      /* non-zero once the buffer has been patched  */
    int  rc;

    if (fNoOutput)
        return;

    /*
     * Temporarily terminate the string just past the mark so fputs()
     * stops there.  When the mark already sits on the terminating NUL
     * there is nothing to clip (and mark + 1 must not be touched).
     */
    if (*sf->mark) {
        chSaved = *(sf->mark + 1);
        *(sf->mark + 1) = '\0';
        fClipped = 1;
    }

    rc = fputs(sf->pt, fpOutput);

    /* Put the overwritten character back before doing anything else */
    if (fClipped)
        *(sf->mark + 1) = chSaved;

    /*
     * fputs() signals failure with EOF; any non-negative value is success.
     * The trailing character is only attempted after a successful fputs().
     */
    if (rc == EOF || (chPost && putc(chPost, fpOutput) == EOF)) {
        fprintf(stderr, errOut, sf->fileEntry->filename);
        exit(4);
    }
}
|
|
|
|
|
|
/*
|
|
* @doc EXTRACT
|
|
* @api void | OutputText | Outputs an arbitrary text string to the
|
|
* output file.
|
|
*
|
|
* @parm NPSourceFile | sf | Specifies the source file buffer block.
|
|
* @parm PSTR | szText | Specifies the text string to print.
|
|
*
|
|
* @comm Prints <p szText> to the output file. If a write error
|
|
* occurs, an error message is printed and the program exited. If the
|
|
* global fNoOutput flag is set, no output occurs. No newlines or other
|
|
* formatting characters are appended to the output.
|
|
*
|
|
*/
|
|
/*
 * OutputText: writes szText verbatim to fpOutput; no newline or other
 * character is appended.  Nothing is written when fNoOutput is set.  A
 * failed write is reported against the source file name of sf and ends
 * the program with exit code 4.
 */
void OutputText(NPSourceFile sf, PSTR szText)
{
    if (fNoOutput)
        return;

    /* fputs() reports failure as EOF; a positive return is still success */
    if (fputs(szText, fpOutput) == EOF) {
        fprintf(stderr, errOut, sf->fileEntry->filename);
        exit(4);
    }
}
|
|
|
|
|
|
/*
|
|
* @doc EXTRACT
|
|
* @api void | CopyRegion | Copies the current region from point to
|
|
* mark inclusive into a null terminated buffer.
|
|
* @parm NPSourceFile | sf | Specifies the source file buffer block.
|
|
* @parm PSTR | buf | Pointer to buffer into which region will be
|
|
* placed.
|
|
* @parm WORD | wLen | Length in bytes of buffer <p buf>.
|
|
*
|
|
* @comm Copies the region from point to mark inclusive into the
|
|
* <p buf>. Up to <p wLen> - 1 characters will be copied, and <p buf> is
|
|
* guaranteed to be NULL terminated.
|
|
*
|
|
*/
|
|
/*
 * CopyRegion: copies text starting at sf->pt into buf and NUL-terminates
 * it.  At most wLen - 1 characters are copied so that the terminator
 * always fits inside a wLen-byte buffer; nothing is written when wLen is 0.
 * Copying also stops early at a NUL in the source.
 *
 * NOTE(review): the number of characters copied is (mark - pt), so the
 * character at the mark itself is NOT copied, whereas OutputRegion treats
 * the mark as inclusive.  This is kept as it was -- confirm with the
 * callers before changing it.
 */
void CopyRegion(NPSourceFile sf, PSTR buf, WORD wLen)
{
    PSTR src;
    PSTR end;
    int  cch;

    /* No room even for the terminator */
    if (wLen == 0)
        return;

    /* Clamp the copy length to the region size and to the space left
     * in buf once the terminating NUL is accounted for. */
    cch = (int) (sf->mark - sf->pt);
    if (cch > (int) wLen - 1)
        cch = (int) wLen - 1;
    if (cch < 0)                /* mark before point: copy nothing */
        cch = 0;

    end = sf->pt + cch;
    for (src = sf->pt; *src && src < end; src++)
        *buf++ = *src;

    *buf = '\0';
}
|
|
|
|
|
|
/*
|
|
* @doc EXTRACT
|
|
* @api BOOL | FindNextTag | Moves the point forward until it points
|
|
* to the next tag in a comment block, and moves the mark to the end of
|
|
* the tag word.
|
|
*
|
|
* @parm NPSourceFile | sf | Specifies the source file buffer block.
|
|
*
|
|
* @rdesc Returns TRUE if a tag was found, or FALSE if no tag was found
|
|
* in the comment block.
|
|
*
|
|
* @comm Starting from the current point, moves the point forward to
|
|
* the next tag in the block. The mark is moved to the end of the tag
|
|
* that is found. If no next tag exists in the buffer, FALSE is
|
|
* returned and the mark and point are undefined.
|
|
*
|
|
* Note that multiple calls to <f FindNextTag> without intervening calls
|
|
* to move the point will cause the same tag to be repeatedly
|
|
* found, as the search for tags begins at the point.
|
|
*
|
|
*/
|
|
/*
 * FindNextTag: scans forward from sf->pt for the next TAG character that
 * starts a line (first character of the buffer, or preceded by '\n').
 *
 * Returns TRUE with sf->pt on the TAG character and sf->mark on the first
 * whitespace (or the terminating NUL) after the tag word.  Returns FALSE
 * when the end of the buffer is reached, or when the TAG character is not
 * followed by a tag word; in the latter case sf->mark is left just past
 * the TAG character and sf->pt is not moved.
 */
BOOL FindNextTag(NPSourceFile sf)
{
    PSTR p = sf->pt;

    for (;;) {
        /* Run forward to the next TAG character or the terminator */
        while (*p && *p != TAG)
            p++;

        /*
         * Test for the end of the comment block BEFORE looking at the
         * preceding character: otherwise a buffer that does not end in a
         * newline makes the scan step over its own terminator and read
         * past the end of the buffer.
         */
        if (!*p)
            return FALSE;

        /* A real tag must sit at the start of a line */
        if (p <= sf->lpbuf || *(p - 1) == '\n')
            break;

        p++;    /* TAG char in the middle of a line: keep looking */
    }

    /* Step over the TAG char; a tag word must follow immediately */
    p++;
    if (!*p || SPACE(*p)) {
        sf->mark = p;
        return FALSE;
    }

    /* Point goes on the TAG character itself */
    sf->pt = p - 1;

    /* Mark goes on the first whitespace (or NUL) after the tag word */
    while (*p && !SPACE(*p))
        p++;
    sf->mark = p;

    return TRUE;
}
|
|
|
|
|
|
/*
|
|
* @doc EXTRACT
|
|
* @api WORD | GetFirstBlock | Moves the point and mark to surround
|
|
* the first block of text following a tag that has been located with
|
|
* <f FindNextTag>.
|
|
*
|
|
* @parm NPSourceFile | sf | Identifies the source file buffer
|
|
* block.
|
|
*
|
|
* @rdesc If the call succeeds, the point is set to the start of the
|
|
* text block that immediately follows the tag. The mark is set to the
|
|
* end of this block, and either RET_ENDTAG or RET_ENDBLOCK is returned,
|
|
* depending on if there are no more blocks in the tag or if there is a block
|
|
* following respectively.
|
|
*
|
|
* If the call fails, the point is set to the start of the next tag or
|
|
* the end of the comment buffer if no more tags exist, and
|
|
* RET_EMPTYBLOCK is returned.
|
|
*
|
|
* In any case, if this function is followed by a call to
|
|
* <f FindNextTag>, no problems will result.
|
|
*
|
|
* @comm This call expects the point to be pointing the beginning of
|
|
* the tag upon entry (as setup by <f FindNextTag>). Error conditions
|
|
* should be checked upon exit from this function.
|
|
*
|
|
*/
|
|
/*
 * GetFirstBlock: with sf->pt on a TAG character, skips the tag word and
 * the whitespace after it, leaves sf->pt on the first character of the
 * text that follows, and hands over to CommonGetBlock to place the mark
 * and produce the return code.
 */
WORD GetFirstBlock(NPSourceFile sf)
{
    PSTR pText = sf->pt;

    /* The caller must have left the point on the tag character */
    assert(*pText == TAG);

    /* Step over the tag word itself ... */
    while (*pText != '\0' && !SPACE(*pText))
        pText++;

    /* ... and over the whitespace behind it (SPACE is false for NUL,
     * so this also stops at the end of the buffer) */
    while (SPACE(*pText))
        pText++;

    /* The block text starts here */
    sf->pt = pText;

    return CommonGetBlock(sf, pText);
}
|
|
|
|
|
|
|
|
/*
|
|
* @doc EXTRACT
|
|
* @api WORD | GetNextBlock | Moves the point and mark to surround
|
|
* the next block of text of a particular tag.
|
|
*
|
|
* @parm NPSourceFile | sf | Identifies the source file buffer
|
|
* information.
|
|
*
|
|
* @rdesc If the call succeeds, the point is set to the start of the
|
|
* text block that follows the initial mark. The mark upon return is
|
|
* set the end of the next text block. Either RET_ENDTAG or
|
|
* RET_ENDBLOCK is returned.
|
|
*
|
|
* If the call fails due to a non existent block, or encountering the
|
|
* end of the comment buffer, RET_EMPTYBLOCK is returned and the point
|
|
* is set to the start of the next tag or the end of the comment
|
|
* buffer.
|
|
*
|
|
* @comm This procedure, in combination with <f GetFirstBlock>, allows
|
|
* the tag reader to step through the text fields associated with a tag.
|
|
* Contiguous calls to <f GetNextBlock> are possible, which will
|
|
* move the region forward to surround each field. If the tag's text
|
|
* fields end prematurely, RET_EMPTYBLOCK will be returned as an error
|
|
* flag.
|
|
*
|
|
* Calls to <f GetNextBlock> may always be followed by a call to
|
|
* <f FindNextTag>.
|
|
*
|
|
*/
|
|
/*
 * GetNextBlock: starting from sf->mark (the end of the previous text
 * block), looks for the BLOCK separator that opens the next block.
 *
 * If no separator follows, sf->pt and sf->mark are both set to the place
 * where the search stopped and RET_EMPTYBLOCK is returned.  Otherwise
 * sf->pt is set to the first non-whitespace character after the separator
 * and CommonGetBlock places the mark and supplies the return code.
 */
WORD GetNextBlock(NPSourceFile sf)
{
    PSTR p = sf->mark;

    /*
     * Step off the last character of the previous block.  Stay put when
     * the mark is already on a BLOCK char (an empty block is being
     * exited), and also when it is on the terminating NUL -- stepping
     * there would run off the end of the buffer.
     */
    if (*p && *p != BLOCK)
        p++;

    /* Skip whitespace up to the expected BLOCK char (SPACE is false for
     * NUL, so this stops at the end of the buffer) */
    while (SPACE(*p))
        p++;

    /* Anything else here -- including the end of the buffer -- means
     * there is no further block for this tag. */
    if (*p != BLOCK) {
        sf->pt = sf->mark = p;      /* reset mark and point for FindNextTag */
        return RET_EMPTYBLOCK;
    }

    /*
     * End-of-comment needs no special case below: CommonGetBlock returns
     * RET_EMPTYBLOCK when it starts on the terminating NUL.
     */

    /* Skip the BLOCK char and the whitespace behind it; the block text
     * (if any) starts there. */
    p++;
    while (SPACE(*p))
        p++;
    sf->pt = p;

    return CommonGetBlock(sf, p);
}
|
|
|
|
|
|
|
|
/*
|
|
* @doc EXTRACT
|
|
* @api WORD | CommonGetBlock | Common block searcher routine for use
|
|
* by <f GetFirstBlock> and <f GetNextBlock>.
|
|
*
|
|
* @parm NPSourceFile | sf | Blah.
|
|
* @parm PSTR | p | Point to start searching for the beginning of a
|
|
* text block from.
|
|
*
|
|
* @rdesc Returns RET_ENDBLOCK when there are text blocks following
|
|
* this tag, RET_ENDTAG when no more text blocks follow for this tag, or
|
|
* RET_EMPTYBLOCK when this block has no text. Current region (point to
|
|
* mark inclusive) is set to the selected block.
|
|
*
|
|
* @comm Performs magic. This does the real work for <f GetNextBlock>
|
|
* and <f GetFirstBlock>.
|
|
*
|
|
*/
|
|
/*
 * CommonGetBlock: p is the start of the current text block.  Scans forward
 * to the end of the block -- a BLOCK char not preceded by a backslash, a
 * TAG char at the start of a line, or the terminating NUL -- then backs up
 * over trailing whitespace.
 *
 * Returns RET_ENDBLOCK when the block was ended by a BLOCK char and
 * RET_ENDTAG otherwise, with sf->mark on the last non-whitespace character
 * of the block.  When the block contains no text, sf->pt and sf->mark are
 * both set to the place where the forward scan stopped and RET_EMPTYBLOCK
 * is returned.
 */
static WORD CommonGetBlock(NPSourceFile sf, PSTR p)
{
    PSTR porig = p;     /* start of the block on entry                  */
    PSTR pend;          /* one past the last character kept in the block */
    WORD ret;

    /* Scan forward until end of this block, either @ or | or EOF */
    for (;;) {
        while (*p && *p != TAG && *p != BLOCK)
            p++;

        /* A BLOCK char preceded by a backslash is escaped text */
        if (*p == BLOCK && p > sf->lpbuf && *(p - 1) == '\\') {
            p++;
            continue;
        }

        /* A TAG char only counts at the start of a line */
        if (*p == TAG && p > sf->lpbuf && *(p - 1) != '\n') {
            p++;
            continue;
        }

        break;
    }

    /* Another block follows only if the scan stopped on a BLOCK char */
    ret = (*p == BLOCK) ? RET_ENDBLOCK : RET_ENDTAG;

    /*
     * Back up over trailing whitespace.  The bound is tested before the
     * character is read, so nothing in front of porig is ever examined
     * (reading there would fall outside the buffer when the block starts
     * at sf->lpbuf).
     */
    pend = p;
    while (pend > porig && SPACE(*(pend - 1)))
        pend--;

    if (pend == porig) {
        /* Nothing but whitespace: empty block, leave both point and mark
         * where the forward scan stopped. */
        sf->mark = sf->pt = p;
        return RET_EMPTYBLOCK;
    }

    /* Mark goes on the last non-whitespace character of the block */
    sf->mark = pend - 1;
    return ret;
}
|
|
|
|
|
|
/*
|
|
* @doc EXTRACT
|
|
* @api WORD | FixLineCounts | Computes the line number of a point
* within the comment buffer, for error reporting purposes.
|
|
*
|
|
* @parm NPSourceFile | sf | Blah.
|
|
*
|
|
* @parm PSTR | pt | Point to return the line number of. This must be
|
|
* a valid point within the comment buffer of <p sf>.
|
|
*
|
|
* @rdesc Returns the line number of point <p pt> within the comment
|
|
* buffer of <p sf>. Newlines are counted to determine the line offset
|
|
* within the buffer, and the resulting number of newlines added to the
|
|
* initial line number of the first line of the comment buffer. This
|
|
* value is returned. It is thus important for other tag reader
|
|
* routines not to alter the original comment buffer, as the line number
|
|
* returned from this routine would then be invalid.
|
|
*
|
|
*/
|
|
/*
 * FixLineCounts: returns sf->wLineBuf plus the number of newline
 * characters found from sf->lpbuf up to and including pt.  Returns 0 when
 * pt lies before the start of the buffer.
 */
WORD FixLineCounts(NPSourceFile sf, PSTR pt)
{
    PSTR pScan = sf->lpbuf;
    WORD cNewlines = 0;

    /* something bogus happened: the point is not inside the buffer */
    if (pScan > pt)
        return 0;

    /* Count newlines up to and including the character at pt */
    for (;;) {
        if (*pScan == '\n')
            cNewlines++;
        if (pScan == pt)
            break;
        pScan++;
    }

    return (sf->wLineBuf + cNewlines);
}
|
|
|
|
|
|
/*
|
|
* @doc EXTRACT
|
|
* @api void | PrintError | Prints an error message in a standard
|
|
* format, and sets the exit condition flag for the source file block.
|
|
*
|
|
* @parm NPSourceFile | sf | Specifies the source file buffer block.
|
|
* @parm PSTR | szMessage | Error message to print.
|
|
* @parm BOOL | fExit | Indicates whether this is a fatal exit. If
|
|
* TRUE, the program will exit when the current file has been completely
|
|
* parsed.
|
|
*
|
|
* @comm Prints the source file filename and the line number of the
|
|
* current point to standard error, followed by <p szMessage>.
|
|
*
|
|
*/
|
|
/*
 * PrintError: writes "<file> (<line>): <message>" to stderr, where the
 * line is the one FixLineCounts reports for sf->pt.  When fExit is set,
 * sf->fExitAfter is raised as well.
 */
void PrintError(NPSourceFile sf, PSTR szMessage, BOOL fExit)
{
    WORD wLine = FixLineCounts(sf, sf->pt);

    fprintf(stderr, errmsg, sf->fileEntry->filename, wLine, szMessage);

    if (!fExit)
        return;

    /* Fatal: flag the source file block */
    sf->fExitAfter = TRUE;
}
|
|
|
|
/*
|
|
* @doc EXTRACT
|
|
* @api WORD | ProcessWordList | Process a whitespace or comma
|
|
* separated list of words following a tag, formatting
|
|
* them as a space separated list of words.
|
|
*
|
|
* @parm NPSourceFile | sf | Blah.
|
|
* @parm PSTR * | bufPt | Pointer to a buffer pointer, which should
|
|
* initially contain a near buffer obtained with <f NearMalloc>, where
|
|
* the formatted word list will be placed. The buffer pointed to will
|
|
* be automatically expanded as necessary.
|
|
*
|
|
* @parm BOOL | fCap | Specifies whether to convert to uppercase
|
|
* the processed list of words.
|
|
*
|
|
* @rdesc Returns either RET_ENDBLOCK or RET_ENDTAG, depending on
|
|
* whether there are following blocks within the tag's text or not,
|
|
* respectively. (What a horrible sentence). The point and mark will
|
|
* be at the end of the text block upon return. If there is no text
|
|
* block following the tag, then RET_EMPTYBLOCK is returned, and the
|
|
* point and mark point to the next tag in the comment block, or the
|
|
* end of the comment block.
|
|
*
|
|
*/
|
|
/* True for the characters that separate words in a word list: blank,
 * newline, tab, comma, and semicolon.  c is evaluated several times. */
#define SEPSPACE(c) ((c)==' ' || (c)=='\n' || (c)=='\t' ||(c)==','||(c)==';')
|
|
|
|
/*
 * ProcessWordList: takes the first text block after the tag at sf->pt and
 * copies its words into *bufPt, each followed by one space, optionally
 * converted to upper case.  Words are delimited by SEPSPACE characters.
 *
 * Returns the code produced by GetFirstBlock.  On RET_EMPTYBLOCK *bufPt is
 * not touched; otherwise *bufPt always holds a NUL-terminated string
 * (possibly empty) and sf->pt / sf->mark are both left on the last
 * character of the block.
 */
WORD ProcessWordList(NPSourceFile sf, PSTR *bufPt, BOOL fCap)
{
    WORD ret;       // hold return code
    PSTR pNew;      // runner on copy buffer
    PSTR pEnd;      // one past the last character of the block
    PSTR p;         // runner on comment block

    /* Get the text of the first block, ie the doclevel specification */
    ret = GetFirstBlock(sf);
    if (ret == RET_EMPTYBLOCK)
        return ret;

    /*
     * Grow the memory copy buffer if needed.
     * NOTE(review): the result of NearRealloc is used unchecked; its
     * failure behaviour is not visible from this file -- confirm.
     */
    if (NearSize(*bufPt) < (int) (sf->mark - sf->pt) + 5)
        *bufPt = NearRealloc(*bufPt, (WORD) (sf->mark - sf->pt) + 10);

    pNew = *bufPt;
    pEnd = sf->mark + 1;

    /*
     * Start from an empty string, so that a block holding nothing but
     * separators (e.g. ",") does not leave stale text in the buffer.
     */
    *pNew = '\0';

    p = sf->pt;
    for (;;) {
        /* skip separators before the next word */
        while (p < pEnd && SEPSPACE(*p))
            p++;

        if (p >= pEnd) {
            dprintf("ProcessWordList: Breaking loop after space skip\n");
            break;
        }

        /*
         * Copy the word.  toupper() is only defined for values
         * representable as unsigned char (or EOF), hence the cast.
         */
        for (; p < pEnd && !SEPSPACE(*p); p++)
            *pNew++ = fCap ? (char) toupper((unsigned char) *p) : *p;

        /* Put a space between the words, and then null terminate in
         * case this is the last word in a list
         */
        *pNew++ = ' ';
        *pNew = '\0';

        /* Check if we're at end of buffer */
        if (p >= pEnd) {
            dprintf("ProcessWordList: Breaking loop after word copy.\n");
            break;
        }
    }

    /* Restore point and mark to the end of the text block */
    sf->pt = sf->mark = pEnd - 1;

    return ret;
}
|
|
|
|
|
|
|
|
/*
|
|
* @doc EXTRACT
|
|
* @api void | OutputFileHeader | Prints an output file header using
|
|
* compiled in constants and system information.
|
|
*
|
|
* @parm FILE * | fpOut | File pointer to which to write header.
|
|
*
|
|
* @comm Currently prints only the program name, version, and the current
|
|
* time (in UNIX <f asctime> format). The file header is surrounded by
|
|
* header begin and end tags.
|
|
*
|
|
*/
|
|
#include <time.h>
|
|
#include "version.h"
|
|
|
|
void OutputFileHeader(FILE *fpOut)
|
|
{
|
|
time_t curtime;
|
|
|
|
fprintf(fpOut, "@%s\t\n", T2TEXT_BEGINHEADER);
|
|
fprintf(fpOut, "@%s\t%s\n", T2TEXT_EXTRACTID, VERSIONNAME);
|
|
fprintf(fpOut, "@%s\t%d.%d.%d\n", T2TEXT_EXTRACTVER, rmj, rmm, rup);
|
|
time(&curtime);
|
|
fprintf(fpOut, "@%s\t%s", T2TEXT_EXTRACTDATE, asctime(localtime(&curtime)));
|
|
fprintf(fpOut, "@%s\t\n", T2TEXT_ENDHEADER);
|
|
|
|
}
|