windows-xp/Source/XPSP1/NT/multimedia/media/tools/extract/extract.c


								/*

								 * EXTRACT.C

								 *

								 * Documentation extractor.  Extracts tagged comment blocks from source

								 * code, interprets and reformats the tag definitions, and outputs an

								 * intermediate level 2 tag file, suitable for processing by a final

								 * formatting tool to coerce the level 2 tags into something appropriate

								 * for the presentation medium (paper, WinHelp RTF, Ventura, etc).

								 *

								 */


								#include <stdio.h>

								#include <string.h>

								#include <stdlib.h>

								#include <assert.h>

								#include "extract.h"

								#include "tags.h"

								#include "version.h"

								#if MMWIN

								#include <mmsysver.h>

								#endif


								/* Whether to do any output at all?
								 * When True, main() prints the "Syntax check only." notice and neither
								 * opens an output file nor writes a file header.  Defaults to False and
								 * is not assigned anywhere in the code visible here (presumably set by
								 * ParseArgs -- confirm).
								 */

								BOOL	fNoOutput	= False;

								/*  The output file to use if not stdout.
								 *  NULL means no file was named; main() then writes to stdout and
								 *  replaces this with an allocated copy of the string "stdout".
								 */

								PSTR	szOutputFile	= NULL;

								/*  The actual output file pointer.
								 *  Assigned in main() (the opened szOutputFile, or stdout) only when
								 *  output is enabled; otherwise it keeps its zero initial value.
								 */

								FILE	*fpOutput;


								/*

								 *  File-private procedure templates

								 */

								/* Reads one source file line by line, collecting tagged comment blocks. */

								void ProcessSourceFile( NPSourceFile sf );

								/* Appends a prepared comment line to the comment buffer held in sf. */

								void AppendLineToBuf(NPSourceFile sf, PSTR buf);

								/* Returns True if buf opens a comment; *nbuf is then set to the text to prep. */

								BOOL LookForCommentStart(NPSourceFile sf, PSTR buf, PSTR *nbuf);

								/* Heuristic: does p begin with the tag character followed by a word and whitespace? */

								BOOL IsTag(PSTR p);

								/* Strips comment decoration from a line; returns True at end of comment. */

								BOOL PrepLine( NPSourceFile sf, PSTR buf, PSTR *nbuf );


								/*

								 *  User messages
								 *
								 *  All of these are written to stderr.  The strings containing
								 *  conversions are used as fprintf formats; the callers in this file
								 *  pass a file name for %s and the current line number for %d.

								 */

								/* Progress notices printed by main(). */

								char msgStdin[] = "Using Standard Input for source text...\n";

								char msgCurFile[] = "Processing file %s...\n";

								char msgSyntaxCheck[] = "Syntax check only.\n";


								/* Printed by LookForCommentStart() when a file of unknown type is classified. */

								char msgTypeMASM[] = "%s (%d): File is MASM source.\n";

								char msgTypeC[] = "%s (%d): File is C source.\n";


								/* Error reports: the first two come from main(), the last two from ProcessSourceFile(). */

								char errOutputFile[] = "%s: Can not open output file\n";

								char errInputFile[] = "%s: Can not open file.\n";

								char errEOFinComment[] = "%s (%d): Premature end of file within comment block.\n";

								char errRead[] = "%s (%d): Unable to read.\n";


								/*

								 * @doc EXTRACT

								 *

								 * @func int | main | This program extracts documentation information

								 * from the given input file and sends it to the standard output.

								 * Information is not sorted or formatted, but parsed from the

								 * initial tag types to an intermediate tag output format that contains

								 * full information as to tag placement within documentation/function

								 * declarations.

								 *

								 * @rdesc The return value is zero if there are no errors, otherwise the

								 * return value is a non-zero error code.

								 *

								 */

								/*
								 * Program entry point.  Parses the command line, opens the output
								 * stream (unless only a syntax check was requested), then runs
								 * ProcessSourceFile() over every entry of the FilesToProcess list, or
								 * over stdin when that list is empty.
								 *
								 * The function is declared void; the process status is always delivered
								 * through exit(): 0 on success, 1 when the output file cannot be opened
								 * or when a source file sets fExitAfter.
								 */
								void main(int argc,	/* Specifies the number of arguments. */
									  char *argv[])	/* Specifies an array of pointers to the arguments */
								{
									SourceFile	sourceBuf;
									FileEntry	fileEntry;
									BOOL		fStdin = False;

									#define INITIAL_BUF	8192

								#ifdef MMWIN
									/* announce our existence */
									fprintf(stderr, "%s\n", VERSIONNAME);
									fprintf(stderr, "Program Version %d.%d.%d\t%s\n", rmj, rmm, rup,
										MMSYSVERSIONSTR);
								#ifdef DEBUG
									fprintf(stderr, "Compiled: %s %s by %s\n", __DATE__, __TIME__,
										szVerUser);
									fDebug = 1;
								#endif
								#endif

									ParseArgs(argc, argv);

									if (fNoOutput) {
										/* The message has no arguments, so do not use it as a format. */
										fputs(msgSyntaxCheck, stderr);
										/* Syntax check only: drop any output file name.  (This used to
										 * be written "==", a comparison with no effect.)
										 */
										szOutputFile = NULL;
									}
									else {
										/*  Open the output file, if one was specified.  If !szOutputFile,
										 *  then use stdout.
										 */
										if (szOutputFile) {
											fpOutput = fopen(szOutputFile, "w");
											if (fpOutput == NULL) {
												fprintf(stderr, errOutputFile, szOutputFile);
												exit(1);
											}
										}
										else {		/* Using stdout for output */
											fpOutput = stdout;
											szOutputFile = StringAlloc("stdout");
										}

										OutputFileHeader(fpOutput);
									}

									/*  If no files were specified on command line, use stdin.
									 *  Fake a fileEntry structure for stdin so the loop below needs no
									 *  special list handling.
									 */
									if (FilesToProcess == NULL) {
										fileEntry.filename = StringAlloc("stdin");
										fileEntry.next = NULL;
										fileEntry.type = SRC_UNKNOWN;
										FilesToProcess = &fileEntry;
										fStdin = True;
									}

									/*
									 *  Loop over all files specified on command line
									 */
									while (FilesToProcess) {
										/* The source buffer always describes the head of the list. */
										sourceBuf.fileEntry = FilesToProcess;

										/*  Open the file, except when using stdin */
										if (fStdin) {
											sourceBuf.fp = stdin;
											fputs(msgStdin, stderr);
										}
										else {
											sourceBuf.fp = fopen(FilesToProcess->filename, "r");
											if (!sourceBuf.fp) {
												/* couldn't open file: report it and skip to the next */
												fprintf(stderr, errInputFile,
													FilesToProcess->filename);
												FilesToProcess = FilesToProcess->next;
												continue;
											}

											/* Send message telling current file */
											fprintf(stderr, msgCurFile, FilesToProcess->filename);
										}

										/* Reset line numbers of input files to zero */
										sourceBuf.wLineNo = 0;
										sourceBuf.wLineBuf = 0;

										/* Setup copy buffer: empty, with no tags seen yet */
										sourceBuf.lpbuf = NearMalloc(INITIAL_BUF, False);
										sourceBuf.pt = sourceBuf.mark = sourceBuf.lpbuf;
										sourceBuf.fHasTags = sourceBuf.fTag = False;
										sourceBuf.fExitAfter = FALSE;

										ProcessSourceFile( &sourceBuf );

										/* Release everything that belonged to this file, then advance. */
										if (!fStdin)
											fclose(sourceBuf.fp);
										NearFree(sourceBuf.lpbuf);
										NearFree(FilesToProcess->filename);
										FilesToProcess = FilesToProcess->next;

										/*
										 * Bail out with non-zero exit if fatal error encountered
										 */
										if (sourceBuf.fExitAfter) {
											fcloseall();
											exit(1);
										}
									}

									/*
									 *  Close every stream that is still open, then report success.
									 */
									fcloseall();
									exit(0);
								}


								/*

								 * @doc	EXTRACT

								 * @api	void | ProcessSourceFile | Process a given file, searching

								 * for and extracting doc tagged comment blocks and processing and

								 * outputting these comment blocks.

								 *

								 * @parm	NPSourceFile | sf | Specifies the source file comment block.

								 * It must have a valid file pointer, and a valid buffer (lpbuf field)

								 * before calling this function.  The file pointer will be open upon

								 * return.

								 *

								 * @comm	This proc sits in a loop reading lines until it finds a

								 * comment.  Once inside a comment, the lines are stripped of fuzz

								 * pretty printing characters and examined for being an autodoc tagged

								 * line.  If a tag is found in the comment block, the following comment

								 * lines are copied into the lpbuf buffer of <p sf>, and passed to the

								 * <f TagProcessBuffer> function to parse and output the tags.

								 *

								 */

								/* Maximum number of bytes (terminator included) read per fgets() call. */
								#define LOCALBUF_SIZE	1024

								/*
								 * Scans the open stream sf->fp line by line, alternating between two
								 * modes:
								 *
								 *   CommentSearch (inComment == False): look for a comment opener with
								 *   LookForCommentStart(); on success reset the comment buffer state in
								 *   sf and switch to InsideComment mode.
								 *
								 *   InsideComment (inComment == True): run each line through PrepLine()
								 *   and AppendLineToBuf(); when PrepLine() reports the end of the
								 *   comment, call TagProcessBuffer() if a tag was seen, and go back to
								 *   CommentSearch mode.
								 *
								 * sf must carry a valid fp and lpbuf on entry.  The stream is left open.
								 * Read errors and a premature end of file inside a C comment are
								 * reported on stderr.
								 */
								void ProcessSourceFile( NPSourceFile sf )
								{
									char	*pOrigBuf;	/* base of the line allocation */
									char	*buf;		/* where each line is read; pOrigBuf + 1 */
									char	*pLine;		/* fgets() result, NULL on EOF or error */
									char	*nBuf, *nBuf2;
									int	inComment = False;
									int	fEndComment;

									/* PrepLine() may step back one character to insert a pad blank, so
									 * the line starts one byte into the allocation.  The allocation is
									 * therefore one byte larger than the read size; without that extra
									 * byte a full-length read would run one byte past the end.
									 */
									pOrigBuf = NearMalloc(LOCALBUF_SIZE + 1, False);
									buf = pOrigBuf + 1;

									while (!feof(sf->fp)) {
										/*
										 *  Grab the next line
										 */
								#ifdef HEAPDEBUG
										NearHeapCheck();
								#endif

										/* Keep the result as a pointer; squeezing it into an int
										 * truncates it on targets where pointers are wider than int.
										 */
										pLine = fgets(buf, LOCALBUF_SIZE, sf->fp);

								#ifdef HEAPDEBUG
										NearHeapCheck();
								#endif

										/*  Handle error or EOF conditions  */
										if (pLine == NULL) {
											if (!feof(sf->fp)) {
												/* genuine read error, bail out */
												fprintf(stderr, errRead,
													sf->fileEntry->filename, sf->wLineNo);
												break;
											}

											if (inComment) {
												/* MASM comment blocks can end on EOF, so flush a
												 * pending tagged block; anywhere else an open
												 * comment at EOF is an error.
												 */
												if (sf->fileEntry->type == SRC_MASM) {
													if (sf->fTag)
														TagProcessBuffer(sf);
												}
												else {
													fprintf(stderr, errEOFinComment,
														sf->fileEntry->filename, sf->wLineNo);
												}
											}

											/* feof() is now true, so the loop condition ends the scan */
											continue;
										}

										sf->wLineNo++;		/* now the number of the line just read */

										if (inComment) {
											/*
											 * InsideComment mode.  PrepLine() strips the decoration,
											 * sets sf->fTag when it sees a tag, and returns TRUE when
											 * the comment ends on this line.
											 */
											fEndComment = PrepLine(sf, buf, &nBuf);
											AppendLineToBuf(sf, nBuf);

											if (fEndComment) {
												if (sf->fTag)	/* a tag was in the current buffer */
													TagProcessBuffer(sf);

												/* Go back to comment-search mode */
												inComment = False;
											}
											continue;
										}

										/*
										 * CommentSearch mode.  When a comment start is found, nBuf is
										 * set to the text that should be prepped (see
										 * LookForCommentStart()).
										 */
										if (!LookForCommentStart(sf, buf, &nBuf))
											continue;	/* no comment start found, keep scanning */

										/* Reset source file buffer status */
										sf->fTag = sf->fHasTags = False;
										sf->wLineBuf = sf->wLineNo;
										sf->pt = sf->mark = sf->lpbuf;

										/* Check for immediate comment close */
										if (PrepLine(sf, nBuf, &nBuf2)) {
											/* NOTE(review): a one-line C comment such as
											 * "/" "* @tag text *" "/" makes PrepLine() set fTag before
											 * it reports the close, which trips this assert (and the
											 * block is silently dropped when asserts are compiled
											 * out).  Confirm whether that is intended.
											 */
											assert(sf->fTag == False);
											continue;
										}

										AppendLineToBuf(sf, nBuf2);

										/*  Enter InsideComment mode  */
										inComment = True;
									} /* file-level while loop */

									NearFree(pOrigBuf);
								}


								/* True for a blank or a tab only.  Unlike the standard isspace(), newline
								 * is not included; IsTag() tests for '\n' separately.  The argument is
								 * evaluated twice, so it must be free of side effects.
								 */
								#define ISSPACE(c) ((c) == ' ' || (c) == '\t')


								/*

								 * @doc	EXTRACT

								 * @api	BOOL | PrepLine | Prepares an InsideComment mode line,

								 * stripping off initial whitespace and fuzz characters, and detecting

								 * end of comment conditions.

								 *

								 * @parm	NPSourceFile | sf | Pointer to source file status buffer.

								 * @parm	PSTR | buf | Pointer to beginning of source text line, as

								 * read from the source file.

								 * @parm	PSTR * | nbuf | Pointer to a char pointer, which is altered

								 * to point to the post-processed and stripped beginning of the line upon

								 * procedure exit.

								 *

								 * @rdesc	Returns TRUE when end of comment is encountered.  In this

								 * case, the end of comment characters are not included in the return

								 * string.  Returns FALSE when no end of comment is detected.

								 *

								 * The char pointer pointed to by the <p nbuf> parameter is altered to

								 * point to the new (post-processed and stripped) beginning of the line.

								 * This new beginning is the beginning of the text of interest, having

								 * had all comment leader characters and whitespace stripped off.  NULL

								 * is an acceptable string to return, which will simply add nothing to

								 * the tag buffer.  If a blank line is encountered, (ie simply a

								 * newline), then the newline should be returned.

								 *

								 * If a tag is detected on the line, then the <p sf->fTag> flag is set

								 * to True to indicate that this is a valid tagged comment block.

								 *

								 * @comm	This procedure does the stripping of language specific fuzz

								 * characters into a simple text block.  The setting of <p sf->fTag> is

								 * critical, and may be accomplished by calling the <f IsTag> procedure when

								 * the tag should appear within the source line.

								 *

								 */

								/*
								 * Strips leading whitespace and language-specific comment decoration
								 * from one line inside a comment, sets sf->fTag when the remaining text
								 * starts with a tag, and stores the start of that text in *nbuf.
								 *
								 * The line is modified in place: it is cut at a "//" remark, at the
								 * comment close, and a pad blank may be written one character before
								 * the text when it starts with the tag character but is not a tag.
								 *
								 * Returns True when the comment ends on this line, False otherwise.
								 * An unknown source type is a programming error (assert, then exit 5).
								 */
								BOOL PrepLine( NPSourceFile sf, PSTR buf, PSTR *nbuf )
								{
									PSTR	pStars;		/* where the run of leading '*' began */
									PSTR	pCut;		/* start of a "//" remark, if any */
									PSTR	pClose;		/* start of the C comment close, if any */

									/* Scan forward, removing initial whitespace */
									while (ISSPACE(*buf))
										buf++;

									/* Comment openers are never seen here; LookForCommentStart() deals
									 * with them.  In C this is entered on the character following the
									 * opener.  In MASM, the ';' is left in.
									 */
									switch (sf->fileEntry->type) {
									case SRC_MASM:
										/* Any non-empty line that does not start with ';' ends the
										 * comment block.
										 */
										if (*buf && *buf != ';') {
											*buf = '\0';
											*nbuf = buf;
											return True;
										}

										/* strip contiguous ';' and '*', followed by whitespace */
										while (*buf == ';' || *buf == '*')
											buf++;
										while (ISSPACE(*buf))
											buf++;

										if (IsTag(buf)) {
											sf->fTag = True;
										}
										else if (*buf == TAG) {
											/* HACK: a leading tag character that is not a tag gets
											 * a blank put in front of it.
											 */
											*(--buf) = ' ';
										}
										*nbuf = buf;

										/* Very hack way of kicking out extra comments */
										pCut = strstr(buf, "//");
										if (pCut != NULL)
											*pCut = '\0';

										return False;

									case SRC_C:
										/* Remove leading stars */
										pStars = buf;
										while (*buf == '*')
											buf++;

										/* Quick check for close comment.  The '/' closes the comment
										 * only when a '*' came right before it; a bare '/' (or a
										 * whole-line "//" remark) is ordinary comment text and must
										 * not end the block.
										 */
										if (buf > pStars && *buf == '/') {
											*buf = '\0';
											*nbuf = buf;
											return True;
										}

										/* Otherwise, remove whitespace between the '*' and the text */
										while (ISSPACE(*buf))
											buf++;

										/* Check for a tag here */
										if (IsTag(buf)) {
											sf->fTag = True;
										}
										else if (*buf == TAG) {
											/* Not a tag but a tag character on first char of line:
											 * pad it.  Stepping back is safe because the caller's
											 * line buffer is padded by one.
											 */
											buf--;
											*buf = ' ';
										}

										/* Everything from "//" to the end of the line is dropped */
										pCut = strstr(buf, "//");
										if (pCut != NULL)
											*pCut = '\0';

										/* Whatever happens next, the text starts here */
										*nbuf = buf;

										/* And if the line hasn't ended, search it for a close comment;
										 * the text is cut there and the close is reported.
										 */
										pClose = strstr(buf, "*/");
										if (pClose != NULL) {
											*pClose = '\0';
											return True;
										}

										/* No end of comment on this line, return the whole line */
										return False;

									default:
										/* Invalid source type in PrepLine() */
										assert(False);
										exit(5);
									}  /* switch */
								}


								/*

								 * @doc	EXTRACT

								 * @api	BOOL | IsTag | Perform a quick and dirty check to see if the

								 * word pointed to by <p p> is a tag.

								 *

								 * @parm	PSTR | p | Buffer, queued to the start of a word/tag.  If

								 * this is a possible tag, then it must point to the initial '@'

								 * character.

								 *

								 * @rdesc	Returns TRUE if this is probably a tag, or FALSE otherwise.

								 *

								 * @comm	This is a hack test, but works 99.9% of the time.

								 *

								 */

								/*
								 * Quick heuristic tag test.  p counts as a tag when it starts with the
								 * tag character, at least one more non-blank character follows, and the
								 * word is terminated by a blank, a tab or a newline.  A word that runs
								 * to the end of the string is not accepted.
								 */
								BOOL IsTag(PSTR p)
								{
									PSTR	pWordEnd;

									if (*p != TAG)
										return False;

									/* Find the end of the word; newline delimits it as well here */
									pWordEnd = p;
									while (*pWordEnd != '\0' && !ISSPACE(*pWordEnd) && *pWordEnd != '\n')
										pWordEnd++;

									/* Ran into the terminator: no delimiter follows the word */
									if (*pWordEnd == '\0')
										return False;

									/* A tag character standing alone is not a tag */
									if (pWordEnd <= p + 1)
										return False;

									return True;
								}


								/*

								 * @doc	EXTRACT

								 * @api	BOOL | LookForCommentStart | Search a source line for comment

								 * start characters.

								 *

								 * @parm	NPSourceFile | sf | Pointer to the source file block

								 * structure.

								 * @parm	PSTR | buf | Pointer to beginning of source text file line to

								 * examine.

								 * @parm	PSTR * | nbuf | Pointer to a pointer that is modified to

								 * indicate the beginning of the true source text line if a comment

								 * block begin is found.

								 *

								 * @rdesc	Returns False if no comment start characters are found.

								 * Returns True if a comment start is found.  If True is returned,

								 * <p *nbuf> will point to the start of the source text line as it

								 * should be passed to <f AppendLineToBuf>.

								 *

								 * This examination method for determining start of comment depends on

								 * the source file type (as obtained from the fileEntry.type field of

								 * <p sf>).  Unknown file types are examined and placed into one of the

								 * other known source types as soon as distinguishing characters are

								 * found.  (ie if '/ *' is found in an unknown, the file is marked as C

								 * source for the remainder of file processing.  Note that this can

								 * cause unknown file types to be incorrectly processed.)

								 *

								 */

								/*
								 * Decides whether the line in buf opens a comment for the file type
								 * recorded in sf->fileEntry->type.
								 *
								 * For C the opener is the two-character slash-star sequence as the first
								 * non-blank text, and *nbuf is set just past it.  For MASM it is a
								 * leading ';', and *nbuf is set on the ';' itself.  A file of unknown
								 * type is classified by whichever opener is seen first (MASM is tried
								 * first); the decision is announced on stderr and stored in the file
								 * entry.
								 *
								 * Returns True when an opener was found, False otherwise (*nbuf is then
								 * left untouched).
								 */
								BOOL LookForCommentStart(NPSourceFile sf, PSTR buf, PSTR *nbuf)
								{
									int	fSemicolon;	/* line starts with the MASM comment char */
									int	fSlashStar;	/* line starts with the C comment opener */

									/*  Skip leading whitespace  */
									while (ISSPACE(*buf))
										buf++;

									if (*buf == '\0')
										return False;

									fSemicolon = (buf[0] == ';');
									/* buf[1] is only read when buf[0] is '/', so it is inside the string */
									fSlashStar = (buf[0] == '/' && buf[1] == '*');

									switch (sf->fileEntry->type) {
									case SRC_C:
										if (fSlashStar) {
											*nbuf = buf + 2;
											return True;
										}
										break;

									case SRC_MASM:
										if (fSemicolon) {
											*nbuf = buf;
											return True;
										}
										break;

									/*
									 *  The catch all.  This has serious potential for disaster!
									 */
									case SRC_UNKNOWN:
										/* Try the MASM comment character */
										if (fSemicolon) {
											fprintf(stderr, msgTypeMASM,
												sf->fileEntry->filename, sf->wLineNo);
											sf->fileEntry->type = SRC_MASM;
											*nbuf = buf;
											return True;
										}

										/* Otherwise, try the C-method */
										if (fSlashStar) {
											fprintf(stderr, msgTypeC,
												sf->fileEntry->filename, sf->wLineNo);
											sf->fileEntry->type = SRC_C;
											*nbuf = buf + 2;
											return True;
										}
										break;

									default:
										/* Unknown filetype identifier in sourceFile buffer */
										assert(False);
										break;
									}

									return False;
								}


								/*

								 * @doc	EXTRACT

								 * @api	void | AppendLineToBuf | Appends a stripped comment line to the

								 * comment buffer contained in <p sf>.

								 *

								 * @parm	NPSourceFile | sf | Source file buffer block pointer.

								 * Contains the buffer that is appended to.

								 * @parm	PSTR | buf | Pointer to NULL terminated line to add to the

								 * comment buffer.

								 *

								 * @comm	Appends <p buf> to the comment buffer, contained in the lpbuf

								 * field of <p sf>.  The current point in the comment buffer, (given by

								 * the pt field of <p sf>) is advanced to the end of the appended

								 * string.

								 *

								 */

								/*
								 * Appends the NUL-terminated line buf to the comment buffer sf->lpbuf at
								 * the current point sf->pt, growing the buffer when it is full, and
								 * leaves sf->pt on the terminating NUL.
								 *
								 * Nothing is copied until a tag has been seen in the current comment:
								 * while sf->fHasTags and sf->fTag are both False the buffer is just
								 * terminated at sf->pt.  The first call made with sf->fTag set turns
								 * sf->fHasTags on and records the current line number in sf->wLineBuf.
								 *
								 * A NULL buf is treated as an empty line.
								 */
								void AppendLineToBuf(NPSourceFile sf, PSTR buf)
								{
									PSTR	ch;
									PSTR	end;

									#define GROWSIZE	1024

									if (!sf->fHasTags) {
										if (!sf->fTag) {
											/*  No tags in buffer yet: keep it empty and return  */
											*sf->pt = '\0';
											return;
										}
										/* A tag was just found; the buffer starts at this line */
										sf->fHasTags = True;
										sf->wLineBuf = sf->wLineNo;
									}

									/*  The buffer has tags, so copy the new string  */
									end = (PSTR) (sf->lpbuf + (int) NearSize(sf->lpbuf));
									ch = (buf != NULL) ? buf : (PSTR) "";

									/* Copy as much as fits in the current allocation */
									while (*ch && (sf->pt < end))
										*sf->pt++ = *ch++;

									/* Buffer is full (possibly with no room left for the terminator) */
									if (sf->pt >= end) {
										/* Offsets survive a move of the block, pointers do not.
										 * sf->mark points into this buffer as well (it is reset to
										 * lpbuf together with pt), so it has to be rebased too or it
										 * is left pointing at the released block.
										 */
										size_t	ptOffset = (size_t) (sf->pt - sf->lpbuf);
										size_t	markOffset = (size_t) (sf->mark - sf->lpbuf);
										int	needSize = (int) strlen(ch) + 1;	/* rest of line + NUL */

										sf->lpbuf = NearRealloc(sf->lpbuf,
											       (WORD)(NearSize(sf->lpbuf) + max(needSize, GROWSIZE)));
										sf->pt = sf->lpbuf + ptOffset;
										sf->mark = sf->lpbuf + markOffset;

										/* Continue with the copy */
										while (*ch)
											*sf->pt++ = *ch++;
									}

									/* make sure that final buffer is null terminated */
									*sf->pt = '\0';
								}