Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

682 lines
18 KiB

  1. /*
  2. * EXTRACT.C
  3. *
  4. * Documentation extractor. Extracts tagged comment blocks from source
  5. * code, interprets and reformats the tag definitions, and outputs an
  6. * intermediate level 2 tag file, suitable for processing by a final
  7. * formatting tool to coerce the level 2 tags into something appropriate
  8. * for the presentation medium (paper, WinHelp RTF, Ventura, etc).
  9. *
  10. */
  11. #include <stdio.h>
  12. #include <string.h>
  13. #include <stdlib.h>
  14. #include <assert.h>
  15. #include "extract.h"
  16. #include "tags.h"
  17. #include "version.h"
  18. #if MMWIN
  19. #include <mmsysver.h>
  20. #endif
  21. /* Whether to do any output at all? */
  22. BOOL fNoOutput = False;
  23. /* The output file to use if not stdout */
  24. PSTR szOutputFile = NULL;
  25. /* The actual output file pointer */
  26. FILE *fpOutput;
  27. /*
  28. * File-private procedure templates
  29. */
  30. void ProcessSourceFile( NPSourceFile sf );
  31. void AppendLineToBuf(NPSourceFile sf, PSTR buf);
  32. BOOL LookForCommentStart(NPSourceFile sf, PSTR buf, PSTR *nbuf);
  33. BOOL IsTag(PSTR p);
  34. BOOL PrepLine( NPSourceFile sf, PSTR buf, PSTR *nbuf );
  35. /*
  36. * User messages
  37. */
  38. char msgStdin[] = "Using Standard Input for source text...\n";
  39. char msgCurFile[] = "Processing file %s...\n";
  40. char msgSyntaxCheck[] = "Syntax check only.\n";
  41. char msgTypeMASM[] = "%s (%d): File is MASM source.\n";
  42. char msgTypeC[] = "%s (%d): File is C source.\n";
  43. char errOutputFile[] = "%s: Can not open output file\n";
  44. char errInputFile[] = "%s: Can not open file.\n";
  45. char errEOFinComment[] = "%s (%d): Premature end of file within comment block.\n";
  46. char errRead[] = "%s (%d): Unable to read.\n";
  47. /*
  48. * @doc EXTRACT
  49. *
  50. * @func int | main | This program extracts documentation information
  51. * from the given input file and sends it to the standard output.
  52. * Information is not sorted or formatted, but parsed from the
  53. * initial tag types to an intermediate tag output format that contains
  54. * full information as to tag placement within documentation/function
  55. * declarations.
  56. *
  57. * @rdesc The return value is zero if there are no errors, otherwise the
  58. * return value is a non-zero error code.
  59. *
  60. */
  61. void main(argc, argv)
  62. int argc; /* Specifies the number of arguments. */
  63. char *argv[]; /* Specifies an array of pointers to the arguments */
  64. {
  65. SourceFile sourceBuf;
  66. FileEntry fileEntry;
  67. BOOL fStdin = False;
  68. #define INITIAL_BUF 8192
  69. #ifdef MMWIN
  70. /* announce our existance */
  71. fprintf(stderr, "%s\n", VERSIONNAME);
  72. fprintf(stderr, "Program Version %d.%d.%d\t%s\n", rmj, rmm, rup,
  73. MMSYSVERSIONSTR);
  74. #ifdef DEBUG
  75. fprintf(stderr, "Compiled: %s %s by %s\n", __DATE__, __TIME__,
  76. szVerUser);
  77. fDebug = 1;
  78. #endif
  79. #endif
  80. ParseArgs(argc, argv);
  81. if (fNoOutput) {
  82. fprintf(stderr, msgSyntaxCheck);
  83. szOutputFile == NULL;
  84. }
  85. else {
  86. /* Open the output file, if one was specified. If !szOutputFile,
  87. * then use stdout.
  88. */
  89. if (szOutputFile) {
  90. fpOutput = fopen(szOutputFile, "w");
  91. if (fpOutput == NULL) {
  92. fprintf(stderr, errOutputFile, szOutputFile);
  93. exit(1);
  94. }
  95. }
  96. else { /* Using stdout for output */
  97. fpOutput = stdout;
  98. szOutputFile = StringAlloc("stdout");
  99. }
  100. OutputFileHeader(fpOutput);
  101. }
  102. /* If no files were specified on command line, use stdin.
  103. * Fake a fileEntry structure for stdin.
  104. */
  105. if (FilesToProcess == NULL) {
  106. /* No files specified, use stdin */
  107. fileEntry.filename = StringAlloc("stdin");
  108. fileEntry.next = NULL;
  109. fileEntry.type = SRC_UNKNOWN;
  110. FilesToProcess = &fileEntry;
  111. fStdin = True;
  112. }
  113. /*
  114. * Loop over all files specified on command line
  115. */
  116. while (FilesToProcess) {
  117. /*
  118. * Setup the source file access buffer
  119. */
  120. sourceBuf.fileEntry = FilesToProcess; // get head of list.
  121. /* Open the file, except when using stdin */
  122. if (fStdin) {
  123. sourceBuf.fp = stdin;
  124. fprintf(stderr, msgStdin);
  125. }
  126. else { // deal with normal file, need to open it.
  127. sourceBuf.fp = fopen(FilesToProcess->filename, "r");
  128. /* couldn't open file */
  129. if (!sourceBuf.fp) {
  130. fprintf(stderr, errInputFile,
  131. FilesToProcess->filename);
  132. /* Skip to next file in list */
  133. FilesToProcess = FilesToProcess->next;
  134. continue;
  135. }
  136. /* Send message telling current file */
  137. fprintf(stderr, msgCurFile, FilesToProcess->filename);
  138. }
  139. /* Reset line numbers of input files to zero */
  140. sourceBuf.wLineNo = 0;
  141. sourceBuf.wLineBuf = 0;
  142. /* Setup copy buffer */
  143. sourceBuf.lpbuf = NearMalloc(INITIAL_BUF, False);
  144. sourceBuf.pt = sourceBuf.mark = sourceBuf.lpbuf;
  145. sourceBuf.fHasTags = sourceBuf.fTag = False;
  146. sourceBuf.fExitAfter = FALSE;
  147. ProcessSourceFile( &sourceBuf );
  148. if (!fStdin)
  149. fclose(sourceBuf.fp);
  150. NearFree(sourceBuf.lpbuf);
  151. NearFree(FilesToProcess->filename);
  152. FilesToProcess = FilesToProcess->next;
  153. /*
  154. * Bail out with non-zero exit if fatal error encountered
  155. */
  156. if (sourceBuf.fExitAfter) {
  157. fcloseall();
  158. exit(1);
  159. }
  160. }
  161. /*
  162. * Close output file if not stdout.
  163. */
  164. fcloseall();
  165. exit(0);
  166. }
  167. /*
  168. * @doc EXTRACT
  169. * @api void | ProcessSourceFile | Process a given file, searching
  170. * for and extracting doc tagged comment blocks and processing and
  171. * outputting these comment blocks.
  172. *
  173. * @parm NPSourceFile | sf | Specifies the source file comment block.
  174. * It must have a valid file pointer, and a valid buffer (lpbuf field)
  175. * before calling this function. The file pointer will be open upon
  176. * return.
  177. *
  178. * @comm This proc sits in a loop reading lines until it finds a
  179. * comment. Once inside a comment, the lines are stripped of fuzz
  180. * pretty printing characters and examined for being an autodoc tagged
  181. * line. If a tag is found in the comment block, the following comment
  182. * lines are copied into the lpbuf buffer of <p sf>, and passed to the
  183. * <f TagProcessBuffer> function to parse and output the tags.
  184. *
  185. */
  186. #define LOCALBUF_SIZE 1024
  187. void ProcessSourceFile( NPSourceFile sf )
  188. {
  189. char *buf;
  190. char *pOrigBuf;
  191. char *nBuf, *nBuf2;
  192. int inComment;
  193. int w;
  194. inComment = False;
  195. pOrigBuf = NearMalloc(LOCALBUF_SIZE, False);
  196. buf = pOrigBuf + 1; // give one space of padding at beginning
  197. while (!feof(sf->fp)) {
  198. /*
  199. * Grab the next line
  200. */
  201. #ifdef HEAPDEBUG
  202. NearHeapCheck();
  203. #endif
  204. w = (int) fgets(buf, LOCALBUF_SIZE, sf->fp);
  205. #ifdef HEAPDEBUG
  206. NearHeapCheck();
  207. #endif
  208. /* Handle error or EOF conditions */
  209. if (w == 0) {
  210. /* Am i at EOF? */
  211. if (feof(sf->fp)) {
  212. /* Message is EOF happened while in a comment block */
  213. if (inComment) {
  214. /* MASM comment blocks can end on EOF,
  215. * so go handle it if in a masm file.
  216. */
  217. if (sf->fileEntry->type == SRC_MASM) {
  218. if (sf->fTag)
  219. /* This is BOGUS!! */
  220. TagProcessBuffer(sf);
  221. }
  222. else { // premature eof otherwise
  223. fprintf(stderr, errEOFinComment,
  224. sf->fileEntry->filename, sf->wLineNo);
  225. }
  226. }
  227. /* Cause the enclosing while loop to exit on EOF */
  228. continue;
  229. }
  230. else { // error condition, bail out!
  231. fprintf(stderr, errRead, sf->fileEntry->filename, sf->wLineNo);
  232. goto BailOut;
  233. }
  234. }
  235. else {
  236. /*
  237. * Process this line - depending on current mode:
  238. *
  239. * -- CommentSearch mode: inComment = False
  240. * Not currently in a comment, looking for comment begin
  241. * characters. If commentBegin found, enter InsideComment
  242. * mode to look for end of comment and prep lines for
  243. * output processing.
  244. *
  245. * -- InsideComment mode: inComment = True
  246. * Inside a comment block, taking each line, stripping beginning
  247. * whitespace, and appending to global buffer for output
  248. * processing. When end of comment is found, send the entire
  249. * buffer for tag processing. (only if there was a tag
  250. * detected!). Enter CommentSearch mode.
  251. *
  252. */
  253. sf->wLineNo++; // line count for file - now current line no.
  254. /*
  255. * I'm in InsideComment mode, so process the next line as a comment
  256. * line. The magic is in PrepLine(), which strips whitespace, sets the
  257. * fTag flag of the sourceBuf if a tag is detected, and returns TRUE
  258. * when end of comment is detected.
  259. *
  260. */
  261. if (inComment) {
  262. w = PrepLine(sf, buf, &nBuf);
  263. AppendLineToBuf(sf, nBuf);
  264. if (w) { // detected end of comment, exit in comment state
  265. if (sf->fTag) { // a tag was in the current buffer
  266. TagProcessBuffer(sf);
  267. }
  268. /* Go back to comment-search mode */
  269. inComment = False;
  270. }
  271. }
  272. /*
  273. * Otherwise, I'm in CommentSearch mode, looking for a comment begin.
  274. * LookForCommentStart() returns TRUE when a comment start is detected.
  275. * It also fiddles <buf> so that the beginning of <buf> now points to
  276. * the character following the comment start.
  277. *
  278. * Pass to PrepLine() to detect an immediate comment close, and then
  279. * add this initial line to the global buffer after reseting buffer
  280. * status.
  281. *
  282. * Enter InsideComment mode.
  283. */
  284. else { // not in a comment buffer
  285. if (LookForCommentStart(sf, buf, &nBuf)) {
  286. // dprintf("Entering InsideComment mode, point is %d\n",
  287. // (int) (sf->pt - sf->lpbuf));
  288. /* Reset source file buffer status */
  289. sf->fTag = sf->fHasTags = False;
  290. sf->wLineBuf = sf->wLineNo;
  291. sf->pt = sf->mark = sf->lpbuf;
  292. /* Check for immediate comment close */
  293. if (PrepLine(sf, nBuf, &nBuf2)) {
  294. assert(sf->fTag == False);
  295. continue; // detected immediate end comment
  296. }
  297. AppendLineToBuf(sf, nBuf2);
  298. /* Enter InsideComment mode */
  299. inComment = True;
  300. }
  301. /* else, no comment start found, continue scan */
  302. } // endof CommentSearch mode stuff.
  303. }/* else not a string read error */
  304. } /* file-level while loop */
  305. BailOut:
  306. NearFree(pOrigBuf);
  307. }
  308. #define ISSPACE(c) ((c) == ' ' || (c) == '\t')
  309. /*
  310. * @doc EXTRACT
  311. * @api BOOL | PrepLine | Prepares an InsideComment mode line,
  312. * stripping off initial whitespace and fuzz characters, and detecting
  313. * end of comment conditions.
  314. *
  315. * @parm NPSourceFile | sf | Pointer to source file status buffer.
  316. * @parm PSTR | buf | Pointer to beginning of source text line, as
  317. * read from the source file.
  318. * @parm PSTR * | nbuf | Pointer to a char pointer, which is altered
  319. * to point the post-processed and stripped beginning of the line upon
  320. * procedure exit.
  321. *
  322. * @rdesc Returns TRUE when end of comment is encountered. In this
  323. * case, the end of comment characters are not included in the return
  324. * string. Returns FALSE when no end of comment is detected.
  325. *
  326. * The char pointer pointed to by the <p nbuf> parameter is altered to
  327. * point to the new (post-processed and stripped) beginning of the line.
  328. * This new beginning is the beginning of the text of interest, having
  329. * had all comment leader characters and whitespace stripped off. NULL
  330. * is an acceptable string to return, which will simply add nothing to
  331. * the tag buffer. If a blank line is encountered, (ie simply a
  332. * newline), then the newline should be returned.
  333. *
  334. * If a tag is detected on the line, then the <p sf->fTag> flag is set
  335. * to True to indicate that this is a valid tagged comment block.
  336. *
  337. * @comm This procedure does the stripping of language specific fuzz
  338. * characters into a simple text block. The setting of <p sf->fTag> is
  339. * critical, and may be accomplished by calling the <f IsTag> procedure when
  340. * the tag should appear within the source line.
  341. *
  342. */
  343. BOOL PrepLine( NPSourceFile sf, PSTR buf, PSTR *nbuf )
  344. {
  345. PSTR chClose;
  346. PSTR pend;
  347. /* Scan forward, removing initial whitespace */
  348. for (; *buf && ISSPACE(*buf); buf++);
  349. /* I never have to deal with begin comment processing, this is done
  350. * by the LookForCommentStart() proc. In C, PrepLine() is invoked on
  351. * the char following the '/ *'. In MASM, the ';' is left in.
  352. */
  353. switch (sf->fileEntry->type) {
  354. case SRC_MASM:
  355. /* End of comment check: If this first character (after whitespace
  356. * stripped out) is not a ';', then this is the end of the comment
  357. * block. Return TRUE to indicate this.
  358. */
  359. if (*buf && *buf != ';') {
  360. *buf = '\0';
  361. *nbuf = buf;
  362. return True;
  363. }
  364. /* strip contiguous ';' and '*', followed by whitespace */
  365. for (; *buf && (*buf == ';' || *buf == '*'); buf++);
  366. for (; *buf && ISSPACE(*buf); buf++);
  367. if (IsTag(buf)) {
  368. sf->fTag = True;
  369. *nbuf = buf;
  370. }
  371. else {
  372. /* HACK!
  373. * If first char is a @ (and not a tag), pad with a space
  374. */
  375. if (*buf == TAG) {
  376. *(--buf) = ' ';
  377. }
  378. *nbuf = buf;
  379. }
  380. /* Very hack way of kicking out extra comments */
  381. if ((buf = strstr(buf, "//")) != NULL)
  382. *buf = '\0';
  383. return False;
  384. case SRC_C:
  385. /* Remove leading stars */
  386. for (; *buf && *buf == '*'; buf++);
  387. /* Quick check for close comment - */
  388. if (*buf && *buf == '/') {
  389. *buf = '\0';
  390. *nbuf = buf;
  391. return True;
  392. }
  393. /* Otherwise, remove whitespace between the '*' and the text */
  394. for (; *buf && ISSPACE(*buf); buf++);
  395. /* Check for a tag here */
  396. if (IsTag(buf))
  397. sf->fTag = True;
  398. else {
  399. /* If not tag but a @ on first char of line */
  400. if (*buf == TAG) {
  401. buf--; // can do this since buf is padded by one
  402. *buf = ' ';
  403. }
  404. }
  405. /* Implement the comment scheme of Rick's request */
  406. if ((pend = strstr(buf, "//")) != NULL)
  407. *pend = '\0';
  408. /* And if the line hasn't ended, search line for a close comment */
  409. chClose = strstr(buf, "*/");
  410. if (chClose) {
  411. /* found end of comment, NULL this spot, and return from func
  412. * with TRUE, with nbuf pointing the beginning of non-white
  413. * space text above
  414. */
  415. *nbuf = buf;
  416. *chClose = '\0';
  417. return True;
  418. }
  419. /* Otherwise, found no end of comment on this line, so simply
  420. * return whole line
  421. */
  422. *nbuf = buf;
  423. return False;
  424. default:
  425. // dprintf("Invalid source type in PrepLine()!\n");
  426. assert(False);
  427. exit(5);
  428. } /* switch */
  429. }
  430. /*
  431. * @doc EXTRACT
  432. * @api BOOL | IsTag | Perform a quick and dirty check to see if the
  433. * word pointed to by <p p> is a tag.
  434. *
  435. * @parm PSTR | p | Buffer, queued to the start of a word/tag. If
  436. * this is a possible tag, then it must point to the initial '@'
  437. * character.
  438. *
  439. * @rdesc Returns TRUE if this is probably a tag, or FALSE otherwise.
  440. *
  441. * @comm This is a hack test, but works 99.9% of the time.
  442. *
  443. */
  444. BOOL IsTag(PSTR p)
  445. {
  446. PSTR pbegin;
  447. pbegin = p;
  448. if (*p != TAG)
  449. return False;
  450. /* For this procedure, allow newline as a whitespace delimeter */
  451. /* Skip to next whitespace */
  452. for (; *p && !(ISSPACE(*p) || *p == '\n'); p++);
  453. /* This is a test for a tag, but if the first char was
  454. * a '@' and there is a space following the word, then I'm going to
  455. * say it is a tag.
  456. */
  457. if (*p && (p > pbegin + 1) && (ISSPACE(*p) || *p == '\n'))
  458. return True;
  459. return False;
  460. }
  461. /*
  462. * @doc EXTRACT
  463. * @api BOOL | LookForCommentStart | Search a source line for comment
  464. * start characters.
  465. *
  466. * @parm NPSourceFile | sf | Pointer to the source file block
  467. * structure.
  468. * @parm PSTR | buf | Pointer to beginning of source text file line to
  469. * examine.
  470. * @parm PSTR * | nbuf | Pointer to a pointer that is modified to
  471. * indicate the beginning of the true source text line if a comment
  472. * block begin is found.
  473. *
  474. * @rdesc Returns False if no comment start characters are found.
  475. * Returns True if a comment start is found. If True is returned,
  476. * <p *nbuf> will point to the start of the source text line as it
  477. * should be passed to <f AppendLineToBuf>.
  478. *
  479. * This examination method for determining start of comment depends on
  480. * the source file type (as obtained from the fileEntry.type field of
  481. * <p sf>). Unknown file types are examined and placed into one of the
  482. * other known source types as soon as distinguishing characters are
  483. * found. (ie if '/ *' is found in an unknown, the file is marked as C
  484. * source file the remainder of file processing. Note that this can
  485. * cause unknown file types to be incorrectly processed.)
  486. *
  487. */
  488. BOOL LookForCommentStart(NPSourceFile sf, PSTR buf, PSTR *nbuf)
  489. {
  490. /* Skip leading whitespace */
  491. for (; *buf && ISSPACE(*buf); buf++);
  492. if (!*buf)
  493. return False;
  494. switch (sf->fileEntry->type) {
  495. case SRC_C:
  496. if (!*(buf + 1))
  497. return False;
  498. if ((*buf == '/') && (*(buf+1) == '*')) {
  499. *nbuf = buf+2;
  500. return True;
  501. }
  502. break;
  503. case SRC_MASM:
  504. if (*buf == ';') {
  505. *nbuf = buf;
  506. return True;
  507. }
  508. break;
  509. /*
  510. * The catch all. This has serious potential for disaster!
  511. */
  512. case SRC_UNKNOWN:
  513. /* Try the MASM comment character */
  514. if (*buf == ';') {
  515. fprintf(stderr, msgTypeMASM,
  516. sf->fileEntry->filename, sf->wLineNo);
  517. sf->fileEntry->type = SRC_MASM;
  518. *nbuf = buf;
  519. return True;
  520. }
  521. /* Otherwise, try the C-method */
  522. if (!*(buf + 1))
  523. return False;
  524. if ((*buf == '/') && (*(buf+1) == '*')) {
  525. fprintf(stderr, msgTypeC,
  526. sf->fileEntry->filename, sf->wLineNo);
  527. sf->fileEntry->type = SRC_C;
  528. *nbuf = buf+2;
  529. return True;
  530. }
  531. break;
  532. default:
  533. // dprintf("Unknown filetype identifier in sourceFile buffer.\n");
  534. assert(False);
  535. }
  536. return False;
  537. }
  538. /*
  539. * @doc EXTRACT
  540. * @api void | AppendLineToBuf | Appends an stripped comment line the
  541. * comment buffer contained in <p sf>.
  542. *
  543. * @parm NPSourceFile | sf | Source file buffer block pointer.
  544. * Contains the buffer that is appended to.
  545. * @parm PSTR | buf | Pointer to NULL terminated line to add to the
  546. * comment buffer.
  547. *
  548. * @comm Appends <p buf> to the comment buffer, contained in the lpbuf
  549. * field of <p sf>. The current point in the comment buffer, (given by
  550. * the pt field of <p sf>) is advanced to the end of the appended
  551. * string.
  552. *
  553. */
  554. void AppendLineToBuf(NPSourceFile sf, PSTR buf)
  555. {
  556. int size;
  557. PSTR ch;
  558. PSTR end;
  559. #define GROWSIZE 1024
  560. if (!sf->fHasTags)
  561. /* If buffer doesn't yet have tags, check if one was just
  562. * found, and the copy
  563. */
  564. if (sf->fTag) {
  565. sf->fHasTags = True;
  566. sf->wLineBuf = sf->wLineNo;
  567. }
  568. /* Or no tags in buffer yet, return */
  569. else {
  570. *sf->pt = '\0';
  571. return;
  572. }
  573. // dprintf("AppendLineToBuf: %d\n", (int) (sf->pt - sf->lpbuf));
  574. /* Otherwise, the buffer has tags, so copy the new string */
  575. end = (PSTR) (sf->lpbuf + (int) NearSize(sf->lpbuf));
  576. for (ch = buf; *ch && (sf->pt < end); *sf->pt++ = *ch++);
  577. /* Deal with possible buffer overrun */
  578. if (sf->pt >= end) {
  579. WORD origPt;
  580. int needSize;
  581. /* dprintf("AppendLine: expanding buf %x, pt %x, end %x\n",
  582. sf->lpbuf, sf->pt, end);
  583. */
  584. origPt = (WORD) (sf->pt - sf->lpbuf); // save current offset
  585. needSize = strlen(ch) + 1; // grow by this much
  586. sf->lpbuf = NearRealloc(sf->lpbuf,
  587. (WORD)(NearSize(sf->lpbuf) + max(needSize, GROWSIZE)));
  588. sf->pt = sf->lpbuf + origPt;
  589. /* Continue with the copy */
  590. for (; *ch; *sf->pt++ = *ch++);
  591. }
  592. /* make sure that final buffer is null terminated */
  593. *sf->pt = '\0';
  594. }