Source code of Windows XP (NT5)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

627 lines
18 KiB

  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <assert.h>
  4. #include <string.h>
  5. #include "extract.h"
  6. #include "tags.h"
  7. #define SPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
  8. /* Output routine error messages */
  9. char errOut[] = "%s: Error writing to file.\n";
  10. /* Standard templated error messages */
  11. static char errmsg[] = "%s (%u): %s\n";
  12. /* File private functions
  13. */
  14. static WORD CommonGetBlock(NPSourceFile sf, PSTR p);
  15. /*
  16. * @doc EXTRACT
  17. *
  18. * @api void | OutputTag | Print a tag name to the output file.
  19. *
  20. * @parm NPSourceFile | sf | Specifies the source file buffer block.
  21. *
  22. * @parm WORD | wBlock | Specifies the current outerlevel block type.
  23. *
  24. * @parm WORD | wTag | Gives the index of the tag to print.
  25. *
  26. * @comm Prints the innerlevel block tag specified by <p wTag>, as
  27. * determined from the global tag array. The output tag printed is
  28. * affected by the current outerlevel block type, so that different
  29. * outerlevel blocks will generate different output tags for the same
  30. * input tag.
  31. *
  32. * The tag text is followed by a tab character. No output will occur if
  33. * the global fNoOutput flag is True.
  34. *
  35. */
  36. void OutputTag(NPSourceFile sf, WORD wBlock, WORD wTag)
  37. {
  38. if (fNoOutput)
  39. return;
  40. /* Output text, and if error occurs, exit() for now. HACK! */
  41. putc(TAG, fpOutput);
  42. assert(wBlock < NUM_LEVELS);
  43. /* Make sure there's a valid output tag to print */
  44. assert(DocTags[wBlock][wTag] != NULL);
  45. if (fputs(DocTags[wBlock][wTag], fpOutput)) {
  46. fprintf(stderr, errOut, sf->fileEntry->filename);
  47. exit(4);
  48. }
  49. putc('\t', fpOutput);
  50. }
  51. /*
  52. * @doc EXTRACT
  53. * @api void | OutputTagText | Print a tag to the output file, where
  54. * the tag is specified by an immediate string.
  55. *
  56. * @parm NPSourceFile | sf | Specifies the source file buffer block.
  57. * @parm PSTR | szTag | Specifies the tag to output.
  58. *
  59. * @comm Prints tag <p szTag> to the output file. The tag text is
  60. * followed by a tab character. <p szTag> should not include the tag
  61. * prefix character (ie the '@') sign, as this is printed automatically.
  62. * No output will occur if the global flag fNoOutput is set.
  63. *
  64. */
  65. void OutputTagText(NPSourceFile sf, PSTR szTag)
  66. {
  67. if (fNoOutput)
  68. return;
  69. /* Output text, and if error occurs, exit() for now. HACK! */
  70. putc(TAG, fpOutput);
  71. if (fputs(szTag, fpOutput)) {
  72. fprintf(stderr, errOut, sf->fileEntry->filename);
  73. exit(4);
  74. }
  75. putc('\t', fpOutput);
  76. }
  77. /*
  78. * @doc EXTRACT
  79. * @api void | OutputRegion | Print the text between the point and
  80. * the mark, inclusive.
  81. *
  82. * @parm NPSourceFile | sf | Specifies the source file buffer block,
  83. * containing the output buffer, point, and mark.
  84. * @parm char | chPost | Specifies character with which to output
  85. * after outputing the text region. Usually a newline. This character
  86. * is ignored if NULL.
  87. *
  88. * @comm Prints the region given by the pt and mark fields of <p sf>.
  89. * The text printed is inclusive from the point to the mark. The
  90. * <p chPost> character is appended to the output if <p chPost> is
  91. * non-NULL (useful for printing newlines or tabs).
  92. *
  93. * If a write error occurs, an error message is printed to stderr and
  94. * the program exited.
  95. *
  96. * No output occurs if the global fNoOutput flag is TRUE.
  97. *
  98. */
  99. void OutputRegion(NPSourceFile sf, char chPost)
  100. {
  101. char c;
  102. if (fNoOutput)
  103. return;
  104. /* Save char following mark, replace with NULL for printing */
  105. if (*sf->mark) {
  106. c = *(sf->mark + 1);
  107. *(sf->mark + 1) = '\0';
  108. }
  109. if (fputs(sf->pt, fpOutput)) {
  110. TextOutputError:
  111. fprintf(stderr, errOut, sf->fileEntry->filename);
  112. exit(4);
  113. }
  114. /* Send newline if one was asked for */
  115. if (chPost)
  116. if (EOF == putc(chPost, fpOutput))
  117. goto TextOutputError;
  118. /* Restored NULLed over character */
  119. if (*sf->mark)
  120. *(sf->mark + 1) = c;
  121. }
  122. /*
  123. * @doc EXTRACT
  124. * @api void | OutputText | Outputs an arbitrary text string to the
  125. * output file.
  126. *
  127. * @parm NPSourceFile | sf | Specifies the source file buffer block.
  128. * @parm PSTR | szText | Specifies the text string to print.
  129. *
  130. * @comm Prints <p szText> to the output file. If a write error
  131. * occurs, an error message is printed and the program exited. If the
  132. * global fNoOutput flag is set, no output occurs. No newlines or other
  133. * formatting characters are appended to the output.
  134. *
  135. */
  136. void OutputText(NPSourceFile sf, PSTR szText)
  137. {
  138. if (fNoOutput)
  139. return;
  140. if (fputs(szText, fpOutput)) {
  141. TextOutputError:
  142. fprintf(stderr, errOut, sf->fileEntry->filename);
  143. exit(4);
  144. }
  145. }
  146. /*
  147. * @doc EXTRACT
  148. * @api void | CopyRegion | Copies the current region from point to
  149. * mark inclusive into a null terminated buffer.
  150. * @parm NPSourceFile | sf | Specifies the source file buffer block.
  151. * @parm PSTR | buf | Pointer to buffer into which region will be
  152. * placed.
  153. * @parm WORD | wLen | Length in bytes of buffer <p buf>.
  154. *
  155. * @comm Copies the region from point to mark inclusive into the
  156. * <p buf>. Up to <p wLen> - 1 characters will be copied, and <p buf> is
  157. * guaranteed to be NULL terminated.
  158. *
  159. */
  160. void CopyRegion(NPSourceFile sf, PSTR buf, WORD wLen)
  161. {
  162. PSTR p;
  163. PSTR end;
  164. /* Fixup end to smaller of length of buffer, or region to copy */
  165. end = sf->pt + (int) min(wLen, ((int) (sf->mark - sf->pt)));
  166. for (p = sf->pt; *p && p < end; *buf++ = *p++);
  167. *buf = '\0';
  168. }
  169. /*
  170. * @doc EXTRACT
  171. * @api BOOL | FindNextTag | Moves the point forward until it points
  172. * to the next tag in a comment block, and moves the mark to the end of
  173. * the tag word.
  174. *
  175. * @parm NPSourceFile | sf | Specifies the source file buffer block.
  176. *
  177. * @rdesc Returns TRUE if a tag was found, or FALSE if no tag was found
  178. * in the comment block.
  179. *
  180. * @comm Starting from the current point, moves the point forward to
  181. * the next tag in the block. The mark is moved to the end of the tag
  182. * that is found. If no next tag exists in the buffer, FALSE is
  183. * returned and the mark and point are undefined.
  184. *
  185. * Note that multiple calls to <f FindNextTag> without intervening calls
  186. * to move the point will cause the same tag to be repeatadly
  187. * found, as the search for tags begins at the point.
  188. *
  189. */
  190. BOOL FindNextTag(NPSourceFile sf)
  191. {
  192. PSTR p;
  193. /* move forward until finding next tag, put point there */
  194. p = sf->pt;
  195. BogusNextTag:
  196. for (; *p && *p != TAG; p++);
  197. /* Make sure that this is a tag by testing for a \n before the TAG char */
  198. if (p > sf->lpbuf && *(p-1) != '\n') {
  199. p++;
  200. goto BogusNextTag;
  201. }
  202. if (!*p)
  203. return FALSE; // end of comment block!
  204. p++;
  205. if (!(*p && !SPACE(*p))) {
  206. sf->mark = p;
  207. return FALSE;
  208. }
  209. /* save beginning of tag */
  210. sf->pt = p - 1;
  211. /* now move forward until finding next space, set mark there */
  212. for (; *p && !SPACE(*p); p++);
  213. sf->mark = p;
  214. return TRUE;
  215. }
  216. /*
  217. * @doc EXTRACT
  218. * @api WORD | GetFirstBlock | Moves the point and mark to surround
  219. * the first block of text following a tag that has been located with
  220. * <f FindNextTag>.
  221. *
  222. * @parm NPSourceFile | sf | Identifies the source file buffer
  223. * block.
  224. *
  225. * @rdesc If the call succeeds, the point is set to the start of the
  226. * text block that immediately follows the tag. The mark is set to the
  227. * end of this block, and either RET_ENDTAG or RET_ENDBLOCK is returned,
  228. * depending on if there are no more blocks in the tag or if there is a block
  229. * following respectively.
  230. *
  231. * If the call fails, the point is set to the start of the next tag or
  232. * the end of the comment buffer if no more tags exist, and
  233. * RET_EMPTYBLOCK is returned.
  234. *
  235. * In any case, if this function is followed by a call to
  236. * <f FindNextTag>, no problems will result.
  237. *
  238. * @comm This call expects the point to be pointing the beginning of
  239. * the tag upon entry (as setup by <f FindNextTag>). Error conditions
  240. * should be checked upon exit from this function.
  241. *
  242. */
  243. WORD GetFirstBlock(NPSourceFile sf)
  244. {
  245. PSTR p;
  246. p = sf->pt;
  247. /* Assumes that I'm on beginning of tag */
  248. assert(*p == TAG);
  249. /* Move forward to first non-whitespace, to skip over tag */
  250. for (; *p && !SPACE(*p); p++); // skip word
  251. for (; *p && SPACE(*p); p++); // skip whitespace
  252. /* Set point to this location, the beginning of the text */
  253. sf->pt = p;
  254. return CommonGetBlock(sf, p);
  255. }
  256. /*
  257. * @doc EXTRACT
  258. * @api WORD | GetNextBlock | Moves the point and mark to surround
  259. * the next block of text of a particular tag.
  260. *
  261. * @parm NPSourceFile | sf | Identifies the source file buffer
  262. * information.
  263. *
  264. * @rdesc If the call succeeds, the point is set to the start of the
  265. * text block that follows the initial mark. The mark upon return is
  266. * set the end of the next text block. Either RET_ENDTAG or
  267. * RET_ENDBLOCK is returned.
  268. *
  269. * If the call fails due to a non existent block, or encountering the
  270. * end of the comment buffer, RET_ENDCOMMENT is returned and the point
  271. * is set to the start of the next tag or the end of the comment
  272. * buffer.
  273. *
  274. * @comm This procedure, in combination with <f GetFirstBlock>, allows
  275. * the tag reader to step through the text fields associated with a tag.
  276. * Contiguous calls to <f GetNextBlock> are possible, which will
  277. * move the region forward to surround each field. If the tag's text
  278. * fields end prematurely, RET_EMPTYBLOCK will be returned as an error
  279. * flag.
  280. *
  281. * Calls to <f GetNextBlock> may always be followed by a call to
  282. * <f FindNextTag>.
  283. *
  284. */
  285. WORD GetNextBlock(NPSourceFile sf)
  286. {
  287. PSTR p;
  288. WORD ret;
  289. /* Entry: mark is at end of previous block of text. Move forward
  290. * to find the start of the next block (the one we want).
  291. */
  292. p = sf->mark;
  293. /* If I'm on a block char, then this is an empty block being exited,
  294. * So we want to not skip whitespace
  295. */
  296. if (*p != BLOCK)
  297. p++;
  298. /* Skip whitespace, till `|' char found */
  299. for (; *p && SPACE(*p); p++);
  300. /* This should be the start of next block. If not, then puke */
  301. if (*p != BLOCK) {
  302. sf->pt = sf->mark = p; // reset mark and point for FindNextTag.
  303. return RET_EMPTYBLOCK;
  304. }
  305. /* Don't bother with END_COMMENT conditions (ie NULL), as CommonGetBlock
  306. * will return RET_EMPTYBLOCK for this case. The next FindNextTag()
  307. * will then fail, causing a general comment buffer failure to result!
  308. */
  309. #if 0
  310. if (!*p) {
  311. sf->pt = p;
  312. return RET_ENDCOMMENT;
  313. }
  314. #endif
  315. /* Skip more whitespace, to start of actual text, set point there */
  316. /* (if this under EOF, no pt advance is done */
  317. if (*p) // skip the '|' char if there is one.
  318. p++;
  319. for (; *p && SPACE(*p); p++);
  320. sf->pt = p; // point at beginning of text
  321. return CommonGetBlock(sf, p);
  322. }
  323. /*
  324. * @doc EXTRACT
  325. * @api WORD | CommonGetBlock | Common block searcher routine for use
  326. * by <f GetFirstBlock> and <f GetNextBlock>.
  327. *
  328. * @parm NPSourceFile | sf | Blah.
  329. * @parm PSTR | p | Point to start searching for the beginning of a
  330. * text block from.
  331. *
  332. * @rdesc Returns RET_ENDBLOCK when there are text blocks following
  333. * this tag, RET_ENDTAG when no more text blocks follow for this tag, or
  334. * RET_EMPTYBLOCK when this block has no text. Current region (point to
  335. * mark inclusive) is set to the selected block.
  336. *
  337. * @comm Performs magic. This does the real work for <f GetNextBlock>
  338. * and <f GetFirstBlock>.
  339. *
  340. */
  341. static WORD CommonGetBlock(NPSourceFile sf, PSTR p)
  342. {
  343. PSTR porig;
  344. PSTR psave;
  345. WORD ret;
  346. /* Entry: Save the initial p, as this is assumed to be the
  347. * start of the current block.
  348. */
  349. porig = p;
  350. /* Scan forward until end of this block, either @ or | or EOF */
  351. GetBlockScan:
  352. for (; *p && !(*p == TAG || *p == BLOCK); p++);
  353. /* Make sure there isn't an escaped char kicking off the scan */
  354. if (*p == BLOCK)
  355. if (p > sf->lpbuf && *(p-1) == '\\') {
  356. p++;
  357. goto GetBlockScan;
  358. }
  359. /* Check the same thing for at characters */
  360. if (*p == TAG)
  361. /* Tag must be on start of new line, so if not there, kick it out */
  362. if (p > sf->lpbuf && *(p-1) != '\n') {
  363. p++;
  364. goto GetBlockScan;
  365. }
  366. /* Encountered another tag, or another block. For both, backup to
  367. * last non-white character, set mark there. Return appropriate
  368. * condition codes.
  369. */
  370. ret = RET_ENDTAG; // the default return value.
  371. if (*p == BLOCK)
  372. ret = RET_ENDBLOCK; // if encountered another block following
  373. /* Now back up whitespaces until last non-whitespace is found.
  374. * If we end up backing up over the original setting of p on entry,
  375. * then this is an empty block, and return error condition.
  376. */
  377. psave = p; // hang onto this location, if EMPTYBLOCK occurs.
  378. for (p--; *p && SPACE(*p) && p >= porig; p--);
  379. if (p < porig) { // emptyblock, so pt = end of prev block.
  380. sf->mark = sf->pt = psave; // point to next tag
  381. return RET_EMPTYBLOCK;
  382. }
  383. else { // normal backed up to end of block, set mark there.
  384. sf->mark = p;
  385. return ret;
  386. }
  387. }
  388. /*
  389. * @doc EXTRACT
  390. * @api void | FixLineCounts | Updates the line counts of the current
  391. * point and mark for error reporting purposes.
  392. *
  393. * @parm NPSourceFile | sf | Blah.
  394. *
  395. * @parm PSTR | pt | Point to return the line number of. This must be
  396. * a valid point within the comment buffer of <p sf>.
  397. *
  398. * @rdesc Returns the line number of point <p pt> within the comment
  399. * buffer of <p sf>. Newlines are counted to determine the line offset
  400. * within the buffer, and the resulting number of newlines added to the
  401. * initial line number of the first line of the comment buffer. This
  402. * value is returned. It is thus important for other tag reader
  403. * routines not to alter the original comment buffer, as the line number
  404. * returned from this routine would then be invalid.
  405. *
  406. */
  407. WORD FixLineCounts(NPSourceFile sf, PSTR pt)
  408. {
  409. PSTR c;
  410. WORD w;
  411. /* Update the line counts for the point and mark by counting
  412. * newlines in the buffer
  413. */
  414. w = 0;
  415. for (c = sf->lpbuf; c <= pt; c++) {
  416. if (*c == '\n')
  417. w++;
  418. if (c == pt)
  419. return (sf->wLineBuf + w);
  420. }
  421. /* something bogus happened */
  422. return 0;
  423. }
  424. /*
  425. * @doc EXTRACT
  426. * @api void | PrintError | Prints an error message in a standard
  427. * format, and sets the exit condition flag for the source file block.
  428. *
  429. * @parm NPSourceFile | sf | Specifies the source file buffer block.
  430. * @parm PSTR | szMessage | Error message to print.
  431. * @parm BOOL | fExit | Indicates whether this is a fatal exit. If
  432. * TRUE, the program will exit when the current file has been completely
  433. * parsed.
  434. *
  435. * @comm Prints the source file filename and the line number of the
  436. * current point to standard error, followed by <p szMessage>.
  437. *
  438. */
  439. void PrintError(NPSourceFile sf, PSTR szMessage, BOOL fExit)
  440. {
  441. WORD w;
  442. w = FixLineCounts(sf, sf->pt);
  443. fprintf(stderr, errmsg, sf->fileEntry->filename, w, szMessage);
  444. if (fExit)
  445. sf->fExitAfter = TRUE;
  446. }
  447. /*
  448. * @doc EXTRACT
  449. * @api WORD | ProcessWordList | Process a whitespace or comma
  450. * separated list of words following a tag, formatting
  451. * them as a space separated list of words.
  452. *
  453. * @parm NPSourceFile | sf | Blah.
  454. * @parm PSTR * | bufPt | Pointer to a buffer pointer, which should
  455. * initially contain a near buffer obtained with <f NearMalloc>, where
  456. * the formatted word list will be placed. The buffer pointed to will
  457. * be automatically expanded as necessary.
  458. *
  459. * @parm BOOL | fCap | Specifies whether to convert to uppercase
  460. * the processed list of words.
  461. *
  462. * @rdesc Returns either RET_ENDBLOCK or RET_ENDTAG, depending on
  463. * whether there are following blocks within the tag's text or not,
  464. * respectively. (What a horrible sentence). The point and mark will
  465. * be at the end of the text block upon return. If there is no text
  466. * block following the tag, then RET_EMPTYBLOCK is returned, and the
  467. * point and mark point to the next tag in the comment block, or the
  468. * end of the comment block.
  469. *
  470. */
  471. #define SEPSPACE(c) ((c)==' ' || (c)=='\n' || (c)=='\t' ||(c)==','||(c)==';')
  472. WORD ProcessWordList(NPSourceFile sf, PSTR *bufPt, BOOL fCap)
  473. {
  474. WORD ret; // hold return code
  475. PSTR pNew; // runner on copy buffer
  476. PSTR pOldMark; // keep the old mark around
  477. PSTR p; // runner on comment block
  478. ret = RET_ENDTAG;
  479. /* Get the text of the first block, ie the doclevel specification */
  480. ret = GetFirstBlock(sf);
  481. if (ret == RET_EMPTYBLOCK)
  482. return ret;
  483. /* Warn if there's extra text blocks on DOC tag, ie ret == RET_ENDBLOCK */
  484. /* Grow the memory copy buffer if needed */
  485. if (NearSize(*bufPt) < (int) (sf->mark - sf->pt) + 5)
  486. *bufPt = NearRealloc(*bufPt, (WORD) (sf->mark - sf->pt) + 10);
  487. /* Save away copy buffer status */
  488. pNew = *bufPt;
  489. pOldMark = sf->mark + 1; // save mark plus one
  490. p = sf->pt;
  491. while (1) {
  492. /* skip whitespace before doc level word */
  493. for (; p < pOldMark && SEPSPACE(*p); p++);
  494. if (p >= pOldMark) {
  495. dprintf("ProcessWordList: Breaking loop after space skip\n");
  496. break;
  497. }
  498. /* Save this location, beginning of word, and move to end of word */
  499. for (sf->pt = p; p < pOldMark && !SEPSPACE(*p); p++)
  500. if (fCap)
  501. *pNew++ = (char) toupper(*p);
  502. else
  503. *pNew++ = *p;
  504. /* Put a space between the words, and then null terminate in
  505. * case this is the last word in a list
  506. */
  507. *pNew++ = ' ';
  508. *pNew = '\0';
  509. /* Check if we're at end of buffer */
  510. if (p >= pOldMark) {
  511. dprintf("ProcessWordList: Breaking loop after word copy.\n");
  512. break; // get out of loop
  513. }
  514. } // while loop
  515. /* Restore point and mark to the end of @doc text block */
  516. sf->pt = sf->mark = pOldMark - 1;
  517. return ret;
  518. }
  519. /*
  520. * @doc EXTRACT
  521. * @api void | OutputFileHeader | Prints an output file header using
  522. * compiled in constants and system information.
  523. *
  524. * @parm FILE * | fpOut | File pointer to which to write header.
  525. *
  526. * @comm Currently, only the program name, version, and the current
  527. * time (in UNIX <f asctime>) format. The file header is surrounded by
  528. * header begin and end tags.
  529. *
  530. */
  531. #include <time.h>
  532. #include "version.h"
  533. void OutputFileHeader(FILE *fpOut)
  534. {
  535. time_t curtime;
  536. fprintf(fpOut, "@%s\t\n", T2TEXT_BEGINHEADER);
  537. fprintf(fpOut, "@%s\t%s\n", T2TEXT_EXTRACTID, VERSIONNAME);
  538. fprintf(fpOut, "@%s\t%d.%d.%d\n", T2TEXT_EXTRACTVER, rmj, rmm, rup);
  539. time(&curtime);
  540. fprintf(fpOut, "@%s\t%s", T2TEXT_EXTRACTDATE, asctime(localtime(&curtime)));
  541. fprintf(fpOut, "@%s\t\n", T2TEXT_ENDHEADER);
  542. }