Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

551 lines
13 KiB

  1. package Pod::Text;
  2. =head1 NAME
  3. Pod::Text - convert POD data to formatted ASCII text
  4. =head1 SYNOPSIS
  5. use Pod::Text;
  6. pod2text("perlfunc.pod");
  7. Also:
  8. pod2text [B<-a>] [B<->I<width>] < input.pod
  9. =head1 DESCRIPTION
  10. Pod::Text is a module that can convert documentation in the POD format (such
  11. as can be found throughout the Perl distribution) into formatted ASCII.
  12. Termcap is optionally supported for boldface/underline, and can enabled via
  13. C<$Pod::Text::termcap=1>. If termcap has not been enabled, then backspaces
  14. will be used to simulate bold and underlined text.
  15. A separate F<pod2text> program is included that is primarily a wrapper for
  16. Pod::Text.
  17. The single function C<pod2text()> can take the optional options B<-a>
  18. for an alternative output format, then a B<->I<width> option with the
  19. max terminal width, followed by one or two arguments. The first
  20. should be the name of a file to read the pod from, or "E<lt>&STDIN" to read from
  21. STDIN. A second argument, if provided, should be a filehandle glob where
  22. output should be sent.
  23. =head1 AUTHOR
  24. Tom Christiansen E<lt>F<[email protected]>E<gt>
  25. =head1 TODO
  26. Cleanup work. The input and output locations need to be more flexible,
  27. termcap shouldn't be a global variable, and the terminal speed needs to
  28. be properly calculated.
  29. =cut
  30. use Term::Cap;
  31. require Exporter;
  32. @ISA = Exporter;
  33. @EXPORT = qw(pod2text);
  34. use vars qw($VERSION);
  35. $VERSION = "1.0203";
  36. use locale; # make \w work right in non-ASCII lands
  37. $termcap=0;
  38. $opt_alt_format = 0;
  39. #$use_format=1;
  40. $UNDL = "\x1b[4m";
  41. $INV = "\x1b[7m";
  42. $BOLD = "\x1b[1m";
  43. $NORM = "\x1b[0m";
  44. sub pod2text {
  45. shift if $opt_alt_format = ($_[0] eq '-a');
  46. if($termcap and !$setuptermcap) {
  47. $setuptermcap=1;
  48. my($term) = Tgetent Term::Cap { TERM => undef, OSPEED => 9600 };
  49. $UNDL = $term->{'_us'};
  50. $INV = $term->{'_mr'};
  51. $BOLD = $term->{'_md'};
  52. $NORM = $term->{'_me'};
  53. }
  54. $SCREEN = ($_[0] =~ /^-(\d+)/ && (shift, $1))
  55. || $ENV{COLUMNS}
  56. || ($ENV{TERMCAP} =~ /co#(\d+)/)[0]
  57. || ($^O ne 'MSWin32' && $^O ne 'dos' && (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0])
  58. || 72;
  59. @_ = ("<&STDIN") unless @_;
  60. local($file,*OUTPUT) = @_;
  61. *OUTPUT = *STDOUT if @_<2;
  62. local $: = $:;
  63. $: = " \n" if $opt_alt_format; # Do not break ``-L/lib/'' into ``- L/lib/''.
  64. $/ = "";
  65. $FANCY = 0;
  66. $cutting = 1;
  67. $DEF_INDENT = 4;
  68. $indent = $DEF_INDENT;
  69. $needspace = 0;
  70. $begun = "";
  71. open(IN, $file) || die "Couldn't open $file: $!";
  72. POD_DIRECTIVE: while (<IN>) {
  73. if ($cutting) {
  74. next unless /^=/;
  75. $cutting = 0;
  76. }
  77. if ($begun) {
  78. if (/^=end\s+$begun/) {
  79. $begun = "";
  80. }
  81. elsif ($begun eq "text") {
  82. print OUTPUT $_;
  83. }
  84. next;
  85. }
  86. 1 while s{^(.*?)(\t+)(.*)$}{
  87. $1
  88. . (' ' x (length($2) * 8 - length($1) % 8))
  89. . $3
  90. }me;
  91. # Translate verbatim paragraph
  92. if (/^\s/) {
  93. output($_);
  94. next;
  95. }
  96. if (/^=for\s+(\S+)\s*(.*)/s) {
  97. if ($1 eq "text") {
  98. print OUTPUT $2,"";
  99. } else {
  100. # ignore unknown for
  101. }
  102. next;
  103. }
  104. elsif (/^=begin\s+(\S+)\s*(.*)/s) {
  105. $begun = $1;
  106. if ($1 eq "text") {
  107. print OUTPUT $2."";
  108. }
  109. next;
  110. }
  111. sub prepare_for_output {
  112. s/\s*$/\n/;
  113. &init_noremap;
  114. # need to hide E<> first; they're processed in clear_noremap
  115. s/(E<[^<>]+>)/noremap($1)/ge;
  116. $maxnest = 10;
  117. while ($maxnest-- && /[A-Z]</) {
  118. unless ($FANCY) {
  119. if ($opt_alt_format) {
  120. s/[BC]<(.*?)>/``$1''/sg;
  121. s/F<(.*?)>/"$1"/sg;
  122. } else {
  123. s/C<(.*?)>/`$1'/sg;
  124. }
  125. } else {
  126. s/C<(.*?)>/noremap("E<lchevron>${1}E<rchevron>")/sge;
  127. }
  128. # s/[IF]<(.*?)>/italic($1)/ge;
  129. s/I<(.*?)>/*$1*/sg;
  130. # s/[CB]<(.*?)>/bold($1)/ge;
  131. s/X<.*?>//sg;
  132. # LREF: a la HREF L<show this text|man/section>
  133. s:L<([^|>]+)\|[^>]+>:$1:g;
  134. # LREF: a manpage(3f)
  135. s:L<([a-zA-Z][^\s\/]+)(\([^\)]+\))?>:the $1$2 manpage:g;
  136. # LREF: an =item on another manpage
  137. s{
  138. L<
  139. ([^/]+)
  140. /
  141. (
  142. [:\w]+
  143. (\(\))?
  144. )
  145. >
  146. } {the "$2" entry in the $1 manpage}gx;
  147. # LREF: an =item on this manpage
  148. s{
  149. ((?:
  150. L<
  151. /
  152. (
  153. [:\w]+
  154. (\(\))?
  155. )
  156. >
  157. (,?\s+(and\s+)?)?
  158. )+)
  159. } { internal_lrefs($1) }gex;
  160. # LREF: a =head2 (head1?), maybe on a manpage, maybe right here
  161. # the "func" can disambiguate
  162. s{
  163. L<
  164. (?:
  165. ([a-zA-Z]\S+?) /
  166. )?
  167. "?(.*?)"?
  168. >
  169. }{
  170. do {
  171. $1 # if no $1, assume it means on this page.
  172. ? "the section on \"$2\" in the $1 manpage"
  173. : "the section on \"$2\""
  174. }
  175. }sgex;
  176. s/[A-Z]<(.*?)>/$1/sg;
  177. }
  178. clear_noremap(1);
  179. }
  180. &prepare_for_output;
  181. if (s/^=//) {
  182. # $needspace = 0; # Assume this.
  183. # s/\n/ /g;
  184. ($Cmd, $_) = split(' ', $_, 2);
  185. # clear_noremap(1);
  186. if ($Cmd eq 'cut') {
  187. $cutting = 1;
  188. }
  189. elsif ($Cmd eq 'pod') {
  190. $cutting = 0;
  191. }
  192. elsif ($Cmd eq 'head1') {
  193. makespace();
  194. if ($opt_alt_format) {
  195. print OUTPUT "\n";
  196. s/^(.+?)[ \t]*$/==== $1 ====/;
  197. }
  198. print OUTPUT;
  199. # print OUTPUT uc($_);
  200. $needspace = $opt_alt_format;
  201. }
  202. elsif ($Cmd eq 'head2') {
  203. makespace();
  204. # s/(\w+)/\u\L$1/g;
  205. #print ' ' x $DEF_INDENT, $_;
  206. # print "\xA7";
  207. s/(\w)/\xA7 $1/ if $FANCY;
  208. if ($opt_alt_format) {
  209. s/^(.+?)[ \t]*$/== $1 ==/;
  210. print OUTPUT "\n", $_;
  211. } else {
  212. print OUTPUT ' ' x ($DEF_INDENT/2), $_, "\n";
  213. }
  214. $needspace = $opt_alt_format;
  215. }
  216. elsif ($Cmd eq 'over') {
  217. push(@indent,$indent);
  218. $indent += ($_ + 0) || $DEF_INDENT;
  219. }
  220. elsif ($Cmd eq 'back') {
  221. $indent = pop(@indent);
  222. warn "Unmatched =back\n" unless defined $indent;
  223. }
  224. elsif ($Cmd eq 'item') {
  225. makespace();
  226. # s/\A(\s*)\*/$1\xb7/ if $FANCY;
  227. # s/^(\s*\*\s+)/$1 /;
  228. {
  229. if (length() + 3 < $indent) {
  230. my $paratag = $_;
  231. $_ = <IN>;
  232. if (/^=/) { # tricked!
  233. local($indent) = $indent[$#indent - 1] || $DEF_INDENT;
  234. output($paratag);
  235. redo POD_DIRECTIVE;
  236. }
  237. &prepare_for_output;
  238. IP_output($paratag, $_);
  239. } else {
  240. local($indent) = $indent[$#indent - 1] || $DEF_INDENT;
  241. output($_, 0);
  242. }
  243. }
  244. }
  245. else {
  246. warn "Unrecognized directive: $Cmd\n";
  247. }
  248. }
  249. else {
  250. # clear_noremap(1);
  251. makespace();
  252. output($_, 1);
  253. }
  254. }
  255. close(IN);
  256. }
  257. #########################################################################
  258. sub makespace {
  259. if ($needspace) {
  260. print OUTPUT "\n";
  261. $needspace = 0;
  262. }
  263. }
  264. sub bold {
  265. my $line = shift;
  266. return $line if $use_format;
  267. if($termcap) {
  268. $line = "$BOLD$line$NORM";
  269. } else {
  270. $line =~ s/(.)/$1\b$1/g;
  271. }
  272. # $line = "$BOLD$line$NORM" if $ansify;
  273. return $line;
  274. }
  275. sub italic {
  276. my $line = shift;
  277. return $line if $use_format;
  278. if($termcap) {
  279. $line = "$UNDL$line$NORM";
  280. } else {
  281. $line =~ s/(.)/$1\b_/g;
  282. }
  283. # $line = "$UNDL$line$NORM" if $ansify;
  284. return $line;
  285. }
  286. # Fill a paragraph including underlined and overstricken chars.
  287. # It's not perfect for words longer than the margin, and it's probably
  288. # slow, but it works.
  289. sub fill {
  290. local $_ = shift;
  291. my $par = "";
  292. my $indent_space = " " x $indent;
  293. my $marg = $SCREEN-$indent;
  294. my $line = $indent_space;
  295. my $line_length;
  296. foreach (split) {
  297. my $word_length = length;
  298. $word_length -= 2 while /\010/g; # Subtract backspaces
  299. if ($line_length + $word_length > $marg) {
  300. $par .= $line . "\n";
  301. $line= $indent_space . $_;
  302. $line_length = $word_length;
  303. }
  304. else {
  305. if ($line_length) {
  306. $line_length++;
  307. $line .= " ";
  308. }
  309. $line_length += $word_length;
  310. $line .= $_;
  311. }
  312. }
  313. $par .= "$line\n" if $line;
  314. $par .= "\n";
  315. return $par;
  316. }
  317. sub IP_output {
  318. local($tag, $_) = @_;
  319. local($tag_indent) = $indent[$#indent - 1] || $DEF_INDENT;
  320. $tag_cols = $SCREEN - $tag_indent;
  321. $cols = $SCREEN - $indent;
  322. $tag =~ s/\s*$//;
  323. s/\s+/ /g;
  324. s/^ //;
  325. $str = "format OUTPUT = \n"
  326. . (($opt_alt_format && $tag_indent > 1)
  327. ? ":" . " " x ($tag_indent - 1)
  328. : " " x ($tag_indent))
  329. . '@' . ('<' x ($indent - $tag_indent - 1))
  330. . "^" . ("<" x ($cols - 1)) . "\n"
  331. . '$tag, $_'
  332. . "\n~~"
  333. . (" " x ($indent-2))
  334. . "^" . ("<" x ($cols - 5)) . "\n"
  335. . '$_' . "\n\n.\n1";
  336. #warn $str; warn "tag is $tag, _ is $_";
  337. eval $str || die;
  338. write OUTPUT;
  339. }
  340. sub output {
  341. local($_, $reformat) = @_;
  342. if ($reformat) {
  343. $cols = $SCREEN - $indent;
  344. s/\s+/ /g;
  345. s/^ //;
  346. $str = "format OUTPUT = \n~~"
  347. . (" " x ($indent-2))
  348. . "^" . ("<" x ($cols - 5)) . "\n"
  349. . '$_' . "\n\n.\n1";
  350. eval $str || die;
  351. write OUTPUT;
  352. } else {
  353. s/^/' ' x $indent/gem;
  354. s/^\s+\n$/\n/gm;
  355. s/^ /: /s if defined($reformat) && $opt_alt_format;
  356. print OUTPUT;
  357. }
  358. }
  359. sub noremap {
  360. local($thing_to_hide) = shift;
  361. $thing_to_hide =~ tr/\000-\177/\200-\377/;
  362. return $thing_to_hide;
  363. }
  364. sub init_noremap {
  365. die "unmatched init" if $mapready++;
  366. #mask off high bit characters in input stream
  367. s/([\200-\377])/"E<".ord($1).">"/ge;
  368. }
  369. sub clear_noremap {
  370. my $ready_to_print = $_[0];
  371. die "unmatched clear" unless $mapready--;
  372. tr/\200-\377/\000-\177/;
  373. # now for the E<>s, which have been hidden until now
  374. # otherwise the interative \w<> processing would have
  375. # been hosed by the E<gt>
  376. s {
  377. E<
  378. (
  379. ( \d+ )
  380. | ( [A-Za-z]+ )
  381. )
  382. >
  383. } {
  384. do {
  385. defined $2
  386. ? chr($2)
  387. :
  388. defined $HTML_Escapes{$3}
  389. ? do { $HTML_Escapes{$3} }
  390. : do {
  391. warn "Unknown escape: E<$1> in $_";
  392. "E<$1>";
  393. }
  394. }
  395. }egx if $ready_to_print;
  396. }
  397. sub internal_lrefs {
  398. local($_) = shift;
  399. s{L</([^>]+)>}{$1}g;
  400. my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
  401. my $retstr = "the ";
  402. my $i;
  403. for ($i = 0; $i <= $#items; $i++) {
  404. $retstr .= "C<$items[$i]>";
  405. $retstr .= ", " if @items > 2 && $i != $#items;
  406. $retstr .= " and " if $i+2 == @items;
  407. }
  408. $retstr .= " entr" . ( @items > 1 ? "ies" : "y" )
  409. . " elsewhere in this document ";
  410. return $retstr;
  411. }
  412. BEGIN {
  413. %HTML_Escapes = (
  414. 'amp' => '&', # ampersand
  415. 'lt' => '<', # left chevron, less-than
  416. 'gt' => '>', # right chevron, greater-than
  417. 'quot' => '"', # double quote
  418. "Aacute" => "\xC1", # capital A, acute accent
  419. "aacute" => "\xE1", # small a, acute accent
  420. "Acirc" => "\xC2", # capital A, circumflex accent
  421. "acirc" => "\xE2", # small a, circumflex accent
  422. "AElig" => "\xC6", # capital AE diphthong (ligature)
  423. "aelig" => "\xE6", # small ae diphthong (ligature)
  424. "Agrave" => "\xC0", # capital A, grave accent
  425. "agrave" => "\xE0", # small a, grave accent
  426. "Aring" => "\xC5", # capital A, ring
  427. "aring" => "\xE5", # small a, ring
  428. "Atilde" => "\xC3", # capital A, tilde
  429. "atilde" => "\xE3", # small a, tilde
  430. "Auml" => "\xC4", # capital A, dieresis or umlaut mark
  431. "auml" => "\xE4", # small a, dieresis or umlaut mark
  432. "Ccedil" => "\xC7", # capital C, cedilla
  433. "ccedil" => "\xE7", # small c, cedilla
  434. "Eacute" => "\xC9", # capital E, acute accent
  435. "eacute" => "\xE9", # small e, acute accent
  436. "Ecirc" => "\xCA", # capital E, circumflex accent
  437. "ecirc" => "\xEA", # small e, circumflex accent
  438. "Egrave" => "\xC8", # capital E, grave accent
  439. "egrave" => "\xE8", # small e, grave accent
  440. "ETH" => "\xD0", # capital Eth, Icelandic
  441. "eth" => "\xF0", # small eth, Icelandic
  442. "Euml" => "\xCB", # capital E, dieresis or umlaut mark
  443. "euml" => "\xEB", # small e, dieresis or umlaut mark
  444. "Iacute" => "\xCD", # capital I, acute accent
  445. "iacute" => "\xED", # small i, acute accent
  446. "Icirc" => "\xCE", # capital I, circumflex accent
  447. "icirc" => "\xEE", # small i, circumflex accent
  448. "Igrave" => "\xCD", # capital I, grave accent
  449. "igrave" => "\xED", # small i, grave accent
  450. "Iuml" => "\xCF", # capital I, dieresis or umlaut mark
  451. "iuml" => "\xEF", # small i, dieresis or umlaut mark
  452. "Ntilde" => "\xD1", # capital N, tilde
  453. "ntilde" => "\xF1", # small n, tilde
  454. "Oacute" => "\xD3", # capital O, acute accent
  455. "oacute" => "\xF3", # small o, acute accent
  456. "Ocirc" => "\xD4", # capital O, circumflex accent
  457. "ocirc" => "\xF4", # small o, circumflex accent
  458. "Ograve" => "\xD2", # capital O, grave accent
  459. "ograve" => "\xF2", # small o, grave accent
  460. "Oslash" => "\xD8", # capital O, slash
  461. "oslash" => "\xF8", # small o, slash
  462. "Otilde" => "\xD5", # capital O, tilde
  463. "otilde" => "\xF5", # small o, tilde
  464. "Ouml" => "\xD6", # capital O, dieresis or umlaut mark
  465. "ouml" => "\xF6", # small o, dieresis or umlaut mark
  466. "szlig" => "\xDF", # small sharp s, German (sz ligature)
  467. "THORN" => "\xDE", # capital THORN, Icelandic
  468. "thorn" => "\xFE", # small thorn, Icelandic
  469. "Uacute" => "\xDA", # capital U, acute accent
  470. "uacute" => "\xFA", # small u, acute accent
  471. "Ucirc" => "\xDB", # capital U, circumflex accent
  472. "ucirc" => "\xFB", # small u, circumflex accent
  473. "Ugrave" => "\xD9", # capital U, grave accent
  474. "ugrave" => "\xF9", # small u, grave accent
  475. "Uuml" => "\xDC", # capital U, dieresis or umlaut mark
  476. "uuml" => "\xFC", # small u, dieresis or umlaut mark
  477. "Yacute" => "\xDD", # capital Y, acute accent
  478. "yacute" => "\xFD", # small y, acute accent
  479. "yuml" => "\xFF", # small y, dieresis or umlaut mark
  480. "lchevron" => "\xAB", # left chevron (double less than)
  481. "rchevron" => "\xBB", # right chevron (double greater than)
  482. );
  483. }
  484. 1;