Source code of Windows XP (NT5)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1784 lines
49 KiB

  1. package Pod::Html;
  2. use Pod::Functions;
  3. use Getopt::Long; # package for handling command-line parameters
  4. use File::Spec::Unix;
  5. require Exporter;
  6. use vars qw($VERSION);
  7. $VERSION = 1.02;
  8. @ISA = Exporter;
  9. @EXPORT = qw(pod2html htmlify);
  10. use Cwd;
  11. use Carp;
  12. use locale; # make \w work right in non-ASCII lands
  13. use strict;
  14. use Config;
  15. =head1 NAME
  16. Pod::Html - module to convert pod files to HTML
  17. =head1 SYNOPSIS
  18. use Pod::Html;
  19. pod2html([options]);
  20. =head1 DESCRIPTION
  21. Converts files from pod format (see L<perlpod>) to HTML format. It
  22. can automatically generate indexes and cross-references, and it keeps
  23. a cache of things it knows how to cross-reference.
  24. =head1 ARGUMENTS
  25. Pod::Html takes the following arguments:
  26. =over 4
  27. =item help
  28. --help
  29. Displays the usage message.
  30. =item htmldir
  31. --htmldir=name
  32. Sets the directory in which the resulting HTML file is placed. This
  33. is used to generate relative links to other files. Not passing this
  34. causes all links to be absolute, since this is the value that tells
  35. Pod::Html the root of the documentation tree.
  36. =item htmlroot
  37. --htmlroot=name
  38. Sets the base URL for the HTML files. When cross-references are made,
  39. the HTML root is prepended to the URL.
  40. =item infile
  41. --infile=name
  42. Specify the pod file to convert. Input is taken from STDIN if no
  43. infile is specified.
  44. =item outfile
  45. --outfile=name
  46. Specify the HTML file to create. Output goes to STDOUT if no outfile
  47. is specified.
  48. =item podroot
  49. --podroot=name
  50. Specify the base directory for finding library pods.
  51. =item podpath
  52. --podpath=name:...:name
  53. Specify which subdirectories of the podroot contain pod files whose
  54. HTML converted forms can be linked-to in cross-references.
  55. =item libpods
  56. --libpods=name:...:name
  57. List of page names (eg, "perlfunc") which contain linkable C<=item>s.
  58. =item netscape
  59. --netscape
  60. Use Netscape HTML directives when applicable.
  61. =item nonetscape
  62. --nonetscape
  63. Do not use Netscape HTML directives (default).
  64. =item index
  65. --index
  66. Generate an index at the top of the HTML file (default behaviour).
  67. =item noindex
  68. --noindex
  69. Do not generate an index at the top of the HTML file.
  70. =item recurse
  71. --recurse
  72. Recurse into subdirectories specified in podpath (default behaviour).
  73. =item norecurse
  74. --norecurse
  75. Do not recurse into subdirectories specified in podpath.
  76. =item title
  77. --title=title
  78. Specify the title of the resulting HTML file.
  79. =item css
  80. --css=stylesheet
  81. Specify the URL of a cascading style sheet.
  82. =item verbose
  83. --verbose
  84. Display progress messages.
  85. =item quiet
  86. --quiet
  87. Don't display I<mostly harmless> warning messages.
  88. =back
  89. =head1 EXAMPLE
  90. pod2html("pod2html",
  91. "--podpath=lib:ext:pod:vms",
  92. "--podroot=/usr/src/perl",
  93. "--htmlroot=/perl/nmanual",
  94. "--libpods=perlfunc:perlguts:perlvar:perlrun:perlop",
  95. "--recurse",
  96. "--infile=foo.pod",
  97. "--outfile=/perl/nmanual/foo.html");
  98. =head1 ENVIRONMENT
  99. Uses $Config{pod2html} to setup default options.
  100. =head1 AUTHOR
  101. Tom Christiansen, E<lt>tchrist@perl.comE<gt>.
  102. =head1 BUGS
  103. Has trouble with C<> etc in = commands.
  104. =head1 SEE ALSO
  105. L<perlpod>
  106. =head1 COPYRIGHT
  107. This program is distributed under the Artistic License.
  108. =cut
  109. my $cache_ext = $^O eq 'VMS' ? ".tmp" : ".x~~";
  110. my $dircache = "pod2htmd$cache_ext";
  111. my $itemcache = "pod2htmi$cache_ext";
  112. my @begin_stack = (); # begin/end stack
  113. my @libpods = (); # files to search for links from C<> directives
  114. my $htmlroot = "/"; # http-server base directory from which all
  115. # relative paths in $podpath stem.
  116. my $htmldir = ""; # The directory to which the html pages
  117. # will (eventually) be written.
  118. my $htmlfile = ""; # write to stdout by default
  119. my $htmlfileurl = "" ; # The url that other files would use to
  120. # refer to this file. This is only used
  121. # to make relative urls that point to
  122. # other files.
  123. my $podfile = ""; # read from stdin by default
  124. my @podpath = (); # list of directories containing library pods.
  125. my $podroot = "."; # filesystem base directory from which all
  126. # relative paths in $podpath stem.
  127. my $css = ''; # Cascading style sheet
  128. my $recurse = 1; # recurse on subdirectories in $podpath.
  129. my $quiet = 0; # not quiet by default
  130. my $verbose = 0; # not verbose by default
  131. my $doindex = 1; # non-zero if we should generate an index
  132. my $listlevel = 0; # current list depth
  133. my @listitem = (); # stack of HTML commands to use when a =item is
  134. # encountered. the top of the stack is the
  135. # current list.
  136. my @listdata = (); # similar to @listitem, but for the text after
  137. # an =item
  138. my @listend = (); # similar to @listitem, but the text to use to
  139. # end the list.
  140. my $ignore = 1; # whether or not to format text. we don't
  141. # format text until we hit our first pod
  142. # directive.
  143. my %items_named = (); # for the multiples of the same item in perlfunc
  144. my @items_seen = ();
  145. my $netscape = 0; # whether or not to use netscape directives.
  146. my $title; # title to give the pod(s)
  147. my $header = 0; # produce block header/footer
  148. my $top = 1; # true if we are at the top of the doc. used
  149. # to prevent the first <HR> directive.
  150. my $paragraph; # which paragraph we're processing (used
  151. # for error messages)
  152. my %pages = (); # associative array used to find the location
  153. # of pages referenced by L<> links.
  154. my %sections = (); # sections within this page
  155. my %items = (); # associative array used to find the location
  156. # of =item directives referenced by C<> links
  157. my $Is83; # is dos with short filenames (8.3)
  158. sub init_globals {
  159. $dircache = "pod2htmd$cache_ext";
  160. $itemcache = "pod2htmi$cache_ext";
  161. @begin_stack = (); # begin/end stack
  162. @libpods = (); # files to search for links from C<> directives
  163. $htmlroot = "/"; # http-server base directory from which all
  164. # relative paths in $podpath stem.
  165. $htmlfile = ""; # write to stdout by default
  166. $podfile = ""; # read from stdin by default
  167. @podpath = (); # list of directories containing library pods.
  168. $podroot = "."; # filesystem base directory from which all
  169. # relative paths in $podpath stem.
  170. $css = ''; # Cascading style sheet
  171. $recurse = 1; # recurse on subdirectories in $podpath.
  172. $quiet = 0; # not quiet by default
  173. $verbose = 0; # not verbose by default
  174. $doindex = 1; # non-zero if we should generate an index
  175. $listlevel = 0; # current list depth
  176. @listitem = (); # stack of HTML commands to use when a =item is
  177. # encountered. the top of the stack is the
  178. # current list.
  179. @listdata = (); # similar to @listitem, but for the text after
  180. # an =item
  181. @listend = (); # similar to @listitem, but the text to use to
  182. # end the list.
  183. $ignore = 1; # whether or not to format text. we don't
  184. # format text until we hit our first pod
  185. # directive.
  186. @items_seen = ();
  187. %items_named = ();
  188. $netscape = 0; # whether or not to use netscape directives.
  189. $header = 0; # produce block header/footer
  190. $title = ''; # title to give the pod(s)
  191. $top = 1; # true if we are at the top of the doc. used
  192. # to prevent the first <HR> directive.
  193. $paragraph = ''; # which paragraph we're processing (used
  194. # for error messages)
  195. %sections = (); # sections within this page
  196. # These are not reinitialised here but are kept as a cache.
  197. # See get_cache and related cache management code.
  198. #%pages = (); # associative array used to find the location
  199. # of pages referenced by L<> links.
  200. #%items = (); # associative array used to find the location
  201. # of =item directives referenced by C<> links
  202. $Is83=$^O eq 'dos';
  203. }
  204. sub pod2html {
  205. local(@ARGV) = @_;
  206. local($/);
  207. local $_;
  208. init_globals();
  209. $Is83 = 0 if (defined (&Dos::UseLFN) && Dos::UseLFN());
  210. # cache of %pages and %items from last time we ran pod2html
  211. #undef $opt_help if defined $opt_help;
  212. # parse the command-line parameters
  213. parse_command_line();
  214. # set some variables to their default values if necessary
  215. local *POD;
  216. unless (@ARGV && $ARGV[0]) {
  217. $podfile = "-" unless $podfile; # stdin
  218. open(POD, "<$podfile")
  219. || die "$0: cannot open $podfile file for input: $!\n";
  220. } else {
  221. $podfile = $ARGV[0]; # XXX: might be more filenames
  222. *POD = *ARGV;
  223. }
  224. $htmlfile = "-" unless $htmlfile; # stdout
  225. $htmlroot = "" if $htmlroot eq "/"; # so we don't get a //
  226. $htmldir =~ s#/$## ; # so we don't get a //
  227. if ( $htmlroot eq ''
  228. && defined( $htmldir )
  229. && $htmldir ne ''
  230. && substr( $htmlfile, 0, length( $htmldir ) ) eq $htmldir
  231. )
  232. {
  233. # Set the 'base' url for this file, so that we can use it
  234. # as the location from which to calculate relative links
  235. # to other files. If this is '', then absolute links will
  236. # be used throughout.
  237. $htmlfileurl= "$htmldir/" . substr( $htmlfile, length( $htmldir ) + 1);
  238. }
  239. # read the pod a paragraph at a time
  240. warn "Scanning for sections in input file(s)\n" if $verbose;
  241. $/ = "";
  242. my @poddata = <POD>;
  243. close(POD);
  244. # scan the pod for =head[1-6] directives and build an index
  245. my $index = scan_headings(\%sections, @poddata);
  246. unless($index) {
  247. warn "No headings in $podfile\n" if $verbose;
  248. }
  249. # open the output file
  250. open(HTML, ">$htmlfile")
  251. || die "$0: cannot open $htmlfile file for output: $!\n";
  252. # put a title in the HTML file if one wasn't specified
  253. if ($title eq '') {
  254. TITLE_SEARCH: {
  255. for (my $i = 0; $i < @poddata; $i++) {
  256. if ($poddata[$i] =~ /^=head1\s*NAME\b/m) {
  257. for my $para ( @poddata[$i, $i+1] ) {
  258. last TITLE_SEARCH
  259. if ($title) = $para =~ /(\S+\s+-+.*\S)/s;
  260. }
  261. }
  262. }
  263. }
  264. }
  265. if (!$title and $podfile =~ /\.pod$/) {
  266. # probably a split pod so take first =head[12] as title
  267. for (my $i = 0; $i < @poddata; $i++) {
  268. last if ($title) = $poddata[$i] =~ /^=head[12]\s*(.*)/;
  269. }
  270. warn "adopted '$title' as title for $podfile\n"
  271. if $verbose and $title;
  272. }
  273. if ($title) {
  274. $title =~ s/\s*\(.*\)//;
  275. } else {
  276. warn "$0: no title for $podfile" unless $quiet;
  277. $podfile =~ /^(.*)(\.[^.\/]+)?$/;
  278. $title = ($podfile eq "-" ? 'No Title' : $1);
  279. warn "using $title" if $verbose;
  280. }
  281. my $csslink = $css ? qq(\n<LINK REL="stylesheet" HREF="$css" TYPE="text/css">) : '';
  282. $csslink =~ s,\\,/,g;
  283. $csslink =~ s,(/.):,$1|,;
  284. my $block = $header ? <<END_OF_BLOCK : '';
  285. <TABLE BORDER=0 CELLPADDING=0 CELLSPACING=0 WIDTH=100%>
  286. <TR><TD CLASS=block VALIGN=MIDDLE WIDTH=100% BGCOLOR="#cccccc">
  287. <FONT SIZE=+1><STRONG><P CLASS=block>&nbsp;$title</P></STRONG></FONT>
  288. </TD></TR>
  289. </TABLE>
  290. END_OF_BLOCK
  291. print HTML <<END_OF_HEAD;
  292. <HTML>
  293. <HEAD>
  294. <TITLE>$title</TITLE>$csslink
  295. <LINK REV="made" HREF="mailto:$Config{perladmin}">
  296. </HEAD>
  297. <BODY>
  298. $block
  299. END_OF_HEAD
  300. # load/reload/validate/cache %pages and %items
  301. get_cache($dircache, $itemcache, \@podpath, $podroot, $recurse);
  302. # scan the pod for =item directives
  303. scan_items("", \%items, @poddata);
  304. # put an index at the top of the file. note, if $doindex is 0 we
  305. # still generate an index, but surround it with an html comment.
  306. # that way some other program can extract it if desired.
  307. $index =~ s/--+/-/g;
  308. print HTML "<!-- INDEX BEGIN -->\n";
  309. print HTML "<!--\n" unless $doindex;
  310. print HTML $index;
  311. print HTML "-->\n" unless $doindex;
  312. print HTML "<!-- INDEX END -->\n\n";
  313. print HTML "<HR>\n" if $doindex and $index;
  314. # now convert this file
  315. warn "Converting input file\n" if $verbose;
  316. foreach my $i (0..$#poddata) {
  317. $_ = $poddata[$i];
  318. $paragraph = $i+1;
  319. if (/^(=.*)/s) { # is it a pod directive?
  320. $ignore = 0;
  321. $_ = $1;
  322. if (/^=begin\s+(\S+)\s*(.*)/si) {# =begin
  323. process_begin($1, $2);
  324. } elsif (/^=end\s+(\S+)\s*(.*)/si) {# =end
  325. process_end($1, $2);
  326. } elsif (/^=cut/) { # =cut
  327. process_cut();
  328. } elsif (/^=pod/) { # =pod
  329. process_pod();
  330. } else {
  331. next if @begin_stack && $begin_stack[-1] ne 'html';
  332. if (/^=(head[1-6])\s+(.*\S)/s) { # =head[1-6] heading
  333. process_head($1, $2);
  334. } elsif (/^=item\s*(.*\S)/sm) { # =item text
  335. process_item($1);
  336. } elsif (/^=over\s*(.*)/) { # =over N
  337. process_over();
  338. } elsif (/^=back/) { # =back
  339. process_back();
  340. } elsif (/^=for\s+(\S+)\s+(.*)/si) {# =for
  341. process_for($1,$2);
  342. } else {
  343. /^=(\S*)\s*/;
  344. warn "$0: $podfile: unknown pod directive '$1' in "
  345. . "paragraph $paragraph. ignoring.\n";
  346. }
  347. }
  348. $top = 0;
  349. }
  350. else {
  351. next if $ignore;
  352. next if @begin_stack && $begin_stack[-1] ne 'html';
  353. my $text = $_;
  354. process_text(\$text, 1);
  355. print HTML "<P>\n$text</P>\n";
  356. }
  357. }
  358. # finish off any pending directives
  359. finish_list();
  360. print HTML <<END_OF_TAIL;
  361. $block
  362. </BODY>
  363. </HTML>
  364. END_OF_TAIL
  365. # close the html file
  366. close(HTML);
  367. warn "Finished\n" if $verbose;
  368. }
  369. ##############################################################################
  370. my $usage; # see below
  371. sub usage {
  372. my $podfile = shift;
  373. warn "$0: $podfile: @_\n" if @_;
  374. die $usage;
  375. }
  376. $usage =<<END_OF_USAGE;
  377. Usage: $0 --help --htmlroot=<name> --infile=<name> --outfile=<name>
  378. --podpath=<name>:...:<name> --podroot=<name>
  379. --libpods=<name>:...:<name> --recurse --verbose --index
  380. --netscape --norecurse --noindex
  381. --flush - flushes the item and directory caches.
  382. --help - prints this message.
  383. --htmlroot - http-server base directory from which all relative paths
  384. in podpath stem (default is /).
  385. --index - generate an index at the top of the resulting html
  386. (default).
  387. --infile - filename for the pod to convert (input taken from stdin
  388. by default).
  389. --libpods - colon-separated list of pages to search for =item pod
  390. directives in as targets of C<> and implicit links (empty
  391. by default). note, these are not filenames, but rather
  392. page names like those that appear in L<> links.
  393. --netscape - will use netscape html directives when applicable.
  394. --nonetscape - will not use netscape directives (default).
  395. --outfile - filename for the resulting html file (output sent to
  396. stdout by default).
  397. --podpath - colon-separated list of directories containing library
  398. pods. empty by default.
  399. --podroot - filesystem base directory from which all relative paths
  400. in podpath stem (default is .).
  401. --noindex - don't generate an index at the top of the resulting html.
  402. --norecurse - don't recurse on those subdirectories listed in podpath.
  403. --recurse - recurse on those subdirectories listed in podpath
  404. (default behavior).
  405. --title - title that will appear in resulting html file.
  406. --header - produce block header/footer
  407. --css - stylesheet URL
  408. --verbose - self-explanatory
  409. --quiet - supress some benign warning messages
  410. END_OF_USAGE
  411. sub parse_command_line {
  412. my ($opt_flush,$opt_help,$opt_htmldir,$opt_htmlroot,$opt_index,$opt_infile,$opt_libpods,$opt_netscape,$opt_outfile,$opt_podpath,$opt_podroot,$opt_norecurse,$opt_recurse,$opt_title,$opt_verbose,$opt_css,$opt_header,$opt_quiet);
  413. unshift @ARGV, split ' ', $Config{pod2html} if $Config{pod2html};
  414. my $result = GetOptions(
  415. 'flush' => \$opt_flush,
  416. 'help' => \$opt_help,
  417. 'htmldir=s' => \$opt_htmldir,
  418. 'htmlroot=s' => \$opt_htmlroot,
  419. 'index!' => \$opt_index,
  420. 'infile=s' => \$opt_infile,
  421. 'libpods=s' => \$opt_libpods,
  422. 'netscape!' => \$opt_netscape,
  423. 'outfile=s' => \$opt_outfile,
  424. 'podpath=s' => \$opt_podpath,
  425. 'podroot=s' => \$opt_podroot,
  426. 'norecurse' => \$opt_norecurse,
  427. 'recurse!' => \$opt_recurse,
  428. 'title=s' => \$opt_title,
  429. 'header' => \$opt_header,
  430. 'css=s' => \$opt_css,
  431. 'verbose' => \$opt_verbose,
  432. 'quiet' => \$opt_quiet,
  433. );
  434. usage("-", "invalid parameters") if not $result;
  435. usage("-") if defined $opt_help; # see if the user asked for help
  436. $opt_help = ""; # just to make -w shut-up.
  437. $podfile = $opt_infile if defined $opt_infile;
  438. $htmlfile = $opt_outfile if defined $opt_outfile;
  439. $htmldir = $opt_htmldir if defined $opt_outfile;
  440. @podpath = split(":", $opt_podpath) if defined $opt_podpath;
  441. @libpods = split(":", $opt_libpods) if defined $opt_libpods;
  442. warn "Flushing item and directory caches\n"
  443. if $opt_verbose && defined $opt_flush;
  444. unlink($dircache, $itemcache) if defined $opt_flush;
  445. $htmlroot = $opt_htmlroot if defined $opt_htmlroot;
  446. $podroot = $opt_podroot if defined $opt_podroot;
  447. $doindex = $opt_index if defined $opt_index;
  448. $recurse = $opt_recurse if defined $opt_recurse;
  449. $title = $opt_title if defined $opt_title;
  450. $header = defined $opt_header ? 1 : 0;
  451. $css = $opt_css if defined $opt_css;
  452. $verbose = defined $opt_verbose ? 1 : 0;
  453. $quiet = defined $opt_quiet ? 1 : 0;
  454. $netscape = $opt_netscape if defined $opt_netscape;
  455. }
  456. my $saved_cache_key;
  457. sub get_cache {
  458. my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
  459. my @cache_key_args = @_;
  460. # A first-level cache:
  461. # Don't bother reading the cache files if they still apply
  462. # and haven't changed since we last read them.
  463. my $this_cache_key = cache_key(@cache_key_args);
  464. return if $saved_cache_key and $this_cache_key eq $saved_cache_key;
  465. # load the cache of %pages and %items if possible. $tests will be
  466. # non-zero if successful.
  467. my $tests = 0;
  468. if (-f $dircache && -f $itemcache) {
  469. warn "scanning for item cache\n" if $verbose;
  470. $tests = load_cache($dircache, $itemcache, $podpath, $podroot);
  471. }
  472. # if we didn't succeed in loading the cache then we must (re)build
  473. # %pages and %items.
  474. if (!$tests) {
  475. warn "scanning directories in pod-path\n" if $verbose;
  476. scan_podpath($podroot, $recurse, 0);
  477. }
  478. $saved_cache_key = cache_key(@cache_key_args);
  479. }
  480. sub cache_key {
  481. my($dircache, $itemcache, $podpath, $podroot, $recurse) = @_;
  482. return join('!', $dircache, $itemcache, $recurse,
  483. @$podpath, $podroot, stat($dircache), stat($itemcache));
  484. }
  485. #
  486. # load_cache - tries to find if the caches stored in $dircache and $itemcache
  487. # are valid caches of %pages and %items. if they are valid then it loads
  488. # them and returns a non-zero value.
  489. #
  490. sub load_cache {
  491. my($dircache, $itemcache, $podpath, $podroot) = @_;
  492. my($tests);
  493. local $_;
  494. $tests = 0;
  495. open(CACHE, "<$itemcache") ||
  496. die "$0: error opening $itemcache for reading: $!\n";
  497. $/ = "\n";
  498. # is it the same podpath?
  499. $_ = <CACHE>;
  500. chomp($_);
  501. $tests++ if (join(":", @$podpath) eq $_);
  502. # is it the same podroot?
  503. $_ = <CACHE>;
  504. chomp($_);
  505. $tests++ if ($podroot eq $_);
  506. # load the cache if its good
  507. if ($tests != 2) {
  508. close(CACHE);
  509. return 0;
  510. }
  511. warn "loading item cache\n" if $verbose;
  512. while (<CACHE>) {
  513. /(.*?) (.*)$/;
  514. $items{$1} = $2;
  515. }
  516. close(CACHE);
  517. warn "scanning for directory cache\n" if $verbose;
  518. open(CACHE, "<$dircache") ||
  519. die "$0: error opening $dircache for reading: $!\n";
  520. $/ = "\n";
  521. $tests = 0;
  522. # is it the same podpath?
  523. $_ = <CACHE>;
  524. chomp($_);
  525. $tests++ if (join(":", @$podpath) eq $_);
  526. # is it the same podroot?
  527. $_ = <CACHE>;
  528. chomp($_);
  529. $tests++ if ($podroot eq $_);
  530. # load the cache if its good
  531. if ($tests != 2) {
  532. close(CACHE);
  533. return 0;
  534. }
  535. warn "loading directory cache\n" if $verbose;
  536. while (<CACHE>) {
  537. /(.*?) (.*)$/;
  538. $pages{$1} = $2;
  539. }
  540. close(CACHE);
  541. return 1;
  542. }
  543. #
  544. # scan_podpath - scans the directories specified in @podpath for directories,
  545. # .pod files, and .pm files. it also scans the pod files specified in
  546. # @libpods for =item directives.
  547. #
  548. sub scan_podpath {
  549. my($podroot, $recurse, $append) = @_;
  550. my($pwd, $dir);
  551. my($libpod, $dirname, $pod, @files, @poddata);
  552. unless($append) {
  553. %items = ();
  554. %pages = ();
  555. }
  556. # scan each directory listed in @podpath
  557. $pwd = getcwd();
  558. chdir($podroot)
  559. || die "$0: error changing to directory $podroot: $!\n";
  560. foreach $dir (@podpath) {
  561. scan_dir($dir, $recurse);
  562. }
  563. # scan the pods listed in @libpods for =item directives
  564. foreach $libpod (@libpods) {
  565. # if the page isn't defined then we won't know where to find it
  566. # on the system.
  567. next unless defined $pages{$libpod} && $pages{$libpod};
  568. # if there is a directory then use the .pod and .pm files within it.
  569. # NOTE: Only finds the first so-named directory in the tree.
  570. # if ($pages{$libpod} =~ /([^:]*[^(\.pod|\.pm)]):/) {
  571. if ($pages{$libpod} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
  572. # find all the .pod and .pm files within the directory
  573. $dirname = $1;
  574. opendir(DIR, $dirname) ||
  575. die "$0: error opening directory $dirname: $!\n";
  576. @files = grep(/(\.pod|\.pm)$/ && ! -d $_, readdir(DIR));
  577. closedir(DIR);
  578. # scan each .pod and .pm file for =item directives
  579. foreach $pod (@files) {
  580. open(POD, "<$dirname/$pod") ||
  581. die "$0: error opening $dirname/$pod for input: $!\n";
  582. @poddata = <POD>;
  583. close(POD);
  584. scan_items("$dirname/$pod", @poddata);
  585. }
  586. # use the names of files as =item directives too.
  587. foreach $pod (@files) {
  588. $pod =~ /^(.*)(\.pod|\.pm)$/;
  589. $items{$1} = "$dirname/$1.html" if $1;
  590. }
  591. } elsif ($pages{$libpod} =~ /([^:]*\.pod):/ ||
  592. $pages{$libpod} =~ /([^:]*\.pm):/) {
  593. # scan the .pod or .pm file for =item directives
  594. $pod = $1;
  595. open(POD, "<$pod") ||
  596. die "$0: error opening $pod for input: $!\n";
  597. @poddata = <POD>;
  598. close(POD);
  599. scan_items("$pod", @poddata);
  600. } else {
  601. warn "$0: shouldn't be here (line ".__LINE__."\n";
  602. }
  603. }
  604. @poddata = (); # clean-up a bit
  605. chdir($pwd)
  606. || die "$0: error changing to directory $pwd: $!\n";
  607. # cache the item list for later use
  608. warn "caching items for later use\n" if $verbose;
  609. open(CACHE, ">$itemcache") ||
  610. die "$0: error open $itemcache for writing: $!\n";
  611. print CACHE join(":", @podpath) . "\n$podroot\n";
  612. foreach my $key (keys %items) {
  613. print CACHE "$key $items{$key}\n";
  614. }
  615. close(CACHE);
  616. # cache the directory list for later use
  617. warn "caching directories for later use\n" if $verbose;
  618. open(CACHE, ">$dircache") ||
  619. die "$0: error open $dircache for writing: $!\n";
  620. print CACHE join(":", @podpath) . "\n$podroot\n";
  621. foreach my $key (keys %pages) {
  622. print CACHE "$key $pages{$key}\n";
  623. }
  624. close(CACHE);
  625. }
  626. #
  627. # scan_dir - scans the directory specified in $dir for subdirectories, .pod
  628. # files, and .pm files. notes those that it finds. this information will
  629. # be used later in order to figure out where the pages specified in L<>
  630. # links are on the filesystem.
  631. #
  632. sub scan_dir {
  633. my($dir, $recurse) = @_;
  634. my($t, @subdirs, @pods, $pod, $dirname, @dirs);
  635. local $_;
  636. @subdirs = ();
  637. @pods = ();
  638. opendir(DIR, $dir) ||
  639. die "$0: error opening directory $dir: $!\n";
  640. while (defined($_ = readdir(DIR))) {
  641. if (-d "$dir/$_" && $_ ne "." && $_ ne "..") { # directory
  642. $pages{$_} = "" unless defined $pages{$_};
  643. $pages{$_} .= "$dir/$_:";
  644. push(@subdirs, $_);
  645. } elsif (/\.pod$/) { # .pod
  646. s/\.pod$//;
  647. $pages{$_} = "" unless defined $pages{$_};
  648. $pages{$_} .= "$dir/$_.pod:";
  649. push(@pods, "$dir/$_.pod");
  650. } elsif (/\.pm$/) { # .pm
  651. s/\.pm$//;
  652. $pages{$_} = "" unless defined $pages{$_};
  653. $pages{$_} .= "$dir/$_.pm:";
  654. push(@pods, "$dir/$_.pm");
  655. }
  656. }
  657. closedir(DIR);
  658. # recurse on the subdirectories if necessary
  659. if ($recurse) {
  660. foreach my $subdir (@subdirs) {
  661. scan_dir("$dir/$subdir", $recurse);
  662. }
  663. }
  664. }
  665. #
  666. # scan_headings - scan a pod file for head[1-6] tags, note the tags, and
  667. # build an index.
  668. #
  669. sub scan_headings {
  670. my($sections, @data) = @_;
  671. my($tag, $which_head, $title, $listdepth, $index);
  672. # here we need local $ignore = 0;
  673. # unfortunately, we can't have it, because $ignore is lexical
  674. $ignore = 0;
  675. $listdepth = 0;
  676. $index = "";
  677. # scan for =head directives, note their name, and build an index
  678. # pointing to each of them.
  679. foreach my $line (@data) {
  680. if ($line =~ /^=(head)([1-6])\s+(.*)/) {
  681. ($tag,$which_head, $title) = ($1,$2,$3);
  682. chomp($title);
  683. $$sections{htmlify(0,$title)} = 1;
  684. while ($which_head != $listdepth) {
  685. if ($which_head > $listdepth) {
  686. $index .= "\n" . ("\t" x $listdepth) . "<UL>\n";
  687. $listdepth++;
  688. } elsif ($which_head < $listdepth) {
  689. $listdepth--;
  690. $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
  691. }
  692. }
  693. $index .= "\n" . ("\t" x $listdepth) . "<LI>" .
  694. "<A HREF=\"#" . htmlify(0,$title) . "\">" .
  695. html_escape(process_text(\$title, 0)) . "</A></LI>";
  696. }
  697. }
  698. # finish off the lists
  699. while ($listdepth--) {
  700. $index .= "\n" . ("\t" x $listdepth) . "</UL>\n";
  701. }
  702. # get rid of bogus lists
  703. $index =~ s,\t*<UL>\s*</UL>\n,,g;
  704. $ignore = 1; # restore old value;
  705. return $index;
  706. }
  707. #
  708. # scan_items - scans the pod specified by $pod for =item directives. we
  709. # will use this information later on in resolving C<> links.
  710. #
  711. sub scan_items {
  712. my($pod, @poddata) = @_;
  713. my($i, $item);
  714. local $_;
  715. $pod =~ s/\.pod$//;
  716. $pod .= ".html" if $pod;
  717. foreach $i (0..$#poddata) {
  718. $_ = $poddata[$i];
  719. # remove any formatting instructions
  720. s,[A-Z]<([^<>]*)>,$1,g;
  721. # figure out what kind of item it is and get the first word of
  722. # it's name.
  723. if (/^=item\s+(\w*)\s*.*$/s) {
  724. if ($1 eq "*") { # bullet list
  725. /\A=item\s+\*\s*(.*?)\s*\Z/s;
  726. $item = $1;
  727. } elsif ($1 =~ /^\d+/) { # numbered list
  728. /\A=item\s+\d+\.?(.*?)\s*\Z/s;
  729. $item = $1;
  730. } else {
  731. # /\A=item\s+(.*?)\s*\Z/s;
  732. /\A=item\s+(\w*)/s;
  733. $item = $1;
  734. }
  735. $items{$item} = "$pod" if $item;
  736. }
  737. }
  738. }
  739. #
  740. # process_head - convert a pod head[1-6] tag and convert it to HTML format.
  741. #
  742. sub process_head {
  743. my($tag, $heading) = @_;
  744. my $firstword;
  745. # figure out the level of the =head
  746. $tag =~ /head([1-6])/;
  747. my $level = $1;
  748. # can't have a heading full of spaces and speechmarks and so on
  749. $firstword = $heading; $firstword =~ s/\s*(\w+)\s.*/$1/;
  750. print HTML "<P>\n" unless $listlevel;
  751. print HTML "<HR>\n" unless $listlevel || $top;
  752. print HTML "<H$level>"; # unless $listlevel;
  753. #print HTML "<H$level>" unless $listlevel;
  754. my $convert = $heading; process_text(\$convert, 0);
  755. $convert = html_escape($convert);
  756. print HTML '<A NAME="' . htmlify(0,$heading) . "\">$convert</A>";
  757. print HTML "</H$level>"; # unless $listlevel;
  758. print HTML "\n";
  759. }
  760. #
  761. # process_item - convert a pod item tag and convert it to HTML format.
  762. #
  763. sub process_item {
  764. my $text = $_[0];
  765. my($i, $quote, $name);
  766. my $need_preamble = 0;
  767. my $this_entry;
  768. # lots of documents start a list without doing an =over. this is
  769. # bad! but, the proper thing to do seems to be to just assume
  770. # they did do an =over. so warn them once and then continue.
  771. warn "$0: $podfile: unexpected =item directive in paragraph $paragraph. ignoring.\n"
  772. unless $listlevel;
  773. process_over() unless $listlevel;
  774. return unless $listlevel;
  775. # remove formatting instructions from the text
  776. 1 while $text =~ s/[A-Z]<([^<>]*)>/$1/g;
  777. pre_escape(\$text);
  778. $need_preamble = $items_seen[$listlevel]++ == 0;
  779. # check if this is the first =item after an =over
  780. $i = $listlevel - 1;
  781. my $need_new = $listlevel >= @listitem;
  782. if ($text =~ /\A\*/) { # bullet
  783. if ($need_preamble) {
  784. push(@listend, "</UL>");
  785. print HTML "<UL>\n";
  786. }
  787. print HTML '<LI>';
  788. if ($text =~ /\A\*\s*(.+)\Z/s) {
  789. print HTML '<STRONG>';
  790. if ($items_named{$1}++) {
  791. print HTML html_escape($1);
  792. } else {
  793. my $name = 'item_' . htmlify(1,$1);
  794. print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
  795. }
  796. print HTML '</STRONG>';
  797. }
  798. } elsif ($text =~ /\A[\d#]+/) { # numbered list
  799. if ($need_preamble) {
  800. push(@listend, "</OL>");
  801. print HTML "<OL>\n";
  802. }
  803. print HTML '<LI>';
  804. if ($text =~ /\A\d+\.?\s*(.+)\Z/s) {
  805. print HTML '<STRONG>';
  806. if ($items_named{$1}++) {
  807. print HTML html_escape($1);
  808. } else {
  809. my $name = 'item_' . htmlify(0,$1);
  810. print HTML qq(<A NAME="$name">), html_escape($1), '</A>';
  811. }
  812. print HTML '</STRONG>';
  813. }
  814. } else { # all others
  815. if ($need_preamble) {
  816. push(@listend, '</DL>');
  817. print HTML "<DL>\n";
  818. }
  819. print HTML '<DT>';
  820. if ($text =~ /(\S+)/) {
  821. print HTML '<STRONG>';
  822. if ($items_named{$1}++) {
  823. print HTML html_escape($text);
  824. } else {
  825. my $name = 'item_' . htmlify(1,$text);
  826. print HTML qq(<A NAME="$name">), html_escape($text), '</A>';
  827. }
  828. print HTML '</STRONG>';
  829. }
  830. print HTML '<DD>';
  831. }
  832. print HTML "\n";
  833. }
  834. #
  835. # process_over - process a pod over tag and start a corresponding HTML
  836. # list.
  837. #
  838. sub process_over {
  839. # start a new list
  840. $listlevel++;
  841. }
  842. #
  843. # process_back - process a pod back tag and convert it to HTML format.
  844. #
  845. sub process_back {
  846. warn "$0: $podfile: unexpected =back directive in paragraph $paragraph. ignoring.\n"
  847. unless $listlevel;
  848. return unless $listlevel;
  849. # close off the list. note, I check to see if $listend[$listlevel] is
  850. # defined because an =item directive may have never appeared and thus
  851. # $listend[$listlevel] may have never been initialized.
  852. $listlevel--;
  853. print HTML $listend[$listlevel] if defined $listend[$listlevel];
  854. print HTML "\n";
  855. # don't need the corresponding perl code anymore
  856. pop(@listitem);
  857. pop(@listdata);
  858. pop(@listend);
  859. pop(@items_seen);
  860. }
  861. #
  862. # process_cut - process a pod cut tag, thus stop ignoring pod directives.
  863. #
  864. sub process_cut {
  865. $ignore = 1;
  866. }
  867. #
  868. # process_pod - process a pod pod tag, thus ignore pod directives until we see a
  869. # corresponding cut.
  870. #
  871. sub process_pod {
  872. # no need to set $ignore to 0 cause the main loop did it
  873. }
  874. #
  875. # process_for - process a =for pod tag. if it's for html, split
  876. # it out verbatim, if illustration, center it, otherwise ignore it.
  877. #
  878. sub process_for {
  879. my($whom, $text) = @_;
  880. if ( $whom =~ /^(pod2)?html$/i) {
  881. print HTML $text;
  882. } elsif ($whom =~ /^illustration$/i) {
  883. 1 while chomp $text;
  884. for my $ext (qw[.png .gif .jpeg .jpg .tga .pcl .bmp]) {
  885. $text .= $ext, last if -r "$text$ext";
  886. }
  887. print HTML qq{<p align = "center"><img src = "$text" alt = "$text illustration"></p>};
  888. }
  889. }
  890. #
  891. # process_begin - process a =begin pod tag. this pushes
  892. # whom we're beginning on the begin stack. if there's a
  893. # begin stack, we only print if it us.
  894. #
  895. sub process_begin {
  896. my($whom, $text) = @_;
  897. $whom = lc($whom);
  898. push (@begin_stack, $whom);
  899. if ( $whom =~ /^(pod2)?html$/) {
  900. print HTML $text if $text;
  901. }
  902. }
  903. #
  904. # process_end - process a =end pod tag. pop the
  905. # begin stack. die if we're mismatched.
  906. #
  907. sub process_end {
  908. my($whom, $text) = @_;
  909. $whom = lc($whom);
  910. if ($begin_stack[-1] ne $whom ) {
  911. die "Unmatched begin/end at chunk $paragraph\n"
  912. }
  913. pop @begin_stack;
  914. }
  915. #
  916. # process_text - handles plaintext that appears in the input pod file.
  917. # there may be pod commands embedded within the text so those must be
  918. # converted to html commands.
  919. #
  920. sub process_text {
  921. my($text, $escapeQuotes) = @_;
  922. my($result, $rest, $s1, $s2, $s3, $s4, $match, $bf);
  923. my($podcommand, $params, $tag, $quote);
  924. return if $ignore;
  925. $quote = 0; # status of double-quote conversion
  926. $result = "";
  927. $rest = $$text;
  928. if ($rest =~ /^\s+/) { # preformatted text, no pod directives
  929. $rest =~ s/\n+\Z//;
  930. $rest =~ s#.*#
  931. my $line = $&;
  932. 1 while $line =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e;
  933. $line;
  934. #eg;
  935. $rest =~ s/&/&amp;/g;
  936. $rest =~ s/</&lt;/g;
  937. $rest =~ s/>/&gt;/g;
  938. $rest =~ s/"/&quot;/g;
  939. # try and create links for all occurrences of perl.* within
  940. # the preformatted text.
  941. $rest =~ s{
  942. (\s*)(perl\w+)
  943. }{
  944. if (defined $pages{$2}) { # is a link
  945. qq($1<A HREF="$htmlroot/$pages{$2}">$2</A>);
  946. } elsif (defined $pages{dosify($2)}) { # is a link
  947. qq($1<A HREF="$htmlroot/$pages{dosify($2)}">$2</A>);
  948. } else {
  949. "$1$2";
  950. }
  951. }xeg;
  952. # $rest =~ s/(<A HREF=)([^>:]*:)?([^>:]*)\.pod:([^>:]*:)?/$1$3.html/g;
  953. $rest =~ s{
  954. (<A\ HREF="?) ([^>:]*:)? ([^>:]*) \.pod: ([^>:]*:)?
  955. }{
  956. my $url ;
  957. if ( $htmlfileurl ne '' ) {
  958. # Here, we take advantage of the knowledge
  959. # that $htmlfileurl ne '' implies $htmlroot eq ''.
  960. # Since $htmlroot eq '', we need to prepend $htmldir
  961. # on the fron of the link to get the absolute path
  962. # of the link's target. We check for a leading '/'
  963. # to avoid corrupting links that are #, file:, etc.
  964. my $old_url = $3 ;
  965. $old_url = "$htmldir$old_url"
  966. if ( $old_url =~ m{^\/} ) ;
  967. $url = relativize_url( "$old_url.html", $htmlfileurl );
  968. # print( " a: [$old_url.html,$htmlfileurl,$url]\n" ) ;
  969. }
  970. else {
  971. $url = "$3.html" ;
  972. }
  973. "$1$url" ;
  974. }xeg;
  975. # Look for embedded URLs and make them in to links. We don't
  976. # relativize them since they are best left as the author intended.
  977. my $urls = '(' . join ('|', qw{
  978. http
  979. telnet
  980. mailto
  981. news
  982. gopher
  983. file
  984. wais
  985. ftp
  986. } )
  987. . ')';
  988. my $ltrs = '\w';
  989. my $gunk = '/#~:.?+=&%@!\-';
  990. my $punc = '.:?\-';
  991. my $any = "${ltrs}${gunk}${punc}";
  992. $rest =~ s{
  993. \b # start at word boundary
  994. ( # begin $1 {
  995. $urls : # need resource and a colon
  996. (?!:) # Ignore File::, among others.
  997. [$any] +? # followed by on or more
  998. # of any valid character, but
  999. # be conservative and take only
  1000. # what you need to....
  1001. ) # end $1 }
  1002. (?= # look-ahead non-consumptive assertion
  1003. [$punc]* # either 0 or more puntuation
  1004. [^$any] # followed by a non-url char
  1005. | # or else
  1006. $ # then end of the string
  1007. )
  1008. }{<A HREF="$1">$1</A>}igox;
  1009. $result = "<PRE>" # text should be as it is (verbatim)
  1010. . "$rest\n"
  1011. . "</PRE>\n";
  1012. } else { # formatted text
  1013. # parse through the string, stopping each time we find a
  1014. # pod-escape. once the string has been throughly processed
  1015. # we can output it.
  1016. while (length $rest) {
  1017. # check to see if there are any possible pod directives in
  1018. # the remaining part of the text.
  1019. if ($rest =~ m/[BCEIFLSZ]</) {
  1020. warn "\$rest\t= $rest\n" unless
  1021. $rest =~ /\A
  1022. ([^<]*?)
  1023. ([BCEIFLSZ]?)
  1024. <
  1025. (.*)\Z/xs;
  1026. $s1 = $1; # pure text
  1027. $s2 = $2; # the type of pod-escape that follows
  1028. $s3 = '<'; # '<'
  1029. $s4 = $3; # the rest of the string
  1030. } else {
  1031. $s1 = $rest;
  1032. $s2 = "";
  1033. $s3 = "";
  1034. $s4 = "";
  1035. }
  1036. if ($s3 eq '<' && $s2) { # a pod-escape
  1037. $result .= ($escapeQuotes ? process_puretext($s1, \$quote) : $s1);
  1038. $podcommand = "$s2<";
  1039. $rest = $s4;
  1040. # find the matching '>'
  1041. $match = 1;
  1042. $bf = 0;
  1043. while ($match && !$bf) {
  1044. $bf = 1;
  1045. if ($rest =~ /\A([^<>]*[BCEIFLSZ]<)(.*)\Z/s) {
  1046. $bf = 0;
  1047. $match++;
  1048. $podcommand .= $1;
  1049. $rest = $2;
  1050. } elsif ($rest =~ /\A([^>]*>)(.*)\Z/s) {
  1051. $bf = 0;
  1052. $match--;
  1053. $podcommand .= $1;
  1054. $rest = $2;
  1055. }
  1056. }
  1057. if ($match != 0) {
  1058. warn <<WARN;
  1059. $0: $podfile: cannot find matching > for $s2 in paragraph $paragraph.
  1060. WARN
  1061. $result .= substr $podcommand, 0, 2;
  1062. $rest = substr($podcommand, 2) . $rest;
  1063. next;
  1064. }
  1065. # pull out the parameters to the pod-escape
  1066. $podcommand =~ /^([BCFEILSZ]?)<(.*)>$/s;
  1067. $tag = $1;
  1068. $params = $2;
  1069. # process the text within the pod-escape so that any escapes
  1070. # which must occur do.
  1071. process_text(\$params, 0) unless $tag eq 'L';
  1072. $s1 = $params;
  1073. if (!$tag || $tag eq " ") { # <> : no tag
  1074. $s1 = "&lt;$params&gt;";
  1075. } elsif ($tag eq "L") { # L<> : link
  1076. $s1 = process_L($params);
  1077. } elsif ($tag eq "I" || # I<> : italicize text
  1078. $tag eq "B" || # B<> : bold text
  1079. $tag eq "F") { # F<> : file specification
  1080. $s1 = process_BFI($tag, $params);
  1081. } elsif ($tag eq "C") { # C<> : literal code
  1082. $s1 = process_C($params, 1);
  1083. } elsif ($tag eq "E") { # E<> : escape
  1084. $s1 = process_E($params);
  1085. } elsif ($tag eq "Z") { # Z<> : zero-width character
  1086. $s1 = process_Z($params);
  1087. } elsif ($tag eq "S") { # S<> : non-breaking space
  1088. $s1 = process_S($params);
  1089. } elsif ($tag eq "X") { # S<> : non-breaking space
  1090. $s1 = process_X($params);
  1091. } else {
  1092. warn "$0: $podfile: unhandled tag '$tag' in paragraph $paragraph\n";
  1093. }
  1094. $result .= "$s1";
  1095. } else {
  1096. # for pure text we must deal with implicit links and
  1097. # double-quotes among other things.
  1098. $result .= ($escapeQuotes ? process_puretext("$s1$s2$s3", \$quote) : "$s1$s2$s3");
  1099. $rest = $s4;
  1100. }
  1101. }
  1102. }
  1103. $$text = $result;
  1104. }
  1105. sub html_escape {
  1106. my $rest = $_[0];
  1107. $rest =~ s/&(?!\w+;|#)/&amp;/g; # XXX not bulletproof
  1108. $rest =~ s/</&lt;/g;
  1109. $rest =~ s/>/&gt;/g;
  1110. $rest =~ s/"/&quot;/g;
  1111. return $rest;
  1112. }
  1113. #
  1114. # process_puretext - process pure text (without pod-escapes) converting
  1115. # double-quotes and handling implicit C<> links.
  1116. #
  1117. sub process_puretext {
  1118. my($text, $quote) = @_;
  1119. my(@words, $result, $rest, $lead, $trail);
  1120. # convert double-quotes to single-quotes
  1121. $text =~ s/\A([^"]*)"/$1''/s if $$quote;
  1122. while ($text =~ s/\A([^"]*)["]([^"]*)["]/$1``$2''/sg) {}
  1123. $$quote = ($text =~ m/"/ ? 1 : 0);
  1124. $text =~ s/\A([^"]*)"/$1``/s if $$quote;
  1125. # keep track of leading and trailing white-space
  1126. $lead = ($text =~ /\A(\s*)/s ? $1 : "");
  1127. $trail = ($text =~ /(\s*)\Z/s ? $1 : "");
  1128. # collapse all white space into a single space
  1129. $text =~ s/\s+/ /g;
  1130. @words = split(" ", $text);
  1131. # process each word individually
  1132. foreach my $word (@words) {
  1133. # see if we can infer a link
  1134. if ($word =~ /^\w+\(/) {
  1135. # has parenthesis so should have been a C<> ref
  1136. $word = process_C($word);
  1137. # $word =~ /^[^()]*]\(/;
  1138. # if (defined $items{$1} && $items{$1}) {
  1139. # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$1}#item_"
  1140. # . htmlify(0,$word)
  1141. # . "\">$word</A></CODE>";
  1142. # } elsif (defined $items{$word} && $items{$word}) {
  1143. # $word = "\n<CODE><A HREF=\"$htmlroot/$items{$word}#item_"
  1144. # . htmlify(0,$word)
  1145. # . "\">$word</A></CODE>";
  1146. # } else {
  1147. # $word = "\n<CODE><A HREF=\"#item_"
  1148. # . htmlify(0,$word)
  1149. # . "\">$word</A></CODE>";
  1150. # }
  1151. } elsif ($word =~ /^[\$\@%&*]+\w+$/) {
  1152. # perl variables, should be a C<> ref
  1153. $word = process_C($word, 1);
  1154. } elsif ($word =~ m,^\w+://\w,) {
  1155. # looks like a URL
  1156. # Don't relativize it: leave it as the author intended
  1157. $word = qq(<A HREF="$word">$word</A>);
  1158. } elsif ($word =~ /[\w.-]+\@[\w-]+\.\w/) {
  1159. # looks like an e-mail address
  1160. my ($w1, $w2, $w3) = ("", $word, "");
  1161. ($w1, $w2, $w3) = ("(", $1, ")$2") if $word =~ /^\((.*?)\)(,?)/;
  1162. ($w1, $w2, $w3) = ("&lt;", $1, "&gt;$2") if $word =~ /^<(.*?)>(,?)/;
  1163. $word = qq($w1<A HREF="mailto:$w2">$w2</A>$w3);
  1164. } elsif ($word !~ /[a-z]/ && $word =~ /[A-Z]/) { # all uppercase?
  1165. $word = html_escape($word) if $word =~ /["&<>]/;
  1166. $word = "\n<FONT SIZE=-1>$word</FONT>" if $netscape;
  1167. } else {
  1168. $word = html_escape($word) if $word =~ /["&<>]/;
  1169. }
  1170. }
  1171. # build a new string based upon our conversion
  1172. $result = "";
  1173. $rest = join(" ", @words);
  1174. while (length($rest) > 75) {
  1175. if ( $rest =~ m/^(.{0,75})\s(.*?)$/o ||
  1176. $rest =~ m/^(\S*)\s(.*?)$/o) {
  1177. $result .= "$1\n";
  1178. $rest = $2;
  1179. } else {
  1180. $result .= "$rest\n";
  1181. $rest = "";
  1182. }
  1183. }
  1184. $result .= $rest if $rest;
  1185. # restore the leading and trailing white-space
  1186. $result = "$lead$result$trail";
  1187. return $result;
  1188. }
  1189. #
  1190. # pre_escape - convert & in text to $amp;
  1191. #
  1192. sub pre_escape {
  1193. my($str) = @_;
  1194. $$str =~ s/&(?!\w+;|#)/&amp;/g; # XXX not bulletproof
  1195. }
  1196. #
  1197. # dosify - convert filenames to 8.3
  1198. #
  1199. sub dosify {
  1200. my($str) = @_;
  1201. return lc($str) if $^O eq 'VMS'; # VMS just needs casing
  1202. if ($Is83) {
  1203. $str = lc $str;
  1204. $str =~ s/(\.\w+)/substr ($1,0,4)/ge;
  1205. $str =~ s/(\w+)/substr ($1,0,8)/ge;
  1206. }
  1207. return $str;
  1208. }
  1209. #
  1210. # process_L - convert a pod L<> directive to a corresponding HTML link.
  1211. # most of the links made are inferred rather than known about directly
  1212. # (i.e it's not known whether the =head\d section exists in the target file,
  1213. # or whether a .pod file exists in the case of split files). however, the
  1214. # guessing usually works.
  1215. #
  1216. # Unlike the other directives, this should be called with an unprocessed
  1217. # string, else tags in the link won't be matched.
  1218. #
  1219. sub process_L {
  1220. my($str) = @_;
  1221. my($s1, $s2, $linktext, $page, $page83, $section, $link); # work strings
  1222. $str =~ s/\n/ /g; # undo word-wrapped tags
  1223. $s1 = $str;
  1224. for ($s1) {
  1225. # LREF: a la HREF L<show this text|man/section>
  1226. $linktext = $1 if s:^([^|]+)\|::;
  1227. # make sure sections start with a /
  1228. s,^",/",g;
  1229. s,^,/,g if (!m,/, && / /);
  1230. # check if there's a section specified
  1231. if (m,^(.*?)/"?(.*?)"?$,) { # yes
  1232. ($page, $section) = ($1, $2);
  1233. } else { # no
  1234. ($page, $section) = ($str, "");
  1235. }
  1236. # check if we know that this is a section in this page
  1237. if (!defined $pages{$page} && defined $sections{$page}) {
  1238. $section = $page;
  1239. $page = "";
  1240. }
  1241. # remove trailing punctuation, like ()
  1242. $section =~ s/\W*$// ;
  1243. }
  1244. $page83=dosify($page);
  1245. $page=$page83 if (defined $pages{$page83});
  1246. if ($page eq "") {
  1247. $link = "#" . htmlify(0,$section);
  1248. $linktext = $section unless defined($linktext);
  1249. } elsif ( $page =~ /::/ ) {
  1250. $linktext = ($section ? "$section" : "$page");
  1251. $page =~ s,::,/,g;
  1252. # Search page cache for an entry keyed under the html page name,
  1253. # then look to see what directory that page might be in. NOTE:
  1254. # this will only find one page. A better solution might be to produce
  1255. # an intermediate page that is an index to all such pages.
  1256. my $page_name = $page ;
  1257. $page_name =~ s,^.*/,, ;
  1258. if ( defined( $pages{ $page_name } ) &&
  1259. $pages{ $page_name } =~ /([^:]*$page)\.(?:pod|pm):/
  1260. ) {
  1261. $page = $1 ;
  1262. }
  1263. else {
  1264. # NOTE: This branch assumes that all A::B pages are located in
  1265. # $htmlroot/A/B.html . This is often incorrect, since they are
  1266. # often in $htmlroot/lib/A/B.html or such like. Perhaps we could
  1267. # analyze the contents of %pages and figure out where any
  1268. # cousins of A::B are, then assume that. So, if A::B isn't found,
  1269. # but A::C is found in lib/A/C.pm, then A::B is assumed to be in
  1270. # lib/A/B.pm. This is also limited, but it's an improvement.
  1271. # Maybe a hints file so that the links point to the correct places
  1272. # non-theless?
  1273. # Also, maybe put a warn "$0: cannot resolve..." here.
  1274. }
  1275. $link = "$htmlroot/$page.html";
  1276. $link .= "#" . htmlify(0,$section) if ($section);
  1277. } elsif (!defined $pages{$page}) {
  1278. warn "$0: $podfile: cannot resolve L<$str> in paragraph $paragraph: no such page '$page'\n" unless $quiet;
  1279. $link = "";
  1280. $linktext = $page unless defined($linktext);
  1281. } else {
  1282. $linktext = ($section ? "$section" : "the $page manpage") unless defined($linktext);
  1283. $section = htmlify(0,$section) if $section ne "";
  1284. # if there is a directory by the name of the page, then assume that an
  1285. # appropriate section will exist in the subdirectory
  1286. # if ($section ne "" && $pages{$page} =~ /([^:]*[^(\.pod|\.pm)]):/) {
  1287. if ($section ne "" && $pages{$page} =~ /([^:]*(?<!\.pod)(?<!\.pm)):/) {
  1288. $link = "$htmlroot/$1/$section.html";
  1289. # since there is no directory by the name of the page, the section will
  1290. # have to exist within a .html of the same name. thus, make sure there
  1291. # is a .pod or .pm that might become that .html
  1292. } else {
  1293. $section = "#$section";
  1294. # check if there is a .pod with the page name
  1295. if ($pages{$page} =~ /([^:]*)\.pod:/) {
  1296. $link = "$htmlroot/$1.html$section";
  1297. } elsif ($pages{$page} =~ /([^:]*)\.pm:/) {
  1298. $link = "$htmlroot/$1.html$section";
  1299. } else {
  1300. warn "$0: $podfile: cannot resolve L$str in paragraph $paragraph: ".
  1301. "no .pod or .pm found\n";
  1302. $link = "";
  1303. $linktext = $section unless defined($linktext);
  1304. }
  1305. }
  1306. }
  1307. process_text(\$linktext, 0);
  1308. if ($link) {
  1309. # Here, we take advantage of the knowledge that $htmlfileurl ne ''
  1310. # implies $htmlroot eq ''. This means that the link in question
  1311. # needs a prefix of $htmldir if it begins with '/'. The test for
  1312. # the initial '/' is done to avoid '#'-only links, and to allow
  1313. # for other kinds of links, like file:, ftp:, etc.
  1314. my $url ;
  1315. if ( $htmlfileurl ne '' ) {
  1316. $link = "$htmldir$link"
  1317. if ( $link =~ m{^/} ) ;
  1318. $url = relativize_url( $link, $htmlfileurl ) ;
  1319. # print( " b: [$link,$htmlfileurl,$url]\n" ) ;
  1320. }
  1321. else {
  1322. $url = $link ;
  1323. }
  1324. $s1 = "<A HREF=\"$url\">$linktext</A>";
  1325. } else {
  1326. $s1 = "<EM>$linktext</EM>";
  1327. }
  1328. return $s1;
  1329. }
  1330. #
  1331. # relativize_url - convert an absolute URL to one relative to a base URL.
  1332. # Assumes both end in a filename.
  1333. #
  1334. sub relativize_url {
  1335. my ($dest,$source) = @_ ;
  1336. my ($dest_volume,$dest_directory,$dest_file) =
  1337. File::Spec::Unix->splitpath( $dest ) ;
  1338. $dest = File::Spec::Unix->catpath( $dest_volume, $dest_directory, '' ) ;
  1339. my ($source_volume,$source_directory,$source_file) =
  1340. File::Spec::Unix->splitpath( $source ) ;
  1341. $source = File::Spec::Unix->catpath( $source_volume, $source_directory, '' ) ;
  1342. my $rel_path = '' ;
  1343. if ( $dest ne '' ) {
  1344. $rel_path = File::Spec::Unix->abs2rel( $dest, $source ) ;
  1345. }
  1346. if ( $rel_path ne '' &&
  1347. substr( $rel_path, -1 ) ne '/' &&
  1348. substr( $dest_file, 0, 1 ) ne '#'
  1349. ) {
  1350. $rel_path .= "/$dest_file" ;
  1351. }
  1352. else {
  1353. $rel_path .= "$dest_file" ;
  1354. }
  1355. return $rel_path ;
  1356. }
  1357. #
  1358. # process_BFI - process any of the B<>, F<>, or I<> pod-escapes and
  1359. # convert them to corresponding HTML directives.
  1360. #
  1361. sub process_BFI {
  1362. my($tag, $str) = @_;
  1363. my($s1); # work string
  1364. my(%repltext) = ( 'B' => 'STRONG',
  1365. 'F' => 'EM',
  1366. 'I' => 'EM');
  1367. # extract the modified text and convert to HTML
  1368. $s1 = "<$repltext{$tag}>$str</$repltext{$tag}>";
  1369. return $s1;
  1370. }
  1371. #
  1372. # process_C - process the C<> pod-escape.
  1373. #
  1374. sub process_C {
  1375. my($str, $doref) = @_;
  1376. my($s1, $s2);
  1377. $s1 = $str;
  1378. $s1 =~ s/\([^()]*\)//g; # delete parentheses
  1379. $s2 = $s1;
  1380. $s1 =~ s/\W//g; # delete bogus characters
  1381. $str = html_escape($str);
  1382. # if there was a pod file that we found earlier with an appropriate
  1383. # =item directive, then create a link to that page.
  1384. if ($doref && defined $items{$s1}) {
  1385. if ( $items{$s1} ) {
  1386. my $link = "$htmlroot/$items{$s1}#item_" . htmlify(0,$s2) ;
  1387. # Here, we take advantage of the knowledge that $htmlfileurl ne ''
  1388. # implies $htmlroot eq ''.
  1389. my $url ;
  1390. if ( $htmlfileurl ne '' ) {
  1391. $link = "$htmldir$link" ;
  1392. $url = relativize_url( $link, $htmlfileurl ) ;
  1393. }
  1394. else {
  1395. $url = $link ;
  1396. }
  1397. $s1 = "<A HREF=\"$url\">$str</A>" ;
  1398. }
  1399. else {
  1400. $s1 = "<A HREF=\"#item_" . htmlify(0,$s2) . "\">$str</A>" ;
  1401. }
  1402. $s1 =~ s,(perl\w+/(\S+)\.html)#item_\2\b,$1,;
  1403. confess "s1 has space: $s1" if $s1 =~ /HREF="[^"]*\s[^"]*"/;
  1404. } else {
  1405. $s1 = "<CODE>$str</CODE>";
  1406. # warn "$0: $podfile: cannot resolve C<$str> in paragraph $paragraph\n" if $verbose
  1407. }
  1408. return $s1;
  1409. }
  1410. #
  1411. # process_E - process the E<> pod directive which seems to escape a character.
  1412. #
  1413. sub process_E {
  1414. my($str) = @_;
  1415. for ($str) {
  1416. s,([^/].*),\&$1\;,g;
  1417. }
  1418. return $str;
  1419. }
  1420. #
  1421. # process_Z - process the Z<> pod directive which really just amounts to
  1422. # ignoring it. this allows someone to start a paragraph with an =
  1423. #
  1424. sub process_Z {
  1425. my($str) = @_;
  1426. # there is no equivalent in HTML for this so just ignore it.
  1427. $str = "";
  1428. return $str;
  1429. }
  1430. #
  1431. # process_S - process the S<> pod directive which means to convert all
  1432. # spaces in the string to non-breaking spaces (in HTML-eze).
  1433. #
  1434. sub process_S {
  1435. my($str) = @_;
  1436. # convert all spaces in the text to non-breaking spaces in HTML.
  1437. $str =~ s/ /&nbsp;/g;
  1438. return $str;
  1439. }
  1440. #
  1441. # process_X - this is supposed to make an index entry. we'll just
  1442. # ignore it.
  1443. #
  1444. sub process_X {
  1445. return '';
  1446. }
  1447. #
  1448. # Adapted from Nick Ing-Simmons' PodToHtml package.
  1449. sub relative_url {
  1450. my $source_file = shift ;
  1451. my $destination_file = shift;
  1452. my $source = URI::file->new_abs($source_file);
  1453. my $uo = URI::file->new($destination_file,$source)->abs;
  1454. return $uo->rel->as_string;
  1455. }
  1456. #
  1457. # finish_list - finish off any pending HTML lists. this should be called
  1458. # after the entire pod file has been read and converted.
  1459. #
  1460. sub finish_list {
  1461. while ($listlevel > 0) {
  1462. print HTML "</DL>\n";
  1463. $listlevel--;
  1464. }
  1465. }
  1466. #
  1467. # htmlify - converts a pod section specification to a suitable section
  1468. # specification for HTML. if first arg is 1, only takes 1st word.
  1469. #
  1470. sub htmlify {
  1471. my($compact, $heading) = @_;
  1472. if ($compact) {
  1473. $heading =~ /^(\w+)/;
  1474. $heading = $1;
  1475. }
  1476. # $heading = lc($heading);
  1477. $heading =~ s/[^\w\s]/_/g;
  1478. $heading =~ s/(\s+)/ /g;
  1479. $heading =~ s/^\s*(.*?)\s*$/$1/s;
  1480. $heading =~ s/ /_/g;
  1481. $heading =~ s/\A(.{32}).*\Z/$1/s;
  1482. $heading =~ s/\s+\Z//;
  1483. $heading =~ s/_{2,}/_/g;
  1484. return $heading;
  1485. }
  1486. BEGIN {
  1487. }
  1488. 1;