Also use pandoc to convert POD files

Turns out it now supports that as well. Yay.
This commit is contained in:
Yorhel 2025-02-12 14:20:13 +01:00
parent af1f4c0dd2
commit 5acc8eee92
5 changed files with 74 additions and 91 deletions

View file

@ -9,11 +9,12 @@
# .md files because those already have a title embedded in the file.
#
# Supported file types:
# .md: Converted directly into .html with pandoc.
# .pod: Perl's Plain Old Documentation, converted through HTML into a .md
# file which is then converted into .html again with the proper template.
# .log: A ChangeLog-formatted file, converted through .md into .html.
# .mdoc / .man: manual page, converted through .md into .html.
# .md: Converted directly into .html with pandoc.
# (the types below are first converted into .md and then into .html)
# .log: A ChangeLog-style file
# .pod: Perl's Plain Old Documentation
# .mdoc: Mandoc page
# .man: Manual page
PAGES=\
"contributing.md"\
"doc.md"\
@ -165,18 +166,10 @@ ${FETCH}: dat/%:
@curl --user-agent 'YHDEV' -s ${shell for i in ${PAGES}; do case "$$i" in "$* "*) echo "$$i" | awk '{print$$2}';; esac; done} -o "$@"
# There is a 'pod2markdown' program, but going through HTML with a little bit
# of Perl magic tends to give better results, if only because definition lists
# are properly converted this way and I have more control over links.
# The final pass through perl is to fix a pandoc bug where a code block as the
# first thing inside a definition does not survive a round-trip.
${POD_MD}: dat/%.md: dat/%.pod mkpod.pl
${POD_MD}: dat/%.md: dat/%.pod
@echo "POD $*"
@cat "$<" | ./mkpod.pl |\
pandoc -f html -t markdown -s \
--metadata title="${shell for i in ${PAGES}; do case "$$i" in "$*.pod "*) echo "$$i" | sed -E 's/[^ ]+ +[^ ]+ +//';; esac; done}" |\
perl -e '$$/=undef; print (scalar(<>) =~ s/: ([^\s].*)\n ([^\s].*)/: $$1\n $$2/gr)' >"$@"
@(echo "% ${shell for i in ${PAGES}; do case "$$i" in "$*.pod "*) echo "$$i" | sed -E 's/[^ ]+ +[^ ]+ +//';; esac; done}";\
sed -E -e 's/^=encoding.+//' $< | pandoc -f pod -t markdown | ./manproc.pl pod ) >"$@"
${CHANGES_MD}: dat/%.md: dat/%.log mkchangelog.pl
@echo "MD $*"
@ -186,12 +179,12 @@ ${CHANGES_MD}: dat/%.md: dat/%.log mkchangelog.pl
${MDOC_MD}: dat/%.md: dat/%.mdoc
@echo "MDOC $*"
@(echo "% ${shell for i in ${PAGES}; do case "$$i" in "$*.mdoc "*) echo "$$i" | sed -E 's/[^ ]+ +[^ ]+ +//';; esac; done}";\
pandoc -f mdoc -t markdown $< | ./mdocproc.pl ) >"$@"
pandoc -f mdoc -t markdown $< | ./manproc.pl mdoc ) >"$@"
${MAN_MD}: dat/%.md: dat/%.man
@echo "MAN $*"
@(echo "% ${shell for i in ${PAGES}; do case "$$i" in "$*.man "*) echo "$$i" | sed -E 's/[^ ]+ +[^ ]+ +//';; esac; done}";\
pandoc -f man -t markdown $< | ./mdocproc.pl ) >"$@"
pandoc -f man -t markdown $< | ./manproc.pl man ) >"$@"
dat/download.md: mkdlpage.pl pub/download/*.sha256

View file

@ -6,6 +6,6 @@ Build-time:
- GNU Make
- curl
- Perl (+ Pod::Simple)
- Perl
- TUWF
- pandoc (>= 3.6)

62
manproc.pl Executable file
View file

@ -0,0 +1,62 @@
#!/usr/bin/perl
use v5.36;
# Conversion mode, taken from the first command-line argument. Exactly one of
# the three flags below is true for a recognised mode ('man', 'mdoc' or 'pod').
# A missing argument is treated as an empty mode so that the comparisons do not
# trigger "uninitialized value" warnings.
my $mode = $ARGV[0] // '';
my $man  = $mode eq 'man';
my $mdoc = $mode eq 'mdoc';
my $pod  = $mode eq 'pod';

# With no recognised mode none of the mode-specific fix-ups are enabled, so say
# so explicitly instead of leaving the caller to guess.
warn "$0: unknown or missing mode '$mode' (expected man, mdoc or pod)\n"
    unless $man || $mdoc || $pod;
# Map a manual page reference such as "ncdu(1)" to the URL it should link to.
#
#   $page - reference in "name(section)" form.
#
# Pages listed in the table below resolve to their path on this site. Any
# other reference of the form name(section) resolves to the matching page on
# manned.org. Returns an empty string when $page is not such a reference.
sub man2url($page) {
    state %local_url_for = (
        'globsterctl(1)'     => '/globster/ctl',
        'globster-launch(1)' => '/globster/launch',
        'globster(1)'        => '/globster/daemon',
        'globster-api(7)'    => '/globster/api',
        'ncdu(1)'            => '/ncdu/man',
    );

    $page //= '';
    return $local_url_for{$page} if exists $local_url_for{$page};

    # Sections are not always a single character (e.g. 3p, 3pm, 3ssl), so
    # accept anything between the final pair of parentheses.
    return "https://manned.org/man/$1.$2" if $page =~ /(.+)\(([^()]+)\)/;

    return '';
}
# Map a POD module reference to the URL it should link to.
#
#   $page - module name, e.g. "TUWF::DB"; an empty name maps to an empty URL.
#   $sect - optional section title within that page.
#
# Modules listed in the table resolve to their path on this site, everything
# else to metacpan.org. When a section is given it is appended as a fragment:
# lower-cased, with each run of whitespace replaced by a single hyphen.
sub pod2url($page, $sect='') {
    my %local_path_for = (
        'TUWF'           => '/tuwf/man',
        'TUWF::DB'       => '/tuwf/man/db',
        'TUWF::Intro'    => '/tuwf/man/intro',
        'TUWF::Misc'     => '/tuwf/man/misc',
        'TUWF::Request'  => '/tuwf/man/request',
        'TUWF::Response' => '/tuwf/man/response',
        'TUWF::XML'      => '/tuwf/man/xml',
        'TUWF::Validate' => '/tuwf/man/validate',
        ''               => '',
    );

    my $key = $page || '';
    my $url = $local_path_for{$key} // "https://metacpan.org/pod/$page";

    if ($sect) {
        (my $anchor = $sect) =~ s/\s+/-/gs;
        $url .= '#' . lc($anchor);
    }

    return $url;
}
# First pass: read the markdown from STDIN line by line and apply the fix-ups
# that only need to look at one line at a time.
my $in_synopsis = 0;
my @lines;
while (my $line = <STDIN>) {
    # Every heading line decides anew whether we are inside "# SYNOPSIS".
    if ($line =~ /^#/) {
        $in_synopsis = $line =~ /^# SYNOPSIS$/;
    }

    # Improve styling of mdoc synopsis
    if ($mdoc && $in_synopsis) {
        $line =~ s/\\\[`/<span class="synopt">[`/g;
        $line =~ s/\\]/]<\/span>/g;
    }

    # POD code sections have too much indentation
    if ($pod && $line =~ /^ /) {
        $line =~ s/^ //;
    }

    push @lines, $line;
}

# Second pass: link rewriting works on the whole document at once, because the
# /s patterns below may match across line boundaries.
my $doc = join '', @lines;

# Turn man page references into links. The three modes are mutually exclusive,
# so only one of the groups below ever runs.
if ($mdoc) {
    # [name(section)]{.Xr}
    $doc =~ s{\[([^\]]+)\]\{\.Xr\}}{sprintf('[%s](%s)', $1, man2url($1))}eg;
}
if ($man) {
    # **name**(section)
    $doc =~ s{\*\*([a-zA-Z0-9-]+)\*\*\(([1-8])\)}{sprintf('[%s](%s)', "$1($2)", man2url("$1($2)"))}eg;
}
if ($pod) {
    # [text](){manual="name(section)"} is a man page reference. It has to be
    # handled before the module patterns, which exclude ")" in the name.
    $doc =~ s{\[([^\]]+)\]\(\)\{manual="([^"]+\))"\}}{sprintf('[%s](%s)', $1, man2url($2))}seg;

    # POD references too: [text](){manual="Module"}, optionally followed by
    # a section="..." attribute.
    $doc =~ s{\[([^\]]+)\]\(\)\{manual="([^"\)]+)"\}}{sprintf('[%s](%s)', $1, pod2url($2))}seg;
    $doc =~ s{\[([^\]]+)\]\(\)\{manual="([^"\)]+)"\s+section="([^"]+)"\}}{sprintf('[%s](%s)', $1, pod2url($2, $3))}seg;
}

print $doc;

View file

@ -1,17 +0,0 @@
#!/usr/bin/perl
use v5.36;
# Filter markdown from the input files/STDIN to STDOUT, one line at a time.
# Inside the "# SYNOPSIS" section the bracketed options are wrapped in a
# styling span; everywhere else man page references become manned.org links.
my $in_synopsis = 0;
while (my $line = <>) {
    # Every heading line decides anew whether we are inside "# SYNOPSIS".
    if ($line =~ /^#/) {
        $in_synopsis = $line =~ /^# SYNOPSIS$/;
    }

    if (!$in_synopsis) {
        # Turn man page references into links
        $line =~ s{\[([a-zA-Z0-9-]+)\(([1-8])\)\]\{\.Xr\}}{[$1($2)](https://manned.org/man/$1.$2)}g;
    }
    else {
        # Improve styling of synopsis
        $line =~ s/\\\[`/<span class="synopt">[`/g;
        $line =~ s/\\]/]<\/span>/g;
    }

    print $line;
}

View file

@ -1,55 +0,0 @@
#!/usr/bin/perl
# Reads POD from STDIN, renders it to HTML through a Pod::Simple::XHTML
# subclass with custom link resolution, and prints the HTML to STDOUT.
# NOTE(review): there is no `use strict` here, so the undeclared variables
# below ($page, $section, $lnk, $p, $html) are package globals.
package POD2HTML;
use Pod::Simple::XHTML;
@ISA = qw/Pod::Simple::XHTML/;
# Constructor: build the parent object and re-bless it into this package.
sub new {
bless shift->SUPER::new(@_), __PACKAGE__;
}
# Return the URL for a link to a POD page. Takes (invocant, page, section).
# Pages listed in the table map to their path on this site; any other page
# maps to metacpan.org. A section, if given, is appended as a "#fragment"
# with spaces replaced by hyphens.
# NOTE(review): presumably overrides the Pod::Simple::XHTML method of the same
# name - confirm against that module's documentation.
sub resolve_pod_page_link {
(undef, $page, $section) = @_;
$lnk = {
'TUWF' => '/tuwf/man',
'TUWF::DB' => '/tuwf/man/db',
'TUWF::Intro' => '/tuwf/man/intro',
'TUWF::Misc' => '/tuwf/man/misc',
'TUWF::Request' => '/tuwf/man/request',
'TUWF::Response' => '/tuwf/man/response',
'TUWF::XML' => '/tuwf/man/xml',
'TUWF::Validate' => '/tuwf/man/validate',
'' => '',
}->{$page||''} // "https://metacpan.org/pod/$page";
$lnk .= '#'.($section =~ s/ /-/gr) if $section;
$lnk
}
# Return the URL for a link to a man page. Takes (invocant, page, ignored).
# Pages listed in the table map to their path on this site; any other page of
# the form name(S), with a single-character section S, maps to
# https://manned.org/name.S. Anything else yields a false value.
# NOTE(review): presumably overrides the Pod::Simple::XHTML method of the same
# name - confirm against that module's documentation.
sub resolve_man_page_link {
(undef, $page, undef) = @_;
my $lnk = {qw{
globsterctl(1) /globster/ctl
globster-launch(1) /globster/launch
globster(1) /globster/daemon
globster-api(7) /globster/api
ncdu(1) /ncdu/man
}}->{$page||''} || ($page =~ /(.+)\((.)\)/ and "https://manned.org/$1.$2");
$lnk
}
# Main program: parse POD from STDIN, collecting the rendered HTML in $html.
$p = POD2HTML->new();
$html = '';
#$p->anchor_items(1); # pandoc doesn't support this :(
$p->output_string(\$html);
$p->parse_file(\*STDIN);
# Some post-processing to improve the pandoc-generated markdown
# Strip a leading space from every line.
$html =~ s/^ //mg;
# Drop the space directly after an opening <code> tag.
$html =~ s/<code> /<code>/g;
# Remove the paragraph tag that directly follows a list item opening tag.
$html =~ s/<li><p>/<li>/g;
print $html;