Drop grog(1) in favor of 'groff -mandoc' + two regexes

This provides an almost 2x speedup in man page rendering time and
removes some heuristics to work around bad guesses by grog(1).

Funnily enough, this also fixes rendering of obscure man pages that
happen to use 'grap' macros; grog detected those correctly, but my groff
installation doesn't actually support rendering them.

No doubt I broke the rendering of some other pages; I will have to see.
This commit is contained in:
Yorhel 2024-04-28 14:57:31 +02:00
parent 83ab6c3671
commit ccecb28835

View file

@ -13,33 +13,6 @@ require XSLoader;
XSLoader::load('ManUtils', $VERSION);
# Usage: $cv = _groff($input, \$output, \@errors, $cv, @cmd)
#   Runs @cmd with $input (plus a small roff preamble) on its stdin.
#   $output = scalar ref; receives the decoded stdout with trailing
#             whitespace stripped, or '' when stdout was empty/false
#   @errors = every non-empty chunk read from stderr is appended to it,
#             prefixed with "groff: "
#   $cv     = condition variable supplied by the caller; ->send is called on
#             it from the command's completion callback, and it is returned
sub _groff {
  my($input, $output, $errors, $cv, @cmd) = @_;

  # The fixed rendering options are inserted right after the first element
  # of @cmd.
  # $MANWIDTH works by using the following groff options: -rLL=100n -rLT=100n
  my @render_opts = qw|-Tutf8 -DUTF-8 -P-c -rLL=80n -rLT=80n|;
  splice @cmd, 1, 0, @render_opts;

  my $preamble =
    # Disable hyphenation, since that screws up man page references. :-(
    ".hy 0\n.de hy\n..\n"
    # Emulate man-db's --nj option
    .".na\n.de ad\n..\n";
  my $source = $preamble.$input;

  # Collect stderr chunk by chunk; empty/undef chunks are skipped.
  my $on_stderr = sub {
    my($chunk) = @_;
    return unless $chunk;
    chomp $chunk;
    push @$errors, "groff: $chunk";
  };

  my $rendered;
  my $proc = run_cmd(\@cmd,
    '<'  => \$source,
    '>'  => \$rendered,
    '2>' => $on_stderr,
  );

  $proc->cb(sub {
    if($rendered) {
      $$output = decode_utf8($rendered);
    } else {
      $$output = '';
    }
    $$output =~ s/[\t\s\r\n]+$//;
    $cv->send;
  });

  return $cv;
}
# Usage: $cv = fmt($input, \$output, \@errors)
# $cv = AnyEvent condition variable, fired when done.
# $input = UTF-8 encoded manual page source
@ -47,53 +20,34 @@ sub _groff {
# @errors = list of warnings/errors while running groff
sub fmt {
  my($input, $output, $errors) = @_;

  # Reset the caller's result variables before starting.
  $$output = '';
  @$errors = ();

  # Encode exactly once; the encoded bytes are what gets written to groff's
  # stdin below.
  $input = encode_utf8($input);

  # Instead of asking grog(1) which preprocessors are needed, look for the
  # opening macro of a table (.TS) or an equation (.EQ) at the start of any
  # line and enable tbl (-t) / eqn (-e) accordingly.
  my @cmd = 'groff';
  push @cmd, '-t' if $input =~ /^\.TS/m;
  push @cmd, '-e' if $input =~ /^\.EQ/m;
  # $MANWIDTH works by using the following groff options: -rLL=100n -rLT=100n
  push @cmd, qw/-mandoc -Tutf8 -DUTF-8 -P-c -rLL=80n -rLT=80n -/;

  $input =
    # Disable hyphenation, since that screws up man page references. :-(
    ".hy 0\n.de hy\n..\n"
    # Emulate man-db's --nj option
    .".na\n.de ad\n..\n"
    .$input;

  my $cv = AE::cv;

  # groff's stdout is collected directly into the caller's $$output; each
  # non-empty chunk from stderr becomes one "groff: ..." entry in @$errors.
  my $groff = run_cmd(\@cmd,
    '<' => \$input,
    '>' => $output,
    '2>' => sub { if($_[0]) { chomp(my $e = $_[0]); push @$errors, "groff: $e" } },
  );

  $groff->cb(sub {
    # decode_utf8 returns the decoded string rather than converting its
    # argument in place, so the result has to be assigned back.
    $$output = defined $$output && length $$output ? decode_utf8($$output) : '';
    # Strip trailing whitespace (including newlines).
    $$output =~ s/\s+\z//;
    $cv->send;
  });

  return $cv;
}