manned/lib/ManUtils/ManUtils.pm
Yorhel 6114b17389 Experimental rewrite of grotty to html conversion in Rust
The previous C code was troublesome.
- Didn't handle long lines
- I couldn't convince myself that it was free of memory safety issues
- Needed improving anyway, there are some formatting bugs. These are
  hard to fix in the current code.

I mostly replicated the formatting bugs of the old C implementation in
Rust, and possibly added a few new bugs as well. It's not a significant
improvement right now, more testing and fixing will be needed.

The performance of both implementations is comparable, with the Rust
version being slightly faster in many cases (and slower in some others).
I did spend more time trying to optimize this Rust version than I did
with the old C code. I initially tried a naive-ish conversion of the C
code to Rust, but that turned out to be much slower and I had to resort
to using regexes and different data structures fix that.
2017-01-15 12:17:34 +01:00

104 lines
2.8 KiB
Perl

package ManUtils;
use strict;
use warnings;
use AE;
use AnyEvent::Util;
use Encode 'decode_utf8', 'encode_utf8';
our $VERSION = '0.01';
require XSLoader;
XSLoader::load('ManUtils', $VERSION);
# Usage: $cv = fmt($input, \$output, \@errors)
# $cv = AnyEvent condition variable, fired when done.
# $input = UTF-8 encoded manual page source
# $output = variable that will hold the output when done
# @errors = list of warnings/errors while running groff
sub fmt {
my($input, $output, $errors) = @_;
my $cv = AE::cv;
$$output = '';
@$errors = ();
# tix comes with[1] a custom(?) macro package. But it looks okay even without
# loading that.
# [1] It actually doesn't, the tcllib package appears to have that file, but
# doesn't '.so' it.
$input =~ s/^\.so man.macros$//mg;
# Other .so's should be handled by html()
$input =~ s/^\.so (.+)$/.in -10\n.sp\n\[\[\[MANNEDINCLUDE$1\]\]\]/mg;
$input =
# Disable hyphenation, since that screws up man page references. :-(
".hy 0\n.de hy\n..\n"
# Emulate man-db's --nj option
.".na\n.de ad\n..\n"
.$input;
$input = encode_utf8($input);
# Call grog to figure out which preprocessors to use.
# $MANWIDTH works by using the following groff options: -rLL=100n -rLT=100n
my $grog = run_cmd [qw|grog -Tutf8 -P-c -DUTF-8 -rLL=80n -rLT=80n -|],
'<' => \$input,
'>' => \my $cmd,
'2>' => sub { $_[0] && push @$errors, "grog: $_[0]" };
$grog->cb(sub {
chomp($cmd);
if(!$cmd || $cmd =~ /\n/) {
push @$errors, !$cmd ? 'grog failed to produce output' : "Excessive grog output: $cmd";
$cv->send;
return;
}
my $double;
@$errors = grep {
chomp;
s/^grog: grog: /grog: /;
!$double && /there are several macro packages: (.+)$/ ? ($double = $1) && 0 : 1;
} @$errors;
my @cmd = split / /, $cmd;
if($double) {
my %double = map +($_,1), split / /, $double;
# Use the first macro package in ASCIIbetical order. (This is somewhat
# arbitrary, need to find a better conflict resolution method).
my $macros = (sort keys %double)[0];
# Replace macro arguments with our selected one.
@cmd = grep !$double{$_}, @cmd;
@cmd = (@cmd[0..$#cmd-1], $macros, $cmd[$#cmd]);
push @$errors, "grog detected several macro packages: $double. Using $macros. (@cmd)";
}
my $groff = run_cmd \@cmd,
'<' => \$input,
'>' => \my $fmt,
'2>' => sub { if($_[0]) { chomp(my $e = $_[0]); push @$errors, "groff: $e" } };
$groff->cb(sub {
$$output = $fmt ? decode_utf8($fmt) : '';
$$output =~ s/[\t\s\r\n]+$//;
$cv->send;
});
});
$cv;
}
# Blocking version of fmt(). Returns the formatted man page, errors are
# forwarded to warn().
sub fmt_block {
my $c = shift;
my $cv = fmt $c, \my $out, \my @err;
$cv->recv;
#warn "$_\n" for @err;
$out;
}
1;