diff --git a/schema.sql b/schema.sql
index 785c81d..45eb069 100644
--- a/schema.sql
+++ b/schema.sql
@@ -14,9 +14,11 @@ CREATE TABLE contents (
   -- encoding conversion and removing 0-bytes. This means taking sha1(content)
   -- may not necessary match the hash, and it's possible for the same content
   -- to be in the database under multiple hashes (but I suspect that's rare).
-  hash    bytea NOT NULL UNIQUE,
-  content text NOT NULL
+  hash     bytea NOT NULL UNIQUE,
+  content  text NOT NULL,
+  html     text
 );
+CREATE INDEX contents_nohtml ON contents (id) WHERE html IS NULL;
 
 
 -- Unique man page, as identified by name & section
diff --git a/util/cache-html.pl b/util/cache-html.pl
new file mode 100755
index 0000000..42696a9
--- /dev/null
+++ b/util/cache-html.pl
@@ -0,0 +1,64 @@
+#!/usr/bin/env perl
+
+# This script populates the HTML-rendered man page cache in the database.
+#
+# Usage: cache-html.pl
+#
+# --verbose
+#   Be more verbose.
+#
+# --delay=$SEC
+#   Seconds (fraction supported) to wait between rendering subsequent pages.
+#
+# --maxpages=$NUM
+#   Maximum number of pages to render before exiting (0 or omitted: no limit).
+#
+# Multiple instances of this script can run in parallel in order to speed up cache generation.
+
+use v5.36;
+use FU::Pg;
+use Getopt::Long;
+# sleep comes from Time::HiRes because the builtin truncates fractional seconds.
+use Time::HiRes qw(time sleep);
+use Cwd 'abs_path';
+our $ROOT;
+BEGIN { ($ROOT = abs_path $0) =~ s{/util/cache-html\.pl$}{}; }
+
+use lib "$ROOT/ManUtils/blib/lib", "$ROOT/ManUtils/blib/arch";
+use ManUtils;
+
+my $verbose = 0;
+my $delay = 0;
+my $maxpages = 0;
+# 'delay=f' (not '=i') so that fractional values such as --delay=0.25 are accepted.
+GetOptions('verbose' => \$verbose, 'delay=f' => \$delay, 'maxpages=i' => \$maxpages)
+    or die "Usage: $0 [--verbose] [--delay=SEC] [--maxpages=NUM]\n";
+
+my $conn = FU::Pg->connect($ENV{MANNED_PG}//'');
+
+while (1) {
+    # One transaction per page. FOR UPDATE SKIP LOCKED lets parallel instances
+    # skip rows that another instance is currently rendering.
+    my $txn = $conn->txn;
+    my($id, $content) = $txn->q('SELECT id, content FROM contents WHERE html IS NULL FOR UPDATE SKIP LOCKED LIMIT 1')->rowl;
+    last if !$id;
+
+    my $start = time;
+    my $html = eval { ManUtils::html ManUtils::fmt $content };
+    my $err = $@; # Capture right away; $@ is a global that later code may clobber.
+    my $end = time;
+
+    # Should be rare. Do save *something* in the database, so we won't get
+    # stuck trying this again and we can easily query for broken pages.
+    if (!defined $html) {
+        $html = '(Error rendering man page)';
+        warn "$id: Error rendering page: $err\n";
+    }
+
+    $txn->q('UPDATE contents SET html = $1 WHERE id = $2', $html, $id)->exec;
+    $txn->commit;
+    printf "%10d: %5.1f ms, %d raw, %d html\n", $id, ($end-$start)*1000, length($content), length($html) if $verbose;
+    # A $maxpages of 0 is decremented to -1 here and never hits 0 again, i.e. no limit.
+    last if !--$maxpages;
+    sleep $delay if $delay > 0;
+}
diff --git a/www/index.pl b/www/index.pl
index dd48c38..7cd6a26 100755
--- a/www/index.pl
+++ b/www/index.pl
@@ -709,7 +709,7 @@ sub man_nav_($man, $url, $toc, $htmllang) {
 
 sub man_page($man, $url) {
     fu->set_lastmod($man->{released});
-    my($hash, $content) = fu->SQL('SELECT hash, content FROM contents WHERE id =', $man->{content})->rowl;
+    my($hash, $content, $fmt) = fu->SQL('SELECT hash, content, html FROM contents WHERE id =', $man->{content})->rowl;
     if($url->{fmt} eq 'raw') {
         fu->set_header('content-type', 'text/plain');
         fu->set_header('content-disposition', sprintf 'filename="%s.%s"', $man->{name}, $man->{section});
@@ -724,11 +724,11 @@ sub man_page($man, $url) {
         my $data = $content =~ s/^\.\\".*//rmg;
         if ($data =~ m{^\s*\.so (?:[^\s]*/)?([^\s/]+)\s*$}s) {
             ($follow) = man_pref_name $1, SQL 'v.id =', $man->{verid};
-            $content = fu->SQL('SELECT content FROM contents WHERE id =', $follow->{content})->val if $follow;
+            ($content, $fmt) = fu->SQL('SELECT content, html FROM contents WHERE id =', $follow->{content})->rowl if $follow;
         }
     }
 
-    my $fmt = ManUtils::html ManUtils::fmt $content;
+    $fmt //= ManUtils::html ManUtils::fmt $content;
     if($url->{fmt} eq 'txt') {
         # TODO: The 'txt' format is kind of broken right now as it includes our HTML formatting codes.
         # This feature is a WIP and not advertised at the moment, anyway.