cache-html.pl: Now with transaction batching support

I'm trying to seed the cache from another machine, and batching several
pages into one transaction significantly reduces the effects of network latency.
This commit is contained in:
Yorhel 2025-05-25 15:13:42 +02:00
parent d3bebc8888
commit bed455039e

View file

@ -8,10 +8,13 @@
# Be more verbose.
#
# --delay=$SEC
# Seconds (fraction supported) to wait between rendering subsequent pages.
# Seconds (fraction supported) to wait between batches.
#
# --maxpages=$NUM
# Maximum number of pages to render before exiting.
# --batch=$NUM
# Number of pages to render per batch.
#
# --maxbatches=$NUM
# Maximum number of batches to run before exiting.
#
# Multiple instances of this script can run in parallel in order to speed up cache generation.
@ -28,30 +31,39 @@ use ManUtils;
# Option parsing and main rendering loop: find rows in `contents` whose html
# column is NULL, render them with ManUtils and store the result.
#
# NOTE(review): this span looks like a diff hunk whose +/- markers were
# stripped, so statements from the previous and the new revision appear side
# by side (two GetOptions calls, two SELECTs, two UPDATEs). The comments below
# describe each statement as written; confirm against the real file.

# Option defaults.
my $verbose = 0;
my $delay = 0;
# Presumably the pre-batching option set (--maxpages) -- TODO confirm.
my $maxpages = 0;
GetOptions('verbose' => \$verbose, 'delay=i' => \$delay, 'maxpages=i' => \$maxpages);
# Number of rows requested per transaction (bound to LIMIT $1 below).
my $batch = 1;
# Starting at 0, the `!--$maxbatches` test at the end of the loop sees -1, -2,
# ... and so does not trigger; a positive value stops after that many rounds.
my $maxbatches = 0;
# NOTE(review): 'delay=i' accepts integer values only, while the usage text
# advertises fractional seconds for --delay; 'delay=f' may be intended --
# confirm. The return value of GetOptions is not checked here.
GetOptions('verbose' => \$verbose, 'delay=i' => \$delay, 'batch=i' => \$batch, 'maxbatches=i' => \$maxbatches);
# Connection string is taken from $ENV{MANNED_PG}, or '' when that is undefined.
my $conn = FU::Pg->connect($ENV{MANNED_PG}//'');
# One transaction per loop iteration.
while (1) {
my $txn = $conn->txn;
# Single-row variant: lock one unrendered row, skipping rows that are already
# locked.
my($id, $content) = $txn->q('SELECT id, content FROM contents WHERE html IS NULL FOR UPDATE SKIP LOCKED LIMIT 1')->rowl;
# Leave the loop when no id came back.
last if !$id;
my $start = time;
# eval keeps a rendering failure from aborting the script; $html is then undef.
my $html = eval { ManUtils::html ManUtils::fmt $content };
my $end = time;
# Batched variant: lock up to $batch unrendered rows. The result is consumed
# below as one flat list of alternating id and content values.
my $lst = $txn->q('SELECT id, content FROM contents WHERE html IS NULL FOR UPDATE SKIP LOCKED LIMIT $1', $batch)->flat;
# Leave the loop when the list is empty.
last if !@$lst;
# Should be rare. Do save *something* in the database, so we won't get
# stuck trying this again and we can easily query for broken pages.
if (!defined $html) {
$html = '(Error rendering man page)';
warn "$id: Error rendering page: $@\n";
# Flat list of id, html pairs; passed as the bind values of the batched UPDATE.
my @save;
# Walk the flat result two elements at a time.
for my($id, $content) (@$lst) {
my $start = time;
# eval keeps a rendering failure from aborting the batch; $html is then undef.
my $html = eval { ManUtils::html ManUtils::fmt $content };
my $end = time;
# Should be rare. Do save *something* in the database, so we won't get
# stuck trying this again and we can easily query for broken pages.
if (!defined $html) {
$html = '(Error rendering man page)';
warn "$id: Error rendering page: $@\n";
}
# Per-page statistics, printed only with --verbose.
printf "%10d: %5.1f ms, %d raw, %d html\n", $id, ($end-$start)*1000, length($content), length($html) if $verbose;
push @save, $id, $html;
}
# Single-row variant of the write-back.
$txn->q('UPDATE contents SET html = $1 WHERE id = $2', $html, $id)->exec;
# Batched write-back: one UPDATE joined against a generated VALUES list. Pair
# number N uses placeholder $(2N-1), cast to int, for the id and $(2N) for the
# html, matching the order in which @save was filled.
$txn->q('UPDATE contents SET html = nhtml FROM (VALUES '.
join(', ', map sprintf('($%d::int, $%d)', $_*2-1, $_*2), 1..@save/2).
') d(nid, nhtml) WHERE id = nid', @save
)->exec;
$txn->commit;
printf "%10d: %5.1f ms, %d raw, %d html\n", $id, ($end-$start)*1000, length($content), length($html) if $verbose;
# Countdown exits: stop once the counter reaches exactly 0 after decrementing.
last if !--$maxpages;
last if !--$maxbatches;
# Pause before the next iteration when a non-zero delay was given.
# NOTE(review): fractional values need a sleep() that supports them (the
# import block is not visible here) -- confirm.
sleep $delay if $delay;
}