Add support for caching HTML-rendered pages

The downside is that this consumes significant disk space, requires
recreating the entire cache whenever the way pages are rendered changes,
and removes the flexibility to add dynamic render-influencing settings in
the future.

Alas, crawlers are getting more aggressive and I don't like the idea of
adding more invasive anti-bot tech.
This might not be enough in the long term; we also have a few slow SQL
queries that I'm not yet sure how to optimize. But this ought to give us
more time, at least.
This commit is contained in:
Yorhel 2025-05-25 14:41:00 +02:00
parent 8edb226a18
commit d3bebc8888
3 changed files with 64 additions and 5 deletions

View file

@ -709,7 +709,7 @@ sub man_nav_($man, $url, $toc, $htmllang) {
sub man_page($man, $url) {
fu->set_lastmod($man->{released});
my($hash, $content) = fu->SQL('SELECT hash, content FROM contents WHERE id =', $man->{content})->rowl;
my($hash, $content, $fmt) = fu->SQL('SELECT hash, content, html FROM contents WHERE id =', $man->{content})->rowl;
if($url->{fmt} eq 'raw') {
fu->set_header('content-type', 'text/plain');
fu->set_header('content-disposition', sprintf 'filename="%s.%s"', $man->{name}, $man->{section});
@ -724,11 +724,11 @@ sub man_page($man, $url) {
my $data = $content =~ s/^\.\\".*//rmg;
if ($data =~ m{^\s*\.so (?:[^\s]*/)?([^\s/]+)\s*$}s) {
($follow) = man_pref_name $1, SQL 'v.id =', $man->{verid};
$content = fu->SQL('SELECT content FROM contents WHERE id =', $follow->{content})->val if $follow;
($content, $fmt) = fu->SQL('SELECT content, html FROM contents WHERE id =', $follow->{content})->rowh if $follow;
}
}
my $fmt = ManUtils::html ManUtils::fmt $content;
$fmt //= ManUtils::html ManUtils::fmt $content;
if($url->{fmt} eq 'txt') {
# TODO: The 'txt' format is kind of broken right now as it includes our HTML formatting codes.
# This feature is a WIP and not advertised at the moment, anyway.