Large-ish SQL schema revamp/optimizations
Primarily aimed at reducing the size of the old 'man' (now: files) table, using smaller integers to refer to man contents and text fields, and storing a shorthash as an integer for quick lookups. This better normalization also removes the need to keep a separate 'man_index' cache for the search function. The old schema wasn't necessarily bad, but I was in the mood for some optimizations. And a little cleanup. Prolly introduces a bunch of new bugs, I haven't tested this too well.
This commit is contained in:
parent
6f7f59c6df
commit
f376f1f137
6 changed files with 268 additions and 128 deletions
140
www/index.pl
140
www/index.pl
|
|
@ -73,8 +73,11 @@ sub sysbyshort { state $s ||= { map +($_->{short}, $_), systems->@* } }
|
|||
# Firefox seems to escape [ and ] in URLs. It doesn't really have to...
|
||||
sub normalize_name { $_[0] =~ s/%5b/[/irg =~ s/%5d/]/irg =~ s/%20/ /rg }
|
||||
|
||||
sub shorthash_to_hex { unpack 'H*', pack 'i', $_[0] } # int -> hex
|
||||
sub shorthash_to_int { unpack 'i', pack 'H*', $_[0] } # hex -> int
|
||||
|
||||
# Subquery returning all packages that have a man page.
|
||||
my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM man m WHERE m.package = pv.id)))';
|
||||
my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id)))';
|
||||
|
||||
sub escape_like { $_[0] =~ s/([_%\\])/\\$1/rg }
|
||||
|
||||
|
|
@ -86,13 +89,6 @@ sub sql_join {
|
|||
sub sql_and { @_ ? sql_join 'AND', map sql('(', $_, ')'), @_ : sql '1=1' }
|
||||
sub sql_or { @_ ? sql_join 'OR', map sql('(', $_, ')'), @_ : sql '1=0' }
|
||||
|
||||
# Subquery to match $sql_expr::bytea against a $prefix (hex string). Hopefully indexable.
|
||||
sub sql_hash_prefix {
|
||||
my($sql_expr, $prefix) = @_;
|
||||
my $esc = unpack 'H*', escape_like pack 'H*', $prefix;
|
||||
sql '(', $sql_expr, "like ('\\x$esc'::bytea||'%'))"
|
||||
}
|
||||
|
||||
|
||||
sub pkg_frompath {
|
||||
my($sys_where, $path) = @_;
|
||||
|
|
@ -139,27 +135,29 @@ sub man_pref {
|
|||
# 7. sysrel: Prefer a more recent system release over an older release
|
||||
# 8. secorder: Lower sections before higher sections (because man does it this way, for some reason)
|
||||
# 9. pkgdate: Prefer more recent packages (cross-distro)
|
||||
# 10. Fall back on hash comparison, to ensure the result is stable
|
||||
# 10. Fall back on shorthash comparison, to ensure the result is stable
|
||||
|
||||
state $archid = sysbyshort->{arch}{id};
|
||||
state $debid = (sort { $b->{id} <=> $a->{id} } grep $_->{short} =~ /^debian-/, systems->@*)[0]{id};
|
||||
|
||||
tuwf->dbRowi(q{
|
||||
WITH unfiltered AS (
|
||||
SELECT s AS sys, p AS pkg, v AS ver, m AS man
|
||||
FROM man m
|
||||
JOIN package_versions v ON v.id = m.package
|
||||
SELECT m.name, m.section, l.locale, f.shorthash, f.content, f.filename, s AS sys, p AS pkg, v AS ver
|
||||
FROM files f
|
||||
JOIN locales l ON l.id = f.locale
|
||||
JOIN mans m ON m.id = f.man
|
||||
JOIN package_versions v ON v.id = f.pkgver
|
||||
JOIN packages p ON p.id = v.package
|
||||
JOIN systems s ON s.id = p.system
|
||||
WHERE}, $where, q{
|
||||
), f_english AS(
|
||||
SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale((man).locale)) OR is_english_locale((man).locale)
|
||||
SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale(locale)) OR is_english_locale(locale)
|
||||
), f_pkgver AS(
|
||||
SELECT * FROM f_english a WHERE NOT EXISTS(SELECT 1 FROM f_english b WHERE (a.ver).package = (b.ver).package AND (a.ver).released < (b.ver).released)
|
||||
), f_stdloc AS(
|
||||
SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location((man).filename)) OR is_standard_man_location((man).filename)
|
||||
SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location(filename)) OR is_standard_man_location(filename)
|
||||
), f_secmatch AS(
|
||||
SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE (man).section =}, \$section, q{) OR (man).section =}, \$section, q{
|
||||
SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE section =}, \$section, q{) OR section =}, \$section, q{
|
||||
), f_arch AS(
|
||||
SELECT * FROM f_secmatch WHERE NOT EXISTS(SELECT 1 FROM}, length $section ? 'f_secmatch' : 'f_stdloc', qq{WHERE (sys).id = $archid) OR (sys).id = $archid
|
||||
), f_debian AS(
|
||||
|
|
@ -167,13 +165,13 @@ sub man_pref {
|
|||
), f_sysrel AS(
|
||||
SELECT * FROM f_debian a WHERE NOT EXISTS(SELECT 1 FROM f_debian b WHERE (a.sys).name = (b.sys).name AND (a.sys).id < (b.sys).id)
|
||||
), f_secorder AS(
|
||||
SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE (a.man).section > (b.man).section)
|
||||
SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE section > section)
|
||||
), f_pkgdate AS(
|
||||
SELECT * FROM f_secorder a WHERE NOT EXISTS(SELECT 1 FROM f_secorder b WHERE (a.ver).released < (b.ver).released)
|
||||
)
|
||||
SELECT (pkg).system, (pkg).category, (pkg).name AS package, (ver).version, (ver).released, (ver).id AS verid,
|
||||
(man).name, (man).section, (man).filename, (man).locale, encode((man).hash, 'hex') AS hash
|
||||
FROM f_pkgdate ORDER BY (man).hash LIMIT 1
|
||||
name, section, filename, locale, shorthash, content
|
||||
FROM f_pkgdate ORDER BY shorthash LIMIT 1
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -562,13 +560,11 @@ sub search_man {
|
|||
my $sect = $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$// ? $1 : '';
|
||||
my $name = $q =~ s/^([a-zA-Z0-9,.:_-]+)// ? $1 : '';
|
||||
|
||||
return !$name ? [] : tuwf->dbAll(
|
||||
'SELECT name, section FROM man_index !W ORDER BY name, section LIMIT ?',
|
||||
{
|
||||
'lower(name) LIKE ?' => escape_like(lc $name).'%',
|
||||
$sect ? ('section ILIKE ?' => escape_like(lc $sect).'%') : (),
|
||||
},
|
||||
$limit
|
||||
return !$name ? [] : tuwf->dbAlli(
|
||||
'SELECT name, section FROM mans WHERE', sql_and(
|
||||
sql('lower(name) LIKE', \(escape_like(lc $name).'%')),
|
||||
$sect ? sql('section ILIKE', \(escape_like(lc $sect).'%')) : (),
|
||||
), 'ORDER BY name, section LIMIT', \$limit,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -614,10 +610,11 @@ TUWF::get qr{/([^/]+)/([0-9a-f]{8})/src} => sub {
|
|||
|
||||
my $nfo = tuwf->dbRowi('
|
||||
SELECT m.name, m.section, v.released, c.content
|
||||
FROM man m
|
||||
JOIN package_versions v ON v.id = m.package
|
||||
JOIN contents c ON c.hash = m.hash
|
||||
WHERE m.name =', \$name, 'AND', sql_hash_prefix('m.hash', $hash), '
|
||||
FROM files f
|
||||
JOIN mans m ON m.id = f.man
|
||||
JOIN package_versions v ON v.id = f.pkgver
|
||||
JOIN contents c ON c.id = f.content
|
||||
WHERE m.name =', \$name, 'AND f.shorthash =', \shorthash_to_int($hash), '
|
||||
LIMIT 1'
|
||||
);
|
||||
return tuwf->resNotFound if !$nfo->{name};
|
||||
|
|
@ -633,13 +630,16 @@ sub _man_nav {
|
|||
my($man, $toc) = @_;
|
||||
|
||||
my @sect = map $_->{section}, tuwf->dbAlli(
|
||||
'SELECT DISTINCT section FROM man WHERE name =', \$man->{name}, 'ORDER BY section'
|
||||
'SELECT DISTINCT section FROM mans WHERE name =', \$man->{name}, 'ORDER BY section'
|
||||
)->@*;
|
||||
|
||||
my @lang = map $_->{lang}, tuwf->dbAlli(
|
||||
"SELECT DISTINCT substring(locale from '^[^.]+') AS lang
|
||||
FROM man WHERE name =", \$man->{name}, 'AND section =', \$man->{section}, "
|
||||
ORDER BY substring(locale from '^[^.]+') NULLS FIRST"
|
||||
"SELECT DISTINCT substring(l.locale from '^[^.]+') AS lang
|
||||
FROM files f
|
||||
JOIN mans m ON m.id = f.man
|
||||
JOIN locales l ON l.id = f.locale
|
||||
WHERE m.name =", \$man->{name}, 'AND m.section =', \$man->{section}, "
|
||||
ORDER BY substring(l.locale from '^[^.]+') NULLS FIRST"
|
||||
)->@*;
|
||||
return if !@sect && !@lang && !@$toc;
|
||||
|
||||
|
|
@ -707,7 +707,7 @@ sub soelim {
|
|||
my($man) = $verid ? man_pref_name $name, sql 'v.id =', \$verid : ();
|
||||
$man->{name}
|
||||
# Recursive soelim, but the second call gets $verid=0 so we don't keep checking the database
|
||||
? soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')"))
|
||||
? soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE id =", \$man->{content}))
|
||||
: ".in -10\n.sp\n\[\[\[MANNEDINCLUDE$path\]\]\]"
|
||||
}emg;
|
||||
$src;
|
||||
|
|
@ -727,12 +727,12 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
|
|||
# the same package as the requested man page. Use the man_pref logic here
|
||||
# to deterministically select a good package.
|
||||
my($man, undef) = $shorthash
|
||||
? man_pref undef, sql 'm.name =', \$name, 'AND', sql_hash_prefix 'm.hash', $shorthash
|
||||
? man_pref undef, sql 'm.name =', \$name, 'AND f.shorthash =', shorthash_to_int($shorthash)
|
||||
: man_pref_name $name, 'true';
|
||||
return tuwf->resNotFound() if !$man->{name};
|
||||
|
||||
my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid},
|
||||
tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')");
|
||||
my $content = tuwf->dbRowi('SELECT encode(hash, \'hex\') AS hash, content FROM contents WHERE id =', \$man->{content});
|
||||
my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid}, $content->{content};
|
||||
my @toc;
|
||||
$fmt =~ s{\n<b>(.+?)<\/b>\n}{
|
||||
push @toc, $1;
|
||||
|
|
@ -741,9 +741,14 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
|
|||
}eg;
|
||||
|
||||
my $hasversions = tuwf->dbVali(
|
||||
'SELECT 1 FROM man WHERE name =', \$man->{name}, 'AND section =', \$man->{section},
|
||||
'AND locale IS NOT DISTINCT FROM', \$man->{locale},
|
||||
'AND hash <> decode(', \$man->{hash}, ", 'hex') LIMIT 1"
|
||||
'SELECT 1
|
||||
FROM files f
|
||||
JOIN mans m ON m.id = f.man
|
||||
JOIN locales l ON l.id = f.locale
|
||||
WHERE m.name =', \$man->{name}, 'AND m.section =', \$man->{section}, '
|
||||
AND l.locale =', \$man->{locale}, '
|
||||
AND f.shorthash <> ', \shorthash_to_int($man->{shorthash}), '
|
||||
LIMIT 1'
|
||||
);
|
||||
|
||||
tuwf->resLastMod($man->{released});
|
||||
|
|
@ -751,14 +756,14 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
|
|||
_man_nav $man, \@toc;
|
||||
div_ id => 'manbuttons', sub {
|
||||
h1_ $man->{name};
|
||||
ul_ 'data-hash' => $man->{hash},
|
||||
ul_ 'data-hash' => $content->{hash},
|
||||
'data-name' => $man->{name},
|
||||
'data-section' => $man->{section},
|
||||
'data-locale' => $man->{locale}||'',
|
||||
'data-hasversions' => $hasversions?1:0,
|
||||
sub {
|
||||
li_ sub { a_ href => "/$man->{name}/".substr($man->{hash}, 0, 8).'/src', 'source' };
|
||||
li_ sub { a_ href => "/$man->{name}/".substr($man->{hash}, 0, 8), 'permalink' };
|
||||
li_ sub { a_ href => "/$man->{name}/".shorthash_to_hex($man->{shorthash}).'/src', 'source' };
|
||||
li_ sub { a_ href => "/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'permalink' };
|
||||
}
|
||||
};
|
||||
div_ id => 'manres', class => 'hidden', '';
|
||||
|
|
@ -828,7 +833,7 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
|
|||
SELECT id, version, released
|
||||
FROM package_versions v
|
||||
WHERE package =', \$pkg->{id}, '
|
||||
AND EXISTS(SELECT 1 FROM man m WHERE m.package = v.id)
|
||||
AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = v.id)
|
||||
ORDER BY released DESC'
|
||||
);
|
||||
my $sel = $ver ? (grep $_->{version} eq $ver, @$vers)[0] : $vers->[0];
|
||||
|
|
@ -836,11 +841,14 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
|
|||
|
||||
my $p = tuwf->validate(get => p => { onerror => 1, uint => 1, range => [1,100] })->data;
|
||||
|
||||
my $count = tuwf->dbVali('SELECT count(*) FROM man WHERE package =', \$sel->{id});
|
||||
my $count = tuwf->dbVali('SELECT count(*) FROM files WHERE pkgver =', \$sel->{id});
|
||||
my $mans = tuwf->dbPagei({ results => 200, page => $p },
|
||||
"SELECT name, encode(hash, 'hex') AS hash, section, locale, filename
|
||||
FROM man WHERE package =", \$sel->{id}, '
|
||||
ORDER BY name, locale NULLS FIRST, filename'
|
||||
"SELECT m.name, m.section, f.shorthash, f.filename, l.locale
|
||||
FROM files f
|
||||
JOIN locales l ON l.id = f.locale
|
||||
JOIN mans m ON m.id = f.man
|
||||
WHERE f.pkgver =", \$sel->{id}, '
|
||||
ORDER BY m.name, l.locale, f.filename'
|
||||
);
|
||||
|
||||
# Latest version of this package determines last modification date of the page.
|
||||
|
|
@ -871,7 +879,7 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
|
|||
paginate_ "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$sel->{version}?p=", $count, 200, $p;
|
||||
ul_ sub {
|
||||
li_ sub {
|
||||
a_ href => "/$_->{name}/".substr($_->{hash},0,8), "$_->{name}($_->{section})";
|
||||
a_ href => "/$_->{name}/".shorthash_to_hex($_->{shorthash}), "$_->{name}($_->{section})";
|
||||
b_ " $_->{locale}" if $_->{locale};
|
||||
small_ " $_->{filename}";
|
||||
} for(@$mans);
|
||||
|
|
@ -922,7 +930,7 @@ TUWF::get qr{/man/([^/]+)/(.+)} => sub {
|
|||
}
|
||||
return tuwf->resNotFound if !$man;
|
||||
|
||||
tuwf->resRedirect("/$man->{name}/".substr($man->{hash}, 0, 8), 'temp');
|
||||
tuwf->resRedirect("/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'temp');
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -932,9 +940,9 @@ TUWF::get qr{/lang/([^/]+)/([^/]+)} => sub {
|
|||
my $lang = tuwf->capture(1);
|
||||
my $name = normalize_name tuwf->capture(2);
|
||||
my($man, undef) = man_pref_name $name,
|
||||
sql "substring(locale from '^[^.]+') ilike", \escape_like $lang;
|
||||
return tuwf->resNotFound if !$man->{name};
|
||||
tuwf->resRedirect("/$man->{name}/".substr($man->{hash}, 0, 8), 'temp');
|
||||
sql "substring(l.locale from '^[^.]+') ilike", \(escape_like($lang).'%');
|
||||
return tuwf->resNotFound if !length $man->{name};
|
||||
tuwf->resRedirect("/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'temp');
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -949,22 +957,24 @@ TUWF::get '/json/tree.json' => sub {
|
|||
return tuwf->resNotFound() if !$f->{hash} && !($f->{section} && $f->{name});
|
||||
|
||||
my $l = tuwf->dbAlli("
|
||||
SELECT p.system, p.category, p.name AS package, v.version, v.released, v.id AS verid, m.name, m.section, m.filename, m.locale, encode(m.hash, 'hex') AS hash
|
||||
FROM man m
|
||||
JOIN package_versions v ON v.id = m.package
|
||||
SELECT p.system, p.category, p.name AS package, v.version, v.released, v.id AS verid, m.name, m.section, f.filename, f.shorthash, l.locale
|
||||
FROM files f
|
||||
JOIN locales l ON l.id = f.locale
|
||||
JOIN mans m ON m.id = f.man
|
||||
JOIN package_versions v ON v.id = f.pkgver
|
||||
JOIN packages p ON p.id = v.package
|
||||
JOIN systems s ON s.id = p.system
|
||||
WHERE", sql_and(
|
||||
length $f->{hash} ? sql 'm.hash = decode(', \$f->{hash}, ", 'hex')" : (),
|
||||
length $f->{hash} ? sql 'f.content = (SELECT id FROM contents WHERE hash = decode(', \$f->{hash}, ", 'hex'))" : (),
|
||||
length $f->{name} ? sql 'm.name =', \$f->{name} : (),
|
||||
length $f->{section} ? sql 'm.section =', \$f->{section} : (),
|
||||
length $f->{locale} ? sql 'm.locale =', \$f->{locale} : (),
|
||||
defined $f->{locale} && $f->{locale} eq '' ? 'm.locale IS NULL' : (),
|
||||
defined $f->{locale} ? sql 'l.locale =', \$f->{locale} : (),
|
||||
), '
|
||||
ORDER BY s.name, s.id DESC, p.name, v.released DESC, m.name, m.locale NULLS FIRST, m.filename
|
||||
ORDER BY s.name, s.id DESC, p.name, v.released DESC, m.name, l.locale, f.filename
|
||||
');
|
||||
|
||||
# Convert the list into a tree
|
||||
my $cur = $f->{cur} ? shorthash_to_int substr $f->{cur}, 0, 8 : 0;
|
||||
my $tree = [];
|
||||
my($sys, $sysver, $pkg, $pkgver);
|
||||
for my $m (@$l) {
|
||||
|
|
@ -992,12 +1002,12 @@ TUWF::get '/json/tree.json' => sub {
|
|||
$pkgver && $pkgver eq $m->{version} ? {name=>''} :
|
||||
{name => $m->{version}, href => "/pkg/".sysbyid->{$m->{system}}{short}."/$m->{category}/$m->{package}/$m->{version}"},
|
||||
{ name => "$m->{name}($m->{section})",
|
||||
$f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? ()
|
||||
: (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8))
|
||||
$f->{hash} || $cur == $m->{shorthash} ? ()
|
||||
: (href => sprintf('/%s/%s', $m->{name}, shorthash_to_hex $m->{shorthash}))
|
||||
},
|
||||
{ name => substr($m->{hash}, 0, 8),
|
||||
$f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? ()
|
||||
: (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8))
|
||||
{ name => shorthash_to_hex($m->{shorthash}),
|
||||
$f->{hash} || $cur == $m->{shorthash} ? ()
|
||||
: (href => sprintf('/%s/%s', $m->{name}, shorthash_to_hex $m->{shorthash}))
|
||||
},
|
||||
{ name => $m->{filename} }
|
||||
];
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue