Large-ish SQL schema revamp/optimizations

Primarily aimed at reducing the size of the old 'man' (now: files)
table, using smaller integers to refer to man contents and text fields,
and storing a shorthash as an integer for quick lookups. This better
normalization also removes the need to keep a separate 'man_index' cache
for the search function.

The old schema wasn't necessarily bad, but I was in the mood for some
optimizations. And a little cleanup.

Prolly introduces a bunch of new bugs, I haven't tested this too well.
This commit is contained in:
Yorhel 2021-12-14 15:06:05 +01:00
parent 6f7f59c6df
commit f376f1f137
6 changed files with 268 additions and 128 deletions

View file

@ -87,7 +87,7 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
// XXX: Should we update released & arch here?
verid = res?.get(0);
info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
tr.query("DELETE FROM man WHERE package = $1", &[&verid]).unwrap();
tr.query("DELETE FROM files WHERE pkgver = $1", &[&verid]).unwrap();
Some(verid)
} else {
@ -97,13 +97,21 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
}
fn insert_man_row<T: postgres::GenericClient>(tr: &mut T, verid: i32, path: &str, enc: &str, hash: &[u8]) {
fn insert_man_row<T: postgres::GenericClient>(tr: &mut T, verid: i32, path: &str, enc: &str, content: i32) {
let (name, sect, locale) = man::parse_path(path).unwrap();
let locale = if locale == "" { None } else { Some(locale) };
if let Err(e) = tr.execute(
"INSERT INTO man (package, name, filename, locale, hash, section, encoding) VALUES ($1, $2, '/'||$3, $4, $5, $6, $7)",
&[&verid, &name, &path, &locale, &hash, &sect, &enc]
) {
let q = "WITH ms(id) AS (SELECT id FROM mans WHERE name = $2 AND section = $3),
mi(id) AS (INSERT INTO mans (name, section) SELECT $2, $3 WHERE NOT EXISTS(SELECT 1 FROM ms) RETURNING id),
m(id) AS (SELECT id FROM ms UNION SELECT id FROM mi),
ls(id) AS (SELECT id FROM locales WHERE locale = $5),
li(id) AS (INSERT INTO locales (locale) SELECT $5 WHERE NOT EXISTS(SELECT 1 FROM ls) RETURNING id),
l(id) AS (SELECT id FROM ls UNION SELECT id FROM li),
es(id) AS (SELECT id FROM encodings WHERE encoding = $6),
ei(id) AS (INSERT INTO encodings (encoding) SELECT $6 WHERE NOT EXISTS(SELECT 1 FROM es) RETURNING id),
e(id) AS (SELECT id FROM es UNION SELECT id FROM ei),
c(shorthash) AS (SELECT hash_to_shorthash(hash) FROM contents WHERE id = $4)
INSERT INTO files (pkgver, man, content, shorthash, locale, encoding, filename)
SELECT $1, m.id, $4, c.shorthash, l.id, e.id, '/'||$7 FROM m, l, e, c";
if let Err(e) = tr.execute(q, &[&verid, &name, &sect, &content, &locale, &enc, &path]) {
// I think this can only happen if archread gives us the same file twice, which really
// shouldn't happen. But I'd rather continue with an error logged than panic.
error!("Can't insert verid {} fn {}: {}", verid, path, e);
@ -123,29 +131,30 @@ fn insert_man<T: postgres::GenericClient>(tr: &mut T, verid: i32, paths: &[&str]
cont = cont.replace(0 as char, "");
}
tr.execute(
"INSERT INTO contents (hash, content) VALUES($1, $2) ON CONFLICT (hash) DO NOTHING",
&[&dig.as_ref(), &cont]
).unwrap();
let q = "WITH s(id) AS (SELECT id FROM contents WHERE hash = $1),
i(id) AS (INSERT INTO contents (hash, content) SELECT $1, $2 WHERE NOT EXISTS(SELECT 1 FROM s) RETURNING id)
SELECT id FROM s UNION SELECT id FROM i";
let id: i32 = tr.query_one(q, &[&dig.as_ref(), &cont]).unwrap().get(0);
for path in paths {
insert_man_row(tr, verid, path, enc, dig.as_ref());
insert_man_row(tr, verid, path, enc, id);
info!("Inserted man page: {} ({})", path, enc);
}
}
fn insert_link<T>(tr: &mut T, verid: i32, src: &str, dest: &str) where T: postgres::GenericClient {
let res = match tr.query_opt("SELECT hash, encoding FROM man WHERE package = $1 AND filename = '/'||$2", &[&verid, &dest]).unwrap() {
let q = "SELECT f.content, e.encoding FROM files f JOIN encodings e ON e.id = f.encoding WHERE pkgver = $1 AND filename = '/'||$2";
let res = match tr.query_opt(q, &[&verid, &dest]).unwrap() {
None => { /* Can happen if man::decode() failed previously. */
error!("Link to unindexed man page: {} -> {}", src, dest);
return;
},
Some(x) => x
};
let hash: Vec<u8> = res.get(0);
let content: i32 = res.get(0);
let enc: String = res.get(1);
insert_man_row(tr, verid, src, &enc, &hash);
insert_man_row(tr, verid, src, &enc, content);
info!("Inserted man link: {} -> {}", src, dest);
}

View file

@ -7,15 +7,44 @@ CREATE TABLE systems (
short varchar NOT NULL
);
CREATE TABLE contents (
id SERIAL PRIMARY KEY,
-- 'hash' is the SHA1 of the man page file after decompression but *before*
-- encoding conversion and removing 0-bytes. This means taking sha1(content)
-- may not necessary match the hash, and it's possible for the same content
-- to be in the database under multiple hashes (but I suspect that's rare).
hash bytea PRIMARY KEY,
content varchar NOT NULL
hash bytea NOT NULL UNIQUE,
content text NOT NULL
);
-- Unique man page, as identified by name & section
CREATE TABLE mans (
id SERIAL PRIMARY KEY,
name text NOT NULL,
section text NOT NULL,
UNIQUE(name, section)
);
CREATE INDEX mans_name ON mans USING btree(lower(name) text_pattern_ops);
-- List of man page locales for efficient referencing. Some locales include
-- the encoding in their name, which isn't really correct or even necessary
-- since we convert everything to UTF-8 anyway, but w/e, Can fix later.
CREATE TABLE locales (
id SMALLSERIAL PRIMARY KEY,
locale text NOT NULL UNIQUE
);
-- List of encodings for efficient referencing.
CREATE TABLE encodings (
id SMALLSERIAL PRIMARY KEY,
encoding text NOT NULL UNIQUE
);
CREATE TABLE packages (
id SERIAL PRIMARY KEY,
system integer NOT NULL REFERENCES systems(id) ON DELETE CASCADE ON UPDATE CASCADE,
@ -31,6 +60,7 @@ CREATE TABLE packages (
UNIQUE(system, name, category) -- Note the order, lookups on (system,name) are common
);
CREATE TABLE package_versions (
id SERIAL PRIMARY KEY,
package integer NOT NULL REFERENCES packages(id) ON DELETE CASCADE,
@ -40,43 +70,36 @@ CREATE TABLE package_versions (
UNIQUE(package, version)
);
CREATE TABLE man (
package integer NOT NULL REFERENCES package_versions(id) ON DELETE CASCADE,
name varchar NOT NULL,
filename varchar NOT NULL,
locale varchar,
hash bytea NOT NULL,
section varchar NOT NULL,
encoding varchar,
UNIQUE(package, filename)
CREATE TABLE files (
pkgver integer NOT NULL REFERENCES package_versions(id) ON DELETE CASCADE,
man integer NOT NULL REFERENCES mans(id),
content integer NOT NULL REFERENCES content(id),
shorthash integer NOT NULL, -- cache: hash_to_shorthash(content.hash)
locale smallint NOT NULL REFERENCES locales(id)
-- The original encoding the man page was found in. This column isn't really
-- used at the moment, but is potentially useful when investigating encoding
-- issues.
encoding smallint NOT NULL REFERENCES encodings(id),
filename text NOT NULL,
PRIMARY KEY(pkgver, filename)
);
CREATE INDEX ON files (man, shorthash);
CREATE INDEX ON files (content);
CREATE INDEX ON man (hash);
CREATE INDEX ON man (name);
-- For stats_cache
\i util/update_indices.sql
CREATE TABLE man_index AS SELECT DISTINCT name, section FROM man;
CREATE INDEX ON man_index USING btree(lower(name) text_pattern_ops);
CREATE TABLE stats_cache AS SELECT count(distinct hash) AS hashes, count(distinct name) AS mans, count(*) AS files, count(distinct package) AS packages FROM man;
-- Removes any path components and compression extensions from the filename.
CREATE OR REPLACE FUNCTION basename_from_filename(fn text) RETURNS text AS $$
SELECT regexp_replace(fn, '^.+/([^/][^/]*?)(?:\.gz|\.lzma|\.xz|\.bz2|\.zst)*$', '\1');
$$ LANGUAGE SQL;
CREATE OR REPLACE FUNCTION section_from_filename(text) RETURNS text AS $$
SELECT regexp_replace(basename_from_filename($1), '^.+\.([^.]+)$', '\1');
$$ LANGUAGE SQL;
CREATE OR REPLACE FUNCTION name_from_filename(text) RETURNS text AS $$
SELECT regexp_replace(basename_from_filename($1), '^(.+)\.[^.]+$', '\1');
$$ LANGUAGE SQL;
-- Interpret first 4 bytes of hash as a signed 32-bit integer.
CREATE OR REPLACE FUNCTION hash_to_shorthash(hash bytea) RETURNS integer AS $$
SELECT CASE WHEN get_byte(hash, 3) < 128
THEN (get_byte(hash, 3)::int<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0)
ELSE -2147483648 + ((get_byte(hash, 3)::int - 128)<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0)
END;
$$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION is_english_locale(locale text) RETURNS bool AS $$

106
sql/update-2021-12-14.sql Normal file
View file

@ -0,0 +1,106 @@
-- WARNING: This script does a complete rewrite of the two largest tables.
-- This requires ~25G of temporary space and takes anywhere between 10 and 20 minutes.
-- The site needs to be taken down during that time.
DROP FUNCTION section_from_filename(text);
DROP FUNCTION name_from_filename(text);
DROP FUNCTION basename_from_filename(text);
-- Interpret first 4 bytes of hash as a signed 32-bit integer.
CREATE OR REPLACE FUNCTION hash_to_shorthash(hash bytea) RETURNS integer AS $$
SELECT CASE WHEN get_byte(hash, 3) < 128
THEN (get_byte(hash, 3)::int<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0)
ELSE -2147483648 + ((get_byte(hash, 3)::int - 128)<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0)
END;
$$ LANGUAGE SQL IMMUTABLE;
ALTER TABLE contents DROP CONSTRAINT contents_pkey;
ALTER TABLE contents RENAME TO contents_old;
CREATE SEQUENCE contents_id_seq AS integer;
CREATE TABLE contents (
id integer NOT NULL DEFAULT nextval('contents_id_seq'::regclass),
hash bytea NOT NULL,
content text NOT NULL
);
ALTER SEQUENCE contents_id_seq OWNED BY contents.id;
-- 4m15s; start 29.3G end 53.3G; +24G
INSERT INTO contents (hash, content) SELECT hash, content FROM contents_old;
DROP TABLE contents_old; -- back to 29.3G
ALTER TABLE contents ADD PRIMARY KEY (id);
ALTER TABLE contents ADD UNIQUE (hash);
-- 29.7G at this point
CREATE TABLE mans (
id SERIAL PRIMARY KEY,
name text NOT NULL,
section text NOT NULL,
UNIQUE(name, section)
);
INSERT INTO mans (name, section) SELECT DISTINCT name, section FROM man;
CREATE INDEX mans_name ON mans USING btree(lower(name) text_pattern_ops);
CREATE TABLE locales (
id SMALLSERIAL PRIMARY KEY,
locale text NOT NULL UNIQUE
);
INSERT INTO locales (id, locale) VALUES (0,''); -- 0 for default locale is handy, I guess
INSERT INTO locales (locale) SELECT locale FROM man WHERE locale IS NOT NULL GROUP BY locale ORDER BY locale;
-- Encodings are stored for reference but are never actually used anywhere.
CREATE TABLE encodings (
id SMALLSERIAL PRIMARY KEY,
encoding text NOT NULL UNIQUE
);
INSERT INTO encodings (id, encoding) VALUES (0,'');
INSERT INTO encodings (encoding) SELECT encoding FROM man WHERE encoding IS NOT NULL GROUP BY encoding ORDER BY encoding;
-- Replaces the 'man' table; It's the largest table in terms of number of rows
-- and it's pretty frequently accessed, so keeping the rows small helps.
CREATE TABLE files (
pkgver integer NOT NULL, -- package_versions.id
man integer NOT NULL, -- mans.id
content integer NOT NULL, -- content.id
shorthash integer NOT NULL, -- cache: hash_to_shorthash(content.hash)
locale smallint NOT NULL, -- locales.id
encoding smallint NOT NULL, -- encodings.id
filename text NOT NULL
);
-- 1min; 29.7G -> 31.8G
INSERT INTO files
SELECT o.package, m.id, c.id, hash_to_shorthash(o.hash), l.id, e.id, o.filename
FROM man o
JOIN mans m ON m.name = o.name AND m.section = o.section
JOIN contents c ON c.hash = o.hash
JOIN locales l ON l.locale = coalesce(o.locale, '')
JOIN encodings e ON e.encoding = coalesce(o.encoding, '');
-- 1min; 31.8G -> 33.7G for both indices
ALTER TABLE files ADD PRIMARY KEY (pkgver, filename);
CREATE INDEX ON files (man, shorthash);
CREATE INDEX ON files (content);
-- 20sec to verify
ALTER TABLE files
ADD CONSTRAINT files_pkgver_fkey FOREIGN KEY (pkgver) REFERENCES package_versions (id) ON DELETE CASCADE,
ADD CONSTRAINT files_man_fkey FOREIGN KEY (man) REFERENCES mans (id),
ADD CONSTRAINT files_content_fkey FOREIGN KEY (content) REFERENCES contents (id),
ADD CONSTRAINT files_locale_fkey FOREIGN KEY (locale) REFERENCES locales (id),
ADD CONSTRAINT files_encoding_fkey FOREIGN KEY (encoding) REFERENCES encodings (id);
DROP TABLE man;
DROP TABLE man_index;
-- final: 29.1G; we saved a whole 300M! \o/
-- There's only about a 100 unreferenced rows, leftovers from removals of
-- incorrectly indexed packages. Let's remove them while we're at it.
DELETE FROM contents WHERE NOT EXISTS(SELECT 1 FROM files WHERE content = id);
VACUUM ANALYZE mans, files, contents, locales, encodings;

View file

@ -10,9 +10,5 @@ PSQL="psql -U manned -Awtq"
./fedora.sh current
./ubuntu.sh current
# Only update indices once a week (on mondays). This process is slow and the data doesn't often change anyway.
if [ `date +%u` == 1 ]
then
echo "============ Updating SQL indices"
$PSQL -f update_indices.sql
fi
echo "============ Updating SQL indices"
$PSQL -f update_indices.sql

View file

@ -2,14 +2,10 @@
-- the table being replaced. The site should remain responsive while these
-- queries are run.
BEGIN;
CREATE TABLE man_index_new AS SELECT DISTINCT name, section FROM man;
CREATE INDEX ON man_index_new USING btree(lower(name) text_pattern_ops);
DROP TABLE man_index;
ALTER TABLE man_index_new RENAME TO man_index;
COMMIT;
BEGIN;
CREATE TABLE stats_cache_new AS SELECT count(distinct hash) AS hashes, count(distinct name) AS mans, count(*) AS files, count(distinct package) AS packages FROM man;
CREATE TABLE stats_cache_new AS
SELECT (SELECT count(*) FROM contents) AS hashes,
(SELECT count(distinct name) FROM mans) AS mans, *
FROM (SELECT count(*), count(distinct pkgver) FROM files) x(files, packages);
DROP TABLE stats_cache;
ALTER TABLE stats_cache_new RENAME TO stats_cache;
COMMIT;

View file

@ -73,8 +73,11 @@ sub sysbyshort { state $s ||= { map +($_->{short}, $_), systems->@* } }
# Firefox seems to escape [ and ] in URLs. It doesn't really have to...
sub normalize_name { $_[0] =~ s/%5b/[/irg =~ s/%5d/]/irg =~ s/%20/ /rg }
sub shorthash_to_hex { unpack 'H*', pack 'i', $_[0] } # int -> hex
sub shorthash_to_int { unpack 'i', pack 'H*', $_[0] } # hex -> int
# Subquery returning all packages that have a man page.
my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM man m WHERE m.package = pv.id)))';
my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id)))';
sub escape_like { $_[0] =~ s/([_%\\])/\\$1/rg }
@ -86,13 +89,6 @@ sub sql_join {
sub sql_and { @_ ? sql_join 'AND', map sql('(', $_, ')'), @_ : sql '1=1' }
sub sql_or { @_ ? sql_join 'OR', map sql('(', $_, ')'), @_ : sql '1=0' }
# Subquery to match $sql_expr::bytea against a $prefix (hex string). Hopefully indexable.
sub sql_hash_prefix {
my($sql_expr, $prefix) = @_;
my $esc = unpack 'H*', escape_like pack 'H*', $prefix;
sql '(', $sql_expr, "like ('\\x$esc'::bytea||'%'))"
}
sub pkg_frompath {
my($sys_where, $path) = @_;
@ -139,27 +135,29 @@ sub man_pref {
# 7. sysrel: Prefer a more recent system release over an older release
# 8. secorder: Lower sections before higher sections (because man does it this way, for some reason)
# 9. pkgdate: Prefer more recent packages (cross-distro)
# 10. Fall back on hash comparison, to ensure the result is stable
# 10. Fall back on shorthash comparison, to ensure the result is stable
state $archid = sysbyshort->{arch}{id};
state $debid = (sort { $b->{id} <=> $a->{id} } grep $_->{short} =~ /^debian-/, systems->@*)[0]{id};
tuwf->dbRowi(q{
WITH unfiltered AS (
SELECT s AS sys, p AS pkg, v AS ver, m AS man
FROM man m
JOIN package_versions v ON v.id = m.package
SELECT m.name, m.section, l.locale, f.shorthash, f.content, f.filename, s AS sys, p AS pkg, v AS ver
FROM files f
JOIN locales l ON l.id = f.locale
JOIN mans m ON m.id = f.man
JOIN package_versions v ON v.id = f.pkgver
JOIN packages p ON p.id = v.package
JOIN systems s ON s.id = p.system
WHERE}, $where, q{
), f_english AS(
SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale((man).locale)) OR is_english_locale((man).locale)
SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale(locale)) OR is_english_locale(locale)
), f_pkgver AS(
SELECT * FROM f_english a WHERE NOT EXISTS(SELECT 1 FROM f_english b WHERE (a.ver).package = (b.ver).package AND (a.ver).released < (b.ver).released)
), f_stdloc AS(
SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location((man).filename)) OR is_standard_man_location((man).filename)
SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location(filename)) OR is_standard_man_location(filename)
), f_secmatch AS(
SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE (man).section =}, \$section, q{) OR (man).section =}, \$section, q{
SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE section =}, \$section, q{) OR section =}, \$section, q{
), f_arch AS(
SELECT * FROM f_secmatch WHERE NOT EXISTS(SELECT 1 FROM}, length $section ? 'f_secmatch' : 'f_stdloc', qq{WHERE (sys).id = $archid) OR (sys).id = $archid
), f_debian AS(
@ -167,13 +165,13 @@ sub man_pref {
), f_sysrel AS(
SELECT * FROM f_debian a WHERE NOT EXISTS(SELECT 1 FROM f_debian b WHERE (a.sys).name = (b.sys).name AND (a.sys).id < (b.sys).id)
), f_secorder AS(
SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE (a.man).section > (b.man).section)
SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE section > section)
), f_pkgdate AS(
SELECT * FROM f_secorder a WHERE NOT EXISTS(SELECT 1 FROM f_secorder b WHERE (a.ver).released < (b.ver).released)
)
SELECT (pkg).system, (pkg).category, (pkg).name AS package, (ver).version, (ver).released, (ver).id AS verid,
(man).name, (man).section, (man).filename, (man).locale, encode((man).hash, 'hex') AS hash
FROM f_pkgdate ORDER BY (man).hash LIMIT 1
name, section, filename, locale, shorthash, content
FROM f_pkgdate ORDER BY shorthash LIMIT 1
});
}
@ -562,13 +560,11 @@ sub search_man {
my $sect = $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$// ? $1 : '';
my $name = $q =~ s/^([a-zA-Z0-9,.:_-]+)// ? $1 : '';
return !$name ? [] : tuwf->dbAll(
'SELECT name, section FROM man_index !W ORDER BY name, section LIMIT ?',
{
'lower(name) LIKE ?' => escape_like(lc $name).'%',
$sect ? ('section ILIKE ?' => escape_like(lc $sect).'%') : (),
},
$limit
return !$name ? [] : tuwf->dbAlli(
'SELECT name, section FROM mans WHERE', sql_and(
sql('lower(name) LIKE', \(escape_like(lc $name).'%')),
$sect ? sql('section ILIKE', \(escape_like(lc $sect).'%')) : (),
), 'ORDER BY name, section LIMIT', \$limit,
);
}
@ -614,10 +610,11 @@ TUWF::get qr{/([^/]+)/([0-9a-f]{8})/src} => sub {
my $nfo = tuwf->dbRowi('
SELECT m.name, m.section, v.released, c.content
FROM man m
JOIN package_versions v ON v.id = m.package
JOIN contents c ON c.hash = m.hash
WHERE m.name =', \$name, 'AND', sql_hash_prefix('m.hash', $hash), '
FROM files f
JOIN mans m ON m.id = f.man
JOIN package_versions v ON v.id = f.pkgver
JOIN contents c ON c.id = f.content
WHERE m.name =', \$name, 'AND f.shorthash =', \shorthash_to_int($hash), '
LIMIT 1'
);
return tuwf->resNotFound if !$nfo->{name};
@ -633,13 +630,16 @@ sub _man_nav {
my($man, $toc) = @_;
my @sect = map $_->{section}, tuwf->dbAlli(
'SELECT DISTINCT section FROM man WHERE name =', \$man->{name}, 'ORDER BY section'
'SELECT DISTINCT section FROM mans WHERE name =', \$man->{name}, 'ORDER BY section'
)->@*;
my @lang = map $_->{lang}, tuwf->dbAlli(
"SELECT DISTINCT substring(locale from '^[^.]+') AS lang
FROM man WHERE name =", \$man->{name}, 'AND section =', \$man->{section}, "
ORDER BY substring(locale from '^[^.]+') NULLS FIRST"
"SELECT DISTINCT substring(l.locale from '^[^.]+') AS lang
FROM files f
JOIN mans m ON m.id = f.man
JOIN locales l ON l.id = f.locale
WHERE m.name =", \$man->{name}, 'AND m.section =', \$man->{section}, "
ORDER BY substring(l.locale from '^[^.]+') NULLS FIRST"
)->@*;
return if !@sect && !@lang && !@$toc;
@ -707,7 +707,7 @@ sub soelim {
my($man) = $verid ? man_pref_name $name, sql 'v.id =', \$verid : ();
$man->{name}
# Recursive soelim, but the second call gets $verid=0 so we don't keep checking the database
? soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')"))
? soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE id =", \$man->{content}))
: ".in -10\n.sp\n\[\[\[MANNEDINCLUDE$path\]\]\]"
}emg;
$src;
@ -727,12 +727,12 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
# the same package as the requested man page. Use the man_pref logic here
# to deterministically select a good package.
my($man, undef) = $shorthash
? man_pref undef, sql 'm.name =', \$name, 'AND', sql_hash_prefix 'm.hash', $shorthash
? man_pref undef, sql 'm.name =', \$name, 'AND f.shorthash =', shorthash_to_int($shorthash)
: man_pref_name $name, 'true';
return tuwf->resNotFound() if !$man->{name};
my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid},
tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')");
my $content = tuwf->dbRowi('SELECT encode(hash, \'hex\') AS hash, content FROM contents WHERE id =', \$man->{content});
my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid}, $content->{content};
my @toc;
$fmt =~ s{\n<b>(.+?)<\/b>\n}{
push @toc, $1;
@ -741,9 +741,14 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
}eg;
my $hasversions = tuwf->dbVali(
'SELECT 1 FROM man WHERE name =', \$man->{name}, 'AND section =', \$man->{section},
'AND locale IS NOT DISTINCT FROM', \$man->{locale},
'AND hash <> decode(', \$man->{hash}, ", 'hex') LIMIT 1"
'SELECT 1
FROM files f
JOIN mans m ON m.id = f.man
JOIN locales l ON l.id = f.locale
WHERE m.name =', \$man->{name}, 'AND m.section =', \$man->{section}, '
AND l.locale =', \$man->{locale}, '
AND f.shorthash <> ', \shorthash_to_int($man->{shorthash}), '
LIMIT 1'
);
tuwf->resLastMod($man->{released});
@ -751,14 +756,14 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
_man_nav $man, \@toc;
div_ id => 'manbuttons', sub {
h1_ $man->{name};
ul_ 'data-hash' => $man->{hash},
ul_ 'data-hash' => $content->{hash},
'data-name' => $man->{name},
'data-section' => $man->{section},
'data-locale' => $man->{locale}||'',
'data-hasversions' => $hasversions?1:0,
sub {
li_ sub { a_ href => "/$man->{name}/".substr($man->{hash}, 0, 8).'/src', 'source' };
li_ sub { a_ href => "/$man->{name}/".substr($man->{hash}, 0, 8), 'permalink' };
li_ sub { a_ href => "/$man->{name}/".shorthash_to_hex($man->{shorthash}).'/src', 'source' };
li_ sub { a_ href => "/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'permalink' };
}
};
div_ id => 'manres', class => 'hidden', '';
@ -828,7 +833,7 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
SELECT id, version, released
FROM package_versions v
WHERE package =', \$pkg->{id}, '
AND EXISTS(SELECT 1 FROM man m WHERE m.package = v.id)
AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = v.id)
ORDER BY released DESC'
);
my $sel = $ver ? (grep $_->{version} eq $ver, @$vers)[0] : $vers->[0];
@ -836,11 +841,14 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
my $p = tuwf->validate(get => p => { onerror => 1, uint => 1, range => [1,100] })->data;
my $count = tuwf->dbVali('SELECT count(*) FROM man WHERE package =', \$sel->{id});
my $count = tuwf->dbVali('SELECT count(*) FROM files WHERE pkgver =', \$sel->{id});
my $mans = tuwf->dbPagei({ results => 200, page => $p },
"SELECT name, encode(hash, 'hex') AS hash, section, locale, filename
FROM man WHERE package =", \$sel->{id}, '
ORDER BY name, locale NULLS FIRST, filename'
"SELECT m.name, m.section, f.shorthash, f.filename, l.locale
FROM files f
JOIN locales l ON l.id = f.locale
JOIN mans m ON m.id = f.man
WHERE f.pkgver =", \$sel->{id}, '
ORDER BY m.name, l.locale, f.filename'
);
# Latest version of this package determines last modification date of the page.
@ -871,7 +879,7 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
paginate_ "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$sel->{version}?p=", $count, 200, $p;
ul_ sub {
li_ sub {
a_ href => "/$_->{name}/".substr($_->{hash},0,8), "$_->{name}($_->{section})";
a_ href => "/$_->{name}/".shorthash_to_hex($_->{shorthash}), "$_->{name}($_->{section})";
b_ " $_->{locale}" if $_->{locale};
small_ " $_->{filename}";
} for(@$mans);
@ -922,7 +930,7 @@ TUWF::get qr{/man/([^/]+)/(.+)} => sub {
}
return tuwf->resNotFound if !$man;
tuwf->resRedirect("/$man->{name}/".substr($man->{hash}, 0, 8), 'temp');
tuwf->resRedirect("/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'temp');
};
@ -932,9 +940,9 @@ TUWF::get qr{/lang/([^/]+)/([^/]+)} => sub {
my $lang = tuwf->capture(1);
my $name = normalize_name tuwf->capture(2);
my($man, undef) = man_pref_name $name,
sql "substring(locale from '^[^.]+') ilike", \escape_like $lang;
return tuwf->resNotFound if !$man->{name};
tuwf->resRedirect("/$man->{name}/".substr($man->{hash}, 0, 8), 'temp');
sql "substring(l.locale from '^[^.]+') ilike", \(escape_like($lang).'%');
return tuwf->resNotFound if !length $man->{name};
tuwf->resRedirect("/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'temp');
};
@ -949,22 +957,24 @@ TUWF::get '/json/tree.json' => sub {
return tuwf->resNotFound() if !$f->{hash} && !($f->{section} && $f->{name});
my $l = tuwf->dbAlli("
SELECT p.system, p.category, p.name AS package, v.version, v.released, v.id AS verid, m.name, m.section, m.filename, m.locale, encode(m.hash, 'hex') AS hash
FROM man m
JOIN package_versions v ON v.id = m.package
SELECT p.system, p.category, p.name AS package, v.version, v.released, v.id AS verid, m.name, m.section, f.filename, f.shorthash, l.locale
FROM files f
JOIN locales l ON l.id = f.locale
JOIN mans m ON m.id = f.man
JOIN package_versions v ON v.id = f.pkgver
JOIN packages p ON p.id = v.package
JOIN systems s ON s.id = p.system
WHERE", sql_and(
length $f->{hash} ? sql 'm.hash = decode(', \$f->{hash}, ", 'hex')" : (),
length $f->{hash} ? sql 'f.content = (SELECT id FROM contents WHERE hash = decode(', \$f->{hash}, ", 'hex'))" : (),
length $f->{name} ? sql 'm.name =', \$f->{name} : (),
length $f->{section} ? sql 'm.section =', \$f->{section} : (),
length $f->{locale} ? sql 'm.locale =', \$f->{locale} : (),
defined $f->{locale} && $f->{locale} eq '' ? 'm.locale IS NULL' : (),
defined $f->{locale} ? sql 'l.locale =', \$f->{locale} : (),
), '
ORDER BY s.name, s.id DESC, p.name, v.released DESC, m.name, m.locale NULLS FIRST, m.filename
ORDER BY s.name, s.id DESC, p.name, v.released DESC, m.name, l.locale, f.filename
');
# Convert the list into a tree
my $cur = $f->{cur} ? shorthash_to_int substr $f->{cur}, 0, 8 : 0;
my $tree = [];
my($sys, $sysver, $pkg, $pkgver);
for my $m (@$l) {
@ -992,12 +1002,12 @@ TUWF::get '/json/tree.json' => sub {
$pkgver && $pkgver eq $m->{version} ? {name=>''} :
{name => $m->{version}, href => "/pkg/".sysbyid->{$m->{system}}{short}."/$m->{category}/$m->{package}/$m->{version}"},
{ name => "$m->{name}($m->{section})",
$f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? ()
: (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8))
$f->{hash} || $cur == $m->{shorthash} ? ()
: (href => sprintf('/%s/%s', $m->{name}, shorthash_to_hex $m->{shorthash}))
},
{ name => substr($m->{hash}, 0, 8),
$f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? ()
: (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8))
{ name => shorthash_to_hex($m->{shorthash}),
$f->{hash} || $cur == $m->{shorthash} ? ()
: (href => sprintf('/%s/%s', $m->{name}, shorthash_to_hex $m->{shorthash}))
},
{ name => $m->{filename} }
];