Large-ish SQL schema revamp/optimizations

Primarily aimed at reducing the size of the old 'man' (now: files)
table, using smaller integers to refer to man contents and text fields,
and storing a shorthash as an integer for quick lookups. This better
normalization also removes the need to keep a separate 'man_index' cache
for the search function.

The old schema wasn't necessarily bad, but I was in the mood for some
optimizations. And a little cleanup.

Probably introduces a bunch of new bugs; I haven't tested this too well.
This commit is contained in:
Yorhel 2021-12-14 15:06:05 +01:00
parent 6f7f59c6df
commit f376f1f137
6 changed files with 268 additions and 128 deletions

View file

@ -87,7 +87,7 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
// XXX: Should we update released & arch here? // XXX: Should we update released & arch here?
verid = res?.get(0); verid = res?.get(0);
info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo); info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
tr.query("DELETE FROM man WHERE package = $1", &[&verid]).unwrap(); tr.query("DELETE FROM files WHERE pkgver = $1", &[&verid]).unwrap();
Some(verid) Some(verid)
} else { } else {
@ -97,13 +97,21 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
} }
fn insert_man_row<T: postgres::GenericClient>(tr: &mut T, verid: i32, path: &str, enc: &str, hash: &[u8]) { fn insert_man_row<T: postgres::GenericClient>(tr: &mut T, verid: i32, path: &str, enc: &str, content: i32) {
let (name, sect, locale) = man::parse_path(path).unwrap(); let (name, sect, locale) = man::parse_path(path).unwrap();
let locale = if locale == "" { None } else { Some(locale) }; let q = "WITH ms(id) AS (SELECT id FROM mans WHERE name = $2 AND section = $3),
if let Err(e) = tr.execute( mi(id) AS (INSERT INTO mans (name, section) SELECT $2, $3 WHERE NOT EXISTS(SELECT 1 FROM ms) RETURNING id),
"INSERT INTO man (package, name, filename, locale, hash, section, encoding) VALUES ($1, $2, '/'||$3, $4, $5, $6, $7)", m(id) AS (SELECT id FROM ms UNION SELECT id FROM mi),
&[&verid, &name, &path, &locale, &hash, &sect, &enc] ls(id) AS (SELECT id FROM locales WHERE locale = $5),
) { li(id) AS (INSERT INTO locales (locale) SELECT $5 WHERE NOT EXISTS(SELECT 1 FROM ls) RETURNING id),
l(id) AS (SELECT id FROM ls UNION SELECT id FROM li),
es(id) AS (SELECT id FROM encodings WHERE encoding = $6),
ei(id) AS (INSERT INTO encodings (encoding) SELECT $6 WHERE NOT EXISTS(SELECT 1 FROM es) RETURNING id),
e(id) AS (SELECT id FROM es UNION SELECT id FROM ei),
c(shorthash) AS (SELECT hash_to_shorthash(hash) FROM contents WHERE id = $4)
INSERT INTO files (pkgver, man, content, shorthash, locale, encoding, filename)
SELECT $1, m.id, $4, c.shorthash, l.id, e.id, '/'||$7 FROM m, l, e, c";
if let Err(e) = tr.execute(q, &[&verid, &name, &sect, &content, &locale, &enc, &path]) {
// I think this can only happen if archread gives us the same file twice, which really // I think this can only happen if archread gives us the same file twice, which really
// shouldn't happen. But I'd rather continue with an error logged than panic. // shouldn't happen. But I'd rather continue with an error logged than panic.
error!("Can't insert verid {} fn {}: {}", verid, path, e); error!("Can't insert verid {} fn {}: {}", verid, path, e);
@ -123,29 +131,30 @@ fn insert_man<T: postgres::GenericClient>(tr: &mut T, verid: i32, paths: &[&str]
cont = cont.replace(0 as char, ""); cont = cont.replace(0 as char, "");
} }
tr.execute( let q = "WITH s(id) AS (SELECT id FROM contents WHERE hash = $1),
"INSERT INTO contents (hash, content) VALUES($1, $2) ON CONFLICT (hash) DO NOTHING", i(id) AS (INSERT INTO contents (hash, content) SELECT $1, $2 WHERE NOT EXISTS(SELECT 1 FROM s) RETURNING id)
&[&dig.as_ref(), &cont] SELECT id FROM s UNION SELECT id FROM i";
).unwrap(); let id: i32 = tr.query_one(q, &[&dig.as_ref(), &cont]).unwrap().get(0);
for path in paths { for path in paths {
insert_man_row(tr, verid, path, enc, dig.as_ref()); insert_man_row(tr, verid, path, enc, id);
info!("Inserted man page: {} ({})", path, enc); info!("Inserted man page: {} ({})", path, enc);
} }
} }
fn insert_link<T>(tr: &mut T, verid: i32, src: &str, dest: &str) where T: postgres::GenericClient { fn insert_link<T>(tr: &mut T, verid: i32, src: &str, dest: &str) where T: postgres::GenericClient {
let res = match tr.query_opt("SELECT hash, encoding FROM man WHERE package = $1 AND filename = '/'||$2", &[&verid, &dest]).unwrap() { let q = "SELECT f.content, e.encoding FROM files f JOIN encodings e ON e.id = f.encoding WHERE pkgver = $1 AND filename = '/'||$2";
let res = match tr.query_opt(q, &[&verid, &dest]).unwrap() {
None => { /* Can happen if man::decode() failed previously. */ None => { /* Can happen if man::decode() failed previously. */
error!("Link to unindexed man page: {} -> {}", src, dest); error!("Link to unindexed man page: {} -> {}", src, dest);
return; return;
}, },
Some(x) => x Some(x) => x
}; };
let hash: Vec<u8> = res.get(0); let content: i32 = res.get(0);
let enc: String = res.get(1); let enc: String = res.get(1);
insert_man_row(tr, verid, src, &enc, &hash); insert_man_row(tr, verid, src, &enc, content);
info!("Inserted man link: {} -> {}", src, dest); info!("Inserted man link: {} -> {}", src, dest);
} }

View file

@ -7,15 +7,44 @@ CREATE TABLE systems (
short varchar NOT NULL short varchar NOT NULL
); );
CREATE TABLE contents ( CREATE TABLE contents (
id SERIAL PRIMARY KEY,
-- 'hash' is the SHA1 of the man page file after decompression but *before* -- 'hash' is the SHA1 of the man page file after decompression but *before*
-- encoding conversion and removing 0-bytes. This means taking sha1(content) -- encoding conversion and removing 0-bytes. This means taking sha1(content)
-- may not necessarily match the hash, and it's possible for the same content -- may not necessarily match the hash, and it's possible for the same content
-- to be in the database under multiple hashes (but I suspect that's rare). -- to be in the database under multiple hashes (but I suspect that's rare).
hash bytea PRIMARY KEY, hash bytea NOT NULL UNIQUE,
content varchar NOT NULL content text NOT NULL
); );
-- Unique man page, as identified by name & section
-- Rows in 'files' point at this table through files.man, so the name and
-- section strings are stored once per man page rather than once per file.
CREATE TABLE mans (
id SERIAL PRIMARY KEY,
name text NOT NULL,
section text NOT NULL,
UNIQUE(name, section)
);
-- Expression index on lower(name); text_pattern_ops lets the btree serve
-- prefix matches of the form lower(name) LIKE 'abc%', which is the condition
-- the web search issues against this table.
CREATE INDEX mans_name ON mans USING btree(lower(name) text_pattern_ops);
-- List of man page locales for efficient referencing. Some locales include
-- the encoding in their name, which isn't really correct or even necessary
-- since we convert everything to UTF-8 anyway, but that can be fixed later.
-- Referenced from files.locale through the smallint id.
-- NOTE(review): pages without a locale appear to be stored with the empty
-- string rather than NULL (the column is NOT NULL) -- confirm against the
-- indexer.
CREATE TABLE locales (
id SMALLSERIAL PRIMARY KEY,
locale text NOT NULL UNIQUE
);
-- List of encodings for efficient referencing.
-- Referenced from files.encoding through the smallint id; each distinct
-- encoding name is stored exactly once (UNIQUE).
CREATE TABLE encodings (
id SMALLSERIAL PRIMARY KEY,
encoding text NOT NULL UNIQUE
);
CREATE TABLE packages ( CREATE TABLE packages (
id SERIAL PRIMARY KEY, id SERIAL PRIMARY KEY,
system integer NOT NULL REFERENCES systems(id) ON DELETE CASCADE ON UPDATE CASCADE, system integer NOT NULL REFERENCES systems(id) ON DELETE CASCADE ON UPDATE CASCADE,
@ -31,6 +60,7 @@ CREATE TABLE packages (
UNIQUE(system, name, category) -- Note the order, lookups on (system,name) are common UNIQUE(system, name, category) -- Note the order, lookups on (system,name) are common
); );
CREATE TABLE package_versions ( CREATE TABLE package_versions (
id SERIAL PRIMARY KEY, id SERIAL PRIMARY KEY,
package integer NOT NULL REFERENCES packages(id) ON DELETE CASCADE, package integer NOT NULL REFERENCES packages(id) ON DELETE CASCADE,
@ -40,43 +70,36 @@ CREATE TABLE package_versions (
UNIQUE(package, version) UNIQUE(package, version)
); );
CREATE TABLE man (
package integer NOT NULL REFERENCES package_versions(id) ON DELETE CASCADE, CREATE TABLE files (
-- One row per man page file in a package version; (pkgver, filename) is the primary key.
name varchar NOT NULL, pkgver integer NOT NULL REFERENCES package_versions(id) ON DELETE CASCADE,
filename varchar NOT NULL, man integer NOT NULL REFERENCES mans(id),
locale varchar, content integer NOT NULL REFERENCES contents(id),
hash bytea NOT NULL, shorthash integer NOT NULL, -- cache: hash_to_shorthash(contents.hash)
section varchar NOT NULL, locale smallint NOT NULL REFERENCES locales(id),
encoding varchar, -- The original encoding the man page was found in. This column isn't really
UNIQUE(package, filename) -- used at the moment, but is potentially useful when investigating encoding
-- issues.
encoding smallint NOT NULL REFERENCES encodings(id),
filename text NOT NULL,
PRIMARY KEY(pkgver, filename)
);
CREATE INDEX ON files (man, shorthash);
CREATE INDEX ON files (content);
CREATE INDEX ON man (hash);
CREATE INDEX ON man (name); -- For stats_cache
\i util/update_indices.sql
CREATE TABLE man_index AS SELECT DISTINCT name, section FROM man; -- Interpret first 4 bytes of hash as a signed 32-bit integer.
CREATE INDEX ON man_index USING btree(lower(name) text_pattern_ops); CREATE OR REPLACE FUNCTION hash_to_shorthash(hash bytea) RETURNS integer AS $$
SELECT CASE WHEN get_byte(hash, 3) < 128
CREATE TABLE stats_cache AS SELECT count(distinct hash) AS hashes, count(distinct name) AS mans, count(*) AS files, count(distinct package) AS packages FROM man; THEN (get_byte(hash, 3)::int<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0)
ELSE -2147483648 + ((get_byte(hash, 3)::int - 128)<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0)
END;
$$ LANGUAGE SQL IMMUTABLE;
-- Removes any path components and compression extensions from the filename.
CREATE OR REPLACE FUNCTION basename_from_filename(fn text) RETURNS text AS $$
SELECT regexp_replace(fn, '^.+/([^/][^/]*?)(?:\.gz|\.lzma|\.xz|\.bz2|\.zst)*$', '\1');
$$ LANGUAGE SQL;
CREATE OR REPLACE FUNCTION section_from_filename(text) RETURNS text AS $$
SELECT regexp_replace(basename_from_filename($1), '^.+\.([^.]+)$', '\1');
$$ LANGUAGE SQL;
CREATE OR REPLACE FUNCTION name_from_filename(text) RETURNS text AS $$
SELECT regexp_replace(basename_from_filename($1), '^(.+)\.[^.]+$', '\1');
$$ LANGUAGE SQL;
CREATE OR REPLACE FUNCTION is_english_locale(locale text) RETURNS bool AS $$ CREATE OR REPLACE FUNCTION is_english_locale(locale text) RETURNS bool AS $$

106
sql/update-2021-12-14.sql Normal file
View file

@ -0,0 +1,106 @@
-- WARNING: This script does a complete rewrite of the two largest tables.
-- This requires ~25G of temporary space and takes anywhere between 10 and 20 minutes.
-- The site needs to be taken down during that time.
-- Remove the filename-parsing helper functions. Presumably they are no
-- longer needed now that name and section are stored in the 'mans' table.
-- Note: without IF EXISTS each statement errors out if the function is
-- already gone, so this script is not re-runnable as-is.
DROP FUNCTION section_from_filename(text);
DROP FUNCTION name_from_filename(text);
DROP FUNCTION basename_from_filename(text);
-- Interpret first 4 bytes of hash as a signed 32-bit integer.
-- Byte 0 is the least significant byte and byte 3 the most significant one
-- (i.e. little-endian). When byte 3 is below 128 the result is non-negative
-- and the bytes are simply shifted and added. Otherwise the top bit is
-- cleared (byte 3 minus 128) before shifting and -2147483648 is added
-- instead, so that no intermediate value leaves the range of 'integer'.
CREATE OR REPLACE FUNCTION hash_to_shorthash(hash bytea) RETURNS integer AS $$
SELECT CASE WHEN get_byte(hash, 3) < 128
THEN (get_byte(hash, 3)::int<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0)
ELSE -2147483648 + ((get_byte(hash, 3)::int - 128)<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0)
END;
$$ LANGUAGE SQL IMMUTABLE;
-- Rebuild 'contents' with a new integer 'id' column: move the existing table
-- out of the way, create the new layout, copy all rows across and only then
-- add the constraints.
-- The old primary key is dropped before the rename, presumably so that the
-- name 'contents_pkey' is free for the new table's primary key.
ALTER TABLE contents DROP CONSTRAINT contents_pkey;
ALTER TABLE contents RENAME TO contents_old;
-- The sequence is created by hand (rather than through SERIAL) and attached
-- to the column below with OWNED BY.
CREATE SEQUENCE contents_id_seq AS integer;
CREATE TABLE contents (
id integer NOT NULL DEFAULT nextval('contents_id_seq'::regclass),
hash bytea NOT NULL,
content text NOT NULL
);
ALTER SEQUENCE contents_id_seq OWNED BY contents.id;
-- 4m15s; start 29.3G end 53.3G; +24G
-- 'id' is not listed, so every copied row gets a fresh value from the sequence.
INSERT INTO contents (hash, content) SELECT hash, content FROM contents_old;
DROP TABLE contents_old; -- back to 29.3G
-- Constraints (and their indices) are created after the bulk copy.
ALTER TABLE contents ADD PRIMARY KEY (id);
ALTER TABLE contents ADD UNIQUE (hash);
-- 29.7G at this point
-- Lookup table of unique (name, section) pairs, filled from the old 'man'
-- table. The index on lower(name) is created after the rows are inserted.
CREATE TABLE mans (
id SERIAL PRIMARY KEY,
name text NOT NULL,
section text NOT NULL,
UNIQUE(name, section)
);
INSERT INTO mans (name, section) SELECT DISTINCT name, section FROM man;
CREATE INDEX mans_name ON mans USING btree(lower(name) text_pattern_ops);
-- Lookup table of locale names. The old table used NULL for "no locale";
-- here that is represented by the empty string, which gets the explicit id 0.
CREATE TABLE locales (
id SMALLSERIAL PRIMARY KEY,
locale text NOT NULL UNIQUE
);
INSERT INTO locales (id, locale) VALUES (0,''); -- 0 for default locale is handy, I guess
-- Remaining locales get sequence-assigned ids in alphabetical order.
INSERT INTO locales (locale) SELECT locale FROM man WHERE locale IS NOT NULL GROUP BY locale ORDER BY locale;
-- Encodings are stored for reference but are never actually used anywhere.
-- Same scheme as 'locales': id 0 is the empty string and stands in for NULL.
CREATE TABLE encodings (
id SMALLSERIAL PRIMARY KEY,
encoding text NOT NULL UNIQUE
);
INSERT INTO encodings (id, encoding) VALUES (0,'');
INSERT INTO encodings (encoding) SELECT encoding FROM man WHERE encoding IS NOT NULL GROUP BY encoding ORDER BY encoding;
-- Replaces the 'man' table; It's the largest table in terms of number of rows
-- and it's pretty frequently accessed, so keeping the rows small helps.
-- The table is created bare; the primary key, indices and foreign keys are
-- added further down, after the data has been copied.
CREATE TABLE files (
pkgver integer NOT NULL, -- package_versions.id
man integer NOT NULL, -- mans.id
content integer NOT NULL, -- contents.id
shorthash integer NOT NULL, -- cache: hash_to_shorthash(contents.hash)
locale smallint NOT NULL, -- locales.id
encoding smallint NOT NULL, -- encodings.id
filename text NOT NULL
);
-- 1min; 29.7G -> 31.8G
-- Translate every old 'man' row into ids of the lookup tables. NULL locale
-- and encoding values are mapped to the '' rows (id 0) through coalesce().
-- These are inner joins, so an old row whose hash has no matching 'contents'
-- row would be silently dropped.
INSERT INTO files
SELECT o.package, m.id, c.id, hash_to_shorthash(o.hash), l.id, e.id, o.filename
FROM man o
JOIN mans m ON m.name = o.name AND m.section = o.section
JOIN contents c ON c.hash = o.hash
JOIN locales l ON l.locale = coalesce(o.locale, '')
JOIN encodings e ON e.encoding = coalesce(o.encoding, '');
-- 1min; 31.8G -> 33.7G for both indices
ALTER TABLE files ADD PRIMARY KEY (pkgver, filename);
CREATE INDEX ON files (man, shorthash);
CREATE INDEX ON files (content);
-- 20sec to verify
-- The constraint names are spelled out explicitly; they follow the
-- <table>_<column>_fkey pattern.
ALTER TABLE files
ADD CONSTRAINT files_pkgver_fkey FOREIGN KEY (pkgver) REFERENCES package_versions (id) ON DELETE CASCADE,
ADD CONSTRAINT files_man_fkey FOREIGN KEY (man) REFERENCES mans (id),
ADD CONSTRAINT files_content_fkey FOREIGN KEY (content) REFERENCES contents (id),
ADD CONSTRAINT files_locale_fkey FOREIGN KEY (locale) REFERENCES locales (id),
ADD CONSTRAINT files_encoding_fkey FOREIGN KEY (encoding) REFERENCES encodings (id);
-- The old table and its search cache are superseded by 'files' and 'mans'.
DROP TABLE man;
DROP TABLE man_index;
-- final: 29.1G; we saved a whole 300M! \o/
-- There's only about a 100 unreferenced rows, leftovers from removals of
-- incorrectly indexed packages. Let's remove them while we're at it.
-- In the subquery, 'content' is files.content and 'id' resolves to the outer
-- contents.id, since 'files' has no 'id' column of its own.
DELETE FROM contents WHERE NOT EXISTS(SELECT 1 FROM files WHERE content = id);
-- Reclaim space and refresh planner statistics for all new/rewritten tables.
VACUUM ANALYZE mans, files, contents, locales, encodings;

View file

@ -10,9 +10,5 @@ PSQL="psql -U manned -Awtq"
./fedora.sh current ./fedora.sh current
./ubuntu.sh current ./ubuntu.sh current
# Only update indices once a week (on mondays). This process is slow and the data doesn't often change anyway. echo "============ Updating SQL indices"
if [ `date +%u` == 1 ] $PSQL -f update_indices.sql
then
echo "============ Updating SQL indices"
$PSQL -f update_indices.sql
fi

View file

@ -2,14 +2,10 @@
-- the table being replaced. The site should remain responsive while these -- the table being replaced. The site should remain responsive while these
-- queries are run. -- queries are run.
BEGIN; BEGIN;
CREATE TABLE man_index_new AS SELECT DISTINCT name, section FROM man; CREATE TABLE stats_cache_new AS
CREATE INDEX ON man_index_new USING btree(lower(name) text_pattern_ops); SELECT (SELECT count(*) FROM contents) AS hashes,
DROP TABLE man_index; (SELECT count(distinct name) FROM mans) AS mans, *
ALTER TABLE man_index_new RENAME TO man_index; FROM (SELECT count(*), count(distinct pkgver) FROM files) x(files, packages);
COMMIT;
BEGIN;
CREATE TABLE stats_cache_new AS SELECT count(distinct hash) AS hashes, count(distinct name) AS mans, count(*) AS files, count(distinct package) AS packages FROM man;
DROP TABLE stats_cache; DROP TABLE stats_cache;
ALTER TABLE stats_cache_new RENAME TO stats_cache; ALTER TABLE stats_cache_new RENAME TO stats_cache;
COMMIT; COMMIT;

View file

@ -73,8 +73,11 @@ sub sysbyshort { state $s ||= { map +($_->{short}, $_), systems->@* } }
# Firefox seems to escape [ and ] in URLs. It doesn't really have to... # Firefox seems to escape [ and ] in URLs. It doesn't really have to...
sub normalize_name { $_[0] =~ s/%5b/[/irg =~ s/%5d/]/irg =~ s/%20/ /rg } sub normalize_name { $_[0] =~ s/%5b/[/irg =~ s/%5d/]/irg =~ s/%20/ /rg }
# Conversions between the integer 'shorthash' stored in the database and the
# 8-character hex string used in URLs. The 'l<' template pins the layout to a
# signed 32-bit little-endian integer, which is what hash_to_shorthash() in
# SQL computes (byte 0 = least significant). The native 'i' template only
# happens to match that on little-endian hosts with a 32-bit int.
sub shorthash_to_hex { unpack 'H*', pack 'l<', $_[0] } # int -> hex
sub shorthash_to_int { unpack 'l<', pack 'H*', $_[0] } # hex -> int
# Subquery returning all packages that have a man page. # Subquery returning all packages that have a man page.
my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM man m WHERE m.package = pv.id)))'; my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id)))';
sub escape_like { $_[0] =~ s/([_%\\])/\\$1/rg } sub escape_like { $_[0] =~ s/([_%\\])/\\$1/rg }
@ -86,13 +89,6 @@ sub sql_join {
sub sql_and { @_ ? sql_join 'AND', map sql('(', $_, ')'), @_ : sql '1=1' } sub sql_and { @_ ? sql_join 'AND', map sql('(', $_, ')'), @_ : sql '1=1' }
sub sql_or { @_ ? sql_join 'OR', map sql('(', $_, ')'), @_ : sql '1=0' } sub sql_or { @_ ? sql_join 'OR', map sql('(', $_, ')'), @_ : sql '1=0' }
# Subquery to match $sql_expr::bytea against a $prefix (hex string). Hopefully indexable.
sub sql_hash_prefix {
my($sql_expr, $prefix) = @_;
my $esc = unpack 'H*', escape_like pack 'H*', $prefix;
sql '(', $sql_expr, "like ('\\x$esc'::bytea||'%'))"
}
sub pkg_frompath { sub pkg_frompath {
my($sys_where, $path) = @_; my($sys_where, $path) = @_;
@ -139,27 +135,29 @@ sub man_pref {
# 7. sysrel: Prefer a more recent system release over an older release # 7. sysrel: Prefer a more recent system release over an older release
# 8. secorder: Lower sections before higher sections (because man does it this way, for some reason) # 8. secorder: Lower sections before higher sections (because man does it this way, for some reason)
# 9. pkgdate: Prefer more recent packages (cross-distro) # 9. pkgdate: Prefer more recent packages (cross-distro)
# 10. Fall back on hash comparison, to ensure the result is stable # 10. Fall back on shorthash comparison, to ensure the result is stable
state $archid = sysbyshort->{arch}{id}; state $archid = sysbyshort->{arch}{id};
state $debid = (sort { $b->{id} <=> $a->{id} } grep $_->{short} =~ /^debian-/, systems->@*)[0]{id}; state $debid = (sort { $b->{id} <=> $a->{id} } grep $_->{short} =~ /^debian-/, systems->@*)[0]{id};
tuwf->dbRowi(q{ tuwf->dbRowi(q{
WITH unfiltered AS ( WITH unfiltered AS (
SELECT s AS sys, p AS pkg, v AS ver, m AS man SELECT m.name, m.section, l.locale, f.shorthash, f.content, f.filename, s AS sys, p AS pkg, v AS ver
FROM man m FROM files f
JOIN package_versions v ON v.id = m.package JOIN locales l ON l.id = f.locale
JOIN mans m ON m.id = f.man
JOIN package_versions v ON v.id = f.pkgver
JOIN packages p ON p.id = v.package JOIN packages p ON p.id = v.package
JOIN systems s ON s.id = p.system JOIN systems s ON s.id = p.system
WHERE}, $where, q{ WHERE}, $where, q{
), f_english AS( ), f_english AS(
SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale((man).locale)) OR is_english_locale((man).locale) SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale(locale)) OR is_english_locale(locale)
), f_pkgver AS( ), f_pkgver AS(
SELECT * FROM f_english a WHERE NOT EXISTS(SELECT 1 FROM f_english b WHERE (a.ver).package = (b.ver).package AND (a.ver).released < (b.ver).released) SELECT * FROM f_english a WHERE NOT EXISTS(SELECT 1 FROM f_english b WHERE (a.ver).package = (b.ver).package AND (a.ver).released < (b.ver).released)
), f_stdloc AS( ), f_stdloc AS(
SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location((man).filename)) OR is_standard_man_location((man).filename) SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location(filename)) OR is_standard_man_location(filename)
), f_secmatch AS( ), f_secmatch AS(
SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE (man).section =}, \$section, q{) OR (man).section =}, \$section, q{ SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE section =}, \$section, q{) OR section =}, \$section, q{
), f_arch AS( ), f_arch AS(
SELECT * FROM f_secmatch WHERE NOT EXISTS(SELECT 1 FROM}, length $section ? 'f_secmatch' : 'f_stdloc', qq{WHERE (sys).id = $archid) OR (sys).id = $archid SELECT * FROM f_secmatch WHERE NOT EXISTS(SELECT 1 FROM}, length $section ? 'f_secmatch' : 'f_stdloc', qq{WHERE (sys).id = $archid) OR (sys).id = $archid
), f_debian AS( ), f_debian AS(
@ -167,13 +165,13 @@ sub man_pref {
), f_sysrel AS( ), f_sysrel AS(
SELECT * FROM f_debian a WHERE NOT EXISTS(SELECT 1 FROM f_debian b WHERE (a.sys).name = (b.sys).name AND (a.sys).id < (b.sys).id) SELECT * FROM f_debian a WHERE NOT EXISTS(SELECT 1 FROM f_debian b WHERE (a.sys).name = (b.sys).name AND (a.sys).id < (b.sys).id)
), f_secorder AS( ), f_secorder AS(
SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE (a.man).section > (b.man).section) SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE a.section > b.section)
), f_pkgdate AS( ), f_pkgdate AS(
SELECT * FROM f_secorder a WHERE NOT EXISTS(SELECT 1 FROM f_secorder b WHERE (a.ver).released < (b.ver).released) SELECT * FROM f_secorder a WHERE NOT EXISTS(SELECT 1 FROM f_secorder b WHERE (a.ver).released < (b.ver).released)
) )
SELECT (pkg).system, (pkg).category, (pkg).name AS package, (ver).version, (ver).released, (ver).id AS verid, SELECT (pkg).system, (pkg).category, (pkg).name AS package, (ver).version, (ver).released, (ver).id AS verid,
(man).name, (man).section, (man).filename, (man).locale, encode((man).hash, 'hex') AS hash name, section, filename, locale, shorthash, content
FROM f_pkgdate ORDER BY (man).hash LIMIT 1 FROM f_pkgdate ORDER BY shorthash LIMIT 1
}); });
} }
@ -562,13 +560,11 @@ sub search_man {
my $sect = $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$// ? $1 : ''; my $sect = $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$// ? $1 : '';
my $name = $q =~ s/^([a-zA-Z0-9,.:_-]+)// ? $1 : ''; my $name = $q =~ s/^([a-zA-Z0-9,.:_-]+)// ? $1 : '';
return !$name ? [] : tuwf->dbAll( return !$name ? [] : tuwf->dbAlli(
'SELECT name, section FROM man_index !W ORDER BY name, section LIMIT ?', 'SELECT name, section FROM mans WHERE', sql_and(
{ sql('lower(name) LIKE', \(escape_like(lc $name).'%')),
'lower(name) LIKE ?' => escape_like(lc $name).'%', $sect ? sql('section ILIKE', \(escape_like(lc $sect).'%')) : (),
$sect ? ('section ILIKE ?' => escape_like(lc $sect).'%') : (), ), 'ORDER BY name, section LIMIT', \$limit,
},
$limit
); );
} }
@ -614,10 +610,11 @@ TUWF::get qr{/([^/]+)/([0-9a-f]{8})/src} => sub {
my $nfo = tuwf->dbRowi(' my $nfo = tuwf->dbRowi('
SELECT m.name, m.section, v.released, c.content SELECT m.name, m.section, v.released, c.content
FROM man m FROM files f
JOIN package_versions v ON v.id = m.package JOIN mans m ON m.id = f.man
JOIN contents c ON c.hash = m.hash JOIN package_versions v ON v.id = f.pkgver
WHERE m.name =', \$name, 'AND', sql_hash_prefix('m.hash', $hash), ' JOIN contents c ON c.id = f.content
WHERE m.name =', \$name, 'AND f.shorthash =', \shorthash_to_int($hash), '
LIMIT 1' LIMIT 1'
); );
return tuwf->resNotFound if !$nfo->{name}; return tuwf->resNotFound if !$nfo->{name};
@ -633,13 +630,16 @@ sub _man_nav {
my($man, $toc) = @_; my($man, $toc) = @_;
my @sect = map $_->{section}, tuwf->dbAlli( my @sect = map $_->{section}, tuwf->dbAlli(
'SELECT DISTINCT section FROM man WHERE name =', \$man->{name}, 'ORDER BY section' 'SELECT DISTINCT section FROM mans WHERE name =', \$man->{name}, 'ORDER BY section'
)->@*; )->@*;
my @lang = map $_->{lang}, tuwf->dbAlli( my @lang = map $_->{lang}, tuwf->dbAlli(
"SELECT DISTINCT substring(locale from '^[^.]+') AS lang "SELECT DISTINCT substring(l.locale from '^[^.]+') AS lang
FROM man WHERE name =", \$man->{name}, 'AND section =', \$man->{section}, " FROM files f
ORDER BY substring(locale from '^[^.]+') NULLS FIRST" JOIN mans m ON m.id = f.man
JOIN locales l ON l.id = f.locale
WHERE m.name =", \$man->{name}, 'AND m.section =', \$man->{section}, "
ORDER BY substring(l.locale from '^[^.]+') NULLS FIRST"
)->@*; )->@*;
return if !@sect && !@lang && !@$toc; return if !@sect && !@lang && !@$toc;
@ -707,7 +707,7 @@ sub soelim {
my($man) = $verid ? man_pref_name $name, sql 'v.id =', \$verid : (); my($man) = $verid ? man_pref_name $name, sql 'v.id =', \$verid : ();
$man->{name} $man->{name}
# Recursive soelim, but the second call gets $verid=0 so we don't keep checking the database # Recursive soelim, but the second call gets $verid=0 so we don't keep checking the database
? soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')")) ? soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE id =", \$man->{content}))
: ".in -10\n.sp\n\[\[\[MANNEDINCLUDE$path\]\]\]" : ".in -10\n.sp\n\[\[\[MANNEDINCLUDE$path\]\]\]"
}emg; }emg;
$src; $src;
@ -727,12 +727,12 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
# the same package as the requested man page. Use the man_pref logic here # the same package as the requested man page. Use the man_pref logic here
# to deterministically select a good package. # to deterministically select a good package.
my($man, undef) = $shorthash my($man, undef) = $shorthash
? man_pref undef, sql 'm.name =', \$name, 'AND', sql_hash_prefix 'm.hash', $shorthash ? man_pref undef, sql 'm.name =', \$name, 'AND f.shorthash =', shorthash_to_int($shorthash)
: man_pref_name $name, 'true'; : man_pref_name $name, 'true';
return tuwf->resNotFound() if !$man->{name}; return tuwf->resNotFound() if !$man->{name};
my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid}, my $content = tuwf->dbRowi('SELECT encode(hash, \'hex\') AS hash, content FROM contents WHERE id =', \$man->{content});
tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')"); my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid}, $content->{content};
my @toc; my @toc;
$fmt =~ s{\n<b>(.+?)<\/b>\n}{ $fmt =~ s{\n<b>(.+?)<\/b>\n}{
push @toc, $1; push @toc, $1;
@ -741,9 +741,14 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
}eg; }eg;
# Is there any other file with the same name, section and locale but a different shorthash?
# $man->{shorthash} is already the integer from the database, so it is bound as-is.
my $hasversions = tuwf->dbVali( my $hasversions = tuwf->dbVali(
'SELECT 1 FROM man WHERE name =', \$man->{name}, 'AND section =', \$man->{section}, 'SELECT 1
'AND locale IS NOT DISTINCT FROM', \$man->{locale}, FROM files f
'AND hash <> decode(', \$man->{hash}, ", 'hex') LIMIT 1" JOIN mans m ON m.id = f.man
JOIN locales l ON l.id = f.locale
WHERE m.name =', \$man->{name}, 'AND m.section =', \$man->{section}, '
AND l.locale =', \$man->{locale}, '
AND f.shorthash <> ', \$man->{shorthash}, '
LIMIT 1'
); );
tuwf->resLastMod($man->{released}); tuwf->resLastMod($man->{released});
@ -751,14 +756,14 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
_man_nav $man, \@toc; _man_nav $man, \@toc;
div_ id => 'manbuttons', sub { div_ id => 'manbuttons', sub {
h1_ $man->{name}; h1_ $man->{name};
ul_ 'data-hash' => $man->{hash}, ul_ 'data-hash' => $content->{hash},
'data-name' => $man->{name}, 'data-name' => $man->{name},
'data-section' => $man->{section}, 'data-section' => $man->{section},
'data-locale' => $man->{locale}||'', 'data-locale' => $man->{locale}||'',
'data-hasversions' => $hasversions?1:0, 'data-hasversions' => $hasversions?1:0,
sub { sub {
li_ sub { a_ href => "/$man->{name}/".substr($man->{hash}, 0, 8).'/src', 'source' }; li_ sub { a_ href => "/$man->{name}/".shorthash_to_hex($man->{shorthash}).'/src', 'source' };
li_ sub { a_ href => "/$man->{name}/".substr($man->{hash}, 0, 8), 'permalink' }; li_ sub { a_ href => "/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'permalink' };
} }
}; };
div_ id => 'manres', class => 'hidden', ''; div_ id => 'manres', class => 'hidden', '';
@ -828,7 +833,7 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
SELECT id, version, released SELECT id, version, released
FROM package_versions v FROM package_versions v
WHERE package =', \$pkg->{id}, ' WHERE package =', \$pkg->{id}, '
AND EXISTS(SELECT 1 FROM man m WHERE m.package = v.id) AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = v.id)
ORDER BY released DESC' ORDER BY released DESC'
); );
my $sel = $ver ? (grep $_->{version} eq $ver, @$vers)[0] : $vers->[0]; my $sel = $ver ? (grep $_->{version} eq $ver, @$vers)[0] : $vers->[0];
@ -836,11 +841,14 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
my $p = tuwf->validate(get => p => { onerror => 1, uint => 1, range => [1,100] })->data; my $p = tuwf->validate(get => p => { onerror => 1, uint => 1, range => [1,100] })->data;
my $count = tuwf->dbVali('SELECT count(*) FROM man WHERE package =', \$sel->{id}); my $count = tuwf->dbVali('SELECT count(*) FROM files WHERE pkgver =', \$sel->{id});
my $mans = tuwf->dbPagei({ results => 200, page => $p }, my $mans = tuwf->dbPagei({ results => 200, page => $p },
"SELECT name, encode(hash, 'hex') AS hash, section, locale, filename "SELECT m.name, m.section, f.shorthash, f.filename, l.locale
FROM man WHERE package =", \$sel->{id}, ' FROM files f
ORDER BY name, locale NULLS FIRST, filename' JOIN locales l ON l.id = f.locale
JOIN mans m ON m.id = f.man
WHERE f.pkgver =", \$sel->{id}, '
ORDER BY m.name, l.locale, f.filename'
); );
# Latest version of this package determines last modification date of the page. # Latest version of this package determines last modification date of the page.
@ -871,7 +879,7 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
paginate_ "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$sel->{version}?p=", $count, 200, $p; paginate_ "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$sel->{version}?p=", $count, 200, $p;
ul_ sub { ul_ sub {
li_ sub { li_ sub {
a_ href => "/$_->{name}/".substr($_->{hash},0,8), "$_->{name}($_->{section})"; a_ href => "/$_->{name}/".shorthash_to_hex($_->{shorthash}), "$_->{name}($_->{section})";
b_ " $_->{locale}" if $_->{locale}; b_ " $_->{locale}" if $_->{locale};
small_ " $_->{filename}"; small_ " $_->{filename}";
} for(@$mans); } for(@$mans);
@ -922,7 +930,7 @@ TUWF::get qr{/man/([^/]+)/(.+)} => sub {
} }
return tuwf->resNotFound if !$man; return tuwf->resNotFound if !$man;
tuwf->resRedirect("/$man->{name}/".substr($man->{hash}, 0, 8), 'temp'); tuwf->resRedirect("/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'temp');
}; };
@ -932,9 +940,9 @@ TUWF::get qr{/lang/([^/]+)/([^/]+)} => sub {
my $lang = tuwf->capture(1); my $lang = tuwf->capture(1);
my $name = normalize_name tuwf->capture(2); my $name = normalize_name tuwf->capture(2);
my($man, undef) = man_pref_name $name, my($man, undef) = man_pref_name $name,
sql "substring(locale from '^[^.]+') ilike", \escape_like $lang; sql "substring(l.locale from '^[^.]+') ilike", \(escape_like($lang).'%');
return tuwf->resNotFound if !$man->{name}; return tuwf->resNotFound if !length $man->{name};
tuwf->resRedirect("/$man->{name}/".substr($man->{hash}, 0, 8), 'temp'); tuwf->resRedirect("/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'temp');
}; };
@ -949,22 +957,24 @@ TUWF::get '/json/tree.json' => sub {
return tuwf->resNotFound() if !$f->{hash} && !($f->{section} && $f->{name}); return tuwf->resNotFound() if !$f->{hash} && !($f->{section} && $f->{name});
my $l = tuwf->dbAlli(" my $l = tuwf->dbAlli("
SELECT p.system, p.category, p.name AS package, v.version, v.released, v.id AS verid, m.name, m.section, m.filename, m.locale, encode(m.hash, 'hex') AS hash SELECT p.system, p.category, p.name AS package, v.version, v.released, v.id AS verid, m.name, m.section, f.filename, f.shorthash, l.locale
FROM man m FROM files f
JOIN package_versions v ON v.id = m.package JOIN locales l ON l.id = f.locale
JOIN mans m ON m.id = f.man
JOIN package_versions v ON v.id = f.pkgver
JOIN packages p ON p.id = v.package JOIN packages p ON p.id = v.package
JOIN systems s ON s.id = p.system JOIN systems s ON s.id = p.system
WHERE", sql_and( WHERE", sql_and(
length $f->{hash} ? sql 'm.hash = decode(', \$f->{hash}, ", 'hex')" : (), length $f->{hash} ? sql 'f.content = (SELECT id FROM contents WHERE hash = decode(', \$f->{hash}, ", 'hex'))" : (),
length $f->{name} ? sql 'm.name =', \$f->{name} : (), length $f->{name} ? sql 'm.name =', \$f->{name} : (),
length $f->{section} ? sql 'm.section =', \$f->{section} : (), length $f->{section} ? sql 'm.section =', \$f->{section} : (),
length $f->{locale} ? sql 'm.locale =', \$f->{locale} : (), defined $f->{locale} ? sql 'l.locale =', \$f->{locale} : (),
defined $f->{locale} && $f->{locale} eq '' ? 'm.locale IS NULL' : (),
), ' ), '
ORDER BY s.name, s.id DESC, p.name, v.released DESC, m.name, m.locale NULLS FIRST, m.filename ORDER BY s.name, s.id DESC, p.name, v.released DESC, m.name, l.locale, f.filename
'); ');
# Convert the list into a tree # Convert the list into a tree
my $cur = $f->{cur} ? shorthash_to_int substr $f->{cur}, 0, 8 : 0;
my $tree = []; my $tree = [];
my($sys, $sysver, $pkg, $pkgver); my($sys, $sysver, $pkg, $pkgver);
for my $m (@$l) { for my $m (@$l) {
@ -992,12 +1002,12 @@ TUWF::get '/json/tree.json' => sub {
$pkgver && $pkgver eq $m->{version} ? {name=>''} : $pkgver && $pkgver eq $m->{version} ? {name=>''} :
{name => $m->{version}, href => "/pkg/".sysbyid->{$m->{system}}{short}."/$m->{category}/$m->{package}/$m->{version}"}, {name => $m->{version}, href => "/pkg/".sysbyid->{$m->{system}}{short}."/$m->{category}/$m->{package}/$m->{version}"},
{ name => "$m->{name}($m->{section})", { name => "$m->{name}($m->{section})",
$f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? () $f->{hash} || $cur == $m->{shorthash} ? ()
: (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8)) : (href => sprintf('/%s/%s', $m->{name}, shorthash_to_hex $m->{shorthash}))
}, },
{ name => substr($m->{hash}, 0, 8), { name => shorthash_to_hex($m->{shorthash}),
$f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? () $f->{hash} || $cur == $m->{shorthash} ? ()
: (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8)) : (href => sprintf('/%s/%s', $m->{name}, shorthash_to_hex $m->{shorthash}))
}, },
{ name => $m->{filename} } { name => $m->{filename} }
]; ];