Large-ish SQL schema revamp/optimizations
Primarily aimed at reducing the size of the old 'man' (now: files) table, using smaller integers to refer to man contents and text fields, and storing a shorthash as an integer for quick lookups. This better normalization also removes the need to keep a separate 'man_index' cache for the search function. The old schema wasn't necessarily bad, but I was in the mood for some optimizations. And a little cleanup. Prolly introduces a bunch of new bugs, I haven't tested this too well.
This commit is contained in:
parent
6f7f59c6df
commit
f376f1f137
6 changed files with 268 additions and 128 deletions
|
|
@ -87,7 +87,7 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
|
|||
// XXX: Should we update released & arch here?
|
||||
verid = res?.get(0);
|
||||
info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
|
||||
tr.query("DELETE FROM man WHERE package = $1", &[&verid]).unwrap();
|
||||
tr.query("DELETE FROM files WHERE pkgver = $1", &[&verid]).unwrap();
|
||||
Some(verid)
|
||||
|
||||
} else {
|
||||
|
|
@ -97,13 +97,21 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
|
|||
}
|
||||
|
||||
|
||||
fn insert_man_row<T: postgres::GenericClient>(tr: &mut T, verid: i32, path: &str, enc: &str, hash: &[u8]) {
|
||||
fn insert_man_row<T: postgres::GenericClient>(tr: &mut T, verid: i32, path: &str, enc: &str, content: i32) {
|
||||
let (name, sect, locale) = man::parse_path(path).unwrap();
|
||||
let locale = if locale == "" { None } else { Some(locale) };
|
||||
if let Err(e) = tr.execute(
|
||||
"INSERT INTO man (package, name, filename, locale, hash, section, encoding) VALUES ($1, $2, '/'||$3, $4, $5, $6, $7)",
|
||||
&[&verid, &name, &path, &locale, &hash, §, &enc]
|
||||
) {
|
||||
let q = "WITH ms(id) AS (SELECT id FROM mans WHERE name = $2 AND section = $3),
|
||||
mi(id) AS (INSERT INTO mans (name, section) SELECT $2, $3 WHERE NOT EXISTS(SELECT 1 FROM ms) RETURNING id),
|
||||
m(id) AS (SELECT id FROM ms UNION SELECT id FROM mi),
|
||||
ls(id) AS (SELECT id FROM locales WHERE locale = $5),
|
||||
li(id) AS (INSERT INTO locales (locale) SELECT $5 WHERE NOT EXISTS(SELECT 1 FROM ls) RETURNING id),
|
||||
l(id) AS (SELECT id FROM ls UNION SELECT id FROM li),
|
||||
es(id) AS (SELECT id FROM encodings WHERE encoding = $6),
|
||||
ei(id) AS (INSERT INTO encodings (encoding) SELECT $6 WHERE NOT EXISTS(SELECT 1 FROM es) RETURNING id),
|
||||
e(id) AS (SELECT id FROM es UNION SELECT id FROM ei),
|
||||
c(shorthash) AS (SELECT hash_to_shorthash(hash) FROM contents WHERE id = $4)
|
||||
INSERT INTO files (pkgver, man, content, shorthash, locale, encoding, filename)
|
||||
SELECT $1, m.id, $4, c.shorthash, l.id, e.id, '/'||$7 FROM m, l, e, c";
|
||||
if let Err(e) = tr.execute(q, &[&verid, &name, §, &content, &locale, &enc, &path]) {
|
||||
// I think this can only happen if archread gives us the same file twice, which really
|
||||
// shouldn't happen. But I'd rather continue with an error logged than panic.
|
||||
error!("Can't insert verid {} fn {}: {}", verid, path, e);
|
||||
|
|
@ -123,29 +131,30 @@ fn insert_man<T: postgres::GenericClient>(tr: &mut T, verid: i32, paths: &[&str]
|
|||
cont = cont.replace(0 as char, "");
|
||||
}
|
||||
|
||||
tr.execute(
|
||||
"INSERT INTO contents (hash, content) VALUES($1, $2) ON CONFLICT (hash) DO NOTHING",
|
||||
&[&dig.as_ref(), &cont]
|
||||
).unwrap();
|
||||
let q = "WITH s(id) AS (SELECT id FROM contents WHERE hash = $1),
|
||||
i(id) AS (INSERT INTO contents (hash, content) SELECT $1, $2 WHERE NOT EXISTS(SELECT 1 FROM s) RETURNING id)
|
||||
SELECT id FROM s UNION SELECT id FROM i";
|
||||
let id: i32 = tr.query_one(q, &[&dig.as_ref(), &cont]).unwrap().get(0);
|
||||
|
||||
for path in paths {
|
||||
insert_man_row(tr, verid, path, enc, dig.as_ref());
|
||||
insert_man_row(tr, verid, path, enc, id);
|
||||
info!("Inserted man page: {} ({})", path, enc);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn insert_link<T>(tr: &mut T, verid: i32, src: &str, dest: &str) where T: postgres::GenericClient {
|
||||
let res = match tr.query_opt("SELECT hash, encoding FROM man WHERE package = $1 AND filename = '/'||$2", &[&verid, &dest]).unwrap() {
|
||||
let q = "SELECT f.content, e.encoding FROM files f JOIN encodings e ON e.id = f.encoding WHERE pkgver = $1 AND filename = '/'||$2";
|
||||
let res = match tr.query_opt(q, &[&verid, &dest]).unwrap() {
|
||||
None => { /* Can happen if man::decode() failed previously. */
|
||||
error!("Link to unindexed man page: {} -> {}", src, dest);
|
||||
return;
|
||||
},
|
||||
Some(x) => x
|
||||
};
|
||||
let hash: Vec<u8> = res.get(0);
|
||||
let content: i32 = res.get(0);
|
||||
let enc: String = res.get(1);
|
||||
insert_man_row(tr, verid, src, &enc, &hash);
|
||||
insert_man_row(tr, verid, src, &enc, content);
|
||||
info!("Inserted man link: {} -> {}", src, dest);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,15 +7,44 @@ CREATE TABLE systems (
|
|||
short varchar NOT NULL
|
||||
);
|
||||
|
||||
|
||||
CREATE TABLE contents (
|
||||
id SERIAL PRIMARY KEY,
|
||||
-- 'hash' is the SHA1 of the man page file after decompression but *before*
|
||||
-- encoding conversion and removing 0-bytes. This means taking sha1(content)
|
||||
-- may not necessary match the hash, and it's possible for the same content
|
||||
-- to be in the database under multiple hashes (but I suspect that's rare).
|
||||
hash bytea PRIMARY KEY,
|
||||
content varchar NOT NULL
|
||||
hash bytea NOT NULL UNIQUE,
|
||||
content text NOT NULL
|
||||
);
|
||||
|
||||
|
||||
-- Unique man page, as identified by name & section
|
||||
CREATE TABLE mans (
|
||||
id SERIAL PRIMARY KEY,
|
||||
name text NOT NULL,
|
||||
section text NOT NULL,
|
||||
UNIQUE(name, section)
|
||||
);
|
||||
CREATE INDEX mans_name ON mans USING btree(lower(name) text_pattern_ops);
|
||||
|
||||
|
||||
-- List of man page locales for efficient referencing. Some locales include
|
||||
-- the encoding in their name, which isn't really correct or even necessary
|
||||
-- since we convert everything to UTF-8 anyway, but w/e, Can fix later.
|
||||
CREATE TABLE locales (
|
||||
id SMALLSERIAL PRIMARY KEY,
|
||||
locale text NOT NULL UNIQUE
|
||||
);
|
||||
|
||||
|
||||
-- List of encodings for efficient referencing.
|
||||
CREATE TABLE encodings (
|
||||
id SMALLSERIAL PRIMARY KEY,
|
||||
encoding text NOT NULL UNIQUE
|
||||
);
|
||||
|
||||
|
||||
CREATE TABLE packages (
|
||||
id SERIAL PRIMARY KEY,
|
||||
system integer NOT NULL REFERENCES systems(id) ON DELETE CASCADE ON UPDATE CASCADE,
|
||||
|
|
@ -31,6 +60,7 @@ CREATE TABLE packages (
|
|||
UNIQUE(system, name, category) -- Note the order, lookups on (system,name) are common
|
||||
);
|
||||
|
||||
|
||||
CREATE TABLE package_versions (
|
||||
id SERIAL PRIMARY KEY,
|
||||
package integer NOT NULL REFERENCES packages(id) ON DELETE CASCADE,
|
||||
|
|
@ -40,43 +70,36 @@ CREATE TABLE package_versions (
|
|||
UNIQUE(package, version)
|
||||
);
|
||||
|
||||
CREATE TABLE man (
|
||||
package integer NOT NULL REFERENCES package_versions(id) ON DELETE CASCADE,
|
||||
name varchar NOT NULL,
|
||||
filename varchar NOT NULL,
|
||||
locale varchar,
|
||||
hash bytea NOT NULL,
|
||||
section varchar NOT NULL,
|
||||
encoding varchar,
|
||||
UNIQUE(package, filename)
|
||||
|
||||
CREATE TABLE files (
|
||||
pkgver integer NOT NULL REFERENCES package_versions(id) ON DELETE CASCADE,
|
||||
man integer NOT NULL REFERENCES mans(id),
|
||||
content integer NOT NULL REFERENCES content(id),
|
||||
shorthash integer NOT NULL, -- cache: hash_to_shorthash(content.hash)
|
||||
locale smallint NOT NULL REFERENCES locales(id)
|
||||
-- The original encoding the man page was found in. This column isn't really
|
||||
-- used at the moment, but is potentially useful when investigating encoding
|
||||
-- issues.
|
||||
encoding smallint NOT NULL REFERENCES encodings(id),
|
||||
filename text NOT NULL,
|
||||
PRIMARY KEY(pkgver, filename)
|
||||
);
|
||||
CREATE INDEX ON files (man, shorthash);
|
||||
CREATE INDEX ON files (content);
|
||||
|
||||
CREATE INDEX ON man (hash);
|
||||
CREATE INDEX ON man (name);
|
||||
|
||||
-- For stats_cache
|
||||
\i util/update_indices.sql
|
||||
|
||||
|
||||
|
||||
CREATE TABLE man_index AS SELECT DISTINCT name, section FROM man;
|
||||
CREATE INDEX ON man_index USING btree(lower(name) text_pattern_ops);
|
||||
|
||||
CREATE TABLE stats_cache AS SELECT count(distinct hash) AS hashes, count(distinct name) AS mans, count(*) AS files, count(distinct package) AS packages FROM man;
|
||||
|
||||
|
||||
|
||||
-- Removes any path components and compression extensions from the filename.
|
||||
CREATE OR REPLACE FUNCTION basename_from_filename(fn text) RETURNS text AS $$
|
||||
SELECT regexp_replace(fn, '^.+/([^/][^/]*?)(?:\.gz|\.lzma|\.xz|\.bz2|\.zst)*$', '\1');
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION section_from_filename(text) RETURNS text AS $$
|
||||
SELECT regexp_replace(basename_from_filename($1), '^.+\.([^.]+)$', '\1');
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION name_from_filename(text) RETURNS text AS $$
|
||||
SELECT regexp_replace(basename_from_filename($1), '^(.+)\.[^.]+$', '\1');
|
||||
$$ LANGUAGE SQL;
|
||||
-- Interpret first 4 bytes of hash as a signed 32-bit integer.
|
||||
CREATE OR REPLACE FUNCTION hash_to_shorthash(hash bytea) RETURNS integer AS $$
|
||||
SELECT CASE WHEN get_byte(hash, 3) < 128
|
||||
THEN (get_byte(hash, 3)::int<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0)
|
||||
ELSE -2147483648 + ((get_byte(hash, 3)::int - 128)<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0)
|
||||
END;
|
||||
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION is_english_locale(locale text) RETURNS bool AS $$
|
||||
|
|
|
|||
106
sql/update-2021-12-14.sql
Normal file
106
sql/update-2021-12-14.sql
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
-- WARNING: This script does a complete rewrite of the two largest tables.
|
||||
-- This requires ~25G of temporary space and takes anywhere between 10 and 20 minutes.
|
||||
-- The site needs to be taken down during that time.
|
||||
|
||||
|
||||
DROP FUNCTION section_from_filename(text);
|
||||
DROP FUNCTION name_from_filename(text);
|
||||
DROP FUNCTION basename_from_filename(text);
|
||||
|
||||
-- Interpret first 4 bytes of hash as a signed 32-bit integer.
|
||||
CREATE OR REPLACE FUNCTION hash_to_shorthash(hash bytea) RETURNS integer AS $$
|
||||
SELECT CASE WHEN get_byte(hash, 3) < 128
|
||||
THEN (get_byte(hash, 3)::int<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0)
|
||||
ELSE -2147483648 + ((get_byte(hash, 3)::int - 128)<<24) + (get_byte(hash, 2)::int<<16) + (get_byte(hash, 1)::int<<8) + get_byte(hash, 0)
|
||||
END;
|
||||
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
ALTER TABLE contents DROP CONSTRAINT contents_pkey;
|
||||
ALTER TABLE contents RENAME TO contents_old;
|
||||
|
||||
CREATE SEQUENCE contents_id_seq AS integer;
|
||||
CREATE TABLE contents (
|
||||
id integer NOT NULL DEFAULT nextval('contents_id_seq'::regclass),
|
||||
hash bytea NOT NULL,
|
||||
content text NOT NULL
|
||||
);
|
||||
ALTER SEQUENCE contents_id_seq OWNED BY contents.id;
|
||||
-- 4m15s; start 29.3G end 53.3G; +24G
|
||||
INSERT INTO contents (hash, content) SELECT hash, content FROM contents_old;
|
||||
DROP TABLE contents_old; -- back to 29.3G
|
||||
ALTER TABLE contents ADD PRIMARY KEY (id);
|
||||
ALTER TABLE contents ADD UNIQUE (hash);
|
||||
-- 29.7G at this point
|
||||
|
||||
|
||||
|
||||
CREATE TABLE mans (
|
||||
id SERIAL PRIMARY KEY,
|
||||
name text NOT NULL,
|
||||
section text NOT NULL,
|
||||
UNIQUE(name, section)
|
||||
);
|
||||
INSERT INTO mans (name, section) SELECT DISTINCT name, section FROM man;
|
||||
CREATE INDEX mans_name ON mans USING btree(lower(name) text_pattern_ops);
|
||||
|
||||
CREATE TABLE locales (
|
||||
id SMALLSERIAL PRIMARY KEY,
|
||||
locale text NOT NULL UNIQUE
|
||||
);
|
||||
INSERT INTO locales (id, locale) VALUES (0,''); -- 0 for default locale is handy, I guess
|
||||
INSERT INTO locales (locale) SELECT locale FROM man WHERE locale IS NOT NULL GROUP BY locale ORDER BY locale;
|
||||
|
||||
-- Encodings are stored for reference but are never actually used anywhere.
|
||||
CREATE TABLE encodings (
|
||||
id SMALLSERIAL PRIMARY KEY,
|
||||
encoding text NOT NULL UNIQUE
|
||||
);
|
||||
INSERT INTO encodings (id, encoding) VALUES (0,'');
|
||||
INSERT INTO encodings (encoding) SELECT encoding FROM man WHERE encoding IS NOT NULL GROUP BY encoding ORDER BY encoding;
|
||||
|
||||
|
||||
-- Replaces the 'man' table; It's the largest table in terms of number of rows
|
||||
-- and it's pretty frequently accessed, so keeping the rows small helps.
|
||||
CREATE TABLE files (
|
||||
pkgver integer NOT NULL, -- package_versions.id
|
||||
man integer NOT NULL, -- mans.id
|
||||
content integer NOT NULL, -- content.id
|
||||
shorthash integer NOT NULL, -- cache: hash_to_shorthash(content.hash)
|
||||
locale smallint NOT NULL, -- locales.id
|
||||
encoding smallint NOT NULL, -- encodings.id
|
||||
filename text NOT NULL
|
||||
);
|
||||
|
||||
-- 1min; 29.7G -> 31.8G
|
||||
INSERT INTO files
|
||||
SELECT o.package, m.id, c.id, hash_to_shorthash(o.hash), l.id, e.id, o.filename
|
||||
FROM man o
|
||||
JOIN mans m ON m.name = o.name AND m.section = o.section
|
||||
JOIN contents c ON c.hash = o.hash
|
||||
JOIN locales l ON l.locale = coalesce(o.locale, '')
|
||||
JOIN encodings e ON e.encoding = coalesce(o.encoding, '');
|
||||
|
||||
-- 1min; 31.8G -> 33.7G for both indices
|
||||
ALTER TABLE files ADD PRIMARY KEY (pkgver, filename);
|
||||
CREATE INDEX ON files (man, shorthash);
|
||||
CREATE INDEX ON files (content);
|
||||
|
||||
-- 20sec to verify
|
||||
ALTER TABLE files
|
||||
ADD CONSTRAINT files_pkgver_fkey FOREIGN KEY (pkgver) REFERENCES package_versions (id) ON DELETE CASCADE,
|
||||
ADD CONSTRAINT files_man_fkey FOREIGN KEY (man) REFERENCES mans (id),
|
||||
ADD CONSTRAINT files_content_fkey FOREIGN KEY (content) REFERENCES contents (id),
|
||||
ADD CONSTRAINT files_locale_fkey FOREIGN KEY (locale) REFERENCES locales (id),
|
||||
ADD CONSTRAINT files_encoding_fkey FOREIGN KEY (encoding) REFERENCES encodings (id);
|
||||
|
||||
DROP TABLE man;
|
||||
DROP TABLE man_index;
|
||||
-- final: 29.1G; we saved a whole 300M! \o/
|
||||
|
||||
|
||||
-- There's only about a 100 unreferenced rows, leftovers from removals of
|
||||
-- incorrectly indexed packages. Let's remove them while we're at it.
|
||||
DELETE FROM contents WHERE NOT EXISTS(SELECT 1 FROM files WHERE content = id);
|
||||
|
||||
VACUUM ANALYZE mans, files, contents, locales, encodings;
|
||||
|
|
@ -10,9 +10,5 @@ PSQL="psql -U manned -Awtq"
|
|||
./fedora.sh current
|
||||
./ubuntu.sh current
|
||||
|
||||
# Only update indices once a week (on mondays). This process is slow and the data doesn't often change anyway.
|
||||
if [ `date +%u` == 1 ]
|
||||
then
|
||||
echo "============ Updating SQL indices"
|
||||
$PSQL -f update_indices.sql
|
||||
fi
|
||||
echo "============ Updating SQL indices"
|
||||
$PSQL -f update_indices.sql
|
||||
|
|
|
|||
|
|
@ -2,14 +2,10 @@
|
|||
-- the table being replaced. The site should remain responsive while these
|
||||
-- queries are run.
|
||||
BEGIN;
|
||||
CREATE TABLE man_index_new AS SELECT DISTINCT name, section FROM man;
|
||||
CREATE INDEX ON man_index_new USING btree(lower(name) text_pattern_ops);
|
||||
DROP TABLE man_index;
|
||||
ALTER TABLE man_index_new RENAME TO man_index;
|
||||
COMMIT;
|
||||
|
||||
BEGIN;
|
||||
CREATE TABLE stats_cache_new AS SELECT count(distinct hash) AS hashes, count(distinct name) AS mans, count(*) AS files, count(distinct package) AS packages FROM man;
|
||||
CREATE TABLE stats_cache_new AS
|
||||
SELECT (SELECT count(*) FROM contents) AS hashes,
|
||||
(SELECT count(distinct name) FROM mans) AS mans, *
|
||||
FROM (SELECT count(*), count(distinct pkgver) FROM files) x(files, packages);
|
||||
DROP TABLE stats_cache;
|
||||
ALTER TABLE stats_cache_new RENAME TO stats_cache;
|
||||
COMMIT;
|
||||
|
|
|
|||
140
www/index.pl
140
www/index.pl
|
|
@ -73,8 +73,11 @@ sub sysbyshort { state $s ||= { map +($_->{short}, $_), systems->@* } }
|
|||
# Firefox seems to escape [ and ] in URLs. It doesn't really have to...
|
||||
sub normalize_name { $_[0] =~ s/%5b/[/irg =~ s/%5d/]/irg =~ s/%20/ /rg }
|
||||
|
||||
sub shorthash_to_hex { unpack 'H*', pack 'i', $_[0] } # int -> hex
|
||||
sub shorthash_to_int { unpack 'i', pack 'H*', $_[0] } # hex -> int
|
||||
|
||||
# Subquery returning all packages that have a man page.
|
||||
my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM man m WHERE m.package = pv.id)))';
|
||||
my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id)))';
|
||||
|
||||
sub escape_like { $_[0] =~ s/([_%\\])/\\$1/rg }
|
||||
|
||||
|
|
@ -86,13 +89,6 @@ sub sql_join {
|
|||
sub sql_and { @_ ? sql_join 'AND', map sql('(', $_, ')'), @_ : sql '1=1' }
|
||||
sub sql_or { @_ ? sql_join 'OR', map sql('(', $_, ')'), @_ : sql '1=0' }
|
||||
|
||||
# Subquery to match $sql_expr::bytea against a $prefix (hex string). Hopefully indexable.
|
||||
sub sql_hash_prefix {
|
||||
my($sql_expr, $prefix) = @_;
|
||||
my $esc = unpack 'H*', escape_like pack 'H*', $prefix;
|
||||
sql '(', $sql_expr, "like ('\\x$esc'::bytea||'%'))"
|
||||
}
|
||||
|
||||
|
||||
sub pkg_frompath {
|
||||
my($sys_where, $path) = @_;
|
||||
|
|
@ -139,27 +135,29 @@ sub man_pref {
|
|||
# 7. sysrel: Prefer a more recent system release over an older release
|
||||
# 8. secorder: Lower sections before higher sections (because man does it this way, for some reason)
|
||||
# 9. pkgdate: Prefer more recent packages (cross-distro)
|
||||
# 10. Fall back on hash comparison, to ensure the result is stable
|
||||
# 10. Fall back on shorthash comparison, to ensure the result is stable
|
||||
|
||||
state $archid = sysbyshort->{arch}{id};
|
||||
state $debid = (sort { $b->{id} <=> $a->{id} } grep $_->{short} =~ /^debian-/, systems->@*)[0]{id};
|
||||
|
||||
tuwf->dbRowi(q{
|
||||
WITH unfiltered AS (
|
||||
SELECT s AS sys, p AS pkg, v AS ver, m AS man
|
||||
FROM man m
|
||||
JOIN package_versions v ON v.id = m.package
|
||||
SELECT m.name, m.section, l.locale, f.shorthash, f.content, f.filename, s AS sys, p AS pkg, v AS ver
|
||||
FROM files f
|
||||
JOIN locales l ON l.id = f.locale
|
||||
JOIN mans m ON m.id = f.man
|
||||
JOIN package_versions v ON v.id = f.pkgver
|
||||
JOIN packages p ON p.id = v.package
|
||||
JOIN systems s ON s.id = p.system
|
||||
WHERE}, $where, q{
|
||||
), f_english AS(
|
||||
SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale((man).locale)) OR is_english_locale((man).locale)
|
||||
SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale(locale)) OR is_english_locale(locale)
|
||||
), f_pkgver AS(
|
||||
SELECT * FROM f_english a WHERE NOT EXISTS(SELECT 1 FROM f_english b WHERE (a.ver).package = (b.ver).package AND (a.ver).released < (b.ver).released)
|
||||
), f_stdloc AS(
|
||||
SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location((man).filename)) OR is_standard_man_location((man).filename)
|
||||
SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location(filename)) OR is_standard_man_location(filename)
|
||||
), f_secmatch AS(
|
||||
SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE (man).section =}, \$section, q{) OR (man).section =}, \$section, q{
|
||||
SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE section =}, \$section, q{) OR section =}, \$section, q{
|
||||
), f_arch AS(
|
||||
SELECT * FROM f_secmatch WHERE NOT EXISTS(SELECT 1 FROM}, length $section ? 'f_secmatch' : 'f_stdloc', qq{WHERE (sys).id = $archid) OR (sys).id = $archid
|
||||
), f_debian AS(
|
||||
|
|
@ -167,13 +165,13 @@ sub man_pref {
|
|||
), f_sysrel AS(
|
||||
SELECT * FROM f_debian a WHERE NOT EXISTS(SELECT 1 FROM f_debian b WHERE (a.sys).name = (b.sys).name AND (a.sys).id < (b.sys).id)
|
||||
), f_secorder AS(
|
||||
SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE (a.man).section > (b.man).section)
|
||||
SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE section > section)
|
||||
), f_pkgdate AS(
|
||||
SELECT * FROM f_secorder a WHERE NOT EXISTS(SELECT 1 FROM f_secorder b WHERE (a.ver).released < (b.ver).released)
|
||||
)
|
||||
SELECT (pkg).system, (pkg).category, (pkg).name AS package, (ver).version, (ver).released, (ver).id AS verid,
|
||||
(man).name, (man).section, (man).filename, (man).locale, encode((man).hash, 'hex') AS hash
|
||||
FROM f_pkgdate ORDER BY (man).hash LIMIT 1
|
||||
name, section, filename, locale, shorthash, content
|
||||
FROM f_pkgdate ORDER BY shorthash LIMIT 1
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -562,13 +560,11 @@ sub search_man {
|
|||
my $sect = $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$// ? $1 : '';
|
||||
my $name = $q =~ s/^([a-zA-Z0-9,.:_-]+)// ? $1 : '';
|
||||
|
||||
return !$name ? [] : tuwf->dbAll(
|
||||
'SELECT name, section FROM man_index !W ORDER BY name, section LIMIT ?',
|
||||
{
|
||||
'lower(name) LIKE ?' => escape_like(lc $name).'%',
|
||||
$sect ? ('section ILIKE ?' => escape_like(lc $sect).'%') : (),
|
||||
},
|
||||
$limit
|
||||
return !$name ? [] : tuwf->dbAlli(
|
||||
'SELECT name, section FROM mans WHERE', sql_and(
|
||||
sql('lower(name) LIKE', \(escape_like(lc $name).'%')),
|
||||
$sect ? sql('section ILIKE', \(escape_like(lc $sect).'%')) : (),
|
||||
), 'ORDER BY name, section LIMIT', \$limit,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -614,10 +610,11 @@ TUWF::get qr{/([^/]+)/([0-9a-f]{8})/src} => sub {
|
|||
|
||||
my $nfo = tuwf->dbRowi('
|
||||
SELECT m.name, m.section, v.released, c.content
|
||||
FROM man m
|
||||
JOIN package_versions v ON v.id = m.package
|
||||
JOIN contents c ON c.hash = m.hash
|
||||
WHERE m.name =', \$name, 'AND', sql_hash_prefix('m.hash', $hash), '
|
||||
FROM files f
|
||||
JOIN mans m ON m.id = f.man
|
||||
JOIN package_versions v ON v.id = f.pkgver
|
||||
JOIN contents c ON c.id = f.content
|
||||
WHERE m.name =', \$name, 'AND f.shorthash =', \shorthash_to_int($hash), '
|
||||
LIMIT 1'
|
||||
);
|
||||
return tuwf->resNotFound if !$nfo->{name};
|
||||
|
|
@ -633,13 +630,16 @@ sub _man_nav {
|
|||
my($man, $toc) = @_;
|
||||
|
||||
my @sect = map $_->{section}, tuwf->dbAlli(
|
||||
'SELECT DISTINCT section FROM man WHERE name =', \$man->{name}, 'ORDER BY section'
|
||||
'SELECT DISTINCT section FROM mans WHERE name =', \$man->{name}, 'ORDER BY section'
|
||||
)->@*;
|
||||
|
||||
my @lang = map $_->{lang}, tuwf->dbAlli(
|
||||
"SELECT DISTINCT substring(locale from '^[^.]+') AS lang
|
||||
FROM man WHERE name =", \$man->{name}, 'AND section =', \$man->{section}, "
|
||||
ORDER BY substring(locale from '^[^.]+') NULLS FIRST"
|
||||
"SELECT DISTINCT substring(l.locale from '^[^.]+') AS lang
|
||||
FROM files f
|
||||
JOIN mans m ON m.id = f.man
|
||||
JOIN locales l ON l.id = f.locale
|
||||
WHERE m.name =", \$man->{name}, 'AND m.section =', \$man->{section}, "
|
||||
ORDER BY substring(l.locale from '^[^.]+') NULLS FIRST"
|
||||
)->@*;
|
||||
return if !@sect && !@lang && !@$toc;
|
||||
|
||||
|
|
@ -707,7 +707,7 @@ sub soelim {
|
|||
my($man) = $verid ? man_pref_name $name, sql 'v.id =', \$verid : ();
|
||||
$man->{name}
|
||||
# Recursive soelim, but the second call gets $verid=0 so we don't keep checking the database
|
||||
? soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')"))
|
||||
? soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE id =", \$man->{content}))
|
||||
: ".in -10\n.sp\n\[\[\[MANNEDINCLUDE$path\]\]\]"
|
||||
}emg;
|
||||
$src;
|
||||
|
|
@ -727,12 +727,12 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
|
|||
# the same package as the requested man page. Use the man_pref logic here
|
||||
# to deterministically select a good package.
|
||||
my($man, undef) = $shorthash
|
||||
? man_pref undef, sql 'm.name =', \$name, 'AND', sql_hash_prefix 'm.hash', $shorthash
|
||||
? man_pref undef, sql 'm.name =', \$name, 'AND f.shorthash =', shorthash_to_int($shorthash)
|
||||
: man_pref_name $name, 'true';
|
||||
return tuwf->resNotFound() if !$man->{name};
|
||||
|
||||
my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid},
|
||||
tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')");
|
||||
my $content = tuwf->dbRowi('SELECT encode(hash, \'hex\') AS hash, content FROM contents WHERE id =', \$man->{content});
|
||||
my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid}, $content->{content};
|
||||
my @toc;
|
||||
$fmt =~ s{\n<b>(.+?)<\/b>\n}{
|
||||
push @toc, $1;
|
||||
|
|
@ -741,9 +741,14 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
|
|||
}eg;
|
||||
|
||||
my $hasversions = tuwf->dbVali(
|
||||
'SELECT 1 FROM man WHERE name =', \$man->{name}, 'AND section =', \$man->{section},
|
||||
'AND locale IS NOT DISTINCT FROM', \$man->{locale},
|
||||
'AND hash <> decode(', \$man->{hash}, ", 'hex') LIMIT 1"
|
||||
'SELECT 1
|
||||
FROM files f
|
||||
JOIN mans m ON m.id = f.man
|
||||
JOIN locales l ON l.id = f.locale
|
||||
WHERE m.name =', \$man->{name}, 'AND m.section =', \$man->{section}, '
|
||||
AND l.locale =', \$man->{locale}, '
|
||||
AND f.shorthash <> ', \shorthash_to_int($man->{shorthash}), '
|
||||
LIMIT 1'
|
||||
);
|
||||
|
||||
tuwf->resLastMod($man->{released});
|
||||
|
|
@ -751,14 +756,14 @@ TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
|
|||
_man_nav $man, \@toc;
|
||||
div_ id => 'manbuttons', sub {
|
||||
h1_ $man->{name};
|
||||
ul_ 'data-hash' => $man->{hash},
|
||||
ul_ 'data-hash' => $content->{hash},
|
||||
'data-name' => $man->{name},
|
||||
'data-section' => $man->{section},
|
||||
'data-locale' => $man->{locale}||'',
|
||||
'data-hasversions' => $hasversions?1:0,
|
||||
sub {
|
||||
li_ sub { a_ href => "/$man->{name}/".substr($man->{hash}, 0, 8).'/src', 'source' };
|
||||
li_ sub { a_ href => "/$man->{name}/".substr($man->{hash}, 0, 8), 'permalink' };
|
||||
li_ sub { a_ href => "/$man->{name}/".shorthash_to_hex($man->{shorthash}).'/src', 'source' };
|
||||
li_ sub { a_ href => "/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'permalink' };
|
||||
}
|
||||
};
|
||||
div_ id => 'manres', class => 'hidden', '';
|
||||
|
|
@ -828,7 +833,7 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
|
|||
SELECT id, version, released
|
||||
FROM package_versions v
|
||||
WHERE package =', \$pkg->{id}, '
|
||||
AND EXISTS(SELECT 1 FROM man m WHERE m.package = v.id)
|
||||
AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = v.id)
|
||||
ORDER BY released DESC'
|
||||
);
|
||||
my $sel = $ver ? (grep $_->{version} eq $ver, @$vers)[0] : $vers->[0];
|
||||
|
|
@ -836,11 +841,14 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
|
|||
|
||||
my $p = tuwf->validate(get => p => { onerror => 1, uint => 1, range => [1,100] })->data;
|
||||
|
||||
my $count = tuwf->dbVali('SELECT count(*) FROM man WHERE package =', \$sel->{id});
|
||||
my $count = tuwf->dbVali('SELECT count(*) FROM files WHERE pkgver =', \$sel->{id});
|
||||
my $mans = tuwf->dbPagei({ results => 200, page => $p },
|
||||
"SELECT name, encode(hash, 'hex') AS hash, section, locale, filename
|
||||
FROM man WHERE package =", \$sel->{id}, '
|
||||
ORDER BY name, locale NULLS FIRST, filename'
|
||||
"SELECT m.name, m.section, f.shorthash, f.filename, l.locale
|
||||
FROM files f
|
||||
JOIN locales l ON l.id = f.locale
|
||||
JOIN mans m ON m.id = f.man
|
||||
WHERE f.pkgver =", \$sel->{id}, '
|
||||
ORDER BY m.name, l.locale, f.filename'
|
||||
);
|
||||
|
||||
# Latest version of this package determines last modification date of the page.
|
||||
|
|
@ -871,7 +879,7 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
|
|||
paginate_ "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$sel->{version}?p=", $count, 200, $p;
|
||||
ul_ sub {
|
||||
li_ sub {
|
||||
a_ href => "/$_->{name}/".substr($_->{hash},0,8), "$_->{name}($_->{section})";
|
||||
a_ href => "/$_->{name}/".shorthash_to_hex($_->{shorthash}), "$_->{name}($_->{section})";
|
||||
b_ " $_->{locale}" if $_->{locale};
|
||||
small_ " $_->{filename}";
|
||||
} for(@$mans);
|
||||
|
|
@ -922,7 +930,7 @@ TUWF::get qr{/man/([^/]+)/(.+)} => sub {
|
|||
}
|
||||
return tuwf->resNotFound if !$man;
|
||||
|
||||
tuwf->resRedirect("/$man->{name}/".substr($man->{hash}, 0, 8), 'temp');
|
||||
tuwf->resRedirect("/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'temp');
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -932,9 +940,9 @@ TUWF::get qr{/lang/([^/]+)/([^/]+)} => sub {
|
|||
my $lang = tuwf->capture(1);
|
||||
my $name = normalize_name tuwf->capture(2);
|
||||
my($man, undef) = man_pref_name $name,
|
||||
sql "substring(locale from '^[^.]+') ilike", \escape_like $lang;
|
||||
return tuwf->resNotFound if !$man->{name};
|
||||
tuwf->resRedirect("/$man->{name}/".substr($man->{hash}, 0, 8), 'temp');
|
||||
sql "substring(l.locale from '^[^.]+') ilike", \(escape_like($lang).'%');
|
||||
return tuwf->resNotFound if !length $man->{name};
|
||||
tuwf->resRedirect("/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'temp');
|
||||
};
|
||||
|
||||
|
||||
|
|
@ -949,22 +957,24 @@ TUWF::get '/json/tree.json' => sub {
|
|||
return tuwf->resNotFound() if !$f->{hash} && !($f->{section} && $f->{name});
|
||||
|
||||
my $l = tuwf->dbAlli("
|
||||
SELECT p.system, p.category, p.name AS package, v.version, v.released, v.id AS verid, m.name, m.section, m.filename, m.locale, encode(m.hash, 'hex') AS hash
|
||||
FROM man m
|
||||
JOIN package_versions v ON v.id = m.package
|
||||
SELECT p.system, p.category, p.name AS package, v.version, v.released, v.id AS verid, m.name, m.section, f.filename, f.shorthash, l.locale
|
||||
FROM files f
|
||||
JOIN locales l ON l.id = f.locale
|
||||
JOIN mans m ON m.id = f.man
|
||||
JOIN package_versions v ON v.id = f.pkgver
|
||||
JOIN packages p ON p.id = v.package
|
||||
JOIN systems s ON s.id = p.system
|
||||
WHERE", sql_and(
|
||||
length $f->{hash} ? sql 'm.hash = decode(', \$f->{hash}, ", 'hex')" : (),
|
||||
length $f->{hash} ? sql 'f.content = (SELECT id FROM contents WHERE hash = decode(', \$f->{hash}, ", 'hex'))" : (),
|
||||
length $f->{name} ? sql 'm.name =', \$f->{name} : (),
|
||||
length $f->{section} ? sql 'm.section =', \$f->{section} : (),
|
||||
length $f->{locale} ? sql 'm.locale =', \$f->{locale} : (),
|
||||
defined $f->{locale} && $f->{locale} eq '' ? 'm.locale IS NULL' : (),
|
||||
defined $f->{locale} ? sql 'l.locale =', \$f->{locale} : (),
|
||||
), '
|
||||
ORDER BY s.name, s.id DESC, p.name, v.released DESC, m.name, m.locale NULLS FIRST, m.filename
|
||||
ORDER BY s.name, s.id DESC, p.name, v.released DESC, m.name, l.locale, f.filename
|
||||
');
|
||||
|
||||
# Convert the list into a tree
|
||||
my $cur = $f->{cur} ? shorthash_to_int substr $f->{cur}, 0, 8 : 0;
|
||||
my $tree = [];
|
||||
my($sys, $sysver, $pkg, $pkgver);
|
||||
for my $m (@$l) {
|
||||
|
|
@ -992,12 +1002,12 @@ TUWF::get '/json/tree.json' => sub {
|
|||
$pkgver && $pkgver eq $m->{version} ? {name=>''} :
|
||||
{name => $m->{version}, href => "/pkg/".sysbyid->{$m->{system}}{short}."/$m->{category}/$m->{package}/$m->{version}"},
|
||||
{ name => "$m->{name}($m->{section})",
|
||||
$f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? ()
|
||||
: (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8))
|
||||
$f->{hash} || $cur == $m->{shorthash} ? ()
|
||||
: (href => sprintf('/%s/%s', $m->{name}, shorthash_to_hex $m->{shorthash}))
|
||||
},
|
||||
{ name => substr($m->{hash}, 0, 8),
|
||||
$f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? ()
|
||||
: (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8))
|
||||
{ name => shorthash_to_hex($m->{shorthash}),
|
||||
$f->{hash} || $cur == $m->{shorthash} ? ()
|
||||
: (href => sprintf('/%s/%s', $m->{name}, shorthash_to_hex $m->{shorthash}))
|
||||
},
|
||||
{ name => $m->{filename} }
|
||||
];
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue