Large-ish SQL schema revamp/optimizations

Primarily aimed at reducing the size of the old 'man' table (now 'files')
by using smaller integers to refer to man contents and text fields,
and by storing a shorthash as an integer for quick lookups. This better
normalization also removes the need to keep a separate 'man_index' cache
for the search function.

The old schema wasn't necessarily bad, but I was in the mood for some
optimizations. And a little cleanup.

Probably introduces a bunch of new bugs; I haven't tested this too well.
This commit is contained in:
Yorhel 2021-12-14 15:06:05 +01:00
parent 6f7f59c6df
commit f376f1f137
6 changed files with 268 additions and 128 deletions

View file

@ -87,7 +87,7 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
// XXX: Should we update released & arch here?
verid = res?.get(0);
info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
tr.query("DELETE FROM man WHERE package = $1", &[&verid]).unwrap();
tr.query("DELETE FROM files WHERE pkgver = $1", &[&verid]).unwrap();
Some(verid)
} else {
@ -97,13 +97,21 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
}
fn insert_man_row<T: postgres::GenericClient>(tr: &mut T, verid: i32, path: &str, enc: &str, hash: &[u8]) {
fn insert_man_row<T: postgres::GenericClient>(tr: &mut T, verid: i32, path: &str, enc: &str, content: i32) {
let (name, sect, locale) = man::parse_path(path).unwrap();
let locale = if locale == "" { None } else { Some(locale) };
if let Err(e) = tr.execute(
"INSERT INTO man (package, name, filename, locale, hash, section, encoding) VALUES ($1, $2, '/'||$3, $4, $5, $6, $7)",
&[&verid, &name, &path, &locale, &hash, &sect, &enc]
) {
let q = "WITH ms(id) AS (SELECT id FROM mans WHERE name = $2 AND section = $3),
mi(id) AS (INSERT INTO mans (name, section) SELECT $2, $3 WHERE NOT EXISTS(SELECT 1 FROM ms) RETURNING id),
m(id) AS (SELECT id FROM ms UNION SELECT id FROM mi),
ls(id) AS (SELECT id FROM locales WHERE locale = $5),
li(id) AS (INSERT INTO locales (locale) SELECT $5 WHERE NOT EXISTS(SELECT 1 FROM ls) RETURNING id),
l(id) AS (SELECT id FROM ls UNION SELECT id FROM li),
es(id) AS (SELECT id FROM encodings WHERE encoding = $6),
ei(id) AS (INSERT INTO encodings (encoding) SELECT $6 WHERE NOT EXISTS(SELECT 1 FROM es) RETURNING id),
e(id) AS (SELECT id FROM es UNION SELECT id FROM ei),
c(shorthash) AS (SELECT hash_to_shorthash(hash) FROM contents WHERE id = $4)
INSERT INTO files (pkgver, man, content, shorthash, locale, encoding, filename)
SELECT $1, m.id, $4, c.shorthash, l.id, e.id, '/'||$7 FROM m, l, e, c";
if let Err(e) = tr.execute(q, &[&verid, &name, &sect, &content, &locale, &enc, &path]) {
// I think this can only happen if archread gives us the same file twice, which really
// shouldn't happen. But I'd rather continue with an error logged than panic.
error!("Can't insert verid {} fn {}: {}", verid, path, e);
@ -123,29 +131,30 @@ fn insert_man<T: postgres::GenericClient>(tr: &mut T, verid: i32, paths: &[&str]
cont = cont.replace(0 as char, "");
}
tr.execute(
"INSERT INTO contents (hash, content) VALUES($1, $2) ON CONFLICT (hash) DO NOTHING",
&[&dig.as_ref(), &cont]
).unwrap();
let q = "WITH s(id) AS (SELECT id FROM contents WHERE hash = $1),
i(id) AS (INSERT INTO contents (hash, content) SELECT $1, $2 WHERE NOT EXISTS(SELECT 1 FROM s) RETURNING id)
SELECT id FROM s UNION SELECT id FROM i";
let id: i32 = tr.query_one(q, &[&dig.as_ref(), &cont]).unwrap().get(0);
for path in paths {
insert_man_row(tr, verid, path, enc, dig.as_ref());
insert_man_row(tr, verid, path, enc, id);
info!("Inserted man page: {} ({})", path, enc);
}
}
fn insert_link<T>(tr: &mut T, verid: i32, src: &str, dest: &str) where T: postgres::GenericClient {
let res = match tr.query_opt("SELECT hash, encoding FROM man WHERE package = $1 AND filename = '/'||$2", &[&verid, &dest]).unwrap() {
let q = "SELECT f.content, e.encoding FROM files f JOIN encodings e ON e.id = f.encoding WHERE pkgver = $1 AND filename = '/'||$2";
let res = match tr.query_opt(q, &[&verid, &dest]).unwrap() {
None => { /* Can happen if man::decode() failed previously. */
error!("Link to unindexed man page: {} -> {}", src, dest);
return;
},
Some(x) => x
};
let hash: Vec<u8> = res.get(0);
let content: i32 = res.get(0);
let enc: String = res.get(1);
insert_man_row(tr, verid, src, &enc, &hash);
insert_man_row(tr, verid, src, &enc, content);
info!("Inserted man link: {} -> {}", src, dest);
}