Large-ish SQL schema revamp/optimizations
Primarily aimed at reducing the size of the old 'man' (now: files) table, using smaller integers to refer to man contents and text fields, and storing a shorthash as an integer for quick lookups. This better normalization also removes the need to keep a separate 'man_index' cache for the search function. The old schema wasn't necessarily bad, but I was in the mood for some optimizations. And a little cleanup. Probably introduces a bunch of new bugs; I haven't tested this too well.
This commit is contained in:
parent
6f7f59c6df
commit
f376f1f137
6 changed files with 268 additions and 128 deletions
|
|
@ -87,7 +87,7 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
|
|||
// XXX: Should we update released & arch here?
|
||||
verid = res?.get(0);
|
||||
info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
|
||||
tr.query("DELETE FROM man WHERE package = $1", &[&verid]).unwrap();
|
||||
tr.query("DELETE FROM files WHERE pkgver = $1", &[&verid]).unwrap();
|
||||
Some(verid)
|
||||
|
||||
} else {
|
||||
|
|
@ -97,13 +97,21 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
|
|||
}
|
||||
|
||||
|
||||
fn insert_man_row<T: postgres::GenericClient>(tr: &mut T, verid: i32, path: &str, enc: &str, hash: &[u8]) {
|
||||
fn insert_man_row<T: postgres::GenericClient>(tr: &mut T, verid: i32, path: &str, enc: &str, content: i32) {
|
||||
let (name, sect, locale) = man::parse_path(path).unwrap();
|
||||
let locale = if locale == "" { None } else { Some(locale) };
|
||||
if let Err(e) = tr.execute(
|
||||
"INSERT INTO man (package, name, filename, locale, hash, section, encoding) VALUES ($1, $2, '/'||$3, $4, $5, $6, $7)",
|
||||
&[&verid, &name, &path, &locale, &hash, &sect, &enc]
|
||||
) {
|
||||
let q = "WITH ms(id) AS (SELECT id FROM mans WHERE name = $2 AND section = $3),
|
||||
mi(id) AS (INSERT INTO mans (name, section) SELECT $2, $3 WHERE NOT EXISTS(SELECT 1 FROM ms) RETURNING id),
|
||||
m(id) AS (SELECT id FROM ms UNION SELECT id FROM mi),
|
||||
ls(id) AS (SELECT id FROM locales WHERE locale = $5),
|
||||
li(id) AS (INSERT INTO locales (locale) SELECT $5 WHERE NOT EXISTS(SELECT 1 FROM ls) RETURNING id),
|
||||
l(id) AS (SELECT id FROM ls UNION SELECT id FROM li),
|
||||
es(id) AS (SELECT id FROM encodings WHERE encoding = $6),
|
||||
ei(id) AS (INSERT INTO encodings (encoding) SELECT $6 WHERE NOT EXISTS(SELECT 1 FROM es) RETURNING id),
|
||||
e(id) AS (SELECT id FROM es UNION SELECT id FROM ei),
|
||||
c(shorthash) AS (SELECT hash_to_shorthash(hash) FROM contents WHERE id = $4)
|
||||
INSERT INTO files (pkgver, man, content, shorthash, locale, encoding, filename)
|
||||
SELECT $1, m.id, $4, c.shorthash, l.id, e.id, '/'||$7 FROM m, l, e, c";
|
||||
if let Err(e) = tr.execute(q, &[&verid, &name, &sect, &content, &locale, &enc, &path]) {
|
||||
// I think this can only happen if archread gives us the same file twice, which really
|
||||
// shouldn't happen. But I'd rather continue with an error logged than panic.
|
||||
error!("Can't insert verid {} fn {}: {}", verid, path, e);
|
||||
|
|
@ -123,29 +131,30 @@ fn insert_man<T: postgres::GenericClient>(tr: &mut T, verid: i32, paths: &[&str]
|
|||
cont = cont.replace(0 as char, "");
|
||||
}
|
||||
|
||||
tr.execute(
|
||||
"INSERT INTO contents (hash, content) VALUES($1, $2) ON CONFLICT (hash) DO NOTHING",
|
||||
&[&dig.as_ref(), &cont]
|
||||
).unwrap();
|
||||
let q = "WITH s(id) AS (SELECT id FROM contents WHERE hash = $1),
|
||||
i(id) AS (INSERT INTO contents (hash, content) SELECT $1, $2 WHERE NOT EXISTS(SELECT 1 FROM s) RETURNING id)
|
||||
SELECT id FROM s UNION SELECT id FROM i";
|
||||
let id: i32 = tr.query_one(q, &[&dig.as_ref(), &cont]).unwrap().get(0);
|
||||
|
||||
for path in paths {
|
||||
insert_man_row(tr, verid, path, enc, dig.as_ref());
|
||||
insert_man_row(tr, verid, path, enc, id);
|
||||
info!("Inserted man page: {} ({})", path, enc);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn insert_link<T>(tr: &mut T, verid: i32, src: &str, dest: &str) where T: postgres::GenericClient {
|
||||
let res = match tr.query_opt("SELECT hash, encoding FROM man WHERE package = $1 AND filename = '/'||$2", &[&verid, &dest]).unwrap() {
|
||||
let q = "SELECT f.content, e.encoding FROM files f JOIN encodings e ON e.id = f.encoding WHERE pkgver = $1 AND filename = '/'||$2";
|
||||
let res = match tr.query_opt(q, &[&verid, &dest]).unwrap() {
|
||||
None => { /* Can happen if man::decode() failed previously. */
|
||||
error!("Link to unindexed man page: {} -> {}", src, dest);
|
||||
return;
|
||||
},
|
||||
Some(x) => x
|
||||
};
|
||||
let hash: Vec<u8> = res.get(0);
|
||||
let content: i32 = res.get(0);
|
||||
let enc: String = res.get(1);
|
||||
insert_man_row(tr, verid, src, &enc, &hash);
|
||||
insert_man_row(tr, verid, src, &enc, content);
|
||||
info!("Inserted man link: {} -> {}", src, dest);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue