SQL: Add packages.c_hasman cache to speed up package listings

Package listings go from an average of ~100ms to ~10ms or so. The
previous query had a tendency to be much slower sometimes; let's see if
this cache also takes care of those outliers.

Migration script:

  ALTER TABLE packages ADD COLUMN c_hasman boolean NOT NULL DEFAULT FALSE;

  DROP INDEX packages_system_name_key;
  CREATE UNIQUE INDEX packages_system_name_key ON packages (system, name) INCLUDE (id, c_hasman, dead);

  UPDATE packages SET c_hasman = NOT c_hasman
   WHERE c_hasman <> EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = packages.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id));
This commit is contained in:
Yorhel 2024-04-29 21:12:54 +02:00
parent 5d56bded66
commit 1ee5c9c2df
4 changed files with 34 additions and 15 deletions

View file

@ -47,7 +47,7 @@ pub struct PkgOpt<'a> {
}
fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<(i32,i32)> {
let pkginfo = format!("sys {} / {} - {} @ {:?} @ {}", opt.sys, opt.pkg, opt.ver, opt.date, opt.file.path);
// Use a custom CTE-based insert-or-update. Using an INSERT with an ON CONFLICT clause would be
@ -80,14 +80,14 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
let q = "INSERT INTO package_versions (package, version, released, arch) VALUES($1, $2, $3::text::date, $4) RETURNING id";
verid = tr.query_one(q, &[&pkgid, &opt.ver, &date, &opt.arch]).unwrap().get(0);
info!("New package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
Some(verid)
Some((pkgid,verid))
} else if opt.force {
// XXX: Should we update released & arch here?
verid = res?.get(0);
info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
tr.query("DELETE FROM files WHERE pkgver = $1", &[&verid]).unwrap();
Some(verid)
Some((pkgid,verid))
} else {
debug!("Package already in database, pkgid {} verid {}, {}", pkgid, res?.get::<usize,i32>(0), pkginfo);
@ -187,10 +187,12 @@ fn with_pkg<F,T>(opt: &mut PkgOpt, cb: F) -> std::io::Result<T>
}
fn index_pkg<T: postgres::GenericClient>(tr: &mut T, mut opt: PkgOpt, verid: i32) -> std::io::Result<()> {
fn index_pkg<T: postgres::GenericClient>(tr: &mut T, mut opt: PkgOpt, pkgid: i32, verid: i32) -> std::io::Result<()> {
let mut hasman = false;
let missed = with_pkg(&mut opt, |e, opt| {
archread::FileList::read(e, man::ismanpath, |ent| opt.date.update(ent), |paths, ent| {
insert_man(tr, verid, paths, ent);
hasman = true;
Ok(())
})
})?.links(|src, dest| { insert_link(tr, verid, src, dest) });
@ -199,10 +201,15 @@ fn index_pkg<T: postgres::GenericClient>(tr: &mut T, mut opt: PkgOpt, verid: i32
warn!("Some links were missed, reading package again");
with_pkg(&mut opt, |e, _| { missed.read(e, |paths, ent| {
insert_man(tr, verid, paths, ent);
hasman = true;
Ok(())
}) })?
}
if hasman {
tr.execute("UPDATE packages SET c_hasman = true WHERE NOT c_hasman AND id = $1", &[&pkgid]).unwrap();
}
match opt.date {
Date::Known(_) => Ok(()),
Date::Found(t) | Date::MaxVal(t) => {
@ -219,12 +226,12 @@ fn index_pkg<T: postgres::GenericClient>(tr: &mut T, mut opt: PkgOpt, verid: i32
pub fn pkg<T>(conn: &mut T, opt: PkgOpt) where T: postgres::GenericClient {
let mut tr = conn.transaction().unwrap();
let verid = match insert_pkg(&mut tr, &opt) { Some(x) => x, None => return };
let (pkgid,verid) = match insert_pkg(&mut tr, &opt) { Some(x) => x, None => return };
if unsafe { DRY_RUN } {
return;
}
if let Err(e) = index_pkg(&mut tr, opt, verid) {
if let Err(e) = index_pkg(&mut tr, opt, pkgid, verid) {
error!("Error reading package: {}", e);
return;
}