SQL: Add packages.c_hasman cache to speed up package listings
This takes package listings from an average of ~100ms down to ~10ms or so. The previous query had a tendency to be much slower at times; let's see if this cache also takes care of those outliers.

Migration script:

ALTER TABLE packages ADD COLUMN c_hasman boolean NOT NULL DEFAULT FALSE;
DROP INDEX packages_system_name_key;
CREATE UNIQUE INDEX packages_system_name_key ON packages (system, name) INCLUDE (id, c_hasman, dead);
UPDATE packages SET c_hasman = NOT c_hasman WHERE c_hasman <> EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = packages.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id));
This commit is contained in:
parent
5d56bded66
commit
1ee5c9c2df
4 changed files with 34 additions and 15 deletions
|
|
@ -47,7 +47,7 @@ pub struct PkgOpt<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
|
fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<(i32,i32)> {
|
||||||
let pkginfo = format!("sys {} / {} - {} @ {:?} @ {}", opt.sys, opt.pkg, opt.ver, opt.date, opt.file.path);
|
let pkginfo = format!("sys {} / {} - {} @ {:?} @ {}", opt.sys, opt.pkg, opt.ver, opt.date, opt.file.path);
|
||||||
|
|
||||||
// Use a custom CTE-based insert-or-update. Using an INSERT with an ON CONFLICT clause would be
|
// Use a custom CTE-based insert-or-update. Using an INSERT with an ON CONFLICT clause would be
|
||||||
|
|
@ -80,14 +80,14 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
|
||||||
let q = "INSERT INTO package_versions (package, version, released, arch) VALUES($1, $2, $3::text::date, $4) RETURNING id";
|
let q = "INSERT INTO package_versions (package, version, released, arch) VALUES($1, $2, $3::text::date, $4) RETURNING id";
|
||||||
verid = tr.query_one(q, &[&pkgid, &opt.ver, &date, &opt.arch]).unwrap().get(0);
|
verid = tr.query_one(q, &[&pkgid, &opt.ver, &date, &opt.arch]).unwrap().get(0);
|
||||||
info!("New package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
|
info!("New package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
|
||||||
Some(verid)
|
Some((pkgid,verid))
|
||||||
|
|
||||||
} else if opt.force {
|
} else if opt.force {
|
||||||
// XXX: Should we update released & arch here?
|
// XXX: Should we update released & arch here?
|
||||||
verid = res?.get(0);
|
verid = res?.get(0);
|
||||||
info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
|
info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
|
||||||
tr.query("DELETE FROM files WHERE pkgver = $1", &[&verid]).unwrap();
|
tr.query("DELETE FROM files WHERE pkgver = $1", &[&verid]).unwrap();
|
||||||
Some(verid)
|
Some((pkgid,verid))
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
debug!("Package already in database, pkgid {} verid {}, {}", pkgid, res?.get::<usize,i32>(0), pkginfo);
|
debug!("Package already in database, pkgid {} verid {}, {}", pkgid, res?.get::<usize,i32>(0), pkginfo);
|
||||||
|
|
@ -187,10 +187,12 @@ fn with_pkg<F,T>(opt: &mut PkgOpt, cb: F) -> std::io::Result<T>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
fn index_pkg<T: postgres::GenericClient>(tr: &mut T, mut opt: PkgOpt, verid: i32) -> std::io::Result<()> {
|
fn index_pkg<T: postgres::GenericClient>(tr: &mut T, mut opt: PkgOpt, pkgid: i32, verid: i32) -> std::io::Result<()> {
|
||||||
|
let mut hasman = false;
|
||||||
let missed = with_pkg(&mut opt, |e, opt| {
|
let missed = with_pkg(&mut opt, |e, opt| {
|
||||||
archread::FileList::read(e, man::ismanpath, |ent| opt.date.update(ent), |paths, ent| {
|
archread::FileList::read(e, man::ismanpath, |ent| opt.date.update(ent), |paths, ent| {
|
||||||
insert_man(tr, verid, paths, ent);
|
insert_man(tr, verid, paths, ent);
|
||||||
|
hasman = true;
|
||||||
Ok(())
|
Ok(())
|
||||||
})
|
})
|
||||||
})?.links(|src, dest| { insert_link(tr, verid, src, dest) });
|
})?.links(|src, dest| { insert_link(tr, verid, src, dest) });
|
||||||
|
|
@ -199,10 +201,15 @@ fn index_pkg<T: postgres::GenericClient>(tr: &mut T, mut opt: PkgOpt, verid: i32
|
||||||
warn!("Some links were missed, reading package again");
|
warn!("Some links were missed, reading package again");
|
||||||
with_pkg(&mut opt, |e, _| { missed.read(e, |paths, ent| {
|
with_pkg(&mut opt, |e, _| { missed.read(e, |paths, ent| {
|
||||||
insert_man(tr, verid, paths, ent);
|
insert_man(tr, verid, paths, ent);
|
||||||
|
hasman = true;
|
||||||
Ok(())
|
Ok(())
|
||||||
}) })?
|
}) })?
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if hasman {
|
||||||
|
tr.execute("UPDATE packages SET c_hasman = true WHERE NOT c_hasman AND id = $1", &[&pkgid]).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
match opt.date {
|
match opt.date {
|
||||||
Date::Known(_) => Ok(()),
|
Date::Known(_) => Ok(()),
|
||||||
Date::Found(t) | Date::MaxVal(t) => {
|
Date::Found(t) | Date::MaxVal(t) => {
|
||||||
|
|
@ -219,12 +226,12 @@ fn index_pkg<T: postgres::GenericClient>(tr: &mut T, mut opt: PkgOpt, verid: i32
|
||||||
pub fn pkg<T>(conn: &mut T, opt: PkgOpt) where T: postgres::GenericClient {
|
pub fn pkg<T>(conn: &mut T, opt: PkgOpt) where T: postgres::GenericClient {
|
||||||
let mut tr = conn.transaction().unwrap();
|
let mut tr = conn.transaction().unwrap();
|
||||||
|
|
||||||
let verid = match insert_pkg(&mut tr, &opt) { Some(x) => x, None => return };
|
let (pkgid,verid) = match insert_pkg(&mut tr, &opt) { Some(x) => x, None => return };
|
||||||
if unsafe { DRY_RUN } {
|
if unsafe { DRY_RUN } {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Err(e) = index_pkg(&mut tr, opt, verid) {
|
if let Err(e) = index_pkg(&mut tr, opt, pkgid, verid) {
|
||||||
error!("Error reading package: {}", e);
|
error!("Error reading package: {}", e);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -56,7 +56,12 @@ CREATE TABLE packages (
|
||||||
-- Packages where the latest version does not have any man pages may also be
|
-- Packages where the latest version does not have any man pages may also be
|
||||||
-- marked as dead even if the package is still available in the repos.
|
-- marked as dead even if the package is still available in the repos.
|
||||||
dead boolean NOT NULL DEFAULT FALSE,
|
dead boolean NOT NULL DEFAULT FALSE,
|
||||||
UNIQUE(system, name)
|
-- Whether this package has at least one man page indexed in the database.
|
||||||
|
-- The indexer uses this table to keep track of which packages it has
|
||||||
|
-- already indexed, but not all packages seen by the indexer have a man page.
|
||||||
|
-- This cache helps the web front-end filter out irrelevant packages faster.
|
||||||
|
c_hasman boolean NOT NULL DEFAULT FALSE,
|
||||||
|
UNIQUE(system, name) INCLUDE (id, c_hasman, dead)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,3 +9,13 @@ CREATE TABLE stats_cache_new AS
|
||||||
DROP TABLE stats_cache;
|
DROP TABLE stats_cache;
|
||||||
ALTER TABLE stats_cache_new RENAME TO stats_cache;
|
ALTER TABLE stats_cache_new RENAME TO stats_cache;
|
||||||
COMMIT;
|
COMMIT;
|
||||||
|
|
||||||
|
|
||||||
|
-- Update c_hasman.
|
||||||
|
-- This query is commented out because the indexer will take care to set the
|
||||||
|
-- c_hasman column automatically. It's included here as "documentation" so it
|
||||||
|
-- can be run manually when package versions or man pages are removed from the
|
||||||
|
-- database.
|
||||||
|
--
|
||||||
|
-- UPDATE packages SET c_hasman = NOT c_hasman
|
||||||
|
-- WHERE c_hasman <> EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = packages.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id));
|
||||||
|
|
|
||||||
13
www/index.pl
13
www/index.pl
|
|
@ -77,9 +77,6 @@ sub normalize_name { $_[0] =~ s/%5b/[/irg =~ s/%5d/]/irg =~ s/%20/ /rg }
|
||||||
sub shorthash_to_hex { unpack 'H*', pack 'i', $_[0] } # int -> hex
|
sub shorthash_to_hex { unpack 'H*', pack 'i', $_[0] } # int -> hex
|
||||||
sub shorthash_to_int { unpack 'i', pack 'H*', $_[0] } # hex -> int
|
sub shorthash_to_int { unpack 'i', pack 'H*', $_[0] } # hex -> int
|
||||||
|
|
||||||
# Subquery returning all packages that have a man page.
|
|
||||||
my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id)))';
|
|
||||||
|
|
||||||
sub escape_like { $_[0] =~ s/([_%\\])/\\$1/rg }
|
sub escape_like { $_[0] =~ s/([_%\\])/\\$1/rg }
|
||||||
|
|
||||||
sub sql_join {
|
sub sql_join {
|
||||||
|
|
@ -111,8 +108,8 @@ sub pkg_frompath {
|
||||||
|
|
||||||
my $pkg = tuwf->dbRowi('
|
my $pkg = tuwf->dbRowi('
|
||||||
SELECT id, system, name
|
SELECT id, system, name
|
||||||
FROM', $packages_with_man, 'p
|
FROM packages p
|
||||||
WHERE', $sys_where, 'AND name IN', \@names, '
|
WHERE c_hasman AND', $sys_where, 'AND name IN', \@names, '
|
||||||
ORDER BY system DESC, length(name) DESC
|
ORDER BY system DESC, length(name) DESC
|
||||||
LIMIT 1
|
LIMIT 1
|
||||||
');
|
');
|
||||||
|
|
@ -928,10 +925,10 @@ TUWF::get qr{/pkg/([^/]+)} => sub {
|
||||||
p => { onerror => 1, uint => 1, range => [1,200] },
|
p => { onerror => 1, uint => 1, range => [1,200] },
|
||||||
)->data;
|
)->data;
|
||||||
|
|
||||||
my $where = sql 'NOT dead AND system =', \$sys->{id}, $f->{c} ne 'all' ? ('AND match_firstchar(name,', \$f->{c}, ')') : ();
|
my $where = sql 'c_hasman AND NOT dead AND system =', \$sys->{id}, $f->{c} ne 'all' ? ('AND match_firstchar(name,', \$f->{c}, ')') : ();
|
||||||
my $count = tuwf->dbVali('SELECT count(*) FROM', $packages_with_man, 'p WHERE', $where);
|
my $count = tuwf->dbVali('SELECT count(*) FROM packages p WHERE', $where);
|
||||||
my $pkg = tuwf->dbPagei({ results => 200, page => $f->{p} },
|
my $pkg = tuwf->dbPagei({ results => 200, page => $f->{p} },
|
||||||
'SELECT id, system, name FROM', $packages_with_man, 'p WHERE', $where, 'ORDER BY name'
|
'SELECT id, system, name FROM packages p WHERE', $where, 'ORDER BY name'
|
||||||
);
|
);
|
||||||
|
|
||||||
framework_ title => $sys->{full}, mainclass => 'pkglist', sub {
|
framework_ title => $sys->{full}, mainclass => 'pkglist', sub {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue