SQL: Add packages.c_hasman cache to speed up package listings
This brings the average query time down from ~100ms to ~10ms or so. The previous query also had a tendency to be much slower at times; let's see if this cache takes care of those outliers as well.

Migration script:

  ALTER TABLE packages ADD COLUMN c_hasman boolean NOT NULL DEFAULT FALSE;
  DROP INDEX packages_system_name_key;
  CREATE UNIQUE INDEX packages_system_name_key ON packages (system, name) INCLUDE (id, c_hasman, dead);
  UPDATE packages SET c_hasman = NOT c_hasman WHERE c_hasman <> EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = packages.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id));
This commit is contained in:
parent
5d56bded66
commit
1ee5c9c2df
4 changed files with 34 additions and 15 deletions
|
|
@ -47,7 +47,7 @@ pub struct PkgOpt<'a> {
|
|||
}
|
||||
|
||||
|
||||
fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
|
||||
fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<(i32,i32)> {
|
||||
let pkginfo = format!("sys {} / {} - {} @ {:?} @ {}", opt.sys, opt.pkg, opt.ver, opt.date, opt.file.path);
|
||||
|
||||
// Use a custom CTE-based insert-or-update. Using an INSERT with an ON CONFLICT clause would be
|
||||
|
|
@ -80,14 +80,14 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
|
|||
let q = "INSERT INTO package_versions (package, version, released, arch) VALUES($1, $2, $3::text::date, $4) RETURNING id";
|
||||
verid = tr.query_one(q, &[&pkgid, &opt.ver, &date, &opt.arch]).unwrap().get(0);
|
||||
info!("New package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
|
||||
Some(verid)
|
||||
Some((pkgid,verid))
|
||||
|
||||
} else if opt.force {
|
||||
// XXX: Should we update released & arch here?
|
||||
verid = res?.get(0);
|
||||
info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
|
||||
tr.query("DELETE FROM files WHERE pkgver = $1", &[&verid]).unwrap();
|
||||
Some(verid)
|
||||
Some((pkgid,verid))
|
||||
|
||||
} else {
|
||||
debug!("Package already in database, pkgid {} verid {}, {}", pkgid, res?.get::<usize,i32>(0), pkginfo);
|
||||
|
|
@ -187,10 +187,12 @@ fn with_pkg<F,T>(opt: &mut PkgOpt, cb: F) -> std::io::Result<T>
|
|||
}
|
||||
|
||||
|
||||
fn index_pkg<T: postgres::GenericClient>(tr: &mut T, mut opt: PkgOpt, verid: i32) -> std::io::Result<()> {
|
||||
fn index_pkg<T: postgres::GenericClient>(tr: &mut T, mut opt: PkgOpt, pkgid: i32, verid: i32) -> std::io::Result<()> {
|
||||
let mut hasman = false;
|
||||
let missed = with_pkg(&mut opt, |e, opt| {
|
||||
archread::FileList::read(e, man::ismanpath, |ent| opt.date.update(ent), |paths, ent| {
|
||||
insert_man(tr, verid, paths, ent);
|
||||
hasman = true;
|
||||
Ok(())
|
||||
})
|
||||
})?.links(|src, dest| { insert_link(tr, verid, src, dest) });
|
||||
|
|
@ -199,10 +201,15 @@ fn index_pkg<T: postgres::GenericClient>(tr: &mut T, mut opt: PkgOpt, verid: i32
|
|||
warn!("Some links were missed, reading package again");
|
||||
with_pkg(&mut opt, |e, _| { missed.read(e, |paths, ent| {
|
||||
insert_man(tr, verid, paths, ent);
|
||||
hasman = true;
|
||||
Ok(())
|
||||
}) })?
|
||||
}
|
||||
|
||||
if hasman {
|
||||
tr.execute("UPDATE packages SET c_hasman = true WHERE NOT c_hasman AND id = $1", &[&pkgid]).unwrap();
|
||||
}
|
||||
|
||||
match opt.date {
|
||||
Date::Known(_) => Ok(()),
|
||||
Date::Found(t) | Date::MaxVal(t) => {
|
||||
|
|
@ -219,12 +226,12 @@ fn index_pkg<T: postgres::GenericClient>(tr: &mut T, mut opt: PkgOpt, verid: i32
|
|||
pub fn pkg<T>(conn: &mut T, opt: PkgOpt) where T: postgres::GenericClient {
|
||||
let mut tr = conn.transaction().unwrap();
|
||||
|
||||
let verid = match insert_pkg(&mut tr, &opt) { Some(x) => x, None => return };
|
||||
let (pkgid,verid) = match insert_pkg(&mut tr, &opt) { Some(x) => x, None => return };
|
||||
if unsafe { DRY_RUN } {
|
||||
return;
|
||||
}
|
||||
|
||||
if let Err(e) = index_pkg(&mut tr, opt, verid) {
|
||||
if let Err(e) = index_pkg(&mut tr, opt, pkgid, verid) {
|
||||
error!("Error reading package: {}", e);
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,7 +56,12 @@ CREATE TABLE packages (
|
|||
-- Packages where the latest version does not have any man pages may also be
|
||||
-- marked as dead even if the package is still available in the repos.
|
||||
dead boolean NOT NULL DEFAULT FALSE,
|
||||
UNIQUE(system, name)
|
||||
-- Whether this package has at least one man page indexed in the database.
|
||||
-- The indexer uses this table to keep track of which packages it has
|
||||
-- already indexed, but not all packages seen by the indexer have a man page.
|
||||
-- This cache helps the web front-end filter out irrelevant packages faster.
|
||||
c_hasman boolean NOT NULL DEFAULT FALSE,
|
||||
UNIQUE(system, name) INCLUDE (id, c_hasman, dead)
|
||||
);
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -9,3 +9,13 @@ CREATE TABLE stats_cache_new AS
|
|||
DROP TABLE stats_cache;
|
||||
ALTER TABLE stats_cache_new RENAME TO stats_cache;
|
||||
COMMIT;
|
||||
|
||||
|
||||
-- Update c_hasman.
|
||||
-- This query is commented out because the indexer takes care of setting the
|
||||
-- c_hasman column automatically. It's included here as "documentation" so it
|
||||
-- can be run manually when package versions or man pages are removed from the
|
||||
-- database.
|
||||
--
|
||||
-- UPDATE packages SET c_hasman = NOT c_hasman
|
||||
-- WHERE c_hasman <> EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = packages.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id));
|
||||
|
|
|
|||
13
www/index.pl
13
www/index.pl
|
|
@ -77,9 +77,6 @@ sub normalize_name { $_[0] =~ s/%5b/[/irg =~ s/%5d/]/irg =~ s/%20/ /rg }
|
|||
sub shorthash_to_hex { unpack 'H*', pack 'i', $_[0] } # int -> hex
|
||||
sub shorthash_to_int { unpack 'i', pack 'H*', $_[0] } # hex -> int
|
||||
|
||||
# Subquery returning all packages that have a man page.
|
||||
my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id)))';
|
||||
|
||||
sub escape_like { $_[0] =~ s/([_%\\])/\\$1/rg }
|
||||
|
||||
sub sql_join {
|
||||
|
|
@ -111,8 +108,8 @@ sub pkg_frompath {
|
|||
|
||||
my $pkg = tuwf->dbRowi('
|
||||
SELECT id, system, name
|
||||
FROM', $packages_with_man, 'p
|
||||
WHERE', $sys_where, 'AND name IN', \@names, '
|
||||
FROM packages p
|
||||
WHERE c_hasman AND', $sys_where, 'AND name IN', \@names, '
|
||||
ORDER BY system DESC, length(name) DESC
|
||||
LIMIT 1
|
||||
');
|
||||
|
|
@ -928,10 +925,10 @@ TUWF::get qr{/pkg/([^/]+)} => sub {
|
|||
p => { onerror => 1, uint => 1, range => [1,200] },
|
||||
)->data;
|
||||
|
||||
my $where = sql 'NOT dead AND system =', \$sys->{id}, $f->{c} ne 'all' ? ('AND match_firstchar(name,', \$f->{c}, ')') : ();
|
||||
my $count = tuwf->dbVali('SELECT count(*) FROM', $packages_with_man, 'p WHERE', $where);
|
||||
my $where = sql 'c_hasman AND NOT dead AND system =', \$sys->{id}, $f->{c} ne 'all' ? ('AND match_firstchar(name,', \$f->{c}, ')') : ();
|
||||
my $count = tuwf->dbVali('SELECT count(*) FROM packages p WHERE', $where);
|
||||
my $pkg = tuwf->dbPagei({ results => 200, page => $f->{p} },
|
||||
'SELECT id, system, name FROM', $packages_with_man, 'p WHERE', $where, 'ORDER BY name'
|
||||
'SELECT id, system, name FROM packages p WHERE', $where, 'ORDER BY name'
|
||||
);
|
||||
|
||||
framework_ title => $sys->{full}, mainclass => 'pkglist', sub {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue