diff --git a/indexer/src/pkg.rs b/indexer/src/pkg.rs index 9993ecf..07d16c7 100644 --- a/indexer/src/pkg.rs +++ b/indexer/src/pkg.rs @@ -47,7 +47,7 @@ pub struct PkgOpt<'a> { } -fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option { +fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<(i32,i32)> { let pkginfo = format!("sys {} / {} - {} @ {:?} @ {}", opt.sys, opt.pkg, opt.ver, opt.date, opt.file.path); // Use a custom CTE-based insert-or-update. Using an INSERT with an ON CONFLICT clause would be @@ -80,14 +80,14 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option { let q = "INSERT INTO package_versions (package, version, released, arch) VALUES($1, $2, $3::text::date, $4) RETURNING id"; verid = tr.query_one(q, &[&pkgid, &opt.ver, &date, &opt.arch]).unwrap().get(0); info!("New package pkgid {} verid {}, {}", pkgid, verid, pkginfo); - Some(verid) + Some((pkgid,verid)) } else if opt.force { // XXX: Should we update released & arch here? verid = res?.get(0); info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo); tr.query("DELETE FROM files WHERE pkgver = $1", &[&verid]).unwrap(); - Some(verid) + Some((pkgid,verid)) } else { debug!("Package already in database, pkgid {} verid {}, {}", pkgid, res?.get::(0), pkginfo); @@ -187,10 +187,12 @@ fn with_pkg(opt: &mut PkgOpt, cb: F) -> std::io::Result } -fn index_pkg(tr: &mut T, mut opt: PkgOpt, verid: i32) -> std::io::Result<()> { +fn index_pkg(tr: &mut T, mut opt: PkgOpt, pkgid: i32, verid: i32) -> std::io::Result<()> { + let mut hasman = false; let missed = with_pkg(&mut opt, |e, opt| { archread::FileList::read(e, man::ismanpath, |ent| opt.date.update(ent), |paths, ent| { insert_man(tr, verid, paths, ent); + hasman = true; Ok(()) }) })?.links(|src, dest| { insert_link(tr, verid, src, dest) }); @@ -199,10 +201,15 @@ fn index_pkg(tr: &mut T, mut opt: PkgOpt, verid: i32 warn!("Some links were missed, reading package again"); with_pkg(&mut opt, |e, _| { missed.read(e, |paths, ent| { insert_man(tr, verid, paths, ent); + hasman = true; Ok(()) }) })? } + if hasman { + tr.execute("UPDATE packages SET c_hasman = true WHERE NOT c_hasman AND id = $1", &[&pkgid]).unwrap(); + } + match opt.date { Date::Known(_) => Ok(()), Date::Found(t) | Date::MaxVal(t) => { @@ -219,12 +226,12 @@ fn index_pkg(tr: &mut T, mut opt: PkgOpt, verid: i32 pub fn pkg(conn: &mut T, opt: PkgOpt) where T: postgres::GenericClient { let mut tr = conn.transaction().unwrap(); - let verid = match insert_pkg(&mut tr, &opt) { Some(x) => x, None => return }; + let (pkgid,verid) = match insert_pkg(&mut tr, &opt) { Some(x) => x, None => return }; if unsafe { DRY_RUN } { return; } - if let Err(e) = index_pkg(&mut tr, opt, verid) { + if let Err(e) = index_pkg(&mut tr, opt, pkgid, verid) { error!("Error reading package: {}", e); return; } diff --git a/schema.sql b/schema.sql index 0cbe18f..785c81d 100644 --- a/schema.sql +++ b/schema.sql @@ -56,7 +56,12 @@ CREATE TABLE packages ( -- Packages where the latest version does not have any man pages may also be -- marked as dead even if the package is still available in the repos. dead boolean NOT NULL DEFAULT FALSE, - UNIQUE(system, name) + -- Whether this package has at least one man page indexed in the database. + -- The indexer uses this table to keep track of which packages it has + -- already indexed, but not all packages seen by the indexer have a man page. + -- This cache helps the web front-end filter out irrelevant packages faster. + c_hasman boolean NOT NULL DEFAULT FALSE, + UNIQUE(system, name) INCLUDE (id, c_hasman, dead) ); diff --git a/util/update_indices.sql b/util/update_indices.sql index b977d8a..d1f510d 100644 --- a/util/update_indices.sql +++ b/util/update_indices.sql @@ -9,3 +9,13 @@ CREATE TABLE stats_cache_new AS DROP TABLE stats_cache; ALTER TABLE stats_cache_new RENAME TO stats_cache; COMMIT; + + +-- Update c_hasman. +-- This query is commented out because the indexer will take care to set the +-- c_hasman column automatically. It's included here as "documentation" so it +-- can be run manually when package versions or man pages are removed from the +-- database. +-- +-- UPDATE packages SET c_hasman = NOT c_hasman +-- WHERE c_hasman <> EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = packages.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id)); diff --git a/www/index.pl b/www/index.pl index 71bcf31..ef3ba90 100755 --- a/www/index.pl +++ b/www/index.pl @@ -77,9 +77,6 @@ sub normalize_name { $_[0] =~ s/%5b/[/irg =~ s/%5d/]/irg =~ s/%20/ /rg } sub shorthash_to_hex { unpack 'H*', pack 'i', $_[0] } # int -> hex sub shorthash_to_int { unpack 'i', pack 'H*', $_[0] } # hex -> int -# Subquery returning all packages that have a man page. -my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id)))'; - sub escape_like { $_[0] =~ s/([_%\\])/\\$1/rg } sub sql_join { @@ -111,8 +108,8 @@ sub pkg_frompath { my $pkg = tuwf->dbRowi(' SELECT id, system, name - FROM', $packages_with_man, 'p - WHERE', $sys_where, 'AND name IN', \@names, ' + FROM packages p + WHERE c_hasman AND', $sys_where, 'AND name IN', \@names, ' ORDER BY system DESC, length(name) DESC LIMIT 1 '); @@ -928,10 +925,10 @@ TUWF::get qr{/pkg/([^/]+)} => sub { p => { onerror => 1, uint => 1, range => [1,200] }, )->data; - my $where = sql 'NOT dead AND system =', \$sys->{id}, $f->{c} ne 'all' ? ('AND match_firstchar(name,', \$f->{c}, ')') : (); - my $count = tuwf->dbVali('SELECT count(*) FROM', $packages_with_man, 'p WHERE', $where); + my $where = sql 'c_hasman AND NOT dead AND system =', \$sys->{id}, $f->{c} ne 'all' ? ('AND match_firstchar(name,', \$f->{c}, ')') : (); + my $count = tuwf->dbVali('SELECT count(*) FROM packages p WHERE', $where); my $pkg = tuwf->dbPagei({ results => 200, page => $f->{p} }, - 'SELECT id, system, name FROM', $packages_with_man, 'p WHERE', $where, 'ORDER BY name' + 'SELECT id, system, name FROM packages p WHERE', $where, 'ORDER BY name' ); framework_ title => $sys->{full}, mainclass => 'pkglist', sub {