Arch: Mark deleted packages as dead and hide them from listings

We've got a lot of packages in the DB that have long been removed from
the Arch repos. These are still indexed, but won't clutter the package
listing anymore.

Also fixed an issue with packages.id numbers getting rather large: the
indexer's INSERT .. ON CONFLICT query allocated a new ID for every
package on every update, even when the package already existed.
This commit is contained in:
Yorhel 2021-12-13 08:18:13 +01:00
parent f3323de5e4
commit b27d55215a
5 changed files with 50 additions and 7 deletions

View file

@ -51,10 +51,14 @@ pub struct PkgOpt<'a> {
fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> { fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
let pkginfo = format!("sys {} / {} / {} - {} @ {:?} @ {}", opt.sys, opt.cat, opt.pkg, opt.ver, opt.date, opt.file.path); let pkginfo = format!("sys {} / {} / {} - {} @ {:?} @ {}", opt.sys, opt.cat, opt.pkg, opt.ver, opt.date, opt.file.path);
// The ON CONFLICT .. DO UPDATE is used instead of DO NOTHING because in that case the // Use a custom CTE-based insert-or-update. Using an INSERT with an ON CONFLICT clause would be
// RETURNING clause wouldn't give us a package id. // easier, but has the downside of allocating a new package id even if one already exists.
let q = "INSERT INTO packages (system, category, name) VALUES($1, $2, $3) // The separate UPDATE query makes sure to unflag the package as dead while not causing any
ON CONFLICT ON CONSTRAINT packages_system_name_category_key DO UPDATE SET name=$3 RETURNING id"; // database writes when the row's already fine.
let q = "WITH p(id) AS (SELECT id FROM packages WHERE system = $1 AND category = $2 AND name = $3),
u AS (UPDATE packages SET dead = FALSE FROM p WHERE packages.id = p.id AND dead),
i(id) AS (INSERT INTO packages (system, category, name) SELECT $1, $2, $3 WHERE NOT EXISTS(SELECT 1 FROM p) RETURNING id)
SELECT id FROM p UNION SELECT id FROM i";
let pkgid: i32 = match tr.query_one(q, &[&opt.sys, &opt.cat, &opt.pkg]) { let pkgid: i32 = match tr.query_one(q, &[&opt.sys, &opt.cat, &opt.pkg]) {
Err(e) => { Err(e) => {
error!("Can't insert package in database: {}", e); error!("Can't insert package in database: {}", e);

View file

@ -1,5 +1,6 @@
use std::str::FromStr; use std::str::FromStr;
use std::io::{Read,BufRead,BufReader,Result}; use std::io::{Read,BufRead,BufReader,Result};
use std::collections::HashSet;
use regex::Regex; use regex::Regex;
use chrono::NaiveDateTime; use chrono::NaiveDateTime;
use postgres; use postgres;
@ -91,6 +92,7 @@ pub fn sync<T: postgres::GenericClient>(pg: &mut T, sys: i32, mirror: &str, repo
let mut hasman = false; let mut hasman = false;
let mut meta = None; let mut meta = None;
let mut allpkgs = HashSet::new();
let r = archive::walk(ent, |x| { let r = archive::walk(ent, |x| {
if x.filetype() == archive::FileType::Directory { if x.filetype() == archive::FileType::Directory {
hasman = false; hasman = false;
@ -120,6 +122,7 @@ pub fn sync<T: postgres::GenericClient>(pg: &mut T, sys: i32, mirror: &str, repo
canbelocal: false, canbelocal: false,
}, },
}); });
allpkgs.insert(m.name.into_boxed_str());
} }
Ok(true) Ok(true)
@ -128,4 +131,30 @@ pub fn sync<T: postgres::GenericClient>(pg: &mut T, sys: i32, mirror: &str, repo
if let Err(e) = r { if let Err(e) = r {
error!("Error reading package index: {}", e); error!("Error reading package index: {}", e);
} }
mark_dead(pg, sys, repo, allpkgs);
}
fn mark_dead<T: postgres::GenericClient>(pg: &mut T, sys: i32, repo: &str, pkgs: HashSet<Box<str>>) {
let mut dead = Vec::new();
for row in pg.query("SELECT id, name FROM packages WHERE system = $1 AND category = $2 AND NOT dead", &[&sys,&repo]).unwrap() {
let id: i32 = row.get(0);
let name: &str = row.get(1);
if !pkgs.contains(name) {
info!("Package not available in database anymore, marking dead; sys {} / {} / pkg {} ({})", sys, repo, id, name);
dead.push(id);
}
}
if dead.is_empty() {
return;
}
let mut tr = pg.transaction().unwrap();
let q = tr.prepare("UPDATE packages SET dead = TRUE WHERE id = $1").unwrap();
for id in dead {
tr.execute(&q, &[&id]).unwrap();
}
if let Err(e) = tr.commit() {
error!("Error finishing transaction: {}", e);
}
} }

View file

@ -19,8 +19,15 @@ CREATE TABLE contents (
-- Packages known to the indexer; a (system, name, category) combination is unique.
CREATE TABLE packages ( CREATE TABLE packages (
id SERIAL PRIMARY KEY, id SERIAL PRIMARY KEY,
system integer NOT NULL REFERENCES systems(id) ON DELETE CASCADE ON UPDATE CASCADE, system integer NOT NULL REFERENCES systems(id) ON DELETE CASCADE ON UPDATE CASCADE,
category varchar, category varchar NOT NULL,
name varchar NOT NULL, name varchar NOT NULL,
-- Whether this package has been seen in the last repository update. This
-- field is only updated for a few systems that are likely to delete packages
-- over time; non-rolling-release distros tend to not delete packages after
-- all.
-- Packages where the latest version does not have any man pages may also be
-- marked as dead even if the package is still available in the repos.
dead boolean NOT NULL DEFAULT FALSE,
UNIQUE(system, name, category) -- Note the order, lookups on (system,name) are common UNIQUE(system, name, category) -- Note the order, lookups on (system,name) are common
); );

View file

@ -0,0 +1,3 @@
-- Migration: make packages.category mandatory and add the 'dead' flag.
-- Existing rows get dead = FALSE through the column default.
-- NOTE(review): SET NOT NULL fails if any existing row has a NULL category;
-- confirm there are none before running this.
ALTER TABLE packages
ALTER COLUMN category SET NOT NULL,
ADD COLUMN dead boolean NOT NULL DEFAULT FALSE;

View file

@ -781,10 +781,10 @@ TUWF::get qr{/pkg/([^/]+)} => sub {
p => { onerror => 1, uint => 1, range => [1,200] }, p => { onerror => 1, uint => 1, range => [1,200] },
)->data; )->data;
my $where = sql 'system =', \$sys->{id}, $f->{c} ne 'all' ? ('AND match_firstchar(name,', \$f->{c}, ')') : (); my $where = sql 'NOT dead AND system =', \$sys->{id}, $f->{c} ne 'all' ? ('AND match_firstchar(name,', \$f->{c}, ')') : ();
my $count = tuwf->dbVali('SELECT count(*) FROM', $packages_with_man, 'p WHERE', $where); my $count = tuwf->dbVali('SELECT count(*) FROM', $packages_with_man, 'p WHERE', $where);
my $pkg = tuwf->dbPagei({ results => 200, page => $f->{p} }, my $pkg = tuwf->dbPagei({ results => 200, page => $f->{p} },
'SELECT id, system, name, category FROM', $packages_with_man, 'p WHERE', $where, 'ORDER BY name, category' 'SELECT id, system, name, category, dead FROM', $packages_with_man, 'p WHERE', $where, 'ORDER BY name, category'
); );
my $title = $sys->{name}.($sys->{release}?" $sys->{release}":""); my $title = $sys->{name}.($sys->{release}?" $sys->{release}":"");