Arch: Mark deleted packages as dead and hide them from listings
We've got a lot of packages in the DB that have long since been removed from the Arch repos. These remain indexed, but no longer clutter the package listing. Also fixes an issue where packages.id numbers grew rather large because the indexer allocated a new ID for every package on every update.
This commit is contained in:
parent
f3323de5e4
commit
b27d55215a
5 changed files with 50 additions and 7 deletions
|
|
@ -51,10 +51,14 @@ pub struct PkgOpt<'a> {
|
|||
fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
|
||||
let pkginfo = format!("sys {} / {} / {} - {} @ {:?} @ {}", opt.sys, opt.cat, opt.pkg, opt.ver, opt.date, opt.file.path);
|
||||
|
||||
// The ON CONFLICT .. DO UPDATE is used instead of DO NOTHING because in that case the
|
||||
// RETURNING clause wouldn't give us a package id.
|
||||
let q = "INSERT INTO packages (system, category, name) VALUES($1, $2, $3)
|
||||
ON CONFLICT ON CONSTRAINT packages_system_name_category_key DO UPDATE SET name=$3 RETURNING id";
|
||||
// Use a custom CTE-based insert-or-update. Using an INSERT with an ON CONFLICT clause would be
|
||||
// easier, but has the downside of allocating a new package id even if one already exists.
|
||||
// The separate UPDATE query makes sure to unflag the package as dead while not causing any
|
||||
// database writes when the row's already fine.
|
||||
let q = "WITH p(id) AS (SELECT id FROM packages WHERE system = $1 AND category = $2 AND name = $3),
|
||||
u AS (UPDATE packages SET dead = FALSE FROM p WHERE packages.id = p.id AND dead),
|
||||
i(id) AS (INSERT INTO packages (system, category, name) SELECT $1, $2, $3 WHERE NOT EXISTS(SELECT 1 FROM p) RETURNING id)
|
||||
SELECT id FROM p UNION SELECT id FROM i";
|
||||
let pkgid: i32 = match tr.query_one(q, &[&opt.sys, &opt.cat, &opt.pkg]) {
|
||||
Err(e) => {
|
||||
error!("Can't insert package in database: {}", e);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
use std::str::FromStr;
|
||||
use std::io::{Read,BufRead,BufReader,Result};
|
||||
use std::collections::HashSet;
|
||||
use regex::Regex;
|
||||
use chrono::NaiveDateTime;
|
||||
use postgres;
|
||||
|
|
@ -91,6 +92,7 @@ pub fn sync<T: postgres::GenericClient>(pg: &mut T, sys: i32, mirror: &str, repo
|
|||
|
||||
let mut hasman = false;
|
||||
let mut meta = None;
|
||||
let mut allpkgs = HashSet::new();
|
||||
let r = archive::walk(ent, |x| {
|
||||
if x.filetype() == archive::FileType::Directory {
|
||||
hasman = false;
|
||||
|
|
@ -120,6 +122,7 @@ pub fn sync<T: postgres::GenericClient>(pg: &mut T, sys: i32, mirror: &str, repo
|
|||
canbelocal: false,
|
||||
},
|
||||
});
|
||||
allpkgs.insert(m.name.into_boxed_str());
|
||||
}
|
||||
|
||||
Ok(true)
|
||||
|
|
@ -128,4 +131,30 @@ pub fn sync<T: postgres::GenericClient>(pg: &mut T, sys: i32, mirror: &str, repo
|
|||
if let Err(e) = r {
|
||||
error!("Error reading package index: {}", e);
|
||||
}
|
||||
mark_dead(pg, sys, repo, allpkgs);
|
||||
}
|
||||
|
||||
fn mark_dead<T: postgres::GenericClient>(pg: &mut T, sys: i32, repo: &str, pkgs: HashSet<Box<str>>) {
|
||||
let mut dead = Vec::new();
|
||||
for row in pg.query("SELECT id, name FROM packages WHERE system = $1 AND category = $2 AND NOT dead", &[&sys,&repo]).unwrap() {
|
||||
let id: i32 = row.get(0);
|
||||
let name: &str = row.get(1);
|
||||
if !pkgs.contains(name) {
|
||||
info!("Package not available in database anymore, marking dead; sys {} / {} / pkg {} ({})", sys, repo, id, name);
|
||||
dead.push(id);
|
||||
}
|
||||
}
|
||||
if dead.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let mut tr = pg.transaction().unwrap();
|
||||
let q = tr.prepare("UPDATE packages SET dead = TRUE WHERE id = $1").unwrap();
|
||||
for id in dead {
|
||||
tr.execute(&q, &[&id]).unwrap();
|
||||
}
|
||||
|
||||
if let Err(e) = tr.commit() {
|
||||
error!("Error finishing transaction: {}", e);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,8 +19,15 @@ CREATE TABLE contents (
|
|||
CREATE TABLE packages (
|
||||
id SERIAL PRIMARY KEY,
|
||||
system integer NOT NULL REFERENCES systems(id) ON DELETE CASCADE ON UPDATE CASCADE,
|
||||
category varchar,
|
||||
category varchar NOT NULL,
|
||||
name varchar NOT NULL,
|
||||
-- Whether this package has been seen in the last repository update. This
|
||||
-- field is only updated for a few systems that are likely to delete packages
|
||||
-- over time; non-rolling-release distros tend to not delete packages after
|
||||
-- all.
|
||||
-- Packages where the latest version does not have any man pages may also be
|
||||
-- marked as dead even if the package is still available in the repos.
|
||||
dead boolean NOT NULL DEFAULT FALSE,
|
||||
UNIQUE(system, name, category) -- Note the order, lookups on (system,name) are common
|
||||
);
|
||||
|
||||
|
|
|
|||
3
sql/update-2021-12-13b.sql
Normal file
3
sql/update-2021-12-13b.sql
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
ALTER TABLE packages
|
||||
ALTER COLUMN category SET NOT NULL,
|
||||
ADD COLUMN dead boolean NOT NULL DEFAULT FALSE;
|
||||
|
|
@ -781,10 +781,10 @@ TUWF::get qr{/pkg/([^/]+)} => sub {
|
|||
p => { onerror => 1, uint => 1, range => [1,200] },
|
||||
)->data;
|
||||
|
||||
my $where = sql 'system =', \$sys->{id}, $f->{c} ne 'all' ? ('AND match_firstchar(name,', \$f->{c}, ')') : ();
|
||||
my $where = sql 'NOT dead AND system =', \$sys->{id}, $f->{c} ne 'all' ? ('AND match_firstchar(name,', \$f->{c}, ')') : ();
|
||||
my $count = tuwf->dbVali('SELECT count(*) FROM', $packages_with_man, 'p WHERE', $where);
|
||||
my $pkg = tuwf->dbPagei({ results => 200, page => $f->{p} },
|
||||
'SELECT id, system, name, category FROM', $packages_with_man, 'p WHERE', $where, 'ORDER BY name, category'
|
||||
'SELECT id, system, name, category, dead FROM', $packages_with_man, 'p WHERE', $where, 'ORDER BY name, category'
|
||||
);
|
||||
|
||||
my $title = $sys->{name}.($sys->{release}?" $sys->{release}":"");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue