Get rid of package categories

Whether the package name alone or the (category,name) tuple uniquely
identifies a package within a system has been a source of confusion
for a long time. Back in
03d278e4ff I ended up playing it
"safe" by going for (category,name), but in practice this doesn't make a
whole lot of sense. While it's *possible* for the same package name to
refer to completely different packages in different "categories", in
reality distributions can't sanely support this anyway.

For distributions where the category referred to a repository, the only
cases where the same package name was used in different repos were when
the package had moved from one repo to another. Those should certainly
not be treated as different packages.

For distributions where the category really referred to a category,
there's the Debian approach where the category is purely a tag and
doesn't help identify the package in any way, and then there's FreeBSD
where the category technically ought to be part of the name.  There were
a few cases where FreeBSD used categories to separate out different
versions of the same package (e.g. ipv6 vs non-ipv6), but none were
relevant for man pages so I ended up merging those as well.

Getting rid of the categories simplifies and shortens URLs, unclutters
the UI a little bit and merges the packages in listings that should've
been merged all along.

Migration script:

  -- Merge packages that are in multiple categories.
  -- All versions are moved to the package with the lowest ID.
  -- If the same version already exists in a lower ID, the higher-ID version is deleted.
  -- NOTE(review): no explicit DELETE on package_versions appears below; the
  -- leftover versions are presumably removed through an ON DELETE CASCADE
  -- foreign key when their package row is deleted. Confirm against the schema.
  BEGIN;
  WITH migrate(old, new, second) AS (
    -- One row per duplicate package ("old"): a package for which a lower-ID
    -- package with the same (system, name) exists. "new" is the lowest such
    -- ID (the merge target); "second" is the highest ID below "old".
    SELECT q.id, MIN(p.id), MAX(p.id)
      FROM packages p
      JOIN packages q ON q.id > p.id AND p.system = q.system AND p.name = q.name
     GROUP BY q.id
  ), ded(n) AS (
    -- Clear the dead flag on a merge target that is currently flagged dead
    -- when a duplicate being merged into it is not dead.
    UPDATE packages SET dead = false
      FROM migrate m
      JOIN packages q ON q.id = m.old
     WHERE packages.id = m.new AND packages.dead AND NOT q.dead
    RETURNING 1
  ), mov(n) AS (
    -- Re-point the versions of each duplicate to its merge target, skipping
    -- any version string that already exists under "new" or "second".
    UPDATE package_versions SET package = m.new
      FROM migrate m
     WHERE package_versions.package = m.old
       AND NOT EXISTS(
          SELECT 1
            FROM package_versions v
           WHERE v.package IN(m.new, m.second)
             AND v.version = package_versions.version)
    RETURNING 1
  ), del(n) AS (
    -- Remove the duplicate package rows themselves.
    DELETE FROM packages WHERE id IN(SELECT old FROM migrate)
    RETURNING 1
  -- Report how many rows each step produced, as a sanity check.
  ) SELECT (SELECT count(*) FROM migrate) AS migrate,
           (SELECT count(*) FROM ded) AS ded,
           (SELECT count(*) FROM mov) AS mov,
           (SELECT count(*) FROM del) AS del;

  -- Swap the old uniqueness constraint (named for system, name, category) for
  -- a unique index on (system, name), then drop the category column.
  ALTER TABLE packages DROP CONSTRAINT packages_system_name_category_key;
  CREATE UNIQUE INDEX packages_system_name_key ON packages (system, name);
  ALTER TABLE packages DROP COLUMN category;
  COMMIT;
This commit is contained in:
Yorhel 2024-04-28 10:37:02 +02:00
parent bc26633fc7
commit 83ab6c3671
16 changed files with 152 additions and 182 deletions

View file

@ -35,7 +35,6 @@ fn main() {
(about: "Index a single package")
(@arg force: --force "Overwrite existing indexed package")
(@arg sys: --sys +required +takes_value "System short-name")
(@arg cat: --cat +required +takes_value "Package category")
(@arg pkg: --pkg +required +takes_value "Package name")
(@arg ver: --ver +required +takes_value "Package version")
(@arg date: --date +required +takes_value "Package release date")
@ -75,13 +74,11 @@ fn main() {
(@subcommand rpmdir =>
(about: "Index a bare RPM directory")
(@arg sys: --sys +required +takes_value "System short-name")
(@arg cat: --cat +required +takes_value "Category to set for all packages")
(@arg mirror: --mirror +required +takes_value "Mirror URL")
)
(@subcommand rpm =>
(about: "Index an RPM repository")
(@arg sys: --sys +required +takes_value "System short-name")
(@arg cat: --cat +required +takes_value "Category to set for all packages")
(@arg mirror: --mirror +required +takes_value "Mirror URL")
)
).get_matches();
@ -125,7 +122,6 @@ fn main() {
pkg::pkg(&mut db, pkg::PkgOpt {
force: matches.is_present("force"),
sys: sys,
cat: matches.value_of("cat").unwrap(),
pkg: matches.value_of("pkg").unwrap(),
ver: matches.value_of("ver").unwrap(),
date: date,
@ -177,7 +173,6 @@ fn main() {
if let Some(matches) = arg.subcommand_matches("rpmdir") {
let sys = sysbyshort(&mut db, matches.value_of("sys").unwrap());
sys_rpmdir::sync(&mut db, sys,
matches.value_of("cat").unwrap(),
matches.value_of("mirror").unwrap()
).unwrap_or_else(|e| error!("{}", e));
}
@ -185,7 +180,6 @@ fn main() {
if let Some(matches) = arg.subcommand_matches("rpm") {
let sys = sysbyshort(&mut db, matches.value_of("sys").unwrap());
sys_rpm::sync(&mut db, sys,
matches.value_of("cat").unwrap(),
matches.value_of("mirror").unwrap()
).unwrap_or_else(|e| error!("{}", e));
}

View file

@ -39,7 +39,6 @@ impl<'a> Date<'a> {
pub struct PkgOpt<'a> {
pub force: bool,
pub sys: i32,
pub cat: &'a str,
pub pkg: &'a str,
pub ver: &'a str,
pub date: Date<'a>,
@ -49,17 +48,17 @@ pub struct PkgOpt<'a> {
fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<i32> {
let pkginfo = format!("sys {} / {} / {} - {} @ {:?} @ {}", opt.sys, opt.cat, opt.pkg, opt.ver, opt.date, opt.file.path);
let pkginfo = format!("sys {} / {} - {} @ {:?} @ {}", opt.sys, opt.pkg, opt.ver, opt.date, opt.file.path);
// Use a custom CTE-based insert-or-update. Using an INSERT with an ON CONFLICT clause would be
// easier, but has the downside of allocating a new package id even if one already exists.
// The separate UPDATE query makes sure to unflag the package as dead while not causing any
// database writes when the row's already fine.
let q = "WITH p(id) AS (SELECT id FROM packages WHERE system = $1 AND category = $2 AND name = $3),
let q = "WITH p(id) AS (SELECT id FROM packages WHERE system = $1 AND name = $2),
u AS (UPDATE packages SET dead = FALSE FROM p WHERE packages.id = p.id AND dead),
i(id) AS (INSERT INTO packages (system, category, name) SELECT $1, $2, $3 WHERE NOT EXISTS(SELECT 1 FROM p) RETURNING id)
i(id) AS (INSERT INTO packages (system, name) SELECT $1, $2 WHERE NOT EXISTS(SELECT 1 FROM p) RETURNING id)
SELECT id FROM p UNION SELECT id FROM i";
let pkgid: i32 = match tr.query_one(q, &[&opt.sys, &opt.cat, &opt.pkg]) {
let pkgid: i32 = match tr.query_one(q, &[&opt.sys, &opt.pkg]) {
Err(e) => {
error!("Can't insert package in database: {}", e);
return None;

View file

@ -50,7 +50,6 @@ pub fn read_index<T: postgres::GenericClient, R: Read>(pg: &mut T, sys: i32, mir
pkg::pkg(pg, pkg::PkgOpt{
force: false,
sys: sys,
cat: repo,
pkg: pname,
ver: pver,
date: builddate.map(pkg::Date::Found).unwrap_or(pkg::Date::Max),

View file

@ -111,7 +111,6 @@ pub fn sync<T: postgres::GenericClient>(pg: &mut T, sys: i32, mirror: &str, repo
pkg::pkg(pg, pkg::PkgOpt{
force: false,
sys: sys,
cat: repo,
pkg: &m.name,
ver: &m.version,
date: pkg::Date::Known(&m.date),

View file

@ -49,7 +49,6 @@ fn get_contents(f: Option<open::Path>) -> Result<HashSet<String>> {
#[derive(Default)]
struct Pkg {
name: Option<String>,
section: Option<String>,
arch: Option<String>,
version: Option<String>,
filename: Option<String>,
@ -61,7 +60,6 @@ fn handlepkg<T: postgres::GenericClient>(pg: &mut T, sys: i32, mirror: &str, man
if manpkgs.len() > 0 && !manpkgs.contains(name) {
return
}
let section = match pkg.section { Some(ref x) => x, None => { error!("Package {} has no section", name); return } };
let version = match pkg.version { Some(ref x) => x, None => { error!("Package {} has no version", name); return } };
let filename = match pkg.filename { Some(ref x) => x, None => { error!("Package {} has no filename", name); return } };
@ -80,7 +78,6 @@ fn handlepkg<T: postgres::GenericClient>(pg: &mut T, sys: i32, mirror: &str, man
pkg::pkg(pg, pkg::PkgOpt{
force: false,
sys: sys,
cat: &section,
pkg: &name,
ver: &version,
date: pkg::Date::Deb,
@ -128,7 +125,6 @@ pub fn sync<T: postgres::GenericClient >(pg: &mut T, sys: i32, mirror: &str, con
// Use case-insensitive matching, older package archives used lowercase keys
match str::from_utf8(&cap[1]).unwrap().to_lowercase().as_ref() {
"package" => pkg.name = Some(val.to_string()),
"section" => pkg.section = Some(val.to_string()),
"version" => pkg.version = Some(val.to_string()),
"architecture" => pkg.arch = Some(val.to_string()),
"filename" => pkg.filename = Some(val.to_string()),

View file

@ -71,7 +71,6 @@ pub fn sync<T: postgres::GenericClient>(pg: &mut T, sys: i32, arch: &str, mirror
pkg::pkg(pg, pkg::PkgOpt{
force: false,
sys: sys,
cat: &cat,
pkg: pkg,
ver: ver,
date: pkg::Date::Desc,

View file

@ -31,7 +31,6 @@ pub fn sync<T: postgres::GenericClient>(pg: &mut T, sys: i32, mirror: &str) -> R
lazy_static!(
static ref RE_NAME : Regex = Regex::new(r#""name"\s*:\s*"(?u:([^ "]+))""#).unwrap();
static ref RE_VER : Regex = Regex::new(r#""version"\s*:\s*"(?u:([^ "]+))""#).unwrap();
static ref RE_CAT : Regex = Regex::new(r#""origin"\s*:\s*"(?u:([^ "/]+))"#).unwrap();
static ref RE_PATH : Regex = Regex::new(r#""path"\s*:\s*"(?u:([^ "]+))""#).unwrap();
static ref RE_ARCH : Regex = Regex::new(r#""arch"\s*:\s*"(?u:([^ "]+))""#).unwrap();
);
@ -40,14 +39,12 @@ pub fn sync<T: postgres::GenericClient>(pg: &mut T, sys: i32, mirror: &str) -> R
let line = line?;
let name = match RE_NAME.captures(&line) { None => continue, Some(c) => str::from_utf8(c.get(1).unwrap().as_bytes()).unwrap() };
let ver = match RE_VER .captures(&line) { None => continue, Some(c) => str::from_utf8(c.get(1).unwrap().as_bytes()).unwrap() };
let cat = match RE_CAT .captures(&line) { None => continue, Some(c) => str::from_utf8(c.get(1).unwrap().as_bytes()).unwrap() };
let path = match RE_PATH.captures(&line) { None => continue, Some(c) => str::from_utf8(c.get(1).unwrap().as_bytes()).unwrap() };
let arch = match RE_ARCH.captures(&line) { None => continue, Some(c) => str::from_utf8(c.get(1).unwrap().as_bytes()).unwrap() };
let uri = format!("{}{}", mirror, path);
pkg::pkg(pg, pkg::PkgOpt{
force: false,
sys: sys,
cat: cat,
pkg: name,
ver: ver,
date: pkg::Date::Max,

View file

@ -156,7 +156,7 @@ fn repomd(url: String) -> Result<(String,String),Box<dyn Error>> {
}
pub fn sync<T: postgres::GenericClient>(pg: &mut T, sys: i32, cat: &str, mirror: &str) -> Result<(),Box<dyn Error>> {
pub fn sync<T: postgres::GenericClient>(pg: &mut T, sys: i32, mirror: &str) -> Result<(),Box<dyn Error>> {
let(primary, filelists) = repomd(format!("{}repodata/repomd.xml", mirror))?;
let mut pkgswithman = HashSet::new();
@ -172,7 +172,6 @@ pub fn sync<T: postgres::GenericClient>(pg: &mut T, sys: i32, cat: &str, mirror:
pkg::pkg(pg, pkg::PkgOpt{
force: false,
sys: sys,
cat: cat,
pkg: &name,
ver: &pkg.ver.unwrap(),
date: pkg::Date::Known(&date),

View file

@ -5,7 +5,7 @@ use postgres;
use crate::open;
use crate::pkg;
pub fn sync<T: postgres::GenericClient>(pg: &mut T, sys: i32, cat: &str, mirror: &str) -> Result<()> {
pub fn sync<T: postgres::GenericClient>(pg: &mut T, sys: i32, mirror: &str) -> Result<()> {
let pkgs : Vec<String> = open::Path{path: mirror, cache: true, canbelocal: false}
.dirlist()?.into_iter()
.filter_map(|(n,d)| if d { None } else { Some(n) })
@ -29,7 +29,6 @@ pub fn sync<T: postgres::GenericClient>(pg: &mut T, sys: i32, cat: &str, mirror:
pkg::pkg(pg, pkg::PkgOpt{
force: false,
sys: sys,
cat: cat,
pkg: name,
ver: ver,
date: pkg::Date::Max,