Whether or not the package name itself or the (category,name) tuple
uniquely identified a package within a system has been a source of
confusion for a long time. Back in
03d278e4ff I ended up playing it
"safe" by going for (category,name), but in practice this doesn't make a
whole lot of sense. While it's *possible* for the same package name to
refer to completely different packages in different "categories", in
reality distributions can't sanely support this anyway.
For distributions where the category referred to a repository, the only
cases where the same package name was used in different repos were when
the package has moved from one repo to another. Those should certainly
not be treated as different packages.
For distributions where the category really referred to a category,
there's the Debian approach where the category is purely a tag and
doesn't help identify the package in any way, and then there's FreeBSD
where the category technically ought to be part of the name. There were
a few cases where FreeBSD used categories to separate out different
versions of the same package (e.g. ipv6 vs non-ipv6), but none were
relevant for man pages so I ended up merging those as well.
Getting rid of the categories simplifies and shortens URLs, unclutters
the UI a little bit and merges the packages in listings that should've
been merged all along.
Migration script:
-- Merge packages that are in multiple categories.
-- All versions are moved to the package with the lowest ID.
-- If the same version already exists in a lower ID, the higher-ID version is deleted.
-- NOTE(review): no statement below deletes package_versions rows directly; presumably
-- they go away through a cascading foreign key when their package row is deleted. Confirm.
BEGIN;
-- migrate: one row per package ("old") for which a lower-ID package with the same
-- (system, name) exists.
--   new    = lowest ID among those lower-ID packages
--   second = highest ID among those lower-ID packages
WITH migrate(old, new, second) AS (
SELECT q.id, MIN(p.id), MAX(p.id)
FROM packages p
JOIN packages q ON q.id > p.id AND p.system = q.system AND p.name = q.name
GROUP BY q.id
-- ded: clear the dead flag on the "new" package when it is flagged dead while the
-- duplicate being merged into it is not.
), ded(n) AS (
UPDATE packages SET dead = false
FROM migrate m
JOIN packages q ON q.id = m.old
WHERE packages.id = m.new AND packages.dead AND NOT q.dead
RETURNING 1
-- mov: re-point the versions of each "old" package at "new", skipping any version
-- that already exists under either "new" or "second".
), mov(n) AS (
UPDATE package_versions SET package = m.new
FROM migrate m
WHERE package_versions.package = m.old
AND NOT EXISTS(
SELECT 1
FROM package_versions v
WHERE v.package IN(m.new, m.second)
AND v.version = package_versions.version)
RETURNING 1
-- del: remove every "old" package row.
), del(n) AS (
DELETE FROM packages WHERE id IN(SELECT old FROM migrate)
RETURNING 1
-- Report how many rows each step produced or touched.
) SELECT (SELECT count(*) FROM migrate) AS migrate,
(SELECT count(*) FROM ded) AS ded,
(SELECT count(*) FROM mov) AS mov,
(SELECT count(*) FROM del) AS del;
-- Swap the old uniqueness constraint (which, judging by its name, included the category)
-- for a unique index on (system, name), then drop the category column itself.
ALTER TABLE packages DROP CONSTRAINT packages_system_name_category_key;
CREATE UNIQUE INDEX packages_system_name_key ON packages (system, name);
ALTER TABLE packages DROP COLUMN category;
COMMIT;
184 lines
5.9 KiB
Rust
184 lines
5.9 KiB
Rust
use std::collections::HashSet;
|
|
use std::io::Result;
|
|
use regex::Regex;
|
|
use postgres;
|
|
|
|
use crate::open;
|
|
use crate::pkg;
|
|
|
|
|
|
// Sync a FreeBSD <= 9.2 package respository.
|
|
//
|
|
// Reads "." to get a list of categories, "Latest" to get a list of all packages, and all category
|
|
// directories to figure out which package belongs in which category.
|
|
//
|
|
// Splitting a package filename into a package name and version is a hard problem. There are two
|
|
// strategies:
|
|
// 1. Use the listing from 'Latest' to get the list of package names, and use that to find the
|
|
// longest matching substring in the package filename to split off the version.
|
|
// 2. Guessing, like splitver() below.
|
|
//
|
|
// Both strategies lead to errors. (1) doesn't always work because the 'Latest' directory tends to
|
|
// miss a few packages. (2) doesn't always work because version strings are too damn irregular.
|
|
// This function tries (1) first, then falls back to (2) if it couldn't find a matching package.
|
|
// This combined solution also isn't perfect, as sometimes a package prefix does exist, but is
|
|
// incomplete. E.g. 'pear-PHPUnit-1.3.3.tbz' is parsed as 'pear version PHPUnit-1.3.3' rather than
|
|
// 'pear-PHPUnit version 1.3.3', because there is a 'pear' package in 'Latest' but no
|
|
// 'pear-PHPUnit'. This is handled with a static list of package names to add to the 'pkgs' list,
|
|
// see EXTRA_PKGS below.
|
|
pub fn sync<T: postgres::GenericClient>(pg: &mut T, sys: i32, arch: &str, mirror: &str) -> Result<()> {
|
|
let path = format!("{}Latest/", mirror);
|
|
let mut pkgs : Vec<String> = open::Path{path: &path, cache: true, canbelocal: false}
|
|
.dirlist()?.into_iter()
|
|
.map(|(n,_)| trimext(&n).to_string())
|
|
.collect();
|
|
|
|
pkgs.extend(EXTRA_PKGS.into_iter().map(|e| e.to_string()));
|
|
pkgs.sort_by(|a, b| b.len().cmp(&a.len())); // Longest first
|
|
|
|
// List of packages (name+version) we've already seen; Some packages are present in multiple
|
|
// categories, we only index the first found.
|
|
let mut seenpkgs = HashSet::new();
|
|
|
|
let cats = open::Path{path: mirror, cache: true, canbelocal: false}
|
|
.dirlist()?.into_iter()
|
|
.filter(|&(ref n,i)| i && n != "All" && n != "Latest")
|
|
.map(|(n,_)| n);
|
|
|
|
for cat in cats {
|
|
trace!("Category: {}", cat);
|
|
let path = format!("{}{}/", mirror, cat);
|
|
let lst = open::Path{path: &path, cache: true, canbelocal: false}.dirlist()?.into_iter().map(|(n,_)| n);
|
|
for f in lst {
|
|
let name = trimext(&f);
|
|
if !name.is_ascii() {
|
|
warn!("Non-ASCII package name: {}", f);
|
|
continue;
|
|
}
|
|
|
|
// The take() mystifies me; why is it necessary?
|
|
let pkg = pkgs.iter()
|
|
.find(|p| name.len() > p.len()+1 && name.starts_with(&p as &str) && &name[p.len() .. p.len()+1] == "-")
|
|
.take().map(|p| (p as &str, &name[p.len()+1 .. ]))
|
|
.or_else(|| splitver(name));
|
|
|
|
if let Some((pkg, ver)) = pkg {
|
|
if !seenpkgs.insert((pkg.to_string(), ver.to_string())) {
|
|
continue;
|
|
}
|
|
|
|
let path = format!("{}{}/{}", mirror, cat, f);
|
|
pkg::pkg(pg, pkg::PkgOpt{
|
|
force: false,
|
|
sys: sys,
|
|
pkg: pkg,
|
|
ver: ver,
|
|
date: pkg::Date::Desc,
|
|
arch: Some(arch),
|
|
file: open::Path{
|
|
path: &path,
|
|
cache: false,
|
|
canbelocal: false,
|
|
},
|
|
});
|
|
} else {
|
|
warn!("Unknown package: {}/{}", cat, f);
|
|
}
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
|
|
/// Remove the package archive extension from a file name.
///
/// Trailing ".tgz" occurrences are trimmed first, then trailing ".tbz" occurrences; each
/// suffix is removed as many times as it repeats. Names without either suffix are returned
/// unchanged.
fn trimext(n: &str) -> &str {
    let mut stem = n;
    // Order matters: "x.tbz.tgz" becomes "x", whereas "x.tgz.tbz" only loses the ".tbz".
    for ext in [".tgz", ".tbz"].iter() {
        stem = stem.trim_end_matches(*ext);
    }
    stem
}
|
|
|
|
|
|
fn splitver(n: &str) -> Option<(&str, &str)> {
|
|
lazy_static!(
|
|
static ref RE1: Regex = Regex::new("^(.+?)-([0-9].*)$").unwrap();
|
|
static ref RE2: Regex = Regex::new("^(.+)-([^-]+)$").unwrap();
|
|
);
|
|
if let Some(cap) = RE1.captures(n) {
|
|
Some((cap.get(1).unwrap().as_str(), cap.get(2).unwrap().as_str()))
|
|
} else if let Some(cap) = RE2.captures(n) {
|
|
Some((cap.get(1).unwrap().as_str(), cap.get(2).unwrap().as_str()))
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
|
|
/// Package names that contain a '-' and are added to the list of known package names in
/// `sync()`, so that file names starting with them are split at the right dash.
///
/// This list may not be complete, and these packages may not necessarily have man pages.
/// Entries are kept in alphabetical order and must be unique.
const EXTRA_PKGS: &[&str] = &[
    "amanda-client",
    "amanda-server",
    "apache-event",
    "apache-itk",
    "apache-peruser",
    "apache-tomcat",
    "apache-worker",
    "bison-devel",
    "boxbackup-devel",
    "ffmpeg-devel",
    "flex-sdk",
    "fpc-gdb",
    "freeradius-mysql",
    "gdb-insight",
    "glib-reference",
    "gmime-24",
    "gmime-24-sharp",
    "gtk-reference",
    "gtk-sharp",
    "gtkmm-reference",
    "horde-content",
    "horde-groupware",
    "horde-timeobjects",
    "horde-webmail",
    "hping-devel",
    "ja-jvim-direct_canna",
    "ja-mutt-devel",
    "kdelibs-experimental",
    "kdepim-runtime",
    "lame-devel",
    "libdivxdecore-devel",
    "libquicktime-lame",
    "libtorrent-rasterbar",
    "linux-netscape-communicator",
    "mkisofs-devel",
    "mldonkey-core-devel",
    "mldonkey-gui-devel",
    "mod_log_sql-dtc",
    "nethack-qt",
    "nfdump-devel",
    "openssl-beta",
    "pear-PHPUnit",
    "pear-XML_Query2XML",
    "pear-phpunit-PHPUnit",
    "pgadmin3-unicode",
    "proftpd-mod_ldap",
    "proftpd-mod_sql_mysql",
    "proftpd-mod_sql_odbc",
    "proftpd-mod_sql_postgres",
    "proftpd-mod_sql_sqlite",
    "proftpd-mod_sql_tds",
    "qt-static",
    "rsyslog-gnutls",
    "rsyslog-gssapi",
    "rsyslog-libdbi",
    "rsyslog-mysql",
    "rsyslog-pgsql",
    "rsyslog-relp",
    "rsyslog-rfc3195",
    "rsyslog-snmp",
    "samba-libsmbclient",
    "samba-nmblookup",
    "squirrelmail-shared_calendars-plugin",
    "tcl-thread",
    "wxgtk2-common-devel",
    "wxgtk2-contrib-common-devel",
    "wxgtk2-utils-devel",
];
|
|
|