From cb81bedac133ebc32b2f028e4c3d3a8b4ef31d44 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 6 Nov 2016 16:05:13 +0100 Subject: [PATCH] Add arch/encoding metadata to DB + Fetch Arch Linux x86_64 The encoding metadata will be very useful in finding badly decoded man pages. The package 'arch' is necessary to properly identify which package was used, which is not obvious now that I'm going to switch more systems to the (more common) x86_64 arch. --- indexer/src/main.rs | 2 ++ indexer/src/pkg.rs | 23 ++++++++++++----------- indexer/src/sys_arch.rs | 10 +++++++--- sql/schema.sql | 4 +++- sql/update-2016-11-06.sql | 2 ++ www/index.pl | 11 ++++++----- 6 files changed, 32 insertions(+), 20 deletions(-) create mode 100644 sql/update-2016-11-06.sql diff --git a/indexer/src/main.rs b/indexer/src/main.rs index 1083559..b46602d 100644 --- a/indexer/src/main.rs +++ b/indexer/src/main.rs @@ -42,6 +42,7 @@ fn main() { (@arg pkg: --pkg +required +takes_value "Package name") (@arg ver: --ver +required +takes_value "Package version") (@arg date: --date +required +takes_value "Package release date") + (@arg arch: --arch +takes_value "Architecture") (@arg FILE: +required "Package file") ) (@subcommand arch => @@ -81,6 +82,7 @@ fn main() { pkg: matches.value_of("pkg").unwrap(), ver: matches.value_of("ver").unwrap(), date: matches.value_of("date").unwrap(), + arch: matches.value_of("arch"), file: open::Path{ path: matches.value_of("FILE").unwrap(), cache: false, canbelocal: true}, }); } diff --git a/indexer/src/pkg.rs b/indexer/src/pkg.rs index 4d3379d..d8766b3 100644 --- a/indexer/src/pkg.rs +++ b/indexer/src/pkg.rs @@ -14,6 +14,7 @@ pub struct PkgOpt<'a> { pub pkg: &'a str, pub ver: &'a str, pub date: &'a str, // TODO: Option to extract date from package metadata itself + pub arch: Option<&'a str>, pub file: open::Path<'a> } @@ -36,8 +37,8 @@ fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option Option {}", src, dest); return; } - let hash: Vec = 
hash.get(0).get(0); - insert_man_row(tr, verid, src, &hash); + let hash: Vec = res.get(0).get(0); + let enc: String = res.get(0).get(1); + insert_man_row(tr, verid, src, &enc, &hash); debug!("Inserted man link: {} -> {}", src, dest); } diff --git a/indexer/src/sys_arch.rs b/indexer/src/sys_arch.rs index 7a0bf1f..1c3623d 100644 --- a/indexer/src/sys_arch.rs +++ b/indexer/src/sys_arch.rs @@ -15,6 +15,7 @@ struct Meta { name: String, version: String, date: String, + arch: Option, } @@ -43,6 +44,7 @@ fn read_desc(rd: &mut archive::ArchiveEntry) -> Result> { let mut name = None; let mut version = None; let mut builddate = None; + let mut arch = None; for kv in RE.captures_iter(&data) { let key = kv.at(1).unwrap(); @@ -53,6 +55,7 @@ fn read_desc(rd: &mut archive::ArchiveEntry) -> Result> { "NAME" => name = Some(val), "VERSION" => version = Some(val), "BUILDDATE" => builddate = i64::from_str(val).ok(), + "ARCH" => arch = Some(val), _ => {}, } } @@ -63,6 +66,7 @@ fn read_desc(rd: &mut archive::ArchiveEntry) -> Result> { name: name.unwrap().to_string(), version: version.unwrap().to_string(), date: NaiveDateTime::from_timestamp(builddate.unwrap(), 0).format("%Y-%m-%d").to_string(), + arch: arch.map(str::to_string), })) } else { warn!("Metadata missing from package description: {}", path); @@ -71,11 +75,10 @@ fn read_desc(rd: &mut archive::ArchiveEntry) -> Result> { } -// TODO: Switch to x86_64 instead of i686 pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, repo: &str) { info!("Reading packages from {} {}", mirror, repo); - let path = format!("{}/{}/os/i686/{1:}.files.tar.gz", mirror, repo); + let path = format!("{}/{}/os/x86_64/{1:}.files.tar.gz", mirror, repo); let path = open::Path{ path: &path, cache: true, canbelocal: false }; let mut index = match path.open() { Err(e) => { error!("Can't read package index: {}", e); return }, @@ -103,7 +106,7 @@ pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, repo: &str hasman = false; let m = 
meta.take().unwrap(); - let p = format!("{}/{}/os/i686/{}", mirror, repo, m.filename); + let p = format!("{}/{}/os/x86_64/{}", mirror, repo, m.filename); pkg::pkg(pg, pkg::PkgOpt{ force: false, sys: sys, @@ -111,6 +114,7 @@ pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, repo: &str pkg: &m.name, ver: &m.version, date: &m.date, + arch: m.arch.as_ref().map(|e| &e[..]), file: open::Path{ path: &p, cache: false, diff --git a/sql/schema.sql b/sql/schema.sql index 4bac1b9..5d48d1a 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -24,16 +24,18 @@ CREATE TABLE package_versions ( package integer NOT NULL REFERENCES packages(id) ON DELETE CASCADE, version varchar NOT NULL, released date NOT NULL, + arch varchar, UNIQUE(package, version) ); CREATE TABLE man ( package integer NOT NULL REFERENCES package_versions(id) ON DELETE CASCADE, name varchar NOT NULL, - section varchar NOT NULL, filename varchar NOT NULL, locale varchar, hash bytea NOT NULL REFERENCES contents(hash), + section varchar NOT NULL, + encoding varchar, UNIQUE(package, filename) ); diff --git a/sql/update-2016-11-06.sql b/sql/update-2016-11-06.sql new file mode 100644 index 0000000..7f4e820 --- /dev/null +++ b/sql/update-2016-11-06.sql @@ -0,0 +1,2 @@ +ALTER TABLE package_versions ADD COLUMN arch varchar; +ALTER TABLE man ADD COLUMN encoding varchar; diff --git a/www/index.pl b/www/index.pl index 37c5c8d..6eec011 100755 --- a/www/index.pl +++ b/www/index.pl @@ -238,7 +238,9 @@ sub about {
Arch Linux
The core, extra and community repositories are fetched from a local - Arch mirror. Indexing started around begin June 2012.
+ Arch mirror. Indexing started around begin June 2012. The i686 + architecture was indexed until November 6th, 2016, packages after that + were fetched from x86_64.
Debian
Historical releases were fetched from http://archive.debian.org/debian/ @@ -268,13 +270,12 @@ sub about { $release-security repositories are indexed. Backports are not included at the moment. Indexing started around mid June 2012.
- Only packages for a single architecture (i386 or i686) are scanned. To my + Only packages for a single architecture (i386 or amd64) are scanned. To my knowledge, packages that come with different manuals for different architectures either don't exist or are extremely rare. It does happen that some packages are not available for all architectures. Usually, though, - every package is at least available for i386/i686, so hopefully we're not - missing out on much. -

+ every package is at least available for the most popular architecture, so + hopefully we're not missing out on much.

The repositories are scanned for new packages on a daily basis. _ end;