Add arch/encoding metadata to DB + Fetch Arch Linux x86_64

The encoding metadata will be very useful for finding badly decoded man
pages. The package 'arch' is necessary to properly identify which
package was used; this is no longer obvious now that I'm going to switch
more systems to the (more common) x86_64 arch.
This commit is contained in:
Yorhel 2016-11-06 16:05:13 +01:00
parent b8a1945d38
commit cb81bedac1
6 changed files with 32 additions and 20 deletions

View file

@ -42,6 +42,7 @@ fn main() {
(@arg pkg: --pkg +required +takes_value "Package name")
(@arg ver: --ver +required +takes_value "Package version")
(@arg date: --date +required +takes_value "Package release date")
(@arg arch: --arch +takes_value "Architecture")
(@arg FILE: +required "Package file")
)
(@subcommand arch =>
@ -81,6 +82,7 @@ fn main() {
pkg: matches.value_of("pkg").unwrap(),
ver: matches.value_of("ver").unwrap(),
date: matches.value_of("date").unwrap(),
arch: matches.value_of("arch"),
file: open::Path{ path: matches.value_of("FILE").unwrap(), cache: false, canbelocal: true},
});
}

View file

@ -14,6 +14,7 @@ pub struct PkgOpt<'a> {
pub pkg: &'a str,
pub ver: &'a str,
pub date: &'a str, // TODO: Option to extract date from package metadata itself
pub arch: Option<&'a str>,
pub file: open::Path<'a>
}
@ -36,8 +37,8 @@ fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i
let verid : i32;
if res.is_empty() {
let q = "INSERT INTO package_versions (package, version, released) VALUES($1, $2, $3::text::date) RETURNING id";
verid = tr.query(q, &[&pkgid, &opt.ver, &opt.date]).unwrap().get(0).get(0);
let q = "INSERT INTO package_versions (package, version, released, arch) VALUES($1, $2, $3::text::date, $4) RETURNING id";
verid = tr.query(q, &[&pkgid, &opt.ver, &opt.date, &opt.arch]).unwrap().get(0).get(0);
info!("New package pkgid {} verid {}", pkgid, verid);
Some(verid)
@ -54,12 +55,11 @@ fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i
}
fn insert_man_row(tr: &postgres::GenericConnection, verid: i32, path: &str, hash: &[u8]) {
// TODO: Store 'encoding' in the database
fn insert_man_row(tr: &postgres::GenericConnection, verid: i32, path: &str, enc: &str, hash: &[u8]) {
let (name, sect, locale) = man::parse_path(path).unwrap();
if let Err(e) = tr.execute(
"INSERT INTO man (package, name, filename, locale, hash, section) VALUES ($1, $2, '/'||$3, $4, $5, $6)",
&[&verid, &name, &path, &locale, &hash, &sect]
"INSERT INTO man (package, name, filename, locale, hash, section, encoding) VALUES ($1, $2, '/'||$3, $4, $5, $6, $7)",
&[&verid, &name, &path, &locale, &hash, &sect, &enc]
) {
// I think this can only happen if archread gives us the same file twice, which really
// shouldn't happen. But I'd rather continue with an error logged than panic.
@ -84,20 +84,21 @@ fn insert_man(tr: &postgres::GenericConnection, verid: i32, paths: &[&str], ent:
).unwrap();
for path in paths {
insert_man_row(tr, verid, path, dig.as_ref());
insert_man_row(tr, verid, path, enc, dig.as_ref());
debug!("Inserted man page: {} ({})", path, enc);
}
}
fn insert_link(tr: &postgres::GenericConnection, verid: i32, src: &str, dest: &str) {
let hash = tr.query("SELECT hash FROM man WHERE package = $1 AND filename = '/'||$2", &[&verid, &dest]).unwrap();
if hash.is_empty() { /* Can happen if man::decode() failed previously. */
let res = tr.query("SELECT hash, encoding FROM man WHERE package = $1 AND filename = '/'||$2", &[&verid, &dest]).unwrap();
if res.is_empty() { /* Can happen if man::decode() failed previously. */
error!("Link to unindexed man page: {} -> {}", src, dest);
return;
}
let hash: Vec<u8> = hash.get(0).get(0);
insert_man_row(tr, verid, src, &hash);
let hash: Vec<u8> = res.get(0).get(0);
let enc: String = res.get(0).get(1);
insert_man_row(tr, verid, src, &enc, &hash);
debug!("Inserted man link: {} -> {}", src, dest);
}

View file

@ -15,6 +15,7 @@ struct Meta {
name: String,
version: String,
date: String,
arch: Option<String>,
}
@ -43,6 +44,7 @@ fn read_desc(rd: &mut archive::ArchiveEntry) -> Result<Option<Meta>> {
let mut name = None;
let mut version = None;
let mut builddate = None;
let mut arch = None;
for kv in RE.captures_iter(&data) {
let key = kv.at(1).unwrap();
@ -53,6 +55,7 @@ fn read_desc(rd: &mut archive::ArchiveEntry) -> Result<Option<Meta>> {
"NAME" => name = Some(val),
"VERSION" => version = Some(val),
"BUILDDATE" => builddate = i64::from_str(val).ok(),
"ARCH" => arch = Some(val),
_ => {},
}
}
@ -63,6 +66,7 @@ fn read_desc(rd: &mut archive::ArchiveEntry) -> Result<Option<Meta>> {
name: name.unwrap().to_string(),
version: version.unwrap().to_string(),
date: NaiveDateTime::from_timestamp(builddate.unwrap(), 0).format("%Y-%m-%d").to_string(),
arch: arch.map(str::to_string),
}))
} else {
warn!("Metadata missing from package description: {}", path);
@ -71,11 +75,10 @@ fn read_desc(rd: &mut archive::ArchiveEntry) -> Result<Option<Meta>> {
}
// TODO: Switch to x86_64 instead of i686
pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, repo: &str) {
info!("Reading packages from {} {}", mirror, repo);
let path = format!("{}/{}/os/i686/{1:}.files.tar.gz", mirror, repo);
let path = format!("{}/{}/os/x86_64/{1:}.files.tar.gz", mirror, repo);
let path = open::Path{ path: &path, cache: true, canbelocal: false };
let mut index = match path.open() {
Err(e) => { error!("Can't read package index: {}", e); return },
@ -103,7 +106,7 @@ pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, repo: &str
hasman = false;
let m = meta.take().unwrap();
let p = format!("{}/{}/os/i686/{}", mirror, repo, m.filename);
let p = format!("{}/{}/os/x86_64/{}", mirror, repo, m.filename);
pkg::pkg(pg, pkg::PkgOpt{
force: false,
sys: sys,
@ -111,6 +114,7 @@ pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, repo: &str
pkg: &m.name,
ver: &m.version,
date: &m.date,
arch: m.arch.as_ref().map(|e| &e[..]),
file: open::Path{
path: &p,
cache: false,