Indexer: Support reading date from .deb archives

This commit is contained in:
Yorhel 2016-11-20 09:01:33 +01:00
parent a1e5a2d80d
commit ecb1a9e25b
8 changed files with 78 additions and 23 deletions

View file

@ -210,6 +210,12 @@ impl<'a> ArchiveEntry<'a> {
}
}
/// Modification time of this entry, in whole seconds since the Unix epoch, as reported by
/// libarchive's `archive_entry_mtime()`.
///
/// This is intentionally minimal: it neither checks whether the entry has an mtime recorded
/// (`mtime_is_set()`) nor reads the sub-second part (`_mtime_nsec()`). A complete API would
/// combine those into an `Option<SomeHighResolutionTimestamp>`; the plain seconds value is
/// enough for the current callers.
pub fn mtime(&self) -> i64 {
    // NOTE(review): presumably `self.e` is a live libarchive entry pointer for as long as
    // `self` exists -- confirm against the ArchiveEntry constructor.
    let seconds: i64 = unsafe { ffi::archive_entry_mtime(self.e) };
    seconds
}
fn symlink(&self) -> Option<String> {
let c_str: &CStr = unsafe {
let ptr = ffi::archive_entry_symlink(self.e);
@ -329,6 +335,7 @@ mod tests {
ent = ent.next().unwrap().unwrap();
t(&mut ent, Some("simple/file"), 3, FileType::File, "Hi\n");
assert_eq!(ent.mtime(), 1479627842);
ent = ent.next().unwrap().unwrap();
t(&mut ent, Some("simple/link"), 0, FileType::Link("file".to_string()), "");

View file

@ -76,8 +76,8 @@ impl FileList {
*
* Returns a FileList struct that can be used to retrieve all interesting non-regular files.
*/
pub fn read<F,G>(ent: Option<ArchiveEntry>, interest_cb: F, mut file_cb: G) -> Result<FileList>
where F: Fn(&str) -> bool, G: FnMut(&[&str], &mut ArchiveEntry) -> Result<()>
pub fn read<F,G>(ent: Option<ArchiveEntry>, mut interest_cb: F, mut file_cb: G) -> Result<FileList>
where F: FnMut(&ArchiveEntry) -> bool, G: FnMut(&[&str], &mut ArchiveEntry) -> Result<()>
{
let mut fl = FileList {
seen: HashMap::new(),
@ -101,7 +101,7 @@ impl FileList {
let et = match ft {
FileType::File => {
if interest_cb(&path) {
if interest_cb(&e) {
let pathv = [&path as &str];
try!(file_cb(&pathv[..], &mut e));
EntryType::Handled
@ -110,7 +110,7 @@ impl FileList {
}
},
FileType::Link(l) => {
if interest_cb(&path) {
if interest_cb(&e) {
fl.links.push(path.clone());
}
EntryType::Link(l)
@ -260,7 +260,7 @@ mod tests {
let arch = Archive::open_archive(&mut f).unwrap();
let mut cnt = 0;
FileList::read(arch,
|p| p.starts_with("man/man"),
|p| p.path().unwrap().starts_with("man/man"),
|p,e| {
assert_eq!(cnt, 0);
cnt += 1;

View file

@ -88,13 +88,17 @@ fn main() {
debug!("Connected to database");
if let Some(matches) = arg.subcommand_matches("pkg") {
let date = match matches.value_of("date").unwrap() {
"deb" => pkg::Date::Deb,
s => pkg::Date::Known(s),
};
pkg::pkg(&db, pkg::PkgOpt {
force: matches.is_present("force"),
sys: sysbyshort(&db, matches.value_of("sys").unwrap()),
cat: matches.value_of("cat").unwrap(),
pkg: matches.value_of("pkg").unwrap(),
ver: matches.value_of("ver").unwrap(),
date: matches.value_of("date").unwrap(),
date: date,
arch: matches.value_of("arch"),
file: open::Path{ path: matches.value_of("FILE").unwrap(), cache: false, canbelocal: true},
});

View file

@ -1,26 +1,47 @@
use std;
use std::io::{Error,ErrorKind,Read};
use postgres;
use chrono::NaiveDateTime;
use open;
use archread;
use man;
use archive::{Format,Archive,ArchiveEntry};
/// Source of the release date recorded for a package version.
///
/// A value starts out as either `Known` or `Deb`; a `Deb` value is expected to turn into
/// `Found` once the relevant archive member has been seen while reading the package.
#[derive(Debug, Clone, Copy)]
pub enum Date<'a> {
    /// Date string given by the caller in `PkgOpt`.
    Known(&'a str),
    /// Timestamp (seconds) that was found inside the package itself.
    Found(i64),
    /// Not determined yet: should be read from the timestamp of the 'debian-binary' file.
    Deb,
}
impl<'a> Date<'a> {
    /// Offers an archive entry to this date; the entry may resolve a pending `Deb` date.
    ///
    /// Only a `Date::Deb` value can change. It becomes `Date::Found(mtime)` when `ent` is an
    /// `ar` member named exactly "debian-binary". `Known` and `Found` values, and entries
    /// that do not match, leave `self` untouched.
    fn update(&mut self, ent: &ArchiveEntry) {
        // TODO: Validate that the mtime() date is sensible (e.g. 1990 < date < now)
        let still_pending = match *self {
            Date::Deb => true,
            _ => false,
        };
        if !still_pending {
            return;
        }

        // Same check order as before: container format first, then the member name.
        let is_deb_marker = ent.format() == Format::Ar && ent.path() == Some("debian-binary");
        if is_deb_marker {
            *self = Date::Found(ent.mtime());
        }
    }
}
pub struct PkgOpt<'a> {
pub force: bool,
pub sys: i32,
pub cat: &'a str,
pub pkg: &'a str,
pub ver: &'a str,
pub date: &'a str, // TODO: Option to extract date from package metadata itself
pub date: Date<'a>,
pub arch: Option<&'a str>,
pub file: open::Path<'a>
}
fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i32> {
let pkginfo = format!("sys {} / {} / {} - {} @ {} @ {}", opt.sys, opt.cat, opt.pkg, opt.ver, opt.date, opt.file.path);
let pkginfo = format!("sys {} / {} / {} - {} @ {:?} @ {}", opt.sys, opt.cat, opt.pkg, opt.ver, opt.date, opt.file.path);
// The ON CONFLICT .. DO UPDATE is used instead of DO NOTHING because in that case the
// RETURNING clause wouldn't give us a package id.
@ -38,9 +59,15 @@ fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i
let res = tr.query(q, &[&pkgid, &opt.ver]).unwrap();
let verid : i32;
let date = match opt.date {
Date::Known(d) => d,
_ => "1980-01-01", // Placeholder
};
if res.is_empty() {
let q = "INSERT INTO package_versions (package, version, released, arch) VALUES($1, $2, $3::text::date, $4) RETURNING id";
verid = tr.query(q, &[&pkgid, &opt.ver, &opt.date, &opt.arch]).unwrap().get(0).get(0);
verid = tr.query(q, &[&pkgid, &opt.ver, &date, &opt.arch]).unwrap().get(0).get(0);
info!("New package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
Some(verid)
@ -106,21 +133,23 @@ fn insert_link(tr: &postgres::GenericConnection, verid: i32, src: &str, dest: &s
}
fn with_pkg<F,T>(f: open::Path, cb: F) -> std::io::Result<T>
where F: FnOnce(Option<ArchiveEntry>) -> std::io::Result<T>
fn with_pkg<F,T>(opt: &mut PkgOpt, cb: F) -> std::io::Result<T>
where F: FnOnce(Option<ArchiveEntry>, &mut PkgOpt) -> std::io::Result<T>
{
let mut rd = f.open()?;
let mut rd = opt.file.open()?;
let ent = match Archive::open_archive(&mut rd)? {
None => return cb(None),
None => return cb(None, opt),
Some(x) => x,
};
// .deb ("2.0")
if ent.format() == Format::Ar && ent.path() == Some("debian-binary") {
opt.date.update(&ent);
let mut ent = ent.next()?;
while let Some(mut e) = ent {
opt.date.update(&e);
if e.path().map(|p| p.starts_with("data.tar")) == Some(true) {
return cb(Archive::open_archive(&mut e)?);
return cb(Archive::open_archive(&mut e)?, opt);
}
ent = e.next()?
}
@ -128,25 +157,39 @@ fn with_pkg<F,T>(f: open::Path, cb: F) -> std::io::Result<T>
// any other archive (Arch/FreeBSD .tar)
} else {
cb(Some(ent))
cb(Some(ent), opt)
}
}
fn index_pkg(tr: &postgres::GenericConnection, opt: &PkgOpt, verid: i32) -> std::io::Result<()> {
fn index_pkg(tr: &postgres::GenericConnection, mut opt: PkgOpt, verid: i32) -> std::io::Result<()> {
let indexfunc = |paths: &[&str], ent: &mut ArchiveEntry| {
insert_man(tr, verid, paths, ent);
Ok(()) /* Don't propagate errors, continue handling other man pages */
};
let missed = with_pkg(opt.file, |e| { archread::FileList::read(e, man::ismanpath, &indexfunc) })?
.links(|src, dest| { insert_link(tr, verid, src, dest) });
let missed = with_pkg(&mut opt, |e, opt| {
archread::FileList::read(e, |ent: &ArchiveEntry| {
opt.date.update(ent);
man::ismanpath(ent.path().unwrap())
}, &indexfunc)
})?.links(|src, dest| { insert_link(tr, verid, src, dest) });
if let Some(missed) = missed {
warn!("Some links were missed, reading package again");
with_pkg(opt.file, |e| { missed.read(e, indexfunc) })?
with_pkg(&mut opt, |e, _| { missed.read(e, indexfunc) })?
}
match opt.date {
Date::Known(_) => Ok(()),
Date::Found(t) => {
let date = NaiveDateTime::from_timestamp(t, 0).format("%Y-%m-%d").to_string();
debug!("Date from package: {}", date);
tr.execute("UPDATE package_versions SET released = $1::text::date WHERE id = $2", &[&date, &verid]).unwrap();
Ok(())
},
_ => Err(Error::new(ErrorKind::Other, "No valid date found in this package")),
}
Ok(())
}
@ -156,7 +199,7 @@ pub fn pkg(conn: &postgres::GenericConnection, opt: PkgOpt) {
let verid = match insert_pkg(&tr, &opt) { Some(x) => x, None => return };
match index_pkg(&tr, &opt, verid) {
match index_pkg(&tr, opt, verid) {
Err(e) => error!("Error reading package: {}", e),
Ok(_) => tr.set_commit()
}

View file

@ -113,7 +113,7 @@ pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, repo: &str
cat: repo,
pkg: &m.name,
ver: &m.version,
date: &m.date,
date: pkg::Date::Known(&m.date),
arch: m.arch.as_ref().map(|e| &e[..]),
file: open::Path{
path: &p,

View file

@ -68,7 +68,7 @@ fn handlepkg(pg: &postgres::GenericConnection, sys: i32, mirror: &str, manpkgs:
cat: &section,
pkg: &name,
ver: &version,
date: "1980-01-01", // TODO: Fetch date from somewhere (package contents itself, likely)
date: pkg::Date::Deb,
arch: Some(arch),
file: open::Path{
path: &uri,

View file

@ -9,6 +9,7 @@
mkdir simple
echo Hi >simple/file
touch -d '2016-11-20 08:44:02+01:00' simple/file
ln -s file simple/link
ln simple/file simple/hardlink
mkfifo simple/fifo

Binary file not shown.