From 50fe17a6049c40d47835da3ec2db047d8132b2ef Mon Sep 17 00:00:00 2001
From: Yorhel
Date: Tue, 15 Nov 2016 21:11:18 +0100
Subject: [PATCH] Indexer: Support .deb archives

---
 indexer/src/archive.rs | 22 ++++++++++++++++++++++
 indexer/src/open.rs    |  1 +
 indexer/src/pkg.rs     | 39 +++++++++++++++++++++++++++++++--------
 3 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/indexer/src/archive.rs b/indexer/src/archive.rs
index c536dcb..a81755a 100644
--- a/indexer/src/archive.rs
+++ b/indexer/src/archive.rs
@@ -46,6 +46,14 @@ pub enum FileType {
     Other, // Also includes Link()
 }
 
+// Top-level formats, as in ARCHIVE_FORMAT_*
+#[derive(Debug,PartialEq,Eq)]
+pub enum Format {
+    Tar,
+    Ar,
+    Other, // Ultra lazyness
+}
+
 unsafe extern "C" fn archive_read_cb(_: *mut ffi::Struct_archive, data: *mut c_void, buf: *mut *const c_void) -> ssize_t
 {
     let arch: &mut Archive = &mut *(data as *mut Archive);
@@ -189,6 +197,19 @@ impl<'a> ArchiveEntry<'a> {
         unsafe { ffi::archive_entry_size(self.e) as usize }
     }
 
+    pub fn format(&self) -> Format {
+        // Interestingly, archive_format() is a property of the entry itself, not of the top-level
+        // archive. Hence it requires archive_read_next_header() and hence it's better placed as
+        // part of this ArchiveEntry object rather than the Archive object.
+        // ...that said, the top-level format isn't likely to change, it's the lower 16 bits that
+        // might be different.
+        match unsafe { ffi::archive_format(self.a.a) } >> 16 {
+            0x3 => Format::Tar,
+            0x7 => Format::Ar,
+            _ => Format::Other,
+        }
+    }
+
     fn symlink(&self) -> Option {
         let c_str: &CStr = unsafe {
             let ptr = ffi::archive_entry_symlink(self.e);
@@ -295,6 +316,7 @@ mod tests {
         let mut ent = Archive::open_archive(&mut f).unwrap().unwrap();
 
         let t = |e:&mut ArchiveEntry, path, size, ft, cont| {
+            assert_eq!(e.format(), Format::Tar);
             assert_eq!(e.path(), path);
             assert_eq!(e.size(), size);
             assert_eq!(e.filetype(), ft);
diff --git a/indexer/src/open.rs b/indexer/src/open.rs
index ef7e8b2..6bbe82d 100644
--- a/indexer/src/open.rs
+++ b/indexer/src/open.rs
@@ -10,6 +10,7 @@
 const CACHE_PATH: &'static str = "/var/tmp/manned-indexer";
 const CACHE_TIME: u64 = 23*3600;
 
+#[derive(Clone,Copy)]
 pub struct Path<'a> {
     pub path: &'a str,
     pub cache: bool,
diff --git a/indexer/src/pkg.rs b/indexer/src/pkg.rs
index 9d59898..11cfc9f 100644
--- a/indexer/src/pkg.rs
+++ b/indexer/src/pkg.rs
@@ -1,11 +1,11 @@
 use std;
-use std::io::Read;
+use std::io::{Error,ErrorKind,Read};
 use postgres;
 
 use open;
 use archread;
 use man;
-use archive::{Archive,ArchiveEntry};
+use archive::{Format,Archive,ArchiveEntry};
 
 pub struct PkgOpt<'a> {
     pub force: bool,
@@ -104,22 +104,45 @@ fn insert_link(tr: &postgres::GenericConnection, verid: i32, src: &str, dest: &s
 }
 
 
+fn with_pkg<F,T>(f: open::Path, cb: F) -> std::io::Result<T>
+    where F: FnOnce(Option<ArchiveEntry>) -> std::io::Result<T>
+{
+    let mut rd = f.open()?;
+    let ent = match Archive::open_archive(&mut rd)? {
+        None => return cb(None),
+        Some(x) => x,
+    };
+
+    // .deb ("2.0")
+    if ent.format() == Format::Ar && ent.path() == Some("debian-binary") {
+        let mut ent = ent.next()?;
+        while let Some(mut e) = ent {
+            if e.path().map(|p| p.starts_with("data.tar")) == Some(true) {
+                return cb(Archive::open_archive(&mut e)?);
+            }
+            ent = e.next()?
+        }
+        Err(Error::new(ErrorKind::Other, "Debian file without data.tar"))
+
+    // any other archive (Arch/FreeBSD .tar)
+    } else {
+        cb(Some(ent))
+    }
+}
+
+
 fn index_pkg(tr: &postgres::GenericConnection, opt: &PkgOpt, verid: i32) -> std::io::Result<()> {
     let indexfunc = |paths: &[&str], ent: &mut ArchiveEntry| {
         insert_man(tr, verid, paths, ent);
         Ok(()) /* Don't propagate errors, continue handling other man pages */
     };
 
-    let mut rd = try!(opt.file.open());
-    let missed = try!(archread::FileList::read(
-            try!(Archive::open_archive(&mut rd)),
-            man::ismanpath, &indexfunc))
+    let missed = with_pkg(opt.file, |e| { archread::FileList::read(e, man::ismanpath, &indexfunc) })?
         .links(|src, dest| { insert_link(tr, verid, src, dest) });
 
     if let Some(missed) = missed {
         warn!("Some links were missed, reading package again");
-        let mut rd = try!(opt.file.open());
-        try!(missed.read(try!(Archive::open_archive(&mut rd)), indexfunc));
+        with_pkg(opt.file, |e| { missed.read(e, indexfunc) })?
     }
     Ok(())
 }