Indexer: Add --dryrun and workarounds for old deb repos

This commit is contained in:
Yorhel 2016-11-20 11:26:00 +01:00
parent ecb1a9e25b
commit 5d44d0e2ec
4 changed files with 41 additions and 16 deletions

View file

@ -35,6 +35,7 @@ fn main() {
let arg = clap_app!(indexer =>
(about: "Manned.org man page indexer")
(@arg v: -v +multiple "Increase verbosity")
(@arg dry: --dryrun "Don't actually download and index packages")
(@subcommand pkg =>
(about: "Index a single package")
(@arg force: --force "Overwrite existing indexed package")
@ -56,11 +57,13 @@ fn main() {
(about: "Index a Debian repository")
(@arg sys: --sys +required +takes_value "System short-name")
(@arg mirror: --mirror +required +takes_value "Mirror URL")
(@arg contents: --contents +required +takes_value "Contents file")
(@arg contents: --contents +takes_value "Contents file")
(@arg packages: --packages +required +takes_value "Packages file")
)
).get_matches();
unsafe { pkg::DRY_RUN = arg.is_present("dry") };
let verbose = arg.occurrences_of("v");
env_logger::LogBuilder::new()
.filter(Some("indexer"), match verbose {
@ -85,7 +88,7 @@ fn main() {
Ok(x) => x,
Err(x) => { error!("Can't connect to postgres: {}", x); return },
};
debug!("Connected to database");
trace!("Connected to database");
if let Some(matches) = arg.subcommand_matches("pkg") {
let date = match matches.value_of("date").unwrap() {
@ -116,8 +119,10 @@ fn main() {
sys_deb::sync(&db,
sysbyshort(&db, matches.value_of("sys").unwrap()),
matches.value_of("mirror").unwrap(),
open::Path{ path: matches.value_of("contents").unwrap(), cache: true, canbelocal: true},
matches.value_of("contents").map(|e| { open::Path{ path: e, cache: true, canbelocal: true} }),
open::Path{ path: matches.value_of("packages").unwrap(), cache: true, canbelocal: true},
);
}
trace!("Exiting");
}

View file

@ -204,6 +204,7 @@ pub fn decode(paths: &[&str], ent: &mut Read) -> io::Result<(digest::Digest,&'st
let dig = digest::digest(&digest::SHA1, &data);
// TODO: Handle BOM? UTF-16?
// TODO: This fails badly for ISO-2022-JP. How the hell do we cleanly fix that?
// If it passes as UTF-8, then just consider it UTF-8.
if let Ok(_) = str::from_utf8(&data) {
return Ok((dig, "utf8", unsafe { String::from_utf8_unchecked(data) } ));

View file

@ -8,6 +8,8 @@ use archread;
use man;
use archive::{Format,Archive,ArchiveEntry};
/// Global dry-run switch for the indexer.
///
/// `main` assigns it from the presence of the `--dryrun` command-line flag.
/// When it is `true`, `pkg()` returns right after `insert_pkg` succeeds and
/// never calls `index_pkg`.
///
/// This is a `static mut`, so every read and write must be wrapped in
/// `unsafe`.
// NOTE(review): presumably written once at startup before any reader runs —
// confirm no other thread touches it.
pub static mut DRY_RUN: bool = false;
#[derive(Debug,Clone,Copy)]
pub enum Date<'a> {
@ -72,6 +74,7 @@ fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i
Some(verid)
} else if opt.force {
// XXX: Should we update released & arch here?
verid = res.get(0).get(0);
info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
tr.query("DELETE FROM man WHERE package = $1", &[&verid]).unwrap();
@ -198,6 +201,9 @@ pub fn pkg(conn: &postgres::GenericConnection, opt: PkgOpt) {
tr.set_rollback();
let verid = match insert_pkg(&tr, &opt) { Some(x) => x, None => return };
if unsafe { DRY_RUN } {
return;
}
match index_pkg(&tr, opt, verid) {
Err(e) => error!("Error reading package: {}", e),

View file

@ -2,6 +2,7 @@ use std::io::{Result,BufReader,BufRead};
use std::collections::HashSet;
use std::str;
use postgres;
use regex;
use regex::bytes::Regex;
use man;
@ -11,7 +12,8 @@ use archive;
// Reference: https://wiki.debian.org/RepositoryFormat
fn get_contents(f: open::Path) -> Result<HashSet<String>> {
fn get_contents(f: Option<open::Path>) -> Result<HashSet<String>> {
let f = match f { Some(f) => f, None => return Ok(HashSet::new()) };
let mut fd = f.open()?;
let rd = archive::Archive::open_raw(&mut fd)?;
let brd = BufReader::new(rd);
@ -53,14 +55,24 @@ struct Pkg {
fn handlepkg(pg: &postgres::GenericConnection, sys: i32, mirror: &str, manpkgs: &HashSet<String>, pkg: &Pkg) {
let name = match pkg.name { Some(ref x) => x, None => return };
if !manpkgs.contains(name) {
if manpkgs.len() > 0 && !manpkgs.contains(name) {
return
}
let section = match pkg.section { Some(ref x) => x, None => { error!("Package {} has no section", name); return } };
let arch = match pkg.arch { Some(ref x) => x, None => { error!("Package {} has no arch", name); return } };
let version = match pkg.version { Some(ref x) => x, None => { error!("Package {} has no version", name); return } };
let filename = match pkg.filename { Some(ref x) => x, None => { error!("Package {} has no filename", name); return } };
let uri = format!("{}{}", mirror, filename);
// Workarounds for some bad repos
let uri = if sys == 18 || sys == 19 {
let filename = regex::Regex::new("^(Debian-1.[12])/").unwrap().replace(filename, "dists/$1/main/");
if filename.starts_with("contrib/") {
format!("{}dists/Debian-1.{}/{}", mirror, if sys == 18 { 1 } else { 2 }, filename)
} else {
format!("{}{}", mirror, filename)
}
} else {
format!("{}{}", mirror, filename)
};
pkg::pkg(pg, pkg::PkgOpt{
force: false,
@ -69,7 +81,7 @@ fn handlepkg(pg: &postgres::GenericConnection, sys: i32, mirror: &str, manpkgs:
pkg: &name,
ver: &version,
date: pkg::Date::Deb,
arch: Some(arch),
arch: pkg.arch.as_ref().map(|e| &e[..]),
file: open::Path{
path: &uri,
cache: false,
@ -79,9 +91,9 @@ fn handlepkg(pg: &postgres::GenericConnection, sys: i32, mirror: &str, manpkgs:
}
pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, contents: open::Path, packages: open::Path) {
pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, contents: Option<open::Path>, packages: open::Path) {
let manpkgs = match get_contents(contents) {
Err(e) => { error!("Can't read {}: {}", contents.path, e); return },
Err(e) => { error!("Can't read {}: {}", contents.unwrap().path, e); return },
Ok(x) => x,
};
@ -110,12 +122,13 @@ pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, contents:
}
if let Some(cap) = kv.captures(&line) {
let val = str::from_utf8(cap.at(2).unwrap()).unwrap();
match str::from_utf8(cap.at(1).unwrap()).unwrap() {
"Package" => pkg.name = Some(val.to_string()),
"Section" => pkg.section = Some(val.to_string()),
"Version" => pkg.version = Some(val.to_string()),
"Architecture" => pkg.arch = Some(val.to_string()),
"Filename" => pkg.filename = Some(val.to_string()),
// Match keys case-insensitively: older package archives used lowercase keys.
match str::from_utf8(cap.at(1).unwrap()).unwrap().to_lowercase().as_ref() {
"package" => pkg.name = Some(val.to_string()),
"section" => pkg.section = Some(val.to_string()),
"version" => pkg.version = Some(val.to_string()),
"architecture" => pkg.arch = Some(val.to_string()),
"filename" => pkg.filename = Some(val.to_string()),
_ => {}
}
}