Indexer: Add --dryrun and workarounds for old deb repos
This commit is contained in:
parent
ecb1a9e25b
commit
5d44d0e2ec
4 changed files with 41 additions and 16 deletions
|
|
@ -35,6 +35,7 @@ fn main() {
|
|||
let arg = clap_app!(indexer =>
|
||||
(about: "Manned.org man page indexer")
|
||||
(@arg v: -v +multiple "Increase verbosity")
|
||||
(@arg dry: --dryrun "Don't actually download and index packages")
|
||||
(@subcommand pkg =>
|
||||
(about: "Index a single package")
|
||||
(@arg force: --force "Overwrite existing indexed package")
|
||||
|
|
@ -56,11 +57,13 @@ fn main() {
|
|||
(about: "Index a Debian repository")
|
||||
(@arg sys: --sys +required +takes_value "System short-name")
|
||||
(@arg mirror: --mirror +required +takes_value "Mirror URL")
|
||||
(@arg contents: --contents +required +takes_value "Contents file")
|
||||
(@arg contents: --contents +takes_value "Contents file")
|
||||
(@arg packages: --packages +required +takes_value "Packages file")
|
||||
)
|
||||
).get_matches();
|
||||
|
||||
unsafe { pkg::DRY_RUN = arg.is_present("dry") };
|
||||
|
||||
let verbose = arg.occurrences_of("v");
|
||||
env_logger::LogBuilder::new()
|
||||
.filter(Some("indexer"), match verbose {
|
||||
|
|
@ -85,7 +88,7 @@ fn main() {
|
|||
Ok(x) => x,
|
||||
Err(x) => { error!("Can't connect to postgres: {}", x); return },
|
||||
};
|
||||
debug!("Connected to database");
|
||||
trace!("Connected to database");
|
||||
|
||||
if let Some(matches) = arg.subcommand_matches("pkg") {
|
||||
let date = match matches.value_of("date").unwrap() {
|
||||
|
|
@ -116,8 +119,10 @@ fn main() {
|
|||
sys_deb::sync(&db,
|
||||
sysbyshort(&db, matches.value_of("sys").unwrap()),
|
||||
matches.value_of("mirror").unwrap(),
|
||||
open::Path{ path: matches.value_of("contents").unwrap(), cache: true, canbelocal: true},
|
||||
matches.value_of("contents").map(|e| { open::Path{ path: e, cache: true, canbelocal: true} }),
|
||||
open::Path{ path: matches.value_of("packages").unwrap(), cache: true, canbelocal: true},
|
||||
);
|
||||
}
|
||||
|
||||
trace!("Exiting");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -204,6 +204,7 @@ pub fn decode(paths: &[&str], ent: &mut Read) -> io::Result<(digest::Digest,&'st
|
|||
let dig = digest::digest(&digest::SHA1, &data);
|
||||
|
||||
// TODO: Handle BOM? UTF-16?
|
||||
// TODO: This fails badly for ISO-2022-JP. How the hell do we cleanly fix that?
|
||||
// If it passes as UTF-8, then just consider it UTF-8.
|
||||
if let Ok(_) = str::from_utf8(&data) {
|
||||
return Ok((dig, "utf8", unsafe { String::from_utf8_unchecked(data) } ));
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ use archread;
|
|||
use man;
|
||||
use archive::{Format,Archive,ArchiveEntry};
|
||||
|
||||
/// Global dry-run switch for the indexer.
///
/// Assigned in `main` from the presence of the `--dryrun` command-line flag.
/// When it is `true`, `pkg()` returns right after `insert_pkg` succeeds and
/// never calls `index_pkg`.
///
/// NOTE(review): this is an unsynchronized `static mut`, so every access needs
/// an `unsafe` block. It looks like it is written only once at startup —
/// confirm that nothing reads it concurrently with that write.
pub static mut DRY_RUN: bool = false;
|
||||
|
||||
|
||||
#[derive(Debug,Clone,Copy)]
|
||||
pub enum Date<'a> {
|
||||
|
|
@ -72,6 +74,7 @@ fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i
|
|||
Some(verid)
|
||||
|
||||
} else if opt.force {
|
||||
// XXX: Should we update released & arch here?
|
||||
verid = res.get(0).get(0);
|
||||
info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
|
||||
tr.query("DELETE FROM man WHERE package = $1", &[&verid]).unwrap();
|
||||
|
|
@ -198,6 +201,9 @@ pub fn pkg(conn: &postgres::GenericConnection, opt: PkgOpt) {
|
|||
tr.set_rollback();
|
||||
|
||||
let verid = match insert_pkg(&tr, &opt) { Some(x) => x, None => return };
|
||||
if unsafe { DRY_RUN } {
|
||||
return;
|
||||
}
|
||||
|
||||
match index_pkg(&tr, opt, verid) {
|
||||
Err(e) => error!("Error reading package: {}", e),
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ use std::io::{Result,BufReader,BufRead};
|
|||
use std::collections::HashSet;
|
||||
use std::str;
|
||||
use postgres;
|
||||
use regex;
|
||||
use regex::bytes::Regex;
|
||||
|
||||
use man;
|
||||
|
|
@ -11,7 +12,8 @@ use archive;
|
|||
|
||||
// Reference: https://wiki.debian.org/RepositoryFormat
|
||||
|
||||
fn get_contents(f: open::Path) -> Result<HashSet<String>> {
|
||||
fn get_contents(f: Option<open::Path>) -> Result<HashSet<String>> {
|
||||
let f = match f { Some(f) => f, None => return Ok(HashSet::new()) };
|
||||
let mut fd = f.open()?;
|
||||
let rd = archive::Archive::open_raw(&mut fd)?;
|
||||
let brd = BufReader::new(rd);
|
||||
|
|
@ -53,14 +55,24 @@ struct Pkg {
|
|||
|
||||
fn handlepkg(pg: &postgres::GenericConnection, sys: i32, mirror: &str, manpkgs: &HashSet<String>, pkg: &Pkg) {
|
||||
let name = match pkg.name { Some(ref x) => x, None => return };
|
||||
if !manpkgs.contains(name) {
|
||||
if manpkgs.len() > 0 && !manpkgs.contains(name) {
|
||||
return
|
||||
}
|
||||
let section = match pkg.section { Some(ref x) => x, None => { error!("Package {} has no section", name); return } };
|
||||
let arch = match pkg.arch { Some(ref x) => x, None => { error!("Package {} has no arch", name); return } };
|
||||
let version = match pkg.version { Some(ref x) => x, None => { error!("Package {} has no version", name); return } };
|
||||
let filename = match pkg.filename { Some(ref x) => x, None => { error!("Package {} has no filename", name); return } };
|
||||
let uri = format!("{}{}", mirror, filename);
|
||||
|
||||
// Workarounds for some bad repos
|
||||
let uri = if sys == 18 || sys == 19 {
|
||||
let filename = regex::Regex::new("^(Debian-1.[12])/").unwrap().replace(filename, "dists/$1/main/");
|
||||
if filename.starts_with("contrib/") {
|
||||
format!("{}dists/Debian-1.{}/{}", mirror, if sys == 18 { 1 } else { 2 }, filename)
|
||||
} else {
|
||||
format!("{}{}", mirror, filename)
|
||||
}
|
||||
} else {
|
||||
format!("{}{}", mirror, filename)
|
||||
};
|
||||
|
||||
pkg::pkg(pg, pkg::PkgOpt{
|
||||
force: false,
|
||||
|
|
@ -69,7 +81,7 @@ fn handlepkg(pg: &postgres::GenericConnection, sys: i32, mirror: &str, manpkgs:
|
|||
pkg: &name,
|
||||
ver: &version,
|
||||
date: pkg::Date::Deb,
|
||||
arch: Some(arch),
|
||||
arch: pkg.arch.as_ref().map(|e| &e[..]),
|
||||
file: open::Path{
|
||||
path: &uri,
|
||||
cache: false,
|
||||
|
|
@ -79,9 +91,9 @@ fn handlepkg(pg: &postgres::GenericConnection, sys: i32, mirror: &str, manpkgs:
|
|||
}
|
||||
|
||||
|
||||
pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, contents: open::Path, packages: open::Path) {
|
||||
pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, contents: Option<open::Path>, packages: open::Path) {
|
||||
let manpkgs = match get_contents(contents) {
|
||||
Err(e) => { error!("Can't read {}: {}", contents.path, e); return },
|
||||
Err(e) => { error!("Can't read {}: {}", contents.unwrap().path, e); return },
|
||||
Ok(x) => x,
|
||||
};
|
||||
|
||||
|
|
@ -110,12 +122,13 @@ pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, contents:
|
|||
}
|
||||
if let Some(cap) = kv.captures(&line) {
|
||||
let val = str::from_utf8(cap.at(2).unwrap()).unwrap();
|
||||
match str::from_utf8(cap.at(1).unwrap()).unwrap() {
|
||||
"Package" => pkg.name = Some(val.to_string()),
|
||||
"Section" => pkg.section = Some(val.to_string()),
|
||||
"Version" => pkg.version = Some(val.to_string()),
|
||||
"Architecture" => pkg.arch = Some(val.to_string()),
|
||||
"Filename" => pkg.filename = Some(val.to_string()),
|
||||
// Use case-insensitive matching, older package archives used lowercase keys
|
||||
match str::from_utf8(cap.at(1).unwrap()).unwrap().to_lowercase().as_ref() {
|
||||
"package" => pkg.name = Some(val.to_string()),
|
||||
"section" => pkg.section = Some(val.to_string()),
|
||||
"version" => pkg.version = Some(val.to_string()),
|
||||
"architecture" => pkg.arch = Some(val.to_string()),
|
||||
"filename" => pkg.filename = Some(val.to_string()),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue