Indexer: Add --dryrun and workarounds for old deb repos
This commit is contained in:
parent
ecb1a9e25b
commit
5d44d0e2ec
4 changed files with 41 additions and 16 deletions
|
|
@ -35,6 +35,7 @@ fn main() {
|
||||||
let arg = clap_app!(indexer =>
|
let arg = clap_app!(indexer =>
|
||||||
(about: "Manned.org man page indexer")
|
(about: "Manned.org man page indexer")
|
||||||
(@arg v: -v +multiple "Increase verbosity")
|
(@arg v: -v +multiple "Increase verbosity")
|
||||||
|
(@arg dry: --dryrun "Don't actually download and index packages")
|
||||||
(@subcommand pkg =>
|
(@subcommand pkg =>
|
||||||
(about: "Index a single package")
|
(about: "Index a single package")
|
||||||
(@arg force: --force "Overwrite existing indexed package")
|
(@arg force: --force "Overwrite existing indexed package")
|
||||||
|
|
@ -56,11 +57,13 @@ fn main() {
|
||||||
(about: "Index a Debian repository")
|
(about: "Index a Debian repository")
|
||||||
(@arg sys: --sys +required +takes_value "System short-name")
|
(@arg sys: --sys +required +takes_value "System short-name")
|
||||||
(@arg mirror: --mirror +required +takes_value "Mirror URL")
|
(@arg mirror: --mirror +required +takes_value "Mirror URL")
|
||||||
(@arg contents: --contents +required +takes_value "Contents file")
|
(@arg contents: --contents +takes_value "Contents file")
|
||||||
(@arg packages: --packages +required +takes_value "Packages file")
|
(@arg packages: --packages +required +takes_value "Packages file")
|
||||||
)
|
)
|
||||||
).get_matches();
|
).get_matches();
|
||||||
|
|
||||||
|
unsafe { pkg::DRY_RUN = arg.is_present("dry") };
|
||||||
|
|
||||||
let verbose = arg.occurrences_of("v");
|
let verbose = arg.occurrences_of("v");
|
||||||
env_logger::LogBuilder::new()
|
env_logger::LogBuilder::new()
|
||||||
.filter(Some("indexer"), match verbose {
|
.filter(Some("indexer"), match verbose {
|
||||||
|
|
@ -85,7 +88,7 @@ fn main() {
|
||||||
Ok(x) => x,
|
Ok(x) => x,
|
||||||
Err(x) => { error!("Can't connect to postgres: {}", x); return },
|
Err(x) => { error!("Can't connect to postgres: {}", x); return },
|
||||||
};
|
};
|
||||||
debug!("Connected to database");
|
trace!("Connected to database");
|
||||||
|
|
||||||
if let Some(matches) = arg.subcommand_matches("pkg") {
|
if let Some(matches) = arg.subcommand_matches("pkg") {
|
||||||
let date = match matches.value_of("date").unwrap() {
|
let date = match matches.value_of("date").unwrap() {
|
||||||
|
|
@ -116,8 +119,10 @@ fn main() {
|
||||||
sys_deb::sync(&db,
|
sys_deb::sync(&db,
|
||||||
sysbyshort(&db, matches.value_of("sys").unwrap()),
|
sysbyshort(&db, matches.value_of("sys").unwrap()),
|
||||||
matches.value_of("mirror").unwrap(),
|
matches.value_of("mirror").unwrap(),
|
||||||
open::Path{ path: matches.value_of("contents").unwrap(), cache: true, canbelocal: true},
|
matches.value_of("contents").map(|e| { open::Path{ path: e, cache: true, canbelocal: true} }),
|
||||||
open::Path{ path: matches.value_of("packages").unwrap(), cache: true, canbelocal: true},
|
open::Path{ path: matches.value_of("packages").unwrap(), cache: true, canbelocal: true},
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
trace!("Exiting");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -204,6 +204,7 @@ pub fn decode(paths: &[&str], ent: &mut Read) -> io::Result<(digest::Digest,&'st
|
||||||
let dig = digest::digest(&digest::SHA1, &data);
|
let dig = digest::digest(&digest::SHA1, &data);
|
||||||
|
|
||||||
// TODO: Handle BOM? UTF-16?
|
// TODO: Handle BOM? UTF-16?
|
||||||
|
// TODO: This fails badly for ISO-2022-JP. How the hell do we cleanly fix that?
|
||||||
// If it passes as UTF-8, then just consider it UTF-8.
|
// If it passes as UTF-8, then just consider it UTF-8.
|
||||||
if let Ok(_) = str::from_utf8(&data) {
|
if let Ok(_) = str::from_utf8(&data) {
|
||||||
return Ok((dig, "utf8", unsafe { String::from_utf8_unchecked(data) } ));
|
return Ok((dig, "utf8", unsafe { String::from_utf8_unchecked(data) } ));
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,8 @@ use archread;
|
||||||
use man;
|
use man;
|
||||||
use archive::{Format,Archive,ArchiveEntry};
|
use archive::{Format,Archive,ArchiveEntry};
|
||||||
|
|
||||||
|
/// Global dry-run switch; `false` by default.
///
/// `main` assigns it from the `--dryrun` command-line flag, and `pkg()` reads
/// it right after `insert_pkg` so it can return before `index_pkg` is called.
// NOTE(review): as a `static mut`, every access needs `unsafe` and is only
// sound if no read can race with the write — confirm the flag is set before
// any concurrent work starts, or consider an `AtomicBool` instead.
pub static mut DRY_RUN: bool = false;
|
||||||
|
|
||||||
|
|
||||||
#[derive(Debug,Clone,Copy)]
|
#[derive(Debug,Clone,Copy)]
|
||||||
pub enum Date<'a> {
|
pub enum Date<'a> {
|
||||||
|
|
@ -72,6 +74,7 @@ fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i
|
||||||
Some(verid)
|
Some(verid)
|
||||||
|
|
||||||
} else if opt.force {
|
} else if opt.force {
|
||||||
|
// XXX: Should we update released & arch here?
|
||||||
verid = res.get(0).get(0);
|
verid = res.get(0).get(0);
|
||||||
info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
|
info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
|
||||||
tr.query("DELETE FROM man WHERE package = $1", &[&verid]).unwrap();
|
tr.query("DELETE FROM man WHERE package = $1", &[&verid]).unwrap();
|
||||||
|
|
@ -198,6 +201,9 @@ pub fn pkg(conn: &postgres::GenericConnection, opt: PkgOpt) {
|
||||||
tr.set_rollback();
|
tr.set_rollback();
|
||||||
|
|
||||||
let verid = match insert_pkg(&tr, &opt) { Some(x) => x, None => return };
|
let verid = match insert_pkg(&tr, &opt) { Some(x) => x, None => return };
|
||||||
|
if unsafe { DRY_RUN } {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
match index_pkg(&tr, opt, verid) {
|
match index_pkg(&tr, opt, verid) {
|
||||||
Err(e) => error!("Error reading package: {}", e),
|
Err(e) => error!("Error reading package: {}", e),
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ use std::io::{Result,BufReader,BufRead};
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::str;
|
use std::str;
|
||||||
use postgres;
|
use postgres;
|
||||||
|
use regex;
|
||||||
use regex::bytes::Regex;
|
use regex::bytes::Regex;
|
||||||
|
|
||||||
use man;
|
use man;
|
||||||
|
|
@ -11,7 +12,8 @@ use archive;
|
||||||
|
|
||||||
// Reference: https://wiki.debian.org/RepositoryFormat
|
// Reference: https://wiki.debian.org/RepositoryFormat
|
||||||
|
|
||||||
fn get_contents(f: open::Path) -> Result<HashSet<String>> {
|
fn get_contents(f: Option<open::Path>) -> Result<HashSet<String>> {
|
||||||
|
let f = match f { Some(f) => f, None => return Ok(HashSet::new()) };
|
||||||
let mut fd = f.open()?;
|
let mut fd = f.open()?;
|
||||||
let rd = archive::Archive::open_raw(&mut fd)?;
|
let rd = archive::Archive::open_raw(&mut fd)?;
|
||||||
let brd = BufReader::new(rd);
|
let brd = BufReader::new(rd);
|
||||||
|
|
@ -53,14 +55,24 @@ struct Pkg {
|
||||||
|
|
||||||
fn handlepkg(pg: &postgres::GenericConnection, sys: i32, mirror: &str, manpkgs: &HashSet<String>, pkg: &Pkg) {
|
fn handlepkg(pg: &postgres::GenericConnection, sys: i32, mirror: &str, manpkgs: &HashSet<String>, pkg: &Pkg) {
|
||||||
let name = match pkg.name { Some(ref x) => x, None => return };
|
let name = match pkg.name { Some(ref x) => x, None => return };
|
||||||
if !manpkgs.contains(name) {
|
if manpkgs.len() > 0 && !manpkgs.contains(name) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
let section = match pkg.section { Some(ref x) => x, None => { error!("Package {} has no section", name); return } };
|
let section = match pkg.section { Some(ref x) => x, None => { error!("Package {} has no section", name); return } };
|
||||||
let arch = match pkg.arch { Some(ref x) => x, None => { error!("Package {} has no arch", name); return } };
|
|
||||||
let version = match pkg.version { Some(ref x) => x, None => { error!("Package {} has no version", name); return } };
|
let version = match pkg.version { Some(ref x) => x, None => { error!("Package {} has no version", name); return } };
|
||||||
let filename = match pkg.filename { Some(ref x) => x, None => { error!("Package {} has no filename", name); return } };
|
let filename = match pkg.filename { Some(ref x) => x, None => { error!("Package {} has no filename", name); return } };
|
||||||
let uri = format!("{}{}", mirror, filename);
|
|
||||||
|
// Workarounds for some bad repos
|
||||||
|
let uri = if sys == 18 || sys == 19 {
|
||||||
|
let filename = regex::Regex::new("^(Debian-1.[12])/").unwrap().replace(filename, "dists/$1/main/");
|
||||||
|
if filename.starts_with("contrib/") {
|
||||||
|
format!("{}dists/Debian-1.{}/{}", mirror, if sys == 18 { 1 } else { 2 }, filename)
|
||||||
|
} else {
|
||||||
|
format!("{}{}", mirror, filename)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
format!("{}{}", mirror, filename)
|
||||||
|
};
|
||||||
|
|
||||||
pkg::pkg(pg, pkg::PkgOpt{
|
pkg::pkg(pg, pkg::PkgOpt{
|
||||||
force: false,
|
force: false,
|
||||||
|
|
@ -69,7 +81,7 @@ fn handlepkg(pg: &postgres::GenericConnection, sys: i32, mirror: &str, manpkgs:
|
||||||
pkg: &name,
|
pkg: &name,
|
||||||
ver: &version,
|
ver: &version,
|
||||||
date: pkg::Date::Deb,
|
date: pkg::Date::Deb,
|
||||||
arch: Some(arch),
|
arch: pkg.arch.as_ref().map(|e| &e[..]),
|
||||||
file: open::Path{
|
file: open::Path{
|
||||||
path: &uri,
|
path: &uri,
|
||||||
cache: false,
|
cache: false,
|
||||||
|
|
@ -79,9 +91,9 @@ fn handlepkg(pg: &postgres::GenericConnection, sys: i32, mirror: &str, manpkgs:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, contents: open::Path, packages: open::Path) {
|
pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, contents: Option<open::Path>, packages: open::Path) {
|
||||||
let manpkgs = match get_contents(contents) {
|
let manpkgs = match get_contents(contents) {
|
||||||
Err(e) => { error!("Can't read {}: {}", contents.path, e); return },
|
Err(e) => { error!("Can't read {}: {}", contents.unwrap().path, e); return },
|
||||||
Ok(x) => x,
|
Ok(x) => x,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -110,12 +122,13 @@ pub fn sync(pg: &postgres::GenericConnection, sys: i32, mirror: &str, contents:
|
||||||
}
|
}
|
||||||
if let Some(cap) = kv.captures(&line) {
|
if let Some(cap) = kv.captures(&line) {
|
||||||
let val = str::from_utf8(cap.at(2).unwrap()).unwrap();
|
let val = str::from_utf8(cap.at(2).unwrap()).unwrap();
|
||||||
match str::from_utf8(cap.at(1).unwrap()).unwrap() {
|
// Use case-insensitive matching; older package archives used lowercase keys.
|
||||||
"Package" => pkg.name = Some(val.to_string()),
|
match str::from_utf8(cap.at(1).unwrap()).unwrap().to_lowercase().as_ref() {
|
||||||
"Section" => pkg.section = Some(val.to_string()),
|
"package" => pkg.name = Some(val.to_string()),
|
||||||
"Version" => pkg.version = Some(val.to_string()),
|
"section" => pkg.section = Some(val.to_string()),
|
||||||
"Architecture" => pkg.arch = Some(val.to_string()),
|
"version" => pkg.version = Some(val.to_string()),
|
||||||
"Filename" => pkg.filename = Some(val.to_string()),
|
"architecture" => pkg.arch = Some(val.to_string()),
|
||||||
|
"filename" => pkg.filename = Some(val.to_string()),
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue