indexer: Add support for indexing RPM repositories
This code hasn't been thoroughly tested; I'll see how things go when indexing a live repo. And XML parsing sucks in every language.
This commit is contained in:
parent
f77db5f541
commit
608f79eb93
4 changed files with 200 additions and 0 deletions
10
indexer/Cargo.lock
generated
10
indexer/Cargo.lock
generated
|
|
@ -12,6 +12,7 @@ dependencies = [
|
||||||
"libc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)",
|
"libc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"postgres 0.13.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
"postgres 0.13.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"quick-xml 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"ring 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
"ring 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"url 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
"url 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
|
@ -350,6 +351,14 @@ dependencies = [
|
||||||
"postgres-protocol 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"postgres-protocol 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quick-xml"
|
||||||
|
version = "0.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
dependencies = [
|
||||||
|
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "redox_syscall"
|
name = "redox_syscall"
|
||||||
version = "0.1.16"
|
version = "0.1.16"
|
||||||
|
|
@ -575,6 +584,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
"checksum postgres 0.13.5 (registry+https://github.com/rust-lang/crates.io-index)" = "585ca978431cddac0aa926246f18fe30a47401eabbe9bbda573dc60389c10ea1"
|
"checksum postgres 0.13.5 (registry+https://github.com/rust-lang/crates.io-index)" = "585ca978431cddac0aa926246f18fe30a47401eabbe9bbda573dc60389c10ea1"
|
||||||
"checksum postgres-protocol 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "283e27d237a5772ef00c9e3f97e632f9a565ff514761af3e88e129576af7077c"
|
"checksum postgres-protocol 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "283e27d237a5772ef00c9e3f97e632f9a565ff514761af3e88e129576af7077c"
|
||||||
"checksum postgres-shared 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6f09b8819c2586032ed23bfbe95f6edfbebdc18bf9d0fe02c1f785f659958fbb"
|
"checksum postgres-shared 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6f09b8819c2586032ed23bfbe95f6edfbebdc18bf9d0fe02c1f785f659958fbb"
|
||||||
|
"checksum quick-xml 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e685d9ea689e56229debf59cb6d24e28021a9c950bbd988af24e43da3ea2bd79"
|
||||||
"checksum redox_syscall 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "8dd35cc9a8bdec562c757e3d43c1526b5c6d2653e23e2315065bc25556550753"
|
"checksum redox_syscall 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "8dd35cc9a8bdec562c757e3d43c1526b5c6d2653e23e2315065bc25556550753"
|
||||||
"checksum regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4278c17d0f6d62dfef0ab00028feb45bd7d2102843f80763474eeb1be8a10c01"
|
"checksum regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4278c17d0f6d62dfef0ab00028feb45bd7d2102843f80763474eeb1be8a10c01"
|
||||||
"checksum regex-syntax 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9191b1f57603095f105d317e375d19b1c9c5c3185ea9633a99a6dcbed04457"
|
"checksum regex-syntax 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9191b1f57603095f105d317e375d19b1c9c5c3185ea9633a99a6dcbed04457"
|
||||||
|
|
|
||||||
|
|
@ -17,3 +17,4 @@ clap = "2.20.0"
|
||||||
hyper = { version = "0.10.0", default-features = false }
|
hyper = { version = "0.10.0", default-features = false }
|
||||||
url = "1.2.3"
|
url = "1.2.3"
|
||||||
chrono = "0.2.25"
|
chrono = "0.2.25"
|
||||||
|
quick-xml = "0.5.0"
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ extern crate postgres;
|
||||||
extern crate hyper;
|
extern crate hyper;
|
||||||
extern crate url;
|
extern crate url;
|
||||||
extern crate chrono;
|
extern crate chrono;
|
||||||
|
extern crate quick_xml;
|
||||||
|
|
||||||
mod archive;
|
mod archive;
|
||||||
mod archread;
|
mod archread;
|
||||||
|
|
@ -22,6 +23,7 @@ mod sys_deb;
|
||||||
mod sys_freebsd1;
|
mod sys_freebsd1;
|
||||||
mod sys_freebsd2;
|
mod sys_freebsd2;
|
||||||
mod sys_rpmdir;
|
mod sys_rpmdir;
|
||||||
|
mod sys_rpm;
|
||||||
|
|
||||||
|
|
||||||
// Convenience function to get a system id by short-name. Panics if the system doesn't exist.
|
// Convenience function to get a system id by short-name. Panics if the system doesn't exist.
|
||||||
|
|
@ -80,6 +82,12 @@ fn main() {
|
||||||
(@arg cat: --cat +required +takes_value "Category to set for all packages")
|
(@arg cat: --cat +required +takes_value "Category to set for all packages")
|
||||||
(@arg mirror: --mirror +required +takes_value "Mirror URL")
|
(@arg mirror: --mirror +required +takes_value "Mirror URL")
|
||||||
)
|
)
|
||||||
|
(@subcommand rpm =>
|
||||||
|
(about: "Index an RPM repository")
|
||||||
|
(@arg sys: --sys +required +takes_value "System short-name")
|
||||||
|
(@arg cat: --cat +required +takes_value "Category to set for all packages")
|
||||||
|
(@arg mirror: --mirror +required +takes_value "Mirror URL")
|
||||||
|
)
|
||||||
).get_matches();
|
).get_matches();
|
||||||
|
|
||||||
unsafe { pkg::DRY_RUN = arg.is_present("dry") };
|
unsafe { pkg::DRY_RUN = arg.is_present("dry") };
|
||||||
|
|
@ -169,5 +177,13 @@ fn main() {
|
||||||
).unwrap_or_else(|e| error!("{}", e));
|
).unwrap_or_else(|e| error!("{}", e));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(matches) = arg.subcommand_matches("rpm") {
|
||||||
|
sys_rpm::sync(&db,
|
||||||
|
sysbyshort(&db, matches.value_of("sys").unwrap()),
|
||||||
|
matches.value_of("cat").unwrap(),
|
||||||
|
matches.value_of("mirror").unwrap()
|
||||||
|
).unwrap_or_else(|e| error!("{}", e));
|
||||||
|
}
|
||||||
|
|
||||||
trace!("Exiting");
|
trace!("Exiting");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
173
indexer/src/sys_rpm.rs
Normal file
173
indexer/src/sys_rpm.rs
Normal file
|
|
@ -0,0 +1,173 @@
|
||||||
|
use std::collections::HashSet;
|
||||||
|
use std::io::BufReader;
|
||||||
|
use std::str::FromStr;
|
||||||
|
use std::error::Error;
|
||||||
|
use chrono::NaiveDateTime;
|
||||||
|
use postgres;
|
||||||
|
use quick_xml as xml;
|
||||||
|
|
||||||
|
use archive;
|
||||||
|
use open;
|
||||||
|
use pkg;
|
||||||
|
use man;
|
||||||
|
|
||||||
|
|
||||||
|
// Looks up the attribute named `attr` on the XML element `e` and returns its unescaped value as
// an owned String.
//
// Errors:
// - an attribute could not be parsed/unescaped by quick_xml,
// - the matching value is not valid UTF-8,
// - no attribute with that name exists. This case used to be reported as the unrelated
//   `xml::error::Error::EOL`; it now yields a message naming the attribute and the element, so
//   that a failure on live repository metadata can actually be diagnosed.
fn xml_getattr(e: &xml::Element, attr: &str) -> Result<String,Box<Error>> {
    let wanted = attr.as_bytes();
    for kv in e.unescaped_attributes() {
        // The iterator yields a tuple error; only its first component (the error itself) is kept.
        let (key, val) = kv.map_err(|(err,_)| err)?;
        if key == wanted {
            return Ok(String::from_utf8(val.into_owned())?);
        }
    }
    Err(format!("Missing XML attribute '{}' on <{}>", attr, String::from_utf8_lossy(e.name())).into())
}
|
||||||
|
|
||||||
|
|
||||||
|
// Information collected about a single <package> element while reading repository metadata.
// Every field starts out empty (see Default) and is filled in as the corresponding XML is seen,
// so any field may still be unset if the metadata did not provide it.
// Debug is derived so that a record can be included in log messages.
#[derive(Default, Debug)]
struct PkgInfo {
    // Package name, from the `name` attribute of <package> or from a <name> element.
    name: Option<String>,
    // Package architecture, from the `arch` attribute of <package> or from an <arch> element.
    arch: Option<String>,
    // Version string built as "<ver>-<rel>" from the attributes of <version>.
    ver: Option<String>,
    // Value of the `build` attribute of <time>, parsed as an integer.
    date: Option<i64>,
    // Value of the `href` attribute of <location>.
    path: Option<String>,
    // True if at least one <file> entry of this package was accepted by man::ismanpath().
    hasman: bool,
}
|
||||||
|
|
||||||
|
|
||||||
|
// Shared function to read primary.xml.gz and filelists.xml.gz. Runs the callback for each package
|
||||||
|
// with the info that was found.
|
||||||
|
fn readpkgs<F>(url: String, mut cb: F) -> Result<(),Box<Error>>
|
||||||
|
where F: FnMut(PkgInfo)
|
||||||
|
{
|
||||||
|
debug!("Reading {}", url);
|
||||||
|
let mut fd = open::Path{path: &url, cache: true, canbelocal: false}.open()?;
|
||||||
|
let xml = xml::XmlReader::from_reader(
|
||||||
|
BufReader::new(
|
||||||
|
archive::Archive::open_raw(&mut fd)?
|
||||||
|
)
|
||||||
|
).trim_text(true);
|
||||||
|
|
||||||
|
let mut savestr = false;
|
||||||
|
let mut saved = None;
|
||||||
|
let mut pkg = PkgInfo::default();
|
||||||
|
|
||||||
|
let arch_src = Some("src".to_string());
|
||||||
|
|
||||||
|
for event in xml {
|
||||||
|
let event = event.map_err(|(e,_)| e)?;
|
||||||
|
match event {
|
||||||
|
|
||||||
|
xml::Event::Start(ref e) =>
|
||||||
|
match e.name() {
|
||||||
|
b"name" |
|
||||||
|
b"file" |
|
||||||
|
b"arch" => savestr = true,
|
||||||
|
b"version" => pkg.ver = Some(format!("{}-{}", xml_getattr(e, "ver")?, xml_getattr(e, "rel")?)),
|
||||||
|
b"location" => pkg.path = Some(xml_getattr(e, "href")?),
|
||||||
|
b"time" => pkg.date = Some(i64::from_str(&xml_getattr(e, "build")?)?),
|
||||||
|
b"package" => {
|
||||||
|
pkg.name = xml_getattr(e, "name").ok();
|
||||||
|
pkg.arch = xml_getattr(e, "arch").ok();
|
||||||
|
},
|
||||||
|
_ => (),
|
||||||
|
},
|
||||||
|
|
||||||
|
xml::Event::Text(e) =>
|
||||||
|
if savestr {
|
||||||
|
saved = Some(e.into_unescaped_string()?);
|
||||||
|
savestr = false
|
||||||
|
},
|
||||||
|
|
||||||
|
xml::Event::End(ref e) => {
|
||||||
|
savestr = false;
|
||||||
|
match e.name() {
|
||||||
|
b"name" => pkg.name = Some(saved.take().unwrap()),
|
||||||
|
b"arch" => pkg.arch = Some(saved.take().unwrap()),
|
||||||
|
b"file" => pkg.hasman = pkg.hasman || man::ismanpath(&saved.take().unwrap()),
|
||||||
|
b"package" => {
|
||||||
|
if pkg.arch != arch_src {
|
||||||
|
cb(pkg);
|
||||||
|
}
|
||||||
|
pkg = PkgInfo::default();
|
||||||
|
},
|
||||||
|
_ => (),
|
||||||
|
};
|
||||||
|
},
|
||||||
|
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Reads repomd.xml and returns the path to the primary.xml.gz and filelists.xml.gz
|
||||||
|
fn repomd(url: String) -> Result<(String,String),Box<Error>> {
|
||||||
|
debug!("Reading {}", url);
|
||||||
|
let mut fd = open::Path{path: &url, cache: true, canbelocal: false}.open()?;
|
||||||
|
let xml = xml::XmlReader::from_reader(
|
||||||
|
BufReader::new(
|
||||||
|
archive::Archive::open_raw(&mut fd)?
|
||||||
|
)
|
||||||
|
).trim_text(true);
|
||||||
|
|
||||||
|
let mut primary = String::new();
|
||||||
|
let mut filelists = String::new();
|
||||||
|
let mut datatype = 0;
|
||||||
|
|
||||||
|
for event in xml {
|
||||||
|
if let xml::Event::Start(ref e) = event.map_err(|(e,_)| e)? {
|
||||||
|
match e.name() {
|
||||||
|
b"data" =>
|
||||||
|
datatype = match &xml_getattr(e, "type")? as &str {
|
||||||
|
"primary" => 1,
|
||||||
|
"filelists" => 2,
|
||||||
|
_ => 0,
|
||||||
|
},
|
||||||
|
|
||||||
|
b"location" =>
|
||||||
|
match datatype {
|
||||||
|
1 => primary = xml_getattr(e, "href")?,
|
||||||
|
2 => filelists = xml_getattr(e, "href")?,
|
||||||
|
_ => (),
|
||||||
|
},
|
||||||
|
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok((primary, filelists))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
pub fn sync(pg: &postgres::GenericConnection, sys: i32, cat: &str, mirror: &str) -> Result<(),Box<Error>> {
|
||||||
|
let(primary, filelists) = repomd(format!("{}repodata/repomd.xml", mirror))?;
|
||||||
|
|
||||||
|
let mut pkgswithman = HashSet::new();
|
||||||
|
readpkgs(format!("{}{}", mirror, filelists), |pkg| {
|
||||||
|
if pkg.hasman { pkgswithman.insert(pkg.name.unwrap()); () }
|
||||||
|
})?;
|
||||||
|
|
||||||
|
readpkgs(format!("{}{}", mirror, primary), |pkg| {
|
||||||
|
let name = pkg.name.unwrap();
|
||||||
|
if pkgswithman.contains(&name) {
|
||||||
|
let uri = format!("{}{}", mirror, pkg.path.unwrap());
|
||||||
|
let date = NaiveDateTime::from_timestamp(pkg.date.unwrap(), 0).format("%Y-%m-%d").to_string();
|
||||||
|
pkg::pkg(pg, pkg::PkgOpt{
|
||||||
|
force: false,
|
||||||
|
sys: sys,
|
||||||
|
cat: cat,
|
||||||
|
pkg: &name,
|
||||||
|
ver: &pkg.ver.unwrap(),
|
||||||
|
date: pkg::Date::Known(&date),
|
||||||
|
arch: Some(&pkg.arch.unwrap()),
|
||||||
|
file: open::Path{
|
||||||
|
path: &uri,
|
||||||
|
cache: false,
|
||||||
|
canbelocal: false,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue