indexer: Support for indexing FreeBSD <9.3 repositories
This commit is contained in:
parent
1ca0cd4325
commit
defaa032f8
9 changed files with 892 additions and 826 deletions
|
|
@ -19,6 +19,7 @@ mod open;
|
|||
mod pkg;
|
||||
mod sys_arch;
|
||||
mod sys_deb;
|
||||
mod sys_freebsd1;
|
||||
|
||||
|
||||
// Convenience function to get a system id by short-name. Panics if the system doesn't exist.
|
||||
|
|
@ -60,6 +61,12 @@ fn main() {
|
|||
(@arg contents: --contents +takes_value "Contents file")
|
||||
(@arg packages: --packages +required +takes_value "Packages file")
|
||||
)
|
||||
(@subcommand freebsd1 =>
|
||||
(about: "Index packages from a FreeBSD <= 9.2 package repo")
|
||||
(@arg sys: --sys +required +takes_value "System short-name")
|
||||
(@arg mirror: --mirror +required +takes_value "Mirror URL (should point to the packages/ dir)")
|
||||
(@arg arch: --arch +required +takes_value "Arch")
|
||||
)
|
||||
).get_matches();
|
||||
|
||||
unsafe { pkg::DRY_RUN = arg.is_present("dry") };
|
||||
|
|
@ -93,6 +100,7 @@ fn main() {
|
|||
if let Some(matches) = arg.subcommand_matches("pkg") {
|
||||
let date = match matches.value_of("date").unwrap() {
|
||||
"deb" => pkg::Date::Deb,
|
||||
"desc" => pkg::Date::Desc,
|
||||
s => pkg::Date::Known(s),
|
||||
};
|
||||
pkg::pkg(&db, pkg::PkgOpt {
|
||||
|
|
@ -124,5 +132,13 @@ fn main() {
|
|||
);
|
||||
}
|
||||
|
||||
if let Some(matches) = arg.subcommand_matches("freebsd1") {
|
||||
sys_freebsd1::sync(&db,
|
||||
sysbyshort(&db, matches.value_of("sys").unwrap()),
|
||||
matches.value_of("arch").unwrap(),
|
||||
matches.value_of("mirror").unwrap()
|
||||
).unwrap_or_else(|e| error!("{}", e));
|
||||
}
|
||||
|
||||
trace!("Exiting");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
use std::io::{Read,Result,Error,ErrorKind,copy};
|
||||
use std::io::{BufRead,BufReader,Read,Result,Error,ErrorKind,copy};
|
||||
use std::fs::{File,create_dir_all,metadata,read_dir,remove_file};
|
||||
use std::hash::{Hash,Hasher,SipHasher};
|
||||
use std::time::{Duration,SystemTime};
|
||||
use regex::bytes::Regex;
|
||||
use url::Url;
|
||||
use url::percent_encoding::percent_decode;
|
||||
use hyper;
|
||||
|
||||
|
||||
|
|
@ -90,4 +92,41 @@ impl<'a> Path<'a> {
|
|||
Err(Error::new(ErrorKind::Other, "Invalid URL"))
|
||||
}
|
||||
}
|
||||
|
||||
// Attempt to parse a HTTP directory listing. Returns the name and whether it's a directory for
|
||||
// each item.
|
||||
// Only tested with a lighttpd/1.4 and apache 2.4 server.
|
||||
// (I tried using FTP before, but that didn't work out well; While FTP does return a more easily
|
||||
// parsable file list, some servers have issues with generating a list of a large directory)
|
||||
pub fn dirlist(&self) -> Result<Vec<(String,bool)>> {
|
||||
lazy_static!(
|
||||
static ref RE: Regex = Regex::new("(?i:<a +href *= *\"([^?/\"]+)(/?)\">)").unwrap();
|
||||
);
|
||||
let rd = self.open()?;
|
||||
let brd = BufReader::new(rd);
|
||||
let mut res = Vec::new();
|
||||
for line in brd.split(b'\n') {
|
||||
let line = line?;
|
||||
let mut matches = RE.captures_iter(&line);
|
||||
let first = matches.next();
|
||||
|
||||
// There's only a single link per line.
|
||||
if first.is_some() && matches.next().is_some() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(cap) = first {
|
||||
let name = cap.at(1).unwrap();
|
||||
if name == b".." || name.starts_with(b"/") {
|
||||
continue;
|
||||
}
|
||||
if let Ok(name) = percent_decode(name).decode_utf8() {
|
||||
let isdir = cap.at(2) == Some(b"/");
|
||||
res.push((name.to_string(), isdir));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ pub enum Date<'a> {
|
|||
Known(&'a str), // Given in PkgOpt
|
||||
Found(i64), // Found in package
|
||||
Deb, // Should be read from the timestamp of the 'debian-binary' file
|
||||
Desc, // Should be read from the '+DESC' file (FreeBSD <= 9.2)
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -24,6 +25,7 @@ impl<'a> Date<'a> {
|
|||
// TODO: Validate that the mtime() date is sensible (e.g. 1990 < date < now)
|
||||
*self = match *self {
|
||||
Date::Deb if ent.format() == Format::Ar && ent.path() == Some("debian-binary") => Date::Found(ent.mtime()),
|
||||
Date::Desc if ent.path() == Some("+DESC") => Date::Found(ent.mtime()),
|
||||
x => x,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
186
indexer/src/sys_freebsd1.rs
Normal file
186
indexer/src/sys_freebsd1.rs
Normal file
|
|
@ -0,0 +1,186 @@
|
|||
use std::collections::HashSet;
|
||||
use std::ascii::AsciiExt;
|
||||
use std::io::Result;
|
||||
use regex::Regex;
|
||||
use postgres;
|
||||
|
||||
use open;
|
||||
use pkg;
|
||||
|
||||
|
||||
// Sync a FreeBSD <= 9.2 package respository.
|
||||
//
|
||||
// Reads "." to get a list of categories, "Latest" to get a list of all packages, and all category
|
||||
// directories to figure out which package belongs in which category.
|
||||
//
|
||||
// Splitting a package filename into a package name and version is a hard problem. There are two
|
||||
// strategies:
|
||||
// 1. Use the listing from 'Latest' to get the list of package names, and use that to find the
|
||||
// longest matching substring in the package filename to split off the version.
|
||||
// 2. Guessing, like splitver() below.
|
||||
//
|
||||
// Both strategies lead to errors. (1) doesn't always work because the 'Latest' directory tends to
|
||||
// miss a few packages. (2) doesn't always work because version strings are too damn irregular.
|
||||
// This function tries (1) first, then falls back to (2) if it couldn't find a matching package.
|
||||
// This combined solution also isn't perfect, as sometimes a package prefix does exist, but is
|
||||
// incomplete. E.g. 'pear-PHPUnit-1.3.3.tbz' is parsed as 'pear version PHPUnit-1.3.3' rather than
|
||||
// 'pear-PHPUnit version 1.3.3', because there is a 'pear' package in 'Latest' but no
|
||||
// 'pear-PHPUnit'. This is handled with a static list of package names to add to the 'pkgs' list,
|
||||
// see EXTRA_PKGS below.
|
||||
pub fn sync(pg: &postgres::GenericConnection, sys: i32, arch: &str, mirror: &str) -> Result<()> {
|
||||
let path = format!("{}Latest/", mirror);
|
||||
let mut pkgs : Vec<String> = open::Path{path: &path, cache: true, canbelocal: false}
|
||||
.dirlist()?.into_iter()
|
||||
.map(|(n,_)| trimext(&n).to_string())
|
||||
.collect();
|
||||
|
||||
pkgs.extend(EXTRA_PKGS.into_iter().map(|e| e.to_string()));
|
||||
pkgs.sort_by(|a, b| b.len().cmp(&a.len())); // Longest first
|
||||
|
||||
// List of packages (name+version) we've already seen; Some packages are present in multiple
|
||||
// categories, we only index the first found.
|
||||
let mut seenpkgs = HashSet::new();
|
||||
|
||||
let cats = open::Path{path: mirror, cache: true, canbelocal: false}
|
||||
.dirlist()?.into_iter()
|
||||
.filter(|&(ref n,i)| i && n != "All" && n != "Latest")
|
||||
.map(|(n,_)| n);
|
||||
|
||||
for cat in cats {
|
||||
trace!("Category: {}", cat);
|
||||
let path = format!("{}{}/", mirror, cat);
|
||||
let lst = open::Path{path: &path, cache: true, canbelocal: false}.dirlist()?.into_iter().map(|(n,_)| n);
|
||||
for f in lst {
|
||||
let name = trimext(&f);
|
||||
if !name.is_ascii() {
|
||||
warn!("Non-ASCII package name: {}", f);
|
||||
continue;
|
||||
}
|
||||
|
||||
// The take() mystifies me; why is it necessary?
|
||||
let pkg = pkgs.iter()
|
||||
.find(|p| name.len() > p.len()+1 && name.starts_with(&p as &str) && &name[p.len() .. p.len()+1] == "-")
|
||||
.take().map(|p| (p as &str, &name[p.len()+1 .. ]))
|
||||
.or_else(|| splitver(name));
|
||||
|
||||
if let Some((pkg, ver)) = pkg {
|
||||
if !seenpkgs.insert((pkg.to_string(), ver.to_string())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let path = format!("{}{}/{}", mirror, cat, f);
|
||||
pkg::pkg(pg, pkg::PkgOpt{
|
||||
force: false,
|
||||
sys: sys,
|
||||
cat: &cat,
|
||||
pkg: pkg,
|
||||
ver: ver,
|
||||
date: pkg::Date::Desc,
|
||||
arch: Some(arch),
|
||||
file: open::Path{
|
||||
path: &path,
|
||||
cache: false,
|
||||
canbelocal: false,
|
||||
},
|
||||
});
|
||||
} else {
|
||||
warn!("Unknown package: {}/{}", cat, f);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
// Strip one trailing package archive extension (".tgz" or ".tbz") from a file name.
//
// Exactly one extension is removed: "foo.tbz.tgz" becomes "foo.tbz", not "foo". Names without
// either extension are returned unchanged.
fn trimext(n: &str) -> &str {
    if n.ends_with(".tgz") || n.ends_with(".tbz") {
        // Both extensions are 4 ASCII bytes, so this always cuts on a character boundary.
        &n[.. n.len() - 4]
    } else {
        n
    }
}
|
||||
|
||||
|
||||
fn splitver(n: &str) -> Option<(&str, &str)> {
|
||||
lazy_static!(
|
||||
static ref RE1: Regex = Regex::new("^(.+?)-([0-9].*)$").unwrap();
|
||||
static ref RE2: Regex = Regex::new("^(.+)-([^-]+)$").unwrap();
|
||||
);
|
||||
if let Some(cap) = RE1.captures(n) {
|
||||
Some((cap.at(1).unwrap(), cap.at(2).unwrap()))
|
||||
} else if let Some(cap) = RE2.captures(n) {
|
||||
Some((cap.at(1).unwrap(), cap.at(2).unwrap()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// This list may not be complete, and these packages may not necessarily have man pages.
|
||||
const EXTRA_PKGS : &'static [&'static str] = &[
|
||||
"amanda-client",
|
||||
"amanda-server",
|
||||
"apache-event",
|
||||
"apache-itk",
|
||||
"apache-peruser",
|
||||
"apache-tomcat",
|
||||
"apache-worker",
|
||||
"bison-devel",
|
||||
"boxbackup-devel",
|
||||
"boxbackup-devel",
|
||||
"ffmpeg-devel",
|
||||
"flex-sdk",
|
||||
"fpc-gdb",
|
||||
"freeradius-mysql",
|
||||
"gdb-insight",
|
||||
"glib-reference",
|
||||
"gmime-24",
|
||||
"gmime-24-sharp",
|
||||
"gtk-reference",
|
||||
"gtk-sharp",
|
||||
"gtkmm-reference",
|
||||
"horde-content",
|
||||
"horde-groupware",
|
||||
"horde-timeobjects",
|
||||
"horde-webmail",
|
||||
"hping-devel",
|
||||
"ja-jvim-direct_canna",
|
||||
"ja-mutt-devel",
|
||||
"kdelibs-experimental",
|
||||
"kdepim-runtime",
|
||||
"lame-devel",
|
||||
"libdivxdecore-devel",
|
||||
"libquicktime-lame",
|
||||
"libtorrent-rasterbar",
|
||||
"linux-netscape-communicator",
|
||||
"mkisofs-devel",
|
||||
"mldonkey-core-devel",
|
||||
"mldonkey-gui-devel",
|
||||
"mod_log_sql-dtc",
|
||||
"nethack-qt",
|
||||
"nfdump-devel",
|
||||
"openssl-beta",
|
||||
"pear-PHPUnit",
|
||||
"pear-XML_Query2XML",
|
||||
"pear-phpunit-PHPUnit",
|
||||
"pgadmin3-unicode",
|
||||
"proftpd-mod_ldap",
|
||||
"proftpd-mod_sql_mysql",
|
||||
"proftpd-mod_sql_odbc",
|
||||
"proftpd-mod_sql_postgres",
|
||||
"proftpd-mod_sql_sqlite",
|
||||
"proftpd-mod_sql_tds",
|
||||
"qt-static",
|
||||
"rsyslog-gnutls",
|
||||
"rsyslog-gssapi",
|
||||
"rsyslog-libdbi",
|
||||
"rsyslog-mysql",
|
||||
"rsyslog-pgsql",
|
||||
"rsyslog-relp",
|
||||
"rsyslog-rfc3195",
|
||||
"rsyslog-snmp",
|
||||
"samba-libsmbclient",
|
||||
"samba-nmblookup",
|
||||
"squirrelmail-shared_calendars-plugin",
|
||||
"tcl-thread",
|
||||
"wxgtk2-common-devel",
|
||||
"wxgtk2-contrib-common-devel",
|
||||
"wxgtk2-utils-devel",
|
||||
];
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue