Indexer: Improve logging + cache management

This commit is contained in:
Yorhel 2016-11-20 07:31:55 +01:00
parent 4bdd91f65e
commit a1e5a2d80d
3 changed files with 29 additions and 14 deletions

View file

@@ -72,6 +72,11 @@ fn main() {
.filter(Some("postgres"), if verbose >= 4 { log::LogLevelFilter::Trace } else { log::LogLevelFilter::Info }) .filter(Some("postgres"), if verbose >= 4 { log::LogLevelFilter::Trace } else { log::LogLevelFilter::Info })
.init().unwrap(); .init().unwrap();
if let Err(e) = open::clear_cache() {
error!("Error clearing cache: {}", e);
return;
}
let dbhost = match std::env::var("MANNED_PG") { let dbhost = match std::env::var("MANNED_PG") {
Ok(x) => x, Ok(x) => x,
Err(_) => { error!("MANNED_PG not set."); return } Err(_) => { error!("MANNED_PG not set."); return }

View file

@@ -1,5 +1,5 @@
use std::io::{Read,Result,Error,ErrorKind,copy}; use std::io::{Read,Result,Error,ErrorKind,copy};
use std::fs::{File,create_dir_all,metadata}; use std::fs::{File,create_dir_all,metadata,read_dir,remove_file};
use std::hash::{Hash,Hasher,SipHasher}; use std::hash::{Hash,Hasher,SipHasher};
use std::time::{Duration,SystemTime}; use std::time::{Duration,SystemTime};
use url::Url; use url::Url;
@@ -7,7 +7,7 @@ use hyper;
const CACHE_PATH: &'static str = "/var/tmp/manned-indexer"; const CACHE_PATH: &'static str = "/var/tmp/manned-indexer";
const CACHE_TIME: u64 = 23*3600; const CACHE_TIME: u64 = 20*3600;
#[derive(Clone,Copy)] #[derive(Clone,Copy)]
@@ -47,6 +47,19 @@ fn file(path: &str) -> Result<Box<Read>> {
} }
/// Remove stale entries from the on-disk download cache.
///
/// Ensures the cache directory exists, then deletes every file in it whose
/// modification time is older than `CACHE_TIME` seconds.
///
/// # Errors
/// Propagates any I/O error from directory creation, directory listing,
/// metadata lookup, or file removal.
pub fn clear_cache() -> Result<()> {
    create_dir_all(CACHE_PATH)?;
    // Hoist the cutoff out of the loop; SystemTime::now() need only be read once.
    let cutoff = SystemTime::now() - Duration::from_secs(CACHE_TIME);
    for f in read_dir(CACHE_PATH)? {
        let f = f?.path();
        let m = metadata(&f)?;
        // Propagate instead of unwrap(): modified() returns Err on platforms
        // or filesystems that do not expose an mtime.
        if m.modified()? < cutoff {
            remove_file(&f)?;
        }
    }
    Ok(())
}
impl<'a> Path<'a> { impl<'a> Path<'a> {
pub fn open(&self) -> Result<Box<Read>> { pub fn open(&self) -> Result<Box<Read>> {
if let Ok(url) = Url::parse(self.path) { if let Ok(url) = Url::parse(self.path) {
@@ -56,12 +69,9 @@ impl<'a> Path<'a> {
if self.cache { if self.cache {
let cfn = cache_fn(&url); let cfn = cache_fn(&url);
if let Ok(m) = metadata(&cfn) { if let Ok(f) = file(&cfn) {
if m.modified().unwrap() > SystemTime::now() - Duration::from_secs(CACHE_TIME) { return Ok(f);
return file(&cfn);
}
} }
try!(create_dir_all(CACHE_PATH));
{ {
let mut rd = try!(fetch(url.as_str())); let mut rd = try!(fetch(url.as_str()));
let mut wr = try!(File::create(&cfn)); let mut wr = try!(File::create(&cfn));

View file

@@ -20,6 +20,8 @@ pub struct PkgOpt<'a> {
fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i32> { fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i32> {
let pkginfo = format!("sys {} / {} / {} - {} @ {} @ {}", opt.sys, opt.cat, opt.pkg, opt.ver, opt.date, opt.file.path);
// The ON CONFLICT .. DO UPDATE is used instead of DO NOTHING because in that case the // The ON CONFLICT .. DO UPDATE is used instead of DO NOTHING because in that case the
// RETURNING clause wouldn't give us a package id. // RETURNING clause wouldn't give us a package id.
let q = "INSERT INTO packages (system, category, name) VALUES($1, $2, $3) let q = "INSERT INTO packages (system, category, name) VALUES($1, $2, $3)
@@ -39,17 +41,17 @@ fn insert_pkg(tr: &postgres::transaction::Transaction, opt: &PkgOpt) -> Option<i
if res.is_empty() { if res.is_empty() {
let q = "INSERT INTO package_versions (package, version, released, arch) VALUES($1, $2, $3::text::date, $4) RETURNING id"; let q = "INSERT INTO package_versions (package, version, released, arch) VALUES($1, $2, $3::text::date, $4) RETURNING id";
verid = tr.query(q, &[&pkgid, &opt.ver, &opt.date, &opt.arch]).unwrap().get(0).get(0); verid = tr.query(q, &[&pkgid, &opt.ver, &opt.date, &opt.arch]).unwrap().get(0).get(0);
info!("New package pkgid {} verid {}", pkgid, verid); info!("New package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
Some(verid) Some(verid)
} else if opt.force { } else if opt.force {
verid = res.get(0).get(0); verid = res.get(0).get(0);
info!("Overwriting package pkgid {} verid {}", pkgid, verid); info!("Overwriting package pkgid {} verid {}, {}", pkgid, verid, pkginfo);
tr.query("DELETE FROM man WHERE package = $1", &[&verid]).unwrap(); tr.query("DELETE FROM man WHERE package = $1", &[&verid]).unwrap();
Some(verid) Some(verid)
} else { } else {
info!("Package already in database, pkgid {} verid {}", pkgid, res.get(0).get::<usize,i32>(0)); debug!("Package already in database, pkgid {} verid {}, {}", pkgid, res.get(0).get::<usize,i32>(0), pkginfo);
None None
} }
} }
@@ -86,7 +88,7 @@ fn insert_man(tr: &postgres::GenericConnection, verid: i32, paths: &[&str], ent:
for path in paths { for path in paths {
insert_man_row(tr, verid, path, enc, dig.as_ref()); insert_man_row(tr, verid, path, enc, dig.as_ref());
debug!("Inserted man page: {} ({})", path, enc); info!("Inserted man page: {} ({})", path, enc);
} }
} }
@@ -100,7 +102,7 @@ fn insert_link(tr: &postgres::GenericConnection, verid: i32, src: &str, dest: &s
let hash: Vec<u8> = res.get(0).get(0); let hash: Vec<u8> = res.get(0).get(0);
let enc: String = res.get(0).get(1); let enc: String = res.get(0).get(1);
insert_man_row(tr, verid, src, &enc, &hash); insert_man_row(tr, verid, src, &enc, &hash);
debug!("Inserted man link: {} -> {}", src, dest); info!("Inserted man link: {} -> {}", src, dest);
} }
@@ -149,8 +151,6 @@ fn index_pkg(tr: &postgres::GenericConnection, opt: &PkgOpt, verid: i32) -> std:
pub fn pkg(conn: &postgres::GenericConnection, opt: PkgOpt) { pub fn pkg(conn: &postgres::GenericConnection, opt: PkgOpt) {
info!("Handling pkg: {} / {} / {} - {} @ {} @ {}", opt.sys, opt.cat, opt.pkg, opt.ver, opt.date, opt.file.path);
let tr = conn.transaction().unwrap(); let tr = conn.transaction().unwrap();
tr.set_rollback(); tr.set_rollback();