indexer: Switch to ureq + debloat stuff a bit
And stop using the "url" crate directly; its API is too unstable for it to be worth using. That applies to several other crates as well, but meh.
This commit is contained in:
parent
4588e67b64
commit
c48feedc85
5 changed files with 184 additions and 1065 deletions
1190
indexer/Cargo.lock
generated
1190
indexer/Cargo.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -11,10 +11,10 @@ lazy_static = "1.0.0"
|
|||
libc = "0.2.39"
|
||||
libarchive3-sys = "0.1.2"
|
||||
encoding = { git = "https://github.com/lifthrasiir/rust-encoding", features = ["no-optimized-legacy-encoding"] }
|
||||
ring = "0.14.6"
|
||||
ring = "0.16.20"
|
||||
postgres = "0.17.5"
|
||||
clap = "2.31.2"
|
||||
reqwest = "0.9.17"
|
||||
url = "1.7.0"
|
||||
ureq = "2.3.1"
|
||||
percent-encoding = "2.0"
|
||||
chrono = "0.4.0"
|
||||
quick-xml = "0.14.0"
|
||||
|
|
|
|||
|
|
@ -8,10 +8,10 @@ extern crate libc;
|
|||
extern crate ring;
|
||||
extern crate encoding;
|
||||
extern crate postgres;
|
||||
extern crate reqwest;
|
||||
extern crate url;
|
||||
extern crate ureq;
|
||||
extern crate chrono;
|
||||
extern crate quick_xml;
|
||||
extern crate percent_encoding;
|
||||
|
||||
mod archive;
|
||||
mod archread;
|
||||
|
|
|
|||
|
|
@ -204,7 +204,7 @@ pub fn decode(paths: &[&str], ent: &mut Read) -> io::Result<(digest::Digest,&'st
|
|||
return Err(io::Error::new(io::ErrorKind::InvalidData, e));
|
||||
}
|
||||
|
||||
let dig = digest::digest(&digest::SHA1, &data);
|
||||
let dig = digest::digest(&digest::SHA1_FOR_LEGACY_USE_ONLY, &data);
|
||||
|
||||
// Create a list of encodings to try, starting with UTF-8
|
||||
let mut encs : Vec<EncodingRef> = vec![all::UTF_8];
|
||||
|
|
|
|||
|
|
@ -3,9 +3,8 @@ use std::fs::{File,create_dir_all,metadata,read_dir,remove_file};
|
|||
use std::time::{Duration,SystemTime};
|
||||
use regex::bytes::Regex;
|
||||
use ring::digest;
|
||||
use url::Url;
|
||||
use url::percent_encoding::percent_decode;
|
||||
use reqwest;
|
||||
use percent_encoding::percent_decode;
|
||||
use ureq;
|
||||
|
||||
|
||||
const CACHE_PATH: &'static str = "/var/tmp/manned-indexer";
|
||||
|
|
@ -20,29 +19,16 @@ pub struct Path<'a> {
|
|||
}
|
||||
|
||||
|
||||
fn cache_fn(url: &Url) -> String {
|
||||
let name = url.path_segments().unwrap().last().unwrap();
|
||||
let name = if name == "" { "index" } else { name };
|
||||
|
||||
let hash = digest::digest(&digest::SHA1, url.as_str().as_bytes())
|
||||
.as_ref()[0..8].into_iter()
|
||||
.fold(0u64, |a, &e| (a<<8) + e as u64);
|
||||
|
||||
format!("{}/{}-{}-{:x}", CACHE_PATH, url.host_str().unwrap(), name, hash)
|
||||
}
|
||||
|
||||
|
||||
fn fetch(url: &str) -> Result<Box<Read>> {
|
||||
let res = try!(reqwest::Client::new()
|
||||
.get(url)
|
||||
.header("User-Agent", "Man page crawler (info@manned.org; https://manned.org/)")
|
||||
.send()
|
||||
.map_err(|e| Error::new(ErrorKind::Other, format!("Reqwest: {}", e)))
|
||||
let res = try!(ureq::get(url)
|
||||
.set("User-Agent", "Man page crawler (info@manned.org; https://manned.org/)")
|
||||
.call()
|
||||
.map_err(|e| Error::new(ErrorKind::Other, format!("Ureq: {}", e)))
|
||||
);
|
||||
if !res.status().is_success() {
|
||||
if res.status() != 200 {
|
||||
return Err(Error::new(ErrorKind::Other, format!("HTTP: {}", res.status()) ));
|
||||
}
|
||||
Ok(Box::new(res) as Box<Read>)
|
||||
Ok(Box::new(res.into_reader()) as Box<Read>)
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -66,27 +52,26 @@ pub fn clear_cache() -> Result<()> {
|
|||
|
||||
impl<'a> Path<'a> {
|
||||
pub fn open(&self) -> Result<Box<Read>> {
|
||||
if let Ok(url) = Url::parse(self.path) {
|
||||
if url.scheme() != "http" {
|
||||
return Err(Error::new(ErrorKind::Other, "Invalid scheme"));
|
||||
}
|
||||
|
||||
if self.path.starts_with("http://") || self.path.starts_with("https://") {
|
||||
if self.cache {
|
||||
let cfn = cache_fn(&url);
|
||||
let hash = digest::digest(&digest::SHA256, self.path.as_bytes())
|
||||
.as_ref()[0..8].into_iter()
|
||||
.fold(0u64, |a, &e| (a<<8) + e as u64);
|
||||
|
||||
let cfn = format!("{}/{:x}", CACHE_PATH, hash);
|
||||
if let Ok(f) = file(&cfn) {
|
||||
return Ok(f);
|
||||
}
|
||||
{
|
||||
let mut rd = try!(fetch(url.as_str()));
|
||||
let mut rd = try!(fetch(self.path));
|
||||
let mut wr = try!(File::create(&cfn));
|
||||
try!(copy(&mut rd, &mut wr));
|
||||
}
|
||||
file(&cfn)
|
||||
|
||||
} else {
|
||||
fetch(url.as_str())
|
||||
fetch(self.path)
|
||||
}
|
||||
|
||||
} else if self.canbelocal {
|
||||
file(self.path)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue