indexer: Improve Debian Contents file parsing performance by 5.2x
Further improvements can be gained by caching the results of get_contents(), since the same Contents file is often parsed multiple times in a single cron run, but this change is already a significant speedup on its own.
This commit is contained in:
parent
de28175cd3
commit
eb15b6e2c7
1 changed file with 19 additions and 15 deletions
|
|
@@ -18,24 +18,28 @@ fn get_contents(f: Option<open::Path>) -> Result<HashSet<String>> {
|
||||||
let rd = archive::Archive::open_raw(&mut fd)?;
|
let rd = archive::Archive::open_raw(&mut fd)?;
|
||||||
let brd = BufReader::new(rd);
|
let brd = BufReader::new(rd);
|
||||||
let mut pkgs = HashSet::new();
|
let mut pkgs = HashSet::new();
|
||||||
let mut filecnt = 0;
|
let mut filecnt = -1;
|
||||||
let mut mancnt = 0;
|
let mut mancnt = 0;
|
||||||
|
|
||||||
// Run the regex on bytes instead of strings, as paths aren't always UTF-8. This regex will
|
|
||||||
// not match non-UTF-8 paths.
|
|
||||||
let re = Regex::new(r"^(?u:([^\s].*?))\s+(?u:([^\s]+))\s*$").unwrap();
|
|
||||||
|
|
||||||
for line in brd.split(b'\n') {
|
for line in brd.split(b'\n') {
|
||||||
re.captures(&line?).map(|cap| {
|
let line = line?;
|
||||||
filecnt += 1;
|
let line = match str::from_utf8(&line) { Ok(x) => x, _ => continue };
|
||||||
let path = str::from_utf8(cap.at(1).unwrap()).unwrap();
|
if line.starts_with("FILE ") {
|
||||||
if man::ismanpath(path) {
|
filecnt = 0;
|
||||||
mancnt += 1;
|
continue;
|
||||||
pkgs.extend( str::from_utf8(cap.at(2).unwrap()).unwrap().split(',').map(|e| {
|
} else if filecnt < 0 {
|
||||||
e.split('/').last().unwrap().to_string()
|
continue;
|
||||||
}) );
|
}
|
||||||
}
|
filecnt += 1;
|
||||||
});
|
let mut it = line.split(' ');
|
||||||
|
let pkg = it.next_back().unwrap();
|
||||||
|
let path = it.fold(String::new(), |acc, x| acc + " " + x);
|
||||||
|
if man::ismanpath(&path.trim()) {
|
||||||
|
mancnt += 1;
|
||||||
|
pkgs.extend( pkg.split(',').map(|e| {
|
||||||
|
e.split('/').last().unwrap().to_string()
|
||||||
|
}) );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
debug!("Found {}/{} man files in {} relevant packages from {}", mancnt, filecnt, pkgs.len(), f.path);
|
debug!("Found {}/{} man files in {} relevant packages from {}", mancnt, filecnt, pkgs.len(), f.path);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue