manned/indexer/src/man.rs
Yorhel c8bb4da246 Use libarchive3-sys crate directly + improve archread API
This all should offer a more convenient and robust interface to handle
all sorts of archives.
2016-10-29 09:33:39 +02:00

85 lines
3.7 KiB
Rust

use regex::Regex;
/// Decides whether `path` looks like the location of a man page.
///
/// On success the result is `Some((name, section, locale))`, where every component is a slice
/// borrowed from `path`; `locale` is the empty string when the path carries no (usable) locale
/// directory. `None` means the path does not look like a man page.
fn parse_path(path: &str) -> Option<(&str, &str, &str)> {
    // Expected shape, roughly: man[/locale]/man1/manpage.section[.compression]+
    // The pattern is not anchored at the start, so the `man` directory may sit at any depth.
    // (?x) is verbose mode: whitespace and `#` comments inside the pattern are ignored.
    lazy_static! {
        static ref RE: Regex = Regex::new(r"(?x)
man
(?: / ([^/]+) )? # Optional locale
/man[a-z0-9]/ # Subdir
([^/]+?) # Man page name (non-greedy)
\. ([^/\.]+) # Section
(?: \. (?: gz|lzma|bz2|xz ))* $ # Any number of compression extensions
").unwrap();
    }

    let caps = match RE.captures(path) {
        None => return None,
        Some(found) => found,
    };
    // Groups 2 and 3 are mandatory parts of the pattern, so they are always present on a match.
    let (name, section) = (caps.at(2).unwrap(), caps.at(3).unwrap());
    let raw_locale = caps.at(1).unwrap_or("");

    // A regex match alone is not proof of a man page: weed out build-system leftovers, files
    // whose "section" is really just a compression suffix, and HTML renderings.
    let not_a_man_page = match (name, section) {
        ("Makefile", "in") | ("Makefile", "am") | (".cvsignore", _) => true,
        (_, "gz") | (_, "lzma") | (_, "bz2") | (_, "xz") | (_, "html") => true,
        _ => false,
    };
    if not_a_man_page {
        return None;
    }

    // Some directories merely happen to sit where a locale would be; report those as having
    // no locale at all.
    let locale = match raw_locale {
        "5man" | "c" |
        "man1" | "man2" | "man3" | "man4" | "man5" | "man6" | "man7" | "man8" |
        "Man-Part1" | "Man-Part2" => "",
        genuine => genuine,
    };

    Some((name, section, locale))
}
// Table-driven check of parse_path(): every entry pairs an input path with the exact result
// that is expected for it.
#[test]
fn test_parse_path() {
    let cases: &[(&str, Option<(&str, &str, &str)>)] = &[
        // Generic tests
        ("/", None),
        ("/man1/ncdu.1", None),
        ("/man/man?/ncdu.1", None),
        ("/man/man1/ncdu.1", Some(("ncdu", "1", ""))),
        // Stacked compression extensions: this stuff happens
        ("/man/man1/ncdu.1.gz.lzma.xz.bz2.gz", Some(("ncdu", "1", ""))),
        ("/man/en_US.UTF-8/man1/ncdu.1", Some(("ncdu", "1", "en_US.UTF-8"))),
        // Special cases
        ("/usr/share/man/man1/INDEX", None),
        ("/usr/share/man/man1/Makefile", None),
        ("/usr/share/man/man1/Makefile.am", None),
        ("/usr/share/man/man1/Makefile.in", None),
        ("/usr/share/man/man1/.cvsignore", None),
        ("/usr/share/man/man1/.cvsignore.gz", None),
        // Some actual locations
        ("/usr/local/man/man1/list_audio_tracks.1.gz", Some(("list_audio_tracks", "1", ""))),
        (
            "/usr/local/lib/perl5/site_perl/man/man3/DBIx::Class::Helper::ResultSet::DateMethods1::Announcement.3.gz",
            Some(("DBIx::Class::Helper::ResultSet::DateMethods1::Announcement", "3", "")),
        ),
        ("/usr/man/man3/exit.3tk", Some(("exit", "3tk", ""))),
        ("/usr/local/brlcad/share/man/mann/exit.nged.gz", Some(("exit", "nged", ""))),
        ("/usr/X11R6/man/man3/intro.3xglut.gz", Some(("intro", "3xglut", ""))),
        ("/usr/local/share/man/ko_KR.eucKR/man3/intro.3.gz", Some(("intro", "3", "ko_KR.eucKR"))),
        ("/usr/lib/scilab/man/Man-Part1/man1/ans.1", Some(("ans", "1", ""))),
        ("/heirloom/usr/share/man/5man/man1/chgrp.1.gz", Some(("chgrp", "1", ""))),
        ("/usr/local/plan9/man/man8/index.html", None),
        ("/usr/local/share/doc/gmt/html/man/grdpaste.html", None),
    ];

    for &(path, expected) in cases.iter() {
        // Name the offending path on failure, since all cases share one assertion site.
        assert_eq!(parse_path(path), expected, "path: {:?}", path);
    }
}