From d19c56f28520bf7a066c094b2a1a59f0d0632c65 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 16 Dec 2021 13:44:39 +0100 Subject: [PATCH] Correctly handle a few more mis-identified locales --- indexer/src/man.rs | 13 ++++++++++--- sql/update-2021-12-16.sql | 8 ++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 sql/update-2021-12-16.sql diff --git a/indexer/src/man.rs b/indexer/src/man.rs index 0dcb095..7f9ca1f 100644 --- a/indexer/src/man.rs +++ b/indexer/src/man.rs @@ -32,10 +32,15 @@ pub fn parse_path(path: &str) -> Option<(&str, &str, &str)> { } let cap = match RE.captures(path) { Some(x) => x, None => return None }; - let locale = cap.get(1).map(|e| e.as_str()).unwrap_or(""); + let mut locale = cap.get(1).map(|e| e.as_str()).unwrap_or(""); let name = cap.get(2).unwrap().as_str(); let section = cap.get(3).unwrap().as_str(); + // Some weird directories that happen to match the locale + if locale.contains("openmpi") || locale.contains("mpich") || locale.contains("mvapich") { + locale = ""; + } + // Not everything matching the regex is necessarily a man page, exclude some special cases. match (name, section, locale) { // Files that totally aren't man pages @@ -47,9 +52,10 @@ pub fn parse_path(path: &str) -> Option<(&str, &str, &str)> { (_, "bz2", _) | (_, "xz", _) | (_, "html", _) => None, - // Some weird directories that happen to match the locale + // Some more weird directories that happen to match the locale (n, s, "5man") | (n, s, "c") | + (n, s, "man") | (n, s, "man1") | (n, s, "man2") | (n, s, "man3") | @@ -59,7 +65,8 @@ pub fn parse_path(path: &str) -> Option<(&str, &str, &str)> { (n, s, "man7") | (n, s, "man8") | (n, s, "Man-Part1") | - (n, s, "Man-Part2") => Some((n, s, "")), + (n, s, "Man-Part2") | + (n, s, "overrides") => Some((n, s, "")), // Nothing special! 
x => Some(x) } diff --git a/sql/update-2021-12-16.sql b/sql/update-2021-12-16.sql new file mode 100644 index 0000000..9a93131 --- /dev/null +++ b/sql/update-2021-12-16.sql @@ -0,0 +1,8 @@ +-- 'overrides' is not a valid locale. Rather, that was a directory that CentOS +-- used for updated man pages without overwriting the pages already installed +-- on the system. +-- 'man' is just a badly nested directory. +-- The others are MPI implementations (openmpi, mpich, mvapich) using non-standard directories for some reason. +WITH b(id) AS (SELECT id FROM locales WHERE locale IN('man', 'overrides') OR locale ~ '(openmpi|mpich|mvapich)'), + f AS (UPDATE files SET locale = 0 WHERE locale IN(SELECT id FROM b)) +DELETE FROM locales WHERE id IN(SELECT id FROM b);