From 7648603685c82fb07ae27cfb88d227675e006cba Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 13 Dec 2021 18:16:14 +0100 Subject: [PATCH] Recognize .zst-compressed man pages + fix SQL basename_from_filename() to recognize .xz Also greatly simplified basename_from_filename() because apparently I couldn't write regexes back then. (And the removed REFERENCES line is to sync schema.sql with the actual state of the DB, which doesn't have that constraint for some reason. I'll probably fix that later) --- indexer/src/man.rs | 2 +- sql/schema.sql | 21 +++++---------------- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/indexer/src/man.rs b/indexer/src/man.rs index e66166a..0dcb095 100644 --- a/indexer/src/man.rs +++ b/indexer/src/man.rs @@ -27,7 +27,7 @@ pub fn parse_path(path: &str) -> Option<(&str, &str, &str)> { /man[a-z0-9]/ # Subdir ([^/]+?) # Man page name (non-greedy) \. ([^/\.]+) # Section - (?: \. (?: gz|lzma|bz2|xz ))* $ # Any number of compression extensions + (?: \. (?: gz|lzma|bz2|xz|zst ))* $ # Any number of compression extensions ").unwrap(); } diff --git a/sql/schema.sql b/sql/schema.sql index 2ab0ca9..a5b43c2 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -45,7 +45,7 @@ CREATE TABLE man ( name varchar NOT NULL, filename varchar NOT NULL, locale varchar, - hash bytea NOT NULL REFERENCES contents(hash), + hash bytea NOT NULL, section varchar NOT NULL, encoding varchar, UNIQUE(package, filename) @@ -65,28 +65,17 @@ CREATE TABLE stats_cache AS SELECT count(distinct hash) AS hashes, count(distinc -- Removes any path components and compression extensions from the filename. 
CREATE OR REPLACE FUNCTION basename_from_filename(fn text) RETURNS text AS $$ -DECLARE - ret text; - tmp text; -BEGIN - ret := regexp_replace(fn, '^.+/([^/]+)', E'\\1'); - LOOP - tmp := regexp_replace(regexp_replace(regexp_replace(ret, E'\\.gz$', ''), E'\\.lzma$', ''), E'\\.bz2$', ''); - EXIT WHEN tmp = ret; - ret := tmp; - END LOOP; - RETURN ret; -END; -$$ LANGUAGE plpgsql; + SELECT regexp_replace(fn, '^.+/([^/][^/]*?)(?:\.gz|\.lzma|\.xz|\.bz2|\.zst)*$', '\1'); +$$ LANGUAGE SQL; CREATE OR REPLACE FUNCTION section_from_filename(text) RETURNS text AS $$ - SELECT regexp_replace(basename_from_filename($1), E'^.+\\.([^.]+)$', E'\\1'); + SELECT regexp_replace(basename_from_filename($1), '^.+\.([^.]+)$', '\1'); $$ LANGUAGE SQL; CREATE OR REPLACE FUNCTION name_from_filename(text) RETURNS text AS $$ - SELECT regexp_replace(basename_from_filename($1), E'^(.+)\\.[^.]+$', E'\\1'); + SELECT regexp_replace(basename_from_filename($1), '^(.+)\.[^.]+$', '\1'); $$ LANGUAGE SQL;