diff --git a/indexer/src/man.rs b/indexer/src/man.rs index e66166a..0dcb095 100644 --- a/indexer/src/man.rs +++ b/indexer/src/man.rs @@ -27,7 +27,7 @@ pub fn parse_path(path: &str) -> Option<(&str, &str, &str)> { /man[a-z0-9]/ # Subdir ([^/]+?) # Man page name (non-greedy) \. ([^/\.]+) # Section - (?: \. (?: gz|lzma|bz2|xz ))* $ # Any number of compression extensions + (?: \. (?: gz|lzma|bz2|xz|zst ))* $ # Any number of compression extensions ").unwrap(); } diff --git a/sql/schema.sql b/sql/schema.sql index 2ab0ca9..a5b43c2 100644 --- a/sql/schema.sql +++ b/sql/schema.sql @@ -45,7 +45,7 @@ CREATE TABLE man ( name varchar NOT NULL, filename varchar NOT NULL, locale varchar, - hash bytea NOT NULL REFERENCES contents(hash), + hash bytea NOT NULL, section varchar NOT NULL, encoding varchar, UNIQUE(package, filename) @@ -65,28 +65,17 @@ CREATE TABLE stats_cache AS SELECT count(distinct hash) AS hashes, count(distinc -- Removes any path components and compression extensions from the filename. CREATE OR REPLACE FUNCTION basename_from_filename(fn text) RETURNS text AS $$ -DECLARE - ret text; - tmp text; -BEGIN - ret := regexp_replace(fn, '^.+/([^/]+)', E'\\1'); - LOOP - tmp := regexp_replace(regexp_replace(regexp_replace(ret, E'\\.gz$', ''), E'\\.lzma$', ''), E'\\.bz2$', ''); - EXIT WHEN tmp = ret; - ret := tmp; - END LOOP; - RETURN ret; -END; -$$ LANGUAGE plpgsql; + SELECT regexp_replace(fn, '^(?:.*/)?([^/][^/]*?)(?:\.gz|\.lzma|\.xz|\.bz2|\.zst)*$', '\1'); +$$ LANGUAGE SQL; CREATE OR REPLACE FUNCTION section_from_filename(text) RETURNS text AS $$ - SELECT regexp_replace(basename_from_filename($1), E'^.+\\.([^.]+)$', E'\\1'); + SELECT regexp_replace(basename_from_filename($1), '^.+\.([^.]+)$', '\1'); $$ LANGUAGE SQL; CREATE OR REPLACE FUNCTION name_from_filename(text) RETURNS text AS $$ - SELECT regexp_replace(basename_from_filename($1), E'^(.+)\\.[^.]+$', E'\\1'); + SELECT regexp_replace(basename_from_filename($1), '^(.+)\.[^.]+$', '\1'); $$ LANGUAGE SQL;