Recognize .zst-compressed man pages + fix SQL basename_from_filename() to recognize .xz

Also greatly simplified basename_from_filename() because apparently I
couldn't write regexes back then.

(The REFERENCES clause on man.hash is removed to sync schema.sql with the
actual state of the DB, which doesn't have that constraint for some reason.
I'll probably fix that later.)
This commit is contained in:
Yorhel 2021-12-13 18:16:14 +01:00
parent d8b60a251a
commit 7648603685
2 changed files with 6 additions and 17 deletions

View file

@ -27,7 +27,7 @@ pub fn parse_path(path: &str) -> Option<(&str, &str, &str)> {
/man[a-z0-9]/ # Subdir
([^/]+?) # Man page name (non-greedy)
\. ([^/\.]+) # Section
(?: \. (?: gz|lzma|bz2|xz ))* $ # Any number of compression extensions
(?: \. (?: gz|lzma|bz2|xz|zst ))* $ # Any number of compression extensions
").unwrap();
}

View file

@ -45,7 +45,7 @@ CREATE TABLE man (
name varchar NOT NULL,
filename varchar NOT NULL,
locale varchar,
hash bytea NOT NULL REFERENCES contents(hash),
hash bytea NOT NULL,
section varchar NOT NULL,
encoding varchar,
UNIQUE(package, filename)
@ -65,28 +65,17 @@ CREATE TABLE stats_cache AS SELECT count(distinct hash) AS hashes, count(distinc
-- Removes any path components and compression extensions from the filename.
-- Example: '/usr/share/man/man1/ls.1.gz' becomes 'ls.1'.
-- NOTE(review): this span contains two bodies -- a plpgsql loop and a
-- single-regex SQL form -- presumably the before/after sides of a diff shown
-- without +/- markers. Confirm which one is live before editing.
CREATE OR REPLACE FUNCTION basename_from_filename(fn text) RETURNS text AS $$
DECLARE
ret text;
tmp text;
BEGIN
-- Drop everything up to and including the last '/'.
ret := regexp_replace(fn, '^.+/([^/]+)', E'\\1');
LOOP
-- Each pass strips a trailing .gz, then .lzma, then .bz2; the loop repeats
-- until a pass changes nothing. This form does not know about .xz or .zst.
tmp := regexp_replace(regexp_replace(regexp_replace(ret, E'\\.gz$', ''), E'\\.lzma$', ''), E'\\.bz2$', '');
EXIT WHEN tmp = ret;
ret := tmp;
END LOOP;
RETURN ret;
END;
$$ LANGUAGE plpgsql;
-- Single-pattern form: captures the last path component with a non-greedy
-- quantifier and discards any run of trailing .gz/.lzma/.xz/.bz2/.zst
-- extensions. The pattern requires a '/' preceded by at least one character;
-- input that does not match is returned unchanged by regexp_replace.
SELECT regexp_replace(fn, '^.+/([^/][^/]*?)(?:\.gz|\.lzma|\.xz|\.bz2|\.zst)*$', '\1');
$$ LANGUAGE SQL;
-- Returns the part of basename_from_filename($1) after its last '.', i.e. the
-- section suffix ('1' for a basename of 'ls.1'). A basename that does not
-- match the pattern (for example, one without a '.') is returned unchanged.
-- NOTE(review): two SELECTs are present, one using E'' escape strings and one
-- using plain literals -- presumably the before/after sides of a diff. The
-- plain-literal form treats backslashes literally only when
-- standard_conforming_strings is on; confirm the server setting.
CREATE OR REPLACE FUNCTION section_from_filename(text) RETURNS text AS $$
SELECT regexp_replace(basename_from_filename($1), E'^.+\\.([^.]+)$', E'\\1');
SELECT regexp_replace(basename_from_filename($1), '^.+\.([^.]+)$', '\1');
$$ LANGUAGE SQL;
-- Returns the part of basename_from_filename($1) before its last '.', i.e. the
-- page name without the section suffix ('ls' for a basename of 'ls.1'). A
-- basename that does not match the pattern is returned unchanged.
-- NOTE(review): two SELECTs are present, one using E'' escape strings and one
-- using plain literals -- presumably the before/after sides of a diff. The
-- plain-literal form treats backslashes literally only when
-- standard_conforming_strings is on; confirm the server setting.
CREATE OR REPLACE FUNCTION name_from_filename(text) RETURNS text AS $$
SELECT regexp_replace(basename_from_filename($1), E'^(.+)\\.[^.]+$', E'\\1');
SELECT regexp_replace(basename_from_filename($1), '^(.+)\.[^.]+$', '\1');
$$ LANGUAGE SQL;