manned/sql/schema.sql
Yorhel b27d55215a Arch: Mark deleted packages as dead and hide them from listings
We've got a lot of packages in the DB that have long been removed from
the Arch repos. These are still indexed, but won't clutter the package
listing anymore.

Also fixed an issue with packages.id numbers getting rather large
because the indexer allocates a new ID for every package on every
update.
2021-12-13 08:18:17 +01:00

109 lines
3.9 KiB
PL/PgSQL

-- A release of a system. Several rows may share the same 'name' and differ
-- only in 'release'.
CREATE TABLE systems (
    -- Assigned by hand, not generated. Besides identifying the row, the value
    -- determines the ordering of the releases that share one 'name'.
    id      integer,
    name    character varying NOT NULL,
    release character varying,
    short   character varying NOT NULL,
    PRIMARY KEY (id)
);
-- Man page text, keyed by the hash of the file it was read from.
CREATE TABLE contents (
    -- SHA1 of the man page file once decompressed, but taken *before* the
    -- encoding conversion and the removal of 0-bytes. Consequently
    -- sha1(content) does not necessarily equal this value, and the same
    -- content can end up stored under more than one hash (believed to be
    -- rare).
    hash    bytea,
    content character varying NOT NULL,
    PRIMARY KEY (hash)
);
-- A package within one system, identified by (system, name, category).
CREATE TABLE packages (
    id       SERIAL,
    system   integer           NOT NULL,
    category character varying NOT NULL,
    name     character varying NOT NULL,
    -- TRUE when the package was not seen in the last repository update. Only
    -- maintained for the few systems that are likely to drop packages over
    -- time; non-rolling-release distros tend not to delete packages at all.
    -- A package whose latest version ships no man pages may be flagged as
    -- dead as well, even though it is still available in the repos.
    dead     boolean           NOT NULL DEFAULT FALSE,
    PRIMARY KEY (id),
    FOREIGN KEY (system) REFERENCES systems (id) ON DELETE CASCADE ON UPDATE CASCADE,
    -- The column order is deliberate: lookups on (system, name) are common.
    UNIQUE (system, name, category)
);
-- One row per (package, version) pair; rows disappear together with their
-- package.
CREATE TABLE package_versions (
    id       SERIAL,
    package  integer           NOT NULL,
    version  character varying NOT NULL,
    released date              NOT NULL,
    arch     character varying,
    PRIMARY KEY (id),
    FOREIGN KEY (package) REFERENCES packages (id) ON DELETE CASCADE,
    UNIQUE (package, version)
);
-- A man page file shipped by a specific package version. The text itself is
-- stored in 'contents' and referenced through 'hash'.
CREATE TABLE man (
    -- Despite its name, this column references package_versions(id), not
    -- packages(id).
    package  integer           NOT NULL,
    name     character varying NOT NULL,
    filename character varying NOT NULL,
    locale   character varying,
    hash     bytea             NOT NULL,
    section  character varying NOT NULL,
    encoding character varying,
    FOREIGN KEY (package) REFERENCES package_versions (id) ON DELETE CASCADE,
    FOREIGN KEY (hash) REFERENCES contents (hash),
    UNIQUE (package, filename)
);
-- Lookup indexes on man: by content hash and by page name.
CREATE INDEX ON man (hash);
CREATE INDEX ON man (name);

-- Distinct (name, section) pairs, copied out of man when this statement runs.
-- Nothing in this file refreshes the table afterwards.
CREATE TABLE man_index AS
    SELECT DISTINCT
        name,
        section
    FROM man;

-- Expression index on the lowercased name; text_pattern_ops is presumably
-- there to serve case-insensitive prefix (LIKE 'abc%') searches.
CREATE INDEX ON man_index USING btree (lower(name) text_pattern_ops);

-- Single-row snapshot of aggregate counts over man, computed when this
-- statement runs.
CREATE TABLE stats_cache AS
    SELECT
        COUNT(DISTINCT hash)    AS hashes,
        COUNT(DISTINCT name)    AS mans,
        COUNT(*)                AS files,
        COUNT(DISTINCT package) AS packages
    FROM man;
-- Removes any path components and compression extensions from the filename.
--
-- Step 1: everything up to and including the last '/' that is followed by at
--         least one character is dropped ('/a/b/c.1' -> 'c.1').
-- Step 2: the whole trailing run of '.gz', '.lzma' and '.bz2' suffixes is
--         stripped, in any order and however often they repeat
--         ('c.1.bz2.gz' -> 'c.1').
--
-- STRICT: a NULL filename yields NULL without evaluating the body.
-- IMMUTABLE: the result depends on the argument only.
CREATE OR REPLACE FUNCTION basename_from_filename(fn text) RETURNS text AS $$
  SELECT regexp_replace(
    regexp_replace(fn, '^.+/([^/]+)', E'\\1'),
    E'(\\.(gz|lzma|bz2))+$',
    ''
  );
$$ LANGUAGE SQL IMMUTABLE STRICT;
-- Returns the part of the file's basename after its last '.', i.e. the man
-- section in 'ls.1' -> '1'. The basename is obtained through
-- basename_from_filename(). When the basename has no '.' with at least one
-- character on either side, it is returned unchanged.
--
-- STRICT: a NULL filename yields NULL without evaluating the body, so NULL is
-- never handed on to basename_from_filename().
-- IMMUTABLE: the result depends on the argument only.
CREATE OR REPLACE FUNCTION section_from_filename(text) RETURNS text AS $$
  SELECT regexp_replace(basename_from_filename($1), E'^.+\\.([^.]+)$', E'\\1');
$$ LANGUAGE SQL IMMUTABLE STRICT;
-- Returns the file's basename with its last '.suffix' removed, i.e. the man
-- page name in 'ls.1' -> 'ls'. The basename is obtained through
-- basename_from_filename(). When the basename has no '.' with at least one
-- character on either side, it is returned unchanged.
--
-- STRICT: a NULL filename yields NULL without evaluating the body, so NULL is
-- never handed on to basename_from_filename().
-- IMMUTABLE: the result depends on the argument only.
CREATE OR REPLACE FUNCTION name_from_filename(text) RETURNS text AS $$
  SELECT regexp_replace(basename_from_filename($1), E'^(.+)\\.[^.]+$', E'\\1');
$$ LANGUAGE SQL IMMUTABLE STRICT;
-- TRUE when the locale is absent (NULL) or begins with 'en'; FALSE otherwise.
-- The comparison is case-sensitive.
CREATE OR REPLACE FUNCTION is_english_locale(locale text) RETURNS bool AS $$
  -- LIKE yields NULL only for a NULL locale, which counts as English.
  SELECT COALESCE(locale LIKE 'en%', TRUE);
$$ LANGUAGE SQL IMMUTABLE;
-- TRUE when the path starts with '/usr/share/man/man' or '/usr/local/man/man'.
-- A NULL path yields NULL.
CREATE OR REPLACE FUNCTION is_standard_man_location(path text) RETURNS bool AS $$
  SELECT path LIKE ANY (ARRAY[
    '/usr/share/man/man%',
    '/usr/local/man/man%'
  ]);
$$ LANGUAGE SQL IMMUTABLE;
-- Tests the first character of 'str' against the bucket selected by 'chr'.
-- 'chr' must be a lowercase letter 'a'-'z' or the string '0':
--   '0'    -> TRUE when the first character is not an ASCII letter
--   letter -> TRUE when the first character is that letter in either case
-- The body is a single SELECT expression, so Postgres can inline it and
-- partially evaluate it in the calling query's plan, which keeps it cheap.
CREATE OR REPLACE FUNCTION match_firstchar(str text, chr text) RETURNS boolean AS $$
  SELECT CASE
    WHEN chr = '0' THEN
      (ascii(str) NOT BETWEEN 97 AND 122)      -- outside 'a'..'z'
      AND (ascii(str) NOT BETWEEN 65 AND 90)   -- outside 'A'..'Z'
    ELSE
      ascii(str) = ascii(chr) OR ascii(str) = ascii(upper(chr))
  END;
$$ LANGUAGE SQL IMMUTABLE;