We've got a lot of packages in the DB that have long been removed from the Arch repos. These are still indexed, but won't clutter the package listing anymore. Also fixed an issue with packages.id numbers getting rather large because the indexer allocates a new ID for every package on every update.
109 lines
3.9 KiB
PL/PgSQL
109 lines
3.9 KiB
PL/PgSQL
-- One row per system release.
CREATE TABLE systems (
  -- Hand-assigned number (not generated). It doubles as the sort key between
  -- different releases of the same system, where "same system" means rows
  -- sharing the same 'name'.
  id      integer,
  name    varchar NOT NULL,
  release varchar,
  short   varchar NOT NULL,
  PRIMARY KEY (id)
);
|
|
|
|
-- Man page contents, keyed by hash.
CREATE TABLE contents (
  -- SHA1 of the man page file, computed after decompression but *before*
  -- encoding conversion and removal of 0-bytes. As a consequence,
  -- sha1(content) does not necessarily equal 'hash', and the same content can
  -- end up stored under more than one hash (suspected to be rare).
  hash    bytea,
  content varchar NOT NULL,
  PRIMARY KEY (hash)
);
|
|
|
|
-- One row per (system, category, name) package.
CREATE TABLE packages (
  id       SERIAL,
  system   integer NOT NULL,
  category varchar NOT NULL,
  name     varchar NOT NULL,
  -- TRUE when the package was not seen in the last repository update. Only
  -- maintained for the few systems that are likely to drop packages over
  -- time; non-rolling-release distros tend not to delete packages at all.
  -- A package whose latest version ships no man pages may also be flagged
  -- as dead even though it is still available in the repos.
  dead     boolean NOT NULL DEFAULT FALSE,
  PRIMARY KEY (id),
  FOREIGN KEY (system) REFERENCES systems(id) ON DELETE CASCADE ON UPDATE CASCADE,
  -- Column order is deliberate: lookups on (system, name) are common.
  UNIQUE (system, name, category)
);
|
|
|
|
-- Individual versions of a package; a version string is unique per package.
CREATE TABLE package_versions (
  id       SERIAL,
  package  integer NOT NULL,
  version  varchar NOT NULL,
  released date    NOT NULL,
  arch     varchar,
  PRIMARY KEY (id),
  -- Versions are removed together with their parent package.
  FOREIGN KEY (package) REFERENCES packages(id) ON DELETE CASCADE,
  UNIQUE (package, version)
);
|
|
|
|
-- Man page files found in a package version; a filename is unique per
-- package version.
CREATE TABLE man (
  -- Despite the column name, this references package_versions(id), not
  -- packages(id).
  package  integer NOT NULL,
  name     varchar NOT NULL,
  filename varchar NOT NULL,
  locale   varchar,
  hash     bytea   NOT NULL,
  section  varchar NOT NULL,
  encoding varchar,
  -- Rows are removed together with their package version.
  FOREIGN KEY (package) REFERENCES package_versions(id) ON DELETE CASCADE,
  FOREIGN KEY (hash) REFERENCES contents(hash),
  UNIQUE (package, filename)
);
|
|
|
|
-- Secondary B-tree indexes on man, one on the content hash and one on the
-- man page name.
CREATE INDEX ON man USING btree (hash);
CREATE INDEX ON man USING btree (name);
|
|
|
|
|
|
|
|
-- Snapshot of the distinct (name, section) pairs present in man at creation
-- time.
CREATE TABLE man_index AS
  SELECT DISTINCT
    name,
    section
  FROM man;

-- Expression index on the lowercased name using text_pattern_ops.
CREATE INDEX ON man_index USING btree (lower(name) text_pattern_ops);
|
|
|
|
-- Single-row snapshot of aggregate counts over man at creation time.
-- Note that 'packages' counts distinct man.package values, and man.package
-- references package_versions(id).
CREATE TABLE stats_cache AS
  SELECT
    count(DISTINCT hash)    AS hashes,
    count(DISTINCT name)    AS mans,
    count(*)                AS files,
    count(DISTINCT package) AS packages
  FROM man;
|
|
|
|
|
|
|
|
-- Removes any path components and compression extensions from the filename.
--
--   fn       Filename, with or without leading directories.
--   returns  The last path component with every trailing '.gz', '.lzma' or
--            '.bz2' extension removed (repeated and mixed extensions such as
--            '.bz2.gz' are all stripped). Returns NULL for NULL input.
--
-- The previous PL/pgSQL version looped until the name stopped changing and
-- exited on "tmp = ret". For NULL input that comparison yields NULL, so the
-- loop never terminated. A single anchored regex strips the same trailing run
-- of extensions without a loop, and STRICT makes NULL input return NULL.
-- The result depends only on the argument, hence IMMUTABLE.
CREATE OR REPLACE FUNCTION basename_from_filename(fn text) RETURNS text AS $$
  SELECT regexp_replace(
    -- Step 1: keep only the part after the last '/'.
    regexp_replace(fn, '^.+/([^/]+)', E'\\1'),
    -- Step 2: drop the maximal run of compression extensions at the end.
    E'(\\.(gz|lzma|bz2))+$',
    ''
  );
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
|
|
|
|
|
-- Returns whatever follows the last '.' in the basename of the given
-- filename (path and compression extensions are removed first by
-- basename_from_filename). If the pattern does not match, e.g. the basename
-- contains no '.', the basename is returned unchanged.
CREATE OR REPLACE FUNCTION section_from_filename(text) RETURNS text
AS $body$
  SELECT regexp_replace(
    basename_from_filename($1),
    E'^.+\\.([^.]+)$',
    E'\\1'
  );
$body$ LANGUAGE SQL;
|
|
|
|
|
|
-- Returns the basename of the given filename with its last '.'-separated
-- component removed (path and compression extensions are removed first by
-- basename_from_filename). If the pattern does not match, e.g. the basename
-- contains no '.', the basename is returned unchanged.
CREATE OR REPLACE FUNCTION name_from_filename(text) RETURNS text
AS $body$
  SELECT regexp_replace(
    basename_from_filename($1),
    E'^(.+)\\.[^.]+$',
    E'\\1'
  );
$body$ LANGUAGE SQL;
|
|
|
|
|
|
-- TRUE when the locale is NULL or starts with 'en'; FALSE otherwise.
CREATE OR REPLACE FUNCTION is_english_locale(locale text) RETURNS bool
AS $body$
  -- For a NULL locale the LIKE yields NULL, which COALESCE maps to TRUE.
  SELECT COALESCE(locale LIKE 'en%', TRUE);
$body$ IMMUTABLE LANGUAGE SQL;
|
|
|
|
|
|
-- TRUE when the path starts with one of the two man directory prefixes
-- listed below; NULL for a NULL path.
CREATE OR REPLACE FUNCTION is_standard_man_location(path text) RETURNS bool
AS $body$
  SELECT path LIKE ANY (ARRAY[
    '/usr/share/man/man%',
    '/usr/local/man/man%'
  ]);
$body$ IMMUTABLE LANGUAGE sql;
|
|
|
|
-- Tests the first character of 'str' against 'chr'.
--   chr = '0'       : TRUE when the first character is not an ASCII letter
--                     (code outside both 97-122 and 65-90).
--   chr = any other : TRUE when the first character equals chr or upper(chr).
-- 'chr' must be a lowercase 'a'-'z' or '0'.
-- The body is kept as a single expression so that Postgres can inline it and
-- partially evaluate it into the calling query's plan.
CREATE OR REPLACE FUNCTION match_firstchar(str text, chr text) RETURNS boolean
AS $body$
  SELECT CASE
    WHEN chr = '0' THEN
          ascii(str) NOT BETWEEN 97 AND 122   -- not 'a'-'z'
      AND ascii(str) NOT BETWEEN 65 AND 90    -- not 'A'-'Z'
    ELSE
         ascii(str) = ascii(chr)
      OR ascii(str) = ascii(upper(chr))
  END;
$body$ LANGUAGE SQL IMMUTABLE;
|