-- One row per release of a system.
CREATE TABLE systems (
  -- Assigned by hand rather than generated. Besides identifying the row, the
  -- id also orders the different releases of one system (rows that share the
  -- same 'name').
  id      integer,
  name    varchar NOT NULL,
  release varchar,
  short   varchar NOT NULL,
  PRIMARY KEY (id)
);

-- Man page bodies, keyed by hash.
CREATE TABLE contents (
  -- SHA1 of the man page file taken after decompression, but *before*
  -- encoding conversion and before 0-bytes are removed. As a result
  -- sha1(content) does not necessarily equal 'hash', and identical content
  -- can be stored under more than one hash (suspected to be rare).
  hash    bytea,
  content varchar NOT NULL,
  PRIMARY KEY (hash)
);

CREATE TABLE packages (
  id       SERIAL,
  system   integer NOT NULL,
  category varchar NOT NULL,
  name     varchar NOT NULL,
  -- FALSE while the package was still seen in the last repository update.
  -- Only maintained for the few systems that are likely to delete packages
  -- over time; non-rolling-release distros tend not to delete packages.
  -- A package whose latest version ships no man pages may also be flagged as
  -- dead even though it is still available in the repos.
  dead     boolean NOT NULL DEFAULT FALSE,
  PRIMARY KEY (id),
  FOREIGN KEY (system) REFERENCES systems (id) ON DELETE CASCADE ON UPDATE CASCADE,
  -- Column order matters: lookups on (system, name) are common.
  UNIQUE (system, name, category)
);

CREATE TABLE package_versions (
  id       SERIAL,
  package  integer NOT NULL,
  version  varchar NOT NULL,
  released date NOT NULL,
  arch     varchar,
  PRIMARY KEY (id),
  FOREIGN KEY (package) REFERENCES packages (id) ON DELETE CASCADE,
  UNIQUE (package, version)
);

CREATE TABLE man (
  -- References package_versions(id), not packages(id).
  package  integer NOT NULL,
  name     varchar NOT NULL,
  filename varchar NOT NULL,
  locale   varchar,
  hash     bytea NOT NULL,
  section  varchar NOT NULL,
  encoding varchar,
  FOREIGN KEY (package) REFERENCES package_versions (id) ON DELETE CASCADE,
  FOREIGN KEY (hash) REFERENCES contents (hash),
  UNIQUE (package, filename)
);

CREATE INDEX ON man (hash);
CREATE INDEX ON man (name);

-- Distinct (name, section) pairs copied out of 'man' at creation time.
CREATE TABLE man_index AS
  SELECT DISTINCT name, section
    FROM man;

-- text_pattern_ops lets the btree serve prefix matches on lower(name).
CREATE INDEX ON man_index USING btree (lower(name) text_pattern_ops);

-- Aggregate counts over 'man', computed once at creation time.
CREATE TABLE stats_cache AS
  SELECT count(DISTINCT hash)    AS hashes,
         count(DISTINCT name)    AS mans,
         count(*)                AS files,
         count(DISTINCT package) AS packages
    FROM man;

-- Removes any path components and compression extensions from the filename.
-- basename_from_filename: drops everything up to the last directory separator
-- and then any run of trailing .gz / .lzma / .bz2 suffixes, however many are
-- stacked ('man1/ls.1.gz.bz2' -> 'ls.1'). NULL input yields NULL.
CREATE OR REPLACE FUNCTION basename_from_filename(fn text) RETURNS text AS $$
BEGIN
  -- A single end-anchored pattern removes the whole suffix run in one pass.
  -- This is deliberately not a strip-until-unchanged loop: such a loop has to
  -- compare two strings to detect the fixpoint, and with '=' that comparison
  -- is never true for NULL, so a NULL argument would never terminate.
  RETURN regexp_replace(
    regexp_replace(fn, '^.+/([^/]+)', E'\\1'),
    E'(\\.(gz|lzma|bz2))+$',
    ''
  );
END;
$$ LANGUAGE plpgsql IMMUTABLE;

-- Man section taken from a filename: the text after the last '.' of the
-- basename ('/usr/share/man/man3/printf.3.gz' -> '3'). A basename that
-- contains no '.' is returned unchanged.
CREATE OR REPLACE FUNCTION section_from_filename(text) RETURNS text AS $$
  SELECT regexp_replace(basename_from_filename($1), E'^.+\\.([^.]+)$', E'\\1');
$$ LANGUAGE SQL IMMUTABLE;

-- Man page name taken from a filename: the basename with its last
-- '.'-separated component removed ('/usr/share/man/man3/printf.3.gz' ->
-- 'printf'). A basename that contains no '.' is returned unchanged.
CREATE OR REPLACE FUNCTION name_from_filename(text) RETURNS text AS $$
  SELECT regexp_replace(basename_from_filename($1), E'^(.+)\\.[^.]+$', E'\\1');
$$ LANGUAGE SQL IMMUTABLE;

-- True when the locale is NULL or starts with 'en'.
CREATE OR REPLACE FUNCTION is_english_locale(locale text) RETURNS bool AS $$
  SELECT locale IS NULL OR locale LIKE 'en%';
$$ LANGUAGE SQL IMMUTABLE;

-- True when the path starts with /usr/share/man/man or /usr/local/man/man.
CREATE OR REPLACE FUNCTION is_standard_man_location(path text) RETURNS bool AS $$
  SELECT path LIKE '/usr/share/man/man%'
      OR path LIKE '/usr/local/man/man%';
$$ LANGUAGE SQL IMMUTABLE;

-- Convenience function to match the first character of a string,
-- case-insensitively. The second argument must be a lowercase 'a'-'z', or '0'
-- to match any first character that is not an ASCII letter.
-- Postgres can inline and partially evaluate this function into the query
-- plan, so it's fairly efficient.
CREATE OR REPLACE FUNCTION match_firstchar(str text, chr text) RETURNS boolean AS $$
  SELECT CASE
    WHEN chr = '0' THEN
      -- Outside 'a'-'z' (97-122) and outside 'A'-'Z' (65-90).
          (ascii(str) < 97 OR ascii(str) > 122)
      AND (ascii(str) < 65 OR ascii(str) > 90)
    ELSE
      ascii(str) IN (ascii(chr), ascii(upper(chr)))
  END;
$$ LANGUAGE SQL IMMUTABLE;