121 lines
4.5 KiB
PL/PgSQL
121 lines
4.5 KiB
PL/PgSQL
-- One row per release of a system; rows sharing the same 'name' are releases
-- of the same system.
CREATE TABLE systems (
  -- Assigned by hand rather than generated. Besides identifying the row, the
  -- id also defines the ordering among releases that share the same 'name'.
  id      integer,
  name    varchar NOT NULL,
  release varchar,            -- the only nullable column
  short   varchar NOT NULL,
  PRIMARY KEY (id)
);
|
|
|
|
|
|
-- Man page contents, one row per distinct file hash.
CREATE TABLE contents (
  id      serial,
  -- SHA1 of the man page file after decompression but *before* encoding
  -- conversion and removal of 0-bytes. Consequently sha1(content) does not
  -- necessarily equal 'hash', and identical content can end up in the
  -- database under several different hashes (suspected to be rare).
  hash    bytea NOT NULL,
  content text  NOT NULL,
  PRIMARY KEY (id),
  UNIQUE (hash)
);
|
|
|
|
|
|
-- A man page as a logical entity: each (name, section) pair appears exactly
-- once.
CREATE TABLE mans (
  id      serial,
  name    text NOT NULL,
  section text NOT NULL,
  PRIMARY KEY (id),
  UNIQUE (name, section)
);

-- Expression index on the lowercased name. text_pattern_ops makes the btree
-- usable for pattern (prefix) matching on lower(name).
CREATE INDEX mans_name ON mans (lower(name) text_pattern_ops);
|
|
|
|
|
|
-- Lookup table of man page locales, so that other tables can refer to a
-- locale by a small integer id. Some locale names include the encoding, which
-- is neither really correct nor necessary given that everything is converted
-- to UTF-8 anyway; this can be cleaned up later.
CREATE TABLE locales (
  id     smallserial,
  locale text NOT NULL,
  PRIMARY KEY (id),
  UNIQUE (locale)
);
|
|
|
|
|
|
-- Lookup table of encodings, so that other tables can refer to an encoding by
-- a small integer id.
CREATE TABLE encodings (
  id       smallserial,
  encoding text NOT NULL,
  PRIMARY KEY (id),
  UNIQUE (encoding)
);
|
|
|
|
|
|
-- A package within a system, identified by (system, name, category).
CREATE TABLE packages (
  id       serial,
  -- Rows follow their system: deleting the system deletes its packages, and
  -- a changed system id is propagated.
  system   integer NOT NULL REFERENCES systems(id) ON UPDATE CASCADE ON DELETE CASCADE,
  category varchar NOT NULL,
  name     varchar NOT NULL,
  -- Whether this package was absent from the last repository update. The
  -- flag is only maintained for the few systems that are likely to delete
  -- packages over time; non-rolling-release distros tend not to delete
  -- packages at all.
  -- A package whose latest version has no man pages may also be marked dead,
  -- even though the package itself is still available in the repos.
  dead     boolean DEFAULT false NOT NULL,
  PRIMARY KEY (id),
  -- Column order is deliberate: lookups on (system, name) are common.
  UNIQUE (system, name, category)
);
|
|
|
|
|
|
-- A specific version of a package; each (package, version) pair appears once.
CREATE TABLE package_versions (
  id       serial,
  -- Deleting a package deletes all of its versions.
  package  integer NOT NULL REFERENCES packages(id) ON DELETE CASCADE,
  version  varchar NOT NULL,
  released date    NOT NULL,
  arch     varchar,            -- nullable
  PRIMARY KEY (id),
  UNIQUE (package, version)
);
|
|
|
|
|
|
-- One row per man page file found in a package version. The row links the
-- file to its logical man page (mans), its stored contents (contents), its
-- locale and the encoding it was originally found in. A file is identified by
-- its filename within the package version.
CREATE TABLE files (
  -- Deleting a package version deletes its files.
  pkgver    integer  NOT NULL REFERENCES package_versions(id) ON DELETE CASCADE,
  man       integer  NOT NULL REFERENCES mans(id),
  -- Fixed: the referenced table is 'contents'; a table named 'content' does
  -- not exist in this schema.
  content   integer  NOT NULL REFERENCES contents(id),
  shorthash integer  NOT NULL, -- cache: hash_to_shorthash(contents.hash)
  locale    smallint NOT NULL REFERENCES locales(id),
  -- The original encoding the man page was found in. This column isn't really
  -- used at the moment, but is potentially useful when investigating encoding
  -- issues.
  encoding  smallint NOT NULL REFERENCES encodings(id),
  filename  text     NOT NULL,
  PRIMARY KEY (pkgver, filename)
);

-- Lookup of the files for a given man page, optionally narrowed by shorthash.
CREATE INDEX ON files (man, shorthash);
-- Lookup of the files that share a given contents row.
CREATE INDEX ON files (content);
|
|
|
|
|
|
-- For stats_cache
|
|
-- psql meta-command: reads and executes util/update_indices.sql at this point
-- in the script, so this file must be loaded through psql.
-- NOTE(review): \i resolves the path relative to psql's current working
-- directory, so this presumably has to be run from the repository root --
-- confirm.
\i util/update_indices.sql
|
|
|
|
|
|
|
|
-- Reads the first 4 bytes of 'hash' as a signed 32-bit integer, with byte 0
-- as the least significant byte and byte 3 as the most significant one.
-- Returns NULL for a NULL hash.
CREATE OR REPLACE FUNCTION hash_to_shorthash(hash bytea) RETURNS integer AS $$
  SELECT
    -- Top byte first. When its high bit is set the result is negative: start
    -- from the minimum 32-bit value and add the remaining 7 bits, so that no
    -- intermediate value leaves the 32-bit range.
    CASE
      WHEN get_byte(hash, 3) >= 128
        THEN -2147483648 + ((get_byte(hash, 3) - 128) << 24)
      ELSE (get_byte(hash, 3) << 24)
    END
    + (get_byte(hash, 2) << 16)
    + (get_byte(hash, 1) << 8)
    + get_byte(hash, 0);
$$ LANGUAGE SQL IMMUTABLE;
|
|
|
|
|
|
-- True when 'locale' is missing (NULL or the empty string) or starts with
-- 'en'. Never returns NULL.
CREATE OR REPLACE FUNCTION is_english_locale(locale text) RETURNS bool AS $$
  SELECT COALESCE(locale, '') = ''
      OR locale LIKE 'en%';
$$ IMMUTABLE LANGUAGE SQL;
|
|
|
|
|
|
-- True when 'path' starts with one of the two recognised man directory
-- prefixes listed below; NULL when 'path' is NULL.
CREATE OR REPLACE FUNCTION is_standard_man_location(path text) RETURNS bool AS $$
  SELECT path LIKE ANY (ARRAY[
    '/usr/share/man/man%',
    '/usr/local/man/man%'
  ]);
$$ IMMUTABLE LANGUAGE sql;
|
|
|
|
-- Tests the first character of 'str' against 'chr', which must be a lowercase
-- letter 'a'-'z' or the string '0':
--   chr = '0'       -> true when str does not start with an ASCII letter
--   chr = a letter  -> true when str starts with that letter, in either case
-- The body is a single SELECT, which Postgres can inline and partially
-- evaluate into the calling query's plan, so it's fairly efficient.
CREATE OR REPLACE FUNCTION match_firstchar(str text, chr text) RETURNS boolean AS $$
  SELECT CASE
    WHEN chr = '0' THEN
      -- 97-122 is 'a'-'z', 65-90 is 'A'-'Z'.
      (ascii(str) NOT BETWEEN 97 AND 122) AND (ascii(str) NOT BETWEEN 65 AND 90)
    ELSE
      ascii(str) = ascii(chr) OR ascii(str) = ascii(upper(chr))
  END;
$$ LANGUAGE SQL IMMUTABLE;
|