98 lines
3.2 KiB
PL/PgSQL
98 lines
3.2 KiB
PL/PgSQL
|
|
-- TODO: "system" -> "repository"?
|
|
-- TODO: index of (reverse) man page references?
|
|
-- TODO: Probably want an index on man(name). Or try swapping column order in the unique index.
|
|
-- TODO: Use some consistent naming of tables and columns
|
|
|
|
|
|
CREATE TABLE systems (
|
|
id integer PRIMARY KEY, -- hardcoded ID.
|
|
name varchar NOT NULL,
|
|
release varchar,
|
|
relorder integer NOT NULL DEFAULT 0, -- simple way of ordering different releases for the same system
|
|
short varchar NOT NULL
|
|
);
|
|
|
|
|
|
CREATE TABLE contents (
|
|
hash bytea PRIMARY KEY,
|
|
content varchar NOT NULL
|
|
);
|
|
|
|
|
|
-- Note: If there are multiple arches available for the same package, then
|
|
-- generally only a single one is chosen (not stored here which one).
|
|
-- Also, a package may be listed here even if it has no man pages indexed, in
|
|
-- order for the fetcher to determine whether it has already processed the
|
|
-- package or not. This doesn't mean all packages of a repository are listed
|
|
-- here. For example, the Arch fetcher checks the file list of a package before
|
|
-- considering to handle it.
|
|
CREATE TABLE package (
|
|
id SERIAL PRIMARY KEY,
|
|
system integer NOT NULL REFERENCES systems(id),
|
|
category varchar, -- depends on system (e.g. "community" on Arch, "x11" on Debian)
|
|
name varchar NOT NULL,
|
|
version varchar NOT NULL,
|
|
released date NOT NULL,
|
|
UNIQUE(system, name, version)
|
|
);
|
|
|
|
|
|
CREATE TABLE man (
|
|
package integer NOT NULL REFERENCES package(id),
|
|
name varchar NOT NULL, -- 'fopen', 'du', etc (TODO: An index on name_from_filename(filename) may also work)
|
|
section varchar NOT NULL, -- extracted from filename (TODO: Is this column really necessary?)
|
|
filename varchar NOT NULL, -- full path + file name
|
|
locale varchar, -- parsed from the file name, NULL for the "main" man page (in the C or en_US locale)
|
|
hash bytea NOT NULL REFERENCES contents(hash),
|
|
UNIQUE(package, filename)
|
|
);
|
|
|
|
|
|
CREATE INDEX ON man USING hash (hash);
|
|
|
|
|
|
INSERT INTO systems (id, name, release, short, relorder) VALUES
|
|
(1, 'Arch Linux', NULL, 'arch', 0),
|
|
(2, 'Ubuntu', '4.10', 'ubuntu-warty', 0),
|
|
(3, 'Ubuntu', '5.04', 'ubuntu-hoary', 1),
|
|
(4, 'Ubuntu', '5.10', 'ubuntu-breezy', 2);
|
|
|
|
|
|
-- Removes any path components and compression extensions from the filename.
|
|
CREATE OR REPLACE FUNCTION basename_from_filename(fn text) RETURNS text AS $$
|
|
DECLARE
|
|
ret text;
|
|
tmp text;
|
|
BEGIN
|
|
ret := regexp_replace(fn, '^.+/([^/]+)', E'\\1');
|
|
LOOP
|
|
tmp := regexp_replace(regexp_replace(regexp_replace(ret, E'\\.gz$', ''), E'\\.lzma$', ''), E'\\.bz2$', '');
|
|
EXIT WHEN tmp = ret;
|
|
ret := tmp;
|
|
END LOOP;
|
|
RETURN ret;
|
|
END;
|
|
$$ LANGUAGE plpgsql;
|
|
|
|
|
|
CREATE OR REPLACE FUNCTION section_from_filename(text) RETURNS text AS $$
|
|
SELECT regexp_replace(basename_from_filename($1), E'^.+\\.([^.]+)$', E'\\1');
|
|
$$ LANGUAGE SQL;
|
|
|
|
|
|
CREATE OR REPLACE FUNCTION name_from_filename(text) RETURNS text AS $$
|
|
SELECT regexp_replace(basename_from_filename($1), E'^(.+)\\.[^.]+$', E'\\1');
|
|
$$ LANGUAGE SQL;
|
|
|
|
|
|
|
|
|
|
-- Some handy admin queries
|
|
|
|
--BEGIN;
|
|
--DELETE FROM man WHERE package IN(SELECT id FROM package WHERE name = '');
|
|
--DELETE FROM package WHERE name = '';
|
|
--DELETE FROM contents c WHERE NOT EXISTS(SELECT 1 FROM man m WHERE m.hash = c.hash);
|
|
--COMMIT;
|
|
|