Cleanup SQL files

This commit is contained in:
Yorhel 2024-04-14 11:35:39 +02:00
parent c9eef9e10a
commit f92639d22c
5 changed files with 0 additions and 161 deletions

View file

@ -1,44 +0,0 @@
-- match_firstchar(str, chr): test the first character of str against chr.
--   chr = '0'  -> true when the first character of str is NOT an ASCII letter
--                 (code point outside 'a'..'z' = 97..122 and 'A'..'Z' = 65..90).
--   otherwise  -> true when the first character of str equals the first
--                 character of chr or of upper(chr).
CREATE OR REPLACE FUNCTION match_firstchar(str text, chr text) RETURNS boolean AS $$
    SELECT CASE
        WHEN chr = '0' THEN
            (ascii(str) NOT BETWEEN 97 AND 122)
            AND (ascii(str) NOT BETWEEN 65 AND 90)
        ELSE
            ascii(str) = ascii(chr)
            OR ascii(str) = ascii(upper(chr))
    END;
$$ LANGUAGE SQL IMMUTABLE;
-- Recreate the packages -> systems foreign key so that it cascades on both
-- DELETE and UPDATE of systems.id.
-- The drop and the re-add are done in a single ALTER TABLE statement so the
-- change is atomic: if the new constraint cannot be created, the old one is
-- kept instead of leaving the table without any foreign key.
ALTER TABLE packages
    DROP CONSTRAINT packages_system_fkey,
    ADD CONSTRAINT packages_system_fkey
        FOREIGN KEY (system) REFERENCES systems (id)
        ON DELETE CASCADE
        ON UPDATE CASCADE;
-- Renumber system IDs so that listings can be sorted on the ID itself rather
-- than on the 'relorder' column.
-- (Yes, inserting a new system in-between later on means renumbering again,
-- but that's fine: these IDs are only used internally.)
BEGIN;

-- Pass 1: move every remapped row to its new ID plus an offset of 10000, so a
-- row's target ID never coincides with an ID that has yet to be moved.
WITH map(old, new) AS (
    VALUES
        -- FreeBSD
        (80, 84),
        (84, 80),
        (132, 99),
        (99, 129),
        (129, 132),
        (188, 198),
        (192, 188),
        (198, 203),
        (203, 192),
        -- CentOS
        (175, 176),
        (176, 177),
        (177, 178),
        (178, 179),
        (179, 182),
        (182, 183),
        (183, 175),
        (195, 200),
        (196, 201),
        (200, 195),
        (201, 206),
        (206, 196)
)
UPDATE systems
SET id = map.new + 10000
FROM map
WHERE systems.id = map.old;

-- Pass 2: strip the temporary offset again.
UPDATE systems
SET id = id - 10000
WHERE id >= 10000;

COMMIT;

-- Ordering now follows the ID, so the explicit ordering column can go.
ALTER TABLE systems
    DROP COLUMN relorder;

View file

@ -1,3 +0,0 @@
-- Tighten the packages table in a single ALTER TABLE statement.
ALTER TABLE packages
    -- Every package must have a category from now on.
    ALTER COLUMN category SET NOT NULL,
    -- New flag column; existing rows start out as FALSE.
    ADD COLUMN dead boolean NOT NULL DEFAULT FALSE;

View file

@ -1,106 +0,0 @@
-- WARNING: This script does a complete rewrite of the two largest tables.
-- This requires ~25G of temporary space and takes anywhere between 10 and 20
-- minutes. The site needs to be taken down during that time.

-- Drop three helper functions, each taking a single text argument.
-- NOTE(review): judging by their names these derived section/name/basename
-- from a man page filename; confirm nothing else still calls them.
DROP FUNCTION section_from_filename(text);
DROP FUNCTION name_from_filename(text);
DROP FUNCTION basename_from_filename(text);
-- hash_to_shorthash(hash): interpret the first 4 bytes of hash as a signed
-- 32-bit integer, with byte 0 as the least significant byte.
-- Bytes 0..2 always contribute a non-negative value. Byte 3 carries the sign
-- bit: when it is >= 128, the sign bit is removed before shifting and
-- -2147483648 is added instead, so no intermediate value exceeds the int range.
CREATE OR REPLACE FUNCTION hash_to_shorthash(hash bytea) RETURNS integer AS $$
    SELECT (get_byte(hash, 2) << 16)
         + (get_byte(hash, 1) << 8)
         + get_byte(hash, 0)
         + (CASE
                WHEN get_byte(hash, 3) >= 128
                    THEN -2147483648 + ((get_byte(hash, 3) - 128) << 24)
                ELSE (get_byte(hash, 3) << 24)
            END);
$$ LANGUAGE SQL IMMUTABLE;
-- Rebuild 'contents' with a compact integer surrogate key.
-- The old table is moved out of the way first; its primary key constraint is
-- dropped beforehand so the name 'contents_pkey' is free for the new table.
ALTER TABLE contents
    DROP CONSTRAINT contents_pkey;
ALTER TABLE contents
    RENAME TO contents_old;

-- 'serial' expands to: a sequence contents_id_seq AS integer, an integer
-- NOT NULL column defaulting to nextval('contents_id_seq'), and the sequence
-- being OWNED BY contents.id.
CREATE TABLE contents (
    id      serial,
    hash    bytea NOT NULL,
    content text  NOT NULL
);

-- 4m15s; start 29.3G end 53.3G; +24G
INSERT INTO contents (hash, content)
SELECT old.hash, old.content
FROM contents_old AS old;

DROP TABLE contents_old; -- back to 29.3G

-- Constraints are added only after the bulk load.
ALTER TABLE contents
    ADD CONSTRAINT contents_pkey PRIMARY KEY (id);
ALTER TABLE contents
    ADD CONSTRAINT contents_hash_key UNIQUE (hash);
-- 29.7G at this point
-- One row per distinct (name, section) pair found in the old 'man' table.
CREATE TABLE mans (
    id      serial,
    name    text NOT NULL,
    section text NOT NULL,
    PRIMARY KEY (id),
    UNIQUE (name, section)
);

INSERT INTO mans (name, section)
SELECT name, section
FROM man
GROUP BY name, section;

-- Expression index on lower(name) using text_pattern_ops (btree is the
-- default access method).
CREATE INDEX mans_name ON mans (lower(name) text_pattern_ops);
-- Lookup table mapping each locale string to a small integer ID.
CREATE TABLE locales (
    id     SMALLSERIAL PRIMARY KEY,
    locale text NOT NULL UNIQUE
);

-- 0 for default locale is handy, I guess
INSERT INTO locales (id, locale) VALUES (0, '');

-- Remaining IDs are assigned in alphabetical order of the locale.
-- Empty-string locales are skipped along with NULLs: '' is already present as
-- the id 0 row above, so inserting it again would violate the UNIQUE
-- constraint and abort the migration.
INSERT INTO locales (locale)
SELECT locale
FROM man
WHERE locale IS NOT NULL
  AND locale <> ''
GROUP BY locale
ORDER BY locale;
-- Lookup table mapping each encoding string to a small integer ID.
-- Encodings are stored for reference but are never actually used anywhere.
CREATE TABLE encodings (
    id       SMALLSERIAL PRIMARY KEY,
    encoding text NOT NULL UNIQUE
);

-- ID 0 is the empty encoding, used for rows that have none.
INSERT INTO encodings (id, encoding) VALUES (0, '');

-- Remaining IDs are assigned in alphabetical order of the encoding.
-- Empty-string encodings are skipped along with NULLs: '' is already present
-- as the id 0 row above, so inserting it again would violate the UNIQUE
-- constraint and abort the migration.
INSERT INTO encodings (encoding)
SELECT encoding
FROM man
WHERE encoding IS NOT NULL
  AND encoding <> ''
GROUP BY encoding
ORDER BY encoding;
-- Replaces the 'man' table. It's the largest table in terms of number of rows
-- and it's pretty frequently accessed, so keeping the rows small helps:
-- everything except the filename is stored as an integer reference.
CREATE TABLE files (
    pkgver    integer  NOT NULL,  -- package_versions.id
    man       integer  NOT NULL,  -- mans.id
    content   integer  NOT NULL,  -- contents.id
    shorthash integer  NOT NULL,  -- cache: hash_to_shorthash(contents.hash)
    locale    smallint NOT NULL,  -- locales.id
    encoding  smallint NOT NULL,  -- encodings.id
    filename  text     NOT NULL
);
-- Populate 'files' from the old 'man' table, replacing the name/section,
-- hash, locale and encoding values with the IDs of their lookup rows.
-- NULL locales/encodings map to the empty-string row of the lookup table.
-- The target columns are listed explicitly so the statement does not depend
-- on the physical column order of 'files'.
-- 1min; 29.7G -> 31.8G
INSERT INTO files (pkgver, man, content, shorthash, locale, encoding, filename)
SELECT
    o.package,
    m.id,
    c.id,
    hash_to_shorthash(o.hash),
    l.id,
    e.id,
    o.filename
FROM man AS o
INNER JOIN mans AS m
    ON m.name = o.name
   AND m.section = o.section
INNER JOIN contents AS c
    ON c.hash = o.hash
INNER JOIN locales AS l
    ON l.locale = coalesce(o.locale, '')
INNER JOIN encodings AS e
    ON e.encoding = coalesce(o.encoding, '');
-- Keys and indexes are created after the bulk load.
-- The names are the ones PostgreSQL would generate by default, spelled out.
-- 1min; 31.8G -> 33.7G for both indices
ALTER TABLE files
    ADD CONSTRAINT files_pkey PRIMARY KEY (pkgver, filename);
CREATE INDEX files_man_shorthash_idx
    ON files (man, shorthash);
CREATE INDEX files_content_idx
    ON files (content);
-- Attach all foreign keys of 'files' in a single ALTER TABLE statement.
-- Only the package_versions reference cascades on delete; the other
-- references use the default action.
-- 20sec to verify
ALTER TABLE files
    ADD CONSTRAINT files_pkgver_fkey
        FOREIGN KEY (pkgver) REFERENCES package_versions (id)
        ON DELETE CASCADE,
    ADD CONSTRAINT files_man_fkey
        FOREIGN KEY (man) REFERENCES mans (id),
    ADD CONSTRAINT files_content_fkey
        FOREIGN KEY (content) REFERENCES contents (id),
    ADD CONSTRAINT files_locale_fkey
        FOREIGN KEY (locale) REFERENCES locales (id),
    ADD CONSTRAINT files_encoding_fkey
        FOREIGN KEY (encoding) REFERENCES encodings (id);
-- The old tables are no longer needed now that 'files' has been populated.
DROP TABLE man;
DROP TABLE man_index;
-- final: 29.1G; we saved a whole 300M! \o/

-- There's only about a 100 unreferenced rows, leftovers from removals of
-- incorrectly indexed packages. Let's remove them while we're at it.
-- Both sides of the correlation are qualified: 'contents' has columns named
-- both 'id' and 'content', so unqualified names only resolve as intended as
-- long as 'files' has a 'content' column and no 'id' column.
DELETE FROM contents AS c
WHERE NOT EXISTS (
    SELECT 1
    FROM files AS f
    WHERE f.content = c.id
);

VACUUM ANALYZE mans, files, contents, locales, encodings;

View file

@ -1,8 +0,0 @@
-- Remove bogus entries from 'locales' and re-point the files that used them
-- at locale id 0.
-- 'overrides' is not a valid locale. Rather, that was a directory that CentOS
-- used for updated man pages without overwriting the pages already installed
-- on the system.
-- 'man' is just a badly nested directory.
-- The others (anything matching openmpi, mpich or mvapich) are openmpi using
-- non-standard directories for some reason.
-- The UPDATE runs as a data-modifying CTE inside the same statement as the
-- DELETE; 'b' holds the IDs of the bogus locale rows and is read by both.
-- NOTE(review): this relies on the files -> locales foreign key being checked
-- only at the end of the statement; confirm before reordering or splitting.
WITH b(id) AS (SELECT id FROM locales WHERE locale IN('man', 'overrides') OR locale ~ '(openmpi|mpich|mvapich)'),
f AS (UPDATE files SET locale = 0 WHERE locale IN(SELECT id FROM b))
DELETE FROM locales WHERE id IN(SELECT id FROM b);