Cleanup SQL files
This commit is contained in:
parent
c9eef9e10a
commit
f92639d22c
5 changed files with 0 additions and 161 deletions
|
|
@ -1,44 +0,0 @@
|
|||
-- match_firstchar(str, chr): test the first character of `str` against the
-- selector `chr`, comparing ascii() codes.
--   chr = '0' : true when the first character is not an ASCII letter, i.e. its
--               code is outside 97..122 (a-z) and outside 65..90 (A-Z).
--   otherwise : true when the first character has the same code as `chr` or
--               as upper(chr).
CREATE OR REPLACE FUNCTION match_firstchar(str text, chr text) RETURNS boolean AS $$
  SELECT CASE chr
    WHEN '0' THEN (ascii(str) NOT BETWEEN 97 AND 122)
              AND (ascii(str) NOT BETWEEN 65 AND 90)
    ELSE ascii(str) = ascii(chr)
      OR ascii(str) = ascii(upper(chr))
  END;
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
-- Recreate the packages.system -> systems.id foreign key with
-- ON DELETE CASCADE / ON UPDATE CASCADE.
-- Both actions are issued in a single ALTER TABLE so the swap is atomic: if
-- the new constraint cannot be added, the old one is not lost.
ALTER TABLE packages
  DROP CONSTRAINT packages_system_fkey,
  ADD CONSTRAINT packages_system_fkey
    FOREIGN KEY (system) REFERENCES systems (id)
    ON DELETE CASCADE ON UPDATE CASCADE;
|
||||
|
||||
-- Renumber system IDs so that ordering by id can replace the 'relorder'
-- column. Adding a new system in between later means renumbering again, which
-- is acceptable because these IDs are only used internally.
BEGIN;

-- Step 1: move each remapped row to (target id + 10000).
-- NOTE(review): the offset presumably keeps rows from landing on an id that
-- another row still holds while the renumbering is in progress.
WITH map(old, new) AS (
  VALUES
    -- FreeBSD
    (80, 84),
    (84, 80),
    (132, 99),
    (99, 129),
    (129, 132),
    (188, 198),
    (192, 188),
    (198, 203),
    (203, 192),
    -- CentOS
    (175, 176),
    (176, 177),
    (177, 178),
    (178, 179),
    (179, 182),
    (182, 183),
    (183, 175),
    (195, 200),
    (196, 201),
    (200, 195),
    (201, 206),
    (206, 196)
)
UPDATE systems
   SET id = new + 10000
  FROM map
 WHERE id = old;

-- Step 2: shift the offset rows down onto their final ids.
UPDATE systems
   SET id = id - 10000
 WHERE id >= 10000;

COMMIT;

-- Ordering now comes from the id itself.
ALTER TABLE systems DROP COLUMN relorder;
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
-- Two changes to 'packages' in one statement: 'category' becomes mandatory,
-- and a new boolean 'dead' column is added, defaulting to false.
ALTER TABLE packages
  ALTER COLUMN category SET NOT NULL,
  ADD COLUMN dead boolean NOT NULL DEFAULT false;
|
||||
|
|
@ -1,106 +0,0 @@
|
|||
-- WARNING: This script does a complete rewrite of the two largest tables.
|
||||
-- This requires ~25G of temporary space and takes anywhere between 10 and 20 minutes.
|
||||
-- The site needs to be taken down during that time.
|
||||
|
||||
|
||||
-- Remove the three *_from_filename(text) helper functions.
-- They are dropped by one command so the removal is all-or-nothing: if any of
-- them is missing, none is dropped and the schema stays in a known state.
DROP FUNCTION
  section_from_filename(text),
  name_from_filename(text),
  basename_from_filename(text);
|
||||
|
||||
-- hash_to_shorthash(hash): interpret the first 4 bytes of `hash` as a signed
-- 32-bit integer, with byte 0 as the least significant byte and byte 3 as the
-- most significant one.
-- Only the top byte needs sign handling: when its high bit is set, the value
-- is built upward from -2147483648, so every intermediate sum stays within
-- the integer range.
CREATE OR REPLACE FUNCTION hash_to_shorthash(hash bytea) RETURNS integer AS $$
  SELECT CASE WHEN get_byte(hash, 3) >= 128
           THEN -2147483648 + ((get_byte(hash, 3) - 128) << 24)
           ELSE (get_byte(hash, 3) << 24)
         END
       + (get_byte(hash, 2) << 16)
       + (get_byte(hash, 1) << 8)
       + get_byte(hash, 0);
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
-- Rebuild 'contents' with an integer surrogate key: the existing table is
-- renamed aside, its rows are copied into the new layout, and it is dropped.

-- NOTE(review): the old primary key is dropped before the rename, presumably
-- so the constraint name contents_pkey is free for the new table.
ALTER TABLE contents DROP CONSTRAINT contents_pkey;
ALTER TABLE contents RENAME TO contents_old;

CREATE SEQUENCE contents_id_seq AS integer;

CREATE TABLE contents (
  id      integer NOT NULL DEFAULT nextval('contents_id_seq'::regclass),
  hash    bytea   NOT NULL,
  content text    NOT NULL
);

ALTER SEQUENCE contents_id_seq OWNED BY contents.id;

-- Bulk copy. Measured: 4m15s; start 29.3G, end 53.3G (+24G).
INSERT INTO contents (hash, content)
SELECT o.hash, o.content
  FROM contents_old AS o;

DROP TABLE contents_old; -- back to 29.3G

-- Key and uniqueness constraints are added after the rows are loaded.
ALTER TABLE contents ADD PRIMARY KEY (id);
ALTER TABLE contents ADD UNIQUE (hash);
-- 29.7G at this point
|
||||
|
||||
|
||||
|
||||
-- One row per distinct (name, section) pair taken from 'man'.
CREATE TABLE mans (
  id      SERIAL PRIMARY KEY,
  name    text NOT NULL,
  section text NOT NULL,
  UNIQUE (name, section)
);

INSERT INTO mans (name, section)
SELECT DISTINCT o.name, o.section
  FROM man AS o;

-- Expression index on lower(name) using text_pattern_ops.
-- NOTE(review): presumably backs case-insensitive prefix lookups on name.
CREATE INDEX mans_name ON mans USING btree (lower(name) text_pattern_ops);
|
||||
|
||||
-- Lookup table of locale names; rows are referenced by their small integer id.
CREATE TABLE locales (
  id     SMALLSERIAL PRIMARY KEY,
  locale text NOT NULL UNIQUE
);

-- id 0 is reserved for the default locale, stored as the empty string.
INSERT INTO locales (id, locale) VALUES (0, '');

-- Every other locale present in 'man', numbered in alphabetical order.
-- The empty string is skipped as well as NULL: it is already present as id 0,
-- and inserting it again would violate the UNIQUE constraint on locale.
INSERT INTO locales (locale)
SELECT locale
  FROM man
 WHERE locale IS NOT NULL
   AND locale <> ''
 GROUP BY locale
 ORDER BY locale;
|
||||
|
||||
-- Encodings are stored for reference but are never actually used anywhere.
CREATE TABLE encodings (
  id       SMALLSERIAL PRIMARY KEY,
  encoding text NOT NULL UNIQUE
);

-- id 0 is reserved for "no encoding recorded", stored as the empty string.
INSERT INTO encodings (id, encoding) VALUES (0, '');

-- Every other encoding present in 'man', numbered in alphabetical order.
-- The empty string is skipped as well as NULL: it is already present as id 0,
-- and inserting it again would violate the UNIQUE constraint on encoding.
INSERT INTO encodings (encoding)
SELECT encoding
  FROM man
 WHERE encoding IS NOT NULL
   AND encoding <> ''
 GROUP BY encoding
 ORDER BY encoding;
|
||||
|
||||
|
||||
-- 'files' takes over from the 'man' table. It has the most rows of any table
-- and is read frequently, so each row is kept small: text values are replaced
-- by integer references into lookup tables.
CREATE TABLE files (
  pkgver    integer  NOT NULL, -- package_versions.id
  man       integer  NOT NULL, -- mans.id
  content   integer  NOT NULL, -- contents.id
  shorthash integer  NOT NULL, -- cache: hash_to_shorthash(contents.hash)
  locale    smallint NOT NULL, -- locales.id
  encoding  smallint NOT NULL, -- encodings.id
  filename  text     NOT NULL
);
|
||||
|
||||
-- Fill 'files' from the old 'man' table, swapping each text value for the id
-- of its row in the matching lookup table. NULL locale/encoding values map to
-- the '' row of their lookup table.
-- Target columns are listed explicitly so the insert does not depend on the
-- physical column order of 'files'.
-- Measured: 1min; 29.7G -> 31.8G
INSERT INTO files (pkgver, man, content, shorthash, locale, encoding, filename)
SELECT o.package,
       m.id,
       c.id,
       hash_to_shorthash(o.hash),
       l.id,
       e.id,
       o.filename
  FROM man AS o
 INNER JOIN mans      AS m ON m.name = o.name AND m.section = o.section
 INNER JOIN contents  AS c ON c.hash = o.hash
 INNER JOIN locales   AS l ON l.locale = coalesce(o.locale, '')
 INNER JOIN encodings AS e ON e.encoding = coalesce(o.encoding, '');
|
||||
|
||||
-- Keys and indexes are created after the bulk load.
-- Measured: 1min; 31.8G -> 33.7G for both indices
ALTER TABLE files
  ADD CONSTRAINT files_pkey PRIMARY KEY (pkgver, filename);

CREATE INDEX ON files (man, shorthash);

CREATE INDEX ON files (content);

-- Foreign keys to the referenced tables; about 20sec to verify.
-- Only the package_versions reference cascades on delete.
ALTER TABLE files
  ADD CONSTRAINT files_pkgver_fkey
    FOREIGN KEY (pkgver) REFERENCES package_versions (id) ON DELETE CASCADE,
  ADD CONSTRAINT files_man_fkey
    FOREIGN KEY (man) REFERENCES mans (id),
  ADD CONSTRAINT files_content_fkey
    FOREIGN KEY (content) REFERENCES contents (id),
  ADD CONSTRAINT files_locale_fkey
    FOREIGN KEY (locale) REFERENCES locales (id),
  ADD CONSTRAINT files_encoding_fkey
    FOREIGN KEY (encoding) REFERENCES encodings (id);

-- The old tables are no longer needed once 'files' is populated.
DROP TABLE man;
DROP TABLE man_index;
-- final: 29.1G; we saved a whole 300M! \o/
|
||||
|
||||
|
||||
-- There are only about 100 unreferenced rows, leftovers from removals of
-- incorrectly indexed packages. Remove them while we're at it.
-- Both sides of the correlation are qualified: 'contents' also has a column
-- named 'content', and the unqualified form only worked because 'files' has
-- no 'id' column.
DELETE FROM contents AS c
 WHERE NOT EXISTS (
         SELECT 1
           FROM files AS f
          WHERE f.content = c.id
       );

-- Reclaim space and refresh planner statistics on the tables touched above.
VACUUM ANALYZE mans, files, contents, locales, encodings;
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
-- Remove directory names that were wrongly recorded as locales:
--   'overrides' : not a valid locale; a directory CentOS used for updated man
--                 pages without overwriting the pages already installed on
--                 the system.
--   'man'       : just a badly nested directory.
--   the others  : openmpi using non-standard directories for some reason.
-- Files carrying one of these locales are moved to locale id 0 (the ''
-- default row) and the bogus locale rows are deleted, all in one statement.
WITH bogus(id) AS (
  SELECT id
    FROM locales
   WHERE locale IN ('man', 'overrides')
      OR locale ~ '(openmpi|mpich|mvapich)'
),
reassigned AS (
  UPDATE files
     SET locale = 0
   WHERE locale IN (SELECT id FROM bogus)
)
DELETE FROM locales
 WHERE id IN (SELECT id FROM bogus);
|
||||
Loading…
Add table
Add a link
Reference in a new issue