From cd5d2c6a20b48f2fc7077c5f9e766b92586e1d88 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 1 May 2024 16:57:33 +0200 Subject: [PATCH] Remove encodings from "locales" table + delete incorrect locales The frontend always stripped off the encodings already, so no point in keeping that in the DB indices. The full locale was extracted from the filename, which we still keep, so no information is lost. SQL "migration" script: BEGIN; CREATE INDEX files_tmp_locale ON files (locale); INSERT INTO locales (locale) VALUES ('pl_PL'), ('is_IS'), ('ko_KR'); WITH obs(id, locale, lang) AS ( SELECT id, locale, regexp_replace(locale, '^([^.]+)\..+$', '\1') FROM locales WHERE locale LIKE '%.%' UNION ALL SELECT id, locale, '' FROM locales WHERE locale LIKE 'node%' OR locale = 'common' ), rep(old, new) AS ( SELECT o.id, x.id FROM obs o LEFT JOIN locales x ON x.locale = o.lang ), upd AS ( UPDATE files SET locale = new FROM rep WHERE locale = old ) DELETE FROM locales WHERE id IN(SELECT id FROM obs); DROP INDEX files_tmp_locale; COMMIT; --- indexer/src/man.rs | 3 ++- indexer/src/pkg.rs | 3 ++- www/index.pl | 17 ++++++++--------- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/indexer/src/man.rs b/indexer/src/man.rs index d594d81..f0c81f7 100644 --- a/indexer/src/man.rs +++ b/indexer/src/man.rs @@ -37,7 +37,7 @@ pub fn parse_path(path: &str) -> Option<(&str, &str, &str)> { let section = cap.get(3).unwrap().as_str(); // Some weird directories that happen to match the locale - if locale.contains("openmpi") || locale.contains("mpich") || locale.contains("mvapich") { + if locale.contains("openmpi") || locale.contains("mpich") || locale.contains("mvapich") || locale.starts_with("nodejs") { locale = ""; } @@ -59,6 +59,7 @@ pub fn parse_path(path: &str) -> Option<(&str, &str, &str)> { // Some more weird directories that happen to match the locale (n, s, "5man") | (n, s, "c") | + (n, s, "common") | (n, s, "man") | (n, s, "man1") | (n, s, "man2") | diff --git a/indexer/src/pkg.rs 
b/indexer/src/pkg.rs index 07d16c7..fcb416f 100644 --- a/indexer/src/pkg.rs +++ b/indexer/src/pkg.rs @@ -98,6 +98,7 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<(i32,i32)> fn insert_man_row(tr: &mut T, verid: i32, path: &str, enc: &str, content: i32) { let (name, sect, locale) = man::parse_path(path).unwrap(); + let lang = locale.split_once('.').map(|x| x.0).unwrap_or(locale); let q = "WITH ms(id) AS (SELECT id FROM mans WHERE name = $2 AND section = $3), mi(id) AS (INSERT INTO mans (name, section) SELECT $2, $3 WHERE NOT EXISTS(SELECT 1 FROM ms) RETURNING id), m(id) AS (SELECT id FROM ms UNION SELECT id FROM mi), @@ -110,7 +111,7 @@ fn insert_man_row(tr: &mut T, verid: i32, path: &str c(shorthash) AS (SELECT hash_to_shorthash(hash) FROM contents WHERE id = $4) INSERT INTO files (pkgver, man, content, shorthash, locale, encoding, filename) SELECT $1, m.id, $4, c.shorthash, l.id, e.id, '/'||$7 FROM m, l, e, c"; - if let Err(e) = tr.execute(q, &[&verid, &name, §, &content, &locale, &enc, &path]) { + if let Err(e) = tr.execute(q, &[&verid, &name, §, &content, &lang, &enc, &path]) { // I think this can only happen if archread gives us the same file twice, which really // shouldn't happen. But I'd rather continue with an error logged than panic. 
error!("Can't insert verid {} fn {}: {}", verid, path, e); diff --git a/www/index.pl b/www/index.pl index ef3ba90..b4de51c 100755 --- a/www/index.pl +++ b/www/index.pl @@ -199,13 +199,13 @@ sub man_pref_name { sub man_languages { my($name, $sect) = @_; - [ map $_->{lang}, tuwf->dbAlli( - "SELECT DISTINCT substring(l.locale from '^[^.]+') AS lang + [ map $_->{locale}, tuwf->dbAlli( + "SELECT DISTINCT l.locale FROM files f JOIN mans m ON m.id = f.man JOIN locales l ON l.id = f.locale WHERE m.name =", \$name, 'AND m.section =', \$sect, " - ORDER BY substring(l.locale from '^[^.]+') NULLS FIRST" + ORDER BY l.locale" )->@* ]; } @@ -741,9 +741,8 @@ sub man_nav_ { if(@$lang > 1) { b_ 'Languages'; p_ sub { - (my $cur = $man->{locale}||'') =~ s/\..*//; for (@$lang) { - if(($_||'') eq $cur) { + if(($_||'') eq $man->{locale}) { i_ $_ || 'default'; } else { a_ href => $_ ? "/man.$_/$man->{name}.$man->{section}" : "/man/$man->{name}.$man->{section}", $_ || 'default'; @@ -894,7 +893,7 @@ TUWF::get qr{/(?man|txt|raw)(?:\.(?[a-fA-F0-9]{8}))?(?:\.(?dbAlli(' SELECT p.system, p.name AS package, v.version, v.released, f.shorthash @@ -1145,7 +1144,7 @@ TUWF::get qr{/ver(?:\.([a-fA-F0-9]{8}))?(?:\.([^/]+))?/([^/]+)\.([0-9a-zA-Z])}, JOIN package_versions v ON v.id = f.pkgver JOIN packages p ON p.id = v.package WHERE f.man = (SELECT id FROM mans WHERE name =', \$name, 'AND section =', \$sect, ') - AND f.locale IN(SELECT id FROM locales WHERE locale', $lang ? ('ILIKE', \(escape_like($lang).'%')) : ("= ''"), ') + AND f.locale IN(SELECT id FROM locales WHERE locale =', \$lang, ') ORDER BY p.system DESC, p.name, v.released DESC, f.shorthash '); @@ -1163,7 +1162,7 @@ TUWF::get qr{/ver(?:\.([a-fA-F0-9]{8}))?(?:\.([^/]+))?/([^/]+)\.([0-9a-zA-Z])}, for (0..$#{$langs}) { txt_ ', ' if $_ > 0; if(($langs->[$_]||'') eq $lang) { - i_ $langs->[$_] || 'default'; + b_ $langs->[$_] || 'default'; } else { a_ href => '/ver'.($langs->[$_]?".$langs->[$_]":'')."/$name.$sect", $langs->[$_] || 'default'; }