Remove encodings from "locales" table + delete incorrect locales

The frontend always stripped off the encodings already, so there is no point
in keeping them in the DB indices. The full locale was extracted from the
filename, which we still keep, so no information is lost.

SQL "migration" script:

  BEGIN;
  CREATE INDEX files_tmp_locale ON files (locale);

  INSERT INTO locales (locale) VALUES ('pl_PL'), ('is_IS'), ('ko_KR');

  WITH obs(id, locale, lang) AS (
    SELECT id, locale, regexp_replace(locale, '^([^.]+)\..+$', '\1') FROM locales WHERE locale LIKE '%.%'
     UNION ALL
    SELECT id, locale, '' FROM locales WHERE locale LIKE 'node%' OR locale = 'common'
  ), rep(old, new) AS (
    SELECT o.id, x.id FROM obs o LEFT JOIN locales x ON x.locale = o.lang
  ), upd AS (
    UPDATE files SET locale = new FROM rep WHERE locale = old
  ) DELETE FROM locales WHERE id IN(SELECT id FROM obs);

  DROP INDEX files_tmp_locale;

  COMMIT;
This commit is contained in:
Yorhel 2024-05-01 16:57:33 +02:00
parent 1ee5c9c2df
commit cd5d2c6a20
3 changed files with 12 additions and 11 deletions

View file

@ -37,7 +37,7 @@ pub fn parse_path(path: &str) -> Option<(&str, &str, &str)> {
let section = cap.get(3).unwrap().as_str(); let section = cap.get(3).unwrap().as_str();
// Some weird directories that happen to match the locale // Some weird directories that happen to match the locale
if locale.contains("openmpi") || locale.contains("mpich") || locale.contains("mvapich") { if locale.contains("openmpi") || locale.contains("mpich") || locale.contains("mvapich") || locale.starts_with("nodejs") {
locale = ""; locale = "";
} }
@ -59,6 +59,7 @@ pub fn parse_path(path: &str) -> Option<(&str, &str, &str)> {
// Some more weird directories that happen to match the locale // Some more weird directories that happen to match the locale
(n, s, "5man") | (n, s, "5man") |
(n, s, "c") | (n, s, "c") |
(n, s, "common") |
(n, s, "man") | (n, s, "man") |
(n, s, "man1") | (n, s, "man1") |
(n, s, "man2") | (n, s, "man2") |

View file

@ -98,6 +98,7 @@ fn insert_pkg(tr: &mut postgres::Transaction, opt: &PkgOpt) -> Option<(i32,i32)>
fn insert_man_row<T: postgres::GenericClient>(tr: &mut T, verid: i32, path: &str, enc: &str, content: i32) { fn insert_man_row<T: postgres::GenericClient>(tr: &mut T, verid: i32, path: &str, enc: &str, content: i32) {
let (name, sect, locale) = man::parse_path(path).unwrap(); let (name, sect, locale) = man::parse_path(path).unwrap();
let lang = locale.split_once('.').map(|x| x.0).unwrap_or(locale);
let q = "WITH ms(id) AS (SELECT id FROM mans WHERE name = $2 AND section = $3), let q = "WITH ms(id) AS (SELECT id FROM mans WHERE name = $2 AND section = $3),
mi(id) AS (INSERT INTO mans (name, section) SELECT $2, $3 WHERE NOT EXISTS(SELECT 1 FROM ms) RETURNING id), mi(id) AS (INSERT INTO mans (name, section) SELECT $2, $3 WHERE NOT EXISTS(SELECT 1 FROM ms) RETURNING id),
m(id) AS (SELECT id FROM ms UNION SELECT id FROM mi), m(id) AS (SELECT id FROM ms UNION SELECT id FROM mi),
@ -110,7 +111,7 @@ fn insert_man_row<T: postgres::GenericClient>(tr: &mut T, verid: i32, path: &str
c(shorthash) AS (SELECT hash_to_shorthash(hash) FROM contents WHERE id = $4) c(shorthash) AS (SELECT hash_to_shorthash(hash) FROM contents WHERE id = $4)
INSERT INTO files (pkgver, man, content, shorthash, locale, encoding, filename) INSERT INTO files (pkgver, man, content, shorthash, locale, encoding, filename)
SELECT $1, m.id, $4, c.shorthash, l.id, e.id, '/'||$7 FROM m, l, e, c"; SELECT $1, m.id, $4, c.shorthash, l.id, e.id, '/'||$7 FROM m, l, e, c";
if let Err(e) = tr.execute(q, &[&verid, &name, &sect, &content, &locale, &enc, &path]) { if let Err(e) = tr.execute(q, &[&verid, &name, &sect, &content, &lang, &enc, &path]) {
// I think this can only happen if archread gives us the same file twice, which really // I think this can only happen if archread gives us the same file twice, which really
// shouldn't happen. But I'd rather continue with an error logged than panic. // shouldn't happen. But I'd rather continue with an error logged than panic.
error!("Can't insert verid {} fn {}: {}", verid, path, e); error!("Can't insert verid {} fn {}: {}", verid, path, e);

View file

@ -199,13 +199,13 @@ sub man_pref_name {
sub man_languages { sub man_languages {
my($name, $sect) = @_; my($name, $sect) = @_;
[ map $_->{lang}, tuwf->dbAlli( [ map $_->{locale}, tuwf->dbAlli(
"SELECT DISTINCT substring(l.locale from '^[^.]+') AS lang "SELECT DISTINCT l.locale
FROM files f FROM files f
JOIN mans m ON m.id = f.man JOIN mans m ON m.id = f.man
JOIN locales l ON l.id = f.locale JOIN locales l ON l.id = f.locale
WHERE m.name =", \$name, 'AND m.section =', \$sect, " WHERE m.name =", \$name, 'AND m.section =', \$sect, "
ORDER BY substring(l.locale from '^[^.]+') NULLS FIRST" ORDER BY l.locale"
)->@* ]; )->@* ];
} }
@ -741,9 +741,8 @@ sub man_nav_ {
if(@$lang > 1) { if(@$lang > 1) {
b_ 'Languages'; b_ 'Languages';
p_ sub { p_ sub {
(my $cur = $man->{locale}||'') =~ s/\..*//;
for (@$lang) { for (@$lang) {
if(($_||'') eq $cur) { if(($_||'') eq $man->{locale}) {
i_ $_ || 'default'; i_ $_ || 'default';
} else { } else {
a_ href => $_ ? "/man.$_/$man->{name}.$man->{section}" : "/man/$man->{name}.$man->{section}", $_ || 'default'; a_ href => $_ ? "/man.$_/$man->{name}.$man->{section}" : "/man/$man->{name}.$man->{section}", $_ || 'default';
@ -894,7 +893,7 @@ TUWF::get qr{/(?<fmt>man|txt|raw)(?:\.(?<shorthash>[a-fA-F0-9]{8}))?(?:\.(?<lang
push @where, sql 'v.version =', \$ver if length $ver; push @where, sql 'v.version =', \$ver if length $ver;
push @where, sql 'f.shorthash =', \shorthash_to_int $shorthash if $shorthash; push @where, sql 'f.shorthash =', \shorthash_to_int $shorthash if $shorthash;
push @where, sql 'l.locale ilike', \(escape_like($lang).'%') if $lang; push @where, sql 'l.locale =', \$lang if $lang;
my($man, $section) = man_pref_name $name, sql_and @where; my($man, $section) = man_pref_name $name, sql_and @where;
return tuwf->resNotFound if !$man; return tuwf->resNotFound if !$man;
@ -1137,7 +1136,7 @@ TUWF::get qr{/loc/([a-fA-F0-9]{40})}, sub {
TUWF::get qr{/ver(?:\.([a-fA-F0-9]{8}))?(?:\.([^/]+))?/([^/]+)\.([0-9a-zA-Z])}, sub { TUWF::get qr{/ver(?:\.([a-fA-F0-9]{8}))?(?:\.([^/]+))?/([^/]+)\.([0-9a-zA-Z])}, sub {
my($shorthash, $lang, $name, $sect) = tuwf->captures(1,2,3,4); my($shorthash, $lang, $name, $sect) = tuwf->captures(1,2,3,4);
$shorthash = $shorthash ? shorthash_to_int $shorthash : -1; $shorthash = $shorthash ? shorthash_to_int $shorthash : -1;
($lang ||= '') =~ s/\..*//; $lang ||= '';
my $l = tuwf->dbAlli(' my $l = tuwf->dbAlli('
SELECT p.system, p.name AS package, v.version, v.released, f.shorthash SELECT p.system, p.name AS package, v.version, v.released, f.shorthash
@ -1145,7 +1144,7 @@ TUWF::get qr{/ver(?:\.([a-fA-F0-9]{8}))?(?:\.([^/]+))?/([^/]+)\.([0-9a-zA-Z])},
JOIN package_versions v ON v.id = f.pkgver JOIN package_versions v ON v.id = f.pkgver
JOIN packages p ON p.id = v.package JOIN packages p ON p.id = v.package
WHERE f.man = (SELECT id FROM mans WHERE name =', \$name, 'AND section =', \$sect, ') WHERE f.man = (SELECT id FROM mans WHERE name =', \$name, 'AND section =', \$sect, ')
AND f.locale IN(SELECT id FROM locales WHERE locale', $lang ? ('ILIKE', \(escape_like($lang).'%')) : ("= ''"), ') AND f.locale IN(SELECT id FROM locales WHERE locale =', \$lang, ')
ORDER BY p.system DESC, p.name, v.released DESC, f.shorthash ORDER BY p.system DESC, p.name, v.released DESC, f.shorthash
'); ');
@ -1163,7 +1162,7 @@ TUWF::get qr{/ver(?:\.([a-fA-F0-9]{8}))?(?:\.([^/]+))?/([^/]+)\.([0-9a-zA-Z])},
for (0..$#{$langs}) { for (0..$#{$langs}) {
txt_ ', ' if $_ > 0; txt_ ', ' if $_ > 0;
if(($langs->[$_]||'') eq $lang) { if(($langs->[$_]||'') eq $lang) {
i_ $langs->[$_] || 'default'; b_ $langs->[$_] || 'default';
} else { } else {
a_ href => '/ver'.($langs->[$_]?".$langs->[$_]":'')."/$name.$sect", $langs->[$_] || 'default'; a_ href => '/ver'.($langs->[$_]?".$langs->[$_]":'')."/$name.$sect", $langs->[$_] || 'default';
} }