Implement man selection algorithm in SQL + fix various related bugs
Man selection has to be performed over several thousand rows in some cases. Loading all those in Perl and then doing the selection isn't very efficient[1]. The getman() implementation was also buggy: The comparison function used to determine which man page should be preferred was not associative[2], and the result thus depended on the order in which the man pages were compared. This resulted in some wrong selections in some cases. While I was at it, I also made the selection more strict: - /man/unknown-hash would previously ignore the hash and just select whatever man page. Now it results in a 404. - Same with /man.unknown-section - /man.section/hash is now disallowed, it's either /man.section or /man/hash. 1) Note that all possible man pages are currently still loaded into Perl anyway, because the ugly navigation menu on the right needs them. I plan to revamp that entire menu to be more efficient and usable. 2) Initially I wrote the SQL implementation in a similar fashion to the Perl implementation, and ended up with the same bug. I wasted more than a day before I finally got to the current CTE query.
This commit is contained in:
parent
ed00c5fd46
commit
659b7afece
3 changed files with 97 additions and 57 deletions
|
|
@ -171,3 +171,13 @@ $$ LANGUAGE SQL;
|
|||
CREATE OR REPLACE FUNCTION name_from_filename(text) RETURNS text AS $$
|
||||
SELECT regexp_replace(basename_from_filename($1), E'^(.+)\\.[^.]+$', E'\\1');
|
||||
$$ LANGUAGE SQL;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION is_english_locale(locale text) RETURNS bool AS $$
|
||||
SELECT locale IS NULL OR locale LIKE 'en%';
|
||||
$$ IMMUTABLE LANGUAGE SQL;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION is_standard_man_location(path text) RETURNS bool AS $$
|
||||
SELECT path LIKE '/usr/share/man/man%' OR path LIKE '/usr/local/man/man%';
|
||||
$$ IMMUTABLE LANGUAGE sql;
|
||||
|
|
|
|||
7
sql/update-2016-10-09.sql
Normal file
7
sql/update-2016-10-09.sql
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
CREATE OR REPLACE FUNCTION is_english_locale(locale text) RETURNS bool AS $$
|
||||
SELECT locale IS NULL OR locale LIKE 'en%';
|
||||
$$ IMMUTABLE LANGUAGE SQL;
|
||||
|
||||
CREATE OR REPLACE FUNCTION is_standard_man_location(path text) RETURNS bool AS $$
|
||||
SELECT path LIKE '/usr/share/man/man%' OR path LIKE '/usr/local/man/man%';
|
||||
$$ IMMUTABLE LANGUAGE sql;
|
||||
137
www/index.pl
137
www/index.pl
|
|
@ -457,62 +457,6 @@ sub manjslist {
|
|||
}
|
||||
|
||||
|
||||
# Given the name and optionally the hash of a man page, check with a list of
|
||||
# man pages with the same name to select the right one for display.
|
||||
sub getman {
|
||||
my($self, $name, $hash) = @_;
|
||||
|
||||
# Fetch all files with $name and split off the section part if $name has one
|
||||
my $list = $self->dbManInfo(name => $name);
|
||||
my $sect;
|
||||
if(!@$list && $name =~ s/\.([^\.]+)$//) {
|
||||
$sect = $1;
|
||||
$list = $self->dbManInfo(name => $name);
|
||||
}
|
||||
return (undef, undef) if !@$list;
|
||||
|
||||
# If we already have a shorthash, just get the full hash
|
||||
if($hash) {
|
||||
$_->{hash} =~ /^$hash/ && return ($_, $list) for (@$list);
|
||||
}
|
||||
|
||||
# If that failed, use some heuristics
|
||||
my $cmp = sub {
|
||||
local($a,$b) = @_;
|
||||
# English or non-locale packages always win
|
||||
!(($a->{locale}||'') =~ /^(en|$)/) != !(($b->{locale}||'') =~ /^(en|$)/)
|
||||
? (($a->{locale}||'') =~ /^(en|$)/ ? -1 : 1)
|
||||
# Newer versions of a package have higher priority
|
||||
: $a->{system} == $b->{system} && $a->{package} eq $b->{package} && $a->{version} ne $b->{version}
|
||||
? $b->{released} cmp $a->{released}
|
||||
# Section prefix match.
|
||||
: $sect && !($a->{section} =~ /^\Q$sect/) != !($b->{section} =~ /^\Q$sect/)
|
||||
? ($a->{section} =~ /^\Q$sect/ ? -1 : 1)
|
||||
# Give lower priority to pages in a non-standard directory
|
||||
: !($a->{filename} =~ q{^/usr/share/man}) != !($b->{filename} =~ q{^/usr/share/man})
|
||||
? ($a->{filename} =~ q{^/usr/share/man} ? -1 : 1)
|
||||
# Prefer Arch over other systems
|
||||
: ($a->{system} != 1 || $b->{system} != 1) && $self->{sysbyid}{$a->{system}}{name} ne $self->{sysbyid}{$b->{system}}{name}
|
||||
? ($a->{system} == 1 ? -1 : 1)
|
||||
# Prefer a later system release over an older one
|
||||
: $a->{system} != $b->{system} && $self->{sysbyid}{$a->{system}}{name} eq $self->{sysbyid}{$b->{system}}{name}
|
||||
? $self->{sysbyid}{$b->{system}}{relorder} <=> $self->{sysbyid}{$a->{system}}{relorder}
|
||||
# Lower sections > higher sections (because 'man' does this as well)
|
||||
: substr($a->{section},0,1) ne substr($b->{section},0,1)
|
||||
? $a->{section} cmp $b->{section}
|
||||
# Sections without appendix before sections with appendix
|
||||
: $a->{section} ne $b->{section}
|
||||
? $a->{section} cmp $b->{section}
|
||||
# Fallback to hash if nothing else matters (guarantees the order is at least stable)
|
||||
: $a->{hash} cmp $b->{hash};
|
||||
};
|
||||
|
||||
my $winner = $list->[0];
|
||||
$cmp->($winner, $_) > 0 and ($winner = $_) for (@$list);
|
||||
($winner, $list);
|
||||
}
|
||||
|
||||
|
||||
sub man {
|
||||
my($self, $name, $hash) = @_;
|
||||
|
||||
|
|
@ -520,7 +464,12 @@ sub man {
|
|||
$name =~ s/%5b/[/ig;
|
||||
$name =~ s/%5d/]/ig;
|
||||
|
||||
my($man, $m) = getman($self, $name, $hash);
|
||||
my $man;
|
||||
if($hash) {
|
||||
$man = $self->dbManInfo(name => $name, shorthash => $hash)->[0];
|
||||
} else {
|
||||
($man, undef) = $self->dbManPrefName($name);
|
||||
}
|
||||
return $self->resNotFound() if !$man;
|
||||
|
||||
my $view = $self->formValidate({get => 'v', regex => qr/^[a-z2-7]+$/});
|
||||
|
|
@ -578,6 +527,7 @@ sub man {
|
|||
end;
|
||||
end;
|
||||
|
||||
my $m = $self->dbManInfo(name => $man->{name});
|
||||
$self->htmlFooter(js => { hash => substr($man->{hash}, 0, 8), name => $man->{name}, view => $view, mans => manjslist($self, $m) });
|
||||
}
|
||||
|
||||
|
|
@ -616,6 +566,12 @@ package TUWF::Object;
|
|||
use TUWF ':html', 'html_escape';
|
||||
use Time::Local 'timegm';
|
||||
|
||||
sub escape_like {
|
||||
(my $v = shift) =~ s/([_%])/\\$1/g;
|
||||
$v;
|
||||
}
|
||||
|
||||
|
||||
sub htmlHeader {
|
||||
my $self = shift;
|
||||
my %o = @_;
|
||||
|
|
@ -743,6 +699,73 @@ sub dbSearch {
|
|||
}
|
||||
|
||||
|
||||
# Get the preferred man page for the given filters. Returns a row with the same fields as dbManInfo().
|
||||
sub dbManPref {
|
||||
my($s, $name, $section, %o) = @_;
|
||||
my %where = (
|
||||
'm.name = ?' => $name,
|
||||
$section ? ('m.section LIKE ?' => escape_like($section).'%') : (),
|
||||
$o{sysid} ? ('p.system = ?' => $o{sysid}) : (),
|
||||
$o{package} ? ('p.id = ?' => $o{package}) : (),
|
||||
$o{pkgver} ? ('v.id = ?' => $o{pkgver}) : (),
|
||||
);
|
||||
|
||||
# Criteria to determine a "preferred" man page:
|
||||
# 1. english: English versions of a man page have preference over other locales
|
||||
# 2. pkgver: Newer versions of the same package have preference over older versions
|
||||
# 3. stdloc: Prefer man pages in standard locations
|
||||
# 4. secmatch: Prefer an exact section match
|
||||
# 5. arch: Prefer Arch over other systems (because it tends to be the most up-to-date, and closest to upstreams)
|
||||
# 6. sysrel: Prefer a later system release over an older release
|
||||
# 7. secorder: Lower sections before higher sections (because man does it this way, for some reason)
|
||||
# 8. Fall back on hash comparison, to ensure the result is stable
|
||||
|
||||
$s->dbAll(q{
|
||||
WITH unfiltered AS (
|
||||
SELECT s AS sys, p AS pkg, v AS ver, m AS man
|
||||
FROM man m
|
||||
JOIN package_versions v ON v.id = m.package
|
||||
JOIN packages p ON p.id = v.package
|
||||
JOIN systems s ON s.id = p.system
|
||||
!W
|
||||
), f_english AS(
|
||||
SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale((man).locale)) OR is_english_locale((man).locale)
|
||||
), f_pkgver AS(
|
||||
SELECT * FROM f_english a WHERE NOT EXISTS(SELECT 1 FROM f_english b WHERE (a.ver).package = (b.ver).package AND (a.ver).released < (b.ver).released)
|
||||
), f_stdloc AS(
|
||||
SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location((man).filename)) OR is_standard_man_location((man).filename)
|
||||
), f_secmatch AS(
|
||||
SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE (man).section = ?) OR (man).section = ?
|
||||
), f_arch AS(
|
||||
SELECT * FROM f_secmatch WHERE NOT EXISTS(SELECT 1 FROM f_secmatch WHERE (sys).id = 1) OR (sys).id = 1
|
||||
), f_sysrel AS(
|
||||
SELECT * FROM f_arch a WHERE NOT EXISTS(SELECT 1 FROM f_arch b WHERE (a.sys).name = (b.sys).name AND (a.sys).relorder < (b.sys).relorder)
|
||||
), f_secorder AS(
|
||||
SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE (a.man).section > (b.man).section)
|
||||
)
|
||||
SELECT (pkg).system, (pkg).category, (pkg).name AS package, (ver).version, (ver).released,
|
||||
(man).name, (man).section, (man).filename, (man).locale, encode((man).hash, 'hex') AS hash
|
||||
FROM f_secorder ORDER BY (man).hash LIMIT 1
|
||||
}, \%where, $section, $section)->[0];
|
||||
}
|
||||
|
||||
|
||||
# Given the name of a man page with optional section, find out the actual name
|
||||
# and section prefix of the man page and the preferred version.
|
||||
sub dbManPrefName {
|
||||
my($s, $name, %o) = @_;
|
||||
|
||||
my $man = $s->dbManPref($name, '', %o);
|
||||
return ($man, '') if $man;
|
||||
|
||||
return (undef, '') if $name !~ s/\.([^.]+)$//;
|
||||
my $section = $1;
|
||||
$man = $s->dbManPref($name, $section, %o);
|
||||
return ($man, $section) if $man;
|
||||
return (undef, '');
|
||||
}
|
||||
|
||||
|
||||
sub dbSystemGet {
|
||||
return shift->dbAll('SELECT id, name, release, short, relorder FROM systems ORDER BY name, relorder');
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue