From 997dd8728c0e08cfa7986435dbcbcf290b5192a4 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 19 Dec 2021 17:33:50 +0100 Subject: [PATCH] www: Add some new flexible URL formats New canonical permalink format is now: /man.<hash>/<system>/<name>.<section>
Including the system name makes for more predictable navigation between man pages of the same system. This new URL format also fixes a problem with not being able to browse from a package's page to the correct page when the package has multiple versions of the same man. This is also yak shaving for some new navigation and formatting features I have planned. --- www/index.pl | 290 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 189 insertions(+), 101 deletions(-) diff --git a/www/index.pl b/www/index.pl index e07f95c..fe97206 100755 --- a/www/index.pl +++ b/www/index.pl @@ -177,7 +177,7 @@ sub man_pref { # Given the name of a man page with optional section, find out the actual name -# and section prefix of the man page and the preferred version. +# and section suffix of the man page and the preferred version. sub man_pref_name { my($name, $where) = @_; @@ -378,25 +378,55 @@ TUWF::get '/info/about' => sub {

Man pages

The following URLs are available to refer to an individual man page:

-
/<name>/<8-hex-digits>
- This is the permalink format for a specific man page (e.g. /ls/910be0ed).
-
/<name>[.<section>]
+
/<name>[.<section>] or /man/<name>[.<section>]
Will try to get the latest and most-close-to-upstream version of a man - page (e.g. /socket or /socket.7). Note that this will fetch the man page - from any of the available systems, so may result in confusing scenarios - for system-specific documentation. I try to at least keep the selection - algorithm stable and deterministic, but can't provide any - guarantees.
+ page. Note that this will fetch the man page from any of the available + systems, so may result in confusing scenarios for system-specific + documentation. I try to at least keep the selection algorithm stable and + deterministic, but can't provide any guarantees. Examples:
+ /socket
+ /socket.7
+ /man/socket.7
/man/<system>/<name>[.<section>]
- Will get the latest version of a man page from the given system (e.g. /man/ubuntu-xenial/rsync)
+ Will get the latest version of a man page from the given system, e.g.:
+ /man/ubuntu/rsync
+ /man/ubuntu-xenial/rsync
/man/<system>/<category>/<package>/<name>[.<section>]
- Will get the latest version of a man page from the given package (e.g. /man/ubuntu-xenial/net/rsync/rsync)
+ Will get the latest version of a man page from the given package, e.g.:
+ /man/ubuntu-xenial/net/rsync/rsync
/man/<system>/<category>/<package>/<version>/<name>[.<section>]
- Will get the man page from a specific package version (e.g. /man/ubuntu-xenial/net/rsync/3.1.1-3ubuntu1/rsync)
+ Will get the man page from a specific package version, e.g.:
+ /man/ubuntu-xenial/net/rsync/3.1.1-3ubuntu1/rsync +
/man.<language>/...
+ Adding a language code to the /man/ component will select + the man page in the requested language. The man page has to be available + in that language, otherwise you will get a 404. Redirects to other + languages as fallback may be implemented in the future. English man + pages are typically not tagged with a language at all, so explicitly + requesting /man.en/... will usually fail. This, too, may be + improved in the future. Examples:
+ /man.de/faked-tcp
+ /man.de/fedora/rsync.1
+
/man.<8-hex-digits>/...
+ Permalink format. Adding the shorthash of the man page to the + /man/ component of the above URLs will get that specific + man page from the requested system and/or package. The contents of the + man page should generally be the same regardless of which system or + package is included in the URL, but the UI may provide a different + navigation context. Examples:
+ /man.910be0ed/ls
+ /man.910be0ed/fedora/ls
+ /man.910be0ed/arch/ls
+ /man.910be0ed/fedora/everything/coreutils-common/ls
+
/raw...
+ In all of the above URL formats, you can replace /man with + /raw to get the raw UTF-8 encoded man page source, e.g.:
+ /raw/socket.7
+ /raw/ubuntu-xenial/net/rsync/3.1.1-3ubuntu1/rsync
+ /raw.de/faked-tcp
+ /raw.910be0ed/fedora/ls
+
/<name>/<8-hex-digits>
+ Old permalink format for a specific man page (e.g. /ls/910be0ed).

In all URLs where an optional .<section> can be provided, the search is performed as a prefix match. For example, sub { }; -TUWF::get qr{/([^/]+)/([0-9a-f]{8})/src} => sub { - my $name = normalize_name tuwf->capture(1); - my $hash = tuwf->capture(2); - - my $nfo = tuwf->dbRowi(' - SELECT m.name, m.section, v.released, c.content - FROM files f - JOIN mans m ON m.id = f.man - JOIN package_versions v ON v.id = f.pkgver - JOIN contents c ON c.id = f.content - WHERE m.name =', \$name, 'AND f.shorthash =', \shorthash_to_int($hash), ' - LIMIT 1' - ); - return tuwf->resNotFound if !$nfo->{name}; - - tuwf->resLastMod($nfo->{released}); - tuwf->resHeader('Content-Type', 'text/plain; charset=UTF-8'); - tuwf->resHeader('Content-Disposition', sprintf 'filename="%s.%s"', $nfo->{name}, $nfo->{section}); - lit $nfo->{content}; +# Object to represent the various URLs to a man page. +# +# Parameters: +# fmt => man|txt|raw +# shorthash => 8-char hex +# lang => language code +# system => system shortname +# category => package category +# package => name of the package +# version => package version +# man => name of the man page +# section => man page section +# +# URL format: +# /$fmt[.$shorthash][.$lang][/$system[/$category/$package[/$version]]]/$man[.$section] +# +# Note that the URL format has some ambiguity: +# - $category may contain a slash, so a database lookup is required to +# disambiguate between URLs with [/$version] and those without. +# - $man may contain a dot, so a database lookup is required to disambiguate +# between URLs with [.$section] and those without +# +# $system may also refer to system shortnames without the version suffix (e.g. +# 'ubuntu' rather than 'ubuntu-impish'). In that case the man page from the +# latest release of that system is chosen. +package ManUrl { + sub new { my($p,%o)=@_; bless \%o, $p } + sub set { my($o,@o)=@_; bless +{%$o,@o}, ref $o } + sub mansect { $_[0]{man}.(defined $_[0]{section} ? 
".$_[0]{section}" : '') } + use overload '""' => sub { + my($o)=@_; + "/$o->{fmt}".(defined $o->{shorthash} ? ".$o->{shorthash}" : '').(defined $o->{lang} ? ".$o->{lang}" : '') + .(defined $o->{system} ? ("/$o->{system}" + .(defined $o->{category} ? ("/$o->{category}/$o->{package}" + .(defined $o->{version} ? "/$o->{version}" : '')) : '')) : '') + .'/'.$o->mansect + }; }; sub man_nav_ { - my($man, $toc, $htmllang) = @_; + my($man, $url, $toc, $htmllang) = @_; my @systems = tuwf->dbAlli(' SELECT DISTINCT p.system @@ -655,8 +703,8 @@ sub man_nav_ { )->@*; nav_ sub { - form_ action => "/sysredir/$man->{name}.$man->{section}", method => 'get', - onsubmit => 'location.href="/man/"+system_select[system_select.selectedIndex].value+"/'.$man->{name}.'.'.$man->{section}.'";return false', + form_ action => '/sysredir/'.$url->mansect(), method => 'get', + onsubmit => 'location.href="/man/"+system_select[system_select.selectedIndex].value+"/'.$url->mansect().'";return false', sub { my %names; push $names{$_->{name}}->@*, $_ for map sysbyid->{$_->{system}}, sort { $b->{system} <=> $a->{system} } @systems; @@ -686,7 +734,7 @@ sub man_nav_ { if($man->{section} eq $_) { i_ $_; } else { - a_ href => "/$man->{name}.$_", $_; + a_ href => "/man/$man->{name}.$_", $_; } txt_ ' '; } @@ -701,7 +749,7 @@ sub man_nav_ { if(($_||'') eq $cur) { i_ $_ || 'default'; } else { - a_ href => $_ ? "/lang/$_/$man->{name}.$man->{section}" : "/$man->{name}.$man->{section}", $_ || 'default'; + a_ href => $_ ? 
"/man.$_/$man->{name}.$man->{section}" : "/man/$man->{name}.$man->{section}", $_ || 'default'; } txt_ ' '; } @@ -746,10 +794,27 @@ sub soelim { sub man_page { - my($man) = @_; + my($man, $url) = @_; + tuwf->resLastMod($man->{released}); my $content = tuwf->dbRowi('SELECT encode(hash, \'hex\') AS hash, content FROM contents WHERE id =', \$man->{content}); + if($url->{fmt} eq 'raw') { + tuwf->resHeader('Content-Type', 'text/plain; charset=UTF-8'); + tuwf->resHeader('Content-Disposition', sprintf 'filename="%s.%s"', $man->{name}, $man->{section}); + lit $content->{content}; + return; + } + my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid}, $content->{content}; + if($url->{fmt} eq 'txt') { + # TODO: The 'txt' format is kind of broken right now as it includes our HTML formatting codes. + # This feature is a WIP and not advertised at the moment, anyway. + tuwf->resHeader('Content-Type', 'text/plain; charset=UTF-8'); + tuwf->resHeader('Content-Disposition', sprintf 'filename="%s.%s.txt"', $man->{name}, $man->{section}); + lit $fmt; + return; + } + my @toc; $fmt =~ s{\n(.+?)<\/b>\n}{ push @toc, $1; @@ -769,9 +834,8 @@ sub man_page { ); my @htmllang = $man->{locale} =~ /^([a-z]{2,3})(?:_([A-Z]{2}))?(?:$|@|\.)/ ? (lang => $1.($2?"-$2":'')) : (); - tuwf->resLastMod($man->{released}); framework_ title => $man->{name}, mainclass => 'manpage', sub { - man_nav_ $man, \@toc, \@htmllang; + man_nav_ $man, $url, \@toc, \@htmllang; # TODO: Replace the 'versions' and 'locations' functionality with non-JS alternatives. 
div_ id => 'manbuttons', sub { h1_ $man->{name}; @@ -781,8 +845,8 @@ sub man_page { 'data-locale' => $man->{locale}||'', 'data-hasversions' => $hasversions?1:0, sub { - li_ sub { a_ href => "/$man->{name}/".shorthash_to_hex($man->{shorthash}).'/src', 'source' }; - li_ sub { a_ href => "/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'permalink' }; + li_ sub { a_ href => $url->set(fmt => 'raw'), 'source' }; + li_ sub { a_ href => $url->set(system => sysbyid->{$man->{system}}{short}, category => undef, shorthash => shorthash_to_hex $man->{shorthash}), 'permalink' }; } }; div_ id => 'manres', class => 'hidden', ''; @@ -791,55 +855,77 @@ sub man_page { } +# /[.section] - short and handy catch-all URL for man pages +# // - old permalink format # This one has to go before the other mappings, to ensure that links work for -# man pages called 'pkg' or 'man'. This also means that we can't have a -# system named 8 hex digits, but at least that's easy to guarantee. :) +# man pages called 'pkg' or 'man'. TUWF::get qr{/(?[^/]+)(?:/(?[0-9a-f]{8}))?} => sub { my $name = normalize_name tuwf->capture('name'); my $shorthash = tuwf->capture('hash'); - # Unfortunately, even in the permalink format with the hash, we don't know - # from which system and package we're supposed to get the man page. This - # info is used in the UI and needed in order to do .so substitution, so we - # can substitute files from the same package as the requested man page. Use - # the man_pref logic here to deterministically select a good package. - my($man, undef) = $shorthash - ? man_pref undef, sql 'm.name =', \$name, 'AND f.shorthash =', shorthash_to_int($shorthash) - : man_pref_name $name, 'true'; + my($man, $sect) = man_pref_name $name, $shorthash ? sql 'f.shorthash =', \shorthash_to_int $shorthash : 'true'; return tuwf->resNotFound() if !$man->{name}; - man_page $man; + man_page $man, ManUrl->new( + fmt => 'man', + man => length $sect ? $man->{name} : $name, + section => length $sect ? 
$sect : undef, + ); }; -TUWF::get qr{/man/([^/]+)/(.+)} => sub { - my($sys, $path) = tuwf->captures(1,2); +# ///src - old URL format to get the raw man page +TUWF::get qr{/([^/]+)/([0-9a-f]{8})/src} => sub { + my $name = normalize_name tuwf->capture(1); + my $shorthash = tuwf->capture(2); - # Path can be: - # 1. - # 2. // - # 3. /// + my($man) = man_pref_name $name, sql 'f.shorthash =', \shorthash_to_int $shorthash; + return tuwf->resNotFound if !$man->{name}; + man_page $man, ManUrl->new(fmt => 'raw', man => $name); +}; + + +TUWF::get qr{/(?man|txt|raw)(?:\.(?[a-fA-F0-9]{8}))?(?:\.(?[^/]+))?/(?.+)} => sub { + my($fmt, $shorthash, $lang, $path) = tuwf->captures(qw|fmt shorthash lang path|); + + my @where; + my $name = normalize_name($path =~ s{/?([^/]+)$}{} && $1); + my $system = $path =~ s{^([^/]+)/?}{} && $1; # $sys can be either a full system 'short' name, or a prefix (e.g. 'debian' meaning 'any debian-* version') - my $sysid = sysbyshort->{$sys}; - $sysid = $sysid ? [$sysid->{id}] : [ map sysbyshort->{$_}{id}, grep /^\Q$sys\E-/, keys sysbyshort->%* ]; - return tuwf->resNotFound if !@$sysid; - - my $man; - if($path !~ m{/}) { # (1) - ($man) = man_pref_name $path, sql 's.id IN', $sysid; - - } else { - $path =~ s{/([^/]+)$}{}; - my $name = $1; - - my($pkg, $ver) = pkg_frompath sql('system IN', $sysid), $path; # Handles (2) and (3) - return tuwf->resNotFound if !$pkg; - - ($man) = man_pref_name $name, sql 's.id IN', $sysid, 'AND p.id =', \$pkg->{id}, $ver ? ('AND v.version =', \$ver) : (); + if($system) { + my $sysid = sysbyshort->{$system}; + $sysid = $sysid ? [$sysid->{id}] : [ map sysbyshort->{$_}{id}, grep /^\Q$system\E-/, keys sysbyshort->%* ]; + return tuwf->resNotFound if !@$sysid; + push @where, sql 'system IN', $sysid; } + + # $path is now either: + # 1. $category/$package + # 2. $cagegory/$package/$version + my($pkg, $ver) = length $path ? 
pkg_frompath sql_and(@where), $path : (undef,undef); + return tuwf->resNotFound if length $path && !$pkg; + push @where, sql 'p.id =', \$pkg->{id} if $pkg; + push @where, sql 'v.version =', \$ver if length $ver; + + push @where, sql 'f.shorthash =', \shorthash_to_int $shorthash if $shorthash; + push @where, sql 'l.locale ilike', \(escape_like($lang).'%') if $lang; + + my($man, $section) = man_pref_name $name, sql_and @where; return tuwf->resNotFound if !$man; - man_page $man; + + my $url = ManUrl->new( + fmt => $fmt, + shorthash => $shorthash, + lang => $lang, + system => length $system ? $system : undef, + category => $pkg ? $pkg->{category} : undef, + package => $pkg ? $pkg->{name} : undef, + version => length $ver ? $ver : undef, + man => length $section ? $man->{name} : $name, + section => length $section ? $section : undef, + ); + man_page $man, $url; }; @@ -909,14 +995,23 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub { my $p = tuwf->validate(get => p => { onerror => 1, uint => 1, range => [1,100] })->data; my $count = tuwf->dbVali('SELECT count(*) FROM files WHERE pkgver =', \$sel->{id}); - my $mans = tuwf->dbPagei({ results => 200, page => $p }, - "SELECT m.name, m.section, f.shorthash, f.filename, l.locale - FROM files f - JOIN locales l ON l.id = f.locale - JOIN mans m ON m.id = f.man - WHERE f.pkgver =", \$sel->{id}, ' - ORDER BY m.name, l.locale, f.filename' - ); + my $mans = tuwf->dbPagei({ results => 200, page => $p }, ' + WITH lst AS ( + SELECT f.man, m.name, m.section, f.shorthash, f.filename, l.locale + FROM files f + JOIN locales l ON l.id = f.locale + JOIN mans m ON m.id = f.man + WHERE f.pkgver =', \$sel->{id}, ' + ), needlang AS ( + SELECT man FROM lst GROUP BY man HAVING count(*) > 1 + ), needhash AS ( + SELECT man, locale FROM lst GROUP BY man, locale HAVING count(*) > 1 + ) SELECT name, section, shorthash, filename, locale + , EXISTS(SELECT 1 FROM needlang WHERE man = l.man) AS needlang + , EXISTS(SELECT 1 FROM needhash WHERE man = l.man AND 
locale = l.locale) AS needhash + FROM lst l + ORDER BY name, section, locale, filename + '); # Latest version of this package determines last modification date of the page. tuwf->resLastMod($vers->[0]{released}); @@ -946,9 +1041,9 @@ TUWF::get qr{/pkg/([^/]+)/(.+)} => sub { paginate_ "/pkg/$pkgpath/$sel->{version}?p=", $count, 200, $p; ul_ sub { li_ sub { - # BUG: This URL should include the shorthash (or locale, at least), - # because the same package may have multiple pages with the same name and section. - a_ href => "/man/$pkgpath/$sel->{version}/$_->{name}.$_->{section}", "$_->{name}($_->{section})"; + # Only add the hash or locale to the URL if it's necessary to select the proper man page. + my $ext = $_->{needhash} ? '.'.shorthash_to_hex $_->{shorthash} : $_->{needlang} ? ".$_->{locale}" : ''; + a_ href => "/man$ext/$pkgpath/$sel->{version}/$_->{name}.$_->{section}", "$_->{name}($_->{section})"; b_ " $_->{locale}" if $_->{locale}; small_ " $_->{filename}"; } for(@$mans); @@ -974,17 +1069,10 @@ TUWF::get qr{/browse/([^/]+)/([^/]+)(?:/([^/]+))?} => sub { # Redirect for the system selection box, for visitors who have disabled JS. TUWF::get qr{/sysredir/([^/]+)} => sub { tuwf->resRedirect('/man/'.(tuwf->reqGet('system')//'arch').'/'.tuwf->capture(1), 'temp') }; - -# Redirect for a specific language for a man page. -# I'm not a fan of this solution; might drop it in the future. -TUWF::get qr{/lang/([^/]+)/([^/]+)} => sub { - my $lang = tuwf->capture(1); - my $name = normalize_name tuwf->capture(2); - my($man, undef) = man_pref_name $name, - sql "substring(l.locale from '^[^.]+') ilike", \(escape_like($lang).'%'); - return tuwf->resNotFound if !length $man->{name}; - tuwf->resRedirect("/$man->{name}/".shorthash_to_hex($man->{shorthash}), 'temp'); -}; +# Redirect for a specific language for a man page. I have no idea if anyone +# still uses this URL format, but it was supported at some point, so let's keep +# it around. 
+TUWF::get qr{/lang/([^/]+)/([^/]+)} => sub { tuwf->resRedirect('/man.'.tuwf->capture(1).'/'.tuwf->capture(2), 'temp') }; TUWF::get '/json/tree.json' => sub {