#!/usr/bin/perl

use v5.26;
use warnings;
use TUWF ':html5_', 'uri_escape';
use POSIX 'ceil';
use List::Util 'uniq', 'min';
use SQL::Interp 'sql', 'sql_interp';
use Time::Local 'timegm';
use Cwd 'abs_path';

# $ROOT is the project root: the absolute path of this script with the
# trailing "/www/index.pl" removed.
our $ROOT;
BEGIN { ($ROOT = abs_path $0) =~ s{/www/index\.pl$}{}; }

# Force the pure-perl AnyEvent backend; More lightweight and we don't need the
# performance of EV. Fixes an issue with subprocess spawning under TUWF's
# built-in web server that I haven't been able to track down.
BEGIN { $ENV{PERL_ANYEVENT_MODEL} = 'Perl'; }

use lib "$ROOT/lib/ManUtils/inst/lib/perl5";
use ManUtils;


TUWF::set(
  logfile => $ENV{TUWF_LOG},
  db_login => [undef, undef, undef],
  debug => $ENV{TUWF_DEBUG},
  xml_pretty => 0,
  log_slow_pages => 500,
);


# Try to answer the request with a file from "$ROOT/www" before any route
# runs. NOTE(review): the {_TUWF}{http} flag presumably means "running under
# TUWF's built-in web server" -- confirm against TUWF.
TUWF::hook before => sub {
  if(tuwf->{_TUWF}{http}) {
    if(tuwf->resFile("$ROOT/www", tuwf->reqPath)) {
      tuwf->resHeader('Cache-Control' => 'max-age=31536000');
      tuwf->done;
    }
  }
};


# TODO: Add SQL::Interp support to TUWF directly, in some form.
# Each db*i() method passes its arguments through sql_interp() and forwards
# the result to the corresponding TUWF db*() method.
sub TUWF::Object::dbExeci { shift->dbExec(sql_interp @_) }
sub TUWF::Object::dbVali  { shift->dbVal (sql_interp @_) }
sub TUWF::Object::dbRowi  { shift->dbRow (sql_interp @_) }
sub TUWF::Object::dbAlli  { shift->dbAll (sql_interp @_) }
# dbPagei() takes the paging options as its first argument, then the query.
sub TUWF::Object::dbPagei { shift->dbPage(shift, sql_interp @_) }


# Set the last modification time from a string in yyyy-mm-dd format.
# Does nothing when the date is undefined or does not start with yyyy-mm-dd.
sub TUWF::Object::resLastMod {
  my($s, $d) = @_;
  # The defined() check avoids an "uninitialized value" warning when the
  # caller has no date to offer.
  return if !defined $d || $d !~ /^(\d{4})-(\d{2})-(\d{2})/;
  my @t = gmtime timegm 0,0,0,$3,$2-1,$1;
  $s->resHeader('Last-Modified', sprintf '%s, %02d %s %04d %02d:%02d:%02d GMT',
    (qw|Sun Mon Tue Wed Thu Fri Sat|)[$t[6]], $t[3],
    (qw|Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec|)[$t[4]],
    $t[5]+1900, $t[2], $t[1], $t[0]);
}


# The systems table doesn't change often, so keep an in-memory cache for quick lookups.
# Returns an arrayref of system rows; each row gets an extra 'full' field
# holding "name release" (or just the name when there is no release).
sub systems {
  state $s ||= [ map {
    $_->{full} = $_->{name}.($_->{release}?' '.$_->{release}:'');
    $_
  } tuwf->dbAll('SELECT id, name, release, short FROM systems ORDER BY name, id')->@* ];
}

# Lookup tables over systems(): by numeric id and by short name.
sub sysbyid    { state $s ||= { map +($_->{id},    $_), systems->@* } }
sub sysbyshort { state $s ||= { map +($_->{short}, $_), systems->@* } }


# URL-unescape some special characters that may occur in man names.
# Firefox seems to escape [ and ] in URLs. It doesn't really have to...
sub normalize_name { $_[0] =~ s/%5b/[/irg =~ s/%5d/]/irg =~ s/%20/ /rg }

# Convert a shorthash between its integer form and its hex form.
# Both directions go through pack/unpack with the 'i' (signed int) template.
sub shorthash_to_hex { unpack 'H*', pack 'i', $_[0] } # int -> hex
sub shorthash_to_int { unpack 'i', pack 'H*', $_[0] } # hex -> int

# Subquery returning all packages that have a man page.
my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = pv.id)))';


# Backslash-escape the LIKE metacharacters (_ % \) in a string.
sub escape_like { $_[0] =~ s/([_%\\])/\\$1/rg }

# Join the given SQL fragments with $sep between each pair.
sub sql_join {
  my $sep = shift;
  my @args = map +($sep, $_), @_;
  # Drop the leading separator.
  sql @args[1..$#args];
}

# AND/OR the given fragments together, each wrapped in parentheses. With no
# fragments, sql_and yields an always-true and sql_or an always-false clause.
sub sql_and { @_ ? sql_join 'AND', map sql('(', $_, ')'), @_ : sql '1=1' }
sub sql_or  { @_ ? sql_join 'OR',  map sql('(', $_, ')'), @_ : sql '1=0' }


# Returns ($pkg_obj, $ver_str, $should_redir)
sub pkg_frompath {
  my($sys_where, $path) = @_;

  # $path could either be:
  #   $name
  #   $name/$version
  #   $category/$name           (deprecated)
  #   $category/$name/$version  (deprecated)
  # $category may contain a slash. We don't have the categories in the
  # database anymore, so we'll just provide a redirect for anything that
  # looks like it might have been a category.
  # $name currently never contains a slash but may do so in the future, so
  # let's also handle that.
# (pkg_frompath, continued) Candidate package names: every suffix of the
# path, both with and without its last component (which may be a version).
  my @comp = split '/', $path;
  my @names = map join('/', @$_), map +([@comp[$_..$#comp]], [@comp[$_..$#comp-1]]), 0..$#comp;
  my $pkg = tuwf->dbRowi('
    SELECT id, system, name
      FROM', $packages_with_man, 'p
     WHERE', $sys_where, 'AND name IN', \@names, '
     ORDER BY system DESC, length(name) DESC
     LIMIT 1
  ');
  return (undef, '', 0) if !$pkg->{id};

  # The version is whatever single component follows the package name at the
  # end of the path. A redirect is wanted when the path does not start with
  # the package name (i.e. something that looks like a category precedes it).
  my $ver = $path =~ m{\Q$pkg->{name}\E/([^/]+)$} ? $1 : '';
  ($pkg, $ver, $path !~ /^\Q$pkg->{name}/);
}


# Get the preferred man page for the given filters.
#   $section  optional section, matched as a prefix of m.section
#   $where    SQL condition over the joined tables of the 'unfiltered' CTE
# Returns the row fetched by dbRowi().
sub man_pref {
  my($section, $where) = @_;

  $where = sql_and $where, sql 'm.section LIKE', \(escape_like($section).'%') if length $section;

  # Criteria to determine a "preferred" man page:
  # 1. english:  English versions of a man page have preference over other locales
  # 2. pkgver:   Newer versions of the same package have preference over older versions
  # 3. stdloc:   Prefer man pages in standard locations
  # 4. secmatch: Prefer an exact section match
  # 5. arch:     Prefer Arch over other systems (because it tends to be the most up-to-date, and closest to upstreams)
  # 6. debian:   If there's no Arch, prefer latest Debian over other systems (again, tends to be more up-to-date)
  #      (also resolves distro-specific tooling disputes such as https://code.blicky.net/yorhel/manned/issues/1 )
  # 7. sysrel:   Prefer a more recent system release over an older release
  # 8. secorder: Lower sections before higher sections (because man does it this way, for some reason)
  # 9. pkgdate:  Prefer more recent packages (cross-distro)
  # 10. Fall back on shorthash comparison, to ensure the result is stable

  state $archid = sysbyshort->{arch}{id};
  # Highest system id among the systems whose short name starts with "debian-".
  state $debid = (sort { $b->{id} <=> $a->{id} } grep $_->{short} =~ /^debian-/, systems->@*)[0]{id};

  tuwf->dbRowi(q{
    WITH unfiltered AS (
      SELECT m.name, m.section, l.locale, f.shorthash, f.content, f.filename, s AS sys, p AS pkg, v AS ver
        FROM files f
        JOIN locales l ON l.id = f.locale
        JOIN mans m ON m.id = f.man
        JOIN package_versions v ON v.id = f.pkgver
        JOIN packages p ON p.id = v.package
        JOIN systems s ON s.id = p.system
       WHERE}, $where, q{
    ), f_english AS(
      SELECT * FROM unfiltered
       WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale(locale)) OR is_english_locale(locale)
    ), f_pkgver AS(
      SELECT * FROM f_english a
       WHERE NOT EXISTS(SELECT 1 FROM f_english b WHERE (a.ver).package = (b.ver).package AND (a.ver).released < (b.ver).released)
    ), f_stdloc AS(
      SELECT * FROM f_pkgver
       WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location(filename)) OR is_standard_man_location(filename)
    ), f_secmatch AS(
      SELECT * FROM f_stdloc
       WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE section =}, \$section, q{) OR section =}, \$section, q{
    ), f_arch AS(
      SELECT * FROM f_secmatch
       WHERE NOT EXISTS(SELECT 1 FROM}, length($section) ? 'f_secmatch' : 'f_stdloc', qq{WHERE (sys).id = $archid) OR (sys).id = $archid
    ), f_debian AS(
      SELECT * FROM f_arch
       WHERE NOT EXISTS(SELECT 1 FROM f_arch WHERE (sys).id = $debid) OR (sys).id = $debid
    ), f_sysrel AS(
      SELECT * FROM f_debian a
       WHERE NOT EXISTS(SELECT 1 FROM f_debian b WHERE (a.sys).name = (b.sys).name AND (a.sys).id < (b.sys).id)
    ), f_secorder AS(
      SELECT * FROM f_sysrel a
       WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE a.section > b.section)
    ), f_pkgdate AS(
      SELECT * FROM f_secorder a
       WHERE NOT EXISTS(SELECT 1 FROM f_secorder b WHERE (a.ver).released < (b.ver).released)
    )
    SELECT (pkg).system, (pkg).name AS package, (ver).version, (ver).released, (ver).id AS verid
         , name, section, filename, locale, shorthash, content
      FROM f_pkgdate
     ORDER BY shorthash
     LIMIT 1
  });
}


# Given the name of a man page with optional section, find out the actual name
# and section suffix of the man page and the preferred version.
# Returns ($man, $section_suffix); $man is undef when nothing matched and the
# suffix is '' when $name was not split into name + section.
sub man_pref_name {
  my($name, $where) = @_;

  # Check the <name>.<section> format first, because ~most~ cases where
  # there's a collision in the <name>.<section> format, the <name>-only page
  # is either uninteresting or a file name parsing error.
  if ($name =~ /^(.+)\.([^.]+)$/) {
    my($n, $s) = ($1,$2);
    my $man = man_pref($s, sql_and($where, sql('m.name =', \$n)));
    return ($man, $s) if length $man->{name};
  }

  my $man = man_pref(undef, sql_and($where, sql('m.name =', \$name)));
  length $man->{name} ? ($man, '') : (undef, '');
}


# Returns an arrayref with the distinct languages (the locale up to the first
# dot) in which the given man page name + section is available. NULLs sort
# first.
sub man_languages {
  my($name, $sect) = @_;
  [ map $_->{lang}, tuwf->dbAlli(
    "SELECT DISTINCT substring(l.locale from '^[^.]+') AS lang
       FROM files f
       JOIN mans m ON m.id = f.man
       JOIN locales l ON l.id = f.locale
      WHERE m.name =", \$name, 'AND m.section =', \$sect, "
      ORDER BY substring(l.locale from '^[^.]+') NULLS FIRST"
  )->@* ];
}


# Emit the complete HTML page around a content callback.
# Usage: framework_ title => $title, mainclass => $class, q => $query, sub { ... };
# The last argument is the code reference that renders the <main> contents;
# the other arguments are options (title, mainclass, q).
sub framework_ {
  my $content = pop;
  my(%o) = @_;

  html_ lang => 'en', sub {
    head_ sub {
      link_ rel => 'stylesheet', type => 'text/css', href => '/man.css?7';
      title_ $o{title}.' - manned.org';
    };
    body_ sub {
      header_ sub {
        a_ href => '/', 'Manned.org';
        form_ action => '/browse/search', method => 'get', sub {
          input_ type => 'text', name => 'q', id => 'q', placeholder => 'ncdu, btrfs.8, git-*', value => $o{q}, tabindex => 1;
          input_ type => 'submit', value => 'Search';
        }
      };
      main_ class => $o{mainclass}, $content;
      footer_ sub {
        span_ sub {
          a_ href => '/info/about', 'about';
          txt_ ' | ';
          a_ href => 'mailto:manned@yorhel.nl', 'contact';
          txt_ ' | ';
          a_ href => 'https://code.blicky.net/yorhel/manned', 'source';
        };
        span_ 'all manual pages are copyrighted by their respective authors.';
      };
    }
  };

  # write the SQL queries as a HTML comment when debugging is enabled
  # (stolen from VNDB code)
  # (TODO: Move this into TUWF or something)
  if(tuwf->debug) {
    my(@sql_r, @sql_i) = ();
    for (tuwf->{_TUWF}{DB}{queries}->@*) {
      my($sql, $params, $time) = @$_;
      # Numeric placeholders sort numerically, everything else as strings.
      my @params = sort { $a =~ /^[0-9]+$/ && $b =~ /^[0-9]+$/ ? $a <=> $b : $a cmp $b } keys %$params;
      my $prefix = sprintf "  [%6.2fms] ", $time*1000;
      push @sql_r, sprintf "%s%s | %s", $prefix, $sql, join ', ', map "$_:".DBI::neat($params->{$_}), @params;
      my $i=1;
      push @sql_i, $prefix.($sql =~ s/\?/tuwf->dbh->quote($params->{$i++})/egr);
    }
    my $sql_r = join "\n", @sql_r;
    my $sql_i = join "\n", @sql_i;
    my $modules = join "\n", sort keys %INC;
    # NOTE(review): $sql_r, $sql_i and $modules are computed but never
    # emitted here; the HTML comment announced above appears to have been
    # lost from this literal. Restore it from version control.
    lit_ "\n";
  }
}


# Emit page navigation links. $url is the link prefix to which the page
# number is appended, $count the total number of items, $perpage the page
# size and $p the current page. Emits nothing when everything fits on one page.
sub paginate_ {
  my($url, $count, $perpage, $p) = @_;
  return if $count <= $perpage;

  # A link to page $c, or a bold label when $c is the current page.
  my sub l_ {
    my($c)= @_;
    a_ href => "$url$c", $c if $c != $p;
    b_ $c if $c == $p;
  };

  my $lp = ceil($count/$perpage);   # last page
  nav_ class => 'paginate', sub {
    l_ 1 if $p > 1+4;
    b_ '...' if $p > 1+5;
    # Up to four pages on either side of the current one.
    l_ $_ for (($p > 4 ? $p-4 : 1)..($p+4 > $lp ? $lp : $p+4));
    b_ '...' if $p < $lp-5;
    l_ $lp if $p < $lp-4;
  }
}


TUWF::set error_404_handler => sub {
  tuwf->resStatus(404);
  my $title = 'No manual entry for '.tuwf->reqPath;
  framework_ title => $title, sub {
    h1_ $title;
    p_ 'That is, the page you were looking for doesn\'t exist.';
  };
};


# Home page.
TUWF::get '/' => sub {
  my $stats = tuwf->dbRow('SELECT * FROM stats_cache');

  # Insert thousands separators into a number.
  sub num { local $_=shift; 1 while(s/(\d)(\d{3})($|,)/$1,$2/); $_ };

  framework_ title => 'Man Pages Archive', mainclass => 'thin', sub {
    h1_ 'Welcome to Manned.org';
    h2_ 'The archive for man pages';
    lit_ sprintf <<' _', map num($stats->{$_}), qw|hashes mans files packages|;

Indexing %s versions of %s manual pages found in %s files of %s packages.

Manned.org aims to index all manual pages from a variety of systems, both old and new, and provides a convenient interface for looking up and viewing the various versions of each man page. More information »

_ h2_ 'Indexed systems'; div_ class => 'systems', sub { my %sys; push $sys{$_->{name}}->@*, $_ for systems->@*; div_ sub { my $sys = $sys{$_}; my $img = $sys->[0]{short} =~ s/^(.+)-.+$/$1/r; if(@$sys == 1) { a_ href => "/pkg/$sys->[0]{short}", sub { img_ width => 50, height => 50, src => "images/$img.png"; b_ $sys->[0]{name}; }; return; } img_ width => 50, height => 50, src => "images/$img.png"; div_ sub { b_ $sys->[0]{name}; for(reverse @$sys) { a_ href => "/pkg/$_->{short}", $_->{release}; lit_ ' '; } }; } for sort keys %sys; }; h2_ 'Other relevant sites'; ul_ sub { li_ sub { a_ href => 'https://man7.org/linux/man-pages/index.html', 'man7.org'; txt_ ' - Linux man pages from several upstream projects.' }; li_ sub { a_ href => 'https://manpag.es/', 'ManPag.es'; txt_ ' - Man pages from several Linux distributions.' }; li_ sub { a_ href => 'https://manpage.me/', 'manpage.me'; txt_ ' - Has a large collection as well, including from older Unices.' }; li_ sub { a_ href => 'https://www.mankier.com/', 'ManKier'; txt_ ' - Fedora Rawhide + some manually imported man pages; Nicely formatted and with some unique features.' }; li_ sub { a_ href => 'https://man.cx/', 'man.cx'; txt_ ' - Man pages extracted from Debian testing.' }; li_ sub { a_ href => 'http://man.he.net/', 'man.he.net'; txt_ ' - Also seems to be from a Debian-like system.' }; li_ sub { a_ href => 'https://linux.die.net/man/', 'die.net'; txt_ ' - Seems to be based on an RPM-based Linux distribution.' }; li_ sub { a_ href => 'https://manpages.org/', 'manpages.org'; txt_ ' - Lots of mostly-nicely formatted man pages, no clue about source.' }; li_ sub { a_ href => 'https://www.manpagez.com/', 'manpagez.com'; txt_ ' - Mac OS X, has some GTK-html and texinfo documentation as well.' 
}; li_ sub { a_ href => 'https://man.archlinux.org/', 'Arch Linux Man Pages' }; li_ sub { a_ href => 'https://manpages.debian.org/', 'Debian Man Pages' }; li_ sub { a_ href => 'https://www.dragonflybsd.org/cgi/web-man', 'DragonFlyBSD Man Pages' }; li_ sub { a_ href => 'https://www.freebsd.org/cgi/man.cgi', 'FreeBSD.org Man Pages' }; li_ sub { a_ href => 'https://man.netbsd.org/', 'NetBSD Man Pages' }; li_ sub { a_ href => 'https://www.openbsd.org/cgi-bin/man.cgi', 'OpenBSD Man Pages' }; li_ sub { a_ href => 'https://manpages.ubuntu.com/', 'Ubuntu Manuals' }; li_ sub { a_ href => 'https://man.voidlinux.org/', 'Void Linux manpages' }; }; }; }; TUWF::get '/info/about' => sub { framework_ title => 'About', mainclass => 'thin', sub { h1_ 'About Manned.org'; lit_ <<' _';

Goal

The state of online indices of manual pages used to be a sad one. Existing sites used to only offer you a single version of a man page: From one origin, and often only in a single language. Most didn't even tell you where the manual actually originated from, making it very hard to determine whether the manual you found applied to your situation and even harder to find a manual for a specific system. Additionally, some sites rendered the manuals in an unreadable way, didn't correctly handle special formatting - like tables - or didn't correctly display non-ASCII characters.

Nowadays there are many good alternatives, but Manned.org was one of the sites created in order to improve that situation. This site aims to index the manual pages from a variety of systems, both old and new, and allows you to browse through the various versions of a manual page to find out how each system behaves. The manuals are stored in the database as UTF-8, and are passed through groff to render them in (mostly) the same way as they are displayed in your terminal.

This website is open source (AGPL licensed) and written in a combination of Perl and Rust. The entire PostgreSQL database is available for download.

URL format

You can link to specific packages and man pages with several URL formats. These URLs will keep working in the future, so you should not have to worry about eventual dead links.

Man pages

The following URLs are available to refer to an individual man page:

/<name>[.<section>] or /man/<name>[.<section>]
Will try to get the latest and most-close-to-upstream version of a man page. Note that this will fetch the man page from any of the available systems, so may result in confusing scenarios for system-specific documentation. I try to at least keep the selection algorithm stable and deterministic, but can't provide any guarantees. Examples:
/socket
/socket.7
/man/socket.7
/man/<system>/<name>[.<section>]
Will get the latest version of a man page from the given system, e.g.:
/man/ubuntu/rsync
/man/ubuntu-xenial/rsync
/man/<system>/<package>/<name>[.<section>]
Will get the latest version of a man page from the given package, e.g.:
/man/ubuntu-xenial/rsync/rsync
/man/<system>/<package>/<version>/<name>[.<section>]
Will get the man page from a specific package version, e.g.:
/man/ubuntu-xenial/rsync/3.1.1-3ubuntu1/rsync
/man.<language>/...
Adding a language code to the /man/ component will select the man page in the requested language. The man page has to be available in that language, otherwise you will get a 404. Redirects to other languages as fallback may be implemented in the future. English man pages are typically not tagged with a language at all, so explicitly requesting /man.en/... will usually fail. This, too, may be improved in the future. Examples:
/man.de/faked-tcp
/man.de/fedora/rsync.1
/man.<8-hex-digits>/...
Permalink format. Adding the shorthash of the man page to the /man/ component of the above URLs will get that specific man page from the requested system and/or package. The contents of the man page should generally be the same regardless of which system or package is included in the URL, but the UI may provide a different navigation context. Examples:
/man.910be0ed/ls
/man.910be0ed/fedora/ls
/man.910be0ed/arch/ls
/man.910be0ed/fedora/everything/coreutils-common/ls
/raw...
In all of the above URL formats, you can change /man with /raw to get the raw UTF-8 encoded man page source, e.g.:
/raw/socket.7
/raw/ubuntu-xenial/rsync/3.1.1-3ubuntu1/rsync
/raw.de/faked-tcp
/raw.910be0ed/fedora/ls
/<name>/<8-hex-digits>
Old permalink format for a specific man page (e.g. /ls/910be0ed).

In all URLs where an optional .<section> can be provided, the search is performed as a prefix match. For example, /cat.3 will provide the cat.3tcl man page if no exact cat.3 version is available. Linking to the full section name is also possible: /cat.3tcl. If no section is given and multiple sections are available, the lowest section number is chosen.

Packages

Linking to individual packages is also possible. These pages will show a listing of all manual pages available in the given package.

/pkg/<system>/<package>
For the latest version of a package (e.g. /pkg/arch/coreutils).
/pkg/<system>/<package>/<version>
For a particular version of a package (e.g. /pkg/arch/coreutils/8.25-2).

This site only indexes packages that actually have manual pages, linking to a package that doesn't have any will result in a 404 page.

The indexing process

All man pages are fetched right from the (binary) packages available on the public repositories of Linux distributions. In particular:

Alpine Linux
The main (since 3.0) and community (since 3.3) repositories are indexed for the x86_64 architecture. Indexing started in December 2021, packages and releases not available in the repositories at that time have not been indexed. I haven't found an archive for version 2.x releases yet.
Arch Linux
The core, extra and community repositories are fetched from a local Arch mirror. Indexing started around the beginning of June 2012. The i686 architecture was indexed until November 6th, 2016; packages after that were fetched from x86_64.
Debian
Historical releases were fetched from http://archive.debian.org/debian/ and http://snapshot.debian.org/. For buzz, rex and bo, we're missing a few man pages because some packages were missing from the repository archives. Where available, all components (main, contrib and non-free) from the $release and $release-updates repositories are indexed.
CentOS
Historical releases were fetched from vault.centos.org, current releases from a local mirror. Where applicable, the following repositories were indexed: addons, centosplus, contrib, extras, os. The i386 architecture was indexed for versions lower than 7.0, since 7.0 the packages from x86_64 are indexed.
Fedora
Historical releases were fetched from archives.fedoraproject.org, current releases from a local repository. Fedora Core 1 till 6 are (incorrectly) called 'Fedora' here. To compensate for that, Fedora 3 till 6 also include the Extras repository. For Fedora 7 and later, the 'Everything' and 'updates' repositories are indexed. The i386 arch was indexed for Fedora 17 and older, the x86_64 arch starting with Fedora 18.
FreeBSD
Historical releases were fetched from http://ftp-archive.freebsd.org/mirror/FreeBSD-Archive/. The base installation tarballs are included in the database as packages prefixed with core-. The package repositories have also been indexed, except for 2.0.5 - 2.2.7 and 3.0 - 3.3 because those were not available on the ftp archive. Only the -RELEASE repositories have been included, which is generally a snapshot of the ports directory around the time of the release. The release dates indicated for many packages were guessed from the file modification dates in the tarball, and may be inaccurate. The i386 arch was indexed for FreeBSD 11.0 and older, the amd64 arch starting with 11.1.
NetBSD
Only the core installation sets have been indexed, pkgsrc is awesome but out of scope for now. The i386 arch was indexed for 5.x and older, the amd64 arch starting with 6.0. Releases before 1.3 only distributed preformatted man pages and have therefore not been indexed. The original roff sources could perhaps be extracted from the source tarballs, but that's a project for another time.
OpenBSD
Only the core file sets have been indexed, no packages (yet). All from amd64. Releases before 5.0 distributed preformatted man pages and have therefore not been indexed.
Ubuntu
Historical releases were fetched from http://old-releases.ubuntu.com/ubuntu/, supported releases from a local mirror. All components (main, universe, restricted and multiverse) from the $release, $release-updates and $release-security repositories are indexed. Indexing started around mid June 2012. All releases before 2017 were indexed from the i386 repositories, starting with 17.04 the amd64 repositories were used.

Only packages for a single architecture (i386 or amd64) are scanned. To my knowledge, packages that come with different manuals for different architectures either don't exist or are extremely rare. It does happen that some packages are not available for all architectures. Usually, though, every package is at least available for the most popular architecture, so hopefully we're not missing out on much.

The repositories are scanned for new packages on a daily basis.

Database download

This site is backed by a PostgreSQL database containing all the man pages. Weekly dumps of the full database are available for download at http://dl.manned.org/dumps/.

Be warned that the download server may not be terribly fast or reliable, so it is advisable to use a client that supports resumption of partial downloads. See wget's -c or curl's -C.

The database schema is "documented" at schema.sql in the git repo. Keep in mind that these dumps don't constitute a stable API and, while this won't happen frequently, incompatible schema changes or Postgres major version bumps will occasionally occur.

Future plans

This site isn't nearly as awesome yet as it could be. Here's some ideas that would be nice to have in the future:

  • Index a few more systems: Gentoo (now that it has official binary packages) and perhaps others.
  • Better browsing and discovery features.
  • Improved, more intelligent, search,
  • apropos(1) emulation(?),
  • Diffs between various versions of a man page,
  • Anchor links within man pages, for easier linking to a section or paragraph,
  • Alternative formats (Text, PDF, more semantic HTML, etc),
  • A command-line client, like man(1) with manned.org as database backend.

All manual pages are copyrighted by their respective authors. The manuals have been fetched from publicly available repositories of free and (primarily) open source software. The distributors of said software have put in efforts to only include software and documentation that allows free distribution. Nonetheless, if a manual that does not permit redistribution has been inadvertently included in our index, please let me know and I will have it removed as soon as possible.

_ }; }; TUWF::get '/browse/search' => sub { my $q = tuwf->reqGet('q')||''; my $name = $q; my $sect = $name =~ s/^([0-9])\s+// || $name =~ s/\(([a-zA-Z0-9]+)\)$// || $name =~ s/\.([0-9][a-zA-Z0-9]*)$// ? $1 : ''; ($name,$sect) = ($sect,'') if !length $name; # Redirect if we have an exact match my @sectsql = length $sect ? ('AND section =', \$sect) : (); my $man = length $name && tuwf->dbRowi('SELECT name, section FROM mans WHERE name =', \$name, @sectsql); return tuwf->resRedirect("/man/$man->{name}".(length $sect ? ".$man->{section}" : ''), 'temp') if length $man->{name}; # Otherwise, do case-insensitive glob search my $nameq = escape_like(lc $name) =~ tr/?*/_%/r; my $lst = !length $nameq ? [] : tuwf->dbAlli(' SELECT name, section FROM mans WHERE lower(name) LIKE', \$nameq, @sectsql, ' ORDER BY name, section LIMIT 500'); framework_ title => 'Search results for '.$q, mainclass => 'searchres', q => $q, sub { h1_ 'Search results for '.(length $sect ? "$name in section $sect" : $q); if(@$lst) { p_ 'Truncated to the first 500 results.' if @$lst >= 150; ul_ sub { li_ sub { a_ href => "/man/$_->{name}.$_->{section}", $_->{name}; small_ " $_->{section}"; } for @$lst; } } else { p_ 'No results :-('; p_ sub { a_ href => '?q='.uri_escape($name), 'Try again in other sections?' if length $sect; }; } }; }; # Object to represent the various URLs to a man page. # # Parameters: # fmt => man|txt|raw # shorthash => 8-char hex # lang => language code # system => system shortname # package => name of the package # version => package version # man => name of the man page # section => man page section # # URL format: # /$fmt[.$shorthash][.$lang][/$system[[/$category]/$package[/$version]]]/$man[.$section] # # Note that the URL format has some ambiguity: # - $category (deprecated, only used for compatibility with old URLs) and # $package may contain a slash, so a database lookup is required to # disambiguate between URLs with [/$version] and those without. 
# - $man may contain a dot, so a database lookup is required to disambiguate # between URLs with [.$section] and those without # # $system may also refer to system shortnames without the version suffix (e.g. # 'ubuntu' rather than 'ubuntu-impish'). In that case the man page from the # latest release of that system is chosen. package ManUrl { sub new { my($p,%o)=@_; bless \%o, $p } sub set { my($o,@o)=@_; bless +{%$o,@o}, ref $o } sub mansect { $_[0]{man}.(defined $_[0]{section} ? ".$_[0]{section}" : '') } use overload '""' => sub { my($o)=@_; "/$o->{fmt}".(defined $o->{shorthash} ? ".$o->{shorthash}" : '').(defined $o->{lang} ? ".$o->{lang}" : '') .(defined $o->{system} ? ("/$o->{system}" .(defined $o->{package} ? ("/$o->{package}" .(defined $o->{version} ? "/$o->{version}" : '')) : '')) : '') .'/'.$o->mansect }; }; sub man_nav_ { my($man, $url, $toc, $htmllang) = @_; my @systems = tuwf->dbAlli(' SELECT DISTINCT p.system FROM packages p JOIN package_versions v ON v.package = p.id JOIN files f ON f.pkgver = v.id JOIN mans m ON m.id = f.man WHERE m.name =', \$man->{name}, 'AND m.section =', \$man->{section} )->@*; my @sect = map $_->{section}, tuwf->dbAlli( 'SELECT DISTINCT section FROM mans WHERE name =', \$man->{name}, 'ORDER BY section' )->@*; my $lang = man_languages $man->{name}, $man->{section}; nav_ sub { form_ action => '/sysredir/'.$url->mansect(), method => 'get', onsubmit => 'location.href="/man/"+system_select[system_select.selectedIndex].value+"/'.$url->mansect().'";return false', sub { my %names; push $names{$_->{name}}->@*, $_ for map sysbyid->{$_->{system}}, sort { $b->{system} <=> $a->{system} } @systems; select_ id => 'system_select', name => 'system', sub { for (sort { ($names{$b}->@* == 1) <=> ($names{$a}->@* == 1) || $a cmp $b } keys %names) { my $s = $names{$_}; if (@$s == 1) { option_ value => $s->[0]{short}, selected => $s->[0]{id} == $man->{system}?'':undef, $s->[0]{full}; next; } optgroup_ label => $_, sub { option_ value => $_->{short}, 
selected => $_->{id} == $man->{system}?'':undef, $_->{full} for @$s; }; } }; input_ type => 'submit', value => 'Go'; } if @systems > 1; # TODO: This is ugly, especially because clicking on a translation or # section, you can end up with a man page that is nowhere close to the # man page you're currently reading. Sections or languages available # for the currently selected system should be highlighted. if(@sect > 1) { b_ 'Sections'; p_ sub { for (@sect) { if($man->{section} eq $_) { i_ $_; } else { a_ href => "/man/$man->{name}.$_", $_; } txt_ ' '; } } } if(@$lang > 1) { b_ 'Languages'; p_ sub { (my $cur = $man->{locale}||'') =~ s/\..*//; for (@$lang) { if(($_||'') eq $cur) { i_ $_ || 'default'; } else { a_ href => $_ ? "/man.$_/$man->{name}.$man->{section}" : "/man/$man->{name}.$man->{section}", $_ || 'default'; } txt_ ' '; } } } if(@$toc > 1) { b_ 'Table of Contents'; ul_ sub { for (0..$#$toc) { li_ sub { a_ @$htmllang, href => sprintf('#head%d', $_+1), sub { lit_ lc $toc->[$_] }; } } } } } } # Replace .so's in man source with the contents (if available in the same # package) or with a reference to the other man page. sub soelim { my($verid, $src) = @_; # tix comes with* a custom(?) macro package. But it looks okay even without loading that. # (* It actually doesn't, the tcllib package appears to have that file, but doesn't '.so' it) $src =~ s/^\.so man.macros$//mg; # Other .so's should be handled by html() $src =~ s{^\.so (.+)$}{ my $path = $1; my $name = (reverse split /\//, $path)[0]; my($man) = $verid ? man_pref_name $name, sql 'v.id =', \$verid : (); $man->{name} # Recursive soelim, but the second call gets $verid=0 so we don't keep checking the database ? 
soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE id =", \$man->{content})) : ".in -10\n.sp\n\[\[\[MANNEDINCLUDE$path\]\]\]" }emg; $src; } sub man_page { my($man, $url) = @_; tuwf->resLastMod($man->{released}); my $content = tuwf->dbRowi('SELECT encode(hash, \'hex\') AS hash, content FROM contents WHERE id =', \$man->{content}); if($url->{fmt} eq 'raw') { tuwf->resHeader('Content-Type', 'text/plain; charset=UTF-8'); tuwf->resHeader('Content-Disposition', sprintf 'filename="%s.%s"', $man->{name}, $man->{section}); lit_ $content->{content}; return; } my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid}, $content->{content}; if($url->{fmt} eq 'txt') { # TODO: The 'txt' format is kind of broken right now as it includes our HTML formatting codes. # This feature is a WIP and not advertised at the moment, anyway. tuwf->resHeader('Content-Type', 'text/plain; charset=UTF-8'); tuwf->resHeader('Content-Disposition', sprintf 'filename="%s.%s.txt"', $man->{name}, $man->{section}); lit_ $fmt; return; } # Prefix links to other man pages with the current system, to ensure we # grab the most relevant man page. # XXX: This is a hack, prefixing is better done directly in ManUtils. my $sys = sysbyid->{$man->{system}}{short}; $fmt =~ s{([^<\n]+?)<\/b>\n}{ push @toc, $1; my $c = @toc; qq{\n$1\n} }eg; my @htmllang = $man->{locale} =~ /^([a-z]{2,3})(?:_([A-Z]{2}))?(?:$|@|\.)/ ? 
(lang => $1.($2?"-$2":'')) : ();

  framework_ title => $man->{name}, mainclass => 'manpage', sub {
    man_nav_ $man, $url, \@toc, \@htmllang;
    div_ id => 'manbuttons', sub {
      h1_ $man->{name};
      ul_ sub {
        li_ sub { a_ href => $url->set(fmt => 'raw'), 'source' };
        li_ sub { a_ href => $url->set(system => sysbyid->{$man->{system}}{short}, package => undef, shorthash => shorthash_to_hex $man->{shorthash}), 'permalink' };
        li_ sub { a_ href => "/ver.".shorthash_to_hex($man->{shorthash}).($man->{locale}?".$man->{locale}":'')."/$man->{name}.$man->{section}", 'versions' };
        li_ sub { a_ href => "/loc/$content->{hash}", 'locations' };
      }
    };
    pre_ @htmllang, sub { lit_ $fmt };
  };
}


# /<name>[.<section>]  - short and handy catch-all URL for man pages
# /<name>/<hash>       - old permalink format
# This one has to go before the other mappings, to ensure that links work for
# man pages called 'pkg' or 'man'.
# (The handler reads the 'name' and 'hash' captures, so the groups must be
# named accordingly.)
TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
  my $name = normalize_name tuwf->capture('name');
  my $shorthash = tuwf->capture('hash');

  my($man, $sect) = man_pref_name $name, $shorthash ? sql 'f.shorthash =', \shorthash_to_int $shorthash : 'true';
  return tuwf->resNotFound() if !$man->{name};
  # When the name was split into name + section, use the name from the
  # database; otherwise keep the requested name as-is.
  man_page $man, ManUrl->new(
    fmt     => 'man',
    man     => length $sect ? $man->{name} : $name,
    section => length $sect ? $sect : undef,
  );
};


# /<name>/<hash>/src - old URL format to get the raw man page
TUWF::get qr{/([^/]+)/([0-9a-f]{8})/src} => sub {
  my $name = normalize_name tuwf->capture(1);
  my $shorthash = tuwf->capture(2);
  my($man) = man_pref_name $name, sql 'f.shorthash =', \shorthash_to_int $shorthash;
  return tuwf->resNotFound if !$man->{name};
  man_page $man, ManUrl->new(fmt => 'raw', man => $name);
};


# Full man page URL:
#   /$fmt[.$shorthash][.$lang][/$system[[/$category]/$package[/$version]]]/$man[.$section]
# (The handler reads the 'fmt', 'shorthash', 'lang' and 'path' captures.)
TUWF::get qr{/(?<fmt>man|txt|raw)(?:\.(?<shorthash>[a-fA-F0-9]{8}))?(?:\.(?<lang>[^/]+))?/(?<path>.+)} => sub {
  my($fmt, $shorthash, $lang, $path) = tuwf->captures(qw|fmt shorthash lang path|);
  my @where;

  # The last path component is the man page, the first (if any) the system.
  # Whatever remains in $path afterwards identifies the package.
  my $name = normalize_name($path =~ s{/?([^/]+)$}{} && $1);
  my $system = $path =~ s{^([^/]+)/?}{} && $1;

  # $system can be either a full system 'short' name, or a prefix (e.g. 'debian' meaning 'any debian-* version')
  if($system) {
    my $sysid = sysbyshort->{$system};
    $sysid = $sysid ? [$sysid->{id}] : [ map sysbyshort->{$_}{id}, grep /^\Q$system\E-/, keys sysbyshort->%* ];
    return tuwf->resNotFound if !@$sysid;
    push @where, sql 'system IN', $sysid;
  }

  my($pkg, $ver, $redir) = length $path ? pkg_frompath sql_and(@where), $path : (undef,undef);
  return tuwf->resNotFound if length $path && !$pkg;
  push @where, sql 'p.id =', \$pkg->{id} if $pkg;
  push @where, sql 'v.version =', \$ver if length $ver;

  push @where, sql 'f.shorthash =', \shorthash_to_int $shorthash if $shorthash;
  push @where, sql 'l.locale ilike', \(escape_like($lang).'%') if $lang;

  my($man, $section) = man_pref_name $name, sql_and @where;
  return tuwf->resNotFound if !$man;

  my $url = ManUrl->new(
    fmt       => $fmt,
    shorthash => $shorthash,
    lang      => $lang,
    system    => length $system ? $system : undef,
    package   => $pkg ? $pkg->{name} : undef,
    version   => length $ver ? $ver : undef,
    man       => length $section ? $man->{name} : $name,
    section   => length $section ? $section : undef,
  );
  # pkg_frompath() asks for a redirect when the URL used a deprecated form.
  return tuwf->resRedirect($url, 'perm') if $redir;
  man_page $man, $url;
};


# Package listing for one system, filtered by first character and paginated.
TUWF::get qr{/pkg/([^/]+)} => sub {
  my $short = tuwf->capture(1);
  my $sys = sysbyshort->{$short};
  return tuwf->resNotFound if !$sys;

  # c: first-character filter ('all', '0' or a letter); p: page number.
  my $f = tuwf->validate(get =>
    c => { onerror => 'all', enum => [ '0', 'all', 'a'..'z' ] },
    p => { onerror => 1, uint => 1, range => [1,200] },
  )->data;

  my $where = sql 'NOT dead AND system =', \$sys->{id}, $f->{c} ne 'all' ? ('AND match_firstchar(name,', \$f->{c}, ')') : ();
  my $count = tuwf->dbVali('SELECT count(*) FROM', $packages_with_man, 'p WHERE', $where);
  my $pkg = tuwf->dbPagei({ results => 200, page => $f->{p} },
    'SELECT id, system, name FROM', $packages_with_man, 'p WHERE', $where, 'ORDER BY name'
  );

  framework_ title => $sys->{full}, mainclass => 'pkglist', sub {
    div_ sub {
      div_ sub {
        h1_ $sys->{full};
      };
      nav_ class => 'charselect', sub {
        # The '0' filter is displayed as '#'; the active filter is bold.
        for('all', 0, 'a'..'z') {
          a_ href => "/pkg/$short?c=$_", $_?uc$_:'#' if $_ ne $f->{c};
          b_ $_?uc$_:'#' if $_ eq $f->{c};
        }
      };
    };
    small_ '(Packages without man pages are not listed)';

    paginate_ "/pkg/$short?c=$f->{c};p=", $count, 200, $f->{p};
    ul_ sub {
      li_ sub {
        a_ href => "/pkg/$short/$_->{name}", $_->{name};
      } for @$pkg;
    };
    paginate_ "/pkg/$short?c=$f->{c};p=", $count, 200, $f->{p};
  };
};


# Package info: /pkg/$system[/$category]/$name[/$version]; $category and $name may contain slashes, too.
TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
  my ($short, $path) = tuwf->captures(1,2);
  my $sys = sysbyshort->{$short};
  return tuwf->resNotFound if !$sys;

  my($pkg, $ver, $redir) = pkg_frompath(sql('system =', \$sys->{id}), $path);
  return tuwf->resNotFound if !$pkg;
  return tuwf->resRedirect("/pkg/$short/$pkg->{name}".($ver?"/$ver":''), 'perm') if $redir;

  # All versions of the package that have at least one file, newest first.
  my $vers = tuwf->dbAlli('
    SELECT id, version, released
      FROM package_versions v
     WHERE package =', \$pkg->{id}, '
       AND EXISTS(SELECT 1 FROM files f WHERE f.pkgver = v.id)
     ORDER BY released DESC'
  );
  my $sel = $ver ?
(grep $_->{version} eq $ver, @$vers)[0] : $vers->[0]; return tuwf->resNotFound if !$sel; my $p = tuwf->validate(get => p => { onerror => 1, uint => 1, range => [1,100] })->data; my $count = tuwf->dbVali('SELECT count(*) FROM files WHERE pkgver =', \$sel->{id}); my $mans = tuwf->dbPagei({ results => 200, page => $p }, ' WITH lst AS ( SELECT f.man, m.name, m.section, f.shorthash, f.filename, l.locale FROM files f JOIN locales l ON l.id = f.locale JOIN mans m ON m.id = f.man WHERE f.pkgver =', \$sel->{id}, ' ), needlang AS ( SELECT man FROM lst GROUP BY man HAVING count(*) > 1 ), needhash AS ( SELECT man, locale FROM lst GROUP BY man, locale HAVING count(*) > 1 ) SELECT name, section, shorthash, filename, locale , EXISTS(SELECT 1 FROM needlang WHERE man = l.man) AS needlang , EXISTS(SELECT 1 FROM needhash WHERE man = l.man AND locale = l.locale) AS needhash FROM lst l ORDER BY name, section, locale, filename '); # Latest version of this package determines last modification date of the page. tuwf->resLastMod($vers->[0]{released}); my $subtitle = " / $pkg->{name}"; my $pkgpath = "$sys->{short}/$pkg->{name}"; framework_ title => "$sys->{full}$subtitle $sel->{version}", mainclass => 'pkgpage', sub { h1_ sub { a_ href => "/pkg/$sys->{short}", $sys->{full}; txt_ $subtitle; }; div_ sub { section_ sub { h2_ 'Versions'; ul_ sub { li_ sub { a_ href => "/pkg/$pkgpath/$_->{version}", $_->{version} if $_ != $sel; b_ " $_->{version}" if $_ == $sel; small_ " $_->{released}"; } for(@$vers); } }; section_ sub { h2_ "Manuals for version $sel->{version}"; paginate_ "/pkg/$pkgpath/$sel->{version}?p=", $count, 200, $p; ul_ sub { li_ sub { # Only add the hash or locale to the URL if it's necessary to select the proper man page. my $ext = $_->{needhash} ? '.'.shorthash_to_hex $_->{shorthash} : $_->{needlang} && length $_->{locale} ? 
".$_->{locale}" : ''; a_ href => "/man$ext/$pkgpath/$sel->{version}/$_->{name}.$_->{section}", "$_->{name}($_->{section})"; b_ " $_->{locale}" if $_->{locale}; small_ " $_->{filename}"; } for(@$mans); }; paginate_ "/pkg/$pkgpath/$sel->{version}?p=", $count, 200, $p; }; }; } }; # /browse/ has been moved to /pkg/. TUWF::get qr{/browse/(.+)} => sub { tuwf->resRedirect('/pkg/'.tuwf->capture(1), 'perm') }; # Redirect for the system selection box, for visitors who have disabled JS. TUWF::get qr{/sysredir/([^/]+)} => sub { tuwf->resRedirect('/man/'.(tuwf->reqGet('system')//'arch').'/'.tuwf->capture(1), 'temp') }; # Redirect for a specific language for a man page. I have no idea if anyone # still uses this URL format, but it was supported at some point, so let's keep # it around. TUWF::get qr{/lang/([^/]+)/([^/]+)} => sub { tuwf->resRedirect('/man.'.tuwf->capture(1).'/'.tuwf->capture(2), 'temp') }; TUWF::get qr{/loc/([a-fA-F0-9]{40})}, sub { my $hash = tuwf->capture(1); # There are a few files that have been duplicated far too many times for # this page to be very useful. Add some limits to make sure the page at # least manages to load something. 
my $maxlisting = 30_000; my $maxpersys = 500; my $l = tuwf->dbAlli(' SELECT p.system, p.name AS package, v.version, f.filename, f.shorthash, m.name, m.section FROM files f JOIN mans m ON m.id = f.man JOIN package_versions v ON v.id = f.pkgver JOIN packages p ON p.id = v.package WHERE f.content = (SELECT id FROM contents WHERE hash = decode(', \$hash, ", 'hex')) ORDER BY p.system DESC, p.name, v.released DESC, f.filename LIMIT ", \$maxlisting ); return tuwf->resNotFound() if !@$l; my %sys; push $sys{ sysbyid->{$_->{system}}{name} }->@*, $_ for @$l; my @mans = uniq sort map "$_->{name}($_->{section})", @$l; framework_ title => 'Locations for '.join(', ', @mans), mainclass => 'locpage', sub { h1_ 'Locations of this man page'; p_ sub { txt_ 'Listing all man pages identified by SHA1 '; code_ $hash; txt_ '.'; if (@$l >= $maxlisting) { br_; b_ sprintf 'WARNING: This file has more than %d locations, the list below is incomplete.', $maxlisting; } br_; br_; txt_ 'Included man pages: '; for (0..$#mans) { txt_ ', ' if $_ > 0; a_ href => "/man.".lc(substr($hash,0,8))."/".($mans[$_] =~ s/\(/./r =~ s/\)//r), $mans[$_]; } txt_ '.'; if (keys %sys > 1) { br_; br_; txt_ 'System index: '; my @sys = sort keys %sys; for (0..$#sys) { txt_ ', ' if $_ > 0; a_ href => "#$sys[$_]", $sys[$_]; } txt_ '.'; } }; for my $sysname (sort keys %sys) { h2_ sub { a_ href => "#$sysname", id => "$sysname", $sysname }; table_ sub { thead_ sub { tr_ sub { td_ 'Release' if sysbyid->{$sys{$sysname}[0]{system}}{release}; td_ 'Package'; td_ 'Path'; }}; my $lastrel = ''; tr_ sub { my $sys = sysbyid->{$_->{system}}; td_ $lastrel eq $sys->{release} ? 
'' : $sys->{release} if $sys->{release}; $lastrel = $sys->{release}; td_ sub { a_ href => "/pkg/$sys->{short}/$_->{package}/$_->{version}", $_->{package}.'-'.$_->{version}; }; td_ $_->{filename}; } for @{$sys{$sysname}}[0..min $maxpersys, $#{$sys{$sysname}}]; }; small_ sprintf 'List truncated to the first %d results out of %d.', $maxpersys, scalar $sys{$sysname}->@* if $sys{$sysname}->@* > 500; } }; }; # /ver[.$shorthash][.$lang]/$name.$section TUWF::get qr{/ver(?:\.([a-fA-F0-9]{8}))?(?:\.([^/]+))?/([^/]+)\.([0-9a-zA-Z])}, sub { my($shorthash, $lang, $name, $sect) = tuwf->captures(1,2,3,4); $shorthash = $shorthash ? shorthash_to_int $shorthash : -1; ($lang ||= '') =~ s/\..*//; my $l = tuwf->dbAlli(' SELECT p.system, p.name AS package, v.version, v.released, f.shorthash FROM files f JOIN package_versions v ON v.id = f.pkgver JOIN packages p ON p.id = v.package WHERE f.man = (SELECT id FROM mans WHERE name =', \$name, 'AND section =', \$sect, ') AND f.locale IN(SELECT id FROM locales WHERE locale', $lang ? ('ILIKE', \(escape_like($lang).'%')) : ("= ''"), ') ORDER BY p.system DESC, p.name, v.released DESC, f.shorthash '); my $langs = man_languages $name, $sect; my %sys; push $sys{ sysbyid->{$_->{system}}{name} }->@*, $_ for @$l; my $title = "Versions of $name($sect)".($lang ? 
" in locale $lang" : ''); framework_ title => $title, mainclass => 'verpage', sub { h1_ $title; p_ sub { txt_ 'Available languages: '; for (0..$#{$langs}) { txt_ ', ' if $_ > 0; if(($langs->[$_]||'') eq $lang) { i_ $langs->[$_] || 'default'; } else { a_ href => '/ver'.($langs->[$_]?".$langs->[$_]":'')."/$name.$sect", $langs->[$_] || 'default'; } } txt_ '.'; } if @$langs > 1; p_ sub { txt_ 'System index: '; my @sys = sort keys %sys; for (0..$#sys) { txt_ ', ' if $_ > 0; a_ href => "#$sys[$_]", $sys[$_]; } txt_ '.'; } if keys %sys > 1; for my $sysname (sort keys %sys) { h2_ sub { a_ href => "#$sysname", id => "$sysname", $sysname }; table_ sub { thead_ sub { tr_ sub { td_ 'Release' if sysbyid->{$sys{$sysname}[0]{system}}{release}; td_ 'Package'; td_ 'Date'; td_ 'Hash'; }}; my $lastrel = ''; tr_ sub { my $sys = sysbyid->{$_->{system}}; td_ $lastrel eq $sys->{release} ? '' : $sys->{release} if $sys->{release}; $lastrel = $sys->{release}; td_ sub { a_ href => "/pkg/$sys->{short}/$_->{package}/$_->{version}", $_->{package}.'-'.$_->{version}; }; td_ $_->{released}; td_ class => 'sh', sub { my $hex = shorthash_to_hex $_->{shorthash}; txt_ $hex if $_->{shorthash} == $shorthash; a_ href => "/man.$hex/$sys->{short}/$name.$sect", $hex if $_->{shorthash} != $shorthash; }; } for $sys{$sysname}->@*; }; } }; }; TUWF::run();