#!/usr/bin/perl

use v5.26;
use warnings;
use TUWF ':html5_', ':xml';
use POSIX 'ceil';
use SQL::Interp 'sql', 'sql_interp';
use Time::Local 'timegm';

use Cwd 'abs_path';

# $ROOT is the absolute path of this script with the trailing "/www/index.pl"
# removed. It has to be known at compile time for the 'use lib' line below.
our $ROOT;
BEGIN { ($ROOT = abs_path $0) =~ s{/www/index\.pl$}{}; }

# Select AnyEvent's pure-perl backend. It is lighter than EV, whose
# performance is not needed here, and it works around a subprocess spawning
# problem under TUWF's built-in web server that was never tracked down.
BEGIN { $ENV{PERL_ANYEVENT_MODEL} = 'Perl'; }

use lib "$ROOT/lib/ManUtils/inst/lib/perl5";
use ManUtils;


TUWF::set(
    logfile        => $ENV{TUWF_LOG},
    db_login       => [undef, undef, undef],
    debug          => $ENV{TUWF_DEBUG},
    xml_pretty     => 0,
    log_slow_pages => 500,
);


# Before normal routing: if the {http} flag is set (presumably only under
# TUWF's own HTTP server - confirm) and resFile() reports success for the
# request path below "$ROOT/www", add a one-year Cache-Control header and end
# the request right there.
TUWF::hook before => sub {
    if(tuwf->{_TUWF}{http} && tuwf->resFile("$ROOT/www", tuwf->reqPath)) {
        tuwf->resHeader('Cache-Control' => 'max-age=31536000');
        tuwf->done;
    }
};


# TODO: Add SQL::Interp support to TUWF directly, in some form.
# Until then, these are the regular db* methods with their query arguments
# passed through sql_interp() first. dbPagei() hands its first argument to
# dbPage() untouched and only interpolates the rest.
sub TUWF::Object::dbExeci { my $self = shift; $self->dbExec(sql_interp @_) }
sub TUWF::Object::dbVali  { my $self = shift; $self->dbVal (sql_interp @_) }
sub TUWF::Object::dbRowi  { my $self = shift; $self->dbRow (sql_interp @_) }
sub TUWF::Object::dbAlli  { my $self = shift; $self->dbAll (sql_interp @_) }
sub TUWF::Object::dbPagei {
    my($self, $opt, @query) = @_;
    $self->dbPage($opt, sql_interp @query);
}


# Emit a Last-Modified header for a date string starting with yyyy-mm-dd.
# The time of day is always midnight GMT. Nothing is sent (and nothing is
# returned) when the string does not start with such a date.
sub TUWF::Object::resLastMod {
    my($s, $d) = @_;
    my($year, $month, $day) = $d =~ /^(\d{4})-(\d{2})-(\d{2})/ or return;

    my @wdays  = qw|Sun Mon Tue Wed Thu Fri Sat|;
    my @months = qw|Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec|;
    my($sec, $min, $hour, $mday, $mon, $yr, $wday) = gmtime(timegm(0, 0, 0, $day, $month-1, $year));

    $s->resHeader('Last-Modified', sprintf '%s, %02d %s %04d %02d:%02d:%02d GMT',
        $wdays[$wday], $mday, $months[$mon], $yr+1900, $hour, $min, $sec);
}


# All rows of the systems table, fetched once per process and kept in memory
# because that table rarely changes. Every row gets an extra 'full' field:
# the name, followed by a space and the release when there is a release.
sub systems {
    state $cache;
    $cache ||= do {
        my $rows = tuwf->dbAll('SELECT id, name, release, short FROM systems ORDER BY name, id');
        for my $sys (@$rows) {
            $sys->{full} = $sys->{name}.($sys->{release} ? ' '.$sys->{release} : '');
        }
        [ @$rows ];
    };
}

# Lookup tables over systems(), keyed on the 'id' and 'short' columns.
sub sysbyid {
    state $by_id;
    $by_id ||= { map { ($_->{id}, $_) } systems()->@* };
}

sub sysbyshort {
    state $by_short;
    $by_short ||= { map { ($_->{short}, $_) } systems()->@* };
}


# Undo the URL-escaping of '[', ']' and space in a man page name. Firefox
# escapes the brackets even though it does not have to. The argument itself is
# left untouched; the cleaned-up copy is returned.
sub normalize_name {
    my $name = $_[0] =~ s/%5b/[/irg;
    $name =~ s/%5d/]/ig;
    $name =~ s/%20/ /g;
    $name;
}


# Subquery yielding every package that has at least one man page.
my $packages_with_man =
      '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id'
    . ' AND EXISTS(SELECT 1 FROM man m WHERE m.package = pv.id)))';


# Return a copy of the argument with '_', '%' and '\' prefixed by a backslash,
# so that it can be used literally inside a LIKE pattern.
sub escape_like {
    my($str) = @_;
    return $str =~ s/([_%\\])/\\$1/rg;
}


# sql_join($sep, @parts): one sql() object with $sep between the parts.
sub sql_join {
    my($sep, @parts) = @_;
    my @seq;
    for my $part (@parts) {
        push @seq, $sep if @seq;
        push @seq, $part;
    }
    sql @seq;
}

# AND / OR the given conditions together, each wrapped in parentheses. Without
# any conditions the result is an always-true / always-false expression.
sub sql_and {
    return sql '1=1' if !@_;
    sql_join 'AND', map sql('(', $_, ')'), @_;
}

sub sql_or {
    return sql '1=0' if !@_;
    sql_join 'OR', map sql('(', $_, ')'), @_;
}


# Condition matching the bytea expression $sql_expr against $prefix, given as
# a hex string. Written as a LIKE on a constant prefix, hopefully indexable.
# The prefix is decoded to bytes, LIKE-escaped and hex-encoded again, so the
# text placed in the SQL consists of hex digits only.
sub sql_hash_prefix {
    my($sql_expr, $prefix) = @_;
    my $bytes = pack 'H*', $prefix;
    my $esc   = unpack 'H*', escape_like($bytes);
    sql '(', $sql_expr, "like ('\\x$esc'::bytea||'%'))";
}


# Resolve a URL path to a package that has man pages.
#   $sys_where - SQL condition restricting the system
#   $path      - "$category/$name" or "$category/$name/$version"
# Returns ($package_row, $version); the version is '' when the path did not
# carry one. Returns (undef, '') when no package matches.
sub pkg_frompath {
    my($sys_where, $path) = @_;

    my sub lookup {
        my($cat, $name) = @_;
        tuwf->dbRowi(
            'SELECT id, system, name, category FROM', $packages_with_man,
            'p WHERE', $sys_where, 'AND category =', \$cat, 'AND name =', \$name
        );
    }

    # A category may itself contain slashes, so both readings of the path are
    # tried. First "$category/$name", e.g. contrib/games/alien
    if(my($cat, $name) = $path =~ m{^(.+)/([^/]+)$}) {
        my $pkg = lookup($cat, $name);
        return ($pkg, '') if $pkg->{id};
    }

    # Then "$category/$name/$version", e.g. contrib/games/alien/10.2
    if(my($cat, $name, $version) = $path =~ m{^(.+)/([^/]+)/([^/]+)$}) {
        my $pkg = lookup($cat, $name);
        return ($pkg, $version) if $pkg->{id};
    }

    (undef, '');
}


# Get the preferred man page for the given filters.
sub man_pref {
    my($section, $where) = @_;

    # A given section acts as a prefix filter on top of the caller's filters.
    if(length $section) {
        my $prefix = escape_like($section).'%';
        $where = sql_and($where, sql('m.section LIKE', \$prefix));
    }

    # The candidates are narrowed down step by step; each step below is one
    # CTE in the query, applied in this order:
    #  1. english:  English versions of a man page have preference over other locales
    #  2. pkgver:   Newer versions of the same package have preference over older versions
    #  3. stdloc:   Prefer man pages in standard locations
    #  4. secmatch: Prefer an exact section match
    #  5. arch:     Prefer Arch over other systems (because it tends to be the most up-to-date, and closest to upstreams)
    #  6. debian:   If there's no Arch, prefer latest Debian over other systems (again, tends to be more up-to-date)
    #               (also resolves distro-specific tooling disputes such as https://code.blicky.net/yorhel/manned/issues/1 )
    #  7. sysrel:   Prefer a more recent system release over an older release
    #  8. secorder: Lower sections before higher sections (because man does it this way, for some reason)
    #  9. pkgdate:  Prefer more recent packages (cross-distro)
    # 10. Fall back on hash comparison, to ensure the result is stable

    # System ids are looked up once per process: the system with short name
    # 'arch', and the 'debian-*' system with the highest id.
    state $arch_id   = sysbyshort()->{arch}{id};
    state $debian_id = (sort { $b->{id} <=> $a->{id} } grep { $_->{short} =~ /^debian-/ } systems()->@*)[0]{id};

    # The SQL text is cut into concatenated pieces purely for readability;
    # the resulting strings are unchanged.
    tuwf->dbRowi(
        q{ WITH unfiltered AS ( SELECT s AS sys, p AS pkg, v AS ver, m AS man FROM man m}
      . q{ JOIN package_versions v ON v.id = m.package JOIN packages p ON p.id = v.package}
      . q{ JOIN systems s ON s.id = p.system WHERE},
        $where,
        q{ ), f_english AS( SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale((man).locale)) OR is_english_locale((man).locale)}
      . q{ ), f_pkgver AS( SELECT * FROM f_english a WHERE NOT EXISTS(SELECT 1 FROM f_english b WHERE (a.ver).package = (b.ver).package AND (a.ver).released < (b.ver).released)}
      . q{ ), f_stdloc AS( SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location((man).filename)) OR is_standard_man_location((man).filename)}
      . q{ ), f_secmatch AS( SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE (man).section =},
        \$section,
        q{) OR (man).section =},
        \$section,
        q{ ), f_arch AS( SELECT * FROM f_secmatch WHERE NOT EXISTS(SELECT 1 FROM},
        (length $section ? 'f_secmatch' : 'f_stdloc'),
        qq{WHERE (sys).id = $arch_id) OR (sys).id = $arch_id}
      . qq{ ), f_debian AS( SELECT * FROM f_arch WHERE NOT EXISTS(SELECT 1 FROM f_arch WHERE (sys).id = $debian_id) OR (sys).id = $debian_id}
      . q{ ), f_sysrel AS( SELECT * FROM f_debian a WHERE NOT EXISTS(SELECT 1 FROM f_debian b WHERE (a.sys).name = (b.sys).name AND (a.sys).id < (b.sys).id)}
      . q{ ), f_secorder AS( SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE (a.man).section > (b.man).section)}
      . q{ ), f_pkgdate AS( SELECT * FROM f_secorder a WHERE NOT EXISTS(SELECT 1 FROM f_secorder b WHERE (a.ver).released < (b.ver).released) )}
      . q{ SELECT (pkg).system, (pkg).category, (pkg).name AS package, (ver).version, (ver).released, (ver).id AS verid, (man).name, (man).section, (man).filename, (man).locale, encode((man).hash, 'hex') AS hash}
      . q{ FROM f_pkgdate ORDER BY (man).hash LIMIT 1 },
    );
}


# Find the preferred man page for a name that may carry a ".section" suffix.
# The whole string is tried as a man page name first. Only when that finds
# nothing is the part after the last dot split off and used as section prefix.
# Returns ($man_row, $section_prefix), or (undef, '') when nothing matches.
sub man_pref_name {
    my($name, $where) = @_;

    my $exact = man_pref(undef, sql_and($where, sql('m.name =', \$name)));
    return ($exact, '') if length $exact->{name};

    if($name =~ s/\.([^.]+)$// && length $name) {
        my $section = $1;
        my $man = man_pref($section, sql_and($where, sql('m.name =', \$name)));
        return ($man, $section) if length $man->{name};
    }

    (undef, '');
}


# Page skeleton shared by all HTML pages.
#   framework_ title => $title, mainclass => $class, sub { ...page body... };
# The last argument becomes the content of <main>; the arguments before it
# are options.
sub framework_ {
    my $body = pop;
    my %opt = @_;

    html_ sub {
        head_ sub {
            link_ rel => 'stylesheet', type => 'text/css', href => '/man.css?5';
            title_ $opt{title}.' - manned.org';
        };
        body_ sub {
            header_ sub {
                a_ href => '/', 'Manned.org';
                form_ action => '/browse/search', method => 'get', sub {
                    input_ type => 'text', name => 'q', id => 'q', tabindex => 1;
                    input_ type => 'submit', value => 'Search';
                }
            };
            main_ class => $opt{mainclass}, $body;
            footer_ sub {
                span_ sub {
                    a_ href => '/info/about', 'about';
                    txt_ ' | ';
                    a_ href => 'mailto:contact@manned.org', 'contact';
                    txt_ ' | ';
                    a_ href => 'https://code.blicky.net/yorhel/manned', 'source';
                };
                span_ 'all manual pages are copyrighted by their respective authors.';
            };
            script_ type => 'text/javascript', src => '/man.js', '';
        }
    };

    # write the SQL queries as a HTML comment when debugging is enabled
    # (stolen from VNDB code)
    # (TODO: Move this into TUWF or something)
    # NOTE(review): in this copy only "\n" is emitted; the three joined
    # strings built below are never written out. Looks like the comment body
    # got lost somewhere - confirm against the repository.
    if(tuwf->debug) {
        my(@raw, @filled) = ();
        for my $query (tuwf->{_TUWF}{DB}{queries}->@*) {
            my($sql, $params, $time) = @$query;
            # Numeric placeholder keys in numeric order, anything else as strings.
            my @keys = sort { $a =~ /^[0-9]+$/ && $b =~ /^[0-9]+$/ ? $a <=> $b : $a cmp $b } keys %$params;
            my $prefix = sprintf " [%6.2fms] ", $time*1000;
            push @raw, sprintf "%s%s | %s", $prefix, $sql, join ', ', map "$_:".DBI::neat($params->{$_}), @keys;
            my $i = 1;
            push @filled, $prefix.($sql =~ s/\?/tuwf->dbh->quote($params->{$i++})/egr);
        }
        my $sql_r = join "\n", @raw;
        my $sql_i = join "\n", @filled;
        my $modules = join "\n", sort keys %INC;
        lit_ "\n";
    }
}


# Pagination bar: up to four page links on either side of the current page,
# plus the first and last page, with '...' where pages are skipped.
#   $url     - link prefix; the page number is appended to it
#   $count   - total number of items
#   $perpage - items per page
#   $p       - current page number
# Emits nothing when everything fits on a single page.
sub paginate_ {
    my($url, $count, $perpage, $p) = @_;
    return if $count <= $perpage;

    # The current page is shown in bold, every other page as a link.
    my sub pagelink_ {
        my($num) = @_;
        if($num == $p) {
            b_ $num;
        } else {
            a_ href => "$url$num", $num;
        }
    }

    my $lp    = ceil($count/$perpage);
    my $first = $p > 4 ? $p-4 : 1;
    my $last  = $p+4 > $lp ? $lp : $p+4;

    nav_ class => 'paginate', sub {
        pagelink_(1) if $p > 1+4;
        b_ '...' if $p > 1+5;
        pagelink_($_) for ($first..$last);
        b_ '...' if $p < $lp-5;
        pagelink_($lp) if $p < $lp-4;
    }
}


# 404 page, worded after man's own error message.
TUWF::set error_404_handler => sub {
    tuwf->resStatus(404);
    my $title = 'No manual entry for '.tuwf->reqPath;
    framework_ title => $title, sub {
        h1_ $title;
        p_ 'That is, the page you were looking for doesn\'t exist.';
    };
};


# Front page: statistics from stats_cache, the indexed systems and a list of
# other man page sites.
TUWF::get '/' => sub {
    my $stats = tuwf->dbRow('SELECT * FROM stats_cache');

    # Insert thousands separators into a number.
    sub num {
        local $_ = shift;
        1 while s/(\d)(\d{3})($|,)/$1,$2/;
        $_;
    }

    # Sites with a third element get that text appended after the link.
    my @sites = (
        ['http://man7.org/linux/man-pages/index.html', 'man7.org',     ' - Linux man pages from several upstream projects.'],
        ['https://manpag.es/',                         'ManPag.es',    ' - Man pages from several Linux distributions.'],
        ['https://www.mankier.com/',                   'ManKier',      ' - Fedora Rawhide + some manually imported man pages; Nicely formatted and with some unique features.'],
        ['https://man.cx/',                            'man.cx',       ' - Man pages extracted from Debian testing.'],
        ['http://man.he.net/',                         'man.he.net',   ' - Also seems to be from a Debian-like system.'],
        ['https://linux.die.net/man/',                 'die.net',      ' - Seems to be based on an RPM-based Linux distribution.'],
        ['http://manpages.org/',                       'manpages.org', ' - Lots of mostly-nicely formatted man pages, no clue about source.'],
        ['https://www.manpagez.com/',                  'manpagez.com', ' - Mac OS X, has some GTK-html and texinfo documentation as well.'],
        ['https://man.archlinux.org/',                 'Arch Linux Man Pages'],
        ['https://manpages.debian.org/',               'Debian Man Pages'],
        ['https://www.dragonflybsd.org/cgi/web-man',   'DragonFlyBSD Man Pages'],
        ['https://www.freebsd.org/cgi/man.cgi',        'FreeBSD.org Man Pages'],
        ['https://netbsd.gw.com/cgi-bin/man-cgi',      'NetBSD Man Pages'],
        ['https://www.openbsd.org/cgi-bin/man.cgi',    'OpenBSD Man Pages'],
        ['https://manpages.ubuntu.com/',               'Ubuntu Manuals'],
        ['https://man.voidlinux.org/',                 'Void Linux manpages'],
    );

    framework_ title => 'Man Pages Archive', mainclass => 'thin', sub {
        h1_ 'Welcome to Manned.org';
        h2_ 'The archive for man pages';
        lit sprintf <<' _', map num($stats->{$_}), qw|hashes mans files packages|;
Indexing %s versions of %s manual pages found in %s files of %s packages.
Manned.org aims to index all manual pages from a variety of systems, both old and new, and provides a convenient interface for looking up and viewing the various versions of each man page. More information »
 _

        h2_ 'Indexed systems';
        div_ class => 'systems', sub {
            # Group the releases by system name; one box per name.
            my %by_name;
            push @{ $by_name{$_->{name}} }, $_ for systems()->@*;

            for my $name (sort keys %by_name) {
                div_ sub {
                    my $rel = $by_name{$name};
                    # Image name: the short name up to its last '-'.
                    my $img = $rel->[0]{short} =~ s/^(.+)-.+$/$1/r;
                    if(@$rel == 1) {
                        # Single release: the whole box is one link.
                        a_ href => "/pkg/$rel->[0]{short}", sub {
                            img_ src => "images/$img.png";
                            b_ $rel->[0]{name};
                        };
                    } else {
                        # Several releases: one link each, in reverse order.
                        img_ src => "images/$img.png";
                        div_ sub {
                            b_ $rel->[0]{name};
                            for my $sys (reverse @$rel) {
                                a_ href => "/pkg/$sys->{short}", $sys->{release};
                                lit_ ' ';
                            }
                        };
                    }
                };
            }
        };

        h2_ 'Other relevant sites';
        ul_ sub {
            for my $site (@sites) {
                my($url, $label, $blurb) = @$site;
                li_ sub {
                    a_ href => $url, $label;
                    txt_ $blurb if defined $blurb;
                };
            }
        };
    };
};


# About page; its whole body is the literal text below.
TUWF::get '/info/about' => sub {
    framework_ title => 'About', mainclass => 'thin', sub {
        h1_ 'About Manned.org';
        lit <<' _';
The state of online indices of manual pages used to be a sad one. Existing sites used to only offer you a single version of a man page: From one origin, and often only in a single language. Most didn't even tell you where the manual actually originated from, making it very hard to determine whether the manual you found actually applied to your situation and even harder to find a manual for a specific system. Additionally, some sites rendered the manuals in an unreadable way, didn't correctly handle special formatting - like tables - or didn't correctly display non-ASCII characters.
Nowadays there are many good alternatives, but Manned.org was one of the sites created in order to improve that situation. This site aims to index the manual pages from a variety of systems, both old and new, and allows you to browse through the various versions of a manual page to find out how each system behaves. The manuals are stored in the database as UTF-8, and are passed through groff to render them in (mostly) the same way as they are displayed in your terminal.
This website is open source (MIT licensed) and written in a combination of Perl and Rust. The entire PostgreSQL database is available for download.
You can link to specific packages and man pages with several URL formats. These URLs will keep working in the future, so you should not have to worry about eventual dead links.
The following URLs are available to refer to an individual man page:
/<name>/<8-hex-digits>/<name>[.<section>]/man/<system>/<name>[.<section>]/man/<system>/<category>/<package>/<name>[.<section>]/man/<system>/<category>/<package>/<version>/<name>[.<section>]Currently, the last three URLs will perform a redirect to the
appropriate permalink URL, but this may change in the future.
In all URLs where an optional .<section> can be provided,
the search is performed as a prefix match. For example, /cat.3 will provide the cat.3tcl man page if
no exact cat.3 version is available. Linking to the full
section name is also possible: /cat.3tcl. If no
section is given and multiple sections are available, the lowest section
number is chosen.
Linking to individual packages is also possible. These pages will show a listing of all manual pages available in the given package.
/pkg/<system>/<category>/<package>/pkg/<system>/<category>/<package>/<version>Note that this site only indexes packages that actually have manual pages; Linking to a package that doesn't have any will result in a 404 page.
All man pages are fetched right from the (binary) packages available on the
public repositories of Linux distributions. In particular:
Only packages for a single architecture (i386 or amd64) are scanned. To my
knowledge, packages that come with different manuals for different
architectures either don't exist or are extremely rare. It does happen that
some packages are not available for all architectures. Usually, though,
every package is at least available for the most popular architecture, so
hopefully we're not missing out on much.
The repositories are scanned for new packages on a daily basis.
This site is backed by a PostgreSQL database containing all the man pages.
Weekly dumps of the full database are available for download at
http://dl.manned.org/dumps/.
Be warned that the download server may not be terribly reliable, so it is
advisable to use a client that supports resumption of partial downloads. See
wget's -c or curl's -C.
The database schema is "documented" at schema.sql
in the git repo. Note that these dumps don't constitute a stable API and,
while this won't happen frequently, incompatible schema changes or Postgres
major version bumps may occur.
Suggestions for new (or old) systems to index are welcome.
It would be great to index a few more non-Linux systems such as other BSDs,
Solaris/Illumos and Mac OS X. Unfortunately, those don't always follow a
binary package based approach, or are otherwise less easy to properly index.
In general, systems that follow an entirely source-based distribution
approach can't be indexed without compiling everything. Since that is both
very resource-heavy and open to security issues, there are no plans to
include manuals from such systems at the moment. So unless someone comes
with a solution I hadn't thought of yet, there won't be any Gentoo manuals
here. :-(
This site isn't nearly as awesome yet as it could be. Here's some ideas that would be nice to have in the future:
All manual pages are copyrighted by their respective authors. The manuals have been fetched from publicly available repositories of free and (primarily) open source software. The distributors of said software have put in efforts to only include software and documentation that allows free distribution. Nonetheless, if a manual that does not allow to be redistributed has been inadvertently included in our index, please let me know and I will have it removed as soon as possible.
_ }; }; # Very simple (and fast) prefix match. sub search_man { my($q, $limit) = @_; my $sect = $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$// ? $1 : ''; my $name = $q =~ s/^([a-zA-Z0-9,.:_-]+)// ? $1 : ''; return !$name ? [] : tuwf->dbAll( 'SELECT name, section FROM man_index !W ORDER BY name, section LIMIT ?', { 'lower(name) LIKE ?' => escape_like(lc $name).'%', $sect ? ('section ILIKE ?' => escape_like(lc $sect).'%') : (), }, $limit ); } TUWF::get '/browse/search' => sub { my $q = tuwf->reqGet('q')||''; my $man = search_man $q, 150; return tuwf->resRedirect("/$man->[0]{name}.$man->[0]{section}", 'temp') if @$man == 1; framework_ title => 'Search results for '.$q, mainclass => 'searchres', sub { h1_ 'Search results for '.$q; # Package search would also be useful. p_ 'Note: This is just a simple case-insensitive prefix match on the man names. In the future we\'ll have more powerful search functionality. Hopefully.'; if(@$man) { ul_ sub { li_ sub { a_ href => "/$_->{name}.$_->{section}", $_->{name}; small_ " $_->{section}"; } for @$man; } } else { p_ 'No results :-('; } }; }; TUWF::get '/xml/search.xml' => sub { my $q = tuwf->reqGet('q')||''; my $man = search_man $q, 20; tuwf->resHeader('Content-Type' => 'text/xml; charset=UTF-8'); xml; tag 'results', sub { tag 'item', id => "$_->{name}.$_->{section}", %$_, undef for @$man; }; }; TUWF::get qr{/([^/]+)/([0-9a-f]{8})/src} => sub { my $name = normalize_name tuwf->capture(1); my $hash = tuwf->capture(2); my $nfo = tuwf->dbRowi(' SELECT m.name, m.section, v.released, c.content FROM man m JOIN package_versions v ON v.id = m.package JOIN contents c ON c.hash = m.hash WHERE m.name =', \$name, 'AND', sql_hash_prefix('m.hash', $hash), ' LIMIT 1' ); return tuwf->resNotFound if !$nfo->{name}; tuwf->resLastMod($nfo->{released}); tuwf->resHeader('Content-Type', 'text/plain; charset=UTF-8'); tuwf->resHeader('Content-Disposition', sprintf 'filename="%s.%s"', $nfo->{name}, $nfo->{section}); lit $nfo->{content}; }; sub 
_man_nav { my($man, $toc) = @_; my @sect = map $_->{section}, tuwf->dbAlli( 'SELECT DISTINCT section FROM man WHERE name =', \$man->{name}, 'ORDER BY section' )->@*; my @lang = map $_->{lang}, tuwf->dbAlli( "SELECT DISTINCT substring(locale from '^[^.]+') AS lang FROM man WHERE name =", \$man->{name}, 'AND section =', \$man->{section}, " ORDER BY substring(locale from '^[^.]+') NULLS FIRST" )->@*; return if !@sect && !@lang && !@$toc; # TODO: This is ugly, especially because clicking on a translation or # section, you can end up with a man page that is nowhere close to the man # page you're currently reading. Opening a version selector box might be a # better alternative. div_ id => 'nav', sub { if(@sect > 1) { b_ 'Sections'; p_ sub { for (@sect) { if($man->{section} eq $_) { i_ $_; } else { a_ href => "/$man->{name}.$_", $_; } txt_ ' '; } } } if(@lang > 1) { b_ 'Languages'; p_ sub { (my $cur = $man->{locale}||'') =~ s/\..*//; for (@lang) { if(($_||'') eq $cur) { i_ $_ || 'default'; } else { a_ href => $_ ? "/lang/$_/$man->{name}.$man->{section}" : "/$man->{name}.$man->{section}", $_ || 'default'; } txt_ ' '; } } } if(@$toc > 1) { b_ 'Table of Contents'; ul_ sub { for (0..$#$toc) { li_ sub { a_ href => sprintf('#head%d', $_+1), lc $toc->[$_]; } } } } } } # Replace .so's in man source with the contents (if available in the same # package) or with a reference to the other man page. sub soelim { my($verid, $src) = @_; # tix comes with* a custom(?) macro package. But it looks okay even without loading that. # (* It actually doesn't, the tcllib package appears to have that file, but doesn't '.so' it) $src =~ s/^\.so man.macros$//mg; # Other .so's should be handled by html() $src =~ s{^\.so (.+)$}{ my $path = $1; my $name = (reverse split /\//, $path)[0]; my($man) = $verid ? man_pref_name $name, sql 'v.id =', \$verid : (); $man->{name} # Recursive soelim, but the second call gets $verid=0 so we don't keep checking the database ? 
soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')")) : ".in -10\n.sp\n\[\[\[MANNEDINCLUDE$path\]\]\]" }emg; $src; } # This one has to go before the other mappings, to ensure that links work for # man pages called 'pkg' or 'man'. This also means that we can't have a # system named 8 hex digits, but at least that's easy to guarantee. :) TUWF::get qr{/(?