manned/www/index.pl
Yorhel a96aeb6310 Fix /man/* redirects
Forgot to update these functions in the rewrite.
2021-06-09 16:56:43 +02:00

1024 lines
44 KiB
Prolog
Executable file

#!/usr/bin/perl
use v5.26;
use warnings;
use TUWF ':html_', ':xml';
use POSIX 'ceil';
use SQL::Interp 'sql', 'sql_interp';
use Time::Local 'timegm';
use Cwd 'abs_path';
our $ROOT;
BEGIN { ($ROOT = abs_path $0) =~ s{/www/index\.pl$}{}; }
# Force the pure-perl AnyEvent backend; More lightweight and we don't need the
# performance of EV. Fixes an issue with subprocess spawning under TUWF's
# built-in web server that I haven't been able to track down.
BEGIN { $ENV{PERL_ANYEVENT_MODEL} = 'Perl'; }
use lib "$ROOT/lib/ManUtils/inst/lib/perl5";
use ManUtils;
TUWF::set(
logfile => $ENV{TUWF_LOG},
db_login => [undef, undef, undef],
debug => $ENV{TUWF_DEBUG},
xml_pretty => 0,
log_slow_pages => 500,
);
TUWF::hook before => sub {
if(tuwf->{_TUWF}{http}) {
if(tuwf->resFile("$ROOT/www", tuwf->reqPath)) {
tuwf->resHeader('Cache-Control' => 'max-age=31536000');
tuwf->done;
}
}
};
# TODO: Add SQL::Interp support to TUWF directly, in some form.
sub TUWF::Object::dbExeci { shift->dbExec(sql_interp @_) }
sub TUWF::Object::dbVali { shift->dbVal (sql_interp @_) }
sub TUWF::Object::dbRowi { shift->dbRow (sql_interp @_) }
sub TUWF::Object::dbAlli { shift->dbAll (sql_interp @_) }
sub TUWF::Object::dbPagei { shift->dbPage(shift, sql_interp @_) }
# Set the last modification time from a string in yyyy-mm-dd format.
sub TUWF::Object::resLastMod {
my($s, $d) = @_;
return if $d !~ /^(\d{4})-(\d{2})-(\d{2})/;
my @t = gmtime timegm 0,0,0,$3,$2-1,$1;
$s->resHeader('Last-Modified', sprintf '%s, %02d %s %04d %02d:%02d:%02d GMT',
(qw|Sun Mon Tue Wed Thu Fri Sat|)[$t[6]], $t[3],
(qw|Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec|)[$t[4]],
$t[5]+1900, $t[2], $t[1], $t[0]);
}
# The systems table doesn't change often, so keep an in-memory cache for quick lookups.
sub systems {
state $s ||= [ map {
$_->{full} = $_->{name}.($_->{release}?' '.$_->{release}:'');
$_
} tuwf->dbAll('SELECT id, name, release, short, relorder FROM systems ORDER BY name, relorder')->@* ];
}
sub sysbyid { state $s ||= { map +($_->{id}, $_), systems->@* } }
sub sysbyshort { state $s ||= { map +($_->{short}, $_), systems->@* } }
# URL-unescape some special characters that may occur in man names.
# Firefox seems to escape [ and ] in URLs. It doesn't really have to...
sub normalize_name { $_[0] =~ s/%5b/[/irg =~ s/%5d/]/irg =~ s/%20/ /rg }
# Subquery returning all packages that have a man page.
my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM man m WHERE m.package = pv.id)))';
sub escape_like { $_[0] =~ s/([_%])/\\$1/rg }
sub sql_join {
my $sep = shift;
my @args = map +($sep, $_), @_;
sql @args[1..$#args];
}
sub sql_and { @_ ? sql_join 'AND', map sql('(', $_, ')'), @_ : sql '1=1' }
sub sql_or { @_ ? sql_join 'OR', map sql('(', $_, ')'), @_ : sql '1=0' }
# Subquery to match $sql_expr::bytea against a $prefix (hex string). Hopefully indexable.
sub sql_hash_prefix {
my($sql_expr, $prefix) = @_;
my $esc = unpack 'H*', escape_like pack 'H*', $prefix;
sql '(', $sql_expr, "like ('\\x$esc'::bytea||'%'))"
}
sub pkg_frompath {
my($sys_where, $path) = @_;
# $path should be "$category/$name" or "$category/$name/$version", since
# $category may contain a slash, let's try both options.
my sub lookup {
my($cat, $name) = @_;
tuwf->dbRowi('SELECT id, system, name, category FROM', $packages_with_man, 'p WHERE', $sys_where, 'AND category =', \$cat, 'AND name =', \$name);
}
# $category/$name
# e.g. contrib/games/alien
if($path =~ m{^(.+)/([^/]+)$}) {
my $pkg = lookup $1, $2;
return ($pkg, '') if $pkg->{id};
}
# $category/$name/$version
# e.g. contrib/games/alien/10.2
if($path =~ m{^(.+)/([^/]+)/([^/]+)$}) {
my $pkg = lookup $1, $2;
return ($pkg, $3) if $pkg->{id};
}
(undef, '');
}
# Get the preferred man page for the given filters.
sub man_pref {
my($section, $where) = @_;
$where = sql_and $where, sql 'm.section LIKE', \(escape_like($section).'%') if length $section;
# Criteria to determine a "preferred" man page:
# 1. english: English versions of a man page have preference over other locales
# 2. pkgver: Newer versions of the same package have preference over older versions
# 3. stdloc: Prefer man pages in standard locations
# 4. secmatch: Prefer an exact section match
# 5. arch: Prefer Arch over other systems (because it tends to be the most up-to-date, and closest to upstreams)
# 6. ubuntu: If there's no Arch, prefer Ubuntu over other systems (again, tends to be more up-to-date)
# (also resolves distro-specific tooling disputes such as https://code.blicky.net/yorhel/manned/issues/1 )
# 7. sysrel: Prefer a later system release over an older release
# 8. secorder: Lower sections before higher sections (because man does it this way, for some reason)
# 9. pkgdate: Prefer more recent packages (cross-distro)
# 10. Fall back on hash comparison, to ensure the result is stable
tuwf->dbRowi(q{
WITH unfiltered AS (
SELECT s AS sys, p AS pkg, v AS ver, m AS man
FROM man m
JOIN package_versions v ON v.id = m.package
JOIN packages p ON p.id = v.package
JOIN systems s ON s.id = p.system
WHERE}, $where, q{
), f_english AS(
SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale((man).locale)) OR is_english_locale((man).locale)
), f_pkgver AS(
SELECT * FROM f_english a WHERE NOT EXISTS(SELECT 1 FROM f_english b WHERE (a.ver).package = (b.ver).package AND (a.ver).released < (b.ver).released)
), f_stdloc AS(
SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location((man).filename)) OR is_standard_man_location((man).filename)
), f_secmatch AS(
SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE (man).section =}, \$section, q{) OR (man).section =}, \$section, q{
), f_arch AS(
SELECT * FROM f_secmatch WHERE NOT EXISTS(SELECT 1 FROM}, length $section ? 'f_secmatch' : 'f_stdloc', q{WHERE (sys).id = 1) OR (sys).id = 1
), f_ubuntu AS(
SELECT * FROM f_arch WHERE NOT EXISTS(SELECT 1 FROM f_arch WHERE (sys).name = 'Ubuntu') OR (sys).name = 'Ubuntu'
), f_sysrel AS(
SELECT * FROM f_ubuntu a WHERE NOT EXISTS(SELECT 1 FROM f_ubuntu b WHERE (a.sys).name = (b.sys).name AND (a.sys).relorder < (b.sys).relorder)
), f_secorder AS(
SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE (a.man).section > (b.man).section)
), f_pkgdate AS(
SELECT * FROM f_secorder a WHERE NOT EXISTS(SELECT 1 FROM f_secorder b WHERE (a.ver).released < (b.ver).released)
)
SELECT (pkg).system, (pkg).category, (pkg).name AS package, (ver).version, (ver).released, (ver).id AS verid,
(man).name, (man).section, (man).filename, (man).locale, encode((man).hash, 'hex') AS hash
FROM f_pkgdate ORDER BY (man).hash LIMIT 1
});
}
# Given the name of a man page with optional section, find out the actual name
# and section prefix of the man page and the preferred version.
sub man_pref_name {
my($name, $where) = @_;
my $man = man_pref undef, sql_and $where, sql 'm.name =', \$name;
return ($man, '') if length $man->{name};
return (undef, '') if $name !~ s/\.([^.]+)$// || !length $name;
my $section = $1;
$man = man_pref $section, sql_and $where, sql 'm.name =', \$name;
length $man->{name} ? ($man, $section) : (undef, '');
}
sub framework_ {
my $content = pop;
my(%o) = @_;
html_ sub {
head_ sub {
link_ rel => 'stylesheet', type => 'text/css', href => '/man.css?4';
title_ $o{title}.' - manned.org';
};
body_ sub {
div_ id => 'header', sub {
a_ href => '/', 'manned.org';
form_ action => '/browse/search', method => 'get', sub {
input_ type => 'text', name => 'q', id => 'q', tabindex => 1;
input_ type => 'submit', value => ' ';
}
};
div_ id => 'body', sub {
$content->();
br_ style => 'clear: both';
};
div_ id => 'footer', sub {
lit_ 'All manual pages are copyrighted by their respective authors.
| <a href="/info/about">About manned.org</a>
| <a href="mailto:contact@manned.org">Contact</a>
| <a href="https://code.blicky.net/yorhel/manned">Source</a>';
};
script_ type => 'text/javascript', src => '/man.js', '';
}
};
# write the SQL queries as a HTML comment when debugging is enabled
# (stolen from VNDB code)
# (TODO: Move this into TUWF or something)
if(tuwf->debug) {
my(@sql_r, @sql_i) = ();
for (tuwf->{_TUWF}{DB}{queries}->@*) {
my($sql, $params, $time) = @$_;
my @params = sort { $a =~ /^[0-9]+$/ && $b =~ /^[0-9]+$/ ? $a <=> $b : $a cmp $b } keys %$params;
my $prefix = sprintf " [%6.2fms] ", $time*1000;
push @sql_r, sprintf "%s%s | %s", $prefix, $sql, join ', ', map "$_:".DBI::neat($params->{$_}), @params;
my $i=1;
push @sql_i, $prefix.($sql =~ s/\?/tuwf->dbh->quote($params->{$i++})/egr);
}
my $sql_r = join "\n", @sql_r;
my $sql_i = join "\n", @sql_i;
my $modules = join "\n", sort keys %INC;
lit_ "\n<!--\nSQL (with placeholders):\n$sql_r\n\nSQL (interpolated, possibly buggy):\n$sql_i\n\nMODULES:\n$modules\n-->";
}
}
sub paginate_ {
my($url, $count, $perpage, $p) = @_;
return if $count <= $perpage;
my sub l_ {
my($c)= @_;
a_ href => "$url$c", $c if $c != $p;
b_ $c if $c == $p;
};
my $lp = ceil($count/$perpage);
p_ class => 'paginate', sub {
l_ 1 if $p > 1+4;
b_ '...' if $p > 1+5;
l_ $_ for (($p > 4 ? $p-4 : 1)..($p+4 > $lp ? $lp : $p+4));
b_ '...' if $p < $lp-5;
l_ $lp if $p < $lp-4;
}
}
TUWF::set error_404_handler => sub {
tuwf->resStatus(404);
my $title = 'No manual entry for '.tuwf->reqPath;
framework_ title => $title, sub {
h1_ $title;
p_ 'That is, the page you were looking for doesn\'t exist.';
};
};
TUWF::get '/' => sub {
my $stats = tuwf->dbRow('SELECT * FROM stats_cache');
sub num { local $_=shift; 1 while(s/(\d)(\d{3})($|,)/$1,$2/); $_ };
framework_ title => 'Man Pages Archive', sub {
h1_ 'Man Pages Archive';
p_ class => 'txt', sub {
lit sprintf <<' _', map num($stats->{$_}), qw|hashes mans files packages|;
Indexing <b>%s</b> versions of <b>%s</b> manual pages found in <b>%s</b>
files of <b>%s</b> packages.
<br /><br />
Manned.org aims to index all manual pages from a variety of systems, both
old and new, and provides a convenient interface for looking up and viewing
the various versions of each man page.
<a href="/info/about">About manned.org &raquo;</a>
_
};
h2_ 'Browse the manuals';
ul_ id => 'systems', sub {
my %sys;
push $sys{$_->{name}}->@*, $_ for(systems->@*);
li_ sub {
my $sys = $sys{$_};
my $img = $sys->[0]{short} =~ s/^(.+)-.+$/$1/r;
if(@$sys == 1) {
a_ href => "/pkg/$sys->[0]{short}", sub {
span_ style => "background-image: url('images/$img.png')", '';
b_ $sys->[0]{name};
};
return;
}
span_ style => "background-image: url('images/$img.png')", '';
b_ $sys->[0]{name};
my $i = 0;
for(reverse @$sys) {
a_ href => "/pkg/$_->{short}", ++$i > 3 ? (class => 'hidden') : (), $_->{release};
lit_ ' ';
}
a_ href => '#', class => 'more', 'more...' if $i > 3;
} for sort keys %sys;
};
h2_ 'Other sites';
ul_ id => 'external', sub {
li_ sub { a_ href => 'http://man7.org/linux/man-pages/index.html', 'man7.org'; txt_ ' - Linux man pages from several upstream projects.' };
li_ sub { a_ href => 'https://manpag.es/', 'ManPag.es'; txt_ ' - Man pages from several Linux distributions.' };
li_ sub { a_ href => 'https://www.mankier.com/', 'ManKier'; txt_ ' - Fedora Rawhide + some manually imported man pages; Nicely formatted and with some unique features.' };
li_ sub { a_ href => 'https://man.cx/', 'man.cx'; txt_ ' - Man pages extracted from Debian testing.' };
li_ sub { a_ href => 'http://man.he.net/', 'man.he.net'; txt_ ' - Also seems to be from a Debian-like system.' };
li_ sub { a_ href => 'https://linux.die.net/man/', 'die.net'; txt_ ' - Seems to be based on an RPM-based Linux distribution.' };
li_ sub { a_ href => 'http://manpages.org/', 'manpages.org'; txt_ ' - Lots of mostly-nicely formatted man pages, no clue about source.' };
li_ sub { a_ href => 'https://www.manpagez.com/', 'manpagez.com'; txt_ ' - Mac OS X, has some GTK-html and texinfo documentation as well.' };
li_ sub { a_ href => 'https://man.archlinux.org/', 'Arch Linux Man Pages' };
li_ sub { a_ href => 'https://manpages.debian.org/', 'Debian Man Pages' };
li_ sub { a_ href => 'https://www.dragonflybsd.org/cgi/web-man', 'DragonFlyBSD Man Pages' };
li_ sub { a_ href => 'https://www.freebsd.org/cgi/man.cgi', 'FreeBSD.org Man Pages' };
li_ sub { a_ href => 'https://netbsd.gw.com/cgi-bin/man-cgi', 'NetBSD Man Pages' };
li_ sub { a_ href => 'https://www.openbsd.org/cgi-bin/man.cgi', 'OpenBSD Man Pages' };
li_ sub { a_ href => 'https://manpages.ubuntu.com/', 'Ubuntu Manuals' };
li_ sub { a_ href => 'https://man.voidlinux.org/', 'Void Linux manpages' };
};
}
};
TUWF::get '/info/about' => sub {
framework_ title => 'About', sub {
h1_ 'About Manned.org';
div_ id => 'about', sub {
lit <<' _';
<h2>Goal</h2>
<p>
The state of online indices of manual pages used to be a sad one. Existing
sites used to only offer you a single version of a man page: From one
origin, and often only in a single language. Most didn't even tell you where
the manual actually originated from, making it very hard to determine
whether the manual you found actually applied to your situation and even
harder to find a manual for a specific system. Additionally, some sites
rendered the manuals in an unreadable way, didn't correctly handle special
formatting - like tables - or didn't correctly display non-ASCII characters.
<br /><br />
Nowadays there are many good alternatives, but Manned.org was one of the
sites created in order to improve that situation. This site aims to index
the manual pages from a variaty of systems, both old and new, and allows you
to browse through the various versions of a manual page to find out how each
system behaves. The manuals are stored in the database as UTF-8, and are
passed through <a href="http://www.gnu.org/software/groff/">groff</a> to
render them in (mostly) the same way as they are displayed in your terminal.
<br /><br />
This website is <a href="https://code.blicky.net/yorhel/manned">open
source</a> (MIT licensed) and written in a combination of Perl and Rust. The
entire PostgreSQL database is available for download (see "Database
download" below).
</p>
<h2>URL format</h2>
<p>You can link to specific packages and man pages with several URL formats.
These URLs will keep working in the future, so you should not have to worry
about eventual dead links.</p>
<h3>Man pages</h3>
<p>The following URLs are available to refer to an individual man page:</p>
<dl>
<dt><code>/&lt;name>/&lt;8-hex-digits></code></dt><dd>
This is the permalink format for a specific man page (e.g. <a href="/ls/910be0ed">/ls/910be0ed</a>).</dd>
<dt><code>/&lt;name>[.&lt;section>]</code></dt><dd>
Will try to get the latest and most-close-to-upstream version of a man
page (e.g. <a href="/socket">/socket</a> or <a
href="/socket.7">/socket.7</a>). Note that this may fetch the man page
from any available system, so may result in confusing scenarios for
system-specific documentation.</dd>
<dt><code>/man/&lt;system>/&lt;name>[.&lt;section>]</code></dt><dd>
Will get the latest version of a man page from the given system (e.g. <a
href="/man/ubuntu-xenial/rsync">/man/ubuntu-xenial/rsync</a>)</dd>
<dt><code>/man/&lt;system>/&lt;category>/&lt;package>/&lt;name>[.&lt;section>]</code></dt><dd>
Will get the latest version of a man page from the given package (e.g. <a
href="/man/ubuntu-xenial/net/rsync/rsync">/man/ubuntu-xenial/net/rsync/rsync</a>)</dd>
<dt><code>/man/&lt;system>/&lt;category>/&lt;package>/&lt;version>/&lt;name>[.&lt;section>]</code></dt><dd>
Will get the man page from a specific package version (e.g. <a
href="/man/ubuntu-xenial/net/rsync/3.1.1-3ubuntu1/rsync">/man/ubuntu-xenial/net/rsync/3.1.1-3ubuntu1/rsync</a>)</dd>
</dl>
<p>Currently, the last three URLs will perform a redirect to the
appropriate permalink URL, but this may change in the future.<br />
In all URLs where an optional <code>.&lt;section></code> can be provided,
the search is performed as a prefix match. For example, <a
href="/cat.3">/cat.3</a> will provide the <code>cat.3tcl</code> man page if
no exact <code>cat.3</code> version is available. Linking to the full
section name is also possible: <a href="/cat.3tcl">/cat.3tcl</a>. If no
section is given and multiple sections are available, the lowest section
number is chosen.</p>
<h3>Packages</h3>
<p>Linking to individual packages is also possible. These pages will show a
listing of all manual pages available in the given package.</p>
<dl>
<dt><code>/pkg/&lt;system>/&lt;category>/&lt;package></code></dt><dd>
For the latest version of a package (e.g. <a
href="/pkg/arch/core/coreutils">/pkg/arch/core/coreutils</a>).</dd>
<dt><code>/pkg/&lt;system>/&lt;category>/&lt;package>/&lt;version></code></dt><dd>
For a particular version of a package (e.g. <a
href="/pkg/arch/core/coreutils/8.25-2">/pkg/arch/core/coreutils/8.25-2</a>).</dd>
</dl>
<p>Note that this site only indexes packages that actually have manual
pages; Linking to a package that doesn't have any will result in a 404
page.</p>
<h2>The indexing process</h2>
<p>
All man pages are fetched right from the (binary) packages available on the
public repositories of Linux distributions. In particular:<br />
</p>
<dl>
<dt>Arch Linux</dt><dd>
The core, extra and community repositories are fetched from a local
Arch mirror. Indexing started around begin June 2012. The i686
architecture was indexed until November 6th, 2016, packages after that
were fetched from from x86_64.</dd>
<dt>Debian</dt><dd>
Historical releases were fetched from <a
href="http://archive.debian.org/debian/">http://archive.debian.org/debian/</a>
and <a href="http://snapshot.debian.org/">http://snapshot.debian.org/</a>.
For buzz, rex and bo, we're missing a few man pages because some packages
were missing from the repository archives. Where available, all components
(main, contrib and non-free) from the $release and $release-updates
repositories are indexed.</dd>
<dt>CentOS</dt><dd>
Historical releases were fetched from <a
href="http://vault.centos.org/">vault.centos.org</a>, current releases
from a local mirror. Where applicable, the following repositories were
indexed: addons, centosplus, contrib, extras, os. The i386 architecture
was indexed for versions lower than 7.0, since 7.0 the packages from
x86_64 are indexed.
<dt>Fedora</dt><dd>
Historical releases were fetched from <a
href="http://archives.fedoraproject.org/pub/archive/fedora/linux/">archives.fedoraproject.org</a>,
current releases from a local repository. Fedora Core 1 till 6 are
(incorrectly) called 'Fedora' here. To compensate for that, Fedora 3 till
6 also include the Extras repository. For Fedora 7 and later, the
'Everything' and 'updates' repositories are indexed. The i386 arch was
indexed for Fedora 17 and older, the x86_64 arch starting with Fedora
18.</dd>
<dt>FreeBSD</dt><dd>
Historical releases were fetched from <a
href="http://ftp-archive.freebsd.org/mirror/FreeBSD-Archive/">http://ftp-archive.freebsd.org/mirror/FreeBSD-Archive/</a>.
The base installation tarballs are included in the database as packages
prefixed with <i>core-</i>. The package repositories have also been
indexed, except for 2.0.5 - 2.2.7 and 3.0 - 3.3 because those were not
available on the ftp archive. Only the -RELEASE repositories have been
included, which is generally a snapshot of the ports directory around the
time of the release. The release dates indicated for many packages were
guessed from the file modification dates in the tarball, and may be
inaccurate. The i368 arch was indexed for FreeBSD 11.0 and older, the
amd64 arch starting with 11.1.</dd>
<dt>Ubuntu</dt><dd>
Historical releases were fetched from <a
href="http://old-releases.ubuntu.com/ubuntu/">http://old-releases.ubuntu.com/ubuntu/</a>,
supported releases from a local mirror. All components (main, universe,
restricted and multiverse) from the $release, $release-updates and
$release-security repositories are indexed. Indexing started around mid
June 2012. All releases before 2017 were indexed from the i386
repositories, starting with 17.04 the amd64 repositories were used.</dd>
</dl>
<p>
Only packages for a single architecture (i386 or amd64) are scanned. To my
knowledge, packages that come with different manuals for different
architectures either don't exist or are extremely rare. It does happen that
some packages are not available for all architectures. Usually, though,
every package is at least available for the most popular architecture, so
hopefully we're not missing out on much. <br /><br />
The repositories are scanned for new packages on a daily basis.
</p>
<h2>Database download</h2>
<p>
This site is backed by a PostgreSQL database containing all the man pages.
Weekly dumps of the full database are available for download at
<a href="http://dl.manned.org/dumps/">http://dl.manned.org/dumps/</a>.
<br /><br />
Be warned that the download server may not be terribly reliable, so it is
advisable to use a client that supports resumption of partial downloads. See
<a href="/wget">wget's -c</a> or <a href="/curl">curl's -C</a>.
<br /><br />
The database schema is "documented" at <a
href="https://code.blicky.net/yorhel/manned/src/branch/master/sql/schema.sql">schema.sql</a>
in the git repo. Note that these dumps don't constitute a stable API and,
while this won't happen frequently, incompatible schema changes or Postgres
major version bumps may occur.
</p>
<h2>Other systems</h2>
<p>
Suggestions for new (or old) systems to index are welcome.
<br /><br />
It would be great to index a few more non-Linux systems such as other BSDs,
Solaris/Illumos and Mac OS X. Unfortunately, those don't always follow a
binary package based approach, or are otherwise less easy to properly index.
<br /><br />
In general, systems that follow an entirely source-based distribution
approach can't be indexed without compiling everything. Since that is both
very resource-heavy and open to security issues, there are no plans to
include manuals from such systems at the moment. So unless someone comes
with a solution I hadn't thought of yet, there won't be any Gentoo manuals
here. :-(
</p>
<h2>Future plans</h2>
<p>
This site isn't nearly as awesome yet as it could be. Here's some ideas that
would be nice to have in the future:
<ul>
<li>Improved, more intelligent, search,</li>
<li><a href="/apropos.1">apropos(1)</a> emulation(?),</li>
<li>Diffs between various versions of a man page,</li>
<li>Anchor links within man pages, for easier linking to a section or paragraph,</li>
<li>Alternative formats (Text, PDF, more semantic HTML, etc),</li>
<li>A command-line client, like <a href="/man.1">man(1)</a> with manned.org as database backend.</li>
</ul>
</p>
<h2>Copyright</h2>
<p>
All manual pages are copyrighted by their respective authors. The manuals
have been fetched from publically available repositories of free and
(primarily) open source software. The distributors of said software have put
in efforts to only include software and documentation that allows free
distribution. Nonetheless, if a manual that does not allow to be
redistributed has been inadvertently included in our index, please let me
know and I will have it removed as soon as possible.
</p>
_
} }
};
# Very simple (and fast) prefix match.
sub search_man {
my($q, $limit) = @_;
my $sect = $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$// ? $1 : '';
my $name = $q =~ s/^([a-zA-Z0-9,.:_-]+)// ? $1 : '';
return !$name ? [] : tuwf->dbAll(
'SELECT name, section FROM man_index !W ORDER BY name, section LIMIT ?',
{
'lower(name) LIKE ?' => escape_like(lc $name).'%',
$sect ? ('section ILIKE ?' => escape_like(lc $sect).'%') : (),
},
$limit
);
}
TUWF::get '/browse/search' => sub {
my $q = tuwf->reqGet('q')||'';
my $man = search_man $q, 150;
return tuwf->resRedirect("/$man->[0]{name}.$man->[0]{section}", 'temp') if @$man == 1;
framework_ title => 'Search results for '.$q, sub {
h1_ 'Search results for '.$q;
# Package search would also be useful.
p_ 'Note: This is just a simple case-insensitive prefix match on the man names. In the future we\'ll have more powerful search functionality. Hopefully.';
if(@$man) {
ul_ id => 'searchres', sub {
li_ sub {
a_ href => "/$_->{name}.$_->{section}", $_->{name};
i_ " $_->{section}";
} for @$man;
}
} else {
br_; br_;
b_ 'No results :-(';
}
};
};
TUWF::get '/xml/search.xml' => sub {
my $q = tuwf->reqGet('q')||'';
my $man = search_man $q, 20;
tuwf->resHeader('Content-Type' => 'text/xml; charset=UTF-8');
xml;
tag 'results', sub {
tag 'item', id => "$_->{name}.$_->{section}", %$_, undef for @$man;
};
};
TUWF::get qr{/([^/]+)/([0-9a-f]{8})/src} => sub {
my $name = normalize_name tuwf->capture(1);
my $hash = tuwf->capture(2);
my $nfo = tuwf->dbRowi('
SELECT m.name, m.section, v.released, c.content
FROM man m
JOIN package_versions v ON v.id = m.package
JOIN contents c ON c.hash = m.hash
WHERE m.name =', \$name, 'AND', sql_hash_prefix('m.hash', $hash), '
LIMIT 1'
);
return tuwf->resNotFound if !$nfo->{name};
tuwf->resLastMod($nfo->{released});
tuwf->resHeader('Content-Type', 'text/plain; charset=UTF-8');
tuwf->resHeader('Content-Disposition', sprintf 'filename="%s.%s"', $nfo->{name}, $nfo->{section});
lit $nfo->{content};
};
sub _man_nav {
my($man, $toc) = @_;
my @sect = map $_->{section}, tuwf->dbAlli(
'SELECT DISTINCT section FROM man WHERE name =', \$man->{name}, 'ORDER BY section'
)->@*;
my @lang = map $_->{lang}, tuwf->dbAlli(
"SELECT DISTINCT substring(locale from '^[^.]+') AS lang
FROM man WHERE name =", \$man->{name}, 'AND section =', \$man->{section}, "
ORDER BY substring(locale from '^[^.]+') NULLS FIRST"
)->@*;
return if !@sect && !@lang && !@$toc;
# TODO: This is ugly, especially because clicking on a translation or
# section, you can end up with a man page that is nowhere close to the man
# page you're currently reading. Opening a version selector box might be a
# better alternative.
div_ id => 'nav', sub {
if(@sect > 1) {
b_ 'Sections';
p_ sub {
for (@sect) {
if($man->{section} eq $_) {
i_ $_;
} else {
a_ href => "/$man->{name}.$_", $_;
}
txt_ ' ';
}
}
}
if(@lang > 1) {
b_ 'Languages';
p_ sub {
(my $cur = $man->{locale}||'') =~ s/\..*//;
for (@lang) {
if(($_||'') eq $cur) {
i_ $_ || 'default';
} else {
a_ href => $_ ? "/lang/$_/$man->{name}.$man->{section}" : "/$man->{name}.$man->{section}", $_ || 'default';
}
txt_ ' ';
}
}
}
if(@$toc > 1) {
b_ 'Table of Contents';
ul_ sub {
for (0..$#$toc) {
li_ sub {
a_ href => sprintf('#head%d', $_+1), lc $toc->[$_];
}
}
}
}
}
}
# Replace .so's in man source with the contents (if available in the same
# package) or with a reference to the other man page.
sub soelim {
my($verid, $src) = @_;
# tix comes with* a custom(?) macro package. But it looks okay even without loading that.
# (* It actually doesn't, the tcllib package appears to have that file, but doesn't '.so' it)
$src =~ s/^\.so man.macros$//mg;
# Other .so's should be handled by html()
$src =~ s{^\.so (.+)$}{
my $path = $1;
my $name = (reverse split /\//, $path)[0];
my($man) = $verid ? man_pref_name $name, sql 'v.id =', \$verid : ();
$man->{name}
# Recursive soelim, but the second call gets $verid=0 so we don't keep checking the database
? soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')"))
: ".in -10\n.sp\n\[\[\[MANNEDINCLUDE$path\]\]\]"
}emg;
$src;
}
# This one has to go before the other mappings, to ensure that links work for
# man pages called 'pkg' or 'man'. This also means that we can't have a
# system named 8 hex digits, but at least that's easy to guarantee. :)
TUWF::get qr{/(?<name>[^/]+)(?:/(?<hash>[0-9a-f]{8}))?} => sub {
my $name = normalize_name tuwf->capture('name');
my $shorthash = tuwf->capture('hash');
# Unfortunately, even in the permalink format with the hash, we don't know
# from which package we're supposed to get the man page. This info is
# needed in order to do .so substitution, so we can substitute files from
# the same package as the requested man page. Use the man_pref logic here
# to deterministically select a good package.
my($man, undef) = $shorthash
? man_pref undef, sql 'm.name =', \$name, 'AND', sql_hash_prefix 'm.hash', $shorthash
: man_pref_name $name, 'true';
return tuwf->resNotFound() if !$man->{name};
my $fmt = ManUtils::html ManUtils::fmt_block soelim $man->{verid},
tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')");
my @toc;
$fmt =~ s{\n<b>(.+?)<\/b>\n}{
push @toc, $1;
my $c = @toc;
qq{\n<a href="#head$c" id="head$c">$1</a>\n}
}eg;
my $hasversions = tuwf->dbVali(
'SELECT 1 FROM man WHERE name =', \$man->{name}, 'AND section =', \$man->{section},
'AND locale IS NOT DISTINCT FROM', \$man->{locale},
'AND hash <> decode(', \$man->{hash}, ", 'hex') LIMIT 1"
);
tuwf->resLastMod($man->{released});
framework_ title => $name, sub {
_man_nav $man, \@toc;
div_ id => 'manbuttons', sub {
h1_ $man->{name};
ul_ 'data-hash' => $man->{hash},
'data-name' => $man->{name},
'data-section' => $man->{section},
'data-locale' => $man->{locale}||'',
'data-hasversions' => $hasversions?1:0,
sub {
li_ sub { a_ href => "/$man->{name}/".substr($man->{hash}, 0, 8).'/src', 'source' };
li_ sub { a_ href => "/$man->{name}/".substr($man->{hash}, 0, 8), 'permalink' };
}
};
div_ id => 'manres', class => 'hidden', '';
div_ id => 'contents', sub {
pre_ sub { lit_ $fmt }
}
};
};
TUWF::get qr{/pkg/([^/]+)} => sub {
my $short = tuwf->capture(1);
my $sys = sysbyshort->{$short};
return tuwf->resNotFound if !$sys;
my $f = tuwf->validate(get =>
c => { onerror => 'all', enum => [ '0', 'all', 'a'..'z' ] },
p => { onerror => 1, uint => 1, range => [1,200] },
)->data;
my $where = sql 'system =', \$sys->{id},
$f->{c} eq '0' ? ('AND (ASCII(name) < 97 OR ASCII(name) > 122) AND (ASCII(name) < 65 OR ASCII(name) > 90)') :
$f->{c} ne 'all' ? ('AND LOWER(SUBSTR(name, 1, 1)) =', \$f->{c}) : ();
my $count = tuwf->dbVali('SELECT count(*) FROM', $packages_with_man, 'p WHERE', $where);
my $pkg = tuwf->dbPagei({ results => 200, page => $f->{p} },
'SELECT id, system, name, category FROM', $packages_with_man, 'p WHERE', $where, 'ORDER BY name'
);
my $title = "Packages for $sys->{name}".($sys->{release}?" $sys->{release}":"");
framework_ title => $title, sub {
div_ id => 'pkglist', sub {
h1_ $title;
p_ class => 'charselect', sub {
for('all', 0, 'a'..'z') {
a_ href => "/pkg/$short?c=$_", $_?uc$_:'#' if $_ ne $f->{c};
b_ $_?uc$_:'#' if $_ eq $f->{c};
}
};
p_ 'Note: Packages without man pages are not listed.';
paginate_ "/pkg/$short?c=$f->{c};p=", $count, 200, $f->{p};
ul_ id => 'packages', sub {
li_ sub {
a_ href => "/pkg/$short/$_->{category}/$_->{name}", $_->{name};
i_ ' '.$_->{category};
} for @$pkg;
};
paginate_ "/pkg/$short?c=$f->{c};p=", $count, 200, $f->{p};
}
};
};
# Package info: /pkg/$system/$category/$name (/$version); $category may contain a slash, too.
TUWF::get qr{/pkg/([^/]+)/(.+)} => sub {
my ($short, $path) = tuwf->captures(1,2);
my $sys = sysbyshort->{$short};
return tuwf->resNotFound if !$sys;
my($pkg, $ver) = pkg_frompath(sql('system =', \$sys->{id}), $path);
return tuwf->resNotFound if !$pkg;
my $vers = tuwf->dbAlli('
SELECT id, version, released
FROM package_versions v
WHERE package =', \$pkg->{id}, '
AND EXISTS(SELECT 1 FROM man m WHERE m.package = v.id)
ORDER BY released DESC'
);
my $sel = $ver ? (grep $_->{version} eq $ver, @$vers)[0] : $vers->[0];
return tuwf->resNotFound if !$sel;
my $p = tuwf->validate(get => p => { onerror => 1, uint => 1, range => [1,100] })->data;
my $count = tuwf->dbVali('SELECT count(*) FROM man WHERE package =', \$sel->{id});
my $mans = tuwf->dbPagei({ results => 200, page => $p },
"SELECT name, encode(hash, 'hex') AS hash, section, locale, filename
FROM man WHERE package =", \$sel->{id}, '
ORDER BY name, locale NULLS FIRST, filename'
);
# Latest version of this package determines last modification date of the page.
tuwf->resLastMod($vers->[0]{released});
my $title = "$sys->{name}".($sys->{release}?" $sys->{release}":"")." / $pkg->{category} / $pkg->{name}";
framework_ title => "$title $sel->{version}", sub {
h1_ $title;
div_ id => 'pkgversions', sub {
h2_ 'Versions';
ul_ sub {
li_ sub {
a_ href => "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$_->{version}", $_->{version} if $_ != $sel;
b_ " $_->{version}" if $_ == $sel;
i_ " $_->{released}";
} for(@$vers);
}
};
div_ id => 'pkgmans', sub {
h2_ "Manuals for version $sel->{version}";
paginate_ "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$sel->{version}?p=", $count, 200, $p;
ul_ sub {
li_ sub {
a_ href => "/$_->{name}/".substr($_->{hash},0,8), "$_->{name}($_->{section})";
b_ " $_->{locale}" if $_->{locale};
i_ " $_->{filename}";
} for(@$mans);
};
paginate_ "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$sel->{version}?p=", $count, 200, $p;
}
}
};
# /browse/<pkg> has been moved to /pkg/ with the package category added to the path
TUWF::get qr{/browse/([^/]+)} => sub { tuwf->resRedirect('/pkg/'.tuwf->capture(1), 'perm') };
TUWF::get qr{/browse/([^/]+)/([^/]+)(?:/([^/]+))?} => sub {
my($sys, $name, $ver) = tuwf->captures(1,2,3);
$sys = sysbyshort->{$sys};
return tuwf->resNotFound if !$sys;
my $pkgs = tuwf->dbRowi('SELECT category FROM packages WHERE system =', \$sys->{id}, 'AND name =', \$name, 'LIMIT 1');
return tuwf->resNotFound if !defined $pkgs->{category};
tuwf->resRedirect("/pkg/$sys->{short}/$pkgs->{category}/$name".($ver ? "/$ver" :''), 'perm');
};
# Redirects for canonical URLs
TUWF::get qr{/man/([^/]+)/(.+)} => sub {
my($sys, $path) = tuwf->captures(1,2);
# Path can be:
# 1. <name>
# 2. <category>/<package>/<name>
# 3. <category>/<package>/<version>/<name>
# $sys can be either a full system 'short' name, or a prefix (e.g. 'debian' meaning 'any debian-* version')
my $sysid = sysbyshort->{$sys};
$sysid = $sysid ? [$sysid->{id}] : [ map sysbyshort->{$_}{id}, grep /^\Q$sys\E-/, keys sysbyshort->%* ];
return tuwf->resNotFound if !@$sysid;
my $man;
if($path !~ m{/}) { # (1)
($man) = man_pref_name $path, sql 's.id IN', $sysid;
} else {
$path =~ s{/([^/]+)$}{};
my $name = $1;
my($pkg, $ver) = pkg_frompath sql('system IN', $sysid), $path; # Handles (2) and (3)
return tuwf->resNotFound if !$pkg;
($man) = man_pref_name $name, sql 's.id IN', $sysid, 'AND p.id =', \$pkg->{id}, $ver ? ('AND v.version =', \$ver) : ();
}
return tuwf->resNotFound if !$man;
tuwf->resRedirect("/$man->{name}/".substr($man->{hash}, 0, 8), 'temp');
};
# Redirect for a specific language for a man page.
# I'm not a fan of this solution; might drop it in the future.
TUWF::get qr{/lang/([^/]+)/([^/]+)} => sub {
my $lang = tuwf->capture(1);
my $name = normalize_name tuwf->capture(2);
my($man, undef) = man_pref_name $name,
sql "substring(locale from '^[^.]+') ilike", \escape_like $lang;
return tuwf->resNotFound if !$man->{name};
tuwf->resRedirect("/$man->{name}/".substr($man->{hash}, 0, 8), 'temp');
};
TUWF::get '/json/tree.json' => sub {
my $f = tuwf->validate(get =>
name => { required => 0, maxlength => 256 },
section => { required => 0, maxlength => 32 },
locale => { required => 0, maxlength => 32 },
cur => { required => 0, default => '', regex => qr/^[a-fA-F0-9]{40}$/ },
hash => { required => 0, default => '', regex => qr/^[a-fA-F0-9]{40}$/ },
)->data;
return tuwf->resNotFound() if !$f->{hash} && !($f->{section} && $f->{name});
my $l = tuwf->dbAlli("
SELECT p.system, p.category, p.name AS package, v.version, v.released, v.id AS verid, m.name, m.section, m.filename, m.locale, encode(m.hash, 'hex') AS hash
FROM man m
JOIN package_versions v ON v.id = m.package
JOIN packages p ON p.id = v.package
JOIN systems s ON s.id = p.system
WHERE", sql_and(
length $f->{hash} ? sql 'm.hash = decode(', \$f->{hash}, ", 'hex')" : (),
length $f->{name} ? sql 'm.name =', \$f->{name} : (),
length $f->{section} ? sql 'm.section =', \$f->{section} : (),
length $f->{locale} ? sql 'm.locale =', \$f->{locale} : (),
defined $f->{locale} && $f->{locale} eq '' ? 'm.locale IS NULL' : (),
), '
ORDER BY s.name, s.relorder DESC, p.name, v.released DESC, m.name, m.locale NULLS FIRST, m.filename
');
# Convert the list into a tree
my $tree = [];
my($sys, $sysver, $pkg, $pkgver);
for my $m (@$l) {
my $sysname = sysbyid->{$m->{system}}{name};
if(!$sys || $sysname ne $sys->{name}) {
$sys = { name => $sysname, childs => [] };
$sysver = undef;
push @$tree, $sys;
}
my $sysversion = sysbyid->{$m->{system}}{release} || '';
if(!$sysver || $sysversion ne $sysver->{name}) {
$sysver = { name => $sysversion, childs => [] };
$pkg = undef;
push @{$sys->{childs}}, $sysver;
}
if(!$pkg || $m->{package} ne $pkg->{name}) {
$pkg = { name => $m->{package}, i => $m->{category}, table => [] };
$pkgver = undef;
push @{$sysver->{childs}}, $pkg;
}
push @{$pkg->{table}}, [
$pkgver && $pkgver eq $m->{version} ? {name=>''} :
{name => $m->{version}, href => "/pkg/".sysbyid->{$m->{system}}{short}."/$m->{category}/$m->{package}/$m->{version}"},
{ name => "$m->{name}($m->{section})",
$f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? ()
: (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8))
},
{ name => substr($m->{hash}, 0, 8),
$f->{hash} || lc($m->{hash}) eq lc($f->{cur}) ? ()
: (href => sprintf('/%s/%s', $m->{name}, substr $m->{hash}, 0, 8))
},
{ name => $m->{filename} }
];
$pkgver = $m->{version};
}
# Determine which elements to show/hide by default.
# It might make more sense to do this in JS, but since I am utterly
# incapable of writing maintainable JS I'm doing it here in order to keep the
# JS stupid and simple.
# TODO: Highlight systems/packages where the 'current' man page is?
for my $sys (@$tree) {
$sys->{expand} = 1 if $sys->{childs}[0]{name}; # Expand all systems that have named versions
$sys->{expand} = 1 if $f->{hash}; # Expand everything on 'location'
my $i = 0;
for my $sysver (@{$sys->{childs}}) {
$i++;
$sysver->{expand} = 1 if !$sysver->{name}; # Expand unnamed versions (since you can't click them)
$sysver->{expand} = 1 if $f->{hash}; # Expand everything on 'location'
$sysver->{hide} = 1 if $i > 3 && @{$sys->{childs}} > 5; # Show only the first 3 versions
for my $pkg (@{$sysver->{childs}}) {
$pkg->{expand} = 1 if @{$sysver->{childs}} <= 3; # Expand everything if there's not too many things to expand
$pkg->{expand} = 1 if $f->{hash}; # Expand everything on 'location'
# TODO: Show/Hide duplicate hashes?
}
}
}
tuwf->resJSON($tree);
};
TUWF::run();