diff --git a/README.md b/README.md
index 99d3164..2aadc53 100644
--- a/README.md
+++ b/README.md
@@ -13,11 +13,12 @@ Ironically, documentation about how things work is completely lacking.
### Web front-end
-- DBI
-- DBD::Pg
-- TUWF
-- JSON::XS
- AnyEvent
+- DBD::Pg
+- DBI
+- JSON::XS
+- SQL::Interp
+- TUWF
### Man page indexer
diff --git a/www/index.pl b/www/index.pl
index a8f9395..f8e7b46 100755
--- a/www/index.pl
+++ b/www/index.pl
@@ -1,10 +1,11 @@
#!/usr/bin/perl
-use strict;
+use v5.26;
use warnings;
-use TUWF ':html', 'html_escape', ':xml';
-use JSON::XS;
+use TUWF ':html_', ':xml';
use POSIX 'ceil';
+use SQL::Interp 'sql', 'sql_interp';
+use Time::Local 'timegm';
use Cwd 'abs_path';
our $ROOT;
@@ -20,31 +21,11 @@ use ManUtils;
TUWF::set(
- logfile => $ENV{TUWF_LOG},
- db_login => [undef, undef, undef],
- debug => $ENV{TUWF_DEBUG},
- xml_pretty => 0,
- log_slow_pages => 500,
- # Cache the system information
- pre_request_handler => sub {
- my $self = shift;
- if(!$self->{systems}) {
- $self->{systems} = $self->dbSystemGet;
- $_->{full} = $_->{name}.($_->{release}?' '.$_->{release}:'') for(@{$self->{systems}});
- $self->{sysbyid} = { map +($_->{id}, $_), @{$self->{systems}} };
- $self->{sysbyshort} = { map +($_->{short}, $_), @{$self->{systems}} };
- }
- 1;
- },
- error_404_handler => sub {
- my $self = shift;
- $self->resStatus(404);
- my $title = 'No manual entry for '.$self->reqPath;
- $self->htmlHeader(title => $title);
- h1 $title;
- p 'That is, the page you were looking for doesn\'t exist.';
- $self->htmlFooter;
- },
+ logfile => $ENV{TUWF_LOG},
+ db_login => [undef, undef, undef],
+ debug => $ENV{TUWF_DEBUG},
+ xml_pretty => 0,
+ log_slow_pages => 500,
);
@@ -58,1097 +39,989 @@ TUWF::hook before => sub {
};
-TUWF::register(
- qr// => \&home,
- qr{info/about} => \&about,
- qr{browse/search} => \&browsesearch,
+# TODO: Add SQL::Interp support to TUWF directly, in some form.
+# Until then, these "i" variants of the TUWF database methods run their
+# arguments through sql_interp() and hand the result to the regular method.
+# dbPagei passes its first argument on to dbPage untouched.
+sub TUWF::Object::dbExeci { my $self = shift; $self->dbExec(sql_interp @_) }
+sub TUWF::Object::dbVali  { my $self = shift; $self->dbVal(sql_interp @_) }
+sub TUWF::Object::dbRowi  { my $self = shift; $self->dbRow(sql_interp @_) }
+sub TUWF::Object::dbAlli  { my $self = shift; $self->dbAll(sql_interp @_) }
+sub TUWF::Object::dbPagei { my($self, $opts) = splice @_, 0, 2; $self->dbPage($opts, sql_interp @_) }
- # These have to go before the other mappings, to ensure that links work for
- # man pages called 'pkg' or 'man'. This also means that we can't have a
- # system named 8 hex digits, but at least that's easy to guarantee. :)
- qr{([^/]+)/([0-9a-f]{8})} => \&man,
- qr{([^/]+)/([0-9a-f]{8})/src} => \&src,
- # We don't have any other single-component paths
- qr{([^/]+)} => \&man,
-
- qr{pkg/([^/]+)} => \&pkg_list,
- # pkg/$system/$category/$name (/$version); $category may contain a slash, too.
- qr{pkg/([^/]+)/(.+)} => \&pkg_info,
-
- # Redirects for canonical URLs
- qr{man/([^/]+)/(.+)} => \&man_redir,
-
- # Redirects for old URLs.
- # /browse/ You can link to specific packages and man pages with several URL formats.
- These URLs will keep working in the future, so you should not have to worry
- about eventual dead links. The following URLs are available to refer to an individual man page: Currently, the last three URLs will perform a redirect to the
- appropriate permalink URL, but this may change in the future. Linking to individual packages is also possible. These pages will show a
- listing of all manual pages available in the given package. Note that this site only indexes packages that actually have manual
- pages; Linking to a package that doesn't have any will result in a 404
- page.
- Only packages for a single architecture (i386 or amd64) are scanned. To my
- knowledge, packages that come with different manuals for different
- architectures either don't exist or are extremely rare. It does happen that
- some packages are not available for all architectures. Usually, though,
- every package is at least available for the most popular architecture, so
- hopefully we're not missing out on much.
+ The state of online indices of manual pages used to be a sad one. Existing
+ sites used to only offer you a single version of a man page: From one
+ origin, and often only in a single language. Most didn't even tell you where
+ the manual actually originated from, making it very hard to determine
+ whether the manual you found actually applied to your situation and even
+ harder to find a manual for a specific system. Additionally, some sites
+ rendered the manuals in an unreadable way, didn't correctly handle special
+ formatting - like tables - or didn't correctly display non-ASCII characters.
+ You can link to specific packages and man pages with several URL formats.
+ These URLs will keep working in the future, so you should not have to worry
+ about eventual dead links. The following URLs are available to refer to an individual man page: Currently, the last three URLs will perform a redirect to the
+ appropriate permalink URL, but this may change in the future. Linking to individual packages is also possible. These pages will show a
+ listing of all manual pages available in the given package. Note that this site only indexes packages that actually have manual
+ pages; Linking to a package that doesn't have any will result in a 404
+ page.
+ All man pages are fetched right from the (binary) packages available on the
+ public repositories of Linux distributions. In particular:
+ Only packages for a single architecture (i386 or amd64) are scanned. To my
+ knowledge, packages that come with different manuals for different
+ architectures either don't exist or are extremely rare. It does happen that
+ some packages are not available for all architectures. Usually, though,
+ every package is at least available for the most popular architecture, so
+ hopefully we're not missing out on much.
+ This site is backed by a PostgreSQL database containing all the man pages.
+ Weekly dumps of the full database are available for download at
+ http://dl.manned.org/dumps/.
+
+ Suggestions for new (or old) systems to index are welcome.
+
+ This site isn't nearly as awesome yet as it could be. Here's some ideas that
+ would be nice to have in the future:
+
- Manned.org aims to index all manual pages from a variety of systems, both
- old and new, and provides a convenient interface for looking up and viewing
- the various versions of each man page.
- About manned.org »
- _
- end;
-
- h2 'Browse the manuals';
- ul id => 'systems';
- my %sys;
- push @{$sys{$_->{name}}}, $_ for(@{$self->{systems}});
- for my $sys (sort keys %sys) {
- $sys = $sys{$sys};
- (my $img = $sys->[0]{short}) =~ s/^(.+)-.+$/$1/;
- li;
- a href => "/pkg/$sys->[0]{short}" if @$sys == 1;
- span style => "background-image: url('images/$img.png')", '';
- b $sys->[0]{name};
- if(@$sys > 1) {
- my $i = 0;
- for(reverse @$sys) {
- a href => "/pkg/$_->{short}", ++$i > 3 ? (class => 'hidden') : (), $_->{release};
- lit ' ';
- }
- a href => "#", class => 'more', 'more...' if $i > 3;
- }
- end 'a' if @$sys == 1;
- end;
- }
- end;
-
- h2 'Other sites';
- ul id => 'external';
- li; a href => 'http://man7.org/linux/man-pages/index.html', 'man7.org'; txt ' - Linux man pages from several upstream projects.'; end;
- li; a href => 'https://manpag.es/', 'ManPag.es'; txt ' - Man pages from several Linux distributions.'; end;
- li; a href => 'https://www.mankier.com/', 'ManKier'; txt ' - Fedora Rawhide + some manually imported man pages; Nicely formatted and with some unique features.'; end;
- li; a href => 'https://man.cx/', 'man.cx'; txt ' - Man pages extracted from Debian testing.'; end;
- li; a href => 'http://man.he.net/', 'man.he.net'; txt ' - Also seems to be from a Debian-like system.'; end;
- li; a href => 'https://linux.die.net/man/', 'die.net'; txt ' - Seems to be based on an RPM-based Linux distribution.'; end;
- li; a href => 'http://manpages.org/', 'manpages.org'; txt ' - Lots of mostly-nicely formatted man pages, no clue about source.'; end;
- li; a href => 'https://www.manpagez.com/', 'manpagez.com'; txt ' - Mac OS X, has some GTK-html and texinfo documentation as well.'; end;
- li; a href => 'https://man.archlinux.org/', 'Arch Linux Man Pages'; end;
- li; a href => 'https://manpages.debian.org/', 'Debian Man Pages'; end;
- li; a href => 'https://www.dragonflybsd.org/cgi/web-man', 'DragonFlyBSD Man Pages'; end;
- li; a href => 'https://www.freebsd.org/cgi/man.cgi', 'FreeBSD.org Man Pages'; end;
- li; a href => 'https://netbsd.gw.com/cgi-bin/man-cgi', 'NetBSD Man Pages'; end;
- li; a href => 'https://www.openbsd.org/cgi-bin/man.cgi', 'OpenBSD Man Pages'; end;
- li; a href => 'https://manpages.ubuntu.com/', 'Ubuntu Manuals'; end;
- li; a href => 'https://man.voidlinux.org/', 'Void Linux manpages'; end;
- end;
- $self->htmlFooter;
+# Emit a Last-Modified response header for a date given as a string starting
+# with yyyy-mm-dd; anything following the date part is ignored. Input that
+# does not start with such a date leaves the response untouched.
+sub TUWF::Object::resLastMod {
+  my($s, $d) = @_;
+  return if $d !~ /^(\d{4})-(\d{2})-(\d{2})/;
+  my($year, $month, $day) = ($1, $2, $3);
+
+  # Round-trip through timegm/gmtime to learn the weekday of that date.
+  my($sec, $min, $hour, $mday, $mon, $yr, $wday) = gmtime timegm 0, 0, 0, $day, $month-1, $year;
+  my @day_names   = qw|Sun Mon Tue Wed Thu Fri Sat|;
+  my @month_names = qw|Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec|;
+
+  $s->resHeader('Last-Modified', sprintf '%s, %02d %s %04d %02d:%02d:%02d GMT',
+    $day_names[$wday], $mday, $month_names[$mon], $yr+1900, $hour, $min, $sec);
}
-sub about {
- my $self = shift;
- $self->htmlHeader(title => 'About');
- h1 'About Manned.org';
- div id => 'about';
-
- h2 'Goal';
- p; lit <<' _';
- The state of online indices of manual pages used to be a sad one. Existing
- sites used to only offer you a single version of a man page: From one
- origin, and often only in a single language. Most didn't even tell you where
- the manual actually originated from, making it very hard to determine
- whether the manual you found actually applied to your situation and even
- harder to find a manual for a specific system. Additionally, some sites
- rendered the manuals in an unreadable way, didn't correctly handle special
- formatting - like tables - or didn't correctly display non-ASCII characters.
-
- Nowadays there are many good alternatives, but Manned.org was one of the
- sites created in order to improve that situation. This site aims to index
- the manual pages from a variaty of systems, both old and new, and allows you
- to browse through the various versions of a manual page to find out how each
- system behaves. The manuals are stored in the database as UTF-8, and are
- passed through groff to
- render them in (mostly) the same way as they are displayed in your terminal.
-
- This website is open
- source (MIT licensed) and written in a combination of Perl and Rust. The
- entire PostgreSQL database is available for download (see "Database
- download" below).
- _
- end;
-
- h2 'URL format';
- lit <<' _';
- Man pages
-
-
- /<name>/<8-hex-digits>/<name>[.<section>]/man/<system>/<name>[.<section>]/man/<system>/<category>/<package>/<name>[.<section>]/man/<system>/<category>/<package>/<version>/<name>[.<section>]
- In all URLs where an optional .<section> can be provided,
- the search is performed as a prefix match. For example, /cat.3 will provide the cat.3tcl man page if
- no exact cat.3 version is available. Linking to the full
- section name is also possible: /cat.3tcl. If no
- section is given and multiple sections are available, the lowest section
- number is chosen.Packages
-
-
- /pkg/<system>/<category>/<package>/pkg/<system>/<category>/<package>/<version>
-
-
-
- The repositories are scanned for new packages on a daily basis.
-
- Be warned that the download server may not be terribly reliable, so it is
- advisable to use a client that supports resumption of partial downloads. See
- wget's -c or curl's -C.
-
- The database schema is "documented" at schema.sql
- in the git repo. Note that these dumps don't constitute a stable API and,
- while this won't happen frequently, incompatible schema changes or Postgres
- major version bumps may occur.
- _
- end;
-
- h2 'Other systems';
- p; lit <<' _';
- Suggestions for new (or old) systems to index are welcome.
-
- It would be great to index a few more non-Linux systems such as other BSDs,
- Solaris/Illumos and Mac OS X. Unfortunately, those don't always follow a
- binary package based approach, or are otherwise less easy to properly index.
-
- In general, systems that follow an entirely source-based distribution
- approach can't be indexed without compiling everything. Since that is both
- very resource-heavy and open to security issues, there are no plans to
- include manuals from such systems at the moment. So unless someone comes
- with a solution I hadn't thought of yet, there won't be any Gentoo manuals
- here. :-(
- _
- end;
-
- h2 'Future plans';
- p; lit <<' _';
- This site isn't nearly as awesome yet as it could be. Here's some ideas that
- would be nice to have in the future:
-
-
- _
- end;
-
- h2 'Copyright';
- p; lit <<' _';
- All manual pages are copyrighted by their respective authors. The manuals
- have been fetched from publically available repositories of free and
- (primarily) open source software. The distributors of said software have put
- in efforts to only include software and documentation that allows free
- distribution. Nonetheless, if a manual that does not allow to be
- redistributed has been inadvertently included in our index, please let me
- know and I will have it removed as soon as possible.
- _
- end;
-
- end;
- $self->htmlFooter;
+# The systems table doesn't change often, so it is read from the database on
+# first use and kept in memory afterwards. Every row additionally gets a
+# "full" field: the name, followed by a space and the release when the
+# release is set. Returns an array reference of the rows.
+sub systems {
+  state $cache;
+  return $cache if $cache;
+
+  my $rows = tuwf->dbAll('SELECT id, name, release, short, relorder FROM systems ORDER BY name, relorder');
+  for my $sys (@$rows) {
+    my $suffix = $sys->{release} ? ' '.$sys->{release} : '';
+    $sys->{full} = $sys->{name}.$suffix;
+  }
+  $cache = [ @$rows ];
}
+# Lookup tables over systems(), built on first use and then reused:
+# sysbyid maps the "id" field to its row, sysbyshort the "short" field.
+sub sysbyid {
+  state $index;
+  if(!$index) {
+    $index = {};
+    $index->{ $_->{id} } = $_ for systems->@*;
+  }
+  $index;
+}
+sub sysbyshort {
+  state $index;
+  if(!$index) {
+    $index = {};
+    $index->{ $_->{short} } = $_ for systems->@*;
+  }
+  $index;
+}
-sub paginate {
- my($url, $count, $perpage, $p) = @_;
- return if $count <= $perpage;
+# URL-unescape some special characters that may occur in man names and
+# return the result; the argument itself is left unmodified.
+sub normalize_name {
+  my($name) = @_;
+  # Firefox seems to escape [ and ] in URLs. It doesn't really have to...
+  $name =~ s/%5b/[/ig;
+  $name =~ s/%5d/]/ig;
+  # Names may contain spaces, too.
+  $name =~ s/%20/ /g;
+  $name;
+}
- my $l = sub {
- my $c = shift;
- a href => sprintf('%s%d', $url, $c), $c if $c != $p;
- b $c if $c == $p;
- };
+# Subquery returning all packages that have a man page.
+# It is a complete parenthesized SELECT kept as a plain SQL string, so that it
+# can be spliced into a FROM clause where the packages table would otherwise
+# go (pkg_frompath does this and aliases the result as "p").
+my $packages_with_man = '(SELECT * FROM packages p WHERE EXISTS(SELECT 1 FROM package_versions pv WHERE pv.package = p.id AND EXISTS(SELECT 1 FROM man m WHERE m.package = pv.id)))';
- my $lp = ceil($count/$perpage);
- p class => 'paginate';
- $l->(1) if $p > 1+4;
- b '...' if $p > 1+5;
- $l->($_) for (($p > 4 ? $p-4 : 1)..($p+4 > $lp ? $lp : $p+4));
- b '...' if $p < $lp-5;
- $l->($lp) if $p < $lp-4;
- end;
+# Escape a string for literal use inside an SQL LIKE pattern. The wildcards
+# "_" and "%" get a backslash in front, and so does the backslash itself: it
+# is the default LIKE escape character in PostgreSQL, so leaving it alone
+# would let it swallow the character after it, or make the pattern invalid
+# when it comes last. Returns the escaped copy; the argument is not modified.
+sub escape_like { $_[0] =~ s/([_%\\])/\\$1/rg }
+
+# Combine the given SQL fragments into one sql() object, with $sep placed
+# between each pair of neighbouring fragments.
+sub sql_join {
+  my($sep, @parts) = @_;
+  my @interleaved;
+  for my $part (@parts) {
+    push @interleaved, $sep if @interleaved;
+    push @interleaved, $part;
+  }
+  sql @interleaved;
+}
+# AND / OR together any number of SQL conditions, each wrapped in parentheses.
+# Without arguments, sql_and yields a condition that is always true and
+# sql_or one that is always false.
+sub sql_and {
+  return sql('1=1') if !@_;
+  sql_join 'AND', map sql('(', $_, ')'), @_;
+}
+sub sql_or {
+  return sql('1=0') if !@_;
+  sql_join 'OR', map sql('(', $_, ')'), @_;
+}
-
-sub browsesearch {
- my $self = shift;
- my $q = $self->reqGet('q')||'';
- my $man = $self->dbSearch($q, 150);
-
- return $self->resRedirect("/$man->[0]{name}.$man->[0]{section}", 'temp') if @$man == 1;
-
- $self->htmlHeader(title => 'Search results for '.$q);
- h1 'Search results for '.$q;
- p 'Note: This is just a simple case-insensitive prefix match on the man names. In the future we\'ll have more powerful search functionality. Hopefully.';
- if(@$man) {
- ul id => 'searchres';
- for(@$man) {
- li;
- a href => "/$_->{name}.$_->{section}", $_->{name};
- i " $_->{section}";
- end;
- }
- end;
- } else {
- br; br;
- b 'No results :-(';
- }
-
- $self->htmlFooter;
-}
-
-
-sub pkg_list {
- my($self, $short) = @_;
-
- my $sys = $self->{sysbyshort}{$short};
- return $self->resNotFound if !$sys;
-
- my $f = $self->formValidate(
- { get => 'c', required => 0, enum => [ '0', 'all', 'a'..'z' ], default => 'all' },
- { get => 'p', required => 0, default => 1, template => 'uint', min => 1, max => 200 },
- );
- return $self->resNotFound if $f->{_err};
-
- my %opt = (hasman => 1, sysid => $sys->{id}, char => $f->{c} eq 'all' ? undef : $f->{c});
- my $pkg = $self->dbPackageGet(%opt, results => 200, page => $f->{p});
- my $count = $self->dbPackageGet(%opt, countonly => 1)->[0]{count};
-
- my $title = "Packages for $sys->{name}".($sys->{release}?" $sys->{release}":"");
- $self->htmlHeader(title => $title);
- div id => 'pkglist';
- h1 $title;
-
- p class => 'charselect';
- for('all', 0, 'a'..'z') {
- a href => "/pkg/$short?c=$_", $_?uc$_:'#' if $_ ne $f->{c};
- b $_?uc$_:'#' if $_ eq $f->{c};
- }
- end;
-
- p 'Note: Packages without man pages are not listed.';
- paginate "/pkg/$short?c=$f->{c};p=", $count, 200, $f->{p};
- ul id => 'packages';
- for(@$pkg) {
- li;
- a href => "/pkg/$short/$_->{category}/$_->{name}", $_->{name};
- i ' '.$_->{category};
- end;
- }
- end;
- paginate "/pkg/$short?c=$f->{c};p=", $count, 200, $f->{p};
-
- end;
- $self->htmlFooter;
+# Build an SQL condition matching the bytea expression $sql_expr against the
+# prefix given as the hex string $prefix. It is written as a LIKE against a
+# constant pattern, in the hope that an index can be used for it.
+sub sql_hash_prefix {
+  my($sql_expr, $prefix) = @_;
+  # Hex -> raw bytes, LIKE-escape those bytes, then back to hex so that the
+  # pattern can be written as a '\x...' bytea literal.
+  my $bytes = pack 'H*', $prefix;
+  my $esc = unpack 'H*', escape_like($bytes);
+  return sql '(', $sql_expr, "like ('\\x$esc'::bytea||'%'))";
}
+# Resolve the package part of a URL path to a package row.
+#   $sys_where - SQL condition restricting the system; it is placed directly
+#                after WHERE in the lookup query below.
+#   $path      - "$category/$name", optionally followed by "/$version".
+# Returns ($pkg, $version): $version is '' when the path names no version,
+# and $pkg is undef when no package with a man page matches.
sub pkg_frompath {
- my($self, $sys, $path) = @_;
+ my($sys_where, $path) = @_;
- # $path should be "$category/$name" or "$category/$name/$version", since
- # $category may contain a slash, let's try both options.
+ # $path should be "$category/$name" or "$category/$name/$version", since
+ # $category may contain a slash, let's try both options.
- # $category/$name
- # e.g. contrib/games/alien
- if($path =~ m{^(.+)/([^/]+)$}) {
- my($category, $name) = ($1, $2);
- my $pkg = $self->dbPackageGet(sysid => $sys, category => $category, name => $name, hasman => 1)->[0];
- return ($pkg, '') if $pkg;
- }
+ # Fetch the package with this exact category and name from the packages
+ # that have a man page. The callers below treat a result without an "id"
+ # field as "not found" (presumably what dbRowi gives when no row matches).
+ my sub lookup {
+ my($cat, $name) = @_;
+ tuwf->dbRowi('SELECT id, system, name, category FROM', $packages_with_man, 'p WHERE', $sys_where, 'AND category =', \$cat, 'AND name =', \$name);
+ }
- # $category/$name/$version
- # e.g. contrib/games/alien/10.2
- if($path =~ m{^(.+)/([^/]+)/([^/]+)$}) {
- my($category, $name, $version) = ($1, $2, $3);
- my $pkg = $self->dbPackageGet(sysid => $sys, category => $category, name => $name, hasman => 1)->[0];
- return ($pkg, $version) if $pkg;
- }
+ # $category/$name
+ # e.g. contrib/games/alien
+ if($path =~ m{^(.+)/([^/]+)$}) {
+ my $pkg = lookup $1, $2;
+ return ($pkg, '') if $pkg->{id};
+ }
- (undef, '');
+ # $category/$name/$version
+ # e.g. contrib/games/alien/10.2
+ if($path =~ m{^(.+)/([^/]+)/([^/]+)$}) {
+ my $pkg = lookup $1, $2;
+ return ($pkg, $3) if $pkg->{id};
+ }
+
+ # Neither interpretation of the path matched a package.
+ (undef, '');
}
-sub pkg_info {
- my($self, $short, $path) = @_;
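+# Get the preferred man page for the given filters.
+#   $section - optional section; when non-empty, only man pages whose section
+#              starts with it are considered.
+#   $where   - SQL condition selecting the candidate man pages; it may refer to
+#              the aliases m (man), v (package_versions), p (packages) and
+#              s (systems).
+# Returns a single row with the fields system, category, package, version,
+# released, verid, name, section, filename, locale and hash (hex-encoded).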
+sub man_pref {
+ my($section, $where) = @_;
+ $where = sql_and $where, sql 'm.section LIKE', \(escape_like($section).'%') if length $section;
- my $sys = $self->{sysbyshort}{$short};
- return $self->resNotFound if !$sys;
+ # Criteria to determine a "preferred" man page:
+ # 1. english: English versions of a man page have preference over other locales
+ # 2. pkgver: Newer versions of the same package have preference over older versions
+ # 3. stdloc: Prefer man pages in standard locations
+ # 4. secmatch: Prefer an exact section match
+ # 5. arch: Prefer Arch over other systems (because it tends to be the most up-to-date, and closest to upstreams)
+ # 6. ubuntu: If there's no Arch, prefer Ubuntu over other systems (again, tends to be more up-to-date)
+ # (also resolves distro-specific tooling disputes such as https://code.blicky.net/yorhel/manned/issues/1 )
+ # 7. sysrel: Prefer a later system release over an older release
+ # 8. secorder: Lower sections before higher sections (because man does it this way, for some reason)
+ # 9. pkgdate: Prefer more recent packages (cross-distro)
+ # 10. Fall back on hash comparison, to ensure the result is stable
- my($pkg, $ver) = pkg_frompath($self, $sys->{id}, $path);
- return $self->resNotFound if !$pkg;
-
- my $vers = $self->dbPackageVersions($pkg->{id});
-
- my $sel = $ver ? (grep $_->{version} eq $ver, @$vers)[0] : $vers->[0];
- return $self->resNotFound if !$sel;
-
- my $f = $self->formValidate({ get => 'p', required => 0, default => 1, template => 'uint', min => 1, max => 100});
- return $self->resNotFound if $f->{_err};
-
- my $mans = $self->dbManInfo(package => $sel->{id}, results => 200, page => $f->{p}, sort => 'syspkgname');
- my $count = $self->dbManInfo(package => $sel->{id}, countonly => 1)->[0]{count};
-
- # Latest version of this package determines last modification date of the page.
- $self->setLastMod($vers->[0]{released});
-
- my $title = "$sys->{name}".($sys->{release}?" $sys->{release}":"")." / $pkg->{category} / $pkg->{name}";
- $self->htmlHeader(title => "$title $sel->{version}");
- h1 $title;
-
- div id => 'pkgversions';
- h2 'Versions';
- ul;
- for(@$vers) {
- li;
- a href => "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$_->{version}", $_->{version} if $_ != $sel;
- b " $_->{version}" if $_ == $sel;
- i " $_->{released}";
- end;
- }
- end;
- end;
-
- div id => 'pkgmans';
- h2 "Manuals for version $sel->{version}";
- paginate "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$sel->{version}?p=", $count, 200, $f->{p};
- ul;
- for(@$mans) {
- li;
- a href => "/$_->{name}/".substr($_->{hash},0,8), "$_->{name}($_->{section})";
- b " $_->{locale}" if $_->{locale};
- i " $_->{filename}";
- end;
- }
- end;
- paginate "/pkg/$sys->{short}/$pkg->{category}/$pkg->{name}/$sel->{version}?p=", $count, 200, $f->{p};
- end;
-
- $self->htmlFooter;
+ tuwf->dbRowi(q{
+ WITH unfiltered AS (
+ SELECT s AS sys, p AS pkg, v AS ver, m AS man
+ FROM man m
+ JOIN package_versions v ON v.id = m.package
+ JOIN packages p ON p.id = v.package
+ JOIN systems s ON s.id = p.system
+ WHERE}, $where, q{
+ ), f_english AS(
+ SELECT * FROM unfiltered WHERE NOT EXISTS(SELECT 1 FROM unfiltered WHERE is_english_locale((man).locale)) OR is_english_locale((man).locale)
+ ), f_pkgver AS(
+ SELECT * FROM f_english a WHERE NOT EXISTS(SELECT 1 FROM f_english b WHERE (a.ver).package = (b.ver).package AND (a.ver).released < (b.ver).released)
+ ), f_stdloc AS(
+ SELECT * FROM f_pkgver WHERE NOT EXISTS(SELECT 1 FROM f_pkgver WHERE is_standard_man_location((man).filename)) OR is_standard_man_location((man).filename)
+ ), f_secmatch AS(
+ SELECT * FROM f_stdloc WHERE NOT EXISTS(SELECT 1 FROM f_stdloc WHERE (man).section =}, \$section, q{) OR (man).section =}, \$section, q{
+ ), f_arch AS(
+ SELECT * FROM f_secmatch WHERE NOT EXISTS(SELECT 1 FROM}, length $section ? 'f_secmatch' : 'f_stdloc', q{WHERE (sys).id = 1) OR (sys).id = 1
+ ), f_ubuntu AS(
+ SELECT * FROM f_arch WHERE NOT EXISTS(SELECT 1 FROM f_arch WHERE (sys).name = 'Ubuntu') OR (sys).name = 'Ubuntu'
+ ), f_sysrel AS(
+ SELECT * FROM f_ubuntu a WHERE NOT EXISTS(SELECT 1 FROM f_ubuntu b WHERE (a.sys).name = (b.sys).name AND (a.sys).relorder < (b.sys).relorder)
+ ), f_secorder AS(
+ SELECT * FROM f_sysrel a WHERE NOT EXISTS(SELECT 1 FROM f_sysrel b WHERE (a.man).section > (b.man).section)
+ ), f_pkgdate AS(
+ SELECT * FROM f_secorder a WHERE NOT EXISTS(SELECT 1 FROM f_secorder b WHERE (a.ver).released < (b.ver).released)
+ )
+ SELECT (pkg).system, (pkg).category, (pkg).name AS package, (ver).version, (ver).released, (ver).id AS verid,
+ (man).name, (man).section, (man).filename, (man).locale, encode((man).hash, 'hex') AS hash
+ FROM f_pkgdate ORDER BY (man).hash LIMIT 1
+ });
}
-sub man_redir {
- my($self, $sys, $path) = @_;
+# Given the name of a man page with optional section, find out the actual name
+# and section prefix of the man page and the preferred version.
+sub man_pref_name {
+ my($name, $where) = @_;
- # Path can be:
- # 1.
+ Manned.org aims to index all manual pages from a variety of systems, both
+ old and new, and provides a convenient interface for looking up and viewing
+ the various versions of each man page.
+ About manned.org »
+ _
+ };
+
+ h2_ 'Browse the manuals';
+ ul_ id => 'systems', sub {
+ my %sys;
+ push $sys{$_->{name}}->@*, $_ for(systems->@*);
+ li_ sub {
+ my $sys = $sys{$_};
+ my $img = $sys->[0]{short} =~ s/^(.+)-.+$/$1/r;
+ if(@$sys == 1) {
+ a_ href => "/pkg/$sys->[0]{short}", sub {
+ span_ style => "background-image: url('images/$img.png')", '';
+ b_ $sys->[0]{name};
+ };
+ return;
+ }
+ span_ style => "background-image: url('images/$img.png')", '';
+ b_ $sys->[0]{name};
+ my $i = 0;
+ for(reverse @$sys) {
+ a_ href => "/pkg/$_->{short}", ++$i > 3 ? (class => 'hidden') : (), $_->{release};
+ lit_ ' ';
+ }
+ a_ href => '#', class => 'more', 'more...' if $i > 3;
+ } for sort keys %sys;
+ };
+
+ h2_ 'Other sites';
+ ul_ id => 'external', sub {
+ li_ sub { a_ href => 'http://man7.org/linux/man-pages/index.html', 'man7.org'; txt_ ' - Linux man pages from several upstream projects.' };
+ li_ sub { a_ href => 'https://manpag.es/', 'ManPag.es'; txt_ ' - Man pages from several Linux distributions.' };
+ li_ sub { a_ href => 'https://www.mankier.com/', 'ManKier'; txt_ ' - Fedora Rawhide + some manually imported man pages; Nicely formatted and with some unique features.' };
+ li_ sub { a_ href => 'https://man.cx/', 'man.cx'; txt_ ' - Man pages extracted from Debian testing.' };
+ li_ sub { a_ href => 'http://man.he.net/', 'man.he.net'; txt_ ' - Also seems to be from a Debian-like system.' };
+ li_ sub { a_ href => 'https://linux.die.net/man/', 'die.net'; txt_ ' - Seems to be based on an RPM-based Linux distribution.' };
+ li_ sub { a_ href => 'http://manpages.org/', 'manpages.org'; txt_ ' - Lots of mostly-nicely formatted man pages, no clue about source.' };
+ li_ sub { a_ href => 'https://www.manpagez.com/', 'manpagez.com'; txt_ ' - Mac OS X, has some GTK-html and texinfo documentation as well.' };
+ li_ sub { a_ href => 'https://man.archlinux.org/', 'Arch Linux Man Pages' };
+ li_ sub { a_ href => 'https://manpages.debian.org/', 'Debian Man Pages' };
+ li_ sub { a_ href => 'https://www.dragonflybsd.org/cgi/web-man', 'DragonFlyBSD Man Pages' };
+ li_ sub { a_ href => 'https://www.freebsd.org/cgi/man.cgi', 'FreeBSD.org Man Pages' };
+ li_ sub { a_ href => 'https://netbsd.gw.com/cgi-bin/man-cgi', 'NetBSD Man Pages' };
+ li_ sub { a_ href => 'https://www.openbsd.org/cgi-bin/man.cgi', 'OpenBSD Man Pages' };
+ li_ sub { a_ href => 'https://manpages.ubuntu.com/', 'Ubuntu Manuals' };
+ li_ sub { a_ href => 'https://man.voidlinux.org/', 'Void Linux manpages' };
+ };
+ }
+};
+
+
+TUWF::get '/info/about' => sub {
+ framework_ title => 'About', sub {
+ h1_ 'About Manned.org';
+ div_ id => 'about', sub {
+ lit <<' _';
+ Goal
+
+ Nowadays there are many good alternatives, but Manned.org was one of the
+ sites created in order to improve that situation. This site aims to index
+ the manual pages from a variety of systems, both old and new, and allows you
+ to browse through the various versions of a manual page to find out how each
+ system behaves. The manuals are stored in the database as UTF-8, and are
+ passed through groff to
+ render them in (mostly) the same way as they are displayed in your terminal.
+
+ This website is open
+ source (MIT licensed) and written in a combination of Perl and Rust. The
+ entire PostgreSQL database is available for download (see "Database
+ download" below).
+ URL format
+ Man pages
+
+
+ /<name>/<8-hex-digits>/<name>[.<section>]/man/<system>/<name>[.<section>]/man/<system>/<category>/<package>/<name>[.<section>]/man/<system>/<category>/<package>/<version>/<name>[.<section>]
+ In all URLs where an optional .<section> can be provided,
+ the search is performed as a prefix match. For example, /cat.3 will provide the cat.3tcl man page if
+ no exact cat.3 version is available. Linking to the full
+ section name is also possible: /cat.3tcl. If no
+ section is given and multiple sections are available, the lowest section
+ number is chosen.Packages
+
+
+ /pkg/<system>/<category>/<package>/pkg/<system>/<category>/<package>/<version>The indexing process
+
+
+
+
+ The repositories are scanned for new packages on a daily basis.
+ Database download
+
+ Be warned that the download server may not be terribly reliable, so it is
+ advisable to use a client that supports resumption of partial downloads. See
+ wget's -c or curl's -C.
+
+ The database schema is "documented" at schema.sql
+ in the git repo. Note that these dumps don't constitute a stable API and,
+ while this won't happen frequently, incompatible schema changes or Postgres
+ major version bumps may occur.
+ Other systems
+
+ It would be great to index a few more non-Linux systems such as other BSDs,
+ Solaris/Illumos and Mac OS X. Unfortunately, those don't always follow a
+ binary package based approach, or are otherwise less easy to properly index.
+
+ In general, systems that follow an entirely source-based distribution
+ approach can't be indexed without compiling everything. Since that is both
+ very resource-heavy and open to security issues, there are no plans to
+ include manuals from such systems at the moment. So unless someone comes
+ with a solution I hadn't thought of yet, there won't be any Gentoo manuals
+ here. :-(
+ Future plans
+
+
+
+ All manual pages are copyrighted by their respective authors. The manuals + have been fetched from publically available repositories of free and + (primarily) open source software. The distributors of said software have put + in efforts to only include software and documentation that allows free + distribution. Nonetheless, if a manual that does not allow to be + redistributed has been inadvertently included in our index, please let me + know and I will have it removed as soon as possible. +
+ _ + } } +}; + + +# Very simple (and fast) prefix match. +sub search_man { + my($q, $limit) = @_; + + my $sect = $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$// ? $1 : ''; + my $name = $q =~ s/^([a-zA-Z0-9,.:_-]+)// ? $1 : ''; + + return !$name ? [] : tuwf->dbAll( + 'SELECT name, section FROM man_index !W ORDER BY name, section LIMIT ?', + { + 'lower(name) LIKE ?' => escape_like(lc $name).'%', + $sect ? ('section ILIKE ?' => escape_like(lc $sect).'%') : (), + }, + $limit + ); +} + + +TUWF::get '/browse/search' => sub { + my $q = tuwf->reqGet('q')||''; + my $man = search_man $q, 150; + return tuwf->resRedirect("/$man->[0]{name}.$man->[0]{section}", 'temp') if @$man == 1; + + framework_ title => 'Search results for '.$q, sub { + h1_ 'Search results for '.$q; + # Package search would also be useful. + p_ 'Note: This is just a simple case-insensitive prefix match on the man names. In the future we\'ll have more powerful search functionality. Hopefully.'; + if(@$man) { + ul_ id => 'searchres', sub { + li_ sub { + a_ href => "/$_->{name}.$_->{section}", $_->{name}; + i_ " $_->{section}"; + } for @$man; + } + } else { + br_; br_; + b_ 'No results :-('; + } + }; +}; + + +TUWF::get '/xml/search.xml' => sub { + my $q = tuwf->reqGet('q')||''; + my $man = search_man $q, 20; + + tuwf->resHeader('Content-Type' => 'text/xml; charset=UTF-8'); + xml; + tag 'results', sub { + tag 'item', id => "$_->{name}.$_->{section}", %$_, undef for @$man; + }; +}; + + +TUWF::get qr{/([^/]+)/([0-9a-f]{8})/src} => sub { + my $name = normalize_name tuwf->capture(1); + my $hash = tuwf->capture(2); + + my $nfo = tuwf->dbRowi(' + SELECT m.name, m.section, v.released, c.content + FROM man m + JOIN package_versions v ON v.id = m.package + JOIN contents c ON c.hash = m.hash + WHERE m.name =', \$name, 'AND', sql_hash_prefix('m.hash', $hash), ' + LIMIT 1' + ); + return tuwf->resNotFound if !$nfo->{name}; + + tuwf->resLastMod($nfo->{released}); + tuwf->resHeader('Content-Type', 'text/plain; 
charset=UTF-8'); + tuwf->resHeader('Content-Disposition', sprintf 'filename="%s.%s"', $nfo->{name}, $nfo->{section}); + lit $nfo->{content}; }; sub _man_nav { - my($self, $man, $toc) = @_; + my($man, $toc) = @_; - my @sect = $self->dbManSections($man->{name}); - my @lang = $self->dbManLanguages($man->{name}, $man->{section}); - return if !@sect && !@lang && !@$toc; + my @sect = map $_->{section}, tuwf->dbAlli( + 'SELECT DISTINCT section FROM man WHERE name =', \$man->{name}, 'ORDER BY section' + )->@*; - # TODO: This is ugly, especially because clicking on a translation or - # section, you can end up with a man page that is nowhere close to the man - # page you're currently reading. Opening a version selector box might be a - # better alternative. + my @lang = map $_->{lang}, tuwf->dbAlli( + "SELECT DISTINCT substring(locale from '^[^.]+') AS lang + FROM man WHERE name =", \$man->{name}, 'AND section =', \$man->{section}, " + ORDER BY substring(locale from '^[^.]+') NULLS FIRST" + )->@*; + return if !@sect && !@lang && !@$toc; - div id => 'nav'; - - if(@sect > 1) { - b 'Sections'; - p; - for (@sect) { - if($man->{section} eq $_) { - i $_; - } else { - a href => "/$man->{name}.$_", $_; + # TODO: This is ugly, especially because clicking on a translation or + # section, you can end up with a man page that is nowhere close to the man + # page you're currently reading. Opening a version selector box might be a + # better alternative. + div_ id => 'nav', sub { + if(@sect > 1) { + b_ 'Sections'; + p_ sub { + for (@sect) { + if($man->{section} eq $_) { + i_ $_; + } else { + a_ href => "/$man->{name}.$_", $_; + } + txt_ ' '; + } + } } - txt ' '; - } - end; - } - if(@lang > 1) { - b 'Languages'; - p; - (my $cur = $man->{locale}||'') =~ s/\..*//; - for (@lang) { - if(($_||'') eq $cur) { - i $_ || 'default'; - } else { - a href => $_ ? 
"/lang/$_/$man->{name}.$man->{section}" : "/$man->{name}.$man->{section}", $_ || 'default'; + if(@lang > 1) { + b_ 'Languages'; + p_ sub { + (my $cur = $man->{locale}||'') =~ s/\..*//; + for (@lang) { + if(($_||'') eq $cur) { + i_ $_ || 'default'; + } else { + a_ href => $_ ? "/lang/$_/$man->{name}.$man->{section}" : "/$man->{name}.$man->{section}", $_ || 'default'; + } + txt_ ' '; + } + } } - txt ' '; - } - end; - } - if(@$toc > 1) { - b 'Table of Contents'; - ul; - for (0..$#$toc) { - li; - a href => sprintf('#head%d', $_+1), lc $toc->[$_]; - end; - } - end; - } - end; -} - - -sub _normalizename { - local $_ = shift; - # Firefox seems to escape [ and ] in URLs. It doesn't really have to... - s/%5b/[/ig; - s/%5d/]/ig; - # Man pages with spaces in the path, eww - s/%20/ /g; - $_; + if(@$toc > 1) { + b_ 'Table of Contents'; + ul_ sub { + for (0..$#$toc) { + li_ sub { + a_ href => sprintf('#head%d', $_+1), lc $toc->[$_]; + } + } + } + } + } } # Replace .so's in man source with the contents (if available in the same # package) or with a reference to the other man page. sub soelim { - my($self, $verid, $src) = @_; + my($verid, $src) = @_; - # tix comes with[1] a custom(?) macro package. But it looks okay even without - # loading that. - # [1] It actually doesn't, the tcllib package appears to have that file, but - # doesn't '.so' it. - $src =~ s/^\.so man.macros$//mg; + # tix comes with* a custom(?) macro package. But it looks okay even without loading that. + # (* It actually doesn't, the tcllib package appears to have that file, but doesn't '.so' it) + $src =~ s/^\.so man.macros$//mg; + + # Other .so's should be handled by html() + $src =~ s{^\.so (.+)$}{ + my $path = $1; + my $name = (reverse split /\//, $path)[0]; + my($man) = $verid ? man_pref_name $name, sql 'v.id =', \$verid : (); + $man->{name} + # Recursive soelim, but the second call gets $verid=0 so we don't keep checking the database + ? 
soelim(0, tuwf->dbVali("SELECT content FROM contents WHERE hash = decode(", \$man->{hash}, ", 'hex')")) + : ".in -10\n.sp\n\[\[\[MANNEDINCLUDE$path\]\]\]" + }emg; + $src; +} + + +# This one has to go before the other mappings, to ensure that links work for +# man pages called 'pkg' or 'man'. This also means that we can't have a +# system named 8 hex digits, but at least that's easy to guarantee. :) +TUWF::get qr{/(?