Rewrite to static site
With a complete reorganisation of the directory structure and most of the content converted to pandoc-flavoured markdown. Some TODOs left before this can go live: - Main page - Atom feeds - Bug tracker
This commit is contained in:
parent
5c85a7d32f
commit
6242b2ee9c
291 changed files with 4346 additions and 6141 deletions
89
.gitignore
vendored
89
.gitignore
vendored
|
|
@ -1,3 +1,88 @@
|
|||
*.gz
|
||||
*.zip
|
||||
download/doc
|
||||
*.gz
|
||||
*.pdf
|
||||
dat/globster/api.md
|
||||
dat/globster/api.pod
|
||||
dat/globster/ctl.md
|
||||
dat/globster/ctl.pod
|
||||
dat/globster/daemon.md
|
||||
dat/globster/daemon.pod
|
||||
dat/globster/launch.md
|
||||
dat/globster/launch.pod
|
||||
dat/ncdc/changes.log
|
||||
dat/ncdc/changes.md
|
||||
dat/ncdc/man.md
|
||||
dat/ncdc/man.pod
|
||||
dat/ncdu/changes.log
|
||||
dat/ncdu/changes.md
|
||||
dat/ncdu/man.md
|
||||
dat/ncdu/man.pod
|
||||
dat/nginx-confgen/changes.log
|
||||
dat/nginx-confgen/changes.md
|
||||
dat/nginx-confgen/man.md
|
||||
dat/nginx-confgen/man.pod
|
||||
dat/tuwf/changes.log
|
||||
dat/tuwf/changes.md
|
||||
dat/tuwf/man.md
|
||||
dat/tuwf/man.pod
|
||||
dat/tuwf/man/db.md
|
||||
dat/tuwf/man/db.pod
|
||||
dat/tuwf/man/intro.md
|
||||
dat/tuwf/man/intro.pod
|
||||
dat/tuwf/man/misc.md
|
||||
dat/tuwf/man/misc.pod
|
||||
dat/tuwf/man/request.md
|
||||
dat/tuwf/man/request.pod
|
||||
dat/tuwf/man/response.md
|
||||
dat/tuwf/man/response.pod
|
||||
dat/tuwf/man/validate.md
|
||||
dat/tuwf/man/validate.pod
|
||||
dat/tuwf/man/xml.md
|
||||
dat/tuwf/man/xml.pod
|
||||
dat/ylib.md
|
||||
dat/ylib.pod
|
||||
pub/doc.html
|
||||
pub/doc/commvis.html
|
||||
pub/doc/dcstats.html
|
||||
pub/doc/easyipc.html
|
||||
pub/doc/funcweb.html
|
||||
pub/doc/sqlaccess.html
|
||||
pub/dump.html
|
||||
pub/dump/awshrink.html
|
||||
pub/dump/btrfssize.html
|
||||
pub/dump/demo.html
|
||||
pub/dump/grenamr.html
|
||||
pub/dump/insbench.html
|
||||
pub/dump/nccolour.html
|
||||
pub/globster.html
|
||||
pub/globster/api.html
|
||||
pub/globster/ctl.html
|
||||
pub/globster/daemon.html
|
||||
pub/globster/launch.html
|
||||
pub/ncdc.html
|
||||
pub/ncdc/changes.html
|
||||
pub/ncdc/faq.html
|
||||
pub/ncdc/install.html
|
||||
pub/ncdc/man.html
|
||||
pub/ncdc/scr.html
|
||||
pub/ncdu.html
|
||||
pub/ncdu/changes.html
|
||||
pub/ncdu/jsonfmt.html
|
||||
pub/ncdu/man.html
|
||||
pub/ncdu/scr.html
|
||||
pub/nginx-confgen.html
|
||||
pub/nginx-confgen/changes.html
|
||||
pub/nginx-confgen/man.html
|
||||
pub/tuwf.html
|
||||
pub/tuwf/changes.html
|
||||
pub/tuwf/man.html
|
||||
pub/tuwf/man/db.html
|
||||
pub/tuwf/man/intro.html
|
||||
pub/tuwf/man/misc.html
|
||||
pub/tuwf/man/request.html
|
||||
pub/tuwf/man/response.html
|
||||
pub/tuwf/man/validate.html
|
||||
pub/tuwf/man/xml.html
|
||||
pub/ylib.html
|
||||
pub/yxml.html
|
||||
pub/yxml/man.html
|
||||
|
|
|
|||
139
Makefile
Normal file
139
Makefile
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
# List of all input files. Each file is converted into a .html file at the same path.
|
||||
#
|
||||
# The format of each line is: $path $URL $title
|
||||
#
|
||||
# If no $URL is given or the $URL is '-', then the input file is assumed to be
|
||||
# in dat/, otherwise it will be fetched from $URL.
|
||||
#
|
||||
# A $title should be given for .pod and .log files, it is ignored for .md files
|
||||
# because those already have a title embedded in the file.
|
||||
#
|
||||
# Supported file types:
|
||||
# .md: Converted directly into .html with pandoc.
|
||||
# .pod: Perl's Plain Old Documentation, converted through HTML into a .md
|
||||
# file which is then converted into .html again with the proper template.
|
||||
# .log: A ChangeLog-formatted file, converted through .md into .html.
|
||||
PAGES=\
|
||||
"doc.md"\
|
||||
"doc/commvis.md"\
|
||||
"doc/dcstats.md"\
|
||||
"doc/easyipc.md"\
|
||||
"doc/funcweb.md"\
|
||||
"doc/sqlaccess.md"\
|
||||
"dump.md"\
|
||||
"dump/awshrink.md"\
|
||||
"dump/btrfssize.md"\
|
||||
"dump/demo.md"\
|
||||
"dump/grenamr.md"\
|
||||
"dump/insbench.md"\
|
||||
"dump/nccolour.md"\
|
||||
"globster.md"\
|
||||
"globster/api.pod https://g.blicky.net/globster.git/plain/doc/api.pod The Globster D-Bus API"\
|
||||
"globster/ctl.pod https://g.blicky.net/globster.git/plain/doc/globsterctl.pod The globsterctl(1) Man Page"\
|
||||
"globster/daemon.pod https://g.blicky.net/globster.git/plain/doc/globster.pod The globster(1) Man Page"\
|
||||
"globster/launch.pod https://g.blicky.net/globster.git/plain/doc/globster-launch.pod The globster-launch(1) Man Page"\
|
||||
"ncdc.md"\
|
||||
"ncdc/changes.log https://g.blicky.net/ncdc.git/plain/ChangeLog Ncdc Release History"\
|
||||
"ncdc/faq.md"\
|
||||
"ncdc/install.md"\
|
||||
"ncdc/man.pod - Ncdc Manual"\
|
||||
"ncdc/scr.md"\
|
||||
"ncdu.md"\
|
||||
"ncdu/changes.log https://g.blicky.net/ncdu.git/plain/ChangeLog Ncdu Release History"\
|
||||
"ncdu/jsonfmt.md"\
|
||||
"ncdu/man.pod https://g.blicky.net/ncdu.git/plain/doc/ncdu.pod Ncdu Manual"\
|
||||
"ncdu/scr.md"\
|
||||
"nginx-confgen.md"\
|
||||
"nginx-confgen/changes.log https://g.blicky.net/nginx-confgen.git/plain/ChangeLog Nginx-confgen Release History"\
|
||||
"nginx-confgen/man.pod https://g.blicky.net/nginx-confgen.git/plain/nginx-confgen.pod The nginx-confgen(1) Man Page"\
|
||||
"tuwf.md"\
|
||||
"tuwf/changes.log https://g.blicky.net/tuwf.git/plain/ChangeLog TUWF Release History"\
|
||||
"tuwf/man.pod https://g.blicky.net/tuwf.git/plain/lib/TUWF.pod TUWF Documentation"\
|
||||
"tuwf/man/db.pod https://g.blicky.net/tuwf.git/plain/lib/TUWF/DB.pod TUWF::DB Documentation"\
|
||||
"tuwf/man/intro.pod https://g.blicky.net/tuwf.git/plain/lib/TUWF/Intro.pod TUWF::Intro Documentation"\
|
||||
"tuwf/man/misc.pod https://g.blicky.net/tuwf.git/plain/lib/TUWF/Misc.pod TUWF::Misc Documentation"\
|
||||
"tuwf/man/request.pod https://g.blicky.net/tuwf.git/plain/lib/TUWF/Request.pod TUWF::Request Documentation"\
|
||||
"tuwf/man/response.pod https://g.blicky.net/tuwf.git/plain/lib/TUWF/Response.pod TUWF::Response Documentation"\
|
||||
"tuwf/man/validate.pod https://g.blicky.net/tuwf.git/plain/lib/TUWF/Validate.pod TUWF::Validate Documentation"\
|
||||
"tuwf/man/xml.pod https://g.blicky.net/tuwf.git/plain/lib/TUWF/XML.pod TUWF::XML Documentation"\
|
||||
"ylib.pod https://g.blicky.net/ylib.git/plain/README.pod Ylib"\
|
||||
"yxml.md"\
|
||||
"yxml/man.md"
|
||||
|
||||
|
||||
|
||||
# Files we need to download
|
||||
FETCH := $(shell for i in ${PAGES}; do echo "$$i" | grep -Eo '^[^ ]+ +[^-][^ ]+' | sed -E 's/^([^ ]+).*/dat\/\1/'; done)
|
||||
|
||||
# List of generated .html files
|
||||
HTML_OUT := $(shell for i in ${PAGES}; do echo "$$i" | sed -E 's/^([^ ]+)\.[^\. ]+.*$$/pub\/\1.html/'; done)
|
||||
|
||||
# List of .md files generated from .pod files
|
||||
POD_MD := $(shell for i in ${PAGES}; do echo "$$i" | grep -Eo '^[^ ]+\.pod' | sed -E 's/(.+)\.pod$$/dat\/\1.md/'; done)
|
||||
|
||||
# List of .md files generated from .log files
|
||||
CHANGES_MD := $(shell for i in ${PAGES}; do echo "$$i" | grep -Eo '^[^ ]+\.log' | sed -E 's/(.+)\.log$$/dat\/\1.md/'; done)
|
||||
|
||||
# All fetched & generated files
|
||||
CLEAN := ${FETCH} ${POD_MD} ${CHANGES_MD} ${HTML_OUT}
|
||||
|
||||
|
||||
.PHONY: all clean
|
||||
|
||||
all: .gitignore ${HTML_OUT}
|
||||
|
||||
|
||||
${FETCH}: dat/%:
|
||||
@echo "FETCH $*"
|
||||
@mkdir -p $$(dirname "$@")
|
||||
@curl -s ${shell for i in ${PAGES}; do case "$$i" in "$* "*) echo "$$i" | awk '{print$$2}';; esac; done} -o "$@"
|
||||
|
||||
|
||||
# There is a 'pod2markdown' program, but going through HTML with a little bit
|
||||
# of Perl magic tends to give better results, if only because definition lists
|
||||
# are properly converted this way and I have more control over links.
|
||||
${POD_MD}: dat/%.md: dat/%.pod mkpod.pl
|
||||
@echo "POD $*"
|
||||
@cat "$<" | ./mkpod.pl |\
|
||||
pandoc -f html -t markdown -s -o "$@" \
|
||||
--metadata title="${shell for i in ${PAGES}; do case "$$i" in "$*.pod "*) echo "$$i" | sed -E 's/[^ ]+ +[^ ]+ +//';; esac; done}"
|
||||
@rm -f pod2htmd.tmp pod2html.tmp
|
||||
|
||||
|
||||
${CHANGES_MD}: dat/%.md: dat/%.log mkchangelog.pl
|
||||
@echo "MD $*"
|
||||
@./mkchangelog.pl "$*" "${shell for i in ${PAGES}; do case "$$i" in "$*.log "*) echo "$$i" | sed -E 's/[^ ]+ +[^ ]+ +//';; esac; done}" <"$<" >"$@"
|
||||
|
||||
|
||||
${HTML_OUT}: pub/%.html: dat/%.md template.html
|
||||
@echo "HTML $*"
|
||||
@mkdir -p $$(dirname "$@")
|
||||
@cat "$<" |\
|
||||
perl -pe 's{\[dllink ([^ \]]+)\]}{<a href="/download/$$1">$$1</a><b class="sig"><a href="/download/$$1.asc">pgp</a>-<a href="/download/$$1.sha1">sha1</a>-<a href="/download/$$1.md5">md5</a></b>}' |\
|
||||
pandoc -f markdown -t html5 --template template.html \
|
||||
--metadata path1=$$(echo "$*" | sed 's/\/.*//') \
|
||||
--metadata path2=$$(echo "$*" | sed 's/\//-/' | sed 's/\/.*//') \
|
||||
--metadata path3=$$(echo "$*" | sed 's/\//-/g') \
|
||||
--variable menu-$$(case "$*" in\
|
||||
globster*) echo "globster";;\
|
||||
ncdc*) echo "ncdc";;\
|
||||
ncdu*) echo "ncdu";;\
|
||||
nginx-confgen*) echo "nginx-confgen";;\
|
||||
tuwf*) echo "tuwf";;\
|
||||
yxml*) echo "yxml";;\
|
||||
*) echo "main";;\
|
||||
esac)\
|
||||
-o "$@"
|
||||
|
||||
|
||||
.gitignore: Makefile
|
||||
@echo "GIT"
|
||||
@echo '*.zip' >$@
|
||||
@echo '*.gz' >>$@
|
||||
@echo '*.pdf' >>$@
|
||||
@for i in ${CLEAN}; do echo "$$i"; done | sort >>$@
|
||||
|
||||
|
||||
clean:
|
||||
rm -rf ${CLEAN}
|
||||
find dat pub -type d -empty -print -delete
|
||||
16
README.md
Normal file
16
README.md
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
# Requirements
|
||||
|
||||
Build-time:
|
||||
|
||||
- GNU Make
|
||||
- curl
|
||||
- Perl (+ Pod::Simple)
|
||||
- pandoc
|
||||
|
||||
Run-time (for the issue tracker):
|
||||
|
||||
- Apache
|
||||
- Perl
|
||||
- TUWF
|
||||
- DBI
|
||||
- DBD::SQLite
|
||||
60
dat/doc
60
dat/doc
|
|
@ -1,60 +0,0 @@
|
|||
=pod
|
||||
|
||||
I don't often write stuff. Certainly not enough to warrant a blog. But
|
||||
sometimes I do feel the need to write down my thoughts. The results of those
|
||||
rare occasions are published on this page.
|
||||
|
||||
=head2 Articles That May As Well Be Considered Blog Posts
|
||||
|
||||
=over
|
||||
|
||||
=item C<2017-05-28 > - L<An Opinionated Survey of Functional Web Development|https://dev.yorhel.nl/doc/funcweb>
|
||||
|
||||
The title says it all.
|
||||
|
||||
=item C<2014-07-29 > - L<The Sorry State of Convenient IPC|https://dev.yorhel.nl/doc/easyipc>
|
||||
|
||||
A long rant about IPC systems.
|
||||
|
||||
=item C<2014-01-09 > - L<Some Measurements on Direct Connect File Lists|https://dev.yorhel.nl/doc/dcstats>
|
||||
|
||||
A short measurement study on the file lists obtained from a Direct Connect hub.
|
||||
Lots of graphs!
|
||||
|
||||
=item C<2012-02-15 > - L<A Distributed Communication System for Modular Applications|https://dev.yorhel.nl/doc/commvis>
|
||||
|
||||
In this article I explain a vision of mine, and the results of a small research
|
||||
project aimed at realizing that vision.
|
||||
|
||||
=item C<2011-11-26 > - L<Multi-threaded Access to an SQLite3 Database|https://dev.yorhel.nl/doc/sqlaccess>
|
||||
|
||||
So you have a single database and some threads. How do you combine these in a
|
||||
program?
|
||||
|
||||
=back
|
||||
|
||||
=head2 Longer Reports
|
||||
|
||||
=over
|
||||
|
||||
=item C<2014-06-10 > - L<Biased Random Periodic Switching in Direct Connect|https://dev.yorhel.nl/download/doc/brpsdc.pdf> (PDF)
|
||||
|
||||
My masters thesis.
|
||||
|
||||
=item C<2013-04-05 > - L<Peer Selection in Direct Connect|https://dev.yorhel.nl/download/doc/psdc.pdf> (PDF)
|
||||
|
||||
The rather long-ish literature study that preceded my masters thesis.
|
||||
|
||||
=item C<2010-06-02 > - L<Design and implementation of a compressed linked list library|https://dev.yorhel.nl/download/doc/compll.pdf> (PDF)
|
||||
|
||||
The report for the final project of my professional (HBO) bachelor of
|
||||
Electrical Engineering. I was very liberal with some terminology in this
|
||||
report. For example, "linked lists" aren't what you think they are, and I
|
||||
didn't even use the term "locality of reference" where I really should have. It
|
||||
was also written for an audience with little knowledge on the subject, so I
|
||||
elaborated on a lot of things that should be obvious for most people in the
|
||||
field. Then there is a lot of uninteresting overhead about the project itself,
|
||||
which just happened to be mandatory for this report. Nonetheless, if you can
|
||||
ignore these faults it's not such a bad read, if I may say so myself. :-)
|
||||
|
||||
=back
|
||||
683
dat/doc-easyipc
683
dat/doc-easyipc
|
|
@ -1,683 +0,0 @@
|
|||
=pod
|
||||
|
||||
(Published on B<2014-07-29>.)
|
||||
|
||||
=head1 The Problem
|
||||
|
||||
How do you implement communication between two or more processes? This is a
|
||||
question that has been haunting me for at least 6 years now. Of course, this
|
||||
question is very broad and has many possible answers, depending on your
|
||||
scenario. So let me get more specific by describing the problem I want to
|
||||
solve.
|
||||
|
||||
What I want is to write a daemon process that runs in the background and can be
|
||||
controlled from other programs or libraries. The intention is that people can
|
||||
easily write custom interfaces or quick scripts to control the daemon. The
|
||||
service that the daemon offers over this communication channel can be thought
|
||||
of as its primary API, in this way you can think of the daemon as a persistent
|
||||
programming library. This concept is similar to existing programs such as
|
||||
L<btpd|https://github.com/btpd/btpd>, L<MPD|http://www.musicpd.org/>,
|
||||
L<Transmission|https://www.transmissionbt.com/> and
|
||||
L<Telepathy|http://telepathy.freedesktop.org/wiki/> - I'll get back to these
|
||||
later.
|
||||
|
||||
More specifically, the most recent project I've been working on that follows
|
||||
this pattern is L<Globster|https://dev.yorhel.nl/globster>, a remotely
|
||||
controllable Direct Connect client (if you're not familiar with Direct Connect,
|
||||
think of it as IRC with some additional file sharing capabilities built in).
|
||||
While the problem I describe is not specific to Globster, it still serves as an
|
||||
important use case. I see many other projects with similar IPC requirements.
|
||||
|
||||
The IPC mechanism should support two messaging patterns: Request/response and
|
||||
asynchronous notifications. The request/response pattern is what you typically
|
||||
get in RPC systems - the client requests something of the daemon and the daemon
|
||||
then replies with a response. Asynchronous notifications are useful in allowing
|
||||
the daemon to send asynchronous status updates to the client, such as incoming
|
||||
chat messages or file transfer status. Lack of support for such notifications
|
||||
would mean that a client needs to continuously poll for updates, which is
|
||||
inefficient.
|
||||
|
||||
So what I'm looking for is a high-level IPC mechanism that handles this
|
||||
communication. Solutions are evaluated by the following criteria, in no
|
||||
particular order.
|
||||
|
||||
=over
|
||||
|
||||
=item B<Easy>
|
||||
|
||||
And with I<easy> I refer to I<ease of use>. As mentioned above, other people
|
||||
should be able to write applications and scripts to control the daemon. Not
|
||||
many people are willing to invest days of work just to figure out how to
|
||||
communicate with the daemon.
|
||||
|
||||
=item B<Simple>
|
||||
|
||||
Simplicity refers to the actual protocol and the complexity of the code
|
||||
necessary to implement it. Complex protocols require complex code, and complex
|
||||
code is hard to maintain and will inevitably contain bugs. Note that I<simple>
|
||||
and I<easy> are very different things and often even conflict with each other.
|
||||
|
||||
=item B<Small>
|
||||
|
||||
The IPC implementation shouldn't be too large, and shouldn't depend on huge
|
||||
libraries. If you need several megabytes worth of libraries just to send a few
|
||||
messages over a socket, you're doing it wrong.
|
||||
|
||||
=item B<Language independent>
|
||||
|
||||
Control the daemon with whatever programming language you're familiar with.
|
||||
|
||||
=item B<Networked>
|
||||
|
||||
A good solution should be accessible from both the local system (daemon running
|
||||
on the same machine as the client) and from the network (daemon and client
|
||||
running different machines).
|
||||
|
||||
=item B<Secure>
|
||||
|
||||
There's three parts in having a secure IPC mechanism. One part is to realize
|
||||
that IPC operates at a I<trust boundary>; The daemon can't blindly trust
|
||||
everything the client says and vice versa, so message validation and other
|
||||
mechanisms to prevent DoS or information disclosure on either part are
|
||||
necessary.
|
||||
|
||||
Then there's the matter of I<confidentiality>. On a local system, UNIX sockets
|
||||
will provide all the confidentiality you can get, so that's trivial. Networked
|
||||
access, on the other hand, requires some form of transport layer security.
|
||||
|
||||
And finally, we need some form of I<authentication>. There should be some
|
||||
mechanism to prevent just about anyone from connecting to the daemon. A
|
||||
coarse-grained solution such as file permissions on a local UNIX socket or a
|
||||
password-based approach for networked access will do just fine for most
|
||||
purposes. Really, just keep it simple.
|
||||
|
||||
=item B<Fast>
|
||||
|
||||
Although performance isn't really a primary goal, the communication between the
|
||||
daemon and the clients shouldn't be too slow or heavyweight. For my purposes,
|
||||
anything that supports about a hundred messages a second on average hardware
|
||||
will do perfectly fine. And that shouldn't be particularly hard to achieve.
|
||||
|
||||
=item B<Proxy support>
|
||||
|
||||
This isn't really a hard requirement either, but it would be nice to allow
|
||||
other processes (say, plugins of the daemon, or clients connecting to the
|
||||
daemon) to export services over the same IPC channel as the main daemon. This
|
||||
is especially useful in implementing a cross-language plugin architecture. But
|
||||
again, not a hard requirement, because even if the IPC mechanism doesn't
|
||||
directly support proxying, it's always possible for the daemon to implement
|
||||
some custom APIs to achieve the same effect. This, however, requires extra work
|
||||
and may not be as elegant as a built-in solution.
|
||||
|
||||
=back
|
||||
|
||||
Now let's discuss some existing solutions...
|
||||
|
||||
|
||||
=head1 Custom Protocol
|
||||
|
||||
Why use an existing IPC mechanism in the first place when all you need is
|
||||
UNIX/TCP sockets? This is the approach taken by
|
||||
L<btpd|https://github.com/btpd/btpd>, L<MPD|http://www.musicpd.org/>
|
||||
(L<protocol spec|http://www.musicpd.org/doc/protocol/index.html>) and older
|
||||
versions of Transmission (see their L<1.2x
|
||||
spec|https://trac.transmissionbt.com/browser/branches/1.2x/doc/ipcproto.txt>).
|
||||
Btpd hasn't taken the time to document the protocol format, suggesting it's
|
||||
not really intended to be used as a convenient API (other than through their
|
||||
btcli), and Transmission has since changed to a different protocol. I'll mainly
|
||||
focus on MPD here.
|
||||
|
||||
MPD uses a text-based request/response mechanism, where each request is a
|
||||
simple one-line command and a response consists of one or more lines, ending
|
||||
with an C<OK> or C<ACK> line. There's no support for asynchronous
|
||||
notifications, although that could obviously have been implemented, too. Let's
|
||||
grade this protocol...
|
||||
|
||||
=over
|
||||
|
||||
=item B<Easy?> Not really.
|
||||
|
||||
Although MPD has conventions for how messages are formatted, each individual
|
||||
message still requires custom parsing and validation. This can be automated by
|
||||
designing an
|
||||
L<IDL|https://en.wikipedia.org/wiki/Interface_description_language> and
|
||||
accompanying code generator, but writing one specific for a single project
|
||||
doesn't seem like a particularly fun task.
|
||||
|
||||
The protocol, despite its apparent simplicity, is apparently painful enough to
|
||||
use that there is a special I<libmpdclient> library to abstract away the
|
||||
communication with MPD, and interfaces to this library are available in many
|
||||
programming languages. If you have access to such an application-specific
|
||||
library for your language of choice, then sure, using the IPC mechanism is easy
|
||||
enough. But that applies to literally any IPC mechanism.
|
||||
|
||||
Ideally, such a library needs to be written only once for the IPC mechanism in
|
||||
use, and after that no additional code is needed to communicate with
|
||||
services/daemons using that particular IPC mechanism. Code re-use among
|
||||
different projects is great, yo. It also doesn't scale very well when extending
|
||||
the services offered by daemon, any addition to the API will require
|
||||
modifications to all implementations.
|
||||
|
||||
=item B<Simple?> Definitely.
|
||||
|
||||
I only needed a quick glance at the MPD protocol reference and I was able to
|
||||
play a bit with telnet and control my MPD. Writing an implementation doesn't
|
||||
seem like a complex task. Of course, this doesn't necessarily apply to all
|
||||
custom protocols, but you can make it as simple or complex as you want it to
|
||||
be.
|
||||
|
||||
=item B<Small?> Sure.
|
||||
|
||||
This obviously depends on how elaborate you design your protocol. If you have a
|
||||
large or complex API, the size of a generic message parser and validator can
|
||||
easily compensate for the custom parser and validator needed for each custom
|
||||
message. But for simple APIs, it's hard to beat a custom protocol in terms of
|
||||
size.
|
||||
|
||||
=item B<Language independent?> Depends.
|
||||
|
||||
Of course, a socket library is available to most programming languages, and in
|
||||
that sense any IPC mechanism built on sockets is language independent. This is,
|
||||
as such, more of an argument as to how convenient it is to communicate with the
|
||||
protocol directly rather than with a library that abstracts the protocol away.
|
||||
In the case of MPD, the text-based protocol seems easy enough to use directly
|
||||
from most languages, yet for some reason most people prefer language-specific
|
||||
libraries for MPD.
|
||||
|
||||
If you design a binary protocol or anything more complex than simple
|
||||
request/response message types, using your protocol directly is going to be a
|
||||
pain in certain languages, and people will definitely want a library specific
|
||||
to your daemon for their favourite programming language. Something you'll want
|
||||
to avoid, I suppose.
|
||||
|
||||
=item B<Networked?> Sure enough.
|
||||
|
||||
Just a switch between UNIX sockets and TCP sockets. Whether a simple solution
|
||||
like that is a good idea, however, depends on the next point...
|
||||
|
||||
=item B<Secure?> Ugh.
|
||||
|
||||
Security is hard to get right, so having an existing infrastructure that takes
|
||||
care of most security sensitive features will help a lot. Implementing your own
|
||||
protocol means that you also have to implement your own security, to some
|
||||
extent at least.
|
||||
|
||||
Writing code to parse and validate custom messages is error-prone, and a bug in
|
||||
this code could make both the daemon and the client vulnerable to crashes and
|
||||
buffer overflows. A statically-typed abstraction that handles parsing and
|
||||
validation would help a lot.
|
||||
|
||||
For networked communication, you'll need some form of confidentiality. MPD does
|
||||
not seem to support this, so any networked access to an MPD server is
|
||||
vulnerable to passive observers and MITM attacks. This may be fine for a local
|
||||
network (presumably what it is intended to be used for), but certainly doesn't
|
||||
work for exposing your MPD control interface to the wider internet. Existing
|
||||
protocols such as TLS or SSH can be used to create a secure channel, but these
|
||||
libraries tend to be large and hard to use securely. This is especially true
|
||||
for TLS, but at least there's L<stunnel|https://www.stunnel.org/> to simplify
|
||||
the implementation - at the cost of less convenient deployment.
|
||||
|
||||
In terms of authentication, you again need to implement this yourself. MPD
|
||||
supports authentication using a plain-text password. This is fine for a trusted
|
||||
network, but on an untrusted network you certainly want confidentiality to
|
||||
prevent a random observer from reading your password.
|
||||
|
||||
=item B<Fast?> Sure.
|
||||
|
||||
Existing protocols may have put more effort into profiling and implementing
|
||||
various optimizations than one would typically do with a custom and
|
||||
quickly-hacked-together protocol, but still, it probably takes effort to design
|
||||
a protocol that isn't fast enough.
|
||||
|
||||
=item B<Proxy support?> Depends...
|
||||
|
||||
Really depends on how elaborate you want to be. It can be very simple if all
|
||||
you want is to route some messages, it can get very complex if you want to
|
||||
ensure that these messages follow some format or if you want to reserve certain
|
||||
interfaces or namespaces to certain clients. What surprised me about the MPD
|
||||
protocol is that it actually has L<some support for
|
||||
proxying|http://www.musicpd.org/doc/protocol/ch03s11.html>. But considering the
|
||||
ad-hoc nature of the MPD protocol, the primitiveness and simplicity of this
|
||||
proxy support wasn't too surprising. Gets the job done, I suppose.
|
||||
|
||||
=back
|
||||
|
||||
Overall, and as a rather obvious conclusion, a custom protocol really is what
|
||||
you make of it. In general, though, it's a lot of work, not always easy to use,
|
||||
and a challenge to get the security part right.
|
||||
|
||||
|
||||
|
||||
=head1 D-Bus
|
||||
|
||||
D-Bus is being used in L<Transmission|https://www.transmissionbt.com/> and is
|
||||
what I used for L<Globster|https://dev.yorhel.nl/globster>.
|
||||
|
||||
On a quick glance, D-Bus looks I<perfect>. It is high-level, has the messaging
|
||||
patterns I described, the L<protocol
|
||||
specification|http://dbus.freedesktop.org/doc/dbus-specification.html> does not
|
||||
seem I<overly> complex (though certainly could be simplified), it has
|
||||
implementations for a number of programming languages, has support for
|
||||
networking, proxying is part of normal operation, and it seems fast enough for
|
||||
most purposes. When you actually give it a closer look, however, reality isn't
|
||||
as rose-colored.
|
||||
|
||||
D-Bus is designed for two very specific use-cases. One is to allow local
|
||||
applications to securely interact with system-level daemons such as
|
||||
L<HAL|https://en.wikipedia.org/wiki/HAL_(software)> (now long dead) and
|
||||
L<systemd|http://freedesktop.org/wiki/Software/systemd/>, and the other
|
||||
use-case is to allow communication between different applications inside one
|
||||
login session. As such, on a typical Linux system there are two D-Bus daemons
|
||||
where applications can export interfaces and where messages can be routed
|
||||
through. These are called the I<system bus> and the I<session bus>.
|
||||
|
||||
=over
|
||||
|
||||
=item B<Easy?> Almost.
|
||||
|
||||
The basic ideas behind D-Bus seem easy enough to use. The fact that it has
|
||||
type-safe messages, interface descriptions and introspection really help in
|
||||
making D-Bus a convenient IPC mechanism.
|
||||
|
||||
The main reasons why I think D-Bus isn't all that easy to use in practice is
|
||||
due to the lack of good introductory documentation and the crappy state of
|
||||
the various D-Bus implementations. There is a L<fairly good
|
||||
article|https://pythonhosted.org/txdbus/dbus_overview.html> providing a
|
||||
high-level overview to D-Bus, but there isn't a lot of material that covers how
|
||||
to actually use D-Bus to interact with applications or to implement a service.
|
||||
|
||||
On the implementations, I have had rather bad experiences with the actual
|
||||
libraries. I've personally used the official libdbus-1, which markets itself as a
|
||||
"low-level" library designed to facilitate writing bindings for other
|
||||
languages. In practice, the functionality that it offers appears to be too
|
||||
high-level for writing bindings (L<GDBus|https://developer.gnome.org/glib/>
|
||||
doesn't use it for this reason), and it is indeed missing a lot of
|
||||
functionality to make it convenient to use directly. I've also played around
|
||||
with Perl's L<Net::DBus|http://search.cpan.org/perldoc?Net%3A%3ADBus> and was
|
||||
highly disappointed. Not only is the documentation rather incomplete, the
|
||||
actual implementation has more bugs than features. And instead of building on
|
||||
top of one of the many good event loops for Perl (such as
|
||||
L<AnyEvent|http://search.cpan.org/perldoc?AnyEvent>), it chooses to implement
|
||||
L<its own event
|
||||
loop|http://search.cpan.org/perldoc?Net%3A%3ADBus%3A%3AReactor>. The existence
|
||||
of several different libraries for Python doesn't incite much confidence,
|
||||
either.
|
||||
|
||||
I was also disappointed in terms of the available tooling to help in the
|
||||
development, testing and debugging of services. The L<gdbus(1)> tool is useful
|
||||
for monitoring messages and scripting some things, but is not all that
|
||||
convenient because D-Bus has too many namespaces and the terrible Java-like
|
||||
naming conventions make typing everything out a rather painful experience.
|
||||
L<D-Feet|http://live.gnome.org/DFeet/> offers a great way to explore services,
|
||||
but lacks functionality for quick debugging sessions. I L<made an
|
||||
attempt|http://g.blicky.net/dbush.git/> to write a convenient command-line
|
||||
shell, but lost interest halfway. :-(
|
||||
|
||||
D-Bus has the potential to be an easy and convenient IPC mechanism, but the
|
||||
lack of any centralized organization to offer good implementations,
|
||||
documentation and tooling makes using D-Bus a pain to use.
|
||||
|
||||
=item B<Simple?> Not quite.
|
||||
|
||||
D-Bus is conceptually easy and the message protocol is alright, too. Some
|
||||
aspects of D-Bus, however, are rather more complex than they need to be.
|
||||
|
||||
I have once made an attempt to fully understand how D-Bus discovers and
|
||||
connects to the session bus, but I gave up halfway because there are too many
|
||||
special cases. To quickly summarize what I found, there's the
|
||||
C<DBUS_SESSION_BUS_ADDRESS> environment variable which could point to the
|
||||
(filesystem or abstract) path of a UNIX socket or a TCP address. If that
|
||||
variable isn't set, D-Bus will try to connect to your X server and get the
|
||||
address from that. In order to avoid linking everything against X libraries, a
|
||||
separate L<dbus-launch> utility is spawned instead. Then the bus address could
|
||||
also be obtained from a file in your C<$HOME/.dbus/> directory, with added
|
||||
complexity to still support a different session bus for each X session. I've no
|
||||
idea how exactly connection initiation to the system bus works, but my
|
||||
impression is that a bunch of special cases exist there, too, depending on
|
||||
which init system your OS happens to use.
|
||||
|
||||
As if all the options in connection initiation aren't annoying enough, there's
|
||||
also work on L<kdbus|https://lwn.net/Articles/580194/>, a Linux kernel
|
||||
implementation to get better performance. Not only will kdbus use a different
|
||||
underlying communication mechanism, it will also switch to a completely
|
||||
different serialization format. If/when this becomes widespread you will have
|
||||
to implement and support two completely different protocols and pray that your
|
||||
application works with both.
|
||||
|
||||
On the design aspect there is, in my opinion, needless complexity with regards
|
||||
to naming and namespaces. First there is a global namespace for I<bus names>,
|
||||
which are probably better called I<application names>, because that's usually
|
||||
what they represent. Then, there is a separate I<object> namespace local to
|
||||
each bus name. Each object has methods and properties, and these are
|
||||
associated with an I<interface name>, in a namespace specific to the particular
|
||||
object. Despite these different namespaces, the convention is to use a full and
|
||||
globally unique path for everything that has a name. For example, to list the
|
||||
IM protocols that Telepathy supports, you call the C<ListProtocols> method in
|
||||
the C<org.freedesktop.Telepathy.ConnectionManager> interface on the
|
||||
C</org/freedesktop/Telepathy/ConnectionManager> object at the
|
||||
C<org.freedesktop.Telepathy> bus. Fun times indeed. I can understand the
|
||||
reasoning behind most of these choices, but in my opinion they found the wrong
|
||||
trade-off.
|
||||
|
||||
Another point of complexity that annoys me is the fact that an XML format is
|
||||
used to describe interfaces. Supporting XML as an IDL format is alright, but
|
||||
requiring a separate format for an introspection interface gives me the
|
||||
impression that the message format wasn't powerful enough for such a simple
|
||||
purpose. The direct effect of this is that any application wishing to use
|
||||
introspection data will have to link against an XML parser, and almost all
|
||||
conforming XML parser implementations are as large as the D-Bus implementation
|
||||
itself.
|
||||
|
||||
=item B<Small?> Kind of.
|
||||
|
||||
C<libdbus-1.so.3.8.6> on my system is about 240 KiB. It doesn't cover parsing
|
||||
interface descriptions or implementing a D-Bus daemon, but still covers most of
|
||||
what is needed to interact with services and to offer services over D-Bus.
|
||||
It's not I<that> small, but then again, libdbus-1 was not really written with
|
||||
small size in mind. There's room for optimization.
|
||||
|
||||
=item B<Language independent?> Sure.
|
||||
|
||||
D-Bus libraries exist for a number of programming languages.
|
||||
|
||||
=item B<Networked?> Half-assed.
|
||||
|
||||
D-Bus I<officially> supports networked connections to a D-Bus daemon. Actually
|
||||
using this, however, is painful. Convincing L<dbus-daemon(1)> to accept
|
||||
connections on a TCP socket involves disabling all authentication (it expects
|
||||
UNIX credential passing, normally) and requires adding an undocumented C<<
|
||||
<allow_anonymous/> >> tag in the configuration (I only figured this out from
|
||||
reading the source code).
|
||||
|
||||
Even when you've gotten that to work, there is the problem that D-Bus isn't
|
||||
totally agnostic to the underlying socket protocol. D-Bus has support for
|
||||
passing UNIX file descriptors over the connection, and this of course doesn't
|
||||
work over TCP. While this feature is optional and easily avoided, some services
|
||||
(I can't find one now) use UNIX fds in order to keep track of processes that
|
||||
listen to a certain event. Obviously, those services can't be accessed over the
|
||||
network.
|
||||
|
||||
=item B<Secure?> Only locally.
|
||||
|
||||
D-Bus has statically typed messages that can be validated automatically, so
|
||||
that's a plus.
|
||||
|
||||
For local authentication, there is support for standard UNIX permissions and
|
||||
credential passing for more fine-grained authorization. For remote
|
||||
authentication, I think there is support for a shared secret cookie, but I
|
||||
haven't tried to use this yet.
|
||||
|
||||
There is, as with MPD, no support at all for confidentiality, so using
|
||||
networked D-Bus over an untrusted network would be a very bad idea anyway.
|
||||
|
||||
=item B<Fast?> Mostly.
|
||||
|
||||
The messaging protocol is fairly lightweight, so no problems there. I do have
|
||||
to mention two potential performance issues, however.
|
||||
|
||||
The first issue is that the normal mode of operation in D-Bus is to proxy all
|
||||
messages through an intermediate D-Bus daemon. This involves extra context
|
||||
switches and message parsing passes in order to get one message from
|
||||
application A to application B. I believe it is I<officially> supported to
|
||||
bypass this daemon and to communicate directly between two processes, but after
|
||||
my experience with networking I am wary of trying anything that isn't part of
|
||||
how D-Bus is I<intended> to be used. This particular performance issue is what
|
||||
kdbus addresses, so I suppose it won't apply to future Linux systems.
|
||||
|
||||
The other issue is that a daemon that provides a service over D-Bus does not
|
||||
know whether there exists an application that is interested in receiving its
|
||||
notifications. This means that the daemon always has to spend resources to send
|
||||
out notification messages, even if no application is actually interested in
|
||||
receiving them. In practice this means that the notification mechanism is
|
||||
avoided for events that may occur fairly often, and an equally inefficient
|
||||
polling approach has to be used instead. It is possible for a service provider
|
||||
to keep track of interested applications, but this is not part of the D-Bus
|
||||
protocol and not something you would want to implement for each possible event.
|
||||
I've no idea if kdbus addresses this issue, but it would be stupid not to.
|
||||
|
||||
=item B<Proxy support?> Yup.
|
||||
|
||||
It's part of normal operation, even.
|
||||
|
||||
=back
|
||||
|
||||
D-Bus has many faults, some of them are by design, but many are fixable. I
|
||||
would have contributed to improving the situation, but I get the feeling that
|
||||
the goals of the D-Bus maintainers are not at all aligned with mine. My
|
||||
impression is that the D-Bus maintainers are far too focussed on their own
|
||||
specific needs and care little about projects with slightly different needs.
|
||||
Especially with the introduction of kdbus, I consider D-Bus too complex now to
|
||||
consider it worth the effort to improve. Starting from scratch seems less work.
|
||||
|
||||
|
||||
=head1 JSON/XML RPC
|
||||
|
||||
While I haven't extensively used JSON-RPC or XML-RPC myself, it's still an
|
||||
interesting alternative to study.
|
||||
L<Transmission|https://www.transmissionbt.com/> uses JSON-RPC
|
||||
(L<spec|https://trac.transmissionbt.com/browser/trunk/extras/rpc-spec.txt>) as
|
||||
its primary IPC mechanism, and L<RTorrent|http://rakshasa.github.io/rtorrent/>
|
||||
has support for an optional XML-RPC interface. (Why do I keep referencing
|
||||
torrent clients? Surely there are other interesting applications? Oh well.)
|
||||
|
||||
The main selling point of HTTP-based IPC is that it is accessible from
|
||||
browser-based applications, assuming everything has been set up correctly. This
|
||||
is a nice advantage, but lack of this support is not really a deal-breaker for
|
||||
me. Browser-based applications can still use any other IPC mechanism, as long
|
||||
as there are browser plugins or some form of proxy server that converts the
|
||||
messages of the IPC mechanism to something that is usable over HTTP. For
|
||||
example, both solutions exist for D-Bus, in the form of the L<Browser DBus
|
||||
Bridge|http://sandbox.movial.com/wiki/index.php/Browser_DBus_Bridge> and
|
||||
L<cloudeebus|https://github.com/01org/cloudeebus>. Of course, such solutions
|
||||
typically aren't as convenient as native HTTP support.
|
||||
|
||||
Since HTTP is, by design, purely request-response, JSON-RPC and XML-RPC don't
|
||||
generally support asynchronous notifications. It's possible to still get
|
||||
asynchronous notifications by using
|
||||
L<WebSockets|https://en.wikipedia.org/wiki/WebSocket> (Ugh, opaque stream
|
||||
sockets, time to go back to our L<custom protocol|/Custom Protocol>) or by
|
||||
having the client implement an HTTP server itself and send its URL to the
|
||||
service provider (This is known as a
|
||||
L<callback|https://duckduckgo.com/?q=web%20service%20callback> in the
|
||||
L<SOAP|https://en.wikipedia.org/wiki/SOAP> world. I have a lot of respect for
|
||||
developers who can put up with that crap). As I already hinted, neither
|
||||
solution is simple or easy.
|
||||
|
||||
Let's move on to the usual grading...
|
||||
|
||||
=over
|
||||
|
||||
=item B<Easy?> Sure.
|
||||
|
||||
The ubiquity of HTTP, JSON and XML on the internet means that most developers
|
||||
are already familiar with using it. And even if you aren't, there are so many
|
||||
easy-to-use and well-documented libraries available that you're ready to go in
|
||||
a matter of minutes.
|
||||
|
||||
Although interface description languages/formats exist for XML-RPC (and
|
||||
possibly for JSON-RPC, too), I get the impression these are not often used
|
||||
outside of the SOAP world. As a result, interacting with such a service tends
|
||||
to be weakly/stringly typed, which, I imagine, is not as convenient in strongly
|
||||
typed programming languages.
|
||||
|
||||
=item B<Simple?> Not really.
|
||||
|
||||
Many people have the impression that HTTP is somehow a simple protocol. Sure,
|
||||
it may look simple on the wire, but in reality it is a hugely bloated and
|
||||
complex protocol. I strongly encourage everyone to read through L<RFC
|
||||
2616|https://tools.ietf.org/html/rfc2616> at least once to get an idea of its
|
||||
size and complexity. To make things worse, there's a lot of recent activity to
|
||||
standardize on a next generation HTTP
|
||||
(L<SPDY|https://en.wikipedia.org/wiki/SPDY> and L<HTTP
|
||||
2.0|https://en.wikipedia.org/wiki/HTTP_2.0>), but I suppose we can ignore these
|
||||
developments for the foreseeable future for the use case of IPC.
|
||||
|
||||
Of course, a lot of the functionality specified for HTTP is optional and can be
|
||||
ignored for the purpose of IPC, but that doesn't mean that these options don't
|
||||
exist. When implementing a client, it would be useful to know exactly which
|
||||
HTTP options the server supports. It would be wasteful to implement compression
|
||||
support if the server doesn't support it, or keep-alive, or content
|
||||
negotiation, or ranged requests, or authentication, or correct handling for all
|
||||
response codes when the server will only ever send 'OK'. What also commonly
|
||||
happens is that server implementors want to support as much as possible, to the
|
||||
point that you can have JSON or XML output, depending on what the client
|
||||
requested.
|
||||
|
||||
XML faces a similar problem. The format looks simple, but the specification has
|
||||
a bunch of features that hardly anyone uses. In contrast to HTTP, however, a
|
||||
correct XML parser can't just decide to not parse C<< <!DOCTYPE ..> >> stuff,
|
||||
so it I<has> to implement some of this complexity.
|
||||
|
||||
On the upside, JSON is a really simple serialization format, and if you're
|
||||
careful enough to only implement the functionality necessary for basic HTTP, a
|
||||
JSON-RPC implementation I<can> be somewhat simple.
|
||||
|
||||
=item B<Small?> Not really.
|
||||
|
||||
What typically happens is that implementors take an existing HTTP library and
|
||||
build on top of that. A generic HTTP library likely implements a lot more than
|
||||
necessary for IPC, so that's not going to be very small. RTorrent, for example,
|
||||
makes use of the not-very-small L<xmlrpc-c|http://xmlrpc-c.sourceforge.net/>,
|
||||
which in turn uses L<libcurl|http://curl.haxx.se/> (400 KiB, excluding TLS
|
||||
library) and either the bloated L<libxml2|http://xmlsoft.org/> (1.5 MiB) or
|
||||
L<libexpat|http://www.libexpat.org/> (170 KiB). In any case, expect your
|
||||
programs to grow by a megabyte or more if you go this route.
|
||||
|
||||
Transmission seems rather less bloated. It uses the HTTP library that is built
|
||||
into L<libevent|http://libevent.org/> (totalling ~500 KiB, but libevent is also
|
||||
used for other networking parts), and a simple JSON parser can't be that large
|
||||
either. I'm sure that if you reimplement everything from scratch for the
|
||||
purpose of building an API, you could get something much smaller. Then again,
|
||||
even if you manage to shrink the size of the server that way, you can't expect
|
||||
all your users to do the same.
|
||||
|
||||
If HTTPS is to be supported, add ~500 KiB more. TLS isn't the simplest
|
||||
protocol, either.
|
||||
|
||||
=item B<Language independent?> Yes.
|
||||
|
||||
Almost every language has libraries for web stuff.
|
||||
|
||||
=item B<Networked?> Definitely.
|
||||
|
||||
In fact, I've never seen anyone use XML/JSON RPC over UNIX sockets.
|
||||
|
||||
=item B<Secure?> Alright.
|
||||
|
||||
HTTP has built-in support for authentication, but it also isn't uncommon to use
|
||||
some other mechanism (based on cookies, I guess?).
|
||||
|
||||
Confidentiality can be achieved with HTTPS. There is the problem of verifying
|
||||
the certificate, since I doubt anyone is going to have certificates of their
|
||||
local applications signed by a certificate authority, but there's always the
|
||||
option of trust-on-first use. Custom applications can also include a
|
||||
fingerprint of the server certificate in the URL for verification, but this
|
||||
won't work for web apps.
|
||||
|
||||
=item B<Fast?> No.
|
||||
|
||||
JSON/XML RPC messages add significant overhead to the network and require more
|
||||
parsing than a simple custom solution or D-Bus. I wouldn't really call it
|
||||
I<fast>, but admittedly, it might still be I<fast enough> for most purposes.
|
||||
|
||||
=item B<Proxy support?> Sure.
|
||||
|
||||
HTTP has native support for proxying, and it's always possible to proxy some
|
||||
URI on the main server to another server, assuming the libraries you use
|
||||
support that. It's not necessarily simple to implement, however.
|
||||
|
||||
=back
|
||||
|
||||
The lack of asynchronous notifications and the overhead and complexity of
|
||||
JSON/XML RPC make me stay away from it, but it certainly is a solution that
|
||||
many client developers will like because of its ease of use.
|
||||
|
||||
|
||||
=head1 Other Systems
|
||||
|
||||
There are more alternatives out there than I have described so far. Most of
|
||||
those were options I dismissed early on because they're either incomplete
|
||||
solutions or specific to a single framework or language. I'll still mention a
|
||||
few here.
|
||||
|
||||
=head2 Message Queues
|
||||
|
||||
In the context of IPC I see that message queues such as
|
||||
L<RabbitMQ|https://www.rabbitmq.com/> and L<ZeroMQ|http://zeromq.org/> are
|
||||
quite popular. I can't say I have much experience with any of these, but these
|
||||
MQs don't seem to offer a solution to the problem I described in the
|
||||
introduction. My impression of MQs is that they offer a higher-level and more
|
||||
powerful alternative to TCP and UDP. That is, they route messages from one
|
||||
endpoint to another. The contents of the messages are still completely up to
|
||||
the application, so you're still on your own in implementing an RPC mechanism
|
||||
on top of that. And for the purpose of building a simple RPC mechanism, I'm
|
||||
convinced that plain old UNIX sockets or TCP will do just fine.
|
||||
|
||||
=head2 Cap'n Proto
|
||||
|
||||
I probably should be spending a full chapter on L<Cap'n
|
||||
Proto|http://kentonv.github.io/capnproto/> instead of this tiny little section,
|
||||
but I'm simply not familiar enough with it to offer any deep insights. I can
|
||||
still offer my blatantly uninformed impression of it: It looks very promising,
|
||||
but puts, in my opinion, too much emphasis on performance and too little
|
||||
emphasis on ease of use. It lacks introspection and requires that clients have
|
||||
already obtained the schema of the service in order to interact with it. It
|
||||
also uses a capability system to handle authorization, which, despite being
|
||||
elegant and powerful, increases complexity and cognitive load (though I
|
||||
obviously need more experience to quantify this). It still lacks
|
||||
confidentiality for networked access and the number of bindings to other
|
||||
programming languages is limited, but these problems can be addressed.
|
||||
|
||||
Cap'n Proto seems like the ideal IPC mechanism for internal communication
|
||||
within a single (distributed) application and offers a bunch of unique features
|
||||
not found in other RPC systems. But it doesn't feel quite right as an easy API
|
||||
for others to use.
|
||||
|
||||
=head2 CORBA
|
||||
|
||||
CORBA has been used by the GNOME project in the past, and was later abandoned
|
||||
in favour of D-Bus, primarily (I think) because CORBA was deemed too L<complex
|
||||
and incomplete|http://dbus.freedesktop.org/doc/dbus-faq.html#corba>. A system
|
||||
that is deemed more complex than D-Bus is an immediate red flag. The L<long and
|
||||
painful history of CORBA|http://queue.acm.org/detail.cfm?id=1142044> also makes
|
||||
me want to avoid it, if only because that makes it very hard to judge the
|
||||
quality and modernness of existing implementations.
|
||||
|
||||
=head2 Project Tanja
|
||||
|
||||
A bit over two years ago I was researching the same problem, but from a much
|
||||
more generic angle. The result of that was a project that I called Tanja. I
|
||||
described its concepts L<in an earlier
|
||||
article|https://dev.yorhel.nl/doc/commvis>, and wrote an incomplete
|
||||
L<specification|http://g.blicky.net/tanja.git/> along with implementations in
|
||||
L<C|http://g.blicky.net/tanja-c.git/>, L<Go|http://g.blicky.net/tanja-go.git/>
|
||||
and L<Perl|http://g.blicky.net/tanja-perl.git/>. I consider project Tanja a
|
||||
failure, primarily because of its genericity. It supported too many
|
||||
communication models and the lack of a specification as to which model was
|
||||
used, and the lack of any guarantee that this model was actually followed, made
|
||||
Tanja hard to use in practice. It was a very interesting experiment, but not
|
||||
something I would actually use. I learned the hard way that you sometimes have
|
||||
to move some complexity down into a lower abstraction layer in order to keep
|
||||
the complexity in check at higher layers of abstraction.
|
||||
|
||||
|
||||
=head1 Conclusions
|
||||
|
||||
This must be the longest rant I've written so far.
|
||||
|
||||
In any case, there isn't really a perfect IPC mechanism for my use case. A
|
||||
custom protocol involves reimplementing a lot of stuff, D-Bus is a pain, and
|
||||
JSON/XML RPC are bloat.
|
||||
|
||||
I am still undecided on what to do. I have a lot of ideas as to what a perfect
|
||||
IPC solution would look like, both in terms of features and in how to implement
|
||||
it, and I feel like I have enough experience by now to actually develop a
|
||||
proper solution. Unfortunately, writing a complete IPC system with the required
|
||||
utilities and language bindings takes B<a lot> of time and effort. It's not
|
||||
really worth it if I am the only one using it.
|
||||
|
||||
So here is my plea to you, dear reader: If you know of any existing solutions
|
||||
I've missed, please tell me. If you empathize with me and want a better
|
||||
solution to this problem, please get in touch as well! I'd love to hear about
|
||||
projects which face similar problems and have similar requirements.
|
||||
|
||||
571
dat/doc-funcweb
571
dat/doc-funcweb
|
|
@ -1,571 +0,0 @@
|
|||
An Opinionated Survey of Functional Web Development
|
||||
|
||||
=pod
|
||||
|
||||
(Published on B<2017-05-28>.)
|
||||
|
||||
=head1 Intro
|
||||
|
||||
TL;DR: In this article I provide an overview of the frameworks and libraries
|
||||
available for creating websites in statically-typed functional programming
|
||||
languages.
|
||||
|
||||
I recommend you now skip directly to the next section, but if you're interested
|
||||
in some context and don't mind a rant, feel free to read on. :-)
|
||||
|
||||
B<< <Rant mode> >>
|
||||
|
||||
When compared to native desktop application development, web development just
|
||||
sucks. Native development is relatively simple with toolkits such as
|
||||
L<Qt|https://www.qt.io/>, L<GTK+|https://www.gtk.org/> and others: You have
|
||||
convenient widget libraries, and you can describe your entire application, from
|
||||
interface design to all behavioural aspects, in a single programming language.
|
||||
You're also largely free to structure code in whichever way makes most sense.
|
||||
You can describe what a certain input field looks like, what happens when the
|
||||
user interacts with it and what will happen with the input data, all succinctly
|
||||
in a single file. There are even drag-and-drop UI builders to speed up
|
||||
development.
|
||||
|
||||
Web development is the exact opposite of that. There are several different
|
||||
technologies you're forced to work with even when creating the most mundane
|
||||
website, and there's a necessary but annoying split between code that runs on
|
||||
the server and code that runs in the browser. Creating a simple input field
|
||||
requires you to consider and maintain several ends:
|
||||
|
||||
=over
|
||||
|
||||
=item
|
||||
|
||||
The back end (server-side code) that describes how the input field interacts
|
||||
with the database.
|
||||
|
||||
=item
|
||||
|
||||
Some JavaScript code to describe how the user can interact with the input
|
||||
field.
|
||||
|
||||
=item
|
||||
|
||||
Some CSS to describe what the input field looks like.
|
||||
|
||||
=item
|
||||
|
||||
And then there's HTML to act as a glue between the above.
|
||||
|
||||
=back
|
||||
|
||||
In many web development setups, all four of the above technologies are
|
||||
maintained in different files. If you want to add, remove or modify an input
|
||||
field, or just about anything else on a page, you'll be editing at least four
|
||||
different files with different syntax and meaning. I don't know how other
|
||||
developers deal with this, but the only way I've been able to keep these places
|
||||
synchronized is to just edit one or two places, test if it works in a browser,
|
||||
and then edit the other places accordingly to fix whatever issues I find. This
|
||||
doesn't always work well: I don't get a warning if I remove an HTML element
|
||||
somewhere and forget to also remove the associated CSS. Heck, in larger
|
||||
projects I can't even tell whether it's safe to remove or edit a certain line
|
||||
of CSS because I have no way to know for sure that it's not still being used
|
||||
elsewhere. Perhaps this particular case can be solved with proper organization
|
||||
and discipline, but similar problems exist with the other technologies.
|
||||
|
||||
Yet despite that, why do I still create websites in my free time? Because it is
|
||||
the only environment with high portability and low friction - after all, pretty
|
||||
much anyone can browse the web. I would not have been able to create a useful
|
||||
"L<Visual Novel Database|https://vndb.org/>" any other way than through a
|
||||
website. And the entire purpose of L<Manned.org|https://manned.org/> is to
|
||||
provide quick access to man pages from anywhere, which is not easily possible
|
||||
with native applications.
|
||||
|
||||
B<< </Rant mode> >>
|
||||
|
||||
Fortunately, I am not the only one who sees the problems with the "classic"
|
||||
development strategy mentioned above. There are many existing attempts to
|
||||
improve on that situation. A popular approach to simplify development is the
|
||||
L<Single-page
|
||||
application|https://en.wikipedia.org/wiki/Single-page_application> (SPA). The
|
||||
idea is to move as much code as possible to the front end, and keep only a
|
||||
minimal back end. Both the HTML and the entire behaviour of the page can be
|
||||
defined in the same language and same file. With libraries such as
|
||||
L<React|https://facebook.github.io/react/> and browser support for L<Web
|
||||
components|https://developer.mozilla.org/en-US/docs/Web/Web_Components>, the
|
||||
split between files described above can be largely eliminated. And if
|
||||
JavaScript isn't your favorite language, there are many alternative languages
|
||||
that compile to JavaScript. (See L<The JavaScript
|
||||
Minefield|http://walkercoderanger.com/blog/2014/02/javascript-minefield/> for
|
||||
an excellent series of articles on that topic).
|
||||
|
||||
While that approach certainly has the potential to make web development more
|
||||
pleasant, it has a very significant drawback: Performance. For some
|
||||
applications, such as web based email clients or CRM systems, it can be
|
||||
perfectly acceptable to have a megabyte of JavaScript as part of the initial
|
||||
page load. But for most other sites, such as this one, or the two sites I
|
||||
mentioned earlier, or sites like Wikipedia, a slow initial page load is
|
||||
something I consider to be absolutely unacceptable. The web can be really fast,
|
||||
and developer laziness is not a valid excuse to ruin it. (If you haven't seen
|
||||
or read L<The Website Obesity
|
||||
Crisis|http://idlewords.com/talks/website_obesity.htm> yet, please do so now).
|
||||
|
||||
I'm much more interested in the opposite approach to SPA: Move as much code as
|
||||
possible to the back end, and only send a minimal amount of JavaScript to the
|
||||
browser. This is arguably how web development has always been done in the past,
|
||||
and there's little reason to deviate from it. The difference, however, is that
|
||||
people tend to expect much more "interactivity" from web sites nowadays, so the
|
||||
amount of JavaScript is increasing. And that is alright, so long as the
|
||||
JavaScript doesn't prevent the initial page from loading quickly. But this
|
||||
increase in JavaScript does amplify the "multiple files" problem I ranted about
|
||||
earlier.
|
||||
|
||||
So my ideal solution is a framework where I can describe all aspects of a site
|
||||
in a single language, and organize the code among files in a way that makes
|
||||
sense to me. That is, I want the same kind of freedom that I get with native
|
||||
desktop software development. Such a framework should run on the back end, and
|
||||
automatically generate efficient JavaScript and, optionally, CSS for the front
|
||||
end. As an additional requirement (or rather, strong preference), all this
|
||||
should be in a statically-typed language - because I am seemingly incapable of
|
||||
writing large reliable applications with dynamic typing - and in a language
|
||||
from functional heritage - because programming in functional languages has
|
||||
spoiled me.
|
||||
|
||||
I'm confident that what I describe is possible, and it's evident that I'm not
|
||||
the only person to want this, as several (potential) solutions like this do
|
||||
indeed exist. I've been looking around for these solutions and have
|
||||
experimented with a few that looked promising. This article provides an
|
||||
overview of what I have found so far.
|
||||
|
||||
=head1 OCaml
|
||||
|
||||
My adventure began with L<OCaml|https://ocaml.org/>. It's been a few years
|
||||
since I last used OCaml for anything, but development on the language and its
|
||||
ecosystem have been anything but halted. L<Real World OCaml|https://realworldocaml.org/>
|
||||
has been a great resource to get me up to speed again.
|
||||
|
||||
=head2 Ocsigen
|
||||
|
||||
For OCaml there is one project that has it all: L<Ocsigen|http://ocsigen.org/>.
|
||||
It comes with an OCaml to JavaScript compiler, a web server, several handy
|
||||
libraries, and a L<framework|http://ocsigen.org/eliom/> to put everything
|
||||
together. Its L<syntax
|
||||
extension|http://ocsigen.org/eliom/6.2/manual/ppx-syntax> allows you to mix
|
||||
front and back end code, and you can easily share code between both ends. The
|
||||
final result is a binary that runs the server and a JavaScript file that
|
||||
handles everything on the client side.
|
||||
|
||||
The framework comes with an embedded DSL with which you can conveniently
|
||||
generate HTML without actually typing HTML. And best of all, this DSL works on
|
||||
both the client and the server: On the server side it generates an HTML string
|
||||
that can be sent to the client, and running the same code on the client side
|
||||
will result in a DOM element that is ready to be used.
|
||||
|
||||
Ocsigen makes heavy use of the OCaml type system to statically guarantee the
|
||||
correctness of various aspects of the application. The HTML DSL ensures not
|
||||
only that the generated HTML is well-formed, but also prevents you from
|
||||
incorrectly nesting certain elements and using the wrong attributes on the
|
||||
wrong elements. Similarly, an HTML element generated on the server side can be
|
||||
referenced from client side code without having to manually assign a unique ID
|
||||
to the element. This prevents accidental typos in the ID naming and guarantees
|
||||
that the element that the client side code refers to actually exists. URL
|
||||
routing and links to internal pages are also checked at compile time.
|
||||
|
||||
Ocsigen almost exactly matches what I previously described as the perfect
|
||||
development framework. Unfortunately, it has a few drawbacks:
|
||||
|
||||
=over
|
||||
|
||||
=item
|
||||
|
||||
The generated JavaScript is quite large, a bit over 400 KiB for a hello world.
|
||||
In my brief experience with the framework, this also results in a noticeably
|
||||
slower page load. I don't know if it was done for performance purposes, but
|
||||
subsequent page views are per default performed via in-browser XHR requests,
|
||||
which do not require that all the JavaScript is re-parsed and evaluated, and is
|
||||
thus much faster. This, however, doesn't work well if the user opens pages in
|
||||
multiple tabs or performs a page reload for whatever reason. And as I
|
||||
mentioned, I care a lot about the initial page loading time.
|
||||
|
||||
=item
|
||||
|
||||
The framework has a steep learning curve, and the available documentation is by
|
||||
far not complete enough to help you. I've found myself wondering many times how
|
||||
I was supposed to use a certain API and have had to look for example code for
|
||||
enlightenment. At some point I ended up just reading the source code instead of
|
||||
going for the documentation. What doesn't help here is that, because of the
|
||||
heavy use of the type system to ensure code correctness, most of the function
|
||||
signatures are far from intuitive and are sometimes very hard to interpret.
|
||||
This problem is made even worse with the generally unhelpful error messages
|
||||
from the compiler. (A few months with L<Rust|https://www.rust-lang.org/> and
|
||||
its excellent error messages has really spoiled me on this aspect, I suppose).
|
||||
|
||||
=item
|
||||
|
||||
I believe they went a bit too far with the compile-time verification of certain
|
||||
correctness properties. Apart from making the framework harder to learn, it
|
||||
also increases the verbosity of the code and removes a lot of flexibility. For
|
||||
instance, in order for internal links to be checked, you have to declare your
|
||||
URLs (or I<services>, as they call it) somewhere central such that the view
|
||||
part of your application can access it. Then elsewhere you have to register a
|
||||
handler to that service. This adds boilerplate and enforces a certain code
|
||||
structure. And the gain of all this is, in my opinion, pretty small: In the 15
|
||||
years that I have been building web sites, I don't remember a single occurrence
|
||||
where I mistyped the URL in an internal link. I do suppose that this feature
|
||||
makes it easy to change URLs without causing breakage, but there is a trivial
|
||||
counter-argument to that: L<Cool URIs don't
|
||||
change|https://www.w3.org/Provider/Style/URI.html>. (Also, somewhat ironically,
|
||||
I have found more dead internal links on the Ocsigen website than on any other
|
||||
site I have visited in the past year, so perhaps this was indeed a problem they
|
||||
considered worth fixing. Too bad it didn't seem to work out so well for them).
|
||||
|
||||
=back
|
||||
|
||||
Despite these drawbacks, I am really impressed with what the Ocsigen project
|
||||
has achieved, and it has set a high bar for the future frameworks that I will
|
||||
be considering.
|
||||
|
||||
|
||||
=head1 Haskell
|
||||
|
||||
I have always seen Haskell as that potentially awesome language that I just
|
||||
can't seem to wrap my head around, despite several attempts in the past to
|
||||
learn it. Apparently the only thing I was missing in those attempts was a
|
||||
proper goal: When I finally started playing around with some web frameworks I
|
||||
actually managed to get productive in Haskell with relative ease. What also
|
||||
helped me this time was a practical introductory Haskell reference, L<What I
|
||||
Wish I Knew When Learning Haskell|http://dev.stephendiehl.com/hask/>, in
|
||||
addition to the more theoretical L<Learn You A Haskell for Great
|
||||
Good|http://learnyouahaskell.com/>.
|
||||
|
||||
Haskell itself already has a few advantages when compared to OCaml: For one, it
|
||||
has a larger ecosystem, so for any task you can think of there is probably
|
||||
already at least one existing library. As an example, I was unable to find an
|
||||
actively maintained SQL DSL for OCaml, while there are several available for
|
||||
Haskell. Another advantage that I found were the much more friendly and
|
||||
detailed error messages generated by the Haskell compiler, GHC. In terms of
|
||||
build systems, Haskell has standardized on
|
||||
L<Cabal|https://www.haskell.org/cabal/>, which works alright most of the time.
|
||||
Packaging is still often complex and messy, but it's certainly improving as
|
||||
L<Stack|http://haskellstack.org/> is gaining more widespread adoption. Finally,
|
||||
I feel that the Haskell syntax is slightly less verbose, and more easily lends
|
||||
itself to convenient DSLs.
|
||||
|
||||
Despite Haskell's larger web development community, I could not find a single
|
||||
complete and integrated client/server development framework such as Ocsigen.
|
||||
Instead, there are a whole bunch of different projects focussing on either the
|
||||
back end or the front end. I'll explore some of them with the idea that,
|
||||
perhaps, it's possible to mix and match different libraries and frameworks in
|
||||
order to get the perfect development environment. And indeed, this seems to be
|
||||
a common approach in many Haskell projects.
|
||||
|
||||
=head2 Server-side
|
||||
|
||||
Let's start with a few back end frameworks.
|
||||
|
||||
=over
|
||||
|
||||
=item Scotty
|
||||
|
||||
L<Scotty|https://github.com/scotty-web/scotty> is a web framework inspired by
|
||||
L<Sinatra|http://www.sinatrarb.com/>. I have no experience with (web)
|
||||
development in Ruby and have never used Sinatra, but it has some similarities
|
||||
to what I have been using for a long time: L<TUWF|https://dev.yorhel.nl/tuwf>.
|
||||
|
||||
Scotty is a very minimalist framework; It does routing (that is, mapping URLs
|
||||
to Haskell functions), it has some functions to access request data and some
|
||||
functions to create and modify a response. That's it. No database handling,
|
||||
session management, HTML generation, form handling or other niceties. But
|
||||
that's alright, because there are many generic libraries to help you out there.
|
||||
|
||||
Thanks to its minimalism, I found Scotty to be very easy to learn and get used
|
||||
to. Even as a Haskell newbie I had a simple website running within a day. The
|
||||
documentation is appropriate, but the idiomatic way of combining Scotty with
|
||||
other libraries is through the use of Monad Transformers, and a few more
|
||||
examples in this area would certainly have helped.
|
||||
|
||||
=item Spock
|
||||
|
||||
Continuing with the Star Trek franchise, there's
|
||||
L<Spock|https://www.spock.li/>. Spock is very similar to Scotty, but comes with
|
||||
type-safe routing and various other goodies such as session and state
|
||||
management, L<CSRF|https://en.wikipedia.org/wiki/Cross-site_request_forgery>
|
||||
protection and database helpers.
|
||||
|
||||
As with everything that is (supposedly) more convenient, it also comes with a
|
||||
slightly steeper learning curve. I haven't, for example, figured out yet how to
|
||||
do regular expression based routing. I don't even know if that's still possible
|
||||
in the latest version - the documentation isn't very clear. Likewise, it's
|
||||
unclear to me what the session handling does exactly (Does it store something?
|
||||
And where? Is there a timeout?) and how that interacts with CSRF protection.
|
||||
Spock seems useful, but requires more than just a cursory glance.
|
||||
|
||||
=item Servant
|
||||
|
||||
L<Servant|http://haskell-servant.github.io/> is another minimalist web
|
||||
framework, although it is primarily designed for creating RESTful APIs.
|
||||
|
||||
Servant distinguishes itself from Scotty and Spock by not only featuring
|
||||
type-safe routing, it furthermore allows you to describe your complete public
|
||||
API as a type, and get strongly typed responses for free. This also enables
|
||||
support for automatically generated documentation and client-side API wrappers.
|
||||
|
||||
Servant would be an excellent back end for a SPA, but it does not seem like an
|
||||
obvious approach to building regular websites.
|
||||
|
||||
=item Happstack / Snap / Yesod
|
||||
|
||||
L<Happstack|http://www.happstack.com/>, L<Yesod|http://www.yesodweb.com/> and
|
||||
L<Snap|http://snapframework.com/> are three large frameworks with many
|
||||
auxiliary libraries. They all come with a core web server, routing, state and
|
||||
database management. Many of the libraries are not specific to the framework
|
||||
and can be used together with other frameworks. I won't go into a detailed
|
||||
comparison between the three projects because I have no personal experience
|
||||
with any of them, and fortunately L<someone else already wrote a
|
||||
comparison|http://softwaresimply.blogspot.nl/2012/04/hopefully-fair-and-useful-comparison-of.html>
|
||||
in 2012 - though I don't know how accurate that still is today.
|
||||
|
||||
=back
|
||||
|
||||
So there are a fair amount of frameworks to choose from, and they can all work
|
||||
together with other libraries to implement additional functions. Apart from the
|
||||
framework, another important aspect of web development is how you generate the
|
||||
HTML to send to the client. In true Haskell style, there are several answers.
|
||||
|
||||
For those who prefer embedded DSLs, there are
|
||||
L<xhtml|http://hackage.haskell.org/package/xhtml>,
|
||||
L<BlazeHTML|https://jaspervdj.be/blaze/> and
|
||||
L<Lucid|https://github.com/chrisdone/lucid>. The xhtml package is not being
|
||||
used much nowadays and has been superseded by BlazeHTML, which is both faster
|
||||
and offers a more readable DSL using Haskell's do-notation. Lucid is heavily
|
||||
inspired by Blaze, and attempts to L<fix several of its
|
||||
shortcomings|http://chrisdone.com/posts/lucid>. Having used Lucid a bit myself,
|
||||
I can attest that it is easy to get started with and pretty convenient in use.
|
||||
|
||||
I definitely prefer to generate HTML using DSLs as that keeps the entire
|
||||
application in a single host language and with consistent syntax, but the
|
||||
alternative approach, templating, is also fully supported in Haskell. The Snap
|
||||
framework comes with L<Heist|https://github.com/snapframework/heist>, which are
|
||||
run-time interpreted templates, like similar systems in most other languages.
|
||||
Yesod comes with L<Shakespeare|http://hackage.haskell.org/package/shakespeare>,
|
||||
which is a type-safe templating system with support for inlining the templates
|
||||
in Haskell code. Interestingly, Shakespeare also has explicit support for
|
||||
templating JavaScript code. Too bad that this doesn't take away the need to
|
||||
write the JavaScript yourself, so I don't see how this is an improvement over
|
||||
some other JavaScript solution that uses JSON for communication with the back
|
||||
end.
|
||||
|
||||
=head2 Client-side
|
||||
|
||||
It is rather unusual to have multiple compiler implementations targeting
|
||||
JavaScript for the same source language, but Haskell has three of them. All
|
||||
three can be used to write front end code without touching a single line of
|
||||
JavaScript, but there are large philosophical differences between the three
|
||||
projects.
|
||||
|
||||
=over
|
||||
|
||||
=item Fay
|
||||
|
||||
L<Fay|https://github.com/faylang/fay/wiki> compiles Haskell code directly to
|
||||
JavaScript. The main advantage of Fay is that it does not come with a large
|
||||
runtime, resulting in small and efficient JavaScript. The main downside is that it
|
||||
only L<supports a subset of
|
||||
Haskell|https://github.com/faylang/fay/wiki/What-is-not-supported?>. The
|
||||
result is a development environment that is very browser-friendly, but where
|
||||
you can't share much code between the front and back ends. You're basically
|
||||
back to the separated front and back end situation in classic web development,
|
||||
but at least you can use the same language for both - somewhat.
|
||||
|
||||
Fay itself doesn't come with many convenient UI libraries, but
|
||||
L<Cinder|http://crooney.github.io/cinder/index.html> covers that with a
|
||||
convenient HTML DSL and DOM manipulation library.
|
||||
|
||||
Fay is still seeing sporadic development activity, but there is not much of a
|
||||
lively community around it. Most people have moved on to other solutions.
|
||||
|
||||
=item GHCJS
|
||||
|
||||
L<GHCJS|https://github.com/ghcjs/ghcjs> uses GHC itself to compile Haskell to a
|
||||
low-level intermediate language, and then compiles that language to JavaScript.
|
||||
This allows GHCJS to achieve excellent compatibility with native Haskell code,
|
||||
but comes, quite predictably, at the high cost of duplicating a large part of
|
||||
the Haskell runtime into the JavaScript output. The generated JavaScript code
|
||||
is typically measured in megabytes rather than kilobytes, which is (in my
|
||||
opinion) far too large for regular web sites. The upside of this high
|
||||
compatibility, of course, is that you can re-use a lot of code between the
|
||||
front and back ends, which will certainly make web development more tolerable.
|
||||
|
||||
The community around GHCJS seems to be more active than that of Fay. GHCJS
|
||||
integrates properly with the Stack package manager, and there are a L<whole
|
||||
bunch|http://hackage.haskell.org/packages/search?terms=ghcjs> of libraries
|
||||
available.
|
||||
|
||||
=item Haste
|
||||
|
||||
L<Haste|https://github.com/valderman/haste-compiler> provides a middle ground
|
||||
between Fay and GHCJS. Like GHCJS, Haste is based on GHC, but instead of
|
||||
using low-level compiler output, Haste uses a higher-level intermediate
|
||||
language. This results in good compatibility with regular Haskell code while
|
||||
keeping the output size in check. Haste has a JavaScript runtime of around 60
|
||||
KiB and the compiled code is roughly as space-efficient as Fay.
|
||||
|
||||
While it should be possible to share a fair amount of code between the front
|
||||
and back ends, not all libraries work well with Haste. I tried to use Lucid
|
||||
within a Haste application, for example, but that did not work. Apparently one
|
||||
of its dependencies (probably the UTF-8 codec, as far as I could debug the
|
||||
problem) performs some low-level performance optimizations that are
|
||||
incompatible with Haste.
|
||||
|
||||
Haste itself is still being sporadically developed, but not active enough to be
|
||||
called alive. The compiler lags behind on the GHC version, and the upcoming 0.6
|
||||
version has stayed unreleased and in limbo state for at least 4 months on the
|
||||
git repository. The community around Haste is in a similar state. Various
|
||||
libraries do exist, such as L<Shade|https://github.com/takeoutweight/shade>
|
||||
(HTML DSL, Reactive UI), L<Perch|https://github.com/agocorona/haste-perch>
|
||||
(another HTML DSL), L<haste-markup|https://github.com/ajnsit/haste-markup> (yet
|
||||
another HTML DSL) and
|
||||
L<haste-dome|https://github.com/wilfriedvanasten/haste-dome> (I<yet> another
|
||||
HTML DSL), but they're all pretty much dead.
|
||||
|
||||
=back
|
||||
|
||||
Despite having three options available, only Haste provides enough benefit of
|
||||
code reuse while remaining efficient enough for the kind of site that I
|
||||
envision. Haste really deserves more love than it is currently getting.
|
||||
|
||||
=head2 More Haskell
|
||||
|
||||
In my quest for Haskell web development frameworks and tools, I came across a
|
||||
few other interesting libraries. One of them is
|
||||
L<Clay|http://fvisser.nl/clay/>, a CSS preprocessor as a DSL. This will by
|
||||
itself not solve the CSS synchronisation problem that I mentioned at the start
|
||||
of this article, but it could still be used to keep the CSS closer to code
|
||||
implementing the rest of the site.
|
||||
|
||||
It also would not do to write an article on Haskell web development and not
|
||||
mention a set of related projects: L<MFlow|https://github.com/agocorona/MFlow>,
|
||||
L<HPlayground|https://github.com/agocorona/hplayground> and the more recent
|
||||
L<Axiom|https://github.com/transient-haskell/axiom>. These are ambitious
|
||||
efforts at building a very high-level and functional framework for both front
|
||||
and back end web development. I haven't spent nearly enough time on these
|
||||
projects to fully understand their scope, but I'm afraid of these being a bit
|
||||
too high level. This invariably results in reduced flexibility (i.e. too many
|
||||
opinions being hard-coded in the API) and less efficient JavaScript output.
|
||||
Axiom being based on GHCJS reinforces the latter concern.
|
||||
|
||||
|
||||
=head1 Other languages
|
||||
|
||||
I've covered OCaml and Haskell now, but there are relevant projects in other
|
||||
languages, too:
|
||||
|
||||
=over
|
||||
|
||||
=item PureScript
|
||||
|
||||
L<PureScript|http://www.purescript.org/> is the spiritual successor of Fay -
|
||||
except it does not try to be compatible with Haskell, and in fact
|
||||
L<intentionally deviates from
|
||||
Haskell|https://github.com/purescript/documentation/blob/master/language/Differences-from-Haskell.md>
|
||||
at several points. Like Fay, and perhaps even more so, PureScript compiles down
|
||||
to efficient and small JavaScript.
|
||||
|
||||
Being a not-quite-Haskell language, sharing code between a PureScript front end
|
||||
and a Haskell back end is not possible, the differences are simply too large.
|
||||
It is, however, possible to go into the other direction: PureScript could also
|
||||
run on the back end in a NodeJS environment. I don't really know how well this
|
||||
is supported by the language ecosystem, but I'm not sure I'm comfortable with
|
||||
replacing the excellent quality of Haskell back end frameworks with a fragile
|
||||
NodeJS back end (or such is my perception, I admittedly don't have too much
|
||||
faith in most JavaScript-heavy projects).
|
||||
|
||||
The PureScript community is very active and many libraries are available in the
|
||||
L<Pursuit|https://pursuit.purescript.org/> package repository. Of note is
|
||||
L<Halogen|https://pursuit.purescript.org/packages/purescript-halogen>, a
|
||||
high-level reactive UI library. One thing to be aware of is that not all
|
||||
libraries are written with space efficiency as their highest priority, the
|
||||
simple L<Halogen
|
||||
button|https://github.com/slamdata/purescript-halogen/tree/v2.0.1/examples/basic>
|
||||
example already compiles down to a hefty 300 KB for me.
|
||||
|
||||
=item Elm
|
||||
|
||||
L<Elm|http://elm-lang.org/> is similar to PureScript, but rather than trying to
|
||||
be a generic something-to-JavaScript compiler, Elm focuses exclusively on
|
||||
providing a good environment to create web UIs. The reactive UI libraries are
|
||||
well maintained and part of the core Elm project. Elm has a strong focus on
|
||||
being easy to learn and comes with good documentation and many examples to get
|
||||
started with.
|
||||
|
||||
=item Ur/Web
|
||||
|
||||
L<Ur/Web|http://www.impredicative.com/ur/> is an ML and Haskell inspired
|
||||
programming language specifically designed for client/server programming. Based
|
||||
on its description, Ur/Web is exactly the kind of thing I'm looking for: It
|
||||
uses a single language for the front and back ends and provides convenient
|
||||
methods for communication between the two.
|
||||
|
||||
This has been a low priority on my to-try list because it seems to be primarily
|
||||
a one-man effort, and the ecosystem around it is pretty small. Using Ur/Web for
|
||||
practical applications will likely involve writing your own libraries or
|
||||
wrappers for many common tasks, such as for image manipulation or advanced text
|
||||
processing. Nonetheless, I definitely should be giving this a try sometime.
|
||||
|
||||
(Besides, who still uses frames in this day and age? :-)
|
||||
|
||||
=item Opa
|
||||
|
||||
I'll be moving out of the functional programming world for a bit.
|
||||
|
||||
L<Opa|http://opalang.org/> is another language and environment designed for
|
||||
client/server programming. Opa takes a similar approach to "everything in
|
||||
PureScript": Just compile everything to JavaScript and run the server-side code
|
||||
on NodeJS. The main difference with other to-JavaScript compilers is that Opa
|
||||
supports mixing back end code with front end code, and it can automatically
|
||||
figure out where the code should be run and how the back and front ends
|
||||
communicate with each other.
|
||||
|
||||
Opa, as a language, is reminiscent of a statically-typed JavaScript with
|
||||
various syntax extensions. While it does support SQL databases, its database
|
||||
API seems to strongly favor object-oriented use rather than relational database
|
||||
access.
|
||||
|
||||
=item GWT
|
||||
|
||||
Previously I compared web development to native GUI application development.
|
||||
There is no reason why you can't directly apply native development structure
|
||||
and strategies onto the web, and that's exactly what
|
||||
L<GWT|http://www.gwtproject.org/> does. It provides a widget-based programming
|
||||
environment that eventually runs on the server and compiles the client-side
|
||||
part to JavaScript. I haven't really considered it further, as Java is not a
|
||||
language I can be very productive in.
|
||||
|
||||
=item Webtoolkit
|
||||
|
||||
In the same vein, there's L<Wt|https://www.webtoolkit.eu/wt>. The name might
|
||||
suggest that it is a web-based clone of Qt, and indeed that's what it looks
|
||||
like. Wt is written in C++, but there are wrappers for L<other
|
||||
languages|https://www.webtoolkit.eu/wt/other_language>. None of the languages
|
||||
really interest me much, however.
|
||||
|
||||
That said, if I had to write a web UI for a resource-constrained device, this
|
||||
seems like an excellent project to consider.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
=head1 To conclude
|
||||
|
||||
To be honest, I am a bit overwhelmed at the number of options. On the one hand,
|
||||
it makes me very happy to see that a lot is happening in this world, and that
|
||||
alternatives to boring web frameworks do exist. Yet after all this research I
|
||||
still have no clue what I should use to develop my next website. I do like the
|
||||
mix and match culture of Haskell, which has the potential to form a development
|
||||
environment entirely to my own taste and with my own chosen trade-offs. On the
|
||||
other hand, the client-side Haskell solutions are simply too immature and
|
||||
integration with the back end frameworks is almost nonexistent.
|
||||
|
||||
Almost none of the frameworks I discussed attempt to tackle the CSS problem
|
||||
that I mentioned in the introduction, so there is clearly room for more
|
||||
research in this area.
|
||||
|
||||
There are a few technologies that I should spend more time on to familiarize
|
||||
myself with. Ur/Web is an obvious candidate here, but perhaps it is possible to
|
||||
create a Haskell interface to Wt. Or maybe some enhancements to the Haste
|
||||
ecosystem could be enough to make that a workable solution instead.
|
||||
45
dat/doc.md
Normal file
45
dat/doc.md
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
% Writing
|
||||
|
||||
I don't often write stuff. Certainly not enough to warrant a blog. But
|
||||
sometimes I do feel the need to write down my thoughts. The results of those
|
||||
rare occasions are published on this page.
|
||||
|
||||
## Articles That May As Well Be Considered Blog Posts
|
||||
|
||||
`2017-05-28` - [An Opinionated Survey of Functional Web Development](/doc/funcweb)
|
||||
: The title says it all.
|
||||
|
||||
`2014-07-29` - [The Sorry State of Convenient IPC](/doc/easyipc)
|
||||
: A long rant about IPC systems.
|
||||
|
||||
`2014-01-09` - [Some Measurements on Direct Connect File Lists](/doc/dcstats)
|
||||
: A short measurement study on the file lists obtained from a Direct Connect
|
||||
hub. Lots of graphs!
|
||||
|
||||
`2012-02-15` - [A Distributed Communication System for Modular Applications](/doc/commvis)
|
||||
: In this article I explain a vision of mine, and the results of a small
|
||||
research project aimed at realizing that vision.
|
||||
|
||||
`2011-11-26` - [Multi-threaded Access to an SQLite3 Database](/doc/sqlaccess)
|
||||
: So you have a single database and some threads. How do you combine these in
|
||||
a program?
|
||||
|
||||
## Longer Reports
|
||||
|
||||
`2014-06-10` - [Biased Random Periodic Switching in Direct Connect](/download/doc/brpsdc.pdf) (PDF)
|
||||
: My master's thesis.
|
||||
|
||||
`2013-04-05` - [Peer Selection in Direct Connect](/download/doc/psdc.pdf) (PDF)
|
||||
: The rather long-ish literature study that preceded my master's thesis.
|
||||
|
||||
`2010-06-02` - [Design and implementation of a compressed linked list library](https://dev.yorhel.nl/download/doc/compll.pdf) (PDF)
|
||||
: The report for the final project of my professional (HBO) bachelor of
|
||||
Electrical Engineering. I was very liberal with some terminology in this
|
||||
report. For example, "linked lists" aren't what you think they are, and I
|
||||
didn't even use the term "locality of reference" where I really should
|
||||
have. It was also written for an audience with little knowledge on the
|
||||
subject, so I elaborated on a lot of things that should be obvious for most
|
||||
people in the field. Then there is a lot of uninteresting overhead about
|
||||
the project itself, which just happened to be mandatory for this report.
|
||||
Nonetheless, if you can ignore these faults it's not such a bad read, if I
|
||||
may say so myself. :-)
|
||||
|
|
@ -1,11 +1,8 @@
|
|||
A Distributed Communication System for Modular Applications
|
||||
% A Distributed Communication System for Modular Applications
|
||||
|
||||
=pod
|
||||
(Published on **2012-02-15**)
|
||||
|
||||
(Published on B<2012-02-15>. Also available in L<POD|https://dev.yorhel.nl/dat/doc-commvis>.)
|
||||
|
||||
|
||||
=head1 Introduction
|
||||
# Introduction
|
||||
|
||||
I have a vision. A vision in which rigid point-to-point IPC is replaced with a
|
||||
far more flexible and distributed communication system. A vision in which
|
||||
|
|
@ -24,21 +21,19 @@ believe to have found an answer. In this article I'll try to explain my ideas
|
|||
and how they may be used to realize this vision.
|
||||
|
||||
My ideas have been heavily inspired by
|
||||
L<Linda|http://en.wikipedia.org/wiki/Linda_(coordination_language)>. If you're
|
||||
already familiar with that, then what I present here probably won't be very
|
||||
revolutionary. Still, there are several aspects in which my ideas differ
|
||||
[Linda](https://en.wikipedia.org/wiki/Linda_\(coordination_language\)). If
|
||||
you're already familiar with that, then what I present here probably won't be
|
||||
very revolutionary. Still, there are several aspects in which my ideas differ
|
||||
significantly from Linda, so you won't be bored reading this. :-)
|
||||
|
||||
|
||||
|
||||
=head1 The Concept
|
||||
# The Concept
|
||||
|
||||
In this section I'll try to introduce the overall concept and some terminology.
|
||||
This is going to be somewhat abstract and technical, but please bear with me.
|
||||
I promise that things will get more interesting in the later sections.
|
||||
|
||||
Let me first define an abstract communications framework. We have a B<network>
|
||||
and a bunch of B<sessions> connected to that network. Sessions can communicate
|
||||
Let me first define an abstract communications framework. We have a **network**
|
||||
and a bunch of **sessions** connected to that network. Sessions can communicate
|
||||
with each other through this network (that's usually what a network is for,
|
||||
after all). These sessions do not have to be static: they may come and go.
|
||||
Keep in mind that, for the purpose of explaining this concept, these terms are
|
||||
|
|
@ -49,15 +44,15 @@ sockets, pipes, a HTTP server, a broadcast network or just shared memory
|
|||
between threads. If it allows sessions to communicate I'll call it a network.
|
||||
|
||||
Unlike many communication systems, this network does not have the concept of
|
||||
I<addresses>. There is no direct way for one session to identify another, and
|
||||
_addresses_. There is no direct way for one session to identify another, and
|
||||
indeed there is no need to do so for the purposes of communication. Instead,
|
||||
the primary means of communication is by using B<tuples> and patterns.
|
||||
the primary means of communication is by using **tuples** and patterns.
|
||||
|
||||
A tuple is an ordered set (list, array, whatever terminology you prefer) of
|
||||
zero or more elements. Each element may have a different type, so it can hold
|
||||
booleans, integers, floating point numbers, strings and even more complex data
|
||||
structures as arrays or maps. You may think of a tuple as an array in
|
||||
L<JSON|http://json.org/> notation, if that makes things easier to understand.
|
||||
[JSON](https://json.org/) notation, if that makes things easier to understand.
|
||||
|
||||
Sessions send and receive tuples to communicate with each other. On the sending
|
||||
side, a session simply "passes" a tuple to the network. This is a non-blocking,
|
||||
|
|
@ -66,12 +61,12 @@ action, because the sender can not know whether it will be received by any
|
|||
other session anyway. The tuple may be received by many other sessions, or
|
||||
there may not even be a single session interested in the tuple at all.
|
||||
|
||||
On the receiving side, sessions B<register> patterns. A pattern itself is
|
||||
On the receiving side, sessions **register** patterns. A pattern itself is
|
||||
mostly just a tuple, but with a more limited set of allowed types: only those
|
||||
types for which exact matching makes sense, like booleans, integers and
|
||||
strings. A pattern matches an incoming tuple if the first C<n> elements of the
|
||||
strings. A pattern matches an incoming tuple if the first `n` elements of the
|
||||
tuple exactly match the corresponding elements of the pattern. A special
|
||||
I<wildcard> element may be used to match any value of any type.
|
||||
_wildcard_ element may be used to match any value of any type.
|
||||
|
||||
A sessions thus only receives tuples from other sessions if they have
|
||||
registered a pattern for them. As mentioned, it is not illegal to send a tuple
|
||||
|
|
@ -83,7 +78,7 @@ own patterns, then it will receive its own tuple. (However, programming
|
|||
interfaces might allow this to be detected and/or disabled if this eases the
|
||||
implementation of a session).
|
||||
|
||||
Finally, there is the concept of a B<return-path>. Upon sending out a tuple, a
|
||||
Finally, there is the concept of a **return-path**. Upon sending out a tuple, a
|
||||
session may indicate that it is interested in receiving replies. The network
|
||||
is then responsible for providing a return-path: a way for receivers of the
|
||||
tuple to reply to it. When a tuple is received, the session has the option to
|
||||
|
|
@ -99,18 +94,15 @@ received by multiple sessions, then the replies will be interleaved over the
|
|||
return-path, and the path is closed when all of the receiving sessions have
|
||||
closed their end.
|
||||
|
||||
|
||||
|
||||
=head1 Common design patterns and solutions
|
||||
# Common design patterns and solutions
|
||||
|
||||
The previous section was rather abstract. This section provides several
|
||||
examples on how to do common tasks and design patterns by using the previously
|
||||
described concepts.
|
||||
|
||||
## Broadcast notifications
|
||||
|
||||
=head2 Broadcast notifications
|
||||
|
||||
This is commonly implemented in OOP systems using the I<Observer pattern>.
|
||||
This is commonly implemented in OOP systems using the _Observer pattern_.
|
||||
Implementing the same using tuples and patterns is an order of magnitude more
|
||||
simple, as broadcast notifications are pretty much the native means of
|
||||
communication.
|
||||
|
|
@ -125,56 +117,43 @@ that follows a certain pattern, then you very easily achieve the same
|
|||
functionality as with an OOP implementation. In fact, there are some advantages
|
||||
to doing it this way:
|
||||
|
||||
=over
|
||||
|
||||
=item *
|
||||
|
||||
Sessions stay registered to the same notifications even if the "object" (the
|
||||
session that is being observed) is restarted or replaced with something else.
|
||||
It's the network itself that keeps track of the registrations, not the sessions
|
||||
that provide the notifications. Of course, this can be seen as a drawback, but
|
||||
you can easily emulate OOP behaviour by providing an extra notification when
|
||||
the "object" is shut down, indicating that the observing sessions can remove
|
||||
their patterns.
|
||||
|
||||
=item *
|
||||
|
||||
Since there is no need for the session that is being observed to keep a list of
|
||||
sessions that are observing it, it also doesn't have walk the list and send out
|
||||
multiple notifications. Notifying the observers is as simple as sending out a
|
||||
single tuple.
|
||||
|
||||
=item *
|
||||
|
||||
Many implementations of the Observer pattern maintain only a single list of
|
||||
observers per object, and each listed observer will be notified for every
|
||||
change to the object. For example, if an object maintains a list and provides
|
||||
notifications when something is added and deleted to the list, every observer
|
||||
will be notified of both the "added" action and the "deleted" action. The use
|
||||
of tuples and patterns allows observers to register for all actions, or just
|
||||
for a single one. If an "add" action would be notified with a tuple of
|
||||
C<["object", "add", id]> and a "delete" action with
|
||||
C<["object", "delete", id]>, then an observing session can register with the
|
||||
pattern C<["object", *]> to be notified for both actions, or just
|
||||
C<["object", "add"]> to register only for additions.
|
||||
|
||||
=back
|
||||
- Sessions stay registered to the same notifications even if the "object" (the
|
||||
session that is being observed) is restarted or replaced with something else.
|
||||
It's the network itself that keeps track of the registrations, not the
|
||||
sessions that provide the notifications. Of course, this can be seen as a
|
||||
drawback, but you can easily emulate OOP behaviour by providing an extra
|
||||
notification when the "object" is shut down, indicating that the observing
|
||||
sessions can remove their patterns.
|
||||
- Since there is no need for the session that is being observed to keep a list
|
||||
of sessions that are observing it, it also doesn't have to walk the list and
|
||||
send out multiple notifications. Notifying the observers is as simple as
|
||||
sending out a single tuple.
|
||||
- Many implementations of the Observer pattern maintain only a single list of
|
||||
observers per object, and each listed observer will be notified for every
|
||||
change to the object. For example, if an object maintains a list and provides
|
||||
notifications when something is added and deleted to the list, every observer
|
||||
will be notified of both the "added" action and the "deleted" action. The use
|
||||
of tuples and patterns allows observers to register for all actions, or just
|
||||
for a single one. If an "add" action would be notified with a tuple of
|
||||
`["object", "add", id]` and a "delete" action with `["object", "delete",
|
||||
id]`, then an observing session can register with the pattern `["object",
|
||||
*]` to be notified for both actions, or just `["object", "add"]` to
|
||||
register only for additions.
|
||||
|
||||
Of course, this is only one way to implement a notification mechanism. There
|
||||
are also solutions that more accurately mimic the behaviour of the Observer
|
||||
pattern OOP in cases where that is desired.
|
||||
|
||||
## Commands
|
||||
|
||||
=head2 Commands
|
||||
|
||||
A I<command> is what I call something along the lines of one session telling an
|
||||
A _command_ is what I call something along the lines of one session telling an
|
||||
other session to do something. Suppose we have a session representing a file
|
||||
system. A command for this session could then be something like "delete file
|
||||
X".
|
||||
|
||||
In a sense, this isn't much different from a notification as described above.
|
||||
The file system session would have registered a pattern like
|
||||
C<["fs", "delete", *]>, where the wildcard is used for the file name. If an
|
||||
`["fs", "delete", *]`, where the wildcard is used for the file name. If an
|
||||
other session then wants to have a file deleted, the only thing it will have to
|
||||
do is send out a tuple matching that pattern, and the file system session will
|
||||
take care of deleting it.
|
||||
|
|
@ -183,25 +162,24 @@ In the above scenario, the session sending the command has no feedback
|
|||
whatsoever on whether the command has been successfully executed or not.
|
||||
Whether this is acceptable depends of course on the specific application. One
|
||||
way of still providing some form of feedback is to have the file system session
|
||||
send out a notification tuple, e.g. C<["fs", "deleted", "file"]> (Note that the
|
||||
second element is now C<deleted> rather than C<delete>. Using the same tuple
|
||||
send out a notification tuple, e.g. `["fs", "deleted", "file"]` (Note that the
|
||||
second element is now `deleted` rather than `delete`. Using the same tuple
|
||||
for actions and notifications is going to be very messy...). This way the
|
||||
session sending the command, in addition to any other sessions that happen to
|
||||
be interested in file deletion, will be notified of the deletion of the file.
|
||||
An alternative solution is to use the RPC-like method, as described below.
|
||||
|
||||
## RPC
|
||||
|
||||
=head2 RPC
|
||||
|
||||
L<RPC|http://en.wikipedia.org/wiki/Remote_procedure_cal> is in essence nothing
|
||||
else than providing an interface similar to a regular function call to a
|
||||
component that can't be reached via a regular function call (e.g. because the
|
||||
[RPC](https://en.wikipedia.org/wiki/Remote_procedure_call) is in essence
|
||||
nothing else than providing an interface similar to a regular function call to
|
||||
a component that can't be reached via a regular function call (e.g. because the
|
||||
object isn't inside the address space of the program). RPC is generally a
|
||||
request-response type of interaction, and making use of the return-path
|
||||
facility as I described earlier, all of the functionality of RPC is also
|
||||
available with the concept of tuple communication.
|
||||
|
||||
=head3 Commands, the RPC-way
|
||||
### Commands, the RPC-way
|
||||
|
||||
Take the previous file system example. Instead of just sending the command
|
||||
tuple to delete the file, the session could indicate that it is interested in
|
||||
|
|
@ -224,19 +202,16 @@ a notification tuple. Of course, it all depends on the application whether this
|
|||
is necessary, you only have to implement the functionality that is necessary
|
||||
for your purposes.
|
||||
|
||||
=head3 Requesting information
|
||||
### Requesting information
|
||||
|
||||
Another use of RPC, and thus also of the return-path, is to allow sessions to
|
||||
request information from each other. Using the same example again, the file
|
||||
system session could register for a pattern such as C<["fs", "list"]>. Upon
|
||||
system session could register for a pattern such as `["fs", "list"]`. Upon
|
||||
receiving a tuple matching that pattern, the session would send a list of all
|
||||
its files over the return-path. Other sessions can then request this list by
|
||||
simply sending out the right tuple and waiting for the replies.
|
||||
|
||||
|
||||
|
||||
|
||||
=head1 Advantages over other systems
|
||||
# Advantages over other systems
|
||||
|
||||
Now that I've hopefully convinced you that my communication concept is powerful
|
||||
enough to build applications with it, you may be wondering why you should use
|
||||
|
|
@ -246,10 +221,10 @@ systems. Let me present some of the inherent advantages that this system has
|
|||
compared to others, and why it will help in designing flexible and modular
|
||||
applications.
|
||||
|
||||
=head2 Loose coupling of components
|
||||
## Loose coupling of components
|
||||
|
||||
Sessions (representing the components of a system) do not have to have a lot of
|
||||
knowledge about each other. Sessions implicitly provide abstracted I<services>
|
||||
knowledge about each other. Sessions implicitly provide abstracted _services_
|
||||
using tuple communications, in much the same way as interfaces explicitly do in
|
||||
OOP.
|
||||
|
||||
|
|
@ -262,16 +237,16 @@ worry about how long a certain function call block the callers' thread. Since
|
|||
communication between the different sessions is completely asynchronous, these
|
||||
worries are gone.
|
||||
|
||||
=head2 Location independence
|
||||
## Location independence
|
||||
|
||||
Sessions can communicate with other sessions without knowing I<where> they are.
|
||||
Sessions can communicate with other sessions without knowing _where_ they are.
|
||||
This has as major advantage that a session can be moved around without having
|
||||
to change a single line of code in any of the sessions relying on its service.
|
||||
This allows sessions that communicate a lot with each other to be placed in the
|
||||
same process, while resource-heavy sessions may be distributed among several
|
||||
physical devices.
|
||||
|
||||
=head2 Programming language independence
|
||||
## Programming language independence
|
||||
|
||||
All communication is solely done with tuples, which can be represented as
|
||||
abstract objects and serialized and deserialized (or marshalled/unmarshalled,
|
||||
|
|
@ -290,7 +265,7 @@ sessions in a low-level language such as C. Similarly, it allows developers to
|
|||
hook into your application even when they are not familiar with your favorite
|
||||
programming language.
|
||||
|
||||
=head2 Easy debugging
|
||||
## Easy debugging
|
||||
|
||||
Not only can other applications and/or plugins hook into your application, you
|
||||
can also connect a simple debugger to the network. The debugger just has to
|
||||
|
|
@ -302,78 +277,67 @@ is being sent over a return-path is generally not visible to anyone but the
|
|||
receiver of the replies, although a network implementation might allow a
|
||||
debugging application to look into that as well.
|
||||
|
||||
|
||||
|
||||
=head1 Where to go from here
|
||||
# Where to go from here
|
||||
|
||||
What I've described above is nothing more than a bunch of ideas. To actually
|
||||
use this, there's a lot to be done.
|
||||
|
||||
=over
|
||||
Defining a "tuple"
|
||||
: What types can be used in tuples? Should a tuple have some maximum size or a
|
||||
maximum number of elements? Should a `NULL` type be included? What about a
|
||||
boolean type, why not use the integers 1 and 0 for that? Should it be possible
|
||||
to interchange binary data, or only UTF-8 strings?
|
||||
|
||||
=item Defining a "tuple"
|
||||
What will be the size of an integer that a session can reasonably assume to be
|
||||
available? Specifying something like "infinite" is going to be either
|
||||
inefficient in terms of memory and CPU overhead or will require extra overhead
|
||||
(in terms of code) in usage. Specifying that everything should fit in a 64bit
|
||||
integer is a lot more practical, but may be somewhat annoying to cope with in
|
||||
many dynamically typed languages running on 32bit architectures. Specifying
|
||||
that integers are 32bits will definitely ease the implementation of the network
|
||||
library in interpreted languages, but lowers the usefulness of the integer type
|
||||
and is still a pain to use in OCaml (which has 31bit integers).
|
||||
|
||||
What types can be used in tuples? Should a tuple have some maximum size or a
|
||||
maximum number of elements? Should a C<NULL> type be included? What about a
|
||||
boolean type, why not use the integers 1 and 0 for that? Should it be possible
|
||||
to interchange binary data, or only UTF-8 strings?
|
||||
These choices greatly affect the ease of implementing a networking library for
|
||||
specific programming languages and the ease of using the network to actually
|
||||
develop an application.
|
||||
|
||||
What will be the size of an integer that a session can reasonably assume to be
|
||||
available? Specifying something like "infinite" is going to be either
|
||||
inefficient in terms of memory and CPU overhead or will require extra overhead
|
||||
(in terms of code) in usage. Specifying that everything should fit in a 64bit
|
||||
integer is a lot more practical, but may be somewhat annoying to cope with in
|
||||
many dynamically typed languages running on 32bit architectures. Specifying
|
||||
that integers are 32bits will definitely ease the implementation of the network
|
||||
library in interpreted languages, but lowers the usefulness of the integer type
|
||||
and is still a pain to use in OCaml (which has 31bit integers).
|
||||
The exact semantics of matching
|
||||
: Somewhat similar to the previous point, the semantics of matching tuples with
|
||||
patterns should also be defined in some way. Some related questions are whether
|
||||
values of different types may be equivalent. For example, is the string
|
||||
`"1234"` equivalent to an integer with that value? What about NULL and/or
|
||||
boolean types? If there is a floating point type, you probably won't need exact
|
||||
matching on those values (floating points are too imprecise for that anyway),
|
||||
but you might still want the floating point number `10.0` to match the integer
|
||||
`10` to ease the use in dynamic languages where the distinction between
|
||||
integer and float is blurred.
|
||||
|
||||
These choices greatly affect the ease of implementing a networking library for
|
||||
specific programming languages and the ease of using the network to actually
|
||||
develop an application.
|
||||
Defining the protocol(s)
|
||||
: Making my vision of modularity and ease of use a reality requires that any
|
||||
session can easily communicate with an other session, even if they have a
|
||||
vastly different implementation. To do this, we need a protocol to connect
|
||||
multiple processes together, whether they run on a local machine or over a
|
||||
physical network.
|
||||
|
||||
=item The exact semantics of matching
|
||||
|
||||
Somewhat similar to the previous point, the semantics of matching tuples with
|
||||
patterns should also be defined in some way. Some related questions are whether
|
||||
values of different types may be equivalent. For example, is the string
|
||||
C<"1234"> equivalent to an integer with that value? What about NULL and/or
|
||||
boolean types? If there is a floating point type, you probably won't need exact
|
||||
matching on those values (floating points are too imprecise for that anyway),
|
||||
but you might still want the floating point number C<10.0> to match the integer
|
||||
C<10> to ease the use in dynamic languages where the distinction between
|
||||
integer and float is blurred.
|
||||
|
||||
=item Defining the protocol(s)
|
||||
|
||||
Making my vision of modularity and ease of use a reality requires that any
|
||||
session can easily communicate with an other session, even if they have a
|
||||
vastly different implementation. To do this, we need a protocol to connect
|
||||
multiple processes together, whether they run on a local machine or over a
|
||||
physical network.
|
||||
|
||||
=item Coding the stuff
|
||||
|
||||
Obviously, all of this remains as a mere concept if nothing ever gets
|
||||
implemented. Easy-to-use libraries are needed for several programming
|
||||
languages. And more importantly, actual applications will have to be developed
|
||||
using these libraries.
|
||||
|
||||
=back
|
||||
Coding the stuff
|
||||
: Obviously, all of this remains as a mere concept if nothing ever gets
|
||||
implemented. Easy-to-use libraries are needed for several programming
|
||||
languages. And more importantly, actual applications will have to be developed
|
||||
using these libraries.
|
||||
|
||||
Of course, realizing all of the above is an iterative process. You can't write
|
||||
an implementation without knowing what data types a tuple is made of, but it is
|
||||
equally impossible to determine the exact definition of a tuple without having
|
||||
experience with an actual implementation.
|
||||
|
||||
|
||||
=head2 What's the plan?
|
||||
## What's the plan?
|
||||
|
||||
I've been working on documenting the basics of the semantics and the
|
||||
point-to-point communication protocol, and have started on an early
|
||||
implementation in the Go programming language to experiment with. I've dubbed
|
||||
the project B<Tanja>, and have published my progress on a
|
||||
L<git repo|http://g.blicky.net/tanja.git/>.
|
||||
the project **Tanja**, and have published my progress on a
|
||||
[git repo](https://g.blicky.net/tanja.git/).
|
||||
|
||||
My intention is to also write implementations for C and Perl, experiment with
|
||||
that, and see if I can refine the semantics to make this concept one that is
|
||||
|
|
@ -389,5 +353,5 @@ how things work out.
|
|||
In either case, if this article managed to get you interested in this concept
|
||||
or in project Tanja, and you have any questions, feedback or (gasp!) feel like
|
||||
helping out, don't hesitate to contact me! I'm available as 'Yorhel' on Direct
|
||||
Connect at C<adc://blicky.net:2780> and IRC at C<irc.synirc.net>, or just drop
|
||||
me a mail at C<projects@yorhel.nl>.
|
||||
Connect at `adc://blicky.net:2780` and IRC at `irc.synirc.net`, or just drop me
|
||||
a mail at `projects@yorhel.nl`.
|
||||
|
|
@ -1,25 +1,21 @@
|
|||
Some Measurements on Direct Connect File Lists
|
||||
% Some Measurements on Direct Connect File Lists
|
||||
|
||||
=pod
|
||||
(Published on **2014-01-09**)
|
||||
|
||||
(Published on B<2014-01-09>.)
|
||||
|
||||
=head1 Introduction
|
||||
# Introduction
|
||||
|
||||
I've been working on Direct Connect related projects for a while now. This
|
||||
includes maintaining L<ncdc|https://dev.yorhel.nl/ncdc> and
|
||||
L<Globster|https://dev.yorhel.nl/globster>, and doing a bit of research into
|
||||
improving the downloading performance and scalability (to be published at some
|
||||
later date). Whether I'm writing code or trying to setup experiments for
|
||||
research, there's one thing that helps a lot in making decisions. Measurements
|
||||
from an actual network.
|
||||
includes maintaining [ncdc](/ncdc) and [Globster](/globster), and doing a bit
|
||||
of research into improving the downloading performance and scalability (to be
|
||||
published at some later date). Whether I'm writing code or trying to set up
|
||||
experiments for research, there's one thing that helps a lot in making
|
||||
decisions. Measurements from an actual network.
|
||||
|
||||
Because useful measurements are often missing, I decided to do some myself.
|
||||
There's a lot to measure in an actual P2P network, but I restricted myself to
|
||||
information that can be gathered quite easily from file lists.
|
||||
|
||||
|
||||
=head1 Obtaining the Data
|
||||
# Obtaining the Data
|
||||
|
||||
Different hubs will likely have totally different patterns in terms of what is
|
||||
being shared. In order to keep this experiment simple, I limited myself to a
|
||||
|
|
@ -40,7 +36,7 @@ the evening.
|
|||
|
||||
One thing I learned from this experience was that the downloading algorithm in
|
||||
ncdc (1.18.1) does not scale particularly well. Every 60 seconds, it would try
|
||||
to open a connection with B<all> users listed in the download queue. You can
|
||||
to open a connection with **all** users listed in the download queue. You can
|
||||
imagine that trying to connect to 11k users simultaneously put a significantly
|
||||
heavier load on the hub than would have been necessary. Not good. Not something
|
||||
a well-behaving netizen would do. Surprisingly enough, the hub didn't seem to
|
||||
|
|
@ -50,7 +46,7 @@ are typically not the most busy days in P2P land. Weekends tend to be busier.
|
|||
Despite that scalability issue, I successfully managed to download the file
|
||||
lists of almost everyone who remained online for long enough to finally get
|
||||
their list downloaded. In total I managed to download 14143 file lists (that's
|
||||
one list too many for C<10000*sqrt(2)>, I should have stopped the process a bit
|
||||
one list too many for `10000*sqrt(2)`, I should have stopped the process a bit
|
||||
earlier). The total bzip2-compressed size of these lists is 6.5 GiB.
|
||||
|
||||
For obvious reasons, I won't be sharing my modifications to ncdc. I already
|
||||
|
|
@ -58,15 +54,14 @@ tarnished the reputation of ncdc enough in that single day. If you wish to
|
|||
repeat this experiment, please do so with a scalable downloading
|
||||
implementation. :-)
|
||||
|
||||
|
||||
=head1 Obtaining the Stats
|
||||
# Obtaining the Stats
|
||||
|
||||
And then comes the challenge of aggregating statistics on 6.5 GiB of compressed
|
||||
XML files. This didn't really sound like much of a challenge. After all, all
|
||||
one needs to do is decompress the file lists, do some XML parsing and update
|
||||
some values. Most of the CPU time in this process would likely be spent on
|
||||
bzip2 decompression, so I figured I'd just pipe the output of L<bzcat(1)> to a
|
||||
Perl script and be done with it.
|
||||
bzip2 decompression, so I figured I'd just pipe the output of
|
||||
[bzcat(1)](https://manned.org/bzcat) to a Perl script and be done with it.
|
||||
|
||||
To get the statistics on the sizes and the distribution of unique files, a data
|
||||
structure containing information on all unique files in the lists was
|
||||
|
|
@ -81,73 +76,73 @@ solution was needed.
|
|||
When faced with such a problem, some people will try to optimize the algorithm,
|
||||
others will throw extra hardware at it, and I did what I do best: Optimize away
|
||||
the constants. That is, I rewrote the data analysis program in C. Using the
|
||||
excellent L<khash|https://github.com/attractivechaos/klib> hash table library
|
||||
to keep track of the file information and the equally awesome
|
||||
L<yxml|https://dev.yorhel.nl/yxml> library (a little bit of self-promotion
|
||||
doesn't hurt, right?) to do the XML parsing, I was able to do all the necessary
|
||||
processing in 30 minutes using at most 3.6GB of RAM.
|
||||
excellent [khash](https://github.com/attractivechaos/klib) hash table library
|
||||
to keep track of the file information and the equally awesome [yxml](/yxml)
|
||||
library (a little bit of self-promotion doesn't hurt, right?) to do the XML
|
||||
parsing, I was able to do all the necessary processing in 30 minutes using at
|
||||
most 3.6GB of RAM.
|
||||
|
||||
Long story short, here's my analysis program:
|
||||
L<dcfilestats.c|http://g.blicky.net/dcstats.git/tree/dcfilestats.c>.
|
||||
[dcfilestats.c](https://g.blicky.net/dcstats.git/tree/dcfilestats.c).
|
||||
|
||||
|
||||
=head1 A Look at the Stats
|
||||
# A Look at the Stats
|
||||
|
||||
Some lists didn't decompress/parse correctly, so the actual number of file
|
||||
lists used in these stats is B<14137>. The total compressed size of these lists
|
||||
is B<6,945,269,469> bytes (6.5 GiB), and uncompressed B<25,533,519,352> bytes
|
||||
(24 GiB). In total these lists mentioned B<197,413,253> files. After taking
|
||||
duplicate listings in account, there's still B<84,131,932> unique files.
|
||||
lists used in these stats is **14137**. The total compressed size of these
|
||||
lists is **6,945,269,469** bytes (6.5 GiB), and uncompressed **25,533,519,352**
|
||||
bytes (24 GiB). In total these lists mentioned **197,413,253** files. After
|
||||
taking duplicate listings into account, there's still **84,131,932** unique
|
||||
files.
|
||||
|
||||
And now for some graphs...
|
||||
|
||||
=head2 Size of the File Lists
|
||||
## Size of the File Lists
|
||||
|
||||
Behold, the compressed and uncompressed size of the downloaded file lists:
|
||||
|
||||
[img graph dclistsize.png ]
|
||||

|
||||
|
||||
Nothing too surprising here, I guess. 100 KiB seems to be a common size for a
|
||||
compressed file lists, but lists of 1 MiB aren't too weird, either. The largest
|
||||
file list in this set is 34.8 MiB compressed and 120 MiB uncompressed. The
|
||||
uncompressed size of a list tends to be (*gasp*) a bit larger, but we can't
|
||||
uncompressed size of a list tends to be (\*gasp\*) a bit larger, but we can't
|
||||
easily infer the compression ratio from this graph. Hence, another graph:
|
||||
|
||||
[img graph dclistcomp.png ]
|
||||

|
||||
|
||||
Most file lists compress to about 24% - 35% of their original size. This seems
|
||||
to be consistent with L<similar
|
||||
measurements|http://forum.dcbase.org/viewtopic.php?f=18&t=667> done in 2010.
|
||||
to be consistent with [similar
|
||||
measurements](http://forum.dcbase.org/viewtopic.php?f=18&t=667) done in 2010.
|
||||
|
||||
The raw data for these graphs is found in
|
||||
L<dclistsize|http://g.blicky.net/dcstats.git/tree/dclistsize>, which lists the
|
||||
[dclistsize](https://g.blicky.net/dcstats.git/tree/dclistsize), which lists the
|
||||
compressed and uncompressed size, respectively, for each file list. The gnuplot
|
||||
script for the first graph is
|
||||
L<dclistsize.plot|http://g.blicky.net/dcstats.git/tree/dclistsize.plot> and
|
||||
L<dclistcomp.plot|http://g.blicky.net/dcstats.git/tree/dclistcomp.plot> for the
|
||||
[dclistsize.plot](https://g.blicky.net/dcstats.git/tree/dclistsize.plot) and
|
||||
[dclistcomp.plot](https://g.blicky.net/dcstats.git/tree/dclistcomp.plot) for the
|
||||
second.
|
||||
|
||||
=head2 Number of Files Per List
|
||||
## Number of Files Per List
|
||||
|
||||
So how many files are people sharing? Let's find out.
|
||||
|
||||
[img graph dcnumfiles.png ]
|
||||

|
||||
|
||||
As expected, this graph looks very similar to the one about the size of the
|
||||
file list. The size of a list tends to be linear in the number of items it
|
||||
holds, after all.
|
||||
|
||||
The raw data for this graph is found in
|
||||
L<dcnumfiles|http://g.blicky.net/dcstats.git/tree/dcnumfiles>, which lists the
|
||||
[dcnumfiles](https://g.blicky.net/dcstats.git/tree/dcnumfiles), which lists the
|
||||
unique and total number of files, respectively, for each file list. The gnuplot
|
||||
script is
|
||||
L<dcnumfiles.plot|http://g.blicky.net/dcstats.git/tree/dcnumfiles.plot>.
|
||||
[dcnumfiles.plot](https://g.blicky.net/dcstats.git/tree/dcnumfiles.plot).
|
||||
|
||||
=head2 File Sizes
|
||||
## File Sizes
|
||||
|
||||
And how large are the files being shared? Well,
|
||||
|
||||
[img graph dcfilesize.png ]
|
||||

|
||||
|
||||
This graph is fun, and rather hard to explain without knowing what kind of
|
||||
files we're dealing with. I'm not going to do any further analysis on what kind
|
||||
|
|
@ -169,25 +164,25 @@ information in the file lists, but I don't expect the number of fake files to
|
|||
be very significant.
|
||||
|
||||
The "raw" data for this graph is found in
|
||||
L<dcfilesize|http://g.blicky.net/dcstats.git/tree/dcfilesize>. Because I wasn't
|
||||
[dcfilesize](https://g.blicky.net/dcstats.git/tree/dcfilesize). Because I wasn't
|
||||
interested in dealing with a text file of 84 million lines, the data is already
|
||||
binned. The first column is the bin number and the second column the number of
|
||||
unique files in that bin. The file sizes that each bin represents are between
|
||||
C<2^(bin+9)> and C<2^(bin+10)>, with the exception of bin 0, which starts at a
|
||||
`2^(bin+9)` and `2^(bin+10)`, with the exception of bin 0, which starts at a
|
||||
file size of 0. The source of the gnuplot script is
|
||||
L<dcfilesize.plot|http://g.blicky.net/dcstats.git/tree/dcfilesize.plot>.
|
||||
[dcfilesize.plot](https://g.blicky.net/dcstats.git/tree/dcfilesize.plot).
|
||||
|
||||
=head2 Distribution of Files
|
||||
## Distribution of Files
|
||||
|
||||
Another interesting thing to measure is how often files are shared. That is,
|
||||
how many users have the same file?
|
||||
|
||||
[img graph dcfiledist.png ]
|
||||

|
||||
|
||||
Many files are only available from a single user. That's not really a good sign
|
||||
when you wish to download such a file, but luckily there are also tons of files
|
||||
that I<are> available from multiple users. What is interesting in this graph
|
||||
isn't that it follows the L<power law|https://en.wikipedia.org/wiki/Power_law>,
|
||||
that _are_ available from multiple users. What is interesting in this graph
|
||||
isn't that it follows the [power law](https://en.wikipedia.org/wiki/Power_law),
|
||||
but it's wondering what those outliers could possibly be. There's a collection
|
||||
of 269 files that has been shared among 831 users, and there appears to be a
|
||||
similar group of around 510-515 files that is shared among 20 or so users. I've
|
||||
|
|
@ -201,12 +196,11 @@ the empty file. There are so many ways to get an empty file somewhere in your
|
|||
filesystem, after all.
|
||||
|
||||
The raw data for this graph is found in
|
||||
L<dcfiledist|http://g.blicky.net/dcstats.git/tree/dcfiledist>, which lists the
|
||||
[dcfiledist](https://g.blicky.net/dcstats.git/tree/dcfiledist), which lists the
|
||||
number of times shared and the aggregate number of files. The gnuplot script is
|
||||
L<dcfiledist.plot|http://g.blicky.net/dcstats.git/tree/dcfiledist.plot>.
|
||||
[dcfiledist.plot](https://g.blicky.net/dcstats.git/tree/dcfiledist.plot).
|
||||
|
||||
|
||||
=head1 Final Notes
|
||||
# Final Notes
|
||||
|
||||
So, erm, what conclusions can we draw from this? That stats are fun, I guess.
|
||||
If anyone (including me) is going to repeat this experiment on a fresh data
|
||||
|
|
@ -218,5 +212,4 @@ Furthermore, keep in mind that this is just a snapshot of a single day on a
|
|||
single hub. The graphs may look very different when the file lists are
|
||||
harvested at some other time. And it's also quite likely that different hubs
|
||||
will have very different share profiles. It could be interesting to try and
|
||||
graph everything, but I don't have I<that> kind of free time.
|
||||
|
||||
graph everything, but I don't have _that_ kind of free time.
|
||||
632
dat/doc/easyipc.md
Normal file
632
dat/doc/easyipc.md
Normal file
|
|
@ -0,0 +1,632 @@
|
|||
% The Sorry State of Convenient IPC
|
||||
|
||||
(Published on **2014-07-29**)
|
||||
|
||||
# The Problem
|
||||
|
||||
How do you implement communication between two or more processes? This is a
|
||||
question that has been haunting me for at least 6 years now. Of course, this
|
||||
question is very broad and has many possible answers, depending on your
|
||||
scenario. So let me get more specific by describing the problem I want to
|
||||
solve.
|
||||
|
||||
What I want is to write a daemon process that runs in the background and can be
|
||||
controlled from other programs or libraries. The intention is that people can
|
||||
easily write custom interfaces or quick scripts to control the daemon. The
|
||||
service that the daemon offers over this communication channel can be thought
|
||||
of as its primary API, in this way you can think of the daemon as a persistent
|
||||
programming library. This concept is similar to existing programs such as
|
||||
[btpd](https://github.com/btpd/btpd), [MPD](http://www.musicpd.org/),
|
||||
[Transmission](https://www.transmissionbt.com/) and
|
||||
[Telepathy](http://telepathy.freedesktop.org/wiki/) - I'll get back to these
|
||||
later.
|
||||
|
||||
More specifically, the most recent project I've been working on that follows
|
||||
this pattern is [Globster](/globster), a remotely controllable Direct Connect
|
||||
client (if you're not familiar with Direct Connect, think of it as IRC with
|
||||
some additional file sharing capabilities built in). While the problem I
|
||||
describe is not specific to Globster, it still serves as an important use case.
|
||||
I see many other projects with similar IPC requirements.
|
||||
|
||||
The IPC mechanism should support two messaging patterns: Request/response and
|
||||
asynchronous notifications. The request/response pattern is what you typically
|
||||
get in RPC systems - the client requests something of the daemon and the daemon
|
||||
then replies with a response. Asynchronous notifications are useful in allowing
|
||||
the daemon to send asynchronous status updates to the client, such as incoming
|
||||
chat messages or file transfer status. Lack of support for such notifications
|
||||
would mean that a client needs to continuously poll for updates, which is
|
||||
inefficient.
|
||||
|
||||
So what I'm looking for is a high-level IPC mechanism that handles this
|
||||
communication. Solutions are evaluated by the following criteria, in no
|
||||
particular order.
|
||||
|
||||
**Easy**
|
||||
: And with _easy_ I refer to _ease of use_. As mentioned above, other people
|
||||
should be able to write applications and scripts to control the daemon. Not
|
||||
many people are willing to invest days of work just to figure out how to
|
||||
communicate with the daemon.
|
||||
|
||||
**Simple**
|
||||
: Simplicity refers to the actual protocol and the complexity of the code
|
||||
necessary to implement it. Complex protocols require complex code, and complex
|
||||
code is hard to maintain and will inevitably contain bugs. Note that _simple_
|
||||
and _easy_ are very different things and often even conflict with each other.
|
||||
|
||||
**Small**
|
||||
: The IPC implementation shouldn't be too large, and shouldn't depend on huge
|
||||
libraries. If you need several megabytes worth of libraries just to send a few
|
||||
messages over a socket, you're doing it wrong.
|
||||
|
||||
**Language independent**
|
||||
: Control the daemon with whatever programming language you're familiar with.
|
||||
|
||||
**Networked**
|
||||
: A good solution should be accessible from both the local system (daemon running
|
||||
on the same machine as the client) and from the network (daemon and client
|
||||
running on different machines).
|
||||
|
||||
**Secure**
|
||||
: There are three parts to having a secure IPC mechanism. One part is to realize
|
||||
that IPC operates at a _trust boundary_; The daemon can't blindly trust
|
||||
everything the client says and vice versa, so message validation and other
|
||||
mechanisms to prevent DoS or information disclosure on either part are
|
||||
necessary.
|
||||
|
||||
Then there's the matter of _confidentiality_. On a local system, UNIX sockets
|
||||
will provide all the confidentiality you can get, so that's trivial. Networked
|
||||
access, on the other hand, requires some form of transport layer security.
|
||||
|
||||
And finally, we need some form of _authentication_. There should be some
|
||||
mechanism to prevent just about anyone to connect to the daemon. A
|
||||
coarse-grained solution such as file permissions on a local UNIX socket or a
|
||||
password-based approach for networked access will do just fine for most
|
||||
purposes. Really, just keep it simple.
|
||||
|
||||
**Fast**
|
||||
: Although performance isn't really a primary goal, the communication between the
|
||||
daemon and the clients shouldn't be too slow or heavyweight. For my purposes,
|
||||
anything that supports about a hundred messages a second on average hardware
|
||||
will do perfectly fine. And that shouldn't be particularly hard to achieve.
|
||||
|
||||
**Proxy support**
|
||||
: This isn't really a hard requirement either, but it would be nice to allow
|
||||
other processes (say, plugins of the daemon, or clients connecting to the
|
||||
daemon) to export services over the same IPC channel as the main daemon. This
|
||||
is especially useful in implementing a cross-language plugin architecture. But
|
||||
again, not a hard requirement, because even if the IPC mechanism doesn't
|
||||
directly support proxying, it's always possible for the daemon to implement
|
||||
some custom APIs to achieve the same effect. This, however, requires extra work
|
||||
and may not be as elegant as a built-in solution.
|
||||
|
||||
Now let's discuss some existing solutions...
|
||||
|
||||
# Custom Protocol
|
||||
|
||||
Why use an existing IPC mechanism in the first place when all you need is
|
||||
UNIX/TCP sockets? This is the approach taken by
|
||||
[btpd](https://github.com/btpd/btpd), [MPD](http://www.musicpd.org/)
|
||||
([protocol spec](http://www.musicpd.org/doc/protocol/index.html)) and older
|
||||
versions of Transmission (see their [1.2x
|
||||
spec](https://trac.transmissionbt.com/browser/branches/1.2x/doc/ipcproto.txt)).
|
||||
Btpd hasn't taken the time to document the protocol format, suggesting it's
|
||||
not really intended to be used as a convenient API (other than through their
|
||||
btcli), and Transmission has since changed to a different protocol. I'll mainly
|
||||
focus on MPD here.
|
||||
|
||||
MPD uses a text-based request/response mechanism, where each request is a
|
||||
simple one-line command and a response consists of one or more lines, ending
|
||||
with an `OK` or `ACK` line. There's no support for asynchronous
|
||||
notifications, although that could obviously have been implemented, too. Let's
|
||||
grade this protocol...
|
||||
|
||||
**Easy?** Not really.
|
||||
: Although MPD has conventions for how messages are formatted, each individual
|
||||
message still requires custom parsing and validation. This can be automated by
|
||||
designing an
|
||||
[IDL](https://en.wikipedia.org/wiki/Interface_description_language) and
|
||||
accompanying code generator, but writing one specific for a single project
|
||||
doesn't seem like a particularly fun task.
|
||||
|
||||
The protocol, despite its apparent simplicity, is apparently painful enough to
|
||||
use that there is a special _libmpdclient_ library to abstract away the
|
||||
communication with MPD, and interfaces to this library are available in many
|
||||
programming languages. If you have access to such an application-specific
|
||||
library for your language of choice, then sure, using the IPC mechanism is easy
|
||||
enough. But that applies to literally any IPC mechanism.
|
||||
|
||||
Ideally, such a library needs to be written only once for the IPC mechanism in
|
||||
use, and after that no additional code is needed to communicate with
|
||||
services/daemons using that particular IPC mechanism. Code re-use among
|
||||
different projects is great, yo. It also doesn't scale very well when extending
|
||||
the services offered by daemon, any addition to the API will require
|
||||
modifications to all implementations.
|
||||
|
||||
**Simple?** Definitely.
|
||||
: I only needed a quick glance at the MPD protocol reference and I was able to
|
||||
play a bit with telnet and control my MPD. Writing an implementation doesn't
|
||||
seem like a complex task. Of course, this doesn't necessarily apply to all
|
||||
custom protocols, but you can make it as simple or complex as you want it to
|
||||
be.
|
||||
|
||||
**Small?** Sure.
|
||||
: This obviously depends on how elaborate you design your protocol. If you have a
|
||||
large or complex API, the size of a generic message parser and validator can
|
||||
easily compensate for the custom parser and validator needed for each custom
|
||||
message. But for a simple APIs, it's hard to beat a custom protocol in terms of
|
||||
size.
|
||||
|
||||
**Language independent?** Depends.
|
||||
: Of course, a socket library is available to most programming languages, and in
|
||||
that sense any IPC mechanism built on sockets is language independent. This is,
|
||||
as such, more of an argument as to how convenient it is to communicate with the
|
||||
protocol directly rather than with a library that abstracts the protocol away.
|
||||
In the case of MPD, the text-based protocol seems easy enough to use directly
|
||||
from most languages, yet for some reason most people prefer language-specific
|
||||
libraries for MPD.
|
||||
|
||||
If you design a binary protocol or anything more complex than simple
|
||||
request/response message types, using your protocol directly is going to be a
|
||||
pain in certain languages, and people will definitely want a library specific
|
||||
to your daemon for their favourite programming language. Something you'll want
|
||||
to avoid, I suppose.
|
||||
|
||||
**Networked?** Sure enough.
|
||||
: Just a switch between UNIX sockets and TCP sockets. Whether a simple solution
|
||||
like that is a good idea, however, depends on the next point...
|
||||
|
||||
**Secure?** Ugh.
|
||||
: Security is hard to get right, so having an existing infrastructure that takes
|
||||
care of most security sensitive features will help a lot. Implementing your own
|
||||
protocol means that you also have to implement your own security, to some
|
||||
extent at least.
|
||||
|
||||
Writing code to parse and validate custom messages is error-prone, and a bug in
|
||||
this code could make both the daemon and the client vulnerable to crashes and
|
||||
buffer overflows. A statically-typed abstraction that handles parsing and
|
||||
validation would help a lot.
|
||||
|
||||
For networked communication, you'll need some form of confidentiality. MPD does
|
||||
not seem to support this, so any networked access to an MPD server is
|
||||
vulnerable to passive observers and MITM attacks. This may be fine for a local
|
||||
network (presumably what it is intended to be used for), but certainly doesn't
|
||||
work for exposing your MPD control interface to the wider internet. Existing
|
||||
protocols such as TLS or SSH can be used to create a secure channel, but these
|
||||
libraries tend to be large and hard to use securely. This is especially true
|
||||
for TLS, but at least there's [stunnel](https://www.stunnel.org/) to simplify
|
||||
the implementation - at the cost of less convenient deployment.
|
||||
|
||||
In terms of authentication, you again need to implement this yourself. MPD
|
||||
supports authentication using a plain-text password. This is fine for a trusted
|
||||
network, but on an untrusted network you certainly want confidentiality to
|
||||
prevent a random observer from reading your password.
|
||||
|
||||
**Fast?** Sure.
|
||||
: Existing protocols may have put more effort into profiling and implementing
|
||||
various optimizations than one would typically do with a custom and
|
||||
quickly-hacked-together protocol, but still, it probably takes effort to design
|
||||
a protocol that isn't fast enough.
|
||||
|
||||
**Proxy support?** Depends...
|
||||
: Really depends on how elaborate you want to be. It can be very simple if all
|
||||
you want is to route some messages, it can get very complex if you want to
|
||||
ensure that these messages follow some format or if you want to reserve certain
|
||||
interfaces or namespaces to certain clients. What surprised me about the MPD
|
||||
protocol is that it actually has [some support for
|
||||
proxying](http://www.musicpd.org/doc/protocol/ch03s11.html). But considering the
|
||||
ad-hoc nature of the MPD protocol, the primitiveness and simplicity of this
|
||||
proxy support wasn't too surprising. Gets the job done, I suppose.
|
||||
|
||||
Overall, and as a rather obvious conclusion, a custom protocol really is what
|
||||
you make of it. In general, though, it's a lot of work, not always easy to use,
|
||||
and a challenge to get the security part right.
|
||||
|
||||
# D-Bus
|
||||
|
||||
D-Bus is being used in [Transmission](https://www.transmissionbt.com/) and is
|
||||
what I used for [Globster](/globster).
|
||||
|
||||
On a quick glance, D-Bus looks _perfect_. It is high-level, has the messaging
|
||||
patterns I described, the [protocol
|
||||
specification](http://dbus.freedesktop.org/doc/dbus-specification.html) does
|
||||
not seem _overly_ complex (though certainly could be simplified), it has
|
||||
implementations for a number of programming languages, has support for
|
||||
networking, proxying is part of normal operation, and it seems fast enough for
|
||||
most purposes. When you actually give it a closer look, however, reality isn't
|
||||
as rose-colored.
|
||||
|
||||
D-Bus is designed for two very specific use-cases. One is to allow local
|
||||
applications to securely interact with system-level daemons such as
|
||||
[HAL](https://en.wikipedia.org/wiki/HAL_\(software\)) (now long dead) and
|
||||
[systemd](http://freedesktop.org/wiki/Software/systemd/), and the other
|
||||
use-case is to allow communication between different applications inside one
|
||||
login session. As such, on a typical Linux system there are two D-Bus daemons
|
||||
where applications can export interfaces and where messages can be routed
|
||||
through. These are called the _system bus_ and the _session bus_.
|
||||
|
||||
**Easy?** Almost.
|
||||
: The basic ideas behind D-Bus seem easy enough to use. The fact that it has
|
||||
type-safe messages, interface descriptions and introspection really help in
|
||||
making D-Bus a convenient IPC mechanism.
|
||||
|
||||
The main reasons why I think D-Bus isn't all that easy to use in practice is
|
||||
due to the lack of good introductory documentation and the crappy state of
|
||||
the various D-Bus implementations. There is a [fairly good
|
||||
article](https://pythonhosted.org/txdbus/dbus_overview.html) providing a
|
||||
high-level overview to D-Bus, but there isn't a lot of material that covers how
|
||||
to actually use D-Bus to interact with applications or to implement a service.
|
||||
|
||||
On the implementations, I have had rather bad experiences with the actual
|
||||
libraries. I've personally used the official libdbus-1, which markets itself as a
|
||||
"low-level" library designed to facilitate writing bindings for other
|
||||
languages. In practice, the functionality that it offers appears to be too
|
||||
high-level for writing bindings ([GDBus](https://developer.gnome.org/glib/)
|
||||
doesn't use it for this reason), and it is indeed missing a lot of
|
||||
functionality to make it convenient to use directly. I've also played around
|
||||
with Perl's [Net::DBus](http://search.cpan.org/perldoc?Net%3A%3ADBus) and was
|
||||
highly disappointed. Not only is the documentation rather incomplete, the
|
||||
actual implementation has more bugs than features. And instead of building on
|
||||
top of one of the many good event loops for Perl (such as
|
||||
[AnyEvent](http://search.cpan.org/perldoc?AnyEvent)), it chooses to implement
|
||||
[its own event
|
||||
loop](http://search.cpan.org/perldoc?Net%3A%3ADBus%3A%3AReactor). The existence
|
||||
of several different libraries for Python doesn't incite much confidence,
|
||||
either.
|
||||
|
||||
I was also disappointed in terms of the available tooling to help in the
|
||||
development, testing and debugging of services. The [gdbus(1)](http://man.he.net/man1/gdbus) tool is useful
|
||||
for monitoring messages and scripting some things, but is not all that
|
||||
convenient because D-Bus has too many namespaces and the terrible Java-like
|
||||
naming conventions make typing everything out a rather painful experience.
|
||||
[D-Feet](http://live.gnome.org/DFeet/) offers a great way to explore services,
|
||||
but lacks functionality for quick debugging sessions. I [made an
|
||||
attempt](http://g.blicky.net/dbush.git/) to write a convenient command-line
|
||||
shell, but lost interest halfway. :-(
|
||||
|
||||
D-Bus has the potential to be an easy and convenient IPC mechanism, but the
|
||||
lack of any centralized organization to offer good implementations,
|
||||
documentation and tooling makes D-Bus a pain to use.
|
||||
|
||||
**Simple?** Not quite.
|
||||
|
||||
: D-Bus is conceptually easy and the message protocol is alright, too. Some
|
||||
aspects of D-Bus, however, are rather more complex than they need to be.
|
||||
|
||||
I have once made an attempt to fully understand how D-Bus discovers and
|
||||
connects to the session bus, but I gave up halfway because there are too
|
||||
many special cases. To quickly summarize what I found, there's the
|
||||
`DBUS_SESSION_BUS_ADDRESS` environment variable which could point to the
|
||||
(filesystem or abstract) path of a UNIX socket or a TCP address. If that
|
||||
variable isn't set, D-Bus will try to connect to your X server and get the
|
||||
address from that. In order to avoid linking everything against X
|
||||
libraries, a separate [dbus-launch](https://metacpan.org/pod/dbus-launch)
|
||||
utility is spawned instead. Then the bus address could also be obtained
|
||||
from a file in your `$HOME/.dbus/` directory, with added complexity to
|
||||
still support a different session bus for each X session. I've no idea how
|
||||
exactly connection initiation to the system bus works, but my impression is
|
||||
that a bunch of special cases exist there, too, depending on which init
|
||||
system your OS happens to use.
|
||||
|
||||
As if all the options in connection initiation aren't annoying enough,
|
||||
there's also work on [kdbus](https://lwn.net/Articles/580194/), a Linux
|
||||
kernel implementation to get better performance. Not only will kdbus use a
|
||||
different underlying communication mechanism, it will also switch to a
|
||||
completely different serialization format. If/when this becomes widespread
|
||||
you will have to implement and support two completely different protocols
|
||||
and pray that your application works with both.
|
||||
|
||||
On the design aspect there is, in my opinion, needless complexity with
|
||||
regards to naming and namespaces. First there is a global namespace for
|
||||
_bus names_, which are probably better called _application names_, because
|
||||
that's usually what they represent. Then, there is a separate _object_
|
||||
namespace local to each bus name. Each object has methods and properties,
|
||||
and these are associated with an _interface name_, in a namespace specific
|
||||
to the particular object. Despite these different namespaces, the
|
||||
convention is to use a full and globally unique path for everything that
|
||||
has a name. For example, to list the IM protocols that Telepathy supports,
|
||||
you call the `ListProtocols` method in the
|
||||
`org.freedesktop.Telepathy.ConnectionManager` interface on the
|
||||
`/org/freedesktop/Telepathy/ConnectionManager` object at the
|
||||
`org.freedesktop.Telepathy` bus. Fun times indeed. I can understand the
|
||||
reasoning behind most of these choices, but in my opinion they found the
|
||||
wrong trade-off.
|
||||
|
||||
Another point of complexity that annoys me is the fact that an XML format
|
||||
is used to describe interfaces. Supporting XML as an IDL format is alright,
|
||||
but requiring a separate format for an introspection interface gives me the
|
||||
impression that the message format wasn't powerful enough for such a simple
|
||||
purpose. The direct effect of this is that any application wishing to use
|
||||
introspection data will have to link against an XML parser, and almost all
|
||||
conforming XML parser implementations are as large as the D-Bus
|
||||
implementation itself.
|
||||
|
||||
**Small?** Kind of.
|
||||
: `libdbus-1.so.3.8.6` on my system is about 240 KiB. It doesn't cover parsing
|
||||
interface descriptions or implementing a D-Bus daemon, but still covers most of
|
||||
what is needed to interact with services and to offer services over D-Bus.
|
||||
It's not _that_ small, but then again, libdbus-1 was not really written with
|
||||
small size in mind. There's room for optimization.
|
||||
|
||||
**Language independent?** Sure.
|
||||
: D-Bus libraries exist for a number of programming languages.
|
||||
|
||||
**Networked?** Half-assed.
|
||||
: D-Bus _officially_ supports networked connections to a D-Bus daemon. Actually
|
||||
using this, however, is painful. Convincing
|
||||
[dbus-daemon(1)](http://man.he.net/man1/dbus-daemon) to accept connections
|
||||
on a TCP socket involves disabling all authentication (it expects UNIX
|
||||
credential passing, normally) and requires adding an undocumented
|
||||
`<allow_anonymous/>` tag in the configuration (I only figured this out from
|
||||
reading the source code).
|
||||
|
||||
Even when you've gotten that to work, there is the problem that D-Bus isn't
|
||||
totally agnostic to the underlying socket protocol. D-Bus has support for
|
||||
passing UNIX file descriptors over the connection, and this of course doesn't
|
||||
work over TCP. While this feature is optional and easily avoided, some services
|
||||
(I can't find one now) use UNIX fds in order to keep track of processes that
|
||||
listen to a certain event. Obviously, those services can't be accessed over the
|
||||
network.
|
||||
|
||||
**Secure?** Only locally.
|
||||
: D-Bus has statically typed messages that can be validated automatically, so
|
||||
that's a plus.
|
||||
|
||||
For local authentication, there is support for standard UNIX permissions and
|
||||
credential passing for more fine-grained authorization. For remote
|
||||
authentication, I think there is support for a shared secret cookie, but I
|
||||
haven't tried to use this yet.
|
||||
|
||||
There is, as with MPD, no support at all for confidentiality, so using
|
||||
networked D-Bus over an untrusted network would be a very bad idea anyway.
|
||||
|
||||
**Fast?** Mostly.
|
||||
: The messaging protocol is fairly lightweight, so no problems there. I do have
|
||||
to mention two potential performance issues, however.
|
||||
|
||||
The first issue is that the normal mode of operation in D-Bus is to proxy all
|
||||
messages through an intermediate D-Bus daemon. This involves extra context
|
||||
switches and message parsing passes in order to get one message from
|
||||
application A to application B. I believe it is _officially_ supported to
|
||||
bypass this daemon and to communicate directly between two processes, but after
|
||||
my experience with networking I am wary of trying anything that isn't part of
|
||||
how D-Bus is _intended_ to be used. This particular performance issue is what
|
||||
kdbus addresses, so I suppose it won't apply to future Linux systems.
|
||||
|
||||
The other issue is that a daemon that provides a service over D-Bus does not
|
||||
know whether there exists an application that is interested in receiving its
|
||||
notifications. This means that the daemon always has to spend resources to send
|
||||
out notification messages, even if no application is actually interested in
|
||||
receiving them. In practice this means that the notification mechanism is
|
||||
avoided for events that may occur fairly often, and an equally inefficient
|
||||
polling approach has to be used instead. It is possible for a service provider
|
||||
to keep track of interested applications, but this is not part of the D-Bus
|
||||
protocol and not something you would want to implement for each possible event.
|
||||
I've no idea if kdbus addresses this issue, but it would be stupid not to.
|
||||
|
||||
**Proxy support?** Yup.
|
||||
: It's part of normal operation, even.
|
||||
|
||||
D-Bus has many faults, some of them are by design, but many are fixable. I
|
||||
would have contributed to improving the situation, but I get the feeling that
|
||||
the goals of the D-Bus maintainers are not at all aligned with mine. My
|
||||
impression is that the D-Bus maintainers are far too focussed on their own
|
||||
specific needs and care little about projects with slightly different needs.
|
||||
Especially with the introduction of kdbus, I consider D-Bus too complex now to
|
||||
consider it worth the effort to improve. Starting from scratch seems less work.
|
||||
|
||||
# JSON/XML RPC
|
||||
|
||||
While I haven't extensively used JSON-RPC or XML-RPC myself, it's still an
|
||||
interesting alternative to study.
|
||||
[Transmission](https://www.transmissionbt.com/) uses JSON-RPC
|
||||
([spec](https://trac.transmissionbt.com/browser/trunk/extras/rpc-spec.txt)) as
|
||||
its primary IPC mechanism, and [RTorrent](http://rakshasa.github.io/rtorrent/)
|
||||
has support for an optional XML-RPC interface. (Why do I keep referencing
|
||||
torrent clients? Surely there are other interesting applications? Oh well.)
|
||||
|
||||
The main selling point of HTTP-based IPC is that it is accessible from
|
||||
browser-based applications, assuming everything has been set up correctly. This
|
||||
is a nice advantage, but lack of this support is not really a deal-breaker for
|
||||
me. Browser-based applications can still use any other IPC mechanism, as long
|
||||
as there are browser plugins or some form of proxy server that converts the
|
||||
messages of the IPC mechanism to something that is usable over HTTP. For
|
||||
example, both solutions exist for D-Bus, in the form of the [Browser DBus
|
||||
Bridge](http://sandbox.movial.com/wiki/index.php/Browser_DBus_Bridge) and
|
||||
[cloudeebus](https://github.com/01org/cloudeebus). Of course, such solutions
|
||||
typically aren't as convenient as native HTTP support.
|
||||
|
||||
Since HTTP is, by design, purely request-response, JSON-RPC and XML-RPC don't
|
||||
generally support asynchronous notifications. It's possible to still get
|
||||
asynchronous notifications by using
|
||||
[WebSockets](https://en.wikipedia.org/wiki/WebSocket) (Ugh, opaque stream
|
||||
sockets, time to go back to our [custom protocol](#custom-protocol)) or by
|
||||
having the client implement a HTTP server itself and send its URL to the
|
||||
service provider (This is known as a
|
||||
[callback](https://duckduckgo.com/?q=web%20service%20callback) in the
|
||||
[SOAP](https://en.wikipedia.org/wiki/SOAP) world. I have a lot of respect for
|
||||
developers who can put up with that crap). As I already hinted, neither
|
||||
solution is simple or easy.
|
||||
|
||||
Let's move on to the usual grading...
|
||||
|
||||
**Easy?** Sure.
|
||||
: The ubiquity of HTTP, JSON and XML on the internet means that most developers
|
||||
are already familiar with using it. And even if you aren't, there are so many
|
||||
easy-to-use and well-documented libraries available that you're ready to go in
|
||||
a matter of minutes.
|
||||
|
||||
Although interface description languages/formats exist for XML-RPC (and
|
||||
possibly for JSON-RPC, too), I get the impression these are not often used
|
||||
outside of the SOAP world. As a result, interacting with such a service tends
|
||||
to be weakly/stringly typed, which, I imagine, is not as convenient in strongly
|
||||
typed programming languages.
|
||||
|
||||
**Simple?** Not really.
|
||||
: Many people have the impression that HTTP is somehow a simple protocol. Sure,
|
||||
it may look simple on the wire, but in reality it is a hugely bloated and
|
||||
complex protocol. I strongly encourage everyone to read through [RFC
|
||||
2616](https://tools.ietf.org/html/rfc2616) at least once to get an idea of its
|
||||
size and complexity. To make things worse, there's a lot of recent activity to
|
||||
standardize on a next generation HTTP
|
||||
([SPDY](https://en.wikipedia.org/wiki/SPDY) and [HTTP
|
||||
2.0](https://en.wikipedia.org/wiki/HTTP_2.0)), but I suppose we can ignore these
|
||||
developments for the foreseeable future for the use case of IPC.
|
||||
|
||||
Of course, a lot of the functionality specified for HTTP is optional and can be
|
||||
ignored for the purpose of IPC, but that doesn't mean that these options don't
|
||||
exist. When implementing a client, it would be useful to know exactly which
|
||||
HTTP options the server supports. It would be wasteful to implement compression
|
||||
support if the server doesn't support it, or keep-alive, or content
|
||||
negotiation, or ranged requests, or authentication, or correct handling for all
|
||||
response codes when the server will only ever send 'OK'. What also commonly
|
||||
happens is that server implementors want to support as much as possible, to the
|
||||
point that you can have JSON or XML output, depending on what the client
|
||||
requested.
|
||||
|
||||
XML faces a similar problem. The format looks simple, but the specification has
|
||||
a bunch of features that hardly anyone uses. In contrast to HTTP, however, a
|
||||
correct XML parser can't just decide to not parse `<!DOCTYPE ..>` stuff,
|
||||
so it _has_ to implement some of this complexity.
|
||||
|
||||
On the upside, JSON is a really simple serialization format, and if you're
|
||||
careful enough to only implement the functionality necessary for basic HTTP, a
|
||||
JSON-RPC implementation _can_ be somewhat simple.
|
||||
|
||||
**Small?** Not really.
|
||||
: What typically happens is that implementors take an existing HTTP library and
|
||||
build on top of that. A generic HTTP library likely implements a lot more than
|
||||
necessary for IPC, so that's not going to be very small. RTorrent, for example,
|
||||
makes use of the not-very-small [xmlrpc-c](http://xmlrpc-c.sourceforge.net/),
|
||||
which in turn uses [libcurl](http://curl.haxx.se/) (400 KiB, excluding TLS
|
||||
library) and either the bloated [libxml2](http://xmlsoft.org/) (1.5 MiB) or
|
||||
[libexpat](http://www.libexpat.org/) (170 KiB). In any case, expect your
|
||||
programs to grow by a megabyte or more if you go this route.
|
||||
|
||||
Transmission seems rather less bloated. It uses the HTTP library that is built
|
||||
into [libevent](http://libevent.org/) (totalling ~500 KiB, but libevent is also
|
||||
used for other networking parts), and a simple JSON parser can't be that large
|
||||
either. I'm sure that if you reimplement everything from scratch for the
|
||||
purpose of building an API, you could get something much smaller. Then again,
|
||||
even if you manage to shrink the size of the server that way, you can't expect
|
||||
all your users to do the same.
|
||||
|
||||
If HTTPS is to be supported, add ~500 KiB more. TLS isn't the simplest
|
||||
protocol, either.
|
||||
|
||||
**Language independent?** Yes.
|
||||
: Almost every language has libraries for web stuff.
|
||||
|
||||
**Networked?** Definitely.
|
||||
: In fact, I've never seen anyone use XML/JSON RPC over UNIX sockets.
|
||||
|
||||
**Secure?** Alright.
|
||||
: HTTP has built-in support for authentication, but it also isn't uncommon to use
|
||||
some other mechanism (based on cookies, I guess?).
|
||||
|
||||
Confidentiality can be achieved with HTTPS. There is the problem of verifying
|
||||
the certificate, since I doubt anyone is going to have certificates of their
|
||||
local applications signed by a certificate authority, but there's always the
|
||||
option of trust-on-first use. Custom applications can also include a
|
||||
fingerprint of the server certificate in the URL for verification, but this
|
||||
won't work for web apps.
|
||||
|
||||
**Fast?** No.
|
||||
: JSON/XML RPC messages add significant overhead to the network and require more
|
||||
parsing than a simple custom solution or D-Bus. I wouldn't really call it
|
||||
_fast_, but admittedly, it might still be _fast enough_ for most purposes.
|
||||
|
||||
**Proxy support?** Sure.
|
||||
: HTTP has native support for proxying, and it's always possible to proxy some
|
||||
URI on the main server to another server, assuming the libraries you use
|
||||
support that. It's not necessarily simple to implement, however.
|
||||
|
||||
The lack of asynchronous notifications and the overhead and complexity of
|
||||
JSON/XML RPC make me stay away from it, but it certainly is a solution that
|
||||
many client developers will like because of its ease of use.
|
||||
|
||||
# Other Systems
|
||||
|
||||
There are more alternatives out there than I have described so far. Most of
|
||||
those were options I dismissed early on because they're either incomplete
|
||||
solutions or specific to a single framework or language. I'll still mention a
|
||||
few here.
|
||||
|
||||
## Message Queues
|
||||
|
||||
In the context of IPC I see that message queues such as
|
||||
[RabbitMQ](https://www.rabbitmq.com/) and [ZeroMQ](http://zeromq.org/) are
|
||||
quite popular. I can't say I have much experience with any of these, but these
|
||||
MQs don't seem to offer a solution to the problem I described in the
|
||||
introduction. My impression of MQs is that they offer a higher-level and more
|
||||
powerful alternative to TCP and UDP. That is, they route messages from one
|
||||
endpoint to another. The contents of the messages are still completely up to
|
||||
the application, so you're still on your own in implementing an RPC mechanism
|
||||
on top of that. And for the purpose of building a simple RPC mechanism, I'm
|
||||
convinced that plain old UNIX sockets or TCP will do just fine.
|
||||
|
||||
## Cap'n Proto
|
||||
|
||||
I probably should be spending a full chapter on [Cap'n
|
||||
Proto](http://kentonv.github.io/capnproto/) instead of this tiny little section,
|
||||
but I'm simply not familiar enough with it to offer any deep insights. I can
|
||||
still offer my blatantly uninformed impression of it: It looks very promising,
|
||||
but puts, in my opinion, too much emphasis on performance and too little
|
||||
emphasis on ease of use. It lacks introspection and requires that clients have
|
||||
already obtained the schema of the service in order to interact with it. It
|
||||
also uses a capability system to handle authorization, which, despite being
|
||||
elegant and powerful, increases complexity and cognitive load (though I
|
||||
obviously need more experience to quantify this). It still lacks
|
||||
confidentiality for networked access and the number of bindings to other
|
||||
programming languages is limited, but these problems can be addressed.
|
||||
|
||||
Cap'n Proto seems like the ideal IPC mechanism for internal communication
|
||||
within a single (distributed) application and offers a bunch of unique features
|
||||
not found in other RPC systems. But it doesn't feel quite right as an easy API
|
||||
for others to use.
|
||||
|
||||
## CORBA
|
||||
|
||||
CORBA has been used by the GNOME project in the past, and was later abandoned
|
||||
in favour of D-Bus, primarily (I think) because CORBA was deemed too [complex
|
||||
and incomplete](http://dbus.freedesktop.org/doc/dbus-faq.html#corba). A system
|
||||
that is deemed more complex than D-Bus is an immediate red flag. The [long and
|
||||
painful history of CORBA](http://queue.acm.org/detail.cfm?id=1142044) also makes
|
||||
me want to avoid it, if only because that makes it very hard to judge the
|
||||
quality and modernness of existing implementations.
|
||||
|
||||
## Project Tanja
|
||||
|
||||
A bit over two years ago I was researching the same problem, but from a much
|
||||
more generic angle. The result of that was a project that I called Tanja. I
|
||||
described its concepts [in an earlier article](/doc/commvis), and wrote an
|
||||
incomplete [specification](https://g.blicky.net/tanja.git/) along with
|
||||
implementations in [C](https://g.blicky.net/tanja-c.git/),
|
||||
[Go](https://g.blicky.net/tanja-go.git/) and
|
||||
[Perl](https://g.blicky.net/tanja-perl.git/). I consider project Tanja a
|
||||
failure, primarily because of its genericity. It supported too many
|
||||
communication models and the lack of a specification as to which model was
|
||||
used, and the lack of any guarantee that this model was actually followed, made
|
||||
Tanja hard to use in practice. It was a very interesting experiment, but not
|
||||
something I would actually use. I learned the hard way that you sometimes have
|
||||
to move some complexity down into a lower abstraction layer in order to keep
|
||||
the complexity in check at higher layers of abstraction.
|
||||
|
||||
# Conclusions
|
||||
|
||||
This must be the longest rant I've written so far.
|
||||
|
||||
In any case, there isn't really a perfect IPC mechanism for my use case. A
|
||||
custom protocol involves reimplementing a lot of stuff, D-Bus is a pain, and
|
||||
JSON/XML RPC are bloat.
|
||||
|
||||
I am still undecided on what to do. I have a lot of ideas as to what a perfect
|
||||
IPC solution would look like, both in terms of features and in how to implement
|
||||
it, and I feel like I have enough experience by now to actually develop a
|
||||
proper solution. Unfortunately, writing a complete IPC system with the required
|
||||
utilities and language bindings takes **a lot** of time and effort. It's not
|
||||
really worth it if I am the only one using it.
|
||||
|
||||
So here is my plea to you, dear reader: If you know of any existing solutions
|
||||
I've missed, please tell me. If you empathize with me and want a better
|
||||
solution to this problem, please get in touch as well! I'd love to hear about
|
||||
projects which face similar problems and have similar requirements.
|
||||
517
dat/doc/funcweb.md
Normal file
517
dat/doc/funcweb.md
Normal file
|
|
@ -0,0 +1,517 @@
|
|||
% An Opinionated Survey of Functional Web Development
|
||||
|
||||
(Published on **2017-05-28**)
|
||||
|
||||
# Intro
|
||||
|
||||
TL;DR: In this article I provide an overview of the frameworks and libraries
|
||||
available for creating websites in statically-typed functional programming
|
||||
languages.
|
||||
|
||||
I recommend you now skip directly to the next section, but if you're interested
|
||||
in some context and don't mind a rant, feel free to read on. :-)
|
||||
|
||||
**<Rant mode>**
|
||||
|
||||
When compared to native desktop application development, web development just
|
||||
sucks. Native development is relatively simple with toolkits such as
|
||||
[Qt](https://www.qt.io/), [GTK+](https://www.gtk.org/) and others: You have
|
||||
convenient widget libraries, and you can describe your entire application, from
|
||||
interface design to all behavioural aspects, in a single programming language.
|
||||
You're also largely free to structure code in whichever way makes most sense.
|
||||
You can describe what a certain input field looks like, what happens when the
|
||||
user interacts with it and what will happen with the input data, all succinctly
|
||||
in a single file. There are even drag-and-drop UI builders to speed up
|
||||
development.
|
||||
|
||||
Web development is the exact opposite of that. There are several different
|
||||
technologies you're forced to work with even when creating the most mundane
|
||||
website, and there's a necessary but annoying split between code that runs on
|
||||
the server and code that runs in the browser. Creating a simple input field
|
||||
requires you to consider and maintain several ends:
|
||||
|
||||
- The back end (server-side code) that describes how the input field interacts
|
||||
with the database.
|
||||
- Some JavaScript code to describe how the user can interact with the input
|
||||
field.
|
||||
- Some CSS to describe what the input field looks like.
|
||||
- And then there's HTML to act as a glue between the above.
|
||||
|
||||
In many web development setups, all four of the above technologies are
|
||||
maintained in different files. If you want to add, remove or modify an input
|
||||
field, or just about anything else on a page, you'll be editing at least four
|
||||
different files with different syntax and meaning. I don't know how other
|
||||
developers deal with this, but the only way I've been able to keep these places
|
||||
synchronized is to just edit one or two places, test if it works in a browser,
|
||||
and then edit the other places accordingly to fix whatever issues I find. This
|
||||
doesn't always work well: I don't get a warning if I remove an HTML element
|
||||
somewhere and forget to also remove the associated CSS. Heck, in larger
|
||||
projects I can't even tell whether it's safe to remove or edit a certain line
|
||||
of CSS because I have no way to know for sure that it's not still being used
|
||||
elsewhere. Perhaps this particular case can be solved with proper organization
|
||||
and discipline, but similar problems exist with the other technologies.
|
||||
|
||||
Yet despite that, why do I still create websites in my free time? Because it is
|
||||
the only environment with high portability and low friction - after all, pretty
|
||||
much anyone can browse the web. I would not have been able to create a useful
|
||||
"[Visual Novel Database](https://vndb.org/)" any other way than through a
|
||||
website. And the entire purpose of [Manned.org](https://manned.org/) is to
|
||||
provide quick access to man pages from anywhere, which is not easily possible
|
||||
with native applications.
|
||||
|
||||
**</Rant mode>**
|
||||
|
||||
Fortunately, I am not the only one who sees the problems with the "classic"
|
||||
development strategy mentioned above. There are many existing attempts to
|
||||
improve on that situation. A popular approach to simplify development is the
|
||||
[Single-page
|
||||
application](https://en.wikipedia.org/wiki/Single-page_application) (SPA). The
|
||||
idea is to move as much code as possible to the front end, and keep only a
|
||||
minimal back end. Both the HTML and the entire behaviour of the page can be
|
||||
defined in the same language and same file. With libraries such as
|
||||
[React](https://facebook.github.io/react/) and browser support for [Web
|
||||
components](https://developer.mozilla.org/en-US/docs/Web/Web_Components), the
|
||||
split between files described above can be largely eliminated. And if
|
||||
JavaScript isn't your favorite language, there are many alternative languages
|
||||
that compile to JavaScript. (See [The JavaScript
|
||||
Minefield](http://walkercoderanger.com/blog/2014/02/javascript-minefield/) for
|
||||
an excellent series of articles on that topic).
|
||||
|
||||
While that approach certainly has the potential to make web development more
|
||||
pleasant, it has a very significant drawback: Performance. For some
|
||||
applications, such as web based email clients or CRM systems, it can be
|
||||
perfectly acceptable to have a megabyte of JavaScript as part of the initial
|
||||
page load. But for most other sites, such as this one, or the two sites I
|
||||
mentioned earlier, or sites like Wikipedia, a slow initial page load is
|
||||
something I consider to be absolutely unacceptable. The web can be really fast,
|
||||
and developer laziness is not a valid excuse to ruin it. (If you haven't seen
|
||||
or read [The Website Obesity
|
||||
Crisis](http://idlewords.com/talks/website_obesity.htm) yet, please do so now).
|
||||
|
||||
I'm much more interested in the opposite approach to SPA: Move as much code as
|
||||
possible to the back end, and only send a minimal amount of JavaScript to the
|
||||
browser. This is arguably how web development has always been done in the past,
|
||||
and there's little reason to deviate from it. The difference, however, is that
|
||||
people tend to expect much more "interactivity" from web sites nowadays, so the
|
||||
amount of JavaScript is increasing. And that is alright, so long as the
|
||||
JavaScript doesn't prevent the initial page from loading quickly. But this
|
||||
increase in JavaScript does amplify the "multiple files" problem I ranted about
|
||||
earlier.
|
||||
|
||||
So my ideal solution is a framework where I can describe all aspects of a site
|
||||
in a single language, and organize the code among files in a way that makes
|
||||
sense to me. That is, I want the same kind of freedom that I get with native
|
||||
desktop software development. Such a framework should run on the back end, and
|
||||
automatically generate efficient JavaScript and, optionally, CSS for the front
|
||||
end. As an additional requirement (or rather, strong preference), all this
|
||||
should be in a statically-typed language - because I am seemingly incapable of
|
||||
writing large reliable applications with dynamic typing - and in a language
|
||||
from functional heritage - because programming in functional languages has
|
||||
spoiled me.
|
||||
|
||||
I'm confident that what I describe is possible, and it's evident that I'm not
|
||||
the only person to want this, as several (potential) solutions like this do
|
||||
indeed exist. I've been looking around for these solutions and have
|
||||
experimented with a few that looked promising. This article provides an
|
||||
overview of what I have found so far.
|
||||
|
||||
# OCaml
|
||||
|
||||
My adventure began with [OCaml](https://ocaml.org/). It's been a few years
|
||||
since I last used OCaml for anything, but development on the language and its
|
||||
ecosystem has been anything but halted. [Real World OCaml](https://realworldocaml.org/)
|
||||
has been a great resource to get me up to speed again.
|
||||
|
||||
## Ocsigen
|
||||
|
||||
For OCaml there is one project that has it all: [Ocsigen](http://ocsigen.org/).
|
||||
It comes with an OCaml to JavaScript compiler, a web server, several handy
|
||||
libraries, and a [framework](http://ocsigen.org/eliom/) to put everything
|
||||
together. Its [syntax
|
||||
extension](http://ocsigen.org/eliom/6.2/manual/ppx-syntax) allows you to mix
|
||||
front and back end code, and you can easily share code between both ends. The
|
||||
final result is a binary that runs the server and a JavaScript file that
|
||||
handles everything on the client side.
|
||||
|
||||
The framework comes with an embedded DSL with which you can conveniently
|
||||
generate HTML without actually typing HTML. And best of all, this DSL works on
|
||||
both the client and the server: On the server side it generates an HTML string
|
||||
that can be sent to the client, and running the same code on the client side
|
||||
will result in a DOM element that is ready to be used.
|
||||
|
||||
Ocsigen makes heavy use of the OCaml type system to statically guarantee the
|
||||
correctness of various aspects of the application. The HTML DSL ensures not
|
||||
only that the generated HTML is well-formed, but also prevents you from
|
||||
incorrectly nesting certain elements and using the wrong attributes on the
|
||||
wrong elements. Similarly, an HTML element generated on the server side can be
|
||||
referenced from client side code without having to manually assign a unique ID
|
||||
to the element. This prevents accidental typos in the ID naming and guarantees
|
||||
that the element that the client side code refers to actually exists. URL
|
||||
routing and links to internal pages are also checked at compile time.
|
||||
|
||||
Ocsigen almost exactly matches what I previously described as the perfect
|
||||
development framework. Unfortunately, it has a few drawbacks:
|
||||
|
||||
- The generated JavaScript is quite large, a bit over 400 KiB for a hello
|
||||
world. In my brief experience with the framework, this also results in a
|
||||
noticeably slower page load. I don't know if it was done for performance
|
||||
purposes, but subsequent page views are per default performed via in-browser
|
||||
XHR requests, which do not require that all the JavaScript is re-parsed and
|
||||
evaluated, and is thus much faster. This, however, doesn't work well if the
|
||||
user opens pages in multiple tabs or performs a page reload for whatever
|
||||
reason. And as I mentioned, I care a lot about the initial page loading time.
|
||||
- The framework has a steep learning curve, and the available documentation is
|
||||
by far not complete enough to help you. I've found myself wondering many
|
||||
times how I was supposed to use a certain API and have had to look for
|
||||
example code for enlightenment. At some point I ended up just reading the
|
||||
source code instead of going for the documentation. What doesn't help here is
|
||||
that, because of the heavy use of the type system to ensure code correctness,
|
||||
most of the function signatures are far from intuitive and are sometimes very
|
||||
hard to interpret. This problem is made even worse with the generally
|
||||
unhelpful error messages from the compiler. (A few months with
|
||||
[Rust](https://www.rust-lang.org/) and its excellent error messages has
|
||||
really spoiled me on this aspect, I suppose).
|
||||
- I believe they went a bit too far with the compile-time verification of
|
||||
certain correctness properties. Apart from making the framework harder to
|
||||
learn, it also increases the verbosity of the code and removes a lot of
|
||||
flexibility. For instance, in order for internal links to be checked, you
|
||||
have to declare your URLs (or _services_, as they call it) somewhere central
|
||||
such that the view part of your application can access it. Then elsewhere you
|
||||
have to register a handler to that service. This adds boilerplate and
|
||||
enforces a certain code structure. And the gain of all this is, in my
|
||||
opinion, pretty small: In the 15 years that I have been building web sites, I
|
||||
don't remember a single occurrence where I mistyped the URL in an internal
|
||||
link. I do suppose that this feature makes it easy to change URLs without
|
||||
causing breakage, but there is a trivial counter-argument to that: [Cool URIs
|
||||
don't change](https://www.w3.org/Provider/Style/URI.html). (Also, somewhat
|
||||
ironically, I have found more dead internal links on the Ocsigen website than
|
||||
on any other site I have visited in the past year, so perhaps this was indeed
|
||||
a problem they considered worth fixing. Too bad it didn't seem to work out so
|
||||
well for them).
|
||||
|
||||
Despite these drawbacks, I am really impressed with what the Ocsigen project
|
||||
has achieved, and it has set a high bar for the future frameworks that I will
|
||||
be considering.
|
||||
|
||||
# Haskell
|
||||
|
||||
I have always seen Haskell as that potentially awesome language that I just
|
||||
can't seem to wrap my head around, despite several attempts in the past to
|
||||
learn it. Apparently the only thing I was missing in those attempts was a
|
||||
proper goal: When I finally started playing around with some web frameworks I
|
||||
actually managed to get productive in Haskell with relative ease. What also
|
||||
helped me this time was a practical introductory Haskell reference, [What I
|
||||
Wish I Knew When Learning Haskell](http://dev.stephendiehl.com/hask/), in
|
||||
addition to the more theoretical [Learn You A Haskell for Great
|
||||
Good](http://learnyouahaskell.com/).
|
||||
|
||||
Haskell itself already has a few advantages when compared to OCaml: For one, it
|
||||
has a larger ecosystem, so for any task you can think of there is probably
|
||||
already at least one existing library. As an example, I was unable to find an
|
||||
actively maintained SQL DSL for OCaml, while there are several available for
|
||||
Haskell. Another advantage that I found were the much more friendly and
|
||||
detailed error messages generated by the Haskell compiler, GHC. In terms of
|
||||
build systems, Haskell has standardized on
|
||||
[Cabal](https://www.haskell.org/cabal/), which works alright most of the time.
|
||||
Packaging is still often complex and messy, but it's certainly improving as
|
||||
[Stack](http://haskellstack.org/) is gaining more widespread adoption. Finally,
|
||||
I feel that the Haskell syntax is slightly less verbose, and more easily lends
|
||||
itself to convenient DSLs.
|
||||
|
||||
Despite Haskell's larger web development community, I could not find a single
|
||||
complete and integrated client/server development framework such as Ocsigen.
|
||||
Instead, there are a whole bunch of different projects focussing on either the
|
||||
back end or the front end. I'll explore some of them with the idea that,
|
||||
perhaps, it's possible to mix and match different libraries and frameworks in
|
||||
order to get the perfect development environment. And indeed, this seems to be
|
||||
a common approach in many Haskell projects.
|
||||
|
||||
## Server-side
|
||||
|
||||
Let's start with a few back end frameworks.
|
||||
|
||||
Scotty
|
||||
: [Scotty](https://github.com/scotty-web/scotty) is a web framework inspired by
|
||||
[Sinatra](http://www.sinatrarb.com/). I have no experience with (web)
|
||||
development in Ruby and have never used Sinatra, but it has some similarities
|
||||
to what I have been using for a long time: [TUWF](https://dev.yorhel.nl/tuwf).
|
||||
|
||||
Scotty is a very minimalist framework; it does routing (that is, mapping URLs
|
||||
to Haskell functions), it has some functions to access request data and some
|
||||
functions to create and modify a response. That's it. No database handling,
|
||||
session management, HTML generation, form handling or other niceties. But
|
||||
that's alright, because there are many generic libraries to help you out there.
|
||||
|
||||
Thanks to its minimalism, I found Scotty to be very easy to learn and get used
|
||||
to. Even as a Haskell newbie I had a simple website running within a day. The
|
||||
documentation is appropriate, but the idiomatic way of combining Scotty with
|
||||
other libraries is through the use of Monad Transformers, and a few more
|
||||
examples in this area would certainly have helped.
|
||||
|
||||
Spock
|
||||
: Continuing with the Star Trek franchise, there's
|
||||
[Spock](https://www.spock.li/). Spock is very similar to Scotty, but comes with
|
||||
type-safe routing and various other goodies such as session and state
|
||||
management, [CSRF](https://en.wikipedia.org/wiki/Cross-site_request_forgery)
|
||||
protection and database helpers.
|
||||
|
||||
As with everything that is (supposedly) more convenient, it also comes with a
|
||||
slightly steeper learning curve. I haven't, for example, figured out yet how to
|
||||
do regular expression based routing. I don't even know if that's still possible
|
||||
in the latest version - the documentation isn't very clear. Likewise, it's
|
||||
unclear to me what the session handling does exactly (Does it store something?
|
||||
And where? Is there a timeout?) and how that interacts with CSRF protection.
|
||||
Spock seems useful, but requires more than just a cursory glance.
|
||||
|
||||
Servant
|
||||
: [Servant](http://haskell-servant.github.io/) is another minimalist web
|
||||
framework, although it is primarily designed for creating RESTful APIs.
|
||||
|
||||
Servant distinguishes itself from Scotty and Spock by not only featuring
|
||||
type-safe routing, it furthermore allows you to describe your complete public
|
||||
API as a type, and get strongly typed responses for free. This also enables
|
||||
support for automatically generated documentation and client-side API wrappers.
|
||||
|
||||
Servant would be an excellent back end for a SPA, but it does not seem like an
|
||||
obvious approach to building regular websites.
|
||||
|
||||
Happstack / Snap / Yesod
|
||||
: [Happstack](http://www.happstack.com/), [Yesod](http://www.yesodweb.com/) and
|
||||
[Snap](http://snapframework.com/) are three large frameworks with many
|
||||
auxiliary libraries. They all come with a core web server, routing, state and
|
||||
database management. Many of the libraries are not specific to the framework
|
||||
and can be used together with other frameworks. I won't go into a detailed
|
||||
comparison between the three projects because I have no personal experience
|
||||
with any of them, and fortunately [someone else already wrote a
|
||||
comparison](http://softwaresimply.blogspot.nl/2012/04/hopefully-fair-and-useful-comparison-of.html)
|
||||
in 2012 - though I don't know how accurate that still is today.
|
||||
|
||||
So there are a fair amount of frameworks to choose from, and they can all work
|
||||
together with other libraries to implement additional functions. Apart from the
|
||||
framework, another important aspect of web development is how you generate the
|
||||
HTML to send to the client. In true Haskell style, there are several answers.
|
||||
|
||||
For those who prefer embedded DSLs, there are
|
||||
[xhtml](http://hackage.haskell.org/package/xhtml),
|
||||
[BlazeHTML](https://jaspervdj.be/blaze/) and
|
||||
[Lucid](https://github.com/chrisdone/lucid). The xhtml package is not being
|
||||
used much nowadays and has been superseded by BlazeHTML, which is both faster
|
||||
and offers a more readable DSL using Haskell's do-notation. Lucid is heavily
|
||||
inspired by Blaze, and attempts to [fix several of its
|
||||
shortcomings](http://chrisdone.com/posts/lucid). Having used Lucid a bit
|
||||
myself, I can attest that it is easy to get started with and pretty convenient
|
||||
in use.
|
||||
|
||||
I definitely prefer to generate HTML using DSLs as that keeps the entire
|
||||
application in a single host language and with consistent syntax, but the
|
||||
alternative approach, templating, is also fully supported in Haskell. The Snap
|
||||
framework comes with [Heist](https://github.com/snapframework/heist), which are
|
||||
run-time interpreted templates, like similar systems in most other languages.
|
||||
Yesod comes with [Shakespeare](http://hackage.haskell.org/package/shakespeare),
|
||||
which is a type-safe templating system with support for inlining the templates
|
||||
in Haskell code. Interestingly, Shakespeare also has explicit support for
|
||||
templating JavaScript code. Too bad that this doesn't take away the need to
|
||||
write the JavaScript yourself, so I don't see how this is an improvement over
|
||||
some other JavaScript solution that uses JSON for communication with the back
|
||||
end.
|
||||
|
||||
## Client-side
|
||||
|
||||
It is rather unusual to have multiple compiler implementations targeting
|
||||
JavaScript for the same source language, but Haskell has three of them. All
|
||||
three can be used to write front end code without touching a single line of
|
||||
JavaScript, but there are large philosophical differences between the three
|
||||
projects.
|
||||
|
||||
Fay
|
||||
: [Fay](https://github.com/faylang/fay/wiki) compiles Haskell code directly to
|
||||
JavaScript. The main advantage of Fay is that it does not come with a large
|
||||
runtime, resulting in small and efficient JavaScript. The main downside is that it
|
||||
only [supports a subset of
|
||||
Haskell](https://github.com/faylang/fay/wiki/What-is-not-supported?). The
|
||||
result is a development environment that is very browser-friendly, but where
|
||||
you can't share much code between the front and back ends. You're basically
|
||||
back to the separated front and back end situation in classic web development,
|
||||
but at least you can use the same language for both - somewhat.
|
||||
|
||||
Fay itself doesn't come with many convenient UI libraries, but
|
||||
[Cinder](http://crooney.github.io/cinder/index.html) covers that with a
|
||||
convenient HTML DSL and DOM manipulation library.
|
||||
|
||||
Fay is still seeing sporadic development activity, but there is not much of
|
||||
a lively community around it. Most people have moved on to other solutions.
|
||||
|
||||
GHCJS
|
||||
: [GHCJS](https://github.com/ghcjs/ghcjs) uses GHC itself to compile Haskell to a
|
||||
low-level intermediate language, and then compiles that language to JavaScript.
|
||||
This allows GHCJS to achieve excellent compatibility with native Haskell code,
|
||||
but comes, quite predictably, at the high cost of duplicating a large part of
|
||||
the Haskell runtime into the JavaScript output. The generated JavaScript code
|
||||
is typically measured in megabytes rather than kilobytes, which is (in my
|
||||
opinion) far too large for regular web sites. The upside of this high
|
||||
compatibility, of course, is that you can re-use a lot of code between the
|
||||
front and back ends, which will certainly make web development more tolerable.
|
||||
|
||||
The community around GHCJS seems to be more active than that of Fay. GHCJS
|
||||
integrates properly with the Stack package manager, and there are a [whole
|
||||
bunch](http://hackage.haskell.org/packages/search?terms=ghcjs) of libraries
|
||||
available.
|
||||
|
||||
Haste
|
||||
: [Haste](https://github.com/valderman/haste-compiler) provides a middle ground
|
||||
between Fay and GHCJS. Like GHCJS, Haste is based on GHC, but instead of
|
||||
using low-level compiler output, Haste uses a higher-level intermediate
|
||||
language. This results in good compatibility with regular Haskell code while
|
||||
keeping the output size in check. Haste has a JavaScript runtime of around 60
|
||||
KiB and the compiled code is roughly as space-efficient as Fay.
|
||||
|
||||
While it should be possible to share a fair amount of code between the front
|
||||
and back ends, not all libraries work well with Haste. I tried to use Lucid
|
||||
within a Haste application, for example, but that did not work. Apparently one
|
||||
of its dependencies (probably the UTF-8 codec, as far as I could debug the
|
||||
problem) performs some low-level performance optimizations that are
|
||||
incompatible with Haste.
|
||||
|
||||
Haste itself is still being sporadically developed, but not active enough to be
|
||||
called alive. The compiler lags behind on the GHC version, and the upcoming 0.6
|
||||
version has stayed unreleased and in limbo state for at least 4 months on the
|
||||
git repository. The community around Haste is in a similar state. Various
|
||||
libraries do exist, such as [Shade](https://github.com/takeoutweight/shade)
|
||||
(HTML DSL, Reactive UI), [Perch](https://github.com/agocorona/haste-perch)
|
||||
(another HTML DSL), [haste-markup](https://github.com/ajnsit/haste-markup) (yet
|
||||
another HTML DSL) and
|
||||
[haste-dome](https://github.com/wilfriedvanasten/haste-dome) (_yet_ another
|
||||
HTML DSL), but they're all pretty much dead.
|
||||
|
||||
Despite having three options available, only Haste provides enough benefit of
|
||||
code reuse while remaining efficient enough for the kind of site that I
|
||||
envision. Haste really deserves more love than it is currently getting.
|
||||
|
||||
## More Haskell
|
||||
|
||||
In my quest for Haskell web development frameworks and tools, I came across a
|
||||
few other interesting libraries. One of them is
|
||||
[Clay](http://fvisser.nl/clay/), a CSS preprocessor as a DSL. This will by
|
||||
itself not solve the CSS synchronisation problem that I mentioned at the start
|
||||
of this article, but it could still be used to keep the CSS closer to code
|
||||
implementing the rest of the site.
|
||||
|
||||
It also would not do to write an article on Haskell web development and not
|
||||
mention a set of related projects: [MFlow](https://github.com/agocorona/MFlow),
|
||||
[HPlayground](https://github.com/agocorona/hplayground) and the more recent
|
||||
[Axiom](https://github.com/transient-haskell/axiom). These are ambitious
|
||||
efforts at building a very high-level and functional framework for both front
|
||||
and back end web development. I haven't spent nearly enough time on these
|
||||
projects to fully understand their scope, but I'm afraid of these being a bit
|
||||
too high level. This invariably results in reduced flexibility (i.e. too many
|
||||
opinions being hard-coded in the API) and less efficient JavaScript output.
|
||||
Axiom being based on GHCJS reinforces the latter concern.
|
||||
|
||||
# Other languages
|
||||
|
||||
I've covered OCaml and Haskell now, but there are relevant projects in other
|
||||
languages, too:
|
||||
|
||||
PureScript
|
||||
: [PureScript](http://www.purescript.org/) is the spiritual successor of Fay -
|
||||
except it does not try to be compatible with Haskell, and in fact
|
||||
[intentionally deviates from
|
||||
Haskell](https://github.com/purescript/documentation/blob/master/language/Differences-from-Haskell.md)
|
||||
at several points. Like Fay, and perhaps even more so, PureScript compiles down
|
||||
to efficient and small JavaScript.
|
||||
|
||||
Being a not-quite-Haskell language, sharing code between a PureScript front end
|
||||
and a Haskell back end is not possible, the differences are simply too large.
|
||||
It is, however, possible to go into the other direction: PureScript could also
|
||||
run on the back end in a NodeJS environment. I don't really know how well this
|
||||
is supported by the language ecosystem, but I'm not sure I'm comfortable with
|
||||
replacing the excellent quality of Haskell back end frameworks with a fragile
|
||||
NodeJS back end (or such is my perception, I admittedly don't have too much
|
||||
faith in most JavaScript-heavy projects).
|
||||
|
||||
The PureScript community is very active and many libraries are available in the
|
||||
[Pursuit](https://pursuit.purescript.org/) package repository. Of note is
|
||||
[Halogen](https://pursuit.purescript.org/packages/purescript-halogen), a
|
||||
high-level reactive UI library. One thing to be aware of is that not all
|
||||
libraries are written with space efficiency as their highest priority, the
|
||||
simple [Halogen
|
||||
button](https://github.com/slamdata/purescript-halogen/tree/v2.0.1/examples/basic)
|
||||
example already compiles down to a hefty 300 KB for me.
|
||||
|
||||
Elm
|
||||
: [Elm](http://elm-lang.org/) is similar to PureScript, but rather than trying to
|
||||
be a generic something-to-JavaScript compiler, Elm focuses exclusively on
|
||||
providing a good environment to create web UIs. The reactive UI libraries are
|
||||
well maintained and part of the core Elm project. Elm has a strong focus on
|
||||
being easy to learn and comes with good documentation and many examples to get
|
||||
started with.
|
||||
|
||||
Ur/Web
|
||||
: [Ur/Web](http://www.impredicative.com/ur/) is an ML and Haskell inspired
|
||||
programming language specifically designed for client/server programming. Based
|
||||
on its description, Ur/Web is exactly the kind of thing I'm looking for: It
|
||||
uses a single language for the front and back ends and provides convenient
|
||||
methods for communication between the two.
|
||||
|
||||
This has been a low priority on my to-try list because it seems to be primarily
|
||||
a one-man effort, and the ecosystem around it is pretty small. Using Ur/Web for
|
||||
practical applications will likely involve writing your own libraries or
|
||||
wrappers for many common tasks, such as for image manipulation or advanced text
|
||||
processing. Nonetheless, I definitely should be giving this a try sometime.
|
||||
|
||||
(Besides, who still uses frames in this day and age? :-)
|
||||
|
||||
Opa
|
||||
: I'll be moving out of the functional programming world for a bit.
|
||||
|
||||
[Opa](http://opalang.org/) is another language and environment designed for
|
||||
client/server programming. Opa takes a similar approach to "everything in
|
||||
PureScript": Just compile everything to JavaScript and run the server-side code
|
||||
on NodeJS. The main difference with other to-JavaScript compilers is that Opa
|
||||
supports mixing back end code with front end code, and it can automatically
|
||||
figure out where the code should be run and how the back and front ends
|
||||
communicate with each other.
|
||||
|
||||
Opa, as a language, is reminiscent of a statically-typed JavaScript with
|
||||
various syntax extensions. While it does support SQL databases, its database
|
||||
API seems to strongly favor object-oriented use rather than relational database
|
||||
access.
|
||||
|
||||
GWT
|
||||
: Previously I compared web development to native GUI application development.
|
||||
There is no reason why you can't directly apply native development structure
|
||||
and strategies onto the web, and that's exactly what
|
||||
[GWT](http://www.gwtproject.org/) does. It provides a widget-based programming
|
||||
environment that eventually runs on the server and compiles the client-side
|
||||
part to JavaScript. I haven't really considered it further, as Java is not a
|
||||
language I can be very productive in.
|
||||
|
||||
Webtoolkit
|
||||
: In the same vein, there's [Wt](https://www.webtoolkit.eu/wt). The name might
|
||||
suggest that it is a web-based clone of Qt, and indeed that's what it looks
|
||||
like. Wt is written in C++, but there are wrappers for [other
|
||||
languages](https://www.webtoolkit.eu/wt/other_language). None of the languages
|
||||
really interest me much, however.
|
||||
|
||||
That said, if I had to write a web UI for a resource-constrained device, this
|
||||
seems like an excellent project to consider.
|
||||
|
||||
# To conclude
|
||||
|
||||
To be honest, I am a bit overwhelmed at the number of options. On the one hand,
|
||||
it makes me very happy to see that a lot is happening in this world, and that
|
||||
alternatives to boring web frameworks do exist. Yet after all this research I
|
||||
still have no clue what I should use to develop my next website. I do like the
|
||||
mix and match culture of Haskell, which has the potential to form a development
|
||||
environment entirely to my own taste and with my own chosen trade-offs. On the
|
||||
other hand, the client-side Haskell solutions are simply too immature and
|
||||
integration with the back end frameworks is almost nonexistent.
|
||||
|
||||
Almost none of the frameworks I discussed attempt to tackle the CSS problem
|
||||
that I mentioned in the introduction, so there is clearly room for more
|
||||
research in this area.
|
||||
|
||||
There are a few technologies that I should spend more time on to familiarize
|
||||
myself with. Ur/Web is an obvious candidate here, but perhaps it is possible to
|
||||
create a Haskell interface to Wt. Or maybe some enhancements to the Haste
|
||||
ecosystem could be enough to make that a workable solution instead.
|
||||
576
dat/doc/sqlaccess.md
Normal file
576
dat/doc/sqlaccess.md
Normal file
|
|
@ -0,0 +1,576 @@
|
|||
% Multi-threaded Access to an SQLite3 Database
|
||||
|
||||
(Published on **2011-11-26**)
|
||||
|
||||
(Minor 2013-04-06 update: I abstracted my message passing solution from ncdc
|
||||
and implemented it in a POSIX C library for general use. It's called _sqlasync_
|
||||
and is part of my [Ylib library collection](/ylib).)
|
||||
|
||||
# Introduction
|
||||
|
||||
As I was porting [ncdc](/ncdc) over to use SQLite3 as storage backend, I
|
||||
stumbled on a problem: The program uses a few threads for background jobs, and
|
||||
it would be nice to give these threads access to the database.
|
||||
|
||||
Serializing all database access through the main thread wouldn't have been very
|
||||
hard to implement in this particular case, but that would have been far from
|
||||
optimal. The main thread is also responsible for keeping the user interface
|
||||
responsive and handling most of the network interaction. Overall responsiveness
|
||||
of the program would significantly improve when the threads could access the
|
||||
database without involvement of the main thread.
|
||||
|
||||
Which brought me to the following questions: What solutions are available for
|
||||
providing multi-threaded access to an SQLite database? What problems may I run
|
||||
in to? I was unable to find a good overview in this area on the net, so I wrote
|
||||
this article with the hope to improve that situation.
|
||||
|
||||
# SQLite3 and threading
|
||||
|
||||
Let's first see what SQLite3 itself has to offer in terms of threading support.
|
||||
The official documentation mentions threading support several times in various
|
||||
places, but this information is scattered around and no good overview is given.
|
||||
Someone has tried to organize this before on a [single
|
||||
page](http://www.sqlite.org/cvstrac/wiki?p=MultiThreading), and while this
|
||||
indeed gives a nice overview, it has unfortunately not been updated since 2006.
|
||||
The advice is therefore a little on the conservative side.
|
||||
|
||||
Nonetheless, it is wise to remain portable with different SQLite versions,
|
||||
especially when writing programs that dynamically link with some random version
|
||||
installed on someone's system. It should be fairly safe to assume that SQLite
|
||||
binaries provided by most systems, if not all, are compiled with thread safety
|
||||
enabled. This doesn't mean all that much, unfortunately: The only thing _thread
|
||||
safe_ means in this context is that you can use SQLite3 in multiple threads,
|
||||
but a single database connection should still stay within a single thread.
|
||||
|
||||
Since SQLite 3.3.1, which was released in early 2006, it is possible to move a
|
||||
single database connection along multiple threads. Doing this with older
|
||||
versions is not advisable, as explained in [the SQLite
|
||||
FAQ](http://www.sqlite.org/faq.html#q6). But even with 3.3.1 and later there is
|
||||
an annoying restriction: A connection can only be passed to another thread when
|
||||
any outstanding statements are closed and finalized. In practice this means
|
||||
that it is not possible to keep a prepared statement in memory for later
|
||||
executions.
|
||||
|
||||
Since SQLite 3.5.0, released in 2007, a single SQLite connection can be used
|
||||
from multiple threads simultaneously. SQLite will internally manage locks to
|
||||
avoid any data corruption. I can't recommend making use of this facility,
|
||||
however, as there are still many issues with the API. The [error fetching
|
||||
functions](http://www.sqlite.org/c3ref/errcode.html) and
|
||||
[sqlite3\_last\_insert\_row\_id()](http://www.sqlite.org/c3ref/last_insert_rowid.html),
|
||||
among others, are still useless without explicit locking in the application. I
|
||||
also believe that the previously mentioned restriction on having to finalize
|
||||
statements has been relaxed in this version, so keeping prepared statements in
|
||||
memory and passing them among different threads becomes possible.
|
||||
|
||||
When using multiple database connections within a single process, SQLite offers
|
||||
a facility to allow [sharing of its
|
||||
cache](http://www.sqlite.org/sharedcache.html), in order to reduce memory usage
|
||||
and disk I/O. The semantics of this feature have changed with different SQLite
|
||||
versions and appear to have stabilised in 3.5.0. This feature may prove useful
|
||||
to optimize certain situations, but does not open up new possibilities of
|
||||
communicating with a shared database.
|
||||
|
||||
# Criteria
|
||||
|
||||
Before looking at some available solutions, let's first determine the criteria
|
||||
we can use to evaluate them.
|
||||
|
||||
Implementation size
|
||||
: Obviously, a solution that requires only a few lines of code to implement is
|
||||
preferable over one that requires several levels of abstraction in order to be
|
||||
usable. I won't be giving actual implementations here, so the sizes will be
|
||||
rough estimates for comparison purposes. The actual size of an implementation
|
||||
is of course heavily dependent on the programming environment as well.
|
||||
|
||||
Memory/CPU overhead
|
||||
: The most efficient solution for a single-threaded application is to simply have
|
||||
direct access to a single database connection. Every solution is in principle a
|
||||
modification or extension of this idea, and will therefore add a certain
|
||||
overhead. This overhead manifests itself in both increased CPU and memory
|
||||
usage. The order of which varies between solutions.
|
||||
|
||||
Prepared statement re-use
|
||||
: Is it possible to prepare a statement once and keep using it for the lifetime
|
||||
of the program? Or will prepared statements have to be thrown away and
|
||||
recreated every time? Keeping statement handles in memory will result in a nice
|
||||
performance boost for applications that run the same SQL statement many times.
|
||||
|
||||
Transaction grouping
|
||||
: A somewhat similar issue to prepared statement re-use: From a performance point
|
||||
of view, it is very important to try to batch many UPDATE/DELETE/INSERT
|
||||
statements within a single transaction, as opposed to running each modify query
|
||||
separately. Running each query separately will force SQLite to flush the data
|
||||
to disk separately every time, whereas a single transaction will batch-flush
|
||||
all the changes to disk in a single go. Some solutions allow for grouping
|
||||
multiple statements in a single transaction quite easily, while others require
|
||||
more involved steps.
|
||||
|
||||
Background processing
|
||||
: In certain situations it may be desirable to queue a certain query for later
|
||||
processing, without explicitly waiting for it to complete. For example, if
|
||||
something in the database has to be modified as a result of user interaction in
|
||||
a UI thread, then the application would feel a lot more responsive if the
|
||||
UPDATE query was simply queued to be processed in a background thread than when
|
||||
the query had run in the UI thread itself. A database accessing solution with
|
||||
built-in support for background processing of queries will significantly help
|
||||
with building a responsive application.
|
||||
|
||||
Concurrency
|
||||
: Concurrency indicates how well the solution allows for concurrent access. The
|
||||
worst possible concurrency is achieved when a single database connection is
|
||||
used for all threads, as only a single action can be performed on the database
|
||||
at any point in time. Maximum concurrency is achieved when each thread has its
|
||||
own SQLite connection. Note that maximum concurrency doesn't mean that the
|
||||
database can be accessed in a _fully_ concurrent manner. SQLite uses internal
|
||||
database-level locks to avoid data corruption, and these will limit the actual
|
||||
maximum concurrency. I am not too knowledgeable about the inner workings of
|
||||
these locks, but it is at least possible to have a large number of truly
|
||||
concurrent database _reads_. Database _writes_ from multiple threads may
|
||||
still allow for significantly more concurrency than when they are manually
|
||||
serialized over a single database connection.
|
||||
|
||||
Portability
|
||||
: What is the minimum SQLite version required to implement the solution? Does it
|
||||
require any special OS features or SQLite compilation settings? As outlined
|
||||
above, different versions of SQLite offer different features with regards to
|
||||
threading. Relying on one of the relatively new features will decrease
|
||||
portability.
|
||||
|
||||
# The Solutions
|
||||
|
||||
Here I present four solutions to allow database access from multiple threads.
|
||||
Note that this list may not be exhaustive, these are just a few solutions that
|
||||
I am aware of. Also note that none of the solutions presented here are in any
|
||||
way new. Most of these paradigms date back to the entire notion of concurrent
|
||||
programming, and have been applied in software since decades ago.
|
||||
|
||||
## Connection sharing
|
||||
|
||||
By far the simplest solution to implement: Keep a single database connection
|
||||
throughout your program and allow every thread to access it. Of course, you
|
||||
will need to be careful to always put locks around the code where you access
|
||||
the database handler. An example implementation could look like the following:
|
||||
|
||||
```c
|
||||
// The global SQLite connection
|
||||
sqlite3 *db;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if(sqlite3_open("database.sqlite3", &db))
|
||||
exit(1);
|
||||
|
||||
// start some threads
|
||||
// wait until the threads are finished
|
||||
|
||||
sqlite3_close(db);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *some_thread(void *arg) {
|
||||
sqlite3_mutex_enter(sqlite3_db_mutex(db));
|
||||
// Perform some queries on the database
|
||||
sqlite3_mutex_leave(sqlite3_db_mutex(db));
|
||||
}
|
||||
```
|
||||
|
||||
Implementation size
|
||||
: This is where connection sharing shines: There is little extra code required
|
||||
when compared to using a database connection in a single-threaded context. All
|
||||
you need to be careful of is to lock the mutex before using the database, and
|
||||
to unlock it again afterwards.
|
||||
|
||||
Memory/CPU overhead
|
||||
: As the only addition to the single-threaded case are the locks, this solution
|
||||
has practically no memory overhead. The mutexes are provided by SQLite,
|
||||
after all. CPU overhead is also as minimal as it can be: mutexes are the most
|
||||
primitive type provided by threading libraries to serialize access to a shared
|
||||
resource, and are therefore very efficient.
|
||||
|
||||
Prepared statement re-use
|
||||
: Prepared statements can be safely re-used inside a single enter/leave block.
|
||||
However, if you want to remain portable with SQLite versions before 3.5.0, then
|
||||
any prepared statements **must** be freed before the mutex is unlocked. This can
|
||||
be a major downside if the enter/leave blocks themselves are relatively short
|
||||
but accessed quite often. If portability with older versions is not an issue,
|
||||
then this restriction is gone and prepared statements can be re-used easily.
|
||||
|
||||
Transaction grouping
|
||||
: A reliable implementation will not allow transactions to span multiple
|
||||
enter/leave blocks. So as with prepared statements, transactions need to be
|
||||
committed to disk before the mutex is unlocked. Again shared with prepared
|
||||
statement re-use is that this limitation may prove to be a significant problem
|
||||
in optimizing application performance, disk I/O in particular. One way to lower
|
||||
the effects of this limitation is to increase the size of a single enter/leave
|
||||
block, thus allowing for more work to be done in a single transaction. Code
|
||||
restructuring may be required in order to efficiently implement this. Another
|
||||
way to get around this problem is to allow a transaction to span multiple
|
||||
enter/leave blocks. Implementing this reliably may not be an easy task,
|
||||
however, and will most likely require application-specific knowledge.
|
||||
|
||||
Background processing
|
||||
: Background processing is not natively supported with connection sharing. It is
|
||||
possible to spawn a background thread to perform database operations each time
|
||||
that this is desirable. But care should be taken to make sure that these
|
||||
background threads will execute dependent queries in the correct order. For
|
||||
example, if thread A spawns a background thread, say B, to execute an UPDATE
|
||||
query, and later thread A wants to read that same data back, it must first wait
|
||||
for thread B to finish execution. This may add more inter-thread communication
|
||||
than is preferable.
|
||||
|
||||
Concurrency
|
||||
: There is no concurrency at all here. Since the database connection is protected
|
||||
by an exclusive lock, only a single thread can operate on the database at any
|
||||
point in time. Additionally, one may be tempted to increase the size of an
|
||||
enter/leave block in order to allow for larger transactions or better re-use of
|
||||
prepared statements. However, any time spent on performing operations that do
|
||||
not directly use the database within such an enter/leave block will lower the
|
||||
maximum possible database concurrency even further.
|
||||
|
||||
Portability
|
||||
: Connection sharing requires at least SQLite 3.3.1 in order to pass the same
|
||||
database connection around. SQLite must be compiled with threading support
|
||||
enabled. If prepared statements are kept around outside of an enter/leave
|
||||
block, then version 3.5.0 or higher will be required.
|
||||
|
||||
## Message passing
|
||||
|
||||
An alternative approach is to allow only a single thread to access the
|
||||
database. Any other thread that wants to access the database in any way will
|
||||
then have to communicate with this database thread. This communication is done
|
||||
by sending messages (_requests_) to the database thread, and, when query
|
||||
results are required, receiving back one or more _response_ messages.
|
||||
|
||||
Message passing schemes and libraries are available for many programming
|
||||
languages and come in many different forms. For this article, I am going to
|
||||
assume that an asynchronous and unbounded FIFO queue is used to pass around
|
||||
messages, but most of the following discussion will apply to bounded queues as
|
||||
well. I'll try to note the important differences between the two where
|
||||
applicable.
|
||||
|
||||
A very simple and naive implementation of a message passing solution is given
|
||||
below. Here I assume that `queue_create()` will create a message queue (type
|
||||
`message_queue`), `queue_get()` will return the next message in the queue, or
|
||||
block if the queue is empty. `thread_create(func, arg)` will run _func_ in a
|
||||
newly created thread and pass _arg_ as its argument. Error handling has been
|
||||
omitted to keep this example concise.
|
||||
|
||||
```c
|
||||
void *db_thread(void *arg) {
|
||||
message_queue *q = arg;
|
||||
|
||||
sqlite3 *db;
|
||||
if(sqlite3_open("database.sqlite3", &db))
|
||||
return ERROR;
|
||||
|
||||
request_msg *m;
|
||||
  while((m = queue_get(q))) {
|
||||
if(m->action == QUIT)
|
||||
break;
|
||||
if(m->action == EXEC)
|
||||
sqlite3_exec(db, m->query, NULL, NULL, NULL);
|
||||
}
|
||||
|
||||
sqlite3_close(db);
|
||||
return OK;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
message_queue *db_queue = queue_create();
|
||||
thread_create(db_thread, db_queue);
|
||||
// Do work.
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
This example implementation has a single database thread running in the
|
||||
background that accepts the messages `QUIT`, to stop processing queries and
|
||||
close the database, and `EXEC`, to run a certain query on the database. No
|
||||
support is available yet for passing query results back to the thread that sent
|
||||
the message. This can be implemented by including a separate `message_queue`
|
||||
object in the request messages, to which the results can be sent.
|
||||
|
||||
Implementation size
|
||||
: This will largely depend on the used programming environment and the complexity
|
||||
of the database thread. If your environment already comes with a message queue
|
||||
implementation, and constructing the request/response messages is relatively
|
||||
simple, then a simple implementation as shown above will not require much code.
|
||||
On the other hand, if you have to implement your own message queue or want more
|
||||
intelligence in the database thread to improve efficiency, then the complete
|
||||
implementation may be significantly larger than that of connection sharing.
|
||||
|
||||
Memory/CPU overhead
|
||||
: Constructing and passing around messages will incur a CPU overhead, though with
|
||||
an efficient implementation this should not be significant enough to worry
|
||||
about. Memory usage is highly dependent on the size of the messages being
|
||||
passed around and the length of the queue. If messages are queued faster than
|
||||
they are processed and there is no bound on the queue length, then a process
|
||||
may quickly run out of memory. On the other hand, if messages are processed
|
||||
fast enough then the queue will generally not have more than a single message
|
||||
in it, and the memory overhead will remain fairly small.
|
||||
|
||||
Prepared statement re-use
|
||||
: As the database connection will never leave the database thread, prepared
|
||||
statements can be kept in memory and re-used without problems.
|
||||
|
||||
Transaction grouping
|
||||
: A naive but robust implementation will handle each message in its own
|
||||
transaction. A more clever database thread, however, could wait for multiple
|
||||
messages to be queued and can then batch-execute them in a single transaction.
|
||||
Correctly implementing this may require some additional information to be
|
||||
specified along with the request, such as whether the query may be combined in
|
||||
a single transaction or whether it may only be executed outside of a
|
||||
transaction. Some threads may want to have confirmation that the data has been
|
||||
successfully written to disk, in which case responsiveness will not improve if
|
||||
such actions are queued for later processing. Nonetheless, since the database
|
||||
thread has all the knowledge about the state of the database and any
|
||||
outstanding actions, transaction grouping can be implemented quite reliably.
|
||||
|
||||
Background processing
|
||||
: Background processing is supported natively with a message passing
|
||||
implementation: a thread that isn't interested in query results can simply
|
||||
queue the action to be performed by the database thread without indicating a
|
||||
return path for the results. Of course, if a thread queues many messages that
|
||||
do not require results followed by one that does, it will have to wait for all
|
||||
earlier messages to be processed before receiving any results for the last one.
|
||||
In the case that the actions are not dependent on each other, the database
|
||||
thread may re-order the messages in order to process the last request first.
|
||||
This requires knowledge about dependencies and may significantly complicate the
|
||||
implementation, however.
|
||||
|
||||
Concurrency
|
||||
: As with a shared database connection, database access is exclusive: Only a
|
||||
single action can be performed on the database at a time. Unlike connection
|
||||
sharing, however, any processing within the application will not further
|
||||
degrade the maximum attainable concurrency. As long as unbounded asynchronous
|
||||
queues are used to pass around messages, the database thread will be able to
|
||||
continue working on the database without waiting for another thread to process
|
||||
the results.
|
||||
|
||||
Portability
|
||||
: This is where message passing shines: SQLite is only used within the database
|
||||
thread, no other thread will have a need to call any SQLite function. This
|
||||
allows any version of SQLite to be used, even those that have not been compiled
|
||||
with thread safety enabled.
|
||||
|
||||
## Thread-local connections
|
||||
|
||||
A rather different approach to giving each thread access to a single database
|
||||
is to simply open a new database connection for each thread. This way each
|
||||
connection will be local to the specific thread, which in turn has the power to
|
||||
do with it as it likes without worrying about what the other threads do. The
|
||||
following is a short example to illustrate the idea:
|
||||
|
||||
```c
|
||||
void *some_thread(void *arg) {
|
||||
sqlite3 *db;
|
||||
if(sqlite3_open("database.sqlite3", &db))
|
||||
return ERROR;
|
||||
|
||||
// Do some work on the database
|
||||
|
||||
sqlite3_close(db);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int i;
|
||||
for(i=0; i<10; i++)
|
||||
thread_create(some_thread, NULL);
|
||||
|
||||
// Wait until the threads are done
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
Implementation size
|
||||
: Giving each thread its own connection is practically not much different from
|
||||
the single-threaded case where there is only a single database connection. And
|
||||
as the example shows, this can be implemented quite trivially.
|
||||
|
||||
Memory/CPU overhead
|
||||
: If we assume that threads are not created very often and each thread has a
|
||||
relatively long life, then the CPU and I/O overhead caused by opening a new
|
||||
connection for each thread will not be very significant. On the other hand, if
|
||||
threads are created quite often and lead a relatively short life before they
|
||||
are destroyed again, then opening a new connection each time will soon require
|
||||
more resources than running the queries themselves.
|
||||
|
||||
There is a significant memory overhead: every new database connection requires
|
||||
memory. If each connection also has a separate cache, then every thread will
|
||||
quickly require several megabytes only to interact with the database. Since
|
||||
version 3.5.0, SQLite allows sharing of this cache with the other threads,
|
||||
which will reduce this memory overhead.
|
||||
|
||||
Prepared statement re-use
|
||||
: Prepared statements can be re-used without limitations within a single thread.
|
||||
This will allow full re-use of prepared statements if each thread has a
|
||||
different task, in which case every thread will have different queries and
|
||||
access patterns anyway. But when every thread runs the same code, and thus also
|
||||
the same queries, it will still need its own copy of the prepared statement.
|
||||
Prepared statements are specific to a single database connection, so they can't
|
||||
be passed around between the threads. The same argument for CPU overhead works
|
||||
here: as long as threads are long-lived, then this will not be a very large
|
||||
problem.
|
||||
|
||||
Transaction grouping
|
||||
: Each thread has full access to its own database connection, so it can easily
|
||||
batch many queries in a single transaction. It is not possible, however, to
|
||||
group queries from the other threads in this same transaction as well. The
|
||||
grouping may therefore not be as optimal as a message passing solution could
|
||||
provide, but it is still a large improvement compared to connection sharing.
|
||||
|
||||
Background processing
|
||||
: Background processing is not easily possible. While it is possible to spawn a
|
||||
separate thread for each query that needs to be processed in the background, a
|
||||
new database connection will have to be opened every time this is done. This
|
||||
solution will obviously not be very efficient.
|
||||
|
||||
Concurrency
|
||||
: In general, it is not possible to get better concurrency than by providing each
|
||||
thread with its own database connection. This solution definitely wins in this
|
||||
area.
|
||||
|
||||
Portability
|
||||
: Thread-local connections are very portable: the only requirement is that SQLite
|
||||
has been built with threading support enabled. Connections are not passed
|
||||
around between threads, so any SQLite version will do. In order to make use of
|
||||
the shared cache feature, however, SQLite 3.5.0 is required.
|
||||
|
||||
## Connection pooling
|
||||
|
||||
A common approach in server-like applications is to have a connection pool.
|
||||
When a thread wishes to have access to the database, it requests a database
|
||||
connection from a pool of (currently) unused database connections. If no unused
|
||||
connections are available, it can either wait until one becomes available, or
|
||||
create a new database connection on its own. When a thread is done with a
|
||||
connection, it will add it back to the pool to allow it to be re-used in an
|
||||
other thread.
|
||||
|
||||
The following example illustrates a basic connection pool implementation in
|
||||
which a thread creates a new database connection when no connections are
|
||||
available. A global `db_pool` is defined, on which any thread can call
|
||||
`pool_pop()` to get an SQLite connection if there is one available, and
|
||||
`pool_push()` can be used to push a connection back to the pool. This pool can
|
||||
be implemented as any kind of set: a FIFO or a stack could do the trick, as
|
||||
long as it can be accessed from multiple threads concurrently.
|
||||
|
||||
```c
|
||||
// Some global pool of database connections
|
||||
pool_t *db_pool;
|
||||
|
||||
sqlite3 *get_database() {
|
||||
sqlite3 *db = pool_pop(db_pool);
|
||||
if(db)
|
||||
return db;
|
||||
if(sqlite3_open("database.sqlite3", &db))
|
||||
return NULL;
|
||||
return db;
|
||||
}
|
||||
|
||||
void *some_thread(void *arg) {
|
||||
// Do some work
|
||||
|
||||
sqlite3 *db = get_database();
|
||||
|
||||
// Do some work on the database
|
||||
|
||||
pool_push(db_pool, db);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int i;
|
||||
for(i=0; i<10; i++)
|
||||
thread_create(some_thread, NULL);
|
||||
|
||||
// Wait until the threads are done
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
Implementation size
|
||||
: A connection pool is in essence not very different from thread-local
|
||||
connections. The only major difference is that the call to sqlite3\_open() is
|
||||
replaced with a function call to obtain a connection from the pool and
|
||||
sqlite3\_close() with one to give it back to the pool. As shown above, these
|
||||
functions can be fairly simple. Note, however, that unlike with thread-local
|
||||
connections it is advisable to "open" and "close" a connection more often in
|
||||
long-running threads, in order to give other threads a chance to use the
|
||||
connection as well.
|
||||
|
||||
Memory/CPU overhead
|
||||
: This mainly depends on the number of connections you allow to be in memory at
|
||||
any point in time. If this number is not bounded, as in the above example, then
|
||||
you can assume that after running your program for a certain time, there will
|
||||
always be enough unused connections available in the pool. Requesting a
|
||||
connection will then be very fast, since the overhead of creating a new
|
||||
connection, as would have been done with thread-local connections, is
|
||||
completely gone.
|
||||
|
||||
In terms of memory usage, however, it would be more efficient to put a maximum
|
||||
limit on the number of open connections, and have the thread wait until another
|
||||
thread gives a connection back to the pool. Similarly to thread-local
|
||||
connections, memory usage can be decreased by using SQLite's cache sharing
|
||||
feature.
|
||||
|
||||
Prepared statement re-use
|
||||
: Unfortunately, this is where connection pooling borrows from connection
|
||||
sharing. Prepared statements must be cleaned up before passing a connection to
|
||||
another thread if one aims to be portable. But even if you remove that
|
||||
portability requirement, prepared statements are always specific to a single
|
||||
connection. Since you can't assume that you will always get the same connection
|
||||
from the pool, caching prepared statements is not practical.
|
||||
|
||||
On the other hand, a connection pool does allow you to use a single connection
|
||||
for a longer period of time than with connection sharing without negatively
|
||||
affecting concurrency. Unless, of course, there is a limit on the number of
|
||||
open connections, in which case using a connection for a long period of time
|
||||
may starve another thread.
|
||||
|
||||
Transaction grouping
|
||||
: Pretty much the same arguments with re-using prepared statements also apply to
|
||||
transaction grouping: Transactions should be committed to disk before passing a
|
||||
connection back to the pool.
|
||||
|
||||
Background processing
|
||||
: This is also where a connection pool shares a lot of similarity with connection
|
||||
sharing. With thread-local storage, creating a worker thread to perform
|
||||
database operations on the background would be very inefficient. But since this
|
||||
inefficiency is being tackled by allowing connection re-use with a connection
|
||||
pool, it's not a problem. Still the same warning applies with regard to
|
||||
dependent queries, though.
|
||||
|
||||
Concurrency
|
||||
: Connection pooling gives you fine-grained control over how much concurrency
|
||||
you'd like to have. For maximum concurrency, don't put a limit on the number of
|
||||
maximum database connections. If there is a limit, then that will decrease the
|
||||
maximum concurrency in favor of lower memory usage.
|
||||
|
||||
Portability
|
||||
: Since database connections are being passed among threads, connection pooling
|
||||
will require at least SQLite 3.3.1 compiled with thread safety enabled. Making
|
||||
use of its cache sharing capabilities to reduce memory usage will require
|
||||
SQLite 3.5.0 or higher.
|
||||
|
||||
# Final notes
|
||||
|
||||
As for what I used for ncdc. I initially chose connection sharing, for its
|
||||
simplicity. Then when I noticed that the UI became less responsive than I found
|
||||
acceptable I started adding a simple queue for background processing of
|
||||
queries. Later I stumbled upon the main problem with that solution: I wanted to
|
||||
read back a value that was written in a background thread, and had no way of
|
||||
knowing whether the background thread had finished executing that query or not.
|
||||
I then decided to expand the background thread to allow for passing back query
|
||||
results, and transformed everything into a full message passing solution. This
|
||||
appears to be working well at the moment, and my current implementation has
|
||||
support for both prepared statement re-use and transaction grouping, which
|
||||
measurably increased performance.
|
||||
|
||||
To summarize, there isn't really a _best_ solution that works for every
|
||||
application. Connection sharing works well for applications where
|
||||
responsiveness and concurrency isn't of major importance. Message passing works
|
||||
well for applications that aim to be responsive, and is flexible enough for
|
||||
optimizing CPU and I/O by re-using prepared statements and grouping queries in
|
||||
larger transactions. Thread-local connections are suitable for applications
|
||||
that have a relatively fixed number of threads, whereas connection pooling
|
||||
works better for applications with a varying number of worker threads.
|
||||
75
dat/dump
75
dat/dump
|
|
@ -1,75 +0,0 @@
|
|||
=pod
|
||||
|
||||
I write a lot of miscellaneous little perl/shell scripts and micro-libraries
|
||||
for the purpose of getting something done. This page is a listing of those I
|
||||
thought might be useful to others as well.
|
||||
|
||||
I also maintain a collection of miscellaneous C micro-libraries. Those are
|
||||
listed under the collective name of L<Ylib|https://dev.yorhel.nl/ylib>.
|
||||
|
||||
=head2 maildir.pl
|
||||
|
||||
October 2012. A tiny weechat plugin to display the number of unread emails in a
|
||||
local Maildir. L<Latest
|
||||
source|http://www.weechat.org/scripts/source/stable/maildir.pl.html/>
|
||||
(L<1.0|http://p.blicky.net/wzbzs>).
|
||||
|
||||
=head2 ncdc-share-report
|
||||
|
||||
December 2011. Playing around with the Go programming language, I wrote another
|
||||
transfer log parser and statistics generator for ncdc.
|
||||
L<Example output|http://s.blicky.net/2012/ncdc-share-report.html>.
|
||||
|
||||
Download: L<0.3|http://p.blicky.net/h25z8>
|
||||
(L<0.2|http://p.blicky.net/6yx2d>, L<0.1|http://p.blicky.net/ab4lm>).
|
||||
|
||||
=head2 ncdc-transfer-stats
|
||||
|
||||
September 2011. L<ncdc|https://dev.yorhel.nl/ncdc> gained transfer logging
|
||||
features, and I wrote a quick Perl script to fetch some simple statistics from
|
||||
it. L<source|https://p.blicky.net/4V9Kg59kUJUN> (L<0.2|http://p.blicky.net/eu00a>, L<0.1|http://p.blicky.net/agolr>).
|
||||
|
||||
=head2 json.mll
|
||||
|
||||
December 2010. I was writing a client for the L<public VNDB
|
||||
API|http://vndb.org/d11> in OCaml and needed a JSON parser/generator. Since I
|
||||
wasn't happy with the currently available solutions - they try to do too many
|
||||
things and have too many dependencies - I decided to write a minimal JSON
|
||||
library myself. L<source|http://g.blicky.net/serika.git/tree/json.mll>
|
||||
|
||||
=head2 vinfo.c
|
||||
|
||||
November 2009. The L<public VNDB API|http://vndb.org/d11> was designed to be
|
||||
easy to use even from low level languages. I wrote this simple program to see
|
||||
how much work it would be to use the API in C, and as example code for anyone
|
||||
wishing to use the API for something more useful. Read the comments for more
|
||||
info. L<source|https://dev.yorhel.nl/download/code/vinfo.c>
|
||||
|
||||
=head2 Microdc2 log file parser
|
||||
|
||||
June 2007. Simple perl script that parses log files created by
|
||||
L<microdc2|http://corsair626.no-ip.org/microdc/> and outputs a simple and
|
||||
ugly html file with all uploaded files. It correctly merges chunked
|
||||
uploads, calculates average upload speed per file and total bandwidth used
|
||||
for uploads. L<source|https://dev.yorhel.nl/download/code/mdc2-parse.pl>
|
||||
|
||||
B<Note:> for those of you who still use microdc2, please have a look at
|
||||
L<ncdc|https://dev.yorhel.nl/ncdc>, a modern alternative.
|
||||
|
||||
=head2 yapong.c
|
||||
|
||||
February 2006. Yet Another Pong, and yet another program written just for
|
||||
testing/ learning purposes. Tested to work with the ncurses or pdcurses
|
||||
libraries. L<source|https://dev.yorhel.nl/download/code/yapong.c> (L<older
|
||||
version|https://dev.yorhel.nl/download/code/yapong-0.01.c>).
|
||||
|
||||
=head2 echoserv.c
|
||||
|
||||
February 2006. A simple non-blocking single-threaded TCP echo server,
|
||||
displaying how the select() system call can be used to handle multiple
|
||||
connections. L<source|https://dev.yorhel.nl/download/code/echoserv.c>
|
||||
|
||||
=head2 bbcode.c
|
||||
|
||||
January 2006. Simple BBCode to HTML converter written in plain C, for learning
|
||||
purposes. L<source|https://dev.yorhel.nl/download/code/bbcode.c>
|
||||
|
|
@ -1,102 +0,0 @@
|
|||
=pod
|
||||
|
||||
People who run AWStats on large log files have most likely noticed: the data
|
||||
files can grow quite large, resulting in both a waste of disk space and longer
|
||||
page generation times for the AWStats pages. I wrote a small script that
|
||||
analyzes these data files and can remove any information you think is
|
||||
unnecessary.
|
||||
|
||||
B<Download:> L<awshrink|https://dev.yorhel.nl/download/code/awshrink> (copy to
|
||||
/usr/bin to install).
|
||||
|
||||
|
||||
=head2 Important
|
||||
|
||||
Do B<NOT> use this script on data files that are not completed yet (i.e. data
|
||||
files of the month you're living in). This will result in inaccurate sorting of
|
||||
visits, pages, referers and whatever other list you're shrinking. Also, keep
|
||||
in mind that this is just a fast written perl hack, it is by no means fast and
|
||||
may hog some memory while shrinking data files.
|
||||
|
||||
|
||||
=head2 Usage
|
||||
|
||||
awshrink [-c -s] [-SECTION LINES] [..] datafile
|
||||
-s Show statistics
|
||||
-c Overwrite datafile instead of writing to a backupfile (datafile~)
|
||||
-SECTION LINES
|
||||
Shrink the selected SECTION to LINES lines. (See example below)
|
||||
|
||||
|
||||
=head2 Typical command-line usage
|
||||
|
||||
While awshrink is most useful for monthly cron jobs, here's an example of basic
|
||||
command line usage to demonstrate what the script can do:
|
||||
|
||||
$ wc -c awstats122007.a.txt
|
||||
29916817 awstats122007.a.txt
|
||||
|
||||
$ awshrink -s awstats122007.a.txt
|
||||
Section Size (Bytes) Lines
|
||||
SCREENSIZE* 74 0
|
||||
WORMS 131 0
|
||||
EMAILRECEIVER 135 0
|
||||
EMAILSENDER 143 0
|
||||
CLUSTER* 144 0
|
||||
LOGIN 155 0
|
||||
ORIGIN* 178 6
|
||||
ERRORS* 229 10
|
||||
SESSION* 236 7
|
||||
FILETYPES* 340 12
|
||||
MISC* 341 10
|
||||
GENERAL* 362 8
|
||||
OS* 414 29
|
||||
SEREFERRALS 587 34
|
||||
TIME* 1270 24
|
||||
DAY* 1293 31
|
||||
ROBOT 1644 40
|
||||
BROWSER 1992 127
|
||||
DOMAIN 2377 131
|
||||
UNKNOWNREFERERBROWSER 5439 105
|
||||
UNKNOWNREFERER 20585 317
|
||||
SIDER_404 74717 2199
|
||||
PAGEREFS 130982 2500
|
||||
KEYWORDS 288189 27036
|
||||
SIDER 1058723 25470
|
||||
SEARCHWORDS 5038611 157807
|
||||
VISITOR 23285662 416084
|
||||
* = not shrinkable
|
||||
|
||||
$ awshrink -s -c -VISITOR 100 -SEARCHWORDS 100 -SIDER 100 awstats122007.a.txt
|
||||
Section Size (Bytes) Lines
|
||||
SCREENSIZE* 74 0
|
||||
WORMS 131 0
|
||||
EMAILRECEIVER 135 0
|
||||
EMAILSENDER 143 0
|
||||
CLUSTER* 144 0
|
||||
LOGIN 155 0
|
||||
ORIGIN* 178 6
|
||||
ERRORS* 229 10
|
||||
SESSION* 236 7
|
||||
FILETYPES* 340 12
|
||||
MISC* 341 10
|
||||
GENERAL* 362 8
|
||||
OS* 414 29
|
||||
SEREFERRALS 587 34
|
||||
TIME* 1270 24
|
||||
DAY* 1293 31
|
||||
ROBOT 1644 40
|
||||
BROWSER 1992 127
|
||||
SEARCHWORDS 2289 100
|
||||
DOMAIN 2377 131
|
||||
SIDER 3984 100
|
||||
UNKNOWNREFERERBROWSER 5439 105
|
||||
VISITOR 5980 100
|
||||
UNKNOWNREFERER 20585 317
|
||||
SIDER_404 74717 2199
|
||||
PAGEREFS 130982 2500
|
||||
KEYWORDS 288189 27036
|
||||
* = not shrinkable
|
||||
|
||||
$ wc -c awstats122007.a.txt
|
||||
546074 awstats122007.a.txt
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
=pod
|
||||
|
||||
I<2016-08-16> - btrfs-size.pl is a quick little script to provide an overview
|
||||
of the disk space used by btrfs subvolumes. It's comparable to
|
||||
L<btrfs-size.sh|https://poisonpacket.wordpress.com/2015/05/26/btrfs-snapshot-size-disk-usage/>,
|
||||
but is somewhat faster and has a few options to sort the output.
|
||||
|
||||
Honestly, it's still hard to draw any conclusions from the sizes provided by
|
||||
btrfs, but sadly, L<ncdu|https://dev.yorhel.nl/ncdu> is useless for
|
||||
snapshot-heavy filesystems.
|
||||
|
||||
Only tested with btrfs-progs v4.4.1.
|
||||
|
||||
B<Download:> L<btrfs-size.pl|https://p.blicky.net/FNPXpbwMXfTI.txt>
|
||||
(L<syntax-highlighted version|https://p.blicky.net/FNPXpbwMXfTI>).
|
||||
|
||||
|
||||
=head2 Usage
|
||||
|
||||
btrfs-size.pl --help [-nser] <path>
|
||||
-n Order by path name
|
||||
-s Order by (total) subvolume size
|
||||
-e Order by exclusive subvolume size
|
||||
-r Reverse order
|
||||
|
||||
|
||||
=head2 Example output
|
||||
|
||||
# btrfs-size.pl /data
|
||||
gfbf007/cur 46.32 GiB 16.00 KiB
|
||||
gfbf007/snap/2016-08-14.08 46.32 GiB 428.00 KiB
|
||||
gfbf007/snap/2016-08-15.03 46.32 GiB 428.00 KiB
|
||||
gfbf007/snap/2016-08-16.03 46.32 GiB 16.00 KiB
|
||||
ggit011/cur 23.92 MiB 16.00 KiB
|
||||
ggit011/snap/2016-08-14.08 23.90 MiB 300.00 KiB
|
||||
ggit011/snap/2016-08-15.08 23.92 MiB 16.00 KiB
|
||||
gman015/cur 3.74 GiB 16.00 KiB
|
||||
gman015/snap/2016-08-14.08 3.74 GiB 112.00 KiB
|
||||
gman015/snap/2016-08-15.02 3.74 GiB 96.00 KiB
|
||||
gman015/snap/2016-08-16.02 3.74 GiB 16.00 KiB
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
=pod
|
||||
|
||||
GRenamR is a GTK+ mass file renamer written in Perl, the functionality is
|
||||
inspired by the
|
||||
L<rename|http://search.cpan.org/~rmbarker/File-Rename-0.05/rename.PL> command
|
||||
that comes with a Perl module.
|
||||
|
||||
GRenamR allows multiple file renaming using perl expressions. You can see the
|
||||
effects of your expression while typing it, and can preview your action before
|
||||
applying them. The accepted expressions are mostly the same as the rename
|
||||
command (see above paragraph): your expression will be evaluated with $_ set to
|
||||
the filename, and any modifications to this variable will result in the
|
||||
renaming of the file. There's one other variable that the rename command does
|
||||
not have: $i, which reflects the file number (starting from 0) in the current
|
||||
list. This allows expressions such as C<$_=sprintf'%03d.txt',$i>.
|
||||
|
||||
B<Download: > L<grenamr|https://dev.yorhel.nl/download/code/grenamr-0.1.pl>
|
||||
(copy to /usr/bin/ to install)
|
||||
|
||||
Requires the Gtk2 Perl module. Most distributions have a perl-gtk2 package.
|
||||
|
||||
|
||||
=head2 Example expressions
|
||||
|
||||
y/A-Z/a-z/ # Convert filenames to lowercase
|
||||
$_=lc # Same
|
||||
s/\.txt$/.utf8/ # Change all '.txt' extensions to '.utf8'
|
||||
s/([0-9]+)/sprintf'%04d',$1/eg # Zero-pad all numbers in filenames
|
||||
|
||||
# Replace each image filename with a zero-padded number starting from 1
|
||||
s/^.+\.jpg$/sprintf'%03d.jpg',$i+1/e
|
||||
|
||||
|
||||
=head2 Caveats / bugs / TODO
|
||||
|
||||
=over
|
||||
|
||||
=item * Calling functions as 'sleep' or 'exit' in the expression will trash the program
|
||||
|
||||
=item * It's currently not possible to manually order the file list, so $i is
|
||||
not useful in every situation
|
||||
|
||||
=item * It's currently not possible to manually rename files or exclude items
|
||||
from being affected by the expression
|
||||
|
||||
=item * The expression isn't executed in the opened directory, so things like
|
||||
L<-X|http://perldoc.perl.org/functions/-X.html> won't work
|
||||
|
||||
=back
|
||||
|
||||
=head2 Screenshot
|
||||
|
||||
[img scr grenamr.png GRenamR]
|
||||
|
|
@ -1,101 +0,0 @@
|
|||
=pod
|
||||
|
||||
I decided to do some experimentation with how the colours defined in ncurses
|
||||
are actually displayed in terminals, what the effects are of combining these
|
||||
colours with other attributes, and how colour schemes of a terminal can affect
|
||||
the displayed colours. To this end I wrote a small c file and ran it in
|
||||
different terminals and different configurations. Note that only the 8 basic
|
||||
NCurses colours are tested, the more flexible init_color() function is not
|
||||
used.
|
||||
|
||||
B<Source code: > L<nccolour.c|https://dev.yorhel.nl/download/code/nccolour.c>
|
||||
(L<syntax highlighted version|http://p.blicky.net/xu35c>)
|
||||
|
||||
=head2 Notes / observations
|
||||
|
||||
=over
|
||||
|
||||
=item * The most obvious conclusion: the displayed colours do not have the
|
||||
exact same colour value in every terminal. Some terminals also allow users to
|
||||
modify these colours.
|
||||
|
||||
=item * You can not assume that the default foreground or background colour can
|
||||
be represented by one of the 8 basic colours defined by NCurses.
|
||||
|
||||
=item * Specifying -1 as colour, to indicate the default foreground or
|
||||
background colour, seems to work fine in any terminal tested so far.
|
||||
|
||||
=item * All tested terminals render the foreground colour in a lighter shade
|
||||
when the A_BOLD attribute is set. This does not apply to the background colour.
|
||||
The result of this is that the text becomes visible when using A_BOLD when the
|
||||
foreground and background colour are set to the same value.
|
||||
|
||||
=item * Unfortunately, not all terminals are configured in such a way that all
|
||||
possible colours are readable. So as a developer you'll still have to support
|
||||
configurable colour schemes in your ncurses application. :-(
|
||||
|
||||
=item * On most terminals, setting the foreground and background colour to the
|
||||
same value without applying the A_BOLD attribute will make the text invisible.
|
||||
Don't rely on this, however, as this is not the case on OS X.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
=head2 Full screenshot
|
||||
|
||||
To avoid wasting unnecessary space, the comparison screenshots below only
|
||||
display the colour table. Here's a screenshot of the full output of the
|
||||
program, which also explains what each column means.
|
||||
|
||||
[img scr nccol-full.png ]
|
||||
|
||||
|
||||
=head2 Screenshots
|
||||
|
||||
=over
|
||||
|
||||
=item Arch Linux, Roxterm, Default color scheme
|
||||
|
||||
[img scr nccol-rox-b.png ]
|
||||
|
||||
=item Arch Linux, Roxterm, GTK color scheme
|
||||
|
||||
[img scr nccol-rox-w.png ]
|
||||
|
||||
=item Arch Linux, Roxterm, Tango color scheme
|
||||
|
||||
[img scr nccol-rox-t.png ]
|
||||
|
||||
=item Arch Linux, Roxterm, Modified Tango color scheme
|
||||
|
||||
[img scr nccol-rox-c.png ]
|
||||
|
||||
=item Arch Linux, xterm (default settings)
|
||||
|
||||
[img scr nccol-xterm.png ]
|
||||
|
||||
=item Ubuntu 11.10, Gnome-terminal
|
||||
|
||||
[img scr nccol-ubuntu.png ]
|
||||
|
||||
=item Debian Squeeze, VT (default settings)
|
||||
|
||||
[img scr nccol-debian.png ]
|
||||
|
||||
=item FreeBSD, VT (default settings)
|
||||
|
||||
[img scr nccol-fbsd.png ]
|
||||
|
||||
=item Mac OS X, Terminal
|
||||
|
||||
[img scr nccol-osx-terminal.png ]
|
||||
|
||||
=item Mac OS X, iTerm2
|
||||
|
||||
[img scr nccol-osx-iterm2.png ]
|
||||
|
||||
=item CentOS 6.4
|
||||
|
||||
[img scr nccol-centos64.png ]
|
||||
|
||||
=back
|
||||
76
dat/dump.md
Normal file
76
dat/dump.md
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
% Code dump
|
||||
|
||||
I write a lot of miscellaneous little perl/shell scripts and micro-libraries
|
||||
for the purpose of getting something done. This page is a listing of those I
|
||||
thought might be useful to others as well.
|
||||
|
||||
I also maintain a collection of miscellaneous C micro-libraries. Those are
|
||||
listed under the collective name of [Ylib](/ylib).
|
||||
|
||||
## maildir.pl
|
||||
|
||||
October 2012. A tiny weechat plugin to display the number of unread emails in a
|
||||
local Maildir. [Latest
|
||||
source](https://weechat.org/scripts/source/stable/maildir.pl.html/)
|
||||
([1.0](https://p.blicky.net/wzbzs)).
|
||||
|
||||
## ncdc-share-report
|
||||
|
||||
December 2011. Playing around with the Go programming language, I wrote another
|
||||
transfer log parser and statistics generator for ncdc.
|
||||
[Example output](https://s.blicky.net/2012/ncdc-share-report.html).
|
||||
|
||||
Download: [0.3](https://p.blicky.net/h25z8)
|
||||
([0.2](https://p.blicky.net/6yx2d), [0.1](https://p.blicky.net/ab4lm)).
|
||||
|
||||
## ncdc-transfer-stats
|
||||
|
||||
September 2011. [ncdc](/ncdc) gained transfer logging features, and I wrote a
|
||||
quick Perl script to fetch some simple statistics from it.
|
||||
[source](https://p.blicky.net/4V9Kg59kUJUN)
|
||||
([0.2](https://p.blicky.net/eu00a), [0.1](https://p.blicky.net/agolr)).
|
||||
|
||||
## json.mll
|
||||
|
||||
December 2010. I was writing a client for the [public VNDB
|
||||
API](https://vndb.org/d11) in OCaml and needed a JSON parser/generator. Since I
|
||||
wasn't happy with the currently available solutions - they try to do too many
|
||||
things and have too many dependencies - I decided to write a minimal JSON
|
||||
library myself. [source](https://g.blicky.net/serika.git/tree/json.mll)
|
||||
|
||||
## vinfo.c
|
||||
|
||||
November 2009. The [public VNDB API](https://vndb.org/d11) was designed to be
|
||||
easy to use even from low level languages. I wrote this simple program to see
|
||||
how much work it would be to use the API in C, and as example code for anyone
|
||||
wishing to use the API for something more useful. Read the comments for more
|
||||
info. [source](/download/code/vinfo.c)
|
||||
|
||||
## Microdc2 log file parser
|
||||
|
||||
June 2007. Simple perl script that parses log files created by
|
||||
[microdc2](http://corsair626.no-ip.org/microdc/) and outputs a simple and ugly
|
||||
html file with all uploaded files. It correctly merges chunked uploads,
|
||||
calculates average upload speed per file and total bandwidth used for uploads.
|
||||
[source](/download/code/mdc2-parse.pl)
|
||||
|
||||
**Note:** for those of you who still use microdc2, please have a look at
|
||||
[ncdc](/ncdc), a modern alternative.
|
||||
|
||||
## yapong.c
|
||||
|
||||
February 2006. Yet Another Pong, and yet another program written just for
|
||||
testing/ learning purposes. Tested to work with the ncurses or pdcurses
|
||||
libraries. [source](/download/code/yapong.c) ([older
|
||||
version](/download/code/yapong-0.01.c)).
|
||||
|
||||
## echoserv.c
|
||||
|
||||
February 2006. A simple non-blocking single-threaded TCP echo server,
|
||||
displaying how the select() system call can be used to handle multiple
|
||||
connections. [source](/download/code/echoserv.c)
|
||||
|
||||
## bbcode.c
|
||||
|
||||
January 2006. Simple BBCode to HTML converter written in plain C, for learning
|
||||
purposes. [source](/download/code/bbcode.c)
|
||||
99
dat/dump/awshrink.md
Normal file
99
dat/dump/awshrink.md
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
% AWStats Data File Shrinker
|
||||
|
||||
People who run AWStats on large log files have most likely noticed: the data
|
||||
files can grow quite large, resulting in both a waste of disk space and longer
|
||||
page generation times for the AWStats pages. I wrote a small script that
|
||||
analyzes these data files and can remove any information you think is
|
||||
unnecessary.
|
||||
|
||||
**Download:** [awshrink](/download/code/awshrink) (copy to /usr/bin to
|
||||
install).
|
||||
|
||||
## Important
|
||||
|
||||
Do **NOT** use this script on data files that are not completed yet (i.e. data
|
||||
files of the month you're living in). This will result in inaccurate sorting of
|
||||
visits, pages, referers and whatever other list you're shrinking. Also, keep
|
||||
in mind that this is just a fast written perl hack, it is by no means fast and
|
||||
may hog some memory while shrinking data files.
|
||||
|
||||
## Usage
|
||||
|
||||
awshrink [-c -s] [-SECTION LINES] [..] datafile
|
||||
-s Show statistics
|
||||
-c Overwrite datafile instead of writing to a backupfile (datafile~)
|
||||
-SECTION LINES
|
||||
Shrink the selected SECTION to LINES lines. (See example below)
|
||||
|
||||
## Typical command-line usage
|
||||
|
||||
While awshrink is most useful for monthly cron jobs, here's an example of basic
|
||||
command line usage to demonstrate what the script can do:
|
||||
|
||||
$ wc -c awstats122007.a.txt
|
||||
29916817 awstats122007.a.txt
|
||||
|
||||
$ awshrink -s awstats122007.a.txt
|
||||
Section Size (Bytes) Lines
|
||||
SCREENSIZE* 74 0
|
||||
WORMS 131 0
|
||||
EMAILRECEIVER 135 0
|
||||
EMAILSENDER 143 0
|
||||
CLUSTER* 144 0
|
||||
LOGIN 155 0
|
||||
ORIGIN* 178 6
|
||||
ERRORS* 229 10
|
||||
SESSION* 236 7
|
||||
FILETYPES* 340 12
|
||||
MISC* 341 10
|
||||
GENERAL* 362 8
|
||||
OS* 414 29
|
||||
SEREFERRALS 587 34
|
||||
TIME* 1270 24
|
||||
DAY* 1293 31
|
||||
ROBOT 1644 40
|
||||
BROWSER 1992 127
|
||||
DOMAIN 2377 131
|
||||
UNKNOWNREFERERBROWSER 5439 105
|
||||
UNKNOWNREFERER 20585 317
|
||||
SIDER_404 74717 2199
|
||||
PAGEREFS 130982 2500
|
||||
KEYWORDS 288189 27036
|
||||
SIDER 1058723 25470
|
||||
SEARCHWORDS 5038611 157807
|
||||
VISITOR 23285662 416084
|
||||
* = not shrinkable
|
||||
|
||||
$ awshrink -s -c -VISITOR 100 -SEARCHWORDS 100 -SIDER 100 awstats122007.a.txt
|
||||
Section Size (Bytes) Lines
|
||||
SCREENSIZE* 74 0
|
||||
WORMS 131 0
|
||||
EMAILRECEIVER 135 0
|
||||
EMAILSENDER 143 0
|
||||
CLUSTER* 144 0
|
||||
LOGIN 155 0
|
||||
ORIGIN* 178 6
|
||||
ERRORS* 229 10
|
||||
SESSION* 236 7
|
||||
FILETYPES* 340 12
|
||||
MISC* 341 10
|
||||
GENERAL* 362 8
|
||||
OS* 414 29
|
||||
SEREFERRALS 587 34
|
||||
TIME* 1270 24
|
||||
DAY* 1293 31
|
||||
ROBOT 1644 40
|
||||
BROWSER 1992 127
|
||||
SEARCHWORDS 2289 100
|
||||
DOMAIN 2377 131
|
||||
SIDER 3984 100
|
||||
UNKNOWNREFERERBROWSER 5439 105
|
||||
VISITOR 5980 100
|
||||
UNKNOWNREFERER 20585 317
|
||||
SIDER_404 74717 2199
|
||||
PAGEREFS 130982 2500
|
||||
KEYWORDS 288189 27036
|
||||
* = not shrinkable
|
||||
|
||||
$ wc -c awstats122007.a.txt
|
||||
546074 awstats122007.a.txt
|
||||
37
dat/dump/btrfssize.md
Normal file
37
dat/dump/btrfssize.md
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
% btrfs-size.pl
|
||||
|
||||
_2016-08-16_ - btrfs-size.pl is a quick little script to provide an overview of
|
||||
the disk space used by btrfs subvolumes. It's comparable to
|
||||
[btrfs-size.sh](https://poisonpacket.wordpress.com/2015/05/26/btrfs-snapshot-size-disk-usage/),
|
||||
but is somewhat faster and has a few options to sort the output.
|
||||
|
||||
Honestly, it's still hard to draw any conclusions from the sizes provided by
|
||||
btrfs, but sadly, [ncdu](/ncdu) is useless for snapshot-heavy filesystems.
|
||||
|
||||
Only tested with btrfs-progs v4.4.1.
|
||||
|
||||
**Download:** [btrfs-size.pl](https://p.blicky.net/FNPXpbwMXfTI.txt)
|
||||
([syntax-highlighted version](https://p.blicky.net/FNPXpbwMXfTI)).
|
||||
|
||||
## Usage
|
||||
|
||||
btrfs-size.pl --help [-nser] <path>
|
||||
-n Order by path name
|
||||
-s Order by (total) subvolume size
|
||||
-e Order by exclusive subvolume size
|
||||
-r Reverse order
|
||||
|
||||
## Example output
|
||||
|
||||
# btrfs-size.pl /data
|
||||
gfbf007/cur 46.32 GiB 16.00 KiB
|
||||
gfbf007/snap/2016-08-14.08 46.32 GiB 428.00 KiB
|
||||
gfbf007/snap/2016-08-15.03 46.32 GiB 428.00 KiB
|
||||
gfbf007/snap/2016-08-16.03 46.32 GiB 16.00 KiB
|
||||
ggit011/cur 23.92 MiB 16.00 KiB
|
||||
ggit011/snap/2016-08-14.08 23.90 MiB 300.00 KiB
|
||||
ggit011/snap/2016-08-15.08 23.92 MiB 16.00 KiB
|
||||
gman015/cur 3.74 GiB 16.00 KiB
|
||||
gman015/snap/2016-08-14.08 3.74 GiB 112.00 KiB
|
||||
gman015/snap/2016-08-15.02 3.74 GiB 96.00 KiB
|
||||
gman015/snap/2016-08-16.02 3.74 GiB 16.00 KiB
|
||||
|
|
@ -1,28 +1,31 @@
|
|||
=pod
|
||||
% Demos
|
||||
|
||||
Yes, I realise that the title is plural, suggesting there's more than one demo.
|
||||
That is not quite true, unfortunately. The reason I chose to use plural form is
|
||||
simply in the hopes that I do, in fact, write more demos, and that this page
|
||||
will actually get more content in the future. I still happen to be a huge fan
|
||||
of the L<demoscene|http://demoscene.info/>, and still wish to contribute to
|
||||
of the [demoscene](http://demoscene.info/), and still wish to contribute to
|
||||
it... if only I could find the time and self-discipline to do so. In the
|
||||
meanwhile, here's one demo I did write some time ago:
|
||||
meanwhile, here's one demo I did write some time ago.
|
||||
|
||||
=head1 Blue Cubes
|
||||
*(2019 update: Don't get your hopes up, I likely won't ever write another demo.
|
||||
I don't have the patience for it, I guess.)*
|
||||
|
||||
[img right bluecubes.png Blue Cubes.]
|
||||
# Blue Cubes
|
||||
|
||||
{.right}
|
||||
August 2006. My first demo - or more exact: intro. Blue Cubes is a 64kB intro
|
||||
written in OpenGL/SDL with Linux as target OS. I wrote this intro within 10
|
||||
days without any prior experience in any of the fields of computer generated
|
||||
graphics or music. So needless to say, it sucks. I am ashamed even of the
|
||||
thought of releasing it at a respectable demoparty like
|
||||
L<Evoke|http://www.evoke.eu/2006/>. Still, it didn't feel I was unwelcome, I
|
||||
[Evoke](https://www.evoke.eu/2006/). Still, it didn't feel I was unwelcome, I
|
||||
did actually receive three prizes: 3rd prize in the 64k competition (there were
|
||||
only 3 actual entries, but oh well), best non-windows 64k intro (it was the
|
||||
only one in the competition), and the Digitale Kultur newcomer award, which
|
||||
actually is something to be proud of, I guess.
|
||||
|
||||
L<download|https://dev.yorhel.nl/download/yorhel~bluecubes.zip> -
|
||||
L<mirror|http://scene.org/file.php?file=/parties/2006/evoke06/in64/yorhel_bluecubes.zip&fileinfo>
|
||||
[download](/download/yorhel~bluecubes.zip) -
|
||||
[mirror](https://scene.org/file.php?file=/parties/2006/evoke06/in64/yorhel_bluecubes.zip&fileinfo)
|
||||
(includes linux binaries, windows port, and sources) -
|
||||
L<pouet comments|http://pouet.net/prod.php?which=25866>.
|
||||
[pouet comments](https://pouet.net/prod.php?which=25866).
|
||||
44
dat/dump/grenamr.md
Normal file
44
dat/dump/grenamr.md
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
% GTK+ Mass File Renamer
|
||||
|
||||
GRenamR is a GTK+ mass file renamer written in Perl, the functionality is
|
||||
inspired by the
|
||||
[rename](https://search.cpan.org/~rmbarker/File-Rename-0.05/rename.PL) command
|
||||
that comes with a Perl module.
|
||||
|
||||
GRenamR allows multiple file renaming using perl expressions. You can see the
|
||||
effects of your expression while typing it, and can preview your action before
|
||||
applying them. The accepted expressions are mostly the same as the rename
|
||||
command (see above paragraph): your expression will be evaluated with `$_` set
|
||||
to the filename, and any modifications to this variable will result in the
|
||||
renaming of the file. There's one other variable that the rename command does
|
||||
not have: `$i`, which reflects the file number (starting from 0) in the current
|
||||
list. This allows expressions such as `$_=sprintf'%03d.txt',$i`.
|
||||
|
||||
**Download:** [grenamr](/download/code/grenamr-0.1.pl)
|
||||
(copy to /usr/bin/ to install)
|
||||
|
||||
Requires the Gtk2 Perl module. Most distributions have a perl-gtk2 package.
|
||||
|
||||
## Example expressions
|
||||
|
||||
y/A-Z/a-z/ # Convert filenames to lowercase
|
||||
$_=lc # Same
|
||||
s/\.txt$/.utf8/ # Change all '.txt' extensions to '.utf8'
|
||||
s/([0-9]+)/sprintf'%04d',$1/eg # Zero-pad all numbers in filenames
|
||||
|
||||
# Replace each image filename with a zero-padded number starting from 1
|
||||
s/^.+\.jpg$/sprintf'%03d.jpg',$i+1/e
|
||||
|
||||
## Caveats / bugs / TODO
|
||||
|
||||
- Calling functions as 'sleep' or 'exit' in the expression will trash the program
|
||||
- It's currently not possible to manually order the file list, so $i is
|
||||
not useful in every situation
|
||||
- It's currently not possible to manually rename files or exclude items
|
||||
from being affected by the expression
|
||||
- The expression isn't executed in the opened directory, so things like
|
||||
[-X](https://perldoc.perl.org/functions/-X.html) won't work
|
||||
|
||||
## Screenshot
|
||||
|
||||
{.scr}
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
=pod
|
||||
% Insertion Performance Benchmarks
|
||||
|
||||
I<2013-07-05> - One of my favourite data structures in C is the ordered vector
|
||||
_2013-07-05_ - One of my favourite data structures in C is the ordered vector
|
||||
(or array, whatever you call them). Incredibly simple to implement, very low
|
||||
memory overhead, and can provide O(log n) lookup with a simple binary search.
|
||||
However, ordered vectors have one very weak point: insertion and deletion of
|
||||
|
|
@ -15,17 +15,16 @@ how much worse does insertion performance get compared to more complex data
|
|||
structures?
|
||||
|
||||
For comparison, I chose the B-tree and hash table implementations from
|
||||
L<klib|https://github.com/attractivechaos/klib> (from commit fff70758, to be
|
||||
[klib](https://github.com/attractivechaos/klib) (from commit fff70758, to be
|
||||
precise). My goal wasn't to benchmark the performance of different
|
||||
implementations, so I simply chose two implementations that I suspect are among
|
||||
the fastest. The vector implementation in the benchmarks is my own creation:
|
||||
L<vec.h|http://g.blicky.net/globster.git/tree/src/util/vec.h?id=2c11d2a> from
|
||||
the L<Globster|https://dev.yorhel.nl/globster> code base.
|
||||
[vec.h](https://g.blicky.net/globster.git/tree/src/util/vec.h?id=2c11d2a) from
|
||||
the [Globster](/globster) code base.
|
||||
|
||||
B<Source code:> L<ins-bench.c|http://p.blicky.net/r746e>
|
||||
**Source code:** [ins-bench.c](https://p.blicky.net/r746e)
|
||||
|
||||
|
||||
=head2 Best case & worst case
|
||||
## Best case & worst case
|
||||
|
||||
For a start, I decided to benchmark the best and worst case performance of
|
||||
inserting elements into a vector. The best case happens when inserting all
|
||||
|
|
@ -39,44 +38,42 @@ search. Actual performance will be thus be a bit worse, depending on whether
|
|||
the final application needs that binary search or whether it can assume its
|
||||
input to be already sorted.
|
||||
|
||||
L<[img graph insbench-bench-thumb.png ]|https://dev.yorhel.nl/img/insbench-bench.png>
|
||||
[  ](/img/insbench-bench.png)
|
||||
|
||||
Gnuplot script: (The awk(ward) part can likely be done natively in gnuplot as
|
||||
well, but I was too lazy to figure out how)
|
||||
|
||||
set terminal png size 1000, 1500
|
||||
set output "bench.png"
|
||||
set logscale xy
|
||||
set xlabel "number of items"
|
||||
set ylabel "average time per insert (ms)"
|
||||
set grid mxtics xtics mytics ytics
|
||||
plot "< awk '{print $1, $2/$1*1000}' bench-vec" title 'vector, worst case',\
|
||||
"< awk '{print $1, $2/$1*1000}' bench-best" title 'vector, best case',\
|
||||
"< awk '{print $1, $2/$1*1000}' bench-hash" title 'khash',\
|
||||
"< awk '{print $1, $2/$1*1000}' bench-btree" title 'kbtree'
|
||||
set terminal png size 1000, 1500
|
||||
set output "bench.png"
|
||||
set logscale xy
|
||||
set xlabel "number of items"
|
||||
set ylabel "average time per insert (ms)"
|
||||
set grid mxtics xtics mytics ytics
|
||||
plot "< awk '{print $1, $2/$1*1000}' bench-vec" title 'vector, worst case',\
|
||||
"< awk '{print $1, $2/$1*1000}' bench-best" title 'vector, best case',\
|
||||
"< awk '{print $1, $2/$1*1000}' bench-hash" title 'khash',\
|
||||
"< awk '{print $1, $2/$1*1000}' bench-btree" title 'kbtree'
|
||||
|
||||
## Average case
|
||||
|
||||
=head2 Average case
|
||||
|
||||
For the second benchmark I inserted values created with C<rand()>, which should
|
||||
For the second benchmark I inserted values created with `rand()`, which should
|
||||
be a more accurate simulation of some real-world applications. This time I'm
|
||||
not cheating with the vector implementation, a binary search is performed in
|
||||
order to insert the items in the correct location.
|
||||
|
||||
L<[img graph insbench-rand-thumb.png ]|https://dev.yorhel.nl/img/insbench-rand.png>
|
||||
[  ](/img/insbench-rand.png)
|
||||
|
||||
set terminal png size 1000, 1500
|
||||
set output "bench-rand.png"
|
||||
set logscale xy
|
||||
set xlabel "number of items"
|
||||
set ylabel "average time per insert (ms)"
|
||||
set grid mxtics xtics mytics ytics
|
||||
plot "< awk '{print $1, $2/$1*1000}' rand-vec" title 'vector',\
|
||||
"< awk '{print $1, $2/$1*1000}' rand-hash" title 'khash',\
|
||||
"< awk '{print $1, $2/$1*1000}' rand-btree" title 'kbtree'
|
||||
set terminal png size 1000, 1500
|
||||
set output "bench-rand.png"
|
||||
set logscale xy
|
||||
set xlabel "number of items"
|
||||
set ylabel "average time per insert (ms)"
|
||||
set grid mxtics xtics mytics ytics
|
||||
plot "< awk '{print $1, $2/$1*1000}' rand-vec" title 'vector',\
|
||||
"< awk '{print $1, $2/$1*1000}' rand-hash" title 'khash',\
|
||||
"< awk '{print $1, $2/$1*1000}' rand-btree" title 'kbtree'
|
||||
|
||||
|
||||
=head2 Benchmarking setup
|
||||
## Benchmarking setup
|
||||
|
||||
All benchmarks were performed on a 3 GHz Core Duo E8400 with a 6 MiB cache.
|
||||
Compiled with the Gentoo-provided gcc 4.6.3 at -O3, linked against glibc 2.15,
|
||||
86
dat/dump/nccolour.md
Normal file
86
dat/dump/nccolour.md
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
% Colours in NCurses
|
||||
|
||||
I decided to do some experimentation with how the colours defined in ncurses
|
||||
are actually displayed in terminals, what the effects are of combining these
|
||||
colours with other attributes, and how colour schemes of a terminal can affect
|
||||
the displayed colours. To this end I wrote a small c file and ran it in
|
||||
different terminals and different configurations. Note that only the 8 basic
|
||||
NCurses colours are tested, the more flexible init\_color() function is not
|
||||
used.
|
||||
|
||||
**Source code:** [nccolour.c](/download/code/nccolour.c)
|
||||
([syntax highlighted version](http://p.blicky.net/xu35c))
|
||||
|
||||
## Notes / observations
|
||||
|
||||
- The most obvious conclusion: the displayed colours do not have the exact same
|
||||
colour value in every terminal. Some terminals also allow users to modify
|
||||
these colours.
|
||||
- You can not assume that the default foreground or background colour can be
|
||||
represented by one of the 8 basic colours defined by NCurses.
|
||||
- Specifying -1 as colour, to indicate the default foreground or background
|
||||
colour, seems to work fine in any terminal tested so far.
|
||||
- All tested terminals render the foreground colour in a lighter shade when the
|
||||
A\_BOLD attribute is set. This does not apply to the background colour. The
|
||||
result of this is that the text becomes visible when using A\_BOLD when the
|
||||
foreground and background colour are set to the same value.
|
||||
- Unfortunately, not all terminals are configured in such a way that all
|
||||
possible colours are readable. So as a developer you'll still have to support
|
||||
configurable colour schemes in your ncurses application. :-(
|
||||
- On most terminals, setting the foreground and background colour to the same
|
||||
value without applying the A\_BOLD attribute will make the text invisible.
|
||||
Don't rely on this, however, as this is not the case on OS X.
|
||||
|
||||
## Full screenshot
|
||||
|
||||
To avoid wasting unnecessary space, the comparison screenshots below only
|
||||
display the colour table. Here's a screenshot of the full output of the
|
||||
program, which also explains what each column means.
|
||||
|
||||

|
||||
|
||||
## Screenshots
|
||||
|
||||
Arch Linux, Roxterm, Default color scheme
|
||||
|
||||

|
||||
|
||||
Arch Linux, Roxterm, GTK color scheme
|
||||
|
||||

|
||||
|
||||
Arch Linux, Roxterm, Tango color scheme
|
||||
|
||||

|
||||
|
||||
Arch Linux, Roxterm, Modified Tango color scheme
|
||||
|
||||

|
||||
|
||||
Arch Linux, xterm (default settings)
|
||||
|
||||

|
||||
|
||||
Ubuntu 11.10, Gnome-terminal
|
||||
|
||||

|
||||
|
||||
Debian Squeeze, VT (default settings)
|
||||
|
||||

|
||||
|
||||
FreeBSD, VT (default settings)
|
||||
|
||||

|
||||
|
||||
Mac OS X, Terminal
|
||||
|
||||

|
||||
|
||||
Mac OS X, iTerm2
|
||||
|
||||

|
||||
|
||||
CentOS 6.4
|
||||
|
||||

|
||||
122
dat/globster
122
dat/globster
|
|
@ -1,122 +0,0 @@
|
|||
=pod
|
||||
|
||||
[html]<p><b style="color: #f00">Project Abandoned</b><br />
|
||||
I've stopped development on Globster. I still believe the overall idea and
|
||||
architecture of Globster are good, and the DC community would definitely
|
||||
benefit from a remotely controllable client, but Globster in its current form
|
||||
wasn't going into the direction I wanted it to. I might restart the project
|
||||
from scratch (yet again) in the future, but for now... it's as dead as a cute
|
||||
zombie whale.<br /><br />
|
||||
</p>
|
||||
|
||||
<!-- This code is ugly as hell. -->
|
||||
<div style="width: 600px; height: 227px; background-image: url(/img/globster.png); margin-bottom: -30px">
|
||||
<b style="font-size: 14px; position: relative; left: 150px; top: 10px">The Globster What?</b>
|
||||
<p style="position: relative; left: 150px; top: 20px; width: 420px; text-align: right">
|
||||
Globster is an efficient file sharing client for the Direct Connect<br />
|
||||
network. It runs as a background daemon and provides<br />
|
||||
a convenient and high-level D-Bus API, making<br />
|
||||
it easy to write scripts, bots and user<br />
|
||||
interfaces for Direct Connect.
|
||||
</p></div>É
|
||||
|
||||
=head1 Adopt your own Globster
|
||||
|
||||
=head2 Download
|
||||
|
||||
There are no tarballs at the moment. You'll have to get it from the git repo:
|
||||
|
||||
git clone --recursive git://g.blicky.net/globster.git
|
||||
cd globster
|
||||
autoreconf -i
|
||||
./configure
|
||||
make
|
||||
sudo make install
|
||||
|
||||
When doing a C<git pull> to update your version later on, make sure to follow
|
||||
up with a C<git submodule update> to get the right dependencies, too.
|
||||
|
||||
The git repo is available for
|
||||
L<online browsing|http://g.blicky.net/globster.git/>.
|
||||
|
||||
=head2 Requirements
|
||||
|
||||
Globster can be compiled with a (moderately recent) GCC or clang. You'll need
|
||||
the following libraries: L<libdbus|http://dbus.freedesktop.org/>,
|
||||
L<GnuTLS|http://gnutls.org> and L<zlib|http://zlib.net/>. If your GnuTLS is too
|
||||
old (<= 2.12), you also need libgcrypt. The globsterctl script requires Perl
|
||||
and the Net::DBus module.
|
||||
|
||||
On Debian and Ubuntu, that boils down to the following:
|
||||
|
||||
apt-get install git make gcc libc-dev automake autoconf\
|
||||
pkg-config libdbus-1-dev libgnutls-dev libnet-dbus-perl
|
||||
|
||||
And for Arch Linux:
|
||||
|
||||
pacman -S base-devel git perl-net-dbus
|
||||
|
||||
I've only tested things on Linux (glibc and L<musl|http://www.musl-libc.org>),
|
||||
but I intent to support more kinda-sane POSIX systems in the future as well.
|
||||
Globster will no doubt require some more libraries as more basic features are
|
||||
being implemented. And, yes, I<of course> we will get static binaries!
|
||||
|
||||
=head2 Status
|
||||
|
||||
Remember when I called Globster a "file sharing" client? I lied. It doesn't
|
||||
share or download files yet, since it's currently in an early alpha stage. So
|
||||
what I<does> it do?
|
||||
|
||||
=over
|
||||
|
||||
=item * Connect to ADC and NMDC hubs
|
||||
|
||||
=item * User list management
|
||||
|
||||
=item * Chatting and private messaging
|
||||
|
||||
=back
|
||||
|
||||
Those features already make it perfectly suitable for writing chat-only bots
|
||||
and interfaces.
|
||||
|
||||
=head2 Usage
|
||||
|
||||
Globster isn't particularly hard to use, but usage documentation is currently a
|
||||
bit lacking. I have every intention to fix that, but for now, you're encouraged
|
||||
to join the development hub and bug me for help: C<adc://dc.blicky.net:2780/>.
|
||||
I did already write some
|
||||
L<API documentation|https://dev.yorhel.nl/globster/api>.
|
||||
|
||||
There are at this point not many scripts or interfaces available for Globster:
|
||||
|
||||
=over
|
||||
|
||||
=item * L<globsterctl|https://dev.yorhel.nl/globster/ctl> - A control script for the daemon, included in the git repo.
|
||||
|
||||
=item * L<globster-feedspam.pl|http://p.blicky.net/0z9uw> - An RSS / Atom notification script.
|
||||
|
||||
=item * L<globster-mhc.pl|http://p.blicky.net/8y8mv> - A hub chat link script. More useful as an example than anything else.
|
||||
|
||||
=item * L<globgraph|http://p.blicky.net/qvg59> - Munin plugin to monitor Direct Connect hubs.
|
||||
|
||||
=back
|
||||
|
||||
There's more to come. I'd love to have at least a convenient console client (a
|
||||
weechat or irssi plugin? An ncdc fork?) and perhaps a web-based interface. But
|
||||
other stuff is welcome, too. Who's going to write all that, you ask? Erm...
|
||||
well... You, perhaps? :-)
|
||||
|
||||
=head2 Final notes
|
||||
|
||||
As you've come to expect from me I<(right?)>, Globster is entirely written in C
|
||||
and available under a liberal MIT license.
|
||||
|
||||
Globster incorporates code from
|
||||
L<libev|http://software.schmorp.de/pkg/libev.html>,
|
||||
L<freetiger|http://klondike.es/freetiger/>,
|
||||
L<klib|https://github.com/attractivechaos/klib> and
|
||||
L<ylib|https://dev.yorhel.nl/ylib>.
|
||||
Additionally, L<autoconf-lean|https://bitbucket.org/GregorR/autoconf-lean> is
|
||||
used to keep the configure script fast.
|
||||
|
||||
|
|
@ -1 +0,0 @@
|
|||
../../globster/doc/api.pod
|
||||
|
|
@ -1 +0,0 @@
|
|||
../../globster/doc/globsterctl.pod
|
||||
|
|
@ -1 +0,0 @@
|
|||
../../globster/doc/globster.pod
|
||||
|
|
@ -1 +0,0 @@
|
|||
../../globster/doc/globster-launch.pod
|
||||
105
dat/globster.md
Normal file
105
dat/globster.md
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
% The Globster Direct Connect Client
|
||||
|
||||
<b style="color: #f00">Project Abandoned</b><br>
|
||||
I've stopped development on Globster. I still believe the overall idea and
|
||||
architecture of Globster are good, and the DC community would definitely
|
||||
benefit from a remotely controllable client, but Globster in its current form
|
||||
wasn't going into the direction I wanted it to. I might restart the project
|
||||
from scratch (yet again) in the future, but for now... it's as dead as a cute
|
||||
zombie whale.
|
||||
|
||||
<div style="width: 600px; height: 227px; background-image: url(/img/globster.png); margin-bottom: -30px">
|
||||
<b style="font-size: 14px; position: relative; left: 150px; top: 10px">The Globster What?</b>
|
||||
<p style="position: relative; left: 150px; top: 20px; width: 420px; text-align: right">
|
||||
Globster is an efficient file sharing client for the Direct Connect<br>
|
||||
network. It runs as a background daemon and provides<br>
|
||||
a convenient and high-level D-Bus API, making<br>
|
||||
it easy to write scripts, bots and user<br>
|
||||
interfaces for Direct Connect.
|
||||
</p></div>
|
||||
|
||||
# Adopt your own Globster
|
||||
|
||||
## Download
|
||||
|
||||
There are no tarballs at the moment. You'll have to get it from the git repo:
|
||||
|
||||
git clone --recursive git://g.blicky.net/globster.git
|
||||
cd globster
|
||||
autoreconf -i
|
||||
./configure
|
||||
make
|
||||
sudo make install
|
||||
|
||||
When doing a `git pull` to update your version later on, make sure to follow
|
||||
up with a `git submodule update` to get the right dependencies, too.
|
||||
|
||||
The git repo is available for
|
||||
[online browsing](https://g.blicky.net/globster.git/).
|
||||
|
||||
## Requirements
|
||||
|
||||
Globster can be compiled with a (moderately recent) GCC or clang. You'll need
|
||||
the following libraries: [libdbus](http://dbus.freedesktop.org/),
|
||||
[GnuTLS](http://gnutls.org) and [zlib](http://zlib.net/). If your GnuTLS is too
|
||||
old (<= 2.12), you also need libgcrypt. The globsterctl script requires Perl
|
||||
and the Net::DBus module.
|
||||
|
||||
On Debian and Ubuntu, that boils down to the following:
|
||||
|
||||
apt-get install git make gcc libc-dev automake autoconf\
|
||||
pkg-config libdbus-1-dev libgnutls-dev libnet-dbus-perl
|
||||
|
||||
And for Arch Linux:
|
||||
|
||||
pacman -S base-devel git perl-net-dbus
|
||||
|
||||
I've only tested things on Linux (glibc and [musl](http://www.musl-libc.org)),
|
||||
but I intend to support more kinda-sane POSIX systems in the future as well.
|
||||
Globster will no doubt require some more libraries as more basic features are
|
||||
being implemented. And, yes, _of course_ we will get static binaries!
|
||||
|
||||
## Status
|
||||
|
||||
Remember when I called Globster a "file sharing" client? I lied. It doesn't
|
||||
share or download files yet, since it's currently in an early alpha stage. So
|
||||
what _does_ it do?
|
||||
|
||||
- Connect to ADC and NMDC hubs
|
||||
- User list management
|
||||
- Chatting and private messaging
|
||||
|
||||
Those features already make it perfectly suitable for writing chat-only bots
|
||||
and interfaces.
|
||||
|
||||
## Usage
|
||||
|
||||
Globster isn't particularly hard to use, but usage documentation is currently a
|
||||
bit lacking. I have every intention to fix that, but for now, you're encouraged
|
||||
to join the development hub and bug me for help: `adc://dc.blicky.net:2780/`.
|
||||
I did already write some [API documentation](/globster/api).
|
||||
|
||||
There are at this point not many scripts or interfaces available for Globster:
|
||||
|
||||
- [globsterctl](/globster/ctl) - A control script for the daemon, included in the git repo.
|
||||
- [globster-feedspam.pl](http://p.blicky.net/0z9uw) - An RSS / Atom notification script.
|
||||
- [globster-mhc.pl](http://p.blicky.net/8y8mv) - A hub chat link script. More useful as an example than anything else.
|
||||
- [globgraph](http://p.blicky.net/qvg59) - Munin plugin to monitor Direct Connect hubs.
|
||||
|
||||
There's more to come. I'd love to have at least a convenient console client (a
|
||||
weechat or irssi plugin? An ncdc fork?) and perhaps a web-based interface. But
|
||||
other stuff is welcome, too. Who's going to write all that, you ask? Erm...
|
||||
well... You, perhaps? :-)
|
||||
|
||||
## Final notes
|
||||
|
||||
As you've come to expect from me _(right?)_, Globster is entirely written in C
|
||||
and available under a liberal MIT license.
|
||||
|
||||
Globster incorporates code from
|
||||
[libev](http://software.schmorp.de/pkg/libev.html),
|
||||
[freetiger](http://klondike.es/freetiger/),
|
||||
[klib](https://github.com/attractivechaos/klib) and
|
||||
[ylib](https://dev.yorhel.nl/ylib).
|
||||
Additionally, [autoconf-lean](https://bitbucket.org/GregorR/autoconf-lean) is
|
||||
used to keep the configure script fast.
|
||||
140
dat/ncdc
140
dat/ncdc
|
|
@ -1,140 +0,0 @@
|
|||
=pod
|
||||
|
||||
Ncdc is a modern and lightweight direct connect client with a friendly
|
||||
ncurses interface.
|
||||
|
||||
|
||||
=head2 Get ncdc!
|
||||
|
||||
=over
|
||||
|
||||
=item Latest version
|
||||
|
||||
1.20 ([dllink ncdc-1.20.tar.gz download]
|
||||
- L<changes|https://dev.yorhel.nl/ncdc/changes>)
|
||||
|
||||
Convenient static binaries for Linux:
|
||||
L<64-bit|https://dev.yorhel.nl/download/ncdc-linux-x86_64-1.20-6-g5111a.tar.gz> -
|
||||
L<32-bit|https://dev.yorhel.nl/download/ncdc-linux-i486-1.20-6-g5111a.tar.gz> -
|
||||
L<ARM|https://dev.yorhel.nl/download/ncdc-linux-arm-1.20-6-g5111a.tar.gz>. Check the
|
||||
L<installation instructions|https://dev.yorhel.nl/ncdc/install> for more info.
|
||||
|
||||
=item Development version
|
||||
|
||||
The latest development version is available from git and can be cloned using
|
||||
C<git clone git://g.blicky.net/ncdc.git>. The repository is available for
|
||||
L<online browsing|http://g.blicky.net/ncdc.git/>.
|
||||
|
||||
=item Requirements
|
||||
|
||||
The following libraries are required: ncurses, zlib, bzip2, sqlite3, glib2 and
|
||||
gnutls.
|
||||
|
||||
Ncdc is entirely written in C and available under a liberal MIT license.
|
||||
|
||||
=item Community
|
||||
|
||||
[html]
|
||||
L<Bug tracker|https://dev.yorhel.nl/ncdc/bug> - For bugs reports, feature requests and patches.<br />
|
||||
C<adcs://dc.blicky.net:2780/> - For real-time chat.
|
||||
É
|
||||
|
||||
=item Packages and ports
|
||||
|
||||
Are available for the following systems:
|
||||
L<Arch Linux|https://aur.archlinux.org/packages/ncdc/> -
|
||||
L<Fedora|https://apps.fedoraproject.org/packages/ncdc/overview/> -
|
||||
L<FreeBSD|http://www.freshports.org/net-p2p/ncdc/> -
|
||||
L<Frugalware|http://frugalware.org/packages?srch=ncdc&op=pkg&arch=all&ver=all> -
|
||||
L<Gentoo|http://packages.gentoo.org/package/net-p2p/ncdc> -
|
||||
L<GNU Guix|https://www.gnu.org/software/guix/package-list.html> -
|
||||
L<Homebrew|http://braumeister.org/formula/ncdc> -
|
||||
L<OpenSUSE|http://packman.links2linux.org/package/ncdc> -
|
||||
L<Source Mage|http://download.sourcemage.org/grimoire/codex/test/ftp/ncdc/>
|
||||
|
||||
I have a few old packages on the L<Open Build
|
||||
Service|https://build.opensuse.org/package/show/home:yorhel/ncdc>,
|
||||
but these are unmaintained. The static binaries are preferred.
|
||||
|
||||
A convenient installer is available for
|
||||
L<Android|http://code.ivysaur.me/ncdcinstaller.html>.
|
||||
|
||||
=back
|
||||
|
||||
=head2 Features
|
||||
|
||||
Common features all modern DC clients (should) have:
|
||||
|
||||
=over
|
||||
|
||||
=item * Connecting to multiple hubs at the same time,
|
||||
|
||||
=item * Support for both ADC and NMDC protocols,
|
||||
|
||||
=item * Chatting and private messaging,
|
||||
|
||||
=item * Browsing the user list of a connected hub,
|
||||
|
||||
=item * Share management and file uploading,
|
||||
|
||||
=item * Connections and download queue management,
|
||||
|
||||
=item * File list browsing,
|
||||
|
||||
=item * TTH-checked, multi-source and segmented file downloading,
|
||||
|
||||
=item * Searching for files,
|
||||
|
||||
=item * Secure hub (adcs:// and nmdcs://) and client connections on both protocols,
|
||||
|
||||
=item * Bandwidth throttling,
|
||||
|
||||
=item * IPv6 support.
|
||||
|
||||
=back
|
||||
|
||||
And special features not commonly found in other clients:
|
||||
|
||||
=over
|
||||
|
||||
=item * Different connection settings for each hub,
|
||||
|
||||
=item * Encrypted UDP messages (ADC SUDP),
|
||||
|
||||
=item * Subdirectory refreshing,
|
||||
|
||||
=item * Nick notification and highlighting in chat windows,
|
||||
|
||||
=item * Trust on First Use for TLS-enabled hubs,
|
||||
|
||||
=item * A single listen port for both TLS and TCP connections,
|
||||
|
||||
=item * Efficient file uploads using sendfile(),
|
||||
|
||||
=item * Large file lists are opened in a background thread,
|
||||
|
||||
=item * Doesn't trash your OS file cache (with the flush_file_cache option enabled),
|
||||
|
||||
=item * (Relatively...) low memory usage.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
=head2 What doesn't ncdc do?
|
||||
|
||||
Since the above list is getting larger and larger every time, it may be more
|
||||
interesting to list a few features that are (relatively) common in other DC
|
||||
clients, but which ncdc doesn't do. Yet.
|
||||
|
||||
=over
|
||||
|
||||
=item * NAT Traversal,
|
||||
|
||||
=item * OP features (e.g. client detection, file list scanning and other useful stuff for OPs),
|
||||
|
||||
=item * SOCKS support.
|
||||
|
||||
=back
|
||||
|
||||
Of course, there are many more features that could be implemented or improved.
|
||||
These will all be addressed in later versions (hopefully :).
|
||||
|
|
@ -1,396 +0,0 @@
|
|||
1.20 - 2016-12-30
|
||||
- Support bracketed paste mode in input handling (cologic)
|
||||
- Add 'geoip_cc4' and 'geoip_cc6' settings
|
||||
- Add 'log_hubchat' setting
|
||||
- Add 'local' option to 'active_ip' setting
|
||||
- Add support for multistream bzip2 filelists
|
||||
- Disable RC4 ciphers by default from tls_priority
|
||||
- Fix potential null pointer dereference
|
||||
- Fix chmod of destination directories (Johannes Beisswenger)
|
||||
|
||||
1.19.1 - 2014-04-23
|
||||
- Fix remote null pointer dereference
|
||||
- Searching now works in the search results list
|
||||
- Fix possible file corruption when moving file to destination
|
||||
- Fix error handling when finalizing a file download
|
||||
- Fix downloading of 0-byte files
|
||||
- Fix extremely slow /gc
|
||||
- Fix sendfile() with large files on 32-bit Linux
|
||||
- Fix minor display issue with multicolumn characters
|
||||
|
||||
1.19 - 2014-02-11
|
||||
- Add search functionality to the file browser and user list (/,. keys)
|
||||
- Add geoip support (requires --with-geoip at configure)
|
||||
- Add 'download_segment' setting to change minimum segment size
|
||||
- Log hashing progress to stderr.log
|
||||
- Fix three (potential) security vulnerabilities
|
||||
- Fix downloading of file lists when other user has no free slots
|
||||
|
||||
1.18.1 - 2013-10-05
|
||||
- Fix crash when downloading files from multiple sources
|
||||
- Use the yxml library to parse files.xml.bz2 files
|
||||
- Fix various XML conformance bugs in parsing files.xml.bz2 files
|
||||
|
||||
1.18 - 2013-09-25
|
||||
- Add support for segmented downloading
|
||||
- Support $MyINFO without flags byte on NMDC hubs
|
||||
- Don't require pod2man on build
|
||||
- Fix tab-completion of nick names when full nick is specified
|
||||
- Fix cursor position on selected line in listings
|
||||
- Fix bug with schema-less /connect
|
||||
|
||||
1.17 - 2013-06-15
|
||||
- Add 'q' key to user list for matching a user's files with download queue
|
||||
- Add transfers.log format documentation to manual page
|
||||
- Consider non-alphanumeric characters as word separators in input line
|
||||
- Fix outgoing UDP messages to respect local_address setting
|
||||
- Fix Alt+Backspace on xterm-like terminals
|
||||
- Fix handling of "." and ".." file/directory names in files.xml.bz2
|
||||
- Fix possible crash when receiving unexpected encrypted search results
|
||||
- Fix sendfile() handling to use fallback on EOVERFLOW
|
||||
- Fix possible crash when logging UDP messages
|
||||
|
||||
1.16.1 - 2013-03-23
|
||||
- Fix crash when opening connection on ADC in passive mode
|
||||
- Fix documentation of 'd' key in download_exclude setting
|
||||
|
||||
1.16 - 2013-03-21
|
||||
- List of granted users is now remembered across restarts
|
||||
- Don't throttle users who are granted a slot
|
||||
- Support CIDs of variable size on ADC
|
||||
- Log, but otherwise ignore, DSTA messages on ADC
|
||||
- Fix possible crash with graceful disconnect on C-C connections
|
||||
- Fix bug with enabling active mode when active_ip is set
|
||||
- Fix reporting of active mode on NMDC hubs
|
||||
- Fix bug with the 'X' key on the queue tab
|
||||
- Fix idle disconnect timeout when a file transfer is active
|
||||
|
||||
1.15 - 2013-03-02
|
||||
- IPv6 support
|
||||
- Significantly shorten certificate creation time with old GnuTLS versions
|
||||
- Always enable tls_policy and sudp_policy by default
|
||||
- Link against libgcrypt if detected GnuTLS is older than 3.0
|
||||
- Add color_tab_active setting
|
||||
- Remove active_tls_port setting
|
||||
- Allow '-', '.' and '_' characters in hub names
|
||||
- Allow spaces before a command
|
||||
- Add Alt+backspace as alias for Ctrl+w
|
||||
- Add throttle for 'CGET tthl' requests
|
||||
- Don't throw away PMs from unknown users
|
||||
- Recognize mode field in $MyINFO without tag
|
||||
- Fix possible crash with C-C TLS and old GnuTLS versions
|
||||
- Fix old references to the removed ncdc-db-upgrade utility
|
||||
- Fix loading of file lists from Shareaza 2.6.0.0 and earlier
|
||||
- Fix handling of tab and carriage return in log window
|
||||
- Fix changing of download_dir/incoming_dir if either dir has been deleted
|
||||
- Fix compilation against glib < 2.26
|
||||
- Fix unclean C-C TLS disconnect on timeout
|
||||
|
||||
1.14 - 2012-11-04
|
||||
- Added BLOM support for ADC ('/hset adc_blom true' to enable it)
|
||||
- Added section on connection settings to man page
|
||||
- Fix incorrect char signedness assumption on ARM
|
||||
- Fix possible crash when downloading small files
|
||||
- Fix hub counts reported to the hub on login on ADC
|
||||
- Fix local time display issue when built against musl (0.9.6)
|
||||
- Removed legacy ncdc-db-upgrade utility
|
||||
|
||||
1.13 - 2012-08-16
|
||||
- zlib library added as a required dependency
|
||||
- Purge empty directories from share by default
|
||||
- Added "share_emptydirs" setting
|
||||
- Disable tls_policy by default when using an old GnuTLS version
|
||||
- Improved support for group chat
|
||||
- Honor G_FILENAME_ENCODING for path autocomplete, /share and queued files
|
||||
- Use a default connection string on NMDC if no 'connection' has been set
|
||||
- Support ZLIG for partial file list transfers on ADC
|
||||
- Send more subdirectories in partial file list transfers
|
||||
- Removed use of system-provided realpath()
|
||||
- Don't allow /search with an empty string
|
||||
- Fix segfault on /search command without query
|
||||
- Fix display of 'sudp_policy' setting if SUDP is not supported
|
||||
- Fix --enable-git-version when cross-compiling
|
||||
|
||||
1.12 - 2012-07-10
|
||||
- Don't follow symlinks in share by default
|
||||
- Added 'share_symlink' option
|
||||
- Added bell notification and 'notify_bell' option
|
||||
- Added 'sudp_policy' setting
|
||||
- List all configured hubs on '/open'
|
||||
- Added '/delhub' command to remove hub configuration
|
||||
- Added filtering options to connections tab
|
||||
- Added TLS support indication to user list
|
||||
- Added Alt+a key to cycle through tabs with recent activity
|
||||
- Allow binding to ports below 1024
|
||||
- Add space after autocompleting a command
|
||||
- Fix uploading chunks of 2GiB and larger (bug #12)
|
||||
- Fix bug with duplicate directory detection in '/share'
|
||||
- Fix display of timer on search tab
|
||||
- ADC: Use shorter search token to save some bandwidth
|
||||
- Various attempts at cleaning up some code
|
||||
|
||||
1.11 - 2012-05-15
|
||||
- Drop libxml2 in favour of custom XML parser & writer
|
||||
- Allow using a single listen port for TCP and TLS
|
||||
- Added support for encrypted UDP messages (ADC SUDP)
|
||||
- Included 'makeheaders' in the distribution
|
||||
- Removed GNU-specific extensions from the Makefile
|
||||
- Fix /disconnect to cancel automatic reconnect
|
||||
- Fix loading of file lists with invalid UTF-8 sequences
|
||||
- Fix ncurses detection on OpenIndiana
|
||||
- Fix use of TLS in passive mode on ADC
|
||||
- Fix configure warning when git could not be found
|
||||
|
||||
1.10 - 2012-05-03
|
||||
- Rewrote network backend to use plain sockets instead of GIO
|
||||
- Added GnuTLS as required dependency
|
||||
- Removed GIO and glib-networking dependencies
|
||||
- Removed 'ncdc-gen-cert' utility - ncdc can now generate certs by itself
|
||||
- Enable client-to-client TLS by default
|
||||
- Added 'tls_priority' setting
|
||||
- Added 'reconnect_timeout' setting
|
||||
- Don't quit ncdc on Ctrl+C
|
||||
- Display age of file list in the title bar
|
||||
- Don't build the 'ncdc-db-upgrade' tool by default
|
||||
- Switched to a single top-level Makefile
|
||||
- Fix '/browse user -f' ('-f' argument after username)
|
||||
- Fix hub login when it checks for public hubs = 0
|
||||
- Fix overflow of long tab titles
|
||||
- Fix loading of microdc2-generated file lists
|
||||
- Fix loading of file lists with an invalid character
|
||||
- Fix occasional crash when TLS is enabled
|
||||
- Fix transfer rate indication and limiting with TLS connections
|
||||
- Fix small memory leak when 'upload_rate' is set
|
||||
|
||||
1.9 - 2012-03-14
|
||||
- Allow all 'active_' settings to be changed on a per-hub basis
|
||||
- Allow 'active_ip' to be unset and automatically get IP from hub
|
||||
- Added 'active_udp_port' and 'active_tcp_port' settings
|
||||
- Renamed 'active_bind' to 'local_address' and use it for outgoing
|
||||
connections as well
|
||||
- Display connection settings in hub info bar
|
||||
- Added '/listen' command to display currently used ports
|
||||
- Don't listen on TLS port when tls_policy is disabled
|
||||
- Added 'disconnect_offline' setting
|
||||
- Display '(global)' indicator when showing /hset variables
|
||||
- Don't strip whitespace from /say
|
||||
- Don't allow directory separator as /share name
|
||||
- Allow 'global.' and '#hubname.' prefix for /set keys
|
||||
- Fix display of long IP addresses on user list
|
||||
|
||||
1.8 - 2012-02-13
|
||||
- Added bandwidth limiting (upload_rate and download_rate settings)
|
||||
- Added hash speed limiting (hash_rate setting)
|
||||
- Added 'm' key to connection tab to /msg selected user
|
||||
- Disable client-to-client TLS by default
|
||||
- Don't throw away some search results on NMDC
|
||||
- (Partially) fixed uploading of >2GB chunks
|
||||
- Fixed file descriptor leak when using the backlog feature
|
||||
- Fixed crash when opening invalid filelist from search twice
|
||||
- Use POD for the manual pages
|
||||
- Minor typo fixes
|
||||
|
||||
1.7 - 2011-12-30
|
||||
- Split /set command in a /set (global) and /hset (hub)
|
||||
- File downloads are performed in a background thread
|
||||
- Added glob-style matching on /set and /hset keys
|
||||
- Added UTF-8 locale check
|
||||
- Added 'sendfile' setting
|
||||
- Added finer granularity for the flush_file_cache setting
|
||||
- Allow flush_file_cache to be enabled for downloads
|
||||
- Fix sending of $MyINFO with wrong public hub count
|
||||
- Fix incorrect inclusion of gdbm.h
|
||||
|
||||
1.6 - 2011-12-07
|
||||
- Use SQLite3 for storage instead of GDBM
|
||||
- Converted config.ini to SQLite3 database
|
||||
- Added ncdc-db-upgrade utility
|
||||
- Session directory is architecture-independent
|
||||
- All data is safe against crashes and power failures
|
||||
- Added support for removing/adding directories without rehashing
|
||||
- Always match every file list on 'Q' key on TTH search
|
||||
- Immediately flush log entries to the kernel
|
||||
- Faster start-up
|
||||
- Added support for per-hub 'active_ip' settings
|
||||
- Allow interval notation when setting autorefresh
|
||||
- Broadcast SF (number of shared files) on ADC hubs
|
||||
- Combine TTH data for downloaded files to blocks of at least 1MiB
|
||||
- Increased hash buffer size (10KiB -> 512KiB)
|
||||
- Fix case-insensitivity of search results
|
||||
- Fix reporting of user state in pm tabs at hub disconnect
|
||||
- Fix generation of client certificates with openssl
|
||||
- Fix segfault with duplicate users on an ADC hub
|
||||
- Fix segfault when opening of a filelist fails
|
||||
- Fix base32 decoding bug (fixes login sequence on some ADC hubs)
|
||||
|
||||
1.5 - 2011-11-03
|
||||
- Added filelist_maxage setting
|
||||
- Added flush_file_cache setting
|
||||
- Added /ungrant and improved /grant management
|
||||
- Added key to download queue to clear user state for all files
|
||||
- Added keys to search results to download file list and match queue
|
||||
- Select the right user when using the 'q' key in connection tab
|
||||
- Fixed possible crash when opening file list from search results
|
||||
- Fixed detection of incompatible session directory version
|
||||
|
||||
1.4 - 2011-10-26
|
||||
- Added sorting functionality to file list
|
||||
- Added color settings: title, separator, list_default, list_header and
|
||||
list_select
|
||||
- Added "blink" color attribute
|
||||
- Allow /disconnect to be used on the main tab
|
||||
- Display number of matched and added items when using match queue feature
|
||||
- Use git-describe to create a version string, if available
|
||||
- Decreased memory usage for large file lists
|
||||
- Handle duplicate filenames in other users' file list
|
||||
- Fixed incorrect setting of the "Incomplete" flag in files.xml.bz2
|
||||
- Fixed handling of the PM param in MSG commands on ADC
|
||||
- Fixed user change notifications for PM tabs
|
||||
|
||||
1.3 - 2011-10-14
|
||||
- Added multi-source downloading
|
||||
- Added user information view and management keys to download queue tab
|
||||
- Added "search for alternative" key to queue, file browser and search tabs
|
||||
- Added "match queue" key to file browser and search tabs
|
||||
- Added ui_time_format setting
|
||||
- Added chat_only setting
|
||||
- Changed default value of color_log_time to dark grey
|
||||
- Improved tracking of a parent for each tab
|
||||
- Improved portability for Solaris
|
||||
- Fixed crash when closing a hub tab while it is connecting
|
||||
- Fixed crash when auto-completing settings without auto-completion
|
||||
- Fixed bug with file name display if download_dir ends with a slash
|
||||
- Fixed bug with uploading chunks larger than 2GiB
|
||||
- Fixed handling of directory search results on ADC
|
||||
|
||||
1.2 - 2011-09-25
|
||||
- Fixed incorrect handling of outgoing NMDC connections
|
||||
|
||||
1.1 - 2011-09-25
|
||||
- Select item in file browser when opened from a search result
|
||||
- Added active_bind setting
|
||||
- Added share_exclude setting
|
||||
- Added download_exclude setting
|
||||
- Added incoming_dir setting
|
||||
- Added autocompletion for the previous values of certain settings
|
||||
- Allow the "connection" setting to be used for ADC as well
|
||||
- Added IP column to user list
|
||||
- Allow sorting on description, email, tag and IP columns in user list
|
||||
- Display upload speeds in the user list of an ADC hub
|
||||
- Added TLS indication to connection list
|
||||
- Mark selected items bold in listings
|
||||
- Allow /reconnect on the main tab to reconnect all hubs
|
||||
- Added slash to base path in partial file lists
|
||||
- Added delay of 5 seconds before reconnecting to a hub
|
||||
- Added recognition of the AP param on ADC
|
||||
- Added support for UserIP2 on NMDC
|
||||
- Removed support for unexpected incoming NMDC connections
|
||||
|
||||
1.0 - 2011-09-16
|
||||
- Added ncdc(1) and ncdc-gen-cert(1) manual pages
|
||||
- Documented settings (/help set <setting>)
|
||||
- Documented key bindings (/help keys)
|
||||
- Improved line wrapping algorithm for the log window
|
||||
- Added support for client-to-client TLS on NMDC
|
||||
- Added support for the CGFI command on ADC
|
||||
- Throttle GET requests on the same file + offset
|
||||
- Fixed glib assertion failure when disabling active mode
|
||||
- Fixed downloading from clients using $ADCSND with -1 bytes
|
||||
- Fixed race condition in file uploading code
|
||||
- Fixed idle time calculation while connecting to another client
|
||||
- Properly include unistd.h in dl.c
|
||||
|
||||
0.9 - 2011-09-03
|
||||
- Added TLS support (adcs://, nmdcs://, and ADC client-to-client)
|
||||
- Added tls_policy setting
|
||||
- Added KEYP support for ADC
|
||||
- Added warning when a hub changes TLS certificate
|
||||
- Display exact listen ports when enabling active mode
|
||||
|
||||
0.8 - 2011-08-26
|
||||
- Added transfer log
|
||||
- Added log_downloads and log_uploads settings
|
||||
- Added day changed indicators to the log windows
|
||||
- Added common readline keys to the text input box
|
||||
- Changed /refresh shortcut from Ctrl+e/u to Alt+r
|
||||
- Allow join messages to work even when the join completion detection fails
|
||||
- Select parent tab when closing a userlist, PM or filelist tab
|
||||
- Re-open log files when receiving SIGUSR1
|
||||
- Perform a clean shutdown when the terminal is closed
|
||||
- Fixed bug in formatting the title of a /search tab
|
||||
- Fixed log indent for non-ASCII nicks
|
||||
- Fixed log highlighting and indenting for /me messages
|
||||
|
||||
0.7 - 2011-08-17
|
||||
- Added word wrapping for the log window
|
||||
- Added basic colors and nick highlighting to the log window
|
||||
- Allow colors to be changed with the /set command
|
||||
- Added backlog feature and setting
|
||||
- Added silent building to the configure script
|
||||
- Automatically re-open log files when they are moved/truncated externally
|
||||
- Accept 'nmdc://' URLs as alternative to 'dchub://'
|
||||
- Fixed spamming of useless $MyINFO and BINF commands every 5 minutes
|
||||
- Fixed minor memory leak when closing/clearing the log window
|
||||
|
||||
0.6 - 2011-08-08
|
||||
- Added file searching, through a /search command
|
||||
- Added tab to display the search results
|
||||
- Listen for incoming messages on UDP in active mode
|
||||
- Allow specifying a hub address with /open
|
||||
- Fixed case-sensitivity of shared files
|
||||
- Various bugfixes and other improvements
|
||||
|
||||
0.5 - 2011-08-02
|
||||
- Downloaded files are now TTH-checked
|
||||
- Added download queue priorities
|
||||
- Download queue items are automatically disabled on error
|
||||
- Improved error handling and reporting for downloads
|
||||
- Added download_slots setting
|
||||
- Use a separate thread to load other users' file list
|
||||
- Improved /gc to also clean up download queue related data
|
||||
- Decreased memory usage for large file lists
|
||||
- Improved error handling with sendfile()
|
||||
- Fixed downloading in passive mode on ADC hubs
|
||||
- Fixed adding a dir to the download queue while connected to the user
|
||||
- Fixed segfault when the userlist is open while disconnecting from a hub
|
||||
|
||||
0.4 - 2011-07-23
|
||||
- Added file downloading support
|
||||
WARNING: Downloaded files are not TTH checked at this moment.
|
||||
- Added persistent download queue
|
||||
- Added busy indicators on start-up and with /gc
|
||||
- Added download speed indicator to status bar
|
||||
- Improved connection list interface
|
||||
- Improved performance of UI message handling
|
||||
- Fixed a remote crash
|
||||
- Fixed incorrect reporting of hub counters
|
||||
|
||||
0.3 - 2011-07-15
|
||||
- Added file list browser
|
||||
- Added downloading of other people's file list
|
||||
- Added 'hubname' setting to rename hub tabs
|
||||
- Added -v, -c and -n commandline options
|
||||
- Added -n option to /open to prevent an autoconnect
|
||||
- Added referer notification
|
||||
- Improved handling of some ADC commands
|
||||
- Improved logging of debug messages
|
||||
- Fixed error when uploading an empty file list
|
||||
- Fixed display of join/quits on ADC hubs
|
||||
- Fixed several crashes
|
||||
|
||||
0.2 - 2011-06-27
|
||||
- ADC support
|
||||
- Added slot granting and /grant command
|
||||
- Added /kick (for NMDC hubs)
|
||||
- Added /pm and /nick aliasses
|
||||
- Added support for passworded login
|
||||
- Added /me command (mostly useful for ADC hubs)
|
||||
- Added /whois command
|
||||
- Added 'share_hidden' option (default: false)
|
||||
- Improved minislots support
|
||||
- Added 'minislots' and 'minislot_size' options
|
||||
- Slightly improved user list and connection list
|
||||
- /set displays default values for unset options
|
||||
|
||||
0.1 - 2011-06-20
|
||||
Initial version
|
||||
201
dat/ncdc-install
201
dat/ncdc-install
|
|
@ -1,201 +0,0 @@
|
|||
|
||||
=head1 General instructions
|
||||
|
||||
=head2 Building from source
|
||||
|
||||
In theory, the following instructions should work everywhere:
|
||||
|
||||
=over
|
||||
|
||||
=item * Install the required dependencies: ncurses, bzip2, zlib, sqlite3, glib2 and gnutls,
|
||||
|
||||
=item * Download and extract the source tarball from the L<homepage|https://dev.yorhel.nl/ncdc>,
|
||||
|
||||
=item * C<./configure>
|
||||
|
||||
=item * C<make>
|
||||
|
||||
=item * And then run C<make install> with superuser permissions.
|
||||
|
||||
=back
|
||||
|
||||
In practice, however, this does not always work and may not always be the
|
||||
preferred method of installation. On this page I try to collect instructions for
|
||||
each OS and distribution to make the installation process a bit easier for
|
||||
everyone.
|
||||
|
||||
If your system is missing from this page or if you're still having trouble,
|
||||
don't hesitate to join the support hub at C<adc://dc.blicky.net/> or send me a
|
||||
mail at L<projects@yorhel.nl|mailto:projects@yorhel.nl>. Contributions to this
|
||||
page are of course highly welcomed as well. :-)
|
||||
|
||||
|
||||
|
||||
=head2 Statically linked binaries
|
||||
|
||||
If you just want to get ncdc running without going through the trouble of
|
||||
compiling and/or installing it, I also offer statically linked binaries:
|
||||
|
||||
=over
|
||||
|
||||
=item * L<Linux, 64-bit|https://dev.yorhel.nl/download/ncdc-linux-x86_64-1.20-6-g5111a.tar.gz>
|
||||
|
||||
=item * L<Linux, 32-bit|https://dev.yorhel.nl/download/ncdc-linux-i486-1.20-6-g5111a.tar.gz>
|
||||
|
||||
=item * L<Linux, ARM|https://dev.yorhel.nl/download/ncdc-linux-arm-1.20-6-g5111a.tar.gz>
|
||||
|
||||
=back
|
||||
|
||||
To use them, simply download and extract the tarball, and then run C<./ncdc> on
|
||||
the command line.
|
||||
|
||||
The binaries include all the required dependencies and are linked against
|
||||
L<musl|http://www.etalabs.net/musl/>, so they should run on any Linux machine
|
||||
with the right architecture. If you want binaries for another OS or
|
||||
architecture, please bug me and I'll see what I can do.
|
||||
|
||||
|
||||
|
||||
=head1 System-specific instructions
|
||||
|
||||
=head2 Android
|
||||
|
||||
A L<convenient installer|http://code.ivysaur.me/ncdcinstaller.html> is
|
||||
available for Android 2.3 and later, which makes use of the static binary.
|
||||
|
||||
|
||||
=head2 Arch Linux
|
||||
|
||||
Ncdc is available on L<AUR|https://aur.archlinux.org/packages/ncdc/>, to
|
||||
install it you can use your favorite AUR-installer. If you don't have a
|
||||
favorite, go for the manual approach:
|
||||
|
||||
wget https://aur.archlinux.org/cgit/aur.git/snapshot/ncdc.tar.gz
|
||||
tar -xf ncdc.tar.gz
|
||||
cd ncdc
|
||||
makepkg -si
|
||||
|
||||
|
||||
=head2 Fedora
|
||||
|
||||
There's a L<package|https://apps.fedoraproject.org/packages/ncdc/overview/>
|
||||
available for Fedora.
|
||||
|
||||
|
||||
=head2 FreeBSD
|
||||
|
||||
Ncdc is available in the Ports Collection. To install, L<make sure your
|
||||
collection is
|
||||
up-to-date|http://www.freebsd.org/doc/en_US.ISO8859-1/books/handbook/ports-using.html>
|
||||
and install the Port as any other:
|
||||
|
||||
cd /usr/ports/net-p2p/ncdc
|
||||
make install clean
|
||||
|
||||
|
||||
|
||||
=head2 Gentoo
|
||||
|
||||
Ncdc is available in the Portage tree, so installation is trivial:
|
||||
|
||||
emerge ncdc
|
||||
|
||||
|
||||
|
||||
=head2 Mac OS X
|
||||
|
||||
Ncdc is available in L<Homebrew|http://braumeister.org/formula/ncdc>.
|
||||
|
||||
|
||||
=head2 OpenIndiana
|
||||
|
||||
This has been tested on OpenIndiana Build 151a Server, but may work on other
|
||||
versions as well. Compiling from source is your only option at the moment.
|
||||
First install some required packages (as root):
|
||||
|
||||
pkg install gcc-3 glib2 gnutls gettext header-math perl-510/extra
|
||||
|
||||
Then, fetch the ncdc source tarball, extract and build as follows:
|
||||
|
||||
wget https://dev.yorhel.nl/download/ncdc-1.20.tar.gz
|
||||
tar -xf ncdc-1.20.tar.gz
|
||||
cd ncdc-1.20
|
||||
export PATH="$PATH:/usr/perl5/5.10.0/bin"
|
||||
./configure --prefix=/usr LDFLAGS='-L/usr/gnu/lib -R/usr/gnu/lib'
|
||||
make
|
||||
|
||||
And finally, to actually install ncdc, run C<make install> as root. You can
|
||||
safely revert C<$PATH> back to its previous value if you wish, it was only
|
||||
necessary in order for C<./configure> and C<make> to find C<pod2man>.
|
||||
|
||||
|
||||
|
||||
=head2 OpenSUSE
|
||||
|
||||
Get the package from L<PackMan|http://packman.links2linux.org/package/ncdc>:
|
||||
Select your openSUSE release and hit the "1 click install" button.
|
||||
|
||||
|
||||
|
||||
=head2 Ubuntu & Debian
|
||||
|
||||
The preferred way of installing ncdc on Ubuntu or Debian is to use the static
|
||||
binaries provided above.
|
||||
|
||||
Alternatively, you can also try to compile ncdc from source. To do so, first
|
||||
install the required libraries:
|
||||
|
||||
sudo apt-get install libbz2-dev libsqlite3-dev libncurses5-dev\
|
||||
libncursesw5-dev libglib2.0-dev libgnutls-dev zlib1g-dev
|
||||
|
||||
Then run the following commands to download and install ncdc:
|
||||
|
||||
wget https://dev.yorhel.nl/download/ncdc-1.20.tar.gz
|
||||
tar -xf ncdc-1.20.tar.gz
|
||||
cd ncdc-1.20
|
||||
./configure --prefix=/usr
|
||||
make
|
||||
sudo make install
|
||||
|
||||
|
||||
|
||||
=head2 Windows (Cygwin)
|
||||
|
||||
Surprisingly enough, ncdc can be used even on Windows, thanks to Cygwin. If
|
||||
you haven't done so already, get C<setup.exe> from the L<Cygwin
|
||||
website|http://cygwin.com/> and use it to install the following packages:
|
||||
|
||||
=over
|
||||
|
||||
=item * make
|
||||
|
||||
=item * gcc4
|
||||
|
||||
=item * perl
|
||||
|
||||
=item * pkg-config
|
||||
|
||||
=item * wget
|
||||
|
||||
=item * zlib-devel
|
||||
|
||||
=item * libncursesw-devel
|
||||
|
||||
=item * libbz2-devel
|
||||
|
||||
=item * libglib2.0-devel
|
||||
|
||||
=item * libsqlite3-devel
|
||||
|
||||
=item * gnutls-devel
|
||||
|
||||
=back
|
||||
|
||||
Then open a Cygwin terminal and run the following commands to download,
|
||||
compile, and install ncdc:
|
||||
|
||||
wget https://dev.yorhel.nl/download/ncdc-1.20.tar.gz
|
||||
tar -xf ncdc-1.20.tar.gz
|
||||
cd ncdc-1.20
|
||||
./configure --prefix=/usr
|
||||
make install
|
||||
936
dat/ncdc-man
936
dat/ncdc-man
|
|
@ -1,936 +0,0 @@
|
|||
|
||||
=head1 NAME
|
||||
|
||||
ncdc - Ncurses Direct Connect Client
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
ncdc [options]
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
Ncdc is a modern and lightweight direct connect client with a friendly ncurses
|
||||
interface.
|
||||
|
||||
=head1 GETTING STARTED
|
||||
|
||||
This is a basic introduction for those who are new to ncdc. See the chapters
|
||||
below for a more detailed description of the available functionality.
|
||||
|
||||
What you see when starting up ncdc is an input line where you can input
|
||||
commands and a log window where the results are displayed, much like a regular
|
||||
terminal. Commands within ncdc start with a slash (e.g. C</help>) and have tab
|
||||
completion to help you.
|
||||
|
||||
The first thing you will want to do after starting ncdc for the first time is
|
||||
to setup some basic information and settings:
|
||||
|
||||
/set nick MyNick
|
||||
/set description ncdc is awesome!
|
||||
/set connection 10
|
||||
/share "My Awesome Files" /path/to/files
|
||||
|
||||
And if you have a direct connection to the internet or if your router allows
|
||||
port forwarding, you may also want to enable active mode:
|
||||
|
||||
/set active_port 34194
|
||||
/set active true
|
||||
|
||||
See the help text for each of the commands and settings for more information.
|
||||
Of course, all of the above settings are saved to the database and will be used
|
||||
again on the next run.
|
||||
|
||||
To connect to a hub, use /open:
|
||||
|
||||
/open ncdc adc://dc.blicky.net:2780/
|
||||
|
||||
Here I<ncdc> is the personal name you give to the hub, and the second argument
|
||||
the URL. This URL will be saved in the database, so the next time you want to
|
||||
connect to this hub, you can simply do C</open ncdc>. See the help text for
|
||||
C</open> and C</connect> for more information. If you want to automatically
|
||||
connect to a hub when ncdc starts up, use the C<autoconnect> setting.
|
||||
|
||||
Ncdc uses a tabbed interface: every hub opens in a new tab, and there are
|
||||
several other kinds of tabs available as well. The type of tab is indicated in
|
||||
the tab list on the bottom of the screen with a character prefix. Hubs, for
|
||||
example, are prefixed with a C<#>. If a tab needs your attention, a colored
|
||||
exclamation mark is displayed before the tab name, different colors are used
|
||||
for different types of activity.
|
||||
|
||||
Everything else should be fairly self-explanatory: To search for files, use the
|
||||
C</search> command. To browse through the user list of a hub, use C</userlist> or
|
||||
hit Alt+u. To browse someone's file list, use C</browse> or hit the 'b' key in
|
||||
the user list. And to monitor your upload and download connections, use
|
||||
C</connections> or hit Alt+n.
|
||||
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=over
|
||||
|
||||
=item B<-c, --session-dir=> I<dir>
|
||||
|
||||
Use a different session directory. Defaults to the contents of the environment
|
||||
variable `$NCDC_DIR' or if this is unset to `$HOME/.ncdc'.
|
||||
|
||||
=item B<-h, --help>
|
||||
|
||||
Display summary of options.
|
||||
|
||||
=item B<-n, --no-autoconnect>
|
||||
|
||||
Don't automatically connect to hubs with the C<autoconnect> option set.
|
||||
|
||||
=item B<--no-bracketed-paste>
|
||||
|
||||
Disable bracketed pasting.
|
||||
|
||||
=item B<-v, --version>
|
||||
|
||||
Display ncdc version.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
=head1 GETTING CONNECTED
|
||||
|
||||
As with most file sharing clients, ncdc supports two modes of being connected:
|
||||
I<active> and I<passive>. In passive mode (the default), you can connect to the
|
||||
outside world but nobody can connect (directly) to you. When passive, you will
|
||||
only be able to transfer files with people who are in active mode. In active
|
||||
mode, however, you will have some port open to the rest of the network to which
|
||||
other clients can connect. When active, you will be able to transfer files with
|
||||
everyone and you may get more and faster search results. Configuring active
|
||||
mode is therefore recommended.
|
||||
|
||||
In many setups, all you need to do to switch to active mode is to set a TCP/UDP
|
||||
port and enable the C<active> setting:
|
||||
|
||||
/set active_port 34194
|
||||
/set active true
|
||||
|
||||
When you connect to a hub, the status bar will tell you whether you are active
|
||||
or passive on that particular hub, and what IP address is being used to allow
|
||||
others to connect to you. For most hubs, your IP address will be detected
|
||||
automatically, but in the event that this fails, you can also set it yourself:
|
||||
|
||||
/set active_ip 13.33.33.7
|
||||
|
||||
If you are behind a NAT or firewall, you have to ensure that the port you
|
||||
configured is somehow allowed and/or forwarded. The C<active_port> setting is
|
||||
used for incoming TCP connections and UDP messages. You can configure a
|
||||
different UDP port with the C<active_udp_port> setting. Contrary to many other
|
||||
Direct Connect clients, ncdc only uses a single port for incoming TCP and TLS
|
||||
connections; there is no separate port for TLS.
|
||||
|
||||
The C</listen> command can tell you which ports it expects to be forwarded, and
|
||||
for which hubs these ports will be used. It only lists hubs on which you are
|
||||
currently active, so the output will change when you open or close a hub
|
||||
connection.
|
||||
|
||||
If you have multiple network interfaces, you can force ncdc to use only a
|
||||
single interface by setting the C<local_address> setting to the address of that
|
||||
interface. This affects both outgoing connections (they will be forced to go
|
||||
through the configured interface) and incoming connections (the ports will be
|
||||
bound to the configured interface).
|
||||
|
||||
All of the previously mentioned settings can be set globally (with C</set>) and
|
||||
on a per-hub basis (with C</hset>). This allows you to be active on an internet
|
||||
hub and a LAN-only hub at the same time. It also allows you to be active in one
|
||||
hub while passive in another, or to use different ports for each hub.
|
||||
|
||||
|
||||
=head1 INTERACTIVE COMMANDS
|
||||
|
||||
The following is the list of commands that can be used within ncdc. The /help
|
||||
command can also be used to get a list of available commands and to access this
|
||||
documentation.
|
||||
|
||||
=over
|
||||
|
||||
=item B</accept>
|
||||
|
||||
Use this command to accept the TLS certificate of a hub. This command is used only in the case the keyprint of the TLS certificate of a hub does not match the keyprint stored in the database.
|
||||
|
||||
=item B</browse> [[-f] <user>]
|
||||
|
||||
Without arguments, this opens a new tab where you can browse your own file list. Note that changes to your list are not immediately visible in the browser. You need to re-open the tab to get the latest version of your list.
|
||||
|
||||
With arguments, the file list of the specified user will be downloaded (if it has not been downloaded already) and the browse tab will open once it's complete. The `-f' flag can be used to force the file list to be (re-)downloaded.
|
||||
|
||||
=item B</clear>
|
||||
|
||||
Clears the log displayed on the screen. Does not affect the log files in any way. Ctrl+l is a shortcut for this command.
|
||||
|
||||
=item B</close>
|
||||
|
||||
Close the current tab. When closing a hub tab, you will be disconnected from the hub and all related userlist and PM tabs will also be closed. Alt+c is a shortcut for this command.
|
||||
|
||||
=item B</connect> [<address>]
|
||||
|
||||
Initiate a connection with a hub. If no address is specified, will connect to the hub you last used on the current tab. The address should be in the form of `protocol://host:port/' or `host:port'. The `:port' part is in both cases optional and defaults to :411. The following protocols are recognized: dchub, nmdc, nmdcs, adc, adcs. When connecting to an nmdcs or adcs hub and the SHA256 keyprint is known, you can attach this to the url as `?kp=SHA256/<base32-encoded-keyprint>'
|
||||
|
||||
Note that this command can only be used on hub tabs. If you want to open a new connection to a hub, you need to use /open first. For example:
|
||||
|
||||
/open testhub
|
||||
/connect dchub://dc.some-test-hub.com/
|
||||
|
||||
See the /open command for more information.
|
||||
|
||||
=item B</connections>
|
||||
|
||||
Open the connections tab.
|
||||
|
||||
=item B</delhub> <name>
|
||||
|
||||
Remove a hub from the configuration
|
||||
|
||||
=item B</disconnect>
|
||||
|
||||
Disconnect from a hub.
|
||||
|
||||
=item B</gc>
|
||||
|
||||
Cleans up unused data and reorganizes existing data to allow more efficient storage and usage. Currently, this commands removes unused hash data, does a VACUUM on db.sqlite3, removes unused files in inc/ and old files in fl/.
|
||||
|
||||
This command may take some time to complete, and will fully block ncdc while it is running. It is recommended to run this command every once in a while. Every month is a good interval. Note that when ncdc says that it has completed this command, it's lying to you. Ncdc will still run a few large queries on the background, which may take up to a minute to complete.
|
||||
|
||||
=item B</grant> [-list|<user>]
|
||||
|
||||
Grant someone a slot. This allows the user to download from you even if you have no free slots. The slot will remain granted until the /ungrant command is used, even if ncdc has been restarted in the mean time.
|
||||
|
||||
To get a list of users whom you have granted a slot, use `/grant' without arguments or with `-list'. Be warned that using `/grant' without arguments on a PM tab will grant the slot to the user you are talking with. Make sure to use `-list' in that case.
|
||||
|
||||
Note that a granted slot is specific to a single hub. If the same user is also on other hubs, he/she will not be granted a slot on those hubs.
|
||||
|
||||
=item B</help> [<command>|set <key>|keys [<section>]]
|
||||
|
||||
To get a list of available commands, use /help without arguments.
|
||||
To get information on a particular command, use /help <command>.
|
||||
To get information on a configuration setting, use /help set <setting>.
|
||||
To get help on key bindings, use /help keys.
|
||||
|
||||
|
||||
=item B</hset> [<key> [<value>]]
|
||||
|
||||
Get or set per-hub configuration variables. Works equivalent to the `/set' command, but can only be used on hub tabs. Use `/hunset' to reset a variable back to its global value.
|
||||
|
||||
=item B</hunset> [<key>]
|
||||
|
||||
This command can be used to reset a per-hub configuration variable back to its global value.
|
||||
|
||||
=item B</kick> <user>
|
||||
|
||||
Kick a user from the hub. This command only works on NMDC hubs, and you need to be an OP to be able to use it.
|
||||
|
||||
=item B</listen>
|
||||
|
||||
List currently opened ports.
|
||||
|
||||
=item B</me> <message>
|
||||
|
||||
This allows you to talk in third person. Most clients will display your message as something like:
|
||||
|
||||
** Nick is doing something
|
||||
|
||||
Note that this command only works correctly on ADC hubs. The NMDC protocol does not have this feature, and your message will be sent as-is, including the /me.
|
||||
|
||||
=item B</msg> <user> [<message>]
|
||||
|
||||
Send a private message to a user on the currently opened hub. If no message is given, the tab will be opened but no message will be sent.
|
||||
|
||||
=item B</nick> [<nick>]
|
||||
|
||||
Alias for `/hset nick' on hub tabs, and `/set nick' otherwise.
|
||||
|
||||
=item B</open> [-n] [<name>] [<address>]
|
||||
|
||||
Without arguments, list all hubs known by the current configuration. Otherwise, this opens a new tab to use for a hub. The name is a (short) personal name you use to identify the hub, and will be used for storing hub-specific configuration.
|
||||
|
||||
If you have specified an address or have previously connected to a hub from a tab with the same name, /open will automatically connect to the hub. Use the `-n' flag to disable this behaviour.
|
||||
|
||||
See /connect for more information on connecting to a hub.
|
||||
|
||||
=item B</password> <password>
|
||||
|
||||
This command can be used to send a password to the hub without saving it to the database. If you wish to login automatically without having to type /password every time, use '/hset password <password>'. Be warned, however, that your password will be saved unencrypted in that case.
|
||||
|
||||
=item B</pm> <user> [<message>]
|
||||
|
||||
Alias for /msg
|
||||
|
||||
=item B</queue>
|
||||
|
||||
Open the download queue.
|
||||
|
||||
=item B</quit>
|
||||
|
||||
Quit ncdc.
|
||||
|
||||
=item B</reconnect>
|
||||
|
||||
Reconnect to the hub. When your nick or the hub encoding have been changed, the new settings will be used after the reconnect.
|
||||
|
||||
This command can also be used on the main tab, in which case all connected hubs will be reconnected.
|
||||
|
||||
=item B</refresh> [<path>]
|
||||
|
||||
Initiates share refresh. If no argument is given, the complete list will be refreshed. Otherwise only the specified directory will be refreshed. The path argument can be either an absolute filesystem path or a virtual path within your share.
|
||||
|
||||
=item B</say> <message>
|
||||
|
||||
Sends a chat message to the current hub or user. You normally don't have to use the /say command explicitly, any command not starting with '/' will automatically imply `/say <command>'. For example, typing `hello.' in the command line is equivalent to `/say hello.'. Using the /say command explicitly may be useful to send messages starting with '/' to the chat, for example `/say /help is what you are looking for'.
|
||||
|
||||
=item B</search> [options] <query>
|
||||
|
||||
Performs a file search, opening a new tab with the results.
|
||||
|
||||
Available options:
|
||||
|
||||
-hub Search the current hub only. (default)
|
||||
-all Search all connected hubs, except those with `chat_only' set.
|
||||
-le <s> Size of the file must be less than <s>.
|
||||
-ge <s> Size of the file must be larger than <s>.
|
||||
-t <t> File must be of type <t>. (see below)
|
||||
-tth <h> TTH root of this file must match <h>.
|
||||
|
||||
File sizes (<s> above) accept the following suffixes: G (GiB), M (MiB) and K (KiB).
|
||||
|
||||
The following file types can be used with the -t option:
|
||||
|
||||
1 any Any file or directory. (default)
|
||||
2 audio Audio files.
|
||||
3 archive (Compressed) archives.
|
||||
4 doc Text documents.
|
||||
5 exe Windows executables.
|
||||
6 img Image files.
|
||||
7 video Video files.
|
||||
8 dir Directories.
|
||||
|
||||
Note that file type matching is done using file extensions, and is not very reliable.
|
||||
|
||||
=item B</set> [<key> [<value>]]
|
||||
|
||||
Get or set global configuration variables. Use without arguments to get a list of all global settings and their current value. Glob-style pattern matching on the settings is also possible. Use, for example, `/set color*' to list all color-related settings.
|
||||
|
||||
See the `/unset' command to change a setting back to its default, and the `/hset' command to manage configuration on a per-hub basis. Changes to the settings are automatically saved to the database, and will not be lost after restarting ncdc.
|
||||
|
||||
To get information on a particular setting, use `/help set <key>'.
|
||||
|
||||
=item B</share> [<name> <path>]
|
||||
|
||||
Use /share without arguments to get a list of shared directories.
|
||||
When called with a name and a path, the path will be added to your share. Note that shell escaping may be used in the name. For example, to add a directory with the name `Fun Stuff', you could do the following:
|
||||
|
||||
/share "Fun Stuff" /path/to/fun/stuff
|
||||
|
||||
Or:
|
||||
|
||||
/share Fun\ Stuff /path/to/fun/stuff
|
||||
|
||||
The full path to the directory will not be visible to others, only the name you give it will be public. An initial `/refresh' is done automatically on the added directory.
|
||||
|
||||
=item B</ungrant> [<user>]
|
||||
|
||||
Revoke a granted slot.
|
||||
|
||||
=item B</unset> [<key>]
|
||||
|
||||
This command can be used to reset a global configuration variable back to its default value.
|
||||
|
||||
=item B</unshare> [<name>]
|
||||
|
||||
To remove a single directory from your share, use `/unshare <name>', to remove all directories from your share, use `/unshare /'.
|
||||
|
||||
Note that the hash data associated with the removed files will remain in the database. This allows you to re-add the files to your share without needing to re-hash them. The downside is that the database file may grow fairly large with unneeded information. See the `/gc' command to clean that up.
|
||||
|
||||
=item B</userlist>
|
||||
|
||||
Opens the user list of the currently selected hub. Can also be accessed using Alt+u.
|
||||
|
||||
=item B</version>
|
||||
|
||||
Display version information.
|
||||
|
||||
=item B</whois> <user>
|
||||
|
||||
This will open the user list and select the given user.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
|
||||
|
||||
=head1 SETTINGS
|
||||
|
||||
The following is a list of configuration settings. These settings can be
|
||||
changed and queried using the C</set> command for global settings and C</hset>
|
||||
for hub-local settings. All configuration data is stored in the db.sqlite3 file
|
||||
in the session directory.
|
||||
|
||||
=over
|
||||
|
||||
=item B<active> <boolean>
|
||||
|
||||
Enables or disables active mode. You may have to configure your router and/or firewall for this to work, see the `active_ip' and `active_port' settings for more information.
|
||||
|
||||
=item B<active_ip> <string>
|
||||
|
||||
Your public IP address for use in active mode. If this is not set or set to '0.0.0.0' for IPv4 or '::' for IPv6, then ncdc will try to automatically get your IP address from the hub. If you do set this manually, it is important that other clients can reach you using this IP address. If you connect to a hub on the internet, this should be your internet (WAN) IP. Likewise, if you connect to a hub on your LAN, this should be your LAN IP.
|
||||
|
||||
Both an IPv4 and an IPv6 address are set by providing two IP addresses separated with a comma. When unset, '0.0.0.0,::' is assumed. Only the IP version used to connect to the hub is used. That is, if you connect to an IPv6 hub, then the configured IPv6 address is used and the IPv4 address is ignored.
|
||||
|
||||
When set to the special value `local', ncdc will automatically get your IP address from the local network interface that is used to connect to the hub. This option should only be used if there is no NAT between you and the hub, because this will give the wrong IP if you are behind a NAT.
|
||||
|
||||
=item B<active_port> <integer>
|
||||
|
||||
The listen port for incoming connections in active mode. Set to `0' to automatically assign a random port. This setting is by default also used for the UDP port, see the `active_udp_port' setting to change that. If you are behind a router or firewall, make sure that you have configured it to forward and allow these ports.
|
||||
|
||||
=item B<active_udp_port> <integer>
|
||||
|
||||
The listen port for incoming UDP connections in active mode. Defaults to the `active_port' setting, or to a random number if `active_port' is not set.
|
||||
|
||||
=item B<adc_blom> <boolean>
|
||||
|
||||
Whether to support the BLOM extension on ADC hubs. This may decrease the bandwidth usage on the hub connection, in exchange for a bit of computational overhead. Some hubs require this setting to be enabled. This setting requires a reconnect with the hub to be active.
|
||||
|
||||
=item B<autoconnect> <boolean>
|
||||
|
||||
Set to true to automatically connect to the current hub when ncdc starts up.
|
||||
|
||||
=item B<autorefresh> <interval>
|
||||
|
||||
The time between automatic file refreshes. Recognized suffixes are 's' for seconds, 'm' for minutes, 'h' for hours and 'd' for days. Set to 0 to disable automatically refreshing the file list. This setting also determines whether ncdc will perform a refresh on startup. See the `/refresh' command to manually refresh your file list.
|
||||
|
||||
=item B<backlog> <integer>
|
||||
|
||||
When opening a hub or PM tab, ncdc can load a certain amount of lines from the log file into the log window. Setting this to a positive value enables this feature and configures the number of lines to load. Note that, while this setting can be set on a per-hub basis, PM windows will use the global value (global.backlog).
|
||||
|
||||
=item B<chat_only> <boolean>
|
||||
|
||||
Set to true to indicate that this hub is only used for chatting. That is, you won't or can't download from it. This setting affects the /search command when it is given the -all option.
|
||||
|
||||
=item B<color_*> <color>
|
||||
|
||||
The settings starting with the `color_' prefix allow you to change the interface colors. The following is a list of available color settings:
|
||||
|
||||
list_default - default item in a list
|
||||
list_header - header of a list
|
||||
list_select - selected item in a list
|
||||
log_default - default log color
|
||||
log_time - the time prefix in log messages
|
||||
log_nick - default nick color
|
||||
log_highlight - nick color of a highlighted line
|
||||
log_ownnick - color of your own nick
|
||||
log_join - color of join messages
|
||||
log_quit - color of quit messages
|
||||
separator - the list separator/footer bar
|
||||
tab_active - the active tab in the tab list
|
||||
tabprio_low - low priority tab notification color
|
||||
tabprio_med - medium priority tab notification color
|
||||
tabprio_high - high priority tab notification color
|
||||
title - the title bar
|
||||
|
||||
The actual color value can be set with a comma-separated list of color names and/or attributes. The first color in the list is the foreground color, the second color is used for the background. When the fore- or background color is not specified, the default colors of your terminal will be used.
|
||||
The following color names can be used: black, blue, cyan, default, green, magenta, red, white and yellow.
|
||||
The following attributes can be used: bold, blink, reverse and underline.
|
||||
The actual color values displayed by your terminal may vary. Adding the `bold' attribute usually makes the foreground color appear brighter as well.
|
||||
|
||||
=item B<connection> <string>
|
||||
|
||||
Set your upload speed. This is just an indication for other users in the hub so that they know what speed they can expect when downloading from you. The actual format you can use here may vary, but it is recommended to set it to either a plain number for Mbit/s (e.g. `50' for 50 mbit) or a number with a `KiB/s' indicator (e.g. `2300 KiB/s'). On ADC hubs you must use one of the previously mentioned formats, otherwise no upload speed will be broadcasted. This setting is broadcasted as-is on NMDC hubs, to allow for using old-style connection values (e.g. `DSL' or `Cable') on hubs that require this.
|
||||
|
||||
This setting is ignored if `upload_rate' has been set. If it is, that value is broadcasted instead.
|
||||
|
||||
=item B<description> <string>
|
||||
|
||||
A short public description that will be displayed in the user list of a hub.
|
||||
|
||||
=item B<disconnect_offline> <boolean>
|
||||
|
||||
Automatically disconnect any upload or download transfers when a user leaves the hub, or when you leave the hub. Setting this to `true' ensures that you are only connected with people who are online on the same hubs as you are.
|
||||
|
||||
=item B<download_dir> <path>
|
||||
|
||||
The directory where finished downloads are moved to. Finished downloads are by default stored in <session directory>/dl/. It is possible to set this to a location that is on a different filesystem than the incoming directory, but doing so is not recommended: ncdc will block when moving the completed files to their final destination.
|
||||
|
||||
=item B<download_exclude> <regex>
|
||||
|
||||
When recursively adding a directory to the download queue - by pressing `d' on a directory in the file list browser - any item in the selected directory with a name that matches this regular expression will not be added to the download queue.
|
||||
|
||||
This regex is not checked when adding individual files from either the file list browser or the search results.
|
||||
|
||||
=item B<download_rate> <speed>
|
||||
|
||||
Maximum combined transfer rate of all downloads. The total download speed will be limited to this value. The suffixes `G', 'M', and 'K' can be used for GiB/s, MiB/s and KiB/s, respectively. Note that, similar to upload_rate, TCP overhead is not counted towards this limit, so the actual bandwidth usage might be a little higher.
|
||||
|
||||
=item B<download_segment> <size>
|
||||
|
||||
Minimum segment size to use when requesting file data from another user. Set to 0 to disable segmented downloading.
|
||||
|
||||
=item B<download_slots> <integer>
|
||||
|
||||
Maximum number of simultaneous downloads.
|
||||
|
||||
=item B<email> <string>
|
||||
|
||||
Your email address. This will be displayed in the user list of the hub, so only set this if you want it to be public.
|
||||
|
||||
=item B<encoding> <string>
|
||||
|
||||
The character set/encoding to use for hub and PM messages. This setting is only used on NMDC hubs, ADC always uses UTF-8. Some common values are:
|
||||
|
||||
CP1250 (Central Europe)
|
||||
CP1251 (Cyrillic)
|
||||
CP1252 (Western Europe)
|
||||
ISO-8859-7 (Greek)
|
||||
KOI8-R (Cyrillic)
|
||||
UTF-8 (International)
|
||||
|
||||
=item B<filelist_maxage> <interval>
|
||||
|
||||
The maximum age of a downloaded file list. If a file list was downloaded longer ago than the configured interval, it will be removed from the cache (the fl/ directory) and subsequent requests to open the file list will result in the list being downloaded from the user again. Recognized suffixes are 's' for seconds, 'm' for minutes, 'h' for hours and 'd' for days. Set to 0 to disable the cache altogether.
|
||||
|
||||
=item B<flush_file_cache> <none|upload|download|hash>[,...]
|
||||
|
||||
Tell the OS to flush the file (disk) cache for file contents read while hashing and/or uploading or written to while downloading. On one hand, this will avoid trashing your disk cache with large files and thus improve the overall responsiveness of your system. On the other hand, ncdc may purge any shared files from the cache, even if they are still used by other applications. In general, it is a good idea to enable this if you also use your system for other things besides ncdc, you share large files (>100MB) and people are not constantly downloading the same file from you.
|
||||
|
||||
=item B<geoip_cc4> <path>|disabled
|
||||
|
||||
Path to the GeoIP Country database file for IPv4, or 'disabled' to disable GeoIP lookup for IPv4 addresses.
|
||||
|
||||
=item B<geoip_cc6> <path>|disabled
|
||||
|
||||
Path to the GeoIP Country database file for IPv6, or 'disabled' to disable GeoIP lookup for IPv6 addresses.
|
||||
|
||||
=item B<hash_rate> <speed>
|
||||
|
||||
Maximum file hashing speed. See the `download_rate' setting for allowed formats for this setting.
|
||||
|
||||
=item B<hubname> <string>
|
||||
|
||||
The name of the currently opened hub tab. This is a user-assigned name, and is only used within ncdc itself. This is the same name as given to the `/open' command.
|
||||
|
||||
=item B<incoming_dir> <path>
|
||||
|
||||
The directory where incomplete downloads are stored. This setting can only be changed when the download queue is empty. Also see the download_dir setting.
|
||||
|
||||
=item B<local_address> <string>
|
||||
|
||||
Specifies the address of the local network interface to use for connecting to the outside and for accepting incoming connections in active mode. Both an IPv4 and an IPv6 address are set by providing two IP addresses separated with a comma. When unset, '0.0.0.0,::' is assumed.
|
||||
|
||||
If no IPv4 address is specified, '0.0.0.0' is added automatically. Similarly, if no IPv6 address is specified, '::' is added automatically. The address that is actually used depends on the IP version actually used. That is, if you're on an IPv6 hub, then ncdc will listen on the specified IPv6 address. Note that, even if the hub you're on is on IPv6, ncdc may still try to connect to another client over IPv4, at which point the socket will be bound to the configured IPv4 address.
|
||||
|
||||
=item B<log_debug> <boolean>
|
||||
|
||||
Log debug messages to stderr.log in the session directory. It is highly recommended to enable this setting if you wish to debug or hack ncdc. Be warned, however, that this may generate a lot of data if you're connected to a large hub.
|
||||
|
||||
=item B<log_downloads> <boolean>
|
||||
|
||||
Log downloaded files to transfers.log.
|
||||
|
||||
=item B<log_hubchat> <boolean>
|
||||
|
||||
Log the main hub chat. Note that changing this requires any affected hub tabs to be closed and reopened before the change is effective.
|
||||
|
||||
=item B<log_uploads> <boolean>
|
||||
|
||||
Log file uploads to transfers.log.
|
||||
|
||||
=item B<minislots> <integer>
|
||||
|
||||
Set the number of available minislots. A `minislot' is a special slot that is used when all regular upload slots are in use and someone is requesting your filelist or a small file. In this case, the other client automatically applies for a minislot, and can still download from you as long as not all minislots are in use. What constitutes a `small' file can be changed with the `minislot_size' setting. Also see the `slots' configuration setting and the `/grant' command.
|
||||
|
||||
=item B<minislot_size> <integer>
|
||||
|
||||
The maximum size of a file that may be downloaded using a `minislot', in KiB. See the `minislots' setting for more information.
|
||||
|
||||
=item B<nick> <string>
|
||||
|
||||
Your nick. Nick changes are only visible on newly connected hubs, use the `/reconnect' command to use your new nick immediately. Note that it is highly discouraged to change your nick on NMDC hubs. This is because clients downloading from you have no way of knowing that you changed your nick, and therefore can't immediately continue to download from you.
|
||||
|
||||
=item B<notify_bell> <disable|low|medium|high>
|
||||
|
||||
When enabled, ncdc will send a bell to your terminal when a tab indicates a notification. The notification types are:
|
||||
|
||||
high - Messages directed to you (PM or highlight in hub chat),
|
||||
medium - Regular hub chat,
|
||||
low - User joins/quits, new search results, etc.
|
||||
|
||||
How a "bell" (or "beep" or "alert", whatever you prefer to call it) manifests itself depends on your terminal. In some setups, this generates an audible system bell. In other setups it can make your terminal window flash or do other annoying things to get your attention. And in some setups it is ignored completely.
|
||||
|
||||
=item B<password> <string>
|
||||
|
||||
Sets your password for the current hub and enables auto-login on connect. If you just want to login to a hub without saving your password, use the `/password' command instead. Passwords are saved unencrypted in the config file.
|
||||
|
||||
=item B<reconnect_timeout> <interval>
|
||||
|
||||
The time to wait before automatically reconnecting to a hub. Set to 0 to disable automatic reconnect.
|
||||
|
||||
=item B<sendfile> <boolean>
|
||||
|
||||
Whether or not to use the sendfile() system call to upload files, if supported. Using sendfile() allows less resource usage while uploading, but may not work well on all systems.
|
||||
|
||||
=item B<share_emptydirs> <boolean>
|
||||
|
||||
Share empty directories. When disabled (the default), empty directories in your share will not be visible to others. This also affects empty directories containing only empty directories, etc. A file list refresh is required for this setting to be effective.
|
||||
|
||||
=item B<share_exclude> <regex>
|
||||
|
||||
Any file or directory with a name that matches this regular expression will not be shared. A file list refresh is required for this setting to be effective.
|
||||
|
||||
=item B<share_hidden> <boolean>
|
||||
|
||||
Whether to share hidden files and directories. A `hidden' file or directory is one of which the file name starts with a dot. (e.g. `.bashrc'). A file list refresh is required for this setting to be effective.
|
||||
|
||||
=item B<share_symlinks> <boolean>
|
||||
|
||||
Whether to follow symlinks in shared directories. When disabled (default), ncdc will never share any files outside of the directory you specified. When enabled, any symlinks in your shared directories will be followed, even when they point to a directory outside your share.
|
||||
|
||||
=item B<show_joinquit> <boolean>
|
||||
|
||||
Whether to display join/quit messages in the hub chat.
|
||||
|
||||
=item B<slots> <integer>
|
||||
|
||||
The number of upload slots. This determines for the most part how many people can download from you simultaneously. It is possible that this limit is exceeded in certain circumstances, see the `minislots' setting and the `/grant' command.
|
||||
|
||||
=item B<sudp_policy> <disabled|allow|prefer>
|
||||
|
||||
Set the policy for sending or receiving encrypted UDP search results. When set to `disabled', all UDP search results will be sent and received in plain text. Set this to `allow' to let ncdc reply with encrypted search results if the other client requested it. `prefer' will also cause ncdc itself to request encryption.
|
||||
|
||||
Note that, regardless of this setting, encrypted UDP search results are only used on ADCS hubs. They will never be sent on NMDC or non-TLS ADC hubs. Also note that, even if you set this to `prefer', encryption is still only used when the client on the other side of the connection also supports it.
|
||||
|
||||
=item B<tls_policy> <disabled|allow|prefer>
|
||||
|
||||
Set the policy for secure client-to-client connections. Setting this to `disabled' disables TLS support for client connections, but still allows you to connect to TLS-enabled hubs. `allow' will allow the use of TLS if the other client requests this, but ncdc itself will not request TLS when connecting to others. Setting this to `prefer' tells ncdc to also request TLS when connecting to others.
|
||||
|
||||
The use of TLS for client connections usually results in less optimal performance when uploading and downloading, but is quite effective at avoiding protocol-specific traffic shaping that some ISPs may do. Also note that, even if you set this to `prefer', TLS will only be used if the connecting party also supports it.
|
||||
|
||||
=item B<tls_priority> <string>
|
||||
|
||||
Set the GnuTLS priority string used for all TLS-enabled connections. See the "Priority strings" section in the GnuTLS manual for details on what this does and how it works. Currently it is not possible to set a different priority string for different types of connections (e.g. hub or incoming/outgoing client connections).
|
||||
|
||||
=item B<ui_time_format> <string>
|
||||
|
||||
The format of the time displayed in the lower-left of the screen. Set `-' to not display a time at all. The string is passed to the Glib g_date_time_format() function, which accepts roughly the same formats as strftime(). Check out the strftime(3) man page or the Glib documentation for more information. Note that this setting does not influence the date/time format used in other places, such as the chat window or log files.
|
||||
|
||||
=item B<upload_rate> <speed>
|
||||
|
||||
Maximum combined transfer rate of all uploads. See the `download_rate' setting for more information on rate limiting. Note that this setting also overrides any `connection' setting.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
|
||||
|
||||
=head1 KEY BINDINGS
|
||||
|
||||
On any tab without the text input line, you can press `?' to get the key
|
||||
bindings for that tab. The list of key bindings is available through the
|
||||
C</help keys> command, and is reproduced below.
|
||||
|
||||
=over
|
||||
|
||||
=item B<Global key bindings>
|
||||
|
||||
Alt+j Open previous tab.
|
||||
Alt+k Open next tab.
|
||||
Alt+h Move current tab left.
|
||||
Alt+l Move current tab right.
|
||||
Alt+a Move tab with recent activity.
|
||||
Alt+<num> Open tab with number <num>.
|
||||
Alt+c Close current tab.
|
||||
Alt+n Open the connections tab.
|
||||
Alt+q Open the download queue tab.
|
||||
Alt+o Open own file list.
|
||||
Alt+r Refresh file list.
|
||||
|
||||
Keys for tabs with a log window:
|
||||
Ctrl+l Clear current log window.
|
||||
PgUp Scroll the log backward.
|
||||
PgDown Scroll the log forward.
|
||||
|
||||
Keys for tabs with a text input line:
|
||||
Left/Right Move cursor one character left or right.
|
||||
End/Home Move cursor to the end / start of the line.
|
||||
Up/Down Scroll through the command history.
|
||||
Tab Auto-complete current command, nick or argument.
|
||||
Alt+b Move cursor one word backward.
|
||||
Alt+f Move cursor one word forward.
|
||||
Backspace Delete character before cursor.
|
||||
Delete Delete character under cursor.
|
||||
Ctrl+w Delete to previous space.
|
||||
Alt+d Delete to next space.
|
||||
Ctrl+k Delete everything after cursor.
|
||||
Ctrl+u Delete entire line.
|
||||
|
||||
=item B<File browser>
|
||||
|
||||
Up/Down Select one item up/down.
|
||||
k/j Select one item up/down.
|
||||
PgUp/PgDown Select one page of items up/down.
|
||||
End/Home Select last/first item in the list.
|
||||
/ Start incremental regex search (press Return to stop editing).
|
||||
,/. Search next / previous.
|
||||
Right/l Open selected directory.
|
||||
Left/h Open parent directory.
|
||||
t Toggle sorting directories before files.
|
||||
s Order by file size.
|
||||
n Order by file name.
|
||||
d Add selected file/directory to the download queue.
|
||||
m Match selected item with the download queue.
|
||||
M Match entire file list with the download queue.
|
||||
a Search for alternative download sources.
|
||||
|
||||
=item B<Connection list>
|
||||
|
||||
Up/Down Select one item up/down.
|
||||
k/j Select one item up/down.
|
||||
PgUp/PgDown Select one page of items up/down.
|
||||
End/Home Select last/first item in the list.
|
||||
d Disconnect selected connection.
|
||||
i/Return Toggle information box.
|
||||
f Find user in user list.
|
||||
m Send a PM to the selected user.
|
||||
q Find file in download queue.
|
||||
|
||||
=item B<Download queue>
|
||||
|
||||
Up/Down Select one item up/down.
|
||||
k/j Select one item up/down.
|
||||
PgUp/PgDown Select one page of items up/down.
|
||||
End/Home Select last/first item in the list.
|
||||
K/J Select one user up/down.
|
||||
f Find user in user list.
|
||||
c Find connection in the connection list.
|
||||
a Search for alternative download sources.
|
||||
d Remove selected file from the queue.
|
||||
+/- Increase/decrease priority.
|
||||
i/Return Toggle user list.
|
||||
r Remove selected user for this file.
|
||||
R Remove selected user from all files in the download queue.
|
||||
x Clear error state for the selected user for this file.
|
||||
X Clear error state for the selected user for all files.
|
||||
|
||||
Note: when an item in the queue has `ERR' indicated in the
|
||||
priority column, you have two choices: You can remove the
|
||||
item from the queue using `d', or attempt to continue the
|
||||
download by increasing its priority using `+'.
|
||||
|
||||
=item B<Search results tab>
|
||||
|
||||
Up/Down Select one item up/down.
|
||||
k/j Select one item up/down.
|
||||
PgUp/PgDown Select one page of items up/down.
|
||||
End/Home Select last/first item in the list.
|
||||
f Find user in user list.
|
||||
b/B Browse the selected user's list, B to force a redownload.
|
||||
d Add selected file to the download queue.
|
||||
h Toggle hub column visibility.
|
||||
u Order by username.
|
||||
s Order by file size.
|
||||
l Order by free slots.
|
||||
n Order by file name.
|
||||
m Match selected item with the download queue.
|
||||
M Match all search results with the download queue.
|
||||
q Match selected users' list with the download queue.
|
||||
Q Match all matched users' lists with the download queue.
|
||||
a Search for alternative download sources.
|
||||
|
||||
=item B<User list tab>
|
||||
|
||||
Up/Down Select one item up/down.
|
||||
k/j Select one item up/down.
|
||||
PgUp/PgDown Select one page of items up/down.
|
||||
End/Home Select last/first item in the list.
|
||||
/ Start incremental regex search (press Return to stop editing).
|
||||
,/. Search next / previous.
|
||||
o Toggle sorting OPs before others.
|
||||
s/S Order by share size.
|
||||
u/U Order by username.
|
||||
t/T Toggle visibility / order by tag column.
|
||||
e/E Toggle visibility / order by email column.
|
||||
c/C Toggle visibility / order by connection column.
|
||||
p/P Toggle visibility / order by IP column.
|
||||
i/Return Toggle information box.
|
||||
m Send a PM to the selected user.
|
||||
g Grant a slot to the selected user.
|
||||
b/B Browse the selected user's list, B to force a redownload.
|
||||
q Match selected users' list with the download queue.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
|
||||
|
||||
=head1 ENVIRONMENT
|
||||
|
||||
$NCDC_DIR is used to determine the session dir; it is only honoured if I<-c> is
|
||||
not set on the command line.
|
||||
|
||||
|
||||
=head1 FILES
|
||||
|
||||
$NCDC_DIR corresponds to the session dir set via I<-c>, environment variable
|
||||
$NCDC_DIR or $HOME/.ncdc.
|
||||
|
||||
=over
|
||||
|
||||
=item $NCDC_DIR/cert/
|
||||
|
||||
Directory where the client certificates are stored. Must contain a private key
|
||||
file (client.key) and public certificate (client.crt). These will be generated
|
||||
automatically when ncdc starts up the first time.
|
||||
|
||||
=item $NCDC_DIR/db.sqlite3
|
||||
|
||||
The database. This stores all configuration variables, hash data of shared
|
||||
files, download queue information and other state information. Manually editing
|
||||
this file with the `sqlite3' commandline tool is possible but discouraged. Any
|
||||
changes made to the database while ncdc is running will not be read, and may
|
||||
even get overwritten by ncdc.
|
||||
|
||||
=item $NCDC_DIR/dl/
|
||||
|
||||
Directory where completed downloads are moved to by default. Can be changed
|
||||
with the C<download_dir> configuration option.
|
||||
|
||||
=item $NCDC_DIR/files.xml.bz2
|
||||
|
||||
Filelist containing a listing of all shared files.
|
||||
|
||||
=item $NCDC_DIR/fl/
|
||||
|
||||
Directory where downloaded file lists from other users are stored. The names of
|
||||
the files are hex-encoded user IDs that are used internally by ncdc. Old file
|
||||
lists are deleted automatically after a configurable interval. See the
|
||||
C<filelist_maxage> configuration option.
|
||||
|
||||
|
||||
=item $NCDC_DIR/history
|
||||
|
||||
Command history.
|
||||
|
||||
=item $NCDC_DIR/inc/
|
||||
|
||||
Default location for incomplete downloads. Can be changed with the
|
||||
C<incoming_dir> setting. The file names in this directory are the
|
||||
base32-encoded TTH root of the completed file.
|
||||
|
||||
=item $NCDC_DIR/logs/
|
||||
|
||||
Directory where all the log files are stored. File names starting with `#' are
|
||||
hub logs and `~' are user (PM) logs. Special log files are transfers.log and
|
||||
main.log.
|
||||
|
||||
ncdc does not have built-in functionality to rotate or compress log files
|
||||
automatically. When rotating log files manually (e.g. via a cron job), make
|
||||
sure to send the SIGUSR1 signal afterwards to force ncdc to flush the old logs
|
||||
and create or open the new log files.
|
||||
|
||||
=item $NCDC_DIR/stderr.log
|
||||
|
||||
Error/debug log. This file is cleared every time ncdc starts up.
|
||||
|
||||
=item $NCDC_DIR/version
|
||||
|
||||
Version of the data directory. This file is locked while an ncdc instance is
|
||||
running, making sure that no two ncdc instances work with the same session
|
||||
directory at the same time.
|
||||
|
||||
=back
|
||||
|
||||
=head2 Format of transfers.log
|
||||
|
||||
Uploads and downloads are logged in the transfers.log file. Transfers are
|
||||
separated by a newline (C<0x0a>). Each log line has the following fields,
|
||||
separated by a space:
|
||||
|
||||
=over
|
||||
|
||||
=item 1.
|
||||
|
||||
Date/time when the transfer ended, formatted as C<[YYYY-MM-DD HH:MM:SS ZONE]>,
|
||||
|
||||
=item 2.
|
||||
|
||||
Hub name, including the C<#> prefix,
|
||||
|
||||
=item 3.
|
||||
|
||||
Base32-encoded CID of the other user for ADC transfers, or a '-' for NMDC,
|
||||
|
||||
=item 4.
|
||||
|
||||
User name (escaped),
|
||||
|
||||
=item 5.
|
||||
|
||||
IPv4 or IPv6 address,
|
||||
|
||||
=item 6.
|
||||
|
||||
Direction, C<u> for upload or C<d> for download,
|
||||
|
||||
=item 7.
|
||||
|
||||
Whether the transfer completed successfully (C<c>) or has been
|
||||
interrupted/disconnected before all requested file data has been transferred
|
||||
(C<i>),
|
||||
|
||||
=item 8.
|
||||
|
||||
Base32-encoded TTH of the transferred file, or '-' for C<files.xml.bz2>,
|
||||
|
||||
=item 9.
|
||||
|
||||
Total transfer time, in seconds,
|
||||
|
||||
=item 10.
|
||||
|
||||
File size, in bytes,
|
||||
|
||||
=item 11.
|
||||
|
||||
File offset, in bytes,
|
||||
|
||||
=item 12.
|
||||
|
||||
Transfer size, in bytes,
|
||||
|
||||
=item 13.
|
||||
|
||||
File path (escaped). Absolute virtual path for uploads, destination path for
|
||||
downloads.
|
||||
|
||||
=back
|
||||
|
||||
All fields are encoded in UTF-8. Fields that may contain a space or newline are
|
||||
escaped as follows: A space is escaped as C<\s>, a newline as C<\n> and a
|
||||
backslash as C<\\>. The timestamp is not escaped.
|
||||
|
||||
Many clients download files in separate (smallish) chunks. Ncdc makes no
|
||||
attempt to combine multiple chunk requests in a single log entry, so you may
|
||||
see the same uploaded file several times with a different file offset.
|
||||
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
Copyright (C) 2011-2013 Yoran Heling <projects@yorhel.nl>
|
||||
|
||||
ncdc is distributed under the MIT license, please read the COPYING file for
|
||||
more information.
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
Please report bugs or feature requests to the bug tracker or the mailing list.
|
||||
Both can be found on the ncdc homepage at L<http://dev.yorhel.nl/ncdc>. There
|
||||
is also an ADC hub available at C<adc://dc.blicky.net:2780/> for general
|
||||
support and discussions.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
ncdc is written by Yoran Heling <projects@yorhel.nl>
|
||||
|
||||
Web: L<http://dev.yorhel.nl/ncdc>
|
||||
21
dat/ncdc-scr
21
dat/ncdc-scr
|
|
@ -1,21 +0,0 @@
|
|||
=pod
|
||||
|
||||
Note: While these screenshots are from version 1.5, the latest version has only
|
||||
minor visible changes. Let me also apologise for the crappy formatting, I
|
||||
should take some smaller shots next time...
|
||||
|
||||
=head2 Main chat
|
||||
|
||||
[img scr ncdchub.png Ncdc in the mainchat.]
|
||||
|
||||
=head2 File browser
|
||||
|
||||
[img scr ncdcbrowse.png Simple file list browser.]
|
||||
|
||||
=head2 User list
|
||||
|
||||
[img scr ncdcusers.png Ncdc displaying the userlist of a hub.]
|
||||
|
||||
=head2 Built-in help
|
||||
|
||||
[img scr ncdchelp.png Ncdc built-in help.]
|
||||
93
dat/ncdc.md
Normal file
93
dat/ncdc.md
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
% NCurses Direct Connect
|
||||
|
||||
Ncdc is a modern and lightweight direct connect client with a friendly ncurses
|
||||
interface.
|
||||
|
||||
## Get ncdc!
|
||||
|
||||
Latest version
|
||||
: 1.20 ([dllink ncdc-1.20.tar.gz]
|
||||
\- [changes](https://dev.yorhel.nl/ncdc/changes))
|
||||
|
||||
Convenient static binaries for Linux:
|
||||
[64-bit](/download/ncdc-linux-x86_64-1.20-6-g5111a.tar.gz) -
|
||||
[32-bit](/download/ncdc-linux-i486-1.20-6-g5111a.tar.gz) -
|
||||
[ARM](/download/ncdc-linux-arm-1.20-6-g5111a.tar.gz). Check the
|
||||
[installation instructions](/ncdc/install) for more info.
|
||||
|
||||
Development version
|
||||
: The latest development version is available from git and can be cloned using
|
||||
`git clone git://g.blicky.net/ncdc.git`. The repository is available for
|
||||
[online browsing](https://g.blicky.net/ncdc.git/).
|
||||
|
||||
Requirements
|
||||
: The following libraries are required: ncurses, zlib, bzip2, sqlite3, glib2 and
|
||||
gnutls.
|
||||
|
||||
Ncdc is entirely written in C and available under a liberal MIT license.
|
||||
|
||||
Community
|
||||
: - [Bug tracker](https://dev.yorhel.nl/ncdc/bug) - For bugs reports, feature requests and patches.
|
||||
- `adcs://dc.blicky.net:2780/` - For real-time chat.
|
||||
|
||||
Packages and ports
|
||||
: Are available for the following systems:
|
||||
[Arch Linux](https://aur.archlinux.org/packages/ncdc/) -
|
||||
[Fedora](https://apps.fedoraproject.org/packages/ncdc/overview/) -
|
||||
[FreeBSD](http://www.freshports.org/net-p2p/ncdc/) -
|
||||
[Frugalware](http://frugalware.org/packages?srch=ncdc&op=pkg&arch=all&ver=all) -
|
||||
[Gentoo](http://packages.gentoo.org/package/net-p2p/ncdc) -
|
||||
[GNU Guix](https://www.gnu.org/software/guix/package-list.html) -
|
||||
[Homebrew](http://braumeister.org/formula/ncdc) -
|
||||
[OpenSUSE](http://packman.links2linux.org/package/ncdc) -
|
||||
[Source Mage](http://download.sourcemage.org/grimoire/codex/test/ftp/ncdc/)
|
||||
|
||||
I have a few old packages on the [Open Build
|
||||
Service](https://build.opensuse.org/package/show/home:yorhel/ncdc), but
|
||||
these are unmaintained. The static binaries are preferred.
|
||||
|
||||
A convenient installer is available for
|
||||
[Android](http://code.ivysaur.me/ncdcinstaller.html).
|
||||
|
||||
## Features
|
||||
|
||||
Common features all modern DC clients (should) have:
|
||||
|
||||
- Connecting to multiple hubs at the same time,
|
||||
- Support for both ADC and NMDC protocols,
|
||||
- Chatting and private messaging,
|
||||
- Browsing the user list of a connected hub,
|
||||
- Share management and file uploading,
|
||||
- Connections and download queue management,
|
||||
- File list browsing,
|
||||
- TTH-checked, multi-source and segmented file downloading,
|
||||
- Searching for files,
|
||||
- Secure hub (adcs:// and nmdcs://) and client connections on both protocols,
|
||||
- Bandwidth throttling,
|
||||
- IPv6 support.
|
||||
|
||||
And special features not commonly found in other clients:
|
||||
|
||||
- Different connection settings for each hub,
|
||||
- Encrypted UDP messages (ADC SUDP),
|
||||
- Subdirectory refreshing,
|
||||
- Nick notification and highlighting in chat windows,
|
||||
- Trust on First Use for TLS-enabled hubs,
|
||||
- A single listen port for both TLS and TCP connections,
|
||||
- Efficient file uploads using sendfile(),
|
||||
- Large file lists are opened in a background thread,
|
||||
- Doesn't trash your OS file cache (with the flush\_file\_cache option enabled),
|
||||
- (Relatively...) low memory usage.
|
||||
|
||||
## What doesn't ncdc do?
|
||||
|
||||
Since the above list is getting larger and larger every time, it may be more
|
||||
interesting to list a few features that are (relatively) common in other DC
|
||||
clients, but which ncdc doesn't do. Yet.
|
||||
|
||||
- NAT Traversal,
|
||||
- OP features (e.g. client detection, file list scanning and other useful stuff for OPs),
|
||||
- SOCKS support.
|
||||
|
||||
Of course, there are many more features that could be implemented or improved.
|
||||
These will all be addressed in later versions (hopefully :).
|
||||
|
|
@ -1,13 +1,14 @@
|
|||
=head1 About ncdc
|
||||
% Ncdc Q&A
|
||||
|
||||
# About ncdc
|
||||
|
||||
=head2 What about other text-mode clients?
|
||||
## What about other text-mode clients?
|
||||
|
||||
L<microdc2|http://corsair626.no-ip.org/microdc/> - A rather nice client, yet
|
||||
[microdc2](http://corsair626.no-ip.org/microdc/) - A rather nice client, yet
|
||||
not exactly there. It's limited to connecting to a single hub, hasn't been
|
||||
updated since 2006, and the readline interface is slightly awkward to use.
|
||||
|
||||
L<nanodc|http://sourceforge.net/projects/nanodc/> - Can't comment much on this,
|
||||
[nanodc](http://sourceforge.net/projects/nanodc/) - Can't comment much on this,
|
||||
except maybe that rocket science is perhaps easier than getting nanodc to
|
||||
compile.
|
||||
|
||||
|
|
@ -15,18 +16,17 @@ LDCC - Uses DCTC as backend and an interface based on TurboVision. All
|
|||
mentioned projects are dead: neither LDCC, DCTC nor TurboVision are seeing any
|
||||
recent development.
|
||||
|
||||
L<ShakesPeer|http://shakespeer.bzero.se/> - Appears to have a commandline
|
||||
[ShakesPeer](http://shakespeer.bzero.se/) - Appears to have a commandline
|
||||
interface as well. I haven't personally tried it, but have not heard many
|
||||
positive things about it. Has not seen any recent development, either.
|
||||
|
||||
|
||||
=head2 Why did you start from scratch? Why not use the DC++ core?
|
||||
## Why did you start from scratch? Why not use the DC++ core?
|
||||
|
||||
There are several reasons why I chose not to use code from existing projects,
|
||||
but the two most important reasons are the following: 1) I am a control freak,
|
||||
and 2) personal preferences.
|
||||
|
||||
B<Control freak:> I have no idea how to create an interface to a protocol if I
|
||||
**Control freak:** I have no idea how to create an interface to a protocol if I
|
||||
don't know the overall design and all the tiny details of the actual protocol
|
||||
I'm working with. And what's a better way to get used to a protocol than by
|
||||
writing everything yourself? Then there are some other advantages to
|
||||
|
|
@ -34,20 +34,22 @@ reimplementing everything: I get to choose the library dependencies and the
|
|||
memory/CPU efficiency trade-offs, and I am not limited by an existing
|
||||
implementation that needs quite a few modifications to achieve what I want.
|
||||
Most of the "special features not commonly found in other clients" mentioned on
|
||||
the L<homepage|https://dev.yorhel.nl/ncdc> are a direct result of this.
|
||||
the [homepage](/ncdc) are a direct result of this.
|
||||
|
||||
B<Personal preferences:> These are simple: I rather dislike C++ and working
|
||||
**Personal preferences:** These are simple: I rather dislike C++ and working
|
||||
with other people's code. Working with other people's C++ code isn't exactly
|
||||
something I wish to spend my free time on.
|
||||
|
||||
|
||||
=head2 Does ncdc support TLS 1.2?
|
||||
## Does ncdc support TLS 1.2?
|
||||
|
||||
Yes, but you need a recent version of GnuTLS. Nobody knows what counts as
|
||||
"recent", exactly, but I'm guessing any 3.0+ version will do.
|
||||
|
||||
## Does ncdc support TLS 1.3?
|
||||
|
||||
=head2 What protocol features does ncdc support?
|
||||
Yes, but you need an even more recent version of GnuTLS.
|
||||
|
||||
## What protocol features does ncdc support?
|
||||
|
||||
For ADC: BASE, RF, TIGR, BZIP, BLOM, ADCS, KEYP and SUDP.
|
||||
|
||||
|
|
@ -59,103 +61,89 @@ does not support some of the older NMDC protocol features, like $Get,
|
|||
$GetZBlock, $CHUNK, $Cancel or non-XML file lists. I am not aware of an other
|
||||
up-to-date client that still uses any of these features.
|
||||
|
||||
|
||||
=head2 What are those flags / character indications in the connection list?
|
||||
## What are those flags / character indications in the connection list?
|
||||
|
||||
Since the manual page doesn't cover those yet, I'll document it here for now:
|
||||
|
||||
The header has C<St>, where the C<S> stands for Status and C<t> for whether TLS
|
||||
encryption is used or not. The status flags can be either B<C>onnecting,
|
||||
B<H>andshake, B<I>dle, B<D>ownloading, B<U>ploading or B<-> for disconnected.
|
||||
The header has `St`, where the `S` stands for Status and `t` for whether TLS
|
||||
encryption is used or not. The status flags can be either **C**onnecting,
|
||||
**H**andshake, **I**dle, **D**ownloading, **U**ploading or **-** for disconnected.
|
||||
|
||||
## ...And what about those in the user list?
|
||||
|
||||
=head2 ...And what about those in the user list?
|
||||
The user list has three boolean flags: **O**perator, **P**assive, and whether the client has **T**LS enabled.
|
||||
|
||||
The user list has three boolean flags: B<O>perator, B<P>assive, and whether the client has B<T>LS enabled.
|
||||
# Troubleshooting
|
||||
|
||||
|
||||
|
||||
=head1 Troubleshooting
|
||||
|
||||
=head2 Luadch: "(error-40) Invalid named parameter in inf: I4"
|
||||
## Luadch: "(error-40) Invalid named parameter in inf: I4"
|
||||
|
||||
This error occurs when connecting to (some?) luadch hubs. The problem here is
|
||||
that IP address autodetection is broken on these hubs, and you can work around
|
||||
it by manually setting C<active_ip> to your (public) IP address: C</set
|
||||
active_ip 1.3.3.7>.
|
||||
it by manually setting `active_ip` to your (public) IP address: `/set
|
||||
active_ip 1.3.3.7`.
|
||||
|
||||
=head2 The Alt- keys don't work!
|
||||
## The Alt- keys don't work!
|
||||
|
||||
The ncdc manual refers to the "meta" key as Alt-something, but the actual key
|
||||
to use tends to differ depending on your setup. In almost every setup, you can
|
||||
press and release the 'Esc' key as a replacement for Alt-something. If you're
|
||||
on OS X, L<this stackoverflow answer|http://stackoverflow.com/a/438892>
|
||||
on OS X, [this stackoverflow answer](http://stackoverflow.com/a/438892)
|
||||
may be helpful.
|
||||
|
||||
=head2 Ncdc crashes a lot!
|
||||
## Ncdc crashes a lot!
|
||||
|
||||
Ncdc 1.19.1 has no known bugs that may cause a crash. If you're running an older
|
||||
Ncdc 1.20 has no known bugs that may cause a crash. If you're running an older
|
||||
version of ncdc, please upgrade. If your ncdc is up to date and you still have
|
||||
a crash, please report a bug.
|
||||
|
||||
## Ncdc uses too much disk space!
|
||||
|
||||
=head2 Ncdc uses too much disk space!
|
||||
First, look where this disk space goes to (hint: use [ncdu](/ncdu)). If it's
|
||||
the log files: you can safely delete or rotate them (see next question).
|
||||
|
||||
First, look where this disk space goes to (hint: use
|
||||
L<ncdu|https://dev.yorhel.nl/ncdu>). If it's the log files: you can safely
|
||||
delete or rotate them (see next question).
|
||||
|
||||
The I<db.sqlite3> file can also grow quite large in certain situations. If you
|
||||
The _db.sqlite3_ file can also grow quite large in certain situations. If you
|
||||
modify or rename a lot of files in your share and ncdc re-hashes them, the old
|
||||
hash data associated with the files is not removed from the database, resulting
|
||||
in wasted disk space. The C</gc> command in ncdc can be used to clean up this
|
||||
in wasted disk space. The `/gc` command in ncdc can be used to clean up this
|
||||
unused data. Be warned, however, that this command needs roughly twice the size
|
||||
of the old db.sqlite3 file for temporary storage, so make sure you have enough
|
||||
space available. (Note that this behaviour is not specific to ncdc, most other
|
||||
DC clients do the same.)
|
||||
|
||||
|
||||
=head2 Why doesn't ncdc rotate log files automatically?
|
||||
## Why doesn't ncdc rotate log files automatically?
|
||||
|
||||
Because you can easily do that yourself. You can either use logrotate or a
|
||||
simple script that runs from a cron. For an example of the latter option,
|
||||
L<this is the script I use|http://p.blicky.net/s7132>, which is run as a
|
||||
[this is the script I use](http://p.blicky.net/s7132), which is run as a
|
||||
monthly cron job.
|
||||
|
||||
# Can ncdc...
|
||||
|
||||
|
||||
=head1 Can ncdc...
|
||||
|
||||
|
||||
=head2 Can ncdc run in the background / as a daemon?
|
||||
## Can ncdc run in the background / as a daemon?
|
||||
|
||||
As with most ncurses applications: no. At least, it does not have this
|
||||
functionality built-in. Ncdc is designed to be used in combination with a
|
||||
separate terminal multiplexer or detach utility to handle this. Have a look at
|
||||
L<GNU screen|http://www.gnu.org/s/screen/>,
|
||||
L<tmux|http://tmux.sourceforge.net/> or L<dtach|http://dtach.sourceforge.net/>.
|
||||
[GNU screen](http://www.gnu.org/s/screen/),
|
||||
[tmux](http://tmux.sourceforge.net/) or [dtach](http://dtach.sourceforge.net/).
|
||||
|
||||
## Does ncdc support UPnP?
|
||||
|
||||
=head2 Does ncdc support UPnP?
|
||||
|
||||
Not natively. However, it is possible to use L<this
|
||||
script|http://www.howtoforge.com/administrating-your-gateway-device-via-upnp>
|
||||
Not natively. However, it is possible to use [this
|
||||
script](http://www.howtoforge.com/administrating-your-gateway-device-via-upnp)
|
||||
and manually keep a port open using a cron job. I have no experience with this
|
||||
myself, though. I just run ncdc directly on my router. :-)
|
||||
|
||||
|
||||
=head2 Are there any programs available for analyzing the transfers.log file?
|
||||
## Are there any programs available for analyzing the transfers.log file?
|
||||
|
||||
Nothing like that is included in the release yet, but there is a simple Perl
|
||||
script available: L<ncdc-transfer-stats|http://p.blicky.net/eu00a>, and a short
|
||||
Go program: L<ncdc-share-report|http://p.blicky.net/h25z8>.
|
||||
script available: [ncdc-transfer-stats](http://p.blicky.net/eu00a), and a short
|
||||
Go program: [ncdc-share-report](http://p.blicky.net/h25z8).
|
||||
|
||||
|
||||
=head2 Can ncdc use the hash data or configuration from an existing DC++ installation?
|
||||
## Can ncdc use the hash data or configuration from an existing DC++ installation?
|
||||
|
||||
No, ncdc uses its own configuration and hash storage directory. However, on
|
||||
popular demand I could write a conversion utility to transfer the hash data
|
||||
from other clients to ncdc's format. (Contrary to my expectations, there hasn't
|
||||
been much interest in such a tool ever since I wrote this FAQ entry two years
|
||||
been much interest in such a tool ever since I wrote this FAQ entry many years
|
||||
ago. So I guess this isn't really a FAQ).
|
||||
|
||||
156
dat/ncdc/install.md
Normal file
156
dat/ncdc/install.md
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
% Ncdc Installation Instructions
|
||||
|
||||
# General instructions
|
||||
|
||||
## Building from source
|
||||
|
||||
In theory, the following instructions should work everywhere:
|
||||
|
||||
- Install the required dependencies: ncurses, bzip2, zlib, sqlite3, glib2 and gnutls,
|
||||
- Download and extract the source tarball from the [homepage](/ncdc),
|
||||
- `./configure`
|
||||
- `make`
|
||||
- And then run `make install` with superuser permissions.
|
||||
|
||||
In practice, however, this does not always work and may not always be the
|
||||
preferred method of installation. On this page I try to collect instructions for
|
||||
each OS and distribution to make the installation process a bit easier for
|
||||
everyone.
|
||||
|
||||
If your system is missing from this page or if you're still having trouble,
|
||||
don't hesitate to join the support hub at `adc://dc.blicky.net/` or send me a
|
||||
mail at [projects@yorhel.nl](mailto:projects@yorhel.nl). Contributions to this
|
||||
page are of course highly welcomed as well. :-)
|
||||
|
||||
## Statically linked binaries
|
||||
|
||||
If you just want to get ncdc running without going through the trouble of
|
||||
compiling and/or installing it, I also offer statically linked binaries:
|
||||
|
||||
- [Linux, 64-bit](/download/ncdc-linux-x86_64-1.20-6-g5111a.tar.gz)
|
||||
- [Linux, 32-bit](/download/ncdc-linux-i486-1.20-6-g5111a.tar.gz)
|
||||
- [Linux, ARM](/download/ncdc-linux-arm-1.20-6-g5111a.tar.gz)
|
||||
|
||||
To use them, simply download and extract the tarball, and then run `./ncdc` on
|
||||
the command line.
|
||||
|
||||
The binaries include all the required dependencies and are linked against
|
||||
[musl](http://www.etalabs.net/musl/), so they should run on any Linux machine
|
||||
with the right architecture. If you want binaries for another OS or
|
||||
architecture, please bug me and I'll see what I can do.
|
||||
|
||||
# System-specific instructions
|
||||
|
||||
## Android
|
||||
|
||||
A [convenient installer](http://code.ivysaur.me/ncdcinstaller.html) is
|
||||
available for Android 2.3 and later, which makes use of the static binary.
|
||||
|
||||
## Arch Linux
|
||||
|
||||
Ncdc is available on [AUR](https://aur.archlinux.org/packages/ncdc/), to
|
||||
install it you can use your favorite AUR-installer. If you don't have a
|
||||
favorite, go for the manual approach:
|
||||
|
||||
wget https://aur.archlinux.org/cgit/aur.git/snapshot/ncdc.tar.gz
|
||||
tar -xf ncdc.tar.gz
|
||||
cd ncdc
|
||||
makepkg -si
|
||||
|
||||
## Fedora
|
||||
|
||||
There's a [package](https://apps.fedoraproject.org/packages/ncdc/overview/)
|
||||
available for Fedora.
|
||||
|
||||
## FreeBSD
|
||||
|
||||
Ncdc is available in the Ports Collection. To install, [make sure your
|
||||
collection is
|
||||
up-to-date](http://www.freebsd.org/doc/en_US.ISO8859-1/books/handbook/ports-using.html)
|
||||
and install the Port as any other:
|
||||
|
||||
cd /usr/ports/net-p2p/ncdc
|
||||
make install clean
|
||||
|
||||
## Gentoo
|
||||
|
||||
Ncdc is available in the Portage tree, so installation is trivial:
|
||||
|
||||
emerge ncdc
|
||||
|
||||
## Mac OS X
|
||||
|
||||
Ncdc is available in [Homebrew](http://braumeister.org/formula/ncdc).
|
||||
|
||||
## OpenIndiana
|
||||
|
||||
This has been tested on OpenIndiana Build 151a Server, but may work on other
|
||||
versions as well. Compiling from source is your only option at the moment.
|
||||
First install some required packages (as root):
|
||||
|
||||
pkg install gcc-3 glib2 gnutls gettext header-math perl-510/extra
|
||||
|
||||
Then, fetch the ncdc source tarball, extract and build as follows:
|
||||
|
||||
wget https://dev.yorhel.nl/download/ncdc-1.20.tar.gz
|
||||
tar -xf ncdc-1.20.tar.gz
|
||||
cd ncdc-1.20
|
||||
export PATH="$PATH:/usr/perl5/5.10.0/bin"
|
||||
./configure --prefix=/usr LDFLAGS='-L/usr/gnu/lib -R/usr/gnu/lib'
|
||||
make
|
||||
|
||||
And finally, to actually install ncdc, run `make install` as root. You can
|
||||
safely revert `$PATH` back to its previous value if you wish, it was only
|
||||
necessary in order for `./configure` and `make` to find `pod2man`.
|
||||
|
||||
## OpenSUSE
|
||||
|
||||
Get the package from [PackMan](http://packman.links2linux.org/package/ncdc):
|
||||
Select your openSUSE release and hit the "1 click install" button.
|
||||
|
||||
## Ubuntu & Debian
|
||||
|
||||
The preferred way of installing ncdc on Ubuntu or Debian is to use the static
|
||||
binaries provided above.
|
||||
|
||||
Alternatively, you can also try to compile ncdc from source. To do so, first
|
||||
install the required libraries:
|
||||
|
||||
sudo apt-get install libbz2-dev libsqlite3-dev libncurses5-dev\
|
||||
libncursesw5-dev libglib2.0-dev libgnutls-dev zlib1g-dev
|
||||
|
||||
Then run the following commands to download and install ncdc:
|
||||
|
||||
wget https://dev.yorhel.nl/download/ncdc-1.20.tar.gz
|
||||
tar -xf ncdc-1.20.tar.gz
|
||||
cd ncdc-1.20
|
||||
./configure --prefix=/usr
|
||||
make
|
||||
sudo make install
|
||||
|
||||
## Windows (Cygwin)
|
||||
|
||||
Surprisingly enough, ncdc can be used even on Windows, thanks to Cygwin. If
|
||||
you haven't done so already, get `setup.exe` from the [Cygwin
|
||||
website](http://cygwin.com/) and use it to install the following packages:
|
||||
|
||||
- make
|
||||
- gcc4
|
||||
- perl
|
||||
- pkg-config
|
||||
- wget
|
||||
- zlib-devel
|
||||
- libncursesw-devel
|
||||
- libbz2-devel
|
||||
- libglib2.0-devel
|
||||
- libsqlite3-devel
|
||||
- gnutls-devel
|
||||
|
||||
Then open a Cygwin terminal and run the following commands to download,
|
||||
compile, and install ncdc:
|
||||
|
||||
wget https://dev.yorhel.nl/download/ncdc-1.20.tar.gz
|
||||
tar -xf ncdc-1.20.tar.gz
|
||||
cd ncdc-1.20
|
||||
./configure --prefix=/usr
|
||||
make install
|
||||
21
dat/ncdc/scr.md
Normal file
21
dat/ncdc/scr.md
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
% Ncdc Screenshots
|
||||
|
||||
Note: While these screenshots are from version 1.5, the latest version has only
|
||||
minor visible changes. Let me also apologise for the crappy formatting, I
|
||||
should take some smaller shots next time...
|
||||
|
||||
## Main chat
|
||||
|
||||

|
||||
|
||||
## File browser
|
||||
|
||||

|
||||
|
||||
## User list
|
||||
|
||||

|
||||
|
||||
## Built-in help
|
||||
|
||||

|
||||
102
dat/ncdu
102
dat/ncdu
|
|
@ -1,102 +0,0 @@
|
|||
=pod
|
||||
|
||||
Ncdu is a disk usage analyzer with an ncurses interface. It is designed to find
|
||||
space hogs on a remote server where you don't have an entire graphical setup
|
||||
available, but it is a useful tool even on regular desktop systems. Ncdu aims
|
||||
to be fast, simple and easy to use, and should be able to run in any minimal
|
||||
POSIX-like environment with ncurses installed.
|
||||
|
||||
|
||||
=head2 Download
|
||||
|
||||
=over
|
||||
|
||||
=item Latest version
|
||||
|
||||
1.14 ([dllink ncdu-1.14.tar.gz download]
|
||||
- L<changes|https://dev.yorhel.nl/ncdu/changes>)
|
||||
|
||||
I also have convenient static binaries for Linux
|
||||
L<i486|https://dev.yorhel.nl/download/ncdu-linux-i486-1.14.tar.gz> and
|
||||
L<ARM|https://dev.yorhel.nl/download/ncdu-linux-arm-1.14.tar.gz>. Download,
|
||||
extract and run; no compilation or installation necessary (uses
|
||||
L<musl|http://www.musl-libc.org/>).
|
||||
|
||||
=item Development version
|
||||
|
||||
The most recent code is available on a git repository and can be cloned with
|
||||
C<git clone git://g.blicky.net/ncdu.git/>. The repository is also available for
|
||||
L<online browsing|http://g.blicky.net/ncdu.git/>.
|
||||
|
||||
=back
|
||||
|
||||
Ncdu is entirely written in C and available under a liberal MIT license.
|
||||
|
||||
|
||||
|
||||
=head2 Packages and ports
|
||||
|
||||
Ncdu has been packaged for quite a few systems, here's a list of the ones I am aware of:
|
||||
|
||||
L<AIX|http://www.perzl.org/aix/index.php?n=Main.Ncdu> -
|
||||
L<Alpine Linux|http://pkgs.alpinelinux.org/packages?name=ncdu> -
|
||||
L<ALT Linux|http://sisyphus.ru/en/srpm/ncdu> -
|
||||
L<Arch Linux|https://www.archlinux.org/packages/?q=ncdu> -
|
||||
L<CRUX|https://crux.nu/portdb/?q=ncdu&a=search> -
|
||||
L<Cygwin|https://cygwin.com/cgi-bin2/package-grep.cgi?grep=ncdu> -
|
||||
L<Debian|http://packages.debian.org/ncdu> -
|
||||
L<Fedora|https://apps.fedoraproject.org/packages/ncdu> -
|
||||
L<FreeBSD|https://www.freebsd.org/cgi/ports.cgi?query=ncdu&stype=all> -
|
||||
L<Frugalware|http://frugalware.org/packages/?op=pkg&srch=ncdu&arch=all&ver=all> -
|
||||
L<Gentoo|https://packages.gentoo.org/packages/sys-fs/ncdu> -
|
||||
L<GNU Guix|https://www.gnu.org/software/guix/package-list.html> -
|
||||
L<OpenBSD|http://cvsweb.openbsd.org/cgi-bin/cvsweb/ports/sysutils/ncdu/> -
|
||||
Mac OS X (L<Fink|http://pdb.finkproject.org/pdb/package.php/ncdu> - L<Homebrew|https://formulae.brew.sh/formula/ncdu> - L<MacPorts|http://www.macports.org/ports.php?by=name&substr=ncdu>) -
|
||||
L<Puppy Linux|http://www.murga-linux.com/puppy/viewtopic.php?t=35024> -
|
||||
L<Solaris|http://www.opencsw.org/packages/ncdu> -
|
||||
L<Slackware|http://slackbuilds.org/repository/14.2/system/ncdu/> -
|
||||
L<Slax Linux|http://www.slax.org/modules.php?detail=ncdu> -
|
||||
L<Ubuntu|http://packages.ubuntu.com/search?searchon=sourcenames&keywords=ncdu> -
|
||||
L<Void Linux|https://voidlinux.org/packages/>
|
||||
|
||||
Packages for RHEL and (open)SUSE can be found on the
|
||||
L<Open Build Service|https://software.opensuse.org//download.html?project=utilities&package=ncdu>.
|
||||
|
||||
Packages for NetBSD, DragonFlyBSD, MirBSD and others can be found on
|
||||
L<pkgsrc|http://pkgsrc.se/sysutils/ncdu>.
|
||||
|
||||
A port to z/OS is available L<here|https://dovetail.com/community/ncdu.html>.
|
||||
|
||||
|
||||
|
||||
=head2 Similar projects
|
||||
|
||||
=over
|
||||
|
||||
=item L<Duc|http://duc.zevv.nl/> - Multiple user interfaces.
|
||||
|
||||
=item L<gt5|http://gt5.sourceforge.net/> - Quite similar to ncdu, but a different approach.
|
||||
|
||||
=item L<tdu|http://webonastick.com/tdu/> - Another small ncurses-based disk usage visualization utility.
|
||||
|
||||
=item L<TreeSize|http://treesize.sourceforge.net/> - GTK, using a treeview.
|
||||
|
||||
=item L<Baobab|http://www.marzocca.net/linux/baobab.html> - GTK, using pie-charts, a treeview and a treemap. Comes with GNOME.
|
||||
|
||||
=item L<GdMap|http://gdmap.sourceforge.net/> - GTK, with a treemap display.
|
||||
|
||||
=item L<Filelight|http://www.methylblue.com/filelight/> - KDE, using pie-charts.
|
||||
|
||||
=item L<KDirStat|http://kdirstat.sourceforge.net/> - KDE, with a treemap display.
|
||||
|
||||
=item L<QDiskUsage|http://qt-apps.org/content/show.php/QDiskUsage?content=107012> - Qt, using pie-charts.
|
||||
|
||||
=item L<xdiskusage|http://xdiskusage.sourceforge.net/> - FLTK, with a treemap display.
|
||||
|
||||
=item L<fsv|http://fsv.sourceforge.net/> - 3D visualization.
|
||||
|
||||
=item L<Philesight|http://zevv.nl/play/code/philesight/> - Web-based clone of Filelight.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
|
|
@ -1,159 +0,0 @@
|
|||
1.14 - 2019-02-04
|
||||
- Add mtime display and sorting (Alex Wilson)
|
||||
- Add (limited) --follow-symlinks option (Simon Doppler)
|
||||
- Display larger file counts in browser UI
|
||||
- Add -V, --version, and --help alias flags
|
||||
- Fix crash when attempting to sort an empty directory
|
||||
- Fix 100% CPU bug when ncdu loses the terminal
|
||||
- Fix '--color=off' flag
|
||||
- Fix some typos
|
||||
|
||||
1.13 - 2018-01-29
|
||||
- Add "extended information" mode and -e flag
|
||||
- Add file mode, modification time and uid/gid to info window with -e
|
||||
- Add experimental color support and --color flag
|
||||
- Add -rr option to disable shell spawning
|
||||
- Remove directory nesting limit on file import
|
||||
- Fix handling of interrupts during file import
|
||||
- Fix undefined behavior that triggered crash on OS X
|
||||
|
||||
1.12 - 2016-08-24
|
||||
- Add NCDU_SHELL environment variable
|
||||
- Add --confirm-quit flag
|
||||
- Fix compilation due to missing sys/wait.h include
|
||||
|
||||
1.11 - 2015-04-05
|
||||
- Added 'b' key to spawn shell in the current directory
|
||||
- Support scanning (and refreshing) of empty directories
|
||||
- Added --si flag for base 10 prefixes
|
||||
- Fix toggle dirs before files
|
||||
|
||||
1.10 - 2013-05-09
|
||||
- Added 'c' key to display item counts
|
||||
- Added 'C' key to order by item counts
|
||||
- Added CACHEDIR.TAG support and --exclude-caches option
|
||||
- Use locale-dependent thousand separator
|
||||
- Use pkg-config to detect ncurses
|
||||
- Clip file/dir sizes to 8 EiB minus one byte
|
||||
- Fix buffer overflow when formatting huge file sizes
|
||||
|
||||
1.9 - 2012-09-27
|
||||
- Added option to dump scanned directory information to a file (-o)
|
||||
- Added option to load scanned directory information from a file (-f)
|
||||
- Added multiple scan and load interfaces (-0,-1,-2)
|
||||
- Fit loading and error windows to the terminal width (#13)
|
||||
- Fix symlink resolving bug (#18)
|
||||
- Fix path display when scanning an empty directory (#15)
|
||||
- Fix hang when terminal is resized to a too small size while loading
|
||||
- Use top-level automake build
|
||||
- Remove useless AUTHORS, INSTALL and NEWS files
|
||||
- ncdu.1 now uses POD as source format
|
||||
|
||||
1.8 - 2011-11-03
|
||||
- Use hash table to speed up hard link detection
|
||||
- Added read-only option (-r)
|
||||
- Use KiB instead of kiB (#3399279)
|
||||
|
||||
1.7 - 2010-08-13
|
||||
- List the detected hard links in file info window
|
||||
- Count the size of a hard linked file once for each directory it appears in
|
||||
- Fixed crash on browsing dirs with a small window size (#2991787)
|
||||
- Fixed buffer overflow when some directories can't be scanned (#2981704)
|
||||
- Fixed segfault when launched on a nonexistant directory (#3012787)
|
||||
- Fixed segfault when root dir only contains hidden files
|
||||
- Improved browsing performance
|
||||
- More intuitive multi-page browsing
|
||||
- Display size graph by default
|
||||
- Various minor fixes
|
||||
|
||||
1.6 - 2009-10-23
|
||||
- Implemented hard link detection
|
||||
- Properly select the next item after deletion
|
||||
- Removed reliance of dirfd()
|
||||
- Fixed non-void return in void delete_process()
|
||||
- Fixed several tiny memory leaks
|
||||
- Return to previously opened directory on failed recalculation
|
||||
- Properly display MiB units instead of MB (IEEE 1541 - bug #2831412)
|
||||
- Link to ncursesw when available
|
||||
- Improved support for non-ASCII characters
|
||||
- VIM keybindings for browsing through the tree (#2788249, #1880622)
|
||||
|
||||
1.5 - 2009-05-02
|
||||
- Fixed incorrect apparent size on directory refresh
|
||||
- Browsing keys now work while file info window is displayed
|
||||
- Current directory is assumed when no directory is specified
|
||||
- Size graph uses the apparent size if that is displayed
|
||||
- Items are ordered by displayed size rather than disk usage
|
||||
- Removed switching between powers of 1000/1024
|
||||
- Don't rely on the availability of suseconds_t
|
||||
- Correctly handle paths longer than PATH_MAX
|
||||
- Fixed various bugs related to rpath()
|
||||
- Major code rewrite
|
||||
- Fixed line width when displaying 100%
|
||||
|
||||
1.4 - 2008-09-10
|
||||
- Removed the startup window
|
||||
- Filenames ending with a tidle (~) will now also
|
||||
be hidden with the 'h'-key
|
||||
- Fixed buffer overflow when supplying a path longer
|
||||
than PATH_MAX (patch by Tobias Stoeckmann)
|
||||
- Used S_BLKSIZE instead of a hardcoded block size of 512
|
||||
- Fixed display of disk usage and apparent sizes
|
||||
- Updated ncdu -h
|
||||
- Included patches for Cygwin
|
||||
- Cursor now follows the selected item
|
||||
- Added spaces around path (debian #472194)
|
||||
- Fixed segfault on empty directory (debian #472294)
|
||||
- A few code rewrites and improvements
|
||||
|
||||
1.3 - 2007-08-05
|
||||
- Added 'r'-key to refresh the current directory
|
||||
- Removed option to calculate apparent size: both
|
||||
the disk usage and the apparent size are calculated.
|
||||
- Added 'a'-key to switch between showing apparent
|
||||
size and disk usage.
|
||||
- Added 'i'-key to display information about the
|
||||
selected item.
|
||||
- Small performance improvements
|
||||
- configure checks for ncurses.h (bug #1764304)
|
||||
|
||||
1.2 - 2007-07-24
|
||||
- Fixed some bugs on cygwin
|
||||
- Added du-like exclude patterns
|
||||
- Fixed bug #1758403: large directories work fine now
|
||||
- Rewrote a large part of the code
|
||||
- Fixed a bug with wide characters
|
||||
- Performance improvements when browsing large dirs
|
||||
|
||||
1.1 - 2007-04-30
|
||||
- Deleting files and directories is now possible from
|
||||
within ncdu.
|
||||
- The key for sorting directories between files has
|
||||
changed to 't' instead of 'd'. The 'd'-key is now
|
||||
used for deleting files.
|
||||
|
||||
1.0 - 2007-04-06
|
||||
- First stable release
|
||||
- Small code cleanup
|
||||
- Added a key to toggle between sorting dirs before
|
||||
files and dirs between files
|
||||
- Added graphs and percentages to the directory
|
||||
browser (can be enabled or disabled with the 'g'-key)
|
||||
|
||||
0.3 - 2007-03-04
|
||||
- When browsing back to the previous directory, the
|
||||
directory you're getting back from will be selected.
|
||||
- Added directory scanning in quiet mode to save
|
||||
bandwidth on remote connections.
|
||||
|
||||
0.2 - 2007-02-26
|
||||
- Fixed POSIX compliance: replaced realpath() with my
|
||||
own implementation, and gettimeofday() is not
|
||||
required anymore (but highly recommended)
|
||||
- Added a warning for terminals smaller than 60x16
|
||||
- Mountpoints (or any other directory pointing to
|
||||
another filesystem) are now considered to be
|
||||
directories rather than files.
|
||||
|
||||
0.1 - 2007-02-21
|
||||
- Initial version
|
||||
255
dat/ncdu-jsonfmt
255
dat/ncdu-jsonfmt
|
|
@ -1,255 +0,0 @@
|
|||
=pod
|
||||
|
||||
This document describes the file format that ncdu 1.9 and later use for the
|
||||
export/import feature (the C<-o> and C<-f> options). Check the L<ncdu
|
||||
manual|https://dev.yorhel.nl/ncdu/man> for a description on how to use that
|
||||
feature.
|
||||
|
||||
=head2 Top-level object
|
||||
|
||||
Ncdu uses L<JSON|http://json.org/> notation as its data format. The top-level
|
||||
object is an array:
|
||||
|
||||
[
|
||||
<majorver>,
|
||||
<minorver>,
|
||||
<metadata>,
|
||||
<directory>
|
||||
]
|
||||
|
||||
=head2 Versioning
|
||||
|
||||
The C<< <majorver> >> and C<< <minorver> >> elements indicate the version of
|
||||
the file format. These are numbers with accepted values in the range of C<< 0
|
||||
<= version <= 10000 >>. Major version must be C<1>. Minor version is C<0> for
|
||||
ncdu 1.9 till 1.12, and C<1> since ncdu 1.13 for the addition of the extended
|
||||
mode. The major version should increase if backwards-incompatible changes are
|
||||
made (preferably never), the minor version can be increased to indicate
|
||||
additions to the existing format.
|
||||
|
||||
=head2 Metadata
|
||||
|
||||
The C<< <metadata> >> element is a JSON object holding whatever (short)
|
||||
metadata you'd want. This block is currently (1.9-1.13) ignored by ncdu when
|
||||
importing, but it writes out the following keys when exporting:
|
||||
|
||||
=over
|
||||
|
||||
=item progname
|
||||
|
||||
String, name of the program that generated the file, i.e. C<"ncdu">.
|
||||
|
||||
=item progver
|
||||
|
||||
String, version of the program that generated the file, e.g. C<"1.10">.
|
||||
|
||||
=item timestamp
|
||||
|
||||
Number, UNIX timestamp as returned by the POSIX C<time()> function at the time
|
||||
the file was generated. Note that this may not necessarily be equivalent to when
|
||||
the directory has been scanned.
|
||||
|
||||
=back
|
||||
|
||||
=head2 Directory Info
|
||||
|
||||
A C<< <directory> >> is represented with a JSON array:
|
||||
|
||||
[
|
||||
<infoblock>,
|
||||
<directory>, <directory>, <infoblock>, ...
|
||||
]
|
||||
|
||||
That is, the first element of the array must be an C<< <infoblock> >>. If the
|
||||
directory is empty, that will be its only element. If it isn't, its
|
||||
subdirectories and files are listed in the remaining elements. Each
|
||||
subdirectory is represented as a C<< <directory> >> array again, and each file
|
||||
is represented as just an C<< <infoblock> >> object.
|
||||
|
||||
=head2 The Info Object
|
||||
|
||||
An C<< <infoblock> >> is a JSON object holding information about a file or
|
||||
directory. The following fields are supported:
|
||||
|
||||
=over
|
||||
|
||||
=item name
|
||||
|
||||
String I<(required)>. Name of the file/dir. For the top-level directory (that
|
||||
is, the C<< <directory> >> item in the top-level JSON array), this should be
|
||||
the full absolute filesystem path, e.g. C<"/media/harddrive">. For any items
|
||||
below the top-level directory, the name should be just the name of the item.
|
||||
|
||||
The name will be in the same encoding as reported by the filesystem (i.e.
|
||||
L<readdir()|http://manned.org/readdir.3>). The name may not exceed 32768 bytes.
|
||||
|
||||
=item asize
|
||||
|
||||
Number. The apparent file size, as reported by C<lstat().st_size>. If absent, 0
|
||||
is assumed. Accepted values are in the range of C<< 0 <= asize < 2^63 >>.
|
||||
|
||||
=item dsize
|
||||
|
||||
Number. Size of the file, as consumed on the disk. This is obtained through
|
||||
C<lstat().st_blocks*S_BLKSIZE>. If absent, 0 is assumed. Accepted values are in
|
||||
the range of C<< 0 <= dsize < 2^63 >>.
|
||||
|
||||
=item dev
|
||||
|
||||
Number. The device ID. Has to be a unique ID within the context of the exported
|
||||
dump, but may not have any meaning outside of that. I.e. this can be a
|
||||
serialization of C<lstat().st_dev>, but also a randomly generated number only
|
||||
used within this file. As long as it uniquely identifies the device/filesystem
|
||||
on which this file is stored. This field may be absent, in which case it is
|
||||
equivalent to that of the parent directory. If this field is absent for the
|
||||
parent directory, a value of 0 is assumed. Accepted values are in the range of
|
||||
C<< 0 <= dev < 2^64 >>.
|
||||
|
||||
=item ino
|
||||
|
||||
Number. Inode number as reported by C<lstat().st_ino>. Together with the Device
|
||||
ID this uniquely identifies a file in this dump. In the case of hard links, two
|
||||
objects may appear with the same (C<dev>,C<ino>) combination. A value of 0 is
|
||||
assumed if this field is absent. This is currently (ncdu 1.9-1.13) not a
|
||||
problem as long as the C<hlnkc> field is false, otherwise it will consider
|
||||
everything with the same C<dev> and empty C<ino> values as a single hardlinked
|
||||
file. Accepted values are in the range of C<< 0 <= ino < 2^64 >>.
|
||||
|
||||
=item hlnkc
|
||||
|
||||
Boolean. C<true> if this is a file with C<< lstat().st_nlink > 1 >>. If absent,
|
||||
C<false> is assumed.
|
||||
|
||||
=item read_error
|
||||
|
||||
Boolean. C<true> if something went wrong while reading this entry. I.e. the
|
||||
information in this entry may not be complete. For files, this indicates that
|
||||
the C<lstat()> call failed. For directories, this means that an error occurred
|
||||
while obtaining the file listing, and some items may be missing. Note that if
|
||||
C<lstat()> failed, ncdu has no way of knowing whether an item is a file or a
|
||||
directory, so a file with C<read_error> set might as well be a directory. If
|
||||
absent, C<false> is assumed.
|
||||
|
||||
=item excluded
|
||||
|
||||
String. Set if this file or directory is to be excluded from calculation for
|
||||
some reason. The following values are recognized:
|
||||
|
||||
=over
|
||||
|
||||
=item C<"pattern">
|
||||
|
||||
If the path matched an exclude pattern.
|
||||
|
||||
=item C<"otherfs">
|
||||
|
||||
If the item is on a different device/filesystem.
|
||||
|
||||
=back
|
||||
|
||||
Excluded items may still be included in the export, but only by name. C<size>,
|
||||
C<asize> and other information may be absent. If this item was excluded by a
|
||||
pattern, ncdu will not do an C<lstat()> on it, and may thus report this item as
|
||||
a file even if it is a directory.
|
||||
|
||||
Other values than mentioned above are accepted by ncdu, but are currently
|
||||
interpreted to be equivalent to "pattern". This field should be absent if the
|
||||
item has not been excluded from the calculation.
|
||||
|
||||
=item notreg
|
||||
|
||||
Boolean. This is C<true> if neither S_ISREG() nor S_ISDIR() evaluates to true.
|
||||
I.e. this is a symlink, character device, block device, FIFO, socket, or
|
||||
whatever else your system may support. If absent, C<false> is assumed.
|
||||
|
||||
=back
|
||||
|
||||
=head3 Extended information
|
||||
|
||||
In addition, the following fields are exported when I<extended information>
|
||||
mode is enabled (available since ncdu 1.13). See the C<-e> flag in L<ncdu(1)>
|
||||
for details.
|
||||
|
||||
=over
|
||||
|
||||
=item uid
|
||||
|
||||
Number, user ID who owns the file. Accepted values are in the range
|
||||
C<< 0 <= uid < 2^31 >>.
|
||||
|
||||
=item gid
|
||||
|
||||
Number, group ID who owns the file. Accepted values are in the range
|
||||
C<< 0 <= gid < 2^31 >>.
|
||||
|
||||
=item mode
|
||||
|
||||
Number, the raw file mode as returned by L<lstat(3)>. For Linux systems, see
|
||||
L<inode(7)> for the interpretation of this field. Accepted range:
|
||||
C<< 0 <= mode < 2^16 >>.
|
||||
|
||||
=item mtime
|
||||
|
||||
Number, last modification time as a UNIX timestamp. Accepted range:
|
||||
C<< 0 <= mtime < 2^64 >>.
|
||||
|
||||
=back
|
||||
|
||||
=head2 Miscellaneous notes
|
||||
|
||||
As mentioned above, file/directory names are B<not> converted to any specific
|
||||
encoding when exporting. If you want the exported info dump to be valid JSON
|
||||
(and thus valid UTF-8), you'll have to ensure that you have either no non-UTF-8
|
||||
filenames in your filesystem, or you should process the dump through a
|
||||
conversion utility such as C<iconv>. When browsing an imported file with ncdu,
|
||||
you'll usually want to ensure that the filenames are in the same encoding as
|
||||
what your terminal is expecting. The browsing interface may look garbled or
|
||||
otherwise ugly if that's not the case.
|
||||
|
||||
Another important thing to keep in mind is that an export can be fairly large.
|
||||
If you write a program that reads a file in this format and you care about
|
||||
handling directories with several million files, make sure to optimize for
|
||||
that. For example, prefer the use of a stream-based JSON parser over a JSON
|
||||
library that reads the entire file in a single generic data structure, and only
|
||||
keep the minimum amount of data that you care about in memory.
|
||||
|
||||
=head2 Example Export
|
||||
|
||||
Here's a simple example export that displays the basic structure of the format.
|
||||
|
||||
[
|
||||
1,
|
||||
0,
|
||||
{
|
||||
"progname" : "ncdu",
|
||||
"progver" : "1.9",
|
||||
"timestamp" : 1354477149
|
||||
},
|
||||
[
|
||||
{ "name" : "/media/harddrive",
|
||||
"dsize" : 4096,
|
||||
"asize" : 422,
|
||||
"dev" : 39123423,
|
||||
"ino" : 29342345
|
||||
},
|
||||
{ "name" : "SomeFile",
|
||||
"dsize" : 32768,
|
||||
"asize" : 32414,
|
||||
"ino" : 91245479284
|
||||
},
|
||||
[
|
||||
{ "name" : "EmptyDir",
|
||||
"dsize" : 4096,
|
||||
"asize" : 10,
|
||||
"ino" : 3924
|
||||
}
|
||||
]
|
||||
]
|
||||
]
|
||||
|
||||
The directory described above has the following structure:
|
||||
|
||||
/media/harddrive
|
||||
├── SomeFile
|
||||
└── EmptyDir
|
||||
|
||||
425
dat/ncdu-man
425
dat/ncdu-man
|
|
@ -1,425 +0,0 @@
|
|||
=head1 NAME
|
||||
|
||||
B<ncdu> - NCurses Disk Usage
|
||||
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<ncdu> [I<options>] I<dir>
|
||||
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
ncdu (NCurses Disk Usage) is a curses-based version of the well-known 'du', and
|
||||
provides a fast way to see what directories are using your disk space.
|
||||
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
=head2 Mode Selection
|
||||
|
||||
=over
|
||||
|
||||
=item -h, --help
|
||||
|
||||
Print a short help message and quit.
|
||||
|
||||
=item -v, -V, --version
|
||||
|
||||
Print ncdu version and quit.
|
||||
|
||||
=item -f I<FILE>
|
||||
|
||||
Load the given file, which has earlier been created with the C<-o> option. If
|
||||
I<FILE> is equivalent to C<->, the file is read from standard input.
|
||||
|
||||
For the sake of preventing a screw-up, the current version of ncdu will assume
|
||||
that the directory information in the imported file does not represent the
|
||||
filesystem on which the file is being imported. That is, the refresh, file
|
||||
deletion and shell spawning options in the browser will be disabled.
|
||||
|
||||
=item I<dir>
|
||||
|
||||
Scan the given directory.
|
||||
|
||||
=item -o I<FILE>
|
||||
|
||||
Export all necessary information to I<FILE> instead of opening the browser
|
||||
interface. If I<FILE> is C<->, the data is written to standard output. See the
|
||||
examples section below for some handy use cases.
|
||||
|
||||
Be warned that the exported data may grow quite large when exporting a
|
||||
directory with many files. 10.000 files will get you an export in the order of
|
||||
600 to 700 KiB uncompressed, or a little over 100 KiB when compressed with
|
||||
gzip. This scales linearly, so be prepared to handle a few tens of megabytes
|
||||
when dealing with millions of files.
|
||||
|
||||
=item -e
|
||||
|
||||
Enable extended information mode. This will, in addition to the usual file
|
||||
information, also read the ownership, permissions and last modification time
|
||||
for each file. This will result in higher memory usage (by roughly ~30%) and in
|
||||
a larger output file when exporting.
|
||||
|
||||
When using the file export/import function, this flag will need to be added
|
||||
both when exporting (to make sure the information is added to the export), and
|
||||
when importing (to read this extra information in memory). This flag has no
|
||||
effect when importing a file that has been exported without the extended
|
||||
information.
|
||||
|
||||
This enables viewing and sorting by the latest child mtime, or modified time,
|
||||
using 'm' and 'M', respectively.
|
||||
|
||||
=back
|
||||
|
||||
=head2 Interface options
|
||||
|
||||
=over
|
||||
|
||||
=item -0
|
||||
|
||||
Don't give any feedback while scanning a directory or importing a file, other
|
||||
than when a fatal error occurs. Ncurses will not be initialized until the scan
|
||||
is complete. When exporting the data with C<-o>, ncurses will not be
|
||||
initialized at all. This option is the default when exporting to standard
|
||||
output.
|
||||
|
||||
=item -1
|
||||
|
||||
Similar to C<-0>, but does give feedback on the scanning progress with a single
|
||||
line of output. This option is the default when exporting to a file.
|
||||
|
||||
In some cases, the ncurses browser interface which you'll see after the
|
||||
scan/import is complete may look garbled when using this option. If you're not
|
||||
exporting to a file, C<-2> is probably a better choice.
|
||||
|
||||
=item -2
|
||||
|
||||
Provide a full-screen ncurses interface while scanning a directory or importing
|
||||
a file. This is the only interface that provides feedback on any non-fatal
|
||||
errors while scanning.
|
||||
|
||||
=item -q
|
||||
|
||||
Quiet mode. While scanning or importing the directory, ncdu will update the
|
||||
screen 10 times a second by default, this will be decreased to once every 2
|
||||
seconds in quiet mode. Use this feature to save bandwidth over remote
|
||||
connections. This option has no effect when C<-0> is used.
|
||||
|
||||
=item -r
|
||||
|
||||
Read-only mode. This will disable the built-in file deletion feature. This
|
||||
option has no effect when C<-o> is used, because there will not be a browser
|
||||
interface in that case. It has no effect when C<-f> is used, either, because
|
||||
the deletion feature is disabled in that case anyway.
|
||||
|
||||
WARNING: This option will only prevent deletion through the file browser. It is
|
||||
still possible to spawn a shell from ncdu and delete or modify files from
|
||||
there. To disable that feature as well, pass the C<-r> option twice (see
|
||||
C<-rr>).
|
||||
|
||||
=item -rr
|
||||
|
||||
In addition to C<-r>, this will also disable the shell spawning feature of the
|
||||
file browser.
|
||||
|
||||
=item --si
|
||||
|
||||
List sizes using base 10 prefixes, that is, powers of 1000 (KB, MB, etc), as
|
||||
defined in the International System of Units (SI), instead of the usual base 2
|
||||
prefixes, that is, powers of 1024 (KiB, MiB, etc).
|
||||
|
||||
=item --confirm-quit
|
||||
|
||||
Requires a confirmation before quitting ncdu. Very helpful when you
|
||||
accidentally press 'q' during or after a very long scan.
|
||||
|
||||
=item --color I<SCHEME>
|
||||
|
||||
Select a color scheme. Currently only two schemes are recognized: I<off> to
|
||||
disable colors (the default) and I<dark> for a color scheme intended for dark
|
||||
backgrounds.
|
||||
|
||||
=back
|
||||
|
||||
=head2 Scan Options
|
||||
|
||||
These options affect the scanning progress, and have no effect when importing
|
||||
directory information from a file.
|
||||
|
||||
=over
|
||||
|
||||
=item -x
|
||||
|
||||
Do not cross filesystem boundaries, i.e. only count files and directories on
|
||||
the same filesystem as the directory being scanned.
|
||||
|
||||
=item --exclude I<PATTERN>
|
||||
|
||||
Exclude files that match I<PATTERN>. The files will still be displayed by
|
||||
default, but are not counted towards the disk usage statistics. This argument
|
||||
can be added multiple times to add more patterns.
|
||||
|
||||
=item -X I<FILE>, --exclude-from I<FILE>
|
||||
|
||||
Exclude files that match any pattern in I<FILE>. Patterns should be separated
|
||||
by a newline.
|
||||
|
||||
=item --exclude-caches
|
||||
|
||||
Exclude directories containing CACHEDIR.TAG. The directories will still be
|
||||
displayed, but not their content, and they are not counted towards the disk
|
||||
usage statistics.
|
||||
See http://www.brynosaurus.com/cachedir/
|
||||
|
||||
=item -L, --follow-symlinks
|
||||
|
||||
Follow symlinks and count the size of the file they point to. As of ncdu 1.14,
|
||||
this option will not follow symlinks to directories and will count each
|
||||
symlinked file as a unique file (i.e. unlike how hard links are handled). This
|
||||
is subject to change in later versions.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
=head1 KEYS
|
||||
|
||||
=over
|
||||
|
||||
=item ?
|
||||
|
||||
Show help + keys + about screen
|
||||
|
||||
=item up, down j, k
|
||||
|
||||
Cycle through the items
|
||||
|
||||
=item right, enter, l
|
||||
|
||||
Open selected directory
|
||||
|
||||
=item left, <, h
|
||||
|
||||
Go to parent directory
|
||||
|
||||
=item n
|
||||
|
||||
Order by filename (press again for descending order)
|
||||
|
||||
=item s
|
||||
|
||||
Order by filesize (press again for descending order)
|
||||
|
||||
=item C
|
||||
|
||||
Order by number of items (press again for descending order)
|
||||
|
||||
=item a
|
||||
|
||||
Toggle between showing disk usage and showing apparent size.
|
||||
|
||||
=item M
|
||||
|
||||
Order by latest child mtime, or modified time. (press again for descending order)
|
||||
Requires the -e flag.
|
||||
|
||||
=item d
|
||||
|
||||
Delete the selected file or directory. An error message will be shown when the
|
||||
contents of the directory do not match or do not exist anymore on the
|
||||
filesystem.
|
||||
|
||||
=item t
|
||||
|
||||
Toggle dirs before files when sorting.
|
||||
|
||||
=item g
|
||||
|
||||
Toggle between showing percentage, graph, both, or none. Percentage is relative
|
||||
to the size of the current directory, graph is relative to the largest item in
|
||||
the current directory.
|
||||
|
||||
=item c
|
||||
|
||||
Toggle display of child item counts.
|
||||
|
||||
=item m
|
||||
|
||||
Toggle display of latest child mtime, or modified time. Requires the -e flag.
|
||||
|
||||
=item e
|
||||
|
||||
Show/hide 'hidden' or 'excluded' files and directories. Please note that even
|
||||
though you can't see the hidden files and directories, they are still there and
|
||||
they are still included in the directory sizes. If you suspect that the totals
|
||||
shown at the bottom of the screen are not correct, make sure you haven't
|
||||
enabled this option.
|
||||
|
||||
=item i
|
||||
|
||||
Show information about the current selected item.
|
||||
|
||||
=item r
|
||||
|
||||
Refresh/recalculate the current directory.
|
||||
|
||||
=item b
|
||||
|
||||
Spawn shell in current directory.
|
||||
|
||||
Ncdu will determine your preferred shell from the C<NCDU_SHELL> or C<SHELL>
|
||||
variable (in that order), or will call C</bin/sh> if neither are set. This
|
||||
allows you to also configure another command to be run when the 'b' key is
|
||||
pressed. For example, to spawn the L<vifm(1)> file manager instead of a shell,
|
||||
run ncdu as follows:
|
||||
|
||||
export NCDU_SHELL=vifm
|
||||
ncdu
|
||||
|
||||
=item q
|
||||
|
||||
Quit
|
||||
|
||||
=back
|
||||
|
||||
|
||||
=head1 FILE FLAGS
|
||||
|
||||
Entries in the browser interface may be prefixed by a one-character flag. These
|
||||
flags have the following meaning:
|
||||
|
||||
=over
|
||||
|
||||
=item !
|
||||
|
||||
An error occurred while reading this directory.
|
||||
|
||||
=item .
|
||||
|
||||
An error occurred while reading a subdirectory, so the indicated size may not be
|
||||
correct.
|
||||
|
||||
=item <
|
||||
|
||||
File or directory is excluded from the statistics by using exclude patterns.
|
||||
|
||||
=item >
|
||||
|
||||
Directory is on another filesystem.
|
||||
|
||||
=item @
|
||||
|
||||
This is neither a file nor a folder (symlink, socket, ...).
|
||||
|
||||
=item H
|
||||
|
||||
Same file was already counted (hard link).
|
||||
|
||||
=item e
|
||||
|
||||
Empty directory.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
=head1 EXAMPLES
|
||||
|
||||
To scan and browse the directory you're currently in, all you need is a simple:
|
||||
|
||||
ncdu
|
||||
|
||||
If you want to scan a full filesystem, your root filesystem, for example, then
|
||||
you'll want to use C<-x>:
|
||||
|
||||
ncdu -x /
|
||||
|
||||
Since scanning a large directory may take a while, you can scan a directory and
|
||||
export the results for later viewing:
|
||||
|
||||
ncdu -1xo- / | gzip >export.gz
|
||||
# ...some time later:
|
||||
zcat export.gz | ncdu -f-
|
||||
|
||||
To export from a cron job, make sure to replace C<-1> with C<-0> to suppress
|
||||
any unnecessary output.
|
||||
|
||||
You can also export a directory and browse it once scanning is done:
|
||||
|
||||
ncdu -o- | tee export.file | ./ncdu -f-
|
||||
|
||||
The same is possible with gzip compression, but is a bit kludgey:
|
||||
|
||||
ncdu -o- | gzip | tee export.gz | gunzip | ./ncdu -f-
|
||||
|
||||
To scan a system remotely, but browse through the files locally:
|
||||
|
||||
ssh -C user@system ncdu -o- / | ./ncdu -f-
|
||||
|
||||
The C<-C> option to ssh enables compression, which will be very useful over
|
||||
slow links. Remote scanning and local viewing has two major advantages when
|
||||
compared to running ncdu directly on the remote system: You can browse through
|
||||
the scanned directory on the local system without any network latency, and ncdu
|
||||
does not keep the entire directory structure in memory when exporting, so you
|
||||
won't consume much memory on the remote system.
|
||||
|
||||
|
||||
=head1 HARD LINKS
|
||||
|
||||
Every disk usage analysis utility has its own way of (not) counting hard links.
|
||||
There does not seem to be any universally agreed method of handling hard links,
|
||||
and it is even inconsistent among different versions of ncdu. This section
|
||||
explains what each version of ncdu does.
|
||||
|
||||
ncdu 1.5 and below does not support any hard link detection at all: each link
|
||||
is considered a separate inode and its size is counted for every link. This
|
||||
means that the displayed directory sizes are incorrect when analyzing
|
||||
directories which contain hard links.
|
||||
|
||||
ncdu 1.6 has basic hard link detection: When a link to a previously encountered
|
||||
inode is detected, the link is considered to have a file size of zero bytes.
|
||||
Its size is not counted again, and the link is indicated in the browser
|
||||
interface with a 'H' mark. The displayed directory sizes are only correct when
|
||||
all links to an inode reside within that directory. When this is not the case,
|
||||
the sizes may or may not be correct, depending on which links were considered
|
||||
as "duplicate" and which as "original". The indicated size of the topmost
|
||||
directory (that is, the one specified on the command line upon starting ncdu)
|
||||
is always correct.
|
||||
|
||||
ncdu 1.7 and later has improved hard link detection. Each file that has more
|
||||
than two links has the "H" mark visible in the browser interface. Each hard
|
||||
link is counted exactly once for every directory it appears in. The indicated
|
||||
size of each directory is therefore, correctly, the sum of the sizes of all
|
||||
unique inodes that can be found in that directory. Note, however, that this may
|
||||
not always be the same as the space that will be reclaimed after deleting the
|
||||
directory, as some inodes may still be accessible from hard links outside it.
|
||||
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
Directory hard links are not supported. They will not be detected as being hard
|
||||
links, and will thus be scanned and counted multiple times.
|
||||
|
||||
Some minor glitches may appear when displaying filenames that contain multibyte
|
||||
or multicolumn characters.
|
||||
|
||||
All sizes are internally represented as a signed 64bit integer. If you have a
|
||||
directory larger than 8 EiB minus one byte, ncdu will clip its size to 8 EiB
|
||||
minus one byte. When deleting items in a directory with a clipped size, the
|
||||
resulting sizes will be incorrect.
|
||||
|
||||
Item counts are stored in a signed 32-bit integer without overflow detection.
|
||||
If you have a directory with more than 2 billion files, quite literally
|
||||
anything can happen.
|
||||
|
||||
Please report any other bugs you may find at the bug tracker, which can be
|
||||
found on the web site at https://dev.yorhel.nl/ncdu
|
||||
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Written by Yoran Heling <projects@yorhel.nl>.
|
||||
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<du(1)>
|
||||
29
dat/ncdu-scr
29
dat/ncdu-scr
|
|
@ -1,29 +0,0 @@
|
|||
=pod
|
||||
|
||||
These screenshots were made with ncdu 1.13 with the C<--color=dark> option.
|
||||
Colors are not available in older versions and (in 1.13) still disabled by
|
||||
default.
|
||||
|
||||
=head2 Scanning...
|
||||
|
||||
[img scr ncduscan-2.png Ncdu scanning a large directory.]
|
||||
|
||||
=head2 Done scanning
|
||||
|
||||
[img scr ncdudone-2.png Ncdu done scanning a large directory.]
|
||||
|
||||
=head2 Directory information
|
||||
|
||||
[img scr ncduinfo-2.png Ncdu displaying directory information.]
|
||||
|
||||
=head2 Delete confirmation
|
||||
|
||||
[img scr ncduconfirm-2.png Ncdu asking for confirmation to delete a file.]
|
||||
|
||||
=head2 Help screen
|
||||
|
||||
[img scr ncduhelp1-2.png Ncdu help screen.]
|
||||
|
||||
=head2 About screen
|
||||
|
||||
[img scr ncduhelp2-2.png Ncdu about screen.]
|
||||
73
dat/ncdu.md
Normal file
73
dat/ncdu.md
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
% NCurses Disk Usage
|
||||
|
||||
Ncdu is a disk usage analyzer with an ncurses interface. It is designed to find
|
||||
space hogs on a remote server where you don't have an entire graphical setup
|
||||
available, but it is a useful tool even on regular desktop systems. Ncdu aims
|
||||
to be fast, simple and easy to use, and should be able to run in any minimal
|
||||
POSIX-like environment with ncurses installed.
|
||||
|
||||
## Download
|
||||
|
||||
Latest version
|
||||
: 1.14 ([dllink ncdu-1.14.tar.gz] - [changes](/ncdu/changes))
|
||||
|
||||
I also have convenient static binaries for Linux
|
||||
[i486](/download/ncdu-linux-i486-1.14.tar.gz) and
|
||||
[ARM](/download/ncdu-linux-arm-1.14.tar.gz). Download, extract and run; no
|
||||
compilation or installation necessary (uses
|
||||
[musl](http://www.musl-libc.org/)).
|
||||
|
||||
Development version
|
||||
: The most recent code is available on a git repository and can be cloned
|
||||
with `git clone git://g.blicky.net/ncdu.git/`. The repository is also available
|
||||
for [online browsing](https://g.blicky.net/ncdu.git/).
|
||||
|
||||
Ncdu is entirely written in C and available under a liberal MIT license.
|
||||
|
||||
## Packages and ports
|
||||
|
||||
Ncdu has been packaged for quite a few systems, here's a list of the ones I am aware of:
|
||||
|
||||
[AIX](http://www.perzl.org/aix/index.php?n=Main.Ncdu) -
|
||||
[Alpine Linux](http://pkgs.alpinelinux.org/packages?name=ncdu) -
|
||||
[ALT Linux](http://sisyphus.ru/en/srpm/ncdu) -
|
||||
[Arch Linux](https://www.archlinux.org/packages/?q=ncdu) -
|
||||
[CRUX](https://crux.nu/portdb/?q=ncdu&a=search) -
|
||||
[Cygwin](https://cygwin.com/cgi-bin2/package-grep.cgi?grep=ncdu) -
|
||||
[Debian](http://packages.debian.org/ncdu) -
|
||||
[Fedora](https://apps.fedoraproject.org/packages/ncdu) -
|
||||
[FreeBSD](https://www.freebsd.org/cgi/ports.cgi?query=ncdu&stype=all) -
|
||||
[Frugalware](http://frugalware.org/packages/?op=pkg&srch=ncdu&arch=all&ver=all) -
|
||||
[Gentoo](https://packages.gentoo.org/packages/sys-fs/ncdu) -
|
||||
[GNU Guix](https://www.gnu.org/software/guix/package-list.html) -
|
||||
[OpenBSD](http://cvsweb.openbsd.org/cgi-bin/cvsweb/ports/sysutils/ncdu/) -
|
||||
Mac OS X ([Fink](http://pdb.finkproject.org/pdb/package.php/ncdu) - [Homebrew](https://formulae.brew.sh/formula/ncdu) - [MacPorts](http://www.macports.org/ports.php?by=name&substr=ncdu)) -
|
||||
[Puppy Linux](http://www.murga-linux.com/puppy/viewtopic.php?t=35024) -
|
||||
[Solaris](http://www.opencsw.org/packages/ncdu) -
|
||||
[Slackware](http://slackbuilds.org/repository/14.2/system/ncdu/) -
|
||||
[Slax Linux](http://www.slax.org/modules.php?detail=ncdu) -
|
||||
[Ubuntu](http://packages.ubuntu.com/search?searchon=sourcenames&keywords=ncdu) -
|
||||
[Void Linux](https://voidlinux.org/packages/)
|
||||
|
||||
Packages for RHEL and (open)SUSE can be found on the
|
||||
[Open Build Service](https://software.opensuse.org//download.html?project=utilities&package=ncdu).
|
||||
|
||||
Packages for NetBSD, DragonFlyBSD, MirBSD and others can be found on
|
||||
[pkgsrc](http://pkgsrc.se/sysutils/ncdu).
|
||||
|
||||
A port to z/OS is available [here](https://dovetail.com/community/ncdu.html).
|
||||
|
||||
## Similar projects
|
||||
|
||||
- [Duc](http://duc.zevv.nl/) - Multiple user interfaces.
|
||||
- [gt5](http://gt5.sourceforge.net/) - Quite similar to ncdu, but a different approach.
|
||||
- [tdu](http://webonastick.com/tdu/) - Another small ncurses-based disk usage visualization utility.
|
||||
- [TreeSize](http://treesize.sourceforge.net/) - GTK, using a treeview.
|
||||
- [Baobab](http://www.marzocca.net/linux/baobab.html) - GTK, using pie-charts, a treeview and a treemap. Comes with GNOME.
|
||||
- [GdMap](http://gdmap.sourceforge.net/) - GTK, with a treemap display.
|
||||
- [Filelight](http://www.methylblue.com/filelight/) - KDE, using pie-charts.
|
||||
- [KDirStat](http://kdirstat.sourceforge.net/) - KDE, with a treemap display.
|
||||
- [QDiskUsage](http://qt-apps.org/content/show.php/QDiskUsage?content=107012) - Qt, using pie-charts.
|
||||
- [xdiskusage](http://xdiskusage.sourceforge.net/) - FLTK, with a treemap display.
|
||||
- [fsv](http://fsv.sourceforge.net/) - 3D visualization.
|
||||
- [Philesight](http://zevv.nl/play/code/philesight/) - Web-based clone of Filelight.
|
||||
220
dat/ncdu/jsonfmt.md
Normal file
220
dat/ncdu/jsonfmt.md
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
% Ncdu Export File Format
|
||||
|
||||
This document describes the file format that ncdu 1.9 and later use for the
|
||||
export/import feature (the `-o` and `-f` options). Check the [ncdu
|
||||
manual](/ncdu/man) for a description on how to use that feature.
|
||||
|
||||
## Top-level object
|
||||
|
||||
Ncdu uses [JSON](http://json.org/) notation as its data format. The top-level
|
||||
object is an array:
|
||||
|
||||
[
|
||||
<majorver>,
|
||||
<minorver>,
|
||||
<metadata>,
|
||||
<directory>
|
||||
]
|
||||
|
||||
## Versioning
|
||||
|
||||
The `<majorver>` and `<minorver>` elements indicate the version of
|
||||
the file format. These are numbers with accepted values in the range of `0
|
||||
<= version <= 10000`. Major version must be `1`. Minor version is `0` for
|
||||
ncdu 1.9 till 1.12, and `1` since ncdu 1.13 for the addition of the extended
|
||||
mode. The major version should increase if backwards-incompatible changes are
|
||||
made (preferably never), the minor version can be increased to indicate
|
||||
additions to the existing format.
|
||||
|
||||
## Metadata
|
||||
|
||||
The `<metadata>` element is a JSON object holding whatever (short)
|
||||
metadata you'd want. This block is currently (1.9-1.13) ignored by ncdu when
|
||||
importing, but it writes out the following keys when exporting:
|
||||
|
||||
progname
|
||||
: String, name of the program that generated the file, i.e. `"ncdu"`.
|
||||
|
||||
progver
|
||||
: String, version of the program that generated the file, e.g. `"1.10"`.
|
||||
|
||||
timestamp
|
||||
: Number, UNIX timestamp as returned by the POSIX `time()` function at the time
|
||||
the file was generated. Note that this may not necessarily be equivalent to when
|
||||
the directory has been scanned.
|
||||
|
||||
## Directory Info
|
||||
|
||||
A `<directory>` is represented with a JSON array:
|
||||
|
||||
[
|
||||
<infoblock>,
|
||||
<directory>, <directory>, <infoblock>, ...
|
||||
]
|
||||
|
||||
That is, the first element of the array must be an `<infoblock>`. If the
|
||||
directory is empty, that will be its only element. If it isn't, its
|
||||
subdirectories and files are listed in the remaining elements. Each
|
||||
subdirectory is represented as a `<directory>` array again, and each file
|
||||
is represented as just an `<infoblock>` object.
|
||||
|
||||
## The Info Object
|
||||
|
||||
An `<infoblock>` is a JSON object holding information about a file or
|
||||
directory. The following fields are supported:
|
||||
|
||||
name
|
||||
: String _(required)_. Name of the file/dir. For the top-level directory (that
|
||||
is, the `<directory>` item in the top-level JSON array), this should be
|
||||
the full absolute filesystem path, e.g. `"/media/harddrive"`. For any items
|
||||
below the top-level directory, the name should be just the name of the item.
|
||||
|
||||
The name will be in the same encoding as reported by the filesystem (i.e.
|
||||
[readdir()](http://manned.org/readdir.3)). The name may not exceed 32768 bytes.
|
||||
|
||||
asize
|
||||
: Number. The apparent file size, as reported by `lstat().st_size`. If absent, 0
|
||||
is assumed. Accepted values are in the range of `0 <= asize < 2^63`.
|
||||
|
||||
dsize
|
||||
: Number. Size of the file, as consumed on the disk. This is obtained through
|
||||
`lstat().st_blocks*S_BLKSIZE`. If absent, 0 is assumed. Accepted values are in
|
||||
the range of `0 <= dsize < 2^63`.
|
||||
|
||||
dev
|
||||
: Number. The device ID. Has to be a unique ID within the context of the exported
|
||||
dump, but may not have any meaning outside of that. I.e. this can be a
|
||||
serialization of `lstat().st_dev`, but also a randomly generated number only
|
||||
used within this file. As long as it uniquely identifies the device/filesystem
|
||||
on which this file is stored. This field may be absent, in which case it is
|
||||
equivalent to that of the parent directory. If this field is absent for the
|
||||
parent directory, a value of 0 is assumed. Accepted values are in the range of
|
||||
`0 <= dev < 2^64`.
|
||||
|
||||
ino
|
||||
: Number. Inode number as reported by `lstat().st_ino`. Together with the Device
|
||||
ID this uniquely identifies a file in this dump. In the case of hard links, two
|
||||
objects may appear with the same (`dev`,`ino`) combination. A value of 0 is
|
||||
assumed if this field is absent. This is currently (ncdu 1.9-1.13) not a
|
||||
problem as long as the `hlnkc` field is false, otherwise it will consider
|
||||
everything with the same `dev` and empty `ino` values as a single hardlinked
|
||||
file. Accepted values are in the range of `0 <= ino < 2^64`.
|
||||
|
||||
hlnkc
|
||||
: Boolean. `true` if this is a file with `lstat().st_nlink > 1`. If absent,
|
||||
`false` is assumed.
|
||||
|
||||
read\_error
|
||||
: Boolean. `true` if something went wrong while reading this entry. I.e. the
|
||||
information in this entry may not be complete. For files, this indicates that
|
||||
the `lstat()` call failed. For directories, this means that an error occurred
|
||||
while obtaining the file listing, and some items may be missing. Note that if
|
||||
`lstat()` failed, ncdu has no way of knowing whether an item is a file or a
|
||||
directory, so a file with `read_error` set might as well be a directory. If
|
||||
absent, `false` is assumed.
|
||||
|
||||
excluded
|
||||
: String. Set if this file or directory is to be excluded from calculation for
|
||||
some reason. The following values are recognized:
|
||||
|
||||
`"pattern"`
|
||||
: If the path matched an exclude pattern.
|
||||
|
||||
`"otherfs"`
|
||||
: If the item is on a different device/filesystem.
|
||||
|
||||
Excluded items may still be included in the export, but only by name. `size`,
|
||||
`asize` and other information may be absent. If this item was excluded by a
|
||||
pattern, ncdu will not do an `lstat()` on it, and may thus report this item as
|
||||
a file even if it is a directory.
|
||||
|
||||
Other values than mentioned above are accepted by ncdu, but are currently
|
||||
interpreted to be equivalent to "pattern". This field should be absent if the
|
||||
item has not been excluded from the calculation.
|
||||
|
||||
notreg
|
||||
: Boolean. This is `true` if neither S\_ISREG() nor S\_ISDIR() evaluates to true.
|
||||
I.e. this is a symlink, character device, block device, FIFO, socket, or
|
||||
whatever else your system may support. If absent, `false` is assumed.
|
||||
|
||||
### Extended information
|
||||
|
||||
In addition, the following fields are exported when _extended information_ mode
|
||||
is enabled (available since ncdu 1.13). See the `-e` flag in
|
||||
[ncdu(1)](/ncdu/man) for details.
|
||||
|
||||
uid
|
||||
: Number, user ID who owns the file. Accepted values are in the range
|
||||
`0 <= uid < 2^31`.
|
||||
|
||||
gid
|
||||
: Number, group ID who owns the file. Accepted values are in the range
|
||||
`0 <= gid < 2^31`.
|
||||
|
||||
mode
|
||||
: Number, the raw file mode as returned by
|
||||
[lstat(3)](https://manned.org/lstat.3). For Linux systems, see
|
||||
[inode(7)](https://manned.org/inode.7) for the interpretation of this
|
||||
field. Accepted range: `0 <= mode < 2^16`.
|
||||
|
||||
mtime
|
||||
: Number, last modification time as a UNIX timestamp. Accepted range:
|
||||
`0 <= mtime < 2^64`.
|
||||
|
||||
## Miscellaneous notes
|
||||
|
||||
As mentioned above, file/directory names are **not** converted to any specific
|
||||
encoding when exporting. If you want the exported info dump to be valid JSON
|
||||
(and thus valid UTF-8), you'll have to ensure that you have either no non-UTF-8
|
||||
filenames in your filesystem, or you should process the dump through a
|
||||
conversion utility such as `iconv`. When browsing an imported file with ncdu,
|
||||
you'll usually want to ensure that the filenames are in the same encoding as
|
||||
what your terminal is expecting. The browsing interface may look garbled or
|
||||
otherwise ugly if that's not the case.
|
||||
|
||||
Another important thing to keep in mind is that an export can be fairly large.
|
||||
If you write a program that reads a file in this format and you care about
|
||||
handling directories with several million files, make sure to optimize for
|
||||
that. For example, prefer the use of a stream-based JSON parser over a JSON
|
||||
library that reads the entire file in a single generic data structure, and only
|
||||
keep the minimum amount of data that you care about in memory.
|
||||
|
||||
## Example Export
|
||||
|
||||
Here's a simple example export that displays the basic structure of the format.
|
||||
|
||||
[
|
||||
1,
|
||||
0,
|
||||
{
|
||||
"progname" : "ncdu",
|
||||
"progver" : "1.9",
|
||||
"timestamp" : 1354477149
|
||||
},
|
||||
[
|
||||
{ "name" : "/media/harddrive",
|
||||
"dsize" : 4096,
|
||||
"asize" : 422,
|
||||
"dev" : 39123423,
|
||||
"ino" : 29342345
|
||||
},
|
||||
{ "name" : "SomeFile",
|
||||
"dsize" : 32768,
|
||||
"asize" : 32414,
|
||||
"ino" : 91245479284
|
||||
},
|
||||
[
|
||||
{ "name" : "EmptyDir",
|
||||
"dsize" : 4096,
|
||||
"asize" : 10,
|
||||
"ino" : 3924
|
||||
}
|
||||
]
|
||||
]
|
||||
]
|
||||
|
||||
The directory described above has the following structure:
|
||||
|
||||
/media/harddrive
|
||||
├── SomeFile
|
||||
└── EmptyDir
|
||||
29
dat/ncdu/scr.md
Normal file
29
dat/ncdu/scr.md
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
% Ncdu Screenshots
|
||||
|
||||
These screenshots were made with ncdu 1.13 with the `--color=dark` option.
|
||||
Colors are not available in older versions and (in 1.13) still disabled by
|
||||
default.
|
||||
|
||||
## Scanning...
|
||||
|
||||

|
||||
|
||||
## Done scanning
|
||||
|
||||

|
||||
|
||||
## Directory information
|
||||
|
||||

|
||||
|
||||
## Delete confirmation
|
||||
|
||||

|
||||
|
||||
## Help screen
|
||||
|
||||

|
||||
|
||||
## About screen
|
||||
|
||||

|
||||
|
|
@ -1,54 +0,0 @@
|
|||
=pod
|
||||
|
||||
nginx-confgen is a simple preprocessor and macro system for
|
||||
L<nginx|http://nginx.org/> and nginx-like configuration files. It support
|
||||
variable substitution, macro expansion and using the output of arbitrary
|
||||
commands to generate config files.
|
||||
|
||||
=head2 Example
|
||||
|
||||
pre_set $certdir /etc/nginx-certificates/;
|
||||
|
||||
# Fetch the 'resolver' from /etc/resolv.conf
|
||||
pre_exec $nameserver "grep nameserver /etc/resolv.conf \\
|
||||
| head -n 1 | sed 's/^nameserver //'";
|
||||
resolver $nameserver;
|
||||
|
||||
# Convenient macro to create a HTTPS virtual host
|
||||
macro vhost $domain @aliases &block {
|
||||
server {
|
||||
listen [::]:443 ssl;
|
||||
server_name $domain @aliases;
|
||||
|
||||
ssl_certificate $certdir/$domain/fullchain.pem;
|
||||
ssl_certificate_key $certdir/$domain/privkey.pem;
|
||||
pre_if -f $certdir/$domain/ocsp.der {
|
||||
ssl_stapling_file $certdir/$domain/ocsp.der;
|
||||
}
|
||||
|
||||
█
|
||||
}
|
||||
}
|
||||
|
||||
vhost example.com www.example.com {
|
||||
root /var/www/example.com;
|
||||
}
|
||||
|
||||
See the L<manual|https://dev.yorhel.nl/nginx-confgen/man> for more features.
|
||||
|
||||
|
||||
=head2 Download
|
||||
|
||||
If you're on a x86_64 Linux system, you can simply use the binary:
|
||||
|
||||
curl -s https://dev.yorhel.nl/download/nginx-confgen-linux-amd64-1.2.tar.gz | tar -xzf-
|
||||
./nginx-confgen <input.conf >output.conf
|
||||
|
||||
To compile from source, install L<Haskell Stack|https://haskellstack.org/> and run:
|
||||
|
||||
git clone https://code.blicky.net/yorhel/nginx-confgen.git
|
||||
cd nginx-confgen
|
||||
stack install
|
||||
|
||||
The git repository is also available for L<online
|
||||
browsing|https://code.blicky.net/yorhel/nginx-confgen>.
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
1.2 - 2018-02-23
|
||||
- Preserve original string quoting and variable formatting
|
||||
- Fix parser to be more lenient with argument formats
|
||||
- Fix handling of the \\-escape sequence
|
||||
- Fix handling of quoting & parenthesis in 'if' directive
|
||||
- Fix handling of empty string in pre_if
|
||||
- Remove support for parenthesis around pre_if arguments
|
||||
|
||||
1.1 - 2018-01-24
|
||||
- Add pre_warn directive
|
||||
- Add -i/-o/-v/-h command line arguments
|
||||
- Add support for custom pre_include search paths (-I flag)
|
||||
- Fix handling of some common custom block directives (e.g. 'types')
|
||||
|
||||
1.0 - 2018-01-19
|
||||
- Initial version
|
||||
|
|
@ -1,233 +0,0 @@
|
|||
=pod
|
||||
|
||||
=head1 NAME
|
||||
|
||||
nginx-confgen - A preprocessor and macro system for nginx(-like) configuration
|
||||
files.
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
nginx-confgen -i input.conf -o output.conf
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
nginx-confgen can be used to do pre-processing for nginx configuration files
|
||||
(and other configuration files with a similar syntax). It has support for
|
||||
"compile-time" macro expansion and variable interpolation, which should make it
|
||||
less tedious to maintain large and complex configurations.
|
||||
|
||||
nginx-confgen works by parsing the input into a syntax tree, modifying this
|
||||
tree, and then formatting the tree to generate the output. It is completely
|
||||
oblivious to nginx contexts and directives, so it is possible to do nonsensical
|
||||
transformations and generate incorrect configuration files. Comments in the
|
||||
input file will not be present in the output. See also the L</BUGS & WARTS>
|
||||
below.
|
||||
|
||||
B<WARNING:> Do NOT use nginx-confgen with untrusted input, the C<pre_exec>
|
||||
directive allows, by design, arbitrary code execution.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
The following command-line options are supported:
|
||||
|
||||
=over
|
||||
|
||||
=item -h
|
||||
|
||||
Show help text.
|
||||
|
||||
=item -V, --version
|
||||
|
||||
Show program version.
|
||||
|
||||
=item -i I<FILE>
|
||||
|
||||
Use the given file name as input file. If this option is not given or set to
|
||||
C<->, then the file will be read from standard input.
|
||||
|
||||
=item -o I<FILE>
|
||||
|
||||
Write the output to the given file. If this option is not given or set to C<->,
|
||||
then the file will be written to standard output.
|
||||
|
||||
=item -I I<DIR>
|
||||
|
||||
Set the search path for I<pre_include> directives. This option can be given
|
||||
multiple times to search several directories in order. If this option is not
|
||||
given, then include files are resolved relative to the directory that
|
||||
nginx-confgen is run from (i.e. C<-I .>).
|
||||
|
||||
=back
|
||||
|
||||
|
||||
=head1 DIRECTIVES
|
||||
|
||||
nginx-confgen recognizes and interprets the following directives:
|
||||
|
||||
=head2 pre_include
|
||||
|
||||
Similar to the C<include> directive in nginx, except that the file is included
|
||||
during preprocessing. The included file may contain any preprocessing
|
||||
directives supported by nginx-confgen. Variables and macros defined in the
|
||||
included file will be available in the parent file.
|
||||
|
||||
Relative paths are searched for in the directories given with the C<-I> flag.
|
||||
|
||||
=head2 pre_set
|
||||
|
||||
Similar to the C<set> directive in nginx, except that variables defined with
|
||||
C<pre_set> are resolved during preprocessing. Note that variables defined with
|
||||
C<pre_set> are only available in the same scope as they are defined, for
|
||||
example:
|
||||
|
||||
pre_set $var outer;
|
||||
location / {
|
||||
pre_set $var inner;
|
||||
# $var is now "inner" within this location block.
|
||||
}
|
||||
# $var is "outer" again after the location block.
|
||||
|
||||
(This may change in the future)
|
||||
|
||||
=head2 pre_exec
|
||||
|
||||
Run a shell command, and store the output in a variable. For example, nginx
|
||||
will not use your system's DNS resolution methods to resolve domain names.
|
||||
Instead you need to manually set a C<resolver> address. With the following hack
|
||||
you can fetch the nameserver from C</etc/resolv.conf> and use that as the
|
||||
C<resolver>:
|
||||
|
||||
pre_exec $nameserver "grep nameserver /etc/resolv.conf \\
|
||||
| head -n 1 | sed 's/^nameserver //'";
|
||||
resolver $nameserver;
|
||||
|
||||
(The C<\\> is necessary, otherwise your shell will consider the newline as a
|
||||
new command).
|
||||
|
||||
=head2 pre_if
|
||||
|
||||
Similar to the C<if> directive in nginx, except that this is evaluated during
|
||||
preprocessing. Also unlike C<if>, parenthesis around the arguments are not
|
||||
supported. Some examples:
|
||||
|
||||
pre_if -f $certdir/ocsp.der {
|
||||
ssl_stapling on;
|
||||
ssl_stapling_file $certdir/ocsp.der;
|
||||
}
|
||||
pre_if !-f $certdir/ocsp.der {
|
||||
ssl_stapling off;
|
||||
}
|
||||
|
||||
# You can have different configuration depending on the name of
|
||||
# the system on which nginx-confgen runs. Like... yeah.
|
||||
pre_exec $hostname 'hostname';
|
||||
pre_if $hostname ~* ^proxy_for_(.+) {
|
||||
proxy_pass http://$1/;
|
||||
}
|
||||
|
||||
=head2 pre_warn
|
||||
|
||||
This directive, when interpreted, will generate a warning to the standard error
|
||||
of nginx-confgen. Can be used to signal that a special configuration is being
|
||||
used:
|
||||
|
||||
pre_if -e /etc/offline-mode {
|
||||
pre_warn "Putting website in offline mode!";
|
||||
}
|
||||
|
||||
Or to warn about certain directives:
|
||||
|
||||
pre_macro proxy_cache $var {
|
||||
pre_warn "Using proxy_cache with $var violates company policy!";
|
||||
|
||||
# But we can output it anyway.
|
||||
proxy_cache $var;
|
||||
}
|
||||
|
||||
=head2 macro
|
||||
|
||||
Define a I<macro>, which is a configuration block that you can later refer to.
|
||||
The general syntax is as follows:
|
||||
|
||||
macro macro_name $var1 $var2 @remaining_vars &block_var {
|
||||
# contents
|
||||
}
|
||||
|
||||
The optional C<@remaining_vars> argument will capture any number of variables,
|
||||
and can be passed to another directive inside the macro contents. The optional
|
||||
C<&block_var> allows the macro to be invoked with a block argument, which will
|
||||
expand to any number of directives. Some examples:
|
||||
|
||||
macro le {
|
||||
location /.well-known/acme-challenge {
|
||||
alias /etc/letsencrypt/challenge;
|
||||
}
|
||||
}
|
||||
# Usage:
|
||||
le;
|
||||
|
||||
macro redir $path $to {
|
||||
location $path {
|
||||
return 301 $to;
|
||||
}
|
||||
}
|
||||
# Usage:
|
||||
redir / http://blicky.net/;
|
||||
|
||||
macro vhost $primary_name @aliases &block {
|
||||
server {
|
||||
listen [::]:443 ssl;
|
||||
server_name $primary_name @aliases;
|
||||
ssl_certificate $crtdir/$primary_name/fullchain.pem;
|
||||
ssl_certificate_key $crtdir/$primary_name/privkey.pem;
|
||||
█
|
||||
}
|
||||
}
|
||||
# Usage:
|
||||
vhost example.com {
|
||||
root /var/www/example.com;
|
||||
}
|
||||
vhost example.org alias.example.org {
|
||||
root /var/www/example.org;
|
||||
}
|
||||
|
||||
Note that these are I<hygienic> macros, so variable capture is predictable (but
|
||||
not necessarily the most useful):
|
||||
|
||||
pre_var $dest /a;
|
||||
macro redir {
|
||||
# This will be /a, regardless of the context in which this macro is called.
|
||||
return 301 $dest;
|
||||
}
|
||||
# $dest is still '/a' inside the macro after this new variable definition.
|
||||
pre_var $dest /b;
|
||||
redir;
|
||||
|
||||
Similarly, macro arguments will not be available inside C<&block> expansion or
|
||||
nested macro expansion.
|
||||
|
||||
|
||||
=head1 BUGS & WARTS
|
||||
|
||||
nginx-confgen is a quickly written hack to solve a particular use case, it is
|
||||
quite likely to have some weird behavior and bugs.
|
||||
|
||||
Comments and whitespace in the input files are thrown away and ignored. The
|
||||
generated output is completely reformatted.
|
||||
|
||||
The nginx configuration syntax is not as regular as I had hoped. It's possible
|
||||
for nginx modules to extend the syntax somewhat. A good example is the I<types>
|
||||
directive in I<ngx_http_core_module>. While nginx-confgen should be able to
|
||||
handle the I<types> directive just fine, other extensions may cause syntax
|
||||
errors or will not survive a round-trip through nginx-confgen.
|
||||
This applies to all I<*_by_lua_block> directives in the I<ngx_http_lua_module>.
|
||||
The I<_by_lua> directives that accept a string should work just fine.
|
||||
|
||||
The error messages given by C<nginx-confgen> aren't always helpful.
|
||||
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
nginx-confgen is written by Yoran Heling <projects@yorhel.nl>
|
||||
|
||||
Web: L<https://dev.yorhel.nl/nginx-confgen>
|
||||
59
dat/nginx-confgen.md
Normal file
59
dat/nginx-confgen.md
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
% Nginx Configuration Generator
|
||||
|
||||
nginx-confgen is a simple preprocessor and macro system for
|
||||
[nginx](http://nginx.org/) and nginx-like configuration files. It supports
|
||||
variable substitution, macro expansion and using the output of arbitrary
|
||||
commands to generate config files.
|
||||
|
||||
## Example
|
||||
|
||||
```bash
|
||||
pre_set $certdir /etc/nginx-certificates/;
|
||||
|
||||
# Fetch the 'resolver' from /etc/resolv.conf
|
||||
pre_exec $nameserver "grep nameserver /etc/resolv.conf \\
|
||||
| head -n 1 | sed 's/^nameserver //'";
|
||||
resolver $nameserver;
|
||||
|
||||
# Convenient macro to create a HTTPS virtual host
|
||||
macro vhost $domain @aliases &block {
|
||||
server {
|
||||
listen [::]:443 ssl;
|
||||
server_name $domain @aliases;
|
||||
|
||||
ssl_certificate $certdir/$domain/fullchain.pem;
|
||||
ssl_certificate_key $certdir/$domain/privkey.pem;
|
||||
pre_if -f $certdir/$domain/ocsp.der {
|
||||
ssl_stapling_file $certdir/$domain/ocsp.der;
|
||||
}
|
||||
|
||||
█
|
||||
}
|
||||
}
|
||||
|
||||
vhost example.com www.example.com {
|
||||
root /var/www/example.com;
|
||||
}
|
||||
```
|
||||
|
||||
See the [manual](/nginx-confgen/man) for more features.
|
||||
|
||||
## Download
|
||||
|
||||
If you're on an x86\_64 Linux system, you can simply use the binary:
|
||||
|
||||
```
|
||||
curl -s https://dev.yorhel.nl/download/nginx-confgen-linux-amd64-1.2.tar.gz | tar -xzf-
|
||||
./nginx-confgen <input.conf >output.conf
|
||||
```
|
||||
|
||||
To compile from source, install [Haskell Stack](https://haskellstack.org/) and run:
|
||||
|
||||
```
|
||||
git clone https://code.blicky.net/yorhel/nginx-confgen.git
|
||||
cd nginx-confgen
|
||||
stack install
|
||||
```
|
||||
|
||||
The git repository is also available for [online
|
||||
browsing](https://code.blicky.net/yorhel/nginx-confgen).
|
||||
644
dat/sqlaccess
644
dat/sqlaccess
|
|
@ -1,644 +0,0 @@
|
|||
Multi-threaded Access to an SQLite3 Database
|
||||
|
||||
=pod
|
||||
|
||||
(Published on B<2011-11-26>. Also available in L<POD|https://dev.yorhel.nl/dat/sqlaccess>.)
|
||||
|
||||
(Minor 2013-04-06 update: I abstracted my message passing solution from ncdc
|
||||
and implemented it in a POSIX C library for general use. It's called
|
||||
I<sqlasync> and is part of my L<Ylib library collection|https://dev.yorhel.nl/ylib>.)
|
||||
|
||||
=head1 Introduction
|
||||
|
||||
As I was porting L<ncdc|https://dev.yorhel.nl/ncdc> over to use SQLite3 as
|
||||
storage backend, I stumbled on a problem: The program uses a few threads for
|
||||
background jobs, and it would be nice to give these threads access to the
|
||||
database.
|
||||
|
||||
Serializing all database access through the main thread wouldn't have been very
|
||||
hard to implement in this particular case, but that would have been far from
|
||||
optimal. The main thread is also responsible for keeping the user interface
|
||||
responsive and handling most of the network interaction. Overall responsiveness
|
||||
of the program would significantly improve when the threads could access the
|
||||
database without involvement of the main thread.
|
||||
|
||||
Which brought me to the following questions: What solutions are available for
|
||||
providing multi-threaded access to an SQLite database? What problems may I run
|
||||
in to? I was unable to find a good overview in this area on the net, so I wrote
|
||||
this article with the hope to improve that situation.
|
||||
|
||||
|
||||
|
||||
|
||||
=head1 SQLite3 and threading
|
||||
|
||||
Let's first see what SQLite3 itself has to offer in terms of threading support.
|
||||
The official documentation mentions threading support several times in various
|
||||
places, but this information is scattered around and no good overview is given.
|
||||
Someone has tried to organize this before on a L<single
|
||||
page|http://www.sqlite.org/cvstrac/wiki?p=MultiThreading>, and while this
|
||||
indeed gives a nice overview, it has unfortunately not been updated since 2006.
|
||||
The advice is therefore a little on the conservative side.
|
||||
|
||||
Nonetheless, it is wise to remain portable with different SQLite versions,
|
||||
especially when writing programs that dynamically link with some random version
|
||||
installed on someone's system. It should be fairly safe to assume that SQLite
|
||||
binaries provided by most systems, if not all, are compiled with thread safety
|
||||
enabled. This doesn't mean all that much, unfortunately: The only thing
|
||||
I<thread safe> means in this context is that you can use SQLite3 in multiple
|
||||
threads, but a single database connection should still stay within a single
|
||||
thread.
|
||||
|
||||
Since SQLite 3.3.1, which was released in early 2006, it is possible to move a
|
||||
single database connection along multiple threads. Doing this with older
|
||||
versions is not advisable, as explained in L<the SQLite
|
||||
FAQ|http://www.sqlite.org/faq.html#q6>. But even with 3.3.1 and later there is
|
||||
an annoying restriction: A connection can only be passed to another thread when
|
||||
any outstanding statements are closed and finalized. In practice this means
|
||||
that it is not possible to keep a prepared statement in memory for later
|
||||
executions.
|
||||
|
||||
Since SQLite 3.5.0, released in 2007, a single SQLite connection can be used
|
||||
from multiple threads simultaneously. SQLite will internally manage locks to
|
||||
avoid any data corruption. I can't recommend making use of this facility,
|
||||
however, as there are still many issues with the API. The L<error fetching
|
||||
functions|http://www.sqlite.org/c3ref/errcode.html> and
|
||||
L<sqlite3_last_insert_row_id()|http://www.sqlite.org/c3ref/last_insert_rowid.html>,
|
||||
among others, are still useless without explicit locking in the application. I
|
||||
also believe that the previously mentioned restriction on having to finalize
|
||||
statements has been relaxed in this version, so keeping prepared statements in
|
||||
memory and passing them among different threads becomes possible.
|
||||
|
||||
When using multiple database connections within a single process, SQLite offers
|
||||
a facility to allow L<sharing of its
|
||||
cache|http://www.sqlite.org/sharedcache.html>, in order to reduce memory usage
|
||||
and disk I/O. The semantics of this feature have changed with different SQLite
|
||||
versions and appear to have stabilised in 3.5.0. This feature may prove useful
|
||||
to optimize certain situations, but does not open up new possibilities of
|
||||
communicating with a shared database.
|
||||
|
||||
|
||||
|
||||
=head1 Criteria
|
||||
|
||||
Before looking at some available solutions, let's first determine the criteria
|
||||
we can use to evaluate them.
|
||||
|
||||
=over
|
||||
|
||||
=item Implementation size
|
||||
|
||||
Obviously, a solution that requires only a few lines of code to implement is
|
||||
preferable over one that requires several levels of abstraction in order to be
|
||||
usable. I won't be giving actual implementations here, so the sizes will be
|
||||
rough estimates for comparison purposes. The actual size of an implementation
|
||||
is of course heavily dependent on the programming environment as well.
|
||||
|
||||
=item Memory/CPU overhead
|
||||
|
||||
The most efficient solution for a single-threaded application is to simply have
|
||||
direct access to a single database connection. Every solution is in principle a
|
||||
modification or extension of this idea, and will therefore add a certain
|
||||
overhead. This overhead manifests itself in both increased CPU and memory
|
||||
usage. The order of which varies between solutions.
|
||||
|
||||
=item Prepared statement re-use
|
||||
|
||||
Is it possible to prepare a statement once and keep using it for the lifetime
|
||||
of the program? Or will prepared statements have to be thrown away and
|
||||
recreated every time? Keeping statement handles in memory will result in a nice
|
||||
performance boost for applications that run the same SQL statement many times.
|
||||
|
||||
=item Transaction grouping
|
||||
|
||||
A somewhat similar issue to prepared statement re-use: From a performance point
|
||||
of view, it is very important to try to batch many UPDATE/DELETE/INSERT
|
||||
statements within a single transaction, as opposed to running each modify query
|
||||
separately. Running each query separately will force SQLite to flush the data
|
||||
to disk separately every time, whereas a single transaction will batch-flush
|
||||
all the changes to disk in a single go. Some solutions allow for grouping
|
||||
multiple statements in a single transaction quite easily, while others require
|
||||
more involved steps.
|
||||
|
||||
=item Background processing
|
||||
|
||||
In certain situations it may be desirable to queue a certain query for later
|
||||
processing, without explicitly waiting for it to complete. For example, if
|
||||
something in the database has to be modified as a result of user interaction in
|
||||
a UI thread, then the application would feel a lot more responsive if the
|
||||
UPDATE query was simply queued to be processed in a background thread than when
|
||||
the query had run in the UI thread itself. A database accessing solution with
|
||||
built-in support for background processing of queries will significantly help
|
||||
with building a responsive application.
|
||||
|
||||
=item Concurrency
|
||||
|
||||
Concurrency indicates how well the solution allows for concurrent access. The
|
||||
worst possible concurrency is achieved when a single database connection is
|
||||
used for all threads, as only a single action can be performed on the database
|
||||
at any point in time. Maximum concurrency is achieved when each thread has its
|
||||
own SQLite connection. Note that maximum concurrency doesn't mean that the
|
||||
database can be accessed in a I<fully> concurrent manner. SQLite uses internal
|
||||
database-level locks to avoid data corruption, and these will limit the actual
|
||||
maximum concurrency. I am not too knowledgeable about the inner workings of
|
||||
these locks, but it is at least possible to have a large number of truly
|
||||
concurrent database I<reads>. Database I<writes> from multiple threads may
|
||||
still allow for significantly more concurrency than when they are manually
|
||||
serialized over a single database connection.
|
||||
|
||||
=item Portability
|
||||
|
||||
What is the minimum SQLite version required to implement the solution? Does it
|
||||
require any special OS features or SQLite compilation settings? As outlined
|
||||
above, different versions of SQLite offer different features with regards to
|
||||
threading. Relying on one of the relatively new features will decrease
|
||||
portability.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
|
||||
|
||||
=head1 The Solutions
|
||||
|
||||
Here I present four solutions to allow database access from multiple threads.
|
||||
Note that this list may not be exhaustive, these are just a few solutions that
|
||||
I am aware of. Also note that none of the solutions presented here are in any
|
||||
way new. Most of these paradigms date back to the entire notion of concurrent
|
||||
programming, and have been applied in software since decades ago.
|
||||
|
||||
|
||||
=head2 Connection sharing
|
||||
|
||||
By far the simplest solution to implement: Keep a single database connection
|
||||
throughout your program and allow every thread to access it. Of course, you
|
||||
will need to be careful to always put locks around the code where you access
|
||||
the database handler. An example implementation could look like the following:
|
||||
|
||||
// The global SQLite connection
|
||||
sqlite3 *db;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if(sqlite3_open("database.sqlite3", &db))
|
||||
exit(1);
|
||||
|
||||
// start some threads
|
||||
// wait until the threads are finished
|
||||
|
||||
sqlite3_close(db);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *some_thread(void *arg) {
|
||||
sqlite3_mutex_enter(sqlite3_db_mutex(db));
|
||||
// Perform some queries on the database
|
||||
sqlite3_mutex_leave(sqlite3_db_mutex(db));
|
||||
}
|
||||
|
||||
=over
|
||||
|
||||
=item Implementation size
|
||||
|
||||
This is where connection sharing shines: There is little extra code required
|
||||
when compared to using a database connection in a single-threaded context. All
|
||||
you need to be careful of is to lock the mutex before using the database, and
|
||||
to unlock it again afterwards.
|
||||
|
||||
=item Memory/CPU overhead
|
||||
|
||||
As the only addition to the single-threaded case are the locks, this solution
|
||||
has practically no memory overhead. The mutexes are provided by SQLite,
|
||||
after all. CPU overhead is also as minimal as it can be: mutexes are the most
|
||||
primitive type provided by threading libraries to serialize access to a shared
|
||||
resource, and are therefore very efficient.
|
||||
|
||||
=item Prepared statement re-use
|
||||
|
||||
Prepared statements can be safely re-used inside a single enter/leave block.
|
||||
However, if you want to remain portable with SQLite versions before 3.5.0, then
|
||||
any prepared statements B<must> be freed before the mutex is unlocked. This can
|
||||
be a major downside if the enter/leave blocks themselves are relatively short
|
||||
but accessed quite often. If portability with older versions is not an issue,
|
||||
then this restriction is gone and prepared statements can be re-used easily.
|
||||
|
||||
=item Transaction grouping
|
||||
|
||||
A reliable implementation will not allow transactions to span multiple
|
||||
enter/leave blocks. So as with prepared statements, transactions need to be
|
||||
committed to disk before the mutex is unlocked. Again shared with prepared
|
||||
statement re-use is that this limitation may prove to be a significant problem
|
||||
in optimizing application performance, disk I/O in particular. One way to lower
|
||||
the effects of this limitation is to increase the size of a single enter/leave
|
||||
block, thus allowing for more work to be done in a single transaction. Code
|
||||
restructuring may be required in order to efficiently implement this. Another
|
||||
way to get around this problem is to allow a transaction to span multiple
|
||||
enter/leave blocks. Implementing this reliably may not be an easy task,
|
||||
however, and will most likely require application-specific knowledge.
|
||||
|
||||
=item Background processing
|
||||
|
||||
Background processing is not natively supported with connection sharing. It is
|
||||
possible to spawn a background thread to perform database operations each time
|
||||
that this is desirable. But care should be taken to make sure that these
|
||||
background threads will execute dependent queries in the correct order. For
|
||||
example, if thread A spawns a background thread, say B, to execute an UPDATE
|
||||
query, and later thread A wants to read that same data back, it must first wait
|
||||
for thread B to finish execution. This may add more inter-thread communication
|
||||
than is preferable.
|
||||
|
||||
=item Concurrency
|
||||
|
||||
There is no concurrency at all here. Since the database connection is protected
|
||||
by an exclusive lock, only a single thread can operate on the database at any
|
||||
point in time. Additionally, one may be tempted to increase the size of an
|
||||
enter/leave block in order to allow for larger transactions or better re-use of
|
||||
prepared statements. However, any time spent on performing operations that do
|
||||
not directly use the database within such an enter/leave block will lower the
|
||||
maximum possible database concurrency even further.
|
||||
|
||||
=item Portability
|
||||
|
||||
Connection sharing requires at least SQLite 3.3.1 in order to pass the same
|
||||
database connection around. SQLite must be compiled with threading support
|
||||
enabled. If prepared statements are kept around outside of an enter/leave
|
||||
block, then version 3.5.0 or higher will be required.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
=head2 Message passing
|
||||
|
||||
An alternative approach is to allow only a single thread to access the
|
||||
database. Any other thread that wants to access the database in any way will
|
||||
then have to communicate with this database thread. This communication is done
|
||||
by sending messages (I<requests>) to the database thread, and, when query
|
||||
results are required, receiving back one or more I<response> messages.
|
||||
|
||||
Message passing schemes and libraries are available for many programming
|
||||
languages and come in many different forms. For this article, I am going to
|
||||
assume that an asynchronous and unbounded FIFO queue is used to pass around
|
||||
messages, but most of the following discussion will apply to bounded queues as
|
||||
well. I'll try to note the important differences between the two where
|
||||
applicable.
|
||||
|
||||
A very simple and naive implementation of a message passing solution is given
|
||||
below. Here I assume that C<queue_create()> will create a message queue (type
|
||||
C<message_queue>), C<queue_get()> will return the next message in the queue, or
|
||||
block if the queue is empty. C<thread_create(func, arg)> will run I<func> in a
|
||||
newly created thread and pass I<arg> as its argument. Error handling has been
|
||||
omitted to keep this example concise.
|
||||
|
||||
void *db_thread(void *arg) {
|
||||
message_queue *q = arg;
|
||||
|
||||
sqlite3 *db;
|
||||
if(sqlite3_open("database.sqlite3", &db))
|
||||
return ERROR;
|
||||
|
||||
request_msg *m;
|
||||
    while((m = queue_get(q))) {
|
||||
if(m->action == QUIT)
|
||||
break;
|
||||
if(m->action == EXEC)
|
||||
sqlite3_exec(db, m->query, NULL, NULL, NULL);
|
||||
}
|
||||
|
||||
sqlite3_close(db);
|
||||
return OK;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
message_queue *db_queue = queue_create();
|
||||
thread_create(db_thread, db_queue);
|
||||
// Do work.
|
||||
return 0;
|
||||
}
|
||||
|
||||
This example implementation has a single database thread running in the
|
||||
background that accepts the messages C<QUIT>, to stop processing queries and
|
||||
close the database, and C<EXEC>, to run a certain query on the database. No
|
||||
support is available yet for passing query results back to the thread that sent
|
||||
the message. This can be implemented by including a separate C<message_queue>
|
||||
object in the request messages, to which the results can be sent.
|
||||
|
||||
=over
|
||||
|
||||
=item Implementation size
|
||||
|
||||
This will largely depend on the used programming environment and the complexity
|
||||
of the database thread. If your environment already comes with a message queue
|
||||
implementation, and constructing the request/response messages is relatively
|
||||
simple, then a simple implementation as shown above will not require much code.
|
||||
On the other hand, if you have to implement your own message queue or want more
|
||||
intelligence in the database thread to improve efficiency, then the complete
|
||||
implementation may be significantly larger than that of connection sharing.
|
||||
|
||||
=item Memory/CPU overhead
|
||||
|
||||
Constructing and passing around messages will incur a CPU overhead, though with
|
||||
an efficient implementation this should not be significant enough to worry
|
||||
about. Memory usage is highly dependent on the size of the messages being
|
||||
passed around and the length of the queue. If messages are queued faster than
|
||||
they are processed and there is no bound on the queue length, then a process
|
||||
may quickly run out of memory. On the other hand, if messages are processed
|
||||
fast enough then the queue will generally not have more than a single message
|
||||
in it, and the memory overhead will remain fairly small.
|
||||
|
||||
=item Prepared statement re-use
|
||||
|
||||
As the database connection will never leave the database thread, prepared
|
||||
statements can be kept in memory and re-used without problems.
|
||||
|
||||
=item Transaction grouping
|
||||
|
||||
A naive but robust implementation will handle each message in its own
|
||||
transaction. A more clever database thread, however, could wait for multiple
|
||||
messages to be queued and can then batch-execute them in a single transaction.
|
||||
Correctly implementing this may require some additional information to be
|
||||
specified along with the request, such as whether the query may be combined in
|
||||
a single transaction or whether it may only be executed outside of a
|
||||
transaction. Some threads may want to have confirmation that the data has been
|
||||
successfully written to disk, in which case responsiveness will not improve if
|
||||
such actions are queued for later processing. Nonetheless, since the database
|
||||
thread has all the knowledge about the state of the database and any
|
||||
outstanding actions, transaction grouping can be implemented quite reliably.
|
||||
|
||||
=item Background processing
|
||||
|
||||
Background processing is supported natively with a message passing
|
||||
implementation: a thread that isn't interested in query results can simply
|
||||
queue the action to be performed by the database thread without indicating a
|
||||
return path for the results. Of course, if a thread queues many messages that
|
||||
do not require results followed by one that does, it will have to wait for all
|
||||
earlier messages to be processed before receiving any results for the last one.
|
||||
In the case that the actions are not dependent on each other, the database
|
||||
thread may re-order the messages in order to process the last request first.
|
||||
This requires knowledge about dependencies and may significantly complicate the
|
||||
implementation, however.
|
||||
|
||||
=item Concurrency
|
||||
|
||||
As with a shared database connection, database access is exclusive: Only a
|
||||
single action can be performed on the database at a time. Unlike connection
|
||||
sharing, however, any processing within the application will not further
|
||||
degrade the maximum attainable concurrency. As long as unbounded asynchronous
|
||||
queues are used to pass around messages, the database thread will be able to
|
||||
continue working on the database without waiting for another thread to process
|
||||
the results.
|
||||
|
||||
=item Portability
|
||||
|
||||
This is where message passing shines: SQLite is only used within the database
|
||||
thread, no other thread will have a need to call any SQLite function. This
|
||||
allows any version of SQLite to be used, even those that have not been compiled
|
||||
with thread safety enabled.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
=head2 Thread-local connections
|
||||
|
||||
A rather different approach to giving each thread access to a single database
|
||||
is to simply open a new database connection for each thread. This way each
|
||||
connection will be local to the specific thread, which in turn has the power to
|
||||
do with it as it likes without worrying about what the other threads do. The
|
||||
following is a short example to illustrate the idea:
|
||||
|
||||
void *some_thread(void *arg) {
|
||||
sqlite3 *db;
|
||||
if(sqlite3_open("database.sqlite3", &db))
|
||||
return ERROR;
|
||||
|
||||
// Do some work on the database
|
||||
|
||||
sqlite3_close(db);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int i;
|
||||
for(i=0; i<10; i++)
|
||||
thread_create(some_thread, NULL);
|
||||
|
||||
// Wait until the threads are done
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
=over
|
||||
|
||||
=item Implementation size
|
||||
|
||||
Giving each thread its own connection is practically not much different from
|
||||
the single-threaded case where there is only a single database connection. And
|
||||
as the example shows, this can be implemented quite trivially.
|
||||
|
||||
=item Memory/CPU overhead
|
||||
|
||||
If we assume that threads are not created very often and each thread has a
|
||||
relatively long life, then the CPU and I/O overhead caused by opening a new
|
||||
connection for each thread will not be very significant. On the other hand, if
|
||||
threads are created quite often and lead a relatively short life before they
|
||||
are destroyed again, then opening a new connection each time will soon require
|
||||
more resources than running the queries themselves.
|
||||
|
||||
There is a significant memory overhead: every new database connection requires
|
||||
memory. If each connection also has a separate cache, then every thread will
|
||||
quickly require several megabytes only to interact with the database. Since
|
||||
version 3.5.0, SQLite allows sharing of this cache with the other threads,
|
||||
which will reduce this memory overhead.
|
||||
|
||||
=item Prepared statement re-use
|
||||
|
||||
Prepared statements can be re-used without limitations within a single thread.
|
||||
This will allow full re-use of prepared statements if each thread has a
|
||||
different task, in which case every thread will have different queries and
|
||||
access patterns anyway. But when every thread runs the same code, and thus also
|
||||
the same queries, it will still need its own copy of the prepared statement.
|
||||
Prepared statements are specific to a single database connection, so they can't
|
||||
be passed around between the threads. The same argument for CPU overhead works
|
||||
here: as long as threads are long-lived, then this will not be a very large
|
||||
problem.
|
||||
|
||||
=item Transaction grouping
|
||||
|
||||
Each thread has full access to its own database connection, so it can easily
|
||||
batch many queries in a single transaction. It is not possible, however, to
|
||||
group queries from the other threads in this same transaction as well. The
|
||||
grouping may therefore not be as optimal as a message passing solution could
|
||||
provide, but it is still a large improvement compared to connection sharing.
|
||||
|
||||
=item Background processing
|
||||
|
||||
Background processing is not easily possible. While it is possible to spawn a
|
||||
separate thread for each query that needs to be processed in the background, a
|
||||
new database connection will have to be opened every time this is done. This
|
||||
solution will obviously not be very efficient.
|
||||
|
||||
=item Concurrency
|
||||
|
||||
In general, it is not possible to get better concurrency than by providing each
|
||||
thread with its own database connection. This solution definitely wins in this
|
||||
area.
|
||||
|
||||
=item Portability
|
||||
|
||||
Thread-local connections are very portable: the only requirement is that SQLite
|
||||
has been built with threading support enabled. Connections are not passed
|
||||
around between threads, so any SQLite version will do. In order to make use of
|
||||
the shared cache feature, however, SQLite 3.5.0 is required.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
=head2 Connection pooling
|
||||
|
||||
A common approach in server-like applications is to have a connection pool.
|
||||
When a thread wishes to have access to the database, it requests a database
|
||||
connection from a pool of (currently) unused database connections. If no unused
|
||||
connections are available, it can either wait until one becomes available, or
|
||||
create a new database connection on its own. When a thread is done with a
|
||||
connection, it will add it back to the pool to allow it to be re-used in an
|
||||
other thread.
|
||||
|
||||
The following example illustrates a basic connection pool implementation in
|
||||
which a thread creates a new database connection when no connections are
|
||||
available. A global C<db_pool> is defined, on which any thread can call
|
||||
C<pool_pop()> to get an SQLite connection if there is one available, and
|
||||
C<pool_push()> can be used to push a connection back to the pool. This pool can
|
||||
be implemented as any kind of set: a FIFO or a stack could do the trick, as
|
||||
long as it can be accessed from multiple threads concurrently.
|
||||
|
||||
// Some global pool of database connections
|
||||
pool_t *db_pool;
|
||||
|
||||
sqlite3 *get_database() {
|
||||
sqlite3 *db = pool_pop(db_pool);
|
||||
if(db)
|
||||
return db;
|
||||
if(sqlite3_open("database.sqlite3", &db))
|
||||
return NULL;
|
||||
return db;
|
||||
}
|
||||
|
||||
void *some_thread(void *arg) {
|
||||
// Do some work
|
||||
|
||||
sqlite3 *db = get_database();
|
||||
|
||||
// Do some work on the database
|
||||
|
||||
pool_push(db_pool, db);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int i;
|
||||
for(i=0; i<10; i++)
|
||||
thread_create(some_thread, NULL);
|
||||
|
||||
// Wait until the threads are done
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
=over
|
||||
|
||||
=item Implementation size
|
||||
|
||||
A connection pool is in essence not very different from thread-local
|
||||
connections. The only major difference is that the call to sqlite3_open() is
|
||||
replaced with a function call to obtain a connection from the pool and
|
||||
sqlite3_close() with one to give it back to the pool. As shown above, these
|
||||
functions can be fairly simple. Note, however, that unlike with thread-local
|
||||
connections it is advisable to "open" and "close" a connection more often in
|
||||
long-running threads, in order to give other threads a chance to use the
|
||||
connection as well.
|
||||
|
||||
=item Memory/CPU overhead
|
||||
|
||||
This mainly depends on the number of connections you allow to be in memory at
|
||||
any point in time. If this number is not bounded, as in the above example, then
|
||||
you can assume that after running your program for a certain time, there will
|
||||
always be enough unused connections available in the pool. Requesting a
|
||||
connection will then be very fast, since the overhead of creating a new
|
||||
connection, as would have been done with thread-local connections, is
|
||||
completely gone.
|
||||
|
||||
In terms of memory usage, however, it would be more efficient to put a maximum
|
||||
limit on the number of open connections, and have the thread wait until another
|
||||
thread gives a connection back to the pool. Similarly to thread-local
|
||||
connections, memory usage can be decreased by using SQLite's cache sharing
|
||||
feature.
|
||||
|
||||
=item Prepared statement re-use
|
||||
|
||||
Unfortunately, this is where connection pooling borrows from connection
|
||||
sharing. Prepared statements must be cleaned up before passing a connection to
|
||||
another thread if one aims to be portable. But even if you remove that
|
||||
portability requirement, prepared statements are always specific to a single
|
||||
connection. Since you can't assume that you will always get the same connection
|
||||
from the pool, caching prepared statements is not practical.
|
||||
|
||||
On the other hand, a connection pool does allow you to use a single connection
|
||||
for a longer period of time than with connection sharing without negatively
|
||||
affecting concurrency. Unless, of course, there is a limit on the number of
|
||||
open connections, in which case using a connection for a long period of time
|
||||
may starve another thread.
|
||||
|
||||
=item Transaction grouping
|
||||
|
||||
Pretty much the same arguments with re-using prepared statements also apply to
|
||||
transaction grouping: Transactions should be committed to disk before passing a
|
||||
connection back to the pool.
|
||||
|
||||
=item Background processing
|
||||
|
||||
This is also where a connection pool shares a lot of similarity with connection
|
||||
sharing. With thread-local storage, creating a worker thread to perform
|
||||
database operations on the background would be very inefficient. But since this
|
||||
inefficiency is being tackled by allowing connection re-use with a connection
|
||||
pool, it's not a problem. Still the same warning applies with regard to
|
||||
dependent queries, though.
|
||||
|
||||
=item Concurrency
|
||||
|
||||
Connection pooling gives you fine-grained control over how much concurrency
|
||||
you'd like to have. For maximum concurrency, don't put a limit on the number of
|
||||
maximum database connections. If there is a limit, then that will decrease the
|
||||
maximum concurrency in favor of lower memory usage.
|
||||
|
||||
=item Portability
|
||||
|
||||
Since database connections are being passed among threads, connection pooling
|
||||
will require at least SQLite 3.3.1 compiled with thread safety enabled. Making
|
||||
use of its cache sharing capabilities to reduce memory usage will require
|
||||
SQLite 3.5.0 or higher.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
|
||||
|
||||
=head1 Final notes
|
||||
|
||||
As for what I used for ncdc. I initially chose connection sharing, for its
|
||||
simplicity. Then when I noticed that the UI became less responsive than I found
|
||||
acceptable I started adding a simple queue for background processing of
|
||||
queries. Later I stumbled upon the main problem with that solution: I wanted to
|
||||
read back a value that was written in a background thread, and had no way of
|
||||
knowing whether the background thread had finished executing that query or not.
|
||||
I then decided to expand the background thread to allow for passing back query
|
||||
results, and transformed everything into a full message passing solution. This
|
||||
appears to be working well at the moment, and my current implementation has
|
||||
support for both prepared statement re-use and transaction grouping, which
|
||||
measurably increased performance.
|
||||
|
||||
To summarize, there isn't really a I<best> solution that works for every
|
||||
application. Connection sharing works well for applications where
|
||||
responsiveness and concurrency isn't of major importance. Message passing works
|
||||
well for applications that aim to be responsive, and is flexible enough for
|
||||
optimizing CPU and I/O by re-using prepared statements and grouping queries in
|
||||
larger transactions. Thread-local connections are suitable for applications
|
||||
that have a relatively fixed number of threads, whereas connection pooling
|
||||
works better for applications with a varying number of worker threads.
|
||||
|
||||
=cut
|
||||
64
dat/tuwf
64
dat/tuwf
|
|
@ -1,64 +0,0 @@
|
|||
=pod
|
||||
|
||||
TUWF is a very small and lightweight web development framework for Perl. It has
|
||||
evolved from being a few abstraction layers in two large websites to a separate
|
||||
set of modules. While initially designed to be used for large and complex
|
||||
websites, it is also perfectly suited for small single-file websites.
|
||||
|
||||
=head2 Main features
|
||||
|
||||
=over
|
||||
|
||||
=item * Very small, and no extra modules required for the base functionality,
|
||||
|
||||
=item * Easy built-in routing,
|
||||
|
||||
=item * Handy form validation functions,
|
||||
|
||||
=item * Easy XML/HTML generation,
|
||||
|
||||
=item * Response buffering and output compression,
|
||||
|
||||
=item * Easy access to request data,
|
||||
|
||||
=item * Support for CGI, FastCGI and a built-in web server,
|
||||
|
||||
=item * Uses UTF-8 for all text,
|
||||
|
||||
=item * Convenient SQL execution functions and correct transaction handling,
|
||||
|
||||
=item * Open source (duh!) and available under a liberal MIT license.
|
||||
|
||||
=back
|
||||
|
||||
Read the L<description|TUWF/DESCRIPTION> in the documentation for more
|
||||
information and details.
|
||||
|
||||
|
||||
=head2 Download
|
||||
|
||||
B<Latest packaged version:> 1.2 ([dllink TUWF-1.2.tar.gz download]
|
||||
- L<CPAN mirror|https://metacpan.org/release/TUWF>)
|
||||
|
||||
TUWF is also available on a git repository at L<https://code.blicky.net/yorhel/tuwf>.
|
||||
|
||||
|
||||
=head2 Websites using TUWF
|
||||
|
||||
(Not a whole lot)
|
||||
|
||||
=over
|
||||
|
||||
=item * L<VNDB.org|https://vndb.org/> (the site that spawned TUWF - L<open source|https://g.blicky.net/vndb.git/>)
|
||||
|
||||
=item * L<Manned.org|https://manned.org/> (L<open source|https://g.blicky.net/manned.git/>)
|
||||
|
||||
=item * L<This website|https://dev.yorhel.nl/> (L<open source|https://g.blicky.net/yorhel-dev.git/tree/index.cgi>)
|
||||
|
||||
=item * L<Blicky.net Pastebin|https://p.blicky.net/> (L<open source|https://g.blicky.net/bpaste.git/tree/index.cgi>)
|
||||
|
||||
=item * The website embedded in the L<D&R Axum|http://www.d-r.nl/axum.html> mixing console.
|
||||
|
||||
=item * L<333networks|http://333networks.com/>
|
||||
|
||||
=back
|
||||
|
|
@ -1,57 +0,0 @@
|
|||
1.2 - 2018-02-18
|
||||
- Add tuwf() exported function as alias to $self or $TUWF::OBJ
|
||||
- Add TUWF::get/put/post/etc() as better alternative to TUWF::register()
|
||||
- Add TUWF::hook() as better alternative to (pre|post)_request_handler
|
||||
- Add capture() to access route captures
|
||||
- Add standalone HTTP dev server (requires HTTP::Server::Simple)
|
||||
- Add pass() and done() methods to prematurely abort the current handler
|
||||
- Add 'import_modules' setting
|
||||
- TUWF::Request: Add reqJSON()
|
||||
- TUWF::Request: Disallow control characters in HTTP request data
|
||||
- TUWF::Response: Add resJSON()
|
||||
- TUWF::Response: Add resBinary()
|
||||
- TUWF::Response: Add resFile() + mime_types/mime_default settings
|
||||
- TUWF::Response: Allow setting headers before resRedirect()
|
||||
- TUWF::Response: resRedirect() now sets a relative 'Location' HTTP header
|
||||
- TUWF::DB: Add DB query logging and profiling to non-TUWF database functions
|
||||
- TUWF::DB: Add dbVal()
|
||||
- TUWF::XML: Add functional-style DSL
|
||||
- TUWF::XML: Add HTML5 support
|
||||
- TUWF::XML: Add different naming convention support
|
||||
- TUWF::XML: Add 'mkclass' utility function
|
||||
- Improved error pages
|
||||
- Various documentation improvements
|
||||
|
||||
1.1 - 2017-11-26
|
||||
- Disallow exclamation mark in email address validation
|
||||
- Add reqProtocol() method
|
||||
- Add reqFCGI() method
|
||||
- Remove 'X-Powered-By' header
|
||||
- Fix handling of space character in load_recursive()
|
||||
|
||||
1.0 - 2015-09-17
|
||||
- !! Some backwards-incompatible changes, marked * !!
|
||||
- kv_validate() improvements:
|
||||
- Fix maxcount option
|
||||
- Fix non-array argument to 'func'
|
||||
- Added some default templates: num, int, uint, ascii, email, weburl
|
||||
- * Removed 'min' and 'max' options, these now require the num template
|
||||
- Add 'inherit' option for template definitions
|
||||
- Allow templates to provide default values for 'required', 'default',
|
||||
'rmwhitespace', 'multi', 'mincount' and 'maxcount'
|
||||
- Add tests
|
||||
- * reqPath() now includes the leading slash
|
||||
- * reqGet(), reqPost(), reqParam(), reqUploadMIME() and reqUploadRaw()
|
||||
now only work in scalar context.
|
||||
- * Add plural versions of the above methods (reqGets() etc) that only
|
||||
work in list context.
|
||||
- Add reqQuery()
|
||||
- Fix warning with Perl 5.22
|
||||
|
||||
0.2 - 2012-01-19
|
||||
- Fixed bug with in-place utf8_decode() in recent Perls
|
||||
- Lowered minimum Perl version to 5.8.0
|
||||
|
||||
0.1 - 2011-02-07
|
||||
Initial version
|
||||
|
||||
40
dat/tuwf.md
Normal file
40
dat/tuwf.md
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
% The Ultimate Website Framework
|
||||
|
||||
TUWF is a very small and lightweight web development framework for Perl. It has
|
||||
evolved from being a few abstraction layers in two large websites to a separate
|
||||
set of modules. While initially designed to be used for large and complex
|
||||
websites, it is also perfectly suited for small single-file websites.
|
||||
|
||||
## Main features
|
||||
|
||||
- Very small, and no extra modules required for the base functionality,
|
||||
- Easy built-in routing,
|
||||
- Handy form validation functions,
|
||||
- Easy XML/HTML generation,
|
||||
- Response buffering and output compression,
|
||||
- Easy access to request data,
|
||||
- Support for CGI, FastCGI and a built-in web server,
|
||||
- Uses UTF-8 for all text,
|
||||
- Convenient SQL execution functions and correct transaction handling,
|
||||
- Open source (duh!) and available under a liberal MIT license.
|
||||
|
||||
Read the [description](/tuwf/man#description) in the documentation for more
|
||||
information and details.
|
||||
|
||||
## Download
|
||||
|
||||
**Latest packaged version:** 1.2 ([dllink TUWF-1.2.tar.gz]
|
||||
\- [CPAN mirror](https://metacpan.org/release/TUWF))
|
||||
|
||||
TUWF is also available on a git repository at
|
||||
[https://code.blicky.net/yorhel/tuwf](https://code.blicky.net/yorhel/tuwf).
|
||||
|
||||
## Websites using TUWF
|
||||
|
||||
(Not a whole lot)
|
||||
|
||||
- [VNDB.org](https://vndb.org/) (the site that spawned TUWF - [open source](https://g.blicky.net/vndb.git/))
|
||||
- [Manned.org](https://manned.org/) ([open source](https://g.blicky.net/manned.git/))
|
||||
- [Blicky.net Pastebin](https://p.blicky.net/) ([open source](https://g.blicky.net/bpaste.git/tree/index.cgi))
|
||||
- The website embedded in the [D&R Axum](http://www.d-r.nl/axum.html) mixing console.
|
||||
- [333networks](http://333networks.com/)
|
||||
1
dat/ylib
1
dat/ylib
|
|
@ -1 +0,0 @@
|
|||
../../ylib/
|
||||
208
dat/yxml
208
dat/yxml
|
|
@ -1,208 +0,0 @@
|
|||
=pod
|
||||
|
||||
I<*But see the L<Bugs and Limitations|/Bugs and Limitations> and L<Conformance Issues|/Conformance Issues> below.>
|
||||
|
||||
Yxml is a small (C<6 KiB>) L<non-validating|/Validating vs. non-validating> yet
|
||||
mostly conforming XML parser written in C. Its primary goals are small binary
|
||||
size, simplicity and correctness. It also happens to be L<pretty
|
||||
fast|/Comparison>.
|
||||
|
||||
The code can be obtained from the L<git repo|http://g.blicky.net/yxml.git> and
|
||||
is available under a permissive MIT license. The only two files you need are
|
||||
L<yxml.c|http://g.blicky.net/yxml.git/plain/yxml.c> and
|
||||
L<yxml.h|http://g.blicky.net/yxml.git/plain/yxml.h>, which can easily be
|
||||
included and compiled as part of your project. Complete API documentation is
|
||||
available in L<the manual|https://dev.yorhel.nl/yxml/man>.
|
||||
|
||||
The API follows a simple and mostly buffer-less design, and only consists of
|
||||
three functions:
|
||||
|
||||
void yxml_init(yxml_t *x, void *buf, size_t bufsize);
|
||||
yxml_ret_t yxml_parse(yxml_t *x, int ch);
|
||||
yxml_ret_t yxml_eof(yxml_t *x);
|
||||
|
||||
Be aware that I<simple> is not necessarily I<easy> or I<convenient>. The API is
|
||||
relatively low-level and designed to integrate into pretty much any application
|
||||
and for any use case. This includes incrementally parsing data from a socket in
|
||||
an event-driven fashion and parsing large XML files on memory-restricted
|
||||
devices. It is possible to implement a more convenient and high-level API on
|
||||
top of yxml, but I'm not very fond of libraries that do more than what I
|
||||
strictly need.
|
||||
|
||||
There are no tarball releases available at the moment. The API is relatively
|
||||
stable, but I won't currently promise any ABI stability. Dynamic linking
|
||||
against yxml is therefore not a very good idea.
|
||||
|
||||
=head3 Features
|
||||
|
||||
=over
|
||||
|
||||
=item * Simple and low-level API.
|
||||
|
||||
=item * Does not require C<malloc()>.
|
||||
|
||||
=item * Pure C, should be very portable.
|
||||
|
||||
=item * Recognizes and consumes the UTF-8 BOM.
|
||||
|
||||
=item * Parses entity references (C<&>) and character references (C<&>).
|
||||
|
||||
=item * Verifies most well-formedness constraints, including the correct
|
||||
nesting of elements.
|
||||
|
||||
=item * Parses XML documents in any ASCII-compatible encoding.
|
||||
|
||||
=back
|
||||
|
||||
But let's not be I<too> optimistic, because there are also...
|
||||
|
||||
=head3 Bugs and Limitations
|
||||
|
||||
=over
|
||||
|
||||
=item * A conditional section in a C<< <!DOCTYPE ..> >> declaration will result
|
||||
in a parse error.
|
||||
|
||||
=item * Allows multiple C<< <!DOCTYPE ..> >> declarations.
|
||||
|
||||
=item * Information encoded in the XML and doctype declarations is currently
|
||||
not available through the API.
|
||||
|
||||
=back
|
||||
|
||||
I hope to have these issues fixed in the near future.
|
||||
|
||||
=head3 Conformance Issues
|
||||
|
||||
=over
|
||||
|
||||
=item * Does not verify that non-ASCII characters in element names, element
|
||||
content, attribute names and attribute values are within the allowed Unicode
|
||||
character ranges.
|
||||
|
||||
=item * Does not verify that attribute names within the same element are unique.
|
||||
|
||||
=item * Does not verify that the contents of a C<< <!DOCTYPE ..> >> declaration
|
||||
follow the XML grammar.
|
||||
|
||||
=item * Can't parse documents in a non-ASCII-compatible encoding. You'll have
|
||||
to convert it to UTF-8 or something similar first.
|
||||
|
||||
=item * No support for custom entity references, neither through the API nor
|
||||
using C<< <!ENTITY> >>.
|
||||
|
||||
=back
|
||||
|
||||
These conformance issues are the result of the byte-oriented and minimal design
|
||||
of yxml, and I do not intend to fix these directly within the library. The
|
||||
intention is to make sure that all of the above mentioned issues can be fixed
|
||||
on top of yxml (by the application, or by a wrapper) if strict conformance is
|
||||
required, but the required functionality to support custom entity references
|
||||
and DTD handling has not been implemented yet.
|
||||
|
||||
=head3 Non-features
|
||||
|
||||
And now follows a list of things that are not part of the core XML
|
||||
specification and are not directly supported. As with the conformance issues,
|
||||
these features can be implemented on top of yxml.
|
||||
|
||||
=over
|
||||
|
||||
=item * No helper functions to deal with namespaces. Yxml will parse XML files
|
||||
with namespaces just fine, but it's up to the application to do the rest.
|
||||
|
||||
=item * No DTD or XML Schema validation.
|
||||
|
||||
=item * No XSLT.
|
||||
|
||||
=item * No XPath.
|
||||
|
||||
=item * Doesn't do your household chores.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
=head2 Comparison
|
||||
|
||||
The following benchmark compares L<expat|http://expat.sourceforge.net/>,
|
||||
L<libxml2|http://xmlsoft.org/> and
|
||||
L<Mini-XML|http://www.msweet.org/projects.php?Z3> with yxml. A L<strlen(3)>
|
||||
implementation is also included as an indication of the "theoretical" minimum.
|
||||
|
||||
SIZE PERFORMANCE
|
||||
LIB VER LICENSE OBJ STATIC WIKI DISCOGS
|
||||
strlen 25 816 0.16 0.09
|
||||
expat 2.1.0 MIT 162 139 194 432 1.47 1.09
|
||||
libxml2 2.9.1 MIT 464 328 518 816 2.53 1.75
|
||||
mxml 2.7 LGPL2+static 32 733 75 832 12.38 7.80
|
||||
yxml git MIT 5 971 31 416 1.15 0.74
|
||||
|
||||
The code for these benchmarks is available in the
|
||||
L<bench/|http://g.blicky.net/yxml.git/tree/bench> directory on git. Some
|
||||
explanatory notes:
|
||||
|
||||
=over
|
||||
|
||||
=item * C<OBJ> is the total size of all object code of the library, measured
|
||||
with L<size(1)>.
|
||||
|
||||
=item * C<STATIC> is the file size of a minimal statically linked binary when
|
||||
linked against L<musl|http://www.musl-libc.org/> 0.9.13, measured with
|
||||
L<wc(1)> after running L<strip(1)>.
|
||||
|
||||
=item * The performance is the time, in seconds, to load a large XML file.
|
||||
C<WIKI> refers to C<enwiki-20130805-abstract5.xml> (162 MiB) from a L<Wikipedia
|
||||
Dump|http://dumps.wikimedia.org/enwiki/>, C<DISCOGS> refers to
|
||||
C<discogs_20130801_labels.xml> (94 MiB) from a L<Discogs Data
|
||||
Dump|http://www.discogs.com/data/>.
|
||||
|
||||
=item * Libxml2 has been compiled with most of its features disabled with
|
||||
C<./configure>, but it still manages to be the very definition of bloat.
|
||||
|
||||
=item * Everything has been compiled with gcc 4.8.1 at C<-O2>.
|
||||
|
||||
=item * Benchmarks are run on Linux 3.10.7 with a 3 Ghz Intel Core Duo E8400
|
||||
and with 4GB RAM.
|
||||
|
||||
=back
|
||||
|
||||
And just for fun, here's the same comparison when compiled with C<-Os>, i.e.
|
||||
optimized for small size. Interestingly enough, Mini-XML actually runs faster
|
||||
with C<-Os> than with C<-O2>.
|
||||
|
||||
SIZE PERFORMANCE
|
||||
LIB VER LICENSE OBJ STATIC WIKI DISCOGS
|
||||
strlen 25 816 0.16 0.09
|
||||
expat 2.1.0 MIT 113 314 145 632 1.58 1.20
|
||||
libxml2 2.9.1 MIT 356 948 412 256 3.01 2.08
|
||||
mxml 2.7 LGPL2+static 27 725 71 704 11.70 7.44
|
||||
yxml git MIT 4 955 30 392 1.67 1.02
|
||||
|
||||
|
||||
=head2 Validating vs. non-validating
|
||||
|
||||
TL;DR: yxml does I<not> accept garbage XML documents, it will correctly handle
|
||||
and report issues if the input does not strictly follow the XML grammar.
|
||||
|
||||
The terms I<validating> and I<non-validating> have specific meanings within the
|
||||
context of XML. A validating parser is one that reads the doctype declaration
|
||||
(DTD) associated with a document, and validates that the contents of the
|
||||
document follow the rules described in the DTD. A DTD may also include
|
||||
instructions on how to parse the document, including the definition of custom
|
||||
entity references (C<&whatever;>) and instructions on how attribute values or
|
||||
element contents should be normalized before passing its data to the
|
||||
application.
|
||||
|
||||
A non-validating parser is one that ignores the DTD and happily parses
|
||||
documents that do not follow the rules described in that DTD. They (usually)
|
||||
don't support entity references and will not normalize attribute values or
|
||||
element contents. A non-validating parser still has to verify that the XML
|
||||
document follows the XML syntax rules.
|
||||
|
||||
It should be noted that a lot of XML documents found in the wild are not
|
||||
described with a DTD, but instead use an alternative technology such as XML
|
||||
schema. Wikipedia L<has more
|
||||
information|https://en.wikipedia.org/wiki/XML#Schemas_and_validation> on this.
|
||||
Using a validating parser for such documents would only add bloat and may
|
||||
introduce L<potential security
|
||||
vulnerabilities|https://en.wikipedia.org/wiki/Billion_laughs>.
|
||||
|
|
@ -1 +0,0 @@
|
|||
../../yxml/yxml.pod
|
||||
169
dat/yxml.md
Normal file
169
dat/yxml.md
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
% Yxml - A small, fast and correct\* XML parser
|
||||
|
||||
_\*But see the [Bugs and Limitations](#bugs-and-limitations) and [Conformance Issues](#conformance-issues) below._
|
||||
|
||||
Yxml is a small (`6 KiB`) [non-validating](#validating-vs.-non-validating) yet
|
||||
mostly conforming XML parser written in C. Its primary goals are small binary
|
||||
size, simplicity and correctness. It also happens to be [pretty
|
||||
fast](#comparison).
|
||||
|
||||
The code can be obtained from the [git repo](https://g.blicky.net/yxml.git) and
|
||||
is available under a permissive MIT license. The only two files you need are
|
||||
[yxml.c](https://g.blicky.net/yxml.git/plain/yxml.c) and
|
||||
[yxml.h](https://g.blicky.net/yxml.git/plain/yxml.h), which can easily be
|
||||
included and compiled as part of your project. Complete API documentation is
|
||||
available in [the manual](/yxml/man).
|
||||
|
||||
The API follows a simple and mostly buffer-less design, and only consists of
|
||||
three functions:
|
||||
|
||||
```c
|
||||
void yxml_init(yxml_t *x, void *buf, size_t bufsize);
|
||||
yxml_ret_t yxml_parse(yxml_t *x, int ch);
|
||||
yxml_ret_t yxml_eof(yxml_t *x);
|
||||
```
|
||||
|
||||
Be aware that _simple_ is not necessarily _easy_ or _convenient_. The API is
|
||||
relatively low-level and designed to integrate into pretty much any application
|
||||
and for any use case. This includes incrementally parsing data from a socket in
|
||||
an event-driven fashion and parsing large XML files on memory-restricted
|
||||
devices. It is possible to implement a more convenient and high-level API on
|
||||
top of yxml, but I'm not very fond of libraries that do more than what I
|
||||
strictly need.
|
||||
|
||||
There are no tarball releases available at the moment. The API is relatively
|
||||
stable, but I won't currently promise any ABI stability. Dynamic linking
|
||||
against yxml is therefore not a very good idea.
|
||||
|
||||
### Features
|
||||
|
||||
- Simple and low-level API.
|
||||
- Does not require `malloc()`.
|
||||
- Pure C, should be very portable.
|
||||
- Recognizes and consumes the UTF-8 BOM.
|
||||
- Parses entity references (`&amp;`) and character references (`&#38;`).
|
||||
- Verifies most well-formedness constraints, including the correct nesting of
|
||||
elements.
|
||||
- Parses XML documents in any ASCII-compatible encoding.
|
||||
|
||||
But let's not be _too_ optimistic, because there are also...
|
||||
|
||||
### Bugs and Limitations
|
||||
|
||||
- A conditional section in a `<!DOCTYPE ..>` declaration will result in a parse
|
||||
error.
|
||||
- Allows multiple `<!DOCTYPE ..>` declarations.
|
||||
- Information encoded in the XML and doctype declarations is currently not
|
||||
available through the API.
|
||||
|
||||
I hope to have these issues fixed in the near future.
|
||||
|
||||
### Conformance Issues
|
||||
|
||||
- Does not verify that non-ASCII characters in element names, element content,
|
||||
attribute names and attribute values are within the allowed Unicode character
|
||||
ranges.
|
||||
- Does not verify that attribute names within the same element are unique.
|
||||
- Does not verify that the contents of a `<!DOCTYPE ..>` declaration follow the
|
||||
XML grammar.
|
||||
- Can't parse documents in a non-ASCII-compatible encoding. You'll have to
|
||||
convert it to UTF-8 or something similar first.
|
||||
- No support for custom entity references, neither through the API nor using
|
||||
`<!ENTITY>`.
|
||||
|
||||
These conformance issues are the result of the byte-oriented and minimal design
|
||||
of yxml, and I do not intend to fix these directly within the library. The
|
||||
intention is to make sure that all of the above mentioned issues can be fixed
|
||||
on top of yxml (by the application, or by a wrapper) if strict conformance is
|
||||
required, but the required functionality to support custom entity references
|
||||
and DTD handling has not been implemented yet.
|
||||
|
||||
### Non-features
|
||||
|
||||
And now follows a list of things that are not part of the core XML
|
||||
specification and are not directly supported. As with the conformance issues,
|
||||
these features can be implemented on top of yxml.
|
||||
|
||||
- No helper functions to deal with namespaces. Yxml will parse XML files with
|
||||
namespaces just fine, but it's up to the application to do the rest.
|
||||
- No DTD or XML Schema validation.
|
||||
- No XSLT.
|
||||
- No XPath.
|
||||
- Doesn't do your household chores.
|
||||
|
||||
## Comparison
|
||||
|
||||
The following benchmark compares [expat](http://expat.sourceforge.net/),
|
||||
[libxml2](http://xmlsoft.org/) and
|
||||
[Mini-XML](http://www.msweet.org/projects.php?Z3) with yxml. A
|
||||
[strlen(3)](http://man.he.net/man3/strlen) implementation is also included as
|
||||
an indication of the "theoretical" minimum.
|
||||
|
||||
SIZE PERFORMANCE
|
||||
LIB VER LICENSE OBJ STATIC WIKI DISCOGS
|
||||
strlen 25 816 0.16 0.09
|
||||
expat 2.1.0 MIT 162 139 194 432 1.47 1.09
|
||||
libxml2 2.9.1 MIT 464 328 518 816 2.53 1.75
|
||||
mxml 2.7 LGPL2+static 32 733 75 832 12.38 7.80
|
||||
yxml git MIT 5 971 31 416 1.15 0.74
|
||||
|
||||
The code for these benchmarks is available in the
|
||||
[bench/](https://g.blicky.net/yxml.git/tree/bench) directory on git. Some
|
||||
explanatory notes:
|
||||
|
||||
- `OBJ` is the total size of all object code of the library, measured with
|
||||
[size(1)](https://manned.org/size.1).
|
||||
- `STATIC` is the file size of a minimal statically linked binary when linked
|
||||
against [musl](http://www.musl-libc.org/) 0.9.13, measured with
|
||||
[wc(1)](https://manned.org/wc.1) after running
|
||||
[strip(1)](https://manned.org/strip.1).
|
||||
- The performance is the time, in seconds, to load a large XML file. `WIKI`
|
||||
refers to `enwiki-20130805-abstract5.xml` (162 MiB) from a [Wikipedia
|
||||
Dump](http://dumps.wikimedia.org/enwiki/), `DISCOGS` refers to
|
||||
`discogs_20130801_labels.xml` (94 MiB) from a [Discogs Data
|
||||
Dump](http://www.discogs.com/data/).
|
||||
- Libxml2 has been compiled with most of its features disabled with
|
||||
`./configure`, but it still manages to be the very definition of bloat.
|
||||
- Everything has been compiled with gcc 4.8.1 at `-O2`.
|
||||
- Benchmarks are run on Linux 3.10.7 with a 3 Ghz Intel Core Duo E8400 and with
|
||||
4GB RAM.
|
||||
|
||||
And just for fun, here's the same comparison when compiled with `-Os`, i.e.
|
||||
optimized for small size. Interestingly enough, Mini-XML actually runs faster
|
||||
with `-Os` than with `-O2`.
|
||||
|
||||
SIZE PERFORMANCE
|
||||
LIB VER LICENSE OBJ STATIC WIKI DISCOGS
|
||||
strlen 25 816 0.16 0.09
|
||||
expat 2.1.0 MIT 113 314 145 632 1.58 1.20
|
||||
libxml2 2.9.1 MIT 356 948 412 256 3.01 2.08
|
||||
mxml 2.7 LGPL2+static 27 725 71 704 11.70 7.44
|
||||
yxml git MIT 4 955 30 392 1.67 1.02
|
||||
|
||||
## Validating vs. non-validating
|
||||
|
||||
TL;DR: yxml does _not_ accept garbage XML documents, it will correctly handle
|
||||
and report issues if the input does not strictly follow the XML grammar.
|
||||
|
||||
The terms _validating_ and _non-validating_ have specific meanings within the
|
||||
context of XML. A validating parser is one that reads the doctype declaration
|
||||
(DTD) associated with a document, and validates that the contents of the
|
||||
document follow the rules described in the DTD. A DTD may also include
|
||||
instructions on how to parse the document, including the definition of custom
|
||||
entity references (`&whatever;`) and instructions on how attribute values or
|
||||
element contents should be normalized before passing its data to the
|
||||
application.
|
||||
|
||||
A non-validating parser is one that ignores the DTD and happily parses
|
||||
documents that do not follow the rules described in that DTD. They (usually)
|
||||
don't support entity references and will not normalize attribute values or
|
||||
element contents. A non-validating parser still has to verify that the XML
|
||||
document follows the XML syntax rules.
|
||||
|
||||
It should be noted that a lot of XML documents found in the wild are not
|
||||
described with a DTD, but instead use an alternative technology such as XML
|
||||
schema. Wikipedia [has more
|
||||
information](https://en.wikipedia.org/wiki/XML#Schemas_and_validation) on this.
|
||||
Using a validating parser for such documents would only add bloat and may
|
||||
introduce [potential security
|
||||
vulnerabilities](https://en.wikipedia.org/wiki/Billion_laughs).
|
||||
428
dat/yxml/man.md
Normal file
428
dat/yxml/man.md
Normal file
|
|
@ -0,0 +1,428 @@
|
|||
% Yxml Manual
|
||||
|
||||
# Introduction
|
||||
|
||||
Yxml is a small non-validating and mostly conforming XML parser written in C.
|
||||
|
||||
The latest version of yxml and this document can be found on
|
||||
[https://dev.yorhel.nl/yxml](https://dev.yorhel.nl/yxml).
|
||||
|
||||
# Compiling yxml
|
||||
|
||||
Due to the small size of yxml, the recommended way to use it is to copy the
|
||||
[yxml.c](https://g.blicky.net/yxml.git/plain/yxml.c) and
|
||||
[yxml.h](https://g.blicky.net/yxml.git/plain/yxml.h) from the git repository
|
||||
into your project directory, and compile and link yxml.c as part of your
|
||||
program or library.
|
||||
|
||||
The git repository also includes a Makefile. Running `make` without specifying
|
||||
a target will compile a `.a` file for easy static linking. A test suite is
|
||||
available under `make test`.
|
||||
|
||||
# API documentation
|
||||
|
||||
## Overview
|
||||
|
||||
Yxml is designed to be very flexible and efficient, and thus offers a
|
||||
relatively low-level stream-based API. The entire API consists of two typedefs
|
||||
and three functions:
|
||||
|
||||
```c
|
||||
typedef enum { /* .. */ } yxml_ret_t;
|
||||
typedef struct { /* .. */ } yxml_t;
|
||||
|
||||
void yxml_init(yxml_t *x, void *buf, size_t bufsize);
|
||||
yxml_ret_t yxml_parse(yxml_t *x, int ch);
|
||||
yxml_ret_t yxml_eof(yxml_t *x);
|
||||
```
|
||||
|
||||
The values of _yxml\_ret\_t_ and the public fields of _yxml\_t_ are explained
|
||||
in detail below. Parsing a file using yxml involves three steps:
|
||||
|
||||
1. Initialization, using `yxml_init()`.
|
||||
2. Parsing. This is performed in a loop where `yxml_parse()` is called on each
|
||||
character of the input file.
|
||||
3. Finalization, using `yxml_eof()`.
|
||||
|
||||
## Initialization
|
||||
|
||||
```c
|
||||
#define BUFSIZE 4096
|
||||
void *buf = malloc(BUFSIZE);
|
||||
yxml_t x;
|
||||
yxml_init(&x, buf, BUFSIZE);
|
||||
```
|
||||
|
||||
The parsing state for an input document is remembered in the `yxml_t`
|
||||
structure. This structure needs to be allocated and initialized before parsing
|
||||
a new XML document.
|
||||
|
||||
Allocating space for the `yxml_t` structure is the responsibility of the
|
||||
application. Allocation can be done on the stack, but it is also possible to
|
||||
embed the struct inside a larger object or to allocate space for the struct
|
||||
separately.
|
||||
|
||||
`yxml_init()` takes a pointer to an (uninitialized) `yxml_t` struct as first
|
||||
argument and performs the necessary initialization. The two additional
|
||||
arguments specify a pointer to a buffer and the size of this buffer. The given
|
||||
buffer must be writable, but does not have to be initialized by the
|
||||
application.
|
||||
|
||||
The buffer is used internally by yxml to keep a stack of opened XML element
|
||||
names, property names and PI targets. The size of the buffer determines both
|
||||
the maximum depth in which XML elements can be nested and the maximum length of
|
||||
element names, property names and PI targets. Each name consumes
|
||||
`strlen(name)+1` bytes in the buffer, and the first byte of the buffer is
|
||||
reserved for the `\0` byte. This means that in order to parse an XML document
|
||||
with an element name of 100 bytes, a property name or PI target of 50 bytes and
|
||||
a nesting depth of 10 levels, the buffer must be at least
|
||||
`1+10*(100+1)+(50+1)=1062` bytes. Note that properties and PIs don't nest, so
|
||||
the `max(PI_name, property_name)` only needs to be counted once.
|
||||
|
||||
It is not currently possible to dynamically grow the buffer while parsing, so
|
||||
it is important to choose a buffer size that is large enough to handle all the
|
||||
XML documents that you want to parse. Since element names, property names and
|
||||
PI targets are typically much shorter than in the previous example, a buffer
|
||||
size of 4 or 8 KiB will give enough headroom even for documents with deep
|
||||
nesting.
|
||||
|
||||
As a useful hack, it is possible to merge the memory for the `yxml_t` struct
|
||||
and the stack buffer in a single allocation:
|
||||
|
||||
```c
|
||||
yxml_t *x = malloc(sizeof(yxml_t) + BUFSIZE);
|
||||
yxml_init(x, x+1, BUFSIZE);
|
||||
```
|
||||
|
||||
This way, the complete parsing state can be passed around with a single
|
||||
pointer, and both the struct and the buffer can be freed with a single call to
|
||||
`free(x)`.
|
||||
|
||||
## Parsing
|
||||
|
||||
```c
|
||||
yxml_t *x; /* An initialized state */
|
||||
char *doc; /* The XML document as a zero-terminated string */
|
||||
for(; *doc; doc++) {
|
||||
yxml_ret_t r = yxml_parse(x, *doc);
|
||||
if(r < 0)
|
||||
exit(1); /* Handle error */
|
||||
/* Handle any tokens we are interested in */
|
||||
}
|
||||
```
|
||||
|
||||
The actual parsing of an XML document is facilitated by the `yxml_parse()`
|
||||
function. It accepts a pointer to an initialized `yxml_t` struct as first
|
||||
argument and a byte as second argument. The byte is passed as an `int`, and
|
||||
values in the range of -128 to 255 (both inclusive) are accepted. This way you
|
||||
can pass either `signed char` or `unsigned char` values, yxml will work fine
|
||||
with both. To parse a complete document, `yxml_parse()` needs to be called for
|
||||
each byte of the document in sequence, as done in the above example.
|
||||
|
||||
For each byte, `yxml_parse()` will return either _YXML\_OK_ (0), a token (>0)
|
||||
or an error (<0). _YXML\_OK_ is returned if the given byte has been
|
||||
parsed/consumed correctly but that otherwise nothing worthy of note has
|
||||
happened. The application should then continue processing and pass the next
|
||||
byte of the document.
|
||||
|
||||
### Public State Variables
|
||||
|
||||
After each call to `yxml_parse()`, a number of interesting fields in the
|
||||
`yxml_t` struct are updated. The fields documented here are part of the API,
|
||||
and are considered as extra return values of `yxml_parse()`. All of these
|
||||
fields should be considered read-only.
|
||||
|
||||
`char *elem;`
|
||||
: Name of the currently opened XML element. Points into the buffer given to
|
||||
`yxml_init()`. Described in ["Elements"](#elements).
|
||||
|
||||
`char *attr;`
|
||||
: Name of the currently opened attribute. Points into the buffer given to
|
||||
`yxml_init()`. Described in ["Attributes"](#attributes).
|
||||
|
||||
`char *pi;`
|
||||
: Target of the currently opened PI. Points into the buffer given to
|
||||
`yxml_init()`. Described in ["Processing Instructions"](#processing-instructions).
|
||||
|
||||
`char data[8];`
|
||||
: Character data of element contents, attribute values or PI contents. Described
|
||||
in ["Character Data"](#character-data).
|
||||
|
||||
`uint32_t line;`
|
||||
: Number of the line in the XML document that is currently being parsed.
|
||||
|
||||
`uint64_t byte;`
|
||||
: Byte offset into the current line of the XML document.
|
||||
|
||||
`uint64_t total;`
|
||||
: Byte offset into the XML document.
|
||||
|
||||
The values of the _elem_, _attr_, _pi_ and _data_ elements depend on the
|
||||
parsing context, and only remain valid within that context. The exact contexts
|
||||
in which these fields contain valid information are described in their
|
||||
respective sections below.
|
||||
|
||||
The _line_, _byte_ and _total_ fields are mainly useful for error reporting.
|
||||
When `yxml_parse()` reports an error, these fields can be used to generate a
|
||||
useful error message. For example:
|
||||
|
||||
```c
|
||||
printf("Parsing error at %s:%"PRIu32":%"PRIu64" byte offset %"PRIu64",
|
||||
filename, x->line, x->byte, x->total);
|
||||
```
|
||||
|
||||
### Error Handling
|
||||
|
||||
Errors are not recoverable. No further calls to `yxml_parse()` or `yxml_eof()`
|
||||
should be performed on the same `yxml_t` struct. Re-initializing the same
|
||||
struct using `yxml_init()` to start parsing a new document is possible,
|
||||
however. The following error values may be returned by `yxml_parse()`:
|
||||
|
||||
YXML\_EREF
|
||||
: Invalid character or entity reference. E.g. `&whatever;` or `&#ABC;`.
|
||||
|
||||
YXML\_ECLOSE
|
||||
: Close tag does not match open tag. E.g. `<Tag> .. </SomeOtherTag>`.
|
||||
|
||||
YXML\_ESTACK
|
||||
: Stack overflow. This happens when the buffer given to `yxml_init()` was not
|
||||
large enough to parse this document. E.g. when elements are too deeply nested
|
||||
or an element name, attribute name or PI target is too long.
|
||||
|
||||
YXML\_ESYN
|
||||
: Miscellaneous syntax error.
|
||||
|
||||
## Handling Tokens
|
||||
|
||||
The `yxml_parse()` function will return tokens as they are found. When loading
|
||||
an XML document, it is important to know which tokens are returned in which
|
||||
situation and how to handle them.
|
||||
|
||||
The following graph shows the (simplified) state machine of the parser to
|
||||
illustrate the order in which tokens are returned. The labels on the edge
|
||||
indicate the tokens that are returned by `yxml_parse()`, with their `YXML_`
|
||||
prefix removed. The special return value `YXML_OK` and error returns are not
|
||||
displayed.
|
||||
|
||||

|
||||
|
||||
Tokens that the application is not interested in can be ignored safely. For
|
||||
example, if you are not interested in handling processing instructions, then
|
||||
the `YXML_PISTART`, `YXML_PICONTENT` and `YXML_PIEND` tokens can be handled
|
||||
exactly as if they were an alias for `YXML_OK`.
|
||||
|
||||
### Elements
|
||||
|
||||
The `YXML_ELEMSTART` and `YXML_ELEMEND` tokens are returned when an XML
|
||||
element is opened and closed, respectively. When `YXML_ELEMSTART` is returned,
|
||||
the _elem_ struct field will hold the name of the element. This field will be
|
||||
valid (i.e. keeps pointing to the name of the opened element) until the end of
|
||||
the attribute list. That is, until any token other than those described in
|
||||
["Attributes"](#attributes) is returned. Although the _elem_ pointer itself may be reused
|
||||
and modified while parsing the contents of the element, the buffer that _elem_
|
||||
points to will remain valid up to and including the corresponding
|
||||
`YXML_ELEMEND`.
|
||||
|
||||
Yxml will verify that elements properly nest and that the name of each closing
|
||||
tag properly matches that of the corresponding opening tag. The application may
|
||||
safely assume that each `YXML_ELEMSTART` is properly matched with a
|
||||
`YXML_ELEMEND`, or that otherwise an error is returned. Furthermore, only a
|
||||
single root element is allowed. When the root element is closed, no further
|
||||
`YXML_ELEMSTART` tokens will be returned.
|
||||
|
||||
No distinction is made between self-closing tags and elements with empty
|
||||
content. For example, both `<a/>` and `<a></a>` will result in the
|
||||
`YXML_ELEMSTART` token (with `elem="a"`) followed by `YXML_ELEMEND`.
|
||||
|
||||
Element contents are returned in the form of the `YXML_CONTENT` token and the
|
||||
_data_ field. This is described in more detail in ["Character
|
||||
Data"](#character-data).
|
||||
|
||||
### Attributes
|
||||
|
||||
Element attributes are passed using the `YXML_ATTRSTART`, `YXML_ATTRVAL` and
|
||||
`YXML_ATTREND` tokens. The name of the attribute is available in the _attr_
|
||||
field, which is available when `YXML_ATTRSTART` is returned and valid up to
|
||||
and including the next `YXML_ATTREND`.
|
||||
|
||||
Yxml does not verify that attribute names are unique within a single element.
|
||||
It is thus possible that the same attribute will appear twice, possibly with a
|
||||
different value. The correct way to handle this situation is to stop parsing
|
||||
the rest of the document and to report an error, but if the application is not
|
||||
interested in all attributes, detecting duplicates in them may complicate the
|
||||
code and possibly even introduce security vulnerabilities (e.g. algorithmic
|
||||
complexity attacks in a hash table). As such, the best solution is to report an
|
||||
error when you can easily detect a duplicate attribute, but ignore duplicates
|
||||
that require more effort to be detected.
|
||||
|
||||
The attribute value is returned with the `YXML_ATTRVAL` token and the _data_
|
||||
field. This is described in more detail in ["Character Data"](#character-data).
|
||||
|
||||
### Processing Instructions
|
||||
|
||||
Processing instructions are passed in similar fashion to attributes, and are
|
||||
passed using `YXML_PISTART`, `YXML_PICONTENT` and `YXML_PIEND`. The target of
|
||||
the PI is available in the _pi_ field after `YXML_PISTART` and remains valid up
|
||||
to (but excluding) the next `YXML_PIEND` token.
|
||||
|
||||
PI contents are returned as `YXML_PICONTENT` tokens and using the _data_ field,
|
||||
described in more detail in ["Character Data"](#character-data).
|
||||
|
||||
### Character Data
|
||||
|
||||
Element contents (`YXML_CONTENT`), attribute values (`YXML_ATTRVAL`) and PI
|
||||
contents (`YXML_PICONTENT`) are all passed to the application in small chunks
|
||||
through the _data_ field. Each time that `yxml_parse()` returns one of these
|
||||
tokens, the _data_ field will contain one or more bytes of the element
|
||||
contents, attribute value or PI content. The string is zero-terminated, and its
|
||||
value is only valid until the next call to `yxml_parse()`.
|
||||
|
||||
Typically only a single byte is returned after each call, but multiple bytes
|
||||
can be returned in the following special cases:
|
||||
|
||||
- Character references outside of the ASCII character range. When a character
|
||||
reference is encountered in element contents or in an attribute value, it is
|
||||
automatically replaced with the referenced character. For example, the XML
|
||||
string `/` is replaced with the single character "/". If the character
|
||||
value is above 127, its value is encoded in UTF-8 and then returned as a
|
||||
multi-byte string in the _data_ field. For example, the character reference
|
||||
`ç` is returned as the C string "\\xc3\\xa9", which is the UTF-8
|
||||
encoding for the character "é". Character references are not expanded in PI
|
||||
contents.
|
||||
- The special character "\]" in CDATA sections. When the "\]" character is
|
||||
encountered inside a CDATA section, yxml can't immediately return it to the
|
||||
application because it does not know whether the character is part of the
|
||||
CDATA ending or whether it is still part of its contents. So it remembers the
|
||||
character for the next call to `yxml_parse()`, and if it then turns out that
|
||||
the character was part of the CDATA contents, it returns both the "\]"
|
||||
character and the following byte in the same _data_ string. Similarly, if two
|
||||
"\]" characters appear in sequence as part of the CDATA content, then the two
|
||||
characters are returned in a single _data_ string together with the byte that
|
||||
follows. CDATA sections only appear in element contents, so this does not
|
||||
happen in attribute values or PI contents.
|
||||
- The special character "?" in PI contents. This is similar to the issue with
|
||||
"\]" characters in CDATA sections. Yxml remembers a "?" character while
|
||||
parsing a PI, and then returns it together with the byte following it if it
|
||||
turned out to be part of the PI contents.
|
||||
|
||||
Note that `yxml_parse()` operates on bytes rather than characters. If the
|
||||
document is encoded in a multi-byte character encoding such as UTF-8, then each
|
||||
Unicode character that occupies more than a single byte will be broken up and
|
||||
its bytes processed individually. As a result, the bytes returned in the
|
||||
_data_ field may not necessarily represent a single Unicode character. To
|
||||
ensure that multi-byte characters are not broken up, the application can
|
||||
concatenate multiple data tokens to a single buffer before attempting to do
|
||||
further processing on the result.
|
||||
|
||||
To make processing easier, an application may want to combine all the tokens
|
||||
into a single buffer. This can be easily implemented as follows:
|
||||
|
||||
```c
|
||||
SomeString attrval;
|
||||
while(..) {
|
||||
yxml_ret_t r = yxml_parse(x, ch);
|
||||
switch(r) {
|
||||
case YXML_ATTRSTART:
|
||||
somestring_initialize(attrval);
|
||||
break;
|
||||
case YXML_ATTRVAL:
|
||||
somestring_append(attrval, x->data);
|
||||
break;
|
||||
case YXML_ATTREND:
|
||||
/* Now we have a full attribute. Its name is in x->attr, and its value is
|
||||
* in the string 'attrval'. */
|
||||
somestring_reset(attrval);
|
||||
break;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The `SomeString` type and `somestring_` functions are stubs for any string
|
||||
handling library of your choosing. When using Glib, for example, one could use
|
||||
the [GString](https://developer.gnome.org/glib/stable/glib-Strings.html)
|
||||
type and the `g_string_new()`, `g_string_append()` and `g_string_free()`
|
||||
functions. For a lighter-weight string library there is also
|
||||
[kstring.h in klib](https://github.com/attractivechaos/klib), but the
|
||||
functionality required in the above example can easily be implemented in a few
|
||||
lines of pure C, too.
|
||||
|
||||
When buffering data into an ever-growing string, as done in the previous
|
||||
example, one should be careful to protect against memory exhaustion. This can
|
||||
be done trivially by limiting the size of the total XML document or the maximum
|
||||
length of the buffer. If you want to extract information from an XML document
|
||||
that might not fit into memory, but you know that the information you care
|
||||
about is limited in size and is only stored in specific attributes or elements,
|
||||
you can choose to ignore data you don't care about. For example, if you only
|
||||
want to extract the "Size" attribute and you know that its value is never
|
||||
larger than 63 bytes, you can limit your code to read only that value and store
|
||||
it into a small pre-allocated buffer:
|
||||
|
||||
```c
|
||||
char sizebuf[64], *sizecur = NULL, *tmp;
|
||||
while(..) {
|
||||
yxml_ret_t r = yxml_parse(x, ch);
|
||||
switch(r) {
|
||||
case YXML_ATTRSTART:
|
||||
if(strcmp(x->attr, "Size") == 0)
|
||||
sizecur = sizebuf;
|
||||
break;
|
||||
case YXML_ATTRVAL:
|
||||
if(!sizecur) /* Are we in the "Size" attribute? */
|
||||
break;
|
||||
/* Append x->data to sizecur while there is space */
|
||||
tmp = x->data;
|
||||
while(*tmp && sizecur < sizebuf+sizeof(sizebuf))
|
||||
*(sizecur++) = *(tmp++);
|
||||
if(sizecur == sizebuf+sizeof(sizebuf))
|
||||
exit(1); /* Too long attribute value, handle error */
|
||||
*sizecur = 0;
|
||||
break;
|
||||
case YXML_ATTREND:
|
||||
if(sizecur) {
|
||||
/* Now we have the value of the "Size" attribute in sizebuf */
|
||||
sizecur = NULL;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Finalization
|
||||
|
||||
```c
|
||||
yxml_t *x; /* An initialized state */
|
||||
yxml_ret_t r = yxml_eof(x);
|
||||
if(r < 0)
|
||||
exit(1); /* Handle error */
|
||||
else
|
||||
/* No errors in the XML document */
|
||||
```
|
||||
|
||||
Because `yxml_parse()` does not know when the end of the XML document has been
|
||||
reached, it is unable to detect certain errors in the document. This is why,
|
||||
after successfully parsing a complete document with `yxml_parse()`, the
|
||||
application should call `yxml_eof()` to perform some extra checks.
|
||||
|
||||
`yxml_eof()` will return `YXML_OK` if the parsed XML document is well-formed,
|
||||
`YXML_EEOF` otherwise. The following errors are not detected by
|
||||
`yxml_parse()` but will result in an error on `yxml_eof()`:
|
||||
|
||||
- The XML document did not contain a root element (e.g. an empty file).
|
||||
- The XML root element has not been closed (e.g. "`<a> ..`").
|
||||
- The XML document ended in the middle of a comment or PI (e.g.
|
||||
"`<a/><!-- ..`").
|
||||
|
||||
## Utility functions
|
||||
|
||||
```c
|
||||
size_t yxml_symlen(yxml_t *, const char *);
|
||||
```
|
||||
|
||||
`yxml_symlen()` returns the length of the element name (`x->elem`), attribute
|
||||
name (`x->attr`), or PI name (`x->pi`). When used correctly, it gives the same
|
||||
result as `strlen()`, except without having to scan through the string. This
|
||||
function should **ONLY** be used directly after the `YXML_ELEMSTART`,
|
||||
`YXML_ATTRSTART` or `YXML_PISTART` (respectively) tokens have been returned by
|
||||
`yxml_parse()`, calling this function at any other time may not give the
|
||||
correct results. This function should **NOT** be used on strings other than
|
||||
`x->elem`, `x->attr` or `x->pi`.
|
||||
0
index.cgi
Executable file → Normal file
0
index.cgi
Executable file → Normal file
19
mkchangelog.pl
Executable file
19
mkchangelog.pl
Executable file
|
|
@ -0,0 +1,19 @@
|
|||
#!/usr/bin/perl
# Generate a markdown changelog page from a plain-text changes.log.
#
# Usage: mkchangelog.pl <project> <title> < changes.log
#   <project> - project name (used to locate release tarballs); only the
#               part up to the first space or '/' is used.
#   <title>   - page title for the YAML front matter.
#
# Input format: entries separated by blank lines, each starting with a
# header line "<version> - <yyyy-mm-dd>" followed by "  - item" lines.
use strict;
use warnings;

my ($project) = (shift =~ /^([^ \/]+)/);
my $title = shift // '';

# Pandoc-style YAML metadata block.
print "---\ntitle: $title\npage-type: changelog\n...\n";

for my $entry (split /\n\n/, join '', <>) {
    # Strip the "<version> - <date>" header; skip malformed entries rather
    # than printing stale captures from a previous iteration.
    next unless $entry =~ s/^([0-9]+\.[0-9]+(?:\.[0-9]+)?)\s+-\s+([0-9]{4}-[0-9]{2}-[0-9]{2})//;
    my ($version, $date) = ($1, $2);

    print "\n- **$version** - $date";
    my $dl = "$project-$version.tar.gz";
    # Only link to the tarball if it actually exists in the download dir.
    print " - [dllink $dl]" if -f "pub/download/$dl";
    print "\n";

    for my $item (split /\r?\n\s+-\s+/, $entry) {
        # Escape markdown-significant characters in the item text.
        $item =~ s/([*_\\])/\\$1/g;
        print " - $item\n" if $item;
    }
}
|
||||
55
mkpod.pl
Executable file
55
mkpod.pl
Executable file
|
|
@ -0,0 +1,55 @@
|
|||
#!/usr/bin/perl
# Convert POD on STDIN to XHTML on STDOUT, resolving L<..> links to local
# site paths where known, with some post-processing to help pandoc produce
# cleaner markdown from the result.
use strict;
use warnings;

package POD2HTML;

use Pod::Simple::XHTML;
our @ISA = qw/Pod::Simple::XHTML/;

# Constructor: delegate to Pod::Simple::XHTML and re-bless into this class.
sub new {
    my $class = shift;
    return bless $class->SUPER::new(@_), $class;
}

# Map L<Module> links: known local modules go to site paths, everything
# else falls back to metacpan. An empty page name maps to '' (same page).
sub resolve_pod_page_link {
    my ($self, $page, $section) = @_;

    my $lnk = {
        'TUWF'           => '/tuwf/man',
        'TUWF::DB'       => '/tuwf/man/db',
        'TUWF::Intro'    => '/tuwf/man/intro',
        'TUWF::Misc'     => '/tuwf/man/misc',
        'TUWF::Request'  => '/tuwf/man/request',
        'TUWF::Response' => '/tuwf/man/response',
        'TUWF::XML'      => '/tuwf/man/xml',
        'TUWF::Validate' => '/tuwf/man/validate',
        ''               => '',
    }->{$page||''} // "https://metacpan.org/pod/$page";
    # Section anchors use '-' for spaces (pandoc/markdown convention).
    $lnk .= '#'.($section =~ s/ /-/gr) if $section;
    return $lnk;
}

# Map L<name(sect)> man-page links: locally hosted pages go to site paths,
# everything else to manned.org.
sub resolve_man_page_link {
    my ($self, $page, $section) = @_;

    my $lnk = {qw{
        globsterctl(1)      /globster/ctl
        globster-launch(1)  /globster/launch
        globster(1)         /globster/daemon
        globster-api(7)     /globster/api
        ncdu(1)             /ncdu/man
    }}->{$page||''} || ($page =~ /(.+)\((.)\)/ and "https://manned.org/$1.$2");
    return $lnk;
}


package main;

my $parser = POD2HTML->new();
my $html = '';
#$parser->anchor_items(1); # pandoc doesn't support this :(
$parser->output_string(\$html);
$parser->parse_file(\*STDIN);

# Some post-processing to improve the pandoc-generated markdown:
$html =~ s/^ //mg;           # drop single leading spaces
$html =~ s/<code> //g;       # drop stray space after <code>
$html =~ s/<li><p>/<li>/g;   # unwrap single-paragraph list items

print $html;
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue