diff --git a/dat/dump-insbench b/dat/dump-insbench new file mode 100644 index 0000000..4b3d744 --- /dev/null +++ b/dat/dump-insbench @@ -0,0 +1,84 @@ +=pod + +I<2013-07-05> - One of my favourite data structures in C is the ordered vector +(or array, whatever you call them). Incredibly simple to implement, very low +memory overhead, and can provide O(log n) lookup with a simple binary search. +However, ordered vectors have one very weak point: insertion and deletion of +items is O(n). For small n that doesn't really matter, but if the number of +items in the list can grow a bit, you may run into performance issues. If +you're not careful, this could even turn your ordered vector into an attack +vector (apologies for the terrible pun). + +My goal with this benchmark is to get a feeling for how, exactly, insertion +performance behaves with an ordered vector. What values of n are "small"? And +how much worse does insertion performance get compared to more complex data +structures? + +For comparison, I chose the B-tree and hash table implementations from +L (from commit fff70758, to be +precise). My goal wasn't to benchmark the performance of different +implementations, so I simply chose two implementations that I suspect are among +the fastest. The vector implementation in the benchmarks is my own creation: +L from +the L code base. + +B L + + +=head2 Best case & worst case + +For a start, I decided to benchmark the best and worst case performance of +inserting elements into a vector. The best case happens when inserting all +items at the end of the vector, the worst case when inserting them in front. +The B-tree and hash table benchmarks provided for comparison have all items +inserted in order. + +I'm cheating here with the vector implementation, because all elements are +inserted in the list without first finding out the position with a binary +search. 
Actual performance will thus be a bit worse, depending on whether +the final application needs that binary search or whether it can assume its +input to be already sorted. + +L<[img graph insbench-bench-thumb.png ]|http://dev.yorhel.nl/img/insbench-bench.png> + +Gnuplot script: (The awk(ward) part can likely be done natively in gnuplot as +well, but I was too lazy to figure out how) + + set terminal png size 1000, 1500 + set output "bench.png" + set logscale xy + set xlabel "number of items" + set ylabel "average time per insert (ms)" + set grid mxtics xtics mytics ytics + plot "< awk '{print $1, $2/$1*1000}' bench-vec" title 'vector, worst case',\ + "< awk '{print $1, $2/$1*1000}' bench-best" title 'vector, best case',\ + "< awk '{print $1, $2/$1*1000}' bench-hash" title 'khash',\ + "< awk '{print $1, $2/$1*1000}' bench-btree" title 'kbtree' + + +=head2 Average case + +For the second benchmark I inserted values created with C, which should +be a more accurate simulation of some real-world applications. This time I'm +not cheating with the vector implementation: a binary search is performed in +order to insert the items in the correct location. + +L<[img graph insbench-rand-thumb.png ]|http://dev.yorhel.nl/img/insbench-rand.png> + + set terminal png size 1000, 1500 + set output "bench-rand.png" + set logscale xy + set xlabel "number of items" + set ylabel "average time per insert (ms)" + set grid mxtics xtics mytics ytics + plot "< awk '{print $1, $2/$1*1000}' rand-vec" title 'vector',\ + "< awk '{print $1, $2/$1*1000}' rand-hash" title 'khash',\ + "< awk '{print $1, $2/$1*1000}' rand-btree" title 'kbtree' + + +=head2 Benchmarking setup + +All benchmarks were performed on a 3 GHz Core Duo E8400 with a 6 MiB cache. +Compiled with the Gentoo-provided gcc 4.6.3 at -O3, linked against glibc 2.15, +and run on a Linux 3.8.13-gentoo kernel. Boring details, but somehow good to +document. 
diff --git a/dat/dump-nccolour b/dat/dump-nccolour index e28cf93..3abf6e8 100644 --- a/dat/dump-nccolour +++ b/dat/dump-nccolour @@ -97,3 +97,7 @@ program, which also explains what each column means. =item Mac OS X, iTerm2 [img scr nccol-osx-iterm2.png ] + +=item CentOS 6.4 + +[img scr nccol-centos64.png ] diff --git a/dat/ylib b/dat/ylib new file mode 120000 index 0000000..b6fb969 --- /dev/null +++ b/dat/ylib @@ -0,0 +1 @@ +../../ylib/ \ No newline at end of file diff --git a/img/globster.png b/img/globster.png new file mode 100644 index 0000000..8219997 Binary files /dev/null and b/img/globster.png differ diff --git a/img/insbench-bench-thumb.png b/img/insbench-bench-thumb.png new file mode 100644 index 0000000..f085519 Binary files /dev/null and b/img/insbench-bench-thumb.png differ diff --git a/img/insbench-bench.png b/img/insbench-bench.png new file mode 100644 index 0000000..ac6d6bb Binary files /dev/null and b/img/insbench-bench.png differ diff --git a/img/insbench-rand-thumb.png b/img/insbench-rand-thumb.png new file mode 100644 index 0000000..6e5e2eb Binary files /dev/null and b/img/insbench-rand-thumb.png differ diff --git a/img/insbench-rand.png b/img/insbench-rand.png new file mode 100644 index 0000000..af585c0 Binary files /dev/null and b/img/insbench-rand.png differ diff --git a/img/nccol-centos64.png b/img/nccol-centos64.png new file mode 100644 index 0000000..402da97 Binary files /dev/null and b/img/nccol-centos64.png differ diff --git a/index.cgi b/index.cgi index 5e5e257..5b2bf85 100755 --- a/index.cgi +++ b/index.cgi @@ -12,6 +12,7 @@ BEGIN { ($ROOT = abs_path $0) =~ s{index\.cgi$}{}; } my @changes = ( + [ '2013-07-05', '/dump/insbench', 'Documented a little data structure benchmark' ], [ '2013-06-15', '/ncdc', 'ncdc 1.17 released' ], [ '2013-05-09', '/ncdu', 'ncdu 1.10 released' ], [ '2013-04-04', '/ylib', 'Created a page for Ylib' ], @@ -103,6 +104,7 @@ TUWF::register( qr{dump/awshrink} => sub { podpage(shift, 'dump-awshrink', 'dump', 
'awshrink', 'AWStats Data File Shrinker') }, qr{dump/grenamr} => sub { podpage(shift, 'dump-grenamr', 'dump', 'grenamr', 'GTK+ Mass File Renamer') }, qr{dump/nccolour} => sub { podpage(shift, 'dump-nccolour', 'dump', 'nccolour', 'Colours in NCurses') }, + qr{dump/insbench} => sub { podpage(shift, 'dump-insbench', 'dump', 'insbench', 'Insertion Performance Benchmarks') }, qr{feed\.atom} => \&atom, qr{(ncdc|ncdu|globster)/bug} => \&bug_list, qr{(ncdc|ncdu|globster)/bug/post} => \&bug_post, @@ -527,6 +529,7 @@ sub htmlMenu { $m->('/dump/awshrink','AWShrink', $o{page} eq 'dump' && $o{sec} eq 'awshrink'); $m->('/dump/grenamr', 'Grenamr', $o{page} eq 'dump' && $o{sec} eq 'grenamr'); $m->('/dump/nccolour','NC-Colour', $o{page} eq 'dump' && $o{sec} eq 'nccolour'); + $m->('/dump/insbench','Ins-bench', $o{page} eq 'dump' && $o{sec} eq 'insbench'); }); } if($o{spec}{$o{page}}) {