From 9c8ce3f782f6d3c24cf6eabba98a46f91cdd9d0c Mon Sep 17 00:00:00 2001 From: Yorhel Date: Tue, 28 Jan 2025 14:49:29 +0100 Subject: [PATCH] Add benchmarking tool + improve integer formatting performance Typical priorities: make it fast before fixing all the bugs. *shrug* --- FU/Benchmarks.pod | 101 ++++++++++++++++++++++++++++++++++++ FU/Util.pod | 7 +-- bench.PL | 129 ++++++++++++++++++++++++++++++++++++++++++++++ c/jsonfmt.c | 35 ++++++++++--- 4 files changed, 259 insertions(+), 13 deletions(-) create mode 100644 FU/Benchmarks.pod create mode 100755 bench.PL diff --git a/FU/Benchmarks.pod b/FU/Benchmarks.pod new file mode 100644 index 0000000..125373f --- /dev/null +++ b/FU/Benchmarks.pod @@ -0,0 +1,101 @@ +=head1 NAME + +FU::Benchmarks - A bunch of automated benchmark results. + +=head1 DESCRIPTION + +This file is automatically generated from 'bench.PL' in the L<FU> distribution. +These benchmarks compare performance of some FU functionality against similar +modules found on CPAN. + +=head1 CONTEXT + +These benchmarks were performed on 2025-01-28 with perl v5.40.0 on x86_64-linux. + +The following module versions were used: + +=over + +=item L<FU> 0.1 + +=item L<Cpanel::JSON::XS> 4.38 + +=item L<JSON::PP> 4.16 + +=item L<JSON::XS> 4.03 + +=item L<JSON::SIMD> 1.06 + + + +=back + +=head1 BENCHMARKS + +=head2 JSON Formatting + +These benchmarks run on large-ish arrays with repeated values. JSON encoding is +sufficiently fast that Perl function calling overhead tends to dominate for +smaller inputs, but I don't find that overhead very interesting. Other modules +will likely do better in benchmarks on small inputs. + +Also worth noting that JSON::SIMD formatting code is forked from JSON::XS, the +SIMD parts are only used for parsing. + +API object from L<JSON::XS> documentation. 
+ + JSON::PP 5370/s + Cpanel::JSON::XS 112211/s + JSON::SIMD 128743/s + JSON::XS 130606/s + FU::Util 130813/s + +Small integers + + JSON::PP 113/s + Cpanel::JSON::XS 7262/s + JSON::SIMD 8217/s + JSON::XS 8142/s + FU::Util 9154/s + +Large integers + + JSON::PP 2136/s + Cpanel::JSON::XS 29220/s + JSON::SIMD 35834/s + JSON::XS 35879/s + FU::Util 117838/s + +ASCII strings + + JSON::PP 2893/s + Cpanel::JSON::XS 118698/s + JSON::SIMD 137235/s + JSON::XS 135933/s + FU::Util 172207/s + +Unicode strings + + JSON::PP 5186/s + Cpanel::JSON::XS 97154/s + JSON::SIMD 109441/s + JSON::XS 105691/s + FU::Util 106058/s + +String escaping (few) + + JSON::PP 4280/s + Cpanel::JSON::XS 140105/s + JSON::SIMD 161231/s + JSON::XS 160077/s + FU::Util 182074/s + +String escaping (many) + + JSON::PP 2235/s + Cpanel::JSON::XS 144829/s + JSON::SIMD 161006/s + JSON::XS 161246/s + FU::Util 136568/s + + diff --git a/FU/Util.pod b/FU/Util.pod index d543796..e82bb1e 100644 --- a/FU/Util.pod +++ b/FU/Util.pod @@ -17,17 +17,14 @@ doesn't believe in the concept of a "batteries included" standard library. This module comes with a custom C-based JSON parser and formatter. These functions conform strictly to L<RFC-8259|https://tools.ietf.org/html/rfc8259>, -non-standard extensions are not supported and never will be. +non-standard extensions are not supported and never will be. It also happens to +be pretty fast, refer to L<FU::Benchmarks> for some numbers. JSON booleans are decoded into C<builtin::true> and C<builtin::false>. When formatting, those builtin constants are the I<only> recognized boolean values - alternative representations such as C<JSON::PP::true> and C<JSON::PP::false> are not recognized and attempting to format such values will croak. 
-I - -I - =over =item json_format($scalar, %options) diff --git a/bench.PL b/bench.PL new file mode 100755 index 0000000..06e49da --- /dev/null +++ b/bench.PL @@ -0,0 +1,129 @@ +#!/usr/bin/perl + +# Can be invoked as: +# ./bench.PL # (or 'make bench') generates FU/Benchmarks.pod +# ./bench.PL regex # run benchmark(s) matching the regex + +use v5.36; +use builtin 'true', 'false'; +use Benchmark ':hireswallclock', 'timethis'; +use Config; + +my $modules = join '', map sprintf("=item L<%s> %s\n\n", $_, eval "require $_; \$${_}::VERSION"), qw/ + FU + Cpanel::JSON::XS + JSON::PP + JSON::XS + JSON::SIMD +/; + + +my(%bench, @bench); +sub bench($name, @arg) { # Register a benchmark: @bench keeps registration order, %bench maps the name to the arguments later handed to runbench(). + push @bench, $name; + $bench{$name} = \@arg; +} + +sub runbench($text, @f) { # Print the heading $text, then time every (label, coderef) pair in @f and print one result line per pair. + print "$text\n\n"; + + # TODO: Should include variance; factor-compared-to-slowest might be cool too + for my ($t, $f) (@f) { + my $o = timethis -1, $f, 0, 'none'; # Negative count: run for at least that many CPU seconds; style 'none' suppresses Benchmark's own report. + printf " %18s%10d/s\n", $t, $o->iters/$o->real; # Iterations per second of real (wallclock) time. + } + print "\n"; +} + +sub runbenches($re) { # Run every registered benchmark whose name matches $re, in registration order. + runbench $bench{$_}->@* for grep /$re/, @bench; +} + + + + +# Use similar options for fair comparisons. +my $j_cp = Cpanel::JSON::XS->new->allow_nonref->unblessed_bool->convert_blessed; +my $j_pp = JSON::PP->new->allow_nonref->core_bools->convert_blessed; +my $j_xs = JSON::XS->new->allow_nonref->boolean_values([false,true])->convert_blessed; +my $j_si = JSON::SIMD->new->allow_nonref->core_bools->convert_blessed; +use FU::Util 'json_format'; + +sub jsonfmt($name, $text, $data) { # Register benchmark "jsonfmt/$name": encode the same $data with each JSON module and with FU::Util. + bench "jsonfmt/$name", $text, + 'JSON::PP', sub { $j_pp->encode($data) }, + 'Cpanel::JSON::XS',sub { $j_cp->encode($data) }, + 'JSON::SIMD', sub { $j_si->encode($data) }, + 'JSON::XS', sub { $j_xs->encode($data) }, + 'FU::Util', sub { json_format $data }; +} + +# From JSON::XS POD. 
+jsonfmt api => 'API object from L<JSON::XS> documentation.', + [ map +{method => 'handleMessage', params => ['user1','we were just talking'], 'id' => undef, 'array' => [1,11,234,-5,1e5,1e7,1,0]}, 1..10 ]; + +jsonfmt ints => 'Small integers', [ -5000..5000 ]; +jsonfmt intl => 'Large integers', [ map { my $n=$_; map +($n+1<<$_), 10..60 } 1..10 ]; +jsonfmt strs => 'ASCII strings', [ map +('hello, world', 'one more string', 'another string'), 1..100 ]; +jsonfmt stru => 'Unicode strings', do { use utf8; + [ map +('グリザイアの果実 -LE FRUIT DE LA GRISAIA-', '💩', 'Я люблю нічого не робити'), 1..50 ]; +}; +jsonfmt stres => 'String escaping (few)', [ map 'This string needs to "be escaped" a little bit', 1..100 ]; +jsonfmt strel => 'String escaping (many)', [ map "This \" \\ needs \b\x01\x02\x03\x04 more", 1..100 ]; + + + + + + +if (!@ARGV || $ARGV[0] eq 'bench') { # No argument (or 'bench'): regenerate the POD file; otherwise treat each argument as a benchmark-name regex. + chomp(my $date = `date +%F`); + print "Writing to FU/Benchmarks.pod...\n"; + open my $F, '>', 'FU/Benchmarks.pod' or die "Cannot write FU/Benchmarks.pod: $!"; + select $F; # runbench() prints to the currently selected handle, so its results land in the POD file. + while (<DATA>) { # Copy the POD template below __DATA__ line by line, expanding the ":marker" lines. + s/^:modules/$modules/; + s/^:benches (.+)/runbenches $1/e; # /e: run the matching benchmarks at this point in the output. + s/^:context/These benchmarks were performed on $date with perl $^V on $Config{archname}./; + print; + } +} else { + runbenches $_ for @ARGV; +} + +__DATA__ +=head1 NAME + +FU::Benchmarks - A bunch of automated benchmark results. + +=head1 DESCRIPTION + +This file is automatically generated from 'bench.PL' in the L<FU> distribution. +These benchmarks compare performance of some FU functionality against similar +modules found on CPAN. + +=head1 CONTEXT + +:context + +The following module versions were used: + +=over + +:modules + +=back + +=head1 BENCHMARKS + +=head2 JSON Formatting + +These benchmarks run on large-ish arrays with repeated values. JSON encoding is +sufficiently fast that Perl function calling overhead tends to dominate for +smaller inputs, but I don't find that overhead very interesting. Other modules +will likely do better in benchmarks on small inputs. 
+ +Also worth noting that JSON::SIMD formatting code is forked from JSON::XS, the +SIMD parts are only used for parsing. + +:benches ^jsonfmt diff --git a/c/jsonfmt.c b/c/jsonfmt.c index c7c843f..0285c57 100644 --- a/c/jsonfmt.c +++ b/c/jsonfmt.c @@ -5,7 +5,11 @@ static void fujson_fmt_str(fustr *out, const char *stri, size_t len, int utf8) { const unsigned char *str = (const unsigned char *)stri; unsigned char x = 0; - /* Validate entire string for conformance if this is flagged as a utf8 string, this lets us be lazy further on. */ + /* Validate entire string for conformance if this is flagged as a utf8 + * string, this lets us be lazy further on. + * Commenting this out doubles the performance for formatting unicode + * strings, I suspect there's room for optimizations in + * is_c9strict_utf8_string(). */ if (utf8 && !is_c9strict_utf8_string(str, len)) { return; /* TODO: Throw error. */ } @@ -63,9 +67,22 @@ static void fujson_fmt_str(fustr *out, const char *stri, size_t len, int utf8) { fustr_write(out, "\"", 1); } +/* The two-digit decimal strings "00" through "99", a trick I borrowed from the Zig stdlib. 
*/ +static const char fujson_digits[] = /* the two chars at offset 2*i are the decimal text of i, for i in 0..99 */ + "00010203040506070809" + "10111213141516171819" + "20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + "70717273747576777879" + "80818283848586878889" + "90919293949596979899"; + static void fujson_fmt_int(fustr *out, SV *val) { char buf[32]; - size_t idx = 32; + char *r = buf+31; /* write cursor: digits are stored right-to-left, ending just before buf[31] */ int neg = 0; IV iv; UV uv; @@ -83,13 +100,15 @@ static void fujson_fmt_int(fustr *out, SV *val) { return; } - while (uv > 0) { - /* TODO: can use a lookup table to optimize for 0 - 100; need benchmark */ - buf[--idx] = '0' + (uv % 10); - uv /= 10; + while (uv >= 10) { /* peel off two decimal digits per iteration */ + r -= 2; + memcpy(r, fujson_digits + ((uv % 100)<<1), 2); /* <<1: each table entry is two chars wide */ + uv /= 100; } - if (neg) buf[--idx] = '-'; - fustr_write(out, buf+idx, sizeof buf - idx); + if (uv > 0) *(--r) = '0' + (uv % 10); /* remaining single leading digit, if any (uv < 10 here) */ + if (neg) *(--r) = '-'; + uv = 31 - (r - buf); /* uv is reused to hold the number of bytes written at r */ + fustr_write(out, r, uv); } static void fujson_fmt_av(fustr *out, AV *av) {