Add benchmarking tool + improve integer formatting performance

Typical priorities: make it fast before fixing all the bugs. *shrug*
This commit is contained in:
Yorhel 2025-01-28 14:49:29 +01:00
parent c16a9fa493
commit 9c8ce3f782
4 changed files with 259 additions and 13 deletions

101
FU/Benchmarks.pod Normal file
View file

@ -0,0 +1,101 @@
=head1 NAME
FU::Benchmarks - A bunch of automated benchmark results.
=head1 DESCRIPTION
This file is automatically generated from 'bench.PL' in the L<FU> distribution.
These benchmarks compare performance of some FU functionality against similar
modules found on CPAN.
=head1 CONTEXT
These benchmarks were performed on 2025-01-28 with perl v5.40.0 on x86_64-linux.
The following module versions were used:
=over
=item L<FU> 0.1
=item L<Cpanel::JSON::XS> 4.38
=item L<JSON::PP> 4.16
=item L<JSON::XS> 4.03
=item L<JSON::SIMD> 1.06
=back
=head1 BENCHMARKS
=head2 JSON Formatting
These benchmarks run on large-ish arrays with repeated values. JSON encoding is
sufficiently fast that Perl function calling overhead tends to dominate for
smaller inputs, but I don't find that overhead very interesting. Other modules
will likely do better in benchmarks on small inputs.
Also worth noting that JSON::SIMD formatting code is forked from JSON::XS, the
SIMD parts are only used for parsing.
API object from L<JSON::XS> documentation.
JSON::PP 5370/s
Cpanel::JSON::XS 112211/s
JSON::SIMD 128743/s
JSON::XS 130606/s
FU::Util 130813/s
Small integers
JSON::PP 113/s
Cpanel::JSON::XS 7262/s
JSON::SIMD 8217/s
JSON::XS 8142/s
FU::Util 9154/s
Large integers
JSON::PP 2136/s
Cpanel::JSON::XS 29220/s
JSON::SIMD 35834/s
JSON::XS 35879/s
FU::Util 117838/s
ASCII strings
JSON::PP 2893/s
Cpanel::JSON::XS 118698/s
JSON::SIMD 137235/s
JSON::XS 135933/s
FU::Util 172207/s
Unicode strings
JSON::PP 5186/s
Cpanel::JSON::XS 97154/s
JSON::SIMD 109441/s
JSON::XS 105691/s
FU::Util 106058/s
String escaping (few)
JSON::PP 4280/s
Cpanel::JSON::XS 140105/s
JSON::SIMD 161231/s
JSON::XS 160077/s
FU::Util 182074/s
String escaping (many)
JSON::PP 2235/s
Cpanel::JSON::XS 144829/s
JSON::SIMD 161006/s
JSON::XS 161246/s
FU::Util 136568/s

View file

@ -17,17 +17,14 @@ doesn't believe in the concept of a "batteries included" standard library.
This module comes with a custom C-based JSON parser and formatter. These
functions conform strictly to L<RFC-8259|https://tools.ietf.org/html/rfc8259>,
non-standard extensions are not supported and never will be.
non-standard extensions are not supported and never will be. It also happens to
be pretty fast, refer to L<FU::Benchmarks> for some numbers.
JSON booleans are decoded into C<builtin::true> and C<builtin::false>. When
formatting, those builtin constants are the I<only> recognized boolean values -
alternative representations such as C<JSON::PP::true> and C<JSON::PP::false>
are not recognized and attempting to format such values will croak.
I<TODO: point to benchmarks.>
I<TODO: FU::JSON wrapper with somewhat-compatible JSON::{PP,XS} API>
=over
=item json_format($scalar, %options)

129
bench.PL Executable file
View file

@ -0,0 +1,129 @@
#!/usr/bin/perl
# Can be invoked as:
# ./bench.PL # (or 'make bench') generates FU/Benchmarks.pod
# ./bench.PL regex # run benchmark(s) matching the regex
use v5.36;
use builtin 'true', 'false';
use Benchmark ':hireswallclock', 'timethis';
use Config;
# POD "=item" list naming every compared module together with its version.
# The string eval also loads each module as a side effect of looking up its
# $VERSION.
my $modules = do {
    my $items = '';
    for my $module (qw/FU Cpanel::JSON::XS JSON::PP JSON::XS JSON::SIMD/) {
        # The eval is deliberately left in the sprintf argument list so it is
        # evaluated in list context.
        $items .= sprintf "=item L<%s> %s\n\n", $module, eval "require $module; \$${module}::VERSION";
    }
    $items;
};
# Benchmark registry: @bench remembers the names in registration order, %bench
# maps each name to the argument list that is later passed to runbench().
my (@bench, %bench);

# Register a benchmark.
#   $name - registry key (matched against the regexes given to runbenches)
#   @arg  - stored untouched; handed to runbench() when the benchmark is run
sub bench($name, @arg) {
    push(@bench, $name);
    return $bench{$name} = [@arg];
}
# Run one group of competing implementations and print the results to the
# currently selected output handle.
#   $text - description printed above the results
#   @f    - flat list of (label, coderef) pairs, run in the given order
# Each coderef is timed with Benchmark's timethis() using a count of -1 (a
# negative count asks for a minimum number of CPU seconds) and reported as
# iterations per second of measured real time.
sub runbench($text, @f) {
    print "$text\n\n";
    # TODO: Should include variance; factor-compared-to-slowest might be cool too
    # Walk the pairs by index: multi-variable foreach is still experimental
    # (and warns) on the perl 5.36 that this script declares as its minimum.
    for (my $i = 0; $i < @f; $i += 2) {
        my ($label, $code) = @f[$i, $i + 1];
        my $timing = timethis(-1, $code, 0, 'none');
        printf " %18s%10d/s\n", $label, $timing->iters/$timing->real;
    }
    print "\n";
}
# Run every registered benchmark whose name matches the regex $re, in
# registration order, passing its stored arguments to runbench().
sub runbenches($re) {
    runbench(@{ $bench{$_} }) for grep { /$re/ } @bench;
}
# One pre-configured encoder object per competing module.  Use similar options
# for fair comparisons.  No 'use' lines exist for these classes: they are
# loaded by the string eval that builds $modules.
my $j_cp = Cpanel::JSON::XS->new
    ->allow_nonref
    ->unblessed_bool
    ->convert_blessed;
my $j_pp = JSON::PP->new
    ->allow_nonref
    ->core_bools
    ->convert_blessed;
my $j_xs = JSON::XS->new
    ->allow_nonref
    ->boolean_values([false,true])
    ->convert_blessed;
my $j_si = JSON::SIMD->new
    ->allow_nonref
    ->core_bools
    ->convert_blessed;
use FU::Util 'json_format';
# Register a JSON formatting benchmark.
#   $name - short identifier; registered as "jsonfmt/$name"
#   $text - description printed above the results
#   $data - the Perl data structure every module has to encode
sub jsonfmt($name, $text, $data) {
    my @contenders = (
        'JSON::PP'         => sub { $j_pp->encode($data) },
        'Cpanel::JSON::XS' => sub { $j_cp->encode($data) },
        'JSON::SIMD'       => sub { $j_si->encode($data) },
        'JSON::XS'         => sub { $j_xs->encode($data) },
        'FU::Util'         => sub { json_format($data) },
    );
    return bench("jsonfmt/$name", $text, @contenders);
}
# Benchmark inputs.  Each call registers one "jsonfmt/<name>" benchmark.

# From JSON::XS POD.
jsonfmt api => 'API object from L<JSON::XS> documentation.', [
    map {
        +{
            method => 'handleMessage',
            params => ['user1', 'we were just talking'],
            id     => undef,
            array  => [1, 11, 234, -5, 1e5, 1e7, 1, 0],
        }
    } 1..10
];

jsonfmt ints => 'Small integers', [ -5000..5000 ];

# (2..11) each shifted left by 10..60 bits.
jsonfmt intl => 'Large integers', [
    map { my $n = $_; map +(($n + 1) << $_), 10..60 } 1..10
];

jsonfmt strs => 'ASCII strings', [
    ('hello, world', 'one more string', 'another string') x 100
];

# 'use utf8' is scoped to the do-block so only these literals are affected.
jsonfmt stru => 'Unicode strings', do {
    use utf8;
    [ ('グリザイアの果実 -LE FRUIT DE LA GRISAIA-', '💩', 'Я люблю нічого не робити') x 50 ];
};

jsonfmt stres => 'String escaping (few)', [
    ('This string needs to "be escaped" a little bit') x 100
];

jsonfmt strel => 'String escaping (many)', [
    ("This \" \\ needs \b\x01\x02\x03\x04 more") x 100
];
# Entry point.
#   no arguments, or 'bench' as first argument:
#       regenerate FU/Benchmarks.pod from the template in the DATA section,
#       expanding the ':modules', ':benches <regex>' and ':context' lines.
#   anything else:
#       treat every argument as a regex and print the matching benchmarks to
#       the current output handle.
if (!@ARGV || $ARGV[0] eq 'bench') {
    my $pod = 'FU/Benchmarks.pod';

    # Format today's (local) date in-process instead of shelling out to date(1).
    require POSIX;
    my $date = POSIX::strftime('%Y-%m-%d', localtime);

    print "Writing to $pod...\n";
    open my $F, '>', $pod or die "Cannot open $pod for writing: $!\n";

    # runbench() prints to the selected handle, so redirect it for the
    # duration of the template expansion and restore it afterwards.
    my $previous = select $F;
    while (my $line = <DATA>) {
        $line =~ s/^:modules/$modules/;
        # The benchmarks write their own output; replace the marker itself
        # with an explicit empty string rather than whatever runbenches()
        # happens to return.
        $line =~ s/^:benches (.+)/runbenches($1); ''/e;
        $line =~ s/^:context/These benchmarks were performed on $date with perl $^V on $Config{archname}./;
        print $line;
    }
    select $previous;

    # Buffered write errors only surface on close.
    close $F or die "Cannot write $pod: $!\n";
} else {
    runbenches $_ for @ARGV;
}
__DATA__
=head1 NAME
FU::Benchmarks - A bunch of automated benchmark results.
=head1 DESCRIPTION
This file is automatically generated from 'bench.PL' in the L<FU> distribution.
These benchmarks compare performance of some FU functionality against similar
modules found on CPAN.
=head1 CONTEXT
:context
The following module versions were used:
=over
:modules
=back
=head1 BENCHMARKS
=head2 JSON Formatting
These benchmarks run on large-ish arrays with repeated values. JSON encoding is
sufficiently fast that Perl function calling overhead tends to dominate for
smaller inputs, but I don't find that overhead very interesting. Other modules
will likely do better in benchmarks on small inputs.
Also worth noting that JSON::SIMD formatting code is forked from JSON::XS, the
SIMD parts are only used for parsing.
:benches ^jsonfmt

View file

@ -5,7 +5,11 @@ static void fujson_fmt_str(fustr *out, const char *stri, size_t len, int utf8) {
const unsigned char *str = (const unsigned char *)stri;
unsigned char x = 0;
/* Validate entire string for conformance if this is flagged as a utf8 string, this lets us be lazy further on. */
/* Validate entire string for conformance if this is flagged as a utf8
* string, this lets us be lazy further on.
* Commenting this out doubles the performance for formatting unicode
* strings, I suspect there's room for optimizations in
* is_c9strict_utf8_string(). */
if (utf8 && !is_c9strict_utf8_string(str, len)) {
return; /* TODO: Throw error. */
}
@ -63,9 +67,22 @@ static void fujson_fmt_str(fustr *out, const char *stri, size_t len, int utf8) {
fustr_write(out, "\"", 1);
}
/* Lookup table holding the two-character decimal representations of 00
 * through 99, concatenated: the two digits for n start at offset 2*n. This
 * lets the integer formatter emit two digits per division by 100.
 * A trick I borrowed from the Zig stdlib. */
static const char fujson_digits[] =
"00010203040506070809"
"10111213141516171819"
"20212223242526272829"
"30313233343536373839"
"40414243444546474849"
"50515253545556575859"
"60616263646566676869"
"70717273747576777879"
"80818283848586878889"
"90919293949596979899";
static void fujson_fmt_int(fustr *out, SV *val) {
char buf[32];
size_t idx = 32;
char *r = buf+31;
int neg = 0;
IV iv;
UV uv;
@ -83,13 +100,15 @@ static void fujson_fmt_int(fustr *out, SV *val) {
return;
}
while (uv > 0) {
/* TODO: can use a lookup table to optimize for 0 - 100; need benchmark */
buf[--idx] = '0' + (uv % 10);
uv /= 10;
while (uv >= 10) {
r -= 2;
memcpy(r, fujson_digits + ((uv % 100)<<1), 2);
uv /= 100;
}
if (neg) buf[--idx] = '-';
fustr_write(out, buf+idx, sizeof buf - idx);
if (uv > 0) *(--r) = '0' + (uv % 10);
if (neg) *(--r) = '-';
uv = 31 - (r - buf);
fustr_write(out, r, uv);
}
static void fujson_fmt_av(fustr *out, AV *av) {