Add benchmarking tool + improve integer formatting performance
Typical priorities: make it fast before fixing all the bugs. *shrug*
This commit is contained in:
parent
c16a9fa493
commit
9c8ce3f782
4 changed files with 259 additions and 13 deletions
101
FU/Benchmarks.pod
Normal file
101
FU/Benchmarks.pod
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
=head1 NAME
|
||||
|
||||
FU::Benchmarks - A bunch of automated benchmark results.
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This file is automatically generated from 'bench.PL' in the L<FU> distribution.
|
||||
These benchmarks compare performance of some FU functionality against similar
|
||||
modules found on CPAN.
|
||||
|
||||
=head1 CONTEXT
|
||||
|
||||
These benchmarks were performed on 2025-01-28 with perl v5.40.0 on x86_64-linux.
|
||||
|
||||
The following module versions were used:
|
||||
|
||||
=over
|
||||
|
||||
=item L<FU> 0.1
|
||||
|
||||
=item L<Cpanel::JSON::XS> 4.38
|
||||
|
||||
=item L<JSON::PP> 4.16
|
||||
|
||||
=item L<JSON::XS> 4.03
|
||||
|
||||
=item L<JSON::SIMD> 1.06
|
||||
|
||||
|
||||
|
||||
=back
|
||||
|
||||
=head1 BENCHMARKS
|
||||
|
||||
=head2 JSON Formatting
|
||||
|
||||
These benchmarks run on large-ish arrays with repeated values. JSON encoding is
|
||||
sufficiently fast that Perl function calling overhead tends to dominate for
|
||||
smaller inputs, but I don't find that overhead very interesting. Other modules
|
||||
will likely do better in benchmarks on small inputs.
|
||||
|
||||
Also worth noting that JSON::SIMD formatting code is forked from JSON::XS, the
|
||||
SIMD parts are only used for parsing.
|
||||
|
||||
API object from L<JSON::XS> documentation.
|
||||
|
||||
JSON::PP 5370/s
|
||||
Cpanel::JSON::XS 112211/s
|
||||
JSON::SIMD 128743/s
|
||||
JSON::XS 130606/s
|
||||
FU::Util 130813/s
|
||||
|
||||
Small integers
|
||||
|
||||
JSON::PP 113/s
|
||||
Cpanel::JSON::XS 7262/s
|
||||
JSON::SIMD 8217/s
|
||||
JSON::XS 8142/s
|
||||
FU::Util 9154/s
|
||||
|
||||
Large integers
|
||||
|
||||
JSON::PP 2136/s
|
||||
Cpanel::JSON::XS 29220/s
|
||||
JSON::SIMD 35834/s
|
||||
JSON::XS 35879/s
|
||||
FU::Util 117838/s
|
||||
|
||||
ASCII strings
|
||||
|
||||
JSON::PP 2893/s
|
||||
Cpanel::JSON::XS 118698/s
|
||||
JSON::SIMD 137235/s
|
||||
JSON::XS 135933/s
|
||||
FU::Util 172207/s
|
||||
|
||||
Unicode strings
|
||||
|
||||
JSON::PP 5186/s
|
||||
Cpanel::JSON::XS 97154/s
|
||||
JSON::SIMD 109441/s
|
||||
JSON::XS 105691/s
|
||||
FU::Util 106058/s
|
||||
|
||||
String escaping (few)
|
||||
|
||||
JSON::PP 4280/s
|
||||
Cpanel::JSON::XS 140105/s
|
||||
JSON::SIMD 161231/s
|
||||
JSON::XS 160077/s
|
||||
FU::Util 182074/s
|
||||
|
||||
String escaping (many)
|
||||
|
||||
JSON::PP 2235/s
|
||||
Cpanel::JSON::XS 144829/s
|
||||
JSON::SIMD 161006/s
|
||||
JSON::XS 161246/s
|
||||
FU::Util 136568/s
|
||||
|
||||
|
||||
|
|
@ -17,17 +17,14 @@ doesn't believe in the concept of a "batteries included" standard library.
|
|||
|
||||
This module comes with a custom C-based JSON parser and formatter. These
|
||||
functions conform strictly to L<RFC-8259|https://tools.ietf.org/html/rfc8259>,
|
||||
non-standard extensions are not supported and never will be.
|
||||
non-standard extensions are not supported and never will be. It also happens to
|
||||
be pretty fast, refer to L<FU::Benchmarks> for some numbers.
|
||||
|
||||
JSON booleans are decoded into C<builtin::true> and C<builtin::false>. When
|
||||
formatting, those builtin constants are the I<only> recognized boolean values -
|
||||
alternative representations such as C<JSON::PP::true> and C<JSON::PP::false>
|
||||
are not recognized and attempting to format such values will croak.
|
||||
|
||||
I<TODO: point to benchmarks.>
|
||||
|
||||
I<TODO: FU::JSON wrapper with somewhat-compatible JSON::{PP,XS} API>
|
||||
|
||||
=over
|
||||
|
||||
=item json_format($scalar, %options)
|
||||
|
|
|
|||
129
bench.PL
Executable file
129
bench.PL
Executable file
|
|
@ -0,0 +1,129 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
# Can be invoked as:
|
||||
# ./bench.PL # (or 'make bench') generates FU/Benchmarks.pod
|
||||
# ./bench.PL regex # run benchmark(s) matching the regex
|
||||
|
||||
use v5.36;
|
||||
use builtin 'true', 'false';
|
||||
use Benchmark ':hireswallclock', 'timethis';
|
||||
use Config;
|
||||
|
||||
# Modules taking part in the benchmarks. Each one is loaded via string-eval
# below; that eval is also where its version number is read.
my @benchmarked_modules = qw/
    FU
    Cpanel::JSON::XS
    JSON::PP
    JSON::XS
    JSON::SIMD
/;

# POD "=item" list (module name plus version) that replaces the ':modules'
# placeholder in the template after __DATA__.
my $modules = '';
for my $module (@benchmarked_modules) {
    $modules .= sprintf "=item L<%s> %s\n\n", $module, eval "require $module; \$${module}::VERSION";
}
|
||||
|
||||
|
||||
my(%bench, @bench);
|
||||
# Register a benchmark under $name.
#
# @arg is stored untouched and later expanded into the argument list of
# runbench(): a heading followed by (label, coderef) pairs. Names are also
# appended to @bench so benchmarks run in registration order.
# Returns the array reference stored in %bench.
sub bench($name, @arg) {
    push @bench, $name;
    return $bench{$name} = \@arg;
}
|
||||
|
||||
# Run one benchmark group and print its results to the currently selected
# output handle.
#
#   $text - heading printed before the results
#   @f    - flat list of (label, coderef) pairs; each coderef is timed with
#           Benchmark's timethis() and reported as iterations per wallclock
#           second
sub runbench($text, @f) {
    print $text, "\n\n";

    # TODO: Should include variance; factor-compared-to-slowest might be cool too
    for my ($label, $code) (@f) {
        my $timing = timethis(-1, $code, 0, 'none');
        my $per_second = $timing->iters / $timing->real;
        printf " %18s%10d/s\n", $label, $per_second;
    }

    print "\n";
}
|
||||
|
||||
# Run every registered benchmark whose name matches the regex $re, in
# registration order.
#
# Always returns the empty string. The POD generator calls this sub from the
# replacement side of an s///e, so whatever it returns is written into the
# generated file; the value of a sub that simply falls off the end of a loop
# is unspecified (see perlsub), hence the explicit return.
sub runbenches($re) {
    runbench($bench{$_}->@*) for grep /$re/, @bench;
    return '';
}
|
||||
|
||||
|
||||
|
||||
|
||||
# One pre-configured encoder object per competing module, set up with similar
# options for fair comparisons: allow_nonref, convert_blessed, and a boolean
# option that each module spells differently.
my $j_pp = JSON::PP->new
    ->allow_nonref
    ->core_bools
    ->convert_blessed;

my $j_cp = Cpanel::JSON::XS->new
    ->allow_nonref
    ->unblessed_bool
    ->convert_blessed;

my $j_si = JSON::SIMD->new
    ->allow_nonref
    ->core_bools
    ->convert_blessed;

my $j_xs = JSON::XS->new
    ->allow_nonref
    ->boolean_values([false, true])
    ->convert_blessed;
|
||||
use FU::Util 'json_format';
|
||||
|
||||
# Register a JSON formatting benchmark named "jsonfmt/$name".
#
#   $name - short identifier, used to select benchmarks from the command line
#   $text - heading printed above the results
#   $data - Perl data structure that every encoder has to serialize
#
# Every candidate encodes the very same $data; the labels end up in the first
# column of the results.
sub jsonfmt($name, $text, $data) {
    my @candidates = (
        'JSON::PP'         => sub { $j_pp->encode($data) },
        'Cpanel::JSON::XS' => sub { $j_cp->encode($data) },
        'JSON::SIMD'       => sub { $j_si->encode($data) },
        'JSON::XS'         => sub { $j_xs->encode($data) },
        'FU::Util'         => sub { json_format($data) },
    );
    return bench("jsonfmt/$name", $text, @candidates);
}
|
||||
|
||||
# From JSON::XS POD: the sample message object, ten independent copies.
jsonfmt(api => 'API object from L<JSON::XS> documentation.', [
    map {
        +{
            method => 'handleMessage',
            params => ['user1', 'we were just talking'],
            id     => undef,
            array  => [1, 11, 234, -5, 1e5, 1e7, 1, 0],
        }
    } 1 .. 10
]);

jsonfmt(ints => 'Small integers', [ -5000 .. 5000 ]);

# The values 2..11, each shifted left by every amount from 10 to 60 bits.
jsonfmt(intl => 'Large integers', [
    map {
        my $base = $_ + 1;
        map +($base << $_), 10 .. 60
    } 1 .. 10
]);

jsonfmt(strs => 'ASCII strings', [
    ('hello, world', 'one more string', 'another string') x 100
]);

# 'use utf8' is scoped to the do-block so only these literals are decoded as
# character strings.
jsonfmt(stru => 'Unicode strings', do {
    use utf8;
    [ ('グリザイアの果実 -LE FRUIT DE LA GRISAIA-', '💩', 'Я люблю нічого не робити') x 50 ];
});

jsonfmt(stres => 'String escaping (few)', [
    ('This string needs to "be escaped" a little bit') x 100
]);

jsonfmt(strel => 'String escaping (many)', [
    ("This \" \\ needs \b\x01\x02\x03\x04 more") x 100
]);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Entry point.
#
# Without arguments (or with the single argument 'bench') the POD template
# after __DATA__ is expanded into FU/Benchmarks.pod. Any other arguments are
# treated as regexes, and the matching benchmarks print to standard output.
if (!@ARGV || $ARGV[0] eq 'bench') {
    # Same YYYY-MM-DD local date that `date +%F` prints, without depending on
    # an external command being available.
    require POSIX;
    my $date = POSIX::strftime('%Y-%m-%d', localtime);

    my $target = 'FU/Benchmarks.pod';
    print "Writing to $target...\n";
    open my $F, '>', $target or die "Unable to open $target for writing: $!\n";

    # runbench() prints to the currently selected handle, so point that at
    # the output file while the template is being expanded.
    my $previous = select $F;
    while (<DATA>) {
        s/^:modules/$modules/;
        # The benchmark output is printed as a side effect; the placeholder
        # itself is replaced by nothing, explicitly, rather than by whatever
        # runbenches() happens to return.
        s/^:benches (.+)/runbenches($1); ''/e;
        s/^:context/These benchmarks were performed on $date with perl $^V on $Config{archname}./;
        print;
    }
    select $previous;

    # Buffered write errors (e.g. a full disk) only show up on close.
    close $F or die "Error writing $target: $!\n";
} else {
    runbenches($_) for @ARGV;
}
|
||||
|
||||
__DATA__
|
||||
=head1 NAME
|
||||
|
||||
FU::Benchmarks - A bunch of automated benchmark results.
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This file is automatically generated from 'bench.PL' in the L<FU> distribution.
|
||||
These benchmarks compare performance of some FU functionality against similar
|
||||
modules found on CPAN.
|
||||
|
||||
=head1 CONTEXT
|
||||
|
||||
:context
|
||||
|
||||
The following module versions were used:
|
||||
|
||||
=over
|
||||
|
||||
:modules
|
||||
|
||||
=back
|
||||
|
||||
=head1 BENCHMARKS
|
||||
|
||||
=head2 JSON Formatting
|
||||
|
||||
These benchmarks run on large-ish arrays with repeated values. JSON encoding is
|
||||
sufficiently fast that Perl function calling overhead tends to dominate for
|
||||
smaller inputs, but I don't find that overhead very interesting. Other modules
|
||||
will likely do better in benchmarks on small inputs.
|
||||
|
||||
Also worth noting that JSON::SIMD formatting code is forked from JSON::XS, the
|
||||
SIMD parts are only used for parsing.
|
||||
|
||||
:benches ^jsonfmt
|
||||
35
c/jsonfmt.c
35
c/jsonfmt.c
|
|
@ -5,7 +5,11 @@ static void fujson_fmt_str(fustr *out, const char *stri, size_t len, int utf8) {
|
|||
const unsigned char *str = (const unsigned char *)stri;
|
||||
unsigned char x = 0;
|
||||
|
||||
/* Validate entire string for conformance if this is flagged as a utf8 string, this lets us be lazy further on. */
|
||||
/* Validate entire string for conformance if this is flagged as a utf8
|
||||
* string, this lets us be lazy further on.
|
||||
* Commenting this out doubles the performance for formatting unicode
|
||||
* strings, I suspect there's room for optimizations in
|
||||
* is_c9strict_utf8_string(). */
|
||||
if (utf8 && !is_c9strict_utf8_string(str, len)) {
|
||||
return; /* TODO: Throw error. */
|
||||
}
|
||||
|
|
@ -63,9 +67,22 @@ static void fujson_fmt_str(fustr *out, const char *stri, size_t len, int utf8) {
|
|||
fustr_write(out, "\"", 1);
|
||||
}
|
||||
|
||||
/* All two-digit decimal pairs "00" through "99", a trick I borrowed from the Zig stdlib. */
|
||||
static const char fujson_digits[] =
|
||||
"00010203040506070809"
|
||||
"10111213141516171819"
|
||||
"20212223242526272829"
|
||||
"30313233343536373839"
|
||||
"40414243444546474849"
|
||||
"50515253545556575859"
|
||||
"60616263646566676869"
|
||||
"70717273747576777879"
|
||||
"80818283848586878889"
|
||||
"90919293949596979899";
|
||||
|
||||
static void fujson_fmt_int(fustr *out, SV *val) {
|
||||
char buf[32];
|
||||
size_t idx = 32;
|
||||
char *r = buf+31;
|
||||
int neg = 0;
|
||||
IV iv;
|
||||
UV uv;
|
||||
|
|
@ -83,13 +100,15 @@ static void fujson_fmt_int(fustr *out, SV *val) {
|
|||
return;
|
||||
}
|
||||
|
||||
while (uv > 0) {
|
||||
/* TODO: can use a lookup table to optimize for 0 - 100; need benchmark */
|
||||
buf[--idx] = '0' + (uv % 10);
|
||||
uv /= 10;
|
||||
while (uv >= 10) {
|
||||
r -= 2;
|
||||
memcpy(r, fujson_digits + ((uv % 100)<<1), 2);
|
||||
uv /= 100;
|
||||
}
|
||||
if (neg) buf[--idx] = '-';
|
||||
fustr_write(out, buf+idx, sizeof buf - idx);
|
||||
if (uv > 0) *(--r) = '0' + (uv % 10);
|
||||
if (neg) *(--r) = '-';
|
||||
uv = 31 - (r - buf);
|
||||
fustr_write(out, r, uv);
|
||||
}
|
||||
|
||||
static void fujson_fmt_av(fustr *out, AV *av) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue