bench: Cache benchmark results + better re-run filtering

2025-01-29 10:00:08 +01:00 · 2025-01-29 10:00:08 +01:00 · 3ae9347ad2
commit 3ae9347ad2
parent 12326ca8e4
3 changed files with 174 additions and 82 deletions
--- a/bench.PL
+++ b/bench.PL
@ -1,17 +1,21 @@
 #!/usr/bin/perl

-exit if @ARGV && @ARGV[0] eq 'bench';
-
 # Can be invoked as:
-#   ./bench.PL          # (or 'make bench') generates FU/Benchmarks.pod
-#   ./bench.PL regex    # run benchmark(s) matching the regex
+#   ./bench.PL          # generates FU/Benchmarks.pod, running new benchmarks as necessary
+#   ./bench.PL id func  # invalidate cache for the (regex-)matching benchmark IDs and funcs and re-run them
+#
+# This script obviously has more dependencies than the FU distribution itself.
+# It's supposed to be used by maintainers, not users.
+
+
+# MakeMaker automatically runs this script as a default build step, but that's not very useful.
+BEGIN { exit if @ARGV && @ARGV[0] eq 'bench'; }

 use v5.36;
 use builtin 'true', 'false';
 use Benchmark ':hireswallclock', 'timethis';
-use Config;

-my $modules = join '', map sprintf("=item L<%s> %s\n\n", $_, eval "require $_; \$${_}::VERSION"), qw/
+my %modules = map +($_, eval "require $_; \$${_}::VERSION"), qw/
    FU
    Cpanel::JSON::XS
    JSON::PP
@ -19,26 +23,43 @@ my $modules = join '', map sprintf("=item L<%s> %s\n\n", $_, eval "require $_; \
    JSON::SIMD
 /;

-
-my(%bench, @bench);
-sub bench($name, @arg) {
-    push @bench, $name;
-    $bench{$name} = \@arg;
-}
-
-sub runbench($text, @f) {
-    print "$text\n\n";
-
-    # TODO: Should include variance; factor-compared-to-slowest might be cool too
-    for my ($t, $f) (@f) {
-        my $o = timethis -1, $f, 0, 'none';
-        printf "  %18s%10d/s\n", $t, $o->iters/$o->real;
+my %data; # "id func modver" => { id func module modver rate exists }
+{
+    my $indata;
+    if (open my $F, '<', 'FU/Benchmarks.pod') {
+        while (<$F>) {
+            chomp;
+            $indata = 1 if /^# Cached data used by bench\.PL/;
+            next if !$indata || !$_ || /^#/;
+            my %d;
+            @d{qw/id func module modver rate/} = split /\t/;
+            $data{"$d{id} $d{func} $d{modver}"} = \%d;
+        }
    }
-    print "\n";
 }

-sub runbenches($re) {
-    runbench $bench{$_}->@* for grep /$re/, @bench;
+if (@ARGV) {
+    my $idre = qr/$ARGV[0]/i;
+    my $funcre = $ARGV[1] ? qr/$ARGV[1]/i : qr/.*/;
+    delete $_->{rate} for grep $_->{id} =~ /$idre/ && $_->{func} =~ /$funcre/, values %data;
+}
+
+
+my @bench; # [ id, text, [ func_1, funcmodule_1, funcsub_1, .. ] ]
+sub def($id, $text, @f) {
+    for my ($f, $m, $sub) (@f) {
+        $m ||= $f;
+        my $d = "$id $f $modules{$m}";
+        $data{$d} ||= { id => $id, func => $f, module => $m, modver => $modules{$m} };
+        $d = $data{$d};
+        $d->{exists} = 1;
+        if (!exists $d->{rate}) {
+            my $o = timethis -1, $sub, 0, 'none';
+            $d->{rate} = sprintf '%.0f', $o->iters/$o->real;
+            printf "%-20s%-20s%10d/s\n", $d->{id}, $d->{func}, $d->{rate};
+        }
+    }
+    push @bench, [ $id, $text, \@f ];
 }


@ -52,12 +73,12 @@ my $j_si = JSON::SIMD->new->allow_nonref->core_bools->convert_blessed;
 use FU::Util 'json_format';

 sub jsonfmt($name, $text, $data) {
-    bench "jsonfmt/$name", $text,
-        'JSON::PP',        sub { $j_pp->encode($data) },
-        'Cpanel::JSON::XS',sub { $j_cp->encode($data) },
-        'JSON::SIMD',      sub { $j_si->encode($data) },
-        'JSON::XS',        sub { $j_xs->encode($data) },
-        'FU::Util',        sub { json_format $data };
+    def "jsonfmt/$name", $text,
+        'JSON::PP',         undef, sub { $j_pp->encode($data) },
+        'Cpanel::JSON::XS', undef, sub { $j_cp->encode($data) },
+        'JSON::SIMD',       undef, sub { $j_si->encode($data) },
+        'JSON::XS',         undef, sub { $j_xs->encode($data) },
+        'FU::Util',         'FU',  sub { json_format $data };
 }

 # From JSON::XS POD.
@ -77,20 +98,29 @@ jsonfmt strel => 'String escaping (many)', [ map "This \" \\ needs \b\x01\x02\x0



+delete @data{ grep !$data{$_}{exists}, keys %data };

-if (!@ARGV || $ARGV[0] eq 'bench') {
-    chomp(my $date = `date +%F`);
-    print "Writing to FU/Benchmarks.pod...\n";
+sub fmtbench($id, $text, $fs) {
+    my $r = "$text\n\n";
+    for my ($f, $m, $sub) (@$fs) {
+        $m ||= $f;
+        $r .= sprintf "%18s%10d/s\n", $f, $data{"$id $f $modules{$m}"}{rate};
+    }
+    "$r\n"
+}
+
+{
    open my $F, '>FU/Benchmarks.pod' or die $!;
    select $F;
    while (<DATA>) {
-        s/^:modules/$modules/;
-        s/^:benches (.+)/runbenches $1/e;
-        s/^:context/These benchmarks were performed on $date with perl $^V on $Config{archname}./;
+        s#^:modules#join '', map sprintf("=item L<%s> %s\n\n", $_, $modules{$_}), sort keys %modules#e;
+        s#^:benches (.+)#join '', map fmtbench(@$_), grep $_->[0] =~ /$1/, @bench#e;
        print;
    }
-} else {
-    runbenches $_ for @ARGV;
+    for (sort keys %data) {
+        my $b = $data{$_};
+        print join("\t", @{$b}{qw/ id func module modver rate /})."\n";
+    }
 }

 __DATA__
@ -100,13 +130,20 @@ FU::Benchmarks - A bunch of automated benchmark results.

 =head1 DESCRIPTION

-This file is automatically generated from 'bench.pl' in the L<FU> distribution.
+This file is automatically generated from 'bench.PL' in the L<FU> distribution.
 These benchmarks compare performance of some FU functionality against similar
 modules found on CPAN.

-=head1 CONTEXT
+B<DISCLAIMER#1:> Obtaining accurate measurements is notoriously hard. Take the
+numbers below with a few buckets of salt; any difference below 10% is most
+likely noise.

-:context
+B<DISCLAIMER#2:> Goodhart's law: "When a measure becomes a target, it ceases to
+be a good measure". I've used these benchmarks to find and optimize hotspots in
+FU, which in turn means these numbers may look better than they are in
+real-world use.
+
+=head1 MODULE VERSIONS

 The following module versions were used:

@ -129,3 +166,7 @@ Also worth noting that JSON::SIMD formatting code is forked from JSON::XS, the
 SIMD parts are only used for parsing.

 :benches ^jsonfmt
+
+=cut
+
+# Cached data used by bench.PL.