jsonfmt: Move arg parsing into XS

Going to need a way to pass arguments into the XS function anyway, so
might as well do the entire arg parsing step in XS while we're at it.
Provides a significant speedup for tiny inputs as well, but I don't find
that too interesting.
This commit is contained in:
Yorhel 2025-01-29 11:42:13 +01:00
parent e0161cd22c
commit 8ef2a724d1
6 changed files with 40 additions and 18 deletions

8
FU.xs
View file

@ -10,10 +10,6 @@ MODULE = FU PACKAGE = FU::XS
PROTOTYPES: DISABLE PROTOTYPES: DISABLE
void json_format(SV *val) void json_format(SV *val, ...)
CODE: CODE:
fustr buf; ST(0) = fujson_fmt_xs(aTHX_ ax, items, val);
fustr_init(&buf, 128);
fujson_fmt(aTHX_ &buf, val);
ST(0) = fustr_done(&buf);
SvUTF8_on(ST(0));

View file

@ -43,8 +43,7 @@ The following module versions were used:
These benchmarks run on large-ish arrays with repeated values. JSON encoding is These benchmarks run on large-ish arrays with repeated values. JSON encoding is
sufficiently fast that Perl function calling overhead tends to dominate for sufficiently fast that Perl function calling overhead tends to dominate for
smaller inputs, but I don't find that overhead very interesting. Other modules smaller inputs, but I don't find that overhead very interesting.
will likely do better in benchmarks on small inputs.
Also worth noting that JSON::SIMD formatting code is forked from JSON::XS, the Also worth noting that JSON::SIMD formatting code is forked from JSON::XS, the
SIMD parts are only used for parsing. SIMD parts are only used for parsing.

View file

@ -6,12 +6,6 @@ use Exporter 'import';
our @EXPORT_OK = qw/json_format/; our @EXPORT_OK = qw/json_format/;
*json_format = *FU::XS::json_format;
# Format $val as a JSON string using the XS implementation.
#
# Options (%opt):
#   utf8 - when true, the result is encoded in place to UTF-8 bytes
#          before being returned.
sub json_format($val, %opt) {
    my $json = FU::XS::json_format($val);

    # XXX: Does utf8::encode() walk over every byte? If it does, having the
    # XS side skip SvUTF8_on() in the first place would be much faster.
    if ($opt{utf8}) {
        utf8::encode($json);
    }

    return $json;
}
1; 1;

View file

@ -159,8 +159,7 @@ The following module versions were used:
These benchmarks run on large-ish arrays with repeated values. JSON encoding is These benchmarks run on large-ish arrays with repeated values. JSON encoding is
sufficiently fast that Perl function calling overhead tends to dominate for sufficiently fast that Perl function calling overhead tends to dominate for
smaller inputs, but I don't find that overhead very interesting. Other modules smaller inputs, but I don't find that overhead very interesting.
will likely do better in benchmarks on small inputs.
Also worth noting that JSON::SIMD formatting code is forked from JSON::XS, the Also worth noting that JSON::SIMD formatting code is forked from JSON::XS, the
SIMD parts are only used for parsing. SIMD parts are only used for parsing.

View file

@ -172,6 +172,35 @@ static void fujson_fmt(pTHX_ fustr *out, SV *val) {
} }
} }
/*
 * Argument-parsing front end for the json_format() XS function.
 *
 * Reads the name/value option pairs found on the Perl stack after the value,
 * formats `val` with fujson_fmt() and returns the resulting SV.
 *
 *   ax    - stack base index, needed by the ST() macro
 *   argc  - number of arguments on the stack, counting `val` itself
 *   val   - the value to format
 *
 * Recognised options:
 *   utf8  - when true, the result is returned without the UTF8 flag;
 *           when false or absent, SvUTF8_on() is applied to the result.
 *
 * Croaks when an option name has no accompanying value and when an option
 * name is not recognised.
 */
static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
    I32 i = 1; /* options start at ST(1); index 0 is skipped */
    int encutf8 = 0;
    char *arg;
    SV *r;
    fustr buf;

    while (i < argc) {
        arg = SvPV_nolen(ST(i));
        i++;
        if (i == argc) croak("Odd name/value argument for json_format()");
        r = ST(i);
        i++;

        if (strcmp(arg, "utf8") == 0) {
            /* SvTRUE() applies Perl's truthiness rules to any kind of
             * scalar. SvPVXtrue() only looks at the string body and gives
             * meaningless results for scalars without one, such as the
             * plain integers in `utf8 => 1` or `utf8 => 0`. */
            encutf8 = SvTRUE(r) ? 1 : 0;
        } else {
            croak("Unknown flag: '%s'", arg);
        }
    }

    fustr_init(&buf, 128);
    fujson_fmt(aTHX_ &buf, val);
    r = fustr_done(&buf);
    if (!encutf8) SvUTF8_on(r);
    return r;
}
/* TODO: canonical */ /* TODO: canonical */
/* TODO: pretty */ /* TODO: pretty */
/* TODO: max depth? */ /* TODO: max depth? */

View file

@ -58,12 +58,17 @@ my @errors = (
do { no warnings 'portable'; "\x{ffffffff}" }, qr/invalid codepoint encountered in string/, do { no warnings 'portable'; "\x{ffffffff}" }, qr/invalid codepoint encountered in string/,
); );
plan tests => @tests + @errors/2 + 6; plan tests => @tests*2 + @errors/2 + 6;
for my($in, $exp) (@tests) { for my($in, $exp) (@tests) {
my $out = json_format $in; my $out = json_format $in;
is $out, $exp; is $out, $exp;
ok utf8::is_utf8($out); ok utf8::is_utf8($out);
$out = json_format $in, utf8 => 1;
utf8::encode(my $uexp = $exp);
is $out, $uexp;
ok !utf8::is_utf8($out);
} }
for my ($in, $exp) (@errors) { for my ($in, $exp) (@errors) {