jsonfmt: Add canonical option

Not as bad as I had expected it to be; managed to keep the
implementation a little bit simpler and cleaner than JSON::XS.
This commit is contained in:
Yorhel 2025-01-29 18:42:27 +01:00
parent 163a60b4ba
commit 1a0fb03205
5 changed files with 115 additions and 23 deletions

View file

@ -104,6 +104,22 @@ String escaping (many)
JSON::XS 154280/s
FU::Util 132514/s
Canonical hash key ordering (small)
JSON::PP 835/s
Cpanel::JSON::XS 28155/s
JSON::SIMD 30066/s
JSON::XS 32151/s
FU::Util 27079/s
Canonical hash key ordering (large)
JSON::PP 756/s
Cpanel::JSON::XS 10710/s
JSON::SIMD 12640/s
JSON::XS 12858/s
FU::Util 12819/s
=cut
@ -114,6 +130,16 @@ jsonfmt/api FU::Util FU 0.1 129117
jsonfmt/api JSON::PP JSON::PP 4.16 5342
jsonfmt/api JSON::SIMD JSON::SIMD 1.06 128161
jsonfmt/api JSON::XS JSON::XS 4.03 130434
jsonfmt/canonl Cpanel::JSON::XS Cpanel::JSON::XS 4.38 10710
jsonfmt/canonl FU::Util FU 0.1 12819
jsonfmt/canonl JSON::PP JSON::PP 4.16 756
jsonfmt/canonl JSON::SIMD JSON::SIMD 1.06 12640
jsonfmt/canonl JSON::XS JSON::XS 4.03 12858
jsonfmt/canons Cpanel::JSON::XS Cpanel::JSON::XS 4.38 28155
jsonfmt/canons FU::Util FU 0.1 27079
jsonfmt/canons JSON::PP JSON::PP 4.16 835
jsonfmt/canons JSON::SIMD JSON::SIMD 1.06 30066
jsonfmt/canons JSON::XS JSON::XS 4.03 32151
jsonfmt/intl Cpanel::JSON::XS Cpanel::JSON::XS 4.38 29299
jsonfmt/intl FU::Util FU 0.1 114084
jsonfmt/intl JSON::PP JSON::PP 4.16 2208

View file

@ -49,6 +49,11 @@ The following C<%options> are supported:
=over
=item canonical
When set to a true value, write hash keys in deterministic (sorted) order. This
option currently has no effect on tied hashes.
=item utf8
When set to a true value, returns a UTF-8 encoded byte string instead of a Perl

View file

@ -65,20 +65,28 @@ sub def($id, $text, @f) {
# Use similar options for fair comparisons.
my $j_cp = Cpanel::JSON::XS->new->allow_nonref->unblessed_bool->convert_blessed;
my $j_pp = JSON::PP->new->allow_nonref->core_bools->convert_blessed;
my $j_xs = JSON::XS->new->allow_nonref->boolean_values([false,true])->convert_blessed;
my $j_si = JSON::SIMD->new->allow_nonref->core_bools->convert_blessed;
use FU::Util 'json_format';
sub jsonfmt($name, $text, $data) {
# Use similar options for fair comparisons.
my $cp = Cpanel::JSON::XS->new->allow_nonref->unblessed_bool->convert_blessed;
my $pp = JSON::PP->new->allow_nonref->core_bools->convert_blessed;
my $xs = JSON::XS->new->allow_nonref->boolean_values([false,true])->convert_blessed;
my $si = JSON::SIMD->new->allow_nonref->core_bools->convert_blessed;
my @opt = ();
if ($name =~ /^canon/) {
$cp = $cp->canonical;
$pp = $pp->canonical;
$xs = $xs->canonical;
$si = $si->canonical;
@opt = (canonical => 1);
}
def "jsonfmt/$name", $text,
'JSON::PP', undef, sub { $j_pp->encode($data) },
'Cpanel::JSON::XS', undef, sub { $j_cp->encode($data) },
'JSON::SIMD', undef, sub { $j_si->encode($data) },
'JSON::XS', undef, sub { $j_xs->encode($data) },
'FU::Util', 'FU', sub { json_format $data };
'JSON::PP', undef, sub { $pp->encode($data) },
'Cpanel::JSON::XS', undef, sub { $cp->encode($data) },
'JSON::SIMD', undef, sub { $si->encode($data) },
'JSON::XS', undef, sub { $xs->encode($data) },
'FU::Util', 'FU', sub { json_format $data, @opt };
}
# From JSON::XS POD.
@ -94,7 +102,8 @@ jsonfmt stru => 'Unicode strings', do { use utf8;
jsonfmt stres => 'String escaping (few)', [ map 'This string needs to "be escaped" a little bit', 1..100 ];
jsonfmt strel => 'String escaping (many)', [ map "This \" \\ needs \b\x01\x02\x03\x04 more", 1..100 ];
jsonfmt canons => 'Canonical hash key ordering (small)', [ map +{ map +("string$_", 1), 'a'..'f' }, 0..100 ];
jsonfmt canonl => 'Canonical hash key ordering (large)', { map +("string$_-something", 1), 'aa'..'zz' };

View file

@ -1,6 +1,7 @@
typedef struct {
fustr out;
UV depth;
int canon;
} fujson_fmt_ctx;
static void fujson_fmt(pTHX_ fujson_fmt_ctx *, SV *);
@ -126,19 +127,65 @@ static void fujson_fmt_av(pTHX_ fujson_fmt_ctx *ctx, AV *av) {
fustr_write(&ctx->out, "]", 1);
}
static int fujson_fmt_hvcmp(const void *pa, const void *pb) {
dTHX;
HE *a = *(HE **)pa;
HE *b = *(HE **)pb;
STRLEN alen, blen;
char *astr = HePV(a, alen);
char *bstr = HePV(b, blen);
int autf = HeUTF8(a);
int butf = HeUTF8(b);
if (autf == butf) {
int cmp = memcmp(bstr, astr, alen < blen ? alen : blen);
return cmp != 0 ? cmp : blen < alen ? -1 : blen == alen ? 0 : 1;
}
return autf ? bytes_cmp_utf8((const U8*)bstr, blen, (const U8*)astr, alen)
: -bytes_cmp_utf8((const U8*)astr, alen, (const U8*)bstr, blen);
}
static void fujson_fmt_hvkv(pTHX_ fujson_fmt_ctx *ctx, HV *hv, HE *he, char **hestr) {
STRLEN helen;
if (*hestr) fustr_write(&ctx->out, ",", 1);
*hestr = HePV(he, helen);
fujson_fmt_str(aTHX_ ctx, *hestr, helen, HeUTF8(he));
fustr_write(&ctx->out, ":", 1);
fujson_fmt(aTHX_ ctx, UNLIKELY(SvMAGICAL(hv)) ? hv_iterval(hv, he) : HeVAL(he));
}
static void fujson_fmt_hv(pTHX_ fujson_fmt_ctx *ctx, HV *hv) {
HE *he;
STRLEN helen;
char *hestr = NULL;
hv_iterinit(hv);
int numkeys = hv_iterinit(hv);
fustr_write(&ctx->out, "{", 1);
while ((he = hv_iternext(hv))) {
if (hestr) fustr_write(&ctx->out, ",", 1);
hestr = HePV(he, helen);
fujson_fmt_str(aTHX_ ctx, hestr, helen, HeUTF8(he));
fustr_write(&ctx->out, ":", 1);
fujson_fmt(aTHX_ ctx, UNLIKELY(SvMAGICAL(hv)) ? hv_iterval(hv, he) : HeVAL(he));
/* Canonical order on tied hashes is not supported. Cpanel::JSON::XS has
* code to deal with that case and it's absolutely horrifying. */
if (ctx->canon && !(SvMAGICAL(hv) && SvTIED_mg((SV*)hv, PERL_MAGIC_tied))) {
SAVETMPS;
if (numkeys < 4) numkeys = 4;
if (SvMAGICAL(hv)) numkeys = 32;
SV *keys_sv = sv_2mortal(newSV(numkeys * sizeof(HE*)));
HE **keys = (HE **)SvPVX(keys_sv);
int i = 0;
while ((he = hv_iternext(hv))) {
if (i >= numkeys) {
numkeys += numkeys >> 1;
keys = (HE **)SvGROW(keys_sv, numkeys * sizeof(HE*));
numkeys = SvLEN(keys_sv) / sizeof(HE*);
}
keys[i++] = he;
}
qsort(keys, i, sizeof(HE *), fujson_fmt_hvcmp);
while (i--) fujson_fmt_hvkv(aTHX_ ctx, hv, keys[i], &hestr);
FREETMPS;
} else {
while ((he = hv_iternext(hv))) fujson_fmt_hvkv(aTHX_ ctx, hv, he, &hestr);
}
fustr_write(&ctx->out, "}", 1);
}
@ -222,6 +269,7 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
ctx.out.maxlen = 0;
ctx.depth = 0;
ctx.canon = 0;
while (i < argc) {
arg = SvPV_nolen(ST(i));
i++;
@ -229,7 +277,8 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
r = ST(i);
i++;
if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r);
if (strcmp(arg, "canonical") == 0) ctx.canon = SvPVXtrue(r);
else if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r);
else if (strcmp(arg, "max_size") == 0) ctx.out.maxlen = SvUV(r);
else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r);
else croak("Unknown flag: '%s'", arg);
@ -244,5 +293,4 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
return r;
}
/* TODO: canonical */
/* TODO: pretty */

View file

@ -44,7 +44,7 @@ my @tests = (
{}, '{}',
{'a',1}, '{"a":1}',
do { tie my %h, 'Tie::StdHash'; %h = ('a',1); \%h }, '{"a":1}',
do { tie my %h, 'Tie::StdHash'; %h = ('b',1); \%h }, '{"b":1}',
do { tie my %h, 'MyOrderedHash', one => 1, two => undef, three => []; \%h }, '{"one":1,"two":null,"three":[]}',
do { my $o = [true]; bless \$o, 'MyToJSON' }, '[1,"MyToJSON",[true]]',
@ -68,7 +68,7 @@ my @errors = (
do { my $o = {}; bless $o, 'MyToJSONSelf' }, qr/MyToJSONSelf::TO_JSON method returned same object as was passed instead of a new one/,
);
plan tests => @tests*2 + @errors/2 + 8;
plan tests => @tests*2 + @errors/2 + 9;
for my($in, $exp) (@tests) {
my $out = json_format $in;
@ -87,6 +87,10 @@ for my ($in, $exp) (@errors) {
}
is json_format({qw/a 1 b 2 c 3 d 4 d1 5 d11 6/, do { use utf8; qw/ü 7 月 8 💩 9/ }}, canonical => 1),
do { use utf8; '{"a":"1","b":"2","c":"3","d":"4","d1":"5","d11":"6","ü":"7","月":"8","💩":"9"}' };
eval { json_format [[]], max_depth => 2 };
like $@, qr/max_depth exceeded while formatting JSON/;