diff --git a/FU/Benchmarks.pod b/FU/Benchmarks.pod index 7366387..8f5aa73 100644 --- a/FU/Benchmarks.pod +++ b/FU/Benchmarks.pod @@ -104,6 +104,22 @@ String escaping (many) JSON::XS 154280/s FU::Util 132514/s +Canonical hash key ordering (small) + + JSON::PP 835/s + Cpanel::JSON::XS 28155/s + JSON::SIMD 30066/s + JSON::XS 32151/s + FU::Util 27079/s + +Canonical hash key ordering (large) + + JSON::PP 756/s + Cpanel::JSON::XS 10710/s + JSON::SIMD 12640/s + JSON::XS 12858/s + FU::Util 12819/s + =cut @@ -114,6 +130,16 @@ jsonfmt/api FU::Util FU 0.1 129117 jsonfmt/api JSON::PP JSON::PP 4.16 5342 jsonfmt/api JSON::SIMD JSON::SIMD 1.06 128161 jsonfmt/api JSON::XS JSON::XS 4.03 130434 +jsonfmt/canonl Cpanel::JSON::XS Cpanel::JSON::XS 4.38 10710 +jsonfmt/canonl FU::Util FU 0.1 12819 +jsonfmt/canonl JSON::PP JSON::PP 4.16 756 +jsonfmt/canonl JSON::SIMD JSON::SIMD 1.06 12640 +jsonfmt/canonl JSON::XS JSON::XS 4.03 12858 +jsonfmt/canons Cpanel::JSON::XS Cpanel::JSON::XS 4.38 28155 +jsonfmt/canons FU::Util FU 0.1 27079 +jsonfmt/canons JSON::PP JSON::PP 4.16 835 +jsonfmt/canons JSON::SIMD JSON::SIMD 1.06 30066 +jsonfmt/canons JSON::XS JSON::XS 4.03 32151 jsonfmt/intl Cpanel::JSON::XS Cpanel::JSON::XS 4.38 29299 jsonfmt/intl FU::Util FU 0.1 114084 jsonfmt/intl JSON::PP JSON::PP 4.16 2208 diff --git a/FU/Util.pod b/FU/Util.pod index dbf5055..fd6bb5e 100644 --- a/FU/Util.pod +++ b/FU/Util.pod @@ -49,6 +49,11 @@ The following C<%options> are supported: =over +=item canonical + +When set to a true value, write hash keys in deterministic (sorted) order. This +option currently has no effect on tied hashes. + =item utf8 When set to a true value, returns a UTF-8 encoded byte string instead of a Perl diff --git a/bench.PL b/bench.PL index 5a8b236..dbc9935 100755 --- a/bench.PL +++ b/bench.PL @@ -65,20 +65,28 @@ sub def($id, $text, @f) { -# Use similar options for fair comparisons. -my $j_cp = Cpanel::JSON::XS->new->allow_nonref->unblessed_bool->convert_blessed; -my $j_pp = JSON::PP->new->allow_nonref->core_bools->convert_blessed; -my $j_xs = JSON::XS->new->allow_nonref->boolean_values([false,true])->convert_blessed; -my $j_si = JSON::SIMD->new->allow_nonref->core_bools->convert_blessed; use FU::Util 'json_format'; sub jsonfmt($name, $text, $data) { + # Use similar options for fair comparisons. + my $cp = Cpanel::JSON::XS->new->allow_nonref->unblessed_bool->convert_blessed; + my $pp = JSON::PP->new->allow_nonref->core_bools->convert_blessed; + my $xs = JSON::XS->new->allow_nonref->boolean_values([false,true])->convert_blessed; + my $si = JSON::SIMD->new->allow_nonref->core_bools->convert_blessed; + my @opt = (); + if ($name =~ /^canon/) { + $cp = $cp->canonical; + $pp = $pp->canonical; + $xs = $xs->canonical; + $si = $si->canonical; + @opt = (canonical => 1); + } def "jsonfmt/$name", $text, - 'JSON::PP', undef, sub { $j_pp->encode($data) }, - 'Cpanel::JSON::XS', undef, sub { $j_cp->encode($data) }, - 'JSON::SIMD', undef, sub { $j_si->encode($data) }, - 'JSON::XS', undef, sub { $j_xs->encode($data) }, - 'FU::Util', 'FU', sub { json_format $data }; + 'JSON::PP', undef, sub { $pp->encode($data) }, + 'Cpanel::JSON::XS', undef, sub { $cp->encode($data) }, + 'JSON::SIMD', undef, sub { $si->encode($data) }, + 'JSON::XS', undef, sub { $xs->encode($data) }, + 'FU::Util', 'FU', sub { json_format $data, @opt }; } # From JSON::XS POD. @@ -94,7 +102,8 @@ jsonfmt stru => 'Unicode strings', do { use utf8; jsonfmt stres => 'String escaping (few)', [ map 'This string needs to "be escaped" a little bit', 1..100 ]; jsonfmt strel => 'String escaping (many)', [ map "This \" \\ needs \b\x01\x02\x03\x04 more", 1..100 ]; - +jsonfmt canons => 'Canonical hash key ordering (small)', [ map +{ map +("string$_", 1), 'a'..'f' }, 0..100 ]; +jsonfmt canonl => 'Canonical hash key ordering (large)', { map +("string$_-something", 1), 'aa'..'zz' }; diff --git a/c/jsonfmt.c b/c/jsonfmt.c index 48264dc..70dfbef 100644 --- a/c/jsonfmt.c +++ b/c/jsonfmt.c @@ -1,6 +1,7 @@ typedef struct { fustr out; UV depth; + int canon; } fujson_fmt_ctx; static void fujson_fmt(pTHX_ fujson_fmt_ctx *, SV *); @@ -126,19 +127,65 @@ static void fujson_fmt_av(pTHX_ fujson_fmt_ctx *ctx, AV *av) { fustr_write(&ctx->out, "]", 1); } +static int fujson_fmt_hvcmp(const void *pa, const void *pb) { + dTHX; + HE *a = *(HE **)pa; + HE *b = *(HE **)pb; + STRLEN alen, blen; + char *astr = HePV(a, alen); + char *bstr = HePV(b, blen); + int autf = HeUTF8(a); + int butf = HeUTF8(b); + + if (autf == butf) { + int cmp = memcmp(bstr, astr, alen < blen ? alen : blen); + return cmp != 0 ? cmp : blen < alen ? -1 : blen == alen ? 0 : 1; + } + return autf ? bytes_cmp_utf8((const U8*)bstr, blen, (const U8*)astr, alen) + : -bytes_cmp_utf8((const U8*)astr, alen, (const U8*)bstr, blen); +} + +static void fujson_fmt_hvkv(pTHX_ fujson_fmt_ctx *ctx, HV *hv, HE *he, char **hestr) { + STRLEN helen; + if (*hestr) fustr_write(&ctx->out, ",", 1); + *hestr = HePV(he, helen); + fujson_fmt_str(aTHX_ ctx, *hestr, helen, HeUTF8(he)); + fustr_write(&ctx->out, ":", 1); + fujson_fmt(aTHX_ ctx, UNLIKELY(SvMAGICAL(hv)) ? hv_iterval(hv, he) : HeVAL(he)); +} + static void fujson_fmt_hv(pTHX_ fujson_fmt_ctx *ctx, HV *hv) { HE *he; - STRLEN helen; char *hestr = NULL; - hv_iterinit(hv); + int numkeys = hv_iterinit(hv); fustr_write(&ctx->out, "{", 1); - while ((he = hv_iternext(hv))) { - if (hestr) fustr_write(&ctx->out, ",", 1); - hestr = HePV(he, helen); - fujson_fmt_str(aTHX_ ctx, hestr, helen, HeUTF8(he)); - fustr_write(&ctx->out, ":", 1); - fujson_fmt(aTHX_ ctx, UNLIKELY(SvMAGICAL(hv)) ? hv_iterval(hv, he) : HeVAL(he)); + + /* Canonical order on tied hashes is not supported. Cpanel::JSON::XS has + * code to deal with that case and it's absolutely horrifying. */ + if (ctx->canon && !(SvMAGICAL(hv) && SvTIED_mg((SV*)hv, PERL_MAGIC_tied))) { + SAVETMPS; + if (numkeys < 4) numkeys = 4; + if (SvMAGICAL(hv)) numkeys = 32; + + SV *keys_sv = sv_2mortal(newSV(numkeys * sizeof(HE*))); + HE **keys = (HE **)SvPVX(keys_sv); + int i = 0; + + while ((he = hv_iternext(hv))) { + if (i >= numkeys) { + numkeys += numkeys >> 1; + keys = (HE **)SvGROW(keys_sv, numkeys * sizeof(HE*)); + numkeys = SvLEN(keys_sv) / sizeof(HE*); + } + keys[i++] = he; + } + qsort(keys, i, sizeof(HE *), fujson_fmt_hvcmp); + while (i--) fujson_fmt_hvkv(aTHX_ ctx, hv, keys[i], &hestr); + FREETMPS; + + } else { + while ((he = hv_iternext(hv))) fujson_fmt_hvkv(aTHX_ ctx, hv, he, &hestr); } fustr_write(&ctx->out, "}", 1); } @@ -222,6 +269,7 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) { ctx.out.maxlen = 0; ctx.depth = 0; + ctx.canon = 0; while (i < argc) { arg = SvPV_nolen(ST(i)); i++; @@ -229,7 +277,8 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) { r = ST(i); i++; - if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r); + if (strcmp(arg, "canonical") == 0) ctx.canon = SvPVXtrue(r); + else if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r); else if (strcmp(arg, "max_size") == 0) ctx.out.maxlen = SvUV(r); else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r); else croak("Unknown flag: '%s'", arg); @@ -244,5 +293,4 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) { return r; } -/* TODO: canonical */ /* TODO: pretty */ diff --git a/t/json_format.t b/t/json_format.t index 03b20e7..4a5470f 100644 --- a/t/json_format.t +++ b/t/json_format.t @@ -44,7 +44,7 @@ my @tests = ( {}, '{}', {'a',1}, '{"a":1}', - do { tie my %h, 'Tie::StdHash'; %h = ('a',1); \%h }, '{"a":1}', + do { tie my %h, 'Tie::StdHash'; %h = ('b',1); \%h }, '{"b":1}', do { tie my %h, 'MyOrderedHash', one => 1, two => undef, three => []; \%h }, '{"one":1,"two":null,"three":[]}', do { my $o = [true]; bless \$o, 'MyToJSON' }, '[1,"MyToJSON",[true]]', @@ -68,7 +68,7 @@ my @errors = ( do { my $o = {}; bless $o, 'MyToJSONSelf' }, qr/MyToJSONSelf::TO_JSON method returned same object as was passed instead of a new one/, ); -plan tests => @tests*2 + @errors/2 + 8; +plan tests => @tests*2 + @errors/2 + 9; for my($in, $exp) (@tests) { my $out = json_format $in; @@ -87,6 +87,10 @@ for my ($in, $exp) (@errors) { } +is json_format({qw/a 1 b 2 c 3 d 4 d1 5 d11 6/, do { use utf8; qw/ü 7 月 8 💩 9/ }}, canonical => 1), + do { use utf8; '{"a":"1","b":"2","c":"3","d":"4","d1":"5","d11":"6","ü":"7","月":"8","💩":"9"}' }; + + eval { json_format [[]], max_depth => 2 }; like $@, qr/max_depth exceeded while formatting JSON/;