jsonfmt: Add canonical option
Implementing this was not as bad as I had expected; I managed to keep the implementation a little simpler and cleaner than the one in JSON::XS.
This commit is contained in:
parent
163a60b4ba
commit
1a0fb03205
5 changed files with 115 additions and 23 deletions
|
|
@ -104,6 +104,22 @@ String escaping (many)
|
||||||
JSON::XS 154280/s
|
JSON::XS 154280/s
|
||||||
FU::Util 132514/s
|
FU::Util 132514/s
|
||||||
|
|
||||||
|
Canonical hash key ordering (small)
|
||||||
|
|
||||||
|
JSON::PP 835/s
|
||||||
|
Cpanel::JSON::XS 28155/s
|
||||||
|
JSON::SIMD 30066/s
|
||||||
|
JSON::XS 32151/s
|
||||||
|
FU::Util 27079/s
|
||||||
|
|
||||||
|
Canonical hash key ordering (large)
|
||||||
|
|
||||||
|
JSON::PP 756/s
|
||||||
|
Cpanel::JSON::XS 10710/s
|
||||||
|
JSON::SIMD 12640/s
|
||||||
|
JSON::XS 12858/s
|
||||||
|
FU::Util 12819/s
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
=cut
|
=cut
|
||||||
|
|
@ -114,6 +130,16 @@ jsonfmt/api FU::Util FU 0.1 129117
|
||||||
jsonfmt/api JSON::PP JSON::PP 4.16 5342
|
jsonfmt/api JSON::PP JSON::PP 4.16 5342
|
||||||
jsonfmt/api JSON::SIMD JSON::SIMD 1.06 128161
|
jsonfmt/api JSON::SIMD JSON::SIMD 1.06 128161
|
||||||
jsonfmt/api JSON::XS JSON::XS 4.03 130434
|
jsonfmt/api JSON::XS JSON::XS 4.03 130434
|
||||||
|
jsonfmt/canonl Cpanel::JSON::XS Cpanel::JSON::XS 4.38 10710
|
||||||
|
jsonfmt/canonl FU::Util FU 0.1 12819
|
||||||
|
jsonfmt/canonl JSON::PP JSON::PP 4.16 756
|
||||||
|
jsonfmt/canonl JSON::SIMD JSON::SIMD 1.06 12640
|
||||||
|
jsonfmt/canonl JSON::XS JSON::XS 4.03 12858
|
||||||
|
jsonfmt/canons Cpanel::JSON::XS Cpanel::JSON::XS 4.38 28155
|
||||||
|
jsonfmt/canons FU::Util FU 0.1 27079
|
||||||
|
jsonfmt/canons JSON::PP JSON::PP 4.16 835
|
||||||
|
jsonfmt/canons JSON::SIMD JSON::SIMD 1.06 30066
|
||||||
|
jsonfmt/canons JSON::XS JSON::XS 4.03 32151
|
||||||
jsonfmt/intl Cpanel::JSON::XS Cpanel::JSON::XS 4.38 29299
|
jsonfmt/intl Cpanel::JSON::XS Cpanel::JSON::XS 4.38 29299
|
||||||
jsonfmt/intl FU::Util FU 0.1 114084
|
jsonfmt/intl FU::Util FU 0.1 114084
|
||||||
jsonfmt/intl JSON::PP JSON::PP 4.16 2208
|
jsonfmt/intl JSON::PP JSON::PP 4.16 2208
|
||||||
|
|
|
||||||
|
|
@ -49,6 +49,11 @@ The following C<%options> are supported:
|
||||||
|
|
||||||
=over
|
=over
|
||||||
|
|
||||||
|
=item canonical
|
||||||
|
|
||||||
|
When set to a true value, write hash keys in deterministic order, sorted by Unicode code point. This
|
||||||
|
option currently has no effect on tied hashes.
|
||||||
|
|
||||||
=item utf8
|
=item utf8
|
||||||
|
|
||||||
When set to a true value, returns a UTF-8 encoded byte string instead of a Perl
|
When set to a true value, returns a UTF-8 encoded byte string instead of a Perl
|
||||||
|
|
|
||||||
31
bench.PL
31
bench.PL
|
|
@ -65,20 +65,28 @@ sub def($id, $text, @f) {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Use similar options for fair comparisons.
|
|
||||||
my $j_cp = Cpanel::JSON::XS->new->allow_nonref->unblessed_bool->convert_blessed;
|
|
||||||
my $j_pp = JSON::PP->new->allow_nonref->core_bools->convert_blessed;
|
|
||||||
my $j_xs = JSON::XS->new->allow_nonref->boolean_values([false,true])->convert_blessed;
|
|
||||||
my $j_si = JSON::SIMD->new->allow_nonref->core_bools->convert_blessed;
|
|
||||||
use FU::Util 'json_format';
|
use FU::Util 'json_format';
|
||||||
|
|
||||||
sub jsonfmt($name, $text, $data) {
|
sub jsonfmt($name, $text, $data) {
|
||||||
|
# Use similar options for fair comparisons.
|
||||||
|
my $cp = Cpanel::JSON::XS->new->allow_nonref->unblessed_bool->convert_blessed;
|
||||||
|
my $pp = JSON::PP->new->allow_nonref->core_bools->convert_blessed;
|
||||||
|
my $xs = JSON::XS->new->allow_nonref->boolean_values([false,true])->convert_blessed;
|
||||||
|
my $si = JSON::SIMD->new->allow_nonref->core_bools->convert_blessed;
|
||||||
|
my @opt = ();
|
||||||
|
if ($name =~ /^canon/) {
|
||||||
|
$cp = $cp->canonical;
|
||||||
|
$pp = $pp->canonical;
|
||||||
|
$xs = $xs->canonical;
|
||||||
|
$si = $si->canonical;
|
||||||
|
@opt = (canonical => 1);
|
||||||
|
}
|
||||||
def "jsonfmt/$name", $text,
|
def "jsonfmt/$name", $text,
|
||||||
'JSON::PP', undef, sub { $j_pp->encode($data) },
|
'JSON::PP', undef, sub { $pp->encode($data) },
|
||||||
'Cpanel::JSON::XS', undef, sub { $j_cp->encode($data) },
|
'Cpanel::JSON::XS', undef, sub { $cp->encode($data) },
|
||||||
'JSON::SIMD', undef, sub { $j_si->encode($data) },
|
'JSON::SIMD', undef, sub { $si->encode($data) },
|
||||||
'JSON::XS', undef, sub { $j_xs->encode($data) },
|
'JSON::XS', undef, sub { $xs->encode($data) },
|
||||||
'FU::Util', 'FU', sub { json_format $data };
|
'FU::Util', 'FU', sub { json_format $data, @opt };
|
||||||
}
|
}
|
||||||
|
|
||||||
# From JSON::XS POD.
|
# From JSON::XS POD.
|
||||||
|
|
@ -94,7 +102,8 @@ jsonfmt stru => 'Unicode strings', do { use utf8;
|
||||||
jsonfmt stres => 'String escaping (few)', [ map 'This string needs to "be escaped" a little bit', 1..100 ];
|
jsonfmt stres => 'String escaping (few)', [ map 'This string needs to "be escaped" a little bit', 1..100 ];
|
||||||
jsonfmt strel => 'String escaping (many)', [ map "This \" \\ needs \b\x01\x02\x03\x04 more", 1..100 ];
|
jsonfmt strel => 'String escaping (many)', [ map "This \" \\ needs \b\x01\x02\x03\x04 more", 1..100 ];
|
||||||
|
|
||||||
|
jsonfmt canons => 'Canonical hash key ordering (small)', [ map +{ map +("string$_", 1), 'a'..'f' }, 0..100 ];
|
||||||
|
jsonfmt canonl => 'Canonical hash key ordering (large)', { map +("string$_-something", 1), 'aa'..'zz' };
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
68
c/jsonfmt.c
68
c/jsonfmt.c
|
|
@ -1,6 +1,7 @@
|
||||||
typedef struct {
|
typedef struct {
|
||||||
fustr out;
|
fustr out;
|
||||||
UV depth;
|
UV depth;
|
||||||
|
int canon;
|
||||||
} fujson_fmt_ctx;
|
} fujson_fmt_ctx;
|
||||||
|
|
||||||
static void fujson_fmt(pTHX_ fujson_fmt_ctx *, SV *);
|
static void fujson_fmt(pTHX_ fujson_fmt_ctx *, SV *);
|
||||||
|
|
@ -126,19 +127,65 @@ static void fujson_fmt_av(pTHX_ fujson_fmt_ctx *ctx, AV *av) {
|
||||||
fustr_write(&ctx->out, "]", 1);
|
fustr_write(&ctx->out, "]", 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int fujson_fmt_hvcmp(const void *pa, const void *pb) {
|
||||||
|
dTHX;
|
||||||
|
HE *a = *(HE **)pa;
|
||||||
|
HE *b = *(HE **)pb;
|
||||||
|
STRLEN alen, blen;
|
||||||
|
char *astr = HePV(a, alen);
|
||||||
|
char *bstr = HePV(b, blen);
|
||||||
|
int autf = HeUTF8(a);
|
||||||
|
int butf = HeUTF8(b);
|
||||||
|
|
||||||
|
if (autf == butf) {
|
||||||
|
int cmp = memcmp(bstr, astr, alen < blen ? alen : blen);
|
||||||
|
return cmp != 0 ? cmp : blen < alen ? -1 : blen == alen ? 0 : 1;
|
||||||
|
}
|
||||||
|
return autf ? bytes_cmp_utf8((const U8*)bstr, blen, (const U8*)astr, alen)
|
||||||
|
: -bytes_cmp_utf8((const U8*)astr, alen, (const U8*)bstr, blen);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fujson_fmt_hvkv(pTHX_ fujson_fmt_ctx *ctx, HV *hv, HE *he, char **hestr) {
|
||||||
|
STRLEN helen;
|
||||||
|
if (*hestr) fustr_write(&ctx->out, ",", 1);
|
||||||
|
*hestr = HePV(he, helen);
|
||||||
|
fujson_fmt_str(aTHX_ ctx, *hestr, helen, HeUTF8(he));
|
||||||
|
fustr_write(&ctx->out, ":", 1);
|
||||||
|
fujson_fmt(aTHX_ ctx, UNLIKELY(SvMAGICAL(hv)) ? hv_iterval(hv, he) : HeVAL(he));
|
||||||
|
}
|
||||||
|
|
||||||
static void fujson_fmt_hv(pTHX_ fujson_fmt_ctx *ctx, HV *hv) {
|
static void fujson_fmt_hv(pTHX_ fujson_fmt_ctx *ctx, HV *hv) {
|
||||||
HE *he;
|
HE *he;
|
||||||
STRLEN helen;
|
|
||||||
char *hestr = NULL;
|
char *hestr = NULL;
|
||||||
|
|
||||||
hv_iterinit(hv);
|
int numkeys = hv_iterinit(hv);
|
||||||
fustr_write(&ctx->out, "{", 1);
|
fustr_write(&ctx->out, "{", 1);
|
||||||
while ((he = hv_iternext(hv))) {
|
|
||||||
if (hestr) fustr_write(&ctx->out, ",", 1);
|
/* Canonical order on tied hashes is not supported. Cpanel::JSON::XS has
|
||||||
hestr = HePV(he, helen);
|
* code to deal with that case and it's absolutely horrifying. */
|
||||||
fujson_fmt_str(aTHX_ ctx, hestr, helen, HeUTF8(he));
|
if (ctx->canon && !(SvMAGICAL(hv) && SvTIED_mg((SV*)hv, PERL_MAGIC_tied))) {
|
||||||
fustr_write(&ctx->out, ":", 1);
|
SAVETMPS;
|
||||||
fujson_fmt(aTHX_ ctx, UNLIKELY(SvMAGICAL(hv)) ? hv_iterval(hv, he) : HeVAL(he));
|
if (numkeys < 4) numkeys = 4;
|
||||||
|
if (SvMAGICAL(hv)) numkeys = 32;
|
||||||
|
|
||||||
|
SV *keys_sv = sv_2mortal(newSV(numkeys * sizeof(HE*)));
|
||||||
|
HE **keys = (HE **)SvPVX(keys_sv);
|
||||||
|
int i = 0;
|
||||||
|
|
||||||
|
while ((he = hv_iternext(hv))) {
|
||||||
|
if (i >= numkeys) {
|
||||||
|
numkeys += numkeys >> 1;
|
||||||
|
keys = (HE **)SvGROW(keys_sv, numkeys * sizeof(HE*));
|
||||||
|
numkeys = SvLEN(keys_sv) / sizeof(HE*);
|
||||||
|
}
|
||||||
|
keys[i++] = he;
|
||||||
|
}
|
||||||
|
qsort(keys, i, sizeof(HE *), fujson_fmt_hvcmp);
|
||||||
|
while (i--) fujson_fmt_hvkv(aTHX_ ctx, hv, keys[i], &hestr);
|
||||||
|
FREETMPS;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
while ((he = hv_iternext(hv))) fujson_fmt_hvkv(aTHX_ ctx, hv, he, &hestr);
|
||||||
}
|
}
|
||||||
fustr_write(&ctx->out, "}", 1);
|
fustr_write(&ctx->out, "}", 1);
|
||||||
}
|
}
|
||||||
|
|
@ -222,6 +269,7 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
|
||||||
|
|
||||||
ctx.out.maxlen = 0;
|
ctx.out.maxlen = 0;
|
||||||
ctx.depth = 0;
|
ctx.depth = 0;
|
||||||
|
ctx.canon = 0;
|
||||||
while (i < argc) {
|
while (i < argc) {
|
||||||
arg = SvPV_nolen(ST(i));
|
arg = SvPV_nolen(ST(i));
|
||||||
i++;
|
i++;
|
||||||
|
|
@ -229,7 +277,8 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
|
||||||
r = ST(i);
|
r = ST(i);
|
||||||
i++;
|
i++;
|
||||||
|
|
||||||
if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r);
|
if (strcmp(arg, "canonical") == 0) ctx.canon = SvPVXtrue(r);
|
||||||
|
else if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r);
|
||||||
else if (strcmp(arg, "max_size") == 0) ctx.out.maxlen = SvUV(r);
|
else if (strcmp(arg, "max_size") == 0) ctx.out.maxlen = SvUV(r);
|
||||||
else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r);
|
else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r);
|
||||||
else croak("Unknown flag: '%s'", arg);
|
else croak("Unknown flag: '%s'", arg);
|
||||||
|
|
@ -244,5 +293,4 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO: canonical */
|
|
||||||
/* TODO: pretty */
|
/* TODO: pretty */
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@ my @tests = (
|
||||||
|
|
||||||
{}, '{}',
|
{}, '{}',
|
||||||
{'a',1}, '{"a":1}',
|
{'a',1}, '{"a":1}',
|
||||||
do { tie my %h, 'Tie::StdHash'; %h = ('a',1); \%h }, '{"a":1}',
|
do { tie my %h, 'Tie::StdHash'; %h = ('b',1); \%h }, '{"b":1}',
|
||||||
do { tie my %h, 'MyOrderedHash', one => 1, two => undef, three => []; \%h }, '{"one":1,"two":null,"three":[]}',
|
do { tie my %h, 'MyOrderedHash', one => 1, two => undef, three => []; \%h }, '{"one":1,"two":null,"three":[]}',
|
||||||
|
|
||||||
do { my $o = [true]; bless \$o, 'MyToJSON' }, '[1,"MyToJSON",[true]]',
|
do { my $o = [true]; bless \$o, 'MyToJSON' }, '[1,"MyToJSON",[true]]',
|
||||||
|
|
@ -68,7 +68,7 @@ my @errors = (
|
||||||
do { my $o = {}; bless $o, 'MyToJSONSelf' }, qr/MyToJSONSelf::TO_JSON method returned same object as was passed instead of a new one/,
|
do { my $o = {}; bless $o, 'MyToJSONSelf' }, qr/MyToJSONSelf::TO_JSON method returned same object as was passed instead of a new one/,
|
||||||
);
|
);
|
||||||
|
|
||||||
plan tests => @tests*2 + @errors/2 + 8;
|
plan tests => @tests*2 + @errors/2 + 9;
|
||||||
|
|
||||||
for my($in, $exp) (@tests) {
|
for my($in, $exp) (@tests) {
|
||||||
my $out = json_format $in;
|
my $out = json_format $in;
|
||||||
|
|
@ -87,6 +87,10 @@ for my ($in, $exp) (@errors) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
is json_format({qw/a 1 b 2 c 3 d 4 d1 5 d11 6/, do { use utf8; qw/ü 7 月 8 💩 9/ }}, canonical => 1),
|
||||||
|
do { use utf8; '{"a":"1","b":"2","c":"3","d":"4","d1":"5","d11":"6","ü":"7","月":"8","💩":"9"}' };
|
||||||
|
|
||||||
|
|
||||||
eval { json_format [[]], max_depth => 2 };
|
eval { json_format [[]], max_depth => 2 };
|
||||||
like $@, qr/max_depth exceeded while formatting JSON/;
|
like $@, qr/max_depth exceeded while formatting JSON/;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue