jsonfmt: Add canonical option
Not as bad as I had expected it to be; managed to keep the implementation a little bit simpler and cleaner than JSON::XS.
This commit is contained in:
parent
163a60b4ba
commit
1a0fb03205
5 changed files with 115 additions and 23 deletions
|
|
@ -104,6 +104,22 @@ String escaping (many)
|
|||
JSON::XS 154280/s
|
||||
FU::Util 132514/s
|
||||
|
||||
Canonical hash key ordering (small)
|
||||
|
||||
JSON::PP 835/s
|
||||
Cpanel::JSON::XS 28155/s
|
||||
JSON::SIMD 30066/s
|
||||
JSON::XS 32151/s
|
||||
FU::Util 27079/s
|
||||
|
||||
Canonical hash key ordering (large)
|
||||
|
||||
JSON::PP 756/s
|
||||
Cpanel::JSON::XS 10710/s
|
||||
JSON::SIMD 12640/s
|
||||
JSON::XS 12858/s
|
||||
FU::Util 12819/s
|
||||
|
||||
|
||||
|
||||
=cut
|
||||
|
|
@ -114,6 +130,16 @@ jsonfmt/api FU::Util FU 0.1 129117
|
|||
jsonfmt/api JSON::PP JSON::PP 4.16 5342
|
||||
jsonfmt/api JSON::SIMD JSON::SIMD 1.06 128161
|
||||
jsonfmt/api JSON::XS JSON::XS 4.03 130434
|
||||
jsonfmt/canonl Cpanel::JSON::XS Cpanel::JSON::XS 4.38 10710
|
||||
jsonfmt/canonl FU::Util FU 0.1 12819
|
||||
jsonfmt/canonl JSON::PP JSON::PP 4.16 756
|
||||
jsonfmt/canonl JSON::SIMD JSON::SIMD 1.06 12640
|
||||
jsonfmt/canonl JSON::XS JSON::XS 4.03 12858
|
||||
jsonfmt/canons Cpanel::JSON::XS Cpanel::JSON::XS 4.38 28155
|
||||
jsonfmt/canons FU::Util FU 0.1 27079
|
||||
jsonfmt/canons JSON::PP JSON::PP 4.16 835
|
||||
jsonfmt/canons JSON::SIMD JSON::SIMD 1.06 30066
|
||||
jsonfmt/canons JSON::XS JSON::XS 4.03 32151
|
||||
jsonfmt/intl Cpanel::JSON::XS Cpanel::JSON::XS 4.38 29299
|
||||
jsonfmt/intl FU::Util FU 0.1 114084
|
||||
jsonfmt/intl JSON::PP JSON::PP 4.16 2208
|
||||
|
|
|
|||
|
|
@ -49,6 +49,11 @@ The following C<%options> are supported:
|
|||
|
||||
=over
|
||||
|
||||
=item canonical
|
||||
|
||||
When set to a true value, write hash keys in deterministic (sorted) order. This
|
||||
option currently has no effect on tied hashes.
|
||||
|
||||
=item utf8
|
||||
|
||||
When set to a true value, returns a UTF-8 encoded byte string instead of a Perl
|
||||
|
|
|
|||
31
bench.PL
31
bench.PL
|
|
@ -65,20 +65,28 @@ sub def($id, $text, @f) {
|
|||
|
||||
|
||||
|
||||
# Use similar options for fair comparisons.
|
||||
my $j_cp = Cpanel::JSON::XS->new->allow_nonref->unblessed_bool->convert_blessed;
|
||||
my $j_pp = JSON::PP->new->allow_nonref->core_bools->convert_blessed;
|
||||
my $j_xs = JSON::XS->new->allow_nonref->boolean_values([false,true])->convert_blessed;
|
||||
my $j_si = JSON::SIMD->new->allow_nonref->core_bools->convert_blessed;
|
||||
use FU::Util 'json_format';
|
||||
|
||||
sub jsonfmt($name, $text, $data) {
|
||||
# Use similar options for fair comparisons.
|
||||
my $cp = Cpanel::JSON::XS->new->allow_nonref->unblessed_bool->convert_blessed;
|
||||
my $pp = JSON::PP->new->allow_nonref->core_bools->convert_blessed;
|
||||
my $xs = JSON::XS->new->allow_nonref->boolean_values([false,true])->convert_blessed;
|
||||
my $si = JSON::SIMD->new->allow_nonref->core_bools->convert_blessed;
|
||||
my @opt = ();
|
||||
if ($name =~ /^canon/) {
|
||||
$cp = $cp->canonical;
|
||||
$pp = $pp->canonical;
|
||||
$xs = $xs->canonical;
|
||||
$si = $si->canonical;
|
||||
@opt = (canonical => 1);
|
||||
}
|
||||
def "jsonfmt/$name", $text,
|
||||
'JSON::PP', undef, sub { $j_pp->encode($data) },
|
||||
'Cpanel::JSON::XS', undef, sub { $j_cp->encode($data) },
|
||||
'JSON::SIMD', undef, sub { $j_si->encode($data) },
|
||||
'JSON::XS', undef, sub { $j_xs->encode($data) },
|
||||
'FU::Util', 'FU', sub { json_format $data };
|
||||
'JSON::PP', undef, sub { $pp->encode($data) },
|
||||
'Cpanel::JSON::XS', undef, sub { $cp->encode($data) },
|
||||
'JSON::SIMD', undef, sub { $si->encode($data) },
|
||||
'JSON::XS', undef, sub { $xs->encode($data) },
|
||||
'FU::Util', 'FU', sub { json_format $data, @opt };
|
||||
}
|
||||
|
||||
# From JSON::XS POD.
|
||||
|
|
@ -94,7 +102,8 @@ jsonfmt stru => 'Unicode strings', do { use utf8;
|
|||
jsonfmt stres => 'String escaping (few)', [ map 'This string needs to "be escaped" a little bit', 1..100 ];
|
||||
jsonfmt strel => 'String escaping (many)', [ map "This \" \\ needs \b\x01\x02\x03\x04 more", 1..100 ];
|
||||
|
||||
|
||||
jsonfmt canons => 'Canonical hash key ordering (small)', [ map +{ map +("string$_", 1), 'a'..'f' }, 0..100 ];
|
||||
jsonfmt canonl => 'Canonical hash key ordering (large)', { map +("string$_-something", 1), 'aa'..'zz' };
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
68
c/jsonfmt.c
68
c/jsonfmt.c
|
|
@ -1,6 +1,7 @@
|
|||
typedef struct {
|
||||
fustr out;
|
||||
UV depth;
|
||||
int canon;
|
||||
} fujson_fmt_ctx;
|
||||
|
||||
static void fujson_fmt(pTHX_ fujson_fmt_ctx *, SV *);
|
||||
|
|
@ -126,19 +127,65 @@ static void fujson_fmt_av(pTHX_ fujson_fmt_ctx *ctx, AV *av) {
|
|||
fustr_write(&ctx->out, "]", 1);
|
||||
}
|
||||
|
||||
static int fujson_fmt_hvcmp(const void *pa, const void *pb) {
|
||||
dTHX;
|
||||
HE *a = *(HE **)pa;
|
||||
HE *b = *(HE **)pb;
|
||||
STRLEN alen, blen;
|
||||
char *astr = HePV(a, alen);
|
||||
char *bstr = HePV(b, blen);
|
||||
int autf = HeUTF8(a);
|
||||
int butf = HeUTF8(b);
|
||||
|
||||
if (autf == butf) {
|
||||
int cmp = memcmp(bstr, astr, alen < blen ? alen : blen);
|
||||
return cmp != 0 ? cmp : blen < alen ? -1 : blen == alen ? 0 : 1;
|
||||
}
|
||||
return autf ? bytes_cmp_utf8((const U8*)bstr, blen, (const U8*)astr, alen)
|
||||
: -bytes_cmp_utf8((const U8*)astr, alen, (const U8*)bstr, blen);
|
||||
}
|
||||
|
||||
static void fujson_fmt_hvkv(pTHX_ fujson_fmt_ctx *ctx, HV *hv, HE *he, char **hestr) {
|
||||
STRLEN helen;
|
||||
if (*hestr) fustr_write(&ctx->out, ",", 1);
|
||||
*hestr = HePV(he, helen);
|
||||
fujson_fmt_str(aTHX_ ctx, *hestr, helen, HeUTF8(he));
|
||||
fustr_write(&ctx->out, ":", 1);
|
||||
fujson_fmt(aTHX_ ctx, UNLIKELY(SvMAGICAL(hv)) ? hv_iterval(hv, he) : HeVAL(he));
|
||||
}
|
||||
|
||||
static void fujson_fmt_hv(pTHX_ fujson_fmt_ctx *ctx, HV *hv) {
|
||||
HE *he;
|
||||
STRLEN helen;
|
||||
char *hestr = NULL;
|
||||
|
||||
hv_iterinit(hv);
|
||||
int numkeys = hv_iterinit(hv);
|
||||
fustr_write(&ctx->out, "{", 1);
|
||||
while ((he = hv_iternext(hv))) {
|
||||
if (hestr) fustr_write(&ctx->out, ",", 1);
|
||||
hestr = HePV(he, helen);
|
||||
fujson_fmt_str(aTHX_ ctx, hestr, helen, HeUTF8(he));
|
||||
fustr_write(&ctx->out, ":", 1);
|
||||
fujson_fmt(aTHX_ ctx, UNLIKELY(SvMAGICAL(hv)) ? hv_iterval(hv, he) : HeVAL(he));
|
||||
|
||||
/* Canonical order on tied hashes is not supported. Cpanel::JSON::XS has
|
||||
* code to deal with that case and it's absolutely horrifying. */
|
||||
if (ctx->canon && !(SvMAGICAL(hv) && SvTIED_mg((SV*)hv, PERL_MAGIC_tied))) {
|
||||
SAVETMPS;
|
||||
if (numkeys < 4) numkeys = 4;
|
||||
if (SvMAGICAL(hv)) numkeys = 32;
|
||||
|
||||
SV *keys_sv = sv_2mortal(newSV(numkeys * sizeof(HE*)));
|
||||
HE **keys = (HE **)SvPVX(keys_sv);
|
||||
int i = 0;
|
||||
|
||||
while ((he = hv_iternext(hv))) {
|
||||
if (i >= numkeys) {
|
||||
numkeys += numkeys >> 1;
|
||||
keys = (HE **)SvGROW(keys_sv, numkeys * sizeof(HE*));
|
||||
numkeys = SvLEN(keys_sv) / sizeof(HE*);
|
||||
}
|
||||
keys[i++] = he;
|
||||
}
|
||||
qsort(keys, i, sizeof(HE *), fujson_fmt_hvcmp);
|
||||
while (i--) fujson_fmt_hvkv(aTHX_ ctx, hv, keys[i], &hestr);
|
||||
FREETMPS;
|
||||
|
||||
} else {
|
||||
while ((he = hv_iternext(hv))) fujson_fmt_hvkv(aTHX_ ctx, hv, he, &hestr);
|
||||
}
|
||||
fustr_write(&ctx->out, "}", 1);
|
||||
}
|
||||
|
|
@ -222,6 +269,7 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
|
|||
|
||||
ctx.out.maxlen = 0;
|
||||
ctx.depth = 0;
|
||||
ctx.canon = 0;
|
||||
while (i < argc) {
|
||||
arg = SvPV_nolen(ST(i));
|
||||
i++;
|
||||
|
|
@ -229,7 +277,8 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
|
|||
r = ST(i);
|
||||
i++;
|
||||
|
||||
if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r);
|
||||
if (strcmp(arg, "canonical") == 0) ctx.canon = SvPVXtrue(r);
|
||||
else if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r);
|
||||
else if (strcmp(arg, "max_size") == 0) ctx.out.maxlen = SvUV(r);
|
||||
else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r);
|
||||
else croak("Unknown flag: '%s'", arg);
|
||||
|
|
@ -244,5 +293,4 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
|
|||
return r;
|
||||
}
|
||||
|
||||
/* TODO: canonical */
|
||||
/* TODO: pretty */
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ my @tests = (
|
|||
|
||||
{}, '{}',
|
||||
{'a',1}, '{"a":1}',
|
||||
do { tie my %h, 'Tie::StdHash'; %h = ('a',1); \%h }, '{"a":1}',
|
||||
do { tie my %h, 'Tie::StdHash'; %h = ('b',1); \%h }, '{"b":1}',
|
||||
do { tie my %h, 'MyOrderedHash', one => 1, two => undef, three => []; \%h }, '{"one":1,"two":null,"three":[]}',
|
||||
|
||||
do { my $o = [true]; bless \$o, 'MyToJSON' }, '[1,"MyToJSON",[true]]',
|
||||
|
|
@ -68,7 +68,7 @@ my @errors = (
|
|||
do { my $o = {}; bless $o, 'MyToJSONSelf' }, qr/MyToJSONSelf::TO_JSON method returned same object as was passed instead of a new one/,
|
||||
);
|
||||
|
||||
plan tests => @tests*2 + @errors/2 + 8;
|
||||
plan tests => @tests*2 + @errors/2 + 9;
|
||||
|
||||
for my($in, $exp) (@tests) {
|
||||
my $out = json_format $in;
|
||||
|
|
@ -87,6 +87,10 @@ for my ($in, $exp) (@errors) {
|
|||
}
|
||||
|
||||
|
||||
is json_format({qw/a 1 b 2 c 3 d 4 d1 5 d11 6/, do { use utf8; qw/ü 7 月 8 💩 9/ }}, canonical => 1),
|
||||
do { use utf8; '{"a":"1","b":"2","c":"3","d":"4","d1":"5","d11":"6","ü":"7","月":"8","💩":"9"}' };
|
||||
|
||||
|
||||
eval { json_format [[]], max_depth => 2 };
|
||||
like $@, qr/max_depth exceeded while formatting JSON/;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue