jsonparse: A bunch of performance improvements
Turns out JSON::XS had some pretty good ideas that I could borrow.
This commit is contained in:
parent
ca8d1b72be
commit
ebe84167e7
5 changed files with 144 additions and 111 deletions
|
|
@ -55,7 +55,7 @@ API object from L<JSON::XS> documentation.
|
|||
Cpanel::JSON::XS 112845/s 102903/s 101843/s
|
||||
JSON::SIMD 125127/s 116569/s 117446/s
|
||||
JSON::XS 128792/s 118616/s 117257/s
|
||||
FU::Util 126324/s 106597/s 62633/s
|
||||
FU::Util 136151/s 115564/s 120184/s
|
||||
|
||||
Object (small)
|
||||
|
||||
|
|
@ -64,7 +64,7 @@ Object (small)
|
|||
Cpanel::JSON::XS 42755/s 27849/s 18920/s
|
||||
JSON::SIMD 48487/s 30266/s 22527/s
|
||||
JSON::XS 48980/s 30558/s 24065/s
|
||||
FU::Util 43172/s 24882/s 6818/s
|
||||
FU::Util 49346/s 26316/s 20768/s
|
||||
|
||||
Object (large)
|
||||
|
||||
|
|
@ -73,7 +73,7 @@ Object (large)
|
|||
Cpanel::JSON::XS 29889/s 11629/s 16264/s
|
||||
JSON::SIMD 33950/s 12050/s 22649/s
|
||||
JSON::XS 34534/s 12399/s 23418/s
|
||||
FU::Util 37298/s 12001/s 5413/s
|
||||
FU::Util 40482/s 13735/s 18161/s
|
||||
|
||||
Object (large, mixed unicode)
|
||||
|
||||
|
|
@ -82,7 +82,7 @@ Object (large, mixed unicode)
|
|||
Cpanel::JSON::XS 20563/s 1402/s 7590/s
|
||||
JSON::SIMD 24996/s 1401/s 15959/s
|
||||
JSON::XS 26915/s 1454/s 8848/s
|
||||
FU::Util 24280/s 10461/s 5998/s
|
||||
FU::Util 24706/s 11403/s 9379/s
|
||||
|
||||
Small integers
|
||||
|
||||
|
|
@ -91,7 +91,7 @@ Small integers
|
|||
Cpanel::JSON::XS 7235/s 6008/s
|
||||
JSON::SIMD 8058/s 4335/s
|
||||
JSON::XS 8036/s 5998/s
|
||||
FU::Util 8344/s 5656/s
|
||||
FU::Util 8701/s 5902/s
|
||||
|
||||
Large integers
|
||||
|
||||
|
|
@ -100,7 +100,7 @@ Large integers
|
|||
Cpanel::JSON::XS 31337/s 48514/s
|
||||
JSON::SIMD 36876/s 52446/s
|
||||
JSON::XS 36665/s 48689/s
|
||||
FU::Util 115559/s 62931/s
|
||||
FU::Util 122700/s 62048/s
|
||||
|
||||
ASCII strings
|
||||
|
||||
|
|
@ -109,7 +109,7 @@ ASCII strings
|
|||
Cpanel::JSON::XS 117604/s 43652/s
|
||||
JSON::SIMD 136214/s 50865/s
|
||||
JSON::XS 136486/s 40599/s
|
||||
FU::Util 168713/s 32742/s
|
||||
FU::Util 192974/s 43327/s
|
||||
|
||||
Unicode strings
|
||||
|
||||
|
|
@ -118,7 +118,7 @@ Unicode strings
|
|||
Cpanel::JSON::XS 92871/s 66032/s
|
||||
JSON::SIMD 106664/s 100555/s
|
||||
JSON::XS 104716/s 60538/s
|
||||
FU::Util 108032/s 36331/s
|
||||
FU::Util 124756/s 49825/s
|
||||
|
||||
String escaping (few)
|
||||
|
||||
|
|
@ -127,7 +127,7 @@ String escaping (few)
|
|||
Cpanel::JSON::XS 133222/s 104174/s
|
||||
JSON::SIMD 155205/s 148939/s
|
||||
JSON::XS 123154/s 92594/s
|
||||
FU::Util 206172/s 76491/s
|
||||
FU::Util 202792/s 85352/s
|
||||
|
||||
String escaping (many)
|
||||
|
||||
|
|
@ -136,7 +136,7 @@ String escaping (many)
|
|||
Cpanel::JSON::XS 139049/s 99260/s
|
||||
JSON::SIMD 152317/s 113037/s
|
||||
JSON::XS 153421/s 107918/s
|
||||
FU::Util 132774/s 64880/s
|
||||
FU::Util 132523/s 78199/s
|
||||
|
||||
|
||||
|
||||
|
|
@ -144,122 +144,122 @@ String escaping (many)
|
|||
|
||||
# Cached data used by bench.PL. Same as the formatted tables above but easier to parse.
|
||||
json/api Canonical Cpanel::JSON::XS 102903
|
||||
json/api Canonical FU::Util 106597
|
||||
json/api Canonical FU::Util 115564
|
||||
json/api Canonical JSON::PP 4973
|
||||
json/api Canonical JSON::SIMD 116569
|
||||
json/api Canonical JSON::XS 118616
|
||||
json/api Decode Cpanel::JSON::XS 101843
|
||||
json/api Decode FU::Util 62633
|
||||
json/api Decode FU::Util 120184
|
||||
json/api Decode JSON::PP 1290
|
||||
json/api Decode JSON::SIMD 117446
|
||||
json/api Decode JSON::XS 117257
|
||||
json/api Encode Cpanel::JSON::XS 112845
|
||||
json/api Encode FU::Util 126324
|
||||
json/api Encode FU::Util 136151
|
||||
json/api Encode JSON::PP 5397
|
||||
json/api Encode JSON::SIMD 125127
|
||||
json/api Encode JSON::XS 128792
|
||||
json/intl Decode Cpanel::JSON::XS 48514
|
||||
json/intl Decode FU::Util 62931
|
||||
json/intl Decode FU::Util 62048
|
||||
json/intl Decode JSON::PP 330
|
||||
json/intl Decode JSON::SIMD 52446
|
||||
json/intl Decode JSON::XS 48689
|
||||
json/intl Encode Cpanel::JSON::XS 31337
|
||||
json/intl Encode FU::Util 115559
|
||||
json/intl Encode FU::Util 122700
|
||||
json/intl Encode JSON::PP 2174
|
||||
json/intl Encode JSON::SIMD 36876
|
||||
json/intl Encode JSON::XS 36665
|
||||
json/ints Decode Cpanel::JSON::XS 6008
|
||||
json/ints Decode FU::Util 5656
|
||||
json/ints Decode FU::Util 5902
|
||||
json/ints Decode JSON::PP 29
|
||||
json/ints Decode JSON::SIMD 4335
|
||||
json/ints Decode JSON::XS 5998
|
||||
json/ints Encode Cpanel::JSON::XS 7235
|
||||
json/ints Encode FU::Util 8344
|
||||
json/ints Encode FU::Util 8701
|
||||
json/ints Encode JSON::PP 113
|
||||
json/ints Encode JSON::SIMD 8058
|
||||
json/ints Encode JSON::XS 8036
|
||||
json/objl Canonical Cpanel::JSON::XS 11629
|
||||
json/objl Canonical FU::Util 12001
|
||||
json/objl Canonical FU::Util 13735
|
||||
json/objl Canonical JSON::PP 744
|
||||
json/objl Canonical JSON::SIMD 12050
|
||||
json/objl Canonical JSON::XS 12399
|
||||
json/objl Decode Cpanel::JSON::XS 16264
|
||||
json/objl Decode FU::Util 5413
|
||||
json/objl Decode FU::Util 18161
|
||||
json/objl Decode JSON::PP 105
|
||||
json/objl Decode JSON::SIMD 22649
|
||||
json/objl Decode JSON::XS 23418
|
||||
json/objl Encode Cpanel::JSON::XS 29889
|
||||
json/objl Encode FU::Util 37298
|
||||
json/objl Encode FU::Util 40482
|
||||
json/objl Encode JSON::PP 916
|
||||
json/objl Encode JSON::SIMD 33950
|
||||
json/objl Encode JSON::XS 34534
|
||||
json/objs Canonical Cpanel::JSON::XS 27849
|
||||
json/objs Canonical FU::Util 24882
|
||||
json/objs Canonical FU::Util 26316
|
||||
json/objs Canonical JSON::PP 827
|
||||
json/objs Canonical JSON::SIMD 30266
|
||||
json/objs Canonical JSON::XS 30558
|
||||
json/objs Decode Cpanel::JSON::XS 18920
|
||||
json/objs Decode FU::Util 6818
|
||||
json/objs Decode FU::Util 20768
|
||||
json/objs Decode JSON::PP 206
|
||||
json/objs Decode JSON::SIMD 22527
|
||||
json/objs Decode JSON::XS 24065
|
||||
json/objs Encode Cpanel::JSON::XS 42755
|
||||
json/objs Encode FU::Util 43172
|
||||
json/objs Encode FU::Util 49346
|
||||
json/objs Encode JSON::PP 905
|
||||
json/objs Encode JSON::SIMD 48487
|
||||
json/objs Encode JSON::XS 48980
|
||||
json/obju Canonical Cpanel::JSON::XS 1402
|
||||
json/obju Canonical FU::Util 10461
|
||||
json/obju Canonical FU::Util 11403
|
||||
json/obju Canonical JSON::PP 699
|
||||
json/obju Canonical JSON::SIMD 1401
|
||||
json/obju Canonical JSON::XS 1454
|
||||
json/obju Decode Cpanel::JSON::XS 7590
|
||||
json/obju Decode FU::Util 5998
|
||||
json/obju Decode FU::Util 9379
|
||||
json/obju Decode JSON::PP 86
|
||||
json/obju Decode JSON::SIMD 15959
|
||||
json/obju Decode JSON::XS 8848
|
||||
json/obju Encode Cpanel::JSON::XS 20563
|
||||
json/obju Encode FU::Util 24280
|
||||
json/obju Encode FU::Util 24706
|
||||
json/obju Encode JSON::PP 845
|
||||
json/obju Encode JSON::SIMD 24996
|
||||
json/obju Encode JSON::XS 26915
|
||||
json/strel Decode Cpanel::JSON::XS 99260
|
||||
json/strel Decode FU::Util 64880
|
||||
json/strel Decode FU::Util 78199
|
||||
json/strel Decode JSON::PP 360
|
||||
json/strel Decode JSON::SIMD 113037
|
||||
json/strel Decode JSON::XS 107918
|
||||
json/strel Encode Cpanel::JSON::XS 139049
|
||||
json/strel Encode FU::Util 132774
|
||||
json/strel Encode FU::Util 132523
|
||||
json/strel Encode JSON::PP 2230
|
||||
json/strel Encode JSON::SIMD 152317
|
||||
json/strel Encode JSON::XS 153421
|
||||
json/stres Decode Cpanel::JSON::XS 104174
|
||||
json/stres Decode FU::Util 76491
|
||||
json/stres Decode FU::Util 85352
|
||||
json/stres Decode JSON::PP 350
|
||||
json/stres Decode JSON::SIMD 148939
|
||||
json/stres Decode JSON::XS 92594
|
||||
json/stres Encode Cpanel::JSON::XS 133222
|
||||
json/stres Encode FU::Util 206172
|
||||
json/stres Encode FU::Util 202792
|
||||
json/stres Encode JSON::PP 4258
|
||||
json/stres Encode JSON::SIMD 155205
|
||||
json/stres Encode JSON::XS 123154
|
||||
json/strs Decode Cpanel::JSON::XS 43652
|
||||
json/strs Decode FU::Util 32742
|
||||
json/strs Decode FU::Util 43327
|
||||
json/strs Decode JSON::PP 341
|
||||
json/strs Decode JSON::SIMD 50865
|
||||
json/strs Decode JSON::XS 40599
|
||||
json/strs Encode Cpanel::JSON::XS 117604
|
||||
json/strs Encode FU::Util 168713
|
||||
json/strs Encode FU::Util 192974
|
||||
json/strs Encode JSON::PP 2922
|
||||
json/strs Encode JSON::SIMD 136214
|
||||
json/strs Encode JSON::XS 136486
|
||||
json/stru Decode Cpanel::JSON::XS 66032
|
||||
json/stru Decode FU::Util 36331
|
||||
json/stru Decode FU::Util 49825
|
||||
json/stru Decode JSON::PP 254
|
||||
json/stru Decode JSON::SIMD 100555
|
||||
json/stru Decode JSON::XS 60538
|
||||
json/stru Encode Cpanel::JSON::XS 92871
|
||||
json/stru Encode FU::Util 108032
|
||||
json/stru Encode FU::Util 124756
|
||||
json/stru Encode JSON::PP 5144
|
||||
json/stru Encode JSON::SIMD 106664
|
||||
json/stru Encode JSON::XS 104716
|
||||
|
|
|
|||
51
c/common.c
51
c/common.c
|
|
@ -2,29 +2,47 @@
|
|||
|
||||
typedef struct {
|
||||
SV *sv;
|
||||
SV *mortal;
|
||||
char *cur;
|
||||
char *end;
|
||||
size_t maxlen;
|
||||
int setutf8;
|
||||
char sbuf[4096];
|
||||
} fustr;
|
||||
|
||||
/* sv must be a new SV with a preallocated buffer */
|
||||
static void fustr_init_(pTHX_ fustr *s, SV *sv, size_t maxlen) {
|
||||
s->sv = sv;
|
||||
SvPOK_only(s->sv);
|
||||
s->cur = SvPVX(s->sv);
|
||||
s->end = SvEND(s->sv);
|
||||
static void fustr_init_(pTHX_ fustr *s, SV *mortal, size_t maxlen) {
|
||||
s->sv = NULL;
|
||||
s->cur = s->sbuf;
|
||||
s->end = s->sbuf + (maxlen > sizeof s->sbuf ? sizeof s->sbuf : maxlen);
|
||||
s->maxlen = maxlen;
|
||||
s->mortal = mortal;
|
||||
s->setutf8 = 0;
|
||||
}
|
||||
|
||||
#define fustr_start(s) (((s)->sv ? SvPVX((s)->sv) : (s)->sbuf))
|
||||
|
||||
static void fustr_grow(pTHX_ fustr *s, size_t add) {
|
||||
size_t off = s->cur - SvPVX(s->sv);
|
||||
size_t newlen = 64;
|
||||
size_t off = s->cur - (s->sv ? SvPVX(s->sv) : s->sbuf);
|
||||
size_t newlen = sizeof s->sbuf;
|
||||
char *buf;
|
||||
add += off;
|
||||
if (add > s->maxlen) croak("maximum string length exceeded");
|
||||
/* Increment to next power of two; SvGROW's default strategy is slow */
|
||||
while (newlen < add) newlen <<= 1;
|
||||
if (newlen > s->maxlen) newlen = s->maxlen;
|
||||
char *buf = SvGROW(s->sv, newlen);
|
||||
if (s->sv) {
|
||||
buf = SvGROW(s->sv, newlen);
|
||||
} else {
|
||||
if (s->mortal) {
|
||||
s->sv = s->mortal;
|
||||
sv_setpv_bufsize(s->sv, off, newlen);
|
||||
} else {
|
||||
s->sv = newSV(newlen);
|
||||
}
|
||||
SvPOK_only(s->sv);
|
||||
buf = SvPVX(s->sv);
|
||||
memcpy(buf, s->sbuf, off);
|
||||
}
|
||||
s->cur = buf + off;
|
||||
s->end = buf + (SvLEN(s->sv) > s->maxlen ? s->maxlen : SvLEN(s->sv));
|
||||
}
|
||||
|
|
@ -39,6 +57,11 @@ static inline void fustr_write_(pTHX_ fustr *s, const char *str, size_t n) {
|
|||
s->cur += n;
|
||||
}
|
||||
|
||||
static inline void fustr_write_ch_(pTHX_ fustr *s, char x) {
|
||||
fustr_reserve_(aTHX_ s, 1);
|
||||
*(s->cur++) = x;
|
||||
}
|
||||
|
||||
/* Adds n uninitialized bytes to the string and returns a buffer to write the data to */
|
||||
static inline char *fustr_write_buf_(pTHX_ fustr *s, size_t n) {
|
||||
fustr_reserve_(aTHX_ s, n);
|
||||
|
|
@ -50,13 +73,19 @@ static inline char *fustr_write_buf_(pTHX_ fustr *s, size_t n) {
|
|||
static SV *fustr_done_(pTHX_ fustr *s) {
|
||||
fustr_reserve_(aTHX_ s, 1);
|
||||
*s->cur = 0;
|
||||
SvCUR_set(s->sv, s->cur - SvPVX(s->sv));
|
||||
// TODO: SvPV_shrink_to_cur?
|
||||
if (s->sv) {
|
||||
SvCUR_set(s->sv, s->cur - SvPVX(s->sv));
|
||||
// TODO: SvPV_shrink_to_cur?
|
||||
} else {
|
||||
s->sv = newSVpvn_flags(s->sbuf, s->cur - s->sbuf, s->mortal ? SVs_TEMP : 0);
|
||||
}
|
||||
if (s->setutf8) SvUTF8_on(s->sv);
|
||||
return s->sv;
|
||||
}
|
||||
|
||||
#define fustr_init(a,b,c) fustr_init_(aTHX_ a,b,c)
|
||||
#define fustr_reserve(a,b) fustr_reserve_(aTHX_ a,b)
|
||||
#define fustr_write(a,b,c) fustr_write_(aTHX_ a,b,c)
|
||||
#define fustr_write_ch(a,b) fustr_write_ch_(aTHX_ a,b)
|
||||
#define fustr_write_buf(a,b) fustr_write_buf_(aTHX_ a,b)
|
||||
#define fustr_done(a) fustr_done_(aTHX_ a)
|
||||
|
|
|
|||
24
c/jsonfmt.c
24
c/jsonfmt.c
|
|
@ -30,7 +30,7 @@ static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t l
|
|||
croak("invalid codepoint encountered in string, cannot format to JSON");
|
||||
}
|
||||
|
||||
fustr_write(&ctx->out, "\"", 1);
|
||||
fustr_write_ch(&ctx->out, '\"');
|
||||
fustr_reserve(&ctx->out, len);
|
||||
|
||||
while (off < len) {
|
||||
|
|
@ -78,7 +78,7 @@ static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t l
|
|||
}
|
||||
}
|
||||
|
||||
fustr_write(&ctx->out, "\"", 1);
|
||||
fustr_write_ch(&ctx->out, '\"');
|
||||
}
|
||||
|
||||
static const char fujson_digits[] =
|
||||
|
|
@ -109,7 +109,7 @@ static void fujson_fmt_int(pTHX_ fujson_fmt_ctx *ctx, SV *val) {
|
|||
}
|
||||
|
||||
if (uv == 0) {
|
||||
fustr_write(&ctx->out, "0", 1);
|
||||
fustr_write_ch(&ctx->out, '0');
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -125,10 +125,10 @@ static void fujson_fmt_int(pTHX_ fujson_fmt_ctx *ctx, SV *val) {
|
|||
|
||||
static void fujson_fmt_av(pTHX_ fujson_fmt_ctx *ctx, AV *av) {
|
||||
int i, len = av_count(av);
|
||||
fustr_write(&ctx->out, "[", 1);
|
||||
fustr_write_ch(&ctx->out, '[');
|
||||
ctx->pretty++;
|
||||
for (i=0; i<len; i++) {
|
||||
if (i) fustr_write(&ctx->out, ",", 1);
|
||||
if (i) fustr_write_ch(&ctx->out, ',');
|
||||
fujson_fmt_indent(aTHX_ ctx);
|
||||
SV **sv = av_fetch(av, i, 0);
|
||||
if (sv) fujson_fmt(aTHX_ ctx, *sv); /* sv will have magic if av is tied, but fujson_fmt() handles that. */
|
||||
|
|
@ -136,7 +136,7 @@ static void fujson_fmt_av(pTHX_ fujson_fmt_ctx *ctx, AV *av) {
|
|||
}
|
||||
ctx->pretty--;
|
||||
if (i) fujson_fmt_indent(aTHX_ ctx);
|
||||
fustr_write(&ctx->out, "]", 1);
|
||||
fustr_write_ch(&ctx->out, ']');
|
||||
}
|
||||
|
||||
static int fujson_fmt_hvcmp(const void *pa, const void *pb) {
|
||||
|
|
@ -159,12 +159,12 @@ static int fujson_fmt_hvcmp(const void *pa, const void *pb) {
|
|||
|
||||
static void fujson_fmt_hvkv(pTHX_ fujson_fmt_ctx *ctx, HV *hv, HE *he, char **hestr) {
|
||||
STRLEN helen;
|
||||
if (*hestr) fustr_write(&ctx->out, ",", 1);
|
||||
if (*hestr) fustr_write_ch(&ctx->out, ',');
|
||||
fujson_fmt_indent(aTHX_ ctx);
|
||||
*hestr = HePV(he, helen);
|
||||
fujson_fmt_str(aTHX_ ctx, *hestr, helen, HeUTF8(he));
|
||||
if (ctx->pretty > 0) fustr_write(&ctx->out, " : ", 3);
|
||||
else fustr_write(&ctx->out, ":", 1);
|
||||
else fustr_write_ch(&ctx->out, ':');
|
||||
fujson_fmt(aTHX_ ctx, UNLIKELY(SvMAGICAL(hv)) ? hv_iterval(hv, he) : HeVAL(he));
|
||||
}
|
||||
|
||||
|
|
@ -173,7 +173,7 @@ static void fujson_fmt_hv(pTHX_ fujson_fmt_ctx *ctx, HV *hv) {
|
|||
char *hestr = NULL;
|
||||
|
||||
int numkeys = hv_iterinit(hv);
|
||||
fustr_write(&ctx->out, "{", 1);
|
||||
fustr_write_ch(&ctx->out, '{');
|
||||
ctx->pretty++;
|
||||
|
||||
/* Canonical order on tied hashes is not supported. Cpanel::JSON::XS has
|
||||
|
|
@ -204,7 +204,7 @@ static void fujson_fmt_hv(pTHX_ fujson_fmt_ctx *ctx, HV *hv) {
|
|||
}
|
||||
ctx->pretty--;
|
||||
if (hestr) fujson_fmt_indent(aTHX_ ctx);
|
||||
fustr_write(&ctx->out, "}", 1);
|
||||
fustr_write_ch(&ctx->out, '}');
|
||||
}
|
||||
|
||||
static void fujson_fmt_obj(pTHX_ fujson_fmt_ctx *ctx, SV *rv, SV *obj) {
|
||||
|
|
@ -305,9 +305,9 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
|
|||
if (ctx.out.maxlen == 0) ctx.out.maxlen = 1<<30;
|
||||
if (ctx.depth == 0) ctx.depth = 512;
|
||||
|
||||
fustr_init(&ctx.out, sv_2mortal(newSV(128)), ctx.out.maxlen);
|
||||
fustr_init(&ctx.out, sv_newmortal(), ctx.out.maxlen);
|
||||
fujson_fmt(aTHX_ &ctx, val);
|
||||
if (ctx.pretty >= 0) fustr_write(&ctx.out, "\n", 1);
|
||||
if (ctx.pretty >= 0) fustr_write_ch(&ctx.out, '\n');
|
||||
r = fustr_done(&ctx.out);
|
||||
if (!encutf8) SvUTF8_on(r);
|
||||
return r;
|
||||
|
|
|
|||
107
c/jsonparse.c
107
c/jsonparse.c
|
|
@ -14,19 +14,19 @@ static void fujson_parse_ws(pTHX_ fujson_parse_ctx *ctx) {
|
|||
}
|
||||
}
|
||||
|
||||
static int fujson_parse_string_escape(pTHX_ fujson_parse_ctx *ctx, fustr *r) {
|
||||
static inline int fujson_parse_string_escape(fujson_parse_ctx *ctx, fustr *r) {
|
||||
unsigned int n, s;
|
||||
ctx->buf++; /* '\\' */
|
||||
if (ctx->buf == ctx->end) return 1;
|
||||
if (UNLIKELY(ctx->buf == ctx->end)) return 1;
|
||||
switch (*(ctx->buf++)) {
|
||||
case '"': fustr_write(r, "\"", 1); break;
|
||||
case '\\': fustr_write(r, "\\", 1); break;
|
||||
case '/': fustr_write(r, "/", 1); break; /* We don't escape this one */
|
||||
case 'b': fustr_write(r, "\x08", 1); break;
|
||||
case 't': fustr_write(r, "\x09", 1); break;
|
||||
case 'n': fustr_write(r, "\x0a", 1); break;
|
||||
case 'f': fustr_write(r, "\x0c", 1); break;
|
||||
case 'r': fustr_write(r, "\x0d", 1); break;
|
||||
case '"': *(r->cur++) = '\"'; break;
|
||||
case '\\':*(r->cur++) = '\\'; break;
|
||||
case '/': *(r->cur++) = '/'; break; /* We don't escape this one */
|
||||
case 'b': *(r->cur++) = 0x08; break;
|
||||
case 't': *(r->cur++) = 0x09; break;
|
||||
case 'n': *(r->cur++) = 0x0a; break;
|
||||
case 'f': *(r->cur++) = 0x0c; break;
|
||||
case 'r': *(r->cur++) = 0x0d; break;
|
||||
case 'u':
|
||||
/* (awful code adapted from ncdu) */
|
||||
#define INV (1<<16)
|
||||
|
|
@ -44,8 +44,8 @@ static int fujson_parse_string_escape(pTHX_ fujson_parse_ctx *ctx, fustr *r) {
|
|||
n = 0x10000 + (((n & 0x03ff) << 10) | (s & 0x03ff));
|
||||
ctx->buf += 6;
|
||||
}
|
||||
fustr_reserve(r, 4);
|
||||
r->cur = (char *)uvchr_to_utf8((U8 *)r->cur, n);
|
||||
if (n >= 0x80) r->setutf8 = 1;
|
||||
break;
|
||||
#undef INV
|
||||
#undef hn
|
||||
|
|
@ -56,45 +56,41 @@ static int fujson_parse_string_escape(pTHX_ fujson_parse_ctx *ctx, fustr *r) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
static SV *fujson_parse_string(pTHX_ fujson_parse_ctx *ctx) {
|
||||
fustr r;
|
||||
size_t len, maxlen;
|
||||
unsigned char x = 0;
|
||||
fustr_init(&r, newSV(32), SIZE_MAX);
|
||||
static int fujson_parse_string_buf(pTHX_ fujson_parse_ctx *ctx, fustr *r) {
|
||||
size_t len;
|
||||
unsigned char x;
|
||||
ctx->buf++; /* '"' */
|
||||
while (true) {
|
||||
/* Fast path: ASCII, no unescaping needed */
|
||||
len = 0;
|
||||
maxlen = ctx->end - ctx->buf;
|
||||
while (len < maxlen) {
|
||||
x = ctx->buf[len];
|
||||
/* While we always escape 0x7f when formatting, JSON does permit it unescaped */
|
||||
if (x <= 0x1f || x == '"' || x == '\\' || x >= 0x80) break;
|
||||
len++;
|
||||
}
|
||||
if (len == maxlen) goto err;
|
||||
fustr_write(&r, (const char *)ctx->buf, len);
|
||||
ctx->buf += len;
|
||||
|
||||
/* Slow path */
|
||||
if (x == '"') {
|
||||
fustr_reserve(aTHX_ r, 4);
|
||||
if (UNLIKELY(ctx->buf == ctx->end)) return 1;
|
||||
x = *ctx->buf;
|
||||
if (UNLIKELY(x == '"')) {
|
||||
ctx->buf++;
|
||||
SvUTF8_on(fustr_done(&r));
|
||||
return r.sv;
|
||||
} else if (x == '\\') {
|
||||
if (fujson_parse_string_escape(aTHX_ ctx, &r)) goto err;
|
||||
return 0;
|
||||
} else if (UNLIKELY(x == '\\')) {
|
||||
if (fujson_parse_string_escape(ctx, r)) return 1;
|
||||
} else if (x >= 0x80) {
|
||||
len = isC9_STRICT_UTF8_CHAR(ctx->buf, ctx->end);
|
||||
if (len == 0) goto err;
|
||||
fustr_write(&r, (const char *)ctx->buf, len);
|
||||
if (UNLIKELY((len = isC9_STRICT_UTF8_CHAR(ctx->buf, ctx->end)) == 0)) return 1;
|
||||
memcpy(r->cur, ctx->buf, len);
|
||||
r->cur += len;
|
||||
ctx->buf += len;
|
||||
} else {
|
||||
goto err;
|
||||
}
|
||||
r->setutf8 = 1;
|
||||
} else if (x >= 0x20) {
|
||||
*(r->cur++) = x;
|
||||
ctx->buf++;
|
||||
} else return 1;
|
||||
}
|
||||
}
|
||||
|
||||
static SV *fujson_parse_string(pTHX_ fujson_parse_ctx *ctx) {
|
||||
fustr r;
|
||||
fustr_init(&r, NULL, SIZE_MAX);
|
||||
if (fujson_parse_string_buf(aTHX_ ctx, &r)) {
|
||||
if (r.sv) SvREFCNT_dec(r.sv);
|
||||
return NULL;
|
||||
} else {
|
||||
return fustr_done(&r);
|
||||
}
|
||||
err:
|
||||
SvREFCNT_dec(r.sv);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Validate JSON grammar of a number, increments ctx->buf to the end of the
|
||||
|
|
@ -180,8 +176,12 @@ err:
|
|||
|
||||
static SV *fujson_parse_obj(pTHX_ fujson_parse_ctx *ctx) {
|
||||
HV *hv = newHV();
|
||||
SV *key = NULL;
|
||||
SV *val;
|
||||
char *keystart;
|
||||
UV keyhash;
|
||||
fustr key;
|
||||
fustr_init(&key, NULL, SIZE_MAX);
|
||||
|
||||
ctx->buf++; /* '{' */
|
||||
fujson_parse_ws(aTHX_ ctx);
|
||||
if (ctx->buf == ctx->end) goto err;
|
||||
|
|
@ -189,9 +189,11 @@ static SV *fujson_parse_obj(pTHX_ fujson_parse_ctx *ctx) {
|
|||
while (true) {
|
||||
/* key */
|
||||
if (*ctx->buf != '"') goto err;
|
||||
if (!(key = fujson_parse_string(aTHX_ ctx))) goto err;
|
||||
/* TODO: Use precomputed hash */
|
||||
if (hv_exists_ent(hv, key, 0)) goto err;
|
||||
if (fujson_parse_string_buf(aTHX_ ctx, &key)) goto err;
|
||||
keystart = fustr_start(&key);
|
||||
if (key.setutf8) keyhash = 0;
|
||||
else PERL_HASH(keyhash, keystart, key.cur - keystart);
|
||||
if (hv_common(hv, NULL, keystart, key.cur - keystart, key.setutf8, HV_FETCH_ISEXISTS, NULL, keyhash)) goto err;
|
||||
|
||||
/* ':' */
|
||||
fujson_parse_ws(aTHX_ ctx);
|
||||
|
|
@ -201,9 +203,9 @@ static SV *fujson_parse_obj(pTHX_ fujson_parse_ctx *ctx) {
|
|||
|
||||
/* value */
|
||||
if (!(val = fujson_parse(aTHX_ ctx))) goto err;
|
||||
hv_store_ent(hv, key, val, 0);
|
||||
SvREFCNT_dec(key); /* TODO: can reuse buffer */
|
||||
key = NULL;
|
||||
hv_common(hv, NULL, keystart, key.cur - keystart, key.setutf8, HV_FETCH_ISSTORE|HV_FETCH_JUST_SV, val, keyhash);
|
||||
key.cur = keystart;
|
||||
key.setutf8 = 0;
|
||||
|
||||
fujson_parse_ws(aTHX_ ctx);
|
||||
if (ctx->buf == ctx->end) goto err;
|
||||
|
|
@ -213,10 +215,11 @@ static SV *fujson_parse_obj(pTHX_ fujson_parse_ctx *ctx) {
|
|||
fujson_parse_ws(aTHX_ ctx);
|
||||
}
|
||||
done:
|
||||
if (key.sv) SvREFCNT_dec(key.sv);
|
||||
ctx->buf++; /* '}' */
|
||||
return newRV_noinc((SV *)hv);
|
||||
err:
|
||||
if (key) SvREFCNT_dec(key);
|
||||
if (key.sv) SvREFCNT_dec(key.sv);
|
||||
SvREFCNT_dec((SV *)hv);
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ my @error = (
|
|||
'{"":1,}',
|
||||
'{,}',
|
||||
'{"":1,"":2}',
|
||||
'{"ë":1,"ë":1}',
|
||||
);
|
||||
for my $s (@error) {
|
||||
ok !eval { json_parse($s); 1 };
|
||||
|
|
@ -70,10 +71,10 @@ sub str($in, $exp) {
|
|||
utf8::encode(my $str = $in);
|
||||
my $out = json_parse($in);
|
||||
is $out, $exp, $str;
|
||||
ok utf8::is_utf8($out);
|
||||
ok utf8::is_utf8($out) || $out =~ /^[\x00-\x7f]*$/;
|
||||
$out = json_parse($str, utf8 => 1);
|
||||
is $out, $exp, $str;
|
||||
ok utf8::is_utf8($out);
|
||||
ok utf8::is_utf8($out) || $out =~ /^[\x00-\x7f]*$/;
|
||||
}
|
||||
str '""', '';
|
||||
str '"hello, world"', 'hello, world';
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue