diff --git a/FU/Util.pod b/FU/Util.pod index e82bb1e..dbf5055 100644 --- a/FU/Util.pod +++ b/FU/Util.pod @@ -45,6 +45,25 @@ be sufficient. The following is probably an improvement: json_format($data) =~ s{ are supported: + +=over + +=item utf8 + +When set to a true value, returns a UTF-8 encoded byte string instead of a Perl +Unicode string. + +=item max_size + +Maximum permitted size, in bytes, of the generated JSON string. Defaults to 1 GiB. + +=item max_depth + +Maximum permitted nesting depth of Perl values. Defaults to 512. + +=back + =back (Why the hell yet another JSON codec when CPAN is already full of them!? Well, diff --git a/c/common.c b/c/common.c index 37a6961..db2b94f 100644 --- a/c/common.c +++ b/c/common.c @@ -4,24 +4,29 @@ typedef struct { SV *sv; char *cur; char *end; + size_t maxlen; } fustr; -static void fustr_init_(pTHX_ fustr *s, size_t prealloc) { +static void fustr_init_(pTHX_ fustr *s, size_t prealloc, size_t maxlen) { + if (prealloc > maxlen) prealloc = maxlen; s->sv = sv_2mortal(newSV(prealloc)); SvPOK_only(s->sv); s->cur = SvPVX(s->sv); s->end = SvEND(s->sv); + s->maxlen = maxlen; } static void fustr_grow(pTHX_ fustr *s, size_t add) { size_t off = s->cur - SvPVX(s->sv); size_t newlen = 64; add += off; + if (add > s->maxlen) croak("maximum string length exceeded"); /* Increment to next power of two; SvGROW's default strategy is slow */ while (newlen < add) newlen <<= 1; + if (newlen > s->maxlen) newlen = s->maxlen; char *buf = SvGROW(s->sv, newlen); s->cur = buf + off; - s->end = buf + SvLEN(s->sv); + s->end = buf + (SvLEN(s->sv) > s->maxlen ? 
s->maxlen : SvLEN(s->sv)); } static inline void fustr_reserve_(pTHX_ fustr *s, size_t add) { @@ -50,7 +55,7 @@ static SV *fustr_done_(pTHX_ fustr *s) { return s->sv; } -#define fustr_init(a,b) fustr_init_(aTHX_ a,b) +#define fustr_init(a,b,c) fustr_init_(aTHX_ a,b,c) #define fustr_reserve(a,b) fustr_reserve_(aTHX_ a,b) #define fustr_write(a,b,c) fustr_write_(aTHX_ a,b,c) #define fustr_write_buf(a,b) fustr_write_buf_(aTHX_ a,b) diff --git a/c/jsonfmt.c b/c/jsonfmt.c index a46b29a..48264dc 100644 --- a/c/jsonfmt.c +++ b/c/jsonfmt.c @@ -1,6 +1,11 @@ -static void fujson_fmt(pTHX_ fustr *, SV *); +typedef struct { + fustr out; + UV depth; +} fujson_fmt_ctx; -static void fujson_fmt_str(pTHX_ fustr *out, const char *stri, size_t len, int utf8) { +static void fujson_fmt(pTHX_ fujson_fmt_ctx *, SV *); + +static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t len, int utf8) { size_t off = 0, loff; const unsigned char *str = (const unsigned char *)stri; unsigned char *buf; @@ -15,8 +20,8 @@ static void fujson_fmt_str(pTHX_ fustr *out, const char *stri, size_t len, int u croak("invalid codepoint encountered in string, cannot format to JSON"); } - fustr_write(out, "\"", 1); - fustr_reserve(out, len); + fustr_write(&ctx->out, "\"", 1); + fustr_reserve(&ctx->out, len); while (off < len) { /* Fast path: no escaping needed */ @@ -36,25 +41,25 @@ static void fujson_fmt_str(pTHX_ fustr *out, const char *stri, size_t len, int u off++; } } - fustr_write(out, (char *)str+loff, off-loff); + fustr_write(&ctx->out, (char *)str+loff, off-loff); if (off < len) { /* early break, which means current byte needs special processing */ switch (x) { - case '"': fustr_write(out, "\\\"", 2); break; - case '\\': fustr_write(out, "\\\\", 2); break; - case 0x08: fustr_write(out, "\\b", 2); break; - case 0x09: fustr_write(out, "\\t", 2); break; - case 0x0a: fustr_write(out, "\\n", 2); break; - case 0x0c: fustr_write(out, "\\f", 2); break; - case 0x0d: fustr_write(out, "\\r", 2); 
break; + case '"': fustr_write(&ctx->out, "\\\"", 2); break; + case '\\': fustr_write(&ctx->out, "\\\\", 2); break; + case 0x08: fustr_write(&ctx->out, "\\b", 2); break; + case 0x09: fustr_write(&ctx->out, "\\t", 2); break; + case 0x0a: fustr_write(&ctx->out, "\\n", 2); break; + case 0x0c: fustr_write(&ctx->out, "\\f", 2); break; + case 0x0d: fustr_write(&ctx->out, "\\r", 2); break; default: if (x < 0x80) { - buf = (unsigned char *)fustr_write_buf(out, 6); + buf = (unsigned char *)fustr_write_buf(&ctx->out, 6); memcpy(buf, "\\u00", 4); buf[4] = PL_hexdigit[(x >> 4) & 0x0f]; buf[5] = PL_hexdigit[x & 0x0f]; } else { /* x >= 0x80, !utf8, so encode as 2-byte UTF-8 */ - buf = (unsigned char *)fustr_write_buf(out, 2); + buf = (unsigned char *)fustr_write_buf(&ctx->out, 2); buf[0] = 0xc0 | (x >> 6); buf[1] = 0x80 | (x & 0x3f); } @@ -63,7 +68,7 @@ static void fujson_fmt_str(pTHX_ fustr *out, const char *stri, size_t len, int u } } - fustr_write(out, "\"", 1); + fustr_write(&ctx->out, "\"", 1); } /* All digits between 0 and 100, a trick I borrowed from the Zig stdlib. 
*/ @@ -79,7 +84,7 @@ static const char fujson_digits[] = "80818283848586878889" "90919293949596979899"; -static void fujson_fmt_int(pTHX_ fustr *out, SV *val) { +static void fujson_fmt_int(pTHX_ fujson_fmt_ctx *ctx, SV *val) { char buf[32]; char *r = buf+31; int neg = 0; @@ -95,7 +100,7 @@ static void fujson_fmt_int(pTHX_ fustr *out, SV *val) { } if (uv == 0) { - fustr_write(out, "0", 1); + fustr_write(&ctx->out, "0", 1); return; } @@ -106,39 +111,39 @@ static void fujson_fmt_int(pTHX_ fustr *out, SV *val) { } if (uv > 0) *(--r) = '0' + (uv % 10); if (neg) *(--r) = '-'; - fustr_write(out, r, 31 - (r - buf)); + fustr_write(&ctx->out, r, 31 - (r - buf)); } -static void fujson_fmt_av(pTHX_ fustr *out, AV *av) { +static void fujson_fmt_av(pTHX_ fujson_fmt_ctx *ctx, AV *av) { int i, len = av_count(av); - fustr_write(out, "[", 1); + fustr_write(&ctx->out, "[", 1); for (i=0; i<len; i++) { - if (i) fustr_write(out, ",", 1); + if (i) fustr_write(&ctx->out, ",", 1); SV **sv = av_fetch(av, i, 0); - if (sv) fujson_fmt(aTHX_ out, *sv); /* sv will have magic if av is tied, but fujson_fmt() handles that. */ - else fustr_write(out, "null", 4); + if (sv) fujson_fmt(aTHX_ ctx, *sv); /* sv will have magic if av is tied, but fujson_fmt() handles that. */ + else fustr_write(&ctx->out, "null", 4); } - fustr_write(out, "]", 1); + fustr_write(&ctx->out, "]", 1); } -static void fujson_fmt_hv(pTHX_ fustr *out, HV *hv) { +static void fujson_fmt_hv(pTHX_ fujson_fmt_ctx *ctx, HV *hv) { HE *he; STRLEN helen; char *hestr = NULL; hv_iterinit(hv); - fustr_write(out, "{", 1); + fustr_write(&ctx->out, "{", 1); while ((he = hv_iternext(hv))) { - if (hestr) fustr_write(out, ",", 1); + if (hestr) fustr_write(&ctx->out, ",", 1); hestr = HePV(he, helen); - fujson_fmt_str(aTHX_ out, hestr, helen, HeUTF8(he)); - fustr_write(out, ":", 1); - fujson_fmt(aTHX_ out, UNLIKELY(SvMAGICAL(hv)) ? hv_iterval(hv, he) : HeVAL(he)); + fujson_fmt_str(aTHX_ ctx, hestr, helen, HeUTF8(he)); + fustr_write(&ctx->out, ":", 1); + fujson_fmt(aTHX_ ctx, UNLIKELY(SvMAGICAL(hv)) ? 
hv_iterval(hv, he) : HeVAL(he)); } - fustr_write(out, "}", 1); + fustr_write(&ctx->out, "}", 1); } -static void fujson_fmt_obj(pTHX_ fustr *out, SV *rv, SV *obj) { +static void fujson_fmt_obj(pTHX_ fujson_fmt_ctx *ctx, SV *rv, SV *obj) { dSP; GV *method = gv_fetchmethod_autoload(SvSTASH(obj), "TO_JSON", 0); @@ -162,41 +167,46 @@ static void fujson_fmt_obj(pTHX_ fustr *out, SV *rv, SV *obj) { obj = POPs; PUTBACK; - fujson_fmt(aTHX_ out, obj); + fujson_fmt(aTHX_ ctx, obj); FREETMPS; LEAVE; } -static void fujson_fmt(pTHX_ fustr *out, SV *val) { +static void fujson_fmt(pTHX_ fujson_fmt_ctx *ctx, SV *val) { SvGETMAGIC(val); /* XXX: &PL_sv_yes and &PL_sv_no are proper booleans under 5.40, so no need * to explicitly check for those; does this work in 5.36 as well? */ if (SvIsBOOL(val)) { /* Must check before IOKp & POKp, because bool implies both flags */ - if (BOOL_INTERNALS_sv_isbool_true(val)) fustr_write(out, "true", 4); - else fustr_write(out, "false", 5); + if (BOOL_INTERNALS_sv_isbool_true(val)) fustr_write(&ctx->out, "true", 4); + else fustr_write(&ctx->out, "false", 5); } else if (SvPOKp(val)) { - fujson_fmt_str(aTHX_ out, SvPVX(val), SvCUR(val), SvUTF8(val)); + fujson_fmt_str(aTHX_ ctx, SvPVX(val), SvCUR(val), SvUTF8(val)); } else if (SvNOKp(val)) { /* Must check before IOKp, because integer conversion might have been lossy */ NV nv = SvNV_nomg(val); if (isinfnan(nv)) croak("unable to format floating point NaN or Inf as JSON"); /* XXX: Cpanel::JSON::XS appears to always append a ".0" for round numbers, other modules do not. */ /* XXX#2: This doesn't support quadmath. 
Makefile.PL checks for that */ - fustr_reserve(out, NV_DIG+1); - Gconvert(nv, NV_DIG, 0, out->cur); - out->cur += strlen(out->cur); + fustr_reserve(&ctx->out, NV_DIG+1); + Gconvert(nv, NV_DIG, 0, ctx->out.cur); + ctx->out.cur += strlen(ctx->out.cur); } else if (SvIOKp(val)) { - fujson_fmt_int(aTHX_ out, val); + fujson_fmt_int(aTHX_ ctx, val); } else if (SvROK(val)) { + /* Simply consider every reference a form of nesting. TO_JSON may + * return a scalar, but it may also return another TO_JSON object and + * cause a stack overflow that way. */ + if (--ctx->depth == 0) croak("max_depth exceeded while formatting JSON"); SV *rv = SvRV(val); SvGETMAGIC(rv); - if (UNLIKELY(SvOBJECT(rv))) fujson_fmt_obj(aTHX_ out, val, rv); - else if (SvTYPE(rv) == SVt_PVHV) fujson_fmt_hv(aTHX_ out, (HV *)rv); - else if (SvTYPE(rv) == SVt_PVAV) fujson_fmt_av(aTHX_ out, (AV *)rv); + if (UNLIKELY(SvOBJECT(rv))) fujson_fmt_obj(aTHX_ ctx, val, rv); + else if (SvTYPE(rv) == SVt_PVHV) fujson_fmt_hv(aTHX_ ctx, (HV *)rv); + else if (SvTYPE(rv) == SVt_PVAV) fujson_fmt_av(aTHX_ ctx, (AV *)rv); else croak("unable to format reference '%s' as JSON", SvPV_nolen(val)); + ctx->depth++; } else if (!SvOK(val)) { - fustr_write(out, "null", 4); + fustr_write(&ctx->out, "null", 4); } else { croak("unable to format unknown value '%s' as JSON", SvPV_nolen(val)); } @@ -208,7 +218,10 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) { int encutf8 = 0; char *arg; SV *r; + fujson_fmt_ctx ctx; + ctx.out.maxlen = 0; + ctx.depth = 0; while (i < argc) { arg = SvPV_nolen(ST(i)); i++; @@ -216,21 +229,20 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) { r = ST(i); i++; - if (strcmp(arg, "utf8") == 0) { - encutf8 = SvPVXtrue(r); - } else { - croak("Unknown flag: '%s'", arg); - } + if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r); + else if (strcmp(arg, "max_size") == 0) ctx.out.maxlen = SvUV(r); + else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r); + else croak("Unknown flag: '%s'", 
arg); } + if (ctx.out.maxlen == 0) ctx.out.maxlen = 1<<30; + if (ctx.depth == 0) ctx.depth = 512; - fustr buf; - fustr_init(&buf, 128); - fujson_fmt(aTHX_ &buf, val); - r = fustr_done(&buf); + fustr_init(&ctx.out, 128, ctx.out.maxlen); + fujson_fmt(aTHX_ &ctx, val); + r = fustr_done(&ctx.out); if (!encutf8) SvUTF8_on(r); return r; } /* TODO: canonical */ /* TODO: pretty */ -/* TODO: max depth? */ diff --git a/t/json_format.t b/t/json_format.t index 8a6f014..03b20e7 100644 --- a/t/json_format.t +++ b/t/json_format.t @@ -68,7 +68,7 @@ my @errors = ( do { my $o = {}; bless $o, 'MyToJSONSelf' }, qr/MyToJSONSelf::TO_JSON method returned same object as was passed instead of a new one/, ); -plan tests => @tests*2 + @errors/2 + 6; +plan tests => @tests*2 + @errors/2 + 8; for my($in, $exp) (@tests) { my $out = json_format $in; @@ -87,6 +87,12 @@ for my ($in, $exp) (@errors) { } +eval { json_format [[]], max_depth => 2 }; +like $@, qr/max_depth exceeded while formatting JSON/; + +eval { json_format 'hello world', max_size => 8 }; +like $@, qr/maximum string length exceeded/; + # http://e-choroba.eu/18-yapc slide 6