jsonfmt: Add max_size and max_depth options

This commit is contained in:
Yorhel 2025-01-29 15:05:48 +01:00
parent a85ff98914
commit 163a60b4ba
4 changed files with 101 additions and 59 deletions

View file

@ -45,6 +45,25 @@ be sufficient. The following is probably an improvement:
json_format($data) =~ s{</}{<\\/}rg =~ s/<!--/<\\u0021--/rg; json_format($data) =~ s{</}{<\\/}rg =~ s/<!--/<\\u0021--/rg;
The following C<%options> are supported:
=over
=item utf8
When set to a true value, returns a UTF-8 encoded byte string instead of a Perl
Unicode string.
=item max_size
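Maximum permitted size, in bytes, of the generated JSON string. Formatting dies with a "maximum string length exceeded" error when the output would grow beyond this limit. A value of 0 selects the default of 1 GiB.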
=item max_depth
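Maximum permitted nesting depth of Perl values. Every reference counts as one level of nesting, including objects that are converted through a C<TO_JSON> method. Formatting dies with a "max_depth exceeded while formatting JSON" error as soon as the nesting reaches this depth, so C<< max_depth => 2 >> already rejects C<[[]]>. A value of 0 selects the default of 512.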
=back
=back =back
(Why the hell yet another JSON codec when CPAN is already full of them!? Well, (Why the hell yet another JSON codec when CPAN is already full of them!? Well,

View file

@ -4,24 +4,29 @@ typedef struct {
SV *sv; SV *sv;
char *cur; char *cur;
char *end; char *end;
size_t maxlen;
} fustr; } fustr;
/* Set up `s` with a new mortal string SV as its backing buffer.
 *
 * prealloc: number of bytes to request up front; capped at `maxlen`.
 * maxlen:   upper bound on the string length, stored in `s` so that
 *           fustr_grow() can enforce it on later growth.
 */
static void fustr_init_(pTHX_ fustr *s, size_t prealloc, size_t maxlen) {
    /* Never preallocate more than the caller is willing to accept. */
    size_t initial = prealloc > maxlen ? maxlen : prealloc;
    SV *backing = sv_2mortal(newSV(initial));

    SvPOK_only(backing);
    s->sv = backing;
    s->maxlen = maxlen;
    s->cur = SvPVX(backing);
    s->end = SvEND(backing);
}
/* Enlarge the backing buffer of `s` so that at least `add` more bytes can be
 * written at the current position, then refresh the cur/end pointers.
 *
 * Croaks with "maximum string length exceeded" when the bytes already
 * written plus `add` would go beyond s->maxlen.
 */
static void fustr_grow(pTHX_ fustr *s, size_t add) {
    size_t off = s->cur - SvPVX(s->sv);
    size_t newlen = 64;

    /* Compare against the remaining budget instead of computing off+add
     * first: that sum can wrap around for very large requests, which would
     * let an oversized write slip past the limit check. */
    if (off > s->maxlen || add > s->maxlen - off)
        croak("maximum string length exceeded");
    add += off;

    /* Increment to next power of two; SvGROW's default strategy is slow.
     * Stop doubling once the next step would pass maxlen: the result gets
     * clamped to maxlen anyway, and an unchecked shift could wrap to zero
     * and loop forever. */
    while (newlen < add) {
        if (newlen > s->maxlen / 2) {
            newlen = s->maxlen;
            break;
        }
        newlen <<= 1;
    }
    if (newlen > s->maxlen) newlen = s->maxlen;

    char *buf = SvGROW(s->sv, newlen);

    /* The SV may own more memory than requested; never expose more than
     * maxlen bytes of it to writers. */
    size_t cap = SvLEN(s->sv);
    if (cap > s->maxlen) cap = s->maxlen;

    s->cur = buf + off;
    s->end = buf + cap;
}
static inline void fustr_reserve_(pTHX_ fustr *s, size_t add) { static inline void fustr_reserve_(pTHX_ fustr *s, size_t add) {
@ -50,7 +55,7 @@ static SV *fustr_done_(pTHX_ fustr *s) {
return s->sv; return s->sv;
} }
#define fustr_init(a,b) fustr_init_(aTHX_ a,b) #define fustr_init(a,b,c) fustr_init_(aTHX_ a,b,c)
#define fustr_reserve(a,b) fustr_reserve_(aTHX_ a,b) #define fustr_reserve(a,b) fustr_reserve_(aTHX_ a,b)
#define fustr_write(a,b,c) fustr_write_(aTHX_ a,b,c) #define fustr_write(a,b,c) fustr_write_(aTHX_ a,b,c)
#define fustr_write_buf(a,b) fustr_write_buf_(aTHX_ a,b) #define fustr_write_buf(a,b) fustr_write_buf_(aTHX_ a,b)

View file

@ -1,6 +1,11 @@
static void fujson_fmt(pTHX_ fustr *, SV *); typedef struct {
fustr out;
UV depth;
} fujson_fmt_ctx;
static void fujson_fmt_str(pTHX_ fustr *out, const char *stri, size_t len, int utf8) { static void fujson_fmt(pTHX_ fujson_fmt_ctx *, SV *);
static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t len, int utf8) {
size_t off = 0, loff; size_t off = 0, loff;
const unsigned char *str = (const unsigned char *)stri; const unsigned char *str = (const unsigned char *)stri;
unsigned char *buf; unsigned char *buf;
@ -15,8 +20,8 @@ static void fujson_fmt_str(pTHX_ fustr *out, const char *stri, size_t len, int u
croak("invalid codepoint encountered in string, cannot format to JSON"); croak("invalid codepoint encountered in string, cannot format to JSON");
} }
fustr_write(out, "\"", 1); fustr_write(&ctx->out, "\"", 1);
fustr_reserve(out, len); fustr_reserve(&ctx->out, len);
while (off < len) { while (off < len) {
/* Fast path: no escaping needed */ /* Fast path: no escaping needed */
@ -36,25 +41,25 @@ static void fujson_fmt_str(pTHX_ fustr *out, const char *stri, size_t len, int u
off++; off++;
} }
} }
fustr_write(out, (char *)str+loff, off-loff); fustr_write(&ctx->out, (char *)str+loff, off-loff);
if (off < len) { /* early break, which means current byte needs special processing */ if (off < len) { /* early break, which means current byte needs special processing */
switch (x) { switch (x) {
case '"': fustr_write(out, "\\\"", 2); break; case '"': fustr_write(&ctx->out, "\\\"", 2); break;
case '\\': fustr_write(out, "\\\\", 2); break; case '\\': fustr_write(&ctx->out, "\\\\", 2); break;
case 0x08: fustr_write(out, "\\b", 2); break; case 0x08: fustr_write(&ctx->out, "\\b", 2); break;
case 0x09: fustr_write(out, "\\t", 2); break; case 0x09: fustr_write(&ctx->out, "\\t", 2); break;
case 0x0a: fustr_write(out, "\\n", 2); break; case 0x0a: fustr_write(&ctx->out, "\\n", 2); break;
case 0x0c: fustr_write(out, "\\f", 2); break; case 0x0c: fustr_write(&ctx->out, "\\f", 2); break;
case 0x0d: fustr_write(out, "\\r", 2); break; case 0x0d: fustr_write(&ctx->out, "\\r", 2); break;
default: default:
if (x < 0x80) { if (x < 0x80) {
buf = (unsigned char *)fustr_write_buf(out, 6); buf = (unsigned char *)fustr_write_buf(&ctx->out, 6);
memcpy(buf, "\\u00", 4); memcpy(buf, "\\u00", 4);
buf[4] = PL_hexdigit[(x >> 4) & 0x0f]; buf[4] = PL_hexdigit[(x >> 4) & 0x0f];
buf[5] = PL_hexdigit[x & 0x0f]; buf[5] = PL_hexdigit[x & 0x0f];
} else { /* x >= 0x80, !utf8, so encode as 2-byte UTF-8 */ } else { /* x >= 0x80, !utf8, so encode as 2-byte UTF-8 */
buf = (unsigned char *)fustr_write_buf(out, 2); buf = (unsigned char *)fustr_write_buf(&ctx->out, 2);
buf[0] = 0xc0 | (x >> 6); buf[0] = 0xc0 | (x >> 6);
buf[1] = 0x80 | (x & 0x3f); buf[1] = 0x80 | (x & 0x3f);
} }
@ -63,7 +68,7 @@ static void fujson_fmt_str(pTHX_ fustr *out, const char *stri, size_t len, int u
} }
} }
fustr_write(out, "\"", 1); fustr_write(&ctx->out, "\"", 1);
} }
/* All digits between 0 and 100, a trick I borrowed from the Zig stdlib. */ /* All digits between 0 and 100, a trick I borrowed from the Zig stdlib. */
@ -79,7 +84,7 @@ static const char fujson_digits[] =
"80818283848586878889" "80818283848586878889"
"90919293949596979899"; "90919293949596979899";
static void fujson_fmt_int(pTHX_ fustr *out, SV *val) { static void fujson_fmt_int(pTHX_ fujson_fmt_ctx *ctx, SV *val) {
char buf[32]; char buf[32];
char *r = buf+31; char *r = buf+31;
int neg = 0; int neg = 0;
@ -95,7 +100,7 @@ static void fujson_fmt_int(pTHX_ fustr *out, SV *val) {
} }
if (uv == 0) { if (uv == 0) {
fustr_write(out, "0", 1); fustr_write(&ctx->out, "0", 1);
return; return;
} }
@ -106,39 +111,39 @@ static void fujson_fmt_int(pTHX_ fustr *out, SV *val) {
} }
if (uv > 0) *(--r) = '0' + (uv % 10); if (uv > 0) *(--r) = '0' + (uv % 10);
if (neg) *(--r) = '-'; if (neg) *(--r) = '-';
fustr_write(out, r, 31 - (r - buf)); fustr_write(&ctx->out, r, 31 - (r - buf));
} }
/* Write the array `av` to ctx->out as a JSON array.
 *
 * Elements are formatted recursively through fujson_fmt(); slots for which
 * av_fetch() returns NULL are written as "null".
 */
static void fujson_fmt_av(pTHX_ fujson_fmt_ctx *ctx, AV *av) {
    /* av_count() returns a Size_t; keeping the count in an int would
     * truncate it for very large arrays and silently drop elements. */
    Size_t i, len = av_count(av);

    fustr_write(&ctx->out, "[", 1);
    for (i = 0; i < len; i++) {
        if (i) fustr_write(&ctx->out, ",", 1);
        SV **sv = av_fetch(av, (SSize_t)i, 0);
        if (sv) fujson_fmt(aTHX_ ctx, *sv); /* sv will have magic if av is tied, but fujson_fmt() handles that. */
        else fustr_write(&ctx->out, "null", 4);
    }
    fustr_write(&ctx->out, "]", 1);
}
/* Write the hash `hv` to ctx->out as a JSON object.
 *
 * Entries are emitted in the order hv_iternext() yields them. Keys go
 * through fujson_fmt_str(), values are formatted recursively through
 * fujson_fmt().
 */
static void fujson_fmt_hv(pTHX_ fujson_fmt_ctx *ctx, HV *hv) {
    HE *entry;
    STRLEN keylen;
    char *key = NULL; /* stays NULL until the first entry has been seen */

    hv_iterinit(hv);
    fustr_write(&ctx->out, "{", 1);

    while ((entry = hv_iternext(hv))) {
        /* A non-NULL key means an earlier entry was already written. */
        if (key) fustr_write(&ctx->out, ",", 1);

        key = HePV(entry, keylen);
        fujson_fmt_str(aTHX_ ctx, key, keylen, HeUTF8(entry));
        fustr_write(&ctx->out, ":", 1);

        /* Hashes with magic have their value fetched through hv_iterval()
         * rather than read straight from the entry. */
        SV *item;
        if (UNLIKELY(SvMAGICAL(hv))) item = hv_iterval(hv, entry);
        else item = HeVAL(entry);
        fujson_fmt(aTHX_ ctx, item);
    }

    fustr_write(&ctx->out, "}", 1);
}
static void fujson_fmt_obj(pTHX_ fustr *out, SV *rv, SV *obj) { static void fujson_fmt_obj(pTHX_ fujson_fmt_ctx *ctx, SV *rv, SV *obj) {
dSP; dSP;
GV *method = gv_fetchmethod_autoload(SvSTASH(obj), "TO_JSON", 0); GV *method = gv_fetchmethod_autoload(SvSTASH(obj), "TO_JSON", 0);
@ -162,41 +167,46 @@ static void fujson_fmt_obj(pTHX_ fustr *out, SV *rv, SV *obj) {
obj = POPs; obj = POPs;
PUTBACK; PUTBACK;
fujson_fmt(aTHX_ out, obj); fujson_fmt(aTHX_ ctx, obj);
FREETMPS; FREETMPS;
LEAVE; LEAVE;
} }
static void fujson_fmt(pTHX_ fustr *out, SV *val) { static void fujson_fmt(pTHX_ fujson_fmt_ctx *ctx, SV *val) {
SvGETMAGIC(val); SvGETMAGIC(val);
/* XXX: &PL_sv_yes and &PL_sv_no are proper booleans under 5.40, so no need /* XXX: &PL_sv_yes and &PL_sv_no are proper booleans under 5.40, so no need
* to explicitly check for those; does this work in 5.36 as well? */ * to explicitly check for those; does this work in 5.36 as well? */
if (SvIsBOOL(val)) { /* Must check before IOKp & POKp, because bool implies both flags */ if (SvIsBOOL(val)) { /* Must check before IOKp & POKp, because bool implies both flags */
if (BOOL_INTERNALS_sv_isbool_true(val)) fustr_write(out, "true", 4); if (BOOL_INTERNALS_sv_isbool_true(val)) fustr_write(&ctx->out, "true", 4);
else fustr_write(out, "false", 5); else fustr_write(&ctx->out, "false", 5);
} else if (SvPOKp(val)) { } else if (SvPOKp(val)) {
fujson_fmt_str(aTHX_ out, SvPVX(val), SvCUR(val), SvUTF8(val)); fujson_fmt_str(aTHX_ ctx, SvPVX(val), SvCUR(val), SvUTF8(val));
} else if (SvNOKp(val)) { /* Must check before IOKp, because integer conversion might have been lossy */ } else if (SvNOKp(val)) { /* Must check before IOKp, because integer conversion might have been lossy */
NV nv = SvNV_nomg(val); NV nv = SvNV_nomg(val);
if (isinfnan(nv)) croak("unable to format floating point NaN or Inf as JSON"); if (isinfnan(nv)) croak("unable to format floating point NaN or Inf as JSON");
/* XXX: Cpanel::JSON::XS appears to always append a ".0" for round numbers, other modules do not. */ /* XXX: Cpanel::JSON::XS appears to always append a ".0" for round numbers, other modules do not. */
/* XXX#2: This doesn't support quadmath. Makefile.PL checks for that */ /* XXX#2: This doesn't support quadmath. Makefile.PL checks for that */
fustr_reserve(out, NV_DIG+1); fustr_reserve(&ctx->out, NV_DIG+1);
Gconvert(nv, NV_DIG, 0, out->cur); Gconvert(nv, NV_DIG, 0, ctx->out.cur);
out->cur += strlen(out->cur); ctx->out.cur += strlen(ctx->out.cur);
} else if (SvIOKp(val)) { } else if (SvIOKp(val)) {
fujson_fmt_int(aTHX_ out, val); fujson_fmt_int(aTHX_ ctx, val);
} else if (SvROK(val)) { } else if (SvROK(val)) {
/* Simply consider every reference a form of nesting. TO_JSON may
* return a scalar, but it may also return another TO_JSON object and
* cause a stack overflow that way. */
if (--ctx->depth == 0) croak("max_depth exceeded while formatting JSON");
SV *rv = SvRV(val); SV *rv = SvRV(val);
SvGETMAGIC(rv); SvGETMAGIC(rv);
if (UNLIKELY(SvOBJECT(rv))) fujson_fmt_obj(aTHX_ out, val, rv); if (UNLIKELY(SvOBJECT(rv))) fujson_fmt_obj(aTHX_ ctx, val, rv);
else if (SvTYPE(rv) == SVt_PVHV) fujson_fmt_hv(aTHX_ out, (HV *)rv); else if (SvTYPE(rv) == SVt_PVHV) fujson_fmt_hv(aTHX_ ctx, (HV *)rv);
else if (SvTYPE(rv) == SVt_PVAV) fujson_fmt_av(aTHX_ out, (AV *)rv); else if (SvTYPE(rv) == SVt_PVAV) fujson_fmt_av(aTHX_ ctx, (AV *)rv);
else croak("unable to format reference '%s' as JSON", SvPV_nolen(val)); else croak("unable to format reference '%s' as JSON", SvPV_nolen(val));
ctx->depth++;
} else if (!SvOK(val)) { } else if (!SvOK(val)) {
fustr_write(out, "null", 4); fustr_write(&ctx->out, "null", 4);
} else { } else {
croak("unable to format unknown value '%s' as JSON", SvPV_nolen(val)); croak("unable to format unknown value '%s' as JSON", SvPV_nolen(val));
} }
@ -208,7 +218,10 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
int encutf8 = 0; int encutf8 = 0;
char *arg; char *arg;
SV *r; SV *r;
fujson_fmt_ctx ctx;
ctx.out.maxlen = 0;
ctx.depth = 0;
while (i < argc) { while (i < argc) {
arg = SvPV_nolen(ST(i)); arg = SvPV_nolen(ST(i));
i++; i++;
@ -216,21 +229,20 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
r = ST(i); r = ST(i);
i++; i++;
if (strcmp(arg, "utf8") == 0) { if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r);
encutf8 = SvPVXtrue(r); else if (strcmp(arg, "max_size") == 0) ctx.out.maxlen = SvUV(r);
} else { else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r);
croak("Unknown flag: '%s'", arg); else croak("Unknown flag: '%s'", arg);
}
} }
if (ctx.out.maxlen == 0) ctx.out.maxlen = 1<<30;
if (ctx.depth == 0) ctx.depth = 512;
fustr buf; fustr_init(&ctx.out, 128, ctx.out.maxlen);
fustr_init(&buf, 128); fujson_fmt(aTHX_ &ctx, val);
fujson_fmt(aTHX_ &buf, val); r = fustr_done(&ctx.out);
r = fustr_done(&buf);
if (!encutf8) SvUTF8_on(r); if (!encutf8) SvUTF8_on(r);
return r; return r;
} }
/* TODO: canonical */ /* TODO: canonical */
/* TODO: pretty */ /* TODO: pretty */
/* TODO: max depth? */

View file

@ -68,7 +68,7 @@ my @errors = (
do { my $o = {}; bless $o, 'MyToJSONSelf' }, qr/MyToJSONSelf::TO_JSON method returned same object as was passed instead of a new one/, do { my $o = {}; bless $o, 'MyToJSONSelf' }, qr/MyToJSONSelf::TO_JSON method returned same object as was passed instead of a new one/,
); );
plan tests => @tests*2 + @errors/2 + 6; plan tests => @tests*2 + @errors/2 + 8;
for my($in, $exp) (@tests) { for my($in, $exp) (@tests) {
my $out = json_format $in; my $out = json_format $in;
@ -87,6 +87,12 @@ for my ($in, $exp) (@errors) {
} }
eval { json_format [[]], max_depth => 2 };
like $@, qr/max_depth exceeded while formatting JSON/;
eval { json_format 'hello world', max_size => 8 };
like $@, qr/maximum string length exceeded/;
# http://e-choroba.eu/18-yapc slide 6 # http://e-choroba.eu/18-yapc slide 6