jsonfmt: Add max_size and max_depth options
This commit is contained in:
parent
a85ff98914
commit
163a60b4ba
4 changed files with 101 additions and 59 deletions
11
c/common.c
11
c/common.c
|
|
@ -4,24 +4,29 @@ typedef struct {
|
|||
SV *sv;
|
||||
char *cur;
|
||||
char *end;
|
||||
size_t maxlen;
|
||||
} fustr;
|
||||
|
||||
static void fustr_init_(pTHX_ fustr *s, size_t prealloc) {
|
||||
static void fustr_init_(pTHX_ fustr *s, size_t prealloc, size_t maxlen) {
|
||||
if (prealloc > maxlen) prealloc = maxlen;
|
||||
s->sv = sv_2mortal(newSV(prealloc));
|
||||
SvPOK_only(s->sv);
|
||||
s->cur = SvPVX(s->sv);
|
||||
s->end = SvEND(s->sv);
|
||||
s->maxlen = maxlen;
|
||||
}
|
||||
|
||||
static void fustr_grow(pTHX_ fustr *s, size_t add) {
|
||||
size_t off = s->cur - SvPVX(s->sv);
|
||||
size_t newlen = 64;
|
||||
add += off;
|
||||
if (add > s->maxlen) croak("maximum string length exceeded");
|
||||
/* Increment to next power of two; SvGROW's default strategy is slow */
|
||||
while (newlen < add) newlen <<= 1;
|
||||
if (newlen > s->maxlen) newlen = s->maxlen;
|
||||
char *buf = SvGROW(s->sv, newlen);
|
||||
s->cur = buf + off;
|
||||
s->end = buf + SvLEN(s->sv);
|
||||
s->end = buf + (SvLEN(s->sv) > s->maxlen ? s->maxlen : SvLEN(s->sv));
|
||||
}
|
||||
|
||||
static inline void fustr_reserve_(pTHX_ fustr *s, size_t add) {
|
||||
|
|
@ -50,7 +55,7 @@ static SV *fustr_done_(pTHX_ fustr *s) {
|
|||
return s->sv;
|
||||
}
|
||||
|
||||
#define fustr_init(a,b) fustr_init_(aTHX_ a,b)
|
||||
#define fustr_init(a,b,c) fustr_init_(aTHX_ a,b,c)
|
||||
#define fustr_reserve(a,b) fustr_reserve_(aTHX_ a,b)
|
||||
#define fustr_write(a,b,c) fustr_write_(aTHX_ a,b,c)
|
||||
#define fustr_write_buf(a,b) fustr_write_buf_(aTHX_ a,b)
|
||||
|
|
|
|||
122
c/jsonfmt.c
122
c/jsonfmt.c
|
|
@ -1,6 +1,11 @@
|
|||
static void fujson_fmt(pTHX_ fustr *, SV *);
|
||||
typedef struct {
|
||||
fustr out;
|
||||
UV depth;
|
||||
} fujson_fmt_ctx;
|
||||
|
||||
static void fujson_fmt_str(pTHX_ fustr *out, const char *stri, size_t len, int utf8) {
|
||||
static void fujson_fmt(pTHX_ fujson_fmt_ctx *, SV *);
|
||||
|
||||
static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t len, int utf8) {
|
||||
size_t off = 0, loff;
|
||||
const unsigned char *str = (const unsigned char *)stri;
|
||||
unsigned char *buf;
|
||||
|
|
@ -15,8 +20,8 @@ static void fujson_fmt_str(pTHX_ fustr *out, const char *stri, size_t len, int u
|
|||
croak("invalid codepoint encountered in string, cannot format to JSON");
|
||||
}
|
||||
|
||||
fustr_write(out, "\"", 1);
|
||||
fustr_reserve(out, len);
|
||||
fustr_write(&ctx->out, "\"", 1);
|
||||
fustr_reserve(&ctx->out, len);
|
||||
|
||||
while (off < len) {
|
||||
/* Fast path: no escaping needed */
|
||||
|
|
@ -36,25 +41,25 @@ static void fujson_fmt_str(pTHX_ fustr *out, const char *stri, size_t len, int u
|
|||
off++;
|
||||
}
|
||||
}
|
||||
fustr_write(out, (char *)str+loff, off-loff);
|
||||
fustr_write(&ctx->out, (char *)str+loff, off-loff);
|
||||
|
||||
if (off < len) { /* early break, which means current byte needs special processing */
|
||||
switch (x) {
|
||||
case '"': fustr_write(out, "\\\"", 2); break;
|
||||
case '\\': fustr_write(out, "\\\\", 2); break;
|
||||
case 0x08: fustr_write(out, "\\b", 2); break;
|
||||
case 0x09: fustr_write(out, "\\t", 2); break;
|
||||
case 0x0a: fustr_write(out, "\\n", 2); break;
|
||||
case 0x0c: fustr_write(out, "\\f", 2); break;
|
||||
case 0x0d: fustr_write(out, "\\r", 2); break;
|
||||
case '"': fustr_write(&ctx->out, "\\\"", 2); break;
|
||||
case '\\': fustr_write(&ctx->out, "\\\\", 2); break;
|
||||
case 0x08: fustr_write(&ctx->out, "\\b", 2); break;
|
||||
case 0x09: fustr_write(&ctx->out, "\\t", 2); break;
|
||||
case 0x0a: fustr_write(&ctx->out, "\\n", 2); break;
|
||||
case 0x0c: fustr_write(&ctx->out, "\\f", 2); break;
|
||||
case 0x0d: fustr_write(&ctx->out, "\\r", 2); break;
|
||||
default:
|
||||
if (x < 0x80) {
|
||||
buf = (unsigned char *)fustr_write_buf(out, 6);
|
||||
buf = (unsigned char *)fustr_write_buf(&ctx->out, 6);
|
||||
memcpy(buf, "\\u00", 4);
|
||||
buf[4] = PL_hexdigit[(x >> 4) & 0x0f];
|
||||
buf[5] = PL_hexdigit[x & 0x0f];
|
||||
} else { /* x >= 0x80, !utf8, so encode as 2-byte UTF-8 */
|
||||
buf = (unsigned char *)fustr_write_buf(out, 2);
|
||||
buf = (unsigned char *)fustr_write_buf(&ctx->out, 2);
|
||||
buf[0] = 0xc0 | (x >> 6);
|
||||
buf[1] = 0x80 | (x & 0x3f);
|
||||
}
|
||||
|
|
@ -63,7 +68,7 @@ static void fujson_fmt_str(pTHX_ fustr *out, const char *stri, size_t len, int u
|
|||
}
|
||||
}
|
||||
|
||||
fustr_write(out, "\"", 1);
|
||||
fustr_write(&ctx->out, "\"", 1);
|
||||
}
|
||||
|
||||
/* All digits between 0 and 100, a trick I borrowed from the Zig stdlib. */
|
||||
|
|
@ -79,7 +84,7 @@ static const char fujson_digits[] =
|
|||
"80818283848586878889"
|
||||
"90919293949596979899";
|
||||
|
||||
static void fujson_fmt_int(pTHX_ fustr *out, SV *val) {
|
||||
static void fujson_fmt_int(pTHX_ fujson_fmt_ctx *ctx, SV *val) {
|
||||
char buf[32];
|
||||
char *r = buf+31;
|
||||
int neg = 0;
|
||||
|
|
@ -95,7 +100,7 @@ static void fujson_fmt_int(pTHX_ fustr *out, SV *val) {
|
|||
}
|
||||
|
||||
if (uv == 0) {
|
||||
fustr_write(out, "0", 1);
|
||||
fustr_write(&ctx->out, "0", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -106,39 +111,39 @@ static void fujson_fmt_int(pTHX_ fustr *out, SV *val) {
|
|||
}
|
||||
if (uv > 0) *(--r) = '0' + (uv % 10);
|
||||
if (neg) *(--r) = '-';
|
||||
fustr_write(out, r, 31 - (r - buf));
|
||||
fustr_write(&ctx->out, r, 31 - (r - buf));
|
||||
}
|
||||
|
||||
static void fujson_fmt_av(pTHX_ fustr *out, AV *av) {
|
||||
static void fujson_fmt_av(pTHX_ fujson_fmt_ctx *ctx, AV *av) {
|
||||
int i, len = av_count(av);
|
||||
fustr_write(out, "[", 1);
|
||||
fustr_write(&ctx->out, "[", 1);
|
||||
for (i=0; i<len; i++) {
|
||||
if (i) fustr_write(out, ",", 1);
|
||||
if (i) fustr_write(&ctx->out, ",", 1);
|
||||
SV **sv = av_fetch(av, i, 0);
|
||||
if (sv) fujson_fmt(aTHX_ out, *sv); /* sv will have magic if av is tied, but fujson_fmt() handles that. */
|
||||
else fustr_write(out, "null", 4);
|
||||
if (sv) fujson_fmt(aTHX_ ctx, *sv); /* sv will have magic if av is tied, but fujson_fmt() handles that. */
|
||||
else fustr_write(&ctx->out, "null", 4);
|
||||
}
|
||||
fustr_write(out, "]", 1);
|
||||
fustr_write(&ctx->out, "]", 1);
|
||||
}
|
||||
|
||||
static void fujson_fmt_hv(pTHX_ fustr *out, HV *hv) {
|
||||
static void fujson_fmt_hv(pTHX_ fujson_fmt_ctx *ctx, HV *hv) {
|
||||
HE *he;
|
||||
STRLEN helen;
|
||||
char *hestr = NULL;
|
||||
|
||||
hv_iterinit(hv);
|
||||
fustr_write(out, "{", 1);
|
||||
fustr_write(&ctx->out, "{", 1);
|
||||
while ((he = hv_iternext(hv))) {
|
||||
if (hestr) fustr_write(out, ",", 1);
|
||||
if (hestr) fustr_write(&ctx->out, ",", 1);
|
||||
hestr = HePV(he, helen);
|
||||
fujson_fmt_str(aTHX_ out, hestr, helen, HeUTF8(he));
|
||||
fustr_write(out, ":", 1);
|
||||
fujson_fmt(aTHX_ out, UNLIKELY(SvMAGICAL(hv)) ? hv_iterval(hv, he) : HeVAL(he));
|
||||
fujson_fmt_str(aTHX_ ctx, hestr, helen, HeUTF8(he));
|
||||
fustr_write(&ctx->out, ":", 1);
|
||||
fujson_fmt(aTHX_ ctx, UNLIKELY(SvMAGICAL(hv)) ? hv_iterval(hv, he) : HeVAL(he));
|
||||
}
|
||||
fustr_write(out, "}", 1);
|
||||
fustr_write(&ctx->out, "}", 1);
|
||||
}
|
||||
|
||||
static void fujson_fmt_obj(pTHX_ fustr *out, SV *rv, SV *obj) {
|
||||
static void fujson_fmt_obj(pTHX_ fujson_fmt_ctx *ctx, SV *rv, SV *obj) {
|
||||
dSP;
|
||||
|
||||
GV *method = gv_fetchmethod_autoload(SvSTASH(obj), "TO_JSON", 0);
|
||||
|
|
@ -162,41 +167,46 @@ static void fujson_fmt_obj(pTHX_ fustr *out, SV *rv, SV *obj) {
|
|||
|
||||
obj = POPs;
|
||||
PUTBACK;
|
||||
fujson_fmt(aTHX_ out, obj);
|
||||
fujson_fmt(aTHX_ ctx, obj);
|
||||
|
||||
FREETMPS;
|
||||
LEAVE;
|
||||
}
|
||||
|
||||
static void fujson_fmt(pTHX_ fustr *out, SV *val) {
|
||||
static void fujson_fmt(pTHX_ fujson_fmt_ctx *ctx, SV *val) {
|
||||
SvGETMAGIC(val);
|
||||
|
||||
/* XXX: &PL_sv_yes and &PL_sv_no are proper booleans under 5.40, so no need
|
||||
* to explicitly check for those; does this work in 5.36 as well? */
|
||||
if (SvIsBOOL(val)) { /* Must check before IOKp & POKp, because bool implies both flags */
|
||||
if (BOOL_INTERNALS_sv_isbool_true(val)) fustr_write(out, "true", 4);
|
||||
else fustr_write(out, "false", 5);
|
||||
if (BOOL_INTERNALS_sv_isbool_true(val)) fustr_write(&ctx->out, "true", 4);
|
||||
else fustr_write(&ctx->out, "false", 5);
|
||||
} else if (SvPOKp(val)) {
|
||||
fujson_fmt_str(aTHX_ out, SvPVX(val), SvCUR(val), SvUTF8(val));
|
||||
fujson_fmt_str(aTHX_ ctx, SvPVX(val), SvCUR(val), SvUTF8(val));
|
||||
} else if (SvNOKp(val)) { /* Must check before IOKp, because integer conversion might have been lossy */
|
||||
NV nv = SvNV_nomg(val);
|
||||
if (isinfnan(nv)) croak("unable to format floating point NaN or Inf as JSON");
|
||||
/* XXX: Cpanel::JSON::XS appears to always append a ".0" for round numbers, other modules do not. */
|
||||
/* XXX#2: This doesn't support quadmath. Makefile.PL checks for that */
|
||||
fustr_reserve(out, NV_DIG+1);
|
||||
Gconvert(nv, NV_DIG, 0, out->cur);
|
||||
out->cur += strlen(out->cur);
|
||||
fustr_reserve(&ctx->out, NV_DIG+1);
|
||||
Gconvert(nv, NV_DIG, 0, ctx->out.cur);
|
||||
ctx->out.cur += strlen(ctx->out.cur);
|
||||
} else if (SvIOKp(val)) {
|
||||
fujson_fmt_int(aTHX_ out, val);
|
||||
fujson_fmt_int(aTHX_ ctx, val);
|
||||
} else if (SvROK(val)) {
|
||||
/* Simply consider every reference a form of nesting. TO_JSON may
|
||||
* return a scalar, but it may also return another TO_JSON object and
|
||||
* cause a stack overflow that way. */
|
||||
if (--ctx->depth == 0) croak("max_depth exceeded while formatting JSON");
|
||||
SV *rv = SvRV(val);
|
||||
SvGETMAGIC(rv);
|
||||
if (UNLIKELY(SvOBJECT(rv))) fujson_fmt_obj(aTHX_ out, val, rv);
|
||||
else if (SvTYPE(rv) == SVt_PVHV) fujson_fmt_hv(aTHX_ out, (HV *)rv);
|
||||
else if (SvTYPE(rv) == SVt_PVAV) fujson_fmt_av(aTHX_ out, (AV *)rv);
|
||||
if (UNLIKELY(SvOBJECT(rv))) fujson_fmt_obj(aTHX_ ctx, val, rv);
|
||||
else if (SvTYPE(rv) == SVt_PVHV) fujson_fmt_hv(aTHX_ ctx, (HV *)rv);
|
||||
else if (SvTYPE(rv) == SVt_PVAV) fujson_fmt_av(aTHX_ ctx, (AV *)rv);
|
||||
else croak("unable to format reference '%s' as JSON", SvPV_nolen(val));
|
||||
ctx->depth++;
|
||||
} else if (!SvOK(val)) {
|
||||
fustr_write(out, "null", 4);
|
||||
fustr_write(&ctx->out, "null", 4);
|
||||
} else {
|
||||
croak("unable to format unknown value '%s' as JSON", SvPV_nolen(val));
|
||||
}
|
||||
|
|
@ -208,7 +218,10 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
|
|||
int encutf8 = 0;
|
||||
char *arg;
|
||||
SV *r;
|
||||
fujson_fmt_ctx ctx;
|
||||
|
||||
ctx.out.maxlen = 0;
|
||||
ctx.depth = 0;
|
||||
while (i < argc) {
|
||||
arg = SvPV_nolen(ST(i));
|
||||
i++;
|
||||
|
|
@ -216,21 +229,20 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
|
|||
r = ST(i);
|
||||
i++;
|
||||
|
||||
if (strcmp(arg, "utf8") == 0) {
|
||||
encutf8 = SvPVXtrue(r);
|
||||
} else {
|
||||
croak("Unknown flag: '%s'", arg);
|
||||
}
|
||||
if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r);
|
||||
else if (strcmp(arg, "max_size") == 0) ctx.out.maxlen = SvUV(r);
|
||||
else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r);
|
||||
else croak("Unknown flag: '%s'", arg);
|
||||
}
|
||||
if (ctx.out.maxlen == 0) ctx.out.maxlen = 1<<30;
|
||||
if (ctx.depth == 0) ctx.depth = 512;
|
||||
|
||||
fustr buf;
|
||||
fustr_init(&buf, 128);
|
||||
fujson_fmt(aTHX_ &buf, val);
|
||||
r = fustr_done(&buf);
|
||||
fustr_init(&ctx.out, 128, ctx.out.maxlen);
|
||||
fujson_fmt(aTHX_ &ctx, val);
|
||||
r = fustr_done(&ctx.out);
|
||||
if (!encutf8) SvUTF8_on(r);
|
||||
return r;
|
||||
}
|
||||
|
||||
/* TODO: canonical */
|
||||
/* TODO: pretty */
|
||||
/* TODO: max depth? */
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue