jsonparse: Add max_depth, max_size and offset options

This completes all the functionality that I wanted from the JSON parser.
This commit is contained in:
Yorhel 2025-02-01 11:01:43 +01:00
parent abfbba3c10
commit 13eaeb1d4a
3 changed files with 105 additions and 9 deletions

View file

@ -1,6 +1,7 @@
/* Parser state passed by pointer to the fujson_parse_* functions. */
typedef struct {
/* Current read position in the input; the parse functions advance it as
 * they consume bytes. */
const unsigned char *buf;
/* One past the last byte the parser may read. Parse functions compare
 * buf against end to detect end of input. */
const unsigned char *end;
/* Remaining nesting budget. The array and object parsers decrement it on
 * entry and fail when it reaches 0; they increment it again on a
 * successful exit. The caller initializes it from max_depth. */
UV depth;
} fujson_parse_ctx;
static SV *fujson_parse(pTHX_ fujson_parse_ctx *);
@ -153,6 +154,7 @@ static SV *fujson_parse_number(pTHX_ fujson_parse_ctx *ctx) {
static SV *fujson_parse_array(pTHX_ fujson_parse_ctx *ctx) {
AV *av = newAV();
SV *r;
if (--ctx->depth == 0) return NULL;
ctx->buf++; /* '[' */
fujson_parse_ws(aTHX_ ctx);
if (ctx->buf == ctx->end) goto err;
@ -168,6 +170,7 @@ static SV *fujson_parse_array(pTHX_ fujson_parse_ctx *ctx) {
}
done:
ctx->buf++; /* ']' */
ctx->depth++;
return newRV_noinc((SV *)av);
err:
SvREFCNT_dec((SV *)av);
@ -182,6 +185,7 @@ static SV *fujson_parse_obj(pTHX_ fujson_parse_ctx *ctx) {
fustr key;
fustr_init(&key, NULL, SIZE_MAX);
if (--ctx->depth == 0) return NULL;
ctx->buf++; /* '{' */
fujson_parse_ws(aTHX_ ctx);
if (ctx->buf == ctx->end) goto err;
@ -217,6 +221,7 @@ static SV *fujson_parse_obj(pTHX_ fujson_parse_ctx *ctx) {
done:
if (key.sv) SvREFCNT_dec(key.sv);
ctx->buf++; /* '}' */
ctx->depth++;
return newRV_noinc((SV *)hv);
err:
if (key.sv) SvREFCNT_dec(key.sv);
@ -257,10 +262,13 @@ static SV *fujson_parse_xs(pTHX_ I32 ax, I32 argc, SV *val) {
I32 i = 1;
char *arg;
SV *r;
SV *offset = NULL;
UV maxlen = 0;
int decutf8 = 0;
STRLEN buflen;
fujson_parse_ctx ctx;
ctx.depth = 0;
while (i < argc) {
arg = SvPV_nolen(ST(i));
i++;
@ -269,24 +277,40 @@ static SV *fujson_parse_xs(pTHX_ I32 ax, I32 argc, SV *val) {
i++;
if (strcmp(arg, "utf8") == 0) decutf8 = SvPVXtrue(r);
else if (strcmp(arg, "max_size") == 0) maxlen = SvUV(r);
else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r);
else if (strcmp(arg, "offset") == 0) offset = r;
else croak("Unknown flag: '%s'", arg);
}
if (maxlen == 0) maxlen = 1<<30;
if (ctx.depth == 0) ctx.depth = 512;
arg = decutf8 ? SvPVbyte(val, buflen) : SvPVutf8(val, buflen);
ctx.buf = (const unsigned char *)arg;
ctx.end = ctx.buf + buflen;
if (offset) {
if (!SvROK(offset)) croak("Offset must be a reference to a scalar");
offset = SvRV(offset);
if (!looks_like_number(offset) || SvIV(offset) < 0) croak("Offset must be a positive integer");
if (SvUV(offset) >= buflen) croak("Offset too large");
ctx.buf += SvUV(offset);
if ((UV)(ctx.end - ctx.buf) > maxlen) ctx.end = ctx.buf + maxlen;
} else if ((UV)(ctx.end - ctx.buf) > maxlen)
croak("Input string is larger than max_size");
r = fujson_parse(aTHX_ &ctx);
if (!r) croak("JSON parsing failed at offset %"UVuf, (UV)((char *)ctx.buf - arg));
fujson_parse_ws(aTHX_ &ctx);
if (ctx.buf != ctx.end) {
if (offset) {
if (ctx.buf == ctx.end) sv_set_undef(offset);
else SvUV_set(offset, (UV)((char *)ctx.buf - arg));
} else if (ctx.buf != ctx.end) {
SvREFCNT_dec(r);
croak("garbage after JSON value at offset %"UVuf, (UV)((char *)ctx.buf - arg));
}
return sv_2mortal(r);
}
/* TODO: incremental parsing (accept & return a byte offset) */
/* TODO: max_depth & max_size */