jsonparse: Add max_depth, max_size and offset options
This completes all the functionality that I wanted from the JSON parser.
This commit is contained in:
parent
abfbba3c10
commit
13eaeb1d4a
3 changed files with 105 additions and 9 deletions
40
FU/Util.pm
40
FU/Util.pm
|
|
@ -18,9 +18,9 @@ doesn't believe in the concept of a "batteries included" standard library.
|
|||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use FU::Util qw/json_format/;
|
||||
use FU::Util qw/json_format/;
|
||||
|
||||
my $data = json_format [1, 2, 3];
|
||||
my $data = json_format [1, 2, 3];
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
|
|
@ -51,7 +51,12 @@ to format a floating point C<NaN> or C<Inf> results in an error.
|
|||
Parse a JSON string and return a Perl value. With the default options, this
|
||||
function is roughly similar to:
|
||||
|
||||
JSON::PP->new->allow_nonref->core_bools->decode($string);
|
||||
JSON::PP->new->allow_nonref->core_bools->decode($string);
|
||||
|
||||
Croaks on invalid JSON, but the error messages are not super useful. This
|
||||
function also throws an error on JSON objects with duplicate keys, which is
|
||||
consistent with the default behavior of L<Cpanel::JSON::XS> but inconsistent
|
||||
with other modules.
|
||||
|
||||
Supported C<%options>:
|
||||
|
||||
|
|
@ -62,6 +67,31 @@ Supported C<%options>:
|
|||
Boolean, interpret the input C<$string> as a UTF-8 encoded byte string instead
|
||||
of a Perl Unicode string.
|
||||
|
||||
=item max_depth
|
||||
|
||||
Maximum permitted nesting depth of arrays and objects. Defaults to 512.
|
||||
|
||||
=item max_size
|
||||
|
||||
Throw an error if the JSON data is larger than the given size in bytes.
|
||||
Defaults to 1 GiB.
|
||||
|
||||
=item offset
|
||||
|
||||
Takes a reference to a scalar that indicates from which byte offset in
|
||||
C<$string> to start parsing. On success, the offset is updated to point to the
|
||||
next non-whitespace character or C<undef> if the string has been fully
|
||||
consumed.
|
||||
|
||||
This option can be used to parse a stream of JSON values:
|
||||
|
||||
my $data = '{"obj":1}{"obj":2}';
|
||||
my $offset = 0;
|
||||
my $obj1 = json_parse($data, offset => \$offset);
|
||||
# $obj1 = {obj=>1}; $offset = 9;
|
||||
my $obj2 = json_parse($data, offset => \$offset);
|
||||
# $obj2 = {obj=>2}; $offset = undef;
|
||||
|
||||
=back
|
||||
|
||||
|
||||
|
|
@ -70,14 +100,14 @@ of a Perl Unicode string.
|
|||
Format a Perl value as JSON. With the default options, this function behaves
|
||||
roughly like:
|
||||
|
||||
JSON::PP->new->allow_nonref->core_bools->convert_blessed->encode($scalar);
|
||||
JSON::PP->new->allow_nonref->core_bools->convert_blessed->encode($scalar);
|
||||
|
||||
Some modules escape the slash character in encoded strings to prevent a
|
||||
potential XSS vulnerability when embedding JSON inside C<< <script> ..
|
||||
</script> >> tags. This function does I<not> do that because it might not even
|
||||
be sufficient. The following is probably an improvement:
|
||||
|
||||
json_format($data) =~ s{</}{<\\/}rg =~ s/<!--/<\\u0021--/rg;
|
||||
json_format($data) =~ s{</}{<\\/}rg =~ s/<!--/<\\u0021--/rg;
|
||||
|
||||
The following C<%options> are supported:
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
typedef struct {
|
||||
const unsigned char *buf;
|
||||
const unsigned char *end;
|
||||
UV depth;
|
||||
} fujson_parse_ctx;
|
||||
|
||||
static SV *fujson_parse(pTHX_ fujson_parse_ctx *);
|
||||
|
|
@ -153,6 +154,7 @@ static SV *fujson_parse_number(pTHX_ fujson_parse_ctx *ctx) {
|
|||
static SV *fujson_parse_array(pTHX_ fujson_parse_ctx *ctx) {
|
||||
AV *av = newAV();
|
||||
SV *r;
|
||||
if (--ctx->depth == 0) return NULL;
|
||||
ctx->buf++; /* '[' */
|
||||
fujson_parse_ws(aTHX_ ctx);
|
||||
if (ctx->buf == ctx->end) goto err;
|
||||
|
|
@ -168,6 +170,7 @@ static SV *fujson_parse_array(pTHX_ fujson_parse_ctx *ctx) {
|
|||
}
|
||||
done:
|
||||
ctx->buf++; /* ']' */
|
||||
ctx->depth++;
|
||||
return newRV_noinc((SV *)av);
|
||||
err:
|
||||
SvREFCNT_dec((SV *)av);
|
||||
|
|
@ -182,6 +185,7 @@ static SV *fujson_parse_obj(pTHX_ fujson_parse_ctx *ctx) {
|
|||
fustr key;
|
||||
fustr_init(&key, NULL, SIZE_MAX);
|
||||
|
||||
if (--ctx->depth == 0) return NULL;
|
||||
ctx->buf++; /* '{' */
|
||||
fujson_parse_ws(aTHX_ ctx);
|
||||
if (ctx->buf == ctx->end) goto err;
|
||||
|
|
@ -217,6 +221,7 @@ static SV *fujson_parse_obj(pTHX_ fujson_parse_ctx *ctx) {
|
|||
done:
|
||||
if (key.sv) SvREFCNT_dec(key.sv);
|
||||
ctx->buf++; /* '}' */
|
||||
ctx->depth++;
|
||||
return newRV_noinc((SV *)hv);
|
||||
err:
|
||||
if (key.sv) SvREFCNT_dec(key.sv);
|
||||
|
|
@ -257,10 +262,13 @@ static SV *fujson_parse_xs(pTHX_ I32 ax, I32 argc, SV *val) {
|
|||
I32 i = 1;
|
||||
char *arg;
|
||||
SV *r;
|
||||
SV *offset = NULL;
|
||||
UV maxlen = 0;
|
||||
int decutf8 = 0;
|
||||
STRLEN buflen;
|
||||
fujson_parse_ctx ctx;
|
||||
|
||||
ctx.depth = 0;
|
||||
while (i < argc) {
|
||||
arg = SvPV_nolen(ST(i));
|
||||
i++;
|
||||
|
|
@ -269,24 +277,40 @@ static SV *fujson_parse_xs(pTHX_ I32 ax, I32 argc, SV *val) {
|
|||
i++;
|
||||
|
||||
if (strcmp(arg, "utf8") == 0) decutf8 = SvPVXtrue(r);
|
||||
else if (strcmp(arg, "max_size") == 0) maxlen = SvUV(r);
|
||||
else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r);
|
||||
else if (strcmp(arg, "offset") == 0) offset = r;
|
||||
else croak("Unknown flag: '%s'", arg);
|
||||
}
|
||||
if (maxlen == 0) maxlen = 1<<30;
|
||||
if (ctx.depth == 0) ctx.depth = 512;
|
||||
|
||||
arg = decutf8 ? SvPVbyte(val, buflen) : SvPVutf8(val, buflen);
|
||||
ctx.buf = (const unsigned char *)arg;
|
||||
ctx.end = ctx.buf + buflen;
|
||||
|
||||
if (offset) {
|
||||
if (!SvROK(offset)) croak("Offset must be a reference to a scalar");
|
||||
offset = SvRV(offset);
|
||||
if (!looks_like_number(offset) || SvIV(offset) < 0) croak("Offset must be a positive integer");
|
||||
if (SvUV(offset) >= buflen) croak("Offset too large");
|
||||
ctx.buf += SvUV(offset);
|
||||
if ((UV)(ctx.end - ctx.buf) > maxlen) ctx.end = ctx.buf + maxlen;
|
||||
|
||||
} else if ((UV)(ctx.end - ctx.buf) > maxlen)
|
||||
croak("Input string is larger than max_size");
|
||||
|
||||
r = fujson_parse(aTHX_ &ctx);
|
||||
if (!r) croak("JSON parsing failed at offset %"UVuf, (UV)((char *)ctx.buf - arg));
|
||||
|
||||
fujson_parse_ws(aTHX_ &ctx);
|
||||
if (ctx.buf != ctx.end) {
|
||||
if (offset) {
|
||||
if (ctx.buf == ctx.end) sv_set_undef(offset);
|
||||
else SvUV_set(offset, (UV)((char *)ctx.buf - arg));
|
||||
} else if (ctx.buf != ctx.end) {
|
||||
SvREFCNT_dec(r);
|
||||
croak("garbage after JSON value at offset %"UVuf, (UV)((char *)ctx.buf - arg));
|
||||
}
|
||||
|
||||
return sv_2mortal(r);
|
||||
}
|
||||
|
||||
/* TODO: incremental parsing (accept & return a byte offset) */
|
||||
/* TODO: max_depth & max_size */
|
||||
|
|
|
|||
|
|
@ -50,6 +50,9 @@ my @error = (
|
|||
'{,}',
|
||||
'{"":1,"":2}',
|
||||
'{"ë":1,"ë":1}',
|
||||
|
||||
'[] x',
|
||||
'{}x',
|
||||
);
|
||||
for my $s (@error) {
|
||||
ok !eval { json_parse($s); 1 };
|
||||
|
|
@ -177,6 +180,10 @@ for (2000..2100, 4000..4200, 8100..8200, 12200..12300, 16300..16400) {
|
|||
is json_parse("\"$s\""), $s
|
||||
}
|
||||
|
||||
ok !eval { json_parse '[[[[]]]]', max_depth => 4; 1 };
|
||||
ok !eval { json_parse '{"":{"":{"":{"":1}}}}', max_depth => 4; 1 };
|
||||
|
||||
|
||||
# 500 depth
|
||||
{
|
||||
$v = json_parse('['x500 . ']'x500);
|
||||
|
|
@ -191,4 +198,39 @@ for (2000..2100, 4000..4200, 8100..8200, 12200..12300, 16300..16400) {
|
|||
is $i, 500;
|
||||
}
|
||||
|
||||
|
||||
# offset / max_size
|
||||
{
|
||||
my $off = 0;
|
||||
my $str = '0123-5.3e1"x"[]{}truefalse 1 '; # cursed
|
||||
is json_parse($str, offset => \$off), 0;
|
||||
is $off, 1;
|
||||
is json_parse($str, offset => \$off, max_size => 4), 123;
|
||||
is $off, 4;
|
||||
is json_parse($str, offset => \$off), -53;
|
||||
is $off, 10;
|
||||
is json_parse($str, offset => \$off), 'x';
|
||||
is $off, 13;
|
||||
is ref json_parse($str, offset => \$off), 'ARRAY';
|
||||
is $off, 15;
|
||||
is ref json_parse($str, offset => \$off), 'HASH';
|
||||
is $off, 17;
|
||||
ok json_parse($str, offset => \$off);
|
||||
is $off, 21;
|
||||
ok !json_parse($str, offset => \$off);
|
||||
is $off, 27;
|
||||
is json_parse($str, offset => \$off), 1;
|
||||
ok !defined $off;
|
||||
ok !eval { json_parse $str, offset => \$off; 1 };
|
||||
|
||||
$off = 100;
|
||||
ok !eval { json_parse $str, offset => \$off; 1 };
|
||||
|
||||
$off = 17;
|
||||
ok !eval { json_parse $str, offset => \$off, max_size => 3; 1 };
|
||||
|
||||
is json_parse('"string"', max_size => 8), 'string';
|
||||
ok !eval { json_parse '"string"', max_size => 7 };
|
||||
}
|
||||
|
||||
done_testing;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue