jsonparse: Add basic JSON parser
Some TODO's left and this needs benchmarking.
This commit is contained in:
parent
aebe5a93dc
commit
7cdc02e399
8 changed files with 508 additions and 21 deletions
8
FU.xs
8
FU.xs
|
|
@ -5,11 +5,17 @@
|
||||||
|
|
||||||
#include "c/common.c"
|
#include "c/common.c"
|
||||||
#include "c/jsonfmt.c"
|
#include "c/jsonfmt.c"
|
||||||
|
#include "c/jsonparse.c"
|
||||||
|
|
||||||
MODULE = FU PACKAGE = FU::XS
|
|
||||||
|
MODULE = FU PACKAGE = FU::Util
|
||||||
|
|
||||||
PROTOTYPES: DISABLE
|
PROTOTYPES: DISABLE
|
||||||
|
|
||||||
void json_format(SV *val, ...)
|
void json_format(SV *val, ...)
|
||||||
CODE:
|
CODE:
|
||||||
ST(0) = fujson_fmt_xs(aTHX_ ax, items, val);
|
ST(0) = fujson_fmt_xs(aTHX_ ax, items, val);
|
||||||
|
|
||||||
|
void json_parse(SV *val, ...)
|
||||||
|
CODE:
|
||||||
|
ST(0) = fujson_parse_xs(aTHX_ ax, items, val);
|
||||||
|
|
|
||||||
|
|
@ -4,8 +4,6 @@ use v5.36;
|
||||||
use FU::XS;
|
use FU::XS;
|
||||||
use Exporter 'import';
|
use Exporter 'import';
|
||||||
|
|
||||||
our @EXPORT_OK = qw/json_format/;
|
our @EXPORT_OK = qw/json_format json_parse/;
|
||||||
|
|
||||||
*json_format = *FU::XS::json_format;
|
|
||||||
|
|
||||||
1;
|
1;
|
||||||
|
|
|
||||||
37
FU/Util.pod
37
FU/Util.pod
|
|
@ -20,24 +20,47 @@ functions conform strictly to L<RFC-8259|https://tools.ietf.org/html/rfc8259>,
|
||||||
non-standard extensions are not supported and never will be. It also happens to
|
non-standard extensions are not supported and never will be. It also happens to
|
||||||
be pretty fast, refer to L<FU::Benchmarks> for some numbers.
|
be pretty fast, refer to L<FU::Benchmarks> for some numbers.
|
||||||
|
|
||||||
JSON booleans are decoded into C<builtin::true> and C<builtin::false>. When
|
JSON booleans are parsed into C<builtin::true> and C<builtin::false>. When
|
||||||
formatting, those builtin constants are the I<only> recognized boolean values -
|
formatting, those builtin constants are the I<only> recognized boolean values -
|
||||||
alternative representations such as C<JSON::PP::true> and C<JSON::PP::false>
|
alternative representations such as C<JSON::PP::true> and C<JSON::PP::false>
|
||||||
are not recognized and attempting to format such values will croak.
|
are not recognized and attempting to format such values will croak.
|
||||||
|
|
||||||
|
JSON numbers that are too large to fit into a Perl integer are parsed into a
|
||||||
|
floating point value instead. This obviously loses precision, but is consistent
|
||||||
|
with C<JSON.parse()> in JavaScript land - except Perl does support the full
|
||||||
|
range of a 64bit integer. JSON numbers with a fraction or exponent are also
|
||||||
|
converted into floating point, which may lose precision as well.
|
||||||
|
L<Math::BigInt> and L<Math::BigFloat> are not currently supported. Attempting
|
||||||
|
to format a floating point C<NaN> or C<Inf> results in an error.
|
||||||
|
|
||||||
=over
|
=over
|
||||||
|
|
||||||
|
=item json_parse($string, %options)
|
||||||
|
|
||||||
|
Parse a JSON string and return a Perl value. With the default options, this
|
||||||
|
function is roughly similar to:
|
||||||
|
|
||||||
|
JSON::PP->new->allow_nonref->core_bools->decode($string);
|
||||||
|
|
||||||
|
Supported C<%options>:
|
||||||
|
|
||||||
|
=over
|
||||||
|
|
||||||
|
=item utf8
|
||||||
|
|
||||||
|
Boolean, interpret the input C<$string> as a UTF-8 encoded byte string instead
|
||||||
|
of a Perl Unicode string.
|
||||||
|
|
||||||
|
=back
|
||||||
|
|
||||||
|
|
||||||
=item json_format($scalar, %options)
|
=item json_format($scalar, %options)
|
||||||
|
|
||||||
Format a Perl value as JSON.
|
Format a Perl value as JSON. With the default options, this function behaves
|
||||||
|
roughly similar to:
|
||||||
With the default options, this function behaves roughly similar to:
|
|
||||||
|
|
||||||
JSON::PP->new->allow_nonref->core_bools->convert_blessed->encode($scalar);
|
JSON::PP->new->allow_nonref->core_bools->convert_blessed->encode($scalar);
|
||||||
|
|
||||||
This function croaks when attempting to format a floating point C<NaN> or
|
|
||||||
C<Inf>.
|
|
||||||
|
|
||||||
Some modules escape the slash character in encoded strings to prevent a
|
Some modules escape the slash character in encoded strings to prevent a
|
||||||
potential XSS vulnerability when embedding JSON inside C<< <script> ..
|
potential XSS vulnerability when embedding JSON inside C<< <script> ..
|
||||||
</script> >> tags. This function does I<not> do that because it might not even
|
</script> >> tags. This function does I<not> do that because it might not even
|
||||||
|
|
|
||||||
|
|
@ -7,9 +7,9 @@ typedef struct {
|
||||||
size_t maxlen;
|
size_t maxlen;
|
||||||
} fustr;
|
} fustr;
|
||||||
|
|
||||||
static void fustr_init_(pTHX_ fustr *s, size_t prealloc, size_t maxlen) {
|
/* sv must be a new SV with a preallocated buffer */
|
||||||
if (prealloc > maxlen) prealloc = maxlen;
|
static void fustr_init_(pTHX_ fustr *s, SV *sv, size_t maxlen) {
|
||||||
s->sv = sv_2mortal(newSV(prealloc));
|
s->sv = sv;
|
||||||
SvPOK_only(s->sv);
|
SvPOK_only(s->sv);
|
||||||
s->cur = SvPVX(s->sv);
|
s->cur = SvPVX(s->sv);
|
||||||
s->end = SvEND(s->sv);
|
s->end = SvEND(s->sv);
|
||||||
|
|
|
||||||
|
|
@ -81,7 +81,6 @@ static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t l
|
||||||
fustr_write(&ctx->out, "\"", 1);
|
fustr_write(&ctx->out, "\"", 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* All digits between 0 and 100, a trick I borrowed from the Zig stdlib. */
|
|
||||||
static const char fujson_digits[] =
|
static const char fujson_digits[] =
|
||||||
"00010203040506070809"
|
"00010203040506070809"
|
||||||
"10111213141516171819"
|
"10111213141516171819"
|
||||||
|
|
@ -306,7 +305,7 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
|
||||||
if (ctx.out.maxlen == 0) ctx.out.maxlen = 1<<30;
|
if (ctx.out.maxlen == 0) ctx.out.maxlen = 1<<30;
|
||||||
if (ctx.depth == 0) ctx.depth = 512;
|
if (ctx.depth == 0) ctx.depth = 512;
|
||||||
|
|
||||||
fustr_init(&ctx.out, 128, ctx.out.maxlen);
|
fustr_init(&ctx.out, sv_2mortal(newSV(128)), ctx.out.maxlen);
|
||||||
fujson_fmt(aTHX_ &ctx, val);
|
fujson_fmt(aTHX_ &ctx, val);
|
||||||
if (ctx.pretty >= 0) fustr_write(&ctx.out, "\n", 1);
|
if (ctx.pretty >= 0) fustr_write(&ctx.out, "\n", 1);
|
||||||
r = fustr_done(&ctx.out);
|
r = fustr_done(&ctx.out);
|
||||||
|
|
|
||||||
289
c/jsonparse.c
Normal file
289
c/jsonparse.c
Normal file
|
|
@ -0,0 +1,289 @@
|
||||||
|
/* Parser state: a read cursor over the input bytes. */
typedef struct {
    const unsigned char *buf; /* next unread byte; advanced as input is consumed */
    const unsigned char *end; /* one past the last input byte */
} fujson_parse_ctx;
|
||||||
|
|
||||||
|
static SV *fujson_parse(pTHX_ fujson_parse_ctx *);
|
||||||
|
|
||||||
|
/* Advance ctx->buf past any run of JSON whitespace (tab, LF, CR, space).
 * Stops at the first other byte or when ctx->end is reached. */
static void fujson_parse_ws(pTHX_ fujson_parse_ctx *ctx) {
    for (; ctx->buf < ctx->end; ctx->buf++) {
        switch (*ctx->buf) {
            case 0x09:
            case 0x0a:
            case 0x0d:
            case 0x20:
                continue;
            default:
                return;
        }
    }
}
|
||||||
|
|
||||||
|
/* Decode the four hex digits at b[0..3]. Returns the 16-bit value, or 0x10000
 * when any of the four bytes is not a hex digit. The caller must guarantee
 * that four bytes are readable. */
static unsigned int fujson_parse_hex4(const unsigned char *b) {
    unsigned int acc = 0;
    int i;
    for (i = 0; i < 4; i++) {
        unsigned int c = b[i];
        acc <<= 4;
        if (c >= '0' && c <= '9') acc |= c - '0';
        else if (c >= 'A' && c <= 'F') acc |= c - 'A' + 10;
        else if (c >= 'a' && c <= 'f') acc |= c - 'a' + 10;
        else return 0x10000;
    }
    return acc;
}

/* Decode one backslash escape starting at ctx->buf (which points at the '\\')
 * and append the decoded character to r. Returns 0 on success, 1 on a
 * malformed or truncated escape. On failure ctx->buf is left wherever
 * decoding stopped. */
static int fujson_parse_string_escape(pTHX_ fujson_parse_ctx *ctx, fustr *r) {
    unsigned int cp, low;
    char plain;

    ctx->buf++; /* skip the backslash */
    if (ctx->buf == ctx->end) return 1;

    switch (*(ctx->buf++)) {
        case '"':  plain = '"';  break;
        case '\\': plain = '\\'; break;
        case '/':  plain = '/';  break; /* accepted on input even though the formatter never emits it */
        case 'b':  plain = 0x08; break;
        case 't':  plain = 0x09; break;
        case 'n':  plain = 0x0a; break;
        case 'f':  plain = 0x0c; break;
        case 'r':  plain = 0x0d; break;
        case 'u':
            if (ctx->end - ctx->buf < 4) return 1;
            cp = fujson_parse_hex4(ctx->buf);
            /* Reject bad hex digits and a low surrogate without a preceding high one. */
            if (cp > 0xffff || (cp & 0xfc00) == 0xdc00) return 1;
            ctx->buf += 4;
            if ((cp & 0xfc00) == 0xd800) {
                /* A high surrogate must be followed directly by "\uXXXX" holding a low surrogate. */
                if (ctx->end - ctx->buf < 6) return 1;
                if (ctx->buf[0] != '\\' || ctx->buf[1] != 'u') return 1;
                low = fujson_parse_hex4(ctx->buf + 2);
                if (low > 0xffff || (low & 0xfc00) != 0xdc00) return 1;
                cp = 0x10000 + (((cp & 0x03ff) << 10) | (low & 0x03ff));
                ctx->buf += 6;
            }
            fustr_reserve(r, 4);
            r->cur = (char *)uvchr_to_utf8((U8 *)r->cur, cp);
            return 0;
        default:
            return 1;
    }

    fustr_write(r, &plain, 1);
    return 0;
}
|
||||||
|
|
||||||
|
/* Parse a JSON string; ctx->buf must point at the opening '"'.
 * Returns a new SV with the UTF-8 flag set, or NULL on invalid input
 * (unterminated string, raw control character, bad escape or invalid UTF-8). */
static SV *fujson_parse_string(pTHX_ fujson_parse_ctx *ctx) {
    fustr out;

    fustr_init(&out, newSV(32), SIZE_MAX);
    ctx->buf++; /* opening quote */

    for (;;) {
        const size_t avail = ctx->end - ctx->buf;
        size_t run = 0;
        unsigned char c = 0;

        /* Fast path: copy a run of plain ASCII in one go. An unescaped 0x7f
         * is accepted here; JSON permits it. */
        for (; run < avail; run++) {
            c = ctx->buf[run];
            if (c <= 0x1f || c == '"' || c == '\\' || c >= 0x80) break;
        }
        if (run == avail) break; /* ran out of input before the closing quote */
        fustr_write(&out, (const char *)ctx->buf, run);
        ctx->buf += run;

        /* Slow path: deal with the byte that ended the run. */
        if (c == '"') {
            ctx->buf++;
            SvUTF8_on(fustr_done(&out));
            return out.sv;
        }
        if (c == '\\') {
            if (fujson_parse_string_escape(aTHX_ ctx, &out)) break;
            continue;
        }
        if (c < 0x80) break; /* unescaped control character */

        /* Multi-byte sequence: must be a single strictly valid UTF-8 character. */
        run = isC9_STRICT_UTF8_CHAR(ctx->buf, ctx->end);
        if (run == 0) break;
        fustr_write(&out, (const char *)ctx->buf, run);
        ctx->buf += run;
    }

    SvREFCNT_dec(out.sv);
    return NULL;
}
|
||||||
|
|
||||||
|
/* Validate JSON grammar of a number, increments ctx->buf to the end of the
|
||||||
|
* number and returns -1 on error, 0 if it's an int, 1 for floats. */
|
||||||
|
static int fujson_parse_number_grammar(fujson_parse_ctx *ctx) {
|
||||||
|
int ret = 0;
|
||||||
|
if (*ctx->buf == '-') ctx->buf++;
|
||||||
|
if (ctx->buf == ctx->end) return -1;
|
||||||
|
if (*ctx->buf == '0' && (ctx->buf+1 == ctx->end ||
|
||||||
|
!(ctx->buf[1] == '.' || ctx->buf[1] == 'e' || ctx->buf[1] == 'E'))) {
|
||||||
|
/* rfc8259 permits "-0", so we'll not check for that */
|
||||||
|
ctx->buf++;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#define DIG1 \
|
||||||
|
if (ctx->buf == ctx->end || *ctx->buf < '0' || *ctx->buf > '9') return -1; \
|
||||||
|
ctx->buf++; \
|
||||||
|
while (ctx->buf != ctx->end && *ctx->buf >= '0' && *ctx->buf <= '9') ctx->buf++;
|
||||||
|
|
||||||
|
/* int part */
|
||||||
|
DIG1;
|
||||||
|
/* decimal part */
|
||||||
|
if (ctx->buf != ctx->end && *ctx->buf == '.') {
|
||||||
|
ret = 1;
|
||||||
|
ctx->buf++;
|
||||||
|
DIG1;
|
||||||
|
}
|
||||||
|
/* exponent */
|
||||||
|
if (ctx->buf != ctx->end && (*ctx->buf == 'e' || *ctx->buf == 'E')) {
|
||||||
|
ret = 1;
|
||||||
|
ctx->buf++;
|
||||||
|
if (ctx->buf == ctx->end) return -1;
|
||||||
|
if (*ctx->buf == '+' || *ctx->buf == '-') ctx->buf++;
|
||||||
|
DIG1;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef DIG1
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parse a JSON number at ctx->buf into a new SV. Integers that fit are
 * returned as UV (non-negative) or IV (negative); anything else, including
 * integers that overflow, is converted to an NV and may lose precision.
 * Returns NULL when the input is not a valid JSON number. */
static SV *fujson_parse_number(pTHX_ fujson_parse_ctx *ctx) {
    const unsigned char *first = ctx->buf;
    const char *digits_end;
    UV magnitude;
    NV approx;
    int negative;
    int kind = fujson_parse_number_grammar(ctx);

    if (kind == -1) return NULL;
    negative = *first == '-';

    if (kind == 0) {
        digits_end = (const char *)ctx->buf;
        /* The grammar has been validated, so grok_atoUV() can only fail on overflow here. */
        if (grok_atoUV((const char *)(negative ? first + 1 : first), &magnitude, &digits_end)) {
            if (!negative) return newSVuv(magnitude);
            if (magnitude <= ((UV)IV_MAX)+1) return newSViv(-magnitude);
        }
    }

    /* Floating point or overflowed integer.
     * my_atof3() is not documented to be public... */
    my_atof3((const char *)first, &approx, ctx->buf - first);
    return newSVnv(approx);
}
|
||||||
|
|
||||||
|
/* Parse a JSON array; ctx->buf must point at the '['. Returns a new reference
 * to an AV holding the parsed elements, or NULL on a syntax error (in which
 * case the partially built array is released). */
static SV *fujson_parse_array(pTHX_ fujson_parse_ctx *ctx) {
    AV *items = newAV();
    SV *elem;

    ctx->buf++; /* '[' */
    fujson_parse_ws(aTHX_ ctx);
    if (ctx->buf == ctx->end) goto fail;

    if (*ctx->buf != ']') {
        for (;;) {
            elem = fujson_parse(aTHX_ ctx);
            if (!elem) goto fail;
            av_push_simple(items, elem);

            fujson_parse_ws(aTHX_ ctx);
            if (ctx->buf == ctx->end) goto fail;
            if (*ctx->buf == ']') break;
            if (*ctx->buf != ',') goto fail;
            ctx->buf++;
        }
    }

    ctx->buf++; /* ']' */
    return newRV_noinc((SV *)items);

fail:
    SvREFCNT_dec((SV *)items);
    return NULL;
}
|
||||||
|
|
||||||
|
/* Parse a JSON object; ctx->buf must point at the '{'. Returns a new reference
 * to an HV, or NULL on a syntax error or duplicate key (in which case the
 * partially built hash and any pending key are released). */
static SV *fujson_parse_obj(pTHX_ fujson_parse_ctx *ctx) {
    HV *hv = newHV();
    SV *key = NULL;
    SV *val;

    ctx->buf++; /* '{' */
    fujson_parse_ws(aTHX_ ctx);
    if (ctx->buf == ctx->end) goto err;
    if (*ctx->buf == '}') goto done;

    for (;;) {
        /* key: check for end of input before looking at the byte; after a
         * ',' plus trailing whitespace the cursor may already be at the end. */
        if (ctx->buf == ctx->end) goto err;
        if (*ctx->buf != '"') goto err;
        if (!(key = fujson_parse_string(aTHX_ ctx))) goto err;
        /* TODO: Use precomputed hash */
        if (hv_exists_ent(hv, key, 0)) goto err; /* duplicate keys are rejected */

        /* ':' */
        fujson_parse_ws(aTHX_ ctx);
        if (ctx->buf == ctx->end) goto err;
        if (*ctx->buf != ':') goto err;
        ctx->buf++;

        /* value */
        if (!(val = fujson_parse(aTHX_ ctx))) goto err;
        if (!hv_store_ent(hv, key, val, 0)) {
            /* The hash did not take ownership of val, so release it here. */
            SvREFCNT_dec(val);
            goto err;
        }
        SvREFCNT_dec(key); /* TODO: can reuse buffer */
        key = NULL;

        fujson_parse_ws(aTHX_ ctx);
        if (ctx->buf == ctx->end) goto err;
        if (*ctx->buf == '}') goto done;
        if (*ctx->buf != ',') goto err;
        ctx->buf++;
        fujson_parse_ws(aTHX_ ctx);
    }

done:
    ctx->buf++; /* '}' */
    return newRV_noinc((SV *)hv);

err:
    if (key) SvREFCNT_dec(key);
    SvREFCNT_dec((SV *)hv);
    return NULL;
}
|
||||||
|
|
||||||
|
static SV *fujson_parse(pTHX_ fujson_parse_ctx *ctx) {
|
||||||
|
fujson_parse_ws(aTHX_ ctx);
|
||||||
|
if (ctx->buf == ctx->end) return NULL;
|
||||||
|
switch (*ctx->buf) {
|
||||||
|
case '"': return fujson_parse_string(aTHX_ ctx);
|
||||||
|
case '{': return fujson_parse_obj(aTHX_ ctx);
|
||||||
|
case '[': return fujson_parse_array(aTHX_ ctx);
|
||||||
|
case 't':
|
||||||
|
if (ctx->end - ctx->buf < 4) return NULL;
|
||||||
|
if (memcmp(ctx->buf, "true", 4) != 0) return NULL;
|
||||||
|
ctx->buf += 4;
|
||||||
|
return newSV_true();
|
||||||
|
case 'f':
|
||||||
|
if (ctx->end - ctx->buf < 5) return NULL;
|
||||||
|
if (memcmp(ctx->buf, "false", 5) != 0) return NULL;
|
||||||
|
ctx->buf += 5;
|
||||||
|
return newSV_false();
|
||||||
|
case 'n':
|
||||||
|
if (ctx->end - ctx->buf < 4) return NULL;
|
||||||
|
if (memcmp(ctx->buf, "null", 4) != 0) return NULL;
|
||||||
|
ctx->buf += 4;
|
||||||
|
return newSV(0);
|
||||||
|
default:
|
||||||
|
if (*ctx->buf == '-' || (*ctx->buf >= '0' && *ctx->buf <= '9'))
|
||||||
|
return fujson_parse_number(aTHX_ ctx);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* XS entry point for json_parse($string, %options).
 *
 * ax/argc describe the Perl argument stack; val is the first argument (the
 * JSON text). Remaining arguments are name/value option pairs; the only
 * recognized option is "utf8". Returns a mortal SV holding the parsed value.
 * Croaks on an odd option list, an unknown option, a parse error or trailing
 * non-whitespace after the value. */
static SV *fujson_parse_xs(pTHX_ I32 ax, I32 argc, SV *val) {
    I32 i = 1;
    char *arg;
    SV *r;
    int decutf8 = 0;
    STRLEN buflen;
    fujson_parse_ctx ctx;

    while (i < argc) {
        arg = SvPV_nolen(ST(i));
        i++;
        if (i == argc) croak("Odd name/value argument for json_parse()");
        r = ST(i);
        i++;

        /* SvTRUE() works for any scalar. SvPVXtrue() only looks at the string
         * buffer and is not valid for values such as a plain integer 1 or 0. */
        if (strcmp(arg, "utf8") == 0) decutf8 = SvTRUE(r);
        else croak("Unknown flag: '%s'", arg);
    }

    /* With utf8 the input is taken as raw bytes; otherwise the parser is
     * given the string's UTF-8 representation.
     * NOTE(review): per perlapi, SvPVutf8() may upgrade val in place - confirm
     * that modifying the caller's scalar is acceptable. */
    arg = decutf8 ? SvPVbyte(val, buflen) : SvPVutf8(val, buflen);
    ctx.buf = (const unsigned char *)arg;
    ctx.end = ctx.buf + buflen;

    r = fujson_parse(aTHX_ &ctx);
    if (!r) croak("JSON parsing failed at offset %"UVuf, (UV)((char *)ctx.buf - arg));

    /* Only whitespace may follow the top-level value. */
    fujson_parse_ws(aTHX_ &ctx);
    if (ctx.buf != ctx.end) {
        SvREFCNT_dec(r);
        croak("garbage after JSON value at offset %"UVuf, (UV)((char *)ctx.buf - arg));
    }

    return sv_2mortal(r);
}
|
||||||
|
|
||||||
|
/* TODO: incremental parsing (accept & return a byte offset) */
|
||||||
|
/* TODO: max_depth & max_size */
|
||||||
|
|
@ -31,11 +31,9 @@ my @tests = (
|
||||||
"\x01é\r\n\x1f💩", '"\u0001é\r\n\u001f💩"',
|
"\x01é\r\n\x1f💩", '"\u0001é\r\n\u001f💩"',
|
||||||
)},
|
)},
|
||||||
|
|
||||||
do { use bytes; (
|
"\x011\r\n\x8c", "\"\\u00011\\r\\n\x8c\"",
|
||||||
"\x011\r\n\x8c", "\"\\u00011\\r\\n\x8c\"",
|
"\xff\xff", "\"\xff\xff\"",
|
||||||
"\xff\xff", "\"\xff\xff\"",
|
"\x{1f4a9}", do { use utf8; '"💩"' },
|
||||||
"\x{1f4a9}", do { use utf8; '"💩"' },
|
|
||||||
)},
|
|
||||||
|
|
||||||
[], '[]',
|
[], '[]',
|
||||||
[0,1], '[0,1]',
|
[0,1], '[0,1]',
|
||||||
|
|
@ -63,6 +61,8 @@ my @errors = (
|
||||||
*STDOUT, qr/unable to format unknown value/,
|
*STDOUT, qr/unable to format unknown value/,
|
||||||
'NaN'+0, qr/unable to format floating point NaN or Inf as JSON/,
|
'NaN'+0, qr/unable to format floating point NaN or Inf as JSON/,
|
||||||
'Inf'+0, qr/unable to format floating point NaN or Inf as JSON/,
|
'Inf'+0, qr/unable to format floating point NaN or Inf as JSON/,
|
||||||
|
"\x{D83D}", qr/invalid codepoint encountered in string/,
|
||||||
|
"\x{DE03}", qr/invalid codepoint encountered in string/,
|
||||||
do { no warnings 'portable'; "\x{ffffffff}" }, qr/invalid codepoint encountered in string/,
|
do { no warnings 'portable'; "\x{ffffffff}" }, qr/invalid codepoint encountered in string/,
|
||||||
do { my $o = {}; bless $o, 'FU::Whatever' }, qr/unable to format 'FU::Whatever' object as JSON/,
|
do { my $o = {}; bless $o, 'FU::Whatever' }, qr/unable to format 'FU::Whatever' object as JSON/,
|
||||||
do { my $o = {}; bless $o, 'MyToJSONSelf' }, qr/MyToJSONSelf::TO_JSON method returned same object as was passed instead of a new one/,
|
do { my $o = {}; bless $o, 'MyToJSONSelf' }, qr/MyToJSONSelf::TO_JSON method returned same object as was passed instead of a new one/,
|
||||||
|
|
|
||||||
172
t/json_parse.t
Normal file
172
t/json_parse.t
Normal file
|
|
@ -0,0 +1,172 @@
|
||||||
|
use v5.36;
|
||||||
|
use Test::More;
|
||||||
|
use FU::Util 'json_parse';
|
||||||
|
no warnings 'experimental::builtin';
|
||||||
|
use builtin 'is_bool', 'created_as_number';
|
||||||
|
|
||||||
|
my @error = (
|
||||||
|
'',
|
||||||
|
'tru',
|
||||||
|
'nul',
|
||||||
|
'fals',
|
||||||
|
'true,',
|
||||||
|
|
||||||
|
'"',
|
||||||
|
"\"\x00\"",
|
||||||
|
'"\x"',
|
||||||
|
'"\u',
|
||||||
|
'" \u123',
|
||||||
|
'"\\',
|
||||||
|
'"\ud812"',
|
||||||
|
'"\u123g"',
|
||||||
|
'"\udc12"',
|
||||||
|
'"\udc12\u1234"',
|
||||||
|
"\"\x{110000}\"",
|
||||||
|
|
||||||
|
'1.',
|
||||||
|
'01',
|
||||||
|
'1e',
|
||||||
|
'1e+',
|
||||||
|
'1x',
|
||||||
|
'1e-',
|
||||||
|
'--1',
|
||||||
|
'+1',
|
||||||
|
'0x1',
|
||||||
|
'1..1',
|
||||||
|
'1ee1',
|
||||||
|
'1e1.1',
|
||||||
|
|
||||||
|
' [ ',
|
||||||
|
'[,true]',
|
||||||
|
'[true,]',
|
||||||
|
'[,]',
|
||||||
|
|
||||||
|
' { ',
|
||||||
|
'{1:2}',
|
||||||
|
'{""}',
|
||||||
|
'{"":}',
|
||||||
|
'{"":1',
|
||||||
|
'{"":1,}',
|
||||||
|
'{,}',
|
||||||
|
'{"":1,"":2}',
|
||||||
|
);
|
||||||
|
for my $s (@error) {
|
||||||
|
ok !eval { json_parse($s); 1 };
|
||||||
|
}
|
||||||
|
|
||||||
|
my $v;
|
||||||
|
|
||||||
|
ok !defined json_parse " null ";
|
||||||
|
|
||||||
|
$v = json_parse " true \t\r\n ";
|
||||||
|
ok is_bool $v;
|
||||||
|
ok $v;
|
||||||
|
|
||||||
|
$v = json_parse " false ";
|
||||||
|
ok is_bool $v;
|
||||||
|
ok !$v;
|
||||||
|
|
||||||
|
# Parse $in twice: once as a Perl Unicode string, once as its UTF-8 encoding
# with utf8 => 1. Both results must equal $exp and carry the UTF-8 flag.
# The encoded input doubles as the test name.
sub str($in, $exp) {
    my $bytes = $in;
    utf8::encode($bytes);

    my $decoded = json_parse($in);
    is($decoded, $exp, $bytes);
    ok(utf8::is_utf8($decoded));

    $decoded = json_parse($bytes, utf8 => 1);
    is($decoded, $exp, $bytes);
    ok(utf8::is_utf8($decoded));
}
|
||||||
|
str '""', '';
|
||||||
|
str '"hello, world"', 'hello, world';
|
||||||
|
str '"\u0000\u0099\u0234\u1234"', "\x{00}\x{99}\x{234}\x{1234}";
|
||||||
|
str "\"\x{7f}\x{99}\x{234}\x{1234}\x{12345}\"", "\x{7f}\x{99}\x{234}\x{1234}\x{12345}";
|
||||||
|
str '"\/\"\\\\\b\t\n\f\r"', "/\"\\\x{08}\x{09}\x{0a}\x{0c}\x{0d}";
|
||||||
|
str '"\uD83D\uDE03"', "\x{1F603}";
|
||||||
|
|
||||||
|
# Parse $in and check that the result compares equal to $exp (defaulting to
# the input itself) and was created as a number rather than a string.
sub num($in, $exp=$in) {
    my $parsed = json_parse($in);
    is($parsed, $exp);
    ok(created_as_number($parsed));
}
|
||||||
|
num 0;
|
||||||
|
num ' -0 ', 0;
|
||||||
|
num '-9223372036854775808';
|
||||||
|
num '9223372036854775807';
|
||||||
|
num '18446744073709551615';
|
||||||
|
num '-9223372036854775809', -9.22337203685478e+18;
|
||||||
|
num '18446744073709551616', 1.84467440737096e+19;
|
||||||
|
num '1.234';
|
||||||
|
num '1e5', 100000;
|
||||||
|
num '1e+5', 100000;
|
||||||
|
num '1e-5', 0.00001;
|
||||||
|
num '2.5e-5', 0.000025;
|
||||||
|
num '2.5e5', 250000;
|
||||||
|
num '2.5E5', 250000;
|
||||||
|
num '-0.000000000000000000000000000000000000000000000000000000000000000000000000000001', -1e-78;
|
||||||
|
|
||||||
|
$v = json_parse ' [ ] ';
|
||||||
|
is ref $v, 'ARRAY';
|
||||||
|
is scalar @$v, 0;
|
||||||
|
|
||||||
|
$v = json_parse ' [ true , null , false ] ';
|
||||||
|
is ref $v, 'ARRAY';
|
||||||
|
is scalar @$v, 3;
|
||||||
|
ok $v->[0];
|
||||||
|
ok !defined $v->[1];
|
||||||
|
ok !$v->[2];
|
||||||
|
|
||||||
|
$v = json_parse ' [true,null,false] ';
|
||||||
|
is ref $v, 'ARRAY';
|
||||||
|
is scalar @$v, 3;
|
||||||
|
ok $v->[0];
|
||||||
|
ok !defined $v->[1];
|
||||||
|
ok !$v->[2];
|
||||||
|
|
||||||
|
$v = json_parse ' [ [] ] ';
|
||||||
|
is ref $v, 'ARRAY';
|
||||||
|
is scalar @$v, 1;
|
||||||
|
is ref $v->[0], 'ARRAY';
|
||||||
|
is scalar $v->[0]->@*, 0;
|
||||||
|
|
||||||
|
$v = json_parse '{}';
|
||||||
|
is ref $v, 'HASH';
|
||||||
|
is keys %$v, 0;
|
||||||
|
|
||||||
|
$v = json_parse '{"a":1}';
|
||||||
|
is ref $v, 'HASH';
|
||||||
|
is keys %$v, 1;
|
||||||
|
is $v->{a}, 1;
|
||||||
|
|
||||||
|
# Parse $s and verify the structure shared by both nested-object inputs:
# three keys, "a" (a five-element array), "" (the number 0) and "ë" (an
# empty array). Stores the result in the file-level $v.
sub large($s) {
    $v = json_parse($s);
    is(ref($v), 'HASH');
    is(scalar(keys %$v), 3);

    # "a": [1, 0.1, true, null, {}]
    ok(exists $v->{a});
    my $list = $v->{a};
    is(ref($list), 'ARRAY');
    is(scalar(@$list), 5);
    ok(created_as_number($list->[0]));
    is($list->[0], 1);
    ok(created_as_number($list->[1]));
    is($list->[1], 0.1);
    ok(is_bool($list->[2]));
    ok($list->[2]);
    ok(!defined $list->[3]);
    is(ref($list->[4]), 'HASH');
    is(scalar(keys $list->[4]->%*), 0);

    # "": -0
    ok(exists $v->{''});
    ok(created_as_number($v->{''}));
    is($v->{''}, 0);

    # "ë": []
    ok(exists $v->{'ë'});
    is(ref($v->{'ë'}), 'ARRAY');
    is(scalar($v->{'ë'}->@*), 0);
}
|
||||||
|
large '{"a":[1,0.1,true,null,{}],"":-0,"ë":[]}';
|
||||||
|
large ' {
|
||||||
|
"a" : [ 1 , 0.1 , true , null , { } ] ,
|
||||||
|
"" : -0 ,
|
||||||
|
"ë" : [ ]
|
||||||
|
} ';
|
||||||
|
|
||||||
|
done_testing;
|
||||||
Loading…
Add table
Add a link
Reference in a new issue