Add initial JSON formatter
It works and can format all "plain" Perl data, but it has a few known bugs and limitations that still need to be worked out. It's about 8x smaller than JSON::XS's encoder and *much* smaller than Cpanel::JSON::XS, but this is just a first attempt; it'll grow.
This commit is contained in:
parent
9c80f2465a
commit
c16a9fa493
10 changed files with 421 additions and 0 deletions
43
c/common.c
Normal file
43
c/common.c
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
/*
 * Custom string builder, comparable to functionality provided by SV*
 * functions, but with less magic and better inlineable.
 *
 * A zero-filled fustr is a valid empty string.
 */
typedef struct {
    size_t len;   /* number of bytes currently written into buf */
    size_t size;  /* number of bytes allocated for buf */
    char *buf;    /* the byte buffer itself */
} fustr;
|
||||
|
||||
/* No need to call this, an empty fustr is already usable.
|
||||
* This allows setting a custom initial size. */
|
||||
static void fustr_init(fustr *s, size_t prealloc) {
|
||||
s->len = 0;
|
||||
s->size = prealloc;
|
||||
s->buf = safemalloc(prealloc);
|
||||
}
|
||||
|
||||
/* Enlarge the buffer so that at least `add` more bytes fit after the
 * current contents. The capacity starts at 512 bytes when it was zero and
 * is doubled until it is large enough; the buffer is always reallocated. */
static void fustr_grow(fustr *s, size_t add) {
    const size_t wanted = s->len + add;
    size_t cap = s->size;

    if (cap == 0) {
        cap = 512;
    }
    while (cap < wanted) {
        cap *= 2;
    }

    s->size = cap;
    s->buf = saferealloc(s->buf, cap);
}
|
||||
|
||||
/* Make sure `n` more bytes can be written to `s` without reallocating;
 * only calls fustr_grow() when the current capacity is insufficient.
 * Note: both arguments are evaluated more than once. */
#define fustr_reserve(s, n) do {\
    if (UNLIKELY((s)->len + (n) > (s)->size))\
        fustr_grow((s), (n));\
} while(0)
|
||||
|
||||
/* Append `n` bytes starting at `str` to `s`, growing the buffer if needed.
 * Note: `s` and `n` are evaluated more than once. */
#define fustr_write(s, str, n) do {\
    fustr_reserve((s), (n));\
    memcpy(&(s)->buf[(s)->len], (str), (n));\
    (s)->len += (n);\
} while(0)
|
||||
|
||||
/* Move the string buffer into a new SV; fustr should be considered invalid after this call.
|
||||
* Does not set the UTF8 flag. */
|
||||
static SV *fustr_sv(fustr *s) {
|
||||
SV *r = newSV(0);
|
||||
fustr_write(s, "", 1); // trailing nul
|
||||
sv_usepvn_flags(r, s->buf, s->len-1, SV_HAS_TRAILING_NUL);
|
||||
// TODO: SvPV_shrink_to_cur?
|
||||
return r;
|
||||
}
|
||||
162
c/jsonfmt.c
Normal file
162
c/jsonfmt.c
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
/* Forward declaration: the array/hash formatters below recurse into it. */
static void fujson_fmt(fustr *out, SV *val);
|
||||
|
||||
/* Append `len` bytes at `stri` to `out` as a double-quoted JSON string.
 *
 *   utf8 != 0: the input is flagged as UTF-8; it is validated up front and
 *              bytes >= 0x80 are then copied through unchanged.
 *   utf8 == 0: the input is a binary string; each byte >= 0x80 is written
 *              as a 2-byte UTF-8 sequence.
 *
 * Control characters, '"', '\\' and 0x7f are always escaped. If validation
 * of a UTF-8 flagged string fails, nothing is written (see TODO). */
static void fujson_fmt_str(fustr *out, const char *stri, size_t len, int utf8) {
    const unsigned char *bytes = (const unsigned char *)stri;
    size_t pos = 0;

    /* Validate entire string for conformance if this is flagged as a utf8 string, this lets us be lazy further on. */
    if (utf8 && !is_c9strict_utf8_string(bytes, len)) {
        return; /* TODO: Throw error. */
    }

    fustr_write(out, "\"", 1);
    fustr_reserve(out, len);

    while (pos < len) {
        const size_t run = pos;
        const char *esc = NULL;
        unsigned char c = 0;

        /* Fast path: find the longest run that can be copied verbatim. */
        for (; pos < len; pos++) {
            c = bytes[pos];
            if (c <= 0x1f || c == '"' || c == '\\' || c == 0x7f) break;
            /* binary strings need special handling for >=0x80;
             * for utf8 strings everything >=0x80 was already validated */
            if (!utf8 && c >= 0x80) break;
        }
        fustr_write(out, bytes+run, pos-run);

        /* Reached the end without hitting a byte that needs processing. */
        if (pos >= len) break;

        /* Two-character escapes. */
        if      (c == '"')  esc = "\\\"";
        else if (c == '\\') esc = "\\\\";
        else if (c == 0x08) esc = "\\b";
        else if (c == 0x09) esc = "\\t";
        else if (c == 0x0a) esc = "\\n";
        else if (c == 0x0c) esc = "\\f";
        else if (c == 0x0d) esc = "\\r";

        if (esc) {
            fustr_write(out, esc, 2);
        } else if (c < 0x80) {
            /* Remaining control characters and 0x7f: \u00XX */
            char *dst;
            fustr_reserve(out, 6);
            dst = out->buf + out->len;
            memcpy(dst, "\\u00", 4);
            dst[4] = PL_hexdigit[(c >> 4) & 0x0f];
            dst[5] = PL_hexdigit[c & 0x0f];
            out->len += 6;
        } else {
            /* c >= 0x80, !utf8, so encode as 2-byte UTF-8 */
            fustr_reserve(out, 2);
            out->buf[out->len  ] = 0xc0 | (c >> 6);
            out->buf[out->len+1] = 0x80 | (c & 0x3f);
            out->len += 2;
        }
        pos++;
    }

    fustr_write(out, "\"", 1);
}
|
||||
|
||||
/* Append the decimal representation of the integer value of `val` to `out`.
 * Handles both unsigned (SvIsUV) and signed values; get-magic is not
 * triggered here (the _nomg accessors are used). */
static void fujson_fmt_int(fustr *out, SV *val) {
    char buf[32];
    size_t idx = sizeof buf;
    int neg = 0;
    UV uv;

    if (SvIsUV(val)) { /* Why is this macro not documented? */
        uv = SvUV_nomg(val);
    } else {
        IV iv = SvIV_nomg(val);
        neg = iv < 0;
        /* Negate in unsigned arithmetic: `-iv` on the signed type overflows
         * (undefined behavior) when iv is the most negative IV. */
        uv = neg ? (UV)0 - (UV)iv : (UV)iv;
    }

    if (uv == 0) {
        fustr_write(out, "0", 1);
        return;
    }

    /* Digits are produced least-significant first, so fill buf from the end. */
    while (uv > 0) {
        /* TODO: can use a lookup table to optimize for 0 - 100; need benchmark */
        buf[--idx] = '0' + (uv % 10);
        uv /= 10;
    }
    if (neg) buf[--idx] = '-';
    fustr_write(out, buf+idx, sizeof buf - idx);
}
|
||||
|
||||
/* Append the elements of `av` to `out` as a JSON array. Slots for which
 * av_fetch() returns NULL are written as null. */
static void fujson_fmt_av(fustr *out, AV *av) {
    /* av_count() returns an unsigned size; storing it in an int would
     * truncate the count for very large (or tied) arrays. */
    size_t i, len = av_count(av);

    fustr_write(out, "[", 1);
    for (i=0; i<len; i++) {
        SV **sv;
        if (i) fustr_write(out, ",", 1);
        sv = av_fetch(av, i, 0);
        if (sv) fujson_fmt(out, *sv); /* sv will have magic if av is tied, but fujson_fmt() handles that. */
        else fustr_write(out, "null", 4);
    }
    fustr_write(out, "]", 1);
}
|
||||
|
||||
/* Append the entries of `hv` to `out` as a JSON object, in the hash's own
 * iteration order. Keys are written through fujson_fmt_str(), values
 * through fujson_fmt(). */
static void fujson_fmt_hv(fustr *out, HV *hv) {
    HE *entry;
    int first = 1;

    hv_iterinit(hv);
    fustr_write(out, "{", 1);

    for (entry = hv_iternext(hv); entry != NULL; entry = hv_iternext(hv)) {
        STRLEN keylen;
        char *key;
        SV *value;

        /* Separator before every entry except the first. */
        if (!first) {
            fustr_write(out, ",", 1);
        }
        first = 0;

        key = HePV(entry, keylen);
        fujson_fmt_str(out, key, keylen, HeUTF8(entry));
        fustr_write(out, ":", 1);

        /* A magical hash has its value fetched through hv_iterval(). */
        if (UNLIKELY(SvMAGICAL(hv))) {
            value = hv_iterval(hv, entry);
        } else {
            value = HeVAL(entry);
        }
        fujson_fmt(out, value);
    }

    fustr_write(out, "}", 1);
}
|
||||
|
||||
|
||||
/* BUG: Leaks *out on error, that should be on the temp stack */
|
||||
static void fujson_fmt(fustr *out, SV *val) {
|
||||
SvGETMAGIC(val);
|
||||
|
||||
/* XXX: &PL_sv_yes and &PL_sv_no are proper booleans under 5.40, so no need
|
||||
* to explicitly check for those; does this work in 5.36 as well? */
|
||||
if (SvIsBOOL(val)) { /* Must check before IOKp & POKp, because bool implies both flags */
|
||||
if (BOOL_INTERNALS_sv_isbool_true(val)) fustr_write(out, "true", 4);
|
||||
else fustr_write(out, "false", 5);
|
||||
} else if (SvPOKp(val)) {
|
||||
fujson_fmt_str(out, SvPVX(val), SvCUR(val), SvUTF8(val));
|
||||
} else if (SvNOKp(val)) { /* Must check before IOKp, because integer conversion might have been lossy */
|
||||
/* TODO: quadmath? */
|
||||
NV nv = SvNV_nomg(val);
|
||||
if (isinfnan(nv)) return; /* TODO: error */
|
||||
fustr_reserve(out, NV_DIG+1);
|
||||
Gconvert(nv, NV_DIG, 0, out->buf + out->len);
|
||||
out->len += strlen(out->buf + out->len);
|
||||
} else if (SvIOKp(val)) {
|
||||
fujson_fmt_int(out, val);
|
||||
} else if (SvROK(val)) {
|
||||
SV *rv = SvRV(val);
|
||||
SvGETMAGIC(rv);
|
||||
if (UNLIKELY(SvOBJECT(rv))) { /* TODO: Check for TO_JSON */ }
|
||||
else if (SvTYPE(rv) == SVt_PVHV) fujson_fmt_hv(out, (HV *)rv);
|
||||
else if (SvTYPE(rv) == SVt_PVAV) fujson_fmt_av(out, (AV *)rv);
|
||||
else return; /* TODO: error */
|
||||
} else if (!SvOK(val)) {
|
||||
fustr_write(out, "null", 4);
|
||||
} else {
|
||||
/* TODO: error */
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO: canonical */
|
||||
/* TODO: pretty */
|
||||
/* TODO: max depth? */
|
||||
/* TODO: threading support */
|
||||
Loading…
Add table
Add a link
Reference in a new issue