fu/c/jsonfmt.c

178 lines
5.9 KiB
C

/* Forward declaration: the array and hash formatters below call fujson_fmt()
 * for each element/value, and fujson_fmt() in turn calls them for references,
 * so it has to be declared before its definition further down. */
static void fujson_fmt(pTHX_ fustr *, SV *);
/* Write the `len` bytes at `stri` to `out` as a double-quoted JSON string.
 *
 * utf8 != 0: the input is validated with is_c9strict_utf8_string() up front,
 *            after which bytes >= 0x80 are copied through untouched.
 * utf8 == 0: every byte >= 0x80 is re-encoded as a two-byte UTF-8 sequence.
 *
 * In both modes '"' and '\' get a backslash escape, 0x08/0x09/0x0a/0x0c/0x0d
 * use their short forms (\b \t \n \f \r), and every other byte below 0x20,
 * as well as 0x7f, is written as \u00XX.
 *
 * Nothing at all is written when a utf8-flagged string fails validation. */
static void fujson_fmt_str(pTHX_ fustr *out, const char *stri, size_t len, int utf8) {
    const unsigned char *bytes = (const unsigned char *)stri;
    size_t pos = 0;

    /* Validating once here lets the copy loop below treat everything >= 0x80
     * as opaque. Skipping this roughly doubles throughput on unicode strings,
     * so is_c9strict_utf8_string() is a likely optimization target. */
    if (utf8 && !is_c9strict_utf8_string(bytes, len)) {
        return; /* TODO: Throw error. */
    }

    fustr_write(out, "\"", 1);
    fustr_reserve(out, len);

    while (pos < len) {
        const size_t run_start = pos;
        unsigned char c = 0;

        /* Fast path: find the longest run of bytes that can be copied as-is. */
        if (utf8) {
            /* Everything >= 0x80 has already been validated. */
            for (; pos < len; pos++) {
                c = bytes[pos];
                if (c < 0x20 || c == 0x7f || c == '"' || c == '\\') break;
            }
        } else {
            /* Binary strings: bytes >= 0x80 also need processing. */
            for (; pos < len; pos++) {
                c = bytes[pos];
                if (c < 0x20 || c >= 0x7f || c == '"' || c == '\\') break;
            }
        }
        fustr_write(out, (char *)bytes + run_start, pos - run_start);

        /* The run reached the end of the input: nothing left to escape. */
        if (pos >= len) break;

        /* The scan stopped early, so `c` is the byte that needs special care. */
        switch (c) {
            case '"':
                fustr_write(out, "\\\"", 2);
                break;
            case '\\':
                fustr_write(out, "\\\\", 2);
                break;
            case 0x08:
                fustr_write(out, "\\b", 2);
                break;
            case 0x09:
                fustr_write(out, "\\t", 2);
                break;
            case 0x0a:
                fustr_write(out, "\\n", 2);
                break;
            case 0x0c:
                fustr_write(out, "\\f", 2);
                break;
            case 0x0d:
                fustr_write(out, "\\r", 2);
                break;
            default:
                if (c >= 0x80) {
                    /* Only reachable when !utf8: encode as 2-byte UTF-8. */
                    unsigned char *enc = (unsigned char *)fustr_write_buf(out, 2);
                    enc[0] = 0xc0 | (c >> 6);
                    enc[1] = 0x80 | (c & 0x3f);
                } else {
                    /* Remaining control characters and 0x7f: \u00XX. */
                    unsigned char *hex = (unsigned char *)fustr_write_buf(out, 6);
                    memcpy(hex, "\\u00", 4);
                    hex[4] = PL_hexdigit[(c >> 4) & 0x0f];
                    hex[5] = PL_hexdigit[c & 0x0f];
                }
        }
        pos++;
    }

    fustr_write(out, "\"", 1);
}
/* Lookup table holding the two-character decimal forms "00" through "99"
 * back to back, so the pair for a value n in [0, 99] starts at offset 2*n.
 * It lets the integer formatter emit two digits per division.
 * A trick I borrowed from the Zig stdlib. */
static const char fujson_digits[] =
"00010203040506070809"
"10111213141516171819"
"20212223242526272829"
"30313233343536373839"
"40414243444546474849"
"50515253545556575859"
"60616263646566676869"
"70717273747576777879"
"80818283848586878889"
"90919293949596979899";
/* Write the integer value of `val` to `out` in decimal.
 *
 * The value is read without triggering get-magic (the *_nomg accessors), as
 * a UV when SvIsUV() says so and as an IV otherwise. Digits are produced
 * right-to-left into a stack buffer, two at a time via fujson_digits, and the
 * finished text is handed to fustr_write() in one call. */
static void fujson_fmt_int(pTHX_ fustr *out, SV *val) {
    char buf[32];
    char *const end = buf + 31;
    char *r = end;
    int neg = 0;
    UV uv;

    if (SvIsUV(val)) { /* Why is this macro not documented? */
        uv = SvUV_nomg(val);
    } else {
        IV iv = SvIV_nomg(val);
        neg = iv < 0;
        /* Negate in unsigned arithmetic: a plain -iv is signed overflow
         * (undefined behaviour) when iv is the most negative IV, whereas
         * 0 - (UV)iv wraps to the correct magnitude for every input. */
        uv = neg ? (UV)0 - (UV)iv : (UV)iv;
    }

    if (uv == 0) {
        fustr_write(out, "0", 1);
        return;
    }

    /* Peel off two digits per iteration while at least two remain. */
    while (uv >= 10) {
        r -= 2;
        memcpy(r, fujson_digits + ((uv % 100)<<1), 2);
        uv /= 100;
    }
    /* At most one leading digit is left at this point. */
    if (uv > 0) *(--r) = (char)('0' + uv);
    if (neg) *(--r) = '-';

    fustr_write(out, r, (size_t)(end - r));
}
/* Write the array `av` to `out` as a JSON array: elements are formatted with
 * fujson_fmt() and separated by commas; a slot for which av_fetch() returns
 * NULL is written as `null`.
 *
 * The element count is taken once, before the loop. It and the index are
 * kept in size_t rather than int so that the value returned by av_count() is
 * not truncated on platforms where int is narrower than the array size type. */
static void fujson_fmt_av(pTHX_ fustr *out, AV *av) {
    size_t i, len = av_count(av);

    fustr_write(out, "[", 1);
    for (i = 0; i < len; i++) {
        SV **sv;

        if (i) fustr_write(out, ",", 1);
        sv = av_fetch(av, i, 0);
        if (sv) fujson_fmt(aTHX_ out, *sv); /* sv will have magic if av is tied, but fujson_fmt() handles that. */
        else fustr_write(out, "null", 4);
    }
    fustr_write(out, "]", 1);
}
/* Write the hash `hv` to `out` as a JSON object.
 *
 * Entries are emitted in whatever order hv_iternext() yields them. Each key
 * goes through fujson_fmt_str() (with the key's UTF-8 flag) and each value
 * through fujson_fmt(). When the hash itself is magical the value is
 * obtained with hv_iterval() instead of being read straight from the entry. */
static void fujson_fmt_hv(pTHX_ fustr *out, HV *hv) {
    HE *entry;
    int need_comma = 0;

    hv_iterinit(hv);
    fustr_write(out, "{", 1);

    for (entry = hv_iternext(hv); entry; entry = hv_iternext(hv)) {
        STRLEN keylen;
        char *key;
        SV *value;

        /* Separator before every pair except the first. */
        if (need_comma) {
            fustr_write(out, ",", 1);
        }
        need_comma = 1;

        key = HePV(entry, keylen);
        fujson_fmt_str(aTHX_ out, key, keylen, HeUTF8(entry));
        fustr_write(out, ":", 1);

        if (UNLIKELY(SvMAGICAL(hv))) {
            value = hv_iterval(hv, entry);
        } else {
            value = HeVAL(entry);
        }
        fujson_fmt(aTHX_ out, value);
    }

    fustr_write(out, "}", 1);
}
/* Write `val` to `out` as JSON, recursing into array and hash references.
 *
 * Get-magic is run on `val` first, then the value is classified in this
 * order: boolean, string, floating point, integer, reference, undef.
 *
 * Known gaps (output is simply skipped, which leaves `out` without a value
 * at that position): Inf/NaN numbers, blessed references, references to
 * anything other than a hash or array, and any value that matches none of
 * the branches. There is no depth limit on the recursion. */
static void fujson_fmt(pTHX_ fustr *out, SV *val) {
    SvGETMAGIC(val);
    /* XXX: &PL_sv_yes and &PL_sv_no are proper booleans under 5.40, so no need
     * to explicitly check for those; does this work in 5.36 as well? */
    if (SvIsBOOL(val)) { /* Must check before IOKp & POKp, because bool implies both flags */
        if (BOOL_INTERNALS_sv_isbool_true(val)) fustr_write(out, "true", 4);
        else fustr_write(out, "false", 5);
    } else if (SvPOKp(val)) {
        fujson_fmt_str(aTHX_ out, SvPVX(val), SvCUR(val), SvUTF8(val));
    } else if (SvNOKp(val)) { /* Must check before IOKp, because integer conversion might have been lossy */
        /* TODO: quadmath? */
        NV nv = SvNV_nomg(val);
        if (isinfnan(nv)) return; /* TODO: error */
        /* Gconvert() writes a NUL-terminated string straight into the output
         * buffer. With NV_DIG significant digits the text can also carry a
         * sign, a decimal point and an exponent, so NV_DIG+1 bytes are not
         * enough; reserve a generous margin instead.
         * NOTE(review): assumes fustr_reserve(out, n) guarantees n writable
         * bytes starting at out->cur - confirm against fustr. */
        fustr_reserve(out, NV_DIG+32);
        Gconvert(nv, NV_DIG, 0, out->cur);
        out->cur += strlen(out->cur);
    } else if (SvIOKp(val)) {
        fujson_fmt_int(aTHX_ out, val);
    } else if (SvROK(val)) {
        SV *rv = SvRV(val);
        SvGETMAGIC(rv);
        if (UNLIKELY(SvOBJECT(rv))) { /* TODO: Check for TO_JSON */ }
        else if (SvTYPE(rv) == SVt_PVHV) fujson_fmt_hv(aTHX_ out, (HV *)rv);
        else if (SvTYPE(rv) == SVt_PVAV) fujson_fmt_av(aTHX_ out, (AV *)rv);
        else return; /* TODO: error */
    } else if (!SvOK(val)) {
        fustr_write(out, "null", 4);
    } else {
        /* TODO: error */
    }
}
/* TODO: canonical */
/* TODO: pretty */
/* TODO: max depth? */