static void fujson_fmt(pTHX_ fustr *, SV *);

/*
 * Write the `len` bytes at `stri` to `out` as a double-quoted JSON string.
 *
 * With `utf8` set, the input is checked with is_c9strict_utf8_string() and
 * the function croaks if that check fails; bytes >= 0x80 are then copied
 * through untouched. With `utf8` clear, every byte >= 0x80 is written as a
 * 2-byte UTF-8 sequence instead.
 *
 * In both modes the double quote and the backslash are backslash-escaped,
 * 0x08/0x09/0x0a/0x0c/0x0d get their short escapes (b, t, n, f, r), and any
 * other byte <= 0x1f, as well as 0x7f, is written as a u00XX escape with two
 * hex digits.
 */
static void fujson_fmt_str(pTHX_ fustr *out, const char *stri, size_t len, int utf8) {
    const unsigned char *bytes = (const unsigned char *)stri;
    size_t pos = 0;

    /* Strings flagged as utf8 are validated in full up front, so the copy
     * loop below never has to inspect multi-byte sequences.
     * Original author's note: disabling this check doubles the performance
     * for unicode strings; is_c9strict_utf8_string() may have room for
     * optimization. */
    if (utf8 && !is_c9strict_utf8_string(bytes, len)) {
        croak("invalid codepoint encountered in string, cannot format to JSON");
    }

    fustr_write(out, "\"", 1);
    fustr_reserve(out, len);

    while (pos < len) {
        size_t run_start = pos;
        unsigned char c = 0;
        unsigned char *dst;

        /* Fast path: find the longest run that can be copied verbatim. */
        if (utf8) {
            /* Everything >= 0x80 has already been validated. */
            for (; pos < len; pos++) {
                c = bytes[pos];
                if (c <= 0x1f || c == '"' || c == '\\' || c == 0x7f)
                    break;
            }
        } else {
            /* Binary strings: bytes >= 0x80 need re-encoding, so stop on them too. */
            for (; pos < len; pos++) {
                c = bytes[pos];
                if (c <= 0x1f || c == '"' || c == '\\' || c >= 0x7f)
                    break;
            }
        }
        fustr_write(out, (char *)bytes + run_start, pos - run_start);

        /* Ran off the end: nothing left that needs special treatment. */
        if (pos >= len)
            break;

        /* The scan stopped early, so `c` is the byte that needs escaping. */
        switch (c) {
        case '"':
            fustr_write(out, "\\\"", 2);
            break;
        case '\\':
            fustr_write(out, "\\\\", 2);
            break;
        case 0x08:
            fustr_write(out, "\\b", 2);
            break;
        case 0x09:
            fustr_write(out, "\\t", 2);
            break;
        case 0x0a:
            fustr_write(out, "\\n", 2);
            break;
        case 0x0c:
            fustr_write(out, "\\f", 2);
            break;
        case 0x0d:
            fustr_write(out, "\\r", 2);
            break;
        default:
            if (c >= 0x80) {
                /* Only reachable when !utf8: encode the byte as 2-byte UTF-8. */
                dst = (unsigned char *)fustr_write_buf(out, 2);
                dst[0] = 0xc0 | (c >> 6);
                dst[1] = 0x80 | (c & 0x3f);
            } else {
                /* Remaining control byte (or 0x7f): generic hex escape. */
                dst = (unsigned char *)fustr_write_buf(out, 6);
                memcpy(dst, "\\u00", 4);
                dst[4] = PL_hexdigit[(c >> 4) & 0x0f];
                dst[5] = PL_hexdigit[c & 0x0f];
            }
            break;
        }
        pos++;
    }

    fustr_write(out, "\"", 1);
}

/* All digits between 0 and 100, a trick I borrowed from the
Zig stdlib. */ static const char fujson_digits[] = "00010203040506070809" "10111213141516171819" "20212223242526272829" "30313233343536373839" "40414243444546474849" "50515253545556575859" "60616263646566676869" "70717273747576777879" "80818283848586878889" "90919293949596979899"; static void fujson_fmt_int(pTHX_ fustr *out, SV *val) { char buf[32]; char *r = buf+31; int neg = 0; IV iv; UV uv; if (SvIsUV(val)) { /* Why is this macro not documented? */ uv = SvUV_nomg(val); } else { iv = SvIV_nomg(val); neg = iv < 0; uv = neg ? -iv : iv; } if (uv == 0) { fustr_write(out, "0", 1); return; } while (uv >= 10) { r -= 2; memcpy(r, fujson_digits + ((uv % 100)<<1), 2); uv /= 100; } if (uv > 0) *(--r) = '0' + (uv % 10); if (neg) *(--r) = '-'; uv = 31 - (r - buf); fustr_write(out, r, uv); } static void fujson_fmt_av(pTHX_ fustr *out, AV *av) { int i, len = av_count(av); fustr_write(out, "[", 1); for (i=0; icur); out->cur += strlen(out->cur); } else if (SvIOKp(val)) { fujson_fmt_int(aTHX_ out, val); } else if (SvROK(val)) { SV *rv = SvRV(val); SvGETMAGIC(rv); if (UNLIKELY(SvOBJECT(rv))) { /* TODO: Check for TO_JSON */ } else if (SvTYPE(rv) == SVt_PVHV) fujson_fmt_hv(aTHX_ out, (HV *)rv); else if (SvTYPE(rv) == SVt_PVAV) fujson_fmt_av(aTHX_ out, (AV *)rv); else croak("unable to format reference '%s' as JSON", SvPV_nolen(val)); } else if (!SvOK(val)) { fustr_write(out, "null", 4); } else { croak("unable to format unknown value '%s' as JSON", SvPV_nolen(val)); } } /* TODO: canonical */ /* TODO: pretty */ /* TODO: max depth? */