static void fujson_fmt(fustr *, SV *);

/*
 * Append the `len` bytes at `stri` to `out` as a double-quoted JSON string.
 *
 * utf8 != 0: the input is first checked with is_c9strict_utf8_string(); if
 *   that check fails the function returns without writing anything (error
 *   reporting is still a TODO). Bytes >= 0x80 are then copied unchanged.
 * utf8 == 0: each byte >= 0x80 is written as a two-byte UTF-8 sequence.
 *
 * In both modes '"' and '\' are backslash-escaped, 0x08/0x09/0x0a/0x0c/0x0d
 * use the short escapes \b \t \n \f \r, and every other byte <= 0x1f as well
 * as 0x7f is written as \u00 followed by two digits from PL_hexdigit.
 */
static void fujson_fmt_str(fustr *out, const char *stri, size_t len, int utf8) {
    const unsigned char *str = (const unsigned char *)stri;
    size_t pos, run_start;

    /* Validating the whole string up front is what allows the copy loop to
     * pass bytes >= 0x80 through untouched. Per the original author's note,
     * disabling this check doubles throughput on unicode strings, so
     * is_c9strict_utf8_string() is a likely optimisation target. */
    if (utf8 && !is_c9strict_utf8_string(str, len))
        return; /* TODO: Throw error. */

    fustr_write(out, "\"", 1);
    fustr_reserve(out, len);

    for (pos = 0; pos < len; ) {
        unsigned char c = 0;
        const char *esc = NULL;

        /* Extend the run of bytes that can be copied verbatim. */
        for (run_start = pos; pos < len; pos++) {
            c = str[pos];
            if (c <= 0x1f || c == '"' || c == '\\')
                break;
            /* Validated UTF-8 only stops on DEL; binary input also stops on
             * every byte >= 0x80 so that it can be re-encoded below. */
            if (utf8 ? c == 0x7f : c >= 0x7f)
                break;
        }
        fustr_write(out, str + run_start, pos - run_start);

        if (pos >= len)
            break; /* the run reached the end of the input */

        /* str[pos] (== c) stopped the run and needs special treatment. */
        switch (c) {
            case '"':  esc = "\\\""; break;
            case '\\': esc = "\\\\"; break;
            case 0x08: esc = "\\b";  break;
            case 0x09: esc = "\\t";  break;
            case 0x0a: esc = "\\n";  break;
            case 0x0c: esc = "\\f";  break;
            case 0x0d: esc = "\\r";  break;
            default:   break;
        }

        if (esc != NULL) {
            fustr_write(out, esc, 2);
        } else if (c >= 0x80) {
            /* Only reachable when !utf8: encode the byte as 2-byte UTF-8. */
            fustr_reserve(out, 2);
            out->buf[out->len]     = 0xc0 | (c >> 6);
            out->buf[out->len + 1] = 0x80 | (c & 0x3f);
            out->len += 2;
        } else {
            /* Remaining control characters and 0x7f: \u00XX */
            fustr_reserve(out, 6);
            memcpy(out->buf + out->len, "\\u00", 4);
            out->buf[out->len + 4] = PL_hexdigit[(c >> 4) & 0x0f];
            out->buf[out->len + 5] = PL_hexdigit[c & 0x0f];
            out->len += 6;
        }
        pos++;
    }

    fustr_write(out, "\"", 1);
}

/* All digits between 0 and 100, a trick I borrowed from the Zig stdlib.
*/ static const char fujson_digits[] = "00010203040506070809" "10111213141516171819" "20212223242526272829" "30313233343536373839" "40414243444546474849" "50515253545556575859" "60616263646566676869" "70717273747576777879" "80818283848586878889" "90919293949596979899"; static void fujson_fmt_int(fustr *out, SV *val) { char buf[32]; char *r = buf+31; int neg = 0; IV iv; UV uv; if (SvIsUV(val)) { /* Why is this macro not documented? */ uv = SvUV_nomg(val); } else { iv = SvIV_nomg(val); neg = iv < 0; uv = neg ? -iv : iv; } if (uv == 0) { fustr_write(out, "0", 1); return; } while (uv >= 10) { r -= 2; memcpy(r, fujson_digits + ((uv % 100)<<1), 2); uv /= 100; } if (uv > 0) *(--r) = '0' + (uv % 10); if (neg) *(--r) = '-'; uv = 31 - (r - buf); fustr_write(out, r, uv); } static void fujson_fmt_av(fustr *out, AV *av) { int i, len = av_count(av); fustr_write(out, "[", 1); for (i=0; ibuf + out->len); out->len += strlen(out->buf + out->len); } else if (SvIOKp(val)) { fujson_fmt_int(out, val); } else if (SvROK(val)) { SV *rv = SvRV(val); SvGETMAGIC(rv); if (UNLIKELY(SvOBJECT(rv))) { /* TODO: Check for TO_JSON */ } else if (SvTYPE(rv) == SVt_PVHV) fujson_fmt_hv(out, (HV *)rv); else if (SvTYPE(rv) == SVt_PVAV) fujson_fmt_av(out, (AV *)rv); else return; /* TODO: error */ } else if (!SvOK(val)) { fustr_write(out, "null", 4); } else { /* TODO: error */ } } /* TODO: canonical */ /* TODO: pretty */ /* TODO: max depth? */ /* TODO: threading support */