jsonfmt: Fix threading support + memory leak bug

This commit is contained in:
Yorhel 2025-01-28 19:19:18 +01:00
parent 9c8ce3f782
commit 12326ca8e4
5 changed files with 79 additions and 69 deletions

17
FU.xs
View file

@ -10,15 +10,10 @@ MODULE = FU PACKAGE = FU::XS
PROTOTYPES: DISABLE PROTOTYPES: DISABLE
SV *json_format(val) void json_format(SV *val)
SV *val
PREINIT:
SV *r;
fustr buf = {};
CODE: CODE:
fujson_fmt(&buf, val); fustr buf;
r = fustr_sv(&buf); fustr_init(&buf, 128);
SvUTF8_on(r); fujson_fmt(aTHX_ &buf, val);
RETVAL = r; ST(0) = fustr_done(&buf);
OUTPUT: SvUTF8_on(ST(0));
RETVAL

View file

@ -1,6 +1,8 @@
~($|/) ~($|/)
(^|/)\. (^|/)\.
^MANIFEST\.bak ^MANIFEST\.bak
^Makefile\.old
^Makefile$
^pm_to_blib ^pm_to_blib
^blib/ ^blib/
^FU-[^/]+/ ^FU-[^/]+/

View file

@ -1,5 +1,7 @@
#!/usr/bin/perl #!/usr/bin/perl
# Exit immediately, doing nothing, when the first argument is 'bench'.
exit if @ARGV && $ARGV[0] eq 'bench';
# Can be invoked as: # Can be invoked as:
# ./bench.PL # (or 'make bench') generates FU/Benchmarks.pod # ./bench.PL # (or 'make bench') generates FU/Benchmarks.pod
# ./bench.PL regex # run benchmark(s) matching the regex # ./bench.PL regex # run benchmark(s) matching the regex

View file

@ -1,43 +1,57 @@
/* Custom string builder, comparable to functionality provided by SV* /* Custom string builder, should be slightly faster than using Sv* macros directly. */
* functions, but with less magic and better inlineable. */
typedef struct { typedef struct {
size_t len; SV *sv;
size_t size; char *cur;
char *buf; char *end;
} fustr; } fustr;
/* No need to call this, an empty fustr is already usable. static void fustr_init_(pTHX_ fustr *s, size_t prealloc) {
* This allows setting a custom initial size. */ s->sv = sv_2mortal(newSV(prealloc));
static void fustr_init(fustr *s, size_t prealloc) { SvPOK_only(s->sv);
s->len = 0; s->cur = SvPVX(s->sv);
s->size = prealloc; s->end = SvEND(s->sv);
s->buf = safemalloc(prealloc);
} }
static void fustr_grow(fustr *s, size_t add) { static void fustr_grow(pTHX_ fustr *s, size_t add) {
if (s->size == 0) s->size = 512; size_t off = s->cur - SvPVX(s->sv);
while (s->size < s->len + add) size_t newlen = 64;
s->size *= 2; add += off;
s->buf = saferealloc(s->buf, s->size); /* Increment to next power of two; SvGROW's default strategy is slow */
while (newlen < add) newlen <<= 1;
char *buf = SvGROW(s->sv, newlen);
s->cur = buf + off;
s->end = buf + SvLEN(s->sv);
} }
#define fustr_reserve(s, n) do {\ static inline void fustr_reserve_(pTHX_ fustr *s, size_t add) {
if (UNLIKELY((s)->size < (s)->len + (n))) fustr_grow(s, n);\ if (UNLIKELY(s->end < s->cur + add)) fustr_grow(aTHX_ s, add);
} while(0) }
#define fustr_write(s, str, n) do {\ static inline void fustr_write_(pTHX_ fustr *s, const char *str, size_t n) {
fustr_reserve(s, n);\ fustr_reserve_(aTHX_ s, n);
memcpy((s)->buf+(s)->len, str, (n));\ memcpy(s->cur, str, n);
(s)->len += (n);\ s->cur += n;
} while(0) }
/* Move the string buffer into a new SV; fustr should be considered invalid after this call. /* Adds n uninitialized bytes to the string and returns a buffer to write the data to */
* Does not set the UTF8 flag. */ static inline char *fustr_write_buf_(pTHX_ fustr *s, size_t n) {
static SV *fustr_sv(fustr *s) { fustr_reserve_(aTHX_ s, n);
SV *r = newSV(0); char *buf = s->cur;
fustr_write(s, "", 1); // trailing nul s->cur += n;
sv_usepvn_flags(r, s->buf, s->len-1, SV_HAS_TRAILING_NUL); return buf;
}
static SV *fustr_done_(pTHX_ fustr *s) {
fustr_reserve_(aTHX_ s, 1);
*s->cur = 0;
SvCUR_set(s->sv, s->cur - SvPVX(s->sv));
// TODO: SvPV_shrink_to_cur? // TODO: SvPV_shrink_to_cur?
return r; return s->sv;
} }
/* Shorthand forms of the fustr_*_() functions above: each one forwards its
 * arguments unchanged and supplies aTHX_ (the Perl interpreter context
 * argument) so call sites do not have to spell it out. */
#define fustr_init(s, prealloc)  fustr_init_(aTHX_ s, prealloc)
#define fustr_reserve(s, add)    fustr_reserve_(aTHX_ s, add)
#define fustr_write(s, str, n)   fustr_write_(aTHX_ s, str, n)
#define fustr_write_buf(s, n)    fustr_write_buf_(aTHX_ s, n)
#define fustr_done(s)            fustr_done_(aTHX_ s)

View file

@ -1,8 +1,9 @@
static void fujson_fmt(fustr *, SV *); static void fujson_fmt(pTHX_ fustr *, SV *);
static void fujson_fmt_str(fustr *out, const char *stri, size_t len, int utf8) { static void fujson_fmt_str(pTHX_ fustr *out, const char *stri, size_t len, int utf8) {
size_t off = 0, loff; size_t off = 0, loff;
const unsigned char *str = (const unsigned char *)stri; const unsigned char *str = (const unsigned char *)stri;
unsigned char *buf;
unsigned char x = 0; unsigned char x = 0;
/* Validate entire string for conformance if this is flagged as a utf8 /* Validate entire string for conformance if this is flagged as a utf8
@ -35,7 +36,7 @@ static void fujson_fmt_str(fustr *out, const char *stri, size_t len, int utf8) {
off++; off++;
} }
} }
fustr_write(out, str+loff, off-loff); fustr_write(out, (char *)str+loff, off-loff);
if (off < len) { /* early break, which means current byte needs special processing */ if (off < len) { /* early break, which means current byte needs special processing */
switch (x) { switch (x) {
@ -48,16 +49,14 @@ static void fujson_fmt_str(fustr *out, const char *stri, size_t len, int utf8) {
case 0x0d: fustr_write(out, "\\r", 2); break; case 0x0d: fustr_write(out, "\\r", 2); break;
default: default:
if (x < 0x80) { if (x < 0x80) {
fustr_reserve(out, 6); buf = (unsigned char *)fustr_write_buf(out, 6);
memcpy(out->buf+out->len, "\\u00", 4); memcpy(buf, "\\u00", 4);
out->buf[out->len+4] = PL_hexdigit[(x >> 4) & 0x0f]; buf[4] = PL_hexdigit[(x >> 4) & 0x0f];
out->buf[out->len+5] = PL_hexdigit[x & 0x0f]; buf[5] = PL_hexdigit[x & 0x0f];
out->len += 6;
} else { /* x >= 0x80, !utf8, so encode as 2-byte UTF-8 */ } else { /* x >= 0x80, !utf8, so encode as 2-byte UTF-8 */
fustr_reserve(out, 2); buf = (unsigned char *)fustr_write_buf(out, 2);
out->buf[out->len ] = 0xc0 | (x >> 6); buf[0] = 0xc0 | (x >> 6);
out->buf[out->len+1] = 0x80 | (x & 0x3f); buf[1] = 0x80 | (x & 0x3f);
out->len += 2;
} }
} }
off++; off++;
@ -80,7 +79,7 @@ static const char fujson_digits[] =
"80818283848586878889" "80818283848586878889"
"90919293949596979899"; "90919293949596979899";
static void fujson_fmt_int(fustr *out, SV *val) { static void fujson_fmt_int(pTHX_ fustr *out, SV *val) {
char buf[32]; char buf[32];
char *r = buf+31; char *r = buf+31;
int neg = 0; int neg = 0;
@ -111,19 +110,19 @@ static void fujson_fmt_int(fustr *out, SV *val) {
fustr_write(out, r, uv); fustr_write(out, r, uv);
} }
static void fujson_fmt_av(fustr *out, AV *av) { static void fujson_fmt_av(pTHX_ fustr *out, AV *av) {
int i, len = av_count(av); int i, len = av_count(av);
fustr_write(out, "[", 1); fustr_write(out, "[", 1);
for (i=0; i<len; i++) { for (i=0; i<len; i++) {
if (i) fustr_write(out, ",", 1); if (i) fustr_write(out, ",", 1);
SV **sv = av_fetch(av, i, 0); SV **sv = av_fetch(av, i, 0);
if (sv) fujson_fmt(out, *sv); /* sv will have magic if av is tied, but fujson_fmt() handles that. */ if (sv) fujson_fmt(aTHX_ out, *sv); /* sv will have magic if av is tied, but fujson_fmt() handles that. */
else fustr_write(out, "null", 4); else fustr_write(out, "null", 4);
} }
fustr_write(out, "]", 1); fustr_write(out, "]", 1);
} }
static void fujson_fmt_hv(fustr *out, HV *hv) { static void fujson_fmt_hv(pTHX_ fustr *out, HV *hv) {
HE *he; HE *he;
STRLEN helen; STRLEN helen;
char *hestr = NULL; char *hestr = NULL;
@ -133,16 +132,15 @@ static void fujson_fmt_hv(fustr *out, HV *hv) {
while ((he = hv_iternext(hv))) { while ((he = hv_iternext(hv))) {
if (hestr) fustr_write(out, ",", 1); if (hestr) fustr_write(out, ",", 1);
hestr = HePV(he, helen); hestr = HePV(he, helen);
fujson_fmt_str(out, hestr, helen, HeUTF8(he)); fujson_fmt_str(aTHX_ out, hestr, helen, HeUTF8(he));
fustr_write(out, ":", 1); fustr_write(out, ":", 1);
fujson_fmt(out, UNLIKELY(SvMAGICAL(hv)) ? hv_iterval(hv, he) : HeVAL(he)); fujson_fmt(aTHX_ out, UNLIKELY(SvMAGICAL(hv)) ? hv_iterval(hv, he) : HeVAL(he));
} }
fustr_write(out, "}", 1); fustr_write(out, "}", 1);
} }
/* BUG: Leaks *out on error, that should be on the temp stack */ static void fujson_fmt(pTHX_ fustr *out, SV *val) {
static void fujson_fmt(fustr *out, SV *val) {
SvGETMAGIC(val); SvGETMAGIC(val);
/* XXX: &PL_sv_yes and &PL_sv_no are proper booleans under 5.40, so no need /* XXX: &PL_sv_yes and &PL_sv_no are proper booleans under 5.40, so no need
@ -151,22 +149,22 @@ static void fujson_fmt(fustr *out, SV *val) {
if (BOOL_INTERNALS_sv_isbool_true(val)) fustr_write(out, "true", 4); if (BOOL_INTERNALS_sv_isbool_true(val)) fustr_write(out, "true", 4);
else fustr_write(out, "false", 5); else fustr_write(out, "false", 5);
} else if (SvPOKp(val)) { } else if (SvPOKp(val)) {
fujson_fmt_str(out, SvPVX(val), SvCUR(val), SvUTF8(val)); fujson_fmt_str(aTHX_ out, SvPVX(val), SvCUR(val), SvUTF8(val));
} else if (SvNOKp(val)) { /* Must check before IOKp, because integer conversion might have been lossy */ } else if (SvNOKp(val)) { /* Must check before IOKp, because integer conversion might have been lossy */
/* TODO: quadmath? */ /* TODO: quadmath? */
NV nv = SvNV_nomg(val); NV nv = SvNV_nomg(val);
if (isinfnan(nv)) return; /* TODO: error */ if (isinfnan(nv)) return; /* TODO: error */
fustr_reserve(out, NV_DIG+1); fustr_reserve(out, NV_DIG+1);
Gconvert(nv, NV_DIG, 0, out->buf + out->len); Gconvert(nv, NV_DIG, 0, out->cur);
out->len += strlen(out->buf + out->len); out->cur += strlen(out->cur);
} else if (SvIOKp(val)) { } else if (SvIOKp(val)) {
fujson_fmt_int(out, val); fujson_fmt_int(aTHX_ out, val);
} else if (SvROK(val)) { } else if (SvROK(val)) {
SV *rv = SvRV(val); SV *rv = SvRV(val);
SvGETMAGIC(rv); SvGETMAGIC(rv);
if (UNLIKELY(SvOBJECT(rv))) { /* TODO: Check for TO_JSON */ } if (UNLIKELY(SvOBJECT(rv))) { /* TODO: Check for TO_JSON */ }
else if (SvTYPE(rv) == SVt_PVHV) fujson_fmt_hv(out, (HV *)rv); else if (SvTYPE(rv) == SVt_PVHV) fujson_fmt_hv(aTHX_ out, (HV *)rv);
else if (SvTYPE(rv) == SVt_PVAV) fujson_fmt_av(out, (AV *)rv); else if (SvTYPE(rv) == SVt_PVAV) fujson_fmt_av(aTHX_ out, (AV *)rv);
else return; /* TODO: error */ else return; /* TODO: error */
} else if (!SvOK(val)) { } else if (!SvOK(val)) {
fustr_write(out, "null", 4); fustr_write(out, "null", 4);
@ -178,4 +176,3 @@ static void fujson_fmt(fustr *out, SV *val) {
/* TODO: canonical */ /* TODO: canonical */
/* TODO: pretty */ /* TODO: pretty */
/* TODO: max depth? */ /* TODO: max depth? */
/* TODO: threading support */