pg: Add json, jsonb, jsonpath support

NOW we're really getting to the part where this module is more awesome
than DBD::Pg.

(When I started working on this module, I expected the Postgres binary
protocol to send jsonb in a binary format as well, which would have meant
duplicating parts of the JSON parser/formatter to make that work. It turns
out that Postgres just uses plain JSON for exchange, which saves me some
trouble, I guess.)
This commit is contained in:
Yorhel 2025-02-08 15:03:17 +01:00
parent 7f1c48e0cf
commit 2aaec6a218
4 changed files with 98 additions and 49 deletions

View file

@ -1,5 +1,5 @@
typedef struct { typedef struct {
fustr out; fustr *out;
UV depth; UV depth;
int canon; int canon;
int pretty; /* <0 when disabled, current nesting level otherwise */ int pretty; /* <0 when disabled, current nesting level otherwise */
@ -9,7 +9,7 @@ static void fujson_fmt(pTHX_ fujson_fmt_ctx *, SV *);
static void fujson_fmt_indent(pTHX_ fujson_fmt_ctx *ctx) { static void fujson_fmt_indent(pTHX_ fujson_fmt_ctx *ctx) {
if (ctx->pretty >= 0) { if (ctx->pretty >= 0) {
char *buf = fustr_write_buf(&ctx->out, 1 + ctx->pretty*3); char *buf = fustr_write_buf(ctx->out, 1 + ctx->pretty*3);
*buf = '\n'; *buf = '\n';
memset(buf+1, ' ', ctx->pretty*3); memset(buf+1, ' ', ctx->pretty*3);
} }
@ -30,8 +30,8 @@ static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t l
croak("invalid codepoint encountered in string, cannot format to JSON"); croak("invalid codepoint encountered in string, cannot format to JSON");
} }
fustr_write_ch(&ctx->out, '\"'); fustr_write_ch(ctx->out, '\"');
fustr_reserve(&ctx->out, len); fustr_reserve(ctx->out, len);
while (off < len) { while (off < len) {
/* Fast path: no escaping needed */ /* Fast path: no escaping needed */
@ -51,25 +51,25 @@ static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t l
off++; off++;
} }
} }
fustr_write(&ctx->out, (char *)str+loff, off-loff); fustr_write(ctx->out, (char *)str+loff, off-loff);
if (off < len) { /* early break, which means current byte needs special processing */ if (off < len) { /* early break, which means current byte needs special processing */
switch (x) { switch (x) {
case '"': fustr_write(&ctx->out, "\\\"", 2); break; case '"': fustr_write(ctx->out, "\\\"", 2); break;
case '\\': fustr_write(&ctx->out, "\\\\", 2); break; case '\\': fustr_write(ctx->out, "\\\\", 2); break;
case 0x08: fustr_write(&ctx->out, "\\b", 2); break; case 0x08: fustr_write(ctx->out, "\\b", 2); break;
case 0x09: fustr_write(&ctx->out, "\\t", 2); break; case 0x09: fustr_write(ctx->out, "\\t", 2); break;
case 0x0a: fustr_write(&ctx->out, "\\n", 2); break; case 0x0a: fustr_write(ctx->out, "\\n", 2); break;
case 0x0c: fustr_write(&ctx->out, "\\f", 2); break; case 0x0c: fustr_write(ctx->out, "\\f", 2); break;
case 0x0d: fustr_write(&ctx->out, "\\r", 2); break; case 0x0d: fustr_write(ctx->out, "\\r", 2); break;
default: default:
if (x < 0x80) { if (x < 0x80) {
buf = (unsigned char *)fustr_write_buf(&ctx->out, 6); buf = (unsigned char *)fustr_write_buf(ctx->out, 6);
memcpy(buf, "\\u00", 4); memcpy(buf, "\\u00", 4);
buf[4] = PL_hexdigit[(x >> 4) & 0x0f]; buf[4] = PL_hexdigit[(x >> 4) & 0x0f];
buf[5] = PL_hexdigit[x & 0x0f]; buf[5] = PL_hexdigit[x & 0x0f];
} else { /* x >= 0x80, !utf8, so encode as 2-byte UTF-8 */ } else { /* x >= 0x80, !utf8, so encode as 2-byte UTF-8 */
buf = (unsigned char *)fustr_write_buf(&ctx->out, 2); buf = (unsigned char *)fustr_write_buf(ctx->out, 2);
buf[0] = 0xc0 | (x >> 6); buf[0] = 0xc0 | (x >> 6);
buf[1] = 0x80 | (x & 0x3f); buf[1] = 0x80 | (x & 0x3f);
} }
@ -78,7 +78,7 @@ static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t l
} }
} }
fustr_write_ch(&ctx->out, '\"'); fustr_write_ch(ctx->out, '\"');
} }
static const char fujson_digits[] = static const char fujson_digits[] =
@ -109,7 +109,7 @@ static void fujson_fmt_int(pTHX_ fujson_fmt_ctx *ctx, SV *val) {
} }
if (uv == 0) { if (uv == 0) {
fustr_write_ch(&ctx->out, '0'); fustr_write_ch(ctx->out, '0');
return; return;
} }
@ -120,23 +120,23 @@ static void fujson_fmt_int(pTHX_ fujson_fmt_ctx *ctx, SV *val) {
} }
if (uv > 0) *(--r) = '0' + (uv % 10); if (uv > 0) *(--r) = '0' + (uv % 10);
if (neg) *(--r) = '-'; if (neg) *(--r) = '-';
fustr_write(&ctx->out, r, 31 - (r - buf)); fustr_write(ctx->out, r, 31 - (r - buf));
} }
static void fujson_fmt_av(pTHX_ fujson_fmt_ctx *ctx, AV *av) { static void fujson_fmt_av(pTHX_ fujson_fmt_ctx *ctx, AV *av) {
int i, len = av_count(av); int i, len = av_count(av);
fustr_write_ch(&ctx->out, '['); fustr_write_ch(ctx->out, '[');
ctx->pretty++; ctx->pretty++;
for (i=0; i<len; i++) { for (i=0; i<len; i++) {
if (i) fustr_write_ch(&ctx->out, ','); if (i) fustr_write_ch(ctx->out, ',');
fujson_fmt_indent(aTHX_ ctx); fujson_fmt_indent(aTHX_ ctx);
SV **sv = av_fetch(av, i, 0); SV **sv = av_fetch(av, i, 0);
if (sv) fujson_fmt(aTHX_ ctx, *sv); /* sv will have magic if av is tied, but fujson_fmt() handles that. */ if (sv) fujson_fmt(aTHX_ ctx, *sv); /* sv will have magic if av is tied, but fujson_fmt() handles that. */
else fustr_write(&ctx->out, "null", 4); else fustr_write(ctx->out, "null", 4);
} }
ctx->pretty--; ctx->pretty--;
if (i) fujson_fmt_indent(aTHX_ ctx); if (i) fujson_fmt_indent(aTHX_ ctx);
fustr_write_ch(&ctx->out, ']'); fustr_write_ch(ctx->out, ']');
} }
static int fujson_fmt_hvcmp(const void *pa, const void *pb) { static int fujson_fmt_hvcmp(const void *pa, const void *pb) {
@ -159,12 +159,12 @@ static int fujson_fmt_hvcmp(const void *pa, const void *pb) {
static void fujson_fmt_hvkv(pTHX_ fujson_fmt_ctx *ctx, HV *hv, HE *he, char **hestr) { static void fujson_fmt_hvkv(pTHX_ fujson_fmt_ctx *ctx, HV *hv, HE *he, char **hestr) {
STRLEN helen; STRLEN helen;
if (*hestr) fustr_write_ch(&ctx->out, ','); if (*hestr) fustr_write_ch(ctx->out, ',');
fujson_fmt_indent(aTHX_ ctx); fujson_fmt_indent(aTHX_ ctx);
*hestr = HePV(he, helen); *hestr = HePV(he, helen);
fujson_fmt_str(aTHX_ ctx, *hestr, helen, HeUTF8(he)); fujson_fmt_str(aTHX_ ctx, *hestr, helen, HeUTF8(he));
if (ctx->pretty > 0) fustr_write(&ctx->out, " : ", 3); if (ctx->pretty > 0) fustr_write(ctx->out, " : ", 3);
else fustr_write_ch(&ctx->out, ':'); else fustr_write_ch(ctx->out, ':');
fujson_fmt(aTHX_ ctx, UNLIKELY(SvMAGICAL(hv)) ? hv_iterval(hv, he) : HeVAL(he)); fujson_fmt(aTHX_ ctx, UNLIKELY(SvMAGICAL(hv)) ? hv_iterval(hv, he) : HeVAL(he));
} }
@ -173,7 +173,7 @@ static void fujson_fmt_hv(pTHX_ fujson_fmt_ctx *ctx, HV *hv) {
char *hestr = NULL; char *hestr = NULL;
int numkeys = hv_iterinit(hv); int numkeys = hv_iterinit(hv);
fustr_write_ch(&ctx->out, '{'); fustr_write_ch(ctx->out, '{');
ctx->pretty++; ctx->pretty++;
/* Canonical order on tied hashes is not supported. Cpanel::JSON::XS has /* Canonical order on tied hashes is not supported. Cpanel::JSON::XS has
@ -204,7 +204,7 @@ static void fujson_fmt_hv(pTHX_ fujson_fmt_ctx *ctx, HV *hv) {
} }
ctx->pretty--; ctx->pretty--;
if (hestr) fujson_fmt_indent(aTHX_ ctx); if (hestr) fujson_fmt_indent(aTHX_ ctx);
fustr_write_ch(&ctx->out, '}'); fustr_write_ch(ctx->out, '}');
} }
static void fujson_fmt_obj(pTHX_ fujson_fmt_ctx *ctx, SV *rv, SV *obj) { static void fujson_fmt_obj(pTHX_ fujson_fmt_ctx *ctx, SV *rv, SV *obj) {
@ -243,8 +243,8 @@ static void fujson_fmt(pTHX_ fujson_fmt_ctx *ctx, SV *val) {
/* XXX: &PL_sv_yes and &PL_sv_no are proper booleans under 5.40, so no need /* XXX: &PL_sv_yes and &PL_sv_no are proper booleans under 5.40, so no need
* to explicitly check for those; does this work in 5.36 as well? */ * to explicitly check for those; does this work in 5.36 as well? */
if (SvIsBOOL(val)) { /* Must check before IOKp & POKp, because bool implies both flags */ if (SvIsBOOL(val)) { /* Must check before IOKp & POKp, because bool implies both flags */
if (BOOL_INTERNALS_sv_isbool_true(val)) fustr_write(&ctx->out, "true", 4); if (BOOL_INTERNALS_sv_isbool_true(val)) fustr_write(ctx->out, "true", 4);
else fustr_write(&ctx->out, "false", 5); else fustr_write(ctx->out, "false", 5);
} else if (SvPOKp(val)) { } else if (SvPOKp(val)) {
fujson_fmt_str(aTHX_ ctx, SvPVX(val), SvCUR(val), SvUTF8(val)); fujson_fmt_str(aTHX_ ctx, SvPVX(val), SvCUR(val), SvUTF8(val));
} else if (SvNOKp(val)) { /* Must check before IOKp, because integer conversion might have been lossy */ } else if (SvNOKp(val)) { /* Must check before IOKp, because integer conversion might have been lossy */
@ -252,9 +252,9 @@ static void fujson_fmt(pTHX_ fujson_fmt_ctx *ctx, SV *val) {
if (isinfnan(nv)) croak("unable to format floating point NaN or Inf as JSON"); if (isinfnan(nv)) croak("unable to format floating point NaN or Inf as JSON");
/* XXX: Cpanel::JSON::XS appears to always append a ".0" for round numbers, other modules do not. */ /* XXX: Cpanel::JSON::XS appears to always append a ".0" for round numbers, other modules do not. */
/* XXX#2: This doesn't support quadmath. Makefile.PL checks for that */ /* XXX#2: This doesn't support quadmath. Makefile.PL checks for that */
fustr_reserve(&ctx->out, NV_DIG+1); fustr_reserve(ctx->out, NV_DIG+1);
Gconvert(nv, NV_DIG, 0, ctx->out.cur); Gconvert(nv, NV_DIG, 0, ctx->out->cur);
ctx->out.cur += strlen(ctx->out.cur); ctx->out->cur += strlen(ctx->out->cur);
} else if (SvIOKp(val)) { } else if (SvIOKp(val)) {
fujson_fmt_int(aTHX_ ctx, val); fujson_fmt_int(aTHX_ ctx, val);
} else if (SvROK(val)) { } else if (SvROK(val)) {
@ -270,7 +270,7 @@ static void fujson_fmt(pTHX_ fujson_fmt_ctx *ctx, SV *val) {
else croak("unable to format reference '%s' as JSON", SvPV_nolen(val)); else croak("unable to format reference '%s' as JSON", SvPV_nolen(val));
ctx->depth++; ctx->depth++;
} else if (!SvOK(val)) { } else if (!SvOK(val)) {
fustr_write(&ctx->out, "null", 4); fustr_write(ctx->out, "null", 4);
} else { } else {
croak("unable to format unknown value '%s' as JSON", SvPV_nolen(val)); croak("unable to format unknown value '%s' as JSON", SvPV_nolen(val));
} }
@ -282,9 +282,11 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
int encutf8 = 0; int encutf8 = 0;
char *arg; char *arg;
SV *r; SV *r;
fustr out;
fujson_fmt_ctx ctx; fujson_fmt_ctx ctx;
ctx.out.maxlen = 0; out.maxlen = 0;
ctx.out = &out;
ctx.depth = 0; ctx.depth = 0;
ctx.pretty = INT_MIN; ctx.pretty = INT_MIN;
ctx.canon = 0; ctx.canon = 0;
@ -298,17 +300,17 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) {
if (strcmp(arg, "canonical") == 0) ctx.canon = SvPVXtrue(r); if (strcmp(arg, "canonical") == 0) ctx.canon = SvPVXtrue(r);
else if (strcmp(arg, "pretty") == 0) ctx.pretty = SvPVXtrue(r) ? 0 : INT_MIN; else if (strcmp(arg, "pretty") == 0) ctx.pretty = SvPVXtrue(r) ? 0 : INT_MIN;
else if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r); else if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r);
else if (strcmp(arg, "max_size") == 0) ctx.out.maxlen = SvUV(r); else if (strcmp(arg, "max_size") == 0) out.maxlen = SvUV(r);
else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r); else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r);
else croak("Unknown flag: '%s'", arg); else croak("Unknown flag: '%s'", arg);
} }
if (ctx.out.maxlen == 0) ctx.out.maxlen = 1<<30; if (out.maxlen == 0) out.maxlen = 1<<30;
if (ctx.depth == 0) ctx.depth = 512; if (ctx.depth == 0) ctx.depth = 512;
fustr_init(&ctx.out, sv_newmortal(), ctx.out.maxlen); fustr_init(&out, sv_newmortal(), out.maxlen);
fujson_fmt(aTHX_ &ctx, val); fujson_fmt(aTHX_ &ctx, val);
if (ctx.pretty >= 0) fustr_write_ch(&ctx.out, '\n'); if (ctx.pretty >= 0) fustr_write_ch(&out, '\n');
r = fustr_done(&ctx.out); r = fustr_done(&out);
if (!encutf8) SvUTF8_on(r); if (!encutf8) SvUTF8_on(r);
return r; return r;
} }

View file

@ -406,7 +406,7 @@ static void fupg_results_setup(pTHX_ fupg_st *st) {
st->recv = safecalloc(st->nfields, sizeof(*st->recv)); st->recv = safecalloc(st->nfields, sizeof(*st->recv));
if (st->stflags & FUPG_TEXT_RESULTS) { if (st->stflags & FUPG_TEXT_RESULTS) {
for (i=0; i<st->nfields; i++) for (i=0; i<st->nfields; i++)
st->recv[i].fn = fupg_recv_textfmt; st->recv[i].fn = fupg_recv_text;
return; return;
} }

View file

@ -57,10 +57,6 @@ typedef struct {
if (iv < min || iv > max) fu_confess("Integer %"IVdf" out of range for type '%s' (oid %u)", iv, ctx->name, ctx->oid) if (iv < min || iv > max) fu_confess("Integer %"IVdf" out of range for type '%s' (oid %u)", iv, ctx->name, ctx->oid)
RECVFN(textfmt) {
return newSVpvn_utf8(buf, len, 1);
}
RECVFN(bool) { RECVFN(bool) {
RLEN(1); RLEN(1);
return *buf ? &PL_sv_yes : &PL_sv_no; return *buf ? &PL_sv_yes : &PL_sv_no;
@ -182,6 +178,44 @@ SENDFN(float8) {
fustr_write(out, (const char *)&uv, 8); fustr_write(out, (const char *)&uv, 8);
} }
/* Receive a 'json' value: run the JSON parser over the len bytes at buf and
 * return the SV it produces. Confesses when the parser returns NULL or when
 * it stops before reaching the end of the buffer. */
RECVFN(json) {
    const unsigned char *start = (const unsigned char *)buf;
    fujson_parse_ctx parser = {
        .buf = start,
        .end = start + len,
        .depth = 512
    };
    SV *result = fujson_parse(aTHX_ &parser);
    /* A failed parse and unconsumed trailing bytes are reported identically. */
    if (result == NULL || parser.buf != parser.end)
        fu_confess("Received invalid JSON for type '%s' (oid %u)", ctx->name, ctx->oid);
    return result;
}
/* Send a 'json' value: format val as JSON text and append it to out, with
 * .canon set and .depth set to 512.
 *
 * fujson_fmt_ctx.pretty is "<0 when disabled, current nesting level
 * otherwise", so a value of 0 would switch pretty-printing on and send
 * newline/indent-padded JSON to the server. It is therefore set to INT_MIN,
 * the same "off" value fujson_fmt_xs() uses. A plain -1 is not sufficient:
 * the array and hash formatters increment the field for every nesting level,
 * which would bring -1 back up to >= 0 inside the first container. */
SENDFN(json) {
    fujson_fmt_ctx json = { .out = out, .depth = 512, .canon = 1, .pretty = INT_MIN };
    fujson_fmt(aTHX_ &json, val);
}
/* Receive a 'jsonb' value. The payload must be longer than one byte and begin
 * with a byte of value 1 (presumably the version byte of PostgreSQL's jsonb
 * binary format -- confirm against the server documentation); the bytes after
 * it are handed to the json receive function. Any other input confesses. */
RECVFN(jsonb) {
    /* len is tested first so *buf is never read from an empty buffer. */
    if (!(len > 1 && *buf == 1))
        fu_confess("Unexpected format for type '%s' (oid %u)", ctx->name, ctx->oid);
    return fupg_recv_json(aTHX_ ctx, buf + 1, len - 1);
}
/* Send a 'jsonb' value: a single byte of value 1 followed by whatever the
 * json send function writes for val. This mirrors the leading byte that the
 * jsonb receive function requires. */
SENDFN(jsonb) {
    fustr_write_ch(out, 0x01);
    fupg_send_json(aTHX_ ctx, val, out);
}
/* Receive a 'jsonpath' value. The payload must be longer than one byte and
 * begin with a byte of value 1 (presumably a format version byte -- confirm
 * against the server documentation); the remainder is handed to the text
 * receive function. Any other input confesses. */
RECVFN(jsonpath) {
    /* len is tested first so *buf is never read from an empty buffer. */
    if (!(len > 1 && *buf == 1))
        fu_confess("Unexpected format for type '%s' (oid %u)", ctx->name, ctx->oid);
    return fupg_recv_text(aTHX_ ctx, buf + 1, len - 1);
}
/* Send a 'jsonpath' value: a single byte of value 1 followed by whatever the
 * text send function writes for val. This mirrors the leading byte that the
 * jsonpath receive function requires. */
SENDFN(jsonpath) {
    fustr_write_ch(out, 0x01);
    fupg_send_text(aTHX_ ctx, val, out);
}
#undef SIV
#undef RLEN #undef RLEN
#undef RECVFN #undef RECVFN
#undef SENDFN #undef SENDFN
@ -230,7 +264,7 @@ SENDFN(float8) {
B( 28, "xid", uint4 )\ B( 28, "xid", uint4 )\
B( 29, "cid", uint4 )\ B( 29, "cid", uint4 )\
/* 30 oidvector */ \ /* 30 oidvector */ \
/* 114 json */ \ B( 114, "json", json )\
B( 142, "xml", text )\ B( 142, "xml", text )\
B( 194, "pg_node_tree", text ) /* can't be used as a bind param */\ B( 194, "pg_node_tree", text ) /* can't be used as a bind param */\
/* 600 point */\ /* 600 point */\
@ -275,8 +309,8 @@ SENDFN(float8) {
/* 3642 gtsvector, does not support binary send/recv */\ /* 3642 gtsvector, does not support binary send/recv */\
B( 3734, "regconfig", uint4 )\ B( 3734, "regconfig", uint4 )\
B( 3769, "regdictionary", uint4 )\ B( 3769, "regdictionary", uint4 )\
/* 3802 jsonb */\ B( 3802, "jsonb", jsonb )\
/* 4072 jsonpath */\ B( 4072, "jsonpath", jsonpath)\
B( 4089, "regnamespace", uint4 )\ B( 4089, "regnamespace", uint4 )\
B( 4096, "regrole", uint4 )\ B( 4096, "regrole", uint4 )\
B( 4191, "regcollation", uint4 )\ B( 4191, "regcollation", uint4 )\

View file

@ -21,15 +21,15 @@ sub v($type, $p_in, @args) {
my $res = $conn->q("SELECT \$1::$type", $s_in)->text_params->val; my $res = $conn->q("SELECT \$1::$type", $s_in)->text_params->val;
ok is_bool($res), "$type $s_in is bool" if $type eq 'bool'; ok is_bool($res), "$type $s_in is bool" if $type eq 'bool';
ok created_as_number($res), "$type $s_in is number" if $type =~ /^int/; ok created_as_number($res), "$type $s_in is number" if $type =~ /^int/;
is_deeply $res, $p_out, "$type $s_in text->bin"; is_deeply $res, $p_out, "$type $s_in text->bin" =~ s/\n/\\n/rg;
} }
{ {
my $res = $conn->q("SELECT \$1::$type", $p_in)->text_results->val; my $res = $conn->q("SELECT \$1::$type", $p_in)->text_results->val;
is $res, $s_out, "$type $s_out bin->text"; is $res, $s_out, "$type $s_out bin->text" =~ s/\n/\\n/rg;
} }
{ {
my $res = $conn->q("SELECT \$1::$type", $p_in)->val; my $res = $conn->q("SELECT \$1::$type", $p_in)->val;
is_deeply $res, $p_out, "$type $s_in bin->bin"; is_deeply $res, $p_out, "$type $s_in bin->bin" =~ s/\n/\\n/rg;
} }
} }
sub f($type, $p_in) { sub f($type, $p_in) {
@ -77,4 +77,17 @@ f float4 => $_ for ('', 'a', '123g', []);
v float8 => $_ for (0, 1234, 1.5); v float8 => $_ for (0, 1234, 1.5);
f float8 => $_ for ('', 'a', '123g', []); f float8 => $_ for ('', 'a', '123g', []);
# Limitation: There's no way to send a JSON 'null' or differentiate between that and SQL NULL.
v json => {}, undef, '{}';
# XXX: Huh, what's causing this "pretty" formatting?
v json => [1, undef, true, "hello"], undef, qq#[\n 1,\n null,\n true,\n "hello"\n]#;
f json => \1;
v jsonb => {}, undef, '{}';
v jsonb => [1, undef, true, "hello"], undef, '[1, null, true, "hello"]';
f jsonb => \1;
v jsonpath => $_ for ('$."key"', '$."a[*]"?(@ > 2)');
f jsonpath => $_ for ('', 'hello world');
done_testing; done_testing;