From 2aaec6a21812a48b4e7f43b9ea00655f87bf08cf Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sat, 8 Feb 2025 15:03:17 +0100 Subject: [PATCH] pg: Add json, jsonb, jsonpath support NOW we're really getting to the part where this module is more awesome than DBD::Pg. (When I started working on this module I was expecting that the Postgres binary protocol would send jsonb in a binary format as well and that I'd be duplicating parts of the JSON parser/formatter to make that work, but it turns out that Postgres just uses plain json for exchange. Saves me some trouble, I guess) --- c/jsonfmt.c | 78 +++++++++++++++++++++++++++-------------------------- c/pgconn.c | 2 +- c/pgtypes.c | 48 ++++++++++++++++++++++++++++----- t/pgtypes.t | 19 ++++++++++--- 4 files changed, 98 insertions(+), 49 deletions(-) diff --git a/c/jsonfmt.c b/c/jsonfmt.c index 7fd8859..bd50f9b 100644 --- a/c/jsonfmt.c +++ b/c/jsonfmt.c @@ -1,5 +1,5 @@ typedef struct { - fustr out; + fustr *out; UV depth; int canon; int pretty; /* <0 when disabled, current nesting level otherwise */ @@ -9,7 +9,7 @@ static void fujson_fmt(pTHX_ fujson_fmt_ctx *, SV *); static void fujson_fmt_indent(pTHX_ fujson_fmt_ctx *ctx) { if (ctx->pretty >= 0) { - char *buf = fustr_write_buf(&ctx->out, 1 + ctx->pretty*3); + char *buf = fustr_write_buf(ctx->out, 1 + ctx->pretty*3); *buf = '\n'; memset(buf+1, ' ', ctx->pretty*3); } @@ -30,8 +30,8 @@ static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t l croak("invalid codepoint encountered in string, cannot format to JSON"); } - fustr_write_ch(&ctx->out, '\"'); - fustr_reserve(&ctx->out, len); + fustr_write_ch(ctx->out, '\"'); + fustr_reserve(ctx->out, len); while (off < len) { /* Fast path: no escaping needed */ @@ -51,25 +51,25 @@ static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t l off++; } } - fustr_write(&ctx->out, (char *)str+loff, off-loff); + fustr_write(ctx->out, (char *)str+loff, off-loff); if (off < len) { /* early break, which 
means current byte needs special processing */ switch (x) { - case '"': fustr_write(&ctx->out, "\\\"", 2); break; - case '\\': fustr_write(&ctx->out, "\\\\", 2); break; - case 0x08: fustr_write(&ctx->out, "\\b", 2); break; - case 0x09: fustr_write(&ctx->out, "\\t", 2); break; - case 0x0a: fustr_write(&ctx->out, "\\n", 2); break; - case 0x0c: fustr_write(&ctx->out, "\\f", 2); break; - case 0x0d: fustr_write(&ctx->out, "\\r", 2); break; + case '"': fustr_write(ctx->out, "\\\"", 2); break; + case '\\': fustr_write(ctx->out, "\\\\", 2); break; + case 0x08: fustr_write(ctx->out, "\\b", 2); break; + case 0x09: fustr_write(ctx->out, "\\t", 2); break; + case 0x0a: fustr_write(ctx->out, "\\n", 2); break; + case 0x0c: fustr_write(ctx->out, "\\f", 2); break; + case 0x0d: fustr_write(ctx->out, "\\r", 2); break; default: if (x < 0x80) { - buf = (unsigned char *)fustr_write_buf(&ctx->out, 6); + buf = (unsigned char *)fustr_write_buf(ctx->out, 6); memcpy(buf, "\\u00", 4); buf[4] = PL_hexdigit[(x >> 4) & 0x0f]; buf[5] = PL_hexdigit[x & 0x0f]; } else { /* x >= 0x80, !utf8, so encode as 2-byte UTF-8 */ - buf = (unsigned char *)fustr_write_buf(&ctx->out, 2); + buf = (unsigned char *)fustr_write_buf(ctx->out, 2); buf[0] = 0xc0 | (x >> 6); buf[1] = 0x80 | (x & 0x3f); } @@ -78,7 +78,7 @@ static void fujson_fmt_str(pTHX_ fujson_fmt_ctx *ctx, const char *stri, size_t l } } - fustr_write_ch(&ctx->out, '\"'); + fustr_write_ch(ctx->out, '\"'); } static const char fujson_digits[] = @@ -109,7 +109,7 @@ static void fujson_fmt_int(pTHX_ fujson_fmt_ctx *ctx, SV *val) { } if (uv == 0) { - fustr_write_ch(&ctx->out, '0'); + fustr_write_ch(ctx->out, '0'); return; } @@ -120,23 +120,23 @@ static void fujson_fmt_int(pTHX_ fujson_fmt_ctx *ctx, SV *val) { } if (uv > 0) *(--r) = '0' + (uv % 10); if (neg) *(--r) = '-'; - fustr_write(&ctx->out, r, 31 - (r - buf)); + fustr_write(ctx->out, r, 31 - (r - buf)); } static void fujson_fmt_av(pTHX_ fujson_fmt_ctx *ctx, AV *av) { int i, len = av_count(av); - 
fustr_write_ch(&ctx->out, '['); + fustr_write_ch(ctx->out, '['); ctx->pretty++; for (i=0; i<len; i++) { - if (i) fustr_write_ch(&ctx->out, ','); + if (i) fustr_write_ch(ctx->out, ','); fujson_fmt_indent(aTHX_ ctx); SV **sv = av_fetch(av, i, 0); if (sv) fujson_fmt(aTHX_ ctx, *sv); /* sv will have magic if av is tied, but fujson_fmt() handles that. */ - else fustr_write(&ctx->out, "null", 4); + else fustr_write(ctx->out, "null", 4); } ctx->pretty--; if (i) fujson_fmt_indent(aTHX_ ctx); - fustr_write_ch(&ctx->out, ']'); + fustr_write_ch(ctx->out, ']'); } static int fujson_fmt_hvcmp(const void *pa, const void *pb) { @@ -159,12 +159,12 @@ static int fujson_fmt_hvcmp(const void *pa, const void *pb) { static void fujson_fmt_hvkv(pTHX_ fujson_fmt_ctx *ctx, HV *hv, HE *he, char **hestr) { STRLEN helen; - if (*hestr) fustr_write_ch(&ctx->out, ','); + if (*hestr) fustr_write_ch(ctx->out, ','); fujson_fmt_indent(aTHX_ ctx); *hestr = HePV(he, helen); fujson_fmt_str(aTHX_ ctx, *hestr, helen, HeUTF8(he)); - if (ctx->pretty > 0) fustr_write(&ctx->out, " : ", 3); - else fustr_write_ch(&ctx->out, ':'); + if (ctx->pretty > 0) fustr_write(ctx->out, " : ", 3); + else fustr_write_ch(ctx->out, ':'); fujson_fmt(aTHX_ ctx, UNLIKELY(SvMAGICAL(hv)) ? hv_iterval(hv, he) : HeVAL(he)); } @@ -173,7 +173,7 @@ static void fujson_fmt_hv(pTHX_ fujson_fmt_ctx *ctx, HV *hv) { char *hestr = NULL; int numkeys = hv_iterinit(hv); - fustr_write_ch(&ctx->out, '{'); + fustr_write_ch(ctx->out, '{'); ctx->pretty++; /* Canonical order on tied hashes is not supported. 
Cpanel::JSON::XS has @@ -204,7 +204,7 @@ static void fujson_fmt_hv(pTHX_ fujson_fmt_ctx *ctx, HV *hv) { } ctx->pretty--; if (hestr) fujson_fmt_indent(aTHX_ ctx); - fustr_write_ch(&ctx->out, '}'); + fustr_write_ch(ctx->out, '}'); } static void fujson_fmt_obj(pTHX_ fujson_fmt_ctx *ctx, SV *rv, SV *obj) { @@ -243,8 +243,8 @@ static void fujson_fmt(pTHX_ fujson_fmt_ctx *ctx, SV *val) { /* XXX: &PL_sv_yes and &PL_sv_no are proper booleans under 5.40, so no need * to explicitly check for those; does this work in 5.36 as well? */ if (SvIsBOOL(val)) { /* Must check before IOKp & POKp, because bool implies both flags */ - if (BOOL_INTERNALS_sv_isbool_true(val)) fustr_write(&ctx->out, "true", 4); - else fustr_write(&ctx->out, "false", 5); + if (BOOL_INTERNALS_sv_isbool_true(val)) fustr_write(ctx->out, "true", 4); + else fustr_write(ctx->out, "false", 5); } else if (SvPOKp(val)) { fujson_fmt_str(aTHX_ ctx, SvPVX(val), SvCUR(val), SvUTF8(val)); } else if (SvNOKp(val)) { /* Must check before IOKp, because integer conversion might have been lossy */ @@ -252,9 +252,9 @@ static void fujson_fmt(pTHX_ fujson_fmt_ctx *ctx, SV *val) { if (isinfnan(nv)) croak("unable to format floating point NaN or Inf as JSON"); /* XXX: Cpanel::JSON::XS appears to always append a ".0" for round numbers, other modules do not. */ /* XXX#2: This doesn't support quadmath. 
Makefile.PL checks for that */ - fustr_reserve(&ctx->out, NV_DIG+1); - Gconvert(nv, NV_DIG, 0, ctx->out.cur); - ctx->out.cur += strlen(ctx->out.cur); + fustr_reserve(ctx->out, NV_DIG+1); + Gconvert(nv, NV_DIG, 0, ctx->out->cur); + ctx->out->cur += strlen(ctx->out->cur); } else if (SvIOKp(val)) { fujson_fmt_int(aTHX_ ctx, val); } else if (SvROK(val)) { @@ -270,7 +270,7 @@ static void fujson_fmt(pTHX_ fujson_fmt_ctx *ctx, SV *val) { else croak("unable to format reference '%s' as JSON", SvPV_nolen(val)); ctx->depth++; } else if (!SvOK(val)) { - fustr_write(&ctx->out, "null", 4); + fustr_write(ctx->out, "null", 4); } else { croak("unable to format unknown value '%s' as JSON", SvPV_nolen(val)); } @@ -282,9 +282,11 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) { int encutf8 = 0; char *arg; SV *r; + fustr out; fujson_fmt_ctx ctx; - ctx.out.maxlen = 0; + out.maxlen = 0; + ctx.out = &out; ctx.depth = 0; ctx.pretty = INT_MIN; ctx.canon = 0; @@ -298,17 +300,17 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) { if (strcmp(arg, "canonical") == 0) ctx.canon = SvPVXtrue(r); else if (strcmp(arg, "pretty") == 0) ctx.pretty = SvPVXtrue(r) ? 
0 : INT_MIN; else if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r); - else if (strcmp(arg, "max_size") == 0) ctx.out.maxlen = SvUV(r); + else if (strcmp(arg, "max_size") == 0) out.maxlen = SvUV(r); else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r); else croak("Unknown flag: '%s'", arg); } - if (ctx.out.maxlen == 0) ctx.out.maxlen = 1<<30; + if (out.maxlen == 0) out.maxlen = 1<<30; if (ctx.depth == 0) ctx.depth = 512; - fustr_init(&ctx.out, sv_newmortal(), ctx.out.maxlen); + fustr_init(&out, sv_newmortal(), out.maxlen); fujson_fmt(aTHX_ &ctx, val); - if (ctx.pretty >= 0) fustr_write_ch(&ctx.out, '\n'); - r = fustr_done(&ctx.out); + if (ctx.pretty >= 0) fustr_write_ch(&out, '\n'); + r = fustr_done(&out); if (!encutf8) SvUTF8_on(r); return r; } diff --git a/c/pgconn.c b/c/pgconn.c index 94897ca..e6fd3b5 100644 --- a/c/pgconn.c +++ b/c/pgconn.c @@ -406,7 +406,7 @@ static void fupg_results_setup(pTHX_ fupg_st *st) { st->recv = safecalloc(st->nfields, sizeof(*st->recv)); if (st->stflags & FUPG_TEXT_RESULTS) { for (i=0; i<st->nfields; i++) - st->recv[i].fn = fupg_recv_textfmt; + st->recv[i].fn = fupg_recv_text; return; } diff --git a/c/pgtypes.c b/c/pgtypes.c index 2c134c5..284b8c2 100644 --- a/c/pgtypes.c +++ b/c/pgtypes.c @@ -57,10 +57,6 @@ typedef struct { if (iv < min || iv > max) fu_confess("Integer %"IVdf" out of range for type '%s' (oid %u)", iv, ctx->name, ctx->oid) -RECVFN(textfmt) { - return newSVpvn_utf8(buf, len, 1); -} - RECVFN(bool) { RLEN(1); return *buf ? 
&PL_sv_yes : &PL_sv_no; @@ -182,6 +178,44 @@ SENDFN(float8) { fustr_write(out, (const char *)&uv, 8); } +RECVFN(json) { + fujson_parse_ctx json = { + .buf = (const unsigned char *)buf, + .end = (const unsigned char *)buf + len, + .depth = 512 + }; + SV *sv = fujson_parse(aTHX_ &json); + if (sv == NULL) fu_confess("Received invalid JSON for type '%s' (oid %u)", ctx->name, ctx->oid); + if (json.buf != json.end) fu_confess("Received invalid JSON for type '%s' (oid %u)", ctx->name, ctx->oid); + return sv; +} + +SENDFN(json) { + fujson_fmt_ctx json = { .out = out, .depth = 512, .canon = 1, .pretty = INT_MIN }; /* any pretty >= 0 enables pretty-printing; INT_MIN rather than -1 because nesting increments it */ + fujson_fmt(aTHX_ &json, val); +} + +RECVFN(jsonb) { + if (len <= 1 || *buf != 1) fu_confess("Unexpected format for type '%s' (oid %u)", ctx->name, ctx->oid); + return fupg_recv_json(aTHX_ ctx, buf+1, len-1); +} + +SENDFN(jsonb) { + fustr_write_ch(out, 1); + fupg_send_json(aTHX_ ctx, val, out); +} + +RECVFN(jsonpath) { + if (len <= 1 || *buf != 1) fu_confess("Unexpected format for type '%s' (oid %u)", ctx->name, ctx->oid); + return fupg_recv_text(aTHX_ ctx, buf+1, len-1); +} + +SENDFN(jsonpath) { + fustr_write_ch(out, 1); + fupg_send_text(aTHX_ ctx, val, out); +} + +#undef SIV #undef RLEN #undef RECVFN #undef SENDFN @@ -230,7 +264,7 @@ SENDFN(float8) { B( 28, "xid", uint4 )\ B( 29, "cid", uint4 )\ /* 30 oidvector */ \ - /* 114 json */ \ + B( 114, "json", json )\ B( 142, "xml", text )\ B( 194, "pg_node_tree", text ) /* can't be used as a bind param */\ /* 600 point */\ @@ -275,8 +309,8 @@ SENDFN(float8) { /* 3642 gtsvector, does not support binary send/recv */\ B( 3734, "regconfig", uint4 )\ B( 3769, "regdictionary", uint4 )\ - /* 3802 jsonb */\ - /* 4072 jsonpath */\ + B( 3802, "jsonb", jsonb )\ + B( 4072, "jsonpath", jsonpath)\ B( 4089, "regnamespace", uint4 )\ B( 4096, "regrole", uint4 )\ B( 4191, "regcollation", uint4 )\ diff --git a/t/pgtypes.t b/t/pgtypes.t index 070f888..39dafc8 100644 --- a/t/pgtypes.t +++ b/t/pgtypes.t @@ -21,15 +21,15 @@ sub v($type, $p_in, 
@args) { my $res = $conn->q("SELECT \$1::$type", $s_in)->text_params->val; ok is_bool($res), "$type $s_in is bool" if $type eq 'bool'; ok created_as_number($res), "$type $s_in is number" if $type =~ /^int/; - is_deeply $res, $p_out, "$type $s_in text->bin"; + is_deeply $res, $p_out, "$type $s_in text->bin" =~ s/\n/\\n/rg; } { my $res = $conn->q("SELECT \$1::$type", $p_in)->text_results->val; - is $res, $s_out, "$type $s_out bin->text"; + is $res, $s_out, "$type $s_out bin->text" =~ s/\n/\\n/rg; } { my $res = $conn->q("SELECT \$1::$type", $p_in)->val; - is_deeply $res, $p_out, "$type $s_in bin->bin"; + is_deeply $res, $p_out, "$type $s_in bin->bin" =~ s/\n/\\n/rg; } } sub f($type, $p_in) { @@ -77,4 +77,17 @@ f float4 => $_ for ('', 'a', '123g', []); v float8 => $_ for (0, 1234, 1.5); f float8 => $_ for ('', 'a', '123g', []); +# Limitation: There's no way to send a JSON 'null' or differentiate between that and SQL NULL. +v json => {}, undef, '{}'; +# The json type stores its input text verbatim, so bin->text returns exactly the compact canonical text we sent. +v json => [1, undef, true, "hello"], undef, '[1,null,true,"hello"]'; +f json => \1; + +v jsonb => {}, undef, '{}'; +v jsonb => [1, undef, true, "hello"], undef, '[1, null, true, "hello"]'; +f jsonb => \1; + +v jsonpath => $_ for ('$."key"', '$."a[*]"?(@ > 2)'); +f jsonpath => $_ for ('', 'hello world'); + done_testing;