From 7d71e446d038952927707344687eede5ef208b24 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 9 Feb 2025 16:17:58 +0100 Subject: [PATCH] pg: Support array types That wasn't quite as painful as I had anticipated. \o/ --- c/pgconn.c | 102 +++++++++++++------ c/pgtypes.c | 243 +++++++++++++++++++++++++++++++++++++++++--- t/pgtypes-dynamic.t | 3 + t/pgtypes.t | 23 ++++- 4 files changed, 323 insertions(+), 48 deletions(-) diff --git a/c/pgconn.c b/c/pgconn.c index 606207c..a5e46a6 100644 --- a/c/pgconn.c +++ b/c/pgconn.c @@ -270,31 +270,40 @@ static void fupg_refresh_types(pTHX_ fupg_conn *c) { c->types = 0; c->ntypes = 0; - const char *sql = "SELECT oid, typname, typtype FROM pg_type ORDER BY oid"; + const char *sql = + "SELECT oid, typname, typtype" + ", CASE WHEN typcategory = 'A' THEN typelem ELSE 0 END" + " FROM pg_type" + " ORDER BY oid"; PGresult *r = PQexecParams(c->conn, sql, 0, NULL, NULL, NULL, NULL, 1); if (!r) fupg_conn_croak(c, "exec"); if (PQresultStatus(r) != PGRES_TUPLES_OK) fupg_result_croak(r, "exec", sql); c->ntypes = PQntuples(r); - c->types = calloc(c->ntypes, sizeof(*c->types)); + c->types = safecalloc(c->ntypes, sizeof(*c->types)); int i; for (i=0; intypes; i++) { fupg_type *t = c->types + i; t->oid = __builtin_bswap32(*((Oid *)PQgetvalue(r, i, 0))); snprintf(t->name, sizeof(t->name), "%s", PQgetvalue(r, i, 1)); char typ = *PQgetvalue(r, i, 2); + t->elemoid = __builtin_bswap32(*((Oid *)PQgetvalue(r, i, 3))); - /* enum, can use text send/recv */ - if (typ == 'e') { + if (t->elemoid) { + /* array */ + t->send = fupg_send_array; + t->recv = fupg_recv_array; + } else if (typ == 'e') { + /* enum, can use text send/recv */ t->send = fupg_send_text; t->recv = fupg_recv_text; - continue; - } - /* TODO: Array types, records, custom overrides, by-name lookup for dynamic-oid types */ - const fupg_type *builtin = fupg_builtin_byoid(t->oid); - if (builtin) { - t->send = builtin->send; - t->recv = builtin->recv; + } else { + /* TODO: records, (multi)ranges, custom overrides, by-name lookup for dynamic-oid types */ + const fupg_type *builtin = fupg_builtin_byoid(t->oid); + if (builtin) { + t->send = builtin->send; + t->recv = builtin->recv; + } } } PQclear(r); @@ -437,6 +446,27 @@ static void fupg_st_check_dupcols(pTHX_ PGresult *r) { SvREFCNT_dec((SV *)hv); } +static void fupg_params_send(pTHX_ fupg_st *st, Oid oid, SV *val, fustr *out, int *refresh_done) { + fupg_send send, elem; + const fupg_type *t = fupg_lookup_type(aTHX_ st->conn, refresh_done, oid); + if (!t) fu_confess("No type found with oid %u", oid); + if (!t->send) fu_confess("Unable to use type '%s' (oid %u) as bind parameter", t->name, t->oid); + send.oid = oid; + send.name = t->name; + send.fn = t->send; + if (t->send == fupg_send_array) { + if (!t->elemoid) fu_confess("Type '%s' (oid %u) is marked as an array type, but element type is unknown", t->name, t->oid); + const fupg_type *e = fupg_lookup_type(aTHX_ st->conn, refresh_done, t->elemoid); + if (!e) fu_confess("No type found with oid %u", t->elemoid); + send.arrayelem = &elem; + elem.oid = e->oid; + elem.name = e->name; + elem.fn = e->send; + assert(e->send != fupg_send_array); /* TODO: might as well fix this, we'll need recursion for record types anyway */ + } + send.fn(aTHX_ &send, val, out); +} + static void fupg_params_setup(pTHX_ fupg_st *st, int *refresh_done) { int i; st->param_values = safecalloc(st->nbind, sizeof(*st->param_values)); @@ -456,15 +486,8 @@ static void fupg_params_setup(pTHX_ fupg_st *st, int *refresh_done) { st->param_values[i] = NULL; continue; } - fupg_send send; - send.oid = PQparamtype(st->describe, i); - const fupg_type *t = fupg_lookup_type(aTHX_ st->conn, refresh_done, send.oid); - if (!t) fu_confess("No type found with oid %u", send.oid); - if (!t->send) fu_confess("Unable to use type '%s' (oid %u) as bind parameter", t->name, t->oid); - send.name = t->name; - send.fn = t->send; off = fustr_len(buf); - send.fn(aTHX_ &send, st->bind[i], buf); + fupg_params_send(aTHX_ st, PQparamtype(st->describe, i), st->bind[i], buf, refresh_done); st->param_lengths[i] = fustr_len(buf) - off; st->param_formats[i] = 1; st->param_values[i] = ""; @@ -480,23 +503,32 @@ static void fupg_params_setup(pTHX_ fupg_st *st, int *refresh_done) { } } -static void fupg_results_setup(pTHX_ fupg_st *st, int *refresh_done) { - int i; - st->recv = safecalloc(st->nfields, sizeof(*st->recv)); +static void fupg_recv_setup(pTHX_ fupg_st *st, fupg_recv *r, Oid oid, int *refresh_done) { + r->oid = oid; if (st->stflags & FUPG_TEXT_RESULTS) { - for (i=0; infields; i++) - st->recv[i].fn = fupg_recv_text; + r->name = "{textfmt}"; + r->fn = fupg_recv_text; return; } - for (i=0; infields; i++) { - fupg_recv *r = st->recv + i; - r->oid = PQftype(st->result, i); - const fupg_type *t = fupg_lookup_type(aTHX_ st->conn, refresh_done, r->oid); - if (!t) fu_confess("No type found with oid %u", r->oid); - if (!t->recv) fu_confess("Unable to receive data of type '%s' (oid %u)", t->name, t->oid); - r->name = t->name; - r->fn = t->recv; + const fupg_type *t = fupg_lookup_type(aTHX_ st->conn, refresh_done, r->oid); + if (!t) fu_confess("No type found with oid %u", r->oid); + if (!t->recv) fu_confess("Unable to receive data of type '%s' (oid %u)", t->name, t->oid); + r->name = t->name; + r->fn = t->recv; + + if (r->fn == fupg_recv_array) { + if (!t->elemoid) fu_confess("Type '%s' (oid %u) is marked as an array type, but element type is unknown", t->name, t->oid); + r->arrayelem = safecalloc(1, sizeof(*r->arrayelem)); + fupg_recv_setup(aTHX_ st, r->arrayelem, t->elemoid, refresh_done); + } +} + +static void fupg_recv_free(fupg_recv *r) { + if (!r) return; + if (r->fn == fupg_recv_array) { + fupg_recv_free(r->arrayelem); + safefree(r->arrayelem); } } @@ -539,7 +571,10 @@ static void fupg_st_execute(pTHX_ fupg_st *st) { st->result = r; st->nfields = PQnfields(r); - fupg_results_setup(aTHX_ st, &refresh_done); + st->recv = safecalloc(st->nfields, sizeof(*st->recv)); + int i; + for (i=0; infields; i++) + fupg_recv_setup(aTHX_ st, st->recv + i, PQftype(st->result, i), &refresh_done); } static SV *fupg_st_getval(pTHX_ fupg_st *st, int row, int col) { @@ -618,6 +653,7 @@ static void fupg_st_destroy(fupg_st *st) { safefree(st->param_values); safefree(st->param_lengths); safefree(st->param_formats); + if (st->recv) for (i=0; infields; i++) fupg_recv_free(st->recv + i); safefree(st->recv); PQclear(st->describe); PQclear(st->result); diff --git a/c/pgtypes.c b/c/pgtypes.c index ac5b2d0..968ef5d 100644 --- a/c/pgtypes.c +++ b/c/pgtypes.c @@ -14,16 +14,23 @@ struct fupg_send { Oid oid; const char *name; fupg_send_fn fn; + union { + fupg_send *arrayelem; + }; }; struct fupg_recv { Oid oid; const char *name; fupg_recv_fn fn; + union { + fupg_recv *arrayelem; + }; }; typedef struct { Oid oid; + Oid elemoid; /* For arrays */ char name[64]; fupg_send_fn send; fupg_recv_fn recv; @@ -216,6 +223,150 @@ SENDFN(jsonpath) { fupg_send_text(aTHX_ ctx, val, out); } + +#define ARRAY_MAXDIM 100 + +static SV *fupg_recv_array_elem(pTHX_ const fupg_recv *elem, U32 *header, U32 dim, U32 ndim, const char **buf, int *buflen) { + SV *r; + if (dim == ndim) { + if (*buflen < 4) fu_confess("Invalid array format"); + U32 len; + memcpy(&len, *buf, 4); /* Buffer is not necessarily aligned at this point */ + len = __builtin_bswap32(len); + *buf += 4; + *buflen -= 4; + + r = &PL_sv_undef; + if (len != (U32)-1) { + if ((U32)*buflen < len) fu_confess("Invalid array format"); + r = elem->fn(aTHX_ elem, *buf, len); + *buf += len; + *buflen -= len; + } + + } else { + U32 n = __builtin_bswap32(header[dim*2]); + AV *av = newAV_alloc_x(n); + r = sv_2mortal(newRV_noinc((SV *)av)); + U32 i; + for (i=0; iname, ctx->oid); + U32 *header = (U32 *)buf; + U32 ndim = __builtin_bswap32(header[0]); + // header[1] is hasnull, can safely ignore + Oid elemtype = __builtin_bswap32(header[2]); + if (elemtype != ctx->arrayelem->oid) + fu_confess("Array type '%s' (oid %u) expected elements of type %u but got %u", ctx->name, ctx->oid, ctx->arrayelem->oid, elemtype); + + if (ndim == 0) return newRV_noinc((SV *)newAV()); + if (ndim > ARRAY_MAXDIM) fu_confess("Array value of type '%s' (oid %u) has too many dimensions (%d)", ctx->name, ctx->oid, ndim); + if ((U32)len < 12 + ndim*8) fu_confess("Invalid array format for type '%s' (oid %u)", ctx->name, ctx->oid); + + buf += 12 + ndim * 8; + len -= 12 + ndim * 8; + header = header + 3; + + /* Arrays need to be created as temporaries so that they are cleaned up + * when the elem recv function croaks. Their refcnt is increased on + * successful return. */ + ENTER; + SAVETMPS; + SV *r = fupg_recv_array_elem(aTHX_ ctx->arrayelem, header, 0, ndim, &buf, &len); + FREETMPS; + LEAVE; + return r; +} + +void fupg_send_array_elem(aTHX_ const fupg_send *elem, const U32 *dims, U32 dim, U32 ndim, SV *v, fustr *out, int *hasnull) { + SvGETMAGIC(v); + if (dim == ndim) { + if (!SvOK(v)) { + fustr_write(out, "\xff\xff\xff\xff", 4); + *hasnull = 1; + return; + } + size_t lenoff = fustr_len(out); + fustr_write(out, "\0\0\0\0", 4); + elem->fn(elem, v, out); + U32 len = __builtin_bswap32(fustr_len(out) - lenoff - 4); + memcpy(fustr_start(out)+lenoff, &len, 4); + return; + } + + if (!SvROK(v)) fu_confess("Invalid array structure in bind parameter"); + v = SvRV(v); + SvGETMAGIC(v); + if (SvTYPE(v) != SVt_PVAV) fu_confess("Invalid array structure in bind parameter"); + AV *av = (AV*)v; + if (av_count(av) != dims[dim]) fu_confess("Invalid array structure in bind parameter"); + U32 i; + for (i=0; i= ARRAY_MAXDIM) fu_confess("Maximum depth exceeded while attempting to send array of type '%s' (oid %u)", ctx->name, ctx->oid); + dims[ndim] = av_count((AV*)v); + if (ndim > 0 && dims[ndim] == 0) fu_confess("Empty dimension while attempting to send array of type '%s' (oid %u)", ctx->name, ctx->oid); + ndim++; + SV **sv = av_fetch((AV*)v, 0, 0); + if (!sv || !*sv) break; + v = *sv; + } + if (ndim == 0) fu_confess("Type '%s' (oid %u) expects an array", ctx->name, ctx->oid); + if (dims[0] == 0) ndim = 0; + + /* Write header */ + U32 tmp = __builtin_bswap32(ndim); + fustr_write(out, (const char *)&tmp, 4); + fustr_write(out, "\0\0\0\0", 4); /* Placeholder for isnull */ + size_t hasnull_off = fustr_len(out) - 1; + tmp = __builtin_bswap32(ctx->arrayelem->oid); + fustr_write(out, (const char *)&tmp, 4); + U32 i; + for (i=0; ioid == 22 || ctx->oid == 30) fustr_write(out, "\0\0\0\0", 4); + else fustr_write(out, "\0\0\0\1", 4); + } + if (ndim == 0) return; + + /* write the elements */ + int hasnull = 0; + fupg_send_array_elem(aTHX_ ctx->arrayelem, dims, 0, ndim, val, out, &hasnull); + if (hasnull) fustr_start(out)[hasnull_off] = 1; +} + +#undef ARRAY_MAXDIM + #undef SIV #undef RLEN #undef RECVFN @@ -227,18 +378,16 @@ SENDFN(jsonpath) { /* List of types we handle directly in this module. Ideally, this includes everything returned by: - select oid, typname, typreceive, typsend - from pg_type - where typtype = 'b' - and typnamespace = 'pg_catalog'::regnamespace - and typinput != 'array_in'::regproc - order by oid + SELECT oid, typname, typelem, typreceive + FROM pg_type t + WHERE typtype = 'b' + AND typnamespace = 'pg_catalog'::regnamespace + AND (typelem = 0 OR EXISTS(SELECT 1 FROM pg_type e WHERE e.oid = t.typelem AND e.typtype = 'b')) + ORDER by oid + (i.e. all base types and arrays of base types) Plus hopefully a bunch of common extension types. - Arrays, records and enums can be handled with generic code. - TODO: pre-seed this list with common array types. - The "reg#" types are a bit funny: the Postgres devs obviously realized that writing JOINs is cumbersome, so they hacked together a numeric identifier type that automatically resolves to a string when formatted as text, or @@ -256,7 +405,7 @@ SENDFN(jsonpath) { B( 19, "name", text )\ B( 20, "int8", int8 )\ B( 21, "int2", int2 )\ - /* 22 int2vector */ \ + A( 22, "int2vector", 21 )\ B( 23, "int4", int4 )\ B( 24, "regproc", uint4 )\ B( 25, "text", text )\ @@ -264,10 +413,13 @@ SENDFN(jsonpath) { /* 27 tid: u32 block, u16 offset; represent as hash? */ \ B( 28, "xid", uint4 )\ B( 29, "cid", uint4 )\ - /* 30 oidvector */ \ + A( 30, "oidvector", 26 )\ B( 114, "json", json )\ B( 142, "xml", text )\ + A( 143, "_xml", 142 )\ B( 194, "pg_node_tree", text ) /* can't be used as a bind param */\ + A( 199, "_json", 114 )\ + A( 271, "_xid8", 5069 )\ /* 600 point */\ /* 601 lseg */\ /* 602 path */\ @@ -275,56 +427,119 @@ SENDFN(jsonpath) { /* 604 polygon */\ /* 628 line */\ /* 650 cidr */\ + A( 629, "_line", 628 )\ + A( 651, "_cidr", 650 )\ B( 700, "float4", float4)\ B( 701, "float8", float8)\ /* 718 circle */\ + A( 719, "_circle", 718 )\ /* 774 macaddr8 */\ + A( 775, "_macaddr8", 774 )\ /* 790 money */\ + A( 791, "_money", 790 )\ /* 829 macaddr */\ /* 869 inet */\ - /* 1033 aclitem, does not support binary send/recv */\ + A( 1000, "_bool", 16 )\ + A( 1001, "_bytea", 17 )\ + A( 1002, "_char", 18 )\ + A( 1003, "_name", 19 )\ + A( 1005, "_int2", 21 )\ + A( 1006, "_int2vector", 22 )\ + A( 1007, "_int4", 23 )\ + A( 1008, "_regproc", 24 )\ + A( 1009, "_text", 25 )\ + A( 1010, "_tid", 27 )\ + A( 1011, "_xid", 28 )\ + A( 1012, "_cid", 29 )\ + A( 1013, "_oidvector", 30 )\ + A( 1014, "_bpchar", 1042 )\ + A( 1015, "_varchar", 1043 )\ + A( 1016, "_int8", 20 )\ + A( 1017, "_point", 600 )\ + A( 1018, "_lseg", 601 )\ + A( 1019, "_path", 602 )\ + A( 1020, "_box", 603 )\ + A( 1021, "_float4", 700 )\ + A( 1022, "_float8", 701 )\ + A( 1027, "_polygon", 604 )\ + A( 1028, "_oid", 26 )\ + /* 1033 aclitem, does not support send/recv */\ + /* A( 1034, "_aclitem", 1033 ) */\ + A( 1040, "_macaddr", 829 )\ + A( 1041, "_inet", 869 )\ B( 1042, "bpchar", text )\ B( 1043, "varchar", text )\ /* 1082 date */\ /* 1083 time */\ + A( 1115, "_timestamp", 1114 )\ /* 1114 timestamp */\ + A( 1182, "_date", 1082 )\ + A( 1183, "_time", 1083 )\ /* 1184 timestamptz */\ + A( 1185, "_timestamptz", 1184 )\ /* 1186 interval */\ + A( 1187, "_interval", 1186 )\ + A( 1231, "_numeric", 1700 )\ /* 1266 timetz */\ + A( 1270, "_timetz", 1266 )\ /* 1560 bit */\ + A( 1561, "_bit", 1560 )\ /* 1562 varbit */\ + A( 1563, "_varbit", 1562 )\ /* 1700 numeric */\ B( 1790, "refcursor", text )\ + A( 2201, "_refcursor", 1790 )\ B( 2202, "regprocedure", uint4 )\ B( 2203, "regoper", uint4 )\ B( 2204, "regoperator", uint4 )\ B( 2205, "regclass", uint4 )\ B( 2206, "regtype", uint4 )\ + A( 2207, "_regprocedure", 2202 )\ + A( 2208, "_regoper", 2203 )\ + A( 2209, "_regoperator", 2204 )\ + A( 2210, "_regclass", 2205 )\ + A( 2211, "_regtype", 2206 )\ + A( 2949, "_txid_snapshot", 2970 )\ /* 2950 uuid */\ + A( 2951, "_uuid", 2950 )\ /* 2970 txid_snapshot */\ /* 3220 pg_lsn */\ + A( 3221, "_pg_lsn", 3220 )\ /* 3361 pg_ndistinct */\ /* 3402 pg_dependencies */\ /* 3614 tsvector */\ /* 3615 tsquery */\ - /* 3642 gtsvector, does not support binary send/recv */\ + /* 3642 gtsvector, does not support send/recv */\ + A( 3643, "_tsvector", 3614 )\ + /*A( 3644, "_gtsvector", 3642 )*/\ + A( 3645, "_tsquery", 3615 )\ B( 3734, "regconfig", uint4 )\ + A( 3735, "_regconfig", 3734 )\ B( 3769, "regdictionary", uint4 )\ + A( 3770, "_regdictionary", 3769 )\ B( 3802, "jsonb", jsonb )\ + A( 3807, "_jsonb", 3802 )\ B( 4072, "jsonpath", jsonpath)\ + A( 4073, "_jsonpath", 4072 )\ B( 4089, "regnamespace", uint4 )\ + A( 4090, "_regnamespace", 4089 )\ B( 4096, "regrole", uint4 )\ + A( 4097, "_regrole", 4096 )\ B( 4191, "regcollation", uint4 )\ + A( 4192, "_regcollation", 4191 )\ /* 4600 pg_brin_bloom_summary */\ /* 4601 pg_brin_minmax_multi_summary */\ /* 5017 pg_mcv_list */\ /* 5038 pg_snapshot */\ + A( 5039, "_pg_snapshot", 5038 )\ /* 5069 xid8 */ static const fupg_type fupg_builtin[] = { -#define B(oid, name, fun) { oid, name"\0", fupg_send_##fun, fupg_recv_##fun }, +#define B(oid, name, fun) { oid, 0, name"\0", fupg_send_##fun, fupg_recv_##fun }, +#define A(oid, name, eoid) { oid, eoid, name"\0", fupg_send_array, fupg_recv_array }, BUILTINS #undef B +#undef A }; #undef BUILTINS diff --git a/t/pgtypes-dynamic.t b/t/pgtypes-dynamic.t index fddcda8..ae996de 100644 --- a/t/pgtypes-dynamic.t +++ b/t/pgtypes-dynamic.t @@ -15,6 +15,9 @@ like $@, qr/Unable to use type/; $txn->exec("CREATE TYPE fupg_test_enum AS ENUM('a', 'b', 'ccccccccccccccccccc')"); is $txn->q("SELECT 'a'::fupg_test_enum")->val, 'a'; is $txn->q('SELECT $1::fupg_test_enum', 'ccccccccccccccccccc')->val, 'ccccccccccccccccccc'; + + is_deeply $txn->q("SELECT '{a,b,null}'::fupg_test_enum[]")->val, ['a','b',undef]; + is $txn->q('SELECT $1::fupg_test_enum[]', ['a','b',undef])->text_results->val, '{a,b,NULL}'; } done_testing; diff --git a/t/pgtypes.t b/t/pgtypes.t index 50a1cd7..bf57c6c 100644 --- a/t/pgtypes.t +++ b/t/pgtypes.t @@ -22,7 +22,7 @@ sub v($type, $p_in, @args) { { my $res = $conn->q("SELECT \$1::$type", $s_in)->text_params->val; ok is_bool($res), "$test is bool" if $type eq 'bool'; - ok created_as_number($res), "$test is number" if $type =~ /^(int|float)/; + ok created_as_number($res), "$test is number" if $type =~ /^(int|float)\d/; is_deeply $res, $p_out, "$test text->bin"; } { @@ -94,4 +94,25 @@ f jsonb => \1; v jsonpath => $_ for ('$."key"', '$."a[*]"?(@ > 2)'); f jsonpath => $_ for ('', 'hello world'); +v 'int[]', [], undef, '{}'; +v 'int[]', [1], undef, '{1}'; +v 'int[]', [1,-3,undef,3], undef, '{1,-3,NULL,3}'; +v 'int[]', [[1],[2],[3]], undef, '{{1},{2},{3}}'; +v 'int[]', [[1,-1],[2,-2],[undef,undef],[3,-3]], undef, '{{1,-1},{2,-2},{NULL,NULL},{3,-3}}'; +v 'bool[]', [[[[true]]],[[[undef]]]], undef, '{{{{t}}},{{{NULL}}}}'; +v 'text[]', ["\x{1234}"], undef, "{\x{1234}}"; + +f 'int[]', [[1],undef]; +f 'int[]', [[1],[2,3]]; +f 'int[]', [[]]; +f 'oidvector', [undef]; + +# Example from https://www.postgresql.org/docs/17/arrays.html#ARRAYS-IO +# Lower bounds are discarded. +is_deeply $conn->q("SELECT '[1:1][-2:-1][3:5]={{{1,2,3},{4,5,6}}}'::int[]")->val, [[[1,2,3],[4,5,6]]]; + +is $conn->q('SELECT ($1::int2[])[2]', [1,2,3,4])->val, 2; +is $conn->q('SELECT ($1::int2vector)[1]', [1,2,3,4])->val, 2; +is $conn->q('SELECT ($1::oidvector)[1]', [1,2,3,4])->val, 2; + done_testing;