pg: Support array types

That wasn't quite as painful as I had anticipated. \o/
This commit is contained in:
Yorhel 2025-02-09 16:17:58 +01:00
parent b6517cf05a
commit 7d71e446d0
4 changed files with 323 additions and 48 deletions

View file

@ -270,31 +270,40 @@ static void fupg_refresh_types(pTHX_ fupg_conn *c) {
c->types = 0;
c->ntypes = 0;
const char *sql = "SELECT oid, typname, typtype FROM pg_type ORDER BY oid";
const char *sql =
"SELECT oid, typname, typtype"
", CASE WHEN typcategory = 'A' THEN typelem ELSE 0 END"
" FROM pg_type"
" ORDER BY oid";
PGresult *r = PQexecParams(c->conn, sql, 0, NULL, NULL, NULL, NULL, 1);
if (!r) fupg_conn_croak(c, "exec");
if (PQresultStatus(r) != PGRES_TUPLES_OK) fupg_result_croak(r, "exec", sql);
c->ntypes = PQntuples(r);
c->types = calloc(c->ntypes, sizeof(*c->types));
c->types = safecalloc(c->ntypes, sizeof(*c->types));
int i;
for (i=0; i<c->ntypes; i++) {
fupg_type *t = c->types + i;
t->oid = __builtin_bswap32(*((Oid *)PQgetvalue(r, i, 0)));
snprintf(t->name, sizeof(t->name), "%s", PQgetvalue(r, i, 1));
char typ = *PQgetvalue(r, i, 2);
t->elemoid = __builtin_bswap32(*((Oid *)PQgetvalue(r, i, 3)));
/* enum, can use text send/recv */
if (typ == 'e') {
if (t->elemoid) {
/* array */
t->send = fupg_send_array;
t->recv = fupg_recv_array;
} else if (typ == 'e') {
/* enum, can use text send/recv */
t->send = fupg_send_text;
t->recv = fupg_recv_text;
continue;
}
/* TODO: Array types, records, custom overrides, by-name lookup for dynamic-oid types */
const fupg_type *builtin = fupg_builtin_byoid(t->oid);
if (builtin) {
t->send = builtin->send;
t->recv = builtin->recv;
} else {
/* TODO: records, (multi)ranges, custom overrides, by-name lookup for dynamic-oid types */
const fupg_type *builtin = fupg_builtin_byoid(t->oid);
if (builtin) {
t->send = builtin->send;
t->recv = builtin->recv;
}
}
}
PQclear(r);
@ -437,6 +446,27 @@ static void fupg_st_check_dupcols(pTHX_ PGresult *r) {
SvREFCNT_dec((SV *)hv);
}
static void fupg_params_send(pTHX_ fupg_st *st, Oid oid, SV *val, fustr *out, int *refresh_done) {
fupg_send send, elem;
const fupg_type *t = fupg_lookup_type(aTHX_ st->conn, refresh_done, oid);
if (!t) fu_confess("No type found with oid %u", oid);
if (!t->send) fu_confess("Unable to use type '%s' (oid %u) as bind parameter", t->name, t->oid);
send.oid = oid;
send.name = t->name;
send.fn = t->send;
if (t->send == fupg_send_array) {
if (!t->elemoid) fu_confess("Type '%s' (oid %u) is marked as an array type, but element type is unknown", t->name, t->oid);
const fupg_type *e = fupg_lookup_type(aTHX_ st->conn, refresh_done, t->elemoid);
if (!e) fu_confess("No type found with oid %u", t->elemoid);
send.arrayelem = &elem;
elem.oid = e->oid;
elem.name = e->name;
elem.fn = e->send;
assert(e->send != fupg_send_array); /* TODO: might as well fix this, we'll need recursion for record types anyway */
}
send.fn(aTHX_ &send, val, out);
}
static void fupg_params_setup(pTHX_ fupg_st *st, int *refresh_done) {
int i;
st->param_values = safecalloc(st->nbind, sizeof(*st->param_values));
@ -456,15 +486,8 @@ static void fupg_params_setup(pTHX_ fupg_st *st, int *refresh_done) {
st->param_values[i] = NULL;
continue;
}
fupg_send send;
send.oid = PQparamtype(st->describe, i);
const fupg_type *t = fupg_lookup_type(aTHX_ st->conn, refresh_done, send.oid);
if (!t) fu_confess("No type found with oid %u", send.oid);
if (!t->send) fu_confess("Unable to use type '%s' (oid %u) as bind parameter", t->name, t->oid);
send.name = t->name;
send.fn = t->send;
off = fustr_len(buf);
send.fn(aTHX_ &send, st->bind[i], buf);
fupg_params_send(aTHX_ st, PQparamtype(st->describe, i), st->bind[i], buf, refresh_done);
st->param_lengths[i] = fustr_len(buf) - off;
st->param_formats[i] = 1;
st->param_values[i] = "";
@ -480,23 +503,32 @@ static void fupg_params_setup(pTHX_ fupg_st *st, int *refresh_done) {
}
}
static void fupg_results_setup(pTHX_ fupg_st *st, int *refresh_done) {
int i;
st->recv = safecalloc(st->nfields, sizeof(*st->recv));
static void fupg_recv_setup(pTHX_ fupg_st *st, fupg_recv *r, Oid oid, int *refresh_done) {
r->oid = oid;
if (st->stflags & FUPG_TEXT_RESULTS) {
for (i=0; i<st->nfields; i++)
st->recv[i].fn = fupg_recv_text;
r->name = "{textfmt}";
r->fn = fupg_recv_text;
return;
}
for (i=0; i<st->nfields; i++) {
fupg_recv *r = st->recv + i;
r->oid = PQftype(st->result, i);
const fupg_type *t = fupg_lookup_type(aTHX_ st->conn, refresh_done, r->oid);
if (!t) fu_confess("No type found with oid %u", r->oid);
if (!t->recv) fu_confess("Unable to receive data of type '%s' (oid %u)", t->name, t->oid);
r->name = t->name;
r->fn = t->recv;
const fupg_type *t = fupg_lookup_type(aTHX_ st->conn, refresh_done, r->oid);
if (!t) fu_confess("No type found with oid %u", r->oid);
if (!t->recv) fu_confess("Unable to receive data of type '%s' (oid %u)", t->name, t->oid);
r->name = t->name;
r->fn = t->recv;
if (r->fn == fupg_recv_array) {
if (!t->elemoid) fu_confess("Type '%s' (oid %u) is marked as an array type, but element type is unknown", t->name, t->oid);
r->arrayelem = safecalloc(1, sizeof(*r->arrayelem));
fupg_recv_setup(aTHX_ st, r->arrayelem, t->elemoid, refresh_done);
}
}
static void fupg_recv_free(fupg_recv *r) {
if (!r) return;
if (r->fn == fupg_recv_array) {
fupg_recv_free(r->arrayelem);
safefree(r->arrayelem);
}
}
@ -539,7 +571,10 @@ static void fupg_st_execute(pTHX_ fupg_st *st) {
st->result = r;
st->nfields = PQnfields(r);
fupg_results_setup(aTHX_ st, &refresh_done);
st->recv = safecalloc(st->nfields, sizeof(*st->recv));
int i;
for (i=0; i<st->nfields; i++)
fupg_recv_setup(aTHX_ st, st->recv + i, PQftype(st->result, i), &refresh_done);
}
static SV *fupg_st_getval(pTHX_ fupg_st *st, int row, int col) {
@ -618,6 +653,7 @@ static void fupg_st_destroy(fupg_st *st) {
safefree(st->param_values);
safefree(st->param_lengths);
safefree(st->param_formats);
if (st->recv) for (i=0; i<st->nfields; i++) fupg_recv_free(st->recv + i);
safefree(st->recv);
PQclear(st->describe);
PQclear(st->result);

View file

@ -14,16 +14,23 @@ struct fupg_send {
Oid oid;
const char *name;
fupg_send_fn fn;
union {
fupg_send *arrayelem;
};
};
struct fupg_recv {
Oid oid;
const char *name;
fupg_recv_fn fn;
union {
fupg_recv *arrayelem;
};
};
typedef struct {
Oid oid;
Oid elemoid; /* For arrays */
char name[64];
fupg_send_fn send;
fupg_recv_fn recv;
@ -216,6 +223,150 @@ SENDFN(jsonpath) {
fupg_send_text(aTHX_ ctx, val, out);
}
#define ARRAY_MAXDIM 100
static SV *fupg_recv_array_elem(pTHX_ const fupg_recv *elem, U32 *header, U32 dim, U32 ndim, const char **buf, int *buflen) {
SV *r;
if (dim == ndim) {
if (*buflen < 4) fu_confess("Invalid array format");
U32 len;
memcpy(&len, *buf, 4); /* Buffer is not necessarily aligned at this point */
len = __builtin_bswap32(len);
*buf += 4;
*buflen -= 4;
r = &PL_sv_undef;
if (len != (U32)-1) {
if ((U32)*buflen < len) fu_confess("Invalid array format");
r = elem->fn(aTHX_ elem, *buf, len);
*buf += len;
*buflen -= len;
}
} else {
U32 n = __builtin_bswap32(header[dim*2]);
AV *av = newAV_alloc_x(n);
r = sv_2mortal(newRV_noinc((SV *)av));
U32 i;
for (i=0; i<n; i++)
av_push_simple(av, fupg_recv_array_elem(aTHX_ elem, header, dim+1, ndim, buf, buflen));
SvREFCNT_inc(r); /* We're done here, make sure it survives the mortal stack cleanup */
}
return r;
}
RECVFN(array) {
if (len < 12) fu_confess("Invalid array format for type '%s' (oid %u)", ctx->name, ctx->oid);
U32 *header = (U32 *)buf;
U32 ndim = __builtin_bswap32(header[0]);
// header[1] is hasnull, can safely ignore
Oid elemtype = __builtin_bswap32(header[2]);
if (elemtype != ctx->arrayelem->oid)
fu_confess("Array type '%s' (oid %u) expected elements of type %u but got %u", ctx->name, ctx->oid, ctx->arrayelem->oid, elemtype);
if (ndim == 0) return newRV_noinc((SV *)newAV());
if (ndim > ARRAY_MAXDIM) fu_confess("Array value of type '%s' (oid %u) has too many dimensions (%d)", ctx->name, ctx->oid, ndim);
if ((U32)len < 12 + ndim*8) fu_confess("Invalid array format for type '%s' (oid %u)", ctx->name, ctx->oid);
buf += 12 + ndim * 8;
len -= 12 + ndim * 8;
header = header + 3;
/* Arrays need to be created as temporaries so that they are cleaned up
* when the elem recv function croaks. Their refcnt is increased on
* successful return. */
ENTER;
SAVETMPS;
SV *r = fupg_recv_array_elem(aTHX_ ctx->arrayelem, header, 0, ndim, &buf, &len);
FREETMPS;
LEAVE;
return r;
}
void fupg_send_array_elem(aTHX_ const fupg_send *elem, const U32 *dims, U32 dim, U32 ndim, SV *v, fustr *out, int *hasnull) {
SvGETMAGIC(v);
if (dim == ndim) {
if (!SvOK(v)) {
fustr_write(out, "\xff\xff\xff\xff", 4);
*hasnull = 1;
return;
}
size_t lenoff = fustr_len(out);
fustr_write(out, "\0\0\0\0", 4);
elem->fn(elem, v, out);
U32 len = __builtin_bswap32(fustr_len(out) - lenoff - 4);
memcpy(fustr_start(out)+lenoff, &len, 4);
return;
}
if (!SvROK(v)) fu_confess("Invalid array structure in bind parameter");
v = SvRV(v);
SvGETMAGIC(v);
if (SvTYPE(v) != SVt_PVAV) fu_confess("Invalid array structure in bind parameter");
AV *av = (AV*)v;
if (av_count(av) != dims[dim]) fu_confess("Invalid array structure in bind parameter");
U32 i;
for (i=0; i<dims[dim]; i++) {
SV **sv = av_fetch(av, i, 0);
if (!sv || !*sv) fu_confess("Invalid array structure in bind parameter");
fupg_send_array_elem(aTHX_ elem, dims, dim+1, ndim, *sv, out, hasnull);
}
}
SENDFN(array) {
U32 ndim = 0;
U32 dims[ARRAY_MAXDIM];
/* First figure out ndim and length-per-dim. The has-null flag and
* verification that each array-per-dimension has the same length is done
* while writing the elements.
* This is prone to errors if the elem type also accepts arrays as input,
* not quite sure how to deal with that case. */
SV *v = val;
while (true) {
SvGETMAGIC(v);
if (!SvROK(v)) break;
v = SvRV(v);
SvGETMAGIC(v);
if (SvTYPE(v) != SVt_PVAV) break;
if (ndim >= ARRAY_MAXDIM) fu_confess("Maximum depth exceeded while attempting to send array of type '%s' (oid %u)", ctx->name, ctx->oid);
dims[ndim] = av_count((AV*)v);
if (ndim > 0 && dims[ndim] == 0) fu_confess("Empty dimension while attempting to send array of type '%s' (oid %u)", ctx->name, ctx->oid);
ndim++;
SV **sv = av_fetch((AV*)v, 0, 0);
if (!sv || !*sv) break;
v = *sv;
}
if (ndim == 0) fu_confess("Type '%s' (oid %u) expects an array", ctx->name, ctx->oid);
if (dims[0] == 0) ndim = 0;
/* Write header */
U32 tmp = __builtin_bswap32(ndim);
fustr_write(out, (const char *)&tmp, 4);
fustr_write(out, "\0\0\0\0", 4); /* Placeholder for isnull */
size_t hasnull_off = fustr_len(out) - 1;
tmp = __builtin_bswap32(ctx->arrayelem->oid);
fustr_write(out, (const char *)&tmp, 4);
U32 i;
for (i=0; i<ndim; i++) {
tmp = __builtin_bswap32(dims[i]);
fustr_write(out, (const char *)&tmp, 4);
/* int2vector and oidvector expect 0-based indexing,
* everything else defaults to 1-based indexing. */
if (ctx->oid == 22 || ctx->oid == 30) fustr_write(out, "\0\0\0\0", 4);
else fustr_write(out, "\0\0\0\1", 4);
}
if (ndim == 0) return;
/* write the elements */
int hasnull = 0;
fupg_send_array_elem(aTHX_ ctx->arrayelem, dims, 0, ndim, val, out, &hasnull);
if (hasnull) fustr_start(out)[hasnull_off] = 1;
}
#undef ARRAY_MAXDIM
#undef SIV
#undef RLEN
#undef RECVFN
@ -227,18 +378,16 @@ SENDFN(jsonpath) {
/* List of types we handle directly in this module.
Ideally, this includes everything returned by:
select oid, typname, typreceive, typsend
from pg_type
where typtype = 'b'
and typnamespace = 'pg_catalog'::regnamespace
and typinput != 'array_in'::regproc
order by oid
SELECT oid, typname, typelem, typreceive
FROM pg_type t
WHERE typtype = 'b'
AND typnamespace = 'pg_catalog'::regnamespace
AND (typelem = 0 OR EXISTS(SELECT 1 FROM pg_type e WHERE e.oid = t.typelem AND e.typtype = 'b'))
ORDER by oid
(i.e. all base types and arrays of base types)
Plus hopefully a bunch of common extension types.
Arrays, records and enums can be handled with generic code.
TODO: pre-seed this list with common array types.
The "reg#" types are a bit funny: the Postgres devs obviously realized that
writing JOINs is cumbersome, so they hacked together a numeric identifier
type that automatically resolves to a string when formatted as text, or
@ -256,7 +405,7 @@ SENDFN(jsonpath) {
B( 19, "name", text )\
B( 20, "int8", int8 )\
B( 21, "int2", int2 )\
/* 22 int2vector */ \
A( 22, "int2vector", 21 )\
B( 23, "int4", int4 )\
B( 24, "regproc", uint4 )\
B( 25, "text", text )\
@ -264,10 +413,13 @@ SENDFN(jsonpath) {
/* 27 tid: u32 block, u16 offset; represent as hash? */ \
B( 28, "xid", uint4 )\
B( 29, "cid", uint4 )\
/* 30 oidvector */ \
A( 30, "oidvector", 26 )\
B( 114, "json", json )\
B( 142, "xml", text )\
A( 143, "_xml", 142 )\
B( 194, "pg_node_tree", text ) /* can't be used as a bind param */\
A( 199, "_json", 114 )\
A( 271, "_xid8", 5069 )\
/* 600 point */\
/* 601 lseg */\
/* 602 path */\
@ -275,56 +427,119 @@ SENDFN(jsonpath) {
/* 604 polygon */\
/* 628 line */\
/* 650 cidr */\
A( 629, "_line", 628 )\
A( 651, "_cidr", 650 )\
B( 700, "float4", float4)\
B( 701, "float8", float8)\
/* 718 circle */\
A( 719, "_circle", 718 )\
/* 774 macaddr8 */\
A( 775, "_macaddr8", 774 )\
/* 790 money */\
A( 791, "_money", 790 )\
/* 829 macaddr */\
/* 869 inet */\
/* 1033 aclitem, does not support binary send/recv */\
A( 1000, "_bool", 16 )\
A( 1001, "_bytea", 17 )\
A( 1002, "_char", 18 )\
A( 1003, "_name", 19 )\
A( 1005, "_int2", 21 )\
A( 1006, "_int2vector", 22 )\
A( 1007, "_int4", 23 )\
A( 1008, "_regproc", 24 )\
A( 1009, "_text", 25 )\
A( 1010, "_tid", 27 )\
A( 1011, "_xid", 28 )\
A( 1012, "_cid", 29 )\
A( 1013, "_oidvector", 30 )\
A( 1014, "_bpchar", 1042 )\
A( 1015, "_varchar", 1043 )\
A( 1016, "_int8", 20 )\
A( 1017, "_point", 600 )\
A( 1018, "_lseg", 601 )\
A( 1019, "_path", 602 )\
A( 1020, "_box", 603 )\
A( 1021, "_float4", 700 )\
A( 1022, "_float8", 701 )\
A( 1027, "_polygon", 604 )\
A( 1028, "_oid", 26 )\
/* 1033 aclitem, does not support send/recv */\
/* A( 1034, "_aclitem", 1033 ) */\
A( 1040, "_macaddr", 829 )\
A( 1041, "_inet", 869 )\
B( 1042, "bpchar", text )\
B( 1043, "varchar", text )\
/* 1082 date */\
/* 1083 time */\
A( 1115, "_timestamp", 1114 )\
/* 1114 timestamp */\
A( 1182, "_date", 1082 )\
A( 1183, "_time", 1083 )\
/* 1184 timestamptz */\
A( 1185, "_timestamptz", 1184 )\
/* 1186 interval */\
A( 1187, "_interval", 1186 )\
A( 1231, "_numeric", 1700 )\
/* 1266 timetz */\
A( 1270, "_timetz", 1266 )\
/* 1560 bit */\
A( 1561, "_bit", 1560 )\
/* 1562 varbit */\
A( 1563, "_varbit", 1562 )\
/* 1700 numeric */\
B( 1790, "refcursor", text )\
A( 2201, "_refcursor", 1790 )\
B( 2202, "regprocedure", uint4 )\
B( 2203, "regoper", uint4 )\
B( 2204, "regoperator", uint4 )\
B( 2205, "regclass", uint4 )\
B( 2206, "regtype", uint4 )\
A( 2207, "_regprocedure", 2202 )\
A( 2208, "_regoper", 2203 )\
A( 2209, "_regoperator", 2204 )\
A( 2210, "_regclass", 2205 )\
A( 2211, "_regtype", 2206 )\
A( 2949, "_txid_snapshot", 2970 )\
/* 2950 uuid */\
A( 2951, "_uuid", 2950 )\
/* 2970 txid_snapshot */\
/* 3220 pg_lsn */\
A( 3221, "_pg_lsn", 3220 )\
/* 3361 pg_ndistinct */\
/* 3402 pg_dependencies */\
/* 3614 tsvector */\
/* 3615 tsquery */\
/* 3642 gtsvector, does not support binary send/recv */\
/* 3642 gtsvector, does not support send/recv */\
A( 3643, "_tsvector", 3614 )\
/*A( 3644, "_gtsvector", 3642 )*/\
A( 3645, "_tsquery", 3615 )\
B( 3734, "regconfig", uint4 )\
A( 3735, "_regconfig", 3734 )\
B( 3769, "regdictionary", uint4 )\
A( 3770, "_regdictionary", 3769 )\
B( 3802, "jsonb", jsonb )\
A( 3807, "_jsonb", 3802 )\
B( 4072, "jsonpath", jsonpath)\
A( 4073, "_jsonpath", 4072 )\
B( 4089, "regnamespace", uint4 )\
A( 4090, "_regnamespace", 4089 )\
B( 4096, "regrole", uint4 )\
A( 4097, "_regrole", 4096 )\
B( 4191, "regcollation", uint4 )\
A( 4192, "_regcollation", 4191 )\
/* 4600 pg_brin_bloom_summary */\
/* 4601 pg_brin_minmax_multi_summary */\
/* 5017 pg_mcv_list */\
/* 5038 pg_snapshot */\
A( 5039, "_pg_snapshot", 5038 )\
/* 5069 xid8 */
static const fupg_type fupg_builtin[] = {
#define B(oid, name, fun) { oid, name"\0", fupg_send_##fun, fupg_recv_##fun },
#define B(oid, name, fun) { oid, 0, name"\0", fupg_send_##fun, fupg_recv_##fun },
#define A(oid, name, eoid) { oid, eoid, name"\0", fupg_send_array, fupg_recv_array },
BUILTINS
#undef B
#undef A
};
#undef BUILTINS

View file

@ -15,6 +15,9 @@ like $@, qr/Unable to use type/;
$txn->exec("CREATE TYPE fupg_test_enum AS ENUM('a', 'b', 'ccccccccccccccccccc')");
is $txn->q("SELECT 'a'::fupg_test_enum")->val, 'a';
is $txn->q('SELECT $1::fupg_test_enum', 'ccccccccccccccccccc')->val, 'ccccccccccccccccccc';
is_deeply $txn->q("SELECT '{a,b,null}'::fupg_test_enum[]")->val, ['a','b',undef];
is $txn->q('SELECT $1::fupg_test_enum[]', ['a','b',undef])->text_results->val, '{a,b,NULL}';
}
done_testing;

View file

@ -22,7 +22,7 @@ sub v($type, $p_in, @args) {
{
my $res = $conn->q("SELECT \$1::$type", $s_in)->text_params->val;
ok is_bool($res), "$test is bool" if $type eq 'bool';
ok created_as_number($res), "$test is number" if $type =~ /^(int|float)/;
ok created_as_number($res), "$test is number" if $type =~ /^(int|float)\d/;
is_deeply $res, $p_out, "$test text->bin";
}
{
@ -94,4 +94,25 @@ f jsonb => \1;
v jsonpath => $_ for ('$."key"', '$."a[*]"?(@ > 2)');
f jsonpath => $_ for ('', 'hello world');
v 'int[]', [], undef, '{}';
v 'int[]', [1], undef, '{1}';
v 'int[]', [1,-3,undef,3], undef, '{1,-3,NULL,3}';
v 'int[]', [[1],[2],[3]], undef, '{{1},{2},{3}}';
v 'int[]', [[1,-1],[2,-2],[undef,undef],[3,-3]], undef, '{{1,-1},{2,-2},{NULL,NULL},{3,-3}}';
v 'bool[]', [[[[true]]],[[[undef]]]], undef, '{{{{t}}},{{{NULL}}}}';
v 'text[]', ["\x{1234}"], undef, "{\x{1234}}";
f 'int[]', [[1],undef];
f 'int[]', [[1],[2,3]];
f 'int[]', [[]];
f 'oidvector', [undef];
# Example from https://www.postgresql.org/docs/17/arrays.html#ARRAYS-IO
# Lower bounds are discarded.
is_deeply $conn->q("SELECT '[1:1][-2:-1][3:5]={{{1,2,3},{4,5,6}}}'::int[]")->val, [[[1,2,3],[4,5,6]]];
is $conn->q('SELECT ($1::int2[])[2]', [1,2,3,4])->val, 2;
is $conn->q('SELECT ($1::int2vector)[1]', [1,2,3,4])->val, 2;
is $conn->q('SELECT ($1::oidvector)[1]', [1,2,3,4])->val, 2;
done_testing;