From 7f1c48e0cf318f32ca071831bb3277e74128523b Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sat, 8 Feb 2025 14:03:35 +0100 Subject: [PATCH] pg: Add send/recv support for a few more easy types --- FU/PG.pm | 16 ++--- c/pgconn.c | 1 - c/pgtypes.c | 189 +++++++++++++++++++++++++++++++++++++++++++++++++--- t/pgtypes.t | 41 ++++++++++-- 4 files changed, 222 insertions(+), 25 deletions(-) diff --git a/FU/PG.pm b/FU/PG.pm index c730ad7..8c9729e 100644 --- a/FU/PG.pm +++ b/FU/PG.pm @@ -385,19 +385,15 @@ as well. Much older versions will certainly not work fine. =item * (Probably) not thread-safe. -=item * Only supports the UTF-8 encoding for string columns (text, char, -varchar, etc). When using the binary format (the default) this only works if -your database encoding is UTF-8. Non-UTF-8 databases are still supported with -the text format by setting `client_encoding=utf8` as part of the connection -string or by manually switching to it after C<connect()>: +=item * Only supports the UTF-8 encoding for all text strings sent to and +received from the PostgreSQL server. The encoding is assumed to be UTF-8 by +default, but if this may not be the case in your situation, setting +`client_encoding=utf8` as part of the connection string or manually switching +to it after C<connect()> is always safe: - my $conn = FU::PG->connect("")->text; + my $conn = FU::PG->connect(''); $conn->exec('SET client_encoding=utf8'); -(But you're missing out on most features this module has to offer if you're -stuck with the text format, so L<DBD::Pg> might be a better choice in that -case) - =item * Only works with blocking (synchronous) calls, not very suitable for use in asynchronous frameworks unless you know your queries are fast and you have a low-latency connection with the Postgres server.
diff --git a/c/pgconn.c b/c/pgconn.c index ee672a0..94897ca 100644 --- a/c/pgconn.c +++ b/c/pgconn.c @@ -547,5 +547,4 @@ static void fupg_st_destroy(fupg_st *st) { /* TODO: $st->alla, allh, flat, kvv, kva, kvh */ /* TODO: Prepared statement caching */ -/* TODO: Binary format bind parameters */ /* TODO: Custom type handling */ diff --git a/c/pgtypes.c b/c/pgtypes.c index 160c3e0..2c134c5 100644 --- a/c/pgtypes.c +++ b/c/pgtypes.c @@ -103,25 +103,196 @@ SENDFN(int8) { fustr_write(out, (const char *)&v, 8); } +RECVFN(uint4) { + RLEN(4); + return newSVuv(__builtin_bswap32(*((U32 *)buf))); /* unsigned on the wire: newSViv would turn values >= 2^31 negative where IV is 32 bits */ +} + +SENDFN(uint4) { + SIV(0, UINT32_MAX); + U32 v = __builtin_bswap32((U32)iv); + fustr_write(out, (const char *)&v, 4); +} + +RECVFN(bytea) { + return newSVpvn(buf, len); +} + +SENDFN(bytea) { + STRLEN len; + const char *buf = SvPVbyte(val, len); + fustr_write(out, buf, len); +} + +RECVFN(char) { + RLEN(1); + return newSVpvn(buf, len); +} + +SENDFN(char) { + STRLEN len; + const char *buf = SvPVbyte(val, len); + if (len != 1) fu_confess("Type '%s' (oid %u) expects a 1-byte string", ctx->name, ctx->oid); + fustr_write(out, buf, len); +} + +/* Works for many text-based column types.
+ * Assumes client_encoding=utf8, will create a mess otherwise */ +RECVFN(text) { + return newSVpvn_utf8(buf, len, 1); +} + +SENDFN(text) { + STRLEN len; + const char *buf = SvPVutf8(val, len); + fustr_write(out, buf, len); +} + +RECVFN(float4) { + RLEN(4); + U32 uv = __builtin_bswap32(*((U32 *)buf)); + float r; + memcpy(&r, &uv, 4); + return newSVnv(r); +} + +SENDFN(float4) { + if (!looks_like_number(val)) fu_confess("Type '%s' (oid %u) expects a number", ctx->name, ctx->oid); + float r = SvNV(val); + U32 uv; + memcpy(&uv, &r, 4); + uv = __builtin_bswap32(uv); + fustr_write(out, (const char *)&uv, 4); +} + +RECVFN(float8) { + RLEN(8); + U64 uv = __builtin_bswap64(*((U64 *)buf)); + double r; + memcpy(&r, &uv, 8); + return newSVnv(r); +} + +SENDFN(float8) { + if (!looks_like_number(val)) fu_confess("Type '%s' (oid %u) expects a number", ctx->name, ctx->oid); + double r = SvNV(val); + U64 uv; + memcpy(&uv, &r, 8); + uv = __builtin_bswap64(uv); + fustr_write(out, (const char *)&uv, 8); +} + #undef RLEN #undef RECVFN #undef SENDFN -#define R(name) fupg_recv_##name -#define S(name) fupg_send_##name -/* Sorted by oid to support binary search. */ +/* List of types we handle directly in this module. + Ideally, this includes everything returned by: + + select oid, typname, typreceive, typsend + from pg_type + where typtype = 'b' + and typnamespace = 'pg_catalog'::regnamespace + and typinput != 'array_in'::regproc + order by oid + + Plus hopefully a bunch of common extension types. + + Arrays, records and enums can be handled with generic code. + TODO: pre-seed this list with common array types. + + The "reg#" types are a bit funny: the Postgres devs obviously realized that + writing JOINs is cumbersome, so they hacked together a numeric identifier + type that automatically resolves to a string when formatted as text, or + performs a lookup in the database when parsing text.
In the text format, you + don't get to see the numeric identifier, but sadly that conversion is not + performed in the binary format so we're dealing with numbers instead. Oh well. + Not worth writing custom lookup code for, users will have to adapt. + + Ordered by oid to support binary search. + (name is only used when formatting error messages, for now) */ +#define CORETYPES \ + B( 16, "bool", bool )\ + B( 17, "bytea", bytea )\ + B( 18, "char", char )\ + B( 19, "name", text )\ + B( 20, "int8", int8 )\ + B( 21, "int2", int2 )\ + /* 22 int2vector */ \ + B( 23, "int4", int4 )\ + B( 24, "regproc", uint4 )\ + B( 25, "text", text )\ + B( 26, "oid", uint4 )\ + /* 27 tid: u32 block, u16 offset; represent as hash? */ \ + B( 28, "xid", uint4 )\ + B( 29, "cid", uint4 )\ + /* 30 oidvector */ \ + /* 114 json */ \ + B( 142, "xml", text )\ + B( 194, "pg_node_tree", text ) /* can't be used as a bind param */\ + /* 600 point */\ + /* 601 lseg */\ + /* 602 path */\ + /* 603 box */\ + /* 604 polygon */\ + /* 628 line */\ + /* 650 cidr */\ + B( 700, "float4", float4)\ + B( 701, "float8", float8)\ + /* 718 circle */\ + /* 774 macaddr8 */\ + /* 790 money */\ + /* 829 macaddr */\ + /* 869 inet */\ + /* 1033 aclitem, does not support binary send/recv */\ + B( 1042, "bpchar", text )\ + B( 1043, "varchar", text )\ + /* 1082 date */\ + /* 1083 time */\ + /* 1114 timestamp */\ + /* 1184 timestamptz */\ + /* 1186 interval */\ + /* 1266 timetz */\ + /* 1560 bit */\ + /* 1562 varbit */\ + /* 1700 numeric */\ + B( 1790, "refcursor", text )\ + B( 2202, "regprocedure", uint4 )\ + B( 2203, "regoper", uint4 )\ + B( 2204, "regoperator", uint4 )\ + B( 2205, "regclass", uint4 )\ + B( 2206, "regtype", uint4 )\ + /* 2950 uuid */\ + /* 2970 txid_snapshot */\ + /* 3220 pg_lsn */\ + /* 3361 pg_ndistinct */\ + /* 3402 pg_dependencies */\ + /* 3614 tsvector */\ + /* 3615 tsquery */\ + /* 3642 gtsvector, does not support binary send/recv */\ + B( 3734, "regconfig", uint4 )\ + B( 3769, "regdictionary", uint4 )\
+ /* 3802 jsonb */\ + /* 4072 jsonpath */\ + B( 4089, "regnamespace", uint4 )\ + B( 4096, "regrole", uint4 )\ + B( 4191, "regcollation", uint4 )\ + /* 4600 pg_brin_bloom_summary */\ + /* 4601 pg_brin_minmax_multi_summary */\ + /* 5017 pg_mcv_list */\ + /* 5038 pg_snapshot */\ + /* 5069 xid8 */ + static const fupg_core_type fupg_core_types[] = { - { 16, "bool", S(bool), R(bool) }, - { 20, "int8", S(int8), R(int8) }, - { 21, "int2", S(int2), R(int2) }, - { 23, "int4", S(int4), R(int4) }, +#define B(oid, name, fun) { oid, name"\0", fupg_send_##fun, fupg_recv_##fun }, + CORETYPES +#undef B }; -/* TODO: A LOT MORE TYPES */ -#undef R +#undef CORETYPES #define FUPG_CORE_TYPES (sizeof(fupg_core_types) / sizeof(fupg_core_type)) diff --git a/t/pgtypes.t b/t/pgtypes.t index 51a1111..070f888 100644 --- a/t/pgtypes.t +++ b/t/pgtypes.t @@ -13,9 +13,9 @@ $conn->_debug_trace(0); # TODO: Test behavior of magic bind params sub v($type, $p_in, @args) { - my $p_out = @args > 0 && ref $args[0] ne 'SCALAR' ? $args[0] : $p_in; - my $s_in = @args > 1 && ref $args[1] ne 'SCALAR' ? $args[1] : $p_in; - my $s_out = @args > 2 && ref $args[2] ne 'SCALAR' ? $args[2] : $s_in; + my $p_out = @args > 0 && defined $args[0] ? $args[0] : $p_in; + my $s_in = @args > 1 && defined $args[1] ? $args[1] : $p_in; + my $s_out = @args > 2 && defined $args[2] ?
$args[2] : $s_in; { my $res = $conn->q("SELECT \$1::$type", $s_in)->text_params->val; @@ -36,8 +36,8 @@ sub f($type, $p_in) { ok !eval { $conn->q("SELECT \$1::$type", $p_in)->val; 1 }, "$type $p_in fail"; } -v bool => true, 1, 'true', 't'; -v bool => false, '', 'false', 'f'; +v bool => true, undef, 1, 't'; +v bool => false, undef, 0, 'f'; v int2 => $_ for (1, -1, -32768, 32767, '12345', -12345, 123.0); f int2 => $_ for (-32769, 32768, [], '', 'a', 1.5); @@ -46,4 +46,35 @@ f int4 => $_ for (-2147483649, 2147483648, []); v int8 => $_ for (1, -1, -9223372036854775808, 9223372036854775807, 1234567890123456789, -1234567890123456789, 1e10); f int8 => $_ for ('aaa', '-9223372036854775809', '9223372036854775808', 1e20); +for my $t (qw/regproc oid xid cid regprocedure regoper regoperator regtype regconfig regdictionary regnamespace regrole regcollation/) { + # These numbers must not refer to an existing thing in the database, otherwise the text format differs + v $t, $_ for (1, 12345678, 4294967295); + f $t, $_ for (-1, 4294967296); +} +v regtype => 17, undef, 'bytea'; # like this + +v bytea => '', undef, '\x'; +v bytea => 'hello', undef, '\x68656c6c6f'; +v bytea => "\xaf\x90", undef, '\xaf90'; +f bytea => "\x{1234}"; + +v '"char"' => $_ for (1, '1', 'a', 'A', '-'); +v '"char"' => "\x84", undef, '\204'; +f '"char"' => $_ for ('', 'ab', "\x{1234}"); + +for my $t (qw/name text bpchar varchar/) { + v $t, $_ for ('', "\x{1234}", "hello, world"); +} +f name => 'a'x64; +# These truncate rather than throw an error on conversion? +v 'char(3)' => 'abcd', 'abc', 'abcd', 'abc'; +v 'varchar(3)' => 'abcd', 'abc', 'abcd', 'abc'; + +# TODO: xml; requires postgres to be built with support for it + +v float4 => $_ for (0, 1234, 1.5); +f float4 => $_ for ('', 'a', '123g', []); +v float8 => $_ for (0, 1234, 1.5); +f float8 => $_ for ('', 'a', '123g', []); + done_testing;