fu/c/pgtypes.c
Yorhel 2aaec6a218 pg: Add json, jsonb, jsonpath support
NOW we're really getting to the part where this module is more awesome
than DBD::Pg.

(When I started working on this module I was expecting that the Postgres
binary protocol would send jsonb in a binary format as well and that I'd
be duplicating parts of the JSON parser/formatter to make that work, but
it turns out that Postgres just uses plain json for exchange. Saves me
some trouble, I guess)
2025-02-08 15:16:47 +01:00

343 lines
10 KiB
C

/* Forward declarations of the per-type conversion contexts; the structs are
 * defined below, after the function pointer types that refer to them. */
typedef struct fupg_send fupg_send;
typedef struct fupg_recv fupg_recv;
/* Send function: takes the conversion context, a Perl value and an output
 * buffer, and should append the binary encoded form of the value to the
 * given fustr. Returns nothing; the implementations in this file report
 * invalid input by calling fu_confess(). */
typedef void (*fupg_send_fn)(pTHX_ const fupg_send *, SV *, fustr *);
/* Receive function: takes the conversion context, a binary string and its
 * length in bytes, and should return a Perl value.
 * libpq guarantees that the given buffer is aligned to MAXIMUM_ALIGNOF.
 */
typedef SV *(*fupg_recv_fn)(pTHX_ const fupg_recv *, const char *, int);
/* Context handed to a send function: identifies the Postgres type being
 * encoded. 'oid' and 'name' are used by the send functions in this file when
 * formatting error messages; 'fn' is the encoder itself. */
struct fupg_send {
Oid oid;
const char *name;
fupg_send_fn fn;
};
/* Context handed to a receive function; mirrors struct fupg_send, with 'fn'
 * being the decoder for the type. */
struct fupg_recv {
Oid oid;
const char *name;
fupg_recv_fn fn;
};
/* One entry of the static table of types this module handles natively:
 * the type's oid, its name stored inline, and the send/receive functions
 * that implement its binary format. */
typedef struct {
Oid oid;
char name[16]; /* Postgres has a 64 byte limit on names, but this is sufficient for the core types listed here */
fupg_send_fn send;
fupg_recv_fn recv;
} fupg_core_type;
/* RECVFN(name) opens the definition of the static receive function
 * fupg_recv_<name>; its body can use 'ctx' (type context, possibly unused),
 * 'buf' (the binary value) and 'len' (its length in bytes). */
#define RECVFN(name) static SV *fupg_recv_##name(pTHX_ const fupg_recv *ctx __attribute__((unused)), const char *buf, int len)
/* SENDFN(name) opens the definition of the static send function
 * fupg_send_<name>; its body can use 'ctx' (type context, possibly unused),
 * 'val' (the Perl value to encode) and 'out' (the output buffer). */
#define SENDFN(name) static void fupg_send_##name(pTHX_ const fupg_send *ctx __attribute__((unused)), SV *val, fustr *out)
/* RLEN(l): for use inside a RECVFN body. Confesses with a descriptive error
 * unless the received value is exactly 'l' bytes long. Relies on 'len' and
 * 'ctx' from the enclosing receive function.
 * Wrapped in do/while(0) so it acts as a single statement (no dangling-else
 * surprises), and the argument is parenthesized so any expression is safe. */
#define RLEN(l) do { \
        if ((l) != len) \
            fu_confess("Invalid length for type '%s' (oid %u), expected %d but got %d", ctx->name, ctx->oid, (l), len); \
    } while (0)
/* Perl likes to play loose with SV-to-integer conversions, but that's not
 * very fun when trying to store values in a database. Text-based bind
 * parameters get stricter validation by Postgres, so let's emulate some of
 * that for binary parameters as well.
 *
 * SIV(min, max): for use at the top of a SENDFN body. Declares a local
 * 'IV iv' in the enclosing scope and fills it from 'val', or confesses:
 *  - integer SVs are taken as-is, except that unsigned values above IV_MAX
 *    are rejected instead of being reinterpreted as a negative IV;
 *  - floating point SVs must lie within the IV range and be (nearly)
 *    integral; NaN fails the range test and is rejected as well;
 *  - string SVs must consist of an optional '-' followed by digits that
 *    grok_atoUV() accepts and that fit in an IV;
 *  - anything else is rejected.
 * Finally the value must lie within [min, max]. Relies on 'val' and 'ctx'
 * from the enclosing send function. */
#define SIV(min, max) IV iv;\
    if (SvIOK(val)) { \
        if (SvIsUV(val) && SvUV(val) > (UV)IV_MAX) \
            fu_confess("Integer %"UVuf" out of range for type '%s' (oid %u)", SvUV(val), ctx->name, ctx->oid); \
        iv = SvIV(val); \
    } else if (SvNOK(val)) { \
        NV nv = SvNV(val); \
        if (!(nv >= IV_MIN && nv <= IV_MAX) || fabs(nv - floor(nv)) > 0.0000000001) \
            fu_confess("Type '%s' (oid %u) expects an integer but got a floating point", ctx->name, ctx->oid); \
        iv = SvIV(val); \
    } else if (SvPOK(val)) {\
        STRLEN sl; \
        UV uv; \
        char *s = SvPV(val, sl); \
        if (*s == '-' && grok_atoUV(s+1, &uv, NULL) && uv <= ((UV)IV_MAX)+1) iv = SvIV(val);\
        else if (grok_atoUV(s, &uv, NULL) && uv <= IV_MAX) iv = SvIV(val);\
        else fu_confess("Type '%s' (oid %u) expects an integer", ctx->name, ctx->oid); \
    } else fu_confess("Type '%s' (oid %u) expects an integer", ctx->name, ctx->oid);\
    if (iv < (min) || iv > (max)) fu_confess("Integer %"IVdf" out of range for type '%s' (oid %u)", iv, ctx->name, ctx->oid)
/* Receive a boolean: exactly one byte, where any non-zero value is true.
 * Returns Perl's built-in true or false SV. */
RECVFN(bool) {
    RLEN(1);
    if (*buf != 0)
        return &PL_sv_yes;
    return &PL_sv_no;
}
/* Send a boolean: a single byte, 1 if the Perl value is true and 0 if not. */
SENDFN(bool) {
    if (SvTRUE(val))
        fustr_write_ch(out, 1);
    else
        fustr_write_ch(out, 0);
}
RECVFN(int2) {
RLEN(2);
return newSViv((I16)__builtin_bswap16(*((U16 *)buf)));
}
/* Send a 2-byte signed integer. SIV validates the Perl value and leaves it
 * in 'iv'; the result is byte-swapped before being written out. */
SENDFN(int2) {
    SIV(-32768, 32767);
    U16 wire;
    wire = __builtin_bswap16((U16)iv);
    fustr_write(out, (const char *)&wire, 2);
}
RECVFN(int4) {
RLEN(4);
return newSViv((I32)__builtin_bswap32(*((U32 *)buf)));
}
/* Send a 4-byte signed integer. SIV validates the Perl value and leaves it
 * in 'iv'; the result is byte-swapped before being written out. */
SENDFN(int4) {
    SIV(-2147483648, 2147483647);
    U32 wire;
    wire = __builtin_bswap32((U32)iv);
    fustr_write(out, (const char *)&wire, 4);
}
RECVFN(int8) {
RLEN(8);
return newSViv((I64)__builtin_bswap64(*((U64 *)buf)));
}
SENDFN(int8) {
SIV(IV_MIN, IV_MAX);
U64 v = __builtin_bswap64((U64)SvIV(val));
fustr_write(out, (const char *)&v, 8);
}
RECVFN(uint4) {
RLEN(4);
return newSViv(__builtin_bswap32(*((U32 *)buf)));
}
/* Send a 4-byte unsigned integer. SIV validates the Perl value against
 * [0, UINT32_MAX] and leaves it in 'iv'; the result is byte-swapped before
 * being written out. */
SENDFN(uint4) {
    SIV(0, UINT32_MAX);
    U32 wire;
    wire = __builtin_bswap32((U32)iv);
    fustr_write(out, (const char *)&wire, 4);
}
RECVFN(bytea) {
return newSVpvn(buf, len);
}
/* Send a bytea: the byte-string form of the Perl value is written verbatim. */
SENDFN(bytea) {
    STRLEN nbytes;
    const char *bytes;
    bytes = SvPVbyte(val, nbytes);
    fustr_write(out, bytes, nbytes);
}
RECVFN(char) {
RLEN(1);
return newSVpvn(buf, len);
}
/* Send a "char": the byte-string form of the Perl value must be exactly one
 * byte long, which is then written verbatim. */
SENDFN(char) {
    STRLEN nbytes;
    const char *bytes;
    bytes = SvPVbyte(val, nbytes);
    if (nbytes != 1)
        fu_confess("Type '%s' (oid %u) expects a 1-byte string", ctx->name, ctx->oid);
    fustr_write(out, bytes, nbytes);
}
/* Works for many text-based column types.
* Assumes client_encoding=utf8, will create a mess otherwise */
RECVFN(text) {
return newSVpvn_utf8(buf, len, 1);
}
/* Generic sender for text-like types: writes the UTF-8 encoded form of the
 * Perl value. */
SENDFN(text) {
    STRLEN nbytes;
    const char *utf8;
    utf8 = SvPVutf8(val, nbytes);
    fustr_write(out, utf8, nbytes);
}
/* Receive a 4-byte float: byte-swap the raw bits, then reinterpret them as a
 * C float via memcpy and return the value as a Perl number. */
RECVFN(float4) {
    RLEN(4);
    float f;
    U32 bits = __builtin_bswap32(*((U32 *)buf));
    memcpy(&f, &bits, 4);
    return newSVnv(f);
}
/* Send a 4-byte float: the Perl value must look like a number; it is
 * narrowed to a C float, whose raw bits are byte-swapped and written out. */
SENDFN(float4) {
    float f;
    U32 bits;
    if (!looks_like_number(val))
        fu_confess("Type '%s' (oid %u) expects a number", ctx->name, ctx->oid);
    f = SvNV(val);
    memcpy(&bits, &f, 4);
    bits = __builtin_bswap32(bits);
    fustr_write(out, (const char *)&bits, 4);
}
/* Receive an 8-byte float: byte-swap the raw bits, then reinterpret them as
 * a C double via memcpy and return the value as a Perl number. */
RECVFN(float8) {
    RLEN(8);
    double d;
    U64 bits = __builtin_bswap64(*((U64 *)buf));
    memcpy(&d, &bits, 8);
    return newSVnv(d);
}
/* Send an 8-byte float: the Perl value must look like a number; it is
 * converted to a C double, whose raw bits are byte-swapped and written out. */
SENDFN(float8) {
    double d;
    U64 bits;
    if (!looks_like_number(val))
        fu_confess("Type '%s' (oid %u) expects a number", ctx->name, ctx->oid);
    d = SvNV(val);
    memcpy(&bits, &d, 8);
    bits = __builtin_bswap64(bits);
    fustr_write(out, (const char *)&bits, 8);
}
/* Receive a json value: run the received text through the JSON parser with
 * a nesting limit of 512 and return the resulting Perl value. Confesses if
 * the parser fails or does not consume the entire buffer. */
RECVFN(json) {
    const unsigned char *start = (const unsigned char *)buf;
    fujson_parse_ctx parser = {
        .buf = start,
        .end = start + len,
        .depth = 512
    };
    SV *parsed = fujson_parse(aTHX_ &parser);
    /* A NULL result and leftover input are reported with the same message. */
    if (parsed == NULL || parser.buf != parser.end)
        fu_confess("Received invalid JSON for type '%s' (oid %u)", ctx->name, ctx->oid);
    return parsed;
}
/* Send a json value: format the Perl value as JSON straight into the output
 * buffer, with canonical output enabled, pretty-printing disabled and a
 * nesting limit of 512. */
SENDFN(json) {
    fujson_fmt_ctx fmt = {
        .out = out,
        .depth = 512,
        .canon = 1,
        .pretty = 0
    };
    fujson_fmt(aTHX_ &fmt, val);
}
/* Receive a jsonb value: a leading byte with value 1 (presumably a format
 * version marker) followed by at least one byte of plain JSON text, which is
 * handed to the json receiver. */
RECVFN(jsonb) {
    enum { JSONB_MARKER = 1 };
    if (!(len > 1 && *buf == JSONB_MARKER))
        fu_confess("Unexpected format for type '%s' (oid %u)", ctx->name, ctx->oid);
    return fupg_recv_json(aTHX_ ctx, buf + 1, len - 1);
}
/* Send a jsonb value: a leading byte with value 1 followed by the same
 * output the json sender produces. */
SENDFN(jsonb) {
    enum { JSONB_MARKER = 1 };
    fustr_write_ch(out, JSONB_MARKER);
    fupg_send_json(aTHX_ ctx, val, out);
}
/* Receive a jsonpath value: a leading byte with value 1 (presumably a format
 * version marker) followed by at least one byte of text, which is handed to
 * the text receiver. */
RECVFN(jsonpath) {
    enum { JSONPATH_MARKER = 1 };
    if (!(len > 1 && *buf == JSONPATH_MARKER))
        fu_confess("Unexpected format for type '%s' (oid %u)", ctx->name, ctx->oid);
    return fupg_recv_text(aTHX_ ctx, buf + 1, len - 1);
}
/* Send a jsonpath value: a leading byte with value 1 followed by the same
 * output the text sender produces. */
SENDFN(jsonpath) {
    enum { JSONPATH_MARKER = 1 };
    fustr_write_ch(out, JSONPATH_MARKER);
    fupg_send_text(aTHX_ ctx, val, out);
}
/* The helper macros above are only meant for the send/receive function
 * definitions that precede this point; remove them again. */
#undef SIV
#undef RLEN
#undef RECVFN
#undef SENDFN
/* List of types we handle directly in this module.

   Ideally, this includes everything returned by:

     select oid, typname, typreceive, typsend
       from pg_type
      where typtype = 'b'
        and typnamespace = 'pg_catalog'::regnamespace
        and typinput != 'array_in'::regproc
      order by oid

   Plus hopefully a bunch of common extension types.
   Arrays, records and enums can be handled with generic code.

   TODO: pre-seed this list with common array types.

   The "reg#" types are a bit funny: the Postgres devs obviously realized that
   writing JOINs is cumbersome, so they hacked together a numeric identifier
   type that automatically resolves to a string when formatted as text, or
   performs a lookup in the database when parsing text. In the text format, you
   don't get to see the numeric identifier, but sadly that conversion is not
   performed in the byte format so we're dealing with numbers instead. Oh well.
   Not worth writing custom lookup code for, users will have to adapt.

   Ordered by oid to support binary search.
   (name is only used when formatting error messages, for now)

   This is an X-macro: each entry is written as B(oid, name, fun), and the
   user of CORETYPES defines B() before expanding it. 'fun' selects the
   fupg_send_<fun> / fupg_recv_<fun> pair. Commented-out lines are types that
   are not handled here. */
#define CORETYPES \
B( 16, "bool", bool )\
B( 17, "bytea", bytea )\
B( 18, "char", char )\
B( 19, "name", text )\
B( 20, "int8", int8 )\
B( 21, "int2", int2 )\
/* 22 int2vector */ \
B( 23, "int4", int4 )\
B( 24, "regproc", uint4 )\
B( 25, "text", text )\
B( 26, "oid", uint4 )\
/* 27 tid: u32 block, u16 offset; represent as hash? */ \
B( 28, "xid", uint4 )\
B( 29, "cid", uint4 )\
/* 30 oidvector */ \
B( 114, "json", json )\
B( 142, "xml", text )\
B( 194, "pg_node_tree", text ) /* can't be used as a bind param */\
/* 600 point */\
/* 601 lseg */\
/* 602 path */\
/* 603 box */\
/* 604 polygon */\
/* 628 line */\
/* 650 cidr */\
B( 700, "float4", float4)\
B( 701, "float8", float8)\
/* 718 circle */\
/* 774 macaddr8 */\
/* 790 money */\
/* 829 macaddr */\
/* 869 inet */\
/* 1033 aclitem, does not support binary send/recv */\
B( 1042, "bpchar", text )\
B( 1043, "varchar", text )\
/* 1082 date */\
/* 1083 time */\
/* 1114 timestamp */\
/* 1184 timestamptz */\
/* 1186 interval */\
/* 1266 timetz */\
/* 1560 bit */\
/* 1562 varbit */\
/* 1700 numeric */\
B( 1790, "refcursor", text )\
B( 2202, "regprocedure", uint4 )\
B( 2203, "regoper", uint4 )\
B( 2204, "regoperator", uint4 )\
B( 2205, "regclass", uint4 )\
B( 2206, "regtype", uint4 )\
/* 2950 uuid */\
/* 2970 txid_snapshot */\
/* 3220 pg_lsn */\
/* 3361 pg_ndistinct */\
/* 3402 pg_dependencies */\
/* 3614 tsvector */\
/* 3615 tsquery */\
/* 3642 gtsvector, does not support binary send/recv */\
B( 3734, "regconfig", uint4 )\
B( 3769, "regdictionary", uint4 )\
B( 3802, "jsonb", jsonb )\
B( 4072, "jsonpath", jsonpath)\
B( 4089, "regnamespace", uint4 )\
B( 4096, "regrole", uint4 )\
B( 4191, "regcollation", uint4 )\
/* 4600 pg_brin_bloom_summary */\
/* 4601 pg_brin_minmax_multi_summary */\
/* 5017 pg_mcv_list */\
/* 5038 pg_snapshot */\
/* 5069 xid8 */
/* Table of natively supported types, generated from CORETYPES and therefore
 * sorted by oid. Each name literal gets an explicit "\0" appended, so the
 * initializer always carries one byte more than the name's own terminator
 * would require. */
static const fupg_core_type fupg_core_types[] = {
#define B(type_oid, type_name, fun) { \
        .oid = type_oid, \
        .name = type_name"\0", \
        .send = fupg_send_##fun, \
        .recv = fupg_recv_##fun \
    },
CORETYPES
#undef B
};
#undef CORETYPES
/* Number of entries in fupg_core_types. */
#define FUPG_CORE_TYPES (sizeof(fupg_core_types) / sizeof(fupg_core_type))
/* Look up a natively supported type by its oid using a binary search over
 * fupg_core_types, which must be sorted by oid.
 * Returns a pointer to the matching table entry, or NULL if the oid is not
 * in the table. */
static const fupg_core_type *fupg_core_type_byoid(Oid oid) {
    /* Search the half-open index range [lo, hi). */
    size_t lo = 0, hi = FUPG_CORE_TYPES;
    while (lo < hi) {
        size_t mid = lo + (hi - lo) / 2;
        const fupg_core_type *entry = &fupg_core_types[mid];
        if (entry->oid < oid)
            lo = mid + 1;
        else if (entry->oid > oid)
            hi = mid;
        else
            return entry;
    }
    return NULL;
}