From 0c59b56ee8ced9a76bc4f82d2a1d2886f6e09659 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 26 Feb 2025 08:06:57 +0100 Subject: [PATCH 01/99] Minor doc & portability fixes --- FU/Util.pm | 2 +- FU/XMLWriter.pm | 8 ++++---- Makefile.PL | 4 ++++ c/xmlwr.c | 3 ++- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/FU/Util.pm b/FU/Util.pm index b36ab37..af5b4e4 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -366,7 +366,7 @@ here anyway because the L supervisor uses them: =item fdpass_send($send_fd, $pass_fd, $message) Send a message and a file descriptor (C<$pass_fd>) over the given socket -(<$send_fd>). C<$message> must not be empty, even if you don't intend to do +(C<$send_fd>). C<$message> must not be empty, even if you don't intend to do anything with it on receipt. Both C<$send_fd> and C<$pass_fd> must be numeric file descriptors, as obtained by C. diff --git a/FU/XMLWriter.pm b/FU/XMLWriter.pm index 5678d6c..0241007 100644 --- a/FU/XMLWriter.pm +++ b/FU/XMLWriter.pm @@ -90,7 +90,7 @@ changes, see the main L module for details. =head1 SYNOPSIS - use FU::XMLWriter ':html5'; + use FU::XMLWriter ':html5_'; my $html_string = html_ sub { head_ sub { @@ -108,7 +108,7 @@ changes, see the main L module for details. # Or XML: - use FU::XMLWriter ':xml'; + use FU::XMLWriter ':xml_'; my $xml_string = xml_ sub { tag_ feed => xmlns => 'http://www.w3.org/2005/Atom', @@ -268,12 +268,12 @@ and C<"> are replaced with their XML entity. All of the functions mentioned in this document can be imported individually. There are also two import groups: - use FU::XMLWriter ':html'; + use FU::XMLWriter ':html_'; Exports C, C, C, C and all of the C<< _ >> functions mentioned above. - use FU::XMLWriter ':xml'; + use FU::XMLWriter ':xml_'; Exports C, C, C and C. diff --git a/Makefile.PL b/Makefile.PL index 2da15d4..87362fc 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -17,6 +17,10 @@ WriteMakefile( dynamic_config => 0, resources => { repository => 'https://code.blicky.net/yorhel/fu', + bugtracker => 'https://code.blicky.net/yorhel/fu/issues', + }, + no_index => { + file => 'bench.PL', }, }, depend => { '$(OBJECT)', 'c/*.c' }, diff --git a/c/xmlwr.c b/c/xmlwr.c index e6d8ac3..f81d94c 100644 --- a/c/xmlwr.c +++ b/c/xmlwr.c @@ -31,6 +31,7 @@ static void fuxmlwr_escape(pTHX_ fuxmlwr *wr, SV *sv) { const unsigned char *str = (unsigned char *)SvPV_const(sv, len); const unsigned char *tmp, *end = str + len; unsigned char x = 0; + unsigned char *buf; int utf8 = SvUTF8(sv); while (str < end) { @@ -55,7 +56,7 @@ static void fuxmlwr_escape(pTHX_ fuxmlwr *wr, SV *sv) { case '&': fustr_write(&wr->out, "&", 5); break; case '"': fustr_write(&wr->out, """, 6); break; default: - unsigned char *buf = (unsigned char *)fustr_write_buf(&wr->out, 2); + buf = (unsigned char *)fustr_write_buf(&wr->out, 2); buf[0] = 0xc0 | (x >> 6); buf[1] = 0x80 | (x & 0x3f); break; From 43928b91e8588b7f1f85842d308963a28239074f Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 26 Feb 2025 08:53:16 +0100 Subject: [PATCH 02/99] Fix two memory leaks --- FU.xs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/FU.xs b/FU.xs index 4d809c7..cddf171 100644 --- a/FU.xs +++ b/FU.xs @@ -362,7 +362,7 @@ void nrows(fupg_st *st) void query(fupg_st *st) CODE: - ST(0) = newSVpvn_utf8(st->query, strlen(st->query), 1); + ST(0) = newSVpvn_flags(st->query, strlen(st->query), SVs_TEMP|SVf_UTF8); void exec_time(fupg_st *st) CODE: @@ -398,7 +398,7 @@ void _new() void _done(fuxmlwr *wr) CODE: - ST(0) = fustr_done(&wr->out); + ST(0) = sv_2mortal(fustr_done(&wr->out)); fustr_init(&wr->out, NULL, SIZE_MAX); void lit_(SV *sv) From 29dd09e809e2bbcfbf357a5faec75ffd9b6b946a Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 26 Feb 2025 08:59:33 +0100 Subject: [PATCH 03/99] FU: Drop Zstd support Compress::Zstd decided to bundle libzstd instead of linking to the system lib, and it predictably hasn't been updated in 6 years. I consider that broken to the point of DO-NOT-USE. Maybe I'll do a custom dlopen() wrapper for that later, but for now let's just stick with gzip. --- FU.pm | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/FU.pm b/FU.pm index 4601423..ce830a6 100644 --- a/FU.pm +++ b/FU.pm @@ -739,7 +739,6 @@ sub _error_page($, $code, $title, $msg) { sub _finalize { state $haszlib = eval { require Compress::Raw::Zlib; 1 }; - state $haszstd = eval { require Compress::Zstd; 1 }; my $r = $FU::REQ; if ($r->{status} == 204 || $r->{status} == 304) { @@ -749,17 +748,13 @@ sub _finalize { $r->{resbody} = ''; } else { - if (($haszlib || $haszstd) && length($r->{resbody}) > 256 + if ($haszlib && length($r->{resbody}) > 256 && !defined $r->{reshdr}{'content-encoding'} && FU::compress_mimes->{$r->{reshdr}{'content-type'}}) { $r->{reshdr}{'vary'} = ($r->{reshdr}{'vary'} ? $r->{reshdr}{'vary'}.', ' : '').'accept-encoding' if ($r->{reshdr}{'vary'}||'') !~ /accept-encoding/i; - if ($haszstd && ($r->{hdr}{'accept-encoding'}||'') =~ /zstd/) { - $r->{resbody} = Compress::Zstd::compress($r->{resbody}); - $r->{reshdr}{'content-encoding'} = 'zstd'; - - } elsif ($haszlib && ($r->{hdr}{'accept-encoding'}||'') =~ /gzip/) { + if ($haszlib && ($r->{hdr}{'accept-encoding'}||'') =~ /gzip/) { # Use lower-level API because the higher-level Compress::Zlib loads a whole bunch of other modules. my $z = Compress::Raw::Zlib::Deflate->new(-WindowBits => Compress::Raw::Zlib::WANT_GZIP(), -Level => 3, -AppendOutput => 1); $z->deflate($r->{resbody}, my $buf); @@ -884,9 +879,6 @@ is). There are a few additional optional dependencies: =item * C - required for L, dynamically loaded through C. -=item * L - to support transparent HTTP compression through -Zstandard. - =back From 8dca0a22a92d7a16590a2ae53e4999046624047c Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 26 Feb 2025 11:28:19 +0100 Subject: [PATCH 04/99] FU: Add randomized --max-reqs option --- FU.pm | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/FU.pm b/FU.pm index ce830a6..25db727 100644 --- a/FU.pm +++ b/FU.pm @@ -507,7 +507,7 @@ sub _spawn { $c{proc} = $1 if /^--proc=([0-9]+)$/; $c{monitor} = 1 if /^--monitor$/; $c{monitor} = 0 if /^--no-monitor$/; - $c{max_reqs} = $1 if /^--max-reqs=([0-9]+)$/; + $c{max_reqs} = $1 if /^--max-reqs=([0-9]+(?::[0-9]+)?)$/; debug 1 if /^--debug$/; debug 0 if /^--no-debug$/; $ENV{FU_LOG_FILE} = $1 if /^--log-file=(.+)$/; @@ -553,6 +553,7 @@ sub _spawn { _supervisor \%c; } else { $c{supervisor_sock}->syswrite('r'.pack 'V', $$) if $c{supervisor_sock}; + $c{max_reqs} = $1 >= $2 ? $1 : $1 + int rand $2-$1 if $c{max_reqs} =~ /^([0-9]+):([0-9]+)$/; _run_loop \%c; } } @@ -1387,12 +1388,19 @@ significant cost in performance - better not enable this in production. =item FU_MAX_REQS=n +=item FU_MAX_REQS=min:max + =item --max-reqs=n +=item --max-reqs=min:max + Worker processes can automatically restart after handling a number of requests. -Set to 0 (the default) to disable this feature. This option can be useful when -your worker processes keep accumulating memory over time. A little pruning now -and then can never hurt. +Set to 0 (the default) to disable this feature. When set as C, the +number of requests is randomized in the given range, which is useful to avoid +restarting all worker processes around the same time. + +This option can be useful when your worker processes keep accumulating memory +over time. A little pruning now and then can never hurt. =item FU_DEBUG=0/1 From de36b90cdee92cb8751b13ff11a39106cff01382 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 27 Feb 2025 09:34:14 +0100 Subject: [PATCH 05/99] Fixes for longdouble perl builds Mostly test fixes, but the Pg 'timestamp' type did have a small conversion bug. --- c/pgtypes.c | 4 ++-- t/json_format.t | 3 ++- t/json_parse.t | 5 +++-- t/pgtypes.t | 1 + 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/c/pgtypes.c b/c/pgtypes.c index c51226b..cb4eb41 100644 --- a/c/pgtypes.c +++ b/c/pgtypes.c @@ -55,7 +55,7 @@ typedef struct { if (SvIOK(val)) iv = SvIV(val); \ else if (SvNOK(val)) { \ NV nv = SvNV(val); \ - if (nv < IV_MIN || nv > IV_MAX || fabs(nv - floor(nv)) > 0.0000000001) SERR("expected integer");\ + if (nv < IV_MIN || nv > IV_MAX || fabs((double)(nv - floor(nv))) > 0.0000000001) SERR("expected integer");\ iv = SvIV(val); \ } else if (SvPOK(val)) {\ STRLEN sl; \ @@ -503,7 +503,7 @@ SENDFN(uuid) { RECVFN(timestamp) { RLEN(8); IV ts = fu_frombeI(64, buf); - return newSVnv(((double)ts / 1000000) + UNIX_PG_EPOCH); + return newSVnv(((NV)ts / 1000000) + UNIX_PG_EPOCH); } SENDFN(timestamp) { diff --git a/t/json_format.t b/t/json_format.t index e7f41d5..0797a4f 100644 --- a/t/json_format.t +++ b/t/json_format.t @@ -5,6 +5,7 @@ use Test::More; use Tie::Array; use Tie::Hash; use FU::Util 'json_format'; +use Config; sub MyToJSON::TO_JSON { [scalar @_, ref $_[0], ${$_[0]}] } @@ -54,7 +55,7 @@ my @tests = ( ''.$$, '"'.$$.'"', do { my $x = 12; utf8::decode($x); $x }, '"12"', do { no warnings 'numeric'; my $x = '19a'; $x += 0; $x }, '19', - 1844674407370955161 / 10, '1.84467440737096e+17', + 1844674407370955161 / 10, $Config{uselongdouble} ? 184467440737095516 : '1.84467440737096e+17', ); my @errors = ( diff --git a/t/json_parse.t b/t/json_parse.t index d223077..901d01f 100644 --- a/t/json_parse.t +++ b/t/json_parse.t @@ -3,6 +3,7 @@ use Test::More; use FU::Util 'json_parse'; no warnings 'experimental::builtin'; use builtin 'is_bool', 'created_as_number'; +use Config; my @error = ( '', @@ -96,8 +97,8 @@ num ' -0 ', 0; num '-9223372036854775808'; num '9223372036854775807'; num '18446744073709551615'; -num '-9223372036854775809', -9.22337203685478e+18; -num '18446744073709551616', 1.84467440737096e+19; +num '-9223372036854775809', $Config{uselongdouble} ? -9.22337203685477581e+18 : -9.22337203685478e+18; +num '18446744073709551616', $Config{uselongdouble} ? 1.84467440737095516e+19 : 1.84467440737096e+19; num '1.234'; num '1e5', 100000; num '1e+5', 100000; diff --git a/t/pgtypes.t b/t/pgtypes.t index 326ccce..ba18815 100644 --- a/t/pgtypes.t +++ b/t/pgtypes.t @@ -2,6 +2,7 @@ use v5.36; use Test::More; no warnings 'experimental::builtin'; use builtin qw/true false is_bool created_as_number/; +use Config; plan skip_all => $@ if !eval { require FU::Pg; } && $@ =~ /Unable to load libpq/; die $@ if $@; From b06cc2482648af085d0d48a5404a8f2a8fb5c87a Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 27 Feb 2025 09:10:37 +0100 Subject: [PATCH 06/99] FU: Add super awesome and butt-ugly FU::debug_info web interface This is so much more useful than embedding debugging info inside pages, as I've been doing before. --- FU.pm | 100 ++++++++++---- FU/DebugImpl.pm | 353 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 430 insertions(+), 23 deletions(-) create mode 100644 FU/DebugImpl.pm diff --git a/FU.pm b/FU.pm index 25db727..d5ce879 100644 --- a/FU.pm +++ b/FU.pm @@ -130,21 +130,27 @@ sub init_db($info) { } -my @before_request; -my @after_request; -sub before_request :prototype(&) ($f) { push @before_request, $f } -sub after_request :prototype(&) ($f) { unshift @after_request, $f } +sub _caller_info { + my($i, @c, @x) = (1); + $x[0] !~ /^FU(?:$|::)/ && push @c, [ @x[0..3] ] while (@x = caller $i++); + \@c +} + +our @before_request; +our @after_request; +sub before_request :prototype(&) ($f) { push @before_request, [ $f, _caller_info ] } +sub after_request :prototype(&) ($f) { unshift @after_request, [ $f, _caller_info ] } -my %path_routes; -my %re_routes; +our %path_routes; +our %re_routes; sub _add_route($path, $sub, $method) { if (ref $path eq 'REGEXP' || ref $path eq 'Regexp') { - push $re_routes{$method}->@*, [ qr/^$path$/, $sub ]; + push $re_routes{$method}->@*, [ qr/^$path$/, $sub, _caller_info ]; } elsif (!ref $path) { confess("A route has already been registered for $method $path") if $path_routes{$method}{$path}; - $path_routes{$method}{$path} = $sub; + $path_routes{$method}{$path} = [ $sub, _caller_info ]; } else { confess('Path argument in route registration must be a string or regex'); } @@ -207,6 +213,12 @@ sub _monitor { } +our $debug_info = []; +sub debug_info($path, $storage=undef, $history=100) { + $debug_info = { path => $path, storage => $storage, history => $history } +} + + our $hdrname_re = qr/[!#\$\%&'\*\+-\.^_`\|~0-9a-zA-Z]{1,127}/; our $method_re = qr/(?:HEAD|GET|POST|DELETE|OPTIONS|PUT|PATCH|QUERY)/; @@ -305,7 +317,7 @@ sub _log_err($e) { } sub _do_req($c) { - local $REQ = { hdr => {}, trace_start => time }; + local $REQ = { hdr => {}, trace_start => time, trace_id => sprintf('%010x%08x%04x', int time, $$, int rand 1<<16) }; local $fu = bless {}, 'FU::obj'; $REQ->{ip} = $c->{client_sock} isa 'IO::Socket::INET' ? $c->{client_sock}->peerhost : '127.0.0.1'; @@ -315,15 +327,27 @@ sub _do_req($c) { _read_req $c; $REQ->{trace_start} = time; - for my $h (@before_request) { $h->() } - my $path = fu->path; my $method = fu->method eq 'HEAD' ? 'GET' : fu->method; + + # Intercept requests for debug_info, ensuring no website hooks get called. + if (debug && $method eq 'GET' && $debug_info->{path} && $path eq $debug_info->{path}) { + require FU::DebugImpl; + FU::DebugImpl::render(); + fu->_flush($c->{fcgi_obj} || $c->{client_sock}); + fu->error(-1); + } + + for my $h (@before_request) { $h->[0]->() } + my $r = $path_routes{$method}{$path}; - if ($r) { $r->() } - else { + if ($r) { + $REQ->{trace_han} = [ $path, $r->[1] ]; + $r->[0]->(); + } else { for $r ($re_routes{ fu->method }->@*) { if($path =~ $r->[0]) { + $REQ->{trace_han} = [ $r->[0], $r->[2] ]; $r->[1]->(@{^CAPTURE}); fu->done; } @@ -333,11 +357,12 @@ sub _do_req($c) { 1; }; return if !$ok && ref $@ eq 'FU::err' && $@->[0] == -1; + $REQ->{trace_exn} = $ok ? undef : $@; my $err = $ok || _is_done($@) ? undef : $@; _log_err $err; for my $h (@after_request) { - $ok = eval { $h->(); 1 }; + $ok = eval { $h->[0]->(); 1 }; _log_err $@ if !$ok; $err = $@ if !$err && !$ok && !_is_done($@); } @@ -361,7 +386,12 @@ sub _do_req($c) { $REQ->{trace_end} = time; fu->_flush($c->{fcgi_obj} || $c->{client_sock}); - my $proc_ms = (time - $REQ->{trace_start}) * 1000; + if (debug && $REQ->{trace_id} && $debug_info->{history} && $debug_info->{storage}) { + require FU::DebugImpl; + FU::DebugImpl::save(); + } + + my $proc_ms = ($REQ->{trace_end} - $REQ->{trace_start}) * 1000; log_write(sprintf "%.0fms%s %s-%s %s-%d\n", $proc_ms, $REQ->{trace_nsql} ? sprintf ' (sql %.0f+%.0fms, %d/%d/%d)', @@ -639,11 +669,15 @@ sub formdata { # Response generation methods -sub done { die bless [200,'Done'], 'FU::err' } -sub error($,$code,$msg=$code) { die bless [$code,$msg], 'FU::err' } +sub done { die bless [200,'Done',FU::_caller_info], 'FU::err' } +sub error($,$code,$msg=$code) { die bless [$code,$msg,FU::_caller_info], 'FU::err' } sub status($, $code) { $FU::REQ->{status} = $code } -sub set_body($, $data) { $FU::REQ->{resbody} = $data } +sub set_body($, $data) { + confess "Invalid undef body" if !defined $data; + confess "Invalid attempt to set body to $data" if ref $data; + $FU::REQ->{resbody} = $data; +} sub reset { fu->status(200); @@ -950,9 +984,29 @@ handling and performance tracing. Enable or disable debug mode. Returns the current mode when no argument is given. -Debug mode currently only enables more verbose logging, but it may influence -other features in the future as well. You're of course free to use the debug -setting to enable or disable debugging features in your own code. +Debug mode currently enables more verbose logging and the C +interface below. It may influence other features in the future as well. You're +of course free to use the debug setting to enable or disable debugging features +in your own code. + +=item FU::debug_info($path, $storage, $history) + +Enable the built-in web interface for inspecting debug info. The interface is +accessible from your browser at the given C<$path>, which is matched against +C<< fu->path >>. + +When the optional C<$storage> argument is given and set to an existing +directory, detailed request data is logged and stored in that directory, which +is then made available through the web interface. The C<$history> argument sets +the number of requests to keep, which defaults to 100. + +Request logging and the web interface are only available when C mode +is enabled. + +B This interface exposes internal and potentially sensitive +information. When this option is configured, make sure to B +enable debug mode in production! Or at least set an absolutely impossible to +guess C<$path>. =item FU::log_slow_reqs($ms) @@ -1271,7 +1325,7 @@ though. This method loads the entire file contents in memory and does not support range requests, so DO NOT use it to send large files. Actual web servers are much -more efficient at sending static files. +more efficient at serving static files. The content-type header is determined from the file extension in C<$path>, using the configured C. As fallback, files that look like they @@ -1424,7 +1478,7 @@ external process manager. =back -When C<--monitor> or C<--max-reqs> are set or C<<--proc>> is larger than 1, FU +When C<--monitor> or C<--max-reqs> are set or C<--proc> is larger than 1, FU starts a supervisor process to ensure the requested number of worker processes are running and that they are restarted when necessary. When FU has been loaded with the C<-spawn> flag, this supervisor process runs directly from the context diff --git a/FU/DebugImpl.pm b/FU/DebugImpl.pm new file mode 100644 index 0000000..1c1875e --- /dev/null +++ b/FU/DebugImpl.pm @@ -0,0 +1,353 @@ +# Internal module used by FU.pm +package FU::DebugImpl 0.1; +use v5.36; +use experimental 'for_list'; +use FU; +use FU::XMLWriter ':html5_', 'fragment', 'xml_escape'; +use Time::HiRes 'time'; +use POSIX 'strftime'; + +sub fmtts { strftime '%Y-%m-%d %H:%M:%S UTC', gmtime shift } + +sub loc_($loc) { + txt_ '[internal]' if !@$loc; + for (0..$#$loc) { + br_ if $_; + my $l = $loc->[$_]; + my $f = $_ == $#$loc ? '(main)' : $loc->[$_+1][3]; + txt_ "$l->[1]:$l->[2] $f"; + } +} + +sub fmtpre_($code) { + lit_ xml_escape($code) =~ s/^\s+//r =~ s/\s+$//r =~ s/\n/
/rg; +} + +sub clean_re($str) { + # Regex formatting isn't stable, but this cleans up the crap I'm seeing a little bit. + "$str" =~ s/^\(\?\^u:\^\(\?\^u://r =~ s/\)\$\)$//r; +} + +my @tabs = ( + req => sub { + table_ sub { + tr_ sub { td_ 'Method'; td_ fu->method }; + tr_ sub { td_ 'Path'; td_ fu->path }; + tr_ sub { td_ 'Query'; td_ fu->query }; + tr_ sub { td_ 'Client IP'; td_ fu->ip }; + tr_ sub { td_ 'Received'; td_ fmtts $FU::REQ->{trace_start} }; + }; + h2_ 'Headers'; + table_ sub { + tr_ sub { + td_ $_; + td_ fu->headers->{$_}; + } for sort keys fu->headers->%*; + }; + h2_ 'Body'; + p_ 'TODO'; + ('Request') + }, + + res => sub { + my $r = $FU::REQ; + return () if !exists $r->{trace_end}; + table_ sub { + tr_ sub { td_ 'Status'; td_ $r->{status} }; + tr_ sub { + td_ 'Handler'; + td_ $r->{trace_han} ? sub { + txt_ clean_re $r->{trace_han}[0]; + br_; + loc_ $r->{trace_han}[1]; + } : 'N/A'; + }; + my $exn = $r->{trace_exn}; + tr_ sub { + td_ 'Exception'; + td_ !defined $exn ? 'N/A' : ref $exn eq 'FU::err' ? sub { + txt_ $exn->[0]; + txt_ " $exn->[1]" if $exn->[1] ne $exn->[0]; + br_; + loc_ $exn->[2]; + } : $exn; + }; + tr_ sub { td_ 'Timing'; td_ sprintf '%.1f ms', ($r->{trace_end}-$r->{trace_start})*1000 }; + }; + h2_ 'Headers'; + table_ sub { + tr_ sub { + td_ $_; + td_ $r->{reshdr}{$_}; + } for keys $r->{reshdr}->%*; + }; + ('Response') + }, + + sql => sub { + return () if !$FU::REQ->{trace_sql}; + table_ sub { + thead_ sub { tr_ sub { + td_ class => 'num', 'Exec'; + td_ class => 'num', 'Prep'; + td_ class => 'num', 'Rows'; + td_ 'Query'; + } }; + tr_ sub { + td_ class => 'num', sprintf '%.1f ms', $_->{exec_time}*1000; + td_ class => 'num', !defined $_->{prepare_time} ? '-' : $_->{prepare_time} ? sprintf '%.1f ms', $_->{prepare_time}*1000 : 'cache'; + td_ class => 'num', $_->{nrows}; + td_ class => 'code', sub { fmtpre_ $_->{query} }; + # TODO: Params, both separate and interpolated + } for $FU::REQ->{trace_sql}->@*; + }; + ('Queries', scalar $FU::REQ->{trace_sql}->@*) + }, + + fu => sub { + return () if !keys fu->%*; + # TODO: Contents of the 'fu' object + ('fu obj') + }, + + proc => sub { + table_ sub { + tr_ sub { td_ 'PID'; td_ $$ }; + tr_ sub { td_ 'NAME'; td_ $0 }; + tr_ sub { td_ 'ARGV'; td_ join ' ', @ARGV }; + tr_ sub { td_ 'USER'; td_ "$< / $>" }; + tr_ sub { td_ 'GROUP'; td_ "$( / $)" }; + tr_ sub { td_ 'OS'; td_ $^O }; + tr_ sub { td_ 'Perl'; td_ $^V }; + tr_ sub { td_ 'Up for'; td_ sprintf '%.3f seconds', time - $^T }; + }; + ('Process') + }, + + env => sub { + table_ sub { + tr_ sub { + td_ $_; + td_ $ENV{$_}; + } for sort keys %ENV; + }; + ('Environment', scalar keys %ENV) + }, + + inc => sub { + table_ sub { + tr_ sub { + td_ $_; + td_ $INC{$_}; + } for sort keys %INC; + }; + ('Included files', scalar keys %INC) + }, + + han => sub { + my $cnt = 0; + my sub tbl_($title, $lst) { + return if !@$lst; + $cnt += @$lst; + h2_ $title; + table_ sub { + tr_ sub { + td_ clean_re $_->[0]; + td_ sub { loc_ $_->[1] }; + } for @$lst; + }; + } + for my $meth (qw/GET POST DELETE OPTIONS PUT PATCH QUERY/) { + my($path, $re) = ($FU::path_routes{$meth}, $FU::re_routes{$meth}); + my @lst = ( + (map [$_, $path->{$_}[1]], $path ? sort keys %$path : ()), + (map [$_->[0], $_->[2]], $re ? @$re : ()) + ); + tbl_ $meth, \@lst; + } + tbl_ before_request => [ map [$_, $FU::before_request[$_][1]], 0..$#FU::before_request ]; + tbl_ after_request => [ map [$_, $FU::after_request[$_][1]], 0..$#FU::after_request ]; + ('Handlers', $cnt) + }, + + pgst => sub { + return () if !$FU::DB; + my $lst = eval { $FU::DB->q( + 'SELECT generic_plans + custom_plans, statement FROM pg_prepared_statements ORDER BY generic_plans + custom_plans DESC, statement' + )->cache(0)->alla } || do { warn "Unable to collect prepared statement list: $@"; return () }; + return () if !@$lst; + table_ sub { + thead_ sub { tr_ sub { + td_ 'Num'; + td_ 'Query'; + } }; + tr_ sub { + td_ $_->[0]; + td_ class => 'code', sub { fmtpre_ $_->[1] }; + } for @$lst; + }; + ('Prepared statements', scalar @$lst) + }, +); + + +sub collect { + my @t; + for my ($id, $sub) (@tabs) { + my($title, $num); + my $html = fragment { ($title, $num) = $sub->() }; + push @t, { id => $id, title => $title, num => $num, html => $html } if $title; + } + \@t +} + + +sub framework_($data) { + html_ sub { + head_ sub { + title_ 'FU Debugging Interface'; + meta_ name => 'viewport', content => 'width=device-width, initial-scale=1.0, user-scalable=yes'; + style_ type => 'text/css', <<~_; + html { box-sizing: border-box; color: #000; background: #fff } + *, *:before, *:after { box-sizing: inherit } + * { margin: 0; padding: 0; font: inherit; color: inherit } + + body { display: grid; grid: 45px 400px / 220px auto; } + header { grid-column: 1 / 3; grid-row: 1 / 2 } + nav { grid-column: 1 / 2; grid-row: 2 / 3 } + main { grid-column: 2 / 3; grid-row: 2 / 3 } + + header, nav { background: #eee } + main { border-top: 2px solid #009; border-left: 2px solid #009 } + nav { border-bottom: 2px solid #009 } + + header { display: flex; justify-content: space-between; padding: 10px } + header h1 { font-size: 20px; font-weight: bold } + header menu { list-style-type: none; display: flex; gap: 15px } + + body > input { display: none } + nav { padding-top: 20px } + nav menu { list-style-type: none } + nav label { display: block; width: 100%; padding: 2px 10px; cursor: pointer; white-space: nowrap } + nav label:hover { background-color: #fff } + nav label span { float: right; font-size: 80% } + + main { padding: 10px 20px } + main h2 { margin: 30px 0 5px -10px; font-size: 20px; font-weight: bold } + main h2:first-child { margin-top: 0 } + + p, pre, table { margin: 5px 0 } + pre, .code { font-family: monospace; white-space: pre } + table { border-collapse: collapse } + td { padding: 1px 10px 1px 0; font-size: 12px; vertical-align: top } + tr:hover { background-color: #eee } + thead { font-weight: bold } + .num { text-align: right; white-space: nowrap } + _ + style_ type => 'text/css', join "\n", map +( + "#tab_$_:checked ~ nav menu li label[for=tab_$_] { background-color: #fff }", + "#tab_$_:not(:checked) ~ main #tabc_$_ { display: none }", + ), map $_->{id}, @$data; + }; + body_ sub { + header_ sub { + h1_ 'FU Debugging Interface'; + menu_ sub { + li_ sub { a_ href => '?last', 'Last' }; + li_ sub { a_ href => '?cur', 'Current' }; + li_ sub { a_ href => '?', 'Listing' }; + }; + }; + input_ type => 'radio', name => 'tab', id => "tab_$_->{id}", checked => $_ eq $data->[0] ? 'checked' : undef for @$data; + nav_ sub { + menu_ sub { + li_ sub { + label_ for => "tab_$_->{id}", sub { + txt_ $_->{title}; + span_ $_->{num} if defined $_->{num}; + } + } for @$data; + }; + } if @$data; + main_ sub { + div_ id => "tabc_$_->{id}", sub { + h2_ $_->{title}; + lit_ $_->{html}; + } for @$data; + }; + }; + }; +} + +sub listing { + opendir my $dh, $FU::debug_info->{storage} or do { + warn "Error opening '$FU::debug_info->{storage}': $!\n"; + return; + }; + my @f; + /^fu-([0-9a-f]{22})\.txt$/ && push @f, $1 while (readdir $dh); + return [sort @f]; +} + +sub listing_ { + my $lst = listing; + return p_ 'Request logging disabled.' if !$FU::debug_info->{storage} || !$FU::debug_info->{history}; + return p_ 'No requests logged.' if !@$lst; + table_ sub { + tr_ sub { + open my $fh, '<:utf8', "$FU::debug_info->{storage}/fu-$_.txt" or return; + my($ts, $time, $status, $method, $uri) = split / /, scalar <$fh>, 5; + td_ sub { a_ href => "?$_", $_ }; + td_ class => 'num', fmtts $ts; + td_ class => 'num', sprintf '%.0f ms', $time*1000; + td_ class => 'num', $status; + td_ $method; + td_ $uri; + } for reverse @$lst; + } +} + +sub load($id) { + open my $fn, '<', "$FU::debug_info->{storage}/fu-$id.txt" or fu->error(404); + scalar <$fn>; + local $/=undef; + fu->set_body(scalar <$fn>); +} + +sub render { + my $q = fu->query; + if (!$q) { + fu->set_body(framework_ [{id => 'lst', title => 'Recent Requests', html => fragment \&listing_ }]); + } elsif ($q eq 'cur') { + fu->set_body(framework_ collect); + } elsif ($q eq 'last') { + my $lst = listing; + fu->error(404) if !@$lst; + load $lst->[$#$lst]; + } elsif ($FU::debug_info->{storage} && $q =~ /^[0-9a-f]{22}$/) { + load $q; + } else { + fu->error(404); + } +} + +sub save { + my $files = listing; + unlink sprintf '%s/fu-%s.txt', $FU::debug_info->{storage}, shift @$files while @$files >= $FU::debug_info->{history}; + + delete $FU::REQ->{txn}; + + my $fn = "$FU::debug_info->{storage}/fu-$FU::REQ->{trace_id}.txt"; + open my $fh, '>', $fn or do { + warn "Error opening '$fn': $!\n"; + return; + }; + my $line = sprintf "%d %f %s %s %s\n", + time, time - $FU::REQ->{trace_start}, $FU::REQ->{status}, + fu->method, fu->path.(fu->query?'?'.fu->query:''); + utf8::encode($line); + print $fh $line; + print $fh framework_ collect; +} + +1; From 3662931fc2dd45e287f9709ad62fee191a4b2fdc Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 27 Feb 2025 14:00:28 +0100 Subject: [PATCH 07/99] FU: Add "denied" and "notfound" convenience methods --- FU.pm | 14 ++++++++++++-- FU/DebugImpl.pm | 8 ++++---- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/FU.pm b/FU.pm index d5ce879..55d8454 100644 --- a/FU.pm +++ b/FU.pm @@ -352,7 +352,7 @@ sub _do_req($c) { fu->done; } } - fu->error(404); + fu->notfound; } 1; }; @@ -671,6 +671,8 @@ sub formdata { sub done { die bless [200,'Done',FU::_caller_info], 'FU::err' } sub error($,$code,$msg=$code) { die bless [$code,$msg,FU::_caller_info], 'FU::err' } +sub denied { fu->error(403) } +sub notfound { fu->error(404) } sub status($, $code) { $FU::REQ->{status} = $code } sub set_body($, $data) { @@ -1137,7 +1139,7 @@ the following is a valid approach to handle user authentication: }; FU::get '/registered-users-only', sub { - fu->error(403) if !fu->{user}; + fu->denied if !fu->{user}; }; In addition to the request information and response generation methods @@ -1266,6 +1268,14 @@ elsewhere, this ends up in running the appropriate C handler. C<$message> is optional and currently only used for logging. +=item fu->denied + +Alias for C<< fu->error(403) >>. + +=item fu->notfound + +Alias for C<< fu->error(404) >>. + =item fu->reset Reset the response to an empty state, basically undoing all effects of the diff --git a/FU/DebugImpl.pm b/FU/DebugImpl.pm index 1c1875e..dddc7dc 100644 --- a/FU/DebugImpl.pm +++ b/FU/DebugImpl.pm @@ -79,7 +79,7 @@ my @tabs = ( tr_ sub { td_ $_; td_ $r->{reshdr}{$_}; - } for keys $r->{reshdr}->%*; + } for sort keys $r->{reshdr}->%*; }; ('Response') }, @@ -308,7 +308,7 @@ sub listing_ { } sub load($id) { - open my $fn, '<', "$FU::debug_info->{storage}/fu-$id.txt" or fu->error(404); + open my $fn, '<', "$FU::debug_info->{storage}/fu-$id.txt" or fu->notfound; scalar <$fn>; local $/=undef; fu->set_body(scalar <$fn>); @@ -322,12 +322,12 @@ sub render { fu->set_body(framework_ collect); } elsif ($q eq 'last') { my $lst = listing; - fu->error(404) if !@$lst; + fu->notfound if !@$lst; load $lst->[$#$lst]; } elsif ($FU::debug_info->{storage} && $q =~ /^[0-9a-f]{22}$/) { load $q; } else { - fu->error(404); + fu->notfound } } From 327fd9ea5051418bc86f85fcd7b6bddf30779a1d Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 27 Feb 2025 18:24:14 +0100 Subject: [PATCH 08/99] Pg: Support type override configuration --- FU.xs | 3 ++ FU/Pg.pm | 50 ++++++++++++++++++++++- c/pgconn.c | 96 +++++++++++++++++++++++++++++++++++++++++---- c/pgtypes.c | 24 +++++++++--- t/pgtypes-dynamic.t | 76 ++++++++++++++++++++++++++--------- 5 files changed, 215 insertions(+), 34 deletions(-) diff --git a/FU.xs b/FU.xs index cddf171..ffd3df6 100644 --- a/FU.xs +++ b/FU.xs @@ -229,6 +229,9 @@ void q(fupg_conn *c, SV *sv, ...) FUPG_CONN_COOKIE; ST(0) = fupg_q(aTHX_ c, c->stflags, SvPVutf8_nolen(sv), ax, items); +void _set_type(fupg_conn *c, SV *name, SV *sendsv, SV *recvsv) + CODE: + fupg_set_type(c, name, sendsv, recvsv); MODULE = FU PACKAGE = FU::Pg::txn diff --git a/FU/Pg.pm b/FU/Pg.pm index 284cb72..59fa799 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -13,6 +13,13 @@ package FU::Pg::conn { my($sql, $params) = FU::SQL::SQL(@_)->compile(placeholder_style => 'pg', in_style => 'pg'); $s->q($sql, @$params); } + + sub set_type($s, $n, @arg) { + Carp::confess("Invalid number of arguments") if @arg == 0 || (@arg > 1 && @arg % 2); + return $s->_set_type($n, $arg[0], $arg[0]) if @arg == 1; + my %arg = @arg; + $s->_set_type($n, $arg{send}, $arg{recv}); + } }; *FU::Pg::txn::Q = \*FU::Pg::conn::Q; @@ -625,12 +632,51 @@ any of these. =back +=head3 Overriding types + +The default conversion for each type can be changed: + +=over + +=item $conn->set_type($affected_type, $type) + +=item $conn->set_type($affected_type, send => $type, recv => $type) + +Change how C<$affected_type> is being converted when used as a bind parameter +(I) or when received from query results (I). The two-argument +version is equivalent to setting I and I to the same C<$type>. + +Types can be specified either by their numeric I or by name. In the latter +case, the name must exactly match the internal type name used by PostgreSQL. +Note that this "internal type name" does not always match the names used in +documentation. For example, I, I and I should be +specified as I, I and I, respectively, and the I type +is internally called I. The full list of recognized types in your +database can be queried with: + + SELECT oid, typname FROM pg_type; + +The C<$affected_type> does not actually have to exist in the database, this +method only stores the type in its internal configuration, which is consulted +upon executing a query that takes the type as bind parameter or when it returns +a column of that type. + +The given C<$type> arguments must refer to a built-in type supported by this +module. Types can also be set to I to restore the conversion to its +default. + +=back + +I Type override examples and a warning about domain types. + +I Some handy special types for overriding common conversions. + +I Support for custom types through callbacks. + I Methods to convert between the various formats. I Methods to query type info. -I Custom per-type configuration. - =head2 Errors All methods can throw an exception on error. When possible, the error message diff --git a/c/pgconn.c b/c/pgconn.c index a2f1eec..c72e747 100644 --- a/c/pgconn.c +++ b/c/pgconn.c @@ -25,6 +25,15 @@ static void fupg_prep_destroy(fupg_prep *p) { safefree(p); } +typedef struct { + const fupg_type *send, *recv; +} fupg_override; + +#define fupg_name_hash(v) kh_hash_str((v).n) +#define fupg_name_eq(a,b) kh_eq_str((a).n, (b).n) +KHASHL_MAP_INIT(KH_LOCAL, fupg_oid_overrides, fupg_oid_overrides, Oid, fupg_override, kh_hash_uint32, kh_eq_generic); +KHASHL_MAP_INIT(KH_LOCAL, fupg_name_overrides, fupg_name_overrides, fupg_name, fupg_override, fupg_name_hash, fupg_name_eq); + typedef struct { SV *self; @@ -38,6 +47,8 @@ typedef struct { unsigned int prep_max; unsigned int prep_cur; /* Number of prepared statements not associated with an active $st object */ fupg_type *types; + fupg_oid_overrides *oidtypes; + fupg_name_overrides *nametypes; fupg_records *records; fupg_prepared *prep_map; fupg_prep *prep_head, *prep_tail; /* Inserted into head, removed at tail */ @@ -166,6 +177,8 @@ static SV *fupg_connect(pTHX_ const char *str) { c->ntypes = 0; c->types = NULL; c->records = fupg_records_init(); + c->oidtypes = fupg_oid_overrides_init(); + c->nametypes = fupg_name_overrides_init(); c->prep_cur = 0; c->prep_max = 256; c->prep_map = fupg_prepared_init(); @@ -196,6 +209,8 @@ static void fupg_conn_destroy(pTHX_ fupg_conn *c) { PQfinish(c->conn); if (c->buf.sv) SvREFCNT_dec(c->buf.sv); safefree(c->types); + fupg_oid_overrides_destroy(c->oidtypes); + fupg_name_overrides_destroy(c->nametypes); khint_t k; kh_foreach(c->records, k) safefree(kh_val(c->records, k)); fupg_records_destroy(c->records); @@ -352,6 +367,47 @@ static void fupg_prepared_unref(fupg_conn *c, fupg_prep *p) { /* Type handling */ +static const fupg_type *fupg_resolve_builtin(pTHX_ SV *name) { + SvGETMAGIC(name); + if (!SvOK(name)) return NULL; + UV uv; + const char *pv = SvPV_nomg_nolen(name); + const fupg_type *t = grok_atoUV(pv, &uv, NULL) && uv <= (UV)UINT_MAX + ? fupg_builtin_byoid((Oid)uv) + : fupg_builtin_byname(pv); + if (!t) fu_confess("No builtin type found with oid or name '%s'", pv); + return t; +} + +static void fupg_set_type(pTHX_ fupg_conn *c, SV *name, SV *sendsv, SV *recvsv) { + fupg_override o; + o.send = fupg_resolve_builtin(sendsv); + o.recv = fupg_resolve_builtin(recvsv); + if ((o.send && o.send->send == fupg_send_array) || (o.recv && o.recv->recv == fupg_recv_array)) + fu_confess("Cannot set a type to array, override the underlying element type instead"); + /* Can't currently happen since we have no records in the builtin type + * list, but catch this just in case that changes. */ + if ((o.send && o.send->send == fupg_send_record) || (o.recv && o.recv->recv == fupg_recv_record)) + fu_confess("Cannot set a type to record"); + + UV uv; + STRLEN len; + const char *pv = SvPV(name, len); + int k, i; + if (grok_atoUV(pv, &uv, NULL) && uv <= (UV)UINT_MAX) { + k = fupg_oid_overrides_put(c->oidtypes, (Oid)uv, &i); + kh_val(c->oidtypes, k) = o; + } else if (len < sizeof(fupg_name)) { + fupg_name n; + strcpy(n.n, pv); + k = fupg_name_overrides_put(c->nametypes, n, &i); + kh_val(c->nametypes, k) = o; + } else { + fu_confess("Invalid type oid or name '%s'", pv); + } +} + + /* XXX: It feels a bit wasteful to load *all* types; even on an empty database * that's ~55k of data, but it's easier and (potentially) faster than fetching * each type seperately as we encounter them. @@ -382,7 +438,7 @@ static void fupg_refresh_types(pTHX_ fupg_conn *c) { for (i=0; intypes; i++) { fupg_type *t = c->types + i; t->oid = fu_frombeU(32, PQgetvalue(r, i, 0)); - snprintf(t->name, sizeof(t->name), "%s", PQgetvalue(r, i, 1)); + snprintf(t->name.n, sizeof(t->name.n), "%s", PQgetvalue(r, i, 1)); char typ = *PQgetvalue(r, i, 2); t->elemoid = fu_frombeU(32, PQgetvalue(r, i, 3)); @@ -448,7 +504,7 @@ static const fupg_record *fupg_lookup_record(fupg_conn *c, Oid oid) { int i; for (i=0; inattrs; i++) { record->attrs[i].oid = fu_frombeU(32, PQgetvalue(r, i, 0)); - snprintf(record->attrs[i].name, sizeof(record->attrs->name), "%s", PQgetvalue(r, i, 1)); + snprintf(record->attrs[i].name.n, sizeof(record->attrs->name.n), "%s", PQgetvalue(r, i, 1)); } k = fupg_records_put(c->records, oid, &i); kh_val(c->records, k) = record; @@ -461,6 +517,21 @@ static const fupg_record *fupg_lookup_record(fupg_conn *c, Oid oid) { #define FUPGT_SEND 2 #define FUPGT_RECV 4 +static const fupg_type *fupg_override_get(fupg_conn *c, int flags, Oid oid, const fupg_name *name) { + khint_t k; + +#define R(t) if (k != kh_end(c->t)) return flags & FUPGT_SEND ? kh_val(c->t, k).send : kh_val(c->t, k).recv + if (name == NULL) { + k = fupg_oid_overrides_get(c->oidtypes, oid); + R(oidtypes); + } else { + k = fupg_name_overrides_get(c->nametypes, *name); + R(nametypes); + } +#undef R + return NULL; +} + static void fupg_tio_setup(pTHX_ fupg_conn *conn, fupg_tio *tio, int flags, Oid oid, int *refresh_done) { tio->oid = oid; if (flags & FUPGT_TEXT) { @@ -470,14 +541,25 @@ static void fupg_tio_setup(pTHX_ fupg_conn *conn, fupg_tio *tio, int flags, Oid return; } - const fupg_type *e, *t = fupg_lookup_type(aTHX_ conn, refresh_done, oid); + /* Minor wart? When the type is overridden by oid, the name & oid in error + * messages will be that of the builtin type. When overridden by name, the + * name will be correct but the oid is still of the builtin type. + * Some send/recv functions have slightly different behavior based on oid, + * in those cases this behavior is useful. */ + + const fupg_type *e, *t; + e = t = fupg_override_get(conn, flags, oid, NULL); + if (!t) t = fupg_lookup_type(aTHX_ conn, refresh_done, oid); if (!t) fu_confess("No type found with oid %u", oid); - if (!t->send || !t->recv) fu_confess("Unable to send or receive type '%s' (oid %u)", t->name, oid); - tio->name = t->name; + tio->name = t->name.n; + if (!e && (e = fupg_override_get(conn, flags, 0, &t->name))) t = e; + + if (flags & FUPGT_SEND && !t->send) fu_confess("Unable to send type '%s' (oid %u)", tio->name, oid); + if (flags & FUPGT_RECV && !t->recv) fu_confess("Unable to receive type '%s' (oid %u)", tio->name, oid); if (flags & FUPGT_SEND ? t->send == fupg_send_domain : t->recv == fupg_recv_domain) { e = fupg_lookup_type(aTHX_ conn, refresh_done, t->elemoid); - if (!e) fu_confess("Base type %u not found for domain '%s' (oid %u)", t->elemoid, t->name, t->oid); + if (!e) fu_confess("Base type %u not found for domain '%s' (oid %u)", t->elemoid, tio->name, t->oid); t = e; } @@ -488,7 +570,7 @@ static void fupg_tio_setup(pTHX_ fupg_conn *conn, fupg_tio *tio, int flags, Oid fupg_tio_setup(aTHX_ conn, tio->arrayelem, flags, t->elemoid, refresh_done); } else if (flags & FUPGT_SEND ? tio->send == fupg_send_record : tio->recv == fupg_recv_record) { tio->record.info = fupg_lookup_record(conn, t->elemoid); - if (!tio->record.info) fu_confess("Unable to find attributes for record type '%s' (oid %u, relid %u)", t->name, t->oid, t->elemoid); + if (!tio->record.info) fu_confess("Unable to find attributes for record type '%s' (oid %u, relid %u)", tio->name, t->oid, t->elemoid); tio->record.tio = safecalloc(tio->record.info->nattrs, sizeof(*tio->record.tio)); int i; for (i=0; irecord.info->nattrs; i++) diff --git a/c/pgtypes.c b/c/pgtypes.c index cb4eb41..8a44471 100644 --- a/c/pgtypes.c +++ b/c/pgtypes.c @@ -7,12 +7,16 @@ typedef void (*fupg_send_fn)(pTHX_ const fupg_tio *, SV *, fustr *); /* Receive function, takes a binary string and should return a Perl value. */ typedef SV *(*fupg_recv_fn)(pTHX_ const fupg_tio *, const char *, int); +typedef struct { + char n[64]; +} fupg_name; + /* Record/composite type definition */ typedef struct { int nattrs; struct { Oid oid; - char name[64]; + fupg_name name; } attrs[]; } fupg_record; @@ -34,7 +38,7 @@ struct fupg_tio { typedef struct { Oid oid; Oid elemoid; /* For arrays & domain types; relid for records */ - char name[64]; + fupg_name name; fupg_send_fn send; fupg_recv_fn recv; } fupg_type; @@ -377,7 +381,7 @@ RECVFN(record) { r = ctx->record.tio[i].recv(aTHX_ ctx->record.tio+i, buf, vlen); buf += vlen; len -= vlen; } - hv_store(hv, ctx->record.info->attrs[i].name, -strlen(ctx->record.info->attrs[i].name), r, 0); + hv_store(hv, ctx->record.info->attrs[i].name.n, -strlen(ctx->record.info->attrs[i].name.n), r, 0); } return SvREFCNT_inc(sv); } @@ -393,7 +397,7 @@ SENDFN(record) { I32 i; for (i=0; irecord.info->nattrs; i++) { fustr_writebeI(32, out, ctx->record.info->attrs[i].oid); - SV **rsv = hv_fetch(hv, ctx->record.info->attrs[i].name, -strlen(ctx->record.info->attrs[i].name), 0); + SV **rsv = hv_fetch(hv, ctx->record.info->attrs[i].name.n, -strlen(ctx->record.info->attrs[i].name.n), 0); if (!rsv || !*rsv) { fustr_writebeI(32, out, -1); continue; @@ -711,8 +715,8 @@ SENDFN(date) { B( 5069, "xid8", uint8 ) static const fupg_type fupg_builtin[] = { -#define B(oid, name, fun) { oid, 0, name"\0", fupg_send_##fun, fupg_recv_##fun }, -#define A(oid, name, eoid) { oid, eoid, name"\0", fupg_send_array, fupg_recv_array }, +#define B(oid, name, fun) { oid, 0, {name"\0"}, fupg_send_##fun, fupg_recv_##fun }, +#define A(oid, name, eoid) { oid, eoid, {name"\0"}, fupg_send_array, fupg_recv_array }, BUILTINS #undef B #undef A @@ -737,3 +741,11 @@ static const fupg_type *fupg_type_byoid(const fupg_type *list, int len, Oid oid) static const fupg_type *fupg_builtin_byoid(Oid oid) { return fupg_type_byoid(fupg_builtin, FUPG_BUILTIN, oid); } + +static const fupg_type *fupg_builtin_byname(const char *name) { + size_t i; + for (i=0; iQ('SELECT 1', IN([1,2,3]))->param_types, [1007]; is $conn->Q('SELECT 1', IN([1,2,3]))->val, 1; ok !eval { $conn->q('SELECT $1::aclitem', '')->exec; 1 }; -like $@, qr/Unable to send or receive/; +like $@, qr/Unable to send type/; + + +$conn->set_type(int4 => recv => 'bytea'); +is $conn->q('SELECT 5::int4')->val, "\0\0\0\5"; +is_deeply $conn->q('SELECT ARRAY[5::int4]')->val, ["\0\0\0\5"]; + +$conn->set_type(int4 => send => 'bytea'); +is $conn->q('SELECT $1::int4', "\0\0\0\5")->val, 5; +is_deeply $conn->q('SELECT $1::int4[]', ["\0\0\0\5"])->val, [5]; + +$conn->set_type(int4 => 'int2'); +ok !eval { $conn->q('SELECT 5::int4')->val }; +like $@, qr/Error parsing value/; +ok !eval { $conn->q('SELECT $1::int4', 5)->val }; +like $@, qr/insufficient data left in message/; + +$conn->set_type(int4 => undef); +is $conn->q('SELECT 5::int4')->val, 5; + +ok !eval { $conn->set_type(int4 => 1007); }; +like $@, qr/Cannot set a type to array/; + +ok !eval { $conn->set_type(int4 => 1); }; +like $@, qr/No builtin type found/; { my $txn = $conn->txn; @@ -22,29 +46,29 @@ like $@, qr/Unable to send or receive/; is $txn->Q('SELECT 1', IN([1,2,3]))->val, 1; $txn->exec(<<~_); - CREATE TYPE fupg_test_enum AS ENUM('a', 'b', 'ccccccccccccccccccc'); - CREATE DOMAIN fupg_test_domain AS fupg_test_enum CHECK(value IN('a','b')); + CREATE TYPE fupg_test_enum AS ENUM('aa', 'bb', 'ccccccccccccccccccc'); + CREATE DOMAIN fupg_test_domain AS fupg_test_enum CHECK(value IN('aa','bb')); CREATE TYPE fupg_test_record AS ( a int, aenum fupg_test_enum[], domain fupg_test_domain ); _ - is $txn->q("SELECT 'a'::fupg_test_enum")->val, 'a'; + is $txn->q("SELECT 'aa'::fupg_test_enum")->val, 'aa'; is $txn->q('SELECT $1::fupg_test_enum', 'ccccccccccccccccccc')->val, 'ccccccccccccccccccc'; - is_deeply $txn->q("SELECT '{a,b,null}'::fupg_test_enum[]")->val, ['a','b',undef]; - is $txn->q('SELECT $1::fupg_test_enum[]', ['a','b',undef])->text_results->val, '{a,b,NULL}'; + is_deeply $txn->q("SELECT '{aa,bb,null}'::fupg_test_enum[]")->val, ['aa','bb',undef]; + is $txn->q('SELECT $1::fupg_test_enum[]', ['aa','bb',undef])->text_results->val, '{aa,bb,NULL}'; - is $txn->q("SELECT 'a'::fupg_test_domain")->val, 'a'; - is $txn->q('SELECT $1::fupg_test_domain', 'b')->val, 'b'; + is $txn->q("SELECT 'aa'::fupg_test_domain")->val, 'aa'; + is $txn->q('SELECT $1::fupg_test_domain', 'bb')->val, 'bb'; - is_deeply $txn->q("SELECT '{a,b,null}'::fupg_test_domain[]")->val, ['a','b',undef]; - is $txn->q('SELECT $1::fupg_test_domain[]', ['a','b',undef])->text_results->val, '{a,b,NULL}'; + is_deeply $txn->q("SELECT '{aa,bb,null}'::fupg_test_domain[]")->val, ['aa','bb',undef]; + is $txn->q('SELECT $1::fupg_test_domain[]', ['aa','bb',undef])->text_results->val, '{aa,bb,NULL}'; - my $val = { a => undef, aenum => ['a','b'], domain => 'a' }; - is_deeply $txn->q("SELECT '(,\"{a,b}\",a)'::fupg_test_record")->val, $val; - is $txn->q('SELECT $1::fupg_test_record', $val)->text_results->val, '(,"{a,b}",a)'; + my $val = { a => undef, aenum => ['aa','bb'], domain => 'aa' }; + is_deeply $txn->q("SELECT '(,\"{aa,bb}\",aa)'::fupg_test_record")->val, $val; + is $txn->q('SELECT $1::fupg_test_record', $val)->text_results->val, '(,"{aa,bb}",aa)'; $txn->exec(<<~_); CREATE TEMPORARY TABLE fupg_test_table ( @@ -53,15 +77,29 @@ like $@, qr/Unable to send or receive/; ); _ - is_deeply $txn->q(q{SELECT '{"(\"(2,{},b)\",)","(\"(,,)\",b)"}'::fupg_test_table[]})->val, [ - { rec => { a => 2, aenum => [], domain => 'b' }, dom => undef }, - { rec => { a => undef, aenum => undef, domain => undef }, dom => 'b' }, + is_deeply $txn->q(q{SELECT '{"(\"(2,{},bb)\",)","(\"(,,)\",bb)"}'::fupg_test_table[]})->val, [ + { rec => { a => 2, aenum => [], domain => 'bb' }, dom => undef }, + { rec => { a => undef, aenum => undef, domain => undef }, dom => 'bb' }, ]; is $txn->q('SELECT $1::fupg_test_table[]', [ - { rec => { a => 2, aenum => [], domain => 'b' }, dom => undef }, - { rec => {}, dom => 'b', extra => 1 }, - ])->text_results->val, '{"(\"(2,{},b)\",)","(\"(,,)\",b)"}'; + { rec => { a => 2, aenum => [], domain => 'bb' }, dom => undef }, + { rec => {}, dom => 'bb', extra => 1 }, + ])->text_results->val, '{"(\"(2,{},bb)\",)","(\"(,,)\",bb)"}'; + + # Wonky Postgres behavior: selecting a domain directly actually returns the + # underlying type, but going through an array does work. + $conn->set_type(fupg_test_domain => 21); + is_deeply $txn->q("SELECT ARRAY['aa'::fupg_test_domain]")->val, [0x6161]; + + # Bind param type doesn't match column type, argh. + is $txn->q('SELECT $1::fupg_test_domain', 0x6161)->val, 'aa'; + + # Same for selecting from a table :( + $txn->exec("INSERT INTO fupg_test_table VALUES (NULL, 'bb')"); + is $txn->q("SELECT dom FROM fupg_test_table")->val, 'bb'; + $conn->set_type(fupg_test_enum => 21); + is $txn->q("SELECT dom FROM fupg_test_table")->val, 0x6262; } done_testing; From 4686097d00cc1daaf5182849d64d4e1bcfbc8f25 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 28 Feb 2025 11:23:37 +0100 Subject: [PATCH 09/99] Pg: Support custom type overrides with callbacks --- FU.xs | 4 ++ FU/DebugImpl.pm | 3 +- FU/Pg.pm | 123 ++++++++++++++++++++++++++++---------------- c/pgconn.c | 76 ++++++++++++++++++++------- c/pgtypes.c | 49 ++++++++++++++++++ t/pgtypes-dynamic.t | 57 +++++++++++++------- 6 files changed, 227 insertions(+), 85 deletions(-) diff --git a/FU.xs b/FU.xs index ffd3df6..f5cf9a3 100644 --- a/FU.xs +++ b/FU.xs @@ -21,6 +21,10 @@ #define BOOL_INTERNALS_sv_isbool_true(x) SvPVXtrue(x) #endif +/* Disable key/value struct packing in khashl, so we can safely take a pointer + * to values inside the hash table. */ +#define kh_packed + #include "c/khashl.h" #include "c/common.c" #include "c/jsonfmt.c" diff --git a/FU/DebugImpl.pm b/FU/DebugImpl.pm index dddc7dc..0852b1c 100644 --- a/FU/DebugImpl.pm +++ b/FU/DebugImpl.pm @@ -44,8 +44,7 @@ my @tabs = ( td_ fu->headers->{$_}; } for sort keys fu->headers->%*; }; - h2_ 'Body'; - p_ 'TODO'; + # TODO: Body? Certainly useful for JSON ('Request') }, diff --git a/FU/Pg.pm b/FU/Pg.pm index 59fa799..d676dd2 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -555,9 +555,55 @@ send and receive everything as text!" Instead, in the (default) C mode, the responsibility of converting Postgres data to and from Perl values lies with this module. This allows for a lot of type-specific conveniences, but has the downside of requiring special -code for each supported PostgreSQL type. Most of the Postgres core types are -supported by this module and convert in an intuitive way, but here's a few -type-specific notes: +code for every PostgreSQL type. Most of the core types are supported by this +module and convert in an intuitive way, but you can also configure each type +manually: + +=over + +=item $conn->set_type($target_type, $type) + +=item $conn->set_type($target_type, send => $type, recv => $type) + +Change how C<$target_type> is being converted when used as a bind parameter +(I) or when received from query results (I). The two-argument +version is equivalent to setting I and I to the same C<$type>. + +Types can be specified either by their numeric I or by name. In the latter +case, the name must exactly match the internal type name used by PostgreSQL. +Note that this "internal type name" does not always match the names used in +documentation. For example, I, I and I should be +specified as I, I and I, respectively, and the I type +is internally called I. The full list of recognized types in your +database can be queried with: + + SELECT oid, typname FROM pg_type; + +The C<$target_type> does not have to exist in the database when this method is +called. This method only stores the type in its internal configuration, which +is consulted when executing a query that takes the type as bind parameter or +returns a column of that type. + +The following arguments are supported for C<$type>: + +=over + +=item * I, to reset the conversion functions to their default. + +=item * The numeric I or name of a built-in type supported by this module, +to use those conversion functions. + +=item * A subroutine reference that is called to perform the conversion. For +I, the subroutine is given a Perl value as argument and expected to +return a binary string to be sent to Postgres. For I, the subroutine is +given a binary string received from Postgres and expected to return a Perl +value. + +=back + +=back + +Some built-in types deserve a few additional notes: =over @@ -576,14 +622,18 @@ that along as raw binary strings. =item timestamp / timestamptz These are converted to and from seconds since the Unix epoch as a floating -point value, similar to the C (or better: C) -functions. +point value, for easy comparison against C and related functions. The timestamp types in Postgres have microsecond accuracy. Floating point can represent that without loss for dates that are near enough to the epoch (still seems to be fine in 2025, at least), but this conversion may be lossy for dates far beyond or before the epoch. +Postgres internally represents timestamps as microseconds since 2000-01-01 +stored in a 64-bit integer. If you prefer that, use: + + $conn->set_type(timestamptz => 'int8'); + =item date Converted between strings in C format. Postgres accepts a bunch of @@ -598,6 +648,13 @@ While C is a valid JSON value, there's currently no way to distinguish that from SQL C. When sending C as bind parameter, it is sent as SQL C. +If you prefer to work with JSON are raw text values instead, use: + + $conn->set_type(json => 'text'); + +That doesn't I work for the C type. I mean, it works, but then +there's a single C<"\1"> byte prefixed to the string. + =item arrays PostgreSQL arrays automatically convert to and from Perl arrays as you'd @@ -608,7 +665,19 @@ and all arrays sent to Postgres will use their default 1-based indexing. =item records / row types -These are converted to and from hashrefs. +Typed records are converted to and from hashrefs. Untyped records (i.e. values +of the C pseudo-type) are not supported. + +=item domain types + +These are recognized and automatically converted to and from their underlying +type. It may be tempting to use C to configure special type +conversions for domain types, but beware that PostgreSQL reports columns in the +C