diff --git a/ChangeLog b/ChangeLog index 8a0bb73..0774d6b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,96 @@ +1.4 - 2026-01-10 + - FU::Pg: rename q() and Q() to sql() and SQL() (old names still work) + - FU: Improve handling of EPIPE when writing FastCGI response + - FU: Log unclean worker process shutdown + - FU: Fix warning when parsing empty cookie values + - Misc doc fixes + +1.3 - 2025-09-04 + - FU::Validate: Scalar validations now reject control characters by default + - FU::Validate: Add `allow_control` option to override above behavior + - FU::Util: JSON and URI parsing now always permit control characters + - FU::Util: More strict UTF-8 validation on path & URI decoding + - FU::Util: Deprecate `decode_utf8()` + - FU::Util: Deprecate `allow_control` option in `json_parse()` + +1.2 - 2025-07-06 + - FU::Pg: Throw error on non-boolean-looking Perl values for boolean bind + parameters + - FU: Improve setting process status during startup + +1.1 - 2025-06-07 + - FU::SQL: Add IDENT function and `quote_identifier` option + - FU::Pg: Set appropriate `quote_identifier` for `$conn->Q()` + - FU: Improve `--monitor` file change detection + - FU::XMLWriter: Disallow stringification of bare Perl references + - FU::Util::json_parse(): Disallow control characters in strings, add + `allow_control` option to revert to old behavior. + - Some doc fixes + +1.0 - 2025-05-11 + - FU::Util: Fix parsing of empty sections in query_decode() + - FU::Util: Fix buffer overflow in json_format() float formatting + - FU::Util: Reject `0x1f` in utf8_decode() + - FU::Pg: Add perl<->text and bin<->text type conversion methods + - FU::Validate: Improved error messages + - FU::MultipartFormData: Various parser fixes + - FU: Include request body in verbose error logs + - FU: Add fu->log_verbose() + - FU: Extend debug_info pages with request body, response body, 'fu' + object dump, expandable query parameters and interpolated SQL queries + - FU: Improve styling of debug_info pages + - FU: Preserve headers on fu->redirect + - FU: Ignore HTTP_CONTENT_LENGTH and HTTP_CONTENT_TYPE FastCGI parameters + - FU: Suppress warning about missing files in FU::monitor_path + - FU: Reject hash character and newlines in request path + - Fix creating read-only undef/true/false in json_parse() and FU::Pg + - Benchmark updates + +0.5 - 2025-04-24 + - FU::Util: Set O_CLOEXEC on fds received through `fdpass_recv()` + - FU::Util: Fix interpretation of false options in `json_format()` and + `json_parse()` + - FU::Validate: Add `coerce()` and `empty()` utility methods + - FU::Validate: Limit values of int/uint input to 64 bits + - FU::Validate: Normalize num/int/uint inputs to Perl numeric types + - FU::Pg: Add `escape_literal()` and `escape_identifier()` methods + - FU::Pg: Use less memory for `kvv()`, `kva()` and `kvh()` methods + - FU::Pg: Disallow chaining of `cache()`, `text()`, `text_params()` and + `text_results()` methods on connection and transaction objects + - FU: Throw and catch FU::Validate errors without wrapping in `fu->error()` + - FU: Add `-progname` option and add diagnostics to process names + - FU: Whole bunch of misc fixes + - Doc fixes + - Fix nul-termination of some XS-created strings + +0.4 - 2025-03-19 + - FU::Validate: Support arrayref schemas + - FU::Validate: Rename 'values' option to 'elems' + - FU::Validate: Repurpose 'values' option to validate hash values + - FU::Validate: Merge nested 'elems', 'keys' and 'values' schemas during compile() + - FU::Validate: Rename 'scalar' to 'accept_scalar' + - FU::Validate: Add 'accept_array' option + - FU::Util: Add 'html_safe' option to json_format() + - FU::Util: Add gzip_compress() wrapper for libdeflate.so, zlib-ng.so or zlib.so + - FU::Util: Add brotli_compress() wrapper for libbrotlienc.so + - FU: Consistency fixes for fu->json() and fu->formdata() + - FU: Add fu->cookie() and fu->set_cookie() + - FU: Add support for brotli output compression + - FU: Use gzip_compress() for faster gzip output compression + +0.3 - 2025-03-10 + - FU::Validate: Change API, ->validate() now returns data or throws error on failure + - FU::Validate: Rename 'rmwhitespace' to 'trim' + - FU::Validate: Support (more) human-readable error messages + - FU::Pg: Add support for COPY operations + - FU::Pg: Support types with dynamic OIDs + - FU: Add support for reading multipart/form-data + - FU: Add convenience methods for reading and writing JSON + - FU: Fix error in handling a 400 + - FU::MultipartFormData: New helper module + - Fix some tests + - Some doc improvements + 0.2 - 2025-02-28 - FU: Add debug_info web interface - FU: Add fu->denied and fu->notfound methods diff --git a/FU.pm b/FU.pm index 78c0fce..cb73e10 100644 --- a/FU.pm +++ b/FU.pm @@ -1,21 +1,29 @@ -package FU 0.2; +package FU 1.4; use v5.36; use Carp 'confess', 'croak'; use IO::Socket; use POSIX (); -use Time::HiRes 'time'; +use Time::HiRes 'time', 'clock_gettime', 'CLOCK_MONOTONIC'; use FU::Log 'log_write'; use FU::Util; +use FU::Validate; +my $procname; +my $scriptpath = $0; sub import($pkg, @opt) { my $c = caller; no strict 'refs'; *{$c.'::fu'} = \&fu; + my $spawn; for (@opt) { - if ($_ eq '-spawn') { _spawn() } + if (ref $procname eq 'FU::ARG') { $procname = $_ } + elsif ($_ eq '-procname') { $procname = bless {}, 'FU::ARG' } + elsif ($_ eq '-spawn') { $spawn = 1; } else { croak "Unknown import option: '$_'" } } + croak "Missing argument for -procname option" if ref $procname eq 'FU::ARG'; + _spawn() if $spawn; } @@ -113,15 +121,29 @@ sub query_trace($st,@) { $REQ->{trace_nsqldirect}++ if !defined $st->prepare_time; $REQ->{trace_sqlexec} += $st->exec_time; $REQ->{trace_sqlprep} += $st->prepare_time if $st->prepare_time; - push $REQ->{trace_sql}->@*, { - query => $st->query, nrows => $st->nrows, - param_types => $st->param_types, param_values => $st->param_values, - exec_time => $st->exec_time, prepare_time => $st->prepare_time, - } if FU::debug; + if (FU::debug) { + my $t = $st->param_types; + my $v = $st->param_values; + my $txt = $st->get_text_params; + push $REQ->{trace_sql}->@*, { + query => $st->query, nrows => $st->nrows, + exec_time => $st->exec_time, prepare_time => $st->prepare_time, + # Store the binary value when we're in binary params mode, that way + # we don't have to keep a reference to the original perl value and + # we can defer & batch the conversion to text. + params => [ map +{ + type => $t->[$_], + !defined $v->[$_] ? (text => undef) : + $txt ? (text => "$v->[$_]") + : (bin => $DB->perl2bin($t->[$_], $v->[$_])) + }, 0..$#$v ], + }; + } } sub _connect_db { $DB = ref $INIT_DB eq 'CODE' ? $INIT_DB->() : FU::Pg->connect($INIT_DB); $DB->query_trace(\&query_trace); + $DB } sub init_db($info) { require FU::Pg; @@ -195,25 +217,20 @@ sub monitor_path { push @monitor_paths, @_ } sub monitor_check :prototype(&) { $monitor_check = $_[0] } sub _monitor { - state %data; return 1 if $monitor_check && $monitor_check->(); require File::Find; eval { File::Find::find({ - wanted => sub { - my $m = (stat)[9]; - $data{$_} //= $m; - die if $m > $data{$_}; - }, + wanted => sub { die if (-M) < 0 }, no_chdir => 1 - }, $0, values %INC, @monitor_paths); + }, grep -e, $scriptpath, values %INC, @monitor_paths); 0 } // 1; } -our $debug_info = []; +our $debug_info = {}; sub debug_info($path, $storage=undef, $history=100) { $debug_info = { path => $path, storage => $storage, history => $history } } @@ -253,8 +270,9 @@ sub _read_req_http($sock, $req) { $req->{body} = ''; while ($len > 0) { - my $r = $sock->read($req->{body}, $len, -1); - fu->error(400, 'Client disconnect before request was read') if !$r + my $r = $sock->read($req->{body}, $len, length $req->{body}); + fu->error(400, 'Client disconnect before request was read') if !$r; + $len -= $r; } } @@ -274,7 +292,8 @@ sub _read_req($c) { : $r == -2 ? "I/O error while reading from FastCGI socket\n" : $r == -3 ? "FastCGI protocol error\n" : $r == -4 ? "Too long FastCGI parameter\n" - : $r == -5 ? "Too long request body\n" : undef if $r != -7; + : $r == -5 ? "Too long request body\n" + : $r == -8 ? "I/O error while writing to FastCGI socket\n" : undef if $r != -7; delete $c->{fcgi_obj}; fu->error(-1); } @@ -288,36 +307,33 @@ sub _read_req($c) { # The HTTP reader above and the FastCGI XS reader operate on bytes. # Decode these into Unicode strings and check for special characters. - eval { FU::Util::utf8_decode($_); 1} || fu->err(400, $@) + eval { FU::Util::utf8_decode($_); 1} || fu->error(400, $@) for ($REQ->{path}, $REQ->{qs}, values $REQ->{hdr}->%*); + fu->error(400, 'Invalid character in path') if $REQ->{path} =~ /#/; # Some bots don't correctly split off the fragment ($REQ->{path}, my $qs) = split /\?/, $REQ->{path}//'', 2; $REQ->{qs} //= $qs; - $REQ->{path} = FU::Util::uri_unescape($REQ->{path}); + eval { $REQ->{path} = FU::Util::uri_unescape($REQ->{path}); FU::Util::check_control($REQ->{path}); 1; } || fu->error(400, $@); + fu->error(400, 'Invalid character in path') if $REQ->{path} =~ /[\r\n\t]/; # There are plenty other questionable characters, but newlines and tabs are definitely out } -sub _is_done($e) { ref $@ eq 'FU::err' && $@->[0] == 200 } +sub _is_done($e) { ref $e eq 'FU::err' && $e->[0] == 200 } sub _log_err($e) { return if !$e; - return if !debug && ref $@ eq 'FU::err' && $@->[0] != 500; - if (!$REQ->{full_err} && (ref $@ ne 'FU::err' || $@->[0] == 500)) { - $REQ->{full_err}++; - $e =~ s/^\s+//; - $e =~ s/\s+$//; - log_write join "\n", - 'IP: '.($REQ->{ip}||'-'), - 'Headers:', (map " $_: $REQ->{hdr}{$_}", sort keys $REQ->{hdr}->%*), - 'ERROR:'.($e =~ s/(^|\n)/\n /rg); - # TODO: decoded body, if we have that. - } else { - log_write $e; - } + my $crit = $e isa 'FU::err' ? $e->[0] == 500 : !($e isa 'FU::Validate::err'); + return if !debug && !$crit; + return fu->log_verbose($e) if $crit; + log_write $e; } sub _do_req($c) { - local $REQ = { hdr => {}, trace_start => time, trace_id => sprintf('%010x%08x%04x', int time, $$, int rand 1<<16) }; + local $REQ = { + hdr => {}, + trace_start => clock_gettime(CLOCK_MONOTONIC), + trace_id => sprintf('%012x%06x%04x', int(time*10000) % (1<<(12*4)), $$ % (1<<(6*4)), int rand 1<<16) + }; local $fu = bless {}, 'FU::obj'; $REQ->{ip} = $c->{client_sock} isa 'IO::Socket::INET' ? $c->{client_sock}->peerhost : '127.0.0.1'; @@ -325,7 +341,7 @@ sub _do_req($c) { my $ok = eval { _read_req $c; - $REQ->{trace_start} = time; + $REQ->{trace_start} = clock_gettime(CLOCK_MONOTONIC); my $path = fu->path; my $method = fu->method eq 'HEAD' ? 'GET' : fu->method; @@ -374,17 +390,24 @@ sub _do_req($c) { } if ($err) { - my($code, $msg) = ref $err eq 'FU::err' ? $err->@* : (500, $err); + my($code, $msg) = $err isa 'FU::err' ? @$err : $err isa 'FU::Validate::err' ? (400, $err) : (500, $err); fu->reset; fu->status($code); - eval { - ($onerr{$code} || $onerr{500})->($code, $msg); - 1; - } || _err_500(); + my $ok = eval { ($onerr{$code} || $onerr{500})->($code, $msg) }; + if (!$ok && !_is_done($@)) { + _log_err $@; + _err_500(); + } } - $REQ->{trace_end} = time; - fu->_flush($c->{fcgi_obj} || $c->{client_sock}); + $REQ->{trace_end} = clock_gettime(CLOCK_MONOTONIC); + eval { + fu->_flush($c->{fcgi_obj} || $c->{client_sock}); + 1; + } || do { + log_write "Error writing response: $@\n"; + $c->{client_sock} = $c->{fcgi_obj} = undef; + }; if (debug && $REQ->{trace_id} && $debug_info->{history} && $debug_info->{storage}) { require FU::DebugImpl; @@ -392,19 +415,20 @@ sub _do_req($c) { } my $proc_ms = ($REQ->{trace_end} - $REQ->{trace_start}) * 1000; - log_write(sprintf "%.0fms%s %s-%s %s-%d\n", $proc_ms, + log_write(sprintf "%.0fms%s %s-%s %d-%s\n", $proc_ms, $REQ->{trace_nsql} ? sprintf ' (sql %.0f+%.0fms, %d/%d/%d)', ($REQ->{trace_sqlexec}||0)*1000, ($REQ->{trace_sqlprep}||0)*1000, $REQ->{trace_nsqldirect}||0, $REQ->{trace_nsqlprep}||0, $REQ->{trace_nsql} : '', $REQ->{status}, ($REQ->{reshdr}{'content-type'}//'-') =~ s/;.+$//r, - $REQ->{reshdr}{'content-encoding'}//'bytes', length($REQ->{resbody}), + length($REQ->{resbody}), substr($REQ->{reshdr}{'content-encoding'}//'r', 0, 1) ) if FU::debug || $proc_ms > (FU::log_slow_reqs||1e10); } sub _run_loop($c) { my $stop = 0; + my $count = 0; local $SIG{HUP} = 'IGNORE'; local $SIG{TERM} = $SIG{INT} = sub { $stop = 1 }; @@ -414,7 +438,13 @@ sub _run_loop($c) { exit; } + my sub setstate($state) { + $0 = sprintf "%s: %s [#%d%s]", $procname, $state, $count, $c->{max_reqs} ? "/$c->{max_reqs}" : '' if $procname; + } + while (!$stop) { + setstate 'idle'; + $c->{client_sock} ||= $c->{listen_sock}->accept || next; $c->{fcgi_obj} ||= $c->{listen_proto} eq 'fcgi' && FU::fcgi::new(fileno $c->{client_sock}, $c->{proc}); @@ -423,11 +453,13 @@ sub _run_loop($c) { passclient; } + setstate 'working'; _do_req $c; $c->{client_sock} = $c->{fcgi_obj} = undef if !($c->{fcgi_obj} && $c->{fcgi_obj}->keepalive); - passclient if $c->{max_reqs} && !--$c->{max_reqs}; + $count++; + passclient if $c->{max_reqs} && $count >= $c->{max_reqs}; } } @@ -466,34 +498,36 @@ sub _supervisor($c) { if (!$err && (!$childs{$pid} || $childs{$pid} != 2)) { $err = 1; log_write "Script exited before calling FU::run()\n"; + } elsif ($?) { + log_write "Unclean shutdown of worker PID $pid status $?\n"; } delete $childs{$pid}; } # Don't bother spawning more than 1 at a time while in error state - my $spawn = !$err ? $c->{proc} - keys %childs : (grep $_ == 1, values %childs) ? 0 : 1; + my $spawn = !$err ? $c->{proc} - keys %childs : !@client_fd && (grep $_ == 1, values %childs) ? 0 : 1; for (1..$spawn) { - my $client = shift @client_fd; + my $client = @client_fd ? IO::Socket->new_from_fd(shift(@client_fd), 'r') : undef; my $pid = fork; die $! if !defined $pid; if (!$pid) { # child $SIG{CHLD} = $SIG{HUP} = $SIG{INT} = $SIG{TERM} = undef; + $0 = sprintf '%s: starting', $procname if $procname; + # In error state, wait with loading the script until we've received a request. + # Otherwise we'll end up in an infinite spawning loop if the script doesn't start properly. + $client = $c->{listen_sock}->accept() or die $! if !$client && $err; if ($client) { - $ENV{FU_CLIENT_FD} = $client; - } elsif ($err) { - # In error state, wait with loading the script until we've received a request. - # Otherwise we'll end up in an infinite spawning loop if the script doesn't start properly. - $client = $c->{listen_sock}->accept() or die $!; fcntl $client, Fcntl::F_SETFD, 0; $ENV{FU_CLIENT_FD} = fileno $client; } - exec $^X, (map "-I$_", @INC), $0; + exec $^X, (map "-I$_", @INC), $scriptpath; exit 1; } - $client && IO::Socket->new_from_fd($client, 'r'); # close() the fd if we have one $childs{$pid} = 1; } + $0 = sprintf "%s: supervisor [%d/%d]", $procname, scalar keys %childs, $c->{proc} if $procname; + my ($fd, $msgadd) = FU::Util::fdpass_recv(fileno($rsock), 500); push @client_fd, $fd if $fd; next if !defined $msgadd; @@ -621,8 +655,29 @@ sub db { }; } -sub sql { shift->db->q(@_) } -sub SQL { shift->db->Q(@_) } +sub sql { shift->db->sql(@_) } +sub SQL { shift->db->SQL(@_) } + +sub _fmt_section($s) { $s =~ s/^\s*/ /r =~ s/\s+$//r =~ s/\n/\n /rg } + +sub log_verbose($,$msg) { + my $r = $FU::REQ; + return FU::Log::log_write($msg) if $r->{log_verbose}++; + FU::Log::log_write(join "\n", + 'IP: '.($r->{ip}||'-'), + 'Headers:', (map " $_: $r->{hdr}{$_}", sort keys $r->{hdr}->%*), + $r->{multipart} ? ('Body (multipart):', _fmt_section join "\n", map $_->describe, $r->{multipart}->@*) : + $r->{json} ? ('Body (JSON):', _fmt_section FU::Util::json_format($r->{json}, pretty => 1, canonical => 1)) : + $r->{formdata} ? ('Body (formdata):', _fmt_section FU::Util::json_format($r->{formdata}, pretty => 1, canonical => 1)) : + length $r->{body} ? do { + my $b = substr $r->{body}, 0, 4096; + my $trunc = length $r->{body} > 4096 ? ', truncated' : ''; + utf8::decode($b) ? ("Body (utf8$trunc):", _fmt_section($b =~ s/\r//rg =~ s/\n{4,}/\n[..]\n/rg)) + : ("Body (hex$trunc):", _fmt_section(unpack('H*', $b) =~ s/(.{128})/$1\n/rg)) + } : (), + 'Message:', _fmt_section $msg + ); +} @@ -636,12 +691,13 @@ sub headers { $FU::REQ->{hdr} } sub ip { $FU::REQ->{ip} } sub _getfield($data, @a) { - return $data->{$a[0]} if @a == 1 && !ref $a[0]; - require FU::Validate; + if (@a == 1 && !ref $a[0]) { + fu->error(400, "Expected top-level to be a hash") if ref $data ne 'HASH'; + return $data->{$a[0]}; + } my $schema = FU::Validate->compile(@a > 1 ? { keys => {@a} } : $a[0]); my $res = $schema->validate($data); - fu->error(400, "Input validation failed") if !$res; # TODO: More detailed error message - return @a == 2 ? $res->data->{$a[0]} : $res->data; + return @a == 2 ? $res->{$a[0]} : $res; } sub query { @@ -651,18 +707,50 @@ sub query { _getfield $FU::REQ->{qs_parsed}, @_; } +sub cookie { + shift; + return fu->header('cookie') if !@_; + $FU::REQ->{cookie} ||= do { + my %c; + for my $c (split /; /, fu->header('cookie')||'') { + my($n, $v) = split /=/, $c, 2; + if (!defined $v) {} + elsif (!exists $c{$n}) { $c{$n} = $v } + elsif (ref $c{$n}) { push $c{$n}->@*, $v } + else { $c{$n} = [ $c{$n}, $v ] } + } + \%c + }; + _getfield $FU::REQ->{cookie}, @_; +} + +sub json { + shift; + fu->error(400, "Invalid content type for json") if (fu->header('content-type')||'') !~ m{^application/json(?:;\s*charset=utf-?8)?$}i; + return FU::Util::utf8_decode(my $x = $FU::REQ->{body}) if !@_; + $FU::REQ->{json} ||= eval { + FU::Util::json_parse($FU::REQ->{body}, utf8 => 1) + } || fu->error(400, "JSON parse error: $@"); + _getfield $FU::REQ->{json}, @_; +} + sub formdata { shift; + fu->error(400, "Invalid content type for form data") if (fu->header('content-type')||'') ne 'application/x-www-form-urlencoded'; + return FU::Util::utf8_decode(my $x = $FU::REQ->{body}) if !@_; $FU::REQ->{formdata} ||= eval { - # TODO: Support multipart encoding - confess "Invalid content type for form data" - if (fu->header('content-type')||'') ne 'application/x-www-form-urlencoded'; - FU::Util::query_decode($FU::REQ->{data}); + FU::Util::query_decode($FU::REQ->{body}); } || fu->error(400, $@); - # TODO: Accept schema validation thing. _getfield $FU::REQ->{formdata}, @_; } +sub multipart { + require FU::MultipartFormData; + $FU::REQ->{multipart} ||= eval { + FU::MultipartFormData->parse(fu->header('content-type')||'', $FU::REQ->{body}) + } || fu->error(400, $@); +} + @@ -687,12 +775,13 @@ sub reset { $FU::REQ->{reshdr} = { 'content-type', 'text/html', }; + delete $FU::REQ->{rescookie}; } sub _validate_header($hdr, $val) { confess "Invalid response header '$hdr'" if $hdr !~ /^$FU::hdrname_re$/; - confess "Invalid attempt to set response header containing a newline" if $val =~ /[\r\n]/; + confess "Invalid attempt to set response header containing a newline" if defined $val && $val =~ /[\r\n]/; } sub add_header($, $hdr, $val) { @@ -709,6 +798,45 @@ sub set_header($, $hdr, $val=undef) { $FU::REQ->{reshdr}{ lc $hdr } = $val; } +sub set_cookie($, $name, $val=undef, %opt) { + confess "Invalid cookie name '$name'" if $name !~ /^$FU::hdrname_re$/; + return delete $FU::REQ->{rescookie}{$name} if !defined $val; + confess "Invalid cookie value: $val" if $val =~ /[\0-\x1f\x7f-\x{10ffff}\s\r\n\t",;\\]/; + my $c = "$name=$val"; + for my ($k,$v) (%opt) { + $k = lc $k; # attributes are case-insensitive + if ($k eq 'domain') { + confess "Invalid cookie domain: $v" if $v !~ $FU::Validate::re_domain; + } elsif ($k eq 'expires') { + confess "Cookie 'Expires' attribute should be a UNIX timestamp" if defined $v && $v !~ /^[0-9]+$/; + $v = FU::Util::httpdate_format($v || 0); + } elsif ($k eq 'httponly') { + $c .= "; $k" if $v; + next; + } elsif ($k eq 'max-age') { + confess "Invalid 'Max-Age' cookie attribute: $v" if $v !~ /^[0-9]+$/; + } elsif ($k eq 'partitioned') { + $c .= "; $k" if $v; + next; + } elsif ($k eq 'path') { + confess "Invalid 'Path' cookie attribute: $v" if $v =~ /[\0-\x1f\x7f-\x{10ffff}\s\r\n\t",;\\]/; + } elsif ($k eq 'secure') { + $c .= "; $k" if $v; + next; + } elsif ($k eq 'samesite') { + confess "Invalid 'SameSite' cookie attribute: $v" if $v !~ /^(?:Strict|Lax|None)$/; + } + $c .= "; $k=$v"; + } + $FU::REQ->{rescookie}{$name} = $c; +} + +sub send_json($, $data) { + fu->set_header('content-type', 'application/json'); + fu->set_body(FU::Util::json_format($data, canonical => 1, utf8 => 1)); + fu->done; +} + sub send_file($, $root, $path) { # This also catches files with '..' somewhere in the middle of the name. # Let's just disallow that to simplify this check, I'd err on the side of @@ -742,7 +870,6 @@ sub send_file($, $root, $path) { sub redirect($, $code, $location) { state $alias = {qw/ perm 301 temp 302 tempget 303 tempsame 307 permsame 308 /}; - fu->reset; fu->status($alias->{$code} // $code); fu->set_header(location => "$location"); fu->set_header('content-type', 'text/plain'); @@ -775,9 +902,12 @@ sub _error_page($, $code, $title, $msg) { } sub _finalize { - state $haszlib = eval { require Compress::Raw::Zlib; 1 }; + state $hasgzip = FU::Util::gzip_lib(); + state $hasbrotli = eval { FU::Util::brotli_compress(6, ''); 1 }; my $r = $FU::REQ; + fu->add_header('set-cookie', $_) for $r->{rescookie} ? sort values $r->{rescookie}->%* : (); + if ($r->{status} == 204 || $r->{status} == 304) { delete $r->{reshdr}{'content-length'}; delete $r->{reshdr}{'content-encoding'}; @@ -785,21 +915,24 @@ sub _finalize { $r->{resbody} = ''; } else { - if ($haszlib && length($r->{resbody}) > 256 - && !defined $r->{reshdr}{'content-encoding'} && FU::compress_mimes->{$r->{reshdr}{'content-type'}}) { + my @vary = ref $r->{reshdr}{vary} eq 'ARRAY' ? $r->{reshdr}{vary}->@* : defined $r->{reshdr}{vary} ? ($r->{reshdr}{vary}) : (); + if (($hasgzip || $hasbrotli) && length($r->{resbody}) > 256 + && !defined $r->{reshdr}{'content-encoding'} + && FU::compress_mimes->{$r->{reshdr}{'content-type'}} + ) { + push @vary, 'accept-encoding'; + if ($hasbrotli && ($r->{hdr}{'accept-encoding'}||'') =~ /\bbr\b/) { + $r->{resbody_orig} = $r->{resbody}; + $r->{resbody} = FU::Util::brotli_compress(6, $r->{resbody}); + $r->{reshdr}{'content-encoding'} = 'br'; - $r->{reshdr}{'vary'} = ($r->{reshdr}{'vary'} ? $r->{reshdr}{'vary'}.', ' : '').'accept-encoding' - if ($r->{reshdr}{'vary'}||'') !~ /accept-encoding/i; - - if ($haszlib && ($r->{hdr}{'accept-encoding'}||'') =~ /gzip/) { - # Use lower-level API because the higher-level Compress::Zlib loads a whole bunch of other modules. - my $z = Compress::Raw::Zlib::Deflate->new(-WindowBits => Compress::Raw::Zlib::WANT_GZIP(), -Level => 3, -AppendOutput => 1); - $z->deflate($r->{resbody}, my $buf); - $z->flush($buf); - $r->{resbody} = $buf; + } elsif ($hasgzip && ($r->{hdr}{'accept-encoding'}||'') =~ /\bgzip\b/) { + $r->{resbody_orig} = $r->{resbody}; + $r->{resbody} = FU::Util::gzip_compress(6, $r->{resbody}); $r->{reshdr}{'content-encoding'} = 'gzip'; } } + $r->{reshdr}{vary} = @vary ? join ', ', @vary : undef; $r->{reshdr}{'content-length'} = length $r->{resbody}; $r->{resbody} = '' if (fu->method//'') eq 'HEAD'; } @@ -849,15 +982,7 @@ __END__ =head1 NAME -FU - Framework Ultimatum: A Lean and Efficient Zero-Dependency Web Framework. - -=head1 EXPERIMENTAL - -This module is still in development: it's missing important functionality and -there will likely be a few breaking API changes. This framework currently -powers manned.org as a test. I'll do a stable 1.0 release once FU is used in -production for vndb.org, which will take a few months in the best case -scenario. +FU - A Lean and Efficient Zero-Dependency Web Framework. =head1 SYNOPSIS @@ -875,7 +1000,7 @@ scenario. } FU::get qr{/hello/(.+)}, sub($who) { - my_html_ "Website title", sub { + myhtml_ "Website title", sub { h1_ "Hello, $who!"; }; }; @@ -884,6 +1009,11 @@ scenario. =head1 DESCRIPTION +FU is the backend web framework developed for L and +L, but is also perfectly suitable for other +projects. Besides a web framework, this distrubion also includes a bunch of +handy utility functions and modules. + =head2 Distribution Overview This top-level C module is a web development framework. The C @@ -916,6 +1046,12 @@ is). There are a few additional optional dependencies: =item * C - required for L, dynamically loaded through C. +=item * C or C or C - required for +C in L and used for HTTP output compression. + +=item * C - required for C in L +and used for HTTP output compression. + =back @@ -954,16 +1090,22 @@ certainly not great if you plan to transfer large files. =back The rest of this document is reference documentation; there's no easy -introductionary cookbook-style docs yet, sorry about that. +introductory cookbook-style docs yet, sorry about that. Unless specifically mentioned otherwise, all methods and functions taking or returning strings deal with perl Unicode strings, not raw bytes. -=head2 Framework Configuration +=head1 Framework Configuration =over +=item use FU -procname => $name + +When the C<-procname> import option is set, FU automatically updates the +process name (as displayed in L and L, see C<$0>) with +information about the current process, prefixed with the given C<$name>. + =item FU::init_db($info) Set database configuration. C<$info> can either be a connection string for C<< @@ -1057,7 +1199,7 @@ restart loop. =back -=head2 Handlers & Routing +=head1 Handlers & Routing =over @@ -1124,12 +1266,18 @@ for a certain error code, C<500> is used as fallback. =back +All of the above C<$sub> callbacks are allowed to throw an error. Special +handling is given to exceptions generated by C<< fu->error() >>, which are +relegated to the appropriate C handler, and errors thrown by the +C method of L, which result in the C<400> error +handler being run. Any other exception is passed to the C<500> error handler. -=head2 The 'fu' Object + +=head1 The 'fu' Object While the C namespace is used for global configuration and utility functions, the C object is intended for methods that deal with request -processing (although some are useful used outside of request handlers as well). +processing (although some are useful outside of request handlers as well). The C object itself can be used to store request-local data. For example, the following is a valid approach to handle user authentication: @@ -1167,15 +1315,23 @@ has successfully been processed, or rolled back if there was an error. =item fu->sql($query, @params) -Convenient short-hand for C<< fu->db->q($query, @params) >>. +Convenient short-hand for C<< fu->db->sql($query, @params) >>. =item fu->SQL(@args) -Convenient short-hand for C<< fu->db->Q(@args) >>. +Convenient short-hand for C<< fu->db->SQL(@args) >>. + +=item fu->log_verbose($message) + +Write a verbose multi-line message to the log, including a full dump of +information about the request: IP, headers and (potentially reformatted and/or +truncated) body. This extra info is only written once per request, further +calls to C just go directly to L's C +instead. =back -=head2 Request Information +=head1 Request Information =over @@ -1211,15 +1367,24 @@ C this returns C. =item fu->query($name) Parses the raw query string with C in L and returns the -value with the given $name. Beware: multiple values are returned as an array. -Prefer to use the C<$schema>-based validation methods below to reliably handle -all sorts of query strings. +value with the given $name. Beware: an array is returned if the given key is +repeated in the query string. Prefer to use the C<$schema>-based validation +methods below to reliably handle all sorts of query strings. =item fu->query($name => $schema) Parse, validate and return the query parameter identified by C<$name> with the -given L schema. Calls C<< fu->error(400) >> with a useful error -message if validation fails. +given L schema. + +To fetch a query parameter that may have multiple values, use: + + my $arrayref = fu->query(q => {accept_scalar => 1}); + + # OR: + my $first_value = fu->query(q => {accept_array => 'first'}); + + # OR: + my $last_value = fu->query(q => {accept_array => 'last'}); =item fu->query($schema) @@ -1236,22 +1401,40 @@ Parse, validate and return multiple query parameters. # Or, more concisely: my $data = fu->query(a => {anybool => 1}, b => {}); -=item fu->formdata($name) +To fetch all query paramaters as decoded by C, use: -=item fu->formdata($schema) + my $data = fu->query({type=>'any'}); -Like C<< fu->query() >> but returns data from the POST request body. +=item fu->cookie(...) + +Like C<< fu->query() >> but parses the C request header. Beware that, +exactly like with query parameters, it's possible for a cookie to have multiple +values and thus get represented as an array. + +=item fu->json(...) + +Like C<< fu->query() >> but parses the request body as JSON. Returns the raw +(unvalidated!) JSON Unicode string if no arguments are given. To retrieve the +decoded JSON data without performing further validation, use: + + my $data = fu->json({type=>'any'}); + +=item fu->formdata(...) + +Like C<< fu->query() >> but returns data from the POST request body. This +method only supports form data encoded as C, +which is the default for HTML C<<
>>s. To handle multipart form data, +use C<< fu->multipart >> instead. + +=item fu->multipart + +Parse the request body as C and return an array of field +objects. Refer to L for more information. =back -I Support C and file uploads. -I Support JSON bodies. - -I Cookie parsing. - - -=head2 Generating Responses +=head1 Response Generation =over @@ -1295,6 +1478,31 @@ Add a response header, can be used to add multiple headers with the same name. Add a response header or overwrite the header with a new value if it already exists. Set C<$value> to undef to remove a previously set header. +=item fu->set_cookie($name, $value, %attributes) + +Set or overwrite a cookie. Set C<$value> to undef to remove a previously set +cookie. To fully remove a cookie from the user's browser, set the cookie with +an empty value and zero C: + + fu->set_cookie(my_cookie => '', 'Max-Age' => 0); + +C<%attributes> can be any of the supported L. +The C attribute, when given, must be a UNIX timestamp. Boolean +attributes are interpreted according to Perl's idea of truthiness. For example: + + fu->set_cookie(auth => $auth_token, + Expires => time()+30*24*3600, + Domain => 'example.com', + Secure => 1, + SameSite => 'Lax' + ); + +This method does not encode or escape the cookie value in any way. If you want +to set a non-ASCII value or a value containing characters that are not +permitted in the C header, use C in L or +your favorite alternative cookie-safe encoding. + =item fu->set_body($data) Set the (raw, binary) body of the response to C<$data>. This method is not very @@ -1309,6 +1517,12 @@ templating system or L: }; }); +=item fu->send_json($data) + +Encode C<$data> as JSON (using C in L), set an +appropriate C header and send it to the client. Calls C<< +fu->done >>. + =item fu->send_file($root, $path) If a file identified by C<"$root/$path"> exists, set that as response and call @@ -1360,12 +1574,9 @@ one of the following status codes or an alias: =back -I Setting cookies. - -I JSON output. -=head2 Running the Site +=head1 Running the Site When your script is done setting L and registering L, it should call C to actually start serving diff --git a/FU.xs b/FU.xs index 3c6084a..1477a0a 100644 --- a/FU.xs +++ b/FU.xs @@ -3,7 +3,7 @@ #include /* struct timespec & clock_gettime() */ #include /* strerror() */ #include /* inet_ntop(), inet_ntoa() */ -#include /* fd passing */ +#include /* send(), fd passing */ #include /* fd passing */ #include /* dlopen() etc */ @@ -18,7 +18,13 @@ #define av_push_simple av_push #endif #ifndef BOOL_INTERNALS_sv_isbool_true -#define BOOL_INTERNALS_sv_isbool_true(x) SvPVXtrue(x) +#define BOOL_INTERNALS_sv_isbool_true(x) SvTRUEx(x) +#endif +#ifndef newSV_true +#define newSV_true() newSVsv(&PL_sv_yes) +#endif +#ifndef newSV_false +#define newSV_false() newSVsv(&PL_sv_no) #endif /* Disable key/value struct packing in khashl, so we can safely take a pointer @@ -27,10 +33,12 @@ #include "c/khashl.h" #include "c/common.c" + +#include "c/compress.c" +#include "c/fcgi.c" +#include "c/fdpass.c" #include "c/jsonfmt.c" #include "c/jsonparse.c" -#include "c/fdpass.c" -#include "c/fcgi.c" #include "c/xmlwr.c" #include "c/libpq.h" @@ -53,7 +61,6 @@ if (!ix) ix = FUPG_CACHE;\ if (items == 1 || SvTRUE(ST(1))) x->stflags |= ix; \ else x->stflags &= ~ix; \ - XSRETURN(1); \ } while(0) MODULE = FU @@ -68,6 +75,7 @@ fuxmlwr * FUXMLWR fupg_conn * FUPG_CONN fupg_txn * FUPG_TXN fupg_st * FUPG_ST +fupg_copy * FUPG_COPY INPUT FUFCGI @@ -89,6 +97,10 @@ FUPG_TXN FUPG_ST if (sv_derived_from($arg, \"FU::Pg::st\")) $var = (fupg_st *)SvIVX(SvRV($arg)); else fu_confess(\"invalid statement object\"); + +FUPG_COPY + if (sv_derived_from($arg, \"FU::Pg::copy\")) $var = (fupg_copy *)SvIVX(SvRV($arg)); + else fu_confess(\"invalid COPY object\"); #" EOT @@ -110,6 +122,19 @@ void json_parse(SV *val, ...) CODE: ST(0) = fujson_parse_xs(aTHX_ ax, items, val); +void gzip_lib() + PROTOTYPE: + CODE: + ST(0) = sv_2mortal(newSVpv(fugz_lib(), 0)); + +void gzip_compress(IV level, SV *in) + CODE: + ST(0) = fugz_compress(aTHX_ level, in); + +void brotli_compress(IV level, SV *in) + CODE: + ST(0) = fubr_compress(aTHX_ level, in); + void fdpass_send(int socket, int fd, SV *data) CODE: STRLEN buflen; @@ -145,11 +170,11 @@ void print(fufcgi *ctx, SV *sv) CODE: STRLEN len; const char *buf = SvPVbyte(sv, len); - fufcgi_print(ctx, buf, len); + fufcgi_print(aTHX_ ctx, buf, len); void flush(fufcgi *ctx) CODE: - fufcgi_done(ctx); + fufcgi_done(aTHX_ ctx); void DESTROY(fufcgi *ctx) CODE: @@ -192,10 +217,34 @@ void query_trace(fupg_conn *c, SV *cb) SvGETMAGIC(cb); c->trace = SvOK(cb) ? SvREFCNT_inc(cb) : NULL; +void conn(fupg_conn *c) + CODE: + ST(0) = sv_newmortal(); + sv_setrv_inc(ST(0), c->self); + sv_bless(ST(0), gv_stashpv("FU::Pg::conn", 0)); + void status(fupg_conn *c) CODE: ST(0) = sv_2mortal(newSVpv(fupg_conn_status(c), 0)); +void escape_literal(fupg_conn *c, SV *v) + CODE: + STRLEN len; + const char *str = SvPVutf8(v, len); + char *r = PQescapeLiteral(c->conn, str, len); + if (!r) fupg_conn_croak(c, "escapeLiteral"); + ST(0) = newSVpvn_flags(r, strlen(r), SVf_UTF8|SVs_TEMP); + PQfreemem(r); + +void escape_identifier(fupg_conn *c, SV *v) + CODE: + STRLEN len; + const char *str = SvPVutf8(v, len); + char *r = PQescapeIdentifier(c->conn, str, len); + if (!r) fupg_conn_croak(c, "escapeIdentifier"); + ST(0) = newSVpvn_flags(r, strlen(r), SVf_UTF8|SVs_TEMP); + PQfreemem(r); + void cache(fupg_conn *x, ...) ALIAS: FU::Pg::conn::text_params = FUPG_TEXT_PARAMS @@ -228,16 +277,37 @@ void exec(fupg_conn *c, SV *sv) FUPG_CONN_COOKIE; ST(0) = fupg_exec(aTHX_ c, SvPVutf8_nolen(sv)); -void q(fupg_conn *c, SV *sv, ...) +void sql(fupg_conn *c, SV *sv, ...) CODE: FUPG_CONN_COOKIE; - ST(0) = fupg_q(aTHX_ c, c->stflags, SvPVutf8_nolen(sv), ax, items); + ST(0) = fupg_sql(aTHX_ c, c->stflags, SvPVutf8_nolen(sv), ax, items); + +void copy(fupg_conn *c, SV *sv) + CODE: + FUPG_CONN_COOKIE; + ST(0) = fupg_copy_exec(aTHX_ c, SvPVutf8_nolen(sv)); void _set_type(fupg_conn *c, SV *name, SV *sendsv, SV *recvsv) CODE: fupg_set_type(aTHX_ c, name, sendsv, recvsv); XSRETURN(1); +void perl2bin(fupg_conn *c, int oid, SV *sv) + CODE: + ST(0) = fupg_perl2bin(aTHX_ c, oid, sv); + +void bin2perl(fupg_conn *c, int oid, SV *sv) + CODE: + ST(0) = fupg_bin2perl(aTHX_ c, oid, sv); + +void bin2text(fupg_conn *c, ...) + CODE: + XSRETURN(fupg_bintext(aTHX_ c, 0, ax, items)); + +void text2bin(fupg_conn *c, ...) + CODE: + XSRETURN(fupg_bintext(aTHX_ c, 1, ax, items)); + MODULE = FU PACKAGE = FU::Pg::txn @@ -253,6 +323,12 @@ void cache(fupg_txn *x, ...) CODE: FUPG_STFLAGS; +void conn(fupg_txn *t) + CODE: + ST(0) = sv_newmortal(); + sv_setrv_inc(ST(0), t->conn->self); + sv_bless(ST(0), gv_stashpv("FU::Pg::conn", 0)); + void status(fupg_txn *t) CODE: ST(0) = sv_2mortal(newSVpv(fupg_txn_status(t), 0)); @@ -277,10 +353,16 @@ void exec(fupg_txn *t, SV *sv) FUPG_TXN_COOKIE; ST(0) = fupg_exec(aTHX_ t->conn, SvPVutf8_nolen(sv)); -void q(fupg_txn *t, SV *sv, ...) +void sql(fupg_txn *t, SV *sv, ...) CODE: FUPG_TXN_COOKIE; - ST(0) = fupg_q(aTHX_ t->conn, t->stflags, SvPVutf8_nolen(sv), ax, items); + ST(0) = fupg_sql(aTHX_ t->conn, t->stflags, SvPVutf8_nolen(sv), ax, items); + +# XXX: The copy object should probably keep a ref on the transaction +void copy(fupg_txn *t, SV *sv) + CODE: + FUPG_TXN_COOKIE; + ST(0) = fupg_copy_exec(aTHX_ t->conn, SvPVutf8_nolen(sv)); @@ -294,6 +376,7 @@ void cache(fupg_st *x, ...) CODE: if (ix == 0 && x->prepared) fu_confess("Invalid attempt to change statement configuration after it has already been prepared or executed"); FUPG_STFLAGS; + XSRETURN(1); void exec(fupg_st *st) CODE: @@ -393,6 +476,28 @@ void DESTROY(fupg_st *st) fupg_st_destroy(aTHX_ st); +MODULE = FU PACKAGE = FU::Pg::copy + +void write(fupg_copy *c, SV *sv) + CODE: + fupg_copy_write(aTHX_ c, sv); + +void read(fupg_copy *c) + CODE: + ST(0) = fupg_copy_read(aTHX_ c, 0); + +void is_binary(fupg_copy *c) + CODE: + ST(0) = c->bin ? &PL_sv_yes : &PL_sv_no; + +void close(fupg_copy *c) + CODE: + fupg_copy_close(aTHX_ c, 0); + +void DESTROY(fupg_copy *c) + CODE: + fupg_copy_destroy(aTHX_ c); + MODULE = FU PACKAGE = FU::XMLWriter diff --git a/FU/Benchmarks.pod b/FU/Benchmarks.pod index 2479667..b4b9182 100644 --- a/FU/Benchmarks.pod +++ b/FU/Benchmarks.pod @@ -26,21 +26,25 @@ The following module versions were used: =over -=item L 4.38 +=item L 4.40 -=item L 0.1 +=item L 3.18.0 + +=item L 1.4 =item L 1.08 =item L 4.16 -=item L 1.06 +=item L 1.07 =item L 0.58 -=item L 4.03 +=item L 4.04 -=item L 1.5 +=item L 0.15 + +=item L 1.6 =item L 0.900 @@ -56,266 +60,294 @@ These benchmarks run on large-ish arrays with repeated values. JSON encoding is sufficiently fast that Perl function calling overhead tends to dominate for smaller inputs, but I don't find that overhead very interesting. -Also worth noting that JSON::SIMD formatting code is forked from JSON::XS, the -SIMD parts are only used for parsing. +Also worth noting that L formatting code is forked from +L, the SIMD parts are only used for parsing. API object from L documentation. Encode Canonical Decode - JSON::PP 5312/s 5119/s 1290/s - JSON::Tiny 7757/s - 3426/s - Cpanel::JSON::XS 108187/s 101867/s 103575/s - JSON::SIMD 130137/s 118948/s 115123/s - JSON::XS 128421/s 120243/s 117940/s - FU::Util 133182/s 113275/s 118213/s + JSON::PP 5136/s 4943/s 1240/s + JSON::Tiny 7617/s - 3474/s + Cpanel::JSON::XS 108128/s 98734/s 105811/s + JSON::SIMD 125105/s 114822/s 118410/s + JSON::XS 128749/s 117518/s 120190/s + FU::Util 126909/s 109166/s 113983/s Object (small) Encode Canonical Decode - JSON::PP 907/s 829/s 202/s - JSON::Tiny 1224/s - 499/s - Cpanel::JSON::XS 43168/s 28114/s 19229/s - JSON::SIMD 49019/s 30699/s 23267/s - JSON::XS 49814/s 31326/s 25336/s - FU::Util 44110/s 26134/s 21144/s + JSON::PP 896/s 826/s 194/s + JSON::Tiny 1216/s - 519/s + Cpanel::JSON::XS 44184/s 28190/s 19449/s + JSON::SIMD 52633/s 31157/s 23587/s + JSON::XS 50314/s 34276/s 25294/s + FU::Util 42121/s 25618/s 19203/s Object (large) Encode Canonical Decode - JSON::PP 927/s 747/s 104/s - JSON::Tiny 1108/s - 392/s - Cpanel::JSON::XS 29672/s 12637/s 16609/s - JSON::SIMD 24418/s 12388/s 22895/s - JSON::XS 23192/s 13174/s 23553/s - FU::Util 39477/s 13567/s 17178/s + JSON::PP 910/s 734/s 98/s + JSON::Tiny 1068/s - 404/s + Cpanel::JSON::XS 27626/s 12484/s 15333/s + JSON::SIMD 34106/s 12808/s 23674/s + JSON::XS 35738/s 13099/s 22637/s + FU::Util 37663/s 13366/s 16292/s Object (large, mixed unicode) Encode Canonical Decode - JSON::PP 817/s 679/s 86/s - JSON::Tiny 1036/s - 402/s - Cpanel::JSON::XS 20437/s 1345/s 7408/s - JSON::SIMD 25031/s 1331/s 15997/s - JSON::XS 23580/s 1375/s 8526/s - FU::Util 34435/s 11916/s 9419/s + JSON::PP 835/s 664/s 82/s + JSON::Tiny 1028/s - 427/s + Cpanel::JSON::XS 24123/s 1352/s 8694/s + JSON::SIMD 26008/s 1413/s 19707/s + JSON::XS 25444/s 1391/s 10442/s + FU::Util 33132/s 12006/s 11861/s Small integers Encode Decode - JSON::PP 113/s 29/s - JSON::Tiny 160/s 86/s - Cpanel::JSON::XS 7137/s 6083/s - JSON::SIMD 7963/s 4361/s - JSON::XS 7915/s 6058/s - FU::Util 8565/s 5639/s + JSON::PP 116/s 30/s + JSON::Tiny 158/s 86/s + Cpanel::JSON::XS 7426/s 5774/s + JSON::SIMD 8294/s 4375/s + JSON::XS 8526/s 6179/s + FU::Util 7996/s 5962/s Large integers Encode Decode - JSON::PP 2176/s 329/s - JSON::Tiny 2999/s 1638/s - Cpanel::JSON::XS 31302/s 48892/s - JSON::SIMD 37201/s 51719/s - JSON::XS 36722/s 50110/s - FU::Util 116188/s 62110/s + JSON::PP 2213/s 341/s + JSON::Tiny 2910/s 1661/s + Cpanel::JSON::XS 32616/s 53053/s + JSON::SIMD 37749/s 53032/s + JSON::XS 38644/s 55004/s + FU::Util 109930/s 63358/s ASCII strings Encode Decode - JSON::PP 2934/s 336/s - JSON::Tiny 4126/s 1439/s - Cpanel::JSON::XS 116744/s 43489/s - JSON::SIMD 134711/s 50429/s - JSON::XS 135419/s 43976/s - FU::Util 182026/s 44312/s + JSON::PP 2811/s 312/s + JSON::Tiny 3924/s 1506/s + Cpanel::JSON::XS 129468/s 51536/s + JSON::SIMD 140393/s 64499/s + JSON::XS 141149/s 56913/s + FU::Util 165938/s 55034/s Unicode strings Encode Decode - JSON::PP 5113/s 253/s - JSON::Tiny 6603/s 2585/s - Cpanel::JSON::XS 91704/s 64489/s - JSON::SIMD 106928/s 102440/s - JSON::XS 105473/s 60558/s - FU::Util 217135/s 58972/s + JSON::PP 5138/s 248/s + JSON::Tiny 6501/s 2677/s + Cpanel::JSON::XS 91004/s 64101/s + JSON::SIMD 101185/s 80941/s + JSON::XS 106312/s 61104/s + FU::Util 205716/s 52041/s String escaping (few) Encode Decode - JSON::PP 4251/s 352/s - JSON::Tiny 4704/s 1869/s - Cpanel::JSON::XS 131789/s 106306/s - JSON::SIMD 158171/s 153692/s - JSON::XS 157261/s 97676/s - FU::Util 191699/s 91177/s + JSON::PP 4269/s 329/s + JSON::Tiny 4878/s 2101/s + Cpanel::JSON::XS 152958/s 105597/s + JSON::SIMD 165340/s 130074/s + JSON::XS 165863/s 87872/s + FU::Util 228511/s 81599/s String escaping (many) Encode Decode - JSON::PP 2224/s 366/s - JSON::Tiny 2884/s 984/s - Cpanel::JSON::XS 136583/s 100789/s - JSON::SIMD 152951/s 113242/s - JSON::XS 153471/s 106269/s - FU::Util 142604/s 97984/s + JSON::PP 4052/s 573/s + JSON::Tiny 4575/s 2274/s + Cpanel::JSON::XS 201958/s 102800/s + JSON::SIMD 242806/s 146341/s + JSON::XS 209689/s 98420/s + FU::Util 210713/s 100255/s =head2 XML Writing +L is the only XS-based XML DSL that I'm aware of, so all direct +competition is inherently slower by virtue of being pure perl. I'm sure some +templating modules will perform better, though. + HTML fragment - TUWF::XML 795/s - XML::Writer 833/s - HTML::Tiny 423/s - FU::XMLWriter 5285/s + TUWF::XML 787/s + XML::Writer 832/s + HTML::Tiny 403/s + FU::XMLWriter 5192/s + + + +=head2 PostgreSQL client + +Fetching query results is highly unlikely to be a bottleneck in your code, this +benchmark is mainly here to verify that L is not introducing a +bottleneck where there shouldn't be one. + +Fetch and bitwise-or 20k integers + + Smallint Bigint + DBD::Pg 346/s 33/s + Pg::PQ 270/s 24/s + FU::Pg (bin) 476/s 46/s + FU::Pg (text) 273/s 23/s =cut # Cached data used by bench.PL. Same as the formatted tables above but easier to parse. -json/api Canonical Cpanel::JSON::XS 101867 -json/api Canonical FU::Util 113275 -json/api Canonical JSON::PP 5119 -json/api Canonical JSON::SIMD 118948 -json/api Canonical JSON::XS 120243 -json/api Decode Cpanel::JSON::XS 103575 -json/api Decode FU::Util 118213 -json/api Decode JSON::PP 1290 -json/api Decode JSON::SIMD 115123 -json/api Decode JSON::Tiny 3426 -json/api Decode JSON::XS 117940 -json/api Encode Cpanel::JSON::XS 108187 -json/api Encode FU::Util 133182 -json/api Encode JSON::PP 5312 -json/api Encode JSON::SIMD 130137 -json/api Encode JSON::Tiny 7757 -json/api Encode JSON::XS 128421 -json/intl Decode Cpanel::JSON::XS 48892 -json/intl Decode FU::Util 62110 -json/intl Decode JSON::PP 329 -json/intl Decode JSON::SIMD 51719 -json/intl Decode JSON::Tiny 1638 -json/intl Decode JSON::XS 50110 -json/intl Encode Cpanel::JSON::XS 31302 -json/intl Encode FU::Util 116188 -json/intl Encode JSON::PP 2176 -json/intl Encode JSON::SIMD 37201 -json/intl Encode JSON::Tiny 2999 -json/intl Encode JSON::XS 36722 -json/ints Decode Cpanel::JSON::XS 6083 -json/ints Decode FU::Util 5639 -json/ints Decode JSON::PP 29 -json/ints Decode JSON::SIMD 4361 +json/api Canonical Cpanel::JSON::XS 98734 +json/api Canonical FU::Util 109166 +json/api Canonical JSON::PP 4943 +json/api Canonical JSON::SIMD 114822 +json/api Canonical JSON::XS 117518 +json/api Decode Cpanel::JSON::XS 105811 +json/api Decode FU::Util 113983 +json/api Decode JSON::PP 1240 +json/api Decode JSON::SIMD 118410 +json/api Decode JSON::Tiny 3474 +json/api Decode JSON::XS 120190 +json/api Encode Cpanel::JSON::XS 108128 +json/api Encode FU::Util 126909 +json/api Encode JSON::PP 5136 +json/api Encode JSON::SIMD 125105 +json/api Encode JSON::Tiny 7617 +json/api Encode JSON::XS 128749 +json/intl Decode Cpanel::JSON::XS 53053 +json/intl Decode FU::Util 63358 +json/intl Decode JSON::PP 341 +json/intl Decode JSON::SIMD 53032 +json/intl Decode JSON::Tiny 1661 +json/intl Decode JSON::XS 55004 +json/intl Encode Cpanel::JSON::XS 32616 +json/intl Encode FU::Util 109930 +json/intl Encode JSON::PP 2213 +json/intl Encode JSON::SIMD 37749 +json/intl Encode JSON::Tiny 2910 +json/intl Encode JSON::XS 38644 +json/ints Decode Cpanel::JSON::XS 5774 +json/ints Decode FU::Util 5962 +json/ints Decode JSON::PP 30 +json/ints Decode JSON::SIMD 4375 json/ints Decode JSON::Tiny 86 -json/ints Decode JSON::XS 6058 -json/ints Encode Cpanel::JSON::XS 7137 -json/ints Encode FU::Util 8565 -json/ints Encode JSON::PP 113 -json/ints Encode JSON::SIMD 7963 -json/ints Encode JSON::Tiny 160 -json/ints Encode JSON::XS 7915 -json/objl Canonical Cpanel::JSON::XS 12637 -json/objl Canonical FU::Util 13567 -json/objl Canonical JSON::PP 747 -json/objl Canonical JSON::SIMD 12388 -json/objl Canonical JSON::XS 13174 -json/objl Decode Cpanel::JSON::XS 16609 -json/objl Decode FU::Util 17178 -json/objl Decode JSON::PP 104 -json/objl Decode JSON::SIMD 22895 -json/objl Decode JSON::Tiny 392 -json/objl Decode JSON::XS 23553 -json/objl Encode Cpanel::JSON::XS 29672 -json/objl Encode FU::Util 39477 -json/objl Encode JSON::PP 927 -json/objl Encode JSON::SIMD 24418 -json/objl Encode JSON::Tiny 1108 -json/objl Encode JSON::XS 23192 -json/objs Canonical Cpanel::JSON::XS 28114 -json/objs Canonical FU::Util 26134 -json/objs Canonical JSON::PP 829 -json/objs Canonical JSON::SIMD 30699 -json/objs Canonical JSON::XS 31326 -json/objs Decode Cpanel::JSON::XS 19229 -json/objs Decode FU::Util 21144 -json/objs Decode JSON::PP 202 -json/objs Decode JSON::SIMD 23267 -json/objs Decode JSON::Tiny 499 -json/objs Decode JSON::XS 25336 -json/objs Encode Cpanel::JSON::XS 43168 -json/objs Encode FU::Util 44110 -json/objs Encode JSON::PP 907 -json/objs Encode JSON::SIMD 49019 -json/objs Encode JSON::Tiny 1224 -json/objs Encode JSON::XS 49814 -json/obju Canonical Cpanel::JSON::XS 1345 -json/obju Canonical FU::Util 11916 -json/obju Canonical JSON::PP 679 -json/obju Canonical JSON::SIMD 1331 -json/obju Canonical JSON::XS 1375 -json/obju Decode Cpanel::JSON::XS 7408 -json/obju Decode FU::Util 9419 -json/obju Decode JSON::PP 86 -json/obju Decode JSON::SIMD 15997 -json/obju Decode JSON::Tiny 402 -json/obju Decode JSON::XS 8526 -json/obju Encode Cpanel::JSON::XS 20437 -json/obju Encode FU::Util 34435 -json/obju Encode JSON::PP 817 -json/obju Encode JSON::SIMD 25031 -json/obju Encode JSON::Tiny 1036 -json/obju Encode JSON::XS 23580 -json/strel Decode Cpanel::JSON::XS 100789 -json/strel Decode FU::Util 97984 -json/strel Decode JSON::PP 366 -json/strel Decode JSON::SIMD 113242 -json/strel Decode JSON::Tiny 984 -json/strel Decode JSON::XS 106269 -json/strel Encode Cpanel::JSON::XS 136583 -json/strel Encode FU::Util 142604 -json/strel Encode JSON::PP 2224 -json/strel Encode JSON::SIMD 152951 -json/strel Encode JSON::Tiny 2884 -json/strel Encode JSON::XS 153471 -json/stres Decode Cpanel::JSON::XS 106306 -json/stres Decode FU::Util 91177 -json/stres Decode JSON::PP 352 -json/stres Decode JSON::SIMD 153692 -json/stres Decode JSON::Tiny 1869 -json/stres Decode JSON::XS 97676 -json/stres Encode Cpanel::JSON::XS 131789 -json/stres Encode FU::Util 191699 -json/stres Encode JSON::PP 4251 -json/stres Encode JSON::SIMD 158171 -json/stres Encode JSON::Tiny 4704 -json/stres Encode JSON::XS 157261 -json/strs Decode Cpanel::JSON::XS 43489 -json/strs Decode FU::Util 44312 -json/strs Decode JSON::PP 336 -json/strs Decode JSON::SIMD 50429 -json/strs Decode JSON::Tiny 1439 -json/strs Decode JSON::XS 43976 -json/strs Encode Cpanel::JSON::XS 116744 -json/strs Encode FU::Util 182026 -json/strs Encode JSON::PP 2934 -json/strs Encode JSON::SIMD 134711 -json/strs Encode JSON::Tiny 4126 -json/strs Encode JSON::XS 135419 -json/stru Decode Cpanel::JSON::XS 64489 -json/stru Decode FU::Util 58972 -json/stru Decode JSON::PP 253 -json/stru Decode JSON::SIMD 102440 -json/stru Decode JSON::Tiny 2585 -json/stru Decode JSON::XS 60558 -json/stru Encode Cpanel::JSON::XS 91704 -json/stru Encode FU::Util 217135 -json/stru Encode JSON::PP 5113 -json/stru Encode JSON::SIMD 106928 -json/stru Encode JSON::Tiny 6603 -json/stru Encode JSON::XS 105473 -xml/a Rate FU::XMLWriter 5285 -xml/a Rate HTML::Tiny 423 -xml/a Rate TUWF::XML 795 -xml/a Rate XML::Writer 833 +json/ints Decode JSON::XS 6179 +json/ints Encode Cpanel::JSON::XS 7426 +json/ints Encode FU::Util 7996 +json/ints Encode JSON::PP 116 +json/ints Encode JSON::SIMD 8294 +json/ints Encode JSON::Tiny 158 +json/ints Encode JSON::XS 8526 +json/objl Canonical Cpanel::JSON::XS 12484 +json/objl Canonical FU::Util 13366 +json/objl Canonical JSON::PP 734 +json/objl Canonical JSON::SIMD 12808 +json/objl Canonical JSON::XS 13099 +json/objl Decode Cpanel::JSON::XS 15333 +json/objl Decode FU::Util 16292 +json/objl Decode JSON::PP 98 +json/objl Decode JSON::SIMD 23674 +json/objl Decode JSON::Tiny 404 +json/objl Decode JSON::XS 22637 +json/objl Encode Cpanel::JSON::XS 27626 +json/objl Encode FU::Util 37663 +json/objl Encode JSON::PP 910 +json/objl Encode JSON::SIMD 34106 +json/objl Encode JSON::Tiny 1068 +json/objl Encode JSON::XS 35738 +json/objs Canonical Cpanel::JSON::XS 28190 +json/objs Canonical FU::Util 25618 +json/objs Canonical JSON::PP 826 +json/objs Canonical JSON::SIMD 31157 +json/objs Canonical JSON::XS 34276 +json/objs Decode Cpanel::JSON::XS 19449 +json/objs Decode FU::Util 19203 +json/objs Decode JSON::PP 194 +json/objs Decode JSON::SIMD 23587 +json/objs Decode JSON::Tiny 519 +json/objs Decode JSON::XS 25294 +json/objs Encode Cpanel::JSON::XS 44184 +json/objs Encode FU::Util 42121 +json/objs Encode JSON::PP 896 +json/objs Encode JSON::SIMD 52633 +json/objs Encode JSON::Tiny 1216 +json/objs Encode JSON::XS 50314 +json/obju Canonical Cpanel::JSON::XS 1352 +json/obju Canonical FU::Util 12006 +json/obju Canonical JSON::PP 664 +json/obju Canonical JSON::SIMD 1413 +json/obju Canonical JSON::XS 1391 +json/obju Decode Cpanel::JSON::XS 8694 +json/obju Decode FU::Util 11861 +json/obju Decode JSON::PP 82 +json/obju Decode JSON::SIMD 19707 +json/obju Decode JSON::Tiny 427 +json/obju Decode JSON::XS 10442 +json/obju Encode Cpanel::JSON::XS 24123 +json/obju Encode FU::Util 33132 +json/obju Encode JSON::PP 835 +json/obju Encode JSON::SIMD 26008 +json/obju Encode JSON::Tiny 1028 +json/obju Encode JSON::XS 25444 +json/strel Decode Cpanel::JSON::XS 102800 +json/strel Decode FU::Util 100255 +json/strel Decode JSON::PP 573 +json/strel Decode JSON::SIMD 146341 +json/strel Decode JSON::Tiny 2274 +json/strel Decode JSON::XS 98420 +json/strel Encode Cpanel::JSON::XS 201958 +json/strel Encode FU::Util 210713 +json/strel Encode JSON::PP 4052 +json/strel Encode JSON::SIMD 242806 +json/strel Encode JSON::Tiny 4575 +json/strel Encode JSON::XS 209689 +json/stres Decode Cpanel::JSON::XS 105597 +json/stres Decode FU::Util 81599 +json/stres Decode JSON::PP 329 +json/stres Decode JSON::SIMD 130074 +json/stres Decode JSON::Tiny 2101 +json/stres Decode JSON::XS 87872 +json/stres Encode Cpanel::JSON::XS 152958 +json/stres Encode FU::Util 228511 +json/stres Encode JSON::PP 4269 +json/stres Encode JSON::SIMD 165340 +json/stres Encode JSON::Tiny 4878 +json/stres Encode JSON::XS 165863 +json/strs Decode Cpanel::JSON::XS 51536 +json/strs Decode FU::Util 55034 +json/strs Decode JSON::PP 312 +json/strs Decode JSON::SIMD 64499 +json/strs Decode JSON::Tiny 1506 +json/strs Decode JSON::XS 56913 +json/strs Encode Cpanel::JSON::XS 129468 +json/strs Encode FU::Util 165938 +json/strs Encode JSON::PP 2811 +json/strs Encode JSON::SIMD 140393 +json/strs Encode JSON::Tiny 3924 +json/strs Encode JSON::XS 141149 +json/stru Decode Cpanel::JSON::XS 64101 +json/stru Decode FU::Util 52041 +json/stru Decode JSON::PP 248 +json/stru Decode JSON::SIMD 80941 +json/stru Decode JSON::Tiny 2677 +json/stru Decode JSON::XS 61104 +json/stru Encode Cpanel::JSON::XS 91004 +json/stru Encode FU::Util 205716 +json/stru Encode JSON::PP 5138 +json/stru Encode JSON::SIMD 101185 +json/stru Encode JSON::Tiny 6501 +json/stru Encode JSON::XS 106312 +pg/ints Bigint DBD::Pg 33 +pg/ints Bigint FU::Pg (bin) 46 +pg/ints Bigint FU::Pg (text) 23 +pg/ints Bigint Pg::PQ 24 +pg/ints Smallint DBD::Pg 346 +pg/ints Smallint FU::Pg (bin) 476 +pg/ints Smallint FU::Pg (text) 273 +pg/ints Smallint Pg::PQ 270 +xml/a Rate FU::XMLWriter 5192 +xml/a Rate HTML::Tiny 403 +xml/a Rate TUWF::XML 787 +xml/a Rate XML::Writer 832 diff --git a/FU/DebugImpl.pm b/FU/DebugImpl.pm index 1fc09cd..4fd2a26 100644 --- a/FU/DebugImpl.pm +++ b/FU/DebugImpl.pm @@ -1,10 +1,11 @@ # Internal module used by FU.pm -package FU::DebugImpl 0.2; +package FU::DebugImpl 1.4; use v5.36; +use utf8; use experimental 'for_list'; use FU; use FU::XMLWriter ':html5_', 'fragment', 'xml_escape'; -use Time::HiRes 'time'; +use Time::HiRes 'time', 'clock_gettime', 'CLOCK_MONOTONIC'; use POSIX 'strftime'; sub fmtts { strftime '%Y-%m-%d %H:%M:%S UTC', gmtime shift } @@ -15,27 +16,33 @@ sub loc_($loc) { br_ if $_; my $l = $loc->[$_]; my $f = $_ == $#$loc ? '(main)' : $loc->[$_+1][3]; - txt_ "$l->[1]:$l->[2] $f"; + $f = "$l->[0]::$f" if $f !~ /^\Q$l->[0]/; + txt_ $f; + small_ " @ $l->[1]:$l->[2]"; } } -sub fmtpre_($code) { - lit_ xml_escape($code) =~ s/^\s+//r =~ s/\s+$//r =~ s/\n/
/rg; -} - sub clean_re($str) { # Regex formatting isn't stable, but this cleans up the crap I'm seeing a little bit. "$str" =~ s/^\(\?\^u:\^\(\?\^u://r =~ s/\)\$\)$//r; } -my @tabs = ( +sub raw_data($str) { + my $d = substr $str, 0, 32*1024; + my $trunc = length $str > 32*1024 ? ', truncated' : ''; + return utf8::decode($d) ? ("utf8$trunc", $d) + : ("hex$trunc", unpack('H*', $d) =~ s/(.{128})/$1\n/rg =~ s/(.{16})/$1 /rg); +} + +my @sections = ( req => sub { + my $r = $FU::REQ; table_ sub { tr_ sub { td_ 'Method'; td_ fu->method }; tr_ sub { td_ 'Path'; td_ fu->path }; tr_ sub { td_ 'Query'; td_ fu->query }; tr_ sub { td_ 'Client IP'; td_ fu->ip }; - tr_ sub { td_ 'Received'; td_ fmtts $FU::REQ->{trace_start} }; + tr_ sub { td_ 'Received'; td_ fmtts(time - (($r->{trace_end}||clock_gettime(CLOCK_MONOTONIC)) - $r->{trace_start})) }; }; h2_ 'Headers'; table_ sub { @@ -44,7 +51,38 @@ my @tabs = ( td_ fu->headers->{$_}; } for sort keys fu->headers->%*; }; - # TODO: Body? Certainly useful for JSON + if ((fu->header('content-length')||0) > 0) { + h2_ 'Body'; + section_ class => 'tabs', sub { + my $json = eval { fu->json({type=>'any'}) }; + details_ name => 'reqbody', open => !0, sub { + summary_ 'JSON'; + pre_ FU::Util::json_format($json, pretty => 1, canonical => 1); + } if $json; + my $formdata = eval { fu->formdata({type=>'hash'}) }; + details_ name => 'reqbody', open => !0, sub { + summary_ 'Form data'; + table_ sub { + for my $k (sort keys %$formdata) { + tr_ sub { + td_ $k; + td_ $_; + } for ref $formdata->{$k} ? $formdata->{$k}->@* : ($formdata->{$k}); + } + }; + } if $formdata; + my $multipart = eval { fu->multipart }; + details_ name => 'reqbody', open => !0, sub { + summary_ 'Multipart'; + pre_ join "\n", map $_->describe, @$multipart; + } if $multipart; + details_ name => 'reqbody', open => !0,sub { + my($lbl, $data) = raw_data $r->{body}; + summary_ "Raw ($lbl)"; + pre_ $data; + }; + } + } ('Request') }, @@ -75,37 +113,111 @@ my @tabs = ( }; h2_ 'Headers'; table_ sub { - tr_ sub { - td_ $_; - td_ $r->{reshdr}{$_}; - } for sort keys $r->{reshdr}->%*; + for my $k (sort keys $r->{reshdr}->%*) { + my $v = $r->{reshdr}{$k}; + tr_ sub { + td_ $k; + td_ $_; + } for !defined $v ? () : ref $v ? @$v : ($v); + } }; + my $body = $r->{resbody_orig} // $r->{resbody}; + if (length $body) { + h2_ 'Body'; + section_ class => 'tabs', sub { + my $json = ($r->{reshdr}{'content-type'}||'') =~ /^application\/json/ && eval { FU::Util::json_parse($body, utf8 => 1) }; + details_ name => 'resbody', open => !0, sub { + summary_ 'JSON'; + pre_ FU::Util::json_format($json, pretty => 1, canonical => 1); + } if $json; + details_ name => 'resbody', open => !0,sub { + my($lbl, $data) = raw_data $body; + summary_ "Raw ($lbl)"; + pre_ $data; + }; + } + } ('Response') }, sql => sub { - return () if !$FU::REQ->{trace_sql}; - table_ sub { + my $queries = $FU::REQ->{trace_sql}; + return () if !$queries; + + # Convert binary params to text. + # For queries with text_params, assume the params are already valid for the text format. + my @binparams = grep $_->{type} && !exists $_->{text}, map $_->{params}->@*, @$queries; + my @arg = map +($_->{type}, $_->{bin}), @binparams; + my @text; + my $ok = !@arg || eval { @text = $FU::DB->bin2text(@arg); 1 }; + $binparams[$_]{text} = $text[$_] for 0..$#text; + pre_ "Error converting binary parameters:\n$@" if !$ok; + + input_ type => 'checkbox', id => "row${_}_c" for 0..$#{$queries}; + table_ class => 'sqlt', sub { thead_ sub { tr_ sub { td_ class => 'num', 'Exec'; td_ class => 'num', 'Prep'; td_ class => 'num', 'Rows'; td_ 'Query'; } }; + my $rows = 0; + for my($i, $st) (builtin::indexed $queries->@*) { + $rows += $st->{nrows}; + tr_ sub { + td_ class => 'num', sprintf '%.1f ms', $st->{exec_time}*1000; + td_ class => 'num', !defined $st->{prepare_time} ? '-' : $st->{prepare_time} ? sprintf '%.1f ms', $st->{prepare_time}*1000 : 'cache'; + td_ class => 'num', $st->{nrows}; + td_ class => 'sum', sub { + label_ for => "row${i}_c", sub { + span_ class => 'closed', '▶'; + span_ class => 'open', '▼'; + txt_ $st->{query} =~ s/[\r\n]/ /rg =~ s/\s\s+/ /rg =~ s/^\s+//r; + }; + }; + }; + tr_ class => 'details', id => "row$i", sub { + td_ ''; + td_ colspan => 3, sub { + pre_ $st->{query}; + if ($st->{params}->@*) { + strong_ 'Parameters:'; + table_ sub { + tr_ sub { + td_ class => 'num', sprintf '$%d =', $_+1; + td_ class => 'code', sub { + my $p = $st->{params}[$_]{text}; + !defined $p ? em_ 'null' : txt_ $p; + }; + } for (0..$#{$st->{params}}); + }; + # XXX: Buggy when the query contains string literals with $n variables. + strong_ 'Interpolated:'; + pre_ $st->{query} =~ s{\$([1-9][0-9]*)}{ + my $v = $st->{params}[$1-1]{text}; + defined $v ? $FU::DB->escape_literal($v) : 'NULL' + }egr; + } + }; + }; + } tr_ sub { - td_ class => 'num', sprintf '%.1f ms', $_->{exec_time}*1000; - td_ class => 'num', !defined $_->{prepare_time} ? '-' : $_->{prepare_time} ? sprintf '%.1f ms', $_->{prepare_time}*1000 : 'cache'; - td_ class => 'num', $_->{nrows}; - td_ class => 'code', sub { fmtpre_ $_->{query} }; - # TODO: Params, both separate and interpolated - } for $FU::REQ->{trace_sql}->@*; + td_ class => 'num', sprintf '%.1f ms', $FU::REQ->{trace_sqlexec}*1000; + td_ class => 'num', !defined $FU::REQ->{trace_sqlprep} ? '-' : sprintf '%.1f ms', $FU::REQ->{trace_sqlprep}*1000; + td_ class => 'num', $rows; + td_ class => 'sum', 'total'; + } if @$queries > 1; }; - ('Queries', scalar $FU::REQ->{trace_sql}->@*) + ('Queries', scalar @$queries) }, fu => sub { return () if !keys fu->%*; - # TODO: Contents of the 'fu' object + # TODO: This is kinda lazy, an expandable table might be nicer. + require Data::Dumper; + pre_ sub { + lit_ Data::Dumper->new([fu])->Sortkeys(1)->Terse(1)->Dump; + }; ('fu obj') }, @@ -171,7 +283,7 @@ my @tabs = ( pgst => sub { return () if !$FU::DB; - my $lst = eval { $FU::DB->q( + my $lst = eval { $FU::DB->sql( 'SELECT generic_plans + custom_plans, statement FROM pg_prepared_statements ORDER BY generic_plans + custom_plans DESC, statement' )->cache(0)->alla } || do { warn "Unable to collect prepared statement list: $@"; return () }; return () if !@$lst; @@ -182,19 +294,20 @@ my @tabs = ( } }; tr_ sub { td_ $_->[0]; - td_ class => 'code', sub { fmtpre_ $_->[1] }; + td_ class => 'code', $_->[1]; } for @$lst; }; - ('Prepared statements', scalar @$lst) + ('Prepared stmts', scalar @$lst) }, ); sub collect { my @t; - for my ($id, $sub) (@tabs) { + for my ($id, $sub) (@sections) { my($title, $num); my $html = fragment { ($title, $num) = $sub->() }; + utf8::decode($html); push @t, { id => $id, title => $title, num => $num, html => $html } if $title; } \@t @@ -206,47 +319,9 @@ sub framework_($data) { head_ sub { title_ 'FU Debugging Interface'; meta_ name => 'viewport', content => 'width=device-width, initial-scale=1.0, user-scalable=yes'; + link_ rel => 'stylesheet', type => 'text/css', media => 'all', href => '?css'; style_ type => 'text/css', <<~_; - html { box-sizing: border-box; color: #000; background: #fff } - *, *:before, *:after { box-sizing: inherit } - * { margin: 0; padding: 0; font: inherit; color: inherit } - - body { display: grid; grid: 45px 400px / 220px auto; } - header { grid-column: 1 / 3; grid-row: 1 / 2 } - nav { grid-column: 1 / 2; grid-row: 2 / 3 } - main { grid-column: 2 / 3; grid-row: 2 / 3 } - - header, nav { background: #eee } - main { border-top: 2px solid #009; border-left: 2px solid #009 } - nav { border-bottom: 2px solid #009 } - - header { display: flex; justify-content: space-between; padding: 10px } - header h1 { font-size: 20px; font-weight: bold } - header menu { list-style-type: none; display: flex; gap: 15px } - - body > input { display: none } - nav { padding-top: 20px } - nav menu { list-style-type: none } - nav label { display: block; width: 100%; padding: 2px 10px; cursor: pointer; white-space: nowrap } - nav label:hover { background-color: #fff } - nav label span { float: right; font-size: 80% } - - main { padding: 10px 20px } - main h2 { margin: 30px 0 5px -10px; font-size: 20px; font-weight: bold } - main h2:first-child { margin-top: 0 } - - p, pre, table { margin: 5px 0 } - pre, .code { font-family: monospace; white-space: pre } - table { border-collapse: collapse } - td { padding: 1px 10px 1px 0; font-size: 12px; vertical-align: top } - tr:hover { background-color: #eee } - thead { font-weight: bold } - .num { text-align: right; white-space: nowrap } _ - style_ type => 'text/css', join "\n", map +( - "#tab_$_:checked ~ nav menu li label[for=tab_$_] { background-color: #fff }", - "#tab_$_:not(:checked) ~ main #tabc_$_ { display: none }", - ), map $_->{id}, @$data; }; body_ sub { header_ sub { @@ -257,22 +332,21 @@ sub framework_($data) { li_ sub { a_ href => '?', 'Listing' }; }; }; - input_ type => 'radio', name => 'tab', id => "tab_$_->{id}", checked => $_ eq $data->[0] ? 'checked' : undef for @$data; nav_ sub { menu_ sub { li_ sub { - label_ for => "tab_$_->{id}", sub { + a_ href => "#$_->{id}", sub { txt_ $_->{title}; span_ $_->{num} if defined $_->{num}; - } + }; } for @$data; }; } if @$data; main_ sub { - div_ id => "tabc_$_->{id}", sub { - h2_ $_->{title}; + for (@$data) { + h1_ id => $_->{id}, $_->{title}; lit_ $_->{html}; - } for @$data; + } }; }; }; @@ -313,10 +387,23 @@ sub load($id) { fu->set_body(scalar <$fn>); } +sub css { + # Awful CSS row hiding hack. I'm not sorry. + state $css = join '', , map qq{ + #row${_}_c:checked ~ * label[for=row${_}_c] .closed { display: none } + #row${_}_c:not(:checked) ~ * label[for=row${_}_c] .open { display: none } + #row${_}_c:not(:checked) ~ * #row${_} { display: none } + }, 0..1000; +} + sub render { my $q = fu->query; if (!$q) { fu->set_body(framework_ [{id => 'lst', title => 'Recent Requests', html => fragment \&listing_ }]); + } elsif ($q eq 'css') { + fu->set_header('content-type', 'text/css'); + fu->set_header('cache-control', 'max-age=86400'); + fu->set_body(css()); } elsif ($q eq 'cur') { fu->set_body(framework_ collect); } elsif ($q eq 'last') { @@ -342,7 +429,7 @@ sub save { return; }; my $line = sprintf "%d %f %s %s %s\n", - time, time - $FU::REQ->{trace_start}, $FU::REQ->{status}, + time, $FU::REQ->{trace_end} - $FU::REQ->{trace_start}, $FU::REQ->{status}, fu->method, fu->path.(fu->query?'?'.fu->query:''); utf8::encode($line); print $fh $line; @@ -350,3 +437,62 @@ sub save { } 1; + +__DATA__ +html { box-sizing: border-box; color: #000; background: #fff } +*, *:before, *:after { box-sizing: inherit } +* { margin: 0; padding: 0; font: inherit; color: inherit } + +/* Ugh, fixed positioning */ +header { position: fixed; top: 0; left: 0; width: 100%; height: 40px; z-index: 2 } +nav { position: fixed; top: 38px; left: 0; width: 200px; z-index: 2 } +main { margin: 0 0 0 200px } + +header, nav { background: #eee } +header { border-bottom: 2px solid #009 } +nav { border-bottom: 2px solid #009; border-right: 2px solid #009 } + +header { display: flex; justify-content: space-between; align-items: baseline; padding: 5px 10px } +header h1 { font-size: 120%; font-weight: bold } +header menu { list-style-type: none; display: flex; gap: 15px } + +body > input { display: none } +nav { padding-top: 20px } +nav menu { list-style-type: none } +nav a { display: block; width: 100%; text-decoration: none; padding: 2px 10px; cursor: pointer; white-space: nowrap } +nav a:hover { background-color: #fff } +nav a span { float: right; font-size: 80% } + +main { padding: 0 10px 30px 10px } +main h1 { background: #eee; padding: 5px 10px 5px 205px; margin: 40px -10px 10px -210px; scroll-margin-top: 40px; font-size: 130%; font-weight: bold } +main h2 { margin: 20px 0 5px 0; font-size: 120%; font-weight: bold } + +p, table, pre { margin: 5px 0 } +pre { border-left: 2px dotted #999; padding-left: 5px; font-family: monospace; white-space: pre; overflow-x: auto; padding-bottom: 15px; /* for the scrollbar, kinda browser-specific */ } +table { border-collapse: collapse } +td { padding: 1px 10px 1px 0; font-size: 12px; vertical-align: top } +td.code { font-family: monospace } +tr:hover { background-color: #eee } +thead { font-weight: bold } +.num { text-align: right; white-space: nowrap } + +section.tabs { position: relative; display: flex; flex-wrap: wrap; z-index: 1; } +section.tabs summary { cursor: pointer; order: 0; display: block; padding: 3px 5px; margin-right: 10px; background: #ddd } +section.tabs summary:hover, section.tabs details[open] summary { background: #eee } +section.tabs details { display: contents } +section.tabs details *:nth-child(2) { order: 1; width: 100% } + +.sqlt { width: 100%; table-layout: fixed } +.sqlt .num { width: 50px } +.sqlt .num:first-child { width: 75px } +.sqlt .num:nth-child(2) { width: 60px } +.sqlt .sum { white-space: nowrap; font-family: monospace; overflow: hidden; text-overflow: ellipsis } +.sqlt label { cursor: pointer } +.sqlt label span { color: #555; display: inline-block; width: 15px } +.sqlt tr.details { background: #fff } +.sqlt tr.details > td { padding-bottom: 10px } +input[id^=row] { display: none } + +small { color: #555; font-size: 90% } +em { font-style: italic } +strong { font-weight: bold } diff --git a/FU/Log.pm b/FU/Log.pm index 17f809c..44f881c 100644 --- a/FU/Log.pm +++ b/FU/Log.pm @@ -1,4 +1,4 @@ -package FU::Log 0.2; +package FU::Log 1.4; use v5.36; use Exporter 'import'; use POSIX 'strftime'; @@ -65,11 +65,6 @@ __END__ FU::Log - Extremely Basic Process-Wide Logging Infrastructure -=head1 EXPERIMENTAL - -This module is still in development and there will likely be a few breaking API -changes, see the main L module for details. - =head1 SYNOPSIS use FU::Log 'log_write'; @@ -89,7 +84,7 @@ interface either; the entire point of this module is that it only handles process-global logging. This module mainly exists for users of the L framework. -=head2 Configuration +=head1 Configuration =over @@ -119,7 +114,7 @@ is then used instead. This is to avoid recursion. =back -=head2 Exportable function +=head1 Exportable function =over diff --git a/FU/MultipartFormData.pm b/FU/MultipartFormData.pm new file mode 100644 index 0000000..46c6a6d --- /dev/null +++ b/FU/MultipartFormData.pm @@ -0,0 +1,210 @@ +package FU::MultipartFormData 1.4; +use v5.36; +use Carp 'confess'; +use FU::Util 'utf8_decode'; + +sub _arg($d) { $d =~ s{^"(.+)"$}{$1 =~ s/\\([\\"])/$1/rg}er } + +sub parse($pkg, $header, $data) { + confess "Invalid multipart header '$header'" + if $header !~ m{^multipart/form-data\s*;\s*boundary\s*=(.+)$}; + my $boundary = _arg $1; + confess "Invalid multipart boundary '$boundary'" if $boundary !~ /^[\x21-\x7e]+$/; + utf8::encode($boundary); + + my @a; + while ($data =~ m{--\Q$boundary\E(?:--\r\n|\r\n((?:.+\r\n)+)\r\n)}xg) { + my $hdrs = $1; + $a[$#a]{length} = $-[0] - 2 - $a[$#a]{start} if @a; + if (!$hdrs) { + confess "Trailing garbage" if pos $data != length $data; + last; + } + + my $d = bless { + data => $data, + start => pos $data, + }, $pkg; + + confess "Missing content-disposition header" if $hdrs !~ /content-disposition:\s*form-data(.+)/i; + my $v = $1; + my $pvalue = qr/("(?:\\[\\"]|[^\\"\r\n]+)*"|[^\s;"]*)/; + confess "Missing 'name' parameter" if $v !~ /;\s*name\s*=\s*$pvalue/; + $d->{name} = utf8_decode _arg $1; + $d->{filename} = utf8_decode _arg $1 if $v =~ /;\s*filename\s*=\s*$pvalue/; + + if ($hdrs =~ /content-type:\s*$pvalue(?:\s*;\s*charset\s*=\s*$pvalue)?/i) { + $d->{mime} = utf8_decode _arg $1; + $d->{charset} = utf8_decode _arg $2 if $2; + } + push @a, $d; + } + confess "Missing end-of-data marker" if @a && !defined $a[$#a]{length}; + \@a +} + +sub name { $_[0]{name} } +sub filename { $_[0]{filename} } +sub mime { $_[0]{mime} } +sub charset { $_[0]{charset} } +sub length { $_[0]{length} } + +sub substr($o,$off,$len=undef) { + $off += $o->{length} if $off < 0; + $off = 0 if $off < 0; + $off = $o->{length} if $off > $o->{length}; + + $len //= $o->{length} - $off; + $len += $o->{length} - 1 if $len < 0; + $len = 0 if $len < 0; + $len = $o->{length} - $off if $len > $o->{length} - $off; + + substr $o->{data}, $o->{start} + $off, $len; +} + +sub data { $_[0]->substr(0) } +sub value { utf8_decode $_[0]->data } + +sub syswrite($o, $fh) { + my $off = $o->{start}; + my $end = $o->{start} + $o->{length}; + while ($off < $end) { + my $r = syswrite $fh, $o->{data}, $end-$off, $off; + return if !defined $r; + $off += $r; + } + $o->{length}; +} + +sub save($o, $fn) { + open my $F, '>', $fn or confess "Error opening '$fn': $!"; + defined $o->syswrite($F) or confess "Error writing to '$fn': $!"; +} + +sub describe($o) { + my $head = eval { utf8_decode $o->substr(0, 100) }; + if (defined $head && $head =~ /\n/) { + ($head) = split /\n/, $head, 2; + $head .= '...'; + } elsif (defined $head && $o->{length} > 100) { + $head .= '...'; + } + $o->{name}.': '.join ' ', + $o->{filename} ? "filename=$o->{filename}" : (), + $o->{mime} ? "mime=$o->{mime}" : (), + $o->{charset} ? "charset=$o->{charset}" : (), + "length=$o->{length}", + defined $head ? "value=$head" : (); +} + +1; +__END__ + +=head1 NAME + +FU::MultipartFormData - Parse multipart/form-data + +=head1 SYNOPSIS + + my $fields = FU::MultipartFormData->parse($content_type_header, $request_body); + + for my $f (@$fields) { + print "%s %d\n", $f->name, $f->length; + + $f->save('file.png') if $f->name eq 'image'; + } + +=head1 DESCRIPTION + +This is a tiny module to parse an HTTP request body encoded as +C, which is typically used to handle file uploads. + +The entire request body is assumed to be in memory as a Perl string, but this +module makes an attempt to avoid any further copies of data values. + +=head1 Parsing + +=over + +=item FU::MultipartFormData->parse($header, $body) + +Returns an array of field objects from the given C<$header>, which must be a +valid value for the C request header, and the given C<$body>, +which must hold the request body as a byte string. An error is thrown if the +header is not valid or parsing failed. + +This module is pretty lousy and does not fully comform to any HTTP standards, +but it does happen to be able to parse POST data from any browser that I've +tried. + +=back + +=head1 Field Object + +Each field is parsed into a field object that supports the following methods: + +=over + +=item name + +Returns the field name as a Perl Unicode string. + +=item filename + +Returns the filename as a Perl Unicode string, or C if no filename was +provided. + +=item mime + +Returns the mime type extracted from the field's C header, or +C if none was present. + +=item charset + +Returns the charset extracted from the field's C header, or +C if none was present. + +=item length + +Returns the byte length of the field value. + +=item data + +Returns a copy of the field value as a byte string. You'll want to avoid using +this on large fields. + +=item value + +Returns a copy of the field value as a Unicode string. + +=item substr($off, $len) + +Equivalent to calling C on the string returned by C, but avoids +a copy of the entire field value. + +=item syswrite($fh) + +Write the field value to C<$fh> using Perl's C, returns C on +error or the number of bytes written on success. + +Can be used to write uploaded file data to a file or send it over a socket or +pipe, without making a full in-memory copy of the data. + +=item save($fn) + +Save the field value to the file C<$fn>, throws an error on failure. + +=item describe + +Returns a human-readable string to describe this field. Mainly for debugging +purposes, the exact format is subject to change. + +=back + +=head1 COPYRIGHT + +MIT. + +=head1 AUTHOR + +Yorhel diff --git a/FU/Pg.pm b/FU/Pg.pm index 175bba3..465d076 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -1,4 +1,4 @@ -package FU::Pg 0.2; +package FU::Pg 1.4; use v5.36; use FU::XS; @@ -7,11 +7,15 @@ _load_libpq(); package FU::Pg::conn { sub lib_version { FU::Pg::lib_version() } - sub Q { + sub SQL { require FU::SQL; my $s = shift; - my($sql, $params) = FU::SQL::SQL(@_)->compile(placeholder_style => 'pg', in_style => 'pg'); - $s->q($sql, @$params); + my($sql, $params) = FU::SQL::SQL(@_)->compile( + placeholder_style => 'pg', + in_style => 'pg', + quote_identifier => sub { $s->conn->escape_identifier(@_) }, + ); + $s->sql($sql, @$params); } sub set_type($s, $n, @arg) { @@ -22,7 +26,13 @@ package FU::Pg::conn { } }; -*FU::Pg::txn::Q = \*FU::Pg::conn::Q; +*FU::Pg::txn::SQL = \*FU::Pg::conn::SQL; + +# Compat +*FU::Pg::conn::q = \*FU::Pg::conn::sql; +*FU::Pg::txn::q = \*FU::Pg::txn::sql; +*FU::Pg::conn::Q = \*FU::Pg::conn::SQL; +*FU::Pg::txn::Q = \*FU::Pg::txn::SQL; package FU::Pg::error { use overload '""' => sub($e, @) { $e->{full_message} }; @@ -35,11 +45,6 @@ __END__ FU::Pg - The Ultimate (synchronous) Interface to PostgreSQL -=head1 EXPERIMENTAL - -This module is still in development and there will likely be a few breaking API -changes, see the main L module for details. - =head1 SYNOPSYS use FU::Pg; @@ -48,10 +53,10 @@ changes, see the main L module for details. $conn->exec('CREATE TABLE books (id SERIAL, title text, read bool)'); - $conn->q('INSERT INTO books (title) VALUES ($1)', 'Revelation Space')->exec; - $conn->q('INSERT INTO books (title) VALUES ($1)', 'The Invincible')->exec; + $conn->sql('INSERT INTO books (title) VALUES ($1)', 'Revelation Space')->exec; + $conn->sql('INSERT INTO books (title) VALUES ($1)', 'The Invincible')->exec; - for my ($id, $title) ($conn->q('SELECT * FROM books')->flat->@*) { + for my ($id, $title) ($conn->sql('SELECT * FROM books')->flat->@*) { print "$id: $title\n"; } @@ -61,7 +66,7 @@ FU::Pg is a client module for PostgreSQL with a convenient high-level API and support for flexible and complex type conversions. This module interfaces directly with C. -=head2 Connection setup +=head1 Connection setup =over @@ -72,7 +77,7 @@ C<$string> can either be in key=value format or a URI, refer to L for the full list of supported formats and options. You may also pass an empty -string and leave the configuration up L. =item $conn->server_version @@ -112,12 +117,28 @@ Inside a transaction that is in an error state. The transaction must be rolled back in order to recover to a usable state. This happens automatically when the transaction object goes out of scope. +=item active + +Currently executing a query. This state can only be observed during a L. + =item bad Connection is dead or otherwise unusable. =back +=item $conn->escape_literal($str) + +Return an escaped version of C<$str> suitable for use as a string literal in an +SQL statement. You'll rarely need this, it's often better to pass data as bind +parameters instead. + +=item $conn->escape_identifier($str) + +Return an escaped version of C<$str> suitable for use as an identifier (name of +a table, column, function, etc) in an SQL statement. + =item $conn->cache($enable) =item $conn->text_params($enable) @@ -126,7 +147,7 @@ Connection is dead or otherwise unusable. =item $conn->text($enable) -Set the default settings for new statements created with B<< $conn->q() >>. +Set the default settings for new statements created with B<< $conn->sql() >>. =item $conn->cache_size($num) @@ -154,11 +175,12 @@ Also worth noting that the subroutine is called from the context of the code executing the query, but I the query results have been returned. The subroutine is (currently) only called for queries executed through C<< -$conn->exec >>, C<< $conn->q >>, C<< $conn->Q >> and their C<$txn> variants; -internal queries performed by this module (such as for transaction management, -querying type information, etc) do not trigger the callback. Statements that -result in an error being thrown during or before execution are also not -traceable this way. This behavior might change in the future. +$conn->exec >>, C<< $conn->sql >>, C<< $conn->SQL >> and their C<$txn> variants; +C<< $conn->copy >> statements and internal queries performed by this module +(such as for transaction management, querying type information, etc) do not +trigger the callback. Statements that result in an error being thrown during or +before execution are also not traceable this way. This behavior might change in +the future. =item $conn->disconnect @@ -167,7 +189,7 @@ attempts to use C<$conn> throw an error. =back -=head2 Querying +=head1 Querying =over @@ -177,7 +199,7 @@ Execute one or more SQL commands, separated by a semicolon. Returns the number of rows affected by the last statement or I if that information is not available for the given command (like with C). -=item $conn->q($sql, @params) +=item $conn->sql($sql, @params) Create a new SQL statement with the given C<$sql> string and an optional list of bind parameters. C<$sql> can only hold a single statement. @@ -193,14 +215,15 @@ Note that this method just creates a statement object, the query is not prepared or executed until the appropriate statement methods (see below) are used. -=item $conn->Q(@args) +=item $conn->SQL(@args) -Same as C<< $conn->q() >> but uses L to construct the query and bind -parameters. +Same as C<< $conn->sql() >> but uses L to construct the query and bind +parameters. Uses the 'pg' C and C<< $conn->escape_identifier() >> for +identifier quoting. =back -Statement objects returned by C<< $conn->q() >> support the following +Statement objects returned by C<< $conn->sql() >> support the following configuration parameters, which can be set before the statement is executed: =over @@ -235,7 +258,7 @@ depending on how you'd like to obtain the results: Execute the query and return the number of rows affected. Similar to C<< $conn->exec >>. - my $v = $conn->q('UPDATE books SET read = true WHERE id = 1')->exec; + my $v = $conn->sql('UPDATE books SET read = true WHERE id = 1')->exec; # $v = 1 =item $st->val @@ -244,7 +267,7 @@ Return the first column of the first row. Throws an error if the query does not return exactly one column, or if multiple rows are returned. Returns I if no rows are returned or if its value is I. - my $v = $conn->q('SELECT COUNT(*) FROM books')->val; + my $v = $conn->sql('SELECT COUNT(*) FROM books')->val; # $v = 2 =item $st->rowl @@ -252,7 +275,7 @@ if no rows are returned or if its value is I. Return the first row as a list, or an empty list if no rows are returned. Throws an error if the query returned more than one row. - my($id, $title) = $conn->q('SELECT id, title FROM books LIMIT 1')->rowl; + my($id, $title) = $conn->sql('SELECT id, title FROM books LIMIT 1')->rowl; # ($id, $title) = (1, 'Revelation Space'); =item $st->rowa @@ -261,7 +284,7 @@ Return the first row as an arrayref, equivalent to C<< [$st->rowl] >> but might be slightly more efficient. Returns C if the query did not generate any rows. - my $row = $conn->q('SELECT id, title FROM books LIMIT 1')->rowa; + my $row = $conn->sql('SELECT id, title FROM books LIMIT 1')->rowa; # $row = [1, 'Revelation Space']; =item $st->rowh @@ -270,14 +293,14 @@ Return the first row as a hashref. Returns C if the query did not generate any rows. Throws an error if the query returns multiple columns with the same name. - my $row = $conn->q('SELECT id, title FROM books LIMIT 1')->rowh; + my $row = $conn->sql('SELECT id, title FROM books LIMIT 1')->rowh; # $row = { id => 1, title => 'Revelation Space' }; =item $st->alla Return all rows as an arrayref of arrayrefs. - my $data = $conn->q('SELECT id, title FROM books')->alla; + my $data = $conn->sql('SELECT id, title FROM books')->alla; # $data = [ # [ 1, 'Revelation Space' ], # [ 2, 'The Invincible' ], @@ -288,7 +311,7 @@ Return all rows as an arrayref of arrayrefs. Return all rows as an arrayref of hashrefs. Throws an error if the query returns multiple columns with the same name. - my $data = $conn->q('SELECT id, title FROM books')->allh; + my $data = $conn->sql('SELECT id, title FROM books')->allh; # $data = [ # { id => 1, title => 'Revelation Space' }, # { id => 2, title => 'The Invincible' }, @@ -298,7 +321,7 @@ returns multiple columns with the same name. Return an arrayref with all rows flattened. - my $data = $conn->q('SELECT id, title FROM books')->flat; + my $data = $conn->sql('SELECT id, title FROM books')->flat; # $data = [ # 1, 'Revelation Space', # 2, 'The Invincible', @@ -310,7 +333,7 @@ Return a hashref where the first result column is used as key and the second column as value. If the query only returns a single column, C is used as value instead. An error is thrown if the query returns 3 or more columns. - my $data = $conn->q('SELECT id, title FROM books')->kvv; + my $data = $conn->sql('SELECT id, title FROM books')->kvv; # $data = { # 1 => 'Revelation Space', # 2 => 'The Invincible', @@ -321,7 +344,7 @@ value instead. An error is thrown if the query returns 3 or more columns. Return a hashref where the first result column is used as key and the remaining columns are stored as arrayref. - my $data = $conn->q('SELECT id, title, read FROM books')->kva; + my $data = $conn->sql('SELECT id, title, read FROM books')->kva; # $data = { # 1 => [ 'Revelation Space', true ], # 2 => [ 'The Invincible', false ], @@ -332,7 +355,7 @@ columns are stored as arrayref. Return a hashref where the first result column is used as key and the remaining columns are stored as hashref. - my $data = $conn->q('SELECT id, title, read FROM books')->kvh; + my $data = $conn->sql('SELECT id, title, read FROM books')->kvh; # $data = { # 1 => { title => 'Revelation Space', read => true }, # 2 => { title => 'The Invincible', read => false }, @@ -344,7 +367,7 @@ The only time you actually need to assign a statement object to a variable is when you want to inspect the statement using one of the methods below, in all other cases you can chain the methods for more concise code. For example: - my $data = $conn->q('SELECT a, b FROM table')->cache(0)->text->alla; + my $data = $conn->sql('SELECT a, b FROM table')->cache(0)->text->alla; Statement objects can be inspected with the following methods (many of which only make sense after the query has been executed): @@ -364,10 +387,10 @@ Returns the provided bind parameters as an arrayref. Returns an arrayref of integers indicating the type (as I) of each parameter in the given C<$sql> string. Example: - my $oids = $conn->q('SELECT id FROM books WHERE id = $1 AND title = $2')->param_types; + my $oids = $conn->sql('SELECT id FROM books WHERE id = $1 AND title = $2')->param_types; # $oids = [23,25] - my $oids = $conn->q('SELECT id FROM books')->params; + my $oids = $conn->sql('SELECT id FROM books')->params; # $oids = [] This method can be called before the query has been executed, but will then @@ -380,7 +403,7 @@ prepared statement caching is disabled and C is enabled. Returns an arrayref of hashrefs describing each column that the statement returns. - my $cols = $conn->q('SELECT id, title FROM books')->columns; + my $cols = $conn->sql('SELECT id, title FROM books')->columns; # $cols = [ # { name => 'id', oid => 23 }, # { name => 'title', oid => 25 }, @@ -400,9 +423,7 @@ results into Perl values. Observed query preparation time, in seconds, including network round-trip. Returns 0 if a cached prepared statement was used or C if the query was -executed without a separate preparation phase (currently only happens with C<< -$conn->exec() >>, but support for direct query execution may be added for other -queries in the future as well). +executed without a separate preparation phase. =item $st->get_cache @@ -416,7 +437,7 @@ Returns the respective configuration parameters. -=head2 Transactions +=head1 Transactions This module provides a convenient and safe API for I and I. A new transaction can be started with C<< $conn->txn >>, @@ -431,7 +452,7 @@ fail while a transaction object is alive. my $txn = $conn->txn; # run queries - $txn->q('DELETE FROM books WHERE id = $1', 1)->exec; + $txn->sql('DELETE FROM books WHERE id = $1', 1)->exec; # run commands in a subtransaction { @@ -452,9 +473,9 @@ Transaction methods: =item $txn->exec(..) -=item $txn->q(..) +=item $txn->sql(..) -=item $txn->Q(..) +=item $txn->SQL(..) Run a query inside the transaction. These work the same as the respective methods on the parent C<$conn> object. @@ -477,7 +498,7 @@ when the object goes out of scope. =item $txn->text($enable) -Set the default settings for new statements created with B<< $txn->q() >>. +Set the default settings for new statements created with B<< $txn->sql() >>. These settings are inherited from the main connection when the transaction is created. Subtransactions inherit these settings from their parent transaction. @@ -521,6 +542,11 @@ current implementation does not track subtransactions that closely) A subtransaction is in error state and awaiting to be rolled back. +=item active + +Currently executing a query. This state can only be observed during a L. + =item bad Connection is dead or otherwise unusable. @@ -540,7 +566,7 @@ Just don't try to use transaction objects and manual transaction commands at the same time, that won't end well. -=head2 Formats and Types +=head1 Formats and Types The PostgreSQL wire protocol supports sending bind parameters and receiving query results in two different formats: text and binary. While the exact wire @@ -611,10 +637,12 @@ Some built-in types deserve a few additional notes: =item bool -Boolean values are converted to C and C. As bind -parameters, Perl's idea of truthiness is used: C<0>, C and C<""> are -false, everything else is true. Objects that overload I are also -supported. C always converts to SQL C. +Boolean values are converted to C and C. + +As bind parameters, values recognized by C in L are +accepted, in addition to C<0>, C<"f"> and C<""> for false and C<1>, and C<"t"> +for true. C always converts to SQL C. Everything else throws an +error. =item bytea @@ -666,8 +694,8 @@ module does not. Converted between floating point seconds since C<00:00:00>, supporting microsecond precision. This format allows for easy comparison against Unix -timestamps (time of day = C<$timestamp % 86400>) and can be added to an integer -date value to form a complete timestamp. +timestamps (time of day in UTC = C<$timestamp % 86400>) and can be added to an +integer date value to form a complete timestamp. (There's no support for the string format yet) @@ -680,7 +708,7 @@ While C is a valid JSON value, there's currently no way to distinguish that from SQL C. When sending C as bind parameter, it is sent as SQL C. -If you prefer to work with JSON are raw text values instead, use: +If you prefer to work with JSON as raw text values instead, use: $conn->set_type(json => 'text'); @@ -738,11 +766,115 @@ C to configure appropriate conversions for these types. =back -I Methods to convert between the various formats. +Utility functions: + +=over + +=item $conn->perl2bin($oid, $val) + +=item $conn->bin2perl($oid, $bin) + +Convert the value for a specific type between the Perl representation and the +PostgreSQL binary format, using the current type configuration of the +connection. This is the same conversion used internally by this module to send +bind parameters and receive query results, and map to the C and C +functions of C<< $conn->set_type() >>. + +These methods throw an error if C<$oid> is not a known type or if the given +data is not valid for the type. However, these methods should not be used for +strict validation: the conversion routines are usually written under the +assumption that the data has been received directly from Postgres or is about +to be sent to (and further validated by) Postgres. For some types, +C may return invalid data on invalid input and C may +accept invalid binary data. + +=item $conn->bin2text($oid, $bin, ...) + +=item $conn->text2bin($oid, $text, ...) + +Convert between the binary format and the PostgreSQL text format. This +conversion requires a round-trip to the server and throws an error if the +connection state is not I or I. Since it is Postgres doing the +conversion, the input is properly validated and, in the case of C, +the result is guaranteed to be suitable for use as a textual bind parameter or +for inclusion in an SQL query (but don't forget to use C in +that case). + +Calling these methods many times can be pretty slow. If you have several values +to convert, you can do that in a single call to speed things up: + + my($text1, $text2, ..) = $conn->bin2text($oid1, $bin1, $oid2, $bin2, ..); + +=back I Methods to query type info. -=head2 Errors + +=head1 COPY support + +You can use L for efficient +bulk data transfers between your application and the PostgreSQL server: + +=over + +=item $copy = $conn->copy($statement) + +=item $copy = $txn->copy($statement) + +Execute C<$statement> and return a C object that lets you +transfer data to or from Postgres. + +It is not possible to execute any other queries on the same connection while a +copy operation is in progress. When used on a transaction object, C<$txn> must +be kept alive long enough to finish the copy operation. + +=back + +A C<$copy> object supports the following methods: + +=over + +=item $copy->is_binary + +Returns true if the transfer is performed in the binary format, false for text. + +=item $copy->write($data) + +Send C<$data> to the server. An error is thrown if this is not a C operation. An error may be thrown if C<$data> is not a valid format +understood by Postgres, but such errors can also be deferred to C. + +C<$data> is interpreted as a Perl Unicode string for textual transfers and as a +binary string for binary transfers. + +=item $copy->read + +Return the next row read from the Postgres server, or C if no more data +is coming. In the text format, a single line - including trailing newline - is +returned as a Perl Unicode string. In the binary format, a single row is +returned as a byte string. An error is thrown if this is not a C operation. + +=item $copy->close + +Marks the end of the copy operation. Does not return anything but throws an +error if something went wrong. + +It is possible to close a read-copy operation before all data has been +consumed, but that causes all data to still be read and discarded during +C. If you really want to interrupt a large read operation, a more +efficient approach is to call C<< $conn->disconnect >> and discard the entire +connection. + +It is not I to call this method, simply letting the C<$copy> object +run out of scope will do the trick as well, but in that case errors are +silently discarded. An explicit C is recommended to catch errors. + +=back + + +=head1 Errors All methods can throw an exception on error. When possible, the error message is constructed using L's C, including a full stack trace. @@ -825,32 +957,17 @@ to it after C is always safe: =item * Only works with blocking (synchronous) calls, not very suitable for use in asynchronous frameworks unless you know your queries are fast and you have a -low-latency connection with the Postgres server. +low-latency connection with the Postgres server. This is unlikely to improve in +future versions, Perl's async story is somewhat awkward in general, and fully +supporting async operation might require a fundamental redesign of how this +module works. -=back +=item * LISTEN support is still missing. May be added in a future version, as +this seems doable without supporting full async. -Missing features: - -=over - -=item COPY support - -I hope to implement this someday. - -=item LISTEN support - -Would be nice to have, most likely doable without going full async. - -=item Asynchronous calls - -Probably won't happen. Perl's async story is slightly awkward in general, and -fully supporting async operation might require a fundamental redesign of how -this module works. It certainly won't I the implementation. - -=item Pipelining - -I have some ideas for an API, but doubt I'll ever implement it. Suffers from -the same awkwardness and complexity as asynchronous calls. +=item * Pipelining support is also missing. I have some ideas for an API, but +doubt I'll ever implement it. Suffers from the same awkwardness and complexity +as asynchronous calls. =back diff --git a/FU/SQL.pm b/FU/SQL.pm index 73f5613..c33d680 100644 --- a/FU/SQL.pm +++ b/FU/SQL.pm @@ -1,11 +1,11 @@ -package FU::SQL 0.2; +package FU::SQL 1.4; use v5.36; use Exporter 'import'; use Carp 'confess'; use experimental 'builtin', 'for_list'; our @EXPORT = qw/ - P RAW SQL + P RAW IDENT SQL PARENS INTERSPERSE COMMA AND OR WHERE SET VALUES IN @@ -16,6 +16,7 @@ sub _obj { bless [@_], 'FU::SQL::val' } sub P :prototype($) ($p) { bless \(my $x = $p), 'FU::SQL::p' } sub RAW :prototype($) ($s) { _obj "$s" } +sub IDENT :prototype($) ($s) { bless \(my $x = "$s"), 'FU::SQL::i' } # These operate on $_ and must be called with &func syntax. # The readonly check can be finicky. @@ -29,7 +30,7 @@ sub COMMA { INTERSPERSE ',', @_ } sub _conditions { @_ == 1 && ref $_[0] eq 'HASH' - ? map PARENS(RAW $_, + ? map PARENS(IDENT $_, !defined $_[0]{$_} ? ('IS NULL') : ref($_[0]{$_}) eq 'FU::SQL::in' ? ($_[0]{$_}) : ('=', $_[0]{$_}) @@ -41,11 +42,11 @@ sub AND { !@_ || (@_ == 1 && ref $_[0] eq 'HASH' && keys $_[0]->%* == 0) ? RAW ' sub OR { !@_ || (@_ == 1 && ref $_[0] eq 'HASH' && keys $_[0]->%* == 0) ? RAW '1=0' : INTERSPERSE 'OR', _conditions @_ } sub WHERE { SQL 'WHERE', AND @_ } -sub SET($h) { SQL 'SET', COMMA map SQL(RAW $_, '=', $h->{$_}), sort keys %$h } +sub SET($h) { SQL 'SET', COMMA map SQL(IDENT $_, '=', $h->{$_}), sort keys %$h } sub VALUES { @_ == 1 && ref $_[0] eq 'HASH' - ? SQL '(', COMMA(map RAW $_, sort keys $_[0]->%*), ') VALUES (', COMMA(map $_[0]{$_}, sort keys $_[0]->%*), ')' + ? SQL '(', COMMA(map IDENT $_, sort keys $_[0]->%*), ') VALUES (', COMMA(map $_[0]{$_}, sort keys $_[0]->%*), ')' : @_ == 1 && ref $_[0] eq 'ARRAY' ? SQL 'VALUES (', COMMA($_[0]->@*), ')' : SQL 'VALUES (', COMMA(@_), ')'; @@ -71,6 +72,10 @@ sub FU::SQL::p::_compile($self, $opt, $sql, $params) { $$sql .= $opt->{placeholder_style} eq 'pg' ? '$'.@$params : '?'; } +sub FU::SQL::i::_compile($self, $opt, $sql, $params) { + $$sql .= $opt->{quote_identifier} ? $opt->{quote_identifier}->($$self) : $$self; +} + sub FU::SQL::in::_compile($self, $opt, $sql, $params) { if ($opt->{in_style} eq 'pg') { $$sql .= '= ANY('; @@ -87,6 +92,7 @@ sub FU::SQL::in::_compile($self, $opt, $sql, $params) { } sub FU::SQL::val::compile($self, %opt) { + !/^(placeholder_style|in_style|quote_identifier)$/ && confess "Unknown flag: $_" for keys %opt; $opt{placeholder_style} ||= 'dbi'; $opt{in_style} ||= 'dbi'; my($sql, @params) = (''); @@ -94,7 +100,7 @@ sub FU::SQL::val::compile($self, %opt) { ($sql, \@params) } -*FU::SQL::p::compile = *FU::SQL::in::compile = \*FU::SQL::val::compile; +*FU::SQL::p::compile = *FU::SQL::i::compile = *FU::SQL::in::compile = \*FU::SQL::val::compile; 1; __END__ @@ -103,11 +109,6 @@ __END__ FU::SQL - Small and Safe SQL Query Builder -=head1 EXPERIMENTAL - -This module is still in development and there will likely be a few breaking API -changes, see the main L module for details. - =head1 SYNOPSIS use FU::SQL; @@ -120,11 +121,11 @@ changes, see the main L module for details. my $sel = SQL 'SELECT id, name FROM table', WHERE { id => IN([1,2,3]) }; - my($sql, @params) = $sel->compile; + my($sql, $params) = $sel->compile; =head1 DESCRIPTION -=head2 Compiling SQL +=head1 Compiling SQL All functions listed under L return an object that can be passed to other construction functions or compiled into SQL and bind @@ -161,11 +162,21 @@ C<'pg'> when your SQL is going to L or L. Set the style to use for C expressions, refer to the C function below for details. +=item quote_identifier => $func + +Set a function to perform quoting of SQL identifiers. When using DBI, you can +do: + + my($sql) = $obj->compile(quote_identifier => sub { $dbh->quote_identifier(@_) }); + +If this option is not set, identifiers are included into the raw SQL string +without any escaping. + =back =back -=head2 Constructing SQL +=head1 Constructing SQL All of the functions below return an object with a C method. All functions are exported by default. @@ -181,7 +192,7 @@ types of supported arguments: =item 1. -B are interpreted as raw SQL fragments. +I are interpreted as raw SQL fragments. =item 2. @@ -189,7 +200,7 @@ Objects returned by other functions listed below are included as SQL fragments. =item 3. -B is considered a bind parameter. +I is considered a bind parameter. =back @@ -249,6 +260,18 @@ Force the given C<$sql> string to be included as SQL. For example: Never use this function with untrusted input. +=item IDENT($string) + +Mark the given string as an SQL identifier. This function is only useful if you +use potentially untrusted input to determine which column to select or which +table to select from, for example: + + SQL 'SELECT id,', IDENT $ENV{column}, 'FROM table'; + +B By default this function is equivalent to C and hence +provides no safety whatsoever. Be sure to set the C option on +C to get more useful behavior. + =item PARENS(@args) Like C but surrounds the expression by parens: @@ -284,8 +307,8 @@ C<'1=1'> (i.e. true) if C<@conditions> is an empty list. =item AND($hashref) A special form of C that tests the given columns for equality instead. -The keys of the hashref are interpreted as raw SQL and the values as bind -parameters. +The keys of the hashref are interpreted as per C and the values as +bind parameters. AND { id => 1, number => RAW 'random()', x => undef } # '( id = ? ) AND ( number = random() ) AND ( x IS NULL )' @@ -356,12 +379,12 @@ values. This function results in different SQL depending on the C option given to C. The default C<'dbi'> style passes each value as a bind parameter: - SQL 'WHERE id', IN([1, 2, 3, 4]); + SQL 'WHERE id', IN [1, 2, 3, 4]; # 'WHERE id IN(?, ?, ?, ?)', parameters: 1, 2, 3, 4 The C<'pg'> style passes the entire array as a single bind parameter instead: - SQL 'WHERE id', IN([1, 2, 3, 4]); + SQL 'WHERE id', IN [1, 2, 3, 4]; # 'WHERE id = ANY(?)', parameter: [1, 2, 3, 4] The C<'pg'> style allows for more efficient re-use of cached prepared @@ -372,7 +395,7 @@ with L or L. Can be used in the C<$hashref> versions of C, C and C as well: - WHERE { id => IN([1, 2]) } + WHERE { id => IN [1, 2] } # 'WHERE id IN(?, ?)' =back diff --git a/FU/Util.pm b/FU/Util.pm index 275cac5..84f10d7 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -1,25 +1,36 @@ -package FU::Util 0.2; +package FU::Util 1.4; use v5.36; use FU::XS; use Carp 'confess'; use Exporter 'import'; +use Encode (); use POSIX (); use experimental 'builtin'; our @EXPORT_OK = qw/ to_bool json_format json_parse - utf8_decode uri_escape uri_unescape + has_control check_control utf8_decode + uri_escape uri_unescape query_decode query_encode httpdate_format httpdate_parse + gzip_lib gzip_compress brotli_compress fdpass_send fdpass_recv /; + +# Internal utility function +sub has_control :prototype($) ($s) { defined $s && $s =~ /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/ } +sub check_control :prototype($) ($s) { confess 'Invalid control character' if has_control $s; } + +# Deprecated, call Encode::decode() directly. sub utf8_decode :prototype($) { return if !defined $_[0]; - confess 'Invalid UTF-8' if !utf8::decode($_[0]); - confess 'Invalid control character' if $_[0] =~ /[\x00-\x08\x0b\x0c\x0e-\x1f]/; + eval { + $_[0] = Encode::decode('UTF-8', $_[0], Encode::FB_CROAK); + 1 + } || confess($@ =~ s/ at .+\n$//r); $_[0] } @@ -32,6 +43,7 @@ sub uri_escape :prototype($) ($s) { sub uri_unescape :prototype($) ($s) { return if !defined $s; utf8::encode($s); + $s =~ tr/+/ /; $s =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; utf8_decode $s; } @@ -39,6 +51,7 @@ sub uri_unescape :prototype($) ($s) { sub query_decode :prototype($) ($s) { my %o; for (split /&/, $s//'') { + next if !length; my($k,$v) = map uri_unescape($_), split /=/, $_, 2; $v //= builtin::true; if (ref $o{$k}) { push $o{$k}->@*, $v } @@ -93,15 +106,7 @@ __END__ =head1 NAME -FU::Util - Miscellaneous utility functions that really should have been part of -a core Perl installation but aren't for some reason because the Perl community -doesn't believe in the concept of a "batteries included" standard library. - - -=head1 EXPERIMENTAL - -This module is still in development and there will likely be a few breaking API -changes, see the main L module for details. +FU::Util - Miscellaneous Utility Functions =head1 SYNOPSIS @@ -111,7 +116,11 @@ changes, see the main L module for details. =head1 DESCRIPTION -=head2 Boolean Stuff +A bunch of functions that are too small (or I'm too lazy) to split out into +separate modules. Some of these functions really ought to be part of Perl core. + + +=head1 Boolean Stuff Perl has had a builtin boolean type since version 5.36 and FU uses that where appropriate, but there's still a lot of older code out there using different @@ -135,10 +144,10 @@ value for C<$val>, due to C<\0> and C<\1> being considered booleans. =back -=head2 JSON parsing & formatting +=head1 JSON Parsing & Formatting This module comes with a custom C-based JSON parser and formatter. These -functions conform strictly to L, +functions conform to L, non-standard extensions are not supported and never will be. It also happens to be pretty fast, refer to L for some numbers. @@ -212,13 +221,6 @@ roughly similar to: JSON::PP->new->allow_nonref->core_bools->convert_blessed->encode($scalar); -Some modules escape the slash character in encoded strings to prevent a -potential XSS vulnerability when embedding JSON inside C<< >> tags. This function does I do that because it might not even -be sufficient. The following is probably an improvement: - - json_format($data) =~ s{>, C<< > >> and C<< & >> as Unicode escapes. +Commonly used to embed data inside a HTML page: + + $html = ''; + +This option does NOT make it safe to include the encoded JSON as an attribute +value. There is no way to do that without violating JSON specs, so you should +use entity escaping instead. + +Some JSON modules escape the forward slash (C) character instead, but that +is I sufficient for embedding inside a C<<