From 0925ae79a1b49ee3d63e542dd74acdb0c580c6c9 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 19 Mar 2025 17:33:10 +0100 Subject: [PATCH 01/61] XS: Ensure created Perl strings are nul-terminated Most of these are binary strings and shouldn't be interpreted as C strings in the first place, but better be safe in case they are, anyway. The lack of nul-termination of FU::Pg `$hex` strings was more likely to be problematic. --- c/compress.c | 3 +++ c/fdpass.c | 1 + c/pgtypes.c | 1 + 3 files changed, 5 insertions(+) diff --git a/c/compress.c b/c/compress.c index 20a59be..da986a3 100644 --- a/c/compress.c +++ b/c/compress.c @@ -87,6 +87,7 @@ static SV *fugz_compress_ld(pTHX_ int level, const char *bytes, size_t inlen) { size_t len = libdeflate_gzip_compress(fugz_ld_ctx, bytes, inlen, SvPVX(out), outlen); if (!len) fu_confess("Libdeflate compression failed"); /* Shouldn't happen */ SvCUR_set(out, len); + SvPVX(out)[len] = 0; return out; } @@ -110,6 +111,7 @@ static SV *fugz_compress_zlib(pTHX_ int level, const char *bytes, size_t inlen) if ((r = deflate(&stream, 4)) != 1) fu_confess("Zlib compression failed (%d)", r); SvCUR_set(out, stream.total_out); + SvPVX(out)[stream.total_out] = 0; deflateEnd(&stream); return out; } @@ -157,5 +159,6 @@ static SV *fubr_compress(pTHX_ IV level, SV *in) { if (!BrotliEncoderCompress(level, 22, BROTLI_MODE_GENERIC, inlen, bytes, &outlen, SvPVX(out))) fu_confess("Brotli compression failed"); SvCUR_set(out, outlen); + SvPVX(out)[outlen] = 0; return out; } diff --git a/c/fdpass.c b/c/fdpass.c index 74a1229..ae4b141 100644 --- a/c/fdpass.c +++ b/c/fdpass.c @@ -71,6 +71,7 @@ static int fufdpass_recv(pTHX_ I32 ax, int socket, size_t len) { } SvCUR_set(buf, r); + SvPVX(buf)[r] = 0; ST(1) = buf; return 2; } diff --git a/c/pgtypes.c b/c/pgtypes.c index 6fb8835..b307cc0 100644 --- a/c/pgtypes.c +++ b/c/pgtypes.c @@ -166,6 +166,7 @@ RECVFN(hex) { *out++ = PL_hexdigit[(in[i] >> 4) & 0x0f]; *out++ = PL_hexdigit[in[i] & 0x0f]; } + *out = 0; SvCUR_set(r, len * 2); return r; } From 90881924d4f8331b7d4c67446dc9b9024bc2cf26 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 21 Mar 2025 11:29:52 +0100 Subject: [PATCH 02/61] Doc fixes --- FU.pm | 32 ++++++++++++++++---------------- FU/Log.pm | 4 ++-- FU/MultipartFormData.pm | 4 ++-- FU/Pg.pm | 12 ++++++------ FU/SQL.pm | 4 ++-- FU/Util.pm | 25 +++++++++++++------------ FU/Validate.pm | 34 +++++++++++++++++----------------- FU/XMLWriter.pm | 8 ++++---- 8 files changed, 62 insertions(+), 61 deletions(-) diff --git a/FU.pm b/FU.pm index cf13473..02b1b8b 100644 --- a/FU.pm +++ b/FU.pm @@ -1046,7 +1046,7 @@ Unless specifically mentioned otherwise, all methods and functions taking or returning strings deal with perl Unicode strings, not raw bytes. -=head2 Framework Configuration +=head1 Framework Configuration =over @@ -1143,7 +1143,7 @@ restart loop. =back -=head2 Handlers & Routing +=head1 Handlers & Routing =over @@ -1211,7 +1211,7 @@ for a certain error code, C<500> is used as fallback. =back -=head2 The 'fu' Object +=head1 The 'fu' Object While the C namespace is used for global configuration and utility functions, the C object is intended for methods that deal with request @@ -1261,7 +1261,7 @@ Convenient short-hand for C<< fu->db->Q(@args) >>. =back -=head2 Request Information +=head1 Request Information =over @@ -1307,6 +1307,16 @@ Parse, validate and return the query parameter identified by C<$name> with the given L schema. Calls C<< fu->error(400) >> with a useful error message if validation fails. +To fetch a query parameter that may have multiple values, use: + + my $arrayref = fu->query(q => {accept_scalar => 1}); + + # OR: + my $first_value = fu->query(q => {accept_array => 'first'}); + + # OR: + my $last_value = fu->query(q => {accept_array => 'last'}); + =item fu->query($schema) =item fu->query($name1 => $schema1, $name2 => $schema2, ..) @@ -1326,16 +1336,6 @@ To fetch all query paramaters as decoded by C, use: my $data = fu->query({type=>'any'}); -To fetch a query parameter that may have multiple values, use: - - my $arrayref = fu->query(q => {accept_scalar => 1}); - - # OR: - my $first_value = fu->query(q => {accept_array => 'first'}); - - # OR: - my $last_value = fu->query(q => {accept_array => 'last'}); - =item fu->cookie(...) Like C<< fu->query() >> but parses the C request header. Beware that, @@ -1365,7 +1365,7 @@ objects. Refer to L for more information. =back -=head2 Generating Responses +=head1 Response Generation =over @@ -1509,7 +1509,7 @@ one of the following status codes or an alias: -=head2 Running the Site +=head1 Running the Site When your script is done setting L and registering L, it should call C to actually start serving diff --git a/FU/Log.pm b/FU/Log.pm index 612ecd1..24dafbf 100644 --- a/FU/Log.pm +++ b/FU/Log.pm @@ -89,7 +89,7 @@ interface either; the entire point of this module is that it only handles process-global logging. This module mainly exists for users of the L framework. -=head2 Configuration +=head1 Configuration =over @@ -119,7 +119,7 @@ is then used instead. This is to avoid recursion. =back -=head2 Exportable function +=head1 Exportable function =over diff --git a/FU/MultipartFormData.pm b/FU/MultipartFormData.pm index 8c2542a..8441740 100644 --- a/FU/MultipartFormData.pm +++ b/FU/MultipartFormData.pm @@ -121,7 +121,7 @@ C, which is typically used to handle file uploads. The entire request body is assumed to be in memory as a Perl string, but this module makes an attempt to avoid any further copies of data values. -=head2 Parsing +=head1 Parsing =over @@ -138,7 +138,7 @@ tried. =back -=head2 Field Object +=head1 Field Object Each field is parsed into a field object that supports the following methods: diff --git a/FU/Pg.pm b/FU/Pg.pm index 0d5579f..83b2111 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -61,7 +61,7 @@ FU::Pg is a client module for PostgreSQL with a convenient high-level API and support for flexible and complex type conversions. This module interfaces directly with C. -=head2 Connection setup +=head1 Connection setup =over @@ -173,7 +173,7 @@ attempts to use C<$conn> throw an error. =back -=head2 Querying +=head1 Querying =over @@ -420,7 +420,7 @@ Returns the respective configuration parameters. -=head2 Transactions +=head1 Transactions This module provides a convenient and safe API for I and I. A new transaction can be started with C<< $conn->txn >>, @@ -549,7 +549,7 @@ Just don't try to use transaction objects and manual transaction commands at the same time, that won't end well. -=head2 Formats and Types +=head1 Formats and Types The PostgreSQL wire protocol supports sending bind parameters and receiving query results in two different formats: text and binary. While the exact wire @@ -752,7 +752,7 @@ I Methods to convert between the various formats. I Methods to query type info. -=head2 COPY support +=head1 COPY support You can use L for efficient @@ -816,7 +816,7 @@ silently discarded. An explicit C is recommended to catch errors. =back -=head2 Errors +=head1 Errors All methods can throw an exception on error. When possible, the error message is constructed using L's C, including a full stack trace. diff --git a/FU/SQL.pm b/FU/SQL.pm index 6a30332..15adbb1 100644 --- a/FU/SQL.pm +++ b/FU/SQL.pm @@ -124,7 +124,7 @@ changes, see the main L module for details. =head1 DESCRIPTION -=head2 Compiling SQL +=head1 Compiling SQL All functions listed under L return an object that can be passed to other construction functions or compiled into SQL and bind @@ -165,7 +165,7 @@ for details. =back -=head2 Constructing SQL +=head1 Constructing SQL All of the functions below return an object with a C method. All functions are exported by default. diff --git a/FU/Util.pm b/FU/Util.pm index 41d01f3..7b8c6ad 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -95,10 +95,7 @@ __END__ =head1 NAME -FU::Util - Miscellaneous utility functions that really should have been part of -a core Perl installation but aren't for some reason because the Perl community -doesn't believe in the concept of a "batteries included" standard library. - +FU::Util - Miscellaneous Utility Functions =head1 EXPERIMENTAL @@ -113,7 +110,11 @@ changes, see the main L module for details. =head1 DESCRIPTION -=head2 Boolean Stuff +A bunch of functions that are too small (or I'm too lazy) to split out into +separate modules. Some of these functions really ought to be part of Perl core. + + +=head1 Boolean Stuff Perl has had a builtin boolean type since version 5.36 and FU uses that where appropriate, but there's still a lot of older code out there using different @@ -137,7 +138,7 @@ value for C<$val>, due to C<\0> and C<\1> being considered booleans. =back -=head2 JSON parsing & formatting +=head1 JSON Parsing & Formatting This module comes with a custom C-based JSON parser and formatter. These functions conform strictly to L, @@ -281,7 +282,7 @@ functions, L and L are perfectly fine alternatives. L and L also look like good and maintained candidates.) -=head2 URI-Related Functions +=head1 URI-Related Functions While URIs are capable of encoding arbitrary binary data, the functions below assume you're only dealing with text. This makes them more robust against weird @@ -343,7 +344,7 @@ then encoded. =back -=head2 HTTP Date Formatting +=head1 HTTP Date Formatting The HTTP date format is utter garbage, but with the right tools it doesn't require I much code to work with. @@ -368,7 +369,7 @@ This will not happen if your local timezone is UTC. =back -=head2 Gzip Compression +=head1 Gzip Compression Gzip compression can be done with a few different libraries. The canonical one is I, which is old and not well optimized for modern systems. There's @@ -408,10 +409,10 @@ This function is B safe to use from multiple threads! This module does not currently implement decompression. If you need that, or streaming, or other functionality not provided here, there's L and L in the core Perl distribution and -L on CPAN. +L, L and L on CPAN. -=head2 Brotli Compression +=head1 Brotli Compression Just a small wrapper around C's one-shot compression interface. @@ -428,7 +429,7 @@ Throws an error if C could not be found or loaded. =back -=head2 File Descriptor Passing +=head1 File Descriptor Passing UNIX sockets (see L) have the fancy property of letting you send file descriptors over them, allowing you to pass, for example, a socket diff --git a/FU/Validate.pm b/FU/Validate.pm index bc63845..da29a2e 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -419,15 +419,15 @@ parameters within Perl code. In fact, the correct answer to "how do I validate function parameters?" is "don't, document your assumptions instead". -=head2 Validation API +=head1 Validation API To validate some input, you first need a schema. A schema can be compiled as follows: my $validator = FU::Validate->compile($schema, $validations); -C<$schema> is the schema that describes the data to be validated (see L below) and C<$validations> is an optional hashref containing +C<$schema> is the schema that describes the data to be validated (see L below) and C<$validations> is an optional hashref containing L that C<$schema> can refer to. An error is thrown if the C<$validations> or C<$schema> are invalid. @@ -443,10 +443,10 @@ An error is thrown if the input does not validate. The error object is a C-blessed hashref containing at least one key: I, which indicates the name of the validation that failed. Additional keys with more detailed information may be present, depending on the -validation. These are documented in L below. +validation. These are documented in L below. -=head1 SCHEMA DEFINITION +=head1 Schema Definition A schema is an arrayref or hashref, where each key is the name of a built-in option or of a validation to be performed and the values are the arguments to @@ -473,7 +473,7 @@ Or to use the same validation multiple times: [ regex => qr/^a/, regex => qr/z$/ ] -=head2 Built-in options +=head1 Built-in options =over @@ -730,7 +730,7 @@ reporting. =back -=head2 Standard validations +=head1 Standard validations Standard validations are provided by the module. It is possible to override, re-implement and supplement these with custom validations. Internally, these @@ -855,7 +855,7 @@ given the year and month. =back -=head2 Custom validations +=head1 Custom validations Custom validations can be passed to C as the C<$validations> hashref argument. A custom validation is, in simple terms, either a schema or a @@ -885,7 +885,7 @@ schema that contains the I built-in option to do the actual validation. my $schema = { prefix => 'Hello, ' }; my $result = FU::Validate->compile($schema, $validations)->validate('Hello, World!'); -=head3 Custom validations and built-in options +=head2 Custom validations and built-in options Custom validations can also set built-in options, but the semantics differ a little depending on the option. First, be aware that many of the built-in @@ -909,13 +909,13 @@ error: The I option is validated separately for each custom validation. -Multiple I and I validations are merged into a single validation. -So if you have multiple custom validations that set the I option, a -single combined schema is created that validates all array elements. The same -applies to I: if the same key is listed in multiple custom validations, -then the key must conform to all schemas. With respect to the I -option, a key that is mentioned in any of the I options is considered -"known". +Multiple I, I and I validations are merged into a single +validation. So if you have multiple custom validations that set the I +option, a single combined schema is created that validates all array elements. +The same applies to I: if the same key is listed in multiple custom +validations, then the key must conform to all schemas. With respect to the +I option, a key that is mentioned in any of the I options is +considered "known". All other built-in options follow inheritance semantics: These options can be set in a custom validation, and they are inherited by the top-level schema. If @@ -924,7 +924,7 @@ inherited. The top-level schema can always override options set by custom validations. -=head3 Global custom validations +=head2 Global custom validations Instead of passing a C<$validations> argument every time you call C, you can also add custom validations to the global list of built-in validations: diff --git a/FU/XMLWriter.pm b/FU/XMLWriter.pm index 3fc8626..35e0702 100644 --- a/FU/XMLWriter.pm +++ b/FU/XMLWriter.pm @@ -138,7 +138,7 @@ other XML writing modules on CPAN that I tried, but whether this approach is faster than typical templating solutions... I've no idea. Check out L for some benchmarks. -=head2 Top-level functions +=head1 Top-level functions These functions all return a byte string with (UTF-8) encoded XML. @@ -170,7 +170,7 @@ passed to the C call for the top-level C<< >> element. =back -=head2 Output functions +=head1 Output functions =over @@ -252,7 +252,7 @@ provided it defaults to C. =back -=head2 Utility function +=head1 Utility function =over @@ -263,7 +263,7 @@ and C<"> are replaced with their XML entity. =back -=head2 Import options +=head1 Import options All of the functions mentioned in this document can be imported individually. There are also two import groups: From 17584f2b8ca71e2aeb8899e60f475df78e1008bc Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sat, 22 Mar 2025 14:58:38 +0100 Subject: [PATCH 03/61] FU: Fix DB reconnect + not setting debug_info() --- FU.pm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/FU.pm b/FU.pm index 02b1b8b..c178072 100644 --- a/FU.pm +++ b/FU.pm @@ -123,6 +123,7 @@ sub query_trace($st,@) { sub _connect_db { $DB = ref $INIT_DB eq 'CODE' ? $INIT_DB->() : FU::Pg->connect($INIT_DB); $DB->query_trace(\&query_trace); + $DB } sub init_db($info) { require FU::Pg; @@ -214,7 +215,7 @@ sub _monitor { } -our $debug_info = []; +our $debug_info = {}; sub debug_info($path, $storage=undef, $history=100) { $debug_info = { path => $path, storage => $storage, history => $history } } From 9e1be5bc719c8fc84ba2be05d576f72d25353cc2 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sat, 22 Mar 2025 15:10:59 +0100 Subject: [PATCH 04/61] FU: Log errors thrown from error handler --- FU.pm | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/FU.pm b/FU.pm index c178072..81bcf94 100644 --- a/FU.pm +++ b/FU.pm @@ -380,10 +380,11 @@ sub _do_req($c) { my($code, $msg) = ref $err eq 'FU::err' ? $err->@* : (500, $err); fu->reset; fu->status($code); - eval { - ($onerr{$code} || $onerr{500})->($code, $msg); - 1; - } || _err_500(); + my $ok = eval { ($onerr{$code} || $onerr{500})->($code, $msg) }; + if (!$ok && !_is_done($@)) { + _log_err $@; + _err_500(); + } } $REQ->{trace_end} = time; From 2f50736782894519a81136691363c3f1227efba5 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 24 Mar 2025 11:07:36 +0100 Subject: [PATCH 05/61] fdpass_recv: Set O_CLOEXEC on received fds Turns out this is necessary even if the fd is going to be passed through exec() soon, because the supervisor might receive multiple fds before spawning another process, in which case all of them are going to be passed to the new process instead of just one. --- FU.pm | 13 +++++-------- FU/Util.pm | 7 +++---- c/fdpass.c | 2 +- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/FU.pm b/FU.pm index 81bcf94..1451c74 100644 --- a/FU.pm +++ b/FU.pm @@ -475,26 +475,23 @@ sub _supervisor($c) { } # Don't bother spawning more than 1 at a time while in error state - my $spawn = !$err ? $c->{proc} - keys %childs : (grep $_ == 1, values %childs) ? 0 : 1; + my $spawn = !$err ? $c->{proc} - keys %childs : !@client_fd && (grep $_ == 1, values %childs) ? 0 : 1; for (1..$spawn) { - my $client = shift @client_fd; + my $client = @client_fd ? IO::Socket->new_from_fd(shift(@client_fd), 'r') : undef; my $pid = fork; die $! if !defined $pid; if (!$pid) { # child $SIG{CHLD} = $SIG{HUP} = $SIG{INT} = $SIG{TERM} = undef; + # In error state, wait with loading the script until we've received a request. + # Otherwise we'll end up in an infinite spawning loop if the script doesn't start properly. + $client = $c->{listen_sock}->accept() or die $! if !$client && $err; if ($client) { - $ENV{FU_CLIENT_FD} = $client; - } elsif ($err) { - # In error state, wait with loading the script until we've received a request. - # Otherwise we'll end up in an infinite spawning loop if the script doesn't start properly. - $client = $c->{listen_sock}->accept() or die $!; fcntl $client, Fcntl::F_SETFD, 0; $ENV{FU_CLIENT_FD} = fileno $client; } exec $^X, (map "-I$_", @INC), $0; exit 1; } - $client && IO::Socket->new_from_fd($client, 'r'); # close() the fd if we have one $childs{$pid} = 1; } diff --git a/FU/Util.pm b/FU/Util.pm index 7b8c6ad..e823d79 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -458,10 +458,9 @@ Like regular socket I/O, a single C message may be split across multiple C calls; in that case the C<$fd> is only received on the first call. -Don't use this function if the sender may include multiple file descriptors in -a single message, weird things can happen. File descriptors received this way -do not have the C flag and will thus survive a call to C. -Refer to L flag is set on received file descriptors. Don't use this +function if the sender may include multiple file descriptors in a single +message, weird things can happen. Refer to L for more weirdness and edge cases. diff --git a/c/fdpass.c b/c/fdpass.c index ae4b141..b54df7d 100644 --- a/c/fdpass.c +++ b/c/fdpass.c @@ -53,7 +53,7 @@ static int fufdpass_recv(pTHX_ I32 ax, int socket, size_t len) { msg.msg_controllen = sizeof(cmsgbuf.buf); msg.msg_flags = 0; - ssize_t r = recvmsg(socket, &msg, 0); + ssize_t r = recvmsg(socket, &msg, MSG_CMSG_CLOEXEC); if (r < 0) { ST(0) = &PL_sv_undef; ST(1) = &PL_sv_undef; From 13661b46f900b0ef7dcca6404c9b9fd276624f40 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 30 Mar 2025 13:51:14 +0200 Subject: [PATCH 06/61] Validate: Normalize num/int/uint to Perl numeric types + add 64bit limit to int/uint Normalization may be undone by later validations, but this should work in most cases. --- FU/Validate.pm | 15 ++++++++------- t/validate.t | 4 ++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/FU/Validate.pm b/FU/Validate.pm index da29a2e..7352053 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -81,9 +81,9 @@ our %default_validations = ( bool => { type => 'any', func => sub { my $r = to_bool $_[0]; return {} if !defined $r; $_[0] = $r; 1 } }, anybool => { type => 'any', default => false, func => sub { $_[0] = $_[0] ? true : false; 1 } }, - num => { _reg $re_num }, - int => { _reg $re_int }, # implies num - uint => { _reg $re_uint }, # implies num + num => [ _reg($re_num), func => sub { $_[0] = $_[0]*1; 1 } ], + int => [ _reg($re_int), func => sub { return { message => 'integer out of range' } if $_[0] < -9223372036854775808 || $_[0] > 9223372036854775807; $_[0] = int $_[0]; 1 } ], + uint => [ _reg($re_uint), func => sub { return { message => 'integer out of range' } if $_[0] > 18446744073709551615; $_[0] = int $_[0]; 1 } ], min => sub($min) { +{ num => 1, func => sub { $_[0] >= $min ? 1 : { expected => $min, got => $_[0] } } } }, max => sub($max) { +{ num => 1, func => sub { $_[0] <= $max ? 1 : { expected => $max, got => $_[0] } } } }, range => sub { [ min => $_[0][0], max => $_[0][1] ] }, @@ -780,17 +780,18 @@ Require the input to be a boolean type as per C in L. Implies C<< type => 'scalar' >>. Require the input to be a number formatted using the format permitted by JSON. Note that this is slightly more restrictive from Perl's number formatting, in that 'NaN', 'Inf' and thousand separators are -not permitted. +not permitted. The value is normalized to a Perl integer or floating point +value, which means precision for large numbers may be lost. =item int => 1 -Implies C<< type => 'scalar' >>. Require the input to be an (arbitrarily large) +Implies C<< type => 'scalar' >>. Require the input to be an (at most) 64-bit integer. =item uint => 1 -Implies C<< type => 'scalar' >>. Require the input to be an (arbitrarily large) -positive integer. +Implies C<< type => 'scalar' >>. Require the input to be an (at most) 64-bit +unsigned integer. =item min => $num diff --git a/t/validate.t b/t/validate.t index 626dbbd..a29dc4c 100644 --- a/t/validate.t +++ b/t/validate.t @@ -209,8 +209,8 @@ t { num => 1 }, '1', '1'; f { num => 1 }, '1.1.', nerr '1.1.'; f { num => 1 }, '1.-1', nerr '1.-1'; f { num => 1 }, '.1', nerr '.1'; -t { num => 1 }, '0.1e5', '0.1e5'; -t { num => 1 }, '0.1e+5', '0.1e+5'; +t { num => 1 }, '0.1e5', 10000; +t { num => 1 }, '0.1e+5', 10000; f { num => 1 }, '0.1e5.1', nerr '0.1e5.1'; t { int => 1 }, 0, 0; t { int => 1 }, -123, -123; From 3bf98e4d8fd5dbd4173a50b826eec608fe028ec8 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 3 Apr 2025 15:58:38 +0200 Subject: [PATCH 07/61] FU: Fix fu->reset also resetting cookies --- FU.pm | 1 + 1 file changed, 1 insertion(+) diff --git a/FU.pm b/FU.pm index 1451c74..d59f821 100644 --- a/FU.pm +++ b/FU.pm @@ -722,6 +722,7 @@ sub reset { $FU::REQ->{reshdr} = { 'content-type', 'text/html', }; + delete $FU::REQ->{rescookie}; } From b3281924d102f01d203df4cefb58a0c17d09990f Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 7 Apr 2025 13:45:24 +0200 Subject: [PATCH 08/61] Pg: Add escape_literal() and escape_identifier() Didn't expect I'd ever need these, but they're useful for generating SQL scripts. --- FU.xs | 18 ++++++++++++++++++ FU/Pg.pm | 11 +++++++++++ c/libpq.h | 2 ++ t/pgconnect.t | 10 ++++++++++ 4 files changed, 41 insertions(+) diff --git a/FU.xs b/FU.xs index 3e7a0b2..ec12a7c 100644 --- a/FU.xs +++ b/FU.xs @@ -216,6 +216,24 @@ void status(fupg_conn *c) CODE: ST(0) = sv_2mortal(newSVpv(fupg_conn_status(c), 0)); +void escape_literal(fupg_conn *c, SV *v) + CODE: + STRLEN len; + const char *str = SvPVutf8(v, len); + char *r = PQescapeLiteral(c->conn, str, len); + if (!r) fupg_conn_croak(c, "escapeLiteral"); + ST(0) = newSVpvn_flags(r, strlen(r), SVf_UTF8|SVs_TEMP); + PQfreemem(r); + +void escape_identifier(fupg_conn *c, SV *v) + CODE: + STRLEN len; + const char *str = SvPVutf8(v, len); + char *r = PQescapeIdentifier(c->conn, str, len); + if (!r) fupg_conn_croak(c, "escapeIdentifier"); + ST(0) = newSVpvn_flags(r, strlen(r), SVf_UTF8|SVs_TEMP); + PQfreemem(r); + void cache(fupg_conn *x, ...) ALIAS: FU::Pg::conn::text_params = FUPG_TEXT_PARAMS diff --git a/FU/Pg.pm b/FU/Pg.pm index 83b2111..e43776e 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -123,6 +123,17 @@ Connection is dead or otherwise unusable. =back +=item $conn->escape_literal($str) + +Return an escaped version of C<$str> suitable for use as a string literal in an +SQL statement. You'll rarely need this, it's often better to pass data as bind +parameters instead. + +=item $conn->escape_identifier($str) + +Return an escaped version of C<$str> suitable for use as an identifier (name of +a table, column, function, etc) in an SQL statement. + =item $conn->cache($enable) =item $conn->text_params($enable) diff --git a/c/libpq.h b/c/libpq.h index 94d817e..d47dd16 100644 --- a/c/libpq.h +++ b/c/libpq.h @@ -43,6 +43,8 @@ typedef enum { PQTRANS_IDLE, PQTRANS_ACTIVE, PQTRANS_INTRANS, PQTRANS_INERROR, P X(PQconnectdb, PGconn *, const char *) \ X(PQenterPipelineMode, int, PGconn *) \ X(PQerrorMessage, char *, const PGconn *) \ + X(PQescapeIdentifier, char *, PGconn *, const char *, size_t) \ + X(PQescapeLiteral, char *, PGconn *, const char *, size_t) \ X(PQexec, PGresult *, PGconn *, const char *) \ X(PQexecParams, PGresult *, PGconn *, const char *, int, const Oid *, const char * const *, const int *, const int *, int) \ X(PQexecPrepared, PGresult *, PGconn *, const char *, int, const char * const *, const int *, const int *, int) \ diff --git a/t/pgconnect.t b/t/pgconnect.t index b7d1996..d2dfbf5 100644 --- a/t/pgconnect.t +++ b/t/pgconnect.t @@ -353,6 +353,16 @@ subtest 'txn', sub { is_deeply $st->val, [1,3], 'not deep copy'; } + +{ + # Exact format returned by escape_literal() can differ between Postgres versions and configurations. + my $x = q{"' \" \\}; + is $conn->q('SELECT '.$conn->escape_literal($x))->val, $x; + + # Format can also change, but unsure how to test this otherwise. + is $conn->escape_identifier('hel\l"o'), '"hel\l""o"'; +} + subtest 'Prepared statement cache', sub { my $txn = $conn->cache_size(2)->txn->cache; my sub numexec($sql) { From e7a9f165deffdf7075014f7c37da9f0f65b0f218 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 7 Apr 2025 14:29:21 +0200 Subject: [PATCH 09/61] Fix use of SvPVXtrue() where SvTRUEx() was intended --- FU.xs | 2 +- c/jsonfmt.c | 8 ++++---- c/jsonparse.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/FU.xs b/FU.xs index ec12a7c..ef8d60b 100644 --- a/FU.xs +++ b/FU.xs @@ -18,7 +18,7 @@ #define av_push_simple av_push #endif #ifndef BOOL_INTERNALS_sv_isbool_true -#define BOOL_INTERNALS_sv_isbool_true(x) SvPVXtrue(x) +#define BOOL_INTERNALS_sv_isbool_true(x) SvTRUEx(x) #endif /* Disable key/value struct packing in khashl, so we can safely take a pointer diff --git a/c/jsonfmt.c b/c/jsonfmt.c index f605e8d..fff3e4f 100644 --- a/c/jsonfmt.c +++ b/c/jsonfmt.c @@ -289,10 +289,10 @@ static SV *fujson_fmt_xs(pTHX_ I32 ax, I32 argc, SV *val) { r = ST(i); i++; - if (strcmp(arg, "canonical") == 0) ctx.canon = SvPVXtrue(r); - else if (strcmp(arg, "pretty") == 0) ctx.pretty = SvPVXtrue(r) ? 0 : INT_MIN; - else if (strcmp(arg, "html_safe") == 0) ctx.htmlsafe = !!SvPVXtrue(r); - else if (strcmp(arg, "utf8") == 0) encutf8 = SvPVXtrue(r); + if (strcmp(arg, "canonical") == 0) ctx.canon = SvTRUEx(r); + else if (strcmp(arg, "pretty") == 0) ctx.pretty = SvTRUEx(r) ? 0 : INT_MIN; + else if (strcmp(arg, "html_safe") == 0) ctx.htmlsafe = !!SvTRUEx(r); + else if (strcmp(arg, "utf8") == 0) encutf8 = SvTRUEx(r); else if (strcmp(arg, "max_size") == 0) out.maxlen = SvUV(r); else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r); else croak("Unknown flag: '%s'", arg); diff --git a/c/jsonparse.c b/c/jsonparse.c index 6c742be..d24b1f9 100644 --- a/c/jsonparse.c +++ b/c/jsonparse.c @@ -272,7 +272,7 @@ static SV *fujson_parse_xs(pTHX_ I32 ax, I32 argc, SV *val) { r = ST(i); i++; - if (strcmp(arg, "utf8") == 0) decutf8 = SvPVXtrue(r); + if (strcmp(arg, "utf8") == 0) decutf8 = SvTRUEx(r); else if (strcmp(arg, "max_size") == 0) maxlen = SvUV(r); else if (strcmp(arg, "max_depth") == 0) ctx.depth = SvUV(r); else if (strcmp(arg, "offset") == 0) offset = r; From 196b1cc3cee1fc90c4da43621af2068721722df7 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 7 Apr 2025 16:41:29 +0200 Subject: [PATCH 10/61] FU: Use CLOCK_MONOTONIC for timing --- FU.pm | 8 ++++---- FU/DebugImpl.pm | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/FU.pm b/FU.pm index d59f821..c21bb7f 100644 --- a/FU.pm +++ b/FU.pm @@ -3,7 +3,7 @@ use v5.36; use Carp 'confess', 'croak'; use IO::Socket; use POSIX (); -use Time::HiRes 'time'; +use Time::HiRes 'clock_gettime', 'CLOCK_MONOTONIC'; use FU::Log 'log_write'; use FU::Util; use FU::Validate; @@ -320,7 +320,7 @@ sub _log_err($e) { } sub _do_req($c) { - local $REQ = { hdr => {}, trace_start => time, trace_id => sprintf('%010x%08x%04x', int time, $$, int rand 1<<16) }; + local $REQ = { hdr => {}, trace_start => clock_gettime(CLOCK_MONOTONIC), trace_id => sprintf('%010x%08x%04x', int time, $$, int rand 1<<16) }; local $fu = bless {}, 'FU::obj'; $REQ->{ip} = $c->{client_sock} isa 'IO::Socket::INET' ? $c->{client_sock}->peerhost : '127.0.0.1'; @@ -328,7 +328,7 @@ sub _do_req($c) { my $ok = eval { _read_req $c; - $REQ->{trace_start} = time; + $REQ->{trace_start} = clock_gettime(CLOCK_MONOTONIC); my $path = fu->path; my $method = fu->method eq 'HEAD' ? 'GET' : fu->method; @@ -387,7 +387,7 @@ sub _do_req($c) { } } - $REQ->{trace_end} = time; + $REQ->{trace_end} = clock_gettime(CLOCK_MONOTONIC); fu->_flush($c->{fcgi_obj} || $c->{client_sock}); if (debug && $REQ->{trace_id} && $debug_info->{history} && $debug_info->{storage}) { diff --git a/FU/DebugImpl.pm b/FU/DebugImpl.pm index 08252ad..eda2939 100644 --- a/FU/DebugImpl.pm +++ b/FU/DebugImpl.pm @@ -342,7 +342,7 @@ sub save { return; }; my $line = sprintf "%d %f %s %s %s\n", - time, time - $FU::REQ->{trace_start}, $FU::REQ->{status}, + time, $FU::REQ->{trace_end} - $FU::REQ->{trace_start}, $FU::REQ->{status}, fu->method, fu->path.(fu->query?'?'.fu->query:''); utf8::encode($line); print $fh $line; From efa63ca96abdd6582a869cfc5cd8d047647d7ca9 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 7 Apr 2025 16:52:45 +0200 Subject: [PATCH 11/61] Pg: Discard temporary hash keys earlier in $st->kv? methods Saves some memory for large query results, didn't notice much of a performance difference. --- c/pgst.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/c/pgst.c b/c/pgst.c index 1e01392..e5a2a3d 100644 --- a/c/pgst.c +++ b/c/pgst.c @@ -460,9 +460,11 @@ static SV *fupg_st_kvv(pTHX_ fupg_st *st) { HV *hv = newHV(); SV *sv = sv_2mortal(newRV_noinc((SV *)hv)); for (i=0; ikvv() query results", SvPV_nolen(key)); hv_store_ent(hv, key, st->nfields == 1 ? &PL_sv_yes : fupg_st_getval(aTHX_ st, i, 1), 0); + FREETMPS; } return sv; } @@ -474,10 +476,12 @@ static SV *fupg_st_kva(pTHX_ fupg_st *st) { HV *hv = newHV(); SV *sv = sv_2mortal(newRV_noinc((SV *)hv)); for (i=0; ikva() query results", SvPV_nolen(key)); AV *row = st->nfields == 1 ? newAV() : newAV_alloc_x(st->nfields-1); hv_store_ent(hv, key, newRV_noinc((SV *)row), 0); + FREETMPS; for (j=1; jnfields; j++) av_push_simple(row, fupg_st_getval(aTHX_ st, i, j)); } @@ -492,10 +496,12 @@ static SV *fupg_st_kvh(pTHX_ fupg_st *st) { HV *hv = newHV(); SV *sv = sv_2mortal(newRV_noinc((SV *)hv)); for (i=0; ikvh() query results", SvPV_nolen(key)); HV *row = newHV(); hv_store_ent(hv, key, newRV_noinc((SV *)row), 0); + FREETMPS; for (j=1; jnfields; j++) { const char *key = PQfname(st->result, j); hv_store(row, key, -strlen(key), fupg_st_getval(aTHX_ st, i, j), 0); From 8b807e6dcfff16d917479aa8482dae0bac4a69b0 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sat, 19 Apr 2025 12:36:19 +0200 Subject: [PATCH 12/61] Validate: Add empty() and coerce() methods Implementing the undocumented coerce_for_json() method from TUWF and elm_empty() from VNDB. --- FU/Validate.pm | 100 +++++++++++++++++++++++++++++++++++++++++++--- t/validate-util.t | 36 +++++++++++++++++ 2 files changed, 131 insertions(+), 5 deletions(-) create mode 100644 t/validate-util.t diff --git a/FU/Validate.pm b/FU/Validate.pm index 7352053..dd767c7 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -61,6 +61,10 @@ our $re_weburl = qr/^https?:\/\/$re_domain(?::[1-9][0-9]{0,5})?(?:\/[^\s<>"]* our $re_date = qr/^(?:19[0-9][0-9]|20[0-9][0-9])-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12][0-9]|3[01])$/; +# There's a special '_scalartype' option used for coerce() and empty(), with the following values: +# 0/undef/missing: string, 1:num, 2:int, 3:bool +# The highest number, i.e. most restrictive type, is chosen when multiple validations exist. + our %default_validations = ( regex => sub($reg) { # Error objects should be plain data structures so that they can easily @@ -78,12 +82,12 @@ our %default_validations = ( maxlength => sub($v) { _length $v, undef, $v }, length => sub($v) { _length $v, ref $v eq 'ARRAY' ? @$v : ($v, $v) }, - bool => { type => 'any', func => sub { my $r = to_bool $_[0]; return {} if !defined $r; $_[0] = $r; 1 } }, - anybool => { type => 'any', default => false, func => sub { $_[0] = $_[0] ? true : false; 1 } }, + bool => { _scalartype => 3, type => 'any', func => sub { my $r = to_bool $_[0]; return {} if !defined $r; $_[0] = $r; 1 } }, + anybool => { _scalartype => 3, type => 'any', default => false, func => sub { $_[0] = $_[0] ? true : false; 1 } }, - num => [ _reg($re_num), func => sub { $_[0] = $_[0]*1; 1 } ], - int => [ _reg($re_int), func => sub { return { message => 'integer out of range' } if $_[0] < -9223372036854775808 || $_[0] > 9223372036854775807; $_[0] = int $_[0]; 1 } ], - uint => [ _reg($re_uint), func => sub { return { message => 'integer out of range' } if $_[0] > 18446744073709551615; $_[0] = int $_[0]; 1 } ], + num => [ _scalartype => 1, _reg($re_num), func => sub { $_[0] = $_[0]*1; 1 } ], + int => [ _scalartype => 2, _reg($re_int), func => sub { return { message => 'integer out of range' } if $_[0] < -9223372036854775808 || $_[0] > 9223372036854775807; $_[0] = int $_[0]; 1 } ], + uint => [ _scalartype => 2, _reg($re_uint), func => sub { return { message => 'integer out of range' } if $_[0] > 18446744073709551615; $_[0] = int $_[0]; 1 } ], min => sub($min) { +{ num => 1, func => sub { $_[0] >= $min ? 1 : { expected => $min, got => $_[0] } } } }, max => sub($max) { +{ num => 1, func => sub { $_[0] <= $max ? 1 : { expected => $max, got => $_[0] } } } }, range => sub { [ min => $_[0][0], max => $_[0][1] ] }, @@ -152,6 +156,11 @@ sub _compile($schema, $custom, $rec, $top, $validations=$top->{validations}) { next; } + if ($name eq '_scalartype') { + $top->{_scalartype} = $val if ($top->{_scalartype}||0) < $val; + next; + } + if ($builtin{$name}) { confess "Invalid value for 'missing': $val" if $name eq 'missing' && !$missing_vals{$val}; confess "Invalid value for 'unknown': $val" if $name eq 'unknown' && !$unknown_vals{$val}; @@ -353,6 +362,37 @@ sub validate($c, $input) { } +sub coerce { + my $c = $_[0]; + my %opt = @_[2..$#_]; + if (!defined $_[1]) { + $_[1] = undef; + } elsif ($c->{_scalartype}) { + $_[1] = $c->{_scalartype} == 3 ? !!$_[1] : $c->{_scalartype} == 2 ? int $_[1] : $_[1]+0; + } elsif (!$c->{type} || $c->{type} eq 'scalar') { + $_[1] = "$_[1]"; + } elsif ($c->{type} eq 'array' && $c->{elems} && ref $_[1] eq 'ARRAY') { + coerce($c->{elems}, $_, %opt) for $_[1]->@*; + } elsif ($c->{type} eq 'hash' && $c->{keys} && ref $_[1] eq 'HASH') { + $opt{unknown} ||= $c->{unknown}; + delete @{$_[1]}{ grep !$c->{keys}{$_}, keys $_[1]->%* } + if $opt{unknown} && $opt{unknown} ne 'pass'; + $_[1]{$_} = exists $_[1]{$_} ? coerce($c->{keys}{$_}, $_[1]{$_}, %opt) : empty($c->{keys}{$_}) + for keys $c->{keys}->%*; + } + return $_[1]; +} + + +sub empty($c) { + return ref $c->{default} eq 'CODE' ? $c->{default}->(undef) : $c->{default} if exists $c->{default}; + return [] if $c->{type} && $c->{type} eq 'array'; + return $c->{keys} ? +{ map +($_, empty($c->{keys}{$_})), keys $c->{keys}->%* } : {} if $c->{type} && $c->{type} eq 'hash'; + return undef if $c->{type} && $c->{type} eq 'any'; + # Only scalar types remain + return !$c->{_scalartype} ? '' : $c->{_scalartype} == 3 ? !1 : 0; +} + package FU::Validate::err; @@ -445,6 +485,56 @@ I, which indicates the name of the validation that failed. Additional keys with more detailed information may be present, depending on the validation. These are documented in L below. +Additional utility methods: + +=over + +=item $validator->empty + +Returns an "empty" value that roughly follows the data structure described by +the schema. The returned value does not necessarily validate but can still be +useful as a template. Works roughly as follows: + +=over + +=item * If the schema has a I, then that is returned. + +=item * If the schema describes a hash, then a hash is returned with each key +in I initialized to an empty value. + +=item * If the schema describes an array, an empty array is returned. + +=item * If the schema describes a bool, return C. + +=item * If the schema describes a number, return C<0>. + +=item * If the schema describes a string, return C<''>. + +=item * Otherwise, return C. + +=back + +=item $validator->coerce($input, %opt) + +Perform in-place coercion of C<$input> to the data types described by the +schema. Also returns the modified C<$input> for convenience. This method assumes +that C<$input> already has the general structure described by the schema and is +mainly useful to ensure that encoding the value as JSON will end up with the +correct data types. i.e. booleans are encoded as booleans, integers as integers +(truncating if necessary), numbers as numbers, etc. + +If an input hash is missing keys described in the schema, then those are +created with C<< ->empty >>. If the schema has I set to either +I or I, unknown keys are removed. This behavior can be +overriden by passing different I value in C<%opt>. + +This method does NOT perform any sort of validation and will happily pass +through garbage if the given C<$input> does not follow the structure of the +schema. It's basically a faster and lousier normalization-only alternative to +C<< ->validate() >>. + +=back + =head1 Schema Definition diff --git a/t/validate-util.t b/t/validate-util.t new file mode 100644 index 0000000..a0ad106 --- /dev/null +++ b/t/validate-util.t @@ -0,0 +1,36 @@ +use v5.36; +use Test::More; +use FU::Validate; +use FU::Util 'json_format'; + +my $schema = FU::Validate->compile({ keys => { + bool => { anybool => 1 }, + num => { num => 1 }, + int => { int => 1 }, + str => { default => 'x' }, + intarray => { elems => { int => 1 } }, + any => { type => 'any' }, +}}); + + +is json_format($schema->coerce(undef)), 'null'; +is json_format($schema->coerce("str")), '"str"'; + +is json_format($schema->coerce({ + bool => 'abc', + num => " 1.5 ", + int => 9.7, + str => !1, + intarray => [ 1.5, -10, undef, ' 0E0 ' ], + any => {}, + whatsthis => undef, + }, unknown => 'remove'), canonical => 1), + '{"any":{},"bool":true,"int":9,"intarray":[1,-10,null,0],"num":1.5,"str":""}'; + +is json_format($schema->coerce({uhm => 1}), canonical => 1), + '{"any":null,"bool":false,"int":0,"intarray":[],"num":0,"str":"x","uhm":1}'; + +is json_format($schema->empty, canonical => 1), + '{"any":null,"bool":false,"int":0,"intarray":[],"num":0,"str":"x"}'; + +done_testing; From f2294a709a7d683fb95fa94ce427384a3d97e60b Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 20 Apr 2025 11:31:15 +0200 Subject: [PATCH 13/61] FU: Fix warning when calling fu->set_header() with undef value --- FU.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FU.pm b/FU.pm index c21bb7f..37f90db 100644 --- a/FU.pm +++ b/FU.pm @@ -728,7 +728,7 @@ sub reset { sub _validate_header($hdr, $val) { confess "Invalid response header '$hdr'" if $hdr !~ /^$FU::hdrname_re$/; - confess "Invalid attempt to set response header containing a newline" if $val =~ /[\r\n]/; + confess "Invalid attempt to set response header containing a newline" if defined $val && $val =~ /[\r\n]/; } sub add_header($, $hdr, $val) { From 15940067395b2185fc1120122a0b85ff696103a2 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 20 Apr 2025 11:40:27 +0200 Subject: [PATCH 14/61] FU: Improve merging of "Vary" response headers + debug header listing --- FU.pm | 7 +++---- FU/DebugImpl.pm | 11 +++++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/FU.pm b/FU.pm index 37f90db..6a9ec47 100644 --- a/FU.pm +++ b/FU.pm @@ -863,14 +863,12 @@ sub _finalize { $r->{resbody} = ''; } else { + my @vary = ref $r->{reshdr}{vary} eq 'ARRAY' ? $r->{reshdr}{vary}->@* : defined $r->{reshdr}{vary} ? ($r->{reshdr}{vary}) : (); if (($hasgzip || $hasbrotli) && length($r->{resbody}) > 256 && !defined $r->{reshdr}{'content-encoding'} && FU::compress_mimes->{$r->{reshdr}{'content-type'}} ) { - - $r->{reshdr}{'vary'} = ($r->{reshdr}{'vary'} ? $r->{reshdr}{'vary'}.', ' : '').'accept-encoding' - if ($r->{reshdr}{'vary'}||'') !~ /accept-encoding/i; - + push @vary, 'accept-encoding'; if ($hasbrotli && ($r->{hdr}{'accept-encoding'}||'') =~ /\bbr\b/) { $r->{resbody} = FU::Util::brotli_compress(6, $r->{resbody}); $r->{reshdr}{'content-encoding'} = 'br'; @@ -880,6 +878,7 @@ sub _finalize { $r->{reshdr}{'content-encoding'} = 'gzip'; } } + $r->{reshdr}{vary} = @vary ? join ', ', @vary : undef; $r->{reshdr}{'content-length'} = length $r->{resbody}; $r->{resbody} = '' if (fu->method//'') eq 'HEAD'; } diff --git a/FU/DebugImpl.pm b/FU/DebugImpl.pm index eda2939..da5fc47 100644 --- a/FU/DebugImpl.pm +++ b/FU/DebugImpl.pm @@ -75,10 +75,13 @@ my @tabs = ( }; h2_ 'Headers'; table_ sub { - tr_ sub { - td_ $_; - td_ $r->{reshdr}{$_}; - } for sort keys $r->{reshdr}->%*; + for my $k (sort keys $r->{reshdr}->%*) { + my $v = $r->{reshdr}{$k}; + tr_ sub { + td_ $k; + td_ $_; + } for !defined $v ? () : ref $v ? @$v : ($v); + } }; ('Response') }, From ea8ad9e4838e75b149a357388f94ca34e7a0eca9 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 20 Apr 2025 18:37:54 +0200 Subject: [PATCH 15/61] FU: Throw and handle FU::Validate errors directly Instead of wrapping them in a FU::err that isn't easily inspected. --- FU.pm | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/FU.pm b/FU.pm index 6a9ec47..0161f01 100644 --- a/FU.pm +++ b/FU.pm @@ -300,13 +300,13 @@ sub _read_req($c) { } -sub _is_done($e) { ref $@ eq 'FU::err' && $@->[0] == 200 } +sub _is_done($e) { ref $e eq 'FU::err' && $e->[0] == 200 } sub _log_err($e) { return if !$e; - return if !debug && ref $@ eq 'FU::err' && $@->[0] != 500; - if (!$REQ->{full_err} && (ref $@ ne 'FU::err' || $@->[0] == 500)) { - $REQ->{full_err}++; + my $crit = $e isa 'FU::err' ? $e->[0] == 500 : !($e isa 'FU::Validate::err'); + return if !debug && !$crit; + if ($crit && !$REQ->{full_err}++) { $e =~ s/^\s+//; $e =~ s/\s+$//; log_write join "\n", @@ -377,7 +377,7 @@ sub _do_req($c) { } if ($err) { - my($code, $msg) = ref $err eq 'FU::err' ? $err->@* : (500, $err); + my($code, $msg) = $err isa 'FU::err' ? @$err : $err isa 'FU::Validate::err' ? (400, $err) : (500, $err); fu->reset; fu->status($code); my $ok = eval { ($onerr{$code} || $onerr{500})->($code, $msg) }; @@ -642,8 +642,7 @@ sub _getfield($data, @a) { return $data->{$a[0]}; } my $schema = FU::Validate->compile(@a > 1 ? { keys => {@a} } : $a[0]); - my $res = eval { $schema->validate($data) }; - fu->error(400, "Input validation failed: $@") if $@; + my $res = $schema->validate($data); return @a == 2 ? $res->{$a[0]} : $res; } @@ -1209,6 +1208,12 @@ for a certain error code, C<500> is used as fallback. =back +All of the above C<$sub> callbacks are allowed to throw an error. Special +handling is given to exceptions generated by C<< fu->error() >>, which are +relegated to the appropriate C handler, and errors thrown by the +C method of L, which result in the C<400> error +handler being run. Any other exception is passed to the C<500> error handler. + =head1 The 'fu' Object @@ -1303,8 +1308,7 @@ methods below to reliably handle all sorts of query strings. =item fu->query($name => $schema) Parse, validate and return the query parameter identified by C<$name> with the -given L schema. Calls C<< fu->error(400) >> with a useful error -message if validation fails. +given L schema. To fetch a query parameter that may have multiple values, use: From 91b2421a84329baca5590bd6080579bba375b653 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Tue, 22 Apr 2025 09:32:56 +0200 Subject: [PATCH 16/61] FU: Add -procname import option and setting $0 to something useful --- FU.pm | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/FU.pm b/FU.pm index 0161f01..abdde9b 100644 --- a/FU.pm +++ b/FU.pm @@ -8,15 +8,22 @@ use FU::Log 'log_write'; use FU::Util; use FU::Validate; +my $procname; +my $scriptpath = $0; sub import($pkg, @opt) { my $c = caller; no strict 'refs'; *{$c.'::fu'} = \&fu; + my $spawn; for (@opt) { - if ($_ eq '-spawn') { _spawn() } + if (ref $procname eq 'FU::ARG') { $procname = $_ } + elsif ($_ eq '-procname') { $procname = bless {}, 'FU::ARG' } + elsif ($_ eq '-spawn') { $spawn = 1; } else { croak "Unknown import option: '$_'" } } + croak "Missing argument for -procname option" if ref $procname eq 'FU::ARG'; + _spawn() if $spawn; } @@ -209,7 +216,7 @@ sub _monitor { die if $m > $data{$_}; }, no_chdir => 1 - }, $0, values %INC, @monitor_paths); + }, $scriptpath, values %INC, @monitor_paths); 0 } // 1; } @@ -409,6 +416,7 @@ sub _do_req($c) { sub _run_loop($c) { my $stop = 0; + my $count = 0; local $SIG{HUP} = 'IGNORE'; local $SIG{TERM} = $SIG{INT} = sub { $stop = 1 }; @@ -418,7 +426,13 @@ sub _run_loop($c) { exit; } + my sub setstate($state) { + $0 = sprintf "%s: %s [#%d%s]", $procname, $state, $count, $c->{max_reqs} ? "/$c->{max_reqs}" : '' if $procname; + } + while (!$stop) { + setstate 'idle'; + $c->{client_sock} ||= $c->{listen_sock}->accept || next; $c->{fcgi_obj} ||= $c->{listen_proto} eq 'fcgi' && FU::fcgi::new(fileno $c->{client_sock}, $c->{proc}); @@ -427,11 +441,13 @@ sub _run_loop($c) { passclient; } + setstate 'working'; _do_req $c; $c->{client_sock} = $c->{fcgi_obj} = undef if !($c->{fcgi_obj} && $c->{fcgi_obj}->keepalive); - passclient if $c->{max_reqs} && !--$c->{max_reqs}; + $count++; + passclient if $c->{max_reqs} && $count >= $c->{max_reqs}; } } @@ -489,12 +505,14 @@ sub _supervisor($c) { fcntl $client, Fcntl::F_SETFD, 0; $ENV{FU_CLIENT_FD} = fileno $client; } - exec $^X, (map "-I$_", @INC), $0; + exec $^X, (map "-I$_", @INC), $scriptpath; exit 1; } $childs{$pid} = 1; } + $0 = sprintf "%s: supervisor [%d/%d]", $procname, scalar keys %childs, $c->{proc} if $procname; + my ($fd, $msgadd) = FU::Util::fdpass_recv(fileno($rsock), 500); push @client_fd, $fd if $fd; next if !defined $msgadd; @@ -1048,6 +1066,12 @@ returning strings deal with perl Unicode strings, not raw bytes. =over +=item use FU -procname => $name + +When the C<-procname> import option is set, FU automatically updates the +process name (as displayed in L and L, see `$0`) with +information about the current process, prefixed with the given C<$name>. + =item FU::init_db($info) Set database configuration. C<$info> can either be a connection string for C<< From 8096de749741f6365887991e6d555dc6b115e6fc Mon Sep 17 00:00:00 2001 From: Yorhel Date: Tue, 22 Apr 2025 18:54:04 +0200 Subject: [PATCH 17/61] MultipartFormData: Fix ->save() on zero-length values --- FU/MultipartFormData.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FU/MultipartFormData.pm b/FU/MultipartFormData.pm index 8441740..7c4f467 100644 --- a/FU/MultipartFormData.pm +++ b/FU/MultipartFormData.pm @@ -77,7 +77,7 @@ sub syswrite($o, $fh) { sub save($o, $fn) { open my $F, '>', $fn or confess "Error opening '$fn': $!"; - $o->syswrite($F) or confess "Error writing to '$fn': $!"; + defined $o->syswrite($F) or confess "Error writing to '$fn': $!"; } sub describe($o) { From 13271fa413daab3bd91ac33392605b3b6c88ea9a Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 23 Apr 2025 16:12:39 +0200 Subject: [PATCH 18/61] Minor changes & fixes --- FU.pm | 5 +---- FU/DebugImpl.pm | 7 ++++--- README.md | 7 +++---- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/FU.pm b/FU.pm index abdde9b..ffdc9bd 100644 --- a/FU.pm +++ b/FU.pm @@ -694,7 +694,6 @@ sub json { $FU::REQ->{json} ||= eval { FU::Util::json_parse($FU::REQ->{body}, utf8 => 1) } || fu->error(400, "JSON parse error: $@"); - return $FU::REQ->{json} if !@_; _getfield $FU::REQ->{json}, @_; } @@ -945,7 +944,7 @@ __END__ =head1 NAME -FU - Framework Ultimatum: A Lean and Efficient Zero-Dependency Web Framework. +FU - A Lean and Efficient Zero-Dependency Web Framework. =head1 EXPERIMENTAL @@ -1481,8 +1480,6 @@ Encode C<$data> as JSON (using C in L), set an appropriate C header and send it to the client. Calls C<< fu->done >>. -I Support schema-based normalization. - =item fu->send_file($root, $path) If a file identified by C<"$root/$path"> exists, set that as response and call diff --git a/FU/DebugImpl.pm b/FU/DebugImpl.pm index da5fc47..d7f0334 100644 --- a/FU/DebugImpl.pm +++ b/FU/DebugImpl.pm @@ -4,7 +4,7 @@ use v5.36; use experimental 'for_list'; use FU; use FU::XMLWriter ':html5_', 'fragment', 'xml_escape'; -use Time::HiRes 'time'; +use Time::HiRes 'time', 'clock_gettime', 'CLOCK_MONOTONIC'; use POSIX 'strftime'; sub fmtts { strftime '%Y-%m-%d %H:%M:%S UTC', gmtime shift } @@ -15,7 +15,8 @@ sub loc_($loc) { br_ if $_; my $l = $loc->[$_]; my $f = $_ == $#$loc ? '(main)' : $loc->[$_+1][3]; - txt_ "$l->[1]:$l->[2] $f"; + $f = "$l->[0]::$f" if $f !~ /^\Q$l->[0]/; + txt_ "$f @ $l->[1]:$l->[2]"; } } @@ -35,7 +36,7 @@ my @tabs = ( tr_ sub { td_ 'Path'; td_ fu->path }; tr_ sub { td_ 'Query'; td_ fu->query }; tr_ sub { td_ 'Client IP'; td_ fu->ip }; - tr_ sub { td_ 'Received'; td_ fmtts $FU::REQ->{trace_start} }; + tr_ sub { td_ 'Received'; td_ fmtts(time - (($FU::REQ->{trace_end}||clock_gettime(CLOCK_MONOTONIC)) - $FU::REQ->{trace_start})) }; }; h2_ 'Headers'; table_ sub { diff --git a/README.md b/README.md index 9cf7cb8..bf545b6 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -# FU - Framework Ultimatum: A Lean and Efficient Zero-Dependency Web Framework +# FU - A Lean and Efficient Zero-Dependency Web Framework -FU is a web development framework for Perl and a collection of handy utility -modules. +FU (Framework Ultimatum) is a web development framework for Perl and a +collection of handy utility modules. *Contributing:* Refer to my [contribution guidelines](https://dev.yorhel.nl/contributing). @@ -23,7 +23,6 @@ Things that may or may not happen: - FU::JSON - JSON::{XS,PP,etc}-compatible wrapper around FU::Util's JSON functions? I prolly won't need this myself, but could be handy. - FU::DBI - DBI wrapper with a FU::Pg-like API, for easier integration into FU. -- FU::Mailer - Simple sendmail wrapper # License From ab168bd952042a9e2fbd43173b0805204cf57eef Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 24 Apr 2025 07:23:00 +0200 Subject: [PATCH 19/61] Pg: Don't return self on / cache() or text_*() methods This is much too easy of a footgun: $db->cache->q(...); Enabled cache on the $db object, not just for the given query. --- FU.xs | 2 +- t/pgconnect.t | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/FU.xs b/FU.xs index ef8d60b..221740b 100644 --- a/FU.xs +++ b/FU.xs @@ -55,7 +55,6 @@ if (!ix) ix = FUPG_CACHE;\ if (items == 1 || SvTRUE(ST(1))) x->stflags |= ix; \ else x->stflags &= ~ix; \ - XSRETURN(1); \ } while(0) MODULE = FU @@ -343,6 +342,7 @@ void cache(fupg_st *x, ...) CODE: if (ix == 0 && x->prepared) fu_confess("Invalid attempt to change statement configuration after it has already been prepared or executed"); FUPG_STFLAGS; + XSRETURN(1); void exec(fupg_st *st) CODE: diff --git a/t/pgconnect.t b/t/pgconnect.t index d2dfbf5..798734c 100644 --- a/t/pgconnect.t +++ b/t/pgconnect.t @@ -17,7 +17,9 @@ okerr FATAL => connect => qr/missing "=" after "invalid"/; ok FU::Pg::lib_version() > 100000; -my $conn = FU::Pg->connect($ENV{FU_TEST_DB})->text->cache(0); +my $conn = FU::Pg->connect($ENV{FU_TEST_DB}); +$conn->text; +$conn->cache(0); $conn->_debug_trace(0); is ref $conn, 'FU::Pg::conn'; @@ -364,7 +366,9 @@ subtest 'txn', sub { } subtest 'Prepared statement cache', sub { - my $txn = $conn->cache_size(2)->txn->cache; + $conn->cache_size(2); + my $txn = $conn->txn; + $txn->cache; my sub numexec($sql) { $txn->q('SELECT generic_plans + custom_plans FROM pg_prepared_statements WHERE statement = $1', $sql)->cache(0)->val } From 48334568984eafd20fe82f54106e93eec49349d7 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 24 Apr 2025 10:50:50 +0200 Subject: [PATCH 20/61] FU: Accept charset argument in JSON request content-type header --- FU.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FU.pm b/FU.pm index ffdc9bd..423c6e1 100644 --- a/FU.pm +++ b/FU.pm @@ -689,7 +689,7 @@ sub cookie { sub json { shift; - fu->error(400, "Invalid content type for json") if (fu->header('content-type')||'') ne 'application/json'; + fu->error(400, "Invalid content type for json") if (fu->header('content-type')||'') !~ m{^application/json(?:;\s*charset=utf-?8)?$}i; return FU::Util::utf8_decode(my $x = $FU::REQ->{body}) if !@_; $FU::REQ->{json} ||= eval { FU::Util::json_parse($FU::REQ->{body}, utf8 => 1) From e88ad65232ad9e6fb4246993cbb1a172a04641ac Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 24 Apr 2025 14:16:17 +0200 Subject: [PATCH 21/61] Version 0.5 --- ChangeLog | 17 +++++++++++++++++ FU.pm | 4 ++-- FU/DebugImpl.pm | 2 +- FU/Log.pm | 2 +- FU/MultipartFormData.pm | 2 +- FU/Pg.pm | 2 +- FU/SQL.pm | 2 +- FU/Util.pm | 2 +- FU/Validate.pm | 2 +- FU/XMLWriter.pm | 2 +- FU/XS.pm | 2 +- Makefile.PL | 1 + README.md | 2 ++ 13 files changed, 31 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index f9c7266..3f18b6b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +0.5 - 2025-04-24 + - FU::Util: Set O_CLOEXEC on fds received through `fdpass_recv()` + - FU::Util: Fix interpretation of false options in `json_format()` and + `json_parse()` + - FU::Validate: Add `coerce()` and `empty()` utility methods + - FU::Validate: Limit values of int/uint input to 64 bits + - FU::Validate: Normalize num/int/uint inputs to Perl numeric types + - FU::Pg: Add `escape_literal()` and `escape_identifier()` methods + - FU::Pg: Use less memory for `kvv()`, `kva()` and `kvh()` methods + - FU::Pg: Disallow chaining of `cache()`, `text()`, `text_params()` and + `text_results()` methods on connection and transaction objects + - FU: Throw and catch FU::Validate errors without wrapping in `fu->error()` + - FU: Add `-progname` option and add diagnostics to process names + - FU: Whole bunch of misc fixes + - Doc fixes + - Fix nul-termination of some XS-created strings + 0.4 - 2025-03-19 - FU::Validate: Support arrayref schemas - FU::Validate: Rename 'values' option to 'elems' diff --git a/FU.pm b/FU.pm index 423c6e1..1f71a91 100644 --- a/FU.pm +++ b/FU.pm @@ -1,4 +1,4 @@ -package FU 0.4; +package FU 0.5; use v5.36; use Carp 'confess', 'croak'; use IO::Socket; @@ -1055,7 +1055,7 @@ certainly not great if you plan to transfer large files. =back The rest of this document is reference documentation; there's no easy -introductionary cookbook-style docs yet, sorry about that. +introductory cookbook-style docs yet, sorry about that. Unless specifically mentioned otherwise, all methods and functions taking or returning strings deal with perl Unicode strings, not raw bytes. diff --git a/FU/DebugImpl.pm b/FU/DebugImpl.pm index d7f0334..a0de7ea 100644 --- a/FU/DebugImpl.pm +++ b/FU/DebugImpl.pm @@ -1,5 +1,5 @@ # Internal module used by FU.pm -package FU::DebugImpl 0.4; +package FU::DebugImpl 0.5; use v5.36; use experimental 'for_list'; use FU; diff --git a/FU/Log.pm b/FU/Log.pm index 24dafbf..e2da4a2 100644 --- a/FU/Log.pm +++ b/FU/Log.pm @@ -1,4 +1,4 @@ -package FU::Log 0.4; +package FU::Log 0.5; use v5.36; use Exporter 'import'; use POSIX 'strftime'; diff --git a/FU/MultipartFormData.pm b/FU/MultipartFormData.pm index 7c4f467..a92e4d7 100644 --- a/FU/MultipartFormData.pm +++ b/FU/MultipartFormData.pm @@ -1,4 +1,4 @@ -package FU::MultipartFormData 0.4; +package FU::MultipartFormData 0.5; use v5.36; use Carp 'confess'; use FU::Util 'utf8_decode'; diff --git a/FU/Pg.pm b/FU/Pg.pm index e43776e..f43c7f8 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -1,4 +1,4 @@ -package FU::Pg 0.4; +package FU::Pg 0.5; use v5.36; use FU::XS; diff --git a/FU/SQL.pm b/FU/SQL.pm index 15adbb1..2f8566d 100644 --- a/FU/SQL.pm +++ b/FU/SQL.pm @@ -1,4 +1,4 @@ -package FU::SQL 0.4; +package FU::SQL 0.5; use v5.36; use Exporter 'import'; use Carp 'confess'; diff --git a/FU/Util.pm b/FU/Util.pm index e823d79..5b262fb 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -1,4 +1,4 @@ -package FU::Util 0.4; +package FU::Util 0.5; use v5.36; use FU::XS; diff --git a/FU/Validate.pm b/FU/Validate.pm index dd767c7..b315784 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -1,4 +1,4 @@ -package FU::Validate 0.4; +package FU::Validate 0.5; use v5.36; use experimental 'builtin', 'for_list'; diff --git a/FU/XMLWriter.pm b/FU/XMLWriter.pm index 35e0702..1e9bb90 100644 --- a/FU/XMLWriter.pm +++ b/FU/XMLWriter.pm @@ -1,4 +1,4 @@ -package FU::XMLWriter 0.4; +package FU::XMLWriter 0.5; use v5.36; use Carp 'confess'; use Exporter 'import'; diff --git a/FU/XS.pm b/FU/XS.pm index f9660c8..52cc757 100644 --- a/FU/XS.pm +++ b/FU/XS.pm @@ -1,5 +1,5 @@ # This module is for internal use by other FU modules. -package FU::XS 0.4; +package FU::XS 0.5; use Carp; # may be called by XS. use XSLoader; XSLoader::load('FU'); diff --git a/Makefile.PL b/Makefile.PL index 326fdb4..c107fa1 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -17,6 +17,7 @@ WriteMakefile( dynamic_config => 0, 'meta-spec' => { version => 2 }, resources => { + homepage => 'https://dev.yorhel.nl/fu', repository => { web => 'https://code.blicky.net/yorhel/fu', type => 'git', diff --git a/README.md b/README.md index bf545b6..d29e00c 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ FU (Framework Ultimatum) is a web development framework for Perl and a collection of handy utility modules. +*Website:* More information @ [dev.yorhel.nl/fu](https://dev.yorhel.nl/fu). + *Contributing:* Refer to my [contribution guidelines](https://dev.yorhel.nl/contributing). ## Project Status From 0cd947c545b13ce316d36bda21476ace5b2b3483 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 25 Apr 2025 09:31:43 +0200 Subject: [PATCH 22/61] FastCGI: Ignore HTTP_CONTENT_(TYPE|LENGTH) The non-HTTP_ versions of these are authoritative, Also fixes a memory leak when both the HTTP_ and non-HTTP_ versions are included. --- c/fcgi.c | 7 +++++-- t/fcgi.t | 7 +++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/c/fcgi.c b/c/fcgi.c index fdf8cae..4f886dd 100644 --- a/c/fcgi.c +++ b/c/fcgi.c @@ -319,8 +319,11 @@ static int fufcgi_read_params(pTHX_ fufcgi *ctx, fufcgi_rec *rec) { p.name += 5; for (r=0; r= 'A' && p.name[r] <= 'Z' ? p.name[r] | 0x20 : p.name[r]; - valsv = newSV(p.vallen+1); - hv_store(ctx->headers, p.name, p.namelen, valsv, 0); + if (!(p.namelen == 14 && memcmp(p.name, "content-length", 14) == 0) + && !(p.namelen == 12 && memcmp(p.name, "content-type", 12) == 0)) { + valsv = newSV(p.vallen+1); + hv_store(ctx->headers, p.name, p.namelen, valsv, 0); + } } else if (p.namelen == 14 && memcmp(p.name, "CONTENT_LENGTH", 14) == 0) { valsv = newSV(p.vallen+1); diff --git a/t/fcgi.t b/t/fcgi.t index 0711d6a..d7860dc 100644 --- a/t/fcgi.t +++ b/t/fcgi.t @@ -167,6 +167,13 @@ record 1, 4, "\x0c\x05CONTENT_TYPEsomet"; record 1, 2, ""; isrec {'content-type','somet'}, {body => ''}, -6; +start; +begin; +record 1, 4, "\x13\x01HTTP_CONTENT_LENGTH3\x0e\x01CONTENT_LENGTH0\x13\x01HTTP_CONTENT_LENGTH5"; +record 1, 4, ""; +record 1, 5, ""; +isrec {'content-length','0'}, {body => ''}; + start; begin; record 1, 4, "\x0e\x05CONTENT_LENGTH65536"; From 5f8809d0523d547ae7be03fac568ec7a36b85e53 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 25 Apr 2025 17:07:56 +0200 Subject: [PATCH 23/61] FU::Util::query_decode(): Properly handle empty "&"-parts --- FU/Util.pm | 1 + t/query.t | 2 ++ 2 files changed, 3 insertions(+) diff --git a/FU/Util.pm b/FU/Util.pm index 5b262fb..7d585d9 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -41,6 +41,7 @@ sub uri_unescape :prototype($) ($s) { sub query_decode :prototype($) ($s) { my %o; for (split /&/, $s//'') { + next if !length; my($k,$v) = map uri_unescape($_), split /=/, $_, 2; $v //= builtin::true; if (ref $o{$k}) { push $o{$k}->@*, $v } diff --git a/t/query.t b/t/query.t index ebeff80..80f2b00 100644 --- a/t/query.t +++ b/t/query.t @@ -10,6 +10,8 @@ is_deeply ok !eval { query_decode('%10'); 1 }; like $@, qr/Invalid control character/; +is_deeply query_decode('&&&a=b'), { a => 'b' }; + is query_encode { a => builtin::true, b => undef, c => builtin::false, d => 'string', e => "&=\xfe" }, 'a&d=string&e=%26%3d%c3%be'; From 461ed6f39d5522136da212ba2477a0bfbf25613c Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sat, 26 Apr 2025 08:05:09 +0200 Subject: [PATCH 24/61] FU: Suppress warnings about non-existent files in FU::monitor_path checking --- FU.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FU.pm b/FU.pm index 1f71a91..04809b5 100644 --- a/FU.pm +++ b/FU.pm @@ -216,7 +216,7 @@ sub _monitor { die if $m > $data{$_}; }, no_chdir => 1 - }, $scriptpath, values %INC, @monitor_paths); + }, grep -e, $scriptpath, values %INC, @monitor_paths); 0 } // 1; } From 753cac615a645bc875d4e39ad3b81d80fc01542f Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sat, 26 Apr 2025 15:41:26 +0200 Subject: [PATCH 25/61] Validate: Improved + extendable error message formatting Very much needed for VNDB's advanced search validation. Also completely undocumented. --- FU/Validate.pm | 47 +++++++++++++++++++++++++++++++---------------- t/validate.t | 34 +++++++++++++++++----------------- 2 files changed, 48 insertions(+), 33 deletions(-) diff --git a/FU/Validate.pm b/FU/Validate.pm index b315784..2741ee6 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -395,32 +395,47 @@ sub empty($c) { +sub _fmtkey($k) { $k =~ /^[a-zA-Z0-9_-]+$/ ? $k : FU::Util::json_format($k); } +sub _fmtval($v) { eval { $v = FU::Util::json_format($v) }; "$v" } +sub _inval($t,$v) { sprintf 'invalid %s: %s', $t, _fmtval $v } + +# validation name => formatting sub +# TODO: document. +our %error_format = ( + required => sub { 'required value missing' }, + type => sub($e) { "invalid type, expected '$e->{expected}' but got '$e->{got}'" }, + unknown => sub($e) { sprintf 'unknown key%s: %s', $e->{keys}->@* == 1 ? '' : 's', join ', ', map _fmtkey($_), $e->{keys}->@* }, + minlength => sub($e) { sprintf "input too short, expected minimum of %d but got %d", $e->{expected}, $e->{got} }, + maxlength => sub($e) { sprintf "input too long, expected maximum of %d but got %d", $e->{expected}, $e->{got} }, + length => sub($e) { + !ref $e->{expected} + ? sprintf 'invalid input length, expected %d but got %d', $e->{expected}, $e->{got} + : sprintf 'invalid input length, expected between %d and %d but got %d', $e->{expected}->@*, $e->{got} + }, + num => sub($e) { _inval 'number', $e->{got} }, + min => sub($e) { $e->{error} ? _inval 'number', $e->{error}{got} : sprintf 'expected minimum %s but got %s', $e->{expected}, $e->{got} }, + max => sub($e) { $e->{error} ? _inval 'number', $e->{error}{got} : sprintf 'expected maximum %s but got %s', $e->{expected}, $e->{got} }, + range => sub($e) { FU::Validate::err::errors($e->{error}) }, +); + + package FU::Validate::err; use v5.36; -use FU::Util; use overload '""' => sub { $_[0]{longmess} || join "\n", $_[0]->errors }; -sub _fmtkey($k) { - $k =~ /^[a-zA-Z0-9_-]+$/ ? $k : FU::Util::json_format($k); -} - -sub _fmtval($v) { - eval { $v = FU::Util::json_format($v) }; "$v" -} - +# TODO: document. sub errors($e, $prefix='') { my $val = $e->{validation}; my $p = $prefix ? "$prefix: " : ''; - $val eq 'keys' ? map errors($_, $prefix.'.'._fmtkey($_->{key})), $e->{errors}->@* : - $val eq 'values' ? map errors($_, $prefix.'.'._fmtkey($_->{key})), $e->{errors}->@* : - $val eq 'missing' ? $prefix.'.'._fmtkey($e->{key}).': required key missing' : + $FU::Validate::error_format{$val} ? map "$p$_", $FU::Validate::error_format{$val}->($e) : + $val eq 'keys' ? map errors($_, $prefix.'.'.FU::Validate::_fmtkey($_->{key})), $e->{errors}->@* : + $val eq 'values' ? map errors($_, $prefix.'.'.FU::Validate::_fmtkey($_->{key})), $e->{errors}->@* : + $val eq 'missing' ? $prefix.'.'.FU::Validate::_fmtkey($e->{key}).': required key missing' : $val eq 'elems' ? map errors($_, $prefix."[$_->{index}]"), $e->{errors}->@* : - $val eq 'unique' ? $prefix."[$e->{index_b}] value '"._fmtval($e->{value_a})."' duplicated" : - $val eq 'required' ? "${p}required value missing" : - $val eq 'type' ? "${p}invalid type, expected '$e->{expected}' but got '$e->{got}'" : - $val eq 'unknown' ? ($e->{keys}->@* > 1 ? "${p}unknown keys: ".join(', ', _fmtkey($e->{keys})) : "${p}unknown key '"._fmtkey($e->{keys}[0])."'") : + $val eq 'unique' ? $prefix."[$e->{index_b}] value '".FU::Validate::_fmtval($e->{value_a})."' duplicated" : $e->{error} ? errors($e->{error}, "${p}validation '$val'") : + $e->{message} ? "${p}validation '$val': $e->{message}" : "${p}failed validation '$val'"; } diff --git a/t/validate.t b/t/validate.t index a29dc4c..26704cd 100644 --- a/t/validate.t +++ b/t/validate.t @@ -119,7 +119,7 @@ f { type => 'hash', keys => { a=>{missing=>'reject'} } }, {}, {key => 'a', valid t { type => 'hash', keys => { a=>{} } }, {a=>' a '}, {a=>'a'}; # Test against in-place modification t { type => 'hash', keys => { a=>{} }, unknown => 'remove' }, { a=>1,b=>1 }, { a=>1 }; -f { type => 'hash', keys => { a=>{} }, unknown => 'reject' }, { a=>1,b=>1 }, { validation => 'unknown', keys => ['b'], expected => ['a'] }, "unknown key 'b'"; +f { type => 'hash', keys => { a=>{} }, unknown => 'reject' }, { a=>1,b=>1 }, { validation => 'unknown', keys => ['b'], expected => ['a'] }, "unknown key: b"; t { type => 'hash', keys => { a=>{} }, unknown => 'pass' }, { a=>1,b=>1 }, { a=>1,b=>1 }; t { type => 'hash', setundef => 1 }, {}, undef; t { type => 'hash', unknown => 'reject', keys => { a=>{ type => 'any', setundef => 1}} }, {a=>[]}, {a=>undef}; @@ -132,20 +132,20 @@ t { values => { int => 1 } }, { a => -1, b => 1 }, { a => -1, b => 1 }; f { values => { int => 1 } }, { a => undef }, { validation => 'values', errors => [{ key => 'a', validation => 'required' }] }, '.a: required value missing'; # default validations -f { minlength => 3 }, 'ab', { validation => 'minlength', expected => 3, got => 2 }, "failed validation 'minlength'"; +f { minlength => 3 }, 'ab', { validation => 'minlength', expected => 3, got => 2 }, "input too short, expected minimum of 3 but got 2"; t { minlength => 3 }, 'abc', 'abc'; -f { maxlength => 3 }, 'abcd', { validation => 'maxlength', expected => 3, got => 4 }, "failed validation 'maxlength'"; +f { maxlength => 3 }, 'abcd', { validation => 'maxlength', expected => 3, got => 4 }, "input too long, expected maximum of 3 but got 4"; t { maxlength => 3 }, 'abc', 'abc'; t { minlength => 3, maxlength => 3 }, 'abc', 'abc'; -f { length => 3 }, 'ab', { validation => 'length', expected => 3, got => 2 }, "failed validation 'length'"; -f { length => 3 }, 'abcd', { validation => 'length', expected => 3, got => 4 }, "failed validation 'length'"; +f { length => 3 }, 'ab', { validation => 'length', expected => 3, got => 2 }, 'invalid input length, expected 3 but got 2'; +f { length => 3 }, 'abcd', { validation => 'length', expected => 3, got => 4 }, 'invalid input length, expected 3 but got 4'; t { length => 3 }, 'abc', 'abc'; t { length => [1,3] }, 'abc', 'abc'; -f { length => [1,3] }, 'abcd', { validation => 'length', expected => [1,3], got => 4 }, "failed validation 'length'"; +f { length => [1,3] }, 'abcd', { validation => 'length', expected => [1,3], got => 4 }, "invalid input length, expected between 1 and 3 but got 4"; t { type => 'array', length => 0 }, [], []; -f { type => 'array', length => 1 }, [1,2], { validation => 'length', expected => 1, got => 2 }, "failed validation 'length'"; +f { type => 'array', length => 1 }, [1,2], { validation => 'length', expected => 1, got => 2 }, "invalid input length, expected 1 but got 2"; t { type => 'hash', length => 0 }, {}, {}; -f { type => 'hash', length => 1, unknown => 'pass' }, {qw/1 a 2 b/}, { validation => 'length', expected => 1, got => 2 }, "failed validation 'length'"; +f { type => 'hash', length => 1, unknown => 'pass' }, {qw/1 a 2 b/}, { validation => 'length', expected => 1, got => 2 }, "invalid input length, expected 1 but got 2"; t { type => 'hash', length => 1, keys => {a => {missing=>'ignore'}, b => {missing=>'ignore'}} }, {a=>1}, {a=>1}; t { regex => '^a' }, 'abc', 'abc'; # XXX: Can't use qr// here because t() does dclone(). The 'hex' test covers that case anyway. f { regex => '^a' }, 'cba', { validation => 'regex', regex => '^a', got => 'cba' }, "failed validation 'regex'"; @@ -201,7 +201,7 @@ t { doublefunc => 1 }, 0, 2; f { doublefunc => 1 }, 1, { validation => 'doublefunc', error => { validation => 'func', result => '' } }, "validation 'doublefunc': failed validation 'func'"; # numbers -sub nerr { ({ validation => 'num', got => $_[0] }, "failed validation 'num'") } +sub nerr { ({ validation => 'num', got => $_[0] }, "invalid number: \"$_[0]\"") } t { num => 1 }, 0, 0; f { num => 1 }, '-', nerr '-'; f { num => 1 }, '00', nerr '00'; @@ -219,16 +219,16 @@ t { uint => 1 }, 0, 0; t { uint => 1 }, 123, 123; f { uint => 1 }, -123, { validation => 'uint', got => -123 }, "failed validation 'uint'"; t { min => 1 }, 1, 1; -f { min => 1 }, 0.9, { validation => 'min', expected => 1, got => 0.9 }, "failed validation 'min'"; -f { min => 1 }, 'a', { validation => 'min', error => (nerr 'a')[0] }, "validation 'min': failed validation 'num'"; +f { min => 1 }, 0.9, { validation => 'min', expected => 1, got => 0.9 }, "expected minimum 1 but got 0.9"; +f { min => 1 }, 'a', { validation => 'min', error => (nerr 'a')[0] }, 'invalid number: "a"'; t { max => 1 }, 1, 1; -f { max => 1 }, 1.1, { validation => 'max', expected => 1, got => 1.1 }, "failed validation 'max'"; -f { max => 1 }, 'a', { validation => 'max', error => (nerr 'a')[0] }, "validation 'max': failed validation 'num'"; +f { max => 1 }, 1.1, { validation => 'max', expected => 1, got => 1.1 }, "expected maximum 1 but got 1.1"; +f { max => 1 }, 'a', { validation => 'max', error => (nerr 'a')[0] }, 'invalid number: "a"'; t { range => [1,2] }, 1, 1; t { range => [1,2] }, 2, 2; -f { range => [1,2] }, 0.9, { validation => 'range', error => { validation => 'min', expected => 1, got => 0.9 } }, "validation 'range': failed validation 'min'"; -f { range => [1,2] }, 2.1, { validation => 'range', error => { validation => 'max', expected => 2, got => 2.1 } }, "validation 'range': failed validation 'max'"; -f { range => [1,2] }, 'a', { validation => 'range', error => { validation => 'min', error => (nerr 'a')[0] } }, "validation 'range': validation 'min': failed validation 'num'"; +f { range => [1,2] }, 0.9, { validation => 'range', error => { validation => 'min', expected => 1, got => 0.9 } }, 'expected minimum 1 but got 0.9'; +f { range => [1,2] }, 2.1, { validation => 'range', error => { validation => 'max', expected => 2, got => 2.1 } }, 'expected maximum 2 but got 2.1'; +f { range => [1,2] }, 'a', { validation => 'range', error => { validation => 'min', error => (nerr 'a')[0] } }, 'invalid number: "a"'; # email template use utf8; @@ -253,7 +253,7 @@ t { email => 1 }, $_, $_ for ( 'abc@x-y_z.example', ); my $long = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx@xxxxxxxxxxxxxxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxx.xxxxx'; -f { email => 1 }, $long, { validation => 'email', error => { validation => 'maxlength', got => 255, expected => 254 } }, "validation 'email': failed validation 'maxlength'"; +f { email => 1 }, $long, { validation => 'email', error => { validation => 'maxlength', got => 255, expected => 254 } }, "validation 'email': input too long, expected maximum of 254 but got 255"; # weburl template f { weburl => 1 }, $_, { validation => 'weburl', got => $_ }, "failed validation 'weburl'" for ( From 817fa600d0ccc66bdc2e6d0e501e557a01be69d9 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 27 Apr 2025 11:17:54 +0200 Subject: [PATCH 26/61] FU: Add fu->log_verbose() + include request body in error logs --- FU.pm | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/FU.pm b/FU.pm index 04809b5..11c58ff 100644 --- a/FU.pm +++ b/FU.pm @@ -303,7 +303,7 @@ sub _read_req($c) { ($REQ->{path}, my $qs) = split /\?/, $REQ->{path}//'', 2; $REQ->{qs} //= $qs; - $REQ->{path} = FU::Util::uri_unescape($REQ->{path}); + eval { $REQ->{path} = FU::Util::uri_unescape($REQ->{path}); 1; } || fu->error(400, $@); } @@ -313,17 +313,8 @@ sub _log_err($e) { return if !$e; my $crit = $e isa 'FU::err' ? $e->[0] == 500 : !($e isa 'FU::Validate::err'); return if !debug && !$crit; - if ($crit && !$REQ->{full_err}++) { - $e =~ s/^\s+//; - $e =~ s/\s+$//; - log_write join "\n", - 'IP: '.($REQ->{ip}||'-'), - 'Headers:', (map " $_: $REQ->{hdr}{$_}", sort keys $REQ->{hdr}->%*), - 'ERROR:'.($e =~ s/(^|\n)/\n /rg); - # TODO: decoded body, if we have that. - } else { - log_write $e; - } + return fu->log_verbose($e) if $crit; + log_write $e; } sub _do_req($c) { @@ -643,6 +634,27 @@ sub db { sub sql { shift->db->q(@_) } sub SQL { shift->db->Q(@_) } +sub _fmt_section($s) { $s =~ s/^\s*/ /r =~ s/\s+$//r =~ s/\n/\n /rg } + +sub log_verbose($,$msg) { + my $r = $FU::REQ; + return FU::Log::log_write($msg) if $r->{log_verbose}++; + FU::Log::log_write(join "\n", + 'IP: '.($r->{ip}||'-'), + 'Headers:', (map " $_: $r->{hdr}{$_}", sort keys $r->{hdr}->%*), + $r->{multipart} ? ('Body (multipart):', _fmt_section join "\n", map $_->describe, $r->{multipart}->@*) : + $r->{json} ? ('Body (JSON):', _fmt_section FU::Util::json_format($r->{json}, pretty => 1, canonical => 1)) : + $r->{formdata} ? ('Body (formdata):', _fmt_section FU::Util::json_format($r->{json}, pretty => 1, canonical => 1)) : + length $r->{body} ? do { + my $b = substr $r->{body}, 0, 4096; + my $trunc = length $r->{body} > 4096 ? ', truncated' : ''; + utf8::decode($b) ? ("Body (utf8$trunc)", _fmt_section($b =~ s/\r//rg =~ s/\n{4,}/\n[..]\n/rg)) + : ("Body (hex$trunc)", _fmt_section(unpack('H*', $b) =~ s/(.{128})/$1\n/rg)) + } : (), + 'Message:', _fmt_section $msg + ); +} + @@ -1286,6 +1298,14 @@ Convenient short-hand for C<< fu->db->q($query, @params) >>. Convenient short-hand for C<< fu->db->Q(@args) >>. +=item fu->log_verbose($message) + +Write a verbose multi-line message to the log, including a full dump of +information about the request: IP, headers and (potentially reformatted and/or +truncated) body. This extra info is only written once per request, further +calls to C just go directly to L's C +instead. + =back =head1 Request Information From d0c5397e2dda0387c1678a279bc13187c8dd455d Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 28 Apr 2025 10:20:53 +0200 Subject: [PATCH 27/61] json_parse()/pgtypes: Fix accidental creation of read-only array/hash values &PL_sv_* shouldn't be used when constructing arrays or hashes in this context. --- FU.xs | 6 ++++++ c/jsonparse.c | 4 ++-- c/pgst.c | 2 +- c/pgtypes.c | 12 ++++++++---- t/json_parse.t | 8 +++++++- t/pgconnect.t | 1 + t/pgtypes-dynamic.t | 7 ++++++- t/pgtypes.t | 16 ++++++++++++++-- 8 files changed, 45 insertions(+), 11 deletions(-) diff --git a/FU.xs b/FU.xs index 221740b..dc19870 100644 --- a/FU.xs +++ b/FU.xs @@ -20,6 +20,12 @@ #ifndef BOOL_INTERNALS_sv_isbool_true #define BOOL_INTERNALS_sv_isbool_true(x) SvTRUEx(x) #endif +#ifndef newSV_true +#define newSV_true() newSVsv(&PL_sv_yes) +#endif +#ifndef newSV_false +#define newSV_false() newSVsv(&PL_sv_no) +#endif /* Disable key/value struct packing in khashl, so we can safely take a pointer * to values inside the hash table. */ diff --git a/c/jsonparse.c b/c/jsonparse.c index d24b1f9..6dfee91 100644 --- a/c/jsonparse.c +++ b/c/jsonparse.c @@ -236,12 +236,12 @@ static SV *fujson_parse(pTHX_ fujson_parse_ctx *ctx) { if (ctx->end - ctx->buf < 4) return NULL; if (memcmp(ctx->buf, "true", 4) != 0) return NULL; ctx->buf += 4; - return &PL_sv_yes; + return newSV_true(); case 'f': if (ctx->end - ctx->buf < 5) return NULL; if (memcmp(ctx->buf, "false", 5) != 0) return NULL; ctx->buf += 5; - return &PL_sv_no; + return newSV_false(); case 'n': if (ctx->end - ctx->buf < 4) return NULL; if (memcmp(ctx->buf, "null", 4) != 0) return NULL; diff --git a/c/pgst.c b/c/pgst.c index e5a2a3d..e943450 100644 --- a/c/pgst.c +++ b/c/pgst.c @@ -463,7 +463,7 @@ static SV *fupg_st_kvv(pTHX_ fupg_st *st) { SAVETMPS; SV *key = sv_2mortal(fupg_st_getval(aTHX_ st, i, 0)); if (hv_exists_ent(hv, key, 0)) fu_confess("Key '%s' is duplicated in $st->kvv() query results", SvPV_nolen(key)); - hv_store_ent(hv, key, st->nfields == 1 ? &PL_sv_yes : fupg_st_getval(aTHX_ st, i, 1), 0); + hv_store_ent(hv, key, st->nfields == 1 ? newSV_true() : fupg_st_getval(aTHX_ st, i, 1), 0); FREETMPS; } return sv; diff --git a/c/pgtypes.c b/c/pgtypes.c index b307cc0..471b6d2 100644 --- a/c/pgtypes.c +++ b/c/pgtypes.c @@ -78,7 +78,7 @@ SENDFN(domain) { (void)out; SERR("domain type should not be handled by this func RECVFN(bool) { RLEN(1); - return *buf ? &PL_sv_yes : &PL_sv_no; + return *buf ? newSV_true() : newSV_false(); } SENDFN(bool) { @@ -89,7 +89,7 @@ SENDFN(bool) { RECVFN(void) { RLEN(0); (void)buf; - return &PL_sv_undef; + return newSV(0); } SENDFN(void) { @@ -269,7 +269,7 @@ SENDFN(jsonpath) { #define ARRAY_MAXDIM 100 static SV *fupg_recv_array_elem(pTHX_ const fupg_tio *elem, const char *header, U32 dim, U32 ndim, const char **buf, const char *end) { - SV *r = &PL_sv_undef; + SV *r; if (dim == ndim) { if (end - *buf < 4) fu_confess("Invalid array format"); I32 len = fu_frombeI(32, *buf); @@ -279,6 +279,8 @@ static SV *fupg_recv_array_elem(pTHX_ const fupg_tio *elem, const char *header, if (len >= 0) { r = elem->recv(aTHX_ elem, *buf, len); *buf += len; + } else { + r = newSV(0); } } else { @@ -403,12 +405,14 @@ RECVFN(record) { if (oid != ctx->record.info->attrs[i].oid) RERR("expected field %d to be of type %u but got %u", i, ctx->record.info->attrs[i].oid, oid); I32 vlen = fu_frombeI(32, buf+4); - SV *r = &PL_sv_undef; + SV *r; buf += 8; len -= 8; if (vlen > len) RERR("input data too short"); if (vlen >= 0) { r = ctx->record.tio[i].recv(aTHX_ ctx->record.tio+i, buf, vlen); buf += vlen; len -= vlen; + } else { + r = newSV(0); } hv_store(hv, ctx->record.info->attrs[i].name.n, -strlen(ctx->record.info->attrs[i].name.n), r, 0); } diff --git a/t/json_parse.t b/t/json_parse.t index 0c26dff..d01414f 100644 --- a/t/json_parse.t +++ b/t/json_parse.t @@ -2,7 +2,7 @@ use v5.36; use Test::More; use FU::Util 'json_parse'; no warnings 'experimental::builtin'; -use builtin 'is_bool', 'created_as_number'; +use builtin 'is_bool', 'created_as_number', 'true', 'false'; use Config; my @error = ( @@ -236,4 +236,10 @@ ok !eval { json_parse '{"":{"":{"":{"":1}}}}', max_depth => 4; 1 }; ok !eval { json_parse '"string"', max_size => 7 }; } +# Mutable hashes/arrays +my $d = json_parse('[true,false,null,{"a":true,"b":false,"c":null}]'); +is_deeply $d, [true,false,undef,{a => true, b => false, c => undef}]; +$_ = 1 for @{$d}[0,1,2], values $d->[3]->%*; +is_deeply $d, [1,1,1,{a => 1, b => 1, c => 1}]; + done_testing; diff --git a/t/pgconnect.t b/t/pgconnect.t index 798734c..8536574 100644 --- a/t/pgconnect.t +++ b/t/pgconnect.t @@ -197,6 +197,7 @@ subtest '$st->kvv', sub { is_deeply $conn->q('SELECT 1 WHERE false')->kvv, {}; is_deeply $conn->q('SELECT 1')->kvv, {1=>1}; is_deeply $conn->q('SELECT 1, null UNION ALL SELECT 3, 2')->kvv, {1=>undef,3=>2}; + $conn->q('SELECT 1')->kvv->{1} = 0; }; subtest '$st->kva', sub { diff --git a/t/pgtypes-dynamic.t b/t/pgtypes-dynamic.t index b6954b5..2751a86 100644 --- a/t/pgtypes-dynamic.t +++ b/t/pgtypes-dynamic.t @@ -96,10 +96,15 @@ subtest 'custom types', sub { ); _ - is_deeply $txn->q(q{SELECT '{"(\"(2,{},bb)\",)","(\"(,,)\",bb)"}'::fupg_test_table[]})->val, [ + $val = $txn->q(q{SELECT '{"(\"(2,{},bb)\",)","(\"(,,)\",bb)"}'::fupg_test_table[]})->val; + is_deeply $val, [ { rec => { a => 2, aenum => [], domain => 'bb' }, dom => undef }, { rec => { a => undef, aenum => undef, domain => undef }, dom => 'bb' }, ]; + $val->[0] = 0; + $val->[1]{rec}{a} = 0; + $val->[1]{rec} = 0; + $val->[1]{dom} = 0; is $txn->q('SELECT $1::fupg_test_table[]', [ { rec => { a => 2, aenum => [], domain => 'bb' }, dom => undef }, diff --git a/t/pgtypes.t b/t/pgtypes.t index 5a17a71..3a3252c 100644 --- a/t/pgtypes.t +++ b/t/pgtypes.t @@ -21,10 +21,12 @@ sub v($type, $p_in, @args) { my $test = "$type $s_in" =~ s/\n/\\n/rg; utf8::encode($test); { - my $res = $conn->q("SELECT \$1::$type", $s_in)->text_params->val; + my $array = $conn->q("SELECT \$1::$type", $s_in)->text_params->flat; + my $res = $array->[0]; ok is_bool($res), "$test is bool" if $type eq 'bool'; ok created_as_number($res), "$test is number" if $type =~ /^(int|float)\d/; is_deeply $res, $p_out, "$test text->bin"; + $array->[0] = 0; # Must be writable } { my $res = $conn->q("SELECT \$1::$type", $p_in)->text_results->val; @@ -41,7 +43,11 @@ sub f($type, $p_in) { ok !eval { $conn->q("SELECT \$1::$type", $p_in)->val; 1 }, "$test fail"; } -ok !defined $conn->q('SELECT pg_sleep(0)')->val; # void +{ # void + my $array = $conn->q('SELECT pg_sleep(0)')->flat; + ok !defined $array->[0]; + $array->[0] = 0; +} v bool => true, undef, 1, 't'; v bool => false, undef, 0, 'f'; @@ -166,4 +172,10 @@ is $conn->q('SELECT ($1::int2[])[2]', [1,2,3,4])->val, 2; is $conn->q('SELECT ($1::int2vector)[1]', [1,2,3,4])->val, 2; is $conn->q('SELECT ($1::oidvector)[1]', [1,2,3,4])->val, 2; +{ + my $v = $conn->q("SELECT '{t,f,NULL}'::bool[]")->val; + is_deeply $v, [true, false, undef]; + $_ = 0 for @$v; +} + done_testing; From f8b0043e2248e981ea74bc08030fecb6fff93af2 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Tue, 29 Apr 2025 09:14:44 +0200 Subject: [PATCH 28/61] MultipartFormData: Bunch of parser fixes --- FU/MultipartFormData.pm | 11 ++++++----- t/multipart.t | 15 ++++++++++++++- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/FU/MultipartFormData.pm b/FU/MultipartFormData.pm index a92e4d7..7d9d77e 100644 --- a/FU/MultipartFormData.pm +++ b/FU/MultipartFormData.pm @@ -3,7 +3,7 @@ use v5.36; use Carp 'confess'; use FU::Util 'utf8_decode'; -sub _arg($d) { $d =~ s/^\s+//r =~ s/\s+$//r =~ s/^"(.+)"$/$1/r } +sub _arg($d) { $d =~ s{^"(.+)"$}{$1 =~ s/\\([\\"])/$1/rg}er } sub parse($pkg, $header, $data) { confess "Invalid multipart header '$header'" @@ -26,13 +26,14 @@ sub parse($pkg, $header, $data) { start => pos $data, }, $pkg; - confess "Missing content-disposition header" if $hdrs !~ /content-disposition:\s*form-data;(.+)/i; + confess "Missing content-disposition header" if $hdrs !~ /content-disposition:\s*form-data(.+)/i; my $v = $1; - confess "Missing 'name' parameter" if $v !~ /[;\s]name=([^[;\s]+)/; + my $pvalue = qr/("(?:\\[\\"]|[^\\"\r\n]+)*"|[^\s;"]*)/; + confess "Missing 'name' parameter" if $v !~ /;\s*name\s*=\s*$pvalue/; $d->{name} = utf8_decode _arg $1; - $d->{filename} = utf8_decode _arg $1 if $v =~ /[;\s]filename=([^;\s]+)/; + $d->{filename} = utf8_decode _arg $1 if $v =~ /;\s*filename\s*=\s*$pvalue/; - if ($hdrs =~ /content-type:\s*([^;\s]+)(?:\s*;\s*charset=([^;\s]+))?/i) { + if ($hdrs =~ /content-type:\s*$pvalue(?:\s*;\s*charset\s*=\s*$pvalue)?/i) { $d->{mime} = utf8_decode _arg $1; $d->{charset} = utf8_decode _arg $2 if $2; } diff --git a/t/multipart.t b/t/multipart.t index 842b9cd..e045ff1 100644 --- a/t/multipart.t +++ b/t/multipart.t @@ -14,12 +14,17 @@ Content-Type: text Content-Disposition: form-data; filename="example.txt"; name=field2 value2 +--delimiter12345 +Content-Type: something; charset = " a b \\ c " +Content-Disposition: form-data; name = "field \" name" ;filename= "月姫.jpg" + + --delimiter12345-- _ my $l = FU::MultipartFormData->parse('multipart/form-data;boundary="delimiter12345"', $t); -is scalar @$l, 2; +is scalar @$l, 3; my $v = $l->[0]; is $v->name, 'field1'; @@ -44,4 +49,12 @@ is $v->charset, undef; is $v->length, 6; is $v->data, 'value2'; +$v = $l->[2]; +is $v->name, 'field " name'; +is $v->filename, "\x{6708}\x{59eb}.jpg"; +is $v->mime, 'something'; +is $v->charset, ' a b \ c '; +is $v->length, 0; +is $v->data, ''; + done_testing; From f52ad9a2e6ae0b55072f078e8d3668bf8ffe397a Mon Sep 17 00:00:00 2001 From: Yorhel Date: Tue, 29 Apr 2025 13:51:28 +0200 Subject: [PATCH 29/61] json_format(): Fix buffer overflow in float formatting The ndigit argument to Gconvert() is the number of significant digits to format, not the size of the output buffer. D'oh. --- c/jsonfmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/jsonfmt.c b/c/jsonfmt.c index fff3e4f..a6d46a4 100644 --- a/c/jsonfmt.c +++ b/c/jsonfmt.c @@ -244,7 +244,7 @@ static void fujson_fmt(pTHX_ fujson_fmt_ctx *ctx, SV *val) { if (isinfnan(nv)) croak("unable to format floating point NaN or Inf as JSON"); /* XXX: Cpanel::JSON::XS appears to always append a ".0" for round numbers, other modules do not. */ /* XXX#2: This doesn't support quadmath. Makefile.PL checks for that */ - fustr_reserve(ctx->out, NV_DIG+1); + fustr_reserve(ctx->out, NV_DIG+32); Gconvert(nv, NV_DIG, 0, ctx->out->cur); ctx->out->cur += strlen(ctx->out->cur); } else if (SvIOKp(val)) { From af9340f908dcca0852d2c3f05809f6f2d44a9ffd Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 30 Apr 2025 15:00:21 +0200 Subject: [PATCH 30/61] DebugInfo: Styling + add request/response body and fu obj contents Formatting is still shit. --- FU.pm | 14 +++-- FU/DebugImpl.pm | 139 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 110 insertions(+), 43 deletions(-) diff --git a/FU.pm b/FU.pm index 11c58ff..316f273 100644 --- a/FU.pm +++ b/FU.pm @@ -3,7 +3,7 @@ use v5.36; use Carp 'confess', 'croak'; use IO::Socket; use POSIX (); -use Time::HiRes 'clock_gettime', 'CLOCK_MONOTONIC'; +use Time::HiRes 'time', 'clock_gettime', 'CLOCK_MONOTONIC'; use FU::Log 'log_write'; use FU::Util; use FU::Validate; @@ -318,7 +318,11 @@ sub _log_err($e) { } sub _do_req($c) { - local $REQ = { hdr => {}, trace_start => clock_gettime(CLOCK_MONOTONIC), trace_id => sprintf('%010x%08x%04x', int time, $$, int rand 1<<16) }; + local $REQ = { + hdr => {}, + trace_start => clock_gettime(CLOCK_MONOTONIC), + trace_id => sprintf('%012x%06x%04x', int(time*10000) % (1<<(12*4)), $$ % (1<<(6*4)), int rand 1<<16) + }; local $fu = bless {}, 'FU::obj'; $REQ->{ip} = $c->{client_sock} isa 'IO::Socket::INET' ? $c->{client_sock}->peerhost : '127.0.0.1'; @@ -648,8 +652,8 @@ sub log_verbose($,$msg) { length $r->{body} ? do { my $b = substr $r->{body}, 0, 4096; my $trunc = length $r->{body} > 4096 ? ', truncated' : ''; - utf8::decode($b) ? ("Body (utf8$trunc)", _fmt_section($b =~ s/\r//rg =~ s/\n{4,}/\n[..]\n/rg)) - : ("Body (hex$trunc)", _fmt_section(unpack('H*', $b) =~ s/(.{128})/$1\n/rg)) + utf8::decode($b) ? ("Body (utf8$trunc):", _fmt_section($b =~ s/\r//rg =~ s/\n{4,}/\n[..]\n/rg)) + : ("Body (hex$trunc):", _fmt_section(unpack('H*', $b) =~ s/(.{128})/$1\n/rg)) } : (), 'Message:', _fmt_section $msg ); @@ -898,10 +902,12 @@ sub _finalize { ) { push @vary, 'accept-encoding'; if ($hasbrotli && ($r->{hdr}{'accept-encoding'}||'') =~ /\bbr\b/) { + $r->{resbody_orig} = $r->{resbody}; $r->{resbody} = FU::Util::brotli_compress(6, $r->{resbody}); $r->{reshdr}{'content-encoding'} = 'br'; } elsif ($hasgzip && ($r->{hdr}{'accept-encoding'}||'') =~ /\bgzip\b/) { + $r->{resbody_orig} = $r->{resbody}; $r->{resbody} = FU::Util::gzip_compress(6, $r->{resbody}); $r->{reshdr}{'content-encoding'} = 'gzip'; } diff --git a/FU/DebugImpl.pm b/FU/DebugImpl.pm index a0de7ea..f02ceed 100644 --- a/FU/DebugImpl.pm +++ b/FU/DebugImpl.pm @@ -16,27 +16,32 @@ sub loc_($loc) { my $l = $loc->[$_]; my $f = $_ == $#$loc ? '(main)' : $loc->[$_+1][3]; $f = "$l->[0]::$f" if $f !~ /^\Q$l->[0]/; - txt_ "$f @ $l->[1]:$l->[2]"; + txt_ $f; + small_ " @ $l->[1]:$l->[2]"; } } -sub fmtpre_($code) { - lit_ xml_escape($code) =~ s/^\s+//r =~ s/\s+$//r =~ s/\n/
/rg; -} - sub clean_re($str) { # Regex formatting isn't stable, but this cleans up the crap I'm seeing a little bit. "$str" =~ s/^\(\?\^u:\^\(\?\^u://r =~ s/\)\$\)$//r; } -my @tabs = ( +sub raw_data($str) { + my $d = substr $str, 0, 32*1024; + my $trunc = length $str > 32*1024 ? ', truncated' : ''; + return utf8::decode($d) ? ("utf8$trunc", $d) + : ("hex$trunc", unpack('H*', $d) =~ s/(.{128})/$1\n/rg =~ s/(.{16})/$1 /rg); +} + +my @sections = ( req => sub { + my $r = $FU::REQ; table_ sub { tr_ sub { td_ 'Method'; td_ fu->method }; tr_ sub { td_ 'Path'; td_ fu->path }; tr_ sub { td_ 'Query'; td_ fu->query }; tr_ sub { td_ 'Client IP'; td_ fu->ip }; - tr_ sub { td_ 'Received'; td_ fmtts(time - (($FU::REQ->{trace_end}||clock_gettime(CLOCK_MONOTONIC)) - $FU::REQ->{trace_start})) }; + tr_ sub { td_ 'Received'; td_ fmtts(time - (($r->{trace_end}||clock_gettime(CLOCK_MONOTONIC)) - $r->{trace_start})) }; }; h2_ 'Headers'; table_ sub { @@ -45,7 +50,38 @@ my @tabs = ( td_ fu->headers->{$_}; } for sort keys fu->headers->%*; }; - # TODO: Body? Certainly useful for JSON + if ((fu->header('content-length')||0) > 0) { + h2_ 'Body'; + section_ class => 'tabs', sub { + my $json = eval { fu->json({type=>'any'}) }; + details_ name => 'reqbody', open => !0, sub { + summary_ 'JSON'; + pre_ FU::Util::json_format($json, pretty => 1, canonical => 1); + } if $json; + my $formdata = eval { fu->formdata({type=>'hash'}) }; + details_ name => 'reqbody', open => !0, sub { + summary_ 'Form data'; + table_ sub { + for my $k (sort keys %$formdata) { + tr_ sub { + td_ $k; + td_ $_; + } for ref $formdata->{$k} ? $formdata->{$k}->@* : ($formdata->{$k}); + } + }; + } if $formdata; + my $multipart = eval { fu->multipart }; + details_ name => 'reqbody', open => !0, sub { + summary_ 'Multipart'; + pre_ join "\n", map $_->describe, @$multipart; + } if $multipart; + details_ name => 'reqbody', open => !0,sub { + my($lbl, $data) = raw_data $r->{body}; + summary_ "Raw ($lbl)"; + pre_ $data; + }; + } + } ('Request') }, @@ -84,11 +120,28 @@ my @tabs = ( } for !defined $v ? () : ref $v ? @$v : ($v); } }; + my $body = $r->{resbody_orig} // $r->{resbody}; + if (length $body) { + h2_ 'Body'; + section_ class => 'tabs', sub { + my $json = ($r->{reshdr}{'content-type'}||'') =~ /^application\/json/ && eval { FU::Util::json_parse($body, utf8 => 1) }; + details_ name => 'resbody', open => !0, sub { + summary_ 'JSON'; + pre_ FU::Util::json_format($json, pretty => 1, canonical => 1); + } if $json; + details_ name => 'resbody', open => !0,sub { + my($lbl, $data) = raw_data $body; + summary_ "Raw ($lbl)"; + pre_ $data; + }; + } + } ('Response') }, sql => sub { return () if !$FU::REQ->{trace_sql}; + # TODO: Summarize main table, expand to display full query, params table, interpolated query table_ sub { thead_ sub { tr_ sub { td_ class => 'num', 'Exec'; @@ -100,8 +153,7 @@ my @tabs = ( td_ class => 'num', sprintf '%.1f ms', $_->{exec_time}*1000; td_ class => 'num', !defined $_->{prepare_time} ? '-' : $_->{prepare_time} ? sprintf '%.1f ms', $_->{prepare_time}*1000 : 'cache'; td_ class => 'num', $_->{nrows}; - td_ class => 'code', sub { fmtpre_ $_->{query} }; - # TODO: Params, both separate and interpolated + td_ class => 'code', $_->{query}; } for $FU::REQ->{trace_sql}->@*; }; ('Queries', scalar $FU::REQ->{trace_sql}->@*) @@ -109,7 +161,11 @@ my @tabs = ( fu => sub { return () if !keys fu->%*; - # TODO: Contents of the 'fu' object + # TODO: This is kinda lazy, an expandable table might be nicer. + require Data::Dumper; + pre_ sub { + lit_ Data::Dumper->new([fu])->Sortkeys(1)->Terse(1)->Dump; + }; ('fu obj') }, @@ -186,7 +242,7 @@ my @tabs = ( } }; tr_ sub { td_ $_->[0]; - td_ class => 'code', sub { fmtpre_ $_->[1] }; + td_ class => 'code', $_->[1]; } for @$lst; }; ('Prepared statements', scalar @$lst) @@ -196,9 +252,10 @@ my @tabs = ( sub collect { my @t; - for my ($id, $sub) (@tabs) { + for my ($id, $sub) (@sections) { my($title, $num); my $html = fragment { ($title, $num) = $sub->() }; + utf8::decode($html); push @t, { id => $id, title => $title, num => $num, html => $html } if $title; } \@t @@ -215,42 +272,47 @@ sub framework_($data) { *, *:before, *:after { box-sizing: inherit } * { margin: 0; padding: 0; font: inherit; color: inherit } - body { display: grid; grid: 45px 400px / 220px auto; } - header { grid-column: 1 / 3; grid-row: 1 / 2 } - nav { grid-column: 1 / 2; grid-row: 2 / 3 } - main { grid-column: 2 / 3; grid-row: 2 / 3 } + /* Ugh, fixed positioning */ + header { position: fixed; top: 0; left: 0; width: 100%; height: 40px; z-index: 2 } + nav { position: fixed; top: 38px; left: 0; width: 200px; z-index: 2 } + main { margin: 0 0 0 200px } header, nav { background: #eee } - main { border-top: 2px solid #009; border-left: 2px solid #009 } - nav { border-bottom: 2px solid #009 } + header { border-bottom: 2px solid #009 } + nav { border-bottom: 2px solid #009; border-right: 2px solid #009 } - header { display: flex; justify-content: space-between; padding: 10px } - header h1 { font-size: 20px; font-weight: bold } + header { display: flex; justify-content: space-between; align-items: baseline; padding: 5px 10px } + header h1 { font-size: 120%; font-weight: bold } header menu { list-style-type: none; display: flex; gap: 15px } body > input { display: none } nav { padding-top: 20px } nav menu { list-style-type: none } - nav label { display: block; width: 100%; padding: 2px 10px; cursor: pointer; white-space: nowrap } - nav label:hover { background-color: #fff } - nav label span { float: right; font-size: 80% } + nav a { display: block; width: 100%; text-decoration: none; padding: 2px 10px; cursor: pointer; white-space: nowrap } + nav a:hover { background-color: #fff } + nav a span { float: right; font-size: 80% } - main { padding: 10px 20px } - main h2 { margin: 30px 0 5px -10px; font-size: 20px; font-weight: bold } - main h2:first-child { margin-top: 0 } + main { padding: 0 10px 30px 10px } + main h1 { background: #eee; padding: 5px 10px 5px 205px; margin: 40px -10px 10px -210px; scroll-margin-top: 40px; font-size: 130%; font-weight: bold } + main h2 { margin: 20px 0 5px 0; font-size: 120%; font-weight: bold } - p, pre, table { margin: 5px 0 } - pre, .code { font-family: monospace; white-space: pre } + p, table, pre { margin: 5px 0 } + pre { font-family: monospace; white-space: pre; overflow-x: auto; padding-bottom: 15px; /* for the scrollbar, kinda browser-specific */ } table { border-collapse: collapse } td { padding: 1px 10px 1px 0; font-size: 12px; vertical-align: top } + td.code { font-family: monospace } tr:hover { background-color: #eee } thead { font-weight: bold } .num { text-align: right; white-space: nowrap } + + section.tabs { position: relative; display: flex; flex-wrap: wrap; z-index: 1; } + section.tabs summary { cursor: pointer; order: 0; display: block; padding: 3px 5px; margin-right: 10px; background: #ddd } + section.tabs summary:hover, section.tabs details[open] summary { background: #eee } + section.tabs details { display: contents } + section.tabs details *:nth-child(2) { order: 1; width: 100% } + + small { color: #555; font-size: 90% } _ - style_ type => 'text/css', join "\n", map +( - "#tab_$_:checked ~ nav menu li label[for=tab_$_] { background-color: #fff }", - "#tab_$_:not(:checked) ~ main #tabc_$_ { display: none }", - ), map $_->{id}, @$data; }; body_ sub { header_ sub { @@ -261,22 +323,21 @@ sub framework_($data) { li_ sub { a_ href => '?', 'Listing' }; }; }; - input_ type => 'radio', name => 'tab', id => "tab_$_->{id}", checked => $_ eq $data->[0] ? 'checked' : undef for @$data; nav_ sub { menu_ sub { li_ sub { - label_ for => "tab_$_->{id}", sub { + a_ href => "#$_->{id}", sub { txt_ $_->{title}; span_ $_->{num} if defined $_->{num}; - } + }; } for @$data; }; } if @$data; main_ sub { - div_ id => "tabc_$_->{id}", sub { - h2_ $_->{title}; + for (@$data) { + h1_ id => $_->{id}, $_->{title}; lit_ $_->{html}; - } for @$data; + } }; }; }; From beeefcf3373a11877d121f7eded08ab238b36189 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 30 Apr 2025 17:07:44 +0200 Subject: [PATCH 31/61] Pg: Add perl2bin() and bin2perl() conversion methods --- FU.xs | 8 ++++++++ FU/Pg.pm | 26 +++++++++++++++++++++++++- c/pgconn.c | 27 +++++++++++++++++++++++++++ t/pgtypes.t | 10 +++++++++- 4 files changed, 69 insertions(+), 2 deletions(-) diff --git a/FU.xs b/FU.xs index dc19870..68082a5 100644 --- a/FU.xs +++ b/FU.xs @@ -286,6 +286,14 @@ void _set_type(fupg_conn *c, SV *name, SV *sendsv, SV *recvsv) fupg_set_type(aTHX_ c, name, sendsv, recvsv); XSRETURN(1); +void perl2bin(fupg_conn *c, int oid, SV *sv) + CODE: + ST(0) = fupg_perl2bin(aTHX_ c, oid, sv); + +void bin2perl(fupg_conn *c, int oid, SV *sv) + CODE: + ST(0) = fupg_bin2perl(aTHX_ c, oid, sv); + MODULE = FU PACKAGE = FU::Pg::txn diff --git a/FU/Pg.pm b/FU/Pg.pm index f43c7f8..d88a331 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -758,7 +758,31 @@ C to configure appropriate conversions for these types. =back -I Methods to convert between the various formats. +Utility functions: + +=over + +=item $conn->perl2bin($oid, $val) + +=item $conn->bin2perl($oid, $bin) + +Convert the value for a specific type between the Perl representation and the +PostgreSQL binary format, using the current type configuration of the +connection. This is the same conversion used internally by this module to send +bind parameters and receive query results, and map to the C and C +functions of C<< $conn->set_type() >>. + +These methods throw an error if C<$oid> is not a known type or if the given +data is not valid for the type. However, these methods should not be used for +strict validation: the conversion routines are usually written under the +assumption that the data has been received directly from Postgres or is about +to be sent to (and further validated by) Postgres. For some types, +C may return invalid data on invalid input and C may +accept invalid binary data. + +=back + +I Methods to convert between bin and text formats. I Methods to query type info. diff --git a/c/pgconn.c b/c/pgconn.c index 3a20b56..04423e6 100644 --- a/c/pgconn.c +++ b/c/pgconn.c @@ -626,3 +626,30 @@ static void fupg_tio_free(fupg_tio *tio) { safefree(tio->record.tio); } } + + + + +static SV *fupg_perl2bin(pTHX_ fupg_conn *conn, Oid oid, SV *sv) { + int refresh_done = 0; + fupg_tio tio; + fustr buf; + memset(&tio, 0, sizeof(tio)); + fupg_tio_setup(aTHX_ conn, &tio, FUPGT_SEND, oid, &refresh_done); + fustr_init(&buf, sv_newmortal(), SIZE_MAX); + tio.send(aTHX_ &tio, sv, &buf); /* XXX: Leaks 'tio' on error */ + fupg_tio_free(&tio); + return fustr_done(&buf); +} + +static SV *fupg_bin2perl(pTHX_ fupg_conn *conn, Oid oid, SV *sv) { + int refresh_done = 0; + fupg_tio tio; + STRLEN len; + const char *buf = SvPVbyte(sv, len); + memset(&tio, 0, sizeof(tio)); + fupg_tio_setup(aTHX_ conn, &tio, FUPGT_RECV, oid, &refresh_done); + SV *r = tio.recv(aTHX_ &tio, buf, len); /* XXX: Leaks 'tio' on error */ + fupg_tio_free(&tio); + return r; +} diff --git a/t/pgtypes.t b/t/pgtypes.t index 3a3252c..67e566e 100644 --- a/t/pgtypes.t +++ b/t/pgtypes.t @@ -19,9 +19,12 @@ sub v($type, $p_in, @args) { my $s_out = @args > 2 && defined $args[2] ? $args[2] : $s_in; my $test = "$type $s_in" =~ s/\n/\\n/rg; + my $oid; utf8::encode($test); { - my $array = $conn->q("SELECT \$1::$type", $s_in)->text_params->flat; + my $st = $conn->q("SELECT \$1::$type", $s_in)->text_params; + $oid = $st->param_types->[0]; + my $array = $st->flat; my $res = $array->[0]; ok is_bool($res), "$test is bool" if $type eq 'bool'; ok created_as_number($res), "$test is number" if $type =~ /^(int|float)\d/; @@ -36,6 +39,11 @@ sub v($type, $p_in, @args) { my $res = $conn->q("SELECT \$1::$type", $p_in)->val; is_deeply $res, $p_out, "$test bin->bin"; } + { + my $bin = $conn->perl2bin($oid, $p_in); + ok defined $bin; + is_deeply $conn->bin2perl($oid, $bin), $p_out if $type !~ /\(/; + } } sub f($type, $p_in) { my $test = "$type $p_in" =~ s/\n/\\n/rg; From 76f55f277bd94155827ce6dcf23566f15128bab8 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 30 Apr 2025 20:02:12 +0200 Subject: [PATCH 32/61] Pg: Add text2bin() and bin2text() conversion methods --- FU.xs | 8 ++++++++ FU/Pg.pm | 19 +++++++++++++++++-- c/pgconn.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ t/pgtypes.t | 19 ++++++++++++++++++- 4 files changed, 93 insertions(+), 3 deletions(-) diff --git a/FU.xs b/FU.xs index 68082a5..1c342be 100644 --- a/FU.xs +++ b/FU.xs @@ -294,6 +294,14 @@ void bin2perl(fupg_conn *c, int oid, SV *sv) CODE: ST(0) = fupg_bin2perl(aTHX_ c, oid, sv); +void bin2text(fupg_conn *c, ...) + CODE: + XSRETURN(fupg_bintext(aTHX_ c, 0, ax, items)); + +void text2bin(fupg_conn *c, ...) + CODE: + XSRETURN(fupg_bintext(aTHX_ c, 1, ax, items)); + MODULE = FU PACKAGE = FU::Pg::txn diff --git a/FU/Pg.pm b/FU/Pg.pm index d88a331..ff88d60 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -780,9 +780,24 @@ to be sent to (and further validated by) Postgres. For some types, C may return invalid data on invalid input and C may accept invalid binary data. -=back +=item $conn->bin2text($oid, $bin, ...) -I Methods to convert between bin and text formats. +=item $conn->text2bin($oid, $text, ...) + +Convert between the binary format and the PostgreSQL text format. This +conversion requires a round-trip to the server and throws an error if the +connection state is not I or I. Since it is Postgres doing the +conversion, the input is properly validated and, in the case of C, +the result is guaranteed to be suitable for use as a textual bind parameter or +for inclusion in an SQL query (but don't forget to use C in +that case). + +Calling these methods many times can be pretty slow. If you have several values +to convert, you can do that in a single call to speed things up: + + my($text1, $text2, ..) = $conn->bin2text($oid1, $bin1, $oid2, $bin2, ..); + +=back I Methods to query type info. diff --git a/c/pgconn.c b/c/pgconn.c index 04423e6..607a9c6 100644 --- a/c/pgconn.c +++ b/c/pgconn.c @@ -653,3 +653,53 @@ static SV *fupg_bin2perl(pTHX_ fupg_conn *conn, Oid oid, SV *sv) { fupg_tio_free(&tio); return r; } + + +static I32 fupg_bintext(pTHX_ fupg_conn *conn, int format, I32 ax, I32 argc) { + int vals = argc/2; + + if (argc == 1 || argc % 2 == 0) croak("Usage: $conn->%s(oid, data, ...)", format ? "text2bin" : "bin2text"); + if (vals > 1 && GIMME_V != G_LIST) { + ST(0) = sv_2mortal(newSViv(vals)); + return 1; + } + + Oid *paramtypes = safemalloc(vals * sizeof(*paramtypes)); + const char **paramvalues = safemalloc(vals * sizeof(*paramvalues)); + int *paramlengths = safemalloc(vals * sizeof(*paramlengths)); + int *paramformats = safemalloc(vals * sizeof(*paramformats)); + + fustr sql; + fustr_init(&sql, NULL, SIZE_MAX); + fustr_write(&sql, "SELECT ", 7); + + STRLEN len; + int i; + for (i=0; iconn, fustr_start(&sql), vals, + paramtypes, paramvalues, paramlengths, paramformats, format); + safefree(paramtypes); + safefree(paramvalues); + safefree(paramlengths); + safefree(paramformats); + SvREFCNT_dec(sql.sv); + + if (!r) fupg_conn_croak(conn, "exec"); + if (PQresultStatus(r) != PGRES_TUPLES_OK) fupg_result_croak(r, "exec", sql.sv ? "SELECT $1, ..." : sql.sbuf); + + /* The stack is guaranteed to be large enough, since we received 1+2*vals arguments */ + for (i=0; iperl2bin($oid, $p_in); ok defined $bin; - is_deeply $conn->bin2perl($oid, $bin), $p_out if $type !~ /\(/; + if ($type !~ /\(/) { + is_deeply $conn->bin2perl($oid, $bin), $p_out; + is $conn->bin2text($oid, $bin), $s_out; + is $conn->text2bin($oid, $s_out), $bin if $type ne 'jsonb'; # jsonb pretty-prints for some reason + } } } sub f($type, $p_in) { @@ -180,6 +184,19 @@ is $conn->q('SELECT ($1::int2[])[2]', [1,2,3,4])->val, 2; is $conn->q('SELECT ($1::int2vector)[1]', [1,2,3,4])->val, 2; is $conn->q('SELECT ($1::oidvector)[1]', [1,2,3,4])->val, 2; +is_deeply [$conn->bin2text( + 16, $conn->perl2bin(16, 1), + 25, 'Hello', + 1007, $conn->perl2bin(1007, [-3,1,undef]) +)], ['t', 'Hello', '{-3,1,NULL}']; + +{ + my($b,$s,$a) = $conn->text2bin(16, 't', 25, 'Hello', 1007, '{-3,1,NULL}'); + is $conn->bin2perl(16, $b), 1; + is $conn->bin2perl(25, $s), 'Hello'; + is_deeply $conn->bin2perl(1007, $a), [-3,1,undef]; +} + { my $v = $conn->q("SELECT '{t,f,NULL}'::bool[]")->val; is_deeply $v, [true, false, undef]; From cbccf045b71f22696c5d235345271b00a1f1ce51 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 1 May 2025 11:48:08 +0200 Subject: [PATCH 33/61] DebugInfo: Expand queries table with params & details Apart from the ugly implementation, this is pretty neat. --- FU.pm | 23 ++++-- FU/DebugImpl.pm | 189 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 153 insertions(+), 59 deletions(-) diff --git a/FU.pm b/FU.pm index 316f273..8b8eaa3 100644 --- a/FU.pm +++ b/FU.pm @@ -121,11 +121,24 @@ sub query_trace($st,@) { $REQ->{trace_nsqldirect}++ if !defined $st->prepare_time; $REQ->{trace_sqlexec} += $st->exec_time; $REQ->{trace_sqlprep} += $st->prepare_time if $st->prepare_time; - push $REQ->{trace_sql}->@*, { - query => $st->query, nrows => $st->nrows, - param_types => $st->param_types, param_values => $st->param_values, - exec_time => $st->exec_time, prepare_time => $st->prepare_time, - } if FU::debug; + if (FU::debug) { + my $t = $st->param_types; + my $v = $st->param_values; + my $txt = $st->get_text_params; + push $REQ->{trace_sql}->@*, { + query => $st->query, nrows => $st->nrows, + exec_time => $st->exec_time, prepare_time => $st->prepare_time, + # Store the binary value when we're in binary params mode, that way + # we don't have to keep a reference to the original perl value and + # we can defer & batch the conversion to text. + params => [ map +{ + type => $t->[$_], + !defined $v->[$_] ? (text => undef) : + $txt ? (text => "$v->[$_]") + : (bin => $DB->perl2bin($t->[$_], $v->[$_])) + }, 0..$#$v ], + }; + } } sub _connect_db { $DB = ref $INIT_DB eq 'CODE' ? $INIT_DB->() : FU::Pg->connect($INIT_DB); diff --git a/FU/DebugImpl.pm b/FU/DebugImpl.pm index f02ceed..48c6cf1 100644 --- a/FU/DebugImpl.pm +++ b/FU/DebugImpl.pm @@ -1,6 +1,7 @@ # Internal module used by FU.pm package FU::DebugImpl 0.5; use v5.36; +use utf8; use experimental 'for_list'; use FU; use FU::XMLWriter ':html5_', 'fragment', 'xml_escape'; @@ -140,23 +141,74 @@ my @sections = ( }, sql => sub { - return () if !$FU::REQ->{trace_sql}; - # TODO: Summarize main table, expand to display full query, params table, interpolated query - table_ sub { + my $queries = $FU::REQ->{trace_sql}; + return () if !$queries; + + # Convert binary params to text. + # For queries with text_params, assume the params are already valid for the text format. + my @binparams = grep $_->{type} && !$_->{text}, map $_->{params}->@*, @$queries; + my @arg = map +($_->{type}, $_->{bin}), @binparams; + my @text; + my $ok = !@arg || eval { @text = $FU::DB->bin2text(@arg); 1 }; + $binparams[$_]{text} = $text[$_] for 0..$#text; + pre_ "Error converting binary parameters:\n$@" if !$ok; + + input_ type => 'checkbox', id => "row${_}_c" for 0..$#{$queries}; + table_ class => 'sqlt', sub { thead_ sub { tr_ sub { td_ class => 'num', 'Exec'; td_ class => 'num', 'Prep'; td_ class => 'num', 'Rows'; td_ 'Query'; } }; + my $rows = 0; + for my($i, $st) (builtin::indexed $queries->@*) { + $rows += $st->{nrows}; + tr_ sub { + td_ class => 'num', sprintf '%.1f ms', $st->{exec_time}*1000; + td_ class => 'num', !defined $st->{prepare_time} ? '-' : $st->{prepare_time} ? sprintf '%.1f ms', $st->{prepare_time}*1000 : 'cache'; + td_ class => 'num', $st->{nrows}; + td_ class => 'sum', sub { + label_ for => "row${i}_c", sub { + span_ class => 'closed', '▶'; + span_ class => 'open', '▼'; + txt_ $st->{query} =~ s/[\r\n]/ /rg =~ s/\s\s+/ /rg =~ s/^\s+//r; + }; + }; + }; + tr_ class => 'details', id => "row$i", sub { + td_ ''; + td_ colspan => 3, sub { + pre_ $st->{query}; + if ($st->{params}->@*) { + strong_ 'Parameters:'; + table_ sub { + tr_ sub { + td_ class => 'num', sprintf '$%d =', $_+1; + td_ class => 'code', sub { + my $p = $st->{params}[$_]{text}; + !defined $p ? em_ 'null' : txt_ $p; + }; + } for (0..$#{$st->{params}}); + }; + # XXX: Buggy when the query contains string literals with $n variables. + strong_ 'Interpolated:'; + pre_ $st->{query} =~ s{\$([1-9][0-9]*)}{ + my $v = $st->{params}[$1-1]{text}; + defined $v ? $FU::DB->escape_literal($v) : 'NULL' + }egr; + } + }; + }; + } tr_ sub { - td_ class => 'num', sprintf '%.1f ms', $_->{exec_time}*1000; - td_ class => 'num', !defined $_->{prepare_time} ? '-' : $_->{prepare_time} ? sprintf '%.1f ms', $_->{prepare_time}*1000 : 'cache'; - td_ class => 'num', $_->{nrows}; - td_ class => 'code', $_->{query}; - } for $FU::REQ->{trace_sql}->@*; + td_ class => 'num', sprintf '%.1f ms', $FU::REQ->{trace_sqlexec}*1000; + td_ class => 'num', !defined $FU::REQ->{trace_sqlprep} ? '-' : sprintf '%.1f ms', $FU::REQ->{trace_sqlprep}*1000; + td_ class => 'num', $rows; + td_ class => 'sum', 'total'; + } if @$queries > 1; }; - ('Queries', scalar $FU::REQ->{trace_sql}->@*) + ('Queries', scalar @$queries) }, fu => sub { @@ -245,7 +297,7 @@ my @sections = ( td_ class => 'code', $_->[1]; } for @$lst; }; - ('Prepared statements', scalar @$lst) + ('Prepared stmts', scalar @$lst) }, ); @@ -267,51 +319,8 @@ sub framework_($data) { head_ sub { title_ 'FU Debugging Interface'; meta_ name => 'viewport', content => 'width=device-width, initial-scale=1.0, user-scalable=yes'; + link_ rel => 'stylesheet', type => 'text/css', media => 'all', href => '?css'; style_ type => 'text/css', <<~_; - html { box-sizing: border-box; color: #000; background: #fff } - *, *:before, *:after { box-sizing: inherit } - * { margin: 0; padding: 0; font: inherit; color: inherit } - - /* Ugh, fixed positioning */ - header { position: fixed; top: 0; left: 0; width: 100%; height: 40px; z-index: 2 } - nav { position: fixed; top: 38px; left: 0; width: 200px; z-index: 2 } - main { margin: 0 0 0 200px } - - header, nav { background: #eee } - header { border-bottom: 2px solid #009 } - nav { border-bottom: 2px solid #009; border-right: 2px solid #009 } - - header { display: flex; justify-content: space-between; align-items: baseline; padding: 5px 10px } - header h1 { font-size: 120%; font-weight: bold } - header menu { list-style-type: none; display: flex; gap: 15px } - - body > input { display: none } - nav { padding-top: 20px } - nav menu { list-style-type: none } - nav a { display: block; width: 100%; text-decoration: none; padding: 2px 10px; cursor: pointer; white-space: nowrap } - nav a:hover { background-color: #fff } - nav a span { float: right; font-size: 80% } - - main { padding: 0 10px 30px 10px } - main h1 { background: #eee; padding: 5px 10px 5px 205px; margin: 40px -10px 10px -210px; scroll-margin-top: 40px; font-size: 130%; font-weight: bold } - main h2 { margin: 20px 0 5px 0; font-size: 120%; font-weight: bold } - - p, table, pre { margin: 5px 0 } - pre { font-family: monospace; white-space: pre; overflow-x: auto; padding-bottom: 15px; /* for the scrollbar, kinda browser-specific */ } - table { border-collapse: collapse } - td { padding: 1px 10px 1px 0; font-size: 12px; vertical-align: top } - td.code { font-family: monospace } - tr:hover { background-color: #eee } - thead { font-weight: bold } - .num { text-align: right; white-space: nowrap } - - section.tabs { position: relative; display: flex; flex-wrap: wrap; z-index: 1; } - section.tabs summary { cursor: pointer; order: 0; display: block; padding: 3px 5px; margin-right: 10px; background: #ddd } - section.tabs summary:hover, section.tabs details[open] summary { background: #eee } - section.tabs details { display: contents } - section.tabs details *:nth-child(2) { order: 1; width: 100% } - - small { color: #555; font-size: 90% } _ }; body_ sub { @@ -378,10 +387,23 @@ sub load($id) { fu->set_body(scalar <$fn>); } +sub css { + # Awful CSS row hiding hack. I'm not sorry. + state $css = join '', , map qq{ + #row${_}_c:checked ~ * label[for=row${_}_c] .closed { display: none } + #row${_}_c:not(:checked) ~ * label[for=row${_}_c] .open { display: none } + #row${_}_c:not(:checked) ~ * #row${_} { display: none } + }, 0..1000; +} + sub render { my $q = fu->query; if (!$q) { fu->set_body(framework_ [{id => 'lst', title => 'Recent Requests', html => fragment \&listing_ }]); + } elsif ($q eq 'css') { + fu->set_header('content-type', 'text/css'); + fu->set_header('cache-control', 'max-age=86400'); + fu->set_body(css()); } elsif ($q eq 'cur') { fu->set_body(framework_ collect); } elsif ($q eq 'last') { @@ -415,3 +437,62 @@ sub save { } 1; + +__DATA__ +html { box-sizing: border-box; color: #000; background: #fff } +*, *:before, *:after { box-sizing: inherit } +* { margin: 0; padding: 0; font: inherit; color: inherit } + +/* Ugh, fixed positioning */ +header { position: fixed; top: 0; left: 0; width: 100%; height: 40px; z-index: 2 } +nav { position: fixed; top: 38px; left: 0; width: 200px; z-index: 2 } +main { margin: 0 0 0 200px } + +header, nav { background: #eee } +header { border-bottom: 2px solid #009 } +nav { border-bottom: 2px solid #009; border-right: 2px solid #009 } + +header { display: flex; justify-content: space-between; align-items: baseline; padding: 5px 10px } +header h1 { font-size: 120%; font-weight: bold } +header menu { list-style-type: none; display: flex; gap: 15px } + +body > input { display: none } +nav { padding-top: 20px } +nav menu { list-style-type: none } +nav a { display: block; width: 100%; text-decoration: none; padding: 2px 10px; cursor: pointer; white-space: nowrap } +nav a:hover { background-color: #fff } +nav a span { float: right; font-size: 80% } + +main { padding: 0 10px 30px 10px } +main h1 { background: #eee; padding: 5px 10px 5px 205px; margin: 40px -10px 10px -210px; scroll-margin-top: 40px; font-size: 130%; font-weight: bold } +main h2 { margin: 20px 0 5px 0; font-size: 120%; font-weight: bold } + +p, table, pre { margin: 5px 0 } +pre { border-left: 2px dotted #999; padding-left: 5px; font-family: monospace; white-space: pre; overflow-x: auto; padding-bottom: 15px; /* for the scrollbar, kinda browser-specific */ } +table { border-collapse: collapse } +td { padding: 1px 10px 1px 0; font-size: 12px; vertical-align: top } +td.code { font-family: monospace } +tr:hover { background-color: #eee } +thead { font-weight: bold } +.num { text-align: right; white-space: nowrap } + +section.tabs { position: relative; display: flex; flex-wrap: wrap; z-index: 1; } +section.tabs summary { cursor: pointer; order: 0; display: block; padding: 3px 5px; margin-right: 10px; background: #ddd } +section.tabs summary:hover, section.tabs details[open] summary { background: #eee } +section.tabs details { display: contents } +section.tabs details *:nth-child(2) { order: 1; width: 100% } + +.sqlt { width: 100%; table-layout: fixed } +.sqlt .num { width: 50px } +.sqlt .num:first-child { width: 75px } +.sqlt .num:nth-child(2) { width: 60px } +.sqlt .sum { white-space: nowrap; font-family: monospace; overflow: hidden; text-overflow: ellipsis } +.sqlt label { cursor: pointer } +.sqlt label span { color: #555; display: inline-block; width: 15px } +.sqlt tr.details { background: #fff } +.sqlt tr.details > td { padding-bottom: 10px } +input[id^=row] { display: none } + +small { color: #555; font-size: 90% } +em { font-style: italic } +strong { font-weight: bold } From 6787f32fd9c4f22f20905e260f7aa48eb88e06a8 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sat, 3 May 2025 12:32:50 +0200 Subject: [PATCH 34/61] DebugInfo: Fix handling of undef and falsy bind parameters --- FU/DebugImpl.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FU/DebugImpl.pm b/FU/DebugImpl.pm index 48c6cf1..88e0bb5 100644 --- a/FU/DebugImpl.pm +++ b/FU/DebugImpl.pm @@ -146,7 +146,7 @@ my @sections = ( # Convert binary params to text. # For queries with text_params, assume the params are already valid for the text format. - my @binparams = grep $_->{type} && !$_->{text}, map $_->{params}->@*, @$queries; + my @binparams = grep $_->{type} && !exists $_->{text}, map $_->{params}->@*, @$queries; my @arg = map +($_->{type}, $_->{bin}), @binparams; my @text; my $ok = !@arg || eval { @text = $FU::DB->bin2text(@arg); 1 }; From 52c36e0aeac242d960c357205f0fef30fde7dc90 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 4 May 2025 12:18:12 +0200 Subject: [PATCH 35/61] FU: Preserve existing headers on fu->redirect() Allows setting custom headers (in particular, cookies) when redirecting. This behavior is consistent with send_file(). --- FU.pm | 1 - 1 file changed, 1 deletion(-) diff --git a/FU.pm b/FU.pm index 8b8eaa3..f81be88 100644 --- a/FU.pm +++ b/FU.pm @@ -862,7 +862,6 @@ sub send_file($, $root, $path) { sub redirect($, $code, $location) { state $alias = {qw/ perm 301 temp 302 tempget 303 tempsame 307 permsame 308 /}; - fu->reset; fu->status($alias->{$code} // $code); fu->set_header(location => "$location"); fu->set_header('content-type', 'text/plain'); From 6c54ee30911bb26dfd1fd0538cd5bd1d1280ac0f Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 9 May 2025 08:32:41 +0200 Subject: [PATCH 36/61] FU: Reject some invalid characters in path --- FU.pm | 2 ++ FU/Util.pm | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/FU.pm b/FU.pm index f81be88..1a826e1 100644 --- a/FU.pm +++ b/FU.pm @@ -313,10 +313,12 @@ sub _read_req($c) { # Decode these into Unicode strings and check for special characters. eval { FU::Util::utf8_decode($_); 1} || fu->error(400, $@) for ($REQ->{path}, $REQ->{qs}, values $REQ->{hdr}->%*); + fu->error(400, 'Invalid character in path') if $REQ->{path} =~ /#/; # Some bots don't correctly split off the fragment ($REQ->{path}, my $qs) = split /\?/, $REQ->{path}//'', 2; $REQ->{qs} //= $qs; eval { $REQ->{path} = FU::Util::uri_unescape($REQ->{path}); 1; } || fu->error(400, $@); + fu->error(400, 'Invalid character in path') if $REQ->{path} =~ /[\r\n\t]/; # There are plenty other questionable characters, but newlines and tabs are definitely out } diff --git a/FU/Util.pm b/FU/Util.pm index 7d585d9..18db781 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -20,7 +20,7 @@ our @EXPORT_OK = qw/ sub utf8_decode :prototype($) { return if !defined $_[0]; confess 'Invalid UTF-8' if !utf8::decode($_[0]); - confess 'Invalid control character' if $_[0] =~ /[\x00-\x08\x0b\x0c\x0e-\x1f]/; + confess 'Invalid control character' if $_[0] =~ /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/; $_[0] } From 8dbc17ab37858cca82410f222c71a56c427beb94 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 9 May 2025 09:53:43 +0200 Subject: [PATCH 37/61] FU: Fix error logging of formdata --- FU.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FU.pm b/FU.pm index 1a826e1..106322e 100644 --- a/FU.pm +++ b/FU.pm @@ -663,7 +663,7 @@ sub log_verbose($,$msg) { 'Headers:', (map " $_: $r->{hdr}{$_}", sort keys $r->{hdr}->%*), $r->{multipart} ? ('Body (multipart):', _fmt_section join "\n", map $_->describe, $r->{multipart}->@*) : $r->{json} ? ('Body (JSON):', _fmt_section FU::Util::json_format($r->{json}, pretty => 1, canonical => 1)) : - $r->{formdata} ? ('Body (formdata):', _fmt_section FU::Util::json_format($r->{json}, pretty => 1, canonical => 1)) : + $r->{formdata} ? ('Body (formdata):', _fmt_section FU::Util::json_format($r->{formdata}, pretty => 1, canonical => 1)) : length $r->{body} ? do { my $b = substr $r->{body}, 0, 4096; my $trunc = length $r->{body} > 4096 ? ', truncated' : ''; From 383ed8409cf823a2e7966f5692fa3aa6167039a6 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 11 May 2025 10:33:48 +0200 Subject: [PATCH 38/61] bench: version updates + add small Pg benchmark --- FU/Benchmarks.pod | 180 +++++++++++++++++++++++++++------------------- bench.PL | 45 +++++++++++- 2 files changed, 149 insertions(+), 76 deletions(-) diff --git a/FU/Benchmarks.pod b/FU/Benchmarks.pod index 2479667..37415ff 100644 --- a/FU/Benchmarks.pod +++ b/FU/Benchmarks.pod @@ -26,9 +26,11 @@ The following module versions were used: =over -=item L 4.38 +=item L 4.39 -=item L 0.1 +=item L 3.18.0 + +=item L 0.5 =item L 1.08 @@ -40,6 +42,8 @@ The following module versions were used: =item L 4.03 +=item L 0.15 + =item L 1.5 =item L 0.900 @@ -56,266 +60,294 @@ These benchmarks run on large-ish arrays with repeated values. JSON encoding is sufficiently fast that Perl function calling overhead tends to dominate for smaller inputs, but I don't find that overhead very interesting. -Also worth noting that JSON::SIMD formatting code is forked from JSON::XS, the -SIMD parts are only used for parsing. +Also worth noting that L formatting code is forked from +L, the SIMD parts are only used for parsing. API object from L documentation. Encode Canonical Decode JSON::PP 5312/s 5119/s 1290/s JSON::Tiny 7757/s - 3426/s - Cpanel::JSON::XS 108187/s 101867/s 103575/s + Cpanel::JSON::XS 114802/s 104141/s 107274/s JSON::SIMD 130137/s 118948/s 115123/s JSON::XS 128421/s 120243/s 117940/s - FU::Util 133182/s 113275/s 118213/s + FU::Util 132890/s 111630/s 121124/s Object (small) Encode Canonical Decode JSON::PP 907/s 829/s 202/s JSON::Tiny 1224/s - 499/s - Cpanel::JSON::XS 43168/s 28114/s 19229/s + Cpanel::JSON::XS 45732/s 30862/s 20102/s JSON::SIMD 49019/s 30699/s 23267/s JSON::XS 49814/s 31326/s 25336/s - FU::Util 44110/s 26134/s 21144/s + FU::Util 43853/s 26568/s 20426/s Object (large) Encode Canonical Decode JSON::PP 927/s 747/s 104/s JSON::Tiny 1108/s - 392/s - Cpanel::JSON::XS 29672/s 12637/s 16609/s + Cpanel::JSON::XS 30587/s 11875/s 15515/s JSON::SIMD 24418/s 12388/s 22895/s JSON::XS 23192/s 13174/s 23553/s - FU::Util 39477/s 13567/s 17178/s + FU::Util 36455/s 11920/s 17370/s Object (large, mixed unicode) Encode Canonical Decode JSON::PP 817/s 679/s 86/s JSON::Tiny 1036/s - 402/s - Cpanel::JSON::XS 20437/s 1345/s 7408/s + Cpanel::JSON::XS 25333/s 1459/s 7480/s JSON::SIMD 25031/s 1331/s 15997/s JSON::XS 23580/s 1375/s 8526/s - FU::Util 34435/s 11916/s 9419/s + FU::Util 33085/s 12639/s 9375/s Small integers Encode Decode JSON::PP 113/s 29/s JSON::Tiny 160/s 86/s - Cpanel::JSON::XS 7137/s 6083/s + Cpanel::JSON::XS 7345/s 6151/s JSON::SIMD 7963/s 4361/s JSON::XS 7915/s 6058/s - FU::Util 8565/s 5639/s + FU::Util 7883/s 5671/s Large integers Encode Decode JSON::PP 2176/s 329/s JSON::Tiny 2999/s 1638/s - Cpanel::JSON::XS 31302/s 48892/s + Cpanel::JSON::XS 32545/s 50162/s JSON::SIMD 37201/s 51719/s JSON::XS 36722/s 50110/s - FU::Util 116188/s 62110/s + FU::Util 110210/s 61006/s ASCII strings Encode Decode JSON::PP 2934/s 336/s JSON::Tiny 4126/s 1439/s - Cpanel::JSON::XS 116744/s 43489/s + Cpanel::JSON::XS 116721/s 44560/s JSON::SIMD 134711/s 50429/s JSON::XS 135419/s 43976/s - FU::Util 182026/s 44312/s + FU::Util 164804/s 48163/s Unicode strings Encode Decode JSON::PP 5113/s 253/s JSON::Tiny 6603/s 2585/s - Cpanel::JSON::XS 91704/s 64489/s + Cpanel::JSON::XS 97039/s 67669/s JSON::SIMD 106928/s 102440/s JSON::XS 105473/s 60558/s - FU::Util 217135/s 58972/s + FU::Util 187489/s 61121/s String escaping (few) Encode Decode JSON::PP 4251/s 352/s JSON::Tiny 4704/s 1869/s - Cpanel::JSON::XS 131789/s 106306/s + Cpanel::JSON::XS 136755/s 118059/s JSON::SIMD 158171/s 153692/s JSON::XS 157261/s 97676/s - FU::Util 191699/s 91177/s + FU::Util 216443/s 96354/s String escaping (many) Encode Decode JSON::PP 2224/s 366/s JSON::Tiny 2884/s 984/s - Cpanel::JSON::XS 136583/s 100789/s + Cpanel::JSON::XS 140220/s 107040/s JSON::SIMD 152951/s 113242/s JSON::XS 153471/s 106269/s - FU::Util 142604/s 97984/s + FU::Util 153081/s 100279/s =head2 XML Writing +L is the only XS-based XML DSL that I'm aware of, so all direct +competition is inherently slower by virtue of being pure perl. I'm sure some +templating modules will perform better, though. + HTML fragment TUWF::XML 795/s XML::Writer 833/s HTML::Tiny 423/s - FU::XMLWriter 5285/s + FU::XMLWriter 5396/s + + + +=head2 PostgreSQL client + +Fetching query results is highly unlikely to be a bottleneck in your code, this +benchmark is mainly here to verify that L is not introducing a +bottleneck where there shouldn't be one. + +Fetch and bitwise-or 20k integers + + Smallint Bigint + DBD::Pg 194/s 22/s + Pg::PQ 226/s 19/s + FU::Pg (bin) 239/s 23/s + FU::Pg (text) 222/s 21/s =cut # Cached data used by bench.PL. Same as the formatted tables above but easier to parse. -json/api Canonical Cpanel::JSON::XS 101867 -json/api Canonical FU::Util 113275 +json/api Canonical Cpanel::JSON::XS 104141 +json/api Canonical FU::Util 111630 json/api Canonical JSON::PP 5119 json/api Canonical JSON::SIMD 118948 json/api Canonical JSON::XS 120243 -json/api Decode Cpanel::JSON::XS 103575 -json/api Decode FU::Util 118213 +json/api Decode Cpanel::JSON::XS 107274 +json/api Decode FU::Util 121124 json/api Decode JSON::PP 1290 json/api Decode JSON::SIMD 115123 json/api Decode JSON::Tiny 3426 json/api Decode JSON::XS 117940 -json/api Encode Cpanel::JSON::XS 108187 -json/api Encode FU::Util 133182 +json/api Encode Cpanel::JSON::XS 114802 +json/api Encode FU::Util 132890 json/api Encode JSON::PP 5312 json/api Encode JSON::SIMD 130137 json/api Encode JSON::Tiny 7757 json/api Encode JSON::XS 128421 -json/intl Decode Cpanel::JSON::XS 48892 -json/intl Decode FU::Util 62110 +json/intl Decode Cpanel::JSON::XS 50162 +json/intl Decode FU::Util 61006 json/intl Decode JSON::PP 329 json/intl Decode JSON::SIMD 51719 json/intl Decode JSON::Tiny 1638 json/intl Decode JSON::XS 50110 -json/intl Encode Cpanel::JSON::XS 31302 -json/intl Encode FU::Util 116188 +json/intl Encode Cpanel::JSON::XS 32545 +json/intl Encode FU::Util 110210 json/intl Encode JSON::PP 2176 json/intl Encode JSON::SIMD 37201 json/intl Encode JSON::Tiny 2999 json/intl Encode JSON::XS 36722 -json/ints Decode Cpanel::JSON::XS 6083 -json/ints Decode FU::Util 5639 +json/ints Decode Cpanel::JSON::XS 6151 +json/ints Decode FU::Util 5671 json/ints Decode JSON::PP 29 json/ints Decode JSON::SIMD 4361 json/ints Decode JSON::Tiny 86 json/ints Decode JSON::XS 6058 -json/ints Encode Cpanel::JSON::XS 7137 -json/ints Encode FU::Util 8565 +json/ints Encode Cpanel::JSON::XS 7345 +json/ints Encode FU::Util 7883 json/ints Encode JSON::PP 113 json/ints Encode JSON::SIMD 7963 json/ints Encode JSON::Tiny 160 json/ints Encode JSON::XS 7915 -json/objl Canonical Cpanel::JSON::XS 12637 -json/objl Canonical FU::Util 13567 +json/objl Canonical Cpanel::JSON::XS 11875 +json/objl Canonical FU::Util 11920 json/objl Canonical JSON::PP 747 json/objl Canonical JSON::SIMD 12388 json/objl Canonical JSON::XS 13174 -json/objl Decode Cpanel::JSON::XS 16609 -json/objl Decode FU::Util 17178 +json/objl Decode Cpanel::JSON::XS 15515 +json/objl Decode FU::Util 17370 json/objl Decode JSON::PP 104 json/objl Decode JSON::SIMD 22895 json/objl Decode JSON::Tiny 392 json/objl Decode JSON::XS 23553 -json/objl Encode Cpanel::JSON::XS 29672 -json/objl Encode FU::Util 39477 +json/objl Encode Cpanel::JSON::XS 30587 +json/objl Encode FU::Util 36455 json/objl Encode JSON::PP 927 json/objl Encode JSON::SIMD 24418 json/objl Encode JSON::Tiny 1108 json/objl Encode JSON::XS 23192 -json/objs Canonical Cpanel::JSON::XS 28114 -json/objs Canonical FU::Util 26134 +json/objs Canonical Cpanel::JSON::XS 30862 +json/objs Canonical FU::Util 26568 json/objs Canonical JSON::PP 829 json/objs Canonical JSON::SIMD 30699 json/objs Canonical JSON::XS 31326 -json/objs Decode Cpanel::JSON::XS 19229 -json/objs Decode FU::Util 21144 +json/objs Decode Cpanel::JSON::XS 20102 +json/objs Decode FU::Util 20426 json/objs Decode JSON::PP 202 json/objs Decode JSON::SIMD 23267 json/objs Decode JSON::Tiny 499 json/objs Decode JSON::XS 25336 -json/objs Encode Cpanel::JSON::XS 43168 -json/objs Encode FU::Util 44110 +json/objs Encode Cpanel::JSON::XS 45732 +json/objs Encode FU::Util 43853 json/objs Encode JSON::PP 907 json/objs Encode JSON::SIMD 49019 json/objs Encode JSON::Tiny 1224 json/objs Encode JSON::XS 49814 -json/obju Canonical Cpanel::JSON::XS 1345 -json/obju Canonical FU::Util 11916 +json/obju Canonical Cpanel::JSON::XS 1459 +json/obju Canonical FU::Util 12639 json/obju Canonical JSON::PP 679 json/obju Canonical JSON::SIMD 1331 json/obju Canonical JSON::XS 1375 -json/obju Decode Cpanel::JSON::XS 7408 -json/obju Decode FU::Util 9419 +json/obju Decode Cpanel::JSON::XS 7480 +json/obju Decode FU::Util 9375 json/obju Decode JSON::PP 86 json/obju Decode JSON::SIMD 15997 json/obju Decode JSON::Tiny 402 json/obju Decode JSON::XS 8526 -json/obju Encode Cpanel::JSON::XS 20437 -json/obju Encode FU::Util 34435 +json/obju Encode Cpanel::JSON::XS 25333 +json/obju Encode FU::Util 33085 json/obju Encode JSON::PP 817 json/obju Encode JSON::SIMD 25031 json/obju Encode JSON::Tiny 1036 json/obju Encode JSON::XS 23580 -json/strel Decode Cpanel::JSON::XS 100789 -json/strel Decode FU::Util 97984 +json/strel Decode Cpanel::JSON::XS 107040 +json/strel Decode FU::Util 100279 json/strel Decode JSON::PP 366 json/strel Decode JSON::SIMD 113242 json/strel Decode JSON::Tiny 984 json/strel Decode JSON::XS 106269 -json/strel Encode Cpanel::JSON::XS 136583 -json/strel Encode FU::Util 142604 +json/strel Encode Cpanel::JSON::XS 140220 +json/strel Encode FU::Util 153081 json/strel Encode JSON::PP 2224 json/strel Encode JSON::SIMD 152951 json/strel Encode JSON::Tiny 2884 json/strel Encode JSON::XS 153471 -json/stres Decode Cpanel::JSON::XS 106306 -json/stres Decode FU::Util 91177 +json/stres Decode Cpanel::JSON::XS 118059 +json/stres Decode FU::Util 96354 json/stres Decode JSON::PP 352 json/stres Decode JSON::SIMD 153692 json/stres Decode JSON::Tiny 1869 json/stres Decode JSON::XS 97676 -json/stres Encode Cpanel::JSON::XS 131789 -json/stres Encode FU::Util 191699 +json/stres Encode Cpanel::JSON::XS 136755 +json/stres Encode FU::Util 216443 json/stres Encode JSON::PP 4251 json/stres Encode JSON::SIMD 158171 json/stres Encode JSON::Tiny 4704 json/stres Encode JSON::XS 157261 -json/strs Decode Cpanel::JSON::XS 43489 -json/strs Decode FU::Util 44312 +json/strs Decode Cpanel::JSON::XS 44560 +json/strs Decode FU::Util 48163 json/strs Decode JSON::PP 336 json/strs Decode JSON::SIMD 50429 json/strs Decode JSON::Tiny 1439 json/strs Decode JSON::XS 43976 -json/strs Encode Cpanel::JSON::XS 116744 -json/strs Encode FU::Util 182026 +json/strs Encode Cpanel::JSON::XS 116721 +json/strs Encode FU::Util 164804 json/strs Encode JSON::PP 2934 json/strs Encode JSON::SIMD 134711 json/strs Encode JSON::Tiny 4126 json/strs Encode JSON::XS 135419 -json/stru Decode Cpanel::JSON::XS 64489 -json/stru Decode FU::Util 58972 +json/stru Decode Cpanel::JSON::XS 67669 +json/stru Decode FU::Util 61121 json/stru Decode JSON::PP 253 json/stru Decode JSON::SIMD 102440 json/stru Decode JSON::Tiny 2585 json/stru Decode JSON::XS 60558 -json/stru Encode Cpanel::JSON::XS 91704 -json/stru Encode FU::Util 217135 +json/stru Encode Cpanel::JSON::XS 97039 +json/stru Encode FU::Util 187489 json/stru Encode JSON::PP 5113 json/stru Encode JSON::SIMD 106928 json/stru Encode JSON::Tiny 6603 json/stru Encode JSON::XS 105473 -xml/a Rate FU::XMLWriter 5285 +pg/ints Bigint DBD::Pg 22 +pg/ints Bigint FU::Pg (bin) 23 +pg/ints Bigint FU::Pg (text) 21 +pg/ints Bigint Pg::PQ 19 +pg/ints Smallint DBD::Pg 194 +pg/ints Smallint FU::Pg (bin) 239 +pg/ints Smallint FU::Pg (text) 222 +pg/ints Smallint Pg::PQ 226 +xml/a Rate FU::XMLWriter 5396 xml/a Rate HTML::Tiny 423 xml/a Rate TUWF::XML 795 xml/a Rate XML::Writer 833 diff --git a/bench.PL b/bench.PL index fa53068..6ccd763 100755 --- a/bench.PL +++ b/bench.PL @@ -25,7 +25,10 @@ my %modules = map +($_, eval "require $_; \$${_}::VERSION"), qw/ TUWF::XML HTML::Tiny XML::Writer + DBD::Pg + Pg::PQ /; +use FU::Pg; my %data; # "id x y" => { id x y rate exists } my %oldmodules; @@ -196,6 +199,32 @@ def 'xml/a', 'HTML fragment', [ 'Rate' ], +{ + die "FU_TEST_DB not set.\n" if !$ENV{FU_TEST_DB}; + my $pq = Pg::PQ::Conn->new($ENV{FU_TEST_DB}); + my $fu = FU::Pg->connect($ENV{FU_TEST_DB}); + # XXX: Doesn't support all connection params this way + my $dbi = DBI->connect("dbi:Pg:dbname=".$pq->db, $pq->user, $pq->pass, {RaiseError => 1, PrintError => 0}); + + my $small = 'SELECT x, x+1, x+2, x+3, x+4, x+5, x+6, x+7, x+8, x+9 FROM generate_series(-10000::smallint, 9999, 10) x(x)'; + my $big = 'SELECT x<<5, x<<10, x<<15, x<<20, x<<25, x<<30, x<<35, x<<40, x<<45, x<<50 FROM generate_series(1::bigint, 20000, 1) x(x)'; + + my sub dbi { my $sum = 0; my $st = $dbi->prepare_cached($_[0]); for my $row ($dbi->selectall_arrayref($st)->@*) { $sum ^= $_ for @$row; } } + my sub pq { my $sum = 0; $pq->prepare('' => $_[0]); for my $row ($pq->execQueryPrepared('')->rows) { $sum ^= $_ for @$row; } } + my sub fub { my $sum = 0; for my $row ($fu->q($_[0])->alla->@*) { $sum ^= $_ for @$row; } } + my sub fut { my $sum = 0; for my $row ($fu->q($_[0])->text->alla->@*) { $sum ^= $_ for @$row; } } + + def 'pg/ints', 'Fetch and bitwise-or 20k integers', [ 'Smallint', 'Bigint' ], + [ 'DBD::Pg', undef, sub { dbi($small) }, sub { dbi($big) } ], + [ 'Pg::PQ', undef, sub { pq($small) }, sub { pq($big) } ], + [ 'FU::Pg (bin)', 'FU', sub { fub($small) }, sub { fub($big) } ], + [ 'FU::Pg (text)', 'FU', sub { fut($small) }, sub { fut($big) } ]; +} + + + + + delete @data{ grep !$data{$_}{exists}, keys %data }; sub fmtbench($id, $text, $xs, $ys) { @@ -276,15 +305,27 @@ These benchmarks run on large-ish arrays with repeated values. JSON encoding is sufficiently fast that Perl function calling overhead tends to dominate for smaller inputs, but I don't find that overhead very interesting. -Also worth noting that JSON::SIMD formatting code is forked from JSON::XS, the -SIMD parts are only used for parsing. +Also worth noting that L formatting code is forked from +L, the SIMD parts are only used for parsing. :benches ^json %head2 XML Writing +L is the only XS-based XML DSL that I'm aware of, so all direct +competition is inherently slower by virtue of being pure perl. I'm sure some +templating modules will perform better, though. + :benches ^xml +%head2 PostgreSQL client + +Fetching query results is highly unlikely to be a bottleneck in your code, this +benchmark is mainly here to verify that L is not introducing a +bottleneck where there shouldn't be one. + +:benches ^pg + %cut # Cached data used by bench.PL. Same as the formatted tables above but easier to parse. From 32c8fc1b898d5d505a55c1cb1dcdf1cc4822677c Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 11 May 2025 11:03:32 +0200 Subject: [PATCH 39/61] Version 1.0 + remove "experimental" notices --- ChangeLog | 19 +++++++++++++++++++ FU.pm | 15 ++++++--------- FU/Benchmarks.pod | 2 +- FU/DebugImpl.pm | 2 +- FU/Log.pm | 7 +------ FU/MultipartFormData.pm | 2 +- FU/Pg.pm | 7 +------ FU/SQL.pm | 7 +------ FU/Util.pm | 7 +------ FU/Validate.pm | 7 +------ FU/XMLWriter.pm | 7 +------ FU/XS.pm | 2 +- README.md | 4 ---- 13 files changed, 35 insertions(+), 53 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3f18b6b..29e387f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +1.0 - 2025-05-11 + - FU::Util: Fix parsing of empty sections in query_decode() + - FU::Util: Fix buffer overflow in json_format() float formatting + - FU::Util: Reject `0x1f` in utf8_decode() + - FU::Pg: Add perl<->text and bin<->text type conversion methods + - FU::Validate: Improved error messages + - FU::MultipartFormData: Various parser fixes + - FU: Include request body in verbose error logs + - FU: Add fu->log_verbose() + - FU: Extend debug_info pages with request body, response body, 'fu' + object dump, expandable query parameters and interpolated SQL queries + - FU: Improve styling of debug_info pages + - FU: Preserve headers on fu->redirect + - FU: Ignore HTTP_CONTENT_LENGTH and HTTP_CONTENT_TYPE FastCGI parameters + - FU: Suppress warning about missing files in FU::monitor_path + - FU: Reject hash character and newlines in request path + - Fix creating read-only undef/true/false in json_parse() and FU::Pg + - Benchmark updates + 0.5 - 2025-04-24 - FU::Util: Set O_CLOEXEC on fds received through `fdpass_recv()` - FU::Util: Fix interpretation of false options in `json_format()` and diff --git a/FU.pm b/FU.pm index 106322e..440a14c 100644 --- a/FU.pm +++ b/FU.pm @@ -1,4 +1,4 @@ -package FU 0.5; +package FU 1.0; use v5.36; use Carp 'confess', 'croak'; use IO::Socket; @@ -978,14 +978,6 @@ __END__ FU - A Lean and Efficient Zero-Dependency Web Framework. -=head1 EXPERIMENTAL - -This module is still in development: it's missing important functionality and -there will likely be a few breaking API changes. This framework currently -powers manned.org as a test. I'll do a stable 1.0 release once FU is used in -production for vndb.org, which will take a few months in the best case -scenario. - =head1 SYNOPSIS use v5.36; @@ -1011,6 +1003,11 @@ scenario. =head1 DESCRIPTION +FU is the backend web framework developed for L and +L, but is also perfectly suitable for other +projects. Besides a web framework, this distrubion also includes a bunch of +handy utility functions and modules. + =head2 Distribution Overview This top-level C module is a web development framework. The C diff --git a/FU/Benchmarks.pod b/FU/Benchmarks.pod index 37415ff..0762c7a 100644 --- a/FU/Benchmarks.pod +++ b/FU/Benchmarks.pod @@ -30,7 +30,7 @@ The following module versions were used: =item L 3.18.0 -=item L 0.5 +=item L 1.0 =item L 1.08 diff --git a/FU/DebugImpl.pm b/FU/DebugImpl.pm index 88e0bb5..ebf8c80 100644 --- a/FU/DebugImpl.pm +++ b/FU/DebugImpl.pm @@ -1,5 +1,5 @@ # Internal module used by FU.pm -package FU::DebugImpl 0.5; +package FU::DebugImpl 1.0; use v5.36; use utf8; use experimental 'for_list'; diff --git a/FU/Log.pm b/FU/Log.pm index e2da4a2..9606326 100644 --- a/FU/Log.pm +++ b/FU/Log.pm @@ -1,4 +1,4 @@ -package FU::Log 0.5; +package FU::Log 1.0; use v5.36; use Exporter 'import'; use POSIX 'strftime'; @@ -65,11 +65,6 @@ __END__ FU::Log - Extremely Basic Process-Wide Logging Infrastructure -=head1 EXPERIMENTAL - -This module is still in development and there will likely be a few breaking API -changes, see the main L module for details. - =head1 SYNOPSIS use FU::Log 'log_write'; diff --git a/FU/MultipartFormData.pm b/FU/MultipartFormData.pm index 7d9d77e..48ebb77 100644 --- a/FU/MultipartFormData.pm +++ b/FU/MultipartFormData.pm @@ -1,4 +1,4 @@ -package FU::MultipartFormData 0.5; +package FU::MultipartFormData 1.0; use v5.36; use Carp 'confess'; use FU::Util 'utf8_decode'; diff --git a/FU/Pg.pm b/FU/Pg.pm index ff88d60..4732daf 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -1,4 +1,4 @@ -package FU::Pg 0.5; +package FU::Pg 1.0; use v5.36; use FU::XS; @@ -35,11 +35,6 @@ __END__ FU::Pg - The Ultimate (synchronous) Interface to PostgreSQL -=head1 EXPERIMENTAL - -This module is still in development and there will likely be a few breaking API -changes, see the main L module for details. - =head1 SYNOPSYS use FU::Pg; diff --git a/FU/SQL.pm b/FU/SQL.pm index 2f8566d..db8aff1 100644 --- a/FU/SQL.pm +++ b/FU/SQL.pm @@ -1,4 +1,4 @@ -package FU::SQL 0.5; +package FU::SQL 1.0; use v5.36; use Exporter 'import'; use Carp 'confess'; @@ -103,11 +103,6 @@ __END__ FU::SQL - Small and Safe SQL Query Builder -=head1 EXPERIMENTAL - -This module is still in development and there will likely be a few breaking API -changes, see the main L module for details. - =head1 SYNOPSIS use FU::SQL; diff --git a/FU/Util.pm b/FU/Util.pm index 18db781..922747d 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -1,4 +1,4 @@ -package FU::Util 0.5; +package FU::Util 1.0; use v5.36; use FU::XS; @@ -98,11 +98,6 @@ __END__ FU::Util - Miscellaneous Utility Functions -=head1 EXPERIMENTAL - -This module is still in development and there will likely be a few breaking API -changes, see the main L module for details. - =head1 SYNOPSIS use FU::Util qw/json_format/; diff --git a/FU/Validate.pm b/FU/Validate.pm index 2741ee6..a4544bf 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -1,4 +1,4 @@ -package FU::Validate 0.5; +package FU::Validate 1.0; use v5.36; use experimental 'builtin', 'for_list'; @@ -447,11 +447,6 @@ __END__ FU::Validate - Data and form validation and normalization -=head1 EXPERIMENTAL - -This module is still in development and there will likely be a few breaking API -changes, see the main L module for details. - =head1 DESCRIPTION This module provides an easy and simple interface for data validation. It can diff --git a/FU/XMLWriter.pm b/FU/XMLWriter.pm index 1e9bb90..fe755f1 100644 --- a/FU/XMLWriter.pm +++ b/FU/XMLWriter.pm @@ -1,4 +1,4 @@ -package FU::XMLWriter 0.5; +package FU::XMLWriter 1.0; use v5.36; use Carp 'confess'; use Exporter 'import'; @@ -83,11 +83,6 @@ __END__ FU::XMLWriter - Convenient and efficient XML and HTML generator. -=head1 EXPERIMENTAL - -This module is still in development and there will likely be a few breaking API -changes, see the main L module for details. - =head1 SYNOPSIS use FU::XMLWriter ':html5_'; diff --git a/FU/XS.pm b/FU/XS.pm index 52cc757..b583e00 100644 --- a/FU/XS.pm +++ b/FU/XS.pm @@ -1,5 +1,5 @@ # This module is for internal use by other FU modules. -package FU::XS 0.5; +package FU::XS 1.0; use Carp; # may be called by XS. use XSLoader; XSLoader::load('FU'); diff --git a/README.md b/README.md index d29e00c..8f25140 100644 --- a/README.md +++ b/README.md @@ -7,10 +7,6 @@ collection of handy utility modules. *Contributing:* Refer to my [contribution guidelines](https://dev.yorhel.nl/contributing). -## Project Status - -**EXPERIMENTAL**; expect breaking changes. - ## Build & Install ```sh From 31994a4bf6a126aadae4bf736b40bfae60e9adf8 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 12 May 2025 12:38:23 +0200 Subject: [PATCH 40/61] Doc typos --- FU.pm | 4 ++-- FU/XMLWriter.pm | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/FU.pm b/FU.pm index 440a14c..9c85edf 100644 --- a/FU.pm +++ b/FU.pm @@ -994,7 +994,7 @@ FU - A Lean and Efficient Zero-Dependency Web Framework. } FU::get qr{/hello/(.+)}, sub($who) { - my_html_ "Website title", sub { + myhtml_ "Website title", sub { h1_ "Hello, $who!"; }; }; @@ -1097,7 +1097,7 @@ returning strings deal with perl Unicode strings, not raw bytes. =item use FU -procname => $name When the C<-procname> import option is set, FU automatically updates the -process name (as displayed in L and L, see `$0`) with +process name (as displayed in L and L, see C<$0>) with information about the current process, prefixed with the given C<$name>. =item FU::init_db($info) diff --git a/FU/XMLWriter.pm b/FU/XMLWriter.pm index fe755f1..1b964ee 100644 --- a/FU/XMLWriter.pm +++ b/FU/XMLWriter.pm @@ -263,7 +263,7 @@ and C<"> are replaced with their XML entity. All of the functions mentioned in this document can be imported individually. There are also two import groups: - use FU::XMLWriter ':html_'; + use FU::XMLWriter ':html5_'; Exports C, C, C, C and all of the C<< _ >> functions mentioned above. From 81a3d3c608dd37214a94ad8381b4efe6232355e0 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 22 May 2025 09:22:05 +0200 Subject: [PATCH 41/61] SQL: Add IDENT() and quote_identifier options Turns out VNDB has a few places where request data is directly used for column names in VALUES/SET/WHERE clauses. These are already restricted to known strings through the use of FU::Validate, but an extra layer of protection seems warranted here. --- FU/SQL.pm | 46 +++++++++++++++++++++++++++++++++++++--------- t/sql.t | 10 ++++++++++ 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/FU/SQL.pm b/FU/SQL.pm index db8aff1..63107b9 100644 --- a/FU/SQL.pm +++ b/FU/SQL.pm @@ -5,7 +5,7 @@ use Carp 'confess'; use experimental 'builtin', 'for_list'; our @EXPORT = qw/ - P RAW SQL + P RAW IDENT SQL PARENS INTERSPERSE COMMA AND OR WHERE SET VALUES IN @@ -16,6 +16,7 @@ sub _obj { bless [@_], 'FU::SQL::val' } sub P :prototype($) ($p) { bless \(my $x = $p), 'FU::SQL::p' } sub RAW :prototype($) ($s) { _obj "$s" } +sub IDENT :prototype($) ($s) { bless \(my $x = "$s"), 'FU::SQL::i' } # These operate on $_ and must be called with &func syntax. # The readonly check can be finicky. @@ -29,7 +30,7 @@ sub COMMA { INTERSPERSE ',', @_ } sub _conditions { @_ == 1 && ref $_[0] eq 'HASH' - ? map PARENS(RAW $_, + ? map PARENS(IDENT $_, !defined $_[0]{$_} ? ('IS NULL') : ref($_[0]{$_}) eq 'FU::SQL::in' ? ($_[0]{$_}) : ('=', $_[0]{$_}) @@ -41,11 +42,11 @@ sub AND { !@_ || (@_ == 1 && ref $_[0] eq 'HASH' && keys $_[0]->%* == 0) ? RAW ' sub OR { !@_ || (@_ == 1 && ref $_[0] eq 'HASH' && keys $_[0]->%* == 0) ? RAW '1=0' : INTERSPERSE 'OR', _conditions @_ } sub WHERE { SQL 'WHERE', AND @_ } -sub SET($h) { SQL 'SET', COMMA map SQL(RAW $_, '=', $h->{$_}), sort keys %$h } +sub SET($h) { SQL 'SET', COMMA map SQL(IDENT $_, '=', $h->{$_}), sort keys %$h } sub VALUES { @_ == 1 && ref $_[0] eq 'HASH' - ? SQL '(', COMMA(map RAW $_, sort keys $_[0]->%*), ') VALUES (', COMMA(map $_[0]{$_}, sort keys $_[0]->%*), ')' + ? SQL '(', COMMA(map IDENT $_, sort keys $_[0]->%*), ') VALUES (', COMMA(map $_[0]{$_}, sort keys $_[0]->%*), ')' : @_ == 1 && ref $_[0] eq 'ARRAY' ? SQL 'VALUES (', COMMA($_[0]->@*), ')' : SQL 'VALUES (', COMMA(@_), ')'; @@ -71,6 +72,10 @@ sub FU::SQL::p::_compile($self, $opt, $sql, $params) { $$sql .= $opt->{placeholder_style} eq 'pg' ? '$'.@$params : '?'; } +sub FU::SQL::i::_compile($self, $opt, $sql, $params) { + $$sql .= $opt->{quote_identifier} ? $opt->{quote_identifier}->($$self) : $$self; +} + sub FU::SQL::in::_compile($self, $opt, $sql, $params) { if ($opt->{in_style} eq 'pg') { $$sql .= '= ANY('; @@ -87,6 +92,7 @@ sub FU::SQL::in::_compile($self, $opt, $sql, $params) { } sub FU::SQL::val::compile($self, %opt) { + !/^(placeholder_style|in_style|quote_identifier)$/ && confess "Unknown flag: $_" for keys %opt; $opt{placeholder_style} ||= 'dbi'; $opt{in_style} ||= 'dbi'; my($sql, @params) = (''); @@ -94,7 +100,7 @@ sub FU::SQL::val::compile($self, %opt) { ($sql, \@params) } -*FU::SQL::p::compile = *FU::SQL::in::compile = \*FU::SQL::val::compile; +*FU::SQL::p::compile = *FU::SQL::i::compile = *FU::SQL::in::compile = \*FU::SQL::val::compile; 1; __END__ @@ -156,6 +162,16 @@ C<'pg'> when your SQL is going to L or L. Set the style to use for C expressions, refer to the C function below for details. +=item quote_identifier => $func + +Set a function to perform quoting of SQL identifiers. When using DBI, you can +do: + + my($sql) = $obj->compile(quote_identifier => sub { $dbh->quote_identifier(@_) }); + +If this option is not set, identifiers are included into the raw SQL string +without any escaping. + =back =back @@ -176,7 +192,7 @@ types of supported arguments: =item 1. -B are interpreted as raw SQL fragments. +I are interpreted as raw SQL fragments. =item 2. @@ -184,7 +200,7 @@ Objects returned by other functions listed below are included as SQL fragments. =item 3. -B is considered a bind parameter. +I is considered a bind parameter. =back @@ -244,6 +260,18 @@ Force the given C<$sql> string to be included as SQL. For example: Never use this function with untrusted input. +=item IDENT($string) + +Mark the given string as an SQL identifier. This function is only useful if you +use potentially untrusted input to determine which column to select or which +table to select from, for example: + + SQL 'SELECT id,', IDENT $ENV{column}, 'FROM table'; + +B By default this function is equivalent to C and hence +provides no safety whatsoever. Be sure to set the C option on +C to get more useful behavior. + =item PARENS(@args) Like C but surrounds the expression by parens: @@ -279,8 +307,8 @@ C<'1=1'> (i.e. true) if C<@conditions> is an empty list. =item AND($hashref) A special form of C that tests the given columns for equality instead. -The keys of the hashref are interpreted as raw SQL and the values as bind -parameters. +The keys of the hashref are interpreted as per C and the values as +bind parameters. AND { id => 1, number => RAW 'random()', x => undef } # '( id = ? ) AND ( number = random() ) AND ( x IS NULL )' diff --git a/t/sql.t b/t/sql.t index e6b7378..f9cee56 100644 --- a/t/sql.t +++ b/t/sql.t @@ -9,11 +9,15 @@ sub t($obj, $sql, $params, @opt) { is_deeply $gotparams, $params; } +my @q_ident = (quote_identifier => sub($x) { $x =~ s/"/_/rg }); + my $x; t P '', '?', ['']; t P '', '$1', [''], placeholder_style => 'pg'; t P undef, '?', [undef]; t RAW '', '', []; +t IDENT '"hello"', '"hello"', []; +t IDENT '"hello"', '_hello_', [], @q_ident; t SQL('select', '1'), 'select 1', []; t SQL('select', P '1'), 'select ?', [1]; t SQL('select', $x = '1'), 'select ?', [1]; @@ -41,6 +45,7 @@ t WHERE($x, '1 = 2', SQL('x = ', $x)), t WHERE({ col1 => RAW 'NOW()', col2 => 'a'}), 'WHERE ( col1 = NOW() ) AND ( col2 = ? )', ['a']; t WHERE(), 'WHERE 1=1', []; +t WHERE({ '"x' => 1 }), 'WHERE ( _x = ? )', [1], @q_ident; t WHERE(AND('true', $x), OR($y, 'y'), AND, OR), 'WHERE ( ( true ) AND ( ? ) ) AND ( ( ? ) OR ( y ) ) AND ( 1=1 ) AND ( 1=0 )', [$x, $y]; @@ -52,9 +57,11 @@ t SQL(SELECT => COMMA(qw/a b c/), FROM => 'table', WHERE { x => 1, a => undef }) t SET({ a => 1, c => RAW 'NOW()', d => undef }), 'SET a = ? , c = NOW() , d = ?', [1, undef]; +t SET({ '"x' => 1 }), 'SET _x = ?', [1], @q_ident; t VALUES({ a => 1, c => RAW 'NOW()', d => undef }), '( a , c , d ) VALUES ( ? , NOW() , ? )', [1, undef]; +t VALUES({ '"x' => 1 }), '( _x ) VALUES ( ? )', [1], @q_ident; t VALUES(1, $x, 'NOW()', RAW 'NOW()'), 'VALUES ( ? , ? , NOW() , NOW() )', [1, $x]; t VALUES([1, $x, 'NOW()', RAW 'NOW()']), 'VALUES ( ? , ? , ? , NOW() )', [1, $x, 'NOW()']; @@ -86,4 +93,7 @@ Hash::Util::lock_keys(%hash); Hash::Util::lock_value(%hash, 'v'); t SQL($hash{v}), 'value', []; +ok !eval { SQL('')->compile(oops => 1); 1 }; +like $@, qr/Unknown flag: oops/; + done_testing; From 2083ab2a6f3793ce560996dc2c8120c74c61a78b Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 22 May 2025 09:53:41 +0200 Subject: [PATCH 42/61] Pg: Set appropriate quote_identifier for $conn->Q() --- FU.xs | 12 ++++++++++++ FU/Pg.pm | 9 +++++++-- t/pgtypes-dynamic.t | 8 ++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/FU.xs b/FU.xs index 1c342be..7a387f9 100644 --- a/FU.xs +++ b/FU.xs @@ -217,6 +217,12 @@ void query_trace(fupg_conn *c, SV *cb) SvGETMAGIC(cb); c->trace = SvOK(cb) ? SvREFCNT_inc(cb) : NULL; +void conn(fupg_conn *c) + CODE: + ST(0) = sv_newmortal(); + sv_setrv_inc(ST(0), c->self); + sv_bless(ST(0), gv_stashpv("FU::Pg::conn", 0)); + void status(fupg_conn *c) CODE: ST(0) = sv_2mortal(newSVpv(fupg_conn_status(c), 0)); @@ -317,6 +323,12 @@ void cache(fupg_txn *x, ...) CODE: FUPG_STFLAGS; +void conn(fupg_txn *t) + CODE: + ST(0) = sv_newmortal(); + sv_setrv_inc(ST(0), t->conn->self); + sv_bless(ST(0), gv_stashpv("FU::Pg::conn", 0)); + void status(fupg_txn *t) CODE: ST(0) = sv_2mortal(newSVpv(fupg_txn_status(t), 0)); diff --git a/FU/Pg.pm b/FU/Pg.pm index 4732daf..2e7baf8 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -10,7 +10,11 @@ package FU::Pg::conn { sub Q { require FU::SQL; my $s = shift; - my($sql, $params) = FU::SQL::SQL(@_)->compile(placeholder_style => 'pg', in_style => 'pg'); + my($sql, $params) = FU::SQL::SQL(@_)->compile( + placeholder_style => 'pg', + in_style => 'pg', + quote_identifier => sub { $s->conn->escape_identifier(@_) }, + ); $s->q($sql, @$params); } @@ -208,7 +212,8 @@ used. =item $conn->Q(@args) Same as C<< $conn->q() >> but uses L to construct the query and bind -parameters. +parameters. Uses the 'pg' C and C<< $conn->escape_identifier() >> for +identifier quoting. =back diff --git a/t/pgtypes-dynamic.t b/t/pgtypes-dynamic.t index 2751a86..79abd92 100644 --- a/t/pgtypes-dynamic.t +++ b/t/pgtypes-dynamic.t @@ -127,6 +127,14 @@ subtest 'custom types', sub { }; +subtest 'identifier quoting', sub { + my $txn = $conn->txn; + $txn->exec('CREATE TEMPORARY TABLE fupg_test_tbl ("desc" int, ok int, "hello world" int)'); + ok $txn->Q('INSERT INTO fupg_test_tbl', VALUES {desc => 5, ok => 10, 'hello world', 15})->exec; + is $txn->Q('SELECT', IDENT 'hello world', 'FROM fupg_test_tbl')->val, 15; +}; + + subtest 'vndbid', sub { plan skip_all => 'type not loaded in the database' if !$conn->q("SELECT 1 FROM pg_type WHERE typname = 'vndbtag'")->val; From fd8332601b56e661c7c656cdfcd8d11fd65f3cc9 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 22 May 2025 09:54:08 +0200 Subject: [PATCH 43/61] t/pgconnect: Fix ref leak in test Apparently 'my sub' captured the $conn variable and held a ref on it even beyond the parent sub scope. 'my $x = sub {}' doesn't do that. Getting the ref counts right is important here for the last test to work. (Found while I was inspecting the refcount effects of the new ->conn() methods with Devel::Peek) --- t/pgconnect.t | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/t/pgconnect.t b/t/pgconnect.t index 8536574..cec597d 100644 --- a/t/pgconnect.t +++ b/t/pgconnect.t @@ -370,18 +370,18 @@ subtest 'Prepared statement cache', sub { $conn->cache_size(2); my $txn = $conn->txn; $txn->cache; - my sub numexec($sql) { + my $numexec = sub($sql) { $txn->q('SELECT generic_plans + custom_plans FROM pg_prepared_statements WHERE statement = $1', $sql)->cache(0)->val - } + }; is $txn->q('SELECT 1')->val, 1; - is numexec('SELECT 1'), 1; + is $numexec->('SELECT 1'), 1; my $sql = 'SELECT $1::int as a, $2::text as b'; - ok !defined numexec($sql); + ok !defined $numexec->($sql); my $params = $txn->q($sql)->param_types; is_deeply $params, [23, 25]; - is numexec($sql), 0; + is $numexec->($sql), 0; my $cparams = $txn->q($sql)->param_types; is_deeply $cparams, $params; @@ -391,23 +391,23 @@ subtest 'Prepared statement cache', sub { is_deeply $ccols, $cols; $txn->q($sql, 0, '')->exec; - is numexec($sql), 1; + is $numexec->($sql), 1; $txn->q($sql, 0, '')->exec; - is numexec($sql), 2; + is $numexec->($sql), 2; - is numexec('SELECT 1'), 1; + is $numexec->('SELECT 1'), 1; $txn->q('SELECT 2')->exec; - ok !defined numexec('SELECT 1'); - is numexec('SELECT 2'), 1; + ok !defined $numexec->('SELECT 1'); + is $numexec->('SELECT 2'), 1; $conn->cache_size(1); - ok !defined numexec('SELECT 1'); - ok !defined numexec($sql); - is numexec('SELECT 2'), 1; + ok !defined $numexec->('SELECT 1'); + ok !defined $numexec->($sql); + is $numexec->('SELECT 2'), 1; $conn->cache_size(0); - ok !defined numexec($sql); - ok !defined numexec('SELECT 2'); + ok !defined $numexec->($sql); + ok !defined $numexec->('SELECT 2'); }; From f8cd8a6d8cbc687e452071b98f1457f546a55c08 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Tue, 27 May 2025 09:30:46 +0200 Subject: [PATCH 44/61] FU: Simplify --monitor file change detection This changes the way that file changes are detected. The upside is that it now correctly detects changes that happened after the code has loaded but before the first request came in, the downside is that it now gets stuck on reloading when a file has a future mtime. --- FU.pm | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/FU.pm b/FU.pm index 9c85edf..6da28cb 100644 --- a/FU.pm +++ b/FU.pm @@ -217,17 +217,12 @@ sub monitor_path { push @monitor_paths, @_ } sub monitor_check :prototype(&) { $monitor_check = $_[0] } sub _monitor { - state %data; return 1 if $monitor_check && $monitor_check->(); require File::Find; eval { File::Find::find({ - wanted => sub { - my $m = (stat)[9]; - $data{$_} //= $m; - die if $m > $data{$_}; - }, + wanted => sub { die if (-M) < 0 }, no_chdir => 1 }, grep -e, $scriptpath, values %INC, @monitor_paths); 0 From a43dc70ff92b2baca45bb316b3bbce571054191d Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 2 Jun 2025 09:00:04 +0200 Subject: [PATCH 45/61] XMLWriter: Throw error when stringifying a bare reference I can't think of a use case where Perl's default ref stringification is something you actually want when writing XML/HTML - this pretty much always points to a bug. One that I seem to be prone to making... --- c/xmlwr.c | 4 +++- t/xmlwr.t | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/c/xmlwr.c b/c/xmlwr.c index f81d94c..2d31fec 100644 --- a/c/xmlwr.c +++ b/c/xmlwr.c @@ -27,6 +27,8 @@ static void fuxmlwr_destroy(pTHX_ fuxmlwr *wr) { static void fuxmlwr_escape(pTHX_ fuxmlwr *wr, SV *sv) { + if (SvROK(sv) && !SvAMAGIC(sv)) fu_confess("Invalid attempt to output bare reference"); + STRLEN len; const unsigned char *str = (unsigned char *)SvPV_const(sv, len); const unsigned char *tmp, *end = str + len; @@ -96,7 +98,7 @@ static void fuxmlwr_tag(pTHX_ fuxmlwr *wr, I32 ax, I32 offset, I32 argc, int sel val = ST(offset); offset++; - // Don't even try to stringify other arguments; non-string keys are always a bug. + // Don't even try to stringify attribute names; non-string keys are always a bug. if (!SvPOK(key)) fu_confess("Non-string attribute"); keys = SvPVX(key); diff --git a/t/xmlwr.t b/t/xmlwr.t index e8b2d95..becb96c 100644 --- a/t/xmlwr.t +++ b/t/xmlwr.t @@ -65,4 +65,21 @@ sub t { is fragment { t 'arg' }, '
ab" < c &< d🥳
'; +ok !eval { fragment { tag_ 'hi', \1 } }; +like $@, qr/Invalid attempt to output bare reference/; + +ok !eval { fragment { tag_ 'hi', {} } }; +like $@, qr/Invalid attempt to output bare reference/; + +is fragment { tag_ 'hi', bless {}, 'XTEST1' }, 'string'; +like fragment { tag_ 'hi', bless {}, 'XTEST2' }, qr{HASH\(.*\)}; # Yeah, whatever. +like fragment { tag_ 'hi', ''.{} }, qr{HASH\(.*\)}; + done_testing; + + +package XTEST1; +use overload '""' => sub { 'string' }; + +package XTEST2; +use overload '""' => sub { {} }; From 55baa6c9a616e9a3a9223cc07826dc7c23ec6825 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 4 Jun 2025 18:48:06 +0200 Subject: [PATCH 46/61] json_parse(): Disallow control characters in strings by default Deviating from the standard, but more consistent other FU functions. --- FU/Util.pm | 16 +++++++++++----- c/jsonparse.c | 10 ++++++++-- t/json_parse.t | 12 +++++++++--- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/FU/Util.pm b/FU/Util.pm index 922747d..4b06f33 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -137,7 +137,7 @@ value for C<$val>, due to C<\0> and C<\1> being considered booleans. =head1 JSON Parsing & Formatting This module comes with a custom C-based JSON parser and formatter. These -functions conform strictly to L, +functions conform to L, non-standard extensions are not supported and never will be. It also happens to be pretty fast, refer to L for some numbers. @@ -171,6 +171,13 @@ Supported C<%options>: =over +=item allow_control + +Boolean, set to true to allow (encoded) ASCII control characters in JSON +strings, such as C<\u0000>, C<\b>, C<\u007f>, etc. These characters are +permitted per RFC-8259, but disallowed by this parser by default. See +C below. + =item utf8 Boolean, interpret the input C<$string> as a UTF-8 encoded byte string instead @@ -251,10 +258,9 @@ value. There is no way to do that without violating JSON specs, so you should use entity escaping instead. Some JSON modules escape the forward slash (C) character instead, but that -is, at best, B sufficient for embedding inside a C<<