From 31994a4bf6a126aadae4bf736b40bfae60e9adf8 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 12 May 2025 12:38:23 +0200 Subject: [PATCH 01/22] Doc typos --- FU.pm | 4 ++-- FU/XMLWriter.pm | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/FU.pm b/FU.pm index 440a14c..9c85edf 100644 --- a/FU.pm +++ b/FU.pm @@ -994,7 +994,7 @@ FU - A Lean and Efficient Zero-Dependency Web Framework. } FU::get qr{/hello/(.+)}, sub($who) { - my_html_ "Website title", sub { + myhtml_ "Website title", sub { h1_ "Hello, $who!"; }; }; @@ -1097,7 +1097,7 @@ returning strings deal with perl Unicode strings, not raw bytes. =item use FU -procname => $name When the C<-procname> import option is set, FU automatically updates the -process name (as displayed in L and L, see `$0`) with +process name (as displayed in L and L, see C<$0>) with information about the current process, prefixed with the given C<$name>. =item FU::init_db($info) diff --git a/FU/XMLWriter.pm b/FU/XMLWriter.pm index fe755f1..1b964ee 100644 --- a/FU/XMLWriter.pm +++ b/FU/XMLWriter.pm @@ -263,7 +263,7 @@ and C<"> are replaced with their XML entity. All of the functions mentioned in this document can be imported individually. There are also two import groups: - use FU::XMLWriter ':html_'; + use FU::XMLWriter ':html5_'; Exports C, C, C, C and all of the C<< _ >> functions mentioned above. From 81a3d3c608dd37214a94ad8381b4efe6232355e0 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 22 May 2025 09:22:05 +0200 Subject: [PATCH 02/22] SQL: Add IDENT() and quote_identifier options Turns out VNDB has a few places where request data is directly used for column names in VALUES/SET/WHERE clauses. These are already restricted to known strings through the use of FU::Validate, but an extra layer of protection seems warranted here. --- FU/SQL.pm | 46 +++++++++++++++++++++++++++++++++++++--------- t/sql.t | 10 ++++++++++ 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/FU/SQL.pm b/FU/SQL.pm index db8aff1..63107b9 100644 --- a/FU/SQL.pm +++ b/FU/SQL.pm @@ -5,7 +5,7 @@ use Carp 'confess'; use experimental 'builtin', 'for_list'; our @EXPORT = qw/ - P RAW SQL + P RAW IDENT SQL PARENS INTERSPERSE COMMA AND OR WHERE SET VALUES IN @@ -16,6 +16,7 @@ sub _obj { bless [@_], 'FU::SQL::val' } sub P :prototype($) ($p) { bless \(my $x = $p), 'FU::SQL::p' } sub RAW :prototype($) ($s) { _obj "$s" } +sub IDENT :prototype($) ($s) { bless \(my $x = "$s"), 'FU::SQL::i' } # These operate on $_ and must be called with &func syntax. # The readonly check can be finicky. @@ -29,7 +30,7 @@ sub COMMA { INTERSPERSE ',', @_ } sub _conditions { @_ == 1 && ref $_[0] eq 'HASH' - ? map PARENS(RAW $_, + ? map PARENS(IDENT $_, !defined $_[0]{$_} ? ('IS NULL') : ref($_[0]{$_}) eq 'FU::SQL::in' ? ($_[0]{$_}) : ('=', $_[0]{$_}) @@ -41,11 +42,11 @@ sub AND { !@_ || (@_ == 1 && ref $_[0] eq 'HASH' && keys $_[0]->%* == 0) ? RAW ' sub OR { !@_ || (@_ == 1 && ref $_[0] eq 'HASH' && keys $_[0]->%* == 0) ? RAW '1=0' : INTERSPERSE 'OR', _conditions @_ } sub WHERE { SQL 'WHERE', AND @_ } -sub SET($h) { SQL 'SET', COMMA map SQL(RAW $_, '=', $h->{$_}), sort keys %$h } +sub SET($h) { SQL 'SET', COMMA map SQL(IDENT $_, '=', $h->{$_}), sort keys %$h } sub VALUES { @_ == 1 && ref $_[0] eq 'HASH' - ? SQL '(', COMMA(map RAW $_, sort keys $_[0]->%*), ') VALUES (', COMMA(map $_[0]{$_}, sort keys $_[0]->%*), ')' + ? SQL '(', COMMA(map IDENT $_, sort keys $_[0]->%*), ') VALUES (', COMMA(map $_[0]{$_}, sort keys $_[0]->%*), ')' : @_ == 1 && ref $_[0] eq 'ARRAY' ? SQL 'VALUES (', COMMA($_[0]->@*), ')' : SQL 'VALUES (', COMMA(@_), ')'; @@ -71,6 +72,10 @@ sub FU::SQL::p::_compile($self, $opt, $sql, $params) { $$sql .= $opt->{placeholder_style} eq 'pg' ? '$'.@$params : '?'; } +sub FU::SQL::i::_compile($self, $opt, $sql, $params) { + $$sql .= $opt->{quote_identifier} ? $opt->{quote_identifier}->($$self) : $$self; +} + sub FU::SQL::in::_compile($self, $opt, $sql, $params) { if ($opt->{in_style} eq 'pg') { $$sql .= '= ANY('; @@ -87,6 +92,7 @@ sub FU::SQL::in::_compile($self, $opt, $sql, $params) { } sub FU::SQL::val::compile($self, %opt) { + !/^(placeholder_style|in_style|quote_identifier)$/ && confess "Unknown flag: $_" for keys %opt; $opt{placeholder_style} ||= 'dbi'; $opt{in_style} ||= 'dbi'; my($sql, @params) = (''); @@ -94,7 +100,7 @@ sub FU::SQL::val::compile($self, %opt) { ($sql, \@params) } -*FU::SQL::p::compile = *FU::SQL::in::compile = \*FU::SQL::val::compile; +*FU::SQL::p::compile = *FU::SQL::i::compile = *FU::SQL::in::compile = \*FU::SQL::val::compile; 1; __END__ @@ -156,6 +162,16 @@ C<'pg'> when your SQL is going to L or L. Set the style to use for C expressions, refer to the C function below for details. +=item quote_identifier => $func + +Set a function to perform quoting of SQL identifiers. When using DBI, you can +do: + + my($sql) = $obj->compile(quote_identifier => sub { $dbh->quote_identifier(@_) }); + +If this option is not set, identifiers are included into the raw SQL string +without any escaping. + =back =back @@ -176,7 +192,7 @@ types of supported arguments: =item 1. -B are interpreted as raw SQL fragments. +I are interpreted as raw SQL fragments. =item 2. @@ -184,7 +200,7 @@ Objects returned by other functions listed below are included as SQL fragments. =item 3. -B is considered a bind parameter. +I is considered a bind parameter. =back @@ -244,6 +260,18 @@ Force the given C<$sql> string to be included as SQL. For example: Never use this function with untrusted input. +=item IDENT($string) + +Mark the given string as an SQL identifier. This function is only useful if you +use potentially untrusted input to determine which column to select or which +table to select from, for example: + + SQL 'SELECT id,', IDENT $ENV{column}, 'FROM table'; + +B By default this function is equivalent to C and hence +provides no safety whatsoever. Be sure to set the C option on +C to get more useful behavior. + =item PARENS(@args) Like C but surrounds the expression by parens: @@ -279,8 +307,8 @@ C<'1=1'> (i.e. true) if C<@conditions> is an empty list. =item AND($hashref) A special form of C that tests the given columns for equality instead. -The keys of the hashref are interpreted as raw SQL and the values as bind -parameters. +The keys of the hashref are interpreted as per C and the values as +bind parameters. AND { id => 1, number => RAW 'random()', x => undef } # '( id = ? ) AND ( number = random() ) AND ( x IS NULL )' diff --git a/t/sql.t b/t/sql.t index e6b7378..f9cee56 100644 --- a/t/sql.t +++ b/t/sql.t @@ -9,11 +9,15 @@ sub t($obj, $sql, $params, @opt) { is_deeply $gotparams, $params; } +my @q_ident = (quote_identifier => sub($x) { $x =~ s/"/_/rg }); + my $x; t P '', '?', ['']; t P '', '$1', [''], placeholder_style => 'pg'; t P undef, '?', [undef]; t RAW '', '', []; +t IDENT '"hello"', '"hello"', []; +t IDENT '"hello"', '_hello_', [], @q_ident; t SQL('select', '1'), 'select 1', []; t SQL('select', P '1'), 'select ?', [1]; t SQL('select', $x = '1'), 'select ?', [1]; @@ -41,6 +45,7 @@ t WHERE($x, '1 = 2', SQL('x = ', $x)), t WHERE({ col1 => RAW 'NOW()', col2 => 'a'}), 'WHERE ( col1 = NOW() ) AND ( col2 = ? )', ['a']; t WHERE(), 'WHERE 1=1', []; +t WHERE({ '"x' => 1 }), 'WHERE ( _x = ? )', [1], @q_ident; t WHERE(AND('true', $x), OR($y, 'y'), AND, OR), 'WHERE ( ( true ) AND ( ? ) ) AND ( ( ? ) OR ( y ) ) AND ( 1=1 ) AND ( 1=0 )', [$x, $y]; @@ -52,9 +57,11 @@ t SQL(SELECT => COMMA(qw/a b c/), FROM => 'table', WHERE { x => 1, a => undef }) t SET({ a => 1, c => RAW 'NOW()', d => undef }), 'SET a = ? , c = NOW() , d = ?', [1, undef]; +t SET({ '"x' => 1 }), 'SET _x = ?', [1], @q_ident; t VALUES({ a => 1, c => RAW 'NOW()', d => undef }), '( a , c , d ) VALUES ( ? , NOW() , ? )', [1, undef]; +t VALUES({ '"x' => 1 }), '( _x ) VALUES ( ? )', [1], @q_ident; t VALUES(1, $x, 'NOW()', RAW 'NOW()'), 'VALUES ( ? , ? , NOW() , NOW() )', [1, $x]; t VALUES([1, $x, 'NOW()', RAW 'NOW()']), 'VALUES ( ? , ? , ? , NOW() )', [1, $x, 'NOW()']; @@ -86,4 +93,7 @@ Hash::Util::lock_keys(%hash); Hash::Util::lock_value(%hash, 'v'); t SQL($hash{v}), 'value', []; +ok !eval { SQL('')->compile(oops => 1); 1 }; +like $@, qr/Unknown flag: oops/; + done_testing; From 2083ab2a6f3793ce560996dc2c8120c74c61a78b Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 22 May 2025 09:53:41 +0200 Subject: [PATCH 03/22] Pg: Set appropriate quote_identifier for $conn->Q() --- FU.xs | 12 ++++++++++++ FU/Pg.pm | 9 +++++++-- t/pgtypes-dynamic.t | 8 ++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/FU.xs b/FU.xs index 1c342be..7a387f9 100644 --- a/FU.xs +++ b/FU.xs @@ -217,6 +217,12 @@ void query_trace(fupg_conn *c, SV *cb) SvGETMAGIC(cb); c->trace = SvOK(cb) ? SvREFCNT_inc(cb) : NULL; +void conn(fupg_conn *c) + CODE: + ST(0) = sv_newmortal(); + sv_setrv_inc(ST(0), c->self); + sv_bless(ST(0), gv_stashpv("FU::Pg::conn", 0)); + void status(fupg_conn *c) CODE: ST(0) = sv_2mortal(newSVpv(fupg_conn_status(c), 0)); @@ -317,6 +323,12 @@ void cache(fupg_txn *x, ...) CODE: FUPG_STFLAGS; +void conn(fupg_txn *t) + CODE: + ST(0) = sv_newmortal(); + sv_setrv_inc(ST(0), t->conn->self); + sv_bless(ST(0), gv_stashpv("FU::Pg::conn", 0)); + void status(fupg_txn *t) CODE: ST(0) = sv_2mortal(newSVpv(fupg_txn_status(t), 0)); diff --git a/FU/Pg.pm b/FU/Pg.pm index 4732daf..2e7baf8 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -10,7 +10,11 @@ package FU::Pg::conn { sub Q { require FU::SQL; my $s = shift; - my($sql, $params) = FU::SQL::SQL(@_)->compile(placeholder_style => 'pg', in_style => 'pg'); + my($sql, $params) = FU::SQL::SQL(@_)->compile( + placeholder_style => 'pg', + in_style => 'pg', + quote_identifier => sub { $s->conn->escape_identifier(@_) }, + ); $s->q($sql, @$params); } @@ -208,7 +212,8 @@ used. =item $conn->Q(@args) Same as C<< $conn->q() >> but uses L to construct the query and bind -parameters. +parameters. Uses the 'pg' C and C<< $conn->escape_identifier() >> for +identifier quoting. =back diff --git a/t/pgtypes-dynamic.t b/t/pgtypes-dynamic.t index 2751a86..79abd92 100644 --- a/t/pgtypes-dynamic.t +++ b/t/pgtypes-dynamic.t @@ -127,6 +127,14 @@ subtest 'custom types', sub { }; +subtest 'identifier quoting', sub { + my $txn = $conn->txn; + $txn->exec('CREATE TEMPORARY TABLE fupg_test_tbl ("desc" int, ok int, "hello world" int)'); + ok $txn->Q('INSERT INTO fupg_test_tbl', VALUES {desc => 5, ok => 10, 'hello world', 15})->exec; + is $txn->Q('SELECT', IDENT 'hello world', 'FROM fupg_test_tbl')->val, 15; +}; + + subtest 'vndbid', sub { plan skip_all => 'type not loaded in the database' if !$conn->q("SELECT 1 FROM pg_type WHERE typname = 'vndbtag'")->val; From fd8332601b56e661c7c656cdfcd8d11fd65f3cc9 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 22 May 2025 09:54:08 +0200 Subject: [PATCH 04/22] t/pgconnect: Fix ref leak in test Apparently 'my sub' captured the $conn variable and held a ref on it even beyond the parent sub scope. 'my $x = sub {}' doesn't do that. Getting the ref counts right is important here for the last test to work. (Found while I was inspecting the refcount effects of the new ->conn() methods with Devel::Peek) --- t/pgconnect.t | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/t/pgconnect.t b/t/pgconnect.t index 8536574..cec597d 100644 --- a/t/pgconnect.t +++ b/t/pgconnect.t @@ -370,18 +370,18 @@ subtest 'Prepared statement cache', sub { $conn->cache_size(2); my $txn = $conn->txn; $txn->cache; - my sub numexec($sql) { + my $numexec = sub($sql) { $txn->q('SELECT generic_plans + custom_plans FROM pg_prepared_statements WHERE statement = $1', $sql)->cache(0)->val - } + }; is $txn->q('SELECT 1')->val, 1; - is numexec('SELECT 1'), 1; + is $numexec->('SELECT 1'), 1; my $sql = 'SELECT $1::int as a, $2::text as b'; - ok !defined numexec($sql); + ok !defined $numexec->($sql); my $params = $txn->q($sql)->param_types; is_deeply $params, [23, 25]; - is numexec($sql), 0; + is $numexec->($sql), 0; my $cparams = $txn->q($sql)->param_types; is_deeply $cparams, $params; @@ -391,23 +391,23 @@ subtest 'Prepared statement cache', sub { is_deeply $ccols, $cols; $txn->q($sql, 0, '')->exec; - is numexec($sql), 1; + is $numexec->($sql), 1; $txn->q($sql, 0, '')->exec; - is numexec($sql), 2; + is $numexec->($sql), 2; - is numexec('SELECT 1'), 1; + is $numexec->('SELECT 1'), 1; $txn->q('SELECT 2')->exec; - ok !defined numexec('SELECT 1'); - is numexec('SELECT 2'), 1; + ok !defined $numexec->('SELECT 1'); + is $numexec->('SELECT 2'), 1; $conn->cache_size(1); - ok !defined numexec('SELECT 1'); - ok !defined numexec($sql); - is numexec('SELECT 2'), 1; + ok !defined $numexec->('SELECT 1'); + ok !defined $numexec->($sql); + is $numexec->('SELECT 2'), 1; $conn->cache_size(0); - ok !defined numexec($sql); - ok !defined numexec('SELECT 2'); + ok !defined $numexec->($sql); + ok !defined $numexec->('SELECT 2'); }; From f8cd8a6d8cbc687e452071b98f1457f546a55c08 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Tue, 27 May 2025 09:30:46 +0200 Subject: [PATCH 05/22] FU: Simplify --monitor file change detection This changes the way that file changes are detected. The upside is that it now correctly detects changes that happened after the code has loaded but before the first request came in, the downside is that it now gets stuck on reloading when a file has a future mtime. --- FU.pm | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/FU.pm b/FU.pm index 9c85edf..6da28cb 100644 --- a/FU.pm +++ b/FU.pm @@ -217,17 +217,12 @@ sub monitor_path { push @monitor_paths, @_ } sub monitor_check :prototype(&) { $monitor_check = $_[0] } sub _monitor { - state %data; return 1 if $monitor_check && $monitor_check->(); require File::Find; eval { File::Find::find({ - wanted => sub { - my $m = (stat)[9]; - $data{$_} //= $m; - die if $m > $data{$_}; - }, + wanted => sub { die if (-M) < 0 }, no_chdir => 1 }, grep -e, $scriptpath, values %INC, @monitor_paths); 0 From a43dc70ff92b2baca45bb316b3bbce571054191d Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 2 Jun 2025 09:00:04 +0200 Subject: [PATCH 06/22] XMLWriter: Throw error when stringifying a bare reference I can't think of a use case where Perl's default ref stringification is something you actually want when writing XML/HTML - this pretty much always points to a bug. One that I seem to be prone to making... --- c/xmlwr.c | 4 +++- t/xmlwr.t | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/c/xmlwr.c b/c/xmlwr.c index f81d94c..2d31fec 100644 --- a/c/xmlwr.c +++ b/c/xmlwr.c @@ -27,6 +27,8 @@ static void fuxmlwr_destroy(pTHX_ fuxmlwr *wr) { static void fuxmlwr_escape(pTHX_ fuxmlwr *wr, SV *sv) { + if (SvROK(sv) && !SvAMAGIC(sv)) fu_confess("Invalid attempt to output bare reference"); + STRLEN len; const unsigned char *str = (unsigned char *)SvPV_const(sv, len); const unsigned char *tmp, *end = str + len; @@ -96,7 +98,7 @@ static void fuxmlwr_tag(pTHX_ fuxmlwr *wr, I32 ax, I32 offset, I32 argc, int sel val = ST(offset); offset++; - // Don't even try to stringify other arguments; non-string keys are always a bug. + // Don't even try to stringify attribute names; non-string keys are always a bug. if (!SvPOK(key)) fu_confess("Non-string attribute"); keys = SvPVX(key); diff --git a/t/xmlwr.t b/t/xmlwr.t index e8b2d95..becb96c 100644 --- a/t/xmlwr.t +++ b/t/xmlwr.t @@ -65,4 +65,21 @@ sub t { is fragment { t 'arg' }, '
ab" < c &< d🥳
'; +ok !eval { fragment { tag_ 'hi', \1 } }; +like $@, qr/Invalid attempt to output bare reference/; + +ok !eval { fragment { tag_ 'hi', {} } }; +like $@, qr/Invalid attempt to output bare reference/; + +is fragment { tag_ 'hi', bless {}, 'XTEST1' }, 'string'; +like fragment { tag_ 'hi', bless {}, 'XTEST2' }, qr{HASH\(.*\)}; # Yeah, whatever. +like fragment { tag_ 'hi', ''.{} }, qr{HASH\(.*\)}; + done_testing; + + +package XTEST1; +use overload '""' => sub { 'string' }; + +package XTEST2; +use overload '""' => sub { {} }; From 55baa6c9a616e9a3a9223cc07826dc7c23ec6825 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 4 Jun 2025 18:48:06 +0200 Subject: [PATCH 07/22] json_parse(): Disallow control characters in strings by default Deviating from the standard, but more consistent other FU functions. --- FU/Util.pm | 16 +++++++++++----- c/jsonparse.c | 10 ++++++++-- t/json_parse.t | 12 +++++++++--- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/FU/Util.pm b/FU/Util.pm index 922747d..4b06f33 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -137,7 +137,7 @@ value for C<$val>, due to C<\0> and C<\1> being considered booleans. =head1 JSON Parsing & Formatting This module comes with a custom C-based JSON parser and formatter. These -functions conform strictly to L, +functions conform to L, non-standard extensions are not supported and never will be. It also happens to be pretty fast, refer to L for some numbers. @@ -171,6 +171,13 @@ Supported C<%options>: =over +=item allow_control + +Boolean, set to true to allow (encoded) ASCII control characters in JSON +strings, such as C<\u0000>, C<\b>, C<\u007f>, etc. These characters are +permitted per RFC-8259, but disallowed by this parser by default. See +C below. + =item utf8 Boolean, interpret the input C<$string> as a UTF-8 encoded byte string instead @@ -251,10 +258,9 @@ value. There is no way to do that without violating JSON specs, so you should use entity escaping instead. Some JSON modules escape the forward slash (C) character instead, but that -is, at best, B sufficient for embedding inside a C<<