From 0cd947c545b13ce316d36bda21476ace5b2b3483 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 25 Apr 2025 09:31:43 +0200 Subject: [PATCH 01/40] FastCGI: Ignore HTTP_CONTENT_(TYPE|LENGTH) The non-HTTP_ versions of these are authoritative, Also fixes a memory leak when both the HTTP_ and non-HTTP_ versions are included. --- c/fcgi.c | 7 +++++-- t/fcgi.t | 7 +++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/c/fcgi.c b/c/fcgi.c index fdf8cae..4f886dd 100644 --- a/c/fcgi.c +++ b/c/fcgi.c @@ -319,8 +319,11 @@ static int fufcgi_read_params(pTHX_ fufcgi *ctx, fufcgi_rec *rec) { p.name += 5; for (r=0; r= 'A' && p.name[r] <= 'Z' ? p.name[r] | 0x20 : p.name[r]; - valsv = newSV(p.vallen+1); - hv_store(ctx->headers, p.name, p.namelen, valsv, 0); + if (!(p.namelen == 14 && memcmp(p.name, "content-length", 14) == 0) + && !(p.namelen == 12 && memcmp(p.name, "content-type", 12) == 0)) { + valsv = newSV(p.vallen+1); + hv_store(ctx->headers, p.name, p.namelen, valsv, 0); + } } else if (p.namelen == 14 && memcmp(p.name, "CONTENT_LENGTH", 14) == 0) { valsv = newSV(p.vallen+1); diff --git a/t/fcgi.t b/t/fcgi.t index 0711d6a..d7860dc 100644 --- a/t/fcgi.t +++ b/t/fcgi.t @@ -167,6 +167,13 @@ record 1, 4, "\x0c\x05CONTENT_TYPEsomet"; record 1, 2, ""; isrec {'content-type','somet'}, {body => ''}, -6; +start; +begin; +record 1, 4, "\x13\x01HTTP_CONTENT_LENGTH3\x0e\x01CONTENT_LENGTH0\x13\x01HTTP_CONTENT_LENGTH5"; +record 1, 4, ""; +record 1, 5, ""; +isrec {'content-length','0'}, {body => ''}; + start; begin; record 1, 4, "\x0e\x05CONTENT_LENGTH65536"; From 5f8809d0523d547ae7be03fac568ec7a36b85e53 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 25 Apr 2025 17:07:56 +0200 Subject: [PATCH 02/40] FU::Util::query_decode(): Properly handle empty "&"-parts --- FU/Util.pm | 1 + t/query.t | 2 ++ 2 files changed, 3 insertions(+) diff --git a/FU/Util.pm b/FU/Util.pm index 5b262fb..7d585d9 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -41,6 +41,7 @@ sub uri_unescape :prototype($) ($s) { sub query_decode :prototype($) ($s) { my %o; for (split /&/, $s//'') { + next if !length; my($k,$v) = map uri_unescape($_), split /=/, $_, 2; $v //= builtin::true; if (ref $o{$k}) { push $o{$k}->@*, $v } diff --git a/t/query.t b/t/query.t index ebeff80..80f2b00 100644 --- a/t/query.t +++ b/t/query.t @@ -10,6 +10,8 @@ is_deeply ok !eval { query_decode('%10'); 1 }; like $@, qr/Invalid control character/; +is_deeply query_decode('&&&a=b'), { a => 'b' }; + is query_encode { a => builtin::true, b => undef, c => builtin::false, d => 'string', e => "&=\xfe" }, 'a&d=string&e=%26%3d%c3%be'; From 461ed6f39d5522136da212ba2477a0bfbf25613c Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sat, 26 Apr 2025 08:05:09 +0200 Subject: [PATCH 03/40] FU: Suppress warnings about non-existent files in FU::monitor_path checking --- FU.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FU.pm b/FU.pm index 1f71a91..04809b5 100644 --- a/FU.pm +++ b/FU.pm @@ -216,7 +216,7 @@ sub _monitor { die if $m > $data{$_}; }, no_chdir => 1 - }, $scriptpath, values %INC, @monitor_paths); + }, grep -e, $scriptpath, values %INC, @monitor_paths); 0 } // 1; } From 753cac615a645bc875d4e39ad3b81d80fc01542f Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sat, 26 Apr 2025 15:41:26 +0200 Subject: [PATCH 04/40] Validate: Improved + extendable error message formatting Very much needed for VNDB's advanced search validation. Also completely undocumented. --- FU/Validate.pm | 47 +++++++++++++++++++++++++++++++---------------- t/validate.t | 34 +++++++++++++++++----------------- 2 files changed, 48 insertions(+), 33 deletions(-) diff --git a/FU/Validate.pm b/FU/Validate.pm index b315784..2741ee6 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -395,32 +395,47 @@ sub empty($c) { +sub _fmtkey($k) { $k =~ /^[a-zA-Z0-9_-]+$/ ? $k : FU::Util::json_format($k); } +sub _fmtval($v) { eval { $v = FU::Util::json_format($v) }; "$v" } +sub _inval($t,$v) { sprintf 'invalid %s: %s', $t, _fmtval $v } + +# validation name => formatting sub +# TODO: document. +our %error_format = ( + required => sub { 'required value missing' }, + type => sub($e) { "invalid type, expected '$e->{expected}' but got '$e->{got}'" }, + unknown => sub($e) { sprintf 'unknown key%s: %s', $e->{keys}->@* == 1 ? '' : 's', join ', ', map _fmtkey($_), $e->{keys}->@* }, + minlength => sub($e) { sprintf "input too short, expected minimum of %d but got %d", $e->{expected}, $e->{got} }, + maxlength => sub($e) { sprintf "input too long, expected maximum of %d but got %d", $e->{expected}, $e->{got} }, + length => sub($e) { + !ref $e->{expected} + ? sprintf 'invalid input length, expected %d but got %d', $e->{expected}, $e->{got} + : sprintf 'invalid input length, expected between %d and %d but got %d', $e->{expected}->@*, $e->{got} + }, + num => sub($e) { _inval 'number', $e->{got} }, + min => sub($e) { $e->{error} ? _inval 'number', $e->{error}{got} : sprintf 'expected minimum %s but got %s', $e->{expected}, $e->{got} }, + max => sub($e) { $e->{error} ? _inval 'number', $e->{error}{got} : sprintf 'expected maximum %s but got %s', $e->{expected}, $e->{got} }, + range => sub($e) { FU::Validate::err::errors($e->{error}) }, +); + + package FU::Validate::err; use v5.36; -use FU::Util; use overload '""' => sub { $_[0]{longmess} || join "\n", $_[0]->errors }; -sub _fmtkey($k) { - $k =~ /^[a-zA-Z0-9_-]+$/ ? $k : FU::Util::json_format($k); -} - -sub _fmtval($v) { - eval { $v = FU::Util::json_format($v) }; "$v" -} - +# TODO: document. sub errors($e, $prefix='') { my $val = $e->{validation}; my $p = $prefix ? "$prefix: " : ''; - $val eq 'keys' ? map errors($_, $prefix.'.'._fmtkey($_->{key})), $e->{errors}->@* : - $val eq 'values' ? map errors($_, $prefix.'.'._fmtkey($_->{key})), $e->{errors}->@* : - $val eq 'missing' ? $prefix.'.'._fmtkey($e->{key}).': required key missing' : + $FU::Validate::error_format{$val} ? map "$p$_", $FU::Validate::error_format{$val}->($e) : + $val eq 'keys' ? map errors($_, $prefix.'.'.FU::Validate::_fmtkey($_->{key})), $e->{errors}->@* : + $val eq 'values' ? map errors($_, $prefix.'.'.FU::Validate::_fmtkey($_->{key})), $e->{errors}->@* : + $val eq 'missing' ? $prefix.'.'.FU::Validate::_fmtkey($e->{key}).': required key missing' : $val eq 'elems' ? map errors($_, $prefix."[$_->{index}]"), $e->{errors}->@* : - $val eq 'unique' ? $prefix."[$e->{index_b}] value '"._fmtval($e->{value_a})."' duplicated" : - $val eq 'required' ? "${p}required value missing" : - $val eq 'type' ? "${p}invalid type, expected '$e->{expected}' but got '$e->{got}'" : - $val eq 'unknown' ? ($e->{keys}->@* > 1 ? "${p}unknown keys: ".join(', ', _fmtkey($e->{keys})) : "${p}unknown key '"._fmtkey($e->{keys}[0])."'") : + $val eq 'unique' ? $prefix."[$e->{index_b}] value '".FU::Validate::_fmtval($e->{value_a})."' duplicated" : $e->{error} ? errors($e->{error}, "${p}validation '$val'") : + $e->{message} ? "${p}validation '$val': $e->{message}" : "${p}failed validation '$val'"; } diff --git a/t/validate.t b/t/validate.t index a29dc4c..26704cd 100644 --- a/t/validate.t +++ b/t/validate.t @@ -119,7 +119,7 @@ f { type => 'hash', keys => { a=>{missing=>'reject'} } }, {}, {key => 'a', valid t { type => 'hash', keys => { a=>{} } }, {a=>' a '}, {a=>'a'}; # Test against in-place modification t { type => 'hash', keys => { a=>{} }, unknown => 'remove' }, { a=>1,b=>1 }, { a=>1 }; -f { type => 'hash', keys => { a=>{} }, unknown => 'reject' }, { a=>1,b=>1 }, { validation => 'unknown', keys => ['b'], expected => ['a'] }, "unknown key 'b'"; +f { type => 'hash', keys => { a=>{} }, unknown => 'reject' }, { a=>1,b=>1 }, { validation => 'unknown', keys => ['b'], expected => ['a'] }, "unknown key: b"; t { type => 'hash', keys => { a=>{} }, unknown => 'pass' }, { a=>1,b=>1 }, { a=>1,b=>1 }; t { type => 'hash', setundef => 1 }, {}, undef; t { type => 'hash', unknown => 'reject', keys => { a=>{ type => 'any', setundef => 1}} }, {a=>[]}, {a=>undef}; @@ -132,20 +132,20 @@ t { values => { int => 1 } }, { a => -1, b => 1 }, { a => -1, b => 1 }; f { values => { int => 1 } }, { a => undef }, { validation => 'values', errors => [{ key => 'a', validation => 'required' }] }, '.a: required value missing'; # default validations -f { minlength => 3 }, 'ab', { validation => 'minlength', expected => 3, got => 2 }, "failed validation 'minlength'"; +f { minlength => 3 }, 'ab', { validation => 'minlength', expected => 3, got => 2 }, "input too short, expected minimum of 3 but got 2"; t { minlength => 3 }, 'abc', 'abc'; -f { maxlength => 3 }, 'abcd', { validation => 'maxlength', expected => 3, got => 4 }, "failed validation 'maxlength'"; +f { maxlength => 3 }, 'abcd', { validation => 'maxlength', expected => 3, got => 4 }, "input too long, expected maximum of 3 but got 4"; t { maxlength => 3 }, 'abc', 'abc'; t { minlength => 3, maxlength => 3 }, 'abc', 'abc'; -f { length => 3 }, 'ab', { validation => 'length', expected => 3, got => 2 }, "failed validation 'length'"; -f { length => 3 }, 'abcd', { validation => 'length', expected => 3, got => 4 }, "failed validation 'length'"; +f { length => 3 }, 'ab', { validation => 'length', expected => 3, got => 2 }, 'invalid input length, expected 3 but got 2'; +f { length => 3 }, 'abcd', { validation => 'length', expected => 3, got => 4 }, 'invalid input length, expected 3 but got 4'; t { length => 3 }, 'abc', 'abc'; t { length => [1,3] }, 'abc', 'abc'; -f { length => [1,3] }, 'abcd', { validation => 'length', expected => [1,3], got => 4 }, "failed validation 'length'"; +f { length => [1,3] }, 'abcd', { validation => 'length', expected => [1,3], got => 4 }, "invalid input length, expected between 1 and 3 but got 4"; t { type => 'array', length => 0 }, [], []; -f { type => 'array', length => 1 }, [1,2], { validation => 'length', expected => 1, got => 2 }, "failed validation 'length'"; +f { type => 'array', length => 1 }, [1,2], { validation => 'length', expected => 1, got => 2 }, "invalid input length, expected 1 but got 2"; t { type => 'hash', length => 0 }, {}, {}; -f { type => 'hash', length => 1, unknown => 'pass' }, {qw/1 a 2 b/}, { validation => 'length', expected => 1, got => 2 }, "failed validation 'length'"; +f { type => 'hash', length => 1, unknown => 'pass' }, {qw/1 a 2 b/}, { validation => 'length', expected => 1, got => 2 }, "invalid input length, expected 1 but got 2"; t { type => 'hash', length => 1, keys => {a => {missing=>'ignore'}, b => {missing=>'ignore'}} }, {a=>1}, {a=>1}; t { regex => '^a' }, 'abc', 'abc'; # XXX: Can't use qr// here because t() does dclone(). The 'hex' test covers that case anyway. f { regex => '^a' }, 'cba', { validation => 'regex', regex => '^a', got => 'cba' }, "failed validation 'regex'"; @@ -201,7 +201,7 @@ t { doublefunc => 1 }, 0, 2; f { doublefunc => 1 }, 1, { validation => 'doublefunc', error => { validation => 'func', result => '' } }, "validation 'doublefunc': failed validation 'func'"; # numbers -sub nerr { ({ validation => 'num', got => $_[0] }, "failed validation 'num'") } +sub nerr { ({ validation => 'num', got => $_[0] }, "invalid number: \"$_[0]\"") } t { num => 1 }, 0, 0; f { num => 1 }, '-', nerr '-'; f { num => 1 }, '00', nerr '00'; @@ -219,16 +219,16 @@ t { uint => 1 }, 0, 0; t { uint => 1 }, 123, 123; f { uint => 1 }, -123, { validation => 'uint', got => -123 }, "failed validation 'uint'"; t { min => 1 }, 1, 1; -f { min => 1 }, 0.9, { validation => 'min', expected => 1, got => 0.9 }, "failed validation 'min'"; -f { min => 1 }, 'a', { validation => 'min', error => (nerr 'a')[0] }, "validation 'min': failed validation 'num'"; +f { min => 1 }, 0.9, { validation => 'min', expected => 1, got => 0.9 }, "expected minimum 1 but got 0.9"; +f { min => 1 }, 'a', { validation => 'min', error => (nerr 'a')[0] }, 'invalid number: "a"'; t { max => 1 }, 1, 1; -f { max => 1 }, 1.1, { validation => 'max', expected => 1, got => 1.1 }, "failed validation 'max'"; -f { max => 1 }, 'a', { validation => 'max', error => (nerr 'a')[0] }, "validation 'max': failed validation 'num'"; +f { max => 1 }, 1.1, { validation => 'max', expected => 1, got => 1.1 }, "expected maximum 1 but got 1.1"; +f { max => 1 }, 'a', { validation => 'max', error => (nerr 'a')[0] }, 'invalid number: "a"'; t { range => [1,2] }, 1, 1; t { range => [1,2] }, 2, 2; -f { range => [1,2] }, 0.9, { validation => 'range', error => { validation => 'min', expected => 1, got => 0.9 } }, "validation 'range': failed validation 'min'"; -f { range => [1,2] }, 2.1, { validation => 'range', error => { validation => 'max', expected => 2, got => 2.1 } }, "validation 'range': failed validation 'max'"; -f { range => [1,2] }, 'a', { validation => 'range', error => { validation => 'min', error => (nerr 'a')[0] } }, "validation 'range': validation 'min': failed validation 'num'"; +f { range => [1,2] }, 0.9, { validation => 'range', error => { validation => 'min', expected => 1, got => 0.9 } }, 'expected minimum 1 but got 0.9'; +f { range => [1,2] }, 2.1, { validation => 'range', error => { validation => 'max', expected => 2, got => 2.1 } }, 'expected maximum 2 but got 2.1'; +f { range => [1,2] }, 'a', { validation => 'range', error => { validation => 'min', error => (nerr 'a')[0] } }, 'invalid number: "a"'; # email template use utf8; @@ -253,7 +253,7 @@ t { email => 1 }, $_, $_ for ( 'abc@x-y_z.example', ); my $long = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx@xxxxxxxxxxxxxxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxx.xxxxx'; -f { email => 1 }, $long, { validation => 'email', error => { validation => 'maxlength', got => 255, expected => 254 } }, "validation 'email': failed validation 'maxlength'"; +f { email => 1 }, $long, { validation => 'email', error => { validation => 'maxlength', got => 255, expected => 254 } }, "validation 'email': input too long, expected maximum of 254 but got 255"; # weburl template f { weburl => 1 }, $_, { validation => 'weburl', got => $_ }, "failed validation 'weburl'" for ( From 817fa600d0ccc66bdc2e6d0e501e557a01be69d9 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 27 Apr 2025 11:17:54 +0200 Subject: [PATCH 05/40] FU: Add fu->log_verbose() + include request body in error logs --- FU.pm | 44 ++++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/FU.pm b/FU.pm index 04809b5..11c58ff 100644 --- a/FU.pm +++ b/FU.pm @@ -303,7 +303,7 @@ sub _read_req($c) { ($REQ->{path}, my $qs) = split /\?/, $REQ->{path}//'', 2; $REQ->{qs} //= $qs; - $REQ->{path} = FU::Util::uri_unescape($REQ->{path}); + eval { $REQ->{path} = FU::Util::uri_unescape($REQ->{path}); 1; } || fu->error(400, $@); } @@ -313,17 +313,8 @@ sub _log_err($e) { return if !$e; my $crit = $e isa 'FU::err' ? $e->[0] == 500 : !($e isa 'FU::Validate::err'); return if !debug && !$crit; - if ($crit && !$REQ->{full_err}++) { - $e =~ s/^\s+//; - $e =~ s/\s+$//; - log_write join "\n", - 'IP: '.($REQ->{ip}||'-'), - 'Headers:', (map " $_: $REQ->{hdr}{$_}", sort keys $REQ->{hdr}->%*), - 'ERROR:'.($e =~ s/(^|\n)/\n /rg); - # TODO: decoded body, if we have that. - } else { - log_write $e; - } + return fu->log_verbose($e) if $crit; + log_write $e; } sub _do_req($c) { @@ -643,6 +634,27 @@ sub db { sub sql { shift->db->q(@_) } sub SQL { shift->db->Q(@_) } +sub _fmt_section($s) { $s =~ s/^\s*/ /r =~ s/\s+$//r =~ s/\n/\n /rg } + +sub log_verbose($,$msg) { + my $r = $FU::REQ; + return FU::Log::log_write($msg) if $r->{log_verbose}++; + FU::Log::log_write(join "\n", + 'IP: '.($r->{ip}||'-'), + 'Headers:', (map " $_: $r->{hdr}{$_}", sort keys $r->{hdr}->%*), + $r->{multipart} ? ('Body (multipart):', _fmt_section join "\n", map $_->describe, $r->{multipart}->@*) : + $r->{json} ? ('Body (JSON):', _fmt_section FU::Util::json_format($r->{json}, pretty => 1, canonical => 1)) : + $r->{formdata} ? ('Body (formdata):', _fmt_section FU::Util::json_format($r->{json}, pretty => 1, canonical => 1)) : + length $r->{body} ? do { + my $b = substr $r->{body}, 0, 4096; + my $trunc = length $r->{body} > 4096 ? ', truncated' : ''; + utf8::decode($b) ? ("Body (utf8$trunc)", _fmt_section($b =~ s/\r//rg =~ s/\n{4,}/\n[..]\n/rg)) + : ("Body (hex$trunc)", _fmt_section(unpack('H*', $b) =~ s/(.{128})/$1\n/rg)) + } : (), + 'Message:', _fmt_section $msg + ); +} + @@ -1286,6 +1298,14 @@ Convenient short-hand for C<< fu->db->q($query, @params) >>. Convenient short-hand for C<< fu->db->Q(@args) >>. +=item fu->log_verbose($message) + +Write a verbose multi-line message to the log, including a full dump of +information about the request: IP, headers and (potentially reformatted and/or +truncated) body. This extra info is only written once per request, further +calls to C just go directly to L's C +instead. + =back =head1 Request Information From d0c5397e2dda0387c1678a279bc13187c8dd455d Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 28 Apr 2025 10:20:53 +0200 Subject: [PATCH 06/40] json_parse()/pgtypes: Fix accidental creation of read-only array/hash values &PL_sv_* shouldn't be used when constructing arrays or hashes in this context. --- FU.xs | 6 ++++++ c/jsonparse.c | 4 ++-- c/pgst.c | 2 +- c/pgtypes.c | 12 ++++++++---- t/json_parse.t | 8 +++++++- t/pgconnect.t | 1 + t/pgtypes-dynamic.t | 7 ++++++- t/pgtypes.t | 16 ++++++++++++++-- 8 files changed, 45 insertions(+), 11 deletions(-) diff --git a/FU.xs b/FU.xs index 221740b..dc19870 100644 --- a/FU.xs +++ b/FU.xs @@ -20,6 +20,12 @@ #ifndef BOOL_INTERNALS_sv_isbool_true #define BOOL_INTERNALS_sv_isbool_true(x) SvTRUEx(x) #endif +#ifndef newSV_true +#define newSV_true() newSVsv(&PL_sv_yes) +#endif +#ifndef newSV_false +#define newSV_false() newSVsv(&PL_sv_no) +#endif /* Disable key/value struct packing in khashl, so we can safely take a pointer * to values inside the hash table. */ diff --git a/c/jsonparse.c b/c/jsonparse.c index d24b1f9..6dfee91 100644 --- a/c/jsonparse.c +++ b/c/jsonparse.c @@ -236,12 +236,12 @@ static SV *fujson_parse(pTHX_ fujson_parse_ctx *ctx) { if (ctx->end - ctx->buf < 4) return NULL; if (memcmp(ctx->buf, "true", 4) != 0) return NULL; ctx->buf += 4; - return &PL_sv_yes; + return newSV_true(); case 'f': if (ctx->end - ctx->buf < 5) return NULL; if (memcmp(ctx->buf, "false", 5) != 0) return NULL; ctx->buf += 5; - return &PL_sv_no; + return newSV_false(); case 'n': if (ctx->end - ctx->buf < 4) return NULL; if (memcmp(ctx->buf, "null", 4) != 0) return NULL; diff --git a/c/pgst.c b/c/pgst.c index e5a2a3d..e943450 100644 --- a/c/pgst.c +++ b/c/pgst.c @@ -463,7 +463,7 @@ static SV *fupg_st_kvv(pTHX_ fupg_st *st) { SAVETMPS; SV *key = sv_2mortal(fupg_st_getval(aTHX_ st, i, 0)); if (hv_exists_ent(hv, key, 0)) fu_confess("Key '%s' is duplicated in $st->kvv() query results", SvPV_nolen(key)); - hv_store_ent(hv, key, st->nfields == 1 ? &PL_sv_yes : fupg_st_getval(aTHX_ st, i, 1), 0); + hv_store_ent(hv, key, st->nfields == 1 ? newSV_true() : fupg_st_getval(aTHX_ st, i, 1), 0); FREETMPS; } return sv; diff --git a/c/pgtypes.c b/c/pgtypes.c index b307cc0..471b6d2 100644 --- a/c/pgtypes.c +++ b/c/pgtypes.c @@ -78,7 +78,7 @@ SENDFN(domain) { (void)out; SERR("domain type should not be handled by this func RECVFN(bool) { RLEN(1); - return *buf ? &PL_sv_yes : &PL_sv_no; + return *buf ? newSV_true() : newSV_false(); } SENDFN(bool) { @@ -89,7 +89,7 @@ SENDFN(bool) { RECVFN(void) { RLEN(0); (void)buf; - return &PL_sv_undef; + return newSV(0); } SENDFN(void) { @@ -269,7 +269,7 @@ SENDFN(jsonpath) { #define ARRAY_MAXDIM 100 static SV *fupg_recv_array_elem(pTHX_ const fupg_tio *elem, const char *header, U32 dim, U32 ndim, const char **buf, const char *end) { - SV *r = &PL_sv_undef; + SV *r; if (dim == ndim) { if (end - *buf < 4) fu_confess("Invalid array format"); I32 len = fu_frombeI(32, *buf); @@ -279,6 +279,8 @@ static SV *fupg_recv_array_elem(pTHX_ const fupg_tio *elem, const char *header, if (len >= 0) { r = elem->recv(aTHX_ elem, *buf, len); *buf += len; + } else { + r = newSV(0); } } else { @@ -403,12 +405,14 @@ RECVFN(record) { if (oid != ctx->record.info->attrs[i].oid) RERR("expected field %d to be of type %u but got %u", i, ctx->record.info->attrs[i].oid, oid); I32 vlen = fu_frombeI(32, buf+4); - SV *r = &PL_sv_undef; + SV *r; buf += 8; len -= 8; if (vlen > len) RERR("input data too short"); if (vlen >= 0) { r = ctx->record.tio[i].recv(aTHX_ ctx->record.tio+i, buf, vlen); buf += vlen; len -= vlen; + } else { + r = newSV(0); } hv_store(hv, ctx->record.info->attrs[i].name.n, -strlen(ctx->record.info->attrs[i].name.n), r, 0); } diff --git a/t/json_parse.t b/t/json_parse.t index 0c26dff..d01414f 100644 --- a/t/json_parse.t +++ b/t/json_parse.t @@ -2,7 +2,7 @@ use v5.36; use Test::More; use FU::Util 'json_parse'; no warnings 'experimental::builtin'; -use builtin 'is_bool', 'created_as_number'; +use builtin 'is_bool', 'created_as_number', 'true', 'false'; use Config; my @error = ( @@ -236,4 +236,10 @@ ok !eval { json_parse '{"":{"":{"":{"":1}}}}', max_depth => 4; 1 }; ok !eval { json_parse '"string"', max_size => 7 }; } +# Mutable hashes/arrays +my $d = json_parse('[true,false,null,{"a":true,"b":false,"c":null}]'); +is_deeply $d, [true,false,undef,{a => true, b => false, c => undef}]; +$_ = 1 for @{$d}[0,1,2], values $d->[3]->%*; +is_deeply $d, [1,1,1,{a => 1, b => 1, c => 1}]; + done_testing; diff --git a/t/pgconnect.t b/t/pgconnect.t index 798734c..8536574 100644 --- a/t/pgconnect.t +++ b/t/pgconnect.t @@ -197,6 +197,7 @@ subtest '$st->kvv', sub { is_deeply $conn->q('SELECT 1 WHERE false')->kvv, {}; is_deeply $conn->q('SELECT 1')->kvv, {1=>1}; is_deeply $conn->q('SELECT 1, null UNION ALL SELECT 3, 2')->kvv, {1=>undef,3=>2}; + $conn->q('SELECT 1')->kvv->{1} = 0; }; subtest '$st->kva', sub { diff --git a/t/pgtypes-dynamic.t b/t/pgtypes-dynamic.t index b6954b5..2751a86 100644 --- a/t/pgtypes-dynamic.t +++ b/t/pgtypes-dynamic.t @@ -96,10 +96,15 @@ subtest 'custom types', sub { ); _ - is_deeply $txn->q(q{SELECT '{"(\"(2,{},bb)\",)","(\"(,,)\",bb)"}'::fupg_test_table[]})->val, [ + $val = $txn->q(q{SELECT '{"(\"(2,{},bb)\",)","(\"(,,)\",bb)"}'::fupg_test_table[]})->val; + is_deeply $val, [ { rec => { a => 2, aenum => [], domain => 'bb' }, dom => undef }, { rec => { a => undef, aenum => undef, domain => undef }, dom => 'bb' }, ]; + $val->[0] = 0; + $val->[1]{rec}{a} = 0; + $val->[1]{rec} = 0; + $val->[1]{dom} = 0; is $txn->q('SELECT $1::fupg_test_table[]', [ { rec => { a => 2, aenum => [], domain => 'bb' }, dom => undef }, diff --git a/t/pgtypes.t b/t/pgtypes.t index 5a17a71..3a3252c 100644 --- a/t/pgtypes.t +++ b/t/pgtypes.t @@ -21,10 +21,12 @@ sub v($type, $p_in, @args) { my $test = "$type $s_in" =~ s/\n/\\n/rg; utf8::encode($test); { - my $res = $conn->q("SELECT \$1::$type", $s_in)->text_params->val; + my $array = $conn->q("SELECT \$1::$type", $s_in)->text_params->flat; + my $res = $array->[0]; ok is_bool($res), "$test is bool" if $type eq 'bool'; ok created_as_number($res), "$test is number" if $type =~ /^(int|float)\d/; is_deeply $res, $p_out, "$test text->bin"; + $array->[0] = 0; # Must be writable } { my $res = $conn->q("SELECT \$1::$type", $p_in)->text_results->val; @@ -41,7 +43,11 @@ sub f($type, $p_in) { ok !eval { $conn->q("SELECT \$1::$type", $p_in)->val; 1 }, "$test fail"; } -ok !defined $conn->q('SELECT pg_sleep(0)')->val; # void +{ # void + my $array = $conn->q('SELECT pg_sleep(0)')->flat; + ok !defined $array->[0]; + $array->[0] = 0; +} v bool => true, undef, 1, 't'; v bool => false, undef, 0, 'f'; @@ -166,4 +172,10 @@ is $conn->q('SELECT ($1::int2[])[2]', [1,2,3,4])->val, 2; is $conn->q('SELECT ($1::int2vector)[1]', [1,2,3,4])->val, 2; is $conn->q('SELECT ($1::oidvector)[1]', [1,2,3,4])->val, 2; +{ + my $v = $conn->q("SELECT '{t,f,NULL}'::bool[]")->val; + is_deeply $v, [true, false, undef]; + $_ = 0 for @$v; +} + done_testing; From f8b0043e2248e981ea74bc08030fecb6fff93af2 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Tue, 29 Apr 2025 09:14:44 +0200 Subject: [PATCH 07/40] MultipartFormData: Bunch of parser fixes --- FU/MultipartFormData.pm | 11 ++++++----- t/multipart.t | 15 ++++++++++++++- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/FU/MultipartFormData.pm b/FU/MultipartFormData.pm index a92e4d7..7d9d77e 100644 --- a/FU/MultipartFormData.pm +++ b/FU/MultipartFormData.pm @@ -3,7 +3,7 @@ use v5.36; use Carp 'confess'; use FU::Util 'utf8_decode'; -sub _arg($d) { $d =~ s/^\s+//r =~ s/\s+$//r =~ s/^"(.+)"$/$1/r } +sub _arg($d) { $d =~ s{^"(.+)"$}{$1 =~ s/\\([\\"])/$1/rg}er } sub parse($pkg, $header, $data) { confess "Invalid multipart header '$header'" @@ -26,13 +26,14 @@ sub parse($pkg, $header, $data) { start => pos $data, }, $pkg; - confess "Missing content-disposition header" if $hdrs !~ /content-disposition:\s*form-data;(.+)/i; + confess "Missing content-disposition header" if $hdrs !~ /content-disposition:\s*form-data(.+)/i; my $v = $1; - confess "Missing 'name' parameter" if $v !~ /[;\s]name=([^[;\s]+)/; + my $pvalue = qr/("(?:\\[\\"]|[^\\"\r\n]+)*"|[^\s;"]*)/; + confess "Missing 'name' parameter" if $v !~ /;\s*name\s*=\s*$pvalue/; $d->{name} = utf8_decode _arg $1; - $d->{filename} = utf8_decode _arg $1 if $v =~ /[;\s]filename=([^;\s]+)/; + $d->{filename} = utf8_decode _arg $1 if $v =~ /;\s*filename\s*=\s*$pvalue/; - if ($hdrs =~ /content-type:\s*([^;\s]+)(?:\s*;\s*charset=([^;\s]+))?/i) { + if ($hdrs =~ /content-type:\s*$pvalue(?:\s*;\s*charset\s*=\s*$pvalue)?/i) { $d->{mime} = utf8_decode _arg $1; $d->{charset} = utf8_decode _arg $2 if $2; } diff --git a/t/multipart.t b/t/multipart.t index 842b9cd..e045ff1 100644 --- a/t/multipart.t +++ b/t/multipart.t @@ -14,12 +14,17 @@ Content-Type: text Content-Disposition: form-data; filename="example.txt"; name=field2 value2 +--delimiter12345 +Content-Type: something; charset = " a b \\ c " +Content-Disposition: form-data; name = "field \" name" ;filename= "月姫.jpg" + + --delimiter12345-- _ my $l = FU::MultipartFormData->parse('multipart/form-data;boundary="delimiter12345"', $t); -is scalar @$l, 2; +is scalar @$l, 3; my $v = $l->[0]; is $v->name, 'field1'; @@ -44,4 +49,12 @@ is $v->charset, undef; is $v->length, 6; is $v->data, 'value2'; +$v = $l->[2]; +is $v->name, 'field " name'; +is $v->filename, "\x{6708}\x{59eb}.jpg"; +is $v->mime, 'something'; +is $v->charset, ' a b \ c '; +is $v->length, 0; +is $v->data, ''; + done_testing; From f52ad9a2e6ae0b55072f078e8d3668bf8ffe397a Mon Sep 17 00:00:00 2001 From: Yorhel Date: Tue, 29 Apr 2025 13:51:28 +0200 Subject: [PATCH 08/40] json_format(): Fix buffer overflow in float formatting The ndigit argument to Gconvert() is the number of significant digits to format, not the size of the output buffer. D'oh. --- c/jsonfmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c/jsonfmt.c b/c/jsonfmt.c index fff3e4f..a6d46a4 100644 --- a/c/jsonfmt.c +++ b/c/jsonfmt.c @@ -244,7 +244,7 @@ static void fujson_fmt(pTHX_ fujson_fmt_ctx *ctx, SV *val) { if (isinfnan(nv)) croak("unable to format floating point NaN or Inf as JSON"); /* XXX: Cpanel::JSON::XS appears to always append a ".0" for round numbers, other modules do not. */ /* XXX#2: This doesn't support quadmath. Makefile.PL checks for that */ - fustr_reserve(ctx->out, NV_DIG+1); + fustr_reserve(ctx->out, NV_DIG+32); Gconvert(nv, NV_DIG, 0, ctx->out->cur); ctx->out->cur += strlen(ctx->out->cur); } else if (SvIOKp(val)) { From af9340f908dcca0852d2c3f05809f6f2d44a9ffd Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 30 Apr 2025 15:00:21 +0200 Subject: [PATCH 09/40] DebugInfo: Styling + add request/response body and fu obj contents Formatting is still shit. --- FU.pm | 14 +++-- FU/DebugImpl.pm | 139 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 110 insertions(+), 43 deletions(-) diff --git a/FU.pm b/FU.pm index 11c58ff..316f273 100644 --- a/FU.pm +++ b/FU.pm @@ -3,7 +3,7 @@ use v5.36; use Carp 'confess', 'croak'; use IO::Socket; use POSIX (); -use Time::HiRes 'clock_gettime', 'CLOCK_MONOTONIC'; +use Time::HiRes 'time', 'clock_gettime', 'CLOCK_MONOTONIC'; use FU::Log 'log_write'; use FU::Util; use FU::Validate; @@ -318,7 +318,11 @@ sub _log_err($e) { } sub _do_req($c) { - local $REQ = { hdr => {}, trace_start => clock_gettime(CLOCK_MONOTONIC), trace_id => sprintf('%010x%08x%04x', int time, $$, int rand 1<<16) }; + local $REQ = { + hdr => {}, + trace_start => clock_gettime(CLOCK_MONOTONIC), + trace_id => sprintf('%012x%06x%04x', int(time*10000) % (1<<(12*4)), $$ % (1<<(6*4)), int rand 1<<16) + }; local $fu = bless {}, 'FU::obj'; $REQ->{ip} = $c->{client_sock} isa 'IO::Socket::INET' ? $c->{client_sock}->peerhost : '127.0.0.1'; @@ -648,8 +652,8 @@ sub log_verbose($,$msg) { length $r->{body} ? do { my $b = substr $r->{body}, 0, 4096; my $trunc = length $r->{body} > 4096 ? ', truncated' : ''; - utf8::decode($b) ? ("Body (utf8$trunc)", _fmt_section($b =~ s/\r//rg =~ s/\n{4,}/\n[..]\n/rg)) - : ("Body (hex$trunc)", _fmt_section(unpack('H*', $b) =~ s/(.{128})/$1\n/rg)) + utf8::decode($b) ? ("Body (utf8$trunc):", _fmt_section($b =~ s/\r//rg =~ s/\n{4,}/\n[..]\n/rg)) + : ("Body (hex$trunc):", _fmt_section(unpack('H*', $b) =~ s/(.{128})/$1\n/rg)) } : (), 'Message:', _fmt_section $msg ); @@ -898,10 +902,12 @@ sub _finalize { ) { push @vary, 'accept-encoding'; if ($hasbrotli && ($r->{hdr}{'accept-encoding'}||'') =~ /\bbr\b/) { + $r->{resbody_orig} = $r->{resbody}; $r->{resbody} = FU::Util::brotli_compress(6, $r->{resbody}); $r->{reshdr}{'content-encoding'} = 'br'; } elsif ($hasgzip && ($r->{hdr}{'accept-encoding'}||'') =~ /\bgzip\b/) { + $r->{resbody_orig} = $r->{resbody}; $r->{resbody} = FU::Util::gzip_compress(6, $r->{resbody}); $r->{reshdr}{'content-encoding'} = 'gzip'; } diff --git a/FU/DebugImpl.pm b/FU/DebugImpl.pm index a0de7ea..f02ceed 100644 --- a/FU/DebugImpl.pm +++ b/FU/DebugImpl.pm @@ -16,27 +16,32 @@ sub loc_($loc) { my $l = $loc->[$_]; my $f = $_ == $#$loc ? '(main)' : $loc->[$_+1][3]; $f = "$l->[0]::$f" if $f !~ /^\Q$l->[0]/; - txt_ "$f @ $l->[1]:$l->[2]"; + txt_ $f; + small_ " @ $l->[1]:$l->[2]"; } } -sub fmtpre_($code) { - lit_ xml_escape($code) =~ s/^\s+//r =~ s/\s+$//r =~ s/\n/
/rg; -} - sub clean_re($str) { # Regex formatting isn't stable, but this cleans up the crap I'm seeing a little bit. "$str" =~ s/^\(\?\^u:\^\(\?\^u://r =~ s/\)\$\)$//r; } -my @tabs = ( +sub raw_data($str) { + my $d = substr $str, 0, 32*1024; + my $trunc = length $str > 32*1024 ? ', truncated' : ''; + return utf8::decode($d) ? ("utf8$trunc", $d) + : ("hex$trunc", unpack('H*', $d) =~ s/(.{128})/$1\n/rg =~ s/(.{16})/$1 /rg); +} + +my @sections = ( req => sub { + my $r = $FU::REQ; table_ sub { tr_ sub { td_ 'Method'; td_ fu->method }; tr_ sub { td_ 'Path'; td_ fu->path }; tr_ sub { td_ 'Query'; td_ fu->query }; tr_ sub { td_ 'Client IP'; td_ fu->ip }; - tr_ sub { td_ 'Received'; td_ fmtts(time - (($FU::REQ->{trace_end}||clock_gettime(CLOCK_MONOTONIC)) - $FU::REQ->{trace_start})) }; + tr_ sub { td_ 'Received'; td_ fmtts(time - (($r->{trace_end}||clock_gettime(CLOCK_MONOTONIC)) - $r->{trace_start})) }; }; h2_ 'Headers'; table_ sub { @@ -45,7 +50,38 @@ my @tabs = ( td_ fu->headers->{$_}; } for sort keys fu->headers->%*; }; - # TODO: Body? Certainly useful for JSON + if ((fu->header('content-length')||0) > 0) { + h2_ 'Body'; + section_ class => 'tabs', sub { + my $json = eval { fu->json({type=>'any'}) }; + details_ name => 'reqbody', open => !0, sub { + summary_ 'JSON'; + pre_ FU::Util::json_format($json, pretty => 1, canonical => 1); + } if $json; + my $formdata = eval { fu->formdata({type=>'hash'}) }; + details_ name => 'reqbody', open => !0, sub { + summary_ 'Form data'; + table_ sub { + for my $k (sort keys %$formdata) { + tr_ sub { + td_ $k; + td_ $_; + } for ref $formdata->{$k} ? $formdata->{$k}->@* : ($formdata->{$k}); + } + }; + } if $formdata; + my $multipart = eval { fu->multipart }; + details_ name => 'reqbody', open => !0, sub { + summary_ 'Multipart'; + pre_ join "\n", map $_->describe, @$multipart; + } if $multipart; + details_ name => 'reqbody', open => !0,sub { + my($lbl, $data) = raw_data $r->{body}; + summary_ "Raw ($lbl)"; + pre_ $data; + }; + } + } ('Request') }, @@ -84,11 +120,28 @@ my @tabs = ( } for !defined $v ? () : ref $v ? @$v : ($v); } }; + my $body = $r->{resbody_orig} // $r->{resbody}; + if (length $body) { + h2_ 'Body'; + section_ class => 'tabs', sub { + my $json = ($r->{reshdr}{'content-type'}||'') =~ /^application\/json/ && eval { FU::Util::json_parse($body, utf8 => 1) }; + details_ name => 'resbody', open => !0, sub { + summary_ 'JSON'; + pre_ FU::Util::json_format($json, pretty => 1, canonical => 1); + } if $json; + details_ name => 'resbody', open => !0,sub { + my($lbl, $data) = raw_data $body; + summary_ "Raw ($lbl)"; + pre_ $data; + }; + } + } ('Response') }, sql => sub { return () if !$FU::REQ->{trace_sql}; + # TODO: Summarize main table, expand to display full query, params table, interpolated query table_ sub { thead_ sub { tr_ sub { td_ class => 'num', 'Exec'; @@ -100,8 +153,7 @@ my @tabs = ( td_ class => 'num', sprintf '%.1f ms', $_->{exec_time}*1000; td_ class => 'num', !defined $_->{prepare_time} ? '-' : $_->{prepare_time} ? sprintf '%.1f ms', $_->{prepare_time}*1000 : 'cache'; td_ class => 'num', $_->{nrows}; - td_ class => 'code', sub { fmtpre_ $_->{query} }; - # TODO: Params, both separate and interpolated + td_ class => 'code', $_->{query}; } for $FU::REQ->{trace_sql}->@*; }; ('Queries', scalar $FU::REQ->{trace_sql}->@*) @@ -109,7 +161,11 @@ my @tabs = ( fu => sub { return () if !keys fu->%*; - # TODO: Contents of the 'fu' object + # TODO: This is kinda lazy, an expandable table might be nicer. + require Data::Dumper; + pre_ sub { + lit_ Data::Dumper->new([fu])->Sortkeys(1)->Terse(1)->Dump; + }; ('fu obj') }, @@ -186,7 +242,7 @@ my @tabs = ( } }; tr_ sub { td_ $_->[0]; - td_ class => 'code', sub { fmtpre_ $_->[1] }; + td_ class => 'code', $_->[1]; } for @$lst; }; ('Prepared statements', scalar @$lst) @@ -196,9 +252,10 @@ my @tabs = ( sub collect { my @t; - for my ($id, $sub) (@tabs) { + for my ($id, $sub) (@sections) { my($title, $num); my $html = fragment { ($title, $num) = $sub->() }; + utf8::decode($html); push @t, { id => $id, title => $title, num => $num, html => $html } if $title; } \@t @@ -215,42 +272,47 @@ sub framework_($data) { *, *:before, *:after { box-sizing: inherit } * { margin: 0; padding: 0; font: inherit; color: inherit } - body { display: grid; grid: 45px 400px / 220px auto; } - header { grid-column: 1 / 3; grid-row: 1 / 2 } - nav { grid-column: 1 / 2; grid-row: 2 / 3 } - main { grid-column: 2 / 3; grid-row: 2 / 3 } + /* Ugh, fixed positioning */ + header { position: fixed; top: 0; left: 0; width: 100%; height: 40px; z-index: 2 } + nav { position: fixed; top: 38px; left: 0; width: 200px; z-index: 2 } + main { margin: 0 0 0 200px } header, nav { background: #eee } - main { border-top: 2px solid #009; border-left: 2px solid #009 } - nav { border-bottom: 2px solid #009 } + header { border-bottom: 2px solid #009 } + nav { border-bottom: 2px solid #009; border-right: 2px solid #009 } - header { display: flex; justify-content: space-between; padding: 10px } - header h1 { font-size: 20px; font-weight: bold } + header { display: flex; justify-content: space-between; align-items: baseline; padding: 5px 10px } + header h1 { font-size: 120%; font-weight: bold } header menu { list-style-type: none; display: flex; gap: 15px } body > input { display: none } nav { padding-top: 20px } nav menu { list-style-type: none } - nav label { display: block; width: 100%; padding: 2px 10px; cursor: pointer; white-space: nowrap } - nav label:hover { background-color: #fff } - nav label span { float: right; font-size: 80% } + nav a { display: block; width: 100%; text-decoration: none; padding: 2px 10px; cursor: pointer; white-space: nowrap } + nav a:hover { background-color: #fff } + nav a span { float: right; font-size: 80% } - main { padding: 10px 20px } - main h2 { margin: 30px 0 5px -10px; font-size: 20px; font-weight: bold } - main h2:first-child { margin-top: 0 } + main { padding: 0 10px 30px 10px } + main h1 { background: #eee; padding: 5px 10px 5px 205px; margin: 40px -10px 10px -210px; scroll-margin-top: 40px; font-size: 130%; font-weight: bold } + main h2 { margin: 20px 0 5px 0; font-size: 120%; font-weight: bold } - p, pre, table { margin: 5px 0 } - pre, .code { font-family: monospace; white-space: pre } + p, table, pre { margin: 5px 0 } + pre { font-family: monospace; white-space: pre; overflow-x: auto; padding-bottom: 15px; /* for the scrollbar, kinda browser-specific */ } table { border-collapse: collapse } td { padding: 1px 10px 1px 0; font-size: 12px; vertical-align: top } + td.code { font-family: monospace } tr:hover { background-color: #eee } thead { font-weight: bold } .num { text-align: right; white-space: nowrap } + + section.tabs { position: relative; display: flex; flex-wrap: wrap; z-index: 1; } + section.tabs summary { cursor: pointer; order: 0; display: block; padding: 3px 5px; margin-right: 10px; background: #ddd } + section.tabs summary:hover, section.tabs details[open] summary { background: #eee } + section.tabs details { display: contents } + section.tabs details *:nth-child(2) { order: 1; width: 100% } + + small { color: #555; font-size: 90% } _ - style_ type => 'text/css', join "\n", map +( - "#tab_$_:checked ~ nav menu li label[for=tab_$_] { background-color: #fff }", - "#tab_$_:not(:checked) ~ main #tabc_$_ { display: none }", - ), map $_->{id}, @$data; }; body_ sub { header_ sub { @@ -261,22 +323,21 @@ sub framework_($data) { li_ sub { a_ href => '?', 'Listing' }; }; }; - input_ type => 'radio', name => 'tab', id => "tab_$_->{id}", checked => $_ eq $data->[0] ? 'checked' : undef for @$data; nav_ sub { menu_ sub { li_ sub { - label_ for => "tab_$_->{id}", sub { + a_ href => "#$_->{id}", sub { txt_ $_->{title}; span_ $_->{num} if defined $_->{num}; - } + }; } for @$data; }; } if @$data; main_ sub { - div_ id => "tabc_$_->{id}", sub { - h2_ $_->{title}; + for (@$data) { + h1_ id => $_->{id}, $_->{title}; lit_ $_->{html}; - } for @$data; + } }; }; }; From beeefcf3373a11877d121f7eded08ab238b36189 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 30 Apr 2025 17:07:44 +0200 Subject: [PATCH 10/40] Pg: Add perl2bin() and bin2perl() conversion methods --- FU.xs | 8 ++++++++ FU/Pg.pm | 26 +++++++++++++++++++++++++- c/pgconn.c | 27 +++++++++++++++++++++++++++ t/pgtypes.t | 10 +++++++++- 4 files changed, 69 insertions(+), 2 deletions(-) diff --git a/FU.xs b/FU.xs index dc19870..68082a5 100644 --- a/FU.xs +++ b/FU.xs @@ -286,6 +286,14 @@ void _set_type(fupg_conn *c, SV *name, SV *sendsv, SV *recvsv) fupg_set_type(aTHX_ c, name, sendsv, recvsv); XSRETURN(1); +void perl2bin(fupg_conn *c, int oid, SV *sv) + CODE: + ST(0) = fupg_perl2bin(aTHX_ c, oid, sv); + +void bin2perl(fupg_conn *c, int oid, SV *sv) + CODE: + ST(0) = fupg_bin2perl(aTHX_ c, oid, sv); + MODULE = FU PACKAGE = FU::Pg::txn diff --git a/FU/Pg.pm b/FU/Pg.pm index f43c7f8..d88a331 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -758,7 +758,31 @@ C to configure appropriate conversions for these types. =back -I Methods to convert between the various formats. +Utility functions: + +=over + +=item $conn->perl2bin($oid, $val) + +=item $conn->bin2perl($oid, $bin) + +Convert the value for a specific type between the Perl representation and the +PostgreSQL binary format, using the current type configuration of the +connection. This is the same conversion used internally by this module to send +bind parameters and receive query results, and map to the C and C +functions of C<< $conn->set_type() >>. + +These methods throw an error if C<$oid> is not a known type or if the given +data is not valid for the type. However, these methods should not be used for +strict validation: the conversion routines are usually written under the +assumption that the data has been received directly from Postgres or is about +to be sent to (and further validated by) Postgres. For some types, +C may return invalid data on invalid input and C may +accept invalid binary data. + +=back + +I Methods to convert between bin and text formats. I Methods to query type info. diff --git a/c/pgconn.c b/c/pgconn.c index 3a20b56..04423e6 100644 --- a/c/pgconn.c +++ b/c/pgconn.c @@ -626,3 +626,30 @@ static void fupg_tio_free(fupg_tio *tio) { safefree(tio->record.tio); } } + + + + +static SV *fupg_perl2bin(pTHX_ fupg_conn *conn, Oid oid, SV *sv) { + int refresh_done = 0; + fupg_tio tio; + fustr buf; + memset(&tio, 0, sizeof(tio)); + fupg_tio_setup(aTHX_ conn, &tio, FUPGT_SEND, oid, &refresh_done); + fustr_init(&buf, sv_newmortal(), SIZE_MAX); + tio.send(aTHX_ &tio, sv, &buf); /* XXX: Leaks 'tio' on error */ + fupg_tio_free(&tio); + return fustr_done(&buf); +} + +static SV *fupg_bin2perl(pTHX_ fupg_conn *conn, Oid oid, SV *sv) { + int refresh_done = 0; + fupg_tio tio; + STRLEN len; + const char *buf = SvPVbyte(sv, len); + memset(&tio, 0, sizeof(tio)); + fupg_tio_setup(aTHX_ conn, &tio, FUPGT_RECV, oid, &refresh_done); + SV *r = tio.recv(aTHX_ &tio, buf, len); /* XXX: Leaks 'tio' on error */ + fupg_tio_free(&tio); + return r; +} diff --git a/t/pgtypes.t b/t/pgtypes.t index 3a3252c..67e566e 100644 --- a/t/pgtypes.t +++ b/t/pgtypes.t @@ -19,9 +19,12 @@ sub v($type, $p_in, @args) { my $s_out = @args > 2 && defined $args[2] ? $args[2] : $s_in; my $test = "$type $s_in" =~ s/\n/\\n/rg; + my $oid; utf8::encode($test); { - my $array = $conn->q("SELECT \$1::$type", $s_in)->text_params->flat; + my $st = $conn->q("SELECT \$1::$type", $s_in)->text_params; + $oid = $st->param_types->[0]; + my $array = $st->flat; my $res = $array->[0]; ok is_bool($res), "$test is bool" if $type eq 'bool'; ok created_as_number($res), "$test is number" if $type =~ /^(int|float)\d/; @@ -36,6 +39,11 @@ sub v($type, $p_in, @args) { my $res = $conn->q("SELECT \$1::$type", $p_in)->val; is_deeply $res, $p_out, "$test bin->bin"; } + { + my $bin = $conn->perl2bin($oid, $p_in); + ok defined $bin; + is_deeply $conn->bin2perl($oid, $bin), $p_out if $type !~ /\(/; + } } sub f($type, $p_in) { my $test = "$type $p_in" =~ s/\n/\\n/rg; From 76f55f277bd94155827ce6dcf23566f15128bab8 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 30 Apr 2025 20:02:12 +0200 Subject: [PATCH 11/40] Pg: Add text2bin() and bin2text() conversion methods --- FU.xs | 8 ++++++++ FU/Pg.pm | 19 +++++++++++++++++-- c/pgconn.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ t/pgtypes.t | 19 ++++++++++++++++++- 4 files changed, 93 insertions(+), 3 deletions(-) diff --git a/FU.xs b/FU.xs index 68082a5..1c342be 100644 --- a/FU.xs +++ b/FU.xs @@ -294,6 +294,14 @@ void bin2perl(fupg_conn *c, int oid, SV *sv) CODE: ST(0) = fupg_bin2perl(aTHX_ c, oid, sv); +void bin2text(fupg_conn *c, ...) + CODE: + XSRETURN(fupg_bintext(aTHX_ c, 0, ax, items)); + +void text2bin(fupg_conn *c, ...) + CODE: + XSRETURN(fupg_bintext(aTHX_ c, 1, ax, items)); + MODULE = FU PACKAGE = FU::Pg::txn diff --git a/FU/Pg.pm b/FU/Pg.pm index d88a331..ff88d60 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -780,9 +780,24 @@ to be sent to (and further validated by) Postgres. For some types, C may return invalid data on invalid input and C may accept invalid binary data. -=back +=item $conn->bin2text($oid, $bin, ...) -I Methods to convert between bin and text formats. +=item $conn->text2bin($oid, $text, ...) + +Convert between the binary format and the PostgreSQL text format. This +conversion requires a round-trip to the server and throws an error if the +connection state is not I or I. Since it is Postgres doing the +conversion, the input is properly validated and, in the case of C, +the result is guaranteed to be suitable for use as a textual bind parameter or +for inclusion in an SQL query (but don't forget to use C in +that case). + +Calling these methods many times can be pretty slow. If you have several values +to convert, you can do that in a single call to speed things up: + + my($text1, $text2, ..) = $conn->bin2text($oid1, $bin1, $oid2, $bin2, ..); + +=back I Methods to query type info. diff --git a/c/pgconn.c b/c/pgconn.c index 04423e6..607a9c6 100644 --- a/c/pgconn.c +++ b/c/pgconn.c @@ -653,3 +653,53 @@ static SV *fupg_bin2perl(pTHX_ fupg_conn *conn, Oid oid, SV *sv) { fupg_tio_free(&tio); return r; } + + +static I32 fupg_bintext(pTHX_ fupg_conn *conn, int format, I32 ax, I32 argc) { + int vals = argc/2; + + if (argc == 1 || argc % 2 == 0) croak("Usage: $conn->%s(oid, data, ...)", format ? "text2bin" : "bin2text"); + if (vals > 1 && GIMME_V != G_LIST) { + ST(0) = sv_2mortal(newSViv(vals)); + return 1; + } + + Oid *paramtypes = safemalloc(vals * sizeof(*paramtypes)); + const char **paramvalues = safemalloc(vals * sizeof(*paramvalues)); + int *paramlengths = safemalloc(vals * sizeof(*paramlengths)); + int *paramformats = safemalloc(vals * sizeof(*paramformats)); + + fustr sql; + fustr_init(&sql, NULL, SIZE_MAX); + fustr_write(&sql, "SELECT ", 7); + + STRLEN len; + int i; + for (i=0; iconn, fustr_start(&sql), vals, + paramtypes, paramvalues, paramlengths, paramformats, format); + safefree(paramtypes); + safefree(paramvalues); + safefree(paramlengths); + safefree(paramformats); + SvREFCNT_dec(sql.sv); + + if (!r) fupg_conn_croak(conn, "exec"); + if (PQresultStatus(r) != PGRES_TUPLES_OK) fupg_result_croak(r, "exec", sql.sv ? "SELECT $1, ..." : sql.sbuf); + + /* The stack is guaranteed to be large enough, since we received 1+2*vals arguments */ + for (i=0; iperl2bin($oid, $p_in); ok defined $bin; - is_deeply $conn->bin2perl($oid, $bin), $p_out if $type !~ /\(/; + if ($type !~ /\(/) { + is_deeply $conn->bin2perl($oid, $bin), $p_out; + is $conn->bin2text($oid, $bin), $s_out; + is $conn->text2bin($oid, $s_out), $bin if $type ne 'jsonb'; # jsonb pretty-prints for some reason + } } } sub f($type, $p_in) { @@ -180,6 +184,19 @@ is $conn->q('SELECT ($1::int2[])[2]', [1,2,3,4])->val, 2; is $conn->q('SELECT ($1::int2vector)[1]', [1,2,3,4])->val, 2; is $conn->q('SELECT ($1::oidvector)[1]', [1,2,3,4])->val, 2; +is_deeply [$conn->bin2text( + 16, $conn->perl2bin(16, 1), + 25, 'Hello', + 1007, $conn->perl2bin(1007, [-3,1,undef]) +)], ['t', 'Hello', '{-3,1,NULL}']; + +{ + my($b,$s,$a) = $conn->text2bin(16, 't', 25, 'Hello', 1007, '{-3,1,NULL}'); + is $conn->bin2perl(16, $b), 1; + is $conn->bin2perl(25, $s), 'Hello'; + is_deeply $conn->bin2perl(1007, $a), [-3,1,undef]; +} + { my $v = $conn->q("SELECT '{t,f,NULL}'::bool[]")->val; is_deeply $v, [true, false, undef]; From cbccf045b71f22696c5d235345271b00a1f1ce51 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 1 May 2025 11:48:08 +0200 Subject: [PATCH 12/40] DebugInfo: Expand queries table with params & details Apart from the ugly implementation, this is pretty neat. --- FU.pm | 23 ++++-- FU/DebugImpl.pm | 189 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 153 insertions(+), 59 deletions(-) diff --git a/FU.pm b/FU.pm index 316f273..8b8eaa3 100644 --- a/FU.pm +++ b/FU.pm @@ -121,11 +121,24 @@ sub query_trace($st,@) { $REQ->{trace_nsqldirect}++ if !defined $st->prepare_time; $REQ->{trace_sqlexec} += $st->exec_time; $REQ->{trace_sqlprep} += $st->prepare_time if $st->prepare_time; - push $REQ->{trace_sql}->@*, { - query => $st->query, nrows => $st->nrows, - param_types => $st->param_types, param_values => $st->param_values, - exec_time => $st->exec_time, prepare_time => $st->prepare_time, - } if FU::debug; + if (FU::debug) { + my $t = $st->param_types; + my $v = $st->param_values; + my $txt = $st->get_text_params; + push $REQ->{trace_sql}->@*, { + query => $st->query, nrows => $st->nrows, + exec_time => $st->exec_time, prepare_time => $st->prepare_time, + # Store the binary value when we're in binary params mode, that way + # we don't have to keep a reference to the original perl value and + # we can defer & batch the conversion to text. + params => [ map +{ + type => $t->[$_], + !defined $v->[$_] ? (text => undef) : + $txt ? (text => "$v->[$_]") + : (bin => $DB->perl2bin($t->[$_], $v->[$_])) + }, 0..$#$v ], + }; + } } sub _connect_db { $DB = ref $INIT_DB eq 'CODE' ? $INIT_DB->() : FU::Pg->connect($INIT_DB); diff --git a/FU/DebugImpl.pm b/FU/DebugImpl.pm index f02ceed..48c6cf1 100644 --- a/FU/DebugImpl.pm +++ b/FU/DebugImpl.pm @@ -1,6 +1,7 @@ # Internal module used by FU.pm package FU::DebugImpl 0.5; use v5.36; +use utf8; use experimental 'for_list'; use FU; use FU::XMLWriter ':html5_', 'fragment', 'xml_escape'; @@ -140,23 +141,74 @@ my @sections = ( }, sql => sub { - return () if !$FU::REQ->{trace_sql}; - # TODO: Summarize main table, expand to display full query, params table, interpolated query - table_ sub { + my $queries = $FU::REQ->{trace_sql}; + return () if !$queries; + + # Convert binary params to text. + # For queries with text_params, assume the params are already valid for the text format. + my @binparams = grep $_->{type} && !$_->{text}, map $_->{params}->@*, @$queries; + my @arg = map +($_->{type}, $_->{bin}), @binparams; + my @text; + my $ok = !@arg || eval { @text = $FU::DB->bin2text(@arg); 1 }; + $binparams[$_]{text} = $text[$_] for 0..$#text; + pre_ "Error converting binary parameters:\n$@" if !$ok; + + input_ type => 'checkbox', id => "row${_}_c" for 0..$#{$queries}; + table_ class => 'sqlt', sub { thead_ sub { tr_ sub { td_ class => 'num', 'Exec'; td_ class => 'num', 'Prep'; td_ class => 'num', 'Rows'; td_ 'Query'; } }; + my $rows = 0; + for my($i, $st) (builtin::indexed $queries->@*) { + $rows += $st->{nrows}; + tr_ sub { + td_ class => 'num', sprintf '%.1f ms', $st->{exec_time}*1000; + td_ class => 'num', !defined $st->{prepare_time} ? '-' : $st->{prepare_time} ? sprintf '%.1f ms', $st->{prepare_time}*1000 : 'cache'; + td_ class => 'num', $st->{nrows}; + td_ class => 'sum', sub { + label_ for => "row${i}_c", sub { + span_ class => 'closed', '▶'; + span_ class => 'open', '▼'; + txt_ $st->{query} =~ s/[\r\n]/ /rg =~ s/\s\s+/ /rg =~ s/^\s+//r; + }; + }; + }; + tr_ class => 'details', id => "row$i", sub { + td_ ''; + td_ colspan => 3, sub { + pre_ $st->{query}; + if ($st->{params}->@*) { + strong_ 'Parameters:'; + table_ sub { + tr_ sub { + td_ class => 'num', sprintf '$%d =', $_+1; + td_ class => 'code', sub { + my $p = $st->{params}[$_]{text}; + !defined $p ? em_ 'null' : txt_ $p; + }; + } for (0..$#{$st->{params}}); + }; + # XXX: Buggy when the query contains string literals with $n variables. + strong_ 'Interpolated:'; + pre_ $st->{query} =~ s{\$([1-9][0-9]*)}{ + my $v = $st->{params}[$1-1]{text}; + defined $v ? $FU::DB->escape_literal($v) : 'NULL' + }egr; + } + }; + }; + } tr_ sub { - td_ class => 'num', sprintf '%.1f ms', $_->{exec_time}*1000; - td_ class => 'num', !defined $_->{prepare_time} ? '-' : $_->{prepare_time} ? sprintf '%.1f ms', $_->{prepare_time}*1000 : 'cache'; - td_ class => 'num', $_->{nrows}; - td_ class => 'code', $_->{query}; - } for $FU::REQ->{trace_sql}->@*; + td_ class => 'num', sprintf '%.1f ms', $FU::REQ->{trace_sqlexec}*1000; + td_ class => 'num', !defined $FU::REQ->{trace_sqlprep} ? '-' : sprintf '%.1f ms', $FU::REQ->{trace_sqlprep}*1000; + td_ class => 'num', $rows; + td_ class => 'sum', 'total'; + } if @$queries > 1; }; - ('Queries', scalar $FU::REQ->{trace_sql}->@*) + ('Queries', scalar @$queries) }, fu => sub { @@ -245,7 +297,7 @@ my @sections = ( td_ class => 'code', $_->[1]; } for @$lst; }; - ('Prepared statements', scalar @$lst) + ('Prepared stmts', scalar @$lst) }, ); @@ -267,51 +319,8 @@ sub framework_($data) { head_ sub { title_ 'FU Debugging Interface'; meta_ name => 'viewport', content => 'width=device-width, initial-scale=1.0, user-scalable=yes'; + link_ rel => 'stylesheet', type => 'text/css', media => 'all', href => '?css'; style_ type => 'text/css', <<~_; - html { box-sizing: border-box; color: #000; background: #fff } - *, *:before, *:after { box-sizing: inherit } - * { margin: 0; padding: 0; font: inherit; color: inherit } - - /* Ugh, fixed positioning */ - header { position: fixed; top: 0; left: 0; width: 100%; height: 40px; z-index: 2 } - nav { position: fixed; top: 38px; left: 0; width: 200px; z-index: 2 } - main { margin: 0 0 0 200px } - - header, nav { background: #eee } - header { border-bottom: 2px solid #009 } - nav { border-bottom: 2px solid #009; border-right: 2px solid #009 } - - header { display: flex; justify-content: space-between; align-items: baseline; padding: 5px 10px } - header h1 { font-size: 120%; font-weight: bold } - header menu { list-style-type: none; display: flex; gap: 15px } - - body > input { display: none } - nav { padding-top: 20px } - nav menu { list-style-type: none } - nav a { display: block; width: 100%; text-decoration: none; padding: 2px 10px; cursor: pointer; white-space: nowrap } - nav a:hover { background-color: #fff } - nav a span { float: right; font-size: 80% } - - main { padding: 0 10px 30px 10px } - main h1 { background: #eee; padding: 5px 10px 5px 205px; margin: 40px -10px 10px -210px; scroll-margin-top: 40px; font-size: 130%; font-weight: bold } - main h2 { margin: 20px 0 5px 0; font-size: 120%; font-weight: bold } - - p, table, pre { margin: 5px 0 } - pre { font-family: monospace; white-space: pre; overflow-x: auto; padding-bottom: 15px; /* for the scrollbar, kinda browser-specific */ } - table { border-collapse: collapse } - td { padding: 1px 10px 1px 0; font-size: 12px; vertical-align: top } - td.code { font-family: monospace } - tr:hover { background-color: #eee } - thead { font-weight: bold } - .num { text-align: right; white-space: nowrap } - - section.tabs { position: relative; display: flex; flex-wrap: wrap; z-index: 1; } - section.tabs summary { cursor: pointer; order: 0; display: block; padding: 3px 5px; margin-right: 10px; background: #ddd } - section.tabs summary:hover, section.tabs details[open] summary { background: #eee } - section.tabs details { display: contents } - section.tabs details *:nth-child(2) { order: 1; width: 100% } - - small { color: #555; font-size: 90% } _ }; body_ sub { @@ -378,10 +387,23 @@ sub load($id) { fu->set_body(scalar <$fn>); } +sub css { + # Awful CSS row hiding hack. I'm not sorry. + state $css = join '', , map qq{ + #row${_}_c:checked ~ * label[for=row${_}_c] .closed { display: none } + #row${_}_c:not(:checked) ~ * label[for=row${_}_c] .open { display: none } + #row${_}_c:not(:checked) ~ * #row${_} { display: none } + }, 0..1000; +} + sub render { my $q = fu->query; if (!$q) { fu->set_body(framework_ [{id => 'lst', title => 'Recent Requests', html => fragment \&listing_ }]); + } elsif ($q eq 'css') { + fu->set_header('content-type', 'text/css'); + fu->set_header('cache-control', 'max-age=86400'); + fu->set_body(css()); } elsif ($q eq 'cur') { fu->set_body(framework_ collect); } elsif ($q eq 'last') { @@ -415,3 +437,62 @@ sub save { } 1; + +__DATA__ +html { box-sizing: border-box; color: #000; background: #fff } +*, *:before, *:after { box-sizing: inherit } +* { margin: 0; padding: 0; font: inherit; color: inherit } + +/* Ugh, fixed positioning */ +header { position: fixed; top: 0; left: 0; width: 100%; height: 40px; z-index: 2 } +nav { position: fixed; top: 38px; left: 0; width: 200px; z-index: 2 } +main { margin: 0 0 0 200px } + +header, nav { background: #eee } +header { border-bottom: 2px solid #009 } +nav { border-bottom: 2px solid #009; border-right: 2px solid #009 } + +header { display: flex; justify-content: space-between; align-items: baseline; padding: 5px 10px } +header h1 { font-size: 120%; font-weight: bold } +header menu { list-style-type: none; display: flex; gap: 15px } + +body > input { display: none } +nav { padding-top: 20px } +nav menu { list-style-type: none } +nav a { display: block; width: 100%; text-decoration: none; padding: 2px 10px; cursor: pointer; white-space: nowrap } +nav a:hover { background-color: #fff } +nav a span { float: right; font-size: 80% } + +main { padding: 0 10px 30px 10px } +main h1 { background: #eee; padding: 5px 10px 5px 205px; margin: 40px -10px 10px -210px; scroll-margin-top: 40px; font-size: 130%; font-weight: bold } +main h2 { margin: 20px 0 5px 0; font-size: 120%; font-weight: bold } + +p, table, pre { margin: 5px 0 } +pre { border-left: 2px dotted #999; padding-left: 5px; font-family: monospace; white-space: pre; overflow-x: auto; padding-bottom: 15px; /* for the scrollbar, kinda browser-specific */ } +table { border-collapse: collapse } +td { padding: 1px 10px 1px 0; font-size: 12px; vertical-align: top } +td.code { font-family: monospace } +tr:hover { background-color: #eee } +thead { font-weight: bold } +.num { text-align: right; white-space: nowrap } + +section.tabs { position: relative; display: flex; flex-wrap: wrap; z-index: 1; } +section.tabs summary { cursor: pointer; order: 0; display: block; padding: 3px 5px; margin-right: 10px; background: #ddd } +section.tabs summary:hover, section.tabs details[open] summary { background: #eee } +section.tabs details { display: contents } +section.tabs details *:nth-child(2) { order: 1; width: 100% } + +.sqlt { width: 100%; table-layout: fixed } +.sqlt .num { width: 50px } +.sqlt .num:first-child { width: 75px } +.sqlt .num:nth-child(2) { width: 60px } +.sqlt .sum { white-space: nowrap; font-family: monospace; overflow: hidden; text-overflow: ellipsis } +.sqlt label { cursor: pointer } +.sqlt label span { color: #555; display: inline-block; width: 15px } +.sqlt tr.details { background: #fff } +.sqlt tr.details > td { padding-bottom: 10px } +input[id^=row] { display: none } + +small { color: #555; font-size: 90% } +em { font-style: italic } +strong { font-weight: bold } From 6787f32fd9c4f22f20905e260f7aa48eb88e06a8 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sat, 3 May 2025 12:32:50 +0200 Subject: [PATCH 13/40] DebugInfo: Fix handling of undef and falsy bind parameters --- FU/DebugImpl.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FU/DebugImpl.pm b/FU/DebugImpl.pm index 48c6cf1..88e0bb5 100644 --- a/FU/DebugImpl.pm +++ b/FU/DebugImpl.pm @@ -146,7 +146,7 @@ my @sections = ( # Convert binary params to text. # For queries with text_params, assume the params are already valid for the text format. - my @binparams = grep $_->{type} && !$_->{text}, map $_->{params}->@*, @$queries; + my @binparams = grep $_->{type} && !exists $_->{text}, map $_->{params}->@*, @$queries; my @arg = map +($_->{type}, $_->{bin}), @binparams; my @text; my $ok = !@arg || eval { @text = $FU::DB->bin2text(@arg); 1 }; From 52c36e0aeac242d960c357205f0fef30fde7dc90 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 4 May 2025 12:18:12 +0200 Subject: [PATCH 14/40] FU: Preserve existing headers on fu->redirect() Allows setting custom headers (in particular, cookies) when redirecting. This behavior is consistent with send_file(). --- FU.pm | 1 - 1 file changed, 1 deletion(-) diff --git a/FU.pm b/FU.pm index 8b8eaa3..f81be88 100644 --- a/FU.pm +++ b/FU.pm @@ -862,7 +862,6 @@ sub send_file($, $root, $path) { sub redirect($, $code, $location) { state $alias = {qw/ perm 301 temp 302 tempget 303 tempsame 307 permsame 308 /}; - fu->reset; fu->status($alias->{$code} // $code); fu->set_header(location => "$location"); fu->set_header('content-type', 'text/plain'); From 6c54ee30911bb26dfd1fd0538cd5bd1d1280ac0f Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 9 May 2025 08:32:41 +0200 Subject: [PATCH 15/40] FU: Reject some invalid characters in path --- FU.pm | 2 ++ FU/Util.pm | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/FU.pm b/FU.pm index f81be88..1a826e1 100644 --- a/FU.pm +++ b/FU.pm @@ -313,10 +313,12 @@ sub _read_req($c) { # Decode these into Unicode strings and check for special characters. eval { FU::Util::utf8_decode($_); 1} || fu->error(400, $@) for ($REQ->{path}, $REQ->{qs}, values $REQ->{hdr}->%*); + fu->error(400, 'Invalid character in path') if $REQ->{path} =~ /#/; # Some bots don't correctly split off the fragment ($REQ->{path}, my $qs) = split /\?/, $REQ->{path}//'', 2; $REQ->{qs} //= $qs; eval { $REQ->{path} = FU::Util::uri_unescape($REQ->{path}); 1; } || fu->error(400, $@); + fu->error(400, 'Invalid character in path') if $REQ->{path} =~ /[\r\n\t]/; # There are plenty other questionable characters, but newlines and tabs are definitely out } diff --git a/FU/Util.pm b/FU/Util.pm index 7d585d9..18db781 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -20,7 +20,7 @@ our @EXPORT_OK = qw/ sub utf8_decode :prototype($) { return if !defined $_[0]; confess 'Invalid UTF-8' if !utf8::decode($_[0]); - confess 'Invalid control character' if $_[0] =~ /[\x00-\x08\x0b\x0c\x0e-\x1f]/; + confess 'Invalid control character' if $_[0] =~ /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/; $_[0] } From 8dbc17ab37858cca82410f222c71a56c427beb94 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 9 May 2025 09:53:43 +0200 Subject: [PATCH 16/40] FU: Fix error logging of formdata --- FU.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FU.pm b/FU.pm index 1a826e1..106322e 100644 --- a/FU.pm +++ b/FU.pm @@ -663,7 +663,7 @@ sub log_verbose($,$msg) { 'Headers:', (map " $_: $r->{hdr}{$_}", sort keys $r->{hdr}->%*), $r->{multipart} ? ('Body (multipart):', _fmt_section join "\n", map $_->describe, $r->{multipart}->@*) : $r->{json} ? ('Body (JSON):', _fmt_section FU::Util::json_format($r->{json}, pretty => 1, canonical => 1)) : - $r->{formdata} ? ('Body (formdata):', _fmt_section FU::Util::json_format($r->{json}, pretty => 1, canonical => 1)) : + $r->{formdata} ? ('Body (formdata):', _fmt_section FU::Util::json_format($r->{formdata}, pretty => 1, canonical => 1)) : length $r->{body} ? do { my $b = substr $r->{body}, 0, 4096; my $trunc = length $r->{body} > 4096 ? ', truncated' : ''; From 383ed8409cf823a2e7966f5692fa3aa6167039a6 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 11 May 2025 10:33:48 +0200 Subject: [PATCH 17/40] bench: version updates + add small Pg benchmark --- FU/Benchmarks.pod | 180 +++++++++++++++++++++++++++------------------- bench.PL | 45 +++++++++++- 2 files changed, 149 insertions(+), 76 deletions(-) diff --git a/FU/Benchmarks.pod b/FU/Benchmarks.pod index 2479667..37415ff 100644 --- a/FU/Benchmarks.pod +++ b/FU/Benchmarks.pod @@ -26,9 +26,11 @@ The following module versions were used: =over -=item L 4.38 +=item L 4.39 -=item L 0.1 +=item L 3.18.0 + +=item L 0.5 =item L 1.08 @@ -40,6 +42,8 @@ The following module versions were used: =item L 4.03 +=item L 0.15 + =item L 1.5 =item L 0.900 @@ -56,266 +60,294 @@ These benchmarks run on large-ish arrays with repeated values. JSON encoding is sufficiently fast that Perl function calling overhead tends to dominate for smaller inputs, but I don't find that overhead very interesting. -Also worth noting that JSON::SIMD formatting code is forked from JSON::XS, the -SIMD parts are only used for parsing. +Also worth noting that L formatting code is forked from +L, the SIMD parts are only used for parsing. API object from L documentation. Encode Canonical Decode JSON::PP 5312/s 5119/s 1290/s JSON::Tiny 7757/s - 3426/s - Cpanel::JSON::XS 108187/s 101867/s 103575/s + Cpanel::JSON::XS 114802/s 104141/s 107274/s JSON::SIMD 130137/s 118948/s 115123/s JSON::XS 128421/s 120243/s 117940/s - FU::Util 133182/s 113275/s 118213/s + FU::Util 132890/s 111630/s 121124/s Object (small) Encode Canonical Decode JSON::PP 907/s 829/s 202/s JSON::Tiny 1224/s - 499/s - Cpanel::JSON::XS 43168/s 28114/s 19229/s + Cpanel::JSON::XS 45732/s 30862/s 20102/s JSON::SIMD 49019/s 30699/s 23267/s JSON::XS 49814/s 31326/s 25336/s - FU::Util 44110/s 26134/s 21144/s + FU::Util 43853/s 26568/s 20426/s Object (large) Encode Canonical Decode JSON::PP 927/s 747/s 104/s JSON::Tiny 1108/s - 392/s - Cpanel::JSON::XS 29672/s 12637/s 16609/s + Cpanel::JSON::XS 30587/s 11875/s 15515/s JSON::SIMD 24418/s 12388/s 22895/s JSON::XS 23192/s 13174/s 23553/s - FU::Util 39477/s 13567/s 17178/s + FU::Util 36455/s 11920/s 17370/s Object (large, mixed unicode) Encode Canonical Decode JSON::PP 817/s 679/s 86/s JSON::Tiny 1036/s - 402/s - Cpanel::JSON::XS 20437/s 1345/s 7408/s + Cpanel::JSON::XS 25333/s 1459/s 7480/s JSON::SIMD 25031/s 1331/s 15997/s JSON::XS 23580/s 1375/s 8526/s - FU::Util 34435/s 11916/s 9419/s + FU::Util 33085/s 12639/s 9375/s Small integers Encode Decode JSON::PP 113/s 29/s JSON::Tiny 160/s 86/s - Cpanel::JSON::XS 7137/s 6083/s + Cpanel::JSON::XS 7345/s 6151/s JSON::SIMD 7963/s 4361/s JSON::XS 7915/s 6058/s - FU::Util 8565/s 5639/s + FU::Util 7883/s 5671/s Large integers Encode Decode JSON::PP 2176/s 329/s JSON::Tiny 2999/s 1638/s - Cpanel::JSON::XS 31302/s 48892/s + Cpanel::JSON::XS 32545/s 50162/s JSON::SIMD 37201/s 51719/s JSON::XS 36722/s 50110/s - FU::Util 116188/s 62110/s + FU::Util 110210/s 61006/s ASCII strings Encode Decode JSON::PP 2934/s 336/s JSON::Tiny 4126/s 1439/s - Cpanel::JSON::XS 116744/s 43489/s + Cpanel::JSON::XS 116721/s 44560/s JSON::SIMD 134711/s 50429/s JSON::XS 135419/s 43976/s - FU::Util 182026/s 44312/s + FU::Util 164804/s 48163/s Unicode strings Encode Decode JSON::PP 5113/s 253/s JSON::Tiny 6603/s 2585/s - Cpanel::JSON::XS 91704/s 64489/s + Cpanel::JSON::XS 97039/s 67669/s JSON::SIMD 106928/s 102440/s JSON::XS 105473/s 60558/s - FU::Util 217135/s 58972/s + FU::Util 187489/s 61121/s String escaping (few) Encode Decode JSON::PP 4251/s 352/s JSON::Tiny 4704/s 1869/s - Cpanel::JSON::XS 131789/s 106306/s + Cpanel::JSON::XS 136755/s 118059/s JSON::SIMD 158171/s 153692/s JSON::XS 157261/s 97676/s - FU::Util 191699/s 91177/s + FU::Util 216443/s 96354/s String escaping (many) Encode Decode JSON::PP 2224/s 366/s JSON::Tiny 2884/s 984/s - Cpanel::JSON::XS 136583/s 100789/s + Cpanel::JSON::XS 140220/s 107040/s JSON::SIMD 152951/s 113242/s JSON::XS 153471/s 106269/s - FU::Util 142604/s 97984/s + FU::Util 153081/s 100279/s =head2 XML Writing +L is the only XS-based XML DSL that I'm aware of, so all direct +competition is inherently slower by virtue of being pure perl. I'm sure some +templating modules will perform better, though. + HTML fragment TUWF::XML 795/s XML::Writer 833/s HTML::Tiny 423/s - FU::XMLWriter 5285/s + FU::XMLWriter 5396/s + + + +=head2 PostgreSQL client + +Fetching query results is highly unlikely to be a bottleneck in your code, this +benchmark is mainly here to verify that L is not introducing a +bottleneck where there shouldn't be one. + +Fetch and bitwise-or 20k integers + + Smallint Bigint + DBD::Pg 194/s 22/s + Pg::PQ 226/s 19/s + FU::Pg (bin) 239/s 23/s + FU::Pg (text) 222/s 21/s =cut # Cached data used by bench.PL. Same as the formatted tables above but easier to parse. -json/api Canonical Cpanel::JSON::XS 101867 -json/api Canonical FU::Util 113275 +json/api Canonical Cpanel::JSON::XS 104141 +json/api Canonical FU::Util 111630 json/api Canonical JSON::PP 5119 json/api Canonical JSON::SIMD 118948 json/api Canonical JSON::XS 120243 -json/api Decode Cpanel::JSON::XS 103575 -json/api Decode FU::Util 118213 +json/api Decode Cpanel::JSON::XS 107274 +json/api Decode FU::Util 121124 json/api Decode JSON::PP 1290 json/api Decode JSON::SIMD 115123 json/api Decode JSON::Tiny 3426 json/api Decode JSON::XS 117940 -json/api Encode Cpanel::JSON::XS 108187 -json/api Encode FU::Util 133182 +json/api Encode Cpanel::JSON::XS 114802 +json/api Encode FU::Util 132890 json/api Encode JSON::PP 5312 json/api Encode JSON::SIMD 130137 json/api Encode JSON::Tiny 7757 json/api Encode JSON::XS 128421 -json/intl Decode Cpanel::JSON::XS 48892 -json/intl Decode FU::Util 62110 +json/intl Decode Cpanel::JSON::XS 50162 +json/intl Decode FU::Util 61006 json/intl Decode JSON::PP 329 json/intl Decode JSON::SIMD 51719 json/intl Decode JSON::Tiny 1638 json/intl Decode JSON::XS 50110 -json/intl Encode Cpanel::JSON::XS 31302 -json/intl Encode FU::Util 116188 +json/intl Encode Cpanel::JSON::XS 32545 +json/intl Encode FU::Util 110210 json/intl Encode JSON::PP 2176 json/intl Encode JSON::SIMD 37201 json/intl Encode JSON::Tiny 2999 json/intl Encode JSON::XS 36722 -json/ints Decode Cpanel::JSON::XS 6083 -json/ints Decode FU::Util 5639 +json/ints Decode Cpanel::JSON::XS 6151 +json/ints Decode FU::Util 5671 json/ints Decode JSON::PP 29 json/ints Decode JSON::SIMD 4361 json/ints Decode JSON::Tiny 86 json/ints Decode JSON::XS 6058 -json/ints Encode Cpanel::JSON::XS 7137 -json/ints Encode FU::Util 8565 +json/ints Encode Cpanel::JSON::XS 7345 +json/ints Encode FU::Util 7883 json/ints Encode JSON::PP 113 json/ints Encode JSON::SIMD 7963 json/ints Encode JSON::Tiny 160 json/ints Encode JSON::XS 7915 -json/objl Canonical Cpanel::JSON::XS 12637 -json/objl Canonical FU::Util 13567 +json/objl Canonical Cpanel::JSON::XS 11875 +json/objl Canonical FU::Util 11920 json/objl Canonical JSON::PP 747 json/objl Canonical JSON::SIMD 12388 json/objl Canonical JSON::XS 13174 -json/objl Decode Cpanel::JSON::XS 16609 -json/objl Decode FU::Util 17178 +json/objl Decode Cpanel::JSON::XS 15515 +json/objl Decode FU::Util 17370 json/objl Decode JSON::PP 104 json/objl Decode JSON::SIMD 22895 json/objl Decode JSON::Tiny 392 json/objl Decode JSON::XS 23553 -json/objl Encode Cpanel::JSON::XS 29672 -json/objl Encode FU::Util 39477 +json/objl Encode Cpanel::JSON::XS 30587 +json/objl Encode FU::Util 36455 json/objl Encode JSON::PP 927 json/objl Encode JSON::SIMD 24418 json/objl Encode JSON::Tiny 1108 json/objl Encode JSON::XS 23192 -json/objs Canonical Cpanel::JSON::XS 28114 -json/objs Canonical FU::Util 26134 +json/objs Canonical Cpanel::JSON::XS 30862 +json/objs Canonical FU::Util 26568 json/objs Canonical JSON::PP 829 json/objs Canonical JSON::SIMD 30699 json/objs Canonical JSON::XS 31326 -json/objs Decode Cpanel::JSON::XS 19229 -json/objs Decode FU::Util 21144 +json/objs Decode Cpanel::JSON::XS 20102 +json/objs Decode FU::Util 20426 json/objs Decode JSON::PP 202 json/objs Decode JSON::SIMD 23267 json/objs Decode JSON::Tiny 499 json/objs Decode JSON::XS 25336 -json/objs Encode Cpanel::JSON::XS 43168 -json/objs Encode FU::Util 44110 +json/objs Encode Cpanel::JSON::XS 45732 +json/objs Encode FU::Util 43853 json/objs Encode JSON::PP 907 json/objs Encode JSON::SIMD 49019 json/objs Encode JSON::Tiny 1224 json/objs Encode JSON::XS 49814 -json/obju Canonical Cpanel::JSON::XS 1345 -json/obju Canonical FU::Util 11916 +json/obju Canonical Cpanel::JSON::XS 1459 +json/obju Canonical FU::Util 12639 json/obju Canonical JSON::PP 679 json/obju Canonical JSON::SIMD 1331 json/obju Canonical JSON::XS 1375 -json/obju Decode Cpanel::JSON::XS 7408 -json/obju Decode FU::Util 9419 +json/obju Decode Cpanel::JSON::XS 7480 +json/obju Decode FU::Util 9375 json/obju Decode JSON::PP 86 json/obju Decode JSON::SIMD 15997 json/obju Decode JSON::Tiny 402 json/obju Decode JSON::XS 8526 -json/obju Encode Cpanel::JSON::XS 20437 -json/obju Encode FU::Util 34435 +json/obju Encode Cpanel::JSON::XS 25333 +json/obju Encode FU::Util 33085 json/obju Encode JSON::PP 817 json/obju Encode JSON::SIMD 25031 json/obju Encode JSON::Tiny 1036 json/obju Encode JSON::XS 23580 -json/strel Decode Cpanel::JSON::XS 100789 -json/strel Decode FU::Util 97984 +json/strel Decode Cpanel::JSON::XS 107040 +json/strel Decode FU::Util 100279 json/strel Decode JSON::PP 366 json/strel Decode JSON::SIMD 113242 json/strel Decode JSON::Tiny 984 json/strel Decode JSON::XS 106269 -json/strel Encode Cpanel::JSON::XS 136583 -json/strel Encode FU::Util 142604 +json/strel Encode Cpanel::JSON::XS 140220 +json/strel Encode FU::Util 153081 json/strel Encode JSON::PP 2224 json/strel Encode JSON::SIMD 152951 json/strel Encode JSON::Tiny 2884 json/strel Encode JSON::XS 153471 -json/stres Decode Cpanel::JSON::XS 106306 -json/stres Decode FU::Util 91177 +json/stres Decode Cpanel::JSON::XS 118059 +json/stres Decode FU::Util 96354 json/stres Decode JSON::PP 352 json/stres Decode JSON::SIMD 153692 json/stres Decode JSON::Tiny 1869 json/stres Decode JSON::XS 97676 -json/stres Encode Cpanel::JSON::XS 131789 -json/stres Encode FU::Util 191699 +json/stres Encode Cpanel::JSON::XS 136755 +json/stres Encode FU::Util 216443 json/stres Encode JSON::PP 4251 json/stres Encode JSON::SIMD 158171 json/stres Encode JSON::Tiny 4704 json/stres Encode JSON::XS 157261 -json/strs Decode Cpanel::JSON::XS 43489 -json/strs Decode FU::Util 44312 +json/strs Decode Cpanel::JSON::XS 44560 +json/strs Decode FU::Util 48163 json/strs Decode JSON::PP 336 json/strs Decode JSON::SIMD 50429 json/strs Decode JSON::Tiny 1439 json/strs Decode JSON::XS 43976 -json/strs Encode Cpanel::JSON::XS 116744 -json/strs Encode FU::Util 182026 +json/strs Encode Cpanel::JSON::XS 116721 +json/strs Encode FU::Util 164804 json/strs Encode JSON::PP 2934 json/strs Encode JSON::SIMD 134711 json/strs Encode JSON::Tiny 4126 json/strs Encode JSON::XS 135419 -json/stru Decode Cpanel::JSON::XS 64489 -json/stru Decode FU::Util 58972 +json/stru Decode Cpanel::JSON::XS 67669 +json/stru Decode FU::Util 61121 json/stru Decode JSON::PP 253 json/stru Decode JSON::SIMD 102440 json/stru Decode JSON::Tiny 2585 json/stru Decode JSON::XS 60558 -json/stru Encode Cpanel::JSON::XS 91704 -json/stru Encode FU::Util 217135 +json/stru Encode Cpanel::JSON::XS 97039 +json/stru Encode FU::Util 187489 json/stru Encode JSON::PP 5113 json/stru Encode JSON::SIMD 106928 json/stru Encode JSON::Tiny 6603 json/stru Encode JSON::XS 105473 -xml/a Rate FU::XMLWriter 5285 +pg/ints Bigint DBD::Pg 22 +pg/ints Bigint FU::Pg (bin) 23 +pg/ints Bigint FU::Pg (text) 21 +pg/ints Bigint Pg::PQ 19 +pg/ints Smallint DBD::Pg 194 +pg/ints Smallint FU::Pg (bin) 239 +pg/ints Smallint FU::Pg (text) 222 +pg/ints Smallint Pg::PQ 226 +xml/a Rate FU::XMLWriter 5396 xml/a Rate HTML::Tiny 423 xml/a Rate TUWF::XML 795 xml/a Rate XML::Writer 833 diff --git a/bench.PL b/bench.PL index fa53068..6ccd763 100755 --- a/bench.PL +++ b/bench.PL @@ -25,7 +25,10 @@ my %modules = map +($_, eval "require $_; \$${_}::VERSION"), qw/ TUWF::XML HTML::Tiny XML::Writer + DBD::Pg + Pg::PQ /; +use FU::Pg; my %data; # "id x y" => { id x y rate exists } my %oldmodules; @@ -196,6 +199,32 @@ def 'xml/a', 'HTML fragment', [ 'Rate' ], +{ + die "FU_TEST_DB not set.\n" if !$ENV{FU_TEST_DB}; + my $pq = Pg::PQ::Conn->new($ENV{FU_TEST_DB}); + my $fu = FU::Pg->connect($ENV{FU_TEST_DB}); + # XXX: Doesn't support all connection params this way + my $dbi = DBI->connect("dbi:Pg:dbname=".$pq->db, $pq->user, $pq->pass, {RaiseError => 1, PrintError => 0}); + + my $small = 'SELECT x, x+1, x+2, x+3, x+4, x+5, x+6, x+7, x+8, x+9 FROM generate_series(-10000::smallint, 9999, 10) x(x)'; + my $big = 'SELECT x<<5, x<<10, x<<15, x<<20, x<<25, x<<30, x<<35, x<<40, x<<45, x<<50 FROM generate_series(1::bigint, 20000, 1) x(x)'; + + my sub dbi { my $sum = 0; my $st = $dbi->prepare_cached($_[0]); for my $row ($dbi->selectall_arrayref($st)->@*) { $sum ^= $_ for @$row; } } + my sub pq { my $sum = 0; $pq->prepare('' => $_[0]); for my $row ($pq->execQueryPrepared('')->rows) { $sum ^= $_ for @$row; } } + my sub fub { my $sum = 0; for my $row ($fu->q($_[0])->alla->@*) { $sum ^= $_ for @$row; } } + my sub fut { my $sum = 0; for my $row ($fu->q($_[0])->text->alla->@*) { $sum ^= $_ for @$row; } } + + def 'pg/ints', 'Fetch and bitwise-or 20k integers', [ 'Smallint', 'Bigint' ], + [ 'DBD::Pg', undef, sub { dbi($small) }, sub { dbi($big) } ], + [ 'Pg::PQ', undef, sub { pq($small) }, sub { pq($big) } ], + [ 'FU::Pg (bin)', 'FU', sub { fub($small) }, sub { fub($big) } ], + [ 'FU::Pg (text)', 'FU', sub { fut($small) }, sub { fut($big) } ]; +} + + + + + delete @data{ grep !$data{$_}{exists}, keys %data }; sub fmtbench($id, $text, $xs, $ys) { @@ -276,15 +305,27 @@ These benchmarks run on large-ish arrays with repeated values. JSON encoding is sufficiently fast that Perl function calling overhead tends to dominate for smaller inputs, but I don't find that overhead very interesting. -Also worth noting that JSON::SIMD formatting code is forked from JSON::XS, the -SIMD parts are only used for parsing. +Also worth noting that L formatting code is forked from +L, the SIMD parts are only used for parsing. :benches ^json %head2 XML Writing +L is the only XS-based XML DSL that I'm aware of, so all direct +competition is inherently slower by virtue of being pure perl. I'm sure some +templating modules will perform better, though. + :benches ^xml +%head2 PostgreSQL client + +Fetching query results is highly unlikely to be a bottleneck in your code, this +benchmark is mainly here to verify that L is not introducing a +bottleneck where there shouldn't be one. + +:benches ^pg + %cut # Cached data used by bench.PL. Same as the formatted tables above but easier to parse. From 32c8fc1b898d5d505a55c1cb1dcdf1cc4822677c Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 11 May 2025 11:03:32 +0200 Subject: [PATCH 18/40] Version 1.0 + remove "experimental" notices --- ChangeLog | 19 +++++++++++++++++++ FU.pm | 15 ++++++--------- FU/Benchmarks.pod | 2 +- FU/DebugImpl.pm | 2 +- FU/Log.pm | 7 +------ FU/MultipartFormData.pm | 2 +- FU/Pg.pm | 7 +------ FU/SQL.pm | 7 +------ FU/Util.pm | 7 +------ FU/Validate.pm | 7 +------ FU/XMLWriter.pm | 7 +------ FU/XS.pm | 2 +- README.md | 4 ---- 13 files changed, 35 insertions(+), 53 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3f18b6b..29e387f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +1.0 - 2025-05-11 + - FU::Util: Fix parsing of empty sections in query_decode() + - FU::Util: Fix buffer overflow in json_format() float formatting + - FU::Util: Reject `0x1f` in utf8_decode() + - FU::Pg: Add perl<->text and bin<->text type conversion methods + - FU::Validate: Improved error messages + - FU::MultipartFormData: Various parser fixes + - FU: Include request body in verbose error logs + - FU: Add fu->log_verbose() + - FU: Extend debug_info pages with request body, response body, 'fu' + object dump, expandable query parameters and interpolated SQL queries + - FU: Improve styling of debug_info pages + - FU: Preserve headers on fu->redirect + - FU: Ignore HTTP_CONTENT_LENGTH and HTTP_CONTENT_TYPE FastCGI parameters + - FU: Suppress warning about missing files in FU::monitor_path + - FU: Reject hash character and newlines in request path + - Fix creating read-only undef/true/false in json_parse() and FU::Pg + - Benchmark updates + 0.5 - 2025-04-24 - FU::Util: Set O_CLOEXEC on fds received through `fdpass_recv()` - FU::Util: Fix interpretation of false options in `json_format()` and diff --git a/FU.pm b/FU.pm index 106322e..440a14c 100644 --- a/FU.pm +++ b/FU.pm @@ -1,4 +1,4 @@ -package FU 0.5; +package FU 1.0; use v5.36; use Carp 'confess', 'croak'; use IO::Socket; @@ -978,14 +978,6 @@ __END__ FU - A Lean and Efficient Zero-Dependency Web Framework. -=head1 EXPERIMENTAL - -This module is still in development: it's missing important functionality and -there will likely be a few breaking API changes. This framework currently -powers manned.org as a test. I'll do a stable 1.0 release once FU is used in -production for vndb.org, which will take a few months in the best case -scenario. - =head1 SYNOPSIS use v5.36; @@ -1011,6 +1003,11 @@ scenario. =head1 DESCRIPTION +FU is the backend web framework developed for L and +L, but is also perfectly suitable for other +projects. Besides a web framework, this distrubion also includes a bunch of +handy utility functions and modules. + =head2 Distribution Overview This top-level C module is a web development framework. The C diff --git a/FU/Benchmarks.pod b/FU/Benchmarks.pod index 37415ff..0762c7a 100644 --- a/FU/Benchmarks.pod +++ b/FU/Benchmarks.pod @@ -30,7 +30,7 @@ The following module versions were used: =item L 3.18.0 -=item L 0.5 +=item L 1.0 =item L 1.08 diff --git a/FU/DebugImpl.pm b/FU/DebugImpl.pm index 88e0bb5..ebf8c80 100644 --- a/FU/DebugImpl.pm +++ b/FU/DebugImpl.pm @@ -1,5 +1,5 @@ # Internal module used by FU.pm -package FU::DebugImpl 0.5; +package FU::DebugImpl 1.0; use v5.36; use utf8; use experimental 'for_list'; diff --git a/FU/Log.pm b/FU/Log.pm index e2da4a2..9606326 100644 --- a/FU/Log.pm +++ b/FU/Log.pm @@ -1,4 +1,4 @@ -package FU::Log 0.5; +package FU::Log 1.0; use v5.36; use Exporter 'import'; use POSIX 'strftime'; @@ -65,11 +65,6 @@ __END__ FU::Log - Extremely Basic Process-Wide Logging Infrastructure -=head1 EXPERIMENTAL - -This module is still in development and there will likely be a few breaking API -changes, see the main L module for details. - =head1 SYNOPSIS use FU::Log 'log_write'; diff --git a/FU/MultipartFormData.pm b/FU/MultipartFormData.pm index 7d9d77e..48ebb77 100644 --- a/FU/MultipartFormData.pm +++ b/FU/MultipartFormData.pm @@ -1,4 +1,4 @@ -package FU::MultipartFormData 0.5; +package FU::MultipartFormData 1.0; use v5.36; use Carp 'confess'; use FU::Util 'utf8_decode'; diff --git a/FU/Pg.pm b/FU/Pg.pm index ff88d60..4732daf 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -1,4 +1,4 @@ -package FU::Pg 0.5; +package FU::Pg 1.0; use v5.36; use FU::XS; @@ -35,11 +35,6 @@ __END__ FU::Pg - The Ultimate (synchronous) Interface to PostgreSQL -=head1 EXPERIMENTAL - -This module is still in development and there will likely be a few breaking API -changes, see the main L module for details. - =head1 SYNOPSYS use FU::Pg; diff --git a/FU/SQL.pm b/FU/SQL.pm index 2f8566d..db8aff1 100644 --- a/FU/SQL.pm +++ b/FU/SQL.pm @@ -1,4 +1,4 @@ -package FU::SQL 0.5; +package FU::SQL 1.0; use v5.36; use Exporter 'import'; use Carp 'confess'; @@ -103,11 +103,6 @@ __END__ FU::SQL - Small and Safe SQL Query Builder -=head1 EXPERIMENTAL - -This module is still in development and there will likely be a few breaking API -changes, see the main L module for details. - =head1 SYNOPSIS use FU::SQL; diff --git a/FU/Util.pm b/FU/Util.pm index 18db781..922747d 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -1,4 +1,4 @@ -package FU::Util 0.5; +package FU::Util 1.0; use v5.36; use FU::XS; @@ -98,11 +98,6 @@ __END__ FU::Util - Miscellaneous Utility Functions -=head1 EXPERIMENTAL - -This module is still in development and there will likely be a few breaking API -changes, see the main L module for details. - =head1 SYNOPSIS use FU::Util qw/json_format/; diff --git a/FU/Validate.pm b/FU/Validate.pm index 2741ee6..a4544bf 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -1,4 +1,4 @@ -package FU::Validate 0.5; +package FU::Validate 1.0; use v5.36; use experimental 'builtin', 'for_list'; @@ -447,11 +447,6 @@ __END__ FU::Validate - Data and form validation and normalization -=head1 EXPERIMENTAL - -This module is still in development and there will likely be a few breaking API -changes, see the main L module for details. - =head1 DESCRIPTION This module provides an easy and simple interface for data validation. It can diff --git a/FU/XMLWriter.pm b/FU/XMLWriter.pm index 1e9bb90..fe755f1 100644 --- a/FU/XMLWriter.pm +++ b/FU/XMLWriter.pm @@ -1,4 +1,4 @@ -package FU::XMLWriter 0.5; +package FU::XMLWriter 1.0; use v5.36; use Carp 'confess'; use Exporter 'import'; @@ -83,11 +83,6 @@ __END__ FU::XMLWriter - Convenient and efficient XML and HTML generator. -=head1 EXPERIMENTAL - -This module is still in development and there will likely be a few breaking API -changes, see the main L module for details. - =head1 SYNOPSIS use FU::XMLWriter ':html5_'; diff --git a/FU/XS.pm b/FU/XS.pm index 52cc757..b583e00 100644 --- a/FU/XS.pm +++ b/FU/XS.pm @@ -1,5 +1,5 @@ # This module is for internal use by other FU modules. -package FU::XS 0.5; +package FU::XS 1.0; use Carp; # may be called by XS. use XSLoader; XSLoader::load('FU'); diff --git a/README.md b/README.md index d29e00c..8f25140 100644 --- a/README.md +++ b/README.md @@ -7,10 +7,6 @@ collection of handy utility modules. *Contributing:* Refer to my [contribution guidelines](https://dev.yorhel.nl/contributing). -## Project Status - -**EXPERIMENTAL**; expect breaking changes. - ## Build & Install ```sh From 31994a4bf6a126aadae4bf736b40bfae60e9adf8 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 12 May 2025 12:38:23 +0200 Subject: [PATCH 19/40] Doc typos --- FU.pm | 4 ++-- FU/XMLWriter.pm | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/FU.pm b/FU.pm index 440a14c..9c85edf 100644 --- a/FU.pm +++ b/FU.pm @@ -994,7 +994,7 @@ FU - A Lean and Efficient Zero-Dependency Web Framework. } FU::get qr{/hello/(.+)}, sub($who) { - my_html_ "Website title", sub { + myhtml_ "Website title", sub { h1_ "Hello, $who!"; }; }; @@ -1097,7 +1097,7 @@ returning strings deal with perl Unicode strings, not raw bytes. =item use FU -procname => $name When the C<-procname> import option is set, FU automatically updates the -process name (as displayed in L and L, see `$0`) with +process name (as displayed in L and L, see C<$0>) with information about the current process, prefixed with the given C<$name>. =item FU::init_db($info) diff --git a/FU/XMLWriter.pm b/FU/XMLWriter.pm index fe755f1..1b964ee 100644 --- a/FU/XMLWriter.pm +++ b/FU/XMLWriter.pm @@ -263,7 +263,7 @@ and C<"> are replaced with their XML entity. All of the functions mentioned in this document can be imported individually. There are also two import groups: - use FU::XMLWriter ':html_'; + use FU::XMLWriter ':html5_'; Exports C, C, C, C and all of the C<< _ >> functions mentioned above. From 81a3d3c608dd37214a94ad8381b4efe6232355e0 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 22 May 2025 09:22:05 +0200 Subject: [PATCH 20/40] SQL: Add IDENT() and quote_identifier options Turns out VNDB has a few places where request data is directly used for column names in VALUES/SET/WHERE clauses. These are already restricted to known strings through the use of FU::Validate, but an extra layer of protection seems warranted here. --- FU/SQL.pm | 46 +++++++++++++++++++++++++++++++++++++--------- t/sql.t | 10 ++++++++++ 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/FU/SQL.pm b/FU/SQL.pm index db8aff1..63107b9 100644 --- a/FU/SQL.pm +++ b/FU/SQL.pm @@ -5,7 +5,7 @@ use Carp 'confess'; use experimental 'builtin', 'for_list'; our @EXPORT = qw/ - P RAW SQL + P RAW IDENT SQL PARENS INTERSPERSE COMMA AND OR WHERE SET VALUES IN @@ -16,6 +16,7 @@ sub _obj { bless [@_], 'FU::SQL::val' } sub P :prototype($) ($p) { bless \(my $x = $p), 'FU::SQL::p' } sub RAW :prototype($) ($s) { _obj "$s" } +sub IDENT :prototype($) ($s) { bless \(my $x = "$s"), 'FU::SQL::i' } # These operate on $_ and must be called with &func syntax. # The readonly check can be finicky. @@ -29,7 +30,7 @@ sub COMMA { INTERSPERSE ',', @_ } sub _conditions { @_ == 1 && ref $_[0] eq 'HASH' - ? map PARENS(RAW $_, + ? map PARENS(IDENT $_, !defined $_[0]{$_} ? ('IS NULL') : ref($_[0]{$_}) eq 'FU::SQL::in' ? ($_[0]{$_}) : ('=', $_[0]{$_}) @@ -41,11 +42,11 @@ sub AND { !@_ || (@_ == 1 && ref $_[0] eq 'HASH' && keys $_[0]->%* == 0) ? RAW ' sub OR { !@_ || (@_ == 1 && ref $_[0] eq 'HASH' && keys $_[0]->%* == 0) ? RAW '1=0' : INTERSPERSE 'OR', _conditions @_ } sub WHERE { SQL 'WHERE', AND @_ } -sub SET($h) { SQL 'SET', COMMA map SQL(RAW $_, '=', $h->{$_}), sort keys %$h } +sub SET($h) { SQL 'SET', COMMA map SQL(IDENT $_, '=', $h->{$_}), sort keys %$h } sub VALUES { @_ == 1 && ref $_[0] eq 'HASH' - ? SQL '(', COMMA(map RAW $_, sort keys $_[0]->%*), ') VALUES (', COMMA(map $_[0]{$_}, sort keys $_[0]->%*), ')' + ? SQL '(', COMMA(map IDENT $_, sort keys $_[0]->%*), ') VALUES (', COMMA(map $_[0]{$_}, sort keys $_[0]->%*), ')' : @_ == 1 && ref $_[0] eq 'ARRAY' ? SQL 'VALUES (', COMMA($_[0]->@*), ')' : SQL 'VALUES (', COMMA(@_), ')'; @@ -71,6 +72,10 @@ sub FU::SQL::p::_compile($self, $opt, $sql, $params) { $$sql .= $opt->{placeholder_style} eq 'pg' ? '$'.@$params : '?'; } +sub FU::SQL::i::_compile($self, $opt, $sql, $params) { + $$sql .= $opt->{quote_identifier} ? $opt->{quote_identifier}->($$self) : $$self; +} + sub FU::SQL::in::_compile($self, $opt, $sql, $params) { if ($opt->{in_style} eq 'pg') { $$sql .= '= ANY('; @@ -87,6 +92,7 @@ sub FU::SQL::in::_compile($self, $opt, $sql, $params) { } sub FU::SQL::val::compile($self, %opt) { + !/^(placeholder_style|in_style|quote_identifier)$/ && confess "Unknown flag: $_" for keys %opt; $opt{placeholder_style} ||= 'dbi'; $opt{in_style} ||= 'dbi'; my($sql, @params) = (''); @@ -94,7 +100,7 @@ sub FU::SQL::val::compile($self, %opt) { ($sql, \@params) } -*FU::SQL::p::compile = *FU::SQL::in::compile = \*FU::SQL::val::compile; +*FU::SQL::p::compile = *FU::SQL::i::compile = *FU::SQL::in::compile = \*FU::SQL::val::compile; 1; __END__ @@ -156,6 +162,16 @@ C<'pg'> when your SQL is going to L or L. Set the style to use for C expressions, refer to the C function below for details. +=item quote_identifier => $func + +Set a function to perform quoting of SQL identifiers. When using DBI, you can +do: + + my($sql) = $obj->compile(quote_identifier => sub { $dbh->quote_identifier(@_) }); + +If this option is not set, identifiers are included into the raw SQL string +without any escaping. + =back =back @@ -176,7 +192,7 @@ types of supported arguments: =item 1. -B are interpreted as raw SQL fragments. +I are interpreted as raw SQL fragments. =item 2. @@ -184,7 +200,7 @@ Objects returned by other functions listed below are included as SQL fragments. =item 3. -B is considered a bind parameter. +I is considered a bind parameter. =back @@ -244,6 +260,18 @@ Force the given C<$sql> string to be included as SQL. For example: Never use this function with untrusted input. +=item IDENT($string) + +Mark the given string as an SQL identifier. This function is only useful if you +use potentially untrusted input to determine which column to select or which +table to select from, for example: + + SQL 'SELECT id,', IDENT $ENV{column}, 'FROM table'; + +B By default this function is equivalent to C and hence +provides no safety whatsoever. Be sure to set the C option on +C to get more useful behavior. + =item PARENS(@args) Like C but surrounds the expression by parens: @@ -279,8 +307,8 @@ C<'1=1'> (i.e. true) if C<@conditions> is an empty list. =item AND($hashref) A special form of C that tests the given columns for equality instead. -The keys of the hashref are interpreted as raw SQL and the values as bind -parameters. +The keys of the hashref are interpreted as per C and the values as +bind parameters. AND { id => 1, number => RAW 'random()', x => undef } # '( id = ? ) AND ( number = random() ) AND ( x IS NULL )' diff --git a/t/sql.t b/t/sql.t index e6b7378..f9cee56 100644 --- a/t/sql.t +++ b/t/sql.t @@ -9,11 +9,15 @@ sub t($obj, $sql, $params, @opt) { is_deeply $gotparams, $params; } +my @q_ident = (quote_identifier => sub($x) { $x =~ s/"/_/rg }); + my $x; t P '', '?', ['']; t P '', '$1', [''], placeholder_style => 'pg'; t P undef, '?', [undef]; t RAW '', '', []; +t IDENT '"hello"', '"hello"', []; +t IDENT '"hello"', '_hello_', [], @q_ident; t SQL('select', '1'), 'select 1', []; t SQL('select', P '1'), 'select ?', [1]; t SQL('select', $x = '1'), 'select ?', [1]; @@ -41,6 +45,7 @@ t WHERE($x, '1 = 2', SQL('x = ', $x)), t WHERE({ col1 => RAW 'NOW()', col2 => 'a'}), 'WHERE ( col1 = NOW() ) AND ( col2 = ? )', ['a']; t WHERE(), 'WHERE 1=1', []; +t WHERE({ '"x' => 1 }), 'WHERE ( _x = ? )', [1], @q_ident; t WHERE(AND('true', $x), OR($y, 'y'), AND, OR), 'WHERE ( ( true ) AND ( ? ) ) AND ( ( ? ) OR ( y ) ) AND ( 1=1 ) AND ( 1=0 )', [$x, $y]; @@ -52,9 +57,11 @@ t SQL(SELECT => COMMA(qw/a b c/), FROM => 'table', WHERE { x => 1, a => undef }) t SET({ a => 1, c => RAW 'NOW()', d => undef }), 'SET a = ? , c = NOW() , d = ?', [1, undef]; +t SET({ '"x' => 1 }), 'SET _x = ?', [1], @q_ident; t VALUES({ a => 1, c => RAW 'NOW()', d => undef }), '( a , c , d ) VALUES ( ? , NOW() , ? )', [1, undef]; +t VALUES({ '"x' => 1 }), '( _x ) VALUES ( ? )', [1], @q_ident; t VALUES(1, $x, 'NOW()', RAW 'NOW()'), 'VALUES ( ? , ? , NOW() , NOW() )', [1, $x]; t VALUES([1, $x, 'NOW()', RAW 'NOW()']), 'VALUES ( ? , ? , ? , NOW() )', [1, $x, 'NOW()']; @@ -86,4 +93,7 @@ Hash::Util::lock_keys(%hash); Hash::Util::lock_value(%hash, 'v'); t SQL($hash{v}), 'value', []; +ok !eval { SQL('')->compile(oops => 1); 1 }; +like $@, qr/Unknown flag: oops/; + done_testing; From 2083ab2a6f3793ce560996dc2c8120c74c61a78b Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 22 May 2025 09:53:41 +0200 Subject: [PATCH 21/40] Pg: Set appropriate quote_identifier for $conn->Q() --- FU.xs | 12 ++++++++++++ FU/Pg.pm | 9 +++++++-- t/pgtypes-dynamic.t | 8 ++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/FU.xs b/FU.xs index 1c342be..7a387f9 100644 --- a/FU.xs +++ b/FU.xs @@ -217,6 +217,12 @@ void query_trace(fupg_conn *c, SV *cb) SvGETMAGIC(cb); c->trace = SvOK(cb) ? SvREFCNT_inc(cb) : NULL; +void conn(fupg_conn *c) + CODE: + ST(0) = sv_newmortal(); + sv_setrv_inc(ST(0), c->self); + sv_bless(ST(0), gv_stashpv("FU::Pg::conn", 0)); + void status(fupg_conn *c) CODE: ST(0) = sv_2mortal(newSVpv(fupg_conn_status(c), 0)); @@ -317,6 +323,12 @@ void cache(fupg_txn *x, ...) CODE: FUPG_STFLAGS; +void conn(fupg_txn *t) + CODE: + ST(0) = sv_newmortal(); + sv_setrv_inc(ST(0), t->conn->self); + sv_bless(ST(0), gv_stashpv("FU::Pg::conn", 0)); + void status(fupg_txn *t) CODE: ST(0) = sv_2mortal(newSVpv(fupg_txn_status(t), 0)); diff --git a/FU/Pg.pm b/FU/Pg.pm index 4732daf..2e7baf8 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -10,7 +10,11 @@ package FU::Pg::conn { sub Q { require FU::SQL; my $s = shift; - my($sql, $params) = FU::SQL::SQL(@_)->compile(placeholder_style => 'pg', in_style => 'pg'); + my($sql, $params) = FU::SQL::SQL(@_)->compile( + placeholder_style => 'pg', + in_style => 'pg', + quote_identifier => sub { $s->conn->escape_identifier(@_) }, + ); $s->q($sql, @$params); } @@ -208,7 +212,8 @@ used. =item $conn->Q(@args) Same as C<< $conn->q() >> but uses L to construct the query and bind -parameters. +parameters. Uses the 'pg' C and C<< $conn->escape_identifier() >> for +identifier quoting. =back diff --git a/t/pgtypes-dynamic.t b/t/pgtypes-dynamic.t index 2751a86..79abd92 100644 --- a/t/pgtypes-dynamic.t +++ b/t/pgtypes-dynamic.t @@ -127,6 +127,14 @@ subtest 'custom types', sub { }; +subtest 'identifier quoting', sub { + my $txn = $conn->txn; + $txn->exec('CREATE TEMPORARY TABLE fupg_test_tbl ("desc" int, ok int, "hello world" int)'); + ok $txn->Q('INSERT INTO fupg_test_tbl', VALUES {desc => 5, ok => 10, 'hello world', 15})->exec; + is $txn->Q('SELECT', IDENT 'hello world', 'FROM fupg_test_tbl')->val, 15; +}; + + subtest 'vndbid', sub { plan skip_all => 'type not loaded in the database' if !$conn->q("SELECT 1 FROM pg_type WHERE typname = 'vndbtag'")->val; From fd8332601b56e661c7c656cdfcd8d11fd65f3cc9 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 22 May 2025 09:54:08 +0200 Subject: [PATCH 22/40] t/pgconnect: Fix ref leak in test Apparently 'my sub' captured the $conn variable and held a ref on it even beyond the parent sub scope. 'my $x = sub {}' doesn't do that. Getting the ref counts right is important here for the last test to work. (Found while I was inspecting the refcount effects of the new ->conn() methods with Devel::Peek) --- t/pgconnect.t | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/t/pgconnect.t b/t/pgconnect.t index 8536574..cec597d 100644 --- a/t/pgconnect.t +++ b/t/pgconnect.t @@ -370,18 +370,18 @@ subtest 'Prepared statement cache', sub { $conn->cache_size(2); my $txn = $conn->txn; $txn->cache; - my sub numexec($sql) { + my $numexec = sub($sql) { $txn->q('SELECT generic_plans + custom_plans FROM pg_prepared_statements WHERE statement = $1', $sql)->cache(0)->val - } + }; is $txn->q('SELECT 1')->val, 1; - is numexec('SELECT 1'), 1; + is $numexec->('SELECT 1'), 1; my $sql = 'SELECT $1::int as a, $2::text as b'; - ok !defined numexec($sql); + ok !defined $numexec->($sql); my $params = $txn->q($sql)->param_types; is_deeply $params, [23, 25]; - is numexec($sql), 0; + is $numexec->($sql), 0; my $cparams = $txn->q($sql)->param_types; is_deeply $cparams, $params; @@ -391,23 +391,23 @@ subtest 'Prepared statement cache', sub { is_deeply $ccols, $cols; $txn->q($sql, 0, '')->exec; - is numexec($sql), 1; + is $numexec->($sql), 1; $txn->q($sql, 0, '')->exec; - is numexec($sql), 2; + is $numexec->($sql), 2; - is numexec('SELECT 1'), 1; + is $numexec->('SELECT 1'), 1; $txn->q('SELECT 2')->exec; - ok !defined numexec('SELECT 1'); - is numexec('SELECT 2'), 1; + ok !defined $numexec->('SELECT 1'); + is $numexec->('SELECT 2'), 1; $conn->cache_size(1); - ok !defined numexec('SELECT 1'); - ok !defined numexec($sql); - is numexec('SELECT 2'), 1; + ok !defined $numexec->('SELECT 1'); + ok !defined $numexec->($sql); + is $numexec->('SELECT 2'), 1; $conn->cache_size(0); - ok !defined numexec($sql); - ok !defined numexec('SELECT 2'); + ok !defined $numexec->($sql); + ok !defined $numexec->('SELECT 2'); }; From f8cd8a6d8cbc687e452071b98f1457f546a55c08 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Tue, 27 May 2025 09:30:46 +0200 Subject: [PATCH 23/40] FU: Simplify --monitor file change detection This changes the way that file changes are detected. The upside is that it now correctly detects changes that happened after the code has loaded but before the first request came in, the downside is that it now gets stuck on reloading when a file has a future mtime. --- FU.pm | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/FU.pm b/FU.pm index 9c85edf..6da28cb 100644 --- a/FU.pm +++ b/FU.pm @@ -217,17 +217,12 @@ sub monitor_path { push @monitor_paths, @_ } sub monitor_check :prototype(&) { $monitor_check = $_[0] } sub _monitor { - state %data; return 1 if $monitor_check && $monitor_check->(); require File::Find; eval { File::Find::find({ - wanted => sub { - my $m = (stat)[9]; - $data{$_} //= $m; - die if $m > $data{$_}; - }, + wanted => sub { die if (-M) < 0 }, no_chdir => 1 }, grep -e, $scriptpath, values %INC, @monitor_paths); 0 From a43dc70ff92b2baca45bb316b3bbce571054191d Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 2 Jun 2025 09:00:04 +0200 Subject: [PATCH 24/40] XMLWriter: Throw error when stringifying a bare reference I can't think of a use case where Perl's default ref stringification is something you actually want when writing XML/HTML - this pretty much always points to a bug. One that I seem to be prone to making... --- c/xmlwr.c | 4 +++- t/xmlwr.t | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/c/xmlwr.c b/c/xmlwr.c index f81d94c..2d31fec 100644 --- a/c/xmlwr.c +++ b/c/xmlwr.c @@ -27,6 +27,8 @@ static void fuxmlwr_destroy(pTHX_ fuxmlwr *wr) { static void fuxmlwr_escape(pTHX_ fuxmlwr *wr, SV *sv) { + if (SvROK(sv) && !SvAMAGIC(sv)) fu_confess("Invalid attempt to output bare reference"); + STRLEN len; const unsigned char *str = (unsigned char *)SvPV_const(sv, len); const unsigned char *tmp, *end = str + len; @@ -96,7 +98,7 @@ static void fuxmlwr_tag(pTHX_ fuxmlwr *wr, I32 ax, I32 offset, I32 argc, int sel val = ST(offset); offset++; - // Don't even try to stringify other arguments; non-string keys are always a bug. + // Don't even try to stringify attribute names; non-string keys are always a bug. if (!SvPOK(key)) fu_confess("Non-string attribute"); keys = SvPVX(key); diff --git a/t/xmlwr.t b/t/xmlwr.t index e8b2d95..becb96c 100644 --- a/t/xmlwr.t +++ b/t/xmlwr.t @@ -65,4 +65,21 @@ sub t { is fragment { t 'arg' }, '
ab" < c &< d🥳
'; +ok !eval { fragment { tag_ 'hi', \1 } }; +like $@, qr/Invalid attempt to output bare reference/; + +ok !eval { fragment { tag_ 'hi', {} } }; +like $@, qr/Invalid attempt to output bare reference/; + +is fragment { tag_ 'hi', bless {}, 'XTEST1' }, 'string'; +like fragment { tag_ 'hi', bless {}, 'XTEST2' }, qr{HASH\(.*\)}; # Yeah, whatever. +like fragment { tag_ 'hi', ''.{} }, qr{HASH\(.*\)}; + done_testing; + + +package XTEST1; +use overload '""' => sub { 'string' }; + +package XTEST2; +use overload '""' => sub { {} }; From 55baa6c9a616e9a3a9223cc07826dc7c23ec6825 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 4 Jun 2025 18:48:06 +0200 Subject: [PATCH 25/40] json_parse(): Disallow control characters in strings by default Deviating from the standard, but more consistent other FU functions. --- FU/Util.pm | 16 +++++++++++----- c/jsonparse.c | 10 ++++++++-- t/json_parse.t | 12 +++++++++--- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/FU/Util.pm b/FU/Util.pm index 922747d..4b06f33 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -137,7 +137,7 @@ value for C<$val>, due to C<\0> and C<\1> being considered booleans. =head1 JSON Parsing & Formatting This module comes with a custom C-based JSON parser and formatter. These -functions conform strictly to L, +functions conform to L, non-standard extensions are not supported and never will be. It also happens to be pretty fast, refer to L for some numbers. @@ -171,6 +171,13 @@ Supported C<%options>: =over +=item allow_control + +Boolean, set to true to allow (encoded) ASCII control characters in JSON +strings, such as C<\u0000>, C<\b>, C<\u007f>, etc. These characters are +permitted per RFC-8259, but disallowed by this parser by default. See +C below. + =item utf8 Boolean, interpret the input C<$string> as a UTF-8 encoded byte string instead @@ -251,10 +258,9 @@ value. There is no way to do that without violating JSON specs, so you should use entity escaping instead. Some JSON modules escape the forward slash (C) character instead, but that -is, at best, B sufficient for embedding inside a C<<