More strict UTF-8 validation on input

This commit is contained in:
Yorhel 2025-08-22 09:21:06 +02:00
parent 5a863c20c2
commit 2e9a40da69
3 changed files with 9 additions and 2 deletions

2
FU.pm
View file

@ -1267,7 +1267,7 @@ handler being run. Any other exception is passed to the C<500> error handler.
While the C<FU::> namespace is used for global configuration and utility While the C<FU::> namespace is used for global configuration and utility
functions, the C<fu> object is intended for methods that deal with request functions, the C<fu> object is intended for methods that deal with request
processing (although some are useful used outside of request handlers as well). processing (although some are useful outside of request handlers as well).
The C<fu> object itself can be used to store request-local data. For example, The C<fu> object itself can be used to store request-local data. For example,
the following is a valid approach to handle user authentication: the following is a valid approach to handle user authentication:

View file

@ -4,6 +4,7 @@ use v5.36;
use FU::XS; use FU::XS;
use Carp 'confess'; use Carp 'confess';
use Exporter 'import'; use Exporter 'import';
use Encode ();
use POSIX (); use POSIX ();
use experimental 'builtin'; use experimental 'builtin';
@ -19,7 +20,10 @@ our @EXPORT_OK = qw/
sub utf8_decode :prototype($) { sub utf8_decode :prototype($) {
return if !defined $_[0]; return if !defined $_[0];
confess 'Invalid UTF-8' if !utf8::decode($_[0]); eval {
$_[0] = Encode::decode('UTF-8', $_[0], Encode::FB_CROAK);
1
} || confess($@ =~ s/ at .+\n$//r);
confess 'Invalid control character' if $_[0] =~ /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/; confess 'Invalid control character' if $_[0] =~ /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/;
$_[0] $_[0]
} }

View file

@ -10,6 +10,9 @@ is_deeply
ok !eval { query_decode('%10'); 1 }; ok !eval { query_decode('%10'); 1 };
like $@, qr/Invalid control character/; like $@, qr/Invalid control character/;
ok !eval { query_decode('a=%fe%83%bf%bf%bf%bf%bf%0a'); 1 };
like $@, qr/does not map to Unicode/;
is_deeply query_decode('&&&a=b'), { a => 'b' }; is_deeply query_decode('&&&a=b'), { a => 'b' };
is query_encode is query_encode