More strict UTF-8 validation on input

This commit is contained in:
Yorhel 2025-08-22 09:21:06 +02:00
parent 5a863c20c2
commit 2e9a40da69
3 changed files with 9 additions and 2 deletions

View file

@ -4,6 +4,7 @@ use v5.36;
use FU::XS;
use Carp 'confess';
use Exporter 'import';
use Encode ();
use POSIX ();
use experimental 'builtin';
@ -19,7 +20,10 @@ our @EXPORT_OK = qw/
# Strictly decode a UTF-8 byte string in place and return the decoded text.
#
# The argument is modified through the $_[0] alias, so the caller's variable
# holds the decoded character string afterwards.
#
# Returns nothing (undef in scalar context, empty list in list context) when
# the argument is undefined.
#
# Throws (via confess) when:
#   - the input is rejected by Encode's 'UTF-8' decoder, or
#   - the decoded text contains a control character other than tab (0x09),
#     line feed (0x0a) or carriage return (0x0d); DEL (0x7f) is rejected too.
sub utf8_decode :prototype($) {
    return if !defined $_[0];

    # Decode exactly once. Running the lenient utf8::decode() first would turn
    # the bytes into characters, after which Encode::decode() would treat those
    # characters as octets again and reject perfectly valid non-ASCII input.
    eval {
        $_[0] = Encode::decode('UTF-8', $_[0], Encode::FB_CROAK);
        1
    } || confess($@ =~ s/ at .+\n$//r);  # drop Encode's own " at FILE line N." suffix

    confess 'Invalid control character' if $_[0] =~ /[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]/;
    $_[0]
}