Add query string encoding & decoding functions

This commit is contained in:
Yorhel 2025-02-18 14:08:05 +01:00
parent 90cfd66069
commit 67e6d99f01
5 changed files with 120 additions and 15 deletions

View file

@ -4,10 +4,12 @@ use v5.36;
use FU::XS;
use Carp 'confess';
use Exporter 'import';
use experimental 'builtin';
our @EXPORT_OK = qw/
json_format json_parse
utf8_decode uri_escape uri_unescape
query_decode query_encode
fdpass_send fdpass_recv
/;
@ -18,17 +20,44 @@ sub utf8_decode :prototype($) {
$_[0]
}
sub uri_escape :prototype($) {
utf8::encode(local $_ = shift);
s/([^A-Za-z0-9._~-])/sprintf '%%%02X', ord $1/eg;
$_;
sub uri_escape :prototype($) ($s) {
utf8::encode($s);
$s =~ s/([^A-Za-z0-9._~-])/sprintf '%%%02x', ord $1/eg;
$s;
}
sub uri_unescape :prototype($) {
return if !defined $_[0];
utf8::encode(local $_ = shift);
s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
utf8_decode $_;
sub uri_unescape :prototype($) ($s) {
return if !defined $s;
utf8::encode($s);
$s =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
utf8_decode $s;
}
sub query_decode :prototype($) ($s) {
my %o;
for (split /&/, $s//'') {
my($k,$v) = map uri_unescape($_), split /=/;
$v //= builtin::true;
if (ref $o{$k}) { push $o{$k}->@*, $v }
elsif (exists $o{$k}) { $o{$k} = [ $o{$k}, $v ] }
else { $o{$k} = $v }
}
\%o
}
sub query_encode :prototype($) ($o) {
return join '&', map {
my($k, $v) = ($_, $o->{$_});
$k = uri_escape $k;
map {
my $a = $_;
$a = $a->TO_QUERY() if builtin::blessed($a) && $a->can('TO_QUERY');
!defined $a || (builtin::is_bool($a) && !$a)
? ()
: builtin::is_bool($a) ? $k
: $k.'='.uri_escape($a)
} ref $v eq 'ARRAY' ? @$v : ($v);
} sort keys %$o;
}
1;
@ -177,7 +206,11 @@ L<JSON::PP> and L<Cpanel::JSON::XS> are perfectly fine alternatives.
L<JSON::SIMD> and L<Mojo::JSON> also look like good and maintained candidates.)
=head2 String Utility Functions
=head2 URI-Related Functions
While URIs are capable of encoding arbitrary binary data, the functions below
assume you're only dealing with text. This makes them more robust against weird
inputs, at the cost of flexibility.
=over
@ -204,6 +237,34 @@ Takes an Unicode string potentially containing percent-encoding and returns a
decoded Unicode string. Also checks for ASCII control characters as per
C<utf8_decode()>.
=item query_decode($string)
Decode a query string or C<application/x-www-form-urlencoded> format (they're
the same thing). Returns a hashref with decoded key/value pairs. Values for
duplicated keys are collected into a single array value. Bare keys that do not
have a value are decoded as C<builtin::true>. Example:
my $hash = query_decode 'bare&a=1&a=2&something=else';
# $hash = {
# bare => builtin::true,
# a => [ 1, 2 ],
# something => 'else'
# }
The input C<$string> is assumed to be a perl Unicode string. An error is thrown
if the resulting data decodes into invalid UTF-8 or contains control
characters, as per C<utf8_decode>.
=item query_encode($hashref)
The opposite of C<query_decode>. Takes a hashref of similar structure and
returns an ASCII-encoded query string. Keys with C<undef> or C<builtin::false>
values are omitted in the output.
If a given value is a blessed object with a C<TO_QUERY()> method, that method
is called and it should return either C<undef>, a boolean or a string, which is
then encoded.
=back