Add query string encoding & decoding functions

This commit is contained in:
Yorhel 2025-02-18 14:08:05 +01:00
parent 90cfd66069
commit 67e6d99f01
5 changed files with 120 additions and 15 deletions

22
FU.pm
View file

@ -526,7 +526,25 @@ sub method { $FU::REQ->{method} }
sub header($, $h) { $FU::REQ->{hdr}{ lc $h } }
sub headers { $FU::REQ->{hdr} }
sub ip { $FU::REQ->{ip} }
sub query { $FU::REQ->{qs} } # TODO: parse & validate
sub query {
return $FU::REQ->{qs} if @_ == 1;
$FU::REQ->{qs_parsed} ||= eval { FU::Util::query_decode($FU::REQ->{qs}) } || fu->error(400, $@);
# TODO: Also accept schema validation thing.
$FU::REQ->{qs_parsed}{$_[1]};
}
sub formdata {
$FU::REQ->{formdata} ||= eval {
# TODO: Support multipart encoding
confess "Invalid content type for form data"
if (fu->header('content-type')||'') ne 'application/x-www-form-urlencoded';
FU::Util::query_decode($FU::REQ->{data});
} || fu->error(400, $@);
# TODO: Accept schema validation thing.
$FU::REQ->{formdata}{$_[1]};
}
@ -664,7 +682,7 @@ can be used independently of the framework:
=over
=item * L<FU::Util> - JSON parsing & formatting.
=item * L<FU::Util> - JSON parsing & formatting, URI encoding, etc.
=item * L<FU::Pg> - PostgreSQL client.

View file

@ -4,10 +4,12 @@ use v5.36;
use FU::XS;
use Carp 'confess';
use Exporter 'import';
use experimental 'builtin';
our @EXPORT_OK = qw/
json_format json_parse
utf8_decode uri_escape uri_unescape
query_decode query_encode
fdpass_send fdpass_recv
/;
@ -18,17 +20,44 @@ sub utf8_decode :prototype($) {
$_[0]
}
sub uri_escape :prototype($) {
utf8::encode(local $_ = shift);
s/([^A-Za-z0-9._~-])/sprintf '%%%02X', ord $1/eg;
$_;
sub uri_escape :prototype($) ($s) {
utf8::encode($s);
$s =~ s/([^A-Za-z0-9._~-])/sprintf '%%%02x', ord $1/eg;
$s;
}
sub uri_unescape :prototype($) {
return if !defined $_[0];
utf8::encode(local $_ = shift);
s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
utf8_decode $_;
sub uri_unescape :prototype($) ($s) {
return if !defined $s;
utf8::encode($s);
$s =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
utf8_decode $s;
}
sub query_decode :prototype($) ($s) {
my %o;
for (split /&/, $s//'') {
my($k,$v) = map uri_unescape($_), split /=/;
$v //= builtin::true;
if (ref $o{$k}) { push $o{$k}->@*, $v }
elsif (exists $o{$k}) { $o{$k} = [ $o{$k}, $v ] }
else { $o{$k} = $v }
}
\%o
}
sub query_encode :prototype($) ($o) {
return join '&', map {
my($k, $v) = ($_, $o->{$_});
$k = uri_escape $k;
map {
my $a = $_;
$a = $a->TO_QUERY() if builtin::blessed($a) && $a->can('TO_QUERY');
!defined $a || (builtin::is_bool($a) && !$a)
? ()
: builtin::is_bool($a) ? $k
: $k.'='.uri_escape($a)
} ref $v eq 'ARRAY' ? @$v : ($v);
} sort keys %$o;
}
1;
@ -177,7 +206,11 @@ L<JSON::PP> and L<Cpanel::JSON::XS> are perfectly fine alternatives.
L<JSON::SIMD> and L<Mojo::JSON> also look like good and maintained candidates.)
=head2 String Utility Functions
=head2 URI-Related Functions
While URIs are capable of encoding arbitrary binary data, the functions below
assume you're only dealing with text. This makes them more robust against weird
inputs, at the cost of flexibility.
=over
@ -204,6 +237,34 @@ Takes an Unicode string potentially containing percent-encoding and returns a
decoded Unicode string. Also checks for ASCII control characters as per
C<utf8_decode()>.
=item query_decode($string)
Decode a query string or C<application/x-www-form-urlencoded> format (they're
the same thing). Returns a hashref with decoded key/value pairs. Values for
duplicated keys are collected into a single array value. Bare keys that do not
have a value are decoded as C<builtin::true>. Example:
my $hash = query_decode 'bare&a=1&a=2&something=else';
# $hash = {
# bare => builtin::true,
# a => [ 1, 2 ],
# something => 'else'
# }
The input C<$string> is assumed to be a perl Unicode string. An error is thrown
if the resulting data decodes into invalid UTF-8 or contains control
characters, as per C<utf8_decode>.
=item query_encode($hashref)
The opposite of C<query_decode>. Takes a hashref of similar structure and
returns an ASCII-encoded query string. Keys with C<undef> or C<builtin::false>
values are omitted in the output.
If a given value is a blessed object with a C<TO_QUERY()> method, that method
is called and it should return either C<undef>, a boolean or a string, which is
then encoded.
=back

View file

@ -2,7 +2,7 @@
WIP.
*Contributing:* Refer to my [contribution guidelines)[https://dev.yorhel.nl/contributing].
*Contributing:* Refer to my [contribution guidelines](https://dev.yorhel.nl/contributing).
## Build & Install
@ -19,7 +19,6 @@ Things that may or may not happen:
- FU - The website framework, taking inspiration from TUWF.
- FU::JSON - JSON::{XS,PP,etc}-compatible wrapper around FU::Util's JSON functions? I prolly won't need this myself, but could be handy.
- FU::Log - Basic logger.
- FU::Util additions: `VNDB::Util::query_encode`?
- FU::Validate - TUWF::Validate & normalization with some improvements.
- FU::XML - TUWF::XMLXS with some improvements.
- FU::Mailer - Simple sendmail wrapper

View file

@ -13,7 +13,7 @@ sub start {
sub record($id, $type, $data, $pad=undef) {
$pad //= rand > 0.5 ? int rand(50) : 0;
my $msg = pack('CCnnCC', 1, $type, $id, length($data), $pad, 0) . $data . ("\0"x$pad);
is $remote->syswrite($msg, length($msg)), length($msg);
die "Short write" if $remote->syswrite($msg, length($msg)) != length($msg);
}
sub begin($id=1, $role=1, $keep=0) {

27
t/query.t Normal file
View file

@ -0,0 +1,27 @@
use v5.36;
use Test::More;
use FU::Util qw/query_decode query_encode/;
use experimental 'builtin';
is_deeply
query_decode('a&a&%c3%be=%26%3d%c3%be&a=3'),
{ a => [ builtin::true, builtin::true, 3 ], "\xfe" => "&=\xfe" };
ok !eval { query_decode('%10'); 1 };
like $@, qr/Invalid control character/;
is query_encode
{ a => builtin::true, b => undef, c => builtin::false, d => 'string', e => "&=\xfe" },
'a&d=string&e=%26%3d%c3%be';
is query_encode
{ "\xfe" => [ 1, undef, 3, builtin::false, builtin::true ] },
"%c3%be=1&%c3%be=3&%c3%be";
sub FUTILTEST::TO_QUERY { '&'.($_[0][0] + 1) }
is query_encode
{ -ab => bless [2], 'FUTILTEST' },
'-ab=%263';
done_testing;