Add query string encoding & decoding functions

This commit is contained in:
Yorhel 2025-02-18 14:08:05 +01:00
parent 90cfd66069
commit 67e6d99f01
5 changed files with 120 additions and 15 deletions

22
FU.pm
View file

@ -526,7 +526,25 @@ sub method { $FU::REQ->{method} }
sub header($, $h) { $FU::REQ->{hdr}{ lc $h } } sub header($, $h) { $FU::REQ->{hdr}{ lc $h } }
sub headers { $FU::REQ->{hdr} } sub headers { $FU::REQ->{hdr} }
sub ip { $FU::REQ->{ip} } sub ip { $FU::REQ->{ip} }
sub query { $FU::REQ->{qs} } # TODO: parse & validate
sub query {
return $FU::REQ->{qs} if @_ == 1;
$FU::REQ->{qs_parsed} ||= eval { FU::Util::query_decode($FU::REQ->{qs}) } || fu->error(400, $@);
# TODO: Also accept schema validation thing.
$FU::REQ->{qs_parsed}{$_[1]};
}
sub formdata {
$FU::REQ->{formdata} ||= eval {
# TODO: Support multipart encoding
confess "Invalid content type for form data"
if (fu->header('content-type')||'') ne 'application/x-www-form-urlencoded';
FU::Util::query_decode($FU::REQ->{data});
} || fu->error(400, $@);
# TODO: Accept schema validation thing.
$FU::REQ->{formdata}{$_[1]};
}
@ -664,7 +682,7 @@ can be used independently of the framework:
=over =over
=item * L<FU::Util> - JSON parsing & formatting. =item * L<FU::Util> - JSON parsing & formatting, URI encoding, etc.
=item * L<FU::Pg> - PostgreSQL client. =item * L<FU::Pg> - PostgreSQL client.

View file

@ -4,10 +4,12 @@ use v5.36;
use FU::XS; use FU::XS;
use Carp 'confess'; use Carp 'confess';
use Exporter 'import'; use Exporter 'import';
use experimental 'builtin';
our @EXPORT_OK = qw/ our @EXPORT_OK = qw/
json_format json_parse json_format json_parse
utf8_decode uri_escape uri_unescape utf8_decode uri_escape uri_unescape
query_decode query_encode
fdpass_send fdpass_recv fdpass_send fdpass_recv
/; /;
@ -18,17 +20,44 @@ sub utf8_decode :prototype($) {
$_[0] $_[0]
} }
sub uri_escape :prototype($) { sub uri_escape :prototype($) ($s) {
utf8::encode(local $_ = shift); utf8::encode($s);
s/([^A-Za-z0-9._~-])/sprintf '%%%02X', ord $1/eg; $s =~ s/([^A-Za-z0-9._~-])/sprintf '%%%02x', ord $1/eg;
$_; $s;
} }
sub uri_unescape :prototype($) { sub uri_unescape :prototype($) ($s) {
return if !defined $_[0]; return if !defined $s;
utf8::encode(local $_ = shift); utf8::encode($s);
s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; $s =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
utf8_decode $_; utf8_decode $s;
}
sub query_decode :prototype($) ($s) {
my %o;
for (split /&/, $s//'') {
my($k,$v) = map uri_unescape($_), split /=/;
$v //= builtin::true;
if (ref $o{$k}) { push $o{$k}->@*, $v }
elsif (exists $o{$k}) { $o{$k} = [ $o{$k}, $v ] }
else { $o{$k} = $v }
}
\%o
}
sub query_encode :prototype($) ($o) {
return join '&', map {
my($k, $v) = ($_, $o->{$_});
$k = uri_escape $k;
map {
my $a = $_;
$a = $a->TO_QUERY() if builtin::blessed($a) && $a->can('TO_QUERY');
!defined $a || (builtin::is_bool($a) && !$a)
? ()
: builtin::is_bool($a) ? $k
: $k.'='.uri_escape($a)
} ref $v eq 'ARRAY' ? @$v : ($v);
} sort keys %$o;
} }
1; 1;
@ -177,7 +206,11 @@ L<JSON::PP> and L<Cpanel::JSON::XS> are perfectly fine alternatives.
L<JSON::SIMD> and L<Mojo::JSON> also look like good and maintained candidates.) L<JSON::SIMD> and L<Mojo::JSON> also look like good and maintained candidates.)
=head2 String Utility Functions =head2 URI-Related Functions
While URIs are capable of encoding arbitrary binary data, the functions below
assume you're only dealing with text. This makes them more robust against weird
inputs, at the cost of flexibility.
=over =over
@ -204,6 +237,34 @@ Takes an Unicode string potentially containing percent-encoding and returns a
decoded Unicode string. Also checks for ASCII control characters as per decoded Unicode string. Also checks for ASCII control characters as per
C<utf8_decode()>. C<utf8_decode()>.
=item query_decode($string)
Decode a query string or C<application/x-www-form-urlencoded> format (they're
the same thing). Returns a hashref with decoded key/value pairs. Values for
duplicated keys are collected into a single array value. Bare keys that do not
have a value are decoded as C<builtin::true>. Example:
my $hash = query_decode 'bare&a=1&a=2&something=else';
# $hash = {
# bare => builtin::true,
# a => [ 1, 2 ],
# something => 'else'
# }
The input C<$string> is assumed to be a perl Unicode string. An error is thrown
if the resulting data decodes into invalid UTF-8 or contains control
characters, as per C<utf8_decode>.
=item query_encode($hashref)
The opposite of C<query_decode>. Takes a hashref of similar structure and
returns an ASCII-encoded query string. Keys with C<undef> or C<builtin::false>
values are omitted in the output.
If a given value is a blessed object with a C<TO_QUERY()> method, that method
is called and it should return either C<undef>, a boolean or a string, which is
then encoded.
=back =back

View file

@ -2,7 +2,7 @@
WIP. WIP.
*Contributing:* Refer to my [contribution guidelines)[https://dev.yorhel.nl/contributing]. *Contributing:* Refer to my [contribution guidelines](https://dev.yorhel.nl/contributing).
## Build & Install ## Build & Install
@ -19,7 +19,6 @@ Things that may or may not happen:
- FU - The website framework, taking inspiration from TUWF. - FU - The website framework, taking inspiration from TUWF.
- FU::JSON - JSON::{XS,PP,etc}-compatible wrapper around FU::Util's JSON functions? I prolly won't need this myself, but could be handy. - FU::JSON - JSON::{XS,PP,etc}-compatible wrapper around FU::Util's JSON functions? I prolly won't need this myself, but could be handy.
- FU::Log - Basic logger. - FU::Log - Basic logger.
- FU::Util additions: `VNDB::Util::query_encode`?
- FU::Validate - TUWF::Validate & normalization with some improvements. - FU::Validate - TUWF::Validate & normalization with some improvements.
- FU::XML - TUWF::XMLXS with some improvements. - FU::XML - TUWF::XMLXS with some improvements.
- FU::Mailer - Simple sendmail wrapper - FU::Mailer - Simple sendmail wrapper

View file

@ -13,7 +13,7 @@ sub start {
sub record($id, $type, $data, $pad=undef) { sub record($id, $type, $data, $pad=undef) {
$pad //= rand > 0.5 ? int rand(50) : 0; $pad //= rand > 0.5 ? int rand(50) : 0;
my $msg = pack('CCnnCC', 1, $type, $id, length($data), $pad, 0) . $data . ("\0"x$pad); my $msg = pack('CCnnCC', 1, $type, $id, length($data), $pad, 0) . $data . ("\0"x$pad);
is $remote->syswrite($msg, length($msg)), length($msg); die "Short write" if $remote->syswrite($msg, length($msg)) != length($msg);
} }
sub begin($id=1, $role=1, $keep=0) { sub begin($id=1, $role=1, $keep=0) {

27
t/query.t Normal file
View file

@ -0,0 +1,27 @@
use v5.36;
use Test::More;
use FU::Util qw/query_decode query_encode/;
use experimental 'builtin';
is_deeply
query_decode('a&a&%c3%be=%26%3d%c3%be&a=3'),
{ a => [ builtin::true, builtin::true, 3 ], "\xfe" => "&=\xfe" };
ok !eval { query_decode('%10'); 1 };
like $@, qr/Invalid control character/;
is query_encode
{ a => builtin::true, b => undef, c => builtin::false, d => 'string', e => "&=\xfe" },
'a&d=string&e=%26%3d%c3%be';
is query_encode
{ "\xfe" => [ 1, undef, 3, builtin::false, builtin::true ] },
"%c3%be=1&%c3%be=3&%c3%be";
sub FUTILTEST::TO_QUERY { '&'.($_[0][0] + 1) }
is query_encode
{ -ab => bless [2], 'FUTILTEST' },
'-ab=%263';
done_testing;