From 67e6d99f018b2fe911eafb224615518128797a8a Mon Sep 17 00:00:00 2001 From: Yorhel Date: Tue, 18 Feb 2025 14:08:05 +0100 Subject: [PATCH] Add query string encoding & decoding functions --- FU.pm | 22 +++++++++++++-- FU/Util.pm | 81 +++++++++++++++++++++++++++++++++++++++++++++++------- README.md | 3 +- t/fcgi.t | 2 +- t/query.t | 27 ++++++++++++++++++ 5 files changed, 120 insertions(+), 15 deletions(-) create mode 100644 t/query.t diff --git a/FU.pm b/FU.pm index f2e0dc8..92db2e0 100644 --- a/FU.pm +++ b/FU.pm @@ -526,7 +526,25 @@ sub method { $FU::REQ->{method} } sub header($, $h) { $FU::REQ->{hdr}{ lc $h } } sub headers { $FU::REQ->{hdr} } sub ip { $FU::REQ->{ip} } -sub query { $FU::REQ->{qs} } # TODO: parse & validate + +sub query { + return $FU::REQ->{qs} if @_ == 1; + $FU::REQ->{qs_parsed} ||= eval { FU::Util::query_decode($FU::REQ->{qs}) } || fu->error(400, $@); + # TODO: Also accept schema validation thing. + $FU::REQ->{qs_parsed}{$_[1]}; +} + +sub formdata { + $FU::REQ->{formdata} ||= eval { + # TODO: Support multipart encoding + confess "Invalid content type for form data" + if (fu->header('content-type')||'') ne 'application/x-www-form-urlencoded'; + FU::Util::query_decode($FU::REQ->{data}); + } || fu->error(400, $@); + # TODO: Accept schema validation thing. + $FU::REQ->{formdata}{$_[1]}; +} + @@ -664,7 +682,7 @@ can be used independently of the framework: =over -=item * L - JSON parsing & formatting. +=item * L - JSON parsing & formatting, URI encoding, etc. =item * L - PostgreSQL client. diff --git a/FU/Util.pm b/FU/Util.pm index 83fbde6..289f2e5 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -4,10 +4,12 @@ use v5.36; use FU::XS; use Carp 'confess'; use Exporter 'import'; +use experimental 'builtin'; our @EXPORT_OK = qw/ json_format json_parse utf8_decode uri_escape uri_unescape + query_decode query_encode fdpass_send fdpass_recv /; @@ -18,17 +20,44 @@ sub utf8_decode :prototype($) { $_[0] } -sub uri_escape :prototype($) { - utf8::encode(local $_ = shift); - s/([^A-Za-z0-9._~-])/sprintf '%%%02X', ord $1/eg; - $_; +sub uri_escape :prototype($) ($s) { + utf8::encode($s); + $s =~ s/([^A-Za-z0-9._~-])/sprintf '%%%02x', ord $1/eg; + $s; } -sub uri_unescape :prototype($) { - return if !defined $_[0]; - utf8::encode(local $_ = shift); - s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; - utf8_decode $_; +sub uri_unescape :prototype($) ($s) { + return if !defined $s; + utf8::encode($s); + $s =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; + utf8_decode $s; +} + +sub query_decode :prototype($) ($s) { + my %o; + for (split /&/, $s//'') { + my($k,$v) = map uri_unescape($_), split /=/; + $v //= builtin::true; + if (ref $o{$k}) { push $o{$k}->@*, $v } + elsif (exists $o{$k}) { $o{$k} = [ $o{$k}, $v ] } + else { $o{$k} = $v } + } + \%o +} + +sub query_encode :prototype($) ($o) { + return join '&', map { + my($k, $v) = ($_, $o->{$_}); + $k = uri_escape $k; + map { + my $a = $_; + $a = $a->TO_QUERY() if builtin::blessed($a) && $a->can('TO_QUERY'); + !defined $a || (builtin::is_bool($a) && !$a) + ? () + : builtin::is_bool($a) ? $k + : $k.'='.uri_escape($a) + } ref $v eq 'ARRAY' ? @$v : ($v); + } sort keys %$o; } 1; @@ -177,7 +206,11 @@ L and L are perfectly fine alternatives. L and L also look like good and maintained candidates.) -=head2 String Utility Functions +=head2 URI-Related Functions + +While URIs are capable of encoding arbitrary binary data, the functions below +assume you're only dealing with text. This makes them more robust against weird +inputs, at the cost of flexibility. =over @@ -204,6 +237,34 @@ Takes an Unicode string potentially containing percent-encoding and returns a decoded Unicode string. Also checks for ASCII control characters as per C. +=item query_decode($string) + +Decode a query string or C format (they're +the same thing). Returns a hashref with decoded key/value pairs. Values for +duplicated keys are collected into a single array value. Bare keys that do not +have a value are decoded as C. Example: + + my $hash = query_decode 'bare&a=1&a=2&something=else'; + # $hash = { + # bare => builtin::true, + # a => [ 1, 2 ], + # something => 'else' + # } + +The input C<$string> is assumed to be a perl Unicode string. An error is thrown +if the resulting data decodes into invalid UTF-8 or contains control +characters, as per C. + +=item query_encode($hashref) + +The opposite of C. Takes a hashref of similar structure and +returns an ASCII-encoded query string. Keys with C or C +values are omitted in the output. + +If a given value is a blessed object with a C method, that method +is called and it should return either C, a boolean or a string, which is +then encoded. + =back diff --git a/README.md b/README.md index 5682e10..90a0a58 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ WIP. -*Contributing:* Refer to my [contribution guidelines)[https://dev.yorhel.nl/contributing]. +*Contributing:* Refer to my [contribution guidelines](https://dev.yorhel.nl/contributing). ## Build & Install @@ -19,7 +19,6 @@ Things that may or may not happen: - FU - The website framework, taking inspiration from TUWF. - FU::JSON - JSON::{XS,PP,etc}-compatible wrapper around FU::Util's JSON functions? I prolly won't need this myself, but could be handy. - FU::Log - Basic logger. -- FU::Util additions: `VNDB::Util::query_encode`? - FU::Validate - TUWF::Validate & normalization with some improvements. - FU::XML - TUWF::XMLXS with some improvements. - FU::Mailer - Simple sendmail wrapper diff --git a/t/fcgi.t b/t/fcgi.t index 6c06423..0711d6a 100644 --- a/t/fcgi.t +++ b/t/fcgi.t @@ -13,7 +13,7 @@ sub start { sub record($id, $type, $data, $pad=undef) { $pad //= rand > 0.5 ? int rand(50) : 0; my $msg = pack('CCnnCC', 1, $type, $id, length($data), $pad, 0) . $data . ("\0"x$pad); - is $remote->syswrite($msg, length($msg)), length($msg); + die "Short write" if $remote->syswrite($msg, length($msg)) != length($msg); } sub begin($id=1, $role=1, $keep=0) { diff --git a/t/query.t b/t/query.t new file mode 100644 index 0000000..599f50f --- /dev/null +++ b/t/query.t @@ -0,0 +1,27 @@ +use v5.36; +use Test::More; +use FU::Util qw/query_decode query_encode/; +use experimental 'builtin'; + +is_deeply + query_decode('a&a&%c3%be=%26%3d%c3%be&a=3'), + { a => [ builtin::true, builtin::true, 3 ], "\xfe" => "&=\xfe" }; + +ok !eval { query_decode('%10'); 1 }; +like $@, qr/Invalid control character/; + +is query_encode + { a => builtin::true, b => undef, c => builtin::false, d => 'string', e => "&=\xfe" }, + 'a&d=string&e=%26%3d%c3%be'; + +is query_encode + { "\xfe" => [ 1, undef, 3, builtin::false, builtin::true ] }, + "%c3%be=1&%c3%be=3&%c3%be"; + +sub FUTILTEST::TO_QUERY { '&'.($_[0][0] + 1) } + +is query_encode + { -ab => bless [2], 'FUTILTEST' }, + '-ab=%263'; + +done_testing;