From 17176738a0d942f5925e76afee4414349d9a6fbd Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sat, 8 Mar 2025 14:02:51 +0100 Subject: [PATCH] FU: Support multipart file uploads + some doc fixes API is not super convenient and implementation is lousy, but uploading files is not a super common operation so that should be fine. At least it supports large files with only a single in-memory copy. --- FU.pm | 25 +++-- FU/MultipartFormData.pm | 209 ++++++++++++++++++++++++++++++++++++++++ FU/Pg.pm | 4 +- FU/SQL.pm | 6 +- FU/XMLWriter.pm | 2 +- t/multipart.t | 47 +++++++++ 6 files changed, 280 insertions(+), 13 deletions(-) create mode 100644 FU/MultipartFormData.pm create mode 100644 t/multipart.t diff --git a/FU.pm b/FU.pm index 57123a5..4807e0b 100644 --- a/FU.pm +++ b/FU.pm @@ -253,8 +253,9 @@ sub _read_req_http($sock, $req) { $req->{body} = ''; while ($len > 0) { - my $r = $sock->read($req->{body}, $len, -1); - fu->error(400, 'Client disconnect before request was read') if !$r + my $r = $sock->read($req->{body}, $len, length $req->{body}); + fu->error(400, 'Client disconnect before request was read') if !$r; + $len -= $r; } } @@ -654,7 +655,6 @@ sub query { sub formdata { shift; $FU::REQ->{formdata} ||= eval { - # TODO: Support multipart encoding confess "Invalid content type for form data" if (fu->header('content-type')||'') ne 'application/x-www-form-urlencoded'; FU::Util::query_decode($FU::REQ->{data}); @@ -662,6 +662,13 @@ sub formdata { _getfield $FU::REQ->{formdata}, @_; } +sub multipart { + require FU::MultipartFormData; + $FU::REQ->{multipart} ||= eval { + FU::MultipartFormData->parse(fu->header('content-type')||'', $FU::REQ->{body}) + } || fu->error(400, $@); +} + @@ -1239,12 +1246,18 @@ Parse, validate and return multiple query parameters. =item fu->formdata($schema) -Like C<< fu->query() >> but returns data from the POST request body. +Like C<< fu->query() >> but returns data from the POST request body. This +method only supports form data encoded as C, +which is the default for HTML C<<
>>s. To handle multipart form data, +use C<< fu->multipart >> instead. + +=item fu->multipart + +Parse the request body as C and return an array of fields. +Refer to L for more information. =back -I Support C and file uploads. - I Support JSON bodies. I Cookie parsing. diff --git a/FU/MultipartFormData.pm b/FU/MultipartFormData.pm new file mode 100644 index 0000000..da415f8 --- /dev/null +++ b/FU/MultipartFormData.pm @@ -0,0 +1,209 @@ +package FU::MultipartFormData; +use v5.36; +use Carp 'confess'; +use FU::Util 'utf8_decode'; + +sub _arg($d) { $d =~ s/^\s+//r =~ s/\s+$//r =~ s/^"(.+)"$/$1/r } + +sub parse($pkg, $header, $data) { + confess "Invalid multipart header '$header'" + if $header !~ m{^multipart/form-data\s*;\s*boundary\s*=(.+)$}; + my $boundary = _arg $1; + confess "Invalid multipart boundary '$boundary'" if $boundary !~ /^[\x21-\x7e]+$/; + utf8::encode($boundary); + + my @a; + while ($data =~ m{--\Q$boundary\E(?:--\r\n|\r\n((?:.+\r\n)+)\r\n)}xg) { + my $hdrs = $1; + $a[$#a]{length} = $-[0] - 2 - $a[$#a]{start} if @a; + if (!$hdrs) { + confess "Trailing garbage" if pos $data != length $data; + last; + } + + my $d = bless { + data => $data, + start => pos $data, + }, $pkg; + + confess "Missing content-disposition header" if $hdrs !~ /content-disposition:\s*form-data;(.+)/i; + my $v = $1; + confess "Missing 'name' parameter" if $v !~ /[;\s]name=([^[;\s]+)/; + $d->{name} = utf8_decode _arg $1; + $d->{filename} = utf8_decode _arg $1 if $v =~ /[;\s]filename=([^;\s]+)/; + + if ($hdrs =~ /content-type:\s*([^;\s]+)(?:\s*;\s*charset=([^;\s]+))?/i) { + $d->{mime} = utf8_decode _arg $1; + $d->{charset} = utf8_decode _arg $2 if $2; + } + push @a, $d; + } + confess "Missing end-of-data marker" if @a && !defined $a[$#a]{length}; + \@a +} + +sub name { $_[0]{name} } +sub filename { $_[0]{filename} } +sub mime { $_[0]{mime} } +sub charset { $_[0]{charset} } +sub length { $_[0]{length} } + +sub substr($o,$off,$len=undef) { + $off += $o->{length} if $off < 0; + $off = 0 if $off < 0; + $off = $o->{length} if $off > $o->{length}; + + $len //= $o->{length} - $off; + $len += $o->{length} - 1 if $len < 0; + $len = 0 if $len < 0; + $len = $o->{length} - $off if $len > $o->{length} - $off; + + substr $o->{data}, $o->{start} + $off, $len; +} + +sub data { $_[0]->substr(0) } +sub value { utf8_decode $_[0]->data } + +sub syswrite($o, $fh) { + my $off = $o->{start}; + my $end = $o->{start} + $o->{length}; + while ($off < $end) { + my $r = syswrite $fh, $o->{data}, $end-$off, $off; + return if !defined $r; + $off += $r; + } + $o->{length}; +} + +sub save($o, $fn) { + open my $F, '>', $fn or confess "Error opening '$fn': $!"; + $o->syswrite($F) or confess "Error writing to '$fn': $!"; +} + +sub describe($o) { + my $head = eval { utf8_decode $o->substr(0, 100) }; + if (defined $head && $head =~ /\n/) { + ($head) = split /\n/, $head, 2; + $head .= '...'; + } elsif (defined $head && $o->{length} > 100) { + $head .= '...'; + } + $o->{name}.': '.join ' ', + $o->{filename} ? "filename=$o->{filename}" : (), + $o->{mime} ? "mime=$o->{mime}" : (), + $o->{charset} ? "charset=$o->{charset}" : (), + "length=$o->{length}", + defined $head ? "value=$head" : (); +} + +1; +__END__ + +=head1 NAME + +FU::MultipartFormData - Parse multipart/form-data + +=head1 SYNOPSIS + + my $fields = FU::MultipartFormData->parse($content_type_header, $request_body); + + for my $f (@$fields) { + print "%s %d\n", $f->name, $f->length; + + $f->save('file.png') if $f->name eq 'image'; + } + +=head1 DESCRIPTION + +This is a tiny module to parse an HTTP request body encoded as +C, which is typically used to handle file uploads. + +The entire request body is assumed to be in memory as a Perl string, but this +module makes an attempt to avoid any further copies of data values. + +=head2 Parsing + +=over FU::MultipartFormData->parse($header, $body) + +Returns an array of field objects from the given C<$header>, which must be a +valid value for the C request header, and the given C<$body>, +which must hold the request body as a byte string. An error is thrown if the +header is not valid or parsing failed. + +This module is pretty lousy and does not fully comform to any HTTP standards, +but it does happen to be able to parse POST data from any browser that I've +tried. + +=back + +=head2 Field Object + +Each field is parsed into a field object that supports the following methods: + +=over + +=item name + +Returns the field name as a Perl Unicode string. + +=item filename + +Returns the filename as a Perl Unicode string, or C if no filename was +provided. + +=item mime + +Returns the mime type extracted from the field's C header, or +C if none was present. + +=item charset + +Returns the charset extracted from the field's C header, or +C if none was present. + +=item length + +Returns the byte length of the field value. + +=item data + +Returns a copy of the field value as a byte string. You'll want to avoid using +this on large fields. + +=item value + +Returns a copy of the field value as a Unicode string. Uses C +from L, so also throws an error if the value contains control +characters. + +=item substr($off, $len) + +Equivalent to calling C on the string returned by C, but avoids +a copy of the entire field value. + +=item syswrite($fh) + +Write the field value to C<$fh> using Perl's C, returns C on +error or the number of bytes written on success. + +Can be used to write uploaded file data to a file or send it over a socket or +pipe, without making a full in-memory copy of the data. + +=item save($fn) + +Save the field value to the file C<$fn>, throws an error on failure. + +=item describe + +Returns a human-readable string to describe this field. Mainly for debugging +purposes, the exact format is subject to change. + +=back + +=head1 COPYRIGHT + +MIT. + +=head1 AUTHOR + +Yorhel diff --git a/FU/Pg.pm b/FU/Pg.pm index 7b9de5d..176a8ac 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -400,9 +400,7 @@ results into Perl values. Observed query preparation time, in seconds, including network round-trip. Returns 0 if a cached prepared statement was used or C if the query was -executed without a separate preparation phase (currently only happens with C<< -$conn->exec() >>, but support for direct query execution may be added for other -queries in the future as well). +executed without a separate preparation phase. =item $st->get_cache diff --git a/FU/SQL.pm b/FU/SQL.pm index 73f5613..c86a3fb 100644 --- a/FU/SQL.pm +++ b/FU/SQL.pm @@ -356,12 +356,12 @@ values. This function results in different SQL depending on the C option given to C. The default C<'dbi'> style passes each value as a bind parameter: - SQL 'WHERE id', IN([1, 2, 3, 4]); + SQL 'WHERE id', IN [1, 2, 3, 4]; # 'WHERE id IN(?, ?, ?, ?)', parameters: 1, 2, 3, 4 The C<'pg'> style passes the entire array as a single bind parameter instead: - SQL 'WHERE id', IN([1, 2, 3, 4]); + SQL 'WHERE id', IN [1, 2, 3, 4]; # 'WHERE id = ANY(?)', parameter: [1, 2, 3, 4] The C<'pg'> style allows for more efficient re-use of cached prepared @@ -372,7 +372,7 @@ with L or L. Can be used in the C<$hashref> versions of C, C and C as well: - WHERE { id => IN([1, 2]) } + WHERE { id => IN [1, 2] } # 'WHERE id IN(?, ?)' =back diff --git a/FU/XMLWriter.pm b/FU/XMLWriter.pm index 0911ced..08fc54d 100644 --- a/FU/XMLWriter.pm +++ b/FU/XMLWriter.pm @@ -146,7 +146,7 @@ These functions all return a byte string with (UTF-8) encoded XML. =item fragment($block) -Executes C<$block> and captures the output of all I +Executes C<$block> and captures the output of all L called within the same scope into a string. This function can be safely nested: my $string = fragment { diff --git a/t/multipart.t b/t/multipart.t new file mode 100644 index 0000000..842b9cd --- /dev/null +++ b/t/multipart.t @@ -0,0 +1,47 @@ +use v5.36; +use Test::More; +use FU::MultipartFormData; + +# Example based on https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/POST +my $t = <<'_' =~ s/\n/\r\n/rg; +--delimiter12345 +Content-Disposition: form-data; name="field1" +content-type: hello; charset=x + +value1 +--delimiter12345 +Content-Type: text +Content-Disposition: form-data; filename="example.txt"; name=field2 + +value2 +--delimiter12345-- +_ + + +my $l = FU::MultipartFormData->parse('multipart/form-data;boundary="delimiter12345"', $t); +is scalar @$l, 2; + +my $v = $l->[0]; +is $v->name, 'field1'; +is $v->filename, undef; +is $v->mime, 'hello'; +is $v->charset, 'x'; +is $v->length, 6; +is $v->data, 'value1'; + +is $v->substr(4), 'e1'; +is $v->substr(1, 2), 'al'; +is $v->substr(-2, 1), 'e'; +is $v->substr(-2, 5), 'e1'; +is $v->substr(-100, 2), 'va'; +is $v->substr(1, -3), 'al'; + +$v = $l->[1]; +is $v->name, 'field2'; +is $v->filename, 'example.txt'; +is $v->mime, 'text'; +is $v->charset, undef; +is $v->length, 6; +is $v->data, 'value2'; + +done_testing;