FU: Support multipart file uploads + some doc fixes

API is not super convenient and implementation is lousy, but uploading
files is not a super common operation so that should be fine.

At least it supports large files with only a single in-memory copy.
This commit is contained in:
Yorhel 2025-03-08 14:02:51 +01:00
parent e5755ddd80
commit 17176738a0
6 changed files with 280 additions and 13 deletions

25
FU.pm
View file

@ -253,8 +253,9 @@ sub _read_req_http($sock, $req) {
$req->{body} = ''; $req->{body} = '';
while ($len > 0) { while ($len > 0) {
my $r = $sock->read($req->{body}, $len, -1); my $r = $sock->read($req->{body}, $len, length $req->{body});
fu->error(400, 'Client disconnect before request was read') if !$r fu->error(400, 'Client disconnect before request was read') if !$r;
$len -= $r;
} }
} }
@ -654,7 +655,6 @@ sub query {
sub formdata { sub formdata {
shift; shift;
$FU::REQ->{formdata} ||= eval { $FU::REQ->{formdata} ||= eval {
# TODO: Support multipart encoding
confess "Invalid content type for form data" confess "Invalid content type for form data"
if (fu->header('content-type')||'') ne 'application/x-www-form-urlencoded'; if (fu->header('content-type')||'') ne 'application/x-www-form-urlencoded';
FU::Util::query_decode($FU::REQ->{data}); FU::Util::query_decode($FU::REQ->{data});
@ -662,6 +662,13 @@ sub formdata {
_getfield $FU::REQ->{formdata}, @_; _getfield $FU::REQ->{formdata}, @_;
} }
sub multipart {
require FU::MultipartFormData;
$FU::REQ->{multipart} ||= eval {
FU::MultipartFormData->parse(fu->header('content-type')||'', $FU::REQ->{body})
} || fu->error(400, $@);
}
@ -1239,12 +1246,18 @@ Parse, validate and return multiple query parameters.
=item fu->formdata($schema) =item fu->formdata($schema)
Like C<< fu->query() >> but returns data from the POST request body. Like C<< fu->query() >> but returns data from the POST request body. This
method only supports form data encoded as C<application/x-www-form-urlencoded>,
which is the default for HTML C<< <form> >>s. To handle multipart form data,
use C<< fu->multipart >> instead.
=item fu->multipart
Parse the request body as C<multipart/form-data> and return an array of fields.
Refer to L<FU::MultipartFormData> for more information.
=back =back
I<TODO:> Support C<multipart/form-data> and file uploads.
I<TODO:> Support JSON bodies. I<TODO:> Support JSON bodies.
I<TODO:> Cookie parsing. I<TODO:> Cookie parsing.

209
FU/MultipartFormData.pm Normal file
View file

@ -0,0 +1,209 @@
package FU::MultipartFormData;
use v5.36;
use Carp 'confess';
use FU::Util 'utf8_decode';
sub _arg($d) { $d =~ s/^\s+//r =~ s/\s+$//r =~ s/^"(.+)"$/$1/r }
sub parse($pkg, $header, $data) {
confess "Invalid multipart header '$header'"
if $header !~ m{^multipart/form-data\s*;\s*boundary\s*=(.+)$};
my $boundary = _arg $1;
confess "Invalid multipart boundary '$boundary'" if $boundary !~ /^[\x21-\x7e]+$/;
utf8::encode($boundary);
my @a;
while ($data =~ m{--\Q$boundary\E(?:--\r\n|\r\n((?:.+\r\n)+)\r\n)}xg) {
my $hdrs = $1;
$a[$#a]{length} = $-[0] - 2 - $a[$#a]{start} if @a;
if (!$hdrs) {
confess "Trailing garbage" if pos $data != length $data;
last;
}
my $d = bless {
data => $data,
start => pos $data,
}, $pkg;
confess "Missing content-disposition header" if $hdrs !~ /content-disposition:\s*form-data;(.+)/i;
my $v = $1;
confess "Missing 'name' parameter" if $v !~ /[;\s]name=([^[;\s]+)/;
$d->{name} = utf8_decode _arg $1;
$d->{filename} = utf8_decode _arg $1 if $v =~ /[;\s]filename=([^;\s]+)/;
if ($hdrs =~ /content-type:\s*([^;\s]+)(?:\s*;\s*charset=([^;\s]+))?/i) {
$d->{mime} = utf8_decode _arg $1;
$d->{charset} = utf8_decode _arg $2 if $2;
}
push @a, $d;
}
confess "Missing end-of-data marker" if @a && !defined $a[$#a]{length};
\@a
}
sub name { $_[0]{name} }
sub filename { $_[0]{filename} }
sub mime { $_[0]{mime} }
sub charset { $_[0]{charset} }
sub length { $_[0]{length} }
sub substr($o,$off,$len=undef) {
$off += $o->{length} if $off < 0;
$off = 0 if $off < 0;
$off = $o->{length} if $off > $o->{length};
$len //= $o->{length} - $off;
$len += $o->{length} - 1 if $len < 0;
$len = 0 if $len < 0;
$len = $o->{length} - $off if $len > $o->{length} - $off;
substr $o->{data}, $o->{start} + $off, $len;
}
sub data { $_[0]->substr(0) }
sub value { utf8_decode $_[0]->data }
sub syswrite($o, $fh) {
my $off = $o->{start};
my $end = $o->{start} + $o->{length};
while ($off < $end) {
my $r = syswrite $fh, $o->{data}, $end-$off, $off;
return if !defined $r;
$off += $r;
}
$o->{length};
}
sub save($o, $fn) {
open my $F, '>', $fn or confess "Error opening '$fn': $!";
$o->syswrite($F) or confess "Error writing to '$fn': $!";
}
sub describe($o) {
my $head = eval { utf8_decode $o->substr(0, 100) };
if (defined $head && $head =~ /\n/) {
($head) = split /\n/, $head, 2;
$head .= '...';
} elsif (defined $head && $o->{length} > 100) {
$head .= '...';
}
$o->{name}.': '.join ' ',
$o->{filename} ? "filename=$o->{filename}" : (),
$o->{mime} ? "mime=$o->{mime}" : (),
$o->{charset} ? "charset=$o->{charset}" : (),
"length=$o->{length}",
defined $head ? "value=$head" : ();
}
1;
__END__
=head1 NAME
FU::MultipartFormData - Parse multipart/form-data
=head1 SYNOPSIS
my $fields = FU::MultipartFormData->parse($content_type_header, $request_body);
for my $f (@$fields) {
print "%s %d\n", $f->name, $f->length;
$f->save('file.png') if $f->name eq 'image';
}
=head1 DESCRIPTION
This is a tiny module to parse an HTTP request body encoded as
C<multipart/form-data>, which is typically used to handle file uploads.
The entire request body is assumed to be in memory as a Perl string, but this
module makes an attempt to avoid any further copies of data values.
=head2 Parsing
=over FU::MultipartFormData->parse($header, $body)
Returns an array of field objects from the given C<$header>, which must be a
valid value for the C<Content-Type> request header, and the given C<$body>,
which must hold the request body as a byte string. An error is thrown if the
header is not valid or parsing failed.
This module is pretty lousy and does not fully comform to any HTTP standards,
but it does happen to be able to parse POST data from any browser that I've
tried.
=back
=head2 Field Object
Each field is parsed into a field object that supports the following methods:
=over
=item name
Returns the field name as a Perl Unicode string.
=item filename
Returns the filename as a Perl Unicode string, or C<undef> if no filename was
provided.
=item mime
Returns the mime type extracted from the field's C<Content-Type> header, or
C<undef> if none was present.
=item charset
Returns the charset extracted from the field's C<Content-Type> header, or
C<undef> if none was present.
=item length
Returns the byte length of the field value.
=item data
Returns a copy of the field value as a byte string. You'll want to avoid using
this on large fields.
=item value
Returns a copy of the field value as a Unicode string. Uses C<utf8_decode()>
from L<FU::Util>, so also throws an error if the value contains control
characters.
=item substr($off, $len)
Equivalent to calling C<substr()> on the string returned by C<data>, but avoids
a copy of the entire field value.
=item syswrite($fh)
Write the field value to C<$fh> using Perl's C<syswrite()>, returns C<undef> on
error or the number of bytes written on success.
Can be used to write uploaded file data to a file or send it over a socket or
pipe, without making a full in-memory copy of the data.
=item save($fn)
Save the field value to the file C<$fn>, throws an error on failure.
=item describe
Returns a human-readable string to describe this field. Mainly for debugging
purposes, the exact format is subject to change.
=back
=head1 COPYRIGHT
MIT.
=head1 AUTHOR
Yorhel <projects@yorhel.nl>

View file

@ -400,9 +400,7 @@ results into Perl values.
Observed query preparation time, in seconds, including network round-trip. Observed query preparation time, in seconds, including network round-trip.
Returns 0 if a cached prepared statement was used or C<undef> if the query was Returns 0 if a cached prepared statement was used or C<undef> if the query was
executed without a separate preparation phase (currently only happens with C<< executed without a separate preparation phase.
$conn->exec() >>, but support for direct query execution may be added for other
queries in the future as well).
=item $st->get_cache =item $st->get_cache

View file

@ -356,12 +356,12 @@ values. This function results in different SQL depending on the C<in_style>
option given to C<compile()>. The default C<'dbi'> style passes each value as a option given to C<compile()>. The default C<'dbi'> style passes each value as a
bind parameter: bind parameter:
SQL 'WHERE id', IN([1, 2, 3, 4]); SQL 'WHERE id', IN [1, 2, 3, 4];
# 'WHERE id IN(?, ?, ?, ?)', parameters: 1, 2, 3, 4 # 'WHERE id IN(?, ?, ?, ?)', parameters: 1, 2, 3, 4
The C<'pg'> style passes the entire array as a single bind parameter instead: The C<'pg'> style passes the entire array as a single bind parameter instead:
SQL 'WHERE id', IN([1, 2, 3, 4]); SQL 'WHERE id', IN [1, 2, 3, 4];
# 'WHERE id = ANY(?)', parameter: [1, 2, 3, 4] # 'WHERE id = ANY(?)', parameter: [1, 2, 3, 4]
The C<'pg'> style allows for more efficient re-use of cached prepared The C<'pg'> style allows for more efficient re-use of cached prepared
@ -372,7 +372,7 @@ with L<DBD::Pg> or L<Pg::PQ>.
Can be used in the C<$hashref> versions of C<AND>, C<OR> and C<WHERE> as well: Can be used in the C<$hashref> versions of C<AND>, C<OR> and C<WHERE> as well:
WHERE { id => IN([1, 2]) } WHERE { id => IN [1, 2] }
# 'WHERE id IN(?, ?)' # 'WHERE id IN(?, ?)'
=back =back

View file

@ -146,7 +146,7 @@ These functions all return a byte string with (UTF-8) encoded XML.
=item fragment($block) =item fragment($block)
Executes C<$block> and captures the output of all I</"Output functions"> Executes C<$block> and captures the output of all L</"Output functions">
called within the same scope into a string. This function can be safely nested: called within the same scope into a string. This function can be safely nested:
my $string = fragment { my $string = fragment {

47
t/multipart.t Normal file
View file

@ -0,0 +1,47 @@
use v5.36;
use Test::More;
use FU::MultipartFormData;
# Example based on https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/POST
my $t = <<'_' =~ s/\n/\r\n/rg;
--delimiter12345
Content-Disposition: form-data; name="field1"
content-type: hello; charset=x
value1
--delimiter12345
Content-Type: text
Content-Disposition: form-data; filename="example.txt"; name=field2
value2
--delimiter12345--
_
my $l = FU::MultipartFormData->parse('multipart/form-data;boundary="delimiter12345"', $t);
is scalar @$l, 2;
my $v = $l->[0];
is $v->name, 'field1';
is $v->filename, undef;
is $v->mime, 'hello';
is $v->charset, 'x';
is $v->length, 6;
is $v->data, 'value1';
is $v->substr(4), 'e1';
is $v->substr(1, 2), 'al';
is $v->substr(-2, 1), 'e';
is $v->substr(-2, 5), 'e1';
is $v->substr(-100, 2), 'va';
is $v->substr(1, -3), 'al';
$v = $l->[1];
is $v->name, 'field2';
is $v->filename, 'example.txt';
is $v->mime, 'text';
is $v->charset, undef;
is $v->length, 6;
is $v->data, 'value2';
done_testing;