FU: Support multipart file uploads + some doc fixes

API is not super convenient and implementation is lousy, but uploading files is not a super common operation so that should be fine. At least it supports large files with only a single in-memory copy.
2025-03-08 14:02:51 +01:00 · 2025-03-08 14:02:51 +01:00 · 17176738a0
commit 17176738a0
parent e5755ddd80
6 changed files with 280 additions and 13 deletions
--- a/FU.pm
+++ b/FU.pm
@ -253,8 +253,9 @@ sub _read_req_http($sock, $req) {
    $req->{body} = '';
    while ($len > 0) {
-        my $r = $sock->read($req->{body}, $len, -1);
+        my $r = $sock->read($req->{body}, $len, length $req->{body});
-        fu->error(400, 'Client disconnect before request was read') if !$r
+        fu->error(400, 'Client disconnect before request was read') if !$r;
        $len -= $r;
    }
 }
@ -654,7 +655,6 @@ sub query {
 sub formdata {
    shift;
    $FU::REQ->{formdata} ||= eval {
        # TODO: Support multipart encoding
        confess "Invalid content type for form data"
            if (fu->header('content-type')||'') ne 'application/x-www-form-urlencoded';
        FU::Util::query_decode($FU::REQ->{data});
@ -662,6 +662,13 @@ sub formdata {
    _getfield $FU::REQ->{formdata}, @_;
 }
 sub multipart {
    require FU::MultipartFormData;
    $FU::REQ->{multipart} ||= eval {
        FU::MultipartFormData->parse(fu->header('content-type')||'', $FU::REQ->{body})
    } || fu->error(400, $@);
 }
@ -1239,12 +1246,18 @@ Parse, validate and return multiple query parameters.
 =item fu->formdata($schema)
-Like C<< fu->query() >> but returns data from the POST request body.
+Like C<< fu->query() >> but returns data from the POST request body. This
 method only supports form data encoded as C<application/x-www-form-urlencoded>,
 which is the default for HTML C<< <form> >>s. To handle multipart form data,
 use C<< fu->multipart >> instead.
 =item fu->multipart
 Parse the request body as C<multipart/form-data> and return an array of fields.
 Refer to L<FU::MultipartFormData> for more information.
 =back
 I<TODO:> Support C<multipart/form-data> and file uploads.
 I<TODO:> Support JSON bodies.
 I<TODO:> Cookie parsing.
--- a/FU/MultipartFormData.pm
+++ b/FU/MultipartFormData.pm
@ -0,0 +1,209 @@
 package FU::MultipartFormData;
 use v5.36;
 use Carp 'confess';
 use FU::Util 'utf8_decode';
 sub _arg($d) { $d =~ s/^\s+//r =~ s/\s+$//r =~ s/^"(.+)"$/$1/r }
 sub parse($pkg, $header, $data) {
    confess "Invalid multipart header '$header'"
        if $header !~ m{^multipart/form-data\s*;\s*boundary\s*=(.+)$};
    my $boundary = _arg $1;
    confess "Invalid multipart boundary '$boundary'" if $boundary !~ /^[\x21-\x7e]+$/;
    utf8::encode($boundary);
    my @a;
    while ($data =~ m{--\Q$boundary\E(?:--\r\n|\r\n((?:.+\r\n)+)\r\n)}xg) {
        my $hdrs = $1;
        $a[$#a]{length} = $-[0] - 2 - $a[$#a]{start} if @a;
        if (!$hdrs) {
            confess "Trailing garbage" if pos $data != length $data;
            last;
        }
        my $d = bless {
            data => $data,
            start => pos $data,
        }, $pkg;
        confess "Missing content-disposition header" if $hdrs !~ /content-disposition:\s*form-data;(.+)/i;
        my $v = $1;
        confess "Missing 'name' parameter" if $v !~ /[;\s]name=([^[;\s]+)/;
        $d->{name} = utf8_decode _arg $1;
        $d->{filename} = utf8_decode _arg $1 if $v =~ /[;\s]filename=([^;\s]+)/;
        if ($hdrs =~ /content-type:\s*([^;\s]+)(?:\s*;\s*charset=([^;\s]+))?/i) {
            $d->{mime} = utf8_decode _arg $1;
            $d->{charset} = utf8_decode _arg $2 if $2;
        }
        push @a, $d;
    }
    confess "Missing end-of-data marker" if @a && !defined $a[$#a]{length};
    \@a
 }
 sub name     { $_[0]{name} }
 sub filename { $_[0]{filename} }
 sub mime     { $_[0]{mime} }
 sub charset  { $_[0]{charset} }
 sub length   { $_[0]{length} }
 sub substr($o,$off,$len=undef) {
    $off += $o->{length} if $off < 0;
    $off = 0 if $off < 0;
    $off = $o->{length} if $off > $o->{length};
    $len //= $o->{length} - $off;
    $len += $o->{length} - 1 if $len < 0;
    $len = 0 if $len < 0;
    $len = $o->{length} - $off if $len > $o->{length} - $off;
    substr $o->{data}, $o->{start} + $off, $len;
 }
 sub data     { $_[0]->substr(0) }
 sub value    { utf8_decode $_[0]->data }
 sub syswrite($o, $fh) {
    my $off = $o->{start};
    my $end = $o->{start} + $o->{length};
    while ($off < $end) {
        my $r = syswrite $fh, $o->{data}, $end-$off, $off;
        return if !defined $r;
        $off += $r;
    }
    $o->{length};
 }
 sub save($o, $fn) {
    open my $F, '>', $fn or confess "Error opening '$fn': $!";
    $o->syswrite($F) or confess "Error writing to '$fn': $!";
 }
 sub describe($o) {
    my $head = eval { utf8_decode $o->substr(0, 100) };
    if (defined $head && $head =~ /\n/) {
        ($head) = split /\n/, $head, 2;
        $head .= '...';
    } elsif (defined $head && $o->{length} > 100) {
        $head .= '...';
    }
    $o->{name}.': '.join ' ',
        $o->{filename} ? "filename=$o->{filename}" : (),
        $o->{mime} ? "mime=$o->{mime}" : (),
        $o->{charset} ? "charset=$o->{charset}" : (),
        "length=$o->{length}",
        defined $head ? "value=$head" : ();
 }
 1;
 __END__
 =head1 NAME
 FU::MultipartFormData - Parse multipart/form-data
 =head1 SYNOPSIS
  my $fields = FU::MultipartFormData->parse($content_type_header, $request_body);
  for my $f (@$fields) {
      print "%s   %d\n", $f->name, $f->length;
      $f->save('file.png') if $f->name eq 'image';
  }
 =head1 DESCRIPTION
 This is a tiny module to parse an HTTP request body encoded as
 C<multipart/form-data>, which is typically used to handle file uploads.
 The entire request body is assumed to be in memory as a Perl string, but this
 module makes an attempt to avoid any further copies of data values.
 =head2 Parsing
 =over FU::MultipartFormData->parse($header, $body)
 Returns an array of field objects from the given C<$header>, which must be a
 valid value for the C<Content-Type> request header, and the given C<$body>,
 which must hold the request body as a byte string. An error is thrown if the
 header is not valid or parsing failed.
 This module is pretty lousy and does not fully comform to any HTTP standards,
 but it does happen to be able to parse POST data from any browser that I've
 tried.
 =back
 =head2 Field Object
 Each field is parsed into a field object that supports the following methods:
 =over
 =item name
 Returns the field name as a Perl Unicode string.
 =item filename
 Returns the filename as a Perl Unicode string, or C<undef> if no filename was
 provided.
 =item mime
 Returns the mime type extracted from the field's C<Content-Type> header, or
 C<undef> if none was present.
 =item charset
 Returns the charset extracted from the field's C<Content-Type> header, or
 C<undef> if none was present.
 =item length
 Returns the byte length of the field value.
 =item data
 Returns a copy of the field value as a byte string. You'll want to avoid using
 this on large fields.
 =item value
 Returns a copy of the field value as a Unicode string. Uses C<utf8_decode()>
 from L<FU::Util>, so also throws an error if the value contains control
 characters.
 =item substr($off, $len)
 Equivalent to calling C<substr()> on the string returned by C<data>, but avoids
 a copy of the entire field value.
 =item syswrite($fh)
 Write the field value to C<$fh> using Perl's C<syswrite()>, returns C<undef> on
 error or the number of bytes written on success.
 Can be used to write uploaded file data to a file or send it over a socket or
 pipe, without making a full in-memory copy of the data.
 =item save($fn)
 Save the field value to the file C<$fn>, throws an error on failure.
 =item describe
 Returns a human-readable string to describe this field. Mainly for debugging
 purposes, the exact format is subject to change.
 =back
 =head1 COPYRIGHT
 MIT.
 =head1 AUTHOR
 Yorhel <projects@yorhel.nl>
--- a/FU/Pg.pm
+++ b/FU/Pg.pm
@ -400,9 +400,7 @@ results into Perl values.
 Observed query preparation time, in seconds, including network round-trip.
 Returns 0 if a cached prepared statement was used or C<undef> if the query was
-executed without a separate preparation phase (currently only happens with C<<
+executed without a separate preparation phase.
 $conn->exec() >>, but support for direct query execution may be added for other
 queries in the future as well).
 =item $st->get_cache
--- a/FU/SQL.pm
+++ b/FU/SQL.pm
@ -356,12 +356,12 @@ values. This function results in different SQL depending on the C<in_style>
 option given to C<compile()>. The default C<'dbi'> style passes each value as a
 bind parameter:
-  SQL 'WHERE id', IN([1, 2, 3, 4]);
+  SQL 'WHERE id', IN [1, 2, 3, 4];
  # 'WHERE id IN(?, ?, ?, ?)', parameters: 1, 2, 3, 4
 The C<'pg'> style passes the entire array as a single bind parameter instead:
-  SQL 'WHERE id', IN([1, 2, 3, 4]);
+  SQL 'WHERE id', IN [1, 2, 3, 4];
  # 'WHERE id = ANY(?)', parameter: [1, 2, 3, 4]
 The C<'pg'> style allows for more efficient re-use of cached prepared
@ -372,7 +372,7 @@ with L<DBD::Pg> or L<Pg::PQ>.
 Can be used in the C<$hashref> versions of C<AND>, C<OR> and C<WHERE> as well:
-  WHERE { id => IN([1, 2]) }
+  WHERE { id => IN [1, 2] }
  # 'WHERE id IN(?, ?)'
 =back
--- a/FU/XMLWriter.pm
+++ b/FU/XMLWriter.pm
@ -146,7 +146,7 @@ These functions all return a byte string with (UTF-8) encoded XML.
 =item fragment($block)
-Executes C<$block> and captures the output of all I</"Output functions">
+Executes C<$block> and captures the output of all L</"Output functions">
 called within the same scope into a string. This function can be safely nested:
  my $string = fragment {
--- a/t/multipart.t
+++ b/t/multipart.t
@ -0,0 +1,47 @@
 use v5.36;
 use Test::More;
 use FU::MultipartFormData;
 # Example based on https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/POST
 my $t = <<'_' =~ s/\n/\r\n/rg;
 --delimiter12345
 Content-Disposition: form-data; name="field1"
 content-type: hello; charset=x
 value1
 --delimiter12345
 Content-Type: text
 Content-Disposition: form-data; filename="example.txt"; name=field2
 value2
 --delimiter12345--
 _
 my $l = FU::MultipartFormData->parse('multipart/form-data;boundary="delimiter12345"', $t);
 is scalar @$l, 2;
 my $v = $l->[0];
 is $v->name, 'field1';
 is $v->filename, undef;
 is $v->mime, 'hello';
 is $v->charset, 'x';
 is $v->length, 6;
 is $v->data, 'value1';
 is $v->substr(4), 'e1';
 is $v->substr(1, 2), 'al';
 is $v->substr(-2, 1), 'e';
 is $v->substr(-2, 5), 'e1';
 is $v->substr(-100, 2), 'va';
 is $v->substr(1, -3), 'al';
 $v = $l->[1];
 is $v->name, 'field2';
 is $v->filename, 'example.txt';
 is $v->mime, 'text';
 is $v->charset, undef;
 is $v->length, 6;
 is $v->data, 'value2';
 done_testing;