FU: Support multipart file uploads + some doc fixes

API is not super convenient and implementation is lousy, but uploading files is not a super common operation so that should be fine. At least it supports large files with only a single in-memory copy.
2025-03-08 14:02:51 +01:00 · 2025-03-08 14:02:51 +01:00 · 17176738a0
commit 17176738a0
parent e5755ddd80
6 changed files with 280 additions and 13 deletions
--- a/FU/MultipartFormData.pm
+++ b/FU/MultipartFormData.pm
@ -0,0 +1,209 @@
+package FU::MultipartFormData;
+use v5.36;
+use Carp 'confess';
+use FU::Util 'utf8_decode';
+
+sub _arg($d) { $d =~ s/^\s+//r =~ s/\s+$//r =~ s/^"(.+)"$/$1/r }
+
+sub parse($pkg, $header, $data) {
+    confess "Invalid multipart header '$header'"
+        if $header !~ m{^multipart/form-data\s*;\s*boundary\s*=(.+)$};
+    my $boundary = _arg $1;
+    confess "Invalid multipart boundary '$boundary'" if $boundary !~ /^[\x21-\x7e]+$/;
+    utf8::encode($boundary);
+
+    my @a;
+    while ($data =~ m{--\Q$boundary\E(?:--\r\n|\r\n((?:.+\r\n)+)\r\n)}xg) {
+        my $hdrs = $1;
+        $a[$#a]{length} = $-[0] - 2 - $a[$#a]{start} if @a;
+        if (!$hdrs) {
+            confess "Trailing garbage" if pos $data != length $data;
+            last;
+        }
+
+        my $d = bless {
+            data => $data,
+            start => pos $data,
+        }, $pkg;
+
+        confess "Missing content-disposition header" if $hdrs !~ /content-disposition:\s*form-data;(.+)/i;
+        my $v = $1;
+        confess "Missing 'name' parameter" if $v !~ /[;\s]name=([^[;\s]+)/;
+        $d->{name} = utf8_decode _arg $1;
+        $d->{filename} = utf8_decode _arg $1 if $v =~ /[;\s]filename=([^;\s]+)/;
+
+        if ($hdrs =~ /content-type:\s*([^;\s]+)(?:\s*;\s*charset=([^;\s]+))?/i) {
+            $d->{mime} = utf8_decode _arg $1;
+            $d->{charset} = utf8_decode _arg $2 if $2;
+        }
+        push @a, $d;
+    }
+    confess "Missing end-of-data marker" if @a && !defined $a[$#a]{length};
+    \@a
+}
+
+sub name     { $_[0]{name} }
+sub filename { $_[0]{filename} }
+sub mime     { $_[0]{mime} }
+sub charset  { $_[0]{charset} }
+sub length   { $_[0]{length} }
+
+sub substr($o,$off,$len=undef) {
+    $off += $o->{length} if $off < 0;
+    $off = 0 if $off < 0;
+    $off = $o->{length} if $off > $o->{length};
+
+    $len //= $o->{length} - $off;
+    $len += $o->{length} - 1 if $len < 0;
+    $len = 0 if $len < 0;
+    $len = $o->{length} - $off if $len > $o->{length} - $off;
+
+    substr $o->{data}, $o->{start} + $off, $len;
+}
+
+sub data     { $_[0]->substr(0) }
+sub value    { utf8_decode $_[0]->data }
+
+sub syswrite($o, $fh) {
+    my $off = $o->{start};
+    my $end = $o->{start} + $o->{length};
+    while ($off < $end) {
+        my $r = syswrite $fh, $o->{data}, $end-$off, $off;
+        return if !defined $r;
+        $off += $r;
+    }
+    $o->{length};
+}
+
+sub save($o, $fn) {
+    open my $F, '>', $fn or confess "Error opening '$fn': $!";
+    $o->syswrite($F) or confess "Error writing to '$fn': $!";
+}
+
+sub describe($o) {
+    my $head = eval { utf8_decode $o->substr(0, 100) };
+    if (defined $head && $head =~ /\n/) {
+        ($head) = split /\n/, $head, 2;
+        $head .= '...';
+    } elsif (defined $head && $o->{length} > 100) {
+        $head .= '...';
+    }
+    $o->{name}.': '.join ' ',
+        $o->{filename} ? "filename=$o->{filename}" : (),
+        $o->{mime} ? "mime=$o->{mime}" : (),
+        $o->{charset} ? "charset=$o->{charset}" : (),
+        "length=$o->{length}",
+        defined $head ? "value=$head" : ();
+}
+
+1;
+__END__
+
+=head1 NAME
+
+FU::MultipartFormData - Parse multipart/form-data
+
+=head1 SYNOPSIS
+
+  my $fields = FU::MultipartFormData->parse($content_type_header, $request_body);
+
+  for my $f (@$fields) {
+      print "%s   %d\n", $f->name, $f->length;
+
+      $f->save('file.png') if $f->name eq 'image';
+  }
+
+=head1 DESCRIPTION
+
+This is a tiny module to parse an HTTP request body encoded as
+C<multipart/form-data>, which is typically used to handle file uploads.
+
+The entire request body is assumed to be in memory as a Perl string, but this
+module makes an attempt to avoid any further copies of data values.
+
+=head2 Parsing
+
+=over FU::MultipartFormData->parse($header, $body)
+
+Returns an array of field objects from the given C<$header>, which must be a
+valid value for the C<Content-Type> request header, and the given C<$body>,
+which must hold the request body as a byte string. An error is thrown if the
+header is not valid or parsing failed.
+
+This module is pretty lousy and does not fully comform to any HTTP standards,
+but it does happen to be able to parse POST data from any browser that I've
+tried.
+
+=back
+
+=head2 Field Object
+
+Each field is parsed into a field object that supports the following methods:
+
+=over
+
+=item name
+
+Returns the field name as a Perl Unicode string.
+
+=item filename
+
+Returns the filename as a Perl Unicode string, or C<undef> if no filename was
+provided.
+
+=item mime
+
+Returns the mime type extracted from the field's C<Content-Type> header, or
+C<undef> if none was present.
+
+=item charset
+
+Returns the charset extracted from the field's C<Content-Type> header, or
+C<undef> if none was present.
+
+=item length
+
+Returns the byte length of the field value.
+
+=item data
+
+Returns a copy of the field value as a byte string. You'll want to avoid using
+this on large fields.
+
+=item value
+
+Returns a copy of the field value as a Unicode string. Uses C<utf8_decode()>
+from L<FU::Util>, so also throws an error if the value contains control
+characters.
+
+=item substr($off, $len)
+
+Equivalent to calling C<substr()> on the string returned by C<data>, but avoids
+a copy of the entire field value.
+
+=item syswrite($fh)
+
+Write the field value to C<$fh> using Perl's C<syswrite()>, returns C<undef> on
+error or the number of bytes written on success.
+
+Can be used to write uploaded file data to a file or send it over a socket or
+pipe, without making a full in-memory copy of the data.
+
+=item save($fn)
+
+Save the field value to the file C<$fn>, throws an error on failure.
+
+=item describe
+
+Returns a human-readable string to describe this field. Mainly for debugging
+purposes, the exact format is subject to change.
+
+=back
+
+=head1 COPYRIGHT
+
+MIT.
+
+=head1 AUTHOR
+
+Yorhel <projects@yorhel.nl>