FU: Support multipart file uploads + some doc fixes
API is not super convenient and implementation is lousy, but uploading files is not a super common operation so that should be fine. At least it supports large files with only a single in-memory copy.
This commit is contained in:
parent
e5755ddd80
commit
17176738a0
6 changed files with 280 additions and 13 deletions
209
FU/MultipartFormData.pm
Normal file
209
FU/MultipartFormData.pm
Normal file
|
|
@ -0,0 +1,209 @@
|
|||
package FU::MultipartFormData;
|
||||
use v5.36;
|
||||
use Carp 'confess';
|
||||
use FU::Util 'utf8_decode';
|
||||
|
||||
sub _arg($d) { $d =~ s/^\s+//r =~ s/\s+$//r =~ s/^"(.+)"$/$1/r }
|
||||
|
||||
sub parse($pkg, $header, $data) {
|
||||
confess "Invalid multipart header '$header'"
|
||||
if $header !~ m{^multipart/form-data\s*;\s*boundary\s*=(.+)$};
|
||||
my $boundary = _arg $1;
|
||||
confess "Invalid multipart boundary '$boundary'" if $boundary !~ /^[\x21-\x7e]+$/;
|
||||
utf8::encode($boundary);
|
||||
|
||||
my @a;
|
||||
while ($data =~ m{--\Q$boundary\E(?:--\r\n|\r\n((?:.+\r\n)+)\r\n)}xg) {
|
||||
my $hdrs = $1;
|
||||
$a[$#a]{length} = $-[0] - 2 - $a[$#a]{start} if @a;
|
||||
if (!$hdrs) {
|
||||
confess "Trailing garbage" if pos $data != length $data;
|
||||
last;
|
||||
}
|
||||
|
||||
my $d = bless {
|
||||
data => $data,
|
||||
start => pos $data,
|
||||
}, $pkg;
|
||||
|
||||
confess "Missing content-disposition header" if $hdrs !~ /content-disposition:\s*form-data;(.+)/i;
|
||||
my $v = $1;
|
||||
confess "Missing 'name' parameter" if $v !~ /[;\s]name=([^[;\s]+)/;
|
||||
$d->{name} = utf8_decode _arg $1;
|
||||
$d->{filename} = utf8_decode _arg $1 if $v =~ /[;\s]filename=([^;\s]+)/;
|
||||
|
||||
if ($hdrs =~ /content-type:\s*([^;\s]+)(?:\s*;\s*charset=([^;\s]+))?/i) {
|
||||
$d->{mime} = utf8_decode _arg $1;
|
||||
$d->{charset} = utf8_decode _arg $2 if $2;
|
||||
}
|
||||
push @a, $d;
|
||||
}
|
||||
confess "Missing end-of-data marker" if @a && !defined $a[$#a]{length};
|
||||
\@a
|
||||
}
|
||||
|
||||
sub name { $_[0]{name} }
|
||||
sub filename { $_[0]{filename} }
|
||||
sub mime { $_[0]{mime} }
|
||||
sub charset { $_[0]{charset} }
|
||||
sub length { $_[0]{length} }
|
||||
|
||||
sub substr($o,$off,$len=undef) {
|
||||
$off += $o->{length} if $off < 0;
|
||||
$off = 0 if $off < 0;
|
||||
$off = $o->{length} if $off > $o->{length};
|
||||
|
||||
$len //= $o->{length} - $off;
|
||||
$len += $o->{length} - 1 if $len < 0;
|
||||
$len = 0 if $len < 0;
|
||||
$len = $o->{length} - $off if $len > $o->{length} - $off;
|
||||
|
||||
substr $o->{data}, $o->{start} + $off, $len;
|
||||
}
|
||||
|
||||
sub data { $_[0]->substr(0) }
|
||||
sub value { utf8_decode $_[0]->data }
|
||||
|
||||
sub syswrite($o, $fh) {
|
||||
my $off = $o->{start};
|
||||
my $end = $o->{start} + $o->{length};
|
||||
while ($off < $end) {
|
||||
my $r = syswrite $fh, $o->{data}, $end-$off, $off;
|
||||
return if !defined $r;
|
||||
$off += $r;
|
||||
}
|
||||
$o->{length};
|
||||
}
|
||||
|
||||
sub save($o, $fn) {
|
||||
open my $F, '>', $fn or confess "Error opening '$fn': $!";
|
||||
$o->syswrite($F) or confess "Error writing to '$fn': $!";
|
||||
}
|
||||
|
||||
sub describe($o) {
|
||||
my $head = eval { utf8_decode $o->substr(0, 100) };
|
||||
if (defined $head && $head =~ /\n/) {
|
||||
($head) = split /\n/, $head, 2;
|
||||
$head .= '...';
|
||||
} elsif (defined $head && $o->{length} > 100) {
|
||||
$head .= '...';
|
||||
}
|
||||
$o->{name}.': '.join ' ',
|
||||
$o->{filename} ? "filename=$o->{filename}" : (),
|
||||
$o->{mime} ? "mime=$o->{mime}" : (),
|
||||
$o->{charset} ? "charset=$o->{charset}" : (),
|
||||
"length=$o->{length}",
|
||||
defined $head ? "value=$head" : ();
|
||||
}
|
||||
|
||||
1;
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
FU::MultipartFormData - Parse multipart/form-data
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
my $fields = FU::MultipartFormData->parse($content_type_header, $request_body);
|
||||
|
||||
for my $f (@$fields) {
|
||||
print "%s %d\n", $f->name, $f->length;
|
||||
|
||||
$f->save('file.png') if $f->name eq 'image';
|
||||
}
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
This is a tiny module to parse an HTTP request body encoded as
|
||||
C<multipart/form-data>, which is typically used to handle file uploads.
|
||||
|
||||
The entire request body is assumed to be in memory as a Perl string, but this
|
||||
module makes an attempt to avoid any further copies of data values.
|
||||
|
||||
=head2 Parsing
|
||||
|
||||
=over FU::MultipartFormData->parse($header, $body)
|
||||
|
||||
Returns an array of field objects from the given C<$header>, which must be a
|
||||
valid value for the C<Content-Type> request header, and the given C<$body>,
|
||||
which must hold the request body as a byte string. An error is thrown if the
|
||||
header is not valid or parsing failed.
|
||||
|
||||
This module is pretty lousy and does not fully comform to any HTTP standards,
|
||||
but it does happen to be able to parse POST data from any browser that I've
|
||||
tried.
|
||||
|
||||
=back
|
||||
|
||||
=head2 Field Object
|
||||
|
||||
Each field is parsed into a field object that supports the following methods:
|
||||
|
||||
=over
|
||||
|
||||
=item name
|
||||
|
||||
Returns the field name as a Perl Unicode string.
|
||||
|
||||
=item filename
|
||||
|
||||
Returns the filename as a Perl Unicode string, or C<undef> if no filename was
|
||||
provided.
|
||||
|
||||
=item mime
|
||||
|
||||
Returns the mime type extracted from the field's C<Content-Type> header, or
|
||||
C<undef> if none was present.
|
||||
|
||||
=item charset
|
||||
|
||||
Returns the charset extracted from the field's C<Content-Type> header, or
|
||||
C<undef> if none was present.
|
||||
|
||||
=item length
|
||||
|
||||
Returns the byte length of the field value.
|
||||
|
||||
=item data
|
||||
|
||||
Returns a copy of the field value as a byte string. You'll want to avoid using
|
||||
this on large fields.
|
||||
|
||||
=item value
|
||||
|
||||
Returns a copy of the field value as a Unicode string. Uses C<utf8_decode()>
|
||||
from L<FU::Util>, so also throws an error if the value contains control
|
||||
characters.
|
||||
|
||||
=item substr($off, $len)
|
||||
|
||||
Equivalent to calling C<substr()> on the string returned by C<data>, but avoids
|
||||
a copy of the entire field value.
|
||||
|
||||
=item syswrite($fh)
|
||||
|
||||
Write the field value to C<$fh> using Perl's C<syswrite()>, returns C<undef> on
|
||||
error or the number of bytes written on success.
|
||||
|
||||
Can be used to write uploaded file data to a file or send it over a socket or
|
||||
pipe, without making a full in-memory copy of the data.
|
||||
|
||||
=item save($fn)
|
||||
|
||||
Save the field value to the file C<$fn>, throws an error on failure.
|
||||
|
||||
=item describe
|
||||
|
||||
Returns a human-readable string to describe this field. Mainly for debugging
|
||||
purposes, the exact format is subject to change.
|
||||
|
||||
=back
|
||||
|
||||
=head1 COPYRIGHT
|
||||
|
||||
MIT.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Yorhel <projects@yorhel.nl>
|
||||
Loading…
Add table
Add a link
Reference in a new issue