212 lines
5.7 KiB
Perl
212 lines
5.7 KiB
Perl
package FU::MultipartFormData 1.1;
|
|
use v5.36;
|
|
use Carp 'confess';
|
|
use FU::Util 'utf8_decode';
|
|
|
|
sub _arg($d) { $d =~ s{^"(.+)"$}{$1 =~ s/\\([\\"])/$1/rg}er }
|
|
|
|
sub parse($pkg, $header, $data) {
|
|
confess "Invalid multipart header '$header'"
|
|
if $header !~ m{^multipart/form-data\s*;\s*boundary\s*=(.+)$};
|
|
my $boundary = _arg $1;
|
|
confess "Invalid multipart boundary '$boundary'" if $boundary !~ /^[\x21-\x7e]+$/;
|
|
utf8::encode($boundary);
|
|
|
|
my @a;
|
|
while ($data =~ m{--\Q$boundary\E(?:--\r\n|\r\n((?:.+\r\n)+)\r\n)}xg) {
|
|
my $hdrs = $1;
|
|
$a[$#a]{length} = $-[0] - 2 - $a[$#a]{start} if @a;
|
|
if (!$hdrs) {
|
|
confess "Trailing garbage" if pos $data != length $data;
|
|
last;
|
|
}
|
|
|
|
my $d = bless {
|
|
data => $data,
|
|
start => pos $data,
|
|
}, $pkg;
|
|
|
|
confess "Missing content-disposition header" if $hdrs !~ /content-disposition:\s*form-data(.+)/i;
|
|
my $v = $1;
|
|
my $pvalue = qr/("(?:\\[\\"]|[^\\"\r\n]+)*"|[^\s;"]*)/;
|
|
confess "Missing 'name' parameter" if $v !~ /;\s*name\s*=\s*$pvalue/;
|
|
$d->{name} = utf8_decode _arg $1;
|
|
$d->{filename} = utf8_decode _arg $1 if $v =~ /;\s*filename\s*=\s*$pvalue/;
|
|
|
|
if ($hdrs =~ /content-type:\s*$pvalue(?:\s*;\s*charset\s*=\s*$pvalue)?/i) {
|
|
$d->{mime} = utf8_decode _arg $1;
|
|
$d->{charset} = utf8_decode _arg $2 if $2;
|
|
}
|
|
push @a, $d;
|
|
}
|
|
confess "Missing end-of-data marker" if @a && !defined $a[$#a]{length};
|
|
\@a
|
|
}
|
|
|
|
sub name { $_[0]{name} }
|
|
sub filename { $_[0]{filename} }
|
|
sub mime { $_[0]{mime} }
|
|
sub charset { $_[0]{charset} }
|
|
sub length { $_[0]{length} }
|
|
|
|
sub substr($o,$off,$len=undef) {
|
|
$off += $o->{length} if $off < 0;
|
|
$off = 0 if $off < 0;
|
|
$off = $o->{length} if $off > $o->{length};
|
|
|
|
$len //= $o->{length} - $off;
|
|
$len += $o->{length} - 1 if $len < 0;
|
|
$len = 0 if $len < 0;
|
|
$len = $o->{length} - $off if $len > $o->{length} - $off;
|
|
|
|
substr $o->{data}, $o->{start} + $off, $len;
|
|
}
|
|
|
|
sub data { $_[0]->substr(0) }
|
|
sub value { utf8_decode $_[0]->data }
|
|
|
|
sub syswrite($o, $fh) {
|
|
my $off = $o->{start};
|
|
my $end = $o->{start} + $o->{length};
|
|
while ($off < $end) {
|
|
my $r = syswrite $fh, $o->{data}, $end-$off, $off;
|
|
return if !defined $r;
|
|
$off += $r;
|
|
}
|
|
$o->{length};
|
|
}
|
|
|
|
sub save($o, $fn) {
|
|
open my $F, '>', $fn or confess "Error opening '$fn': $!";
|
|
defined $o->syswrite($F) or confess "Error writing to '$fn': $!";
|
|
}
|
|
|
|
sub describe($o) {
|
|
my $head = eval { utf8_decode $o->substr(0, 100) };
|
|
if (defined $head && $head =~ /\n/) {
|
|
($head) = split /\n/, $head, 2;
|
|
$head .= '...';
|
|
} elsif (defined $head && $o->{length} > 100) {
|
|
$head .= '...';
|
|
}
|
|
$o->{name}.': '.join ' ',
|
|
$o->{filename} ? "filename=$o->{filename}" : (),
|
|
$o->{mime} ? "mime=$o->{mime}" : (),
|
|
$o->{charset} ? "charset=$o->{charset}" : (),
|
|
"length=$o->{length}",
|
|
defined $head ? "value=$head" : ();
|
|
}
|
|
|
|
1;
|
|
__END__
|
|
|
|
=head1 NAME
|
|
|
|
FU::MultipartFormData - Parse multipart/form-data
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
my $fields = FU::MultipartFormData->parse($content_type_header, $request_body);
|
|
|
|
for my $f (@$fields) {
|
|
print "%s %d\n", $f->name, $f->length;
|
|
|
|
$f->save('file.png') if $f->name eq 'image';
|
|
}
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This is a tiny module to parse an HTTP request body encoded as
|
|
C<multipart/form-data>, which is typically used to handle file uploads.
|
|
|
|
The entire request body is assumed to be in memory as a Perl string, but this
|
|
module makes an attempt to avoid any further copies of data values.
|
|
|
|
=head1 Parsing
|
|
|
|
=over
|
|
|
|
=item FU::MultipartFormData->parse($header, $body)
|
|
|
|
Returns an array of field objects from the given C<$header>, which must be a
|
|
valid value for the C<Content-Type> request header, and the given C<$body>,
|
|
which must hold the request body as a byte string. An error is thrown if the
|
|
header is not valid or parsing failed.
|
|
|
|
This module is pretty lousy and does not fully comform to any HTTP standards,
|
|
but it does happen to be able to parse POST data from any browser that I've
|
|
tried.
|
|
|
|
=back
|
|
|
|
=head1 Field Object
|
|
|
|
Each field is parsed into a field object that supports the following methods:
|
|
|
|
=over
|
|
|
|
=item name
|
|
|
|
Returns the field name as a Perl Unicode string.
|
|
|
|
=item filename
|
|
|
|
Returns the filename as a Perl Unicode string, or C<undef> if no filename was
|
|
provided.
|
|
|
|
=item mime
|
|
|
|
Returns the mime type extracted from the field's C<Content-Type> header, or
|
|
C<undef> if none was present.
|
|
|
|
=item charset
|
|
|
|
Returns the charset extracted from the field's C<Content-Type> header, or
|
|
C<undef> if none was present.
|
|
|
|
=item length
|
|
|
|
Returns the byte length of the field value.
|
|
|
|
=item data
|
|
|
|
Returns a copy of the field value as a byte string. You'll want to avoid using
|
|
this on large fields.
|
|
|
|
=item value
|
|
|
|
Returns a copy of the field value as a Unicode string. Uses C<utf8_decode()>
|
|
from L<FU::Util>, so also throws an error if the value contains control
|
|
characters.
|
|
|
|
=item substr($off, $len)
|
|
|
|
Equivalent to calling C<substr()> on the string returned by C<data>, but avoids
|
|
a copy of the entire field value.
|
|
|
|
=item syswrite($fh)
|
|
|
|
Write the field value to C<$fh> using Perl's C<syswrite()>, returns C<undef> on
|
|
error or the number of bytes written on success.
|
|
|
|
Can be used to write uploaded file data to a file or send it over a socket or
|
|
pipe, without making a full in-memory copy of the data.
|
|
|
|
=item save($fn)
|
|
|
|
Save the field value to the file C<$fn>, throws an error on failure.
|
|
|
|
=item describe
|
|
|
|
Returns a human-readable string to describe this field. Mainly for debugging
|
|
purposes, the exact format is subject to change.
|
|
|
|
=back
|
|
|
|
=head1 COPYRIGHT
|
|
|
|
MIT.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Yorhel <projects@yorhel.nl>
|