FU: Some FastCGI fixes; FU::Util: utf8_decode & URI escaping
This commit is contained in:
parent
d9fba4e8d8
commit
90cfd66069
6 changed files with 146 additions and 33 deletions
39
FU.pm
39
FU.pm
|
|
@ -163,13 +163,6 @@ sub _monitor {
|
|||
}
|
||||
|
||||
|
||||
sub _decode_utf8 {
|
||||
return if !defined $_[0];
|
||||
fu->error(400, 'Invalid UTF-8 in request') if !utf8::decode($_[0]);
|
||||
# Disallow any control codes, except for x09 (tab), x0a (newline) and x0d (carriage return)
|
||||
fu->error(400, 'Invalid control character in request') if $_[0] =~ /[\x00-\x08\x0b\x0c\x0e-\x1f]/;
|
||||
}
|
||||
|
||||
our $hdrname_re = qr/[!#\$\%&'\*\+-\.^_`\|~0-9a-zA-Z]{1,127}/;
|
||||
our $method_re = qr/(?:GET|POST|DELETE|OPTIONS|PUT|PATCH|QUERY)/;
|
||||
|
||||
|
|
@ -181,8 +174,7 @@ sub _read_req_http($sock, $req) {
|
|||
fu->error(400, 'Client disconnect before request was read') if !defined $line;
|
||||
fu->error(400, 'Invalid request') if $line !~ /^($method_re)\s+(\S+)\s+HTTP\/1\.[01]\r\n$/;
|
||||
$req->{method} = $1;
|
||||
($req->{path}, $req->{qs}) = split /\?/, $2 =~ s{^https?://[^/]+/}{/}r, 2;
|
||||
$req->{path} =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
|
||||
$req->{path} = $2 =~ s{^https?://[^/]+/}{/}r;
|
||||
|
||||
while (1) {
|
||||
# Turns out header line folding has been officially deprecated, so I'm
|
||||
|
|
@ -225,7 +217,7 @@ sub _read_req($c) {
|
|||
: $r == -2 ? "I/O error while reading from FastCGI socket\n"
|
||||
: $r == -3 ? "FastCGI protocol error\n"
|
||||
: $r == -4 ? "Too long FastCGI parameter\n"
|
||||
: $r == -5 ? "Too long request body\n" : undef;
|
||||
: $r == -5 ? "Too long request body\n" : undef if $r != -7;
|
||||
delete $c->{fcgi_obj};
|
||||
fu->error(-1);
|
||||
}
|
||||
|
|
@ -236,7 +228,12 @@ sub _read_req($c) {
|
|||
|
||||
# The HTTP reader above and the FastCGI XS reader operate on bytes.
|
||||
# Decode these into Unicode strings and check for special characters.
|
||||
_decode_utf8 $_ for ($REQ->{path}, $REQ->{qs}, values $REQ->{hdr}->%*);
|
||||
eval { FU::Util::utf8_decode($_); 1} || fu->err(400, $@)
|
||||
for ($REQ->{path}, $REQ->{qs}, values $REQ->{hdr}->%*);
|
||||
|
||||
($REQ->{path}, my $qs) = split /\?/, $REQ->{path}//'', 2;
|
||||
$REQ->{qs} //= $qs;
|
||||
$REQ->{path} = FU::Util::uri_unescape($REQ->{path});
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -418,7 +415,7 @@ sub _spawn {
|
|||
|
||||
if (!keys %c) {
|
||||
%c = (
|
||||
http => $ENV{FU_HTTP} // '127.0.0.1:3000',
|
||||
http => $ENV{FU_HTTP},
|
||||
fcgi => $ENV{FU_FCGI},
|
||||
proc => $ENV{FU_PROC} // 1,
|
||||
monitor => $ENV{FU_MONITOR} // 0,
|
||||
|
|
@ -447,8 +444,17 @@ sub _spawn {
|
|||
my $need_supervisor = !$c{supervisor_sock} && !$c{client_sock} && ($c{proc} > 1 || $c{monitor} || $c{max_reqs});
|
||||
return if !@_ && !$need_supervisor;
|
||||
|
||||
if (!$c{http} && !$c{fcgi} && !$c{listen_sock}) {
|
||||
# When spawned under FastCGI, stdin is our listen socket
|
||||
local $_ = getpeername \*STDIN;
|
||||
if ($!{ENOTCONN}) {
|
||||
$c{listen_sock} = IO::Socket->new_from_fd(0, 'r');
|
||||
$c{listen_proto} = 'fcgi';
|
||||
}
|
||||
};
|
||||
$c{http} //= '127.0.0.1:3000';
|
||||
|
||||
if (!$c{listen_sock}) {
|
||||
# TODO: check if stdin is a fastcgi sock
|
||||
$c{listen_proto} //= $c{fcgi} ? 'fcgi' : 'http';
|
||||
my $addr = $c{$c{listen_proto}};
|
||||
$c{listen_sock} = IO::Socket->new(
|
||||
|
|
@ -520,6 +526,7 @@ sub method { $FU::REQ->{method} }
|
|||
sub header($, $h) { $FU::REQ->{hdr}{ lc $h } }
|
||||
sub headers { $FU::REQ->{hdr} }
|
||||
sub ip { $FU::REQ->{ip} }
|
||||
sub query { $FU::REQ->{qs} } # TODO: parse & validate
|
||||
|
||||
|
||||
|
||||
|
|
@ -773,7 +780,7 @@ this option is set, it takes precedence over the HTTP option.
|
|||
Nginx and Apache will, in their default configuration, use a separate
|
||||
connection per request. If you have a more esoteric setup, you should probably
|
||||
be aware of the following: this implementation does not support multiplexing or
|
||||
pipelining. It does support keepalive, but this come with a few caveats:
|
||||
pipelining. It does support keepalive, but this comes with a few caveats:
|
||||
|
||||
=over
|
||||
|
||||
|
|
@ -786,7 +793,7 @@ I<after> each request rather than before, so clients may get a response from
|
|||
stale code.
|
||||
|
||||
=item * When worker processes shut down, either through C<--max-reqs> or in
|
||||
response to a signal, there is the possibility that an incoming request on an
|
||||
response to a signal, there is a possibility that an incoming request on an
|
||||
existing connection gets interrupted.
|
||||
|
||||
=back
|
||||
|
|
@ -822,6 +829,8 @@ Set the initial value for C<FU::debug>.
|
|||
|
||||
=item LISTEN_FD=num
|
||||
|
||||
=item LISTEN_PROTO=http/fcgi
|
||||
|
||||
Listen for incoming connections on the given file descriptor instead of
|
||||
creating a new listen socket. This is mainly useful if you are using an
|
||||
external process manager.
|
||||
|
|
|
|||
5
FU.xs
5
FU.xs
|
|
@ -116,7 +116,7 @@ void new(int fd, int maxproc)
|
|||
void read_req(fufcgi *ctx, SV *headers, SV *params)
|
||||
CODE:
|
||||
ST(0) = sv_2mortal(newSViv(fufcgi_read_req(aTHX_ ctx, headers, params)));
|
||||
ctx->off = 8;
|
||||
ctx->off = ctx->len = 0;
|
||||
|
||||
void keepalive(fufcgi *ctx)
|
||||
CODE:
|
||||
|
|
@ -130,8 +130,7 @@ void print(fufcgi *ctx, SV *sv)
|
|||
|
||||
void flush(fufcgi *ctx)
|
||||
CODE:
|
||||
fufcgi_flush(ctx);
|
||||
ctx->off = ctx->len = ctx->reqid = 0;
|
||||
fufcgi_done(ctx);
|
||||
|
||||
void DESTROY(fufcgi *ctx)
|
||||
CODE:
|
||||
|
|
|
|||
52
FU/Util.pm
52
FU/Util.pm
|
|
@ -2,13 +2,35 @@ package FU::Util 0.1;
|
|||
|
||||
use v5.36;
|
||||
use FU::XS;
|
||||
use Carp 'confess';
|
||||
use Exporter 'import';
|
||||
|
||||
our @EXPORT_OK = qw/
|
||||
json_format json_parse
|
||||
utf8_decode uri_escape uri_unescape
|
||||
fdpass_send fdpass_recv
|
||||
/;
|
||||
|
||||
sub utf8_decode :prototype($) {
|
||||
return if !defined $_[0];
|
||||
confess 'Invalid UTF-8' if !utf8::decode($_[0]);
|
||||
confess 'Invalid control character' if $_[0] =~ /[\x00-\x08\x0b\x0c\x0e-\x1f]/;
|
||||
$_[0]
|
||||
}
|
||||
|
||||
sub uri_escape :prototype($) {
|
||||
utf8::encode(local $_ = shift);
|
||||
s/([^A-Za-z0-9._~-])/sprintf '%%%02X', ord $1/eg;
|
||||
$_;
|
||||
}
|
||||
|
||||
sub uri_unescape :prototype($) {
|
||||
return if !defined $_[0];
|
||||
utf8::encode(local $_ = shift);
|
||||
s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
|
||||
utf8_decode $_;
|
||||
}
|
||||
|
||||
1;
|
||||
__END__
|
||||
|
||||
|
|
@ -155,6 +177,36 @@ L<JSON::PP> and L<Cpanel::JSON::XS> are perfectly fine alternatives.
|
|||
L<JSON::SIMD> and L<Mojo::JSON> also look like good and maintained candidates.)
|
||||
|
||||
|
||||
=head2 String Utility Functions
|
||||
|
||||
=over
|
||||
|
||||
=item utf8_decode($bytes)
|
||||
|
||||
Convert a (perl-UTF-8 encoded) byte string into a sanitized perl Unicode
|
||||
string. The conversion is performed in-place, so the C<$bytes> argument is
|
||||
turned into a Unicode string. Returns the same string for convenience.
|
||||
|
||||
This function throws an error if the input is not valid UTF-8 or if it contains
|
||||
ASCII control characters - that is, any character between C<0x00> and C<0x1f>
|
||||
except for tab, newline and carriage return.
|
||||
|
||||
(This is a tiny wrapper around C<utf8::decode()> with some extra checks)
|
||||
|
||||
=item uri_escape($string)
|
||||
|
||||
Takes an Unicode string and returns a percent-encoded ASCII string, suitable
|
||||
for use in a query parameter.
|
||||
|
||||
=item uri_unescape($string)
|
||||
|
||||
Takes an Unicode string potentially containing percent-encoding and returns a
|
||||
decoded Unicode string. Also checks for ASCII control characters as per
|
||||
C<utf8_decode()>.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
=head2 File Descriptor Passing
|
||||
|
||||
UNIX sockets (see L<IO::Socket::UNIX>) have the fancy property of letting you
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ Things that may or may not happen:
|
|||
- FU - The website framework, taking inspiration from TUWF.
|
||||
- FU::JSON - JSON::{XS,PP,etc}-compatible wrapper around FU::Util's JSON functions? I prolly won't need this myself, but could be handy.
|
||||
- FU::Log - Basic logger.
|
||||
- FU::Util additions: `uri_escape`, `VNDB::Util::query_encode`, `scrypt`, `urandom`.
|
||||
- FU::Util additions: `VNDB::Util::query_encode`?
|
||||
- FU::Validate - TUWF::Validate & normalization with some improvements.
|
||||
- FU::XML - TUWF::XMLXS with some improvements.
|
||||
- FU::Mailer - Simple sendmail wrapper
|
||||
|
|
|
|||
42
c/fcgi.c
42
c/fcgi.c
|
|
@ -11,12 +11,15 @@
|
|||
#define FCGI_UNKNOWN_TYPE 11
|
||||
|
||||
#define FUFE_OK 0
|
||||
#define FUFE_EOF -1 /* protocol-level EOF */
|
||||
#define FUFE_EOF -1 /* unexpected protocol-level EOF */
|
||||
#define FUFE_IO -2
|
||||
#define FUFE_PROTO -3
|
||||
#define FUFE_PLEN -4
|
||||
#define FUFE_CLEN -5
|
||||
#define FUFE_ABORT -6 /* explicit abort or client-level EOF */
|
||||
#define FUFE_NOREQ -7 /* protocol-level EOF before we received anything */
|
||||
|
||||
#define FUFCGI_MAX_DATA 65535
|
||||
|
||||
typedef struct {
|
||||
SV *self;
|
||||
|
|
@ -29,7 +32,7 @@ typedef struct {
|
|||
HV *params;
|
||||
|
||||
/* Single buffer for reading & writing, we only do one thing at a time */
|
||||
char buf[8 + 65536 + 256]; /* fits a maximum-length fcgi record */
|
||||
char buf[8 + FUFCGI_MAX_DATA + 255]; /* fits a maximum-length fcgi record */
|
||||
int len; /* total number of bytes in the buffer */
|
||||
int off; /* number of bytes consumed */
|
||||
} fufcgi;
|
||||
|
|
@ -154,7 +157,7 @@ static int fufcgi_read_record(fufcgi *ctx, fufcgi_rec *rec) {
|
|||
rec->type = ctx->buf[ctx->off+1];
|
||||
rec->id = fu_frombeU(16, ctx->buf+ctx->off+2);
|
||||
rec->len = fu_frombeU(16, ctx->buf+ctx->off+4);
|
||||
int pad = ctx->buf[ctx->off+6];
|
||||
int pad = (unsigned char)ctx->buf[ctx->off+6];
|
||||
ctx->off += 8;
|
||||
|
||||
if ((r = fufcgi_fill(ctx, rec->len + pad)) != FUFE_OK) return r;
|
||||
|
|
@ -225,7 +228,7 @@ static int fufcgi_read_req_record(fufcgi *ctx, fufcgi_rec *rec) {
|
|||
int r;
|
||||
char tmp[128]; /* Large enough for a FCGI_GET_VALUES_RESULT */
|
||||
while (1) {
|
||||
if ((r = fufcgi_read_record(ctx, rec)) != FUFE_OK) return r;
|
||||
if ((r = fufcgi_read_record(ctx, rec)) != FUFE_OK) return r == FUFE_EOF && ctx->len == 0 ? FUFE_NOREQ : r;
|
||||
|
||||
switch (rec->type) {
|
||||
case FCGI_PARAMS:
|
||||
|
|
@ -405,23 +408,40 @@ static int fufcgi_read_req(pTHX_ fufcgi *ctx, SV *headers, SV *params) {
|
|||
|
||||
static void fufcgi_flush(fufcgi *ctx) {
|
||||
fufcgi_rec hdr;
|
||||
if (ctx->off > 8) {
|
||||
hdr.len = ctx->off - 8;
|
||||
if (ctx->len > 0) {
|
||||
hdr.len = ctx->len;
|
||||
hdr.type = FCGI_STDOUT;
|
||||
hdr.id = ctx->reqid;
|
||||
fufcgi_write_record(ctx, &hdr, ctx->buf);
|
||||
ctx->off = 8;
|
||||
ctx->len = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void fufcgi_print(fufcgi *ctx, const char *buf, int len) {
|
||||
int r;
|
||||
while (len > 0) {
|
||||
r = len > 65535 - ctx->off ? 65535 - ctx->off : len;
|
||||
memcpy(ctx->buf+ctx->off, buf, r);
|
||||
ctx->off += r;
|
||||
r = len > FUFCGI_MAX_DATA - ctx->len ? FUFCGI_MAX_DATA - ctx->len : len;
|
||||
memcpy(ctx->buf+8+ctx->len, buf, r);
|
||||
ctx->len += r;
|
||||
len -= r;
|
||||
buf += r;
|
||||
if (ctx->off >= 65535) fufcgi_flush(ctx);
|
||||
if (ctx->len >= FUFCGI_MAX_DATA) fufcgi_flush(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
static void fufcgi_done(fufcgi *ctx) {
|
||||
fufcgi_rec hdr;
|
||||
fufcgi_flush(ctx);
|
||||
|
||||
hdr.len = 0;
|
||||
hdr.type = FCGI_STDOUT;
|
||||
hdr.id = ctx->reqid;
|
||||
fufcgi_write_record(ctx, &hdr, ctx->buf);
|
||||
|
||||
memcpy(ctx->buf+8, "\0\0\0\0\0\0\0\0", 8); /* FCGI_REQUEST_COMPLETE */
|
||||
hdr.type = FCGI_END_REQUEST;
|
||||
hdr.len = 8;
|
||||
fufcgi_write_record(ctx, &hdr, ctx->buf);
|
||||
|
||||
ctx->reqid = ctx->len = ctx->off = 0;
|
||||
}
|
||||
|
|
|
|||
39
t/fcgi.t
39
t/fcgi.t
|
|
@ -10,8 +10,8 @@ sub start {
|
|||
$f = FU::fcgi::new(fileno $local, 123);
|
||||
}
|
||||
|
||||
sub record($id, $type, $data) {
|
||||
my $pad = rand > 0.5 ? int rand(50) : 0;
|
||||
sub record($id, $type, $data, $pad=undef) {
|
||||
$pad //= rand > 0.5 ? int rand(50) : 0;
|
||||
my $msg = pack('CCnnCC', 1, $type, $id, length($data), $pad, 0) . $data . ("\0"x$pad);
|
||||
is $remote->syswrite($msg, length($msg)), length($msg);
|
||||
}
|
||||
|
|
@ -31,13 +31,19 @@ sub isrec($hdr, $par, $code=0) {
|
|||
}
|
||||
|
||||
sub isrecv($data) {
|
||||
is $remote->sysread(my $buf, length $data), length $data;
|
||||
my($buf, $off) = ('', 0);
|
||||
$off += $remote->sysread($buf, length($data) - $off, $off) while $off < length $data;
|
||||
is $buf, $data;
|
||||
}
|
||||
|
||||
|
||||
start;
|
||||
$remote->close;
|
||||
iserr -7;
|
||||
|
||||
start;
|
||||
begin;
|
||||
$remote->close;
|
||||
iserr -1;
|
||||
|
||||
start;
|
||||
|
|
@ -96,6 +102,8 @@ $f->print("Status: 200\r\n");
|
|||
$f->print("Something else");
|
||||
$f->flush;
|
||||
isrecv "\1\6\0\5\0\x1b\0\0"."Status: 200\r\nSomething else";
|
||||
isrecv "\1\6\0\5\0\0\0\0";
|
||||
isrecv "\1\3\0\5\0\x08\0\0"."\0\0\0\0\0\0\0\0";
|
||||
# Same connection:
|
||||
begin;
|
||||
record 1, 4, "\x00\x00\x06\x00HTTP_x\x00\x00";
|
||||
|
|
@ -159,4 +167,29 @@ record 1, 4, "\x0c\x05CONTENT_TYPEsomet";
|
|||
record 1, 2, "";
|
||||
isrec {'content-type','somet'}, {body => ''}, -6;
|
||||
|
||||
start;
|
||||
begin;
|
||||
record 1, 4, "\x0e\x05CONTENT_LENGTH65536";
|
||||
record 1, 4, '';
|
||||
if (!fork) {
|
||||
record 1, 5, 'A'x65535, 255;
|
||||
record 1, 5, 'B', 255;
|
||||
record 1, 5, '';
|
||||
exit;
|
||||
}
|
||||
isrec {'content-length',65536}, {body => ('A'x65535) . 'B'};
|
||||
if (!fork) {
|
||||
$f->print('a');
|
||||
$f->print('b');
|
||||
$f->print('c');
|
||||
$f->print('D' x 65536);
|
||||
$f->print('e');
|
||||
$f->flush;
|
||||
exit;
|
||||
}
|
||||
isrecv "\1\6\0\1\xff\xff\0\0".'abc'.('D'x65532);
|
||||
isrecv "\1\6\0\1\0\5\0\0".'DDDDe';
|
||||
isrecv "\1\6\0\1\0\0\0\0";
|
||||
isrecv "\1\3\0\1\0\x08\0\0"."\0\0\0\0\0\0\0\0";
|
||||
|
||||
done_testing;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue