FU: Some FastCGI fixes; FU::Util: utf8_decode & URI escaping

This commit is contained in:
Yorhel 2025-02-18 10:27:58 +01:00
parent d9fba4e8d8
commit 90cfd66069
6 changed files with 146 additions and 33 deletions

39
FU.pm
View file

@ -163,13 +163,6 @@ sub _monitor {
}
sub _decode_utf8 {
return if !defined $_[0];
fu->error(400, 'Invalid UTF-8 in request') if !utf8::decode($_[0]);
# Disallow any control codes, except for x09 (tab), x0a (newline) and x0d (carriage return)
fu->error(400, 'Invalid control character in request') if $_[0] =~ /[\x00-\x08\x0b\x0c\x0e-\x1f]/;
}
our $hdrname_re = qr/[!#\$\%&'\*\+-\.^_`\|~0-9a-zA-Z]{1,127}/;
our $method_re = qr/(?:GET|POST|DELETE|OPTIONS|PUT|PATCH|QUERY)/;
@ -181,8 +174,7 @@ sub _read_req_http($sock, $req) {
fu->error(400, 'Client disconnect before request was read') if !defined $line;
fu->error(400, 'Invalid request') if $line !~ /^($method_re)\s+(\S+)\s+HTTP\/1\.[01]\r\n$/;
$req->{method} = $1;
($req->{path}, $req->{qs}) = split /\?/, $2 =~ s{^https?://[^/]+/}{/}r, 2;
$req->{path} =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
$req->{path} = $2 =~ s{^https?://[^/]+/}{/}r;
while (1) {
# Turns out header line folding has been officially deprecated, so I'm
@ -225,7 +217,7 @@ sub _read_req($c) {
: $r == -2 ? "I/O error while reading from FastCGI socket\n"
: $r == -3 ? "FastCGI protocol error\n"
: $r == -4 ? "Too long FastCGI parameter\n"
: $r == -5 ? "Too long request body\n" : undef;
: $r == -5 ? "Too long request body\n" : undef if $r != -7;
delete $c->{fcgi_obj};
fu->error(-1);
}
@ -236,7 +228,12 @@ sub _read_req($c) {
# The HTTP reader above and the FastCGI XS reader operate on bytes.
# Decode these into Unicode strings and check for special characters.
_decode_utf8 $_ for ($REQ->{path}, $REQ->{qs}, values $REQ->{hdr}->%*);
eval { FU::Util::utf8_decode($_); 1} || fu->err(400, $@)
for ($REQ->{path}, $REQ->{qs}, values $REQ->{hdr}->%*);
($REQ->{path}, my $qs) = split /\?/, $REQ->{path}//'', 2;
$REQ->{qs} //= $qs;
$REQ->{path} = FU::Util::uri_unescape($REQ->{path});
}
@ -418,7 +415,7 @@ sub _spawn {
if (!keys %c) {
%c = (
http => $ENV{FU_HTTP} // '127.0.0.1:3000',
http => $ENV{FU_HTTP},
fcgi => $ENV{FU_FCGI},
proc => $ENV{FU_PROC} // 1,
monitor => $ENV{FU_MONITOR} // 0,
@ -447,8 +444,17 @@ sub _spawn {
my $need_supervisor = !$c{supervisor_sock} && !$c{client_sock} && ($c{proc} > 1 || $c{monitor} || $c{max_reqs});
return if !@_ && !$need_supervisor;
if (!$c{http} && !$c{fcgi} && !$c{listen_sock}) {
# When spawned under FastCGI, stdin is our listen socket
local $_ = getpeername \*STDIN;
if ($!{ENOTCONN}) {
$c{listen_sock} = IO::Socket->new_from_fd(0, 'r');
$c{listen_proto} = 'fcgi';
}
};
$c{http} //= '127.0.0.1:3000';
if (!$c{listen_sock}) {
# TODO: check if stdin is a fastcgi sock
$c{listen_proto} //= $c{fcgi} ? 'fcgi' : 'http';
my $addr = $c{$c{listen_proto}};
$c{listen_sock} = IO::Socket->new(
@ -520,6 +526,7 @@ sub method { $FU::REQ->{method} }
sub header($, $h) { $FU::REQ->{hdr}{ lc $h } }
sub headers { $FU::REQ->{hdr} }
sub ip { $FU::REQ->{ip} }
sub query { $FU::REQ->{qs} } # TODO: parse & validate
@ -773,7 +780,7 @@ this option is set, it takes precedence over the HTTP option.
Nginx and Apache will, in their default configuration, use a separate
connection per request. If you have a more esoteric setup, you should probably
be aware of the following: this implementation does not support multiplexing or
pipelining. It does support keepalive, but this come with a few caveats:
pipelining. It does support keepalive, but this comes with a few caveats:
=over
@ -786,7 +793,7 @@ I<after> each request rather than before, so clients may get a response from
stale code.
=item * When worker processes shut down, either through C<--max-reqs> or in
response to a signal, there is the possibility that an incoming request on an
response to a signal, there is a possibility that an incoming request on an
existing connection gets interrupted.
=back
@ -822,6 +829,8 @@ Set the initial value for C<FU::debug>.
=item LISTEN_FD=num
=item LISTEN_PROTO=http/fcgi
Listen for incoming connections on the given file descriptor instead of
creating a new listen socket. This is mainly useful if you are using an
external process manager.

5
FU.xs
View file

@ -116,7 +116,7 @@ void new(int fd, int maxproc)
void read_req(fufcgi *ctx, SV *headers, SV *params)
CODE:
ST(0) = sv_2mortal(newSViv(fufcgi_read_req(aTHX_ ctx, headers, params)));
ctx->off = 8;
ctx->off = ctx->len = 0;
void keepalive(fufcgi *ctx)
CODE:
@ -130,8 +130,7 @@ void print(fufcgi *ctx, SV *sv)
void flush(fufcgi *ctx)
CODE:
fufcgi_flush(ctx);
ctx->off = ctx->len = ctx->reqid = 0;
fufcgi_done(ctx);
void DESTROY(fufcgi *ctx)
CODE:

View file

@ -2,13 +2,35 @@ package FU::Util 0.1;
use v5.36;
use FU::XS;
use Carp 'confess';
use Exporter 'import';
our @EXPORT_OK = qw/
json_format json_parse
utf8_decode uri_escape uri_unescape
fdpass_send fdpass_recv
/;
sub utf8_decode :prototype($) {
return if !defined $_[0];
confess 'Invalid UTF-8' if !utf8::decode($_[0]);
confess 'Invalid control character' if $_[0] =~ /[\x00-\x08\x0b\x0c\x0e-\x1f]/;
$_[0]
}
sub uri_escape :prototype($) {
utf8::encode(local $_ = shift);
s/([^A-Za-z0-9._~-])/sprintf '%%%02X', ord $1/eg;
$_;
}
sub uri_unescape :prototype($) {
return if !defined $_[0];
utf8::encode(local $_ = shift);
s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
utf8_decode $_;
}
1;
__END__
@ -155,6 +177,36 @@ L<JSON::PP> and L<Cpanel::JSON::XS> are perfectly fine alternatives.
L<JSON::SIMD> and L<Mojo::JSON> also look like good and maintained candidates.)
=head2 String Utility Functions
=over
=item utf8_decode($bytes)
Convert a (perl-UTF-8 encoded) byte string into a sanitized perl Unicode
string. The conversion is performed in-place, so the C<$bytes> argument is
turned into a Unicode string. Returns the same string for convenience.
This function throws an error if the input is not valid UTF-8 or if it contains
ASCII control characters - that is, any character between C<0x00> and C<0x1f>
except for tab, newline and carriage return.
(This is a tiny wrapper around C<utf8::decode()> with some extra checks)
=item uri_escape($string)
Takes an Unicode string and returns a percent-encoded ASCII string, suitable
for use in a query parameter.
=item uri_unescape($string)
Takes an Unicode string potentially containing percent-encoding and returns a
decoded Unicode string. Also checks for ASCII control characters as per
C<utf8_decode()>.
=back
=head2 File Descriptor Passing
UNIX sockets (see L<IO::Socket::UNIX>) have the fancy property of letting you

View file

@ -19,7 +19,7 @@ Things that may or may not happen:
- FU - The website framework, taking inspiration from TUWF.
- FU::JSON - JSON::{XS,PP,etc}-compatible wrapper around FU::Util's JSON functions? I prolly won't need this myself, but could be handy.
- FU::Log - Basic logger.
- FU::Util additions: `uri_escape`, `VNDB::Util::query_encode`, `scrypt`, `urandom`.
- FU::Util additions: `VNDB::Util::query_encode`?
- FU::Validate - TUWF::Validate & normalization with some improvements.
- FU::XML - TUWF::XMLXS with some improvements.
- FU::Mailer - Simple sendmail wrapper

View file

@ -11,12 +11,15 @@
#define FCGI_UNKNOWN_TYPE 11
#define FUFE_OK 0
#define FUFE_EOF -1 /* protocol-level EOF */
#define FUFE_EOF -1 /* unexpected protocol-level EOF */
#define FUFE_IO -2
#define FUFE_PROTO -3
#define FUFE_PLEN -4
#define FUFE_CLEN -5
#define FUFE_ABORT -6 /* explicit abort or client-level EOF */
#define FUFE_NOREQ -7 /* protocol-level EOF before we received anything */
#define FUFCGI_MAX_DATA 65535
typedef struct {
SV *self;
@ -29,7 +32,7 @@ typedef struct {
HV *params;
/* Single buffer for reading & writing, we only do one thing at a time */
char buf[8 + 65536 + 256]; /* fits a maximum-length fcgi record */
char buf[8 + FUFCGI_MAX_DATA + 255]; /* fits a maximum-length fcgi record */
int len; /* total number of bytes in the buffer */
int off; /* number of bytes consumed */
} fufcgi;
@ -154,7 +157,7 @@ static int fufcgi_read_record(fufcgi *ctx, fufcgi_rec *rec) {
rec->type = ctx->buf[ctx->off+1];
rec->id = fu_frombeU(16, ctx->buf+ctx->off+2);
rec->len = fu_frombeU(16, ctx->buf+ctx->off+4);
int pad = ctx->buf[ctx->off+6];
int pad = (unsigned char)ctx->buf[ctx->off+6];
ctx->off += 8;
if ((r = fufcgi_fill(ctx, rec->len + pad)) != FUFE_OK) return r;
@ -225,7 +228,7 @@ static int fufcgi_read_req_record(fufcgi *ctx, fufcgi_rec *rec) {
int r;
char tmp[128]; /* Large enough for a FCGI_GET_VALUES_RESULT */
while (1) {
if ((r = fufcgi_read_record(ctx, rec)) != FUFE_OK) return r;
if ((r = fufcgi_read_record(ctx, rec)) != FUFE_OK) return r == FUFE_EOF && ctx->len == 0 ? FUFE_NOREQ : r;
switch (rec->type) {
case FCGI_PARAMS:
@ -405,23 +408,40 @@ static int fufcgi_read_req(pTHX_ fufcgi *ctx, SV *headers, SV *params) {
static void fufcgi_flush(fufcgi *ctx) {
fufcgi_rec hdr;
if (ctx->off > 8) {
hdr.len = ctx->off - 8;
if (ctx->len > 0) {
hdr.len = ctx->len;
hdr.type = FCGI_STDOUT;
hdr.id = ctx->reqid;
fufcgi_write_record(ctx, &hdr, ctx->buf);
ctx->off = 8;
ctx->len = 0;
}
}
static void fufcgi_print(fufcgi *ctx, const char *buf, int len) {
int r;
while (len > 0) {
r = len > 65535 - ctx->off ? 65535 - ctx->off : len;
memcpy(ctx->buf+ctx->off, buf, r);
ctx->off += r;
r = len > FUFCGI_MAX_DATA - ctx->len ? FUFCGI_MAX_DATA - ctx->len : len;
memcpy(ctx->buf+8+ctx->len, buf, r);
ctx->len += r;
len -= r;
buf += r;
if (ctx->off >= 65535) fufcgi_flush(ctx);
if (ctx->len >= FUFCGI_MAX_DATA) fufcgi_flush(ctx);
}
}
static void fufcgi_done(fufcgi *ctx) {
fufcgi_rec hdr;
fufcgi_flush(ctx);
hdr.len = 0;
hdr.type = FCGI_STDOUT;
hdr.id = ctx->reqid;
fufcgi_write_record(ctx, &hdr, ctx->buf);
memcpy(ctx->buf+8, "\0\0\0\0\0\0\0\0", 8); /* FCGI_REQUEST_COMPLETE */
hdr.type = FCGI_END_REQUEST;
hdr.len = 8;
fufcgi_write_record(ctx, &hdr, ctx->buf);
ctx->reqid = ctx->len = ctx->off = 0;
}

View file

@ -10,8 +10,8 @@ sub start {
$f = FU::fcgi::new(fileno $local, 123);
}
sub record($id, $type, $data) {
my $pad = rand > 0.5 ? int rand(50) : 0;
sub record($id, $type, $data, $pad=undef) {
$pad //= rand > 0.5 ? int rand(50) : 0;
my $msg = pack('CCnnCC', 1, $type, $id, length($data), $pad, 0) . $data . ("\0"x$pad);
is $remote->syswrite($msg, length($msg)), length($msg);
}
@ -31,13 +31,19 @@ sub isrec($hdr, $par, $code=0) {
}
sub isrecv($data) {
is $remote->sysread(my $buf, length $data), length $data;
my($buf, $off) = ('', 0);
$off += $remote->sysread($buf, length($data) - $off, $off) while $off < length $data;
is $buf, $data;
}
start;
$remote->close;
iserr -7;
start;
begin;
$remote->close;
iserr -1;
start;
@ -96,6 +102,8 @@ $f->print("Status: 200\r\n");
$f->print("Something else");
$f->flush;
isrecv "\1\6\0\5\0\x1b\0\0"."Status: 200\r\nSomething else";
isrecv "\1\6\0\5\0\0\0\0";
isrecv "\1\3\0\5\0\x08\0\0"."\0\0\0\0\0\0\0\0";
# Same connection:
begin;
record 1, 4, "\x00\x00\x06\x00HTTP_x\x00\x00";
@ -159,4 +167,29 @@ record 1, 4, "\x0c\x05CONTENT_TYPEsomet";
record 1, 2, "";
isrec {'content-type','somet'}, {body => ''}, -6;
start;
begin;
record 1, 4, "\x0e\x05CONTENT_LENGTH65536";
record 1, 4, '';
if (!fork) {
record 1, 5, 'A'x65535, 255;
record 1, 5, 'B', 255;
record 1, 5, '';
exit;
}
isrec {'content-length',65536}, {body => ('A'x65535) . 'B'};
if (!fork) {
$f->print('a');
$f->print('b');
$f->print('c');
$f->print('D' x 65536);
$f->print('e');
$f->flush;
exit;
}
isrecv "\1\6\0\1\xff\xff\0\0".'abc'.('D'x65532);
isrecv "\1\6\0\1\0\5\0\0".'DDDDe';
isrecv "\1\6\0\1\0\0\0\0";
isrecv "\1\3\0\1\0\x08\0\0"."\0\0\0\0\0\0\0\0";
done_testing;