From 90cfd66069fd2f2c4c452691aad5dfc49d8d4361 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Tue, 18 Feb 2025 10:27:58 +0100 Subject: [PATCH] FU: Some FastCGI fixes; FU::Util: utf8_decode & URI escaping --- FU.pm | 39 ++++++++++++++++++++++++--------------- FU.xs | 5 ++--- FU/Util.pm | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 2 +- c/fcgi.c | 42 +++++++++++++++++++++++++++++++----------- t/fcgi.t | 39 ++++++++++++++++++++++++++++++++++++--- 6 files changed, 146 insertions(+), 33 deletions(-) diff --git a/FU.pm b/FU.pm index ad19d87..f2e0dc8 100644 --- a/FU.pm +++ b/FU.pm @@ -163,13 +163,6 @@ sub _monitor { } -sub _decode_utf8 { - return if !defined $_[0]; - fu->error(400, 'Invalid UTF-8 in request') if !utf8::decode($_[0]); - # Disallow any control codes, except for x09 (tab), x0a (newline) and x0d (carriage return) - fu->error(400, 'Invalid control character in request') if $_[0] =~ /[\x00-\x08\x0b\x0c\x0e-\x1f]/; -} - our $hdrname_re = qr/[!#\$\%&'\*\+-\.^_`\|~0-9a-zA-Z]{1,127}/; our $method_re = qr/(?:GET|POST|DELETE|OPTIONS|PUT|PATCH|QUERY)/; @@ -181,8 +174,7 @@ sub _read_req_http($sock, $req) { fu->error(400, 'Client disconnect before request was read') if !defined $line; fu->error(400, 'Invalid request') if $line !~ /^($method_re)\s+(\S+)\s+HTTP\/1\.[01]\r\n$/; $req->{method} = $1; - ($req->{path}, $req->{qs}) = split /\?/, $2 =~ s{^https?://[^/]+/}{/}r, 2; - $req->{path} =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; + $req->{path} = $2 =~ s{^https?://[^/]+/}{/}r; while (1) { # Turns out header line folding has been officially deprecated, so I'm @@ -225,7 +217,7 @@ sub _read_req($c) { : $r == -2 ? "I/O error while reading from FastCGI socket\n" : $r == -3 ? "FastCGI protocol error\n" : $r == -4 ? "Too long FastCGI parameter\n" - : $r == -5 ? "Too long request body\n" : undef; + : $r == -5 ? "Too long request body\n" : undef if $r != -7; delete $c->{fcgi_obj}; fu->error(-1); } @@ -236,7 +228,12 @@ sub _read_req($c) { # The HTTP reader above and the FastCGI XS reader operate on bytes. # Decode these into Unicode strings and check for special characters. - _decode_utf8 $_ for ($REQ->{path}, $REQ->{qs}, values $REQ->{hdr}->%*); + eval { FU::Util::utf8_decode($_); 1} || fu->err(400, $@) + for ($REQ->{path}, $REQ->{qs}, values $REQ->{hdr}->%*); + + ($REQ->{path}, my $qs) = split /\?/, $REQ->{path}//'', 2; + $REQ->{qs} //= $qs; + $REQ->{path} = FU::Util::uri_unescape($REQ->{path}); } @@ -418,7 +415,7 @@ sub _spawn { if (!keys %c) { %c = ( - http => $ENV{FU_HTTP} // '127.0.0.1:3000', + http => $ENV{FU_HTTP}, fcgi => $ENV{FU_FCGI}, proc => $ENV{FU_PROC} // 1, monitor => $ENV{FU_MONITOR} // 0, @@ -447,8 +444,17 @@ sub _spawn { my $need_supervisor = !$c{supervisor_sock} && !$c{client_sock} && ($c{proc} > 1 || $c{monitor} || $c{max_reqs}); return if !@_ && !$need_supervisor; + if (!$c{http} && !$c{fcgi} && !$c{listen_sock}) { + # When spawned under FastCGI, stdin is our listen socket + local $_ = getpeername \*STDIN; + if ($!{ENOTCONN}) { + $c{listen_sock} = IO::Socket->new_from_fd(0, 'r'); + $c{listen_proto} = 'fcgi'; + } + }; + $c{http} //= '127.0.0.1:3000'; + if (!$c{listen_sock}) { - # TODO: check if stdin is a fastcgi sock $c{listen_proto} //= $c{fcgi} ? 'fcgi' : 'http'; my $addr = $c{$c{listen_proto}}; $c{listen_sock} = IO::Socket->new( @@ -520,6 +526,7 @@ sub method { $FU::REQ->{method} } sub header($, $h) { $FU::REQ->{hdr}{ lc $h } } sub headers { $FU::REQ->{hdr} } sub ip { $FU::REQ->{ip} } +sub query { $FU::REQ->{qs} } # TODO: parse & validate @@ -773,7 +780,7 @@ this option is set, it takes precedence over the HTTP option. Nginx and Apache will, in their default configuration, use a separate connection per request. If you have a more esoteric setup, you should probably be aware of the following: this implementation does not support multiplexing or -pipelining. It does support keepalive, but this come with a few caveats: +pipelining. It does support keepalive, but this comes with a few caveats: =over @@ -786,7 +793,7 @@ I each request rather than before, so clients may get a response from stale code. =item * When worker processes shut down, either through C<--max-reqs> or in -response to a signal, there is the possibility that an incoming request on an +response to a signal, there is a possibility that an incoming request on an existing connection gets interrupted. =back @@ -822,6 +829,8 @@ Set the initial value for C. =item LISTEN_FD=num +=item LISTEN_PROTO=http/fcgi + Listen for incoming connections on the given file descriptor instead of creating a new listen socket. This is mainly useful if you are using an external process manager. diff --git a/FU.xs b/FU.xs index b42663f..666f342 100644 --- a/FU.xs +++ b/FU.xs @@ -116,7 +116,7 @@ void new(int fd, int maxproc) void read_req(fufcgi *ctx, SV *headers, SV *params) CODE: ST(0) = sv_2mortal(newSViv(fufcgi_read_req(aTHX_ ctx, headers, params))); - ctx->off = 8; + ctx->off = ctx->len = 0; void keepalive(fufcgi *ctx) CODE: @@ -130,8 +130,7 @@ void print(fufcgi *ctx, SV *sv) void flush(fufcgi *ctx) CODE: - fufcgi_flush(ctx); - ctx->off = ctx->len = ctx->reqid = 0; + fufcgi_done(ctx); void DESTROY(fufcgi *ctx) CODE: diff --git a/FU/Util.pm b/FU/Util.pm index e5d82d8..83fbde6 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -2,13 +2,35 @@ package FU::Util 0.1; use v5.36; use FU::XS; +use Carp 'confess'; use Exporter 'import'; our @EXPORT_OK = qw/ json_format json_parse + utf8_decode uri_escape uri_unescape fdpass_send fdpass_recv /; +sub utf8_decode :prototype($) { + return if !defined $_[0]; + confess 'Invalid UTF-8' if !utf8::decode($_[0]); + confess 'Invalid control character' if $_[0] =~ /[\x00-\x08\x0b\x0c\x0e-\x1f]/; + $_[0] +} + +sub uri_escape :prototype($) { + utf8::encode(local $_ = shift); + s/([^A-Za-z0-9._~-])/sprintf '%%%02X', ord $1/eg; + $_; +} + +sub uri_unescape :prototype($) { + return if !defined $_[0]; + utf8::encode(local $_ = shift); + s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; + utf8_decode $_; +} + 1; __END__ @@ -155,6 +177,36 @@ L and L are perfectly fine alternatives. L and L also look like good and maintained candidates.) +=head2 String Utility Functions + +=over + +=item utf8_decode($bytes) + +Convert a (perl-UTF-8 encoded) byte string into a sanitized perl Unicode +string. The conversion is performed in-place, so the C<$bytes> argument is +turned into a Unicode string. Returns the same string for convenience. + +This function throws an error if the input is not valid UTF-8 or if it contains +ASCII control characters - that is, any character between C<0x00> and C<0x1f> +except for tab, newline and carriage return. + +(This is a tiny wrapper around C with some extra checks) + +=item uri_escape($string) + +Takes an Unicode string and returns a percent-encoded ASCII string, suitable +for use in a query parameter. + +=item uri_unescape($string) + +Takes an Unicode string potentially containing percent-encoding and returns a +decoded Unicode string. Also checks for ASCII control characters as per +C. + +=back + + =head2 File Descriptor Passing UNIX sockets (see L) have the fancy property of letting you diff --git a/README.md b/README.md index c677c71..5682e10 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Things that may or may not happen: - FU - The website framework, taking inspiration from TUWF. - FU::JSON - JSON::{XS,PP,etc}-compatible wrapper around FU::Util's JSON functions? I prolly won't need this myself, but could be handy. - FU::Log - Basic logger. -- FU::Util additions: `uri_escape`, `VNDB::Util::query_encode`, `scrypt`, `urandom`. +- FU::Util additions: `VNDB::Util::query_encode`? - FU::Validate - TUWF::Validate & normalization with some improvements. - FU::XML - TUWF::XMLXS with some improvements. - FU::Mailer - Simple sendmail wrapper diff --git a/c/fcgi.c b/c/fcgi.c index 20368a5..fdf8cae 100644 --- a/c/fcgi.c +++ b/c/fcgi.c @@ -11,12 +11,15 @@ #define FCGI_UNKNOWN_TYPE 11 #define FUFE_OK 0 -#define FUFE_EOF -1 /* protocol-level EOF */ +#define FUFE_EOF -1 /* unexpected protocol-level EOF */ #define FUFE_IO -2 #define FUFE_PROTO -3 #define FUFE_PLEN -4 #define FUFE_CLEN -5 #define FUFE_ABORT -6 /* explicit abort or client-level EOF */ +#define FUFE_NOREQ -7 /* protocol-level EOF before we received anything */ + +#define FUFCGI_MAX_DATA 65535 typedef struct { SV *self; @@ -29,7 +32,7 @@ typedef struct { HV *params; /* Single buffer for reading & writing, we only do one thing at a time */ - char buf[8 + 65536 + 256]; /* fits a maximum-length fcgi record */ + char buf[8 + FUFCGI_MAX_DATA + 255]; /* fits a maximum-length fcgi record */ int len; /* total number of bytes in the buffer */ int off; /* number of bytes consumed */ } fufcgi; @@ -154,7 +157,7 @@ static int fufcgi_read_record(fufcgi *ctx, fufcgi_rec *rec) { rec->type = ctx->buf[ctx->off+1]; rec->id = fu_frombeU(16, ctx->buf+ctx->off+2); rec->len = fu_frombeU(16, ctx->buf+ctx->off+4); - int pad = ctx->buf[ctx->off+6]; + int pad = (unsigned char)ctx->buf[ctx->off+6]; ctx->off += 8; if ((r = fufcgi_fill(ctx, rec->len + pad)) != FUFE_OK) return r; @@ -225,7 +228,7 @@ static int fufcgi_read_req_record(fufcgi *ctx, fufcgi_rec *rec) { int r; char tmp[128]; /* Large enough for a FCGI_GET_VALUES_RESULT */ while (1) { - if ((r = fufcgi_read_record(ctx, rec)) != FUFE_OK) return r; + if ((r = fufcgi_read_record(ctx, rec)) != FUFE_OK) return r == FUFE_EOF && ctx->len == 0 ? FUFE_NOREQ : r; switch (rec->type) { case FCGI_PARAMS: @@ -405,23 +408,40 @@ static int fufcgi_read_req(pTHX_ fufcgi *ctx, SV *headers, SV *params) { static void fufcgi_flush(fufcgi *ctx) { fufcgi_rec hdr; - if (ctx->off > 8) { - hdr.len = ctx->off - 8; + if (ctx->len > 0) { + hdr.len = ctx->len; hdr.type = FCGI_STDOUT; hdr.id = ctx->reqid; fufcgi_write_record(ctx, &hdr, ctx->buf); - ctx->off = 8; + ctx->len = 0; } } static void fufcgi_print(fufcgi *ctx, const char *buf, int len) { int r; while (len > 0) { - r = len > 65535 - ctx->off ? 65535 - ctx->off : len; - memcpy(ctx->buf+ctx->off, buf, r); - ctx->off += r; + r = len > FUFCGI_MAX_DATA - ctx->len ? FUFCGI_MAX_DATA - ctx->len : len; + memcpy(ctx->buf+8+ctx->len, buf, r); + ctx->len += r; len -= r; buf += r; - if (ctx->off >= 65535) fufcgi_flush(ctx); + if (ctx->len >= FUFCGI_MAX_DATA) fufcgi_flush(ctx); } } + +static void fufcgi_done(fufcgi *ctx) { + fufcgi_rec hdr; + fufcgi_flush(ctx); + + hdr.len = 0; + hdr.type = FCGI_STDOUT; + hdr.id = ctx->reqid; + fufcgi_write_record(ctx, &hdr, ctx->buf); + + memcpy(ctx->buf+8, "\0\0\0\0\0\0\0\0", 8); /* FCGI_REQUEST_COMPLETE */ + hdr.type = FCGI_END_REQUEST; + hdr.len = 8; + fufcgi_write_record(ctx, &hdr, ctx->buf); + + ctx->reqid = ctx->len = ctx->off = 0; +} diff --git a/t/fcgi.t b/t/fcgi.t index 5262488..6c06423 100644 --- a/t/fcgi.t +++ b/t/fcgi.t @@ -10,8 +10,8 @@ sub start { $f = FU::fcgi::new(fileno $local, 123); } -sub record($id, $type, $data) { - my $pad = rand > 0.5 ? int rand(50) : 0; +sub record($id, $type, $data, $pad=undef) { + $pad //= rand > 0.5 ? int rand(50) : 0; my $msg = pack('CCnnCC', 1, $type, $id, length($data), $pad, 0) . $data . ("\0"x$pad); is $remote->syswrite($msg, length($msg)), length($msg); } @@ -31,13 +31,19 @@ sub isrec($hdr, $par, $code=0) { } sub isrecv($data) { - is $remote->sysread(my $buf, length $data), length $data; + my($buf, $off) = ('', 0); + $off += $remote->sysread($buf, length($data) - $off, $off) while $off < length $data; is $buf, $data; } start; $remote->close; +iserr -7; + +start; +begin; +$remote->close; iserr -1; start; @@ -96,6 +102,8 @@ $f->print("Status: 200\r\n"); $f->print("Something else"); $f->flush; isrecv "\1\6\0\5\0\x1b\0\0"."Status: 200\r\nSomething else"; +isrecv "\1\6\0\5\0\0\0\0"; +isrecv "\1\3\0\5\0\x08\0\0"."\0\0\0\0\0\0\0\0"; # Same connection: begin; record 1, 4, "\x00\x00\x06\x00HTTP_x\x00\x00"; @@ -159,4 +167,29 @@ record 1, 4, "\x0c\x05CONTENT_TYPEsomet"; record 1, 2, ""; isrec {'content-type','somet'}, {body => ''}, -6; +start; +begin; +record 1, 4, "\x0e\x05CONTENT_LENGTH65536"; +record 1, 4, ''; +if (!fork) { + record 1, 5, 'A'x65535, 255; + record 1, 5, 'B', 255; + record 1, 5, ''; + exit; +} +isrec {'content-length',65536}, {body => ('A'x65535) . 'B'}; +if (!fork) { + $f->print('a'); + $f->print('b'); + $f->print('c'); + $f->print('D' x 65536); + $f->print('e'); + $f->flush; + exit; +} +isrecv "\1\6\0\1\xff\xff\0\0".'abc'.('D'x65532); +isrecv "\1\6\0\1\0\5\0\0".'DDDDe'; +isrecv "\1\6\0\1\0\0\0\0"; +isrecv "\1\3\0\1\0\x08\0\0"."\0\0\0\0\0\0\0\0"; + done_testing;