From 8595c4ba64f35e32fdd5a73e9f7c99de0470b014 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 24 Feb 2025 11:12:01 +0100 Subject: [PATCH] FU: HTTP date handling + fu->send_file --- FU.pm | 100 ++++++++++++++++++++++++++++++++++++++++++++++++----- FU/Util.pm | 52 ++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+), 8 deletions(-) diff --git a/FU.pm b/FU.pm index ec6be17..9a0d70a 100644 --- a/FU.pm +++ b/FU.pm @@ -208,7 +208,7 @@ sub _monitor { our $hdrname_re = qr/[!#\$\%&'\*\+-\.^_`\|~0-9a-zA-Z]{1,127}/; -our $method_re = qr/(?:GET|POST|DELETE|OPTIONS|PUT|PATCH|QUERY)/; +our $method_re = qr/(?:HEAD|GET|POST|DELETE|OPTIONS|PUT|PATCH|QUERY)/; # rfc7230 used as reference, though strict conformance is not a goal. # Does not limit size of headers, so not suitable for deployment in untrusted networks. @@ -303,7 +303,7 @@ sub _log_err($e) { } sub _do_req($c) { - local $REQ = { hdr => {} }; + local $REQ = { hdr => {}, trace_start => time }; local $fu = bless {}, 'FU::obj'; $REQ->{ip} = $c->{client_sock} isa 'IO::Socket::INET' ? $c->{client_sock}->peerhost : '127.0.0.1'; @@ -316,7 +316,8 @@ sub _do_req($c) { for my $h (@before_request) { $h->() } my $path = fu->path; - my $r = $path_routes{ fu->method }{$path}; + my $method = fu->method eq 'HEAD' ? 'GET' : fu->method; + my $r = $path_routes{$method}{$path}; if ($r) { $r->() } else { for $r ($re_routes{ fu->method }->@*) { @@ -567,6 +568,7 @@ sub run(%conf) { package FU::obj; +use v5.36; use Carp 'confess'; sub fu() { $FU::fu } @@ -655,7 +657,37 @@ sub add_header($, $hdr, $val) { sub set_header($, $hdr, $val=undef) { _validate_header($hdr, $val); - $FU::REQ{reshdr}{ lc $hdr } = $val; + $FU::REQ->{reshdr}{ lc $hdr } = $val; +} + +sub send_file($, $root, $path) { + # This also catches files with '..' somewhere in the middle of the name. + # Let's just disallow that to simplify this check, I'd err on the side of + # caution. + return if $path =~ /\.\./; + + my $fn = "$root/$path"; + my $m = (stat $fn)[9]; + return if !defined $m; + + fu->set_header('last-modified', FU::Util::httpdate_format($m)); + my $ims = fu->header('if-modified-since'); + $ims = FU::Util::httpdate_parse($ims) if $ims; + if ($ims && $ims > $m) { + fu->status(304); + fu->done; + } + + my $ctype = FU::mime_types->{$path =~ m{\.([^/\.]+)$} ? lc $1 : ''}; + { + open my $fh, '<', $fn or confess "Unable to open '$fn': $!"; + local $/=undef; + my $body = <$fh>; + $ctype ||= substr($body, 0, 1024) =~ /[\x00-\x08\x0e-\x1f]/ ? 'application/octet-stream' : 'text/plain'; + fu->set_body($body); + } + fu->set_header('content-type', $ctype); + fu->done; } sub _error_page($, $code, $title, $msg) { @@ -687,9 +719,10 @@ sub _finalize { state $haszstd = eval { require Compress::Zstd; 1 }; my $r = $FU::REQ; - if ($r->{status} == 204) { + if ($r->{status} == 204 || $r->{status} == 304) { delete $r->{reshdr}{'content-length'}; delete $r->{reshdr}{'content-encoding'}; + delete $r->{reshdr}{'content-type'}; $r->{resbody} = ''; } else { @@ -716,7 +749,7 @@ sub _finalize { $r->{resbody} = '' if (fu->method//'') eq 'HEAD'; } - $r->{reshdr}{'content-type'} .= '; charset=UTF-8' if FU::utf8_mimes->{$r->{reshdr}{'content-type'}}; + $r->{reshdr}{'content-type'} .= '; charset=UTF-8' if FU::utf8_mimes->{ $r->{reshdr}{'content-type'}||'' }; } sub _flush($, $sock) { @@ -729,6 +762,8 @@ sub _flush($, $sock) { $sock->print("\r\n"); } else { $sock->printf("HTTP/1.0 %d Hello\r\n", $r->{status}); + $sock->printf("date: %s\r\n", FU::Util::httpdate_format time); + $sock->print("server: FU\r\n"); } for my ($hdr, $val) ($r->{reshdr}->%*) { @@ -898,6 +933,21 @@ setting to enable or disable debugging features in your own code. Enable logging of requests that took longer than C<$ms> milliseconds to process. Can be set to 0 to disable such logging. +=item FU::mime_types + +Returns a modifiable hashref that serves as a lookup table from file extension +to MIME type, used by C<< fu->send_file() >>. + +=item FU::utf8_mimes + +Returns a modifiable hashref listing which mime types should get a UTF-8 +C parameter appended to them in the C header. + +=item FU::compress_mimes + +Returns a modifiable hashref listing mime types for which compression makes +sense. + =item FU::monitor_path(@paths) Add filesystem paths to be monitored for changes when running in monitor mode @@ -1150,6 +1200,42 @@ templating system or L: }; }); +=item fu->send_file($root, $path) + +If a file identified by C<"$root/$path"> exists, set that as response and call +C<< fu->done >>. Returns normally if the file does not exist. This method is +mainly intended to serve small static files from a directory: + + FU::before_request { + # We can set custom headers before send_file() + fu->set_header('cache-control', 'max-age=31536000'); + + # Attempt to serve files from '/static/files' + fu->send_file('/static/files', fu->path); + + # If that fails, fall back to another directory + fu->send_file('/more/static/files', fu->path); + + # Otherwise, continue processing the request as normal + fu->reset; + }; + +C<$path> may be an untrusted string from the client, this method prevents path +traversal attacks that go below the given C<$root>. It does follow symlinks, +though. + +This method loads the entire file contents in memory and does not support range +requests, so DO NOT use it to send large files. Actual web servers are much +more efficient at sending static files. + +The content-type header is determined from the file extension in C<$path>, +using the configured C. As fallback, files that look like they +might be text get C and binary files are served with +C. + +This method sets an appropriate C header and supports +conditional requests with C. + =back I Setting cookies. @@ -1158,8 +1244,6 @@ I JSON output. I Redirection responses. -I Sending files. - =head2 Running the Site diff --git a/FU/Util.pm b/FU/Util.pm index 4d88519..34d61d8 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -4,12 +4,14 @@ use v5.36; use FU::XS; use Carp 'confess'; use Exporter 'import'; +use POSIX (); use experimental 'builtin'; our @EXPORT_OK = qw/ json_format json_parse utf8_decode uri_escape uri_unescape query_decode query_encode + httpdate_format httpdate_parse fdpass_send fdpass_recv /; @@ -60,6 +62,31 @@ sub query_encode :prototype($) ($o) { } sort keys %$o; } + +my @httpmonths = qw/Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec/; +my %httpmonths = map +($httpmonths[$_], $_), 0..11; +my @httpdays = qw/Sun Mon Tue Wed Thu Fri Sat/; +my $httpdays = '(?:'.join('|', @httpdays).')'; + +sub httpdate_format :prototype($) ($time) { + my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime $time; + sprintf '%s, %02d %s %d %02d:%02d:%02d GMT', + $httpdays[$wday], $mday, $httpmonths[$mon], $year+1900, $hour, $min, $sec; +} + +sub httpdate_parse :prototype($) ($str) { + return if $str !~ /^$httpdays, ([0-9]{2}) ([A-Z][a-z]{2}) ([0-9]{4}) ([0-9]{2}):([0-9]{2}):([0-9]{2}) GMT$/; + my ($mday, $mon, $year, $hour, $min, $sec) = ($1, $httpmonths{$2}, $3, $4, $5, $6); + return if !defined $mon; + # mktime() interprets the broken down time as our local timezone, + # which is utter garbage. But we can work around that by subtracting the + # time offset between localtime and gmtime around the given date. Might be + # off for a few hours around DST changes, but ugh. + my $mktime = POSIX::mktime($sec, $min, $hour, $mday, $mon, $year-1900); + $mktime + (POSIX::mktime(localtime $mktime) - POSIX::mktime(gmtime $mktime)); +} + + 1; __END__ @@ -272,6 +299,31 @@ then encoded. =back +=head2 HTTP Date Formatting + +The HTTP date format is utter garbage, but with the right tools it doesn't +require I much code to work with. + +=over + +=item httpdate_format($time) + +Convert the given seconds-since-Unix-epoch C<$time> into a HTTP date string. + +=item httpdate_parse($str) + +Converts the given HTTP date string into a seconds-since-Unix-epoch integer. +This function is very strict about its input and only accepts "IMF-fixdate" as +per L, which is +what every sensible implementation written in the past decade uses. + +This function plays fast and loose with timezone conversions, the parsed +timestamp I be off by an hour or so for a few hours around a DST change. +This will not happen if your local timezone is UTC. + +=back + + =head2 File Descriptor Passing UNIX sockets (see L) have the fancy property of letting you