From bc33fe53f033003080d7babb56570792bd09cf55 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Tue, 18 Mar 2025 16:58:31 +0100 Subject: [PATCH] FU::Util: Add gzip_compress() wrapper for libdeflate/zlib-ng/zlib And use it for automatic output compression in FU, as (potentially) faster alternative to Compress::Raw::Zlib. Was also planning to maybe add support for Zstd or Brotli, but given the performance of libdeflate, I'm not sure that's really necessary. Brotli does tend to do a better job at compressing HTML, though. --- FU.pm | 14 +++--- FU.xs | 15 +++++- FU/Util.pm | 43 +++++++++++++++++ c/compress.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++++ t/compress.t | 47 +++++++++++++++++++ 5 files changed, 237 insertions(+), 10 deletions(-) create mode 100644 c/compress.c create mode 100644 t/compress.t diff --git a/FU.pm b/FU.pm index 4d7a28b..999dda2 100644 --- a/FU.pm +++ b/FU.pm @@ -850,7 +850,6 @@ sub _error_page($, $code, $title, $msg) { } sub _finalize { - state $haszlib = eval { require Compress::Raw::Zlib; 1 }; my $r = $FU::REQ; fu->add_header('set-cookie', $_) for $r->{rescookie} ? sort values $r->{rescookie}->%* : (); @@ -862,18 +861,14 @@ sub _finalize { $r->{resbody} = ''; } else { - if ($haszlib && length($r->{resbody}) > 256 + if (FU::Util::gzip_lib() && length($r->{resbody}) > 256 && !defined $r->{reshdr}{'content-encoding'} && FU::compress_mimes->{$r->{reshdr}{'content-type'}}) { $r->{reshdr}{'vary'} = ($r->{reshdr}{'vary'} ? $r->{reshdr}{'vary'}.', ' : '').'accept-encoding' if ($r->{reshdr}{'vary'}||'') !~ /accept-encoding/i; - if ($haszlib && ($r->{hdr}{'accept-encoding'}||'') =~ /gzip/) { - # Use lower-level API because the higher-level Compress::Zlib loads a whole bunch of other modules. 
- my $z = Compress::Raw::Zlib::Deflate->new(-WindowBits => Compress::Raw::Zlib::WANT_GZIP(), -Level => 3, -AppendOutput => 1); - $z->deflate($r->{resbody}, my $buf); - $z->flush($buf); - $r->{resbody} = $buf; + if (($r->{hdr}{'accept-encoding'}||'') =~ /gzip/) { + $r->{resbody} = FU::Util::gzip_compress(6, $r->{resbody}); $r->{reshdr}{'content-encoding'} = 'gzip'; } } @@ -993,6 +988,9 @@ is). There are a few additional optional dependencies: =item * C - required for L, dynamically loaded through C. +=item * C<libdeflate.so> or C<libz-ng.so> or C<libz.so> - required for +C<gzip_compress()> in L<FU::Util> and used for HTTP output compression. + =back diff --git a/FU.xs b/FU.xs index 1d63ffe..b775f3c 100644 --- a/FU.xs +++ b/FU.xs @@ -27,10 +27,12 @@ #include "c/khashl.h" #include "c/common.c" + +#include "c/compress.c" +#include "c/fcgi.c" +#include "c/fdpass.c" #include "c/jsonfmt.c" #include "c/jsonparse.c" -#include "c/fdpass.c" -#include "c/fcgi.c" #include "c/xmlwr.c" #include "c/libpq.h" @@ -115,6 +117,15 @@ void json_parse(SV *val, ...) CODE: ST(0) = fujson_parse_xs(aTHX_ ax, items, val); +void gzip_lib() + PROTOTYPE: + CODE: + ST(0) = sv_2mortal(newSVpv(fugz_lib(), 0)); + +void gzip_compress(IV level, SV *in) + CODE: + ST(0) = fugz_compress(aTHX_ level, in); + void fdpass_send(int socket, int fd, SV *data) CODE: STRLEN buflen; diff --git a/FU/Util.pm b/FU/Util.pm index a94aa9e..4ff1c2b 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -13,6 +13,7 @@ our @EXPORT_OK = qw/ utf8_decode uri_escape uri_unescape query_decode query_encode httpdate_format httpdate_parse + gzip_lib gzip_compress fdpass_send fdpass_recv /; @@ -367,6 +368,48 @@ This will not happen if your local timezone is UTC. =back +=head2 Gzip Compression + +Gzip compression can be done with a few different libraries. The canonical one +is I<zlib>, which is old and not well optimized for modern systems. There's +also I<zlib-ng>, a (much) more performant reimplementation that remains +API-compatible with I<zlib>. 
And there's I<libdeflate>, which offers a +different API that does not support streaming compression but is, in exchange, +even faster than I<zlib-ng>. + +There are more implementations, of course, but this module only supports those +three and (attempts to) pick the best one that's available on your system. + +=over + +=item gzip_lib() + +Returns an empty string if no supported gzip library was found on your system +(unlikely but possible), otherwise returns the selected implementation: either +C<"libdeflate">, C<"zlib-ng"> or C<"zlib">. + +This function does not try very hard to differentiate between I<zlib> and +I<zlib-ng>, so it may report that I<zlib> is being used on systems where +C<libz.so> is, in fact, I<zlib-ng>. + +=item gzip_compress($level, $data) + +Returns a byte string with the gzip-compressed version of C<$data> at the given +gzip C<$level>, which is a number between 0 (no compression) and 12 (strongest +compression). Only I<libdeflate> supports levels higher than 9, for +I<zlib> and I<zlib-ng> the level is capped at 9. 6 is typically used as a default. + +Throws an error if no suitable library was found. + +=back + +This module does not currently implement decompression. If you need that, or +streaming, or other functionality not provided here, there's +L<Compress::Raw::Zlib> and L<IO::Compress::Gzip> in the core Perl distribution and +L<Gzip::Libdeflate> on CPAN. 
+
+
+
 =head2 File Descriptor Passing
 
 UNIX sockets (see L) have the fancy property of letting you
diff --git a/c/compress.c b/c/compress.c
new file mode 100644
index 0000000..9c4a1a9
--- /dev/null
+++ b/c/compress.c
@@ -0,0 +1,185 @@
+/* Gzip compression through whichever of libdeflate, zlib-ng or zlib can be
+ * loaded with dlopen() at run time; none of them is linked at build time. */
+
+/* Names returned by fugz_lib(), indexed by fugz_imp. */
+static const char *fugz_imps[] = {"", "libdeflate", "zlib-ng", "zlib"};
+/* Selected implementation: -1 = not probed yet, 0 = none available. */
+static int fugz_imp = -1;
+
+
+/* zlib & zlib-ng */
+
+/* Local stand-in for the stream struct of zlib.h, which is not included. */
+typedef struct {
+    const char *next_in;
+    unsigned int avail_in;
+    unsigned long total_in;
+    char *next_out;
+    unsigned int avail_out;
+    unsigned long total_out;
+    const char *msg;
+    struct internal_state *state;
+    void *zalloc;
+    void *zfree;
+    void *opaque;
+    int data_type;
+    unsigned long adler;
+    unsigned long reserved;
+} z_stream;
+
+/* Resolved by fugz_lib(). Only one of deflateInit2 (zlib-ng's zng_ function)
+ * and deflateInit2_ (the function behind zlib's deflateInit2() macro) is set. */
+static int (*deflate)(z_stream *, int);
+static int (*deflateEnd)(z_stream *);
+static int (*deflateInit2)(z_stream *, int, int, int, int, int);
+static int (*deflateInit2_)(z_stream *, int, int, int, int, int, const char *, int);
+static unsigned long (*compressBound)(unsigned long);
+
+
+/* libdeflate */
+
+/* Cached compressor and the level it was allocated for. */
+static struct libdeflate_compressor *fugz_ld_ctx;
+static int fugz_ld_comp = -1;
+
+static struct libdeflate_compressor *(*libdeflate_alloc_compressor)(int);
+static void (*libdeflate_free_compressor)(struct libdeflate_compressor *);
+static size_t (*libdeflate_gzip_compress_bound)(struct libdeflate_compressor *, size_t);
+static size_t (*libdeflate_gzip_compress)(struct libdeflate_compressor *, const void *, size_t, void *, size_t);
+
+
+
+/* Opens `name`, falling back to the versioned `soname`: the unversioned name
+ * is often only installed together with the library's development files.
+ * Returns NULL if neither can be loaded. */
+static void *fugz_dlopen(const char *name, const char *soname) {
+    void *handle = dlopen(name, RTLD_LAZY);
+    return handle ? handle : dlopen(soname, RTLD_LAZY);
+}
+
+
+/* Returns the name of the gzip implementation in use, or "" if none could be
+ * loaded. The libraries are probed on the first call only, in order of
+ * preference: libdeflate, zlib-ng, zlib. */
+static const char *fugz_lib(void) {
+    if (fugz_imp >= 0) return fugz_imps[fugz_imp];
+
+    void *handle;
+    if ((handle = fugz_dlopen("libdeflate.so", "libdeflate.so.0"))) {
+        if ((libdeflate_alloc_compressor = dlsym(handle, "libdeflate_alloc_compressor"))
+                && (libdeflate_free_compressor = dlsym(handle, "libdeflate_free_compressor"))
+                && (libdeflate_gzip_compress_bound = dlsym(handle, "libdeflate_gzip_compress_bound"))
+                && (libdeflate_gzip_compress = dlsym(handle, "libdeflate_gzip_compress"))) {
+            fugz_imp = 1;
+            return fugz_imps[fugz_imp];
+        }
+    }
+
+    int i;
+    for (i=2; i<=3; i++) {
+        /* Forget the symbols of a rejected candidate, so that functions from
+         * two different libraries can never end up being mixed. */
+        deflate = NULL;
+        deflateEnd = NULL;
+        deflateInit2 = NULL;
+        deflateInit2_ = NULL;
+        compressBound = NULL;
+
+        handle = i == 2 ? fugz_dlopen("libz-ng.so", "libz-ng.so.2") : fugz_dlopen("libz.so", "libz.so.1");
+        if (!handle) continue;
+        /* The zng_ prefixed names are tried first, then the plain zlib names. */
+        if (((deflate = dlsym(handle, "zng_deflate")) || (deflate = dlsym(handle, "deflate")))
+                && ((deflateEnd = dlsym(handle, "zng_deflateEnd")) || (deflateEnd = dlsym(handle, "deflateEnd")))
+                && ((deflateInit2 = dlsym(handle, "zng_deflateInit2")) || (deflateInit2_ = dlsym(handle, "deflateInit2_")))
+                && ((compressBound = dlsym(handle, "zng_compressBound")) || (compressBound = dlsym(handle, "compressBound")))) {
+            fugz_imp = i;
+            return fugz_imps[fugz_imp];
+        }
+    }
+
+    fugz_imp = 0;
+    return fugz_imps[fugz_imp];
+}
+
+
+/* Compresses `bytes` with libdeflate and returns the result as a mortal SV.
+ * The compressor is cached and only reallocated when `level` changes. */
+static SV *fugz_compress_ld(pTHX_ int level, const char *bytes, size_t inlen) {
+    if (fugz_ld_ctx && fugz_ld_comp != level) {
+        libdeflate_free_compressor(fugz_ld_ctx);
+        fugz_ld_ctx = NULL;
+    }
+    if (!fugz_ld_ctx) {
+        /* NULL means out of memory or a level this libdeflate build rejects;
+         * it must not be passed on to libdeflate_gzip_compress(). */
+        fugz_ld_ctx = libdeflate_alloc_compressor(level);
+        if (!fugz_ld_ctx) fu_confess("Libdeflate compression failed: no compressor for level %d", level);
+        fugz_ld_comp = level;
+    }
+
+    size_t outlen = libdeflate_gzip_compress_bound(fugz_ld_ctx, inlen);
+    SV *out = sv_2mortal(newSV(outlen));
+    SvPOK_only(out);
+    size_t len = libdeflate_gzip_compress(fugz_ld_ctx, bytes, inlen, SvPVX(out), outlen);
+    if (!len) fu_confess("Libdeflate compression failed"); /* Shouldn't happen */
+    SvCUR_set(out, len);
+    *SvEND(out) = '\0'; /* newSV() reserved the extra byte for this */
+    return out;
+}
+
+
+/* Compresses `bytes` with zlib or zlib-ng and returns the result as a mortal
+ * SV. Levels above 9 are capped at 9. */
+static SV *fugz_compress_zlib(pTHX_ int level, const char *bytes, size_t inlen) {
+    /* z_stream tracks the remaining input and output space in unsigned ints;
+     * refuse sizes that don't fit rather than compress a truncated length. */
+    const unsigned int maxlen = (unsigned int)-1;
+    if (inlen > maxlen) fu_confess("Zlib compression failed: input too large");
+    unsigned long bound = compressBound(inlen) + 64; /* compressBound() does not include the gzip header */
+    if (bound < inlen || bound > maxlen) fu_confess("Zlib compression failed: input too large");
+
+    z_stream stream;
+    memset(&stream, 0, sizeof(stream));
+
+    /* method 8 = Z_DEFLATED, windowBits 16+15 = gzip wrapper around a 32k
+     * window, memLevel 9, strategy 0 = Z_DEFAULT_STRATEGY. */
+    if (level > 9) level = 9;
+    int r = deflateInit2
+        ? deflateInit2(&stream, level, 8, 16+15, 9, 0)
+        : deflateInit2_(&stream, level, 8, 16+15, 9, 0, "1.3.1", (int)sizeof(stream));
+    if (r) fu_confess("Zlib compression failed (%d)", r);
+
+    SV *out = sv_2mortal(newSV(bound));
+    SvPOK_only(out);
+    stream.next_out = SvPVX(out);
+    stream.avail_out = bound;
+    stream.next_in = bytes;
+    stream.avail_in = inlen;
+
+    /* flush 4 = Z_FINISH: compress everything in this one call. Only a return
+     * value of 1 = Z_STREAM_END means the output is complete. */
+    r = deflate(&stream, 4);
+    unsigned long outlen = stream.total_out;
+    deflateEnd(&stream); /* Also on failure, or the stream state is leaked */
+    if (r != 1) fu_confess("Zlib compression failed (%d)", r);
+
+    SvCUR_set(out, outlen);
+    *SvEND(out) = '\0'; /* newSV() reserved the extra byte for this */
+    return out;
+}
+
+
+/* Implements gzip_compress(): checks that `level` is within 0..12, reads `in`
+ * as a byte string and hands it to the implementation chosen by fugz_lib().
+ * Returns a mortal SV with the gzip data; confesses on any error. */
+static SV *fugz_compress(pTHX_ IV level, SV *in) {
+    if (level < 0 || level > 12) fu_confess("Invalid compression level: %"IVdf, level);
+    if (!*fugz_lib()) fu_confess("Unable to load a suitable compression library");
+
+    STRLEN inlen;
+    const char *bytes = SvPVbyte(in, inlen);
+
+    return fugz_imp == 1
+        ? fugz_compress_ld(aTHX_ level, bytes, inlen)
+        : fugz_compress_zlib(aTHX_ level, bytes, inlen);
+}
diff --git a/t/compress.t b/t/compress.t
new file mode 100644
index 0000000..ef17c77
--- /dev/null
+++ b/t/compress.t
@@ -0,0 +1,47 @@
+use v5.36;
+use Test::More;
+use FU::Util qw/gzip_lib gzip_compress/;
+
+like gzip_lib, qr/^(|libdeflate|zlib-ng|zlib)$/, gzip_lib;
+
+plan skip_all => 'No suitable gzip library found' if !gzip_lib;
+plan skip_all => 'Compress::Zlib not found' if !eval { require Compress::Zlib };
+
+my $incompressible = Compress::Zlib::memGzip(join '', map chr(rand 256), 0..93123);
+
+for my $str ('', 'Hello world!', 'x'x4096, $incompressible) {
+    is Compress::Zlib::memGunzip(gzip_compress(0, $str)), $str;
+    is Compress::Zlib::memGunzip(gzip_compress(12, $str)), $str;
+}
+
+
+done_testing;
+
+
+__END__
+
+# Test for leaks:
+
+use Test::LeakTrace;
+diag count_sv;
+for (0..1000) {
+    for my $str ('', 'Hello world!', 'x'x4096, $incompressible) {
+        local $_ = gzip_lib;
+        $_ = gzip_compress(0, $str);
+        $_ = gzip_compress(12, $str);
+    }
+}
+diag count_sv;
+
+
+# Compare performance:
+
+use Benchmark 'cmpthese';
+open my $F, '<', 'FU.pm';
+local $/ = undef;
+my $data = <$F>;
+
+cmpthese -3, {
+    memGzip => 'Compress::Zlib::memGzip($data)',
+    gzip_compress => 'gzip_compress(6, $data)',
+};