package FU::Util 0.4; use v5.36; use FU::XS; use Carp 'confess'; use Exporter 'import'; use POSIX (); use experimental 'builtin'; our @EXPORT_OK = qw/ to_bool json_format json_parse utf8_decode uri_escape uri_unescape query_decode query_encode httpdate_format httpdate_parse gzip_lib gzip_compress brotli_compress fdpass_send fdpass_recv /; sub utf8_decode :prototype($) { return if !defined $_[0]; confess 'Invalid UTF-8' if !utf8::decode($_[0]); confess 'Invalid control character' if $_[0] =~ /[\x00-\x08\x0b\x0c\x0e-\x1f]/; $_[0] } sub uri_escape :prototype($) ($s) { utf8::encode($s); $s =~ s/([^A-Za-z0-9._~-])/sprintf '%%%02x', ord $1/eg; $s; } sub uri_unescape :prototype($) ($s) { return if !defined $s; utf8::encode($s); $s =~ tr/+/ /; $s =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; utf8_decode $s; } sub query_decode :prototype($) ($s) { my %o; for (split /&/, $s//'') { my($k,$v) = map uri_unescape($_), split /=/, $_, 2; $v //= builtin::true; if (ref $o{$k}) { push $o{$k}->@*, $v } elsif (exists $o{$k}) { $o{$k} = [ $o{$k}, $v ] } else { $o{$k} = $v } } \%o } sub query_encode :prototype($) ($o) { return join '&', map { my($k, $v) = ($_, $o->{$_}); $k = uri_escape $k; map { my $x = $_; $x = $x->TO_QUERY() if builtin::blessed($x) && $x->can('TO_QUERY'); my $bool = to_bool($x); !defined $x || !($bool//1) ? () : $bool ? $k : $k.'='.uri_escape($x) } ref $v eq 'ARRAY' ? @$v : ($v); } sort keys %$o; } my @httpmonths = qw/Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec/; my %httpmonths = map +($httpmonths[$_], $_), 0..11; my @httpdays = qw/Sun Mon Tue Wed Thu Fri Sat/; my $httpdays = '(?:'.join('|', @httpdays).')'; sub httpdate_format :prototype($) ($time) { my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime $time; sprintf '%s, %02d %s %d %02d:%02d:%02d GMT', $httpdays[$wday], $mday, $httpmonths[$mon], $year+1900, $hour, $min, $sec; } sub httpdate_parse :prototype($) ($str) { return if $str !~ /^\s*$httpdays, ([0-9]{2}) ([A-Z][a-z]{2}) ([0-9]{4}) ([0-9]{2}):([0-9]{2}):([0-9]{2}) GMT\s*$/; my ($mday, $mon, $year, $hour, $min, $sec) = ($1, $httpmonths{$2}, $3, $4, $5, $6); return if !defined $mon; # mktime() interprets the broken down time as our local timezone, # which is utter garbage. But we can work around that by subtracting the # time offset between localtime and gmtime around the given date. Might be # off for a few hours around DST changes, but ugh. my $mktime = POSIX::mktime($sec, $min, $hour, $mday, $mon, $year-1900); $mktime + (POSIX::mktime(localtime $mktime) - POSIX::mktime(gmtime $mktime)); } 1; __END__ =head1 NAME FU::Util - Miscellaneous Utility Functions =head1 EXPERIMENTAL This module is still in development and there will likely be a few breaking API changes, see the main L module for details. =head1 SYNOPSIS use FU::Util qw/json_format/; my $data = json_format [1, 2, 3]; =head1 DESCRIPTION A bunch of functions that are too small (or I'm too lazy) to split out into separate modules. Some of these functions really ought to be part of Perl core. =head1 Boolean Stuff Perl has had a builtin boolean type since version 5.36 and FU uses that where appropriate, but there's still a lot of older code out there using different conventions. The following function should help when interacting with older code and provide a gradual migration path to the new builtin booleans. =over =item to_bool($val) Returns C if C<$val> is not likely to be a distinct boolean type, otherwise it returns a normalized C or C. This function recognizes the builtin booleans, C<\0>, C<\1>, L, L (which is used by L, L, L and others), L (also used by L and others), L and L. This function is ambiguous in contexts where a bare scalar reference is a valid value for C<$val>, due to C<\0> and C<\1> being considered booleans. =back =head1 JSON Parsing & Formatting This module comes with a custom C-based JSON parser and formatter. These functions conform strictly to L, non-standard extensions are not supported and never will be. It also happens to be pretty fast, refer to L for some numbers. JSON booleans are parsed into C and C. In the other direction, the C function above is used to recognize which values to represent as JSON boolean. JSON numbers that are too large fit into a Perl integer are parsed into a floating point value instead. This obviously loses precision, but is consistent with C in JavaScript land - except Perl does support the full range of a 64bit integer. JSON numbers with a fraction or exponent are also converted into floating point, which may lose precision as well. L and L are not currently supported. Attempting to format a floating point C or C results in an error. =over =item json_parse($string, %options) Parse a JSON string and return a Perl value. With the default options, this function is roughly similar to: JSON::PP->new->allow_nonref->core_bools-decode($string); Croaks on invalid JSON, but the error messages are not super useful. This function also throws an error on JSON objects with duplicate keys, which is consistent with the default behavior of L but inconsistent with other modules. Supported C<%options>: =over =item utf8 Boolean, interpret the input C<$string> as a UTF-8 encoded byte string instead of a Perl Unicode string. =item max_depth Maximum permitted nesting depth of arrays and objects. Defaults to 512. =item max_size Throw an error if the JSON data is larger than the given size in bytes. Defaults to 1 GiB. =item offset Takes a reference to a scalar that indicates from which byte offset in C<$string> to start parsing. On success, the offset is updated to point to the next non-whitespace character or C if the string has been fully consumed. This option can be used to parse a stream of JSON values: my $data = '{"obj":1}{"obj":2}'; my $offset = 0; my $obj1 = json_parse($data, offset => \$offset); # $obj1 = {obj=>1}; $offset = 9; my $obj2 = json_parse($data, offset => \$offset); # $obj2 = {obj=>2}; $offset = undef; =back =item json_format($scalar, %options) Format a Perl value as JSON. With the default options, this function behaves roughly similar to: JSON::PP->new->allow_nonref->core_bools->convert_blessed->encode($scalar); This function generates invalid JSON if you pass it a string with invalid Unicode characters; I don't see how you'd ever accidentally end up with such a string, anyway. The following C<%options> are supported: =over =item canonical Boolean, write hash keys in deterministic (sorted) order. This option currently has no effect on tied hashes. =item pretty Boolean, format JSON with newlines and indentation for easier reading. Beauty is in the eye of the beholder, this option currently follows the convention used by L and others: 3 space indent and one space around the C<:> separating object keys and values. The exact format might change in later versions. =item utf8 Boolean, returns a UTF-8 encoded byte string instead of a Perl Unicode string. =item html_safe Boolean. When set, the encoded JSON is safe for (unescaped) inclusion into HTML or XML content. This encodes C<< < >>, C<< > >> and C<< & >> as Unicode escapes. Commonly used to embed data inside a HTML page: $html = ''; This option does NOT make it safe to include the encoded JSON as an attribute value. There is no way to do that without violating JSON specs, so you should use entity escaping instead. Some JSON modules escape the forward slash (C) character instead, but that is, at best, B sufficient for embedding inside a C<<