Util: Add to_bool() and use it for JSON, Pg & query encoding

To improve interop with legacy modules.
This commit is contained in:
Yorhel 2025-02-25 09:10:03 +01:00
parent 06e2f950fe
commit c7a3415485
10 changed files with 141 additions and 37 deletions

7
FU.xs
View file

@ -91,6 +91,13 @@ EOT
MODULE = FU PACKAGE = FU::Util
void to_bool(SV *val)
PROTOTYPE: $
CODE:
SvGETMAGIC(val);
int r = fu_2bool(val);
ST(0) = r < 0 ? &PL_sv_undef : r ? &PL_sv_yes : &PL_sv_no;
void json_format(SV *val, ...)
CODE:
ST(0) = fujson_fmt_xs(aTHX_ ax, items, val);

View file

@ -8,6 +8,7 @@ use POSIX ();
use experimental 'builtin';
our @EXPORT_OK = qw/
to_bool
json_format json_parse
utf8_decode uri_escape uri_unescape
query_decode query_encode
@ -52,12 +53,12 @@ sub query_encode :prototype($) ($o) {
my($k, $v) = ($_, $o->{$_});
$k = uri_escape $k;
map {
my $a = $_;
$a = $a->TO_QUERY() if builtin::blessed($a) && $a->can('TO_QUERY');
!defined $a || (builtin::is_bool($a) && !$a)
? ()
: builtin::is_bool($a) ? $k
: $k.'='.uri_escape($a)
my $x = $_;
$x = $x->TO_QUERY() if builtin::blessed($x) && $x->can('TO_QUERY');
my $bool = to_bool($x);
!defined $x || !($bool//1) ? ()
: $bool ? $k
: $k.'='.uri_escape($x)
} ref $v eq 'ARRAY' ? @$v : ($v);
} sort keys %$o;
}
@ -75,7 +76,7 @@ sub httpdate_format :prototype($) ($time) {
}
sub httpdate_parse :prototype($) ($str) {
return if $str !~ /^$httpdays, ([0-9]{2}) ([A-Z][a-z]{2}) ([0-9]{4}) ([0-9]{2}):([0-9]{2}):([0-9]{2}) GMT$/;
return if $str !~ /^\s*$httpdays, ([0-9]{2}) ([A-Z][a-z]{2}) ([0-9]{4}) ([0-9]{2}):([0-9]{2}):([0-9]{2}) GMT\s*$/;
my ($mday, $mon, $year, $hour, $min, $sec) = ($1, $httpmonths{$2}, $3, $4, $5, $6);
return if !defined $mon;
# mktime() interprets the broken down time as our local timezone,
@ -105,6 +106,30 @@ doesn't believe in the concept of a "batteries included" standard library.
=head1 DESCRIPTION
=head2 Boolean Stuff
Perl has had a builtin boolean type since version 5.36 and FU uses that where
appropriate, but there's still a lot of older code out there using different
conventions. The following function should help when interacting with older
code and provide a gradual migration path to the new builtin booleans.
=over
=item to_bool($val)
Returns C<undef> if C<$val> is not likely to be a distinct boolean type,
otherwise it returns a normalized C<builtin::true> or C<builtin::false>.
This function recognizes the builtin booleans, C<\0>, C<\1>,
L<Types::Serialiser> (which is used by L<JSON::XS>, L<JSON::SIMD>, L<CBOR::XS>
and others), L<JSON::PP> (also used by L<Cpanel::JSON::XS> and others),
L<JSON::Tiny> and L<Mojo::JSON>.
This function is ambiguous in contexts where a bare scalar reference is a valid
value for C<$val>, due to C<\0> and C<\1> being considered booleans.
=back
=head2 JSON parsing & formatting
This module comes with a custom C-based JSON parser and formatter. These
@ -112,10 +137,9 @@ functions conform strictly to L<RFC-8259|https://tools.ietf.org/html/rfc8259>,
non-standard extensions are not supported and never will be. It also happens to
be pretty fast, refer to L<FU::Benchmarks> for some numbers.
JSON booleans are parsed into C<builtin::true> and C<builtin::false>. When
formatting, those builtin constants are the I<only> recognized boolean values -
alternative representations such as C<JSON::PP::true> and C<JSON::PP::false>
are not recognized and attempting to format such values will croak.
JSON booleans are parsed into C<builtin::true> and C<builtin::false>. In the
other direction, the C<to_bool()> function above is used to recognize which
values to represent as JSON boolean.
JSON numbers that are too large fit into a Perl integer are parsed into a
floating point value instead. This obviously loses precision, but is consistent
@ -230,11 +254,11 @@ Maximum permitted nesting depth of Perl values. Defaults to 512.
(Why the hell yet another JSON codec when CPAN is already full of them!? Well,
L<JSON::XS> is pretty cool but isn't going to be updated to support Perl's new
builtin booleans. L<JSON::PP> is slow and while L<Cpanel::JSON::XS> is
perfectly adequate, its codebase is too large and messy for my taste - too many
unnecessary features and C<#ifdef>s to support ancient perls and esoteric
configurations. Still, if you need anything not provided by these functions,
L<JSON::PP> and L<Cpanel::JSON::XS> are perfectly fine alternatives.
L<JSON::SIMD> and L<Mojo::JSON> also look like good and maintained candidates.)
perfectly adequate, its codebase is way too large and messy for what I need -
it has too many unnecessary features and C<#ifdef>s to support ancient perls
and esoteric configurations. Still, if you need anything not provided by these
functions, L<JSON::PP> and L<Cpanel::JSON::XS> are perfectly fine alternatives.
L<JSON::SIMD> and L<JSON::Tiny> also look like good and maintained candidates.)
=head2 URI-Related Functions
@ -289,7 +313,7 @@ characters, as per C<utf8_decode>.
=item query_encode($hashref)
The opposite of C<query_decode>. Takes a hashref of similar structure and
returns an ASCII-encoded query string. Keys with C<undef> or C<builtin::false>
returns an ASCII-encoded query string. Keys with C<undef> or C<to_bool()> false
values are omitted in the output.
If a given value is a blessed object with a C<TO_QUERY()> method, that method
@ -350,8 +374,8 @@ descriptor was received. The returned C<$message> is undef on error or an empty
string on EOF.
Like regular socket I/O, a single C<fdpass_send()> message may be split across
multiple C<fdpass_recv()> calls; in that case the C<$fd> will only be received
on the first call.
multiple C<fdpass_recv()> calls; in that case the C<$fd> is only received on
the first call.
Don't use this function if the sender may include multiple file descriptors in
a single message, weird things can happen. File descriptors received this way

View file

@ -176,3 +176,35 @@ static SV *fustr_done_(pTHX_ fustr *s) {
static double fu_timediff(const struct timespec *a, const struct timespec *b) {
return ((double)(a->tv_sec - b->tv_sec)) + (double)(a->tv_nsec - b->tv_nsec) / 1000000000.0;
}
/* -1 if arg is not a bool, 0 on false, 1 on true */
static int fu_2bool(SV *val) {
if (SvIsBOOL(val)) return BOOL_INTERNALS_sv_isbool_true(val) ? 1 : 0;
if (!SvROK(val)) return -1;
SV *rv = SvRV(val);
if (SvOBJECT(rv)) {
HV *stash = SvSTASH(rv);
/* Historical: "JSON::XS::Boolean", not used by JSON::XS since 3.0 in 2013 */
if (stash == gv_stashpvs("JSON::PP::Boolean", 0) /* Also covers Types::Serialiser::Boolean and used by a bunch of other modules */
|| stash == gv_stashpvs("Mojo::JSON::_Bool", 0)
|| stash == gv_stashpvs("JSON::Tiny::_Bool", 0))
return !!SvIV(rv);
return -1;
}
/* \0 or \1 */
if (SvTYPE(rv) < SVt_PVAV) {
if (SvIOK(rv)) {
IV iv = SvIV(rv);
return iv == 0 ? 0 : iv == 1 ? 1 : -1;
} else if (SvOK(rv)) {
STRLEN len;
char *str = SvPV_nomg(rv, len);
return len != 1 ? -1 : *str == '0' ? 0 : *str == '1' ? 1 : -1;
}
}
return -1;
}

View file

@ -231,10 +231,9 @@ static void fujson_fmt_obj(pTHX_ fujson_fmt_ctx *ctx, SV *rv, SV *obj) {
static void fujson_fmt(pTHX_ fujson_fmt_ctx *ctx, SV *val) {
SvGETMAGIC(val);
/* XXX: &PL_sv_yes and &PL_sv_no are proper booleans under 5.40, so no need
* to explicitly check for those; does this work in 5.36 as well? */
if (SvIsBOOL(val)) { /* Must check before IOKp & POKp, because bool implies both flags */
if (BOOL_INTERNALS_sv_isbool_true(val)) fustr_write(ctx->out, "true", 4);
int r = fu_2bool(val);
if (r != -1) { /* Must check SvISBOOL() before IOKp & POKp, because it implies both flags */
if (r) fustr_write(ctx->out, "true", 4);
else fustr_write(ctx->out, "false", 5);
} else if (SvPOKp(val)) {
fujson_fmt_str(aTHX_ ctx, SvPVX(val), SvCUR(val), SvUTF8(val));

View file

@ -77,7 +77,8 @@ RECVFN(bool) {
}
SENDFN(bool) {
fustr_write_ch(out, SvTRUE(val) ? 1 : 0);
int r = fu_2bool(val); /* So that we also recognize \0 and \1 */
fustr_write_ch(out, r < 0 ? SvTRUE(val) : r);
}
RECVFN(void) {

View file

@ -58,7 +58,7 @@ my @tests = (
);
my @errors = (
\1, qr/unable to format reference/,
\2, qr/unable to format reference/,
*STDOUT, qr/unable to format unknown value/,
'NaN'+0, qr/unable to format floating point NaN or Inf as JSON/,
'Inf'+0, qr/unable to format floating point NaN or Inf as JSON/,

View file

@ -92,12 +92,12 @@ subtest '$st prepare & exec', sub {
# Interleaved
{
my $a = $conn->q('SELECT 1 as a');
my $b = $conn->q('SELECT 2 as b');
is_deeply $a->columns, [ { oid => 23, name => 'a' } ];
is_deeply $b->columns, [ { oid => 23, name => 'b' } ];
is $a->val, 1;
is $b->val, 2;
my $x = $conn->q('SELECT 1 as a');
my $y = $conn->q('SELECT 2 as b');
is_deeply $x->columns, [ { oid => 23, name => 'a' } ];
is_deeply $y->columns, [ { oid => 23, name => 'b' } ];
is $x->val, 1;
is $y->val, 2;
}
};
@ -347,9 +347,9 @@ subtest 'txn', sub {
}
{
my $a = [1,2];
my $st = $conn->q('SELECT $1::int[]', $a)->text(0);
$a->[1] = 3;
my $x = [1,2];
my $st = $conn->q('SELECT $1::int[]', $x)->text(0);
$x->[1] = 3;
is_deeply $st->val, [1,3], 'not deep copy';
}

View file

@ -89,11 +89,11 @@ f float8 => $_ for ('', 'a', '123g', []);
v json => {}, undef, '{}';
# XXX: Huh, what's causing this "pretty" formatting?
v json => [1, undef, true, "hello"], undef, qq#[\n 1,\n null,\n true,\n "hello"\n]#;
f json => \1;
f json => \2;
v jsonb => {}, undef, '{}';
v jsonb => [1, undef, true, "hello"], undef, '[1, null, true, "hello"]';
f jsonb => \1;
f jsonb => \2;
v jsonpath => $_ for ('$."key"', '$."a[*]"?(@ > 2)');
f jsonpath => $_ for ('', 'hello world');

View file

@ -22,7 +22,7 @@ is_deeply
query_decode('a=&a=&b=&c==x&d=x='),
{ a => ['', ''], b => '', c => '=x', d => 'x=' };
is query_encode { a => ['', ''], b => '', c => '=x', d => 'x=' }, 'a=&a=&b=&c=%3dx&d=x%3d';
is query_encode { a => ['', '', \1], b => '', c => '=x', d => 'x=' }, 'a=&a=&a&b=&c=%3dx&d=x%3d';
sub FUTILTEST::TO_QUERY { '&'.($_[0][0] + 1) }

41
t/to_bool.t Normal file
View file

@ -0,0 +1,41 @@
use v5.36;
use Test::More;
use FU::Util 'to_bool';
use experimental 'builtin';
use builtin 'true', 'false';
is to_bool undef, undef;
is to_bool '', undef;
is to_bool 1, undef;
is to_bool [], undef;
is to_bool {}, undef;
is to_bool bless(\(my $x = 1), 'FU::Bullshit'), undef;
is to_bool builtin::true, true;
is to_bool builtin::false, false;
is to_bool \1, true;
is to_bool \0, false;
is to_bool \'1', true;
is to_bool \'0', false;
is to_bool \2, undef;
SKIP: {
eval { require Types::Serialiser; 1 } || skip 'Types::Serialiser not installed';
is to_bool Types::Serialiser::true(), true;
is to_bool Types::Serialiser::false(), false;
}
SKIP: {
eval { require JSON::Tiny; 1 } || skip 'JSON::Tiny not installed';
is to_bool JSON::Tiny::true(), true;
is to_bool JSON::Tiny::false(), false;
}
SKIP: {
eval { require Cpanel::JSON::XS; 1 } || skip 'Cpanel::JSON::XS not installed';
is to_bool Cpanel::JSON::XS::true(), true;
is to_bool Cpanel::JSON::XS::false(), false;
}
done_testing;