Util: Add to_bool() and use it for JSON, Pg & query encoding

To improve interop with legacy modules.
This commit is contained in:
Yorhel 2025-02-25 09:10:03 +01:00
parent 06e2f950fe
commit c7a3415485
10 changed files with 141 additions and 37 deletions

7
FU.xs
View file

@ -91,6 +91,13 @@ EOT
MODULE = FU PACKAGE = FU::Util MODULE = FU PACKAGE = FU::Util
void to_bool(SV *val)
PROTOTYPE: $
CODE:
SvGETMAGIC(val);
int r = fu_2bool(val);
ST(0) = r < 0 ? &PL_sv_undef : r ? &PL_sv_yes : &PL_sv_no;
void json_format(SV *val, ...) void json_format(SV *val, ...)
CODE: CODE:
ST(0) = fujson_fmt_xs(aTHX_ ax, items, val); ST(0) = fujson_fmt_xs(aTHX_ ax, items, val);

View file

@ -8,6 +8,7 @@ use POSIX ();
use experimental 'builtin'; use experimental 'builtin';
our @EXPORT_OK = qw/ our @EXPORT_OK = qw/
to_bool
json_format json_parse json_format json_parse
utf8_decode uri_escape uri_unescape utf8_decode uri_escape uri_unescape
query_decode query_encode query_decode query_encode
@ -52,12 +53,12 @@ sub query_encode :prototype($) ($o) {
my($k, $v) = ($_, $o->{$_}); my($k, $v) = ($_, $o->{$_});
$k = uri_escape $k; $k = uri_escape $k;
map { map {
my $a = $_; my $x = $_;
$a = $a->TO_QUERY() if builtin::blessed($a) && $a->can('TO_QUERY'); $x = $x->TO_QUERY() if builtin::blessed($x) && $x->can('TO_QUERY');
!defined $a || (builtin::is_bool($a) && !$a) my $bool = to_bool($x);
? () !defined $x || !($bool//1) ? ()
: builtin::is_bool($a) ? $k : $bool ? $k
: $k.'='.uri_escape($a) : $k.'='.uri_escape($x)
} ref $v eq 'ARRAY' ? @$v : ($v); } ref $v eq 'ARRAY' ? @$v : ($v);
} sort keys %$o; } sort keys %$o;
} }
@ -75,7 +76,7 @@ sub httpdate_format :prototype($) ($time) {
} }
sub httpdate_parse :prototype($) ($str) { sub httpdate_parse :prototype($) ($str) {
return if $str !~ /^$httpdays, ([0-9]{2}) ([A-Z][a-z]{2}) ([0-9]{4}) ([0-9]{2}):([0-9]{2}):([0-9]{2}) GMT$/; return if $str !~ /^\s*$httpdays, ([0-9]{2}) ([A-Z][a-z]{2}) ([0-9]{4}) ([0-9]{2}):([0-9]{2}):([0-9]{2}) GMT\s*$/;
my ($mday, $mon, $year, $hour, $min, $sec) = ($1, $httpmonths{$2}, $3, $4, $5, $6); my ($mday, $mon, $year, $hour, $min, $sec) = ($1, $httpmonths{$2}, $3, $4, $5, $6);
return if !defined $mon; return if !defined $mon;
# mktime() interprets the broken down time as our local timezone, # mktime() interprets the broken down time as our local timezone,
@ -105,6 +106,30 @@ doesn't believe in the concept of a "batteries included" standard library.
=head1 DESCRIPTION =head1 DESCRIPTION
=head2 Boolean Stuff
Perl has had a builtin boolean type since version 5.36 and FU uses that where
appropriate, but there's still a lot of older code out there using different
conventions. The following function should help when interacting with older
code and provide a gradual migration path to the new builtin booleans.
=over
=item to_bool($val)
Returns C<undef> if C<$val> is not likely to be a distinct boolean type,
otherwise it returns a normalized C<builtin::true> or C<builtin::false>.
This function recognizes the builtin booleans, C<\0>, C<\1>,
L<Types::Serialiser> (which is used by L<JSON::XS>, L<JSON::SIMD>, L<CBOR::XS>
and others), L<JSON::PP> (also used by L<Cpanel::JSON::XS> and others),
L<JSON::Tiny> and L<Mojo::JSON>.
This function is ambiguous in contexts where a bare scalar reference is a valid
value for C<$val>, due to C<\0> and C<\1> being considered booleans.
=back
=head2 JSON parsing & formatting =head2 JSON parsing & formatting
This module comes with a custom C-based JSON parser and formatter. These This module comes with a custom C-based JSON parser and formatter. These
@ -112,10 +137,9 @@ functions conform strictly to L<RFC-8259|https://tools.ietf.org/html/rfc8259>,
non-standard extensions are not supported and never will be. It also happens to non-standard extensions are not supported and never will be. It also happens to
be pretty fast, refer to L<FU::Benchmarks> for some numbers. be pretty fast, refer to L<FU::Benchmarks> for some numbers.
JSON booleans are parsed into C<builtin::true> and C<builtin::false>. When JSON booleans are parsed into C<builtin::true> and C<builtin::false>. In the
formatting, those builtin constants are the I<only> recognized boolean values - other direction, the C<to_bool()> function above is used to recognize which
alternative representations such as C<JSON::PP::true> and C<JSON::PP::false> values to represent as JSON boolean.
are not recognized and attempting to format such values will croak.
JSON numbers that are too large fit into a Perl integer are parsed into a JSON numbers that are too large fit into a Perl integer are parsed into a
floating point value instead. This obviously loses precision, but is consistent floating point value instead. This obviously loses precision, but is consistent
@ -230,11 +254,11 @@ Maximum permitted nesting depth of Perl values. Defaults to 512.
(Why the hell yet another JSON codec when CPAN is already full of them!? Well, (Why the hell yet another JSON codec when CPAN is already full of them!? Well,
L<JSON::XS> is pretty cool but isn't going to be updated to support Perl's new L<JSON::XS> is pretty cool but isn't going to be updated to support Perl's new
builtin booleans. L<JSON::PP> is slow and while L<Cpanel::JSON::XS> is builtin booleans. L<JSON::PP> is slow and while L<Cpanel::JSON::XS> is
perfectly adequate, its codebase is too large and messy for my taste - too many perfectly adequate, its codebase is way too large and messy for what I need -
unnecessary features and C<#ifdef>s to support ancient perls and esoteric it has too many unnecessary features and C<#ifdef>s to support ancient perls
configurations. Still, if you need anything not provided by these functions, and esoteric configurations. Still, if you need anything not provided by these
L<JSON::PP> and L<Cpanel::JSON::XS> are perfectly fine alternatives. functions, L<JSON::PP> and L<Cpanel::JSON::XS> are perfectly fine alternatives.
L<JSON::SIMD> and L<Mojo::JSON> also look like good and maintained candidates.) L<JSON::SIMD> and L<JSON::Tiny> also look like good and maintained candidates.)
=head2 URI-Related Functions =head2 URI-Related Functions
@ -289,7 +313,7 @@ characters, as per C<utf8_decode>.
=item query_encode($hashref) =item query_encode($hashref)
The opposite of C<query_decode>. Takes a hashref of similar structure and The opposite of C<query_decode>. Takes a hashref of similar structure and
returns an ASCII-encoded query string. Keys with C<undef> or C<builtin::false> returns an ASCII-encoded query string. Keys with C<undef> or C<to_bool()> false
values are omitted in the output. values are omitted in the output.
If a given value is a blessed object with a C<TO_QUERY()> method, that method If a given value is a blessed object with a C<TO_QUERY()> method, that method
@ -350,8 +374,8 @@ descriptor was received. The returned C<$message> is undef on error or an empty
string on EOF. string on EOF.
Like regular socket I/O, a single C<fdpass_send()> message may be split across Like regular socket I/O, a single C<fdpass_send()> message may be split across
multiple C<fdpass_recv()> calls; in that case the C<$fd> will only be received multiple C<fdpass_recv()> calls; in that case the C<$fd> is only received on
on the first call. the first call.
Don't use this function if the sender may include multiple file descriptors in Don't use this function if the sender may include multiple file descriptors in
a single message, weird things can happen. File descriptors received this way a single message, weird things can happen. File descriptors received this way

View file

@ -176,3 +176,35 @@ static SV *fustr_done_(pTHX_ fustr *s) {
static double fu_timediff(const struct timespec *a, const struct timespec *b) { static double fu_timediff(const struct timespec *a, const struct timespec *b) {
return ((double)(a->tv_sec - b->tv_sec)) + (double)(a->tv_nsec - b->tv_nsec) / 1000000000.0; return ((double)(a->tv_sec - b->tv_sec)) + (double)(a->tv_nsec - b->tv_nsec) / 1000000000.0;
} }
/* -1 if arg is not a bool, 0 on false, 1 on true */
static int fu_2bool(SV *val) {
if (SvIsBOOL(val)) return BOOL_INTERNALS_sv_isbool_true(val) ? 1 : 0;
if (!SvROK(val)) return -1;
SV *rv = SvRV(val);
if (SvOBJECT(rv)) {
HV *stash = SvSTASH(rv);
/* Historical: "JSON::XS::Boolean", not used by JSON::XS since 3.0 in 2013 */
if (stash == gv_stashpvs("JSON::PP::Boolean", 0) /* Also covers Types::Serialiser::Boolean and used by a bunch of other modules */
|| stash == gv_stashpvs("Mojo::JSON::_Bool", 0)
|| stash == gv_stashpvs("JSON::Tiny::_Bool", 0))
return !!SvIV(rv);
return -1;
}
/* \0 or \1 */
if (SvTYPE(rv) < SVt_PVAV) {
if (SvIOK(rv)) {
IV iv = SvIV(rv);
return iv == 0 ? 0 : iv == 1 ? 1 : -1;
} else if (SvOK(rv)) {
STRLEN len;
char *str = SvPV_nomg(rv, len);
return len != 1 ? -1 : *str == '0' ? 0 : *str == '1' ? 1 : -1;
}
}
return -1;
}

View file

@ -231,10 +231,9 @@ static void fujson_fmt_obj(pTHX_ fujson_fmt_ctx *ctx, SV *rv, SV *obj) {
static void fujson_fmt(pTHX_ fujson_fmt_ctx *ctx, SV *val) { static void fujson_fmt(pTHX_ fujson_fmt_ctx *ctx, SV *val) {
SvGETMAGIC(val); SvGETMAGIC(val);
/* XXX: &PL_sv_yes and &PL_sv_no are proper booleans under 5.40, so no need int r = fu_2bool(val);
* to explicitly check for those; does this work in 5.36 as well? */ if (r != -1) { /* Must check SvISBOOL() before IOKp & POKp, because it implies both flags */
if (SvIsBOOL(val)) { /* Must check before IOKp & POKp, because bool implies both flags */ if (r) fustr_write(ctx->out, "true", 4);
if (BOOL_INTERNALS_sv_isbool_true(val)) fustr_write(ctx->out, "true", 4);
else fustr_write(ctx->out, "false", 5); else fustr_write(ctx->out, "false", 5);
} else if (SvPOKp(val)) { } else if (SvPOKp(val)) {
fujson_fmt_str(aTHX_ ctx, SvPVX(val), SvCUR(val), SvUTF8(val)); fujson_fmt_str(aTHX_ ctx, SvPVX(val), SvCUR(val), SvUTF8(val));

View file

@ -77,7 +77,8 @@ RECVFN(bool) {
} }
SENDFN(bool) { SENDFN(bool) {
fustr_write_ch(out, SvTRUE(val) ? 1 : 0); int r = fu_2bool(val); /* So that we also recognize \0 and \1 */
fustr_write_ch(out, r < 0 ? SvTRUE(val) : r);
} }
RECVFN(void) { RECVFN(void) {

View file

@ -58,7 +58,7 @@ my @tests = (
); );
my @errors = ( my @errors = (
\1, qr/unable to format reference/, \2, qr/unable to format reference/,
*STDOUT, qr/unable to format unknown value/, *STDOUT, qr/unable to format unknown value/,
'NaN'+0, qr/unable to format floating point NaN or Inf as JSON/, 'NaN'+0, qr/unable to format floating point NaN or Inf as JSON/,
'Inf'+0, qr/unable to format floating point NaN or Inf as JSON/, 'Inf'+0, qr/unable to format floating point NaN or Inf as JSON/,

View file

@ -92,12 +92,12 @@ subtest '$st prepare & exec', sub {
# Interleaved # Interleaved
{ {
my $a = $conn->q('SELECT 1 as a'); my $x = $conn->q('SELECT 1 as a');
my $b = $conn->q('SELECT 2 as b'); my $y = $conn->q('SELECT 2 as b');
is_deeply $a->columns, [ { oid => 23, name => 'a' } ]; is_deeply $x->columns, [ { oid => 23, name => 'a' } ];
is_deeply $b->columns, [ { oid => 23, name => 'b' } ]; is_deeply $y->columns, [ { oid => 23, name => 'b' } ];
is $a->val, 1; is $x->val, 1;
is $b->val, 2; is $y->val, 2;
} }
}; };
@ -347,9 +347,9 @@ subtest 'txn', sub {
} }
{ {
my $a = [1,2]; my $x = [1,2];
my $st = $conn->q('SELECT $1::int[]', $a)->text(0); my $st = $conn->q('SELECT $1::int[]', $x)->text(0);
$a->[1] = 3; $x->[1] = 3;
is_deeply $st->val, [1,3], 'not deep copy'; is_deeply $st->val, [1,3], 'not deep copy';
} }

View file

@ -89,11 +89,11 @@ f float8 => $_ for ('', 'a', '123g', []);
v json => {}, undef, '{}'; v json => {}, undef, '{}';
# XXX: Huh, what's causing this "pretty" formatting? # XXX: Huh, what's causing this "pretty" formatting?
v json => [1, undef, true, "hello"], undef, qq#[\n 1,\n null,\n true,\n "hello"\n]#; v json => [1, undef, true, "hello"], undef, qq#[\n 1,\n null,\n true,\n "hello"\n]#;
f json => \1; f json => \2;
v jsonb => {}, undef, '{}'; v jsonb => {}, undef, '{}';
v jsonb => [1, undef, true, "hello"], undef, '[1, null, true, "hello"]'; v jsonb => [1, undef, true, "hello"], undef, '[1, null, true, "hello"]';
f jsonb => \1; f jsonb => \2;
v jsonpath => $_ for ('$."key"', '$."a[*]"?(@ > 2)'); v jsonpath => $_ for ('$."key"', '$."a[*]"?(@ > 2)');
f jsonpath => $_ for ('', 'hello world'); f jsonpath => $_ for ('', 'hello world');

View file

@ -22,7 +22,7 @@ is_deeply
query_decode('a=&a=&b=&c==x&d=x='), query_decode('a=&a=&b=&c==x&d=x='),
{ a => ['', ''], b => '', c => '=x', d => 'x=' }; { a => ['', ''], b => '', c => '=x', d => 'x=' };
is query_encode { a => ['', ''], b => '', c => '=x', d => 'x=' }, 'a=&a=&b=&c=%3dx&d=x%3d'; is query_encode { a => ['', '', \1], b => '', c => '=x', d => 'x=' }, 'a=&a=&a&b=&c=%3dx&d=x%3d';
sub FUTILTEST::TO_QUERY { '&'.($_[0][0] + 1) } sub FUTILTEST::TO_QUERY { '&'.($_[0][0] + 1) }

41
t/to_bool.t Normal file
View file

@ -0,0 +1,41 @@
use v5.36;
use Test::More;
use FU::Util 'to_bool';
use experimental 'builtin';
use builtin 'true', 'false';
is to_bool undef, undef;
is to_bool '', undef;
is to_bool 1, undef;
is to_bool [], undef;
is to_bool {}, undef;
is to_bool bless(\(my $x = 1), 'FU::Bullshit'), undef;
is to_bool builtin::true, true;
is to_bool builtin::false, false;
is to_bool \1, true;
is to_bool \0, false;
is to_bool \'1', true;
is to_bool \'0', false;
is to_bool \2, undef;
SKIP: {
eval { require Types::Serialiser; 1 } || skip 'Types::Serialiser not installed';
is to_bool Types::Serialiser::true(), true;
is to_bool Types::Serialiser::false(), false;
}
SKIP: {
eval { require JSON::Tiny; 1 } || skip 'JSON::Tiny not installed';
is to_bool JSON::Tiny::true(), true;
is to_bool JSON::Tiny::false(), false;
}
SKIP: {
eval { require Cpanel::JSON::XS; 1 } || skip 'Cpanel::JSON::XS not installed';
is to_bool Cpanel::JSON::XS::true(), true;
is to_bool Cpanel::JSON::XS::false(), false;
}
done_testing;