Add some tests and move some docs

2025-02-01 07:00:09 +01:00 · 2025-02-01 07:00:09 +01:00 · abfbba3c10
commit abfbba3c10
parent ebe84167e7
7 changed files with 179 additions and 129 deletions
--- a/FU.pm
+++ b/FU.pm
@ -2,3 +2,19 @@ package FU 0.1;
 use v5.36;

 1;
+
+__END__
+
+=head1 NAME
+
+FU - A collection of awesome modules plus a lean and efficient web framework.
+
+=head1 SYNOPSIS
+
+=head1 DESCRIPTION
+
+=head2 Properties
+
+- Requires a moderately recent Perl (>= 5.36).
+- Only works on 64-bit Linux (and possibly *BSD).
+- Assumes that no threading is used; not all modules are thread-safe.
--- a/FU.pod
+++ b/FU.pod
@ -1,13 +0,0 @@
-=head1 NAME
-
-FU - A collection of awesome modules plus a lean and efficient web framework.
-
-=head1 SYNOPSIS
-
-=head1 DESCRIPTION
-
-=head2 Properties
-
- Requires a moderately recent Perl (>= 5.36).
- Only works on 64-bit Linux (and possibly *BSD).
- Assumes that no threading is used; not all modules are thread-safe.
--- a/FU/Util.pm
+++ b/FU/Util.pm
@ -7,3 +7,116 @@ use Exporter 'import';
 our @EXPORT_OK = qw/json_format json_parse/;

 1;
+__END__
+
+=head1 NAME
+
+FU::Util - Miscellaneous utility functions that really should have been part of
+a core Perl installation but aren't for some reason because the Perl community
+doesn't believe in the concept of a "batteries included" standard library.
+</rant>
+
+=head1 SYNOPSIS
+
+    use FU::Util qw/json_format/;
+
+    my $data = json_format [1, 2, 3];
+
+=head1 DESCRIPTION
+
+=head2 JSON parsing & formatting
+
+This module comes with a custom C-based JSON parser and formatter. These
+functions conform strictly to L<RFC-8259|https://tools.ietf.org/html/rfc8259>,
+non-standard extensions are not supported and never will be. It also happens to
+be pretty fast, refer to L<FU::Benchmarks> for some numbers.
+
+JSON booleans are parsed into C<builtin::true> and C<builtin::false>. When
+formatting, those builtin constants are the I<only> recognized boolean values -
+alternative representations such as C<JSON::PP::true> and C<JSON::PP::false>
+are not recognized and attempting to format such values will croak.
+
+JSON numbers that are too large to fit into a Perl integer are parsed into a
+floating point value instead. This obviously loses precision, but is consistent
+with C<JSON.parse()> in JavaScript land - except Perl does support the full
+range of a 64bit integer. JSON numbers with a fraction or exponent are also
+converted into floating point, which may lose precision as well.
+L<Math::BigInt> and L<Math::BigFloat> are not currently supported. Attempting
+to format a floating point C<NaN> or C<Inf> results in an error.
+
+=over
+
+=item json_parse($string, %options)
+
+Parse a JSON string and return a Perl value. With the default options, this
+function is roughly similar to:
+
+    JSON::PP->new->allow_nonref->core_bools->decode($string);
+
+Supported C<%options>:
+
+=over
+
+=item utf8
+
+Boolean, interpret the input C<$string> as a UTF-8 encoded byte string instead
+of a Perl Unicode string.
+
+=back
+
+
+=item json_format($scalar, %options)
+
+Format a Perl value as JSON. With the default options, this function behaves
+roughly similar to:
+
+    JSON::PP->new->allow_nonref->core_bools->convert_blessed->encode($scalar);
+
+Some modules escape the slash character in encoded strings to prevent a
+potential XSS vulnerability when embedding JSON inside C<< <script> ..
+</script> >> tags.  This function does I<not> do that because it might not even
+be sufficient. The following is probably an improvement:
+
+    json_format($data) =~ s{</}{<\\/}rg =~ s/<!--/<\\u0021--/rg;
+
+The following C<%options> are supported:
+
+=over
+
+=item canonical
+
+Boolean, write hash keys in deterministic (sorted) order. This option currently
+has no effect on tied hashes.
+
+=item pretty
+
+Boolean, format JSON with newlines and indentation for easier reading.  Beauty
+is in the eye of the beholder, this option currently follows the convention
+used by L<JSON::XS> and others: 3 space indent and one space around the C<:>
+separating object keys and values. The exact format might change in later
+versions.
+
+=item utf8
+
+Boolean, returns a UTF-8 encoded byte string instead of a Perl Unicode string.
+
+=item max_size
+
+Maximum permitted size, in bytes, of the generated JSON string. Defaults to 1 GiB.
+
+=item max_depth
+
+Maximum permitted nesting depth of Perl values. Defaults to 512.
+
+=back
+
+=back
+
+(Why the hell yet another JSON codec when CPAN is already full of them!? Well,
+L<JSON::XS> is pretty cool but isn't going to be updated to support Perl's new
+builtin booleans. L<JSON::PP> is slow and while L<Cpanel::JSON::XS> is
+perfectly adequate, its codebase is too large and messy for my taste - too many
+unnecessary features and C<#ifdef>s to support ancient perls and esoteric
+configurations. Still, if you need anything not provided by these functions,
+L<JSON::PP> and L<Cpanel::JSON::XS> are perfectly fine alternatives.
+L<JSON::SIMD> and L<Mojo::JSON> also look like good and maintained candidates.)
--- a/FU/Util.pod
+++ b/FU/Util.pod
@ -1,111 +0,0 @@
-=head1 NAME
-
-FU::Util - Miscellaneous utility functions that really should have been part of
-a core Perl installation but aren't for some reason because the Perl community
-doesn't believe in the concept of a "batteries included" standard library.
-</rant>
-
-=head1 SYNOPSIS
-
-    use FU::Util qw/json_format/;
-
-    my $data = json_format [1, 2, 3];
-
-=head1 DESCRIPTION
-
-=head2 JSON parsing & formatting
-
-This module comes with a custom C-based JSON parser and formatter. These
-functions conform strictly to L<RFC-8259|https://tools.ietf.org/html/rfc8259>,
-non-standard extensions are not supported and never will be. It also happens to
-be pretty fast, refer to L<FU::Benchmarks> for some numbers.
-
-JSON booleans are parsed into C<builtin::true> and C<builtin::false>. When
-formatting, those builtin constants are the I<only> recognized boolean values -
-alternative representations such as C<JSON::PP::true> and C<JSON::PP::false>
-are not recognized and attempting to format such values will croak.
-
-JSON numbers that are too large fit into a Perl integer are parsed into a
-floating point value instead. This obviously loses precision, but is consistent
-with C<JSON.parse()> in JavaScript land - except Perl does support the full
-range of a 64bit integer. JSON numbers with a fraction or exponent are also
-converted into floating point, which may lose precision as well.
-L<Math::BigInt> and L<Math::BigFloat> are not currently supported. Attempting
-to format a floating point C<NaN> or C<Inf> results in an error.
-
-=over
-
-=item json_parse($string, %options)
-
-Parse a JSON string and return a Perl value. With the default options, this
-function is roughly similar to:
-
-    JSON::PP->new->allow_nonref->core_bools-decode($string);
-
-Supported C<%options>:
-
-=over
-
-=item utf8
-
-Boolean, interpret the input C<$string> as a UTF-8 encoded byte string instead
-of a Perl Unicode string.
-
-=back
-
-
-=item json_format($scalar, %options)
-
-Format a Perl value as JSON. With the default options, this function behaves
-roughly similar to:
-
-    JSON::PP->new->allow_nonref->core_bools->convert_blessed->encode($scalar);
-
-Some modules escape the slash character in encoded strings to prevent a
-potential XSS vulnerability when embedding JSON inside C<< <script> ..
-</script> >> tags.  This function does I<not> do that because it might not even
-be sufficient. The following is probably an improvement:
-
-    json_format($data) =~ s{</}{<\\/}rg =~ s/<!--/<\\u0021--/rg;
-
-The following C<%options> are supported:
-
-=over
-
-=item canonical
-
-Boolean, write hash keys in deterministic (sorted) order. This option currently
-has no effect on tied hashes.
-
-=item pretty
-
-Boolean, format JSON with newlines and indentation for easier reading.  Beauty
-is in the eye of the beholder, this option currently follows the convention
-used by L<JSON::XS> and others: 3 space indent and one space around the C<:>
-separating object keys and values. The exact format might change in later
-versions.
-
-=item utf8
-
-Boolean, returns a UTF-8 encoded byte string instead of a Perl Unicode string.
-
-=item max_size
-
-Maximum permitted size, in bytes, of the generated JSON string. Defaults to 1 GiB.
-
-=item max_depth
-
-Maximum permitted nesting depth of Perl values. Defaults to 512.
-
-=back
-
-=back
-
-(Why the hell yet another JSON codec when CPAN is already full of them!? Well,
-L<JSON::XS> is pretty cool but isn't going to be updated to support Perl's new
-builtin booleans. L<JSON::PP> is slow and while L<Cpanel::JSON::XS> is
-perfectly adequate, its codebase is a little too messy for my taste - too many
-unnecessary features and C<#ifdef>s to support ancient perls and esoteric
-configurations. Still, if you need anything not provided by these functions,
-L<JSON::PP> and L<Cpanel::JSON::XS> are perfectly fine alternatives.
-L<JSON::SIMD> and L<Mojo::JSON> also look like good and maintained candidates.)
--- a/bench.PL
+++ b/bench.PL
@ -164,6 +164,9 @@ be a good measure". I've used these benchmarks to find and optimize hotspots in
 FU, which in turn means these numbers may look better than they are in
 real-world use.

+B<DISCLAIMER#3:> Many of these benchmarks exist solely to test edge case
+performance; these numbers are not representative of real-world use.
+
 =head1 MODULE VERSIONS

 The following module versions were used:
@ -176,7 +179,7 @@ The following module versions were used:

 =head1 BENCHMARKS

-=head2 JSON Formatting
+=head2 JSON Parsing & Formatting

 These benchmarks run on large-ish arrays with repeated values. JSON encoding is
 sufficiently fast that Perl function calling overhead tends to dominate for
--- a/t/json_format.t
+++ b/t/json_format.t
@ -68,7 +68,6 @@ my @errors = (
    do { my $o = {}; bless $o, 'MyToJSONSelf' }, qr/MyToJSONSelf::TO_JSON method returned same object as was passed instead of a new one/,
 );

-plan tests => @tests*2 + @errors/2 + 10;

 for my($in, $exp) (@tests) {
    my $out = json_format $in;
@ -120,6 +119,25 @@ eval { json_format 'hello world', max_size => 8 };
 like $@, qr/maximum string length exceeded/;


+# Test large strings to cover some buffer handling special cases.
+for (2000..2100, 4000..4200, 8100..8200, 12200..12300, 16300..16400) {
+    my $s = 'a'x$_;
+    is json_format($s), "\"$s\"";
+}
+
+# 500 depth
+{
+    my $v = 1;
+    $v = [$v] for (1..500);
+    is json_format($v), '['x500 . 1 . ']'x500;
+}
+{
+    my $v = 1;
+    $v = {'',$v} for (1..500);
+    is json_format($v), '{"":'x500 . 1 . '}'x500;
+}
+
+
 # http://e-choroba.eu/18-yapc slide 6

 tie my $incs, 'MyIncrementer', 'Xa';
@ -132,6 +150,9 @@ is json_format($incu), 4;
 is json_format($incu), 5;
 is json_format($incu), 6;

+
+done_testing;
+
 package MyIncrementer;
 use Tie::Scalar;
 use parent -norequire => 'Tie::StdScalar';
--- a/t/json_parse.t
+++ b/t/json_parse.t
@ -137,7 +137,7 @@ is ref $v, 'HASH';
 is keys %$v, 1;
 is $v->{a}, 1;

-sub large($s) {
+sub complete($s) {
    $v = json_parse $s;
    is ref $v, 'HASH';
    is keys %$v, 3;
@ -163,11 +163,32 @@ sub large($s) {
    is ref $v->{'ë'}, 'ARRAY';
    is scalar $v->{'ë'}->@*, 0;
 }
-large '{"a":[1,0.1,true,null,{}],"":-0,"ë":[]}';
-large '  {
+complete '{"a":[1,0.1,true,null,{}],"":-0,"ë":[]}';
+complete '  {
    "a"  :  [  1  ,  0.1  ,  true  ,  null  ,  {  }  ]  ,
    ""   :  -0  ,
    "ë"  :  [  ]
 }  ';

+
+# Test large inputs to cover some buffer handling special cases.
+for (2000..2100, 4000..4200, 8100..8200, 12200..12300, 16300..16400) {
+    my $s = 'a'x$_;
+    is json_parse("\"$s\""), $s
+}
+
+# 500 depth
+{
+    $v = json_parse('['x500 . ']'x500);
+    my $i = 0;
+    while (ref $v) { $v = $v->[0]; $i++ }
+    is $i, 500;
+}
+{
+    $v = json_parse('{"":'x500 . 1 . '}'x500);
+    my $i = 0;
+    while (ref $v) { $v = $v->{''}; $i++ }
+    is $i, 500;
+}
+
 done_testing;