diff --git a/FU.pm b/FU.pm index fc125fa..eb4010d 100644 --- a/FU.pm +++ b/FU.pm @@ -2,3 +2,19 @@ package FU 0.1; use v5.36; 1; + +__END__ + +=head1 NAME + +FU - A collection of awesome modules plus a lean and efficient web framework. + +=head1 SYNOPSIS + +=head1 DESCRIPTION + +=head2 Properties + +- Requires a moderately recent Perl (>= 5.36). +- Only works on 64-bit Linux (and possibly *BSD). +- Assumes that no threading is used; not all modules are thread-safe. diff --git a/FU.pod b/FU.pod deleted file mode 100644 index d11dd5b..0000000 --- a/FU.pod +++ /dev/null @@ -1,13 +0,0 @@ -=head1 NAME - -FU - A collection of awesome modules plus a lean and efficient web framework. - -=head1 SYNOPSIS - -=head1 DESCRIPTION - -=head2 Properties - -- Requires a moderately recent Perl (>= 5.36). -- Only works on 64-bit Linux (and possibly *BSD). -- Assumes that no threading is used; not all modules are thread-safe. diff --git a/FU/Util.pm b/FU/Util.pm index d22a639..5ff032c 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -7,3 +7,116 @@ use Exporter 'import'; our @EXPORT_OK = qw/json_format json_parse/; 1; +__END__ + +=head1 NAME + +FU::Util - Miscellaneous utility functions that really should have been part of +a core Perl installation but aren't for some reason because the Perl community +doesn't believe in the concept of a "batteries included" standard library. + + +=head1 SYNOPSIS + + use FU::Util qw/json_format/; + + my $data = json_format [1, 2, 3]; + +=head1 DESCRIPTION + +=head2 JSON parsing & formatting + +This module comes with a custom C-based JSON parser and formatter. These +functions conform strictly to L, +non-standard extensions are not supported and never will be. It also happens to +be pretty fast, refer to L for some numbers. + +JSON booleans are parsed into C and C. When +formatting, those builtin constants are the I recognized boolean values - +alternative representations such as C and C +are not recognized and attempting to format such values will croak. 
+ +JSON numbers that are too large to fit into a Perl integer are parsed into a +floating point value instead. This obviously loses precision, but is consistent +with C in JavaScript land - except Perl does support the full +range of a 64bit integer. JSON numbers with a fraction or exponent are also +converted into floating point, which may lose precision as well. +L and L are not currently supported. Attempting +to format a floating point C or C results in an error. + +=over + +=item json_parse($string, %options) + +Parse a JSON string and return a Perl value. With the default options, this +function is roughly similar to: + + JSON::PP->new->allow_nonref->core_bools->decode($string); + +Supported C<%options>: + +=over + +=item utf8 + +Boolean, interpret the input C<$string> as a UTF-8 encoded byte string instead +of a Perl Unicode string. + +=back + + +=item json_format($scalar, %options) + +Format a Perl value as JSON. With the default options, this function behaves +roughly similar to: + + JSON::PP->new->allow_nonref->core_bools->convert_blessed->encode($scalar); + +Some modules escape the slash character in encoded strings to prevent a +potential XSS vulnerability when embedding JSON inside C<< >> tags. This function does I do that because it might not even +be sufficient. The following is probably an improvement: + + json_format($data) =~ s{ are supported: + +=over + +=item canonical + +Boolean, write hash keys in deterministic (sorted) order. This option currently +has no effect on tied hashes. + +=item pretty + +Boolean, format JSON with newlines and indentation for easier reading. Beauty +is in the eye of the beholder, this option currently follows the convention +used by L and others: 3 space indent and one space around the C<:> +separating object keys and values. The exact format might change in later +versions. + +=item utf8 + +Boolean, returns a UTF-8 encoded byte string instead of a Perl Unicode string. 
+ +=item max_size + +Maximum permitted size, in bytes, of the generated JSON string. Defaults to 1 GiB. + +=item max_depth + +Maximum permitted nesting depth of Perl values. Defaults to 512. + +=back + +=back + +(Why the hell yet another JSON codec when CPAN is already full of them!? Well, +L is pretty cool but isn't going to be updated to support Perl's new +builtin booleans. L is slow and while L is +perfectly adequate, its codebase is too large and messy for my taste - too many +unnecessary features and C<#ifdef>s to support ancient perls and esoteric +configurations. Still, if you need anything not provided by these functions, +L and L are perfectly fine alternatives. +L and L also look like good and maintained candidates.) diff --git a/FU/Util.pod b/FU/Util.pod deleted file mode 100644 index 67bb5a2..0000000 --- a/FU/Util.pod +++ /dev/null @@ -1,111 +0,0 @@ -=head1 NAME - -FU::Util - Miscellaneous utility functions that really should have been part of -a core Perl installation but aren't for some reason because the Perl community -doesn't believe in the concept of a "batteries included" standard library. - - -=head1 SYNOPSIS - - use FU::Util qw/json_format/; - - my $data = json_format [1, 2, 3]; - -=head1 DESCRIPTION - -=head2 JSON parsing & formatting - -This module comes with a custom C-based JSON parser and formatter. These -functions conform strictly to L, -non-standard extensions are not supported and never will be. It also happens to -be pretty fast, refer to L for some numbers. - -JSON booleans are parsed into C and C. When -formatting, those builtin constants are the I recognized boolean values - -alternative representations such as C and C -are not recognized and attempting to format such values will croak. - -JSON numbers that are too large fit into a Perl integer are parsed into a -floating point value instead. This obviously loses precision, but is consistent -with C in JavaScript land - except Perl does support the full -range of a 64bit integer. 
JSON numbers with a fraction or exponent are also -converted into floating point, which may lose precision as well. -L and L are not currently supported. Attempting -to format a floating point C or C results in an error. - -=over - -=item json_parse($string, %options) - -Parse a JSON string and return a Perl value. With the default options, this -function is roughly similar to: - - JSON::PP->new->allow_nonref->core_bools-decode($string); - -Supported C<%options>: - -=over - -=item utf8 - -Boolean, interpret the input C<$string> as a UTF-8 encoded byte string instead -of a Perl Unicode string. - -=back - - -=item json_format($scalar, %options) - -Format a Perl value as JSON. With the default options, this function behaves -roughly similar to: - - JSON::PP->new->allow_nonref->core_bools->convert_blessed->encode($scalar); - -Some modules escape the slash character in encoded strings to prevent a -potential XSS vulnerability when embedding JSON inside C<< >> tags. This function does I do that because it might not even -be sufficient. The following is probably an improvement: - - json_format($data) =~ s{ are supported: - -=over - -=item canonical - -Boolean, write hash keys in deterministic (sorted) order. This option currently -has no effect on tied hashes. - -=item pretty - -Boolean, format JSON with newlines and indentation for easier reading. Beauty -is in the eye of the beholder, this option currently follows the convention -used by L and others: 3 space indent and one space around the C<:> -separating object keys and values. The exact format might change in later -versions. - -=item utf8 - -Boolean, returns a UTF-8 encoded byte string instead of a Perl Unicode string. - -=item max_size - -Maximum permitted size, in bytes, of the generated JSON string. Defaults to 1 GiB. - -=item max_depth - -Maximum permitted nesting depth of Perl values. Defaults to 512. - -=back - -=back - -(Why the hell yet another JSON codec when CPAN is already full of them!? 
Well, -L is pretty cool but isn't going to be updated to support Perl's new -builtin booleans. L is slow and while L is -perfectly adequate, its codebase is a little too messy for my taste - too many -unnecessary features and C<#ifdef>s to support ancient perls and esoteric -configurations. Still, if you need anything not provided by these functions, -L and L are perfectly fine alternatives. -L and L also look like good and maintained candidates.) diff --git a/bench.PL b/bench.PL index 5120c74..e057df2 100755 --- a/bench.PL +++ b/bench.PL @@ -164,6 +164,9 @@ be a good measure". I've used these benchmarks to find and optimize hotspots in FU, which in turn means these numbers may look better than they are in real-world use. +B Many of these benchmarks exist solely to test edge case +performance; these numbers are not representative of real-world use. + =head1 MODULE VERSIONS The following module versions were used: @@ -176,7 +179,7 @@ The following module versions were used: =head1 BENCHMARKS -=head2 JSON Formatting +=head2 JSON Parsing & Formatting These benchmarks run on large-ish arrays with repeated values. JSON encoding is sufficiently fast that Perl function calling overhead tends to dominate for diff --git a/t/json_format.t b/t/json_format.t index d0b5747..c6c2575 100644 --- a/t/json_format.t +++ b/t/json_format.t @@ -68,7 +68,6 @@ my @errors = ( do { my $o = {}; bless $o, 'MyToJSONSelf' }, qr/MyToJSONSelf::TO_JSON method returned same object as was passed instead of a new one/, ); -plan tests => @tests*2 + @errors/2 + 10; for my($in, $exp) (@tests) { my $out = json_format $in; @@ -120,6 +119,25 @@ eval { json_format 'hello world', max_size => 8 }; like $@, qr/maximum string length exceeded/; +# Test large strings to cover some buffer handling special cases. 
+for (2000..2100, 4000..4200, 8100..8200, 12200..12300, 16300..16400) { + my $s = 'a'x$_; + is json_format($s), "\"$s\""; +} + +# 500 depth +{ + my $v = 1; + $v = [$v] for (1..500); + is json_format($v), '['x500 . 1 . ']'x500; +} +{ + my $v = 1; + $v = {'',$v} for (1..500); + is json_format($v), '{"":'x500 . 1 . '}'x500; +} + + # http://e-choroba.eu/18-yapc slide 6 tie my $incs, 'MyIncrementer', 'Xa'; @@ -132,6 +150,9 @@ is json_format($incu), 4; is json_format($incu), 5; is json_format($incu), 6; + +done_testing; + package MyIncrementer; use Tie::Scalar; use parent -norequire => 'Tie::StdScalar'; diff --git a/t/json_parse.t b/t/json_parse.t index be76e31..65e0748 100644 --- a/t/json_parse.t +++ b/t/json_parse.t @@ -137,7 +137,7 @@ is ref $v, 'HASH'; is keys %$v, 1; is $v->{a}, 1; -sub large($s) { +sub complete($s) { $v = json_parse $s; is ref $v, 'HASH'; is keys %$v, 3; @@ -163,11 +163,32 @@ sub large($s) { is ref $v->{'ë'}, 'ARRAY'; is scalar $v->{'ë'}->@*, 0; } -large '{"a":[1,0.1,true,null,{}],"":-0,"ë":[]}'; -large ' { +complete '{"a":[1,0.1,true,null,{}],"":-0,"ë":[]}'; +complete ' { "a" : [ 1 , 0.1 , true , null , { } ] , "" : -0 , "ë" : [ ] } '; + +# Test large inputs to cover some buffer handling special cases. +for (2000..2100, 4000..4200, 8100..8200, 12200..12300, 16300..16400) { + my $s = 'a'x$_; + is json_parse("\"$s\""), $s +} + +# 500 depth +{ + $v = json_parse('['x500 . ']'x500); + my $i = 0; + while (ref $v) { $v = $v->[0]; $i++ } + is $i, 500; +} +{ + $v = json_parse('{"":'x500 . 1 . '}'x500); + my $i = 0; + while (ref $v) { $v = $v->{''}; $i++ } + is $i, 500; +} + done_testing;