From f09a103c530030ce2b2718619194f5c077144cf5 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 2 Mar 2025 10:06:33 +0100 Subject: [PATCH 01/86] Some test portability fixes again + minor changes --- FU/Pg.pm | 4 ++-- Makefile.PL | 16 ++++++++++------ bench.PL | 20 +++++++++++--------- c/pgconn.c | 9 +++------ c/pgtypes.c | 4 ++-- t/json_format.t | 2 +- t/json_parse.t | 6 ++++-- 7 files changed, 33 insertions(+), 28 deletions(-) diff --git a/FU/Pg.pm b/FU/Pg.pm index 175bba3..7b9de5d 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -666,8 +666,8 @@ module does not. Converted between floating point seconds since C<00:00:00>, supporting microsecond precision. This format allows for easy comparison against Unix -timestamps (time of day = C<$timestamp % 86400>) and can be added to an integer -date value to form a complete timestamp. +timestamps (time of day in UTC = C<$timestamp % 86400>) and can be added to an +integer date value to form a complete timestamp. (There's no support for the string format yet) diff --git a/Makefile.PL b/Makefile.PL index 87362fc..326fdb4 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -2,7 +2,7 @@ use ExtUtils::MakeMaker; use Config; os_unsupported if $^O eq 'MSWin32'; # I don't know on which OS'es the code will work exactly, but this one I can easily rule out. -os_unsupported if $Config{ivsize} < 8; +os_unsupported if $Config{ptrsize} < 8; os_unsupported if $Config{usequadmath}; WriteMakefile( @@ -15,12 +15,16 @@ WriteMakefile( MIN_PERL_VERSION => 'v5.36', META_MERGE => { dynamic_config => 0, + 'meta-spec' => { version => 2 }, resources => { - repository => 'https://code.blicky.net/yorhel/fu', - bugtracker => 'https://code.blicky.net/yorhel/fu/issues', - }, - no_index => { - file => 'bench.PL', + repository => { + web => 'https://code.blicky.net/yorhel/fu', + type => 'git', + }, + bugtracker => { + web => 'https://code.blicky.net/yorhel/fu/issues', + mailto => 'projects@yorhel.nl', + }, }, }, depend => { '$(OBJECT)', 'c/*.c' }, diff --git a/bench.PL b/bench.PL index d8c13d3..fa53068 100755 --- a/bench.PL +++ b/bench.PL @@ -223,6 +223,7 @@ sub fmtbench($id, $text, $xs, $ys) { open my $F, '>FU/Benchmarks.pod' or die $!; select $F; while () { + s/^%/=/; s#^:modules#join '', map sprintf("=item L<%s> %s\n\n", $_, $modules{$_}), sort keys %modules#e; s#^:benches (.+)#join '', map fmtbench(@$_), grep $_->[0] =~ /$1/, @bench#e; print; @@ -233,12 +234,13 @@ sub fmtbench($id, $text, $xs, $ys) { } } +# s/^=/%/ to prevent tools from interpreting the below as POD __DATA__ -=head1 NAME +%head1 NAME FU::Benchmarks - A bunch of automated benchmark results. -=head1 DESCRIPTION +%head1 DESCRIPTION This file is automatically generated from 'bench.PL' in the L distribution. These benchmarks compare performance of some FU functionality against similar @@ -256,19 +258,19 @@ real-world use. B Many of these benchmarks exists solely to test edge case performance, these numbers are not representative for real-world use. -=head1 MODULE VERSIONS +%head1 MODULE VERSIONS The following module versions were used: -=over +%over :modules -=back +%back -=head1 BENCHMARKS +%head1 BENCHMARKS -=head2 JSON Parsing & Formatting +%head2 JSON Parsing & Formatting These benchmarks run on large-ish arrays with repeated values. JSON encoding is sufficiently fast that Perl function calling overhead tends to dominate for @@ -279,10 +281,10 @@ SIMD parts are only used for parsing. :benches ^json -=head2 XML Writing +%head2 XML Writing :benches ^xml -=cut +%cut # Cached data used by bench.PL. Same as the formatted tables above but easier to parse. diff --git a/c/pgconn.c b/c/pgconn.c index ace6ace..4b84463 100644 --- a/c/pgconn.c +++ b/c/pgconn.c @@ -488,7 +488,7 @@ static void fupg_refresh_types(pTHX_ fupg_conn *c) { t->send = fupg_send_text; t->recv = fupg_recv_text; } else { - /* TODO: (multi)ranges, custom overrides, by-name lookup for dynamic-oid types */ + /* TODO: (multi)ranges, by-name lookup for dynamic-oid types */ const fupg_type *builtin = fupg_builtin_byoid(t->oid); if (builtin) { t->send = builtin->send; @@ -571,11 +571,8 @@ static void fupg_tio_setup(pTHX_ fupg_conn *conn, fupg_tio *tio, int flags, Oid return; } - /* Minor wart? When the type is overridden by oid, the name & oid in error - * messages will be that of the builtin type. When overridden by name, the - * name will be correct but the oid is still of the builtin type. - * Some send/recv functions have slightly different behavior based on oid, - * in those cases this behavior is useful. */ + /* Minor wart? When the type is overridden by oid, its name in error + * messages will be that of the builtin type instead of the actual type. */ SV *cb = NULL; const fupg_type *e, *t; diff --git a/c/pgtypes.c b/c/pgtypes.c index 481cd03..f9c5b00 100644 --- a/c/pgtypes.c +++ b/c/pgtypes.c @@ -456,8 +456,8 @@ RECVFN(perlcb) { call_sv(ctx->cb, G_SCALAR); SPAGAIN; - SV *ret = newSV(0); - sv_setsv(ret, POPs); + SV *ret = POPs; + SvREFCNT_inc(ret); PUTBACK; FREETMPS; diff --git a/t/json_format.t b/t/json_format.t index 0797a4f..5288e1a 100644 --- a/t/json_format.t +++ b/t/json_format.t @@ -55,7 +55,7 @@ my @tests = ( ''.$$, '"'.$$.'"', do { my $x = 12; utf8::decode($x); $x }, '"12"', do { no warnings 'numeric'; my $x = '19a'; $x += 0; $x }, '19', - 1844674407370955161 / 10, $Config{uselongdouble} ? 184467440737095516 : '1.84467440737096e+17', + $Config{uselongdouble} ? () : ( 1844674407370955161 / 10, '1.84467440737096e+17' ), ); my @errors = ( diff --git a/t/json_parse.t b/t/json_parse.t index 901d01f..0c26dff 100644 --- a/t/json_parse.t +++ b/t/json_parse.t @@ -97,8 +97,10 @@ num ' -0 ', 0; num '-9223372036854775808'; num '9223372036854775807'; num '18446744073709551615'; -num '-9223372036854775809', $Config{uselongdouble} ? -9.22337203685477581e+18 : -9.22337203685478e+18; -num '18446744073709551616', $Config{uselongdouble} ? 1.84467440737095516e+19 : 1.84467440737096e+19; +if (!$Config{uselongdouble}) { # Behavior of longdouble is architecture-dependent + num '-9223372036854775809', -9.22337203685478e+18; + num '18446744073709551616', 1.84467440737096e+19; +} num '1.234'; num '1e5', 100000; num '1e+5', 100000; From 7839e7df787331a01447bd3d31329f9261bb5414 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 2 Mar 2025 21:09:59 +0100 Subject: [PATCH 02/86] FU: Fix typo in error handling --- FU.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FU.pm b/FU.pm index 78c0fce..a0cd38e 100644 --- a/FU.pm +++ b/FU.pm @@ -288,7 +288,7 @@ sub _read_req($c) { # The HTTP reader above and the FastCGI XS reader operate on bytes. # Decode these into Unicode strings and check for special characters. - eval { FU::Util::utf8_decode($_); 1} || fu->err(400, $@) + eval { FU::Util::utf8_decode($_); 1} || fu->error(400, $@) for ($REQ->{path}, $REQ->{qs}, values $REQ->{hdr}->%*); ($REQ->{path}, my $qs) = split /\?/, $REQ->{path}//'', 2; From cbebc3a21eb5cc832109671466676aff275a26b9 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 5 Mar 2025 15:32:01 +0100 Subject: [PATCH 03/86] Validate: Rework API, ->validate() now throws error instead of result object This is a slight simplification and removes the need to pass around partially normalized data. I've never found a use for the unsafe_data() method. --- FU.pm | 5 +- FU/Validate.pm | 284 ++++++++++++++++----------------------- t/validate.t | 353 +++++++++++++++++++++++++------------------------ 3 files changed, 295 insertions(+), 347 deletions(-) diff --git a/FU.pm b/FU.pm index a0cd38e..4d04562 100644 --- a/FU.pm +++ b/FU.pm @@ -639,8 +639,8 @@ sub _getfield($data, @a) { return $data->{$a[0]} if @a == 1 && !ref $a[0]; require FU::Validate; my $schema = FU::Validate->compile(@a > 1 ? { keys => {@a} } : $a[0]); - my $res = $schema->validate($data); - fu->error(400, "Input validation failed") if !$res; # TODO: More detailed error message + my $res = eval { $schema->validate($data) }; + fu->error(400, "Input validation failed") if $@; # TODO: More detailed error message return @a == 2 ? $res->data->{$a[0]} : $res->data; } @@ -659,7 +659,6 @@ sub formdata { if (fu->header('content-type')||'') ne 'application/x-www-form-urlencoded'; FU::Util::query_decode($FU::REQ->{data}); } || fu->error(400, $@); - # TODO: Accept schema validation thing. _getfield $FU::REQ->{formdata}, @_; } diff --git a/FU/Validate.pm b/FU/Validate.pm index 1538c5a..7f1cc1c 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -102,7 +102,7 @@ sub _compile($schema, $validations, $rec) { my @keys = keys $schema->{keys}->%* if $schema->{keys}; for my($name, $val) (%$schema) { - if($builtin{$name}) { + if ($builtin{$name}) { $top{$name} = $schema->{$name}; next; } @@ -125,7 +125,7 @@ sub _compile($schema, $validations, $rec) { # Inherit some builtin options from validations for my $t (@val) { - if($top{type} && $t->{schema}{type} && $top{type} ne $t->{schema}{type}) { + if ($top{type} && $t->{schema}{type} && $top{type} ne $t->{schema}{type}) { confess "Incompatible types, the schema specifies '$top{type}' but validation '$t->{name}' requires '$t->{schema}{type}'" if $schema->{type}; confess "Incompatible types, '$t->[0]' requires '$t->{schema}{type}', but another validation requires '$top{type}'"; } @@ -164,7 +164,7 @@ sub compile($pkg, $schema, $validations={}) { delete $c->{schema}{default} if ref $c->{schema}{default} eq 'SCALAR' && ${$c->{schema}{default}} eq 'required'; - if(exists $c->{schema}{sort}) { + if (exists $c->{schema}{sort}) { my $s = $c->{schema}{sort}; $c->{schema}{sort} = ref $s eq 'CODE' ? $s @@ -178,186 +178,169 @@ sub compile($pkg, $schema, $validations={}) { } -sub _validate_rec($c, $input) { +sub _validate_rec { + my $c = $_[0]; + # hash keys - if($c->{schema}{keys}) { + if ($c->{schema}{keys}) { my @err; for my ($k, $s) ($c->{schema}{keys}->%*) { - if(!exists $input->{$k}) { + if (!exists $_[1]{$k}) { next if $s->{schema}{missing} eq 'ignore'; - return [$input, { validation => 'missing', key => $k }] if $s->{schema}{missing} eq 'reject'; - $input->{$k} = ref $s->{schema}{default} eq 'CODE' ? $s->{schema}{default}->() : $s->{schema}{default} // undef; + return { validation => 'missing', key => $k } if $s->{schema}{missing} eq 'reject'; + $_[1]{$k} = ref $s->{schema}{default} eq 'CODE' ? $s->{schema}{default}->() : $s->{schema}{default} // undef; next if exists $s->{schema}{default}; } - my $r = _validate($s, $input->{$k}); - $input->{$k} = $r->[0]; - if($r->[1]) { - $r->[1]{key} = $k; - push @err, $r->[1]; + my $r = _validate($s, $_[1]{$k}); + if ($r) { + $r->{key} = $k; + push @err, $r; } } - return [$input, { validation => 'keys', errors => \@err }] if @err; + return { validation => 'keys', errors => \@err } if @err; } # array values - if($c->{schema}{values}) { + if ($c->{schema}{values}) { my @err; - for my $i (0..$#$input) { - my $r = _validate($c->{schema}{values}, $input->[$i]); - $input->[$i] = $r->[0]; - if($r->[1]) { - $r->[1]{index} = $i; - push @err, $r->[1]; + for my $i (0..$#{$_[1]}) { + my $r = _validate($c->{schema}{values}, $_[1][$i]); + if ($r) { + $r->{index} = $i; + push @err, $r; } } - return [$input, { validation => 'values', errors => \@err }] if @err; + return { validation => 'values', errors => \@err } if @err; } # validations for ($c->{validations}->@*) { - my $r = _validate_rec($_, $input); - $input = $r->[0]; - - return [$input, { + my $r = _validate_rec($_, $_[1]); + return { # If the error was a custom 'func' object, then make that the primary cause. # This makes it possible for validations to provide their own error objects. - $r->[1]{validation} eq 'func' && (!exists $r->[1]{result} || keys $r->[1]->%* > 2) ? $r->[1]->%* : (error => $r->[1]), + $r->{validation} eq 'func' && (!exists $r->{result} || keys $r->%* > 2) ? $r->%* : (error => $r), validation => $_->{name}, - }] if $r->[1]; + } if $r; } # func - if($c->{schema}{func}) { - my $r = $c->{schema}{func}->($input); - return [$input, { %$r, validation => 'func' }] if ref $r eq 'HASH'; - return [$input, { validation => 'func', result => $r }] if !$r; + if ($c->{schema}{func}) { + my $r = $c->{schema}{func}->($_[1]); + return { %$r, validation => 'func' } if ref $r eq 'HASH'; + return { validation => 'func', result => $r } if !$r; } - - return [$input] } -sub _validate_array($c, $input) { - return [$input] if $c->{schema}{type} ne 'array'; +sub _validate_array { + my $c = $_[0]; + return if $c->{schema}{type} ne 'array'; - $input = [sort { $c->{schema}{sort}->($a, $b) } @$input ] if $c->{schema}{sort}; + $_[1] = [sort { $c->{schema}{sort}->($a, $b) } $_[1]->@* ] if $c->{schema}{sort}; # Key-based uniqueness - if($c->{schema}{unique} && ref $c->{schema}{unique} eq 'CODE') { + if ($c->{schema}{unique} && ref $c->{schema}{unique} eq 'CODE') { my %h; - for my $i (0..$#$input) { - my $k = $c->{schema}{unique}->($input->[$i]); - return [$input, { validation => 'unique', index_a => $h{$k}, value_a => $input->[$h{$k}], index_b => $i, value_b => $input->[$i], key => $k }] if exists $h{$k}; + for my $i (0..$#{$_[1]}) { + my $k = $c->{schema}{unique}->($_[1][$i]); + return { validation => 'unique', index_a => $h{$k}, value_a => $_[1][$h{$k}], index_b => $i, value_b => $_[1][$i], key => $k } if exists $h{$k}; $h{$k} = $i; } # Comparison-based uniqueness - } elsif($c->{schema}{unique}) { - for my $i (0..$#$input-1) { - return [$input, { validation => 'unique', index_a => $i, value_a => $input->[$i], index_b => $i+1, value_b => $input->[$i+1] }] - if $c->{schema}{sort}->($input->[$i], $input->[$i+1]) == 0 + } elsif ($c->{schema}{unique}) { + for my $i (0..$#{$_[1]}-1) { + return { validation => 'unique', index_a => $i, value_a => $_[1][$i], index_b => $i+1, value_b => $_[1][$i+1] } + if $c->{schema}{sort}->($_[1][$i], $_[1][$i+1]) == 0 } } - - return [$input] } -sub _validate_input($c, $input) { +sub _validate_input { + my $c = $_[0]; + # rmwhitespace (needs to be done before the 'default' test) - if(defined $input && !ref $input && $c->{schema}{type} eq 'scalar' && $c->{schema}{rmwhitespace}) { - $input =~ s/\r//g; - $input =~ s/^\s*//; - $input =~ s/\s*$//; + if (defined $_[1] && !ref $_[1] && $c->{schema}{type} eq 'scalar' && $c->{schema}{rmwhitespace}) { + $_[1] =~ s/\r//g; + $_[1] =~ s/^\s*//; + $_[1] =~ s/\s*$//; } # default - if(!defined $input || (!ref $input && $input eq '')) { - return [ref $c->{schema}{default} eq 'CODE' ? $c->{schema}{default}->($input) : $c->{schema}{default}] if exists $c->{schema}{default}; - return [$input, { validation => 'required' }]; + if (!defined $_[1] || (!ref $_[1] && $_[1] eq '')) { + if (exists $c->{schema}{default}) { + $_[1] = ref $c->{schema}{default} eq 'CODE' ? $c->{schema}{default}->($_[1]) : $c->{schema}{default}; + return; + } + return { validation => 'required' }; } - if($c->{schema}{type} eq 'scalar') { - return [$input, { validation => 'type', expected => 'scalar', got => lc ref $input }] if ref $input; + if ($c->{schema}{type} eq 'scalar') { + return { validation => 'type', expected => 'scalar', got => lc ref $_[1] } if ref $_[1]; - } elsif($c->{schema}{type} eq 'hash') { - return [$input, { validation => 'type', expected => 'hash', got => lc ref $input || 'scalar' }] if ref $input ne 'HASH'; + } elsif ($c->{schema}{type} eq 'hash') { + return { validation => 'type', expected => 'hash', got => lc ref $_[1] || 'scalar' } if ref $_[1] ne 'HASH'; # Each branch below makes a shallow copy of the hash, so that further # validations can perform in-place modifications without affecting the # input. - if($c->{schema}{unknown} eq 'remove') { - $input = { map +($_, $input->{$_}), grep $c->{known_keys}{$_}, keys %$input }; - } elsif($c->{schema}{unknown} eq 'reject') { - my @err = grep !$c->{known_keys}{$_}, keys %$input; - return [$input, { validation => 'unknown', keys => \@err, expected => [ sort keys %{$c->{known_keys}} ] }] if @err; - $input = { %$input }; + if ($c->{schema}{unknown} eq 'remove') { + $_[1] = { map +($_, $_[1]{$_}), grep $c->{known_keys}{$_}, keys $_[1]->%* }; + } elsif ($c->{schema}{unknown} eq 'reject') { + my @err = grep !$c->{known_keys}{$_}, keys $_[1]->%*; + return { validation => 'unknown', keys => \@err, expected => [ sort keys %{$c->{known_keys}} ] } if @err; + $_[1] = { $_[1]->%* }; } else { - $input = { %$input }; + $_[1] = { $_[1]->%* }; } - } elsif($c->{schema}{type} eq 'array') { - $input = [$input] if $c->{schema}{scalar} && !ref $input; - return [$input, { validation => 'type', expected => $c->{schema}{scalar} ? 'array or scalar' : 'array', got => lc ref $input || 'scalar' }] if ref $input ne 'ARRAY'; - $input = [@$input]; # Create a shallow copy to prevent in-place modification. + } elsif ($c->{schema}{type} eq 'array') { + $_[1] = [$_[1]] if $c->{schema}{scalar} && !ref $_[1]; + return { validation => 'type', expected => $c->{schema}{scalar} ? 'array or scalar' : 'array', got => lc ref $_[1] || 'scalar' } if ref $_[1] ne 'ARRAY'; + $_[1] = [$_[1]->@*]; # Create a shallow copy to prevent in-place modification. - } elsif($c->{schema}{type} eq 'any') { + } elsif ($c->{schema}{type} eq 'any') { # No need to do anything here. } else { confess "Unknown type '$c->{schema}{type}'"; # Already checked in compile(), but be extra safe } - my $r = _validate_rec($c, $input); - return $r if $r->[1]; - $input = $r->[0]; - - _validate_array($c, $input); + &_validate_rec || &_validate_array; } -sub _validate($c, $input) { - my $r = _validate_input($c, $input); - return $r if !$r->[1] || !exists $c->{schema}{onerror}; - [ ref $c->{schema}{onerror} eq 'CODE' ? $c->{schema}{onerror}->(bless $r, 'FU::Validate::Result') : $c->{schema}{onerror} ] +sub _validate { + my $c = $_[0]; + my $r = &_validate_input; + ($r, $_[1]) = (undef, ref $c->{schema}{onerror} eq 'CODE' ? $c->{schema}{onerror}->($_[0], bless $r, 'FU::Validate::err') : $c->{schema}{onerror}) + if $r && exists $c->{schema}{onerror}; + $r } sub validate($c, $input) { - bless _validate($c, $input), 'FU::Validate::Result'; + my $r = _validate($c, $input); + return $input if !$r; + die bless $r, 'FU::Validate::err'; + $input } -package FU::Validate::Result; - +package FU::Validate::err; use v5.36; -use Carp 'confess'; -# A result object contains: [$data, $error] +use overload '""' => sub { + # TODO: Better error message + require Data::Dumper; + Data::Dumper->new([{$_[0]->%*}])->Terse(1)->Pair(':')->Indent(0)->Sortkeys(1)->Dump."\n"; +}; -# In boolean context, returns whether the validation succeeded. -use overload bool => sub { !$_[0][1] }; - -# Returns the validation errors, or undef if validation succeeded -sub err { $_[0][1] } - -# Returns the validated and normalized input, dies if validation didn't succeed. -sub data { - if($_[0][1]) { - require Data::Dumper; - my $s = Data::Dumper->new([$_[0][1]])->Terse(1)->Pair(':')->Indent(0)->Sortkeys(1)->Dump; - confess "Validation failed: $s"; - } - $_[0][0] -} - -# Same as 'data', but returns partially validated and normalized data if validation failed. -sub unsafe_data { $_[0][0] } - -# TODO: Human-readable error message formatting 1; __END__ @@ -402,67 +385,22 @@ follows: C<$schema> is the schema that describes the data to be validated (see L below) and C<$validations> is an optional hashref containing -L that C<$schema> can refer to. +L that C<$schema> can refer to. An +error is thrown if the C<$validations> or C<$schema> are invalid. To validate input, run: - my $result = $validator->validate($input); + my $validated_input = $validator->validate($input); -C<$input> is the data to be validated, and the C<$result> object is L. +C returns a validated and (depending on the schema) normalized copy +of C<$input>. Great care is taken that C<$input> is not being modified +in-place, even if data normalization is being performed. -Both C and C may throw an error if the C<$validations> -or C<$schema> are invalid. Errors in the C<$input> should never cause an error -to be thrown, since these are always reported in the C<$result> object. - -This module takes great care that C<$input> is not being modified in place, -even if data normalization is being performed. The normalized data can be read -from the C<$result> object. - -=head2 Result object - -The C<$result> object returned by C overloads boolean context, so -you can check if the validation succeeded with a simple if statement: - - my $result = $validator->validate(..); - if($result) { - # Success! - my $data = $result->data; - } else { - # Input failed to validate... - my $error = $result->err; - } - -In addition, the result object implements the following methods: - -=over - -=item data() - -Returns the validated and normalized data. This method throws an error if -validation failed, so if you're lazy and don't want to bother too much with -proper error reporting, you can safely I in a single step: - - my $validated_data = $v->validate(..)->data; - -(Note regarding reference semantics: The returned data will usually be a -(possibly modified) copy of C<$input>, but may in some cases still have nested -references to data in C<$input> - so if you are working with nested hashrefs, -arrayrefs or other objects and are going to make modifications to the values -embedded within them, these changes may or may not also affect the values in -the original C<$input>. Make a deep copy of the data if you're concerned about -this). - -=item err() - -Returns I if validation succeeded, an error object otherwise. - -An error object is a hashref containing at least one key: I, which -indicates the name of the validation that failed. Additional keys with more -detailed information may be present, depending on the validation. These are -documented in L below. - -=back +An error is thrown if the input does not validate. The error object is a +C-blessed hashref containing at least one key: +I, which indicates the name of the validation that failed. +Additional keys with more detailed information may be present, depending on the +validation. These are documented in L below. =head1 SCHEMA DEFINITION @@ -519,9 +457,9 @@ Instead of reporting an error, return C<$val> if this input fails validation for whatever reason. Setting this option in the top-level schema ensures that the validation will always succeed regardless of the input. -If C<$val> is a CODE reference, the subroutine is called with the result object -for this validation as its first argument. The return value of the subroutine -is then returned for this validation. +If C<$val> is a CODE reference, the subroutine is called with the (partially +normalized) input as first argument and error object as second argument. The +return value of the subroutine is then returned for this validation. =item rmwhitespace => 0/1 @@ -856,11 +794,11 @@ Here's a simple example that defines and uses a custom validation named I, which accepts either the string I or I. my $validations = { - stringbool => { enum => ['true', 'false'] } + stringbool => { enum => ['true', 'false'] } }; my $schema = { stringbool => 1 }; my $result = FU::Validate->compile($schema, $validations)->validate('true'); - # $result->data() eq 'true' + # $result eq 'true' A custom validation can also be defined as a subroutine, in which case it can accept options. Here is an example of a I custom validation, which @@ -868,9 +806,9 @@ requires that the string starts with the given prefix. The subroutine returns a schema that contains the I built-in option to do the actual validation. my $validations = { - prefix => sub($prefix) { - return { func => sub { $_[0] =~ /^\Q$prefix/ } } - } + prefix => sub($prefix) { + return { func => sub { $_[0] =~ /^\Q$prefix/ } } + } }; my $schema = { prefix => 'Hello, ' }; my $result = FU::Validate->compile($schema, $validations)->validate('Hello, World!'); @@ -891,10 +829,10 @@ mixes validations of different types. For example, the following throws an error: FU::Validate->compile({ - # top-level schema says we expect a hash - type => 'hash', - # but the 'int' validation implies that the type is a scalar - int => 1 + # top-level schema says we expect a hash + type => 'hash', + # but the 'int' validation implies that the type is a scalar + int => 1 }); The I, I and C built-in options are validated separately diff --git a/t/validate.t b/t/validate.t index c7eb459..fdcd416 100644 --- a/t/validate.t +++ b/t/validate.t @@ -14,7 +14,7 @@ my %validations = ( setundef => { func => sub { $_[0] = undef; 1 } }, defaultsub1 => { default => sub { 2 } }, defaultsub2 => { default => sub { defined $_[0] } }, - onerrorsub => { onerror => sub { ref $_[0] } }, + onerrorsub => { onerror => sub { ref $_[1] } }, collapsews => { rmwhitespace => 0, func => sub { $_[0] =~ s/\s+/ /g; 1 } }, neverfails => { onerror => 'err' }, revnum => { type => 'array', sort => sub($x,$y) { $y <=> $x } }, @@ -31,8 +31,7 @@ my %validations = ( ); -sub t { - my($schema, $input, $output, $error) = @_; +sub t($schema, $input, $output) { my $line = (caller)[2]; my $schema_copy = dclone([$schema])->[0]; @@ -40,203 +39,215 @@ sub t { my $res = FU::Validate->compile($schema, \%validations)->validate($input); #diag explain FU::Validate->compile($schema, \%validations) if $line == 139; - is !$error, !!$res, "boolean context $line"; is_deeply $schema, $schema_copy, "schema modification $line"; is_deeply $input, $input_copy, "input modification $line"; - is_deeply $res->unsafe_data(), $output, "unsafe_data $line"; - is_deeply $res->data(), $output, "data ok $line" if !$error; - ok !eval { $res->data; 1}, "data err $line" if $error; - is_deeply $res->err(), $error, "err $line"; + is_deeply $res, $output, "data ok $line"; +} + +sub f($schema, $input, $error) { + my $line = (caller)[2]; + + my $schema_copy = dclone([$schema])->[0]; + my $input_copy = dclone([$input])->[0]; + + ok !eval { FU::Validate->compile($schema, \%validations)->validate($input); 1 }, "eval $line"; + is_deeply $schema, $schema_copy, "schema modification $line"; + is_deeply $input, $input_copy, "input modification $line"; + is_deeply { $@->%* }, $error, "err $line"; } # default -t {}, 0, 0, undef; -t {}, '', '', { validation => 'required' }; -t {}, undef, undef, { validation => 'required' }; -t { default => undef }, undef, undef, undef; -t { default => undef }, '', undef, undef; -t { defaultsub1 => 1 }, undef, 2, undef; -t { defaultsub2 => 1 }, undef, '', undef; -t { defaultsub2 => 1 }, '', 1, undef; -t { onerrorsub => 1 }, undef, 'FU::Validate::Result', undef; +t {}, 0, 0; +f {}, '', { validation => 'required' }; +f {}, undef, { validation => 'required' }; +t { default => undef }, undef, undef; +t { default => undef }, '', undef; +t { defaultsub1 => 1 }, undef, 2; +t { defaultsub2 => 1 }, undef, ''; +t { defaultsub2 => 1 }, '', 1; +t { onerrorsub => 1 }, undef, 'FU::Validate::err'; # rmwhitespace -t {}, " Va\rl id \n ", 'Val id', undef; -t { rmwhitespace => 0 }, " Va\rl id \n ", " Va\rl id \n ", undef; -t {}, ' ', '', { validation => 'required' }; -t { rmwhitespace => 0 }, ' ', ' ', undef; +t {}, " Va\rl id \n ", 'Val id'; +t { rmwhitespace => 0 }, " Va\rl id \n ", " Va\rl id \n "; +f {}, ' ', { validation => 'required' }; +t { rmwhitespace => 0 }, ' ', ' '; # arrays -t {}, [], [], { validation => 'type', expected => 'scalar', got => 'array' }; -t { type => 'array' }, 1, 1, { validation => 'type', expected => 'array', got => 'scalar' }; -t { type => 'array' }, [], [], undef; -t { type => 'array' }, [undef,1,2,{}], [undef,1,2,{}], undef; -t { type => 'array', scalar => 1 }, 1, [1], undef; -t { type => 'array', values => {} }, [undef], [undef], { validation => 'values', errors => [{ index => 0, validation => 'required' }] }; -t { type => 'array', values => {} }, [' a '], ['a'], undef; -t { type => 'array', sort => 'str' }, [qw/20 100 3/], [qw/100 20 3/], undef; -t { type => 'array', sort => 'num' }, [qw/20 100 3/], [qw/3 20 100/], undef; -t { revnum => 1 }, [qw/20 100 3/], [qw/100 20 3/], undef; -t { type => 'array', sort => 'num', unique => 1 }, [qw/3 2 1/], [qw/1 2 3/], undef; -t { type => 'array', sort => 'num', unique => 1 }, [qw/3 2 3/], [qw/2 3 3/], { validation => 'unique', index_a => 1, value_a => 3, index_b => 2, value_b => 3 }; -t { type => 'array', unique => 1 }, [qw/3 1 2/], [qw/3 1 2/], undef; -t { type => 'array', unique => 1 }, [qw/3 1 3/], [qw/3 1 3/], { validation => 'unique', index_a => 0, value_a => 3, index_b => 2, value_b => 3, key => 3 }; -t { uniquelength => 1 }, [[],[1],[1,2]], [[],[1],[1,2]], undef; -t { uniquelength => 1 }, [[],[1],[2]], [[],[1],[2]], { validation => 'unique', index_a => 1, value_a => [1], index_b => 2, value_b => [2], key => 1 }; -t { type => 'array', setundef => 1 }, [], undef, undef; -t { type => 'array', values => { type => 'any', setundef => 1 } }, [[]], [undef], undef; +f {}, [], { validation => 'type', expected => 'scalar', got => 'array' }; +f { type => 'array' }, 1, { validation => 'type', expected => 'array', got => 'scalar' }; +t { type => 'array' }, [], []; +t { type => 'array' }, [undef,1,2,{}], [undef,1,2,{}]; +t { type => 'array', scalar => 1 }, 1, [1]; +f { type => 'array', values => {} }, [undef], { validation => 'values', errors => [{ index => 0, validation => 'required' }] }; +t { type => 'array', values => {} }, [' a '], ['a']; +t { type => 'array', sort => 'str' }, [qw/20 100 3/], [qw/100 20 3/]; +t { type => 'array', sort => 'num' }, [qw/20 100 3/], [qw/3 20 100/]; +t { revnum => 1 }, [qw/20 100 3/], [qw/100 20 3/]; +t { type => 'array', sort => 'num', unique => 1 }, [qw/3 2 1/], [qw/1 2 3/]; +f { type => 'array', sort => 'num', unique => 1 }, [qw/3 2 3/], { validation => 'unique', index_a => 1, value_a => 3, index_b => 2, value_b => 3 }; +t { type => 'array', unique => 1 }, [qw/3 1 2/], [qw/3 1 2/]; +f { type => 'array', unique => 1 }, [qw/3 1 3/], { validation => 'unique', index_a => 0, value_a => 3, index_b => 2, value_b => 3, key => 3 }; +t { uniquelength => 1 }, [[],[1],[1,2]], [[],[1],[1,2]]; +f { uniquelength => 1 }, [[],[1],[2]], { validation => 'unique', index_a => 1, value_a => [1], index_b => 2, value_b => [2], key => 1 }; +t { type => 'array', setundef => 1 }, [], undef; +t { type => 'array', values => { type => 'any', setundef => 1 } }, [[]], [undef]; # hashes -t { type => 'hash' }, [], [], { validation => 'type', expected => 'hash', got => 'array' }; -t { type => 'hash' }, 'a', 'a', { validation => 'type', expected => 'hash', got => 'scalar' }; -t { type => 'hash' }, {a=>[],b=>undef,c=>{}}, {}, undef; -t { type => 'hash', keys => { a=>{} } }, {}, {a=>undef}, { validation => 'keys', errors => [{ key => 'a', validation => 'required' }] }; # XXX: the key doesn't necessarily have to be created -t { type => 'hash', keys => { a=>{missing=>'ignore'} } }, {}, {}, undef; -t { type => 'hash', keys => { a=>{default=>undef} } }, {}, {a=>undef}, undef; -t { type => 'hash', keys => { a=>{missing=>'create',default=>undef} } }, {}, {a=>undef}, undef; -t { type => 'hash', keys => { a=>{missing=>'reject'} } }, {}, {}, {key => 'a', validation => 'missing'}; +f { type => 'hash' }, [], { validation => 'type', expected => 'hash', got => 'array' }; +f { type => 'hash' }, 'a', { validation => 'type', expected => 'hash', got => 'scalar' }; +t { type => 'hash' }, {a=>[],b=>undef,c=>{}}, {}; +f { type => 'hash', keys => { a=>{} } }, {}, { validation => 'keys', errors => [{ key => 'a', validation => 'required' }] }; +t { type => 'hash', keys => { a=>{missing=>'ignore'} } }, {}, {}; +t { type => 'hash', keys => { a=>{default=>undef} } }, {}, {a=>undef}; +t { type => 'hash', keys => { a=>{missing=>'create',default=>undef} } }, {}, {a=>undef}; +f { type => 'hash', keys => { a=>{missing=>'reject'} } }, {}, {key => 'a', validation => 'missing'}; -t { type => 'hash', keys => { a=>{} } }, {a=>' a '}, {a=>'a'}, undef; # Test against in-place modification -t { type => 'hash', keys => { a=>{} }, unknown => 'remove' }, { a=>1,b=>1 }, { a=>1 }, undef; -t { type => 'hash', keys => { a=>{} }, unknown => 'reject' }, { a=>1,b=>1 }, { a=>1,b=>1 }, { validation => 'unknown', keys => ['b'], expected => ['a'] }; -t { type => 'hash', keys => { a=>{} }, unknown => 'pass' }, { a=>1,b=>1 }, { a=>1,b=>1 }, undef; -t { type => 'hash', setundef => 1 }, {}, undef, undef; -t { type => 'hash', unknown => 'reject', keys => { a=>{ type => 'any', setundef => 1}} }, {a=>[]}, {a=>undef}, undef; +t { type => 'hash', keys => { a=>{} } }, {a=>' a '}, {a=>'a'}; # Test against in-place modification +t { type => 'hash', keys => { a=>{} }, unknown => 'remove' }, { a=>1,b=>1 }, { a=>1 }; +f { type => 'hash', keys => { a=>{} }, unknown => 'reject' }, { a=>1,b=>1 }, { validation => 'unknown', keys => ['b'], expected => ['a'] }; +t { type => 'hash', keys => { a=>{} }, unknown => 'pass' }, { a=>1,b=>1 }, { a=>1,b=>1 }; +t { type => 'hash', setundef => 1 }, {}, undef; +t { type => 'hash', unknown => 'reject', keys => { a=>{ type => 'any', setundef => 1}} }, {a=>[]}, {a=>undef}; # default validations -t { minlength => 3 }, 'ab', 'ab', { validation => 'minlength', expected => 3, got => 2 }; -t { minlength => 3 }, 'abc', 'abc', undef; -t { maxlength => 3 }, 'abcd', 'abcd', { validation => 'maxlength', expected => 3, got => 4 }; -t { maxlength => 3 }, 'abc', 'abc', undef; -t { minlength => 3, maxlength => 3 }, 'abc', 'abc', undef; -t { length => 3 }, 'ab', 'ab', { validation => 'length', expected => 3, got => 2 }; -t { length => 3 }, 'abcd', 'abcd', { validation => 'length', expected => 3, got => 4 }; -t { length => 3 }, 'abc', 'abc', undef; -t { length => [1,3] }, 'abc', 'abc', undef; -t { length => [1,3] }, 'abcd', 'abcd', { validation => 'length', expected => [1,3], got => 4 };; -t { type => 'array', length => 0 }, [], [], undef; -t { type => 'array', length => 1 }, [1,2], [1,2], { validation => 'length', expected => 1, got => 2 }; -t { type => 'hash', length => 0 }, {}, {}, undef; -t { type => 'hash', length => 1, unknown => 'pass' }, {qw/1 a 2 b/}, {qw/1 a 2 b/}, { validation => 'length', expected => 1, got => 2 }; -t { type => 'hash', length => 1, keys => {a => {missing=>'ignore'}, b => {missing=>'ignore'}} }, {a=>1}, {a=>1}, undef; -t { regex => '^a' }, 'abc', 'abc', undef; # XXX: Can't use qr// here because t() does dclone(). The 'hex' test covers that case anyway. -t { regex => '^a' }, 'cba', 'cba', { validation => 'regex', regex => '^a', got => 'cba' }; -t { enum => [1,2] }, 1, 1, undef; -t { enum => [1,2] }, 2, 2, undef; -t { enum => [1,2] }, 3, 3, { validation => 'enum', expected => [1,2], got => 3 }; -t { enum => 1 }, 1, 1, undef; -t { enum => 1 }, 2, 2, { validation => 'enum', expected => [1], got => 2 }; -t { enum => {a=>1,b=>2} }, 'a', 'a', undef; -t { enum => {a=>1,b=>2} }, 'c', 'c', { validation => 'enum', expected => ['a','b'], got => 'c' }; -t { anybool => 1 }, 1, true, undef; -t { anybool => 1 }, undef, false, undef; -t { anybool => 1 }, '', false, undef; -t { anybool => 1 }, {}, true, undef; -t { anybool => 1 }, [], true, undef; -t { anybool => 1 }, bless({}, 'test'), true, undef; -t { bool => 1 }, 1, 1, { validation => 'bool' }; -t { bool => 1 }, \1, true, undef; +f { minlength => 3 }, 'ab', { validation => 'minlength', expected => 3, got => 2 }; +t { minlength => 3 }, 'abc', 'abc'; +f { maxlength => 3 }, 'abcd', { validation => 'maxlength', expected => 3, got => 4 }; +t { maxlength => 3 }, 'abc', 'abc'; +t { minlength => 3, maxlength => 3 }, 'abc', 'abc'; +f { length => 3 }, 'ab', { validation => 'length', expected => 3, got => 2 }; +f { length => 3 }, 'abcd', { validation => 'length', expected => 3, got => 4 }; +t { length => 3 }, 'abc', 'abc'; +t { length => [1,3] }, 'abc', 'abc'; +f { length => [1,3] }, 'abcd', { validation => 'length', expected => [1,3], got => 4 };; +t { type => 'array', length => 0 }, [], []; +f { type => 'array', length => 1 }, [1,2], { validation => 'length', expected => 1, got => 2 }; +t { type => 'hash', length => 0 }, {}, {}; +f { type => 'hash', length => 1, unknown => 'pass' }, {qw/1 a 2 b/}, { validation => 'length', expected => 1, got => 2 }; +t { type => 'hash', length => 1, keys => {a => {missing=>'ignore'}, b => {missing=>'ignore'}} }, {a=>1}, {a=>1}; +t { regex => '^a' }, 'abc', 'abc'; # XXX: Can't use qr// here because t() does dclone(). The 'hex' test covers that case anyway. +f { regex => '^a' }, 'cba', { validation => 'regex', regex => '^a', got => 'cba' }; +t { enum => [1,2] }, 1, 1; +t { enum => [1,2] }, 2, 2; +f { enum => [1,2] }, 3, { validation => 'enum', expected => [1,2], got => 3 }; +t { enum => 1 }, 1, 1; +f { enum => 1 }, 2, { validation => 'enum', expected => [1], got => 2 }; +t { enum => {a=>1,b=>2} }, 'a', 'a'; +f { enum => {a=>1,b=>2} }, 'c', { validation => 'enum', expected => ['a','b'], got => 'c' }; +t { anybool => 1 }, 1, true; +t { anybool => 1 }, undef, false; +t { anybool => 1 }, '', false; +t { anybool => 1 }, {}, true; +t { anybool => 1 }, [], true; +t { anybool => 1 }, bless({}, 'test'), true; +f { bool => 1 }, 1, { validation => 'bool' }; +t { bool => 1 }, \1, true; my($true, $false) = (1,0); -t { bool => 1 }, bless(\$true, 'boolean'), true, undef; -t { bool => 1 }, bless(\$false, 'boolean'), false, undef; -t { bool => 1 }, bless(\$true, 'test'), bless(\$true, 'test'), { validation => 'bool' }; -t { ascii => 1 }, 'ab c', 'ab c', undef; -t { ascii => 1 }, "a\nb", "a\nb", { validation => 'ascii', got => "a\nb" }; +t { bool => 1 }, bless(\$true, 'boolean'), true; +t { bool => 1 }, bless(\$false, 'boolean'), false; +f { bool => 1 }, bless(\$true, 'test'), { validation => 'bool' }; +t { ascii => 1 }, 'ab c', 'ab c'; +f { ascii => 1 }, "a\nb", { validation => 'ascii', got => "a\nb" }; # custom validations -t { hex => 1 }, 'DeadBeef', 'DeadBeef', undef; -t { hex => 1 }, 'x', 'x', { validation => 'hex', error => { validation => 'regex', regex => "$validations{hex}{regex}", got => 'x' } }; -t { prefix => 'a' }, 'abc', 'abc', undef; -t { prefix => 'a' }, 'cba', 'cba', { validation => 'prefix', error => { validation => 'func', result => '' } }; -t { mybool => 1 }, 'abc', 1, undef; -t { mybool => 1 }, undef, 0, undef; -t { mybool => 1 }, '', 0, undef; -t { collapsews => 1 }, " \t\n ", ' ', undef; -t { collapsews => 1 }, ' x ', ' x ', undef; -t { collapsews => 1, rmwhitespace => 1 }, ' x ', 'x', undef; -t { person => 1 }, 1, 1, { validation => 'type', expected => 'hash', got => 'scalar' }; -t { person => 1, default => 1 }, undef, 1, undef; -t { person => 1 }, { sex => 1 }, { sex => 1, name => undef }, { validation => 'person', error => { validation => 'keys', errors => [{ key => 'name', validation => 'required' }] } }; -t { person => 1 }, { sex => undef, name => 'y' }, { sex => 1, name => 'y' }, undef; -t { person => 1, keys => {age => {default => \'required'}} }, {name => 'x', sex => 'y'}, { name => 'x', sex => 'y', age => undef }, { validation => 'keys', errors => [{ key => 'age', validation => 'required' }] }; -t { person => 1, keys => {extra => {}} }, {name => 'x', sex => 'y', extra => 1}, { name => 'x', sex => 'y', extra => 1 }, undef; -t { person => 1, keys => {extra => {}} }, {name => 'x', sex => 'y', extra => ''}, { name => 'x', sex => 'y', extra => '' }, { validation => 'keys', errors => [{ key => 'extra', validation => 'required' }] }; -t { person => 1 }, {name => 'x', sex => 'y', extra => 1}, {name => 'x', sex => 'y', extra => 1}, undef; -t { person => 1, unknown => 'remove' }, {name => 'x', sex => 'y', extra => 1}, {name => 'x', sex => 'y'}, undef; -t { neverfails => 1, int => 1 }, undef, 'err', undef; -t { neverfails => 1, int => 1 }, 'x', 'err', undef; -t { neverfails => 1, int => 1, onerror => undef }, 'x', undef, undef; # XXX: no way to 'unset' an inherited onerror clause, hmm. +t { hex => 1 }, 'DeadBeef', 'DeadBeef'; +f { hex => 1 }, 'x', { validation => 'hex', error => { validation => 'regex', regex => "$validations{hex}{regex}", got => 'x' } }; +t { prefix => 'a' }, 'abc', 'abc'; +f { prefix => 'a' }, 'cba', { validation => 'prefix', error => { validation => 'func', result => '' } }; +t { mybool => 1 }, 'abc', 1; +t { mybool => 1 }, undef, 0; +t { mybool => 1 }, '', 0; +t { collapsews => 1 }, " \t\n ", ' '; +t { collapsews => 1 }, ' x ', ' x '; +t { collapsews => 1, rmwhitespace => 1 }, ' x ', 'x'; +f { person => 1 }, 1, { validation => 'type', expected => 'hash', got => 'scalar' }; +t { person => 1, default => 1 }, undef, 1; +f { person => 1 }, { sex => 1 }, { validation => 'person', error => { validation => 'keys', errors => [{ key => 'name', validation => 'required' }] } }; +t { person => 1 }, { sex => undef, name => 'y' }, { sex => 1, name => 'y' }; +f { person => 1, keys => {age => {default => \'required'}} }, {name => 'x', sex => 'y'}, { validation => 'keys', errors => [{ key => 'age', validation => 'required' }] }; +t { person => 1, keys => {extra => {}} }, {name => 'x', sex => 'y', extra => 1}, { name => 'x', sex => 'y', extra => 1 }; +f { person => 1, keys => {extra => {}} }, {name => 'x', sex => 'y', extra => ''}, { validation => 'keys', errors => [{ key => 'extra', validation => 'required' }] }; +t { person => 1 }, {name => 'x', sex => 'y', extra => 1}, {name => 'x', sex => 'y', extra => 1}; +t { person => 1, unknown => 'remove' }, {name => 'x', sex => 'y', extra => 1}, {name => 'x', sex => 'y'}; +t { neverfails => 1, int => 1 }, undef, 'err'; +t { neverfails => 1, int => 1 }, 'x', 'err'; +t { neverfails => 1, int => 1, onerror => undef }, 'x', undef; # XXX: no way to 'unset' an inherited onerror clause, hmm. # numbers sub nerr { +{ validation => 'num', got => $_[0] } } -t { num => 1 }, 0, 0, undef; -t { num => 1 }, '-', '-', nerr '-'; -t { num => 1 }, '00', '00', nerr '00'; -t { num => 1 }, '1', '1', undef; -t { num => 1 }, '1.1.', '1.1.', nerr '1.1.'; -t { num => 1 }, '1.-1', '1.-1', nerr '1.-1'; -t { num => 1 }, '.1', '.1', nerr '.1'; -t { num => 1 }, '0.1e5', '0.1e5', undef; -t { num => 1 }, '0.1e+5', '0.1e+5', undef; -t { num => 1 }, '0.1e5.1', '0.1e5.1', nerr '0.1e5.1'; -t { int => 1 }, 0, 0, undef; -t { int => 1 }, -123, -123, undef; -t { int => 1 }, -123.1, -123.1, { validation => 'int', got => -123.1 }; -t { uint => 1 }, 0, 0, undef; -t { uint => 1 }, 123, 123, undef; -t { uint => 1 }, -123, -123, { validation => 'uint', got => -123 }; -t { min => 1 }, 1, 1, undef; -t { min => 1 }, 0.9, 0.9, { validation => 'min', expected => 1, got => 0.9 }; -t { min => 1 }, 'a', 'a', { validation => 'min', error => nerr 'a' }; -t { max => 1 }, 1, 1, undef; -t { max => 1 }, 1.1, 1.1, { validation => 'max', expected => 1, got => 1.1 }; -t { max => 1 }, 'a', 'a', { validation => 'max', error => nerr 'a' }; -t { range => [1,2] }, 1, 1, undef; -t { range => [1,2] }, 2, 2, undef; -t { range => [1,2] }, 0.9, 0.9, { validation => 'range', error => { validation => 'min', expected => 1, got => 0.9 } }; -t { range => [1,2] }, 2.1, 2.1, { validation => 'range', error => { validation => 'max', expected => 2, got => 2.1 } }; +t { num => 1 }, 0, 0; +f { num => 1 }, '-', nerr '-'; +f { num => 1 }, '00', nerr '00'; +t { num => 1 }, '1', '1'; +f { num => 1 }, '1.1.', nerr '1.1.'; +f { num => 1 }, '1.-1', nerr '1.-1'; +f { num => 1 }, '.1', nerr '.1'; +t { num => 1 }, '0.1e5', '0.1e5'; +t { num => 1 }, '0.1e+5', '0.1e+5'; +f { num => 1 }, '0.1e5.1', nerr '0.1e5.1'; +t { int => 1 }, 0, 0; +t { int => 1 }, -123, -123; +f { int => 1 }, -123.1, { validation => 'int', got => -123.1 }; +t { uint => 1 }, 0, 0; +t { uint => 1 }, 123, 123; +f { uint => 1 }, -123, { validation => 'uint', got => -123 }; +t { min => 1 }, 1, 1; +f { min => 1 }, 0.9, { validation => 'min', expected => 1, got => 0.9 }; +f { min => 1 }, 'a', { validation => 'min', error => nerr 'a' }; +t { max => 1 }, 1, 1; +f { max => 1 }, 1.1, { validation => 'max', expected => 1, got => 1.1 }; +f { max => 1 }, 'a', { validation => 'max', error => nerr 'a' }; +t { range => [1,2] }, 1, 1; +t { range => [1,2] }, 2, 2; +f { range => [1,2] }, 0.9, { validation => 'range', error => { validation => 'min', expected => 1, got => 0.9 } }; +f { range => [1,2] }, 2.1, { validation => 'range', error => { validation => 'max', expected => 2, got => 2.1 } }; #t { range => [1,2] }, 'a', 'a', { validation => 'range', error => { validation => 'max', error => nerr 'a' } }; # XXX: Error validation type depends on evaluation order # email template use utf8; -t { email => 1 }, $_->[1], $_->[1], $_->[0] ? undef : { validation => 'email', got => $_->[1] } for ( - [ 0, 'abc.com' ], - [ 0, 'abc@localhost' ], - [ 0, 'abc@10.0.0.' ], - [ 0, 'abc@256.0.0.1' ], - [ 0, '@blicky.net' ], - [ 0, 'a @a.com' ], - [ 0, 'a"@a.com' ], - [ 0, 'a@[:]' ], - [ 0, 'a@127.0.0.1' ], - [ 0, 'a@[::1]' ], - [ 1, 'a@a.com' ], - [ 1, 'a@a.com.' ], - [ 1, 'é@yörhel.nl' ], - [ 1, 'a+_0-c@yorhel.nl' ], - [ 1, 'é@x-y_z.example' ], - [ 1, 'abc@x-y_z.example' ], +f { email => 1 }, $_, { validation => 'email', got => $_ } for ( + 'abc.com', + 'abc@localhost', + 'abc@10.0.0.', + 'abc@256.0.0.1', + '@blicky.net', + 'a @a.com', + 'a"@a.com', + 'a@[:]', + 'a@127.0.0.1', + 'a@[::1]', +); +t { email => 1 }, $_, $_ for ( + 'a@a.com', + 'a@a.com.', + 'é@yörhel.nl', + 'a+_0-c@yorhel.nl', + 'é@x-y_z.example', + 'abc@x-y_z.example', ); my $long = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx@xxxxxxxxxxxxxxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxx.xxxxx'; -t { email => 1 }, $long, $long, { validation => 'email', error => { validation => 'maxlength', got => 255, expected => 254 } }; +f { email => 1 }, $long, { validation => 'email', error => { validation => 'maxlength', got => 255, expected => 254 } }; # weburl template -t { weburl => 1 }, $_->[1], $_->[1], $_->[0] ? undef : { validation => 'weburl', got => $_->[1] } for ( - [ 0, 'http' ], - [ 0, 'http://' ], - [ 0, 'http:///' ], - [ 0, 'http://x/' ], - [ 0, 'http://x/' ], - [ 0, 'http://256.0.0.1/' ], - [ 0, 'http://blicky.net:050/' ], - [ 0, 'ftp//blicky.net/' ], - [ 1, 'http://blicky.net/' ], - [ 1, 'http://blicky.net:50/' ], - [ 1, 'https://blicky.net/' ], - [ 1, 'https://[::1]:80/' ], - [ 1, 'https://l-n.x_.example.com/' ], - [ 1, 'https://blicky.net/?#Who\'d%20ever%22makeaurl_like-this/!idont.know' ], +f { weburl => 1 }, $_, { validation => 'weburl', got => $_ } for ( + 'http', + 'http://', + 'http:///', + 'http://x/', + 'http://x/', + 'http://256.0.0.1/', + 'http://blicky.net:050/', + 'ftp//blicky.net/', +); +t { weburl => 1}, $_, $_ for ( + 'http://blicky.net/', + 'http://blicky.net:50/', + 'https://blicky.net/', + 'https://[::1]:80/', + 'https://l-n.x_.example.com/', + 'https://blicky.net/?#Who\'d%20ever%22makeaurl_like-this/!idont.know', ); From e4b6b77e1bd0390c675c67e26cfb2de86f6f29c0 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Wed, 5 Mar 2025 15:39:46 +0100 Subject: [PATCH 04/86] Validate: rename rmwhitespace to trim and use builtin::trim() --- FU/Validate.pm | 45 ++++++++++++++++++++------------------------- t/validate.t | 10 +++++----- 2 files changed, 25 insertions(+), 30 deletions(-) diff --git a/FU/Validate.pm b/FU/Validate.pm index 7f1cc1c..296d78f 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -2,7 +2,7 @@ package FU::Validate 0.2; use v5.36; use experimental 'builtin', 'for_list'; -use builtin qw/true false blessed/; +use builtin qw/true false blessed trim/; use Carp 'confess'; use FU::Util 'to_bool'; @@ -12,7 +12,7 @@ my %builtin = map +($_,1), qw/ type default onerror - rmwhitespace + trim values scalar sort unique keys unknown missing func @@ -130,7 +130,7 @@ sub _compile($schema, $validations, $rec) { confess "Incompatible types, '$t->[0]' requires '$t->{schema}{type}', but another validation requires '$top{type}'"; } exists $t->{schema}{$_} and !exists $top{$_} and $top{$_} = delete $t->{schema}{$_} - for qw/default onerror rmwhitespace type scalar unknown missing sort unique/; + for qw/default onerror trim type scalar unknown missing sort unique/; push @keys, keys %{ delete $t->{known_keys} }; push @keys, keys %{ $t->{schema}{keys} } if $t->{schema}{keys}; @@ -155,7 +155,7 @@ sub compile($pkg, $schema, $validations={}) { $c->{schema}{type} //= 'scalar'; $c->{schema}{missing} //= 'create'; - $c->{schema}{rmwhitespace} //= 1 if $c->{schema}{type} eq 'scalar'; + $c->{schema}{trim} //= 1 if $c->{schema}{type} eq 'scalar'; $c->{schema}{unknown} //= 'remove' if $c->{schema}{type} eq 'hash'; confess "Invalid value for 'type': $c->{schema}{type}" if !$type_vals{$c->{schema}{type}}; @@ -262,12 +262,8 @@ sub _validate_array { sub _validate_input { my $c = $_[0]; - # rmwhitespace (needs to be done before the 'default' test) - if (defined $_[1] && !ref $_[1] && $c->{schema}{type} eq 'scalar' && $c->{schema}{rmwhitespace}) { - $_[1] =~ s/\r//g; - $_[1] =~ s/^\s*//; - $_[1] =~ s/\s*$//; - } + # trim (needs to be done before the 'default' test) + $_[1] = trim $_[1] =~ s/\r//rg if defined $_[1] && !ref $_[1] && $c->{schema}{type} eq 'scalar' && $c->{schema}{trim}; # default if (!defined $_[1] || (!ref $_[1] && $_[1] eq '')) { @@ -410,10 +406,10 @@ validation to be performed. None of the options or validations are required, but some built-ins have default values. This means that the empty schema C<{}> is actually equivalent to: - { type => 'scalar', - rmwhitespace => 1, - default => \'required', - missing => 'create', + { type => 'scalar', + trim => 1, + default => \'required', + missing => 'create', } =head2 Built-in options @@ -444,12 +440,11 @@ C<$val> is returned instead. If C<$val> is a CODE reference, the subroutine is called with the original value (which is either no argument, undef or an empty string) and the return value of the subroutine is used as value instead. -The empty check is performed after I and before any other -validations. So a string containing only whitespace is considered an empty -string and will be treated according to this I option. As an -additional side effect, other validations will never get to validate undef or -an empty string, as these values are either rejected or substituted with a -default. +The empty check is performed after I and before any other validations. So +a string containing only whitespace is considered an empty string and will be +treated according to this I option. As an additional side effect, +other validations will never get to validate undef or an empty string, as these +values are either rejected or substituted with a default. =item onerror => $val @@ -461,11 +456,11 @@ If C<$val> is a CODE reference, the subroutine is called with the (partially normalized) input as first argument and error object as second argument. The return value of the subroutine is then returned for this validation. -=item rmwhitespace => 0/1 +=item trim => 0/1 By default, any whitespace around scalar-type input is removed before testing -any other validations. Setting I to a false value will disable -this behavior. +any other validations. Setting I to a false value will disable this +behavior. =item keys => $hashref @@ -818,8 +813,8 @@ schema that contains the I built-in option to do the actual validation. Custom validations can also set built-in options, but the semantics differ a little depending on the option. First, be aware that many of the built-in options apply to the whole schema and not just to the custom validation. For -example, if the top-level schema sets C<< rmwhitespace => 0 >>, then all -validations used in that schema may get input with whitespace around it. +example, if the top-level schema sets C<< trim => 0 >>, then all validations +used in that schema may get input with whitespace around it. All validations used in a schema need to agree upon a single I option. If a custom validation does not specify a I option (and no type is diff --git a/t/validate.t b/t/validate.t index fdcd416..1bf0b35 100644 --- a/t/validate.t +++ b/t/validate.t @@ -15,7 +15,7 @@ my %validations = ( defaultsub1 => { default => sub { 2 } }, defaultsub2 => { default => sub { defined $_[0] } }, onerrorsub => { onerror => sub { ref $_[1] } }, - collapsews => { rmwhitespace => 0, func => sub { $_[0] =~ s/\s+/ /g; 1 } }, + collapsews => { trim => 0, func => sub { $_[0] =~ s/\s+/ /g; 1 } }, neverfails => { onerror => 'err' }, revnum => { type => 'array', sort => sub($x,$y) { $y <=> $x } }, uniquelength => { type => 'array', values => { type => 'array' }, unique => sub { scalar @{$_[0]} } }, @@ -68,11 +68,11 @@ t { defaultsub2 => 1 }, undef, ''; t { defaultsub2 => 1 }, '', 1; t { onerrorsub => 1 }, undef, 'FU::Validate::err'; -# rmwhitespace +# trim t {}, " Va\rl id \n ", 'Val id'; -t { rmwhitespace => 0 }, " Va\rl id \n ", " Va\rl id \n "; +t { trim => 0 }, " Va\rl id \n ", " Va\rl id \n "; f {}, ' ', { validation => 'required' }; -t { rmwhitespace => 0 }, ' ', ' '; +t { trim => 0 }, ' ', ' '; # arrays f {}, [], { validation => 'type', expected => 'scalar', got => 'array' }; @@ -161,7 +161,7 @@ t { mybool => 1 }, undef, 0; t { mybool => 1 }, '', 0; t { collapsews => 1 }, " \t\n ", ' '; t { collapsews => 1 }, ' x ', ' x '; -t { collapsews => 1, rmwhitespace => 1 }, ' x ', 'x'; +t { collapsews => 1, trim => 1 }, ' x ', 'x'; f { person => 1 }, 1, { validation => 'type', expected => 'hash', got => 'scalar' }; t { person => 1, default => 1 }, undef, 1; f { person => 1 }, { sex => 1 }, { validation => 'person', error => { validation => 'keys', errors => [{ key => 'name', validation => 'required' }] } }; From e5755ddd80cf44dc5ba8a76b4495afb33e9bac0f Mon Sep 17 00:00:00 2001 From: Yorhel Date: Thu, 6 Mar 2025 10:16:12 +0100 Subject: [PATCH 05/86] Validate: Human-readable error messages --- FU.pm | 4 +-- FU/Validate.pm | 35 ++++++++++++++---- t/validate.t | 96 ++++++++++++++++++++++++++------------------------ 3 files changed, 79 insertions(+), 56 deletions(-) diff --git a/FU.pm b/FU.pm index 4d04562..57123a5 100644 --- a/FU.pm +++ b/FU.pm @@ -640,8 +640,8 @@ sub _getfield($data, @a) { require FU::Validate; my $schema = FU::Validate->compile(@a > 1 ? { keys => {@a} } : $a[0]); my $res = eval { $schema->validate($data) }; - fu->error(400, "Input validation failed") if $@; # TODO: More detailed error message - return @a == 2 ? $res->data->{$a[0]} : $res->data; + fu->error(400, "Input validation failed: $@") if $@; + return @a == 2 ? $res->{$a[0]} : $res; } sub query { diff --git a/FU/Validate.pm b/FU/Validate.pm index 296d78f..1c7c94b 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -321,8 +321,10 @@ sub _validate { sub validate($c, $input) { my $r = _validate($c, $input); return $input if !$r; - die bless $r, 'FU::Validate::err'; - $input + $r = bless $r, 'FU::Validate::err';; + my @e = $r->errors; + $r->{longmess} = Carp::longmess(@e > 1 ? join("\n",@e)."\n" : $e[0]); + die $r; } @@ -330,12 +332,31 @@ sub validate($c, $input) { package FU::Validate::err; use v5.36; +use FU::Util; -use overload '""' => sub { - # TODO: Better error message - require Data::Dumper; - Data::Dumper->new([{$_[0]->%*}])->Terse(1)->Pair(':')->Indent(0)->Sortkeys(1)->Dump."\n"; -}; +use overload '""' => sub { $_[0]{longmess} || join "\n", $_[0]->errors }; + +sub _fmtkey($k) { + $k =~ /^[a-zA-Z0-9_-]+$/ ? $k : FU::Util::json_format($k); +} + +sub _fmtval($v) { + eval { $v = FU::Util::json_format($v) }; "$v" +} + +sub errors($e, $prefix='') { + my $val = $e->{validation}; + my $p = $prefix ? "$prefix: " : ''; + $val eq 'keys' ? map errors($_, $prefix.'.'._fmtkey($_->{key})), $e->{errors}->@* : + $val eq 'missing' ? $prefix.'.'._fmtkey($e->{key}).': required key missing' : + $val eq 'values' ? map errors($_, $prefix."[$_->{index}]"), $e->{errors}->@* : + $val eq 'unique' ? $prefix."[$e->{index_b}] value '"._fmtval($e->{value_a})."' duplicated" : + $val eq 'required' ? "${p}required value missing" : + $val eq 'type' ? "${p}invalid type, expected '$e->{expected}' but got '$e->{got}'" : + $val eq 'unknown' ? ($e->{keys}->@* > 1 ? "${p}unknown keys: ".join(', ', _fmtkey($e->{keys})) : "${p}unknown key '"._fmtkey($e->{keys}[0])."'") : + $e->{error} ? errors($e->{error}, "${p}validation '$val'") : + "${p}failed validation '$val'"; +} 1; diff --git a/t/validate.t b/t/validate.t index 1bf0b35..b01352b 100644 --- a/t/validate.t +++ b/t/validate.t @@ -44,7 +44,7 @@ sub t($schema, $input, $output) { is_deeply $res, $output, "data ok $line"; } -sub f($schema, $input, $error) { +sub f($schema, $input, $error, @msg) { my $line = (caller)[2]; my $schema_copy = dclone([$schema])->[0]; @@ -53,14 +53,16 @@ sub f($schema, $input, $error) { ok !eval { FU::Validate->compile($schema, \%validations)->validate($input); 1 }, "eval $line"; is_deeply $schema, $schema_copy, "schema modification $line"; is_deeply $input, $input_copy, "input modification $line"; + delete $@->{longmess}; is_deeply { $@->%* }, $error, "err $line"; + is_deeply [$@->errors], \@msg, "errmsg $line"; } # default t {}, 0, 0; -f {}, '', { validation => 'required' }; -f {}, undef, { validation => 'required' }; +f {}, '', { validation => 'required' }, 'required value missing'; +f {}, undef, { validation => 'required' }, 'required value missing'; t { default => undef }, undef, undef; t { default => undef }, '', undef; t { defaultsub1 => 1 }, undef, 2; @@ -71,104 +73,104 @@ t { onerrorsub => 1 }, undef, 'FU::Validate::err'; # trim t {}, " Va\rl id \n ", 'Val id'; t { trim => 0 }, " Va\rl id \n ", " Va\rl id \n "; -f {}, ' ', { validation => 'required' }; +f {}, ' ', { validation => 'required' }, 'required value missing'; t { trim => 0 }, ' ', ' '; # arrays -f {}, [], { validation => 'type', expected => 'scalar', got => 'array' }; -f { type => 'array' }, 1, { validation => 'type', expected => 'array', got => 'scalar' }; +f {}, [], { validation => 'type', expected => 'scalar', got => 'array' }, "invalid type, expected 'scalar' but got 'array'"; +f { type => 'array' }, 1, { validation => 'type', expected => 'array', got => 'scalar' }, "invalid type, expected 'array' but got 'scalar'"; t { type => 'array' }, [], []; t { type => 'array' }, [undef,1,2,{}], [undef,1,2,{}]; t { type => 'array', scalar => 1 }, 1, [1]; -f { type => 'array', values => {} }, [undef], { validation => 'values', errors => [{ index => 0, validation => 'required' }] }; +f { type => 'array', values => {} }, [undef], { validation => 'values', errors => [{ index => 0, validation => 'required' }] }, "[0]: required value missing"; t { type => 'array', values => {} }, [' a '], ['a']; t { type => 'array', sort => 'str' }, [qw/20 100 3/], [qw/100 20 3/]; t { type => 'array', sort => 'num' }, [qw/20 100 3/], [qw/3 20 100/]; t { revnum => 1 }, [qw/20 100 3/], [qw/100 20 3/]; t { type => 'array', sort => 'num', unique => 1 }, [qw/3 2 1/], [qw/1 2 3/]; -f { type => 'array', sort => 'num', unique => 1 }, [qw/3 2 3/], { validation => 'unique', index_a => 1, value_a => 3, index_b => 2, value_b => 3 }; +f { type => 'array', sort => 'num', unique => 1 }, [qw/3 2 3/], { validation => 'unique', index_a => 1, value_a => 3, index_b => 2, value_b => 3 }, q{[2] value '"3"' duplicated}; t { type => 'array', unique => 1 }, [qw/3 1 2/], [qw/3 1 2/]; -f { type => 'array', unique => 1 }, [qw/3 1 3/], { validation => 'unique', index_a => 0, value_a => 3, index_b => 2, value_b => 3, key => 3 }; +f { type => 'array', unique => 1 }, [qw/3 1 3/], { validation => 'unique', index_a => 0, value_a => 3, index_b => 2, value_b => 3, key => 3 }, q{[2] value '"3"' duplicated}; t { uniquelength => 1 }, [[],[1],[1,2]], [[],[1],[1,2]]; -f { uniquelength => 1 }, [[],[1],[2]], { validation => 'unique', index_a => 1, value_a => [1], index_b => 2, value_b => [2], key => 1 }; +f { uniquelength => 1 }, [[],[1],[2]], { validation => 'unique', index_a => 1, value_a => [1], index_b => 2, value_b => [2], key => 1 }, q{[2] value '[1]' duplicated}; t { type => 'array', setundef => 1 }, [], undef; t { type => 'array', values => { type => 'any', setundef => 1 } }, [[]], [undef]; # hashes -f { type => 'hash' }, [], { validation => 'type', expected => 'hash', got => 'array' }; -f { type => 'hash' }, 'a', { validation => 'type', expected => 'hash', got => 'scalar' }; +f { type => 'hash' }, [], { validation => 'type', expected => 'hash', got => 'array' }, "invalid type, expected 'hash' but got 'array'"; +f { type => 'hash' }, 'a', { validation => 'type', expected => 'hash', got => 'scalar' }, "invalid type, expected 'hash' but got 'scalar'"; t { type => 'hash' }, {a=>[],b=>undef,c=>{}}, {}; -f { type => 'hash', keys => { a=>{} } }, {}, { validation => 'keys', errors => [{ key => 'a', validation => 'required' }] }; +f { type => 'hash', keys => { a=>{} } }, {}, { validation => 'keys', errors => [{ key => 'a', validation => 'required' }] }, '.a: required value missing'; t { type => 'hash', keys => { a=>{missing=>'ignore'} } }, {}, {}; t { type => 'hash', keys => { a=>{default=>undef} } }, {}, {a=>undef}; t { type => 'hash', keys => { a=>{missing=>'create',default=>undef} } }, {}, {a=>undef}; -f { type => 'hash', keys => { a=>{missing=>'reject'} } }, {}, {key => 'a', validation => 'missing'}; +f { type => 'hash', keys => { a=>{missing=>'reject'} } }, {}, {key => 'a', validation => 'missing'}, '.a: required key missing'; t { type => 'hash', keys => { a=>{} } }, {a=>' a '}, {a=>'a'}; # Test against in-place modification t { type => 'hash', keys => { a=>{} }, unknown => 'remove' }, { a=>1,b=>1 }, { a=>1 }; -f { type => 'hash', keys => { a=>{} }, unknown => 'reject' }, { a=>1,b=>1 }, { validation => 'unknown', keys => ['b'], expected => ['a'] }; +f { type => 'hash', keys => { a=>{} }, unknown => 'reject' }, { a=>1,b=>1 }, { validation => 'unknown', keys => ['b'], expected => ['a'] }, "unknown key 'b'"; t { type => 'hash', keys => { a=>{} }, unknown => 'pass' }, { a=>1,b=>1 }, { a=>1,b=>1 }; t { type => 'hash', setundef => 1 }, {}, undef; t { type => 'hash', unknown => 'reject', keys => { a=>{ type => 'any', setundef => 1}} }, {a=>[]}, {a=>undef}; # default validations -f { minlength => 3 }, 'ab', { validation => 'minlength', expected => 3, got => 2 }; +f { minlength => 3 }, 'ab', { validation => 'minlength', expected => 3, got => 2 }, "failed validation 'minlength'"; t { minlength => 3 }, 'abc', 'abc'; -f { maxlength => 3 }, 'abcd', { validation => 'maxlength', expected => 3, got => 4 }; +f { maxlength => 3 }, 'abcd', { validation => 'maxlength', expected => 3, got => 4 }, "failed validation 'maxlength'"; t { maxlength => 3 }, 'abc', 'abc'; t { minlength => 3, maxlength => 3 }, 'abc', 'abc'; -f { length => 3 }, 'ab', { validation => 'length', expected => 3, got => 2 }; -f { length => 3 }, 'abcd', { validation => 'length', expected => 3, got => 4 }; +f { length => 3 }, 'ab', { validation => 'length', expected => 3, got => 2 }, "failed validation 'length'"; +f { length => 3 }, 'abcd', { validation => 'length', expected => 3, got => 4 }, "failed validation 'length'"; t { length => 3 }, 'abc', 'abc'; t { length => [1,3] }, 'abc', 'abc'; -f { length => [1,3] }, 'abcd', { validation => 'length', expected => [1,3], got => 4 };; +f { length => [1,3] }, 'abcd', { validation => 'length', expected => [1,3], got => 4 }, "failed validation 'length'"; t { type => 'array', length => 0 }, [], []; -f { type => 'array', length => 1 }, [1,2], { validation => 'length', expected => 1, got => 2 }; +f { type => 'array', length => 1 }, [1,2], { validation => 'length', expected => 1, got => 2 }, "failed validation 'length'"; t { type => 'hash', length => 0 }, {}, {}; -f { type => 'hash', length => 1, unknown => 'pass' }, {qw/1 a 2 b/}, { validation => 'length', expected => 1, got => 2 }; +f { type => 'hash', length => 1, unknown => 'pass' }, {qw/1 a 2 b/}, { validation => 'length', expected => 1, got => 2 }, "failed validation 'length'"; t { type => 'hash', length => 1, keys => {a => {missing=>'ignore'}, b => {missing=>'ignore'}} }, {a=>1}, {a=>1}; t { regex => '^a' }, 'abc', 'abc'; # XXX: Can't use qr// here because t() does dclone(). The 'hex' test covers that case anyway. -f { regex => '^a' }, 'cba', { validation => 'regex', regex => '^a', got => 'cba' }; +f { regex => '^a' }, 'cba', { validation => 'regex', regex => '^a', got => 'cba' }, "failed validation 'regex'"; t { enum => [1,2] }, 1, 1; t { enum => [1,2] }, 2, 2; -f { enum => [1,2] }, 3, { validation => 'enum', expected => [1,2], got => 3 }; +f { enum => [1,2] }, 3, { validation => 'enum', expected => [1,2], got => 3 }, "failed validation 'enum'"; t { enum => 1 }, 1, 1; -f { enum => 1 }, 2, { validation => 'enum', expected => [1], got => 2 }; +f { enum => 1 }, 2, { validation => 'enum', expected => [1], got => 2 }, "failed validation 'enum'"; t { enum => {a=>1,b=>2} }, 'a', 'a'; -f { enum => {a=>1,b=>2} }, 'c', { validation => 'enum', expected => ['a','b'], got => 'c' }; +f { enum => {a=>1,b=>2} }, 'c', { validation => 'enum', expected => ['a','b'], got => 'c' }, "failed validation 'enum'"; t { anybool => 1 }, 1, true; t { anybool => 1 }, undef, false; t { anybool => 1 }, '', false; t { anybool => 1 }, {}, true; t { anybool => 1 }, [], true; t { anybool => 1 }, bless({}, 'test'), true; -f { bool => 1 }, 1, { validation => 'bool' }; +f { bool => 1 }, 1, { validation => 'bool' }, "failed validation 'bool'"; t { bool => 1 }, \1, true; my($true, $false) = (1,0); t { bool => 1 }, bless(\$true, 'boolean'), true; t { bool => 1 }, bless(\$false, 'boolean'), false; -f { bool => 1 }, bless(\$true, 'test'), { validation => 'bool' }; +f { bool => 1 }, bless(\$true, 'test'), { validation => 'bool' }, "failed validation 'bool'"; t { ascii => 1 }, 'ab c', 'ab c'; -f { ascii => 1 }, "a\nb", { validation => 'ascii', got => "a\nb" }; +f { ascii => 1 }, "a\nb", { validation => 'ascii', got => "a\nb" }, "failed validation 'ascii'"; # custom validations t { hex => 1 }, 'DeadBeef', 'DeadBeef'; -f { hex => 1 }, 'x', { validation => 'hex', error => { validation => 'regex', regex => "$validations{hex}{regex}", got => 'x' } }; +f { hex => 1 }, 'x', { validation => 'hex', error => { validation => 'regex', regex => "$validations{hex}{regex}", got => 'x' } }, "validation 'hex': failed validation 'regex'"; t { prefix => 'a' }, 'abc', 'abc'; -f { prefix => 'a' }, 'cba', { validation => 'prefix', error => { validation => 'func', result => '' } }; +f { prefix => 'a' }, 'cba', { validation => 'prefix', error => { validation => 'func', result => '' } }, "validation 'prefix': failed validation 'func'"; t { mybool => 1 }, 'abc', 1; t { mybool => 1 }, undef, 0; t { mybool => 1 }, '', 0; t { collapsews => 1 }, " \t\n ", ' '; t { collapsews => 1 }, ' x ', ' x '; t { collapsews => 1, trim => 1 }, ' x ', 'x'; -f { person => 1 }, 1, { validation => 'type', expected => 'hash', got => 'scalar' }; +f { person => 1 }, 1, { validation => 'type', expected => 'hash', got => 'scalar' }, "invalid type, expected 'hash' but got 'scalar'"; t { person => 1, default => 1 }, undef, 1; -f { person => 1 }, { sex => 1 }, { validation => 'person', error => { validation => 'keys', errors => [{ key => 'name', validation => 'required' }] } }; +f { person => 1 }, { sex => 1 }, { validation => 'person', error => { validation => 'keys', errors => [{ key => 'name', validation => 'required' }] } }, "validation 'person'.name: required value missing"; t { person => 1 }, { sex => undef, name => 'y' }, { sex => 1, name => 'y' }; -f { person => 1, keys => {age => {default => \'required'}} }, {name => 'x', sex => 'y'}, { validation => 'keys', errors => [{ key => 'age', validation => 'required' }] }; +f { person => 1, keys => {age => {default => \'required'}} }, {name => 'x', sex => 'y'}, { validation => 'keys', errors => [{ key => 'age', validation => 'required' }] }, '.age: required value missing'; t { person => 1, keys => {extra => {}} }, {name => 'x', sex => 'y', extra => 1}, { name => 'x', sex => 'y', extra => 1 }; -f { person => 1, keys => {extra => {}} }, {name => 'x', sex => 'y', extra => ''}, { validation => 'keys', errors => [{ key => 'extra', validation => 'required' }] }; +f { person => 1, keys => {extra => {}} }, {name => 'x', sex => 'y', extra => ''}, { validation => 'keys', errors => [{ key => 'extra', validation => 'required' }] }, '.extra: required value missing'; t { person => 1 }, {name => 'x', sex => 'y', extra => 1}, {name => 'x', sex => 'y', extra => 1}; t { person => 1, unknown => 'remove' }, {name => 'x', sex => 'y', extra => 1}, {name => 'x', sex => 'y'}; t { neverfails => 1, int => 1 }, undef, 'err'; @@ -176,7 +178,7 @@ t { neverfails => 1, int => 1 }, 'x', 'err'; t { neverfails => 1, int => 1, onerror => undef }, 'x', undef; # XXX: no way to 'unset' an inherited onerror clause, hmm. # numbers -sub nerr { +{ validation => 'num', got => $_[0] } } +sub nerr { ({ validation => 'num', got => $_[0] }, "failed validation 'num'") } t { num => 1 }, 0, 0; f { num => 1 }, '-', nerr '-'; f { num => 1 }, '00', nerr '00'; @@ -189,25 +191,25 @@ t { num => 1 }, '0.1e+5', '0.1e+5'; f { num => 1 }, '0.1e5.1', nerr '0.1e5.1'; t { int => 1 }, 0, 0; t { int => 1 }, -123, -123; -f { int => 1 }, -123.1, { validation => 'int', got => -123.1 }; +f { int => 1 }, -123.1, { validation => 'int', got => -123.1 }, "failed validation 'int'"; t { uint => 1 }, 0, 0; t { uint => 1 }, 123, 123; -f { uint => 1 }, -123, { validation => 'uint', got => -123 }; +f { uint => 1 }, -123, { validation => 'uint', got => -123 }, "failed validation 'uint'"; t { min => 1 }, 1, 1; -f { min => 1 }, 0.9, { validation => 'min', expected => 1, got => 0.9 }; -f { min => 1 }, 'a', { validation => 'min', error => nerr 'a' }; +f { min => 1 }, 0.9, { validation => 'min', expected => 1, got => 0.9 }, "failed validation 'min'"; +f { min => 1 }, 'a', { validation => 'min', error => (nerr 'a')[0] }, "validation 'min': failed validation 'num'"; t { max => 1 }, 1, 1; -f { max => 1 }, 1.1, { validation => 'max', expected => 1, got => 1.1 }; -f { max => 1 }, 'a', { validation => 'max', error => nerr 'a' }; +f { max => 1 }, 1.1, { validation => 'max', expected => 1, got => 1.1 }, "failed validation 'max'"; +f { max => 1 }, 'a', { validation => 'max', error => (nerr 'a')[0] }, "validation 'max': failed validation 'num'"; t { range => [1,2] }, 1, 1; t { range => [1,2] }, 2, 2; -f { range => [1,2] }, 0.9, { validation => 'range', error => { validation => 'min', expected => 1, got => 0.9 } }; -f { range => [1,2] }, 2.1, { validation => 'range', error => { validation => 'max', expected => 2, got => 2.1 } }; +f { range => [1,2] }, 0.9, { validation => 'range', error => { validation => 'min', expected => 1, got => 0.9 } }, "validation 'range': failed validation 'min'"; +f { range => [1,2] }, 2.1, { validation => 'range', error => { validation => 'max', expected => 2, got => 2.1 } }, "validation 'range': failed validation 'max'"; #t { range => [1,2] }, 'a', 'a', { validation => 'range', error => { validation => 'max', error => nerr 'a' } }; # XXX: Error validation type depends on evaluation order # email template use utf8; -f { email => 1 }, $_, { validation => 'email', got => $_ } for ( +f { email => 1 }, $_, { validation => 'email', got => $_ }, "failed validation 'email'" for ( 'abc.com', 'abc@localhost', 'abc@10.0.0.', @@ -228,10 +230,10 @@ t { email => 1 }, $_, $_ for ( 'abc@x-y_z.example', ); my $long = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx@xxxxxxxxxxxxxxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxx.xxxxx'; -f { email => 1 }, $long, { validation => 'email', error => { validation => 'maxlength', got => 255, expected => 254 } }; +f { email => 1 }, $long, { validation => 'email', error => { validation => 'maxlength', got => 255, expected => 254 } }, "validation 'email': failed validation 'maxlength'"; # weburl template -f { weburl => 1 }, $_, { validation => 'weburl', got => $_ } for ( +f { weburl => 1 }, $_, { validation => 'weburl', got => $_ }, "failed validation 'weburl'" for ( 'http', 'http://', 'http:///', From 17176738a0d942f5925e76afee4414349d9a6fbd Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sat, 8 Mar 2025 14:02:51 +0100 Subject: [PATCH 06/86] FU: Support multipart file uploads + some doc fixes API is not super convenient and implementation is lousy, but uploading files is not a super common operation so that should be fine. At least it supports large files with only a single in-memory copy. --- FU.pm | 25 +++-- FU/MultipartFormData.pm | 209 ++++++++++++++++++++++++++++++++++++++++ FU/Pg.pm | 4 +- FU/SQL.pm | 6 +- FU/XMLWriter.pm | 2 +- t/multipart.t | 47 +++++++++ 6 files changed, 280 insertions(+), 13 deletions(-) create mode 100644 FU/MultipartFormData.pm create mode 100644 t/multipart.t diff --git a/FU.pm b/FU.pm index 57123a5..4807e0b 100644 --- a/FU.pm +++ b/FU.pm @@ -253,8 +253,9 @@ sub _read_req_http($sock, $req) { $req->{body} = ''; while ($len > 0) { - my $r = $sock->read($req->{body}, $len, -1); - fu->error(400, 'Client disconnect before request was read') if !$r + my $r = $sock->read($req->{body}, $len, length $req->{body}); + fu->error(400, 'Client disconnect before request was read') if !$r; + $len -= $r; } } @@ -654,7 +655,6 @@ sub query { sub formdata { shift; $FU::REQ->{formdata} ||= eval { - # TODO: Support multipart encoding confess "Invalid content type for form data" if (fu->header('content-type')||'') ne 'application/x-www-form-urlencoded'; FU::Util::query_decode($FU::REQ->{data}); @@ -662,6 +662,13 @@ sub formdata { _getfield $FU::REQ->{formdata}, @_; } +sub multipart { + require FU::MultipartFormData; + $FU::REQ->{multipart} ||= eval { + FU::MultipartFormData->parse(fu->header('content-type')||'', $FU::REQ->{body}) + } || fu->error(400, $@); +} + @@ -1239,12 +1246,18 @@ Parse, validate and return multiple query parameters. =item fu->formdata($schema) -Like C<< fu->query() >> but returns data from the POST request body. +Like C<< fu->query() >> but returns data from the POST request body. This +method only supports form data encoded as C, +which is the default for HTML C<<
>>s. To handle multipart form data, +use C<< fu->multipart >> instead. + +=item fu->multipart + +Parse the request body as C and return an array of fields. +Refer to L for more information. =back -I Support C and file uploads. - I Support JSON bodies. I Cookie parsing. diff --git a/FU/MultipartFormData.pm b/FU/MultipartFormData.pm new file mode 100644 index 0000000..da415f8 --- /dev/null +++ b/FU/MultipartFormData.pm @@ -0,0 +1,209 @@ +package FU::MultipartFormData; +use v5.36; +use Carp 'confess'; +use FU::Util 'utf8_decode'; + +sub _arg($d) { $d =~ s/^\s+//r =~ s/\s+$//r =~ s/^"(.+)"$/$1/r } + +sub parse($pkg, $header, $data) { + confess "Invalid multipart header '$header'" + if $header !~ m{^multipart/form-data\s*;\s*boundary\s*=(.+)$}; + my $boundary = _arg $1; + confess "Invalid multipart boundary '$boundary'" if $boundary !~ /^[\x21-\x7e]+$/; + utf8::encode($boundary); + + my @a; + while ($data =~ m{--\Q$boundary\E(?:--\r\n|\r\n((?:.+\r\n)+)\r\n)}xg) { + my $hdrs = $1; + $a[$#a]{length} = $-[0] - 2 - $a[$#a]{start} if @a; + if (!$hdrs) { + confess "Trailing garbage" if pos $data != length $data; + last; + } + + my $d = bless { + data => $data, + start => pos $data, + }, $pkg; + + confess "Missing content-disposition header" if $hdrs !~ /content-disposition:\s*form-data;(.+)/i; + my $v = $1; + confess "Missing 'name' parameter" if $v !~ /[;\s]name=([^[;\s]+)/; + $d->{name} = utf8_decode _arg $1; + $d->{filename} = utf8_decode _arg $1 if $v =~ /[;\s]filename=([^;\s]+)/; + + if ($hdrs =~ /content-type:\s*([^;\s]+)(?:\s*;\s*charset=([^;\s]+))?/i) { + $d->{mime} = utf8_decode _arg $1; + $d->{charset} = utf8_decode _arg $2 if $2; + } + push @a, $d; + } + confess "Missing end-of-data marker" if @a && !defined $a[$#a]{length}; + \@a +} + +sub name { $_[0]{name} } +sub filename { $_[0]{filename} } +sub mime { $_[0]{mime} } +sub charset { $_[0]{charset} } +sub length { $_[0]{length} } + +sub substr($o,$off,$len=undef) { + $off += $o->{length} if $off < 0; + $off = 0 if $off < 0; + $off = $o->{length} if $off > $o->{length}; + + $len //= $o->{length} - $off; + $len += $o->{length} - 1 if $len < 0; + $len = 0 if $len < 0; + $len = $o->{length} - $off if $len > $o->{length} - $off; + + substr $o->{data}, $o->{start} + $off, $len; +} + +sub data { $_[0]->substr(0) } +sub value { utf8_decode $_[0]->data } + +sub syswrite($o, $fh) { + my $off = $o->{start}; + my $end = $o->{start} + $o->{length}; + while ($off < $end) { + my $r = syswrite $fh, $o->{data}, $end-$off, $off; + return if !defined $r; + $off += $r; + } + $o->{length}; +} + +sub save($o, $fn) { + open my $F, '>', $fn or confess "Error opening '$fn': $!"; + $o->syswrite($F) or confess "Error writing to '$fn': $!"; +} + +sub describe($o) { + my $head = eval { utf8_decode $o->substr(0, 100) }; + if (defined $head && $head =~ /\n/) { + ($head) = split /\n/, $head, 2; + $head .= '...'; + } elsif (defined $head && $o->{length} > 100) { + $head .= '...'; + } + $o->{name}.': '.join ' ', + $o->{filename} ? "filename=$o->{filename}" : (), + $o->{mime} ? "mime=$o->{mime}" : (), + $o->{charset} ? "charset=$o->{charset}" : (), + "length=$o->{length}", + defined $head ? "value=$head" : (); +} + +1; +__END__ + +=head1 NAME + +FU::MultipartFormData - Parse multipart/form-data + +=head1 SYNOPSIS + + my $fields = FU::MultipartFormData->parse($content_type_header, $request_body); + + for my $f (@$fields) { + print "%s %d\n", $f->name, $f->length; + + $f->save('file.png') if $f->name eq 'image'; + } + +=head1 DESCRIPTION + +This is a tiny module to parse an HTTP request body encoded as +C, which is typically used to handle file uploads. + +The entire request body is assumed to be in memory as a Perl string, but this +module makes an attempt to avoid any further copies of data values. + +=head2 Parsing + +=over FU::MultipartFormData->parse($header, $body) + +Returns an array of field objects from the given C<$header>, which must be a +valid value for the C request header, and the given C<$body>, +which must hold the request body as a byte string. An error is thrown if the +header is not valid or parsing failed. + +This module is pretty lousy and does not fully comform to any HTTP standards, +but it does happen to be able to parse POST data from any browser that I've +tried. + +=back + +=head2 Field Object + +Each field is parsed into a field object that supports the following methods: + +=over + +=item name + +Returns the field name as a Perl Unicode string. + +=item filename + +Returns the filename as a Perl Unicode string, or C if no filename was +provided. + +=item mime + +Returns the mime type extracted from the field's C header, or +C if none was present. + +=item charset + +Returns the charset extracted from the field's C header, or +C if none was present. + +=item length + +Returns the byte length of the field value. + +=item data + +Returns a copy of the field value as a byte string. You'll want to avoid using +this on large fields. + +=item value + +Returns a copy of the field value as a Unicode string. Uses C +from L, so also throws an error if the value contains control +characters. + +=item substr($off, $len) + +Equivalent to calling C on the string returned by C, but avoids +a copy of the entire field value. + +=item syswrite($fh) + +Write the field value to C<$fh> using Perl's C, returns C on +error or the number of bytes written on success. + +Can be used to write uploaded file data to a file or send it over a socket or +pipe, without making a full in-memory copy of the data. + +=item save($fn) + +Save the field value to the file C<$fn>, throws an error on failure. + +=item describe + +Returns a human-readable string to describe this field. Mainly for debugging +purposes, the exact format is subject to change. + +=back + +=head1 COPYRIGHT + +MIT. + +=head1 AUTHOR + +Yorhel diff --git a/FU/Pg.pm b/FU/Pg.pm index 7b9de5d..176a8ac 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -400,9 +400,7 @@ results into Perl values. Observed query preparation time, in seconds, including network round-trip. Returns 0 if a cached prepared statement was used or C if the query was -executed without a separate preparation phase (currently only happens with C<< -$conn->exec() >>, but support for direct query execution may be added for other -queries in the future as well). +executed without a separate preparation phase. =item $st->get_cache diff --git a/FU/SQL.pm b/FU/SQL.pm index 73f5613..c86a3fb 100644 --- a/FU/SQL.pm +++ b/FU/SQL.pm @@ -356,12 +356,12 @@ values. This function results in different SQL depending on the C option given to C. The default C<'dbi'> style passes each value as a bind parameter: - SQL 'WHERE id', IN([1, 2, 3, 4]); + SQL 'WHERE id', IN [1, 2, 3, 4]; # 'WHERE id IN(?, ?, ?, ?)', parameters: 1, 2, 3, 4 The C<'pg'> style passes the entire array as a single bind parameter instead: - SQL 'WHERE id', IN([1, 2, 3, 4]); + SQL 'WHERE id', IN [1, 2, 3, 4]; # 'WHERE id = ANY(?)', parameter: [1, 2, 3, 4] The C<'pg'> style allows for more efficient re-use of cached prepared @@ -372,7 +372,7 @@ with L or L. Can be used in the C<$hashref> versions of C, C and C as well: - WHERE { id => IN([1, 2]) } + WHERE { id => IN [1, 2] } # 'WHERE id IN(?, ?)' =back diff --git a/FU/XMLWriter.pm b/FU/XMLWriter.pm index 0911ced..08fc54d 100644 --- a/FU/XMLWriter.pm +++ b/FU/XMLWriter.pm @@ -146,7 +146,7 @@ These functions all return a byte string with (UTF-8) encoded XML. =item fragment($block) -Executes C<$block> and captures the output of all I +Executes C<$block> and captures the output of all L called within the same scope into a string. This function can be safely nested: my $string = fragment { diff --git a/t/multipart.t b/t/multipart.t new file mode 100644 index 0000000..842b9cd --- /dev/null +++ b/t/multipart.t @@ -0,0 +1,47 @@ +use v5.36; +use Test::More; +use FU::MultipartFormData; + +# Example based on https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/POST +my $t = <<'_' =~ s/\n/\r\n/rg; +--delimiter12345 +Content-Disposition: form-data; name="field1" +content-type: hello; charset=x + +value1 +--delimiter12345 +Content-Type: text +Content-Disposition: form-data; filename="example.txt"; name=field2 + +value2 +--delimiter12345-- +_ + + +my $l = FU::MultipartFormData->parse('multipart/form-data;boundary="delimiter12345"', $t); +is scalar @$l, 2; + +my $v = $l->[0]; +is $v->name, 'field1'; +is $v->filename, undef; +is $v->mime, 'hello'; +is $v->charset, 'x'; +is $v->length, 6; +is $v->data, 'value1'; + +is $v->substr(4), 'e1'; +is $v->substr(1, 2), 'al'; +is $v->substr(-2, 1), 'e'; +is $v->substr(-2, 5), 'e1'; +is $v->substr(-100, 2), 'va'; +is $v->substr(1, -3), 'al'; + +$v = $l->[1]; +is $v->name, 'field2'; +is $v->filename, 'example.txt'; +is $v->mime, 'text'; +is $v->charset, undef; +is $v->length, 6; +is $v->data, 'value2'; + +done_testing; From 70c5199df493bf3ec83925e83ccd385d5a092f7d Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sat, 8 Mar 2025 15:49:43 +0100 Subject: [PATCH 07/86] FU: Add JSON reading & writing methods --- FU.pm | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/FU.pm b/FU.pm index 4807e0b..1b3f6aa 100644 --- a/FU.pm +++ b/FU.pm @@ -637,7 +637,10 @@ sub headers { $FU::REQ->{hdr} } sub ip { $FU::REQ->{ip} } sub _getfield($data, @a) { - return $data->{$a[0]} if @a == 1 && !ref $a[0]; + if (@a == 1 && !ref $a[0]) { + fu->error(400, "Expected top-level to be a hash") if ref $data ne 'HASH'; + return $data->{$a[0]}; + } require FU::Validate; my $schema = FU::Validate->compile(@a > 1 ? { keys => {@a} } : $a[0]); my $res = eval { $schema->validate($data) }; @@ -652,6 +655,15 @@ sub query { _getfield $FU::REQ->{qs_parsed}, @_; } +sub json { + shift; + $FU::REQ->{json} ||= eval { + FU::Util::json_parse($FU::REQ->{data}, utf8 => 1) + } || fu->error(400, "JSON parse error: $@"); + return $FU::REQ->{json} if !@_; + _getfield $FU::REQ->{json}, @_; +} + sub formdata { shift; $FU::REQ->{formdata} ||= eval { @@ -715,6 +727,12 @@ sub set_header($, $hdr, $val=undef) { $FU::REQ->{reshdr}{ lc $hdr } = $val; } +sub send_json($, $data) { + fu->set_header('content-type', 'application/json'); + fu->set_body(FU::Util::json_format($data, canonical => 1, utf8 => 1)); + fu->done; +} + sub send_file($, $root, $path) { # This also catches files with '..' somewhere in the middle of the name. # Let's just disallow that to simplify this check, I'd err on the side of @@ -1242,9 +1260,12 @@ Parse, validate and return multiple query parameters. # Or, more concisely: my $data = fu->query(a => {anybool => 1}, b => {}); -=item fu->formdata($name) +=item fu->json(@args) -=item fu->formdata($schema) +Like C<< fu->query() >> but parses the request body as JSON. Returns the +decoded JSON value if C<@args> is empty. + +=item fu->formdata(@args) Like C<< fu->query() >> but returns data from the POST request body. This method only supports form data encoded as C, @@ -1253,13 +1274,11 @@ use C<< fu->multipart >> instead. =item fu->multipart -Parse the request body as C and return an array of fields. -Refer to L for more information. +Parse the request body as C and return an array of field +objects. Refer to L for more information. =back -I Support JSON bodies. - I Cookie parsing. @@ -1321,6 +1340,14 @@ templating system or L: }; }); +=item fu->send_json($data) + +Encode C<$data> as JSON (using C in L), set an +appropriate C header and send it to the client. Calls C<< +fu->done >>. + +I Support schema-based normalization. + =item fu->send_file($root, $path) If a file identified by C<"$root/$path"> exists, set that as response and call @@ -1374,8 +1401,6 @@ one of the following status codes or an alias: I Setting cookies. -I JSON output. - =head2 Running the Site From dc752e2a23d029b0ac9c81e77f7e41472567616d Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 9 Mar 2025 10:23:48 +0100 Subject: [PATCH 08/86] Pg: Support dynamic-oid types + vndbtag/vndbid --- c/pgconn.c | 14 ++++--- c/pgtypes.c | 94 +++++++++++++++++++++++++++++++++++++++++++++ t/pgtypes-dynamic.t | 22 +++++++++++ 3 files changed, 124 insertions(+), 6 deletions(-) diff --git a/c/pgconn.c b/c/pgconn.c index 4b84463..3a20b56 100644 --- a/c/pgconn.c +++ b/c/pgconn.c @@ -471,6 +471,7 @@ static void fupg_refresh_types(pTHX_ fupg_conn *c) { snprintf(t->name.n, sizeof(t->name.n), "%s", PQgetvalue(r, i, 1)); char typ = *PQgetvalue(r, i, 2); t->elemoid = fu_frombeU(32, PQgetvalue(r, i, 3)); + const fupg_type *builtin; if (t->elemoid) { if (typ == 'd') { /* domain */ @@ -487,13 +488,14 @@ static void fupg_refresh_types(pTHX_ fupg_conn *c) { /* enum, can use text send/recv */ t->send = fupg_send_text; t->recv = fupg_recv_text; + } else if ((builtin = fupg_builtin_byoid(t->oid))) { + t->send = builtin->send; + t->recv = builtin->recv; + } else if ((builtin = fupg_dynoid_byname(t->name.n))) { + t->send = builtin->send; + t->recv = builtin->recv; } else { - /* TODO: (multi)ranges, by-name lookup for dynamic-oid types */ - const fupg_type *builtin = fupg_builtin_byoid(t->oid); - if (builtin) { - t->send = builtin->send; - t->recv = builtin->recv; - } + /* TODO: (multi)ranges */ } } PQclear(r); diff --git a/c/pgtypes.c b/c/pgtypes.c index f9c5b00..6fb8835 100644 --- a/c/pgtypes.c +++ b/c/pgtypes.c @@ -638,6 +638,72 @@ SENDFN(time) { fustr_writebeI(64, out, SvNV(val) * 1000000); } + + +/* VNDB types */ + +const char vndbtag_alpha[] = "\0""abcdefghijklmnopqrstuvwxyz?????"; + +static I16 vndbtag_parse(char **str) { + I16 tag = 0; + if (**str >= 'a' && **str <= 'z') { + tag = (**str - 'a' + 1) << 10; + (*str)++; + if (**str >= 'a' && **str <= 'z') { + tag |= (**str - 'a' + 1) << 5; + (*str)++; + if (**str >= 'a' && **str <= 'z') { + tag |= **str - 'a' + 1; + (*str)++; + } + } + } + return tag; +} + +void vndbtag_fmt(I16 tag, char *out) { + out[0] = vndbtag_alpha[(tag >> 10) & 31]; + out[1] = vndbtag_alpha[(tag >> 5) & 31]; + out[2] = vndbtag_alpha[(tag >> 0) & 31]; + out[3] = 0; +} + +RECVFN(vndbtag) { + RLEN(2); + SV *r = newSV(4); + SvPOK_only(r); + vndbtag_fmt(fu_frombeI(16, buf), SvPVX(r)); + SvCUR_set(r, strlen(SvPVX(r))); + return r; +} + +SENDFN(vndbtag) { + char *t = SvPV_nolen(val); + I16 v = vndbtag_parse(&t); + if (*t) SERR("Invalid vndbtag: '%s'", SvPV_nolen(val)); + fustr_writebeI(16, out, v); +} + + +#define VNDBID2_MAXNUM (((I64)1<<48)-1) + +RECVFN(vndbid) { + RLEN(8); + I64 v = fu_frombeI(64, buf); + char tbuf[4]; + vndbtag_fmt(v >> 48, tbuf); + return newSVpvf("%s%"UVuf, tbuf, (UV)(v & VNDBID2_MAXNUM)); +} + +SENDFN(vndbid) { + char *ostr = SvPV_nolen(val), *str = ostr; + UV num; + I16 tag = vndbtag_parse(&str); + if (!grok_atoUV(str, &num, NULL) || num > VNDBID2_MAXNUM) SERR("invalid vndbid '%s'", ostr); + fustr_writebeI(64, out, ((I64)tag)<<48 | num); +} + + #undef SIV #undef RLEN #undef RECVFN @@ -818,7 +884,24 @@ static const fupg_type fupg_builtin[] = { #define FUPG_BUILTIN (sizeof(fupg_builtin) / sizeof(fupg_type)) +/* List of types identified by name */ + +#define DYNOID\ + T("vndbtag", vndbtag)\ + T("vndbid", vndbid) + +static const fupg_type fupg_dynoid[] = { +#define T(name, fun) { 0, 0, {name"\0"}, fupg_send_##fun, fupg_recv_##fun }, + DYNOID +#undef T +}; + +#undef DYNOID +#define FUPG_DYNOID (sizeof(fupg_dynoid) / sizeof(fupg_type)) + + /* List of special types for use with set_type() */ + #define SPECIALS\ T("$date_str", date_str)\ T("$hex", hex ) @@ -851,8 +934,19 @@ static const fupg_type *fupg_builtin_byoid(Oid oid) { return fupg_type_byoid(fupg_builtin, FUPG_BUILTIN, oid); } +static const fupg_type *fupg_dynoid_byname(const char *name) { + size_t i; + for (i=0; iq("SELECT dom FROM fupg_test_table")->val, 0x6262; }; + +subtest 'vndbid', sub { + plan skip_all => 'type not loaded in the database' if !$conn->q("SELECT 1 FROM pg_type WHERE typname = 'vndbtag'")->val; + + for my $t (qw/a zz xxx/) { + is $conn->q('SELECT $1::vndbtag', $t)->val, $t; + is $conn->q('SELECT $1::vndbtag', $t)->text_params->val, $t; + is $conn->q('SELECT $1::vndbtag', $t)->text_results->val, $t; + } + ok !eval { $conn->q('SELECT $1::vndbtag', '')->val }; + ok !eval { $conn->q('SELECT $1::vndbtag', 'abcd')->val }; + + for my $t (qw/a123 zz992883231 xxx18388123/) { + is $conn->q('SELECT $1::vndbid', $t)->val, $t; + is $conn->q('SELECT $1::vndbid', $t)->text_params->val, $t; + is $conn->q('SELECT $1::vndbid', $t)->text_results->val, $t; + } + ok !eval { $conn->q('SELECT $1::vndbid', '')->val }; + ok !eval { $conn->q('SELECT $1::vndbid', 'ab')->val }; + ok !eval { $conn->q('SELECT $1::vndbid', 'ab1219229999999999')->val }; +}; + done_testing; From d9d2ad0434d8272fe32893825dad2538ac41bf78 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 10 Mar 2025 12:24:52 +0100 Subject: [PATCH 09/86] Pg: Add COPY support --- FU.xs | 38 +++++++++++++++++ FU/Pg.pm | 117 ++++++++++++++++++++++++++++++++++++++++------------- c/libpq.h | 6 ++- c/pgst.c | 79 ++++++++++++++++++++++++++++++++++++ t/pgcopy.t | 90 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 301 insertions(+), 29 deletions(-) create mode 100644 t/pgcopy.t diff --git a/FU.xs b/FU.xs index 3c6084a..1d63ffe 100644 --- a/FU.xs +++ b/FU.xs @@ -68,6 +68,7 @@ fuxmlwr * FUXMLWR fupg_conn * FUPG_CONN fupg_txn * FUPG_TXN fupg_st * FUPG_ST +fupg_copy * FUPG_COPY INPUT FUFCGI @@ -89,6 +90,10 @@ FUPG_TXN FUPG_ST if (sv_derived_from($arg, \"FU::Pg::st\")) $var = (fupg_st *)SvIVX(SvRV($arg)); else fu_confess(\"invalid statement object\"); + +FUPG_COPY + if (sv_derived_from($arg, \"FU::Pg::copy\")) $var = (fupg_copy *)SvIVX(SvRV($arg)); + else fu_confess(\"invalid COPY object\"); #" EOT @@ -233,6 +238,11 @@ void q(fupg_conn *c, SV *sv, ...) FUPG_CONN_COOKIE; ST(0) = fupg_q(aTHX_ c, c->stflags, SvPVutf8_nolen(sv), ax, items); +void copy(fupg_conn *c, SV *sv) + CODE: + FUPG_CONN_COOKIE; + ST(0) = fupg_copy_exec(aTHX_ c, SvPVutf8_nolen(sv)); + void _set_type(fupg_conn *c, SV *name, SV *sendsv, SV *recvsv) CODE: fupg_set_type(aTHX_ c, name, sendsv, recvsv); @@ -282,6 +292,12 @@ void q(fupg_txn *t, SV *sv, ...) FUPG_TXN_COOKIE; ST(0) = fupg_q(aTHX_ t->conn, t->stflags, SvPVutf8_nolen(sv), ax, items); +# XXX: The copy object should probably keep a ref on the transaction +void copy(fupg_txn *t, SV *sv) + CODE: + FUPG_TXN_COOKIE; + ST(0) = fupg_copy_exec(aTHX_ t->conn, SvPVutf8_nolen(sv)); + MODULE = FU PACKAGE = FU::Pg::st @@ -393,6 +409,28 @@ void DESTROY(fupg_st *st) fupg_st_destroy(aTHX_ st); +MODULE = FU PACKAGE = FU::Pg::copy + +void write(fupg_copy *c, SV *sv) + CODE: + fupg_copy_write(aTHX_ c, sv); + +void read(fupg_copy *c) + CODE: + ST(0) = fupg_copy_read(aTHX_ c, 0); + +void is_binary(fupg_copy *c) + CODE: + ST(0) = c->bin ? &PL_sv_yes : &PL_sv_no; + +void close(fupg_copy *c) + CODE: + fupg_copy_close(aTHX_ c, 0); + +void DESTROY(fupg_copy *c) + CODE: + fupg_copy_destroy(aTHX_ c); + MODULE = FU PACKAGE = FU::XMLWriter diff --git a/FU/Pg.pm b/FU/Pg.pm index 176a8ac..4bd4b23 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -112,6 +112,11 @@ Inside a transaction that is in an error state. The transaction must be rolled back in order to recover to a usable state. This happens automatically when the transaction object goes out of scope. +=item active + +Currently executing a query. This state can only be observed during a L. + =item bad Connection is dead or otherwise unusable. @@ -155,10 +160,11 @@ executing the query, but I the query results have been returned. The subroutine is (currently) only called for queries executed through C<< $conn->exec >>, C<< $conn->q >>, C<< $conn->Q >> and their C<$txn> variants; -internal queries performed by this module (such as for transaction management, -querying type information, etc) do not trigger the callback. Statements that -result in an error being thrown during or before execution are also not -traceable this way. This behavior might change in the future. +C<< $conn->copy >> statements and internal queries performed by this module +(such as for transaction management, querying type information, etc) do not +trigger the callback. Statements that result in an error being thrown during or +before execution are also not traceable this way. This behavior might change in +the future. =item $conn->disconnect @@ -519,6 +525,11 @@ current implementation does not track subtransactions that closely) A subtransaction is in error state and awaiting to be rolled back. +=item active + +Currently executing a query. This state can only be observed during a L. + =item bad Connection is dead or otherwise unusable. @@ -740,6 +751,71 @@ I Methods to convert between the various formats. I Methods to query type info. + +=head2 COPY support + +You can use L for efficient +bulk data transfers between your application and the PostgreSQL server: + +=over + +=item $copy = $conn->copy($statement) + +=item $copy = $txn->copy($statement) + +Execute C<$statement> and return a C object that lets you +transfer data to or from Postgres. + +It is not possible to execute any other queries on the same connection while a +copy operation is in progress. When used on a transaction object, C<$txn> must +be kept alive long enough to finish the copy operation. + +=back + +A C<$copy> object supports the following methods: + +=over + +=item $copy->is_binary + +Returns true if the transfer is performed in the binary format, false for text. + +=item $copy->write($data) + +Send C<$data> to the server. An error is thrown if this is not a C operation. An error may be thrown if C<$data> is not a valid format +understood by Postgres, but such errors can also be deferred to C. + +C<$data> is interpreted as a Perl Unicode string for textual transfers and as a +binary string for binary transfers. + +=item $copy->read + +Return the next row read from the Postgres server, or C if no more data +is coming. In the text format, a single line - including trailing newline - is +returned as a Perl Unicode string. In the binary format, a single row is +returned as a byte string. An error is thrown if this is not a C operation. + +=item $copy->close + +Marks the end of the copy operation. Does not return anything but throws an +error if something went wrong. + +It is possible to close a read-copy operation before all data has been +consumed, but that causes all data to still be read and discarded during +C. If you really want to interrupt a large read operation, a more +efficient approach is to call C<< $conn->close >> and discard the entire +connection. + +It is not I to call this method, simply letting the C<$copy> object +run out of scope will do the trick as well, but in that case errors are +silently discarded. An explicit C is recommended to catch errors. + +=back + + =head2 Errors All methods can throw an exception on error. When possible, the error message @@ -823,32 +899,17 @@ to it after C is always safe: =item * Only works with blocking (synchronous) calls, not very suitable for use in asynchronous frameworks unless you know your queries are fast and you have a -low-latency connection with the Postgres server. +low-latency connection with the Postgres server. This is unlikely to improve in +future versions, Perl's async story is somewhat awkward in general, and fully +supporting async operation might require a fundamental redesign of how this +module works. -=back +=item * LISTEN support is still missing. May be added in a future version, as +this seems doable without supporting full async. -Missing features: - -=over - -=item COPY support - -I hope to implement this someday. - -=item LISTEN support - -Would be nice to have, most likely doable without going full async. - -=item Asynchronous calls - -Probably won't happen. Perl's async story is slightly awkward in general, and -fully supporting async operation might require a fundamental redesign of how -this module works. It certainly won't I the implementation. - -=item Pipelining - -I have some ideas for an API, but doubt I'll ever implement it. Suffers from -the same awkwardness and complexity as asynchronous calls. +=item * Pipelining support is also missing. I have some ideas for an API, but +doubt I'll ever implement it. Suffers from the same awkwardness and complexity +as asynchronous calls. =back diff --git a/c/libpq.h b/c/libpq.h index f931016..94d817e 100644 --- a/c/libpq.h +++ b/c/libpq.h @@ -36,6 +36,7 @@ typedef enum { PQTRANS_IDLE, PQTRANS_ACTIVE, PQTRANS_INTRANS, PQTRANS_INERROR, P #define PG_DIAG_SOURCE_FUNCTION 'R' #define PG_FUNCS \ + X(PQbinaryTuples, int, const PGresult *) \ X(PQclear, void, PGresult *) \ X(PQclosePrepared, PGresult *, PGconn *, const char *) \ X(PQcmdTuples, char *, PGresult *) \ @@ -51,9 +52,10 @@ typedef enum { PQTRANS_IDLE, PQTRANS_ACTIVE, PQTRANS_INTRANS, PQTRANS_INERROR, P X(PQfname, char *, const PGresult *, int) \ X(PQfreemem, void, void *) \ X(PQftype, Oid, const PGresult *, int) \ + X(PQgetCopyData, int, PGconn *, char **, int) \ + X(PQgetResult, PGresult *, PGconn *) \ X(PQgetisnull, int, const PGresult *, int, int) \ X(PQgetlength, int, const PGresult *, int, int) \ - X(PQgetResult, PGresult *, PGconn *) \ X(PQgetvalue, char *, const PGresult *, int, int) \ X(PQlibVersion, int, void) \ X(PQnfields, int, const PGresult *) \ @@ -61,6 +63,8 @@ typedef enum { PQTRANS_IDLE, PQTRANS_ACTIVE, PQTRANS_INTRANS, PQTRANS_INERROR, P X(PQntuples, int, const PGresult *) \ X(PQparamtype, Oid, const PGresult *, int) \ X(PQpipelineSync, int, PGconn *) \ + X(PQputCopyData, int, PGconn *, const char *, int) \ + X(PQputCopyEnd, int, PGconn *, const char *) \ X(PQresStatus, char *, ExecStatusType) \ X(PQresultErrorField, char *, const PGresult *, int) \ X(PQresultErrorMessage, char *, const PGresult *) \ diff --git a/c/pgst.c b/c/pgst.c index 94c8072..1e01392 100644 --- a/c/pgst.c +++ b/c/pgst.c @@ -503,3 +503,82 @@ static SV *fupg_st_kvh(pTHX_ fupg_st *st) { } return sv; } + + + + +/* COPY support */ + +typedef struct { + SV *self; + fupg_conn *conn; + char in; + char bin; + char rddone; + char closed; +} fupg_copy; + +static SV *fupg_copy_exec(pTHX_ fupg_conn *c, const char *sql) { + PGresult *r = PQexec(c->conn, sql); + + if (!r) fupg_conn_croak(c, "exec"); + int s = PQresultStatus(r); + switch (s) { + case PGRES_COPY_OUT: + case PGRES_COPY_IN: + break; + default: fupg_result_croak(r, "exec", sql); + } + + fupg_copy *copy = safecalloc(1, sizeof(fupg_copy)); + copy->conn = c; + SvREFCNT_inc(c->self); + copy->bin = !!PQbinaryTuples(r); + copy->in = s == PGRES_COPY_IN; + PQclear(r); + return fu_selfobj(copy, "FU::Pg::copy"); +} + +static void fupg_copy_write(pTHX_ fupg_copy *c, SV *data) { + STRLEN len; + const char *buf = c->bin ? SvPVbyte(data, len) : SvPVutf8(data, len); + if (PQputCopyData(c->conn->conn, buf, len) < 0) fupg_conn_croak(c->conn, "copy"); +} + +static SV *fupg_copy_read(pTHX_ fupg_copy *c, int discard) { + char *buf = NULL; + int len = PQgetCopyData(c->conn->conn, &buf, 0); + if (len == -1) { + c->rddone = 1; + return &PL_sv_undef; + } else if (len < 0) { + if (discard) c->rddone = 1; + else fupg_conn_croak(c->conn, "copy"); + } + SV *r = discard ? &PL_sv_undef : newSVpvn_flags(buf, len, SVs_TEMP | (c->bin ? 0 : SVf_UTF8)); + PQfreemem(buf); + return r; +} + +static void fupg_copy_close(pTHX_ fupg_copy *c, int ignerror) { + if (c->closed) return; + c->closed = 1; /* Mark as closed even on error, a second attempt won't help anyway */ + + if (c->in && PQputCopyEnd(c->conn->conn, NULL) < 0 && !ignerror) + fupg_conn_croak(c->conn, "copyEnd"); + + while (!c->in && !c->rddone) fupg_copy_read(aTHX_ c, 1); + + PGresult *r = PQgetResult(c->conn->conn); + if (!ignerror && !r) fupg_conn_croak(c->conn, "copyEnd"); + if (!ignerror && PQresultStatus(r) != PGRES_COMMAND_OK) fupg_result_croak(r, "copy", ""); + PQclear(r); + + while ((r = PQgetResult(c->conn->conn))) PQclear(r); +} + +static void fupg_copy_destroy(pTHX_ fupg_copy *c) { + fupg_copy_close(aTHX_ c, 1); + SvREFCNT_dec(c->conn->self); + safefree(c); +} diff --git a/t/pgcopy.t b/t/pgcopy.t new file mode 100644 index 0000000..9c81349 --- /dev/null +++ b/t/pgcopy.t @@ -0,0 +1,90 @@ +use v5.36; +use Test::More; + +plan skip_all => $@ if !eval { require FU::Pg; } && $@ =~ /Unable to load libpq/; +die $@ if $@; +plan skip_all => 'Please set FU_TEST_DB to a PostgreSQL connection string to run these tests' if !$ENV{FU_TEST_DB}; + +my $conn = FU::Pg->connect($ENV{FU_TEST_DB}); +$conn->_debug_trace(0); + +ok !eval { $conn->copy('SELECT 1') }; +like $@, qr/unexpected status code/; + +ok !eval { $conn->copy('COPX') }; +like $@, qr/syntax error/; + +$conn->exec('CREATE TEMPORARY TABLE fupg_copy_test (v int)'); + +is $conn->status, 'idle'; +{ + my $c = $conn->copy('COPY (SELECT 1) TO STDOUT'); + is $conn->status, 'active'; + $c->close; +} +is $conn->status, 'idle'; +$conn->copy('COPY (SELECT 1) TO STDOUT'); +is $conn->status, 'idle'; + +{ + my $c = $conn->copy('COPY fupg_copy_test FROM STDIN'); + is $conn->status, 'active'; + $c->close; +} +is $conn->status, 'idle'; +$conn->copy('COPY fupg_copy_test FROM STDIN'); +is $conn->status, 'idle'; + +{ + my $c = $conn->copy('COPY fupg_copy_test FROM STDIN'); + ok !$c->is_binary; + ok !eval { $c->{read} }; + $c->write("1"); + $c->write("\n2\n3\n"); + $c->close; + ok !eval { $c->read }; + ok !eval { $c->write('') }; + $c->close; +} +is $conn->status, 'idle'; + +{ + my $c = $conn->copy('COPY (SELECT * FROM fupg_copy_test ORDER BY v) TO STDOUT'); + ok !$c->is_binary; + ok !eval { $c->write('') }; + is $c->read, "1\n"; + is $c->read, "2\n"; + is $c->read, "3\n"; + is $c->read, undef; + $c->close; + ok !eval { $c->read }; + ok !eval { $c->write('') }; + $c->close; +} +is $conn->status, 'idle'; + +my $bin = ''; +{ + my $c = $conn->copy('COPY fupg_copy_test TO STDOUT (FORMAT binary)'); + ok $c->is_binary; + while (my $d = $c->read) { + $bin .= $d; + } + $c->close; +} +is $conn->status, 'idle'; + +{ + my $txn = $conn->txn; + my $c = $txn->copy('COPY fupg_copy_test FROM STDIN (FORMAT binary)'); + is $txn->status, 'active'; + ok $c->is_binary; + $c->write($bin); + $c->close; + + is $txn->q('SELECT sum(v) FROM fupg_copy_test')->val, 1+1+2+2+3+3; + $txn->rollback; +} +is $conn->q('SELECT sum(v) FROM fupg_copy_test')->val, 1+2+3; + +done_testing; From 9685287523bf2039ee223ed4eea91ea63ac08b62 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Mon, 10 Mar 2025 12:43:02 +0100 Subject: [PATCH 10/86] Version 0.3 --- ChangeLog | 13 +++++++++++++ FU.pm | 2 +- FU/DebugImpl.pm | 2 +- FU/Log.pm | 2 +- FU/MultipartFormData.pm | 2 +- FU/Pg.pm | 2 +- FU/SQL.pm | 2 +- FU/Util.pm | 2 +- FU/Validate.pm | 2 +- FU/XMLWriter.pm | 2 +- FU/XS.pm | 2 +- 11 files changed, 23 insertions(+), 10 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8a0bb73..7b07d4d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +0.3 - 2025-03-10 + - FU::Validate: Change API, ->validate() now returns data or throws error on failure + - FU::Validate: Rename 'rmwhitespace' to 'trim' + - FU::Validate: Support (more) human-readable error messages + - FU::Pg: Add support for COPY operations + - FU::Pg: Support types with dynamic OIDs + - FU: Add support for reading multipart/form-data + - FU: Add convenience methods for reading and writing JSON + - FU: Fix error in handling a 400 + - FU::MultipartFormData: New helper module + - Fix some tests + - Some doc improvements + 0.2 - 2025-02-28 - FU: Add debug_info web interface - FU: Add fu->denied and fu->notfound methods diff --git a/FU.pm b/FU.pm index 1b3f6aa..d6b9268 100644 --- a/FU.pm +++ b/FU.pm @@ -1,4 +1,4 @@ -package FU 0.2; +package FU 0.3; use v5.36; use Carp 'confess', 'croak'; use IO::Socket; diff --git a/FU/DebugImpl.pm b/FU/DebugImpl.pm index 1fc09cd..2d0ca50 100644 --- a/FU/DebugImpl.pm +++ b/FU/DebugImpl.pm @@ -1,5 +1,5 @@ # Internal module used by FU.pm -package FU::DebugImpl 0.2; +package FU::DebugImpl 0.3; use v5.36; use experimental 'for_list'; use FU; diff --git a/FU/Log.pm b/FU/Log.pm index 17f809c..a0e78ff 100644 --- a/FU/Log.pm +++ b/FU/Log.pm @@ -1,4 +1,4 @@ -package FU::Log 0.2; +package FU::Log 0.3; use v5.36; use Exporter 'import'; use POSIX 'strftime'; diff --git a/FU/MultipartFormData.pm b/FU/MultipartFormData.pm index da415f8..e19d7bc 100644 --- a/FU/MultipartFormData.pm +++ b/FU/MultipartFormData.pm @@ -1,4 +1,4 @@ -package FU::MultipartFormData; +package FU::MultipartFormData 0.3; use v5.36; use Carp 'confess'; use FU::Util 'utf8_decode'; diff --git a/FU/Pg.pm b/FU/Pg.pm index 4bd4b23..ba574d6 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -1,4 +1,4 @@ -package FU::Pg 0.2; +package FU::Pg 0.3; use v5.36; use FU::XS; diff --git a/FU/SQL.pm b/FU/SQL.pm index c86a3fb..8f091e5 100644 --- a/FU/SQL.pm +++ b/FU/SQL.pm @@ -1,4 +1,4 @@ -package FU::SQL 0.2; +package FU::SQL 0.3; use v5.36; use Exporter 'import'; use Carp 'confess'; diff --git a/FU/Util.pm b/FU/Util.pm index 275cac5..cd34fd3 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -1,4 +1,4 @@ -package FU::Util 0.2; +package FU::Util 0.3; use v5.36; use FU::XS; diff --git a/FU/Validate.pm b/FU/Validate.pm index 1c7c94b..4b93eb9 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -1,4 +1,4 @@ -package FU::Validate 0.2; +package FU::Validate 0.3; use v5.36; use experimental 'builtin', 'for_list'; diff --git a/FU/XMLWriter.pm b/FU/XMLWriter.pm index 08fc54d..1d01e81 100644 --- a/FU/XMLWriter.pm +++ b/FU/XMLWriter.pm @@ -1,4 +1,4 @@ -package FU::XMLWriter 0.2; +package FU::XMLWriter 0.3; use v5.36; use Carp 'confess'; use Exporter 'import'; diff --git a/FU/XS.pm b/FU/XS.pm index d2f7d19..1109318 100644 --- a/FU/XS.pm +++ b/FU/XS.pm @@ -1,5 +1,5 @@ # This module is for internal use by other FU modules. -package FU::XS 0.2; +package FU::XS 0.3; use Carp; # may be called by XS. use XSLoader; XSLoader::load('FU'); From 64a105e0131477962cb1eca016c9b7ddb1022ff0 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 14 Mar 2025 06:50:49 +0100 Subject: [PATCH 11/86] Validate: remove a level of indirection --- FU/MultipartFormData.pm | 4 +- FU/Pg.pm | 2 +- FU/Validate.pm | 106 ++++++++++++++++++++-------------------- 3 files changed, 56 insertions(+), 56 deletions(-) diff --git a/FU/MultipartFormData.pm b/FU/MultipartFormData.pm index e19d7bc..a777823 100644 --- a/FU/MultipartFormData.pm +++ b/FU/MultipartFormData.pm @@ -123,7 +123,9 @@ module makes an attempt to avoid any further copies of data values. =head2 Parsing -=over FU::MultipartFormData->parse($header, $body) +=over + +=item FU::MultipartFormData->parse($header, $body) Returns an array of field objects from the given C<$header>, which must be a valid value for the C request header, and the given C<$body>, diff --git a/FU/Pg.pm b/FU/Pg.pm index ba574d6..5d8c2a7 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -806,7 +806,7 @@ error if something went wrong. It is possible to close a read-copy operation before all data has been consumed, but that causes all data to still be read and discarded during C. If you really want to interrupt a large read operation, a more -efficient approach is to call C<< $conn->close >> and discard the entire +efficient approach is to call C<< $conn->disconnect >> and discard the entire connection. It is not I to call this method, simply letting the C<$copy> object diff --git a/FU/Validate.pm b/FU/Validate.pm index 4b93eb9..fd180b8 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -94,8 +94,8 @@ our %default_validations = ( # # name => $name_or_undef, # validations => [ $recursive_compiled_object, .. ], -# schema => $builtin_validations, # known_keys => { $key => 1, .. } # Extracted from 'keys', Used for the 'unknown' validation +# %builtin_validations # sub _compile($schema, $validations, $rec) { my(%top, @val); @@ -125,26 +125,24 @@ sub _compile($schema, $validations, $rec) { # Inherit some builtin options from validations for my $t (@val) { - if ($top{type} && $t->{schema}{type} && $top{type} ne $t->{schema}{type}) { - confess "Incompatible types, the schema specifies '$top{type}' but validation '$t->{name}' requires '$t->{schema}{type}'" if $schema->{type}; - confess "Incompatible types, '$t->[0]' requires '$t->{schema}{type}', but another validation requires '$top{type}'"; + if ($top{type} && $t->{type} && $top{type} ne $t->{type}) { + confess "Incompatible types, the schema specifies '$top{type}' but validation '$t->{name}' requires '$t->{type}'" if $schema->{type}; + confess "Incompatible types, '$t->[0]' requires '$t->{type}', but another validation requires '$top{type}'"; } - exists $t->{schema}{$_} and !exists $top{$_} and $top{$_} = delete $t->{schema}{$_} + exists $t->{$_} and !exists $top{$_} and $top{$_} = delete $t->{$_} for qw/default onerror trim type scalar unknown missing sort unique/; push @keys, keys %{ delete $t->{known_keys} }; - push @keys, keys %{ $t->{schema}{keys} } if $t->{schema}{keys}; + push @keys, keys %{ $t->{keys} } if $t->{keys}; } # Compile sub-schemas $top{keys} = { map +($_, __PACKAGE__->compile($top{keys}{$_}, $validations)), keys $top{keys}->%* } if $top{keys}; $top{values} = __PACKAGE__->compile($top{values}, $validations) if $top{values}; - return { - validations => \@val, - schema => \%top, - known_keys => { map +($_,1), @keys }, - }; + $top{validations} = \@val; + $top{known_keys} = { map +($_,1), @keys }; + \%top; } @@ -153,26 +151,26 @@ sub compile($pkg, $schema, $validations={}) { my $c = _compile $schema, $validations, 64; - $c->{schema}{type} //= 'scalar'; - $c->{schema}{missing} //= 'create'; - $c->{schema}{trim} //= 1 if $c->{schema}{type} eq 'scalar'; - $c->{schema}{unknown} //= 'remove' if $c->{schema}{type} eq 'hash'; + $c->{type} //= 'scalar'; + $c->{missing} //= 'create'; + $c->{trim} //= 1 if $c->{type} eq 'scalar'; + $c->{unknown} //= 'remove' if $c->{type} eq 'hash'; - confess "Invalid value for 'type': $c->{schema}{type}" if !$type_vals{$c->{schema}{type}}; - confess "Invalid value for 'missing': $c->{schema}{missing}" if !$missing_vals{$c->{schema}{missing}}; - confess "Invalid value for 'unknown': $c->{schema}{unknown}" if exists $c->{schema}{unknown} && !$unknown_vals{$c->{schema}{unknown}}; + confess "Invalid value for 'type': $c->{type}" if !$type_vals{$c->{type}}; + confess "Invalid value for 'missing': $c->{missing}" if !$missing_vals{$c->{missing}}; + confess "Invalid value for 'unknown': $c->{unknown}" if exists $c->{unknown} && !$unknown_vals{$c->{unknown}}; - delete $c->{schema}{default} if ref $c->{schema}{default} eq 'SCALAR' && ${$c->{schema}{default}} eq 'required'; + delete $c->{default} if ref $c->{default} eq 'SCALAR' && ${$c->{default}} eq 'required'; - if (exists $c->{schema}{sort}) { - my $s = $c->{schema}{sort}; - $c->{schema}{sort} = + if (exists $c->{sort}) { + my $s = $c->{sort}; + $c->{sort} = ref $s eq 'CODE' ? $s : $s eq 'str' ? sub($x,$y) { $x cmp $y } : $s eq 'num' ? sub($x,$y) { $x <=> $y } - : confess "Unknown value for 'sort': $c->{schema}{sort}"; + : confess "Unknown value for 'sort': $c->{sort}"; } - $c->{schema}{unique} = sub { $_[0] } if $c->{schema}{unique} && !ref $c->{schema}{unique} && !$c->{schema}{sort}; + $c->{unique} = sub { $_[0] } if $c->{unique} && !ref $c->{unique} && !$c->{sort}; bless $c, $pkg; } @@ -182,14 +180,14 @@ sub _validate_rec { my $c = $_[0]; # hash keys - if ($c->{schema}{keys}) { + if ($c->{keys}) { my @err; - for my ($k, $s) ($c->{schema}{keys}->%*) { + for my ($k, $s) ($c->{keys}->%*) { if (!exists $_[1]{$k}) { - next if $s->{schema}{missing} eq 'ignore'; - return { validation => 'missing', key => $k } if $s->{schema}{missing} eq 'reject'; - $_[1]{$k} = ref $s->{schema}{default} eq 'CODE' ? $s->{schema}{default}->() : $s->{schema}{default} // undef; - next if exists $s->{schema}{default}; + next if $s->{missing} eq 'ignore'; + return { validation => 'missing', key => $k } if $s->{missing} eq 'reject'; + $_[1]{$k} = ref $s->{default} eq 'CODE' ? $s->{default}->() : $s->{default} // undef; + next if exists $s->{default}; } my $r = _validate($s, $_[1]{$k}); @@ -202,10 +200,10 @@ sub _validate_rec { } # array values - if ($c->{schema}{values}) { + if ($c->{values}) { my @err; for my $i (0..$#{$_[1]}) { - my $r = _validate($c->{schema}{values}, $_[1][$i]); + my $r = _validate($c->{values}, $_[1][$i]); if ($r) { $r->{index} = $i; push @err, $r; @@ -226,8 +224,8 @@ sub _validate_rec { } # func - if ($c->{schema}{func}) { - my $r = $c->{schema}{func}->($_[1]); + if ($c->{func}) { + my $r = $c->{func}->($_[1]); return { %$r, validation => 'func' } if ref $r eq 'HASH'; return { validation => 'func', result => $r } if !$r; } @@ -236,24 +234,24 @@ sub _validate_rec { sub _validate_array { my $c = $_[0]; - return if $c->{schema}{type} ne 'array'; + return if $c->{type} ne 'array'; - $_[1] = [sort { $c->{schema}{sort}->($a, $b) } $_[1]->@* ] if $c->{schema}{sort}; + $_[1] = [sort { $c->{sort}->($a, $b) } $_[1]->@* ] if $c->{sort}; # Key-based uniqueness - if ($c->{schema}{unique} && ref $c->{schema}{unique} eq 'CODE') { + if ($c->{unique} && ref $c->{unique} eq 'CODE') { my %h; for my $i (0..$#{$_[1]}) { - my $k = $c->{schema}{unique}->($_[1][$i]); + my $k = $c->{unique}->($_[1][$i]); return { validation => 'unique', index_a => $h{$k}, value_a => $_[1][$h{$k}], index_b => $i, value_b => $_[1][$i], key => $k } if exists $h{$k}; $h{$k} = $i; } # Comparison-based uniqueness - } elsif ($c->{schema}{unique}) { + } elsif ($c->{unique}) { for my $i (0..$#{$_[1]}-1) { return { validation => 'unique', index_a => $i, value_a => $_[1][$i], index_b => $i+1, value_b => $_[1][$i+1] } - if $c->{schema}{sort}->($_[1][$i], $_[1][$i+1]) == 0 + if $c->{sort}->($_[1][$i], $_[1][$i+1]) == 0 } } } @@ -263,29 +261,29 @@ sub _validate_input { my $c = $_[0]; # trim (needs to be done before the 'default' test) - $_[1] = trim $_[1] =~ s/\r//rg if defined $_[1] && !ref $_[1] && $c->{schema}{type} eq 'scalar' && $c->{schema}{trim}; + $_[1] = trim $_[1] =~ s/\r//rg if defined $_[1] && !ref $_[1] && $c->{type} eq 'scalar' && $c->{trim}; # default if (!defined $_[1] || (!ref $_[1] && $_[1] eq '')) { - if (exists $c->{schema}{default}) { - $_[1] = ref $c->{schema}{default} eq 'CODE' ? $c->{schema}{default}->($_[1]) : $c->{schema}{default}; + if (exists $c->{default}) { + $_[1] = ref $c->{default} eq 'CODE' ? $c->{default}->($_[1]) : $c->{default}; return; } return { validation => 'required' }; } - if ($c->{schema}{type} eq 'scalar') { + if ($c->{type} eq 'scalar') { return { validation => 'type', expected => 'scalar', got => lc ref $_[1] } if ref $_[1]; - } elsif ($c->{schema}{type} eq 'hash') { + } elsif ($c->{type} eq 'hash') { return { validation => 'type', expected => 'hash', got => lc ref $_[1] || 'scalar' } if ref $_[1] ne 'HASH'; # Each branch below makes a shallow copy of the hash, so that further # validations can perform in-place modifications without affecting the # input. - if ($c->{schema}{unknown} eq 'remove') { + if ($c->{unknown} eq 'remove') { $_[1] = { map +($_, $_[1]{$_}), grep $c->{known_keys}{$_}, keys $_[1]->%* }; - } elsif ($c->{schema}{unknown} eq 'reject') { + } elsif ($c->{unknown} eq 'reject') { my @err = grep !$c->{known_keys}{$_}, keys $_[1]->%*; return { validation => 'unknown', keys => \@err, expected => [ sort keys %{$c->{known_keys}} ] } if @err; $_[1] = { $_[1]->%* }; @@ -293,16 +291,16 @@ sub _validate_input { $_[1] = { $_[1]->%* }; } - } elsif ($c->{schema}{type} eq 'array') { - $_[1] = [$_[1]] if $c->{schema}{scalar} && !ref $_[1]; - return { validation => 'type', expected => $c->{schema}{scalar} ? 'array or scalar' : 'array', got => lc ref $_[1] || 'scalar' } if ref $_[1] ne 'ARRAY'; + } elsif ($c->{type} eq 'array') { + $_[1] = [$_[1]] if $c->{scalar} && !ref $_[1]; + return { validation => 'type', expected => $c->{scalar} ? 'array or scalar' : 'array', got => lc ref $_[1] || 'scalar' } if ref $_[1] ne 'ARRAY'; $_[1] = [$_[1]->@*]; # Create a shallow copy to prevent in-place modification. - } elsif ($c->{schema}{type} eq 'any') { + } elsif ($c->{type} eq 'any') { # No need to do anything here. } else { - confess "Unknown type '$c->{schema}{type}'"; # Already checked in compile(), but be extra safe + confess "Unknown type '$c->{type}'"; # Already checked in compile(), but be extra safe } &_validate_rec || &_validate_array; @@ -312,8 +310,8 @@ sub _validate_input { sub _validate { my $c = $_[0]; my $r = &_validate_input; - ($r, $_[1]) = (undef, ref $c->{schema}{onerror} eq 'CODE' ? $c->{schema}{onerror}->($_[0], bless $r, 'FU::Validate::err') : $c->{schema}{onerror}) - if $r && exists $c->{schema}{onerror}; + ($r, $_[1]) = (undef, ref $c->{onerror} eq 'CODE' ? $c->{onerror}->($_[0], bless $r, 'FU::Validate::err') : $c->{onerror}) + if $r && exists $c->{onerror}; $r } From 1363e112698c186427bf1689bfa861b97cea0357 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 14 Mar 2025 06:57:59 +0100 Subject: [PATCH 12/86] Validate: allow array schemas + defer known_keys hash creation Doesn't allow multiple 'func' options yet, needs work. --- FU/Validate.pm | 44 +++++++++++++++++++++++++++++--------------- t/validate.t | 5 ++++- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/FU/Validate.pm b/FU/Validate.pm index fd180b8..54b3c77 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -76,7 +76,7 @@ our %default_validations = ( uint => { _reg $re_uint }, # implies num min => sub($min) { +{ num => 1, func => sub { $_[0] >= $min ? 1 : { expected => $min, got => $_[0] } } } }, max => sub($max) { +{ num => 1, func => sub { $_[0] <= $max ? 1 : { expected => $max, got => $_[0] } } } }, - range => sub { +{ min => $_[0][0], max => $_[0][1] } }, + range => sub { [ min => $_[0][0], max => $_[0][1] ] }, ascii => { _reg qr/^[\x20-\x7E]*$/ }, sl => { _reg qr/^[^\t\r\n]+$/ }, @@ -99,10 +99,12 @@ our %default_validations = ( # sub _compile($schema, $validations, $rec) { my(%top, @val); - my @keys = keys $schema->{keys}->%* if $schema->{keys}; - for my($name, $val) (%$schema) { + for my($name, $val) (ref $schema eq 'ARRAY' ? @$schema : %$schema) { if ($builtin{$name}) { + confess "Invalid value for 'type': $val" if $name eq 'type' && !$type_vals{$val}; + confess "Invalid value for 'missing': $val" if $name eq 'missing' && !$missing_vals{$val}; + confess "Invalid value for 'unknown': $val" if $name eq 'unknown' && !$unknown_vals{$val}; $top{$name} = $schema->{$name}; next; } @@ -117,6 +119,8 @@ sub _compile($schema, $validations, $rec) { push @val, $v; } + my @keys = keys $top{keys}->%* if $top{keys}; + for my ($n,$t) (qw/keys hash unknown hash values array sort array unique array/) { next if !exists $top{$n}; confess "Incompatible types, the schema specifies '$top{type}' but the '$n' validation implies '$t'" if $top{type} && $top{type} ne $t; @@ -132,8 +136,8 @@ sub _compile($schema, $validations, $rec) { exists $t->{$_} and !exists $top{$_} and $top{$_} = delete $t->{$_} for qw/default onerror trim type scalar unknown missing sort unique/; - push @keys, keys %{ delete $t->{known_keys} }; - push @keys, keys %{ $t->{keys} } if $t->{keys}; + push @keys, delete($t->{known_keys})->@* if $t->{known_keys}; + push @keys, keys $t->{keys}->%* if $t->{keys}; } # Compile sub-schemas @@ -141,24 +145,20 @@ sub _compile($schema, $validations, $rec) { $top{values} = __PACKAGE__->compile($top{values}, $validations) if $top{values}; $top{validations} = \@val; - $top{known_keys} = { map +($_,1), @keys }; + $top{known_keys} = \@keys; \%top; } sub compile($pkg, $schema, $validations={}) { return $schema if $schema isa __PACKAGE__; - my $c = _compile $schema, $validations, 64; $c->{type} //= 'scalar'; $c->{missing} //= 'create'; $c->{trim} //= 1 if $c->{type} eq 'scalar'; $c->{unknown} //= 'remove' if $c->{type} eq 'hash'; - - confess "Invalid value for 'type': $c->{type}" if !$type_vals{$c->{type}}; - confess "Invalid value for 'missing': $c->{missing}" if !$missing_vals{$c->{missing}}; - confess "Invalid value for 'unknown': $c->{unknown}" if exists $c->{unknown} && !$unknown_vals{$c->{unknown}}; + $c->{known_keys} = { map +($_,1), $c->{known_keys}->@* } if $c->{known_keys}; delete $c->{default} if ref $c->{default} eq 'SCALAR' && ${$c->{default}} eq 'required'; @@ -420,10 +420,11 @@ validation. These are documented in L below. =head1 SCHEMA DEFINITION -A schema is a hashref, each key is the name of a built-in option or of a -validation to be performed. None of the options or validations are required, -but some built-ins have default values. This means that the empty schema C<{}> -is actually equivalent to: +A schema is an arrayref or hashref, where each key is the name of a built-in +option or of a validation to be performed and the values are the arguments to +those validations. None of the options or validations are required, but some +built-ins have default values. This means that the empty schema C<{}> is +actually equivalent to: { type => 'scalar', trim => 1, @@ -431,6 +432,19 @@ is actually equivalent to: missing => 'create', } +Built-in options are always validated in a fixed order, but the order in which +standard and custom validations are performed is random when the schema is +given as a hashref. This is rarely a problem, but it can in some cases affect +the returned error message or whether a later validation will receive data +normalized by a previous validation. An arrayref can be used to enforce a +validation order: + + [ enum => [1, 2, 'a'], int => 1 ] + +Or to use the same validation multiple times: + + [ regex => qr/^a/, regex => qr/z$/ ] + =head2 Built-in options =over diff --git a/t/validate.t b/t/validate.t index b01352b..2f8d8d9 100644 --- a/t/validate.t +++ b/t/validate.t @@ -131,6 +131,9 @@ f { type => 'hash', length => 1, unknown => 'pass' }, {qw/1 a 2 b/}, { validatio t { type => 'hash', length => 1, keys => {a => {missing=>'ignore'}, b => {missing=>'ignore'}} }, {a=>1}, {a=>1}; t { regex => '^a' }, 'abc', 'abc'; # XXX: Can't use qr// here because t() does dclone(). The 'hex' test covers that case anyway. f { regex => '^a' }, 'cba', { validation => 'regex', regex => '^a', got => 'cba' }, "failed validation 'regex'"; +t [ regex => '^a', regex => 'z$' ], 'abcxyz', 'abcxyz'; +f [ regex => '^a', regex => 'z$' ], 'bcxyz', { validation => 'regex', regex => '^a', got => 'bcxyz' }, "failed validation 'regex'"; +f [ regex => '^a', regex => 'z$' ], 'abcxy', { validation => 'regex', regex => 'z$', got => 'abcxy' }, "failed validation 'regex'"; t { enum => [1,2] }, 1, 1; t { enum => [1,2] }, 2, 2; f { enum => [1,2] }, 3, { validation => 'enum', expected => [1,2], got => 3 }, "failed validation 'enum'"; @@ -205,7 +208,7 @@ t { range => [1,2] }, 1, 1; t { range => [1,2] }, 2, 2; f { range => [1,2] }, 0.9, { validation => 'range', error => { validation => 'min', expected => 1, got => 0.9 } }, "validation 'range': failed validation 'min'"; f { range => [1,2] }, 2.1, { validation => 'range', error => { validation => 'max', expected => 2, got => 2.1 } }, "validation 'range': failed validation 'max'"; -#t { range => [1,2] }, 'a', 'a', { validation => 'range', error => { validation => 'max', error => nerr 'a' } }; # XXX: Error validation type depends on evaluation order +f { range => [1,2] }, 'a', { validation => 'range', error => { validation => 'min', error => (nerr 'a')[0] } }, "validation 'range': validation 'min': failed validation 'num'"; # email template use utf8; From f248a33c1c4cfe0edadf4146681ee2e1b13c0f01 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 14 Mar 2025 09:25:56 +0100 Subject: [PATCH 13/86] Validate: Allow multiple func validations in arrayref schema --- FU/Validate.pm | 63 ++++++++++++++++++++++++++++---------------------- t/validate.t | 6 ++++- 2 files changed, 41 insertions(+), 28 deletions(-) diff --git a/FU/Validate.pm b/FU/Validate.pm index 54b3c77..5ea79d6 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -23,10 +23,10 @@ my %unknown_vals = map +($_,1), qw/remove reject pass/; my %missing_vals = map +($_,1), qw/create reject ignore/; sub _length($exp, $min, $max) { - +{ func => sub($v) { + [ func => sub($v) { my $got = ref $v eq 'HASH' ? keys %$v : ref $v eq 'ARRAY' ? @$v : length $v; (!defined $min || $got >= $min) && (!defined $max || $got <= $max) ? 1 : { expected => $exp, got => $got }; - }} + }] } # Basically the same as ( regex => $arg ), but hides the regex error @@ -100,7 +100,15 @@ our %default_validations = ( sub _compile($schema, $validations, $rec) { my(%top, @val); - for my($name, $val) (ref $schema eq 'ARRAY' ? @$schema : %$schema) { + # 'func' is always evaluated last in hashref schemas. + my $func = ref $schema eq 'HASH' && $schema->{func}; + + for my($name, $val) (ref $schema eq 'HASH' ? %$schema : @$schema) { + if ($name eq 'func') { + push @val, $val if !$func; + next; + } + if ($builtin{$name}) { confess "Invalid value for 'type': $val" if $name eq 'type' && !$type_vals{$val}; confess "Invalid value for 'missing': $val" if $name eq 'missing' && !$missing_vals{$val}; @@ -112,13 +120,15 @@ sub _compile($schema, $validations, $rec) { my $t = $validations->{$name} || $default_validations{$name}; confess "Unknown validation: $name" if !$t; confess "Recursion limit exceeded while resolving validation '$name'" if $rec < 1; - $t = ref $t eq 'HASH' ? $t : $t->($val); + $t = ref $t eq 'CODE' ? $t->($val) : $t; my $v = _compile($t, $validations, $rec-1); $v->{name} = $name; push @val, $v; } + push @val, $func if $func; + my @keys = keys $top{keys}->%* if $top{keys}; for my ($n,$t) (qw/keys hash unknown hash values array sort array unique array/) { @@ -128,7 +138,7 @@ sub _compile($schema, $validations, $rec) { } # Inherit some builtin options from validations - for my $t (@val) { + for my $t (grep ref $_ eq 'HASH', @val) { if ($top{type} && $t->{type} && $top{type} ne $t->{type}) { confess "Incompatible types, the schema specifies '$top{type}' but validation '$t->{name}' requires '$t->{type}'" if $schema->{type}; confess "Incompatible types, '$t->[0]' requires '$t->{type}', but another validation requires '$top{type}'"; @@ -213,21 +223,20 @@ sub _validate_rec { } # validations - for ($c->{validations}->@*) { - my $r = _validate_rec($_, $_[1]); - return { - # If the error was a custom 'func' object, then make that the primary cause. - # This makes it possible for validations to provide their own error objects. - $r->{validation} eq 'func' && (!exists $r->{result} || keys $r->%* > 2) ? $r->%* : (error => $r), - validation => $_->{name}, - } if $r; - } - - # func - if ($c->{func}) { - my $r = $c->{func}->($_[1]); - return { %$r, validation => 'func' } if ref $r eq 'HASH'; - return { validation => 'func', result => $r } if !$r; + for my $v ($c->{validations}->@*) { + if (ref $v eq 'CODE') { + my $r = $v->($_[1]); + return { %$r, validation => 'func' } if ref $r eq 'HASH'; + return { validation => 'func', result => $r } if !$r; + } else { + my $r = _validate_rec($v, $_[1]); + return { + # If the error was a custom 'func' object, then make that the primary cause. + # This makes it possible for validations to provide their own error objects. + $r->{validation} eq 'func' && (!exists $r->{result} || keys $r->%* > 2) ? $r->%* : (error => $r), + validation => $v->{name}, + } if $r; + } } } @@ -669,10 +678,10 @@ assumes the first schema from the previous example. =item func => $sub Run the input through a subroutine to perform additional validation or -normalization. The subroutine is only called after all other validations have -succeeded. The subroutine is called with the input as its only argument. -Normalization of the input can be done by assigning to the first argument or -modifying its value in-place. +normalization. When the schema is a hashref, the subroutine is only called +after all other validations have succeeded. The subroutine is called with the +input as its only argument. Normalization of the input can be done by +assigning to the first argument or modifying its value in-place. On success, the subroutine should return a true value. On failure, it should return either a false value or a hashref. The hashref will have the @@ -873,9 +882,9 @@ considered "known". All other built-in options follow inheritance semantics: These options can be set in a custom validation, and they are inherited by the top-level schema. If -the same option is set in multiple validations a random one will be inherited, -so that's not a good idea. The top-level schema can always override options set -by custom validations. +the same option is set in multiple validations, the final one will be +inherited. The top-level schema can always override options set by custom +validations. =head3 Global custom validations diff --git a/t/validate.t b/t/validate.t index 2f8d8d9..c04288a 100644 --- a/t/validate.t +++ b/t/validate.t @@ -17,6 +17,7 @@ my %validations = ( onerrorsub => { onerror => sub { ref $_[1] } }, collapsews => { trim => 0, func => sub { $_[0] =~ s/\s+/ /g; 1 } }, neverfails => { onerror => 'err' }, + doublefunc => [ func => sub { $_[0] == 0 }, func => sub { $_[0] = 2; 1; } ], revnum => { type => 'array', sort => sub($x,$y) { $y <=> $x } }, uniquelength => { type => 'array', values => { type => 'array' }, unique => sub { scalar @{$_[0]} } }, person => { @@ -38,7 +39,7 @@ sub t($schema, $input, $output) { my $input_copy = dclone([$input])->[0]; my $res = FU::Validate->compile($schema, \%validations)->validate($input); - #diag explain FU::Validate->compile($schema, \%validations) if $line == 139; + #diag explain FU::Validate->compile($schema, \%validations) if $line == 98; is_deeply $schema, $schema_copy, "schema modification $line"; is_deeply $input, $input_copy, "input modification $line"; is_deeply $res, $output, "data ok $line"; @@ -51,6 +52,7 @@ sub f($schema, $input, $error, @msg) { my $input_copy = dclone([$input])->[0]; ok !eval { FU::Validate->compile($schema, \%validations)->validate($input); 1 }, "eval $line"; + #diag explain FU::Validate->compile($schema, \%validations) if $line == 162; is_deeply $schema, $schema_copy, "schema modification $line"; is_deeply $input, $input_copy, "input modification $line"; delete $@->{longmess}; @@ -179,6 +181,8 @@ t { person => 1, unknown => 'remove' }, {name => 'x', sex => 'y', extra => 1}, { t { neverfails => 1, int => 1 }, undef, 'err'; t { neverfails => 1, int => 1 }, 'x', 'err'; t { neverfails => 1, int => 1, onerror => undef }, 'x', undef; # XXX: no way to 'unset' an inherited onerror clause, hmm. +t { doublefunc => 1 }, 0, 2; +f { doublefunc => 1 }, 1, { validation => 'doublefunc', error => { validation => 'func', result => '' } }, "validation 'doublefunc': failed validation 'func'"; # numbers sub nerr { ({ validation => 'num', got => $_[0] }, "failed validation 'num'") } From cea691dd55ed427975aeeb0583b73300bc441fa1 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 14 Mar 2025 10:52:09 +0100 Subject: [PATCH 14/86] Validate: drop creation of default values for built-ins To better support merging multiple validations, which'll come next. Probably. --- FU/Validate.pm | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/FU/Validate.pm b/FU/Validate.pm index 5ea79d6..75af46d 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -164,10 +164,6 @@ sub compile($pkg, $schema, $validations={}) { return $schema if $schema isa __PACKAGE__; my $c = _compile $schema, $validations, 64; - $c->{type} //= 'scalar'; - $c->{missing} //= 'create'; - $c->{trim} //= 1 if $c->{type} eq 'scalar'; - $c->{unknown} //= 'remove' if $c->{type} eq 'hash'; $c->{known_keys} = { map +($_,1), $c->{known_keys}->@* } if $c->{known_keys}; delete $c->{default} if ref $c->{default} eq 'SCALAR' && ${$c->{default}} eq 'required'; @@ -194,8 +190,8 @@ sub _validate_rec { my @err; for my ($k, $s) ($c->{keys}->%*) { if (!exists $_[1]{$k}) { - next if $s->{missing} eq 'ignore'; - return { validation => 'missing', key => $k } if $s->{missing} eq 'reject'; + next if $s->{missing} && $s->{missing} eq 'ignore'; + return { validation => 'missing', key => $k } if $s->{missing} && $s->{missing} eq 'reject'; $_[1]{$k} = ref $s->{default} eq 'CODE' ? $s->{default}->() : $s->{default} // undef; next if exists $s->{default}; } @@ -243,7 +239,6 @@ sub _validate_rec { sub _validate_array { my $c = $_[0]; - return if $c->{type} ne 'array'; $_[1] = [sort { $c->{sort}->($a, $b) } $_[1]->@* ] if $c->{sort}; @@ -269,8 +264,10 @@ sub _validate_array { sub _validate_input { my $c = $_[0]; + my $type = $c->{type} // 'scalar'; + # trim (needs to be done before the 'default' test) - $_[1] = trim $_[1] =~ s/\r//rg if defined $_[1] && !ref $_[1] && $c->{type} eq 'scalar' && $c->{trim}; + $_[1] = trim $_[1] =~ s/\r//rg if defined $_[1] && !ref $_[1] && $type eq 'scalar' && (!exists $c->{trim} || $c->{trim}); # default if (!defined $_[1] || (!ref $_[1] && $_[1] eq '')) { @@ -281,18 +278,18 @@ sub _validate_input { return { validation => 'required' }; } - if ($c->{type} eq 'scalar') { + if ($type eq 'scalar') { return { validation => 'type', expected => 'scalar', got => lc ref $_[1] } if ref $_[1]; - } elsif ($c->{type} eq 'hash') { + } elsif ($type eq 'hash') { return { validation => 'type', expected => 'hash', got => lc ref $_[1] || 'scalar' } if ref $_[1] ne 'HASH'; # Each branch below makes a shallow copy of the hash, so that further # validations can perform in-place modifications without affecting the # input. - if ($c->{unknown} eq 'remove') { + if (!$c->{unknown} || $c->{unknown} eq 'remove') { $_[1] = { map +($_, $_[1]{$_}), grep $c->{known_keys}{$_}, keys $_[1]->%* }; - } elsif ($c->{unknown} eq 'reject') { + } elsif ($c->{unknown} && $c->{unknown} eq 'reject') { my @err = grep !$c->{known_keys}{$_}, keys $_[1]->%*; return { validation => 'unknown', keys => \@err, expected => [ sort keys %{$c->{known_keys}} ] } if @err; $_[1] = { $_[1]->%* }; @@ -300,19 +297,16 @@ sub _validate_input { $_[1] = { $_[1]->%* }; } - } elsif ($c->{type} eq 'array') { + } elsif ($type eq 'array') { $_[1] = [$_[1]] if $c->{scalar} && !ref $_[1]; return { validation => 'type', expected => $c->{scalar} ? 'array or scalar' : 'array', got => lc ref $_[1] || 'scalar' } if ref $_[1] ne 'ARRAY'; $_[1] = [$_[1]->@*]; # Create a shallow copy to prevent in-place modification. - } elsif ($c->{type} eq 'any') { + } elsif ($type eq 'any') { # No need to do anything here. - - } else { - confess "Unknown type '$c->{type}'"; # Already checked in compile(), but be extra safe } - &_validate_rec || &_validate_array; + &_validate_rec || ($type eq 'array' && &_validate_array); } From fa24ca53e3e998187400ebb28119f942fa1d8a01 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 14 Mar 2025 14:28:26 +0100 Subject: [PATCH 15/86] Validate: improved arrayref-schema semantics This allows all built-in options to be duplicated inside a single schema, the semantics of which are the same as the kind of merging done as part of inheriting options from custom validations. This also causes all 'keys' and 'values' validation schemas to be merged, which changes error messages a bit but is great for introspection. Probably slightly improves performance as well. --- FU/Validate.pm | 217 ++++++++++++++++++++++++------------------------- t/validate.t | 13 ++- 2 files changed, 114 insertions(+), 116 deletions(-) diff --git a/FU/Validate.pm b/FU/Validate.pm index 75af46d..feeabb3 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -21,6 +21,11 @@ my %builtin = map +($_,1), qw/ my %type_vals = map +($_,1), qw/scalar hash array any/; my %unknown_vals = map +($_,1), qw/remove reject pass/; my %missing_vals = map +($_,1), qw/create reject ignore/; +my %implied_type = qw/keys hash unknown hash values array sort array unique array/; +my %sort_vals = ( + str => sub($x,$y) { $x cmp $y }, + num => sub($x,$y) { $x <=> $y }, +); sub _length($exp, $min, $max) { [ func => sub($v) { @@ -89,136 +94,119 @@ our %default_validations = ( ); -# Loads a hashref of validations and a schema definition, and converts it into -# an object with: -# -# name => $name_or_undef, -# validations => [ $recursive_compiled_object, .. ], -# known_keys => { $key => 1, .. } # Extracted from 'keys', Used for the 'unknown' validation -# %builtin_validations -# -sub _compile($schema, $validations, $rec) { - my(%top, @val); +sub _new { bless { validations => [], @_ }, __PACKAGE__ } - # 'func' is always evaluated last in hashref schemas. - my $func = ref $schema eq 'HASH' && $schema->{func}; - for my($name, $val) (ref $schema eq 'HASH' ? %$schema : @$schema) { +sub _compile($schema, $custom, $rec, $top, $validations=$top->{validations}) { + # For hashref schemas, builtins always override other validations + $schema = [ + map +($_, $schema->{$_}), + (grep !$builtin{$_}, keys %$schema), + (grep $builtin{$_}, keys %$schema), + ] if ref $schema eq 'HASH'; + + for my($name, $val) (@$schema) { + if ($name eq 'type') { + confess "Invalid value for 'type': $val" if $name eq 'type' && !$type_vals{$val}; + confess "Incompatible types, the schema specifies '$val', but another validation requires '$top->{type}'" if $top->{type} && $top->{type} ne $val;; + $top->{type} = $val; + next; + } + + my $type = $implied_type{$name}; + if ($type) { + confess "Incompatible types, the schema specifies '$top->{type}' but the '$name' validation implies '$type'" if $top->{type} && $top->{type} ne $type; + $top->{type} = $type; + } + + if ($name eq 'values') { + $top->{values} ||= _new; + _compile($val, $custom, $rec-1, $top->{values}); + next; + } + + if ($name eq 'keys') { + $top->{keys} ||= {}; + for my($n,$v) (%$val) { + $top->{keys}{$n} ||= _new; + _compile($v, $custom, $rec-1, $top->{keys}{$n}); + } + next; + } + if ($name eq 'func') { - push @val, $val if !$func; + push @$validations, $val; + next; + } + + if ($name eq 'default') { + $top->{default} = $val; + delete $top->{default} if ref $val eq 'SCALAR' && $$val eq 'required'; next; } if ($builtin{$name}) { - confess "Invalid value for 'type': $val" if $name eq 'type' && !$type_vals{$val}; confess "Invalid value for 'missing': $val" if $name eq 'missing' && !$missing_vals{$val}; confess "Invalid value for 'unknown': $val" if $name eq 'unknown' && !$unknown_vals{$val}; - $top{$name} = $schema->{$name}; + $val = $sort_vals{$val} || confess "Unknown value for 'sort': $val" if $name eq 'sort' && ref $val ne 'CODE'; + $top->{$name} = $val; next; } - my $t = $validations->{$name} || $default_validations{$name}; + my $t = $custom->{$name} || $default_validations{$name}; confess "Unknown validation: $name" if !$t; confess "Recursion limit exceeded while resolving validation '$name'" if $rec < 1; $t = ref $t eq 'CODE' ? $t->($val) : $t; - my $v = _compile($t, $validations, $rec-1); - $v->{name} = $name; - push @val, $v; + my $v = _new name => $name; + _compile($t, $custom, $rec-1, $top, $v->{validations}); + push @$validations, $v if $v->{validations}->@*; } - - push @val, $func if $func; - - my @keys = keys $top{keys}->%* if $top{keys}; - - for my ($n,$t) (qw/keys hash unknown hash values array sort array unique array/) { - next if !exists $top{$n}; - confess "Incompatible types, the schema specifies '$top{type}' but the '$n' validation implies '$t'" if $top{type} && $top{type} ne $t; - $top{type} = $t; - } - - # Inherit some builtin options from validations - for my $t (grep ref $_ eq 'HASH', @val) { - if ($top{type} && $t->{type} && $top{type} ne $t->{type}) { - confess "Incompatible types, the schema specifies '$top{type}' but validation '$t->{name}' requires '$t->{type}'" if $schema->{type}; - confess "Incompatible types, '$t->[0]' requires '$t->{type}', but another validation requires '$top{type}'"; - } - exists $t->{$_} and !exists $top{$_} and $top{$_} = delete $t->{$_} - for qw/default onerror trim type scalar unknown missing sort unique/; - - push @keys, delete($t->{known_keys})->@* if $t->{known_keys}; - push @keys, keys $t->{keys}->%* if $t->{keys}; - } - - # Compile sub-schemas - $top{keys} = { map +($_, __PACKAGE__->compile($top{keys}{$_}, $validations)), keys $top{keys}->%* } if $top{keys}; - $top{values} = __PACKAGE__->compile($top{values}, $validations) if $top{values}; - - $top{validations} = \@val; - $top{known_keys} = \@keys; - \%top; } -sub compile($pkg, $schema, $validations={}) { +sub compile($pkg, $schema, $custom={}) { return $schema if $schema isa __PACKAGE__; - my $c = _compile $schema, $validations, 64; + my $c = _new; + _compile $schema, $custom, 64, $c; + $c +} - $c->{known_keys} = { map +($_,1), $c->{known_keys}->@* } if $c->{known_keys}; - delete $c->{default} if ref $c->{default} eq 'SCALAR' && ${$c->{default}} eq 'required'; +sub _validate_keys { + my @err; + for my ($k, $s) ($_[0]{keys}->%*) { + if (!exists $_[1]{$k}) { + next if $s->{missing} && $s->{missing} eq 'ignore'; + return { validation => 'missing', key => $k } if $s->{missing} && $s->{missing} eq 'reject'; + $_[1]{$k} = ref $s->{default} eq 'CODE' ? $s->{default}->() : $s->{default} // undef; + next if exists $s->{default}; + } - if (exists $c->{sort}) { - my $s = $c->{sort}; - $c->{sort} = - ref $s eq 'CODE' ? $s - : $s eq 'str' ? sub($x,$y) { $x cmp $y } - : $s eq 'num' ? sub($x,$y) { $x <=> $y } - : confess "Unknown value for 'sort': $c->{sort}"; + my $r = _validate($s, $_[1]{$k}); + if ($r) { + $r->{key} = $k; + push @err, $r; + } } - $c->{unique} = sub { $_[0] } if $c->{unique} && !ref $c->{unique} && !$c->{sort}; + return { validation => 'keys', errors => [ sort { $a->{key} cmp $b->{key} } @err ] } if @err; +} - bless $c, $pkg; +sub _validate_values { + my @err; + for my $i (0..$#{$_[1]}) { + my $r = _validate($_[0]{values}, $_[1][$i]); + if ($r) { + $r->{index} = $i; + push @err, $r; + } + } + return { validation => 'values', errors => \@err } if @err; } sub _validate_rec { my $c = $_[0]; - - # hash keys - if ($c->{keys}) { - my @err; - for my ($k, $s) ($c->{keys}->%*) { - if (!exists $_[1]{$k}) { - next if $s->{missing} && $s->{missing} eq 'ignore'; - return { validation => 'missing', key => $k } if $s->{missing} && $s->{missing} eq 'reject'; - $_[1]{$k} = ref $s->{default} eq 'CODE' ? $s->{default}->() : $s->{default} // undef; - next if exists $s->{default}; - } - - my $r = _validate($s, $_[1]{$k}); - if ($r) { - $r->{key} = $k; - push @err, $r; - } - } - return { validation => 'keys', errors => \@err } if @err; - } - - # array values - if ($c->{values}) { - my @err; - for my $i (0..$#{$_[1]}) { - my $r = _validate($c->{values}, $_[1][$i]); - if ($r) { - $r->{index} = $i; - push @err, $r; - } - } - return { validation => 'values', errors => \@err } if @err; - } - - # validations for my $v ($c->{validations}->@*) { if (ref $v eq 'CODE') { my $r = $v->($_[1]); @@ -243,10 +231,10 @@ sub _validate_array { $_[1] = [sort { $c->{sort}->($a, $b) } $_[1]->@* ] if $c->{sort}; # Key-based uniqueness - if ($c->{unique} && ref $c->{unique} eq 'CODE') { + if ($c->{unique} && (!$c->{sort} || ref $c->{unique} eq 'CODE')) { my %h; for my $i (0..$#{$_[1]}) { - my $k = $c->{unique}->($_[1][$i]); + my $k = ref $c->{unique} eq 'CODE' ? $c->{unique}->($_[1][$i]) : $_[1][$i]; return { validation => 'unique', index_a => $h{$k}, value_a => $_[1][$h{$k}], index_b => $i, value_b => $_[1][$i], key => $k } if exists $h{$k}; $h{$k} = $i; } @@ -288,10 +276,10 @@ sub _validate_input { # validations can perform in-place modifications without affecting the # input. if (!$c->{unknown} || $c->{unknown} eq 'remove') { - $_[1] = { map +($_, $_[1]{$_}), grep $c->{known_keys}{$_}, keys $_[1]->%* }; + $_[1] = { map +($_, $_[1]{$_}), grep $c->{keys}{$_}, keys $_[1]->%* }; } elsif ($c->{unknown} && $c->{unknown} eq 'reject') { - my @err = grep !$c->{known_keys}{$_}, keys $_[1]->%*; - return { validation => 'unknown', keys => \@err, expected => [ sort keys %{$c->{known_keys}} ] } if @err; + my @err = grep !$c->{keys}{$_}, keys $_[1]->%*; + return { validation => 'unknown', keys => \@err, expected => [ sort keys $c->{keys}->%* ] } if @err; $_[1] = { $_[1]->%* }; } else { $_[1] = { $_[1]->%* }; @@ -306,7 +294,10 @@ sub _validate_input { # No need to do anything here. } - &_validate_rec || ($type eq 'array' && &_validate_array); + ($c->{keys} && &_validate_keys) || + ($c->{values} && &_validate_values) || + &_validate_rec || + ($type eq 'array' && &_validate_array) } @@ -866,13 +857,15 @@ error: int => 1 }); -The I, I and C built-in options are validated separately -for each custom validation. So if you have multiple custom validations that set -the I option, then the array elements must validate all the listed -schemas. The same applies to I: If the same key is listed in multiple -custom validations, then the key must conform to all schemas. With respect to -the I option, a key that is mentioned in any of the I options is -considered "known". +The I option is validated separately for each custom validation. + +Multiple I and I validations are merged into a single validation. +So if you have multiple custom validations that set the I option, a +single combined schema is created that validates all array elements. The same +applies to I: if the same key is listed in multiple custom validations, +then the key must conform to all schemas. With respect to the I +option, a key that is mentioned in any of the I options is considered +"known". All other built-in options follow inheritance semantics: These options can be set in a custom validation, and they are inherited by the top-level schema. If diff --git a/t/validate.t b/t/validate.t index c04288a..7533a02 100644 --- a/t/validate.t +++ b/t/validate.t @@ -38,8 +38,8 @@ sub t($schema, $input, $output) { my $schema_copy = dclone([$schema])->[0]; my $input_copy = dclone([$input])->[0]; + #diag explain FU::Validate->compile($schema, \%validations) if $line == 95; my $res = FU::Validate->compile($schema, \%validations)->validate($input); - #diag explain FU::Validate->compile($schema, \%validations) if $line == 98; is_deeply $schema, $schema_copy, "schema modification $line"; is_deeply $input, $input_copy, "input modification $line"; is_deeply $res, $output, "data ok $line"; @@ -51,8 +51,8 @@ sub f($schema, $input, $error, @msg) { my $schema_copy = dclone([$schema])->[0]; my $input_copy = dclone([$input])->[0]; + #diag explain FU::Validate->compile($schema, \%validations) if $line == 176; ok !eval { FU::Validate->compile($schema, \%validations)->validate($input); 1 }, "eval $line"; - #diag explain FU::Validate->compile($schema, \%validations) if $line == 162; is_deeply $schema, $schema_copy, "schema modification $line"; is_deeply $input, $input_copy, "input modification $line"; delete $@->{longmess}; @@ -67,6 +67,7 @@ f {}, '', { validation => 'required' }, 'required value missing'; f {}, undef, { validation => 'required' }, 'required value missing'; t { default => undef }, undef, undef; t { default => undef }, '', undef; +f { default => \'required' }, '', { validation => 'required' }, 'required value missing'; t { defaultsub1 => 1 }, undef, 2; t { defaultsub2 => 1 }, undef, ''; t { defaultsub2 => 1 }, '', 1; @@ -115,6 +116,10 @@ t { type => 'hash', keys => { a=>{} }, unknown => 'pass' }, { a=>1,b=>1 }, { a=> t { type => 'hash', setundef => 1 }, {}, undef; t { type => 'hash', unknown => 'reject', keys => { a=>{ type => 'any', setundef => 1}} }, {a=>[]}, {a=>undef}; +t [ keys => { a => {} }, keys => { b => {} } ], {a=>1, b=>2}, {a=>1, b=>2}; +f [ keys => { a => {} }, keys => { b => {} } ], {a=>1}, { validation => 'keys', errors => [{ key => 'b', validation => 'required' }] }, '.b: required value missing'; +f [ keys => { a => {} }, keys => { a => { int => 1 } } ], {a=>'abc'}, { validation => 'keys', errors => [{ key => 'a', validation => 'int', got => 'abc' }] }, ".a: failed validation 'int'"; + # default validations f { minlength => 3 }, 'ab', { validation => 'minlength', expected => 3, got => 2 }, "failed validation 'minlength'"; t { minlength => 3 }, 'abc', 'abc'; @@ -171,9 +176,9 @@ t { collapsews => 1 }, ' x ', ' x '; t { collapsews => 1, trim => 1 }, ' x ', 'x'; f { person => 1 }, 1, { validation => 'type', expected => 'hash', got => 'scalar' }, "invalid type, expected 'hash' but got 'scalar'"; t { person => 1, default => 1 }, undef, 1; -f { person => 1 }, { sex => 1 }, { validation => 'person', error => { validation => 'keys', errors => [{ key => 'name', validation => 'required' }] } }, "validation 'person'.name: required value missing"; +f { person => 1 }, { sex => 1 }, { validation => 'keys', errors => [{ key => 'name', validation => 'required' }] }, ".name: required value missing"; t { person => 1 }, { sex => undef, name => 'y' }, { sex => 1, name => 'y' }; -f { person => 1, keys => {age => {default => \'required'}} }, {name => 'x', sex => 'y'}, { validation => 'keys', errors => [{ key => 'age', validation => 'required' }] }, '.age: required value missing'; +f { person => 1, keys => {age => {missing => 'reject'}} }, {name => 'x', sex => 'y'}, { key => 'age', validation => 'missing' }, '.age: required key missing'; t { person => 1, keys => {extra => {}} }, {name => 'x', sex => 'y', extra => 1}, { name => 'x', sex => 'y', extra => 1 }; f { person => 1, keys => {extra => {}} }, {name => 'x', sex => 'y', extra => ''}, { validation => 'keys', errors => [{ key => 'extra', validation => 'required' }] }, '.extra: required value missing'; t { person => 1 }, {name => 'x', sex => 'y', extra => 1}, {name => 'x', sex => 'y', extra => 1}; From 3fad7feec3770034dd7af22cc219ea342b289645 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 14 Mar 2025 16:44:08 +0100 Subject: [PATCH 16/86] Validate: Rename "values"->"elems", repurpose "values" to validate hash values I'm breaking stuff left and right while I still can. Idea: "key_names" validation? Idea: "tuple" validation that works like "keys" but for arrays. (i.e. { tuple => { $index => $schema } }, could make "missing" and "unknown" work for arrays, too) --- FU/Validate.pm | 120 ++++++++++++++++++++++++++++++------------------- t/validate.t | 13 +++--- 2 files changed, 82 insertions(+), 51 deletions(-) diff --git a/FU/Validate.pm b/FU/Validate.pm index feeabb3..8d8db91 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -13,15 +13,18 @@ my %builtin = map +($_,1), qw/ default onerror trim - values scalar sort unique - keys unknown missing + elems scalar sort unique + keys values unknown missing func /; my %type_vals = map +($_,1), qw/scalar hash array any/; my %unknown_vals = map +($_,1), qw/remove reject pass/; my %missing_vals = map +($_,1), qw/create reject ignore/; -my %implied_type = qw/keys hash unknown hash values array sort array unique array/; +my %implied_type = qw/ + keys hash values hash unknown hash + elems array sort array unique array scalar array +/; my %sort_vals = ( str => sub($x,$y) { $x cmp $y }, num => sub($x,$y) { $x <=> $y }, @@ -119,9 +122,9 @@ sub _compile($schema, $custom, $rec, $top, $validations=$top->{validations}) { $top->{type} = $type; } - if ($name eq 'values') { - $top->{values} ||= _new; - _compile($val, $custom, $rec-1, $top->{values}); + if ($name eq 'elems' || $name eq 'values') { + $top->{$name} ||= _new; + _compile($val, $custom, $rec-1, $top->{$name}); next; } @@ -173,35 +176,51 @@ sub compile($pkg, $schema, $custom={}) { } -sub _validate_keys { - my @err; - for my ($k, $s) ($_[0]{keys}->%*) { - if (!exists $_[1]{$k}) { - next if $s->{missing} && $s->{missing} eq 'ignore'; - return { validation => 'missing', key => $k } if $s->{missing} && $s->{missing} eq 'reject'; - $_[1]{$k} = ref $s->{default} eq 'CODE' ? $s->{default}->() : $s->{default} // undef; - next if exists $s->{default}; - } +sub _validate_hash { + my $c = $_[0]; - my $r = _validate($s, $_[1]{$k}); - if ($r) { - $r->{key} = $k; - push @err, $r; + if ($c->{keys}) { + my @err; + for my ($k, $s) ($c->{keys}->%*) { + if (!exists $_[1]{$k}) { + next if $s->{missing} && $s->{missing} eq 'ignore'; + return { validation => 'missing', key => $k } if $s->{missing} && $s->{missing} eq 'reject'; + $_[1]{$k} = ref $s->{default} eq 'CODE' ? $s->{default}->() : $s->{default} // undef; + next if exists $s->{default}; + } + + my $r = _validate($s, $_[1]{$k}); + if ($r) { + $r->{key} = $k; + push @err, $r; + } } + return { validation => 'keys', errors => [ sort { $a->{key} cmp $b->{key} } @err ] } if @err; + } + + if ($c->{values}) { + my @err; + for my ($k, $v) ($_[1]->%*) { + my $r = _validate($c->{values}, $v); + if ($r) { + $r->{key} = $k; + push @err, $r; + } + } + return { validation => 'values', errors => [ sort { $a->{key} cmp $b->{key} } @err ] } if @err; } - return { validation => 'keys', errors => [ sort { $a->{key} cmp $b->{key} } @err ] } if @err; } -sub _validate_values { +sub _validate_elems { my @err; for my $i (0..$#{$_[1]}) { - my $r = _validate($_[0]{values}, $_[1][$i]); + my $r = _validate($_[0]{elems}, $_[1][$i]); if ($r) { $r->{index} = $i; push @err, $r; } } - return { validation => 'values', errors => \@err } if @err; + return { validation => 'elems', errors => \@err } if @err; } @@ -275,14 +294,14 @@ sub _validate_input { # Each branch below makes a shallow copy of the hash, so that further # validations can perform in-place modifications without affecting the # input. - if (!$c->{unknown} || $c->{unknown} eq 'remove') { + if (!$c->{keys} || ($c->{unknown} && $c->{unknown} eq 'pass')) { + $_[1] = { $_[1]->%* }; + } elsif (!$c->{unknown} || $c->{unknown} eq 'remove') { $_[1] = { map +($_, $_[1]{$_}), grep $c->{keys}{$_}, keys $_[1]->%* }; - } elsif ($c->{unknown} && $c->{unknown} eq 'reject') { + } else { my @err = grep !$c->{keys}{$_}, keys $_[1]->%*; return { validation => 'unknown', keys => \@err, expected => [ sort keys $c->{keys}->%* ] } if @err; $_[1] = { $_[1]->%* }; - } else { - $_[1] = { $_[1]->%* }; } } elsif ($type eq 'array') { @@ -294,8 +313,8 @@ sub _validate_input { # No need to do anything here. } - ($c->{keys} && &_validate_keys) || - ($c->{values} && &_validate_values) || + ($type eq 'hash' && &_validate_hash) || + ($c->{elems} && &_validate_elems) || &_validate_rec || ($type eq 'array' && &_validate_array) } @@ -340,8 +359,9 @@ sub errors($e, $prefix='') { my $val = $e->{validation}; my $p = $prefix ? "$prefix: " : ''; $val eq 'keys' ? map errors($_, $prefix.'.'._fmtkey($_->{key})), $e->{errors}->@* : + $val eq 'values' ? map errors($_, $prefix.'.'._fmtkey($_->{key})), $e->{errors}->@* : $val eq 'missing' ? $prefix.'.'._fmtkey($e->{key}).': required key missing' : - $val eq 'values' ? map errors($_, $prefix."[$_->{index}]"), $e->{errors}->@* : + $val eq 'elems' ? map errors($_, $prefix."[$_->{index}]"), $e->{errors}->@* : $val eq 'unique' ? $prefix."[$e->{index_b}] value '"._fmtval($e->{value_a})."' duplicated" : $val eq 'required' ? "${p}required value missing" : $val eq 'type' ? "${p}invalid type, expected '$e->{expected}' but got '$e->{got}'" : @@ -374,10 +394,10 @@ validate the format and the structure of the data, but it does not support validations that depend on other input values. For example, it is not possible to specify that the contents of a I field must be equivalent to that of a I field, but you can specify that both fields need to be -filled out. Recursive data structures are not supported. There is also no -built-in support for validating hashes with dynamic keys or arrays where not -all elements conform to the same schema. These could technically still be -validated with custom validations, but it won't be as convenient. +filled out. Recursive data structures are not supported. There is also no good +support for validating hashes with dynamic keys or arrays where not all +elements conform to the same schema. These could technically still be validated +with custom validations, but it won't be as convenient. This module is designed to validate any kind of program input after it has been parsed into a Perl data structure. It should not be used to validate function @@ -521,14 +541,22 @@ like: ] } +=item values => $schema + +Implies C<< type => 'hash' >>, set a schema that is used to validate every hash +value. Can be used together with I, in which case values must validate +both this C<$schema> and the schema corresponding to the key. + =item unknown => $option Implies C<< type => 'hash' >>, this option specifies what to do with keys in the input data that have not been defined in the I option. Possible values are I to remove unknown keys from the output data (this is the default), I to return an error if there are unknown keys in the input, -or I to pass through any unknown keys to the output data. Note that the -values for passed-through keys are not validated against any schema! +or I to pass through any unknown keys to the output data. Values for +passed-through keys are only validated when the I option is set, +otherwise they are passed through as-is. This option has no effect when the +I option is never set, in that case all values are always passed through. In the case of I, the error object will look like: @@ -549,7 +577,8 @@ undef), I to return an error if the option is missing or I to leave the key out of the returned data. The default is I, but if no I option is set for this key then -that is effectively the same as I. +that is effectively the same as I. Values created through I are +still validated through I if that has been set. In the case of I, the error object will look like: @@ -557,15 +586,15 @@ In the case of I, the error object will look like: key => 'field' } -=item values => $schema +=item elems => $schema Implies C<< type => 'array' >>, this defines the schema that is applied to -every item in the array. The schema definition may be a bare hashref or a +every element in the array. The schema definition may be a bare hashref or a validator returned by C. Failure is reported in a similar fashion to I: - { validation => 'values', + { validation => 'elems', errors => [ { index => 1, validation => 'required' } ] @@ -626,8 +655,7 @@ All of that may sound complicated, but it's quite easy to use. Here's a few examples: # This describes an array of hashes with keys 'id' and 'name'. - { values => { - type => 'hash', + { elems => { keys => { id => { uint => 1 }, name => {} @@ -641,7 +669,7 @@ examples: # Contrived example: An array of strings, and we want # each string to start with a different character. - { values => { minlength => 1 }, + { elems => { minlength => 1 }, unique => sub { substr $_[0], 0, 1 } } @@ -845,7 +873,7 @@ used in that schema may get input with whitespace around it. All validations used in a schema need to agree upon a single I option. If a custom validation does not specify a I option (and no type is -implied by another validation such as I or I), then the +implied by another validation such as I or I), then the validation should work with every type. It is an error to define a schema that mixes validations of different types. For example, the following throws an error: @@ -859,8 +887,8 @@ error: The I option is validated separately for each custom validation. -Multiple I and I validations are merged into a single validation. -So if you have multiple custom validations that set the I option, a +Multiple I and I validations are merged into a single validation. +So if you have multiple custom validations that set the I option, a single combined schema is created that validates all array elements. The same applies to I: if the same key is listed in multiple custom validations, then the key must conform to all schemas. With respect to the I diff --git a/t/validate.t b/t/validate.t index 7533a02..de37264 100644 --- a/t/validate.t +++ b/t/validate.t @@ -19,7 +19,7 @@ my %validations = ( neverfails => { onerror => 'err' }, doublefunc => [ func => sub { $_[0] == 0 }, func => sub { $_[0] = 2; 1; } ], revnum => { type => 'array', sort => sub($x,$y) { $y <=> $x } }, - uniquelength => { type => 'array', values => { type => 'array' }, unique => sub { scalar @{$_[0]} } }, + uniquelength => { elems => { type => 'array' }, unique => sub { scalar @{$_[0]} } }, person => { type => 'hash', unknown => 'pass', @@ -85,8 +85,8 @@ f { type => 'array' }, 1, { validation => 'type', expected => 'array', got => 's t { type => 'array' }, [], []; t { type => 'array' }, [undef,1,2,{}], [undef,1,2,{}]; t { type => 'array', scalar => 1 }, 1, [1]; -f { type => 'array', values => {} }, [undef], { validation => 'values', errors => [{ index => 0, validation => 'required' }] }, "[0]: required value missing"; -t { type => 'array', values => {} }, [' a '], ['a']; +f { type => 'array', elems => {} }, [undef], { validation => 'elems', errors => [{ index => 0, validation => 'required' }] }, "[0]: required value missing"; +t { type => 'array', elems => {} }, [' a '], ['a']; t { type => 'array', sort => 'str' }, [qw/20 100 3/], [qw/100 20 3/]; t { type => 'array', sort => 'num' }, [qw/20 100 3/], [qw/3 20 100/]; t { revnum => 1 }, [qw/20 100 3/], [qw/100 20 3/]; @@ -97,12 +97,12 @@ f { type => 'array', unique => 1 }, [qw/3 1 3/], { validation => 'unique', index t { uniquelength => 1 }, [[],[1],[1,2]], [[],[1],[1,2]]; f { uniquelength => 1 }, [[],[1],[2]], { validation => 'unique', index_a => 1, value_a => [1], index_b => 2, value_b => [2], key => 1 }, q{[2] value '[1]' duplicated}; t { type => 'array', setundef => 1 }, [], undef; -t { type => 'array', values => { type => 'any', setundef => 1 } }, [[]], [undef]; +t { type => 'array', elems => { type => 'any', setundef => 1 } }, [[]], [undef]; # hashes f { type => 'hash' }, [], { validation => 'type', expected => 'hash', got => 'array' }, "invalid type, expected 'hash' but got 'array'"; f { type => 'hash' }, 'a', { validation => 'type', expected => 'hash', got => 'scalar' }, "invalid type, expected 'hash' but got 'scalar'"; -t { type => 'hash' }, {a=>[],b=>undef,c=>{}}, {}; +t { type => 'hash' }, {a=>[],b=>undef,c=>{}}, {a=>[],b=>undef,c=>{}}; f { type => 'hash', keys => { a=>{} } }, {}, { validation => 'keys', errors => [{ key => 'a', validation => 'required' }] }, '.a: required value missing'; t { type => 'hash', keys => { a=>{missing=>'ignore'} } }, {}, {}; t { type => 'hash', keys => { a=>{default=>undef} } }, {}, {a=>undef}; @@ -120,6 +120,9 @@ t [ keys => { a => {} }, keys => { b => {} } ], {a=>1, b=>2}, {a=>1, b=>2}; f [ keys => { a => {} }, keys => { b => {} } ], {a=>1}, { validation => 'keys', errors => [{ key => 'b', validation => 'required' }] }, '.b: required value missing'; f [ keys => { a => {} }, keys => { a => { int => 1 } } ], {a=>'abc'}, { validation => 'keys', errors => [{ key => 'a', validation => 'int', got => 'abc' }] }, ".a: failed validation 'int'"; +t { values => { int => 1 } }, { a => -1, b => 1 }, { a => -1, b => 1 }; +f { values => { int => 1 } }, { a => undef }, { validation => 'values', errors => [{ key => 'a', validation => 'required' }] }, '.a: required value missing'; + # default validations f { minlength => 3 }, 'ab', { validation => 'minlength', expected => 3, got => 2 }, "failed validation 'minlength'"; t { minlength => 3 }, 'abc', 'abc'; From f8fe53cba90f5b39ae4666aee97d053a421c61ef Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 16 Mar 2025 15:03:32 +0100 Subject: [PATCH 17/86] json_format: Add html_safe option --- FU/Util.pm | 27 ++++++++++++++++++++------- c/jsonfmt.c | 29 ++++++++++++++++------------- t/json_format.t | 1 + 3 files changed, 37 insertions(+), 20 deletions(-) diff --git a/FU/Util.pm b/FU/Util.pm index cd34fd3..a9cca1d 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -212,13 +212,6 @@ roughly similar to: JSON::PP->new->allow_nonref->core_bools->convert_blessed->encode($scalar); -Some modules escape the slash character in encoded strings to prevent a -potential XSS vulnerability when embedding JSON inside C<< >> tags. This function does I do that because it might not even -be sufficient. The following is probably an improvement: - - json_format($data) =~ s{>, C<< > >> and C<< & >> as Unicode escapes. +Commonly used to embed data inside a HTML page: + + $html = ''; + +This option does NOT make it safe to include the encoded JSON as an attribute +value. There is no way to do that without violating JSON specs, so you should +use entity escaping instead. + +Some JSON modules escape the forward slash (C) character instead, but that +is, at best, B sufficient for embedding inside a C<<