From 64a105e0131477962cb1eca016c9b7ddb1022ff0 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 14 Mar 2025 06:50:49 +0100 Subject: [PATCH 01/76] Validate: remove a level of indirection --- FU/MultipartFormData.pm | 4 +- FU/Pg.pm | 2 +- FU/Validate.pm | 106 ++++++++++++++++++++-------------------- 3 files changed, 56 insertions(+), 56 deletions(-) diff --git a/FU/MultipartFormData.pm b/FU/MultipartFormData.pm index e19d7bc..a777823 100644 --- a/FU/MultipartFormData.pm +++ b/FU/MultipartFormData.pm @@ -123,7 +123,9 @@ module makes an attempt to avoid any further copies of data values. =head2 Parsing -=over FU::MultipartFormData->parse($header, $body) +=over + +=item FU::MultipartFormData->parse($header, $body) Returns an array of field objects from the given C<$header>, which must be a valid value for the C request header, and the given C<$body>, diff --git a/FU/Pg.pm b/FU/Pg.pm index ba574d6..5d8c2a7 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -806,7 +806,7 @@ error if something went wrong. It is possible to close a read-copy operation before all data has been consumed, but that causes all data to still be read and discarded during C. If you really want to interrupt a large read operation, a more -efficient approach is to call C<< $conn->close >> and discard the entire +efficient approach is to call C<< $conn->disconnect >> and discard the entire connection. It is not I to call this method, simply letting the C<$copy> object diff --git a/FU/Validate.pm b/FU/Validate.pm index 4b93eb9..fd180b8 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -94,8 +94,8 @@ our %default_validations = ( # # name => $name_or_undef, # validations => [ $recursive_compiled_object, .. ], -# schema => $builtin_validations, # known_keys => { $key => 1, .. } # Extracted from 'keys', Used for the 'unknown' validation +# %builtin_validations # sub _compile($schema, $validations, $rec) { my(%top, @val); @@ -125,26 +125,24 @@ sub _compile($schema, $validations, $rec) { # Inherit some builtin options from validations for my $t (@val) { - if ($top{type} && $t->{schema}{type} && $top{type} ne $t->{schema}{type}) { - confess "Incompatible types, the schema specifies '$top{type}' but validation '$t->{name}' requires '$t->{schema}{type}'" if $schema->{type}; - confess "Incompatible types, '$t->[0]' requires '$t->{schema}{type}', but another validation requires '$top{type}'"; + if ($top{type} && $t->{type} && $top{type} ne $t->{type}) { + confess "Incompatible types, the schema specifies '$top{type}' but validation '$t->{name}' requires '$t->{type}'" if $schema->{type}; + confess "Incompatible types, '$t->[0]' requires '$t->{type}', but another validation requires '$top{type}'"; } - exists $t->{schema}{$_} and !exists $top{$_} and $top{$_} = delete $t->{schema}{$_} + exists $t->{$_} and !exists $top{$_} and $top{$_} = delete $t->{$_} for qw/default onerror trim type scalar unknown missing sort unique/; push @keys, keys %{ delete $t->{known_keys} }; - push @keys, keys %{ $t->{schema}{keys} } if $t->{schema}{keys}; + push @keys, keys %{ $t->{keys} } if $t->{keys}; } # Compile sub-schemas $top{keys} = { map +($_, __PACKAGE__->compile($top{keys}{$_}, $validations)), keys $top{keys}->%* } if $top{keys}; $top{values} = __PACKAGE__->compile($top{values}, $validations) if $top{values}; - return { - validations => \@val, - schema => \%top, - known_keys => { map +($_,1), @keys }, - }; + $top{validations} = \@val; + $top{known_keys} = { map +($_,1), @keys }; + \%top; } @@ -153,26 +151,26 @@ sub compile($pkg, $schema, $validations={}) { my $c = _compile $schema, $validations, 64; - $c->{schema}{type} //= 'scalar'; - $c->{schema}{missing} //= 'create'; - $c->{schema}{trim} //= 1 if $c->{schema}{type} eq 'scalar'; - $c->{schema}{unknown} //= 'remove' if $c->{schema}{type} eq 'hash'; + $c->{type} //= 'scalar'; + $c->{missing} //= 'create'; + $c->{trim} //= 1 if $c->{type} eq 'scalar'; + $c->{unknown} //= 'remove' if $c->{type} eq 'hash'; - confess "Invalid value for 'type': $c->{schema}{type}" if !$type_vals{$c->{schema}{type}}; - confess "Invalid value for 'missing': $c->{schema}{missing}" if !$missing_vals{$c->{schema}{missing}}; - confess "Invalid value for 'unknown': $c->{schema}{unknown}" if exists $c->{schema}{unknown} && !$unknown_vals{$c->{schema}{unknown}}; + confess "Invalid value for 'type': $c->{type}" if !$type_vals{$c->{type}}; + confess "Invalid value for 'missing': $c->{missing}" if !$missing_vals{$c->{missing}}; + confess "Invalid value for 'unknown': $c->{unknown}" if exists $c->{unknown} && !$unknown_vals{$c->{unknown}}; - delete $c->{schema}{default} if ref $c->{schema}{default} eq 'SCALAR' && ${$c->{schema}{default}} eq 'required'; + delete $c->{default} if ref $c->{default} eq 'SCALAR' && ${$c->{default}} eq 'required'; - if (exists $c->{schema}{sort}) { - my $s = $c->{schema}{sort}; - $c->{schema}{sort} = + if (exists $c->{sort}) { + my $s = $c->{sort}; + $c->{sort} = ref $s eq 'CODE' ? $s : $s eq 'str' ? sub($x,$y) { $x cmp $y } : $s eq 'num' ? sub($x,$y) { $x <=> $y } - : confess "Unknown value for 'sort': $c->{schema}{sort}"; + : confess "Unknown value for 'sort': $c->{sort}"; } - $c->{schema}{unique} = sub { $_[0] } if $c->{schema}{unique} && !ref $c->{schema}{unique} && !$c->{schema}{sort}; + $c->{unique} = sub { $_[0] } if $c->{unique} && !ref $c->{unique} && !$c->{sort}; bless $c, $pkg; } @@ -182,14 +180,14 @@ sub _validate_rec { my $c = $_[0]; # hash keys - if ($c->{schema}{keys}) { + if ($c->{keys}) { my @err; - for my ($k, $s) ($c->{schema}{keys}->%*) { + for my ($k, $s) ($c->{keys}->%*) { if (!exists $_[1]{$k}) { - next if $s->{schema}{missing} eq 'ignore'; - return { validation => 'missing', key => $k } if $s->{schema}{missing} eq 'reject'; - $_[1]{$k} = ref $s->{schema}{default} eq 'CODE' ? $s->{schema}{default}->() : $s->{schema}{default} // undef; - next if exists $s->{schema}{default}; + next if $s->{missing} eq 'ignore'; + return { validation => 'missing', key => $k } if $s->{missing} eq 'reject'; + $_[1]{$k} = ref $s->{default} eq 'CODE' ? $s->{default}->() : $s->{default} // undef; + next if exists $s->{default}; } my $r = _validate($s, $_[1]{$k}); @@ -202,10 +200,10 @@ sub _validate_rec { } # array values - if ($c->{schema}{values}) { + if ($c->{values}) { my @err; for my $i (0..$#{$_[1]}) { - my $r = _validate($c->{schema}{values}, $_[1][$i]); + my $r = _validate($c->{values}, $_[1][$i]); if ($r) { $r->{index} = $i; push @err, $r; @@ -226,8 +224,8 @@ sub _validate_rec { } # func - if ($c->{schema}{func}) { - my $r = $c->{schema}{func}->($_[1]); + if ($c->{func}) { + my $r = $c->{func}->($_[1]); return { %$r, validation => 'func' } if ref $r eq 'HASH'; return { validation => 'func', result => $r } if !$r; } @@ -236,24 +234,24 @@ sub _validate_rec { sub _validate_array { my $c = $_[0]; - return if $c->{schema}{type} ne 'array'; + return if $c->{type} ne 'array'; - $_[1] = [sort { $c->{schema}{sort}->($a, $b) } $_[1]->@* ] if $c->{schema}{sort}; + $_[1] = [sort { $c->{sort}->($a, $b) } $_[1]->@* ] if $c->{sort}; # Key-based uniqueness - if ($c->{schema}{unique} && ref $c->{schema}{unique} eq 'CODE') { + if ($c->{unique} && ref $c->{unique} eq 'CODE') { my %h; for my $i (0..$#{$_[1]}) { - my $k = $c->{schema}{unique}->($_[1][$i]); + my $k = $c->{unique}->($_[1][$i]); return { validation => 'unique', index_a => $h{$k}, value_a => $_[1][$h{$k}], index_b => $i, value_b => $_[1][$i], key => $k } if exists $h{$k}; $h{$k} = $i; } # Comparison-based uniqueness - } elsif ($c->{schema}{unique}) { + } elsif ($c->{unique}) { for my $i (0..$#{$_[1]}-1) { return { validation => 'unique', index_a => $i, value_a => $_[1][$i], index_b => $i+1, value_b => $_[1][$i+1] } - if $c->{schema}{sort}->($_[1][$i], $_[1][$i+1]) == 0 + if $c->{sort}->($_[1][$i], $_[1][$i+1]) == 0 } } } @@ -263,29 +261,29 @@ sub _validate_input { my $c = $_[0]; # trim (needs to be done before the 'default' test) - $_[1] = trim $_[1] =~ s/\r//rg if defined $_[1] && !ref $_[1] && $c->{schema}{type} eq 'scalar' && $c->{schema}{trim}; + $_[1] = trim $_[1] =~ s/\r//rg if defined $_[1] && !ref $_[1] && $c->{type} eq 'scalar' && $c->{trim}; # default if (!defined $_[1] || (!ref $_[1] && $_[1] eq '')) { - if (exists $c->{schema}{default}) { - $_[1] = ref $c->{schema}{default} eq 'CODE' ? $c->{schema}{default}->($_[1]) : $c->{schema}{default}; + if (exists $c->{default}) { + $_[1] = ref $c->{default} eq 'CODE' ? $c->{default}->($_[1]) : $c->{default}; return; } return { validation => 'required' }; } - if ($c->{schema}{type} eq 'scalar') { + if ($c->{type} eq 'scalar') { return { validation => 'type', expected => 'scalar', got => lc ref $_[1] } if ref $_[1]; - } elsif ($c->{schema}{type} eq 'hash') { + } elsif ($c->{type} eq 'hash') { return { validation => 'type', expected => 'hash', got => lc ref $_[1] || 'scalar' } if ref $_[1] ne 'HASH'; # Each branch below makes a shallow copy of the hash, so that further # validations can perform in-place modifications without affecting the # input. - if ($c->{schema}{unknown} eq 'remove') { + if ($c->{unknown} eq 'remove') { $_[1] = { map +($_, $_[1]{$_}), grep $c->{known_keys}{$_}, keys $_[1]->%* }; - } elsif ($c->{schema}{unknown} eq 'reject') { + } elsif ($c->{unknown} eq 'reject') { my @err = grep !$c->{known_keys}{$_}, keys $_[1]->%*; return { validation => 'unknown', keys => \@err, expected => [ sort keys %{$c->{known_keys}} ] } if @err; $_[1] = { $_[1]->%* }; @@ -293,16 +291,16 @@ sub _validate_input { $_[1] = { $_[1]->%* }; } - } elsif ($c->{schema}{type} eq 'array') { - $_[1] = [$_[1]] if $c->{schema}{scalar} && !ref $_[1]; - return { validation => 'type', expected => $c->{schema}{scalar} ? 'array or scalar' : 'array', got => lc ref $_[1] || 'scalar' } if ref $_[1] ne 'ARRAY'; + } elsif ($c->{type} eq 'array') { + $_[1] = [$_[1]] if $c->{scalar} && !ref $_[1]; + return { validation => 'type', expected => $c->{scalar} ? 'array or scalar' : 'array', got => lc ref $_[1] || 'scalar' } if ref $_[1] ne 'ARRAY'; $_[1] = [$_[1]->@*]; # Create a shallow copy to prevent in-place modification. - } elsif ($c->{schema}{type} eq 'any') { + } elsif ($c->{type} eq 'any') { # No need to do anything here. } else { - confess "Unknown type '$c->{schema}{type}'"; # Already checked in compile(), but be extra safe + confess "Unknown type '$c->{type}'"; # Already checked in compile(), but be extra safe } &_validate_rec || &_validate_array; @@ -312,8 +310,8 @@ sub _validate_input { sub _validate { my $c = $_[0]; my $r = &_validate_input; - ($r, $_[1]) = (undef, ref $c->{schema}{onerror} eq 'CODE' ? $c->{schema}{onerror}->($_[0], bless $r, 'FU::Validate::err') : $c->{schema}{onerror}) - if $r && exists $c->{schema}{onerror}; + ($r, $_[1]) = (undef, ref $c->{onerror} eq 'CODE' ? $c->{onerror}->($_[0], bless $r, 'FU::Validate::err') : $c->{onerror}) + if $r && exists $c->{onerror}; $r } From 1363e112698c186427bf1689bfa861b97cea0357 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 14 Mar 2025 06:57:59 +0100 Subject: [PATCH 02/76] Validate: allow array schemas + defer known_keys hash creation Doesn't allow multiple 'func' options yet, needs work. --- FU/Validate.pm | 44 +++++++++++++++++++++++++++++--------------- t/validate.t | 5 ++++- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/FU/Validate.pm b/FU/Validate.pm index fd180b8..54b3c77 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -76,7 +76,7 @@ our %default_validations = ( uint => { _reg $re_uint }, # implies num min => sub($min) { +{ num => 1, func => sub { $_[0] >= $min ? 1 : { expected => $min, got => $_[0] } } } }, max => sub($max) { +{ num => 1, func => sub { $_[0] <= $max ? 1 : { expected => $max, got => $_[0] } } } }, - range => sub { +{ min => $_[0][0], max => $_[0][1] } }, + range => sub { [ min => $_[0][0], max => $_[0][1] ] }, ascii => { _reg qr/^[\x20-\x7E]*$/ }, sl => { _reg qr/^[^\t\r\n]+$/ }, @@ -99,10 +99,12 @@ our %default_validations = ( # sub _compile($schema, $validations, $rec) { my(%top, @val); - my @keys = keys $schema->{keys}->%* if $schema->{keys}; - for my($name, $val) (%$schema) { + for my($name, $val) (ref $schema eq 'ARRAY' ? @$schema : %$schema) { if ($builtin{$name}) { + confess "Invalid value for 'type': $val" if $name eq 'type' && !$type_vals{$val}; + confess "Invalid value for 'missing': $val" if $name eq 'missing' && !$missing_vals{$val}; + confess "Invalid value for 'unknown': $val" if $name eq 'unknown' && !$unknown_vals{$val}; $top{$name} = $schema->{$name}; next; } @@ -117,6 +119,8 @@ sub _compile($schema, $validations, $rec) { push @val, $v; } + my @keys = keys $top{keys}->%* if $top{keys}; + for my ($n,$t) (qw/keys hash unknown hash values array sort array unique array/) { next if !exists $top{$n}; confess "Incompatible types, the schema specifies '$top{type}' but the '$n' validation implies '$t'" if $top{type} && $top{type} ne $t; @@ -132,8 +136,8 @@ sub _compile($schema, $validations, $rec) { exists $t->{$_} and !exists $top{$_} and $top{$_} = delete $t->{$_} for qw/default onerror trim type scalar unknown missing sort unique/; - push @keys, keys %{ delete $t->{known_keys} }; - push @keys, keys %{ $t->{keys} } if $t->{keys}; + push @keys, delete($t->{known_keys})->@* if $t->{known_keys}; + push @keys, keys $t->{keys}->%* if $t->{keys}; } # Compile sub-schemas @@ -141,24 +145,20 @@ sub _compile($schema, $validations, $rec) { $top{values} = __PACKAGE__->compile($top{values}, $validations) if $top{values}; $top{validations} = \@val; - $top{known_keys} = { map +($_,1), @keys }; + $top{known_keys} = \@keys; \%top; } sub compile($pkg, $schema, $validations={}) { return $schema if $schema isa __PACKAGE__; - my $c = _compile $schema, $validations, 64; $c->{type} //= 'scalar'; $c->{missing} //= 'create'; $c->{trim} //= 1 if $c->{type} eq 'scalar'; $c->{unknown} //= 'remove' if $c->{type} eq 'hash'; - - confess "Invalid value for 'type': $c->{type}" if !$type_vals{$c->{type}}; - confess "Invalid value for 'missing': $c->{missing}" if !$missing_vals{$c->{missing}}; - confess "Invalid value for 'unknown': $c->{unknown}" if exists $c->{unknown} && !$unknown_vals{$c->{unknown}}; + $c->{known_keys} = { map +($_,1), $c->{known_keys}->@* } if $c->{known_keys}; delete $c->{default} if ref $c->{default} eq 'SCALAR' && ${$c->{default}} eq 'required'; @@ -420,10 +420,11 @@ validation. These are documented in L below. =head1 SCHEMA DEFINITION -A schema is a hashref, each key is the name of a built-in option or of a -validation to be performed. None of the options or validations are required, -but some built-ins have default values. This means that the empty schema C<{}> -is actually equivalent to: +A schema is an arrayref or hashref, where each key is the name of a built-in +option or of a validation to be performed and the values are the arguments to +those validations. None of the options or validations are required, but some +built-ins have default values. This means that the empty schema C<{}> is +actually equivalent to: { type => 'scalar', trim => 1, @@ -431,6 +432,19 @@ is actually equivalent to: missing => 'create', } +Built-in options are always validated in a fixed order, but the order in which +standard and custom validations are performed is random when the schema is +given as a hashref. This is rarely a problem, but it can in some cases affect +the returned error message or whether a later validation will receive data +normalized by a previous validation. An arrayref can be used to enforce a +validation order: + + [ enum => [1, 2, 'a'], int => 1 ] + +Or to use the same validation multiple times: + + [ regex => qr/^a/, regex => qr/z$/ ] + =head2 Built-in options =over diff --git a/t/validate.t b/t/validate.t index b01352b..2f8d8d9 100644 --- a/t/validate.t +++ b/t/validate.t @@ -131,6 +131,9 @@ f { type => 'hash', length => 1, unknown => 'pass' }, {qw/1 a 2 b/}, { validatio t { type => 'hash', length => 1, keys => {a => {missing=>'ignore'}, b => {missing=>'ignore'}} }, {a=>1}, {a=>1}; t { regex => '^a' }, 'abc', 'abc'; # XXX: Can't use qr// here because t() does dclone(). The 'hex' test covers that case anyway. f { regex => '^a' }, 'cba', { validation => 'regex', regex => '^a', got => 'cba' }, "failed validation 'regex'"; +t [ regex => '^a', regex => 'z$' ], 'abcxyz', 'abcxyz'; +f [ regex => '^a', regex => 'z$' ], 'bcxyz', { validation => 'regex', regex => '^a', got => 'bcxyz' }, "failed validation 'regex'"; +f [ regex => '^a', regex => 'z$' ], 'abcxy', { validation => 'regex', regex => 'z$', got => 'abcxy' }, "failed validation 'regex'"; t { enum => [1,2] }, 1, 1; t { enum => [1,2] }, 2, 2; f { enum => [1,2] }, 3, { validation => 'enum', expected => [1,2], got => 3 }, "failed validation 'enum'"; @@ -205,7 +208,7 @@ t { range => [1,2] }, 1, 1; t { range => [1,2] }, 2, 2; f { range => [1,2] }, 0.9, { validation => 'range', error => { validation => 'min', expected => 1, got => 0.9 } }, "validation 'range': failed validation 'min'"; f { range => [1,2] }, 2.1, { validation => 'range', error => { validation => 'max', expected => 2, got => 2.1 } }, "validation 'range': failed validation 'max'"; -#t { range => [1,2] }, 'a', 'a', { validation => 'range', error => { validation => 'max', error => nerr 'a' } }; # XXX: Error validation type depends on evaluation order +f { range => [1,2] }, 'a', { validation => 'range', error => { validation => 'min', error => (nerr 'a')[0] } }, "validation 'range': validation 'min': failed validation 'num'"; # email template use utf8; From f248a33c1c4cfe0edadf4146681ee2e1b13c0f01 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 14 Mar 2025 09:25:56 +0100 Subject: [PATCH 03/76] Validate: Allow multiple func validations in arrayref schema --- FU/Validate.pm | 63 ++++++++++++++++++++++++++++---------------------- t/validate.t | 6 ++++- 2 files changed, 41 insertions(+), 28 deletions(-) diff --git a/FU/Validate.pm b/FU/Validate.pm index 54b3c77..5ea79d6 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -23,10 +23,10 @@ my %unknown_vals = map +($_,1), qw/remove reject pass/; my %missing_vals = map +($_,1), qw/create reject ignore/; sub _length($exp, $min, $max) { - +{ func => sub($v) { + [ func => sub($v) { my $got = ref $v eq 'HASH' ? keys %$v : ref $v eq 'ARRAY' ? @$v : length $v; (!defined $min || $got >= $min) && (!defined $max || $got <= $max) ? 1 : { expected => $exp, got => $got }; - }} + }] } # Basically the same as ( regex => $arg ), but hides the regex error @@ -100,7 +100,15 @@ our %default_validations = ( sub _compile($schema, $validations, $rec) { my(%top, @val); - for my($name, $val) (ref $schema eq 'ARRAY' ? @$schema : %$schema) { + # 'func' is always evaluated last in hashref schemas. + my $func = ref $schema eq 'HASH' && $schema->{func}; + + for my($name, $val) (ref $schema eq 'HASH' ? %$schema : @$schema) { + if ($name eq 'func') { + push @val, $val if !$func; + next; + } + if ($builtin{$name}) { confess "Invalid value for 'type': $val" if $name eq 'type' && !$type_vals{$val}; confess "Invalid value for 'missing': $val" if $name eq 'missing' && !$missing_vals{$val}; @@ -112,13 +120,15 @@ sub _compile($schema, $validations, $rec) { my $t = $validations->{$name} || $default_validations{$name}; confess "Unknown validation: $name" if !$t; confess "Recursion limit exceeded while resolving validation '$name'" if $rec < 1; - $t = ref $t eq 'HASH' ? $t : $t->($val); + $t = ref $t eq 'CODE' ? $t->($val) : $t; my $v = _compile($t, $validations, $rec-1); $v->{name} = $name; push @val, $v; } + push @val, $func if $func; + my @keys = keys $top{keys}->%* if $top{keys}; for my ($n,$t) (qw/keys hash unknown hash values array sort array unique array/) { @@ -128,7 +138,7 @@ sub _compile($schema, $validations, $rec) { } # Inherit some builtin options from validations - for my $t (@val) { + for my $t (grep ref $_ eq 'HASH', @val) { if ($top{type} && $t->{type} && $top{type} ne $t->{type}) { confess "Incompatible types, the schema specifies '$top{type}' but validation '$t->{name}' requires '$t->{type}'" if $schema->{type}; confess "Incompatible types, '$t->[0]' requires '$t->{type}', but another validation requires '$top{type}'"; @@ -213,21 +223,20 @@ sub _validate_rec { } # validations - for ($c->{validations}->@*) { - my $r = _validate_rec($_, $_[1]); - return { - # If the error was a custom 'func' object, then make that the primary cause. - # This makes it possible for validations to provide their own error objects. - $r->{validation} eq 'func' && (!exists $r->{result} || keys $r->%* > 2) ? $r->%* : (error => $r), - validation => $_->{name}, - } if $r; - } - - # func - if ($c->{func}) { - my $r = $c->{func}->($_[1]); - return { %$r, validation => 'func' } if ref $r eq 'HASH'; - return { validation => 'func', result => $r } if !$r; + for my $v ($c->{validations}->@*) { + if (ref $v eq 'CODE') { + my $r = $v->($_[1]); + return { %$r, validation => 'func' } if ref $r eq 'HASH'; + return { validation => 'func', result => $r } if !$r; + } else { + my $r = _validate_rec($v, $_[1]); + return { + # If the error was a custom 'func' object, then make that the primary cause. + # This makes it possible for validations to provide their own error objects. + $r->{validation} eq 'func' && (!exists $r->{result} || keys $r->%* > 2) ? $r->%* : (error => $r), + validation => $v->{name}, + } if $r; + } } } @@ -669,10 +678,10 @@ assumes the first schema from the previous example. =item func => $sub Run the input through a subroutine to perform additional validation or -normalization. The subroutine is only called after all other validations have -succeeded. The subroutine is called with the input as its only argument. -Normalization of the input can be done by assigning to the first argument or -modifying its value in-place. +normalization. When the schema is a hashref, the subroutine is only called +after all other validations have succeeded. The subroutine is called with the +input as its only argument. Normalization of the input can be done by +assigning to the first argument or modifying its value in-place. On success, the subroutine should return a true value. On failure, it should return either a false value or a hashref. The hashref will have the @@ -873,9 +882,9 @@ considered "known". All other built-in options follow inheritance semantics: These options can be set in a custom validation, and they are inherited by the top-level schema. If -the same option is set in multiple validations a random one will be inherited, -so that's not a good idea. The top-level schema can always override options set -by custom validations. +the same option is set in multiple validations, the final one will be +inherited. The top-level schema can always override options set by custom +validations. =head3 Global custom validations diff --git a/t/validate.t b/t/validate.t index 2f8d8d9..c04288a 100644 --- a/t/validate.t +++ b/t/validate.t @@ -17,6 +17,7 @@ my %validations = ( onerrorsub => { onerror => sub { ref $_[1] } }, collapsews => { trim => 0, func => sub { $_[0] =~ s/\s+/ /g; 1 } }, neverfails => { onerror => 'err' }, + doublefunc => [ func => sub { $_[0] == 0 }, func => sub { $_[0] = 2; 1; } ], revnum => { type => 'array', sort => sub($x,$y) { $y <=> $x } }, uniquelength => { type => 'array', values => { type => 'array' }, unique => sub { scalar @{$_[0]} } }, person => { @@ -38,7 +39,7 @@ sub t($schema, $input, $output) { my $input_copy = dclone([$input])->[0]; my $res = FU::Validate->compile($schema, \%validations)->validate($input); - #diag explain FU::Validate->compile($schema, \%validations) if $line == 139; + #diag explain FU::Validate->compile($schema, \%validations) if $line == 98; is_deeply $schema, $schema_copy, "schema modification $line"; is_deeply $input, $input_copy, "input modification $line"; is_deeply $res, $output, "data ok $line"; @@ -51,6 +52,7 @@ sub f($schema, $input, $error, @msg) { my $input_copy = dclone([$input])->[0]; ok !eval { FU::Validate->compile($schema, \%validations)->validate($input); 1 }, "eval $line"; + #diag explain FU::Validate->compile($schema, \%validations) if $line == 162; is_deeply $schema, $schema_copy, "schema modification $line"; is_deeply $input, $input_copy, "input modification $line"; delete $@->{longmess}; @@ -179,6 +181,8 @@ t { person => 1, unknown => 'remove' }, {name => 'x', sex => 'y', extra => 1}, { t { neverfails => 1, int => 1 }, undef, 'err'; t { neverfails => 1, int => 1 }, 'x', 'err'; t { neverfails => 1, int => 1, onerror => undef }, 'x', undef; # XXX: no way to 'unset' an inherited onerror clause, hmm. +t { doublefunc => 1 }, 0, 2; +f { doublefunc => 1 }, 1, { validation => 'doublefunc', error => { validation => 'func', result => '' } }, "validation 'doublefunc': failed validation 'func'"; # numbers sub nerr { ({ validation => 'num', got => $_[0] }, "failed validation 'num'") } From cea691dd55ed427975aeeb0583b73300bc441fa1 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 14 Mar 2025 10:52:09 +0100 Subject: [PATCH 04/76] Validate: drop creation of default values for built-ins To better support merging multiple validations, which'll come next. Probably. --- FU/Validate.pm | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/FU/Validate.pm b/FU/Validate.pm index 5ea79d6..75af46d 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -164,10 +164,6 @@ sub compile($pkg, $schema, $validations={}) { return $schema if $schema isa __PACKAGE__; my $c = _compile $schema, $validations, 64; - $c->{type} //= 'scalar'; - $c->{missing} //= 'create'; - $c->{trim} //= 1 if $c->{type} eq 'scalar'; - $c->{unknown} //= 'remove' if $c->{type} eq 'hash'; $c->{known_keys} = { map +($_,1), $c->{known_keys}->@* } if $c->{known_keys}; delete $c->{default} if ref $c->{default} eq 'SCALAR' && ${$c->{default}} eq 'required'; @@ -194,8 +190,8 @@ sub _validate_rec { my @err; for my ($k, $s) ($c->{keys}->%*) { if (!exists $_[1]{$k}) { - next if $s->{missing} eq 'ignore'; - return { validation => 'missing', key => $k } if $s->{missing} eq 'reject'; + next if $s->{missing} && $s->{missing} eq 'ignore'; + return { validation => 'missing', key => $k } if $s->{missing} && $s->{missing} eq 'reject'; $_[1]{$k} = ref $s->{default} eq 'CODE' ? $s->{default}->() : $s->{default} // undef; next if exists $s->{default}; } @@ -243,7 +239,6 @@ sub _validate_rec { sub _validate_array { my $c = $_[0]; - return if $c->{type} ne 'array'; $_[1] = [sort { $c->{sort}->($a, $b) } $_[1]->@* ] if $c->{sort}; @@ -269,8 +264,10 @@ sub _validate_array { sub _validate_input { my $c = $_[0]; + my $type = $c->{type} // 'scalar'; + # trim (needs to be done before the 'default' test) - $_[1] = trim $_[1] =~ s/\r//rg if defined $_[1] && !ref $_[1] && $c->{type} eq 'scalar' && $c->{trim}; + $_[1] = trim $_[1] =~ s/\r//rg if defined $_[1] && !ref $_[1] && $type eq 'scalar' && (!exists $c->{trim} || $c->{trim}); # default if (!defined $_[1] || (!ref $_[1] && $_[1] eq '')) { @@ -281,18 +278,18 @@ sub _validate_input { return { validation => 'required' }; } - if ($c->{type} eq 'scalar') { + if ($type eq 'scalar') { return { validation => 'type', expected => 'scalar', got => lc ref $_[1] } if ref $_[1]; - } elsif ($c->{type} eq 'hash') { + } elsif ($type eq 'hash') { return { validation => 'type', expected => 'hash', got => lc ref $_[1] || 'scalar' } if ref $_[1] ne 'HASH'; # Each branch below makes a shallow copy of the hash, so that further # validations can perform in-place modifications without affecting the # input. - if ($c->{unknown} eq 'remove') { + if (!$c->{unknown} || $c->{unknown} eq 'remove') { $_[1] = { map +($_, $_[1]{$_}), grep $c->{known_keys}{$_}, keys $_[1]->%* }; - } elsif ($c->{unknown} eq 'reject') { + } elsif ($c->{unknown} && $c->{unknown} eq 'reject') { my @err = grep !$c->{known_keys}{$_}, keys $_[1]->%*; return { validation => 'unknown', keys => \@err, expected => [ sort keys %{$c->{known_keys}} ] } if @err; $_[1] = { $_[1]->%* }; @@ -300,19 +297,16 @@ sub _validate_input { $_[1] = { $_[1]->%* }; } - } elsif ($c->{type} eq 'array') { + } elsif ($type eq 'array') { $_[1] = [$_[1]] if $c->{scalar} && !ref $_[1]; return { validation => 'type', expected => $c->{scalar} ? 'array or scalar' : 'array', got => lc ref $_[1] || 'scalar' } if ref $_[1] ne 'ARRAY'; $_[1] = [$_[1]->@*]; # Create a shallow copy to prevent in-place modification. - } elsif ($c->{type} eq 'any') { + } elsif ($type eq 'any') { # No need to do anything here. - - } else { - confess "Unknown type '$c->{type}'"; # Already checked in compile(), but be extra safe } - &_validate_rec || &_validate_array; + &_validate_rec || ($type eq 'array' && &_validate_array); } From fa24ca53e3e998187400ebb28119f942fa1d8a01 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 14 Mar 2025 14:28:26 +0100 Subject: [PATCH 05/76] Validate: improved arrayref-schema semantics This allows all built-in options to be duplicated inside a single schema, the semantics of which are the same as the kind of merging done as part of inheriting options from custom validations. This also causes all 'keys' and 'values' validation schemas to be merged, which changes error messages a bit but is great for introspection. Probably slightly improves performance as well. --- FU/Validate.pm | 217 ++++++++++++++++++++++++------------------------- t/validate.t | 13 ++- 2 files changed, 114 insertions(+), 116 deletions(-) diff --git a/FU/Validate.pm b/FU/Validate.pm index 75af46d..feeabb3 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -21,6 +21,11 @@ my %builtin = map +($_,1), qw/ my %type_vals = map +($_,1), qw/scalar hash array any/; my %unknown_vals = map +($_,1), qw/remove reject pass/; my %missing_vals = map +($_,1), qw/create reject ignore/; +my %implied_type = qw/keys hash unknown hash values array sort array unique array/; +my %sort_vals = ( + str => sub($x,$y) { $x cmp $y }, + num => sub($x,$y) { $x <=> $y }, +); sub _length($exp, $min, $max) { [ func => sub($v) { @@ -89,136 +94,119 @@ our %default_validations = ( ); -# Loads a hashref of validations and a schema definition, and converts it into -# an object with: -# -# name => $name_or_undef, -# validations => [ $recursive_compiled_object, .. ], -# known_keys => { $key => 1, .. } # Extracted from 'keys', Used for the 'unknown' validation -# %builtin_validations -# -sub _compile($schema, $validations, $rec) { - my(%top, @val); +sub _new { bless { validations => [], @_ }, __PACKAGE__ } - # 'func' is always evaluated last in hashref schemas. - my $func = ref $schema eq 'HASH' && $schema->{func}; - for my($name, $val) (ref $schema eq 'HASH' ? %$schema : @$schema) { +sub _compile($schema, $custom, $rec, $top, $validations=$top->{validations}) { + # For hashref schemas, builtins always override other validations + $schema = [ + map +($_, $schema->{$_}), + (grep !$builtin{$_}, keys %$schema), + (grep $builtin{$_}, keys %$schema), + ] if ref $schema eq 'HASH'; + + for my($name, $val) (@$schema) { + if ($name eq 'type') { + confess "Invalid value for 'type': $val" if $name eq 'type' && !$type_vals{$val}; + confess "Incompatible types, the schema specifies '$val', but another validation requires '$top->{type}'" if $top->{type} && $top->{type} ne $val;; + $top->{type} = $val; + next; + } + + my $type = $implied_type{$name}; + if ($type) { + confess "Incompatible types, the schema specifies '$top->{type}' but the '$name' validation implies '$type'" if $top->{type} && $top->{type} ne $type; + $top->{type} = $type; + } + + if ($name eq 'values') { + $top->{values} ||= _new; + _compile($val, $custom, $rec-1, $top->{values}); + next; + } + + if ($name eq 'keys') { + $top->{keys} ||= {}; + for my($n,$v) (%$val) { + $top->{keys}{$n} ||= _new; + _compile($v, $custom, $rec-1, $top->{keys}{$n}); + } + next; + } + if ($name eq 'func') { - push @val, $val if !$func; + push @$validations, $val; + next; + } + + if ($name eq 'default') { + $top->{default} = $val; + delete $top->{default} if ref $val eq 'SCALAR' && $$val eq 'required'; next; } if ($builtin{$name}) { - confess "Invalid value for 'type': $val" if $name eq 'type' && !$type_vals{$val}; confess "Invalid value for 'missing': $val" if $name eq 'missing' && !$missing_vals{$val}; confess "Invalid value for 'unknown': $val" if $name eq 'unknown' && !$unknown_vals{$val}; - $top{$name} = $schema->{$name}; + $val = $sort_vals{$val} || confess "Unknown value for 'sort': $val" if $name eq 'sort' && ref $val ne 'CODE'; + $top->{$name} = $val; next; } - my $t = $validations->{$name} || $default_validations{$name}; + my $t = $custom->{$name} || $default_validations{$name}; confess "Unknown validation: $name" if !$t; confess "Recursion limit exceeded while resolving validation '$name'" if $rec < 1; $t = ref $t eq 'CODE' ? $t->($val) : $t; - my $v = _compile($t, $validations, $rec-1); - $v->{name} = $name; - push @val, $v; + my $v = _new name => $name; + _compile($t, $custom, $rec-1, $top, $v->{validations}); + push @$validations, $v if $v->{validations}->@*; } - - push @val, $func if $func; - - my @keys = keys $top{keys}->%* if $top{keys}; - - for my ($n,$t) (qw/keys hash unknown hash values array sort array unique array/) { - next if !exists $top{$n}; - confess "Incompatible types, the schema specifies '$top{type}' but the '$n' validation implies '$t'" if $top{type} && $top{type} ne $t; - $top{type} = $t; - } - - # Inherit some builtin options from validations - for my $t (grep ref $_ eq 'HASH', @val) { - if ($top{type} && $t->{type} && $top{type} ne $t->{type}) { - confess "Incompatible types, the schema specifies '$top{type}' but validation '$t->{name}' requires '$t->{type}'" if $schema->{type}; - confess "Incompatible types, '$t->[0]' requires '$t->{type}', but another validation requires '$top{type}'"; - } - exists $t->{$_} and !exists $top{$_} and $top{$_} = delete $t->{$_} - for qw/default onerror trim type scalar unknown missing sort unique/; - - push @keys, delete($t->{known_keys})->@* if $t->{known_keys}; - push @keys, keys $t->{keys}->%* if $t->{keys}; - } - - # Compile sub-schemas - $top{keys} = { map +($_, __PACKAGE__->compile($top{keys}{$_}, $validations)), keys $top{keys}->%* } if $top{keys}; - $top{values} = __PACKAGE__->compile($top{values}, $validations) if $top{values}; - - $top{validations} = \@val; - $top{known_keys} = \@keys; - \%top; } -sub compile($pkg, $schema, $validations={}) { +sub compile($pkg, $schema, $custom={}) { return $schema if $schema isa __PACKAGE__; - my $c = _compile $schema, $validations, 64; + my $c = _new; + _compile $schema, $custom, 64, $c; + $c +} - $c->{known_keys} = { map +($_,1), $c->{known_keys}->@* } if $c->{known_keys}; - delete $c->{default} if ref $c->{default} eq 'SCALAR' && ${$c->{default}} eq 'required'; +sub _validate_keys { + my @err; + for my ($k, $s) ($_[0]{keys}->%*) { + if (!exists $_[1]{$k}) { + next if $s->{missing} && $s->{missing} eq 'ignore'; + return { validation => 'missing', key => $k } if $s->{missing} && $s->{missing} eq 'reject'; + $_[1]{$k} = ref $s->{default} eq 'CODE' ? $s->{default}->() : $s->{default} // undef; + next if exists $s->{default}; + } - if (exists $c->{sort}) { - my $s = $c->{sort}; - $c->{sort} = - ref $s eq 'CODE' ? $s - : $s eq 'str' ? sub($x,$y) { $x cmp $y } - : $s eq 'num' ? sub($x,$y) { $x <=> $y } - : confess "Unknown value for 'sort': $c->{sort}"; + my $r = _validate($s, $_[1]{$k}); + if ($r) { + $r->{key} = $k; + push @err, $r; + } } - $c->{unique} = sub { $_[0] } if $c->{unique} && !ref $c->{unique} && !$c->{sort}; + return { validation => 'keys', errors => [ sort { $a->{key} cmp $b->{key} } @err ] } if @err; +} - bless $c, $pkg; +sub _validate_values { + my @err; + for my $i (0..$#{$_[1]}) { + my $r = _validate($_[0]{values}, $_[1][$i]); + if ($r) { + $r->{index} = $i; + push @err, $r; + } + } + return { validation => 'values', errors => \@err } if @err; } sub _validate_rec { my $c = $_[0]; - - # hash keys - if ($c->{keys}) { - my @err; - for my ($k, $s) ($c->{keys}->%*) { - if (!exists $_[1]{$k}) { - next if $s->{missing} && $s->{missing} eq 'ignore'; - return { validation => 'missing', key => $k } if $s->{missing} && $s->{missing} eq 'reject'; - $_[1]{$k} = ref $s->{default} eq 'CODE' ? $s->{default}->() : $s->{default} // undef; - next if exists $s->{default}; - } - - my $r = _validate($s, $_[1]{$k}); - if ($r) { - $r->{key} = $k; - push @err, $r; - } - } - return { validation => 'keys', errors => \@err } if @err; - } - - # array values - if ($c->{values}) { - my @err; - for my $i (0..$#{$_[1]}) { - my $r = _validate($c->{values}, $_[1][$i]); - if ($r) { - $r->{index} = $i; - push @err, $r; - } - } - return { validation => 'values', errors => \@err } if @err; - } - - # validations for my $v ($c->{validations}->@*) { if (ref $v eq 'CODE') { my $r = $v->($_[1]); @@ -243,10 +231,10 @@ sub _validate_array { $_[1] = [sort { $c->{sort}->($a, $b) } $_[1]->@* ] if $c->{sort}; # Key-based uniqueness - if ($c->{unique} && ref $c->{unique} eq 'CODE') { + if ($c->{unique} && (!$c->{sort} || ref $c->{unique} eq 'CODE')) { my %h; for my $i (0..$#{$_[1]}) { - my $k = $c->{unique}->($_[1][$i]); + my $k = ref $c->{unique} eq 'CODE' ? $c->{unique}->($_[1][$i]) : $_[1][$i]; return { validation => 'unique', index_a => $h{$k}, value_a => $_[1][$h{$k}], index_b => $i, value_b => $_[1][$i], key => $k } if exists $h{$k}; $h{$k} = $i; } @@ -288,10 +276,10 @@ sub _validate_input { # validations can perform in-place modifications without affecting the # input. if (!$c->{unknown} || $c->{unknown} eq 'remove') { - $_[1] = { map +($_, $_[1]{$_}), grep $c->{known_keys}{$_}, keys $_[1]->%* }; + $_[1] = { map +($_, $_[1]{$_}), grep $c->{keys}{$_}, keys $_[1]->%* }; } elsif ($c->{unknown} && $c->{unknown} eq 'reject') { - my @err = grep !$c->{known_keys}{$_}, keys $_[1]->%*; - return { validation => 'unknown', keys => \@err, expected => [ sort keys %{$c->{known_keys}} ] } if @err; + my @err = grep !$c->{keys}{$_}, keys $_[1]->%*; + return { validation => 'unknown', keys => \@err, expected => [ sort keys $c->{keys}->%* ] } if @err; $_[1] = { $_[1]->%* }; } else { $_[1] = { $_[1]->%* }; @@ -306,7 +294,10 @@ sub _validate_input { # No need to do anything here. } - &_validate_rec || ($type eq 'array' && &_validate_array); + ($c->{keys} && &_validate_keys) || + ($c->{values} && &_validate_values) || + &_validate_rec || + ($type eq 'array' && &_validate_array) } @@ -866,13 +857,15 @@ error: int => 1 }); -The I, I and C built-in options are validated separately -for each custom validation. So if you have multiple custom validations that set -the I option, then the array elements must validate all the listed -schemas. The same applies to I: If the same key is listed in multiple -custom validations, then the key must conform to all schemas. With respect to -the I option, a key that is mentioned in any of the I options is -considered "known". +The I option is validated separately for each custom validation. + +Multiple I and I validations are merged into a single validation. +So if you have multiple custom validations that set the I option, a +single combined schema is created that validates all array elements. The same +applies to I: if the same key is listed in multiple custom validations, +then the key must conform to all schemas. With respect to the I +option, a key that is mentioned in any of the I options is considered +"known". All other built-in options follow inheritance semantics: These options can be set in a custom validation, and they are inherited by the top-level schema. If diff --git a/t/validate.t b/t/validate.t index c04288a..7533a02 100644 --- a/t/validate.t +++ b/t/validate.t @@ -38,8 +38,8 @@ sub t($schema, $input, $output) { my $schema_copy = dclone([$schema])->[0]; my $input_copy = dclone([$input])->[0]; + #diag explain FU::Validate->compile($schema, \%validations) if $line == 95; my $res = FU::Validate->compile($schema, \%validations)->validate($input); - #diag explain FU::Validate->compile($schema, \%validations) if $line == 98; is_deeply $schema, $schema_copy, "schema modification $line"; is_deeply $input, $input_copy, "input modification $line"; is_deeply $res, $output, "data ok $line"; @@ -51,8 +51,8 @@ sub f($schema, $input, $error, @msg) { my $schema_copy = dclone([$schema])->[0]; my $input_copy = dclone([$input])->[0]; + #diag explain FU::Validate->compile($schema, \%validations) if $line == 176; ok !eval { FU::Validate->compile($schema, \%validations)->validate($input); 1 }, "eval $line"; - #diag explain FU::Validate->compile($schema, \%validations) if $line == 162; is_deeply $schema, $schema_copy, "schema modification $line"; is_deeply $input, $input_copy, "input modification $line"; delete $@->{longmess}; @@ -67,6 +67,7 @@ f {}, '', { validation => 'required' }, 'required value missing'; f {}, undef, { validation => 'required' }, 'required value missing'; t { default => undef }, undef, undef; t { default => undef }, '', undef; +f { default => \'required' }, '', { validation => 'required' }, 'required value missing'; t { defaultsub1 => 1 }, undef, 2; t { defaultsub2 => 1 }, undef, ''; t { defaultsub2 => 1 }, '', 1; @@ -115,6 +116,10 @@ t { type => 'hash', keys => { a=>{} }, unknown => 'pass' }, { a=>1,b=>1 }, { a=> t { type => 'hash', setundef => 1 }, {}, undef; t { type => 'hash', unknown => 'reject', keys => { a=>{ type => 'any', setundef => 1}} }, {a=>[]}, {a=>undef}; +t [ keys => { a => {} }, keys => { b => {} } ], {a=>1, b=>2}, {a=>1, b=>2}; +f [ keys => { a => {} }, keys => { b => {} } ], {a=>1}, { validation => 'keys', errors => [{ key => 'b', validation => 'required' }] }, '.b: required value missing'; +f [ keys => { a => {} }, keys => { a => { int => 1 } } ], {a=>'abc'}, { validation => 'keys', errors => [{ key => 'a', validation => 'int', got => 'abc' }] }, ".a: failed validation 'int'"; + # default validations f { minlength => 3 }, 'ab', { validation => 'minlength', expected => 3, got => 2 }, "failed validation 'minlength'"; t { minlength => 3 }, 'abc', 'abc'; @@ -171,9 +176,9 @@ t { collapsews => 1 }, ' x ', ' x '; t { collapsews => 1, trim => 1 }, ' x ', 'x'; f { person => 1 }, 1, { validation => 'type', expected => 'hash', got => 'scalar' }, "invalid type, expected 'hash' but got 'scalar'"; t { person => 1, default => 1 }, undef, 1; -f { person => 1 }, { sex => 1 }, { validation => 'person', error => { validation => 'keys', errors => [{ key => 'name', validation => 'required' }] } }, "validation 'person'.name: required value missing"; +f { person => 1 }, { sex => 1 }, { validation => 'keys', errors => [{ key => 'name', validation => 'required' }] }, ".name: required value missing"; t { person => 1 }, { sex => undef, name => 'y' }, { sex => 1, name => 'y' }; -f { person => 1, keys => {age => {default => \'required'}} }, {name => 'x', sex => 'y'}, { validation => 'keys', errors => [{ key => 'age', validation => 'required' }] }, '.age: required value missing'; +f { person => 1, keys => {age => {missing => 'reject'}} }, {name => 'x', sex => 'y'}, { key => 'age', validation => 'missing' }, '.age: required key missing'; t { person => 1, keys => {extra => {}} }, {name => 'x', sex => 'y', extra => 1}, { name => 'x', sex => 'y', extra => 1 }; f { person => 1, keys => {extra => {}} }, {name => 'x', sex => 'y', extra => ''}, { validation => 'keys', errors => [{ key => 'extra', validation => 'required' }] }, '.extra: required value missing'; t { person => 1 }, {name => 'x', sex => 'y', extra => 1}, {name => 'x', sex => 'y', extra => 1}; From 3fad7feec3770034dd7af22cc219ea342b289645 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Fri, 14 Mar 2025 16:44:08 +0100 Subject: [PATCH 06/76] Validate: Rename "values"->"elems", repurpose "values" to validate hash values I'm breaking stuff left and right while I still can. Idea: "key_names" validation? Idea: "tuple" validation that works like "keys" but for arrays. (i.e. { tuple => { $index => $schema } }, could make "missing" and "unknown" work for arrays, too) --- FU/Validate.pm | 120 ++++++++++++++++++++++++++++++------------------- t/validate.t | 13 +++--- 2 files changed, 82 insertions(+), 51 deletions(-) diff --git a/FU/Validate.pm b/FU/Validate.pm index feeabb3..8d8db91 100644 --- a/FU/Validate.pm +++ b/FU/Validate.pm @@ -13,15 +13,18 @@ my %builtin = map +($_,1), qw/ default onerror trim - values scalar sort unique - keys unknown missing + elems scalar sort unique + keys values unknown missing func /; my %type_vals = map +($_,1), qw/scalar hash array any/; my %unknown_vals = map +($_,1), qw/remove reject pass/; my %missing_vals = map +($_,1), qw/create reject ignore/; -my %implied_type = qw/keys hash unknown hash values array sort array unique array/; +my %implied_type = qw/ + keys hash values hash unknown hash + elems array sort array unique array scalar array +/; my %sort_vals = ( str => sub($x,$y) { $x cmp $y }, num => sub($x,$y) { $x <=> $y }, @@ -119,9 +122,9 @@ sub _compile($schema, $custom, $rec, $top, $validations=$top->{validations}) { $top->{type} = $type; } - if ($name eq 'values') { - $top->{values} ||= _new; - _compile($val, $custom, $rec-1, $top->{values}); + if ($name eq 'elems' || $name eq 'values') { + $top->{$name} ||= _new; + _compile($val, $custom, $rec-1, $top->{$name}); next; } @@ -173,35 +176,51 @@ sub compile($pkg, $schema, $custom={}) { } -sub _validate_keys { - my @err; - for my ($k, $s) ($_[0]{keys}->%*) { - if (!exists $_[1]{$k}) { - next if $s->{missing} && $s->{missing} eq 'ignore'; - return { validation => 'missing', key => $k } if $s->{missing} && $s->{missing} eq 'reject'; - $_[1]{$k} = ref $s->{default} eq 'CODE' ? $s->{default}->() : $s->{default} // undef; - next if exists $s->{default}; - } +sub _validate_hash { + my $c = $_[0]; - my $r = _validate($s, $_[1]{$k}); - if ($r) { - $r->{key} = $k; - push @err, $r; + if ($c->{keys}) { + my @err; + for my ($k, $s) ($c->{keys}->%*) { + if (!exists $_[1]{$k}) { + next if $s->{missing} && $s->{missing} eq 'ignore'; + return { validation => 'missing', key => $k } if $s->{missing} && $s->{missing} eq 'reject'; + $_[1]{$k} = ref $s->{default} eq 'CODE' ? $s->{default}->() : $s->{default} // undef; + next if exists $s->{default}; + } + + my $r = _validate($s, $_[1]{$k}); + if ($r) { + $r->{key} = $k; + push @err, $r; + } } + return { validation => 'keys', errors => [ sort { $a->{key} cmp $b->{key} } @err ] } if @err; + } + + if ($c->{values}) { + my @err; + for my ($k, $v) ($_[1]->%*) { + my $r = _validate($c->{values}, $v); + if ($r) { + $r->{key} = $k; + push @err, $r; + } + } + return { validation => 'values', errors => [ sort { $a->{key} cmp $b->{key} } @err ] } if @err; } - return { validation => 'keys', errors => [ sort { $a->{key} cmp $b->{key} } @err ] } if @err; } -sub _validate_values { +sub _validate_elems { my @err; for my $i (0..$#{$_[1]}) { - my $r = _validate($_[0]{values}, $_[1][$i]); + my $r = _validate($_[0]{elems}, $_[1][$i]); if ($r) { $r->{index} = $i; push @err, $r; } } - return { validation => 'values', errors => \@err } if @err; + return { validation => 'elems', errors => \@err } if @err; } @@ -275,14 +294,14 @@ sub _validate_input { # Each branch below makes a shallow copy of the hash, so that further # validations can perform in-place modifications without affecting the # input. - if (!$c->{unknown} || $c->{unknown} eq 'remove') { + if (!$c->{keys} || ($c->{unknown} && $c->{unknown} eq 'pass')) { + $_[1] = { $_[1]->%* }; + } elsif (!$c->{unknown} || $c->{unknown} eq 'remove') { $_[1] = { map +($_, $_[1]{$_}), grep $c->{keys}{$_}, keys $_[1]->%* }; - } elsif ($c->{unknown} && $c->{unknown} eq 'reject') { + } else { my @err = grep !$c->{keys}{$_}, keys $_[1]->%*; return { validation => 'unknown', keys => \@err, expected => [ sort keys $c->{keys}->%* ] } if @err; $_[1] = { $_[1]->%* }; - } else { - $_[1] = { $_[1]->%* }; } } elsif ($type eq 'array') { @@ -294,8 +313,8 @@ sub _validate_input { # No need to do anything here. } - ($c->{keys} && &_validate_keys) || - ($c->{values} && &_validate_values) || + ($type eq 'hash' && &_validate_hash) || + ($c->{elems} && &_validate_elems) || &_validate_rec || ($type eq 'array' && &_validate_array) } @@ -340,8 +359,9 @@ sub errors($e, $prefix='') { my $val = $e->{validation}; my $p = $prefix ? "$prefix: " : ''; $val eq 'keys' ? map errors($_, $prefix.'.'._fmtkey($_->{key})), $e->{errors}->@* : + $val eq 'values' ? map errors($_, $prefix.'.'._fmtkey($_->{key})), $e->{errors}->@* : $val eq 'missing' ? $prefix.'.'._fmtkey($e->{key}).': required key missing' : - $val eq 'values' ? map errors($_, $prefix."[$_->{index}]"), $e->{errors}->@* : + $val eq 'elems' ? map errors($_, $prefix."[$_->{index}]"), $e->{errors}->@* : $val eq 'unique' ? $prefix."[$e->{index_b}] value '"._fmtval($e->{value_a})."' duplicated" : $val eq 'required' ? "${p}required value missing" : $val eq 'type' ? "${p}invalid type, expected '$e->{expected}' but got '$e->{got}'" : @@ -374,10 +394,10 @@ validate the format and the structure of the data, but it does not support validations that depend on other input values. For example, it is not possible to specify that the contents of a I field must be equivalent to that of a I field, but you can specify that both fields need to be -filled out. Recursive data structures are not supported. There is also no -built-in support for validating hashes with dynamic keys or arrays where not -all elements conform to the same schema. These could technically still be -validated with custom validations, but it won't be as convenient. +filled out. Recursive data structures are not supported. There is also no good +support for validating hashes with dynamic keys or arrays where not all +elements conform to the same schema. These could technically still be validated +with custom validations, but it won't be as convenient. This module is designed to validate any kind of program input after it has been parsed into a Perl data structure. It should not be used to validate function @@ -521,14 +541,22 @@ like: ] } +=item values => $schema + +Implies C<< type => 'hash' >>, set a schema that is used to validate every hash +value. Can be used together with I, in which case values must validate +both this C<$schema> and the schema corresponding to the key. + =item unknown => $option Implies C<< type => 'hash' >>, this option specifies what to do with keys in the input data that have not been defined in the I option. Possible values are I to remove unknown keys from the output data (this is the default), I to return an error if there are unknown keys in the input, -or I to pass through any unknown keys to the output data. Note that the -values for passed-through keys are not validated against any schema! +or I to pass through any unknown keys to the output data. Values for +passed-through keys are only validated when the I option is set, +otherwise they are passed through as-is. This option has no effect when the +I option is never set, in that case all values are always passed through. In the case of I, the error object will look like: @@ -549,7 +577,8 @@ undef), I to return an error if the option is missing or I to leave the key out of the returned data. The default is I, but if no I option is set for this key then -that is effectively the same as I. +that is effectively the same as I. Values created through I are +still validated through I if that has been set. In the case of I, the error object will look like: @@ -557,15 +586,15 @@ In the case of I, the error object will look like: key => 'field' } -=item values => $schema +=item elems => $schema Implies C<< type => 'array' >>, this defines the schema that is applied to -every item in the array. The schema definition may be a bare hashref or a +every element in the array. The schema definition may be a bare hashref or a validator returned by C. Failure is reported in a similar fashion to I: - { validation => 'values', + { validation => 'elems', errors => [ { index => 1, validation => 'required' } ] @@ -626,8 +655,7 @@ All of that may sound complicated, but it's quite easy to use. Here's a few examples: # This describes an array of hashes with keys 'id' and 'name'. - { values => { - type => 'hash', + { elems => { keys => { id => { uint => 1 }, name => {} @@ -641,7 +669,7 @@ examples: # Contrived example: An array of strings, and we want # each string to start with a different character. - { values => { minlength => 1 }, + { elems => { minlength => 1 }, unique => sub { substr $_[0], 0, 1 } } @@ -845,7 +873,7 @@ used in that schema may get input with whitespace around it. All validations used in a schema need to agree upon a single I option. If a custom validation does not specify a I option (and no type is -implied by another validation such as I or I), then the +implied by another validation such as I or I), then the validation should work with every type. It is an error to define a schema that mixes validations of different types. For example, the following throws an error: @@ -859,8 +887,8 @@ error: The I option is validated separately for each custom validation. -Multiple I and I validations are merged into a single validation. -So if you have multiple custom validations that set the I option, a +Multiple I and I validations are merged into a single validation. +So if you have multiple custom validations that set the I option, a single combined schema is created that validates all array elements. The same applies to I: if the same key is listed in multiple custom validations, then the key must conform to all schemas. With respect to the I diff --git a/t/validate.t b/t/validate.t index 7533a02..de37264 100644 --- a/t/validate.t +++ b/t/validate.t @@ -19,7 +19,7 @@ my %validations = ( neverfails => { onerror => 'err' }, doublefunc => [ func => sub { $_[0] == 0 }, func => sub { $_[0] = 2; 1; } ], revnum => { type => 'array', sort => sub($x,$y) { $y <=> $x } }, - uniquelength => { type => 'array', values => { type => 'array' }, unique => sub { scalar @{$_[0]} } }, + uniquelength => { elems => { type => 'array' }, unique => sub { scalar @{$_[0]} } }, person => { type => 'hash', unknown => 'pass', @@ -85,8 +85,8 @@ f { type => 'array' }, 1, { validation => 'type', expected => 'array', got => 's t { type => 'array' }, [], []; t { type => 'array' }, [undef,1,2,{}], [undef,1,2,{}]; t { type => 'array', scalar => 1 }, 1, [1]; -f { type => 'array', values => {} }, [undef], { validation => 'values', errors => [{ index => 0, validation => 'required' }] }, "[0]: required value missing"; -t { type => 'array', values => {} }, [' a '], ['a']; +f { type => 'array', elems => {} }, [undef], { validation => 'elems', errors => [{ index => 0, validation => 'required' }] }, "[0]: required value missing"; +t { type => 'array', elems => {} }, [' a '], ['a']; t { type => 'array', sort => 'str' }, [qw/20 100 3/], [qw/100 20 3/]; t { type => 'array', sort => 'num' }, [qw/20 100 3/], [qw/3 20 100/]; t { revnum => 1 }, [qw/20 100 3/], [qw/100 20 3/]; @@ -97,12 +97,12 @@ f { type => 'array', unique => 1 }, [qw/3 1 3/], { validation => 'unique', index t { uniquelength => 1 }, [[],[1],[1,2]], [[],[1],[1,2]]; f { uniquelength => 1 }, [[],[1],[2]], { validation => 'unique', index_a => 1, value_a => [1], index_b => 2, value_b => [2], key => 1 }, q{[2] value '[1]' duplicated}; t { type => 'array', setundef => 1 }, [], undef; -t { type => 'array', values => { type => 'any', setundef => 1 } }, [[]], [undef]; +t { type => 'array', elems => { type => 'any', setundef => 1 } }, [[]], [undef]; # hashes f { type => 'hash' }, [], { validation => 'type', expected => 'hash', got => 'array' }, "invalid type, expected 'hash' but got 'array'"; f { type => 'hash' }, 'a', { validation => 'type', expected => 'hash', got => 'scalar' }, "invalid type, expected 'hash' but got 'scalar'"; -t { type => 'hash' }, {a=>[],b=>undef,c=>{}}, {}; +t { type => 'hash' }, {a=>[],b=>undef,c=>{}}, {a=>[],b=>undef,c=>{}}; f { type => 'hash', keys => { a=>{} } }, {}, { validation => 'keys', errors => [{ key => 'a', validation => 'required' }] }, '.a: required value missing'; t { type => 'hash', keys => { a=>{missing=>'ignore'} } }, {}, {}; t { type => 'hash', keys => { a=>{default=>undef} } }, {}, {a=>undef}; @@ -120,6 +120,9 @@ t [ keys => { a => {} }, keys => { b => {} } ], {a=>1, b=>2}, {a=>1, b=>2}; f [ keys => { a => {} }, keys => { b => {} } ], {a=>1}, { validation => 'keys', errors => [{ key => 'b', validation => 'required' }] }, '.b: required value missing'; f [ keys => { a => {} }, keys => { a => { int => 1 } } ], {a=>'abc'}, { validation => 'keys', errors => [{ key => 'a', validation => 'int', got => 'abc' }] }, ".a: failed validation 'int'"; +t { values => { int => 1 } }, { a => -1, b => 1 }, { a => -1, b => 1 }; +f { values => { int => 1 } }, { a => undef }, { validation => 'values', errors => [{ key => 'a', validation => 'required' }] }, '.a: required value missing'; + # default validations f { minlength => 3 }, 'ab', { validation => 'minlength', expected => 3, got => 2 }, "failed validation 'minlength'"; t { minlength => 3 }, 'abc', 'abc'; From f8fe53cba90f5b39ae4666aee97d053a421c61ef Mon Sep 17 00:00:00 2001 From: Yorhel Date: Sun, 16 Mar 2025 15:03:32 +0100 Subject: [PATCH 07/76] json_format: Add html_safe option --- FU/Util.pm | 27 ++++++++++++++++++++------- c/jsonfmt.c | 29 ++++++++++++++++------------- t/json_format.t | 1 + 3 files changed, 37 insertions(+), 20 deletions(-) diff --git a/FU/Util.pm b/FU/Util.pm index cd34fd3..a9cca1d 100644 --- a/FU/Util.pm +++ b/FU/Util.pm @@ -212,13 +212,6 @@ roughly similar to: JSON::PP->new->allow_nonref->core_bools->convert_blessed->encode($scalar); -Some modules escape the slash character in encoded strings to prevent a -potential XSS vulnerability when embedding JSON inside C<< >> tags. This function does I do that because it might not even -be sufficient. The following is probably an improvement: - - json_format($data) =~ s{>, C<< > >> and C<< & >> as Unicode escapes. +Commonly used to embed data inside a HTML page: + + $html = ''; + +This option does NOT make it safe to include the encoded JSON as an attribute +value. There is no way to do that without violating JSON specs, so you should +use entity escaping instead. + +Some JSON modules escape the forward slash (C) character instead, but that +is, at best, B sufficient for embedding inside a C<<