Validate: improved arrayref-schema semantics

This allows all built-in options to be duplicated inside a single
schema, the semantics of which are the same as the kind of merging
done as part of inheriting options from custom validations.

This also causes all 'keys' and 'values' validation schemas to be
merged, which changes error messages a bit but is great for
introspection. Probably slightly improves performance as well.
This commit is contained in:
Yorhel 2025-03-14 14:28:26 +01:00
parent cea691dd55
commit fa24ca53e3
2 changed files with 114 additions and 116 deletions

View file

@ -21,6 +21,11 @@ my %builtin = map +($_,1), qw/
my %type_vals = map +($_,1), qw/scalar hash array any/;
my %unknown_vals = map +($_,1), qw/remove reject pass/;
my %missing_vals = map +($_,1), qw/create reject ignore/;
my %implied_type = qw/keys hash unknown hash values array sort array unique array/;
my %sort_vals = (
str => sub($x,$y) { $x cmp $y },
num => sub($x,$y) { $x <=> $y },
);
sub _length($exp, $min, $max) {
[ func => sub($v) {
@ -89,136 +94,119 @@ our %default_validations = (
);
# Loads a hashref of validations and a schema definition, and converts it into
# an object with:
#
# name => $name_or_undef,
# validations => [ $recursive_compiled_object, .. ],
# known_keys => { $key => 1, .. } # Extracted from 'keys', Used for the 'unknown' validation
# %builtin_validations
#
sub _compile($schema, $validations, $rec) {
my(%top, @val);
sub _new { bless { validations => [], @_ }, __PACKAGE__ }
# 'func' is always evaluated last in hashref schemas.
my $func = ref $schema eq 'HASH' && $schema->{func};
for my($name, $val) (ref $schema eq 'HASH' ? %$schema : @$schema) {
sub _compile($schema, $custom, $rec, $top, $validations=$top->{validations}) {
# For hashref schemas, builtins always override other validations
$schema = [
map +($_, $schema->{$_}),
(grep !$builtin{$_}, keys %$schema),
(grep $builtin{$_}, keys %$schema),
] if ref $schema eq 'HASH';
for my($name, $val) (@$schema) {
if ($name eq 'type') {
confess "Invalid value for 'type': $val" if $name eq 'type' && !$type_vals{$val};
confess "Incompatible types, the schema specifies '$val', but another validation requires '$top->{type}'" if $top->{type} && $top->{type} ne $val;;
$top->{type} = $val;
next;
}
my $type = $implied_type{$name};
if ($type) {
confess "Incompatible types, the schema specifies '$top->{type}' but the '$name' validation implies '$type'" if $top->{type} && $top->{type} ne $type;
$top->{type} = $type;
}
if ($name eq 'values') {
$top->{values} ||= _new;
_compile($val, $custom, $rec-1, $top->{values});
next;
}
if ($name eq 'keys') {
$top->{keys} ||= {};
for my($n,$v) (%$val) {
$top->{keys}{$n} ||= _new;
_compile($v, $custom, $rec-1, $top->{keys}{$n});
}
next;
}
if ($name eq 'func') {
push @val, $val if !$func;
push @$validations, $val;
next;
}
if ($name eq 'default') {
$top->{default} = $val;
delete $top->{default} if ref $val eq 'SCALAR' && $$val eq 'required';
next;
}
if ($builtin{$name}) {
confess "Invalid value for 'type': $val" if $name eq 'type' && !$type_vals{$val};
confess "Invalid value for 'missing': $val" if $name eq 'missing' && !$missing_vals{$val};
confess "Invalid value for 'unknown': $val" if $name eq 'unknown' && !$unknown_vals{$val};
$top{$name} = $schema->{$name};
$val = $sort_vals{$val} || confess "Unknown value for 'sort': $val" if $name eq 'sort' && ref $val ne 'CODE';
$top->{$name} = $val;
next;
}
my $t = $validations->{$name} || $default_validations{$name};
my $t = $custom->{$name} || $default_validations{$name};
confess "Unknown validation: $name" if !$t;
confess "Recursion limit exceeded while resolving validation '$name'" if $rec < 1;
$t = ref $t eq 'CODE' ? $t->($val) : $t;
my $v = _compile($t, $validations, $rec-1);
$v->{name} = $name;
push @val, $v;
my $v = _new name => $name;
_compile($t, $custom, $rec-1, $top, $v->{validations});
push @$validations, $v if $v->{validations}->@*;
}
push @val, $func if $func;
my @keys = keys $top{keys}->%* if $top{keys};
for my ($n,$t) (qw/keys hash unknown hash values array sort array unique array/) {
next if !exists $top{$n};
confess "Incompatible types, the schema specifies '$top{type}' but the '$n' validation implies '$t'" if $top{type} && $top{type} ne $t;
$top{type} = $t;
}
# Inherit some builtin options from validations
for my $t (grep ref $_ eq 'HASH', @val) {
if ($top{type} && $t->{type} && $top{type} ne $t->{type}) {
confess "Incompatible types, the schema specifies '$top{type}' but validation '$t->{name}' requires '$t->{type}'" if $schema->{type};
confess "Incompatible types, '$t->[0]' requires '$t->{type}', but another validation requires '$top{type}'";
}
exists $t->{$_} and !exists $top{$_} and $top{$_} = delete $t->{$_}
for qw/default onerror trim type scalar unknown missing sort unique/;
push @keys, delete($t->{known_keys})->@* if $t->{known_keys};
push @keys, keys $t->{keys}->%* if $t->{keys};
}
# Compile sub-schemas
$top{keys} = { map +($_, __PACKAGE__->compile($top{keys}{$_}, $validations)), keys $top{keys}->%* } if $top{keys};
$top{values} = __PACKAGE__->compile($top{values}, $validations) if $top{values};
$top{validations} = \@val;
$top{known_keys} = \@keys;
\%top;
}
sub compile($pkg, $schema, $validations={}) {
sub compile($pkg, $schema, $custom={}) {
return $schema if $schema isa __PACKAGE__;
my $c = _compile $schema, $validations, 64;
my $c = _new;
_compile $schema, $custom, 64, $c;
$c
}
$c->{known_keys} = { map +($_,1), $c->{known_keys}->@* } if $c->{known_keys};
delete $c->{default} if ref $c->{default} eq 'SCALAR' && ${$c->{default}} eq 'required';
sub _validate_keys {
my @err;
for my ($k, $s) ($_[0]{keys}->%*) {
if (!exists $_[1]{$k}) {
next if $s->{missing} && $s->{missing} eq 'ignore';
return { validation => 'missing', key => $k } if $s->{missing} && $s->{missing} eq 'reject';
$_[1]{$k} = ref $s->{default} eq 'CODE' ? $s->{default}->() : $s->{default} // undef;
next if exists $s->{default};
}
if (exists $c->{sort}) {
my $s = $c->{sort};
$c->{sort} =
ref $s eq 'CODE' ? $s
: $s eq 'str' ? sub($x,$y) { $x cmp $y }
: $s eq 'num' ? sub($x,$y) { $x <=> $y }
: confess "Unknown value for 'sort': $c->{sort}";
my $r = _validate($s, $_[1]{$k});
if ($r) {
$r->{key} = $k;
push @err, $r;
}
}
$c->{unique} = sub { $_[0] } if $c->{unique} && !ref $c->{unique} && !$c->{sort};
return { validation => 'keys', errors => [ sort { $a->{key} cmp $b->{key} } @err ] } if @err;
}
bless $c, $pkg;
sub _validate_values {
my @err;
for my $i (0..$#{$_[1]}) {
my $r = _validate($_[0]{values}, $_[1][$i]);
if ($r) {
$r->{index} = $i;
push @err, $r;
}
}
return { validation => 'values', errors => \@err } if @err;
}
sub _validate_rec {
my $c = $_[0];
# hash keys
if ($c->{keys}) {
my @err;
for my ($k, $s) ($c->{keys}->%*) {
if (!exists $_[1]{$k}) {
next if $s->{missing} && $s->{missing} eq 'ignore';
return { validation => 'missing', key => $k } if $s->{missing} && $s->{missing} eq 'reject';
$_[1]{$k} = ref $s->{default} eq 'CODE' ? $s->{default}->() : $s->{default} // undef;
next if exists $s->{default};
}
my $r = _validate($s, $_[1]{$k});
if ($r) {
$r->{key} = $k;
push @err, $r;
}
}
return { validation => 'keys', errors => \@err } if @err;
}
# array values
if ($c->{values}) {
my @err;
for my $i (0..$#{$_[1]}) {
my $r = _validate($c->{values}, $_[1][$i]);
if ($r) {
$r->{index} = $i;
push @err, $r;
}
}
return { validation => 'values', errors => \@err } if @err;
}
# validations
for my $v ($c->{validations}->@*) {
if (ref $v eq 'CODE') {
my $r = $v->($_[1]);
@ -243,10 +231,10 @@ sub _validate_array {
$_[1] = [sort { $c->{sort}->($a, $b) } $_[1]->@* ] if $c->{sort};
# Key-based uniqueness
if ($c->{unique} && ref $c->{unique} eq 'CODE') {
if ($c->{unique} && (!$c->{sort} || ref $c->{unique} eq 'CODE')) {
my %h;
for my $i (0..$#{$_[1]}) {
my $k = $c->{unique}->($_[1][$i]);
my $k = ref $c->{unique} eq 'CODE' ? $c->{unique}->($_[1][$i]) : $_[1][$i];
return { validation => 'unique', index_a => $h{$k}, value_a => $_[1][$h{$k}], index_b => $i, value_b => $_[1][$i], key => $k } if exists $h{$k};
$h{$k} = $i;
}
@ -288,10 +276,10 @@ sub _validate_input {
# validations can perform in-place modifications without affecting the
# input.
if (!$c->{unknown} || $c->{unknown} eq 'remove') {
$_[1] = { map +($_, $_[1]{$_}), grep $c->{known_keys}{$_}, keys $_[1]->%* };
$_[1] = { map +($_, $_[1]{$_}), grep $c->{keys}{$_}, keys $_[1]->%* };
} elsif ($c->{unknown} && $c->{unknown} eq 'reject') {
my @err = grep !$c->{known_keys}{$_}, keys $_[1]->%*;
return { validation => 'unknown', keys => \@err, expected => [ sort keys %{$c->{known_keys}} ] } if @err;
my @err = grep !$c->{keys}{$_}, keys $_[1]->%*;
return { validation => 'unknown', keys => \@err, expected => [ sort keys $c->{keys}->%* ] } if @err;
$_[1] = { $_[1]->%* };
} else {
$_[1] = { $_[1]->%* };
@ -306,7 +294,10 @@ sub _validate_input {
# No need to do anything here.
}
&_validate_rec || ($type eq 'array' && &_validate_array);
($c->{keys} && &_validate_keys) ||
($c->{values} && &_validate_values) ||
&_validate_rec ||
($type eq 'array' && &_validate_array)
}
@ -866,13 +857,15 @@ error:
int => 1
});
The I<keys>, I<values> and C<func> built-in options are validated separately
for each custom validation. So if you have multiple custom validations that set
the I<values> option, then the array elements must validate all the listed
schemas. The same applies to I<keys>: If the same key is listed in multiple
custom validations, then the key must conform to all schemas. With respect to
the I<unknown> option, a key that is mentioned in any of the I<keys> options is
considered "known".
The I<func> option is validated separately for each custom validation.
Multiple I<keys> and I<values> validations are merged into a single validation.
So if you have multiple custom validations that set the I<values> option, a
single combined schema is created that validates all array elements. The same
applies to I<keys>: if the same key is listed in multiple custom validations,
then the key must conform to all schemas. With respect to the I<unknown>
option, a key that is mentioned in any of the I<keys> options is considered
"known".
All other built-in options follow inheritance semantics: These options can be
set in a custom validation, and they are inherited by the top-level schema. If