901 lines
32 KiB
Perl
901 lines
32 KiB
Perl
package FU::Validate 0.3;
|
|
|
|
use v5.36;
|
|
use experimental 'builtin', 'for_list';
|
|
use builtin qw/true false blessed trim/;
|
|
use Carp 'confess';
|
|
use FU::Util 'to_bool';
|
|
|
|
|
|
# Unavailable as custom validation names
|
|
my %builtin = map +($_,1), qw/
|
|
type
|
|
default
|
|
onerror
|
|
trim
|
|
values scalar sort unique
|
|
keys unknown missing
|
|
func
|
|
/;
|
|
|
|
my %type_vals = map +($_,1), qw/scalar hash array any/;
|
|
my %unknown_vals = map +($_,1), qw/remove reject pass/;
|
|
my %missing_vals = map +($_,1), qw/create reject ignore/;
|
|
|
|
sub _length($exp, $min, $max) {
|
|
+{ func => sub($v) {
|
|
my $got = ref $v eq 'HASH' ? keys %$v : ref $v eq 'ARRAY' ? @$v : length $v;
|
|
(!defined $min || $got >= $min) && (!defined $max || $got <= $max) ? 1 : { expected => $exp, got => $got };
|
|
}}
|
|
}
|
|
|
|
# Basically the same as ( regex => $arg ), but hides the regex error
|
|
sub _reg($reg) {
|
|
( type => 'scalar', func => sub { $_[0] =~ $reg ? 1 : { got => $_[0] } } );
|
|
}
|
|
|
|
|
|
our $re_num = qr/^-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?$/;
|
|
our $re_int = qr/^-?(?:0|[1-9][0-9]*)$/;
|
|
our $re_uint = qr/^(?:0|[1-9][0-9]*)$/;
|
|
our $re_fqdn = qr/(?:[a-zA-Z0-9][\w-]*\.)+[a-zA-Z][a-zA-Z0-9-]{1,25}\.?/;
|
|
our $re_ip4_digit = qr/(?:0|[1-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])/;
|
|
our $re_ip4 = qr/($re_ip4_digit\.){3}$re_ip4_digit/;
|
|
# This monstrosity is based on http://stackoverflow.com/questions/53497/regular-expression-that-matches-valid-ipv6-addresses
|
|
# Doesn't allow IPv4-mapped-IPv6 addresses or other fancy stuff.
|
|
our $re_ip6 = qr/(?:[0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,7}:|(?:[0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|(?:[0-9a-fA-F]{1,4}:){1,5}(?::[0-9a-fA-F]{1,4}){1,2}|(?:[0-9a-fA-F]{1,4}:){1,4}(?::[0-9a-fA-F]{1,4}){1,3}|(?:[0-9a-fA-F]{1,4}:){1,3}(?::[0-9a-fA-F]{1,4}){1,4}|(?:[0-9a-fA-F]{1,4}:){1,2}(?::[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:(?:(?::[0-9a-fA-F]{1,4}){1,6})|:(?:(?::[0-9a-fA-F]{1,4}){1,7}|:)/;
|
|
our $re_ip = qr/(?:$re_ip4|$re_ip6)/;
|
|
our $re_domain = qr/(?:$re_fqdn|$re_ip4|\[$re_ip6\])/;
|
|
our $re_email = qr/^[-\+\.#\$=\w]+\@$re_fqdn$/;
|
|
our $re_weburl = qr/^https?:\/\/$re_domain(?::[1-9][0-9]{0,5})?(?:\/[^\s<>"]*)$/;
|
|
our $re_date = qr/^(?:19[0-9][0-9]|20[0-9][0-9])-(?:0[1-9]|1[0-2])-(?:0[1-9]|[12][0-9]|3[01])$/;
|
|
|
|
|
|
our %default_validations = (
|
|
regex => sub($reg) {
|
|
# Error objects should be plain data structures so that they can easily
|
|
# be converted to JSON for debugging. We have to stringify $reg in the
|
|
# error object to ensure that.
|
|
+{ type => 'scalar', func => sub { $_[0] =~ $reg ? 1 : { regex => "$reg", got => $_[0] } } }
|
|
},
|
|
enum => sub($vals) {
|
|
my @l = ref $vals eq 'HASH' ? sort keys %$vals : ref $vals eq 'ARRAY' ? @$vals : ($vals);
|
|
my %opts = map +($_,1), @l;
|
|
+{ type => 'scalar', func => sub { $opts{ (my $v = $_[0]) } ? 1 : { expected => \@l, got => $_[0] } } }
|
|
},
|
|
|
|
minlength => sub($v) { _length $v, $v, undef },
|
|
maxlength => sub($v) { _length $v, undef, $v },
|
|
length => sub($v) { _length $v, ref $v eq 'ARRAY' ? @$v : ($v, $v) },
|
|
|
|
bool => { type => 'any', func => sub { my $r = to_bool $_[0]; return {} if !defined $r; $_[0] = $r; 1 } },
|
|
anybool => { type => 'any', default => false, func => sub { $_[0] = $_[0] ? true : false; 1 } },
|
|
|
|
num => { _reg $re_num },
|
|
int => { _reg $re_int }, # implies num
|
|
uint => { _reg $re_uint }, # implies num
|
|
min => sub($min) { +{ num => 1, func => sub { $_[0] >= $min ? 1 : { expected => $min, got => $_[0] } } } },
|
|
max => sub($max) { +{ num => 1, func => sub { $_[0] <= $max ? 1 : { expected => $max, got => $_[0] } } } },
|
|
range => sub { [ min => $_[0][0], max => $_[0][1] ] },
|
|
|
|
ascii => { _reg qr/^[\x20-\x7E]*$/ },
|
|
sl => { _reg qr/^[^\t\r\n]+$/ },
|
|
ipv4 => { _reg $re_ip4 },
|
|
ipv6 => { _reg $re_ip6 },
|
|
ip => { _reg $re_ip },
|
|
email => { _reg($re_email), maxlength => 254 },
|
|
weburl => { _reg($re_weburl), maxlength => 65536 }, # the maxlength is a bit arbitrary, but better than unlimited
|
|
date => { _reg $re_date },
|
|
);
|
|
|
|
|
|
# Loads a hashref of validations and a schema definition, and converts it into
|
|
# an object with:
|
|
#
|
|
# name => $name_or_undef,
|
|
# validations => [ $recursive_compiled_object, .. ],
|
|
# known_keys => { $key => 1, .. } # Extracted from 'keys', Used for the 'unknown' validation
|
|
# %builtin_validations
|
|
#
|
|
sub _compile($schema, $validations, $rec) {
|
|
my(%top, @val);
|
|
|
|
for my($name, $val) (ref $schema eq 'ARRAY' ? @$schema : %$schema) {
|
|
if ($builtin{$name}) {
|
|
confess "Invalid value for 'type': $val" if $name eq 'type' && !$type_vals{$val};
|
|
confess "Invalid value for 'missing': $val" if $name eq 'missing' && !$missing_vals{$val};
|
|
confess "Invalid value for 'unknown': $val" if $name eq 'unknown' && !$unknown_vals{$val};
|
|
$top{$name} = $schema->{$name};
|
|
next;
|
|
}
|
|
|
|
my $t = $validations->{$name} || $default_validations{$name};
|
|
confess "Unknown validation: $name" if !$t;
|
|
confess "Recursion limit exceeded while resolving validation '$name'" if $rec < 1;
|
|
$t = ref $t eq 'HASH' ? $t : $t->($val);
|
|
|
|
my $v = _compile($t, $validations, $rec-1);
|
|
$v->{name} = $name;
|
|
push @val, $v;
|
|
}
|
|
|
|
my @keys = keys $top{keys}->%* if $top{keys};
|
|
|
|
for my ($n,$t) (qw/keys hash unknown hash values array sort array unique array/) {
|
|
next if !exists $top{$n};
|
|
confess "Incompatible types, the schema specifies '$top{type}' but the '$n' validation implies '$t'" if $top{type} && $top{type} ne $t;
|
|
$top{type} = $t;
|
|
}
|
|
|
|
# Inherit some builtin options from validations
|
|
for my $t (@val) {
|
|
if ($top{type} && $t->{type} && $top{type} ne $t->{type}) {
|
|
confess "Incompatible types, the schema specifies '$top{type}' but validation '$t->{name}' requires '$t->{type}'" if $schema->{type};
|
|
confess "Incompatible types, '$t->[0]' requires '$t->{type}', but another validation requires '$top{type}'";
|
|
}
|
|
exists $t->{$_} and !exists $top{$_} and $top{$_} = delete $t->{$_}
|
|
for qw/default onerror trim type scalar unknown missing sort unique/;
|
|
|
|
push @keys, delete($t->{known_keys})->@* if $t->{known_keys};
|
|
push @keys, keys $t->{keys}->%* if $t->{keys};
|
|
}
|
|
|
|
# Compile sub-schemas
|
|
$top{keys} = { map +($_, __PACKAGE__->compile($top{keys}{$_}, $validations)), keys $top{keys}->%* } if $top{keys};
|
|
$top{values} = __PACKAGE__->compile($top{values}, $validations) if $top{values};
|
|
|
|
$top{validations} = \@val;
|
|
$top{known_keys} = \@keys;
|
|
\%top;
|
|
}
|
|
|
|
|
|
sub compile($pkg, $schema, $validations={}) {
|
|
return $schema if $schema isa __PACKAGE__;
|
|
my $c = _compile $schema, $validations, 64;
|
|
|
|
$c->{type} //= 'scalar';
|
|
$c->{missing} //= 'create';
|
|
$c->{trim} //= 1 if $c->{type} eq 'scalar';
|
|
$c->{unknown} //= 'remove' if $c->{type} eq 'hash';
|
|
$c->{known_keys} = { map +($_,1), $c->{known_keys}->@* } if $c->{known_keys};
|
|
|
|
delete $c->{default} if ref $c->{default} eq 'SCALAR' && ${$c->{default}} eq 'required';
|
|
|
|
if (exists $c->{sort}) {
|
|
my $s = $c->{sort};
|
|
$c->{sort} =
|
|
ref $s eq 'CODE' ? $s
|
|
: $s eq 'str' ? sub($x,$y) { $x cmp $y }
|
|
: $s eq 'num' ? sub($x,$y) { $x <=> $y }
|
|
: confess "Unknown value for 'sort': $c->{sort}";
|
|
}
|
|
$c->{unique} = sub { $_[0] } if $c->{unique} && !ref $c->{unique} && !$c->{sort};
|
|
|
|
bless $c, $pkg;
|
|
}
|
|
|
|
|
|
sub _validate_rec {
|
|
my $c = $_[0];
|
|
|
|
# hash keys
|
|
if ($c->{keys}) {
|
|
my @err;
|
|
for my ($k, $s) ($c->{keys}->%*) {
|
|
if (!exists $_[1]{$k}) {
|
|
next if $s->{missing} eq 'ignore';
|
|
return { validation => 'missing', key => $k } if $s->{missing} eq 'reject';
|
|
$_[1]{$k} = ref $s->{default} eq 'CODE' ? $s->{default}->() : $s->{default} // undef;
|
|
next if exists $s->{default};
|
|
}
|
|
|
|
my $r = _validate($s, $_[1]{$k});
|
|
if ($r) {
|
|
$r->{key} = $k;
|
|
push @err, $r;
|
|
}
|
|
}
|
|
return { validation => 'keys', errors => \@err } if @err;
|
|
}
|
|
|
|
# array values
|
|
if ($c->{values}) {
|
|
my @err;
|
|
for my $i (0..$#{$_[1]}) {
|
|
my $r = _validate($c->{values}, $_[1][$i]);
|
|
if ($r) {
|
|
$r->{index} = $i;
|
|
push @err, $r;
|
|
}
|
|
}
|
|
return { validation => 'values', errors => \@err } if @err;
|
|
}
|
|
|
|
# validations
|
|
for ($c->{validations}->@*) {
|
|
my $r = _validate_rec($_, $_[1]);
|
|
return {
|
|
# If the error was a custom 'func' object, then make that the primary cause.
|
|
# This makes it possible for validations to provide their own error objects.
|
|
$r->{validation} eq 'func' && (!exists $r->{result} || keys $r->%* > 2) ? $r->%* : (error => $r),
|
|
validation => $_->{name},
|
|
} if $r;
|
|
}
|
|
|
|
# func
|
|
if ($c->{func}) {
|
|
my $r = $c->{func}->($_[1]);
|
|
return { %$r, validation => 'func' } if ref $r eq 'HASH';
|
|
return { validation => 'func', result => $r } if !$r;
|
|
}
|
|
}
|
|
|
|
|
|
sub _validate_array {
|
|
my $c = $_[0];
|
|
return if $c->{type} ne 'array';
|
|
|
|
$_[1] = [sort { $c->{sort}->($a, $b) } $_[1]->@* ] if $c->{sort};
|
|
|
|
# Key-based uniqueness
|
|
if ($c->{unique} && ref $c->{unique} eq 'CODE') {
|
|
my %h;
|
|
for my $i (0..$#{$_[1]}) {
|
|
my $k = $c->{unique}->($_[1][$i]);
|
|
return { validation => 'unique', index_a => $h{$k}, value_a => $_[1][$h{$k}], index_b => $i, value_b => $_[1][$i], key => $k } if exists $h{$k};
|
|
$h{$k} = $i;
|
|
}
|
|
|
|
# Comparison-based uniqueness
|
|
} elsif ($c->{unique}) {
|
|
for my $i (0..$#{$_[1]}-1) {
|
|
return { validation => 'unique', index_a => $i, value_a => $_[1][$i], index_b => $i+1, value_b => $_[1][$i+1] }
|
|
if $c->{sort}->($_[1][$i], $_[1][$i+1]) == 0
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
sub _validate_input {
|
|
my $c = $_[0];
|
|
|
|
# trim (needs to be done before the 'default' test)
|
|
$_[1] = trim $_[1] =~ s/\r//rg if defined $_[1] && !ref $_[1] && $c->{type} eq 'scalar' && $c->{trim};
|
|
|
|
# default
|
|
if (!defined $_[1] || (!ref $_[1] && $_[1] eq '')) {
|
|
if (exists $c->{default}) {
|
|
$_[1] = ref $c->{default} eq 'CODE' ? $c->{default}->($_[1]) : $c->{default};
|
|
return;
|
|
}
|
|
return { validation => 'required' };
|
|
}
|
|
|
|
if ($c->{type} eq 'scalar') {
|
|
return { validation => 'type', expected => 'scalar', got => lc ref $_[1] } if ref $_[1];
|
|
|
|
} elsif ($c->{type} eq 'hash') {
|
|
return { validation => 'type', expected => 'hash', got => lc ref $_[1] || 'scalar' } if ref $_[1] ne 'HASH';
|
|
|
|
# Each branch below makes a shallow copy of the hash, so that further
|
|
# validations can perform in-place modifications without affecting the
|
|
# input.
|
|
if ($c->{unknown} eq 'remove') {
|
|
$_[1] = { map +($_, $_[1]{$_}), grep $c->{known_keys}{$_}, keys $_[1]->%* };
|
|
} elsif ($c->{unknown} eq 'reject') {
|
|
my @err = grep !$c->{known_keys}{$_}, keys $_[1]->%*;
|
|
return { validation => 'unknown', keys => \@err, expected => [ sort keys %{$c->{known_keys}} ] } if @err;
|
|
$_[1] = { $_[1]->%* };
|
|
} else {
|
|
$_[1] = { $_[1]->%* };
|
|
}
|
|
|
|
} elsif ($c->{type} eq 'array') {
|
|
$_[1] = [$_[1]] if $c->{scalar} && !ref $_[1];
|
|
return { validation => 'type', expected => $c->{scalar} ? 'array or scalar' : 'array', got => lc ref $_[1] || 'scalar' } if ref $_[1] ne 'ARRAY';
|
|
$_[1] = [$_[1]->@*]; # Create a shallow copy to prevent in-place modification.
|
|
|
|
} elsif ($c->{type} eq 'any') {
|
|
# No need to do anything here.
|
|
|
|
} else {
|
|
confess "Unknown type '$c->{type}'"; # Already checked in compile(), but be extra safe
|
|
}
|
|
|
|
&_validate_rec || &_validate_array;
|
|
}
|
|
|
|
|
|
sub _validate {
|
|
my $c = $_[0];
|
|
my $r = &_validate_input;
|
|
($r, $_[1]) = (undef, ref $c->{onerror} eq 'CODE' ? $c->{onerror}->($_[0], bless $r, 'FU::Validate::err') : $c->{onerror})
|
|
if $r && exists $c->{onerror};
|
|
$r
|
|
}
|
|
|
|
|
|
sub validate($c, $input) {
|
|
my $r = _validate($c, $input);
|
|
return $input if !$r;
|
|
$r = bless $r, 'FU::Validate::err';;
|
|
my @e = $r->errors;
|
|
$r->{longmess} = Carp::longmess(@e > 1 ? join("\n",@e)."\n" : $e[0]);
|
|
die $r;
|
|
}
|
|
|
|
|
|
|
|
|
|
package FU::Validate::err;
|
|
use v5.36;
|
|
use FU::Util;
|
|
|
|
use overload '""' => sub { $_[0]{longmess} || join "\n", $_[0]->errors };
|
|
|
|
sub _fmtkey($k) {
|
|
$k =~ /^[a-zA-Z0-9_-]+$/ ? $k : FU::Util::json_format($k);
|
|
}
|
|
|
|
sub _fmtval($v) {
|
|
eval { $v = FU::Util::json_format($v) }; "$v"
|
|
}
|
|
|
|
sub errors($e, $prefix='') {
|
|
my $val = $e->{validation};
|
|
my $p = $prefix ? "$prefix: " : '';
|
|
$val eq 'keys' ? map errors($_, $prefix.'.'._fmtkey($_->{key})), $e->{errors}->@* :
|
|
$val eq 'missing' ? $prefix.'.'._fmtkey($e->{key}).': required key missing' :
|
|
$val eq 'values' ? map errors($_, $prefix."[$_->{index}]"), $e->{errors}->@* :
|
|
$val eq 'unique' ? $prefix."[$e->{index_b}] value '"._fmtval($e->{value_a})."' duplicated" :
|
|
$val eq 'required' ? "${p}required value missing" :
|
|
$val eq 'type' ? "${p}invalid type, expected '$e->{expected}' but got '$e->{got}'" :
|
|
$val eq 'unknown' ? ($e->{keys}->@* > 1 ? "${p}unknown keys: ".join(', ', _fmtkey($e->{keys})) : "${p}unknown key '"._fmtkey($e->{keys}[0])."'") :
|
|
$e->{error} ? errors($e->{error}, "${p}validation '$val'") :
|
|
"${p}failed validation '$val'";
|
|
}
|
|
|
|
|
|
1;
|
|
__END__
|
|
|
|
=head1 NAME
|
|
|
|
FU::Validate - Data and form validation and normalization
|
|
|
|
=head1 EXPERIMENTAL
|
|
|
|
This module is still in development and there will likely be a few breaking API
|
|
changes, see the main L<FU> module for details.
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This module provides an easy and simple interface for data validation. It can
|
|
handle most types of data structures (scalars, hashes, arrays and nested data
|
|
structures), and has some conveniences for validating form-like data.
|
|
|
|
That this module will not solve B<all> your input validation problems. It can
|
|
validate the format and the structure of the data, but it does not support
|
|
validations that depend on other input values. For example, it is not possible
|
|
to specify that the contents of a I<password> field must be equivalent to that
|
|
of a I<confirm_password> field, but you can specify that both fields need to be
|
|
filled out. Recursive data structures are not supported. There is also no
|
|
built-in support for validating hashes with dynamic keys or arrays where not
|
|
all elements conform to the same schema. These could technically still be
|
|
validated with custom validations, but it won't be as convenient.
|
|
|
|
This module is designed to validate any kind of program input after it has been
|
|
parsed into a Perl data structure. It should not be used to validate function
|
|
parameters within Perl code. In fact, the correct answer to "how do I validate
|
|
function parameters?" is "don't, document your assumptions instead".
|
|
|
|
|
|
=head2 Validation API
|
|
|
|
To validate some input, you first need a schema. A schema can be compiled as
|
|
follows:
|
|
|
|
my $validator = FU::Validate->compile($schema, $validations);
|
|
|
|
C<$schema> is the schema that describes the data to be validated (see L</SCHEMA
|
|
DEFINITION> below) and C<$validations> is an optional hashref containing
|
|
L<custom validations|/Custom validations> that C<$schema> can refer to. An
|
|
error is thrown if the C<$validations> or C<$schema> are invalid.
|
|
|
|
To validate input, run:
|
|
|
|
my $validated_input = $validator->validate($input);
|
|
|
|
C<validate()> returns a validated and (depending on the schema) normalized copy
|
|
of C<$input>. Great care is taken that C<$input> is not being modified
|
|
in-place, even if data normalization is being performed.
|
|
|
|
An error is thrown if the input does not validate. The error object is a
|
|
C<FU::Validate::err>-blessed hashref containing at least one key:
|
|
I<validation>, which indicates the name of the validation that failed.
|
|
Additional keys with more detailed information may be present, depending on the
|
|
validation. These are documented in L</SCHEMA DEFINITION> below.
|
|
|
|
|
|
=head1 SCHEMA DEFINITION
|
|
|
|
A schema is an arrayref or hashref, where each key is the name of a built-in
|
|
option or of a validation to be performed and the values are the arguments to
|
|
those validations. None of the options or validations are required, but some
|
|
built-ins have default values. This means that the empty schema C<{}> is
|
|
actually equivalent to:
|
|
|
|
{ type => 'scalar',
|
|
trim => 1,
|
|
default => \'required',
|
|
missing => 'create',
|
|
}
|
|
|
|
Built-in options are always validated in a fixed order, but the order in which
|
|
standard and custom validations are performed is random when the schema is
|
|
given as a hashref. This is rarely a problem, but it can in some cases affect
|
|
the returned error message or whether a later validation will receive data
|
|
normalized by a previous validation. An arrayref can be used to enforce a
|
|
validation order:
|
|
|
|
[ enum => [1, 2, 'a'], int => 1 ]
|
|
|
|
Or to use the same validation multiple times:
|
|
|
|
[ regex => qr/^a/, regex => qr/z$/ ]
|
|
|
|
=head2 Built-in options
|
|
|
|
=over
|
|
|
|
=item type => $type
|
|
|
|
Specify the type of the input, this can be I<scalar>, I<array>, I<hash> or
|
|
I<any>. If no type is specified or implied by other validations, the default
|
|
type is I<scalar>.
|
|
|
|
Upon failure, the error object will look something like:
|
|
|
|
{ validation => 'type',
|
|
expected => 'hash',
|
|
got => 'scalar'
|
|
}
|
|
|
|
=item default => $val
|
|
|
|
If not set, or set to C<\'required'> (note: scalarref), then a value is required
|
|
for this field. Specifically, this means that a value must exist and must not
|
|
be C<undef> or an empty string, i.e. C<exists($x) && defined($x) && $x ne ''>.
|
|
|
|
If set to any other value, then the input is considered optional and the given
|
|
C<$val> is returned instead. If C<$val> is a CODE reference, the subroutine is
|
|
called with the original value (which is either no argument, undef or an empty
|
|
string) and the return value of the subroutine is used as value instead.
|
|
|
|
The empty check is performed after I<trim> and before any other validations. So
|
|
a string containing only whitespace is considered an empty string and will be
|
|
treated according to this I<default> option. As an additional side effect,
|
|
other validations will never get to validate undef or an empty string, as these
|
|
values are either rejected or substituted with a default.
|
|
|
|
=item onerror => $val
|
|
|
|
Instead of reporting an error, return C<$val> if this input fails validation
|
|
for whatever reason. Setting this option in the top-level schema ensures that
|
|
the validation will always succeed regardless of the input.
|
|
|
|
If C<$val> is a CODE reference, the subroutine is called with the (partially
|
|
normalized) input as first argument and error object as second argument. The
|
|
return value of the subroutine is then returned for this validation.
|
|
|
|
=item trim => 0/1
|
|
|
|
By default, any whitespace around scalar-type input is removed before testing
|
|
any other validations. Setting I<trim> to a false value will disable this
|
|
behavior.
|
|
|
|
=item keys => $hashref
|
|
|
|
Implies C<< type => 'hash' >>, this option specifies which keys are permitted,
|
|
and how to validate the values. Each key in C<$hashref> corresponds to a key
|
|
with the same name in the input. Each value is a schema definition by which the
|
|
value in the input will be validated. The schema definition may be a bare
|
|
hashref or a validator returned by C<compile()>. If a key is not present in
|
|
the input hash, it will be created in the output with the default value (or
|
|
undef), but see the I<missing> option for how to change that behavior.
|
|
|
|
For example, the following schema specifies that the input must be a hash with
|
|
three keys:
|
|
|
|
{ type => 'hash',
|
|
keys => {
|
|
username => { maxlength => 16 },
|
|
password => { minlength => 8 },
|
|
email => { default => '', email => 1 }
|
|
}
|
|
}
|
|
|
|
If validation on one or more keys fail, the error object that is returned looks
|
|
like:
|
|
|
|
{ validation => 'keys',
|
|
errors => [
|
|
# List of error objects, each with an additional 'key' field.
|
|
{ key => 'username', validation => 'required' }
|
|
# In this case, the username was required but either absent or empty.
|
|
]
|
|
}
|
|
|
|
=item unknown => $option
|
|
|
|
Implies C<< type => 'hash' >>, this option specifies what to do with keys in
|
|
the input data that have not been defined in the I<keys> option. Possible
|
|
values are I<remove> to remove unknown keys from the output data (this is the
|
|
default), I<reject> to return an error if there are unknown keys in the input,
|
|
or I<pass> to pass through any unknown keys to the output data. Note that the
|
|
values for passed-through keys are not validated against any schema!
|
|
|
|
In the case of I<reject>, the error object will look like:
|
|
|
|
{ validation => 'unknown',
|
|
# List of unknown keys present in the input
|
|
keys => ['unknown1', .. ],
|
|
# List of known keys (which may or may not be present
|
|
# in the input - that is checked at a later stage)
|
|
expected => ['known1', .. ]
|
|
}
|
|
|
|
=item missing => $option
|
|
|
|
For values inside a hash I<keys> schema, this option specifies what to do when
|
|
the key is not present in the input data. Possible values are I<create> to
|
|
insert the key with a default value (if the I<default> option is set, otherwise
|
|
undef), I<reject> to return an error if the option is missing or I<ignore> to
|
|
leave the key out of the returned data.
|
|
|
|
The default is I<create>, but if no I<default> option is set for this key then
|
|
that is effectively the same as I<reject>.
|
|
|
|
In the case of I<reject>, the error object will look like:
|
|
|
|
{ validation => 'missing',
|
|
key => 'field'
|
|
}
|
|
|
|
=item values => $schema
|
|
|
|
Implies C<< type => 'array' >>, this defines the schema that is applied to
|
|
every item in the array. The schema definition may be a bare hashref or a
|
|
validator returned by C<compile()>.
|
|
|
|
Failure is reported in a similar fashion to I<keys>:
|
|
|
|
{ validation => 'values',
|
|
errors => [
|
|
{ index => 1, validation => 'required' }
|
|
]
|
|
}
|
|
|
|
=item scalar => 0/1
|
|
|
|
Implies C<< type => 'array' >>, this option will also permit the input to be a
|
|
scalar. In this case, the input is interpreted and returned as an array with
|
|
only one element. This option exists to make it easy to validate multi-value
|
|
form inputs. For example, consider C<query_decode()> in L<FU::Util>: a
|
|
parameter in a query string is decoded into an array if it is listed multiple
|
|
times, a scalar if it only occcurs once. So we could either end up with:
|
|
|
|
{ a => 1, b => 1 }
|
|
# OR:
|
|
{ a => [1, 3], b => 1 }
|
|
|
|
With the I<scalar> option, we can accept both forms for C<a> and normalize into
|
|
an array. The following schema definition can validate the above examples:
|
|
|
|
{ type => 'hash',
|
|
keys => {
|
|
a => { type => 'array', scalar => 1 },
|
|
b => { }
|
|
}
|
|
}
|
|
|
|
=item sort => $option
|
|
|
|
Implies C<< type => 'array' >>, sort the array after validating its elements.
|
|
C<$option> determines how the array is sorted, possible values are I<str> for
|
|
string comparison, I<num> for numeric comparison, or a subroutine reference for
|
|
custom comparison function. The subroutine must be similar to the one given to
|
|
Perl's C<sort()> function, except it should compare C<$_[0]> and C<$_[1]>
|
|
instead of C<$a> and C<$b>.
|
|
|
|
=item unique => $option
|
|
|
|
Implies C<< type => 'array' >>, require elements to be unique. That is, don't
|
|
allow duplicate elements. There are several ways to specify what uniqueness
|
|
means in this context:
|
|
|
|
If C<$option> is a subroutine reference, then the subroutine is given an
|
|
element as first argument, and it should return a string that is used to check
|
|
for uniqueness. For example, if array elements are hashes, and you want to
|
|
check for uniqueness of a hash key named I<id>, you can specify this as
|
|
C<< unique => sub { $_[0]{id} } >>.
|
|
|
|
Otherwise, if C<$option> is true and the I<sort> option is set, then the
|
|
comparison function used for sorting is also used as uniqueness check. Two
|
|
elements are the same if the comparison function returns C<0>.
|
|
|
|
If C<$option> is true and I<sort> is not set, then the elements will be
|
|
interpreted as strings, similar to setting C<< unique => sub { $_[0] } >>.
|
|
|
|
All of that may sound complicated, but it's quite easy to use. Here's a few
|
|
examples:
|
|
|
|
# This describes an array of hashes with keys 'id' and 'name'.
|
|
{ values => {
|
|
type => 'hash',
|
|
keys => {
|
|
id => { uint => 1 },
|
|
name => {}
|
|
}
|
|
},
|
|
# Sort the array on 'id'
|
|
sort => sub { $_[0]{id} <=> $_[1]{id} },
|
|
# And require that 'id' fields are unique
|
|
unique => 1
|
|
}
|
|
|
|
# Contrived example: An array of strings, and we want
|
|
# each string to start with a different character.
|
|
{ values => { minlength => 1 },
|
|
unique => sub { substr $_[0], 0, 1 }
|
|
}
|
|
|
|
On failure, this validation returns the following error object. This output
|
|
assumes the first schema from the previous example.
|
|
|
|
{ validation => 'unique',
|
|
# Index and value of element a
|
|
index_a => 1,
|
|
value_a => { id => 3, name => 'whatever' }
|
|
# Index and value of duplicate element b
|
|
index_b => 4,
|
|
value_b => { id => 3, name => 'something else' },
|
|
# If string-based uniqueness was used, this is included as well:
|
|
# key => '..'
|
|
}
|
|
|
|
|
|
=item func => $sub
|
|
|
|
Run the input through a subroutine to perform additional validation or
|
|
normalization. The subroutine is only called after all other validations have
|
|
succeeded. The subroutine is called with the input as its only argument.
|
|
Normalization of the input can be done by assigning to the first argument or
|
|
modifying its value in-place.
|
|
|
|
On success, the subroutine should return a true value. On failure, it should
|
|
return either a false value or a hashref. The hashref will have the
|
|
I<validation> key set to I<func>, and this will be returned as error object.
|
|
|
|
When I<func> is used inside a custom validation, the returned error object will
|
|
have its I<validation> field set to the name of the custom validation. This
|
|
makes custom validations to behave as first-class validations in terms of error
|
|
reporting.
|
|
|
|
|
|
=back
|
|
|
|
=head2 Standard validations
|
|
|
|
Standard validations are provided by the module. It is possible to override,
|
|
re-implement and supplement these with custom validations. Internally, these
|
|
are, in fact, implemented as custom validations.
|
|
|
|
=over
|
|
|
|
=item regex => $re
|
|
|
|
Implies C<< type => 'scalar' >>. Validate the input against a regular
|
|
expression.
|
|
|
|
=item enum => $options
|
|
|
|
Implies C<< type => 'scalar' >>. Validate the input against a list of known
|
|
values. C<$options> can be either a scalar (in which case that is the only
|
|
permitted input), an array (listing all possible inputs) or a hash (where the
|
|
hash keys are considered to be the list of permitted inputs).
|
|
|
|
=item minlength => $num
|
|
|
|
Minimum length of the input. The I<length> is the string C<length()> if the
|
|
input is a scalar, the number of elements if the input is an array, or the
|
|
number of keys if the input is a hash.
|
|
|
|
=item maxlength => $num
|
|
|
|
Maximum length of the input.
|
|
|
|
=item length => $option
|
|
|
|
If C<$option> is a number, then this specifies the exact length of the input.
|
|
If C<$option> is an array, then this is a shorthand for
|
|
C<[$minlength,$maxlength]>.
|
|
|
|
=item anybool => 1
|
|
|
|
Accept any value of any type as input, and normalize it to either C<true> or
|
|
C<false> according to Perl's idea of truth.
|
|
|
|
=item bool => 1
|
|
|
|
Require the input to be a boolean type as per C<to_bool()> in L<FU::Util>.
|
|
|
|
=item num => 1
|
|
|
|
Implies C<< type => 'scalar' >>. Require the input to be a number formatted
|
|
using the format permitted by JSON. Note that this is slightly more restrictive
|
|
from Perl's number formatting, in that 'NaN', 'Inf' and thousand separators are
|
|
not permitted.
|
|
|
|
=item int => 1
|
|
|
|
Implies C<< type => 'scalar' >>. Require the input to be an (arbitrarily large)
|
|
integer.
|
|
|
|
=item uint => 1
|
|
|
|
Implies C<< type => 'scalar' >>. Require the input to be an (arbitrarily large)
|
|
positive integer.
|
|
|
|
=item min => $num
|
|
|
|
Implies C<< num => 1 >>. Require the input to be larger than or equal to
|
|
C<$num>.
|
|
|
|
=item max => $num
|
|
|
|
Implies C<< num => 1 >>. Require the input to be smaller than or equal to
|
|
C<$num>.
|
|
|
|
=item range => [$min,$max]
|
|
|
|
Equivalent to C<< min => $min, max => $max >>.
|
|
|
|
=item ascii => 1
|
|
|
|
Implies C<< type => 'scalar' >>. Require the input to wholly consist of
|
|
printable ASCII characters.
|
|
|
|
=item sl => 1
|
|
|
|
Implies C<< type => 'scalar' >>. Require the input to be a single line of text.
|
|
Useful for validating C<< <input type="text"> >> form elements, which really
|
|
should not result in multi-line input.
|
|
|
|
=item ipv4 => 1
|
|
|
|
Implies C<< type => 'scalar' >>. Require the input to be an IPv4 address.
|
|
|
|
=item ipv6 => 1
|
|
|
|
Implies C<< type => 'scalar' >>. Require the input to be an IPv6 address. Note
|
|
that the IP address is not normalized, and fancy features such as
|
|
IPv4-manned-IPv6 addresses are not permitted.
|
|
|
|
=item ip => 1
|
|
|
|
Require either C<< ipv4 => 1 >> or C<< ipv6 => 1 >>.
|
|
|
|
=item email => 1
|
|
|
|
Implies C<< type => 'scalar' >>. Validate the email address against a
|
|
monstrosity of a regular expression. This email validation is designed to catch
|
|
obviously invalid addresses and addresses that, while compliant with some RFCs,
|
|
will not be accepted by most actual SMTP implementations.
|
|
|
|
Email validation is quite a minefield, see L<Data::Validate::Email> for an
|
|
alternative solution.
|
|
|
|
=item weburl => 1
|
|
|
|
Implies C<< type => 'scalar' >>. Requires the input to be a C<http://> or
|
|
C<https://> url.
|
|
|
|
=item date => 1
|
|
|
|
Implies C<< type => 'scalar' >>. Requires the input to be a date string in the
|
|
form of C<YYYY-MM-DD>. Does not validate that the day number is valid for the
|
|
given the year and month.
|
|
|
|
=back
|
|
|
|
|
|
=head2 Custom validations
|
|
|
|
Custom validations can be passed to C<compile()> as the C<$validations> hashref
|
|
argument. A custom validation is, in simple terms, either a schema or a
|
|
subroutine that returns a schema. The custom validation can then be referenced
|
|
from other schemas.
|
|
|
|
Here's a simple example that defines and uses a custom validation named
|
|
I<stringbool>, which accepts either the string I<true> or I<false>.
|
|
|
|
my $validations = {
|
|
stringbool => { enum => ['true', 'false'] }
|
|
};
|
|
my $schema = { stringbool => 1 };
|
|
my $result = FU::Validate->compile($schema, $validations)->validate('true');
|
|
# $result eq 'true'
|
|
|
|
A custom validation can also be defined as a subroutine, in which case it can
|
|
accept options. Here is an example of a I<prefix> custom validation, which
|
|
requires that the string starts with the given prefix. The subroutine returns a
|
|
schema that contains the I<func> built-in option to do the actual validation.
|
|
|
|
my $validations = {
|
|
prefix => sub($prefix) {
|
|
return { func => sub { $_[0] =~ /^\Q$prefix/ } }
|
|
}
|
|
};
|
|
my $schema = { prefix => 'Hello, ' };
|
|
my $result = FU::Validate->compile($schema, $validations)->validate('Hello, World!');
|
|
|
|
=head3 Custom validations and built-in options
|
|
|
|
Custom validations can also set built-in options, but the semantics differ a
|
|
little depending on the option. First, be aware that many of the built-in
|
|
options apply to the whole schema and not just to the custom validation. For
|
|
example, if the top-level schema sets C<< trim => 0 >>, then all validations
|
|
used in that schema may get input with whitespace around it.
|
|
|
|
All validations used in a schema need to agree upon a single I<type> option.
|
|
If a custom validation does not specify a I<type> option (and no type is
|
|
implied by another validation such as I<keys> or I<values>), then the
|
|
validation should work with every type. It is an error to define a schema that
|
|
mixes validations of different types. For example, the following throws an
|
|
error:
|
|
|
|
FU::Validate->compile({
|
|
# top-level schema says we expect a hash
|
|
type => 'hash',
|
|
# but the 'int' validation implies that the type is a scalar
|
|
int => 1
|
|
});
|
|
|
|
The I<keys>, I<values> and C<func> built-in options are validated separately
|
|
for each custom validation. So if you have multiple custom validations that set
|
|
the I<values> option, then the array elements must validate all the listed
|
|
schemas. The same applies to I<keys>: If the same key is listed in multiple
|
|
custom validations, then the key must conform to all schemas. With respect to
|
|
the I<unknown> option, a key that is mentioned in any of the I<keys> options is
|
|
considered "known".
|
|
|
|
All other built-in options follow inheritance semantics: These options can be
|
|
set in a custom validation, and they are inherited by the top-level schema. If
|
|
the same option is set in multiple validations a random one will be inherited,
|
|
so that's not a good idea. The top-level schema can always override options set
|
|
by custom validations.
|
|
|
|
|
|
=head3 Global custom validations
|
|
|
|
Instead of passing a C<$validations> argument every time you call C<compile()>,
|
|
you can also add custom validations to the global list of built-in validations:
|
|
|
|
$FU::Validate::default_validations{stringbool} = { enum => ['true', 'false'] };
|
|
|
|
|
|
=head1 SEE ALSO
|
|
|
|
L<FU>.
|
|
|
|
This module is a fork of L<TUWF::Validate>.
|
|
|
|
=head1 COPYRIGHT
|
|
|
|
MIT.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Yorhel <projects@yorhel.nl>
|