diff --git a/FU.pm b/FU.pm index 2fc0234..498d5cf 100644 --- a/FU.pm +++ b/FU.pm @@ -537,6 +537,7 @@ sub db { } sub sql { fu->db->q(@_) } +sub SQL { fu->db->Q(@_) } @@ -748,6 +749,8 @@ standalone and can be used independently of the framework: =item * L - PostgreSQL client. +=item * L - Small and safe query builder. + =item * L - Dynamic XML generation, easy and fast. =back diff --git a/FU/Pg.pm b/FU/Pg.pm index dc7b037..108d02b 100644 --- a/FU/Pg.pm +++ b/FU/Pg.pm @@ -6,8 +6,17 @@ _load_libpq(); package FU::Pg::conn { sub lib_version { FU::Pg::lib_version() } + + sub Q { + require FU::SQL; + my $s = shift; + my($sql, $params) = FU::SQL::SQL(@_)->compile(placeholder_style => 'pg', in_style => 'pg'); + $s->q($sql, @$params); + } }; +*FU::Pg::txn::Q = \*FU::Pg::conn::Q; + package FU::Pg::error { use overload '""' => sub($e, @) { $e->{full_message} }; } @@ -21,6 +30,8 @@ FU::Pg - The Ultimate (synchronous) Interface to PostgreSQL =head1 SYNOPSYS + use FU::Pg; + my $conn = FU::Pg->connect("dbname=test user=test password=nottest"); $conn->exec('CREATE TABLE books (id SERIAL, title text, read bool)'); @@ -147,6 +158,11 @@ Note that this method just creates a statement object, the given query is not prepared or executed until the appropriate statement methods (see below) are used. +=item $conn->Q(@args) + +Same as C<< $conn->q() >> but uses L to construct the SQL query and +bind parameters. + =back Statement objects returned by C<< $conn->q() >> support the following @@ -359,6 +375,8 @@ Transaction methods: =item $txn->q(..) +=item $txn->Q(..) + Run a query inside the transaction. These work the same as the respective methods on the parent C<$conn> object. diff --git a/FU/SQL.pm b/FU/SQL.pm new file mode 100644 index 0000000..8e61945 --- /dev/null +++ b/FU/SQL.pm @@ -0,0 +1,404 @@ +package FU::SQL 0.1; +use v5.36; +use Exporter 'import'; +use Carp 'confess'; +use experimental 'builtin'; + +our @EXPORT = qw/ + P RAW SQL + PARENS INTERSPERSE COMMA + AND OR WHERE + SET VALUES IN +/; + + +sub _obj { bless [@_], 'FU::SQL::val' } + +sub P :prototype($) ($p) { bless \(my $x = $p), 'FU::SQL::p' } +sub RAW :prototype($) ($s) { _obj "$s" } + +# These operate on $_ and must be called with &func syntax. +# The readonly check can be finicky. +sub _israw { builtin::created_as_string($_) && Internals::SvREADONLY($_) } +sub _tosql { &_israw ? "$_" : ref($_) =~ /^FU::SQL::/ ? $_ : P $_ } + +sub SQL { _obj map &_tosql, @_ } +sub PARENS { SQL '(', @_, ')' } +sub INTERSPERSE { my @a = map &_tosql, @_; _obj map $_ > 1 ? ($a[0],$a[$_]) : $a[$_], 1..$#a } +sub COMMA { INTERSPERSE ',', @_ } + +sub _conditions { + @_ == 1 && ref $_[0] eq 'HASH' + ? map PARENS(RAW $_, + !defined $_[0]{$_} ? ('IS NULL') : + ref($_[0]{$_}) eq 'FU::SQL::in' ? ($_[0]{$_}) + : ('=', $_[0]{$_}) + ), sort keys $_[0]->%* + : map PARENS($_), @_ +} + +sub AND { !@_ || (@_ == 1 && ref $_[0] eq 'HASH' && keys $_[0]->%* == 0) ? RAW '1=1' : INTERSPERSE 'AND', _conditions @_ } +sub OR { !@_ || (@_ == 1 && ref $_[0] eq 'HASH' && keys $_[0]->%* == 0) ? RAW '1=0' : INTERSPERSE 'OR', _conditions @_ } +sub WHERE { SQL 'WHERE', AND @_ } + +sub SET($h) { SQL 'SET', COMMA map SQL(RAW $_, '=', $h->{$_}), sort keys %$h } + +sub VALUES { + @_ == 1 && ref $_[0] eq 'HASH' + ? SQL '(', COMMA(map RAW $_, sort keys $_[0]->%*), ') VALUES (', COMMA(map $_[0]{$_}, sort keys $_[0]->%*), ')' + : @_ == 1 && ref $_[0] eq 'ARRAY' + ? SQL 'VALUES (', COMMA($_[0]->@*), ')' + : SQL 'VALUES (', COMMA(@_), ')'; +} + +sub IN($a) { + confess "Expected arrayref" if ref $a ne 'ARRAY'; + bless \$a, 'FU::SQL::in' +} + + + +sub FU::SQL::val::_compile($self, $opt, $sql, $params) { + for (@$self) { + $$sql .= ' ' if length $$sql && $$sql !~ /\s$/; + if (ref $_) { $_->_compile($opt, $sql, $params); } + else { $$sql .= $_; } + } +} + +sub FU::SQL::p::_compile($self, $opt, $sql, $params) { + push @$params, $$self; + $$sql .= $opt->{placeholder_style} eq 'pg' ? '$'.@$params : '?'; +} + +sub FU::SQL::in::_compile($self, $opt, $sql, $params) { + if ($opt->{in_style} eq 'pg') { + $$sql .= '= ANY('; + FU::SQL::p::_compile($self, $opt, $sql, $params); + $$sql .= ')'; + } else { + $$sql .= 'IN('; + for my($i,$v) (builtin::indexed @$$self) { + $$sql .= ',' if $i; + FU::SQL::p::_compile(\$v, $opt, $sql, $params); + } + $$sql .= ')'; + } +} + +sub FU::SQL::val::compile($self, %opt) { + $opt{placeholder_style} ||= 'dbi'; + $opt{in_style} ||= 'dbi'; + my($sql, @params) = (''); + $self->_compile(\%opt, \$sql, \@params); + ($sql, \@params) +} + +*FU::SQL::p::compile = *FU::SQL::in::compile = \*FU::SQL::val::compile; + +1; +__END__ + +=head1 NAME + +FU::SQL - Small and Safe SQL Query Builder + +=head1 SYNOPSIS + + use FU::SQL; + + my $data = { name => 'John', last_updated => RAW 'NOW()' }; + + my $upd = SQL 'UPDATE table', SET $data; + + my $ins = SQL 'INSERT INTO table', VALUES $data; + + my $sel = SQL 'SELECT id, name FROM table', WHERE { id => IN([1,2,3]) }; + + my($sql, @params) = $sel->compile; + +=head1 DESCRIPTION + +=head2 Compiling SQL + +All functions listed under L return an object that can be +passed to other construction functions or compiled into SQL and bind +parameters. These objects support one method call: + +=over + +=item ($sql, $params) = $obj->compile(%options) + +Compile an object into a SQL string and a (possibly empty) arrayref of bind +parameters. The following options are supported: + +=over + +=item placeholder_style => 'dbi' or 'pg' + +Set the style to use for placeholders in the SQL string. When set to C<'dbi'> +(default), placeholders are indicated with a single question mark. When set to +C<'pg'>, placeholders use PostgreSQL-style numbered variables instead. For +example: + + my $obj = SQL 'SELECT', 1, ',', 2; + my ($sql) = $obj->compile(placeholder_style => 'dbi'); + # $sql = 'SELECT ?, ?' + + ($sql) = $obj->compile(placeholder_style => 'pg'); + # $sql = 'SELECT $1, $2' + +All L drivers support the C<'dbi'> method just fine, but you need to use +C<'pg'> when your SQL is going to L or L. + +=item in_style => 'dbi' or 'pg' + +Set the style to use for C expressions, refer to the C function below +for details. + +=back + +=back + +=head2 Constructing SQL + +All of the functions below return an object with a C method. All +functions are exported by default. + +=over + +=item SQL(@args) + +Construct an SQL object by concatenating the given arguments. There are three +types of supported arguments: + +=over + +=item 1. + +B are interpreted as raw SQL fragments. + +=item 2. + +Objects returned by other functions listed below can be included as +SQL fragments. + +=item 3. + +B is considered a bind parameter. + +=back + +These rules allow for flexible SQL construction: + + SQL 'SELECT 1'; # Raw SQL statement + SQL 'WHERE id =', 1; # SQL with a bind parameter + + my $fifteen = SQL('5 + ', 10); + SQL 'WHERE number =', $fifteen; # Composing SQL objects + +There is some magic going on in order to differentiate between a I and other arguments. The rule is that anything that is +C and read-only (as per +C) is considered raw SQL. Variables are by definition +writable: + + my $x = 'SELECT 1'; + SQL $x; # BAD: $x is used as bind parameter instead + + # Better: + my $x = SQL 'SELECT 1'; + SQL $x; + +In most cases, though, this is the behavior you want. In the few cases where it +isn't, you can always use C or C to force an argument as bind +parameter or SQL string. + +=item P($val) + +Return an object where C<$val> is forced into a bind parameter, for example: + + SQL 'WHERE name =', 'John'; # BAD, 'John' is a string literal + + SQL 'WHERE name =', P 'John'; # Good, 'John' is now a parameter + +=item RAW($sql) + +Force the given C<$sql> string to be included as SQL. For example: + + # BAD: + my $tables = ['a', 'b', 'c']; + SQL 'SELECT * FROM', $tables[1]; + # 'SELECT * FROM ?', that's a syntax error. + + # Better: + SQL 'WHERE * FROM', RAW $tables[1]; + # 'SELECT * FROM b' + +Absolutely do not use this function with untrusted input. + +=item PARENS(@args) + +Like C but surrounds the expression by parens: + + SQL 'WHERE x AND', PARENS('y', 'OR', 'z'); + # 'WHERE x AND ( y OR z )' + +=item INTERSPERSE($value, @args) + +Concatenate C<@args> with C<$value> as separator. Same way as C works +for strings, but I had to come up with a different name because "join" tends to +have a completely different meaning in the SQL world. + + INTERSPERSE 'OR', 'true', 'false'; + # 'true OR false' + +=item COMMA(@args) + +Short-hand for C. + +=item AND(@conditions) + +Construct an SQL expression to test that all given conditions are true. Returns +C<'1=1'> (i.e. true) if C<@conditions> is an empty list. + + AND 'x IS NOT NULL', + SQL('id <>', $not_this_id); + # '( x IS NOT NULL ) AND ( id <> ? )' + + AND; + # '1=1' + +=item AND($hashref) + +A special form of C that tests the given column for equality instead. +The keys of the hashref are interpreted as raw SQL and the values as bind +parameters. + + AND { id => 1, number => RAW 'random()', x => undef } + # '( id = ? ) AND ( number = random() ) AND ( x IS NULL )' + +=item OR(@conditions) + +=item OR($hashref) + +Like C except OR. These return C<'1=0'> (i.e. false) on an empty list. + +=item WHERE(@conditions) + +=item WHERE($hashref) + +Like C but prefixed with C<'WHERE'>. + +=item SET($hashref) + +Construct a SET clause: + + SQL 'UPDATE table', SET { + name => 'John', + last_updated => RAW('NOW()'), + }; + # 'UPDATE table SET name = ? , last_updated = NOW()' + +=item VALUES(@args) + +Construct a VALUES clause, C<@args> is interpreted as in C: + + SQL 'INSERT INTO table (name, last_updated)', VALUES(P('John'), 'NOW()'); + # 'INSERT INTO table (name, last_updated) VALUES ( ? , NOW() )' + +=item VALUES($arrayref) + +Same as C but arguments are interpreted as bind parameters: + + SQL 'INSERT INTO table (name, last_updated)', VALUES(['John', RAW 'NOW()']); + # 'INSERT INTO table (name, last_updated) VALUES ( ? , NOW() )' + +=item VALUES($hashref) + +Like C but also constructs a list of column names from the +hash keys: + + SQL 'INSERT INTO table', VALUES { + name => 'John', + last_updated => RAW('NOW()'), + }; + # Same as above examples + +Note how this allows for re-using the same hashref with C, allowing for +convenient insert-or-update: + + my $data = { + name => 'John', + last_updated => RAW('NOW()'), + }; + SQL 'INSERT INTO table', VALUES($data), + 'ON CONFLICT (name) DO UPDATE', SET($data); + +(The bind parameters are duplicated though, there's no duplicate detection yet. +Not sure if that's even worth it) + +=item IN($arrayref) + +Construct an C clause for matching an SQL expression against multiple +values. This function results in different SQL depending on the C +option given to C. The default C<'dbi'> style passes each value as a +bind parameter: + + SQL 'WHERE id', IN([1, 2, 3, 4]); + # 'WHERE id IN(?, ?, ?, ?)', parameters: 1, 2, 3, 4 + +The C<'pg'> style passes the entire array as a single bind parameter instead: + + SQL 'WHERE id', IN([1, 2, 3, 4]); + # 'WHERE id = ANY(?)', parameter: [1, 2, 3, 4] + +The C<'pg'> style allows for more efficient re-use of cached prepared +statements, since the generated query does not depend on the number of values. +Unfortunately, the only Postgres module that supports arrays as bind parameters +that I am aware of is L. This approach does not, as of writing, work +with L or L. + +Can be used in the C<$hashref> versions of C, C and C as well: + + WHERE { id => IN([1, 2]) } + # 'WHERE id IN(?, ?)' + +=back + +=head1 SEE ALSO + +L and the many other related modules on CPAN. This module was +heavily inspired by SQL::Interp, but differs in a few key areas: + +=over + +=item * SQL::Interp expects bind parameters to be passed as a scalar reference +(e.g. C<\$x>), but this is easy to forget and the result of forgetting to do so +is an SQL injection vulnerability - the worst possible outcome. +C was introduced in an attempt to provide a safer +alternative, but that limits the flexibility of the query builder. This module +instead attempts to identify string literals through some trickery and +considers everything else a bind parameter, which is much less prone to +accidental SQL injection. + +=item * SQL::Interp parses your input query in an attempt to guess the context +for interpolation. While this has (to my surprise) always worked out well for +anything I've written, it does feel a tad too magical for my taste. This module +instead requires you to more explicitly state your intentions, while hopefully +remaining as concise and readable. + +=item * SQL::Interp assigns various semantics to hashrefs and arrayrefs, which +means those can't easily be used as bind parameters. Not at all a problem if +you're using DBI - which doesn't support that anyway, but it can cause trouble +with L. + +=back + +=head1 COPYRIGHT + +MIT. + +=head1 AUTHOR + +Yorhel diff --git a/t/pgtypes-dynamic.t b/t/pgtypes-dynamic.t index 69a03ec..9bf2d27 100644 --- a/t/pgtypes-dynamic.t +++ b/t/pgtypes-dynamic.t @@ -1,5 +1,6 @@ use v5.36; use Test::More; +use FU::SQL; plan skip_all => $@ if !eval { require FU::Pg; } && $@ =~ /Unable to load libpq/; die $@ if $@; @@ -8,11 +9,18 @@ plan skip_all => 'Please set FU_TEST_DB to a PostgreSQL connection string to run my $conn = FU::Pg->connect($ENV{FU_TEST_DB}); $conn->_debug_trace(0); +is_deeply $conn->Q('SELECT', 1, '::int')->param_types, [23]; +is_deeply $conn->Q('SELECT 1', IN([1,2,3]))->param_types, [1007]; +is $conn->Q('SELECT 1', IN([1,2,3]))->val, 1; + ok !eval { $conn->q('SELECT $1::aclitem', '')->exec; 1 }; like $@, qr/Unable to send or receive/; { my $txn = $conn->txn; + + is $txn->Q('SELECT 1', IN([1,2,3]))->val, 1; + $txn->exec(<<~_); CREATE TYPE fupg_test_enum AS ENUM('a', 'b', 'ccccccccccccccccccc'); CREATE DOMAIN fupg_test_domain AS fupg_test_enum CHECK(value IN('a','b')); diff --git a/t/sql.t b/t/sql.t new file mode 100644 index 0000000..db5172e --- /dev/null +++ b/t/sql.t @@ -0,0 +1,71 @@ +use v5.36; +use Test::More; +use FU::SQL; +use experimental 'builtin'; + +sub t($obj, $sql, $params, @opt) { + my($gotsql, $gotparams) = $obj->compile(@opt); + is $gotsql, $sql; + is_deeply $gotparams, $params; +} + +my $x; +t P '', '?', ['']; +t P '', '$1', [''], placeholder_style => 'pg'; +t P undef, '?', [undef]; +t RAW '', '', []; +t SQL('select', '1'), 'select 1', []; +t SQL('select', P '1'), 'select ?', [1]; +t SQL('select', $x = '1'), 'select ?', [1]; +t SQL('select', RAW($x = 1)), 'select 1', []; +t SQL(builtin::true, {}, [], \1), '? ? ? ?', [builtin::true, {}, [], \1]; +t SQL(builtin::true, {}, [], \1), '$1 $2 $3 $4', [builtin::true, {}, [], \1], placeholder_style => 'pg'; +t SQL(map SQL($_), qw/a b c/), 'a b c', []; +t SQL(map SQL($_,$_.'x',$_), qw/a b c/), 'a ? a b ? b c ? c', ['ax','bx','cx']; +t SQL(map P($_), 1,2,3), '? ? ?', [1,2,3]; +t SQL(map { $_ } 1,2,3), '? ? ?', [1,2,3]; + +$x = 'oops'; +my $y = 'y'; +t SQL("SELECT $x"), '?', ["SELECT $x"]; + +t PARENS('a', $x), '( a ? )', [$x]; + +t INTERSPERSE($x, 1, 'a'), '? ? a', [1, $x]; +t INTERSPERSE('-', 'a', $x, $y), 'a - ? - ?', [$x, $y]; + +t COMMA('a', 'b', $x), 'a , b , ?', [$x]; + +t WHERE($x, '1 = 2', SQL('x = ', $x)), + 'WHERE ( ? ) AND ( 1 = 2 ) AND ( x = ? )', [$x, $x]; +t WHERE({ col1 => RAW 'NOW()', col2 => 'a'}), + 'WHERE ( col1 = NOW() ) AND ( col2 = ? )', ['a']; +t WHERE(), 'WHERE 1=1', []; + +t WHERE(AND('true', $x), OR($y, 'y'), AND, OR), + 'WHERE ( ( true ) AND ( ? ) ) AND ( ( ? ) OR ( y ) ) AND ( 1=1 ) AND ( 1=0 )', [$x, $y]; + +t OR({}), '1=0', []; + +t SQL(SELECT => COMMA(qw/a b c/), FROM => 'table', WHERE { x => 1, a => undef }), + 'SELECT a , b , c FROM table WHERE ( a IS NULL ) AND ( x = ? )', [1]; + +t SET({ a => 1, c => RAW 'NOW()', d => undef }), + 'SET a = ? , c = NOW() , d = ?', [1, undef]; + +t VALUES({ a => 1, c => RAW 'NOW()', d => undef }), + '( a , c , d ) VALUES ( ? , NOW() , ? )', [1, undef]; + +t VALUES(1, $x, 'NOW()', RAW 'NOW()'), 'VALUES ( ? , ? , NOW() , NOW() )', [1, $x]; +t VALUES([1, $x, 'NOW()', RAW 'NOW()']), 'VALUES ( ? , ? , ? , NOW() )', [1, $x, 'NOW()']; + +t VALUES(P {}), 'VALUES ( ? )', [{}]; +t VALUES(P []), 'VALUES ( ? )', [[]]; + +t IN([1,2,'a',undef,$x]), 'IN(?,?,?,?,?)', [1,2,'a',undef,$x]; +t IN([1,2,'a',undef,$x]), '= ANY(?)', [[1,2,'a',undef,$x]], in_style => 'pg'; +t IN([]), '= ANY($1)', [[]], in_style => 'pg', placeholder_style => 'pg'; + +t WHERE({ id => IN([1,2]) }), 'WHERE ( id IN(?,?) )', [1,2]; + +done_testing;