From 10e8d4acae00eab22ff0258f42d6ab756eb28b20 Mon Sep 17 00:00:00 2001 From: Yorhel Date: Tue, 17 Jul 2012 18:45:41 +0200 Subject: [PATCH] Added search index table + simple XML-based search interface The XML thing is supposed to be used for auto-completion, of course. The current search implementation is very, very stupid and simple: Just a prefix match on the man name, and simple detection of section stuff. I suppose it'll suffice for now. The man_index table is rebuilt from cron via util/update_indices.sql. --- schema.sql | 4 ++++ util/cron.sh | 4 ++++ util/update_indices.sql | 9 +++++++++ www/index.pl | 42 ++++++++++++++++++++--------------------- 4 files changed, 38 insertions(+), 21 deletions(-) create mode 100644 util/update_indices.sql diff --git a/schema.sql b/schema.sql index af3114e..5270a65 100644 --- a/schema.sql +++ b/schema.sql @@ -53,6 +53,10 @@ CREATE INDEX ON man USING hash (hash); CREATE INDEX ON man (name); +CREATE TABLE man_index AS SELECT DISTINCT name, section FROM man; +CREATE INDEX ON man_index USING btree(lower(name) text_pattern_ops); + + INSERT INTO systems (id, name, release, short, relorder) VALUES (1, 'Arch Linux', NULL, 'arch', 0), (2, 'Ubuntu', '4.10', 'ubuntu-warty', 0), diff --git a/util/cron.sh b/util/cron.sh index e69020a..17c8e39 100755 --- a/util/cron.sh +++ b/util/cron.sh @@ -1,6 +1,10 @@ #!/bin/sh +PSQL="psql -U manned -Awtq" + ./arch.sh ./deb.sh ubuntu_active ./deb.sh debian_active +echo "============ Updating SQL indices" +$PSQL -f update_indices.sql diff --git a/util/update_indices.sql b/util/update_indices.sql new file mode 100644 index 0000000..4d452b8 --- /dev/null +++ b/util/update_indices.sql @@ -0,0 +1,9 @@ +-- Create a new table before replacing in order to avoid a long-held lock on +-- the table being replaced. The site should remain responsive while these +-- queries are run. 
+BEGIN; +CREATE TABLE man_index_new AS SELECT DISTINCT name, section FROM man; +CREATE INDEX ON man_index_new USING btree(lower(name) text_pattern_ops); +DROP TABLE IF EXISTS man_index; +ALTER TABLE man_index_new RENAME TO man_index; +COMMIT; diff --git a/www/index.pl b/www/index.pl index 505bd61..415b041 100755 --- a/www/index.pl +++ b/www/index.pl @@ -42,7 +42,7 @@ TUWF::register( qr{info/about} => \&about, qr{browse/([^/]+)} => \&browsesys, qr{browse/([^/]+)/([^/]+)(?:/([^/]+))?} => \&browsepkg, - qr{xml/search} => \&xmlsearch, + qr{xml/search\.xml} => \&xmlsearch, qr{([^/]+)/([0-9a-f]{8})} => \&man, qr{([^/]+)/([0-9a-f]{8})/src} => \&src, qr{([^/]+)} => \&man, @@ -470,33 +470,15 @@ sub src { } -# TODO: This is a prototype, really needs to be polished and optimized! sub xmlsearch { my $self = shift; my $q = $self->reqGet('q')||''; - - my $mansect = $1 if $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$//; - my $manname = $1 if $q =~ s/^([a-zA-Z0-9,.:_-]+)$//; - - # Manual pages - my $man = !$manname ? [] : $self->dbAll( - 'SELECT name, section - FROM man !W - GROUP BY name, section - ORDER BY name, section - LIMIT 10', - { # Don't use wildcards in this query, prevents index usage. - "name ILIKE '$manname%'" => 1, - $mansect ? ("section ILIKE '$mansect%'" => 1) : () - } - ); + my $man = $self->dbSearch($q, 20); $self->resHeader('Content-Type' => 'text/xml; charset=UTF-8'); xml; tag 'results'; - tag 'mans'; - tag 'man', %$_, undef for(@$man); - end; + tag 'man', %$_, undef for(@$man); end 'results'; } @@ -602,6 +584,24 @@ sub dbManInfo { } +# Very simple (and fast) prefix match. +sub dbSearch { + my($s, $q, $limit) = @_; + + my $sect = $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$// ? $1 : ''; + my $name = $q =~ s/^([a-zA-Z0-9,.:_-]+)// ? $1 : ''; + + return !$name ? [] : $s->dbAll( + 'SELECT name, section FROM man_index !W ORDER BY name, section LIMIT ?', + { # Don't use wildcards in this query, prevents index usage. 
+ "lower(name) LIKE '\L$name\E%'" => 1, + $sect ? ("section ILIKE '\L$sect\E%'" => 1) : () + }, + $limit + ); +} + + sub dbSystemGet { return shift->dbAll('SELECT id, name, release, short, relorder FROM systems ORDER BY name, relorder'); }