Added search index table + simple XML-based search interface

The XML thing is supposed to be used for auto-completion, of course. The
current search implementation is very, very stupid and simple: Just a
prefix match on the man name, and simple detection of section stuff. I
suppose it'll suffice for now.
This commit is contained in:
Yorhel 2012-07-17 18:45:41 +02:00
parent 5d39a55158
commit 10e8d4acae
4 changed files with 38 additions and 21 deletions

View file

@ -53,6 +53,10 @@ CREATE INDEX ON man USING hash (hash);
CREATE INDEX ON man (name); CREATE INDEX ON man (name);
CREATE TABLE man_index AS SELECT DISTINCT name, section FROM man;
CREATE INDEX ON man_index USING btree(lower(name) text_pattern_ops);
INSERT INTO systems (id, name, release, short, relorder) VALUES INSERT INTO systems (id, name, release, short, relorder) VALUES
(1, 'Arch Linux', NULL, 'arch', 0), (1, 'Arch Linux', NULL, 'arch', 0),
(2, 'Ubuntu', '4.10', 'ubuntu-warty', 0), (2, 'Ubuntu', '4.10', 'ubuntu-warty', 0),

View file

@ -1,6 +1,10 @@
#!/bin/sh #!/bin/sh
PSQL="psql -U manned -Awtq"
./arch.sh ./arch.sh
./deb.sh ubuntu_active ./deb.sh ubuntu_active
./deb.sh debian_active ./deb.sh debian_active
echo "============ Updating SQL indices"
# Rebuild the search index table. $PSQL (defined at the top of this script) is
# left unquoted on purpose so that it splits into the psql command and its
# options. The variable name must match that definition exactly: an unset
# variable expands to nothing and the shell would try to run "-f" instead.
$PSQL -f update_indices.sql

9
util/update_indices.sql Normal file
View file

@ -0,0 +1,9 @@
-- Rebuilds the man_index search table from the current contents of man.
--
-- Create a new table before replacing in order to avoid a long-held lock on
-- the table being replaced. The site should remain responsive while these
-- queries are run.
BEGIN;
-- Build the replacement under a temporary name: one row for every distinct
-- (name, section) pair found in man.
CREATE TABLE man_index_new AS SELECT DISTINCT name, section FROM man;
-- Index the lowercased name before the swap, so the new table is already
-- indexed when it takes over. text_pattern_ops is the operator class that
-- lets a btree index serve LIKE 'prefix%' matches on lower(name).
CREATE INDEX ON man_index_new USING btree(lower(name) text_pattern_ops);
-- Swap: remove the old table and give the freshly built one its name. Both
-- statements are part of the same transaction as the build above.
DROP TABLE man_index;
ALTER TABLE man_index_new RENAME TO man_index;
COMMIT;

View file

@ -42,7 +42,7 @@ TUWF::register(
qr{info/about} => \&about, qr{info/about} => \&about,
qr{browse/([^/]+)} => \&browsesys, qr{browse/([^/]+)} => \&browsesys,
qr{browse/([^/]+)/([^/]+)(?:/([^/]+))?} => \&browsepkg, qr{browse/([^/]+)/([^/]+)(?:/([^/]+))?} => \&browsepkg,
qr{xml/search} => \&xmlsearch, qr{xml/search\.xml} => \&xmlsearch,
qr{([^/]+)/([0-9a-f]{8})} => \&man, qr{([^/]+)/([0-9a-f]{8})} => \&man,
qr{([^/]+)/([0-9a-f]{8})/src} => \&src, qr{([^/]+)/([0-9a-f]{8})/src} => \&src,
qr{([^/]+)} => \&man, qr{([^/]+)} => \&man,
@ -470,33 +470,15 @@ sub src {
} }
# TODO: This is a prototype, really needs to be polished and optimized!
sub xmlsearch { sub xmlsearch {
my $self = shift; my $self = shift;
my $q = $self->reqGet('q')||''; my $q = $self->reqGet('q')||'';
my $man = $self->dbSearch($q, 20);
my $mansect = $1 if $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$//;
my $manname = $1 if $q =~ s/^([a-zA-Z0-9,.:_-]+)$//;
# Manual pages
my $man = !$manname ? [] : $self->dbAll(
'SELECT name, section
FROM man !W
GROUP BY name, section
ORDER BY name, section
LIMIT 10',
{ # Don't use wildcards in this query, prevents index usage.
"name ILIKE '$manname%'" => 1,
$mansect ? ("section ILIKE '$mansect%'" => 1) : ()
}
);
$self->resHeader('Content-Type' => 'text/xml; charset=UTF-8'); $self->resHeader('Content-Type' => 'text/xml; charset=UTF-8');
xml; xml;
tag 'results'; tag 'results';
tag 'mans'; tag 'man', %$_, undef for(@$man);
tag 'man', %$_, undef for(@$man);
end;
end 'results'; end 'results';
} }
@ -602,6 +584,24 @@ sub dbManInfo {
} }
# Very simple (and fast) prefix match.
# Very simple (and fast) prefix match against the man_index table.
#
# Arguments:
#   $s      object providing dbAll()
#   $q      raw search string. Recognised forms: "<digit> <name>",
#           "<name>(<section>)" and plain "<name>".
#   $limit  bound to the LIMIT placeholder of the query
# Returns the result of dbAll() (rows with name and section), or an empty
# arrayref when $q does not start with a usable name.
sub dbSearch {
  my($s, $q, $limit) = @_;

  # Optional section: either a single leading digit followed by whitespace, or
  # a trailing "(section)". Whichever matches is stripped from $q.
  my $sect = $q =~ s/^([0-9])\s+// || $q =~ s/\(([a-zA-Z0-9]+)\)$// ? $1 : '';
  # The name is restricted to a conservative character set, which is what
  # makes interpolating it into the SQL below safe.
  my $name = $q =~ s/^([a-zA-Z0-9,.:_-]+)// ? $1 : '';

  # Test the length rather than the truth value, so that a name (or section)
  # of "0" is not mistaken for "nothing given".
  return [] if !length($name);

  # "_" is allowed in names but is a single-character wildcard in LIKE. Escape
  # it so that it only matches a literal underscore and so that the constant
  # prefix of the pattern extends past it. The pattern is emitted as an E''
  # literal with a doubled backslash, which yields the LIKE escape sequence
  # "\_" regardless of the standard_conforming_strings setting.
  (my $pat = lc $name) =~ s/_/\\\\_/g;

  return $s->dbAll(
    'SELECT name, section FROM man_index !W ORDER BY name, section LIMIT ?',
    { # Don't use wildcards in this query, prevents index usage.
      # NOTE(review): "wildcards" presumably refers to bind placeholders for
      # the patterns, which is why they are interpolated here - confirm.
      "lower(name) LIKE E'$pat%'" => 1,
      length($sect) ? ("section ILIKE '\L$sect\E%'" => 1) : ()
    },
    $limit
  );
}
sub dbSystemGet { sub dbSystemGet {
return shift->dbAll('SELECT id, name, release, short, relorder FROM systems ORDER BY name, relorder'); return shift->dbAll('SELECT id, name, release, short, relorder FROM systems ORDER BY name, relorder');
} }