Normalize package info tables + change browsing URLs

This splits the 'package' table into 'packages' and 'package_versions',
which should improve performance in some cases and simplify some future
queries.

Previously it wasn't very well defined whether packages were uniquely
identified by (system, name) or by (system, category, name). This is now
normalized to the latter form. This required changes to the package URLs
to include the category.
This commit is contained in:
Yorhel 2016-10-02 20:07:57 +02:00
parent 26aefaebcd
commit 03d278e4ff
7 changed files with 180 additions and 134 deletions

View file

@ -11,13 +11,18 @@ trap "rm -rf $TMP" EXIT
# Usage: add_pkginfo sysid category name version date
# Returns 0 if the package is already in the database or if an error occured.
# Otherwise adds the package, sets PKGID to the new ID, and returns 1.
# Otherwise adds the package, sets PKGID to the new package_versions.id, and returns 1.
PKGID=
add_pkginfo() {
RES=`echo "SELECT id FROM package WHERE system = :'sysid' AND name = :'name' AND version = :'ver'"\
| $PSQL -v "sysid=$1" -v "name=$3" -v "ver=$4"`
RES=`echo "SELECT pv.id FROM packages p JOIN package_versions pv ON pv.package = p.id
WHERE p.system = :'sysid' AND p.category = :'cat' AND p.name = :'name' AND pv.version = :'ver'"\
| $PSQL -v "sysid=$1" -v "cat=$2" -v "name=$3" -v "ver=$4"`
[ "$?" -ne 0 -o -n "$RES" ] && return 0
RES=`echo "INSERT INTO package (system, category, name, version, released) VALUES(:'sysid',:'cat',:'name',:'ver',:'rel') RETURNING id"\
RES=`echo "
INSERT INTO packages (system, category, name) VALUES(:'sysid', :'cat', :'name') ON CONFLICT DO NOTHING;
INSERT INTO package_versions (version, released, package) VALUES(:'ver', :'rel',
(SELECT packages.id FROM packages WHERE system = :'sysid' AND category = :'cat' AND name = :'name'))
RETURNING id"\
| $PSQL -v "sysid=$1" -v "cat=$2" -v "name=$3" -v "ver=$4" -v "rel=$5"`
[ "$?" -ne 0 ] && return 0
PKGID=$RES

View file

@ -31,11 +31,10 @@ checkpkg() {
DATE=`date -d "$DATE" +%F`
# Insert package in the database
PKGID=`echo "INSERT INTO package (system, category, name, version, released) VALUES(:'sysid',:'cat',:'name',:'ver',:'rel') RETURNING id"\
| $PSQL -v "sysid=$SYSID" -v "cat=$SECTION" -v "name=$NAME" -v "ver=$VERSION" -v "rel=$DATE"`
add_pkginfo $SYSID $SECTION $NAME $VERSION $DATE
# Extract and handle the man pages
if [ "$?" -eq 0 -a -n "$PKGID" ]; then
if [ "$?" -eq 1 -a -n "$PKGID" ]; then
# Old format
if [ "`head -c8 \"$FN\"`" = "0.939000" ]; then
tail -n+3 "$FN" | tail -c+"`head -n2 \"$FN\" | tail -n1`" | tail -c+2 | add_tar - $PKGID -z
@ -116,11 +115,13 @@ syncrepo() {
if($p && $v && $s && $f) {
$f =~ s{^(Debian-1.[12])/}{dists/$1/main/};
print "$p $v $s $f" if $pkg{$p} && $pkg{$p} == 1
&& !$db->selectrow_arrayref(q{SELECT 1 FROM package WHERE system = ? AND name = ? AND version = ?}, {}, $sysid, $p, $v);
&& !$db->selectrow_arrayref(q{
SELECT 1 FROM packages p JOIN package_versions pv ON pv.package = p.id
WHERE p.system = ? AND p.category = ? AND p.name = ? AND pv.version = ?}, {}, $sysid, $s, $p, $v);
#warn "Duplicate package? $p\n" if $pkg{$p} && $pkg{$p} == 2;
$pkg{$p} = 2;
}
$p=$v=$f=undef
$p=$v=$f=$s=undef
}
}
close F;

View file

@ -52,8 +52,7 @@ check_pkg() { # <sysid> <base-url> <category> <filename> <name> <version>
return
fi
PKGID=`echo "INSERT INTO package (system, category, name, version, released) VALUES(:'sysid',:'cat',:'name',:'ver',:'rel') RETURNING id"\
| $PSQL -v "sysid=$SYSID" -v "cat=$CAT" -v "name=$NAME" -v "ver=$VER" -v "rel=$DATE"`
add_pkginfo $SYSID $CAT $NAME $VER $DATE
add_tar "$TMP/$FN" $PKGID
rm -f "$TMP/$FN"
}
@ -84,8 +83,8 @@ check_pkgdir() { # <sysid> <url>
echo "== Error fetching package index for /$CAT/"
continue
fi
perl -l - "$TMP/pkgnames" "$TMP/pkglist" $SYSID <<'EOP' >"$TMP/newpkgs"
($names, $list, $sysid) = @ARGV;
perl -l - "$TMP/pkgnames" "$TMP/pkglist" $SYSID $CAT <<'EOP' >"$TMP/newpkgs"
($names, $list, $sysid, $cat) = @ARGV;
use DBI;
$db = DBI->connect('dbi:Pg:dbname=manned', 'manned', '', {RaiseError => 1});
@ -103,7 +102,9 @@ check_pkgdir() { # <sysid> <url>
if(!$n || !$names{$n} || !$v) {
warn "== Unknown package: $c\n";
} else {
print "$c $n $v" if !$db->selectrow_arrayref(q{SELECT 1 FROM package WHERE system = ? AND name = ? AND version = ?}, {}, $sysid, $n, $v);
print "$c $n $v" if !$db->selectrow_arrayref(q{
SELECT 1 FROM packages p JOIN package_versions pv ON pv.package = p.id
WHERE p.system = ? AND p.category = ? AND p.name = ? AND pv.version = ?}, {}, $sysid, $cat, $n, $v);
}
}
close F;
@ -720,15 +721,7 @@ f9_1() {
}
f9_2() {
MIR="http://ftp.dk.freebsd.org/pub/FreeBSD/releases/i386/9.2-RELEASE/"
echo "============ $MIR"
check_dist 86 "$MIR/base.txz" "core-base" "2013-09-27"
check_dist 86 "$MIR/games.txz" "core-games" "2013-09-27"
check_pkgdir 86 "$MIR/packages"
}
f9_3() {
MIR="http://ftp.dk.freebsd.org/pub/FreeBSD/releases/i386/9.2-RELEASE/"
MIR="http://ftp-archive.freebsd.org/mirror/FreeBSD-Archive/old-releases/i386/9.2-RELEASE/"
echo "============ $MIR"
check_dist 86 "$MIR/base.txz" "core-base" "2013-09-27"
check_dist 86 "$MIR/games.txz" "core-games" "2013-09-27"