Added and indexed early Debian versions

This commit is contained in:
Yorhel 2012-07-04 12:34:08 +02:00
parent 311b4ee327
commit 531882296f
5 changed files with 70 additions and 20 deletions

View file

@ -70,7 +70,10 @@ INSERT INTO systems (id, name, release, short, relorder) VALUES
(14, 'Ubuntu', '10.10', 'ubuntu-maverick', 12), (14, 'Ubuntu', '10.10', 'ubuntu-maverick', 12),
(15, 'Ubuntu', '11.04', 'ubuntu-natty', 13), (15, 'Ubuntu', '11.04', 'ubuntu-natty', 13),
(16, 'Ubuntu', '11.10', 'ubuntu-oneiric', 14), (16, 'Ubuntu', '11.10', 'ubuntu-oneiric', 14),
(17, 'Ubuntu', '12.04', 'ubuntu-precise', 15); (17, 'Ubuntu', '12.04', 'ubuntu-precise', 15),
(18, 'Debian', '1.1', 'debian-buzz', 0),
(19, 'Debian', '1.2', 'debian-rex', 1),
(20, 'Debian', '1.3', 'debian-bo', 2);
-- Removes any path components and compression extensions from the filename. -- Removes any path components and compression extensions from the filename.
@ -110,3 +113,4 @@ $$ LANGUAGE SQL;
--DELETE FROM contents c WHERE NOT EXISTS(SELECT 1 FROM man m WHERE m.hash = c.hash); --DELETE FROM contents c WHERE NOT EXISTS(SELECT 1 FROM man m WHERE m.hash = c.hash);
--COMMIT; --COMMIT;
--DELETE FROM package WHERE system = 18 AND NOT EXISTS(SELECT 1 FROM man WHERE id = package);

View file

@ -9,7 +9,7 @@ REPOS="core extra community"
DEBUG=false DEBUG=false
SYSID=1 SYSID=1
CURL="curl -Ss" CURL="curl -fSs"
PSQL="psql -U manned -Awtq" PSQL="psql -U manned -Awtq"
TMP=`mktemp -d manned.arch.XXXXXX` TMP=`mktemp -d manned.arch.XXXXXX`

View file

@ -2,7 +2,7 @@
# A fetcher for debian-style repositories. # A fetcher for debian-style repositories.
CURL="curl -Ss" CURL="curl -fSs"
PSQL="psql -U manned -Awtq" PSQL="psql -U manned -Awtq"
TMP=`mktemp -d manned.deb.XXXXXX` TMP=`mktemp -d manned.deb.XXXXXX`
@ -16,13 +16,22 @@ checkpkg() {
FILE=$6 FILE=$6
echo "===> $NAME-$VERSION" echo "===> $NAME-$VERSION"
FN="$TMP/$NAME-$VERSION.deb" FN="$TMP/$NAME-$VERSION.deb"
$CURL "$REPO$FILE" -o "$FN" || return $CURL "$REPO$FILE" -o "$FN" || return 1
# For 0.939000 formats:
# control.tar.gz = tail -n+3 $FILE | head -c"`head -n2 $FILE | tail -n1`"
# data.tar.gz = tail -n+3 $FILE | tail -c+"`head -n2 $FILE | tail -n1`" | tail -c+2
# Get the date from the last modification time of the debian-binary file # Get the date from the last modification time of the debian-binary file
# inside the .deb. Preferably, the date we store in the database indicates # inside the .deb. Preferably, the date we store in the database indicates
# when the *source* package has been uploaded, but this will work fine as # when the *source* package has been uploaded, but this will work fine as
# an approximation, I guess. # an approximation, I guess.
DATE=`date -d "\`ar tv \"$FN\" debian-binary | perl -lne 's/^[^ ]+ [^ ]+ +\d+ (.+) debian-binary$/print $1/e'\`" "+%F"` if [ "`head -c8 \"$FN\"`" = "0.939000" ]; then
DATE=`tail -n+3 "$FN" | head -c"\`head -n2 \"$FN\" | tail -n1\`" | tar -tvzf - | grep control | perl -lne 's/.+ ([^ ]+ [^ ]+) [^ ]*control$/print $1/e'`
else
DATE=`ar tv "$FN" debian-binary | perl -lne 's/^[^ ]+ [^ ]+ +\d+ (.+) debian-binary$/print $1/e'`
fi
DATE=`date -d "$DATE" +%F`
# Insert package in the database # Insert package in the database
PKGID=`echo "INSERT INTO package (system, category, name, version, released) VALUES(:'sysid',:'cat',:'name',:'ver',:'rel') RETURNING id"\ PKGID=`echo "INSERT INTO package (system, category, name, version, released) VALUES(:'sysid',:'cat',:'name',:'ver',:'rel') RETURNING id"\
@ -30,16 +39,23 @@ checkpkg() {
# Extract and handle the man pages # Extract and handle the man pages
if [ "$?" -eq 0 -a -n "$PKGID" ]; then if [ "$?" -eq 0 -a -n "$PKGID" ]; then
DATAFN=`ar t $FN | grep -F data.tar` # Old format
case "$DATAFN" in if [ "`head -c8 \"$FN\"`" = "0.939000" ]; then
"data.tar.gz") DATAZ="-z" ;; tail -n+3 "$FN" | tail -c+"`head -n2 \"$FN\" | tail -n1`" | tail -c+2 | ./add_tar.sh - $PKGID -z
"data.tar.bz2") DATAZ="-j" ;;
"data.tar.lzma") DATAZ="--lzma" ;;
"data.tar.xz") DATAZ="-J" ;;
*) echo "No data.tar found, or unknown compression format."; DATAZ="ERR" ;;
esac
[ "$DATAZ" != "ERR" ] && ar p "$FN" "$DATAFN" | ./add_tar.sh - $PKGID $DATAZ # New format
else
DATAFN=`ar t $FN | grep -F data.tar`
case "$DATAFN" in
"data.tar.gz") DATAZ="-z" ;;
"data.tar.bz2") DATAZ="-j" ;;
"data.tar.lzma") DATAZ="--lzma" ;;
"data.tar.xz") DATAZ="-J" ;;
*) echo "No data.tar found, or unknown compression format."; DATAZ="ERR" ;;
esac
[ "$DATAZ" != "ERR" ] && ar p "$FN" "$DATAFN" | ./add_tar.sh - $PKGID $DATAZ
fi
fi fi
rm "$FN" rm "$FN"
@ -66,8 +82,13 @@ syncrepo() {
for CMP in $COMPONENTS; do for CMP in $COMPONENTS; do
echo "MANDIFF-COMPONENT: $CMP" >>"$PFN" echo "MANDIFF-COMPONENT: $CMP" >>"$PFN"
TFN="$TMP/Packages-$CMP.bz2" TFN="$TMP/Packages-$CMP.bz2"
$CURL "${REPO}dists/$DISTRO/$CMP/binary-i386/Packages.bz2" -o "$TFN" || return 1 $CURL "${REPO}dists/$DISTRO/$CMP/binary-i386/Packages.bz2" -o "$TFN"
bzcat "$TFN" >>"$PFN" if [ -s "$TFN" ]; then
bzcat "$TFN" >>"$PFN"
else
$CURL "${REPO}dists/$DISTRO/$CMP/binary-i386/Packages.gz" -o "$TFN" || return 1
zcat "$TFN" >>"$PFN"
fi
rm "$TFN" rm "$TFN"
done done
@ -91,11 +112,12 @@ syncrepo() {
while(<F>) { while(<F>) {
chomp; chomp;
$p = $1 if /^Package: (.+)/; $p = $1 if /^Package: (.+)/;
$v = $1 if /^Version: (.+)/; $v = $1 if /^[Vv]ersion: (.+)/;
$s = $1 if /^Section: (.+)/; $s = $1 if /^[Ss]ection: (.+)/;
$f = $1 if /^Filename: (.+)/; $f = $1 if /^[Ff]ilename: (.+)/;
if(!$_) { if(!$_) {
if($p && $v && $s && $f) { if($p && $v && $s && $f) {
$f =~ s{^(Debian-1.[12])/}{dists/$1/main/};
print "$p $v $s $f" if $pkg{$p} && $pkg{$p} == 1 print "$p $v $s $f" if $pkg{$p} && $pkg{$p} == 1
&& !$db->selectrow_arrayref(q{SELECT 1 FROM package WHERE system = ? AND name = ? AND version = ?}, {}, $sysid, $p, $v); && !$db->selectrow_arrayref(q{SELECT 1 FROM package WHERE system = ? AND name = ? AND version = ?}, {}, $sysid, $p, $v);
#warn "Duplicate package? $p\n" if $pkg{$p} && $pkg{$p} == 2; #warn "Duplicate package? $p\n" if $pkg{$p} && $pkg{$p} == 2;
@ -250,6 +272,22 @@ ubuntu_active() {
} }
# Sync the Debian "buzz" release (system id 18) from archive.debian.org.
# Only the "main" component is requested: contrib uses rather non-standard
# arch directories ("binary" and "binary-all"), so it is left out for now.
debian_buzz() {
  syncrepo 18 \
    "http://archive.debian.org/debian/" \
    "buzz" \
    "main" \
    "dists/buzz/main/Contents.gz"
}
# Sync the Debian "rex" release (system id 19) from archive.debian.org.
# Only the "main" component is requested; contrib is left out for the same
# reason as in buzz (non-standard arch directories).
debian_rex() {
  syncrepo 19 \
    "http://archive.debian.org/debian/" \
    "rex" \
    "main" \
    "dists/rex/main/Contents.gz"
}
# Sync the Debian "bo" release (system id 20) from archive.debian.org.
# Only the "main" component is requested: contrib and non-free don't ship a
# Contents file. Note that bo names its index Contents-i386.gz.
debian_bo() {
  syncrepo 20 \
    "http://archive.debian.org/debian/" \
    "bo" \
    "main" \
    "dists/bo/main/Contents-i386.gz"
}
"$@" "$@"
rm -rf "$TMP" rm -rf "$TMP"

View file

@ -57,7 +57,7 @@ sub home {
p style => 'float: none'; p style => 'float: none';
# Relevant query: SELECT count(distinct hash), count(distinct name), count(*), count(distinct package) FROM man; # Relevant query: SELECT count(distinct hash), count(distinct name), count(*), count(distinct package) FROM man;
# It's far too slow to run that on every pageview. :-( # It's far too slow to run that on every pageview. :-(
lit 'Indexing <b>485,506</b> versions of <b>119,406</b> manual pages found in <b>1,578,498</b> files of <b>170,215</b> packages.'; lit 'Indexing <b>493,399</b> versions of <b>120,090</b> manual pages found in <b>1,598,828</b> files of <b>171,724</b> packages.';
br; br;
txt 'At this point only Arch Linux and Ubuntu have been indexed. More systems and repositories will be added later on.'; txt 'At this point only Arch Linux and Ubuntu have been indexed. More systems and repositories will be added later on.';
end; end;
@ -123,6 +123,13 @@ sub about {
restricted and multiverse) from the $release, $release-updates and restricted and multiverse) from the $release, $release-updates and
$release-security repositories are indexed. Backports are not included at $release-security repositories are indexed. Backports are not included at
the moment.</dd> the moment.</dd>
<dt>Debian</dt><dd>
Historical releases were fetched from <a
href="http://archive.debian.org/debian/">http://archive.debian.org/debian/</a>.
For buzz, rex and bo, only the 'main' component has been indexed, and
we're missing a few man pages because some packages were missing from the
repository archives.
</dd>
</dl><br /> </dl><br />
Only packages for a single architecture (i386 or i686) are scanned. To my Only packages for a single architecture (i386 or i686) are scanned. To my
knowledge, packages that come with different manuals for different knowledge, packages that come with different manuals for different

View file

@ -45,6 +45,7 @@ function setText(obj, txt) {
/* What follows is specific to manned.org */ /* What follows is specific to manned.org */
// TODO: Fix the 'pkg' link // TODO: Fix the 'pkg' link
// TODO: Keep same view when switching to different version of the same man page
// TODO: Allow showing/hiding old package versions individually. // TODO: Allow showing/hiding old package versions individually.
// TODO: Allow complete hiding of old systems. (And enable that by default) // TODO: Allow complete hiding of old systems. (And enable that by default)