Added and indexed early Debian versions
This commit is contained in:
parent
311b4ee327
commit
531882296f
5 changed files with 70 additions and 20 deletions
|
|
@ -70,7 +70,10 @@ INSERT INTO systems (id, name, release, short, relorder) VALUES
|
||||||
(14, 'Ubuntu', '10.10', 'ubuntu-maverick', 12),
|
(14, 'Ubuntu', '10.10', 'ubuntu-maverick', 12),
|
||||||
(15, 'Ubuntu', '11.04', 'ubuntu-natty', 13),
|
(15, 'Ubuntu', '11.04', 'ubuntu-natty', 13),
|
||||||
(16, 'Ubuntu', '11.10', 'ubuntu-oneiric', 14),
|
(16, 'Ubuntu', '11.10', 'ubuntu-oneiric', 14),
|
||||||
(17, 'Ubuntu', '12.04', 'ubuntu-precise', 15);
|
(17, 'Ubuntu', '12.04', 'ubuntu-precise', 15),
|
||||||
|
(18, 'Debian', '1.1', 'debian-buzz', 0),
|
||||||
|
(19, 'Debian', '1.2', 'debian-rex', 1),
|
||||||
|
(20, 'Debian', '1.3', 'debian-bo', 2);
|
||||||
|
|
||||||
|
|
||||||
-- Removes any path components and compression extensions from the filename.
|
-- Removes any path components and compression extensions from the filename.
|
||||||
|
|
@ -110,3 +113,4 @@ $$ LANGUAGE SQL;
|
||||||
--DELETE FROM contents c WHERE NOT EXISTS(SELECT 1 FROM man m WHERE m.hash = c.hash);
|
--DELETE FROM contents c WHERE NOT EXISTS(SELECT 1 FROM man m WHERE m.hash = c.hash);
|
||||||
--COMMIT;
|
--COMMIT;
|
||||||
|
|
||||||
|
--DELETE FROM package WHERE system = 18 AND NOT EXISTS(SELECT 1 FROM man WHERE id = package);
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ REPOS="core extra community"
|
||||||
DEBUG=false
|
DEBUG=false
|
||||||
SYSID=1
|
SYSID=1
|
||||||
|
|
||||||
CURL="curl -Ss"
|
CURL="curl -fSs"
|
||||||
PSQL="psql -U manned -Awtq"
|
PSQL="psql -U manned -Awtq"
|
||||||
TMP=`mktemp -d manned.arch.XXXXXX`
|
TMP=`mktemp -d manned.arch.XXXXXX`
|
||||||
|
|
||||||
|
|
|
||||||
72
util/deb.sh
72
util/deb.sh
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
# A fetcher for debian-style repositories.
|
# A fetcher for debian-style repositories.
|
||||||
|
|
||||||
CURL="curl -Ss"
|
CURL="curl -fSs"
|
||||||
PSQL="psql -U manned -Awtq"
|
PSQL="psql -U manned -Awtq"
|
||||||
TMP=`mktemp -d manned.deb.XXXXXX`
|
TMP=`mktemp -d manned.deb.XXXXXX`
|
||||||
|
|
||||||
|
|
@ -16,13 +16,22 @@ checkpkg() {
|
||||||
FILE=$6
|
FILE=$6
|
||||||
echo "===> $NAME-$VERSION"
|
echo "===> $NAME-$VERSION"
|
||||||
FN="$TMP/$NAME-$VERSION.deb"
|
FN="$TMP/$NAME-$VERSION.deb"
|
||||||
$CURL "$REPO$FILE" -o "$FN" || return
|
$CURL "$REPO$FILE" -o "$FN" || return 1
|
||||||
|
|
||||||
|
# For 0.939000 formats:
|
||||||
|
# control.tar.gz = tail -n+3 $FILE | head -c"`head -n2 $FILE | tail -n1`"
|
||||||
|
# data.tar.gz = tail -n+3 $FILE | tail -c+"`head -n2 $FILE | tail -n1`" | tail -c+2
|
||||||
|
|
||||||
# Get the date from the last modification time of the debian-binary file
|
# Get the date from the last modification time of the debian-binary file
|
||||||
# inside the .deb. Preferably, the date we store in the database indicates
|
# inside the .deb. Preferably, the date we store in the database indicates
|
||||||
# when the *source* package has been uploaded, but this will work fine as
|
# when the *source* package has been uploaded, but this will work fine as
|
||||||
# an approximation, I guess.
|
# an approximation, I guess.
|
||||||
DATE=`date -d "\`ar tv \"$FN\" debian-binary | perl -lne 's/^[^ ]+ [^ ]+ +\d+ (.+) debian-binary$/print $1/e'\`" "+%F"`
|
if [ "`head -c8 \"$FN\"`" = "0.939000" ]; then
|
||||||
|
DATE=`tail -n+3 "$FN" | head -c"\`head -n2 \"$FN\" | tail -n1\`" | tar -tvzf - | grep control | perl -lne 's/.+ ([^ ]+ [^ ]+) [^ ]*control$/print $1/e'`
|
||||||
|
else
|
||||||
|
DATE=`ar tv "$FN" debian-binary | perl -lne 's/^[^ ]+ [^ ]+ +\d+ (.+) debian-binary$/print $1/e'`
|
||||||
|
fi
|
||||||
|
DATE=`date -d "$DATE" +%F`
|
||||||
|
|
||||||
# Insert package in the database
|
# Insert package in the database
|
||||||
PKGID=`echo "INSERT INTO package (system, category, name, version, released) VALUES(:'sysid',:'cat',:'name',:'ver',:'rel') RETURNING id"\
|
PKGID=`echo "INSERT INTO package (system, category, name, version, released) VALUES(:'sysid',:'cat',:'name',:'ver',:'rel') RETURNING id"\
|
||||||
|
|
@ -30,16 +39,23 @@ checkpkg() {
|
||||||
|
|
||||||
# Extract and handle the man pages
|
# Extract and handle the man pages
|
||||||
if [ "$?" -eq 0 -a -n "$PKGID" ]; then
|
if [ "$?" -eq 0 -a -n "$PKGID" ]; then
|
||||||
DATAFN=`ar t $FN | grep -F data.tar`
|
# Old format
|
||||||
case "$DATAFN" in
|
if [ "`head -c8 \"$FN\"`" = "0.939000" ]; then
|
||||||
"data.tar.gz") DATAZ="-z" ;;
|
tail -n+3 "$FN" | tail -c+"`head -n2 \"$FN\" | tail -n1`" | tail -c+2 | ./add_tar.sh - $PKGID -z
|
||||||
"data.tar.bz2") DATAZ="-j" ;;
|
|
||||||
"data.tar.lzma") DATAZ="--lzma" ;;
|
|
||||||
"data.tar.xz") DATAZ="-J" ;;
|
|
||||||
*) echo "No data.tar found, or unknown compression format."; DATAZ="ERR" ;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
[ "$DATAZ" != "ERR" ] && ar p "$FN" "$DATAFN" | ./add_tar.sh - $PKGID $DATAZ
|
# New format
|
||||||
|
else
|
||||||
|
DATAFN=`ar t $FN | grep -F data.tar`
|
||||||
|
case "$DATAFN" in
|
||||||
|
"data.tar.gz") DATAZ="-z" ;;
|
||||||
|
"data.tar.bz2") DATAZ="-j" ;;
|
||||||
|
"data.tar.lzma") DATAZ="--lzma" ;;
|
||||||
|
"data.tar.xz") DATAZ="-J" ;;
|
||||||
|
*) echo "No data.tar found, or unknown compression format."; DATAZ="ERR" ;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
[ "$DATAZ" != "ERR" ] && ar p "$FN" "$DATAFN" | ./add_tar.sh - $PKGID $DATAZ
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
rm "$FN"
|
rm "$FN"
|
||||||
|
|
@ -66,8 +82,13 @@ syncrepo() {
|
||||||
for CMP in $COMPONENTS; do
|
for CMP in $COMPONENTS; do
|
||||||
echo "MANDIFF-COMPONENT: $CMP" >>"$PFN"
|
echo "MANDIFF-COMPONENT: $CMP" >>"$PFN"
|
||||||
TFN="$TMP/Packages-$CMP.bz2"
|
TFN="$TMP/Packages-$CMP.bz2"
|
||||||
$CURL "${REPO}dists/$DISTRO/$CMP/binary-i386/Packages.bz2" -o "$TFN" || return 1
|
$CURL "${REPO}dists/$DISTRO/$CMP/binary-i386/Packages.bz2" -o "$TFN"
|
||||||
bzcat "$TFN" >>"$PFN"
|
# Use the .bz2 index if it was downloaded and is non-empty; otherwise fall back to Packages.gz.
if [ -s "$TFN" ]; then
|
||||||
|
bzcat "$TFN" >>"$PFN"
|
||||||
|
else
|
||||||
|
$CURL "${REPO}dists/$DISTRO/$CMP/binary-i386/Packages.gz" -o "$TFN" || return 1
|
||||||
|
zcat "$TFN" >>"$PFN"
|
||||||
|
fi
|
||||||
rm "$TFN"
|
rm "$TFN"
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
@ -91,11 +112,12 @@ syncrepo() {
|
||||||
while(<F>) {
|
while(<F>) {
|
||||||
chomp;
|
chomp;
|
||||||
$p = $1 if /^Package: (.+)/;
|
$p = $1 if /^Package: (.+)/;
|
||||||
$v = $1 if /^Version: (.+)/;
|
$v = $1 if /^[Vv]ersion: (.+)/;
|
||||||
$s = $1 if /^Section: (.+)/;
|
$s = $1 if /^[Ss]ection: (.+)/;
|
||||||
$f = $1 if /^Filename: (.+)/;
|
$f = $1 if /^[Ff]ilename: (.+)/;
|
||||||
if(!$_) {
|
if(!$_) {
|
||||||
if($p && $v && $s && $f) {
|
if($p && $v && $s && $f) {
|
||||||
|
# Rewrite a leading Debian-1.1/ or Debian-1.2/ in the filename to dists/Debian-1.N/main/ (dot escaped so it matches literally).
$f =~ s{^(Debian-1\.[12])/}{dists/$1/main/};
|
||||||
print "$p $v $s $f" if $pkg{$p} && $pkg{$p} == 1
|
print "$p $v $s $f" if $pkg{$p} && $pkg{$p} == 1
|
||||||
&& !$db->selectrow_arrayref(q{SELECT 1 FROM package WHERE system = ? AND name = ? AND version = ?}, {}, $sysid, $p, $v);
|
&& !$db->selectrow_arrayref(q{SELECT 1 FROM package WHERE system = ? AND name = ? AND version = ?}, {}, $sysid, $p, $v);
|
||||||
#warn "Duplicate package? $p\n" if $pkg{$p} && $pkg{$p} == 2;
|
#warn "Duplicate package? $p\n" if $pkg{$p} && $pkg{$p} == 2;
|
||||||
|
|
@ -250,6 +272,22 @@ ubuntu_active() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
debian_buzz() {
|
||||||
|
# Contrib uses a rather non-standard arch directory ("binary" and "binary-all"), so let's stick with main for now.
|
||||||
|
syncrepo 18 "http://archive.debian.org/debian/" "buzz" "main" "dists/buzz/main/Contents.gz"
|
||||||
|
}
|
||||||
|
|
||||||
|
debian_rex() {
|
||||||
|
# (Same note on contrib)
|
||||||
|
syncrepo 19 "http://archive.debian.org/debian/" "rex" "main" "dists/rex/main/Contents.gz"
|
||||||
|
}
|
||||||
|
|
||||||
|
debian_bo() {
|
||||||
|
# Contrib and non-free don't have a Contents file :(
|
||||||
|
syncrepo 20 "http://archive.debian.org/debian/" "bo" "main" "dists/bo/main/Contents-i386.gz"
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
"$@"
|
"$@"
|
||||||
|
|
||||||
rm -rf "$TMP"
|
rm -rf "$TMP"
|
||||||
|
|
|
||||||
|
|
@ -57,7 +57,7 @@ sub home {
|
||||||
p style => 'float: none';
|
p style => 'float: none';
|
||||||
# Relevant query: SELECT count(distinct hash), count(distinct name), count(*), count(distinct package) FROM man;
|
# Relevant query: SELECT count(distinct hash), count(distinct name), count(*), count(distinct package) FROM man;
|
||||||
# It's far too slow to run that on every pageview. :-(
|
# It's far too slow to run that on every pageview. :-(
|
||||||
lit 'Indexing <b>485,506</b> versions of <b>119,406</b> manual pages found in <b>1,578,498</b> files of <b>170,215</b> packages.';
|
lit 'Indexing <b>493,399</b> versions of <b>120,090</b> manual pages found in <b>1,598,828</b> files of <b>171,724</b> packages.';
|
||||||
br;
|
br;
|
||||||
txt 'At this point only Arch Linux and Ubuntu have been indexed. More systems and repositories will be added later on.';
|
txt 'At this point only Arch Linux and Ubuntu have been indexed. More systems and repositories will be added later on.';
|
||||||
end;
|
end;
|
||||||
|
|
@ -123,6 +123,13 @@ sub about {
|
||||||
restricted and multiverse) from the $release, $release-updates and
|
restricted and multiverse) from the $release, $release-updates and
|
||||||
$release-security repositories are indexed. Backports are not included at
|
$release-security repositories are indexed. Backports are not included at
|
||||||
the moment.</dd>
|
the moment.</dd>
|
||||||
|
<dt>Debian</dt><dd>
|
||||||
|
Historical releases were fetched from <a
|
||||||
|
href="http://archive.debian.org/debian/">http://archive.debian.org/debian/</a>.
|
||||||
|
For buzz, rex and bo, only the 'main' component has been indexed, and
|
||||||
|
we're missing a few man pages because some packages were missing from the
|
||||||
|
repository archives.
|
||||||
|
</dd>
|
||||||
</dl><br />
|
</dl><br />
|
||||||
Only packages for a single architecture (i386 or i686) are scanned. To my
|
Only packages for a single architecture (i386 or i686) are scanned. To my
|
||||||
knowledge, packages that come with different manuals for different
|
knowledge, packages that come with different manuals for different
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,7 @@ function setText(obj, txt) {
|
||||||
/* What follows is specific to manned.org */
|
/* What follows is specific to manned.org */
|
||||||
|
|
||||||
// TODO: Fix the 'pkg' link
|
// TODO: Fix the 'pkg' link
|
||||||
|
// TODO: Keep same view when switching to different version of the same man page
|
||||||
// TODO: Allow showing/hiding old package versions individually.
|
// TODO: Allow showing/hiding old package versions individually.
|
||||||
// TODO: Allow complete hiding of old systems. (And enable that by default)
|
// TODO: Allow complete hiding of old systems. (And enable that by default)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue