Reorganize indexing scripts + use Rust for Debian

This commit is contained in:
Yorhel 2016-11-20 12:29:01 +01:00
parent 5d44d0e2ec
commit 2ee2f7495b
7 changed files with 142 additions and 148 deletions

13
util/arch.sh Executable file
View file

@ -0,0 +1,13 @@
#!/bin/bash
# Index the Arch Linux repositories.
# Usage: ./arch.sh active
#
# Runs under bash rather than sh because common.sh (sourced below) declares a
# bash shebang and relies on `source` and `local`.
. ./common.sh

case "$1" in
  active)
    MIRROR=http://ftp.nluug.nl/pub/os/Linux/distr/archlinux
    # One indexer run per repository.
    for REPO in core extra community; do
      index arch --sys arch --mirror "$MIRROR" --repo "$REPO"
    done
    ;;
  *)
    # Fail loudly instead of silently indexing nothing on a typo.
    echo "Usage: $0 active" >&2
    exit 1
    ;;
esac

View file

@ -1,8 +1,50 @@
#!/bin/bash #!/bin/bash
CURL="curl -fSs -A manual-page-crawler,info@manned.org --limit-rate 500k" if test -f .config; then
source .config
fi
# Run ./indexer with the given arguments, printing the invocation first.
# Arguments: passed through unchanged to ./indexer, after "-vv --dryrun".
# Outputs:   a "====>" header line, the indexer's stdout and stderr (merged),
#            and a trailing blank line, all on stdout.
# NOTE(review): --dryrun is always passed although the echoed command line
# does not mention it -- confirm that this is intentional.
index() {
  echo "====> indexer -vv $*"
  # "$@" hands every argument over intact; an unquoted $@ would re-split
  # arguments that contain whitespace and expand glob characters.
  ./indexer -vv --dryrun "$@" 2>&1
  echo
}
# Convenient wrapper around index() for debian repos
# TODO: Use x86_64 for new releases
# Usage: index_deb sys mirror distro list-of-components [contents]
#   sys         value for the indexer's --sys option
#   mirror      repository base URL; paths are appended to it directly, so it
#               has to end in a slash
#   distro      directory name below dists/
#   components  whitespace-separated list, e.g. "main contrib non-free"
#   contents:
#     empty for the global dists/<distro>/Contents-i386.gz location
#     "cmp" for the per-component dists/<distro>/<component>/Contents-i386.gz
#       location
#     Otherwise, path to the Contents file relative to the mirror URL
# Calls index() once per component.
index_deb() {
  local SYS=$1
  local MIRROR=$2
  local DISTRO=$3
  local COMPONENTS=$4
  local CONTENTS=${5:-"dists/$DISTRO/Contents-i386.gz"}
  # Keep the loop variables out of the caller's scope.
  local CMP CONT
  # COMPONENTS is intentionally unquoted: it is a whitespace-separated list.
  for CMP in $COMPONENTS; do
    CONT=$CONTENTS
    if [ "$CONT" = cmp ]; then
      CONT="dists/$DISTRO/$CMP/Contents-i386.gz"
    fi
    index deb --sys "$SYS" --mirror "$MIRROR" --contents "$MIRROR$CONT" --packages "${MIRROR}dists/$DISTRO/$CMP/binary-i386/Packages.gz"
  done
}
PSQL="psql -U manned -Awtq" PSQL="psql -U manned -Awtq"
## THE STUFF BELOW IS OLD
# To be replaced with calls to index()
CURL="curl -fSs -A manual-page-crawler,info@manned.org --limit-rate 500k"
TMP=`mktemp -d manned.XXXXXX` TMP=`mktemp -d manned.XXXXXX`
# bash-ism, remove the working directory when we're done. # bash-ism, remove the working directory when we're done.

View file

@ -2,9 +2,8 @@
. ./common.sh . ./common.sh
./index.sh daily ./arch.sh active
./deb.sh ubuntu_active ./debian.sh active
./deb.sh debian_active
echo "============ Updating SQL indices" echo "============ Updating SQL indices"
$PSQL -f update_indices.sql $PSQL -f update_indices.sql

78
util/debian.sh Executable file
View file

@ -0,0 +1,78 @@
#!/bin/bash
# Index Debian releases.
# Usage: ./debian.sh <release>|old|active|all
#   <release>  one of: buzz rex bo hamm slink potato woody sarge etch lenny
#              squeeze wheezy jessie
#   old        every release from buzz up to and including squeeze
#   active     wheezy and jessie
#   all        old followed by active
#
# Runs under bash rather than sh because common.sh (sourced below) declares a
# bash shebang and relies on `source` and `local`.
. ./common.sh

AMIRROR=http://archive.debian.org/debian/
CMIRROR=http://ftp.nl.debian.org/debian/

# XXX: buzz and rex have some deb-old formatted packages, the indexer doesn't support these.

case "$1" in
  buzz)
    index deb --sys debian-buzz --mirror "$AMIRROR" --contents "${AMIRROR}dists/buzz/main/Contents.gz" --packages "${AMIRROR}dists/buzz/main/binary-i386/Packages.gz"
    index deb --sys debian-buzz --mirror "$AMIRROR" --contents "${AMIRROR}dists/buzz/contrib/Contents.gz" --packages "${AMIRROR}dists/buzz/contrib/binary/Packages.gz"
    ;;
  rex)
    index deb --sys debian-rex --mirror "$AMIRROR" --contents "${AMIRROR}dists/rex/main/Contents.gz" --packages "${AMIRROR}dists/rex/main/binary-i386/Packages.gz"
    index deb --sys debian-rex --mirror "$AMIRROR" --contents "${AMIRROR}dists/rex/contrib/Contents.gz" --packages "${AMIRROR}dists/rex/contrib/binary/Packages.gz"
    ;;
  bo)
    index deb --sys debian-bo --mirror "$AMIRROR" --contents "${AMIRROR}dists/bo/main/Contents-i386.gz" --packages "${AMIRROR}dists/bo/main/binary-i386/Packages.gz"
    # There's no Contents file for contrib and non-free
    index deb --sys debian-bo --mirror "$AMIRROR" --packages "${AMIRROR}dists/bo/contrib/binary/Packages.gz"
    index deb --sys debian-bo --mirror "$AMIRROR" --packages "${AMIRROR}dists/bo/non-free/binary/Packages.gz"
    ;;
  hamm)
    index_deb debian-hamm "$AMIRROR" hamm "main hamm contrib non-free"
    ;;
  slink)
    index_deb debian-slink "$AMIRROR" slink "main contrib non-free"
    ;;
  potato)
    index_deb debian-potato "$AMIRROR" potato "main contrib non-free"
    ;;
  woody)
    index_deb debian-woody "$AMIRROR" woody "main contrib non-free"
    ;;
  sarge)
    index_deb debian-sarge "$AMIRROR" sarge "main contrib non-free"
    ;;
  etch)
    index_deb debian-etch "$AMIRROR" etch "main contrib non-free"
    ;;
  lenny)
    index_deb debian-lenny "$AMIRROR" lenny "main contrib non-free"
    ;;
  squeeze)
    index_deb debian-squeeze "$AMIRROR" squeeze "main contrib non-free"
    index_deb debian-squeeze "$AMIRROR" squeeze-lts "main contrib non-free" cmp
    ;;
  wheezy)
    index_deb debian-wheezy "$CMIRROR" wheezy "main contrib non-free"
    index_deb debian-wheezy "$CMIRROR" wheezy-updates "main contrib non-free" cmp
    ;;
  jessie)
    index_deb debian-jessie "$CMIRROR" jessie "main contrib non-free" cmp
    index_deb debian-jessie "$CMIRROR" jessie-updates "main contrib non-free" cmp
    ;;
  old)
    # Re-run this script once per archived release, oldest first.
    for RELEASE in buzz rex bo hamm slink potato woody sarge etch lenny squeeze; do
      "$0" "$RELEASE"
    done
    ;;
  active)
    for RELEASE in wheezy jessie; do
      "$0" "$RELEASE"
    done
    ;;
  all)
    "$0" old
    "$0" active
    ;;
  *)
    # Fail loudly instead of silently indexing nothing on a typo.
    echo "Usage: $0 <release>|old|active|all" >&2
    exit 1
    ;;
esac

View file

@ -1,22 +0,0 @@
#!/bin/bash
# Entry point for indexing jobs: runs the function named on the command line
# with the remaining arguments, e.g. `./index.sh daily`.
# The shebang is explicit because the script uses `source` and `local`.

# Optional local settings.
if test -f .config; then
  source .config
fi

INDEX="./indexer -vv"

# Trace every command executed from here on.
set -x

# Run the indexer once for each Arch Linux repository.
arch() {
  local MIRROR=http://ftp.nluug.nl/pub/os/Linux/distr/archlinux
  local REPO
  for REPO in core extra community; do
    # $INDEX stays unquoted on purpose: it holds a command plus its options.
    $INDEX arch --sys arch --mirror "$MIRROR" --repo "$REPO"
  done
}

# Jobs for the daily run.
daily() {
  arch
}

# Dispatch; quoted so that arguments reach the function unmodified.
"$@"

View file

@ -339,120 +339,5 @@ ubuntu() {
} }
# Calls syncrepo for the "main" component of buzz on archive.debian.org.
# Contrib is left out for now: it uses a rather non-standard arch directory
# ("binary" and "binary-all").
debian_buzz() {
  local mirror="http://archive.debian.org/debian/"
  local release="buzz"
  syncrepo 18 "$mirror" "$release" "main" "dists/$release/main/Contents.gz"
}
# Calls syncrepo for the "main" component of rex on archive.debian.org.
# Contrib is left out for the same reason as for buzz (non-standard arch
# directory).
debian_rex() {
  local mirror="http://archive.debian.org/debian/"
  local release="rex"
  syncrepo 19 "$mirror" "$release" "main" "dists/$release/main/Contents.gz"
}
# Calls syncrepo for the "main" component of bo on archive.debian.org.
# Contrib and non-free don't have a Contents file, so they are skipped.
debian_bo() {
  local mirror="http://archive.debian.org/debian/"
  local release="bo"
  syncrepo 20 "$mirror" "$release" "main" "dists/$release/main/Contents-i386.gz"
}
# Calls syncrepo for hamm on archive.debian.org with the components
# main, hamm, contrib and non-free.
debian_hamm() {
  local mirror="http://archive.debian.org/debian/"
  local components="main hamm contrib non-free"
  syncrepo 21 "$mirror" "hamm" "$components"
}
# Calls syncrepo for slink (main, contrib, non-free) on archive.debian.org.
debian_slink() {
  local mirror="http://archive.debian.org/debian/"
  local components="main contrib non-free"
  syncrepo 22 "$mirror" "slink" "$components"
}
# Calls syncrepo for potato (main, contrib, non-free) on archive.debian.org.
debian_potato() {
  local mirror="http://archive.debian.org/debian/"
  local components="main contrib non-free"
  syncrepo 23 "$mirror" "potato" "$components"
}
# Calls syncrepo for woody (main, contrib, non-free) on archive.debian.org.
debian_woody() {
  local mirror="http://archive.debian.org/debian/"
  local components="main contrib non-free"
  syncrepo 24 "$mirror" "woody" "$components"
}
# Calls syncrepo for sarge (main, contrib, non-free) on archive.debian.org.
debian_sarge() {
  local mirror="http://archive.debian.org/debian/"
  local components="main contrib non-free"
  syncrepo 25 "$mirror" "sarge" "$components"
}
# Calls syncrepo for etch (main, contrib, non-free) on archive.debian.org.
debian_etch() {
  local mirror="http://archive.debian.org/debian/"
  local components="main contrib non-free"
  syncrepo 26 "$mirror" "etch" "$components"
}
# Calls syncrepo for lenny (main, contrib, non-free) on archive.debian.org.
debian_lenny() {
  local mirror="http://archive.debian.org/debian/"
  local components="main contrib non-free"
  syncrepo 27 "$mirror" "lenny" "$components"
}
# Calls syncrepo for squeeze and squeeze-updates (main, contrib, non-free)
# on ftp.nl.debian.org.
debian_squeeze() {
  local mirror="http://ftp.nl.debian.org/debian/"
  local dist
  for dist in "squeeze" "squeeze-updates"; do
    syncrepo 28 "$mirror" "$dist" "main contrib non-free"
  done
}
# Calls syncrepo for wheezy (all components in one call) and then for each
# component of wheezy-updates separately on ftp.nl.debian.org.
debian_wheezy() {
  local mirror="http://ftp.nl.debian.org/debian/"
  local component
  syncrepo 83 "$mirror" "wheezy" "main contrib non-free"
  # The Contents-* files have moved to a per-component location, so each
  # component is passed together with its own Contents path.
  for component in main contrib non-free; do
    syncrepo 83 "$mirror" "wheezy-updates" "$component" "dists/wheezy-updates/$component/Contents-i386.gz"
  done
}
# Calls syncrepo once per component of jessie and of jessie-updates on
# ftp.nl.debian.org, each with its per-component Contents path.
debian_jessie() {
  local mirror="http://ftp.nl.debian.org/debian/"
  local dist component
  for dist in jessie jessie-updates; do
    for component in main contrib non-free; do
      syncrepo 91 "$mirror" "$dist" "$component" "dists/$dist/$component/Contents-i386.gz"
    done
  done
}
# Runs the per-release functions for buzz up to and including squeeze,
# in that order.
debian_old() {
  local rel
  for rel in buzz rex bo hamm slink potato woody sarge etch lenny squeeze; do
    "debian_$rel"
  done
}
# Runs the per-release functions for wheezy and jessie, in that order.
debian_active() {
  local rel
  for rel in wheezy jessie; do
    "debian_$rel"
  done
}
# Runs debian_old followed by debian_active.
debian() {
  local group
  for group in old active; do
    "debian_$group"
  done
}
# Fetch older packages from snapshot.debian.org
# Usage: debian_snapshot_month year month
# Downloads the snapshot listing for the given month with $CURL, keeps the
# first listed snapshot of each day, and calls syncrepo for every release
# whose date window contains that snapshot.
debian_snapshot_month() {
  # All working variables are local so nothing leaks into the caller.
  local YEAR=$1
  local MONTH=$2
  local ROOT="http://snapshot.debian.org/archive/debian/"
  local PREVDATE="00000000"
  local DATES DATE CURDATE
  # $CURL stays unquoted on purpose: it holds the command plus its options.
  DATES=$($CURL "$ROOT?year=$YEAR&month=$MONTH" | perl -lne 'm|<a href="([0-9]{8}T[0-9]{6}Z)/"| && print $1')
  for DATE in $DATES; do
    # Only the first snapshot of a day is used; later ones are skipped.
    CURDATE=${DATE:0:8}
    [ "$CURDATE" = "$PREVDATE" ] && continue
    PREVDATE=$CURDATE
    # DATE is the full timestamp (YYYYMMDDThhmmssZ); it is compared as a
    # string against the YYYYMMDD bounds below.
    if [ "$DATE" \< "20070104" ]; then
      syncrepo 24 "$ROOT$DATE/" "woody" "main contrib non-free"
    fi
    if [ "$DATE" \> "20050607" ] && [ "$DATE" \< "20081028" ]; then
      syncrepo 25 "$ROOT$DATE/" "sarge" "main contrib non-free"
    fi
    if [ "$DATE" \> "20070409" ] && [ "$DATE" \< "20100620" ]; then
      syncrepo 26 "$ROOT$DATE/" "etch" "main contrib non-free"
    fi
    if [ "$DATE" \> "20090218" ] && [ "$DATE" \< "20120326" ]; then
      syncrepo 27 "$ROOT$DATE/" "lenny" "main contrib non-free"
    fi
    if [ "$DATE" \> "20110206" ]; then
      syncrepo 28 "$ROOT$DATE/" "squeeze" "main contrib non-free"
      syncrepo 28 "$ROOT$DATE/" "squeeze-updates" "main contrib non-free"
    fi
  done
}
"$@" "$@"

View file

@ -245,10 +245,9 @@ sub about {
Historical releases were fetched from <a Historical releases were fetched from <a
href="http://archive.debian.org/debian/">http://archive.debian.org/debian/</a> href="http://archive.debian.org/debian/">http://archive.debian.org/debian/</a>
and <a href="http://snapshot.debian.org/">http://snapshot.debian.org/</a>. and <a href="http://snapshot.debian.org/">http://snapshot.debian.org/</a>.
For buzz, rex and bo, only the 'main' component has been indexed, and For buzz, rex and bo, we're missing a few man pages because some packages
we're missing a few man pages because some packages were missing from the were missing from the repository archives. Where available, all components
repository archives. For the other releases, all components (main, contrib (main, contrib and non-free) from the $release and $release-updates
and non-free) from the $release and $release-updates (where available)
repositories are indexed.</dd> repositories are indexed.</dd>
<dt>FreeBSD</dt><dd> <dt>FreeBSD</dt><dd>
Historical releases were fetched from <a Historical releases were fetched from <a
@ -267,8 +266,8 @@ sub about {
href="http://old-releases.ubuntu.com/ubuntu/">http://old-releases.ubuntu.com/ubuntu/</a>, href="http://old-releases.ubuntu.com/ubuntu/">http://old-releases.ubuntu.com/ubuntu/</a>,
supported releases from a local mirror. All components (main, universe, supported releases from a local mirror. All components (main, universe,
restricted and multiverse) from the $release, $release-updates and restricted and multiverse) from the $release, $release-updates and
$release-security repositories are indexed. Backports are not included at $release-security repositories are indexed. Indexing started around mid
the moment. Indexing started around mid June 2012.</dd> June 2012.</dd>
</dl> </dl>
Only packages for a single architecture (i386 or amd64) are scanned. To my Only packages for a single architecture (i386 or amd64) are scanned. To my
knowledge, packages that come with different manuals for different knowledge, packages that come with different manuals for different