Reorganize indexing scripts + use Rust for Debian
This commit is contained in:
parent
5d44d0e2ec
commit
2ee2f7495b
7 changed files with 142 additions and 148 deletions
13
util/arch.sh
Executable file
13
util/arch.sh
Executable file
|
|
@ -0,0 +1,13 @@
|
|||
#!/bin/sh
# Index the Arch Linux repositories.
# Usage: ./arch.sh active
#   active - run index once for every repository listed in REPOS.
# Any other argument (or none) does nothing.
# NOTE(review): index is presumably provided by ./common.sh - confirm.

. ./common.sh

if [ "$1" = active ]; then
  MIRROR=http://ftp.nluug.nl/pub/os/Linux/distr/archlinux
  REPOS="core extra community"
  # REPOS is left unquoted on purpose: it is a whitespace-separated list.
  for REPO in $REPOS; do
    index arch --sys arch --mirror "$MIRROR" --repo "$REPO"
  done
fi
|
||||
|
|
@ -1,8 +1,50 @@
|
|||
#!/bin/bash

# Base curl command line: fail on HTTP errors, stay quiet but still report
# errors, send a custom User-Agent and cap the transfer rate at 500k.
CURL="curl -fSs -A manual-page-crawler,info@manned.org --limit-rate 500k"

# Load .config from the current directory when it exists.
if [ -f .config ]; then
  . .config
fi
|
||||
|
||||
|
||||
# Run ./indexer on the given arguments, framed by a banner and a blank line.
# Arguments: forwarded verbatim to ./indexer, after "-vv --dryrun".
# Outputs:   the banner, the indexer's stdout and stderr merged, a blank line.
# NOTE(review): the banner omits --dryrun although it is always passed -
# confirm whether the dry run is intentional.
index() {
  printf '%s\n' "====> indexer -vv $*"
  # "$@" keeps each argument intact; an unquoted $@ would re-split any
  # argument containing whitespace and expand glob characters.
  ./indexer -vv --dryrun "$@" 2>&1
  echo
}
|
||||
|
||||
|
||||
# Convenient wrapper around index() for debian repos
# TODO: Use x86_64 for new releases
# Usage: index_deb sys mirror distro list-of-components [contents]
#   sys        - value passed to --sys
#   mirror     - mirror base URL; paths are appended directly, so it must
#                end in "/"
#   distro     - directory name below dists/
#   components - whitespace-separated list; index() runs once per component
#   contents:
#     empty for the global dists/<distro>/Contents-i386.gz location
#     "cmp" for the per-component dists/<distro>/<component>/Contents-i386.gz
#     Otherwise, path to the Contents file relative to the mirror
index_deb() {
  local SYS=$1
  local MIRROR=$2
  local DISTRO=$3
  local COMPONENTS=$4
  local CONTENTS=${5:-"dists/$DISTRO/Contents-i386.gz"}
  local CMP CONT

  # COMPONENTS is left unquoted on purpose: it is a whitespace-separated list.
  for CMP in $COMPONENTS; do
    CONT=$CONTENTS
    # Quoted so a Contents path containing whitespace does not break the test.
    if [ "$CONT" = cmp ]; then
      CONT="dists/$DISTRO/$CMP/Contents-i386.gz"
    fi
    index deb --sys "$SYS" --mirror "$MIRROR" --contents "$MIRROR$CONT" --packages "${MIRROR}dists/$DISTRO/$CMP/binary-i386/Packages.gz"
  done
}
|
||||
|
||||
|
||||
PSQL="psql -U manned -Awtq"
|
||||
|
||||
|
||||
|
||||
|
||||
## THE STUFF BELOW IS OLD
|
||||
# To be replaced with calls to index()
|
||||
|
||||
CURL="curl -fSs -A manual-page-crawler,info@manned.org --limit-rate 500k"
|
||||
|
||||
TMP=`mktemp -d manned.XXXXXX`
|
||||
|
||||
# bash-ism, remove the working directory when we're done.
|
||||
|
|
|
|||
|
|
@ -2,9 +2,8 @@
|
|||
|
||||
. ./common.sh
|
||||
|
||||
./index.sh daily
|
||||
./deb.sh ubuntu_active
|
||||
./deb.sh debian_active
|
||||
./arch.sh active
|
||||
./debian.sh active
|
||||
|
||||
echo "============ Updating SQL indices"
|
||||
$PSQL -f update_indices.sql
|
||||
|
||||
|
|
|
|||
78
util/debian.sh
Executable file
78
util/debian.sh
Executable file
|
|
@ -0,0 +1,78 @@
|
|||
#!/bin/sh
# Index Debian releases.
# Usage: ./debian.sh <target>
#   buzz ... jessie - index that single release
#   old             - every release from buzz through squeeze
#   active          - wheezy and jessie
#   all             - old followed by active
# Any other argument (or none) does nothing.
# NOTE(review): index and index_deb are presumably provided by ./common.sh -
# confirm.

. ./common.sh

AMIRROR=http://archive.debian.org/debian/
CMIRROR=http://ftp.nl.debian.org/debian/

# XXX: buzz and rex have some deb-old formatted packages, the indexer doesn't support these.

case "$1" in
  buzz|rex)
    # Both releases use the same layout; contrib keeps its packages under
    # "binary" instead of "binary-i386".
    index deb --sys "debian-$1" --mirror "$AMIRROR" --contents "${AMIRROR}dists/$1/main/Contents.gz" --packages "${AMIRROR}dists/$1/main/binary-i386/Packages.gz"
    index deb --sys "debian-$1" --mirror "$AMIRROR" --contents "${AMIRROR}dists/$1/contrib/Contents.gz" --packages "${AMIRROR}dists/$1/contrib/binary/Packages.gz"
    ;;
  bo)
    index deb --sys debian-bo --mirror "$AMIRROR" --contents "${AMIRROR}dists/bo/main/Contents-i386.gz" --packages "${AMIRROR}dists/bo/main/binary-i386/Packages.gz"
    # There's no Contents file for contrib and non-free
    index deb --sys debian-bo --mirror "$AMIRROR" --packages "${AMIRROR}dists/bo/contrib/binary/Packages.gz"
    index deb --sys debian-bo --mirror "$AMIRROR" --packages "${AMIRROR}dists/bo/non-free/binary/Packages.gz"
    ;;
  hamm)
    index_deb debian-hamm "$AMIRROR" hamm "main hamm contrib non-free"
    ;;
  slink|potato|woody|sarge|etch|lenny)
    # Archived releases with the standard layout and a global Contents file.
    index_deb "debian-$1" "$AMIRROR" "$1" "main contrib non-free"
    ;;
  squeeze)
    index_deb debian-squeeze "$AMIRROR" squeeze "main contrib non-free"
    index_deb debian-squeeze "$AMIRROR" squeeze-lts "main contrib non-free" cmp
    ;;
  wheezy)
    index_deb debian-wheezy "$CMIRROR" wheezy "main contrib non-free"
    index_deb debian-wheezy "$CMIRROR" wheezy-updates "main contrib non-free" cmp
    ;;
  jessie)
    index_deb debian-jessie "$CMIRROR" jessie "main contrib non-free" cmp
    index_deb debian-jessie "$CMIRROR" jessie-updates "main contrib non-free" cmp
    ;;
  old)
    # Re-invoke this script once per release; "$0" is quoted so a script
    # path containing whitespace still works.
    for REL in buzz rex bo hamm slink potato woody sarge etch lenny squeeze; do
      "$0" "$REL"
    done
    ;;
  active)
    "$0" wheezy
    "$0" jessie
    ;;
  all)
    "$0" old
    "$0" active
    ;;
esac
|
||||
|
|
@ -1,22 +0,0 @@
|
|||
# Small dispatcher script: defines the indexing functions below and then runs
# whatever function name (plus arguments) was given on the command line.

# Load .config from the current directory when it exists.
if test -f .config; then
  source .config
fi

# Indexer command line; expanded unquoted on purpose so that it splits into
# the command and its flag.
INDEX="./indexer -vv"

# Trace every command as it is executed.
set -x

# Run the indexer once for each Arch Linux repository.
arch() {
  local MIRROR=http://ftp.nluug.nl/pub/os/Linux/distr/archlinux
  local REPOS="core extra community"
  local REPO
  # REPOS is left unquoted on purpose: it is a whitespace-separated list.
  for REPO in $REPOS; do
    $INDEX arch --sys arch --mirror "$MIRROR" --repo "$REPO"
  done
}

# Everything that belongs to the daily run.
daily() {
  arch
}

# Dispatch to the requested function; "$@" keeps arguments that contain
# whitespace intact.
"$@"
|
||||
|
|
@ -339,120 +339,5 @@ ubuntu() {
|
|||
}
|
||||
|
||||
|
||||
# Per-release Debian wrappers around syncrepo.
# NOTE(review): syncrepo is not visible here; its arguments look like
# <id> <mirror> <distro> <components> [<contents path>] - confirm.

debian_buzz() {
  # Contrib uses a rather non-standard arch directory ("binary" and "binary-all"), so let's stick with main for now.
  syncrepo 18 "http://archive.debian.org/debian/" buzz main "dists/buzz/main/Contents.gz"
}

debian_rex() {
  # (Same note on contrib)
  syncrepo 19 "http://archive.debian.org/debian/" rex main "dists/rex/main/Contents.gz"
}

debian_bo() {
  # Contrib and non-free don't have a Contents file :(
  syncrepo 20 "http://archive.debian.org/debian/" bo main "dists/bo/main/Contents-i386.gz"
}

debian_hamm() {
  syncrepo 21 "http://archive.debian.org/debian/" hamm "main hamm contrib non-free"
}

debian_slink() {
  syncrepo 22 "http://archive.debian.org/debian/" slink "main contrib non-free"
}

debian_potato() {
  syncrepo 23 "http://archive.debian.org/debian/" potato "main contrib non-free"
}

debian_woody() {
  syncrepo 24 "http://archive.debian.org/debian/" woody "main contrib non-free"
}

debian_sarge() {
  syncrepo 25 "http://archive.debian.org/debian/" sarge "main contrib non-free"
}

debian_etch() {
  syncrepo 26 "http://archive.debian.org/debian/" etch "main contrib non-free"
}

debian_lenny() {
  syncrepo 27 "http://archive.debian.org/debian/" lenny "main contrib non-free"
}

debian_squeeze() {
  local _dist
  for _dist in squeeze squeeze-updates; do
    syncrepo 28 "http://ftp.nl.debian.org/debian/" "$_dist" "main contrib non-free"
  done
}

debian_wheezy() {
  local _mirror="http://ftp.nl.debian.org/debian/"
  local _comp
  syncrepo 83 "$_mirror" wheezy "main contrib non-free"
  # The Contents-* files have moved...
  for _comp in main contrib non-free; do
    syncrepo 83 "$_mirror" wheezy-updates "$_comp" "dists/wheezy-updates/$_comp/Contents-i386.gz"
  done
}

debian_jessie() {
  local _mirror="http://ftp.nl.debian.org/debian/"
  local _dist _comp
  # One call per distribution/component pair, each with its own Contents file.
  for _dist in jessie jessie-updates; do
    for _comp in main contrib non-free; do
      syncrepo 91 "$_mirror" "$_dist" "$_comp" "dists/$_dist/$_comp/Contents-i386.gz"
    done
  done
}

# Every release from buzz through squeeze, oldest first.
debian_old() {
  local _rel
  for _rel in buzz rex bo hamm slink potato woody sarge etch lenny squeeze; do
    "debian_$_rel"
  done
}

# wheezy and jessie.
debian_active() {
  debian_wheezy
  debian_jessie
}

# All releases: the old ones first, then the active ones.
debian() {
  debian_old
  debian_active
}
|
||||
|
||||
|
||||
|
||||
# Fetch older packages from snapshot.debian.org
|
||||
|
||||
debian_snapshot_month() {
|
||||
YEAR=$1
|
||||
MONTH=$2
|
||||
ROOT="http://snapshot.debian.org/archive/debian/"
|
||||
DATES=`$CURL "$ROOT?year=$YEAR&month=$MONTH" | perl -lne 'm|<a href="([0-9]{8}T[0-9]{6}Z)/"| && print $1'`
|
||||
PREVDATE="00000000"
|
||||
for DATE in $DATES; do
|
||||
CURDATE=`echo $DATE | head -c8`
|
||||
[ "$CURDATE" = "$PREVDATE" ] && continue
|
||||
PREVDATE=$CURDATE
|
||||
[ $DATE \< "20070104" ] && syncrepo 24 "$ROOT$DATE/" "woody" "main contrib non-free"
|
||||
[ \( $DATE \> "20050607" \) -a \( $DATE \< "20081028" \) ] && syncrepo 25 "$ROOT$DATE/" "sarge" "main contrib non-free"
|
||||
[ \( $DATE \> "20070409" \) -a \( $DATE \< "20100620" \) ] && syncrepo 26 "$ROOT$DATE/" "etch" "main contrib non-free"
|
||||
[ \( $DATE \> "20090218" \) -a \( $DATE \< "20120326" \) ] && syncrepo 27 "$ROOT$DATE/" "lenny" "main contrib non-free"
|
||||
if [ $DATE \> "20110206" ]; then
|
||||
syncrepo 28 "$ROOT$DATE/" "squeeze" "main contrib non-free"
|
||||
syncrepo 28 "$ROOT$DATE/" "squeeze-updates" "main contrib non-free"
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
|
||||
"$@"
|
||||
|
||||
11
www/index.pl
11
www/index.pl
|
|
@ -245,10 +245,9 @@ sub about {
|
|||
Historical releases were fetched from <a
|
||||
href="http://archive.debian.org/debian/">http://archive.debian.org/debian/</a>
|
||||
and <a href="http://snapshot.debian.org/">http://snapshot.debian.org/</a>.
|
||||
For buzz, rex and bo, only the 'main' component has been indexed, and
|
||||
we're missing a few man pages because some packages were missing from the
|
||||
repository archives. For the other releases, all components (main, contrib
|
||||
and non-free) from the $release and $release-updates (where available)
|
||||
For buzz, rex and bo, we're missing a few man pages because some packages
|
||||
were missing from the repository archives. Where available, all components
|
||||
(main, contrib and non-free) from the $release and $release-updates
|
||||
repositories are indexed.</dd>
|
||||
<dt>FreeBSD</dt><dd>
|
||||
Historical releases were fetched from <a
|
||||
|
|
@ -267,8 +266,8 @@ sub about {
|
|||
href="http://old-releases.ubuntu.com/ubuntu/">http://old-releases.ubuntu.com/ubuntu/</a>,
|
||||
supported releases from a local mirror. All components (main, universe,
|
||||
restricted and multiverse) from the $release, $release-updates and
|
||||
$release-security repositories are indexed. Backports are not included at
|
||||
the moment. Indexing started around mid June 2012.</dd>
|
||||
$release-security repositories are indexed. Indexing started around mid
|
||||
June 2012.</dd>
|
||||
</dl>
|
||||
Only packages for a single architecture (i386 or amd64) are scanned. To my
|
||||
knowledge, packages that come with different manuals for different
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue