Reorganize indexing scripts + use Rust for Debian
This commit is contained in:
parent
5d44d0e2ec
commit
2ee2f7495b
7 changed files with 142 additions and 148 deletions
13
util/arch.sh
Executable file
13
util/arch.sh
Executable file
|
|
@ -0,0 +1,13 @@
|
||||||
|
#!/bin/bash
# Index the Arch Linux repositories.
#
# Usage: ./arch.sh active
#   active  run the indexer once for each of the core, extra and community
#           repositories
# Any other (or missing) argument matches no case arm and does nothing.
#
# The interpreter is bash rather than sh because common.sh, sourced below, is
# a bash script (it uses `source`), so running it under a strict POSIX sh is
# not reliable.

# Provides index(). The path is relative, so run this from the util directory.
. ./common.sh

case "$1" in
  active)
    MIRROR=http://ftp.nluug.nl/pub/os/Linux/distr/archlinux
    REPOS="core extra community"
    # $REPOS is left unquoted on purpose: it is a whitespace-separated list.
    for REPO in $REPOS; do
      index arch --sys arch --mirror "$MIRROR" --repo "$REPO"
    done
    ;;
esac
|
||||||
|
|
@ -1,8 +1,50 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
CURL="curl -fSs -A manual-page-crawler,info@manned.org --limit-rate 500k"
|
# Optional local settings: when a regular file named .config exists in the
# current directory, source it into this shell.
if [[ -f .config ]]; then
  source .config
fi
|
||||||
|
|
||||||
|
|
||||||
|
# Run ./indexer with the given arguments.
# Usage: index <indexer arguments...>
# Outputs: a "====>" banner line, then the indexer's stdout and stderr (merged
#          into stdout), then an empty line.
index() {
  # "$*" joins the arguments into one string; it is used for display only.
  echo "====> indexer -vv $*"
  # "$@" hands every argument to the indexer exactly as received, so values
  # containing spaces or glob characters are not re-split or expanded.
  # NOTE(review): --dryrun is passed here but not shown in the banner above;
  # confirm that this is intentional.
  ./indexer -vv --dryrun "$@" 2>&1
  echo
}
|
||||||
|
|
||||||
|
|
||||||
|
# Convenience wrapper around index() for Debian-style repositories.
# TODO: Use x86_64 for new releases
# Usage: index_deb sys mirror distro list-of-components [contents]
#   sys         passed through to index as --sys
#   mirror      passed through as --mirror; repository paths are appended to
#               it directly, so it has to end with a slash
#   distro      directory name below dists/
#   components  whitespace-separated list; index is called once per entry
#   contents    selects the Contents file:
#     empty       global dists/$distro/Contents-i386.gz
#     "cmp"       per-component dists/$distro/$component/Contents-i386.gz
#     otherwise   path of the Contents file, appended to the mirror URL
index_deb() {
  local SYS=$1
  local MIRROR=$2
  local DISTRO=$3
  local COMPONENTS=$4
  local CONTENTS=${5:-"dists/$DISTRO/Contents-i386.gz"}
  # Loop state is declared local so it does not leak into the caller's scope.
  local CMP CONT

  # $COMPONENTS is left unquoted on purpose: it is a whitespace-separated list.
  for CMP in $COMPONENTS; do
    CONT=$CONTENTS
    # Quoted so that a custom Contents path containing spaces is compared as
    # one word instead of breaking the test.
    if [ "$CONT" = cmp ]; then
      CONT="dists/$DISTRO/$CMP/Contents-i386.gz"
    fi
    index deb --sys "$SYS" --mirror "$MIRROR" --contents "$MIRROR$CONT" --packages "${MIRROR}dists/$DISTRO/$CMP/binary-i386/Packages.gz"
  done
}
|
||||||
|
|
||||||
|
|
||||||
PSQL="psql -U manned -Awtq"
|
PSQL="psql -U manned -Awtq"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## THE STUFF BELOW IS OLD
|
||||||
|
# To be replaced with calls to index()
|
||||||
|
|
||||||
|
CURL="curl -fSs -A manual-page-crawler,info@manned.org --limit-rate 500k"
|
||||||
|
|
||||||
TMP=`mktemp -d manned.XXXXXX`
|
TMP=`mktemp -d manned.XXXXXX`
|
||||||
|
|
||||||
# bash-ism, remove the working directory when we're done.
|
# bash-ism, remove the working directory when we're done.
|
||||||
|
|
|
||||||
|
|
@ -2,9 +2,8 @@
|
||||||
|
|
||||||
. ./common.sh
|
. ./common.sh
|
||||||
|
|
||||||
./index.sh daily
|
./arch.sh active
|
||||||
./deb.sh ubuntu_active
|
./debian.sh active
|
||||||
./deb.sh debian_active
|
|
||||||
echo "============ Updating SQL indices"
|
echo "============ Updating SQL indices"
|
||||||
$PSQL -f update_indices.sql
|
$PSQL -f update_indices.sql
|
||||||
|
|
||||||
|
|
|
||||||
78
util/debian.sh
Executable file
78
util/debian.sh
Executable file
|
|
@ -0,0 +1,78 @@
|
||||||
|
#!/bin/bash
# Index Debian releases.
#
# Usage: ./debian.sh <release|old|active|all>
#   <release>  one of: buzz rex bo hamm slink potato woody sarge etch lenny
#              squeeze wheezy jessie
#   old        buzz through squeeze, one after another
#   active     wheezy, then jessie
#   all        old, then active
# Any other (or missing) argument matches no case arm and does nothing.
#
# The interpreter is bash rather than sh because common.sh, sourced below, is
# a bash script (it uses `source`), so running it under a strict POSIX sh is
# not reliable.

# Provides index() and index_deb(). The path is relative, so run this from the
# util directory.
. ./common.sh

# Mirror used below for buzz through squeeze.
AMIRROR=http://archive.debian.org/debian/
# Mirror used below for wheezy and jessie.
CMIRROR=http://ftp.nl.debian.org/debian/

# XXX: buzz and rex have some deb-old formatted packages, the indexer doesn't support these.

case "$1" in
  buzz)
    index deb --sys debian-buzz --mirror "$AMIRROR" --contents "${AMIRROR}dists/buzz/main/Contents.gz" --packages "${AMIRROR}dists/buzz/main/binary-i386/Packages.gz"
    index deb --sys debian-buzz --mirror "$AMIRROR" --contents "${AMIRROR}dists/buzz/contrib/Contents.gz" --packages "${AMIRROR}dists/buzz/contrib/binary/Packages.gz"
    ;;
  rex)
    index deb --sys debian-rex --mirror "$AMIRROR" --contents "${AMIRROR}dists/rex/main/Contents.gz" --packages "${AMIRROR}dists/rex/main/binary-i386/Packages.gz"
    index deb --sys debian-rex --mirror "$AMIRROR" --contents "${AMIRROR}dists/rex/contrib/Contents.gz" --packages "${AMIRROR}dists/rex/contrib/binary/Packages.gz"
    ;;
  bo)
    index deb --sys debian-bo --mirror "$AMIRROR" --contents "${AMIRROR}dists/bo/main/Contents-i386.gz" --packages "${AMIRROR}dists/bo/main/binary-i386/Packages.gz"
    # There's no Contents file for contrib and non-free
    index deb --sys debian-bo --mirror "$AMIRROR" --packages "${AMIRROR}dists/bo/contrib/binary/Packages.gz"
    index deb --sys debian-bo --mirror "$AMIRROR" --packages "${AMIRROR}dists/bo/non-free/binary/Packages.gz"
    ;;
  hamm)
    index_deb debian-hamm "$AMIRROR" hamm "main hamm contrib non-free"
    ;;
  slink)
    index_deb debian-slink "$AMIRROR" slink "main contrib non-free"
    ;;
  potato)
    index_deb debian-potato "$AMIRROR" potato "main contrib non-free"
    ;;
  woody)
    index_deb debian-woody "$AMIRROR" woody "main contrib non-free"
    ;;
  sarge)
    index_deb debian-sarge "$AMIRROR" sarge "main contrib non-free"
    ;;
  etch)
    index_deb debian-etch "$AMIRROR" etch "main contrib non-free"
    ;;
  lenny)
    index_deb debian-lenny "$AMIRROR" lenny "main contrib non-free"
    ;;
  squeeze)
    index_deb debian-squeeze "$AMIRROR" squeeze "main contrib non-free"
    # "cmp" selects the per-component Contents file location.
    index_deb debian-squeeze "$AMIRROR" squeeze-lts "main contrib non-free" cmp
    ;;
  wheezy)
    index_deb debian-wheezy "$CMIRROR" wheezy "main contrib non-free"
    index_deb debian-wheezy "$CMIRROR" wheezy-updates "main contrib non-free" cmp
    ;;
  jessie)
    index_deb debian-jessie "$CMIRROR" jessie "main contrib non-free" cmp
    index_deb debian-jessie "$CMIRROR" jessie-updates "main contrib non-free" cmp
    ;;
  old)
    # Each release runs in its own invocation of this script.
    for RELEASE in buzz rex bo hamm slink potato woody sarge etch lenny squeeze; do
      "$0" "$RELEASE"
    done
    ;;
  active)
    "$0" wheezy
    "$0" jessie
    ;;
  all)
    "$0" old
    "$0" active
    ;;
esac
|
||||||
|
|
@ -1,22 +0,0 @@
|
||||||
# Optional local settings: when a regular file named .config exists in the
# current directory, source it into this shell.
if [[ -f .config ]]; then
  source .config
fi

# Indexer command line; expanded unquoted by the functions in this script.
INDEX="./indexer -vv"

# Trace every command as it is executed.
set -o xtrace
|
|
||||||
|
|
||||||
# Run the indexer once for each of the Arch Linux core, extra and community
# repositories.
# Globals: INDEX (read) - the indexer command plus its flags; expanded unquoted
#          on purpose so that it splits into command and options.
arch() {
  local MIRROR=http://ftp.nluug.nl/pub/os/Linux/distr/archlinux
  local REPOS="core extra community"
  # Declared local so the loop variable does not leak into the caller's scope.
  local REPO
  # $REPOS is left unquoted on purpose: it is a whitespace-separated list.
  for REPO in $REPOS; do
    $INDEX arch --sys arch --mirror "$MIRROR" --repo "$REPO"
  done
}
|
|
||||||
|
|
||||||
|
|
||||||
# Entry point for the "daily" command: runs arch and nothing else.
daily() { arch; }
|
|
||||||
|
|
||||||
# Dispatch: run the command line as a command, so the first argument names the
# function to call and the rest become its arguments. Quoted so that arguments
# containing whitespace or glob characters are passed through unchanged.
"$@"
|
|
||||||
|
|
@ -339,120 +339,5 @@ ubuntu() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# Handle Debian buzz with a single syncrepo call for the main component.
# NOTE(review): syncrepo is defined outside this excerpt; its arguments look
# like <id> <mirror> <distribution> <components> [contents-path] - confirm.
debian_buzz() {
  # Contrib uses a rather non-standard arch directory ("binary" and
  # "binary-all"), so let's stick with main for now.
  syncrepo 18 \
    "http://archive.debian.org/debian/" \
    "buzz" \
    "main" \
    "dists/buzz/main/Contents.gz"
}
|
|
||||||
|
|
||||||
# Handle Debian rex with a single syncrepo call for the main component.
# NOTE(review): syncrepo is defined outside this excerpt; its arguments look
# like <id> <mirror> <distribution> <components> [contents-path] - confirm.
debian_rex() {
  # Main only: contrib uses a non-standard arch directory ("binary" and
  # "binary-all").
  syncrepo 19 \
    "http://archive.debian.org/debian/" \
    "rex" \
    "main" \
    "dists/rex/main/Contents.gz"
}
|
|
||||||
|
|
||||||
# Handle Debian bo with a single syncrepo call for the main component.
# NOTE(review): syncrepo is defined outside this excerpt; its arguments look
# like <id> <mirror> <distribution> <components> [contents-path] - confirm.
debian_bo() {
  # Contrib and non-free don't have a Contents file :(
  syncrepo 20 \
    "http://archive.debian.org/debian/" \
    "bo" \
    "main" \
    "dists/bo/main/Contents-i386.gz"
}
|
|
||||||
|
|
||||||
# Handle Debian hamm with a single syncrepo call.
# NOTE(review): syncrepo is defined outside this excerpt; its arguments look
# like <id> <mirror> <distribution> <components> - confirm.
debian_hamm() {
  syncrepo 21 \
    "http://archive.debian.org/debian/" \
    "hamm" \
    "main hamm contrib non-free"
}
|
|
||||||
|
|
||||||
# Handle Debian slink with a single syncrepo call.
# NOTE(review): syncrepo is defined outside this excerpt; its arguments look
# like <id> <mirror> <distribution> <components> - confirm.
debian_slink() {
  syncrepo 22 \
    "http://archive.debian.org/debian/" \
    "slink" \
    "main contrib non-free"
}
|
|
||||||
|
|
||||||
# Handle Debian potato with a single syncrepo call.
# NOTE(review): syncrepo is defined outside this excerpt; its arguments look
# like <id> <mirror> <distribution> <components> - confirm.
debian_potato() {
  syncrepo 23 \
    "http://archive.debian.org/debian/" \
    "potato" \
    "main contrib non-free"
}
|
|
||||||
|
|
||||||
# Handle Debian woody with a single syncrepo call.
# NOTE(review): syncrepo is defined outside this excerpt; its arguments look
# like <id> <mirror> <distribution> <components> - confirm.
debian_woody() {
  syncrepo 24 \
    "http://archive.debian.org/debian/" \
    "woody" \
    "main contrib non-free"
}
|
|
||||||
|
|
||||||
# Handle Debian sarge with a single syncrepo call.
# NOTE(review): syncrepo is defined outside this excerpt; its arguments look
# like <id> <mirror> <distribution> <components> - confirm.
debian_sarge() {
  syncrepo 25 \
    "http://archive.debian.org/debian/" \
    "sarge" \
    "main contrib non-free"
}
|
|
||||||
|
|
||||||
# Handle Debian etch with a single syncrepo call.
# NOTE(review): syncrepo is defined outside this excerpt; its arguments look
# like <id> <mirror> <distribution> <components> - confirm.
debian_etch() {
  syncrepo 26 \
    "http://archive.debian.org/debian/" \
    "etch" \
    "main contrib non-free"
}
|
|
||||||
|
|
||||||
# Handle Debian lenny with a single syncrepo call.
# NOTE(review): syncrepo is defined outside this excerpt; its arguments look
# like <id> <mirror> <distribution> <components> - confirm.
debian_lenny() {
  syncrepo 27 \
    "http://archive.debian.org/debian/" \
    "lenny" \
    "main contrib non-free"
}
|
|
||||||
|
|
||||||
# Handle Debian squeeze: one syncrepo call for squeeze, then one for
# squeeze-updates, both against ftp.nl.debian.org.
# NOTE(review): syncrepo is defined outside this excerpt; its arguments look
# like <id> <mirror> <distribution> <components> - confirm.
debian_squeeze() {
  syncrepo 28 \
    "http://ftp.nl.debian.org/debian/" \
    "squeeze" \
    "main contrib non-free"
  syncrepo 28 \
    "http://ftp.nl.debian.org/debian/" \
    "squeeze-updates" \
    "main contrib non-free"
}
|
|
||||||
|
|
||||||
# Handle Debian wheezy: one syncrepo call for wheezy with all three components,
# then one call per component for wheezy-updates, each with an explicit
# per-component Contents path.
# NOTE(review): syncrepo is defined outside this excerpt; its arguments look
# like <id> <mirror> <distribution> <components> [contents-path] - confirm.
debian_wheezy() {
  local _cmp

  syncrepo 83 "http://ftp.nl.debian.org/debian/" "wheezy" "main contrib non-free"
  # The Contents-* files have moved...
  for _cmp in main contrib non-free; do
    syncrepo 83 "http://ftp.nl.debian.org/debian/" "wheezy-updates" "$_cmp" "dists/wheezy-updates/$_cmp/Contents-i386.gz"
  done
}
|
|
||||||
|
|
||||||
# Handle Debian jessie: for jessie and then jessie-updates, call syncrepo once
# per component (main, contrib, non-free) with that component's own Contents
# path.
# NOTE(review): syncrepo is defined outside this excerpt; its arguments look
# like <id> <mirror> <distribution> <components> [contents-path] - confirm.
debian_jessie() {
  local _dist _cmp

  for _dist in jessie jessie-updates; do
    for _cmp in main contrib non-free; do
      syncrepo 91 "http://ftp.nl.debian.org/debian/" "$_dist" "$_cmp" "dists/$_dist/$_cmp/Contents-i386.gz"
    done
  done
}
|
|
||||||
|
|
||||||
# Run the per-release functions for buzz through squeeze, in that order.
debian_old() {
  local _rel

  for _rel in buzz rex bo hamm slink potato woody sarge etch lenny squeeze; do
    # Each release has a function named debian_<release>.
    "debian_$_rel"
  done
}
|
|
||||||
|
|
||||||
# Run the per-release functions for wheezy and then jessie.
debian_active() {
  local _rel

  for _rel in wheezy jessie; do
    "debian_$_rel"
  done
}
|
|
||||||
|
|
||||||
# Run every Debian release: debian_old first, then debian_active.
debian() { debian_old; debian_active; }
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Fetch older packages from snapshot.debian.org
|
|
||||||
|
|
||||||
# Walk the snapshot.debian.org listing for one month and call syncrepo for each
# release whose date window contains a snapshot. Only the first snapshot of
# each calendar day is used.
# Usage: debian_snapshot_month year month
# Globals: CURL (read) - download command plus its flags; expanded unquoted on
#          purpose so that it splits into command and options.
# NOTE(review): syncrepo and CURL are defined outside this excerpt; syncrepo's
# arguments look like <id> <mirror> <distribution> <components> - confirm.
debian_snapshot_month() {
  # All state is local so nothing leaks into the caller's scope.
  local YEAR=$1
  local MONTH=$2
  local ROOT="http://snapshot.debian.org/archive/debian/"
  local PREVDATE="00000000"
  local DATES DATE CURDATE

  # Extract the snapshot timestamps (YYYYMMDDThhmmssZ) from the listing links.
  DATES=$($CURL "$ROOT?year=$YEAR&month=$MONTH" | perl -lne 'm|<a href="([0-9]{8}T[0-9]{6}Z)/"| && print $1')

  # $DATES is left unquoted on purpose: it is a whitespace-separated list.
  for DATE in $DATES; do
    # The regex above guarantees the form <8 digits>T..., so stripping from the
    # first "T" leaves the YYYYMMDD day without spawning a subprocess.
    CURDATE=${DATE%%T*}
    [ "$CURDATE" = "$PREVDATE" ] && continue
    PREVDATE=$CURDATE

    # String comparisons of the full timestamp against YYYYMMDD bounds.
    [ "$DATE" \< "20070104" ] && syncrepo 24 "$ROOT$DATE/" "woody" "main contrib non-free"
    [ "$DATE" \> "20050607" ] && [ "$DATE" \< "20081028" ] && syncrepo 25 "$ROOT$DATE/" "sarge" "main contrib non-free"
    [ "$DATE" \> "20070409" ] && [ "$DATE" \< "20100620" ] && syncrepo 26 "$ROOT$DATE/" "etch" "main contrib non-free"
    [ "$DATE" \> "20090218" ] && [ "$DATE" \< "20120326" ] && syncrepo 27 "$ROOT$DATE/" "lenny" "main contrib non-free"
    if [ "$DATE" \> "20110206" ]; then
      syncrepo 28 "$ROOT$DATE/" "squeeze" "main contrib non-free"
      syncrepo 28 "$ROOT$DATE/" "squeeze-updates" "main contrib non-free"
    fi
  done
}
|
|
||||||
|
|
||||||
|
|
||||||
# Dispatch: run the command line as a command, so the first argument names the
# function to call and the remaining arguments are passed to it unchanged.
"$@"
|
"$@"
|
||||||
|
|
||||||
11
www/index.pl
11
www/index.pl
|
|
@ -245,10 +245,9 @@ sub about {
|
||||||
Historical releases were fetched from <a
|
Historical releases were fetched from <a
|
||||||
href="http://archive.debian.org/debian/">http://archive.debian.org/debian/</a>
|
href="http://archive.debian.org/debian/">http://archive.debian.org/debian/</a>
|
||||||
and <a href="http://snapshot.debian.org/">http://snapshot.debian.org/</a>.
|
and <a href="http://snapshot.debian.org/">http://snapshot.debian.org/</a>.
|
||||||
For buzz, rex and bo, only the 'main' component has been indexed, and
|
For buzz, rex and bo, we're missing a few man pages because some packages
|
||||||
we're missing a few man pages because some packages were missing from the
|
were missing from the repository archives. Where available, all components
|
||||||
repository archives. For the other releases, all components (main, contrib
|
(main, contrib and non-free) from the $release and $release-updates
|
||||||
and non-free) from the $release and $release-updates (where available)
|
|
||||||
repositories are indexed.</dd>
|
repositories are indexed.</dd>
|
||||||
<dt>FreeBSD</dt><dd>
|
<dt>FreeBSD</dt><dd>
|
||||||
Historical releases were fetched from <a
|
Historical releases were fetched from <a
|
||||||
|
|
@ -267,8 +266,8 @@ sub about {
|
||||||
href="http://old-releases.ubuntu.com/ubuntu/">http://old-releases.ubuntu.com/ubuntu/</a>,
|
href="http://old-releases.ubuntu.com/ubuntu/">http://old-releases.ubuntu.com/ubuntu/</a>,
|
||||||
supported releases from a local mirror. All components (main, universe,
|
supported releases from a local mirror. All components (main, universe,
|
||||||
restricted and multiverse) from the $release, $release-updates and
|
restricted and multiverse) from the $release, $release-updates and
|
||||||
$release-security repositories are indexed. Backports are not included at
|
$release-security repositories are indexed. Indexing started around mid
|
||||||
the moment. Indexing started around mid June 2012.</dd>
|
June 2012.</dd>
|
||||||
</dl>
|
</dl>
|
||||||
Only packages for a single architecture (i386 or amd64) are scanned. To my
|
Only packages for a single architecture (i386 or amd64) are scanned. To my
|
||||||
knowledge, packages that come with different manuals for different
|
knowledge, packages that come with different manuals for different
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue