Initial commit
This commit is contained in:
commit
c47f450934
11 changed files with 1271 additions and 0 deletions
159
util/add_dir.pl
Executable file
159
util/add_dir.pl
Executable file
|
|
@ -0,0 +1,159 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
# Usage: ./add_dir.pl <dir> <pkgid>
|
||||
# Prints the path names of the found man pages on stdout.
|
||||
# May throw errors or warnings on stderr.
|
||||
# Returns 0 if it has added something, 1 on error or if nothing has been found.
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
no warnings 'once';
|
||||
use Encode 'decode', 'find_encoding', 'decode_utf8';
|
||||
use Digest::SHA 'sha1_hex';
|
||||
use File::Find;
|
||||
use DBI;
|
||||
|
||||
# Both the directory to scan and the package id are required.
@ARGV >= 2 or die "Not enough arguments\n";
my($dir, $pkgid) = @ARGV;

# AutoCommit is off: nothing is stored until the explicit commit at the end of
# the script. DBI errors are raised as exceptions instead of being printed.
my $db = DBI->connect(
  'dbi:Pg:dbname=manned', 'manned', '',
  { AutoCommit => 0, RaiseError => 1, PrintError => 0, pg_enable_utf8 => 1 }
);
|
||||
|
||||
|
||||
# Reads the file $ofn and returns its contents as a raw byte string, with the
# compression layers implied by the file name removed. Extensions are stripped
# from the right one at a time, so stacked ones ('foo.1.gz.bz2') work as well.
# Dies with a descriptive message if the file can't be opened, read or
# decompressed.
sub readman {
  my $ofn = shift;

  open my $F, '<', $ofn or die "Unable to open '$ofn': $!\n";
  # The data may be compressed; make sure no I/O layer touches the bytes.
  binmode $F;
  my $dat = do { local $/; <$F> };
  # In slurp mode an empty file yields '', so undef really is a read error.
  die "Unable to read '$ofn': $!\n" if !defined $dat;
  close $F;

  # Note: Don't forget to update 'section_from_filename()' in SQL when a new
  # compression file extension is recognized.
  my $fn = $ofn;
  while(1) {
    if($fn =~ s/\.gz$//) {
      require Compress::Zlib;
      $dat = Compress::Zlib::memGunzip($dat);
      die "Error decompressing '$ofn': $Compress::Zlib::gzerrno\n" if !defined $dat;
      next;
    }
    if($fn =~ s/\.bz2$//) {
      # Don't try to use Compress::Bzip2::memBunzip() here. It's been terribly
      # broken for at least 3 years:
      #   https://rt.cpan.org/Public/Bug/Display.html?id=48128
      require Compress::Raw::Bzip2;
      my($bz, $s) = Compress::Raw::Bunzip2->new();
      my $r;
      die "Error decompressing '$ofn': Opening bzip2 decompressor: $s\n" if $s != Compress::Raw::Bzip2::BZ_OK();
      die "Error decompressing '$ofn': $s\n" if ($s = $bz->bzinflate($dat, $r)) != Compress::Raw::Bzip2::BZ_STREAM_END();
      $dat = $r;
      next;
    }
    if($fn =~ s/\.lzma$//) {
      require Compress::Raw::Lzma;
      my($lz, $s) = Compress::Raw::Lzma::AutoDecoder->new();
      my $r;
      die "Error decompressing '$ofn': Opening lzma decompressor: $s\n" if $s != Compress::Raw::Lzma::LZMA_OK();
      die "Error decompressing '$ofn': $s\n" if ($s = $lz->code($dat, $r)) != Compress::Raw::Lzma::LZMA_STREAM_END();
      $dat = $r;
      next;
    }
    # No (more) known compression extension left.
    last;
  }

  return $dat;
}
|
||||
|
||||
|
||||
# Decodes the raw man page bytes in $data into a Perl character string.
# $locale is the locale directory name taken from the path; it may be undef.
#
# Candidate encodings are tried in this order until one decodes the data
# without errors:
#   1. the encoding find_encoding() derives from $locale, if any
#   2. utf-8
#   3. the encoding from a 'coding:' tag on one of the first two lines
#   4. a legacy encoding commonly used for the language of $locale
#   5. iso-8859-1
#
# Returns ($encoding_name, $decoded_text), or an empty list if no candidate
# produced a non-empty result (which includes the case of an empty file).
sub decodeman {
  my($data, $locale) = @_;

  my @enc = ('utf-8'); # No harm in trying utf-8 first.

  # Check for 'coding:' indications in the file header.
  # According to preconv.1, only the first two lines are checked. I've not seen
  # any man page where this coding information was on the second line, though.
  # Note that that man page also mentions some aliases that Perl's
  # find_encoding doesn't have. Again, I've not found any man page using those.
  my $re = qr/[\.']?\\["#].+-\*-.*coding: *([^ ;]+).+-\*-/;
  if($data =~ /^$re/ || $data =~ /^.*\n$re/) {
    # Strip an end-of-line style suffix from the declared coding name.
    (my $c = $1) =~ s/-(?:dos|unix|mac)$//;
    $c = find_encoding $c;
    $c = $c->name if $c;
    push @enc, $c if $c && $c ne 'ascii' && $c ne 'utf8' && $c ne 'utf-8-strict';
  }

  # Get encoding from the locale part of the path
  my $locenc = $locale && find_encoding $locale;
  unshift @enc, $locenc->name if $locenc;

  # Some language-specific fallbacks
  # TODO: Handle zh_* locales
  if($locale) {
    push @enc,
        $locale =~ /^(pl|cs|sk)/i ? 'iso-8859-2'
      : $locale =~ /^tr/i         ? 'iso-8859-9'
      : $locale =~ /^ru/i         ? 'koi8-r' # TODO: Or iso-8859-5, probably want to autodetect that?
      : $locale =~ /^ja/i         ? 'euc-jp' # TODO: Works for everything I've found yet, but Japanese isn't that simple. Probably want to detect Shift-JIS as well?
      : $locale =~ /^ko/i         ? 'euc-kr'
      #: $locale =~ /^el/i        ? 'iso-8859-7' # So far, all el mans I've seen were UTF-8.
      : ();
  }

  # If all else fails.
  push @enc, 'iso-8859-1';

  # Now try decoding. decode() is called with CHECK=1, so it dies on malformed
  # input; the eval turns that into undef. It may also modify its input with
  # that setting, hence the copy.
  for my $enc (@enc) {
    my $dec = eval { my $tmp = $data; decode($enc, $tmp, 1) };
    # Test for content rather than truthiness: a page consisting of just "0"
    # is valid, an empty one is not.
    return ($enc, $dec) if defined $dec && length $dec;
  }

  return;
}
|
||||
|
||||
|
||||
# Adds a single man page to the database.
#   $pkg    - id of the package the page belongs to
#   $path   - path of the page as stored in the database; name and section
#             are derived from it by SQL functions
#   $fn     - name of the file to read (the caller passes the bare file name)
#   $locale - locale directory taken from the path, undef if there is none
# The decoded contents are stored once per SHA-1 hash of the raw data; the man
# row refers to them through that hash.
# Prints "<path> (<encoding>)" on stdout and returns 1 when the page has been
# added. Prints a notice and returns nothing when it can't be decoded or is
# empty. Dies on read, decompression or database errors.
sub addman {
  my($pkg, $path, $fn, $locale) = @_;
  my $dat = readman($fn);
  my $hash = sha1_hex($dat);

  my($enc, $dec) = decodeman($dat, $locale);
  if(!$enc) {
    print "Invalid encoding or empty file: $path\n";
    return;
  }

  # Only insert the contents if this exact file hasn't been seen before.
  $db->do(q{INSERT INTO contents (hash, content) VALUES(decode(?, 'hex'),?)}, {}, $hash, $dec)
    if !$db->selectrow_arrayref(q{SELECT 1 FROM contents WHERE hash = decode(?, 'hex')}, {}, $hash);

  $db->do(q{
    INSERT INTO man (package, name, section, filename, locale, hash)
      VALUES(?,name_from_filename(?),section_from_filename(?),?,?,decode(?, 'hex'))}, {},
    $pkg, $path, $path, $path, $locale, $hash);

  # print, not printf: a '%' in the path must not be taken as a format directive.
  print "$path ($enc)\n";
  return 1;
}
|
||||
|
||||
|
||||
|
||||
# Number of man pages that have actually been added.
my $found = 0;

# Walk $dir and hand every regular file inside a man section directory to
# addman(). Anything else is reported on stderr and skipped.
find sub {
  return if !-f $_;
  # Path relative to the scanned directory.
  (my $path = $File::Find::name) =~ s/^\Q$dir\E//;
  # Note: fltk also creates pre-formatted pages in /cat$sectre/, but those are ignored.
  # TODO: Also ignore html and INDEX sections
  if($path !~ m{man(?:/([^/]+))?/man[0-9n]/([^/]+)$}) {
    warn "Ignoring $path\n";
    return;
  }
  # $1 is the optional locale directory, $2 the file name.
  my($locale, $fn) = ($1, $2);
  # addman() returns false for pages it had to skip; those must not count,
  # otherwise we'd exit with 0 without having added anything.
  $found++ if addman($pkgid, $path, $fn, $locale);
}, $dir;

if($found) {
  $db->commit;
} else {
  warn "No man pages found.\n";
  $db->rollback;
  exit 1;
}
|
||||
|
||||
16
util/add_tar.sh
Executable file
16
util/add_tar.sh
Executable file
|
|
@ -0,0 +1,16 @@
|
|||
#!/bin/sh

# Usage: add_tar.sh <file> <pkgid> <flags>
# Requires a recent GNU tar for compression autodetect and xz support.
#
# Extracts everything matching '*/man/*' from the tarball <file> into a
# temporary directory and runs add_dir.pl on it for package <pkgid>.
# <flags> holds extra options for tar (e.g. -z); it is deliberately left
# unquoted below so that it may contain several options or be absent.
# Exits with the status of tar if extraction failed, and with the status of
# add_dir.pl otherwise.

# Without a temporary directory there is nothing sensible left to do.
TMP=`mktemp -d manned.XXXXXXX` || exit 1

# Don't leave the extracted files behind when interrupted.
trap 'rm -rf "$TMP"; exit 1' HUP INT TERM

# TODO: tar throws an error if there are no man pages. This isn't really an error, though.
tar --warning=no-unknown-keyword -C "$TMP" $3 -xf "$1" --wildcards '*/man/*'\
  && ./add_dir.pl "$TMP" "$2"
RET=$?

rm -rf "$TMP"
exit $RET
|
||||
|
||||
95
util/arch.sh
Executable file
95
util/arch.sh
Executable file
|
|
@ -0,0 +1,95 @@
|
|||
#!/bin/sh

# Usage: ./arch.sh
#   Synchronises the database with an Arch mirror, fetching any packages that
#   aren't yet in the database and may have man pages.

# Base URL of the mirror and the repositories to scan on it.
MIRROR=http://ftp.nluug.nl/pub/os/Linux/distr/archlinux
REPOS="core extra community"
# Run as a command ("$DEBUG && echo ..."): set to 'true' to also report the
# packages that are skipped.
DEBUG=false
# Value of the 'system' column for the packages added by this script.
SYSID=1

CURL="curl -Ss"
PSQL="psql -U manned -Awtq"
# Everything below builds paths from $TMP and removes them with 'rm -rf', so
# never continue with an empty $TMP.
TMP=`mktemp -d manned.arch.XXXXXX` || exit 1
|
||||
|
||||
|
||||
# Returns 0 if the package is already in the database or if an error occurred.
# Otherwise adds the package, sets PKGID to the new ID, and returns 1.
# The values are handed to psql as variables and interpolated with :'var', so
# psql takes care of quoting them.
PKGID=
add_pkginfo() { # cat name ver date
  # ON_ERROR_STOP makes psql exit with a non-zero status when the statement
  # itself fails; without it only problems like a failed connection would show
  # up in $?.
  RES=`echo "SELECT id FROM package WHERE system = :'sysid' AND name = :'name' AND version = :'ver'"\
    | $PSQL -v ON_ERROR_STOP=1 -v "sysid=$SYSID" -v "name=$2" -v "ver=$3"`
  [ "$?" -ne 0 -o -n "$RES" ] && return 0
  RES=`echo "INSERT INTO package (system, category, name, version, released) VALUES(:'sysid',:'cat',:'name',:'ver',:'rel') RETURNING id"\
    | $PSQL -v ON_ERROR_STOP=1 -v "sysid=$SYSID" -v "cat=$1" -v "name=$2" -v "ver=$3" -v "rel=$4"`
  # No id in the output means nothing usable was inserted; treat it as an error
  # instead of handing an empty PKGID to the caller.
  [ "$?" -ne 0 -o -z "$RES" ] && return 0
  PKGID=$RES
  return 1
}
|
||||
|
||||
|
||||
# Prints the line following the %<field>% marker in the desc file of the
# package directory $D.
_descfield() {
  grep -A 1 "%$1%" "$D/desc" | tail -n 1
}

# checkpkg <repo> <dirname>
# Handles one entry of the unpacked files database of <repo>. If its file list
# mentions /man/ and the package isn't in the database yet, the package is
# registered, downloaded from the mirror and passed on to add_tar.sh.
checkpkg() {
  REPO=$1
  FN=$2
  D="$TMP/$REPO/$FN"

  # An entry has to be a directory holding both a 'files' and a 'desc' file.
  if [ ! -d "$D" ] || [ ! -f "$D/files" ] || [ ! -f "$D/desc" ]; then
    echo "===> $FN"
    echo "Invalid item, ignoring"
    return
  fi

  # Nothing to do for packages without anything in a man directory.
  if ! grep -q /man/ "$D/files"; then
    $DEBUG && echo "===> $FN"
    $DEBUG && echo "No mans"
    return
  fi

  # Somewhat inefficient description parsing
  FILENAME=`_descfield FILENAME`
  NAME=`_descfield NAME`
  VERSION=`_descfield VERSION`
  BUILDDATE=`_descfield BUILDDATE`
  if [ -z "$FILENAME" ] || [ -z "$NAME" ] || [ -z "$VERSION" ] || [ -z "$BUILDDATE" ]; then
    echo "===> $FN"
    echo "Invalid/missing description info"
    return
  fi
  # The build date is a UNIX timestamp; the database gets a YYYY-MM-DD date.
  BUILDDATE=`date -d "@$BUILDDATE" '+%F'`

  # add_pkginfo returns 0 when there's nothing to add (or on error).
  if add_pkginfo "$REPO" "$NAME" "$VERSION" "$BUILDDATE"; then
    $DEBUG && echo "===> $FN"
    $DEBUG && echo "Already up-to-date"
    return
  fi

  echo "===> $FN"
  F="$TMP/$REPO/$FILENAME"
  $CURL "$MIRROR/$REPO/os/i686/$FILENAME" -o "$F" || return
  ./add_tar.sh "$F" "$PKGID"
  rm -f "$F"
}
|
||||
|
||||
|
||||
# syncrepo <repo>
# Downloads the files database of <repo> from the mirror, unpacks it into
# $TMP/<repo> (which has to exist already) and runs checkpkg on every entry.
# Returns 1 if the database can't be downloaded or unpacked.
syncrepo() {
  REPO=$1
  F="$TMP/$REPO/repo.tar.gz"
  echo "============ $REPO"
  $CURL "$MIRROR/$REPO/os/i686/$REPO.files.tar.gz" -o "$F" || return 1
  tar -C "$TMP/$REPO" -xf "$F" || return 1
  rm -f "$F"
  for fn in "$TMP/$REPO"/*; do
    # Quoted, so that an unusual entry name is passed on as a single argument.
    checkpkg "$REPO" "`basename "$fn"`"
  done
}
|
||||
|
||||
|
||||
# Handle the repositories one at a time, each in its own scratch directory
# that is thrown away again before moving on to the next.
for r in $REPOS
do
  mkdir "$TMP/$r"
  syncrepo "$r"
  rm -rf "$TMP/$r"
done

# Remove the (by now empty) temporary directory itself.
rm -rf "$TMP"
|
||||
|
||||
121
util/deb.sh
Executable file
121
util/deb.sh
Executable file
|
|
@ -0,0 +1,121 @@
|
|||
#!/bin/bash

# A fetcher for debian-style repositories.

CURL="curl -Ss"
PSQL="psql -U manned -Awtq"
# All downloads, the fifo and the index files live below $TMP and are removed
# with rm again, so never continue with an empty $TMP.
TMP=`mktemp -d manned.deb.XXXXXX` || exit 1
|
||||
|
||||
|
||||
# checkpkg <sysid> <repo-url> <name> <version> <section> <file>
# Downloads the package <file> from the repository, registers it in the
# database and feeds its data archive to add_tar.sh.
checkpkg() {
  SYSID=$1
  REPO=$2
  NAME=$3
  VERSION=$4
  SECTION=$5
  FILE=$6
  echo "===> $NAME-$VERSION"
  FN="$TMP/$NAME-$VERSION.deb"
  $CURL "$REPO/$FILE" -o "$FN" || return

  # Get the date from the last modification time of the debian-binary file
  # inside the .deb. Preferably, the date we store in the database indicates
  # when the *source* package has been uploaded, but this will work fine as
  # an approximation, I guess.
  ARDATE=$(ar tv "$FN" debian-binary | perl -lne 's/^[^ ]+ [^ ]+ +\d+ (.+) debian-binary$/print $1/e')
  DATE=$(date -d "$ARDATE" "+%F")

  # Insert package in the database
  # ON_ERROR_STOP makes psql exit with a non-zero status when the INSERT itself
  # fails; without it the status only reflects problems like a failed
  # connection.
  PKGID=`echo "INSERT INTO package (system, category, name, version, released) VALUES(:'sysid',:'cat',:'name',:'ver',:'rel') RETURNING id"\
    | $PSQL -v ON_ERROR_STOP=1 -v "sysid=$SYSID" -v "cat=$SECTION" -v "name=$NAME" -v "ver=$VERSION" -v "rel=$DATE"`

  # Extract and handle the man pages
  if [ "$?" -eq 0 -a -n "$PKGID" ]; then
    # The archive comes in on stdin, so tar is told explicitly that it's gzip.
    ar p "$FN" data.tar.gz | ./add_tar.sh - "$PKGID" -z
  fi

  rm -f "$FN"
}
|
||||
|
||||
|
||||
# syncrepo <sysid> <repo-url> <distro> <components> [<contents-url>]
# Fetches the Contents index and the Packages lists of all <components> (a
# whitespace separated list) of <distro>, and runs checkpkg for every package
# that ships a file in a /man/ directory and isn't in the database yet for
# <sysid>. <contents-url> is relative to <repo-url> and defaults to
# dists/<distro>/Contents-i386.gz.
# Returns 1 if one of the index files can't be fetched or unpacked.
syncrepo() {
  SYSID=$1
  REPO=$2
  DISTRO=$3
  COMPONENTS=$4
  CONTENTSURL=${5:-"dists/$DISTRO/Contents-i386.gz"}
  echo "============ $REPO $DISTRO ($COMPONENTS)"

  # Get Contents.gz and Packages
  CFN="$TMP/Contents"
  PFN="$TMP/Packages"
  printf "" >"$PFN"
  $CURL "$REPO/$CONTENTSURL" -o "$CFN.gz" || return 1
  # -f: an earlier call that bailed out early leaves its Contents file behind.
  # Without -f gunzip refuses to overwrite it and the stale index of that
  # other distribution would be used.
  gunzip -f "$CFN.gz" || return 1

  for CMP in $COMPONENTS; do
    echo "MANDIFF-COMPONENT: $CMP" >>"$PFN"
    TFN="$TMP/Packages-$CMP.bz2"
    $CURL "$REPO/dists/$DISTRO/$CMP/binary-i386/Packages.bz2" -o "$TFN" || return 1
    bzcat "$TFN" >>"$PFN"
    rm "$TFN"
  done

  # Parse the Contents and Packages files and check with the database to figure
  # out which packages we need to download.
  # The perl script runs in the background and writes one line per package,
  # "name version section filename", to the fifo that is read below.
  mkfifo "$TMP/fifo"
  perl -l - "$CFN" "$PFN" "$SYSID" <<'EOP' >"$TMP/fifo" &
($cfn, $pfn, $sysid) = @ARGV;

use DBI;
$db = DBI->connect('dbi:Pg:dbname=manned', 'manned', '', {RaiseError => 1});

# %pkg: 1 = package has a file in a /man/ directory, 2 = already handled.
open F, '<', $cfn or die $!;
while(<F>) {
  chomp; @l=split/ +/;
  next if $l[0] !~ /\/man\//;
  # The second column is a comma separated list of [section/]package.
  for(split /,/, $l[1]) {
    s{^.+/([^/]+)$}{$1};
    $pkg{$_} = 1 if $_ ne "-";
  }
}
close F;

# Handles the stanza collected so far and resets all fields for the next one.
sub stanza {
  if($p && $v && $s && $f) {
    print "$p $v $s $f" if $pkg{$p} && $pkg{$p} == 1
      && !$db->selectrow_arrayref(q{SELECT 1 FROM package WHERE system = ? AND name = ? AND version = ?}, {}, $sysid, $p, $v);
    warn "Duplicate package? $p\n" if $pkg{$p} && $pkg{$p} == 2;
    $pkg{$p} = 2;
  }
  # Reset $s too, or a package without a Section field would inherit the
  # section of the package before it.
  $p=$v=$s=$f=undef
}

open F, '<', $pfn or die $!;
while(<F>) {
  chomp;
  $p = $1 if /^Package: (.+)/;
  $v = $1 if /^Version: (.+)/;
  $s = $1 if /^Section: (.+)/;
  $f = $1 if /^Filename: (.+)/;
  # Stanzas are separated by an empty line.
  stanza() if !$_;
}
# The last stanza isn't necessarily followed by an empty line.
stanza();
close F;
EOP

  # $l is left unquoted on purpose: it has to be split into the four fields.
  while read -r l; do
    checkpkg "$SYSID" "$REPO" $l
  done <"$TMP/fifo"

  rm -f "$TMP/fifo" "$CFN" "$PFN"
}
|
||||
|
||||
# TODO: backports?

# The repositories to synchronise, all disabled for now. The -updates and
# -security pockets are given the Contents index of their base release.

#syncrepo 2 "http://old-releases.ubuntu.com/ubuntu/" "warty" "main multiverse restricted universe"
#syncrepo 2 "http://old-releases.ubuntu.com/ubuntu/" "warty-updates" "main multiverse restricted universe" "dists/warty/Contents-i386.gz"
#syncrepo 2 "http://old-releases.ubuntu.com/ubuntu/" "warty-security" "main multiverse restricted universe" "dists/warty/Contents-i386.gz"

#syncrepo 3 "http://old-releases.ubuntu.com/ubuntu/" "hoary" "main multiverse restricted universe"
#syncrepo 3 "http://old-releases.ubuntu.com/ubuntu/" "hoary-updates" "main multiverse restricted universe" "dists/hoary/Contents-i386.gz"
#syncrepo 3 "http://old-releases.ubuntu.com/ubuntu/" "hoary-security" "main multiverse restricted universe" "dists/hoary/Contents-i386.gz"

#syncrepo 4 "http://old-releases.ubuntu.com/ubuntu/" "breezy" "main multiverse restricted universe"
#syncrepo 4 "http://old-releases.ubuntu.com/ubuntu/" "breezy-updates" "main multiverse restricted universe" "dists/breezy/Contents-i386.gz"
#syncrepo 4 "http://old-releases.ubuntu.com/ubuntu/" "breezy-security" "main multiverse restricted universe" "dists/breezy/Contents-i386.gz"

# Remove the temporary directory created at startup.
rm -rf -- "$TMP"
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue