With a complete reorganisation of the directory structure and most of the content converted to pandoc-flavoured markdown. Some TODO's left before this can go live: - Main page - Atom feeds - Bug tracker
166 lines
3.5 KiB
Perl
166 lines
3.5 KiB
Perl
#!/usr/bin/perl
|
|
|
|
# awshrink v0.1
|
|
# 2007-01-15
|
|
# Yoran Heling
|
|
|
|
# License: MIT
|
|
|
|
|
|
|
|
use strict;
|
|
use warnings;
|
|
use bytes;
|
|
|
|
|
|
|
|
# hash for easy lookup
|
|
my %shrinkable = ( map { $_ => 1 } qw|
|
|
DOMAIN LOGIN ROBOT WORMS EMAILSENDER EMAILRECEIVER
|
|
BROWSER UNKNOWNREFERER UNKOWNREFERERBROWSER SEREFERRALS
|
|
PAGEREFS SEARCHWORDS KEYWORDS SIDER_404 SIDER VISITOR
|
|
UNKNOWNREFERERBROWSER
|
|
|);
|
|
|
|
|
|
|
|
sub getstats { # read
|
|
my $fl = shift;
|
|
my @l;
|
|
seek($fl, 0, 0);
|
|
|
|
# get positions
|
|
while(<$fl>) {
|
|
last if /^END_MAP/;
|
|
next if !/^POS_([A-Z0-9_]+)\s+([0-9]+)\s+$/;
|
|
push(@l, [ $1, $2, 0, 0 ]);
|
|
}
|
|
|
|
# get sizes (bytes)
|
|
@l = sort { $a->[1] <=> $b->[1] } @l;
|
|
$l[$_][2] = ($l[$_+1] ? $l[$_+1][1] : -s $fl) - $l[$_][1]
|
|
for (0..$#l);
|
|
|
|
# get lines
|
|
for (0..$#l) {
|
|
seek $fl, $l[$_][1], 0;
|
|
<$fl> =~ /BEGIN_[A-Z0-9_]+\s+([0-9]+)/;
|
|
$l[$_][3] = $1;
|
|
}
|
|
|
|
print " Section Size (Bytes) Lines\n";
|
|
printf "%22s %12d %7d\n", $_->[0].($shrinkable{$_->[0]}?' ':'*'), $_->[2], $_->[3]
|
|
for (sort { $a->[2] <=> $b->[2] } @l);
|
|
print "* = not shrinkable\n";
|
|
}
|
|
|
|
|
|
|
|
sub fixmap { # read, write
|
|
my($fl, $FL) = @_;
|
|
|
|
my $map = '';
|
|
my @l;
|
|
seek $fl, 0, 0;
|
|
while(<$fl>) {
|
|
next if !/^BEGIN_([A-Z0-9_]+)/ || $1 eq 'MAP';
|
|
$map .= sprintf "POS_%s %-20d\n", $1, tell($fl)-length($_);
|
|
}
|
|
|
|
seek $fl, 0, 0;
|
|
seek $FL, 0, 0;
|
|
|
|
my $inmap = 0;
|
|
while(<$fl>) {
|
|
if(!$inmap && !/^BEGIN_MAP/) {
|
|
print $FL $_;
|
|
next;
|
|
}
|
|
$inmap = 1;
|
|
if(/^END_MAP/) {
|
|
printf $FL "BEGIN_MAP 27\n%sEND_MAP\n", $map;
|
|
$inmap = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
sub shrink { # read, write, %sections->{ key = section, value = lines }
|
|
my($fh, $FH, %sec) = @_;
|
|
|
|
seek $fh, 0, 0;
|
|
seek $FH, 0, 0;
|
|
while(<$fh>) {
|
|
if(!/^BEGIN_([A-Z0-9_]+)\s+([0-9]+)/ || !$sec{$1} || $sec{$1} >= $2) {
|
|
print $FH $_;
|
|
next;
|
|
}
|
|
my $sec = $1;
|
|
|
|
# read entire section
|
|
my @l;
|
|
while(<$fh>) {
|
|
last if /^END_/;
|
|
s/^[\s\r\n]+//;
|
|
s/[\s\r\n]+$//;
|
|
push @l, [ split / / ];
|
|
}
|
|
|
|
# sort section
|
|
# DOMAIN, LOGIN, ROBOT, WORMS, EMAILSENDER. EMAILRECEIVER,
|
|
# BROWSER, UNKNOWNREFERER, UNKOWNREFERERBROWSER, SEREFERRALS,
|
|
# PAGEREFS, SEARCHWORDS, KEYWORDS, SIDER_404, SIDER -> 1
|
|
# VISITOR -> 2
|
|
if($sec{$sec} > 10) { # assume sorted if we only want the first ten
|
|
@l = sort { $b->[1] <=> $a->[1] } @l if $sec ne 'VISITOR';
|
|
@l = sort { $b->[2] <=> $a->[2] } @l if $sec eq 'VISITOR';
|
|
}
|
|
|
|
# write section
|
|
printf $FH "BEGIN_%s %d\n", $sec, $sec{$sec};
|
|
print $FH join(' ', @{$l[$_]})."\n"
|
|
for (0..($sec{$sec}-1));
|
|
printf $FH "END_%s\n", $sec;
|
|
}
|
|
1;
|
|
}
|
|
|
|
|
|
|
|
sub run {
|
|
print "Usage: $@ [options] filename\n" and exit if !@ARGV;
|
|
|
|
my $file = pop;
|
|
my $ci = 0;
|
|
my $st = 0;
|
|
|
|
my %options;
|
|
while(local $_ = shift) {
|
|
/^-c$/ ? $ci++ :
|
|
/^-s$/ ? $st++ :
|
|
/^-([A-Za-z0-9_]+)$/ && $shrinkable{ uc($1) } ? $options{ uc($1) } = shift :
|
|
printf("Unknown option: %s\n", $_) && exit;
|
|
}
|
|
(!$options{$_} || $options{$_} !~ /^[0-9]+$/) && printf("Invalid value for -%s\n", $_) && exit
|
|
for keys %options;
|
|
|
|
print "Nothing to do...\n" and exit if !keys %options && !$st;
|
|
|
|
open(my $fl, '<', $file) || die $!;
|
|
my $tmpfile;
|
|
open(my $TMP, '+>', \$tmpfile);
|
|
open(my $FL, '+>', "$file~") || die $! if keys %options;
|
|
|
|
shrink($fl, $TMP, %options) && fixmap($TMP, $FL) if keys %options;
|
|
getstats(keys %options ? $FL : $fl) if $st;
|
|
|
|
close $fl;
|
|
close $TMP;
|
|
close $FL if keys %options;
|
|
|
|
rename "$file~", $file if $ci;
|
|
}
|
|
|
|
|
|
|
|
run @ARGV;
|