yhdev/pub/download/code/awshrink
Yorhel 6242b2ee9c Rewrite to static site
With a complete reorganisation of the directory structure and most of
the content converted to pandoc-flavoured markdown.

Some TODO's left before this can go live:
- Main page
- Atom feeds
- Bug tracker
2019-03-23 11:56:53 +01:00

166 lines
3.5 KiB
Perl

#!/usr/bin/perl
# awshrink v0.1
# 2007-01-15
# Yoran Heling
# License: MIT
use strict;
use warnings;
use bytes;
# hash for easy lookup
my %shrinkable = ( map { $_ => 1 } qw|
DOMAIN LOGIN ROBOT WORMS EMAILSENDER EMAILRECEIVER
BROWSER UNKNOWNREFERER UNKOWNREFERERBROWSER SEREFERRALS
PAGEREFS SEARCHWORDS KEYWORDS SIDER_404 SIDER VISITOR
UNKNOWNREFERERBROWSER
|);
sub getstats { # read
my $fl = shift;
my @l;
seek($fl, 0, 0);
# get positions
while(<$fl>) {
last if /^END_MAP/;
next if !/^POS_([A-Z0-9_]+)\s+([0-9]+)\s+$/;
push(@l, [ $1, $2, 0, 0 ]);
}
# get sizes (bytes)
@l = sort { $a->[1] <=> $b->[1] } @l;
$l[$_][2] = ($l[$_+1] ? $l[$_+1][1] : -s $fl) - $l[$_][1]
for (0..$#l);
# get lines
for (0..$#l) {
seek $fl, $l[$_][1], 0;
<$fl> =~ /BEGIN_[A-Z0-9_]+\s+([0-9]+)/;
$l[$_][3] = $1;
}
print " Section Size (Bytes) Lines\n";
printf "%22s %12d %7d\n", $_->[0].($shrinkable{$_->[0]}?' ':'*'), $_->[2], $_->[3]
for (sort { $a->[2] <=> $b->[2] } @l);
print "* = not shrinkable\n";
}
sub fixmap { # read, write
my($fl, $FL) = @_;
my $map = '';
my @l;
seek $fl, 0, 0;
while(<$fl>) {
next if !/^BEGIN_([A-Z0-9_]+)/ || $1 eq 'MAP';
$map .= sprintf "POS_%s %-20d\n", $1, tell($fl)-length($_);
}
seek $fl, 0, 0;
seek $FL, 0, 0;
my $inmap = 0;
while(<$fl>) {
if(!$inmap && !/^BEGIN_MAP/) {
print $FL $_;
next;
}
$inmap = 1;
if(/^END_MAP/) {
printf $FL "BEGIN_MAP 27\n%sEND_MAP\n", $map;
$inmap = 0;
}
}
}
sub shrink { # read, write, %sections->{ key = section, value = lines }
my($fh, $FH, %sec) = @_;
seek $fh, 0, 0;
seek $FH, 0, 0;
while(<$fh>) {
if(!/^BEGIN_([A-Z0-9_]+)\s+([0-9]+)/ || !$sec{$1} || $sec{$1} >= $2) {
print $FH $_;
next;
}
my $sec = $1;
# read entire section
my @l;
while(<$fh>) {
last if /^END_/;
s/^[\s\r\n]+//;
s/[\s\r\n]+$//;
push @l, [ split / / ];
}
# sort section
# DOMAIN, LOGIN, ROBOT, WORMS, EMAILSENDER. EMAILRECEIVER,
# BROWSER, UNKNOWNREFERER, UNKOWNREFERERBROWSER, SEREFERRALS,
# PAGEREFS, SEARCHWORDS, KEYWORDS, SIDER_404, SIDER -> 1
# VISITOR -> 2
if($sec{$sec} > 10) { # assume sorted if we only want the first ten
@l = sort { $b->[1] <=> $a->[1] } @l if $sec ne 'VISITOR';
@l = sort { $b->[2] <=> $a->[2] } @l if $sec eq 'VISITOR';
}
# write section
printf $FH "BEGIN_%s %d\n", $sec, $sec{$sec};
print $FH join(' ', @{$l[$_]})."\n"
for (0..($sec{$sec}-1));
printf $FH "END_%s\n", $sec;
}
1;
}
sub run {
print "Usage: $@ [options] filename\n" and exit if !@ARGV;
my $file = pop;
my $ci = 0;
my $st = 0;
my %options;
while(local $_ = shift) {
/^-c$/ ? $ci++ :
/^-s$/ ? $st++ :
/^-([A-Za-z0-9_]+)$/ && $shrinkable{ uc($1) } ? $options{ uc($1) } = shift :
printf("Unknown option: %s\n", $_) && exit;
}
(!$options{$_} || $options{$_} !~ /^[0-9]+$/) && printf("Invalid value for -%s\n", $_) && exit
for keys %options;
print "Nothing to do...\n" and exit if !keys %options && !$st;
open(my $fl, '<', $file) || die $!;
my $tmpfile;
open(my $TMP, '+>', \$tmpfile);
open(my $FL, '+>', "$file~") || die $! if keys %options;
shrink($fl, $TMP, %options) && fixmap($TMP, $FL) if keys %options;
getstats(keys %options ? $FL : $fl) if $st;
close $fl;
close $TMP;
close $FL if keys %options;
rename "$file~", $file if $ci;
}
run @ARGV;