#!/usr/bin/perl # awshrink v0.1 # 2007-01-15 # Yoran Heling # License: MIT use strict; use warnings; use bytes; # hash for easy lookup my %shrinkable = ( map { $_ => 1 } qw| DOMAIN LOGIN ROBOT WORMS EMAILSENDER EMAILRECEIVER BROWSER UNKNOWNREFERER UNKOWNREFERERBROWSER SEREFERRALS PAGEREFS SEARCHWORDS KEYWORDS SIDER_404 SIDER VISITOR UNKNOWNREFERERBROWSER |); sub getstats { # read my $fl = shift; my @l; seek($fl, 0, 0); # get positions while(<$fl>) { last if /^END_MAP/; next if !/^POS_([A-Z0-9_]+)\s+([0-9]+)\s+$/; push(@l, [ $1, $2, 0, 0 ]); } # get sizes (bytes) @l = sort { $a->[1] <=> $b->[1] } @l; $l[$_][2] = ($l[$_+1] ? $l[$_+1][1] : -s $fl) - $l[$_][1] for (0..$#l); # get lines for (0..$#l) { seek $fl, $l[$_][1], 0; <$fl> =~ /BEGIN_[A-Z0-9_]+\s+([0-9]+)/; $l[$_][3] = $1; } print " Section Size (Bytes) Lines\n"; printf "%22s %12d %7d\n", $_->[0].($shrinkable{$_->[0]}?' ':'*'), $_->[2], $_->[3] for (sort { $a->[2] <=> $b->[2] } @l); print "* = not shrinkable\n"; } sub fixmap { # read, write my($fl, $FL) = @_; my $map = ''; my @l; seek $fl, 0, 0; while(<$fl>) { next if !/^BEGIN_([A-Z0-9_]+)/ || $1 eq 'MAP'; $map .= sprintf "POS_%s %-20d\n", $1, tell($fl)-length($_); } seek $fl, 0, 0; seek $FL, 0, 0; my $inmap = 0; while(<$fl>) { if(!$inmap && !/^BEGIN_MAP/) { print $FL $_; next; } $inmap = 1; if(/^END_MAP/) { printf $FL "BEGIN_MAP 27\n%sEND_MAP\n", $map; $inmap = 0; } } } sub shrink { # read, write, %sections->{ key = section, value = lines } my($fh, $FH, %sec) = @_; seek $fh, 0, 0; seek $FH, 0, 0; while(<$fh>) { if(!/^BEGIN_([A-Z0-9_]+)\s+([0-9]+)/ || !$sec{$1} || $sec{$1} >= $2) { print $FH $_; next; } my $sec = $1; # read entire section my @l; while(<$fh>) { last if /^END_/; s/^[\s\r\n]+//; s/[\s\r\n]+$//; push @l, [ split / / ]; } # sort section # DOMAIN, LOGIN, ROBOT, WORMS, EMAILSENDER. EMAILRECEIVER, # BROWSER, UNKNOWNREFERER, UNKOWNREFERERBROWSER, SEREFERRALS, # PAGEREFS, SEARCHWORDS, KEYWORDS, SIDER_404, SIDER -> 1 # VISITOR -> 2 if($sec{$sec} > 10) { # assume sorted if we only want the first ten @l = sort { $b->[1] <=> $a->[1] } @l if $sec ne 'VISITOR'; @l = sort { $b->[2] <=> $a->[2] } @l if $sec eq 'VISITOR'; } # write section printf $FH "BEGIN_%s %d\n", $sec, $sec{$sec}; print $FH join(' ', @{$l[$_]})."\n" for (0..($sec{$sec}-1)); printf $FH "END_%s\n", $sec; } 1; } sub run { print "Usage: $@ [options] filename\n" and exit if !@ARGV; my $file = pop; my $ci = 0; my $st = 0; my %options; while(local $_ = shift) { /^-c$/ ? $ci++ : /^-s$/ ? $st++ : /^-([A-Za-z0-9_]+)$/ && $shrinkable{ uc($1) } ? $options{ uc($1) } = shift : printf("Unknown option: %s\n", $_) && exit; } (!$options{$_} || $options{$_} !~ /^[0-9]+$/) && printf("Invalid value for -%s\n", $_) && exit for keys %options; print "Nothing to do...\n" and exit if !keys %options && !$st; open(my $fl, '<', $file) || die $!; my $tmpfile; open(my $TMP, '+>', \$tmpfile); open(my $FL, '+>', "$file~") || die $! if keys %options; shrink($fl, $TMP, %options) && fixmap($TMP, $FL) if keys %options; getstats(keys %options ? $FL : $fl) if $st; close $fl; close $TMP; close $FL if keys %options; rename "$file~", $file if $ci; } run @ARGV;