[Raw Msg Headers][Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
zmailstats
Hi,
In the contrib directory of the zmailer sources is a zmailstats script
written in Perl. As some of you may have noticed, it produces wildly
inaccurate results, at least with newer zmailer versions (2.99.50s8).
Because this script can provide some relevant information, I've hacked it
into something that works. My version is appended below.
Comments appreciated,
Wieger
--------------------------------------
#! /usr/bin/perl -w
# Copyright (c) 1997 University of Cambridge.
# Written by Philip Hazel for the exim mailer and placed under the GPL
# (ph10@cus.cam.ac.uk, P.Hazel@ucs.cam.ac.uk).
# Revised by Christoph Lameter for use with zmailer (clameter@waterf.org).
# Numerous bugfixes by Wieger Opmeer to make it actually work with zmailer.
# (wieger@snt.utwente.nl)
# Perl script to generate statistics from one or more Zmailer log files.
# Usage: zmailstats [<options>] <log file> <log file> ...
use integer;
##################################################
# Static data #
##################################################
# Digit values for base-62 strings, indexed by ord(char) - ord('0').
# Characters between the digit/letter runs are not base-62 digits and
# map to 0.
@tab62 = (
    0 .. 9,      # '0'-'9'
    (0) x 7,     # ':' ';' '<' '=' '>' '?' '@'
    10 .. 35,    # 'A'-'Z'
    (0) x 6,     # '[' '\' ']' '^' '_' '`'
    36 .. 61,    # 'a'-'z'
);

# Number of days that precede the first of each month in a non-leap year
# (index 0 is January), accumulated from the lengths of January..November.
@days_per_month = (0);
foreach my $month_length (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30) {
    push @days_per_month, $days_per_month[-1] + $month_length;
}

# Default upper bounds, in seconds, of the time-on-queue buckets
# (given here in minutes and scaled).
@queue_times = map { $_ * 60 } (1, 5, 15, 30, 60, 3*60, 6*60, 12*60, 24*60);

# Three-letter month abbreviation => two-digit month number string.
%month_to_num = ();
{
    my $number = 0;
    foreach my $abbrev (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)) {
        $month_to_num{$abbrev} = sprintf("%02d", ++$number);
    }
}
##################################################
# Subroutines #
##################################################
# Print a byte count in a six-character field on the currently selected
# output handle: the plain number below 10000, rounded kilobytes ("KB")
# below 10000000, and rounded megabytes ("MB") otherwise.
# Takes the count as its last argument.
sub print_volume_rounded {
    my $bytes = $_[-1];

    # Pick the format and the (rounded) value to show in one step.
    my ($format, $value) =
        $bytes < 10000    ? ("%6d",   $bytes)
      : $bytes < 10000000 ? ("%4dKB", ($bytes + 512)/1024)
      :                     ("%4dMB", ($bytes + 512*1024)/(1024*1024));

    printf($format, $value);
}
# Render a number of seconds compactly as weeks/days/hours/minutes/seconds,
# e.g. 3661 => "1h1m1s". Units whose value is zero are omitted; a value of
# zero seconds yields "0s". Takes the duration as its last argument.
# The divisions rely on the integer arithmetic enabled at file level.
sub format_time {
    my $rest = $_[-1];

    # Peel off seconds, minutes, hours and days in turn; whatever remains
    # after the last division is the number of weeks.
    my @parts;
    foreach my $step ([60, "s"], [60, "m"], [24, "h"], [7, "d"]) {
        my ($modulus, $suffix) = @$step;
        unshift @parts, [$rest % $modulus, $suffix];
        $rest /= $modulus;
    }
    unshift @parts, [$rest, "w"];

    # Seconds are special: they are shown when non-zero, or when nothing
    # else was shown at all.
    my $seconds = pop @parts;
    my $text = join "", map { $_->[0] > 0 ? "$_->[0]$_->[1]" : () } @parts;
    $text .= "$seconds->[0]s" if $seconds->[0] > 0 || $text eq "";
    $text;
}
# Given a log date/time, compute seconds since jan 1, 1970
# Convert a timestamp of the form "YYYY-MM-DD HH:MM:SS" (taken as the last
# argument) into seconds since 1970-01-01 00:00:00. No time zone handling
# is applied. The leap-day count used here is valid for 1970 through 2099.
# Returns 0 if the argument does not contain such a timestamp.
sub seconds {
    my($y,$mo,$d,$h,$mi,$s) = (pop @_) =~
        /(\d\d\d\d)-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)/;
    return 0 unless defined $s;

    # Days preceding the first of each month in a non-leap year. Kept
    # private so the routine does not depend on file-level tables.
    my @days_before_month =
        (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334);

    # A year is a leap year when divisible by 4, except centuries that are
    # not divisible by 400.
    my $leap = ($y % 4 == 0 && ($y % 100 != 0 || $y % 400 == 0)) ? 1 : 0;

    # Zero-based day of the year: the first of January is day 0.
    my $days = ($d - 1) + $days_before_month[$mo - 1];
    $days++ if $leap && $mo > 2;

    # Whole years since 1970, plus one leap day for every leap year that
    # has fully elapsed (1972 is the first).
    my $years = $y - 1970;
    $days += $years * 365 + int(($years + 1) / 4);

    return $s + 60 * ($mi + 60 * ($h + $days * 24));
}
# Given a message id, compute seconds
# Interpret the first six characters of a message id (the last argument)
# as a base-62 number, using @tab62 to map each character to its digit
# value, and return the result.
sub id_seconds {
    my $prefix = substr($_[-1], 0, 6);
    my $zero   = ord('0');
    my $total  = 0;

    foreach my $char (split(//, $prefix)) {
        $total = $total * 62 + $tab62[ord($char) - $zero];
    }
    $total;
}
# Print time on queue information
# Print a "time spent on the queue" table.
#
#   $string - description of the message population, used in the heading
#   $array  - reference to an array of counts, one per entry of the global
#             @queue_times (the count of messages that left the queue in
#             less than that many seconds)
#
# Each non-empty bucket is printed with its share and the running
# cumulative share of the total. Messages that stayed longer than the last
# bucket are reported in a final "Over" row.
#
# NOTE: the "Over" count is always read from the global $queue_more_than,
# not from $array; a caller printing a different population must make sure
# that global holds the matching count.
sub print_queue_times {
    no integer;    # percentages need real division
    my($string,$array) = @_;

    # Bookkeeping kept from the original; the value is not printed.
    $queue_unknown += keys %arrival_time;

    my $total = $queue_more_than;
    $total += $_ foreach @{$array}[0 .. $#queue_times];

    my $heading = "Time spent on the queue: $string";
    printf("%s\n%s\n\n", $heading, "-" x length($heading));

    my $printed_one = 0;
    my $cumulative  = 0;
    foreach my $slot (0 .. $#queue_times) {
        my $count = $$array[$slot];
        next unless $count > 0;

        my $percent = ($count * 100) / $total;
        $cumulative += $percent;

        # "Under" is shown on the first printed row only; the label column
        # is padded so that later rows and the "Over" row stay aligned.
        printf("%-5s %4s %6d %5.1f%% %5.1f%%\n",
               $printed_one ? "" : "Under",
               format_time($queue_times[$slot]),
               $count, $percent, $cumulative);
        $printed_one = 1;
    }

    if ($queue_more_than > 0) {
        my $percent = ($queue_more_than * 100) / $total;
        $cumulative += $percent;
        printf("%-5s %4s %6d %5.1f%% %5.1f%%\n",
               "Over",
               format_time($queue_times[$#queue_times]),
               $queue_more_than, $percent, $cumulative);
    }

    #printf("Unknown %6d\n", $queue_unknown) if $queue_unknown > 0;
    print "\n";
}
# Print histogram
# Print a histogram of per-interval counts covering one day.
#
#   $text            - what is being counted; the exact string "Deliveries"
#                      selects the delivery/deliveries wording, anything
#                      else is described as message/messages
#   @interval_count  - the counts, one per interval
#
# Reads the globals $hist_number (number of intervals), $hist_interval
# (minutes per interval) and $hist_opt (intervals per hour). Each row shows
# a time label, the count, and one dot per $scale items, where $scale is
# chosen so the largest row is about 50 dots wide.
sub print_histogram {
    my($text, @interval_count) = @_;

    my $maxd = 0;
    foreach my $slot (0 .. $hist_number - 1) {
        $maxd = $interval_count[$slot] if $interval_count[$slot] > $maxd;
    }

    my $scale = int(($maxd + 25) / 50);
    $scale = 1 if $scale == 0;

    my $type;
    if ($text eq "Deliveries") {
        $type = ($scale == 1) ? "delivery" : "deliveries";
    }
    else {
        $type = ($scale == 1) ? "message" : "messages";
    }

    my $period = ($hist_interval == 60) ? "hour"
               : ($hist_interval == 1)  ? "minute"
               :                          "$hist_interval minutes";

    # Built by interpolation rather than by using $text as part of a
    # printf format, so a '%' in $text cannot be misread as a directive.
    my $heading = "$text per $period (each dot is $scale $type)";
    printf("%s\n%s\n\n", $heading, "-" x length($heading));

    my $hour    = 0;
    my $minutes = 0;
    foreach my $slot (0 .. $hist_number - 1) {
        my $count = $interval_count[$slot];

        # With one interval per hour, label each row with its starting and
        # ending hour. Otherwise print the starting hour and minutes on the
        # first row of each hour and only the minutes on the others; all
        # three label forms are five characters wide.
        if ($hist_opt == 1) {
            printf("%02d-%02d", $hour, $hour + 1);
            $hour++;
        }
        else {
            if ($minutes == 0) {
                printf("%02d:%02d", $hour, $minutes);
            }
            else {
                printf("  :%02d", $minutes);
            }
            $minutes += $hist_interval;
            if ($minutes >= 60) {
                $minutes = 0;
                $hour++;
            }
        }
        printf(" %6d %s\n", $count, "." x int($count / $scale));
    }
    print "\n";
}
# Print league table
# Print two "top N" tables for one category: first ranked by message count,
# then ranked by data volume.
#
#   $text    - singular name of the category (e.g. "sending host"); it is
#              pluralised by appending "s" unless the global $topcount is 1
#   $m_count - reference to a hash mapping each name to its message count
#   $m_data  - reference to a hash mapping each name to its data volume
#
# At most $topcount rows are printed per table. Ties on the primary figure
# are broken by the other figure and then by name, so the order is stable.
sub print_league_table {
    my($text, $m_count, $m_data) = @_;
    my $name = ($topcount == 1) ? "$text" : "$topcount ${text}s";

    my $lead = "";    # the second table is preceded by a blank line
    foreach my $ranking (["message count", $m_count, $m_data],
                         ["volume",        $m_data,  $m_count]) {
        my($label, $primary, $secondary) = @$ranking;

        my $heading = "Top $name by $label";
        printf("%s%s\n%s\n\n", $lead, $heading, "-" x length($heading));
        $lead = "\n";

        my @ordered = sort {
            $$primary{$b}   <=> $$primary{$a}   ||
            $$secondary{$b} <=> $$secondary{$a} ||
            $a cmp $b
        } keys %{$m_count};

        my $shown = 0;
        foreach my $key (@ordered) {
            printf("%5d %8d %s\n", $$m_count{$key}, $$m_data{$key}, $key);
            last if ++$shown >= $topcount;
        }
    }
    print "\n";
}
##################################################
# Main Program #
##################################################
# Counters and option defaults.
#
# NOTE(review): $show_errors and $show_relay default to 0, so the -ne and
# -nr options below cannot change anything and the error and relay reports
# are never printed. Left as is; confirm whether that is intended.
$delayed_count = 0;
$relayed_unshown = 0;
$show_errors = 0;
$show_relay = 0;
$show_transport = 1;
$queue_more_than = 0;
$queue_unknown = 0;
$topcount = 50;
$local_league_table = 1;
$hist_opt = 1;

# Sentinels that any real timestamp will replace.
$begin = "9999-99-99 99:99:99";
$end = "0000-00-00 00:00:00";

# Decode options. Every leading argument that starts with '-' followed by
# at least one more character is treated as an option, including when it is
# the only argument (the log is then read from standard input). A lone "-"
# is left in @ARGV for the input loop.
while (@ARGV && $ARGV[0] =~ /^-./) {
    if ($ARGV[0] =~ /^\-h(\d+)$/) {
        $hist_opt = $1;
    }
    elsif ($ARGV[0] =~ /^\-ne$/) {
        $show_errors = 0;
    }
    elsif ($ARGV[0] =~ /^\-nr(.?)(.*)\1$/) {
        # "-nr" alone switches the relay report off; "-nr<d>pattern<d>"
        # (any delimiter character <d>) sets a filter pattern instead.
        if ($1 eq "") { $show_relay = 0 } else { $relay_pattern = $2 }
    }
    elsif ($ARGV[0] =~ /^\-q([,\d\+\-\*\/]+)$/) {
        # Each list item may be an arithmetic expression such as 5*60.
        # String eval on command-line input: acceptable only because the
        # pattern above restricts it to digits, commas and + - * /.
        @queue_times = split(/,/, $1);
        foreach $q (@queue_times) { $q = eval($q) + 0 }
        @queue_times = sort { $a <=> $b } @queue_times;
        @queue_times = () if ($#queue_times == 0 && $queue_times[0] == 0);
    }
    elsif ($ARGV[0] =~ /^\-nt$/) {
        $show_transport = 0;
    }
    elsif ($ARGV[0] =~ /^\-t(\d+)$/) {
        $topcount = $1;
    }
    elsif ($ARGV[0] =~ /^\-tnl$/) {
        $local_league_table = 0;
    }
    else {
        print "zmailstats: Unknown or malformed option $ARGV[0]\n",
              " Valid options are:\n",
              " -h<number> histogram divisions per hour\n",
              " default is 1, 0 suppresses histogram\n",
              " -ne don't display error information\n",
              " -nr don't display relaying information\n",
              " -nr/pattern/ don't display relaying information that matches\n",
              " -nt don't display transport information\n",
              " -q<list> list of times for queuing information\n",
              " single 0 item suppresses\n",
              " -t<number> display top <number> sources/destinations\n",
              " default is 50, 0 suppresses top listing\n",
              " -tnl omit local sources/destinations in top listing\n";
        exit 1;
    }
    shift;
}

# Initialize slots for queue times
for ($i = 0; $i <= $#queue_times; $i++) {
    $queue_bin[$i] = 0;
    $remote_queue_bin[$i] = 0;
}

# Compute the number of slots for the histogram. $hist_opt is the number of
# divisions per hour, so it has to divide 60 evenly.
if ($hist_opt > 0) {
    if ($hist_opt > 60 || 60 % $hist_opt != 0) {
        print "zmailstats: -h must specify a factor of 60\n";
        exit 1;
    }
    $hist_interval = 60/$hist_opt;             # minutes per slot
    $hist_number = (24*60)/$hist_interval;     # slots per day
    @received_interval_count = (0) x $hist_number;
    @delivered_interval_count = (0) x $hist_number;
}
# *********************************************************
# Scan the input files and collect the data
$| = 1;

# The log timestamps carry no year, so every line is assumed to belong to
# the current year.
$year = ((localtime)[5]) + 1900;

$received_data_total = 0;
$received_count_total = 0;
$delivered_data_total = 0;
$delivered_count_total = 0;

%size = ();
%from_host = ();
%received_count_user = ();
%received_data_user = ();
%received_count = ();
%received_data = ();
%arrival_time = ();
%delayed = ();
%had_error = ();
%departure_time = ();
%remote_delivered = ();
%delivered_count_user = ();
%delivered_data_user = ();
%delivered_count = ();
%delivered_data = ();
%transported_data = ();
%transported_count = ();
%relayed = ();
%errors_count = ();

# Throughout the loop, optional fields are extracted with list assignment
# ("($x) = /.../") rather than by testing $1 afterwards: after a failed
# match $1 still holds the capture of an earlier successful match.
while (<>) {
    # Only lines that log an envelope sender or recipient are of interest.
    next unless /(from|to)=<(.*)>,/o;
    $flag = $1;

    # Split off the syslog prefix; the rest of the line replaces $_.
    # Lines that do not have the expected shape are skipped.
    my @fields =
        /^(\w{3})\s+(\d+)\s+(\d\d:\d\d:\d\d)\s\S+\s\w+\[\d+\]:\sS\.(\S+):\s(.*)/o;
    next unless @fields && defined $month_to_num{$fields[0]};
    ($x_month, $x_day, $x_time, $id, $_) = @fields;

    $x_day = sprintf "%02d", $x_day;
    $tod = "$year-$month_to_num{$x_month}-$x_day $x_time";
    $begin = $tod if $tod lt $begin;
    $end = $tod if $tod gt $end;

    if ($flag eq "from") {
        # ---- Message reception ----
        my ($bytes) = /\ssize=(\d+)/;
        $thissize = defined($bytes) ? $bytes : 0;
        $size{$id} = $thissize;

        # rrelay is either user@host or just a host.
        ($user,$host) = /\srrelay=(\S+)\@(\S+),/;
        if (!defined $user) {
            ($host) = /\srrelay=(\S+)/;
        }

        if (defined $host && $host ne "localhost") {
            if ($show_relay) {    # Save incoming information
                ($ip) = /\srrelay=\S+(\s\[[^]]*\])/;
                $ip = "" unless defined $ip;
                $from_host{$id} = "$host$ip";
            }
        }
        else {
            $host = "local";
            unless (defined $user) {
                ($user) = /\srrelay=\S+.+'(\w*)'/;
            }
            if (defined $user) {
                $received_count_user{$user}++;
                $received_data_user{$user} = 0
                    unless defined $received_data_user{$user};
                $received_data_user{$user} += $thissize;
            }
        }

        $received_count{$host}++;
        $received_data{$host} = 0 unless defined $received_data{$host};
        $received_data{$host} += $thissize;
        $received_count_total++;
        $received_data_total += $thissize;
        $arrival_time{$id} = $tod;

        if ($hist_opt > 0) {
            my ($hh, $mm) = $tod =~ /^\S+ (\d{2}):(\d{2}):/;
            $received_interval_count[($mm + $hh*60)/$hist_interval]++;
        }
    }
    elsif ($flag eq "to") {
        # ---- Delivery attempt ----

        # Was this a deferring action? Count each message once as delayed.
        if (/stat=defer/ || /stat=retry/) {
            if (defined $delayed{$id}) {
                $delayed{$id}++;
            }
            else {
                $delayed_count++;
                $delayed{$id} = 1;
            }
            next;
        }
        elsif (! /stat=ok/) {
            # Unsuccessful delivery. Note this and continue
            $had_error{$id} = 1 if defined ($size{$id});
            if ($show_errors) {
                ($error) = /,\sstat=(.*)/;
                if (defined $error) {
                    if ($error =~ /^error2\s/) {
                        $error = substr($error,7);
                    }
                    $errors_count{$error}++;
                }
            }
            next;
        }

        # Successful delivery. A message whose reception was not seen is
        # treated as having arrived at the earliest time seen so far.
        $departure_time{$id} = $tod;
        $arrival_time{$id} = $begin if !defined $arrival_time{$id};
        $size = (defined $size{$id}) ? $size{$id} : 0;

        ($host) = /\srelay=(\S+)/;
        if (defined $host) {
            $remote_delivered{$id} = 0 unless defined($remote_delivered{$id});
            $remote_delivered{$id}++;

            # Record the relay pair when the incoming host of this message
            # is known.
            if ($show_relay && defined $from_host{$id}) {
                ($ip) = /\srelay=\S+(\s\[[^]]*\])/;
                $ip = "" unless defined $ip;
                $key = "H=\L$from_host{$id}\E => H=\L$host\E$ip";

                # Pairs matching the -nr pattern are counted but not shown.
                # The (?:...) wrapper keeps an empty pattern from being
                # taken as "reuse the last successful pattern".
                if (!defined $relay_pattern || $key !~ /(?:$relay_pattern)/o) {
                    $relayed{$key} = 0 if !defined $relayed{$key};
                    $relayed{$key}++;
                }
                else {
                    $relayed_unshown++;
                }
            }
        }
        else {
            $host = "local";
            ($user) = /to=<(\S+)\@\S+>,/;
            $user = "" unless defined $user;
            $delivered_count_user{$user}++;
            $delivered_data_user{$user} = 0
                unless defined $delivered_data_user{$user};
            $delivered_data_user{$user} += $size;
        }

        $delivered_count{$host}++;
        $delivered_data{$host} = 0 unless defined $delivered_data{$host};
        $delivered_data{$host} += $size;
        $delivered_count_total++;
        $delivered_data_total += $size;

        if ($show_transport) {
            ($transport) = /\smailer=(\w+),/;
            $transport = "" unless defined $transport;
            $transported_data{$transport} = 0
                unless defined $transported_data{$transport};
            $transported_data{$transport} += $size;
            $transported_count{$transport}++;
        }

        if ($hist_opt > 0) {
            my ($hh, $mm) = $tod =~ /^\S+ (\d{2}):(\d{2}):/;
            $delivered_interval_count[($mm + $hh*60)/$hist_interval]++;
        }
    }
}
# End of log reading loop
# *********************************************************
# Sort every message into a time-on-queue bucket. Messages with no recorded
# departure are measured up to the last timestamp seen. The overflow count
# is kept separately for messages with a remote delivery so that the second
# queue table below is based on its own total.
$remote_queue_more_than = 0;
foreach my $msg_id (keys %arrival_time) {
    $id = $msg_id;
    $deptime = $departure_time{$id};
    $deptime = $end if !defined $deptime;

    if ($#queue_times >= 0) {
        $queued = seconds($deptime) - seconds($arrival_time{$id});

        # First bucket whose upper bound exceeds the queue time, if any.
        my $bin;
        foreach my $slot (0 .. $#queue_times) {
            if ($queued < $queue_times[$slot]) {
                $bin = $slot;
                last;
            }
        }

        if (defined $bin) {
            $queue_bin[$bin]++;
            $remote_queue_bin[$bin]++ if $remote_delivered{$id};
        }
        else {
            $queue_more_than++;
            $remote_queue_more_than++ if $remote_delivered{$id};
        }
    }
}

if ($begin eq "9999-99-99 99:99:99") {
    print "**** No valid log lines read\n";
    exit 1;
}

print "\nZmailer statistics from $begin to $end\n";

# Print grand totals
print "\nGrand total summary";
print "\n-------------------";
print "\n At least one address";
print "\n TOTAL Volume Messages Hosts Delayed Failed";
print "\n Received ";
print_volume_rounded($received_data_total);
{
    no integer;    # percentages need real division
    $failed_count = keys %had_error;
    my $delayed_percent = ($received_count_total == 0)
        ? 0 : ($delayed_count*100)/$received_count_total;
    my $failed_percent = ($received_count_total == 0)
        ? 0 : ($failed_count*100)/$received_count_total;
    printf(" %6d %4d %6d %4.1f%% %6d %4.1f%%",
           $received_count_total,
           scalar(keys %received_data),
           $delayed_count, $delayed_percent,
           $failed_count, $failed_percent);
}
print "\n Delivered ";
print_volume_rounded($delivered_data_total);
printf(" %6d %4d\n", $delivered_count_total,
       scalar(keys %delivered_data));
print "\n";

# Print totals by transport if required
if ($show_transport) {
    print "Deliveries by transport\n";
    print "-----------------------";
    print "\n Volume Messages";
    foreach my $name (sort keys %transported_data) {
        printf( "\n %-16s ", $name);
        print_volume_rounded($transported_data{$name});
        printf( " %6d", $transported_count{$name});
    }
    print "\n\n";
}

# Print the receptions and deliveries per interval as histograms, unless
# configured not to.
if ($hist_opt > 0) {
    print_histogram("Messages received", @received_interval_count);
    print_histogram("Deliveries", @delivered_interval_count);
}

# Print times on queue if required
if ($#queue_times >= 0) {
    print_queue_times("all messages", \@queue_bin);
    {
        # print_queue_times reads the overflow count from the global
        # $queue_more_than; substitute the remote-only count for this call.
        local $queue_more_than = $remote_queue_more_than;
        print_queue_times("messages with at least one remote delivery",
                          \@remote_queue_bin);
    }
}

# Print relay information if required
if ($show_relay) {
    if (scalar(keys %relayed) > 0 || $relayed_unshown > 0) {
        $shown = 0;
        $spacing = "";
        print "Relayed messages\n";
        print "----------------\n\n";
        foreach $key (sort { $relayed{$b} <=> $relayed{$a} } keys %relayed) {
            $count = $relayed{$key};
            $shown += $count;
            $key =~ s/[HA]=//g;
            ($one,$two) = split(/=> /, $key);
            printf("%5d %s => %s\n", $count, $one, $two);
            $spacing = "\n";
        }
        print "${spacing}Total: $shown (plus $relayed_unshown unshown)\n";
    }
    else {
        print "No relayed messages\n";
        print "-------------------\n";
    }
    print "\n";
}

# If the topcount is zero, print no league tables
if ($topcount > 0) {
    print_league_table("sending host", \%received_count, \%received_data);
    print_league_table("local sender", \%received_count_user,
                       \%received_data_user) if $local_league_table;
    print_league_table("destination", \%delivered_count, \%delivered_data);
    print_league_table("local destination", \%delivered_count_user,
                       \%delivered_data_user) if $local_league_table;
}

# Omit error statistics if configured out
if ($show_errors) {
    $total_errors = 0;
    if (scalar(keys %errors_count) != 0) {
        print "List of errors\n";
        print "--------------\n\n";
        foreach $key (sort keys %errors_count) {
            # The stored texts normally have no trailing newline; chomp
            # (unlike chop) leaves such a text intact.
            $text = $key;
            chomp($text);
            printf("%5d ", $errors_count{$key});
            $total_errors += $errors_count{$key};

            # Wrap long texts at the first white space after column 55.
            # Continuation lines are indented to the width of the count
            # column. A text that cannot be split is left for the final
            # printf below, so it is printed exactly once.
            while (length($text) > 65) {
                ($first,$rest) = $text =~ /(.{55}\S*)\s+(.+)/;
                last if !$first;
                printf("%s\n      ", $first);
                $text = $rest;
            }
            printf("%s\n\n", $text);
        }
    }
    $temp = "Errors encountered: $total_errors";
    printf("%s\n%s\n\n", $temp, "-" x length($temp));
}
# End of zmailstats