[Raw Msg Headers][Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

zmailstats




Hi,

In the contrib directory of the zmailer sources there is a zmailstats script
written in Perl. As some of you may have noticed, it produces wildly
inaccurate results, at least with newer zmailer versions (2.99.50s8).

Because this script could give some relevant information, I've hacked it
into something that works. My version is appended below.

Comments appreciated,

Wieger

--------------------------------------

#! /usr/bin/perl -w

# Copyright (c) 1997 University of Cambridge.
# Written by Philip Hazel for the exim mailer and placed under the GPL
# (ph10@cus.cam.ac.uk, P.Hazel@ucs.cam.ac.uk).

# Revised by Christoph Lameter for use with zmailer (clameter@waterf.org).

# Numerous bugfixes by Wieger Opmeer to make it actually work with zmailer.
# (wieger@snt.utwente.nl)

# Perl script to generate statistics from one or more Zmailer log files.

# Usage: zmailstats [<options>] <log file> <log file> ...

use integer;

##################################################
#             Static data                        #
##################################################

# Value of each base-62 digit, indexed by (character code - code of '0').
# '0'-'9' map to 0-9, 'A'-'Z' to 10-35 and 'a'-'z' to 36-61; the punctuation
# characters lying between those ranges map to 0.

@tab62 = (0) x (ord('z') - ord('0') + 1);
@tab62[ map { ord($_) - ord('0') } ('0' .. '9', 'A' .. 'Z', 'a' .. 'z') ]
  = (0 .. 61);

# Number of days preceding the first day of each month (January first),
# built as a running total of the month lengths of a non-leap year.

@days_per_month = (0);
foreach my $length (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30)
  {
  push @days_per_month, $days_per_month[-1] + $length;
  }

# Default upper limits, in seconds, of the time-on-queue buckets
# (listed here in minutes).

@queue_times = map { $_ * 60 } (1, 5, 15, 30, 60, 3*60, 6*60, 12*60, 24*60);

# Month abbreviation => two-digit, zero-padded month number

%month_to_num = ();
{
my $number = 0;
$month_to_num{$_} = sprintf("%02d", ++$number)
  foreach qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
}

##################################################
#                   Subroutines                  #
##################################################

# Print a byte count in a six-character field: as plain bytes below 10000,
# rounded to KB below 10000000, and rounded to MB otherwise. The value is
# taken from the last argument. No newline is written.

sub print_volume_rounded {
my($bytes) = $_[-1];
my($format, $value) =
    ($bytes < 10000)    ? ("%6d",   $bytes)
  : ($bytes < 10000000) ? ("%4dKB", ($bytes + 512)/1024)
  :                       ("%4dMB", ($bytes + 512*1024)/(1024*1024));
printf($format, $value);
}


# Turn a number of seconds into a compact string such as "1w2d3h4m5s".
# Units whose value is zero are left out; a zero duration is shown as "0s".
# The divisions rely on the file-level "use integer".

sub format_time {
my($rest) = $_[-1];
my(@fields);

# Peel off seconds, minutes, hours and days in turn; each pass leaves the
# count of the next larger unit in $rest.
foreach my $unit ([60, "s"], [60, "m"], [24, "h"], [7, "d"])
  {
  my($span, $suffix) = @$unit;
  unshift @fields, [$rest % $span, $suffix];
  $rest /= $span;
  }
unshift @fields, [$rest, "w"];          # what remains is whole weeks

# Seconds are handled separately because they are also the fallback when
# every other unit is zero.
my($seconds) = pop @fields;
my($p) = join "", map { $_->[0] . $_->[1] } grep { $_->[0] > 0 } @fields;
$p .= $seconds->[0] . "s" if $seconds->[0] > 0 || $p eq "";
$p;
}


# Given a log date/time of the form "YYYY-MM-DD HH:MM:SS", compute the number
# of seconds since 1970-01-01 00:00:00. The timestamp is converted exactly as
# written; no time-zone adjustment is applied. A string that contains no such
# timestamp yields 0.
#
# Uses the file-level @days_per_month table (days preceding each month in a
# non-leap year).

sub seconds {
use integer;                            # every division below must truncate
my($y,$mo,$d,$h,$mi,$s) = (pop @_) =~
  /(\d\d\d\d)-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)/;
return 0 unless defined $s;             # not a timestamp

# Gregorian leap-year rule for the year of the timestamp itself.
my($leap) = (($y % 4 == 0 && $y % 100 != 0) || $y % 400 == 0)? 1 : 0;

# Leap days contributed by the complete years 1970 .. $y-1: the number of
# leap years up to $y-1 minus the 477 leap years up to 1969.
my($prev) = $y - 1;
my($leap_days) = ($prev/4 - $prev/100 + $prev/400) - 477;

my($days) = ($y - 1970) * 365 + $leap_days   # complete years
          + $days_per_month[$mo-1]           # complete months of this year
          + ($d - 1);                        # complete days of this month
$days++ if ($leap && $mo > 2);               # this year's Feb 29 has passed

$s += 60 * ($mi + 60 * ($h + $days * 24));
$s;
}


# Given a message id, compute seconds: the first six characters of the id
# (taken from the last argument) are read as a base-62 number, using the
# file-level @tab62 table for the value of each character.

sub id_seconds {
my($prefix) = substr($_[-1], 0, 6);
my($total) = 0;
foreach my $char (split(//, $prefix))
  {
  $total = $total * 62 + $tab62[ord($char) - ord('0')];
  }
$total;
}


# Print time on queue information
#
# Arguments:
#   $string  description of the set of messages, used in the heading
#   $array   reference to an array of counts, one per entry of @queue_times
#
# One line is printed for every bucket with a non-zero count, showing the
# count, its percentage of the total and the running cumulative percentage.
# The count of messages above the last limit is taken from the file-level
# $queue_more_than and is included in the total.
#
# All working variables are lexical so that this routine does not disturb
# the globals of the same names (notably $i) used by the main program.

sub print_queue_times {
no integer;                             # percentages need real division
my($string,$array) = @_;

my($printed_one) = 0;
my($cumulative_percent) = 0;

# NOTE(review): this is added on every call and is only consumed by the
# commented-out "Unknown" line below - confirm whether it is still wanted.
$queue_unknown += keys %arrival_time;

my($queue_total) = $queue_more_than;
$queue_total += $$array[$_] foreach (0 .. $#queue_times);

my($temp) = "Time spent on the queue: $string";
printf ("%s\n%s\n\n", $temp, "-" x length($temp));

foreach my $i (0 .. $#queue_times)
  {
  next unless $$array[$i] > 0;
  my($percent) = ($$array[$i] * 100)/$queue_total;
  $cumulative_percent += $percent;
  printf("%s %4s   %6d %5.1f%%  %5.1f%%\n",
    $printed_one? "     " : "Under",        # label only the first line
    format_time($queue_times[$i]),
    $$array[$i], $percent, $cumulative_percent);
  $printed_one = 1;
  }

if ($queue_more_than > 0)
  {
  my($percent) = ($queue_more_than * 100)/$queue_total;
  $cumulative_percent += $percent;
  printf("Over  %4s   %6d %5.1f%%  %5.1f%%\n",
    format_time($queue_times[$#queue_times]),
    $queue_more_than, $percent, $cumulative_percent);
  }

#printf("Unknown   %6d\n", $queue_unknown) if $queue_unknown > 0;
print "\n";
}


# Print histogram
#
# Arguments: the heading text, followed by the list of per-interval counts.
# Reads the file-level settings $hist_number (number of rows),
# $hist_interval (minutes per row) and $hist_opt (rows per hour).
#
# Each row shows a time label, the count and one dot per $scale events.
# All working variables are lexical so that the globals of the same names
# (notably $i) used by the main program are left alone.

sub print_histogram {
my($text) = shift;
my(@interval_count) = @_;
my($maxd) = 0;

foreach my $i (0 .. $hist_number - 1)
  { $maxd = $interval_count[$i] if $interval_count[$i] > $maxd; }

# Events per dot: the largest count divided by 50, rounded, but at least 1.
my($scale) = int(($maxd + 25)/50);
$scale = 1 if $scale == 0;

my($type);
if ($text eq "Deliveries")
  {
  $type = ($scale == 1)? "delivery" : "deliveries";
  }
else
  {
  $type = ($scale == 1)? "message" : "messages";
  }

my($temp) = sprintf("$text per %s (each dot is $scale $type)",
  ($hist_interval == 60)? "hour" :
  ($hist_interval == 1)?  "minute" : "$hist_interval minutes");

printf("%s\n%s\n\n", $temp, "-" x length($temp));

my($hour) = 0;
my($minutes) = 0;
foreach my $i (0 .. $hist_number - 1)
  {
  my($c) = $interval_count[$i];

  # If the interval is an hour (the maximum) print the starting and
  # ending hours as a label. Otherwise print the starting hour and
  # minutes, which take up the same space.

  if ($hist_opt == 1)
    {
    printf("%02d-%02d", $hour, $hour + 1);
    $hour++;
    }
  else
    {
    if ($minutes == 0)
      { printf("%02d:%02d", $hour, $minutes) }
    else
      { printf("  :%02d", $minutes) }
    $minutes += $hist_interval;
    if ($minutes >= 60)
      {
      $minutes = 0;
      $hour++;
      }
    }

  printf(" %6d %s\n", $c, "." x ($c/$scale));
  }
print "\n";
}



# Print the rows of one league table, at most $topcount of them.
#
# Arguments:
#   $primary    hash ref whose values are the main (descending) sort key
#   $secondary  hash ref whose values break ties (descending)
#   $m_count    hash ref: name => message count
#   $m_data     hash ref: name => volume
# Names that tie on both keys are listed alphabetically.

sub _print_league_rows {
my($primary,$secondary,$m_count,$m_data) = @_;
my($shown) = 0;
foreach my $key (sort
                  {
                  $$primary{$b}   <=> $$primary{$a}   ||
                  $$secondary{$b} <=> $$secondary{$a} ||
                  $a cmp $b
                  }
                keys %{$m_count})
  {
  printf("%5d %8d   %s\n", $$m_count{$key}, $$m_data{$key}, $key);
  last if ++$shown >= $topcount;
  }
}


# Print league table
#
# Arguments:
#   $_[0]  singular noun naming what is ranked (pluralised with "s" in the
#          heading when more than one entry is requested)
#   $_[1]  hash ref: name => message count
#   $_[2]  hash ref: name => volume
#
# Two tables are printed, the first ordered by message count and the second
# by volume; each is limited to the file-level $topcount entries.

sub print_league_table {
my($text,$m_count,$m_data) = @_;
my($name) = ($topcount == 1)? "$text" : "$topcount ${text}s";

my($temp) = "Top $name by message count";
printf ("%s\n%s\n\n", $temp, "-" x length($temp));
_print_league_rows($m_count, $m_data, $m_count, $m_data);

$temp = "Top $name by volume";
printf ("\n%s\n%s\n\n", $temp, "-" x length($temp));
_print_league_rows($m_data, $m_count, $m_count, $m_data);

print "\n";
}




##################################################
#                 Main Program                   #
##################################################

# Counters and report settings, with their defaults.
#
# NOTE(review): $show_errors and $show_relay start at 0 and the -ne / -nr
# options can only set them to 0, so as written the error list and the
# relay report are never produced - confirm whether that is intentional.

$delayed_count = 0;
$relayed_unshown = 0;
$show_errors = 0;
$show_relay = 0;
$show_transport = 1;
$queue_more_than = 0;
$queue_unknown = 0;
$topcount = 50;
$local_league_table = 1;
$hist_opt = 1;

# Sentinels: any real timestamp sorts after $end and before $begin.
$begin = "9999-99-99 99:99:99";
$end = "0000-00-00 00:00:00";

# Decode options
#
# Every leading argument that starts with '-' is treated as an option and
# removed from @ARGV; whatever remains is the list of log files (none means
# standard input). Options are decoded even when no log file is named, and
# the loop stops cleanly once @ARGV is exhausted.

while (@ARGV > 0 && substr($ARGV[0], 0, 1) eq '-')
  {
  if    ($ARGV[0] =~ /^\-h(\d+)$/) { $hist_opt = $1 }
  elsif ($ARGV[0] =~ /^\-ne$/)     { $show_errors = 0 }
  elsif ($ARGV[0] =~ /^\-nr(.?)(.*)\1$/)
    {
    # $1 is the pattern delimiter (empty for a plain -nr), $2 the pattern
    if ($1 eq "") { $show_relay = 0 } else { $relay_pattern = $2 }
    }
  elsif ($ARGV[0] =~ /^\-q([,\d\+\-\*\/]+)$/)
    {
    @queue_times = split(/,/, $1);
    # Each item may be an arithmetic expression such as 5*60. The string
    # eval is only reached for text made of digits and + - * / (see the
    # pattern above); do not loosen that pattern without revisiting this.
    foreach my $q (@queue_times) { $q = eval($q) + 0 }
    @queue_times = sort { $a <=> $b } @queue_times;
    @queue_times = () if ($#queue_times == 0 && $queue_times[0] == 0);
    }
  elsif ($ARGV[0] =~ /^\-nt$/)     { $show_transport = 0 }
  elsif ($ARGV[0] =~ /^\-t(\d+)$/) { $topcount = $1 }
  elsif ($ARGV[0] =~ /^\-tnl$/) { $local_league_table = 0 }
  else
    {
    print "zmailstats: Unknown or malformed option $ARGV[0]\n";
    print "  Valid options are:\n";
    print "    -h<number>   histogram divisions per hour\n";
    print "                 default is 1, 0 suppresses histogram\n";
    print "    -ne          don't display error information\n";
    print "    -nr          don't display relaying information\n";
    print "    -nr/pattern/ don't display relaying information that matches\n";
    print "    -nt          don't display transport information\n";
    print "    -q<list>     list of times for queuing information\n";
    print "                 single 0 item suppresses\n";
    print "    -t<number>   display top <number> sources/destinations\n";
    print "                 default is 50, 0 suppresses top listing\n";
    print "    -tnl         omit local sources/destinations in top listing\n";
    exit 1;
    }
  shift;
  }

# Initialize slots for queue times: one zeroed counter per queue-time limit,
# for all messages and for remotely delivered messages.

@queue_bin = (0) x @queue_times;
@remote_queue_bin = (0) x @queue_times;

# Compute the number of slots for the histogram. $hist_opt is the number of
# divisions per hour, so it has to divide 60 exactly; one slot is needed for
# every interval of the day.

if ($hist_opt > 0)
  {
  unless ($hist_opt <= 60 && 60 % $hist_opt == 0)
    {
    print "zmailstats: -h must specify a factor of 60\n";
    exit 1;
    }
  $hist_interval = 60/$hist_opt;                # minutes per slot
  $hist_number = (24*60)/$hist_interval;        # slots per day
  @received_interval_count = map { 0 } (1 .. $hist_number);
  @delivered_interval_count = map { 0 } (1 .. $hist_number);
  }

# *********************************************************
# Scan the input files and collect the data
#
# Only lines carrying a from=<...> or to=<...> field are looked at. Such a
# line is expected to have the shape
#     Mon DD HH:MM:SS host prog[pid]: S.<id>: <details>
# and lines that do not have it are skipped. "from" lines record the arrival
# of a message; "to" lines record a deferral, a failure or a delivery.
#
# Optional fields are always picked up with a list-context match
# (($x) = /.../), never through $1 after an unchecked match: when a match
# fails $1 still holds the capture of some earlier match, which would
# otherwise be taken for the size, address, IP or transport.

$|=1;

# The log timestamps carry no year, so every line is given the current one.
# NOTE(review): a log that spans a year boundary will therefore be dated
# (and ordered) incorrectly - confirm whether that matters in practice.
$year = ((localtime)[5]) + 1900;
$received_data_total = 0;
$received_count_total = 0;
$delivered_data_total = 0;
$delivered_count_total = 0;

%size = ();
%from_host = ();
%received_count_user = ();
%received_data_user = ();
%received_count = ();
%received_data = ();
%arrival_time = ();
%delayed = ();
%had_error = ();
%departure_time = ();
%remote_delivered = ();
%delivered_count_user = ();
%delivered_data_user = ();
%delivered_count = ();
%delivered_data = ();
%transported_data = ();
%transported_count = ();
%relayed = ();
%errors_count = ();

while (<>) {
	next unless /(from|to)=<(.*)>,/o;
	$flag = $1 if defined $1;

	my ($x_month,$x_day,$x_time,$detail);
	($x_month,$x_day,$x_time,$id,$detail) =
		/^(\w{3})\s+(\d+)\s+(\d\d:\d\d:\d\d)\s\S+\s\w+\[\d+\]:\sS\.(\S+):\s(.*)/o;

	# Skip anything that is not a complete, datable log line; carrying
	# on would record an undefined id and a malformed timestamp.
	next unless defined $id && defined $month_to_num{$x_month};

	# From here on only the part after the message id is examined.
	$_ = $detail;

	$x_day = sprintf "%02d", $x_day;
	$tod = "$year-$month_to_num{$x_month}-$x_day $x_time";
	$begin = $tod if $tod lt $begin;
	$end = $tod if $tod gt $end;

	if ($flag eq "from") {
		($thissize) = /\ssize=(\d+)/;
		$thissize = 0 unless defined $thissize;
		$size{$id} = $thissize;

		($user,$host) = /\srrelay=(\S+)\@(\S+),/;
		if (!defined $user) {
			($host) = /\srrelay=(\S+)/;
		}
		if (defined $host && $host ne "localhost") {
			if ($show_relay) {                  # Save incoming information
				($ip) = /\srrelay=\S+(\s\[[^]]*\])/;
				$ip = "" unless defined $ip;
				$from_host{$id} = "$host$ip";
				# $from_address{$id} = $x_address;
			}
		} else {
			$host = "local";
			unless ( defined $user) {
				($user) = /\srrelay=\S+.+'(\w*)'/;
			}
			if (defined $user) {
				$received_count_user{$user}++;
				$received_data_user{$user} = 0 unless defined $received_data_user{$user};
				$received_data_user{$user} += $thissize;
			}
		}

		$received_count{$host}++;
		$received_data{$host} = 0 unless defined $received_data{$host};
		$received_data{$host} += $thissize;

		$received_count_total++;
		$received_data_total += $thissize;

		$arrival_time{$id} = $tod;
		if ($hist_opt > 0) {
			$tod =~ /^\S+ (\d{2}):(\d{2}):/;
			$received_interval_count[($2+$1*60)/$hist_interval]++;
		}

	}  elsif ($flag eq "to") {
		# Was this a deferring action?
		if ( /stat=defer/ || /stat=retry/) {
			# $delayed_count counts messages, $delayed{$id} attempts
			unless (defined $delayed{$id}) {
				$delayed_count++;
				$delayed{$id} = 1;
			} else {
				$delayed{$id}++;
			}
			next;
		} elsif (! /stat=ok/) {
			# Unsuccessful delivery. Note this and continue
			$had_error{$id} = 1 if defined ($size{$id});
			if ($show_errors) {
				($error) = /,\sstat=(.*)/;
				if (defined $error) {
					if ($error =~ /^error2\s/) {
						$error = substr($error,7);
					}
					$errors_count{$error}++;
				}
			}
			next;
		}

		# A successful delivery. A message whose arrival was not seen
		# in the logs is taken to have arrived at the earliest
		# timestamp read so far.
		$departure_time{$id}= $tod;
		$arrival_time{$id}= $begin if !defined $arrival_time{$id};
		$size = ( defined $size{$id} ) ? $size{$id} : 0;

		($host) = /\srelay=(\S+)/;
		if (defined $host) {
			$remote_delivered{$id} = 0 unless defined($remote_delivered{$id});
			$remote_delivered{$id}++;

			# Determine relaying address if either only one address listed,
			# or two the same. If they are different, it implies a forwarding
			# or aliasing, which is not relaying. Note that for multi-aliased
			# addresses, there may be a further address between the first
			# and last.

			if ($show_relay && defined $from_host{$id}) {
				($ip) = /\srelay=\S+(\s\[[^]]*\])/;
				$ip = "" unless defined $ip;
				$key = "H=\L$from_host{$id}\E => H=\L$host\E$ip";

				# Count the relay unless a -nr/pattern/ was
				# given and this entry matches it.
				if (!defined $relay_pattern || $key !~ /$relay_pattern/o) {
					$relayed{$key} = 0 if !defined $relayed{$key};
					$relayed{$key}++;
				} else {
					$relayed_unshown++
				}
			}
		} else {
			$host = "local";
			($user) = /to=<(\S+)\@\S+>,/;
			$user = "" unless defined $user;

			$delivered_count_user{$user}++;
			$delivered_data_user{$user} = 0 unless defined $delivered_data_user{$user};
			$delivered_data_user{$user} += $size;
		}

		$delivered_count{$host}++;
		$delivered_data{$host} = 0 unless defined $delivered_data{$host};
		$delivered_data{$host} += $size;
		$delivered_count_total++;
		$delivered_data_total += $size;
		if ($show_transport) {
			($transport) = /\smailer=(\w+),/;
			$transport = "" unless defined $transport;
			$transported_data{$transport} = 0 unless defined $transported_data{$transport};
			$transported_data{$transport} += $size;
			$transported_count{$transport}++;
		}

		if ($hist_opt > 0) {
			$tod =~ /^\S+ (\d{2}):(\d{2}):/;
			$delivered_interval_count[($2+$1*60)/$hist_interval]++;
		}
	}
}
# End of log reading loop
# *********************************************************


# Sort every message with a known arrival time into a time-on-queue bucket.
# A message with no recorded departure is treated as having left at the
# latest timestamp seen ($end). Nothing is counted when the list of queue
# times is empty.

if ($#queue_times >= 0)
  {
  foreach my $msg (keys %arrival_time)
    {
    my $left = defined $departure_time{$msg} ? $departure_time{$msg} : $end;
    my $waited = &seconds($left) - &seconds($arrival_time{$msg});

    # Find the first limit the waiting time stays under, if there is one
    my $slot;
    foreach my $n (0 .. $#queue_times)
      {
      next unless $waited < $queue_times[$n];
      $slot = $n;
      last;
      }

    if (defined $slot)
      {
      $queue_bin[$slot]++;
      $remote_queue_bin[$slot]++ if $remote_delivered{$msg};
      }
    else
      {
      $queue_more_than++;               # longer than the largest limit
      }
    }
  }

# (The relay bookkeeping in %from_host is deliberately left in place here.)

# $begin still holding its initial sentinel means no log line was accepted.

if ($begin eq "9999-99-99 99:99:99")
  {
  print "**** No valid log lines read\n";
  exit 1;
  }

print "\nZmailer statistics from $begin to $end\n";

# Print grand totals: heading, then the "Received" row (volume, message
# count, number of hosts, delayed and failed messages with percentages),
# then the "Delivered" row (volume, message count, number of hosts).

print
  "\nGrand total summary",
  "\n-------------------",
  "\n                                                       At least one address",
  "\n  TOTAL               Volume    Messages    Hosts      Delayed       Failed",
  "\n  Received            ";
&print_volume_rounded($received_data_total);

$failed_count = keys %had_error;
my($delayed_share, $failed_share) = (0, 0);
if ($received_count_total != 0)
  {
  no integer;                           # percentages need real division
  $delayed_share = ($delayed_count*100)/$received_count_total;
  $failed_share = ($failed_count*100)/$received_count_total;
  }
printf("      %6d     %4d  %6d %4.1f%% %6d %4.1f%%",
  $received_count_total,
  scalar(keys %received_data),
  $delayed_count, $delayed_share,
  $failed_count, $failed_share);

print "\n  Delivered           ";
&print_volume_rounded($delivered_data_total);
printf("      %6d     %4d\n\n", $delivered_count_total,
  scalar(keys %delivered_data));

# Print totals by transport if required: one row per transport name, in
# alphabetical order, giving the delivered volume and the message count.

if ($show_transport)
  {
  print
    "Deliveries by transport\n",
    "-----------------------",
    "\n                      Volume    Messages";

  foreach my $mailer (sort keys %transported_data)
    {
    printf( "\n  %-16s    ", $mailer);
    &print_volume_rounded($transported_data{$mailer});
    printf( "      %6d", $transported_count{$mailer});
    }
  print "\n\n";
  }

# Print the messages received and the deliveries per interval as histograms,
# unless configured not to. print_histogram finds the maximum in one
# interval and scales accordingly.

if ($hist_opt > 0)
  {
  foreach my $chart (["Messages received", \@received_interval_count],
                     ["Deliveries",        \@delivered_interval_count])
    {
    my($heading, $counts) = @$chart;
    &print_histogram($heading, @$counts);
    }
  }

# Print times on queue if required

if ($#queue_times >= 0)
  {
  print_queue_times("all messages", \@queue_bin);

  # print_queue_times takes the count for its "Over ..." row (and for the
  # total behind the percentages) from $queue_more_than, which covers every
  # message. For the remote-only table, substitute the number of remotely
  # delivered messages that fell into no bucket: every message was put
  # either into a bucket or into the "over" count, so that is all remotely
  # delivered messages minus those already counted in @remote_queue_bin.
  # "local" restores the all-messages value when this block is left.

  my($remote_total) = scalar(grep { $remote_delivered{$_} } keys %arrival_time);
  my($remote_binned) = 0;
  $remote_binned += $_ foreach (@remote_queue_bin);
  local $queue_more_than = $remote_total - $remote_binned;

  print_queue_times("messages with at least one remote delivery",
    \@remote_queue_bin);
  }

# Print relay information if required
#
# Each key of %relayed has the form "H=<from host> => H=<to host>"; the
# entries are listed by descending count, ties in alphabetical order, with
# the "H=" markers removed.

if ($show_relay)
  {
  if (scalar(keys %relayed) > 0 || $relayed_unshown > 0)
    {
    my($shown) = 0;
    my($spacing) = "";
    print "Relayed messages\n";
    print "----------------\n\n";

    foreach my $key (sort { $relayed{$b} <=> $relayed{$a} || $a cmp $b }
                     keys %relayed)
      {
      my($count) = $relayed{$key};
      $shown += $count;

      # Split on the whole " => " separator so that neither side keeps a
      # stray space, then drop the leading marker from each side.
      my($one,$two) = split(/ => /, $key, 2);
      foreach my $side ($one, $two)
        {
        $side =~ s/^[HA]=// if defined $side;
        }
      $two = "" unless defined $two;

      printf("%5d %s => %s\n", $count, $one, $two);
      $spacing = "\n";
      }
    print "${spacing}Total: $shown (plus $relayed_unshown unshown)\n";
    }
  else
    {
    print "No relayed messages\n";
    print "-------------------\n";
    }
  print "\n";
  }

# If the topcount is zero, print no league tables. Otherwise print the
# tables for hosts always, and those for local users only when local
# league tables have not been switched off.

if ($topcount > 0)
  {
  my(@tables) = (
    ["sending host",      \%received_count,       \%received_data,       1],
    ["local sender",      \%received_count_user,  \%received_data_user,
      $local_league_table],
    ["destination",       \%delivered_count,      \%delivered_data,      1],
    ["local destination", \%delivered_count_user, \%delivered_data_user,
      $local_league_table],
  );

  foreach my $table (@tables)
    {
    my($title, $counts, $volumes, $wanted) = @$table;
    &print_league_table($title, $counts, $volumes) if $wanted;
    }
  }

# Omit error statistics if configured out
#
# Each distinct error text is printed with its count, wrapped at a blank
# after at least 55 characters while more than 65 characters remain; the
# grand total follows.

if ($show_errors)
  {
  my($total_errors) = 0;

  if (scalar(keys %errors_count) != 0)
    {
    print "List of errors\n";
    print "--------------\n\n";

    foreach my $key (sort keys %errors_count)
      {
      # The keys carry no trailing newline, so only strip one if present
      # rather than unconditionally removing the last character.
      my($text) = $key;
      chomp($text);

      printf("%5d ", $errors_count{$key});
      $total_errors += $errors_count{$key};
      while (length($text) > 65)
        {
        my($first,$rest) = $text =~ /(.{55}\S*)\s+(.+)/;
        # No place to break: leave the remainder to the final printf
        # below, which prints it exactly once.
        last unless defined $first;
        printf("%s\n      ", $first);
        $text = $rest;
        }
      printf("%s\n\n", $text);
      }
    }

  my($temp) = "Errors encountered: $total_errors";
  printf("%s\n%s\n\n", $temp, "-" x length($temp));
  }

# End of zmailstats