#!/usr/local/bin/perl

#-----------------------------------------------------------------------
# Tidy Version 1.0
#
# Tidy up logfile(s) created by syslogd
#
# See manual pages tidy(8) and tidy.conf(5) for details
#
# Author: Marek Rouchal, University of Bayreuth, Germany
#         (marek@saftsack.fs.uni-bayreuth.de)
#-----------------------------------------------------------------------

#----------------------
# Configuration
#----------------------

require 5.0; # tidy needs at least perl5

# This is where tidy looks for its configuration.
# This may be overridden by the -C option
# All other configurable options are found there
$CONFIGFILE = "tidy.conf";

#----------------------
# parse command line
#----------------------

# hot pipe output, flush after each print
$| = 1;

$NOTIDY = 0;
$VERBOSE = 1;

&parse_command_line();

sub parse_command_line {
	#--------------------
	# Consume all of @ARGV and store the selected options in the
	# global option variables ($OUTPUT, $DBFILE, $VERBOSE, ...).
	#
	# Switches are single letters introduced by '-' or '+' and may be
	# bundled ("-qa").  A '-' switch that takes a value reads it from
	# the next command line argument.  -h and -v print their text and
	# exit.  Anything that cannot be understood is fatal.
	#--------------------

	# '-' switches that store a fixed value: letter => [ variable, value ]
	my %minus_flag = (
		'a' => [ \$OUTPUT,         "ascii" ],
		'b' => [ \$ORIENTATION,    "below" ],
		'c' => [ \$CASE_SENSITIVE, "on"    ],
		'D' => [ \$FORM_DATE,      "long"  ],
		'd' => [ \$FORM_DATE,      "short" ],
		'H' => [ \$OUTPUT,         "html"  ],
		'O' => [ \$NOTIDY,         1       ],
		'q' => [ \$VERBOSE,        0       ],
		'r' => [ \$RULER,          "on"    ],
		'T' => [ \$FORM_TIME,      "long"  ],
		't' => [ \$FORM_TIME,      "short" ],
		);

	# '-' switches that take a value:
	# letter => [ variable, regexp the value must match, what is missing ]
	my %minus_value = (
		'B' => [ \$DBFILE,      '.+',  "database file"          ],
		'C' => [ \$CONFIGFILE,  '.+',  "configuration file"     ],
		'L' => [ \$LOGFILE,     '.+',  "log file"               ],
		'o' => [ \$OUTFILE,     '.+',  "output file"            ],
		'p' => [ \$PAGER_LINES, '\d+', "number for pager_lines" ],
		's' => [ \$SORTKEY,     '.+',  "sortkey"                ],
		);

	# '+' switches: letter => [ variable, value ]
	my %plus_flag = (
		'b' => [ \$ORIENTATION,    "besides" ],
		'c' => [ \$CASE_SENSITIVE, "off"     ],
		'd' => [ \$FORM_DATE,      "none"    ],
		'O' => [ \$OUTPUT,         "none"    ],
		'p' => [ \$PAGER_LINES,    "0"       ],
		'r' => [ \$RULER,          "off"     ],
		't' => [ \$FORM_TIME,      "none"    ],
		);

	# Test the number of arguments, not the truth of the next one:
	# an argument of "0" or "" must not silently end the parsing.
	while ( @ARGV ) {
		my $arg  = shift( @ARGV );
		my $sign = substr( $arg, 0, 1 );

		die "tidy: error: Runaway argument \"$arg\" in command line\n"
			unless $sign eq '-' || $sign eq '+';

		# every character after the sign is a switch of its own
		foreach my $opt ( split( //, substr( $arg, 1 ) ) ) {
			if ( $sign eq '+' ) {
				die "tidy: error: Illegal option \"+$opt\" specified\n"
					unless $plus_flag{$opt};
				${ $plus_flag{$opt}[0] } = $plus_flag{$opt}[1];
				}
			elsif ( $opt eq 'h' )
				{ &help_screen(); exit 0; }
			elsif ( $opt eq 'v' )
				{ &version_info(); exit 0; }
			elsif ( $minus_flag{$opt} )
				{ ${ $minus_flag{$opt}[0] } = $minus_flag{$opt}[1]; }
			elsif ( $minus_value{$opt} ) {
				my ( $var, $pattern, $what ) = @{ $minus_value{$opt} };
				my $value = shift( @ARGV );

				# The value is judged on its own.  Looking at the
				# variable instead would accept "-C -q" (the variable
				# still holds its default) and swallow the "-q", and
				# would reject the legitimate "-p 0".
				my ( $checked ) =
					( defined( $value ) && $value !~ /^(-|\+)/ )
						? $value =~ /^($pattern)$/
						: ();
				die "tidy: error: No $what specified\n"
					unless defined( $checked );
				$$var = $checked;
				}
			else
				{ die "tidy: error: Illegal option \"-$opt\" specified\n"; }
			}
		}
	}

&version_info if $VERBOSE;

#-----------------------------
# load configfile
#-----------------------------

if ( -f "$CONFIGFILE" ) {
	print "tidy: Reading configuration file \"$CONFIGFILE\"...\n" if $VERBOSE;
	&load_perl_file( $CONFIGFILE );
	}
else
	{ die "tidy: error: Configuration file \"$CONFIGFILE\" not found.\n"; }

#-----------------------------
# read database
#-----------------------------

# scalar context: the date as one printable string
$TODAY = localtime;

if ( -f "$DBFILE" ) {
	print "tidy: Reading database file \"$DBFILE\"...\n" if $VERBOSE;
	&load_perl_file( $DBFILE );
	}
else {
	print "tidy: No database file. \"$DBFILE\" will be created.\n" if $VERBOSE;
	$CREATION_DATE = $TODAY;
	$LAST_UPDATE   = $TODAY;
	}

# sort categories by their first field
@sort_cat = sort {$CATEGORY{$a}[0] <=> $CATEGORY{$b}[0]} (keys %CATEGORY );

# -O: leave logfile and database alone, only print statistics
if ( $NOTIDY != 1 ) {
	&process_logfile;
	# the database is saved before the logfile is rewritten
	&write_database;
	&write_logfile;
	}

if ( "$OUTPUT" ne "none" ) {
	&init_sort;
	&print_statistics;
	}

print "tidy: Done.\n" if $VERBOSE;
exit 0;
#
# and that's it!!!  *** THE END ***
#

sub load_perl_file {
	#-----------------------------
	# Execute the perl code in the file named by the first argument
	# (configuration or database).  Dies if the file is unreadable or
	# its code fails.
	#
	# "require" is not used here: it looks up a bare relative name
	# such as "tidy.conf" in @INC (which has not contained the current
	# directory since perl 5.26) instead of loading the file that was
	# just tested with -f, and it refuses a file whose last statement
	# happens to be false.
	#-----------------------------
	my ( $file ) = @_;

	# "do" searches @INC as well unless the name starts with "/",
	# "./" or "../", so anchor other names to the current directory
	my $path = ( $file =~ m#^\.{0,2}/# ) ? $file : "./$file";

	die "tidy: error: Cannot read \"$file\".\n" unless -r $path;
	do $path;
	die "tidy: error: Cannot read \"$file\": $@" if $@;
	}

sub process_logfile {
#--------------------------
# process logfile
#
# Reads $LOGFILE line by line.  A syslog line whose process matches a
# category (field [1] of its %CATEGORY entry) and whose message
# matches the category's remote host regexp (field [2], first capture
# = host name) is counted in %DB under that host and category,
# together with the date and time it was seen.  A line of a known
# process that instead matches the discard regexp (field [3]) is only
# counted in $LOG_DISCARD.  Every other line is appended to @REST.
#--------------------------

print "tidy: Processing logfile \"$LOGFILE\"... " if $VERBOSE;
open( Logfile, "<$LOGFILE" )
	|| die "\b \ntidy: error: cannot open logfile \"$LOGFILE\"\n";

# process indicators |/-\
my @spinner = ( "\b|", "\b/", "\b-", "\b\\" );
my $tick    = 0;
my $lines   = 0;

# Shall I print process indicator?
my $show_spinner = $VERBOSE && -t STDOUT;
print "|" if $show_spinner;

while ( defined( my $line = <Logfile> ) ) {
	# chomp, not chop: a last line without a newline must not lose
	# its final character, it may be written back to the logfile
	chomp( $line );
	$lines++;
	$tick = ( $tick + 1 ) & 3;
	print $spinner[$tick] if $show_spinner;

	# check for valid entry; the regexp returns
	#   $1 month name (e.g. "Feb")        $2 day (e.g. "12")
	#   $3 time (e.g. "10:37:55")         $4 local host name
	#   $5 process which called syslog (e.g. "in.telnetd")
	#   $6 its PID in square brackets, or nothing (e.g. "[1234]")
	#   $7 the message (e.g. "connect from foo.bar.dom")
	unless ( $line =~ /^(\w+)\s+(\d+)\s+(\d\d:\d\d:\d\d)\s+(\S+)\s+(\S+?)(\[\d+\]|):\s*(.*)$/ ) {
		# unrecognized string in logfile
		# should usually not occur, but save it nevertheless
		push( @REST, $line );
		next;
		}

	# month name is converted to its number, day is filled with '0'
	my $month = $MONTH_STR_TO_NUM{$1};
	my $day   = sprintf( "%02d", $2 );
	my ( $time, $proc, $mesg ) = ( $3, $5, $7 );

	my $discard = 0; # when = 1 the entry is dropped from the logfile

	# scan all entry types
	foreach my $cat ( @sort_cat ) {
		# check for process
		next unless $proc =~ /$CATEGORY{$cat}[1]/;

		# try to get the remote host
		if ( $mesg =~ /$CATEGORY{$cat}[2]/ ) {
			# found entry with remotehost: GOTCHA!
			my $remote = $1;
			# do hostname aliasing
			$remote = $HOSTNAME_ALIAS{$remote}
				if ( exists( $HOSTNAME_ALIAS{$remote} ) );
			$DB{$remote}{$cat}[0]++;
			$DB{$remote}{$cat}[1] = join( '/', $month, $day );
			$DB{$remote}{$cat}[2] = $time;
			$discard = 1;
			$LOG_REMOTE++;
			last; # exit loop for categories
			}

		# found entry, but no remote host:
		# discard it if it matches the second regexp
		if ( $CATEGORY{$cat}[3] && $mesg =~ /$CATEGORY{$cat}[3]/ ) {
			$discard = 1;
			$LOG_DISCARD++;
			last; # exit loop for categories
			}
		}

	# unrecognized entry: save it
	push( @REST, $line ) unless $discard;
	} # END loop for logfile

close(Logfile);
print "\b " if $show_spinner;
print "$lines lines.\n" if $VERBOSE;
}

sub write_database {
#----------------------------
# save database
#
# Writes %DB together with $LAST_UPDATE (set to $TODAY here),
# $CREATION_DATE, $LOG_REMOTE and $LOG_DISCARD to $DBFILE as perl
# code that restores these variables when the file is executed.
# Dies if the file cannot be opened or completely written.
#----------------------------

$LAST_UPDATE   = $TODAY;
open( Database, ">$DBFILE" )
	|| die "tidy: error: Cannot open database \"$DBFILE\" for output\n";

print "tidy: Writing database file \"$DBFILE\"...\n" if $VERBOSE;

# Everything stored is written as a quoted perl string.  Host names
# originate from log messages, so they must never be able to end the
# string and continue as code when the database is read back.
my $last_update   = &quote_for_database( $LAST_UPDATE );
my $creation_date = &quote_for_database( $CREATION_DATE );
my $log_remote    = &quote_for_database( $LOG_REMOTE );
my $log_discard   = &quote_for_database( $LOG_DISCARD );

print Database <<"EOF";
# Tidy 1.0 Database File
# created $TODAY
# with configfile $CONFIGFILE
#
# This file is automatically created. Editing of this file by hand is strongly
# discouraged unless you are perfectly sure of what you are doing :)
#
\$LAST_UPDATE = $last_update;
\$CREATION_DATE = $creation_date;
\$LOG_REMOTE = $log_remote;
\$LOG_DISCARD = $log_discard;
#
EOF

foreach my $host ( keys %DB ) {
	foreach my $cat ( keys %{$DB{$host}} ) {
		my $entry = $DB{$host}{$cat};
		# %d forces the counter to be a plain number
		printf Database "\$DB{%s}{%s} = [ %d, %s, %s ];\n",
			&quote_for_database( $host ),
			&quote_for_database( $cat ),
			( $$entry[0] || 0 ),
			&quote_for_database( $$entry[1] ),
			&quote_for_database( $$entry[2] );
		}
	print Database "#\n";
	}

# A file loaded with "require" has to end with a true value; without
# this line an empty database ends in "\$LOG_DISCARD = '0';".
print Database "1;\n";

# write errors of buffered output only show up when closing
close(Database)
	|| die "tidy: error: Cannot write database \"$DBFILE\": $!\n";
}

sub quote_for_database {
	# Return the argument as the source text of a single-quoted perl
	# string.  Backslash and single quote are the only characters with
	# a special meaning inside such a string; undef becomes ''.
	my ( $text ) = @_;
	$text = '' unless defined( $text );
	$text =~ s/([\\'])/\\$1/g;
	return "'$text'";
	}

sub write_logfile {
	#----------------------------
	# write rest to logfile
	#
	# Replaces the contents of $LOGFILE by the lines collected in
	# @REST, one per line.  The old contents are gone as soon as the
	# file has been opened, so every failure to write is fatal instead
	# of passing unnoticed.
	#----------------------------

	open( Logfile, ">$LOGFILE" )
		|| die "tidy: error: cannot open \"$LOGFILE\" for output\n";

	print "tidy: Writing logfile \"$LOGFILE\"... " if $VERBOSE;

	my $kept = 0; # lines counter
	foreach my $line ( @REST ) {
		print Logfile "$line\n"
			or die "\ntidy: error: cannot write to \"$LOGFILE\": $!\n";
		$kept++;
		}

	# output is buffered: a full disk may only be reported here
	close(Logfile)
		or die "\ntidy: error: cannot write to \"$LOGFILE\": $!\n";
	print "$kept lines left in logfile.\n" if $VERBOSE;
	}

sub init_sort {
	#---------------------------
	# Prepare the sorting of the remote hosts.
	#
	# Depending on $SORTKEY this sets $SORT_FLAG (0 = no sorting,
	# 1 = alphanumerical, 2 = numerical) and stores one sort value for
	# every host of %DB in %sort_ary.  $SORTKEY may be "none", "host",
	# "domain" or one of the names in @sort_cat; anything else is fatal.
	#---------------------------
	my $keep_case = ( "$CASE_SENSITIVE" eq "on" );

	if ( "$SORTKEY" eq "none" ) {
		$SORT_FLAG = 0;
		return;
		}

	if ( "$SORTKEY" eq "host" ) {
		# hosts are ordered by their own name
		$SORT_FLAG = 1;
		foreach my $name ( keys %DB )
			{ $sort_ary{$name} = $keep_case ? $name : uc( $name ); }
		return;
		}

	if ( "$SORTKEY" eq "domain" ) {
		# hosts are ordered by their name read from right to left,
		# foo.bar.dom is sorted as dom.bar.foo
		$SORT_FLAG = 1;
		foreach my $name ( keys %DB ) {
			my @labels = ();
			# each match is one label plus the dot following it (if
			# any); the dot is moved in front and the pieces are
			# collected in reverse order
			while ( $name =~ /([\w-]+)(\.|)/g )
				{ unshift( @labels, $2 . $1 ); }
			my $reversed = join( '', @labels );
			$sort_ary{$name} = $keep_case ? $reversed : uc( $reversed );
			}
		return;
		}

	# everything else has to be the name of a category:
	# hosts are ordered by their counter in that category
	my $known = 0;
	foreach my $category ( @sort_cat ) {
		next unless "$SORTKEY" eq "$category";
		$SORT_FLAG = 2;
		$known = 1;
		foreach my $name ( keys %DB )
			{ $sort_ary{$name} = $DB{$name}{$category}[0]; }
		last;
		}
	die "tidy: error: Illegal SORTKEY \"$SORTKEY\" specified.\n"
		if $known == 0;
	}

sub print_statistics {
#----------------------------
# print statistics
#
# Writes the contents of %DB as a table to $OUTFILE, either as plain
# text ($OUTPUT "ascii") or as HTML ($OUTPUT "html"); any other value
# of $OUTPUT is fatal.  One entry is printed per remote host, with one
# cell per category of @sort_cat.  The layout is controlled by
# $ORIENTATION, $FORM_DATE, $FORM_TIME, $RULER and $PAGER_LINES, the
# order of the hosts by $SORT_FLAG and %sort_ary.
#
# Uses the globals $h, $e, $t, $d, $count, $rule, $rule_part, $cat_s
# and $LEN as scratch variables.
#----------------------------

# without an explicit $OUTFILE the name is built from $OUTDIR and the
# file name belonging to the output format
if ( "$OUTPUT" eq "ascii" )
	{ ( $OUTFILE = $OUTDIR . '/' . $OUTFILE_ASCII ) unless $OUTFILE; }
elsif ( "$OUTPUT" eq "html" )
	{ ( $OUTFILE = $OUTDIR . '/' . $OUTFILE_HTML ) unless $OUTFILE; }
else
	{ die "tidy: error: Illegal output format \"$OUTPUT\" specified.\n"; }

print "tidy: Writing output to \"$OUTFILE\"...\n" if $VERBOSE;

# an existing file is overwritten
open(Statfile,">$OUTFILE")
	|| die "tidy: error: cannot open \"$OUTFILE\" for output\n";

# @sorted_hosts is only assigned for the $SORT_FLAG values 0, 1 and 2
if ( $SORT_FLAG == 0 ) {
	# no sorting
	@sorted_hosts = keys %DB;
	}
elsif ( $SORT_FLAG == 1 ) {
	# alphanumerical sort, a's first, z's last
	@sorted_hosts = sort { $sort_ary{$a} cmp $sort_ary{$b} } ( keys %DB );
	}
elsif ($SORT_FLAG == 2) {
	# numerical sort, highest numbers first
	@sorted_hosts = sort { $sort_ary{$b} <=> $sort_ary{$a} } ( keys %DB );
	}

#--------------------
# header, init
#--------------------

# total number of remote hosts
# (not read again in this sub; presumably meant for use inside the
# header and trailer lines evaluated below -- TODO confirm)
$TOTAL_REMOTE = keys %DB;

if ( "$OUTPUT" eq "ascii" ) {
	#---------------------
	# ascii header & init
	#---------------------
	# Every header line is evaluated as the contents of a double quoted
	# string, so variables named in it are replaced by their current
	# value.  The result of the eval is not checked: if it fails, $h
	# keeps its previous value.
	# NOTE(review): @ASCII_HEADER is not defined in this file and is
	# executed as code here; it has to come from a trusted source.
	foreach ( @ASCII_HEADER ) {
		eval "\$h = \"$_\";";
		print Statfile "$h\n";
		}

	# $LEN is the width of one category cell, $rule_part the piece of
	# the horizontal ruler above such a cell ('+' plus $LEN dashes)
	$LEN = '4';
	$rule_part = '+----';
	if ( "$ORIENTATION" eq "below" ) {
		# date and time are printed below the counter: the cell has to
		# hold the widest of them (a later test overrides an earlier one)
		if ( "$FORM_DATE" eq "short" || "$FORM_TIME" eq "short" )
			{ $LEN = '5'; $rule_part = '+-----'; }
		if ( "$FORM_DATE" eq "long" )
			{ $LEN = '6'; $rule_part = '+------'; }
		if ( "$FORM_TIME" eq "long" )
			{ $LEN = '8'; $rule_part = '+--------'; }
		}
	else { # ORIENTATION "besides"
		# date or time are appended to the counter, separated by a
		# comma: the cell grows by the comma and the widest addition;
		# at most one of the following conditions can be true
		if ( "$FORM_TIME" eq "long" )
			{ $LEN += 9; $rule_part .= '---------'; }
		if ( "$FORM_DATE" eq "long" && "$FORM_TIME" ne "long" )
			{ $LEN += 7; $rule_part .= '-------'; }
		if ( "$FORM_DATE" eq "short" && "$FORM_TIME" ne "long" )
			{ $LEN += 6; $rule_part .= '------'; }
		if ( "$FORM_TIME" eq "short" && "$FORM_DATE" eq "none" )
			{ $LEN += 6; $rule_part .= '------'; }
		}
	# compose table header: $rule is the horizontal ruler, $cat_s the
	# line with the column headings (field [4] of each category, cut
	# or padded to the cell width)
	$rule = '';
	$cat_s = '';
	foreach $cat ( @sort_cat ) {
		$rule .= $rule_part;
		$cat_s .= sprintf ( "|%${LEN}.${LEN}s", $CATEGORY{$cat}[4] );
		}
	$rule .= '+';
	$cat_s .= '| Remote Host';
	while ( length ($rule) < 79 ) # fill up with '-'
		{ $rule .= '-'; }
	}
else {
	#---------------------
	# html header & init
	#---------------------
	# evaluated the same way as the ascii header lines above
	foreach ( @HTML_HEADER ) {
		eval "\$h = \"$_\";";
		print Statfile "$h\n";
		}
	}

#--------------------
# print entries
#--------------------

# $count counts the hosts printed since the last table header
$count = 0;
if ( "$OUTPUT" eq "ascii" ) {
	#--------------------
	# ascii output
	#--------------------
	# with rulers the table starts with one; the header printed for
	# the first host then omits its leading ruler
	if ( $RULER eq "on" )
		{ print Statfile "$rule\n"; }

	foreach $host ( @sorted_hosts ) {
		if ( $count == 0 ) {
			# column headings, before the first host and again
			# whenever a page is full
			if ( $RULER eq "on" )
				{ print Statfile "$cat_s\n$rule\n"; }
			else
				{ print Statfile "$rule\n$cat_s\n$rule\n"; }

			# start below zero so that $count is back at 0 after
			# $PAGER_LINES hosts; without paging it never gets
			# back to 0
			if ( $PAGER_LINES )
				{ $count = -$PAGER_LINES; }
			}
		$count++;

		if ( "$ORIENTATION" eq "below" ) {
			# up to three lines per host: counters ($e), dates ($d)
			# and times ($t)
			$e = ''; $t = ''; $d = '';
			foreach $cat ( @sort_cat ) {
				$e .= sprintf ( "|%${LEN}d", $DB{$host}{$cat}[0] );

				if ( "$FORM_DATE" ne "none" ) {
					$d .= sprintf ( "|%${LEN}.${LEN}s",
						&fmt_date_ascii( $DB{$host}{$cat}[1] ) );
					}
				if ("$FORM_TIME" ne "none") {
					$t .= sprintf ( "|%${LEN}.${LEN}s",
						&fmt_time_ascii( $DB{$host}{$cat}[2] ) );
					}
				}
			# the host name is printed on the line of the counters only
			print Statfile "$e| $host\n";
			if ( "$FORM_DATE" ne "none" )
				{ print Statfile "$d|\n"; }
			if ( "$FORM_TIME" ne "none" )
				{ print Statfile "$t|\n"; }
			}
		else { # Orientation "besides"
			# first line ($e): counter, followed by the date, or by the
			# time if no date is wanted; second line ($t): the time,
			# only when both date and time are wanted
			$e = ''; $t = '';
			foreach $cat ( @sort_cat ) {
				# $d is the text of the current cell
				$d = sprintf ( "%4d", $DB{$host}{$cat}[0] );
				if ( $DB{$host}{$cat}[0] != 0 ) {
					if ( "$FORM_DATE" ne "none" ) {
						$d .= ',' . &fmt_date_ascii( $DB{$host}{$cat}[1] );
						if ( "$FORM_TIME" ne "none" ) {
							$t .= sprintf( "|%${LEN}.${LEN}s",
								&fmt_time_ascii( $DB{$host}{$cat}[2] ) );
							}
						}
					else {
						if ("$FORM_TIME" ne "none") {
							$d .= ',' . &fmt_time_ascii( $DB{$host}{$cat}[2] );
							}
						}
					}
				else {
					# nothing counted: no date and time, but the time
					# line still needs an empty cell
					if ( "$FORM_TIME" ne "none" && "$FORM_DATE" ne "none" )
						{ $t .= sprintf ( "|%${LEN}.${LEN}s", " " ); }
					}
				# left justified, cut or padded to the cell width
				$e .= sprintf ( "|%-${LEN}.${LEN}s", $d );
				}
			print Statfile "$e| $host\n";
			if ( "$FORM_TIME" ne "none" && "$FORM_DATE" ne "none" )
				{ print Statfile "$t|\n"; }
			}
		if ( "$RULER" eq "on" )
			{ print Statfile "$rule\n"; }
		}
	if ( "$RULER" eq "off" ) # print ruler in last line of table
		{ print Statfile "$rule\n"; }
	}
else {
	#---------------
	# html output
	#---------------
	# $rule is the table row with the column headings.  Field [5] of a
	# category is inserted inside the still open <TH tag, so it
	# presumably supplies the rest of the tag including the closing
	# '>' -- TODO confirm against the configuration file.
	# $REMOTE_HEADER is not defined in this file.
	$rule = "<TR>$REMOTE_HEADER";
	foreach $cat ( @sort_cat ) {
		# date/time "besides" the counter use a table cell of their own
		if ( "$ORIENTATION" eq "besides" && 
			( "$FORM_DATE" ne "none" || "$FORM_TIME" ne "none" ) )
			{ $rule .= "<TH COLSPAN=2 ${CATEGORY{$cat}[5]}</TH>"; }
		else
			{ $rule .= "<TH ${CATEGORY{$cat}[5]}</TH>"; }
		}
	$rule .= "</TR>";

	foreach $host ( @sorted_hosts ) {
		# $count is 0 for the very first host only, it is incremented
		# right after each reset below
		if ( $count == 0 )
			{ print Statfile "<TABLE BORDER>$rule\n"; }
		# page full: repeat the headings, with $RULER "off" in a new table
		if ( $PAGER_LINES && $count == $PAGER_LINES ) {
			if ( "$RULER" eq "off" )
				{ print Statfile "</TABLE><P>\n<TABLE BORDER>"; }
			print Statfile "$rule\n";
			$count = 0;
			}
		$count++;
		# NOTE(review): $host is written to the page as it is, without
		# escaping of HTML special characters
		print Statfile "<TR ALIGN=CENTER><TH ALIGN=LEFT>$host</TH>";
		foreach $cat ( @sort_cat ) {
			# an empty or zero counter is shown as an empty cell
			if ( $DB{$host}{$cat}[0] )
				{ print Statfile "<TD>${DB{$host}{$cat}[0]}"; }
			else
				{ print Statfile '<TD>&nbsp;'; }

			if ( "$ORIENTATION" eq "below" ) {
				# date and time follow in the same cell, on new lines
				if ( "$FORM_DATE" ne "none" ) {
					$d = &fmt_date_html( $DB{$host}{$cat}[1] );
					print Statfile "<BR><FONT SIZE=\"-1\"><I>$d</I></FONT>";
					}
				if ( "$FORM_TIME" ne "none" ) {
					$d = &fmt_time_html( $DB{$host}{$cat}[2] );
					print Statfile "<BR><FONT SIZE=\"-1\"><I>$d</I></FONT>";
					}
				}
			else { # Orientation besides
				# date and time share a second cell; the <FONT> tags
				# opened in this branch are not closed
				if ( "$FORM_DATE" ne "none" ) {
					$d = &fmt_date_html( $DB{$host}{$cat}[1] );
					print Statfile "</TD><TD><FONT SIZE=\"-1\"><I>$d</I> ";
					}
				if ( "$FORM_TIME" ne "none" ) {
					$d = &fmt_time_html( $DB{$host}{$cat}[2] );
					if ( "$FORM_DATE" eq "none" ) {
						print Statfile "</TD><TD><FONT SIZE=\"-1\"><I>$d</I>";
						}
					else
						{ print Statfile "<BR><FONT SIZE=\"-1\"><I>$d</I>"; }
					}
				}
			print Statfile "</TD>";
			}
		print Statfile "</TR>\n";
		}
	# printed even if there was no host and therefore no <TABLE>
	print Statfile "</TABLE>\n";
	}

#--------------------
# trailer
#--------------------

# trailer lines are evaluated the same way as the header lines
if ( "$OUTPUT" eq "ascii" ) {
	foreach ( @ASCII_TRAILER ) {
		eval "\$h = \"$_\";";
		print Statfile "$h\n";
		}
	}
elsif ( "$OUTPUT" eq "html" ) {
	foreach ( @HTML_TRAILER ) {
		eval "\$h = \"$_\";";
		print Statfile "$h\n";
		}
	}
close(Statfile);
}

sub fmt_date_ascii {
	# Format a date stored as "month/day" (numbers) for the ascii table.
	# $FORM_DATE "long":  month name from @MONTH_NUM_TO_STR cut to 3
	#                     characters, a blank and the day in 2 columns;
	#                     6 blanks if the argument holds no "/"
	# otherwise:          the argument itself, cut or padded to 5 columns
	my ( $stamp ) = @_;

	if ( "$FORM_DATE" ne "long" )
		{ return sprintf ( "%5.5s", $stamp ); }

	if ( $stamp =~ /(\d*)\/(\d*)/ )
		{ return sprintf ("%3.3s %2d", $MONTH_NUM_TO_STR[$1], $2 ); }

	return '      ';
	}

sub fmt_date_html {
	# Format a date stored as "month/day" (numbers) for the html table.
	# $FORM_DATE "long":  month name from @MONTH_NUM_TO_STR and the day,
	#                     joined by a non-breaking space
	# otherwise:          the argument itself
	# An argument without a date gives a non-breaking space, so that
	# the result is never empty.
	my ( $stamp ) = @_;

	if ( "$FORM_DATE" eq "long" ) {
		if ( $stamp =~ /(\d*)\/(\d*)/ )
			{ return $MONTH_NUM_TO_STR[$1] . '&nbsp;' . $2; }
		return '&nbsp;';
		}

	if ( $stamp =~ /\S+/ )
		{ return $stamp; }
	return '&nbsp;';
	}

sub fmt_time_ascii {
	# Format a time for the ascii table: cut or padded to 8 columns if
	# $FORM_TIME is "long", to 5 columns otherwise.
	my ( $clock ) = @_;
	my $layout = ( "$FORM_TIME" eq "long" ) ? "%8.8s" : "%5.5s";
	return sprintf( $layout, $clock );
	}

sub fmt_time_html {
	# Format a time for the html table: unchanged if $FORM_TIME is
	# "long", cut or padded to 5 characters otherwise.  An argument
	# without a time gives a non-breaking space.
	my ( $clock ) = @_;

	return '&nbsp;' unless $clock =~ /\S+/;
	return $clock if "$FORM_TIME" eq "long";
	return sprintf ("%5.5s", $clock );
	}

#-----------------------------------------
# Help message
#-----------------------------------------

sub help_screen {
	# Print the version banner followed by a summary of the command
	# line syntax.
	&version_info();
	my $usage = <<"EOF";
 Tidy up logfile created by syslogd and do statistics on it

 Syntax: tidy [-B database] [-C configfile] [-L logfile] [-o outfile]
              [-p pagerlines] [-s sortkey] [-abcDdHOqrTt] [+bcdOprt]
         tidy -h | -v

 See manpage tidy(8) for a detailed description

 Author: Marek Rouchal (marek\@saftsack.fs.uni-bayreuth.de)

EOF
	print $usage;
	}

sub version_info {
	# Print the program name and version, framed by empty lines.
	my $banner = <<"EOF";

 This is tidy Version 1.0

EOF
	print $banner;
	}

