1 |
#!/usr/bin/perl -w |
2 |
|
3 |
# parse file.alert mon logs and report (up|down)time of services |
4 |
# |
5 |
# 2003-09-03 Dobrica Pavlinusic <dpavlin@rot13.org> |
6 |
# |
7 |
# Usage: ./parse-log.pl < /var/log/mon/foo.log |
8 |
|
9 |
use strict; |
10 |
use POSIX qw(strftime); |
11 |
|
12 |
# strftime(3) pattern used for every timestamp printed in the report
my $date_fmt = "%Y-%m-%d %H:%M:%S";

# Debug level: raised by one for each of -v and -d found on the command
# line. The patterns are unanchored, so any argument that merely contains
# "-v" or "-d" counts as well.
my $debug = 0;
foreach my $switch (qr/-v/, qr/-d/) {
    $debug++ if (grep { $_ =~ $switch } @ARGV);
}

#
# Repeated-failure reset, enabled with the -r command line switch.
# When a group/service that is already marked as failed logs another
# failure, the outage recorded so far is reported and the clock restarts
# from the new failure.
# Leave it off if you want honest reporting (one line per outage, grouped
# only by group and service). It is useful when the log contains nothing
# but failure events: the output then shows the duration BETWEEN two
# consecutive failures.
#
my $rep_reset = (grep { /-r/ } @ARGV) ? 1 : 0;
27 |
|
28 |
# Pretty-format a Unix timestamp as local time using $date_fmt.
# A missing or false timestamp (undef, 0, "") is rendered as "?".
sub d {
    my ($epoch) = @_;
    return "?" unless $epoch;

    my @when = localtime($epoch);
    return strftime($date_fmt, @when);
}
33 |
# Pretty-format a duration given in seconds as "[-][<days>d ]HH:MM:SS".
#
# Argument: duration in whole seconds; may be 0 or negative.
# Returns:  "?" when the argument is missing (undef or empty string),
#           otherwise the formatted string, for example
#           90061 -> "1d 01:01:01", 0 -> "00:00:00", -90 -> "-00:01:30".
sub dur {
    my $s = shift;

    # Only a missing value is unknown. Zero is a legitimate duration
    # (failure and recovery logged within the same second) and must be
    # printed as 00:00:00 rather than "?".
    return "?" unless (defined($s) && length($s));

    # Perl's % takes the sign of its right operand, so a negative input
    # (out-of-order log entries) would otherwise turn into a bogus
    # positive time of day. Format the magnitude and keep the sign.
    my $out = "";
    if ($s < 0) {
        $out = "-";
        $s   = -$s;
    }

    my $d = int($s / (24*60*60));
    $s %= 24*60*60;
    my $h = int($s / (60*60));
    $s %= 60*60;
    my $m = int($s / 60);
    $s %= 60;

    # The day count is shown only when the duration reaches a full day.
    $out .= $d."d " if ($d > 0);
    $out .= sprintf("%02d:%02d:%02d", $h, $m, $s);

    return $out;
}
50 |
|
51 |
# Column header of the tab-separated report written to STDOUT.
print "#group/service\tfrom_date\tfrom_time\tto_date\tto_time\tduration\tdescription\n";

# Start time (Unix timestamp) of the currently open failure,
# keyed by "group/service".
my %fail;

LINE: while (defined(my $line = <STDIN>)) {
    chomp $line;

    # Only "failure" and "up" records are processed; any other line is
    # skipped silently.
    my ($status, $group, $service, $utime, $date, $desc) =
        $line =~ /^(failure|up)\s+(\S+)\s+(\S+)\s+(\d+)\s+\(([^)]+)\)\s+(.+)$/
        or next LINE;

    print "# $group/$service $status $date [$desc]\n" if ($debug);

    my $id      = "$group/$service";
    my $started = $fail{$id};

    if ($status eq "up") {
        if (defined($started)) {
            # Recovery after a recorded failure: report the whole outage.
            print "$group/$service\t", d($started), "\t", d($utime), "\t", dur($utime - $started), "\t$desc\n";
        }
        else {
            # Recovery with no failure seen before it in this log.
            print "$group/$service\tunknown\t", d($utime), "\tunknown\t$desc\n";
        }
        delete $fail{$id};
    }
    elsif (!defined($started)) {
        # First failure for this service: remember when it began.
        $fail{$id} = $utime;
    }
    elsif ($rep_reset) {
        # Repeated failure with -r: report the span since the previous
        # failure and restart the clock from this one.
        print "$group/$service\t", d($started), "\t", d($utime), "\t", dur($utime - $started), "\t$desc\t[failure again]\n";
        $fail{$id} = $utime;
    }
}