1 |
#!/usr/bin/perl -w |
2 |
|
3 |
# parse file.alert mon logs and report (up|down)time of services |
4 |
# |
5 |
# 2003-09-03 Dobrica Pavlinusic <dpavlin@rot13.org> |
6 |
# 2003-10-05 converted to CGI script |
7 |
# |
8 |
|
9 |
use strict; |
10 |
use POSIX qw(strftime); |
11 |
use CGI qw/:standard *table/; |
12 |
use CGI::Carp qw(fatalsToBrowser); |
13 |
use Data::Sorting qw(:arrays); |
14 |
use Data::Dumper; |
15 |
|
16 |
# strftime(3) format used for every timestamp shown in the report
my $date_fmt = "%Y-%m-%d %H:%M:%S";

# working days definition (1-7; mon=1)
my $wday_start = 1;
my $wday_end = 5;
# working hours
my $whours_start = "7:00";
my $whours_end = "17:00";

# debug level: bumped once for each of -v / -d found on the command line
my $debug = 0;
$debug++ if (grep(/-v/, @ARGV));
$debug++ if (grep(/-d/, @ARGV));

# plain class-method call; the indirect-object form "new CGI" is ambiguous
# to the parser and discouraged
my $q = CGI->new;

# report options coming from the filter form
my $print_orphans = $q->param('print_orphans') || 0;
my $rep_reset = $q->param('rep_reset') || 0;
my @sg_selected = $q->param('sg_filter');

my @sort;          # sort rules handed to sorted_array()
my $order;         # not used in the part of the script visible here
my %sort_param;    # 'usort' / 'dsort' => column currently sorted on
my ($usort,$dsort);    # not used in the part of the script visible here

# Only the numeric columns offered by the report's sort links may be used
# as a sort key; any other request value is ignored instead of being passed
# on to the sorter and echoed back into the page.
my %sortable = map { $_ => 1 } qw(from_time to_time dur_time);

# usort = ascending, dsort = descending.  dsort is handled last, so it wins
# when both are supplied.
foreach my $dir (qw(usort dsort)) {
    my $col = $q->param($dir);

    # drop it from the query object so that links built with
    # url(-query=>1) do not carry the previous sort order along
    $q->delete($dir);

    next unless ($col && $sortable{$col});

    $sort_param{$dir} = $col;
    @sort = ( -compare => 'numeric',
              ($dir eq 'dsort' ? (-order => 'reverse') : ()),
              $col );
}
50 |
|
51 |
# |
52 |
# This option (activated via the rep_reset CGI parameter) will reset failure duration |
53 |
# if repeated failure on same group/service happened. |
54 |
# If you want honest reporting (or grouped only by group and service), |
55 |
# you shouldn't turn it on :-) However, if you have just failure events in your |
56 |
# log, this will produce output which will show duration BETWEEN two failures |
57 |
# |
58 |
|
59 |
# pretty format date
#
# Takes an epoch timestamp and returns it as local time rendered with
# $date_fmt.  A missing (or otherwise false) timestamp yields "?".
sub d {
    my ($epoch) = @_;
    return "?" unless $epoch;

    my @when = localtime($epoch);
    return strftime($date_fmt, @when);
}
64 |
# pretty format duration
#
# Takes a duration in seconds and returns it as "HH:MM:SS [seconds]",
# prefixed with "<days>d " when it spans at least one full day.
# Returns "?" only when no duration was given at all (undef or empty
# string); a zero-second duration is a real value and is formatted
# as "00:00:00 [0]".
sub dur {
    my $dur = shift;
    return "?" unless (defined($dur) && length($dur));

    my $out = "";

    # peel off days, hours and minutes; $s ends up holding the seconds
    my $s = $dur;
    my $d = int($s/(24*60*60));
    $s = $s % (24*60*60);
    my $h = int($s/(60*60));
    $s = $s % (60*60);
    my $m = int($s/60);
    $s = $s % 60;

    $out .= $d."d " if ($d > 0);
    # the raw number of seconds is appended in brackets
    $out .= sprintf("%02d:%02d:%02d [%d]",$h,$m,$s, $dur);
#   $out .= sprintf("%02d:%02d:%02d",$h,$m,$s);

    return $out;
}
83 |
|
84 |
# read log and calculate |
85 |
# |
86 |
|
87 |
my %fail;         # "$group/$service" => epoch of the failure that is still open
my %downtime;     # "$group/$service" => total downtime in seconds
my %sg_filter;    # "$group/$service" => event counter; its keys feed the filter form

my $log_file = "/home/dpavlin/mon-log/sap.log";

my @data;         # report rows (hash refs) in the order they were found in the log

# Exact-match lookup of the group/service pairs ticked in the form.  A regex
# match would treat the names as patterns and would let "web/http" through
# when only "web/https" was selected.
# NOTE(review): with no sg_filter parameter at all (e.g. on the first page
# load) this is empty and no completed outage is reported -- confirm that
# this is intended.
my %sg_is_selected = map { $_ => 1 } @sg_selected;

# three-argument open with a lexical handle: the path can never be taken for
# a mode or a command, and the handle does not live in package scope
open(my $log_fh, '<', $log_file) or die "$log_file: $!";

while (my $line = <$log_fh>) {
    chomp $line;

    # <failure|up> <group> <service> <epoch> (<date text>) <description>
    next unless $line =~ /^(failure|up)\s+(\S+)\s+(\S+)\s+(\d+)\s+\(([^)]+)\)\s+(.+)$/;

    # $5, the date text in parentheses, is not needed: the epoch is used
    my ($status, $group, $service, $utime, $desc) = ($1, $2, $3, $4, $6);
    my $id = "$group/$service";

    if ($status eq "up" && defined($fail{$id})) {
        # recovery that closes a known failure: one complete outage
        if ($sg_is_selected{$id}) {
            my $down_for = $utime - $fail{$id};
            push @data, {
                'sg'        => $id,
                'from_time' => $fail{$id},
                'to_time'   => $utime,
                'dur_time'  => $down_for,
                'from'      => d($fail{$id}),
                'to'        => d($utime),
                'dur'       => dur($down_for),
                'desc'      => $desc,
            };
            $downtime{$id} += $down_for;
        }
        $sg_filter{$id}++;
        delete $fail{$id};
    } elsif ($status eq "up") {
        # recovery without a failure seen before it in this log
        if ($print_orphans) {
            push @data, {
                'sg'      => $id,
                'to_time' => $utime,
                'from'    => 'unknown',
                'to'      => d($utime),
                'dur'     => 'unknown',
                'desc'    => $desc,
            };
        }
        delete $fail{$id};
        $sg_filter{$id}++;
    } elsif (defined($fail{$id})) {
        # another failure while the previous one is still open
        # NOTE(review): unlike completed outages, these rows are not
        # restricted to the selected group/service pairs -- confirm.
        if ($rep_reset) {
            my $down_for = $utime - $fail{$id};
            push @data, {
                'sg'        => $id,
                'from_time' => $fail{$id},
                'to_time'   => $utime,
                'dur_time'  => $down_for,
                'from'      => d($fail{$id}),
                'to'        => d($utime),
                'dur'       => dur($down_for),
                'desc'      => '[failure again]',
            };
            $downtime{$id} += $down_for;
            # restart the clock from this failure
            $fail{$id} = $utime;
        }
        $sg_filter{$id}++;
    } else {
        # first failure: remember when the outage started
        $fail{$id} = $utime;
    }
}
close($log_fh);
150 |
|
151 |
# generate output |
152 |
# |
153 |
print header, start_html("mon availability report");

# make some filters
#

# Sorted so that the checkboxes appear in the same order on every request;
# plain "keys" order is not stable between runs.
my @sg_names = sort keys %sg_filter;

print start_form,
    start_table({-border=>0,-cellspacing=>0,-cellpadding=>0}),
    Tr(td(
        # left cell: one checkbox per group/service seen in the log
        em("Show just service/group:"), br(),
        checkbox_group(-name=>'sg_filter',
                -values=>\@sg_names,
                -default=>\@sg_names,
                -linebreak=>'true',
        ),
    ),td(
        # right cell: report switches and the submit button
        em("Other options:"), br(),
        $q->checkbox(-name=>'rep_reset',-checked=>0,
            -label=>"show repeated failures on same service as individual failures"),
        br(),
        $q->checkbox(-name=>'print_orphans',-checked=>0,
            -label=>"show records which are not complete in this interval"),
        br(),
        $q->submit(-name=>'show',-value=>'Show report'),
    )),end_table;
177 |
|
178 |
# dump report |
179 |
# |
180 |
|
181 |
# Build the sort arrow for one column header.
#
#   $q   - CGI query object, used to build the link target
#   $col - name of the column to sort on
#   $dir - 'u' (ascending, rendered as &uArr;) or 'd' (descending, &dArr;);
#          upper case is accepted
#
# Returns the bare arrow entity when the report is already sorted that way,
# otherwise the arrow wrapped in a link that re-requests the current page
# with "<dir>sort=<col>" added.  Returns nothing if an argument is missing.
sub sort_link {
    my ($q, $col, $dir) = @_;
    return unless ($q && $col && $dir);

    $dir = lc($dir);
    my $param = $dir . 'sort';
    my $arrow = '&' . $dir . 'Arr;';

    # already sorted on this column in this direction: no link needed
    if ($sort_param{$param} && $sort_param{$param} eq $col) {
        return $arrow;
    }

    # url(-query=>1) only carries a "?" when there are parameters to show,
    # so the new parameter has to start the query string in that case
    my $url = $q->url(-query=>1);
    my $sep = (index($url, '?') >= 0) ? '&' : '?';
    my $href = $url . $sep . $param . '=' . $col;

    # escape for use inside the attribute (a raw "&" is not valid there)
    return '<a href="' . $q->escapeHTML($href) . '">' . $arrow . '</a>';
}
191 |
|
192 |
# Sort first, so that the optional debug dump can be written before the
# table starts instead of between its rows.
my @sorted = sorted_array(@data, @sort);
#my @sorted = @data;

# Debugging aid only: shown when -v / -d was given, and HTML-escaped because
# it echoes the requested sort column.
if ($debug) {
    print pre($q->escapeHTML(
        "-- sort: " . Dumper(@sort)
        . " (data: " . @data . " sorted: " . @sorted . ") --"
    )), "\n";
}

# header row; the three time columns get an ascending and a descending
# sort arrow on either side of their title
print start_table({-border=>1,-cellspacing=>0,-cellpadding=>2,-width=>'100%'}),
    Tr(
        th("group/service"),
        th({-bgcolor=>'#f0f0f0'},
            sort_link($q,'from_time','u').' from '.
            sort_link($q,'from_time','d')
        ),
        th(
            sort_link($q,'to_time','u').' to '.
            sort_link($q,'to_time','d')
        ),
        th({-bgcolor=>'#e0e0e0'},
            sort_link($q,'dur_time','u').' duration '.
            sort_link($q,'dur_time','d')
        ),
        th("description")
    );

# one row per outage; group/service and description come straight from the
# log file, so they are escaped before being put into the page
foreach my $row (@sorted) {
    print Tr(
        td({-align=>'left',-valign=>'center'},$q->escapeHTML($row->{sg})),
        td({-align=>'right',-bgcolor=>'#f0f0f0'},$row->{from}),
        td({-align=>'right'},$row->{to}),
        td({-align=>'center',-bgcolor=>'#e0e0e0'},$row->{dur}),
        td({-align=>'left'},$q->escapeHTML($row->{desc})),
    ),"\n";
}

# dump totals
#

# sorted so that the totals appear in the same order on every request
foreach my $sg (sort keys %downtime) {
    print Tr(th({-colspan=>3,-align=>'right'},$q->escapeHTML("total for $sg:")),
        th({-bgcolor=>'#e0e0e0',-align=>'right'},dur($downtime{$sg})),
        th("")),"\n";
}

# close everything that was opened, including the document itself
print end_table,
    end_form,
    end_html;
236 |
|