1 |
#!/usr/bin/perl -w |
2 |
|
3 |
# parse file.alert mon logs and report (up|down)time of services |
4 |
# |
5 |
# 2003-09-03 Dobrica Pavlinusic <dpavlin@rot13.org> |
6 |
# 2003-10-05 converted to CGI script |
7 |
# |
8 |
|
9 |
use strict; |
10 |
use POSIX qw(strftime); |
11 |
use CGI qw/:standard *table/; |
12 |
use CGI::Carp qw(fatalsToBrowser); |
13 |
use Data::Sorting qw(:arrays); |
14 |
use Time::ParseDate; |
15 |
|
16 |
use Data::Dumper; |
17 |
|
18 |
# strftime formats: date only, and date with time of day
my ($date_fmt, $date_time_fmt) = ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S");

# default values for the "from" / "to" date limit fields
my ($from_date, $to_date) = ("now - 6 months", "now");

# working days definition (1-7; mon=1)
my ($wday_start, $wday_end) = (1, 5);
# working hours
my ($whours_start, $whours_end) = ("7:00", "17:00");

# debug level: starts at 1, raised by one if any argument contains "-v"
# and by one more if any argument contains "-d"
my $debug = 1;
foreach my $flag_re (qr/-v/, qr/-d/) {
    $debug++ if (grep { $_ =~ $flag_re } @ARGV);
}
34 |
|
35 |
# CGI request object (direct class-method call instead of the ambiguous
# indirect-object form "new CGI")
my $q = CGI->new;

my $print_orphans = $q->param('print_orphans') || 0;
my $rep_reset = $q->param('rep_reset') || 0;
# sg_filter is multi-valued; prefer multi_param where CGI.pm provides it,
# because newer versions warn about param() called in list context
my @sg_selected = $q->can('multi_param')
    ? $q->multi_param('sg_filter')
    : $q->param('sg_filter');

my @sort;
my $order;
my %sort_param;
my ($usort,$dsort);

# Only the numeric per-row fields are accepted as sort keys; any other
# value supplied by the client is discarded instead of being handed to
# the sorter.
my %sortable = map { $_ => 1 } qw(from_time to_time dur_time);

# 'd' is handled last so that dsort takes precedence when both are given
foreach my $dir ('u', 'd') {
    my $name = $dir.'sort';
    my $col = $q->param($name);
    next unless ($col);
    # drop the parameter so that it is not carried over into generated URLs
    $q->delete($name);
    next unless ($sortable{$col});
    $sort_param{$name} = $col;
    @sort = ($dir eq 'd')
        ? ( -compare => 'numeric', -order=>'reverse', $col )
        : ( -compare => 'numeric', $col );
}
55 |
|
56 |
# |
57 |
# This option (originally the command switch -r; in this CGI version the
# rep_reset parameter) will reset the failure duration if a repeated
# failure on the same group/service happened.
# If you want honest reporting (or grouped only by group and service),
# you shouldn't turn it on :-) However, if you have just failure events in
# your log, this will produce output which shows the duration BETWEEN two
# failures.
62 |
# |
63 |
|
64 |
# Pretty-format a date: renders an epoch time with $date_time_fmt in the
# local time zone. Returns "?" if the argument is missing or false.
sub d {
    my ($epoch) = @_;
    return "?" unless ($epoch);
    my @tm = localtime($epoch);
    return strftime($date_time_fmt, @tm);
}
69 |
# Pretty-format a duration given in seconds as "[Nd ]HH:MM:SS".
# The day prefix is only present for one day or more; when $debug is set
# the raw number of seconds is appended in square brackets.
# Returns "?" if the argument is missing or false.
sub dur {
    my ($total) = @_;
    return "?" unless ($total);

    my $days  = int($total / (24*60*60));
    my $rest  = $total % (24*60*60);
    my $hours = int($rest / (60*60));
    $rest %= (60*60);
    my $mins  = int($rest / 60);
    my $secs  = $rest % 60;

    my $text = ($days > 0) ? $days."d " : "";
    $text .= sprintf("%02d:%02d:%02d", $hours, $mins, $secs);
    $text .= sprintf(" [%d]", $total) if ($debug);

    return $text;
}
91 |
|
92 |
# read log and calculate
#
# Every log line of the form
#   (failure|up) GROUP SERVICE UTIME (DATE) DESCRIPTION
# is processed; all other lines are ignored. Report rows are collected in
# @data, but only for group/service pairs selected in the filter form.

my %fail;       # time of the currently open failure, keyed by "group/service"
my %downtime;   # total downtime
my %sg_filter;  # filter for service/group
my %sg_count;   # count number of downtimes

my $log_file="/home/dpavlin/mon-log/sap.log";

my @data;

# Exact-match lookup of the selected group/service pairs. Names taken from
# the log must not be used as a regex: that matches substrings ("a/b" would
# count as selected when only "a/bc" is) and breaks on metacharacters.
my %sg_wanted = map { $_ => 1 } @sg_selected;

# three-argument open on a lexical handle
open(my $log_fh, '<', $log_file) or die "$log_file: $!";

while (my $line = <$log_fh>) {
    chomp $line;
    next unless ($line =~ /^(failure|up)\s+(\S+)\s+(\S+)\s+(\d+)\s+\(([^)]+)\)\s+(.+)$/);
    my ($status,$group,$service,$utime,$date,$desc) = ($1,$2,$3,$4,$5,$6);
    my $id = "$group/$service";
    my $wanted = $sg_wanted{$id};

    if ($status eq "up" && defined($fail{$id})) {
        # service is back: close the open failure
        if ($wanted) {
            push @data, {
                'sg'=>$id,
                'from_time'=>$fail{$id},
                'to_time'=>$utime,
                'dur_time'=>($utime - $fail{$id}),
                'from'=>d($fail{$id}),
                'to'=>d($utime),
                'dur'=>dur($utime - $fail{$id}),
                'desc'=>$desc };
            $downtime{$id} += ($utime - $fail{$id});
            $sg_count{$id}++;
        }
        $sg_filter{$id}++;
        delete $fail{$id};
    } elsif ($status eq "up") {
        # "up" without a recorded failure: start of the outage is unknown
        if ($print_orphans && $wanted) {
            push @data, {
                'sg'=>$id,
                'to_time'=>$utime,
                'from'=>'unknown',
                'to'=>d($utime),
                'dur'=>'unknown',
                'desc'=>$desc };
            $sg_count{$id}++;
        }
        delete $fail{$id};
        $sg_filter{$id}++;
    } elsif (defined($fail{$id})) {
        # repeated failure while one is already open; with rep_reset it is
        # reported as its own row and the failure start is moved forward
        if ($rep_reset && $wanted) {
            push @data, {
                'sg'=>$id,
                'from_time'=>$fail{$id},
                'to_time'=>$utime,
                'dur_time'=>($utime - $fail{$id}),
                'from'=>d($fail{$id}),
                'to'=>d($utime),
                'dur'=>dur($utime - $fail{$id}),
                'desc'=>'[failure again]'};
            $downtime{$id} += ($utime - $fail{$id});
            $fail{$id} = $utime;
            $sg_count{$id}++;
        }
        $sg_filter{$id}++;
    } else {
        # first failure: remember when it started
        $fail{$id} = $utime;
    }
}
close($log_fh);
162 |
|
163 |
# generate output
#
print header,start_html("mon availability report");

# make some filters
#
# Left cell: one checkbox per group/service seen in the log (all ticked by
# default). Right cell: report options and the date limit fields.
print start_form,
    start_table({-border=>0,-cellspacing=>0,-cellpadding=>0}),
    Tr(td(
        em("Show just service/group:"),br,
        checkbox_group(-name=>'sg_filter',
            -values=>[keys %sg_filter],
            -default=>[keys %sg_filter],
            -linebreak=>'true',
        ),
    ),td(
        em("Other options:"),br,
        $q->checkbox(-name=>'rep_reset',-checked=>0,
            -label=>"show repeated failures on same service as individual failures"),
        br,
        $q->checkbox(-name=>'print_orphans',-checked=>0,
            -label=>"show records which are not complete in this interval"),
        br,
        $q->checkbox(-name=>'use_date_limit',-checked=>1,
            -label=>"use date limit from:"),
        $q->textfield(-name=>'from_date',-size=>20,-default=>$from_date),
        " to: ",
        $q->textfield(-name=>'to_date',-size=>20,-default=>$to_date),
        # "&" inside an attribute value has to be written as "&amp;"
        small('Using <a href="http://search.cpan.org/search?mode=module&amp;query=Time::ParseDate">Time::ParseDate</a>'),
        br,
        $q->submit(-name=>'show',-value=>'Show report'),
    )),end_table;
196 |
|
197 |
# dump report |
198 |
# |
199 |
|
200 |
# Build the HTML for one sort arrow in a column header.
#
#   $q   - CGI object; its url(-query=>1) is the base of the link
#   $col - name of the column to sort by
#   $dir - 'u' or 'd' (case-insensitive), selecting the usort/dsort
#          parameter and the &uArr;/&dArr; entity
#
# Returns the bare arrow entity when $sort_param{usort|dsort} already
# equals $col, otherwise the arrow wrapped in a link that adds
# "<dir>sort=<col>" to the current URL. Returns nothing if an argument is
# missing.
sub sort_link {
    my $q = shift || return;
    my $col = shift || return;
    my $dir = lc(shift) || return;

    my $param = $dir.'sort';
    my $arrow = '&'.$dir.'Arr;';
    if ($sort_param{$param} && $sort_param{$param} eq $col) {
        return $arrow;
    }

    my $href = $q->url(-query=>1);
    # begin a query string if the URL does not have one yet
    $href .= ($href =~ /\?/ ? '&' : '?').$param.'='.$col;
    # the URL ends up inside an HTML attribute, so escape it
    for ($href) {
        s/&/&amp;/g;
        s/</&lt;/g;
        s/>/&gt;/g;
        s/"/&quot;/g;
    }
    return '<a href="'.$href.'">'.$arrow.'</a>';
}
210 |
|
211 |
|
212 |
# Reporting interval. When the "use date limit" box is ticked, the from/to
# fields are parsed into epoch seconds ($from_time, $to_time) and a
# formatted date for the column headers ($from_html, $to_html).
my ($from_time,$to_time,$from_html,$to_html);
if ($q->param('use_date_limit')) {
    # Force scalar context: inside an argument list param() would be called
    # in list context, so a missing or repeated parameter would shift or
    # inject arguments of parsedate().
    my $from_str = scalar $q->param('from_date');
    my $to_str = scalar $q->param('to_date');
    $from_str = $from_date unless (defined($from_str) && length($from_str));
    $to_str = $to_date unless (defined($to_str) && length($to_str));

    $from_time = parsedate($from_str, UK=>1);
    $to_time = parsedate($to_str, UK=>1);
    # unparsable input: fall back to the default interval
    $from_time = parsedate($from_date, UK=>1) unless (defined($from_time));
    $to_time = parsedate($to_date, UK=>1) unless (defined($to_time));

    $from_html = strftime($date_fmt,localtime($from_time));
    $to_html = strftime($date_fmt,localtime($to_time));
    $from_html .= " [$from_time] " if ($debug);
    $to_html .= " [$to_time] " if ($debug);
}
221 |
|
222 |
# sort data
#
my @sorted = sorted_array(@data, @sort);
#my @sorted = @data;

# Debug trace of the sort specification and row counts. The sort key comes
# from the request, so the dump is HTML-escaped before it is printed.
print "-- sort: ",$q->escapeHTML(scalar(Dumper(@sort)))," (data: ".@data." sorted: ".@sorted.") --\n",br if ($debug);
228 |
|
229 |
print start_table({-border=>1,-cellspacing=>0,-cellpadding=>2,-width=>'100%'});

# Header row with sort arrows; only printed when there is something to
# show. The interval labels are undefined when no date limit is in use, so
# they default to an empty string to avoid uninitialized-value warnings.
print Tr(
    th("group/service"),
    th({-bgcolor=>'#f0f0f0'},
        sort_link($q,'from_time','u').' from '.
        sort_link($q,'from_time','d'),
        br,(defined($from_html) ? $from_html : '')
    ),
    th(
        sort_link($q,'to_time','u').' to '.
        sort_link($q,'to_time','d'),
        br,(defined($to_html) ? $to_html : '')
    ),
    th({-bgcolor=>'#e0e0e0'},
        sort_link($q,'dur_time','u').' duration '.
        sort_link($q,'dur_time','d')
    ),
    th("description")
) if (scalar @sorted > 0);
249 |
|
250 |
# One table row per report record. With the date limit active, a record is
# skipped when its start lies before the interval or its end after it.
# Records without a start time (orphans) are only checked against the end
# of the interval instead of being compared as undef.
my $limit_dates = $q->param('use_date_limit');
foreach my $row (@sorted) {
    if ($limit_dates) {
        next if (defined($from_time) && defined($row->{from_time})
            && $row->{from_time} < $from_time);
        next if (defined($to_time) && defined($row->{to_time})
            && $row->{to_time} > $to_time);
    }
    # group/service and description are taken verbatim from the log file,
    # so they are HTML-escaped before being placed into the page
    print Tr(
        td({-align=>'left',-valign=>'center'},$q->escapeHTML($row->{sg})),
        td({-align=>'right',-bgcolor=>'#f0f0f0'},$row->{from}),
        td({-align=>'right'},$row->{to}),
        td({-align=>'center',-bgcolor=>'#e0e0e0'},$row->{dur}),
        td({-align=>'left'},$q->escapeHTML($row->{desc})),
    ),"\n";
}
260 |
|
261 |
# dump totals
#
# One summary row per group/service with accumulated downtime and number
# of failures. The name comes from the log file and is HTML-escaped.
foreach my $sg (keys %downtime) {
    print Tr(td({-colspan=>3,-align=>'right'},"total for ".$q->escapeHTML($sg).":"),
        td({-bgcolor=>'#e0e0e0',-align=>'right'},dur($downtime{$sg})),
        td(small("in ".$sg_count{$sg}." failures"))),"\n";
}

print end_table,
    end_form;
272 |
|