1 |
dpavlin |
1.1 |
#!/usr/bin/perl -w |
2 |
|
|
|
3 |
|
|
# parse file.alert mon logs and report (up|down)time of services |
4 |
|
|
# |
5 |
|
|
# 2003-09-03 Dobrica Pavlinusic <dpavlin@rot13.org> |
6 |
|
|
# 2003-10-05 converted to CGI script |
7 |
|
|
# |
8 |
|
|
|
9 |
|
|
use strict; |
10 |
|
|
use POSIX qw(strftime); |
11 |
dpavlin |
1.8 |
use CGI; |
12 |
|
|
#use CGI::Carp qw(fatalsToBrowser); |
13 |
dpavlin |
1.1 |
use Data::Sorting qw(:arrays); |
14 |
dpavlin |
1.2 |
use Time::ParseDate; |
15 |
dpavlin |
1.3 |
use Time::Available; |
16 |
dpavlin |
1.4 |
use Cache::FileCache; |
17 |
dpavlin |
1.2 |
|
18 |
dpavlin |
1.1 |
use Data::Dumper; |
19 |
|
|
|
20 |
dpavlin |
1.2 |
# strftime() formats: plain date, and date + time as an HTML fragment
my $date_fmt = "%Y-%m-%d";
#my $date_time_fmt = "%Y-%m-%d %H:%M:%S";
my $date_time_fmt = "<small>%a</small> <nobr>%Y-%m-%d</nobr> %H:%M:%S";

# default report window (used as defaults of the date fields in the form)
my $from_date = "now - 6 months";
my $to_date = "now";

# default working days (mask made of Time::Available DAY_* constants)
my $dayMask = Time::Available::DAY_WEEKDAY;
# default working hours
my $from_time_interval = "7:00";
my $to_time_interval = "17:00";

# debug level: raised once for each kind of switch found in @ARGV
my $debug = 0;
$debug++ if (grep { /-v/ } @ARGV);
$debug++ if (grep { /-d/ } @ARGV);

# short labels for the day check boxes, keyed by Time::Available day constant
my %days = (
    Time::Available::DAY_MONDAY=>'Mo',
    Time::Available::DAY_TUESDAY=>'Tu',
    Time::Available::DAY_WEDNESDAY=>'We',
    Time::Available::DAY_THURSDAY=>'Th',
    Time::Available::DAY_FRIDAY=>'Fr',
    Time::Available::DAY_SATURDAY=>'Sa',
    Time::Available::DAY_SUNDAY=>'Su'
);
46 |
|
|
|
47 |
dpavlin |
1.1 |
my $q = CGI->new;

# report options taken from the request
my $print_orphans = $q->param('print_orphans') || 0;
my $rep_reset = $q->param('rep_reset') || 0;
my @sg_selected = $q->param('sg_filter');

# init misc sort parameters
my @sort_rules;
my $order;
my %sort_param;
my ($usort,$dsort);

# 'usort' asks for an ascending and 'dsort' for a descending numeric sort on
# the named row field; when both are given the later one ('dsort') wins.
# The parameter is removed from $q after it has been copied to %sort_param.
foreach my $sort_name ('usort', 'dsort') {
    next unless ($q->param($sort_name));
    $sort_param{$sort_name} = $q->param($sort_name);
    $q->delete($sort_name);
    my @direction = ($sort_name eq 'dsort') ? ( -order=>'reverse' ) : ();
    @sort_rules = ( -compare => 'numeric', @direction, scalar $sort_param{$sort_name} );
}
68 |
|
|
|
69 |
dpavlin |
1.3 |
# make interval
#
# When the "use time limit" option is on, $dayMask is rebuilt from the
# submitted day check boxes and a Time::Available object is created for
# the submitted working hours.
my $working_days;
if ($q->param('use_time_limit')) {
    $dayMask=0;
    foreach my $dm ($q->param('day_interval')) {
        # only plain numbers are OR-ed in; anything else is ignored
        next unless (defined($dm) && $dm =~ /^\d+\z/);
        $dayMask |= $dm;
    }
    # param() is deliberately called in scalar context: in list context a
    # missing parameter returns an empty list, which would shift the named
    # arguments of the constructor
    my $start = $q->param('from_time_interval');
    my $end = $q->param('to_time_interval');
    # missing or empty fields fall back to the configured working hours
    $start = $from_time_interval unless (defined($start) && $start ne '');
    $end = $to_time_interval unless (defined($end) && $end ne '');
    $working_days = Time::Available->new(start=>$start, end=>$end, dayMask=>$dayMask);
}

# init cache and setup expiration
my $cache = Cache::FileCache->new({ default_expires_in => '10 min' });
81 |
|
|
|
82 |
dpavlin |
1.1 |
# |
83 |
|
|
# This option (activated via the rep_reset CGI parameter) will reset failure
# duration if a repeated failure on the same group/service happened.
# If you want honest reporting (or grouped only by group and service),
# you shouldn't turn it on :-) However, if you have just failure events in your
# log, this will produce output which will show duration BETWEEN two failures
88 |
|
|
# |
89 |
|
|
|
90 |
|
|
# Pretty-format a unix timestamp using $date_time_fmt.
#
# Returns "?" when the timestamp is missing (or otherwise false). In debug
# mode " [%s]" is added to the strftime() format.
sub d {
    my $utime = shift;
    return "?" unless ($utime);

    my $fmt = $date_time_fmt;
    $fmt .= " [%s]" if ($debug);

    return strftime($fmt, localtime($utime));
}
99 |
|
|
# Pretty-format a duration given in seconds as "[-][Nd ]HH:MM:SS".
#
# Returns "0" when the duration is missing or zero. The day part is only
# printed when the duration is at least one day long. A negative duration
# (end before start) is printed with a leading minus sign. In debug mode
# the original value is appended in square brackets.
sub dur {
    my $dur = shift || return "0";

    # split the whole-second magnitude; Perl's % takes the sign of its right
    # operand, so a negative value would wrap around to a bogus time
    my $s = int(abs($dur));
    my $sign = ($dur < 0 && $s > 0) ? "-" : "";

    my $d = int($s/(24*60*60));
    $s = $s % (24*60*60);
    my $h = int($s/(60*60));
    $s = $s % (60*60);
    my $m = int($s/60);
    $s = $s % 60;

    my $out = $sign;
    $out .= $d."d " if ($d > 0);
    if ($debug) {
        $out .= sprintf("%02d:%02d:%02d [%d]",$h,$m,$s, $dur);
    } else {
        $out .= sprintf("%02d:%02d:%02d",$h,$m,$s);
    }

    return $out;
}
121 |
|
|
|
122 |
|
|
# read log and calculate
#
# Fills two structures, which are also stored in the cache:
#   $data      - array ref of report rows { sg, from, to, dur, desc }
#   $sg_filter - hash ref keyed by "group/service"; incremented for every
#                "up" line and every repeated "failure" line
# Rows are only created for group/service pairs selected in the request.

my %fail;	# "group/service" => time of the failure which is still open
my $sg_filter;	# filter for service/group

my $log_file="/var/log/mon/sap.log";

my $data;

# generate unique key for this data and options
my $cache_key="monlog".join("|",@sg_selected)."|".$print_orphans."|".$rep_reset;

# debug disables cache
if (! $debug) {
    $data = $cache->get( $cache_key );
    $sg_filter = $cache->get("sg_filter $cache_key");
}

if (!$data || !$sg_filter) {

    # Selected names are compared exactly. Using the log text as a regex
    # would treat characters like "." as metacharacters and would also
    # accept "web/http" when only "web/https" is selected.
    my %sg_wanted = map { $_ => 1 } @sg_selected;

    open(my $log, '<', $log_file) or die "$log_file: $!";

    while (my $line = <$log>) {
        chomp $line;
        next unless ($line =~ /^(failure|up)\s+(\S+)\s+(\S+)\s+(\d+)\s+\(([^)]+)\)\s+(.+)$/);

        my ($status,$group,$service,$utime,$date,$desc) = ($1,$2,$3,$4,$5,$6);
        my $id = "$group/$service";
        my $selected = $sg_wanted{$id};

        if ($status eq "up" && defined($fail{$id})) {
            # service came back: close the open failure
            if ($selected) {
                push @$data, {
                    'sg'=>$id,
                    'from'=>$fail{$id},
                    'to'=>$utime,
                    'dur'=>($utime-$fail{$id}),
                    'desc'=>$desc };
            }
            $sg_filter->{$id}++;
            delete $fail{$id};
        } elsif ($status eq "up") {
            # "up" without a known failure (orphan); from is marked as -1
            if ($print_orphans && $selected) {
                push @$data, {
                    'sg'=>$id,
                    'from'=>-1,
                    'to'=>$utime,
                    'dur'=>0,
                    'desc'=>$desc };
            }
            delete $fail{$id};
            $sg_filter->{$id}++;
        } elsif (defined($fail{$id})) {
            # failure repeated while the previous one is still open
            if ($rep_reset && $selected) {
                push @$data, {
                    'sg'=>$id,
                    'from'=>$fail{$id},
                    'to'=>$utime,
                    'dur'=>($utime-$fail{$id}),
                    'desc'=>'[failure again]'};
                $fail{$id} = $utime;
            }
            $sg_filter->{$id}++;
        } else {
            # first failure: remember when it started
            $fail{$id} = $utime;
        }
    }
    close($log);

    $cache->set($cache_key, $data);
    $cache->set("sg_filter $cache_key", $sg_filter);

}
194 |
|
|
|
195 |
|
|
# generate output
#
print $q->header,$q->start_html("mon availiability report");

# make some filters
#

# Names for the service/group check boxes. They are sorted so that the
# boxes keep the same order on every request (hash order is not stable),
# and a still undefined $sg_filter is treated as "no names".
my @sg_names = sort keys %{ $sg_filter || {} };

print $q->start_form,'
<table border=0 cellspacing=0 cellpadding=0>
<tr><td>
<em>Show just service/group:</em><br>
',$q->checkbox_group(-name=>'sg_filter',
    -values=>[ @sg_names ],
    -default=>[ @sg_names ],
    -linebreak=>'true',
),'
</td><td>
<em>Other options:</em><br>',
$q->checkbox(-name=>'rep_reset',-checked=>0,
    -label=>"show repeated failures on same service as individual failures"),'<br>',
$q->checkbox(-name=>'print_orphans',-checked=>0,
    -label=>"show records which are not complete in this interval"),'<br>',
$q->checkbox(-name=>'use_date_limit',-checked=>1,
    -label=>"use date limit from:"),
$q->textfield(-name=>'from_date',-size=>20,-default=>$from_date),' to: ',
$q->textfield(-name=>'to_date',-size=>20,-default=>$to_date),'
<small>Using <a href="http://search.cpan.org/search?mode=module&query=Time::ParseDate">Time::ParseDate</a></small>
<br>
',$q->checkbox(-name=>'use_time_limit',-checked=>1, -value=>'on',
    -label=>"use time limit for each day:"),
$q->textfield(-name=>'from_time_interval',-size=>8,-default=>$from_time_interval),' to: ',
$q->textfield(-name=>'to_time_interval',-size=>8,-default=>$to_time_interval),
'<br>Days: ',
$q->checkbox_group(-name=>'day_interval',
    -values=>[ sort { $a <=> $b } keys %days ],
    -labels=>\%days,
    -defaults=>[
        Time::Available::DAY_MONDAY,
        Time::Available::DAY_TUESDAY,
        Time::Available::DAY_WEDNESDAY,
        Time::Available::DAY_THURSDAY,
        Time::Available::DAY_FRIDAY,
    ]
),
$q->submit(-name=>'show',-value=>'Show report'),'
</td></tr>
</table>
',$q->end_form;
243 |
|
|
|
244 |
|
|
# bail out if there is no data
#
# $data is only created when a report row is pushed (or fetched from the
# cache), so "no rows" usually means it is still undefined; an undefined
# $data is therefore handled the same way as an empty array.
if (!$data || scalar @$data < 1) {
    print $q->end_html;
    exit;
}
249 |
dpavlin |
1.1 |
|
250 |
|
|
# dump report |
251 |
|
|
# |
252 |
|
|
|
253 |
dpavlin |
1.5 |
# HTML for the sort arrows, keyed by direction ('u' = up, 'd' = down).
# Numeric character references are used instead of raw non-ASCII
# characters so that the arrows don't depend on the charset of the page.
my %dir_html_entity = (
#    'u' => '&#8657;',
#    'd' => '&#8659;'
    'u' => '&#9650;',
    'd' => '&#9660;',
);
259 |
|
|
|
260 |
dpavlin |
1.1 |
# Return the HTML of one sort arrow for a column header.
#
#   $q   - CGI object, used to rebuild the URL of the current request
#   $col - name of the row field to sort by
#   $dir - 'u' or 'd' (case-insensitive); selects the 'usort' or 'dsort'
#          parameter and the arrow from %dir_html_entity
#
# If %sort_param says that the report is already sorted by $col in that
# direction, only the arrow is returned; otherwise the arrow is wrapped in
# a link which adds the sort parameter to the current URL.
# Returns nothing if any of the arguments is missing.
sub sort_link {
    my $q = shift || return;
    my $col = shift || return;
    my $dir = lc(shift) || return;

    if ($sort_param{$dir.'sort'} && $sort_param{$dir.'sort'} eq $col) {
        return $dir_html_entity{$dir};
    }

    my $url = $q->url(-query=>1);
    # start a query string if the current URL doesn't have one yet
    my $sep = ($url =~ /\?/) ? '&' : '?';
    # the URL goes into an HTML attribute, so it has to be escaped
    my $href = $q->escapeHTML($url.$sep.$dir.'sort='.$col);

    return '<a href="'.$href.'">'.$dir_html_entity{$dir}.'</a>';
}
270 |
|
|
|
271 |
dpavlin |
1.2 |
|
272 |
|
|
# Date limit of the report: $from_time / $to_time are unix timestamps and
# $from_html / $to_html their formatted form for the table header. All four
# stay undefined when the "use date limit" option is off.
my ($from_time,$to_time,$from_html,$to_html);
if ($q->param('use_date_limit')) {
    # param() is deliberately called in scalar context: in list context a
    # missing parameter returns an empty list and UK=>1 would be taken as
    # the date to parse
    my $from_str = $q->param('from_date');
    my $to_str = $q->param('to_date');
    $from_time = parsedate($from_str, UK=>1) if (defined($from_str));
    $to_time = parsedate($to_str, UK=>1) if (defined($to_str));
    # missing or unparsable dates fall back to the default report window
    $from_time = parsedate($from_date, UK=>1) unless (defined($from_time));
    $to_time = parsedate($to_date, UK=>1) unless (defined($to_time));
    $from_html = strftime($date_fmt,localtime($from_time));
    $to_html = strftime($date_fmt,localtime($to_time));
    $from_html .= " [$from_time] " if ($debug);
    $to_html .= " [$to_time] " if ($debug);
}
281 |
|
|
|
282 |
|
|
# sort data
#
my @sorted = sorted_array( @$data, @sort_rules );

if ($debug) {
    print "-- sort: ",Dumper(@sort_rules)," (data: ".@$data." sorted: ".@sorted.") --\n<br>-- dayMask: $dayMask --\n<br>-- cache_key: $cache_key --\n<br>";
}

# content of one sortable header cell: up link, column title, down link
my $sort_cell = sub {
    my ($col, $title) = @_;
    return (sort_link($q,$col,'u'), ' '.$title.' ', sort_link($q,$col,'d'), '</nobr>');
};

# table header; the from/to cells also show the date limit when it is set
print '<table border=1 cellspacing=0 cellpadding=2 width="100%">
<tr>
<th>group/service</th><th bgcolor="#f0f0f0"><nobr>', $sort_cell->('from', 'from');
print '<br>',$from_html if ($from_html);
print '</th><th><nobr>', $sort_cell->('to', 'to');
print '<br>',$to_html if ($to_html);
print '</th><th bgcolor="#e0e0e0"><nobr>', $sort_cell->('dur', 'duration'), '
</th><th>description</th>
</tr>';
300 |
dpavlin |
1.1 |
|
301 |
dpavlin |
1.5 |
my $downtime;		# total downtime (hash ref keyed by group/service)
my $downinterval;	# total downtime in time interval
my $sg_count;		# count number of downtimes

# Print one table row for each record and collect the totals.
foreach my $row (@sorted) {
    # rows without a known start have from set to -1 (orphans)
    my $is_orphan = ($row->{from} == -1);

    if ($q->param('use_date_limit')) {
        # an orphan has no start time to compare, so only its end is
        # checked; otherwise it could never be shown with a date limit
        next if (!$is_orphan && $row->{from} < $from_time);
        next if ($row->{to} > $to_time);
    }
    my ($from,$dur,$int) = ('unknown','unknown','unknown');

    if (!$is_orphan) {
        $from = d($row->{from});
        $dur = $row->{to} - $row->{from};
        $downtime->{$row->{sg}} += $dur;
        if ($q->param('use_time_limit')) {
            # downtime inside working hours, with the full duration below it
            $int = $working_days->interval($row->{from},$row->{to});
            $dur = dur($int)."<br><nobr><small>&sum; ".dur($dur)."</small></nobr>";
            $downinterval->{$row->{sg}} += $int;
        } else {
            $dur = dur($dur);
        }
    }
    $sg_count->{$row->{sg}}++;

    # sg and desc come from the log file, so they are escaped for HTML
    print '<tr>
<td align="left" valign="center">',$q->escapeHTML($row->{sg}),'</td>
<td align="right" bgcolor="#f0f0f0">',$from,'</td>
<td align="right">',d($row->{to}),'</td>
<td align="center" bgcolor="#e0e0e0">',$dur,'</td>
<td align="left">',$q->escapeHTML($row->{desc}),'</td>
</tr>';
}
331 |
|
|
|
332 |
|
|
# dump totals
#
# One row per group/service which had downtime, in sorted order so that the
# rows don't move between requests. A still undefined $downtime (no rows
# printed) simply produces no total rows.
foreach my $sg (sort keys %{ $downtime || {} }) {
    my $dur;
    if ($downinterval->{$sg}) {
        # downtime inside working hours, with the full downtime below it
        $dur=dur($downinterval->{$sg})."<br><nobr><small>&sum; ".dur($downtime->{$sg})."</small></nobr>";
    } else {
        $dur=dur($downtime->{$sg});
    }
    # $sg comes from the log file, so it is escaped for HTML
    print '<tr><td colspan=3 align="right">total for ',$q->escapeHTML($sg),'</td>
<td bgcolor="#e0e0e0" align="right">',$dur,'</td>
<td><small>in ',$sg_count->{$sg},' failures</small></td>
</tr>';
}

print "</table>",
    $q->end_html;
350 |
|
|
|
351 |
dpavlin |
1.1 |
|