1 |
#!/usr/bin/perl -w |
2 |
|
3 |
# parse file.alert mon logs and report (up|down)time of services |
4 |
# |
5 |
# 2003-09-03 Dobrica Pavlinusic <dpavlin@rot13.org> |
6 |
# 2003-10-05 converted to CGI script |
7 |
# |
8 |
|
9 |
use strict; |
10 |
use POSIX qw(strftime); |
11 |
use CGI qw/:standard *table/; |
12 |
use CGI::Carp qw(fatalsToBrowser); |
13 |
use Data::Sorting qw(:arrays); |
14 |
use Time::ParseDate; |
15 |
use Time::Available; |
16 |
use Cache::FileCache; |
17 |
|
18 |
use Data::Dumper; |
19 |
|
20 |
# strftime patterns: date only, and date with time
my $date_fmt      = '%Y-%m-%d';
my $date_time_fmt = '%Y-%m-%d %H:%M:%S';

# defaults offered in the date limit fields of the form
my $from_date = 'now - 6 months';
my $to_date   = 'now';

# working days definition (1-7; mon=1)
my $dayMask = Time::Available::DAY_WEEKDAY;
# working hours
my $from_time_interval = '7:00';
my $to_time_interval   = '17:00';

# debug level starts at 1 and is raised once for each of -v / -d
# found among the command line arguments
my $debug = 1;
foreach my $switch (qr/-v/, qr/-d/) {
	$debug++ if (grep { $_ =~ $switch } @ARGV);
}

# day bit (as defined by Time::Available) => short label used in the form;
# the explicit () makes sure the constant is called, not quoted by =>
my %days = (
	Time::Available::DAY_MONDAY()    => 'Mo',
	Time::Available::DAY_TUESDAY()   => 'Tu',
	Time::Available::DAY_WEDNESDAY() => 'We',
	Time::Available::DAY_THURSDAY()  => 'Th',
	Time::Available::DAY_FRIDAY()    => 'Fr',
	Time::Available::DAY_SATURDAY()  => 'Sa',
	Time::Available::DAY_SUNDAY()    => 'Su',
);
45 |
|
46 |
# request object; all form fields and sort options are read from it
my $q = CGI->new;

my $print_orphans = $q->param('print_orphans') || 0;
my $rep_reset = $q->param('rep_reset') || 0;
# multi-valued on purpose: one value per checked service/group
my @sg_selected = $q->param('sg_filter');

# init misc sort parameters
#
# usort = ascending, dsort = descending; when both are present dsort wins.
# Both are removed from the query so that $q->url(-query=>1) does not carry
# the old sort order into newly generated sort links.
my @sort;
my $order;
my %sort_param;
my ($usort,$dsort);
# only columns that exist in a report row may be used as sort key
my %sortable = map { $_ => 1 } qw(sg from to dur desc);
if ($q->param('usort')) {
	my $col = $q->param('usort');
	$q->delete('usort');
	if ($sortable{$col}) {
		$sort_param{'usort'} = $col;
		@sort = ( -compare => 'numeric', $col );
	}
}
if ($q->param('dsort')) {
	my $col = $q->param('dsort');
	$q->delete('dsort');
	if ($sortable{$col}) {
		$sort_param{'dsort'} = $col;
		@sort = ( -compare => 'numeric', -order=>'reverse', $col );
	}
}

# make interval
#
# $working_days stays undefined unless the time limit was requested
my $working_days;
if ($q->param('use_time_limit')) {
	$dayMask=0;
	foreach my $dm ($q->param('day_interval')) {
		# ignore anything that is not a plain number before OR-ing it in
		next unless ($dm =~ /^\d+$/);
		$dayMask |= $dm;
	}
	# scalar() is required: in list context param() returns an empty list
	# for a missing field (or several values for a repeated one), which
	# would shift the named arguments of the constructor
	$working_days = Time::Available->new(
		start   => scalar($q->param('from_time_interval')),
		end     => scalar($q->param('to_time_interval')),
		dayMask => $dayMask,
	);
}

# init cache and setup expiration
my $cache = Cache::FileCache->new({ default_expires_in => '10 min' });
80 |
|
81 |
# |
82 |
# This option (originally activated via the command switch -r, now the
# rep_reset form checkbox) will reset the failure duration
# if a repeated failure on the same group/service happened.
# If you want honest reporting (or grouped only by group and service),
# you shouldn't turn it on :-) However, if you have just failure events in your
# log, this will produce output which will show the duration BETWEEN two failures
87 |
# |
88 |
|
89 |
# pretty format date
#
# Takes a unix timestamp and returns it formatted with $date_time_fmt in
# local time. When $debug is set the raw timestamp is appended in square
# brackets. Returns "?" if the timestamp is missing or false.
sub d {
	my $utime = shift || return "?";
	my $out = strftime($date_time_fmt,localtime($utime));
	# print the epoch we already have instead of relying on the
	# non-standard %s conversion of strftime
	$out .= sprintf(" [%d]",$utime) if ($debug);
	return $out;
}
98 |
# pretty format duration
#
# Takes a duration in seconds and returns it as "[Nd ]HH:MM:SS"; the day
# part is printed only when the duration is at least one day. When $debug
# is set the raw number of seconds is appended in square brackets.
# Returns "0" if the duration is missing or zero.
# A negative duration is formatted by magnitude and prefixed with "-".
sub dur {
	my $dur = shift || return "0";

	# Perl's % takes the sign of its right operand, so splitting a
	# negative number directly would give a bogus positive remainder;
	# work on the magnitude and remember the sign
	my $s = int(abs($dur));
	my $sign = ($dur < 0 && $s > 0) ? "-" : "";

	my $d = int($s/(24*60*60));
	$s = $s % (24*60*60);
	my $h = int($s/(60*60));
	$s = $s % (60*60);
	my $m = int($s/60);
	$s = $s % 60;

	my $out = $sign;
	$out .= $d."d " if ($d > 0);
	$out .= sprintf("%02d:%02d:%02d",$h,$m,$s);
	$out .= sprintf(" [%d]",$dur) if ($debug);

	return $out;
}
120 |
|
121 |
# read log and calculate
#
# Fills four structures, either from cache or by parsing the log:
#   $data      - array ref of report rows (sg, from, to, dur, desc)
#   $downtime  - hash ref: "group/service" => summed downtime in seconds
#   $sg_filter - hash ref: every "group/service" for which an event was counted
#   $sg_count  - hash ref: "group/service" => number of reported rows

my %fail;	# "group/service" => time of the failure which is still open
my $downtime;	# total downtime
my $sg_filter;	# filter for service/group
my $sg_count;	# count number of downtimes

my $log_file="/var/log/mon/sap.log";

my $data;

# generate unique key for this data and options
my $cache_key="monlog".join("",@sg_selected).$print_orphans.$rep_reset;

# cache is consulted only when debugging is off
if (! $debug) {
	$data = $cache->get( $cache_key );
	$downtime = $cache->get("downtime $cache_key");
	$sg_filter = $cache->get("sg_filter $cache_key");
	$sg_count = $cache->get("sg_count $cache_key");
}

if (!$data || !$downtime || !$sg_filter || !$sg_count) {

	# start from empty containers, so that a partial cache hit can't
	# leave old rows behind which would then be reported twice
	($data, $downtime, $sg_filter, $sg_count) = ([], {}, {}, {});

	# exact lookup of selected group/service names; the names come from
	# the request and must not be interpolated into a regex
	my %selected = map { $_ => 1 } @sg_selected;

	open(my $log, '<', $log_file) || die "$log_file: $!";

	while (my $line = <$log>) {
		chomp($line);
		# status group service unixtime (human readable date) description
		next unless ($line =~ /^(failure|up)\s+(\S+)\s+(\S+)\s+(\d+)\s+\(([^)]+)\)\s+(.+)$/);
		my ($status,$group,$service,$utime,undef,$desc) = ($1,$2,$3,$4,$5,$6);
		my $id = "$group/$service";
		my $wanted = $selected{$id};

		if ($status eq "up" && defined($fail{$id})) {
			# service came back: close the open failure
			if ($wanted) {
				push @$data, {
					'sg'=>$id,
					'from'=>$fail{$id},
					'to'=>$utime,
					'dur'=>$utime - $fail{$id},
					'desc'=>$desc };
				$downtime->{$id} += ($utime - $fail{$id});
				$sg_count->{$id}++;
			}
			$sg_filter->{$id}++;
			delete $fail{$id};
		} elsif ($status eq "up") {
			# up without failure seen before it (orphan); -1 marks
			# the unknown start and duration
			if ($print_orphans && $wanted) {
				push @$data, {
					'sg'=>$id,
					'from'=>-1,
					'to'=>$utime,
					'dur'=>-1,
					'desc'=>$desc };
				$sg_count->{$id}++;
			}
			delete $fail{$id};
			$sg_filter->{$id}++;
		} elsif (defined($fail{$id})) {
			# failure repeated while the previous one is still open
			if ($rep_reset && $wanted) {
				push @$data, {
					'sg'=>$id,
					'from'=>$fail{$id},
					'to'=>$utime,
					'dur'=>$utime - $fail{$id},
					'desc'=>'[failure again]'};
				$downtime->{$id} += ($utime - $fail{$id});
				$fail{$id} = $utime;
				$sg_count->{$id}++;
			}
			$sg_filter->{$id}++;
		} else {
			# first failure: remember when it started
			$fail{$id} = $utime;
		}
	}
	close($log);

	$cache->set($cache_key, $data);
	$cache->set("downtime $cache_key", $downtime);
	$cache->set("sg_filter $cache_key", $sg_filter);
	$cache->set("sg_count $cache_key", $sg_count);

}
200 |
|
201 |
# generate output
#
print header,start_html("mon availiability report");

# make some filters
#
# The pieces are produced in the same order in which they are printed:
# the form has to be started before its fields are generated.

print start_form;
print start_table({-border=>0,-cellspacing=>0,-cellpadding=>0});

# left cell: one checkbox for each group/service found in the log
my @sg_names = keys %$sg_filter;
my @filter_cell = (
	em("Show just service/group:"),br,
	checkbox_group(
		-name      => 'sg_filter',
		-values    => \@sg_names,
		-default   => \@sg_names,
		-linebreak => 'true',
	),
);

# right cell: report options, date limit and working time limit
my @option_cell = (
	em("Other options:"),br,
	$q->checkbox(
		-name    => 'rep_reset',
		-checked => 0,
		-label   => "show repeated failures on same service as individual failures",
	),
	br,
	$q->checkbox(
		-name    => 'print_orphans',
		-checked => 0,
		-label   => "show records which are not complete in this interval",
	),
	br,
	$q->checkbox(
		-name    => 'use_date_limit',
		-checked => 1,
		-label   => "use date limit from:",
	),
	$q->textfield(-name=>'from_date',-size=>20,-default=>$from_date),
	" to: ",
	$q->textfield(-name=>'to_date',-size=>20,-default=>$to_date),
	small('Using <a href="http://search.cpan.org/search?mode=module&query=Time::ParseDate">Time::ParseDate</a>'),
	br,
	$q->checkbox(
		-name    => 'use_time_limit',
		-checked => 1,
		-label   => "use time limit for each day:",
	),
	$q->textfield(-name=>'from_time_interval',-size=>8,-default=>$from_time_interval),
	" to: ",
	$q->textfield(-name=>'to_time_interval',-size=>8,-default=>$to_time_interval),
	br,"Days: ",
	$q->checkbox_group(
		-name     => 'day_interval',
		-values   => [ sort { $a <=> $b } keys %days ],
		-labels   => \%days,
		-defaults => [
			Time::Available::DAY_MONDAY,
			Time::Available::DAY_TUESDAY,
			Time::Available::DAY_WEDNESDAY,
			Time::Available::DAY_THURSDAY,
			Time::Available::DAY_FRIDAY,
		],
	),
	$q->submit(-name=>'show',-value=>'Show report'),
);

print Tr(td(@filter_cell),td(@option_cell));
print end_table;
251 |
|
252 |
# dump report |
253 |
# |
254 |
|
255 |
# Return HTML for one sort arrow in a column header.
#
# Arguments:
#   $q   - CGI object, used to build the URL of the current request
#   $col - name of the column to sort by
#   $dir - direction, 'u' or 'd' (case insensitive); selects the usort/dsort
#          parameter and the &uArr;/&dArr; entity
#
# Returns just the arrow entity if the report is already sorted that way
# (according to %sort_param), otherwise the arrow wrapped in a link which
# requests that sort order. Returns nothing if any argument is missing.
sub sort_link {
	my $q = shift || return;
	my $col = shift || return;
	my $dir = shift || return;
	$dir = lc($dir);

	my $arrow = '&'.$dir.'Arr;';
	if ($sort_param{$dir.'sort'} && $sort_param{$dir.'sort'} eq $col) {
		return $arrow;
	}

	# the URL has no query string when the request had no parameters;
	# in that case the sort parameter has to start the query with "?"
	my $url = $q->url(-query=>1);
	my $sep = ($url =~ /\?/) ? '&' : '?';
	return '<a href="'.$url.$sep.$dir.'sort='.$col.'">'.$arrow.'</a>';
}
265 |
|
266 |
|
267 |
# Date limit: both bounds are parsed from the form. A bound which can't be
# parsed stays undefined, is shown as "?" and is not used for filtering.
my ($from_time,$to_time);
my ($from_html,$to_html) = ('','');
if ($q->param('use_date_limit')) {
	# scalar() keeps a missing field from shifting parsedate's arguments
	$from_time = parsedate(scalar($q->param('from_date')), UK=>1);
	$to_time = parsedate(scalar($q->param('to_date')), UK=>1);
	$from_html = defined($from_time) ? strftime($date_fmt,localtime($from_time)) : '?';
	$to_html = defined($to_time) ? strftime($date_fmt,localtime($to_time)) : '?';
	$from_html .= " [$from_time] " if ($debug && defined($from_time));
	$to_html .= " [$to_time] " if ($debug && defined($to_time));
}

# sort data
#
my @sorted = sorted_array(@$data, @sort);

# the dump contains request data, so it is escaped before printing
print "-- sort: ",$q->escapeHTML(Dumper(@sort))," (data: ".@$data." sorted: ".@sorted.") --\n",br,"-- dayMask: $dayMask --\n",br,"-- cache_key: ",$q->escapeHTML($cache_key)," --\n",br if ($debug);

print start_table({-border=>1,-cellspacing=>0,-cellpadding=>2,-width=>'100%'});

# header row; the duration header spans two cells because each data row
# has the plain duration and the duration within working time
print Tr(
	th("group/service"),
	th({-bgcolor=>'#f0f0f0'},'<nobr>'.
		sort_link($q,'from','u').' from '.
		sort_link($q,'from','d').'</nobr>',
		br,$from_html
	),
	th(	'<nobr>'.
		sort_link($q,'to','u').' to '.
		sort_link($q,'to','d').'</nobr>',
		br,$to_html
	),
	th({-bgcolor=>'#e0e0e0',-colspan=>2},'<nobr>'.
		sort_link($q,'dur','u').' duration '.
		sort_link($q,'dur','d').'</nobr>'
	),
	th("description")
) if (scalar @sorted > 0);

foreach my $row (@sorted) {
	# rows which are not completely inside the date limit are skipped
	next if (defined($from_time) && $row->{from} < $from_time);
	next if (defined($to_time) && $row->{to} > $to_time);

	my ($from,$dur,$int) = ('unknown','unknown','unknown');
	if ($row->{from} != -1 ) {
		$from = d($row->{from});
		$dur = dur($row->{to} - $row->{from});
		# $working_days exists only when the time limit was requested;
		# without it the working time column stays 'unknown'
		# NOTE(review): arguments are passed as (to, from) - confirm the
		# order expected by Time::Available::interval
		$int = dur($working_days->interval($row->{to},$row->{from})) if ($working_days);
	}
	# sg and desc are text from the log file, so they are escaped
	print Tr(
		td({-align=>'left',-valign=>'center'},$q->escapeHTML($row->{sg})),
		td({-align=>'right',-bgcolor=>'#f0f0f0'},$from),
		td({-align=>'right'},d($row->{to})),
		td({-align=>'center',-bgcolor=>'#e0e0e0'},$dur),
		td({-align=>'center',-bgcolor=>'#e0e0e0'},$int),
		td({-align=>'left'},$q->escapeHTML($row->{desc})),
	),"\n";
}

# dump totals
#
# totals are taken from $downtime and $sg_count as they were collected
# from the log; the date limit above is not applied to them

foreach my $sg (sort keys %$downtime) {
	print Tr(td({-colspan=>3,-align=>'right'},"total for ".$q->escapeHTML($sg).":"),
		td({-bgcolor=>'#e0e0e0',-align=>'right',-colspan=>2},dur($downtime->{$sg})),
		td(small("in ".$sg_count->{$sg}." failures"))),"\n";
}

print end_table,
	end_form,
	end_html;
333 |
|