1 |
dpavlin |
1.1 |
#!/usr/bin/perl |
2 |
|
|
# |
3 |
|
|
# Return a list of hosts which are not reachable via ICMP echo |
4 |
|
|
# |
5 |
|
|
# Jim Trocki, trockij@transmeta.com |
6 |
|
|
# |
7 |
|
|
# $Id: fping.monitor 1.7 Mon, 27 Aug 2001 14:22:45 -0400 trockij $ |
8 |
|
|
# |
9 |
|
|
# Copyright (C) 1998, Jim Trocki |
10 |
|
|
# |
11 |
|
|
# This program is free software; you can redistribute it and/or modify |
12 |
|
|
# it under the terms of the GNU General Public License as published by |
13 |
|
|
# the Free Software Foundation; either version 2 of the License, or |
14 |
|
|
# (at your option) any later version. |
15 |
|
|
# |
16 |
|
|
# This program is distributed in the hope that it will be useful, |
17 |
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 |
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
19 |
|
|
# GNU General Public License for more details. |
20 |
|
|
# |
21 |
|
|
# You should have received a copy of the GNU General Public License |
22 |
|
|
# along with this program; if not, write to the Free Software |
23 |
|
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
24 |
|
|
# |
25 |
|
|
use strict; |
26 |
|
|
|
27 |
|
|
use Getopt::Std; |
28 |
|
|
|
29 |
|
|
# Command-line switches, filled in by getopts() from @ARGV:
#   -a, -h and -T are plain flags; -r, -s and -t each take a value.
my %opt = ();
getopts('ahr:s:t:T', \%opt);
31 |
|
|
|
32 |
|
|
# usage -- print the command-line synopsis on standard output, then
# terminate the program with exit status 0.  Takes no arguments and
# never returns to the caller.
sub usage {
    # Non-interpolating heredoc: the text holds no variables.
    my $synopsis = <<'END_USAGE';
usage: fping.monitor [-a] [-r num] [-s num] [-t num] [-T] host [host...]

-a only report failure if all hosts are unreachable
-r num retry "num" times for each host before reporting failure
-s num consider hosts which respond in over "num" msecs failures
-t num wait "num" msecs before sending retries
-T traceroute to each failed host. CAUTION: this may cause
this monitor to hang for a very long time

END_USAGE

    print $synopsis;

    exit 0;
}
48 |
|
|
|
49 |
|
|
# -h: print the help text and exit.
usage() if $opt{"h"};

# With no hosts on the command line there is nothing to check.
exit 0 if (@ARGV == 0);

# Reject malformed option values before they are used: -r and -t are
# placed on the fping command line, and -s is compared numerically with
# the round-trip times reported by fping.
foreach my $flag ("r", "t") {
    next unless defined $opt{$flag};
    die "fping.monitor: -$flag requires a non-negative integer\n"
        unless $opt{$flag} =~ /\A\d+\z/;
}
if (defined $opt{"s"} && $opt{"s"} !~ /\A(?:\d+(?:\.\d*)?|\.\d+)\z/) {
    die "fping.monitor: -s requires a non-negative number\n";
}

# Value handed to "fping -t"; falls back to 2000 when -t is absent or 0.
my $TIMEOUT = $opt{"t"} || 2000;

# Value handed to "fping -r".  "-r 0" (no retries) is a legitimate request,
# so the default of 3 applies only when -r was not given at all.
my $RETRIES = defined $opt{"r"} ? $opt{"r"} : 3;

# The fping invocation, without the host list.
my $CMD = "fping -e -r $RETRIES -t $TIMEOUT";

# Wall-clock bounds of the run, printed in the report.
my $START_TIME = time;
my $END_TIME;
58 |
|
|
|
59 |
|
|
# Run fping against the hosts named on the command line and sort every
# line it prints into one of the lists declared below.
#
# fping is started with fork/exec instead of a shell command string, so
# characters in a host name are never interpreted as shell syntax.  The
# child sends its stderr to the same pipe as stdout, which is what the
# former "2>&1" redirection did.
my $fping_pid = open (my $fping_out, "-|");
die "could not open pipe to fping: $!\n" unless defined $fping_pid;

if ($fping_pid == 0) {
    # Child process: STDOUT is already the pipe read by the parent.
    open (STDERR, ">&STDOUT")
        or warn "could not redirect stderr: $!\n";
    exec ("fping", "-e", "-r", $RETRIES, "-t", $TIMEOUT, @ARGV)
        or print "could not exec fping: $!\n";
    # Only reached when exec failed; the message above reaches the
    # parent as a line of output and 127 as the exit code.
    exit 127;
}

my @unreachable;        # hosts counted as down
my @alive;              # [host, rtt] pairs for hosts that answered
my @slow;               # [host, rtt] pairs slower than the -s threshold
my @other_prob;         # details for other per-host problems
my @error;              # other errors which cause a non-zero exit
my @icmp;               # ICMP messages output by fping
my %addr_unknown;       # hosts whose address fping could not find

my %want_host = map { $_ => 1 } @ARGV;  # hosts fping hasn't output yet

while (defined (my $line = <$fping_out>)) {
    chomp $line;

    if ($line =~ /^(\S+).*unreachable/) {
        my $host = $1;

        push @unreachable, $host;
        push @error, "unreachable host `$host' wasn't asked for"
            unless delete $want_host{$host};
    }
    elsif ($line =~ /^(\S+) is alive \((\S+)/) {
        my ($host, $rtt) = ($1, $2);

        push @error, "reachable host `$host' wasn't asked for"
            unless delete $want_host{$host};

        # With -s, a host that answers too slowly is not counted as alive.
        if ($opt{"s"} && $rtt > $opt{"s"}) {
            push @slow, [$host, $rtt];
        }
        else {
            push @alive, [$host, $rtt];
        }
    }
    elsif ($line =~ /^(\S+)\s+address\s+not\s+found/) {
        my $host = $1;

        $addr_unknown{$host} = 1;
        push @other_prob, "$host address not found";
        push @unreachable, $host;
        push @error, "unknown host `$host' wasn't asked for"
            unless delete $want_host{$host};
    }
    # ICMP Host Unreachable from 1.2.3.4 for ICMP Echo sent to 2.4.6.8
    # (among others)
    elsif ($line =~ /^ICMP (.*) for ICMP Echo sent to (\S+)/) {
        push @icmp, $line;
    }
    else {
        push @error, "unidentified output from fping: [$line]";
    }
}

# Hosts still in %want_host never appeared in fping's output; count them
# as unreachable.  Sorted so the report order does not depend on hash order.
foreach my $host (sort keys %want_host) {
    push @other_prob, "$host not listed in fping's output";
    push @unreachable, $host;
}

# Closing the pipe waits for fping and leaves its wait status in $?,
# which the code following this block reads.
close ($fping_out);
127 |
|
|
|
128 |
|
|
# Save the wait status left in $? by closing the fping pipe before
# anything else has a chance to overwrite it.
my $fping_status = $?;

$END_TIME = time;

# High byte of the wait status: fping's exit code.
my $retval = $fping_status >> 8;

# Low seven bits: the signal that ended fping, if any.  Without this
# check a killed fping would look like a clean exit with code 0.
my $fping_signal = $fping_status & 127;

if ($fping_status == -1) {
    push @error, "fping: could not determine exit status";
}
elsif ($fping_signal) {
    push @error, "fping: terminated by signal $fping_signal";
}
elsif ($retval == 3) {
    push @error, "fping: invalid cmdline arguments [$CMD @ARGV]";
}
elsif ($retval == 4) {
    push @error, "fping: system call failure";
}
elsif ($retval > 4) {
    push @error, "unknown return code ($retval) from fping";
}
# Exit codes below 3 need no error entry.
151 |
|
|
|
152 |
|
|
# Report.  The first line printed is the summary: either the fixed text
# "unusual errors" or the sorted names of all unreachable and slow hosts.
# This line is intentionally blank if there are no failures.
my $summary_line;
if (@error) {
    $summary_line = "unusual errors";
}
else {
    my @failed = (@unreachable, map { $_->[0] } @slow);
    $summary_line = join (" ", sort @failed);
}
print "$summary_line\n";

# Timing of the fping run.
print "\n";
print "start time: ", scalar localtime ($START_TIME), "\n";
print "end time : ", scalar localtime ($END_TIME), "\n";
print "duration : ", $END_TIME - $START_TIME, " seconds\n";

# Every detail section starts with a blank line and a title framed by
# two horizontal rules.
my $report_rule = "------------------------------------------------------------------------------";
my $print_banner = sub {
    my ($title) = @_;
    print "\n$report_rule\n$title\n$report_rule\n";
};

if (@error) {
    $print_banner->("unusual errors");
    print map { "$_\n" } @error;
}

if (@unreachable) {
    $print_banner->("unreachable hosts");
    print map { "$_\n" } @unreachable;

    if (@other_prob) {
        print "\nother problems:\n";
        print map { "$_\n" } @other_prob;
    }
}

if (@icmp) {
    $print_banner->("ICMP messages");
    print map { "$_\n" } @icmp;
}

if (@slow) {
    $print_banner->("slow hosts (response time which exceeds $opt{s}ms)");

    # Each entry is a [host, rtt] pair.
    foreach my $entry (@slow) {
        my ($name, $rtt) = @{$entry};
        printf ("%-40s %.2f ms\n", $name, $rtt);
    }
}

if (@alive) {
    $print_banner->("reachable hosts rtt");

    foreach my $entry (@alive) {
        my ($name, $rtt) = @{$entry};
        printf ("%-40s %.2f ms\n", $name, $rtt);
    }
}
234 |
|
|
|
235 |
|
|
# |
236 |
|
|
# traceroute |
237 |
|
|
# |
238 |
|
|
# With -T, run "traceroute -w 3" for every unreachable host whose address
# is known, writing its output (stdout and stderr) straight into the
# report.  traceroute is started with fork/exec rather than a shell
# command string, so a host name is never interpreted as shell syntax.
if ($opt{"T"} && @unreachable) {
    my $header_output = 0;

    foreach my $host (@unreachable) {
        # No address to trace to.
        next if $addr_unknown{$host};

        # Section header before the first trace, a blank line between
        # the following ones.
        if ($header_output++) {
            print "\n";
        }
        else {
            print <<EOF;

------------------------------------------------------------------------------
traceroute to unreachable hosts
------------------------------------------------------------------------------
EOF
        }

        my $trace_pid = fork;
        if (!defined $trace_pid) {
            print "could not fork for traceroute: $!\n";
            last;
        }

        if ($trace_pid == 0) {
            # Child: merge stderr into stdout, then become traceroute.
            open (STDERR, ">&STDOUT")
                or warn "could not redirect stderr: $!\n";
            exec ("traceroute", "-w", "3", $host)
                or print "could not exec traceroute: $!\n";
            # Only reached when exec failed.
            exit 127;
        }

        # Parent: wait for this trace to finish before starting the next.
        waitpid ($trace_pid, 0);
    }
}
253 |
|
|
|
254 |
|
|
# Exit status, decided in this order:
#   1. any unusual error                      -> 1
#   2. with -a: no host answered in time      -> 1, otherwise 0
#   3. any slow host                          -> 1
#   4. otherwise fping's own exit code, except that success is never
#      reported while hosts are listed as unreachable
exit 1 if @error;

#
# fail only if all hosts do not respond
#
if ($opt{"a"}) {
    exit (@alive ? 0 : 1);
}

exit 1 if @slow;

# $retval can be 0 even though @unreachable is not empty, for example when
# a requested host never appeared in fping's output; that must not count
# as success.
exit ($retval || (@unreachable ? 1 : 0));