- new

author Arkadiusz Miśkiewicz <arekm@maven.pl>

Mon, 7 Jun 2004 00:30:20 +0000 (00:30 +0000)

committer cvs2git <feedback@pld-linux.org>

Sun, 24 Jun 2012 12:13:13 +0000 (12:13 +0000)
author Arkadiusz Miśkiewicz <arekm@maven.pl>
Mon, 7 Jun 2004 00:30:20 +0000 (00:30 +0000)
committer cvs2git <feedback@pld-linux.org>
Sun, 24 Jun 2012 12:13:13 +0000 (12:13 +0000)
diff --git a/mon-debian.patch b/mon-debian.patch

new file mode 100644 (file)

index 0000000..9cb12ad
--- /dev/null
+++ b/mon-debian.patch
@@ -0,0 +1,595 @@
+--- mon-0.99.2.orig/doc/mon.8
++++ mon-0.99.2/doc/mon.8
+@@ -75,7 +75,7 @@
+ Read configuration from
+ .IR file .
+ This defaults to
+-IR /etc/mon/mon.cf " if the " /etc/mon
++.IR /etc/mon/mon.cf " if the " /etc/mon
+ directory exists, otherwise to
+ .IR /etc/mon.cf .
+ .TP
+@@ -241,7 +241,7 @@
+ monitor fping.monitor -t 4000 -r 2
+ \fR
+ .br
+-then the exectuable "fping.monitor" will be executed with these
++then the executable "fping.monitor" will be executed with these
+ parameters:
+ .br
+ \fC
+@@ -382,6 +382,7 @@
+ parameter, or in the
+ .I /usr/local/lib/mon/alert.d
+ and
++.I /usr/lib/mon/alert.d
+ directories if not specified.  They are invoked with the following command-line
+ parameters:
+ 
+@@ -551,8 +552,11 @@
+ .B \-s
+ command-line parameter.
+ 
+-Multiple alert paths may be specified by separating them with
+-a colon. All paths must be absolute.
++Multiple monitor paths may be specified by separating them with
++a colon.  Non-absolute paths are taken to be relative to the
++base directory
++.RI ( /usr/lib/mon
++by default).
+ 
+ When the configuration file is read, all monitors referenced from the
+ configuration will be looked up in each of these paths, and the
+@@ -909,6 +913,13 @@
+ .IR timeval .
+ 
+ .TP
++.BI failure_interval " timeval"
++Adjusts the polling interval to
++.I timeval
++when the service check is failing. Resets the interval
++to the original when the service succeeds.
++
++.TP
+ .BI traptimeout " timeval"
+ This keyword takes the same time specification argument as
+ .BI interval ,
+@@ -1206,13 +1217,6 @@
+ automatically to alert programs.
+ 
+ .TP
+-.BI failure_interval " timeval"
+-Adjusts the polling interval to
+-.I timeval
+-when the service check is failing. Resets the interval
+-to the original when the service succeeds.
+-
+-.TP
+ .BI upalert " alert [arg...]"
+ An
+ .B upalert
+--- mon-0.99.2.orig/mon
++++ mon-0.99.2/mon
+@@ -1369,6 +1369,21 @@
+                   $pref->{"no_comp_alerts"} = 1;
+                   next;
+               }
++
++              #
++              # for backwards-compatibility with <= 0.38.21
++              #
++              elsif ($var eq "comp_alerts")
++              {
++                  $pref->{"no_comp_alerts"} = 0;
++                  next;
++              }
++
++              else
++              {
++                  close (CFG);
++                  return "cf error: unknown syntax in period section [$l], line $line_num";
++              }
+           }
+ 
+           #
+@@ -1547,10 +1562,13 @@
+     # reap children that may be running
+     #
+     foreach my $pid (keys %runningpid) {
+-      my ($group, $service) = split (/\//, $runningpid{$pid});
+       kill 15, $pid;
+-      waitpid ($pid, 0);
+-      syslog ('info', "reset killed child $pid, exit status $?");
++      # XXX don't wait forever
++      if (waitpid ($pid, 0) == $pid) {
++          syslog ('info', "reset killed child $pid, exit status $?");
++      } else {
++          syslog ('notice', "reset couldn't reap child $pid ($!)");
++      }
+       remove_proc ($pid);
+     }
+ 
+@@ -1717,7 +1735,7 @@
+     socket (TRAPSERVER, PF_INET, SOCK_DGRAM, $udpproto) ||
+       die_die ("err", "could not create UDP socket: $!");
+     bind (TRAPSERVER, sockaddr_in ($CF{"TRAPPORT"}, $bindaddr)) ||
+-      die_die ("err", "could not bind UDP server port: $!");
++      die_die ("err", "could not bind UDP trap port $CF{'TRAPPORT'}: $!");
+     configure_filehandle (*TRAPSERVER) ||
+       die_die ("err", "could not configure UDP trap port: $!");
+     
+@@ -1729,7 +1747,7 @@
+     socket (SNMPSERVER, PF_INET, SOCK_DGRAM, $udpproto) ||
+       die_die ("err", "could not create UDP socket: $!");
+     bind (SNMPSERVER, sockaddr_in ($CF{"SNMPPORT"}, INADDR_ANY)) ||
+-      die_die ("err", "could not bind UDP server port: $!");
++      die_die ("err", "could not bind UDP SNMP port $CF{'SNMPPORT'}: $!");
+     configure_filehandle (*SNMPSERVER) ||
+       die_die ("err", "could not configure UDP SNMP port: $!");
+ }
+@@ -2771,10 +2789,25 @@
+     my ($summary, $tmnow, $buf);
+ 
+     $tmnow = time;
++
++    #
++    # Since this only waits while a monitor is running, alerts which are
++    # run while no monitors are active will become zombies.  They'll be
++    # reaped when a monitor is next started up.  This should be harmless
++    # with any normal configuration.  It could be fixed by either
++    # removing this test (and so always checking for dead kids, even
++    # when no monitors are active), or by integrating non-monitors into
++    # %runningpid and testing that rather than %running here.
++    #
+     return if (keys %running == 0);
+ 
+     while ((my $p = waitpid (-1, &WNOHANG)) >0)
+     {
++      if (!exists $runningpid{$p}) {
++          debug (1, "reaped non-monitor, pid $p status $?\n");
++          next;
++      }
++
+       my ($group, $service) = split (/\//, $runningpid{$p});
+       my $sref = \%{$watch{$group}->{$service}};
+ 
+@@ -4553,14 +4586,39 @@
+     syslog ("alert", "calling $alerttype $a for" .
+       " $args{group}/$args{service} ($alert,$args{args}) $summary");
+ 
+-    my $pid = open (ALERT, "|-");
++    #
++    # As a simple way to avoid blocking while writing data to the alert,
++    # I fork an extra kid to do the job.  First I fork one kid.  This
++    # one forks another which execs the alert, then the first kid writes
++    # the data to it.  The server process continues without waiting.
++    #
++    # XXX This could be done more efficiently by forking a single kid to
++    # exec the alert, then using the main select loop to write data to
++    # it.
++    #
++    my $pid = fork;
+     if (!defined $pid) {
+       syslog ('err', "could not fork: $!");
+       return undef;
+     }
+ 
+     #
+-    # child, the actual alert
++    # first child, fork off the alert and feed data to it
++    #
++    if ($pid == 0) {
++      $pid = open (ALERT, "|-");
++      if (!defined $pid) {
++          syslog ('err', "could not fork: $!");
++          exit;
++      }
++      if ($pid != 0) {
++          print ALERT $args{"output"};
++          exit;
++      }
++    }
++
++    #
++    # second child, the actual alert
+     #
+     if ($pid == 0) {
+       #
+@@ -4623,13 +4681,6 @@
+     }
+ 
+     #
+-    # this will block if the alert is sucking gas
+-    #
+-    print ALERT $args{"output"};
+-    close (ALERT);
+-    waitpid $pid, 0;
+-
+-    #
+     # test alerts don't count
+     #
+     return (1) if ($args{"flags"} & $FL_TEST);
+--- mon-0.99.2.orig/alert.d/mail.alert
++++ mon-0.99.2/alert.d/mail.alert
+@@ -43,7 +43,7 @@
+ $t = localtime($opt_t);
+ ($wday,$mon,$day,$tm) = split (/\s+/, $t);
+ 
+-open (MAIL, "| /usr/lib/sendmail -oi -t") ||
++open (MAIL, "| /usr/sbin/sendmail -oi -t") ||
+     die "could not open pipe to mail: $!\n";
+ print MAIL <<EOF;
+ To: $mailaddrs
+--- mon-0.99.2.orig/alert.d/netpage.alert
++++ mon-0.99.2/alert.d/netpage.alert
+@@ -41,7 +41,7 @@
+ 
+ $ALERT = $opt_u ? "UPALERT" : "ALERT";
+ 
+-open (MAIL, "| /usr/lib/sendmail -oi -t") ||
++open (MAIL, "| /usr/sbin/sendmail -oi -t") ||
+     die "could not open pipe to mail: $!\n";
+ 
+ print MAIL <<EOF;
+--- mon-0.99.2.orig/clients/monshow
++++ mon-0.99.2/clients/monshow
+@@ -1160,7 +1160,12 @@
+ @ @<<<<<<<<<<<<<< @<<<<<<<<<<< @<<<<<<<<<  @<<<<<<<   @<<<<<<<<< @<<<   @
+ EOF
+           chomp $fmt;
+-          $fmt .= "<" x length($SUMMARY) . "\n";
++          my $summary_len = do {
++              # avoid taint
++              $SUMMARY =~ /^(.*)/s or die;
++              length $1
++          };
++          $fmt .= "<" x $summary_len . "\n";
+           $fmt .= <<'EOF';
+ $DEP, $GROUP, $SERVICE, $STATUS, $TIME, $NEXT, $ALERTS, $SUMMARY
+ .
+--- mon-0.99.2.orig/clients/skymon/skymon
++++ mon-0.99.2/clients/skymon/skymon
+@@ -233,7 +233,7 @@
+ #    &load_address() || die "could not load address\n";
+ #    print "$BUF";
+ 
+-    open (MAIL, "| /usr/lib/sendmail -oi -t") ||
++    open (MAIL, "| /usr/sbin/sendmail -oi -t") ||
+       die "could not open pipe to mail: $!\n";
+ print MAIL <<EOF;
+ To: $ADDR
+--- mon-0.99.2.orig/debian/debug.cf
++++ mon-0.99.2/debian/debug.cf
+@@ -0,0 +1,75 @@
++# $Id$
++
++# I use this configuration when debugging.  It's still too rudimentary,
++# though; I haven't done any serious work on the daemon since I added
++# it.  Still, it improves incrementally.
++
++dtlogging     = yes
++randstart     = 15s
++serverport    = 23000
++snmp          = yes
++snmpport      = 23001
++syslog_facility       = local0
++trapport      = 23000
++
++hostgroup router
++    208.33.154.1
++
++hostgroup http
++    jones
++    quake.argon.org
++    www.debian.org
++
++watch jones
++    service long-status
++      interval 30s
++      monitor long-status.monitor
++    service debug-listen
++      interval 30s
++      failure_interval 10s
++      monitor tcp.monitor -p 23456
++    service freespace
++      interval 1m
++      monitor freespace.monitor /:5000 /var:30000 ;;
++    service nntp
++      interval 60m
++      monitor nntp.monitor
++    service rpc
++      interval 1m
++      monitor rpc.monitor -r status -r mountd -r nfs
++
++watch non-host
++    service dns
++      interval 1m
++      monitor dns.monitor -zone argon.org -master janus hogan jones-ext ;;
++    service reboot
++      interval 1m
++      monitor reboot.monitor localhost ;;
++
++watch http
++    service ping
++      interval 10m
++      monitor fping.monitor
++      period wd {Sun-Sat}
++          alert mail.alert roderick
++          alertevery 4h
++    service http
++      depend http:ping
++      interval 10m
++      monitor http.monitor
++      period wd {Sun-Sat}
++          alert mail.alert roderick
++          alertevery 4h
++
++watch router
++    service ping
++      interval 2m
++      monitor fping.monitor
++      period wd {Sun-Sat}
++          alert mail.alert roderick
++          alertevery 4h
++
++watch bugs.debian.org
++    service ldap
++      interval 1m
++      monitor ldap.monitor --basedn 'ou=Bugs,o=Debian Project,c=US' --port 35567 --attribute bugid --filter '(bugid=1399)' --value 1399
+--- mon-0.99.2.orig/mon.d/dialin.monitor.wrap.c
++++ mon-0.99.2/mon.d/dialin.monitor.wrap.c
+@@ -11,6 +11,7 @@
+  *
+ */
+ 
++#include <stdio.h>
+ #include <unistd.h>
+ 
+ #ifndef REAL_DIALIN_MONITOR
+@@ -20,10 +21,11 @@
+ int
+ main (int argc, char *argv[])
+ {
+-    char *real_img = REAL_DIALIN_MONITOR;
++    char *me = argv[0];
+ 
+-    argv[0] = real_img;
+-
+-    /* exec */
+-    execv (real_img, argv);
++    argv[0] = REAL_DIALIN_MONITOR;
++    execv (argv[0], argv);
++    fprintf (stderr, "%s: can't exec ", me);
++    perror (argv[0]);
++    return 1;
+ }
+--- mon-0.99.2.orig/mon.d/file_change.monitor
++++ mon-0.99.2/mon.d/file_change.monitor
+@@ -1,4 +1,4 @@
+-#!/usr/local/bin/perl
++#!/usr/bin/perl
+ #
+ # mon monitor to watch for file changes
+ #
+--- mon-0.99.2.orig/mon.d/fping.monitor
++++ mon-0.99.2/mon.d/fping.monitor
+@@ -61,8 +61,13 @@
+ 
+ my @unreachable;
+ my @alive;
+-my @addr_not_found;
+ my @slow;
++my @other_prob;               # details for other per-host problems
++my @error;            # other errors which I'll give non-zero exit for
++my @icmp;             # ICMP messages output by fping
++my %addr_unknown;
++
++my %want_host = map { $_ => 1 } @ARGV;        # hosts fping hasn't output yet
+ 
+ while (<IN>)
+ {
+@@ -70,10 +75,15 @@
+     if (/^(\S+).*unreachable/)
+     {
+       push (@unreachable, $1);
++      delete $want_host{$1}
++          or push @error, "unreachable host `$1' wasn't asked for";
+     }
+ 
+     elsif (/^(\S+) is alive \((\S+)/)
+     {
++      delete $want_host{$1}
++          or push @error, "reachable host `$1' wasn't asked for";
++
+       if ($opt{"s"} && $2 > $opt{"s"})
+       {
+           push (@slow, [$1, $2]);
+@@ -87,74 +97,113 @@
+ 
+     elsif (/^(\S+)\s+address\s+not\s+found/)
+     {
+-      push @addr_not_found, $1;
++      $addr_unknown{$1} = 1;
++      push @other_prob, "$1 address not found";
+       push @unreachable, $1;
++      delete $want_host{$1}
++          or push @error, "unknown host `$1' wasn't asked for";
++    }
++
++    # ICMP Host Unreachable from 1.2.3.4 for ICMP Echo sent to 2.4.6.8
++    # (among others)
++
++    elsif (/^ICMP (.*) for ICMP Echo sent to (\S+)/)
++    {
++      push @icmp, $_;
+     }
+ 
+     else
+     {
+-      print STDERR "unidentified output from fping: [$_]\n";
++      push @error, "unidentified output from fping: [$_]";
+     }
+ }
+ 
++for my $host (keys %want_host) {
++    push @other_prob, "$host not listed in fping's output";
++    push @unreachable, $host;
++}
++
+ close (IN);
+ 
+ $END_TIME = time;
+ 
+ my $retval = $? >> 8;
+ 
+-if ($retval == 3)
++if ($retval < 3)
+ {
+-    print "fping: invalid cmdline arguments [$CMD @ARGV]\n";
+-    exit 1;
++    # do nothing
+ }
+ 
+-elsif ($retval == 4)
++elsif ($retval == 3)
+ {
+-    print "fping: system call failure\n";
+-    exit 1;
++    push @error, "fping: invalid cmdline arguments [$CMD @ARGV]";
+ }
+ 
+-elsif ($retval == 1 || $retval == 2 || @slow != 0)
++elsif ($retval == 4)
+ {
+-    print join (" ", sort (@unreachable, map { $_->[0] } @slow)), "\n\n";
++    push @error, "fping: system call failure";
+ }
+ 
+-elsif ($retval == 0)
++else
+ {
+-    print "\n";
++    push @error, "unknown return code ($retval) from fping";
+ }
+ 
+-else
+-{
+-    print "unknown return code ($retval) from fping\n";
++if (@error) {
++    print "unusual errors\n";
++}
++else {
++    my @fail = sort @unreachable, map { $_->[0] } @slow;
++    # This line is intentionally blank if there are no failures.
++    print "@fail\n";
+ }
+ 
++print "\n";
+ print "start time: " . localtime ($START_TIME) . "\n";
+ print "end time  : " . localtime ($END_TIME) . "\n";
+-print "duration  : " . ($END_TIME - $START_TIME) . " seconds\n\n";
++print "duration  : " . ($END_TIME - $START_TIME) . " seconds\n";
++
++if (@error != 0)
++{
++    print <<EOF;
++
++------------------------------------------------------------------------------
++unusual errors
++------------------------------------------------------------------------------
++EOF
++    print join ("\n", @error), "\n";
++}
+ 
+ if (@unreachable != 0)
+ {
+     print <<EOF;
++
+ ------------------------------------------------------------------------------
+ unreachable hosts
+ ------------------------------------------------------------------------------
+ EOF
+-    print join ("\n", @unreachable), "\n\n";
++    print join ("\n", @unreachable), "\n";
+ 
+-    if (@addr_not_found != 0)
+-    {
+-      print "address not found for @addr_not_found\n";
+-    }
++    print "\nother problems:\n", join "\n", @other_prob, ''
++      if @other_prob;
++}
+ 
+-    print "\n";
++if (@icmp != 0)
++{
++    print <<EOF;
++
++------------------------------------------------------------------------------
++ICMP messages
++------------------------------------------------------------------------------
++EOF
++    print join "\n", @icmp, '';
+ }
+ 
+ 
+ if (@slow != 0)
+ {
+     print <<EOF;
++
+ ------------------------------------------------------------------------------
+ slow hosts (response time which exceeds $opt{s}ms)
+ ------------------------------------------------------------------------------
+@@ -171,6 +220,7 @@
+ if (@alive != 0)
+ {
+     print <<EOF;
++
+ ------------------------------------------------------------------------------
+ reachable hosts                          rtt
+ ------------------------------------------------------------------------------
+@@ -187,25 +237,28 @@
+ #
+ if ($opt{"T"} && @unreachable)
+ {
++    my $header_output = 0;
+     foreach my $host (@unreachable)
+     {
++      next if $addr_unknown{$host};
++      print $header_output++ ? "\n" : <<EOF;
++
++------------------------------------------------------------------------------
++traceroute to unreachable hosts
++------------------------------------------------------------------------------
++EOF
+       system ("traceroute -w 3 $host 2>&1");
+     }
+-
+-    print "\n";
+ }
+ 
++exit 1 if @error;
++
+ #
+ # fail only if all hosts do not respond
+ #
+ if ($opt{"a"})
+ {
+-    if (@unreachable == @ARGV)
+-    {
+-      exit 1;
+-    }
+-
+-    exit 0;
++    exit(@alive ? 0 : 1);
+ }
+ 
+ exit 1 if (@slow != 0);
+--- mon-0.99.2.orig/mon.d/up_rtt.monitor
++++ mon-0.99.2/mon.d/up_rtt.monitor
+@@ -1,4 +1,4 @@
+-#!/usr/local/bin/perl
++#!/usr/bin/perl
+ #
+ # mon monitor to check for circuit up and measure RTT
+ #
author	Arkadiusz Miśkiewicz <arekm@maven.pl>
	Mon, 7 Jun 2004 00:30:20 +0000 (00:30 +0000)
committer	cvs2git <feedback@pld-linux.org>
	Sun, 24 Jun 2012 12:13:13 +0000 (12:13 +0000)