#!/usr/bin/perl -T
# nagios: -epn
#
# Author: Hari Sekhon
# Date: 2013-10-13 19:32:32 +0100 (Sun, 13 Oct 2013)
#
# https://github.com/harisekhon/nagios-plugins
#
# License: see accompanying LICENSE file
#
# TODO: check if I can rewrite a version of this via API
# Upper bound on how many down-node addresses are listed in the verbose
# status message (also interpolated into the help text below).
my $max_num_down_nodes_to_output = 5;

# Help text and version string for the plugin.
# NOTE(review): both variables are assigned without a declaration and sit
# above "use strict" - presumably they are globals supplied by HariSekhonUtils;
# keep these assignments ahead of the strict pragma.
$DESCRIPTION = qq{Nagios Plugin to check the number of available cassandra nodes and raise warning/critical on down nodes.
Uses nodetool's status command to determine how many downed nodes there are to compare against the warning/critical thresholds. Reports the addresses of up to $max_num_down_nodes_to_output nodes that are down in verbose mode. Always returns perfdata for graphing the node counts and states.
Can specify a remote host and port otherwise assumes to check via localhost
Tested on Cassandra 1.2.9, 2.0.1, 2.0.9, 2.2.5, 3.0.8, 3.5, 3.6, 3.7};

$VERSION = '0.4.2';
use strict;
use warnings;

# Make the "lib" directory that sits beside this script available to "use".
BEGIN {
    use File::Basename;
    use lib dirname(__FILE__) . "/lib";
}

use HariSekhonUtils qw/:DEFAULT :regex/;
use HariSekhon::Cassandra::Nodetool;

# NOTE(review): the two arguments are presumably the default warning and
# critical thresholds for the number of down nodes - confirm in HariSekhonUtils.
set_threshold_defaults(0, 1);

# Command line switches: the nodetool connection options plus the thresholds.
%options = (
    %nodetool_options,
    %thresholdoptions,
);

# Put 'nodetool' at the front of the usage ordering.
unshift @usage_order, 'nodetool';

get_options();

# Validate everything supplied on the command line before doing any work.
($nodetool, $host, $port, $user, $password) =
    validate_nodetool_options($nodetool, $host, $port, $user, $password);
validate_thresholds(
    undef,
    undef,
    { "simple" => "upper", "integer" => 1, "positive" => 1 },
);

vlog2;
set_timeout();

$status = "OK";

# Build and run "<nodetool> <connection options>status".
my $options = nodetool_options($host, $port, $user, $password);
my $cmd     = $nodetool . " " . $options . "status";

vlog2 "fetching cluster nodes information";
my @output = cmd($cmd);

# Tallies filled in by parse_state() while walking the command output.
my ($up_nodes, $down_nodes) = (0, 0);
my ($normal_nodes, $leaving_nodes, $joining_nodes, $moving_nodes) = (0, 0, 0, 0);
my @down_nodes;       # addresses of the nodes reported as down
my $node_address;     # address captured from the most recent node line
# Classify one line of "nodetool status" output and update the file-level tallies.
#
# Argument:
#   the output line to inspect (the line is now read from the argument rather
#   than from the ambient $_, so the sub no longer depends on the caller's loop
#   variable)
#
# Side effects:
#   - a node line starts with a two letter code: first letter U or D (counted in
#     $up_nodes / $down_nodes), second letter N, L, J or M (counted in
#     $normal_nodes / $leaving_nodes / $joining_nodes / $moving_nodes)
#   - $node_address is set to the address on the node line, and the address is
#     appended to @down_nodes when the first letter is D
#   - header lines and lines accepted by skip_nodetool_output() are ignored
#   - any other line is handed to die_nodetool_unrecognized_output()
#
# Returns 1 when the line described a node, 0 when the line was ignored.
sub parse_state ($) {
    my ($line) = @_;

    # Don't know what remote JMX auth failure looks like yet so will go critical on any user/password related message returned assuming that's an auth failure
    check_nodetool_errors($line);

    if($line =~ /^([UD])([NLJM])\s+($host_regex)/){
        # Copy the captures straight away, before any other match can clobber them.
        my ($availability, $state, $address) = ($1, $2, $3);
        $node_address = $address;

        if($availability eq "U"){
            $up_nodes++;
        } else {
            $down_nodes++;
            push(@down_nodes, $address);
        }

        my %counter_for = (
            "N" => \$normal_nodes,
            "L" => \$leaving_nodes,
            "J" => \$joining_nodes,
            "M" => \$moving_nodes,
        );
        my $counter = $counter_for{$state};
        # Defensive only: the pattern above admits nothing but N, L, J or M here.
        unless($counter){
            quit "UNKNOWN", "unrecognized second column for node status, $nagios_plugins_support_msg";
        }
        ${$counter}++;

        return 1;
    }

    # Header rows and other known noise are skipped; anything else is treated as an error.
    unless($line =~ $nodetool_status_header_regex or skip_nodetool_output($line)){
        die_nodetool_unrecognized_output($line);
    }
    return 0;
}
# Walk the nodetool output. The loop deliberately aliases each line to $_ as
# well as passing it as the argument to parse_state().
foreach(@output){
    parse_state($_);
}

vlog2 "checking node counts and number of nodes down";

# Sanity check: a non-empty address list must go with a non-zero down counter.
if(@down_nodes){
    quit "UNKNOWN", "inconsistent nodes down count vs nodes down addresses, probably a parsing error in parse_state(). $nagios_plugins_support_msg" unless $down_nodes;
    plural $down_nodes;
    vlog2("$down_nodes node$plural down: " . join(", ", @down_nodes) );
}

# Sanity check: every node was counted once by up/down and once by state,
# so the two totals have to agree.
unless( ($up_nodes + $down_nodes ) == ($normal_nodes + $leaving_nodes + $joining_nodes + $moving_nodes)){
    quit "UNKNOWN", "live+down node counts vs (normal/leaving/joining/moving) nodes are not equal, investigation required";
}

$msg = "$up_nodes nodes up, $down_nodes down";
check_thresholds($down_nodes);

# In verbose mode list the down nodes, capped at $max_num_down_nodes_to_output.
if($verbose and @down_nodes){
    plural scalar @down_nodes;
    $msg .= " [node$plural down: ";
    if(scalar @down_nodes > $max_num_down_nodes_to_output){
        # Slice + join instead of an index loop: the old loop left its index
        # uninitialised (warning under "use warnings") and put a stray ", "
        # in front of the first address.
        $msg .= join(", ", @down_nodes[0 .. $max_num_down_nodes_to_output - 1]);
        $msg .= " ... ";
    } else {
        $msg .= join(", ", @down_nodes);
    }
    $msg .= "]";
}

$msg .= ", node states: $normal_nodes normal, $leaving_nodes leaving, $joining_nodes joining, $moving_nodes moving";

# Perfdata section. msg_perf_thresholds() is called right after nodes_down,
# the value that was checked against the thresholds, so the call order matters.
$msg .= " | nodes_up=$up_nodes nodes_down=$down_nodes";
msg_perf_thresholds();
$msg .= " normal_nodes=$normal_nodes leaving_nodes=$leaving_nodes joining_nodes=$joining_nodes moving_nodes=$moving_nodes";

vlog2;
quit $status, $msg;