check_cassandra_tpstats.pl 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. #!/usr/bin/perl -T
  2. # nagios: -epn
  3. #
  4. # Author: Hari Sekhon
  5. # Date: 2013-10-15 04:56:49 +0100 (Tue, 15 Oct 2013)
  6. #
  7. # https://github.com/harisekhon/nagios-plugins
  8. #
  9. # License: see accompanying LICENSE file
  10. #
  # Plugin description and version. Both are assigned as package globals ahead
  # of "use strict" further down.
  # NOTE(review): presumably read by the HariSekhonUtils usage/version output - confirm.
  $DESCRIPTION = join("\n",
      "Nagios Plugin to fetch Cassandra's thread pool stats per node by parsing 'nodetool tpstats'.",
      "Checks Pending/Blocked operations against warning/critical thresholds.",
      "Check the baseline first and then set appropriate thresholds since a build up of Pending/Blocked operations is indicative of performance problems.",
      "Also returns Active and Dropped operations with perfdata for graphing.",
      "Can specify a remote host and port otherwise it checks the local node's stats (for calling over NRPE on each Cassandra node)",
      "Tested on Cassandra 1.2.9, 2.0.1, 2.0.9, 2.2.5, 3.0.8, 3.5, 3.6, 3.7",
  );
  $VERSION = "0.7.0";
  18. use strict;
  19. use warnings;
  20. BEGIN {
  21. use File::Basename;
  22. use lib dirname(__FILE__) . "/lib";
  23. }
  24. use HariSekhonUtils;
  25. use HariSekhon::Cassandra::Nodetool;
  # Default thresholds of 0 / 0.
  # NOTE(review): presumably (warning, critical) - confirm against HariSekhonUtils.
  set_threshold_defaults(0, 0);

  # Command line switches: the nodetool options plus the threshold options,
  # with 'nodetool' placed first in the usage ordering.
  %options = (%nodetool_options, %thresholdoptions);
  unshift @usage_order, 'nodetool';

  get_options();

  # Validate the nodetool connection settings and the thresholds before any
  # command is run.
  ($nodetool, $host, $port, $user, $password) =
      validate_nodetool_options($nodetool, $host, $port, $user, $password);
  validate_thresholds(1, 1, { simple => "upper", integer => 1, positive => 1 });

  vlog2;
  set_timeout();

  $status = "OK";

  # Build and run "<nodetool> <connection options>tpstats"; the option string
  # returned by nodetool_options() is concatenated directly before the
  # sub-command.
  my $nodetool_args = nodetool_options($host, $port, $user, $password);
  vlog2 "fetching threadpool stats";
  my @output = cmd("$nodetool ${nodetool_args}tpstats");
  # First pass: hand every line that is not skippable to the nodetool library's
  # error check before any parsing is attempted.
  foreach(@output){
      next if skip_nodetool_output($_);
      check_nodetool_errors($_);
  }

  # Second pass: move $i to the first line that is not skippable. The index is
  # bounded so that empty or fully skipped output cannot run past the end of
  # @output and feed undef into the checks below.
  my $i = 0;
  while($i < scalar @output and skip_nodetool_output($output[$i])){
      $i++;
  }
  $i < scalar @output or quit "UNKNOWN", "no thread pool stats header found in nodetool output";

  # That line must be the thread pool table header; anything else means the
  # output is not in a format this plugin understands.
  $output[$i] =~ /Pool\s+Name\s+Active\s+Pending\s+Completed\s+Blocked\s+All time blocked\s*$/i or die_nodetool_unrecognized_output($output[$i]);
  $i++;
  # Parse the thread pool table, one pool per line, until the blank line that
  # terminates it. Each stat is stored as its own single-key hash so that the
  # order of insertion is the order of output.
  my @stats;
  for(; $i < scalar @output; $i++){
      my $line = $output[$i];

      # A blank line ends the table: step past it and stop.
      if($line =~ /^\s*$/){
          $i++;
          last;
      }

      # Columns: pool name, Active, Pending, Completed and, optionally,
      # Blocked and All time blocked.
      $line =~ /^([\w-]+(?:\s[A-Za-z]+)?)\s+(\d+)\s+(\d+)\s+(\d+)(?:\s+(\d+)\s+(\d+))?\s*$/
          or die_nodetool_unrecognized_output($line);
      my ($pool, $active, $pending, $blocked) = ($1, $2, $3, $5);

      # The Blocked columns are optional in the pattern above, so only record
      # Blocked when it was actually captured; an undefined value would
      # otherwise be printed and threshold-checked as an empty stat.
      push(@stats, { "${pool}_Blocked" => $blocked }) if defined $blocked;
      push(@stats,
          { "${pool}_Pending" => $pending },
          { "${pool}_Active"  => $active  },
          #{ "${pool}_Completed" => $4 },
          #{ "${pool}_All_time_blocked" => $6 },
      );
  }
  # Step over any blank lines separating the thread pool table from the
  # dropped messages table.
  $i++ while $i < scalar @output and $output[$i] =~ /^\s*$/;

  # The next line must be the dropped messages header.
  $output[$i] =~ /^Message type\s+Dropped/ or die_format_changed($output[$i]);
  $i++;

  # Every remaining line is "<MESSAGE_TYPE> <count>"; the type is normalised to
  # leading-capital form and suffixed with _Dropped.
  my @stats2;
  while($i < scalar @output){
      $output[$i] =~ /^(\w+)\s+(\d+)$/ or die_format_changed($output[$i]);
      my $label = ucfirst(lc($1)) . "_Dropped";
      push @stats2, { $label => $2 };
      $i++;
  }
  # Dropped message stats come first, followed by the thread pool stats.
  push @stats2, @stats;

  # Build the human-readable message and the perfdata string side by side.
  # Every stat is written to both; Pending/Blocked stats are also checked
  # against the thresholds and upper-cased in the message when the check fails.
  # NOTE(review): check_thresholds() presumably updates $status - confirm.
  my $perfdata;
  for my $stat (@stats2){
      for my $key (keys %$stat){
          my $value = $stat->{$key};
          my $entry = "$key=$value ";
          $perfdata .= $entry;
          if($key =~ /Pending|Blocked/i){
              my ($within_thresholds) = check_thresholds($value, 1);
              $entry = uc $entry unless $within_thresholds;
          }
          $msg .= $entry;
      }
  }

  # Drop the trailing separator left by the last entry.
  $msg =~ s/\s$//;

  # Show the thresholds in verbose mode or whenever the result is not OK.
  msg_thresholds() if $verbose or $status ne "OK";

  $msg .= "| $perfdata";
  vlog2;
  quit $status, $msg;