Monitor R1soft CDP with Nagios

Updated 2017-02-01:
This is the updated way we monitor our backup jobs: we now query the R1soft CDP datacenter console instead of the individual backup managers.

You will need at least Python 2.7 and the Python argparse and suds packages. Then run it like this: python check_r1soft_cdp_console.py --host 192.168.1.1 --port 7080 --username admin --password admin

check_r1soft_cdp_console.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
import argparse
import datetime

# Nagios plugin exit codes, in the order OK, WARNING, CRITICAL, UNKNOWN.
STATE_OK, STATE_WARNING, STATE_CRITICAL, STATE_UNKNOWN = range(4)

# suds is the only third-party dependency; if it is missing, report that
# on stdout and stop with the CRITICAL exit code.
try:
    import suds
except ImportError:
    sys.stdout.write('You must install suds using pip or your package manager.\n')
    sys.exit(STATE_CRITICAL)

# enable debug output
#import logging
#logging.basicConfig(filename='swagyolo.log',format='%(asctime)s %(levelname)s  %(message)s',level=logging.INFO)
#logging.getLogger('suds.client').setLevel(logging.DEBUG)
#logging.getLogger('suds.transport').setLevel(logging.DEBUG)

parser = argparse.ArgumentParser(
    description='Check CDP datacenter console policy status')

# Every string-valued option as (flag, metavar, required, help text).
for _flag, _metavar, _required, _help in (
        ('--host', 'localhost', True, 'CDP datacenter console host'),
        ('--port', '7080', False, 'API port'),
        ('--username', 'admin', True, 'CDP datacenter console username'),
        ('--password', 'admin', True, 'CDP datacenter console password')):
    parser.add_argument(_flag, metavar=_metavar, type=str,
                        required=_required, help=_help)

# Boolean switch; False unless --ssl is given on the command line.
parser.add_argument('--ssl', dest='ssl', action='store_true', help='Use SSL')

# Parsed command-line options, used by the rest of the script.
args = parser.parse_args()

def build_url(host, port, factory, ssl=False):
    """Return the WSDL URL of a SOAP endpoint.

    host    -- hostname or IP address of the server
    port    -- port to connect to; 7080 is used when this is None
    factory -- name of the SOAP service, used as the URL path
    ssl     -- when true the URL uses https, otherwise http
    """
    parts = {
        'protocol': 'http',
        'host': host,
        'port': 7080 if port is None else port,
        'factory': factory,
    }

    if ssl:
        parts['protocol'] = 'https'

    return '{protocol}://{host}:{port}/{factory}?wsdl'.format(**parts)

def get_manager_by_id(managers, id):
    """Return the first entry of managers whose 'id' equals id.

    Returns None when managers is None or when no entry matches.
    """
    if managers is None:
        return None

    # next() with a default gives the first match, or None if there is none.
    return next((entry for entry in managers if entry['id'] == id), None)

# ---------------------------------------------------------------------------
# Main check: connect to the console's Policy and CDP SOAP services, classify
# every policy and print a single Nagios status line.
# ---------------------------------------------------------------------------

def _exit_critical(message):
    """Print a Nagios CRITICAL status line and terminate the plugin."""
    print('CRITICAL: %s' % message)
    sys.exit(STATE_CRITICAL)


def _connect(factory):
    """Create a suds client for the named console SOAP service.

    Honours the --ssl switch. Exits CRITICAL when the client cannot be
    set up, so the monitoring system gets a status line, not a traceback.
    """
    url = build_url(args.host, args.port, factory, args.ssl)

    try:
        client = suds.client.Client(url, username=args.username, password=args.password)
    except Exception as e:
        _exit_critical('Failed to setup suds SOAP with exception %s' % (e))

    if client is None:
        _exit_critical('Invalid suds SOAP %s' % factory.lower())

    return client


def _call_or_exit(method, failure_message):
    """Call a SOAP method; exit CRITICAL if it raises or returns None."""
    try:
        result = method()
    except Exception as e:
        _exit_critical('%s with exception %s' % (failure_message, e))

    if result is None:
        _exit_critical(failure_message)

    return result


# Oldest acceptable age of the last replication for each schedule type.
# Schedule types not listed here (e.g. ON_DEMAND) are not checked for age.
# timedelta has no "months" keyword, so MONTHLY uses the longest month.
_MAX_AGE = {
    'DAILY': datetime.timedelta(days=1),
    'WEEKLY': datetime.timedelta(weeks=1),
    'MONTHLY': datetime.timedelta(days=31),
}

# Policy state -> (stats counter, status label). Any other state counts as OK.
_STATE_MAP = {
    'UNKNOWN': ('unknown', 'UNKNOWN'),
    'ERROR': ('error', 'ERROR'),
    'ALERT': ('warning', 'ALERT'),
}

policy = _connect('Policy')
cdp = _connect('CDP')

managers = _call_or_exit(cdp.service.getRegisteredCDPS, 'Failed to get managers')
policies = _call_or_exit(policy.service.getPolicies, 'Could not get policies')

stats = {
    'ok': 0,
    'warning': 0,
    'error': 0,
    'unknown': 0,
    'disabled': 0,
    'lastrun': 0,
}

# Per-policy detail records. They are collected but not printed; only the
# summary line at the end is emitted.
data = []

now = datetime.datetime.now()

for p in policies:
    enabled = p['enabled']
    name = p['name']
    freq = p['replicationScheduleFrequencyType']
    state = p['state']

    # A policy may reference a manager that is not in the registered list;
    # fall back to a placeholder name instead of failing on None.
    manager = get_manager_by_id(managers, p['CDPId'])
    manager_name = manager['name'] if manager is not None else 'unknown'

    # Deliberately "== False": only an explicit false value disables a policy.
    if enabled == False:
        stats['disabled'] += 1
        data.append({
            'name': name,
            'manager': manager_name,
            'status': 'DISABLED'
        })
        continue

    max_age = _MAX_AGE.get(freq)

    if max_age is not None:
        # 0 marks "no timestamp available"; such policies skip the age check.
        lastrun = p['lastReplicationRunTime'] if 'lastReplicationRunTime' in p else 0
        nextrun = p['nextReplicationRunTime'] if 'nextReplicationRunTime' in p else 0

        if lastrun != 0 and nextrun != 0 and lastrun <= now - max_age:
            stats['lastrun'] += 1
            data.append({
                'name': name,
                'manager': manager_name,
                'status': 'LAST RUN ERROR',
                'message': '%s on %s has not been backed up properly last time!' % (name, manager_name)
            })
            continue

    counter, label = _STATE_MAP.get(state, ('ok', 'OK'))
    stats[counter] += 1
    data.append({
        'name': name,
        'manager': manager_name,
        'status': label
    })

# Worst condition wins: errors and stale runs, then alerts, then unknowns.
if stats['error'] >= 1 or stats['lastrun'] >= 1:
    final_status = 'CRITICAL'
    final_code = STATE_CRITICAL
elif stats['warning'] >= 1:
    final_status = 'WARNING'
    final_code = STATE_WARNING
elif stats['unknown'] >= 1:
    final_status = 'UNKNOWN'
    final_code = STATE_UNKNOWN
else:
    final_status = 'OK'
    final_code = STATE_OK

print('%s: %s ok, %s error, %s warning, %s unknown, %s disabled, %s last run error' % (
    final_status, stats['ok'], stats['error'], stats['warning'],
    stats['unknown'], stats['disabled'], stats['lastrun']))
sys.exit(final_code)

Old article below:
So my company is using R1soft’s product CDP to backup some of our servers.
I wanted to have a clear overview of the status of our backup machines instead of receiving their email reports, so here is a working Nagios plugin to monitor CDP.

What it does is connect to CDP’s SOAP API using PHP, get the state of all policies, and then loop through them to check whether each one is OK or not.
The authentication is done against a “Local” user on that backup manager.

Example
./check_r1soft_cdp -H 10.1.1.1 -u admin -p test

10.1.1.1 = IP or hostname of the backup manager
admin = username
test = password

If you want to use it over SSL you can add the “-s” option, for example:
./check_r1soft_cdp -H 10.1.1.1 -u admin -p test -s

Installation
Copy the nagios plugin below to /usr/lib/nagios/plugins with your desired filename, for example check_r1soft_cdp.
Make sure it’s executable (chmod +x check_r1soft_cdp) and then setup a command in Nagios.

define command{
command_name check_r1soft_cdp
command_line $USER1$/check_r1soft_cdp -H $HOSTADDRESS$ -u $ARG1$ -p $ARG2$
}

#!/usr/bin/php
<?php

//check_r1soft_cdp - nagios plugin for R1soft CDP
//Modified by trigger
//Originally by Rogierm (http://rogierm.redbee.nl/blog/2013/07/12/idera-cdp-monitoring-with-nagios/)
//
//Usage: check_r1soft_cdp -H <host> -u <user> -p <password> [-s]
//
//Fetches all policies from the Policy2 SOAP service, counts them per state
//and prints one Nagios status line with performance data, followed by the
//list of disabled and failing policies.
//Exit codes: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN.

$options = getopt("H:u:p:s");

if (!is_array($options) ) {
        print "There was a problem reading in the options.\r\n";
        exit(1);
}

// The host has no default, so it must be present in addition to the count check.
if (count($options) < 3 || !isset($options['H'])) {
        print "Not all options are set.\r\n";
        exit(2);
}

$HOST=$options['H'];

// getopt() stores boolean false for a flag that takes no value, so test for
// the presence of the key rather than for its truthiness.
$USE_SSL = array_key_exists('s', $options);
$PORT = $USE_SSL ? "9443" : "9080";
$PROTO = $USE_SSL ? "https" : "http";

$USER = isset($options['u']) ? $options['u'] : "admin";
$PASS = isset($options['p']) ? $options['p'] : "password";

try {
        $policyClient = new SoapClient("$PROTO://$HOST:$PORT/Policy2?wsdl",
                array(
                        'login' => "$USER",
                        'password' => "$PASS",
                        'trace' => 1,
                        'cache_wsdl' => WSDL_CACHE_NONE,
                        'features' => SOAP_SINGLE_ELEMENT_ARRAYS
                )
        );

        $allPoliciesForUser = $policyClient->getPolicies();

        // With no policies the response may carry no "return" member at all.
        $policies = isset($allPoliciesForUser->return) ? $allPoliciesForUser->return : array();

        $failstring = '';
        $alldis = '';
        $count_unk = 0;
        $count_err = 0;
        $count_warn = 0;
        $count_ok = 0;
        $count_dis = 0;

        foreach ($policies as $tmp) {
                // Disabled policies are only counted and listed; their state is ignored.
                if (!$tmp->enabled) {
                        $count_dis++;
                        $alldis .= $tmp->name . " is DISABLED\n";
                        continue;
                }

                if ($tmp->state == "UNKNOWN") {
                        $failstring .= $tmp->name . " is in a UNKNOWN state\n";
                        $count_unk++;
                } elseif ($tmp->state == "ERROR") {
                        $failstring .= $tmp->name . " has ERROR\n";
                        $count_err++;
                } elseif ($tmp->state == "ALERT") {
                        $failstring .= $tmp->name . " has ALERT(s)\n";
                        $count_warn++;
                } else {
                        $count_ok++;
                }
        }

        // Human-readable counters for the status line.
        $summary = "OK=" . $count_ok . " DIS=" . $count_dis . " WARN=" . $count_warn
                . " ERR=" . $count_err . " UNK=" . $count_unk;

        // Nagios performance data is a space-separated list of label=value pairs.
        $perfdata = "OK=" . $count_ok . " DIS=" . $count_dis . " WARNING=" . $count_warn
                . " ERROR=" . $count_err . " UNKNOWN=" . $count_unk;

        // Precedence: errors, then alerts or disabled policies, then unknowns, then OK.
        if ($count_err > 0) {
                $label = "ERROR";
                $code = 2;
        } elseif ($count_warn > 0 || $count_dis > 0) {
                $label = "WARNING";
                $code = 1;
        } elseif ($count_unk > 0) {
                $label = "UNKNOWN";
                $code = 3;
        } elseif ($count_ok > 0) {
                print "CDP OK - " . $summary . " | " . $perfdata . "\n";
                exit(0);
        } else {
                // Reached only when no policy was returned at all.
                echo "Should never reach this state!??!\n";
                exit(1);
        }

        print "CDP " . $label . " - " . $summary . " | " . $perfdata . "\n\n";
        print $alldis . "\n";
        print $failstring;
        exit($code);
}
catch (SoapFault $exception)
{
        // The policies could not be checked at all, so report CRITICAL (2),
        // matching how the Python console check treats connection failures.
        echo "Failed to find all status of all policies: " . $exception->getMessage() . "\n";
        exit(2);
}
?>

This is a modified version of Rogierm’s nagios plugin that can be found here.
I have added performance data, more arguments, checking disabled policies and added what I think is better output.
You can always click on the service to get a full list of policies etc.

/Trigger

2 Comments

C says:

Been using this plugin for some time and it’s great. Should really be on nagios exchange site 😉

Sad to say it no longer works on SBM version 6.2.1

Thanks!

C

trigger says:

Thanks, I have updated the article on how we monitor our jobs right now. We are using the R1soft CDP datacenter console instead of having checks on each backup manager.

Leave a Reply

Your email address will not be published.