Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added some fixes to handle nagios data from windows systems #128

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions graphios.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ use_service_desc = False
# (uses the replacement_character)
replace_hostname = True

# append this domain to the hostname if the hostname does not already end with it
#append_domain = example.com

# reverse hostname
# if you have:
# host.datacenter.company.tld
Expand Down Expand Up @@ -117,6 +120,9 @@ librato_whitelist = [".*"]
#flag the librato backend as 'non essential' for the purposes of error checking
#nerf_librato = False

# nagios on windows has issues... this will work around labels with spaces
#windows_format_fix = False

#------------------------------------------------------------------------------
# InfluxDB Details (if you are using InfluxDB 0.8)
#------------------------------------------------------------------------------
Expand Down Expand Up @@ -162,6 +168,20 @@ enable_influxdb09 = False
#influxdb_line_protocol = True


#------------------------------------------------------------------------------
# InfluxDB (if you are using InfluxDB 1.0+)
#------------------------------------------------------------------------------

enable_influxdb1 = False

# Connection url. If the url is stdout:// the data will be printed to terminal.
#influxdb_url = https://influxdb.math.example.ca:8086/write?db=example_db&u=example_user&p=<password>&precision=s

# Precision of time to format the influxdb data
# if using precision=s in the url then use: 1
# if using precision=ns or not providing a precision in the url use: 1000000000
#influxdb_time_precision = 1000000000

#------------------------------------------------------------------------------
# STDOUT Details (comment in if you are using STDOUT)
#------------------------------------------------------------------------------
Expand Down
34 changes: 25 additions & 9 deletions graphios.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,10 @@ def validate(self):
self.VALID = True

def check_adjust_hostname(self):
self.HOSTNAME = self.HOSTNAME.lower()
if cfg["append_domain"]:
if not self.HOSTNAME.endswith(cfg["append_domain"]):
self.HOSTNAME += '.' + cfg["append_domain"]
if cfg["reverse_hostname"]:
self.HOSTNAME = '.'.join(reversed(self.HOSTNAME.split('.')))
if cfg["replace_hostname"]:
Expand Down Expand Up @@ -381,7 +385,15 @@ def process_log(file_name):
if mobj:
# break out the metric object into one object per perfdata metric
# log.debug('perfdata:%s' % mobj.PERFDATA)
for metric in mobj.PERFDATA.split():
data = mobj.PERFDATA
if cfg.get('windows_format_fix', False):
dr = re.findall('([^=]*)=([^\s]*)', data)
data = ''
for d in dr:
data += '{0}={1} '.format(d[0].replace(' ', '')
.replace('%', ''), d[1].strip())
data = data.strip()
for metric in data.split():
try:
nobj = copy.copy(mobj)
(nobj.LABEL, d) = metric.split('=')
Expand All @@ -391,8 +403,8 @@ def process_log(file_name):
nobj.UOM = re.sub("[^a-zA-Z]+", "", u)
processed_objects.append(nobj)
except:
log.critical("failed to parse label: '%s' part of perf"
"string '%s'" % (metric, nobj.PERFDATA))
log.critical("failed to parse metric: '%s' part of perf"
"string '%s' " % (metric, nobj.PERFDATA))
continue
return processed_objects

Expand Down Expand Up @@ -491,12 +503,15 @@ def check_skip_file(file_name, file_dir):
return True
elif re.match('^_', file_name):
return True

if os.stat(file_dir)[6] == 0:
# file was 0 bytes
handle_file(file_dir, 0)
return True
if os.path.isdir(file_dir):
try:
if os.stat(file_dir)[6] == 0:
# file was 0 bytes
handle_file(file_dir, 0)
return True
if os.path.isdir(file_dir):
return True
except Exception as ex:
log.critical(ex)
return True
return False

Expand All @@ -521,6 +536,7 @@ def init_backends():
"librato",
"influxdb",
"influxdb09",
"influxdb1",
"stdout",
)
# populate the controller dict from avail + config. this assumes you named
Expand Down
123 changes: 123 additions & 0 deletions graphios_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,129 @@ def send(self, metrics):
return ret


# ###########################################################
# #### influxdb 1.0 backend ####################################

class influxdb1(object):
    """
    InfluxDB 1.0+ backend using the HTTP line protocol.

    Config options read from graphios.cfg:
      influxdb_url             comma-separated write URLs; a url starting
                               with stdout:// prints the data instead
      influxdb_time_precision  multiplier applied to the epoch timestamp
                               (1 for precision=s, 10**9 for nanoseconds)
      influxdb_max_metrics     maximum metrics per HTTP POST (default 250)
      influxdb_extra_tags      dict literal of tags added to every metric
    """
    def __init__(self, cfg):
        self.log = logging.getLogger("log.backends.influxdb")
        self.log.info("InfluxDB 1.0 backend initialized")
        # HTTP timeout (seconds) for each write request
        self.timeout = 5

        if 'influxdb_url' in cfg:
            # multiple urls may be given, comma separated; every chunk of
            # metrics is posted to each of them
            self.urls = cfg['influxdb_url'].split(',')
        else:
            self.log.critical("Missing influxdb_url in graphios.cfg")
            sys.exit(1)

        # default timestamp multiplier: nanoseconds
        self.time_precision = 10 ** 9
        if 'influxdb_time_precision' in cfg:
            self.time_precision = cfg['influxdb_time_precision']

        if 'influxdb_max_metrics' in cfg:
            try:
                self.influxdb_max_metrics = int(cfg['influxdb_max_metrics'])
            except ValueError:
                self.log.critical("influxdb_max_metrics needs to be a integer")
                sys.exit(1)
        else:
            self.influxdb_max_metrics = 250

        if 'influxdb_extra_tags' in cfg:
            # literal_eval (not eval) so the config cannot execute code
            self.influxdb_extra_tags = ast.literal_eval(
                cfg['influxdb_extra_tags'])
        else:
            self.influxdb_extra_tags = {}

    def format_metric(self, timestamp, path, tags, value):
        """Return one InfluxDB line-protocol line for a single metric.

        timestamp is a unix epoch in seconds and is multiplied by
        self.time_precision; tags with empty values are skipped.
        """
        tag_list = []
        for k in tags:
            if tags[k]:
                # strip characters that break the line protocol; the
                # sanitized value (not the raw tags[k]) must be emitted
                tag = tags[k].replace(':', '').replace('\\', '')
                tag_list.append('{0}={1}'.format(k, tag))
        t = ','.join(tag_list)
        ts = int(timestamp) * int(self.time_precision)
        m = '{0},{1} value={2} {3}'.format(path, t, value, ts)
        return m

    def format_series(self, chunk):
        """Join line-protocol lines into one newline-separated POST body."""
        return '\n'.join(chunk)

    def send(self, metrics):
        """ Connect to influxdb and send metrics

        Returns the number of metrics handled, or 0 if any POST failed.
        """
        ret = 0
        perfdata = []
        for m in metrics:
            ret += 1

            # host checks have no service description; fall back to the
            # check command name as the measurement path
            if (m.SERVICEDESC == ''):
                path = m.HOSTCHECKCOMMAND
            else:
                path = m.SERVICEDESC

            # Ensure a int/float gets passed
            try:
                value = int(m.VALUE)
            except ValueError:
                try:
                    value = float(m.VALUE)
                except ValueError:
                    value = 0

            tags = {"check": m.LABEL, "host": m.HOSTNAME}
            tags.update(self.influxdb_extra_tags)
            perfdata.append(self.format_metric(int(m.TIMET), path,
                            tags, value))

        series_chunks = self.chunks(perfdata, self.influxdb_max_metrics)
        for chunk in series_chunks:
            series = self.format_series(chunk)
            for url in self.urls:
                if not self._send(url, series):
                    self.log.warning("failed to send metrics url:{0}".format(url))
                    ret = 0
        return ret

    def chunks(self, l, n):
        """ Yield successive n-sized chunks from l. """
        # range (not xrange) so this also works under Python 3
        for i in range(0, len(l), n):
            yield l[i:i+n]

    def _send(self, server, chunk):
        """POST one line-protocol body to server; True on success."""
        if server.startswith('stdout:'):
            print(server)
            print(chunk)
        else:
            self.log.debug("Connecting to InfluxDB at %s" % server)
            self.log.debug("sending: %s" % chunk)
            req = urllib2.Request(server, chunk)
            req.add_header('Content-Type', 'application/x-www-form-urlencoded')
            try:
                r = urllib2.urlopen(req, timeout=self.timeout)
                r.close()
                return True
            except urllib2.HTTPError as e:
                body = e.read()
                # adjacent string literals (no backslash continuation) so the
                # log message does not embed a run of indentation spaces
                self.log.warning('Failed to send metrics to InfluxDB. '
                                 'Status code: %d: %s' % (e.code, body))
                return False
            except IOError as e:
                fail_string = "Failed to send metrics to InfluxDB. "
                if hasattr(e, 'code'):
                    fail_string = fail_string + "Status code: %s" % e.code
                if hasattr(e, 'reason'):
                    fail_string = fail_string + str(e.reason)
                self.log.warning(fail_string)
                return False


# ###########################################################
# #### stdout backend #######################################

Expand Down