diff --git a/Products/DataCollector/zenmodeler.py b/Products/DataCollector/zenmodeler.py index e62db28729..a357c32b7f 100644 --- a/Products/DataCollector/zenmodeler.py +++ b/Products/DataCollector/zenmodeler.py @@ -58,7 +58,7 @@ from Products.ZenEvents.ZenEventClasses import Heartbeat, Error from Products.ZenHub.PBDaemon import FakeRemote, PBDaemon, HubDown from Products.ZenUtils.Driver import drive, driveLater -from Products.ZenUtils.Utils import unused, zenPath +from Products.ZenUtils.Utils import unused, zenPath, wait from Products.Zuul.utils import safe_hasattr as hasattr # needed for Twisted's PB (Perspective Broker) to work @@ -889,17 +889,23 @@ def heartbeat(self, ignored=None): # We start modeling from here to accommodate the startup delay. if not self.started: - if self.immediate == 0 and self.startat: - # This stuff relies on ARBITRARY_BEAT being < 60s - if self.timeMatches(): - self.started = True - self.log.info("Starting modeling...") - reactor.callLater(1, self.main) + if self.immediate == 0: + if self.startat: + # This stuff relies on ARBITRARY_BEAT being < 60s + if self.timeMatches(): + # Run modeling when now=False, startat is not None, and the local time matches startat + self.started = True + self.log.info("Starting modeling...") + reactor.callLater(1, self.main) elif not self.isMainScheduled: + # Or schedule modeling by cycleTime when now=False, startat is None, + # and a cycleTime schedule has not been set yet self.isMainScheduled = True reactor.callLater(self.cycleTime(), self.main) else: - self.started = True + # Go back to the normal modeling schedule (either cron or cycleTime) + # after the first immediate modeling during service startup + self.immediate = 0 self.log.info( "Starting modeling in %s seconds.", self.startDelay ) @@ -941,6 +947,7 @@ def _devicegen_has_items(self): self.devicegen = chain([first], self.devicegen) return result + @defer.inlineCallbacks def checkStop(self, unused=None): """ Check to see if there's anything to do. @@ -980,6 +987,11 @@ def checkStop(self, unused=None): if not self.options.cycle: self.stop() self.finished = [] + # The heartbeat can fire twice per minute, so a cron job that models + # faster than once a minute would be triggered a second time + if runTime < 60 and self.startat is not None: + yield wait(60) + self.started = False def fillCollectionSlots(self, driver): """ @@ -1040,8 +1052,6 @@ def timeMatches(self): Check whether the current time matches a cron-like specification, return a straight true or false """ - if self.startat is None: - return True def match_entity(entity, value): if entity == "*": @@ -1333,7 +1343,7 @@ def mainLoop(self, driver): @return: Twisted deferred object @rtype: Twisted deferred object """ - if self.options.cycle: + if self.options.cycle and self.startat is None: self.isMainScheduled = True driveLater(self.cycleTime(), self.mainLoop) diff --git a/Products/Jobber/bin.py b/Products/Jobber/bin.py index 5131b57edf..e6cd5bac6e 100644 --- a/Products/Jobber/bin.py +++ b/Products/Jobber/bin.py @@ -7,7 +7,6 @@ # ############################################################################## - def main(): import sys @@ -16,6 +15,9 @@ def main(): from celery.bin.celery import main from Products.ZenUtils.Utils import load_config + # Work-around for celery's `--help` bug. + _print_help_when_requested() + # Dynamic configuration shenanigans because Celery can't be re-configured # after its initial configuration has been set.
_configure_celery() @@ -28,11 +30,59 @@ def main(): sys.exit(main()) +# Note: an empty tuple implies repetition of the key +_import_names = { + "inspect": ("control", "inspect"), + "list": ("list", "list_"), + "report": ("celery", "report"), + "help": ("celery", "help"), +} + + +def _get_command(modname, cmdname): + import importlib + + module = importlib.import_module("celery.bin.{}".format(modname)) + return getattr(module, cmdname) + + +def _print_help_when_requested(): + import sys + from Products.Jobber.zenjobs import app + + if "--help" not in sys.argv: + return + + name = sys.argv[1] + + if name == "--help": + sys.argv[1:] = ["help"] + return + + if name == "monitor": + from Products.Jobber.monitor.command import MonitorCommand + + w = MonitorCommand(app=app) + p = w.create_parser("zenjobs", "monitor") + else: + modname, cmdname = _import_names.get(sys.argv[1], (name, name)) + command = _get_command(modname, cmdname) + cmd = command(app=app) + p = cmd.create_parser(sys.argv[0], name) + + p.print_help() + sys.exit(0) + + def _configure_celery(): import argparse import sys from Products.Jobber import config + # If '--help' was passed as an argument, don't attempt configuration. + if "--help" in sys.argv: + return + parser = argparse.ArgumentParser() parser.add_argument("--config-file") @@ -41,5 +91,5 @@ def _configure_celery(): return cfg = config.getConfig(args.config_file) - config.ZenCeleryConfig = config.CeleryConfig.from_config(cfg) + config.ZenCeleryConfig = config.from_config(cfg) sys.argv[1:] = remainder diff --git a/Products/Jobber/config.py b/Products/Jobber/config.py index d4da489d6b..721279bc3b 100644 --- a/Products/Jobber/config.py +++ b/Products/Jobber/config.py @@ -142,27 +142,28 @@ class CeleryConfig(object): CELERYBEAT_REDIRECT_STDOUTS = attr.ib(default=True) CELERYBEAT_REDIRECT_STDOUTS_LEVEL = attr.ib(default="INFO") - @classmethod - def from_config(cls, cfg={}): - args = { - "broker_url": buildBrokerUrl(cfg), - "result_backend": cfg.get("redis-url"), - "result_expires": cfg.get("zenjobs-job-expires"), - "worker_concurrency": cfg.get("concurrent-jobs"), - "worker_max_tasks_per_child": cfg.get("max-jobs-per-worker"), - "task_time_limit": cfg.get("job-hard-time-limit"), - "task_soft_time_limit": cfg.get("job-soft-time-limit"), - "beat_max_loop_interval": cfg.get( - "scheduler-max-loop-interval" - ), - "worker_proc_alive_timeout": cfg.get("zenjobs-worker-alive-timeout"), - } - tz = os.environ.get("TZ") - if tz: - args["timezone"] = tz - - return cls(**args) + +def from_config(cfg=None): + cfg = cfg if cfg is not None else {} + args = { + "broker_url": buildBrokerUrl(cfg), + "result_backend": cfg.get("redis-url"), + "result_expires": cfg.get("zenjobs-job-expires"), + "worker_concurrency": cfg.get("concurrent-jobs"), + "worker_max_tasks_per_child": cfg.get("max-jobs-per-worker"), + "task_time_limit": cfg.get("job-hard-time-limit"), + "task_soft_time_limit": cfg.get("job-soft-time-limit"), + "beat_max_loop_interval": cfg.get( + "scheduler-max-loop-interval" + ), + "worker_proc_alive_timeout": cfg.get("zenjobs-worker-alive-timeout"), + } + tz = os.environ.get("TZ") + if tz: + args["timezone"] = tz + + return CeleryConfig(**args) # Initialized with default values (for when --config-file is not specified) -ZenCeleryConfig = CeleryConfig.from_config(getConfig()) +ZenCeleryConfig = from_config(getConfig()) diff --git a/Products/Jobber/log.py b/Products/Jobber/log.py index a9be43fe34..21b64c322a 100644 --- a/Products/Jobber/log.py +++ b/Products/Jobber/log.py @@ -419,7 +419,7 @@ 
def run(self): def _get_hash(config): - return hashlib.md5( + return hashlib.sha256( "".join("{0}{1}".format(k, config[k]) for k in sorted(config)) ).hexdigest() diff --git a/Products/Jobber/monitor.py b/Products/Jobber/monitor.py deleted file mode 100644 index 1d408da8c0..0000000000 --- a/Products/Jobber/monitor.py +++ /dev/null @@ -1,137 +0,0 @@ -############################################################################## -# -# Copyright (C) Zenoss, Inc. 2019, all rights reserved. -# -# This content is made available according to terms specified in -# License.zenoss under the directory where your Zenoss product is installed. -# -############################################################################## - -from __future__ import absolute_import, print_function - -# import ast - -from collections import defaultdict -from datetime import timedelta - -from celery.bin.base import Command -from zope.component import getUtility - -from .interfaces import IJobStore -from .utils.datetime import humanize_timedelta - - -def catch_error(f): - # Decorator that catches and prints the exception thrown from the - # decorated function. - def call_func(*args, **kw): - try: - return f(*args, **kw) - except Exception as ex: - print(ex) - - return call_func - - -class ZenJobsMonitor(Command): - """Monitor Celery events.""" - - @catch_error - def task_failed(self, event): - self.state.event(event) - jobid = event["uuid"] - instance = self.state.tasks.get(jobid) - job = self.app.tasks.get(instance.name) - result = job.AsyncResult(jobid) - classkey, summary = _getErrorInfo(self.app, result.result) - # args = ast.literal_eval(instance.args) - # kwargs = ast.literal_eval(instance.kwargs) - name = job.getJobType() if hasattr(job, "getJobType") else job.name - print( - "Job failed worker=%s jobid=%s name=%s" - % (event["hostname"], jobid, name) - ) - - def run(self, **kw): - self.state = self.app.events.State( - on_node_join=on_node_join, - on_node_leave=on_node_leave, - ) - self.seconds_since = defaultdict(float) - self.storage = getUtility(IJobStore, "redis") - - conn = self.app.connection().clone() - - def _error_handler(exc, interval): - print("Internal error: %s" % (exc,)) - - while True: - print("Begin monitoring for zenjobs/celery events") - try: - conn.ensure_connection(_error_handler) - recv = self.app.events.Receiver( - conn, - handlers={ - "task-failed": self.task_failed, - "*": self.state.event, - }, - ) - recv.capture(wakeup=True) - except (KeyboardInterrupt, SystemExit): - return conn and conn.close() - except conn.connection_errors + conn.channel_errors: - print("Connection lost, attempting reconnect") - - -def _getTimeoutSummary(app, ex): - return "Job killed after {}.".format( - humanize_timedelta( - timedelta( - seconds=app.conf.get("task_soft_time_limit"), - ), - ), - ) - - -def _getAbortedSummary(app, ex): - return "Job aborted by user" - - -def _getErrorSummary(app, ex): - return "{0.__class__.__name__}: {0}".format(ex) - - -_error_eventkey_map = { - "TaskAborted": ("zenjobs-aborted", _getAbortedSummary), - "SoftTimeLimitExceeded": ("zenjobs-timeout", _getTimeoutSummary), -} - - -def _getErrorInfo(app, ex): - """Returns (eventkey, summary).""" - key, summary_fn = _error_eventkey_map.get( - type(ex).__name__, ("zenjobs-failure", _getErrorSummary) - ) - return key, summary_fn(app, ex) - - -def on_node_join(*args, **kw): - worker = args[0] - print( - "Worker node added to monitor worker=%s uptime=%s" - % ( - worker.hostname, - humanize_timedelta(timedelta(seconds=worker.clock)), - ), - ) - - 
-def on_node_leave(*args, **kw): - worker = args[0] - print( - "Worker node left monitor worker=%s uptime=%s" - % ( - worker.hostname, - humanize_timedelta(timedelta(seconds=worker.clock)), - ), - ) diff --git a/Products/Jobber/monitor/__init__.py b/Products/Jobber/monitor/__init__.py new file mode 100644 index 0000000000..dd3ac4a06c --- /dev/null +++ b/Products/Jobber/monitor/__init__.py @@ -0,0 +1,12 @@ +############################################################################## +# +# Copyright (C) Zenoss, Inc. 2024, all rights reserved. +# +# This content is made available according to terms specified in +# License.zenoss under the directory where your Zenoss product is installed. +# +############################################################################## + +from .command import MonitorCommand + +__all__ = ("MonitorCommand",) diff --git a/Products/Jobber/monitor/broker.py b/Products/Jobber/monitor/broker.py new file mode 100644 index 0000000000..bc15309646 --- /dev/null +++ b/Products/Jobber/monitor/broker.py @@ -0,0 +1,76 @@ +############################################################################## +# +# Copyright (C) Zenoss, Inc. 2024, all rights reserved. +# +# This content is made available according to terms specified in +# License.zenoss under the directory where your Zenoss product is installed. +# +############################################################################## + +from __future__ import absolute_import, print_function + +import requests + +from six.moves.urllib.parse import urlparse, urljoin, quote_plus, unquote + +from .logger import getLogger + + +class Broker(object): + def __new__(cls, broker_url): + scheme = urlparse(broker_url).scheme + if scheme == "amqp": + return RabbitMQ(broker_url) + + +class RabbitMQ(object): + """Just enough API to satisfy collecting metrics from the broker.""" + + def __init__(self, broker_url): + parsed = urlparse(broker_url) + self._host = parsed.hostname + self._port = 15672 + self._vhost = quote_plus(parsed.path[1:]) + username = parsed.username + password = parsed.password + self._username = unquote(username) if username else username + self._password = unquote(password) if password else password + self._http_api = ( + "http://{username}:{password}@{host}:{port}/api/" + ).format( + username=self._username, + password=self._password, + host=self._host, + port=self._port, + ) + self._log = getLogger(self) + + def queues(self, names): + if not names: + return () + attempts = 1 + timeout = 1.0 + url = urljoin(self._http_api, "queues/" + self._vhost) + params = {"columns": ",".join(["name", "messages"])} + while True: + try: + r = requests.get(url, params=params, timeout=timeout) + except requests.Timeout: + if attempts < 3: + attempts += 1 + timeout *= 2 + else: + self._log.warning( + "timed out requesting data from RabbitMQ" + ) + return () + except Exception: + self._log.exception( + "unexpected error while requesting data from RabbitMQ" + ) + else: + break + + if r.status_code != 200: + r.raise_for_status() + return tuple(q for q in r.json() if q["name"] in names) diff --git a/Products/Jobber/monitor/collector.py b/Products/Jobber/monitor/collector.py new file mode 100644 index 0000000000..fde6df5f61 --- /dev/null +++ b/Products/Jobber/monitor/collector.py @@ -0,0 +1,183 @@ +############################################################################## +# +# Copyright (C) Zenoss, Inc. 2024, all rights reserved. 
+# +# This content is made available according to terms specified in +# License.zenoss under the directory where your Zenoss product is installed. +# +############################################################################## + +from __future__ import absolute_import, print_function + +import logging +import math +import threading +import time + +from itertools import chain + +from Products.ZenUtils.controlplane import configuration as cc_config + +from .logger import getLogger + +# from itertools import izip_longest + +# Metrics +# ------- +# celery.<service>.pending.count - Count of queued tasks +# celery.<service>.running.count - Count of running tasks +# celery.<service>.cycletime.mean - Average runtime of tasks +# celery.<service>.leadtime.mean - Average lifetime of tasks +# celery.<service>.success.percent - Percentage of successful runs +# celery.<service>.failure.percent - Percentage of failed runs +# celery.<service>.retry.percent - Percentage of retried runs +# +# Where <service> is "zenjobs" or "builder" and <jobname> is the +# lower-cased name of the job. + + +class MetricsCollector(threading.Thread): + def __init__(self, broker, inspector, reporter, metrics, interval=60): + super(MetricsCollector, self).__init__() + self._broker = broker + self._inspector = inspector + self._metrics = metrics + self._reporter = reporter + self._interval = interval + self._stopEvent = threading.Event() + self._log = getLogger(self) + + def stop(self): + self._stopEvent.set() + + def run(self): + while not self._stopEvent.is_set(): + self._stopEvent.wait(self._interval) + if not self._stopEvent.is_set(): + try: + self.task() + except Exception: + self._log.exception("error while collecting metrics") + + def task(self): + self._log.debug("begin metric collection") + try: + running_counts = self._inspector.running_counts() + if not running_counts: + self._log.warning("count of running tasks not collected") + services = self._inspector.workers() + if not services: + self._log.warning("no information about workers") + queues = { + str(queue["name"]): queue["messages"] + for queue in self._broker.queues( + [info["queue"] for info in services.values()] + ) + } + if not queues: + self._log.warning("no information about queues") + report = self._metrics.report() + + mgen = _MetricGenerator(services, running_counts, queues, report) + + common_tags = { + "serviceId": cc_config.service_id, + "tenantId": cc_config.tenant_id, + } + with self._reporter.session(tags=common_tags) as session: + for metric in mgen(): + session.add(**metric) + + if self._log.getEffectiveLevel() == logging.DEBUG: + for metric in session.metrics: + self._log.debug(metric) + finally: + self._log.debug("finished metric collection") + + +class _MetricGenerator(object): + def __init__(self, services, running_counts, queues, report): + self._now = time.time() + self._running_counts = running_counts + self._services = services + self._serviceids = { + str(name): str(info["serviceid"]) + for name, info in services.iteritems() + } + self._queues = queues + self._report = report + + def __call__(self): + return chain(self._counts(), self._percents(), self._timings()) + + def _counts(self): + for service, info in self._services.iteritems(): + pending_count = self._queues.get(info["queue"]) + if pending_count is not None: + yield ( + { + "metric": "celery.{}.pending.count".format(service), + "value": pending_count, + "timestamp": self._now, + } + ) + running_count = self._running_counts.get(service) + if running_count is not None: + yield ( + { + "metric": "celery.{}.running.count".format(service), + "value":
running_count, + "timestamp": self._now, + } + ) + + def _percents(self): + results = self._report.get("results") + for service, result in results.iteritems(): + success = result["success_percent"] + failure = result["failure_percent"] + retry = result["retry_percent"] + if not math.isnan(success): + yield ( + { + "metric": "celery.{}.success.percent".format(service), + "value": success, + "timestamp": self._now, + } + ) + if not math.isnan(failure): + yield ( + { + "metric": "celery.{}.failure.percent".format(service), + "value": failure, + "timestamp": self._now, + } + ) + if not math.isnan(retry): + yield ( + { + "metric": "celery.{}.retry.percent".format(service), + "value": retry, + "timestamp": self._now, + } + ) + + def _timings(self): + cycletime_services = self._report["cycletime"]["services"] + leadtime_services = self._report["leadtime"]["services"] + for service, cycletimes in cycletime_services.iteritems(): + yield ( + { + "metric": "celery.{}.cycletime.mean".format(service), + "value": cycletimes["mean"], + "timestamp": self._now, + } + ) + leadtimes = leadtime_services.get(service) + yield ( + { + "metric": "celery.{}.leadtime.mean".format(service), + "value": leadtimes["mean"], + "timestamp": self._now, + } + ) diff --git a/Products/Jobber/monitor/command.py b/Products/Jobber/monitor/command.py new file mode 100644 index 0000000000..3e1c27fbb3 --- /dev/null +++ b/Products/Jobber/monitor/command.py @@ -0,0 +1,121 @@ +############################################################################## +# +# Copyright (C) Zenoss, Inc. 2024, all rights reserved. +# +# This content is made available according to terms specified in +# License.zenoss under the directory where your Zenoss product is installed. +# +############################################################################## + +from __future__ import absolute_import, print_function + +import Queue +import signal + +from celery.bin.base import Command + +from Products.ZenCollector.configcache.app.args import ZenHelpFormatter +from Products.ZenUtils.config import ConfigLoader +from Products.ZenUtils.Utils import zenPath + +from .broker import Broker +from .collector import MetricsCollector +from .events import EventsMonitor +from .handler import EventsHandler +from .inspector import Inspector +from .logger import configure_logging, getLogger +from .metrics import ZenJobsMetrics +from .reporter import MetricsReporter + + +class MonitorCommand(Command): + # Override create_parser to get a different formatter class. + # @override + def create_parser(self, prog_name, command=None): + # for compatibility with optparse usage. + usage = self.usage(command).replace("%prog", "%(prog)s") + parser = self.Parser( + prog=prog_name, + usage=usage, + epilog=self._format_epilog(self.epilog), + formatter_class=ZenHelpFormatter, + description=self._format_description(self.description), + ) + self._add_version_argument(parser) + self.add_preload_arguments(parser) + self.add_arguments(parser) + self.add_compat_options(parser, self.get_options()) + self.add_compat_options(parser, self.app.user_options["preload"]) + + if self.supports_args: + # for backward compatibility with optparse, we automatically + # add arbitrary positional args. 
+ parser.add_argument(self.args_name, nargs="*") + return self.prepare_parser(parser) + + # @override + def add_arguments(self, parser): + parser.add_argument( + "--conf-file", + default=zenPath("etc", "zenjobs-monitor.conf"), + help="Pathname of configuration file", + ) + + # @override + def run(self, *args, **options): + conf_file = options["conf_file"] + config = ConfigLoader(conf_file)() + metric_interval = config.getint("metric-interval") + log_filename = config.get("log-filename") + log_level = config.get("log-level") + log_max_file_count = config.getint("log-max-file-count") + log_max_file_size = config.getint("log-max-file-size") * 1024 + configure_logging( + level=log_level, + filename=log_filename, + maxcount=log_max_file_count, + maxsize=log_max_file_size, + ) + log = getLogger(self) + try: + eventqueue = Queue.Queue() + reporter = MetricsReporter() + metrics = ZenJobsMetrics() + broker_url = self.app.connection().as_uri(include_password=True) + + broker = Broker(broker_url) + inspector = Inspector(self.app) + + handler = EventsHandler(eventqueue, metrics, self.app) + monitor = EventsMonitor(eventqueue, self.app) + collector = MetricsCollector( + broker, inspector, reporter, metrics, metric_interval + ) + + handler.start() + monitor.start() + collector.start() + + state = {"shutdown": False} + + def _handle_signal(state, signum, frame): + state["shutdown"] = True + + signal.signal( + signal.SIGTERM, lambda sn, fr: _handle_signal(state, sn, fr) + ) + + while True: + try: + signal.pause() + if state["shutdown"]: + break + except (KeyboardInterrupt, SystemExit): + break + except Exception: + log.exception("unexpected error") + finally: + collector.stop() + handler.stop() + handler.join() + collector.join(timeout=1.0) diff --git a/Products/Jobber/monitor/events.py b/Products/Jobber/monitor/events.py new file mode 100644 index 0000000000..89ef656f16 --- /dev/null +++ b/Products/Jobber/monitor/events.py @@ -0,0 +1,53 @@ +############################################################################## +# +# Copyright (C) Zenoss, Inc. 2024, all rights reserved. +# +# This content is made available according to terms specified in +# License.zenoss under the directory where your Zenoss product is installed. +# +############################################################################## + +from __future__ import absolute_import, print_function + +import threading +import time + +from celery.events import EventReceiver + +from .logger import getLogger + + +class EventsMonitor(threading.Thread): + + daemon = True # doesn't block shutdown + + def __init__(self, sink, app): + """Initialize an EventsMonitor instance. + + @param sink: Events are written to this object. 
+ @type sink: Queue.Queue + @param app: The Celery application + @type app: celery.Celery + """ + super(EventsMonitor, self).__init__() + self._sink = sink + self._app = app + self._log = getLogger(self) + + def run(self): + try_interval = 1 + while True: + try: + try_interval *= 2 + with self._app.connection() as conn: + recv = EventReceiver( + conn, handlers={"*": self._put}, app=self._app + ) + try_interval = 1 + recv.capture(limit=None, timeout=None, wakeup=True) + except Exception: + self._log.exception("unexpected error") + time.sleep(try_interval) + + def _put(self, event): + self._sink.put(event) diff --git a/Products/Jobber/monitor/handler.py b/Products/Jobber/monitor/handler.py new file mode 100644 index 0000000000..73612e39c8 --- /dev/null +++ b/Products/Jobber/monitor/handler.py @@ -0,0 +1,145 @@ +############################################################################## +# +# Copyright (C) Zenoss, Inc. 2024, all rights reserved. +# +# This content is made available according to terms specified in +# License.zenoss under the directory where your Zenoss product is installed. +# +############################################################################## + +from __future__ import absolute_import, print_function + +import Queue +import threading + +from collections import defaultdict + +from celery.events.state import State + +from .logger import getLogger + + +class EventsHandler(threading.Thread): + def __init__(self, source, metrics, app): + """Initialize an EventsHandler instance. + + @param source: Events are read from this object. + @type source: Queue.Queue + @param metrics: + @type metrics: ZenJobsMetrics + @param app: The Celery application + @type app: celery.Celery + """ + super(EventsHandler, self).__init__() + self._source = source + self._metrics = metrics + self._app = app + self._stopEvent = threading.Event() + self._queue_svc_map = {} + self._handlers = { + "worker-online": self._online, + "worker-offline": self._offline, + "task-sent": self._sent, + "task-succeeded": self._succeeded, + "task-retried": self._retried, + "task-failed": self._failed, + } + self._heartbeats = defaultdict(int) + self._log = getLogger(self) + + def run(self): + self._log.info("started handling celery events") + state = State() + while not self._stopEvent.is_set(): + try: + event = self._source.get(True, 0.5) + state.event(event) + + event_type = event["type"] + + handler = self._handlers.get(event_type) + if not handler: + continue + + if event_type.startswith("task-"): + task_id = event["uuid"] + arg = state.tasks.get(task_id) + else: + arg = state.workers.get(event["hostname"]) + + try: + handler(arg) + except Exception: + self._log.exception("event handler failed: %r", handler) + except Queue.Empty: + pass + except Exception: + self._log.exception("unexpected error") + self._log.info("stopped handling celery events") + + def stop(self): + self._stopEvent.set() + + def _get_svc_from_node(self, node): + return node.split("@")[0].split("-")[0] + + def _online(self, worker): + self._log.info("worker online worker=%s", worker.hostname) + + def _offline(self, worker): + self._log.warning("worker offline worker=%s", worker.hostname) + + def _build_queue_svc_mapping(self): + inspect = self._app.control.inspect() + active_queues = inspect.active_queues() + for node, queues in active_queues.items(): + svcname = self._get_svc_from_node(node) + qname = queues[0]["name"] + if qname not in self._queue_svc_map: + self._queue_svc_map[qname] = svcname + + def _get_svc_from_queue(self, qname): 
+ if qname not in self._queue_svc_map: + self._build_queue_svc_mapping() + return self._queue_svc_map.get(qname) + + def _sent(self, task): + if not task.sent: + return + svcid = self._get_svc_from_queue(task.queue) + if svcid is None: + self._log.warning( + "no service for tasks on queue '%s' found", task.queue + ) + else: + with self._metrics as updater: + updater.count_sent(svcid) + + def _succeeded(self, task): + if not task.received or not task.started: + return + svcid = self._get_svc_from_node(task.hostname) + with self._metrics as updater: + updater.mark_success(svcid) + updater.add_task_runtime(svcid, task.name, task.runtime) + _completed(task, svcid, updater) + + def _failed(self, task): + svcid = self._get_svc_from_node(task.hostname) + with self._metrics as updater: + updater.mark_failure(svcid) + _completed(task, svcid, updater) + + def _retried(self, task): + svcid = self._get_svc_from_node(task.hostname) + with self._metrics as updater: + updater.mark_retry(svcid) + _completed(task, svcid, updater) + + +def _completed(task, svcid, metrics): + if not task.sent: + return + leadtime = task.timestamp - task.sent + metrics.count_completed(svcid) + metrics.add_task_leadtime(svcid, task.name, leadtime) diff --git a/Products/Jobber/monitor/inspector.py b/Products/Jobber/monitor/inspector.py new file mode 100644 index 0000000000..50d15391a1 --- /dev/null +++ b/Products/Jobber/monitor/inspector.py @@ -0,0 +1,59 @@ +############################################################################## +# +# Copyright (C) Zenoss, Inc. 2024, all rights reserved. +# +# This content is made available according to terms specified in +# License.zenoss under the directory where your Zenoss product is installed. +# +############################################################################## + +from collections import defaultdict + +from .logger import getLogger + + +class Inspector(object): + """Just enough API to satisfy collecting metrics from Celery.""" + + def __init__(self, app, timeout=10): + self._app = app + self._timeout = timeout + self._workers = defaultdict(dict) + self._log = getLogger(self) + + def running_counts(self): + inspect = self._app.control.inspect(timeout=self._timeout) + result = inspect.active() + if result is None or "error" in result: + self._log.warning("inspect method 'active' failed: %s", result) + return {} + running = {} + for node, tasks in result.items(): + service = _get_service_from_node(node) + count = running.get(service, 0) + running[service] = count + len(tasks) + return running + + def workers(self): + inspect = self._app.control.inspect(timeout=self._timeout) + result = inspect.active_queues() + if result is None or "error" in result: + self._log.warning( + "inspect method 'active_queues' failed: %s", result + ) + return {} + return { + _get_service_from_node(node): { + "serviceid": _get_serviceid_from_node(node), + "queue": data[0]["name"], + } + for node, data in result.items() + } + + +def _get_serviceid_from_node(node): + return node.split("@")[1] + + +def _get_service_from_node(node): + return node.split("@")[0].split("-")[0] diff --git a/Products/Jobber/monitor/logger.py b/Products/Jobber/monitor/logger.py new file mode 100644 index 0000000000..42f59a767b --- /dev/null +++ b/Products/Jobber/monitor/logger.py @@ -0,0 +1,73 @@ +############################################################################## +# +# Copyright (C) Zenoss, Inc. 2024, all rights reserved. 
+# +# This content is made available according to terms specified in +# License.zenoss under the directory where your Zenoss product is installed. +# +############################################################################## + +from __future__ import absolute_import, print_function + +import copy +import logging +import logging.config + + +def getLogger(obj): + return logging.getLogger( + "zen.zenjobs.monitor.{}".format( + type(obj).__module__.split(".")[-1].lower() + ) + ) + + +def configure_logging(level=None, filename=None, maxcount=None, maxsize=None): + config = copy.deepcopy(_logging_config) + common_handler = config["handlers"]["default"] + common_handler.update( + { + "filename": filename, + "maxBytes": maxsize, + "backupCount": maxcount, + } + ) + config["loggers"]["zen.zenjobs.monitor"]["level"] = level.upper() + logging.config.dictConfig(config) + + +_logging_config = { + "version": 1, + "disable_existing_loggers": True, + "formatters": { + "default": { + "format": ( + "%(asctime)s.%(msecs).0f %(levelname)s %(name)s: %(message)s" + ), + "datefmt": "%Y-%m-%d %H:%M:%S", + }, + }, + "handlers": { + "default": { + "formatter": "default", + "class": "cloghandler.ConcurrentRotatingFileHandler", + "filename": None, + "maxBytes": None, + "backupCount": None, + "mode": "a", + "filters": [], + }, + }, + "loggers": { + "zen": { + "level": "INFO", + "handlers": ["default"], + }, + "zen.zenjobs.monitor": { + "level": "INFO", + }, + }, + "root": { + "handlers": [], + }, +} diff --git a/Products/Jobber/monitor/metrics.py b/Products/Jobber/monitor/metrics.py new file mode 100644 index 0000000000..5d6ba4127f --- /dev/null +++ b/Products/Jobber/monitor/metrics.py @@ -0,0 +1,225 @@ +############################################################################## +# +# Copyright (C) Zenoss, Inc. 2024, all rights reserved. +# +# This content is made available according to terms specified in +# License.zenoss under the directory where your Zenoss product is installed. 
+# +############################################################################## + +from __future__ import absolute_import, print_function + +from collections import defaultdict +from threading import RLock + +from metrology.instruments import HistogramUniform, Meter +from metrology.instruments.gauge import PercentGauge + + +class ZenJobsMetrics(object): + def __init__(self): + self._lock = RLock() # synchronize thread access + self._metrics = _BagOfMetrics() + + def __enter__(self): + self._lock.acquire() + return self._metrics + + def __exit__(self, *exc_info): + self._lock.release() + + def report(self): + with self._lock: + cycletime = _get_timings( + self._metrics.cycletime, + self._metrics.cycletime_task, + self._metrics.cycletime_service, + self._metrics.cycletime_service_task, + ) + leadtime = _get_timings( + self._metrics.leadtime, + self._metrics.leadtime_task, + self._metrics.leadtime_service, + self._metrics.leadtime_service_task, + ) + + results = { + service: { + "success_rate": self._metrics.successes[service].mean_rate, + "success_percent": self._metrics.success_pct[ + service + ].value, + "retry_rate": self._metrics.retries[service].mean_rate, + "retry_percent": self._metrics.retry_pct[service].value, + "failure_rate": self._metrics.failures[service].mean_rate, + "failure_percent": self._metrics.failure_pct[ + service + ].value, + } + for service in self._metrics.services + } + + return { + "cycletime": cycletime, + "leadtime": leadtime, + "results": results, + } + + +class _BagOfMetrics(object): + def __init__(self): + # cache of service IDs + self.services = set() + + # Task runtimes; + # {service-id: {task-name: histogram}} + self.cycletime_service_task = {} + # {task-name: histogram} + self.cycletime_task = {} + # {service-id: histogram} + self.cycletime_service = {} + # All tasks on all services + self.cycletime = HistogramUniform() + + # Total lifetime of tasks; + # {service-id: {task-name: histogram}} + self.leadtime_service_task = {} + # {task-name: histogram} + self.leadtime_task = {} + # {service-id: histogram} + self.leadtime_service = {} + # All tasks on all services + self.leadtime = HistogramUniform() + + # Task run rates + # {service-id: meter} + self.failures = defaultdict(Meter) + self.retries = defaultdict(Meter) + self.successes = defaultdict(Meter) + self.completed = defaultdict(Meter) + + # Percentages by service + # {service-id: PercentGauge} + self.success_pct = PercentMetricsGroup(self.successes, self.completed) + self.failure_pct = PercentMetricsGroup(self.failures, self.completed) + self.retry_pct = PercentMetricsGroup(self.retries, self.completed) + + def add_task_runtime(self, service, task, runtime): + millisecs = int(runtime * 1000) + + if service not in self.cycletime_service_task: + self.cycletime_service_task[service] = {} + if task not in self.cycletime_service_task[service]: + self.cycletime_service_task[service][task] = HistogramUniform() + self.cycletime_service_task[service][task].update(millisecs) + + if task not in self.cycletime_task: + self.cycletime_task[task] = HistogramUniform() + self.cycletime_task[task].update(millisecs) + + if service not in self.cycletime_service: + self.cycletime_service[service] = HistogramUniform() + self.cycletime_service[service].update(millisecs) + + self.cycletime.update(millisecs) + self.services.add(service) + + def add_task_leadtime(self, service, task, leadtime): + millisecs = int(leadtime * 1000) + + if service not in self.leadtime_service_task: + self.leadtime_service_task[service] = {} + if 
task not in self.leadtime_service_task[service]: + self.leadtime_service_task[service][task] = HistogramUniform() + self.leadtime_service_task[service][task].update(millisecs) + + if task not in self.leadtime_task: + self.leadtime_task[task] = HistogramUniform() + self.leadtime_task[task].update(millisecs) + + if service not in self.leadtime_service: + self.leadtime_service[service] = HistogramUniform() + self.leadtime_service[service].update(millisecs) + + self.leadtime.update(millisecs) + self.services.add(service) + + def count_sent(self, service): + self.services.add(service) + + def count_completed(self, service): + self.completed[service].mark() + self.services.add(service) + + def mark_success(self, service): + self.successes[service].mark() + self.services.add(service) + + def mark_retry(self, service): + self.retries[service].mark() + self.services.add(service) + + def mark_failure(self, service): + self.failures[service].mark() + self.services.add(service) + + +def _get_timings(total, bytask, byservice, byservicetask): + return { + "min": total.min, + "mean": total.mean, + "max": total.max, + "tasks": { + task: {"min": metric.min, "mean": metric.mean, "max": metric.max} + for task, metric in bytask.iteritems() + }, + "services": { + service: { + "min": metric.min, + "mean": metric.mean, + "max": metric.max, + "tasks": { + task: { + "min": metric.min, + "mean": metric.mean, + "max": metric.max, + } + for task, metric in byservicetask.get( + service, {} + ).iteritems() + }, + } + for service, metric in byservice.iteritems() + }, + } + + +class PercentMetricsGroup(object): + def __init__(self, numerators, denominators): + self._nums = numerators + self._dens = denominators + self._metrics = {} + + def get(self, name, default=None): + metric = self._metrics.get(name) + if metric is None: + metric = _TwoCountersGauge(self._nums[name], self._dens[name]) + self._metrics[name] = metric + return metric + + def __getitem__(self, key): + return self.get(key) + + +class _TwoCountersGauge(PercentGauge): + def __init__(self, numerator, denominator): + self._num = numerator + self._den = denominator + + # @override + def numerator(self): + return self._num.count + + # @override + def denominator(self): + return self._den.count diff --git a/Products/Jobber/monitor/reporter.py b/Products/Jobber/monitor/reporter.py new file mode 100644 index 0000000000..68e97336de --- /dev/null +++ b/Products/Jobber/monitor/reporter.py @@ -0,0 +1,100 @@ +############################################################################## +# +# Copyright (C) Zenoss, Inc. 2024, all rights reserved. +# +# This content is made available according to terms specified in +# License.zenoss under the directory where your Zenoss product is installed. 
+# +############################################################################## + +import contextlib +import json + +import attr +import requests + +from attr.validators import instance_of, deep_mapping + +from Products.ZenUtils.controlplane import configuration as cc_config +from Products.ZenUtils.MetricReporter import DEFAULT_METRIC_URL + +from .logger import getLogger + + +class MetricsReporter(object): + def __init__(self, url=None, prefix=""): + if not url: + url = cc_config.consumer_url + if not url: + url = DEFAULT_METRIC_URL + self._url = url + self._log = getLogger(self) + + @contextlib.contextmanager + def session(self, tags=None): + session = _Session(tags if tags is not None else {}) + try: + yield session + except Exception: + self._log.exception("metrics reporting session failed") + else: + self._post(session.metrics) + + def _post(self, metrics): + if not metrics: + return + session = requests.Session() + session.headers.update( + { + "Content-Type": "application/json", + "User-Agent": "Zenoss Service Metrics", + } + ) + body = {"metrics": [attr.asdict(sample) for sample in metrics]} + self._log.debug("sending metric payload: %s", body) + response = session.post(self._url, data=json.dumps(body)) + if response.status_code != 200: + self._log.warning( + "problem submitting metrics: %s, %s", + response.status_code, + response.text.replace("\n", "\\n"), + ) + else: + self._log.debug("%s metrics posted", len(metrics)) + + def build_metric(self, **kw): + return Metric(**kw) + + +class _Session(object): + def __init__(self, tags): + self._tags = tags + self.metrics = [] + + def add(self, metric, value, timestamp, tags=None): + tags = tags if tags is not None else {} + tags.update(self._tags) + self.metrics.append( + Metric(metric=metric, value=value, timestamp=timestamp, tags=tags) + ) + + +@attr.s(frozen=True, slots=True) +class Metric(object): + metric = attr.ib(converter=str) + value = attr.ib(converter=float) + timestamp = attr.ib(validator=instance_of(float)) + tags = attr.ib( + validator=deep_mapping( + key_validator=instance_of(str), + value_validator=instance_of(str), + mapping_validator=instance_of(dict), + ) + ) + + @tags.validator + def _verify_keys(self, attribute, value): + if "serviceId" not in value: + raise KeyError("Missing 'serviceId' tag") + if "tenantId" not in value: + raise KeyError("Missing 'tenantId' tag") diff --git a/Products/Jobber/storage.py b/Products/Jobber/storage.py index e620cf5a37..aeeb03df43 100644 --- a/Products/Jobber/storage.py +++ b/Products/Jobber/storage.py @@ -148,6 +148,7 @@ def __init__(self, client, expires=None): """ self.__client = client self.__expires = expires + self.__scan_count = 1000 def search(self, **fields): """Return the job IDs for jobs matching the search criteria. 
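A note on the `count` argument threaded through the hunks below: it is redis's SCAN COUNT hint, which asks the server to examine more keys per round trip. It changes only the batching of the scan, not which keys are returned. A minimal sketch of the effect using the redis-py client; the connection details and key pattern here are illustrative, not taken from this module:

import redis

client = redis.StrictRedis(host="localhost", port=6379)

# With the default hint (10), scan_iter makes roughly one network round
# trip per 10 keys examined; a larger hint such as 1000 batches the scan
# and reduces round trips when iterating a large keyspace.
for key in client.scan_iter(match="zenjobs:job:*", count=1000):
    print(key)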
@@ -192,7 +193,9 @@ def get_fields(key): return ( self.__client.hget(key, "jobid") - for key in self.__client.scan_iter(match=_keypattern) + for key in self.__client.scan_iter( + match=_keypattern, count=self.__scan_count + ) if matchers == dict(zip(field_names, get_fields(key))) ) @@ -261,7 +264,9 @@ def keys(self): """ return ( self.__client.hget(key, "jobid") - for key in self.__client.scan_iter(match=_keypattern) + for key in self.__client.scan_iter( + match=_keypattern, count=self.__scan_count + ) ) def values(self): @@ -269,7 +274,7 @@ def values(self): :rtype: Iterator[Dict[str, Union[str, float]]] """ - items = _iteritems(self.__client) + items = _iteritems(self.__client, self.__scan_count) return ( {k: Fields[k].loads(v) for k, v in fields.iteritems()} for _, fields in items @@ -280,7 +285,7 @@ def items(self): :rtype: Iterator[Tuple[str, Dict[str, Union[str, float]]]] """ - items = _iteritems(self.__client) + items = _iteritems(self.__client, self.__scan_count) return ( ( fields["jobid"], @@ -392,7 +397,12 @@ def __contains__(self, jobid): return self.__client.exists(_key(jobid)) def __len__(self): - return sum(1 for _ in self.__client.scan_iter(match=_keypattern)) + return sum( + 1 + for _ in self.__client.scan_iter( + match=_keypattern, count=self.__scan_count + ) + ) def __iter__(self): """Return an iterator producing all the job IDs in the datastore. @@ -416,12 +426,12 @@ def _key(jobid): return _keytemplate.format(jobid) -def _iteritems(client): +def _iteritems(client, count): """Return an iterable of (redis key, job data) pairs. Only (key, data) pairs where data is not None are returned. """ - keys = client.scan_iter(match=_keypattern) + keys = client.scan_iter(match=_keypattern, count=count) raw = ((key, client.hgetall(key)) for key in keys) return ((key, data) for key, data in raw if data) diff --git a/Products/Jobber/utils/log.py b/Products/Jobber/utils/log.py index 4a163bc55b..d0d4b50768 100644 --- a/Products/Jobber/utils/log.py +++ b/Products/Jobber/utils/log.py @@ -11,13 +11,13 @@ import inspect import logging -import logging.config -import logging.handlers import os import sys from functools import wraps +import six + from celery._state import get_current_task from celery.utils.log import ( LoggingProxy as _LoggingProxy, @@ -236,7 +236,7 @@ def __init__(self, log=None, adapter=None, aschild=True): if not isinstance(baselog, logging.getLoggerClass()): raise TypeError("'log' callable does produce a logger") self.baselog = baselog - elif isinstance(log, basestring): + elif isinstance(log, six.string_types): self.baselog = logging.getLogger(log) else: raise TypeError( diff --git a/Products/ZenCollector/configcache/app/base.py b/Products/ZenCollector/configcache/app/base.py index c1854befc0..507df9d048 100644 --- a/Products/ZenCollector/configcache/app/base.py +++ b/Products/ZenCollector/configcache/app/base.py @@ -18,11 +18,12 @@ from MySQLdb import OperationalError +from ..utils import MetricReporter + from .config import add_config_arguments, getConfigFromArguments from .init import initialize_environment from .genconf import GenerateConfig from .logger import add_logging_arguments, setup_logging, setup_debug_logging -from .metrics import MetricManager from .pid import add_pidfile_arguments, pidfile from .zodb import add_zodb_arguments, zodb @@ -38,6 +39,7 @@ def from_args(cls, args): return cls(config, args.task) def __init__(self, config, task): + # config data from config files and CLI args self.config = config self.task = task @@ -57,12 +59,12 @@ def run(self): 
log.info("application has started") try: # Setup Metric Reporting - metric_manager = MetricManager( - daemon_tags={ - "zenoss_daemon": "configcache", - "internal": True, - } + prefix = getattr(self.task, "metric_prefix", "") + metric_reporter = MetricReporter( + tags={"internal": True}, prefix=prefix ) + + # Run the application loop while not controller.shutdown: try: with zodb(self.config) as (db, session, dmd): @@ -71,7 +73,7 @@ def run(self): db, session, dmd, - metric_manager, + metric_reporter, ) self.task(self.config, ctx).run() except OperationalError as oe: @@ -109,7 +111,7 @@ class ApplicationContext(object): db = attr.ib() session = attr.ib() dmd = attr.ib() - metrics = attr.ib() + metric_reporter = attr.ib() class _Controller(object): diff --git a/Products/ZenCollector/configcache/app/metrics.py b/Products/ZenCollector/configcache/app/metrics.py deleted file mode 100644 index 1fc4d0cfb3..0000000000 --- a/Products/ZenCollector/configcache/app/metrics.py +++ /dev/null @@ -1,81 +0,0 @@ -############################################################################## -# -# Copyright (C) Zenoss, Inc. 2023, all rights reserved. -# -# This content is made available according to terms specified in -# License.zenoss under the directory where your Zenoss product is installed. -# -############################################################################## - -from __future__ import absolute_import - -import os - -from Products.ZenUtils.MetricReporter import MetricReporter -from Products.ZenUtils.metricwriter import ( - AggregateMetricWriter, - FilteredMetricWriter, - MetricWriter, -) - -from Products.ZenHub.metricpublisher.publisher import ( - RedisListPublisher, - HttpPostPublisher, -) - - -class MetricManager(object): - """General interface for storing and reporting metrics - metric publisher: publishes metrics to an external system (redis, http) - metric writer: drives metric pulisher(s), calling their .put method - metric reporter: once its .start method is called, - periodically calls writer.write_metric, to publish stored metrics - """ - - def __init__(self, daemon_tags): - self.daemon_tags = daemon_tags - self._metric_writer = None - self._metric_reporter = None - - def start(self): - self.metricreporter.start() - - def stop(self): - self.metricreporter.stop() - - @property - def metricreporter(self): - if not self._metric_reporter: - self._metric_reporter = MetricReporter( - metricWriter=self.metric_writer, tags=self.daemon_tags - ) - - return self._metric_reporter - - @property - def metric_writer(self): - if not self._metric_writer: - self._metric_writer = _cc_metric_writer_factory() - - return self._metric_writer - - -def _cc_metric_writer_factory(): - metric_writer = MetricWriter(RedisListPublisher()) - cc = os.environ.get("CONTROLPLANE", "0") == "1" - internal_url = os.environ.get("CONTROLPLANE_CONSUMER_URL", None) - if cc and internal_url: - username = os.environ.get("CONTROLPLANE_CONSUMER_USERNAME", "") - password = os.environ.get("CONTROLPLANE_CONSUMER_PASSWORD", "") - _publisher = HttpPostPublisher(username, password, internal_url) - internal_metric_writer = FilteredMetricWriter( - _publisher, _internal_metric_filter - ) - metric_writer = AggregateMetricWriter( - [metric_writer, internal_metric_writer] - ) - return metric_writer - - -def _internal_metric_filter(metric, value, timestamp, tags): - return tags and tags.get("internal", False) diff --git a/Products/ZenCollector/configcache/cache/storage.py b/Products/ZenCollector/configcache/cache/storage.py index 
29e1bb3302..9a2db4fa9d 100644 --- a/Products/ZenCollector/configcache/cache/storage.py +++ b/Products/ZenCollector/configcache/cache/storage.py @@ -9,13 +9,13 @@ # Key structure # ============= -# modelchange:device:uid:<device> -# modelchange:device:config:<service>:<monitor>:<device> -# modelchange:device:age:<service>:<monitor> [(<device>, <score>), ...] -# modelchange:device:retired:<service>:<monitor> [(<device>, <score>), ...] -# modelchange:device:expired:<service>:<monitor> [(<device>, <score>), ...] -# modelchange:device:pending:<service>:<monitor> [(<device>, <score>), ...] -# modelchange:device:building:<service>:<monitor> [(<device>, <score>), ...] +# configcache:device:uid:<device> +# configcache:device:config:<service>:<monitor>:<device> +# configcache:device:age:<service>:<monitor> [(<device>, <score>), ...] +# configcache:device:retired:<service>:<monitor> [(<device>, <score>), ...] +# configcache:device:expired:<service>:<monitor> [(<device>, <score>), ...] +# configcache:device:pending:<service>:<monitor> [(<device>, <score>), ...] +# configcache:device:building:<service>:<monitor> [(<device>, <score>), ...] # # While "device" seems redundant, other values in this position could be # "threshold" and "property". @@ -62,7 +62,6 @@ import ast import json import logging -import operator import re from functools import partial @@ -137,7 +136,18 @@ def __contains__(self, key): self.__client, key.service, key.monitor, key.device ) - def search(self, query=CacheQuery()): + def __iter__(self): + """ + Returns an iterable over the known keys. + + @rtype: Iterator[CacheKey] + """ + return iter( + CacheKey(service, monitor, device) + for service, monitor, device in self.__config.scan(self.__client) + ) + + def search(self, query=None): """ Returns the configuration keys matching the search criteria. @@ -146,6 +156,8 @@ def search(self, query=None): @raises TypeError: Unsupported value given for a field @raises AttributeError: Unknown field """ + if query is None: + query = CacheQuery() if not isinstance(query, CacheQuery): raise TypeError("{!r} is not a CacheQuery".format(query)) return self._query(**attr.asdict(query)) @@ -206,6 +218,7 @@ def get_updated(self, key): Return the timestamp of when the config was built. @type key: CacheKey + @rtype: float """ return _to_ts( self.__age.score( @@ -213,6 +226,25 @@ def get_updated(self, key): ) ) + + def query_updated(self, query=None): + """ + Return the last update timestamp of every configuration selected + by the query.
+ + @type query: CacheQuery + @rtype: Iterable[Tuple[CacheKey, float]] + """ + if query is None: + query = CacheQuery() + predicate = self._get_device_predicate(query.device) + return ( + (key, ts) + for key, ts in self._get_metadata( + self.__age, query.service, query.monitor + ) + if predicate(key.device) + ) + def get(self, key, default=None): """ @type key: CacheKey @@ -244,13 +276,13 @@ def remove(self, *keys): self._delete_statuses(pipe, svc, mon, dvc) pipe.execute() - devices = set(key.device for key in keys) - remaining = set( + devices = {key.device for key in keys} + remaining = { key.device for key in chain.from_iterable( self._query(device=dvc) for dvc in devices ) - ) + } deleted = devices - remaining if deleted: self.__uids.delete(self.__client, *deleted) @@ -272,11 +304,18 @@ def clear_status(self, *keys): @type keys: Sequence[CacheKey] """ - with self.__client.pipeline() as pipe: + if len(keys) == 0: + return + + def clear_impl(pipe): + pipe.multi() for key in keys: - svc, mon, dvc = key.service, key.monitor, key.device - self._delete_statuses(pipe, svc, mon, dvc) - pipe.execute() + self._delete_statuses( + pipe, key.service, key.monitor, key.device + ) + + watch_keys = self._get_watch_keys(keys) + self.__client.transaction(clear_impl, *watch_keys) def _delete_statuses(self, pipe, svc, mon, dvc): self.__retired.delete(pipe, svc, mon, dvc) @@ -300,7 +339,7 @@ def _impl(rows, pipe): self.__pending.delete(pipe, svc, mon, dvc) self.__building.delete(pipe, svc, mon, dvc) - self._set_status(pairs, self.__retired, _impl) + self._set_status(pairs, _impl) def set_expired(self, *pairs): """ @@ -318,7 +357,7 @@ def _impl(rows, pipe): self.__pending.delete(pipe, svc, mon, dvc) self.__building.delete(pipe, svc, mon, dvc) - self._set_status(pairs, self.__expired, _impl) + self._set_status(pairs, _impl) def set_pending(self, *pairs): """ @@ -336,7 +375,7 @@ def _impl(rows, pipe): self.__pending.add(pipe, svc, mon, dvc, score) self.__building.delete(pipe, svc, mon, dvc) - self._set_status(pairs, self.__pending, _impl) + self._set_status(pairs, _impl) def set_building(self, *pairs): """ @@ -354,14 +393,14 @@ def _impl(rows, pipe): self.__pending.delete(pipe, svc, mon, dvc) self.__building.add(pipe, svc, mon, dvc, score) - self._set_status(pairs, self.__building, _impl) + self._set_status(pairs, _impl) - def _set_status(self, pairs, table, fn): + def _set_status(self, pairs, fn): if len(pairs) == 0: return watch_keys = self._get_watch_keys(key for key, _ in pairs) - rows = ( + rows = tuple( (key.service, key.monitor, key.device, ts) for key, ts in pairs ) @@ -382,27 +421,28 @@ def _get_watch_keys(self, keys): ) ) - def get_status(self, *keys): + def get_status(self, key): """ - Returns an iterable of ConfigStatus objects. + Returns the current status of the config identified by `key`. - @rtype: Iterable[ConfigStatus] + @type key: CacheKey + @rtype: ConfigStatus | None """ - for key in keys: - scores = self._get_scores(key) - uid = self.__uids.get(self.__client, key.device) - status = self._get_status_from_scores(scores, key, uid) - if status is not None: - yield status + scores = self._get_scores(key) + if not any(scores): + return None + uid = self.__uids.get(self.__client, key.device) + return self._get_status_from_scores(scores, key, uid) - def get_statuses(self, query=CacheQuery()): + def query_statuses(self, query=None): """ Return all status objects matching the query. 
@type query: CacheQuery @rtype: Iterable[ConfigStatus] """ - statuses = [] + if query is None: + query = CacheQuery() keys = set() uids = {} tables = ( @@ -411,22 +451,7 @@ def get_statuses(self, query=CacheQuery()): (self.__pending, ConfigStatus.Pending), (self.__building, ConfigStatus.Building), ) - - def accept_all(_): - return True - - def filter_regex(regex, value): - m = regex.match(value) - return m is not None - - if query.device == "*": - predicate = accept_all - elif "*" in query.device: - expr = query.device.replace("*", ".*") - regex = re.compile(expr) - predicate = partial(filter_regex, regex) - else: - predicate = partial(operator.eq, query.device) + predicate = self._get_device_predicate(query.device) for table, cls in tables: for key, ts in self._get_metadata( @@ -435,7 +460,7 @@ def filter_regex(regex, value): if predicate(key.device): keys.add(key) uid = self._get_uid(uids, key.device) - statuses.append(cls(key, uid, ts)) + yield cls(key, uid, ts) for key, ts in self._get_metadata( self.__age, query.service, query.monitor ): @@ -445,8 +470,18 @@ def filter_regex(regex, value): continue if predicate(key.device): uid = self._get_uid(uids, key.device) - statuses.append(ConfigStatus.Current(key, uid, ts)) - return statuses + yield ConfigStatus.Current(key, uid, ts) + + def _get_device_predicate(self, spec): + + if spec == "*": + return lambda _: True + elif "*" in spec: + expr = spec.replace("*", ".*") + regex = re.compile(expr) + return lambda value: regex.match(value) is not None + else: + return lambda value: value == spec def _get_uid(self, uids, device): uid = uids.get(device) diff --git a/Products/ZenCollector/configcache/cli/__init__.py b/Products/ZenCollector/configcache/cli/__init__.py index fb026aaa52..f7409ce684 100644 --- a/Products/ZenCollector/configcache/cli/__init__.py +++ b/Products/ZenCollector/configcache/cli/__init__.py @@ -14,6 +14,7 @@ from .list import List_ from .remove import Remove from .show import Show +from .stats import Stats -__all__ = ("Expire", "List_", "Remove", "Show") +__all__ = ("Expire", "List_", "Remove", "Show", "Stats") diff --git a/Products/ZenCollector/configcache/cli/_groups.py b/Products/ZenCollector/configcache/cli/_groups.py new file mode 100644 index 0000000000..7796a45db3 --- /dev/null +++ b/Products/ZenCollector/configcache/cli/_groups.py @@ -0,0 +1,231 @@ +############################################################################## +# +# Copyright (C) Zenoss, Inc. 2024, all rights reserved. +# +# This content is made available according to terms specified in +# License.zenoss under the directory where your Zenoss product is installed. 
+# +############################################################################## + +from __future__ import print_function, absolute_import, division + +from collections import defaultdict +from itertools import chain + +import attr + +from ._stats import UniqueCountStat + + +class DeviceGroup(object): + + name = "devices" + order = 1 + + def __init__(self, stats): + # Only one row, so use summary + self._summary = tuple(s() for s in stats) + try: + # DeviceGroup doesn't want CountStat + posn = stats.index(UniqueCountStat) + except ValueError: + # Not found, so don't worry about it + self._counter = None + self._otherstats = self._summary + else: + # Found, replace it with UniqueCountStat + self._counter = self._summary[posn] + self._otherstats = self._summary[0:posn] + self._summary[posn+1:] + self._stats = stats + self._samples = 0 + + def handle_key(self, key): + if self._counter is None: + return + self._counter.mark(key.device) + self._samples += 1 + + def handle_timestamp(self, key, ts): + for stat in self._otherstats: + stat.mark(ts) + self._samples += 1 + + def handle_status(self, status): + pass + + def headings(self): + return [s.name for s in self._stats] + + def hints(self): + return [s.type_ for s in self._stats] + + def rows(self): + return [] + + def summary(self): + if self._samples == 0: + return [] + return list(s.value() for s in self._summary) + + +class ServiceGroup(object): + + name = "services" + order = 2 + + def __init__(self, stats): + self._stats = stats + self._byrow = defaultdict(self._makerowvalue) + self._summary = tuple(s() for s in stats) + self._samples = 0 + + def _makerowvalue(self): + return tuple(stat() for stat in self._stats) + + def handle_key(self, key): + pass + + def handle_timestamp(self, key, ts): + for stat in self._byrow[key.service]: + stat.mark(ts) + for stat in self._summary: + stat.mark(ts) + self._samples += 1 + + def handle_status(self, status): + pass + + def headings(self): + headings = ["configuration service class"] + headings.extend(s.name for s in self._stats) + return headings + + def hints(self): + hints = ["str"] + hints.extend(s.type_ for s in self._stats) + return hints + + def rows(self): + if self._samples == 0: + return [] + return ( + self._makerow(svcname, stats) + for svcname, stats in self._byrow.iteritems() + ) + + def _makerow(self, svcname, stats): + return tuple(chain((svcname,), (s.value() for s in stats))) + + def summary(self): + if self._samples == 0: + return [] + return list(s.value() for s in self._summary) + + +class MonitorGroup(object): + + name = "monitors" + order = 3 + + def __init__(self, stats): + self._stats = stats + self._byrow = defaultdict(self._makerowvalue) + self._summary = tuple(s() for s in stats) + self._samples = 0 + + def _makerowvalue(self): + return tuple(stat() for stat in self._stats) + + def handle_key(self, key): + pass + + def handle_timestamp(self, key, ts): + for stat in self._byrow[key.monitor]: + stat.mark(ts) + for stat in self._summary: + stat.mark(ts) + self._samples += 1 + + def handle_status(self, status): + pass + + def headings(self): + headings = ["collector"] + headings.extend(s.name for s in self._stats) + return headings + + def hints(self): + hints = ["str"] + hints.extend(s.type_ for s in self._stats) + return hints + + def rows(self): + if self._samples == 0: + return [] + return ( + self._makerow(name, stats) + for name, stats in self._byrow.iteritems() + ) + + def _makerow(self, name, stats): + return tuple(chain((name,), (s.value() for s in stats))) + + def 
summary(self):
+        if self._samples == 0:
+            return []
+        return list(s.value() for s in self._summary)
+
+
+class StatusGroup(object):
+
+    name = "statuses"
+    order = 4
+
+    def __init__(self, stats):
+        self._stats = stats
+        self._byrow = defaultdict(self._makerowvalue)
+        self._summary = tuple(s() for s in stats)
+        self._samples = 0
+
+    def _makerowvalue(self):
+        return tuple(stat() for stat in self._stats)
+
+    def handle_key(self, key):
+        pass
+
+    def handle_timestamp(self, key, ts):
+        pass
+
+    def handle_status(self, status):
+        data = attr.astuple(status)
+        for stat in self._byrow[type(status).__name__]:
+            stat.mark(data[-1])
+        for stat in self._summary:
+            stat.mark(data[-1])
+        self._samples += 1
+
+    def headings(self):
+        headings = ["status"]
+        headings.extend(s.name for s in self._stats)
+        return headings
+
+    def hints(self):
+        hints = ["str"]
+        hints.extend(s.type_ for s in self._stats)
+        return hints
+
+    def rows(self):
+        if self._samples == 0:
+            return []
+        return (
+            self._makerow(name, stats)
+            for name, stats in self._byrow.iteritems()
+        )
+
+    def _makerow(self, name, stats):
+        return tuple(chain((name,), (s.value() for s in stats)))
+
+    def summary(self):
+        if self._samples == 0:
+            return []
+        return list(s.value() for s in self._summary)
diff --git a/Products/ZenCollector/configcache/cli/_json.py b/Products/ZenCollector/configcache/cli/_json.py
new file mode 100644
index 0000000000..01c9f0404e
--- /dev/null
+++ b/Products/ZenCollector/configcache/cli/_json.py
@@ -0,0 +1,83 @@
+##############################################################################
+#
+# Copyright (C) Zenoss, Inc. 2024, all rights reserved.
+#
+# This content is made available according to terms specified in
+# License.zenoss under the directory where your Zenoss product is installed.
+#
+##############################################################################
+
+from __future__ import print_function, absolute_import, division
+
+import json
+
+
+class JSONOutput(object):
+    """
+    {
+        "devices": [
+            "summary": {
+                "number_of_devices": 4,
+                ...
+            }
+        ],
+        "services": {
+            "data": [
+                {<heading>: <value>, ...}, ...
+            ],
+            "summary": {
+                <heading>: <value>,  # except first column
+                ...
+            }
+        },
+        "monitors": {
+            "data": [
+                {<heading>: <value>, ...}, ...
+            ],
+            "summary": {
+                <heading>: <value>,  # except first column
+                ...
+            }
+        },
+        "statuses": {
+            "data": [
+                {<heading>: <value>, ...}, ...
+            ],
+            "summary": {
+                <heading>: <value>,  # except first column
+                ...
+            }
+        }
+    }
+    """
+
+    def write(self, *groups):
+        result = {}
+        for group in groups:
+            rows = list(group.rows())
+            summary = group.summary()
+            headings = [
+                hdr.replace(" ", "_").lower() for hdr in group.headings()
+            ]
+
+            if len(rows) == 0 and len(summary) == 0:
+                continue
+
+            if len(headings) == 1 and len(rows) == 1:
+                result[group.name] = [
+                    {headings[0].replace(" ", "_").lower(): rows[0][0]}
+                ]
+                continue
+
+            rows = [
+                {hdr: value for hdr, value in zip(headings, row)}
+                for row in rows
+            ]
+            if len(rows) == 0:
+                summary = {hdr: value for hdr, value in zip(headings, summary)}
+            else:
+                summary = {
+                    hdr: value for hdr, value in zip(headings[1:], summary)
+                }
+            result[group.name] = {"data": rows, "summary": summary}
+        print(json.dumps(result))
diff --git a/Products/ZenCollector/configcache/cli/_stats.py b/Products/ZenCollector/configcache/cli/_stats.py
new file mode 100644
index 0000000000..e2f0f2aa52
--- /dev/null
+++ b/Products/ZenCollector/configcache/cli/_stats.py
@@ -0,0 +1,181 @@
+##############################################################################
+#
+# Copyright (C) Zenoss, Inc. 2024, all rights reserved.
+# +# This content is made available according to terms specified in +# License.zenoss under the directory where your Zenoss product is installed. +# +############################################################################## + +from __future__ import print_function, absolute_import, division + +import sys +import time + +_current_time = None + + +def _current_time_unset(): + global _current_time + _current_time = time.time() + try: + return _current_time + finally: + global _get_current_time + _get_current_time = _current_time_set + + +def _current_time_set(): + global _current_time + return _current_time + + +_get_current_time = _current_time_unset + + +class CountStat(object): + + name = "count" + type_ = "int" + + def __init__(self): + self._count = 0 + + def mark(self, *args): + self._count += 1 + + def value(self): + return self._count + + +class UniqueCountStat(CountStat): + + name = "count of devices" + + def __init__(self): + self._values = set() + + def mark(self, value): + self._values.add(value) + + def value(self): + return len(self._values) + + +class AverageStat(object): + + name = "average" + type_ = "timedelta" + + def __init__(self): + self._total = 0 + self._count = 0 + + def mark(self, value): + self._count += 1 + self._total += value + + def value(self): + if self._count == 0: + return 0 + return self._total / self._count + + +class AverageAgeStat(AverageStat): + + name = "average age" + + def value(self): + avg = super(AverageAgeStat, self).value() + if avg == 0: + return 0 + return _get_current_time() - avg + + +class MedianStat(object): + + name = "median" + type_ = "timedelta" + + def __init__(self): + self._min = sys.maxsize + self._max = 0 + + def mark(self, value): + value = int(value) + self._min = min(self._min, value) + self._max = max(self._max, value) + + def value(self): + if self._min == sys.maxsize: + return 0 + return (self._min + self._max) / 2 + + +class MedianAgeStat(MedianStat): + + name = "median age" + + def value(self): + median = super(MedianAgeStat, self).value() + if median == 0: + return 0 + return _get_current_time() - median + + +class MinStat(object): + + name = "min" + type_ = "float" + + def __init__(self): + self._min = sys.maxsize + + def mark(self, value): + self._min = min(self._min, int(value)) + + def value(self): + if self._min == sys.maxsize: + return 0 + return self._min + + +class MaxAgeStat(MinStat): + + name = "max age" + type_ = "timedelta" + + def value(self): + maxv = super(MaxAgeStat, self).value() + if maxv == 0: + return 0 + return _get_current_time() - maxv + + +class MaxStat(object): + + name = "max" + type_ = "float" + + def __init__(self): + self._max = 0 + + def mark(self, value): + self._max = max(self._max, int(value)) + + def value(self): + if self._max == 0: + return 0 + return self._max + + +class MinAgeStat(MaxStat): + + name = "min age" + type_ = "timedelta" + + def value(self): + minv = super(MinAgeStat, self).value() + if minv == 0: + return 0 + return _get_current_time() - minv diff --git a/Products/ZenCollector/configcache/cli/_tables.py b/Products/ZenCollector/configcache/cli/_tables.py new file mode 100644 index 0000000000..83675fd7a5 --- /dev/null +++ b/Products/ZenCollector/configcache/cli/_tables.py @@ -0,0 +1,124 @@ +############################################################################## +# +# Copyright (C) Zenoss, Inc. 2024, all rights reserved. 
+# +# This content is made available according to terms specified in +# License.zenoss under the directory where your Zenoss product is installed. +# +############################################################################## + +from __future__ import print_function, absolute_import, division + +import datetime + +from itertools import chain + + +class TablesOutput(object): + + def write(self, *groups): + for group in groups: + self._display( + list(group.rows()), + group.summary(), + group.headings(), + group.hints(), + ) + + def _display(self, rows, summary, headings, hints): + if not rows and not summary: + return + + # Transform row values for presentation + if rows: + rows = [ + tuple(_xform(value, hint) for value, hint in zip(row, hints)) + for row in sorted(rows, key=lambda x: x[0]) + ] + + # Transform total values for presentation + if summary: + if rows: + summary = tuple( + _xform(v, h) for v, h in zip([""] + summary, hints) + ) + else: + summary = tuple( + _xform(v, h) for v, h in zip(summary, hints) + ) + + # Transform column headers for presentation + if summary and not rows: + headings = [hdr.capitalize() for hdr in headings] + else: + headings = [hdr.upper() for hdr in headings] + + # Initialize maxwidth values for each column + maxwidths = [0 for _ in headings] + + if summary and not rows: + hdrmaxw = max(len(hdr) for hdr in headings) + maxwidths = [hdrmaxw] * len(headings) + else: + for row in rows: + for idx, (mw, col) in enumerate(zip(maxwidths, row)): + maxwidths[idx] = max(mw, len(str(col))) + for idx, (mw, hd) in enumerate(zip(maxwidths, headings)): + maxwidths[idx] = max(mw, len(hd)) + for idx, (mw, tv) in enumerate(zip(maxwidths[1:], summary)): + maxwidths[idx + 1] = max(mw, len(str(tv))) + + offset = len(maxwidths) + tmplt = " ".join( + "{{{0}:{{{1}}}}}".format(idx, idx + offset) + for idx in range(0, offset) + ) + fmtspecs = [ + _get_fmt_spec(mw, hint) for mw, hint in zip(maxwidths, hints) + ] + print() + if summary and not rows: + for hdr, value in zip(headings, summary): + print("{0:{2}}: {1}".format(hdr, value, maxwidths[0])) + else: + if headings: + print(tmplt.format(*chain(headings, fmtspecs))) + sep = ["-" * c for c in maxwidths] + print(tmplt.format(*chain(sep, maxwidths))) + + for row in rows: + print(tmplt.format(*chain(row, fmtspecs))) + + if summary: + print(tmplt.format(*chain(sep, maxwidths))) + print(tmplt.format(*chain(summary, fmtspecs))) + + +def _xform(value, hint): + if hint == "timedelta": + td = datetime.timedelta(seconds=value) + hours = td.seconds // 3600 + minutes = (td.seconds - (hours * 3600)) // 60 + seconds = td.seconds - (hours * 3600) - (minutes * 60) + return "{0} {1:02}:{2:02}:{3:02}".format( + ( + "" + if td.days == 0 + else "{} day{}".format(td.days, "" if td.days == 1 else "s") + ), + hours, + minutes, + seconds, + ).strip() + else: + return value + + +def _get_fmt_spec(mw, hint): + if hint == "int": + return ">{}".format(mw) + elif hint == "timedelta": + return ">{}".format(mw) + elif hint == "float": + return ">{}.2f".format(mw) + return mw diff --git a/Products/ZenCollector/configcache/cli/expire.py b/Products/ZenCollector/configcache/cli/expire.py index 5335d85f45..918d3f3ee5 100644 --- a/Products/ZenCollector/configcache/cli/expire.py +++ b/Products/ZenCollector/configcache/cli/expire.py @@ -65,7 +65,7 @@ def run(self): client = getRedisClient(url=getRedisUrl()) store = createObject("configcache-store", client) query = CacheQuery(service=self._service, monitor=self._monitor) - results = 
store.get_status(*store.search(query)) + results = store.query_statuses(query) method = self._no_devices if not self._devices else self._with_devices keys = method(results, wildcard=haswildcard) now = time.time() diff --git a/Products/ZenCollector/configcache/cli/list.py b/Products/ZenCollector/configcache/cli/list.py index d58d6608e4..f4dc572231 100644 --- a/Products/ZenCollector/configcache/cli/list.py +++ b/Products/ZenCollector/configcache/cli/list.py @@ -11,8 +11,12 @@ import argparse import sys +import time -from datetime import datetime +from datetime import datetime, timedelta +from itertools import chain + +import attr from zope.component import createObject @@ -96,19 +100,18 @@ def run(self): ) else: query = CacheQuery(service=self._service, monitor=self._monitor) - data = store.get_statuses(query) + data = store.query_statuses(query) if self._states: data = ( status for status in data if isinstance(status, self._states) ) if len(self._devices) > 1: data = ( - status - for status in data - if status.key.device in self._devices + status for status in data if status.key.device in self._devices ) rows = [] - maxd, maxs, maxm = 1, 1, 1 + maxd, maxs, maxt, maxa, maxm = 1, 1, 1, 1, 1 + now = time.time() for status in sorted( data, key=lambda x: (x.key.device, x.key.service) ): @@ -117,36 +120,39 @@ def run(self): else: devid = status.key.device status_text = _format_status(status) + ts = attr.astuple(status)[-1] + ts_text = _format_date(ts) + age_text = _format_timedelta(now - ts) maxd = max(maxd, len(devid)) maxs = max(maxs, len(status_text)) + maxt = max(maxt, len(ts_text)) + maxa = max(maxa, len(age_text)) maxm = max(maxm, len(status.key.monitor)) rows.append( - (devid, status_text, status.key.monitor, status.key.service) - ) - if rows: - print( - "{0:{maxd}} {1:{maxs}} {2:{maxm}} {3}".format( - "DEVICE", - "STATUS", - "COLLECTOR", - "SERVICE", - maxd=maxd, - maxs=maxs, - maxm=maxm, + ( + devid, + status_text, + ts_text, + age_text, + status.key.monitor, + status.key.service, ) ) + hdr_tmplt = "{0:{6}} {1:{7}} {2:^{8}} {3:^{9}} {4:{10}} {5}" + row_tmplt = "{0:{6}} {1:{7}} {2:{8}} {3:>{9}} {4:{10}} {5}" + headings = ( + "DEVICE", + "STATUS", + "LAST CHANGE", + "AGE", + "COLLECTOR", + "SERVICE", + ) + widths = (maxd, maxs, maxt, maxa, maxm) + if rows: + print(hdr_tmplt.format(*chain(headings, widths))) for row in rows: - print( - "{0:{maxd}} {1:{maxs}} {2:{maxm}} {3}".format( - row[0], - row[1], - row[2], - row[3], - maxd=maxd, - maxs=maxs, - maxm=maxm, - ) - ) + print(row_tmplt.format(*chain(row, widths))) _name_state_lookup = { @@ -158,21 +164,25 @@ def run(self): } +def _format_timedelta(value): + td = timedelta(seconds=value) + hours = td.seconds // 3600 + minutes = (td.seconds - (hours * 3600)) // 60 + seconds = td.seconds - (hours * 3600) - (minutes * 60) + return "{0} {1:02}:{2:02}:{3:02}".format( + ( + "" + if td.days == 0 + else "{} day{}".format(td.days, "" if td.days == 1 else "s") + ), + hours, + minutes, + seconds, + ).strip() + + def _format_status(status): - if isinstance(status, ConfigStatus.Current): - return "current since {}".format(_format_date(status.updated)) - elif isinstance(status, ConfigStatus.Retired): - return "retired since {}".format(_format_date(status.retired)) - elif isinstance(status, ConfigStatus.Expired): - return "expired since {}".format(_format_date(status.expired)) - elif isinstance(status, ConfigStatus.Pending): - return "waiting to build since {}".format( - _format_date(status.submitted) - ) - elif isinstance(status, ConfigStatus.Building): - 
return "build started {}".format(_format_date(status.started)) - else: - return "????" + return type(status).__name__.lower() def _format_date(ts): diff --git a/Products/ZenCollector/configcache/cli/remove.py b/Products/ZenCollector/configcache/cli/remove.py index b1837c2d51..ab1300dd50 100644 --- a/Products/ZenCollector/configcache/cli/remove.py +++ b/Products/ZenCollector/configcache/cli/remove.py @@ -64,7 +64,7 @@ def run(self): client = getRedisClient(url=getRedisUrl()) store = createObject("configcache-store", client) query = CacheQuery(service=self._service, monitor=self._monitor) - results = store.get_status(*store.search(query)) + results = store.query_statuses(query) method = self._no_devices if not self._devices else self._with_devices keys = method(results, wildcard=haswildcard) store.remove(*keys) diff --git a/Products/ZenCollector/configcache/cli/stats.py b/Products/ZenCollector/configcache/cli/stats.py new file mode 100644 index 0000000000..26752d1722 --- /dev/null +++ b/Products/ZenCollector/configcache/cli/stats.py @@ -0,0 +1,158 @@ +############################################################################## +# +# Copyright (C) Zenoss, Inc. 2024, all rights reserved. +# +# This content is made available according to terms specified in +# License.zenoss under the directory where your Zenoss product is installed. +# +############################################################################## + +from __future__ import print_function, absolute_import, division + +import argparse +import sys + +from zope.component import createObject + +from Products.ZenUtils.RedisUtils import getRedisClient, getRedisUrl + +from ..app import initialize_environment +from ..app.args import get_subparser +from ..cache import CacheQuery + +from .args import get_common_parser, MultiChoice +from ._tables import TablesOutput +from ._json import JSONOutput +from ._stats import ( + AverageAgeStat, + CountStat, + MaxAgeStat, + MedianAgeStat, + MinAgeStat, + UniqueCountStat, +) +from ._groups import DeviceGroup, ServiceGroup, MonitorGroup, StatusGroup + + +class Stats(object): + description = "Show statistics about the configuration cache" + + configs = (("stats.zcml", __name__),) + + _groups = ("collector", "device", "service", "status") + _statistics = ("count", "avg_age", "median_age", "min_age", "max_age") + + @staticmethod + def add_arguments(parser, subparsers): + subp = get_subparser( + subparsers, "stats", Stats.description, parent=get_common_parser() + ) + subp.add_argument( + "-S", + dest="statistic", + action=MultiChoice, + choices=Stats._statistics, + default=argparse.SUPPRESS, + help="Specify the statistics to return. One or more statistics " + "may be specified (comma separated). By default, all " + "statistics are returned.", + ) + subp.add_argument( + "-G", + dest="group", + action=MultiChoice, + choices=Stats._groups, + default=argparse.SUPPRESS, + help="Specify the statistics groupings to return. One or more " + "groupings may be specified (comma separated). 
By default, all " + "groupings are returned.", + ) + subp.add_argument( + "-f", + dest="format", + choices=("tables", "json"), + default="tables", + help="Output statistics in the specified format", + ) + subp.set_defaults(factory=Stats) + + def __init__(self, args): + stats = [] + for statId in getattr(args, "statistic", Stats._statistics): + if statId == "count": + stats.append(CountStat) + elif statId == "avg_age": + stats.append(AverageAgeStat) + elif statId == "median_age": + stats.append(MedianAgeStat) + elif statId == "min_age": + stats.append(MinAgeStat) + elif statId == "max_age": + stats.append(MaxAgeStat) + self._groups = [] + for groupId in getattr(args, "group", Stats._groups): + if groupId == "collector": + self._groups.append(MonitorGroup(stats)) + elif groupId == "device": + try: + # DeviceGroup doesn't want CountStat + posn = stats.index(CountStat) + except ValueError: + # Not found, so don't worry about it + dg_stats = stats + pass + else: + # Found, replace it with UniqueCountStat + dg_stats = list(stats) + dg_stats[posn] = UniqueCountStat + self._groups.append(DeviceGroup(dg_stats)) + if groupId == "service": + self._groups.append(ServiceGroup(stats)) + elif groupId == "status": + self._groups.append(StatusGroup(stats)) + if args.format == "tables": + self._format = TablesOutput() + elif args.format == "json": + self._format = JSONOutput() + self._monitor = "*{}*".format(args.collector).replace("***", "*") + self._service = "*{}*".format(args.service).replace("***", "*") + self._devices = getattr(args, "device", []) + + def run(self): + haswildcard = any("*" in d for d in self._devices) + if haswildcard and len(self._devices) > 1: + print( + "Only one DEVICE argument supported when a wildcard is used.", + file=sys.stderr, + ) + return + initialize_environment(configs=self.configs, useZope=False) + client = getRedisClient(url=getRedisUrl()) + store = createObject("configcache-store", client) + + if len(self._devices) == 1: + query = CacheQuery(self._service, self._monitor, self._devices[0]) + else: + query = CacheQuery(self._service, self._monitor) + include = _get_device_predicate(self._devices) + for key, ts in store.query_updated(query): + if not include(key.device): + continue + for group in self._groups: + group.handle_key(key) + group.handle_timestamp(key, ts) + for status in store.query_statuses(query): + if not include(status.key.device): + continue + for group in self._groups: + group.handle_status(status) + + self._format.write( + *(group for group in sorted(self._groups, key=lambda x: x.order)) + ) + + +def _get_device_predicate(devices): + if len(devices) < 2: + return lambda _: True + return lambda x: next((True for d in devices if x == d), False) diff --git a/Products/ZenCollector/configcache/cli/stats.zcml b/Products/ZenCollector/configcache/cli/stats.zcml new file mode 100644 index 0000000000..8ec2993701 --- /dev/null +++ b/Products/ZenCollector/configcache/cli/stats.zcml @@ -0,0 +1,13 @@ + + + + + + + + diff --git a/Products/ZenCollector/configcache/configcache.py b/Products/ZenCollector/configcache/configcache.py index c8e7c7bce3..8ce4764ea8 100644 --- a/Products/ZenCollector/configcache/configcache.py +++ b/Products/ZenCollector/configcache/configcache.py @@ -10,7 +10,7 @@ from __future__ import absolute_import, print_function from .app.args import get_arg_parser -from .cli import Expire, List_, Remove, Show +from .cli import Expire, List_, Remove, Show, Stats from .invalidator import Invalidator from .manager import Manager from .version import 
Version
@@ -24,10 +24,11 @@ def main(argv=None):
     Version.add_arguments(parser, subparsers)
     Manager.add_arguments(parser, subparsers)
     Invalidator.add_arguments(parser, subparsers)
-    List_.add_arguments(parser, subparsers)
-    Show.add_arguments(parser, subparsers)
     Expire.add_arguments(parser, subparsers)
+    List_.add_arguments(parser, subparsers)
     Remove.add_arguments(parser, subparsers)
+    Show.add_arguments(parser, subparsers)
+    Stats.add_arguments(parser, subparsers)
 
     args = parser.parse_args()
     args.factory(args).run()
diff --git a/Products/ZenCollector/configcache/handlers.py b/Products/ZenCollector/configcache/handlers.py
index d234b6119e..9ea59da09e 100644
--- a/Products/ZenCollector/configcache/handlers.py
+++ b/Products/ZenCollector/configcache/handlers.py
@@ -11,40 +11,38 @@
 
 import time
 
-from .cache import CacheKey, ConfigStatus
+from .cache import CacheKey, CacheQuery, ConfigStatus
 
 
 class NewDeviceHandler(object):
-
     def __init__(self, log, store, dispatcher):
         self.log = log
         self.store = store
         self.dispatcher = dispatcher
 
     def __call__(self, deviceId, monitor, buildlimit, newDevice=True):
-        keys = tuple(
+        all_keys = {
             CacheKey(svcname, monitor, deviceId)
             for svcname in self.dispatcher.service_names
-        )
-        keys_with_pending_status = set(
+        }
+        query = CacheQuery(device=deviceId, monitor=monitor)
+        pending_keys = {
             status.key
-            for status in self.store.get_status(*keys)
+            for status in self.store.query_statuses(query)
             if isinstance(status, ConfigStatus.Pending)
-        )
-        for key in keys_with_pending_status:
-            self.log.debug(
+        }
+        non_pending_keys = all_keys - pending_keys
+        for key in pending_keys:
+            self.log.info(
                 "build job already submitted for this config "
                 "device=%s collector=%s service=%s",
                 key.device,
                 key.monitor,
                 key.service,
             )
-        keys_without_pending_status = set(keys) - keys_with_pending_status
         now = time.time()
-        self.store.set_pending(
-            *((key, now) for key in keys_without_pending_status)
-        )
-        for key in keys_without_pending_status:
+        self.store.set_pending(*((key, now) for key in non_pending_keys))
+        for key in non_pending_keys:
             self.dispatcher.dispatch(
                 key.service, key.monitor, key.device, buildlimit, now
             )
@@ -59,32 +57,39 @@ def __call__(self, deviceId, monitor, buildlimit, newDevice=True):
 
 
 class DeviceUpdateHandler(object):
-
     def __init__(self, log, store, dispatcher):
         self.log = log
         self.store = store
         self.dispatcher = dispatcher
 
     def __call__(self, keys, minttl):
-        current_statuses = tuple(
+        statuses = tuple(
             status
-            for status in self.store.get_status(*keys)
-            if isinstance(status, ConfigStatus.Current)
+            for status in (self.store.get_status(key) for key in keys)
+            if status is not None and not isinstance(
+                status,
+                (
+                    # These statuses won't get 'stuck' in a wait period
+                    # before manager handles them.
+                    ConfigStatus.Expired,
+                    ConfigStatus.Retired,
+                ),
+            )
         )
         now = time.time()
         retirement = now - minttl
-        retired = set(
-            status.key
-            for status in current_statuses
-            if status.updated >= retirement
-        )
-        expired = set(
+        # Transitioning to Retired is relevant only for Current.
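+        # Of the remaining statuses, only Current configs updated less
+        # than `minttl` seconds ago move to Retired; everything else is
+        # marked Expired so that a rebuild gets scheduled.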
+ retired = { status.key - for status in current_statuses - if status.key not in retired - ) + for status in statuses + if isinstance(status, ConfigStatus.Current) + and status.updated >= retirement + } + expired = { + status.key for status in statuses if status.key not in retired + } self.store.set_retired(*((key, now) for key in retired)) self.store.set_expired(*((key, now) for key in expired)) @@ -108,7 +113,6 @@ def __call__(self, keys, minttl): class MissingConfigsHandler(object): - def __init__(self, log, store, dispatcher): self.log = log self.store = store @@ -131,7 +135,9 @@ def __call__(self, deviceId, monitor, keys, buildlimit): ) # Identify all no-config keys that already have a status. skipkeys = tuple( - status.key for status in self.store.get_status(*noconfigkeys) + key + for key in noconfigkeys + if self.store.get_status(key) is not None ) now = time.time() for key in (k for k in noconfigkeys if k not in skipkeys): @@ -149,7 +155,6 @@ def __call__(self, deviceId, monitor, keys, buildlimit): class RemoveConfigsHandler(object): - def __init__(self, log, store): self.log = log self.store = store diff --git a/Products/ZenCollector/configcache/invalidator.py b/Products/ZenCollector/configcache/invalidator.py index b4f0776ba0..3ce35161cd 100644 --- a/Products/ZenCollector/configcache/invalidator.py +++ b/Products/ZenCollector/configcache/invalidator.py @@ -13,6 +13,8 @@ from multiprocessing import Process +from metrology.instruments import Gauge, HistogramExponentiallyDecaying + from zenoss.modelindex import constants from zope.component import createObject @@ -52,6 +54,8 @@ class Invalidator(object): configs = (("modelchange.zcml", CONFIGCACHE_MODULE),) + metric_prefix = "configcache.invalidations." + @staticmethod def add_arguments(parser, subparsers): subp = get_subparser( @@ -107,6 +111,10 @@ def __init__(self, config, context): self.interval = config["poll-interval"] + # metrics + self.ctx.metric_reporter.add_tags({"zenoss_daemon": "invalidator"}) + self._metrics = _Metrics(self.ctx.metric_reporter) + def run(self): # Handle changes that occurred when Invalidator wasn't running. 
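+        # (e.g. model changes made while the daemon was down still need
+        # to be reflected in the cached configs)
         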
self._synchronize() @@ -121,10 +129,16 @@ def run(self): try: self.ctx.session.sync() invalidations = poller.poll() + + self._metrics.received.mark(len(invalidations)) + self._metrics.processed.update(len(invalidations)) + if not invalidations: continue + self._process_invalidations(invalidations) finally: + self.ctx.metric_reporter.save() # Call cacheGC to aggressively trim the ZODB cache self.ctx.session.cacheGC() self.ctx.controller.wait(self.interval) @@ -149,6 +163,19 @@ def _process_invalidations(self, invalidations): ) +class InvalidationGauge(Gauge): + + def __init__(self): + self._value = 0 + + @property + def value(self): + return self._value + + def mark(self, value): + self._value = value + + _solr_fields = ("id", "collector", "uid") @@ -275,3 +302,12 @@ def __call__(self, device, oid, reason): monitor, oid, ) + + +class _Metrics(object): + + def __init__(self, reporter): + self.received = InvalidationGauge() + self.processed = HistogramExponentiallyDecaying() + reporter.register("received", self.received) + reporter.register("processed", self.processed) diff --git a/Products/ZenCollector/configcache/manager.py b/Products/ZenCollector/configcache/manager.py index 9a4a71a107..18c7ebd0ec 100644 --- a/Products/ZenCollector/configcache/manager.py +++ b/Products/ZenCollector/configcache/manager.py @@ -11,9 +11,14 @@ import logging +from collections import Counter, defaultdict from datetime import datetime from time import time +import attr + +from metrology.instruments import Gauge, HistogramExponentiallyDecaying +from metrology.utils.periodic import PeriodicTask from zope.component import createObject from Products.ZenUtils.RedisUtils import getRedisClient, getRedisUrl @@ -36,6 +41,8 @@ class Manager(object): "Determines whether device configs are old and regenerates them" ) + metric_prefix = "configcache.status." 
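+    # Assumed naming: the metric reporter combines this prefix with the
+    # names registered by _Metrics below, e.g. "count.current" would be
+    # published as "configcache.status.count.current".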
+
     @staticmethod
     def add_arguments(parser, subparsers):
         subp = get_subparser(
@@ -78,12 +85,24 @@ def __init__(self, config, context):
         self.interval = config["check-interval"]
         self.log = logging.getLogger("zen.configcache.manager")
 
+        # metrics
+        self.ctx.metric_reporter.add_tags({"zenoss_daemon": "manager"})
+        self._metric_collector = _MetricCollector(self.ctx.metric_reporter)
+
     def run(self):
         self.log.info(
             "checking for expired configurations and configuration build "
             "timeouts every %s seconds",
             self.interval,
         )
+        try:
+            self._metric_collector.start()
+            self._main()
+        finally:
+            self._metric_collector.stop()
+            self._metric_collector.join(timeout=5)
+
+    def _main(self):
         while not self.ctx.controller.shutdown:
             try:
                 self.ctx.session.sync()
@@ -239,3 +258,95 @@ def _rebuild_configs(self, statuses):
             count += 1
         if count == 0:
             self.log.debug("found no expired or old configurations to rebuild")
+
+
+class _MetricCollector(PeriodicTask):
+
+    def __init__(self, reporter):
+        super(_MetricCollector, self).__init__(interval=60)
+        self._reporter = reporter
+        self._metrics = _Metrics(reporter)
+        self._store = None
+
+    def task(self):
+        if self._store is None:
+            client = getRedisClient(url=getRedisUrl())
+            self._store = createObject("configcache-store", client)
+        self._collect()
+        self._reporter.save()
+
+    def _collect(self):
+        counts = Counter()
+        ages = defaultdict(list)
+        now = time()
+        for status in self._store.query_statuses():
+            key, uid, ts = attr.astuple(status)
+            ages[type(status)].append(int(now - ts))
+            counts.update([type(status)])
+
+        self._metrics.count.current.mark(counts.get(ConfigStatus.Current, 0))
+        self._metrics.count.retired.mark(counts.get(ConfigStatus.Retired, 0))
+        self._metrics.count.expired.mark(counts.get(ConfigStatus.Expired, 0))
+        self._metrics.count.pending.mark(counts.get(ConfigStatus.Pending, 0))
+        self._metrics.count.building.mark(counts.get(ConfigStatus.Building, 0))
+
+        for age in ages.get(ConfigStatus.Current, []):
+            self._metrics.age.current.update(age)
+        for age in ages.get(ConfigStatus.Retired, []):
+            self._metrics.age.retired.update(age)
+        for age in ages.get(ConfigStatus.Expired, []):
+            self._metrics.age.expired.update(age)
+        for age in ages.get(ConfigStatus.Pending, []):
+            self._metrics.age.pending.update(age)
+        for age in ages.get(ConfigStatus.Building, []):
+            self._metrics.age.building.update(age)
+
+
+class StatusCountGauge(Gauge):
+
+    def __init__(self):
+        self._value = 0
+
+    @property
+    def value(self):
+        return self._value
+
+    def mark(self, value):
+        self._value = value
+
+
+class _Metrics(object):
+
+    def __init__(self, reporter):
+        self.count = type(
+            "Count",
+            (object,),
+            {
+                "current": StatusCountGauge(),
+                "retired": StatusCountGauge(),
+                "expired": StatusCountGauge(),
+                "pending": StatusCountGauge(),
+                "building": StatusCountGauge(),
+            },
+        )()
+        reporter.register("count.current", self.count.current)
+        reporter.register("count.retired", self.count.retired)
+        reporter.register("count.expired", self.count.expired)
+        reporter.register("count.pending", self.count.pending)
+        reporter.register("count.building", self.count.building)
+        self.age = type(
+            "Age",
+            (object,),
+            {
+                "current": HistogramExponentiallyDecaying(),
+                "retired": HistogramExponentiallyDecaying(),
+                "expired": HistogramExponentiallyDecaying(),
+                "pending": HistogramExponentiallyDecaying(),
+                "building": HistogramExponentiallyDecaying(),
+            },
+        )()
+        reporter.register("age.current", self.age.current)
+        reporter.register("age.retired", self.age.retired)
+        
reporter.register("age.expired", self.age.expired) + reporter.register("age.pending", self.age.pending) + reporter.register("age.building", self.age.building) diff --git a/Products/ZenCollector/configcache/modelchange/filters.py b/Products/ZenCollector/configcache/modelchange/filters.py index 99851fb0c4..644988ed21 100644 --- a/Products/ZenCollector/configcache/modelchange/filters.py +++ b/Products/ZenCollector/configcache/modelchange/filters.py @@ -11,7 +11,7 @@ import re from cStringIO import StringIO -from hashlib import md5 +from hashlib import sha256 from zope.interface import implementer @@ -128,14 +128,14 @@ def initialize(self, context): self.checksum_map = results def organizerChecksum(self, organizer): - m = md5() + m = sha256() self.generateChecksum(organizer, m) return m.hexdigest() - def generateChecksum(self, organizer, md5_checksum): + def generateChecksum(self, organizer, hash_checksum): # Checksum all zProperties and custom properties for zId, propertyString in _getZorCProperties(organizer): - md5_checksum.update("%s|%s" % (zId, propertyString)) + hash_checksum.update("%s|%s" % (zId, propertyString)) def include(self, obj): # Move on if it's not one of our types @@ -170,7 +170,7 @@ def __init__(self): def getRoot(self, context): return context.dmd.Devices.primaryAq() - def generateChecksum(self, organizer, md5_checksum): + def generateChecksum(self, organizer, hash_checksum): """ Generate a checksum representing the state of the device class as it pertains to configuration. This takes into account templates and @@ -188,10 +188,10 @@ def generateChecksum(self, organizer, md5_checksum): "unable to export XML of template template=%r", tpl ) else: - md5_checksum.update(s.getvalue()) + hash_checksum.update(s.getvalue()) # Include z/c properties from base class super(DeviceClassInvalidationFilter, self).generateChecksum( - organizer, md5_checksum + organizer, hash_checksum ) @@ -219,14 +219,14 @@ def __init__(self): def getRoot(self, context): return context.dmd.Processes.primaryAq() - def generateChecksum(self, organizer, md5_checksum): + def generateChecksum(self, organizer, hash_checksum): # Include properties of OSProcessClass for prop in organizer._properties: prop_id = prop["id"] - md5_checksum.update( + hash_checksum.update( "%s|%s" % (prop_id, getattr(organizer, prop_id, "")) ) # Include z/c properties from base class super(OSProcessClassFilter, self).generateChecksum( - organizer, md5_checksum + organizer, hash_checksum ) diff --git a/Products/ZenCollector/configcache/modelchange/tests/__init__.py b/Products/ZenCollector/configcache/modelchange/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Products/ZenCollector/configcache/modelchange/tests/test_filters.py b/Products/ZenCollector/configcache/modelchange/tests/test_filters.py index 59d343ebaf..3ae955dcc3 100644 --- a/Products/ZenCollector/configcache/modelchange/tests/test_filters.py +++ b/Products/ZenCollector/configcache/modelchange/tests/test_filters.py @@ -13,7 +13,7 @@ FILTER_EXCLUDE, IgnorableClassesFilter, IInvalidationFilter, - md5, + sha256, OSProcessClass, OSProcessClassFilter, OSProcessOrganizer, @@ -21,7 +21,7 @@ ) from .mock_interface import create_interface_mock -PATH = {"invalidationfilter": "Products.ZenHub.invalidationfilter"} +PATH = {"path": "Products.ZenCollector.configcache.modelchange.filters"} class IgnorableClassesFilterTest(TestCase): @@ -88,9 +88,7 @@ def test_getRoot(t): t.assertEqual(root, context.dmd.primaryAq()) @patch( - 
"{invalidationfilter}.IModelCatalogTool".format(**PATH), - autospec=True, - spec_set=True, + "{path}.IModelCatalogTool".format(**PATH), autospec=True, spec_set=True ) def test_initialize(t, IModelCatalogTool): # Create a Mock object that provides the ICatalogBrain interface @@ -127,16 +125,14 @@ def test_getZorCProperties(t): t.organizer.zenPropIsPassword.return_value = True zId, propertyString = next(results) t.assertEqual(zId, zprop) - t.assertEqual( - propertyString, t.organizer.getProperty.return_value - ) + t.assertEqual(propertyString, t.organizer.getProperty.return_value) t.organizer.getProperty.assert_called_with(zprop, "") with t.assertRaises(StopIteration): next(results) @patch( - "{invalidationfilter}._getZorCProperties".format(**PATH), + "{path}._getZorCProperties".format(**PATH), autospec=True, spec_set=True, ) @@ -144,9 +140,9 @@ def test_generateChecksum(t, _getZorCProps): zprop = Mock(name="zenPropertyId", spec_set=[]) data = (zprop, "property_string") _getZorCProps.return_value = [data] - actual = md5() + actual = sha256() - expect = md5() + expect = sha256() expect.update("%s|%s" % data) t.bof.generateChecksum(t.organizer, actual) @@ -155,7 +151,7 @@ def test_generateChecksum(t, _getZorCProps): t.assertEqual(actual.hexdigest(), expect.hexdigest()) @patch( - "{invalidationfilter}._getZorCProperties".format(**PATH), + "{path}._getZorCProperties".format(**PATH), autospec=True, spec_set=True, ) @@ -166,7 +162,7 @@ def test_organizerChecksum(t, _getZorCProps): out = t.bof.organizerChecksum(t.organizer) - expect = md5() + expect = sha256() expect.update("%s|%s" % data) t.assertEqual(out, expect.hexdigest()) @@ -221,14 +217,12 @@ def test_getRoot(t): t.assertEqual(root, context.dmd.Devices.primaryAq()) @patch( - "{invalidationfilter}.BaseOrganizerFilter.generateChecksum".format( - **PATH - ), + "{path}.BaseOrganizerFilter.generateChecksum".format(**PATH), autospec=True, spec_set=True, ) def test_generateChecksum(t, super_generateChecksum): - md5_checksum = md5() + hash_checksum = sha256() organizer = Mock( name="Products.ZenRelations.ZenPropertyManager", spec_set=["rrdTemplates"], @@ -237,12 +231,12 @@ def test_generateChecksum(t, super_generateChecksum): rrdTemplate.exportXml.return_value = "some exemel" organizer.rrdTemplates.return_value = [rrdTemplate] - t.dcif.generateChecksum(organizer, md5_checksum) + t.dcif.generateChecksum(organizer, hash_checksum) # We cannot validate the output of the current version, refactor needed rrdTemplate.exportXml.was_called_once() super_generateChecksum.assert_called_with( - t.dcif, organizer, md5_checksum + t.dcif, organizer, hash_checksum ) @@ -277,9 +271,7 @@ def test_getRoot(t): t.assertEqual(root, context.dmd.Processes.primaryAq()) @patch( - "{invalidationfilter}.BaseOrganizerFilter.generateChecksum".format( - **PATH - ), + "{path}.BaseOrganizerFilter.generateChecksum".format(**PATH), autospec=True, spec_set=True, ) @@ -291,13 +283,13 @@ def test_generateChecksum(t, super_generateChecksum): prop = {"id": "property_id"} organizer._properties = [prop] organizer.property_id = "value" - md5_checksum = md5() + hash_checksum = sha256() - t.ospcf.generateChecksum(organizer, md5_checksum) + t.ospcf.generateChecksum(organizer, hash_checksum) - expect = md5() + expect = sha256() expect.update("%s|%s" % (prop["id"], getattr(organizer, prop["id"]))) - t.assertEqual(md5_checksum.hexdigest(), expect.hexdigest()) + t.assertEqual(hash_checksum.hexdigest(), expect.hexdigest()) super_generateChecksum.assert_called_with( - t.ospcf, organizer, md5_checksum 
+ t.ospcf, organizer, hash_checksum ) diff --git a/Products/ZenCollector/configcache/modelchange/tests/test_oids.py b/Products/ZenCollector/configcache/modelchange/tests/test_oids.py index bb3a9f0151..f04641eb4f 100644 --- a/Products/ZenCollector/configcache/modelchange/tests/test_oids.py +++ b/Products/ZenCollector/configcache/modelchange/tests/test_oids.py @@ -1,23 +1,22 @@ from unittest import TestCase from mock import Mock -# Breaks unittest independence due to -# ImportError: No module named CMFCore.DirectoryView -from Products.ZenHub.invalidationoid import ( - DefaultOidTransform, - DeviceOidTransform, - IInvalidationOid, +from Products.ZenRelations.PrimaryPathObjectManager import ( PrimaryPathObjectManager, ) +from ..oids import ( + IdentityOidTransform, + ComponentOidTransform, + IInvalidationOid, +) from zope.interface.verify import verifyObject -from zope.component import adaptedBy -class DefaultOidTransformTest(TestCase): +class IdentityOidTransformTest(TestCase): def setUp(self): self.obj = Mock(spec_set=PrimaryPathObjectManager) - self.default_oid_transform = DefaultOidTransform(self.obj) + self.default_oid_transform = IdentityOidTransform(self.obj) def test_implements_IInvalidationOid(self): # Provides the interface @@ -25,23 +24,18 @@ def test_implements_IInvalidationOid(self): # Implements the interface it according to spec verifyObject(IInvalidationOid, self.default_oid_transform) - def test_adapts_PrimaryPathObjectManager(self): - self.assertEqual( - list(adaptedBy(DefaultOidTransform)), [PrimaryPathObjectManager] - ) - def test_init(self): - self.assertEqual(self.default_oid_transform._obj, self.obj) + self.assertEqual(self.default_oid_transform._entity, self.obj) def test_transformOid(self): ret = self.default_oid_transform.transformOid("unmodified oid") self.assertEqual(ret, "unmodified oid") -class DeviceOidTransformTest(TestCase): +class ComponentOidTransformTest(TestCase): def setUp(self): self.obj = Mock(spec_set=PrimaryPathObjectManager) - self.device_oid_transform = DeviceOidTransform(self.obj) + self.device_oid_transform = ComponentOidTransform(self.obj) def test_implements_IInvalidationOid(self): # Provides the interface @@ -50,10 +44,10 @@ def test_implements_IInvalidationOid(self): verifyObject(IInvalidationOid, self.device_oid_transform) def test_init(self): - self.assertEqual(self.device_oid_transform._obj, self.obj) + self.assertEqual(self.device_oid_transform._entity, self.obj) def test_transformOid(self): - """returns unmodified oid, if _obj has no device attribute""" + """returns unmodified oid, if _entity has no device attribute""" self.assertFalse(hasattr(self.obj, "device")) ret = self.device_oid_transform.transformOid("unmodified oid") self.assertEqual(ret, "unmodified oid") @@ -64,7 +58,7 @@ def test_transformOid_returns_device_oid(self): device = Mock(name="device", spec_set=["_p_oid"]) obj.device.return_value = device - device_oid_transform = DeviceOidTransform(obj) + device_oid_transform = ComponentOidTransform(obj) ret = device_oid_transform.transformOid("ignored oid") self.assertEqual(ret, obj.device.return_value._p_oid) diff --git a/Products/ZenCollector/configcache/task.py b/Products/ZenCollector/configcache/task.py index 61f28f7325..8dfa72e420 100644 --- a/Products/ZenCollector/configcache/task.py +++ b/Products/ZenCollector/configcache/task.py @@ -17,7 +17,7 @@ from Products.ZenUtils.RedisUtils import getRedisClient, getRedisUrl -from Products.Jobber.task import requires, DMD, Abortable +from Products.Jobber.task import requires, DMD from 
Products.Jobber.zenjobs import app from .cache import CacheKey, CacheRecord, ConfigStatus @@ -27,7 +27,7 @@ @app.task( bind=True, - base=requires(DMD, Abortable), + base=requires(DMD), name="configcache.build_device_config", summary="Create Device Configuration Task", description_template="Create the configuration for device {2}.", @@ -56,6 +56,10 @@ def build_device_config( ) +# NOTE: the buildDeviceConfig function exists so that it can be tested +# without having to handle Celery details in the unit tests. + + def buildDeviceConfig( dmd, log, monitorname, deviceid, configclassname, submitted ): @@ -64,11 +68,26 @@ def buildDeviceConfig( store = _getStore() key = CacheKey(svcname, monitorname, deviceid) + # record when this build starts + started = time() + # Check whether this is an old job, i.e. job pending timeout. # If it is an old job, skip it, manager already sent another one. - status = next(store.get_status(key), None) + status = store.get_status(key) device = dmd.Devices.findDeviceByIdExact(deviceid) - if _job_is_old(status, submitted, device, log): + if device is None: + log.warn( + "cannot build config because device was not found " + "device=%s collector=%s service=%s submitted=%f", + key.device, + key.monitor, + key.service, + submitted, + ) + store.clear_status(key) + return + + if _job_is_old(status, submitted, started, device, log): return # If the status is Expired, another job is coming, so skip this job. @@ -119,11 +138,18 @@ def buildDeviceConfig( record = CacheRecord.make( svcname, monitorname, deviceid, uid, time(), config ) + # Get the current status of the configuration. - status = next(store.get_status(key), None) - if isinstance(status, (ConfigStatus.Expired, ConfigStatus.Pending)): - # status is not ConfigStatus.Building, so another job will be - # submitted or has already been submitted. + recent_status = store.get_status(key) + + # Test whether the status should be updated + update_status = _should_update_status( + recent_status, started, deviceid, monitorname, svcname, log + ) + + if not update_status: + # recent_status is not ConfigStatus.Building, so another job + # will be submitted or has already been submitted. store.put_config(record) log.info( "saved config without changing status " @@ -146,6 +172,67 @@ def buildDeviceConfig( ) +def _should_update_status( + recent_status, started, deviceid, monitorname, svcname, log +): + # Check for expected statuses. + if isinstance(recent_status, ConfigStatus.Building): + # The status is Building, so let's update the status. 
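+        # (a Building status means no competing job has replaced or
+        # re-expired this config since this build began, so the result
+        # of this build should become the Current config)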
+ return True + + if isinstance(recent_status, ConfigStatus.Expired): + update_status = bool(recent_status.expired < started) + if not update_status: + log.info( + "config expired while building config " + "device=%s collector=%s service=%s", + deviceid, + monitorname, + svcname, + ) + else: + log.warning( + "config status has inconsistent state status=Expired " + "expired=%s device=%s collector=%s service=%s", + datetime.fromtimestamp(recent_status.expired).isoformat(), + deviceid, + monitorname, + svcname, + ) + return update_status + + if isinstance(recent_status, ConfigStatus.Pending): + update_status = bool(recent_status.submitted < started) + if not update_status: + log.info( + "another job submitted while building config " + "device=%s collector=%s service=%s", + deviceid, + monitorname, + svcname, + ) + else: + log.warning( + "config status has inconsistent state status=Pending " + "submitted=%s device=%s collector=%s service=%s", + datetime.fromtimestamp(recent_status.submitted).isoformat(), + deviceid, + monitorname, + svcname, + ) + return update_status + + log.warning( + "Unexpected status change during config build " + "status=%s device=%s collector=%s service=%s", + type(recent_status).__name__, + deviceid, + monitorname, + svcname, + ) + return True + + def _delete_config(key, store, log): log.info( "no configuration built device=%s collector=%s service=%s", @@ -166,12 +253,11 @@ def _delete_config(key, store, log): store.clear_status(key) -def _job_is_old(status, submitted, device, log): +def _job_is_old(status, submitted, now, device, log): if submitted is None or status is None: # job is not old (default state) return False limit = get_pending_timeout(device) - now = time() if submitted < (now - limit): log.warn( "skipped this job because it's too old " diff --git a/Products/ZenCollector/configcache/tests/test_deviceupdatehandler.py b/Products/ZenCollector/configcache/tests/test_deviceupdatehandler.py new file mode 100644 index 0000000000..246b2172e1 --- /dev/null +++ b/Products/ZenCollector/configcache/tests/test_deviceupdatehandler.py @@ -0,0 +1,166 @@ + +############################################################################## +# +# Copyright (C) Zenoss, Inc. 2019, all rights reserved. +# +# This content is made available according to terms specified in +# License.zenoss under the directory where your Zenoss product is installed. 
+# +############################################################################## + +from __future__ import absolute_import, print_function + +import logging + +from unittest import TestCase + +from mock import Mock, patch + +from ..cache import CacheKey, ConfigStatus +from ..cache.storage import ConfigStore +from ..dispatcher import BuildConfigTaskDispatcher +from ..handlers import DeviceUpdateHandler + + +PATH = {"src": "Products.ZenCollector.configcache.handlers"} + + +class DeviceUpdateHandlerTest(TestCase): + """Test the DeviceUpdateHandler object.""" + + def setUp(t): + t.store = Mock(ConfigStore) + t.dispatcher = Mock(BuildConfigTaskDispatcher) + t.dispatcher.service_names = ("ServiceA", "ServiceB") + t.log = Mock(logging.getLogger("zen")) + t.handler = DeviceUpdateHandler(t.log, t.store, t.dispatcher) + + def tearDown(t): + del t.handler + del t.log + del t.dispatcher + del t.store + + @patch("{src}.time".format(**PATH), autospec=True) + def test_current_to_expired(t, _time): + key1 = CacheKey('a', 'b', 'c1') + key2 = CacheKey('a', 'b', 'c2') + updated1 = 33330.0 + updated2 = 33331.0 + now = 34000.0 + _time.time.return_value = now + + status1 = ConfigStatus.Current(key1, "/a/b/c1", updated1) + status2 = ConfigStatus.Current(key2, "/a/b/c2", updated2) + t.store.get_status.side_effect = (status1, status2) + + t.handler((key1, key2), 100.0) + + t.store.set_retired.assert_called_with() + t.store.set_expired.assert_called_with((key2, now), (key1, now)) + + @patch("{src}.time".format(**PATH), autospec=True) + def test_current_to_retired(t, _time): + key1 = CacheKey('a', 'b', 'c1') + key2 = CacheKey('a', 'b', 'c2') + updated1 = 33330.0 + updated2 = 33331.0 + now = 34000.0 + _time.time.return_value = now + + status1 = ConfigStatus.Current(key1, "/a/b/c1", updated1) + status2 = ConfigStatus.Current(key2, "/a/b/c2", updated2) + t.store.get_status.side_effect = (status1, status2) + + t.handler((key1, key2), 1000.0) + + t.store.set_retired.assert_called_with((key2, now), (key1, now)) + t.store.set_expired.assert_called_with() + + @patch("{src}.time".format(**PATH), autospec=True) + def test_current_to_retired_and_expired(t, _time): + key1 = CacheKey('a', 'b', 'c1') + key2 = CacheKey('a', 'd', 'c2') + updated1 = 33330.0 + updated2 = 32331.0 + now = 34000.0 + _time.time.return_value = now + + status1 = ConfigStatus.Current(key1, "/a/b/c1", updated1) + status2 = ConfigStatus.Current(key2, "/a/d/c2", updated2) + t.store.get_status.side_effect = (status1, status2) + + t.handler((key1, key2), 1000.0) + + t.store.set_retired.assert_called_with((key1, now)) + t.store.set_expired.assert_called_with((key2, now)) + + @patch("{src}.time".format(**PATH), autospec=True) + def test_pending_to_expired(t, _time): + key1 = CacheKey('a', 'b', 'c1') + key2 = CacheKey('a', 'b', 'c2') + updated1 = 33330.0 + updated2 = 32331.0 + now = 34000.0 + _time.time.return_value = now + + status1 = ConfigStatus.Pending(key1, "/a/b/c1", updated1) + status2 = ConfigStatus.Pending(key2, "/a/b/c2", updated2) + t.store.get_status.side_effect = (status1, status2) + + t.handler((key1, key2), 1000.0) + + t.store.set_retired.assert_called_with() + t.store.set_expired.assert_called_with((key2, now), (key1, now)) + + @patch("{src}.time".format(**PATH), autospec=True) + def test_only_expired(t, _time): + key1 = CacheKey('a', 'b', 'c1') + key2 = CacheKey('a', 'b', 'c2') + expired1 = 33330.0 + expired2 = 33331.0 + _time.time.return_value = 34000.0 + + status1 = ConfigStatus.Expired(key1, "/a/b/c1", expired1) + status2 = 
ConfigStatus.Expired(key2, "/a/b/c2", expired2) + t.store.get_status.side_effect = (status1, status2) + + t.handler((key1, key2), 100.0) + + t.store.set_retired.assert_called_with() + t.store.set_expired.assert_called_with() + + @patch("{src}.time".format(**PATH), autospec=True) + def test_only_retired(t, _time): + key1 = CacheKey('a', 'b', 'c1') + key2 = CacheKey('a', 'b', 'c2') + expired1 = 33330.0 + expired2 = 33331.0 + _time.time.return_value = 34000.0 + + status1 = ConfigStatus.Retired(key1, "/a/b/c1", expired1) + status2 = ConfigStatus.Retired(key2, "/a/b/c2", expired2) + t.store.get_status.side_effect = (status1, status2) + + t.handler((key1, key2), 1000.0) + + t.store.set_retired.assert_called_with() + t.store.set_expired.assert_called_with() + + @patch("{src}.time".format(**PATH), autospec=True) + def test_only_building(t, _time): + key1 = CacheKey('a', 'b', 'c1') + key2 = CacheKey('a', 'b', 'c2') + expired1 = 33330.0 + expired2 = 33331.0 + _time.time.return_value = 34000.0 + now = 34000.0 + + status1 = ConfigStatus.Building(key1, "/a/b/c1", expired1) + status2 = ConfigStatus.Building(key2, "/a/b/c2", expired2) + t.store.get_status.side_effect = (status1, status2) + + t.handler((key1, key2), 1000.0) + + t.store.set_retired.assert_called_with() + t.store.set_expired.assert_called_with((key2, now), (key1, now)) diff --git a/Products/ZenCollector/configcache/tests/test_storage.py b/Products/ZenCollector/configcache/tests/test_storage.py index d9282c2fbd..f4e1dafed7 100644 --- a/Products/ZenCollector/configcache/tests/test_storage.py +++ b/Products/ZenCollector/configcache/tests/test_storage.py @@ -52,16 +52,10 @@ def test_get_with_nondefault_default(t): def test_remove(t): t.assertIsNone(t.store.remove()) - def test_get_status_no_keys(t): - result = t.store.get_status() - t.assertIsInstance(result, collections.Iterable) - t.assertTupleEqual(tuple(result), ()) - def test_get_status_unknown_key(t): key = CacheKey("a", "b", "c") result = t.store.get_status(key) - t.assertIsInstance(result, collections.Iterable) - t.assertTupleEqual(tuple(result), ()) + t.assertIsNone(result) def test_get_pending(t): result = t.store.get_pending() @@ -98,7 +92,7 @@ def tearDown(t): del t.store def test_current_status(t): - t.assertIsNone(next(t.store.get_status(t.key), None)) + t.assertIsNone(t.store.get_status(t.key)) def test_search_with_status(t): t.store.set_pending((t.key, t.now)) @@ -107,25 +101,25 @@ def test_search_with_status(t): def test_retired(t): expected = ConfigStatus.Retired(t.key, None, t.now) t.store.set_retired((t.key, t.now)) - status = next(t.store.get_status(t.key), None) + status = t.store.get_status(t.key) t.assertEqual(expected, status) def test_expired(t): expected = ConfigStatus.Expired(t.key, None, t.now) t.store.set_expired((t.key, t.now)) - status = next(t.store.get_status(t.key), None) + status = t.store.get_status(t.key) t.assertEqual(expected, status) def test_pending(t): expected = ConfigStatus.Pending(t.key, None, t.now) t.store.set_pending((t.key, t.now)) - status = next(t.store.get_status(t.key), None) + status = t.store.get_status(t.key) t.assertEqual(expected, status) def test_building(t): expected = ConfigStatus.Building(t.key, None, t.now) t.store.set_building((t.key, t.now)) - status = next(t.store.get_status(t.key), None) + status = t.store.get_status(t.key) t.assertEqual(expected, status) @@ -270,16 +264,12 @@ def test_get_status(t): t.store.add(t.record1) t.store.add(t.record2) - result = tuple(t.store.get_status(t.record1.key)) - t.assertEqual(1, len(result)) 
- status = result[0] + status = t.store.get_status(t.record1.key) t.assertEqual(t.record1.key, status.key) t.assertIsInstance(status, ConfigStatus.Current) t.assertEqual(t.fields[0].updated, status.updated) - result = tuple(t.store.get_status(t.record2.key)) - t.assertEqual(1, len(result)) - status = result[0] + status = t.store.get_status(t.record2.key) t.assertEqual(t.record2.key, status.key) t.assertIsInstance(status, ConfigStatus.Current) t.assertEqual(t.fields[1].updated, status.updated) @@ -448,7 +438,7 @@ def test_retired_once(t): actual = next(t.store.get_retired(), None) t.assertEqual(expected, actual) - actual = next(t.store.get_status(t.record1.key), None) + actual = t.store.get_status(t.record1.key) t.assertEqual(expected, actual) actual = next(t.store.get_expired(), None) @@ -465,7 +455,7 @@ def test_expired_once(t): actual = next(t.store.get_expired(), None) t.assertEqual(expected, actual) - actual = next(t.store.get_status(t.record1.key), None) + actual = t.store.get_status(t.record1.key) t.assertEqual(expected, actual) actual = next(t.store.get_retired(), None) @@ -482,7 +472,7 @@ def test_pending_once(t): actual = next(t.store.get_pending(), None) t.assertEqual(expected, actual) - actual = next(t.store.get_status(t.record1.key), None) + actual = t.store.get_status(t.record1.key) t.assertEqual(expected, actual) actual = next(t.store.get_retired(), None) @@ -499,7 +489,7 @@ def test_building_once(t): actual = next(t.store.get_building(), None) t.assertEqual(expected, actual) - actual = next(t.store.get_status(t.record1.key), None) + actual = t.store.get_status(t.record1.key) t.assertEqual(expected, actual) actual = next(t.store.get_retired(), None) @@ -525,7 +515,7 @@ def test_retired_twice(t): actual = next(t.store.get_retired(), None) t.assertEqual(expected, actual) - actual = next(t.store.get_status(t.record1.key), None) + actual = t.store.get_status(t.record1.key) t.assertEqual(expected, actual) actual = next(t.store.get_expired(), None) @@ -544,7 +534,7 @@ def test_expired_twice(t): actual = next(t.store.get_expired(), None) t.assertEqual(expected, actual) - actual = next(t.store.get_status(t.record1.key), None) + actual = t.store.get_status(t.record1.key) t.assertEqual(expected, actual) actual = next(t.store.get_retired(), None) @@ -563,7 +553,7 @@ def test_pending_twice(t): actual = next(t.store.get_pending(), None) t.assertEqual(expected, actual) - actual = next(t.store.get_status(t.record1.key), None) + actual = t.store.get_status(t.record1.key) t.assertEqual(expected, actual) actual = next(t.store.get_retired(), None) @@ -582,7 +572,7 @@ def test_building_twice(t): actual = next(t.store.get_building(), None) t.assertEqual(expected, actual) - actual = next(t.store.get_status(t.record1.key), None) + actual = t.store.get_status(t.record1.key) t.assertEqual(expected, actual) actual = next(t.store.get_retired(), None) @@ -601,7 +591,7 @@ class TestCurrentOnlyMethods(_BaseTest): def test_older_with_current(t): t.store.add(t.record1) - status = next(t.store.get_status(t.record1.key), None) + status = t.store.get_status(t.record1.key) t.assertIsInstance(status, ConfigStatus.Current) older = next(t.store.get_older(t.record1.updated), None) @@ -612,7 +602,7 @@ def test_older_with_retired(t): ts = t.record1.updated + 500 t.store.set_retired((t.record1.key, ts)) - status = next(t.store.get_status(t.record1.key), None) + status = t.store.get_status(t.record1.key) t.assertIsInstance(status, ConfigStatus.Retired) older = next(t.store.get_older(t.record1.updated), None) @@ -623,7 
+613,7 @@ def test_older_with_expired(t): ts = t.record1.updated + 500 t.store.set_expired((t.record1.key, ts)) - status = next(t.store.get_status(t.record1.key), None) + status = t.store.get_status(t.record1.key) t.assertIsInstance(status, ConfigStatus.Expired) older = next(t.store.get_older(t.record1.updated), None) @@ -634,7 +624,7 @@ def test_older_with_pending(t): ts = t.record1.updated + 500 t.store.set_pending((t.record1.key, ts)) - status = next(t.store.get_status(t.record1.key), None) + status = t.store.get_status(t.record1.key) t.assertIsInstance(status, ConfigStatus.Pending) older = next(t.store.get_older(t.record1.updated), None) @@ -645,7 +635,7 @@ def test_older_with_building(t): ts = t.record1.updated + 500 t.store.set_building((t.record1.key, ts)) - status = next(t.store.get_status(t.record1.key), None) + status = t.store.get_status(t.record1.key) t.assertIsInstance(status, ConfigStatus.Building) older = next(t.store.get_older(t.record1.updated), None) @@ -654,7 +644,7 @@ def test_older_with_building(t): def test_newer_with_current(t): t.store.add(t.record1) - status = next(t.store.get_status(t.record1.key), None) + status = t.store.get_status(t.record1.key) t.assertIsInstance(status, ConfigStatus.Current) newer = next(t.store.get_newer(t.record1.updated - 1), None) @@ -665,7 +655,7 @@ def test_newer_with_retired(t): ts = t.record1.updated + 500 t.store.set_retired((t.record1.key, ts)) - status = next(t.store.get_status(t.record1.key), None) + status = t.store.get_status(t.record1.key) t.assertIsInstance(status, ConfigStatus.Retired) newer = next(t.store.get_newer(t.record1.updated - 1), None) @@ -676,7 +666,7 @@ def test_newer_with_expired(t): ts = t.record1.updated + 500 t.store.set_expired((t.record1.key, ts)) - status = next(t.store.get_status(t.record1.key), None) + status = t.store.get_status(t.record1.key) t.assertIsInstance(status, ConfigStatus.Expired) newer = next(t.store.get_newer(t.record1.updated - 1), None) @@ -687,7 +677,7 @@ def test_newer_with_pending(t): ts = t.record1.updated + 500 t.store.set_pending((t.record1.key, ts)) - status = next(t.store.get_status(t.record1.key), None) + status = t.store.get_status(t.record1.key) t.assertIsInstance(status, ConfigStatus.Pending) newer = next(t.store.get_newer(t.record1.updated - 1), None) @@ -698,7 +688,7 @@ def test_newer_with_building(t): ts = t.record1.updated + 500 t.store.set_building((t.record1.key, ts)) - status = next(t.store.get_status(t.record1.key), None) + status = t.store.get_status(t.record1.key) t.assertIsInstance(status, ConfigStatus.Building) newer = next(t.store.get_newer(t.record1.updated - 1), None) @@ -715,7 +705,7 @@ def test_current(t): expected = ConfigStatus.Current( t.record1.key, t.record1.uid, t.record1.updated ) - actual = next(t.store.get_status(t.record1.key), None) + actual = t.store.get_status(t.record1.key) t.assertEqual(expected, actual) def test_retired(t): @@ -723,7 +713,7 @@ def test_retired(t): ts = t.record1.updated + 100 t.store.set_retired((t.record1.key, ts)) expected = ConfigStatus.Retired(t.record1.key, t.record1.uid, ts) - actual = next(t.store.get_status(t.record1.key), None) + actual = t.store.get_status(t.record1.key) t.assertEqual(expected, actual) def test_expired(t): @@ -731,7 +721,7 @@ def test_expired(t): ts = t.record1.updated + 200 t.store.set_expired((t.record1.key, ts)) expected = ConfigStatus.Expired(t.record1.key, t.record1.uid, ts) - actual = next(t.store.get_status(t.record1.key), None) + actual = t.store.get_status(t.record1.key) 
t.assertEqual(expected, actual) def test_pending(t): @@ -739,7 +729,7 @@ def test_pending(t): ts = t.record1.updated + 300 t.store.set_pending((t.record1.key, ts)) expected = ConfigStatus.Pending(t.record1.key, t.record1.uid, ts) - actual = next(t.store.get_status(t.record1.key), None) + actual = t.store.get_status(t.record1.key) t.assertEqual(expected, actual) def test_building(t): @@ -747,7 +737,7 @@ def test_building(t): ts = t.record1.updated + 400 t.store.set_building((t.record1.key, ts)) expected = ConfigStatus.Building(t.record1.key, t.record1.uid, ts) - actual = next(t.store.get_status(t.record1.key), None) + actual = t.store.get_status(t.record1.key) t.assertEqual(expected, actual) @@ -760,7 +750,7 @@ def test_clear_from_current(t): t.store.add(t.record1) t.store.clear_status(t.record1.key) - status = next(t.store.get_status(t.record1.key), None) + status = t.store.get_status(t.record1.key) t.assertIsInstance(status, ConfigStatus.Current) t.assertIsNone(next(t.store.get_retired(), None)) @@ -775,7 +765,7 @@ def test_clear_from_expired_to_current(t): t.store.clear_status(t.record1.key) - status = next(t.store.get_status(t.record1.key), None) + status = t.store.get_status(t.record1.key) t.assertIsInstance(status, ConfigStatus.Current) t.assertIsNone(next(t.store.get_retired(), None)) @@ -789,7 +779,7 @@ def test_clear_from_retired(t): t.store.clear_status(t.record1.key) - t.assertIsNone(next(t.store.get_status(t.record1.key), None)) + t.assertIsNone(t.store.get_status(t.record1.key)) t.assertIsNone(next(t.store.get_retired(), None)) t.assertIsNone(next(t.store.get_expired(), None)) t.assertIsNone(next(t.store.get_pending(), None)) @@ -801,7 +791,7 @@ def test_clear_from_expired(t): t.store.clear_status(t.record1.key) - t.assertIsNone(next(t.store.get_status(t.record1.key), None)) + t.assertIsNone(t.store.get_status(t.record1.key)) t.assertIsNone(next(t.store.get_retired(), None)) t.assertIsNone(next(t.store.get_expired(), None)) t.assertIsNone(next(t.store.get_pending(), None)) @@ -813,7 +803,7 @@ def test_clear_from_pending(t): t.store.clear_status(t.record1.key) - t.assertIsNone(next(t.store.get_status(t.record1.key), None)) + t.assertIsNone(t.store.get_status(t.record1.key)) t.assertIsNone(next(t.store.get_retired(), None)) t.assertIsNone(next(t.store.get_expired(), None)) t.assertIsNone(next(t.store.get_pending(), None)) @@ -825,7 +815,7 @@ def test_clear_from_building(t): t.store.clear_status(t.record1.key) - t.assertIsNone(next(t.store.get_status(t.record1.key), None)) + t.assertIsNone(t.store.get_status(t.record1.key)) t.assertIsNone(next(t.store.get_retired(), None)) t.assertIsNone(next(t.store.get_expired(), None)) t.assertIsNone(next(t.store.get_pending(), None)) @@ -1038,9 +1028,7 @@ def test_add_overwrites_retired(t): retired_keys = tuple(t.store.get_retired()) t.assertTupleEqual((), retired_keys) - result = tuple(t.store.get_status(t.record1.key)) - t.assertEqual(1, len(result)) - status = result[0] + status = t.store.get_status(t.record1.key) t.assertEqual(t.record1.key, status.key) t.assertIsInstance(status, ConfigStatus.Current) t.assertEqual(t.record1.updated, status.updated) @@ -1054,9 +1042,7 @@ def test_add_overwrites_expired(t): expired_keys = tuple(t.store.get_expired()) t.assertTupleEqual((), expired_keys) - result = tuple(t.store.get_status(t.record1.key)) - t.assertEqual(1, len(result)) - status = result[0] + status = t.store.get_status(t.record1.key) t.assertEqual(t.record1.key, status.key) t.assertIsInstance(status, ConfigStatus.Current) 
t.assertEqual(t.record1.updated, status.updated) @@ -1075,9 +1061,7 @@ def test_add_overwrites_pending(t): pending_keys = tuple(t.store.get_pending()) t.assertTupleEqual((), pending_keys) - result = tuple(t.store.get_status(t.record1.key)) - t.assertEqual(1, len(result)) - status = result[0] + status = t.store.get_status(t.record1.key) t.assertEqual(t.record1.key, status.key) t.assertIsInstance(status, ConfigStatus.Current) t.assertEqual(t.record1.updated, status.updated) @@ -1100,9 +1084,7 @@ def test_add_overwrites_building(t): building_keys = tuple(t.store.get_building()) t.assertTupleEqual((), building_keys) - result = tuple(t.store.get_status(t.record1.key)) - t.assertEqual(1, len(result)) - status = result[0] + status = t.store.get_status(t.record1.key) t.assertEqual(t.record1.key, status.key) t.assertIsInstance(status, ConfigStatus.Current) t.assertEqual(t.record1.updated, status.updated) diff --git a/Products/ZenCollector/configcache/tests/test_task.py b/Products/ZenCollector/configcache/tests/test_task.py index 706047d7cb..8f50d4bec0 100644 --- a/Products/ZenCollector/configcache/tests/test_task.py +++ b/Products/ZenCollector/configcache/tests/test_task.py @@ -64,5 +64,30 @@ def test_no_config_built(t, _resolve, _createObject, _time): buildDeviceConfig(dmd, log, monitor, t.device_name, clsname, submitted) - status = next(t.store.get_status(key), None) + status = t.store.get_status(key) + t.assertIsNone(status) + + @mock.patch("{task}.createObject".format(**PATH), autospec=True) + @mock.patch("{task}.resolve".format(**PATH), autospec=True) + def test_device_not_found(t, _resolve, _createObject): + monitor = "localhost" + clsname = "Products.ZenHub.services.PingService.PingService" + svcname = clsname.rsplit(".", 1)[0] + submitted = 123456.34 + svcclass = mock.Mock() + svc = mock.MagicMock() + dmd = mock.Mock() + log = mock.Mock() + key = CacheKey(svcname, monitor, t.device_name) + + _createObject.return_value = t.store + _resolve.return_value = svcclass + svcclass.return_value = svc + dmd.Devices.findDeviceByIdExact.return_value = None + + t.store.set_pending((key, submitted)) + + buildDeviceConfig(dmd, log, monitor, t.device_name, clsname, submitted) + + status = t.store.get_status(key) t.assertIsNone(status) diff --git a/Products/ZenCollector/configcache/utils/__init__.py b/Products/ZenCollector/configcache/utils/__init__.py index 4c7e5957ba..2462114665 100644 --- a/Products/ZenCollector/configcache/utils/__init__.py +++ b/Products/ZenCollector/configcache/utils/__init__.py @@ -9,6 +9,7 @@ from __future__ import absolute_import +from .metrics import MetricReporter from .pollers import RelStorageInvalidationPoller from .services import getConfigServices from .zprops import ( @@ -25,5 +26,6 @@ "get_minimum_ttl", "get_pending_timeout", "get_ttl", + "MetricReporter", "RelStorageInvalidationPoller", ) diff --git a/Products/ZenCollector/configcache/utils/metrics.py b/Products/ZenCollector/configcache/utils/metrics.py new file mode 100644 index 0000000000..82bae7882a --- /dev/null +++ b/Products/ZenCollector/configcache/utils/metrics.py @@ -0,0 +1,91 @@ +############################################################################## +# +# Copyright (C) Zenoss, Inc. 2024, all rights reserved. +# +# This content is made available according to terms specified in +# License.zenoss under the directory where your Zenoss product is installed. 
+# +############################################################################## + +import json +import logging + +import requests + +from Products.ZenUtils.controlplane import configuration as cc_config +from Products.ZenUtils.MetricReporter import DEFAULT_METRIC_URL, getMetricData + +log = logging.getLogger("zen.configcache.metrics") + + +class MetricReporter(object): + + def __init__(self, url=None, prefix="", tags=None): + if not url: + url = cc_config.consumer_url + if not url: + url = DEFAULT_METRIC_URL + self._url = url + self._prefix = prefix + tags = dict(tags if tags is not None else {}) + tags.update( + { + "serviceId": cc_config.service_id, + "instance": cc_config.instance_id, + "hostId": cc_config.host_id, + "tenantId": cc_config.tenant_id, + } + ) + self._tags = tags + self._session = None + self._instruments = {} + + def __contains__(self, name): + """Return True if `name` matches a registered metric.""" + return name in self._instruments + + def add_tags(self, tags): + self._tags.update(tags) + + def register(self, name, instrument): + self._instruments[name] = instrument + + def save(self, name=None): + metrics = list( + self._get_metrics( + self._instruments.keys() if name is None else (name,) + ) + ) + if not metrics: + return + self._post_metrics(metrics) + + def _post_metrics(self, metrics): + if self._session is None: + self._session = requests.Session() + self._session.headers.update( + { + "Content-Type": "application/json", + "User-Agent": "Zenoss Service Metrics", + } + ) + post_data = {"metrics": metrics} + log.debug("sending metric payload: %s", post_data) + response = self._session.post(self._url, data=json.dumps(post_data)) + if response.status_code != 200: + log.warning( + "problem submitting metrics: %d, %s", + response.status_code, + response.text.replace("\n", "\\n"), + ) + self._session = None + else: + log.debug("%d metrics posted", len(metrics)) + + def _get_metrics(self, names): + for name in names: + instrument = self._instruments.get(name) + data = getMetricData(instrument, name, self._tags, self._prefix) + if data: + for metric in data: + yield metric diff --git a/Products/ZenHub/PBDaemon.py b/Products/ZenHub/PBDaemon.py index fa43bcbf90..b9b724b4c8 100644 --- a/Products/ZenHub/PBDaemon.py +++ b/Products/ZenHub/PBDaemon.py @@ -195,6 +195,7 @@ def generateEvent(self, event, **kw): eventCopy["agent"] = self.name eventCopy["monitor"] = self.options.monitor + eventCopy["manager"] = self.fqdn return eventCopy def publisher(self):
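The configcache MetricReporter above is a push-style reporter: instruments are registered by name, and save() serializes them through getMetricData and POSTs the batch to the consumer URL. A minimal usage sketch, assuming a metrology Counter; the "builds" instrument name and the prefix are illustrative, not taken from the patch:

    from metrology.instruments import Counter

    from Products.ZenCollector.configcache.utils import MetricReporter

    reporter = MetricReporter(prefix="zenoss.configcache")  # hypothetical prefix
    builds = Counter()
    reporter.register("builds", builds)

    builds.increment()
    if "builds" in reporter:     # __contains__ checks registration
        reporter.save("builds")  # posts just the named instrument
    reporter.save()              # or post every registered instrument

diff --git a/Products/ZenHub/server/executors/tests/test_workers.py b/Products/ZenHub/server/executors/tests/test_workers.py index 4ebc1d0592..af774a595b 100644 --- a/Products/ZenHub/server/executors/tests/test_workers.py +++ b/Products/ZenHub/server/executors/tests/test_workers.py @@ -408,7 +408,7 @@ def setUp(self): def test__handle_start_first_attempt(self): task = Mock(spec=["attempt", "started", "call"]) task.attempt = 0 - workerId = 1 + workerId = "default_1" self.running._handle_start(task, workerId) @@ -423,7 +423,7 @@ def test__handle_start_later_attempts(self): task = Mock(spec=["attempt", "started", "call"]) task.attempt = 1 - workerId = 1 + workerId = "default_1" self.running._handle_start(task, workerId) @@ -729,7 +729,10 @@ def test_execute_unexpected_error(self): worker = Mock(spec=["workerId", "run"]) worker.run.side_effect = error - task = Mock(spec=["call", "retryable", "attempt", "priority"]) + task = Mock( + spec=["call", "retryable", "attempt", "priority",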
"__slots__"] + ) + task.__slots__ = ("workerId",) task.retryable = True handler = Mock() @@ -753,7 +756,9 @@ def test_execute_unexpected_error(self): ) self.workers.layoff.assert_called_once_with(worker) self.logger.exception.assert_called_once_with( - "Unexpected failure worklist=%s", "default" + "Unexpected failure worklist=%s, task details: %r", + "default", + {"workerId": None}, ) self.logger.error.assert_not_called() diff --git a/Products/ZenHub/server/executors/workers.py b/Products/ZenHub/server/executors/workers.py index 43aacfc96d..fdcee41a3a 100644 --- a/Products/ZenHub/server/executors/workers.py +++ b/Products/ZenHub/server/executors/workers.py @@ -228,8 +228,8 @@ def execute(self, worker, task): ) self._handle_retry(task, ex) except Exception as ex: + self.log.exception("Unexpected failure worklist=%s, task details: %r", self.name, {f: getattr(task, f, None) for f in task.__slots__}) self._handle_error(task, ex) - self.log.exception("Unexpected failure worklist=%s", self.name) finally: # if the task is retryable, push the task # to the front of its queue. @@ -443,7 +443,14 @@ def completed( return ServiceCallCompleted(**data) def failure(self, error): - self.deferred.errback(error) + try: + self.deferred.errback(error) + except Exception as ex: + getLogger(self).exception("Unexpected exception in deferred.errback for task %s with original error %r", self.desc, error) def success(self, result): - self.deferred.callback(result) + try: + self.deferred.callback(result) + except Exception as ex: + getLogger(self).exception("Unexpected exception in deferred.callback for task %s with original result %r", self.desc, result) + diff --git a/Products/ZenHub/tests/test_PBDaemon.py b/Products/ZenHub/tests/test_PBDaemon.py index 3bc2ba3d12..fb6bf5a641 100644 --- a/Products/ZenHub/tests/test_PBDaemon.py +++ b/Products/ZenHub/tests/test_PBDaemon.py @@ -211,6 +211,7 @@ def setUp(t): t.name = "pb_daemon_name" t.pbd = PBDaemon(name=t.name) + t.pbd.fqdn = "fqdn" # Mock out 'log' to prevent spurious output to stdout. 
t.pbd.log = Mock(spec=logging.getLoggerClass()) @@ -447,6 +448,7 @@ def test_generateEvent(t): "name": "event", "newkey": "newkey", "agent": t.pbd.name, + "manager": t.pbd.fqdn, "monitor": t.pbd.options.monitor, }, ) diff --git a/Products/ZenHub/tests/test_zenhubworker.py b/Products/ZenHub/tests/test_zenhubworker.py index 3091da5fff..87c0500720 100644 --- a/Products/ZenHub/tests/test_zenhubworker.py +++ b/Products/ZenHub/tests/test_zenhubworker.py @@ -499,11 +499,18 @@ def test_is_connected_true(t): t.assertTrue(t.zhc.is_connected) @patch.object(ZenHubClient, "_ZenHubClient__prepForConnection") - def test_start(t, prepForConnection): + def test_start_while_stopping(t, prepForConnection): t.zhc._ZenHubClient__stopping = True t.zhc.start() + t.assertTrue(t.zhc._ZenHubClient__stopping) + t.ClientService.assert_not_called() + + @patch.object(ZenHubClient, "_ZenHubClient__prepForConnection") + def test_start(t, prepForConnection): + t.zhc.start() + t.assertFalse(t.zhc._ZenHubClient__stopping) t.backoffPolicy.assert_called_once_with(initialDelay=0.5, factor=3.0) t.ClientService.assert_called_once_with( diff --git a/Products/ZenHub/zenhubworker.py b/Products/ZenHub/zenhubworker.py index 05494aa7c8..7146f78031 100755 --- a/Products/ZenHub/zenhubworker.py +++ b/Products/ZenHub/zenhubworker.py @@ -475,7 +475,8 @@ def is_connected(self): def start(self): """Start connecting to ZenHub.""" - self.__stopping = False + if self.__stopping: + return factory = ZenPBClientFactory() self.__service = ClientService( self.__endpoint, diff --git a/Products/ZenModel/ZenPack.py b/Products/ZenModel/ZenPack.py index 054b54e2c5..3917daab0c 100644 --- a/Products/ZenModel/ZenPack.py +++ b/Products/ZenModel/ZenPack.py @@ -494,7 +494,7 @@ def migrate(self, previousVersion=None): try: for instance in instances: - if instance.version >= migrateCutoff: + if instance.version.tuple() >= migrateCutoff: recover.append(instance) instance.migrate(self) except Exception as ex: diff --git a/Products/ZenModel/ZenPacker.py b/Products/ZenModel/ZenPacker.py index 63c630576c..0c2bcee69e 100644 --- a/Products/ZenModel/ZenPacker.py +++ b/Products/ZenModel/ZenPacker.py @@ -63,6 +63,8 @@ def findObject(self, id): pass if len(result) == 0: try: + if isinstance(id, unicode): + id = id.encode('utf-8') result.append(self.dmd.unrestrictedTraverse(id)) except KeyError: pass diff --git a/Products/ZenModel/ZenossInfo.py b/Products/ZenModel/ZenossInfo.py index 3a86117887..1164ff08ee 100644 --- a/Products/ZenModel/ZenossInfo.py +++ b/Products/ZenModel/ZenossInfo.py @@ -20,7 +20,6 @@ import shutil import traceback import logging -import commands from AccessControl.class_init import InitializeClass from OFS.SimpleItem import SimpleItem @@ -307,19 +306,6 @@ def getRabbitMQVersion(self): from Products.ZenUtils.qverify import ZenAmqp return Version.parse("RabbitMQ %s" % ZenAmqp().getVersion()) - @versionmeta("Erlang", "http://www.erlang.org/") - def getErlangVersion(self): - retVal, output = commands.getstatusoutput('erl -noshell +V') - version = None - - if not retVal: - try: - version = re.findall(r'version (\S+)', output)[0] - except Exception: - pass - - return Version.parse("Erlang %s" % version) - def getAllVersions(self): """ Return a list of version numbers for currently tracked component @@ -333,7 +319,6 @@ def getAllVersions(self): self.getMySQLVersion, self.getTwistedVersion, self.getRabbitMQVersion, - self.getErlangVersion, self.getNetSnmpVersion, self.getPyNetSnmpVersion, self.getWmiVersion, diff --git a/Products/ZenModel/data/devices.xml 
b/Products/ZenModel/data/devices.xml index 0b49a7897f..bafc8e2063 100644 --- a/Products/ZenModel/data/devices.xml +++ b/Products/ZenModel/data/devices.xml @@ -1,14 +1,14 @@ - + [] diff --git a/Products/ZenModel/data/events.xml b/Products/ZenModel/data/events.xml index 396f4fbdf1..8d5a9a1437 100644 --- a/Products/ZenModel/data/events.xml +++ b/Products/ZenModel/data/events.xml @@ -1,14 +1,14 @@ - + True diff --git a/Products/ZenModel/data/manufacturers.xml b/Products/ZenModel/data/manufacturers.xml index 04b1205fe4..0b296fed09 100644 --- a/Products/ZenModel/data/manufacturers.xml +++ b/Products/ZenModel/data/manufacturers.xml @@ -1,14 +1,14 @@ - + diff --git a/Products/ZenModel/data/monitorTemplate.xml b/Products/ZenModel/data/monitorTemplate.xml index 8aaf4d7c7c..a63d9aac9f 100644 --- a/Products/ZenModel/data/monitorTemplate.xml +++ b/Products/ZenModel/data/monitorTemplate.xml @@ -1,14 +1,14 @@ - + MonitorClass diff --git a/Products/ZenModel/data/osprocesses.xml b/Products/ZenModel/data/osprocesses.xml index cf42c9a7eb..73538a2621 100644 --- a/Products/ZenModel/data/osprocesses.xml +++ b/Products/ZenModel/data/osprocesses.xml @@ -1,14 +1,14 @@ - + Base Zenoss daemons diff --git a/Products/ZenModel/data/services.xml b/Products/ZenModel/data/services.xml index bfe6a6cde4..bad5802696 100644 --- a/Products/ZenModel/data/services.xml +++ b/Products/ZenModel/data/services.xml @@ -1,14 +1,14 @@ - + False diff --git a/Products/ZenModel/data/zodb.sql.gz b/Products/ZenModel/data/zodb.sql.gz index 8146c54f9a..a4738a7e0b 100644 Binary files a/Products/ZenModel/data/zodb.sql.gz and b/Products/ZenModel/data/zodb.sql.gz differ diff --git a/Products/ZenStatus/ping/CmdPingTask.py b/Products/ZenStatus/ping/CmdPingTask.py index fad7f149e2..0c2e65e0d1 100644 --- a/Products/ZenStatus/ping/CmdPingTask.py +++ b/Products/ZenStatus/ping/CmdPingTask.py @@ -52,7 +52,7 @@ def _detectPing(): log.info("ping6 not found in path") _PING_ARG_TEMPLATE = ( - "%(ping)s -n -s %(datalength)d -c 1 -t %(ttl)d -w %(timeout)f %(ip)s" + "%(ping)s -n -s %(datalength)d -c 1 -t %(ttl)d -w %(timeout)d %(ip)s" ) import platform @@ -61,7 +61,7 @@ def _detectPing(): log.info("Mac OS X detected; adjusting ping args.") _PING_ARG_TEMPLATE = ( "%(ping)s -n -s %(datalength)d -c 1 " - "-m %(ttl)d -t %(timeout)f %(ip)s" + "-m %(ttl)d -t %(timeout)d %(ip)s" ) elif system != "Linux": log.info( @@ -150,7 +150,7 @@ def _pingIp(self): ip=self.config.ip, version=self.config.ipVersion, ttl=64, - timeout=float(self._preferences.pingTimeOut), + timeout=int(self._preferences.pingTimeOut), datalength=self._daemon.options.dataLength if self._daemon.options.dataLength > 16 else 16, diff --git a/Products/ZenUI3/browser/modelapi/configure.zcml b/Products/ZenUI3/browser/modelapi/configure.zcml index f5fe2ad9d8..14ac6a8ffd 100644 --- a/Products/ZenUI3/browser/modelapi/configure.zcml +++ b/Products/ZenUI3/browser/modelapi/configure.zcml @@ -136,4 +136,11 @@ permission="zenoss.Common" /> + + diff --git a/Products/ZenUI3/browser/modelapi/modelapi.py b/Products/ZenUI3/browser/modelapi/modelapi.py index 490a41bb24..50ac3da943 100644 --- a/Products/ZenUI3/browser/modelapi/modelapi.py +++ b/Products/ZenUI3/browser/modelapi/modelapi.py @@ -403,3 +403,13 @@ def _services(self): ('configCacheBuilders', 'builder'), ('configCacheManagers', 'manager'), ) + +class ZenjobsMonitor(BaseApiView): + """ + This view emits info for zenjobs-monitor + """ + @property + def _services(self): + return ( + ('zenjobsMonitors', 'zenjobs-monitor'), + ) diff --git 
a/Products/ZenUI3/browser/resources/js/zenoss/EventPanels.js b/Products/ZenUI3/browser/resources/js/zenoss/EventPanels.js index b223979302..0555a4cfe4 100644 --- a/Products/ZenUI3/browser/resources/js/zenoss/EventPanels.js +++ b/Products/ZenUI3/browser/resources/js/zenoss/EventPanels.js @@ -1711,6 +1711,7 @@ return val; }, refresh: function() { + this.getStore().reload(); this.callParent(arguments); this.fireEvent('eventgridrefresh', this); } diff --git a/Products/ZenUI3/browser/resources/js/zenoss/itinfrastructure.js b/Products/ZenUI3/browser/resources/js/zenoss/itinfrastructure.js index ce1ba28d97..6fd26594f3 100644 --- a/Products/ZenUI3/browser/resources/js/zenoss/itinfrastructure.js +++ b/Products/ZenUI3/browser/resources/js/zenoss/itinfrastructure.js @@ -1665,6 +1665,7 @@ Ext.onReady(function () { handler: function () { var grid = Ext.getCmp('device_grid'); if (grid.isVisible(true)) { + grid.getStore().reload(); grid.refresh(); Ext.getCmp('organizer_events').refresh(); refreshTreePanel(); diff --git a/Products/ZenUtils/DaemonStats.py b/Products/ZenUtils/DaemonStats.py index 687264db8d..3f5193bb06 100644 --- a/Products/ZenUtils/DaemonStats.py +++ b/Products/ZenUtils/DaemonStats.py @@ -8,7 +8,8 @@ ############################################################################## import time -import os + +from .controlplane import configuration as cc_config class DaemonStats(object): @@ -22,9 +23,18 @@ def __init__(self): self.metric_writer = None self._threshold_notifier = None self._derivative_tracker = None - self._service_id = None - self._tenant_id = None - self._instance_id = None + self._ctx_id = None + self._ctx_key = None + + tags = {"internal": True} + # Only capture the control center variables that have a value. + if cc_config.service_id: + tags["serviceId"] = cc_config.service_id + if cc_config.tenant_id: + tags["tenantId"] = cc_config.tenant_id + if cc_config.instance_id: + tags["instance"] = cc_config.instance_id + self._common_tags = tags def config( self, @@ -48,35 +58,16 @@ def config( self._threshold_notifier = threshold_notifier self._derivative_tracker = derivative_tracker - # when running inside control plane pull the service id from the - # environment. 
- if os.environ.get("CONTROLPLANE", "0") == "1": - self._tenant_id = os.environ.get("CONTROLPLANE_TENANT_ID") - self._service_id = os.environ.get("CONTROLPLANE_SERVICE_ID") - self._instance_id = os.environ.get("CONTROLPLANE_INSTANCE_ID") - - def _context_id(self): - return self.name + "-" + self.monitor + # Update the common tags + self._common_tags.update({"daemon": name, "monitor": monitor}) - def _contextKey(self): - return "/".join(("Daemons", self.monitor)) + # evaluate identifiers once + self._ctx_id = name + "-" + monitor + self._ctx_key = "/".join(("Daemons", monitor)) def _tags(self, metric_type): - tags = { - "daemon": self.name, - "monitor": self.monitor, - "metricType": metric_type, - "internal": True, - } - if self._service_id: - tags["serviceId"] = self._service_id - - if self._tenant_id: - tags["tenantId"] = self._tenant_id - - if self._instance_id: - tags["instance"] = self._instance_id - + tags = self._common_tags.copy() + tags["metricType"] = metric_type return tags def derive(self, name, value): @@ -95,7 +86,6 @@ def post_metrics(self, name, value, metric_type): tags = self._tags(metric_type) timestamp = time.time() - context_id = self._context_id() if metric_type in {"DERIVE", "COUNTER"}: # compute (and cache) a rate for COUNTER/DERIVE if metric_type == "COUNTER": @@ -104,7 +94,7 @@ def post_metrics(self, name, value, metric_type): metric_min = "U" value = self._derivative_tracker.derivative( - "%s:%s" % (context_id, name), + "%s:%s" % (self._ctx_id, name), (float(value), timestamp), min=metric_min, ) @@ -113,8 +103,8 @@ def post_metrics(self, name, value, metric_type): self._metric_writer.write_metric(name, value, timestamp, tags) # check for threshold breaches and send events when needed self._threshold_notifier.notify( - self._contextKey(), - context_id, + self._ctx_key, + self._ctx_id, self.name + "_" + name, timestamp, value, diff --git a/Products/ZenUtils/MetricReporter.py b/Products/ZenUtils/MetricReporter.py index b01c490f06..7bd485f31a 100644 --- a/Products/ZenUtils/MetricReporter.py +++ b/Products/ZenUtils/MetricReporter.py @@ -1,24 +1,23 @@ ############################################################################## -# +# # Copyright (C) Zenoss, Inc. 2017, all rights reserved. -# +# # This content is made available according to terms specified in # License.zenoss under the directory where your Zenoss product is installed. -# +# ############################################################################## import json +import inspect import logging -import os -import requests import time -from twisted.internet import reactor, defer, task -from metrology.registry import registry -from astrolabe.interval import Interval -from itertools import izip from collections import deque +from itertools import izip +import requests + +from astrolabe.interval import Interval from metrology.instruments import ( Counter, Gauge, @@ -27,99 +26,79 @@ Timer, UtilizationTimer, ) - +from metrology.registry import registry from metrology.reporter.base import Reporter +from twisted.internet import reactor, defer, task -log = logging.getLogger("zen.metricreporter") - - -class TimeOnce(object): - """ - a simple context manager to time something and save tag values and - a measurement. 
- """ - def __init__(self, gauge, *args): - self.gauge = gauge - self.tagValues = args - def __enter__(self): - self.interval = Interval.now() - def __exit__(self, *args): - self.gauge.update(self.tagValues, self.interval.stop()) +from .controlplane import configuration as cc_config +DEFAULT_METRIC_URL = "http://localhost:22350/api/metrics/store" -class QueueGauge(object): - """ - This instrument contains simple point-in-time measurements like a gauge. - Unlike a gauge, however, it: - - can be configured to have tags whose values can vary with each measurement - - contains a queue of values with tag values, which are read only once each - Many values or none can be written to this instrument between cycles of its - reporter, so for it many or no values will be published. - Calling an instance returns something which should append to the instances - queue a tuple, which should contain 1 value for each tagKey of the instance, - followed by a measurement. - """ - def __init__(self, *args): - self.newContextManager = args[0] if callable(args[0]) else TimeOnce - self.tagKeys = args if not callable(args[0]) else args[1:] - self.queue = deque() - def __call__(self, *args): - if len(self.tagKeys) != len(args): - raise RuntimeError('The number of tag values provided does not match the number of configured tag keys') - return self.newContextManager(self, *args) - def update(self, tagValues, metricValue): - self.queue.appendleft(tagValues + (metricValue,)) +log = logging.getLogger("zen.metricreporter") class MetricReporter(Reporter): def __init__(self, **options): - super(MetricReporter, self).__init__(interval=30) - self.prefix = options.get('prefix', "") - self.metric_destination = os.environ.get("CONTROLPLANE_CONSUMER_URL", "") - if self.metric_destination == "": - self.metric_destination = "http://localhost:22350/api/metrics/store" + interval = options.get("interval", 30) + super(MetricReporter, self).__init__(interval=interval) + self.prefix = options.get("prefix", "") + self.metric_destination = cc_config.consumer_url + if not self.metric_destination: + self.metric_destination = DEFAULT_METRIC_URL self.session = None - self.tags = None - self.tags = { - 'serviceId': os.environ.get('CONTROLPLANE_SERVICE_ID', ''), - 'instance': os.environ.get('CONTROLPLANE_INSTANCE_ID', ''), - 'hostId': os.environ.get('CONTROLPLANE_HOST_ID', ''), - 'tenantId': os.environ.get('CONTROLPLANE_TENANT_ID', ''), - } + self.tags = dict(options.get("tags", {})) + self.tags.update( + { + "serviceId": cc_config.service_id, + "instance": cc_config.instance_id, + "hostId": cc_config.host_id, + "tenantId": cc_config.tenant_id, + } + ) + + def add_tags(self, tags): + self.tags.update(tags) + # @override def write(self): self._write() def _write(self): - metrics = getMetrics(self.registry, self.tags, self.prefix) try: + metrics = getMetrics(self.registry, self.tags, self.prefix) self.postMetrics(metrics) except Exception as e: - log.error(e) + log.exception(e) def postMetrics(self, metrics): if not self.session: self.session = requests.Session() - self.session.headers.update({'Content-Type': 'application/json'}) - self.session.headers.update({'User-Agent': 'Zenoss Service Metrics'}) - post_data = {'metrics': metrics} - log.debug("Sending metric payload: %s", post_data) - response = self.session.post(self.metric_destination, - data=json.dumps(post_data)) + self.session.headers.update({"Content-Type": "application/json"}) + self.session.headers.update({"User-Agent": "Zenoss Service Metrics"}) + post_data = {"metrics": metrics} + 
log.debug("sending metric payload: %s", post_data) + response = self.session.post( + self.metric_destination, data=json.dumps(post_data) + ) if response.status_code != 200: - log.warning("Problem submitting metrics: %d, %s", - response.status_code, response.text.replace('\n', '\\n')) + log.warning( + "problem submitting metrics: %d, %s", + response.status_code, + response.text.replace("\n", "\\n"), + ) self.session = None else: - log.debug("%d Metrics posted", len(metrics)) + log.debug("%d metrics posted", len(metrics)) class TwistedMetricReporter(object): - def __init__(self, interval=30, metricWriter=None, tags={}, *args, **options): + def __init__( + self, interval=30, metricWriter=None, tags={}, *args, **options + ): super(TwistedMetricReporter, self).__init__() - self.registry = options.get('registry', registry) - self.prefix = options.get('prefix', "") + self.registry = options.get("registry", registry) + self.prefix = options.get("prefix", "") self.metricWriter = metricWriter self.interval = interval self.tags = {} @@ -132,7 +111,7 @@ def doStart(): self._loop.start(self.interval, now=False) reactor.callWhenRunning(doStart) - reactor.addSystemEventTrigger('before', 'shutdown', self.stop) + reactor.addSystemEventTrigger("before", "shutdown", self.stop) @defer.inlineCallbacks def stop(self): @@ -145,47 +124,105 @@ def stop(self): def postMetrics(self): try: for metric in getMetrics(self.registry, self.tags, self.prefix): - yield self.metricWriter.write_metric(metric['metric'], metric['value'], metric['timestamp'], - metric['tags']) + yield self.metricWriter.write_metric( + metric["metric"], + metric["value"], + metric["timestamp"], + metric["tags"], + ) except Exception: log.exception("Error writing metrics") +class TimeOnce(object): + """ + A context manager to time something and save tag values and + a measurement. + """ + + def __init__(self, gauge, *args): + self.gauge = gauge + self.tagValues = args + + def __enter__(self): + self.interval = Interval.now() + + def __exit__(self, *args): + self.gauge.update(self.tagValues, self.interval.stop()) + + +class QueueGauge(object): + """ + This instrument contains simple point-in-time measurements like a gauge. + + Unlike a gauge, however, it: + - can be configured to have tags whose values can vary with each + measurement. + - contains a queue of values with tag values, which are read only + once each. + + Many values or none can be written to this instrument between cycles of its + reporter, so for it many or no values will be published. + + Calling an instance returns something which should append to the instances + queue a tuple, which should contain 1 value for each tagKey of the + instance, followed by a measurement. 
+ """ + + def __init__(self, *args): + self.newContextManager = args[0] if callable(args[0]) else TimeOnce + self.tagKeys = args if not callable(args[0]) else args[1:] + self.queue = deque() + + def __call__(self, *args): + if len(self.tagKeys) != len(args): + raise RuntimeError( + "The number of tag values provided does not match the " + "number of configured tag keys" + ) + return self.newContextManager(self, *args) + + def update(self, tagValues, metricValue): + self.queue.appendleft(tagValues + (metricValue,)) + + def getMetrics(mRegistry, tags, prefix): metrics = [] - snapshot_keys = ['median', 'percentile_95th'] for name, metric in mRegistry: log.debug("metric info: %s, %s", name, metric) - if isinstance(metric, Meter): - keys = ['count', 'one_minute_rate', 'five_minute_rate', - 'fifteen_minute_rate', 'mean_rate'] - metrics.extend(log_metric(name, metric, keys, tags, prefix)) - if isinstance(metric, Gauge): - keys = ['value'] - metrics.extend(log_metric(name, metric, keys, tags, prefix)) - if isinstance(metric, QueueGauge): - metrics.extend(log_queue_gauge(name, metric, tags, prefix)) - if isinstance(metric, UtilizationTimer): - keys = ['count', 'one_minute_rate', 'five_minute_rate', - 'fifteen_minute_rate', 'mean_rate', 'min', 'max', - 'mean', 'stddev', 'one_minute_utilization', - 'five_minute_utilization', 'fifteen_minute_utilization', - 'mean_utilization'] - metrics.extend(log_metric(name, metric, keys, tags, prefix, snapshot_keys)) - if isinstance(metric, Timer): - keys = ['count', 'one_minute_rate', 'five_minute_rate', - 'fifteen_minute_rate', 'mean_rate', 'min', 'max', 'mean', - 'stddev'] - metrics.extend(log_metric(name, metric, keys, tags, prefix, snapshot_keys)) - if isinstance(metric, Counter): - keys = ['count'] - metrics.extend(log_metric(name, metric, keys, tags, prefix)) - if isinstance(metric, Histogram): - keys = ['count', 'min', 'max', 'mean', 'stddev'] - metrics.extend(log_metric(name, metric, keys, tags, prefix, snapshot_keys)) + metrics.extend(getMetricData(metric, name, tags, prefix)) return metrics -def log_queue_gauge(name, metric, tags, prefix): + +def getMetric(mRegistry, name, tags, prefix): + if name not in mRegistry: + log.info("%s not found in metric registry", name) + return [] + metric = mRegistry.get(name) + return getMetricData(metric, name, tags, prefix) + + +_snapshot_keys = ["median", "percentile_95th"] + + +def getMetricData(metric, name, tags, prefix): + config = _getMetricConfig(metric) + if config is None: + log.info("could not generate a config for metric %s", name) + return [] + fn = config["fn"] + keys = config["keys"] + return fn(name, metric, keys, tags, prefix, _snapshot_keys) + +def _getMetricConfig(metric): + keys = (_classname(cls) for cls in inspect.getmro(metric.__class__)) + return next( + (_metric_configs.get(key) for key in keys if key in _metric_configs), + None, + ) + + +def _log_queue_gauge(name, metric, tags, prefix): """ A QueueGauge needs this unique handler because it does not contain a fixed number of values. 
@@ -201,16 +238,26 @@ def log_queue_gauge(name, metric, tags, prefix): stat = metric.queue.pop() qtags = tags.copy() qtags.update(izip(metric.tagKeys, stat)) - results.append({"metric": whole_metric_name, - "value": stat[-1], - "timestamp": ts, - "tags": qtags}) + results.append( + { + "metric": whole_metric_name, + "value": stat[-1], + "timestamp": ts, + "tags": qtags, + } + ) + log.debug( + "recording metric metric=%s value=%s", + whole_metric_name, + stat[-1], + ) except Exception as e: log.error(e) return results -def log_metric(name, metric, keys, tags, prefix, snapshot_keys=None): + +def _log_metric(name, metric, keys, tags, prefix, snapshot_keys=None): results = [] if snapshot_keys is None: @@ -221,19 +268,106 @@ try: for stat in keys: whole_metric_name = "%s.%s" % (metric_name, stat) - results.append({"metric": whole_metric_name, - "value": getattr(metric, stat), - "timestamp": ts, - "tags": tags}) + results.append( + { + "metric": whole_metric_name, + "value": getattr(metric, stat), + "timestamp": ts, + "tags": tags, + } + ) + log.debug( + "recording metric metric=%s value=%s", + whole_metric_name, + getattr(metric, stat), + ) - if hasattr(metric, 'snapshot'): + if hasattr(metric, "snapshot"): snapshot = metric.snapshot for stat in snapshot_keys: whole_metric_name = "%s.%s" % (metric_name, stat) - results.append({"metric": whole_metric_name, - "value": getattr(snapshot, stat), - "timestamp": ts, - "tags": tags}) + results.append( + { + "metric": whole_metric_name, + "value": getattr(snapshot, stat), + "timestamp": ts, + "tags": tags, + } + ) + log.debug( + "recording metric metric=%s value=%s", + whole_metric_name, + getattr(snapshot, stat), + ) except Exception as e: log.error(e) return results + + +def _log_without_snapshot(name, metric, keys, tags, prefix, snapshot_keys): + return _log_metric(name, metric, keys, tags, prefix) + + +def _classname(obj): + return obj.__name__ if isinstance(obj, type) else type(obj).__name__ + + +_metric_configs = { + _classname(Meter): { + "fn": _log_without_snapshot, + "keys": [ + "count", + "one_minute_rate", + "five_minute_rate", + "fifteen_minute_rate", + "mean_rate", + ], + }, + _classname(Gauge): {"fn": _log_without_snapshot, "keys": ["value"]}, + _classname(QueueGauge): { + "fn": lambda name, metric, _, tags, prefix, _sk: _log_queue_gauge( + name, metric, tags, prefix + ), + "keys": [], + }, + _classname(UtilizationTimer): { + "fn": _log_metric, + "keys": [ + "count", + "one_minute_rate", + "five_minute_rate", + "fifteen_minute_rate", + "mean_rate", + "min", + "max", + "mean", + "stddev", + "one_minute_utilization", + "five_minute_utilization", + "fifteen_minute_utilization", + "mean_utilization", + ], + }, + _classname(Timer): { + "fn": _log_metric, + "keys": [ + "count", + "one_minute_rate", + "five_minute_rate", + "fifteen_minute_rate", + "mean_rate", + "min", + "max", + "mean", + "stddev", + ], + }, + _classname(Counter): { + "fn": _log_without_snapshot, + "keys": ["count"], + }, + _classname(Histogram): { + "fn": _log_metric, + "keys": ["count", "min", "max", "mean", "stddev"], + }, +}
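getMetricData above dispatches on class names collected from the instrument's MRO, so an unregistered subclass falls back to the nearest registered ancestor's config. A condensed sketch of the lookup, assuming it is evaluated in this module's namespace; FancyTimer is hypothetical and not part of the patch:

    from metrology.instruments import Timer

    class FancyTimer(Timer):  # hypothetical subclass with no config of its own
        pass

    # inspect.getmro(FancyTimer) yields (FancyTimer, Timer, ...); the first
    # name found in _metric_configs wins, so FancyTimer reports Timer's keys.
    assert _getMetricConfig(FancyTimer()) is _metric_configs[_classname(Timer)]

diff --git a/Products/ZenUtils/config.py b/Products/ZenUtils/config.py index 8c42e44449..859874f819 100644 --- a/Products/ZenUtils/config.py +++ b/Products/ZenUtils/config.py @@ -28,6 +28,8 @@ import re +import six + class ConfigError(Exception): """Error for problems parsing config files.""" @@ -237,6 +239,7 @@ class InvalidLine(ConfigLine): Default line if no other ConfigLines matched.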
Assumed to be invalid input. """ + @property def setting(self): return None @@ -360,22 +363,19 @@ def items(self): yield line.setting -class Parser(object): - def __call__(self, file): - configFile = ConfigFile(file) - configFile.validate() - return configFile.items() +def _parse(file): + configFile = ConfigFile(file) + configFile.validate() + return configFile.items() class ConfigLoader(object): """Lazily load the config when requested.""" - def __init__(self, config_files, config=Config, parser=Parser()): + def __init__(self, config_files, config=Config): """ :param config Config The config instance or class to load data into. Must support update which accepts an iterable of (key, value). - :param parser Parser The parser to use to parse the config files. - Must be a callable and return an iterable of (key, value). :param config_files list A list of config file names to parse in order. """ @@ -383,7 +383,6 @@ def __init__(self, config_files, config=Config, parser=Parser()): config_files = [config_files] self.config_files = config_files - self.parser = parser self.config = config self._config = None @@ -398,14 +397,14 @@ def load(self): raise ConfigError("Config loader has no config files to load.") for file in self.config_files: - if not hasattr(file, "read") and isinstance(file, basestring): - # Look like a file name, open it + if isinstance(file, six.string_types): + # It's a string, so open it first with open(file, "r") as fp: - options = self.parser(fp) + options = _parse(fp) else: - options = self.parser(file) + # Assume it's an open file + options = _parse(file) - # self._config.update(options) for k, v in options: self._config[k] = v diff --git a/Products/ZenUtils/controlplane/__init__.py b/Products/ZenUtils/controlplane/__init__.py index e05650c31c..cf719a25cf 100644 --- a/Products/ZenUtils/controlplane/__init__.py +++ b/Products/ZenUtils/controlplane/__init__.py @@ -7,12 +7,26 @@ # ############################################################################## -from .data import * -from .client import * -from servicetree import ServiceTree -from Products.ZenUtils.GlobalConfig import globalConfToDict +from __future__ import absolute_import, print_function + import os +from Products.ZenUtils.GlobalConfig import globalConfToDict + +from .data import ( + Host, + HostFactory, + ServiceDefinition, + ServiceDefinitionFactory, + ServiceInstance, + ServiceInstanceFactory, + ServiceJsonDecoder, + ServiceJsonEncoder, +) +from .client import ControlPlaneClient, ControlCenterError +from .environment import configuration +from .servicetree import ServiceTree + def getConnectionSettings(options=None): if options is None: @@ -22,10 +36,29 @@ def getConnectionSettings(options=None): settings = { "user": o.get("controlplane-user", "zenoss"), "password": o.get("controlplane-password", "zenoss"), - } + } # allow these to be set from the global.conf for development but # give preference to the environment variables - settings["user"] = os.environ.get('CONTROLPLANE_SYSTEM_USER', settings['user']) - settings["password"] = os.environ.get('CONTROLPLANE_SYSTEM_PASSWORD', settings['password']) + settings["user"] = os.environ.get( + "CONTROLPLANE_SYSTEM_USER", settings["user"] + ) + settings["password"] = os.environ.get( + "CONTROLPLANE_SYSTEM_PASSWORD", settings["password"] + ) return settings + +__all__ = ( + "ControlCenterError", + "ControlPlaneClient", + "Host", + "HostFactory", + "ServiceDefinition", + "ServiceDefinitionFactory", + "ServiceInstance", + "ServiceInstanceFactory", + "ServiceJsonDecoder", 
+ "ServiceJsonEncoder", + "ServiceTree", + "configuration", +) diff --git a/Products/ZenUtils/controlplane/application.py b/Products/ZenUtils/controlplane/application.py index 7e74d79692..6396681c92 100644 --- a/Products/ZenUtils/controlplane/application.py +++ b/Products/ZenUtils/controlplane/application.py @@ -12,43 +12,47 @@ """ import logging -import os import re import time + +from collections import Sequence, Iterator from fnmatch import fnmatch from functools import wraps -from Products.ZenUtils.controlplane import getConnectionSettings -from collections import Sequence, Iterator from zope.interface import implementer +import six + from Products.ZenUtils.application import ( - IApplicationManager, IApplication, - IApplicationLog, IApplicationConfiguration + IApplication, + IApplicationConfiguration, + IApplicationLog, + IApplicationManager, ) +from Products.ZenUtils.controlplane import getConnectionSettings from .client import ControlPlaneClient, ControlCenterError +from .environment import configuration as cc_config from .runstates import RunStates LOG = logging.getLogger("zen.controlplane") -_TENANT_ID_ENV = "CONTROLPLANE_TENANT_ID" MEM_MULTIPLIER = { - 'k':1024, - 'm':1024*1024, - 'g':1024*1024*1024, - 't':1024*1024*1024*1024 + "k": 1024, + "m": 1024 * 1024, + "g": 1024 * 1024 * 1024, + "t": 1024 * 1024 * 1024 * 1024, } def getTenantId(): - """Returns the tenant ID from the environment. - """ - tid = os.environ.get(_TENANT_ID_ENV) - if tid is None: + """Returns the tenant ID from the environment.""" + tid = cc_config.tenant_id + if not tid: LOG.error( "ERROR: Could not determine the tenantID from the environment" ) + return None return tid @@ -58,21 +62,21 @@ def _search(services, params): """ if "name" in params: namepat = params["name"] - services = ( - svc for svc in services if fnmatch(svc.name, namepat) - ) + services = (svc for svc in services if fnmatch(svc.name, namepat)) if "tags" in params: tags = set(params["tags"]) - includes = set(t for t in tags if not t.startswith('-')) - excludes = set(t[1:] for t in tags if t.startswith('-')) + includes = set(t for t in tags if not t.startswith("-")) + excludes = set(t[1:] for t in tags if t.startswith("-")) if includes: services = ( - svc for svc in services + svc + for svc in services if svc.tags and (set(svc.tags) & includes == includes) ) if excludes: services = ( - svc for svc in services + svc + for svc in services if not svc.tags or excludes.isdisjoint(set(svc.tags)) ) return services @@ -91,9 +95,9 @@ def _search(services, params): # removed services and/or implement this cache using redis or memcached (or # something else equivalent) and have it shared among all the Zopes. + class _Cache(object): - """Cache for ServiceDefinition objects. - """ + """Cache for ServiceDefinition objects.""" def __init__(self, client, ttl=60): """Initialize an instance of _Cache. @@ -107,8 +111,7 @@ def __init__(self, client, ttl=60): self._ttl = ttl def _load(self): - """Load all the data into the cache. - """ + """Load all the data into the cache.""" tenant_id = getTenantId() if tenant_id is None: self._data = None @@ -118,8 +121,7 @@ def _load(self): self._lastUpdate = time.time() def _refresh(self): - """Update the cache with changes. 
- """ + """Update the cache with changes.""" since = int((time.time() - self._lastUpdate) * 1000) # No refresh if no time has elapsed since the last update if since == 0: @@ -131,9 +133,11 @@ def _refresh(self): for changed_svc in changes: idx = next( ( - idx for idx, svc in enumerate(self._data) + idx + for idx, svc in enumerate(self._data) if svc.id == changed_svc.id - ), None + ), + None, ) if idx is not None: # Update existing service in cache @@ -143,8 +147,7 @@ def _refresh(self): self._data.append(changed_svc) def get(self): - """Return the cached data. - """ + """Return the cached data.""" if not self.__nonzero__(): self._load() else: @@ -152,14 +155,12 @@ def get(self): return self._data def clear(self): - """Clear the cache. - """ + """Clear the cache.""" self._data = None self._lastUpdate = 0 def __nonzero__(self): - """Return True if there is cached data. - """ + """Return True if there is cached data.""" age = int(time.time() - self._lastUpdate) return age < self._ttl and self._data is not None @@ -197,7 +198,7 @@ def query(self, name=None, tags=None, monitorName=None): if name: params["name"] = name if tags: - if isinstance(tags, (str, unicode)): + if isinstance(tags, six.string_types): tags = [tags] params["tags"] = tags @@ -210,17 +211,12 @@ def query(self, name=None, tags=None, monitorName=None): # applications. tags = set(tags) - set(["daemon"]) tags.add("-daemon") - params = { - "name": monitorName, - "tags": list(tags) - } + params = {"name": monitorName, "tags": list(tags)} parent = next(_search(services, params), None) # If the monitor name wasn't found, return an empty sequence. if not parent: return () - result = ( - svc for svc in result if svc.parentId == parent.id - ) + result = (svc for svc in result if svc.parentId == parent.id) return tuple(self._getApp(service) for service in result) @@ -230,8 +226,7 @@ def get(self, id, default=None): The default argument is returned if the application doesn't exist. """ service = next( - (svc for svc in self._servicecache.get() if svc.id == id), - None + (svc for svc in self._servicecache.get() if svc.id == id), None ) if not service: return default @@ -250,8 +245,9 @@ class DeployedApp(object): """ Control and interact with the deployed app via the control plane. 
""" + UNKNOWN_STATUS = type( - 'SENTINEL', (object,), {'__nonzero__': lambda x: False} + "SENTINEL", (object,), {"__nonzero__": lambda x: False} )() def __init__(self, service, client, runstate): @@ -264,11 +260,13 @@ def _initStatus(fn): """ Decorator which calls updateStatus if status is uninitialized """ + @wraps(fn) def wrapper(self, *args, **kwargs): if self._status == DeployedApp.UNKNOWN_STATUS: self.updateStatus(*args, **kwargs) return fn(self) + return wrapper def updateStatus(self): @@ -345,15 +343,17 @@ def autostart(self): @autostart.setter def autostart(self, value): - value = self._service.LAUNCH_MODE.AUTO \ - if bool(value) else self._service.LAUNCH_MODE.MANUAL + value = ( + self._service.LAUNCH_MODE.AUTO + if bool(value) + else self._service.LAUNCH_MODE.MANUAL + ) self._service.launch = value self._client.updateServiceProperty(self._service, "Launch") @property def configurations(self): - """ - """ + """ """ return _DeployedAppConfigList(self._service, self._client) @configurations.setter @@ -405,16 +405,13 @@ def restart(self): if priorState != self._runstate.state: LOG.info("[%x] RESTARTING APP", id(self)) if self._status: - self._client.killInstance( - self._status.hostId, self._status.id - ) + self._client.killInstance(self._status.hostId, self._status.id) else: self._service.desiredState = self._service.STATE.RUN self._client.startService(self._service.id) def update(self): - """ - """ + """ """ self._client.updateService(self._service) @property @@ -423,9 +420,16 @@ def RAMCommitment(self): Get the RAM Commitment of the service and trasform it in the byte value. RAMCommitment: string in form """ - match = re.search("(?P[0-9]*\.?[0-9]*)(?P[k,m,g,t]+)", self._service.RAMCommitment, re.IGNORECASE) - if not match : return - RAMCommitment_bytes = int(match.group('value')) * MEM_MULTIPLIER.get(match.group('unit').lower()) + match = re.search( + "(?P[0-9]*\.?[0-9]*)(?P[k,m,g,t]+)", + self._service.RAMCommitment, + re.IGNORECASE, + ) + if not match: + return + RAMCommitment_bytes = int(match.group("value")) * MEM_MULTIPLIER.get( + match.group("unit").lower() + ) return RAMCommitment_bytes @@ -439,7 +443,9 @@ class _DeployedAppConfigList(Sequence): def __init__(self, service, client): self._service = service if not service._data.has_key("ConfigFiles"): - service._data["ConfigFiles"] = client.getService(service.id)._data["ConfigFiles"] + service._data["ConfigFiles"] = client.getService(service.id)._data[ + "ConfigFiles" + ] self._client = client def __getitem__(self, index): @@ -485,13 +491,13 @@ def __iter__(self): def next(self): return DeployedAppConfig( - self._service, self._client, self._iter.next()) + self._service, self._client, self._iter.next() + ) @implementer(IApplicationConfiguration) class DeployedAppConfig(object): - """ - """ + """ """ def __init__(self, service, client, config): self._service = service @@ -515,8 +521,7 @@ def content(self, content): @implementer(IApplicationLog) class DeployedAppLog(object): - """ - """ + """ """ def __init__(self, instance, client): self._status = instance @@ -535,5 +540,8 @@ def last(self, count): __all__ = ( - "DeployedApp", "DeployedAppConfig", "DeployedAppLog", "DeployedAppLookup" + "DeployedApp", + "DeployedAppConfig", + "DeployedAppLog", + "DeployedAppLookup", ) diff --git a/Products/ZenUtils/controlplane/client.py b/Products/ZenUtils/controlplane/client.py index 53c9be27ee..9f1c1db0a0 100644 --- a/Products/ZenUtils/controlplane/client.py +++ b/Products/ZenUtils/controlplane/client.py @@ -10,10 +10,10 @@ """ 
ControlPlaneClient """ + import fnmatch import json import logging -import os import urllib import urllib2 @@ -22,30 +22,35 @@ from errno import ECONNRESET from urlparse import urlunparse -from .data import (ServiceJsonDecoder, ServiceJsonEncoder, HostJsonDecoder, - ServiceStatusJsonDecoder, InstanceV2ToServiceStatusJsonDecoder) +import six +from .data import ( + HostJsonDecoder, + InstanceV2ToServiceStatusJsonDecoder, + ServiceJsonDecoder, + ServiceJsonEncoder, + ServiceStatusJsonDecoder, +) +from .environment import configuration as cc_config LOG = logging.getLogger("zen.controlplane.client") -SERVICED_VERSION_ENV = "SERVICED_VERSION" - - def getCCVersion(): """ Checks if the client is connecting to Hoth or newer. The cc version is injected in the containers by serviced """ - cc_version = os.environ.get(SERVICED_VERSION_ENV) - if cc_version: # CC is >= 1.2.0 + cc_version = cc_config.version + if cc_version: # CC is >= 1.2.0 LOG.debug("Detected CC version >= 1.2.0") else: cc_version = "1.1.X" return cc_version -class ControlCenterError(Exception): pass +class ControlCenterError(Exception): + pass class _Request(urllib2.Request): @@ -59,22 +64,23 @@ def __init__(self, *args, **kwargs): urllib2.Request.__init__(self, *args, **kwargs) def get_method(self): - return self.__method \ - if self.__method else urllib2.Request.get_method(self) + return ( + self.__method + if self.__method + else urllib2.Request.get_method(self) + ) class ControlPlaneClient(object): - """ - """ + """ """ def __init__(self, user, password, host=None, port=None): - """ - """ + """ """ self._cj = CookieJar() self._opener = urllib2.build_opener( urllib2.HTTPHandler(), urllib2.HTTPSHandler(), - urllib2.HTTPCookieProcessor(self._cj) + urllib2.HTTPCookieProcessor(self._cj), ) # Zproxy always provides a proxy to serviced on port 443 self._server = { @@ -95,7 +101,7 @@ def _checkUseHttps(self): """ use_https = True cc_master = self._server.get("host") - if self._hothOrNewer and cc_master in [ "localhost", "127.0.0.1" ]: + if self._hothOrNewer and cc_master in ["localhost", "127.0.0.1"]: use_https = False return use_https @@ -111,13 +117,13 @@ def queryServices(self, name=None, tags=None, tenantID=None): namepat = namepat.replace("\\Z", "\\z") query["name"] = namepat if tags: - if isinstance(tags, (str, unicode)): + if isinstance(tags, six.string_types): tags = [tags] - query["tags"] = ','.join(tags) + query["tags"] = ",".join(tags) if tenantID: query["tenantID"] = tenantID response = self._dorequest(self._servicesEndpoint, query=query) - body = ''.join(response.readlines()) + body = "".join(response.readlines()) response.close() decoded = ServiceJsonDecoder().decode(body) if decoded is None: @@ -129,7 +135,7 @@ def getService(self, serviceId, default=None): Returns the ServiceDefinition object for the given service. 
""" response = self._dorequest("/services/%s" % serviceId) - body = ''.join(response.readlines()) + body = "".join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) @@ -143,7 +149,7 @@ def getChangesSince(self, age): """ query = {"since": age} response = self._dorequest(self._servicesEndpoint, query=query) - body = ''.join(response.readlines()) + body = "".join(response.readlines()) response.close() decoded = ServiceJsonDecoder().decode(body) if decoded is None: @@ -159,12 +165,16 @@ def updateServiceProperty(self, service, prop): oldService = self.getService(service.id) oldService._data[prop] = service._data[prop] body = ServiceJsonEncoder().encode(oldService) - LOG.info("Updating prop '%s' for service '%s':%s resourceId=%s", prop, service.name, service.id, service.resourceId) - LOG.debug("Updating service %s", body) - response = self._dorequest( - service.resourceId, method="PUT", data=body + LOG.info( + "Updating prop '%s' for service '%s':%s resourceId=%s", + prop, + service.name, + service.id, + service.resourceId, ) - body = ''.join(response.readlines()) + LOG.debug("Updating service %s", body) + response = self._dorequest(service.resourceId, method="PUT", data=body) + body = "".join(response.readlines()) response.close() def updateService(self, service): @@ -176,10 +186,8 @@ def updateService(self, service): body = ServiceJsonEncoder().encode(service) LOG.info("Updating service '%s':%s", service.name, service.id) LOG.debug("Updating service %s", body) - response = self._dorequest( - service.resourceId, method="PUT", data=body - ) - body = ''.join(response.readlines()) + response = self._dorequest(service.resourceId, method="PUT", data=body) + body = "".join(response.readlines()) response.close() def startService(self, serviceId): @@ -189,9 +197,10 @@ def startService(self, serviceId): :param string ServiceId: The service to start """ LOG.info("Starting service '%s", serviceId) - response = self._dorequest("/services/%s/startService" % serviceId, - method='PUT') - body = ''.join(response.readlines()) + response = self._dorequest( + "/services/%s/startService" % serviceId, method="PUT" + ) + body = "".join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) @@ -202,9 +211,10 @@ def stopService(self, serviceId): :param string ServiceId: The service to stop """ LOG.info("Stopping service %s", serviceId) - response = self._dorequest("/services/%s/stopService" % serviceId, - method='PUT') - body = ''.join(response.readlines()) + response = self._dorequest( + "/services/%s/stopService" % serviceId, method="PUT" + ) + body = "".join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) @@ -220,7 +230,7 @@ def addService(self, serviceDefinition): response = self._dorequest( "/services/add", method="POST", data=serviceDefinition ) - body = ''.join(response.readlines()) + body = "".join(response.readlines()) response.close() return body @@ -231,9 +241,7 @@ def deleteService(self, serviceId): :param string serviceId: Id of the service to delete """ LOG.info("Removing service %s", serviceId) - response = self._dorequest( - "/services/%s" % serviceId, method="DELETE" - ) + response = self._dorequest("/services/%s" % serviceId, method="DELETE") response.close() def deployService(self, parentId, service): @@ -245,15 +253,12 @@ def deployService(self, parentId, service): :returns string: json encoded representation of new service's links """ LOG.info("Deploying service") - data = { - 'ParentID': parentId, - 
'Service': json.loads(service) - } + data = {"ParentID": parentId, "Service": json.loads(service)} LOG.debug(data) response = self._dorequest( "/services/deploy", method="POST", data=json.dumps(data) ) - body = ''.join(response.readlines()) + body = "".join(response.readlines()) response.close() return body @@ -262,11 +267,10 @@ def queryServiceInstances(self, serviceId): Returns a sequence of ServiceInstance objects. """ response = self._dorequest("/services/%s/running" % serviceId) - body = ''.join(response.readlines()) + body = "".join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) - def queryServiceStatus(self, serviceId): """ CC version-independent call to get the status of a service. @@ -299,7 +303,7 @@ def queryServiceStatusImpl(self, serviceId): :rtype: dict of ServiceStatus objects with ID as key """ response = self._dorequest("/services/%s/status" % serviceId) - body = ''.join(response.readlines()) + body = "".join(response.readlines()) response.close() decoded = ServiceStatusJsonDecoder().decode(body) return decoded @@ -314,9 +318,10 @@ def queryServiceInstancesV2(self, serviceId): :returns: The raw result of the query :rtype: json formatted string """ - response = self._dorequest("%s/services/%s/instances" % (self._v2loc, - serviceId)) - body = ''.join(response.readlines()) + response = self._dorequest( + "%s/services/%s/instances" % (self._v2loc, serviceId) + ) + body = "".join(response.readlines()) response.close() return body @@ -336,13 +341,12 @@ def _convertInstancesV2ToStatuses(self, rawV2Instance): decoded = {instance.id: instance for instance in decoded} return decoded - def queryHosts(self): """ Returns a sequence of Host objects. """ response = self._dorequest("/hosts") - body = ''.join(response.readlines()) + body = "".join(response.readlines()) response.close() return HostJsonDecoder().decode(body) @@ -350,8 +354,8 @@ def getHost(self, hostId): """ Returns a sequence of Host objects. 
""" - response = self._dorequest("/hosts/%" % hostId) - body = ''.join(response.readlines()) + response = self._dorequest("/hosts/%s" % hostId) + body = "".join(response.readlines()) response.close() return HostJsonDecoder().decode(body) @@ -362,54 +366,49 @@ def getInstance(self, serviceId, instanceId, default=None): response = self._dorequest( "/services/%s/running/%s" % (serviceId, instanceId) ) - body = ''.join(response.readlines()) + body = "".join(response.readlines()) response.close() return ServiceJsonDecoder().decode(body) def getServiceLog(self, serviceId, start=0, end=None): - """ - """ + """ """ response = self._dorequest("/services/%s/logs" % serviceId) - body = ''.join(response.readlines()) + body = "".join(response.readlines()) response.close() log = json.loads(body) return log["Detail"] def getInstanceLog(self, serviceId, instanceId, start=0, end=None): - """ - """ + """ """ response = self._dorequest( "/services/%s/%s/logs" % (serviceId, instanceId) ) - body = ''.join(response.readlines()) + body = "".join(response.readlines()) response.close() log = json.loads(body) return str(log["Detail"]) def killInstance(self, hostId, uuid): - """ - """ + """ """ response = self._dorequest( "/hosts/%s/%s" % (hostId, uuid), method="DELETE" ) response.close() def getServicesForMigration(self, serviceId): - """ - """ + """ """ query = {"includeChildren": "true"} response = self._dorequest("/services/%s" % serviceId, query=query) - body = ''.join(response.readlines()) + body = "".join(response.readlines()) response.close() return json.loads(body) def postServicesForMigration(self, data, serviceId): - """ - """ + """ """ response = self._dorequest( "/services/%s/migrate" % serviceId, method="POST", data=data ) - body = ''.join(response.readlines()) + body = "".join(response.readlines()) response.close() return body @@ -418,7 +417,7 @@ def getPoolsData(self): Get all the pools and return raw json """ response = self._dorequest("/pools") - body = ''.join(response.readlines()) + body = "".join(response.readlines()) response.close() return body @@ -427,7 +426,7 @@ def getHostsData(self): Get all the pools and return raw json """ response = self._dorequest("/hosts") - body = ''.join(response.readlines()) + body = "".join(response.readlines()) response.close() return body @@ -435,16 +434,16 @@ def getRunningServicesData(self): """ Get all the running services and return raw json """ - body = '' + body = "" if not self._hothOrNewer: response = self._dorequest("/running") - body = ''.join(response.readlines()) + body = "".join(response.readlines()) response.close() else: hostsData = self.queryHosts() for hostID in hostsData: - response = self._dorequest("/hosts/%s/running" %hostID) - body = body + ''.join(response.readlines()) + response = self._dorequest("/hosts/%s/running" % hostID) + body = body + "".join(response.readlines()) response.close() return body @@ -453,14 +452,22 @@ def getStorageData(self): Get the storage information and return raw json """ response = self._dorequest("/storage") - body = ''.join(response.readlines()) + body = "".join(response.readlines()) response.close() return body def _makeRequest(self, uri, method=None, data=None, query=None): query = urllib.urlencode(query) if query else "" - url = urlunparse(("https" if self._useHttps else "http", - self._netloc, uri, "", query, "")) + url = urlunparse( + ( + "https" if self._useHttps else "http", + self._netloc, + uri, + "", + query, + "", + ) + ) args = {} if method: args["method"] = method @@ -481,7 +488,9 @@ def 
_login(self):
     def _dorequest(self, uri, method=None, data=None, query=None):
         # Try to perform the request up to five times
         for trycount in range(5):
-            request = self._makeRequest(uri, method=method, data=data, query=query)
+            request = self._makeRequest(
+                uri, method=method, data=data, query=query
+            )
             try:
                 return self._opener.open(request)
             except urllib2.HTTPError as ex:
@@ -494,35 +503,39 @@ def _dorequest(self, uri, method=None, data=None, query=None):
                     msg = json.load(ex)
                 except ValueError:
                     raise ex  # This stinks because we lose the stack
-                detail = msg.get('Detail')
+                detail = msg.get("Detail")
                 if not detail:
                     raise
                 detail = detail.replace("Internal Server Error: ", "")
                 raise ControlCenterError(detail)
             raise
-            # The CC server resets the connection when an unauthenticated POST requesti is
-            # made. Depending on when during the request lifecycle the connection is reset,
-            # we can get either an URLError with a socket.error as the reason, or a naked
-            # socket.error. In either case, the socket.error.errno indicates that the
-            # connection was reset with an errno of ECONNRESET (104).
-            # When we get a connection reset exception, assume that the reset was caused
-            # by lack of authentication, login, and retry the request.
+            # The CC server resets the connection when an unauthenticated
+            # POST request is made. Depending on when during the request
+            # lifecycle the connection is reset, we can get either a
+            # URLError with a socket.error as the reason, or a naked
+            # socket.error. In either case, the socket.error.errno
+            # indicates that the connection was reset with an errno of
+            # ECONNRESET (104). When we get a connection reset exception,
+            # assume that the reset was caused by lack of authentication,
+            # login, and retry the request.
             except urllib2.URLError as ex:
                 reason = ex.reason
-                if type(reason) == socket_error and reason.errno == ECONNRESET:
+                if (
+                    isinstance(reason, socket_error)
+                    and reason.errno == ECONNRESET
+                ):
                     self._login()
                     continue
                 raise
             except socket_error as ex:
                 if ex.errno == ECONNRESET:
-                        self._login()
-                        continue
+                    self._login()
+                    continue
                 raise
             else:
                 # break the loop so we skip the loop's else clause
                 break
-
         else:
             # raises the last exception that was raised (the 401 error)
             raise
@@ -550,19 +563,16 @@ def cookies(self):
         for cookie in self._get_cookie_jar():
             cookies.append(
                 {
-                    'name': cookie.name,
-                    'value': cookie.value,
-                    'domain': cookie.domain,
-                    'path': cookie.path,
-                    'expires': cookie.expires,
-                    'secure': cookie.discard
+                    "name": cookie.name,
+                    "value": cookie.value,
+                    "domain": cookie.domain,
+                    "path": cookie.path,
+                    "expires": cookie.expires,
+                    "secure": cookie.discard,
                 }
             )
         return cookies
 
 
 # Define the names to export via 'from client import *'.
-__all__ = ( - "ControlPlaneClient", - "ControlCenterError" -) +__all__ = ("ControlPlaneClient", "ControlCenterError") diff --git a/Products/ZenUtils/controlplane/data.py b/Products/ZenUtils/controlplane/data.py index cabf4cf287..b974d276c4 100644 --- a/Products/ZenUtils/controlplane/data.py +++ b/Products/ZenUtils/controlplane/data.py @@ -221,7 +221,6 @@ def wrapper(*args, **kw): "stopped": ApplicationState.STOPPED, "started": ApplicationState.RUNNING, "pulling": ApplicationState.STARTING, - "resuming": ApplicationState.STARTING, "resumed": ApplicationState.RUNNING, "pending_restart": ApplicationState.STARTING, "emergency_stopping": ApplicationState.STOPPING, diff --git a/Products/ZenUtils/controlplane/environment.py b/Products/ZenUtils/controlplane/environment.py new file mode 100644 index 0000000000..4b07fd7347 --- /dev/null +++ b/Products/ZenUtils/controlplane/environment.py @@ -0,0 +1,126 @@ +############################################################################## +# +# Copyright (C) Zenoss, Inc. 2024, all rights reserved. +# +# This content is made available according to terms specified in +# License.zenoss under the directory where your Zenoss product is installed. +# +############################################################################## + +import os + +from backports.functools_lru_cache import lru_cache + + +class _EnviromentVariables(object): + _CONSUMER_URL = "CONTROLPLANE_CONSUMER_URL" + _HOST_ID = "CONTROLPLANE_HOST_ID" + _HOST_IPS = "CONTROLPLANE_HOST_IPS" + _IMAGE_ID = "SERVICED_SERVICE_IMAGE" + _INSTANCE_ID = "CONTROLPLANE_INSTANCE_ID" + _LOG_ADDRESS = "SERVICED_LOG_ADDRESS" + _MASTER_IP = "SERVICED_MASTER_IP" + _MAX_RPC_CLIENTS = "SERVICED_MAX_RPC_CLIENTS" + _MUX_PORT = "SERVICED_MUX_PORT" + _RPC_PORT = "SERVICED_RPC_PORT" + _RUN = "CONTROLPLANE" + _SERVICE_ID = "CONTROLPLANE_SERVICED_ID" + _SHELL = "SERVICED_IS_SERVICE_SHELL" + _TENANT_ID = "CONTROLPLANE_TENANT_ID" + _UI_PORT = "SERVICED_UI_PORT" + _VERSION = "SERVICED_VERSION" + _VIRTUAL_ADDRESS_SUBNET = "SERVICED_VIRTUAL_ADDRESS_SUBNET" + + @staticmethod + def _get(name): + return os.environ.get(name, "") + + @property + @lru_cache(maxsize=1) + def is_serviced(self): + return self._get(self._RUN) == "1" + + @property + @lru_cache(maxsize=1) + def is_serviced_shell(self): + return self._get(self._SHELL) == "true" + + @property + @lru_cache(maxsize=1) + def consumer_url(self): + return self._get(self._CONSUMER_URL) + + @property + @lru_cache(maxsize=1) + def host_id(self): + return self._get(self._HOST_ID) + + @property + @lru_cache(maxsize=1) + def instance_id(self): + return self._get(self._INSTANCE_ID) + + @property + @lru_cache(maxsize=1) + def service_id(self): + return self._get(self._SERVICE_ID) + + @property + @lru_cache(maxsize=1) + def tenant_id(self): + return self._get(self._TENANT_ID) + + @property + @lru_cache(maxsize=1) + def version(self): + return self._get(self._VERSION) + + @property + @lru_cache(maxsize=1) + def image_id(self): + return self._get(self._IMAGE_ID) + + @property + @lru_cache(maxsize=1) + def host_ips(self): + return tuple( + ip.strip() for ip in self._get(self._HOST_IPS).split(" ") if ip + ) + + @property + @lru_cache(maxsize=1) + def log_address(self): + return self._get(self._LOG_ADDRESS) + + @property + @lru_cache(maxsize=1) + def master_ip(self): + return self._get(self._MASTER_IP) + + @property + @lru_cache(maxsize=1) + def max_rpc_clients(self): + return self._get(self._MAX_RPC_CLIENTS) + + @property + @lru_cache(maxsize=1) + def mux_port(self): + return 
self._get(self._MUX_PORT) + + @property + @lru_cache(maxsize=1) + def rpc_port(self): + return self._get(self._RPC_PORT) + + @property + @lru_cache(maxsize=1) + def ui_port(self): + return self._get(self._UI_PORT) + + @property + @lru_cache(maxsize=1) + def virtual_address_subnet(self): + return self._get(self._VIRTUAL_ADDRESS_SUBNET) + + +configuration = _EnviromentVariables() diff --git a/Products/ZenUtils/tests/testDaemonStats.py b/Products/ZenUtils/tests/testDaemonStats.py index 2eb51946ef..cdef97f457 100644 --- a/Products/ZenUtils/tests/testDaemonStats.py +++ b/Products/ZenUtils/tests/testDaemonStats.py @@ -1,53 +1,66 @@ ############################################################################## -# +# # Copyright (C) Zenoss, Inc. 2014, all rights reserved. -# +# # This content is made available according to terms specified in # License.zenoss under the directory where your Zenoss product is installed. -# +# ############################################################################## -import unittest, os +import os +import unittest + +from mock import patch from Products.ZenUtils.DaemonStats import DaemonStats from Products.ZenTestCase.BaseTestCase import BaseTestCase + class DaemonStatsTest(BaseTestCase): """Test the DaemonStats""" - def setUp(self): - self.daemon_stats = DaemonStats() + @patch("Products.ZenUtils.DaemonStats.cc_config", autospec=True) + def testDaemonsTagsServiceId(self, _cc): + _cc.service_id = "ID" + _cc.tenant_id = "foo" + _cc.instance_id = "bar" + daemon_stats = DaemonStats() - def testDaemonsTagsServiceId(self): - os.environ["CONTROLPLANE"] = "1" - os.environ["CONTROLPLANE_SERVICE_ID"] = "ID" - os.environ["CONTROLPLANE_TENANT_ID"] = "foo" - os.environ["CONTROLPLANE_INSTANCE_ID"] = "bar" - self.daemon_stats.config( "name", "monitor", None, None, None) + daemon_stats.config("name", "monitor", None, None, None) self.assertEqual( - {'daemon': 'name', 'instance': 'bar', 'internal': True, - 'monitor': 'monitor', 'metricType': 'type', 'serviceId': 'ID', - 'tenantId': 'foo'}, - self.daemon_stats._tags("type") + { + "daemon": "name", + "instance": "bar", + "internal": True, + "monitor": "monitor", + "metricType": "type", + "serviceId": "ID", + "tenantId": "foo", + }, + daemon_stats._tags("type"), ) - def testDaemonsDoesNotTagServiceId(self): - if "CONTROLPLANE" in os.environ: - del os.environ["CONTROLPLANE"] - - if "CONTROLPLANE_SERVICE_ID" in os.environ: - del os.environ["CONTROLPLANE_SERVICE_ID"] + @patch("Products.ZenUtils.DaemonStats.cc_config", autospec=True) + def testDaemonsDoesNotTagServiceId(self, _cc): + _cc.is_serviced = False + _cc.service_id = "" + daemon_stats = DaemonStats() - self.daemon_stats.config( "name", "monitor", None, None, None) + daemon_stats.config("name", "monitor", None, None, None) self.assertEqual( - {'daemon': 'name', 'internal': True, 'monitor': 'monitor', 'metricType': 'type'}, - self.daemon_stats._tags("type") + { + "daemon": "name", + "internal": True, + "monitor": "monitor", + "metricType": "type", + }, + daemon_stats._tags("type"), ) + def test_suite(): - return unittest.TestSuite(( - unittest.makeSuite(DaemonStatsTest), - )) + return unittest.TestSuite((unittest.makeSuite(DaemonStatsTest),)) + -if __name__ == '__main__': - unittest.main(defaultTest='test_suite') +if __name__ == "__main__": + unittest.main(defaultTest="test_suite") diff --git a/Products/Zuul/facades/devicefacade.py b/Products/Zuul/facades/devicefacade.py index 2c1424faaa..7a225993cd 100644 --- a/Products/Zuul/facades/devicefacade.py +++ 
b/Products/Zuul/facades/devicefacade.py
@@ -780,7 +780,13 @@ def getTemplates(self, id):
         def byTitleOrId(left, right):
             return cmp(left.titleOrId().lower(), right.titleOrId().lower())
 
-        for rrdTemplate in sorted(boundTemplates, byTitleOrId) + sorted(unboundTemplates, byTitleOrId):
+        for rrdTemplate in list(unboundTemplates):
+            if rrdTemplate.id.endswith('-replacement') or rrdTemplate.id.endswith('-addition'):
+                if '-'.join(rrdTemplate.id.split('-')[:-1]) in zDeviceTemplates:
+                    boundTemplates.append(rrdTemplate)
+                    unboundTemplates.remove(rrdTemplate)
+
+
         def makenode(rrdTemplate, suborg=None):
             uid = '/'.join(rrdTemplate.getPrimaryPath())
             path = ''
@@ -797,13 +803,26 @@ def byTitleOrId(left, right):
                 path = "%s (%s)" % (path, _t('Locally Defined'))
             else:
                 path = "%s (%s)" % (path, uiPath)
-            yield {'id': uid,
+            return {'id': uid,
                    'uid': uid,
                    'path': path,
                    'text': '%s %s' % (rrdTemplate.titleOrId(), path),
                    'leaf': True
                    }
 
+        for rrdTemplate in sorted(boundTemplates, byTitleOrId):
+            yield makenode(rrdTemplate)
+        if isDeviceClass:
+            available = []
+            for rrdTemplate in sorted(unboundTemplates, byTitleOrId):
+                available.append(makenode(rrdTemplate, "Available"))
+            yield {'id': 'Available',
+                   'text': 'Available',
+                   'leaf': False,
+                   'children': available
+                   }
+
+
     def getLocalTemplates(self, uid):
         """
         Returns a dictionary of every template defined on the device specified by the uid
diff --git a/SCHEMA_VERSION b/SCHEMA_VERSION
index 8b80672b97..411f54f547 100644
--- a/SCHEMA_VERSION
+++ b/SCHEMA_VERSION
@@ -1 +1 @@
-300.2.1
\ No newline at end of file
+300.2.2
\ No newline at end of file
diff --git a/VERSION b/VERSION
index 4b49d9bb63..468c41f93c 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-7.2.0
\ No newline at end of file
+7.2.1
\ No newline at end of file
diff --git a/bin/metrics/zenossStatsView.py b/bin/metrics/zenossStatsView.py
index 03e5d40148..dbdb35f078 100755
--- a/bin/metrics/zenossStatsView.py
+++ b/bin/metrics/zenossStatsView.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 ##############################################################################
 #
-# Copyright (C) Zenoss, Inc. 2017, all rights reserved.
+# Copyright (C) Zenoss, Inc. 2017-2024, all rights reserved.
 #
 # This content is made available according to terms specified in
 # License.zenoss under the directory where your Zenoss product is installed.
@@ -35,6 +35,8 @@ class ZProxyMetricGatherer(MetricGatherer):
     # Note that changing the follwing ID format will break this script.
     INSTANCE_ID_FORMAT = '{}_{}'
 
+    last_changed = {}
+
     def __init__(self, interval=30):
         super(ZProxyMetricGatherer, self).__init__()
         self.zopes = self.get_zopes(first_time=True)
@@ -84,21 +86,28 @@ def get_zopes(self, first_time=False):
             self.zopes = {}
 
         # Check mtime of /opt/zenoss/zproxy/conf/zope-upstreams.conf
-        # if it's newer than, say, now - self.interval, reread it.
+        # if its mtime is newer than the last one we recorded, reread it.
         zope_upstream_file = self.ZPROXY_CONF_DIR + 'zope-upstreams.conf'
         zenapi_upstream_file = self.ZPROXY_CONF_DIR + 'apizopes-upstreams.conf'
         zenreports_upstream_file = self.ZPROXY_CONF_DIR + 'zopereports-upstreams.conf'
         zauth_upstream_file = self.ZPROXY_CONF_DIR + 'zauth-upstreams.conf'
 
+        def read_upstream_file(upstream_file):
+            with open(upstream_file, 'r') as inf:
+                zopes = inf.readlines()
+                zopes = [line.rstrip('\n;') for line in zopes]
+                zopes = [line.split(' ')[-1] for line in zopes]
+            return zopes
+
         def check_upstream_util(upstream_file):
             upstream_modified = os.path.getmtime(upstream_file)
-            now = time.time()
             zopes = []
-            if first_time or upstream_modified > (now - self.interval):
-                with open(upstream_file, 'r') as inf:
-                    zopes = inf.readlines()
-                    zopes = [line.rstrip('\n;') for line in zopes]
-                    zopes = [line.split(' ')[-1] for line in zopes]
+            if first_time:
+                zopes = read_upstream_file(upstream_file)
+                self.last_changed[upstream_file] = upstream_modified
+            elif upstream_modified > self.last_changed.get(upstream_file, 0):
+                zopes = read_upstream_file(upstream_file)
+                self.last_changed[upstream_file] = upstream_modified
             return zopes
 
         def check_upstream(svcName, upstream_file):
diff --git a/setup.py b/setup.py
index ee07c0d257..46d194b4ea 100644
--- a/setup.py
+++ b/setup.py
@@ -77,7 +77,7 @@ class ZenDevelopCommand(develop):
             "zenjobs=Products.Jobber.bin:main",
         ],
         "celery.commands": [
-            "monitor=Products.Jobber.monitor:ZenJobsMonitor",
+            "monitor=Products.Jobber.monitor:MonitorCommand",
        ],
     },
 )
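
For reference, the RAMCommitment parsing in DeployedApp above reduces to the following standalone sketch. The MEM_MULTIPLIER table here is an assumption (the real table lives elsewhere in the controlplane package), the character class is tightened from [k,m,g,t] to [kmgt] so commas are not accepted, and float() is used because the pattern admits decimal values such as '1.5g':

    import re

    # Assumed multiplier table; the real MEM_MULTIPLIER may differ.
    MEM_MULTIPLIER = {"k": 1024, "m": 1024 ** 2, "g": 1024 ** 3, "t": 1024 ** 4}

    def ram_commitment_bytes(spec):
        """Parse a '<value><unit>' string such as '1G' or '1.5g' into bytes."""
        match = re.search(
            r"(?P<value>[0-9]*\.?[0-9]*)(?P<unit>[kmgt])", spec, re.IGNORECASE
        )
        if not match:
            return None
        value = float(match.group("value"))
        return int(value * MEM_MULTIPLIER[match.group("unit").lower()])

    assert ram_commitment_bytes("1G") == 1024 ** 3
    assert ram_commitment_bytes("1.5g") == int(1.5 * 1024 ** 3)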
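
The _dorequest retry loop depends on re-authenticating whenever the server resets the connection. A minimal sketch of the idiom, where send and login are hypothetical stand-ins for the urllib2 opener call and _login; unlike the patched code, it keeps the last exception explicitly instead of relying on a bare raise in the loop's else clause, which only re-raises correctly on Python 2:

    import socket
    from errno import ECONNRESET

    def do_request(send, login, attempts=5):
        """Call send(); on ECONNRESET assume auth lapsed, log in, and retry."""
        last_error = None
        for _ in range(attempts):
            try:
                return send()
            except socket.error as ex:
                if ex.errno != ECONNRESET:
                    raise  # anything other than a connection reset is fatal
                last_error = ex
                login()  # a reset is assumed to mean "not authenticated"
        # Every attempt was reset; surface the final error.
        raise last_error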
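
environment.py stacks @property on top of @lru_cache(maxsize=1), so each environment variable is read from os.environ once per process; that is safe here only because the class is instantiated exactly once as the module-level configuration singleton (the cache is keyed on self). A sketch of the idiom using stdlib functools; the patch itself uses the backports.functools_lru_cache package on Python 2:

    import os
    from functools import lru_cache  # backports.functools_lru_cache on Python 2

    class _Env(object):
        @property
        @lru_cache(maxsize=1)  # cached per instance; fine for a singleton
        def version(self):
            return os.environ.get("SERVICED_VERSION", "")

    configuration = _Env()  # first access reads os.environ, later ones hit the cache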
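
The reworked get_zopes in zenossStatsView.py re-reads an upstream file only when its mtime advances past the value recorded in last_changed, which is keyed by file path; note that the cache must be indexed with the upstream_file variable rather than the literal string 'upstream_file'. A condensed sketch of the pattern with illustrative names:

    import os

    class UpstreamWatcher(object):
        def __init__(self):
            self.last_changed = {}  # path -> last mtime seen

        def read_if_changed(self, path):
            """Return the file's lines when its mtime advanced, else None."""
            mtime = os.path.getmtime(path)
            if mtime > self.last_changed.get(path, 0):  # key by the variable
                self.last_changed[path] = mtime
                with open(path) as f:
                    return f.readlines()
            return None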