diff --git a/bin/snab_flux_load_test.d b/bin/snab_flux_load_test.d new file mode 100755 index 00000000..345029cd --- /dev/null +++ b/bin/snab_flux_load_test.d @@ -0,0 +1,397 @@ +#!/bin/bash + +# This is used to start the python daemon_runner and issue kill to the parent +# process. Originally a stop call was made to the python daemon_runner which +# initiated a new instance of the agent and the agent's main module, this often +# resulted in the daemon_runner overwritting to the Skyline app log file due to +# complexities in the python logging context relating to log handlers and the +# use of multiprocessing. These stop calls are no longer made directly to the +# agent and they are made directly to the parent pid. In terms of the +# python-daemon this is exactly what initiating a new agent does, the +# python-daemon simply issues a os.kill pid, however initiating a new instance +# of the application results in a new daemon_runner that cannot preserve the +# log file object of the running daemon_runner in terms of: +# daemon_context.files_preserve = [handler.stream] +# which results in the log file being overwritten. + +CURRENT_DIR=$(dirname "${BASH_SOURCE[0]}") +#BASEDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/.." +BASEDIR=$(dirname "$CURRENT_DIR") + +RETVAL=0 + +SERVICE_NAME="snab" +SUB_SERVICE_NAME="${SERVICE_NAME}_flux_load_test" + +PID=$$ + +# Python virtualenv support +# A skyline.conf can be used for passing any additional variables to this script +# This was specifically added to allow for the operator to run in a virtualenv +# environment with whichever version of python they choose to run. Some simple +# sanity checks are made if a virtualenv is used +if [ -f /etc/skyline/skyline.conf ]; then + # Test the config file has sensible variables + bash -n /etc/skyline/skyline.conf + if [ $? -eq 0 ]; then + . 
/etc/skyline/skyline.conf + else + echo "error: There is a syntax error in /etc/skyline/skyline.conf, try bash -n /etc/skyline/skyline.conf" + exit 1 + fi +fi +USE_VIRTUALENV=0 +if [ "$PYTHON_VIRTUALENV" == "true" ]; then + if [ ! -f "$USE_PYTHON" ]; then + echo "error: The python binary specified does not exists as specified as USE_PYTHON in /etc/skyline/skyline.conf" + exit 1 + fi +# Test the python binary + $USE_PYTHON --version > /dev/null 2>&1 + if [ $? -eq 0 ]; then + USE_VIRTUALENV=1 + else + echo "error: The python binary specified does not execute as expected as specified as USE_PYTHON in /etc/skyline/skyline.conf" + exit 1 + fi +fi + +# Determine LOG and PID PATHs from the settings.py +if [ ! -f "$BASEDIR/skyline/settings.py" ]; then + echo "error: The Skyline settings.py was not found at $BASEDIR/skyline/settings.py" + exit 1 +fi +LOG_PATH=$(cat "$BASEDIR/skyline/settings.py" | grep -v "^#" | grep "^LOG_PATH = " | sed -e "s/.*= //;s/'//g" | sed -e 's/"//g') +if [ ! -d "$LOG_PATH" ]; then + echo "error: The LOG_PATH directory in $BASEDIR/skyline/settings.py does not exist" + exit 1 +fi +PID_PATH=$(cat "$BASEDIR/skyline/settings.py" | grep -v "^#" | grep "^PID_PATH = " | sed -e "s/.*= //;s/'//g" | sed -e 's/"//g') +if [ ! -d "$PID_PATH" ]; then + echo "error: The PID_PATH directory in $BASEDIR/skyline/settings.py does not exist" + exit 1 +fi +SKYLINE_TMP_DIR=$(cat "$BASEDIR/skyline/settings.py" | grep -v "^#" | grep "^SKYLINE_TMP_DIR = " | sed -e "s/.*= //;s/'//g" | sed -e 's/"//g') +if [ ! 
-d "$SKYLINE_TMP_DIR" ]; then + echo "notice: The SKYLINE_TMP_DIR directory in $BASEDIR/skyline/settings.py does not exist, creating" + mkdir -p "$SKYLINE_TMP_DIR" +fi + +# @added 20200415 - Branch #3262: py3 +# Handle using a skyline user that does not have sudo access +CURRENT_USER=$(whoami) +USE_SUDO="sudo" +if [ "$CURRENT_USER" == "skyline" ]; then + USE_SUDO="" +fi + +# Check if it is running and if so its state +RESTART=0 +RUNNING=0 +VALID_PIDFILE=0 +VALID_PID=0 +PROCESS_STALLED=0 +if [ -f "$PID_PATH/${SUB_SERVICE_NAME}.pid" ]; then + RUNNING=1 + RUNNING_PID=$(cat "$PID_PATH/${SUB_SERVICE_NAME}.pid" | head -n 1) + # Is the RUNNING_PID a valid number? + # shellcheck disable=SC2065 + test "$RUNNING_PID" -gt 1 > /dev/null 2>&1 + if [ $? -eq 0 ]; then + VALID_PIDFILE=1 + fi + if [ $VALID_PIDFILE -eq 1 ]; then + if [ -f "/proc/$RUNNING_PID/status" ]; then + RUNNING=1 + VALID_PID=1 + else + PROCESS_STALLED=1 + fi + fi +fi + +status () { + +# As per http://refspecs.linuxbase.org/LSB_3.1.0/LSB-Core-generic/LSB-Core-generic/iniscrptact.html +# 0 program is running or service is OK +# 1 program is dead and /var/run pid file exists +# 2 program is dead and /var/lock lock file exists +# 3 program is not running +# 4 program or service status is unknown + + if [[ $RUNNING -eq 1 && $VALID_PID -eq 1 ]]; then + echo "${SUB_SERVICE_NAME} is running with pid $RUNNING_PID" + return 0 + fi + + if [ $PROCESS_STALLED -eq 1 ]; then + echo "${SUB_SERVICE_NAME} is dead and pid file exists - $PID_PATH/${SUB_SERVICE_NAME}.pid" + return 1 + fi + + if [ $RUNNING -eq 0 ]; then + echo "${SUB_SERVICE_NAME} is not running" + return 3 + fi + +} + +start () { + + if [ $RESTART -eq 1 ]; then + # These a reset for the restart context + RUNNING=0 + VALID_PIDFILE=0 + VALID_PID=0 + PROCESS_STALLED=0 + fi + + if [ $PROCESS_STALLED -eq 1 ]; then + echo "${SUB_SERVICE_NAME} is dead but pid file exists - $PID_PATH/${SUB_SERVICE_NAME}.pid" + return 1 + fi + + if [[ $RUNNING -eq 1 && $VALID_PIDFILE 
-eq 0 ]]; then + echo "error: A pid file exists for ${SUB_SERVICE_NAME} with no valid pid $PID_PATH/${SUB_SERVICE_NAME}.pid" + return 1 + fi + + if [[ $RUNNING -eq 1 && $VALID_PID -eq 1 ]]; then + echo "${SUB_SERVICE_NAME} is already running with pid $RUNNING_PID" + return 0 + fi + + rm -f "$BASEDIR/skyline/${SERVICE_NAME}/"*.pyc + if [ -f "$LOG_PATH/${SUB_SERVICE_NAME}.log" ]; then + cat "$LOG_PATH/${SUB_SERVICE_NAME}.log" > "$LOG_PATH/${SUB_SERVICE_NAME}.log.last" + fi + + touch "$LOG_PATH/${SUB_SERVICE_NAME}.log.lock" + touch "$LOG_PATH/${SUB_SERVICE_NAME}.log.wait" + + if [ -f "$BASEDIR/skyline/settings.pyc" ]; then + rm -f "$BASEDIR/skyline/settings.pyc" + fi + + if [ $USE_VIRTUALENV -eq 0 ]; then + /usr/bin/env python "$BASEDIR/skyline/${SERVICE_NAME}/snab_flux_load_test_agent.py" start + else + $USE_PYTHON "$BASEDIR/skyline/${SERVICE_NAME}/snab_flux_load_test_agent.py" start + fi + RETVAL=$? + + if [ $RETVAL -ne 0 ]; then + if [ -f "$LOG_PATH/${SUB_SERVICE_NAME}.log.last" ]; then + cat "$LOG_PATH/${SUB_SERVICE_NAME}.log.last" "$LOG_PATH/${SUB_SERVICE_NAME}.log" > "$LOG_PATH/${SUB_SERVICE_NAME}.log.new" + cat "$LOG_PATH/${SUB_SERVICE_NAME}.log.new" > "$LOG_PATH/${SUB_SERVICE_NAME}.log" + rm -f "$LOG_PATH/${SUB_SERVICE_NAME}.log.last" + rm -f "$LOG_PATH/${SUB_SERVICE_NAME}.log.new" + fi + if [ -f "$LOG_PATH/${SUB_SERVICE_NAME}.log.wait" ]; then + rm -f "$LOG_PATH/${SUB_SERVICE_NAME}.log.wait" + fi + if [ -f "$LOG_PATH/${SUB_SERVICE_NAME}.log.lock" ]; then + rm -f "$LOG_PATH/${SUB_SERVICE_NAME}.log.lock" + fi + echo "error - failed to start ${SUB_SERVICE_NAME}" + return $RETVAL + fi + + PROCESS_WAITING=0 + NOW=$(date +%s) + WAIT_FOR=$((NOW+5)) + while [ $NOW -lt $WAIT_FOR ]; + do + if [ ! 
-f "$LOG_PATH/${SUB_SERVICE_NAME}.log.wait" ]; then + NOW=$((WAIT_FOR+1)) + PROCESS_WAITING=1 + echo "$(date +"%Y-%m-%d %H:%M:%S") :: $PID :: ${SUB_SERVICE_NAME}.d :: process removed log.wait file, starting log management" >> "$LOG_PATH/${SUB_SERVICE_NAME}.log" + else + sleep .2 + NOW=$(date +%s) + fi + done + + if [ -f "$PID_PATH/${SUB_SERVICE_NAME}.pid" ]; then + RUNNING_PID=$(cat "$PID_PATH/${SUB_SERVICE_NAME}.pid" | head -n 1) + else + RUNNING_PID="unknown" + fi + + if [ $PROCESS_WAITING -eq 0 ]; then + if [ -f "$LOG_PATH/${SUB_SERVICE_NAME}.log.wait" ]; then + rm -f "$LOG_PATH/${SUB_SERVICE_NAME}.log.wait" + fi + if [ -f "$LOG_PATH/${SUB_SERVICE_NAME}.log.lock" ]; then + rm -f "$LOG_PATH/${SUB_SERVICE_NAME}.log.lock" + fi + echo "$(date +"%Y-%m-%d %H:%M:%S") :: $PID :: ${SUB_SERVICE_NAME}.d :: error - log management failed" >> "$LOG_PATH/${SUB_SERVICE_NAME}.log" + echo "${SUB_SERVICE_NAME} started with pid $RUNNING_PID, but log management failed" + fi + + if [ $PROCESS_WAITING -eq 1 ]; then + if [ -f "$LOG_PATH/${SUB_SERVICE_NAME}.log.last" ]; then + cat "$LOG_PATH/${SUB_SERVICE_NAME}.log.last" "$LOG_PATH/${SUB_SERVICE_NAME}.log" > "$LOG_PATH/${SUB_SERVICE_NAME}.log.new" + cat "$LOG_PATH/${SUB_SERVICE_NAME}.log.new" > "$LOG_PATH/${SUB_SERVICE_NAME}.log" + rm -f "$LOG_PATH/${SUB_SERVICE_NAME}.log.last" + rm -f "$LOG_PATH/${SUB_SERVICE_NAME}.log.new" + fi + echo "$(date +"%Y-%m-%d %H:%M:%S") :: $PID :: ${SUB_SERVICE_NAME}.d :: log management done" >> "$LOG_PATH/${SUB_SERVICE_NAME}.log" + if [ -f "$LOG_PATH/${SUB_SERVICE_NAME}.log.lock" ]; then + rm -f "$LOG_PATH/${SUB_SERVICE_NAME}.log.lock" + fi + echo "${SUB_SERVICE_NAME} started with pid $RUNNING_PID" + fi + + return $RETVAL +} + +stop () { + + if [ $PROCESS_STALLED -eq 1 ]; then + echo "${SUB_SERVICE_NAME} is dead but pid file exists - $PID_PATH/${SUB_SERVICE_NAME}.pid" + return 1 + fi + + if [[ $RUNNING -eq 1 && $VALID_PIDFILE -eq 0 ]]; then + echo "error: A pid file exists for ${SUB_SERVICE_NAME} with no 
valid pid $PID_PATH/${SUB_SERVICE_NAME}.pid" + return 1 + fi + + if [ $RUNNING -eq 0 ]; then + echo "${SUB_SERVICE_NAME} is not running" + return 0 + fi + +# Originally a stop call was made to the python daemon_runner which +# initiated a new instance of the agent and the agent's main module, this often +# resulted in the daemon_runner overwritting to the Skyline app log file due to +# complexities in the python logging context relating to log handlers and the +# use of multiprocessing. These stop calls are no longer made directly to the +# agent and they are made directly to the parent pid. In terms of the +# python-daemon this is exactly what initiating a new agent does, the +# python-daemon simply issues a os.kill pid, however initiating a new instance +# of the application results in a new daemon_runner that cannot preserve the +# log file object of the running daemon_runner in terms of: +# daemon_context.files_preserve = [handler.stream] +# which results in the log file being overwritten. +# if [ $USE_VIRTUALENV -eq 0 ]; then +# /usr/bin/env python "$BASEDIR/skyline/${SERVICE_NAME}/agent.py" stop +# else +# $USE_PYTHON "$BASEDIR/skyline/${SERVICE_NAME}/agent.py" stop +# fi +# RETVAL=$? 
+# if [[ $RETVAL -eq 0 ]]; then +# echo "$(date +"%Y-%m-%d %H:%M:%S") :: $PID :: ${SERVICE_NAME}.d :: stopped ${SERVICE_NAME}-agent" +# else +# echo "$(date +"%Y-%m-%d %H:%M:%S") :: $PID :: ${SERVICE_NAME}.d :: error - failed to stop ${SERVICE_NAME}-agent" +# fi + + SERVICE_PID=$RUNNING_PID + SERVICE_RELATED_PID=$(ps aux | grep "${SERVICE_NAME}/snab_flux_load_test_agent.py start" | grep "$SUB_SERVICE_NAME" | grep -v grep | awk '{print $2 }' | grep -cx "$SERVICE_PID") + if [ $SERVICE_RELATED_PID -eq 1 ]; then + echo "$(date +"%Y-%m-%d %H:%M:%S") :: $PID :: ${SUB_SERVICE_NAME}.d :: stopping process $SERVICE_PID" >> "$LOG_PATH/${SUB_SERVICE_NAME}.log" +# @added 20200415 - Branch #3262: py3 +# Handle using a skyline user that does not have sudo access + $USE_SUDO kill $SERVICE_PID + fi + + PROCESS_COUNT=$(ps aux | grep "${SERVICE_NAME}/snab_flux_load_test_agent.py start" | grep "$SUB_SERVICE_NAME" | grep -v grep | awk '{print $2 }' | wc -l) + if [ $PROCESS_COUNT -gt 0 ]; then + sleep 1 + fi + + # TODO: write a real kill script + PROCESS_COUNT=$(ps aux | grep "${SERVICE_NAME}/snab_flux_load_test_agent.py start" | grep "$SUB_SERVICE_NAME" | grep -v grep | awk '{print $2 }' | wc -l) + if [ $PROCESS_COUNT -gt 0 ]; then + # kill -15 + ps aux | grep "${SERVICE_NAME}/snab_flux_load_test_agent.py start" | grep "$SUB_SERVICE_NAME" | grep -v grep | awk '{print $2 }' | while read i_pid + do + SERVICE_RELATED_PID=$(ps aux | grep "${SERVICE_NAME}/snab_flux_load_test_agent.py start" | grep "$SUB_SERVICE_NAME" | grep -v grep | awk '{print $2 }' | grep -cx "$i_pid") + if [ $SERVICE_RELATED_PID -eq 1 ]; then + echo "$(date +"%Y-%m-%d %H:%M:%S") :: $PID :: ${SUB_SERVICE_NAME}.d :: cleaning up process $i_pid" >> "$LOG_PATH/${SUB_SERVICE_NAME}.log" +# @added 20200415 - Branch #3262: py3 +# Handle using a skyline user that does not have sudo access + $USE_SUDO kill $i_pid + fi + done + + PROCESS_COUNT=$(ps aux | grep "${SERVICE_NAME}/snab_flux_load_test_agent.py start" | grep 
"$SUB_SERVICE_NAME" | grep -v grep | awk '{print $2 }' | wc -l) + if [ $PROCESS_COUNT -gt 0 ]; then + # kill -9 + ps aux | grep "${SERVICE_NAME}/snab_flux_load_test_agent.py start" | grep "$SUB_SERVICE_NAME" | grep -v grep | awk '{print $2 }' | while read i_pid + do + SERVICE_RELATED_PID=$(ps aux | grep "${SERVICE_NAME}/snab_flux_load_test_agent.py start" | grep "$SUB_SERVICE_NAME" | grep -v grep | awk '{print $2 }' | grep -cx "$i_pid") + if [ $SERVICE_RELATED_PID -eq 1 ]; then + echo "$(date +"%Y-%m-%d %H:%M:%S") :: $PID :: ${SUB_SERVICE_NAME}.d :: kill -9 process $i_pid" >> "$LOG_PATH/${SUB_SERVICE_NAME}.log" +# @added 20200415 - Branch #3262: py3 +# Handle using a skyline user that does not have sudo access + $USE_SUDO kill -9 $i_pid + fi + done + fi + fi + + PROCESS_COUNT=$(ps aux | grep "${SERVICE_NAME}/snab_flux_load_test_agent.py start" | grep "$SUB_SERVICE_NAME" | grep -v grep | awk '{print $2 }' | wc -l) + if [[ ! -f "$PID_PATH/${SUB_SERVICE_NAME}.pid" && $PROCESS_COUNT -eq 0 ]]; then + echo "$(date +"%Y-%m-%d %H:%M:%S") :: $PID :: ${SUB_SERVICE_NAME}.d :: all ${SUB_SERVICE_NAME} processes have been stopped - OK" >> "$LOG_PATH/${SUB_SERVICE_NAME}.log" + echo "$SUB_SERVICE_NAME has been stopped" + RETVAL=0 + return $RETVAL + fi + if [[ -f "$PID_PATH/${SUB_SERVICE_NAME}.pid" && $PROCESS_COUNT -eq 0 ]]; then + echo "$(date +"%Y-%m-%d %H:%M:%S") :: $PID :: ${SUB_SERVICE_NAME}.d :: error - stopped all ${SUB_SERVICE_NAME} processes, but a pid file remains" >> "$LOG_PATH/${SUB_SERVICE_NAME}.log" + rm -f "$PID_PATH/${SUB_SERVICE_NAME}.pid" + echo "$(date +"%Y-%m-%d %H:%M:%S") :: $PID :: ${SUB_SERVICE_NAME}.d :: pid file removed" >> "$LOG_PATH/${SUB_SERVICE_NAME}.log" + echo "$SUB_SERVICE_NAME has been stopped" + RETVAL=1 + fi + if [[ -f "$PID_PATH/${SUB_SERVICE_NAME}.pid" && $PROCESS_COUNT -gt 0 ]]; then + echo "$(date +"%Y-%m-%d %H:%M:%S") :: $PID :: ${SUB_SERVICE_NAME}.d :: error - failed to stop all ${SUB_SERVICE_NAME} processes and pid file remains" >> 
"$LOG_PATH/${SUB_SERVICE_NAME}.log" + echo "$(date +"%Y-%m-%d %H:%M:%S") :: $PID :: ${SUB_SERVICE_NAME}.d :: error - there maybe zombies or multiple instances running" >> "$LOG_PATH/${SUB_SERVICE_NAME}.log" + echo "$SUB_SERVICE_NAME.d failed to stop all $SUB_SERVICE_NAME processes, there maybe zombies or multiple instances running" + RETVAL=1 + fi + + # These are reset for the restart context + RUNNING=0 + VALID_PIDFILE=0 + VALID_PID=0 + PROCESS_STALLED=0 + + return $RETVAL +} + +run () { + echo "running ${SUB_SERVICE_NAME}" + if [ $USE_VIRTUALENV -eq 0 ]; then + /usr/bin/env python "$BASEDIR/skyline/${SERVICE_NAME}/snab_flux_load_test_agent.py" run + else + $USE_PYTHON "$BASEDIR/skyline/${SERVICE_NAME}/snab_flux_load_test_agent.py" run + fi +} + +# See how we were called. +case "$1" in + start) + start + ;; + stop) + stop + ;; + restart) + RESTART=1 + stop + start + ;; + run) + run + ;; + status) + status + ;; + *) + echo $"Usage: $0 {start|stop|restart|run|status}" + exit 2 + ;; +esac diff --git a/docs/SNAB.rst b/docs/SNAB.rst index 3670caa9..7732ac4b 100644 --- a/docs/SNAB.rst +++ b/docs/SNAB.rst @@ -208,3 +208,46 @@ Example of log output: 2020-10-07 16:52:26 :: 3580119 :: Ionosphere metrics :: 763 2020-10-07 16:52:26 :: 3580119 :: canary duration :: 24.17 2020-10-07 16:52:26 :: 3580119 :: sleeping for 33.01 seconds due to low run time... + +SNAB flux load tester +--------------------- + +So how many metrics can Skyline handle running on that digitalocean droplet? + +It is difficult to tell how many metrics a single Skyline server can handle +given the myriad combinations of configurations and hardware resources it may be +run on. + +This is where SNAB flux load tester comes into play. It allows you to deploy +Skyline and the snab app can be configured to send as many metrics as you want +to Graphite and Skyline. + +.. warning:: DO NOT run this on an existing Skyline that is running with real + data, it is meant to be run on a new, disposable Skyline build. 
This is because it + populates Graphite, Redis, the database tables, etc. with real data of test + metrics. Unless you want to test and remove all the test metrics from Redis, + Graphite and MariaDB manually, which would be possible, but not advisable. + +To enable a snab_flux_load_test set the following in settings.py: + +.. code-block:: python + + SNAB_FLUX_LOAD_TEST_ENABLED = True + # the number of metrics you want to load test with, use the number appropriate + # for you + SNAB_FLUX_LOAD_TEST_METRICS = 2000 + +Ensure that horizon, analyzer, flux and Graphite are running and start +snab_flux_load_test.d as appropriate + +.. code-block:: bash + + sudo -u skyline /opt/skyline/github/skyline/bin/snab_flux_load_test.d start + tail -n 80 /var/log/skyline/snab_flux_load_test.log + + # Stop the load test + /opt/skyline/github/skyline/bin/snab_flux_load_test.d stop + +You will want to let the load test run for a while and you may want to adjust +the :mod:`settings.SNAB_FLUX_LOAD_TEST_METRICS` value and restart the test a few +times. 
diff --git a/skyline/snab/snab_flux_load_test.py b/skyline/snab/snab_flux_load_test.py new file mode 100644 index 00000000..458aebc4 --- /dev/null +++ b/skyline/snab/snab_flux_load_test.py @@ -0,0 +1,475 @@ +from __future__ import division +import logging +import uuid +import random + +import requests + +from time import time, sleep +from threading import Thread +from multiprocessing import Process +import os +from os import kill, getpid +import traceback +from sys import version_info +import os.path +import datetime + +import settings +from skyline_functions import get_redis_conn, get_redis_conn_decoded + +skyline_app = 'snab_flux_load_test' +skyline_app_logger = 'snab_flux_load_testLog' +logger = logging.getLogger(skyline_app_logger) +skyline_app_logfile = '%s/%s.log' % (settings.LOG_PATH, skyline_app) +skyline_app_loglock = '%s.lock' % skyline_app_logfile +skyline_app_logwait = '%s.wait' % skyline_app_logfile + +python_version = int(version_info[0]) +this_host = str(os.uname()[1]) + +try: + SERVER_METRIC_PATH = '.%s' % settings.SERVER_METRICS_NAME + if SERVER_METRIC_PATH == '.': + SERVER_METRIC_PATH = '' +except: + SERVER_METRIC_PATH = '' + +try: + SNAB_FLUX_LOAD_TEST_ENABLED = settings.SNAB_FLUX_LOAD_TEST_ENABLED +except: + SNAB_FLUX_LOAD_TEST_ENABLED = True +try: + SNAB_FLUX_LOAD_TEST_METRICS = settings.SNAB_FLUX_LOAD_TEST_METRICS +except: + SNAB_FLUX_LOAD_TEST_METRICS = 0 +try: + SNAB_FLUX_LOAD_TEST_METRICS_PER_POST = settings.SNAB_FLUX_LOAD_TEST_METRICS_PER_POST +except: + SNAB_FLUX_LOAD_TEST_METRICS_PER_POST = 100 +try: + SNAB_FLUX_LOAD_TEST_NAMESPACE_PREFIX = settings.SNAB_FLUX_LOAD_TEST_NAMESPACE_PREFIX +except: + SNAB_FLUX_LOAD_TEST_NAMESPACE_PREFIX = 'skyline.snab.%s.flux_load_test' % this_host + +FLUX_POST_URL = '%s/flux/metric_data_post' % settings.SKYLINE_URL + +LOCAL_DEBUG = False + +snab_flux_load_test_metrics_set = 'snab.flux_load_test.metrics' +snab_flux_load_test_metrics_all_set = 'snab.flux_load_test.metrics.all' + + +class 
SNAB_flux_load_test(Thread): + """ + The SNAB class which controls the snab thread and spawned + processes. + + """ + + def __init__(self, parent_pid): + """ + Initialize the SNAB_flux_load_test + """ + super(SNAB_flux_load_test, self).__init__() + self.redis_conn = get_redis_conn(skyline_app) + self.redis_conn_decoded = get_redis_conn_decoded(skyline_app) + self.daemon = True + self.parent_pid = parent_pid + self.current_pid = getpid() + + def check_if_parent_is_alive(self): + """ + Self explanatory + """ + try: + kill(self.current_pid, 0) + kill(self.parent_pid, 0) + except: + exit(0) + + def spin_snab_flux_load_test_process(self, current_timestamp): + """ + Push metrics to flux. + + :param i: python process id + + :return: True + :rtype: boolean + """ + + spin_start = time() + snab_flux_load_test_metrics_set = 'snab.flux_load_test.metrics' + snab_flux_load_test_metrics_all_set = 'snab.flux_load_test.metrics.all' + + spin_snab_flux_load_test_process_pid = os.getpid() + logger.info('spin_snab_flux_load_test_process - pid %s, sending %s metrics to flux at %s metrics per POST' % ( + str(spin_snab_flux_load_test_process_pid), + str(SNAB_FLUX_LOAD_TEST_METRICS), + str(SNAB_FLUX_LOAD_TEST_METRICS_PER_POST))) + if not SNAB_FLUX_LOAD_TEST_METRICS: + logger.info('nothing to do') + return + + snab_flux_load_test_metrics = [] + try: + snab_flux_load_test_metrics = sorted(list(self.redis_conn_decoded.smembers(snab_flux_load_test_metrics_set))) + except Exception as e: + logger.error('error :: could not query Redis for set %s - %s' % (snab_flux_load_test_metrics_set, e)) + logger.info('snab_flux_load_test_metrics determined %s test metrics from Redis' % ( + str(len(snab_flux_load_test_metrics)))) + + if snab_flux_load_test_metrics: + try: + self.redis_conn.sadd(snab_flux_load_test_metrics_all_set, *set(snab_flux_load_test_metrics)) + except: + logger.error(traceback.format_exc()) + logger.error('error :: failed to add multiple members to the %s Redis set' % 
snab_flux_load_test_metrics_all_set) + + snab_flux_load_test_metrics_all = [] + try: + snab_flux_load_test_metrics_all = sorted(list(self.redis_conn_decoded.smembers(snab_flux_load_test_metrics_all_set))) + except Exception as e: + logger.error('error :: could not query Redis for set %s - %s' % (snab_flux_load_test_metrics_all_set, e)) + logger.info('snab_flux_load_test_metrics_all determined %s known test metrics from Redis' % ( + str(len(snab_flux_load_test_metrics_all)))) + + check_for_removals = True + if len(snab_flux_load_test_metrics) != len(snab_flux_load_test_metrics_all): + check_for_removals = False + if len(snab_flux_load_test_metrics) > SNAB_FLUX_LOAD_TEST_METRICS: + check_for_removals = True + if check_for_removals: + logger.info('checking what snab test metrics need to be removed') + remove_from_snab_set = False + if len(snab_flux_load_test_metrics) > SNAB_FLUX_LOAD_TEST_METRICS: + remove_from_snab_set = True + metrics_to_remove = [] + if remove_from_snab_set: + metrics_to_remove = snab_flux_load_test_metrics_all[SNAB_FLUX_LOAD_TEST_METRICS:] + snab_flux_load_test_metrics = snab_flux_load_test_metrics[0:SNAB_FLUX_LOAD_TEST_METRICS] + if remove_from_snab_set: + if metrics_to_remove: + logger.info('removing %s metrics from %s Redis set' % ( + str(len(metrics_to_remove)), snab_flux_load_test_metrics_set)) + try: + self.redis_conn.srem(snab_flux_load_test_metrics_set, *set(metrics_to_remove)) + except: + logger.info(traceback.format_exc()) + logger.error('error :: failed to remove multiple members from %s Redis set' % snab_flux_load_test_metrics_set) + + logger.info('getting list of metrics.unique_metrics from Redis set') + full_uniques = '%sunique_metrics' % settings.FULL_NAMESPACE + try: + unique_metrics = list(self.redis_conn_decoded.smembers(full_uniques)) + except: + logger.error(traceback.format_exc()) + logger.error('error :: failed to generate a list from %s Redis set' % full_uniques) + logger.info('checking if any snab test metrics need to be 
removed from metrics.unique_metrics') + for metric in unique_metrics: + if SNAB_FLUX_LOAD_TEST_NAMESPACE_PREFIX in metric: + if metric.startswith(settings.FULL_NAMESPACE): + base_name = metric.replace(settings.FULL_NAMESPACE, '', 1) + else: + base_name = metric + if base_name not in snab_flux_load_test_metrics: + try: + self.redis_conn.srem(full_uniques, str(metric)) + except: + pass + del unique_metrics + del metrics_to_remove + + flux_last_key_prefix = 'flux.last.%s.*' % SNAB_FLUX_LOAD_TEST_NAMESPACE_PREFIX + logger.info('generating list of all possible %s Redis keys' % flux_last_key_prefix) + snab_remove_flux_last_keys = [] + for base_name in snab_flux_load_test_metrics_all: + if base_name not in snab_flux_load_test_metrics: + try: + flux_last_key = 'flux.last.%s' % base_name + snab_remove_flux_last_keys.append(flux_last_key) + except: + pass + del snab_flux_load_test_metrics_all + logger.info('getting list of flux.last Redis keys') + flux_last_keys = list(self.redis_conn_decoded.scan_iter(match=flux_last_key_prefix)) + logger.info('there are potentially %s flux.last keys that need to be removed from Redis for not in use snab test metrics' % str(len(snab_remove_flux_last_keys))) + logger.info('checking if any of the %s flux.last keys need to be removed from Redis' % str(len(flux_last_keys))) + snab_flux_last_keys_to_remove = [] + for flux_last_key in snab_remove_flux_last_keys: + if flux_last_key in flux_last_keys: + snab_flux_last_keys_to_remove.append(flux_last_key) + del flux_last_keys + del snab_remove_flux_last_keys + if snab_flux_last_keys_to_remove: + for flux_last_key in snab_flux_last_keys_to_remove: + try: + self.redis_conn.delete(flux_last_key) + except: + continue + logger.info('deleted %s flux.last keys for not in use snab test metrics' % str(len(snab_flux_last_keys_to_remove))) + else: + logger.info('there are no flux.last keys for not in use snab test metrics to delete') + del snab_flux_last_keys_to_remove + + adding_metrics = 0 + if 
len(snab_flux_load_test_metrics) < SNAB_FLUX_LOAD_TEST_METRICS: + adding_metrics = SNAB_FLUX_LOAD_TEST_METRICS - len(snab_flux_load_test_metrics) + logger.info('adding %s metrics to snab_flux_load_test_metrics and %s Redis set' % ( + str(adding_metrics), snab_flux_load_test_metrics_set)) + + if len(snab_flux_load_test_metrics) < SNAB_FLUX_LOAD_TEST_METRICS: + snab_flux_load_test_metrics_all = [] + known_snab_metrics_to_added = 0 + try: + snab_flux_load_test_metrics_all = list(self.redis_conn_decoded.smembers(snab_flux_load_test_metrics_all_set)) + except Exception as e: + logger.error('error :: could not query Redis for set %s - %s' % (snab_flux_load_test_metrics_all_set, e)) + snab_flux_load_test_metrics_all = [] + logger.info('snab_flux_load_test_metrics_all determined %s test metrics from Redis' % ( + str(len(snab_flux_load_test_metrics_all)))) + if snab_flux_load_test_metrics_all: + unique_snab_flux_load_test_metrics_set = set(snab_flux_load_test_metrics) + unique_snab_flux_load_test_metrics_all_set = set(snab_flux_load_test_metrics_all) + known_snab_metrics_to_add = [] + set_difference = unique_snab_flux_load_test_metrics_all_set.difference(unique_snab_flux_load_test_metrics_set) + for metric in set_difference: + known_snab_metrics_to_add.append(metric) + if known_snab_metrics_to_add: + known_snab_metrics_to_add = list(set(known_snab_metrics_to_add)) + for metric in known_snab_metrics_to_add: + if len(snab_flux_load_test_metrics) < SNAB_FLUX_LOAD_TEST_METRICS: + snab_flux_load_test_metrics.append(metric) + known_snab_metrics_to_added += 1 + logger.info('%s known_snab_metrics added snab_flux_load_test_metrics' % ( + str(known_snab_metrics_to_added))) + del snab_flux_load_test_metrics_all + + while len(snab_flux_load_test_metrics) < SNAB_FLUX_LOAD_TEST_METRICS: + new_uuid = str(uuid.uuid4()) + new_metric_uuid = new_uuid.replace('-', '.') + slot = str(round(random.random(), 2)) + new_metric = '%s.%s.%s' % (SNAB_FLUX_LOAD_TEST_NAMESPACE_PREFIX, slot, 
new_metric_uuid) + snab_flux_load_test_metrics.append(new_metric) + # Add to the snab_flux_load_test_metrics_set Redis set + try: + self.redis_conn.sadd(snab_flux_load_test_metrics_set, new_metric) + except: + logger.error(traceback.format_exc()) + logger.error('error :: failed to add item to Redis set %s' % ( + snab_flux_load_test_metrics_set)) + + if adding_metrics: + logger.info('snab_flux_load_test_metrics now has %s metrics' % ( + str(len(snab_flux_load_test_metrics)))) + + logger.info('snab_flux_load_test_metrics has %s metrics' % ( + str(len(snab_flux_load_test_metrics)))) + logger.info('snab_flux_load_test_metrics has %s unique metrics' % ( + str(len(set(snab_flux_load_test_metrics))))) + try: + self.redis_conn.sadd(snab_flux_load_test_metrics_set, *set(snab_flux_load_test_metrics)) + except: + logger.error(traceback.format_exc()) + logger.error('error :: failed to add multiple members to the %s Redis set' % snab_flux_load_test_metrics_set) + + epoch_Y = datetime.datetime.today().year + epoch_m = datetime.datetime.today().month + epoch_d = datetime.datetime.today().day + epoch_H = datetime.datetime.today().hour + epoch_M = datetime.datetime.today().minute + epoch_S = 0 + initial_datetime = datetime.datetime(epoch_Y, epoch_m, epoch_d, epoch_H, epoch_M, epoch_S) + one_minute = datetime.timedelta(minutes=1) + epoch_datetime = initial_datetime - one_minute + epoch_timestamp = int(epoch_datetime.strftime('%s')) + + connect_timeout = 5 + read_timeout = 5 + use_timeout = (int(connect_timeout), int(read_timeout)) + + flux_auth = None + if settings.WEBAPP_AUTH_ENABLED: + flux_auth = (str(settings.WEBAPP_AUTH_USER), str(settings.WEBAPP_AUTH_USER_PASSWORD)) + + post_count = 0 + posted_count = 0 + for metric in snab_flux_load_test_metrics: + if not post_count: + post_data_dict = { + 'key': settings.FLUX_SELF_API_KEY, + 'metrics': [] + } + if post_count < SNAB_FLUX_LOAD_TEST_METRICS_PER_POST: + post_data_dict['metrics'].append({'metric': metric, 'timestamp': str(epoch_timestamp), 
'value': str(round(random.random(), 2))}) + post_count += 1 + if post_count == SNAB_FLUX_LOAD_TEST_METRICS_PER_POST: + response = None + try: + response = requests.post(FLUX_POST_URL, auth=flux_auth, json=post_data_dict, timeout=use_timeout, verify=settings.VERIFY_SSL) + except: + logger.error(traceback.format_exc()) + logger.error('error :: failed to post %s metrics, sleeping for 1 second' % ( + str(post_count))) + response = None + sleep(1) + if response: + logger.info('posted %s metrics to flux with status code %s returned' % (str(post_count), str(response.status_code))) + posted_count += post_count + post_count = 0 + running_for = int(time()) - current_timestamp + if running_for > 55: + logger.info('load test has run for longer than 55 seconds, stopping') + post_count = 0 + break + + if post_count: + response = None + try: + response = requests.post(FLUX_POST_URL, auth=flux_auth, json=post_data_dict, timeout=use_timeout, verify=settings.VERIFY_SSL) + except: + logger.error(traceback.format_exc()) + logger.error('error :: failed to post %s metrics' % ( + str(post_count))) + response = None + if response: + posted_count += post_count + + spin_end = time() - spin_start + logger.info('spin_snab_flux_load_test_process posted %s metrics to flux in %.2f seconds' % (str(posted_count), spin_end)) + + return + + def run(self): + """ + - Called when the process intializes. + + - Determine if Redis is up and discover checks to run. + + - Divide and assign each process a metric check to analyse and add + results to source Redis set. + + - Wait for the processes to finish. 
+ + """ + + # Log management to prevent overwriting + # Allow the bin/.d to manage the log + if os.path.isfile(skyline_app_logwait): + try: + os.remove(skyline_app_logwait) + except OSError: + logger.error('error - failed to remove %s, continuing' % skyline_app_logwait) + pass + + now = time() + log_wait_for = now + 5 + while now < log_wait_for: + if os.path.isfile(skyline_app_loglock): + sleep(.1) + now = time() + else: + now = log_wait_for + 1 + + logger.info('starting %s run' % skyline_app) + if os.path.isfile(skyline_app_loglock): + logger.error('error - bin/%s.d log management seems to have failed, continuing' % skyline_app) + try: + os.remove(skyline_app_loglock) + logger.info('log lock file removed') + except OSError: + logger.error('error - failed to remove %s, continuing' % skyline_app_loglock) + pass + else: + logger.info('bin/%s.d log management done' % skyline_app) + + logger.info('starting SNAB_flux_load_test') + + while 1: + now = time() + # Make sure Redis is up + try: + self.redis_conn.ping() + logger.info('pinged Redis via get_redis_conn') + except: + logger.error(traceback.format_exc()) + logger.error('error :: cannot connect to redis at socket path %s' % settings.REDIS_SOCKET_PATH) + sleep(10) + try: + self.redis_conn = get_redis_conn(skyline_app) + logger.info('connected via get_redis_conn') + except: + logger.error(traceback.format_exc()) + logger.error('error :: not connected via get_redis_conn') + continue + try: + self.redis_conn_decoded.ping() + logger.info('pinged Redis via get_redis_conn_decoded') + except: + logger.error(traceback.format_exc()) + logger.error('error :: not connected via get_redis_conn_decoded') + sleep(10) + try: + self.redis_conn_decoded = get_redis_conn_decoded(skyline_app) + logger.info('connected via get_redis_conn_decoded') + except: + logger.error(traceback.format_exc()) + logger.error('error :: cannot connect to get_redis_conn_decoded') + continue + + """ + Run load test + """ + while True: + + current_timestamp = 
int(time()) + + logger.info('snab_flux_load_test - running load test') + + # Spawn processes + pids = [] + spawned_pids = [] + pid_count = 0 + p = Process(target=self.spin_snab_flux_load_test_process, args=(current_timestamp,)) + pids.append(p) + pid_count += 1 + logger.info('starting 1 of %s spin_snab_process' % (str(pid_count))) + p.start() + spawned_pids.append(p.pid) + + # Send wait signal to zombie processes + # for p in pids: + # p.join() + # Self monitor processes and terminate if any spin_snab_process + # that has run for longer than 58 seconds + p_starts = time() + while time() - p_starts <= 58: + if any(p.is_alive() for p in pids): + # Just to avoid hogging the CPU + sleep(.1) + else: + # All the processes are done, break now. + time_to_run = time() - p_starts + logger.info('1 spin_snab_flux_load_test_process completed in %.2f seconds' % (time_to_run)) + break + else: + # We only enter this if we didn't 'break' above. + logger.info('timed out, killing spin_snab_flux_load_test_process process') + for p in pids: + p.terminate() + # p.join() + + for p in pids: + if p.is_alive(): + logger.info('stopping spin_snab_flux_load_test_process - %s' % (str(p.is_alive()))) + p.join() + + process_runtime = time() - current_timestamp + if process_runtime < 60: + sleep_for = (60 - process_runtime) + logger.info('sleeping for %.2f seconds' % sleep_for) + sleep(sleep_for) + try: + del sleep_for + except: + pass diff --git a/skyline/snab/snab_flux_load_test_agent.py b/skyline/snab/snab_flux_load_test_agent.py new file mode 100644 index 00000000..0dd85d58 --- /dev/null +++ b/skyline/snab/snab_flux_load_test_agent.py @@ -0,0 +1,103 @@ +import logging +import sys +from os import getpid +from os.path import isdir +from daemon import runner +from time import sleep +from sys import version_info + +from logging.handlers import TimedRotatingFileHandler, MemoryHandler + +import os.path +sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) 
+sys.path.insert(0, os.path.dirname(__file__))
+
+if True:
+    import settings
+    from validate_settings import validate_settings_variables
+    from snab_flux_load_test import SNAB_flux_load_test
+
+skyline_app = 'snab_flux_load_test'
+skyline_app_logger = 'snab_flux_load_testLog'
+logger = logging.getLogger(skyline_app_logger)
+logfile = '%s/%s.log' % (settings.LOG_PATH, skyline_app)
+python_version = int(version_info[0])
+
+
+class SNAB_flux_load_test_Agent():
+    """
+    The daemon application object handed to python-daemon's DaemonRunner.
+
+    It carries the stdin/stdout/stderr/pidfile attributes set in __init__ and
+    a run() method that starts SNAB_flux_load_test and then idles.
+    """
+
+    def __init__(self):
+        self.stdin_path = '/dev/null'
+        self.stdout_path = '%s/%s.log' % (settings.LOG_PATH, skyline_app)  # same file as logfile
+        self.stderr_path = '%s/%s.log' % (settings.LOG_PATH, skyline_app)  # same file as logfile
+        self.pidfile_path = '%s/%s.pid' % (settings.PID_PATH, skyline_app)
+        self.pidfile_timeout = 5
+
+    def run(self):
+        logger.info('agent starting skyline %s' % skyline_app)
+        SNAB_flux_load_test(getpid()).start()  # the worker is given this process's pid
+
+        while 1:  # keep this process alive after start() returns
+            sleep(100)
+
+
+def run():
+    """
+    Check the PID and log directories exist, attach the rotating file logger,
+    validate settings.py, then run the SNAB_flux_load_test_Agent in the
+    foreground when argv[1] is 'run', otherwise through runner.DaemonRunner.
+    """
+    if not isdir(settings.PID_PATH):
+        print('pid directory does not exist at %s' % settings.PID_PATH)
+        sys.exit(1)
+
+    if not isdir(settings.LOG_PATH):
+        print('log directory does not exist at %s' % settings.LOG_PATH)
+        sys.exit(1)
+
+    logger.setLevel(logging.DEBUG)
+    formatter = logging.Formatter("%(asctime)s :: %(process)s :: %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
+    handler = logging.handlers.TimedRotatingFileHandler(
+        logfile,
+        when="midnight",
+        interval=1,
+        backupCount=5)
+
+    memory_handler = logging.handlers.MemoryHandler(256,
+                                                    flushLevel=logging.DEBUG,
+                                                    target=handler)
+    handler.setFormatter(formatter)
+    logger.addHandler(memory_handler)  # records reach the file handler via the memory handler
+
+    # Validate settings variables
+    valid_settings = validate_settings_variables(skyline_app)
+
+    if not valid_settings:
+        print('error :: invalid variables in settings.py - cannot start')
+        sys.exit(1)
+
+    snab_flux_load_test = SNAB_flux_load_test_Agent()
+
+    logger.info('starting snab_flux_load_test.run')
+
+    memory_handler.flush()  # was a bare attribute reference that never called flush
+
+    if len(sys.argv) > 1 and sys.argv[1] == 'run':
+        snab_flux_load_test.run()
+    else:
+        daemon_runner = runner.DaemonRunner(snab_flux_load_test)
+        daemon_runner.daemon_context.files_preserve = [handler.stream]  # keep the log file open when daemonizing
+        daemon_runner.do_action()
+
+    logger.info('stopping snab_flux_load_test')
+    memory_handler.flush()  # was a bare attribute reference that never called flush
+
+
+if __name__ == '__main__':
+    run()
diff --git a/utils/dawn/skyline.dawn.sh b/utils/dawn/skyline.dawn.sh
index 66b84a8a..8b3edc71 100755
--- a/utils/dawn/skyline.dawn.sh
+++ b/utils/dawn/skyline.dawn.sh
@@ -13,6 +13,7 @@
 # @modified 20191016 - Branch #3262: py3
 # @modified 20200703 - Task #3608: Update Skyline to Python 3.8.3 and deps
 #                      Branch #3262: py3
+# @modified 20201016 - Branch #3068: SNAB
 # @modified
 # @license
 # @source https://github.com/earthgecko/skyline/utils/dawn/skyline.dawn.sh
@@ -50,7 +51,7 @@ WEBAPP_AUTH_USER_PASSWORD="$(echo {$HOSTNAME}_skyline)" # The password you wa
 MYSQL_ROOT_PASSWORD="set_the-root-mysql-user-password" # The MySQL root user password
MYSQL_SKYLINE_PASSWORD="set_the-skyline-user-db-password" # The Skyline DB user password REDIS_PASSWORD="set_really_long_LONG-Redis-password" # The Redis password -SKYLINE_RELEASE="v2.0.0" # The Skyline release to deploy +SKYLINE_RELEASE="v2.0.1" # The Skyline release to deploy # @added 20191016 - Branch #3262: py3 INSTALL_GRAPHITE=0 # Install Graphite 0 = no, 1 = yes (CentOS 6 only) @@ -84,6 +85,11 @@ fi #REDIS_VERSION="redis-3.2.12" # @modified 20200703 - Task #3608: Update Skyline to Python 3.8.3 and deps #REDIS_VERSION="redis-4.0.14" +# @modified 20201016 - Branch #3068: SNAB +#REDIS_VERSION="redis-5.0.8" +#REDIS_VERSION="redis-6.0.8" +# Reverted to 5.0.8 as 6 requires different service files which need to be +# done REDIS_VERSION="redis-5.0.8" # @modified 20190412 - Task #2926: Update dependencies @@ -92,7 +98,9 @@ REDIS_VERSION="redis-5.0.8" # @modified 20200703 - Task #3608: Update Skyline to Python 3.8.3 and deps #PYTHON_VERSION="3.7.6" #PYTHON_MAJOR_VERSION="3.7" -PYTHON_VERSION="3.8.3" +# @modified 20201016 - Branch #3068: SNAB +#PYTHON_VERSION="3.8.3" +PYTHON_VERSION="3.8.6" PYTHON_MAJOR_VERSION="3.8" PYTHON_VIRTUALENV_DIR="/opt/python_virtualenv" @@ -100,9 +108,11 @@ PYTHON_VIRTUALENV_DIR="/opt/python_virtualenv" #PROJECT="skyline-py2714" # @modified 20200703 - Task #3608: Update Skyline to Python 3.8.3 and deps #PROJECT="skyline-py376" -PROJECT="skyline-py383" +PROJECT="skyline-py386" #VIRTUALENV_VERSION="15.2.0" -VIRTUALENV_VERSION="16.7.9" +# @modified 20201016 - Branch #3068: SNAB +#VIRTUALENV_VERSION="16.7.9" +VIRTUALENV_VERSION="16.7.10" # @modified 20200703 - Task #3608: Update Skyline to Python 3.8.3 and deps #OPENSSL_VERSION="1.1.1d" @@ -609,7 +619,9 @@ if [ ! 
-f "${PYTHON_VIRTUALENV_DIR}/versions/${PYTHON_VERSION}/bin/python${PYTHO # @modified 20200703 - Task #3608: Update Skyline to Python 3.8.3 and deps if [ "$OS_MAJOR_VERSION" == "8" ]; then - pip3 install --user virtualenv + # @modified 20201016 - Branch #3068: SNAB + # pip3 install --user virtualenv + pip3 install virtualenv else pip install virtualenv==${VIRTUALENV_VERSION} fi @@ -977,7 +989,9 @@ if [ ! -f /opt/skyline/github/skyline/skyline/settings.py.original ]; then | sed -e 's/MEMCACHE_ENABLED = .*/MEMCACHE_ENABLED = True/g' \ | sed -e "s/PANORAMA_DBUSER = .*/PANORAMA_DBUSER = 'skyline'/g" \ | sed -e "s/HORIZON_IP = .*/HORIZON_IP = '127.0.0.1'/g" \ - | sed -e "s/PANORAMA_DBUSERPASS = .*/PANORAMA_DBUSERPASS = '$MYSQL_SKYLINE_PASSWORD'/g" > /opt/skyline/github/skyline/skyline/settings.py + | sed -e "s/PANORAMA_DBUSERPASS = .*/PANORAMA_DBUSERPASS = '$MYSQL_SKYLINE_PASSWORD'/g" \ + | sed -e "s/CARBON_PORT = .*/CARBON_PORT = 2014/g" \ + | sed -e "s/VERIFY_SSL = .*/VERIFY_SSL = False/g" > /opt/skyline/github/skyline/skyline/settings.py if [ $? 
-ne 0 ]; then echo "error :: failed to populate the variables in /opt/skyline/github/skyline/skyline/settings.py" exit 1 @@ -1073,20 +1087,12 @@ else systemctl restart apache2 fi -echo "Seeding Skyline with data" -sleep 2 -cd "${PYTHON_VIRTUALENV_DIR}/projects/${PROJECT}" || exit 1 -source bin/activate -bin/python${PYTHON_MAJOR_VERSION} /opt/skyline/github/skyline/utils/seed_data.py -deactivate -cd /tmp || exit - # @added 20191016 - Branch #3262: py3 # Allow to install Graphite on CentOS 6 for now, allows for an end to end # testing environment DO_GRAPHITE_INSTALL=0 if [ "$OS" == "CentOS" ]; then - if [ "$OS_MAJOR_VERSION" == "6" ]; then + if [[ "$OS_MAJOR_VERSION" == "6" || "$OS_MAJOR_VERSION" == "8" ]]; then if [ -z "$INSTALL_GRAPHITE" ]; then echo "Not installing Graphite" else @@ -1109,6 +1115,10 @@ if [ $DO_GRAPHITE_INSTALL -eq 1 ]; then memcached \ libffi-devel fi + if [ $CENTOS_8 -eq 1 ]; then + yum -y install nginx cairo cairo-devel openssl-devel bzip2-devel \ + sqlite-devel memcached libffi-devel + fi if [ "$OS" == "Ubuntu" ]; then if [ "$OS_MAJOR_VERSION" == "16.04" ]; then sudo apt -y install python-dev python-pip libcairo2-dev libffi-dev build-essential nginx @@ -1120,6 +1130,12 @@ if [ $DO_GRAPHITE_INSTALL -eq 1 ]; then echo "Setting up the Graphite virtualenv" sleep 1 cd /opt || exit 1 + + # @added 20201016 - Branch #3068: SNAB + # As per https://github.com/graphite-project/graphite-web/issues/2566 + pip${PYTHON_MAJOR_VERSION} uninstall -y virtualenv + pip${PYTHON_MAJOR_VERSION} install virtualenv==16.7.10 + virtualenv --python="${PYTHON_VIRTUALENV_DIR}/versions/${PYTHON_VERSION}/bin/python${PYTHON_MAJOR_VERSION}" graphite else echo "Skipping, setting up the Graphite virtualenv, already done." 
@@ -1131,9 +1147,13 @@ if [ $DO_GRAPHITE_INSTALL -eq 1 ]; then
   source bin/activate
   export PYTHONPATH="/opt/graphite/lib/:/opt/graphite/webapp/"
 
-  bin/"pip${PYTHON_MAJOR_VERSION}" install --no-binary=:all: https://github.com/graphite-project/whisper/tarball/master
-  bin/"pip${PYTHON_MAJOR_VERSION}" install --no-binary=:all: https://github.com/graphite-project/carbon/tarball/master
-  bin/"pip${PYTHON_MAJOR_VERSION}" install --no-binary=:all: https://github.com/graphite-project/graphite-web/tarball/master
+#  bin/"pip${PYTHON_MAJOR_VERSION}" install --no-binary=:all: https://github.com/graphite-project/whisper/tarball/master
+#  bin/"pip${PYTHON_MAJOR_VERSION}" install --no-binary=:all: https://github.com/graphite-project/carbon/tarball/master
+#  bin/"pip${PYTHON_MAJOR_VERSION}" install --no-binary=:all: https://github.com/graphite-project/graphite-web/tarball/master
+  bin/"pip${PYTHON_MAJOR_VERSION}" install --no-binary=:all: https://github.com/graphite-project/whisper/archive/1.1.7.tar.gz
+  bin/"pip${PYTHON_MAJOR_VERSION}" install --no-binary=:all: https://github.com/graphite-project/carbon/archive/1.1.7.tar.gz
+  bin/"pip${PYTHON_MAJOR_VERSION}" install --no-binary=:all: https://github.com/graphite-project/graphite-web/archive/1.1.7.tar.gz
+
   bin/"pip${PYTHON_MAJOR_VERSION}" install gunicorn
 
   sed "s/#SECRET_KEY.*/SECRET_KEY = '$(date +%s | sha256sum | base64 | head -c 64)'/g" \
@@ -1142,12 +1162,30 @@ if [ $DO_GRAPHITE_INSTALL -eq 1 ]; then
   GRAPHITE_ROOT="/opt/graphite"
   PYTHONPATH=$GRAPHITE_ROOT/webapp "/opt/graphite/lib/python${PYTHON_MAJOR_VERSION}/site-packages/django/bin/django-admin.py" migrate --settings=graphite.settings --run-syncdb
 
+  # @added 20201016 - Branch #3068: SNAB
+  # As per https://github.com/graphite-project/graphite-web/issues/2566
+  deactivate
+  cd
+  pip${PYTHON_MAJOR_VERSION} uninstall -y virtualenv
+  pip${PYTHON_MAJOR_VERSION} install virtualenv  # the "install" subcommand was missing, so virtualenv was never put back
+
   if [ "$OS" == "CentOS" ]; then
-    if [ $CENTOS_6 -eq 1 ]; then
-      sudo chown nginx:nginx /opt/graphite/storage/graphite.db
-      rm -f /etc/nginx/conf.d/default.conf
-      NGINX_GRAPHITE_CONFIG="/etc/nginx/conf.d/graphite.conf"
-    fi
+    sudo chown nginx:nginx /opt/graphite/storage/graphite.db
+    rm -f /etc/nginx/conf.d/default.conf
+    NGINX_GRAPHITE_CONFIG="/etc/nginx/conf.d/graphite.conf"
+    cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.bak
+    : > /etc/nginx/nginx.conf.new  # start empty so a re-run does not append to an earlier copy
+    while IFS= read -r line; do  # -r and IFS= keep each config line verbatim
+      ADD=$(printf '%s\n' "$line" | grep -c server)
+      if [ "$ADD" -eq 0 ]; then
+        printf '%s\n' "$line" >> /etc/nginx/nginx.conf.new  # quoted: no glob expansion of include paths
+      else
+        break  # stop copying at the first line that mentions "server"
+      fi
+    done < /etc/nginx/nginx.conf
+    echo "}" >> /etc/nginx/nginx.conf.new
+    cat /etc/nginx/nginx.conf.new > /etc/nginx/nginx.conf
+
   fi
   if [ "$OS" == "Ubuntu" ]; then
     if [ "$OS_MAJOR_VERSION" == "16.04" ]; then
@@ -1167,7 +1205,7 @@ server {
     server_name $YOUR_SKYLINE_SERVER_FQDN;
 
     allow $YOUR_OTHER_IP_ADDRESS/32;
-    allow $USE_IP/32;
+    allow $YOUR_SERVER_IP_ADDRESS/32;
     deny all;
 
     root /opt/graphite/webapp;
@@ -1201,10 +1239,11 @@ server {
 }" > "$NGINX_GRAPHITE_CONFIG"
 
   if [ "$OS" == "CentOS" ]; then
+    # SELinux prevents nginx from initiating outbound connections
+    setsebool -P httpd_can_network_connect 1
+    chcon -Rt httpd_sys_content_t /opt/graphite/webapp/
+    semanage port -a -t http_port_t -p tcp 8888 || semanage port -m -t http_port_t -p tcp 8888  # -a fails when the port is already defined
     if [ $CENTOS_6 -eq 1 ]; then
-      # SELinux prevents nginx from initiating outbound connections
-      setsebool -P httpd_can_network_connect 1
-      chcon -Rt httpd_sys_content_t /opt/graphite/webapp/
       /etc/init.d/nginx start
       chkconfig nginx on
     fi
@@ -1237,9 +1276,9 @@ server {
       PYTHONPATH=/opt/graphite/webapp /opt/graphite/bin/gunicorn wsgi --workers=4 --bind=127.0.0.1:8080 --log-file=/var/log/gunicorn.log --preload --pythonpath=/opt/graphite/webapp/graphite &
     fi
   fi
-  if [ "$OS" == "Ubuntu" ]; then
+  if [[ "$OS" == "Ubuntu" || "$OS" == "CentOS" ]]; then
 #    if [ "$OS_MAJOR_VERSION" == "16.04" ]; then
-    if [[ "$OS_MAJOR_VERSION" == "16.04" || "$OS_MAJOR_VERSION" == "18.04" ]]; then
+    if [[ "$OS_MAJOR_VERSION" == "16.04" || "$OS_MAJOR_VERSION" == "18.04" || $CENTOS_8 -eq 1 ]]; then
       echo "[Unit]
 Description=carbon-cache instance %i (Graphite)
 
@@ -1310,7 +1349,7 @@ WantedBy = multi-user.target" > /etc/systemd/system/graphite.service
   fi
   SKYLINE_SERVER_FQDN_IN_HOSTS=$(cat /etc/hosts | grep -c "$YOUR_SKYLINE_SERVER_FQDN")
   if [ $SKYLINE_SERVER_FQDN_IN_HOSTS -eq 0 ]; then
-    echo "$USE_IP $YOUR_SKYLINE_SERVER_FQDN" >> /etc/hosts
+    echo "$YOUR_SERVER_IP_ADDRESS $YOUR_SKYLINE_SERVER_FQDN" >> /etc/hosts
   fi
 
   echo "Restarting Skyline services"
@@ -1344,6 +1383,14 @@ WantedBy = multi-user.target" > /etc/systemd/system/graphite.service
   deactivate
 fi
 
+echo "Seeding Skyline with data"
+sleep 2
+cd "${PYTHON_VIRTUALENV_DIR}/projects/${PROJECT}" || exit 1
+source bin/activate
+bin/python${PYTHON_MAJOR_VERSION} /opt/skyline/github/skyline/utils/seed_data.py
+deactivate
+cd /tmp || exit
+
 echo "Skyline is deployed and running"
 echo "Please visit https://$YOUR_SKYLINE_SERVER_FQDN"
 echo "And view the logs in /var/log/skyline"