From 16a7ad95f7df7663322dd5d8d075280c92459137 Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Mon, 30 Dec 2024 10:59:49 -0600 Subject: [PATCH 01/22] Dockerfile with correct python versions installing --- tacc_stats/site/Dockerfile | 18 ++++++++ tacc_stats/site/requirements.txt | 76 ++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 tacc_stats/site/Dockerfile create mode 100644 tacc_stats/site/requirements.txt diff --git a/tacc_stats/site/Dockerfile b/tacc_stats/site/Dockerfile new file mode 100644 index 0000000..932d45e --- /dev/null +++ b/tacc_stats/site/Dockerfile @@ -0,0 +1,18 @@ +# pull official base image +FROM python:3.6 + +# set work directory +WORKDIR ../../../build/ + +# set environment variables +ENV PYTHONDONTWRITEBYTECODE 1 +ENV PYTHONUNBUFFERED 1 + +# install dependencies +RUN pip install --upgrade pip +COPY ./requirements.txt . +RUN pip install -r requirements.txt + +# copy project +COPY . . + diff --git a/tacc_stats/site/requirements.txt b/tacc_stats/site/requirements.txt new file mode 100644 index 0000000..321b2ad --- /dev/null +++ b/tacc_stats/site/requirements.txt @@ -0,0 +1,76 @@ +agavepy==0.9.5 +ansible-cmdb==1.31 +asgiref==3.4.1 +attrs==21.2.0 +backports.ssl-match-hostname==3.7.0.1 +bokeh==2.3.3 +certifi==2021.10.8 +cffi==1.15.1 +charset-normalizer==2.0.9 +cheroot==8.5.2 +CherryPy==18.6.1 +cloudpickle==2.0.0 +configparser==5.2.0 +cryptography==40.0.2 +cycler==0.11.0 +Django==3.1.14 +future==0.18.2 +gitdb==4.0.9 +GitPython==3.1.20 +idna==3.3 +importlib-metadata==4.8.2 +importlib-resources==5.4.0 +iniconfig==1.1.1 +jaraco.classes==3.2.1 +jaraco.collections==3.4.0 +jaraco.functools==3.4.0 +jaraco.text==3.6.0 +Jinja2==3.0.3 +jsonxs==0.6 +kiwisolver==1.3.1 +Mako==1.1.6 +MarkupSafe==2.0.1 +matplotlib==3.3.4 +more-itertools==8.12.0 +mysql==0.0.3 +mysql-connector-python==8.0.33 +mysqlclient==2.1.1 +numpy==1.19.5 +packaging==21.3 +pandas==1.1.5 +pgcopy==1.5.0 +pika==1.2.0 +Pillow==8.4.0 +pip==21.3.1 +pluggy==1.0.0 +portend==3.0.0 +protobuf==3.19.1 +psycopg2==2.9.2 +psycopg2-binary==2.9.2 +py==1.11.0 +pycparser==2.21 +PyMySQL==1.0.2 +pyparsing==3.0.6 +pytest==6.2.5 +python-dateutil==2.8.2 +python-hostlist==1.21 +python-memcached==1.59 +pytz==2021.3 +PyYAML==6.0 +requests==2.26.0 +requests-toolbelt==0.9.1 +setuptools==39.2.0 +six==1.16.0 +smmap==5.0.0 +sqlparse==0.4.2 +tempora==4.1.2 +termcolor==1.1.0 +toml==0.10.2 +tornado==6.1 +typing==3.7.4.3 +typing_extensions==4.0.1 +urllib3==1.26.7 +ushlex==0.99.1 +websocket-client==0.53.0 +zc.lockfile==2.0 +zipp==3.6.0 From f076d72a8e9ae763a7cf5956531a9373f0228eb0 Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Tue, 31 Dec 2024 12:33:57 -0600 Subject: [PATCH 02/22] Untested DJango Docker container --- tacc_stats/site/Dockerfile => Dockerfile | 0 tacc_stats/site/requirements.txt => requirements.txt | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tacc_stats/site/Dockerfile => Dockerfile (100%) rename tacc_stats/site/requirements.txt => requirements.txt (100%) diff --git a/tacc_stats/site/Dockerfile b/Dockerfile similarity index 100% rename from tacc_stats/site/Dockerfile rename to Dockerfile diff --git a/tacc_stats/site/requirements.txt b/requirements.txt similarity index 100% rename from tacc_stats/site/requirements.txt rename to requirements.txt From 23483d8df1f9bee06e7a3134861b17c74f37502d Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Tue, 31 Dec 2024 12:36:59 -0600 Subject: [PATCH 03/22] First attempt at making Django handle all tables, including timescale hypertables --- Dockerfile | 9 +++- requirements.txt | 1 + setup.py | 2 +- tacc_stats/conf_parser.py | 2 +- tacc_stats/site/machine/models.py | 80 ++++++++++++++++++++++++++++++- 5 files changed, 89 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 932d45e..3e62dc9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,12 +2,13 @@ FROM python:3.6 # set work directory -WORKDIR ../../../build/ +WORKDIR ../../../build # set environment variables ENV PYTHONDONTWRITEBYTECODE 1 ENV PYTHONUNBUFFERED 1 + # install dependencies RUN pip install --upgrade pip COPY ./requirements.txt . @@ -15,4 +16,10 @@ RUN pip install -r requirements.txt # copy project COPY . . +#RUN echo $(pwd) +#RUN echo $(ls -la) +RUN python setup.py install + +COPY tacc_stats.ini tacc_stats/site/ +#RUN cd tacc_stats/site && python manage.py migrate diff --git a/requirements.txt b/requirements.txt index 321b2ad..3d23aea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -74,3 +74,4 @@ ushlex==0.99.1 websocket-client==0.53.0 zc.lockfile==2.0 zipp==3.6.0 +django-timescaledb diff --git a/setup.py b/setup.py index 73ad787..58f3297 100644 --- a/setup.py +++ b/setup.py @@ -49,7 +49,7 @@ include_package_data = True, scripts = scripts, install_requires = ['argparse','numpy', 'psycopg2-binary', 'pandas', 'pgcopy', - 'bokeh', 'django==3.2.25', 'python-hostlist', 'PyMySQL', 'mod_wsgi', + 'bokeh', 'django==3.1.14', 'python-hostlist', 'PyMySQL', 'mysql-connector-python', 'python-memcached', 'pika', 'mysqlclient'], platforms = 'any', classifiers = [ diff --git a/tacc_stats/conf_parser.py b/tacc_stats/conf_parser.py index 438e9ff..8122e21 100644 --- a/tacc_stats/conf_parser.py +++ b/tacc_stats/conf_parser.py @@ -9,7 +9,7 @@ cfg = configparser.ConfigParser() # Append your local repository path here: -cfg.read('/home/username/tacc_stats/tacc_stats.ini') +cfg.read('tacc_stats.ini') def get_db_connection_string(): temp_string = "dbname={0} user="+cfg.get('PORTAL', 'username')+" password="+cfg.get('PORTAL', 'password')+" port="+cfg.get('PORTAL', 'port') diff --git a/tacc_stats/site/machine/models.py b/tacc_stats/site/machine/models.py index 1c87d50..7db379f 100644 --- a/tacc_stats/site/machine/models.py +++ b/tacc_stats/site/machine/models.py @@ -4,6 +4,13 @@ from django.forms import ModelForm from django.contrib.postgres.fields import ArrayField +from timescale.db.models.models import TimescaleModel + +class RealField(models.FloatField): + # Use 32 bit floats (reals) instead of 64 bit floats + def db_type(self, connection): + return "real" + # manage.py inspectdb class job_data(models.Model): @@ -24,7 +31,6 @@ class job_data(models.Model): host_list = ArrayField(models.TextField()) class Meta: - managed = False db_table = 'job_data' def __unicode__(self): @@ -47,9 +53,79 @@ class metrics_data(models.Model): value = models.FloatField(blank=True, null=True) class Meta: - managed = False + #managed = False db_table = 'metrics_data' unique_together = (('jid', 'type', 'metric'),) def __unicode__(self): return str(self.jid + '_' + type + '_' + metric) + +#Old Table SQL +""" + query_create_hostdata_table = CREATE TABLE IF NOT EXISTS host_data ( + time TIMESTAMPTZ NOT NULL, + host VARCHAR(64), + jid VARCHAR(32), + type VARCHAR(32), + dev VARCHAR(64), + event VARCHAR(64), + unit VARCHAR(16), + value real, + delta real, + arc real, + UNIQUE (time, host, type, event) + ); + + CREATE INDEX ON host_data (host, time DESC); + CREATE INDEX ON host_data (jid, time DESC); + + query_create_compression = ALTER TABLE host_data SET \ + (timescaledb.compress, timescaledb.compress_orderby = 'time DESC', timescaledb.compress_segmentby = 'host,jid,type,event'); + SELECT add_compression_policy('host_data', INTERVAL '12h', if_not_exists => true); + + + query_create_process_table = CREATE TABLE IF NOT EXISTS proc_data ( + jid VARCHAR(32) NOT NULL, + host VARCHAR(64), + proc VARCHAR(512), + UNIQUE(jid, host, proc) + ); + + query_create_process_index = "CREATE INDEX ON proc_data (jid);" +""" + + + + +class host_data(TimescaleModel): + # time field is configured in the parent class + host = models.CharField(max_length=64, blank=True, null=True) + jid = models.ForeignKey(job_data, on_delete = models.CASCADE, db_column='jid', blank=True, null=True) + type = models.CharField(max_length=32, blank=True, null=True) + dev = models.CharField(max_length=64, blank=True, null=True) + event = models.CharField(max_length=64, blank=True, null=True) + unit = models.CharField(max_length=16, blank=True, null=True) + value = RealField(null=True) + arc = RealField(null=True) + delta = RealField(null=True) + + class Meta: + db_table = 'host_data' + abstract = True + unique_together = (('time', 'host', 'type', 'event'),) + indexes = [ + models.Index(fields=["host", "time"]), + models.Index(fields=["jid", "time"]), + ] + +class proc_data(models.Model): + jid = models.ForeignKey(job_data, on_delete = models.CASCADE, db_column='jid', blank=True, null=True) + host = models.CharField(max_length=64, blank=True, null=True) + proc = models.CharField(max_length=512, blank=True, null=True) + + class Meta: + db_table = 'host_data' + unique_together = (('jid', 'host', 'proc'),) + indexes = [ + models.Index(fields=["jid"]), + ] From 1a237f82e32751dfa03203d90e82a5155a6fddc6 Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Tue, 31 Dec 2024 13:09:27 -0600 Subject: [PATCH 04/22] Update some of the timescale fields --- tacc_stats/site/machine/models.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tacc_stats/site/machine/models.py b/tacc_stats/site/machine/models.py index 7db379f..74feb9f 100644 --- a/tacc_stats/site/machine/models.py +++ b/tacc_stats/site/machine/models.py @@ -4,10 +4,11 @@ from django.forms import ModelForm from django.contrib.postgres.fields import ArrayField -from timescale.db.models.models import TimescaleModel +from timescale.db.models.fields import TimescaleDateTimeField +from timescale.db.models.managers import TimescaleManager class RealField(models.FloatField): - # Use 32 bit floats (reals) instead of 64 bit floats + # Make type in order to use 32 bit floats (reals) instead of 64 bit floats def db_type(self, connection): return "real" @@ -94,13 +95,13 @@ def __unicode__(self): query_create_process_index = "CREATE INDEX ON proc_data (jid);" """ +# TODO: Compression in migration.py - -class host_data(TimescaleModel): - # time field is configured in the parent class +class host_data(models.Model): + time = TimescaleDateTimeField(interval="1 day") host = models.CharField(max_length=64, blank=True, null=True) - jid = models.ForeignKey(job_data, on_delete = models.CASCADE, db_column='jid', blank=True, null=True) + jid = models.CharField(max_length=32, blank=True, null=True) type = models.CharField(max_length=32, blank=True, null=True) dev = models.CharField(max_length=64, blank=True, null=True) event = models.CharField(max_length=64, blank=True, null=True) @@ -109,6 +110,8 @@ class host_data(TimescaleModel): arc = RealField(null=True) delta = RealField(null=True) + objects = models.Manager() + timescale = TimescaleManager() class Meta: db_table = 'host_data' abstract = True @@ -124,7 +127,7 @@ class proc_data(models.Model): proc = models.CharField(max_length=512, blank=True, null=True) class Meta: - db_table = 'host_data' + db_table = 'proc_data' unique_together = (('jid', 'host', 'proc'),) indexes = [ models.Index(fields=["jid"]), From 51460b338c390d385fa8a57504d18268eb7d50dc Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Tue, 31 Dec 2024 14:32:51 -0600 Subject: [PATCH 05/22] Docker compose now can pull rabbitmq and timescaledb. nothing is connected yet. --- Dockerfile | 2 +- docker-compose.yml | 42 +++++++++++++++++++++ tacc_stats/site/tacc_stats_site/settings.py | 12 ++++-- 3 files changed, 52 insertions(+), 4 deletions(-) create mode 100644 docker-compose.yml diff --git a/Dockerfile b/Dockerfile index 3e62dc9..3220ec5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,6 +20,6 @@ COPY . . #RUN echo $(ls -la) RUN python setup.py install -COPY tacc_stats.ini tacc_stats/site/ +COPY ./tacc_stats.ini . #RUN cd tacc_stats/site && python manage.py migrate diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..9ca1d3e --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,42 @@ +version: '3.8' + +services: + web: + build: ./ + command: python tacc_stats/site/manage.py runserver 0.0.0.0:8000 + volumes: + - ./:/usr/src/hpcstats/ + ports: + - 8000:8000 + env_file: + - ./.env.dev + + + db: + image: timescale/timescaledb:latest-pg15 + volumes: + - postgres_data:/var/lib/postgresql/data/ + environment: + - POSTGRES_USER=hello_django + - POSTGRES_PASSWORD=hello_django + - POSTGRES_DB=hello_django_dev + + rabbitmq: + image: rabbitmq:4-alpine + container_name: 'rabbitmq' + ports: + - 5672:5672 + - 15672:15672 + volumes: + - /tmp/rabbitmq/data/:/var/lib/rabbitmq/ + - /tmp/rabbitmq/log/:/var/log/rabbitmq + networks: + - rabbitmq_go_net + +networks: + rabbitmq_go_net: + driver: bridge + +volumes: + postgres_data: + diff --git a/tacc_stats/site/tacc_stats_site/settings.py b/tacc_stats/site/tacc_stats_site/settings.py index 496629f..1bc59eb 100644 --- a/tacc_stats/site/tacc_stats_site/settings.py +++ b/tacc_stats/site/tacc_stats_site/settings.py @@ -13,7 +13,9 @@ messages.ERROR: 'danger', } -DEBUG = True +# For dockerization +SECRET_KEY = os.environ.get("SECRET_KEY") +DEBUG = bool(os.environ.get("DEBUG", default=0)) ADMINS = ( ('Stephen Lien Harrell', 'sharrell@tacc.utexas.edu'), @@ -42,10 +44,14 @@ } } - # Hosts/domain names that are valid for this site; required if DEBUG is False # See https://docs.djangoproject.com/en/1.5/ref/settings/#allowed-hosts -ALLOWED_HOSTS = ['*'] +#ALLOWED_HOSTS = ['*'] + +# For dockerization +# 'DJANGO_ALLOWED_HOSTS' should be a single string of hosts with a space between each. +# For example: 'DJANGO_ALLOWED_HOSTS=localhost 127.0.0.1 [::1]' +ALLOWED_HOSTS = os.environ.get("DJANGO_ALLOWED_HOSTS").split(" ") # Local time zone for this installation. Choices can be found here: # http://en.wikipedia.org/wiki/List_of_tz_zones_by_name From 38e29f4940718a2dd0435bff1454ddc63e8f9a34 Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Wed, 1 Jan 2025 13:31:41 -0600 Subject: [PATCH 06/22] Database now connected with web, rabbitmq is running and accepting requests, but cant connect to the web yet. --- Dockerfile | 30 ++-- django_startup.sh | 22 +++ docker-compose.yml | 36 ++--- requirements.txt | 2 +- setup.py | 8 +- supervisord.conf | 12 ++ tacc_stats.ini | 14 +- tacc_stats/listend.py | 1 + tacc_stats/listend2.py | 58 ++++++++ tacc_stats/site/app.py | 5 + .../site/machine/migrations/0001_initial.py | 130 ++++++++++++++++++ .../site/machine/migrations/__init__.py | 0 tacc_stats/site/machine/models.py | 18 ++- tacc_stats/site/machine/views.py | 22 ++- tacc_stats/site/tacc_stats_site/settings.py | 9 +- 15 files changed, 309 insertions(+), 58 deletions(-) create mode 100755 django_startup.sh create mode 100644 supervisord.conf create mode 100644 tacc_stats/listend2.py create mode 100644 tacc_stats/site/app.py create mode 100644 tacc_stats/site/machine/migrations/0001_initial.py create mode 100644 tacc_stats/site/machine/migrations/__init__.py diff --git a/Dockerfile b/Dockerfile index 3220ec5..2a1b362 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,25 +1,29 @@ # pull official base image -FROM python:3.6 +FROM python:3.6.15 -# set work directory -WORKDIR ../../../build +RUN useradd -ms /bin/bash hpcstats +WORKDIR /home/hpcstats -# set environment variables -ENV PYTHONDONTWRITEBYTECODE 1 -ENV PYTHONUNBUFFERED 1 +# run as root +RUN apt-get update && apt-get upgrade -y +RUN apt-get install netcat supervisor -y +USER hpcstats +# run as user +ENV PYTHONDONTWRITEBYTECODE 1 +ENV PYTHONUNBUFFERED 1 +ENV PATH $PATH:/home/hpcstats/.local/bin # install dependencies RUN pip install --upgrade pip -COPY ./requirements.txt . +COPY --chown=hpcstats:hpcstats ./requirements.txt . RUN pip install -r requirements.txt # copy project -COPY . . -#RUN echo $(pwd) -#RUN echo $(ls -la) -RUN python setup.py install +COPY --chown=hpcstats:hpcstats . . +RUN pip install . + +COPY --chown=hpcstats:hpcstats ./tacc_stats.ini . -COPY ./tacc_stats.ini . -#RUN cd tacc_stats/site && python manage.py migrate +ADD supervisord.conf /etc/supervisor/conf.d/supervisord.conf diff --git a/django_startup.sh b/django_startup.sh new file mode 100755 index 0000000..0c4fed0 --- /dev/null +++ b/django_startup.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +echo "Waiting for postgres..." + +while ! nc -z db 5432; do + sleep 0.1 +done + +echo "PostgreSQL started" + +# make directories if they are not there +mkdir -p /hpcstats/accounting +mkdir -p /hpcstats/archive +mkdir -p /hpcstats/daily_archive + +# detect if the tables are existing and create if not +/usr/local/bin/python3 tacc_stats/site/manage.py makemigrations +/usr/local/bin/python3 tacc_stats/site/manage.py migrate + +# then run this (gunicorn later) +/usr/local/bin/python3 tacc_stats/site/manage.py runserver 0.0.0.0:8000 + diff --git a/docker-compose.yml b/docker-compose.yml index 9ca1d3e..a70a8ec 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,42 +1,46 @@ -version: '3.8' - services: web: build: ./ - command: python tacc_stats/site/manage.py runserver 0.0.0.0:8000 + command: /usr/bin/supervisord + volumes: - ./:/usr/src/hpcstats/ + - hpcstats_data:/hpcstats/ + ports: - 8000:8000 env_file: - ./.env.dev - db: image: timescale/timescaledb:latest-pg15 volumes: - postgres_data:/var/lib/postgresql/data/ environment: - - POSTGRES_USER=hello_django - - POSTGRES_PASSWORD=hello_django - - POSTGRES_DB=hello_django_dev + - POSTGRES_USER=hpcstats + - POSTGRES_PASSWORD=hpcstats rabbitmq: image: rabbitmq:4-alpine - container_name: 'rabbitmq' ports: - 5672:5672 - 15672:15672 volumes: - - /tmp/rabbitmq/data/:/var/lib/rabbitmq/ - - /tmp/rabbitmq/log/:/var/log/rabbitmq - networks: - - rabbitmq_go_net - -networks: - rabbitmq_go_net: - driver: bridge + - rabbitmq_messages:/var/lib/rabbitmq/ + - /var/log/rabbitmq:/var/log/rabbitmq +# networks: +# - rabbitmq_go_net +# +#networks: +# rabbitmq_go_net: +# driver: bridge volumes: postgres_data: + rabbitmq_messages: + + hpcstats_data: + + + diff --git a/requirements.txt b/requirements.txt index 3d23aea..57621d0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -74,4 +74,4 @@ ushlex==0.99.1 websocket-client==0.53.0 zc.lockfile==2.0 zipp==3.6.0 -django-timescaledb +gunicorn diff --git a/setup.py b/setup.py index 58f3297..5bf88e7 100644 --- a/setup.py +++ b/setup.py @@ -29,10 +29,10 @@ config = ConfigParser() config.read("tacc_stats.ini") -with open("tacc_stats/cfg.py", 'w') as fd: - for s in config.sections(): - for key, val in dict(config.items(s)).items(): - fd.write(key + " = " + "\"" + val + "\"" + '\n') +#with open("tacc_stats/cfg.py", 'w') as fd: +# for s in config.sections(): +# for key, val in dict(config.items(s)).items(): +# fd.write(key + " = " + "\"" + val + "\"" + '\n') setup( name = DISTNAME, diff --git a/supervisord.conf b/supervisord.conf new file mode 100644 index 0000000..400c39e --- /dev/null +++ b/supervisord.conf @@ -0,0 +1,12 @@ +[supervisord] +nodaemon=true + +[program:hpcstats-web] +command=/home/hpcstats/django_startup.sh +user=hpcstats +autorestart=true + +[program:hpcstats-rabbitmq-listener] +command=/usr/local/bin/python3 /home/hpcstats/tacc_stats/listend.py +user=hpcstats +autorestart=true diff --git a/tacc_stats.ini b/tacc_stats.ini index dec01b2..3267b61 100644 --- a/tacc_stats.ini +++ b/tacc_stats.ini @@ -1,22 +1,22 @@ [DEFAULT] -machine = machine-name -data_dir = /tacc_stats_site/%(machine)s +machine = stampede3 +data_dir = /hpcstats/ server = servername.tacc.utexas.edu [RMQ] -rmq_server = %(server)s +rmq_server = rabbitmq rmq_queue = %(machine)s [PORTAL] acct_path = %(data_dir)s/accounting archive_dir = %(data_dir)s/archive host_name_ext = %(machine)s.tacc.utexas.edu -dbname = test_db +dbname = hpcstats daily_archive_dir = %(data_dir)s/daily_archive engine_name = django.db.backends.postgresql_psycopg2 -username = user -password = password -host = localhost +username = hpcstats +password = hpcstats +host = db port = 5432 [XALT] diff --git a/tacc_stats/listend.py b/tacc_stats/listend.py index 81ae448..79a1ec7 100755 --- a/tacc_stats/listend.py +++ b/tacc_stats/listend.py @@ -55,6 +55,7 @@ def on_message(channel, method_frame, header_frame, body): parameters = pika.ConnectionParameters(cfg.get_rmq_server()) connection = pika.BlockingConnection(parameters) channel = connection.channel() + channel.queue_declare(queue=cfg.get_rmq_queue()) channel.basic_consume(cfg.get_rmq_queue(), on_message) try: channel.start_consuming() diff --git a/tacc_stats/listend2.py b/tacc_stats/listend2.py new file mode 100644 index 0000000..884a961 --- /dev/null +++ b/tacc_stats/listend2.py @@ -0,0 +1,58 @@ +#!/fstats/frontera/bin/python3 +import pika +import os, sys +import time +import tacc_stats.cfg as cfg +from fcntl import flock, LOCK_EX, LOCK_NB + +def on_message(channel, method_frame, header_frame, body): + + try: + message = body.decode() + except: + print("Unexpected error at decode:", sys.exc_info()[0]) + #print(body) + return + + if message[0] == '$': + host = message.split('\n')[1].split()[1] + else: + host = message.split()[2] + + #if host == "localhost.localdomain": return + host_dir = os.path.join(cfg.archive_dir, host) + if not os.path.exists(host_dir): + os.makedirs(host_dir) + + current_path = os.path.join(host_dir, "current") + if message[0] == '$': + if os.path.exists(current_path): + os.unlink(current_path) + + with open(current_path, 'w') as fd: + link_path = os.path.join(host_dir, str(int(time.time()))) + if os.path.exists(link_path): + os.remove(link_path) + os.link(current_path, link_path) + + with open(current_path, 'a') as fd: + fd.write(message) + + channel.basic_ack(delivery_tag=method_frame.delivery_tag) + +with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), "listend_lock"), "w") as fd: + try: + flock(fd, LOCK_EX | LOCK_NB) + except IOError: + print("listend is already running") + sys.exit() + + parameters = pika.ConnectionParameters(cfg.rmq_server) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.basic_consume(cfg.rmq_queue, on_message) + try: + channel.start_consuming() + except KeyboardInterrupt: + channel.stop_consuming() + connection.close() diff --git a/tacc_stats/site/app.py b/tacc_stats/site/app.py new file mode 100644 index 0000000..6cd2fff --- /dev/null +++ b/tacc_stats/site/app.py @@ -0,0 +1,5 @@ +from django.apps import AppConfig + +class MyAppConfig(AppConfig): + name = 'tacc_stats' # This is your app label + verbose_name = 'HPC Statistics' # Optional: A human-readable name diff --git a/tacc_stats/site/machine/migrations/0001_initial.py b/tacc_stats/site/machine/migrations/0001_initial.py new file mode 100644 index 0000000..a7f8631 --- /dev/null +++ b/tacc_stats/site/machine/migrations/0001_initial.py @@ -0,0 +1,130 @@ +import django.contrib.postgres.fields +from django.db import migrations, models +import django.db.models.deletion +import tacc_stats.site.machine.models + + +# generated by manage.py makemigrations + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='host_data', + fields=[ + ('time', models.DateTimeField(primary_key=True, serialize=False)), + ('host', models.CharField(blank=True, max_length=64, null=True)), + ('jid', models.CharField(blank=True, max_length=32, null=True)), + ('type', models.CharField(blank=True, max_length=32, null=True)), + ('dev', models.CharField(blank=True, max_length=64, null=True)), + ('event', models.CharField(blank=True, max_length=64, null=True)), + ('unit', models.CharField(blank=True, max_length=16, null=True)), + ('value', tacc_stats.site.machine.models.RealField(null=True)), + ('arc', tacc_stats.site.machine.models.RealField(null=True)), + ('delta', tacc_stats.site.machine.models.RealField(null=True)), + ], + options={ + 'db_table': 'host_data', + }, + ), + migrations.CreateModel( + name='job_data', + fields=[ + ('jid', models.CharField(max_length=32, primary_key=True, serialize=False)), + ('submit_time', models.DateTimeField()), + ('start_time', models.DateTimeField()), + ('end_time', models.DateTimeField()), + ('runtime', models.FloatField(blank=True, null=True)), + ('timelimit', models.FloatField(blank=True, null=True)), + ('node_hrs', models.FloatField(blank=True, null=True)), + ('nhosts', models.IntegerField(blank=True, null=True)), + ('ncores', models.IntegerField(blank=True, null=True)), + ('username', models.CharField(max_length=64)), + ('account', models.CharField(blank=True, max_length=64, null=True)), + ('queue', models.CharField(blank=True, max_length=64, null=True)), + ('state', models.CharField(blank=True, max_length=64, null=True)), + ('jobname', models.TextField(blank=True, null=True)), + ('host_list', django.contrib.postgres.fields.ArrayField(base_field=models.TextField(), size=None)), + ], + options={ + 'db_table': 'job_data', + 'managed': True, + }, + ), + migrations.CreateModel( + name='proc_data', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('host', models.CharField(blank=True, max_length=64, null=True)), + ('proc', models.CharField(blank=True, max_length=512, null=True)), + ('jid', models.ForeignKey(blank=True, db_column='jid', null=True, on_delete=django.db.models.deletion.CASCADE, to='machine.job_data')), + ], + options={ + 'db_table': 'proc_data', + 'managed': True, + }, + ), + migrations.CreateModel( + name='metrics_data', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('type', models.CharField(blank=True, max_length=32, null=True)), + ('metric', models.CharField(blank=True, max_length=32, null=True)), + ('units', models.CharField(blank=True, max_length=16, null=True)), + ('value', models.FloatField(blank=True, null=True)), + ('jid', models.ForeignKey(blank=True, db_column='jid', null=True, on_delete=django.db.models.deletion.CASCADE, to='machine.job_data')), + ], + options={ + 'db_table': 'metrics_data', + 'managed': True, + }, + ), + migrations.AddIndex( + model_name='host_data', + index=models.Index(fields=['host', 'time'], name='host_data_host_eda1c8_idx'), + ), + migrations.AddIndex( + model_name='host_data', + index=models.Index(fields=['jid', 'time'], name='host_data_jid_9e84c4_idx'), + ), + migrations.AlterUniqueTogether( + name='host_data', + unique_together={('time', 'host', 'type', 'event')}, + ), + migrations.AddIndex( + model_name='proc_data', + index=models.Index(fields=['jid'], name='proc_data_jid_a6e794_idx'), + ), + migrations.AlterUniqueTogether( + name='proc_data', + unique_together={('jid', 'host', 'proc')}, + ), + migrations.AlterUniqueTogether( + name='metrics_data', + unique_together={('jid', 'type', 'metric')}, + ), + + # TIMESCALEDB Timeseries and compression setup + + migrations.RunSQL( + "ALTER TABLE host_data DROP CONSTRAINT host_data_pkey;" + ), + + migrations.RunSQL( + "SELECT create_hypertable('host_data', 'time', chunk_time_interval => INTERVAL '1 days');" + ), + + migrations.RunSQL( + "ALTER TABLE host_data SET (timescaledb.compress, timescaledb.compress_orderby = 'time', timescaledb.compress_segmentby = 'host,jid,type,event');" + + ), + migrations.RunSQL( + "SELECT add_compression_policy('host_data', compress_after => INTERVAL '60d');" + ), + + ] diff --git a/tacc_stats/site/machine/migrations/__init__.py b/tacc_stats/site/machine/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tacc_stats/site/machine/models.py b/tacc_stats/site/machine/models.py index 74feb9f..73f374a 100644 --- a/tacc_stats/site/machine/models.py +++ b/tacc_stats/site/machine/models.py @@ -1,12 +1,8 @@ """The database models of tacc stats""" from django.db import models -from django.forms import ModelForm from django.contrib.postgres.fields import ArrayField -from timescale.db.models.fields import TimescaleDateTimeField -from timescale.db.models.managers import TimescaleManager - class RealField(models.FloatField): # Make type in order to use 32 bit floats (reals) instead of 64 bit floats def db_type(self, connection): @@ -33,6 +29,7 @@ class job_data(models.Model): class Meta: db_table = 'job_data' + managed = True def __unicode__(self): return str(self.id) @@ -54,7 +51,7 @@ class metrics_data(models.Model): value = models.FloatField(blank=True, null=True) class Meta: - #managed = False + managed = True db_table = 'metrics_data' unique_together = (('jid', 'type', 'metric'),) @@ -80,6 +77,7 @@ def __unicode__(self): CREATE INDEX ON host_data (host, time DESC); CREATE INDEX ON host_data (jid, time DESC); + SELECT create_hypertable('host_data', by_range('time', 86400000000)); query_create_compression = ALTER TABLE host_data SET \ (timescaledb.compress, timescaledb.compress_orderby = 'time DESC', timescaledb.compress_segmentby = 'host,jid,type,event'); SELECT add_compression_policy('host_data', INTERVAL '12h', if_not_exists => true); @@ -97,9 +95,8 @@ def __unicode__(self): # TODO: Compression in migration.py - class host_data(models.Model): - time = TimescaleDateTimeField(interval="1 day") + time = models.DateTimeField(primary_key=True) host = models.CharField(max_length=64, blank=True, null=True) jid = models.CharField(max_length=32, blank=True, null=True) type = models.CharField(max_length=32, blank=True, null=True) @@ -110,11 +107,8 @@ class host_data(models.Model): arc = RealField(null=True) delta = RealField(null=True) - objects = models.Manager() - timescale = TimescaleManager() class Meta: db_table = 'host_data' - abstract = True unique_together = (('time', 'host', 'type', 'event'),) indexes = [ models.Index(fields=["host", "time"]), @@ -127,8 +121,12 @@ class proc_data(models.Model): proc = models.CharField(max_length=512, blank=True, null=True) class Meta: + managed = True db_table = 'proc_data' unique_together = (('jid', 'host', 'proc'),) indexes = [ models.Index(fields=["jid"]), ] + + def __unicode__(self): + return str(self.id) diff --git a/tacc_stats/site/machine/views.py b/tacc_stats/site/machine/views.py index c0a1e22..334ff4b 100644 --- a/tacc_stats/site/machine/views.py +++ b/tacc_stats/site/machine/views.py @@ -1,4 +1,5 @@ import sys +import traceback # Append your local repository path here: # sys.path.append("/home/sg99/tacc_stats") from django.http import HttpResponse, HttpResponseRedirect @@ -475,10 +476,23 @@ class ChoiceForm(forms.Form): queues = job_data.objects.distinct("queue").values_list("queue", flat = True) states = job_data.objects.exclude(state__contains = "CANCELLED by").distinct("state").values_list("state", flat = True) - QUEUECHOICES = [('','')] + [(q, q) for q in queues] - print(QUEUECHOICES) + + try: + QUEUECHOICES = [('','')] + [(q, q) for q in queues] + except Exception as e: + print(e) + print("Continuing in case of makemigrations") + # print(traceback.format_exc()) + QUEUECHOICES = [] + #print(QUEUECHOICES) queue = forms.ChoiceField(choices=QUEUECHOICES, widget=forms.Select(choices=QUEUECHOICES)) - STATECHOICES = [('','')] + [(s, s) for s in states] - print(STATECHOICES) + try: + STATECHOICES = [('','')] + [(s, s) for s in states] + except Exception as e: + print(e) + print("Continuing in case of makemigrations") + #print(traceback.format_exc()) + STATECHOICES = [] + #print(STATECHOICES) state = forms.ChoiceField(choices=STATECHOICES, widget=forms.Select(choices=STATECHOICES)) diff --git a/tacc_stats/site/tacc_stats_site/settings.py b/tacc_stats/site/tacc_stats_site/settings.py index 1bc59eb..875bd06 100644 --- a/tacc_stats/site/tacc_stats_site/settings.py +++ b/tacc_stats/site/tacc_stats_site/settings.py @@ -44,6 +44,9 @@ } } +# Backend for timescale-specific tables +TIMESCALE_DB_BACKEND_BASE = "timescale.db.backends.postgresql" + # Hosts/domain names that are valid for this site; required if DEBUG is False # See https://docs.djangoproject.com/en/1.5/ref/settings/#allowed-hosts #ALLOWED_HOSTS = ['*'] @@ -150,6 +153,9 @@ WSGI_APPLICATION = 'tacc_stats.site.tacc_stats_site.wsgi.application' INSTALLED_APPS = ( + 'tacc_stats.site.machine', + 'tacc_stats.site.xalt', + 'tacc_stats.site.tacc_stats_site', 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', @@ -163,9 +169,6 @@ #'django_pdf', # Uncomment the next line to enable admin documentation: # 'django.contrib.admindocs', - 'tacc_stats.site.machine', - 'tacc_stats.site.xalt', - 'tacc_stats.site.tacc_stats_site', ) INTERNAL_IPS = ['127.0.0.1'] SESSION_SERIALIZER = 'django.contrib.sessions.serializers.JSONSerializer' From f6a90ad7066f334b602d5420478a526e5c44bea9 Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Wed, 1 Jan 2025 13:37:38 -0600 Subject: [PATCH 07/22] Cleanup of some messy files --- tacc_stats/listend2.py | 58 --------------------- tacc_stats/site/app.py | 5 -- tacc_stats/site/tacc_stats_site/settings.py | 3 -- 3 files changed, 66 deletions(-) delete mode 100644 tacc_stats/listend2.py delete mode 100644 tacc_stats/site/app.py diff --git a/tacc_stats/listend2.py b/tacc_stats/listend2.py deleted file mode 100644 index 884a961..0000000 --- a/tacc_stats/listend2.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/fstats/frontera/bin/python3 -import pika -import os, sys -import time -import tacc_stats.cfg as cfg -from fcntl import flock, LOCK_EX, LOCK_NB - -def on_message(channel, method_frame, header_frame, body): - - try: - message = body.decode() - except: - print("Unexpected error at decode:", sys.exc_info()[0]) - #print(body) - return - - if message[0] == '$': - host = message.split('\n')[1].split()[1] - else: - host = message.split()[2] - - #if host == "localhost.localdomain": return - host_dir = os.path.join(cfg.archive_dir, host) - if not os.path.exists(host_dir): - os.makedirs(host_dir) - - current_path = os.path.join(host_dir, "current") - if message[0] == '$': - if os.path.exists(current_path): - os.unlink(current_path) - - with open(current_path, 'w') as fd: - link_path = os.path.join(host_dir, str(int(time.time()))) - if os.path.exists(link_path): - os.remove(link_path) - os.link(current_path, link_path) - - with open(current_path, 'a') as fd: - fd.write(message) - - channel.basic_ack(delivery_tag=method_frame.delivery_tag) - -with open(os.path.join(os.path.dirname(os.path.realpath(__file__)), "listend_lock"), "w") as fd: - try: - flock(fd, LOCK_EX | LOCK_NB) - except IOError: - print("listend is already running") - sys.exit() - - parameters = pika.ConnectionParameters(cfg.rmq_server) - connection = pika.BlockingConnection(parameters) - channel = connection.channel() - channel.basic_consume(cfg.rmq_queue, on_message) - try: - channel.start_consuming() - except KeyboardInterrupt: - channel.stop_consuming() - connection.close() diff --git a/tacc_stats/site/app.py b/tacc_stats/site/app.py deleted file mode 100644 index 6cd2fff..0000000 --- a/tacc_stats/site/app.py +++ /dev/null @@ -1,5 +0,0 @@ -from django.apps import AppConfig - -class MyAppConfig(AppConfig): - name = 'tacc_stats' # This is your app label - verbose_name = 'HPC Statistics' # Optional: A human-readable name diff --git a/tacc_stats/site/tacc_stats_site/settings.py b/tacc_stats/site/tacc_stats_site/settings.py index 875bd06..f48a480 100644 --- a/tacc_stats/site/tacc_stats_site/settings.py +++ b/tacc_stats/site/tacc_stats_site/settings.py @@ -44,9 +44,6 @@ } } -# Backend for timescale-specific tables -TIMESCALE_DB_BACKEND_BASE = "timescale.db.backends.postgresql" - # Hosts/domain names that are valid for this site; required if DEBUG is False # See https://docs.djangoproject.com/en/1.5/ref/settings/#allowed-hosts #ALLOWED_HOSTS = ['*'] From e334e62664cee7a6b73c8b7e245178648f25c9fb Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Wed, 1 Jan 2025 16:51:44 -0600 Subject: [PATCH 08/22] Added nginx reverse proxy --- Dockerfile | 6 ++-- docker-compose.yml | 28 +++++++++++-------- .../django_startup.sh | 0 services-conf/nginx.conf | 17 +++++++++++ .../supervisord.conf | 2 +- 5 files changed, 37 insertions(+), 16 deletions(-) rename django_startup.sh => services-conf/django_startup.sh (100%) create mode 100644 services-conf/nginx.conf rename supervisord.conf => services-conf/supervisord.conf (80%) diff --git a/Dockerfile b/Dockerfile index 2a1b362..81abc5e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,21 +9,21 @@ RUN apt-get update && apt-get upgrade -y RUN apt-get install netcat supervisor -y -USER hpcstats -# run as user ENV PYTHONDONTWRITEBYTECODE 1 ENV PYTHONUNBUFFERED 1 ENV PATH $PATH:/home/hpcstats/.local/bin + # install dependencies RUN pip install --upgrade pip COPY --chown=hpcstats:hpcstats ./requirements.txt . RUN pip install -r requirements.txt + # copy project COPY --chown=hpcstats:hpcstats . . RUN pip install . COPY --chown=hpcstats:hpcstats ./tacc_stats.ini . -ADD supervisord.conf /etc/supervisor/conf.d/supervisord.conf +ADD services-conf/supervisord.conf /etc/supervisor/conf.d/supervisord.conf diff --git a/docker-compose.yml b/docker-compose.yml index a70a8ec..ac766ea 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,16 +2,20 @@ services: web: build: ./ command: /usr/bin/supervisord - volumes: - - ./:/usr/src/hpcstats/ - hpcstats_data:/hpcstats/ - - ports: - - 8000:8000 env_file: - ./.env.dev + proxy: + restart: always + image: nginx:1-alpine + ports: + - 80:80 + - 443:443 + volumes: + - ./services-conf/nginx.conf:/etc/nginx/conf.d/default.conf + db: image: timescale/timescaledb:latest-pg15 volumes: @@ -24,16 +28,16 @@ services: image: rabbitmq:4-alpine ports: - 5672:5672 - - 15672:15672 + + # Admin Console + # - 15672:15672 volumes: - rabbitmq_messages:/var/lib/rabbitmq/ - /var/log/rabbitmq:/var/log/rabbitmq -# networks: -# - rabbitmq_go_net -# -#networks: -# rabbitmq_go_net: -# driver: bridge + +networks: + default: + driver: bridge volumes: postgres_data: diff --git a/django_startup.sh b/services-conf/django_startup.sh similarity index 100% rename from django_startup.sh rename to services-conf/django_startup.sh diff --git a/services-conf/nginx.conf b/services-conf/nginx.conf new file mode 100644 index 0000000..567f07a --- /dev/null +++ b/services-conf/nginx.conf @@ -0,0 +1,17 @@ +upstream tacc_stats { + server web:8000; +} + + +server { + listen 80; + + client_max_body_size 14M; + location / { + proxy_pass http://web:8000; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header Host $host; + proxy_redirect off; + } +} + diff --git a/supervisord.conf b/services-conf/supervisord.conf similarity index 80% rename from supervisord.conf rename to services-conf/supervisord.conf index 400c39e..6b694c1 100644 --- a/supervisord.conf +++ b/services-conf/supervisord.conf @@ -2,7 +2,7 @@ nodaemon=true [program:hpcstats-web] -command=/home/hpcstats/django_startup.sh +command=/home/hpcstats/services-conf/django_startup.sh user=hpcstats autorestart=true From a0f5d1154ad8ff1144c644708fbacc03cf32336f Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Wed, 1 Jan 2025 18:14:46 -0600 Subject: [PATCH 09/22] Moving from django native server to gunicorn --- services-conf/django_startup.sh | 2 +- services-conf/supervisord.conf | 4 ++++ tacc_stats.ini | 10 +++++----- tacc_stats/site/tacc_stats_site/settings.py | 2 +- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/services-conf/django_startup.sh b/services-conf/django_startup.sh index 0c4fed0..81ae83e 100755 --- a/services-conf/django_startup.sh +++ b/services-conf/django_startup.sh @@ -18,5 +18,5 @@ mkdir -p /hpcstats/daily_archive /usr/local/bin/python3 tacc_stats/site/manage.py migrate # then run this (gunicorn later) -/usr/local/bin/python3 tacc_stats/site/manage.py runserver 0.0.0.0:8000 +/usr/local/bin/gunicorn tacc_stats.site.tacc_stats_site.wsgi --bind 0.0.0.0:8000 --env DJANGO_SETTINGS_MODULE=tacc_stats.site.tacc_stats_site.settings diff --git a/services-conf/supervisord.conf b/services-conf/supervisord.conf index 6b694c1..02b2b6e 100644 --- a/services-conf/supervisord.conf +++ b/services-conf/supervisord.conf @@ -5,8 +5,12 @@ nodaemon=true command=/home/hpcstats/services-conf/django_startup.sh user=hpcstats autorestart=true +stdout_logfile=/dev/fd/1 +stdout_logfile_maxbytes=0 [program:hpcstats-rabbitmq-listener] command=/usr/local/bin/python3 /home/hpcstats/tacc_stats/listend.py user=hpcstats autorestart=true +stdout_logfile=/dev/fd/1 +stdout_logfile_maxbytes=0 diff --git a/tacc_stats.ini b/tacc_stats.ini index 3267b61..0873d78 100644 --- a/tacc_stats.ini +++ b/tacc_stats.ini @@ -20,8 +20,8 @@ host = db port = 5432 [XALT] -xalt_engine = none -xalt_name = none -xalt_user = none -xalt_password = none -xalt_host = none +xalt_engine = +xalt_name = +xalt_user = +xalt_password = +xalt_host = diff --git a/tacc_stats/site/tacc_stats_site/settings.py b/tacc_stats/site/tacc_stats_site/settings.py index f48a480..8732a99 100644 --- a/tacc_stats/site/tacc_stats_site/settings.py +++ b/tacc_stats/site/tacc_stats_site/settings.py @@ -147,7 +147,7 @@ ROOT_URLCONF = 'tacc_stats.site.tacc_stats_site.urls' # Python dotted path to the WSGI application used by Django's runserver. -WSGI_APPLICATION = 'tacc_stats.site.tacc_stats_site.wsgi.application' +WSGI_APPLICATION = 'tacc_stats.site.tacc_stats_site.wsgi' INSTALLED_APPS = ( 'tacc_stats.site.machine', From 58964caea4d279e47d43a13f058741d742b21c86 Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Thu, 2 Jan 2025 08:31:48 -0600 Subject: [PATCH 10/22] Update DJANGO to allow all hosts, because we will do any host checking in nginx --- tacc_stats/site/tacc_stats_site/settings.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tacc_stats/site/tacc_stats_site/settings.py b/tacc_stats/site/tacc_stats_site/settings.py index 8732a99..d3d0fb0 100644 --- a/tacc_stats/site/tacc_stats_site/settings.py +++ b/tacc_stats/site/tacc_stats_site/settings.py @@ -46,12 +46,8 @@ # Hosts/domain names that are valid for this site; required if DEBUG is False # See https://docs.djangoproject.com/en/1.5/ref/settings/#allowed-hosts -#ALLOWED_HOSTS = ['*'] +ALLOWED_HOSTS = ['*'] -# For dockerization -# 'DJANGO_ALLOWED_HOSTS' should be a single string of hosts with a space between each. -# For example: 'DJANGO_ALLOWED_HOSTS=localhost 127.0.0.1 [::1]' -ALLOWED_HOSTS = os.environ.get("DJANGO_ALLOWED_HOSTS").split(" ") # Local time zone for this installation. Choices can be found here: # http://en.wikipedia.org/wiki/List_of_tz_zones_by_name From b0b2d99d0e1f351db60c69924e4628b0ebb0547e Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Thu, 2 Jan 2025 15:38:27 -0600 Subject: [PATCH 11/22] Quick and dirty instructions to get this running from a base rocky image --- docker-instructions.txt | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 docker-instructions.txt diff --git a/docker-instructions.txt b/docker-instructions.txt new file mode 100644 index 0000000..7581c72 --- /dev/null +++ b/docker-instructions.txt @@ -0,0 +1,22 @@ +# From a base rocky image + +dnf install docker git podman-compose + +# May need to use an ssh git address if you plan to commit to the report +git clone https://github.com/TACC/tacc_stats.git + +cd tacc_stats + +git checkout sharrell-docker + +docker compose up --build -d + +# to see the startup +docker compose logs + +#you should be able to get to the website at this point, will error if no data + + + + + From c2bf81052681089771d706912016ded4079573ca Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Thu, 2 Jan 2025 15:45:49 -0600 Subject: [PATCH 12/22] Add a step --- docker-instructions.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docker-instructions.txt b/docker-instructions.txt index 7581c72..b390af0 100644 --- a/docker-instructions.txt +++ b/docker-instructions.txt @@ -3,12 +3,18 @@ dnf install docker git podman-compose # May need to use an ssh git address if you plan to commit to the report -git clone https://github.com/TACC/tacc_stats.git +git clone https://github.com/TACC/tacc_stats.git cd tacc_stats git checkout sharrell-docker +# Create env.dev file, contents: +DEBUG=1 +SECRET_KEY=foo +PATH=$PATH:/home/hpcstats/.local/bin +# end contents + docker compose up --build -d # to see the startup From 6e889081a673f977865edf204ae933ca480eeca5 Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Thu, 2 Jan 2025 16:08:10 -0600 Subject: [PATCH 13/22] Fix typo --- docker-instructions.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-instructions.txt b/docker-instructions.txt index b390af0..e1e2f2b 100644 --- a/docker-instructions.txt +++ b/docker-instructions.txt @@ -2,7 +2,7 @@ dnf install docker git podman-compose -# May need to use an ssh git address if you plan to commit to the report +# May need to use an ssh git address if you plan to commit to the repo git clone https://github.com/TACC/tacc_stats.git cd tacc_stats From 1cdf30ebf03911460df6cd537231e08d10d64c2e Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Thu, 2 Jan 2025 17:11:19 -0600 Subject: [PATCH 14/22] add stampede3 ssl certs into nginx, need to figure out how to configure this for the general public --- docker-compose.yml | 2 +- services-conf/nginx.conf | 40 +++++++++++++++++++++++++++++----------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index ac766ea..39ee3b9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,6 +15,7 @@ services: - 443:443 volumes: - ./services-conf/nginx.conf:/etc/nginx/conf.d/default.conf + - /etc/letsencrypt/:/etc/letsencrypt/ db: image: timescale/timescaledb:latest-pg15 @@ -28,7 +29,6 @@ services: image: rabbitmq:4-alpine ports: - 5672:5672 - # Admin Console # - 15672:15672 volumes: diff --git a/services-conf/nginx.conf b/services-conf/nginx.conf index 567f07a..41b1424 100644 --- a/services-conf/nginx.conf +++ b/services-conf/nginx.conf @@ -1,17 +1,35 @@ -upstream tacc_stats { +upstream django_server { server web:8000; } server { - listen 80; - - client_max_body_size 14M; + listen 80; location / { - proxy_pass http://web:8000; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header Host $host; - proxy_redirect off; - } -} - + rewrite ^ https://$host$request_uri? permanent; + } +} + +server { + listen 443 ssl; + + ssl_certificate /etc/letsencrypt/live/stats.stampede3.tacc.utexas.edu/fullchain.pem; + ssl_certificate_key /etc/letsencrypt/live/stats.stampede3.tacc.utexas.edu/privkey.pem; + + location / { + proxy_pass http://web:8000; + proxy_redirect off; + proxy_http_version 1.1; + proxy_cache_bypass $http_upgrade; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection keep-alive; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Host nginx; + proxy_buffer_size 128k; + proxy_buffers 4 256k; + proxy_busy_buffers_size 256k; + } +} From 328e8cfcacca8f48f5b04b8f82f83a882bc9af51 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 2 Jan 2025 17:20:31 -0600 Subject: [PATCH 15/22] Adding some debug output --- tacc_stats/listend.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tacc_stats/listend.py b/tacc_stats/listend.py index 79a1ec7..543679e 100755 --- a/tacc_stats/listend.py +++ b/tacc_stats/listend.py @@ -11,6 +11,7 @@ import tacc_stats.conf_parser as cfg def on_message(channel, method_frame, header_frame, body): + print("found message: %s" % header_frame) try: message = body.decode() @@ -52,11 +53,13 @@ def on_message(channel, method_frame, header_frame, body): print("listend is already running") sys.exit() + print("Starting Connection") parameters = pika.ConnectionParameters(cfg.get_rmq_server()) connection = pika.BlockingConnection(parameters) channel = connection.channel() channel.queue_declare(queue=cfg.get_rmq_queue()) channel.basic_consume(cfg.get_rmq_queue(), on_message) + print("Begining Consume") try: channel.start_consuming() except KeyboardInterrupt: From df6595bd29c26d7ef3c25913a5a305fadc5fa374 Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Thu, 2 Jan 2025 17:21:25 -0600 Subject: [PATCH 16/22] Provide both SSL and non-ssl nginx proxy configs, added instructions on how to use --- docker-instructions.txt | 4 ++++ services-conf/nginx-nossl.conf | 17 +++++++++++++++++ .../{nginx.conf => nginx-withssl.conf} | 0 3 files changed, 21 insertions(+) create mode 100644 services-conf/nginx-nossl.conf rename services-conf/{nginx.conf => nginx-withssl.conf} (100%) diff --git a/docker-instructions.txt b/docker-instructions.txt index b390af0..e14008b 100644 --- a/docker-instructions.txt +++ b/docker-instructions.txt @@ -15,6 +15,10 @@ SECRET_KEY=foo PATH=$PATH:/home/hpcstats/.local/bin # end contents +# In service-conf, nginx.conf needs to be chosen, to make things easier for development, use the nossl config, if its production use the ssl version, you can copy the nginx-nossl.conf to nginx.conf and build. + +cp services-conf/nginx-nossl.conf services-conf/nginx.conf + docker compose up --build -d # to see the startup diff --git a/services-conf/nginx-nossl.conf b/services-conf/nginx-nossl.conf new file mode 100644 index 0000000..567f07a --- /dev/null +++ b/services-conf/nginx-nossl.conf @@ -0,0 +1,17 @@ +upstream tacc_stats { + server web:8000; +} + + +server { + listen 80; + + client_max_body_size 14M; + location / { + proxy_pass http://web:8000; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header Host $host; + proxy_redirect off; + } +} + diff --git a/services-conf/nginx.conf b/services-conf/nginx-withssl.conf similarity index 100% rename from services-conf/nginx.conf rename to services-conf/nginx-withssl.conf From fcd43a5d6ca889c5f27d4fa8d60b271d6f9bc6ae Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Mon, 6 Jan 2025 11:49:44 -0600 Subject: [PATCH 17/22] Get sync_acct.py working --- Dockerfile | 6 ++- docker-compose.yml | 4 +- services-conf/django_startup.sh | 7 ++-- services-conf/supervisord.conf | 24 +++++++++++ tacc_stats/conf_parser.py | 2 +- tacc_stats/dbload/sync_acct.py | 42 ++----------------- .../site/machine/migrations/0001_initial.py | 1 + tacc_stats/site/machine/models.py | 1 + tacc_stats/site/machine/views.py | 25 ++++++----- 9 files changed, 54 insertions(+), 58 deletions(-) diff --git a/Dockerfile b/Dockerfile index 81abc5e..5bbfa77 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,9 +21,11 @@ RUN pip install -r requirements.txt # copy project COPY --chown=hpcstats:hpcstats . . -RUN pip install . +# This includes the tacc_stats.ini +#COPY --chown=hpcstats:hpcstats ./tacc_stats.ini . + -COPY --chown=hpcstats:hpcstats ./tacc_stats.ini . +RUN pip install . ADD services-conf/supervisord.conf /etc/supervisor/conf.d/supervisord.conf diff --git a/docker-compose.yml b/docker-compose.yml index 39ee3b9..f9e7293 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,7 @@ services: build: ./ command: /usr/bin/supervisord volumes: - - hpcstats_data:/hpcstats/ + - /home/sharrell/compose_test/hpcstats_data/:/hpcstats/ env_file: - ./.env.dev @@ -43,8 +43,6 @@ volumes: postgres_data: rabbitmq_messages: - - hpcstats_data: diff --git a/services-conf/django_startup.sh b/services-conf/django_startup.sh index 81ae83e..dbd23be 100755 --- a/services-conf/django_startup.sh +++ b/services-conf/django_startup.sh @@ -9,9 +9,10 @@ done echo "PostgreSQL started" # make directories if they are not there -mkdir -p /hpcstats/accounting -mkdir -p /hpcstats/archive -mkdir -p /hpcstats/daily_archive +mkdir -pv /hpcstats/accounting +mkdir -pv /hpcstats/archive +mkdir -pv /hpcstats/daily_archive +mkdir -pv /hpcstats/logs # detect if the tables are existing and create if not /usr/local/bin/python3 tacc_stats/site/manage.py makemigrations diff --git a/services-conf/supervisord.conf b/services-conf/supervisord.conf index 02b2b6e..cb31e45 100644 --- a/services-conf/supervisord.conf +++ b/services-conf/supervisord.conf @@ -1,16 +1,40 @@ [supervisord] nodaemon=true +#user=hpcstats +logfile=/dev/fd/1 +logfile_maxbytes=0 [program:hpcstats-web] command=/home/hpcstats/services-conf/django_startup.sh user=hpcstats autorestart=true +stdout_redirect=true stdout_logfile=/dev/fd/1 stdout_logfile_maxbytes=0 +stderr_redirect=true +stderr_logfile=/dev/fd/1 +stderr_logfile_maxbytes=0 [program:hpcstats-rabbitmq-listener] command=/usr/local/bin/python3 /home/hpcstats/tacc_stats/listend.py user=hpcstats autorestart=true +stdout_redirect=true stdout_logfile=/dev/fd/1 stdout_logfile_maxbytes=0 +stderr_redirect=true +stderr_logfile=/dev/fd/1 +stderr_logfile_maxbytes=0 + + +[program:sync_acct] +command=/usr/local/bin/python3 /home/hpcstats/tacc_stats/dbload/sync_acct.py +user=hpcstats +autorestart=true +stdout_redirect=true +stdout_logfile=/dev/fd/1 +stdout_logfile_maxbytes=0 +stderr_redirect=true +stderr_logfile=/dev/fd/1 +stderr_logfile_maxbytes=0 + diff --git a/tacc_stats/conf_parser.py b/tacc_stats/conf_parser.py index 8122e21..aa7c854 100644 --- a/tacc_stats/conf_parser.py +++ b/tacc_stats/conf_parser.py @@ -12,7 +12,7 @@ cfg.read('tacc_stats.ini') def get_db_connection_string(): - temp_string = "dbname={0} user="+cfg.get('PORTAL', 'username')+" password="+cfg.get('PORTAL', 'password')+" port="+cfg.get('PORTAL', 'port') + temp_string = "dbname={0} user="+cfg.get('PORTAL', 'username')+" password="+cfg.get('PORTAL', 'password')+" port="+cfg.get('PORTAL', 'port') + " host="+cfg.get('PORTAL', 'host') connection_string = temp_string.format(cfg.get('PORTAL', 'dbname')) return connection_string diff --git a/tacc_stats/dbload/sync_acct.py b/tacc_stats/dbload/sync_acct.py index bc28cee..eac8836 100755 --- a/tacc_stats/dbload/sync_acct.py +++ b/tacc_stats/dbload/sync_acct.py @@ -18,43 +18,6 @@ import tacc_stats.conf_parser as cfg -CONNECTION = cfg.get_db_connection_string() - -query_create_jobdata_table = """CREATE TABLE IF NOT EXISTS job_data ( -jid VARCHAR(32) NOT NULL, -submit_time TIMESTAMPTZ NOT NULL, -start_time TIMESTAMPTZ NOT NULL, -end_time TIMESTAMPTZ NOT NULL, -runtime REAL, -timelimit REAL, -node_hrs REAL, -nhosts INT CHECK (nhosts > 0), -ncores INT CHECK (ncores > 0), -username VARCHAR(64) NOT NULL, -account VARCHAR(64), -queue VARCHAR(64), -state VARCHAR(64), -jobname TEXT, -host_list TEXT[], -CHECK (start_time <= end_time), -CHECK (submit_time <= start_time), -CHECK (runtime >= 0), -CHECK (timelimit >= 0), -CHECK (node_hrs >= 0), -UNIQUE(jid) -);""" - -query_create_jobindex = "CREATE INDEX ON job_data (jid);" - -conn = psycopg2.connect(CONNECTION) -print(conn.server_version) - -with conn.cursor() as cur: - #cur.execute("DROP TABLE IF EXISTS job_data;") - cur.execute(query_create_jobdata_table) - cur.execute(query_create_jobindex) - conn.commit() -conn.close() def sync_acct(acct_file, date_str): print(date_str) @@ -100,6 +63,8 @@ def sync_acct(acct_file, date_str): conn.close() if __name__ == "__main__": + CONNECTION = cfg.get_db_connection_string() + conn = psycopg2.connect(CONNECTION) # while True: @@ -119,6 +84,7 @@ def sync_acct(acct_file, date_str): # Parse and convert raw stats files to pandas dataframe start = time.time() directory = cfg.get_accounting_path() + while startdate <= enddate: for entry in os.scandir(directory): @@ -129,4 +95,4 @@ def sync_acct(acct_file, date_str): startdate += timedelta(days=1) print("loading time", time.time() - start) - #time.sleep(900) + time.sleep(900) diff --git a/tacc_stats/site/machine/migrations/0001_initial.py b/tacc_stats/site/machine/migrations/0001_initial.py index a7f8631..15b1336 100644 --- a/tacc_stats/site/machine/migrations/0001_initial.py +++ b/tacc_stats/site/machine/migrations/0001_initial.py @@ -48,6 +48,7 @@ class Migration(migrations.Migration): ('account', models.CharField(blank=True, max_length=64, null=True)), ('queue', models.CharField(blank=True, max_length=64, null=True)), ('state', models.CharField(blank=True, max_length=64, null=True)), + ('QOS', models.CharField(blank=True, max_length=64, null=True)), ('jobname', models.TextField(blank=True, null=True)), ('host_list', django.contrib.postgres.fields.ArrayField(base_field=models.TextField(), size=None)), ], diff --git a/tacc_stats/site/machine/models.py b/tacc_stats/site/machine/models.py index 73f374a..2fae398 100644 --- a/tacc_stats/site/machine/models.py +++ b/tacc_stats/site/machine/models.py @@ -24,6 +24,7 @@ class job_data(models.Model): account = models.CharField(max_length=64, blank=True, null=True) queue = models.CharField(max_length=64, blank=True, null=True) state = models.CharField(max_length=64, blank=True, null=True) + QOS = models.CharField(max_length=64, blank=True, null=True) jobname = models.TextField(blank=True, null=True) host_list = ArrayField(models.TextField()) diff --git a/tacc_stats/site/machine/views.py b/tacc_stats/site/machine/views.py index 334ff4b..def66e8 100644 --- a/tacc_stats/site/machine/views.py +++ b/tacc_stats/site/machine/views.py @@ -260,21 +260,24 @@ def get_context_data(self, **kwargs): print("error getting gpu data") # xalt - xalt_data=xalt_data_c() - for r in run.objects.using('xalt').filter(job_id = job.jid): + if not cfg.get_xalt_user() == '': + xalt_data=xalt_data_c() + for r in run.objects.using('xalt').filter(job_id = job.jid): if "usr" in r.exec_path.split('/'): continue xalt_data.exec_path.append(r.exec_path) xalt_data.cwd.append(r.cwd[0:128]) for join in join_run_object.objects.using('xalt').filter(run_id = r.run_id): - object_path = lib.objects.using('xalt').get(obj_id = join.obj_id).object_path - module_name = lib.objects.using('xalt').get(obj_id = join.obj_id).module_name - if not module_name: module_name = 'none' - if any(libtmp.module_name == module_name for libtmp in xalt_data.libset): continue - xalt_data.libset.append (libset_c(object_path = object_path, module_name = module_name)) - xalt_data.exec_path=list(set(xalt_data.exec_path)) - xalt_data.cwd=list(set(xalt_data.cwd)) - xalt_data.libset=sorted(xalt_data.libset, key=lambda x:x.module_name) - context['xalt_data'] = xalt_data + object_path = lib.objects.using('xalt').get(obj_id = join.obj_id).object_path + module_name = lib.objects.using('xalt').get(obj_id = join.obj_id).module_name + if not module_name: module_name = 'none' + if any(libtmp.module_name == module_name for libtmp in xalt_data.libset): continue + xalt_data.libset.append (libset_c(object_path = object_path, module_name = module_name)) + xalt_data.exec_path=list(set(xalt_data.exec_path)) + xalt_data.cwd=list(set(xalt_data.cwd)) + xalt_data.libset=sorted(xalt_data.libset, key=lambda x:x.module_name) + context['xalt_data'] = xalt_data + else: + xalt_data = [] From c83b1dc671eacb27dcc240720242231541ad8798 Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Mon, 6 Jan 2025 11:50:37 -0600 Subject: [PATCH 18/22] Adding some gitignore-fu for the nginx config and vi swp files --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 3f57606..4db16ea 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +services-conf/nginx.conf dist/ rabbitmq-c/ tacc_stats.egg-info/ @@ -27,6 +28,7 @@ tacc_stats/version.py tacc_stats/cfg.py *egg* #* +*.swp tacc_stats/taccstats Makefile.in /ar-lib @@ -50,4 +52,4 @@ Makefile.in /stamp-h1 /ltmain.sh /texinfo.tex -tacc_stats/pickler/*lock \ No newline at end of file +tacc_stats/pickler/*lock From 0225d1a5b5152e32ae671d45daed88568266c34a Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Mon, 6 Jan 2025 11:54:55 -0600 Subject: [PATCH 19/22] Removing table creation, done with migrate.py and models.py from DJango --- tacc_stats/dbload/sync_timedb.py | 45 -------------------------------- 1 file changed, 45 deletions(-) diff --git a/tacc_stats/dbload/sync_timedb.py b/tacc_stats/dbload/sync_timedb.py index 5e3e710..95a5bf8 100755 --- a/tacc_stats/dbload/sync_timedb.py +++ b/tacc_stats/dbload/sync_timedb.py @@ -369,57 +369,12 @@ def archive_stats_files(archive_info): print(subprocess.check_output(['/usr/bin/gzip', '-8', '-v', archive_tar_fname]), flush=True) def database_startup(): - - query_create_hostdata_table = """CREATE TABLE IF NOT EXISTS host_data ( - time TIMESTAMPTZ NOT NULL, - host VARCHAR(64), - jid VARCHAR(32), - type VARCHAR(32), - dev VARCHAR(64), - event VARCHAR(64), - unit VARCHAR(16), - value real, - delta real, - arc real, - UNIQUE (time, host, type, event) - );""" - - - query_create_hostdata_hypertable = """CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE; - SELECT create_hypertable('host_data', 'time', if_not_exists => TRUE, chunk_time_interval => INTERVAL '1 day'); - CREATE INDEX ON host_data (host, time DESC); - CREATE INDEX ON host_data (jid, time DESC);""" - - query_create_compression = """ALTER TABLE host_data SET \ - (timescaledb.compress, timescaledb.compress_orderby = 'time DESC', timescaledb.compress_segmentby = 'host,jid,type,event'); - SELECT add_compression_policy('host_data', INTERVAL '12h', if_not_exists => true);""" - - - query_create_process_table = """CREATE TABLE IF NOT EXISTS proc_data ( - jid VARCHAR(32) NOT NULL, - host VARCHAR(64), - proc VARCHAR(512), - UNIQUE(jid, host, proc) - );""" - - query_create_process_index = "CREATE INDEX ON proc_data (jid);" - - conn = psycopg2.connect(CONNECTION) if debug: print("Postgresql server version: " + str(conn.server_version)) with conn.cursor() as cur: - # This should only be used for testing and debugging purposes - #cur.execute("DROP TABLE IF EXISTS host_data CASCADE;") - - #cur.execute(query_create_hostdata_table) - #cur.execute(query_create_hostdata_hypertable) - #cur.execute(query_create_compression) - - #cur.execute(query_create_process_table) - #cur.execute(query_create_process_index) cur.execute("SELECT pg_size_pretty(pg_database_size('{0}'));".format(cfg.get_db_name())) for x in cur.fetchall(): print("Database Size:", x[0]) From b739381fb97228e53751f32677f2cb628c59126c Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Mon, 6 Jan 2025 13:39:09 -0600 Subject: [PATCH 20/22] Separate the web container from the pipeline --- docker-compose.yml | 9 +++++++++ services-conf/supervisord.conf | 14 ++------------ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index f9e7293..8fc1787 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,15 @@ services: web: build: ./ + image: hpcperfstats + command: /home/hpcstats/services-conf/django_startup.sh + env_file: + - ./.env.dev + + pipeline: + image: hpcperfstats + depends_on: + - web command: /usr/bin/supervisord volumes: - /home/sharrell/compose_test/hpcstats_data/:/hpcstats/ diff --git a/services-conf/supervisord.conf b/services-conf/supervisord.conf index cb31e45..9151e3b 100644 --- a/services-conf/supervisord.conf +++ b/services-conf/supervisord.conf @@ -1,24 +1,13 @@ [supervisord] nodaemon=true -#user=hpcstats logfile=/dev/fd/1 logfile_maxbytes=0 -[program:hpcstats-web] -command=/home/hpcstats/services-conf/django_startup.sh -user=hpcstats -autorestart=true -stdout_redirect=true -stdout_logfile=/dev/fd/1 -stdout_logfile_maxbytes=0 -stderr_redirect=true -stderr_logfile=/dev/fd/1 -stderr_logfile_maxbytes=0 - [program:hpcstats-rabbitmq-listener] command=/usr/local/bin/python3 /home/hpcstats/tacc_stats/listend.py user=hpcstats autorestart=true +startretries=20 stdout_redirect=true stdout_logfile=/dev/fd/1 stdout_logfile_maxbytes=0 @@ -31,6 +20,7 @@ stderr_logfile_maxbytes=0 command=/usr/local/bin/python3 /home/hpcstats/tacc_stats/dbload/sync_acct.py user=hpcstats autorestart=true +startretries=20 stdout_redirect=true stdout_logfile=/dev/fd/1 stdout_logfile_maxbytes=0 From 44b0ead5bde9357c128c98311387b6bfc08b7c24 Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Mon, 6 Jan 2025 13:56:05 -0600 Subject: [PATCH 21/22] create a supervisor startup so we can make the pipeline directories if needed --- docker-compose.yml | 2 +- services-conf/django_startup.sh | 6 ------ services-conf/supervisor_startup.sh | 14 ++++++++++++++ 3 files changed, 15 insertions(+), 7 deletions(-) create mode 100755 services-conf/supervisor_startup.sh diff --git a/docker-compose.yml b/docker-compose.yml index 8fc1787..2f37e7f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,7 +10,7 @@ services: image: hpcperfstats depends_on: - web - command: /usr/bin/supervisord + command: /home/hpcstats/services-conf/supervisor_startup.sh volumes: - /home/sharrell/compose_test/hpcstats_data/:/hpcstats/ env_file: diff --git a/services-conf/django_startup.sh b/services-conf/django_startup.sh index dbd23be..d6dcc2e 100755 --- a/services-conf/django_startup.sh +++ b/services-conf/django_startup.sh @@ -8,12 +8,6 @@ done echo "PostgreSQL started" -# make directories if they are not there -mkdir -pv /hpcstats/accounting -mkdir -pv /hpcstats/archive -mkdir -pv /hpcstats/daily_archive -mkdir -pv /hpcstats/logs - # detect if the tables are existing and create if not /usr/local/bin/python3 tacc_stats/site/manage.py makemigrations /usr/local/bin/python3 tacc_stats/site/manage.py migrate diff --git a/services-conf/supervisor_startup.sh b/services-conf/supervisor_startup.sh new file mode 100755 index 0000000..f083c41 --- /dev/null +++ b/services-conf/supervisor_startup.sh @@ -0,0 +1,14 @@ +#!/bin/sh + + +# make directories if they are not there +mkdir -pv /hpcstats/accounting +mkdir -pv /hpcstats/archive +mkdir -pv /hpcstats/daily_archive +mkdir -pv /hpcstats/logs +chmod 777 -R /hpcstats/ + +/usr/bin/supervisord + + + From 351f2efd6440d123bfec7d950385d98fca6cb01d Mon Sep 17 00:00:00 2001 From: Stephen Lien Harrell Date: Fri, 10 Jan 2025 07:06:12 -0600 Subject: [PATCH 22/22] Updating name to hpcperfstats and some small fixes --- Dockerfile | 12 ++++++------ docker-compose.yml | 18 +++++++++++------- docker-instructions.txt | 2 +- services-conf/django_startup.sh | 2 +- services-conf/supervisor_startup.sh | 11 ++++++----- services-conf/supervisord.conf | 10 +++++----- tacc_stats.ini | 8 ++++---- tacc_stats/listend.py | 4 +--- 8 files changed, 35 insertions(+), 32 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5bbfa77..c077ebb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,8 @@ # pull official base image FROM python:3.6.15 -RUN useradd -ms /bin/bash hpcstats -WORKDIR /home/hpcstats +RUN useradd -ms /bin/bash hpcperfstats +WORKDIR /home/hpcperfstats # run as root RUN apt-get update && apt-get upgrade -y @@ -11,18 +11,18 @@ RUN apt-get install netcat supervisor -y ENV PYTHONDONTWRITEBYTECODE 1 ENV PYTHONUNBUFFERED 1 -ENV PATH $PATH:/home/hpcstats/.local/bin +ENV PATH $PATH:/home/hpcperfstats/.local/bin # install dependencies RUN pip install --upgrade pip -COPY --chown=hpcstats:hpcstats ./requirements.txt . +COPY --chown=hpcperfstats:hpcperfstats ./requirements.txt . RUN pip install -r requirements.txt # copy project -COPY --chown=hpcstats:hpcstats . . +COPY --chown=hpcperfstats:hpcperfstats . . # This includes the tacc_stats.ini -#COPY --chown=hpcstats:hpcstats ./tacc_stats.ini . +#COPY --chown=hpcperfstats:hpcperfstats ./tacc_stats.ini . RUN pip install . diff --git a/docker-compose.yml b/docker-compose.yml index 2f37e7f..62abe4d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ services: web: build: ./ image: hpcperfstats - command: /home/hpcstats/services-conf/django_startup.sh + command: /home/hpcperfstats/services-conf/django_startup.sh env_file: - ./.env.dev @@ -10,9 +10,9 @@ services: image: hpcperfstats depends_on: - web - command: /home/hpcstats/services-conf/supervisor_startup.sh + command: /home/hpcperfstats/services-conf/supervisor_startup.sh volumes: - - /home/sharrell/compose_test/hpcstats_data/:/hpcstats/ + - hpcperfstatsdata:/hpcperfstats/ env_file: - ./.env.dev @@ -31,8 +31,8 @@ services: volumes: - postgres_data:/var/lib/postgresql/data/ environment: - - POSTGRES_USER=hpcstats - - POSTGRES_PASSWORD=hpcstats + - POSTGRES_USER=hpcperfstats + - POSTGRES_PASSWORD=hpcperfstats rabbitmq: image: rabbitmq:4-alpine @@ -52,6 +52,10 @@ volumes: postgres_data: rabbitmq_messages: - - + hpcperfstatsdata: + driver: local + driver_opts: + type: none + device: /home/sharrell/compose_test/hpcperfstats_data/ + o: bind diff --git a/docker-instructions.txt b/docker-instructions.txt index 55304ee..8be571e 100644 --- a/docker-instructions.txt +++ b/docker-instructions.txt @@ -12,7 +12,7 @@ git checkout sharrell-docker # Create env.dev file, contents: DEBUG=1 SECRET_KEY=foo -PATH=$PATH:/home/hpcstats/.local/bin +PATH=$PATH:/home/hpcperfstats/.local/bin # end contents # In service-conf, nginx.conf needs to be chosen, to make things easier for development, use the nossl config, if its production use the ssl version, you can copy the nginx-nossl.conf to nginx.conf and build. diff --git a/services-conf/django_startup.sh b/services-conf/django_startup.sh index d6dcc2e..2e9ab0b 100755 --- a/services-conf/django_startup.sh +++ b/services-conf/django_startup.sh @@ -13,5 +13,5 @@ echo "PostgreSQL started" /usr/local/bin/python3 tacc_stats/site/manage.py migrate # then run this (gunicorn later) -/usr/local/bin/gunicorn tacc_stats.site.tacc_stats_site.wsgi --bind 0.0.0.0:8000 --env DJANGO_SETTINGS_MODULE=tacc_stats.site.tacc_stats_site.settings +/usr/local/bin/gunicorn tacc_stats.site.tacc_stats_site.wsgi --bind 0.0.0.0:8000 --env DJANGO_SETTINGS_MODULE=tacc_stats.site.tacc_stats_site.settings -u hpcperfstats diff --git a/services-conf/supervisor_startup.sh b/services-conf/supervisor_startup.sh index f083c41..deccaac 100755 --- a/services-conf/supervisor_startup.sh +++ b/services-conf/supervisor_startup.sh @@ -1,12 +1,13 @@ #!/bin/sh +chmod -c 755 /hpcperfstats/ # make directories if they are not there -mkdir -pv /hpcstats/accounting -mkdir -pv /hpcstats/archive -mkdir -pv /hpcstats/daily_archive -mkdir -pv /hpcstats/logs -chmod 777 -R /hpcstats/ +mkdir -pv /hpcperfstats/accounting +mkdir -pv /hpcperfstats/archive +mkdir -pv /hpcperfstats/daily_archive +mkdir -pv /hpcperfstats/logs +chown -R hpcperfstats:hpcperfstats /hpcperfstats/* /usr/bin/supervisord diff --git a/services-conf/supervisord.conf b/services-conf/supervisord.conf index 9151e3b..26a8aca 100644 --- a/services-conf/supervisord.conf +++ b/services-conf/supervisord.conf @@ -3,9 +3,9 @@ nodaemon=true logfile=/dev/fd/1 logfile_maxbytes=0 -[program:hpcstats-rabbitmq-listener] -command=/usr/local/bin/python3 /home/hpcstats/tacc_stats/listend.py -user=hpcstats +[program:hpcperfstats-rabbitmq-listener] +command=/usr/local/bin/python3 /home/hpcperfstats/tacc_stats/listend.py +user=hpcperfstats autorestart=true startretries=20 stdout_redirect=true @@ -17,8 +17,8 @@ stderr_logfile_maxbytes=0 [program:sync_acct] -command=/usr/local/bin/python3 /home/hpcstats/tacc_stats/dbload/sync_acct.py -user=hpcstats +command=/usr/local/bin/python3 /home/hpcperfstats/tacc_stats/dbload/sync_acct.py +user=hpcperfstats autorestart=true startretries=20 stdout_redirect=true diff --git a/tacc_stats.ini b/tacc_stats.ini index 0873d78..7edb519 100644 --- a/tacc_stats.ini +++ b/tacc_stats.ini @@ -1,6 +1,6 @@ [DEFAULT] machine = stampede3 -data_dir = /hpcstats/ +data_dir = /hpcperfstats/ server = servername.tacc.utexas.edu [RMQ] @@ -11,11 +11,11 @@ rmq_queue = %(machine)s acct_path = %(data_dir)s/accounting archive_dir = %(data_dir)s/archive host_name_ext = %(machine)s.tacc.utexas.edu -dbname = hpcstats +dbname = hpcperfstats daily_archive_dir = %(data_dir)s/daily_archive engine_name = django.db.backends.postgresql_psycopg2 -username = hpcstats -password = hpcstats +username = hpcperfstats +password = hpcperfstats host = db port = 5432 diff --git a/tacc_stats/listend.py b/tacc_stats/listend.py index 543679e..a676b5a 100755 --- a/tacc_stats/listend.py +++ b/tacc_stats/listend.py @@ -5,11 +5,9 @@ from fcntl import flock, LOCK_EX, LOCK_NB -# Append your local repository path here: -# sys.path.append("/home/sg99/tacc_stats") - import tacc_stats.conf_parser as cfg + def on_message(channel, method_frame, header_frame, body): print("found message: %s" % header_frame)