Skip to content

Commit

Permalink
Merge pull request #419 from MetOffice/feature-self-updating-database
Browse files Browse the repository at this point in the history
Feature self updating database
  • Loading branch information
andrewgryan authored Jul 2, 2020
2 parents 99eaf8b + 2578b20 commit 767d591
Show file tree
Hide file tree
Showing 11 changed files with 106 additions and 131 deletions.
2 changes: 1 addition & 1 deletion forest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
.. automodule:: forest.services
"""
__version__ = '0.20.3'
__version__ = '0.20.4'

from .config import *
from . import (
Expand Down
35 changes: 35 additions & 0 deletions forest/app_hooks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import forest.main
import forest.cli.main
import forest.data as data


class DatasetSyncCallback:
    """Callable that asks every sync-capable dataset to synchronize.

    :param datasets: iterable of dataset objects; those exposing a ``sync``
        attribute are synchronized, the rest are ignored
    """
    def __init__(self, datasets):
        self.datasets = datasets

    def __call__(self):
        # Lazy filter: each dataset is inspected immediately before it is
        # synchronized, never ahead of time.
        syncable = (candidate for candidate in self.datasets
                    if hasattr(candidate, "sync"))
        for candidate in syncable:
            candidate.sync()


def on_server_loaded(server_context):
    """Run start-up tasks and schedule periodic dataset synchronization.

    :param server_context: object providing ``add_periodic_callback``; the
        synchronization callback is registered on it
    """
    data.on_server_loaded()

    # Re-create the application configuration from the command line so the
    # configured datasets can be kept up to date in the background.
    _, argv = forest.cli.main.parse_args()
    config = forest.main.configure(argv)

    minutes = 15
    period_ms = minutes * 60 * 1000  # callback period in milliseconds
    sync_datasets = DatasetSyncCallback(list(config.datasets))
    server_context.add_periodic_callback(sync_datasets, period_ms)


def on_session_destroyed(session_context):
    """Exit the process when a session closes and auto-shutdown is enabled.

    Called when a session is closed (e.g. tab closed or time out). Nothing
    happens unless ``data.AUTO_SHUTDOWN`` is truthy.

    :param session_context: context of the session that ended (unused)
    """
    if not data.AUTO_SHUTDOWN:
        return

    import sys
    farewell = '\033[1;31mThe session has ended - tab closed or timeout. \n\n --- Terminating the Forest progam and relinquishing control of port. ---\033[1;00m'
    sys.exit(farewell)
2 changes: 0 additions & 2 deletions forest/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,3 @@ def add_bokeh_arguments(parser):
parser.add_argument(
"--allow-websocket-origin", metavar="HOST[:PORT]",
help="public hostnames that may connect to the websocket")


13 changes: 13 additions & 0 deletions forest/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import os
import string
import yaml
import forest.drivers
import forest.state
from dataclasses import dataclass, field
from collections import defaultdict
Expand Down Expand Up @@ -247,6 +248,18 @@ def file_groups(self):
return [FileGroup(**data)
for data in self.data["files"]]

@property
def datasets(self):
    """Generate one dataset per file group.

    Each dataset is built by ``forest.drivers.get_dataset`` from the
    group's ``file_type`` and a settings dict copied from the group's
    attributes. A fresh generator is produced on every access.
    """
    setting_names = (
        "label",
        "pattern",
        "locator",
        "database_path",
        "directory",
    )
    for group in self.file_groups:
        settings = {name: getattr(group, name) for name in setting_names}
        yield forest.drivers.get_dataset(group.file_type, settings)


class FileGroup(object):
"""Meta-data needed to describe group of files
Expand Down
50 changes: 50 additions & 0 deletions forest/drivers/unified_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import datetime as dt
import numpy as np
import netCDF4
import sqlite3
import forest.db
import forest.util
import forest.map_view
from forest import (
Expand All @@ -27,6 +29,51 @@ class NotFound(Exception):
pass


class Sync:
    """Process to synchronize SQL database

    Calling an instance globs for files matching ``pattern`` (inside
    ``directory`` when one is given), compares their base names with the
    ``name`` column of the ``file`` table and inserts every file that is
    not recorded yet.

    :param database_path: path to the SQLite database file
    :param pattern: glob pattern, used both on the file system and in the
        SQL ``GLOB`` query
    :param directory: directory prepended to the pattern and to file
        names, or ``None`` to use them unchanged
    """
    def __init__(self, database_path, pattern, directory):
        self.database_path = database_path
        self.pattern = pattern
        self.directory = directory

    def __call__(self):
        print(f"sync: {self.database_path} {self.pattern} {self.directory}")

        # Base names of the files currently matching the pattern
        found_names = {os.path.basename(path)
                       for path in glob.glob(self.full_path(self.pattern))}

        # Files that exist but are missing from the database
        extra_names = found_names - self._database_names()
        if not extra_names:
            return

        # Sorted so insertion and log order are reproducible between runs
        extra_paths = [self.full_path(name) for name in sorted(extra_names)]

        # Add NetCDF files to database
        print("connecting to: {}".format(self.database_path))
        with forest.db.Database.connect(self.database_path) as database:
            for path in extra_paths:
                print("inserting: '{}'".format(path))
                database.insert_netcdf(path)
        print("finished")

    def _database_names(self):
        """Return the set of base names recorded in the ``file`` table"""
        query = "SELECT name FROM file WHERE name GLOB :pattern;"
        connection = sqlite3.connect(self.database_path)
        try:
            cursor = connection.cursor()
            rows = cursor.execute(query, {"pattern": self.pattern})
            return {os.path.basename(name) for (name,) in rows}
        finally:
            # Release the connection even when the query fails
            connection.close()

    def full_path(self, name):
        """Prepend directory if available"""
        if self.directory is None:
            return name
        return os.path.join(self.directory, name)


class Dataset:
def __init__(self,
label=None,
Expand All @@ -39,6 +86,9 @@ def __init__(self,
self.pattern = pattern
self.use_database = locator == "database"
if self.use_database:
self.sync = Sync(database_path,
pattern,
directory)
self.database = db.get_database(database_path)
self.locator = db.Locator(self.database.connection,
directory=directory)
Expand Down
19 changes: 7 additions & 12 deletions forest/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,9 @@ def map_figure(x_range, y_range):
return figure


def main(argv=None):

def configure(argv=None):
args = parse_args.parse_args(argv)
data.AUTO_SHUTDOWN = args.auto_shutdown

if len(args.files) > 0:
if args.config_file is not None:
raise Exception('--config-file and [FILE [FILE ...]] not compatible')
Expand All @@ -63,6 +61,11 @@ def main(argv=None):
variables=cfg.combine_variables(
os.environ,
args.variables))
return config


def main(argv=None):
config = configure(argv=argv)

# Feature toggles
if "feature" in config.plugins:
Expand Down Expand Up @@ -93,15 +96,7 @@ def main(argv=None):
datasets = {}
datasets_by_pattern = {}
label_to_pattern = {}
for group in config.file_groups:
settings = {
"label": group.label,
"pattern": group.pattern,
"locator": group.locator,
"database_path": group.database_path,
"directory": group.directory
}
dataset = drivers.get_dataset(group.file_type, settings)
for group, dataset in zip(config.file_groups, config.datasets):
datasets[group.label] = dataset
datasets_by_pattern[group.pattern] = dataset
label_to_pattern[group.label] = group.pattern
Expand Down
15 changes: 0 additions & 15 deletions forest/server_lifecycle.py

This file was deleted.

1 change: 0 additions & 1 deletion server/housekeeping/cron.file

This file was deleted.

18 changes: 0 additions & 18 deletions server/housekeeping/update-database-container.sh

This file was deleted.

10 changes: 0 additions & 10 deletions server/housekeeping/update-database-cron.sh

This file was deleted.

72 changes: 0 additions & 72 deletions server/housekeeping/update-database.py

This file was deleted.

0 comments on commit 767d591

Please sign in to comment.