Merge pull request #105 from grand-mother/dev_database
Dev database
lwpiotr authored Oct 14, 2024
2 parents 96155d2 + d32acf5 commit 3b01985
Showing 14 changed files with 603 additions and 147 deletions.
4 changes: 3 additions & 1 deletion granddb/config.ini.example
@@ -21,8 +21,9 @@ localdir = ["/home/fleg/incoming/"]
 ; If credentials are required to access the repository, they should be given in the [credentials] section using the same name
 ; repository CCIN2P3 is already defined in the database (so it's not necessary to define it here), but credentials for it have
 ; to be supplied in the [credentials] section below
+; THESE DEFINITIONS OVERRIDE THE ONES FROM THE DATABASE
 [repositories]
-CCIN2P3 = ["ssh","cca.in2p3.fr",22,["/sps/grand/data/nancay/GRANDfiles"]]
+CC = ["ssh","cca.in2p3.fr",22,["/sps/grand/data/nancay/GRANDfiles"]]
 
 
 ; Credentials for repositories given as :
@@ -35,6 +36,7 @@ CCIN2P3 = ["ssh","cca.in2p3.fr",22,["/sps/grand/data/nancay/GRANDfiles"]]
 ; To run an ssh-agent just do : `eval $(ssh-agent)` and `ssh-add .ssh/id_rsa`
 [credentials]
 CCIN2P3 = ["john",""]
+CC = ["jim",""]
 SSHTUNNEL = ["joe",""]
 
 ; database to use (only one database can be defined)
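
Illustration (not part of the commit): repository entries are Python-style list literals of the form [protocol, server, port, [paths]], and credentials are [user, password]. A minimal standalone sketch of reading the new CC entries (granddb's own parsing may differ):

import ast
import configparser

config = configparser.ConfigParser()
config.read("config.ini")  # a copy of config.ini.example with real values

# Each value is a Python-style list literal
protocol, server, port, paths = ast.literal_eval(config["repositories"]["CC"])
user, password = ast.literal_eval(config["credentials"]["CC"])
print(protocol, server, port, paths, user)  # ssh cca.in2p3.fr 22 ['/sps/grand/data/nancay/GRANDfiles'] jim
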
299 changes: 258 additions & 41 deletions granddb/granddatalib.py

Large diffs are not rendered by default.

81 changes: 56 additions & 25 deletions granddb/granddblib.py
@@ -22,22 +22,33 @@
 
 
 def casttodb(value):
+    #print(f'{type(value)} - {value}')
     if isinstance(value, numpy.uint32):
-        value = int(value)
-    if isinstance(value, numpy.float32):
-        value = float(value)
-    if isinstance(value, numpy.ndarray):
+        val = int(value)
+    elif isinstance(value, numpy.float32):
+        val = float(value)
+    elif isinstance(value, numpy.ndarray):
         if value.size == 0:
-            value = None
+            val = None
         elif value.size == 1:
-            value = value.item()
+            val = value.item()
         else:
-            value = value.tolist()
-    if isinstance(value, grand.dataio.root_trees.StdVectorList):
-        value = [i for i in value]
-    if isinstance(value, str):
-        value = value.strip().strip('\t').strip('\n')
-    return value
+            val = value.tolist()
+    elif isinstance(value, grand.dataio.root_trees.StdVectorList):
+        val = []
+        # postgres cannot store arrays of arrays... so we split (not sure if really correct)!
+        for i in value:
+            if isinstance(i, numpy.ndarray) or isinstance(i, grand.dataio.root_trees.StdVectorList):
+                val.append(casttodb(i))
+            else:
+                val.append(i)
+
+        #value = [i for i in value]
+    elif isinstance(value, str):
+        val = value.strip().strip('\t').strip('\n')
+    else:
+        val = value
+    return val
 
 
 ## @brief Class to handle the Grand database.
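
Illustration (not part of the commit): the rewritten casttodb() is now a single if/elif chain with an explicit fallback; typical conversions, assuming `from granddb.granddblib import casttodb`:

import numpy

print(casttodb(numpy.uint32(7)))                # -> 7 (plain int)
print(casttodb(numpy.float32(1.5)))             # -> 1.5 (plain float)
print(casttodb(numpy.array([])))                # -> None (empty array)
print(casttodb(numpy.array([42])))              # -> 42 (single element unwrapped)
print(casttodb(numpy.array([[1, 2], [3, 4]])))  # -> [[1, 2], [3, 4]] (tolist)
print(casttodb("  gp13\n"))                     # -> 'gp13' (whitespace stripped)
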
@@ -94,7 +105,8 @@ def __init__(self, host, port, dbname, user, passwd, sshserv="", sshport=22, cre
         Base = automap_base()
 
         Base.prepare(engine, reflect=True)
-        self.sqlalchemysession = Session(engine)
+        self.sqlalchemysession = Session(engine, autoflush=False)
+        #self.sqlalchemysession.no_autoflush = True
         inspection = inspect(engine)
         for table in inspection.get_table_names():
             # for table in engine.table_names(): #this is obsolete
@@ -306,21 +318,37 @@ def register_repository(self, name, protocol, port, server, path, description=""
     # Returns the id_file for the file and a boolean True if the file was not previously in the DB (i.e. it's a new file)
     # and False if the file was already registered. This is useful to know whether the metadata of the file needs to be read
     # or not
-    def register_filename(self, filename, newfilename, id_repository, provider):
+    def register_filename(self, filename, newfilename, dataset, id_repository, provider, targetfile=None):
         import os
         register_file = False
         isnewfile = False
         idfile = None
+        id_dataset = None
+        if targetfile is None:
+            targetfile = newfilename
+        if dataset is not None:
+            id_dataset = self.get_or_create_key('dataset', 'dataset_name', os.path.basename(dataset))
+            filt = {}
+            filt['id_dataset'] = str(casttodb(id_dataset))
+            filt['id_repository'] = str(casttodb(id_repository))
+            ret = self.sqlalchemysession.query(getattr(self._tables['dataset_location'], 'id_dataset')).filter_by(
+                **filt).all()
+            if len(ret) == 0:
+                container = self.tables()['dataset_location'](id_dataset=id_dataset, id_repository=id_repository, path=dataset,
+                                                              description="")
+                self.sqlalchemysession.add(container)
+                self.sqlalchemysession.flush()
 
-        ## Check if file not already registered IN THIS REPO : IF YES, ABORT, IF NO REGISTER
-        #First see if file is registered elsewhere
         file_exist = self.sqlalchemysession.query(self.tables()['file']).filter_by(
-            filename=os.path.basename(newfilename)).first()
+            filename=os.path.basename(targetfile), id_dataset=id_dataset).first()
         if file_exist is not None:
+            # file_exist_here = self.sqlalchemysession.query(self.tables()['file_location']).filter_by(
+            #     id_repository=id_repository).first()
+            # File exists somewhere... see if in the repository we want
             file_exist_here = self.sqlalchemysession.query(self.tables()['file_location']).filter_by(
-                id_repository=id_repository).first()
+                id_repository=id_repository, id_file=file_exist.id_file).first()
             if file_exist_here is None:
-                # file exists in different repo. We only need to register it in the current repo
+                # file exists but in a different repo. We only need to register it in the current repo
                 register_file = True
                 idfile = file_exist.id_file
             else:
@@ -332,11 +360,11 @@ def register_filename(self, filename, newfilename, id_repository, provider):
         if register_file:
             id_provider = self.get_or_create_key('provider', 'provider', provider)
             if isnewfile:
-                # rfile = ROOT.TFile(str(filename))
                 rfile = rdb.RootFile(str(filename))
-                rfile.dataset_name()
+                #rfile.dataset_name()
                 # rfile.file().GetSize()
-                container = self.tables()['file'](filename=os.path.basename(newfilename),
+                container = self.tables()['file'](id_dataset=id_dataset,
+                                                  filename=os.path.basename(targetfile),
                                                   description='autodesc',
                                                   original_name=os.path.basename(filename),
                                                   id_provider=id_provider,
@@ -346,9 +374,10 @@ def register_filename(self, filename, newfilename, id_repository, provider):
             self.sqlalchemysession.flush()
             idfile = container.id_file
             # container = self.tables()['file_location'](id_file=idfile, id_repository=id_repository, path=os.path.dirname(newfilename))
-            container = self.tables()['file_location'](id_file=idfile, id_repository=id_repository, path=newfilename,
+            container = self.tables()['file_location'](id_file=idfile, id_repository=id_repository, path=targetfile,
                                                        description="")
             self.sqlalchemysession.add(container)
+            logger.debug(f"File name {filename} registered")
             # self.sqlalchemysession.flush()
 
         return idfile, isnewfile
@@ -485,12 +514,14 @@ def register_filecontent(self, file, idfile):
         # print('Execution time:', elapsed_time, 'seconds')
         logger.debug(f"execution time {elapsed_time} seconds")
 
-    def register_file(self, orgfilename, newfilename, id_repository, provider):
-        idfile, read_file = self.register_filename(orgfilename, newfilename, id_repository, provider)
+    def register_file(self, orgfilename, newfilename, dataset, id_repository, provider, targetdir=None):
+        idfile, read_file = self.register_filename(orgfilename, newfilename, dataset, id_repository, provider, targetdir)
         if read_file:
             # We read the localfile and not the remote one
             self.register_filecontent(orgfilename, idfile)
             # self.register_filecontent(newfilename,idfile)
+        else:
+            logger.info(f"file {orgfilename} already registered.")
         self.sqlalchemysession.commit()
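
Illustration (not part of the commit): the new dataset-aware call chain, with hypothetical paths and ids; `db` stands for an already-connected granddblib database handler:

db.register_file(
    orgfilename="/home/fleg/incoming/gp13_20241014_0001.root",              # local file whose metadata is read
    newfilename="/sps/grand/data/gp13/GRANDfiles/gp13_20241014_0001.root",  # registered location
    dataset="/sps/grand/data/gp13/GRANDfiles",  # basename "GRANDfiles" becomes the dataset_name key
    id_repository=1,                            # hypothetical repository id
    provider="jim",
)
# register_filename() records the dataset_location and file rows (flushing, not
# committing); register_file() commits once at the end.
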


35 changes: 20 additions & 15 deletions granddb/rootdblib.py
@@ -1,6 +1,7 @@
 import ROOT
 import grand.dataio.root_trees as groot
 import grand.manage_log as mlg
+import os
 logger = mlg.get_logger_for_script(__name__)
 #mlg.create_output_for_logger("debug", log_stdout=True)
 
@@ -136,21 +137,21 @@ class RootFile:
         'hadronic_model': 'id_hadronic_model',
         'low_energy_model': 'id_low_energy_model',
         'cpu_time': 'cpu_time',
-        # 'long_pd_depth': 'long_pd_depth',
-        # 'long_pd_eminus': 'long_pd_eminus',
-        # 'long_pd_eplus': 'long_pd_eplus',
-        # 'long_pd_muminus': 'long_pd_muminus',
-        # 'long_pd_muplus': 'long_pd_muplus',
-        # 'long_pd_gamma': 'long_pd_gamma',
-        # 'long_pd_hadron': 'long_pd_hadron',
-        # 'long_gamma_elow': 'long_gamma_elow',
-        # 'long_e_elow': 'long_e_elow',
-        # 'long_e_edep': 'long_e_edep',
-        # 'long_mu_edep': 'long_mu_edep',
-        # 'long_mu_elow': 'long_mu_elow',
-        # 'long_hadron_edep': 'long_hadron_edep',
-        # 'long_hadron_elow': 'long_hadron_elow',
-        # 'long_neutrino': 'long_neutrino',
+        #'long_pd_depth': 'long_pd_depth',
+        #'long_pd_eminus': 'long_pd_eminus',
+        #'long_pd_eplus': 'long_pd_eplus',
+        #'long_pd_muminus': 'long_pd_muminus',
+        #'long_pd_muplus': 'long_pd_muplus',
+        #'long_pd_gamma': 'long_pd_gamma',
+        #'long_pd_hadron': 'long_pd_hadron',
+        #'long_gamma_elow': 'long_gamma_elow',
+        #'long_e_elow': 'long_e_elow',
+        #'long_e_edep': 'long_e_edep',
+        #'long_mu_edep': 'long_mu_edep',
+        #'long_mu_elow': 'long_mu_elow',
+        #'long_hadron_edep': 'long_hadron_edep',
+        #'long_hadron_elow': 'long_hadron_elow',
+        #'long_neutrino': 'long_neutrino',
         'event_weight': 'event_weight'
     }
     tadcToDB = {
@@ -307,9 +308,11 @@ class RootFile:
     #TreeList is a dict with name of the trees as key and the class corresponding to its type as value
     TreeList = {}
     file = None
+    filename = None
 
     ## We retrieve the list of TTrees in the file and store them as the corresponding class from root_trees.py in the dict TreeList
     def __init__(self, f_name):
+        self.filename = f_name
         self.TreeList.clear()
         self.file = ROOT.TFile(f_name)
         for key in self.file.GetListOfKeys():
@@ -341,6 +344,8 @@ def copy_content_to(self, file):
     # [extra]-> given by user (metadata ?)
     # serial -> automatically incremented in case of new version (how to do that ?)
     def dataset_name(self):
+        name = os.path.basename(os.path.dirname(self.filename))
+        return name
         treename = 'trun'
         name = "noname"
         for run in self.TreeList[treename].get_list_of_runs():
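
Illustration (not part of the commit): dataset_name() now short-circuits to the parent directory name of the file, so the tree-based naming logic below the early return is left unreachable:

import os

# With the early return above, the dataset name is just the parent directory:
filename = "/sps/grand/data/gp13/GRANDfiles/gp13_20241014_0001.root"  # hypothetical path
print(os.path.basename(os.path.dirname(filename)))  # -> "GRANDfiles"
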
4 changes: 4 additions & 0 deletions scripts/archiving/config.properties.gaa
@@ -0,0 +1,4 @@
aipTempDirectory=/sps/grand/prod_grand/tests/archivage/archs/gaa/
configMetadataDescriptiveDC=dc_gaa.xml
configDocumentation=GRAND_DMP_2024.pdf
representationID_1=representation1
4 changes: 4 additions & 0 deletions scripts/archiving/config.properties.gp13
@@ -0,0 +1,4 @@
aipTempDirectory=/sps/grand/prod_grand/tests/archivage/archs/gp13/
configMetadataDescriptiveDC=dc_gp13.xml
configDocumentation=GRAND_DMP_2024.pdf
representationID_1=representation1
76 changes: 76 additions & 0 deletions scripts/archiving/create_archive.bash
@@ -0,0 +1,76 @@
#!/bin/bash
datadir="/sps/grand/data"
archive_root_name="doi+10.25520+in2p3.archive.grand"
irods_path='/grand/home/trirods/data/archives/'

usage="$(basename "$0") [-d DATE] [-s SITE]
Archive some Grand raw files into irods :
  -s site (gaa, gp13)
  -d YYYY-MM to be archived
"

while getopts ":d:s:" option; do
  case $option in
    d)
      if [[ ${OPTARG} =~ ^([0-9]{4})-([0][1-9]|[1][0-2]|[1-9])$ ]]; then
        date=$(date --date="${BASH_REMATCH[1]}-${BASH_REMATCH[2]}-01" "+%Y_%m")
        dir=$(date --date="${BASH_REMATCH[1]}-${BASH_REMATCH[2]}-01" "+%Y/%m")
      else
        echo "Date ${OPTARG} should be in format YYYY-MM"
        exit 1
      fi
      ;;
    s)
      if [[ ${OPTARG} =~ gp13|gaa ]]; then
        site=${OPTARG}
      else
        echo "Site should be gp13 or gaa"
        exit 1
      fi
      ;;
    :)
      printf "option -${OPTARG} needs an argument\n"
      exit 1;;
    ?) # Invalid option
      printf "Error: Invalid option -${OPTARG}\n"
      exit 1;;
  esac
done

if [ ! "$date" ] || [ ! "$site" ]; then
  echo "arguments -d and -s must be provided"
  echo "$usage" >&2; exit 1
fi

outfile="${archive_root_name}.${site}.${date}"
logfile=archs/${site}/${outfile}--$(date "+%Y_%m_%d_%H%M%S").log

find $datadir/$site/raw/$dir/ -name "*.bin" > list_files_${site}
echo "List of files to archive :" >> ${logfile}
cat list_files_${site} >> ${logfile}

java -jar createAIP.jar --configfile=config.properties.${site} --listobjects=list_files_${site} -i ${outfile}

echo "Archive ready to tar" >> ${logfile}

tar -cvf archs/${site}/${outfile}.tar archs/${site}/${outfile}

echo "Archive tarred" >> ${logfile}

echo "Push archs/${site}/${outfile}.tar to irods" >> ${logfile}
# Put file into irods
sfile=archs/${site}/${outfile}.tar
ipath="${irods_path}${site}/raw"
ifile="${ipath}/${outfile}.tar"
echo "imkdir -p $ipath" >> ${logfile}
imkdir -p $ipath >> ${logfile} 2>&1
echo "iput -f $sfile $ifile" >> ${logfile}
#iput -f $sfile $ifile >> ${logfile} 2>&1
#iput_status=$?
#if [ "$iput_status" -ne 0 ]; then
#  notify=1
#fi

rm -rf archs/${site}/${outfile}
rm $sfile
echo "Month archived." >> ${logfile}
32 changes: 32 additions & 0 deletions scripts/archiving/dc_gaa.xml
@@ -0,0 +1,32 @@
<?xml version="1.0" encoding="UTF-8"?>
<resource xmlns="http://datacite.org/schema/kernel-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.4/metadata.xsd">
<identifier>
<identifier identifierType="DOI">doi+10.25520+in2p3.archive+grand+gaa</identifier>
</identifier>
<titles>
<title xml:lang="EN">Grand Raw Files from GAA</title>
</titles>
<creators>
<creator>
<creatorName nameType="Organizational">Grand Observatory</creatorName>
<nameIdentifier>Grand Observatory</nameIdentifier>
</creator>
</creators>
<publisher>Grand Observatory</publisher>
<publicationYear>2024</publicationYear>
<resourceType resourceTypeGeneral="Dataset">Grand raw files</resourceType>
<descriptions>
<description xml:lang="EN">Grand Raw Data from GAA Observatory</description>
</descriptions>
<subjects>
<subject subjectScheme="wikidata">radio astronomy cosmics rays neutrinos</subject>
</subjects>
<dates>
<date dateType="Copyrighted">2024</date>
</dates>
<geoLocations>
<geoLocation>
<geoLocationPlace>Argentina</geoLocationPlace>
</geoLocation>
</geoLocations>
</resource>
32 changes: 32 additions & 0 deletions scripts/archiving/dc_gp13.xml
@@ -0,0 +1,32 @@
<?xml version="1.0" encoding="UTF-8"?>
<resource xmlns="http://datacite.org/schema/kernel-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.4/metadata.xsd">
<identifier>
<identifier identifierType="DOI">doi+10.25520+in2p3.archive+grand+gp13</identifier>
</identifier>
<titles>
<title xml:lang="EN">Grand Raw Files from GP13</title>
</titles>
<creators>
<creator>
<creatorName nameType="Organizational">Grand Observatory</creatorName>
<nameIdentifier>Grand Observatory</nameIdentifier>
</creator>
</creators>
<publisher>Grand Observatory</publisher>
<publicationYear>2024</publicationYear>
<resourceType resourceTypeGeneral="Dataset">Grand raw files</resourceType>
<descriptions>
<description xml:lang="EN">Grand Raw Data from GP13 Observatory</description>
</descriptions>
<subjects>
<subject subjectScheme="wikidata">radio astronomy cosmics rays neutrinos</subject>
</subjects>
<dates>
<date dateType="Copyrighted">2024</date>
</dates>
<geoLocations>
<geoLocation>
<geoLocationPlace>China</geoLocationPlace>
</geoLocation>
</geoLocations>
</resource>