Preparations for better configuration of training pointings

(for RF interpolation)
cta-observatory · Nov 28, 2024 · 2969a25 · 2969a25
1 parent 588a248
commit 2969a25
Show file tree

Hide file tree

Showing 2 changed files with 49 additions and 41 deletions.
diff --git a/lstchain/reco/dl1_to_dl2.py b/lstchain/reco/dl1_to_dl2.py
@@ -56,16 +56,16 @@
 ]
 
 
-def get_training_directions(training_dir):
+def get_training_directions(training_dirs):
     """
     This function obtains the pointings of the telescope in the RF training
     sample.
 
     Parameters:
     -----------
-    training_dir: pathlib.Path, path to the directory under which the folders
-    containing the DL1 files used in the training are found.
-    The folders' names are assumed to follow this pattern:
+    training_dirs: array of strings, each element is the name of one of the
+    folders containing the DL1 files used in the training. Order is
+    irrelevant. The folders' names are assumed to follow this pattern:
 
     node_corsika_theta_34.367_az_69.537_
 
@@ -77,12 +77,10 @@ def get_training_directions(training_dir):
 
     """
 
-    dirs = glob.glob(str(training_dir) + '/node_corsika*')
-
     training_zd_deg = []
     training_az_deg = []
 
-    for dir in dirs:
+    for dir in training_dirs:
         c1 = dir.find('_theta_') + 7
         c2 = dir.find('_az_', c1) + 4
         c3 = dir.find('_', c2)
@@ -795,10 +793,10 @@ def apply_models(dl1,
                  cls_disp_sign=None,
                  effective_focal_length=29.30565 * u.m,
                  custom_config=None,
-                 interpolate_rf={'energy_regression':False,
-                                 'particle_classification':False,
-                                 'disp':False},
-                 dl1_training_dir=None,
+                 interpolate_rf={'energy_regression': False,
+                                 'particle_classification': False,
+                                 'disp': False},
+                 training_pointings=None
                  ):
     """
     Apply previously trained Random Forests to a set of data
@@ -829,10 +827,9 @@ def apply_models(dl1,
     interpolate_rf: dictionary. Contains three booleans, 'energy_regression',
         'particle_classification', 'disp', indicating which RF predictions
         should be interpolated linearly in cos(zenith)
-    dl1_training_dir: pathlib.Path, path to the directory under which the
-        folders containing the DL1 MC training data are kept (only folder
-        names are needed, not the actual DL1 files). Needed for interpolation
-        of RF predictions, if activated by user via interpolate_rf
+    training_pointings: array (# of pointings, 2) azimuth, zenith in degrees;
+         pointings of the MC sample used in the training. Needed for the
+         interpolation of RF predictions.
 
     Returns
     -------
@@ -867,18 +864,11 @@ def apply_models(dl1,
         dl2 = update_disp_with_effective_focal_length(dl2,
                                                       effective_focal_length=effective_focal_length)
 
-    # If dl1_training_dir was provided (containing folders for each fo the MC
-    # training pointing nodes), obtain the training pointings, and update the
-    # DL2 table with the additional info needed for the interpolation:
-    if dl1_training_dir is not None and dl1_training_dir.is_dir():
-        training_az_deg, training_zd_deg = get_training_directions(dl1_training_dir)
+    if True in interpolate_rf.values():
+        # Interpolation of RF predictions is switched on
+        training_az_deg = training_pointings[:, 0]
+        training_zd_deg = training_pointings[:, 1]
         dl2 = add_zd_interpolation_info(dl2, training_zd_deg, training_az_deg)
-    else:
-        logger.warning('DL1 training directory not found...')
-        logger.warning('Switching off RF interpolation with zenith!')
-        interpolate_rf['energy_regression'] = False
-        interpolate_rf['particle_classification'] = False
-        interpolate_rf['disp'] = False
 
     # Reconstruction of Energy and disp_norm distance
     if isinstance(reg_energy, (str, bytes, Path)):

diff --git a/lstchain/scripts/lstchain_dl1_to_dl2.py b/lstchain/scripts/lstchain_dl1_to_dl2.py
@@ -81,7 +81,6 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
     interpolate_energy = False
     interpolate_gammaness = False
     interpolate_direction = False
-    dl1_training_dir = None
     # Read in the settings for the interpolation of Random Forest predictions
     # in cos(zd). If activated this avoids the jumps of performance produced
     # by the discrete set of pointings in the RF training sample.
@@ -93,22 +92,15 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
             interpolate_gammaness = zdinter['interpolate_gammaness']
         if 'interpolate_direction' in zdinter.keys():
             interpolate_direction = zdinter['interpolate_direction']
-        if 'DL1_training_dir' in zdinter.keys():
-            dl1_training_dir = zdinter['DL1_training_dir']
 
     interpolate_rf = {'energy_regression': interpolate_energy,
                       'particle_classification': interpolate_gammaness,
                       'disp': interpolate_direction
                       }
-    if dl1_training_dir is None:
-        # Build name of DL1 MC training files assuming standard pattern:
-        dummy = models_path.as_posix().replace('/data/models', '/data/mc/DL1')
-        dl1_training_dir = Path(dummy[:dummy.rfind('/dec')] +
-                                '/TrainingDataset/GammaDiffuse/' +
-                                dummy[dummy.rfind('/dec')+1:])
-
 
-    if interpolate_energy or interpolate_gammaness or interpolate_direction:
+    dl1_training_dir = None
+    training_pointings = None
+    if True in interpolate_rf.values():
         logger.warning('Cos(zenith) interpolation will be used in:')
         if interpolate_energy:
             logger.warning('   energy reconstruction Random Forest')
@@ -117,6 +109,32 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
         if interpolate_direction:
             logger.warning('   direction reconstruction Random Forest')
 
+        if 'random_forest_zd_interpolation' in config.keys():
+            zdinter = config['random_forest_zd_interpolation']
+            if 'dl1_training_dir' in zdinter.keys():
+                dl1_training_dir = zdinter['dl1_training_dir']
+
+        if dl1_training_dir is None:
+            # Build name of DL1 MC training files assuming standard pattern:
+            dummy = models_path.as_posix().replace('/data/models', '/data/mc/DL1')
+            dl1_training_dir = (Path(dummy[:dummy.rfind('/dec')] +
+                                     '/TrainingDataset/GammaDiffuse/' +
+                                     dummy[dummy.rfind('/dec') + 1:]))
+
+        # Obtain the training pointings, needed for the RF interpolation:
+        if dl1_training_dir.is_dir():
+            dirs = glob.glob(str(dl1_training_dir) + '/node_corsika*')
+            training_az_deg, training_zd_deg = get_training_directions(dirs)
+        else:
+            logger.warning('DL1 training directory not found...')
+            logger.warning('Switching off RF interpolation with zenith!')
+            interpolate_rf['energy_regression'] = False
+            interpolate_rf['particle_classification'] = False
+            interpolate_rf['disp'] = False
+
+        training_pointings = np.array([training_az_deg, training_zd_deg]).T
+
+
     if 'lh_fit_config' in config.keys():
         lhfit_data = pd.read_hdf(filename, key=dl1_likelihood_params_lstcam_key)
         if np.all(lhfit_data['obs_id'] == data['obs_id']) & np.all(lhfit_data['event_id'] == data['event_id']):
@@ -174,7 +192,7 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
                                           effective_focal_length=effective_focal_length,
                                           custom_config=config,
                                           interpolate_rf=interpolate_rf,
-                                          dl1_training_dir=dl1_training_dir)
+                                          training_pointings=training_pointings)
         elif config['disp_method'] == 'disp_norm_sign':
             dl2 = dl1_to_dl2.apply_models(data,
                                           models_dict['cls_gh'],
@@ -184,7 +202,7 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
                                           effective_focal_length=effective_focal_length,
                                           custom_config=config,
                                           interpolate_rf=interpolate_rf,
-                                          dl1_training_dir=dl1_training_dir)
+                                          training_pointings=training_pointings)
 
     # Source-dependent analysis
     if config['source_dependent']:
@@ -219,7 +237,7 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
                                                  effective_focal_length=effective_focal_length,
                                                  custom_config=config,
                                                  interpolate_rf=interpolate_rf,
-                                                 dl1_training_dir=dl1_training_dir)
+                                                 training_pointings=training_pointings)
             elif config['disp_method'] == 'disp_norm_sign':
                 dl2 = dl1_to_dl2.apply_models(data_with_srcdep_param,
                                                  models_dict['cls_gh'],
@@ -229,7 +247,7 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
                                                  effective_focal_length=effective_focal_length,
                                                  custom_config=config,
                                                  interpolate_rf=interpolate_rf,
-                                                 dl1_training_dir=dl1_training_dir)
+                                                 training_pointings=training_pointings)
 
             dl2_srcdep = dl2.drop(srcindep_keys, axis=1)
             dl2_srcdep_dict[k] = dl2_srcdep