Skip to content

Commit

Permalink
Preparations for better configuration of training pointings
Browse files Browse the repository at this point in the history
(for RF interpolation)
  • Loading branch information
moralejo committed Nov 28, 2024
1 parent 588a248 commit 2969a25
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 41 deletions.
42 changes: 16 additions & 26 deletions lstchain/reco/dl1_to_dl2.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,16 @@
]


def get_training_directions(training_dir):
def get_training_directions(training_dirs):
"""
This function obtains the pointings of the telescope in the RF training
sample.
Parameters:
-----------
training_dir: pathlib.Path, path to the directory under which the folders
containing the DL1 files used in the training are found.
The folders' names are assumed to follow this pattern:
training_dirs: array of strings, each element is the name of one of the
folders containing the DL1 files used in the training. Order is
irrelevant. The folders' names are assumed to follow this pattern:
node_corsika_theta_34.367_az_69.537_
Expand All @@ -77,12 +77,10 @@ def get_training_directions(training_dir):
"""

dirs = glob.glob(str(training_dir) + '/node_corsika*')

training_zd_deg = []
training_az_deg = []

for dir in dirs:
for dir in training_dirs:
c1 = dir.find('_theta_') + 7
c2 = dir.find('_az_', c1) + 4
c3 = dir.find('_', c2)
Expand Down Expand Up @@ -795,10 +793,10 @@ def apply_models(dl1,
cls_disp_sign=None,
effective_focal_length=29.30565 * u.m,
custom_config=None,
interpolate_rf={'energy_regression':False,
'particle_classification':False,
'disp':False},
dl1_training_dir=None,
interpolate_rf={'energy_regression': False,
'particle_classification': False,
'disp': False},
training_pointings=None
):
"""
Apply previously trained Random Forests to a set of data
Expand Down Expand Up @@ -829,10 +827,9 @@ def apply_models(dl1,
interpolate_rf: dictionary. Contains three booleans, 'energy_regression',
'particle_classification', 'disp', indicating which RF predictions
should be interpolated linearly in cos(zenith)
dl1_training_dir: pathlib.Path, path to the directory under which the
folders containing the DL1 MC training data are kept (only folder
names are needed, not the actual DL1 files). Needed for interpolation
of RF predictions, if activated by user via interpolate_rf
training_pointings: array (# of pointings, 2) azimuth, zenith in degrees;
pointings of the MC sample used in the training. Needed for the
interpolation of RF predictions.
Returns
-------
Expand Down Expand Up @@ -867,18 +864,11 @@ def apply_models(dl1,
dl2 = update_disp_with_effective_focal_length(dl2,
effective_focal_length=effective_focal_length)

# If dl1_training_dir was provided (containing folders for each fo the MC
# training pointing nodes), obtain the training pointings, and update the
# DL2 table with the additional info needed for the interpolation:
if dl1_training_dir is not None and dl1_training_dir.is_dir():
training_az_deg, training_zd_deg = get_training_directions(dl1_training_dir)
if True in interpolate_rf.values():
# Interpolation of RF predictions is switched on
training_az_deg = training_pointings[:, 0]
training_zd_deg = training_pointings[:, 1]
dl2 = add_zd_interpolation_info(dl2, training_zd_deg, training_az_deg)
else:
logger.warning('DL1 training directory not found...')
logger.warning('Switching off RF interpolation with zenith!')
interpolate_rf['energy_regression'] = False
interpolate_rf['particle_classification'] = False
interpolate_rf['disp'] = False

# Reconstruction of Energy and disp_norm distance
if isinstance(reg_energy, (str, bytes, Path)):
Expand Down
48 changes: 33 additions & 15 deletions lstchain/scripts/lstchain_dl1_to_dl2.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
interpolate_energy = False
interpolate_gammaness = False
interpolate_direction = False
dl1_training_dir = None
# Read in the settings for the interpolation of Random Forest predictions
# in cos(zd). If activated this avoids the jumps of performance produced
# by the discrete set of pointings in the RF training sample.
Expand All @@ -93,22 +92,15 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
interpolate_gammaness = zdinter['interpolate_gammaness']
if 'interpolate_direction' in zdinter.keys():
interpolate_direction = zdinter['interpolate_direction']
if 'DL1_training_dir' in zdinter.keys():
dl1_training_dir = zdinter['DL1_training_dir']

interpolate_rf = {'energy_regression': interpolate_energy,
'particle_classification': interpolate_gammaness,
'disp': interpolate_direction
}
if dl1_training_dir is None:
# Build name of DL1 MC training files assuming standard pattern:
dummy = models_path.as_posix().replace('/data/models', '/data/mc/DL1')
dl1_training_dir = Path(dummy[:dummy.rfind('/dec')] +
'/TrainingDataset/GammaDiffuse/' +
dummy[dummy.rfind('/dec')+1:])


if interpolate_energy or interpolate_gammaness or interpolate_direction:
dl1_training_dir = None
training_pointings = None
if True in interpolate_rf.values():
logger.warning('Cos(zenith) interpolation will be used in:')
if interpolate_energy:
logger.warning(' energy reconstruction Random Forest')
Expand All @@ -117,6 +109,32 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
if interpolate_direction:
logger.warning(' direction reconstruction Random Forest')

if 'random_forest_zd_interpolation' in config.keys():
zdinter = config['random_forest_zd_interpolation']
if 'dl1_training_dir' in zdinter.keys():
dl1_training_dir = zdinter['dl1_training_dir']

if dl1_training_dir is None:
# Build name of DL1 MC training files assuming standard pattern:
dummy = models_path.as_posix().replace('/data/models', '/data/mc/DL1')
dl1_training_dir = (Path(dummy[:dummy.rfind('/dec')] +
'/TrainingDataset/GammaDiffuse/' +
dummy[dummy.rfind('/dec') + 1:]))

# Obtain the training pointings, needed for the RF interpolation:
if dl1_training_dir.is_dir():
dirs = glob.glob(str(dl1_training_dir) + '/node_corsika*')
training_az_deg, training_zd_deg = get_training_directions(dirs)
else:
logger.warning('DL1 training directory not found...')
logger.warning('Switching off RF interpolation with zenith!')
interpolate_rf['energy_regression'] = False
interpolate_rf['particle_classification'] = False
interpolate_rf['disp'] = False

training_pointings = np.array([training_az_deg, training_zd_deg]).T


if 'lh_fit_config' in config.keys():
lhfit_data = pd.read_hdf(filename, key=dl1_likelihood_params_lstcam_key)
if np.all(lhfit_data['obs_id'] == data['obs_id']) & np.all(lhfit_data['event_id'] == data['event_id']):
Expand Down Expand Up @@ -174,7 +192,7 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
effective_focal_length=effective_focal_length,
custom_config=config,
interpolate_rf=interpolate_rf,
dl1_training_dir=dl1_training_dir)
training_pointings=training_pointings)
elif config['disp_method'] == 'disp_norm_sign':
dl2 = dl1_to_dl2.apply_models(data,
models_dict['cls_gh'],
Expand All @@ -184,7 +202,7 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
effective_focal_length=effective_focal_length,
custom_config=config,
interpolate_rf=interpolate_rf,
dl1_training_dir=dl1_training_dir)
training_pointings=training_pointings)

# Source-dependent analysis
if config['source_dependent']:
Expand Down Expand Up @@ -219,7 +237,7 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
effective_focal_length=effective_focal_length,
custom_config=config,
interpolate_rf=interpolate_rf,
dl1_training_dir=dl1_training_dir)
training_pointings=training_pointings)
elif config['disp_method'] == 'disp_norm_sign':
dl2 = dl1_to_dl2.apply_models(data_with_srcdep_param,
models_dict['cls_gh'],
Expand All @@ -229,7 +247,7 @@ def apply_to_file(filename, models_dict, output_dir, config, models_path):
effective_focal_length=effective_focal_length,
custom_config=config,
interpolate_rf=interpolate_rf,
dl1_training_dir=dl1_training_dir)
training_pointings=training_pointings)

dl2_srcdep = dl2.drop(srcindep_keys, axis=1)
dl2_srcdep_dict[k] = dl2_srcdep
Expand Down

0 comments on commit 2969a25

Please sign in to comment.