Merge pull request #1179 from cta-observatory/run_summary_ctar1

Make run_summary script work with ctar1 data, store format string
cta-observatory · Nov 14, 2023 · 3303fdd · 3303fdd
2 parents 644e43b + 068af50
commit 3303fdd
Show file tree

Hide file tree

Showing 2 changed files with 78 additions and 36 deletions.
diff --git a/lstchain/scripts/lstchain_create_run_summary.py b/lstchain/scripts/lstchain_create_run_summary.py
@@ -8,7 +8,7 @@
 import logging
 from collections import Counter
 from datetime import datetime
-from glob import glob
+from astropy.io import fits
 from pathlib import Path
 
 import numpy as np
@@ -82,6 +82,17 @@
 }
 
 
+COUNTER_DEFAULTS = dict(
+    ucts_timestamp=-1,
+    run_start=-1,
+    dragon_reference_time=-1,
+    dragon_reference_module_id=-1,
+    dragon_reference_module_index=-1,
+    dragon_reference_counter=-1,
+    dragon_reference_source=None,
+)
+
+
 def get_list_of_files(r0_path):
     """
     Get the list of R0 files from a given date.
@@ -107,6 +118,7 @@ def get_list_of_runs(list_of_files):
     ----------
     list_of_files : pathlib.Path.glob
         List of files
+
     Returns
     -------
     list_of_run_objects
@@ -176,9 +188,11 @@ def guess_run_type(date_path, run_number, counters, n_events=500):
     config = Config()
     config.LSTEventSource.apply_drs4_corrections = False
     config.LSTEventSource.pointing_information = False
-    config.EventTimeCalculator.dragon_reference_time = int(counters["dragon_reference_time"])
-    config.EventTimeCalculator.dragon_reference_counter = int(counters["dragon_reference_counter"])
-    config.EventTimeCalculator.dragon_module_id = int(counters["dragon_reference_module_id"])
+
+    if counters["dragon_reference_source"] is not None:
+        config.EventTimeCalculator.dragon_reference_time = int(counters["dragon_reference_time"])
+        config.EventTimeCalculator.dragon_reference_counter = int(counters["dragon_reference_counter"])
+        config.EventTimeCalculator.dragon_module_id = int(counters["dragon_reference_module_id"])
 
     try:
         with LSTEventSource(filename, config=config, max_events=n_events) as source:
@@ -198,8 +212,7 @@ def guess_run_type(date_path, run_number, counters, n_events=500):
             run_type = "ERROR"
 
     except Exception as err:
-        log.error(f"File {filename} has error: {err!r}")
-
+        log.exception(f"File {filename} has error: {err!r}")
         run_type = "ERROR"
 
     return run_type
@@ -220,7 +233,7 @@ def is_db_available(server: str) -> bool:
 def get_run_type_from_TCU(run_number, tcu_server):
     """
     Get the run type of a run from the TCU database.
-    
+
     Parameters
     ----------
     run_number : int
@@ -233,29 +246,29 @@ def get_run_type_from_TCU(run_number, tcu_server):
     run_type: str
         Type of run (DRS4, PEDCALIB, DATA)
     """
-    
+
     client = MongoClient(tcu_server)
     collection = client["lst1_obs_summary"]["camera"]
     summary = collection.find_one({"run_number": int(run_number)})
 
     if summary is not None:
-        
+
         run_type = summary["kind"].replace(
             "calib_drs4", "DRS4"
         ).replace(
             "calib_ped_run", "PEDCALIB"
         ).replace(
             "data_taking", "DATA")
-        
+
         return run_type
 
 
 def type_of_run(date_path, run_number, tcu_server):
     """
-    Get the run type of a run by first looking in the TCU 
+    Get the run type of a run by first looking in the TCU
     database, and if its run type is not available, guess it
     based on the percentage of different event types.
-    
+
     Parameters
     ----------
     date_path : pathlib.Path
@@ -264,25 +277,26 @@ def type_of_run(date_path, run_number, tcu_server):
         Run id
     tcu_server : str
         Host of the TCU database
-        
+
     Returns
     -------
     run_type: str
         Type of run (DRS4, PEDCALIB, DATA, ERROR)
     """
- 
+
     run_type = get_run_type_from_TCU(run_number, tcu_server)
-    
+
     if run_type is None:
-        counters = read_counters(date_path, run_number)
+        counters = read_counters_or_get_default(date_path, run_number)
         run_type = guess_run_type(date_path, run_number, counters)
-    
+
     return run_type
 
 
-def read_counters(date_path, run_number):
+def read_counters(path):
     """
-    Get initial valid timestamps from the first subrun.
+    Get initial valid timestamps from the first subrun for "old" data.
+
     Write down the reference Dragon module used, reference event_id.
 
     Parameters
@@ -296,15 +310,16 @@ def read_counters(date_path, run_number):
     -------
     dict: reference counters and timestamps
     """
-    pattern = date_path / f"LST-1.1.Run{run_number:05d}.0000*.fits.fz"
-    try:
-        path = glob(str(pattern))[0]
-        f = MultiFiles(path, all_streams=True, all_subruns=False)
+    with MultiFiles(path, all_streams=True, all_subruns=False) as f:
         first_event = next(f)
 
         if first_event.event_id != 1:
             raise ValueError("Must be used on first file streams (subrun)")
 
+        if f.cta_r1:
+            # we don't use dragon counters at all in case of new data
+            return COUNTER_DEFAULTS
+
         module_index = np.where(first_event.lstcam.module_status)[0][0]
         module_id = np.where(f.camera_config.lstcam.expected_modules_id == module_index)[0][0]
         dragon_counters = first_event.lstcam.counters.view(DRAGON_COUNTERS_DTYPE)
@@ -340,18 +355,42 @@ def read_counters(date_path, run_number):
             dragon_reference_source=dragon_reference_source,
         )
 
-    except Exception as err:
-        log.exception(f"Files {pattern} have error: {err}")
 
-        return dict(
-            ucts_timestamp=-1,
-            run_start=-1,
-            dragon_reference_time=-1,
-            dragon_reference_module_id=-1,
-            dragon_reference_module_index=-1,
-            dragon_reference_counter=-1,
-            dragon_reference_source=None,
-        )
+
+def read_counters_or_get_default(date_path, run_number):
+    """
+    Calls read_counters and returns default values in case of an error.
+    """
+    pattern = f"LST-1.1.Run{run_number:05d}.0000*.fits.fz"
+    try:
+        path = next(date_path.glob(pattern))
+    except StopIteration:
+        log.error("No input file found for date %s, run number %d", date_path, run_number)
+        return COUNTER_DEFAULTS
+
+    try:
+        return read_counters(path)
+    except Exception:
+        log.exception("Error getting counter values for date %s, run number %d", date_path, run_number)
+        return COUNTER_DEFAULTS
+
+
+def get_data_format(date_path, run_number):
+    """Get data format (name of protobuf object) in data file"""
+    pattern = f"LST-1.1.Run{run_number:05d}.0000*.fits.fz"
+    try:
+        path = next(date_path.glob(pattern))
+    except StopIteration:
+        log.error("No input file found for date %s, run number %d", date_path, run_number)
+        return ""
+
+    try:
+        with fits.open(path) as hdul:
+            events = hdul["Events"]
+            return events.header["PBFHEAD"]
+    except Exception:
+        log.exception("Error getting data format for date %s, run number %d", date_path, run_number)
+        return ""
 
 
 def main():
@@ -376,8 +415,9 @@ def main():
     file_list = get_list_of_files(date_path)
     runs = get_list_of_runs(file_list)
     run_numbers, n_subruns = get_runs_and_subruns(runs)
+    data_format = [get_data_format(date_path, run) for run in run_numbers]
 
-    reference_counters = [read_counters(date_path, run) for run in run_numbers]
+    reference_counters = [read_counters_or_get_default(date_path, run) for run in run_numbers]
 
     if is_db_available(args.tcu_server):
         run_types = [
@@ -391,6 +431,7 @@ def main():
             for (run, counters) in zip(run_numbers, reference_counters)
         ]
 
+
     run_summary = Table(
         {
             col: np.array([d[col] for d in reference_counters], dtype=dtype)
@@ -402,6 +443,7 @@ def main():
     run_summary.add_column(run_numbers, name="run_id", index=0)
     run_summary.add_column(n_subruns, name="n_subruns", index=1)
     run_summary.add_column(run_types, name="run_type", index=2)
+    run_summary.add_column(data_format, name="data_format", index=3)
     run_summary.write(
         args.output_dir / f"RunSummary_{args.date}.ecsv",
         format="ascii.ecsv",

diff --git a/setup.py b/setup.py
@@ -42,7 +42,7 @@ def find_scripts(script_dir, prefix):
         'astropy~=5.0',
         'bokeh~=2.0',
         'ctapipe~=0.19.2',
-        'ctapipe_io_lst~=0.22.0',
+        'ctapipe_io_lst~=0.22.3',
         'ctaplot~=0.6.4',
         'eventio>=1.9.1,<2.0.0a0',  # at least 1.1.1, but not 2
         'gammapy~=1.1',