Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract calibration modules from the reader #127

Merged
merged 30 commits into from
Aug 30, 2024
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
923f4d2
Fix KLM test
mraspaud Mar 26, 2024
58a9d0e
Refactor all calibration outside of the reader
mraspaud Mar 27, 2024
52ae086
Ruff fix
mraspaud Mar 27, 2024
bc9f8fc
Fix quotes
mraspaud Mar 27, 2024
8c7b140
Make a calibration subpackage
mraspaud Mar 27, 2024
97394c4
Fix numpy 2 errors
mraspaud Aug 20, 2024
9185fc1
Make calibration methods an entry point
mraspaud Aug 20, 2024
0c6bcc2
Allow tsm masking to be passed a dataset
mraspaud Aug 22, 2024
1680c99
Refactor to use xarray objects
mraspaud Aug 26, 2024
f746e15
Merge branch 'main' into refactor-calibration
mraspaud Aug 27, 2024
e4db3ae
Use ABC instead of using six
mraspaud Aug 27, 2024
74b45e5
Merge branch 'main' into refactor-calibration
mraspaud Aug 28, 2024
155ab7d
Add xarray to dependencies
mraspaud Aug 28, 2024
214ee92
Add xarray to ci
mraspaud Aug 28, 2024
167748b
Give a minimal version to xarray for snyk to be happier
mraspaud Aug 28, 2024
b1204de
Fix minimal xarray version
mraspaud Aug 28, 2024
247c770
Revert "Ruff fix"
mraspaud Aug 28, 2024
6d29442
Revert "Fix quotes"
mraspaud Aug 28, 2024
ac368bf
Move xaray imports to top of file
mraspaud Aug 29, 2024
d4aab34
Start refactoring times
mraspaud Aug 29, 2024
52f95e5
Fix tests after refactoring
mraspaud Aug 29, 2024
04206d9
Refactor get_counts_as_dataset
mraspaud Aug 29, 2024
3d9188b
Refactor time names
mraspaud Aug 29, 2024
806fec3
Simplify jday computation
mraspaud Aug 29, 2024
aeab152
Simplify jday computation
mraspaud Aug 29, 2024
26f1bde
Remove unused code
mraspaud Aug 29, 2024
6289b1a
Fix jday computation
mraspaud Aug 29, 2024
f48480f
Implement more robust PRT gap search
mraspaud Aug 29, 2024
7de229d
Sort imports with ruff
mraspaud Aug 30, 2024
38d4bb5
Use double quotes
mraspaud Aug 30, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ exclude: '^$'
fail_fast: false

repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
#- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: 'v0.3.2'
hooks:
- id: ruff
args: [--fix]
#rev: 'v0.3.2'
#hooks:
#- id: ruff
#args: [--fix]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
Expand Down
1 change: 1 addition & 0 deletions continuous_integration/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ dependencies:
- packaging
- pytest
- pytest-cov
- xarray
- pip
- pip:
- docutils
Expand Down
93 changes: 80 additions & 13 deletions pygac/calibration.py → pygac/calibration/noaa.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,78 @@
LOG = logging.getLogger(__name__)


def calibrate(ds, custom_coeffs=None, coeffs_file=None):
"""Calibrate the dataset `ds`, possibly using custom_coeffs or a coeff file.

Args:
ds: xarray Dataset object containing the data to calibrate.
custom_calibration: dictionary with a subset of user defined satellite specific
calibration coefficients
calibration_file: path to json file containing default calibrations
"""
channels = ds["channels"].data
times = ds.coords["times"]
scan_line_numbers = ds["scan_line_index"].data

calibration_coeffs = Calibrator(
ds.attrs["spacecraft_name"],
custom_coeffs=custom_coeffs,
coeffs_file=coeffs_file
)

# `times` is in nanosecond resolution. However, convertion from datetime64[ns] to datetime objects
# does not work, and anyway only the date is needed.
start_time = times[0].dt
year = start_time.year.item()
jday = start_time.dayofyear.item()


corr = ds.attrs['sun_earth_distance_correction_factor']

# how many reflective channels are there ?
tot_ref = channels.shape[2] - 3

channels[:, :, 0:tot_ref] = calibrate_solar(
channels[:, :, 0:tot_ref],
np.arange(tot_ref),
year, jday,
calibration_coeffs,
corr
)

prt = ds["prt_counts"].data
ict = ds["ict_counts"].data
space = ds["space_counts"].data

ir_channels_to_calibrate = [3, 4, 5]

for chan in ir_channels_to_calibrate:
channels[:, :, chan - 6] = calibrate_thermal(
channels[:, :, chan - 6],
prt,
ict[:, chan - 3],
space[:, chan - 3],
scan_line_numbers,
chan,
calibration_coeffs
)

new_ds = ds.copy()
new_ds["channels"].data = channels

new_ds.attrs["calib_coeffs_version"] = calibration_coeffs.version

return new_ds


class CoeffStatus(Enum):
"""Indicates the status of calibration coefficients."""
NOMINAL = 'nominal'
PROVISIONAL = 'provisional'
EXPERIMENTAL = 'experimental'


class Calibrator(object):
class Calibrator:
"""Factory class to create namedtuples holding the calibration coefficients.

Attributes:
Expand Down Expand Up @@ -185,7 +249,7 @@
"""Determine coefficient version."""
md5_hash = hashlib.md5(coeff_file_content)
digest = md5_hash.hexdigest()
version_dict = cls.version_hashs.get(

Check warning on line 252 in pygac/calibration/noaa.py

View check run for this annotation

codefactor.io / CodeFactor

pygac/calibration/noaa.py#L252

Use of insecure MD2, MD4, MD5, or SHA1 hash function. (B303)
digest,
{'name': None, 'status': None}
)
Expand Down Expand Up @@ -393,14 +457,15 @@
# Note that the prt values are the average value of the three readings from one of the four
# PRTs. See reader.get_telemetry implementations.
prt_threshold = 50 # empirically found and set by Abhay Devasthale
offset = 0

for i, prt_val in enumerate(prt):
for offset in range(5):
# According to the KLM Guide the fill value between PRT measurments is 0, but we search
# for the first measurment gap using the threshold. Is this on purpose?
if prt_val < prt_threshold:
offset = i
# for the first measurement gap using the threshold, because the fill value is in practice
# not always exactly 0.
if np.median(prt[(line_numbers - line_numbers[0]) % 5 == offset]) < prt_threshold:
break
else:
raise IndexError("No PRT 0-index found!")

# get the PRT index, iprt equals to 0 corresponds to the measurement gaps
iprt = (line_numbers - line_numbers[0] + 5 - offset) % 5
Expand Down Expand Up @@ -450,16 +515,18 @@
if channel == 3:
zeros = ict < ict_threshold
nonzeros = np.logical_not(zeros)

ict[zeros] = np.interp((zeros).nonzero()[0],
(nonzeros).nonzero()[0],
ict[nonzeros])
try:
ict[zeros] = np.interp((zeros).nonzero()[0],
(nonzeros).nonzero()[0],
ict[nonzeros])
except ValueError: # 3b has no valid data
return counts
zeros = space < space_threshold
nonzeros = np.logical_not(zeros)

space[zeros] = np.interp((zeros).nonzero()[0],
(nonzeros).nonzero()[0],
space[nonzeros])
(nonzeros).nonzero()[0],
space[nonzeros])

# convolving and smoothing PRT, ICT and SPACE values
if lines > 51:
Expand Down Expand Up @@ -491,7 +558,7 @@
# TsBB = A + B*TBB (7.1.2.4-3)
# NBB = c1*nu_e^3/(exp(c2*nu_e/TsBB) - 1) (7.1.2.4-3)
# where the constants of the Planck function are defined as c1 = 2*h*c^2, c2 = h*c/k_B.
# constatns
# constants
c1 = 1.1910427e-5 # mW/m^2/sr/cm^{-4}
c2 = 1.4387752 # cm K
# coefficients
Expand Down
3 changes: 2 additions & 1 deletion pygac/gac_klm.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,8 @@
("spacecraft_altitude_above_reference_ellipsoid", ">u2"),
("angular_relationships", ">i2", (153, )),
("zero_fill3", ">i2", (3, )),
("earth_location", ">i4", (102, )),
("earth_location", [("lats", ">i2"),
("lons", ">i2")], (51,)),
("zero_fill4", ">i4", (2, )),
# HRPT MINOR FRAME TELEMETRY
("frame_sync", ">u2", (6, )),
Expand Down
3 changes: 2 additions & 1 deletion pygac/gac_pod.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
("number_of_meaningful_zenith_angles_and_earth_location_appended",
">u1"),
("solar_zenith_angles", "i1", (51, )),
("earth_location", ">i2", (102, )),
("earth_location", [("lats", ">i2"),
("lons", ">i2")], (51,)),
("telemetry", ">u4", (35, )),
("sensor_data", ">u4", (682, )),
("add_on_zenith", ">u2", (10, )),
Expand Down
5 changes: 4 additions & 1 deletion pygac/gac_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@

import logging

import pygac.pygac_geotiepoints as gtp
from pygac.pygac_geotiepoints import GAC_LONLAT_SAMPLE_POINTS
from pygac.reader import Reader, ReaderError
import pygac.pygac_geotiepoints as gtp

Expand All @@ -42,10 +44,11 @@ class GACReader(Reader):
scan_freq = 2.0 / 1000.0
# Max scanlines
max_scanlines = 15000
lonlat_sample_points = GAC_LONLAT_SAMPLE_POINTS

def __init__(self, *args, **kwargs):
"""Init the GAC reader."""
super(GACReader, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)
self.scan_width = 409
self.lonlat_interpolator = gtp.gac_lat_lon_interpolator

Expand Down
17 changes: 8 additions & 9 deletions pygac/klm_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -759,10 +759,10 @@ def get_telemetry(self):

return prt_counts, ict_counts, space_counts

def _get_lonlat(self):
def _get_lonlat_from_file(self):
"""Get the longitudes and latitudes."""
lats = self.scans["earth_location"][:, 0::2] / 1e4
lons = self.scans["earth_location"][:, 1::2] / 1e4
lats = self.scans["earth_location"]["lats"] / np.float32(1e4)
lons = self.scans["earth_location"]["lons"] / np.float32(1e4)
return lons, lats

def get_header_timestamp(self):
Expand All @@ -784,7 +784,7 @@ def get_header_timestamp(self):
except ValueError as err:
raise ValueError('Corrupt header timestamp: {0}'.format(err))

def _get_times(self):
def _get_times_from_file(self):
"""Get the times of the scanlines."""
year = self.scans["scan_line_year"]
jday = self.scans["scan_line_day_of_year"]
Expand All @@ -806,16 +806,15 @@ def _get_ir_channels_to_calibrate(self):
ir_channels_to_calibrate = [4, 5]
return ir_channels_to_calibrate

def postproc(self, channels):
def postproc(self, ds):
"""Apply KLM specific postprocessing.

Masking out 3a/3b/transition.
"""
switch = self.get_ch3_switch()
channels[:, :, 2][switch == 0] = np.nan
channels[:, :, 3][switch == 1] = np.nan
channels[:, :, 2][switch == 2] = np.nan
channels[:, :, 3][switch == 2] = np.nan
ds["channels"].loc[dict(channel_name="3a", scan_line_index=((switch==0) | (switch==2)))] = np.nan
ds["channels"].loc[dict(channel_name="3b", scan_line_index=((switch==1) | (switch==2)))] = np.nan


def _adjust_clock_drift(self):
"""Adjust the geolocation to compensate for the clock error.
Expand Down
3 changes: 2 additions & 1 deletion pygac/lac_klm.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@
("spacecraft_altitude_above_reference_ellipsoid", ">u2"),
("angular_relationships", ">i2", (153, )),
("zero_fill2", ">i2", (3, )),
("earth_location", ">i4", (102, )),
("earth_location", [("lats", ">i2"),
("lons", ">i2")], (51,)),
("zero_fill3", ">i4", (2, )),
# HRPT MINOR FRAME TELEMETRY
("frame_sync", ">u2", (6, )),
Expand Down
6 changes: 3 additions & 3 deletions pygac/lac_pod.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@
("time_code", ">u2", (3, )),
("quality_indicators", ">u4"),
("calibration_coefficients", ">i4", (10, )),
("number_of_meaningful_zenith_angles_and_earth_location_appended",
">u1"),
("number_of_meaningful_zenith_angles_and_earth_location_appended", ">u1"),
("solar_zenith_angles", "i1", (51, )),
("earth_location", ">i2", (102, )),
("earth_location", [("lats", ">i2"),
("lons", ">i2")], (51,)),
("telemetry", ">u4", (35, )),
("sensor_data", ">u4", (3414, )),
("add_on_zenith", ">u2", (10, )),
Expand Down
3 changes: 3 additions & 0 deletions pygac/lac_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@

import logging

import pygac.pygac_geotiepoints as gtp
from pygac.pygac_geotiepoints import LAC_LONLAT_SAMPLE_POINTS
from pygac.reader import Reader, ReaderError
import pygac.pygac_geotiepoints as gtp

Expand All @@ -40,6 +42,7 @@ class LACReader(Reader):
scan_freq = 6.0 / 1000.0
# Max scanlines
max_scanlines = 65535
lonlat_sample_points = LAC_LONLAT_SAMPLE_POINTS

def __init__(self, *args, **kwargs):
"""Init the LAC reader."""
Expand Down
23 changes: 11 additions & 12 deletions pygac/pod_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ class PODReader(Reader):
def correct_scan_line_numbers(self):
"""Correct the scan line numbers."""
# Perform common corrections first.
super(PODReader, self).correct_scan_line_numbers()
super().correct_scan_line_numbers()

# cleaning up the data
min_scanline_number = np.amin(
Expand Down Expand Up @@ -341,8 +341,8 @@ def read_header(cls, filename, fileobj=None, header_date="auto"):

@classmethod
def _validate_tbm_header(cls, potential_tbm_header):
data_set_name = potential_tbm_header['data_set_name']
allowed_empty = (42*b'\x00' + b' ')
data_set_name = potential_tbm_header["data_set_name"]
allowed_empty = (42*b"\x00" + b" ")
if data_set_name == allowed_empty:
return potential_tbm_header.copy()

Expand Down Expand Up @@ -439,16 +439,15 @@ def decode_timestamps(encoded):

return year, jday, msec

def _get_times(self):
def _get_times_from_file(self):
return self.decode_timestamps(self.scans["time_code"])

def _compute_missing_lonlat(self, missed_utcs):
"""Compute lon lat values using pyorbital."""
tic = datetime.datetime.now()

scan_rate = datetime.timedelta(milliseconds=1/self.scan_freq).total_seconds()
sgeom = avhrr_gac(missed_utcs.astype(datetime.datetime),
self.scan_points, frequency=scan_rate)
self.lonlat_sample_points, frequency=scan_rate)
t0 = missed_utcs[0].astype(datetime.datetime)
s_times = sgeom.times(t0)
tle1, tle2 = self.get_tle_lines()
Expand Down Expand Up @@ -482,7 +481,7 @@ def _adjust_clock_drift(self):
error_utcs = np.array(error_utcs, dtype='datetime64[ms]')
# interpolate to get the clock offsets at the scan line utcs
# the clock_error is given in seconds, so offsets are in seconds, too.
offsets = np.interp(self.utcs.astype(np.uint64),
offsets = np.interp(self._times_as_np_datetime64.astype(np.uint64),
error_utcs.astype(np.uint64),
clock_error)
LOG.info("Adjusting for clock drift of %s to %s seconds.",
Expand All @@ -507,7 +506,7 @@ def _adjust_clock_drift(self):
num_lines = max_line - min_line + 1
missed_lines = np.setdiff1d(np.arange(min_line, max_line+1), scan_lines)
missed_utcs = ((missed_lines - scan_lines[0])*np.timedelta64(scan_rate, "ms")
+ self.utcs[0])
+ self._times_as_np_datetime64[0])
# calculate the missing geo locations
try:
missed_lons, missed_lats = self._compute_missing_lonlat(missed_utcs)
Expand Down Expand Up @@ -540,14 +539,14 @@ def _adjust_clock_drift(self):
# set corrected values
self.lons = slerp_res[:, :, 0]
self.lats = slerp_res[:, :, 1]
self.utcs -= (offsets * 1000).astype('timedelta64[ms]')
self._times_as_np_datetime64 -= (offsets * 1000).astype('timedelta64[ms]')

toc = datetime.datetime.now()
LOG.debug("clock drift adjustment took %s", str(toc - tic))

def _get_lonlat(self):
lats = self.scans["earth_location"][:, 0::2] / 128.0
lons = self.scans["earth_location"][:, 1::2] / 128.0
def _get_lonlat_from_file(self):
lats = self.scans["earth_location"]["lats"] / np.float32(128.0)
lons = self.scans["earth_location"]["lons"] / np.float32(128.0)
return lons, lats

def get_telemetry(self):
Expand Down
10 changes: 6 additions & 4 deletions pygac/pygac_geotiepoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@

import numpy as np

GAC_LONLAT_SAMPLE_POINTS = np.arange(4, 405, 8)
LAC_LONLAT_SAMPLE_POINTS = np.arange(24, 2048, 40)


def gac_lat_lon_interpolator(lons_subset, lats_subset):
"""Interpolate lat-lon values in the AVHRR GAC data.
Expand All @@ -37,16 +40,15 @@ def gac_lat_lon_interpolator(lons_subset, lats_subset):
ranging from 5 to 405. Interpolate to full resolution.

"""
cols_subset = np.arange(4, 405, 8)
cols_full = np.arange(409)
return lat_lon_interpolator(lons_subset, lats_subset, cols_subset, cols_full)
return lat_lon_interpolator(lons_subset, lats_subset, GAC_LONLAT_SAMPLE_POINTS, cols_full)



def lac_lat_lon_interpolator(lons_subset, lats_subset):
"""Interpolate lat-lon values in the AVHRR LAC data."""
cols_subset = np.arange(24, 2048, 40)
cols_full = np.arange(2048)
return lat_lon_interpolator(lons_subset, lats_subset, cols_subset, cols_full)
return lat_lon_interpolator(lons_subset, lats_subset, LAC_LONLAT_SAMPLE_POINTS, cols_full)


def lat_lon_interpolator(lons_subset, lats_subset, cols_subset, cols_full):
Expand Down
Loading
Loading