Skip to content

Commit

Permalink
Merge branch 'master' into action
Browse files Browse the repository at this point in the history
  • Loading branch information
cmbant authored Aug 23, 2024
2 parents cdd09e0 + eb0669f commit 5470eae
Show file tree
Hide file tree
Showing 10 changed files with 85 additions and 80 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## 3.5.x

- Detect and fix incomplete last lines when resuming or minimizing from existing runs (#378)
- Added functions module and refactored some numerical functions into it

## 3.5.4

- Allow classes to have both yaml and class attributes as long as no duplicate keys
Expand Down
10 changes: 5 additions & 5 deletions cobaya/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def compute_temperature(logpost, logprior, loglike, check=True, extra_tolerance=
"""
Returns the temperature of a sample.
If ``check=True`` and the log-probabilites passed are arrays, checks consistency
If ``check=True`` and the log-probabilities passed are arrays, checks consistency
of the sample temperature, and raises ``AssertionError`` if inconsistent.
"""
temp = (logprior + loglike) / logpost
Expand Down Expand Up @@ -286,8 +286,7 @@ def __init__(self, model, output=None, cache_size=_default_cache_size, name=None
self.reset()
# If loaded, check sample weights, consistent logp sums,
# and temperature (ignores the given one)
samples_loaded = len(self) > 0
if samples_loaded:
if len(self) > 0:
try:
try:
self.temperature = self._check_logps(extra_tolerance=False)
Expand Down Expand Up @@ -499,8 +498,9 @@ def _check_logps(self, temperature_only=False, extra_tolerance=False):
check=True, extra_tolerance=extra_tolerance)
except AssertionError as excpt:
raise LoggedError(
self.log, "The sample seems to have an inconsistent temperature.") \
from excpt
self.log, "The sample seems to have an inconsistent temperature. "
"This could be due to input file truncation on the last line "
"due to crash/being killed before complete.") from excpt
if not temperature_only:
tols = {
"rtol": 1e-4 * (10 if extra_tolerance else 1),
Expand Down
46 changes: 24 additions & 22 deletions cobaya/functions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
import logging
import scipy

try:
import numba
Expand All @@ -13,9 +14,6 @@

random_SO_N = special_ortho_group.rvs

_fast_chi_squared = None
_sym_chi_squared = None

else:
import warnings

Expand Down Expand Up @@ -65,25 +63,29 @@ def _rvs(dim, xx, H):
H[:, :] = (D * H.T).T


@numba.njit(parallel=True)
def _fast_chi_squared(c_inv, delta):
"""
Calculate chi-squared from inverse matrix and delta vector,
using symmetry. Note parallel is slower for small sizes.
"""
n = delta.shape[0]
chi2 = 0.0

for j in numba.prange(n):
z_temp = np.dot(c_inv[j, j + 1:], delta[j + 1:])
chi2 += (2 * z_temp + c_inv[j, j] * delta[j]) * delta[j]

return chi2


def chi_squared(c_inv, delta):
if len(delta) < 1500 or not _fast_chi_squared:
"""
Compute chi squared, i.e. delta.T @ c_inv @ delta
:param c_inv: symmetric positive definite inverse covariance matrix
:param delta: 1D array
:return: delta.T @ c_inv @ delta
"""
if len(delta) < 1500:
return c_inv.dot(delta).dot(delta)
else:
return _fast_chi_squared(c_inv, delta)
# use symmetry
return scipy.linalg.blas.dsymv(alpha=1.0,
a=c_inv if np.isfortran(c_inv) else c_inv.T,
x=delta, lower=0).dot(delta)


def inverse_cholesky(cov):
"""
Get inverse of Cholesky decomposition
:param cov: symmetric positive definite matrix
:return: L^{-1} where cov = L L^T
"""
cholesky = np.linalg.cholesky(cov)
return scipy.linalg.lapack.dtrtri(cholesky, lower=True)[0]
2 changes: 0 additions & 2 deletions cobaya/likelihoods/base_classes/DataSetLikelihood.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@
from cobaya.typing import InfoDict
from cobaya.conventions import packages_path_input
from .InstallableLikelihood import InstallableLikelihood
# import _fast_chi_square for backwards compatibility
from .InstallableLikelihood import ChiSquaredFunctionLoader as _fast_chi_square # noqa


class DataSetLikelihood(InstallableLikelihood):
Expand Down
25 changes: 2 additions & 23 deletions cobaya/likelihoods/base_classes/InstallableLikelihood.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,7 @@
from cobaya.install import _version_filename
from cobaya.component import ComponentNotInstalledError
from cobaya.tools import VersionCheckError, resolve_packages_path


class ChiSquaredFunctionLoader:

def __init__(self, method_name='_fast_chi_squared'):
"""
On first use will replace method_name with direct reference to chi2 function
:param method_name: name of the property that chi_squared() is assigned to
"""
self._method_name = method_name

def __get__(self, instance, owner):
# delay testing active camb until run time
from cobaya.functions import numba, chi_squared
camb_chi_squared = None
if numba is None:
try:
from camb.mathutils import chi_squared as camb_chi_squared
except ImportError:
pass
setattr(instance, self._method_name, camb_chi_squared or chi_squared)
return chi_squared
from cobaya.functions import chi_squared


class InstallableLikelihood(Likelihood):
Expand All @@ -50,7 +29,7 @@ class InstallableLikelihood(Likelihood):

# fast convenience function, to get chi-squared (exploiting symmetry); can call
# self._fast_chi_squared(cov_inv, delta)
_fast_chi_squared = ChiSquaredFunctionLoader()
_fast_chi_squared = staticmethod(chi_squared)

def __init__(self, *args, **kwargs):
# Ensure check for install and version errors
Expand Down
2 changes: 1 addition & 1 deletion cobaya/likelihoods/base_classes/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .InstallableLikelihood import InstallableLikelihood
from .bao import BAO
from .DataSetLikelihood import DataSetLikelihood, _fast_chi_square
from .DataSetLikelihood import DataSetLikelihood
from .cmblikes import CMBlikes, make_forecast_cmb_dataset
from .des import DES
from .H0 import H0
Expand Down
3 changes: 2 additions & 1 deletion cobaya/likelihoods/gaussian_mixture/gaussian_mixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from cobaya.log import LoggedError
from cobaya.mpi import share_mpi, is_main_process
from cobaya.typing import InputDict, Union, Sequence
from cobaya.functions import inverse_cholesky

derived_suffix = "_derived"

Expand Down Expand Up @@ -111,7 +112,7 @@ def initialize_with_params(self):
self.weights = 1 / len(self.gaussians)

# Prepare the transformation(s) for the derived parameters
self.inv_choleskyL = [np.linalg.inv(np.linalg.cholesky(cov)) for cov in self.covs]
self.inv_choleskyL = [inverse_cholesky(cov) for cov in self.covs]

def logp(self, **params_values):
"""
Expand Down
4 changes: 2 additions & 2 deletions cobaya/samplers/mcmc/mcmc.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from cobaya.samplers.mcmc.proposal import BlockedProposer
from cobaya.log import LoggedError, always_stop_exceptions
from cobaya.tools import get_external_function, NumberWithUnits, load_DataFrame
from cobaya.functions import inverse_cholesky
from cobaya.yaml import yaml_dump_file
from cobaya.model import LogPosterior
from cobaya import mpi
Expand Down Expand Up @@ -724,15 +725,14 @@ def check_convergence_and_learn_proposal(self):
converged_means = False
# Cholesky of (normalized) mean of covs and eigvals of Linv*cov_of_means*L
try:
L = np.linalg.cholesky(norm_mean_of_covs)
Linv = inverse_cholesky(norm_mean_of_covs)
except np.linalg.LinAlgError:
self.log.warning(
"Negative covariance eigenvectors. "
"This may mean that the covariance of the samples does not "
"contain enough information at this point. "
"Skipping learning a new covmat for now.")
else:
Linv = np.linalg.inv(L)
try:
eigvals = np.linalg.eigvalsh(Linv.dot(corr_of_means).dot(Linv.T))
success_means = True
Expand Down
4 changes: 2 additions & 2 deletions cobaya/samplers/mcmc/proposal.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import numpy as np

from cobaya.log import LoggedError, HasLogger
from cobaya.tools import choleskyL
from cobaya.tools import choleskyL_corr
from cobaya.functions import random_SO_N


Expand Down Expand Up @@ -217,7 +217,7 @@ def set_covariance(self, propose_matrix):
"symmetric square matrix.")
self.propose_matrix = propose_matrix.copy()
propose_matrix_j_sorted = self.propose_matrix[np.ix_(self.i_of_j, self.i_of_j)]
sigmas_diag, L = choleskyL(propose_matrix_j_sorted, return_scale_free=True)
sigmas_diag, L = choleskyL_corr(propose_matrix_j_sorted)
# Store the basis as transformation matrices
self.transform = []
for j_start, bp in zip(self.j_start, self.proposer):
Expand Down
64 changes: 42 additions & 22 deletions cobaya/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,19 @@ def read_dnumber(n: Any, dim: int):
return NumberWithUnits(n, "d", dtype=int, scale=dim).value


def truncate_to_end_line(file_name):
with open(file_name, "r+b") as inp:
# Find the last complete line
inp.seek(0, 2) # Go to the end of the file
pos = inp.tell() - 1
while pos > 0 and inp.read(1) != b"\n":
pos -= 1
inp.seek(pos, 0)
if pos > 0:
inp.seek(pos + 1, 0)
inp.truncate()


def load_DataFrame(file_name, skip=0, root_file_name=None):
"""
Loads a `pandas.DataFrame` from a text file
Expand Down Expand Up @@ -493,7 +506,17 @@ def load_DataFrame(file_name, skip=0, root_file_name=None):
inp, sep=" ", header=None, names=cols, comment="#", skipinitialspace=True,
skiprows=skip, index_col=False)

return data
if not data.empty:
# Check if the last row contains any NaNs
if data.iloc[-1].isna().any():
log.warning("Last row of %s is incomplete or contains NaNs", file_name)
# If the second-to-last row exists and doesn't contain NaNs,
# delete the last row assuming this was due to crash on write
if len(data) > 1 and not data.iloc[-2].isna().any():
data = data.iloc[:-1]
log.info(f"Saving {file_name} deleting last (in)complete line")
truncate_to_end_line(file_name)
return data


def prepare_comment(comment):
Expand Down Expand Up @@ -610,14 +633,14 @@ def fast_logpdf(x):


def _KL_norm(m1, S1, m2, S2):
"""Performs the Guassian KL computation, without input testing."""
"""Performs the Gaussian KL computation, without input testing."""
dim = S1.shape[0]
S2inv = np.linalg.inv(S2)
return 0.5 * (np.trace(S2inv.dot(S1)) + (m1 - m2).dot(S2inv).dot(m1 - m2) -
dim + np.log(np.linalg.det(S2) / np.linalg.det(S1)))
dim + np.linalg.slogdet(S2)[1] - np.linalg.slogdet(S1)[1])


def KL_norm(m1=None, S1=np.array([]), m2=None, S2=np.array([]), symmetric=False):
def KL_norm(m1=None, S1=(), m2=None, S2=(), symmetric=False):
"""Kullback-Leibler divergence between 2 gaussians."""
S1, S2 = [np.atleast_2d(S) for S in [S1, S2]]
assert S1.shape[0], "Must give at least S1"
Expand All @@ -634,37 +657,34 @@ def KL_norm(m1=None, S1=np.array([]), m2=None, S2=np.array([]), symmetric=False)
return _KL_norm(m1, S1, m2, S2)


def choleskyL(M, return_scale_free=False):
def choleskyL_corr(M):
r"""
Gets the Cholesky lower triangular matrix :math:`L` (defined as :math:`M=LL^T`)
of a given matrix ``M``.
for the matrix ``M``, in the form :math:`L = S L^\prime` where S is diagonal.
Can be used to create an affine transformation that decorrelates a sample
:math:`x=\{x_i\}` as :math:`y=Lx`, where :math:`L` is extracted from the
covariance of the sample.
:math:`x=\{x_i\}` with covariance M, as :math:`x=Ly`,
where :math:`L` is extracted from M and y has identity covariance.
If ``return_scale_free=True`` (default: ``False``), returns a tuple of
a matrix :math:`S` containing the square roots of the diagonal of the input matrix
(the standard deviations, if a covariance is given), and the scale-free
:math:`L^\prime=S^{-1}L`.
Returns a tuple of a matrix :math:`S` containing the square roots of the diagonal
of the input matrix (the standard deviations, if a covariance is given),
and the scale-free :math:`L^\prime=S^{-1}L`.
(could just use Cholesky directly for proposal)
"""
std_diag, corr = cov_to_std_and_corr(M)
Lprime = np.linalg.cholesky(corr)
if return_scale_free:
return std_diag, Lprime
else:
return np.linalg.inv(std_diag).dot(Lprime)
return np.diag(std_diag), np.linalg.cholesky(corr)


def cov_to_std_and_corr(cov):
"""
Gets the standard deviations (as a diagonal matrix)
Gets the standard deviations (as a 1D array
and the correlation matrix of a covariance matrix.
"""
std_diag = np.diag(np.sqrt(np.diag(cov)))
invstd_diag = np.linalg.inv(std_diag)
corr = invstd_diag.dot(cov).dot(invstd_diag)
return std_diag, corr
std = np.sqrt(np.diag(cov))
inv_std = 1 / std
corr = inv_std[:, np.newaxis] * cov * inv_std[np.newaxis, :]
np.fill_diagonal(corr, 1.0)
return std, corr


def are_different_params_lists(list_A, list_B, name_A="A", name_B="B"):
Expand Down

0 comments on commit 5470eae

Please sign in to comment.