Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add ToyResult and ToyManager classes #20

Merged
merged 41 commits into from
Feb 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
886e4a0
addition of toyutils.py
marinang Jan 23, 2020
5be315b
add ParameterNoFound exception
marinang Jan 23, 2020
c0cc5f9
add docstrings in toyutils.py
marinang Jan 23, 2020
011e13b
add methods to store and load toys in FrequentistCalculator
marinang Jan 23, 2020
fda587e
saved toys of discovery test notebooks.
marinang Jan 24, 2020
594c8ad
Rename Toys -> ToyResult
marinang Jan 24, 2020
f899ffc
Rename ToysCollection -> ToysManager
marinang Jan 24, 2020
faae35d
add test for discovery with frequentists calculator
marinang Jan 24, 2020
a6b34eb
add teardown_function in tests with zfit
marinang Jan 24, 2020
2be9e00
move mean param in test_parameter.py inside test function
marinang Jan 24, 2020
1c128a9
add print statements to figure out why the windows build fails
marinang Jan 24, 2020
1d36862
add another print statement to figure out why the windows build fails.
marinang Jan 25, 2020
ccd95f2
windows failure investigation #3
marinang Jan 25, 2020
f3c2f84
windows failure investigation #4
marinang Jan 25, 2020
91829f5
remove __eq__ in POIarray
marinang Jan 25, 2020
31355d7
windows failure investigation 5
marinang Jan 25, 2020
a6071b0
windows failure investigation 6
marinang Jan 25, 2020
ddaabb4
windows failure investgation 7
marinang Jan 25, 2020
063ad52
windows failure investigation 8
marinang Jan 25, 2020
6584c06
windows failure investigation 9 (print hash of POI, and POIarray)
marinang Jan 25, 2020
236b368
windows failure investigation 10
marinang Jan 25, 2020
c4d568f
windows failure investigation 11 (cause is probably hash(float32) =! …
marinang Jan 25, 2020
0f9a915
impose the np.float64 dtype in values of Parameter
marinang Jan 25, 2020
0e009a5
remove print statements in frequentist_calculator.py
marinang Jan 25, 2020
f3eb199
add tests for classes in toysutils.py
marinang Jan 25, 2020
f3da725
add upper limits test with frequentists calculator
marinang Jan 25, 2020
0f607d5
move toy generation and fit to ToyManager
marinang Jan 25, 2020
f6e927c
add tests for confidence intervals with frequentist calculator
marinang Jan 26, 2020
05464c3
fix number of scan points in test_confidence_interval to be the same …
marinang Jan 26, 2020
e29ea31
add docstrings in hypotests_object
marinang Jan 26, 2020
1958b0d
add new docstrings in basecalculator.py
marinang Jan 26, 2020
1eef58f
add docstrings in toyutils.py
marinang Jan 26, 2020
92dbb47
fiy typo in from_yaml method of ToysCalculator
marinang Jan 26, 2020
992bb86
fix second typo in from_yaml method of ToysCalculator
marinang Jan 26, 2020
6c6dead
add docstring in from_yaml in ToysCalculator
marinang Jan 26, 2020
1add273
remove fit_range in loss_builder
marinang Jan 26, 2020
cfcceda
Fix index error in ToysManager
marinang Jan 28, 2020
810482d
remove zfit specific lines
marinang Jan 28, 2020
d3724aa
Merge branch 'master' into mm_savetoys
marinang Feb 1, 2020
667c05e
fix typo zfi to zfit in test_confidence_intervals.py
marinang Feb 2, 2020
d61ae7c
implement eduardo's comments
marinang Feb 15, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ jobs:
python.version: '3.6'
Python37:
python.version: '3.7'
Python38:
python.version: '3.8'

steps:
- bash: echo "##vso[task.prependpath]$CONDA/bin"
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ dependencies:
- tensorflow=>1.14
- tensorflow-probability
- zfit
- asdf
eduardo-rodrigues marked this conversation as resolved.
Show resolved Hide resolved
2 changes: 1 addition & 1 deletion hepstats/hypotests/calculators/asymptotic_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def asimov_dataset(self, poi) -> (np.array, np.array):
for i, ad in enumerate([generate_asimov_hist(m, values, self._asimov_bins) for m in model]):
weights, bin_edges = ad
bin_centers = bin_edges[0: -1] + np.diff(bin_edges)/2
asimov_data.append(array2dataset(type(data[i]), data[i].obs, bin_centers, weights))
asimov_data.append(array2dataset(type(data[i]), data[i].space, bin_centers, weights))

self._asimov_dataset[poi] = asimov_data

Expand Down
318 changes: 179 additions & 139 deletions hepstats/hypotests/calculators/basecalculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
from typing import Dict, Union, Tuple, List
import numpy as np

from ..parameters import POI, POIarray
from ..fitutils.api_check import is_valid_loss, is_valid_fitresult, is_valid_minimizer
from ..fitutils.api_check import is_valid_data, is_valid_pdf
from ..hypotests_object import HypotestsObject
from ..parameters import POI, POIarray, asarray
from ..fitutils.utils import pll
from ..toyutils import ToysManager
from ..fitutils.sampling import base_sampler, base_sample

"""
Module defining the base class for the calculators for statistical tests based on the likelyhood ratio.
Expand All @@ -21,7 +22,7 @@
"""


class BaseCalculator(object):
class BaseCalculator(HypotestsObject):

def __init__(self, input, minimizer):
"""Base class for calculator.
Expand All @@ -45,149 +46,15 @@ def __init__(self, input, minimizer):
>>> calc = BaseCalculator(input=loss, minimizer=MinuitMinimizer())
"""

if is_valid_fitresult(input):
self._loss = input.loss
self._bestfit = input
elif is_valid_loss(input):
self._loss = input
self._bestfit = None
else:
raise ValueError("{} is not a valid loss funtion or fit result!".format(input))

if not is_valid_minimizer(minimizer):
raise ValueError("{} is not a valid minimizer !".format(minimizer))
super(BaseCalculator, self).__init__(input, minimizer)

self._minimizer = minimizer
self.minimizer.verbosity = 0
# cache of the observed nll values
self._obs_nll = {}
eduardo-rodrigues marked this conversation as resolved.
Show resolved Hide resolved

self._parameters = {}
for m in self.model:
for d in m.get_dependents():
self._parameters[d.name] = d

@property
def loss(self):
"""
Returns the loss / likelihood function used in the calculator.
"""
return self._loss

@property
def minimizer(self):
"""
Returns the minimzer used in the calculator.
"""
return self._minimizer

@property
def bestfit(self):
"""
Returns the best fit values of the model parameters.
"""
if getattr(self, "_bestfit", None):
return self._bestfit
else:
print("Get fit best values!")
self.minimizer.verbosity = 5
mininum = self.minimizer.minimize(loss=self.loss)
self.minimizer.verbosity = 0
self._bestfit = mininum
return self._bestfit

@bestfit.setter
def bestfit(self, value):
"""
Set the best fit values of the model parameters.

Args:
value: fit result
"""
if not is_valid_fitresult(value):
raise ValueError()
self._bestfit = value

@property
def model(self):
"""
Returns the model used in the calculator.
"""
return self.loss.model

@property
def data(self):
"""
Returns the data used in the calculator.
"""
return self.loss.data

@property
def constraints(self):
"""
Returns the constraints on the loss / likehood function used in the calculator.
"""
return self.loss.constraints

def get_parameter(self, name):
"""
Returns the parameter in loss function with given input name.

Args:
name (str): name of the parameter to return
"""
return self._parameters[name]

def set_dependents_to_bestfit(self):
"""
Set the values of the parameters in the models to the best fit values
"""
for m in self.model:
for d in m.get_dependents():
d.set_value(self.bestfit.params[d]["value"])

def lossbuilder(self, model, data, weights=None):
""" Method to build a new loss function.

Args:
model (List): The model or models to evaluate the data on
data (List): Data to use
weights (optional, List): the data weights

Example with `zfit`:
>>> data = zfit.data.Data.from_numpy(obs=obs, array=np.random.normal(1.2, 0.1, 10000))
>>> mean = zfit.Parameter("mu", 1.2)
>>> sigma = zfit.Parameter("sigma", 0.1)
>>> model = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma)
>>> loss = calc.lossbuilder(model, data)

Returns:
Loss function

"""

assert all(is_valid_pdf(m) for m in model)
assert all(is_valid_data(d) for d in data)

msg = "{0} must have the same number of components as {1}"
if len(data) != len(self.data):
raise ValueError(msg.format("data", "`self.data"))
if len(model) != len(self.model):
raise ValueError(msg.format("model", "`self.model"))
if weights is not None and len(weights) != len(self.data):
raise ValueError(msg.format("weights", "`self.data`"))

fit_range = self.loss.fit_range

if weights is not None:
for d, w in zip(data, weights):
d.set_weights(w)

loss = type(self.loss)(model=model, data=data, fit_range=fit_range)
loss.add_constraints(self.constraints)

return loss

def obs_nll(self, pois) -> np.array:
""" Compute observed negative log-likelihood values for given parameters of interest.

Expand Down Expand Up @@ -436,3 +303,176 @@ def q(self, nll1: np.array, nll2: np.array, poi1, poi2,
q = q

return q


class BaseToysCalculator(BaseCalculator):

def __init__(self, input, minimizer, sampler, sample):
"""Basis for toys calculator class.

Args:
input : loss or fit result
minimizer : minimizer to use to find the minimum of the loss function
sampler : function used to create sampler with models, number of events and
floating parameters in the sample Default is `hepstats.fitutils.sampling.base_sampler`.
sample : function used to get samples from the sampler.
Default is `hepstats.fitutils.sampling.base_sample`.
"""
super(BaseToysCalculator, self).__init__(input, minimizer)


class ToysCalculator(BaseToysCalculator, ToysManager):
"""
Class for calculators using toys.
"""

def __init__(self, input, minimizer, ntoysnull=100, ntoysalt=100, sampler=base_sampler, sample=base_sample):
"""Toys calculator class.

Args:
input : loss or fit result
minimizer : minimizer to use to find the minimum of the loss function
ntoysnull (int, default=100): minimum number of toys to generate for the null hypothesis
ntoysalt (int, default=100): minimum number of toys to generate for the alternative hypothesis
sampler : function used to create sampler with models, number of events and
floating parameters in the sample Default is `hepstats.fitutils.sampling.base_sampler`.
sample : function used to get samples from the sampler.
Default is `hepstats.fitutils.sampling.base_sample`.
"""
super(ToysCalculator, self).__init__(input, minimizer, sampler, sample)

self._ntoysnull = ntoysnull
self._ntoysalt = ntoysalt

@classmethod
def from_yaml(cls, filename, loss, minimizer, ntoysnull=100, ntoysalt=100, sampler=base_sampler,
sample=base_sample):
"""
ToysCalculator constructor with the toys loaded from a yaml file.

Args:
filename (str)
input : loss or fit result
minimizer : minimizer to use to find the minimum of the loss function
ntoysnull (int, default=100): minimum number of toys to generate for the null hypothesis
ntoysalt (int, default=100): minimum number of toys to generate for the alternative hypothesis
sampler : function used to create sampler with models, number of events and
floating parameters in the sample Default is `hepstats.fitutils.sampling.base_sampler`.
sample : function used to get samples from the sampler.
Default is `hepstats.fitutils.sampling.base_sample`.

Returns
ToysCalculator
"""

calculator = cls(loss, minimizer, ntoysnull, ntoysalt, sampler, sample)
toysresults = calculator.toysresults_from_yaml(filename)

for t in toysresults:
calculator.add_toyresult(t)

return calculator

@property
def ntoysnull(self):
"""
Returns the number of toys generated for the null hypothesis.
"""
return self._ntoysnull

@property
def ntoysalt(self):
"""
Returns the number of toys generated for the alternative hypothesis.
"""
return self._ntoysalt

def _get_toys(self, poigen, poieval=None, qtilde=False, hypothesis="null"):
"""
Return the generated toys for a given POI.

Args:
poigen (POI): POI used to generate the toys
poieval (POIarray): POI values to evaluate the loss function
qtilde (bool, optional): if `True` use the $$\tilde{q}$$ test statistics else (default) use
the $$q$$ test statistic
hypothesis: `null` or `alternative`
"""

assert hypothesis in ["null", "alternative"]

if hypothesis == "null":
ntoys = self.ntoysnull
else:
ntoys = self.ntoysalt

ret = {}

for p in poigen:
poieval_p = poieval

if poieval_p is None:
poieval_p = POIarray(poigen.parameter, [p.value])
else:
if p not in poieval_p:
poieval_p = poieval_p.append(p.value)

if qtilde and 0. not in poieval_p.values:
poieval_p = poieval_p.append(0.0)

poieval_p = asarray(poieval_p)

ngenerated = self.ntoys(p, poieval_p)
if ngenerated < ntoys:
ntogen = ntoys - ngenerated
else:
ntogen = 0

if ntogen > 0:
print(f"Generating {hypothesis} hypothesis toys for {p}.")
print(p, poieval_p)

self.generate_and_fit_toys(ntoys=ntogen, poigen=p, poieval=poieval_p)
marinang marked this conversation as resolved.
Show resolved Hide resolved

ret[p] = self.get_toyresult(p, poieval_p)

return ret

def get_toys_null(self, poigen, poieval, qtilde=False):
"""
Return the generated toys for the null hypothesis.

Args:
poigen (POI): POI used to generate the toys
ntoys (int): number of toys to generate
poieval (POIarray): POI values to evaluate the loss function
qtilde (bool, optional): if `True` use the $$\tilde{q}$$ test statistics else (default) use
the $$q$$ test statistic

Example with `zfit`:
>>> mean = zfit.Parameter("mu", 1.2)
>>> poinull = POIarray(mean, [1.1, 1.2, 1.0])
>>> poialt = POI(mean, 1.2)
>>> for p in poinull:
... calc.get_toys_alt(p, poieval=poialt)
"""
return self._get_toys(poigen=poigen, poieval=poieval, qtilde=qtilde, hypothesis="null")

def get_toys_alt(self, poigen, poieval, qtilde=False):
"""
Return the generated toys for the alternative hypothesis.

Args:
poigen (POI): POI used to generate the toys
ntoys (int): number of toys to generate
poieval (POIarray): POI values to evaluate the loss function
qtilde (bool, optional): if `True` use the $$\tilde{q}$$ test statistics else (default) use
the $$q$$ test statistic

Example with `zfit`:
>>> mean = zfit.Parameter("mu", 1.2)
>>> poinull = POIarray(mean, [1.1, 1.2, 1.0])
>>> poialt = POI(mean, 1.2)
>>> calc.get_toys_alt(poialt, poieval=poinull)
"""
return self._get_toys(poigen=poigen, poieval=poieval, qtilde=qtilde, hypothesis="alternative")
Loading