Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Development #7

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 59 additions & 26 deletions skopt/optimizer/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
from numbers import Number

import ConfigSpace as CS
ccs_active = False
try:
import cconfigspace as CCS
ccs_active = True
except (ImportError, OSError) as a:
warnings.warn("CCS could not be loaded and is deactivated: " + str(a), category=ImportWarning)

import numpy as np
import pandas as pd

Expand Down Expand Up @@ -294,6 +301,11 @@ def __init__(

if isinstance(self.base_estimator_, GaussianProcessRegressor):
raise RuntimeError("GP estimator is not available with ConfigSpace!")
elif ccs_active and isinstance(dimensions, CCS.ConfigurationSpace):
self.ccs = dimensions

if isinstance(self.base_estimator_, GaussianProcessRegressor):
raise RuntimeError("GP estimator is not available with CCS!")
else:

# normalize space if GP regressor
Expand Down Expand Up @@ -352,10 +364,16 @@ def copy(self, random_state=None):
Set the random state of the copy.
"""

dimens = None
if hasattr(self, "config_space"):
dimens = self.config_space
elif hasattr(self, "ccs"):
dimens = self.ccs
else:
dimens = self.space.dimensions

optimizer = Optimizer(
dimensions=self.config_space
if hasattr(self, "config_space")
else self.space.dimensions,
dimensions=dimens,
base_estimator=self.base_estimator_,
n_initial_points=self.n_initial_points_,
initial_point_generator=self._initial_point_generator,
Expand All @@ -376,6 +394,35 @@ def copy(self, random_state=None):

return optimizer

def _lie_to_optimizer(self, opt, strategy, x):
    """Tell ``opt`` a fabricated ("constant liar") observation for ``x``.

    The fabricated objective is an aggregate of ``opt.yi``: ``np.min`` for
    ``"cl_min"``, ``np.mean`` for ``"cl_mean"`` and ``np.max`` for any other
    ``strategy``; it is ``0.0`` when ``opt.yi`` is empty.

    When ``"ps"`` occurs in ``self.acq_func`` the entries of ``opt.yi`` are
    unpacked as pairs, a second lie is aggregated from their second
    components (``log(sys.float_info.max)`` if there are none) and the pair
    ``(y_lie, t_lie)`` is told instead of the single value.
    """
    timed = "ps" in self.acq_func
    times = [t for (_, t) in opt.yi] if timed and len(opt.yi) > 0 else None

    # One aggregate function serves both the objective and the time lie.
    if strategy == "cl_min":
        aggregate = np.min
    elif strategy == "cl_mean":
        aggregate = np.mean
    else:
        aggregate = np.max

    # NOTE(review): in the timed case ``opt.yi`` holds pairs, so the
    # aggregate below runs over both components at once -- confirm that
    # this is the intended objective lie.
    y_lie = aggregate(opt.yi) if opt.yi else 0.0
    t_lie = aggregate(times) if times is not None else log(sys.float_info.max)

    # `_tell()` rather than `tell()`: the latter would log-transform the
    # computation times a second time.
    if timed:
        opt._tell(x, (y_lie, t_lie))
    else:
        opt._tell(x, y_lie)

def ask_default(self, strategy="cl_min"):
# Return the point produced by `self.space.default()`, recording it in
# `self.sampled` when it is not None.
#
# `strategy` is only forwarded to `_lie_to_optimizer`.
x = self.space.default()
if x is not None:
# Remember the point in the list of already-sampled points.
self.sampled.append(x)
# Draws a fresh seed from `self.rng` for the copy (this advances the RNG).
# NOTE(review): `opt` is a local copy that is neither stored nor returned,
# so the lie told to it below is not visible on `self` -- confirm intent.
opt = self.copy(random_state=self.rng.randint(0, np.iinfo(np.int32).max))
self._lie_to_optimizer(opt, strategy, x)
return x

def ask(self, n_points=None, strategy="cl_min"):
"""Query point or multiple points at which objective should be evaluated.

Expand Down Expand Up @@ -445,26 +492,7 @@ def ask(self, n_points=None, strategy="cl_min"):
if i == n_points - 1:
break

ti_available = "ps" in self.acq_func and len(opt.yi) > 0
ti = [t for (_, t) in opt.yi] if ti_available else None

if strategy == "cl_min":
y_lie = np.min(opt.yi) if opt.yi else 0.0 # CL-min lie
t_lie = np.min(ti) if ti is not None else log(sys.float_info.max)
elif strategy == "cl_mean":
y_lie = np.mean(opt.yi) if opt.yi else 0.0 # CL-mean lie
t_lie = np.mean(ti) if ti is not None else log(sys.float_info.max)
else:
y_lie = np.max(opt.yi) if opt.yi else 0.0 # CL-max lie
t_lie = np.max(ti) if ti is not None else log(sys.float_info.max)

# Lie to the optimizer.
if "ps" in self.acq_func:
# Use `_tell()` instead of `tell()` to prevent repeated
# log transformations of the computation times.
opt._tell(x, (y_lie, t_lie))
else:
opt._tell(x, y_lie)
self._lie_to_optimizer(opt, strategy, x)

self.cache_ = {(n_points, strategy): X} # cache_ the result

Expand All @@ -477,6 +505,8 @@ def _filter_duplicated(self, samples):

if hasattr(self, "config_space"):
hps_names = self.config_space.get_hyperparameter_names()
elif hasattr(self, "ccs"):
hps_names = [x.name for x in self.ccs.parameters]
else:
hps_names = self.space.dimension_names

Expand All @@ -486,7 +516,8 @@ def _filter_duplicated(self, samples):
if len(self.sampled) > 0:
df_history = pd.DataFrame(data=self.sampled, columns=hps_names)
df_merge = pd.merge(df_samples, df_history, on=None, how="inner")
df_samples = df_samples.append(df_merge)
#df_samples = df_samples.append(df_merge)
df_samples = pd.concat([df_samples, df_merge])
df_samples = df_samples[~df_samples.duplicated(keep=False)]

if len(df_samples) > 0:
Expand Down Expand Up @@ -524,7 +555,7 @@ def _ask(self):

next_x = self._next_x
if next_x is not None:
if not self.space.is_config_space:
if not self.space.is_config_space and not self.space.is_ccs:
min_delta_x = min([self.space.distance(next_x, xi) for xi in self.Xi])
if abs(min_delta_x) <= 1e-8:
warnings.warn(
Expand Down Expand Up @@ -563,6 +594,8 @@ def tell(self, x, y, fit=True):
"""
if self.space.is_config_space:
pass
elif self.space.is_ccs:
pass
else:
check_x_in_space(x, self.space)

Expand Down Expand Up @@ -686,7 +719,7 @@ def _tell(self, x, y, fit=True):
# lbfgs should handle this but just in case there are
# precision errors.
if not self.space.is_categorical:
if not self.space.is_config_space:
if not self.space.is_config_space and not self.space.is_ccs:
next_x = np.clip(
next_x, transformed_bounds[:, 0], transformed_bounds[:, 1]
)
Expand Down
132 changes: 115 additions & 17 deletions skopt/space/space.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@


import ConfigSpace as CS
ccs_active = False
try:
import cconfigspace as CCS
ccs_active = True
except (ImportError, OSError) as a:
import warnings
warnings.warn("CCS could not be loaded and is deactivated: " + str(a), category=ImportWarning)

from sklearn.impute import SimpleImputer

Expand Down Expand Up @@ -666,9 +673,12 @@ def __contains__(self, point):
@property
def transformed_bounds(self):
if self.transform_ == "normalize":
return 0., 1.
return 0.0, 1.0
else:
return (self.low, self.high)
if self.prior == "uniform":
return self.low, self.high
else:
return np.log10(self.low), np.log10(self.high)

def distance(self, a, b):
"""Compute distance between point `a` and `b`.
Expand Down Expand Up @@ -888,8 +898,11 @@ class Space(object):

def __init__(self, dimensions):
self.is_config_space = False
self.is_ccs = False
self.config_space_samples = None
self.ccs_samples = None
self.config_space_explored = False
self.ccs_explored = False
self.imp_const = SimpleImputer(
missing_values=np.nan, strategy="constant", fill_value=-1000
)
Expand Down Expand Up @@ -957,6 +970,56 @@ def __init__(self, dimensions):
else:
raise ValueError("Unknown Hyperparameter type.")
dimensions = space
elif ccs_active and isinstance(dimensions, CCS.ConfigurationSpace):
self.is_ccs = True
self.ccs = dimensions
self.hps_type = {}

hps = self.ccs.parameters
cond_hps = [x.name for x in self.ccs.conditional_parameters]

space = []
for x in hps:
self.hps_names.append(x.name)
distrib = self.ccs.get_parameter_distribution(x)[0]
if (isinstance(x, CCS.CategoricalParameter) or
isinstance(x, CCS.OrdinalParameter) or
isinstance(x, CCS.DiscreteParameter)):
vals = list(x.values)
if x.name in cond_hps:
vals.append("NA")
if isinstance(distrib, CCS.RouletteDistribution):
param = Categorical(vals, prior=distrib.areas, name=x.name)
elif isinstance(distrib, CCS.UniformDistribution):
param = Categorical(vals, name=x.name)
else:
raise ValueError("Unsupported distribution")
space.append(param)
self.hps_type[x.name] = "Categorical"
elif isinstance(x, CCS.NumericalParameter):
prior = "uniform"
lower = x.lower
upper = x.upper
t = x.data_type
if isinstance(distrib, CCS.UniformDistribution):
if distrib.scale_type == CCS.ScaleType.LOGARITHMIC:
prior = "log-uniform"
elif isinstance(distrib, CCS.NormalDistribution):
prior = "normal"
if distrib.scale_type == CCS.ScaleType.LOGARITHMIC:
raise ValueError("Unsupported 'log' transformation for CCS.NumericalParameter with normal prior.")
else:
raise ValueError("Unsupported distribution")
if CCS.NumericType.INT:
param = Integer(lower, upper, prior=prior, name=x.name)
self.hps_type[x.name] = "Integer"
else:
param = Real(lower, upper, prior=prior, name=x.name)
self.hps_type[x.name] = "Real"
space.append(param)
else:
raise ValueError("Unknown Parameter type")
dimensions = space
self.dimensions = [check_dimension(dim) for dim in dimensions]

def __eq__(self, other):
Expand Down Expand Up @@ -1058,6 +1121,43 @@ def from_yaml(cls, yml_path, namespace=None):

return space

def _cs_post_process_conf(self, hps_names, conf):
    """Flatten a configuration mapping into a list ordered like ``hps_names``.

    For every name in ``hps_names`` the value ``conf[name]`` is used when
    the name is among ``conf.keys()``. Otherwise a placeholder is emitted:
    ``"NA"`` if ``self.hps_type[name]`` is ``"Categorical"``, ``np.nan``
    for every other type.
    """
    point = []
    for name in hps_names:
        # The type lookup happens for every name, present in ``conf`` or not.
        placeholder = "NA" if self.hps_type[name] == "Categorical" else np.nan
        point.append(conf[name] if name in conf.keys() else placeholder)
    return point


def _ccs_post_process_conf(self, hps_names, conf):
    """Turn a CCS configuration into a list of values, one per name.

    ``conf.values`` is indexed by the position of each name in
    ``hps_names``. A value that compares equal to ``CCS.inactive`` is
    replaced by ``"NA"`` when ``self.hps_type[name]`` is ``"Categorical"``
    and by ``np.nan`` otherwise; every other value is passed through.
    """
    point = []
    raw = conf.values
    for idx, name in enumerate(hps_names):
        entry = raw[idx]
        if CCS.inactive == entry:
            entry = "NA" if self.hps_type[name] == "Categorical" else np.nan
        point.append(entry)
    return point

def default(self):
    """Return the default configuration of the wrapped space as a point.

    For a ConfigSpace-backed space the default configuration is passed
    through ``_cs_post_process_conf``; for a CCS-backed space it is passed
    through ``_ccs_post_process_conf``. In any other case ``None`` is
    returned.
    """
    if self.is_config_space:
        default_conf = self.config_space.get_default_configuration()
        names = self.config_space.get_hyperparameter_names()
        return self._cs_post_process_conf(names, default_conf)

    if self.is_ccs:
        default_conf = self.ccs.default_configuration
        names = [param.name for param in self.ccs.parameters]
        return self._ccs_post_process_conf(names, default_conf)

    return None

def rvs(self, n_samples=1, random_state=None):
"""Draw random samples.

Expand All @@ -1080,25 +1180,23 @@ def rvs(self, n_samples=1, random_state=None):
"""
rng = check_random_state(random_state)
if self.is_config_space:
req_points = []

points = []
confs = self.config_space.sample_configuration(n_samples)
if n_samples == 1:
confs = [confs]

hps_names = self.config_space.get_hyperparameter_names()
for conf in confs:
point = []
for hps_name in hps_names:
val = np.nan
if self.hps_type[hps_name] == "Categorical":
val = "NA"
if hps_name in conf.keys():
val = conf[hps_name]
point.append(val)
req_points.append(point)

return req_points
point = self._cs_post_process_conf(hps_names, conf)
points.append(point)
return points
elif self.is_ccs:
points = []
confs = self.ccs.samples(n_samples)
hps_names = [x.name for x in self.ccs.parameters]
for conf in confs:
point = self._ccs_post_process_conf(hps_names, conf)
points.append(point)
return points
else:
# Draw
columns = []
Expand Down Expand Up @@ -1178,7 +1276,7 @@ def transform(self, X):
# Repack as an array
Xt = np.hstack([np.asarray(c).reshape((len(X), -1)) for c in columns])

if False and self.is_config_space:
if False and (self.is_config_space or self.is_ccs):
self.imp_const.fit(Xt)
Xtt = self.imp_const.transform(Xt)
Xt = Xtt
Expand Down