Commit c44a5ec

Merge branch 'Add_Brain_Age' into photon_dev

# Conflicts:
#	photonai/modelwrapper/SamplePairing.py

RLeenings committed Jun 5, 2019
2 parents a329229 + ffbad24
Showing 24 changed files with 934 additions and 65 deletions.
71 changes: 50 additions & 21 deletions photonai/base/PhotonBase.py
@@ -34,6 +34,11 @@
from .PhotonPipeline import PhotonPipeline


class PhotonNative:
"""only for checking if code is meeting requirements"""
pass


class OutputSettings:
"""
Configuration class that specifies the format in which the results are saved. Results can be saved to a MongoDB
@@ -93,11 +98,11 @@ def __init__(self, mongodb_connect_url: str = None,
self.save_output = save_output

if self.save_output:
- local_file: str = 'photon_result_file.p'
- log_filename: str = 'photon_output.log'
- summary_filename: str = 'photon_summary.txt'
- pretrained_model_filename: str = 'photon_best_model.photon'
- predictions_filename: str = 'outer_fold_predictions.csv'
+ local_file = 'photon_result_file.p'
+ log_filename = 'photon_output.log'
+ summary_filename = 'photon_summary.txt'
+ pretrained_model_filename = 'photon_best_model.photon'
+ predictions_filename = 'outer_fold_predictions.csv'
self.local_file = os.path.join(project_folder, local_file)
self.log_file = os.path.join(project_folder, log_filename)
self.summary_filename = os.path.join(project_folder, summary_filename)
@@ -428,7 +433,7 @@ def __iadd__(self, pipe_element):
if isinstance(pipe_element, PreprocessingPipe):
self.preprocessing_pipe = pipe_element
else:
- if isinstance(pipe_element, PipelineElement):
+ if isinstance(pipe_element, PipelineElement) or issubclass(type(pipe_element), PhotonNative):
self.pipeline_elements.append(pipe_element)
# Todo: is repeated each time element is added....
self._prepare_pipeline()
@@ -749,8 +754,14 @@ def fit(self, data, targets, **kwargs):

# do the optimizing
for current_config in self.optimizer.ask:

+ if hasattr(self.optimizer, 'ask_for_pipe'):
+     pipe_ctor = self.optimizer.ask_for_pipe()
+ else:
+     pipe_ctor = self._copy_pipeline

self.__distribute_cv_info_to_hyperpipe_children(reset=True, config_counter=tested_config_counter)
- hp = TestPipeline(self._copy_pipeline, current_config, self.metrics, self.update_mother_inner_fold_nr,
+ hp = TestPipeline(pipe_ctor, current_config, self.metrics, self.update_mother_inner_fold_nr,
mongo_db_settings=self.output_settings,
callback_function=self.inner_cv_callback_function)
Logger().debug('optimizing of:' + self.name)
@@ -1236,17 +1247,17 @@ def load_optimum_pipe(file, password=None):

def run_dummy_estimator(self):
if hasattr(self.pipeline_elements[-1].base_element, '_estimator_type'):
- type = self.pipeline_elements[-1].base_element._estimator_type
+ est_type = self.pipeline_elements[-1].base_element._estimator_type
else:
if isinstance(self.pipeline_elements[-1], PipelineSwitch):
- type = self.pipeline_elements[-1].base_element.base_element._estimator_type
+ est_type = self.pipeline_elements[-1].base_element.base_element._estimator_type
else:
- type = None
+ est_type = None

- if type == 'regressor':
+ if est_type == 'regressor':
strategy = 'mean'
dummy = DummyRegressor(strategy=strategy)
- elif type == 'classifier':
+ elif est_type == 'classifier':
strategy = 'most_frequent'
dummy = DummyClassifier(strategy=strategy)
else:
@@ -1263,6 +1274,13 @@ def run_dummy_estimator(self):
for train, test in self.data_test_cases:

train_X, train_y = self.X[train], self.y[train]

+ if isinstance(train_X, np.ndarray):
+     if len(train_X.shape) > 2:
+         Logger().info("Skipping dummy estimator because of too many dimensions")
+         break

+ # dummy.fit(train_X, train_y)
dummy.fit(train_X, train_y)
train_scores = TestPipeline.score(dummy, train_X, train_y, metrics=self.metrics)

@@ -1276,12 +1294,14 @@ def run_dummy_estimator(self):
inner_fold.validation = test_scores

fold_list.append(inner_fold)
- config_item.inner_folds = fold_list
- config_item.metrics_train, config_item.metrics_test = MDBHelper.aggregate_metrics(config_item, self.metrics)

dummy_results = DummyResults()
- dummy_results.strategy = strategy
- dummy_results.train = config_item.metrics_train
- dummy_results.test = config_item.metrics_test
+ if len(fold_list) > 0:
+     config_item.inner_folds = fold_list
+     config_item.metrics_train, config_item.metrics_test = MDBHelper.aggregate_metrics(config_item, self.metrics)
+     dummy_results.strategy = strategy
+     dummy_results.train = config_item.metrics_train
+     dummy_results.test = config_item.metrics_test
return dummy_results


@@ -1437,6 +1457,8 @@ def __init__(self, name, hyperparameters: dict=None, test_disabled: bool=False,
self.is_transformer = hasattr(self.base_element, "transform")
self.is_estimator = hasattr(self.base_element, "predict")

+ self.kwargs = kwargs

# Todo: check if hyperparameters are members of the class
# Todo: write method that returns any hyperparameter that could be optimized --> sklearn: get_params.keys
# Todo: map any hyperparameter to a possible default list of values to try
@@ -1465,7 +1487,12 @@ def __init__(self, name, hyperparameters: dict=None, test_disabled: bool=False,
self.needs_covariates = False

def copy_me(self):
- return deepcopy(self)
+ if hasattr(self.base_element, 'copy_me'):
+     # new_base_element = self.base_element.copy_me()
+     # TODO !!!!!!!
+     return PipelineElement(self.name, self.hyperparameters, **self.kwargs)
+ else:
+     return deepcopy(self)

@classmethod
def create(cls, name, base_element, hyperparameters: dict, test_disabled=False, disabled=False, **kwargs):
@@ -1606,6 +1633,8 @@ def transform(self, X, y=None, **kwargs):
if hasattr(self.base_element, 'transform'):
return self.adjusted_delegate_call(self.base_element.transform, X, y, **kwargs)
elif hasattr(self.base_element, 'predict'):
+     # Logger().warn("used prediction instead of transform " + self.name)
+     # raise Warning()
return self.base_element.predict(X)
else:
Logger().error('BaseException: transform-predict-mess')
@@ -1835,7 +1864,7 @@ def __init__(self, name: str, stacking_elements=None, voting: bool=False):
self.__iadd__(item_to_stack)

# in case any of the children needs y or the covariates, we have to request them
- self.needs_y = True
+ self.needs_y = False
self.needs_covariates = True

def __iadd__(self, item):
Expand Down Expand Up @@ -1916,7 +1945,7 @@ def fit(self, data, targets=None, **kwargs):
"""
for name, element in self.pipe_elements.items():
# Todo: parallellize fitting
- element.fit(data, targets)
+ element.fit(data, targets, **kwargs)
return self

def predict(self, data, targets=None, **kwargs):
@@ -1981,7 +2010,7 @@ def stack_data(cls, a, b):
New matrix, that is a and b horizontally joined
"""
- if a.size == 0:
+ if a is None or (isinstance(a, np.ndarray) and a.size == 0):
a = b
else:
# Todo: check for right dimensions!
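A side note on the dummy-estimator changes above: the rename from type to est_type stops shadowing the built-in, the value follows scikit-learn's _estimator_type convention, and the new guard skips the baseline for inputs with more than two dimensions, which the dummy estimators cannot handle. A minimal standalone sketch of the same selection logic (make_dummy and the LinearRegression example are illustrative, not part of this commit):

import numpy as np
from sklearn.dummy import DummyClassifier, DummyRegressor
from sklearn.linear_model import LinearRegression

def make_dummy(estimator):
    # scikit-learn estimators expose _estimator_type ('regressor' or 'classifier')
    est_type = getattr(estimator, '_estimator_type', None)
    if est_type == 'regressor':
        return DummyRegressor(strategy='mean')
    elif est_type == 'classifier':
        return DummyClassifier(strategy='most_frequent')
    return None

X, y = np.random.randn(20, 3), np.random.randn(20)
dummy = make_dummy(LinearRegression())  # picks DummyRegressor(strategy='mean')
dummy.fit(X, y)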
99 changes: 99 additions & 0 deletions photonai/base/PhotonBatchElement.py
@@ -0,0 +1,99 @@
from .PhotonBase import PipelineElement
from ..photonlogger import Logger
import numpy as np


class PhotonBatchElement(PipelineElement):

def __init__(self, name, hyperparameters: dict=None, test_disabled: bool=False, disabled: bool =False,
base_element=None, batch_size: int = 10, **kwargs):

super(PhotonBatchElement, self).__init__(name, hyperparameters, test_disabled, disabled, base_element, **kwargs)
# self.base_element = PipelineElement(base_element_name, hyperparameters=hyperparameters, **kwargs)

self.batch_size = batch_size

@staticmethod
def chunker(nr_items, size):
return [(pos, pos + size) for pos in range(0, nr_items, size)]
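# e.g. chunker(23, 10) -> [(0, 10), (10, 20), (20, 30)]; the final stop may
# overshoot nr_items, which is harmless because Python slices clamp to the end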

def batch_call(self, delegate, X, y=None, call_with_y=True, **kwargs):

# initialize return values
processed_X = None
processed_y = None
processed_kwargs = dict()

# iterate through data batchwise
if isinstance(X, np.ndarray):
nr = X.shape[0]
dim = len(X.shape)
else:
nr = len(X)
dim = 1

batch_idx = 0
for start, stop in PhotonBatchElement.chunker(nr, self.batch_size):

batch_idx += 1
Logger().debug(self.name + " is processing batch nr " + str(batch_idx))

# split data in batches
if dim > 1:
X_batched = X[start:stop, :]
else:
X_batched = X[start:stop]

# we are probably None anyway
y_batched = y
# if we are to batch then apply it
if call_with_y and y is not None:
y_batched = y[start:stop]

kwargs_dict_batched = dict()
for key, kwargs_list in kwargs.items():
if not isinstance(kwargs_list, np.ndarray):
kwargs_list = np.array(kwargs_list)
if len(kwargs_list.shape) > 1:
kwargs_dict_batched[key] = kwargs_list[start:stop, :]
else:
kwargs_dict_batched[key] = kwargs_list[start:stop]

# call the delegate
X_new, y_new, kwargs_new = self.adjusted_delegate_call(delegate, X_batched, y_batched, **kwargs_dict_batched)

# stack results
processed_X = PhotonBatchElement.stack_results(X_new, processed_X)

if call_with_y:
processed_y = PhotonBatchElement.stack_results(y_new, processed_y)
for proc_key, proc_values in kwargs_new.items():
new_kwargs_data = kwargs_new[proc_key]
if proc_key not in processed_kwargs:
processed_kwargs[proc_key] = new_kwargs_data
else:
processed_kwargs[proc_key] = PhotonBatchElement.stack_results(new_kwargs_data, processed_kwargs[proc_key])
else:
processed_kwargs = kwargs
processed_y = y
return processed_X, processed_y, processed_kwargs

@staticmethod
def stack_results(new_a, existing_a):
if existing_a is not None:
if isinstance(new_a, list) or (isinstance(new_a, np.ndarray) and len(new_a.shape) < 2):
if isinstance(existing_a, list):
existing_a = existing_a + new_a
else:
existing_a = np.hstack((existing_a, new_a))
else:
existing_a = np.vstack((existing_a, new_a))
else:
existing_a = new_a
return existing_a

def transform(self, X, y=None, **kwargs):
return self.batch_call(self.base_element.transform, X, y, **kwargs)

def predict(self, X, y=None, **kwargs):
return self.batch_call(self.base_element.predict, X, y, call_with_y=False, **kwargs)
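The stack_results helper above is the subtle part of batch_call: lists and 1-D arrays (e.g. predictions) grow end-to-end, while anything 2-D or higher is stacked row-wise, so the per-batch outputs reassemble into one result. A self-contained sketch of that rule (only the behaviour mirrors the diff; the demo data is made up):

import numpy as np

def stack_results(new_a, existing_a):
    if existing_a is None:
        return new_a                           # first batch: nothing to stack onto
    if isinstance(new_a, list):
        return existing_a + new_a              # lists grow end-to-end
    if new_a.ndim < 2:
        return np.hstack((existing_a, new_a))  # 1-D arrays too
    return np.vstack((existing_a, new_a))      # 2-D+ arrays are stacked row-wise

X = stack_results(np.ones((2, 3)), None)            # shape (2, 3)
X = stack_results(np.zeros((2, 3)), X)              # shape (4, 3)
y = stack_results(np.array([2, 3]), np.array([1]))  # array([1, 2, 3])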
14 changes: 9 additions & 5 deletions photonai/configuration/PhotonNeuro.json
@@ -1,14 +1,18 @@
{
"ResampleImgs":[
"photonai.neuro.ImageBasics.ResamplingImgs",
"ResampleImages":[
"photonai.neuro.ImageBasics.ResampleImages",
"Transformer"
],
"SmoothImgs":[
"photonai.neuro.ImageBasics.SmoothImgs",
"SmoothImages":[
"photonai.neuro.ImageBasics.SmoothImages",
"Transformer"
],
"BrainAtlas":[
"photonai.neuro.BrainAtlas.BrainAtlas",
"Transformer"
],
"PatchImages":[
"photonai.neuro.ImageBasics.PatchImages",
"Transformer"
]
}
}
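For reference, the keys in PhotonNeuro.json are the element names used when assembling a pipeline, so this rename is user-facing: pipelines now request 'ResampleImages'/'SmoothImages' instead of 'ResampleImgs'/'SmoothImgs', and 'PatchImages' becomes available (it is what Brain_Age_Master.py below wraps in a PhotonBatchElement). A hedged sketch of plain usage, with the Hyperpipe arguments copied from the example below and the 'fwhm'/'voxel_size' hyperparameter names as illustrative assumptions:

from sklearn.model_selection import KFold
from photonai.base.PhotonBase import Hyperpipe, PipelineElement

pipe = Hyperpipe('NeuroPipe',
                 optimizer='grid_search',
                 metrics=['mean_absolute_error'],
                 best_config_metric='mean_absolute_error',
                 inner_cv=KFold(n_splits=3))
# the new registry keys resolve to photonai.neuro.ImageBasics classes
pipe += PipelineElement('SmoothImages', hyperparameters={'fwhm': [6, 8]})
pipe += PipelineElement('ResampleImages', hyperparameters={'voxel_size': [3]})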
101 changes: 101 additions & 0 deletions photonai/examples/Brain_Age_Master.py
@@ -0,0 +1,101 @@
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import KFold
#from skopt import Optimizer
#from skopt.optimizer import dummy_minimize
#from skopt import dummy_minimize
import scipy.io as sio
import keras
from photonai.base.PhotonBase import Hyperpipe, PipelineElement, PhotonRegister
from photonai.base.PhotonBatchElement import PhotonBatchElement
from photonai.validation import ResultsTreeHandler
from photonai.neuro.BrainAtlas import AtlasLibrary
from scipy.stats import itemfreq
from photonai.investigator.Investigator import Investigator
import matplotlib.pyplot as plt
import pandas as pd
from nilearn import image
import time


import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="2"


# RandomCtrlData = np.ones((1792, 121, 145, 121))
# RandomCtrlData = np.ones((172, 121, 145, 121))
# RandomCtrlLabels = np.random.randn((RandomCtrlData.shape[0]))

root_folder = '/spm-data/Scratch/spielwiese_ramona/PAC2018/'
filename = 'PAC2018_age.csv'
df = pd.read_csv(os.path.join(root_folder, filename))

X = df["PAC_ID"]
X = [os.path.join(root_folder, 'data_all/' + x + ".nii") for x in X]
y = df["Age"].values

X = X[0:1500]
y = y[0:1500]

#
PhotonRegister.save(photon_name='Brain_Age_Splitting_Wrapper',
class_str='photonai.modelwrapper.Brain_Age_Splitting_Wrapper.Brain_Age_Splitting_Wrapper', element_type="Transformer")
#
# PhotonRegister.save(photon_name='Brain_Age_Splitting_CNN',
# class_str='photonai.modelwrapper.Brain_Age_Splitting_CNN.Brain_Age_Splitting_CNN', element_type="Estimator")
#
PhotonRegister.save(photon_name='Brain_Age_Random_Forest',
class_str='photonai.modelwrapper.Brain_Age_Random_Forest.Brain_Age_Random_Forest', element_type="Estimator")

my_pipe = Hyperpipe('BrainAgePipe',
optimizer='grid_search',
metrics=['mean_absolute_error'],
best_config_metric='mean_absolute_error',
inner_cv=KFold(n_splits=5, shuffle=True, random_state=42),
outer_cv=KFold(n_splits=5, shuffle=True, random_state=42),
eval_final_performance=False,
verbosity=2)

# transformer = PipelineElement(, hyperparameters={})
# base_element=transformer
batched_transformer = PhotonBatchElement("PatchImages", hyperparameters={'patch_size': [10, 25, 50, 75, 100]},
batch_size=100,
nr_of_processes=10,
cache_folder='/spm-data/vault-data1/tmp/photon_cache_vincent/')
my_pipe += batched_transformer


#my_pipe += PipelineElement('Brain_Age_Splitting_Wrapper')

my_pipe += PipelineElement('Brain_Age_Random_Forest')

my_pipe.fit(X, y)

batched_transformer.base_element.clear_cache()









inner_performances = list()
for i, fold in enumerate(my_pipe.result_tree.outer_folds[0].tested_config_list):
inner_performances.append((fold.config_dict, fold.metrics_test[0].value))
print(inner_performances)

plt.ylim(0.2, 0.8)
plt.xticks(rotation=90)
plt.margins(0.3)

for i, lelles in inner_performances:
print(i, lelles)
Benis = ",".join(("{}={}".format(*p) for p in i.items()))
plt.plot(Benis, lelles, 'ro')


plt.show()