Commit 547da24: WIP

NilsWinter committed Nov 22, 2024 (1 parent: aac9fd7)

Showing 12 changed files with 339 additions and 65 deletions.
4 changes: 3 additions & 1 deletion .gitignore
@@ -162,4 +162,6 @@ cython_debug/
docs

examples/tmp
-examples/simulated_data
+examples/simulated_data
+
+poetry.lock
42 changes: 40 additions & 2 deletions cpm/reporting/plots/plots.py
@@ -8,6 +8,44 @@


def bar_plot(df, selected_metric, results_folder):
cmap = sns.color_palette("muted", n_colors=3)
flatui = [ "#ba6523", "#31559e", "#cc3727"]
cmap = sns.color_palette(flatui)

# Dropdown for metric selection

fig = sns.FacetGrid(df[df['model'].isin(['connectome', 'covariates', 'residuals', 'full'])],
row='model', col='network', hue='network',
aspect=3, height=2 / 2.54,
col_order = ["positive", "negative", "both"],
row_order=['covariates', 'connectome','full', 'residuals'],
sharex='col', sharey=False, palette=cmap)

# then we add the densities kdeplots
fig.map(sns.boxplot, selected_metric, "model")

#fig.map(sns.kdeplot, selected_metric,
# bw_adjust=1, clip_on=False,
# fill=True, alpha=1, linewidth=1)

#fig.map(sns.violinplot, selected_metric, "model", inner = None, split = True, hue=1, hue_order=[1, 2],
# density_norm = 'count', dodge = True, fill = True)
#fig.map(sns.boxplot, selected_metric, "model")#, dodge=True, hue=1, hue_order=[2, 1])

fig.set_titles(template='')
fig.set(ylabel=None)
for ax,m in zip(fig.axes[0,:], ["positive", "negative", "both"]):
ax.set_title(m, fontweight='bold', fontsize=8)
for ax,l in zip(fig.axes[:,0], ['covariates', 'connectome','full', 'residuals']):
ax.set_ylabel(l, fontweight='bold', fontsize=8, rotation=0, ha='right', va='center')
fig.set(yticks=[])

fig.fig.subplots_adjust(hspace=0.2, wspace=0.15)
if selected_metric == 'pearson_score' or selected_metric == 'spearman_score':
for ax in fig.axes.ravel():
ax.set_xlim(-0.5, 1)
fig.map(plt.axvline, x=0, color='black', linewidth=0.5, zorder=-1)
"""
cmap = sns.color_palette("muted", n_colors=4)
# Dropdown for metric selection
@@ -35,9 +73,9 @@ def bar_plot(df, selected_metric, results_folder):
plt.tight_layout()
if selected_metric == 'pearson_score' or selected_metric == 'spearman_score':
axes[1].set_ylim(-0.5, 1)

"""
plot_name = os.path.join(results_folder, "plots", 'point_plot.png')
-    sns.despine(offset=1, trim=True)
+    #sns.despine(offset=1, trim=True)
fig.savefig(plot_name, dpi=300)
return plot_name, fig

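For orientation, here is a minimal, self-contained sketch of the long-form data that bar_plot expects (columns model, network, and a metric such as pearson_score) and a simplified version of its model-by-network box layout. The dummy DataFrame and the use of catplot are illustrative assumptions, not the exact FacetGrid construction from the diff above.

# Illustrative only: fake cv_results-style data with the columns bar_plot relies on.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

rng = np.random.default_rng(42)
models = ['covariates', 'connectome', 'full', 'residuals']
networks = ['positive', 'negative', 'both']
df = pd.DataFrame([{'model': m, 'network': n, 'pearson_score': rng.normal(0.3, 0.1)}
                   for m in models for n in networks for _ in range(50)])

# Simplified stand-in for the FacetGrid above: one column per network,
# one box per model, shared metric axis.
grid = sns.catplot(data=df, x='pearson_score', y='model', col='network',
                   col_order=networks, order=models, kind='box',
                   height=2.5, aspect=1.2)
grid.set_titles(col_template='{col_name}')
plt.show()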
8 changes: 7 additions & 1 deletion cpm/reporting/report.py
@@ -17,8 +17,14 @@ def main():
#st.session_state['results_directory'] = '/spm-data/vault-data3/mmll/projects/cpm_macs_example/haushaltsnetto'
#st.session_state['results_directory'] = '/spm-data/vault-data3/mmll/projects/cpm_macs_example/haushaltsnetto_partial'
#st.session_state['results_directory'] = '/spm-data/vault-data3/mmll/projects/cpm_macs_example/haushaltsnetto_05_fdr'
#st.session_state[
# 'results_directory'] = '/home/nwinter/PycharmProjects/cpm_python/examples/tmp/simulated_data_no_no_link/'
#st.session_state[
# 'results_directory'] = '/home/nwinter/PycharmProjects/cpm_python/examples/tmp/macs_fsozu_partial/'
#st.session_state[
# 'results_directory'] = '/home/nwinter/PycharmProjects/cpm_python/examples/tmp/macs_age_partial/'
st.session_state[
-        'results_directory'] = '/home/nwinter/PycharmProjects/cpm_python/examples/tmp/simulated_data_no_no_link/'
+        'results_directory'] = '/home/nwinter/PycharmProjects/cpm_python/examples/tmp/macs_IQ_partial/'

st.session_state['df'] = pd.read_csv(os.path.join(st.session_state.results_directory, 'cv_results.csv'))
st.session_state['df_main'] = load_results_from_folder(st.session_state.results_directory, 'cv_results_mean_std.csv')
2 changes: 1 addition & 1 deletion cpm/reporting/results.py
@@ -23,4 +23,4 @@

with st.container(border=False):
#st.plotly_chart(fig, use_container_width=True)
-    st.image(plot_name)
+    st.image(plot_name, width=600)
8 changes: 4 additions & 4 deletions cpm/simulate_data.py
@@ -109,15 +109,15 @@ def simulate_regression_data_2(n_samples: int = 500,
y = np.copy(y_rand)

if link_type == 'no_link':
-        z = 1 * y_rand + z_rand
+        z = 0.4 * y_rand + z_rand
X[:, :n_informative_features] = x_rand[:, :n_informative_features] + z
elif link_type == 'no_no_link':
-        z = 0.8 * y_rand + z_rand
+        z = 0.4 * y_rand + z_rand
elif link_type == 'direct_link':
-        z = 0.8 * y_rand + z_rand
+        z = 0.4 * y_rand + z_rand
X[:, :n_informative_features] = x_rand[:, :n_informative_features] + (0.2 * (noise * y_rand) + y_rand) + z
elif link_type == 'weak_link':
-        z = 0.3 * y_rand + z_rand
+        z = 0.4 * y_rand + z_rand
X[:, :n_informative_features] = x_rand[:, :n_informative_features] + (0.8 * (noise * y_rand) + y_rand) + z
else:
raise NotImplemented(f'Link type {link_type} not implemented')
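To illustrate what unifying the confound coupling at 0.4 does, here is a hedged, single-feature sketch of the four link types. Variable names mirror simulate_regression_data_2, but the standard-normal generators, the single feature, and the multiplicative noise term are assumptions made for this example only.

import numpy as np

rng = np.random.default_rng(0)
n = 5000
y_rand = rng.normal(size=n)   # outcome signal
z_rand = rng.normal(size=n)   # confound-specific noise
x_rand = rng.normal(size=n)   # one "edge" feature, for illustration
noise = rng.normal(size=n)    # assumed stand-in for the generator's noise term

z = 0.4 * y_rand + z_rand     # same confound-outcome coupling in every scenario after this commit

links = {
    'no_link':     x_rand + z,                                      # edge driven by the confound only
    'no_no_link':  x_rand,                                          # edge independent of outcome and confound
    'direct_link': x_rand + (0.2 * (noise * y_rand) + y_rand) + z,  # edge tracks the outcome, little extra noise
    'weak_link':   x_rand + (0.8 * (noise * y_rand) + y_rand) + z,  # same structure, more multiplicative noise
}
for name, x in links.items():
    print(f"{name:12s} corr(edge, y) = {np.corrcoef(x, y_rand)[0, 1]:+.2f}, "
          f"corr(edge, confound) = {np.corrcoef(x, z)[0, 1]:+.2f}")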
31 changes: 17 additions & 14 deletions examples/example_simulated_data2.py
@@ -5,25 +5,28 @@
from cpm.edge_selection import PThreshold, UnivariateEdgeSelection


-link_types = [#'no_link',
-              #'no_no_link',
+link_types = ['no_link',
+              'no_no_link',
'direct_link',
'weak_link'
]
+edge_statistics = ['pearson', 'pearson_partial']
+results_folder = '/spm-data/vault-data3/mmll/projects/confound_corrected_cpm/results'

for link in link_types:
-    X, y, covariates = simulate_regression_data_2(n_features=1225, n_informative_features=50, link_type=link)
+    for edge_statistic in edge_statistics:
+        X, y, covariates = simulate_regression_data_2(n_features=1225, n_informative_features=50, link_type=link)

-    univariate_edge_selection = UnivariateEdgeSelection(edge_statistic=['pearson'],
-                                                        edge_selection=[PThreshold(threshold=[0.05],
-                                                                                   correction=[None])])
-    cpm = CPMRegression(results_directory=f'./tmp/simulated_data_{link}',
-                        cv=RepeatedKFold(n_splits=10, n_repeats=5, random_state=42),
-                        edge_selection=univariate_edge_selection,
-                        #cv_edge_selection=ShuffleSplit(n_splits=1, test_size=0.2, random_state=42),
-                        add_edge_filter=True,
-                        n_permutations=2)
-    cpm.estimate(X=X, y=y, covariates=covariates)
+        univariate_edge_selection = UnivariateEdgeSelection(edge_statistic=[edge_statistic],
+                                                            edge_selection=[PThreshold(threshold=[0.05],
+                                                                                       correction=['fdr_by'])])
+        cpm = CPMRegression(results_directory=f'{results_folder}/simulated_data_{link}_{edge_statistic}',
+                            cv=RepeatedKFold(n_splits=10, n_repeats=5, random_state=42),
+                            edge_selection=univariate_edge_selection,
+                            #cv_edge_selection=ShuffleSplit(n_splits=1, test_size=0.2, random_state=42),
+                            add_edge_filter=True,
+                            n_permutations=2)
+        cpm.estimate(X=X, y=y, covariates=covariates)

-#cpm._calculate_permutation_results('./tmp/example_simulated_data2')
+#cpm._calculate_permutation_results('./tmp/example_simulated_data2')

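For reference, the rewritten loop should leave one results directory per link type and edge statistic, each containing the cv_results.csv that examples/plot_simulation_results.py reads back in. A small sketch of the expected layout, assuming the runs complete:

import os

results_folder = '/spm-data/vault-data3/mmll/projects/confound_corrected_cpm/results'
link_types = ['no_link', 'no_no_link', 'direct_link', 'weak_link']
edge_statistics = ['pearson', 'pearson_partial']

# Eight expected output folders written by CPMRegression in the loop above.
for link in link_types:
    for edge_statistic in edge_statistics:
        print(os.path.join(results_folder, f'simulated_data_{link}_{edge_statistic}', 'cv_results.csv'))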
49 changes: 49 additions & 0 deletions examples/human_connectome_project.py
@@ -0,0 +1,49 @@
import os
import pandas as pd
import numpy as np

# Define the folder containing the .txt files
folder_path = ('/spm-data/vault-data3/mmll/data/HCP/100_nodes_times_series/3T_HCP1200_MSMAll_d100_ts2')

# Initialize a list to store correlation matrices
correlation_matrices = []

# Initialize a list to store subject IDs
subject_ids = []

file_list = [f for f in os.listdir(folder_path) if f.endswith('.txt')]

# Sort the filenames by subject ID (assuming subject IDs are in the filename before .txt)
file_list.sort() # This will sort alphabetically, which works if subject IDs are formatted consistently

# Loop through all files in the specified folder
for filename in file_list:
print(filename)
# Construct the full file path
file_path = os.path.join(folder_path, filename)

# Read the text file into a DataFrame
df = pd.read_csv(file_path, sep='\s+', header=None)

# Calculate the correlation matrix
correlation_matrix = df.corr()

# Append the correlation matrix to the list
correlation_matrices.append(correlation_matrix.values)

# Extract subject ID from the filename (remove the .txt extension)
subject_id = filename[:-4] # Exclude the last 4 characters ('.txt')
subject_ids.append(subject_id)

# Convert the list of correlation matrices into a 3D numpy array
correlation_array = np.array(correlation_matrices)

# Create a DataFrame for subject IDs
subject_ids_df = pd.DataFrame(subject_ids, columns=['Subject_ID'])

# Save the numpy array and DataFrame to disk
np.save('/spm-data/vault-data3/mmll/data/HCP/100_nodes_times_series/functional_connectivity.npy', correlation_array) # Save as .npy file
subject_ids_df.to_csv('/spm-data/vault-data3/mmll/data/HCP/100_nodes_times_series/subject_ids.csv', index=False) # Save as .csv file


print()
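A hedged follow-up sketch that is not part of the commit: the simulated examples use 1225 = 50 * 49 / 2 features, which corresponds to vectorized upper-triangle edges, so a plausible next step is to load the saved array and flatten each 100 x 100 matrix the same way. Paths are taken from the script above; the shapes in the comments are expectations, not verified output.

import os
import numpy as np
import pandas as pd

base = '/spm-data/vault-data3/mmll/data/HCP/100_nodes_times_series'
fc = np.load(os.path.join(base, 'functional_connectivity.npy'))   # expected shape (n_subjects, 100, 100)
ids = pd.read_csv(os.path.join(base, 'subject_ids.csv'))

iu = np.triu_indices(fc.shape[1], k=1)   # upper triangle, diagonal excluded
X = fc[:, iu[0], iu[1]]                  # expected shape (n_subjects, 4950) for 100 nodes
print(X.shape, len(ids))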
34 changes: 24 additions & 10 deletions examples/macs.py
@@ -14,35 +14,49 @@
df = pd.read_csv('/spm-data/vault-data3/mmll/projects/cpm_macs_example/datahub/AnalysisReady/hc/DTI_fractional_anisotropy/subjects.csv',
na_values=-99)

#X = X[df['Group'] == 1]
#df = df[df['Group'] == 1]
+X = X[df['Group'] <= 1]
+df = df[df['Group'] <= 1]


-X = X[~df['CTQ_Sum'].isna()]
-df = df[~df['CTQ_Sum'].isna()]
+#X = X[~df['CTQ_Sum'].isna()]
+#df = df[~df['CTQ_Sum'].isna()]
+X = X[~df['IQ'].isna()]
+df = df[~df['IQ'].isna()]

#X = X[~df['NEOFFI_Neurotizismus'].isna()]
#df = df[~df['NEOFFI_Neurotizismus'].isna()]

#X = X[~df['FSozU_Sum'].isna()]
#df = df[~df['FSozU_Sum'].isna()]

#X = X[~df['BDI_Sum'].isna()]
#df = df[~df['BDI_Sum'].isna()]
covs = df[['Alter', 'Geschlecht', 'Site']].to_numpy()
#covs = df[['Geschlecht', 'Site']].to_numpy()
#covs = df[['Geschlecht']].to_numpy()
#y = df['BDI_Sum'].to_numpy()
-y = df['CTQ_Sum'].to_numpy()
#y = df['FSozU_Sum'].to_numpy()
#y = df['Alter'].to_numpy()
+y = df['IQ'].to_numpy()

#y = df['NEOFFI_Neurotizismus'].to_numpy()
#y = df['CTQ_Sum'].to_numpy()
#covs = df[['Geschlecht']].to_numpy()
#y = df['Alter'].to_numpy()


from cpm.edge_selection import PThreshold, UnivariateEdgeSelection
-p_threshold = PThreshold(threshold=[0.05], correction=[None])
+p_threshold = PThreshold(threshold=[0.1, 0.05, 0.01, 0.005, 0.001], correction=[None])
univariate_edge_selection = UnivariateEdgeSelection(edge_statistic=['pearson_partial'],
edge_selection=[p_threshold])

-cpm = CPMRegression(results_directory='./tmp/macs_ctq',
+cpm = CPMRegression(results_directory='./tmp/macs_IQ_partial',
cv=KFold(n_splits=10, shuffle=True, random_state=42),
edge_selection=univariate_edge_selection,
#cv_edge_selection=KFold(n_splits=2, shuffle=True, random_state=42),
cv_edge_selection=KFold(n_splits=10, shuffle=True, random_state=42),
add_edge_filter=True,
-                    n_permutations=100)
+                    n_permutations=20)
results = cpm.estimate(X=X, y=y, covariates=covs)
#print(results)
-cpm._calculate_permutation_results()
+#cpm._calculate_permutation_results()

44 changes: 44 additions & 0 deletions examples/monte_carlo.py
@@ -0,0 +1,44 @@
import numpy as np
import matplotlib.pyplot as plt


def simulate_scenario(scenario, n=1000, noise_level=0.1):
np.random.seed(42) # For reproducibility
if scenario == 1:
# Scenario 1: x and y associated, z independent
x = np.random.normal(0, 1, n)
y = 2 * x + np.random.normal(0, noise_level, n)
z = np.random.normal(0, 1, n) # Independent of x and y
elif scenario == 2:
# Scenario 2: x and y spurious due to z
z = np.random.normal(0, 1, n)
x = 2 * z + np.random.normal(0, noise_level, n)
y = -3 * z + np.random.normal(0, noise_level, n)
elif scenario == 3:
# Scenario 3: x and y associated; z partially explains it
z = np.random.normal(0, 1, n)
x = 2 * z + np.random.normal(0, noise_level, n)
y = 3 * x - 1.5 * z + np.random.normal(0, noise_level, n)
else:
raise ValueError("Invalid scenario. Choose 1, 2, or 3.")
return x, y, z


# Simulate and plot results for each scenario
for scenario in range(1, 4):
x, y, z = simulate_scenario(scenario, noise_level=2)

print(f"Scenario {scenario} correlations:")
print(f" Corr(x, y): {np.corrcoef(x, y)[0, 1]:.2f}")
print(f" Corr(x, z): {np.corrcoef(x, z)[0, 1]:.2f}")
print(f" Corr(y, z): {np.corrcoef(y, z)[0, 1]:.2f}")

plt.figure()
plt.scatter(x, y, alpha=0.5, label="x vs y")
plt.scatter(x, z, alpha=0.5, label="x vs z")
plt.scatter(y, z, alpha=0.5, label="y vs z")
plt.legend()
plt.title(f"Scenario {scenario}")
plt.xlabel("Variable")
plt.ylabel("Variable")
plt.show()
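A hedged companion check that is not part of the script: the partial correlation of x and y given z is what separates the spurious association in scenario 2 from the genuine ones in scenarios 1 and 3, which is the point the confound-corrected models build on. partial_corr below is a simple residualization helper written only for this sketch; run it after the definitions above.

import numpy as np


def partial_corr(x, y, z):
    """Correlation of x and y after regressing z (plus an intercept) out of both."""
    design = np.column_stack([np.ones_like(z), z])
    x_res = x - design @ np.linalg.lstsq(design, x, rcond=None)[0]
    y_res = y - design @ np.linalg.lstsq(design, y, rcond=None)[0]
    return np.corrcoef(x_res, y_res)[0, 1]


for scenario in range(1, 4):
    x, y, z = simulate_scenario(scenario, noise_level=2)  # from the script above
    print(f"Scenario {scenario}: corr(x, y) = {np.corrcoef(x, y)[0, 1]:+.2f}, "
          f"partial corr(x, y | z) = {partial_corr(x, y, z):+.2f}")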
15 changes: 15 additions & 0 deletions examples/monte_carlo_tutorial.py
@@ -0,0 +1,15 @@
import numpy as np


assoc_strength = 0.5

x = np.random.normal(0, 1, 1000)
e = np.random.normal(0, 1, 1000)
y = assoc_strength * x + e

print(f"var({assoc_strength}*x)={np.var(assoc_strength * x)}")
print(f"var(noise)={np.var(e)}")
print(f"var(y)={np.var(y)}")

print(f"r={np.corrcoef(x, y)[0, 1]:.2f}")
print(f"Explained variance: {np.square(np.corrcoef(x, y)[0, 1]):.2f}")
71 changes: 39 additions & 32 deletions examples/plot_simulation_results.py
@@ -5,35 +5,42 @@
import pandas as pd


results_folder = "/home/nwinter/PycharmProjects/cpm_python/examples/tmp/"

results = {"brain not associated with y\n brain not associated with confound\n confound associated with y": os.path.join(results_folder, "simulated_data_no_no_link"),
"brain not associated with y\n brain associated with confound\n confound associated with y": os.path.join(results_folder, "simulated_data_no_link"),
"brain weakly associated with y\n brain associated with confound\n confound associated with y": os.path.join(results_folder, "simulated_data_weak_link"),
"brain strongly associated with y\n brain associated with confound\n confound associated with y": os.path.join(results_folder, "simulated_data_direct_link"),

}

dfs = []
for link_type, folder in results.items():
df = pd.read_csv(os.path.join(folder, 'cv_results.csv'))
df['link_type'] = link_type
dfs.append(df)

concatenated_df = pd.concat(dfs, ignore_index=True)

concatenated_df = concatenated_df[concatenated_df['network'] == 'both']

concatenated_df['model'] = concatenated_df['model'].replace({"covariates": "confound only", "full": "connectome + confound",
"connectome": "connectome only"})

g = sns.FacetGrid(concatenated_df, col="link_type", margin_titles=True, despine=True,
height=2.5)
g.map(plt.axvline, x=0, color='grey', linewidth=0.5, zorder=-1)
g.map(sns.violinplot, "pearson_score", "model", inner=None, split=True, hue=1, hue_order=[1, 2],
density_norm='count', dodge=True, palette="Blues_r")
g.map(sns.boxplot, "pearson_score", "model", dodge=True, hue=1, hue_order=[2, 1])
g.set_titles(col_template="{col_name}", size=7)
g.set_xlabels("Pearson correlation", size=8)
plt.show()
print()
results_folder = "/spm-data/vault-data3/mmll/projects/confound_corrected_cpm/results"

edge_statistics = ['pearson', 'pearson_partial']


for edge_statistic in edge_statistics:
results = {"brain not associated with y\n brain not associated with confound\n confound associated with y": os.path.join(
results_folder, f"simulated_data_no_no_link_{edge_statistic}"),
"brain not associated with y\n brain associated with confound\n confound associated with y": os.path.join(
results_folder, f"simulated_data_no_link_{edge_statistic}"),
"brain weakly associated with y\n brain associated with confound\n confound associated with y": os.path.join(
results_folder, f"simulated_data_weak_link_{edge_statistic}"),
"brain strongly associated with y\n brain associated with confound\n confound associated with y": os.path.join(
results_folder, f"simulated_data_direct_link_{edge_statistic}")}
dfs = []
for link_type, folder in results.items():
df = pd.read_csv(os.path.join(folder, 'cv_results.csv'))
df['link_type'] = link_type
dfs.append(df)

concatenated_df = pd.concat(dfs, ignore_index=True)

concatenated_df = concatenated_df[concatenated_df['network'] == 'both']

concatenated_df['model'] = concatenated_df['model'].replace({"covariates": "confound only", "full": "connectome + confound",
"connectome": "connectome only"})


g = sns.FacetGrid(concatenated_df, col="link_type", margin_titles=True, despine=True,
height=2.5, hue="model")
g.map(plt.axvline, x=0, color='grey', linewidth=0.5, zorder=-1)
g.map(sns.violinplot, "pearson_score", "model", inner=None, split=True,# hue="model",# hue_order=[1, 2],
density_norm='count', dodge=True, palette="Blues_r", fill=True)
#g.map(sns.boxplot, "pearson_score", "model", dodge=True, #hue=1, hue_order=[2, 1]
# )
g.set_titles(col_template="{col_name}", size=7)
g.set_xlabels("Pearson correlation", size=8)
plt.show()
print()