From a802bc36eef627c6ddd0468be7cf276443696632 Mon Sep 17 00:00:00 2001 From: TheSinnerAR Date: Wed, 17 Jul 2024 22:23:43 -0300 Subject: [PATCH 1/4] Add Bisecting K-Means Cluster Algorithm in Feature Selection (ClusteringAlgorithm in models.py) - [src/feature_selection/models.py] - [src/feature_selection/fs_models.py] - [src/feature_selection/views.py] - [src/frontend/static/frontend/src/components/biomarkers/labels/ClusteringAlgorithmLabel.tsx] - [src/frontend/static/frontend/src/components/biomarkers/types.ts] - [src/frontend/static/frontend/src/components/biomarkers/utils.ts] --- src/feature_selection/fs_algorithms_spark.py | 2 ++ src/feature_selection/fs_models.py | 6 ++++-- src/feature_selection/models.py | 1 + src/feature_selection/views.py | 8 +++++++- .../biomarkers/labels/ClusteringAlgorithmLabel.tsx | 4 ++++ .../static/frontend/src/components/biomarkers/types.ts | 3 ++- .../static/frontend/src/components/biomarkers/utils.ts | 3 ++- 7 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/feature_selection/fs_algorithms_spark.py b/src/feature_selection/fs_algorithms_spark.py index bc2125bb..fcec625d 100644 --- a/src/feature_selection/fs_algorithms_spark.py +++ b/src/feature_selection/fs_algorithms_spark.py @@ -32,6 +32,8 @@ def __get_clustering_algorithm_value(cluster_algorithm: ClusteringAlgorithm) -> """Gets the corresponding string value for the parameter 'clustering-algorithm' of the EMR integration.""" if cluster_algorithm == ClusteringAlgorithm.SPECTRAL: return 'spectral' + if cluster_algorithm == ClusteringAlgorithm.BK_MEANS: + return 'bk_means' return 'k_means' # Default is kmeans diff --git a/src/feature_selection/fs_models.py b/src/feature_selection/fs_models.py index 4570b80f..a8de8e1d 100644 --- a/src/feature_selection/fs_models.py +++ b/src/feature_selection/fs_models.py @@ -1,6 +1,6 @@ from typing import Literal, Union, Optional from django.conf import settings -from sklearn.cluster import KMeans, SpectralClustering +from
sklearn.cluster import KMeans, SpectralClustering, BisectingKMeans from sksurv.ensemble import RandomSurvivalForest from sksurv.svm import FastKernelSurvivalSVM from .models import ClusteringAlgorithm @@ -12,7 +12,7 @@ SVMOptimizerOptions = Literal["avltree", "rbtree"] # Available models for clustering -ClusteringModels = Union[KMeans, SpectralClustering] +ClusteringModels = Union[KMeans, SpectralClustering, BisectingKMeans] def get_clustering_model(clustering_algorithm: ClusteringAlgorithm, @@ -28,6 +28,8 @@ def get_clustering_model(clustering_algorithm: ClusteringAlgorithm, return KMeans(n_clusters=number_of_clusters, random_state=random_state, n_init='auto') elif clustering_algorithm == ClusteringAlgorithm.SPECTRAL: return SpectralClustering(n_clusters=number_of_clusters, random_state=random_state) + elif clustering_algorithm == ClusteringAlgorithm.BK_MEANS: + return BisectingKMeans(n_clusters=number_of_clusters, random_state=random_state) raise Exception(f'Invalid clustering_algorithm parameter: {clustering_algorithm}') diff --git a/src/feature_selection/models.py b/src/feature_selection/models.py index e6c921fc..d8f164ff 100644 --- a/src/feature_selection/models.py +++ b/src/feature_selection/models.py @@ -31,6 +31,7 @@ class ClusteringAlgorithm(models.IntegerChoices): """Clustering algorithm.""" K_MEANS = 1 SPECTRAL = 2 # TODO: implement in backend + BK_MEANS = 3 class ClusteringMetric(models.IntegerChoices): diff --git a/src/feature_selection/views.py b/src/feature_selection/views.py index 7565b171..c6a52903 100644 --- a/src/feature_selection/views.py +++ b/src/feature_selection/views.py @@ -210,7 +210,13 @@ def __get_clustering_parameters_columns(row: pd.Series) -> Tuple[int, Clustering parameters_desc = row['parameters'] params = parameters_desc.split('_') number_of_clusters, algorithm_description, scoring_method = params[0], params[2], params[4] - algorithm = ClusteringAlgorithm.K_MEANS if algorithm_description == 'k-means' else 
ClusteringAlgorithm.SPECTRAL + # algorithm = ClusteringAlgorithm.K_MEANS if algorithm_description == 'k-means' else ClusteringAlgorithm.SPECTRAL + if algorithm_description == 'k-means': + algorithm = ClusteringAlgorithm.K_MEANS + elif algorithm_description == 'spectral': + algorithm = ClusteringAlgorithm.SPECTRAL + else: + algorithm = ClusteringAlgorithm.BK_MEANS scoring = ClusteringScoringMethod.C_INDEX if scoring_method == 'concordance-index' \ else ClusteringScoringMethod.LOG_LIKELIHOOD return number_of_clusters, algorithm, scoring diff --git a/src/frontend/static/frontend/src/components/biomarkers/labels/ClusteringAlgorithmLabel.tsx b/src/frontend/static/frontend/src/components/biomarkers/labels/ClusteringAlgorithmLabel.tsx index 79c41539..5535b83a 100644 --- a/src/frontend/static/frontend/src/components/biomarkers/labels/ClusteringAlgorithmLabel.tsx +++ b/src/frontend/static/frontend/src/components/biomarkers/labels/ClusteringAlgorithmLabel.tsx @@ -28,6 +28,10 @@ export const ClusteringAlgorithmLabel = (props: ClusteringAlgorithmLabelProps) = color = 'blue' description = 'Spectral' break + case ClusteringAlgorithm.BK_MEANS: + color = 'blue' + description = 'Bisecting KMeans' + break default: color = 'blue' description = '' diff --git a/src/frontend/static/frontend/src/components/biomarkers/types.ts b/src/frontend/static/frontend/src/components/biomarkers/types.ts index 67da6130..8a5c5ff3 100644 --- a/src/frontend/static/frontend/src/components/biomarkers/types.ts +++ b/src/frontend/static/frontend/src/components/biomarkers/types.ts @@ -208,7 +208,8 @@ enum FitnessFunction { /** Clustering algorithm. */ enum ClusteringAlgorithm { K_MEANS = 1, - SPECTRAL = 2 + SPECTRAL = 2, + BK_MEANS = 3 } /** Clustering metric to optimize. 
*/ diff --git a/src/frontend/static/frontend/src/components/biomarkers/utils.ts b/src/frontend/static/frontend/src/components/biomarkers/utils.ts index 971cd35a..a51237af 100644 --- a/src/frontend/static/frontend/src/components/biomarkers/utils.ts +++ b/src/frontend/static/frontend/src/components/biomarkers/utils.ts @@ -36,7 +36,8 @@ const SVMKernelOptions: DropdownItemProps[] = [ /** Available options for a Clustering algorithm. */ const clusteringAlgorithmOptions: DropdownItemProps[] = [ { key: ClusteringAlgorithm.K_MEANS, text: 'K-Means', value: ClusteringAlgorithm.K_MEANS }, - { key: ClusteringAlgorithm.SPECTRAL, text: 'Spectral', value: ClusteringAlgorithm.SPECTRAL } + { key: ClusteringAlgorithm.SPECTRAL, text: 'Spectral', value: ClusteringAlgorithm.SPECTRAL }, + { key: ClusteringAlgorithm.BK_MEANS, text: 'BK-Means', value: ClusteringAlgorithm.BK_MEANS } ] /** Available options for a Clustering metric to optimize. */ From 5ae8d08d5c0865691f99289164c35a1ca170d36a Mon Sep 17 00:00:00 2001 From: Hernan Date: Fri, 11 Oct 2024 18:58:18 -0300 Subject: [PATCH 2/4] Fix External API Service --- src/api_service/mrna_service.py | 14 ++++++++++++-- src/multiomics_intermediate/settings.py | 6 ++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/api_service/mrna_service.py b/src/api_service/mrna_service.py index 16a59f3d..95280624 100644 --- a/src/api_service/mrna_service.py +++ b/src/api_service/mrna_service.py @@ -13,10 +13,20 @@ class MRNAService(object): def __init__(self): modulector_settings = settings.MODULECTOR_SETTINGS - self.url_modulector_prefix = f"http://{modulector_settings['host']}:{modulector_settings['port']}" + if modulector_settings['protocol'] == 'http' and modulector_settings['port'] == 80: + self.url_modulector_prefix = f"{modulector_settings['protocol']}://{modulector_settings['host']}" + elif modulector_settings['protocol'] == 'https' and modulector_settings['port'] == 443: + self.url_modulector_prefix = 
f"{modulector_settings['protocol']}://{modulector_settings['host']}" + else: + self.url_modulector_prefix = f"{modulector_settings['protocol']}://{modulector_settings['host']}:{modulector_settings['port']}" bioapi_settings = settings.BIOAPI_SETTINGS - self.url_bioapi_prefix = f"http://{bioapi_settings['host']}:{bioapi_settings['port']}" + if bioapi_settings['protocol'] == 'http' and bioapi_settings['port'] == 80: + self.url_bioapi_prefix = f"{bioapi_settings['protocol']}://{bioapi_settings['host']}" + elif bioapi_settings['protocol'] == 'https' and bioapi_settings['port'] == 443: + self.url_bioapi_prefix = f"{bioapi_settings['protocol']}://{bioapi_settings['host']}" + else: + self.url_bioapi_prefix = f"{bioapi_settings['protocol']}://{bioapi_settings['host']}:{bioapi_settings['port']}" @staticmethod def __generate_rest_query_params(get_request: QueryDict) -> str: diff --git a/src/multiomics_intermediate/settings.py b/src/multiomics_intermediate/settings.py index beaf5106..e2477aa4 100644 --- a/src/multiomics_intermediate/settings.py +++ b/src/multiomics_intermediate/settings.py @@ -300,13 +300,15 @@ # Modulector settings MODULECTOR_SETTINGS = { 'host': os.getenv('MODULECTOR_HOST', '127.0.0.1'), - 'port': os.getenv('MODULECTOR_PORT', '8001') + 'port': os.getenv('MODULECTOR_PORT', '8001'), + 'protocol': os.getenv('BIOAPI_PROTOCOL', 'http') } # BioAPI settings BIOAPI_SETTINGS = { 'host': os.getenv('BIOAPI_HOST', '127.0.0.1'), - 'port': os.getenv('BIOAPI_PORT', '8002') + 'port': os.getenv('BIOAPI_PORT', '8002'), + 'protocol': os.getenv('BIOAPI_PROTOCOL', 'http') } # Multiomix-aws-emr From 6d2734f1a3398ae77e81fc422860cf957e58f2f0 Mon Sep 17 00:00:00 2001 From: Hernan Date: Wed, 23 Oct 2024 13:09:16 -0300 Subject: [PATCH 3/4] Hotfix consumo de APIs externas y agregado de migrations --- .../0061_alter_experiment_shared_users.py | 20 +++++++++++++ src/api_service/mrna_service.py | 30 ++++++++++++------- ...clusteringparameters_algorithm_and_more.py | 23 ++++++++++++++ 
src/multiomics_intermediate/settings.py | 4 +-- 4 files changed, 64 insertions(+), 13 deletions(-) create mode 100644 src/api_service/migrations/0061_alter_experiment_shared_users.py create mode 100644 src/feature_selection/migrations/0056_alter_clusteringparameters_algorithm_and_more.py diff --git a/src/api_service/migrations/0061_alter_experiment_shared_users.py b/src/api_service/migrations/0061_alter_experiment_shared_users.py new file mode 100644 index 00000000..b7cfa5af --- /dev/null +++ b/src/api_service/migrations/0061_alter_experiment_shared_users.py @@ -0,0 +1,20 @@ +# Generated by Django 4.2.15 on 2024-10-23 13:51 + +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('api_service', '0060_experiment_shared_users'), + ] + + operations = [ + migrations.AlterField( + model_name='experiment', + name='shared_users', + field=models.ManyToManyField(blank=True, related_name='shared_users_correlation_analysis', to=settings.AUTH_USER_MODEL), + ), + ] diff --git a/src/api_service/mrna_service.py b/src/api_service/mrna_service.py index 95280624..d431f53e 100644 --- a/src/api_service/mrna_service.py +++ b/src/api_service/mrna_service.py @@ -13,20 +13,28 @@ class MRNAService(object): def __init__(self): modulector_settings = settings.MODULECTOR_SETTINGS - if modulector_settings['protocol'] == 'http' and modulector_settings['port'] == 80: - self.url_modulector_prefix = f"{modulector_settings['protocol']}://{modulector_settings['host']}" - elif modulector_settings['protocol'] == 'https' and modulector_settings['port'] == 443: - self.url_modulector_prefix = f"{modulector_settings['protocol']}://{modulector_settings['host']}" - else: - self.url_modulector_prefix = f"{modulector_settings['protocol']}://{modulector_settings['host']}:{modulector_settings['port']}" + self.url_modulector_prefix = 
self.__build_url(modulector_settings) bioapi_settings = settings.BIOAPI_SETTINGS - if bioapi_settings['protocol'] == 'http' and bioapi_settings['port'] == 80: - self.url_bioapi_prefix = f"{bioapi_settings['protocol']}://{bioapi_settings['host']}" - elif bioapi_settings['protocol'] == 'https' and bioapi_settings['port'] == 443: - self.url_bioapi_prefix = f"{bioapi_settings['protocol']}://{bioapi_settings['host']}" + self.url_bioapi_prefix = self.__build_url(bioapi_settings) + + @staticmethod + def __build_url(settings: Dict[str, Any]) -> str: + """ + Constructs the URL based on the settings provided. + If the port is the default for the protocol (80 for http, 443 for https), it is omitted. + Otherwise, the port is included in the URL. + @param settings: Dictionary containing protocol, host, and port information. + @return: Constructed URL as a string. + """ + protocol = settings['protocol'] + host = settings['host'] + port = settings['port'] + + if (protocol == 'http' and port == 80) or (protocol == 'https' and port == 443): + return f"{protocol}://{host}" else: - self.url_bioapi_prefix = f"{bioapi_settings['protocol']}://{bioapi_settings['host']}:{bioapi_settings['port']}" + return f"{protocol}://{host}:{port}" @staticmethod def __generate_rest_query_params(get_request: QueryDict) -> str: diff --git a/src/feature_selection/migrations/0056_alter_clusteringparameters_algorithm_and_more.py b/src/feature_selection/migrations/0056_alter_clusteringparameters_algorithm_and_more.py new file mode 100644 index 00000000..53051cd6 --- /dev/null +++ b/src/feature_selection/migrations/0056_alter_clusteringparameters_algorithm_and_more.py @@ -0,0 +1,23 @@ +# Generated by Django 4.2.15 on 2024-10-23 13:51 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('feature_selection', '0055_alter_fsexperiment_app_name_and_more'), + ] + + operations = [ + migrations.AlterField( + model_name='clusteringparameters', + 
name='algorithm', + field=models.IntegerField(choices=[(1, 'K Means'), (2, 'Spectral'), (3, 'Bk Means')], default=1), + ), + migrations.AlterField( + model_name='clusteringtimesrecord', + name='algorithm', + field=models.IntegerField(choices=[(1, 'K Means'), (2, 'Spectral'), (3, 'Bk Means')]), + ), + ] diff --git a/src/multiomics_intermediate/settings.py b/src/multiomics_intermediate/settings.py index e2477aa4..e05c7ed0 100644 --- a/src/multiomics_intermediate/settings.py +++ b/src/multiomics_intermediate/settings.py @@ -300,14 +300,14 @@ # Modulector settings MODULECTOR_SETTINGS = { 'host': os.getenv('MODULECTOR_HOST', '127.0.0.1'), - 'port': os.getenv('MODULECTOR_PORT', '8001'), + 'port': os.getenv('MODULECTOR_PORT', 8001), 'protocol': os.getenv('BIOAPI_PROTOCOL', 'http') } # BioAPI settings BIOAPI_SETTINGS = { 'host': os.getenv('BIOAPI_HOST', '127.0.0.1'), - 'port': os.getenv('BIOAPI_PORT', '8002'), + 'port': os.getenv('BIOAPI_PORT', 8002), 'protocol': os.getenv('BIOAPI_PROTOCOL', 'http') } From 7fde39a0857f26b1c372f71fc9de0f9d0705592c Mon Sep 17 00:00:00 2001 From: Hernan Date: Wed, 23 Oct 2024 13:38:46 -0300 Subject: [PATCH 4/4] Update settings.py Se configura para dejar como default el consumo de Modulector y Bioapi de manera online --- src/multiomics_intermediate/settings.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/multiomics_intermediate/settings.py b/src/multiomics_intermediate/settings.py index e05c7ed0..3718507c 100644 --- a/src/multiomics_intermediate/settings.py +++ b/src/multiomics_intermediate/settings.py @@ -299,16 +299,16 @@ # Modulector settings MODULECTOR_SETTINGS = { - 'host': os.getenv('MODULECTOR_HOST', '127.0.0.1'), - 'port': os.getenv('MODULECTOR_PORT', 8001), - 'protocol': os.getenv('BIOAPI_PROTOCOL', 'http') + 'host': os.getenv('MODULECTOR_HOST', 'modulector.multiomix.org'), + 'port': os.getenv('MODULECTOR_PORT', 443), + 'protocol': os.getenv('BIOAPI_PROTOCOL', 'https') } # BioAPI settings 
BIOAPI_SETTINGS = { - 'host': os.getenv('BIOAPI_HOST', '127.0.0.1'), - 'port': os.getenv('BIOAPI_PORT', 8002), - 'protocol': os.getenv('BIOAPI_PROTOCOL', 'http') + 'host': os.getenv('BIOAPI_HOST', 'bioapi.multiomix.org'), + 'port': os.getenv('BIOAPI_PORT', 443), + 'protocol': os.getenv('BIOAPI_PROTOCOL', 'https') } # Multiomix-aws-emr