From 051711317dd8a98be5ca23a8beae8a64f020fdfa Mon Sep 17 00:00:00 2001 From: Henry Date: Tue, 19 Sep 2023 18:30:52 +0200 Subject: [PATCH] :sparkles: add further methods - msImpute - trKNN (from source) Add to workflow check. --- project/01_1_train_NAGuideR_methods.R | 28 ++++++++----- project/01_1_train_NAGuideR_methods.ipynb | 39 ++++++++++--------- .../single_dev_dataset/example/config.yaml | 2 + 3 files changed, 41 insertions(+), 28 deletions(-) diff --git a/project/01_1_train_NAGuideR_methods.R b/project/01_1_train_NAGuideR_methods.R index 0ef65fdc2..2b21d7e32 100644 --- a/project/01_1_train_NAGuideR_methods.R +++ b/project/01_1_train_NAGuideR_methods.R @@ -170,12 +170,14 @@ nafunctions <- function(x,method="zero"){ } else if(method=="trknn"){ source('src/R_NAGuideR/Imput_funcs.r') - sim_trKNN_wrapper <- function(data) { - result <- data %>% as.matrix %>% t %>% imputeKNN(., k=10, distance='truncation', perc=0) %>% t - return(result) - } - df1x <- sim_trKNN_wrapper(t(df1)) - df<-as.data.frame(t(df1x)) + # sim_trKNN_wrapper <- function(data) { + # result <- data %>% as.matrix %>% t %>% imputeKNN(., k=10, distance='truncation', perc=0) %>% t + # return(result) + # } + # df1x <- sim_trKNN_wrapper(t(df1)) + # df<-as.data.frame(t(df1x)) + df <- imputeKNN(as.matrix(df), k=10, distance='truncation', perc=0) + df <- as.data.frame(df) } else if(method=="rf"){ install_rpackage("missForest") @@ -212,10 +214,15 @@ nafunctions <- function(x,method="zero"){ df<-GMS.Lasso(df1,nfolds=3,log.scale=FALSE,TS.Lasso=TRUE) } + else if(method=="msimpute"){ + install_bioconductor("msImpute") + df <- msImpute(as.matrix(df), method='v2') + df <- as.data.frame(df) + } else{ stop(paste("Unspported methods so far: ", method)) } - df<-as.data.frame(df) + df <- as.data.frame(df) df } # - @@ -269,8 +276,8 @@ df original_header <- colnames( readr::read_csv(train_split, n_max=1, col_names=TRUE, skip=0) ) -original_header feat_name <- original_header[1] +original_header[1:5] # - # Uncomment to test certain methods (only for debugging, as at least one method per package is tested using Github Actions) @@ -298,7 +305,9 @@ feat_name <- original_header[1] # 'MICE-CART', # 'RF', # 'PI', - # 'GMS' # fails to install on Windows + # 'GMS', # fails to install on Windows + # 'trknn', + # 'msimpute' # ) # for (method in to_test) { @@ -316,7 +325,6 @@ pred <- tibble::as_tibble( ) names(pred) <- original_header pred - # + vscode={"languageId": "r"} pred <- reshape2::melt(pred, id.vars=feat_name) names(pred) <- c(feat_name, 'Sample ID', method) diff --git a/project/01_1_train_NAGuideR_methods.ipynb b/project/01_1_train_NAGuideR_methods.ipynb index 968614c11..41ed54001 100644 --- a/project/01_1_train_NAGuideR_methods.ipynb +++ b/project/01_1_train_NAGuideR_methods.ipynb @@ -1,7 +1,6 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", "id": "afa6aadb-bb6e-4fa2-8c91-b69d6ff9af43", "metadata": {}, @@ -52,7 +51,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "edec29ae-208a-403a-aa77-82782bccba87", "metadata": {}, @@ -61,7 +59,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "c2e51b96-2f46-42c7-a642-a94c628dec04", "metadata": {}, @@ -198,12 +195,14 @@ " }\n", " else if(method==\"trknn\"){\n", " source('src/R_NAGuideR/Imput_funcs.r')\n", - " sim_trKNN_wrapper <- function(data) {\n", - " result <- data %>% as.matrix %>% t %>% imputeKNN(., k=10, distance='truncation', perc=0) %>% t\n", - " return(result)\n", - " }\n", - " df1x <- sim_trKNN_wrapper(t(df1))\n", - " df<-as.data.frame(t(df1x))\n", + " # sim_trKNN_wrapper <- function(data) {\n", + " # result <- data %>% as.matrix %>% t %>% imputeKNN(., k=10, distance='truncation', perc=0) %>% t\n", + " # return(result)\n", + " # }\n", + " # df1x <- sim_trKNN_wrapper(t(df1))\n", + " # df<-as.data.frame(t(df1x))\n", + " df <- imputeKNN(as.matrix(df), k=10, distance='truncation', perc=0)\n", + " df <- as.data.frame(df)\n", " }\n", " else if(method==\"rf\"){\n", " install_rpackage(\"missForest\")\n", @@ -240,16 +239,20 @@ " \n", " df<-GMS.Lasso(df1,nfolds=3,log.scale=FALSE,TS.Lasso=TRUE)\n", " }\n", + " else if(method==\"msimpute\"){\n", + " install_bioconductor(\"msImpute\")\n", + " df <- msImpute(as.matrix(df), method='v2')\n", + " df <- as.data.frame(df) \n", + " }\n", " else{\n", " stop(paste(\"Unspported methods so far: \", method))\n", " }\n", - " df<-as.data.frame(df)\n", + " df <- as.data.frame(df)\n", " df\n", " }" ] }, { - "attachments": {}, "cell_type": "markdown", "id": "7152239b-fad2-4e0f-8b3e-98d943cab264", "metadata": {}, @@ -303,7 +306,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "fc7ef882-0cbd-40f7-a77f-cc87f7145171", "metadata": {}, @@ -328,7 +330,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "3ed78a0c-2716-4629-bb15-8e3fd650576a", "metadata": { @@ -354,8 +355,8 @@ "original_header <- colnames(\n", " readr::read_csv(train_split, n_max=1, col_names=TRUE, skip=0)\n", ")\n", - "original_header\n", - "feat_name <- original_header[1]" + "feat_name <- original_header[1]\n", + "original_header[1:5]" ] }, { @@ -399,7 +400,9 @@ " # 'MICE-CART',\n", " # 'RF',\n", " # 'PI',\n", - " # 'GMS' # fails to install on Windows\n", + " # 'GMS', # fails to install on Windows\n", + " # 'trknn',\n", + " # 'msimpute'\n", "# )\n", "\n", "# for (method in to_test) {\n", @@ -409,7 +412,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "id": "ff4ff1c2-192e-4a48-b5b6-d80ab989b12e", "metadata": {}, @@ -422,6 +424,7 @@ "execution_count": null, "id": "690d47c2-5666-41f2-b13f-9215334f197c", "metadata": { + "lines_to_next_cell": 0, "tags": [], "vscode": { "languageId": "r" @@ -514,7 +517,7 @@ "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", - "version": "3.6.3" + "version": "4.1.3" } }, "nbformat": 4, diff --git a/project/config/single_dev_dataset/example/config.yaml b/project/config/single_dev_dataset/example/config.yaml index 0181d36ed..4f32f8b2b 100644 --- a/project/config/single_dev_dataset/example/config.yaml +++ b/project/config/single_dev_dataset/example/config.yaml @@ -21,3 +21,5 @@ NAGuideR_methods: - MICE-NORM # mice - MLE # norm - IRM # VIM - ~9mins + - TRKNN + - MSIMPUTE