-
Notifications
You must be signed in to change notification settings - Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Full model is constructed now *on the fly*
This change makes the binary file `dat/full.model.RDS` obsolete. Finally... `gapseq test` now also checks if full model can be constructed on the local machine.
- Loading branch information
Showing
9 changed files
with
312 additions
and
92 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,89 +1,208 @@ | ||
construct_full_model <- function(script.path) { | ||
require(data.table) | ||
require(stringr) | ||
require(sybil) | ||
source(paste0(script.dir, "/add_missing_exRxns.R")) | ||
construct_full_model <- function(script.dir) { | ||
suppressMessages(require(data.table)) | ||
suppressMessages(require(stringr)) | ||
suppressMessages(require(sybil)) | ||
options(warn=1) | ||
|
||
mseed <- fread(paste0(script.dir, "/../dat/seed_reactions_corrected.tsv"), header=T, stringsAsFactors = F) | ||
mseed <- mseed[gapseq.status %in% c("approved","corrected")] | ||
mseed <- mseed[order(id)] | ||
# compartment ids | ||
compIDs <- c("c0","e0","p0") | ||
|
||
mod <- modelorg(name = "Full Dummy model with all approved/corrected ModelSEED reactions",id = "dummy") | ||
mod@mod_desc <- "Full Dummy model" | ||
for(i in (1:nrow(mseed))) { | ||
cat("\r",i,"/",nrow(mseed)," (",mseed[i,id],")") | ||
rxn.info <- str_split(unlist(str_split(string = mseed[i,stoichiometry],pattern = ";")), pattern = ":", simplify = T) | ||
|
||
met.comp <- rxn.info[,3] | ||
met.comp.n <- ifelse(met.comp==0,"c0","e0") | ||
met.comp.n <- ifelse(met.comp>=2,"p0",met.comp.n) | ||
|
||
met.ids <- paste0(rxn.info[,2],"[",met.comp.n,"]") | ||
met.scoef <- as.numeric(rxn.info[,1]) | ||
met.name <- str_replace_all(rxn.info[,5],"\\\"","") | ||
|
||
met.name <- paste0(met.name,"-",met.comp.n) | ||
|
||
ind <- which(met.name=="" | is.na(met.name)) | ||
met.name[ind] <- met.ids[ind] | ||
|
||
is.rev <- ifelse(mseed[i,reversibility] %in% c("<","="),T,F) | ||
only.backwards <- ifelse(mseed[i,reversibility]=="<",T,F) | ||
|
||
ind.new.mets <- which(met.ids %in% mod@met_id) | ||
ind.old.mets <- which(mod@met_id %in% met.ids[ind.new.mets]) | ||
# Load database | ||
db_rxns <- fread(paste0(script.dir, "/../dat/seed_reactions_corrected.tsv"), | ||
header=T, stringsAsFactors = F) | ||
db_rxns <- db_rxns[gapseq.status %in% c("approved","corrected")] | ||
db_rxns <- db_rxns[order(id)] | ||
db_mets <- fread(paste0(script.dir,"/../dat/seed_metabolites_edited.tsv")) | ||
|
||
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ # | ||
# Constuct stoichiometrix matrix S # | ||
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ # | ||
|
||
|
||
# parse stoichiometry | ||
rxninfos <- lapply(db_rxns$stoichiometry, function(x) { | ||
tmp <- str_split(unlist(str_split(string = x,pattern = ";")), | ||
pattern = ":", simplify = T) | ||
|
||
met.name[ind.new.mets] <- mod@met_name[ind.old.mets] | ||
# sometimes a ":" occurs in the metabolite name (e.g. "OPC-8:0), which cause | ||
# false separation into additional columns. Here: re-concat to single name | ||
# column | ||
if(ncol(tmp) > 5) { | ||
tmp[,5] <- apply(tmp[,5:ncol(tmp)], 1, function(y) paste(y[y != ""], | ||
collapse = ":")) | ||
tmp <- tmp[,1:5] | ||
} | ||
|
||
mod <- addReact(model = mod, | ||
id = paste0(mseed[i,id],"_c0"), | ||
met = met.ids, | ||
Scoef = met.scoef, | ||
reversible = is.rev, | ||
metComp = as.integer(met.comp)+1, | ||
ub = ifelse(only.backwards, 0, 1000), | ||
lb = ifelse(is.rev, -1000, 0), | ||
reactName = mseed[i, name], | ||
metName = met.name) | ||
} | ||
cat("\n") | ||
mod <- add_missing_exchanges(mod) | ||
saveRDS(mod,file = paste0(script.dir, "/../dat/full.model.RDS"), version=2) | ||
tmp | ||
}) | ||
|
||
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ # | ||
# Identification of potential nutrients # | ||
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ # | ||
cat("Identifying potential nutrients...") | ||
fmod <- mod | ||
fmod@lowbnd[grep("^EX_cpd", fmod@react_id)] <- -1000 | ||
ex_mets <- fmod@met_id[grep("\\[e0\\]", fmod@met_id)] | ||
# add reaction ID | ||
rxninfos2 <- lapply(1:length(rxninfos), function(i) { | ||
tmp <- rxninfos[[i]] | ||
tmp <- cbind(tmp, | ||
rep(db_rxns$id[i], nrow(tmp))) | ||
return(tmp) | ||
}) | ||
|
||
de_mets <- deadEndMetabolites(fmod) | ||
de_mets <- de_mets$dem | ||
# make an easy to handle data.table | ||
rxninfos <- do.call(rbind, rxninfos2) | ||
rxninfos <- data.table(rxninfos) | ||
rm(rxninfos2) | ||
setnames(rxninfos, c("scoeff","cpd","comp","xyz","cpd.name","rxn.id")) | ||
|
||
ex_mets_in <- ex_mets[!(ex_mets %in% de_mets)] # removing dead end metabolites | ||
ex_mets_in <- gsub("\\[e0\\]","",ex_mets_in) | ||
# correct some Ids and data types | ||
rxninfos$comp.name <- compIDs[as.integer(rxninfos$comp) + 1L] | ||
rxninfos[, cpd.id := paste0(cpd,"[",comp.name,"]")] | ||
rxninfos[, rxn.id := paste0(rxn.id, "_c0")] | ||
rxninfos[, scoeff := as.double(scoeff)] | ||
|
||
all_mets <- fread(paste0(script.dir, "/../dat/seed_metabolites_edited.tsv")) | ||
cpdinfos <- rxninfos[, .(cpd, cpd.id, cpd.name, comp)] | ||
cpdinfos <- cpdinfos[!duplicated(cpd.id)] | ||
cpdinfos[, cpd.name := gsub("^\"|\"$","", cpd.name)] | ||
cpdinfos[, cpd.name := paste0(cpd.name, "-", compIDs[as.integer(comp) + 1L])] | ||
tmp_id <- match(cpdinfos$cpd, db_mets$id) | ||
cpdinfos$charge <- db_mets[tmp_id, charge] | ||
cpdinfos$chemical.formula <- db_mets[tmp_id, formula] | ||
|
||
nutr_dt <- all_mets[id %in% ex_mets_in, .(id, name, formula, charge, MNX_ID)] | ||
nutr_dt <- nutr_dt[formula != "null"] | ||
fwrite(nutr_dt, | ||
paste0(script.dir, "/../dat/nutrients.tsv"), | ||
sep = "\t") | ||
cat("\n") | ||
# table for exchange reactions | ||
exrxns <- copy(cpdinfos[comp == "1"]) | ||
exrxns[, exid := paste0("EX_", cpd, "_e0")] | ||
exrxns[, rxnIndex := (1:.N) + nrow(db_rxns)] | ||
exrxns[, metIndex := match(cpd.id, cpdinfos$cpd.id)] | ||
exrxns[, exName := paste(cpd.name, "Exchange")] | ||
|
||
} | ||
# construct modelorg object | ||
mod <- modelorg(id = "dummy", name = "Full Dummy model with all approved/corrected ModelSEED reactions") | ||
mod@mod_desc <- "Full Dummy model" | ||
mod@mod_compart <- compIDs | ||
|
||
# compounds | ||
mod@met_id <- cpdinfos$cpd.id | ||
mod@met_name <- cpdinfos$cpd.name | ||
mod@met_comp <- as.integer(cpdinfos$comp) + 1L | ||
mod@met_num <- length(mod@met_id) | ||
mod@met_attr <- data.frame(charge = cpdinfos$charge, | ||
chemicalFormula = cpdinfos$chemical.formula) | ||
mod@met_single <- rep(NA, mod@met_num) | ||
mod@met_de <- rep(NA, mod@met_num) | ||
|
||
# reactions | ||
mod@react_id <- c(paste0(db_rxns$id, "_c0"), exrxns$exid) | ||
mod@react_name <- c(db_rxns$name, exrxns$exName) | ||
mod@react_de <- mod@react_single <- rep(NA, length(mod@react_id)) | ||
mod@react_num <- length(mod@react_id) | ||
mod@react_rev <- rep(TRUE, length(mod@react_id)) | ||
mod@lowbnd <- rep(-SYBIL_SETTINGS("MAXIMUM"), mod@react_num) | ||
mod@uppbnd <- rep(SYBIL_SETTINGS("MAXIMUM"), mod@react_num) | ||
mod@obj_coef <- rep(0, length(mod@react_id)) | ||
mod@uppbnd[db_rxns[,which(reversibility == "<")]] <- 0 | ||
mod@lowbnd[db_rxns[,which(reversibility == ">")]] <- 0 | ||
mod@lowbnd[exrxns$rxnIndex] <- 0 | ||
|
||
ind_mat_rxns <- cbind(match(rxninfos$cpd.id, mod@met_id), | ||
match(rxninfos$rxn.id, mod@react_id), | ||
rxninfos$scoeff) | ||
ind_mat_exch <- cbind(exrxns$metIndex, exrxns$rxnIndex, rep(-1, nrow(exrxns))) | ||
ind_mat <- rbind(ind_mat_rxns, ind_mat_exch) | ||
|
||
# S | ||
mod@S <- Matrix(0, nrow = mod@met_num, ncol = mod@react_num) | ||
mod@S[ind_mat[,1:2]] <- ind_mat[,3] | ||
|
||
# rxnGeneMat adn subsystem matrix | ||
mod@rxnGeneMat <- Matrix(nrow=mod@react_num, ncol = 0) | ||
mod@subSys <- Matrix(nrow=mod@react_num, ncol = 0) | ||
|
||
return(mod) | ||
} | ||
|
||
# get current script path | ||
if (!is.na(Sys.getenv("RSTUDIO", unset = NA))) { | ||
# RStudio specific code | ||
script.dir <- dirname(rstudioapi::getSourceEditorContext()$path) | ||
} else{ | ||
initial.options <- commandArgs(trailingOnly = FALSE) | ||
script.name <- sub("--file=", "", initial.options[grep("--file=", initial.options)]) | ||
script.dir <- dirname(script.name) | ||
# TODO: Add ec info and mass balance of reactions in futile cycles | ||
futile_cycle_test <- function(script.dir, env = "") { | ||
require(sybil) | ||
require(data.table) | ||
source(paste0(script.dir, "/print_reaction.R")) | ||
if("cplexAPI" %in% installed.packages()) { | ||
sybil::SYBIL_SETTINGS("SOLVER","cplexAPI") | ||
#sybil::SYBIL_SETTINGS("METHOD", "hybbaropt") | ||
} | ||
|
||
# parse environment string | ||
env <- unlist(str_split(env, ",")) | ||
|
||
# repair path if required | ||
script.dir <- gsub("\\/$", "", script.dir) # removes tailing "/" if there | ||
meta.seed <- construct_full_model(script.dir) | ||
|
||
|
||
# adjust environment if needed | ||
if("highH2" %in% env) { | ||
env_dt <- fread(paste0(script.dir, "/../dat/env/env_highH2.tsv"), header = F) | ||
env_dt[, V1 := paste0(V1,"_c0")] | ||
env_dt <- env_dt[V1 %in% meta.seed@react_id] | ||
for(i in 1:nrow(env_dt)) { | ||
if(env_dt[i, V2] == ">") | ||
meta.seed <- changeBounds(meta.seed, react = env_dt[i, V1], | ||
lb = 0, | ||
ub = sybil::SYBIL_SETTINGS("MAXIMUM")) | ||
if(env_dt[i, V2] == "<") | ||
meta.seed <- changeBounds(meta.seed, react = env_dt[i, V1], | ||
lb = -sybil::SYBIL_SETTINGS("MAXIMUM"), | ||
ub = 0) | ||
if(env_dt[i, V2] == "=") | ||
meta.seed <- changeBounds(meta.seed, react = env_dt[i, V1], | ||
lb = -sybil::SYBIL_SETTINGS("MAXIMUM"), | ||
ub = sybil::SYBIL_SETTINGS("MAXIMUM")) | ||
} | ||
} | ||
|
||
meta.seed@obj_coef[which(meta.seed@react_id=="rxn00062_c0")] <- 1 | ||
meta.seed <- changeBounds(meta.seed, "rxn05759", lb = -1000) # Uncomment to ad-hoc include a futile cycle | ||
sol <- optimizeProb(meta.seed, algorithm="mtf") | ||
|
||
n <- meta.seed@react_num | ||
|
||
# save fluxes | ||
dt <- data.table(id = meta.seed@react_id, | ||
name = meta.seed@react_name, | ||
flux = sol@fluxdist@fluxes[1:n] | ||
) | ||
dt <- dt[abs(flux) > 0.001] | ||
if(nrow(dt) == 0) { | ||
return("furile cycle free - Hooray!") | ||
} | ||
|
||
dt[, id.backup := id] | ||
dt[grepl("^rxn", id), id := gsub("_.0$","", id)] | ||
|
||
# get balances for reactions | ||
#dt[, ec := ""] | ||
dt[, CBal := ""] | ||
#dt[, MBal := ""] | ||
|
||
for(i in 1:nrow(dt)) { | ||
if(grepl("^rxn",dt[i, id])) { | ||
ind.tmp <- which(meta.seed@react_id == dt[i, id.backup]) | ||
ind.mets <- which(abs(meta.seed@S[,ind.tmp]) > 0) | ||
dt$CBal[i] <- sum(meta.seed@S[ind.mets,ind.tmp] * meta.seed@met_attr$charge[ind.mets]) | ||
#dt[i, MBal := getMassBalance(db, id.tmp)] | ||
|
||
#ec.tmp <- paste(db@rxn[[1]]@ec, collapse = ", ") | ||
#dt[i, ec == ec.tmp] | ||
} | ||
} | ||
|
||
# Get reaction table | ||
gs.rxn <- fread(paste0(script.dir,"/../dat/seed_reactions_corrected.tsv")) | ||
|
||
dt <- merge(dt, gs.rxn[, .(id, gapseq.status)], by= "id") | ||
dt$equation <- print_reaction(meta.seed, react = dt$id.backup) | ||
dt$definition <- print_reaction(meta.seed, react = dt$id.backup, | ||
use.ids = TRUE) | ||
|
||
dt[, id.backup := NULL] | ||
dt[] | ||
|
||
fwrite(dt, paste0(script.dir,"/../Futile_cycle.csv")) | ||
|
||
return(paste0("At least one futile cycles found. See file: Futile_cycle.csv")) | ||
} | ||
|
||
construct_full_model(script.path = script.dir) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.