Skip to content

Commit

Permalink
Full model is constructed now *on the fly*
Browse files Browse the repository at this point in the history
This change makes the binary file `dat/full.model.RDS` obsolete.
Finally...

`gapseq test` now also checks if full model can be constructed on the
local machine.
  • Loading branch information
Waschina committed Feb 6, 2022
1 parent a87b185 commit fbb651e
Show file tree
Hide file tree
Showing 9 changed files with 312 additions and 92 deletions.
Binary file removed dat/full.model.RDS
Binary file not shown.
2 changes: 1 addition & 1 deletion gapseq
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ usage()
echo " gapseq draft -r toy/ecoli-all-Reactions.tbl -t toy/ecoli-Transporter.tbl -c toy/ecoli.fna.gz -p toy/ecoli-all-Pathways.tbl"
echo " gapseq medium -m toy/ecoli-draft.RDS -p toy/ecoli-all-Pathways.tbl"
echo " gapseq fill -m toy/ecoli-draft.RDS -n dat/media/ALLmed.csv -c toy/ecoli-rxnWeights.RDS -g toy/ecoli-rxnXgenes.RDS"
echo " gapseq adapt add 14DICHLORBENZDEG-PWY toy/myb71.RDS"
echo " gapseq adapt -a 14DICHLORBENZDEG-PWY -m toy/myb71.RDS"
echo -e "\nOptions:"
echo " test Testing dependencies and basic functionality of gapseq."
echo " find Pathway analysis, try to find enzymes based on homology."
Expand Down
7 changes: 2 additions & 5 deletions src/adapt.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ library(getopt)
spec <- matrix(c(
'model', 'm', 1, "character", "GapSeq model to be extended (RDS or SBML)",
'help' , 'h', 0, "logical", "help",
'full.model', 'f', 2, "character", "(Deprecated) RDS file of the full model. Please do not use; Option has currently no effect.",
'add' , 'a', 2, "character", "reactions or pathways that should be added to the model (comma-separated)",
'remove' , 'r', 2, "character", "reactions or pathways that should be removed from the model (comma-separated)"
), ncol = 5, byrow = T)
Expand Down Expand Up @@ -51,17 +50,14 @@ if( "cplexAPI" %in% rownames(installed.packages()) ){
}

# Setting defaults if required
if ( is.null(opt$full.model ) ) { opt$full.model = paste0(script.dir,"/../dat/full.model.RDS") }
#if ( is.null(opt$output.dir) ) { opt$output.dir = "." }
#if ( is.null(opt$sbml.output) ) { opt$sbml.output = F }
if ( is.null(opt$model) ) { opt$model = paste0(script.dir,"/../toy/myb71.RDS")}

# overwrite -f Option with default (This option might be used in future for a different purpose)
opt$full.model = paste0(script.dir,"/../dat/full.model.RDS")

# Arguments:
mod.file <- opt$model
fullmod.file <- opt$full.model
ids.add <- opt$add
ids.remove <- opt$remove

Expand All @@ -72,6 +68,7 @@ sbml.export <- FALSE
source(paste0(script.dir,"/add_missing_exRxns.R"))
source(paste0(script.dir,"/addMetAttr.R"))
source(paste0(script.dir,"/addReactAttr.R"))
source(paste0(script.dir,"/construct_full_model.R"))

# database files
meta.pwy <- fread(paste0(script.dir, "/../dat/meta_pwy.tbl"))
Expand All @@ -81,7 +78,7 @@ meta.rxn <- fread(paste0(script.dir, "/../dat/meta_rea.tbl"))
seed_x_mets <- fread(paste0(script.dir,"/../dat/seed_metabolites_edited.tsv"), header=T, stringsAsFactors = F, na.strings = c("null","","NA"))

cat("Loading model files", mod.file, "\n")
mod <- readRDS(fullmod.file)
mod <- construct_full_model(script.dir)
if ( toupper(file_ext(mod.file)) == "RDS" ){
mod.orig <- readRDS(mod.file)
}else{
Expand Down
271 changes: 195 additions & 76 deletions src/construct_full_model.R
Original file line number Diff line number Diff line change
@@ -1,89 +1,208 @@
construct_full_model <- function(script.path) {
require(data.table)
require(stringr)
require(sybil)
source(paste0(script.dir, "/add_missing_exRxns.R"))
construct_full_model <- function(script.dir) {
suppressMessages(require(data.table))
suppressMessages(require(stringr))
suppressMessages(require(sybil))
options(warn=1)

mseed <- fread(paste0(script.dir, "/../dat/seed_reactions_corrected.tsv"), header=T, stringsAsFactors = F)
mseed <- mseed[gapseq.status %in% c("approved","corrected")]
mseed <- mseed[order(id)]
# compartment ids
compIDs <- c("c0","e0","p0")

mod <- modelorg(name = "Full Dummy model with all approved/corrected ModelSEED reactions",id = "dummy")
mod@mod_desc <- "Full Dummy model"
for(i in (1:nrow(mseed))) {
cat("\r",i,"/",nrow(mseed)," (",mseed[i,id],")")
rxn.info <- str_split(unlist(str_split(string = mseed[i,stoichiometry],pattern = ";")), pattern = ":", simplify = T)

met.comp <- rxn.info[,3]
met.comp.n <- ifelse(met.comp==0,"c0","e0")
met.comp.n <- ifelse(met.comp>=2,"p0",met.comp.n)

met.ids <- paste0(rxn.info[,2],"[",met.comp.n,"]")
met.scoef <- as.numeric(rxn.info[,1])
met.name <- str_replace_all(rxn.info[,5],"\\\"","")

met.name <- paste0(met.name,"-",met.comp.n)

ind <- which(met.name=="" | is.na(met.name))
met.name[ind] <- met.ids[ind]

is.rev <- ifelse(mseed[i,reversibility] %in% c("<","="),T,F)
only.backwards <- ifelse(mseed[i,reversibility]=="<",T,F)

ind.new.mets <- which(met.ids %in% mod@met_id)
ind.old.mets <- which(mod@met_id %in% met.ids[ind.new.mets])
# Load database
db_rxns <- fread(paste0(script.dir, "/../dat/seed_reactions_corrected.tsv"),
header=T, stringsAsFactors = F)
db_rxns <- db_rxns[gapseq.status %in% c("approved","corrected")]
db_rxns <- db_rxns[order(id)]
db_mets <- fread(paste0(script.dir,"/../dat/seed_metabolites_edited.tsv"))

# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ #
# Constuct stoichiometrix matrix S #
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ #


# parse stoichiometry
rxninfos <- lapply(db_rxns$stoichiometry, function(x) {
tmp <- str_split(unlist(str_split(string = x,pattern = ";")),
pattern = ":", simplify = T)

met.name[ind.new.mets] <- mod@met_name[ind.old.mets]
# sometimes a ":" occurs in the metabolite name (e.g. "OPC-8:0), which cause
# false separation into additional columns. Here: re-concat to single name
# column
if(ncol(tmp) > 5) {
tmp[,5] <- apply(tmp[,5:ncol(tmp)], 1, function(y) paste(y[y != ""],
collapse = ":"))
tmp <- tmp[,1:5]
}

mod <- addReact(model = mod,
id = paste0(mseed[i,id],"_c0"),
met = met.ids,
Scoef = met.scoef,
reversible = is.rev,
metComp = as.integer(met.comp)+1,
ub = ifelse(only.backwards, 0, 1000),
lb = ifelse(is.rev, -1000, 0),
reactName = mseed[i, name],
metName = met.name)
}
cat("\n")
mod <- add_missing_exchanges(mod)
saveRDS(mod,file = paste0(script.dir, "/../dat/full.model.RDS"), version=2)
tmp
})

# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ #
# Identification of potential nutrients #
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ #
cat("Identifying potential nutrients...")
fmod <- mod
fmod@lowbnd[grep("^EX_cpd", fmod@react_id)] <- -1000
ex_mets <- fmod@met_id[grep("\\[e0\\]", fmod@met_id)]
# add reaction ID
rxninfos2 <- lapply(1:length(rxninfos), function(i) {
tmp <- rxninfos[[i]]
tmp <- cbind(tmp,
rep(db_rxns$id[i], nrow(tmp)))
return(tmp)
})

de_mets <- deadEndMetabolites(fmod)
de_mets <- de_mets$dem
# make an easy to handle data.table
rxninfos <- do.call(rbind, rxninfos2)
rxninfos <- data.table(rxninfos)
rm(rxninfos2)
setnames(rxninfos, c("scoeff","cpd","comp","xyz","cpd.name","rxn.id"))

ex_mets_in <- ex_mets[!(ex_mets %in% de_mets)] # removing dead end metabolites
ex_mets_in <- gsub("\\[e0\\]","",ex_mets_in)
# correct some Ids and data types
rxninfos$comp.name <- compIDs[as.integer(rxninfos$comp) + 1L]
rxninfos[, cpd.id := paste0(cpd,"[",comp.name,"]")]
rxninfos[, rxn.id := paste0(rxn.id, "_c0")]
rxninfos[, scoeff := as.double(scoeff)]

all_mets <- fread(paste0(script.dir, "/../dat/seed_metabolites_edited.tsv"))
cpdinfos <- rxninfos[, .(cpd, cpd.id, cpd.name, comp)]
cpdinfos <- cpdinfos[!duplicated(cpd.id)]
cpdinfos[, cpd.name := gsub("^\"|\"$","", cpd.name)]
cpdinfos[, cpd.name := paste0(cpd.name, "-", compIDs[as.integer(comp) + 1L])]
tmp_id <- match(cpdinfos$cpd, db_mets$id)
cpdinfos$charge <- db_mets[tmp_id, charge]
cpdinfos$chemical.formula <- db_mets[tmp_id, formula]

nutr_dt <- all_mets[id %in% ex_mets_in, .(id, name, formula, charge, MNX_ID)]
nutr_dt <- nutr_dt[formula != "null"]
fwrite(nutr_dt,
paste0(script.dir, "/../dat/nutrients.tsv"),
sep = "\t")
cat("\n")
# table for exchange reactions
exrxns <- copy(cpdinfos[comp == "1"])
exrxns[, exid := paste0("EX_", cpd, "_e0")]
exrxns[, rxnIndex := (1:.N) + nrow(db_rxns)]
exrxns[, metIndex := match(cpd.id, cpdinfos$cpd.id)]
exrxns[, exName := paste(cpd.name, "Exchange")]

}
# construct modelorg object
mod <- modelorg(id = "dummy", name = "Full Dummy model with all approved/corrected ModelSEED reactions")
mod@mod_desc <- "Full Dummy model"
mod@mod_compart <- compIDs

# compounds
mod@met_id <- cpdinfos$cpd.id
mod@met_name <- cpdinfos$cpd.name
mod@met_comp <- as.integer(cpdinfos$comp) + 1L
mod@met_num <- length(mod@met_id)
mod@met_attr <- data.frame(charge = cpdinfos$charge,
chemicalFormula = cpdinfos$chemical.formula)
mod@met_single <- rep(NA, mod@met_num)
mod@met_de <- rep(NA, mod@met_num)

# reactions
mod@react_id <- c(paste0(db_rxns$id, "_c0"), exrxns$exid)
mod@react_name <- c(db_rxns$name, exrxns$exName)
mod@react_de <- mod@react_single <- rep(NA, length(mod@react_id))
mod@react_num <- length(mod@react_id)
mod@react_rev <- rep(TRUE, length(mod@react_id))
mod@lowbnd <- rep(-SYBIL_SETTINGS("MAXIMUM"), mod@react_num)
mod@uppbnd <- rep(SYBIL_SETTINGS("MAXIMUM"), mod@react_num)
mod@obj_coef <- rep(0, length(mod@react_id))
mod@uppbnd[db_rxns[,which(reversibility == "<")]] <- 0
mod@lowbnd[db_rxns[,which(reversibility == ">")]] <- 0
mod@lowbnd[exrxns$rxnIndex] <- 0

ind_mat_rxns <- cbind(match(rxninfos$cpd.id, mod@met_id),
match(rxninfos$rxn.id, mod@react_id),
rxninfos$scoeff)
ind_mat_exch <- cbind(exrxns$metIndex, exrxns$rxnIndex, rep(-1, nrow(exrxns)))
ind_mat <- rbind(ind_mat_rxns, ind_mat_exch)

# S
mod@S <- Matrix(0, nrow = mod@met_num, ncol = mod@react_num)
mod@S[ind_mat[,1:2]] <- ind_mat[,3]

# rxnGeneMat adn subsystem matrix
mod@rxnGeneMat <- Matrix(nrow=mod@react_num, ncol = 0)
mod@subSys <- Matrix(nrow=mod@react_num, ncol = 0)

return(mod)
}

# get current script path
if (!is.na(Sys.getenv("RSTUDIO", unset = NA))) {
# RStudio specific code
script.dir <- dirname(rstudioapi::getSourceEditorContext()$path)
} else{
initial.options <- commandArgs(trailingOnly = FALSE)
script.name <- sub("--file=", "", initial.options[grep("--file=", initial.options)])
script.dir <- dirname(script.name)
# TODO: Add ec info and mass balance of reactions in futile cycles
futile_cycle_test <- function(script.dir, env = "") {
require(sybil)
require(data.table)
source(paste0(script.dir, "/print_reaction.R"))
if("cplexAPI" %in% installed.packages()) {
sybil::SYBIL_SETTINGS("SOLVER","cplexAPI")
#sybil::SYBIL_SETTINGS("METHOD", "hybbaropt")
}

# parse environment string
env <- unlist(str_split(env, ","))

# repair path if required
script.dir <- gsub("\\/$", "", script.dir) # removes tailing "/" if there
meta.seed <- construct_full_model(script.dir)


# adjust environment if needed
if("highH2" %in% env) {
env_dt <- fread(paste0(script.dir, "/../dat/env/env_highH2.tsv"), header = F)
env_dt[, V1 := paste0(V1,"_c0")]
env_dt <- env_dt[V1 %in% meta.seed@react_id]
for(i in 1:nrow(env_dt)) {
if(env_dt[i, V2] == ">")
meta.seed <- changeBounds(meta.seed, react = env_dt[i, V1],
lb = 0,
ub = sybil::SYBIL_SETTINGS("MAXIMUM"))
if(env_dt[i, V2] == "<")
meta.seed <- changeBounds(meta.seed, react = env_dt[i, V1],
lb = -sybil::SYBIL_SETTINGS("MAXIMUM"),
ub = 0)
if(env_dt[i, V2] == "=")
meta.seed <- changeBounds(meta.seed, react = env_dt[i, V1],
lb = -sybil::SYBIL_SETTINGS("MAXIMUM"),
ub = sybil::SYBIL_SETTINGS("MAXIMUM"))
}
}

meta.seed@obj_coef[which(meta.seed@react_id=="rxn00062_c0")] <- 1
meta.seed <- changeBounds(meta.seed, "rxn05759", lb = -1000) # Uncomment to ad-hoc include a futile cycle
sol <- optimizeProb(meta.seed, algorithm="mtf")

n <- meta.seed@react_num

# save fluxes
dt <- data.table(id = meta.seed@react_id,
name = meta.seed@react_name,
flux = sol@fluxdist@fluxes[1:n]
)
dt <- dt[abs(flux) > 0.001]
if(nrow(dt) == 0) {
return("furile cycle free - Hooray!")
}

dt[, id.backup := id]
dt[grepl("^rxn", id), id := gsub("_.0$","", id)]

# get balances for reactions
#dt[, ec := ""]
dt[, CBal := ""]
#dt[, MBal := ""]

for(i in 1:nrow(dt)) {
if(grepl("^rxn",dt[i, id])) {
ind.tmp <- which(meta.seed@react_id == dt[i, id.backup])
ind.mets <- which(abs(meta.seed@S[,ind.tmp]) > 0)
dt$CBal[i] <- sum(meta.seed@S[ind.mets,ind.tmp] * meta.seed@met_attr$charge[ind.mets])
#dt[i, MBal := getMassBalance(db, id.tmp)]

#ec.tmp <- paste(db@rxn[[1]]@ec, collapse = ", ")
#dt[i, ec == ec.tmp]
}
}

# Get reaction table
gs.rxn <- fread(paste0(script.dir,"/../dat/seed_reactions_corrected.tsv"))

dt <- merge(dt, gs.rxn[, .(id, gapseq.status)], by= "id")
dt$equation <- print_reaction(meta.seed, react = dt$id.backup)
dt$definition <- print_reaction(meta.seed, react = dt$id.backup,
use.ids = TRUE)

dt[, id.backup := NULL]
dt[]

fwrite(dt, paste0(script.dir,"/../Futile_cycle.csv"))

return(paste0("At least one futile cycles found. See file: Futile_cycle.csv"))
}

construct_full_model(script.path = script.dir)
35 changes: 32 additions & 3 deletions src/correct_seed_rxnDB.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
# . dat/corrections_seed_reactionDB.tsv (database of corrections to the original seed database)
# . dat/importantCIreactions.lst (list of charge-unbalanced reactions that are anyway accepted for the (full)-model)
correct_seed_rxnDB <- function(script.path) {
require(data.table)
require(stringr)
#require(sybil)
suppressMessages(require(data.table))
suppressMessages(require(stringr))

cat("Correcting reaction table... ")

source(paste0(script.dir, "/generate_rxn_stoich_hash.R"))

mseed <- fread(paste0(script.dir,"/../dat/seed_reactions.tsv"), header=T, stringsAsFactors = F,
Expand Down Expand Up @@ -128,6 +130,33 @@ correct_seed_rxnDB <- function(script.path) {
mseed[gapseq.status=="corrected",equation := getRxnEquaFromStoich(stoichiometry, reversibility, str.type = "equation")]
mseed[gapseq.status=="corrected",definition := getRxnEquaFromStoich(stoichiometry, reversibility, str.type = "definition")]

cat("done\n")

# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ #
# Identification of potential nutrients #
# ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ #
cat("Identifying potential nutrients... ")
source(paste0(script.path,"/construct_full_model.R"))
fmod <- construct_full_model(script.path)
fmod@lowbnd[grep("^EX_cpd", fmod@react_id)] <- -1000
ex_mets <- fmod@met_id[grep("\\[e0\\]", fmod@met_id)]

de_mets <- deadEndMetabolites(fmod)
de_mets <- de_mets$dem

ex_mets_in <- ex_mets[!(ex_mets %in% de_mets)] # removing dead end metabolites
ex_mets_in <- gsub("\\[e0\\]","",ex_mets_in)

all_mets <- fread(paste0(script.dir, "/../dat/seed_metabolites_edited.tsv"))

nutr_dt <- all_mets[id %in% ex_mets_in, .(id, name, formula, charge, MNX_ID)]
nutr_dt <- nutr_dt[formula != "null"]
fwrite(nutr_dt,
paste0(script.dir, "/../dat/nutrients.tsv"),
sep = "\t")

cat("done\n")

# Export of corrected database
mseed <- mseed[,-"gs.hash"]
fwrite(mseed, file=paste0(script.path,"/../dat/seed_reactions_corrected.tsv"), sep = "\t", quote = F)
Expand Down
Loading

0 comments on commit fbb651e

Please sign in to comment.