Merge pull request #7 from COBREXA/compathelper/new_version/2024-06-1…

…0-12-48-43-523-00499766618 CompatHelper: bump compat for AbstractFBCModels to 0.3, (keep existing compat)
COBREXA · Jun 13, 2024 · ea240cc · ea240cc · exaexa · Jun 13, 2024
2 parents b8f9629 + 89db32f
commit ea240cc
Show file tree

Hide file tree

Showing 9 changed files with 366 additions and 32 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -15,7 +15,7 @@ jobs:
       matrix:
         version:
           - '1' # This is always the latest stable release in the 1.X series
-          #- '1.6' # LTS
+          - '1.6' # LTS
           #- 'nightly'
         os:
           - ubuntu-latest
@@ -37,6 +37,7 @@ jobs:
       - uses: julia-actions/julia-runtest@latest
         continue-on-error: ${{ matrix.version == 'nightly' }}
       - uses: julia-actions/julia-processcoverage@v1
-      - uses: codecov/codecov-action@v3
+      - uses: codecov/codecov-action@v4
         with:
-          file: lcov.info
+          files: lcov.info
+          token: ${{ secrets.CODECOV_TOKEN }}
diff --git a/Project.toml b/Project.toml
@@ -1,18 +1,20 @@
 name = "MATFBCModels"
 uuid = "bd2e13bf-42c0-49e1-9a9c-885d36daf88b"
 authors = ["The authors of MATFBCModels.jl"]
-version = "0.1.0"
+version = "0.2.0"
 
 [deps]
 AbstractFBCModels = "5a4f3dfa-1789-40f8-8221-69268c29937c"
 DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
 MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
+PikaParser = "3bbf5609-3e7b-44cd-8549-7c69f321e792"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
 [compat]
-AbstractFBCModels = "0.1, 0.2"
+AbstractFBCModels = "0.3"
 DocStringExtensions = "0.8, 0.9"
 MAT = "0.10"
+PikaParser = "0.6"
 SparseArrays = "1"
 Test = "1"
 julia = "1"

diff --git a/src/MATFBCModels.jl b/src/MATFBCModels.jl
@@ -12,6 +12,7 @@ include("constants.jl")
 include("interface.jl")
 include("io.jl")
 include("utils.jl")
+include("grr_utils.jl")
 
 export MATFBCModel
 

diff --git a/src/constants.jl b/src/constants.jl
@@ -4,6 +4,9 @@
 
 This module collects information on how people typically name stuff in the MAT
 file structure.
+
+The constants are not "constant" per se, so that they are easy to modify in
+case they should read models with creatively named fields.
 """
 module key_names
 reactions = ["reactions", "rxns", "RXNS", "REACTIONS", "Reactions", "Rxns"]

diff --git a/src/grr_utils.jl b/src/grr_utils.jl
@@ -0,0 +1,192 @@
+
+# note: this file is a copy from JSONFBCModels. Might be nice to have some
+# kindof mechanism to keep them roughly in sync. Maybe sink to the Abstract
+# interface?
+
+import PikaParser as PP
+
+"""
+`PikaParser.jl` grammar for stringy GRR expressions.
+"""
+const grr_grammar = begin
+    # characters that typically form the identifiers
+    isident(x::Char) =
+        isletter(x) ||
+        isdigit(x) ||
+        x == '_' ||
+        x == '-' ||
+        x == ':' ||
+        x == '.' ||
+        x == '@' ||
+        x == '#' ||
+        x == '\'' ||
+        x == '[' ||
+        x == ']'
+
+    # scanner helpers
+    eat(p) = m -> begin
+        last = 0
+        for i in eachindex(m)
+            p(m[i]) || break
+            last = i
+        end
+        return last
+    end
+
+    # eat one of keywords
+    kws(w...) = m -> begin
+        last = eat(isident)(m)
+        m[begin:last] in w ? last : 0
+    end
+
+    PP.make_grammar(
+        [:expr],
+        PP.flatten(
+            Dict(
+                :space => PP.first(PP.scan(eat(isspace)), PP.epsilon),
+                :id => PP.scan(eat(isident)),
+                :orop =>
+                    PP.first(PP.tokens("||"), PP.token('|'), PP.scan(kws("OR", "or"))),
+                :andop => PP.first(
+                    PP.tokens("&&"),
+                    PP.token('&'),
+                    PP.scan(kws("AND", "and")),
+                ),
+                :expr => PP.seq(:space, :orexpr, :space, PP.end_of_input),
+                :orexpr => PP.first(
+                    :or => PP.seq(:andexpr, :space, :orop, :space, :orexpr),
+                    :andexpr,
+                ),
+                :andexpr => PP.first(
+                    :and => PP.seq(:baseexpr, :space, :andop, :space, :andexpr),
+                    :baseexpr,
+                ),
+                :baseexpr => PP.first(
+                    :id,
+                    :parenexpr => PP.seq(
+                        PP.token('('),
+                        :space,
+                        :orexpr,
+                        :space,
+                        PP.token(')'),
+                    ),
+                ),
+            ),
+            Char,
+        ),
+    )
+end
+
+grr_grammar_open(m, _) =
+    m.rule == :expr ? Bool[0, 1, 0, 0] :
+    m.rule == :parenexpr ? Bool[0, 0, 1, 0, 0] :
+    m.rule in [:or, :and] ? Bool[1, 0, 0, 0, 1] :
+    m.rule in [:andexpr, :orexpr, :notexpr, :baseexpr] ? Bool[1] :
+    (false for _ in m.submatches)
+
+grr_grammar_fold(m, _, subvals) =
+    m.rule == :id ? Expr(:call, :gene, String(m.view)) :
+    m.rule == :and ? Expr(:call, :and, subvals[1], subvals[5]) :
+    m.rule == :or ? Expr(:call, :or, subvals[1], subvals[5]) :
+    m.rule == :parenexpr ? subvals[3] :
+    m.rule == :expr ? subvals[2] : isempty(subvals) ? nothing : subvals[1]
+
+"""
+$(TYPEDSIGNATURES)
+
+Parses a JSON-ish data reference to a `Expr`-typed gene association. Contains
+"calls" to `gene`, `and` and `or` functions that describe the association.
+"""
+function parse_gene_association(str::String)::Maybe{Expr}
+    all(isspace, str) && return nothing
+    tree = PP.parse_lex(grr_grammar, str)
+    match = PP.find_match_at!(tree, :expr, 1)
+    match > 0 || throw(DomainError(str, "cannot parse GRR"))
+    PP.traverse_match(tree, match, open = grr_grammar_open, fold = grr_grammar_fold)
+end
+
+"""
+$(TYPEDSIGNATURES)
+
+Evaluate the gene association expression with the reference values given by the
+`val` function.
+"""
+function eval_gene_association(ga::Expr, val::Function)::Bool
+    (ga.head == :call && length(ga.args) >= 2) ||
+        throw(DomainError(ga, "invalid gene association expr"))
+    if ga.args[1] == :gene && length(ga.args) == 2
+        val(ga.args[2])
+    elseif ga.args[1] == :and
+        all(eval_gene_association.(ga.args[2:end], Ref(val)))
+    elseif ga.args[1] == :or
+        any(eval_gene_association.(ga.args[2:end], Ref(val)))
+    else
+        throw(DomainError(ga, "unsupported gene association function"))
+    end
+end
+
+"""
+$(TYPEDSIGNATURES)
+
+A helper for producing predictable unique sequences. Might be faster if
+compacting would be done directly in sort().
+"""
+function sortunique(x)
+    o = collect(x)
+    sort!(o)
+    put = prevind(o, firstindex(o))
+    for i in eachindex(o)
+        if put >= firstindex(o) && o[i] == o[put]
+            # we already have this one
+            continue
+        else
+            put = nextind(o, put)
+            if put != i
+                o[put] = o[i]
+            end
+        end
+    end
+    o[begin:put]
+end
+
+"""
+$(TYPEDSIGNATURES)
+
+Convert the given gene association expression to DNF.
+"""
+function flatten_gene_association(ga::Expr)::A.GeneAssociationDNF
+    function fold_and(dnfs::Vector{Vector{Vector{String}}})::Vector{Vector{String}}
+        if isempty(dnfs)
+            [String[]]
+        else
+            sortunique(
+                sortunique(String[l; r]) for l in dnfs[1] for r in fold_and(dnfs[2:end])
+            )
+        end
+    end
+
+    (ga.head == :call && length(ga.args) >= 2) ||
+        throw(DomainError(ga, "invalid gene association expr"))
+    if ga.args[1] == :gene && length(ga.args) == 2
+        [[ga.args[2]]]
+    elseif ga.args[1] == :and
+        fold_and(flatten_gene_association.(ga.args[2:end]))
+    elseif ga.args[1] == :or
+        sortunique(vcat(flatten_gene_association.(ga.args[2:end])...))
+    else
+        throw(DomainError(ga, "unsupported gene association function"))
+    end
+end
+
+"""
+$(TYPEDSIGNATURES)
+
+Formats a DNF gene association as a `String`.
+"""
+function format_gene_association_dnf(
+    grr::A.GeneAssociationDNF;
+    and = " && ",
+    or = " || ",
+)::String
+    return join(("(" * join(gr, and) * ")" for gr in grr), or)
+end
diff --git a/src/interface.jl b/src/interface.jl
@@ -24,13 +24,78 @@ A.balance(m::MATFBCModel) =
 A.objective(m::MATFBCModel) =
     sparsevec(m.mat[guesskeys(:objective, m)])::SparseVector{Float64,Int64}
 
-A.reaction_gene_products_available(model::MATFBCModel, rid::String, available::Function) =
-    A.reaction_gene_products_available_from_dnf(model, rid, available)
+# TODO: we should use `guesskeys` for couplings too, but the situation gets
+# quite messy there and might require a lot of effort to do "right".
+looks_like_squashed_coupling(mat) =
+    haskey(mat, "A") && haskey(mat, "b") && length(mat["b"]) == size(mat["A"], 1)
+
+A.n_couplings(m::MATFBCModel) =
+    looks_like_squashed_coupling(m.mat) ? size(m.mat["A"], 1)::Int - A.n_reactions(m) :
+    size(get(m.mat, "C", zeros(0, A.n_reactions(m))), 1)::Int
+
+A.couplings(m::MATFBCModel) = String["mat_coupling_$i" for i = 1:A.n_couplings(m)]
+
+A.coupling(m::MATFBCModel)::SparseMatrixCSC{Float64,Int64} =
+    looks_like_squashed_coupling(m.mat) ? sparse(m.mat["A"][A.n_reactions(m)+1:end, :]) :
+    sparse(get(m.mat, "C", zeros(0, A.n_reactions(m))))
+
+"""
+$(TYPEDSIGNATURES)
+
+Overload of `coupling_weights` for `MATFBCModel` is currently quite
+inefficient. Use `coupling` instead.
+"""
+function A.coupling_weights(m::MATFBCModel, cid::String)
+    startswith(cid, "mat_coupling_") || throw(DomainError(cid, "unknown coupling"))
+    cidx = parse(Int, cid[14:end])
+    return Dict{String,Float64}(
+        r => w for (r, w) in zip(A.reactions(m), A.coupling(m)[cidx, :]) if w != 0
+    )
+end
+
+function A.coupling_bounds(m::MATFBCModel)::Tuple{Vector{Float64},Vector{Float64}}
+    nc = A.n_couplings(m)
+    if looks_like_squashed_coupling(m.mat)
+        c = reshape(m.mat["b"], length(m.mat["b"]))[A.n_reactions(m)+1:end]
+        csense = reshape(m.mat["csense"], nc)
+        return (
+            [sense in ["G", "E"] ? val : -Inf for (val, sense) in zip(c, csense)],
+            [sense in ["L", "E"] ? val : Inf for (val, sense) in zip(c, csense)],
+        )
+    elseif haskey(m.mat, "d") && haskey(m.mat, "dsense")
+        d = reshape(m.mat["d"], nc)
+        dsense = reshape(m.mat["dsense"], nc)
+        return (
+            [sense in ["G", "E"] ? val : -Inf for (val, sense) in zip(d, dsense)],
+            [sense in ["L", "E"] ? val : Inf for (val, sense) in zip(d, dsense)],
+        )
+    else
+        return (
+            reshape(get(m.mat, "cl", fill(-Inf, nc, 1)), nc),
+            reshape(get(m.mat, "cu", fill(Inf, nc, 1)), nc),
+        )
+    end
+end
+
+function A.reaction_gene_products_available(
+    m::MATFBCModel,
+    rid::String,
+    available::Function,
+)
+    any(haskey(m.mat, x) for x in key_names.grrs) || return nothing
+    grr = m.mat[guesskeys(:grrs, m)][findfirst(==(rid), A.reactions(m))]
+    typeof(grr) == String || return nothing
+    grrexp = parse_gene_association(grr)
+    grrexp == nothing && return nothing
+    return eval_gene_association(grrexp, available)
+end
 
 function A.reaction_gene_association_dnf(m::MATFBCModel, rid::String)
     any(haskey(m.mat, x) for x in key_names.grrs) || return nothing
     grr = m.mat[guesskeys(:grrs, m)][findfirst(==(rid), A.reactions(m))]
-    typeof(grr) == String ? parse_grr(grr) : nothing
+    grrexp = parse_gene_association(grr)
+    grrexp == nothing && return nothing
+    return flatten_gene_association(grrexp)
 end
 
 function A.metabolite_formula(m::MATFBCModel, mid::String)
@@ -86,21 +151,30 @@ function Base.convert(::Type{MATFBCModel}, m::A.AbstractFBCModel)
         return m
     end
 
+    rxns = A.reactions(m)
+    mets = A.metabolites(m)
     lb, ub = A.bounds(m)
+    clb, cub = A.coupling_bounds(m)
     return MATFBCModel(
         "model", # default name
         Dict(
             "S" => A.stoichiometry(m),
-            "rxns" => A.reactions(m),
-            "mets" => A.metabolites(m),
+            "rxns" => rxns,
+            "rxnNames" => [something(A.reaction_name(m, rxn), "") for rxn in rxns],
+            "mets" => mets,
+            "metNames" => [something(A.metabolite_name(m, met), "") for met in mets],
             "lb" => Vector(lb),
             "ub" => Vector(ub),
             "b" => Vector(A.balance(m)),
             "c" => Vector(A.objective(m)),
+            "C" => A.coupling(m),
+            "cl" => Vector(clb),
+            "cu" => Vector(cub),
             "genes" => A.genes(m),
             "grRules" => [
-                unparse_grr(A.reaction_gene_association_dnf(m, rid)) for
-                rid in A.reactions(m)
+                let dnf = A.reaction_gene_association_dnf(m, rid)
+                    isnothing(dnf) ? "" : format_gene_association_dnf(dnf)
+                end for rid in A.reactions(m)
             ],
             "metFormulas" =>
                 [unparse_formula(A.metabolite_formula(m, mid)) for mid in A.metabolites(m)],

diff --git a/src/utils.jl b/src/utils.jl
@@ -1,20 +1,6 @@
 
 guesskeys(id, model) = first(intersect(keys(model.mat), getfield(key_names, id)))
 
-function parse_grr(str::Maybe{String})
-    isnothing(str) && return nothing
-    isempty(str) && return nothing
-
-    dnf = A.GeneAssociationDNF()
-    for isozyme in string.(split(str, " or "))
-        push!(
-            dnf,
-            string.(split(replace(isozyme, "(" => "", ")" => "", " and " => " "), " ")),
-        )
-    end
-    return dnf
-end
-
 function parse_formula(x::Maybe{String})
     isnothing(x) && return nothing
     x == "" && return nothing
@@ -46,8 +32,3 @@ function unparse_formula(x::Maybe{A.MetaboliteFormula})
     ks = sort(collect(keys(x)))
     join(k * string(x[k]) for k in ks)
 end
-
-function unparse_grr(xs::Maybe{A.GeneAssociationDNF})
-    isnothing(xs) && return nothing
-    join((join(x, " and ") for x in xs), " or ")
-end