diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 11ead99..3f2fb61 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,7 @@ jobs: matrix: version: - '1' # This is always the latest stable release in the 1.X series - #- '1.6' # LTS + - '1.6' # LTS #- 'nightly' os: - ubuntu-latest @@ -37,6 +37,7 @@ jobs: - uses: julia-actions/julia-runtest@latest continue-on-error: ${{ matrix.version == 'nightly' }} - uses: julia-actions/julia-processcoverage@v1 - - uses: codecov/codecov-action@v3 + - uses: codecov/codecov-action@v4 with: - file: lcov.info + files: lcov.info + token: ${{ secrets.CODECOV_TOKEN }} diff --git a/Project.toml b/Project.toml index a4bae26..37fbf09 100644 --- a/Project.toml +++ b/Project.toml @@ -1,18 +1,20 @@ name = "MATFBCModels" uuid = "bd2e13bf-42c0-49e1-9a9c-885d36daf88b" authors = ["The authors of MATFBCModels.jl"] -version = "0.1.0" +version = "0.2.0" [deps] AbstractFBCModels = "5a4f3dfa-1789-40f8-8221-69268c29937c" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" MAT = "23992714-dd62-5051-b70f-ba57cb901cac" +PikaParser = "3bbf5609-3e7b-44cd-8549-7c69f321e792" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [compat] -AbstractFBCModels = "0.1, 0.2" +AbstractFBCModels = "0.3" DocStringExtensions = "0.8, 0.9" MAT = "0.10" +PikaParser = "0.6" SparseArrays = "1" Test = "1" julia = "1" diff --git a/src/MATFBCModels.jl b/src/MATFBCModels.jl index 51eab18..fb5bc8e 100644 --- a/src/MATFBCModels.jl +++ b/src/MATFBCModels.jl @@ -12,6 +12,7 @@ include("constants.jl") include("interface.jl") include("io.jl") include("utils.jl") +include("grr_utils.jl") export MATFBCModel diff --git a/src/constants.jl b/src/constants.jl index 493b000..b8d53df 100644 --- a/src/constants.jl +++ b/src/constants.jl @@ -4,6 +4,9 @@ This module collects information on how people typically name stuff in the MAT file structure. + +The constants are not "constant" per se, so that they are easy to modify in +case they should read models with creatively named fields. """ module key_names reactions = ["reactions", "rxns", "RXNS", "REACTIONS", "Reactions", "Rxns"] diff --git a/src/grr_utils.jl b/src/grr_utils.jl new file mode 100644 index 0000000..3b9a5fd --- /dev/null +++ b/src/grr_utils.jl @@ -0,0 +1,192 @@ + +# note: this file is a copy from JSONFBCModels. Might be nice to have some +# kindof mechanism to keep them roughly in sync. Maybe sink to the Abstract +# interface? + +import PikaParser as PP + +""" +`PikaParser.jl` grammar for stringy GRR expressions. +""" +const grr_grammar = begin + # characters that typically form the identifiers + isident(x::Char) = + isletter(x) || + isdigit(x) || + x == '_' || + x == '-' || + x == ':' || + x == '.' || + x == '@' || + x == '#' || + x == '\'' || + x == '[' || + x == ']' + + # scanner helpers + eat(p) = m -> begin + last = 0 + for i in eachindex(m) + p(m[i]) || break + last = i + end + return last + end + + # eat one of keywords + kws(w...) = m -> begin + last = eat(isident)(m) + m[begin:last] in w ? last : 0 + end + + PP.make_grammar( + [:expr], + PP.flatten( + Dict( + :space => PP.first(PP.scan(eat(isspace)), PP.epsilon), + :id => PP.scan(eat(isident)), + :orop => + PP.first(PP.tokens("||"), PP.token('|'), PP.scan(kws("OR", "or"))), + :andop => PP.first( + PP.tokens("&&"), + PP.token('&'), + PP.scan(kws("AND", "and")), + ), + :expr => PP.seq(:space, :orexpr, :space, PP.end_of_input), + :orexpr => PP.first( + :or => PP.seq(:andexpr, :space, :orop, :space, :orexpr), + :andexpr, + ), + :andexpr => PP.first( + :and => PP.seq(:baseexpr, :space, :andop, :space, :andexpr), + :baseexpr, + ), + :baseexpr => PP.first( + :id, + :parenexpr => PP.seq( + PP.token('('), + :space, + :orexpr, + :space, + PP.token(')'), + ), + ), + ), + Char, + ), + ) +end + +grr_grammar_open(m, _) = + m.rule == :expr ? Bool[0, 1, 0, 0] : + m.rule == :parenexpr ? Bool[0, 0, 1, 0, 0] : + m.rule in [:or, :and] ? Bool[1, 0, 0, 0, 1] : + m.rule in [:andexpr, :orexpr, :notexpr, :baseexpr] ? Bool[1] : + (false for _ in m.submatches) + +grr_grammar_fold(m, _, subvals) = + m.rule == :id ? Expr(:call, :gene, String(m.view)) : + m.rule == :and ? Expr(:call, :and, subvals[1], subvals[5]) : + m.rule == :or ? Expr(:call, :or, subvals[1], subvals[5]) : + m.rule == :parenexpr ? subvals[3] : + m.rule == :expr ? subvals[2] : isempty(subvals) ? nothing : subvals[1] + +""" +$(TYPEDSIGNATURES) + +Parses a JSON-ish data reference to a `Expr`-typed gene association. Contains +"calls" to `gene`, `and` and `or` functions that describe the association. +""" +function parse_gene_association(str::String)::Maybe{Expr} + all(isspace, str) && return nothing + tree = PP.parse_lex(grr_grammar, str) + match = PP.find_match_at!(tree, :expr, 1) + match > 0 || throw(DomainError(str, "cannot parse GRR")) + PP.traverse_match(tree, match, open = grr_grammar_open, fold = grr_grammar_fold) +end + +""" +$(TYPEDSIGNATURES) + +Evaluate the gene association expression with the reference values given by the +`val` function. +""" +function eval_gene_association(ga::Expr, val::Function)::Bool + (ga.head == :call && length(ga.args) >= 2) || + throw(DomainError(ga, "invalid gene association expr")) + if ga.args[1] == :gene && length(ga.args) == 2 + val(ga.args[2]) + elseif ga.args[1] == :and + all(eval_gene_association.(ga.args[2:end], Ref(val))) + elseif ga.args[1] == :or + any(eval_gene_association.(ga.args[2:end], Ref(val))) + else + throw(DomainError(ga, "unsupported gene association function")) + end +end + +""" +$(TYPEDSIGNATURES) + +A helper for producing predictable unique sequences. Might be faster if +compacting would be done directly in sort(). +""" +function sortunique(x) + o = collect(x) + sort!(o) + put = prevind(o, firstindex(o)) + for i in eachindex(o) + if put >= firstindex(o) && o[i] == o[put] + # we already have this one + continue + else + put = nextind(o, put) + if put != i + o[put] = o[i] + end + end + end + o[begin:put] +end + +""" +$(TYPEDSIGNATURES) + +Convert the given gene association expression to DNF. +""" +function flatten_gene_association(ga::Expr)::A.GeneAssociationDNF + function fold_and(dnfs::Vector{Vector{Vector{String}}})::Vector{Vector{String}} + if isempty(dnfs) + [String[]] + else + sortunique( + sortunique(String[l; r]) for l in dnfs[1] for r in fold_and(dnfs[2:end]) + ) + end + end + + (ga.head == :call && length(ga.args) >= 2) || + throw(DomainError(ga, "invalid gene association expr")) + if ga.args[1] == :gene && length(ga.args) == 2 + [[ga.args[2]]] + elseif ga.args[1] == :and + fold_and(flatten_gene_association.(ga.args[2:end])) + elseif ga.args[1] == :or + sortunique(vcat(flatten_gene_association.(ga.args[2:end])...)) + else + throw(DomainError(ga, "unsupported gene association function")) + end +end + +""" +$(TYPEDSIGNATURES) + +Formats a DNF gene association as a `String`. +""" +function format_gene_association_dnf( + grr::A.GeneAssociationDNF; + and = " && ", + or = " || ", +)::String + return join(("(" * join(gr, and) * ")" for gr in grr), or) +end diff --git a/src/interface.jl b/src/interface.jl index f9e72df..9e2c5d7 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -24,13 +24,78 @@ A.balance(m::MATFBCModel) = A.objective(m::MATFBCModel) = sparsevec(m.mat[guesskeys(:objective, m)])::SparseVector{Float64,Int64} -A.reaction_gene_products_available(model::MATFBCModel, rid::String, available::Function) = - A.reaction_gene_products_available_from_dnf(model, rid, available) +# TODO: we should use `guesskeys` for couplings too, but the situation gets +# quite messy there and might require a lot of effort to do "right". +looks_like_squashed_coupling(mat) = + haskey(mat, "A") && haskey(mat, "b") && length(mat["b"]) == size(mat["A"], 1) + +A.n_couplings(m::MATFBCModel) = + looks_like_squashed_coupling(m.mat) ? size(m.mat["A"], 1)::Int - A.n_reactions(m) : + size(get(m.mat, "C", zeros(0, A.n_reactions(m))), 1)::Int + +A.couplings(m::MATFBCModel) = String["mat_coupling_$i" for i = 1:A.n_couplings(m)] + +A.coupling(m::MATFBCModel)::SparseMatrixCSC{Float64,Int64} = + looks_like_squashed_coupling(m.mat) ? sparse(m.mat["A"][A.n_reactions(m)+1:end, :]) : + sparse(get(m.mat, "C", zeros(0, A.n_reactions(m)))) + +""" +$(TYPEDSIGNATURES) + +Overload of `coupling_weights` for `MATFBCModel` is currently quite +inefficient. Use `coupling` instead. +""" +function A.coupling_weights(m::MATFBCModel, cid::String) + startswith(cid, "mat_coupling_") || throw(DomainError(cid, "unknown coupling")) + cidx = parse(Int, cid[14:end]) + return Dict{String,Float64}( + r => w for (r, w) in zip(A.reactions(m), A.coupling(m)[cidx, :]) if w != 0 + ) +end + +function A.coupling_bounds(m::MATFBCModel)::Tuple{Vector{Float64},Vector{Float64}} + nc = A.n_couplings(m) + if looks_like_squashed_coupling(m.mat) + c = reshape(m.mat["b"], length(m.mat["b"]))[A.n_reactions(m)+1:end] + csense = reshape(m.mat["csense"], nc) + return ( + [sense in ["G", "E"] ? val : -Inf for (val, sense) in zip(c, csense)], + [sense in ["L", "E"] ? val : Inf for (val, sense) in zip(c, csense)], + ) + elseif haskey(m.mat, "d") && haskey(m.mat, "dsense") + d = reshape(m.mat["d"], nc) + dsense = reshape(m.mat["dsense"], nc) + return ( + [sense in ["G", "E"] ? val : -Inf for (val, sense) in zip(d, dsense)], + [sense in ["L", "E"] ? val : Inf for (val, sense) in zip(d, dsense)], + ) + else + return ( + reshape(get(m.mat, "cl", fill(-Inf, nc, 1)), nc), + reshape(get(m.mat, "cu", fill(Inf, nc, 1)), nc), + ) + end +end + +function A.reaction_gene_products_available( + m::MATFBCModel, + rid::String, + available::Function, +) + any(haskey(m.mat, x) for x in key_names.grrs) || return nothing + grr = m.mat[guesskeys(:grrs, m)][findfirst(==(rid), A.reactions(m))] + typeof(grr) == String || return nothing + grrexp = parse_gene_association(grr) + grrexp == nothing && return nothing + return eval_gene_association(grrexp, available) +end function A.reaction_gene_association_dnf(m::MATFBCModel, rid::String) any(haskey(m.mat, x) for x in key_names.grrs) || return nothing grr = m.mat[guesskeys(:grrs, m)][findfirst(==(rid), A.reactions(m))] - typeof(grr) == String ? parse_grr(grr) : nothing + grrexp = parse_gene_association(grr) + grrexp == nothing && return nothing + return flatten_gene_association(grrexp) end function A.metabolite_formula(m::MATFBCModel, mid::String) @@ -86,21 +151,30 @@ function Base.convert(::Type{MATFBCModel}, m::A.AbstractFBCModel) return m end + rxns = A.reactions(m) + mets = A.metabolites(m) lb, ub = A.bounds(m) + clb, cub = A.coupling_bounds(m) return MATFBCModel( "model", # default name Dict( "S" => A.stoichiometry(m), - "rxns" => A.reactions(m), - "mets" => A.metabolites(m), + "rxns" => rxns, + "rxnNames" => [something(A.reaction_name(m, rxn), "") for rxn in rxns], + "mets" => mets, + "metNames" => [something(A.metabolite_name(m, met), "") for met in mets], "lb" => Vector(lb), "ub" => Vector(ub), "b" => Vector(A.balance(m)), "c" => Vector(A.objective(m)), + "C" => A.coupling(m), + "cl" => Vector(clb), + "cu" => Vector(cub), "genes" => A.genes(m), "grRules" => [ - unparse_grr(A.reaction_gene_association_dnf(m, rid)) for - rid in A.reactions(m) + let dnf = A.reaction_gene_association_dnf(m, rid) + isnothing(dnf) ? "" : format_gene_association_dnf(dnf) + end for rid in A.reactions(m) ], "metFormulas" => [unparse_formula(A.metabolite_formula(m, mid)) for mid in A.metabolites(m)], diff --git a/src/utils.jl b/src/utils.jl index ea43227..1b3a005 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,20 +1,6 @@ guesskeys(id, model) = first(intersect(keys(model.mat), getfield(key_names, id))) -function parse_grr(str::Maybe{String}) - isnothing(str) && return nothing - isempty(str) && return nothing - - dnf = A.GeneAssociationDNF() - for isozyme in string.(split(str, " or ")) - push!( - dnf, - string.(split(replace(isozyme, "(" => "", ")" => "", " and " => " "), " ")), - ) - end - return dnf -end - function parse_formula(x::Maybe{String}) isnothing(x) && return nothing x == "" && return nothing @@ -46,8 +32,3 @@ function unparse_formula(x::Maybe{A.MetaboliteFormula}) ks = sort(collect(keys(x))) join(k * string(x[k]) for k in ks) end - -function unparse_grr(xs::Maybe{A.GeneAssociationDNF}) - isnothing(xs) && return nothing - join((join(x, " and ") for x in xs), " or ") -end diff --git a/test/misc.jl b/test/misc.jl index c173fe8..dcf0d95 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -10,8 +10,83 @@ @test all(x in ["mat" "MAT"] for x in A.filename_extensions(M.MATFBCModel)) end + +@testset "Test coupling" begin + model = convert( + A.CanonicalModel.Model, + A.load(M.MATFBCModel, joinpath(@__DIR__, "test-models", "e_coli_core.mat")), + ) + model.couplings["breathing"] = A.CanonicalModel.Coupling( + lower_bound = -4.321, + upper_bound = 1.234, + reaction_weights = Dict("EX_o2_e" => 1, "EX_co2_e" => -1), + ) + model.couplings["eating"] = A.CanonicalModel.Coupling( + lower_bound = -5, + upper_bound = -5, + reaction_weights = Dict("EX_glc__D_e" => 1, "EX_fru_e" => 1, "EX_pyr_e" => 1), + ) + path = joinpath(@__DIR__, "test-models", "e_coli_core_coupled.mat") + A.save(convert(M.MATFBCModel, model), path) + A.run_fbcmodel_file_tests( + M.MATFBCModel, + path; + name = "E. coli coupled", + test_save = true, + ) + model = convert(A.CanonicalModel.Model, A.load(M.MATFBCModel, path)) + + @test A.n_couplings(model) == 2 + cbs = A.coupling_bounds(model) + @test cbs in [([-4.321, -5], [1.234, -5]), ([-5, -4.321], [-5, 1.234])] + @test merge(A.coupling_weights.(Ref(model), A.couplings(model))...) == Dict( + "EX_o2_e" => 1, + "EX_co2_e" => -1, + "EX_glc__D_e" => 1, + "EX_fru_e" => 1, + "EX_pyr_e" => 1, + ) +end + +@testset "Ugly cases of coupling" begin + m = M.MATFBCModel( + "model", + Dict( + "S" => [ + 1.0 1.0 + -1.0 1.0 + ], + "b" => [3.0, 1.0], + "c" => [-0.25, 1.0], + "xl" => -ones(2), + "xu" => 2.0 * ones(2), + "rxns" => ["r$x" for x = 1:2], + "mets" => ["m$x" for x = 1:2], + ), + ) + + @test size(A.coupling(m)) == (0, 2) + + mc = deepcopy(m) + + mc.mat["C"] = [0.5 0.5] + @test A.coupling_bounds(mc) == ([-Inf], [Inf]) + + m1 = deepcopy(mc) + m1.mat["A"] = vcat(m1.mat["S"], m1.mat["C"]) + m1.mat["b"] = vcat(m1.mat["b"], [5]) + m1.mat["csense"] = ["L"] + @test A.coupling_bounds(m1) == ([-Inf], [5.0]) + + m2 = deepcopy(mc) + m2.mat["d"] = [5] + m2.mat["dsense"] = ["G"] + @test A.coupling_bounds(m2) == ([5.0], [Inf]) +end + @testset "Corner cases" begin - import MATFBCModels: parse_charge + import MATFBCModels: + parse_charge, eval_gene_association, flatten_gene_association, sortunique @test parse_charge(1) == 1 @test parse_charge(2.0) == 2 @@ -20,4 +95,8 @@ end @test parse_charge(nothing) == nothing @test_throws ArgumentError parse_charge("totally positive charge") @test_throws DomainError parse_charge(["very charged"]) + + @test_throws DomainError eval_gene_association(:(xor(gene("a"), gene("b"))), _ -> false) + @test_throws DomainError flatten_gene_association(:(xor(gene("a"), gene("b")))) + @test sortunique([3, 2, 2, 1]) == [1, 2, 3] end diff --git a/test/runtests.jl b/test/runtests.jl index 74a8a68..8c837f3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,7 @@ using Test import AbstractFBCModels as A import MATFBCModels as M +import SparseArrays @testset "MATFBCModels" begin