From 1d6e12e957721b82fe8e99a0ec1fc40944b93bf4 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 18 Apr 2017 15:18:52 -0400 Subject: [PATCH 01/51] docs: Attribute documentation to PSAMM developers --- docs/conf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 1c3b6370..ad1d287a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -67,8 +67,8 @@ # General information about the project. project = u'PSAMM' -copyright = u'2015, Jon Lund Steffensen' -author = u'Jon Lund Steffensen' +copyright = u'2015-2017, PSAMM developers' +author = u'PSAMM developers' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -249,7 +249,7 @@ # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'psamm.tex', u'PSAMM Documentation', - u'Jon Lund Steffensen', 'manual'), + u'PSAMM developers', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of From 8e27af4540b87f953a98bb3463cc2dfb9a49d37d Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 16 May 2017 16:12:29 -0400 Subject: [PATCH 02/51] fastcore: Improve API docs for fastcore module --- psamm/fastcore.py | 60 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/psamm/fastcore.py b/psamm/fastcore.py index 0e2a349d..66d8bc63 100644 --- a/psamm/fastcore.py +++ b/psamm/fastcore.py @@ -13,11 +13,13 @@ # You should have received a copy of the GNU General Public License # along with PSAMM. If not, see . # -# Copyright 2014-2015 Jon Lund Steffensen +# Copyright 2014-2017 Jon Lund Steffensen """Fastcore module implementing the fastcore algorithm This is an implementation of the algorithms described in [Vlassis14]_. +Use the functions :func:`fastcore` and :func:`fastcc` to easily apply +these algorithms to a :class:`MetabolicModel`. """ import logging @@ -33,6 +35,16 @@ class FastcoreError(Exception): class FastcoreProblem(FluxBalanceProblem): + """Represents a FastCore extension of a flux balance problem. + + Accepts the same arguments as :class:`FluxBalanceProblem`, and an + additional ``epsilon`` keyword argument. + + Args: + model: :class:`MetabolicModel` to solve. + solver: LP solver instance to use. + epsilon: Flux threshold value. + """ def __init__(self, *args, **kwargs): self._epsilon = kwargs.pop('epsilon', 1e-5) super(FastcoreProblem, self).__init__(*args, **kwargs) @@ -131,6 +143,7 @@ def find_sparse_mode(self, core, additional, scaling, weights={}): yield reaction_id def flip(self, reactions): + """Flip the specified reactions.""" for reaction in reactions: if reaction in self._flipped: self._flipped.remove(reaction) @@ -138,16 +151,21 @@ def flip(self, reactions): self._flipped.add(reaction) def is_flipped(self, reaction): + """Return true if reaction is flipped.""" return reaction in self._flipped def fastcc(model, epsilon, solver): - """Check consistency of model reactions + """Check consistency of model reactions. - Yields all reactions in the model that are not part - of the consistent subset. - """ + Yield all reactions in the model that are not part of the consistent + subset. + Args: + model: :class:`MetabolicModel` to solve. + epsilon: Flux threshold value. + solver: LP solver instance to use. + """ reaction_set = set(model.reactions) subset = set(reaction_id for reaction_id in reaction_set if model.limits[reaction_id].lower >= 0) @@ -225,34 +243,52 @@ def fastcc(model, epsilon, solver): def fastcc_is_consistent(model, epsilon, solver): """Quickly check whether model is consistent - Returns true if the model is consistent. If it is only necessary to know - whether a model is consistent, this function is faster as it will return + Return true if the model is consistent. If it is only necessary to know + whether a model is consistent, this function is fast as it will return the result as soon as it finds a single inconsistent reaction. - """ + Args: + model: :class:`MetabolicModel` to solve. + epsilon: Flux threshold value. + solver: LP solver instance to use. + """ for reaction in fastcc(model, epsilon, solver): return False return True def fastcc_consistent_subset(model, epsilon, solver): - """Return consistent subset of model + """Return consistent subset of model. The largest consistent subset is returned as a set of reaction names. - """ + Args: + model: :class:`MetabolicModel` to solve. + epsilon: Flux threshold value. + solver: LP solver instance to use. + """ reaction_set = set(model.reactions) return reaction_set.difference(fastcc(model, epsilon, solver)) def fastcore(model, core, epsilon, solver, scaling=1e5, weights={}): - """Find a flux consistent subnetwork containing the core subset + """Find a flux consistent subnetwork containing the core subset. The result will contain the core subset and as few of the additional reactions as possible. - """ + Args: + model: :class:`MetabolicModel` to solve. + core: Set of core reaction IDs. + epsilon: Flux threshold value. + solver: LP solver instance to use. + scaling: Scaling value to apply (see [Vlassis14]_ for more + information on this parameter). + weights: Dictionary with reaction IDs as keys and values as weights. + Weights specify the cost of adding a reaction to the consistent + subnetwork. Default value is 1. + """ consistent_subset = set() reaction_set = set(model.reactions) From a9a5fde09413a4ecf048ee84b9d096b9c437937c Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 16 May 2017 16:37:39 -0400 Subject: [PATCH 03/51] sbml: Change SBMLReader to default to ignoring boundary species --- psamm/datasource/sbml.py | 11 ++++++++- psamm/tests/test_datasource_sbml.py | 36 ++++++++++++++--------------- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/psamm/datasource/sbml.py b/psamm/datasource/sbml.py index 5c0db99d..4b26cad4 100644 --- a/psamm/datasource/sbml.py +++ b/psamm/datasource/sbml.py @@ -538,9 +538,18 @@ class SBMLReader(object): If ``ignore_boundary`` is ``True``, the species that are marked as boundary conditions will simply be dropped from the species list and from the reaction equations, and any boundary compartment will be dropped too. + Otherwise the boundary species will be retained. Retaining these is only + useful to extract specific information from those species objects. + + Args: + file: File-like object to parse XML SBML content from. + strict: Indicating whether strict parsing is enabled. + ignore_boundary: Indicating whether boundary species are dropped. + context: Optional file parsing context from + :mod:`psamm.datasource.context`. """ - def __init__(self, file, strict=False, ignore_boundary=False, + def __init__(self, file, strict=False, ignore_boundary=True, context=None): # Parse SBML file tree = ET.parse(file) diff --git a/psamm/tests/test_datasource_sbml.py b/psamm/tests/test_datasource_sbml.py index 28aafe07..82bb0b82 100644 --- a/psamm/tests/test_datasource_sbml.py +++ b/psamm/tests/test_datasource_sbml.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU General Public License # along with PSAMM. If not, see . # -# Copyright 2014-2015 Jon Lund Steffensen +# Copyright 2014-2017 Jon Lund Steffensen import unittest @@ -77,7 +77,7 @@ def test_model_name(self): self.assertEqual(reader.name, 'Test model') def test_compartment_exists(self): - reader = sbml.SBMLReader(self.doc) + reader = sbml.SBMLReader(self.doc, ignore_boundary=False) compartments = {entry.id: entry for entry in reader.compartments} self.assertEqual(len(compartments), 2) self.assertEqual(compartments['cell'].id, 'cell') @@ -93,7 +93,7 @@ def test_compartment_exists_with_ignore_boundary(self): self.assertEqual(compartments['cell'].name, 'cell') def test_compounds_exist(self): - reader = sbml.SBMLReader(self.doc) + reader = sbml.SBMLReader(self.doc, ignore_boundary=False) species = {entry.id: entry for entry in reader.species} self.assertEqual(len(species), 5) @@ -132,7 +132,7 @@ def test_g6pase_reaction_exists(self): self.assertEqual(reaction.equation, actual_equation) def test_biomass_reaction_exists(self): - reader = sbml.SBMLReader(self.doc) + reader = sbml.SBMLReader(self.doc, ignore_boundary=False) reaction = reader.get_reaction('Biomass') self.assertFalse(reaction.reversible) @@ -144,7 +144,7 @@ def test_biomass_reaction_exists(self): self.assertEqual(reaction.equation, actual_equation) def test_reaction_xml_notes(self): - reader = sbml.SBMLReader(self.doc) + reader = sbml.SBMLReader(self.doc, ignore_boundary=False) reaction = reader.get_reaction('G6Pase') notes = reaction.xml_notes @@ -217,7 +217,7 @@ def test_model_name(self): self.assertEqual(reader.name, 'Test model') def test_compartment_exists(self): - reader = sbml.SBMLReader(self.doc) + reader = sbml.SBMLReader(self.doc, ignore_boundary=False) compartments = {entry.id: entry for entry in reader.compartments} self.assertEqual(len(compartments), 2) self.assertEqual(compartments['C_c'].id, 'C_c') @@ -233,7 +233,7 @@ def test_compartment_exists_with_ignore_boundary(self): self.assertEqual(compartments['C_c'].name, 'cell') def test_compounds_exist(self): - reader = sbml.SBMLReader(self.doc) + reader = sbml.SBMLReader(self.doc, ignore_boundary=False) species = {entry.id: entry for entry in reader.species} self.assertEqual(len(species), 5) @@ -272,7 +272,7 @@ def test_g6pase_reaction_exists(self): self.assertEqual(reaction.equation, actual_equation) def test_biomass_reaction_exists(self): - reader = sbml.SBMLReader(self.doc) + reader = sbml.SBMLReader(self.doc, ignore_boundary=False) reaction = reader.get_reaction('R_Biomass') self.assertFalse(reaction.reversible) @@ -357,7 +357,7 @@ def test_model_name(self): self.assertEqual(reader.name, 'Test model') def test_compartment_exists(self): - reader = sbml.SBMLReader(self.doc) + reader = sbml.SBMLReader(self.doc, ignore_boundary=False) compartments = {entry.id: entry for entry in reader.compartments} self.assertEqual(len(compartments), 2) self.assertEqual(compartments['C_c'].id, 'C_c') @@ -373,7 +373,7 @@ def test_compartment_exists_with_ignore_boundary(self): self.assertEqual(compartments['C_c'].name, 'cell') def test_compounds_exist(self): - reader = sbml.SBMLReader(self.doc) + reader = sbml.SBMLReader(self.doc, ignore_boundary=False) species = {entry.id: entry for entry in reader.species} self.assertEqual(len(species), 5) @@ -409,7 +409,7 @@ def test_g6pase_reaction_exists(self): self.assertEqual(reaction.equation, actual_equation) def test_biomass_reaction_exists(self): - reader = sbml.SBMLReader(self.doc) + reader = sbml.SBMLReader(self.doc, ignore_boundary=False) reaction = reader.get_reaction('R_Biomass') self.assertFalse(reaction.reversible) @@ -506,7 +506,7 @@ def test_model_name(self): self.assertEqual(reader.name, 'Test model') def test_compartment_exists(self): - reader = sbml.SBMLReader(self.doc) + reader = sbml.SBMLReader(self.doc, ignore_boundary=False) compartments = {entry.id: entry for entry in reader.compartments} self.assertEqual(len(compartments), 2) self.assertEqual(compartments['C_c'].id, 'C_c') @@ -515,7 +515,7 @@ def test_compartment_exists(self): self.assertEqual(compartments['C_b'].name, 'boundary') def test_compounds_exist(self): - reader = sbml.SBMLReader(self.doc) + reader = sbml.SBMLReader(self.doc, ignore_boundary=False) species = {entry.id: entry for entry in reader.species} self.assertEqual(len(species), 5) @@ -547,7 +547,7 @@ def test_compounds_exist(self): self.assertIsNone(species['M_Biomass'].charge) def test_g6pase_reaction_exists(self): - reader = sbml.SBMLReader(self.doc) + reader = sbml.SBMLReader(self.doc, ignore_boundary=False) reaction = reader.get_reaction('R_G6Pase') self.assertTrue(reaction.reversible) @@ -563,7 +563,7 @@ def test_g6pase_reaction_exists(self): self.assertEqual(reaction.properties['upper_flux'], 1000) def test_biomass_reaction_exists(self): - reader = sbml.SBMLReader(self.doc) + reader = sbml.SBMLReader(self.doc, ignore_boundary=False) reaction = reader.get_reaction('R_Biomass') self.assertFalse(reaction.reversible) @@ -675,7 +675,7 @@ def test_model_name(self): self.assertEqual(reader.name, 'Test model') def test_compartment_exists(self): - reader = sbml.SBMLReader(self.doc) + reader = sbml.SBMLReader(self.doc, ignore_boundary=False) compartments = {entry.id: entry for entry in reader.compartments} self.assertEqual(len(compartments), 2) self.assertEqual(compartments['C_c'].id, 'C_c') @@ -684,7 +684,7 @@ def test_compartment_exists(self): self.assertEqual(compartments['C_b'].name, 'boundary') def test_compounds_exist(self): - reader = sbml.SBMLReader(self.doc) + reader = sbml.SBMLReader(self.doc, ignore_boundary=False) species = {entry.id: entry for entry in reader.species} self.assertEqual(len(species), 5) @@ -732,7 +732,7 @@ def test_g6pase_reaction_exists(self): self.assertEqual(reaction.properties['upper_flux'], 1000) def test_biomass_reaction_exists(self): - reader = sbml.SBMLReader(self.doc) + reader = sbml.SBMLReader(self.doc, ignore_boundary=False) reaction = reader.get_reaction('R_Biomass') self.assertFalse(reaction.reversible) From 7f1ba4a723e1ec692c83165576be3ef3e7aeaaa3 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 16 May 2017 16:41:07 -0400 Subject: [PATCH 04/51] sbml: Expand SBMLWriter docstrings --- psamm/datasource/sbml.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/psamm/datasource/sbml.py b/psamm/datasource/sbml.py index 4b26cad4..8f7df301 100644 --- a/psamm/datasource/sbml.py +++ b/psamm/datasource/sbml.py @@ -841,7 +841,7 @@ def create_model(self): class SBMLWriter(object): - """Writer of SBML files""" + """Writer of SBML files.""" def __init__(self, cobra_flux_bounds=False): self._namespace = SBML_NS_L3_V1_CORE @@ -979,8 +979,13 @@ def _indent(self, elem, level=0): elem.tail = i def write_model(self, file, model, pretty=False): - """Write a given model to file""" + """Write a given model to file. + Args: + file: File-like object open for writing. + model: Instance of :class:`NativeModel` to write. + pretty: Whether to format the XML output for readability. + """ ET.register_namespace('mathml', MATHML_NS) ET.register_namespace('xhtml', XHTML_NS) ET.register_namespace('fbc', FBC_V2) From 41d8e072a067fe3823dbe5ebfa3d64a5ef5ee79a Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Thu, 18 May 2017 21:07:49 -0400 Subject: [PATCH 05/51] fastcore: Document return values from functions --- psamm/fastcore.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/psamm/fastcore.py b/psamm/fastcore.py index 66d8bc63..4fca620f 100644 --- a/psamm/fastcore.py +++ b/psamm/fastcore.py @@ -267,6 +267,9 @@ def fastcc_consistent_subset(model, epsilon, solver): model: :class:`MetabolicModel` to solve. epsilon: Flux threshold value. solver: LP solver instance to use. + + Returns: + Set of reaction IDs in the consistent reaction subset. """ reaction_set = set(model.reactions) return reaction_set.difference(fastcc(model, epsilon, solver)) @@ -288,6 +291,9 @@ def fastcore(model, core, epsilon, solver, scaling=1e5, weights={}): weights: Dictionary with reaction IDs as keys and values as weights. Weights specify the cost of adding a reaction to the consistent subnetwork. Default value is 1. + + Returns: + Set of reaction IDs in the consistent reaction subset. """ consistent_subset = set() reaction_set = set(model.reactions) From 17fc8f02b3705c190062a43e1361e84deb16774e Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Thu, 18 May 2017 21:08:32 -0400 Subject: [PATCH 06/51] gapfilling: Fix performance issue when adding reactions The functions for adding transport and exchange reactions would supply the property model.reactions to the function that generates a unique ID. This function expects a set in order to do quick lookup but will also work with other collections that support the `in` operator or even iterators. The performance issue appeared for large models because model.reactions is not a set but an iterator. This resulted in the gap-filling functions spending large amounts of time traversing the same iterator over and over when creating the extended model. This is fixed by first creating a set from the iterator and using the set when trying to generate unique IDs. --- psamm/gapfilling.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/psamm/gapfilling.py b/psamm/gapfilling.py index 8cf8aa0e..6a693f95 100644 --- a/psamm/gapfilling.py +++ b/psamm/gapfilling.py @@ -72,16 +72,18 @@ def add_all_exchange_reactions(model, compartment, allow_duplicates=False): added = set() added_compounds = set() initial_compounds = set(model.compounds) + reactions = set(model.database.reactions) for model_compound in initial_compounds: compound = model_compound.in_compartment(compartment) if compound in added_compounds: continue - rxnid_ex = create_exchange_id(model.database.reactions, compound) + rxnid_ex = create_exchange_id(reactions, compound) reaction_ex = Reaction(Direction.Both, {compound: -1}) if reaction_ex not in all_reactions: model.database.set_reaction(rxnid_ex, reaction_ex) + reactions.add(rxnid_ex) else: rxnid_ex = all_reactions[reaction_ex] @@ -125,6 +127,7 @@ def add_all_transport_reactions(model, boundaries, allow_duplicates=False): added = set() added_pairs = set() initial_compounds = set(model.compounds) + reactions = set(model.database.reactions) for compound in initial_compounds: for c1, c2 in boundary_pairs: compound1 = compound.in_compartment(c1) @@ -133,8 +136,7 @@ def add_all_transport_reactions(model, boundaries, allow_duplicates=False): if pair in added_pairs: continue - rxnid_tp = create_transport_id( - model.database.reactions, compound1, compound2) + rxnid_tp = create_transport_id(reactions, compound1, compound2) reaction_tp = Reaction(Direction.Both, { compound1: -1, @@ -142,6 +144,7 @@ def add_all_transport_reactions(model, boundaries, allow_duplicates=False): }) if reaction_tp not in all_reactions: model.database.set_reaction(rxnid_tp, reaction_tp) + reactions.add(rxnid_tp) else: rxnid_tp = all_reactions[reaction_tp] From 862623d2b90f86ae27430cc1de5b872cbd875eec Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Thu, 18 May 2017 21:13:31 -0400 Subject: [PATCH 07/51] fastgapfill: Fix bug where epsilon was not used --- psamm/fastgapfill.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/psamm/fastgapfill.py b/psamm/fastgapfill.py index 33e33060..c96a1deb 100644 --- a/psamm/fastgapfill.py +++ b/psamm/fastgapfill.py @@ -51,7 +51,7 @@ def fastgapfill(model_extended, core, solver, weights={}, epsilon=1e-5): # Run Fastcore and print the induced reaction set logger.info('Calculating Fastcore induced set on model') induced = fastcore( - model_extended, core, epsilon=1e-5, weights=weights, solver=solver) + model_extended, core, epsilon=epsilon, weights=weights, solver=solver) logger.debug('Result: |A| = {}, A = {}'.format(len(induced), induced)) added_reactions = induced - core logger.debug('Extended: |E| = {}, E = {}'.format( From d3a41281f385af3c354f7dec1a341097478b3e2c Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 23 May 2017 12:08:25 -0400 Subject: [PATCH 08/51] native: Provide repr for common classes This makes it easier to work with these objects interactively and improves error messages in some cases. --- psamm/datasource/entry.py | 5 +++++ psamm/datasource/native.py | 14 ++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/psamm/datasource/entry.py b/psamm/datasource/entry.py index c52137d7..6d8791a9 100644 --- a/psamm/datasource/entry.py +++ b/psamm/datasource/entry.py @@ -17,6 +17,8 @@ """Representation of compound/reaction entries in models.""" +from __future__ import unicode_literals + import abc from collections import Mapping @@ -46,6 +48,9 @@ def properties(self): def filemark(self): """Position of entry in the source file (or None).""" + def __repr__(self): + return str('<{} id={!r}>').format(self.__class__.__name__, self.id) + class CompoundEntry(ModelEntry): """Abstract compound entry.""" diff --git a/psamm/datasource/native.py b/psamm/datasource/native.py index f2bf2c9a..76266edb 100644 --- a/psamm/datasource/native.py +++ b/psamm/datasource/native.py @@ -97,8 +97,11 @@ def yaml_load(stream): class _OrderedEntrySet(object): """Ordered set of entity entries. - This deliberately does not implement Mapping because iteration does not - provide keys (since the keys are already in the entry values). + This is somewhere between a Set and a Mapping but deliberately does not + implement any of those interfaces. Iteration does not provide + (key, value)-items (since the keys are already in the entry values) + but instead provides just values. The usual set operations such as union + and intersection are not provided. """ def __init__(self, mapping={}): self._dict = OrderedDict(mapping) @@ -130,6 +133,10 @@ def discard(self, key): except KeyError: pass + def __repr__(self): + return str('<_OrderedEntrySet {{{}}}>').format( + ', '.join(repr(x) for x in iter(self))) + class ModelReader(object): """Reader of native YAML-based model format. @@ -575,6 +582,9 @@ def _translate_compartments(reaction, compartment): database, model_definition, itervalues(self.exchange), itervalues(self.limits), v_max=self.default_flux_limit) + def __repr__(self): + return str('<{} name={!r}>'.format(self.__class__.__name__, self.name)) + def _check_id(entity, entity_type): """Check whether the ID is valid. From 743f698372779bb9b92a0b5e7a5cde8bcb7262b9 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Wed, 24 May 2017 11:29:17 -0400 Subject: [PATCH 09/51] setup.py: Make excelexport entry point depend on xlsxwriter --- setup.py | 59 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/setup.py b/setup.py index 365d826b..529bf9e9 100755 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ long_description=long_description, - classifiers = [ + classifiers=[ 'Development Status :: 4 - Beta', 'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)', 'Programming Language :: Python :: 2.7', @@ -44,34 +44,34 @@ ], packages=find_packages(), - entry_points = { - 'console_scripts': [ - 'psamm-model = psamm.command:main', - 'psamm-sbml-model = psamm.command:main_sbml', - 'psamm-list-lpsolvers = psamm.lpsolver.generic:list_solvers' - ], - 'psamm.commands': [ - 'chargecheck = psamm.commands.chargecheck:ChargeBalanceCommand', - 'console = psamm.commands.console:ConsoleCommand', - 'duplicatescheck = psamm.commands.duplicatescheck:DuplicatesCheck', - 'excelexport = psamm.commands.excelexport:ExcelExportCommand', - 'fastgapfill = psamm.commands.fastgapfill:FastGapFillCommand', - 'fba = psamm.commands.fba:FluxBalanceCommand', - 'fluxcheck = psamm.commands.fluxcheck:FluxConsistencyCommand', - 'fluxcoupling = psamm.commands.fluxcoupling:FluxCouplingCommand', - 'formulacheck = psamm.commands.formulacheck:FormulaBalanceCommand', - 'fva = psamm.commands.fva:FluxVariabilityCommand', - 'gapcheck = psamm.commands.gapcheck:GapCheckCommand', - 'gapfill = psamm.commands.gapfill:GapFillCommand', - 'genedelete = psamm.commands.genedelete:GeneDeletionCommand', - 'masscheck = psamm.commands.masscheck:MassConsistencyCommand', - 'randomsparse = psamm.commands.randomsparse:RandomSparseNetworkCommand', - 'robustness = psamm.commands.robustness:RobustnessCommand', - 'sbmlexport = psamm.commands.sbmlexport:SBMLExport', - 'search = psamm.commands.search:SearchCommand', - 'tableexport = psamm.commands.tableexport:ExportTableCommand', - ] - }, + + entry_points=''' + [console_scripts] + psamm-model = psamm.command:main + psamm-sbml-model = psamm.command:main_sbml + psamm-list-lpsolvers = psamm.lpsolver.generic:list_solvers + + [psamm.commands] + chargecheck = psamm.commands.chargecheck:ChargeBalanceCommand + console = psamm.commands.console:ConsoleCommand + duplicatescheck = psamm.commands.duplicatescheck:DuplicatesCheck + excelexport = psamm.commands.excelexport:ExcelExportCommand [excel] + fastgapfill = psamm.commands.fastgapfill:FastGapFillCommand + fba = psamm.commands.fba:FluxBalanceCommand + fluxcheck = psamm.commands.fluxcheck:FluxConsistencyCommand + fluxcoupling = psamm.commands.fluxcoupling:FluxCouplingCommand + formulacheck = psamm.commands.formulacheck:FormulaBalanceCommand + fva = psamm.commands.fva:FluxVariabilityCommand + gapcheck = psamm.commands.gapcheck:GapCheckCommand + gapfill = psamm.commands.gapfill:GapFillCommand + genedelete = psamm.commands.genedelete:GeneDeletionCommand + masscheck = psamm.commands.masscheck:MassConsistencyCommand + randomsparse = psamm.commands.randomsparse:RandomSparseNetworkCommand + robustness = psamm.commands.robustness:RobustnessCommand + sbmlexport = psamm.commands.sbmlexport:SBMLExport + search = psamm.commands.search:SearchCommand + tableexport = psamm.commands.tableexport:ExportTableCommand + ''', test_suite='psamm.tests', @@ -81,6 +81,7 @@ ], extras_require={ 'docs': ['sphinx', 'sphinx_rtd_theme', 'mock'], + 'excel': ['xlsxwriter'], ':python_version=="2.7"': ['enum34'], ':python_version=="3.3"': ['enum34'] }) From 7db2584b69a291e65971543ece1cc48334343a59 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 23 May 2017 19:13:30 -0400 Subject: [PATCH 10/51] sbml: Use cElementTree if possible It was also considered whether lxml could be used. lxml would have been able to supply additional information, such as line numbers of element definitions. However, lxml has compatibility issues with the way namespaces are handled in the sbml module currently. The tests that rely on inline XML data were modified. On Python 2.7, cElementTree only works with a file that reads bytes. StringIO returns unicode if initialized with a unicode string. This results in cElementTree failing if a unicode string is supplied in test cases (or if unicode_literals is set). Instead use BytesIO with a string explicitly encoded as UTF-8. This works in all cases. --- psamm/datasource/sbml.py | 21 +++++++++++++++++---- psamm/tests/test_command.py | 6 +++--- psamm/tests/test_datasource_sbml.py | 24 +++++++++++++----------- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/psamm/datasource/sbml.py b/psamm/datasource/sbml.py index 8f7df301..dba424dc 100644 --- a/psamm/datasource/sbml.py +++ b/psamm/datasource/sbml.py @@ -19,7 +19,6 @@ from __future__ import unicode_literals -import xml.etree.ElementTree as ET from decimal import Decimal from fractions import Fraction from functools import partial @@ -27,6 +26,13 @@ import re import json +# Import ElementTree XML parser. The lxml etree implementation may also be +# used with SBMLReader but has compatiblity issues with SBMLWriter. +try: + import xml.etree.cElementTree as ET +except ImportError: + import xml.etree.ElementTree as ET + from six import itervalues, iteritems, text_type, PY3 from .context import FileMark @@ -640,7 +646,8 @@ def __init__(self, file, strict=False, ignore_boundary=True, self._sbml_tag('listOfCompartments')) for compartment in self._compartments.iterfind( self._sbml_tag('compartment')): - filemark = FileMark(self._context, None, None) + filemark = FileMark( + self._context, self._get_sourceline(compartment), None) entry = CompartmentEntry(self, compartment, filemark=filemark) self._model_compartments[entry.id] = entry @@ -648,7 +655,8 @@ def __init__(self, file, strict=False, ignore_boundary=True, self._model_species = {} self._species = self._model.find(self._sbml_tag('listOfSpecies')) for species in self._species.iterfind(self._sbml_tag('species')): - filemark = FileMark(self._context, None, None) + filemark = FileMark( + self._context, self._get_sourceline(species), None) entry = SpeciesEntry(self, species, filemark=filemark) self._model_species[entry.id] = entry @@ -656,7 +664,8 @@ def __init__(self, file, strict=False, ignore_boundary=True, self._model_reactions = {} self._reactions = self._model.find(self._sbml_tag('listOfReactions')) for reaction in self._reactions.iterfind(self._sbml_tag('reaction')): - filemark = FileMark(self._context, None, None) + filemark = FileMark( + self._context, self._get_sourceline(reaction), None) entry = ReactionEntry(self, reaction, filemark=filemark) self._model_reactions[entry.id] = entry @@ -700,6 +709,10 @@ def __init__(self, file, strict=False, ignore_boundary=True, self._active_objective = self._model_objectives[active] + def _get_sourceline(self, element): + """Get source line of element (only supported by lxml).""" + return getattr(element, 'sourceline', None) + def get_compartment(self, compartment_id): """Return :class:`.CompartmentEntry` corresponding to id.""" return self._model_compartments[compartment_id] diff --git a/psamm/tests/test_command.py b/psamm/tests/test_command.py index fb8c342e..ed587c25 100644 --- a/psamm/tests/test_command.py +++ b/psamm/tests/test_command.py @@ -20,7 +20,6 @@ import sys import os import argparse -import codecs import shutil import tempfile from contextlib import contextmanager @@ -103,6 +102,7 @@ class MockSolverCommand(SolverCommandMixin, Command): """ def run(self): solver = self._get_solver() + print(solver) class BaseCommandTest(object): @@ -579,7 +579,7 @@ def test_command_argument_error(self): class TestSBMLCommandMain(unittest.TestCase, BaseCommandTest): def setUp(self): - doc = StringIO(''' + doc = BytesIO(''' -''') +'''.encode('utf-8')) reader = sbml.SBMLReader(doc) self._model = reader.create_model() diff --git a/psamm/tests/test_datasource_sbml.py b/psamm/tests/test_datasource_sbml.py index 82bb0b82..b3430325 100644 --- a/psamm/tests/test_datasource_sbml.py +++ b/psamm/tests/test_datasource_sbml.py @@ -16,6 +16,8 @@ # # Copyright 2014-2017 Jon Lund Steffensen +from __future__ import unicode_literals + import unittest from psamm.datasource import sbml @@ -23,14 +25,14 @@ from decimal import Decimal from fractions import Fraction -from six import StringIO +from six import BytesIO class TestSBMLDatabaseL1V2(unittest.TestCase): """Test parsing of a simple level 1 version 2 SBML file""" def setUp(self): - self.doc = StringIO(''' + self.doc = BytesIO(''' @@ -70,7 +72,7 @@ def setUp(self): -''') +'''.encode('utf-8')) def test_model_name(self): reader = sbml.SBMLReader(self.doc) @@ -169,7 +171,7 @@ class TestSBMLDatabaseL2V5(unittest.TestCase): """Test parsing of a simple level 2 version 5 SBML file""" def setUp(self): - self.doc = StringIO(''' + self.doc = BytesIO(''' @@ -209,7 +211,7 @@ def setUp(self): -''') +'''.encode('utf-8')) def test_model_name(self): reader = sbml.SBMLReader(self.doc) @@ -309,7 +311,7 @@ class TestSBMLDatabaseL3V1(unittest.TestCase): """Test parsing of a simple level 3 version 1 SBML file""" def setUp(self): - self.doc = StringIO(''' + self.doc = BytesIO(''' @@ -349,7 +351,7 @@ def setUp(self): -''') +'''.encode('utf-8')) def test_model_name(self): reader = sbml.SBMLReader(self.doc) @@ -446,7 +448,7 @@ class TestSBMLDatabaseL3V1WithFBCV1(unittest.TestCase): """Test parsing of a level 3 version 1 SBML file with FBC version 1""" def setUp(self): - self.doc = StringIO(''' + self.doc = BytesIO(''' -''') +'''.encode('utf-8')) def test_model_name(self): reader = sbml.SBMLReader(self.doc) @@ -616,7 +618,7 @@ class TestSBMLDatabaseL3V1WithFBCV2(unittest.TestCase): """Test parsing of a level 3 version 1 SBML file with FBC version 2""" def setUp(self): - self.doc = StringIO(''' + self.doc = BytesIO(''' -''') +'''.encode('utf-8')) def test_model_name(self): reader = sbml.SBMLReader(self.doc) From b89c6f8dc6b110d813d83585e2fc49e401024795 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 23 May 2017 19:15:55 -0400 Subject: [PATCH 11/51] sbml: Add SBML prefix to entity classes This makes error messages and repr output less confusing by making it clear that these are SBML specific objects. --- psamm/datasource/sbml.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/psamm/datasource/sbml.py b/psamm/datasource/sbml.py index dba424dc..b8083689 100644 --- a/psamm/datasource/sbml.py +++ b/psamm/datasource/sbml.py @@ -111,11 +111,11 @@ def xml_notes(self): return self._root.find(self._reader._sbml_tag('notes')) -class SpeciesEntry(_SBMLEntry, BaseCompoundEntry): +class SBMLSpeciesEntry(_SBMLEntry, BaseCompoundEntry): """Species entry in the SBML file""" def __init__(self, reader, root, filemark=None): - super(SpeciesEntry, self).__init__(reader, root) + super(SBMLSpeciesEntry, self).__init__(reader, root) self._name = root.get('name') self._comp = root.get('compartment') @@ -215,11 +215,11 @@ def filemark(self): return self._filemark -class ReactionEntry(_SBMLEntry, BaseReactionEntry): +class SBMLReactionEntry(_SBMLEntry, BaseReactionEntry): """Reaction entry in SBML file""" def __init__(self, reader, root, filemark=None): - super(ReactionEntry, self).__init__(reader, root) + super(SBMLReactionEntry, self).__init__(reader, root) self._name = self._root.get('name') self._rev = self._root.get('reversible', 'true') == 'true' @@ -391,11 +391,11 @@ def filemark(self): return self._filemark -class CompartmentEntry(_SBMLEntry, BaseCompartmentEntry): +class SBMLCompartmentEntry(_SBMLEntry, BaseCompartmentEntry): """Compartment entry in the SBML file""" def __init__(self, reader, root, filemark=None): - super(CompartmentEntry, self).__init__(reader, root) + super(SBMLCompartmentEntry, self).__init__(reader, root) self._name = self._root.get('name') self._filemark = filemark @@ -414,7 +414,7 @@ def filemark(self): return self._filemark -class ObjectiveEntry(object): +class SBMLObjectiveEntry(object): """Flux objective defined with FBC""" def __init__(self, reader, namespace, root): @@ -464,7 +464,7 @@ def reactions(self): return iteritems(self._reactions) -class FluxBoundEntry(object): +class SBMLFluxBoundEntry(object): """Flux bound defined with FBC V1. Flux bounds defined with FBC V2 are instead encoded as ``upper_flux`` and @@ -632,7 +632,7 @@ def __init__(self, file, strict=False, ignore_boundary=True, if flux_bounds is not None: for flux_bound in flux_bounds.iterfind( _tag('fluxBound', FBC_V1)): - entry = FluxBoundEntry(self, FBC_V1, flux_bound) + entry = SBMLFluxBoundEntry(self, FBC_V1, flux_bound) self._flux_bounds.append(entry) # Create reference from reaction to flux bound @@ -648,7 +648,7 @@ def __init__(self, file, strict=False, ignore_boundary=True, self._sbml_tag('compartment')): filemark = FileMark( self._context, self._get_sourceline(compartment), None) - entry = CompartmentEntry(self, compartment, filemark=filemark) + entry = SBMLCompartmentEntry(self, compartment, filemark=filemark) self._model_compartments[entry.id] = entry # Species @@ -657,7 +657,7 @@ def __init__(self, file, strict=False, ignore_boundary=True, for species in self._species.iterfind(self._sbml_tag('species')): filemark = FileMark( self._context, self._get_sourceline(species), None) - entry = SpeciesEntry(self, species, filemark=filemark) + entry = SBMLSpeciesEntry(self, species, filemark=filemark) self._model_species[entry.id] = entry # Reactions @@ -666,7 +666,7 @@ def __init__(self, file, strict=False, ignore_boundary=True, for reaction in self._reactions.iterfind(self._sbml_tag('reaction')): filemark = FileMark( self._context, self._get_sourceline(reaction), None) - entry = ReactionEntry(self, reaction, filemark=filemark) + entry = SBMLReactionEntry(self, reaction, filemark=filemark) self._model_reactions[entry.id] = entry if self._ignore_boundary: @@ -699,7 +699,7 @@ def __init__(self, file, strict=False, ignore_boundary=True, if objectives is not None: for objective in objectives.iterfind( _tag('objective', fbc_ns)): - entry = ObjectiveEntry(self, fbc_ns, objective) + entry = SBMLObjectiveEntry(self, fbc_ns, objective) self._model_objectives[entry.id] = entry active = objectives.get(_tag('activeObjective', fbc_ns)) @@ -823,19 +823,19 @@ def create_model(self): if reaction in model.limits: continue - if bounds.operation == FluxBoundEntry.LESS_EQUAL: + if bounds.operation == SBMLFluxBoundEntry.LESS_EQUAL: if reaction not in limits_upper: limits_upper[reaction] = bounds.value else: raise ParseError( 'Conflicting bounds for {}'.format(reaction)) - elif bounds.operation == FluxBoundEntry.GREATER_EQUAL: + elif bounds.operation == SBMLFluxBoundEntry.GREATER_EQUAL: if reaction not in limits_lower: limits_lower[reaction] = bounds.value else: raise ParseError( 'Conflicting bounds for {}'.format(reaction)) - elif bounds.operation == FluxBoundEntry.EQUAL: + elif bounds.operation == SBMLFluxBoundEntry.EQUAL: if (reaction not in limits_lower and reaction not in limits_upper): limits_lower[reaction] = bounds.value From db4fe7a324214767393e578ec2a7d4c8ca2dd264 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 23 May 2017 19:14:38 -0400 Subject: [PATCH 12/51] sbml: Allow access to annotations --- psamm/datasource/sbml.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/psamm/datasource/sbml.py b/psamm/datasource/sbml.py index dba424dc..8fa498bf 100644 --- a/psamm/datasource/sbml.py +++ b/psamm/datasource/sbml.py @@ -81,7 +81,7 @@ class ParseError(Exception): class _SBMLEntry(object): - """Base class for compound and reaction entries""" + """Base class for compound and reaction entries.""" def __init__(self, reader, root): self._reader = reader @@ -107,9 +107,14 @@ def id(self): @property def xml_notes(self): - """Access the entity notes as an XML document fragment""" + """Access the entity notes as an XML document fragment.""" return self._root.find(self._reader._sbml_tag('notes')) + @property + def xml_annotation(self): + """Access the entity annotation as an XML document fragment.""" + return self._root.find(self._reader._sbml_tag('annotation')) + class SpeciesEntry(_SBMLEntry, BaseCompoundEntry): """Species entry in the SBML file""" From ce9168c87fe6cc1a898bef8494390b5819c2a6b2 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Fri, 26 May 2017 14:35:11 -0400 Subject: [PATCH 13/51] sbml: Add tests for xml_notes/xml_annotations --- psamm/tests/test_datasource_sbml.py | 40 +++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/psamm/tests/test_datasource_sbml.py b/psamm/tests/test_datasource_sbml.py index b3430325..5615415b 100644 --- a/psamm/tests/test_datasource_sbml.py +++ b/psamm/tests/test_datasource_sbml.py @@ -625,12 +625,23 @@ def setUp(self): level="3" version="1" fbc:required="false"> + + +

This is model information intended to be seen by humans.

+ +
- + + + +

This is compound information intended to be seen by humans.

+ +
+
@@ -642,7 +653,7 @@ def setUp(self): - + @@ -651,6 +662,23 @@ def setUp(self): + + +

This is reaction information intended to be seen by humans.

+ +
+ + + + + +
  • + + + + + @@ -696,6 +724,7 @@ def test_compounds_exist(self): self.assertFalse(species['M_Glucose'].boundary) self.assertEqual(species['M_Glucose'].formula, 'C6H12O6') self.assertEqual(species['M_Glucose'].charge, 0) + self.assertIsNotNone(species['M_Glucose'].xml_notes) self.assertEqual(species['M_Glucose_6_P'].id, 'M_Glucose_6_P') self.assertEqual(species['M_Glucose_6_P'].name, 'Glucose-6-P') @@ -703,6 +732,7 @@ def test_compounds_exist(self): self.assertFalse(species['M_Glucose_6_P'].boundary) self.assertEqual(species['M_Glucose_6_P'].formula, 'C6H11O9P') self.assertEqual(species['M_Glucose_6_P'].charge, -2) + self.assertIsNone(species['M_Glucose_6_P'].xml_notes) self.assertEqual(species['M_H2O'].id, 'M_H2O') self.assertEqual(species['M_H2O'].name, 'H2O') @@ -733,6 +763,9 @@ def test_g6pase_reaction_exists(self): self.assertEqual(reaction.properties['lower_flux'], -10) self.assertEqual(reaction.properties['upper_flux'], 1000) + self.assertIsNotNone(reaction.xml_notes) + self.assertIsNotNone(reaction.xml_annotation) + def test_biomass_reaction_exists(self): reader = sbml.SBMLReader(self.doc, ignore_boundary=False) reaction = reader.get_reaction('R_Biomass') @@ -748,6 +781,9 @@ def test_biomass_reaction_exists(self): self.assertEqual(reaction.properties['lower_flux'], 0) self.assertEqual(reaction.properties['upper_flux'], 1000) + self.assertIsNone(reaction.xml_notes) + self.assertIsNone(reaction.xml_annotation) + def test_objective_exists(self): reader = sbml.SBMLReader(self.doc) objectives = {entry.id: entry for entry in reader.objectives} From c0b60b552ecb15d7f7d4bcc0c5e659f1caa2e061 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 23 May 2017 19:11:21 -0400 Subject: [PATCH 14/51] entry: Expand docs and id override --- psamm/datasource/entry.py | 84 ++++++++++++++++++++++++++++++++------- 1 file changed, 70 insertions(+), 14 deletions(-) diff --git a/psamm/datasource/entry.py b/psamm/datasource/entry.py index 6d8791a9..328a046b 100644 --- a/psamm/datasource/entry.py +++ b/psamm/datasource/entry.py @@ -27,7 +27,20 @@ @add_metaclass(abc.ABCMeta) class ModelEntry(object): - """Abstract model entry.""" + """Abstract model entry. + + Provdides a base class for model entries which are representations of + any entity (such as compound, reaction or compartment) in a model. An + entity has an ID, and may have a name and filemark. The ID is a unique + string identified within a model. The name is a string identifier for + human consumption. The filemark indicates where the entry originates from + (e.g. file name and line number). Any additional properties for an + entity exist in ``properties`` which is any dict-like object mapping + from string keys to any value type. The ``name`` entry in the dictionary + corresponds to the name. Entries can be mutable, where the + properties can be modified, or immutable, where the properties cannot be + modified or where modifications are ignored. The ID is always immutable. + """ @abc.abstractproperty def id(self): """Identifier of entry.""" @@ -41,7 +54,10 @@ def name(self): def properties(self): """Properties of entry as a :class:`Mapping` subclass (e.g. dict). - Note that the properties are not generally mutable. + Note that the properties are not generally mutable but may be mutable + for specific subclasses. If the ``id`` exists in this dictionary, it + must never change the actual entry ID as obtained from the ``id`` + property, even if other properties are mutable. """ @abc.abstractproperty @@ -53,7 +69,11 @@ def __repr__(self): class CompoundEntry(ModelEntry): - """Abstract compound entry.""" + """Abstract compound entry. + + Entry subclass for representing compounds. This standardizes the properties + ``formula`` and ``charge``. + """ @property def formula(self): """Chemical formula of compound.""" @@ -66,7 +86,11 @@ def charge(self): class ReactionEntry(ModelEntry): - """Abstract reaction entry.""" + """Abstract reaction entry. + + Entry subclass for representing compounds. This standardizes the properties + ``equation`` and ``genes``. + """ @property def equation(self): """Reaction equation.""" @@ -79,21 +103,33 @@ def genes(self): class CompartmentEntry(ModelEntry): - """Abstract compartment entry.""" + """Abstract compartment entry. + + Entry subclass for representing compartments. + """ class _BaseDictEntry(ModelEntry): - """Base class for concrete entries based on dictionary.""" - def __init__(self, abstract_type, properties={}, filemark=None): + """Base class for concrete entries based on dictionary. + + The properties are mutable for this subclass. If ``id`` is None, the + value corresponding to the ``id`` key in the dictionary is used. If this is + not defined, a :class:`ValueError` is raised. + """ + def __init__(self, abstract_type, properties={}, filemark=None, id=None): if isinstance(properties, abstract_type): - self._id = properties.id + self._id = id + if self._id is None: + self._id = properties.id self._properties = dict(properties.properties) if filemark is None: filemark = properties.filemark elif isinstance(properties, Mapping): - if 'id' not in properties: - raise ValueError('id not defined in properties') - self._id = properties['id'] + self._id = id + if self._id is None: + if 'id' not in properties: + raise ValueError('id not defined in properties') + self._id = properties['id'] self._properties = dict(properties) else: raise ValueError('Invalid type of properties object') @@ -104,6 +140,10 @@ def __init__(self, abstract_type, properties={}, filemark=None): def id(self): return self._id + @ModelEntry.name.setter + def name(self, value): + self._properties['name'] = value + @property def properties(self): return self._properties @@ -116,7 +156,7 @@ def filemark(self): class DictCompoundEntry(CompoundEntry, _BaseDictEntry): """Compound entry backed by dictionary. - The given properties dictionary must contain a key 'id' with the + The given properties dictionary must contain a key ``id`` with the identifier. Args: @@ -126,11 +166,19 @@ class DictCompoundEntry(CompoundEntry, _BaseDictEntry): def __init__(self, *args, **kwargs): super(DictCompoundEntry, self).__init__(CompoundEntry, *args, **kwargs) + @CompoundEntry.formula.setter + def formula(self, value): + self._properties['formula'] = value + + @CompoundEntry.charge.setter + def charge(self, value): + self._properties['charge'] = value + class DictReactionEntry(ReactionEntry, _BaseDictEntry): """Reaction entry backed by dictionary. - The given properties dictionary must contain a key 'id' with the + The given properties dictionary must contain a key ``id`` with the identifier. Args: @@ -140,11 +188,19 @@ class DictReactionEntry(ReactionEntry, _BaseDictEntry): def __init__(self, *args, **kwargs): super(DictReactionEntry, self).__init__(ReactionEntry, *args, **kwargs) + @ReactionEntry.equation.setter + def equation(self, value): + self._properties['equation'] = value + + @ReactionEntry.genes.setter + def genes(self, value): + self._properties['genes'] = value + class DictCompartmentEntry(CompartmentEntry, _BaseDictEntry): """Compartment entry backed by dictionary. - The given properties dictionary must contain a key 'id' with the + The given properties dictionary must contain a key ``id`` with the identifier. Args: From 800aca6258d28b3ef7ad1309c001cb4e43cff7f3 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 30 May 2017 11:32:38 -0400 Subject: [PATCH 15/51] entry: Add tests for id override/setters --- psamm/tests/test_datasource_entry.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/psamm/tests/test_datasource_entry.py b/psamm/tests/test_datasource_entry.py index b210fa18..7570e7a4 100644 --- a/psamm/tests/test_datasource_entry.py +++ b/psamm/tests/test_datasource_entry.py @@ -56,6 +56,23 @@ def test_create_compound_dict_entry_without_id(self): with self.assertRaises(ValueError): e = entry.DictCompoundEntry(props) + def test_create_compound_dict_entry_with_id_override(self): + props = { + 'name': 'Compound 1', + 'formula': 'CO2' + } + e = entry.DictCompoundEntry(props, id='new_id') + + def test_use_compound_dict_entry_setters(self): + e = entry.DictCompoundEntry({}, id='new_id') + e.formula = 'CO2' + e.name = 'Compound 1' + e.charge = 5 + self.assertEqual(e.formula, 'CO2') + self.assertEqual(e.properties['formula'], 'CO2') + self.assertEqual(e.name, 'Compound 1') + self.assertEqual(e.charge, 5) + def test_create_reaction_entry(self): props = { 'id': 'reaction_1', @@ -74,6 +91,16 @@ def test_create_reaction_entry_from_compound_entry(self): with self.assertRaises(ValueError): e2 = entry.DictReactionEntry(e) + def test_use_reaction_dict_entry_setters(self): + e = entry.DictReactionEntry({}, id='reaction_1') + e.name = 'Reaction 1' + e.equation = 'A => B' + e.genes = 'gene_1 and gene_2' + self.assertEqual(e.name, 'Reaction 1') + self.assertEqual(e.equation, 'A => B') + self.assertEqual(e.genes, 'gene_1 and gene_2') + self.assertEqual(e.properties['genes'], 'gene_1 and gene_2') + def test_create_compartment_entry(self): props = { 'id': 'c', From f49bdb9742aa54ee994f14080579efd94034a25d Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 23 May 2017 19:11:44 -0400 Subject: [PATCH 16/51] native: Allow clear/update of entry sets --- psamm/datasource/native.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/psamm/datasource/native.py b/psamm/datasource/native.py index 76266edb..ce411918 100644 --- a/psamm/datasource/native.py +++ b/psamm/datasource/native.py @@ -133,6 +133,13 @@ def discard(self, key): except KeyError: pass + def clear(self): + self._dict.clear() + + def update(self, it): + for entry in it: + self.add_entry(entry) + def __repr__(self): return str('<_OrderedEntrySet {{{}}}>').format( ', '.join(repr(x) for x in iter(self))) From 96214939320edca1bcc513c28ae969a6bf31e200 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 30 May 2017 12:09:31 -0400 Subject: [PATCH 17/51] native: Add tests of _OrderedEntrySet --- psamm/tests/test_datasource_native.py | 82 +++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/psamm/tests/test_datasource_native.py b/psamm/tests/test_datasource_native.py index e7a72beb..b4f20156 100644 --- a/psamm/tests/test_datasource_native.py +++ b/psamm/tests/test_datasource_native.py @@ -32,6 +32,88 @@ import yaml +class MockEntry(object): + """Entry object for testing OrderedEntrySet.""" + def __init__(self, id): + self._id = id + + @property + def id(self): + return self._id + + +class TestOrderedEntrySet(unittest.TestCase): + def test_init_from_dict(self): + entries = { + 'test_1': MockEntry('test_1'), + 'test_2': MockEntry('test_2') + } + entry_set = native._OrderedEntrySet(entries) + self.assertEqual(entry_set['test_1'], entries['test_1']) + self.assertEqual(entry_set['test_2'], entries['test_2']) + self.assertEqual(len(entry_set), len(entries)) + + def test_add_and_contains(self): + entry_set = native._OrderedEntrySet() + entry_set.add_entry(MockEntry('def')) + entry_set.add_entry(MockEntry('abc')) + entry_set.add_entry(MockEntry('ghi')) + + self.assertIn('def', entry_set) + self.assertIn('ghi', entry_set) + self.assertNotIn('klm', entry_set) + self.assertEqual(len(entry_set), 3) + + # Check that it is ordered + self.assertEqual( + [entry.id for entry in entry_set], ['def', 'abc', 'ghi']) + + def test_get(self): + entries = { + 'test_1': MockEntry('test_1'), + 'test_2': MockEntry('test_2') + } + entry_set = native._OrderedEntrySet(entries) + self.assertEqual(entry_set.get('test_1'), entries['test_1']) + self.assertIsNone(entry_set.get('test_3')) + self.assertEqual(entry_set.get('test_3', 5), 5) + + def test_discard(self): + entries = { + 'test_1': MockEntry('test_1'), + 'test_2': MockEntry('test_2') + } + entry_set = native._OrderedEntrySet(entries) + entry_set.discard('test_1') + self.assertNotIn('test_1', entry_set) + + def test_update(self): + entries = { + 'test_1': MockEntry('test_1'), + 'test_2': MockEntry('test_2') + } + entry_set = native._OrderedEntrySet(entries) + entry_set.update([ + MockEntry('test_2'), + MockEntry('test_3') + ]) + + self.assertEqual(len(entry_set), 3) + self.assertEqual(entry_set['test_1'], entries['test_1']) + self.assertNotEqual(entry_set['test_2'], entries['test_2']) + + def test_clear(self): + entries = { + 'test_1': MockEntry('test_1'), + 'test_2': MockEntry('test_2') + } + entry_set = native._OrderedEntrySet(entries) + entry_set.clear() + + self.assertEqual(len(entry_set), 0) + self.assertNotIn('test_1', entry_set) + + class TestYAMLDataSource(unittest.TestCase): def test_parse_compartments(self): model_dict = { From 37af1ad1c51ef7c499764180ac9fcc7dfa956fc8 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 30 May 2017 15:50:18 -0400 Subject: [PATCH 18/51] entry: Fix id override in properties When overriding ID in the entry constructors, the properties dict would still be set to the old ID. This is now fixed. --- psamm/datasource/entry.py | 1 + psamm/tests/test_datasource_entry.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/psamm/datasource/entry.py b/psamm/datasource/entry.py index 328a046b..82a38ea1 100644 --- a/psamm/datasource/entry.py +++ b/psamm/datasource/entry.py @@ -134,6 +134,7 @@ def __init__(self, abstract_type, properties={}, filemark=None, id=None): else: raise ValueError('Invalid type of properties object') + self._properties['id'] = self._id self._filemark = filemark @property diff --git a/psamm/tests/test_datasource_entry.py b/psamm/tests/test_datasource_entry.py index 7570e7a4..65a1003b 100644 --- a/psamm/tests/test_datasource_entry.py +++ b/psamm/tests/test_datasource_entry.py @@ -58,10 +58,13 @@ def test_create_compound_dict_entry_without_id(self): def test_create_compound_dict_entry_with_id_override(self): props = { + 'id': 'old_id', 'name': 'Compound 1', 'formula': 'CO2' } e = entry.DictCompoundEntry(props, id='new_id') + self.assertEqual(e.id, 'new_id') + self.assertEqual(e.properties['id'], 'new_id') def test_use_compound_dict_entry_setters(self): e = entry.DictCompoundEntry({}, id='new_id') From a94a1a6ef46580f0842d28e194db50a5989ecc15 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 30 May 2017 13:09:48 -0400 Subject: [PATCH 19/51] native: Allow setting NativeModel properties --- psamm/datasource/native.py | 24 ++++++++++++++++++++++++ psamm/tests/test_datasource_native.py | 17 +++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/psamm/datasource/native.py b/psamm/datasource/native.py index ce411918..085830b5 100644 --- a/psamm/datasource/native.py +++ b/psamm/datasource/native.py @@ -460,31 +460,55 @@ def name(self): """Return model name property.""" return self._properties.get('name') + @name.setter + def name(self, value): + self._properties['name'] = value + @property def version_string(self): """Return model version string.""" return self._properties.get('version_string') + @version_string.setter + def version_string(self, value): + self._properties['version_string'] = value + @property def biomass_reaction(self): """Return biomass reaction property.""" return self._properties.get('biomass') + @biomass_reaction.setter + def biomass_reaction(self, value): + self._properties['biomass'] = value + @property def extracellular_compartment(self): """Return extracellular compartment property.""" return self._properties.get('extracellular') + @extracellular_compartment.setter + def extracellular_compartment(self, value): + self._properties['extracellular'] = value + @property def default_compartment(self): """Return default compartment property.""" return self._properties.get('default_compartment') + @default_compartment.setter + def default_compartment(self, value): + self._properties['default_compartment'] = value + @property def default_flux_limit(self): """Return default flux limit property.""" return self._properties.get('default_flux_limit') + @default_flux_limit.setter + def default_flux_limit(self, value): + self._properties['default_flux_limit'] = value + @property def compartments(self): """Return compartments entry set.""" diff --git a/psamm/tests/test_datasource_native.py b/psamm/tests/test_datasource_native.py index b4f20156..a5cbe09d 100644 --- a/psamm/tests/test_datasource_native.py +++ b/psamm/tests/test_datasource_native.py @@ -680,6 +680,23 @@ def test_check_id_success(self): native._check_id(u'\u222b', 'Compound') +class TestNativeModel(unittest.TestCase): + def test_properties(self): + model = native.NativeModel() + model.name = 'Test model' + model.version_string = '1.0' + model.biomass_reaction = 'rxn_1' + model.extracellular_compartment = 'e' + model.default_compartment = 'c' + model.default_flux_limit = 1000 + self.assertEqual(model.name, 'Test model') + self.assertEqual(model.version_string, '1.0') + self.assertEqual(model.biomass_reaction, 'rxn_1') + self.assertEqual(model.extracellular_compartment, 'e') + self.assertEqual(model.default_compartment, 'c') + self.assertEqual(model.default_flux_limit, 1000) + + class TestNativeModelWriter(unittest.TestCase): def setUp(self): self.writer = native.ModelWriter() From b1932139fcfcea290761d9631cac88ac3c39baff Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 23 May 2017 19:16:50 -0400 Subject: [PATCH 20/51] sbml: Add conversion functions --- psamm/datasource/sbml.py | 406 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 405 insertions(+), 1 deletion(-) diff --git a/psamm/datasource/sbml.py b/psamm/datasource/sbml.py index 3b634367..74def378 100644 --- a/psamm/datasource/sbml.py +++ b/psamm/datasource/sbml.py @@ -38,7 +38,9 @@ from .context import FileMark from .entry import (CompoundEntry as BaseCompoundEntry, ReactionEntry as BaseReactionEntry, - CompartmentEntry as BaseCompartmentEntry) + CompartmentEntry as BaseCompartmentEntry, + DictReactionEntry, DictCompoundEntry, + DictCompartmentEntry) from .native import NativeModel from ..reaction import Reaction, Compound, Direction from ..metabolicmodel import create_exchange_id @@ -69,6 +71,21 @@ FBC_V2 = 'http://www.sbml.org/sbml/level3/version1/fbc/version2' +# COBRA ID mappings +_COBRA_DECODE_ESCAPES = { + '_DASH_': '-', + '_FSLASH_': '/', + '_BSLASH_': '\\', + '_LPAREN_': '(', + '_RPAREN_': ')', + '_LSQBKT_': '[', + '_RSQBKT_': ']', + '_COMMA_': ',', + '_PERIOD_': '.', + '_APOS_': "'" +} + + def _tag(tag, namespace=None): """Prepend namespace to tag name""" if namespace is None: @@ -1241,3 +1258,390 @@ def write_model(self, file, model, pretty=False): if PY3: write_options['encoding'] = 'unicode' tree.write(file, **write_options) + + +def convert_sbml_model(model): + """Convert raw SBML model to extended model. + + Args: + model: :class:`NativeModel` obtained from :class:`SBMLReader`. + """ + biomass_reactions = set() + for reaction in model.reactions: + # Extract limits + if reaction.id not in model.limits: + lower, upper = parse_flux_bounds(reaction) + if lower is not None or upper is not None: + model.limits[reaction.id] = reaction.id, lower, upper + + # Detect objective + objective = parse_objective_coefficient(reaction) + if objective is not None and objective != 0: + biomass_reactions.add(reaction.id) + + if len(biomass_reactions) == 1: + model.biomass_reaction = next(iter(biomass_reactions)) + + # Convert model to mutable entries + convert_model_entries(model) + + +def entry_id_from_cobra_encoding(cobra_id): + """Convert COBRA-encoded ID string to decoded ID string.""" + for escape, symbol in iteritems(_COBRA_DECODE_ESCAPES): + cobra_id = cobra_id.replace(escape, symbol) + return cobra_id + + +def create_convert_sbml_id_function( + compartment_prefix='C_', reaction_prefix='R_', + compound_prefix='M_', decode_id=entry_id_from_cobra_encoding): + """Create function for converting SBML IDs. + + The returned function will strip prefixes, decode the ID using the provided + function. These prefixes are common on IDs in SBML models because the IDs + live in a global namespace. + """ + def convert_sbml_id(entry): + if isinstance(entry, BaseCompartmentEntry): + prefix = compartment_prefix + elif isinstance(entry, BaseReactionEntry): + prefix = reaction_prefix + elif isinstance(entry, BaseCompoundEntry): + prefix = compound_prefix + + new_id = entry.id + if decode_id is not None: + new_id = decode_id(new_id) + if prefix is not None and new_id.startswith(prefix): + new_id = new_id[len(prefix):] + + return new_id + + return convert_sbml_id + + +def translate_sbml_compartment(entry, new_id): + """Translate SBML compartment entry.""" + return DictCompartmentEntry(entry, id=new_id) + + +def translate_sbml_reaction(entry, new_id, compartment_map, compound_map): + """Translate SBML reaction entry.""" + new_entry = DictReactionEntry(entry, id=new_id) + + # Convert compound IDs in reaction equation + if new_entry.equation is not None: + compounds = [] + for compound, value in new_entry.equation.compounds: + # Translate compartment to new ID, if available. + compartment = compartment_map.get( + compound.compartment, compound.compartment) + new_compound = compound.translate( + lambda name: compound_map.get(name, name)).in_compartment( + compartment) + compounds.append((new_compound, value)) + + new_entry.equation = Reaction( + new_entry.equation.direction, compounds) + + # Get XHTML notes properties + for key, value in iteritems(parse_xhtml_reaction_notes(entry)): + if key not in new_entry.properties: + new_entry.properties[key] = value + + return new_entry + + +def translate_sbml_compound(entry, new_id, compartment_map): + """Translate SBML compound entry.""" + new_entry = DictCompoundEntry(entry, id=new_id) + + if 'compartment' in new_entry.properties: + old_compartment = new_entry.properties['compartment'] + new_entry.properties['compartment'] = compartment_map.get( + old_compartment, old_compartment) + + # Get XHTML notes properties + for key, value in iteritems(parse_xhtml_species_notes(entry)): + if key not in new_entry.properties: + new_entry.properties[key] = value + + return new_entry + + +def convert_model_entries( + model, convert_id=create_convert_sbml_id_function(), + create_unique_id=None, + translate_compartment=translate_sbml_compartment, + translate_reaction=translate_sbml_reaction, + translate_compound=translate_sbml_compound): + """Convert and decode model entries. + + Model entries are converted to new entries using the translate functions + and IDs are converted using the given coversion function. If ID conversion + would create a clash of IDs, the ``create_unique_id`` function is called + with a container of current IDs and the base ID to generate a unique ID + from. The translation functions take an existing entry and the new ID. + + All references within the model are updated to use new IDs: compartment + boundaries, limits, exchange, model, biomass reaction, etc. + + Args: + model: :class:`NativeModel`. + """ + compartment_map = {} + compound_map = {} + reaction_map = {} + + def find_new_ids(entries, id_map, type_name): + """Create new IDs for entries.""" + new_ids = set() + for entry in entries: + new_id = convert_id(entry) + if new_id in new_ids: + if create_unique_id is not None: + new_id = create_unique_id(new_ids, new_id) + else: + raise ValueError( + 'Entity ID {!r} is not unique after conversion'.format( + type_name, entry.id)) + + id_map[entry.id] = new_id + new_ids.add(new_id) + + # Find new IDs for all entries + find_new_ids(model.compartments, compartment_map, 'Compartment') + find_new_ids(model.compounds, compound_map, 'Compound') + find_new_ids(model.reactions, reaction_map, 'Reaction') + + # Create new compartment entries + new_compartments = [] + for compartment in model.compartments: + new_id = compartment_map[compartment.id] + new_compartments.append(translate_compartment(compartment, new_id)) + + # Create new compound entries + new_compounds = [] + for compound in model.compounds: + new_id = compound_map[compound.id] + new_compounds.append( + translate_compound(compound, new_id, compartment_map)) + + # Create new reaction entries + new_reactions = [] + for reaction in model.reactions: + new_id = reaction_map[reaction.id] + new_entry = translate_reaction( + reaction, new_id, compartment_map, compound_map) + new_reactions.append(new_entry) + + # Update entries + model.compartments.clear() + model.compartments.update(new_compartments) + + model.compounds.clear() + model.compounds.update(new_compounds) + + model.reactions.clear() + model.reactions.update(new_reactions) + + # Convert compartment boundaries + new_boundaries = [] + for boundary in model.compartment_boundaries: + c1, c2 = (compartment_map.get(c, c) for c in boundary) + new_boundaries.append(tuple(sorted(c1, c2))) + + model.compartment_boundaries.clear() + model.compartment_boundaries.update(new_boundaries) + + # Convert limits + new_limits = [] + for reaction, lower, upper in itervalues(model.limits): + new_reaction_id = reaction_map.get(reaction, reaction) + new_limits.append((new_reaction_id, lower, upper)) + + model.limits.clear() + model.limits.update((limit[0], limit) for limit in new_limits) + + # Convert exchange + new_exchanges = [] + for compound, reaction, lower, upper in itervalues(model.exchange): + new_compound_id = compound.translated( + lambda name: compound_map.get(name, name)) + new_reaction_id = reaction_map.get(reaction, reaction) + new_exchanges.append((new_compound_id, new_reaction_id, lower, upper)) + + model.exchange.clear() + model.exchange.update((ex[0], ex) for ex in new_exchanges) + + # Convert model + new_model = [] + for reaction in model.model: + new_id = reaction_map.get(reaction, reaction) + new_model.append(new_id) + + model.model.clear() + model.model.update((new_id, None) for new_id in new_model) + + # Convert other properties + if model.biomass_reaction is not None: + old_id = model.biomass_reaction + model.biomass_reaction = reaction_map.get(old_id, old_id) + + if model.extracellular_compartment is not None: + old_id = model.extracellular_compartment + model.extracellular_compartment = compartment_map.get(old_id, old_id) + + if model.default_compartment is not None: + old_id = model.default_compartment + model.default_compartment = compartment_map.get(old_id, old_id) + + +def parse_xhtml_notes(entry): + """Yield key, value pairs parsed from the XHTML notes section. + + Each key, value pair must be defined in its own text block, e.g. + ``

    key: value

    key2: value2

    ``. The key and value must be + separated by a colon. Whitespace is stripped from both key and value, and + quotes are removed from values if present. The key is normalized by + conversion to lower case and spaces replaced with underscores. + + Args: + entry: :class:`_SBMLEntry`. + """ + for note in entry.xml_notes.itertext(): + m = re.match(r'^([^:]+):(.+)$', note) + if m: + key, value = m.groups() + key = key.strip().lower().replace(' ', '_') + value = value.strip() + m = re.match(r'^"(.*)"$', value) + if m: + value = m.group(1) + if value != '': + yield key, value + + +def parse_xhtml_species_notes(entry): + """Return species properties defined in the XHTML notes. + + Older SBML models often define additional properties in the XHTML notes + section because structured methods for defining properties had not been + developed. This will try to parse the following properties: ``PUBCHEM ID``, + ``CHEBI ID``, ``FORMULA``, ``KEGG ID``, ``CHARGE``. + + Args: + entry: :class:`SBMLSpeciesEntry`. + """ + properties = {} + if entry.xml_notes is not None: + cobra_notes = dict(parse_xhtml_notes(entry)) + + for key in ('pubchem_id', 'chebi_id'): + if key in cobra_notes: + properties[key] = cobra_notes[key] + + if 'formula' in cobra_notes: + properties['formula'] = cobra_notes['formula'] + + if 'kegg_id' in cobra_notes: + properties['kegg'] = cobra_notes['kegg_id'] + + if 'charge' in cobra_notes: + try: + value = int(cobra_notes['charge']) + except ValueError: + logger.warning( + 'Unable to parse charge for {} as an' + ' integer: {}'.format( + entry.id, cobra_notes['charge'])) + value = cobra_notes['charge'] + properties['charge'] = value + + return properties + + +def parse_xhtml_reaction_notes(entry): + """Return reaction properties defined in the XHTML notes. + + Older SBML models often define additional properties in the XHTML notes + section because structured methods for defining properties had not been + developed. This will try to parse the following properties: ``SUBSYSTEM``, + ``GENE ASSOCIATION``, ``EC NUMBER``, ``AUTHORS``, ``CONFIDENCE``. + + Args: + entry: :class:`SBMLReactionEntry`. + """ + properties = {} + if entry.xml_notes is not None: + cobra_notes = dict(parse_xhtml_notes(entry)) + + if 'subsystem' in cobra_notes: + properties['subsystem'] = cobra_notes['subsystem'] + + if 'gene_association' in cobra_notes: + properties['genes'] = cobra_notes['gene_association'] + + if 'ec_number' in cobra_notes: + properties['ec'] = cobra_notes['ec_number'] + + if 'authors' in cobra_notes: + properties['authors'] = [ + a.strip() for a in cobra_notes['authors'].split(';')] + + if 'confidence' in cobra_notes: + try: + value = int(cobra_notes['confidence']) + except ValueError: + logger.warning( + 'Unable to parse confidence level for {} as an' + ' integer: {}'.format( + entry.id, cobra_notes['confidence'])) + value = cobra_notes['confidence'] + properties['confidence'] = value + + return properties + + +def parse_objective_coefficient(entry): + """Return objective value for reaction entry. + + Detect objectives that are specified using the non-standardized + kinetic law parameters which are used by many pre-FBC SBML models. The + objective coefficient is returned for the given reaction, or None if + undefined. + + Args: + entry: :class:`SBMLReactionEntry`. + """ + for parameter in entry.kinetic_law_reaction_parameters: + pid, name, value, units = parameter + if (pid == 'OBJECTIVE_COEFFICIENT' or + name == 'OBJECTIVE_COEFFICIENT'): + return value + + return None + + +def parse_flux_bounds(entry): + """Return flux bounds for reaction entry. + + Detect flux bounds that are specified using the non-standardized + kinetic law parameters which are used by many pre-FBC SBML models. The + flux bounds are returned as a pair of lower, upper bounds. The returned + bound is None if undefined. + + Args: + entry: :class:`SBMLReactionEntry`. + """ + lower_bound = None + upper_bound = None + for parameter in entry.kinetic_law_reaction_parameters: + pid, name, value, units = parameter + if pid == 'UPPER_BOUND' or name == 'UPPER_BOUND': + upper_bound = value + elif pid == 'LOWER_BOUND' or name == 'LOWER_BOUND': + lower_bound = value + + return lower_bound, upper_bound From 1614e1fa62c3fc53a422101c9ca572ff881d5e37 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Fri, 26 May 2017 18:32:28 -0400 Subject: [PATCH 21/51] sbml: Add tests for parsing properties in notes section --- psamm/tests/test_datasource_sbml.py | 54 ++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/psamm/tests/test_datasource_sbml.py b/psamm/tests/test_datasource_sbml.py index 5615415b..f0c34125 100644 --- a/psamm/tests/test_datasource_sbml.py +++ b/psamm/tests/test_datasource_sbml.py @@ -173,7 +173,6 @@ class TestSBMLDatabaseL2V5(unittest.TestCase): def setUp(self): self.doc = BytesIO(''' @@ -181,7 +180,17 @@ def setUp(self): - + + + +

    FORMULA: C6H12O6

    +

    Charge: "0"

    +

    Additional notes..

    +

    KEGG ID: C00031

    +

    Custom: 123

    + +
    +
    @@ -198,7 +207,14 @@ def setUp(self): - Glucose 6-phosphatase + +

    Authors: Jane Doe ; John Doe

    +

    CONFIDENCE: 3

    +

    EC NUMBER: 3.1.3.9

    +

    gene association : b0822

    +

    SUBSYSTEM: "Glycolysis / Gluconeogenesis"

    +

    Additional notes...

    +
    @@ -260,6 +276,16 @@ def test_compounds_exist(self): self.assertFalse(species['M_Phosphate'].boundary) self.assertTrue(species['M_Biomass'].boundary) + def test_glucose_parse_notes(self): + reader = sbml.SBMLReader(self.doc) + species = reader.get_species('M_Glucose') + notes_dict = sbml.parse_xhtml_species_notes(species) + self.assertEqual(notes_dict, { + 'formula': 'C6H12O6', + 'kegg': 'C00031', + 'charge': 0 + }) + def test_g6pase_reaction_exists(self): reader = sbml.SBMLReader(self.doc) reaction = reader.get_reaction('R_G6Pase') @@ -273,6 +299,18 @@ def test_g6pase_reaction_exists(self): (Compound('M_Glucose_6_P', 'C_c'), 2)]) self.assertEqual(reaction.equation, actual_equation) + def test_g6pase_parse_notes(self): + reader = sbml.SBMLReader(self.doc) + reaction = reader.get_reaction('R_G6Pase') + notes = sbml.parse_xhtml_reaction_notes(reaction) + self.assertEqual(notes, { + 'subsystem': 'Glycolysis / Gluconeogenesis', + 'genes': 'b0822', + 'ec': '3.1.3.9', + 'confidence': 3, + 'authors': ['Jane Doe', 'John Doe'] + }) + def test_biomass_reaction_exists(self): reader = sbml.SBMLReader(self.doc, ignore_boundary=False) reaction = reader.get_reaction('R_Biomass') @@ -285,16 +323,6 @@ def test_biomass_reaction_exists(self): [(Compound('M_Biomass', 'C_b'), 1)]) self.assertEqual(reaction.equation, actual_equation) - def test_reaction_xml_notes(self): - reader = sbml.SBMLReader(self.doc) - reaction = reader.get_reaction('R_G6Pase') - notes = reaction.xml_notes - - notes_tags = list(notes) - self.assertEqual(len(notes_tags), 1) - self.assertEqual(notes_tags[0].tag, '{http://www.w3.org/1999/xhtml}p') - self.assertEqual(notes_tags[0].text, 'Glucose 6-phosphatase') - def test_objective_not_present(self): reader = sbml.SBMLReader(self.doc) objectives = list(reader.objectives) From 66eab007cd6162fe7cdf37bfc026ee1ae7579868 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 30 May 2017 13:37:03 -0400 Subject: [PATCH 22/51] sbml: Add tests for parsing COBRA kinetic law parameters --- psamm/tests/test_datasource_sbml.py | 30 +++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/psamm/tests/test_datasource_sbml.py b/psamm/tests/test_datasource_sbml.py index f0c34125..cf0a3ec7 100644 --- a/psamm/tests/test_datasource_sbml.py +++ b/psamm/tests/test_datasource_sbml.py @@ -224,6 +224,14 @@ def setUp(self): + + + + + + + + @@ -311,6 +319,28 @@ def test_g6pase_parse_notes(self): 'authors': ['Jane Doe', 'John Doe'] }) + def test_parse_reaction_cobra_flux_bounds(self): + reader = sbml.SBMLReader(self.doc) + reaction = reader.get_reaction('R_G6Pase') + lower, upper = sbml.parse_flux_bounds(reaction) + self.assertIsNone(lower) + self.assertIsNone(upper) + + reaction = reader.get_reaction('R_Biomass') + lower, upper = sbml.parse_flux_bounds(reaction) + self.assertEqual(lower, 0) + self.assertEqual(upper, 1000) + + def test_parse_reaction_cobra_objective(self): + reader = sbml.SBMLReader(self.doc) + reaction = reader.get_reaction('R_G6Pase') + coeff = sbml.parse_objective_coefficient(reaction) + self.assertIsNone(coeff) + + reaction = reader.get_reaction('R_Biomass') + coeff = sbml.parse_objective_coefficient(reaction) + self.assertEqual(coeff, 1) + def test_biomass_reaction_exists(self): reader = sbml.SBMLReader(self.doc, ignore_boundary=False) reaction = reader.get_reaction('R_Biomass') From ef243ca2bc28e5989409c9757f27b96f8eff8896 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 30 May 2017 14:25:10 -0400 Subject: [PATCH 23/51] sbml: Update tests for model conversion --- psamm/tests/test_datasource_sbml.py | 524 +++++++++++++++++++--------- 1 file changed, 355 insertions(+), 169 deletions(-) diff --git a/psamm/tests/test_datasource_sbml.py b/psamm/tests/test_datasource_sbml.py index cf0a3ec7..5e178987 100644 --- a/psamm/tests/test_datasource_sbml.py +++ b/psamm/tests/test_datasource_sbml.py @@ -42,11 +42,15 @@ def setUp(self): - - + + - - + + @@ -64,7 +68,8 @@ def setUp(self): - + @@ -166,6 +171,21 @@ def test_flux_bounds_not_present(self): flux_bounds = list(reader.flux_bounds) self.assertEqual(len(flux_bounds), 0) + def test_create_and_convert_model(self): + reader = sbml.SBMLReader(self.doc) + model = reader.create_model() + sbml.convert_sbml_model(model) + + self.assertEqual( + {entry.id for entry in model.compounds}, + {'Glucose', 'Glucose_6_P', 'H2O', 'Phosphate'}) + self.assertEqual( + {entry.id for entry in model.reactions}, + {'G6Pase', 'Biomass'}) + self.assertEqual( + {entry.id for entry in model.compartments}, + {'cell'}) + class TestSBMLDatabaseL2V5(unittest.TestCase): """Test parsing of a simple level 2 version 5 SBML file""" @@ -180,7 +200,8 @@ def setUp(self): - +

    FORMULA: C6H12O6

    @@ -191,20 +212,26 @@ def setUp(self):
    - - - - + + + +
    - - + + - - + + @@ -219,7 +246,8 @@ def setUp(self): - + @@ -263,30 +291,33 @@ def test_compounds_exist(self): species = {entry.id: entry for entry in reader.species} self.assertEqual(len(species), 5) - self.assertEqual(species['M_Glucose'].id, 'M_Glucose') - self.assertEqual(species['M_Glucose'].name, 'Glucose') - self.assertEqual(species['M_Glucose'].compartment, 'C_c') - self.assertFalse(species['M_Glucose'].boundary) - self.assertEqual(species['M_Glucose'].charge, 0) - - self.assertEqual(species['M_Glucose_6_P'].id, 'M_Glucose_6_P') - self.assertEqual(species['M_Glucose_6_P'].name, 'Glucose-6-P') - self.assertEqual(species['M_Glucose_6_P'].compartment, 'C_c') - self.assertFalse(species['M_Glucose_6_P'].boundary) - self.assertEqual(species['M_Glucose_6_P'].charge, -2) - - self.assertEqual(species['M_H2O'].id, 'M_H2O') - self.assertEqual(species['M_H2O'].name, 'H2O') - self.assertEqual(species['M_H2O'].compartment, 'C_c') - self.assertFalse(species['M_H2O'].boundary) - self.assertEqual(species['M_H2O'].charge, 0) - - self.assertFalse(species['M_Phosphate'].boundary) + gluc_species = species['M_Glucose_LPAREN_c_RPAREN_'] + self.assertEqual(gluc_species.id, 'M_Glucose_LPAREN_c_RPAREN_') + self.assertEqual(gluc_species.name, 'Glucose') + self.assertEqual(gluc_species.compartment, 'C_c') + self.assertFalse(gluc_species.boundary) + self.assertEqual(gluc_species.charge, 0) + + g6p_species = species['M_Glucose_6_DASH_P_LPAREN_c_RPAREN_'] + self.assertEqual(g6p_species.id, 'M_Glucose_6_DASH_P_LPAREN_c_RPAREN_') + self.assertEqual(g6p_species.name, 'Glucose-6-P') + self.assertEqual(g6p_species.compartment, 'C_c') + self.assertFalse(g6p_species.boundary) + self.assertEqual(g6p_species.charge, -2) + + h2o_species = species['M_H2O_LPAREN_c_RPAREN_'] + self.assertEqual(h2o_species.id, 'M_H2O_LPAREN_c_RPAREN_') + self.assertEqual(h2o_species.name, 'H2O') + self.assertEqual(h2o_species.compartment, 'C_c') + self.assertFalse(h2o_species.boundary) + self.assertEqual(h2o_species.charge, 0) + + self.assertFalse(species['M_Phosphate_LPAREN_c_RPAREN_'].boundary) self.assertTrue(species['M_Biomass'].boundary) def test_glucose_parse_notes(self): reader = sbml.SBMLReader(self.doc) - species = reader.get_species('M_Glucose') + species = reader.get_species('M_Glucose_LPAREN_c_RPAREN_') notes_dict = sbml.parse_xhtml_species_notes(species) self.assertEqual(notes_dict, { 'formula': 'C6H12O6', @@ -300,11 +331,13 @@ def test_g6pase_reaction_exists(self): self.assertTrue(reaction.reversible) # Compare equation of reaction - actual_equation = Reaction(Direction.Both, - [(Compound('M_Glucose', 'C_c'), 2), - (Compound('M_Phosphate', 'C_c'), 2)], - [(Compound('M_H2O', 'C_c'), 2), - (Compound('M_Glucose_6_P', 'C_c'), 2)]) + actual_equation = Reaction(Direction.Both, [ + (Compound('M_Glucose_LPAREN_c_RPAREN_', 'C_c'), 2), + (Compound('M_Phosphate_LPAREN_c_RPAREN_', 'C_c'), 2) + ], [ + (Compound('M_H2O_LPAREN_c_RPAREN_', 'C_c'), 2), + (Compound('M_Glucose_6_DASH_P_LPAREN_c_RPAREN_', 'C_c'), 2) + ]) self.assertEqual(reaction.equation, actual_equation) def test_g6pase_parse_notes(self): @@ -347,10 +380,12 @@ def test_biomass_reaction_exists(self): self.assertFalse(reaction.reversible) # Compare equation of reaction - actual_equation = Reaction(Direction.Forward, - [(Compound('M_Glucose_6_P', 'C_c'), - Decimal('0.56'))], - [(Compound('M_Biomass', 'C_b'), 1)]) + actual_equation = Reaction(Direction.Forward, [ + (Compound('M_Glucose_6_DASH_P_LPAREN_c_RPAREN_', 'C_c'), + Decimal('0.56')) + ], [ + (Compound('M_Biomass', 'C_b'), 1) + ]) self.assertEqual(reaction.equation, actual_equation) def test_objective_not_present(self): @@ -364,6 +399,24 @@ def test_flux_bounds_not_present(self): flux_bounds = list(reader.flux_bounds) self.assertEqual(len(flux_bounds), 0) + def test_create_and_convert_model(self): + reader = sbml.SBMLReader(self.doc) + model = reader.create_model() + sbml.convert_sbml_model(model) + + self.assertEqual( + {entry.id for entry in model.compounds}, + {'Glucose(c)', 'Glucose_6-P(c)', 'H2O(c)', 'Phosphate(c)'}) + self.assertEqual( + {entry.id for entry in model.reactions}, + {'G6Pase', 'Biomass'}) + self.assertEqual( + {entry.id for entry in model.compartments}, + {'c'}) + + self.assertEqual(model.limits['Biomass'], ('Biomass', 0, 1000)) + self.assertEqual(model.biomass_reaction, 'Biomass') + class TestSBMLDatabaseL3V1(unittest.TestCase): """Test parsing of a simple level 3 version 1 SBML file""" @@ -379,21 +432,34 @@ def setUp(self): - - - - - + + + + + - - + + - - + + Glucose 6-phosphatase @@ -401,7 +467,8 @@ def setUp(self): - + @@ -437,22 +504,25 @@ def test_compounds_exist(self): species = {entry.id: entry for entry in reader.species} self.assertEqual(len(species), 5) - self.assertEqual(species['M_Glucose'].id, 'M_Glucose') - self.assertEqual(species['M_Glucose'].name, 'Glucose') - self.assertEqual(species['M_Glucose'].compartment, 'C_c') - self.assertFalse(species['M_Glucose'].boundary) - - self.assertEqual(species['M_Glucose_6_P'].id, 'M_Glucose_6_P') - self.assertEqual(species['M_Glucose_6_P'].name, 'Glucose-6-P') - self.assertEqual(species['M_Glucose_6_P'].compartment, 'C_c') - self.assertFalse(species['M_Glucose_6_P'].boundary) - - self.assertEqual(species['M_H2O'].id, 'M_H2O') - self.assertEqual(species['M_H2O'].name, 'H2O') - self.assertEqual(species['M_H2O'].compartment, 'C_c') - self.assertFalse(species['M_H2O'].boundary) - - self.assertFalse(species['M_Phosphate'].boundary) + gluc_species = species['M_Glucose_LPAREN_c_RPAREN_'] + self.assertEqual(gluc_species.id, 'M_Glucose_LPAREN_c_RPAREN_') + self.assertEqual(gluc_species.name, 'Glucose') + self.assertEqual(gluc_species.compartment, 'C_c') + self.assertFalse(gluc_species.boundary) + + g6p_species = species['M_Glucose_6_DASH_P_LPAREN_c_RPAREN_'] + self.assertEqual(g6p_species.id, 'M_Glucose_6_DASH_P_LPAREN_c_RPAREN_') + self.assertEqual(g6p_species.name, 'Glucose-6-P') + self.assertEqual(g6p_species.compartment, 'C_c') + self.assertFalse(g6p_species.boundary) + + h2o_species = species['M_H2O_LPAREN_c_RPAREN_'] + self.assertEqual(h2o_species.id, 'M_H2O_LPAREN_c_RPAREN_') + self.assertEqual(h2o_species.name, 'H2O') + self.assertEqual(h2o_species.compartment, 'C_c') + self.assertFalse(h2o_species.boundary) + + self.assertFalse(species['M_Phosphate_LPAREN_c_RPAREN_'].boundary) self.assertTrue(species['M_Biomass'].boundary) def test_g6pase_reaction_exists(self): @@ -461,11 +531,13 @@ def test_g6pase_reaction_exists(self): self.assertTrue(reaction.reversible) # Compare equation of reaction - actual_equation = Reaction(Direction.Both, - [(Compound('M_Glucose', 'C_c'), 2), - (Compound('M_Phosphate', 'C_c'), 2)], - [(Compound('M_H2O', 'C_c'), 2), - (Compound('M_Glucose_6_P', 'C_c'), 2)]) + actual_equation = Reaction(Direction.Both, [ + (Compound('M_Glucose_LPAREN_c_RPAREN_', 'C_c'), 2), + (Compound('M_Phosphate_LPAREN_c_RPAREN_', 'C_c'), 2) + ], [ + (Compound('M_H2O_LPAREN_c_RPAREN_', 'C_c'), 2), + (Compound('M_Glucose_6_DASH_P_LPAREN_c_RPAREN_', 'C_c'), 2) + ]) self.assertEqual(reaction.equation, actual_equation) def test_biomass_reaction_exists(self): @@ -474,10 +546,12 @@ def test_biomass_reaction_exists(self): self.assertFalse(reaction.reversible) # Compare equation of reaction - actual_equation = Reaction(Direction.Forward, - [(Compound('M_Glucose_6_P', 'C_c'), - Decimal('0.56'))], - [(Compound('M_Biomass', 'C_b'), 1)]) + actual_equation = Reaction(Direction.Forward, [ + (Compound('M_Glucose_6_DASH_P_LPAREN_c_RPAREN_', 'C_c'), + Decimal('0.56')) + ], [ + (Compound('M_Biomass', 'C_b'), 1) + ]) self.assertEqual(reaction.equation, actual_equation) def test_reaction_xml_notes(self): @@ -501,6 +575,21 @@ def test_flux_bounds_not_present(self): flux_bounds = list(reader.flux_bounds) self.assertEqual(len(flux_bounds), 0) + def test_create_and_convert_model(self): + reader = sbml.SBMLReader(self.doc) + model = reader.create_model() + sbml.convert_sbml_model(model) + + self.assertEqual( + {entry.id for entry in model.compounds}, + {'Glucose(c)', 'Glucose_6-P(c)', 'H2O(c)', 'Phosphate(c)'}) + self.assertEqual( + {entry.id for entry in model.reactions}, + {'G6Pase', 'Biomass'}) + self.assertEqual( + {entry.id for entry in model.compartments}, + {'c'}) + class TestSBMLDatabaseL3V1WithFBCV1(unittest.TestCase): """Test parsing of a level 3 version 1 SBML file with FBC version 1""" @@ -518,26 +607,44 @@ def setUp(self): - - - - - + + + + + - - + + - - + + - + @@ -552,10 +659,14 @@ def setUp(self): - - - - + + + + '''.encode('utf-8')) @@ -579,28 +690,31 @@ def test_compounds_exist(self): species = {entry.id: entry for entry in reader.species} self.assertEqual(len(species), 5) - self.assertEqual(species['M_Glucose'].id, 'M_Glucose') - self.assertEqual(species['M_Glucose'].name, 'Glucose') - self.assertEqual(species['M_Glucose'].compartment, 'C_c') - self.assertFalse(species['M_Glucose'].boundary) - self.assertEqual(species['M_Glucose'].formula, 'C6H12O6') - self.assertEqual(species['M_Glucose'].charge, 0) - - self.assertEqual(species['M_Glucose_6_P'].id, 'M_Glucose_6_P') - self.assertEqual(species['M_Glucose_6_P'].name, 'Glucose-6-P') - self.assertEqual(species['M_Glucose_6_P'].compartment, 'C_c') - self.assertFalse(species['M_Glucose_6_P'].boundary) - self.assertEqual(species['M_Glucose_6_P'].formula, 'C6H11O9P') - self.assertEqual(species['M_Glucose_6_P'].charge, -2) - - self.assertEqual(species['M_H2O'].id, 'M_H2O') - self.assertEqual(species['M_H2O'].name, 'H2O') - self.assertEqual(species['M_H2O'].compartment, 'C_c') - self.assertFalse(species['M_H2O'].boundary) - self.assertEqual(species['M_H2O'].formula, 'H2O') - self.assertEqual(species['M_H2O'].charge, 0) - - self.assertFalse(species['M_Phosphate'].boundary) + gluc_species = species['M_Glucose_LPAREN_c_RPAREN_'] + self.assertEqual(gluc_species.id, 'M_Glucose_LPAREN_c_RPAREN_') + self.assertEqual(gluc_species.name, 'Glucose') + self.assertEqual(gluc_species.compartment, 'C_c') + self.assertFalse(gluc_species.boundary) + self.assertEqual(gluc_species.formula, 'C6H12O6') + self.assertEqual(gluc_species.charge, 0) + + g6p_species = species['M_Glucose_6_DASH_P_LPAREN_c_RPAREN_'] + self.assertEqual(g6p_species.id, 'M_Glucose_6_DASH_P_LPAREN_c_RPAREN_') + self.assertEqual(g6p_species.name, 'Glucose-6-P') + self.assertEqual(g6p_species.compartment, 'C_c') + self.assertFalse(g6p_species.boundary) + self.assertEqual(g6p_species.formula, 'C6H11O9P') + self.assertEqual(g6p_species.charge, -2) + + h2o_species = species['M_H2O_LPAREN_c_RPAREN_'] + self.assertEqual(h2o_species.id, 'M_H2O_LPAREN_c_RPAREN_') + self.assertEqual(h2o_species.name, 'H2O') + self.assertEqual(h2o_species.compartment, 'C_c') + self.assertFalse(h2o_species.boundary) + self.assertEqual(h2o_species.formula, 'H2O') + self.assertEqual(h2o_species.charge, 0) + + self.assertFalse(species['M_Phosphate_LPAREN_c_RPAREN_'].boundary) self.assertTrue(species['M_Biomass'].boundary) self.assertIsNone(species['M_Biomass'].formula) @@ -612,11 +726,13 @@ def test_g6pase_reaction_exists(self): self.assertTrue(reaction.reversible) # Compare equation of reaction - actual_equation = Reaction(Direction.Both, - [(Compound('M_Glucose', 'C_c'), 2), - (Compound('M_Phosphate', 'C_c'), 2)], - [(Compound('M_H2O', 'C_c'), 2), - (Compound('M_Glucose_6_P', 'C_c'), 2)]) + actual_equation = Reaction(Direction.Both, [ + (Compound('M_Glucose_LPAREN_c_RPAREN_', 'C_c'), 2), + (Compound('M_Phosphate_LPAREN_c_RPAREN_', 'C_c'), 2) + ], [ + (Compound('M_H2O_LPAREN_c_RPAREN_', 'C_c'), 2), + (Compound('M_Glucose_6_DASH_P_LPAREN_c_RPAREN_', 'C_c'), 2) + ]) self.assertEqual(reaction.equation, actual_equation) self.assertEqual(reaction.properties['lower_flux'], -10) @@ -628,10 +744,12 @@ def test_biomass_reaction_exists(self): self.assertFalse(reaction.reversible) # Compare equation of reaction - actual_equation = Reaction(Direction.Forward, - [(Compound('M_Glucose_6_P', 'C_c'), - Decimal('0.56'))], - [(Compound('M_Biomass', 'C_b'), 1)]) + actual_equation = Reaction(Direction.Forward, [ + (Compound('M_Glucose_6_DASH_P_LPAREN_c_RPAREN_', 'C_c'), + Decimal('0.56')) + ], [ + (Compound('M_Biomass', 'C_b'), 1) + ]) self.assertEqual(reaction.equation, actual_equation) self.assertEqual(reaction.properties['lower_flux'], 0) @@ -671,6 +789,25 @@ def test_flux_bounds_exists(self): ('greaterEqual', -10), ('lessEqual', 1000)}) + def test_create_and_convert_model(self): + reader = sbml.SBMLReader(self.doc) + model = reader.create_model() + sbml.convert_sbml_model(model) + + self.assertEqual( + {entry.id for entry in model.compounds}, + {'Glucose(c)', 'Glucose_6-P(c)', 'H2O(c)', 'Phosphate(c)'}) + self.assertEqual( + {entry.id for entry in model.reactions}, + {'G6Pase', 'Biomass'}) + self.assertEqual( + {entry.id for entry in model.compartments}, + {'c'}) + + self.assertEqual(model.limits['Biomass'], ('Biomass', 0, 1000)) + self.assertEqual(model.limits['G6Pase'], ('G6Pase', -10, 1000)) + self.assertEqual(model.biomass_reaction, 'Biomass') + class TestSBMLDatabaseL3V1WithFBCV2(unittest.TestCase): """Test parsing of a level 3 version 1 SBML file with FBC version 2""" @@ -693,17 +830,31 @@ def setUp(self): - +

    This is compound information intended to be seen by humans.

    - - - - + + + +
    @@ -711,14 +862,20 @@ def setUp(self): - + - - + + - - + + @@ -738,9 +895,12 @@ def setUp(self): - + - + @@ -776,30 +936,33 @@ def test_compounds_exist(self): species = {entry.id: entry for entry in reader.species} self.assertEqual(len(species), 5) - self.assertEqual(species['M_Glucose'].id, 'M_Glucose') - self.assertEqual(species['M_Glucose'].name, 'Glucose') - self.assertEqual(species['M_Glucose'].compartment, 'C_c') - self.assertFalse(species['M_Glucose'].boundary) - self.assertEqual(species['M_Glucose'].formula, 'C6H12O6') - self.assertEqual(species['M_Glucose'].charge, 0) - self.assertIsNotNone(species['M_Glucose'].xml_notes) - - self.assertEqual(species['M_Glucose_6_P'].id, 'M_Glucose_6_P') - self.assertEqual(species['M_Glucose_6_P'].name, 'Glucose-6-P') - self.assertEqual(species['M_Glucose_6_P'].compartment, 'C_c') - self.assertFalse(species['M_Glucose_6_P'].boundary) - self.assertEqual(species['M_Glucose_6_P'].formula, 'C6H11O9P') - self.assertEqual(species['M_Glucose_6_P'].charge, -2) - self.assertIsNone(species['M_Glucose_6_P'].xml_notes) - - self.assertEqual(species['M_H2O'].id, 'M_H2O') - self.assertEqual(species['M_H2O'].name, 'H2O') - self.assertEqual(species['M_H2O'].compartment, 'C_c') - self.assertFalse(species['M_H2O'].boundary) - self.assertEqual(species['M_H2O'].formula, 'H2O') - self.assertEqual(species['M_H2O'].charge, 0) - - self.assertFalse(species['M_Phosphate'].boundary) + gluc_species = species['M_Glucose_LPAREN_c_RPAREN_'] + self.assertEqual(gluc_species.id, 'M_Glucose_LPAREN_c_RPAREN_') + self.assertEqual(gluc_species.name, 'Glucose') + self.assertEqual(gluc_species.compartment, 'C_c') + self.assertFalse(gluc_species.boundary) + self.assertEqual(gluc_species.formula, 'C6H12O6') + self.assertEqual(gluc_species.charge, 0) + self.assertIsNotNone(gluc_species.xml_notes) + + g6p_species = species['M_Glucose_6_DASH_P_LPAREN_c_RPAREN_'] + self.assertEqual(g6p_species.id, 'M_Glucose_6_DASH_P_LPAREN_c_RPAREN_') + self.assertEqual(g6p_species.name, 'Glucose-6-P') + self.assertEqual(g6p_species.compartment, 'C_c') + self.assertFalse(g6p_species.boundary) + self.assertEqual(g6p_species.formula, 'C6H11O9P') + self.assertEqual(g6p_species.charge, -2) + self.assertIsNone(g6p_species.xml_notes) + + h2o_species = species['M_H2O_LPAREN_c_RPAREN_'] + self.assertEqual(h2o_species.id, 'M_H2O_LPAREN_c_RPAREN_') + self.assertEqual(h2o_species.name, 'H2O') + self.assertEqual(h2o_species.compartment, 'C_c') + self.assertFalse(h2o_species.boundary) + self.assertEqual(h2o_species.formula, 'H2O') + self.assertEqual(h2o_species.charge, 0) + + self.assertFalse(species['M_Phosphate_LPAREN_c_RPAREN_'].boundary) self.assertTrue(species['M_Biomass'].boundary) self.assertIsNone(species['M_Biomass'].formula) @@ -811,11 +974,13 @@ def test_g6pase_reaction_exists(self): self.assertTrue(reaction.reversible) # Compare equation of reaction - actual_equation = Reaction(Direction.Both, - [(Compound('M_Glucose', 'C_c'), 2), - (Compound('M_Phosphate', 'C_c'), 2)], - [(Compound('M_H2O', 'C_c'), 2), - (Compound('M_Glucose_6_P', 'C_c'), 2)]) + actual_equation = Reaction(Direction.Both, [ + (Compound('M_Glucose_LPAREN_c_RPAREN_', 'C_c'), 2), + (Compound('M_Phosphate_LPAREN_c_RPAREN_', 'C_c'), 2) + ], [ + (Compound('M_H2O_LPAREN_c_RPAREN_', 'C_c'), 2), + (Compound('M_Glucose_6_DASH_P_LPAREN_c_RPAREN_', 'C_c'), 2) + ]) self.assertEqual(reaction.equation, actual_equation) self.assertEqual(reaction.properties['lower_flux'], -10) @@ -830,10 +995,12 @@ def test_biomass_reaction_exists(self): self.assertFalse(reaction.reversible) # Compare equation of reaction - actual_equation = Reaction(Direction.Forward, - [(Compound('M_Glucose_6_P', 'C_c'), - Decimal('0.56'))], - [(Compound('M_Biomass', 'C_b'), 1)]) + actual_equation = Reaction(Direction.Forward, [ + (Compound('M_Glucose_6_DASH_P_LPAREN_c_RPAREN_', 'C_c'), + Decimal('0.56')) + ], [ + (Compound('M_Biomass', 'C_b'), 1) + ]) self.assertEqual(reaction.equation, actual_equation) self.assertEqual(reaction.properties['lower_flux'], 0) @@ -862,3 +1029,22 @@ def test_flux_bounds_not_present(self): reader = sbml.SBMLReader(self.doc) flux_bounds = list(reader.flux_bounds) self.assertEqual(len(flux_bounds), 0) + + def test_create_and_convert_model(self): + reader = sbml.SBMLReader(self.doc) + model = reader.create_model() + sbml.convert_sbml_model(model) + + self.assertEqual( + {entry.id for entry in model.compounds}, + {'Glucose(c)', 'Glucose_6-P(c)', 'H2O(c)', 'Phosphate(c)'}) + self.assertEqual( + {entry.id for entry in model.reactions}, + {'G6Pase', 'Biomass'}) + self.assertEqual( + {entry.id for entry in model.compartments}, + {'c'}) + + self.assertEqual(model.limits['Biomass'], ('Biomass', 0, 1000)) + self.assertEqual(model.limits['G6Pase'], ('G6Pase', -10, 1000)) + self.assertEqual(model.biomass_reaction, 'Biomass') From fb2431b3c02a5935d67005de1739eab27088e182 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 30 May 2017 15:51:26 -0400 Subject: [PATCH 24/51] sbml: Initialize model reaction set in create_model() Previously the model reaction set was left empty. Since SBML does not have a way of specifying a reaction subset to use, the model reaction subset should be set to the set of all reactions. --- psamm/datasource/sbml.py | 4 ++++ psamm/tests/test_datasource_sbml.py | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/psamm/datasource/sbml.py b/psamm/datasource/sbml.py index 74def378..e6db1df7 100644 --- a/psamm/datasource/sbml.py +++ b/psamm/datasource/sbml.py @@ -828,6 +828,10 @@ def create_model(self): for reaction in self.reactions: model.reactions.add_entry(reaction) + # Create model reaction set + for reaction in model.reactions: + model.model[reaction.id] = None + # Convert reaction limits properties to proper limits for reaction in model.reactions: props = reaction.properties diff --git a/psamm/tests/test_datasource_sbml.py b/psamm/tests/test_datasource_sbml.py index 5e178987..6619bf8f 100644 --- a/psamm/tests/test_datasource_sbml.py +++ b/psamm/tests/test_datasource_sbml.py @@ -186,6 +186,8 @@ def test_create_and_convert_model(self): {entry.id for entry in model.compartments}, {'cell'}) + self.assertEqual(set(model.model), {'Biomass', 'G6Pase'}) + class TestSBMLDatabaseL2V5(unittest.TestCase): """Test parsing of a simple level 2 version 5 SBML file""" @@ -416,6 +418,7 @@ def test_create_and_convert_model(self): self.assertEqual(model.limits['Biomass'], ('Biomass', 0, 1000)) self.assertEqual(model.biomass_reaction, 'Biomass') + self.assertEqual(set(model.model), {'Biomass', 'G6Pase'}) class TestSBMLDatabaseL3V1(unittest.TestCase): @@ -590,6 +593,8 @@ def test_create_and_convert_model(self): {entry.id for entry in model.compartments}, {'c'}) + self.assertEqual(set(model.model), {'Biomass', 'G6Pase'}) + class TestSBMLDatabaseL3V1WithFBCV1(unittest.TestCase): """Test parsing of a level 3 version 1 SBML file with FBC version 1""" @@ -806,6 +811,7 @@ def test_create_and_convert_model(self): self.assertEqual(model.limits['Biomass'], ('Biomass', 0, 1000)) self.assertEqual(model.limits['G6Pase'], ('G6Pase', -10, 1000)) + self.assertEqual(set(model.model), {'Biomass', 'G6Pase'}) self.assertEqual(model.biomass_reaction, 'Biomass') @@ -1047,4 +1053,5 @@ def test_create_and_convert_model(self): self.assertEqual(model.limits['Biomass'], ('Biomass', 0, 1000)) self.assertEqual(model.limits['G6Pase'], ('G6Pase', -10, 1000)) + self.assertEqual(set(model.model), {'Biomass', 'G6Pase'}) self.assertEqual(model.biomass_reaction, 'Biomass') From 3bb1566d88be82061aff4b54fd79b5abe51d2317 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 30 May 2017 16:31:19 -0400 Subject: [PATCH 25/51] sbml: Add function for merging equivalent compounds --- psamm/datasource/sbml.py | 128 ++++++++++++++++++++++++++++ psamm/tests/test_datasource_sbml.py | 39 ++++++++- 2 files changed, 166 insertions(+), 1 deletion(-) diff --git a/psamm/datasource/sbml.py b/psamm/datasource/sbml.py index e6db1df7..06d65bf1 100644 --- a/psamm/datasource/sbml.py +++ b/psamm/datasource/sbml.py @@ -25,6 +25,7 @@ import logging import re import json +from collections import OrderedDict # Import ElementTree XML parser. The lxml etree implementation may also be # used with SBMLReader but has compatiblity issues with SBMLWriter. @@ -1649,3 +1650,130 @@ def parse_flux_bounds(entry): lower_bound = value return lower_bound, upper_bound + + +def merge_equivalent_compounds(model): + """Merge equivalent compounds in various compartments. + + Tries to detect and merge compound entries that represent the same + compound in different compartments. The entries are only merged if all + properties are equivalent. Compound entries must have an ID with a suffix + of an underscore followed by the compartment ID. This suffix will be + stripped and compounds with identical IDs are merged if the properties + are identical. + + Args: + model: :class:`NativeModel`. + """ + def dicts_are_compatible(d1, d2): + return all(key not in d1 or key not in d2 or d1[key] == d2[key] + for key in set(d1) | set(d2)) + + compound_compartment = {} + inelegible = set() + for reaction in model.reactions: + equation = reaction.equation + if equation is None: + continue + + for compound, _ in equation.compounds: + compartment = compound.compartment + if compartment is not None: + compound_compartment[compound.name] = compartment + if not compound.name.endswith('_{}'.format(compartment)): + inelegible.add(compound.name) + + compound_groups = {} + for compound_id, compartment in iteritems(compound_compartment): + if compound_id in inelegible: + continue + + suffix = '_{}'.format(compound_compartment[compound_id]) + if compound_id.endswith(suffix): + group_name = compound_id[:-len(suffix)] + compound_groups.setdefault(group_name, set()).add(compound_id) + + compound_mapping = {} + merged_compounds = {} + for group, compound_set in iteritems(compound_groups): + # Try to merge as many compounds as possible + merged = [] + for compound_id in compound_set: + props = dict(model.compounds[compound_id].properties) + + # Ignore differences in ID and compartment properties + props.pop('id', None) + props.pop('compartment', None) + + for merged_props, merged_set in merged: + if dicts_are_compatible(props, merged_props): + merged_set.add(compound_id) + merged_props.update(props) + break + else: + keys = set(key for key in set(props) | set(merged_props) + if key not in props or + key not in merged_props or + props[key] != merged_props[key]) + logger.info( + 'Unable to merge {} into {}, difference in' + ' keys: {}'.format( + compound_id, ', '.join(merged_set), + ', '.join(keys))) + else: + merged.append((props, {compound_id})) + + if len(merged) == 1: + # Merge into one set with the group name + merged_props, merged_set = merged[0] + + for compound_id in merged_set: + compound_mapping[compound_id] = group + merged_compounds[group] = merged_props + else: + # Since we cannot merge all compounds, create new group names + # based on the group and compartments. + for merged_props, merged_set in merged: + compartments = set(compound_compartment[c] for c in merged_set) + merged_name = '{}_{}'.format( + group, '_'.join(sorted(compartments))) + + for compound_id in merged_set: + compound_mapping[compound_id] = merged_name + merged_compounds[merged_name] = merged_props + + # Translate reaction compounds + for reaction in model.reactions: + equation = reaction.equation + if equation is None: + continue + + reaction.equation = equation.translated_compounds( + lambda c: compound_mapping.get(c, c)) + + # Translate compound entries + new_compounds = [] + for compound in model.compounds: + if compound.id not in compound_mapping: + new_compounds.append(compound) + else: + group = compound_mapping[compound.id] + if group not in merged_compounds: + continue + props = merged_compounds.pop(group) + props['id'] = group + new_compounds.append(DictCompoundEntry( + props, filemark=compound.filemark)) + + model.compounds.clear() + model.compounds.update(new_compounds) + + # Translate exchange + new_exchange = OrderedDict() + for compound, reaction_id, lower, upper in itervalues(model.exchange): + new_compound = compound.translate( + lambda name: compound_mapping.get(name, name)) + new_exchange[new_compound] = new_compound, reaction_id, lower, upper + + model.exchange.clear() + model.exchange.update(new_exchange) diff --git a/psamm/tests/test_datasource_sbml.py b/psamm/tests/test_datasource_sbml.py index 6619bf8f..6b4a43e5 100644 --- a/psamm/tests/test_datasource_sbml.py +++ b/psamm/tests/test_datasource_sbml.py @@ -20,7 +20,8 @@ import unittest -from psamm.datasource import sbml +from psamm.datasource import sbml, native, entry +from psamm.datasource.reaction import parse_reaction, parse_compound from psamm.reaction import Reaction, Compound, Direction from decimal import Decimal @@ -1055,3 +1056,39 @@ def test_create_and_convert_model(self): self.assertEqual(model.limits['G6Pase'], ('G6Pase', -10, 1000)) self.assertEqual(set(model.model), {'Biomass', 'G6Pase'}) self.assertEqual(model.biomass_reaction, 'Biomass') + + +class TestMergeEquivalentCompounds(unittest.TestCase): + def test_merge(self): + model = native.NativeModel() + model.compounds.update([ + entry.DictCompoundEntry({ + 'id': 'g6p_c', + 'name': 'G6P' + }), + entry.DictCompoundEntry({ + 'id': 'g6p_e', + 'name': 'G6P' + }) + ]) + model.reactions.update([ + entry.DictReactionEntry({ + 'id': 'TP_g6p', + 'equation': parse_reaction('g6p_c[c] <=> g6p_e[e]') + }) + ]) + exchange_compound = Compound('g6p_e', 'e') + model.exchange[exchange_compound] = ( + exchange_compound, 'EX_g6p_e', -10, 0) + + sbml.merge_equivalent_compounds(model) + + self.assertEqual({entry.id for entry in model.compounds}, {'g6p'}) + self.assertEqual( + model.reactions['TP_g6p'].equation, + parse_reaction('g6p[c] <=> g6p[e]')) + + new_exchange_compound = Compound('g6p', 'e') + self.assertEqual( + model.exchange[new_exchange_compound], + (new_exchange_compound, 'EX_g6p_e', -10, 0)) From 9a5fc00da09737b2aca1acca8be7e416d99fc777 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 30 May 2017 17:43:20 -0400 Subject: [PATCH 26/51] sbml: Convert exchange reactions to exchange compounds Add functions for automatically detecting the extracellular compartment and for converting exchange reactions in the extracellular compartment to exchange compounds in NativeModel. --- psamm/datasource/sbml.py | 116 ++++++++++++++++++++++++++-- psamm/tests/test_datasource_sbml.py | 86 ++++++++++++++++++--- 2 files changed, 187 insertions(+), 15 deletions(-) diff --git a/psamm/datasource/sbml.py b/psamm/datasource/sbml.py index 06d65bf1..414ab924 100644 --- a/psamm/datasource/sbml.py +++ b/psamm/datasource/sbml.py @@ -25,7 +25,7 @@ import logging import re import json -from collections import OrderedDict +from collections import OrderedDict, Counter # Import ElementTree XML parser. The lxml etree implementation may also be # used with SBMLReader but has compatiblity issues with SBMLWriter. @@ -389,7 +389,7 @@ def kinetic_law_reaction_parameters(self): self._reader._sbml_tag('parameter'))): param_id = parameter.get('id') param_name = parameter.get('name') - param_value = float(parameter.get('value')) + param_value = Decimal(parameter.get('value')) param_units = parameter.get('units') yield param_id, param_name, param_value, param_units @@ -462,7 +462,7 @@ def __init__(self, reader, namespace, root): _tag('listOfFluxObjectives', self._namespace), _tag('fluxObjective', self._namespace))): reaction = fo.get(_tag('reaction', self._namespace)) - coefficient = float(fo.get(_tag('coefficient', self._namespace))) + coefficient = Decimal(fo.get(_tag('coefficient', self._namespace))) if reaction is None or coefficient is None: raise ParseError( 'Missing attributes on flux objective: {}'.format(self.id)) @@ -522,7 +522,7 @@ def __init__(self, reader, namespace, root): if value is None: raise ParseError('Flux bound is missing "value" attribute') try: - self._value = float(value) + self._value = Decimal(value) except ValueError: raise ParseError('Unable to parse flux bound value') @@ -640,7 +640,7 @@ def __init__(self, file, strict=False, ignore_boundary=True, for param in params.iterfind(self._sbml_tag('parameter')): if param.get('constant') == 'true': param_id = param.get('id') - value = float(param.get('value')) + value = Decimal(param.get('value')) self._model_constants[param_id] = value # Flux bounds @@ -1290,6 +1290,14 @@ def convert_sbml_model(model): # Convert model to mutable entries convert_model_entries(model) + # Detect extracelluar compartment + if model.extracellular_compartment is None: + extracellular = detect_extracellular_compartment(model) + model.extracellular_compartment = extracellular + + # Convert exchange reactions to exchange compounds + convert_exchange_to_compounds(model) + def entry_id_from_cobra_encoding(cobra_id): """Convert COBRA-encoded ID string to decoded ID string.""" @@ -1652,6 +1660,104 @@ def parse_flux_bounds(entry): return lower_bound, upper_bound +def detect_extracellular_compartment(model): + """Detect the identifier for equations with extracellular compartments. + + Args: + model: :class:`NativeModel`. + """ + extracellular_key = Counter() + + for reaction in model.reactions: + equation = reaction.equation + if equation is None: + continue + + if len(equation.compounds) == 1: + compound, _ = equation.compounds[0] + compartment = compound.compartment + extracellular_key[compartment] += 1 + if len(extracellular_key) == 0: + return None + else: + best_key, _ = extracellular_key.most_common(1)[0] + + logger.info('{} is extracellular compartment'.format(best_key)) + + return best_key + + +def convert_exchange_to_compounds(model): + """Convert exchange reactions in model to exchange compounds. + + Only exchange reactions in the extracellular compartment are converted. + The extracelluar compartment must be defined for the model. + + Args: + model: :class:`NativeModel`. + """ + # Build set of exchange reactions + exchanges = set() + for reaction in model.reactions: + equation = reaction.properties.get('equation') + if equation is None: + continue + + if len(equation.compounds) != 1: + # Provide warning for exchange reactions with more than + # one compound, they won't be put into the exchange definition + if (len(equation.left) == 0) != (len(equation.right) == 0): + logger.warning('Exchange reaction {} has more than one' + ' compound, it was not converted to' + ' exchange compound'.format(reaction.id)) + continue + + exchanges.add(reaction.id) + + # Convert exchange reactions into exchange compounds + for reaction_id in exchanges: + equation = model.reactions[reaction_id].equation + compound, value = equation.compounds[0] + if compound.compartment != model.extracellular_compartment: + continue + + if compound in model.exchange: + logger.warning( + 'Compound {} is already defined in the exchange' + ' definition'.format(compound)) + continue + + # We multiply the flux bounds by value in order to create equivalent + # exchange reactions with stoichiometric value of one. If the flux + # bounds are not set but the reaction is unidirectional, the implicit + # flux bounds must be used. + lower_flux, upper_flux = None, None + if reaction_id in model.limits: + _, lower, upper = model.limits[reaction_id] + if lower is not None: + lower_flux = lower * abs(value) + if upper is not None: + upper_flux = upper * abs(value) + + if lower_flux is None and equation.direction == Direction.Forward: + lower_flux = 0.0 + if upper_flux is None and equation.direction == Direction.Reverse: + upper_flux = 0.0 + + # If the stoichiometric value of the reaction is reversed, the flux + # limits must be flipped. + if value > 0: + lower_flux, upper_flux = ( + -upper_flux if upper_flux is not None else None, + -lower_flux if lower_flux is not None else None) + + model.exchange[compound] = ( + compound, reaction_id, lower_flux, upper_flux) + + model.reactions.discard(reaction_id) + model.limits.pop(reaction_id, None) + + def merge_equivalent_compounds(model): """Merge equivalent compounds in various compartments. diff --git a/psamm/tests/test_datasource_sbml.py b/psamm/tests/test_datasource_sbml.py index 6b4a43e5..1867e4cc 100644 --- a/psamm/tests/test_datasource_sbml.py +++ b/psamm/tests/test_datasource_sbml.py @@ -21,12 +21,12 @@ import unittest from psamm.datasource import sbml, native, entry -from psamm.datasource.reaction import parse_reaction, parse_compound +from psamm.datasource.reaction import parse_reaction from psamm.reaction import Reaction, Compound, Direction from decimal import Decimal from fractions import Fraction -from six import BytesIO +from six import BytesIO, itervalues class TestSBMLDatabaseL1V2(unittest.TestCase): @@ -71,6 +71,8 @@ def setUp(self): + @@ -145,10 +147,12 @@ def test_biomass_reaction_exists(self): self.assertFalse(reaction.reversible) # Compare equation of reaction - actual_equation = Reaction(Direction.Forward, - [(Compound('Glucose_6_P', 'cell'), - Fraction(56, 100))], - [(Compound('Biomass', 'boundary'), 1)]) + actual_equation = Reaction(Direction.Forward, [ + (Compound('Glucose_6_P', 'cell'), Fraction(56, 100)), + (Compound('Glucose', 'cell'), Fraction(88, 100)) + ], [ + (Compound('Biomass', 'boundary'), 1) + ]) self.assertEqual(reaction.equation, actual_equation) def test_reaction_xml_notes(self): @@ -251,6 +255,8 @@ def setUp(self): + @@ -385,7 +391,8 @@ def test_biomass_reaction_exists(self): # Compare equation of reaction actual_equation = Reaction(Direction.Forward, [ (Compound('M_Glucose_6_DASH_P_LPAREN_c_RPAREN_', 'C_c'), - Decimal('0.56')) + Decimal('0.56')), + (Compound('M_Glucose_LPAREN_c_RPAREN_', 'C_c'), Decimal('0.88')) ], [ (Compound('M_Biomass', 'C_b'), 1) ]) @@ -473,6 +480,8 @@ def setUp(self): + @@ -552,7 +561,8 @@ def test_biomass_reaction_exists(self): # Compare equation of reaction actual_equation = Reaction(Direction.Forward, [ (Compound('M_Glucose_6_DASH_P_LPAREN_c_RPAREN_', 'C_c'), - Decimal('0.56')) + Decimal('0.56')), + (Compound('M_Glucose_LPAREN_c_RPAREN_', 'C_c'), Decimal('0.88')) ], [ (Compound('M_Biomass', 'C_b'), 1) ]) @@ -651,6 +661,8 @@ def setUp(self): + @@ -752,7 +764,8 @@ def test_biomass_reaction_exists(self): # Compare equation of reaction actual_equation = Reaction(Direction.Forward, [ (Compound('M_Glucose_6_DASH_P_LPAREN_c_RPAREN_', 'C_c'), - Decimal('0.56')) + Decimal('0.56')), + (Compound('M_Glucose_LPAREN_c_RPAREN_', 'C_c'), Decimal('0.88')) ], [ (Compound('M_Biomass', 'C_b'), 1) ]) @@ -908,6 +921,8 @@ def setUp(self): + @@ -1004,7 +1019,8 @@ def test_biomass_reaction_exists(self): # Compare equation of reaction actual_equation = Reaction(Direction.Forward, [ (Compound('M_Glucose_6_DASH_P_LPAREN_c_RPAREN_', 'C_c'), - Decimal('0.56')) + Decimal('0.56')), + (Compound('M_Glucose_LPAREN_c_RPAREN_', 'C_c'), Decimal('0.88')) ], [ (Compound('M_Biomass', 'C_b'), 1) ]) @@ -1058,6 +1074,56 @@ def test_create_and_convert_model(self): self.assertEqual(model.biomass_reaction, 'Biomass') +class TestModelExtracellularCompartment(unittest.TestCase): + def setUp(self): + self.model = native.NativeModel() + self.model.reactions.update([ + entry.DictReactionEntry({ + 'id': 'EX_g6p', + 'equation': parse_reaction('g6p[e] <=>') + }), + entry.DictReactionEntry({ + 'id': 'EX_glc-D', + 'equation': parse_reaction('glc-D[e] <=>') + }), + entry.DictReactionEntry({ + 'id': 'SINK_glc-D', + 'equation': parse_reaction('glc-D[c] <=>') + }), + entry.DictReactionEntry({ + 'id': 'rxn_1', + 'equation': parse_reaction('g6p[c] <=> glc-D[c]') + }), + entry.DictReactionEntry({ + 'id': 'TP_glc-D', + 'equation': parse_reaction('glc-D[c] <=> glc-D[e]') + }) + ]) + + def test_detect_model(self): + """Test that the extracellular compartment is detected. + + Since the 'e' compartment occurs more often in exchange reactions, + this compartment should be detected. + """ + extracellular = sbml.detect_extracellular_compartment(self.model) + self.assertEqual(extracellular, 'e') + + def test_convert_model(self): + self.model.extracellular_compartment = 'e' + sbml.convert_exchange_to_compounds(self.model) + + self.assertEqual( + {entry.id for entry in self.model.reactions}, + {'rxn_1', 'TP_glc-D', 'SINK_glc-D'}) + self.assertEqual( + set(itervalues(self.model.exchange)), + { + (Compound('g6p', 'e'), 'EX_g6p', None, None), + (Compound('glc-D', 'e'), 'EX_glc-D', None, None) + }) + + class TestMergeEquivalentCompounds(unittest.TestCase): def test_merge(self): model = native.NativeModel() From 1ede389c3880ea741229117284fb4577a64c2efc Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 30 May 2017 15:48:01 -0400 Subject: [PATCH 27/51] command: Convert SBML model in entry point With this change the SBML model information is fully extracted when running the psamm-sbml-model command, and the command should then work with level 1-2 models and level 3 models without FBC information. In addition, the level 3 with FBC will still work. --- psamm/command.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/psamm/command.py b/psamm/command.py index 2aef65b9..439770be 100644 --- a/psamm/command.py +++ b/psamm/command.py @@ -560,6 +560,7 @@ def main(command_class=None, args=None): def main_sbml(command_class=None, args=None): + """Run the SBML command line interface.""" # Set up logging for the command line interface if 'PSAMM_DEBUG' in os.environ: level = getattr(logging, os.environ['PSAMM_DEBUG'].upper(), None) @@ -575,10 +576,6 @@ def main_sbml(command_class=None, args=None): base_logger.addHandler(handler) base_logger.propagate = False - logger.warning( - 'This command is experimental. It currently only fully parses level 3' - ' SBML files!') - title = 'Metabolic modeling tools (SBML)' if command_class is not None: title, _, _ = command_class.__doc__.partition('\n\n') @@ -622,6 +619,7 @@ def main_sbml(command_class=None, args=None): context = FilePathContext(parsed_args.model) with context.open('r') as f: model = sbml.SBMLReader(f, context=context).create_model() + sbml.convert_sbml_model(model) # Instantiate command with model and run command = parsed_args.command(model, parsed_args) From 70abcfc6306ac4de7d0b2c438bb0c63ecab45a1c Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 30 May 2017 17:44:47 -0400 Subject: [PATCH 28/51] command: Allow merging compounds with option in SBML command --- psamm/command.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/psamm/command.py b/psamm/command.py index 439770be..7beea830 100644 --- a/psamm/command.py +++ b/psamm/command.py @@ -582,6 +582,9 @@ def main_sbml(command_class=None, args=None): parser = argparse.ArgumentParser(description=title) parser.add_argument('model', metavar='file', help='SBML file') + parser.add_argument('--merge-compounds', action='store_true', + help=('Merge identical compounds occuring in various' + ' compartments.')) parser.add_argument( '-V', '--version', action='version', version='%(prog)s ' + package_version) @@ -620,6 +623,8 @@ def main_sbml(command_class=None, args=None): with context.open('r') as f: model = sbml.SBMLReader(f, context=context).create_model() sbml.convert_sbml_model(model) + if parsed_args.merge_compounds: + sbml.merge_equivalent_compounds(model) # Instantiate command with model and run command = parsed_args.command(model, parsed_args) From ec5b7dd89c5ac7bdba021a4398a861b85be12c57 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Thu, 1 Jun 2017 13:47:53 -0400 Subject: [PATCH 29/51] sbml: Write zero value as 0 instead of 0.0 for exchange --- psamm/datasource/sbml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/psamm/datasource/sbml.py b/psamm/datasource/sbml.py index 414ab924..7e75e5d8 100644 --- a/psamm/datasource/sbml.py +++ b/psamm/datasource/sbml.py @@ -1740,9 +1740,9 @@ def convert_exchange_to_compounds(model): upper_flux = upper * abs(value) if lower_flux is None and equation.direction == Direction.Forward: - lower_flux = 0.0 + lower_flux = 0 if upper_flux is None and equation.direction == Direction.Reverse: - upper_flux = 0.0 + upper_flux = 0 # If the stoichiometric value of the reaction is reversed, the flux # limits must be flipped. From 88b2c1d668ee33385ae2778b35b800543b071681 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Thu, 1 Jun 2017 15:06:34 -0400 Subject: [PATCH 30/51] sbml: Fix sphinx references in docstrings --- psamm/datasource/sbml.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/psamm/datasource/sbml.py b/psamm/datasource/sbml.py index 414ab924..58dedf23 100644 --- a/psamm/datasource/sbml.py +++ b/psamm/datasource/sbml.py @@ -741,15 +741,15 @@ def get_compartment(self, compartment_id): return self._model_compartments[compartment_id] def get_reaction(self, reaction_id): - """Return :class:`.ReactionEntry` corresponding to reaction_id""" + """Return :class:`.SBMLReactionEntry` corresponding to reaction_id""" return self._model_reactions[reaction_id] def get_species(self, species_id): - """Return :class:`.SpeciesEntry` corresponding to species_id""" + """Return :class:`.SBMLSpeciesEntry` corresponding to species_id""" return self._model_species[species_id] def get_objective(self, objective_id): - """Return :class:`.ObjectiveEntry` corresponding to objective_id""" + """Return :class:`.SBMLObjectiveEntry` corresponding to objective_id""" return self._model_objectives[objective_id] def get_active_objective(self): @@ -757,12 +757,12 @@ def get_active_objective(self): @property def compartments(self): - """Iterator over :class:`.CompartmentEntry` entries.""" + """Iterator over :class:`.SBMLCompartmentEntry` entries.""" return itervalues(self._model_compartments) @property def reactions(self): - """Iterator over :class:`ReactionEntries <.ReactionEntry>`""" + """Iterator over :class:`ReactionEntries <.SBMLReactionEntry>`""" return itervalues(self._model_reactions) @property @@ -776,12 +776,12 @@ def species(self): @property def objectives(self): - """Iterator over :class:`.ObjectiveEntry`""" + """Iterator over :class:`.SBMLObjectiveEntry`""" return itervalues(self._model_objectives) @property def flux_bounds(self): - """Iterator over :class:`.FluxBoundEntry`""" + """Iterator over :class:`.SBMLFluxBoundEntry`""" return iter(self._flux_bounds) @property From 6ddb10c204a0973efc27f38e0ef453072d86ce0f Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Wed, 15 Mar 2017 13:42:43 -0400 Subject: [PATCH 31/51] formula: Implement additional dict-like methods --- psamm/formula.py | 10 ++++++++++ psamm/tests/test_formula.py | 19 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/psamm/formula.py b/psamm/formula.py index 36af2845..c46d21ac 100644 --- a/psamm/formula.py +++ b/psamm/formula.py @@ -269,6 +269,16 @@ def is_variable(self): def __contains__(self, element): return element in self._values + def get(self, element, default=None): + """Return value for element or default if not in the formula.""" + return self._values.get(element, default) + + def __getitem__(self, element): + return self._values[element] + + def __len__(self): + return len(self._values) + def __str__(self): """Return formula represented using Hill notation system diff --git a/psamm/tests/test_formula.py b/psamm/tests/test_formula.py index 502c1cf2..56c4d495 100755 --- a/psamm/tests/test_formula.py +++ b/psamm/tests/test_formula.py @@ -142,6 +142,25 @@ def test_formula_contains(self): self.assertIn(Atom.C, f) self.assertNotIn(Atom.Ag, f) + def test_formula_get(self): + f = Formula({Atom.H: 12, Atom.C: 6, Atom.O: 6}) + self.assertEqual(f.get(Atom.H), 12) + self.assertEqual(f.get(Atom.Au), None) + self.assertEqual(f.get(Atom.Hg, 4), 4) + + def test_formula_getitem(self): + f = Formula({Atom.H: 12, Atom.C: 6, Atom.O: 6}) + self.assertEqual(f[Atom.H], 12) + + def test_formula_getitem_not_found(self): + f = Formula({Atom.H: 12, Atom.C: 6, Atom.O: 6}) + with self.assertRaises(KeyError): + f[Atom.Au] + + def test_formula_length(self): + f = Formula({Atom.H: 12, Atom.C: 6, Atom.O: 6}) + self.assertEqual(len(f), 3) + def test_formula_to_string(self): f = Formula({Atom.H: 12, Atom.C: 6, Atom.O: 6}) self.assertEqual(str(f), 'C6H12O6') From 5485a8e5c4fa555f53b7f11494698448e1266ec6 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Fri, 9 Jun 2017 14:14:16 -0400 Subject: [PATCH 32/51] formula: Base on Counter instead of dict This allows some additional multiset operators to be defined with an efficient implementation. The __and__ and __sub__ operators are defined for Formula objects. --- psamm/formula.py | 48 ++++++++++++++++++++++++++----------- psamm/tests/test_formula.py | 26 +++++++++++++++++++- 2 files changed, 59 insertions(+), 15 deletions(-) diff --git a/psamm/formula.py b/psamm/formula.py index c46d21ac..192cc2f0 100644 --- a/psamm/formula.py +++ b/psamm/formula.py @@ -207,14 +207,15 @@ class Formula(FormulaElement): """ def __init__(self, values={}): - self._values = {} + self._values = Counter() self._variables = set() for element, value in iteritems(values): if not isinstance(element, FormulaElement): raise ValueError('Not a formula element: {}'.format( repr(element))) - if element != Formula() and value != 0: + if (value != 0 and + (not isinstance(element, Formula) or len(element) > 0)): self._values[element] = value if callable(getattr(value, 'variables', None)): @@ -243,22 +244,22 @@ def flattened(self): """ stack = [(self, 1)] - result = {} + result = Counter() while len(stack) > 0: var, value = stack.pop() if isinstance(var, Formula): for sub_var, sub_value in iteritems(var._values): stack.append((sub_var, value*sub_value)) else: - if var in result: - result[var] += value - else: - result[var] = value + result[var] += value return Formula(result) def variables(self): return iter(self._variables) + def __iter__(self): + return iter(self._values) + def items(self): """Iterate over (:class:`.FormulaElement`, value)-pairs""" return iteritems(self._values) @@ -274,6 +275,8 @@ def get(self, element, default=None): return self._values.get(element, default) def __getitem__(self, element): + if element not in self._values: + raise KeyError(repr(element)) return self._values[element] def __len__(self): @@ -329,20 +332,37 @@ def nongrouped(element, value): return s def __repr__(self): - return str('Formula({})').format(repr(self._values)) + return str('Formula({})').format(repr(dict(self._values))) + + def __and__(self, other): + """Intersection of formula elements.""" + if isinstance(other, Formula): + return Formula(self._values & other._values) + elif isinstance(other, FormulaElement): + return Formula(self._values & Counter([other])) + return NotImplemented def __or__(self, other): - """Merge formulas into one formula""" + """Merge formulas into one formula.""" + # Note: This operator corresponds to the add-operator on Counter not + # the or-operator! The add-operator is used here (on the superclass) + # to compose formula elements into subformulas. if isinstance(other, Formula): - values = Counter(self._values) - values.update(other._values) - return Formula(values) + return Formula(self._values + other._values) elif isinstance(other, FormulaElement): - return self | Formula({other: 1}) + return Formula(self._values + Counter([other])) + return NotImplemented + + def __sub__(self, other): + """Substract other formula from this formula.""" + if isinstance(other, Formula): + return Formula(self._values - other._values) + elif isinstance(other, FormulaElement): + return Formula(self._values - Counter([other])) return NotImplemented def __mul__(self, other): - """Multiply formula element by other""" + """Multiply formula element by other.""" values = {key: value*other for key, value in iteritems(self._values)} return Formula(values) diff --git a/psamm/tests/test_formula.py b/psamm/tests/test_formula.py index 56c4d495..c5dee4da 100755 --- a/psamm/tests/test_formula.py +++ b/psamm/tests/test_formula.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU General Public License # along with PSAMM. If not, see . # -# Copyright 2014-2015 Jon Lund Steffensen +# Copyright 2014-2017 Jon Lund Steffensen import unittest @@ -117,6 +117,26 @@ def test_formula_repeat(self): f = Formula({Atom.H: 2, Atom.O: 1}).repeat(4) self.assertEqual(f, Formula({Formula({Atom.H: 2, Atom.O: 1}): 4})) + def test_formula_intersection(self): + f1 = Formula({Atom.H: 4, Atom.N: 1}) + f2 = Formula({Atom.H: 2, Atom.O: 1}) + f = f1 & f2 + self.assertEqual(f, Formula({Atom.H: 2})) + + def test_formula_intersection_with_atom(self): + f = Formula({Atom.H: 4, Atom.N: 1}) & Atom.H + self.assertEqual(f, Formula({Atom.H: 1})) + + def test_formula_subtraction(self): + f1 = Formula({Atom.H: 4, Atom.N: 1}) + f2 = Formula({Atom.H: 2, Atom.O: 1}) + f = f1 - f2 + self.assertEqual(f, Formula({Atom.H: 2, Atom.N: 1})) + + def test_formula_subtraction_with_atom(self): + f = Formula({Atom.H: 4, Atom.N: 1}) - Atom.H + self.assertEqual(f, Formula({Atom.H: 3, Atom.N: 1})) + def test_formula_equals_other_formula(self): f = Formula({Atom.H: 2, Atom.O: 1}) self.assertEqual(f, Formula({Atom.O: 1, Atom.H: 2})) @@ -129,6 +149,10 @@ def test_formula_not_equals_other_with_different_number(self): f = Formula({Atom.Au: 1}) self.assertNotEqual(f, Formula({Atom.Au: 2})) + def test_formula_iter(self): + f = Formula({Atom.H: 12, Atom.C: 6, Atom.O: 6}) + self.assertEqual(set(iter(f)), {Atom.H, Atom.C, Atom.O}) + def test_formula_items(self): f = Formula({Atom.H: 12, Atom.C: 6, Atom.O: 6}) self.assertEqual(dict(f.items()), { From 3e456297a3293e3a58a2bf04b5ca58f1c3cac417 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Fri, 9 Jun 2017 15:29:37 -0400 Subject: [PATCH 33/51] formula: Report span of parse errors --- psamm/balancecheck.py | 12 ++- psamm/formula.py | 193 ++++++++++++++++++++---------------- psamm/tests/test_formula.py | 16 ++- 3 files changed, 127 insertions(+), 94 deletions(-) diff --git a/psamm/balancecheck.py b/psamm/balancecheck.py index cf31e6db..7f73aa50 100644 --- a/psamm/balancecheck.py +++ b/psamm/balancecheck.py @@ -23,7 +23,7 @@ from six.moves import reduce -from .formula import Formula +from .formula import Formula, ParseError logger = logging.getLogger(__name__) @@ -106,10 +106,12 @@ def formula_balance(model): try: f = Formula.parse(compound.formula).flattened() compound_formula[compound.id] = f - except ValueError: - logger.warning( - 'Error parsing formula for compound {}: {}'.format( - compound.id, compound.formula), exc_info=True) + except ParseError as e: + msg = 'Error parsing formula for compound {}:\n{}\n{}'.format( + compound.id, e, compound.formula) + if e.indicator is not None: + msg += '\n{}'.format(e.indicator) + logger.warning(msg) for reaction in model.reactions: yield reaction, reaction_formula(reaction.equation, compound_formula) diff --git a/psamm/formula.py b/psamm/formula.py index 192cc2f0..f247db20 100644 --- a/psamm/formula.py +++ b/psamm/formula.py @@ -13,7 +13,7 @@ # You should have received a copy of the GNU General Public License # along with PSAMM. If not, see . # -# Copyright 2014-2015 Jon Lund Steffensen +# Copyright 2014-2017 Jon Lund Steffensen """Parser and representation of chemical formulas. @@ -380,90 +380,8 @@ def __ne__(self, other): @classmethod def parse(cls, s): - """Parse a formula string (e.g. C6H10O2)""" - - scanner = re.compile(r''' - (\s+) | # whitespace - (\(|\)) | # group - ([A-Z][a-z]*) | # element - (\d+) | # number - ([a-z]) | # variable - (\Z) | # end - (.) # error - ''', re.DOTALL | re.VERBOSE) - - def transform_subformula(form): - '''Extract radical if subformula is a singleton with a radical''' - if isinstance(form, dict) and len(form) == 1: - # A radical in a singleton subformula is interpreted as a - # numbered radical. - element, value = next(iteritems(form)) - if isinstance(element, Radical): - return Radical('{}{}'.format(element.symbol, value)) - return form - - stack = [] - formula = {} - expect_count = False - - def close(formula, count=1): - if len(stack) == 0: - raise ValueError('Unbalanced parenthesis group in formula') - subformula = transform_subformula(formula) - if isinstance(subformula, dict): - subformula = Formula(subformula) - - formula = stack.pop() - if subformula not in formula: - formula[subformula] = 0 - formula[subformula] += count - return formula - - for match in re.finditer(scanner, s): - (whitespace, group, element, number, variable, end, - error) = match.groups() - - if error is not None: - raise ValueError('Invalid token in formula string: {}'.format( - match.group(0))) - elif whitespace is not None: - continue - elif group is not None and group == '(': - if expect_count: - formula = close(formula) - stack.append(formula) - formula = {} - expect_count = False - elif group is not None and group == ')': - if expect_count: - formula = close(formula) - expect_count = True - elif element is not None: - if expect_count: - formula = close(formula) - stack.append(formula) - if element in 'RX': - formula = Radical(element) - else: - formula = Atom(element) - expect_count = True - elif number is not None and expect_count: - formula = close(formula, int(number)) - expect_count = False - elif variable is not None and expect_count: - formula = close(formula, Expression(variable)) - expect_count = False - elif end is not None: - if expect_count: - formula = close(formula) - else: - raise ValueError('Invalid token in formula string:' - ' {}'.format(match.group(0))) - - if len(stack) > 0: - raise ValueError('Unbalanced parenthesis group in formula') - - return Formula(formula) + """Parse a formula string (e.g. C6H10O2).""" + return _parse_formula(s) @classmethod def balance(cls, lhs, rhs): @@ -488,6 +406,105 @@ def missing(formula, other): reduce(operator.or_, missing(lhs, rhs), Formula())) -if __name__ == '__main__': - import doctest - doctest.testmod() +class ParseError(Exception): + """Signals error parsing formula.""" + + def __init__(self, *args, **kwargs): + self._span = kwargs.pop('span', None) + super(ParseError, self).__init__(*args, **kwargs) + + @property + def indicator(self): + if self._span is None: + return None + pre = ' ' * self._span[0] + ind = '^' * max(1, self._span[1] - self._span[0]) + return pre + ind + + +def _parse_formula(s): + """Parse formula string.""" + scanner = re.compile(r''' + (\s+) | # whitespace + (\(|\)) | # group + ([A-Z][a-z]*) | # element + (\d+) | # number + ([a-z]) | # variable + (\Z) | # end + (.) # error + ''', re.DOTALL | re.VERBOSE) + + def transform_subformula(form): + """Extract radical if subformula is a singleton with a radical.""" + if isinstance(form, dict) and len(form) == 1: + # A radical in a singleton subformula is interpreted as a + # numbered radical. + element, value = next(iteritems(form)) + if isinstance(element, Radical): + return Radical('{}{}'.format(element.symbol, value)) + return form + + stack = [] + formula = {} + expect_count = False + + def close(formula, count=1): + if len(stack) == 0: + raise ParseError('Unbalanced parenthesis group in formula') + subformula = transform_subformula(formula) + if isinstance(subformula, dict): + subformula = Formula(subformula) + + formula = stack.pop() + if subformula not in formula: + formula[subformula] = 0 + formula[subformula] += count + return formula + + for match in re.finditer(scanner, s): + (whitespace, group, element, number, variable, end, + error) = match.groups() + + if error is not None: + raise ParseError( + 'Invalid token in formula string: {!r}'.format(match.group(0)), + span=(match.start(), match.end())) + elif whitespace is not None: + continue + elif group is not None and group == '(': + if expect_count: + formula = close(formula) + stack.append(formula) + formula = {} + expect_count = False + elif group is not None and group == ')': + if expect_count: + formula = close(formula) + expect_count = True + elif element is not None: + if expect_count: + formula = close(formula) + stack.append(formula) + if element in 'RX': + formula = Radical(element) + else: + formula = Atom(element) + expect_count = True + elif number is not None and expect_count: + formula = close(formula, int(number)) + expect_count = False + elif variable is not None and expect_count: + formula = close(formula, Expression(variable)) + expect_count = False + elif end is not None: + if expect_count: + formula = close(formula) + else: + raise ParseError( + 'Invalid token in formula string: {!r}'.format(match.group(0)), + span=(match.start(), match.end())) + + if len(stack) > 0: + raise ParseError('Unbalanced parenthesis group in formula') + + return Formula(formula) diff --git a/psamm/tests/test_formula.py b/psamm/tests/test_formula.py index c5dee4da..d816e499 100755 --- a/psamm/tests/test_formula.py +++ b/psamm/tests/test_formula.py @@ -18,7 +18,7 @@ import unittest -from psamm.formula import Formula, FormulaElement, Atom, Radical +from psamm.formula import Formula, FormulaElement, Atom, Radical, ParseError class TestFormulaElement(unittest.TestCase): @@ -317,6 +317,20 @@ def test_formula_parse_with_numbered_radical(self): self.assertEqual(f, Formula({ Atom.C: 2, Atom.H: 4, Atom.N: 1, Atom.O: 2, Radical('R1'): 1})) + def test_formula_parse_error(self): + with self.assertRaises(ParseError): + Formula.parse('H2O. ABC') + + +class TestFormulaParseError(unittest.TestCase): + def test_create_with_span(self): + e = ParseError('This is an error', span=(4, 7)) + self.assertEqual(e.indicator, ' ^^^') + + def test_create_without_span(self): + e = ParseError('This is an error') + self.assertIsNone(e.indicator) + if __name__ == '__main__': unittest.main() From a6ad74fd661597517f9ffa81748e6e2fd36338ec Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Wed, 14 Jun 2017 13:22:07 -0400 Subject: [PATCH 34/51] command: Extend test model --- psamm/tests/test_command.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/psamm/tests/test_command.py b/psamm/tests/test_command.py index ed587c25..c298b496 100644 --- a/psamm/tests/test_command.py +++ b/psamm/tests/test_command.py @@ -182,7 +182,7 @@ def setUp(self): }, { 'id': 'rxn_2_\u03c0', 'name': 'Reaction 2', - 'equation': '|B[c]| <=> |C[e]|', + 'equation': 'atp[c] + (2) |B[c]| <=> adp[c] + |C[e]|', 'genes': 'gene_3 or (gene_4 and gene_5)' }, { 'id': 'rxn_3', @@ -205,6 +205,18 @@ def setUp(self): 'id': 'C', 'charge': -1, 'formula': 'O2' + }, + { + 'id': 'atp', + 'name': '\u2192 ATP ', + 'charge': -4, + 'formula': 'C10H12N5O13P3' + }, + { + 'id': 'adp', + 'name': ' ADP', + 'charge': -3, + 'formula': 'C10H12N5O10P2' } ], 'exchange': [ @@ -510,7 +522,7 @@ def test_run_tableexport_reactions(self): ['id', 'equation', 'genes', 'name', 'in_model'], ['rxn_1', 'A_\u2206[e] => B[c]', '["gene_1", "gene_2"]', 'Reaction 1', 'true'], - ['rxn_2_\u03c0', 'B[c] <=> C[e]', + ['rxn_2_\u03c0', 'atp[c] + (2) B[c] <=> adp[c] + C[e]', 'gene_3 or (gene_4 and gene_5)', 'Reaction 2', 'true'], ['rxn_3', 'D[c] => E[c]', '', '', 'true'], ]) @@ -522,7 +534,9 @@ def test_run_tableexport_compounds(self): ['id', 'name', 'charge', 'formula', 'in_model'], ['A_\u2206', 'Compound A', '0', '', 'true'], ['B', 'Compound B', '-1', 'H2O', 'true'], - ['C', '', '-1', 'O2', 'true'] + ['C', '', '-1', 'O2', 'true'], + ['atp', '\u2192 ATP ', '-4', 'C10H12N5O13P3', 'true'], + ['adp', ' ADP', '-3', 'C10H12N5O10P2', 'true'] ]) def test_run_tableexport_exchange(self): From 5b300e7ad442cb8f98a8ca26c8abf671129cf693 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Wed, 14 Jun 2017 13:22:26 -0400 Subject: [PATCH 35/51] fastcore: Fix unicode error --- psamm/fastcore.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/psamm/fastcore.py b/psamm/fastcore.py index 4fca620f..d29057cd 100644 --- a/psamm/fastcore.py +++ b/psamm/fastcore.py @@ -22,6 +22,8 @@ these algorithms to a :class:`MetabolicModel`. """ +from __future__ import unicode_literals + import logging from .fluxanalysis import FluxBalanceProblem From 2fd5bd07f5d1290697d3cd13930a6bb6c622962c Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 20 Jun 2017 11:20:21 -0400 Subject: [PATCH 36/51] lp: Forward exceptions as SolverError Adds the SolverError in the lp module. Exceptions raised from the solver when solve() is called should inherit from this or should be wrapped as this to allow callers to handle such conditions gracefully. --- psamm/lpsolver/cplex.py | 13 +++++++++---- psamm/lpsolver/glpk.py | 4 ++-- psamm/lpsolver/lp.py | 4 ++++ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/psamm/lpsolver/cplex.py b/psamm/lpsolver/cplex.py index 8ee33c58..1a170378 100644 --- a/psamm/lpsolver/cplex.py +++ b/psamm/lpsolver/cplex.py @@ -23,7 +23,7 @@ from itertools import repeat, count import numbers -from six import text_type +from six import text_type, raise_from from six.moves import zip import cplex as cp @@ -31,8 +31,9 @@ from .lp import Constraint as BaseConstraint from .lp import Problem as BaseProblem from .lp import Result as BaseResult -from .lp import (Expression, Product, RelationSense, ObjectiveSense, - VariableType, InvalidResultError, RangedProperty) +from .lp import (SolverError, Expression, Product, RelationSense, + ObjectiveSense, VariableType, InvalidResultError, + RangedProperty) from ..util import LoggerFile # Module-level logging @@ -341,7 +342,11 @@ def _reset_problem_type(self): def _solve(self): self._reset_problem_type() - self._cp.solve() + try: + self._cp.solve() + except cp.exceptions.CplexSolverError as e: + raise_from(SolverError('Unable to solve: {}'.format(e)), e) + self._solve_count += 1 if (self._cp.solution.get_status() == self._cp.solution.status.abort_user): diff --git a/psamm/lpsolver/glpk.py b/psamm/lpsolver/glpk.py index 07a7117c..287b3c69 100644 --- a/psamm/lpsolver/glpk.py +++ b/psamm/lpsolver/glpk.py @@ -32,7 +32,7 @@ from .lp import Problem as BaseProblem from .lp import Result as BaseResult from .lp import (Expression, RelationSense, ObjectiveSense, VariableType, - InvalidResultError, ranged_property) + InvalidResultError, ranged_property, SolverError) # Module-level logging logger = logging.getLogger(__name__) @@ -52,7 +52,7 @@ def _term_hook(s): _INF = float('inf') -class GLPKError(Exception): +class GLPKError(SolverError): """Error from calling GLPK library.""" diff --git a/psamm/lpsolver/lp.py b/psamm/lpsolver/lp.py index 83863916..4227fdfb 100644 --- a/psamm/lpsolver/lp.py +++ b/psamm/lpsolver/lp.py @@ -47,6 +47,10 @@ _INF = float('inf') +class SolverError(Exception): + """Error wrapping solver specific errors.""" + + class VariableSet(tuple): """A tuple used to represent sets of variables.""" From 5dd62f3d8499fba95731b1119471246d3659db6b Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 20 Jun 2017 12:11:59 -0400 Subject: [PATCH 37/51] lp: Raise error on non-optimal solutions This helps avoid a common bug where the result is not checked and therefore non-optimal solution values are used as though an optimal solution was found. Instead the solve() now raises a SolverError when no optimal solution is found. This makes it impossible to forget to check the solution in the most common case. In cases when the exception is not desired the new solve_unchecked() can be used which has the old behavior. --- psamm/commands/gapcheck.py | 16 +++++++++------- psamm/fluxanalysis.py | 12 ++++++------ psamm/fluxcoupling.py | 5 +++-- psamm/gapfill.py | 18 ++++++++++-------- psamm/lpsolver/cplex.py | 9 +++++++-- psamm/lpsolver/glpk.py | 9 +++++++-- psamm/lpsolver/gurobi.py | 9 +++++++-- psamm/lpsolver/lp.py | 20 ++++++++++++++++++-- psamm/lpsolver/qsoptex.py | 9 +++++++-- psamm/massconsistency.py | 26 +++++++++++++++----------- psamm/tests/test_lpsolver_cplex.py | 8 ++++++-- 11 files changed, 95 insertions(+), 46 deletions(-) diff --git a/psamm/commands/gapcheck.py b/psamm/commands/gapcheck.py index c23a77a2..8030e4f4 100644 --- a/psamm/commands/gapcheck.py +++ b/psamm/commands/gapcheck.py @@ -142,10 +142,11 @@ def run_sink_check(self, model, solver, threshold, implicit_sinks=True): mass_balance_constrs[compound].delete() prob.set_objective(lhs) - result = prob.solve(lp.ObjectiveSense.Maximize) - if not result: - logger.warning('Failed to solve for compound: {}'.format( - compound)) + try: + result = prob.solve(lp.ObjectiveSense.Maximize) + except lp.SolverError as e: + logger.warning('Failed to solve for compound: {} ({})'.format( + compound, e)) if result.get_value(lhs) < threshold: yield compound @@ -190,11 +191,12 @@ def run_reaction_production_check(self, model, solver, threshold, prob.set_objective(v(reaction)) for sense in (lp.ObjectiveSense.Maximize, lp.ObjectiveSense.Minimize): - result = prob.solve(sense) - if not result: + try: + result = prob.solve(sense) + except lp.SolverError as e: self.fail( 'Failed to solve for compound, reaction: {}, {}:' - ' {}'.format(compound, reaction, result.status)) + ' {}'.format(compound, reaction, e)) flux = result.get_value(v(reaction)) for compound, value in model.get_reaction_values(reaction): diff --git a/psamm/fluxanalysis.py b/psamm/fluxanalysis.py index ac42355d..928b8812 100644 --- a/psamm/fluxanalysis.py +++ b/psamm/fluxanalysis.py @@ -13,7 +13,7 @@ # You should have received a copy of the GNU General Public License # along with PSAMM. If not, see . # -# Copyright 2014-2015 Jon Lund Steffensen +# Copyright 2014-2017 Jon Lund Steffensen """Implementation of Flux Balance Analysis.""" @@ -24,7 +24,7 @@ from .lpsolver import lp -from six import iteritems +from six import iteritems, raise_from # Module-level logging logger = logging.getLogger(__name__) @@ -266,10 +266,10 @@ def _solve(self): self._remove_constr.pop().delete() try: - result = self._prob.solve(lp.ObjectiveSense.Maximize) - if not result: - raise FluxBalanceError('Non-optimal solution: {}'.format( - result.status), result=result) + self._prob.solve(lp.ObjectiveSense.Maximize) + except lp.SolverError as e: + raise_from(FluxBalanceError('Failed to solve: {}'.format( + e), result=self._prob.result), e) finally: # Set temporary constraints to be removed on next solve call self._remove_constr = self._temp_constr diff --git a/psamm/fluxcoupling.py b/psamm/fluxcoupling.py index 738a56c6..f07889df 100755 --- a/psamm/fluxcoupling.py +++ b/psamm/fluxcoupling.py @@ -111,8 +111,9 @@ def solve(self, reaction_1, reaction_2): results = [] for sense in (lp.ObjectiveSense.Minimize, lp.ObjectiveSense.Maximize): - result = self._prob.solve(sense) - if not result: + try: + result = self._prob.solve(sense) + except lp.SolverError: results.append(None) else: results.append(result.get_value(self._vbow(reaction_1))) diff --git a/psamm/gapfill.py b/psamm/gapfill.py index 8575353d..f363aa1a 100644 --- a/psamm/gapfill.py +++ b/psamm/gapfill.py @@ -13,7 +13,7 @@ # You should have received a copy of the GNU General Public License # along with PSAMM. If not, see . # -# Copyright 2014-2015 Jon Lund Steffensen +# Copyright 2014-2017 Jon Lund Steffensen """Identify blocked metabolites and possible reconstructions. @@ -22,7 +22,7 @@ import logging -from six import iteritems +from six import iteritems, raise_from from .lpsolver import lp @@ -130,9 +130,10 @@ def gapfind(model, solver, epsilon=0.001, v_max=1000, implicit_sinks=True): prob.add_linear_constraints(lhs == 0) # Solve - result = prob.solve(lp.ObjectiveSense.Maximize) - if not result: - raise GapFillError('Non-optimal solution: {}'.format(result.status)) + try: + result = prob.solve(lp.ObjectiveSense.Maximize) + except lp.SolverError as e: + raise_from(GapFillError('Failed to solve gapfill: {}'.format(e), e)) for compound in model.compounds: if result.get_value(xp(compound)) < 0.5: @@ -254,9 +255,10 @@ def gapfill( prob.add_linear_constraints(lhs == 0) # Solve - result = prob.solve(lp.ObjectiveSense.Minimize) - if not result: - raise GapFillError('Non-optimal solution: {}'.format(result.status)) + try: + prob.solve(lp.ObjectiveSense.Minimize) + except lp.SolverError as e: + raise_from(GapFillError('Failed to solve gapfill: {}'.format(e)), e) def added_iter(): for reaction_id in database_reactions: diff --git a/psamm/lpsolver/cplex.py b/psamm/lpsolver/cplex.py index 1a170378..3b9c8b27 100644 --- a/psamm/lpsolver/cplex.py +++ b/psamm/lpsolver/cplex.py @@ -352,8 +352,13 @@ def _solve(self): self._cp.solution.status.abort_user): raise KeyboardInterrupt() - def solve(self, sense=None): - """Solve problem""" + def solve_unchecked(self, sense=None): + """Solve problem and return result. + + The user must manually check the status of the result to determine + whether an optimal solution was found. A :class:`SolverError` may still + be raised if the underlying solver raises an exception. + """ if sense is not None: self.set_objective_sense(sense) diff --git a/psamm/lpsolver/glpk.py b/psamm/lpsolver/glpk.py index 287b3c69..d6a02a98 100644 --- a/psamm/lpsolver/glpk.py +++ b/psamm/lpsolver/glpk.py @@ -272,8 +272,13 @@ def set_objective_sense(self, sense): swiglpk.glp_set_obj_dir(self._p, self.OBJ_SENSE_MAP[sense]) - def solve(self, sense=None): - """Solve problem.""" + def solve_unchecked(self, sense=None): + """Solve problem and return result. + + The user must manually check the status of the result to determine + whether an optimal solution was found. A :class:`SolverError` may still + be raised if the underlying solver raises an exception. + """ if sense is not None: self.set_objective_sense(sense) diff --git a/psamm/lpsolver/gurobi.py b/psamm/lpsolver/gurobi.py index f5e6f210..d4087bcf 100644 --- a/psamm/lpsolver/gurobi.py +++ b/psamm/lpsolver/gurobi.py @@ -240,8 +240,13 @@ def set_objective_sense(self, sense): self._p.ModelSense = self.OBJ_SENSE_MAP[sense] - def solve(self, sense=None): - """Solve problem.""" + def solve_unchecked(self, sense=None): + """Solve problem and return result. + + The user must manually check the status of the result to determine + whether an optimal solution was found. A :class:`SolverError` may still + be raised if the underlying solver raises an exception. + """ if sense is not None: self.set_objective_sense(sense) self._p.optimize() diff --git a/psamm/lpsolver/lp.py b/psamm/lpsolver/lp.py index 4227fdfb..ccb7a31d 100644 --- a/psamm/lpsolver/lp.py +++ b/psamm/lpsolver/lp.py @@ -771,9 +771,25 @@ def set_objective(self, expression): def set_objective_sense(self, sense): """Set type of problem (minimize or maximize)""" - @abc.abstractmethod def solve(self, sense=None): - """Solve problem and return result""" + """Solve problem and return result. + + Raises :class:`SolverError` if the solution is not optimal. + """ + result = self.solve_unchecked(sense) + if not result: + raise SolverError('Solution not optimal: {}'.format(result.status)) + + return result + + @abc.abstractmethod + def solve_unchecked(self, sense=None): + """Solve problem and return result. + + The user must manually check the status of the result to determine + whether an optimal solution was found. A :class:`SolverError` may still + be raised if the underlying solver raises an exception. + """ @abc.abstractproperty def result(self): diff --git a/psamm/lpsolver/qsoptex.py b/psamm/lpsolver/qsoptex.py index 24831f8b..66ff13d4 100644 --- a/psamm/lpsolver/qsoptex.py +++ b/psamm/lpsolver/qsoptex.py @@ -175,8 +175,13 @@ def set_objective_sense(self, sense): else: raise ValueError('Invalid objective sense') - def solve(self, sense=None): - """Solve problem""" + def solve_unchecked(self, sense=None): + """Solve problem and return result. + + The user must manually check the status of the result to determine + whether an optimal solution was found. A :class:`SolverError` may still + be raised if the underlying solver raises an exception. + """ if sense is not None: self.set_objective_sense(sense) self._p.solve() diff --git a/psamm/massconsistency.py b/psamm/massconsistency.py index 91e9c630..128be235 100644 --- a/psamm/massconsistency.py +++ b/psamm/massconsistency.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU General Public License # along with PSAMM. If not, see . # -# Copyright 2014-2015 Jon Lund Steffensen +# Copyright 2014-2017 Jon Lund Steffensen """Mass consistency analysis of metabolic databases @@ -29,7 +29,7 @@ from .lpsolver import lp -from six import iteritems +from six import iteritems, raise_from class MassConsistencyError(Exception): @@ -67,7 +67,7 @@ def is_consistent(database, solver, exchange=set(), zeromass=set()): if reaction not in exchange: prob.add_linear_constraints(lhs == 0) - result = prob.solve(lp.ObjectiveSense.Minimize) + result = prob.solve_unchecked(lp.ObjectiveSense.Minimize) return result.success @@ -119,10 +119,12 @@ def check_reaction_consistency(database, solver, exchange=set(), prob.add_linear_constraints(lhs == 0) # Solve - result = prob.solve(lp.ObjectiveSense.Minimize) - if not result: - raise MassConsistencyError('Non-optimal solution: {}'.format( - result.status)) + try: + prob.solve(lp.ObjectiveSense.Minimize) + except lp.SolverError as e: + raise_from( + MassConsistencyError('Failed to solve mass consistency: {}'.format( + e)), e) def iterate_reactions(): for reaction_id in database.reactions: @@ -173,10 +175,12 @@ def check_compound_consistency(database, solver, exchange=set(), prob.add_linear_constraints(lhs == 0) # Solve - result = prob.solve(lp.ObjectiveSense.Maximize) - if not result: - raise MassConsistencyError('Non-optimal solution: {}'.format( - result.status)) + try: + prob.solve(lp.ObjectiveSense.Maximize) + except lp.SolverError as e: + raise_from( + MassConsistencyError('Failed to solve mass consistency: {}'.format( + e)), e) for compound in mass_compounds: yield compound, m.value(compound) diff --git a/psamm/tests/test_lpsolver_cplex.py b/psamm/tests/test_lpsolver_cplex.py index c811967b..313ebeb4 100644 --- a/psamm/tests/test_lpsolver_cplex.py +++ b/psamm/tests/test_lpsolver_cplex.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU General Public License # along with PSAMM. If not, see . # -# Copyright 2014-2015 Jon Lund Steffensen +# Copyright 2014-2017 Jon Lund Steffensen import unittest @@ -98,9 +98,13 @@ def test_result_to_bool_conversion_on_infeasible(self): prob.var('z') <= 3) prob.set_objective_sense(lp.ObjectiveSense.Maximize) prob.set_linear_objective(2*prob.var('x')) - result = prob.solve() + result = prob.solve_unchecked() self.assertFalse(result) + # Check that the normal solve raises SolverError when non-optimal. + with self.assertRaises(lp.SolverError): + prob.solve() + def test_constraint_delete(self): prob = self.solver.create_problem() prob.define('x', 'y', lower=0, upper=10) From 059420656dda8859e7761fbd54100080b12841da Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 20 Jun 2017 15:59:37 -0400 Subject: [PATCH 38/51] lp: Sort Product variables by text string Previously these were sorted directly by the variable. Since the variable is not necessarily an object that implements an ordering, this will not work in Python 3. The sorting is only useful for pretty printing the expression is converted to text in __str__. For now, use text_type() as the sort key instead of the raw object. --- psamm/lpsolver/lp.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/psamm/lpsolver/lp.py b/psamm/lpsolver/lp.py index ccb7a31d..0f25ac8e 100644 --- a/psamm/lpsolver/lp.py +++ b/psamm/lpsolver/lp.py @@ -278,7 +278,7 @@ def __mul__(self, other): for v2, value2 in iteritems(other._variables): p1 = v1 if isinstance(v1, Product) else Product((v1,)) p2 = v2 if isinstance(v2, Product) else Product((v2,)) - product = Product(sorted(p1 + p2)) + product = Product(sorted(p1 + p2, key=text_type)) variables[product] += value1 * value2 if other._offset != 0: @@ -310,7 +310,7 @@ def __imul__(self, other): for v2, value2 in iteritems(other._variables): p1 = v1 if isinstance(v1, Product) else Product((v1,)) p2 = v2 if isinstance(v2, Product) else Product((v2,)) - product = Product(sorted(p1 + p2)) + product = Product(sorted(p1 + p2, key=text_type)) variables[product] += value1 * value2 if other._offset != 0: @@ -414,11 +414,15 @@ def __lt__(self, other): def __str__(self): """Return string representation of expression""" + def sort_key_variable(item): + key, _ = item + is_product = isinstance(key, Product) + return is_product, text_type(key) + def all_terms(): count_vars = 0 for name, value in sorted( - iteritems(self._variables), - key=lambda p: (isinstance(p[0], Product), p[0])): + iteritems(self._variables), key=sort_key_variable): if value != 0: count_vars += 1 if isinstance(name, VariableSet): From 0388ddf6fe0c2e6c44adab1e60fdd619020cbf99 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Fri, 15 Jan 2016 15:25:18 -0500 Subject: [PATCH 39/51] Add moma module for Minimization of Metabolic Adjustments The module currently implements MOMA using L1 and L2 minimization of flux differences as well as a more complex formulation which is not influenced by arbitrary fluxes in the wildtype solution (also L1 and L2). --- docs/api/moma.rst | 6 + docs/references.rst | 7 + psamm/moma.py | 364 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 377 insertions(+) create mode 100644 docs/api/moma.rst create mode 100644 psamm/moma.py diff --git a/docs/api/moma.rst b/docs/api/moma.rst new file mode 100644 index 00000000..fd879a39 --- /dev/null +++ b/docs/api/moma.rst @@ -0,0 +1,6 @@ + +``psamm.moma`` -- Minimization of metabolic adjustments +======================================================= + +.. automodule:: psamm.moma + :members: diff --git a/docs/references.rst b/docs/references.rst index 32e84667..6547bf58 100644 --- a/docs/references.rst +++ b/docs/references.rst @@ -20,6 +20,9 @@ References .. [Mahadevan03] Mahadevan R, Schilling CH. The effects of alternate optimal solutions in constraint-based genome-scale metabolic models. Metab Eng. 2003;5: 264–276. :doi:`10.1016/j.ymben.2003.09.002`. +.. [Mo09] Mo ML, Palsson BØ, Herrgård MJ. Connecting extracellular metabolomic + measurements to intracellular flux states in yeast. BMC Systems Biology. + 2009;3(1):3-37. :doi:`10.1186/1752-0509-3-37`. .. [Muller13] Müller AC, Bockmayr A. Fast thermodynamically constrained flux variability analysis. Bioinformatics. 2013;29: 903–909. :doi:`10.1093/bioinformatics/btt059`. @@ -30,6 +33,10 @@ References definition of metabolic pathways and their use in interpreting metabolic function from a pathway-oriented perspective. J Theor Biol. 2000;203: 229–48. :doi:`10.1006/jtbi.2000.1073`. +.. [Segre02] Segrè D, Vitkup D, Church GM. Analysis of optimality in natural + and perturbed metabolic networks. Proceedings of the National Academy of + Sciences of the United States of America. 2002;99(23):15112-15117. + :doi:`10.1073/pnas.232349399`. .. [Thiele14] Thiele I, Vlassis N, Fleming RMT. fastGapFill: efficient gap filling in metabolic networks. Bioinformatics. 2014;30: 2529–31. :doi:`10.1093/bioinformatics/btu321`. diff --git a/psamm/moma.py b/psamm/moma.py new file mode 100644 index 00000000..726a36ad --- /dev/null +++ b/psamm/moma.py @@ -0,0 +1,364 @@ +# This file is part of PSAMM. +# +# PSAMM is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PSAMM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PSAMM. If not, see . +# +# Copyright 2016-2017 Jon Lund Steffensen +# Copyright 2016-2017 Brian Bishop + +"""Implementation of Minimization of Metabolic Adjustments (MOMA).""" + +import logging +from itertools import product + +from six import iteritems, text_type, raise_from + +from psamm.lpsolver import lp + +logger = logging.getLogger(__name__) + + +class MOMAError(Exception): + """Error indicating an error solving MOMA.""" + + +class ConstraintGroup(object): + """Constraints that will be imposed on the model when solving. + + Args: + moma: MOMAProblem object for the proposed constraints. + *args: The constraints that are imposed on the model. + """ + def __init__(self, moma, *args): + self._moma = moma + self._constrs = [] + if len(args) > 0: + self.add(*args) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.delete() + + def add(self, *args): + """Add constraints to the model.""" + self._constrs.extend(self._moma._prob.add_linear_constraints(*args)) + + def delete(self): + """Set up the constraints to get deleted on the next solve.""" + self._moma._remove_constr.extend(self._constrs) + + +class MOMAProblem(object): + """Model as a flux optimization problem with minimal flux redistribution. + + Create a representation of the model as an LP optimization problem with + steady state assumption and a minimal redistribution of metabolic fluxes + with respect to the wild type configuration. + + The problem can be solved using any of the four MOMA variants described in + [Segre02]_ and [Mo09]_. MOMA is formulated to avoid the FBA assumption that + that growth efficiency has evolved to an optimal point directly following + model perturbation. MOMA finds the optimal solution for a model with + minimal flux redistribution with respect to the wild type flux + configuration. + + MOMA is implemented with two variations of a linear optimization problem + (:meth:`.lin_moma` and :meth:`.lin_moma2`) and two variations of a + quadratic optimization problem (:meth:`.moma` and :meth:`.moma2`). + Further information on these methods can be found within their respective + documentation. + + The problem can be modified and solved as many times as needed. The flux + of a reaction can be obtained after solving using :meth:`.get_flux`. + + Args: + model: :class:`MetabolicModel` to solve. + solver: LP solver instance to use. + """ + def __init__(self, model, solver): + self._prob = solver.create_problem() + self._model = model + + self._remove_constr = [] + + self._v_wt = v_wt = self._prob.namespace(name='v_wt') + self._v = v = self._prob.namespace(name='v') + self._z = z = self._prob.namespace(name='z') + self._z_diff = z_diff = self._prob.namespace(name='z_diff') + + # Define flux variables + for reaction_id in self._model.reactions: + lower, upper = self._model.limits[reaction_id] + v_wt.define([reaction_id], lower=lower, upper=upper) + v.define([reaction_id], lower=lower, upper=upper) + z.define([reaction_id], lower=0, upper=max(abs(lower), abs(upper))) + + flux_range = float(upper) - float(lower) + z_diff.define([reaction_id], lower=0, upper=flux_range) + + # Define constraints + mass_balance = self.constraints() + massbalance_lhs = { + spec: 0 for spec in product(model.compounds, ('wt', 'mod'))} + for (compound, reaction_id), value in iteritems(self._model.matrix): + massbalance_lhs[compound, 'wt'] += v_wt[reaction_id] * value + massbalance_lhs[compound, 'mod'] += v[reaction_id] * value + for compound, lhs in iteritems(massbalance_lhs): + mass_balance.add(lhs == 0) + + @property + def prob(self): + """Return the underlying LP problem.""" + return self._prob + + def constraints(self, *args): + """Return a constraint object.""" + return ConstraintGroup(self, *args) + + def _adjustment_reactions(self): + """Yield all the non exchange reactions in the model.""" + for reaction_id in self._model.reactions: + if not self._model.is_exchange(reaction_id): + yield reaction_id + + def _solve(self, sense=None): + """Remove old constraints and then solve the current problem. + + Args: + sense: Minimize or maximize the objective. + (:class:`.lp.ObjectiveSense) + + Returns: + The Result object for the solved LP problem + """ + # Remove the constraints from the last run + while len(self._remove_constr) > 0: + self._remove_constr.pop().delete() + + try: + return self._prob.solve(sense=sense) + except lp.SolverError as e: + raise_from(MOMAError(text_type(e)), e) + finally: + self._remove_constr = [] + + def solve_fba(self, objective): + """Solve the wild type problem using FBA. + + Args: + objective: The objective reaction to be maximized. + + Returns: + The LP Result object for the solved FBA problem. + """ + self._prob.set_objective(self._v_wt[objective]) + return self._solve(lp.ObjectiveSense.Maximize) + + def get_fba_flux(self, objective): + """Return a dictionary of all the fluxes solved by FBA. + + Dictionary of fluxes is used in :meth:`.lin_moma` and :meth:`.moma` + to minimize changes in the flux distributions following model + perturbation. + + Args: + objective: The objective reaction that is maximized. + + Returns: + Dictionary of fluxes for each reaction in the model. + """ + flux_result = self.solve_fba(objective) + fba_fluxes = {} + + # Place all the flux values in a dictionary + for key in self._model.reactions: + fba_fluxes[key] = flux_result.get_value(self._v_wt[key]) + return fba_fluxes + + def get_minimal_fba_flux(self, objective): + """Find the FBA solution that minimizes all the flux values. + + Maximize the objective flux then minimize all other fluxes + while keeping the objective flux at the maximum. + + Args: + objective: The objective reaction that is maximized. + + Returns: + A dictionary of all the reactions and their minimized fluxes. + """ + if self._z is None: + self._define_z_vars() + + # Define constraints + vs_wt = self._v_wt.set(self._model.reactions) + zs = self._z.set(self._model.reactions) + + wt_obj_flux = self.get_fba_obj_flux(objective) + + with self.constraints() as constr: + constr.add( + zs >= vs_wt, vs_wt >= -zs, + self._v_wt[objective] >= wt_obj_flux) + self._prob.set_objective(self._z.sum(self._model.reactions)) + result = self._solve(lp.ObjectiveSense.Minimize) + + fba_fluxes = {} + for key in self._model.reactions: + fba_fluxes[key] = result.get_value(self._v_wt[key]) + return fba_fluxes + + def get_fba_obj_flux(self, objective): + """Return the maximum objective flux solved by FBA.""" + flux_result = self.solve_fba(objective) + return flux_result.get_value(self._v_wt[objective]) + + def lin_moma(self, wt_fluxes): + """Minimize the redistribution of fluxes using a linear objective. + + The change in flux distribution is mimimized by minimizing the sum + of the absolute values of the differences of wild type FBA solution + and the knockout strain flux solution. + + This formulation bases the solution on the wild type fluxes that + are specified by the user. If these wild type fluxes were calculated + using FBA, then an arbitrary flux vector that optimizes the objective + function is used. See [Segre`_02] for more information. + + Args: + wt_fluxes: Dictionary of all the wild type fluxes. Use + :meth:`.get_fba_flux(objective)` to return a dictionary of + fluxes found by FBA. + """ + reactions = set(self._adjustment_reactions()) + + z_diff = self._z_diff + v = self._v + + with self.constraints() as constr: + for f_reaction, f_value in iteritems(wt_fluxes): + if f_reaction in reactions: + # Add the constraint that finds the optimal solution, such + # that the difference between the wildtype flux is similar + # to the knockout flux. + constr.add( + z_diff[f_reaction] >= f_value - v[f_reaction], + f_value - v[f_reaction] >= -z_diff[f_reaction]) + + # If we minimize the sum of the z vector then we will minimize + # the |vs_wt - vs| from above + self._prob.set_objective(z_diff.sum(reactions)) + + self._solve(lp.ObjectiveSense.Minimize) + + def lin_moma2(self, objective, wt_obj): + """Find the smallest redistribution vector using a linear objective. + + The change in flux distribution is mimimized by minimizing the sum + of the absolute values of the differences of wild type FBA solution + and the knockout strain flux solution. + + Creates the constraint that the we select the optimal flux vector that + is closest to the wildtype. This might still return an arbitrary flux + vector the maximizes the objective function. + + Args: + objective: Objective reaction for the model. + wt_obj: The flux value for your wild type objective reactions. + Can either use an expiremental value or on determined by FBA + by using :meth:`.get_fba_obj_flux(objective)`. + """ + reactions = set(self._adjustment_reactions()) + + z_diff = self._z_diff + v = self._v + v_wt = self._v_wt + + with self.constraints() as constr: + for f_reaction in reactions: + # Add the constraint that finds the optimal solution, such + # that the difference between the wildtype flux + # is similar to the knockout flux. + constr.add( + z_diff[f_reaction] >= v_wt[f_reaction] - v[f_reaction], + v_wt[f_reaction] - v[f_reaction] >= -z_diff[f_reaction]) + + # If we minimize the sum of the z vector then we will minimize + # the |v_wt - v| from above + self._prob.set_objective(z_diff.sum(reactions)) + + constr.add(self._v_wt[objective] >= wt_obj) + + self._solve(lp.ObjectiveSense.Minimize) + + def moma(self, wt_fluxes): + """Minimize the redistribution of fluxes using Euclidean distance. + + Minimizing the redistribution of fluxes using a quadratic objective + function. The distance is minimized by minimizing the sum of + (wild type - knockout)^2. + + Args: + wt_fluxes: Dictionary of all the wild type fluxes that will be + used to find a close MOMA solution. Fluxes can be expiremental + or calculated using :meth: get_fba_flux(objective). + """ + reactions = set(self._adjustment_reactions()) + v = self._v + + obj_expr = 0 + for f_reaction, f_value in iteritems(wt_fluxes): + if f_reaction in reactions: + # Minimize the Euclidean distance between the two vectors + obj_expr += (f_value - v[f_reaction])**2 + + self._prob.set_objective(obj_expr) + self._solve(lp.ObjectiveSense.Minimize) + + def moma2(self, objective, wt_obj): + """Find the smallest redistribution vector using Euclidean distance. + + Minimizing the redistribution of fluxes using a quadratic objective + function. The distance is minimized by minimizing the sum of + (wild type - knockout)^2. + + Creates the constraint that the we select the optimal flux vector that + is closest to the wildtype. This might still return an arbitrary flux + vector the maximizes the objective function. + + Args: + objective: Objective reaction for the model. + wt_obj: The flux value for your wild type objective reactions. + Can either use an expiremental value or on determined by FBA + by using :meth:`.get_fba_obj_flux(objective)`. + """ + obj_expr = 0 + for reaction in self._adjustment_reactions(): + v_wt = self._v_wt[reaction] + v = self._v[reaction] + obj_expr += (v_wt - v)**2 + + self._prob.set_objective(obj_expr) + + with self.constraints(self._v_wt[objective] >= wt_obj): + self._solve(lp.ObjectiveSense.Minimize) + + def get_flux(self, reaction): + """Return the knockout flux for a specific reaction.""" + return self._prob.result.get_value(self._v[reaction]) + + def get_flux_var(self, reaction): + """Return the LP variable for a specific reaction.""" + return self._v[reaction] From 71383c89e6c7dd7029d4fa318868cd8421044f5a Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 20 Jun 2017 16:02:31 -0400 Subject: [PATCH 40/51] moma: Add test cases --- psamm/tests/test_moma.py | 125 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 psamm/tests/test_moma.py diff --git a/psamm/tests/test_moma.py b/psamm/tests/test_moma.py new file mode 100644 index 00000000..c7e13de0 --- /dev/null +++ b/psamm/tests/test_moma.py @@ -0,0 +1,125 @@ +# This file is part of PSAMM. +# +# PSAMM is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PSAMM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PSAMM. If not, see . +# +# Copyright 2017 Jon Lund Steffensen + +import unittest + +from psamm.metabolicmodel import MetabolicModel +from psamm.database import DictDatabase +from psamm import moma +from psamm.datasource.reaction import parse_reaction +from psamm.lpsolver import generic + + +class TestLinearMOMA(unittest.TestCase): + def setUp(self): + self.database = DictDatabase() + self.database.set_reaction('rxn_1', parse_reaction('=> (2) |A|')) + self.database.set_reaction('rxn_2', parse_reaction('|A| <=> |B|')) + self.database.set_reaction('rxn_3', parse_reaction('|A| => |D|')) + self.database.set_reaction('rxn_4', parse_reaction('|A| => |C|')) + self.database.set_reaction('rxn_5', parse_reaction('|C| => |D|')) + self.database.set_reaction('rxn_6', parse_reaction('|D| =>')) + self.model = MetabolicModel.load_model( + self.database, self.database.reactions) + + try: + self.solver = generic.Solver() + except generic.RequirementsError: + self.skipTest('Unable to find an LP solver for tests') + + def test_moma_fba(self): + p = moma.MOMAProblem(self.model, self.solver) + fluxes = p.get_fba_flux('rxn_6') + self.assertAlmostEqual(fluxes['rxn_1'], 500) + self.assertAlmostEqual(fluxes['rxn_2'], 0) + self.assertAlmostEqual(fluxes['rxn_6'], 1000) + + def test_moma_minimal_fba(self): + p = moma.MOMAProblem(self.model, self.solver) + fluxes = p.get_minimal_fba_flux('rxn_6') + self.assertAlmostEqual(fluxes['rxn_1'], 500) + self.assertAlmostEqual(fluxes['rxn_2'], 0) + self.assertAlmostEqual(fluxes['rxn_3'], 1000) + self.assertAlmostEqual(fluxes['rxn_6'], 1000) + + def test_linear_moma(self): + p = moma.MOMAProblem(self.model, self.solver) + with p.constraints(p.get_flux_var('rxn_3') == 0): + p.lin_moma({ + 'rxn_3': 1000, + 'rxn_4': 0, + 'rxn_5': 0, + }) + + # The closest solution when these are constrained is for + # rxn_6 to take on a flux of zero. + self.assertAlmostEqual(p.get_flux('rxn_6'), 0) + + def test_linear_moma2(self): + p = moma.MOMAProblem(self.model, self.solver) + with p.constraints(p.get_flux_var('rxn_3') == 0): + p.lin_moma2('rxn_6', 1000) + + self.assertAlmostEqual(p.get_flux('rxn_1'), 500) + self.assertAlmostEqual(p.get_flux('rxn_2'), 0) + self.assertAlmostEqual(p.get_flux('rxn_3'), 0) + self.assertAlmostEqual(p.get_flux('rxn_4'), 1000) + self.assertAlmostEqual(p.get_flux('rxn_5'), 1000) + self.assertAlmostEqual(p.get_flux('rxn_6'), 1000) + + +class TestQuadraticMOMA(unittest.TestCase): + def setUp(self): + self.database = DictDatabase() + self.database.set_reaction('rxn_1', parse_reaction('=> (2) |A|')) + self.database.set_reaction('rxn_2', parse_reaction('|A| <=> |B|')) + self.database.set_reaction('rxn_3', parse_reaction('|A| => |D|')) + self.database.set_reaction('rxn_4', parse_reaction('|A| => |C|')) + self.database.set_reaction('rxn_5', parse_reaction('|C| => |D|')) + self.database.set_reaction('rxn_6', parse_reaction('|D| =>')) + self.model = MetabolicModel.load_model( + self.database, self.database.reactions) + + try: + self.solver = generic.Solver(quadratic=True) + except generic.RequirementsError: + self.skipTest('Unable to find an MIQP solver for tests') + + def test_quadratic_moma(self): + p = moma.MOMAProblem(self.model, self.solver) + with p.constraints(p.get_flux_var('rxn_3') == 0): + p.moma({ + 'rxn_3': 1000, + 'rxn_4': 0, + 'rxn_5': 0, + }) + + # The closest solution when these are constrained is for + # rxn_6 to take on a flux of zero. + self.assertAlmostEqual(p.get_flux('rxn_6'), 0) + + def test_quadratic_moma2(self): + p = moma.MOMAProblem(self.model, self.solver) + with p.constraints(p.get_flux_var('rxn_3') == 0): + p.moma2('rxn_6', 1000) + + self.assertAlmostEqual(p.get_flux('rxn_1'), 500) + self.assertAlmostEqual(p.get_flux('rxn_2'), 0) + self.assertAlmostEqual(p.get_flux('rxn_3'), 0) + self.assertAlmostEqual(p.get_flux('rxn_4'), 1000) + self.assertAlmostEqual(p.get_flux('rxn_5'), 1000) + self.assertAlmostEqual(p.get_flux('rxn_6'), 1000) From 5443aee9d3ee6f16346ecfeea91749d84730d51d Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 21 Jun 2017 16:47:46 -0400 Subject: [PATCH 41/51] Add moma to the genedelete command. --- docs/commands.rst | 42 +++++++++++++++++++++ docs/references.rst | 4 +- psamm/commands/genedelete.py | 71 ++++++++++++++++++++++++++++-------- psamm/tests/test_command.py | 20 ++++++++++ 4 files changed, 120 insertions(+), 17 deletions(-) diff --git a/docs/commands.rst b/docs/commands.rst index e2fb8ffe..3fc70373 100644 --- a/docs/commands.rst +++ b/docs/commands.rst @@ -209,6 +209,48 @@ The file gene_file.txt would contain the following lines:: ExGene1 ExGene2 +To delete genes using different algorithms use ``--method`` to specify +which algorithm for the solver to use. The default method for the command is +FBA. To delete genes using the Minimization of Metabolic Adjustment (MOMA) +algorithm use the command line argument ``--method moma``. MOMA is based on +the assumption that the knockout organism has not had time to adjust its gene +regulation to maximize biomass production so fluxes will be close to wildtype +fluxes. + +.. code-block:: shell + + $ psamm-model genedelete --gene ExGene1 --method moma + +There are four variations of MOMA available in PSAMM, defined in the following +way (where :math:`\bar{v}` is the wild type fluxes and :math:`\bar{u}` is the +knockout fluxes): + +MOMA (``--method moma``) + Finds the reaction fluxes in the knockout, such that the difference in flux + from the wildtype is minimized. Minimization is performed with the + Euclidean distance: :math:`\sum_j (v_j - u_j)^2`. The wildtype fluxes are + obtained from the wildtype model (i.e. before genes are deleted) by FBA + with L1 minimization. L1 minimization is performed on the FBA result to + remove loops and make the result disregard internal loop fluxes. [Segre02]_ + +Linear MOMA (``--method lin_moma``) + Finds the reaction fluxes in the knockout, such that the difference in flux + from the wildtype is minimized. Minimization is performed with the + Manhattan distance: :math:`\sum_j \|v_j - u_j\|`. The wildtype fluxes are + obtained from the wildtype model (i.e. before genes are deleted) by FBA + with L1 minimization. L1 minimization is performed on the FBA result to + remove loops and make the result disregard internal loop fluxes. [Mo09]_ + +Combined-model MOMA (``--method moma2``) (Experimental) + Similar to ``moma``, but this implementation solves for the wild type + fluxes at the same time as the knockout fluxes to ensure not to rely on the + arbitrary flux vector found with FBA. + +Combined-model linear MOMA (``--method lin_moma2``) (Experimental) + Similar to ``lin_moma``, but this implementation solves for the wild type + fluxes at the same time as the knockout fluxes to ensure not to rely on the + arbitrary flux vector found with FBA. + Flux coupling analysis (``fluxcoupling``) ----------------------------------------- diff --git a/docs/references.rst b/docs/references.rst index 6547bf58..0172f143 100644 --- a/docs/references.rst +++ b/docs/references.rst @@ -36,7 +36,7 @@ References .. [Segre02] Segrè D, Vitkup D, Church GM. Analysis of optimality in natural and perturbed metabolic networks. Proceedings of the National Academy of Sciences of the United States of America. 2002;99(23):15112-15117. - :doi:`10.1073/pnas.232349399`. + :doi:`10.1073/pnas.232349399`. .. [Thiele14] Thiele I, Vlassis N, Fleming RMT. fastGapFill: efficient gap filling in metabolic networks. Bioinformatics. 2014;30: 2529–31. :doi:`10.1093/bioinformatics/btu321`. @@ -50,4 +50,4 @@ References .. [Orth11] Orth JD, Conrad TM, Na J, Lerman JA, Nam H, Feist AM, et al. A comprehensive genome-scale reconstruction of Escherichia coli metabolism--2011. Mol Syst Biol. EMBO Press; 2011;7: 535. - :doi:`10.1038/msb.2011.65`. \ No newline at end of file + :doi:`10.1038/msb.2011.65`. diff --git a/psamm/commands/genedelete.py b/psamm/commands/genedelete.py index 4c6dcb4f..2bf4f0dc 100644 --- a/psamm/commands/genedelete.py +++ b/psamm/commands/genedelete.py @@ -16,7 +16,7 @@ # Copyright 2014-2017 Jon Lund Steffensen # Copyright 2015 Keith Dufault-Thompson -from __future__ import unicode_literals +from __future__ import unicode_literals, division import time import logging @@ -24,9 +24,10 @@ from ..command import (Command, MetabolicMixin, ObjectiveMixin, SolverCommandMixin, FilePrefixAppendAction) from .. import fluxanalysis +from .. import moma from ..expression import boolean -from six import string_types +from six import string_types, text_type logger = logging.getLogger(__name__) @@ -42,9 +43,16 @@ def init_parser(cls, parser): parser.add_argument( '--gene', metavar='genes', action=FilePrefixAppendAction, type=str, default=[], help='Delete multiple genes from model') + parser.add_argument( + '--method', metavar='method', + choices=['fba', 'lin_moma', 'lin_moma2', 'moma', 'moma2'], + type=text_type, default='fba', + help='Select which method to use. (fba, lin_moma, lin_moma2, ' + 'moma, moma2)') super(GeneDeletionCommand, cls).init_parser(parser) def run(self): + """Delete the specified gene and solve using the desired method.""" obj_reaction = self._get_objective() genes = set() @@ -81,27 +89,60 @@ def run(self): logger.info('Deleting reaction {}...'.format(reaction)) deleted_reactions.add(reaction) - solver = self._get_solver() - prob = fluxanalysis.FluxBalanceProblem(self._mm, solver) + if self._args.method in ['moma', 'moma2']: + solver = self._get_solver(quadratic=True) + else: + solver = self._get_solver() - try: - prob.maximize(obj_reaction) - except fluxanalysis.FluxBalanceError as e: - self.report_flux_balance_error(e) - wild = prob.get_flux(obj_reaction) + if self._args.method == 'fba': + logger.info('Solving using FBA...') + prob = fluxanalysis.FluxBalanceProblem(self._mm, solver) + + try: + prob.maximize(obj_reaction) + except fluxanalysis.FluxBalanceError as e: + self.report_flux_balance_error(e) - for reaction in deleted_reactions: - flux_var = prob.get_flux_var(reaction) - prob.prob.add_linear_constraints(flux_var == 0) + wild = prob.get_flux(obj_reaction) - prob.maximize(obj_reaction) - deleteflux = prob.get_flux(obj_reaction) + for reaction in deleted_reactions: + flux_var = prob.get_flux_var(reaction) + prob.prob.add_linear_constraints(flux_var == 0) + + prob.maximize(obj_reaction) + deleteflux = prob.get_flux(obj_reaction) + elif self._args.method in ['lin_moma', 'lin_moma2', 'moma', 'moma2']: + prob = moma.MOMAProblem(self._mm, solver) + wt_fluxes = prob.get_minimal_fba_flux(obj_reaction) + wild = wt_fluxes[obj_reaction] + + for reaction in deleted_reactions: + flux_var = prob.get_flux_var(reaction) + prob.prob.add_linear_constraints(flux_var == 0) + + try: + if self._args.method == 'moma': + logger.info('Solving using MOMA...') + prob.moma(wt_fluxes) + elif self._args.method == 'lin_moma': + logger.info('Solving using linear MOMA...') + prob.lin_moma(wt_fluxes) + elif self._args.method == 'moma2': + logger.info('Solving using combined-model MOMA...') + prob.moma2(obj_reaction, wild) + elif self._args.method == 'lin_moma2': + logger.info('Solving using combined-model linear MOMA...') + prob.lin_moma2(obj_reaction, wild) + except moma.MOMAError: + self.fail('Error computing the MOMA result.') + + deleteflux = prob.get_flux(obj_reaction) logger.info( 'Solving took {:.2f} seconds'.format(time.time() - start_time)) logger.info( 'Objective reaction after gene deletion has flux {}'.format( - abs(deleteflux) if deleteflux == 0 else deleteflux)) + deleteflux + 0)) if wild != 0: logger.info( 'Objective reaction has {:.2%} flux of wild type flux'.format( diff --git a/psamm/tests/test_command.py b/psamm/tests/test_command.py index c298b496..497efe1f 100644 --- a/psamm/tests/test_command.py +++ b/psamm/tests/test_command.py @@ -441,6 +441,26 @@ def test_run_gapfill_with_blocked(self): def test_run_genedelete(self): self.run_solver_command(GeneDeletionCommand, ['--gene', 'gene_1']) + def test_run_genedelete_with_fba(self): + self.run_solver_command( + GeneDeletionCommand, ['--gene=gene_1', '--method=fba']) + + def test_run_genedelete_with_lin_moma(self): + self.run_solver_command( + GeneDeletionCommand, ['--gene=gene_1', '--method=lin_moma']) + + def test_run_genedelete_with_lin_moma2(self): + self.run_solver_command( + GeneDeletionCommand, ['--gene=gene_1', '--method=lin_moma2']) + + def test_run_genedelete_with_moma(self): + self.run_solver_command( + GeneDeletionCommand, ['--gene=gene_1', '--method=moma'], {'quadratic': True}) + + def test_run_genedelete_with_moma2(self): + self.run_solver_command( + GeneDeletionCommand, ['--gene=gene_1', '--method=moma2'], {'quadratic': True}) + def test_run_genedelete_with_infeasible(self): self.skip_test_if_no_solver() with self.assertRaises(SystemExit): From 1848bc07abc9e3973e2b48fd83f3b6955b306599 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Thu, 22 Jun 2017 11:38:44 -0400 Subject: [PATCH 42/51] moma: Fix invalid call to non-existent method --- psamm/moma.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/psamm/moma.py b/psamm/moma.py index 726a36ad..029e4d60 100644 --- a/psamm/moma.py +++ b/psamm/moma.py @@ -199,9 +199,6 @@ def get_minimal_fba_flux(self, objective): Returns: A dictionary of all the reactions and their minimized fluxes. """ - if self._z is None: - self._define_z_vars() - # Define constraints vs_wt = self._v_wt.set(self._model.reactions) zs = self._z.set(self._model.reactions) From c7ddd64dd44074429c7517133735d1861555118d Mon Sep 17 00:00:00 2001 From: Keith Dufault-Thompson Date: Fri, 23 Jun 2017 09:48:49 -0400 Subject: [PATCH 43/51] Added xlsxwriter to required packages for install. --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 529bf9e9..c1238b3e 100755 --- a/setup.py +++ b/setup.py @@ -55,7 +55,7 @@ chargecheck = psamm.commands.chargecheck:ChargeBalanceCommand console = psamm.commands.console:ConsoleCommand duplicatescheck = psamm.commands.duplicatescheck:DuplicatesCheck - excelexport = psamm.commands.excelexport:ExcelExportCommand [excel] + excelexport = psamm.commands.excelexport:ExcelExportCommand fastgapfill = psamm.commands.fastgapfill:FastGapFillCommand fba = psamm.commands.fba:FluxBalanceCommand fluxcheck = psamm.commands.fluxcheck:FluxConsistencyCommand @@ -77,11 +77,11 @@ install_requires=[ 'PyYAML~=3.11', - 'six' + 'six', + 'xlsxwriter' ], extras_require={ 'docs': ['sphinx', 'sphinx_rtd_theme', 'mock'], - 'excel': ['xlsxwriter'], ':python_version=="2.7"': ['enum34'], ':python_version=="3.3"': ['enum34'] }) From d57b094ed9310ae85d5cd567bb3551d360f4258f Mon Sep 17 00:00:00 2001 From: Keith Dufault-Thompson Date: Fri, 23 Jun 2017 11:33:29 -0400 Subject: [PATCH 44/51] removed xlsxwriter as a dependency in tox.ini --- tox.ini | 1 - 1 file changed, 1 deletion(-) diff --git a/tox.ini b/tox.ini index 33755285..e0e30113 100644 --- a/tox.ini +++ b/tox.ini @@ -22,7 +22,6 @@ setenv = glpk: PSAMM_SOLVER=glpk deps = coverage~=4.0 - xlsxwriter py27-cplex: {env:CPLEX_PYTHON2_PACKAGE} py35-cplex: {env:CPLEX_PYTHON3_PACKAGE} qsoptex: python-qsoptex>=0.5 From 941bf434399ff6ee87290b645a1178edf1e90934 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Thu, 18 Aug 2016 16:08:46 -0400 Subject: [PATCH 45/51] docs: Add MapMaker paper (Tervo et al. 2016) to references --- docs/references.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/references.rst b/docs/references.rst index 0172f143..bd41aa9c 100644 --- a/docs/references.rst +++ b/docs/references.rst @@ -37,6 +37,9 @@ References and perturbed metabolic networks. Proceedings of the National Academy of Sciences of the United States of America. 2002;99(23):15112-15117. :doi:`10.1073/pnas.232349399`. +.. [Tervo16] Tervo CJ, Reed JL. MapMaker and PathTracer for tracking carbon in + genome-scale metabolic models. Biotechnol J. 2016; 1–23. + :doi:`10.1002/biot.201400305`. .. [Thiele14] Thiele I, Vlassis N, Fleming RMT. fastGapFill: efficient gap filling in metabolic networks. Bioinformatics. 2014;30: 2529–31. :doi:`10.1093/bioinformatics/btu321`. From 205004a1b7ce557b3e94c4efa109fe4f5b7a8f4b Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Wed, 30 Mar 2016 15:49:45 -0400 Subject: [PATCH 46/51] Add mapmaker module for compound pair prediction based on MapMaker --- psamm/mapmaker.py | 261 +++++++++++++++++++++++++++++++++++ psamm/tests/test_mapmaker.py | 144 +++++++++++++++++++ 2 files changed, 405 insertions(+) create mode 100644 psamm/mapmaker.py create mode 100644 psamm/tests/test_mapmaker.py diff --git a/psamm/mapmaker.py b/psamm/mapmaker.py new file mode 100644 index 00000000..58be9161 --- /dev/null +++ b/psamm/mapmaker.py @@ -0,0 +1,261 @@ +# This file is part of PSAMM. +# +# PSAMM is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PSAMM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PSAMM. If not, see . +# +# Copyright 2016-2017 Jon Lund Steffensen + +"""Functions for predicting compound pairs using the MapMaker algorithm. + +The MapMaker algorithm is described in [Tervo16]_. +""" + +from itertools import product + +from six import iteritems + +from .lpsolver import lp +from .formula import Atom, Radical, Formula + + +def default_weight(element): + """Return weight of formula element. + + This implements the default weight proposed for MapMaker. + """ + if element in (Atom.N, Atom.O, Atom.P): + return 0.4 + elif isinstance(element, Radical): + return 40.0 + return 1.0 + + +def _weighted_formula(form, weight_func): + """Yield weight of each formula element.""" + for e, mf in form.items(): + if e == Atom.H: + continue + + yield e, mf, weight_func(e) + + +def _transfers(reaction, delta, elements, result, epsilon): + """Yield transfers obtained from result.""" + left = set(c for c, _ in reaction.left) + right = set(c for c, _ in reaction.right) + for c1, c2 in product(left, right): + items = {} + for e in elements: + v = result.get_value(delta[c1, c2, e]) + nearest_int = round(v) + if abs(v - nearest_int) < epsilon: + v = int(nearest_int) + if v >= epsilon: + items[e] = v + + if len(items) > 0: + yield (c1, c2), Formula(items) + + +def _reaction_to_dicts(reaction): + """Convert a reaction to reduced left, right dictionaries.""" + + def dict_from_iter_sum(it): + d = {} + for k, v in it: + if k not in d: + d[k] = 0 + d[k] += v + return d + + left = dict_from_iter_sum(reaction.left) + right = dict_from_iter_sum(reaction.right) + + return left, right + + +class UnbalancedReactionError(ValueError): + """Raised when an unbalanced reaction is provided.""" + + +def predict_compound_pairs(reaction, compound_formula, solver, epsilon=1e-5, + alt_elements=None, weight_func=default_weight): + """Predict compound pairs of reaction using MapMaker. + + Yields all solutions as dictionaries with compound pairs as keys and + formula objects as values. + + Args: + reaction: :class:`psamm.reaction.Reaction` object. + compound_formula: Dictionary mapping compound IDs to formulas. Formulas + must be flattened. + solver: LP solver (MILP). + epsilon: Threshold for rounding floating-point values to integers in + the predicted transfers. + alt_elements: Iterable of elements to consider for alternative + solutions. Only alternate solutions that have different transfers + of these elements will be returned (default=all elements). + weight_func: Optional function that returns a weight for a formula + element (should handle specific Atom and Radical objects). By + default, the standard MapMaker weights will be used + (H=0, R=40, N=0.4, O=0.4, P=0.4, *=1). + """ + elements = set() + for compound, value in reaction.compounds: + if compound.name not in compound_formula: + return + + f = compound_formula[compound.name] + elements.update(e for e, _, _ in _weighted_formula(f, weight_func)) + + if len(elements) == 0: + return + + p = solver.create_problem() + + x = p.namespace(name='x') + y = p.namespace(name='y') + m = p.namespace(name='m') + q = p.namespace(name='q') + omega = p.namespace(name='omega') + gamma = p.namespace(name='gamma') + delta = p.namespace(name='delta') + + left, right = _reaction_to_dicts(reaction) + + objective = 0 + for c1, c2 in product(left, right): + m.define([(c1, c2)], lower=0) + q.define([(c1, c2)], lower=0) + omega.define([(c1, c2)], types=lp.VariableType.Binary) + + objective += 100 * m[c1, c2] + 90 * q[c1, c2] - 0.1 * omega[c1, c2] + + gamma.define(((c1, c2, e) for e in elements), + types=lp.VariableType.Binary) + delta.define(((c1, c2, e) for e in elements), lower=0) + + # Eq 12 + x.define([(c1, c2)], types=lp.VariableType.Binary) + y.define([(c1, c2)], types=lp.VariableType.Binary) + p.add_linear_constraints(y[c1, c2] <= 1 - x[c1, c2]) + + # Eq 6, 9 + delta_wsum = delta.expr( + ((c1, c2, e), weight_func(e)) for e in elements) + p.add_linear_constraints( + m[c1, c2] <= delta_wsum, q[c1, c2] <= delta_wsum) + + objective -= gamma.sum((c1, c2, e) for e in elements) + + p.set_objective(objective) + + # Eq 3 + zs = {} + for c1, v in iteritems(left): + f = compound_formula[c1.name] + for e in elements: + mf = f.get(e, 0) + delta_sum = delta.sum((c1, c2, e) for c2 in right) + try: + p.add_linear_constraints(delta_sum == v * mf) + except ValueError: + raise UnbalancedReactionError('Unable to add constraint') + + # Eq 8, 11 + x_sum = x.sum((c1, c2) for c2 in right) + y_sum = y.sum((c1, c2) for c2 in right) + p.add_linear_constraints(x_sum <= 1, y_sum <= 1) + + # Eq 13 + zs[c1] = 0 + for e, mf, w in _weighted_formula(f, weight_func): + zs[c1] += w * mf + + # Eq 2 + for c2, v in iteritems(right): + f = compound_formula[c2.name] + for e in elements: + mf = f.get(e, 0) + delta_sum = delta.sum((c1, c2, e) for c1 in left) + try: + p.add_linear_constraints(delta_sum == v * mf) + except ValueError: + raise UnbalancedReactionError('Unable to add constraint') + + for c1, v1 in iteritems(left): + for c2 in right: + f1 = compound_formula[c1.name] + f2 = compound_formula[c2.name] + for e in elements: + mf = f1.get(e, 0) + + # Eq 4 + p.add_linear_constraints( + delta[c1, c2, e] <= float(v1) * mf * omega[c1, c2]) + + # Eq 5 + if e in f2: + p.add_linear_constraints( + delta[c1, c2, e] <= float(v1) * mf * gamma[c1, c2, e]) + + # Eq 7, 10 + p.add_linear_constraints( + m[c1, c2] <= float(v1) * zs[c1] * x[c1, c2], + q[c1, c2] <= float(v1) * zs[c1] * y[c1, c2]) + + try: + result = p.solve(lp.ObjectiveSense.Maximize) + except lp.SolverError: + raise UnbalancedReactionError('Unable to solve') + + first_objective = result.get_value(objective) + + # Yield the first result + yield dict(_transfers(reaction, delta, elements, result, epsilon)) + + # Find alternative solutions + if alt_elements is None: + alt_elements = elements + else: + alt_elements = elements.intersection(alt_elements) + + if len(alt_elements) == 0: + return + + # Add a constraint that disallows the current transfer solution until no + # alternative solutions are left. + add_objective_constraint = True + while True: + elem_vars = 0 + elem_count = 0 + for c1, c2 in product(left, right): + for e in alt_elements: + if result.get_value(gamma[c1, c2, e]) > 0.5: + elem_count += 1 + elem_vars += gamma[c1, c2, e] + + if elem_count == 0: + break + + if add_objective_constraint: + p.add_linear_constraints(objective >= first_objective) + add_objective_constraint = False + + p.add_linear_constraints(elem_vars <= elem_count - 1) + try: + result = p.solve(lp.ObjectiveSense.Maximize) + except lp.SolverError: + break + + yield dict(_transfers(reaction, delta, elements, result, epsilon)) diff --git a/psamm/tests/test_mapmaker.py b/psamm/tests/test_mapmaker.py new file mode 100644 index 00000000..1b26fcad --- /dev/null +++ b/psamm/tests/test_mapmaker.py @@ -0,0 +1,144 @@ +# This file is part of PSAMM. +# +# PSAMM is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PSAMM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PSAMM. If not, see . +# +# Copyright 2017 Jon Lund Steffensen + +import unittest + +from psamm import mapmaker +from psamm.lpsolver import generic +from psamm.formula import Formula, Atom, Radical +from psamm.reaction import Compound +from psamm.datasource.reaction import parse_reaction + + +class TestMapMakerWeigths(unittest.TestCase): + def test_default_weight(self): + self.assertEqual(mapmaker.default_weight(Atom.C), 1) + self.assertEqual(mapmaker.default_weight(Atom.N), 0.4) + self.assertEqual(mapmaker.default_weight(Atom.O), 0.4) + self.assertEqual(mapmaker.default_weight(Atom.P), 0.4) + self.assertEqual(mapmaker.default_weight(Atom.S), 1) + self.assertEqual(mapmaker.default_weight(Radical('R')), 40.0) + + def test_weighted_formula(self): + weighted = mapmaker._weighted_formula( + Formula.parse('C1H2N3S4R'), mapmaker.default_weight) + self.assertEqual(set(weighted), { + (Atom.C, 1, 1), + # H is discarded + (Atom.N, 3, 0.4), + (Atom.S, 4, 1), + (Radical('R'), 1, 40.0) + }) + + +class TestMapMaker(unittest.TestCase): + def setUp(self): + try: + self.solver = generic.Solver(integer=True) + except generic.RequirementsError: + self.skipTest('Unable to find an MILP solver for tests') + + def test_predict_compound_pairs(self): + """Test prediction of HEX1 reaction.""" + reaction = parse_reaction( + 'atp[c] + glc-D[c] => adp[c] + g6p[c] + h[c]') + formulas = { + 'atp': Formula.parse('C10H12N5O13P3'), + 'adp': Formula.parse('C10H12N5O10P2'), + 'glc-D': Formula.parse('C6H12O6'), + 'g6p': Formula.parse('C6H11O9P'), + 'h': Formula.parse('H') + } + + solutions = list( + mapmaker.predict_compound_pairs(reaction, formulas, self.solver)) + + self.assertEqual(len(solutions), 1) + self.assertEqual(solutions[0], { + (Compound('atp', 'c'), Compound('adp', 'c')): + Formula.parse('C10N5O10P2'), + (Compound('glc-D', 'c'), Compound('g6p', 'c')): + Formula.parse('C6O6'), + (Compound('atp', 'c'), Compound('g6p', 'c')): + Formula.parse('O3P'), + }) + + def test_predict_compound_pairs_unbalanced(self): + """Test prediction of (non-sense) unbalanced reaction.""" + reaction = parse_reaction( + 'a[c] <=> b[c] + c[c]') + formulas = { + 'a': Formula.parse('C10H12'), + 'b': Formula.parse('C9H11'), + 'c': Formula.parse('CO2'), + } + + with self.assertRaises(mapmaker.UnbalancedReactionError): + list(mapmaker.predict_compound_pairs( + reaction, formulas, self.solver)) + + def test_predict_compound_pairs_multiple(self): + """Test prediction of reaction with multiple instances.""" + reaction = parse_reaction( + 'a[c] <=> (2) b[c] + c[c]') + formulas = { + 'a': Formula.parse('C10H13O6'), + 'b': Formula.parse('C5H6O3'), + 'c': Formula.parse('H'), + } + solutions = list( + mapmaker.predict_compound_pairs(reaction, formulas, self.solver)) + + self.assertEqual(len(solutions), 1) + self.assertEqual(solutions[0], { + (Compound('a', 'c'), Compound('b', 'c')): Formula.parse('C10O6'), + }) + + def test_predict_with_multiple_solutions(self): + """Test prediction where multiple solutions exist (TALA).""" + reaction = parse_reaction( + 'r5p[c] + xu5p-D[c] <=> g3p[c] + s7p[c]') + formulas = { + 'r5p': Formula.parse('C5H9O8P'), + 'xu5p-D': Formula.parse('C5H9O8P'), + 'g3p': Formula.parse('C3H5O6P'), + 's7p': Formula.parse('C7H13O10P'), + } + + solutions = list( + mapmaker.predict_compound_pairs(reaction, formulas, self.solver)) + self.assertEqual(len(solutions), 2) + + # Solution A + self.assertIn({ + (Compound('r5p', 'c'), Compound('s7p', 'c')): + Formula.parse('C5O8P'), + (Compound('xu5p-D', 'c'), Compound('g3p', 'c')): + Formula.parse('C3O6P'), + (Compound('xu5p-D', 'c'), Compound('s7p', 'c')): + Formula.parse('C2O2'), + }, solutions) + + # Solution B + self.assertIn({ + (Compound('xu5p-D', 'c'), Compound('s7p', 'c')): + Formula.parse('C5O8P'), + (Compound('r5p', 'c'), Compound('g3p', 'c')): + Formula.parse('C3O6P'), + (Compound('r5p', 'c'), Compound('s7p', 'c')): + Formula.parse('C2O2'), + }, solutions) From 22bb2dad28cfd3f817225544c342e7c934414517 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Thu, 18 Aug 2016 16:26:55 -0400 Subject: [PATCH 47/51] Add findprimarypairs module for primary pair prediction --- psamm/findprimarypairs.py | 400 +++++++++++++++++++++++++++ psamm/tests/test_findprimarypairs.py | 102 +++++++ 2 files changed, 502 insertions(+) create mode 100644 psamm/findprimarypairs.py create mode 100644 psamm/tests/test_findprimarypairs.py diff --git a/psamm/findprimarypairs.py b/psamm/findprimarypairs.py new file mode 100644 index 00000000..1cc7be24 --- /dev/null +++ b/psamm/findprimarypairs.py @@ -0,0 +1,400 @@ +# This file is part of PSAMM. +# +# PSAMM is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PSAMM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PSAMM. If not, see . +# +# Copyright 2015-2017 Jon Lund Steffensen + +from __future__ import division + +import logging +from itertools import product +from collections import Counter + +from six import iteritems +from six.moves import reduce + +from .formula import Formula, Atom + +try: + from math import gcd +except ImportError: + from fractions import gcd + +logger = logging.getLogger(__name__) + + +def element_weight(element): + """Return default element weight. + + This is the default element weight function. + """ + if element == Atom.H: + return 0.0 + elif element == Atom.C: + return 1.0 + return 0.82 + + +def _jaccard_similarity(f1, f2, weight_func): + """Calculate generalized Jaccard similarity of formulas. + + Returns the weighted similarity value or None if there is no overlap + at all. If the union of the formulas has a weight of zero (i.e. the + denominator in the Jaccard similarity is zero), a value of zero is + returned. + """ + elements = set(f1) + elements.update(f2) + + count, w_count, w_total = 0, 0, 0 + for element in elements: + mi = min(f1.get(element, 0), f2.get(element, 0)) + mx = max(f1.get(element, 0), f2.get(element, 0)) + count += mi + w = weight_func(element) + w_count += w * mi + w_total += w * mx + + if count == 0: + return None + + return 0.0 if w_total == 0.0 else w_count / w_total + + +def _reaction_to_dicts(reaction): + """Convert a reaction to reduced left, right dictionaries. + + Returns a pair of (left, right) dictionaries mapping compounds to + normalized integer stoichiometric values. If a compound occurs multiple + times on one side, the occurences are combined into a single entry in the + dictionary. + """ + def dict_from_iter_sum(it, div): + d = {} + for k, v in it: + if k not in d: + d[k] = 0 + d[k] += int(v / div) + return d + + div = reduce(gcd, (abs(v) for _, v in reaction.compounds), 0) + if div == 0: + raise ValueError('Empty reaction') + + left = dict_from_iter_sum(reaction.left, div) + right = dict_from_iter_sum(reaction.right, div) + + return left, right + + +class _CompoundInstance(object): + """Instance of a compound in a reaction during matching. + + A compound in a reaction has a number of instances corresponding to the + stoichiometric value. The instance is defined by the compound and index + number. The compound instance also has an attached formula which can be + updated through the ``formula`` property. The compound instance can be + created as a copy of an existing instance, or from a + :psamm:`psamm.reaction.Compound`, number, and + :psamm:`psamm.formula.Formula`. + """ + def __init__(self, *args): + if len(args) == 1 and isinstance(args[0], _CompoundInstance): + self._compound = args[0]._compound + self._index = args[0]._index + self._formula = args[0]._formula + elif len(args) == 3: + self._compound = args[0] + self._index = args[1] + self._formula = args[2] + else: + raise ValueError('Invalid arguments') + + @property + def compound(self): + """Return the :class:`psamm.reaction.Compound`.""" + return self._compound + + @property + def index(self): + """Return the index.""" + return self._index + + @property + def formula(self): + """Return the formula.""" + return self._formula + + @formula.setter + def formula(self, value): + self._formula = value + + def __repr__(self): + return str('<{} {} of {!r}>').format( + self.__class__.__name__, self._index, self._compound) + + def __eq__(self, other): + return (isinstance(other, self.__class__) and + self._compound == other._index and + self._index == other._index) + + def __hash__(self): + return hash((self._compound, self._index)) + + +def predict_compound_pairs_iterated( + reactions, formulas, prior=(1, 43), max_iterations=None, + element_weight=element_weight): + """Predict reaction pairs using iterated method. + + Returns a tuple containing a dictionary of predictions keyed by the + reaction IDs, and the final number of iterations. Each reaction prediction + entry contains a tuple with a dictionary of transfers and a dictionary of + unbalanced compounds. The dictionary of unbalanced compounds is empty only + if the reaction is balanced. + + Args: + reactions: Dictionary or pair-iterable of (id, equation) pairs. + IDs must be any hashable reaction identifier (e.g. string) and + equation must be :class:`psamm.reaction.Reaction` objects. + formulas: Dictionary mapping compound IDs to + :class:`psamm.formula.Formula`. Formulas must be flattened. + prior: Tuple of (alpha, beta) parameters for the MAP inference. + If not provided, the default parameters will be used: (1, 43). + max_iterations: Maximum iterations to run before stopping. If the + stopping condition is reached before this number of iterations, + the procedure also stops. If None, the procedure only stops when + the stopping condition is reached. + element_weight: A function providing returning weight value for the + given :class:`psamm.formula.Atom` or + :class:`psamm.formula.Radical`. If not provided, the default weight + will be used (H=0, C=1, *=0.82) + """ + prior_alpha, prior_beta = prior + reactions = dict(reactions) + + pair_reactions = {} + possible_pairs = Counter() + for reaction_id, equation in iteritems(reactions): + for (c1, _), (c2, _) in product(equation.left, equation.right): + spair = tuple(sorted([c1.name, c2.name])) + possible_pairs[spair] += 1 + pair_reactions.setdefault(spair, set()).add(reaction_id) + + next_reactions = set(reactions) + pairs_predicted = None + prediction = {} + weights = {} + iteration = 0 + while len(next_reactions) > 0: + iteration += 1 + if max_iterations is not None and iteration > max_iterations: + break + + logger.info('Iteration {}: {} reactions...'.format( + iteration, len(next_reactions))) + + for reaction_id in next_reactions: + result = predict_compound_pairs( + reactions[reaction_id], formulas, weights, element_weight) + if result is None: + continue + + transfer, balance = result + + rpairs = {} + for ((c1, _), (c2, _)), form in iteritems(transfer): + rpairs.setdefault((c1, c2), []).append(form) + + prediction[reaction_id] = rpairs, balance + + pairs_predicted = Counter() + for reaction_id, (rpairs, _) in iteritems(prediction): + for c1, c2 in rpairs: + spair = tuple(sorted([c1.name, c2.name])) + pairs_predicted[spair] += 1 + + next_reactions = set() + for spair, total in sorted(iteritems(possible_pairs)): + pred = pairs_predicted[spair] + + # The weight is set to the maximum a posteriori (MAP) estimate + # of the primary pair probability distribution. + posterior_alpha = prior_alpha + pred + posterior_beta = prior_beta + total - pred + pair_weight = ((posterior_alpha - 1) / + (posterior_alpha + posterior_beta - 2)) + + if (spair not in weights or + abs(pair_weight - weights[spair]) > 1e-5): + next_reactions.update(pair_reactions[spair]) + + c1, c2 = spair + weights[c1, c2] = pair_weight + weights[c2, c1] = pair_weight + + return prediction, iteration + + +def _match_greedily(reaction, compound_formula, score_func): + """Match compounds greedily based on score function. + + Args: + reaction: Reaction equation :class:`psamm.reaction.Reaction`. + compound_formula: Dictionary mapping compound IDs to + :class:`psamm.formula.Formula`. Formulas must be flattened. + score_func: Function that takes two :class:`_CompoundInstance` and + returns the score. + """ + uninstantiated_left, uninstantiated_right = _reaction_to_dicts(reaction) + + def compound_instances(uninstantiated): + instances = [] + for compound, value in iteritems(uninstantiated): + if value > 0: + f = compound_formula[compound.name] + instances.append(_CompoundInstance(compound, value, f)) + + for inst in instances: + uninstantiated[inst.compound] -= 1 + + return instances + + def instantiate(uninstantiated, compound): + n = uninstantiated[compound] + if n > 0: + f = compound_formula[compound.name] + inst = _CompoundInstance(compound, n, f) + uninstantiated[compound] -= 1 + return inst + + return None + + left = compound_instances(uninstantiated_left) + right = compound_instances(uninstantiated_right) + instances = left + right + + pairs = {} + for inst1, inst2 in product(left, right): + result = score_func(inst1, inst2) + if result is not None: + pairs[inst1, inst2] = result + + def inst_pair_sort_key(entry): + """Sort key for finding best match among instance pairs. + + Rank by score in general but always match identical compounds first + (these will always have score equal to one but are handled specially + to put them ahead of other compounds with score equal to one). Use + compound names to break ties to produce a deterministic result. + """ + (inst1, inst2), score = entry + c1, c2 = inst1.compound, inst2.compound + same_compound = c1.name == c2.name and c1.compartment != c2.compartment + return same_compound, score, c1.name, c2.name + + transfer = {} + while len(pairs) > 0: + (inst1, inst2), _ = max(iteritems(pairs), key=inst_pair_sort_key) + common = inst1.formula & inst2.formula + + key = (inst1.compound, inst1.index), (inst2.compound, inst2.index) + if key not in transfer: + transfer[key] = Formula() + transfer[key] |= common + + for inst in (inst1, inst2): + inst.formula -= common + + to_insert = set() + + inst = instantiate(uninstantiated_left, inst1.compound) + if inst is not None: + left.append(inst) + instances.append(inst) + to_insert.add(inst) + + inst = instantiate(uninstantiated_right, inst2.compound) + if inst is not None: + right.append(inst) + instances.append(inst) + to_insert.add(inst) + + to_update = {inst1, inst2} + + to_delete = set() + for inst1, inst2 in pairs: + if inst1 in to_update or inst2 in to_update: + if len(inst1.formula) > 0 and len(inst2.formula) > 0: + result = score_func(inst1, inst2) + if result is None: + to_delete.add((inst1, inst2)) + else: + pairs[inst1, inst2] = result + else: + to_delete.add((inst1, inst2)) + + for pair in to_delete: + del pairs[pair] + + for inst1, inst2 in product(left, right): + if inst1 in to_insert or inst2 in to_insert: + result = score_func(inst1, inst2) + if result is not None: + pairs[inst1, inst2] = result + + balance = {} + for inst in instances: + if len(inst.formula) > 0: + key = inst.compound, inst.index + balance[key] = inst.formula + + return transfer, balance + + +def predict_compound_pairs(reaction, compound_formula, pair_weights={}, + weight_func=element_weight): + """Predict compound pairs for a single reaction. + + Performs greedy matching on reaction compounds using a scoring function + that uses generalized Jaccard similarity corrected by the weights in the + given dictionary. Returns a tuple of a transfer dictionary and a dictionary + of unbalanced compounds. The dictionary of unbalanced compounds is empty + only if the reaction is balanced. + + Args: + reaction: :class:`psamm.reaction.Reaction`. + compound_formula: Dictionary mapping compound IDs to + :class:`psamm.formula.Formula`. Formulas must be flattened. + pair_weights: Dictionary mapping pairs of compound IDs to correction + values. This value is multiplied by the calculated Jaccard + similarity. If a pair is not in the dictionary, the value 1 is + used. Pairs are looked up in the weights dictionary as a tuple of + compound names (``c1``, ``c2``) where ``c1`` is the left-hand side + and ``c2`` is the right-hand side. + weight_func: Weight function for caclulating the generalized Jaccard + similarity. This function will be given an + :class:`psamm.formula.Atom` or :class:`psamm.formula.Radical` and + should return a corresponding weight. + """ + def score_func(inst1, inst2): + score = _jaccard_similarity( + inst1.formula, inst2.formula, weight_func) + if score is None: + return None + pair = inst1.compound.name, inst2.compound.name + pair_weight = pair_weights.get(pair, 1.0) + return pair_weight * score + + return _match_greedily(reaction, compound_formula, score_func) diff --git a/psamm/tests/test_findprimarypairs.py b/psamm/tests/test_findprimarypairs.py new file mode 100644 index 00000000..ff5cc4ef --- /dev/null +++ b/psamm/tests/test_findprimarypairs.py @@ -0,0 +1,102 @@ +# This file is part of PSAMM. +# +# PSAMM is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PSAMM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PSAMM. If not, see . +# +# Copyright 2017 Jon Lund Steffensen + +import unittest + +from psamm import findprimarypairs +from psamm.formula import Formula, Atom, Radical +from psamm.reaction import Compound +from psamm.datasource.reaction import parse_reaction + + +class TestFindPrimaryPairs(unittest.TestCase): + def test_default_weight(self): + self.assertEqual(findprimarypairs.element_weight(Atom.C), 1) + self.assertEqual(findprimarypairs.element_weight(Atom.H), 0) + self.assertEqual(findprimarypairs.element_weight(Atom.N), 0.82) + self.assertEqual(findprimarypairs.element_weight(Radical('R')), 0.82) + + def test_predict_compound_pairs(self): + """Test prediction of HEX1 reaction.""" + reaction = parse_reaction( + 'atp[c] + glc-D[c] => adp[c] + g6p[c] + h[c]') + formulas = { + 'atp': Formula.parse('C10H12N5O13P3'), + 'adp': Formula.parse('C10H12N5O10P2'), + 'glc-D': Formula.parse('C6H12O6'), + 'g6p': Formula.parse('C6H11O9P'), + 'h': Formula.parse('H'), + } + transfer, balance = findprimarypairs.predict_compound_pairs( + reaction, formulas) + + self.assertEqual(balance, {}) + self.assertEqual(transfer, { + ((Compound('atp', 'c'), 1), (Compound('adp', 'c'), 1)): + Formula.parse('C10H12N5O10P2'), + ((Compound('glc-D', 'c'), 1), (Compound('g6p', 'c'), 1)): + Formula.parse('C6H11O6'), + ((Compound('atp', 'c'), 1), (Compound('g6p', 'c'), 1)): + Formula.parse('O3P'), + ((Compound('glc-D', 'c'), 1), (Compound('h', 'c'), 1)): + Formula.parse('H'), + }) + + def test_predict_compound_pairs_unbalanced(self): + """Test prediction of (non-sense) unbalanced reaction.""" + reaction = parse_reaction( + 'a[c] <=> b[c] + c[c]') + formulas = { + 'a': Formula.parse('C10H12'), + 'b': Formula.parse('C9H11'), + 'c': Formula.parse('CO2'), + } + transfer, balance = findprimarypairs.predict_compound_pairs( + reaction, formulas) + + self.assertEqual(balance, { + (Compound('a', 'c'), 1): Formula.parse('H'), + (Compound('c', 'c'), 1): Formula.parse('O2'), + }) + self.assertEqual(transfer, { + ((Compound('a', 'c'), 1), (Compound('b', 'c'), 1)): + Formula.parse('C9H11'), + ((Compound('a', 'c'), 1), (Compound('c', 'c'), 1)): + Formula.parse('C'), + }) + + def test_predict_compound_pairs_multiple(self): + """Test prediction of reaction with multiple instances.""" + reaction = parse_reaction( + 'a[c] <=> (2) b[c] + c[c]') + formulas = { + 'a': Formula.parse('C10H13O6'), + 'b': Formula.parse('C5H6O3'), + 'c': Formula.parse('H'), + } + transfer, balance = findprimarypairs.predict_compound_pairs( + reaction, formulas) + + self.assertEqual(balance, {}) + self.assertEqual(transfer, { + ((Compound('a', 'c'), 1), (Compound('b', 'c'), 1)): + Formula.parse('C5H6O3'), + ((Compound('a', 'c'), 1), (Compound('b', 'c'), 2)): + Formula.parse('C5H6O3'), + ((Compound('a', 'c'), 1), (Compound('c', 'c'), 1)): + Formula.parse('H'), + }) From 3d1ad60f29c9f04c017d870b472c4ac2690e1b7d Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Tue, 7 Mar 2017 18:01:56 -0500 Subject: [PATCH 48/51] Add primarypairs command --- psamm/commands/primarypairs.py | 236 +++++++++++++++++++++++++++++++++ psamm/tests/test_command.py | 21 +++ setup.py | 1 + 3 files changed, 258 insertions(+) create mode 100644 psamm/commands/primarypairs.py diff --git a/psamm/commands/primarypairs.py b/psamm/commands/primarypairs.py new file mode 100644 index 00000000..3d8cee56 --- /dev/null +++ b/psamm/commands/primarypairs.py @@ -0,0 +1,236 @@ +# This file is part of PSAMM. +# +# PSAMM is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PSAMM is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PSAMM. If not, see . +# +# Copyright 2015-2017 Jon Lund Steffensen + +from __future__ import unicode_literals + +import logging + +from ..command import Command, SolverCommandMixin, FilePrefixAppendAction +from ..formula import Formula, Atom, Radical, ParseError +from .. import findprimarypairs +from .. import mapmaker + +from six import text_type, iteritems + +logger = logging.getLogger(__name__) + + +class PrimaryPairsCommand(SolverCommandMixin, Command): + """Predict primary pairs of reactions. + + This command is used to predict element-transferring reactant/product pairs + in the reactions of the model. This can be used to determine the flow of + elements through reactions. + """ + + @classmethod + def init_parser(cls, parser): + parser.add_argument( + '--method', + choices=['fpp', 'mapmaker'], + default='fpp', help='Primary pair prediction method') + parser.add_argument( + '--exclude', metavar='reaction', type=text_type, default=[], + action=FilePrefixAppendAction, + help=('Reaction to exclude (e.g. biomass reactions or' + ' macromolecule synthesis)')) + parser.add_argument( + '--report-element', metavar='element', type=text_type, default=[], + action='append', help=('Only output pairs predicted to transfer' + ' this element (e.g. C, N, P)')) + parser.add_argument( + '--report-all-transfers', action='store_true', + help=('Report all transfers instead of combining elements for' + ' each pair (only available for fpp).')) + parser.add_argument( + '--weights', action='append', default=[], type=text_type, + help=('Set weights for elements for inferring compound' + ' similarities (e.g. --weights N=0.4,C=1,H=0,R=20,*=0.6).' + ' The default value depends on the prediction method.')) + super(PrimaryPairsCommand, cls).init_parser(parser) + + def run(self): + # Check that elements are valid + elements = set() + for element in self._args.report_element: + if not hasattr(Atom, element): + self.argument_error('Invalid element: {}'.format(element)) + elements.add(Atom(element)) + + # Check that method can report all transfers if requested + if self._args.report_all_transfers and self._args.method != 'fpp': + self.argument_error( + 'Reporting all transfers is not available for this method:' + ' {}'.format(self._args.method)) + + if len(self._args.weights) > 0: + weights_dict, r_group_weight, default_weight = _parse_weights( + self._args.weights, default_weight=0.6) + + def element_weight(element): + if isinstance(element, Radical): + return r_group_weight + return weights_dict.get(element, default_weight) + else: + if self._args.method == 'fpp': + logger.info( + 'Using default element weights for {}:' + ' C=1, H=0, *=0.82'.format(self._args.method)) + element_weight = findprimarypairs.element_weight + elif self._args.method == 'mapmaker': + logger.info( + 'Using default element weights for {}:' + ' H=0, N=0.4, O=0.4, P=0.4, R=40, *=1'.format( + self._args.method)) + element_weight = mapmaker.default_weight + + # Mapping from compound id to formula + compound_formula = {} + for compound in self._model.compounds: + if compound.formula is not None: + try: + f = Formula.parse(compound.formula).flattened() + if not f.is_variable(): + compound_formula[compound.id] = f + else: + logger.warning( + 'Skipping variable formula {}: {}'.format( + compound.id, compound.formula)) + except ParseError as e: + msg = ( + 'Error parsing formula' + ' for compound {}:\n{}\n{}'.format( + compound.id, e, compound.formula)) + if e.indicator is not None: + msg += '\n{}'.format(e.indicator) + logger.warning(msg) + + # Set of excluded reactions + exclude = set(self._args.exclude) + + def iter_reactions(): + for reaction in self._model.reactions: + if (reaction.id not in self._model.model or + reaction.id in exclude): + continue + + if reaction.equation is None: + logger.warning( + 'Reaction {} has no reaction equation'.format( + reaction.id)) + continue + + if any(c.name not in compound_formula + for c, _ in reaction.equation.compounds): + logger.warning( + 'Reaction {} has compounds with undefined' + ' formula'.format(reaction.id)) + continue + + yield reaction + + if self._args.method == 'fpp': + result = self._run_find_primary_pairs( + compound_formula, iter_reactions(), element_weight) + elif self._args.method == 'mapmaker': + result = self._run_mapmaker( + compound_formula, iter_reactions(), element_weight) + else: + self.argument_error('Unknown method: {}'.format(self._args.method)) + + if not self._args.report_all_transfers: + result = self._combine_transfers(result) + + for reaction_id, c1, c2, form in result: + if len(elements) == 0 or any(e in form for e in elements): + print('{}\t{}\t{}\t{}'.format(reaction_id, c1, c2, form)) + + def _combine_transfers(self, result): + """Combine multiple pair transfers into one.""" + transfers = {} + for reaction_id, c1, c2, form in result: + key = reaction_id, c1, c2 + combined_form = transfers.setdefault(key, Formula()) + transfers[key] = combined_form | form + + for (reaction_id, c1, c2), form in iteritems(transfers): + yield reaction_id, c1, c2, form + + def _run_find_primary_pairs( + self, compound_formula, reactions, element_weight): + reaction_pairs = [(r.id, r.equation) for r in reactions] + prediction, _ = findprimarypairs.predict_compound_pairs_iterated( + reaction_pairs, compound_formula, element_weight=element_weight) + + for reaction_id, _ in reaction_pairs: + if reaction_id not in prediction: + logger.warning('Failed to predict pairs for {}'.format( + reaction_id)) + continue + + pairs, balance = prediction[reaction_id] + if len(balance) > 0: + logger.warning('Reaction {} is not balanced!'.format( + reaction_id)) + + for (c1, c2), forms in sorted(iteritems(pairs)): + for form in forms: + yield reaction_id, c1, c2, form + + def _run_mapmaker(self, compound_formula, reactions, element_weight): + solver = self._get_solver(integer=True) + for reaction in reactions: + logger.info('Predicting reaction {}...'.format(reaction.id)) + try: + transfer = next(mapmaker.predict_compound_pairs( + reaction.equation, compound_formula, solver, + weight_func=element_weight)) + except mapmaker.UnbalancedReactionError: + logger.info('Reaction {} is not balanced! Skipping...'.format( + reaction.id)) + continue + except StopIteration: + logger.warning('Reaction {} has no predicted transfer!'.format( + reaction.id)) + continue + + for (c1, c2), form in sorted(iteritems(transfer)): + yield reaction.id, c1, c2, form + + +def _parse_weights(weight_args, default_weight=0.6): + """Parse list of weight assignments.""" + weights_dict = {} + r_group_weight = default_weight + for weight_arg in weight_args: + for weight_assignment in weight_arg.split(','): + if '=' not in weight_assignment: + raise ValueError( + 'Invalid weight assignment: {}'.format(weight_assignment)) + + key, value = weight_assignment.split('=', 1) + value = float(value) + if key == 'R': + r_group_weight = value + elif key == '*': + default_weight = value + elif hasattr(Atom, key): + weights_dict[Atom(key)] = value + else: + raise ValueError('Invalid element: {}'.format(key)) + + return weights_dict, r_group_weight, default_weight diff --git a/psamm/tests/test_command.py b/psamm/tests/test_command.py index 497efe1f..9360d8f5 100644 --- a/psamm/tests/test_command.py +++ b/psamm/tests/test_command.py @@ -45,6 +45,7 @@ from psamm.commands.gapfill import GapFillCommand from psamm.commands.genedelete import GeneDeletionCommand from psamm.commands.masscheck import MassConsistencyCommand +from psamm.commands.primarypairs import PrimaryPairsCommand from psamm.commands.randomsparse import RandomSparseNetworkCommand from psamm.commands.robustness import RobustnessCommand from psamm.commands.sbmlexport import SBMLExport @@ -478,6 +479,26 @@ def test_run_masscheck_reaction_with_checked(self): self.run_solver_command( MassConsistencyCommand, ['--type=reaction', '--checked=rxn_3']) + def test_run_primarypairs_with_fpp(self): + self.run_command(PrimaryPairsCommand, ['--method', 'fpp']) + + def test_run_primarypairs_with_fpp_and_report_element(self): + self.run_command( + PrimaryPairsCommand, ['--method', 'fpp', '--report-element', 'C']) + + def test_run_primarypairs_with_fpp_and_report_all_transfers(self): + self.run_command( + PrimaryPairsCommand, ['--method', 'fpp', '--report-all-transfers']) + + def test_run_primarypairs_with_fpp_and_weight(self): + self.run_command( + PrimaryPairsCommand, [ + '--method', 'fpp', '--weights', 'C=1,H=0,R=0.5,*=0.4']) + + def test_run_primarypairs_with_mapmaker(self): + self.run_solver_command( + PrimaryPairsCommand, ['--method', 'mapmaker'], {'integer': True}) + def test_run_randomsparse_reactions(self): self.run_solver_command( RandomSparseNetworkCommand, ['--type=reactions', '50%']) diff --git a/setup.py b/setup.py index 529bf9e9..e29467b0 100755 --- a/setup.py +++ b/setup.py @@ -66,6 +66,7 @@ gapfill = psamm.commands.gapfill:GapFillCommand genedelete = psamm.commands.genedelete:GeneDeletionCommand masscheck = psamm.commands.masscheck:MassConsistencyCommand + primarypairs = psamm.commands.primarypairs:PrimaryPairsCommand randomsparse = psamm.commands.randomsparse:RandomSparseNetworkCommand robustness = psamm.commands.robustness:RobustnessCommand sbmlexport = psamm.commands.sbmlexport:SBMLExport From 417b27de35f6575f2d1a82ddb06a8363e90211bd Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Fri, 23 Jun 2017 15:11:20 -0400 Subject: [PATCH 49/51] docs: Add primarypairs to commands documentation --- docs/commands.rst | 19 +++++++++++++++++++ docs/references.rst | 3 +++ 2 files changed, 22 insertions(+) diff --git a/docs/commands.rst b/docs/commands.rst index 3fc70373..44688442 100644 --- a/docs/commands.rst +++ b/docs/commands.rst @@ -499,6 +499,25 @@ minimal solution. $ psamm-model fastgapfill --penalty penalty.tsv +Predict primary pairs (``primarypairs``) +---------------------------------------------------------------------- + +This command is used to predict element-transferring reactant/product pairs +in the reactions of the model. This can be used to determine the flow of +elements through reactions. Two methods for predicting the pairs are available: +FindPrimaryPairs (``fpp``) [Steffensen17]_ and +MapMaker (``mapmaker``) [Tervo16]_. The ``--method`` option can used to select +which prediction method to use: + +.. code-block:: shell + + $ psamm-model primarypairs --method=fpp + +The result is reported as a table of four columns. The first column is the +reactions ID, the second and third columns contain the compound ID of the +reactant and product. The fourth column contains the predicted transfer of +elements. + SBML Export (``sbmlexport``) ---------------------------- diff --git a/docs/references.rst b/docs/references.rst index bd41aa9c..6b1b7367 100644 --- a/docs/references.rst +++ b/docs/references.rst @@ -37,6 +37,9 @@ References and perturbed metabolic networks. Proceedings of the National Academy of Sciences of the United States of America. 2002;99(23):15112-15117. :doi:`10.1073/pnas.232349399`. +.. [Steffensen17] Steffensen JL, Zhang Y. FindPrimaryPairs: An efficient + algorithm for predicting element-transferring reactant/product pairs in + metabolic networks. *Submitted* .. [Tervo16] Tervo CJ, Reed JL. MapMaker and PathTracer for tracking carbon in genome-scale metabolic models. Biotechnol J. 2016; 1–23. :doi:`10.1002/biot.201400305`. From c02218d9f5318d120c42f2c35683431faf5d2262 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Fri, 23 Jun 2017 11:35:54 -0400 Subject: [PATCH 50/51] Bump version to 0.30 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 903a4031..0912e612 100755 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ setup( name='psamm', - version='0.29', + version='0.30', description='PSAMM metabolic modeling tools', maintainer='Jon Lund Steffensen', maintainer_email='jon_steffensen@uri.edu', From a47e5af9349fb6c2dee3096dacb80282c5751ba4 Mon Sep 17 00:00:00 2001 From: Jon Lund Steffensen Date: Fri, 23 Jun 2017 11:51:30 -0400 Subject: [PATCH 51/51] Update NEWS.md for 0.30 --- NEWS.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/NEWS.md b/NEWS.md index 91ad5a4a..df7048d0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,22 @@ +v0.30 (2017-06-23) +------------------ + +- Adds the new command `primarypairs` for predicting reactant/product element + transfers using the new FindPrimaryPairs method as well as the MapMaker + method. +- A new option has been added to the `genedelete` command which allows use of + _minimization of metabolic adjustments_ to simulate biomass production for + gene knockouts. +- Fixes a bug where the epsilon parameter was accidentally ignored by + `fastgapfill`. +- Fixes a bug where the `psamm-sbml-model` command did not ignore boundary + species. With this change, the boundary species are also ignored by default + when using the API to read SBML models. +- Fixes a performance issues with gap-filling that made the `gapfill` and + `fastgapfill` commands take much longer time to run on large models than + necessary. + v0.29 (2017-04-18) ------------------