diff --git a/geckomat/get_enzyme_data/findInDB.m b/geckomat/get_enzyme_data/findInDB.m
index 06b702d54..cffdef468 100644
--- a/geckomat/get_enzyme_data/findInDB.m
+++ b/geckomat/get_enzyme_data/findInDB.m
@@ -1,4 +1,4 @@
-function [uni,EC,MW,Genes,conflicts] = findInDB(grRule,DBprot,DBgenes,DBecNum,DBMW)
+function [uni,EC,MW,Genes,conflicts] = findInDB(grRule,DBprot,DBgenes,DBecNum,DBMW, geneIndex, geneHashMap)
 % findInDB
 %   Gets the uniprots and EC numbers for a given rxn into a given database
 %
@@ -20,6 +20,7 @@
 %
 %   Benjamin J. Sanchez, 2017-08-10
 %   Ivan Domenzain,      2018-09-06
+%	Johan Gustafsson, 	 2021-07-02 introduced optimizations from GeckoLight
 %
 
 %Find simple gene sets for reaction:
@@ -33,19 +34,40 @@
 
 for i = 1:length(gene_sets)
     %Split the gene set and match each gene:
-    gene_set  = strsplit(gene_sets{i},' and ');
+    gene_set  = strsplit(gene_sets{i},' and '); %this string splitting together with getSimpleGeneSets takes 2.6 s, that is ok.
     uni_set   = cell(size(gene_set));
+    uni_set_idx   = cell(size(gene_set));
     EC_set    = cell(size(gene_set));
     genesCell = cell(size(gene_set));
+    
+    %find index indices of all genes in one call
+
+    %this loop is slow
     for j = 1:length(gene_set)
-        gene = gene_set{j};
-        %Get the indexes for all of the proteins related to gene
-        matches=find(sum(strcmpi(gene,DBgenes),2));
-        if ~isempty(matches)
+         gene = gene_set{j};
+         matches = [];
+         %Get the indexes for all of the proteins related to gene
+         if isKey(geneHashMap,gene) %annoyingly, this seems to be needed
+             matchInd = cell2mat(values(geneHashMap,gene_set(j)));
+             matches = geneIndex{matchInd};
+             %matches2=find(sum(strcmpi(gene,DBgenes),2));
+             %if (length(matches) ~= length(matches2))
+             %   disp('misfit length')
+             %end
+             %for i = 1:length(matches)
+             %   if (~all(matches == matches2))
+             %       disp('misfit numbers')
+             %   end
+             %end
+         end
+%    end %remove later
+         %matches=find(sum(strcmpi(gene,DBgenes),2));
+         if ~isempty(matches)
             uni_set{j}   = DBprot{matches};
+            uni_set_idx{j} = matches;
             genesCell{j} = gene;
             %Get the indexes of the matched protein(s) with non-empty EC#s
-            nonEmpty = matches(~cellfun(@isempty,DBecNum(matches)));
+           nonEmpty = matches(~cellfun(@isempty,DBecNum(matches)));
             if ~isempty(nonEmpty)
                 [geneECs,ia] = unique(DBecNum(nonEmpty),'stable');
                 %For genes with multiple proteins associated to several
@@ -55,6 +77,7 @@
                     ecNum = DBecNum(indexes);
                     %Save first match
                     uni_set{j} = DBprot{indexes(1)};
+                    uni_set_idx{j} = indexes(1);
                     EC_set{j}  = getECstring(EC_set{j},ecNum{1});
                     %Save additional matches as potential conflicts
                     if ~ismember(gene,conflicts{1})
@@ -67,24 +90,28 @@
                     [~,minW]   = min(cell2mat(DBMW(nonEmpty)));
                     matches    = nonEmpty(minW);
                     uni_set{j} = DBprot{matches};
+                    uni_set_idx{j} = matches;
                     EC_set{j}  = getECstring(EC_set{j},DBecNum{matches});
                 end
             end
         else
             uni_set{j} = '';
+            uni_set_idx{j} = NaN;
         end
                              
         if isempty(EC_set{j})
             EC_set{j} = '';
         end
-    end
+     end
     %Uniprot: Delete repeated and empty spaces
     [uni_set,repeated] = deleteRepeated(uni_set);
+    uni_set_idx        = uni_set_idx(~repeated);
     genesCell          = genesCell(~repeated);
     emptyIndexes       = cellfun(@isempty,uni_set);
     uni_set            = uni_set(~emptyIndexes);
+    uni_set_idx        = uni_set_idx(~emptyIndexes);
     genesCell          = genesCell(~emptyIndexes);
-    
+
     %EC: Find union and intersection between all units (only applies for
     %complexes, i.e. length(EC_set) > 1):
     uni_EC = strsplit(EC_set{1},' ');
@@ -107,7 +134,13 @@
     end    
     %MW: use uniprot codes + swissprot table:
     for j = 1:length(uni_set)
-        index     = strcmpi(DBprot(:),uni_set{j});
+        %index2     = strcmpi(DBprot(:),uni_set{j});
+        index = uni_set_idx{j};
+        %if (length(find(index2)) ~= length(index))
+        %    disp('Length error')
+        %elseif (~all(index == find(index2)))
+        %    disp('value error')
+        %end
         minWeight = min(DBMW{index});
         MW(i)     = MW(i) + minWeight;
     end
@@ -115,7 +148,7 @@
     uni{i}   = strjoin(uni_set,' ');
     EC{i}    = strjoin(EC_set,' ');
     Genes{i} = strjoin(genesCell,' ');
-end
+ end
 
 %Delete repeated Uniprots (and the corresponding ECs):
 [uni,deleted] = deleteRepeated(uni);
diff --git a/geckomat/get_enzyme_data/findInDBOld.m b/geckomat/get_enzyme_data/findInDBOld.m
new file mode 100644
index 000000000..d37507a2d
--- /dev/null
+++ b/geckomat/get_enzyme_data/findInDBOld.m
@@ -0,0 +1,179 @@
+function [uni,EC,MW,Genes,conflicts] = findInDBOld(grRule,DBprot,DBgenes,DBecNum,DBMW)
+% findInDBOld
+%   Gets the uniprots and EC numbers for a given rxn into a given database - this is the old slow implementation, still used for KEGG
+%
+%   grRule     Genes association for a given metabolic reaction
+%   DBprot     array of uniprot IDs, taken from swissprot or kegg database
+%   DBgenes    array of genes corresponding to DBprot
+%   DBecNum    array of EC numbers corresponding to DBprot
+%   DBMW       array of molecular weights in Da corresponding to DBprot
+%
+%   uni        Array containing all the uniprot IDs related to an isoenzyme,
+%              for a given reaction in each of its cells.
+%   EC         Array containing all the EC numbers related to an isoenzyme,
+%              for a given reaction in each of its cells.
+%   Genes      Array containing all the genes related to an isoenzyme,
+%              for a given reaction in each of its cells.
+%   conflicts  Contains those genes with multiple protein matches. 
+%
+%   Usage: [uni,EC,MW,Genes,conflicts] = findInDB(grRule,DBprot,DBgenes,DBecNum,DBMW)
+%
+%   Benjamin J. Sanchez, 2017-08-10
+%   Ivan Domenzain,      2018-09-06
+%
+
+%Find simple gene sets for reaction:
+gene_sets = getSimpleGeneSets(grRule);
+%Preallocate function variables
+uni       = cell(size(gene_sets));
+EC        = cell(size(gene_sets));
+MW        = zeros(size(gene_sets));
+Genes     = cell(size(gene_sets));
+conflicts = cell(1,2);
+
+for i = 1:length(gene_sets)
+    %Split the gene set and match each gene:
+    gene_set  = strsplit(gene_sets{i},' and ');
+    uni_set   = cell(size(gene_set));
+    EC_set    = cell(size(gene_set));
+    genesCell = cell(size(gene_set));
+    for j = 1:length(gene_set)
+        gene = gene_set{j};
+        %Get the indexes for all of the proteins related to gene
+        matches=find(sum(strcmpi(gene,DBgenes),2));
+        if ~isempty(matches)
+            uni_set{j}   = DBprot{matches};
+            genesCell{j} = gene;
+            %Get the indexes of the matched protein(s) with non-empty EC#s
+            nonEmpty = matches(~cellfun(@isempty,DBecNum(matches)));
+            if ~isempty(nonEmpty)
+                [geneECs,ia] = unique(DBecNum(nonEmpty),'stable');
+                %For genes with multiple proteins associated to several
+                %non-empty ecNumbers
+                if length(geneECs)>1
+                    indexes = nonEmpty(ia);
+                    ecNum = DBecNum(indexes);
+                    %Save first match
+                    uni_set{j} = DBprot{indexes(1)};
+                    EC_set{j}  = getECstring(EC_set{j},ecNum{1});
+                    %Save additional matches as potential conflicts
+                    if ~ismember(gene,conflicts{1})
+                        conflicts{1} = [conflicts{1}; {gene}];
+                        conflicts{2} = [conflicts{2}; {indexes}]; 
+                    end
+                else
+                    %If there is a single unique ec number for the matched
+                    %protein(s), then choose the lightest protein
+                    [~,minW]   = min(cell2mat(DBMW(nonEmpty)));
+                    matches    = nonEmpty(minW);
+                    uni_set{j} = DBprot{matches};
+                    EC_set{j}  = getECstring(EC_set{j},DBecNum{matches});
+                end
+            end
+        else
+            uni_set{j} = '';
+        end
+                             
+        if isempty(EC_set{j})
+            EC_set{j} = '';
+        end
+    end
+    %Uniprot: Delete repeated and empty spaces
+    [uni_set,repeated] = deleteRepeated(uni_set);
+    genesCell          = genesCell(~repeated);
+    emptyIndexes       = cellfun(@isempty,uni_set);
+    uni_set            = uni_set(~emptyIndexes);
+    genesCell          = genesCell(~emptyIndexes);
+    
+    %EC: Find union and intersection between all units (only applies for
+    %complexes, i.e. length(EC_set) > 1):
+    uni_EC = strsplit(EC_set{1},' ');
+    int_EC = uni_EC;
+    for j = 2:length(EC_set)
+        other_EC = strsplit(EC_set{j},' ');
+        if isempty(uni_EC)
+            uni_EC = other_EC;
+            int_EC = other_EC;
+        elseif ~isempty(other_EC)
+            uni_EC = compare_wild([uni_EC other_EC]);
+            int_EC = intersection(int_EC,other_EC);
+        end
+    end
+    %Use the intersection found, if any. If not, use the union: 
+    if isempty(int_EC)
+        EC_set = uni_EC;
+    else
+        EC_set = int_EC;
+    end    
+    %MW: use uniprot codes + swissprot table:
+    for j = 1:length(uni_set)
+        index     = strcmpi(DBprot(:),uni_set{j});
+        minWeight = min(DBMW{index});
+        MW(i)     = MW(i) + minWeight;
+    end
+    %Add new codes as new possible isoenzymes:
+    uni{i}   = strjoin(uni_set,' ');
+    EC{i}    = strjoin(EC_set,' ');
+    Genes{i} = strjoin(genesCell,' ');
+end
+
+%Delete repeated Uniprots (and the corresponding ECs):
+[uni,deleted] = deleteRepeated(uni);
+EC(deleted)   = [];
+MW(deleted)   = [];
+
+end
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+function int_EC = intersection(prev_EC,new_EC)
+%Finds the common elements between two cell arrays, if any. Also considers
+%wildcards (e.g. if 'EC1.1.1.1' is in one array and 'EC1.1.1.-' is in the
+%other one, then 'EC1.1.1.1' is added to the intersection).
+int_EC = {};
+for i = 1:length(prev_EC)
+    for j = 1:length(new_EC)
+        new_int = compare_wild({prev_EC{i} new_EC{j}});
+        if length(new_int) == 1
+            int_EC = [int_EC new_int];
+        end
+    end
+end
+end
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+function int_EC = compare_wild(EC)
+%Goes through a cell array of EC numbers, and erases any repetitions,
+%considering also wildcards (e.g. will erase 'EC1.2.3.-' if 'EC1.2.3.4' is
+%already present).
+
+%Trim all EC numbers of wild cards (e.g. 'EC1.2.-.-' -> 'EC1.2.'):
+EC_trimmed = EC;
+for i = 1:length(EC)
+    %Add a final dot for avoiding issues (e.g. 'EC1.1.1.1' & 'EC1.1.1.12'):
+    ECi = [EC{i} '.'];
+    pos = strfind(ECi,'-');
+    if ~isempty(pos)
+        ECi = ECi(1:pos(1)-1);
+    end
+    EC_trimmed{i} = ECi;
+end
+
+%Compare all EC numbers between them to find if 1 fits in the other:
+non_repeated = true(1,length(EC));
+for i = 1:length(EC)-1
+    for j = i+1:length(EC)
+        ECi = EC_trimmed{i};
+        ECj = EC_trimmed{j};
+        %If ECj fits in ECi then ECj can be disregarded:
+        if strfind(ECi,ECj)
+            non_repeated(j) = false;
+        %Else, if ECi fits in ECj then ECi can be disregarded:
+        elseif strfind(ECj,ECi)
+            non_repeated(i) = false;
+        end
+    end
+end
+
+int_EC = EC(non_repeated);
+
+end
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\ No newline at end of file
diff --git a/geckomat/get_enzyme_data/getEnzymeCodes.m b/geckomat/get_enzyme_data/getEnzymeCodes.m
index fa3994bcd..4d71790d8 100644
--- a/geckomat/get_enzyme_data/getEnzymeCodes.m
+++ b/geckomat/get_enzyme_data/getEnzymeCodes.m
@@ -26,10 +26,11 @@
 % 
 % Benjamin Sanchez. Last edited: 2017-03-05
 % Ivan Domenzain.   Last edited: 2018-09-07
+% Johan Gustafsson  Last edited: 2021-07-02 Introduced optimizations from Gecko Light
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-function model_data = getEnzymeCodes(model,action)
+function model_data = getEnzymeCodes(model, action)
 
-if nargin<2
+if nargin<3
     action = 'display';
 end
 
@@ -58,7 +59,9 @@
 
 [m,n]      = size(model.S);
 substrates = cell(n,20);
+substrateIndices = zeros(n,20);
 products   = cell(n,20);
+productIndices   = zeros(n,20);
 uniprots   = cell(n,20);
 Genes      = cell(n,20);
 EC_numbers = cell(n,20);
@@ -67,6 +70,26 @@
 count      = zeros(4,1);
 rgmat      = full(model.rxnGeneMat);
 conflicts  = cell(1,4);
+
+%Build an index from gene to prot for faster processing later
+[x,y] = size(DBgenesSwissprot);
+genesForIndex = reshape(DBgenesSwissprot, x*y, 1);
+genesForIndex = genesForIndex(~cellfun(@isempty, genesForIndex));
+genesForIndex = unique(genesForIndex); %18360
+geneIndex = cell(length(genesForIndex),1);
+geneHashMap = containers.Map(genesForIndex,1:length(genesForIndex));
+protIndices = 1:length(DBgenesSwissprot(:,1));
+for i = 1:y
+    tmp1 = DBgenesSwissprot(:,i);
+    sel = ~cellfun(@isempty, tmp1);
+    indices = cell2mat(values(geneHashMap,tmp1(sel)));
+    protIndicesSel = protIndices(sel);
+    for j = 1:length(indices)
+        geneIndex{indices(j)} = [geneIndex{indices(j)};protIndicesSel(j)]; 
+    end
+end
+
+
 for i = 1:n
     ks  = 1;
     kp  = 1;
@@ -74,23 +97,32 @@
     inv = 0;
     DB = '';
     %Save the substrates and products (if rxn is reversible):
-    for j = 1:m
-        if model.S(j,i) < 0 && model.ub(i) > 0
-            substrates{i,ks} = model.metNames{j};
-            ks  = ks+1;
-            dir = 1;
-        elseif model.S(j,i) > 0 && model.lb(i) < 0
-            products{i,kp} = model.metNames{j};
-            kp  = kp+1;
-            inv = 1;
-        end
+    %This code was opimized (a loop through every element was replaced)
+    %It was tested and produces the same results, but roughly 2000 times faster
+    %This part executes in roughly 0.3 seconds for all genes, so it is no
+    %longer an issue regarding performance
+    if model.ub(i) > 0
+       sel = model.S(:,i) < 0;
+       sbstrs = model.metNames(sel); 
+       substrates(i,1:length(sbstrs)) = sbstrs;
+       ind = find(sel);
+       substrateIndices(i,1:length(ind)) = ind;
+       dir = length(sbstrs) > 0;
+    end
+    if model.lb(i) < 0
+       sel = model.S(:,i) > 0;
+       prds = model.metNames(sel); 
+       products(i,1:length(prds)) = prds;
+       ind = find(sel);
+       productIndices(i,1:length(ind)) = ind;
+       inv = length(prds) > 0;
     end
     %isrev(i) = 0 if rxn is blocked, = 1 if non-reversible, and = 2 if
     %reversible:
     isrev(i) = dir + inv;
     if ~isempty(model.grRules{i})
         %Find match in Swissprot:
-        [new_uni,new_EC,new_MW,newGene,multGenes] = findInDB(model.grRules{i},DBprotSwissprot,DBgenesSwissprot,DBecNumSwissprot,DBMWSwissprot);
+        [new_uni,new_EC,new_MW,newGene,multGenes] = findInDB(model.grRules{i},DBprotSwissprot,DBgenesSwissprot,DBecNumSwissprot,DBMWSwissprot,geneIndex,geneHashMap);
         if ~isempty(union_string(new_EC))
             count(1) = count(1) + isrev(i);
             DBase    = 'swissprot';
@@ -98,8 +130,8 @@
                 multGenes{3} = DBase;
             end
         else
-            %Find match in KEGG:
-            [new_uni,new_EC,new_MW,newGene,multGenes] = findInDB(model.grRules{i},DBprotKEGG,DBgenesKEGG,DBecNumKEGG,DBMWKEGG);
+            %Find match in KEGG (skipped optimizing this step) - may be good to fix this later to get rid of the findInDBOld function:
+            [new_uni,new_EC,new_MW,newGene,multGenes] = findInDBOld(model.grRules{i},DBprotKEGG,DBgenesKEGG,DBecNumKEGG,DBMWKEGG);
             if ~isempty(union_string(new_EC))
                 count(2) = count(2) + isrev(i);
                 DBase    = 'kegg';
@@ -157,7 +189,9 @@
 end
 model_data.model        = model;
 model_data.substrates   = substrates;
+model_data.substrateIndices   = substrateIndices; %for optimizing matchKcats
 model_data.products     = products;
+model_data.productIndices     = productIndices; %for optimizing matchKcats
 model_data.matchedGenes = Genes;
 model_data.uniprots     = uniprots;
 model_data.EC_numbers   = EC_numbers;
diff --git a/geckomat/get_enzyme_data/loadBRENDAdata.m b/geckomat/get_enzyme_data/loadBRENDAdata.m
index 2bb50024c..84537a976 100644
--- a/geckomat/get_enzyme_data/loadBRENDAdata.m
+++ b/geckomat/get_enzyme_data/loadBRENDAdata.m
@@ -15,11 +15,29 @@
          SAcell{i} = [];
      end
      previousEC = []; EC_indexes = [];
+     
+    %build an index on MW{1} to speed things up a bit
+    %first just extract the genus (i.e. the first part of the name)
+    MWECNum = upper(unique(MW{1}));
+    MWECNumIndices = cell(length(MWECNum),1);
+    MWECNumHashMap = containers.Map(MWECNum,1:length(MWECNum));
+    for i = 1:length(MW{1})
+         matchInd = cell2mat(values(MWECNumHashMap, MW{1}(i)));
+         MWECNumIndices{matchInd} = [MWECNumIndices{matchInd};i];
+    end
+
+     
      for i=1:length(SA{1})
          %Gets the indexes of the EC repetitions in the MW cell for every
          %new (different) EC
          if ~strcmpi(SA{1}(i), previousEC)
-             EC_indexes = find(strcmpi(SA{1}(i),MW{1}));
+             key = upper(SA{1}(i));
+             if isKey(MWECNumHashMap,key) %annoyingly, this seems to be needed
+                 matchInd = cell2mat(values(MWECNumHashMap,key));
+                 EC_indexes = MWECNumIndices{matchInd};
+             else
+                 EC_indexes = [];
+             end
          end
          mwEC{1} = MW{3}(EC_indexes); mwEC{2} = MW{4}(EC_indexes);
          % just looks for the first match because just the maximal value for
diff --git a/geckomat/get_enzyme_data/matchKcats.m b/geckomat/get_enzyme_data/matchKcats.m
index ab0da16c9..93901ac7f 100644
--- a/geckomat/get_enzyme_data/matchKcats.m
+++ b/geckomat/get_enzyme_data/matchKcats.m
@@ -1,9 +1,11 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- % kcats = matchKcats(model_data,org_name)
+ % kcats = matchKcats(model_data,org_name, minAcceptableKCat)
  % Matchs the model EC numbers and substrates to the BRENDA database, to
  % return the corresponding kcats for each reaction.
  %
 % INPUT:    Model data structure (generated by getECnumbers.m)
+%           minAcceptableKCat [optional] Set this value if you want to
+%           replace too low kcats with a minimum value (unit: s^-1)
  % OUTPUTS:  kcats, which contains:
  %           *forw.kcats:   kcat values for the forward reactions (mxn)
  %           *forw.org_s:   Number of matches for organism - substrate in
@@ -47,8 +49,15 @@
  % 
  % Benjamin J. Sanchez. Last edited: 2016-03-01
  % Ivan Domenzain.      Last edited: 2018-01-16
+ % Johan Gustafsson		Last edited: 2021-07-02 Introduced optimizations from GeckoLight
  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- function kcats = matchKcats(model_data, org_name)
+ function kcats = matchKcats(model_data, org_name, minAcceptableKCat)
+ 
+ if nargin < 3
+     minAcceptableKCat = 0;
+ end
+ 
+ minAcceptableKCat = minAcceptableKCat * 3600;%convert to per hour
  
  fprintf('Matching kcats...')
  
@@ -60,9 +69,29 @@
  phylDistStruct =  KEGG_struct;
  %Get the KEGG code for the model's organism
  org_index      = find_inKEGG(org_name,phylDistStruct.names);
+ %build an index for genus in the phyl dist struct
+ %first just extract the genus (i.e. the first part of the name)
+ phylDistStruct.genus = cell(length(phylDistStruct.names),1);
+ for i = 1:length(phylDistStruct.genus)
+    name = phylDistStruct.names{i};
+    phylDistStruct.genus{i} = lower(name(1:(strfind(name,' ')-1))); %convert all to lower case to avoid problems with case
+ end
+ %create a map for the genuses
+ phylDistStruct.uniqueGenusList = unique(phylDistStruct.genus);
+ phylDistStruct.genusHashMap = containers.Map(phylDistStruct.uniqueGenusList,1:length(phylDistStruct.uniqueGenusList));
+ phylDistStruct.uniqueGenusIndices = cell(length(phylDistStruct.uniqueGenusList),1);
+ 
+ %Then for each genus create a list with indices to the names
+ for i = 1:length(phylDistStruct.genus)
+     matchInd = cell2mat(values(phylDistStruct.genusHashMap,phylDistStruct.genus(i)));
+     phylDistStruct.uniqueGenusIndices{matchInd} = [phylDistStruct.uniqueGenusIndices{matchInd};i];
+ end
+
  %Extract relevant info from model_data:
  substrates = model_data.substrates;
+ substrateIndices = model_data.substrateIndices;
  products   = model_data.products;
+ productIndices = model_data.productIndices;
  EC_numbers = model_data.EC_numbers;
  MWs        = model_data.MWs;
  model      = model_data.model;
@@ -75,6 +104,7 @@
  forw.rest_ns = zeros(mM,nM);
  forw.org_sa  = zeros(mM,nM);
  forw.rest_sa = zeros(mM,nM);
+ forw.wcLevel = NaN(mM,nM);
  back.kcats   = zeros(mM,nM);
  back.org_s   = zeros(mM,nM);
  back.rest_s  = zeros(mM,nM);
@@ -82,6 +112,7 @@
  back.rest_ns = zeros(mM,nM);
  back.org_sa  = zeros(mM,nM);
  back.rest_sa = zeros(mM,nM);
+ back.wcLevel = NaN(mM,nM);
  tot.queries  = 0;
  tot.org_s    = 0;
  tot.rest_s   = 0;
@@ -96,23 +127,39 @@
  tot.wc4      = 0;
  tot.matrix   = zeros(6,5);
  
+ %build an EC index to speed things up a bit - many of the ECs appear
+ %many times - unnecessary to compare them all
+ %so, here, each EC string appears only once, and you get a vector with
+ %indices to the rows in KCATcell
+ [ECIndexIds,~,ic] = unique(KCATcell{1});
+ EcIndexIndices = cell(length(ECIndexIds),1);
+ for i = 1:length(EcIndexIndices)
+     EcIndexIndices{i} = find(ic == i).';
+ end
+ 
+ 
+ 
  %Main loop: 
   for i = 1:mM
      %Match:
      for j = 1:nM
          EC = EC_numbers{i,j};
          MW = MWs(i,j);
+         if (isempty(EC))
+             break;
+         end
+         EC = strsplit(EC,' ');
          %Try to match direct reaction:
-         if ~isempty(EC) && ~isempty(substrates{i,1})
+         if ~isempty(substrates{i,1})
               [forw,tot] = iterativeMatch(EC,MW,substrates(i,:),i,j,KCATcell,...
                                           forw,tot,model,org_name,...
-                                          phylDistStruct,org_index,SAcell);
+                                          phylDistStruct,org_index,SAcell,ECIndexIds,EcIndexIndices, substrateIndices(i,:), minAcceptableKCat);
          end
          %Repeat for inverse reaction:
-         if ~isempty(EC) && ~isempty(products{i,1})
+         if ~isempty(products{i,1})
              [back,tot] = iterativeMatch(EC,MW,products(i,:),i,j,KCATcell,...
                                          back,tot,model,org_name,...
-                                         phylDistStruct,org_index,SAcell);
+                                         phylDistStruct,org_index,SAcell,ECIndexIds,EcIndexIndices, productIndices(i,:), minAcceptableKCat);
          end
      end
      %Display progress:
@@ -130,11 +177,10 @@
  end
  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  function [dir,tot] =iterativeMatch(EC,MW,subs,i,j,KCATcell,dir,tot,model,...
-                                    name,phylDist,org_index,SAcell)
+                                    name,phylDist,org_index,SAcell,ECIndexIds,EcIndexIndices, subsIndices, minAcceptableKCat)
  %Will iteratively try to match the EC number to some registry in BRENDA,
  %using each time one additional wildcard.
  
- EC      = strsplit(EC,' ');
  kcat    = zeros(size(EC));
  origin  = zeros(size(EC));
  matches = zeros(size(EC));
@@ -145,7 +191,7 @@
          %Atempt match:
          [kcat(k),origin(k),matches(k)] = mainMatch(EC{k},MW,subs,KCATcell,...
                                                     model,i,name,phylDist,...
-                                                    org_index,SAcell);
+                                                    org_index,SAcell,ECIndexIds,EcIndexIndices,subsIndices, minAcceptableKCat);
          %If any match found, ends. If not, introduces one extra wild card and
          %tries again:
          if origin(k) > 0
@@ -180,6 +226,7 @@
      dir.org_sa(i,j)  = matches*(origin == 4);
      dir.rest_ns(i,j) = matches*(origin == 5);    
      dir.rest_sa(i,j) = matches*(origin == 6);
+     dir.wcLevel(i,j) = wc_num;
      tot.org_s        = tot.org_s   + (origin == 1);
      tot.rest_s       = tot.rest_s  + (origin == 2);
      tot.org_ns       = tot.org_ns  + (origin == 3);
@@ -199,48 +246,62 @@
  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  
  function [kcat,origin,matches] = mainMatch(EC,MW,subs,KCATcell,model,i,...
-                                      name,phylDist,org_index,SAcell)
+                                      name,phylDist,org_index,SAcell,ECIndexIds,EcIndexIndices,subsIndices, minAcceptableKCat)
                                                                    
+ %First make the string matching. This takes time, so we only want to do
+ %this once:
+ %Relaxes matching if wild cards are present:
+ wild     = false;
+ wild_pos = strfind(EC,'-');
+ if ~isempty(wild_pos)
+     EC   = EC(1:wild_pos(1)-1);
+     wild = true;
+ end
+ stringMatchesEC_cell = extract_string_matches(EC,KCATcell{1},wild,ECIndexIds,EcIndexIndices);
+
  % Matching function prioritizing organism and substrate specificity when 
  % available.
  
  origin = 0;
  %First try to match organism and substrate:
  [kcat,matches] = matchKcat(EC,MW,subs,KCATcell,name,true,false,model,i,...
-                            phylDist,org_index,SAcell);                      
+                            phylDist,org_index,SAcell,stringMatchesEC_cell,[],subsIndices, minAcceptableKCat);                      
  if matches > 0
      origin = 1;
  %If no match, try the closest organism but match the substrate:
  else   
     [kcat,matches] = matchKcat(EC,MW,subs,KCATcell,'',true,false,model,i,...
-                               phylDist,org_index,SAcell);
+                               phylDist,org_index,SAcell,stringMatchesEC_cell,[],subsIndices, minAcceptableKCat);
      if matches > 0
          origin = 2;
      %If no match, try to match organism but with any substrate:
      else
          [kcat,matches] = matchKcat(EC,MW,subs,KCATcell,name,false,false,...
-                                    model,i,phylDist,org_index,SAcell);
+                                    model,i,phylDist,org_index,SAcell,stringMatchesEC_cell,[],subsIndices, minAcceptableKCat);
          if matches > 0
              origin = 3;
          %If no match, try to match organism but for any substrate (SA*MW):
          else
+              %create matching index for SA, has not been needed until now
+              stringMatchesSA = extract_string_matches(EC,SAcell{1},wild,[],[]);
+             
               [kcat,matches] = matchKcat(EC,MW,subs,KCATcell,name,false,...
                                          true,model,i,phylDist,org_index,...
-                                         SAcell);
+                                         SAcell,stringMatchesEC_cell,stringMatchesSA,subsIndices, minAcceptableKCat);
               if matches > 0
                   origin = 4; 
              %If no match, try any organism and any substrate:
               else
                  [kcat,matches] = matchKcat(EC,MW,subs,KCATcell,'',false,...
                                             false,model,i,phylDist,...
-                                            org_index,SAcell);
+                                            org_index,SAcell,stringMatchesEC_cell,stringMatchesSA,subsIndices, minAcceptableKCat);
                  if matches > 0
                      origin = 5;
                  %Again if no match, look for any org and SA*MW    
                   else
                       [kcat,matches] = matchKcat(EC,MW,subs,KCATcell,'',...
                                                  false,true,model,i,phylDist,...
-                                                 org_index, SAcell);
+                                                 org_index,SAcell,stringMatchesEC_cell,stringMatchesSA,subsIndices, minAcceptableKCat);
                       if matches > 0
                           origin = 6;
                       end
@@ -254,45 +315,61 @@
  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  function [kcat,matches] = matchKcat(EC,MW,subs,KCATcell,organism,...
                                      substrate,SA,model,i,phylDist,...
-                                                         org_index,SAcell)
+                                     org_index,SAcell,KCATcellMatches,SAcellMatches,subsIndices,minAcceptableKCat)
                   
  %Will go through BRENDA and will record any match. Afterwards, it will
  %return the average value and the number of matches attained.
  kcat    = [];
  matches = 0;
- %Relaxes matching if wild cards are present:
- wild     = false;
- wild_pos = strfind(EC,'-');
- if ~isempty(wild_pos)
-     EC   = EC(1:wild_pos(1)-1);
-     wild = true;
- end
-
+ 
  if SA
-     EC_indexes = extract_indexes(EC,SAcell{1},[],SAcell{2},subs,substrate,...
-                                  organism,org_index,phylDist,wild);
+     %SAcell{1},wild,[],[]
+     EC_indexes = extract_indexes(SAcellMatches,[],SAcell{2},subs,substrate,...
+                                  organism,org_index,phylDist);
+
      kcat       = SAcell{3}(EC_indexes);
      org_cell   = SAcell{2}(EC_indexes);
      MW_BRENDA  = SAcell{4}(EC_indexes);
+     
+     %to handle bad kcat values that totally dominate the modeling, we do
+     %not accept a lower kcat than 1 s^-1, i.e. 3600 h^-1
+     %need to handle this in several places, since it is sometimes modified
+     %for stoichiometry
+     kcat(kcat < minAcceptableKCat) = minAcceptableKCat;
  else
-     EC_indexes = extract_indexes(EC,KCATcell{1},KCATcell{2},KCATcell{3},...
+     %KCATcell{1},wild,ECIndexIds,EcIndexIndices
+     EC_indexes = extract_indexes(KCATcellMatches,KCATcell{2},KCATcell{3},...
                                   subs,substrate,organism,org_index,...
-                                                      phylDist,wild);
+                                  phylDist);
      if substrate
          for j = 1:length(EC_indexes)
              indx = EC_indexes(j);
              for k = 1:length(subs)
-                 l = logical(strcmpi(model.metNames,subs{k}).*(model.S(:,i)~=0));
+                 if (isempty(subs{k}))
+                     break;
+                 end
+                 %l = logical(strcmpi(model.metNames,subs{k}).*(model.S(:,i)~=0)); %I don't understand the .* (model.S(:,i)~=0) part, it shouldn't be needed?/JG;
+                 l = subsIndices(k);
                  if ~isempty(subs{k}) && strcmpi(subs{k},KCATcell{2}(indx))
                      if KCATcell{4}(indx) > 0 
                          coeff = min(abs(model.S(l,i)));
-                         kcat  = [kcat;KCATcell{4}(indx)/coeff];
+                         kCatTmp = KCATcell{4}(indx);
+                         %to handle bad kcat values that totally dominate the modeling, we do
+                         %not accept a lower kcat than 1 s^-1, i.e. 3600 h^-1
+                         %need to handle this in several places, since it is sometimes modified
+                         %for stoichiometry
+                         if kCatTmp < minAcceptableKCat
+                             kCatTmp = minAcceptableKCat;
+                         end
+                         
+                         kcat  = [kcat;kCatTmp/coeff];
                      end
                  end
              end
          end
      else
          kcat = KCATcell{4}(EC_indexes);
+         kcat(kcat < minAcceptableKCat) = minAcceptableKCat;
      end
  end                         
  %Return maximum value:
@@ -314,34 +391,64 @@
      kcat = 1E7*3600;
  end
  end
+ 
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+ %Make the string matches of the ECs. This is heavy, so only do it once! 
+ %
+ function EC_indexes = extract_string_matches(EC,EC_cell,wild,ECIndexIds,EcIndexIndices)
+ EC_indexes = [];
+ EC_indexesOld = [];
+     if wild
+        if (~isempty(ECIndexIds)) %In some cases the EC_cell is not from KCatCell
+            X = find(contains(ECIndexIds, EC));
+            for j = 1:length(X)
+                EC_indexes = [EC_indexes,EcIndexIndices{X(j)}];
+            end
+        else %Not optimized
+           for j=1:length(EC_cell)
+                if strfind(EC_cell{j},EC)==1
+                    EC_indexes = [EC_indexes,j];
+                end
+           end         
+        end
+     else
+         if (~isempty(ECIndexIds)) %In some cases the EC_cell is not from KCatCell
+             mtch = find(strcmpi(EC,ECIndexIds));
+             if (~isempty(mtch))
+                 EC_indexes = EcIndexIndices{mtch};
+             end
+         else %%Not optimized 
+             if ~isempty(EC_cell)
+                EC_indexes = transpose(find(strcmpi(EC,EC_cell)));
+             end
+         end
+     end
+
+ end
  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  %Extract the indexes of the entries in the BRENDA data that meet the 
  %conditions specified by the search criteria
- function EC_indexes = extract_indexes(EC,EC_cell,subs_cell,orgs_cell,subs,...
+ function EC_indexes = extract_indexes(EC_indCellStringMatches,subs_cell,orgs_cell,subs,...
                                        substrate,organism, org_index,...
-                                       phylDist,wild)
+                                       phylDist)
+ 
+ EC_indexes = EC_indCellStringMatches;%reuse so the string comparisons are not run many times
  
- EC_indexes = [];
- if wild
-   for j=1:length(EC_cell)
-        if strfind(EC_cell{j},EC)==1
-            EC_indexes = [EC_indexes,j];
-        end
-    end   
- else
-    EC_indexes = transpose(find(strcmpi(EC,EC_cell)));  
- end
  %If substrate=true then it will extract only the substrates appereances 
  %indexes in the EC subset from the BRENDA cell array
+ 
  if substrate
-     Subs_indexes = [];
-     for l = 1:length(subs)
-         if ~isempty(subs(l))
+     if (~isempty(EC_indexes)) %optimization
+         Subs_indexes = [];
+         for l = 1:length(subs)
+             if (isempty(subs{l}))
+                 break;
+             end
              Subs_indexes = horzcat(Subs_indexes,EC_indexes(strcmpi(subs(l),...
                                     subs_cell(EC_indexes))));          
          end
+         EC_indexes = Subs_indexes;    
      end
-     EC_indexes = Subs_indexes;    
  end
  
  EC_orgs = orgs_cell(EC_indexes);
@@ -359,7 +466,6 @@
      %on the BRENDA cell array it should search for a KEGG code for each of 
      %these 
      for j=1:length(EC_indexes)
- 
          %Assigns a KEGG index for those found on the KEGG struct
          orgs_index = find(strcmpi(orgs_cell(EC_indexes(j)),phylDist.names),1);
          if ~isempty(orgs_index)
@@ -367,17 +473,17 @@
              temp         = [temp;EC_indexes(j)];
          %For values related to organisms without KEGG code, then it will
          %look for KEGG code for the first organism with the same genus
-         else
-             k=1;
-             while isempty(orgs_index) && k<length(phylDist.names)
-                 str = phylDist.names{k};
-                 org = orgs_cell{EC_indexes(j)};
-                 if strcmpi(org(1:strfind(org,' ')-1),str(1:strfind(str,' ')-1))
-                     orgs_index   = k; 
-                     KEGG_indexes = [KEGG_indexes;k];
-                     temp         = [temp;EC_indexes(j)];
-                 end
-                 k = k+1;
+         else 
+             org = orgs_cell{EC_indexes(j)};
+             orgGenus = lower(org(1:(strfind(org,' ')-1)));
+             if isKey(phylDist.genusHashMap,orgGenus) %annoyingly, this seems to be needed
+                 matchInd = cell2mat(values(phylDist.genusHashMap,{orgGenus}));
+                 matches = phylDist.uniqueGenusIndices{matchInd};
+                 k = matches(1);
+                 k2 = k;%tmp, remove later
+                 orgs_index   = k; 
+                 KEGG_indexes = [KEGG_indexes;k];
+                 temp         = [temp;EC_indexes(j)];
              end
          end
      end
@@ -386,13 +492,10 @@
      %Looks for the taxonomically closest organism and saves the index of
      %its appearences in the BRENDA cell
      if ~isempty(EC_indexes)
-         distances  = num2cell(phylDist.distMat(org_index,:));
-         distances  = distances(KEGG_indexes);
-         EC_indexes = EC_indexes(cell2mat(distances) ==...
-                                                 min(cell2mat(distances)));                     
+         distances = phylDist.distMat(org_index,KEGG_indexes);
+         EC_indexes = EC_indexes(distances == min(distances));
      end
  end
- 
  end 
  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  function org_index = find_inKEGG(org_name,names)