Skip to content

Commit

Permalink
Merge branch 'main' into stable
Browse files (browse the repository at this point in the history)
  • Loading branch information
guillaume-gricourt committed Aug 26, 2022
2 parents: 0d49b04 + 75c7dd2; commit: 828d1b0
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 34 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## [2.2.1](https://github.com/brsynth/rpfbagr/tree/2.2.1) (2022-08-25)

[Full Changelog](https://github.com/brsynth/rpfbagr/compare/2.2.0...2.2.1)

## [2.2.0](https://github.com/brsynth/rpfbagr/tree/2.2.0) (2022-08-24)

[Full Changelog](https://github.com/brsynth/rpfbagr/compare/2.1.0...2.2.0)
Expand Down
17 changes: 1 addition & 16 deletions src/rpfbagr/__main__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import argparse
import getpass
import logging
import os
import sys
import tempfile

os.environ["XDG_CACHE_HOME"] = tempfile.TemporaryDirectory().name
from rpfbagr.medium import associate_flux_env, load_medium
from rpfbagr.metabolic import gene_ko, gene_ou
from rpfbagr.preprocess import build_model, genes_annotate, save_results
Expand Down Expand Up @@ -158,18 +155,6 @@ def main():
logger.debug("Create out directory: %s")
os.makedirs(os.path.dirname(args.output_file_tsv))

try:
getpass.getuser()
except Exception as e:
if args.email:
os.environ["USERNAME"] = args.email
else:
logger.error(str(e))
logger.error(
"A login name must be provided for Cameo with --email argument"
)
parser.exit(1)

# Load model
logger.info("Build model")
model = build_model(
Expand Down Expand Up @@ -225,7 +210,7 @@ def main():
if res is not None:
if args.email and args.strategy == "ko":
logger.info("Perform gene annotation")
res = genes_annotate(model=model, df=res, email=args.email)
res = genes_annotate(model=model, df=res, email=args.email, logger=logger)
logger.info("Save results")
if args.output_file_csv:
save_results(res, path=args.output_file_csv, sep=",")
Expand Down
2 changes: 1 addition & 1 deletion src/rpfbagr/_version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__app_name__ = "rpfbagr"
__version__ = "2.2.0"
__version__ = "2.2.1"
45 changes: 28 additions & 17 deletions src/rpfbagr/preprocess.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import ast
import logging
import time
from typing import Dict

import pandas as pd
Expand Down Expand Up @@ -44,10 +45,16 @@ def build_model(
return model


def genes_annotate(model: Model, df: pd.DataFrame, email: str) -> pd.DataFrame:
def genes_annotate(
model: Model,
df: pd.DataFrame,
email: str,
logger: logging.Logger,
) -> pd.DataFrame:

if df.empty:
return df
is_ncbi_error = False
Entrez.email = email
cache: Dict[str, str] = {}
for ix in df.index:
Expand All @@ -64,26 +71,30 @@ def genes_annotate(model: Model, df: pd.DataFrame, email: str) -> pd.DataFrame:
model_gene = model.genes.get_by_id(gene)
ncbi_gene = model_gene.annotation.get("ncbigene", "")
if gene not in cache.keys():
if ncbi_gene == "":
label = gene
else:
hd = Entrez.esummary(db="gene", id=ncbi_gene)
rec = Entrez.read(hd, validate=False)
rec = rec.get("DocumentSummarySet", {})
rec = rec.get("DocumentSummary", [])
label = gene
if len(rec) > 0:
name = rec[0].get("Name", "")
name = name.replace(",", "")
desc = rec[0].get("Description", "")
desc = desc.replace(",", "")
syn = rec[0].get("OtherAliases", "")
syn = syn.replace(",", "")
label = "%s=%s - %s" % (name, syn, desc)
label = gene
if ncbi_gene != "":
try:
hd = Entrez.esummary(db="gene", id=ncbi_gene)
rec = Entrez.read(hd, validate=False)
rec = rec.get("DocumentSummarySet", {})
rec = rec.get("DocumentSummary", [])
if len(rec) > 0:
name = rec[0].get("Name", "")
name = name.replace(",", "")
desc = rec[0].get("Description", "")
desc = desc.replace(",", "")
syn = rec[0].get("OtherAliases", "")
syn = syn.replace(",", "")
label = "%s=%s - %s" % (name, syn, desc)
time.sleep(2)
except Exception:
is_ncbi_error = True
cache[gene] = label
labels.append(cache[gene])
labels_groups.append("(%s)" % (",".join(labels),))
df.at[ix, "genes_annotation"] = ",".join(labels_groups)
if is_ncbi_error:
logger.warning("NCBI annotation failing for some items")
return df


Expand Down

0 comments on commit 828d1b0

Please sign in to comment.