Skip to content

Commit

Permalink
custom dnaapler all
Browse files Browse the repository at this point in the history
  • Loading branch information
gbouras13 committed Feb 5, 2024
1 parent a296b9b commit a140f95
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 5 deletions.
4 changes: 4 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# History

# 0.7.0 (2024-02-05)

* Adds `-c/--custom_db` to `dnaapler all`, allowing a custom database to be specified when reorienting with `dnaapler all`.

# 0.6.0 (2024-01-31)

* Fixes bug where if the starting gene (dnaA/terL/repA) was on the reverse strand and the top BLAST hit did not find the start codon, it would reorient the replicon to begin at the end of the starting gene, not the start. Thanks @susiegriggo
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "dnaapler"
version = "0.6.0" # change VERSION too
version = "0.7.0" # change VERSION too
description = "Reorients assembled microbial sequences"
authors = ["George Bouras <[email protected]>"]
license = "MIT"
Expand Down
60 changes: 58 additions & 2 deletions src/dnaapler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,13 @@ def phage(
type=click.Path(),
required=True,
)
@click.option(
"--ignore",
default="",
help="Text file listing contigs (one per row) that are to be ignored",
type=click.Path(),
show_default=False,
)
@autocomplete_options
def custom(
ctx,
Expand All @@ -356,6 +363,7 @@ def custom(
custom_db,
autocomplete,
seed_value,
ignore,
**kwargs,
):
"""Reorients your genome with a custom database"""
Expand Down Expand Up @@ -668,6 +676,13 @@ def bulk(
help="Lets you choose a subset of databases rather than all 3. Must be one of: 'all', 'dnaa', 'repa', terl', 'dnaa,repa', 'dnaa,terl' or 'repa,terl' ",
show_default=True,
)
@click.option(
"-c",
"--custom_db",
default="",
help="FASTA file with amino acids that will be used as a custom blast database to reorient your sequence however you want.",
type=click.Path(),
)
@autocomplete_options
def all(
ctx,
Expand All @@ -681,6 +696,7 @@ def all(
seed_value,
ignore,
db,
custom_db,
**kwargs,
):
"""Reorients contigs to begin with any of dnaA, repA or terL"""
Expand All @@ -706,6 +722,10 @@ def all(
elif db == "repa,terl":
gene = "repA,terL"

# custom
if custom_db != "":
gene = "custom"

# initial logging etc
start_time = begin_dnaapler(input, output, threads, gene)

Expand All @@ -726,8 +746,37 @@ def all(
logger.info(f"You have specified contigs to ignore in {ignore}.")
exists_contains_txt = validate_ignore_file(ignore)

if gene == "custom":
# validates custom fasta input for database
validate_custom_db_fasta(Path(custom_db))

# make db
db_dir = os.path.join(output, "custom_db")
Path(db_dir).mkdir(parents=True, exist_ok=True)
custom_db_fasta = os.path.join(db_dir, "custom_db.faa")
shutil.copy2(custom_db, custom_db_fasta)

logdir = Path(f"{output}/logs")

# custom db
# make custom db
custom_database = os.path.join(db_dir, "custom_db")
makeblastdb = ExternalTool(
tool="makeblastdb",
input=f"-in {custom_db_fasta}",
output=f"-out {custom_database}",
params="-dbtype prot ",
logdir=logdir,
)

ExternalTool.run_tool(makeblastdb, ctx)
else:
custom_db = None

# runs bulk BLAST
run_bulk_blast(ctx, input, output, prefix, gene, evalue, threads, custom_db=None)
run_bulk_blast(
ctx, input, output, prefix, gene, evalue, threads, custom_db=custom_db
)

# reorients blast
blast_file = os.path.join(output, f"{prefix}_blast_output.txt")
Expand All @@ -746,7 +795,14 @@ def all(
ignore_list = list(ignore_dict)

all_process_blast_output_and_reorient(
input, blast_file, output, prefix, ignore_list, autocomplete, seed_value
input,
blast_file,
output,
prefix,
ignore_list,
autocomplete,
seed_value,
custom_db=custom_db,
)

# end dnaapler
Expand Down
2 changes: 1 addition & 1 deletion src/dnaapler/utils/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.6.0
0.7.0
4 changes: 4 additions & 0 deletions src/dnaapler/utils/all.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def all_process_blast_output_and_reorient(
ignore_list: Path,
autocomplete: str,
seed_value: int,
custom_db: str,
) -> None:
"""Processes the blast output,reorients and saves all contigs into os.path.join(output, f"{prefix}_reoriented.fasta")
Expand Down Expand Up @@ -231,6 +232,9 @@ def all_process_blast_output_and_reorient(
# for phages
if "phrog" in filtered_df["sseqid"][0]:
gene = "terL"
# custom
if custom_db is not None:
gene = "custom"

# update the record description to contain 'rotated=True' akin to how unicycler does it
record.description = (
Expand Down
10 changes: 9 additions & 1 deletion tests/test_overall.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,13 +196,21 @@ def test_bulk_custom(tmp_dir):
exec_command(cmd)


def test_all(tmp_dir):
def test_all_custom(tmp_dir):
"""test all"""
input_fasta: Path = f"{overall_test_data}/all_test.fasta"
cmd = f"dnaapler all -i {input_fasta} -o {tmp_dir} -t 1 -f"
exec_command(cmd)


def test_custom(tmp_dir):
"""test custom"""
input_fasta: Path = f"{overall_test_data}/all_test.fasta"
custom: Path = f"{test_data}/fake_custom.faa"
cmd = f"dnaapler all -i {input_fasta} -o {tmp_dir} -t 1 -f -c {custom}"
exec_command(cmd)


def test_all_dnaa_repa(tmp_dir):
"""test all dnaa repa"""
input_fasta: Path = f"{overall_test_data}/all_test.fasta"
Expand Down

0 comments on commit a140f95

Please sign in to comment.