Skip to content

Commit

Permalink
--longreads option added for FASTQ long reads inputs such as PacBio, …
Browse files Browse the repository at this point in the history
…ONT, etc. Gives better mapping results
  • Loading branch information
kbessonov1984 committed Nov 7, 2024
1 parent 6e38b40 commit 3963d92
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 5 deletions.
7 changes: 7 additions & 0 deletions ectyper/commandLineOptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ def checkdbversion():
nargs="+"
)

parser.add_argument(
"--longreads",
action="store_true",
default=False,
help="Enable for raw long reads FASTQ inputs (ONT, PacBio, other sequencing platforms). [default %(default)s]"
)

parser.add_argument(
"--maxdirdepth",
help="Maximum number of directories to descend when searching an input directory of files [default %(default)s levels]. Only works on path inputs not containing '*' wildcard",
Expand Down
8 changes: 5 additions & 3 deletions ectyper/genomeFunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def create_bowtie_base(temp_dir, reference, cores):
return bowtie_base


def assemble_reads(reads, bowtie_base, combined_fasta, temp_dir, cores=1):
def assemble_reads(reads, bowtie_base, combined_fasta, temp_dir, cores=1, longreads=False):
"""
Assembles fastq reads to the specified reference file.
:param reads: The fastq file to assemble
Expand All @@ -232,14 +232,15 @@ def assemble_reads(reads, bowtie_base, combined_fasta, temp_dir, cores=1):
bowtie_run = [
'bowtie2',
'--threads',f'{cores}',
'--local',
'--score-min L,1,-0.5',
'--np 5',
'--no-unal',
'-x', bowtie_base,
'-U', reads,
'-S', sam_reads
]
if longreads == True: # for long reads (ONT, PacBio, etc.) use local alignment, because such reads are longer than the reference sequences
bowtie_run.append('--local')

subprocess_util.run_subprocess(bowtie_run)

Expand Down Expand Up @@ -377,7 +378,8 @@ def assemble_fastq(raw_files_dict, temp_dir, combined_fasta, bowtie_base, args):
bowtie_base=bowtie_base,
combined_fasta=combined_fasta,
temp_dir=temp_dir,
cores=cores)
cores=cores,
longreads=args.longreads)

all_fasta_files_dict = dict.fromkeys(raw_files_dict['fasta']) #add assembled genomes as new keys
with Pool(processes=args.cores) as pool:
Expand Down
2 changes: 1 addition & 1 deletion test/test_O_serotyping.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def test_Ecoli_O17H18(caplog):
rows = outfp.readlines()
secondrow=rows[1:][0] #check only second row
assert "Escherichia coli" in secondrow.split('\t')
assert "O17/O77/O44/O106\tH18\tO17/O77/O44/O106:H18\tWARNING MIXED O-TYPE" in secondrow
assert "O17/O44/O77/O106\tH18\tO17/O44/O77/O106:H18\tWARNING MIXED O-TYPE" in secondrow

def test_download_refseq_mash(caplog, tmpdir):
caplog.set_level(logging.DEBUG)
Expand Down
2 changes: 1 addition & 1 deletion test/test_complex_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def test_multiple_inputs(caplog):
output_tsv_lines = fp.readlines()
with open(output_blastn_antigens) as fp:
output_blastn_antigens_lines = fp.readlines()
assert any([True if 'O17/O77/O44/O106:H18' in line else False for line in output_tsv_lines]), "No matches of 'O17/O77/O44/O106:H18' serotype"
assert any([True if 'O17/O44/O77/O106:H18' in line else False for line in output_tsv_lines]), "No matches of 'O17/O44/O77/O106:H18' serotype"
assert any([True if 'O28/O42:H25' in line else False for line in output_tsv_lines]), "No matches of 'O28/O42:H25' serotype"
assert any([True if 'EscherichiaO17H18' in line else False for line in output_blastn_antigens_lines]), "No matches of 'EscherichiaO17H18' in BLAST output"
assert any([True if 'EscherichiaO28H5' in line else False for line in output_blastn_antigens_lines]), "No matches of 'EscherichiaO28H5' in BLAST output"
Expand Down

0 comments on commit 3963d92

Please sign in to comment.