Skip to content

Commit

Permalink
allow anchor selection directly in read_anchors(), change ft.loc_rang…
Browse files Browse the repository at this point in the history
…e to ft.locs.range
  • Loading branch information
trichter committed Nov 8, 2024
1 parent 49982b6 commit 0f0f2b8
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 20 deletions.
19 changes: 10 additions & 9 deletions anchorna/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from sugar import read
from anchorna.core import combine, cutout, find_my_anchors
from anchorna.io import export_dialign, export_jalview, read_anchors, load_selected_anchors
from anchorna.io import export_dialign, export_jalview, read_anchors
from anchorna.util import _apply_mode


Expand Down Expand Up @@ -108,7 +108,7 @@ def _tutorial_seqs(subset=False):
seqs = read(files('anchorna.tests.data').joinpath('pesti56.gff.zip'))
if subset:
seqs = seqs[18:28]
start, stop = zip(*[seq.fts.get('cds').loc_range for seq in seqs])
start, stop = zip(*[seq.fts.get('cds').locs.range for seq in seqs])
start = max(start)
stop = min(stop)
stop = start + (stop-start) // 3 * 3 + 6
Expand Down Expand Up @@ -169,7 +169,7 @@ def _cmd_go(fname, fname_anchor, pbar=True, continue_with=None,
anchors.write(fname_anchor)

def _cmd_print(fname_anchor, verbose=False, mode='aa'):
anchors = load_selected_anchors(fname_anchor)
anchors = read_anchors(fname_anchor)
if anchors.no_cds:
mode = 'aa'
try:
Expand All @@ -183,7 +183,7 @@ def _cmd_load(fname_anchor):
def _cmd_export(fname_anchor, out, mode='aa', score_use_fluke=None, fmt='gff',
fname=None):
assert mode in ('nt', 'cds', 'aa')
anchors = load_selected_anchors(fname_anchor)
anchors = read_anchors(fname_anchor)
if anchors.no_cds:
mode = 'aa'
if fmt in ('jalview', 'dialign'):
Expand Down Expand Up @@ -240,17 +240,17 @@ def _cmd_view(fname_anchor, fname, mode='aa', align=None, score_use_fluke=None):
subprocess.run(f'jalview {fname_seq} --features {fname_export}'.split())

def _cmd_combine(fname_anchor, out):
lot_of_anchors = [load_selected_anchors(fn) for fn in fname_anchor]
lot_of_anchors = [read_anchors(fn) for fn in fname_anchor]
anchors = combine(lot_of_anchors)
anchors.write(out)

def _cmd_cutout(fname, fname_anchor, pos1, pos2, out, fmt, mode='nt', score_use_fluke=None):
def _cmd_cutout(fname, fname_anchor, pos1, pos2, out, fmt, mode='nt', score_use_fluke=None, gap=None):
assert mode in ('nt', 'cds', 'aa')
seqs = read(fname)
anchors = load_selected_anchors(fname_anchor)
anchors = read_anchors(fname_anchor)
if anchors.no_cds:
mode = 'aa'
seqs2 = cutout(seqs, anchors, pos1, pos2, mode=mode, score_use_fluke=score_use_fluke)
seqs2 = cutout(seqs, anchors, pos1, pos2, mode=mode, score_use_fluke=score_use_fluke, gap=gap)
if out is None:
print(seqs2.tofmtstr(fmt or 'fasta'))
else:
Expand Down Expand Up @@ -342,7 +342,7 @@ def run_cmdline(cmd_args=None):
'Each position has 3 parts ABC where B and C are optional. '
'Part A: Is a number or number prepended with letter a to specify the anchor number, '
'use special words "start" and "end" for start or end of sequence, '
'use special words "ATG" and "*" for start or stop codon of sequence (only allowed in mode "seq") '
'use special words "ATG" and "*" for start or stop codon of sequence (only allowed in mode "nt") '
'Part B: One of the characters <, >, ^, for start, end or middle of word (anchor) specified in A, '
'default is < for the first anchor and > for the second anchor, must be omitted for A=start or A=end. '
'Part C: Additional character offset in the form +X or -X. '
Expand Down Expand Up @@ -400,6 +400,7 @@ def run_cmdline(cmd_args=None):

p_cutout.add_argument('-o', '--out', help='output file name (by default prints fasta to stdout)')
p_cutout.add_argument('--fmt', help='output format (default: autodetect from file extension)')
p_cutout.add_argument('--gap', help='specify gap character(s), allow gaps in sequences (default: no special handling for gaps)')

p_print.add_argument('fname_anchor', help='anchor file name')
p_print.add_argument('-v', '--verbose', help=msg, action='store_true')
Expand Down
4 changes: 2 additions & 2 deletions anchorna/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ def _transform_cutout_index(A, B, C, id_, seq, mode):
return i


def cutout(seqs, anchors, pos1, pos2, mode='nt', score_use_fluke=None):
def cutout(seqs, anchors, pos1, pos2, mode='nt', score_use_fluke=None, gap=None):
"""
Cutout subsequences from pos1 to pos2 (i.e. between two anchors)
Expand All @@ -320,7 +320,7 @@ def cutout(seqs, anchors, pos1, pos2, mode='nt', score_use_fluke=None):
continue
i = _transform_cutout_index(la, lb, lc, id_, seqs[id_], mode)
j = _transform_cutout_index(ra, rb, rc, id_, seqs[id_], mode)
seq2 = seqs[id_][i:j]
seq2 = seqs[id_].sl(gap=gap)[i:j]
if mode == 'nt':
seq2.meta.offset = i
seq2.meta.pop('fts', None)
Expand Down
15 changes: 9 additions & 6 deletions anchorna/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def write_anchors(anchors, fname, mode=None):
fts.write(fname, 'gff', header=header)


def read_anchors(fname, check_header=True):
def _read_anchors(fname, check_header=True):
"""
Read anchors from GFF file
Expand Down Expand Up @@ -99,16 +99,19 @@ def _parse_selection(anchors, selection):
return anchors2


def load_selected_anchors(fname):
def read_anchors(fname, check_header=True):
"""
Read anchors and select or remove some of them
Read anchors from GFF file
See ``anchorna combine -h``
Offsets are restored from comments.
Additionally, anchors can be selected and/or removed with a special syntax,
see ``anchorna combine -h``
"""
fname = str(fname)
if '|' not in fname:
return read_anchors(fname)
return _read_anchors(fname, check_header=check_header)
fname, selection = fname.split('|', 1)
anchors = read_anchors(fname.strip())
anchors = _read_anchors(fname.strip(), check_header=check_header)
selection = selection.lower()
if '|' not in selection:
return _parse_selection(anchors, selection)
Expand Down
4 changes: 2 additions & 2 deletions anchorna/tests/test_anchorna.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from anchorna import cutout, read_anchors
from anchorna.cli import run_cmdline
from anchorna.io import load_json, load_selected_anchors, write_json
from anchorna.io import load_json, write_json


_IDS = ( # Representative sequences of pesti virus
Expand Down Expand Up @@ -147,7 +147,7 @@ def test_anchorna_workflow_subset():
assert '' == check(f'anchorna cutout anchors.gff a6> a10< -o {fname}')
assert '' == check(f'anchorna go --fname {fname} --no-pbar anchors_cutout2.gff --search-range=1000')
assert '' == check('anchorna combine anchors.gff||a7:a10 anchors_cutout2.gff -o anchors_combined2.gff')
assert read_anchors('anchors_combined2.gff') == load_selected_anchors('anchors.gff')
assert read_anchors('anchors_combined2.gff') == read_anchors('anchors.gff')

# check --no-remove option and --continue-with option
assert '' == check('anchorna go --no-remove --no-pbar anchors2.gff')
Expand Down
2 changes: 1 addition & 1 deletion anchorna/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ def fts2anchors(fts, no_cds=False):
if ft.type in ('anchor', 'fluke'):
if not ft.meta.name.startswith(aname):
raise ValueError(f'Fluke with name {ft.meta.name} not part of anchor {aname}')
start, stop = ft.loc_range
start, stop = ft.locs.range
fluke = Fluke(seqid=ft.seqid, score=ft.meta.score, start=start, stop=stop,
word=ft.meta._gff.word, poor=hasattr(ft.meta._gff, 'poor'),
median_score = float(ft.meta._gff.get('median_score', 1)))
Expand Down

0 comments on commit 0f0f2b8

Please sign in to comment.