Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add threeLetter SEQRES #1950

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions prody/atomic/atomic.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@
except:
continue

# Three-letter -> one-letter codes for the 20 standard amino acids.
# Every value in this table is unique, so the table can be inverted without
# losing entries.  AAMAP is assigned its own literal right after this block,
# so it is deliberately not aliased to this dict.
CORE_AAMAP = {
    'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C', 'GLN': 'Q',
    'GLU': 'E', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K',
    'MET': 'M', 'PHE': 'F', 'PRO': 'P', 'SER': 'S', 'THR': 'T', 'TRP': 'W',
    'TYR': 'Y', 'VAL': 'V',
}

# One-letter -> three-letter lookup derived from CORE_AAMAP.  Letters that are
# not among the 20 codes above are absent, so callers indexing this dict with
# arbitrary sequence characters must be prepared for KeyError.
invAAMAP = {one: three for three, one in CORE_AAMAP.items()}

AAMAP = {
'ALA': 'A', 'ARG': 'R', 'ASN': 'N', 'ASP': 'D', 'CYS': 'C', 'GLN': 'Q',
'GLU': 'E', 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LEU': 'L', 'LYS': 'K',
Expand Down Expand Up @@ -247,12 +256,20 @@ def getSequence(self, **kwargs):
residues (e.g. water molecules) in the chain and **X** will be used for
non-standard residue names."""

threeLetter = kwargs.get('threeLetter', False)

get = AAMAP.get
if hasattr(self, 'getResnames'):
seq = ''.join([get(res, 'X') for res in self.getResnames()])
if threeLetter:
seq = ' '.join(self.getResnames())
else:
seq = ''.join([get(res, 'X') for res in self.getResnames()])
else:
res = self.getResname()
seq = get(res, 'X')
if threeLetter:
seq = res
else:
seq = get(res, 'X')

return seq

Expand Down
13 changes: 10 additions & 3 deletions prody/atomic/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,14 @@

__all__ = ['Chain']

def getSequence(resnames, **kwargs):
    """Returns polypeptide sequence built from a list of *resnames* (residue
    name abbreviations).

    By default each residue name is looked up in :data:`AAMAP` to obtain a
    one-letter code, **X** is used for names that are not in the map, and the
    codes are concatenated without separators.

    :arg threeLetter: when true, skip the lookup and return the residue
        names themselves joined by single spaces, default is **False**
    :type threeLetter: bool

    Any other keyword arguments are accepted and ignored, so callers may
    forward their own ``**kwargs`` unchanged."""

    if kwargs.get('threeLetter', False):
        return ' '.join(resnames)

    get = AAMAP.get
    return ''.join([get(rn, 'X') for rn in resnames])

Expand Down Expand Up @@ -134,13 +138,16 @@ def getSequence(self, **kwargs):

if kwargs.get('allres', False):
get = AAMAP.get
seq = ''.join([get(res.getResname(), 'X') for res in self])
if kwargs.get('threeLetter', False):
seq = ' '.join([res.getResname() for res in self])
else:
seq = ''.join([get(res.getResname(), 'X') for res in self])
elif self._seq:
seq = self._seq
else:
calpha = self.calpha
if calpha:
seq = getSequence(calpha.getResnames())
seq = getSequence(calpha.getResnames(), **kwargs)
else:
seq = ''
self._seq = seq
Expand Down
45 changes: 36 additions & 9 deletions prody/proteins/cifheader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

from prody import LOGGER
from prody.atomic import flags, AAMAP
from prody.utilities import openFile, alignBioPairwise, GAP_PENALTY, GAP_EXT_PENALTY
from prody.atomic.atomic import invAAMAP
from prody.utilities import openFile, alignBioPairwise, GAP_EXT_PENALTY

from .localpdb import fetchPDB
from .header import (Chemical, Polymer, DBRef, _PDB_DBREF,
Expand Down Expand Up @@ -57,7 +58,7 @@ def _natomsFromFormulaPart(part):
return 1
return int("".join(digits))

def parseCIFHeader(pdb, *keys):
def parseCIFHeader(pdb, *keys, **kwargs):
"""Returns header data dictionary for *pdb*. This function is equivalent to
``parsePDB(pdb, header=True, model=0, meta=False)``, likewise *pdb* may be
an identifier or a filename.
Expand Down Expand Up @@ -119,12 +120,12 @@ def parseCIFHeader(pdb, *keys):
raise IOError('{0} is not a valid filename or a valid PDB '
'identifier.'.format(pdb))
pdb = openFile(pdb, 'rt')
header = getCIFHeaderDict(pdb, *keys)
header = getCIFHeaderDict(pdb, *keys, **kwargs)
pdb.close()
return header


def getCIFHeaderDict(stream, *keys):
def getCIFHeaderDict(stream, *keys, **kwargs):
"""Returns header data in a dictionary. *stream* may be a list of PDB lines
or a stream."""

Expand All @@ -139,11 +140,17 @@ def getCIFHeaderDict(stream, *keys):
keys = list(keys)
for k, key in enumerate(keys):
if key in _PDB_HEADER_MAP:
value = _PDB_HEADER_MAP[key](lines)
if key == 'polymers':
value = _PDB_HEADER_MAP[key](lines, **kwargs)
else:
value = _PDB_HEADER_MAP[key](lines)
keys[k] = value
else:
try:
value = _PDB_HEADER_MAP['others'](lines, key)
if key == 'polymers':
value = _PDB_HEADER_MAP[key](lines, **kwargs)
else:
value = _PDB_HEADER_MAP[key](lines)
keys[k] = value
except:
raise KeyError('{0} is not a valid header data identifier'
Expand Down Expand Up @@ -758,7 +765,7 @@ def _getReference(lines):
return ref


def _getPolymers(lines):
def _getPolymers(lines, **kwargs):
"""Returns list of polymers (macromolecules)."""

pdbid = _PDB_HEADER_MAP['identifier'](lines)
Expand All @@ -777,8 +784,28 @@ def _getPolymers(lines):
entities[entity].append(ch)
poly = polymers.get(ch, Polymer(ch))
polymers[ch] = poly
poly.sequence += ''.join(item[
'_entity_poly.pdbx_seq_one_letter_code_can'].replace(';', '').split())

threeLetter = kwargs.get('threeLetter', False)
if threeLetter:
poly.sequence += ''.join(item[
'_entity_poly.pdbx_seq_one_letter_code'].replace(';', '').split())
else:
poly.sequence += ''.join(item[
'_entity_poly.pdbx_seq_one_letter_code_can'].replace(';', '').split())

if threeLetter:
for poly in polymers.values():
seq = poly.sequence
resnames = []
for item in seq.split('('):
if item.find(')') != -1:
resnames.append(item[:item.find(')')])
letters = list(item[item.find(')')+1:])
else:
letters = list(item)
resnames.extend([invAAMAP[letter] for letter in letters])

poly.sequence = ' '.join(resnames)

# DBREF block 1
items2 = parseSTARSection(lines, '_struct_ref', report=False)
Expand Down
22 changes: 16 additions & 6 deletions prody/proteins/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ def cleanString(string, nows=False):
return ' '.join(string.strip().split())


def parsePDBHeader(pdb, *keys):
def parsePDBHeader(pdb, *keys, **kwargs):
"""Returns header data dictionary for *pdb*. This function is equivalent to
``parsePDB(pdb, header=True, model=0, meta=False)``, likewise *pdb* may be
an identifier or a filename.
Expand Down Expand Up @@ -297,12 +297,12 @@ def parsePDBHeader(pdb, *keys):
raise IOError('{0} is not a valid filename or a valid PDB '
'identifier.'.format(pdb))
pdb = openFile(pdb, 'rt')
header, _ = getHeaderDict(pdb, *keys)
header, _ = getHeaderDict(pdb, *keys, **kwargs)
pdb.close()
return header


def getHeaderDict(stream, *keys):
def getHeaderDict(stream, *keys, **kwargs):
"""Returns header data in a dictionary. *stream* may be a list of PDB lines
or a stream."""

Expand All @@ -325,7 +325,10 @@ def getHeaderDict(stream, *keys):
keys = list(keys)
for k, key in enumerate(keys):
if key in _PDB_HEADER_MAP:
value = _PDB_HEADER_MAP[key](lines)
if key == 'polymers':
value = _PDB_HEADER_MAP[key](lines, **kwargs)
else:
value = _PDB_HEADER_MAP[key](lines)
keys[k] = value
else:
raise KeyError('{0} is not a valid header data identifier'
Expand Down Expand Up @@ -555,7 +558,7 @@ def _getReference(lines):
return ref


def _getPolymers(lines):
def _getPolymers(lines, **kwargs):
"""Returns list of polymers (macromolecules)."""

pdbid = lines['pdbid']
Expand All @@ -564,7 +567,14 @@ def _getPolymers(lines):
ch = line[11]
poly = polymers.get(ch, Polymer(ch))
polymers[ch] = poly
poly.sequence += ''.join(getSequence(line[19:].split()))

threeLetter = kwargs.get('threeLetter', False)
if threeLetter:
if poly.sequence != '':
poly.sequence += ' '
poly.sequence += getSequence(line[19:].split(), **kwargs)
else:
poly.sequence += ''.join(getSequence(line[19:].split(), **kwargs))

for i, line in lines['DBREF ']:
i += 1
Expand Down
Loading