Source code for swamp.utils.targetsplit
import os
import gemmi
import itertools
import conkit.io
from swamp.parsers import TopconsParser
from swamp.logger import SwampLogger
from swamp.utils import extract_hierarchy_seqnumber, extract_interhelical_cmap
[docs]class TargetSplit(object):
"""Class to split a given target protein into pairs of TM helices and store their interhelical contact information
:param str workdir: working directory for :py:obj:`~swamp.utils.targetsplit.TargetSplit` instance
:param str conpred: contact prediction file name for the target
:param str sspred: target's secondary structure prediction file name (must be topcons format!)
:param str conformat: format of the contact prediction file (default: 'psicov')
:param str pdb_benchmark: target's pdb file to be used for benchmarking purposes (default: None)
:param `~swamp.logger.swamplogger.SwampLogger` logger: logging interface for the instance (default None)
:ivar list subtargets: a list of subtarget contact maps that the target has been splitted into
:ivar bool error: if True an error has occurred along the process
:ivar `~swamp.parsers.topconsparser.TopconsParser` sspred: a topcons parser
:ivar `conkit.core.ContactMap` conpred: a contactmap
:example:
>>> from swamp.utils.targetsplit import TargetSplit
>>> splitter = TargetSplit('<workdir>', '<conpred>', '<sspred>')
>>> splitter.split()
"""
def __init__(self, workdir, conpred, sspred, conformat="psicov", pdb_benchmark=None, logger=None):
self.workdir = workdir
self.conpred = conkit.io.read(conpred, conformat).top_map
self._make_workdir()
self.subtargets = None
self.pdb_benchmark = pdb_benchmark
self.error = False
if logger is None:
self.logger = SwampLogger(__name__)
self.logger.init(logfile=None, use_console=True, console_level='info')
else:
self.logger = logger
self.sspred = TopconsParser(sspred, logger=self.logger)
self.sspred.parse()
if self.sspred.error:
self.logger.warning('Previous errors detected while parsing TM topology prediction!')
self.error = True
self._subtargets_pdb = None
# ------------------ Some general properties ------------------
@property
def _possible_helical_pairs(self):
"""List of all possible TM helical pair combinations as indicated in \
:py:attr:`~swamp.utils.targetsplit.TargetSplit.sspred`"""
return list(itertools.combinations(self.sspred.tmhelices.id, 2))
@property
def ranked_subtargets(self):
"""A list of the :py:attr:`~swamp.utils.targetsplit.TargetSplit.subtargets` sorted according to the number of \
interhelical contacts"""
if self.subtargets is None:
return None
else:
ranked_scores = [cmap.ncontacts for cmap in self.subtargets]
ranked_scores.sort()
ranked_scores.reverse()
ranked_cmaps = [-1 for x in ranked_scores]
# Loop through the targets
for cmap in self.subtargets:
ranked_cmaps[ranked_scores.index(cmap.ncontacts)] = cmap
ranked_scores[ranked_scores.index(cmap.ncontacts)] = -1
# Eliminate targets with score 0
ranked_cmaps = [cmap for cmap in ranked_cmaps if cmap.ncontacts > 0]
# Store the ranked targets
return ranked_cmaps
@property
def subtargets_pdb(self):
"""List containing the :py:obj:`gemmi.Structure` hierarchies of the \
:py:attr:`~swamp.utils.targetsplit.TargetSplit.subtargets`. Not None only if \
:py:attr:`~swamp.utils.targetsplit.TargetSplit.pdb_benchmark` is not None"""
if self.pdb_benchmark is None or self.subtargets is None:
return None
elif self._subtargets_pdb is None:
self.extract_pdb_tmhelices(self.pdb_benchmark)
return self._subtargets_pdb
# ------------------ Some general methods ------------------
def _make_workdir(self):
"""Make the :py:attr:`swamp.utils.targetsplit.TargetSplit.workdir`"""
if not os.path.isdir(self.workdir):
os.makedirs(self.workdir)
def _helix_range(self, helix_id):
"""Extract the range of residue numbers that a given tm helix spans.
:param helix_id: the id of the :py:attr:`swamp.parsers.topconsparser.TopconsParser.tmhelices` of interest
:type helix_id: str, int
:returns: a list with the range of residues that the tm helix of interest spans (list)
"""
if self.sspred is not None and self.sspred.tmhelices is not None:
return range(int(self.sspred.tmhelices[self.sspred.tmhelices.id == helix_id].start),
int(self.sspred.tmhelices[self.sspred.tmhelices.id == helix_id].stop) + 1)
else:
return None
[docs] def get_helical_pair(self, helical_pair):
"""Method to extract the residues spanning a given helical pair
:param tuple helical_pair: pair of :py:attr:`swamp.parsers.topconsparser.TopconsParser.tmhelices` to process
:returns: a tuple where each element corresponds with the residue sequence numbers of the regions where the tm \
helices of the pair span (tuple)
"""
region_A = [x for x in self._helix_range(helical_pair[0])]
region_B = [x for x in self._helix_range(helical_pair[1])]
return region_A, region_B
[docs] def split(self):
"""Split the target into its contacting helical pairs. It will probe all possible combinations
of :py:attr:`swamp.utils.targetsplit.TargetSplit.sspred.tmhelices`"""
if self.sspred.error:
self.logger.warning('Previous errors prevented splitting the target into helical pairs!')
self.error = True
return
self.subtargets = []
for helical_pair in self._possible_helical_pairs:
interhelical_cmap = self.get_interhelical_contacts(helical_pair)
self.subtargets.append(interhelical_cmap)
self.logger.info("Splitted the target into %s helical pairs" % len(self.subtargets))