Source code for swamp.wrappers.gesamt

import os
import gemmi
import itertools
import threading
from pyjob import cexec
from swamp.wrappers.wrapper import Wrapper
from swamp.parsers.gesamtparser import GesamtParser, GesamtErrorCodes
from swamp.utils import ThreadResults, invert_hiearchy, get_tempfile


[docs]class Gesamt(Wrapper): """Wrapper around gesamt This class can be used to perform structural alignments using gesamt and parse the obtained results. :param str workdir: working directory :py:obj:`~swamp.wrappers.gesamt.Gesamt` instance :type workdir: str, None :param str mode: specify the type of task to be done with gesamt :param pdbin: input pdb file name. If an iterable is given, assumes multiple structure alignment :type pdbin: str, tuple, list :param gesamt_archive: location of the gesamt archive to be scanned (default None) :type gesamt_archive: str, None :param pdbout: output pdb file name (default None) :type pdbout: str, None :param int nthreads: number of threads to be used by gesamt (default 1) :param hits_out: file name of the .hits output file (default None) :type hits_out: str, None :param float min1: argument to be passed to gesamt as -min1 (default 0.1) :param float min2: argument to be passed to gesamt as -min2 (default 0.1) :param pdb_archive: location of the pdb archive to be used in gesamt (default None) :type pdb_archive: str, None :param `swamp.logger.swamplogger.SwampLogger` logger: logging interface for the wrapper :ivar bool error: if True an error has occurred along the process :ivar float qscore: qscore as reported by gesamt :ivar float rmsd: the obtained rmsd as reported by gesamt :ivar float seq_id: sequence identity between the input structures :ivar int n_align: number of aligned residues """ def __init__(self, workdir, mode, pdbin=None, gesamt_archive=None, pdbout=None, nthreads=1, hits_out=None, min1=0.1, min2=0.1, pdb_archive=None, logger=None): super(Gesamt, self).__init__(workdir=workdir, logger=logger) self.summary_results = None self.qscore = None self.rmsd = None self.seq_id = None self.n_align = None self.mode = mode self.pdbin = pdbin self.gesamt_archive = gesamt_archive self._pdbout = pdbout self.nthreads = nthreads self.hits_out = hits_out self.pdb_archive = pdb_archive self.min1 = min1 self.min2 = min2 if self.pdbin is None and self.mode != 'make-archive': self.logger.error('Need to provide at least one PDB file for gesamt!') self.error = True elif self.mode not in self.allowed_modes: self.logger.error( 'Selected mode was not recognised! Allowed modes are: {}'.format(' '.join(self.allowed_modes))) self.error = True # ------------------ Properties ------------------ @property def pdbout(self): """Setter for :py:attr:`~swamp.wrapper.gesamt.Gesamt.pdbout`""" return self._pdbout @pdbout.setter def pdbout(self, value): self._pdbout = value @property def allowed_modes(self): """A list with the allowed values for :py:attr:`~swmap.wrappers.gesamt.Gesamt.mode`""" return ['make-archive', 'alignment', 'search-archive'] @property def wrapper_name(self): """The name of this `~swamp.wrapper.wrapper.Wrapper` child class (gesamt)""" return "gesamt" @property def summary_results(self): """A summary of the figures of merit obtained through the structural alignment""" return self._summary_results @summary_results.setter def summary_results(self, value): self._summary_results = value @property def keywords(self): """No keywords are used in gesamt through stdin""" return None @property def cmd(self): """Command to be executed on :py:func:`~swamp.wrappers.wrapper.Wrapper.run`""" # Create an archive if self.mode == "make-archive": if self.gesamt_archive is None or self.pdb_archive is None: self.logger.error('Impossible to create archive, need to provide location!') self.error = True return None return [self.source, '--make-archive', self.gesamt_archive, '-pdb', self.pdb_archive] # Align structures elif self.mode == "alignment": cmd = [self.source] if len(self.pdbin) < 2: self.error = True self.logger.error('Need to provide at least two structures for alignment!') return None for fname in self.pdbin: cmd.append(fname) if self.pdbout is not None: cmd.append('-o') cmd.append(self.pdbout) return cmd # Scan an archive else: cmd = [self.source, self.pdbin, '-archive', self.gesamt_archive, '-o', self.hits_out] self._filthy_files.append(self.hits_out) if self.nthreads is not None: cmd.append('-nthreads=%s' % self.nthreads) if self.min1 is not None: cmd.append('-min1=%s' % self.min1) if self.min2 is not None: cmd.append('-min2=%s' % self.min2) return cmd # ------------------ General methods ------------------
[docs] def get_scores(self, logfile=None): """Use a :py:obj:`~swamp.parsers.gesamtparser.GesamtParser` instance to extract the scores and figures of \ merit out of the :py:attr:`~swamp.wrappers.wrapper.Wrapper.logcontents` and \ :py:attr:`~swamp.wrappers.gesamt.Gesamt.hits_out` :param logfile: Not in use """ parser = GesamtParser(fname=self.hits_out, mode=self.mode, stdout=self.logcontents, logger=self.logger) parser.parse() if parser.error: self.error = True if parser.error == GesamtErrorCodes.DISSIMILAR: self.logger.warning("%s are to dissimilar to be aligned!" % (" ".join(self.pdbin))) elif parser.error == GesamtErrorCodes.ERROR_2: self.logger.warning("Alignment of (%s) returned ALIGNMENT ERROR 2!" % (" ".join(self.pdbin))) elif parser.error == GesamtErrorCodes.READ_ERRORS: self.logger.warning('Read errors have occurred while reading: %s' % (" ".join(self.pdbin))) elif parser.error == GesamtErrorCodes.NO_STDOUT: self.logger.warning('No stdout found while trying to align: %s' % (" ".join(self.pdbin))) else: raise ValueError('Unkown gesamt error code!') elif self.mode == "search-archive": self.summary_results = parser.summary elif self.mode == "alignment": self.qscore, self.rmsd, self.seq_id, self.n_align = parser.summary
def _run(self): """Run :py:attr:`~swamp.wrappers.gesamt.Gesamt.cmd` and parse the results using \ :py:func:`~swamp.wrappers.gesamt.Gesamt.get_scores` :raises TypeError: command line command is malformed, please report if this occurs """ if self.mode == 'make-archive' and not os.path.isfile(self.gesamt_archive): os.makedirs(self.gesamt_archive) try: self.logger.debug(" ".join(self.cmd)) except TypeError: self.logger.error('CMD IS NOT ITERABLE, THIS SHOULD NOT HAPPEN PLEASE REPORT ISSUE') self.logger.debug(self.cmd) self.logger.debug(self.pdbin) self.logger.debug(self.mode) self.error = True return self.logcontents = cexec(self.cmd, permit_nonzero=True) self.get_scores() self._cleanup_files() # ------------------ Static methods ------------------
[docs] @staticmethod def get_optimum_alignment(pdbfiles, nthreads=1, logger=None): """Method to get the optimum alignment between a set given pdb files. This method considers all the structure arrangements: it screens for all the possible sets of reference structures and inverted structures possible). This can be computationally demanding so multi-threading is possible. :param pdbfiles: the set of pdb files to be aligned :type pdbfiles: tuple, list :param int nthreads: number of threads to be used when screening all possible arrangements (default 1) :param logger: logging interface to be used (default None) :type logger: None, :py:obj:`~swamp.logger.swamplogger.SwampLogger` :returns: the :py:obj:`~swamp.wrappers.gesamt.Gesamt` instance instance for the optimal alignment and a \ :py:obj:`gemmi.Structure` hierarchy with the aligned structuresas an ensemble (tuple) """ def _align(idx, combination, logger): """Create a :py:obj:`~swamp.wrappers.gesamt.Gesamt` instance and align the specified combination of pdb \ files. It uses a :py:obj:`threading.semaphore` for multiple thread coordination. :param int idx: index of the combination to be aligned (used as identifier) :param tuple combination: the combination of files to be aligned """ semaphore.acquire() gesamt = Gesamt(workdir=None, logger=logger, pdbin=[fname for fname in combination], mode="alignment") gesamt.run() if gesamt.qscore: results.register((idx, gesamt.qscore)) else: results.register((idx, 0.0)) semaphore.release() if len(pdbfiles) == 1: return None, gemmi.read_structure(pdbfiles[0]) # Initiate values input_list = [] tmp_pdbout = get_tempfile() tmp_files = [tmp_pdbout] child_threads = [] semaphore = threading.Semaphore(value=nthreads) results = ThreadResults() # Write the inverted pdb files for pdbfile in pdbfiles: hierarchy = gemmi.read_structure(pdbfile) inverted = invert_hiearchy(hierarchy) tmp_inverted_file = get_tempfile() inverted.write_minimal_pdb(tmp_inverted_file) tmp_files.append(tmp_inverted_file) input_list.append((pdbfile, tmp_inverted_file)) # Determine the alignment with the highest qscore all_combinations = list(itertools.product(*input_list)) for idx, combination in enumerate(all_combinations): t = threading.Thread(target=_align, args=(idx, combination, logger,)) child_threads.append(t.getName()) t.start() mainthread = threading.current_thread() for t in threading.enumerate(): if t is not mainthread and t.getName() in child_threads: t.join() ranked_alignemnts = sorted(results.value, key=lambda x: x[1], reverse=True) optimal_alignment = Gesamt(workdir=None, logger=logger, pdbin=all_combinations[ranked_alignemnts[0][0]], mode="alignment", pdbout=tmp_pdbout) optimal_alignment.run() if not optimal_alignment.error: if not os.path.isfile(tmp_pdbout): logger.debug('Not found! %s' % tmp_pdbout) logger.debug(all_combinations[ranked_alignemnts[0][0]]) logger.debug(os.listdir(os.environ['CCP4_SCR'])) optimal_ensemble = gemmi.read_structure(tmp_pdbout) else: optimal_ensemble = None # Remove tmp files and return optimal alignment for fname in tmp_files: if os.path.isfile(fname): os.remove(fname) return optimal_alignment, optimal_ensemble