# Source code for swamp.mr.mrrun

import os
import swamp
from swamp.mr.mr import Mr
from swamp.mr.targetdata import TargetData
from swamp.mr.searchmodel import SearchModel
from swamp.wrappers import Phaser, wRefmac, Shelxe


class MrRun(Mr):
    """Class to attempt molecular replacement on a given target

    This class contains methods to attempt molecular replacement on a given target structure. The pipeline uses
    phaser for search model placement, refmac5 for refinement and shelxe for density modification and model
    building. Optionally, an unsuccessful run can be extended by trying to place ideal helices on top of the
    refined search model placement (see :py:func:`~swamp.mr.mrrun.MrRun.fit_helices`).

    :param str id: unique identifier for the instance. It is split on ``'_'`` and the second and fourth fields are
      stored as :py:attr:`search_id` and :py:attr:`run_id`, so it must contain at least four ``'_'`` separated fields
    :param str workdir: working directory where the MR task will be executed
    :param str target_fa: target's fasta filename
    :param str target_mtz: target's mtz filename
    :param str phased_mtz: target's mtz filename containing phases (default None)
    :param int threads: number of threads to be used in the pipeline (only affects phaser) (default 1)
    :param str phaser_sgalternative: parameter to be passed to :py:func:`phaser.InputMR_AUTO.SGAL_SELE`
      (default 'NONE')
    :param bool phaser_early_kill: if True pipeline will stop execution if phaser scores are low (default True)
    :param bool silent: if set to True the logger will not print messages (default False)
    :param bool save_disk_space: if True metadata will be removed to save disk space (default False)
    :param `~swamp.logger.swamplogger.SwampLogger` logger: logging interface for the MR pipeline (default None)
    :param float phaser_packcutoff: parameter to be passed to :py:func:`phaser.InputMR_AUTO.setPACK_CUTO`
      (default None)
    :param int phaser_timeout: parameter to be passed to :py:func:`phaser.InputMR_AUTO.KILL_TIME` (default 1800)
    :param bool extend_solution: if True then solutions will be completed with ideal helices (default False)
    :param bool quiet_start: if True the logger will not display the header section and the initial parameters
      (default False)
    :param float phaser_peaks_rotcutoff: parameter to be passed to :py:func:`phaser.InputMR_AUTO.setPEAK_ROTA_CUTO`
      (default None)
    :ivar bool error: True if errors have occurred at some point on the pipeline
    :ivar str is_extended: 'YES' if the instance corresponds with an ideal helices extension run, otherwise 'NO'
    :ivar `~swamp.wrappers.wphaser.Phaser` phaser: phaser wrapper used in the pipeline (None until the run starts)
    :ivar `~swamp.wrappers.wrefmac.Refmac` refmac: refmac wrapper used in the pipeline (None until the run starts)
    :ivar `~swamp.wrappers.shelxe` shelxe: shelxe wrapper used in the pipeline (None until the run starts)
    :ivar str search_id: the search model identifier for the :py:obj:`~swamp.mr.mrrun.MrRun` instance
    :ivar str run_id: the run identifier for the :py:obj:`~swamp.mr.mrrun.MrRun` instance
    :ivar list searchmodel_list: a list of the search models to be used in this :py:obj:`~swamp.mr.mrrun.MrRun`
      instance
    :ivar dict solution: keyword arguments registered with :py:func:`~swamp.mr.mrrun.MrRun.register_solution`
      describing an existing solution to be handed to phaser, None if no solution was registered
    :ivar idealhelix_run: instance of :py:obj:`~swamp.mr.mrrun.MrRun` used to extend the solution with ideal
      helices, None if no extension has been attempted
    """

    # NOTE(review): the original class documentation also contained the unfinished sentence "If scattering data has
    # been recorded into the target's mtz file."; its intended meaning could not be recovered from this file.

    def __init__(self, id, workdir, target_fa, target_mtz, phased_mtz=None, threads=1, phaser_sgalternative="NONE",
                 phaser_early_kill=True, silent=False, save_disk_space=False, logger=None, phaser_packcutoff=None,
                 phaser_timeout=1800, extend_solution=False, quiet_start=False, phaser_peaks_rotcutoff=None):

        super(MrRun, self).__init__(id, target_fa, target_mtz, workdir, phased_mtz=phased_mtz, logger=logger,
                                    silent=silent)

        self.search_id = id.split('_')[1]
        self.run_id = id.split('_')[3]
        # Snapshot of the constructor arguments. No extra local variable may be created before this line, otherwise
        # it would leak into the parameters reported by self._inform_args()
        self.init_params = locals()

        if not quiet_start:
            self.logger.info(self.pipeline_header.format(' MR-RUN '))
            self.logger.info(self._inform_args(**self.init_params))

        self.searchmodel_list = []
        self.threads = threads
        self.save_disk_space = save_disk_space
        self.extend_solution = extend_solution
        self.phaser_sgalternative = phaser_sgalternative
        self.phaser_early_kill = phaser_early_kill
        self.phaser_timeout = phaser_timeout
        self.is_extended = "NO"
        # The wrappers are created lazily by _initiate_wrappers() when run() is called
        self.phaser = None
        self.refmac = None
        self.shelxe = None
        self.solution = None
        self.idealhelix_run = None
        self.phaser_packcutoff = phaser_packcutoff
        self.phaser_peaks_rotcutoff = phaser_peaks_rotcutoff
        self.target = TargetData(target_fa, target_mtz, phased_mtz_fname=phased_mtz, logger=self.logger)
        self.target.get_info()

    # ------------------ Some general properties ------------------

    @property
    def cleanup_dir_list(self):
        """Property storing information about directories used in the pipeline that can be removed to save disk space

        The list is built from :py:attr:`phaser_info`, :py:attr:`refmac_info` and :py:attr:`shelxe_info`, which read
        :py:attr:`phaser` and :py:attr:`refmac`, so it can only be accessed once the wrappers have been instantiated.
        """

        return [self.phaser_info['workdir'], self.refmac_info['workdir'], self.shelxe_info['workdir'],
                self.searchmodel_dir, self.idealhelices_workdir]

    @property
    def searchmodel_dir(self):
        """Directory where the search model preparation will take place"""

        return os.path.join(self.workdir, "searchmodels")

    @property
    def phaser_info(self):
        """Dictionary to use as **kwargs for :py:obj:`~swamp.wrappers.wphaser.Phaser`"""

        return {'early_kill': self.phaser_early_kill,
                'workdir': os.path.join(self.workdir, "phaser"),
                'timeout': self.phaser_timeout,
                'logger': self.logger,
                'threads': self.threads,
                'phased_mtz': self.phased_mtz,
                'mtzfile': self.target_mtz,
                'mw': self.target.mw,
                'packcutoff': self.phaser_packcutoff,
                'nchains_asu': self.target.ncopies,
                'sgalternative': self.phaser_sgalternative,
                'peaks_rotcutoff': self.phaser_peaks_rotcutoff
                }

    @property
    def refmac_info(self):
        """Dictionary to use as **kwargs for :py:attr:`~swamp.wrappers.wrefmac.wRefmac`

        Reads :py:attr:`phaser`, so the phaser wrapper must have been instantiated first.
        """

        return {'workdir': os.path.join(self.workdir, "refmac"),
                'pdbin': self.phaser.pdbout,
                'mtzin': self.target_mtz,
                'phased_mtz': self.phased_mtz,
                'logger': self.logger
                }

    @property
    def shelxe_info(self):
        """Dictionary to use as **kwargs for :py:attr:`~swamp.wrappers.shelxe.Shelxe`

        Reads :py:attr:`refmac`, so the refmac wrapper must have been instantiated first.
        """

        return {'workdir': os.path.join(self.workdir, 'shelxe'),
                'logger': self.logger,
                'pdbin': self.refmac.pdbout,
                'mtzin': self.target_mtz,
                'solvent': self.target.solvent,
                'nreflections': self.target.nreflections,
                'use_f': self.target.use_f,
                'resolution': self.target.resolution
                }

    @property
    def _list_idealhelices(self):
        """List of file names of the ideal helices available to extend the solution

        A file name is selected when, once split on ``'_'``, its second field is one of the permitted sizes, its
        second to last field is one of the permitted modifications and its last field is ``homogenous.pdb``. File
        names without any ``'_'`` cannot match and are skipped.
        """

        permitted_sizes = ["10", "15", "20", "25"]
        permitted_modifications = ["nativebfact", "gradientbfact"]

        selected = []
        for fname in os.listdir(swamp.IDEALHELICES_DIR):
            fields = fname.split("_")
            # Unrelated files with no underscore would otherwise raise IndexError on fields[1]
            if len(fields) < 2:
                continue
            if fields[1] in permitted_sizes and fields[-2] in permitted_modifications \
                    and fields[-1] == "homogenous.pdb":
                selected.append(fname)
        return selected

    @property
    def idealhelices_workdir(self):
        """Directory where the ideal helices solution extension will take place"""

        return os.path.join(self.workdir, "ideal_helices")

    # ------------------ Some general methods ------------------

    def add_searchmodel(self, **kwargs):
        """Add a search model to :py:attr:`~swamp.mr.mrrun.MrRun.searchmodel_list`

        All the keyword arguments are forwarded to :py:obj:`~swamp.mr.searchmodel.SearchModel`. If a search model
        with the same id has already been added, :py:attr:`error` is set and nothing is added.

        :param str id: unique identifier for the search model to be added
        :param str ensemble_code: the ensemble's SWAMP library id to be used as search model
        :param float ermsd: the eRMSD to be used with phaser to place the search model (default 0.1)
        :param int nsearch: number of copies to search with phaser
        :param bool disable_check: passed to :py:obj:`phaser.InputMR_AUTO.setENSE_DISA_CHEC` (default True)
        :param str mod: indicate how to prepare the search model (default 'unmod')
        :param str model: indicate if the search model is an ensemble or a centroid (default 'ensemble')
        """

        # The identifier must be read from the keyword arguments: the bare name `id` is the python builtin here
        searchmodel_id = kwargs.get('id')
        if self.searchmodel_list and searchmodel_id in [x.id for x in self.searchmodel_list]:
            self.logger.error('A searchmodel with the same id has been already added!')
            self.error = True
            return

        searchmodel = SearchModel(workdir=self.searchmodel_dir, **kwargs)
        if searchmodel.error:
            self.logger.warning('Previous errors prevent adding the searchmodel!')
            return

        self.searchmodel_list.append(searchmodel)

    def register_solution(self, **kwargs):
        """Register an existing solution information to be used with :py:attr:`~swamp.mr.mrrun.MrRun.phaser`

        The keyword arguments are stored in :py:attr:`solution` and passed to the phaser wrapper when the wrappers
        are instantiated.
        """

        self.solution = kwargs

    def append_results(self):
        """Method to append the results obtained into :py:attr:`~swamp.mr.mr.Mr.results`

        A single row is appended with the identifiers of the run, the figures of merit read from the phaser, refmac
        and shelxe wrappers, and whether this is an ideal helices extension run.
        """

        self.results.append(
            [self.search_id, self.run_id, self.phaser.LLG, self.phaser.TFZ, self.phaser.local_CC,
             self.phaser.overall_CC, self.refmac.rfree, self.refmac.rfactor, self.refmac.local_CC,
             self.refmac.overall_CC, self.shelxe.cc, self.shelxe.acl, self.is_extended, self.shelxe.solution])

    def run(self):
        """Run the MR pipeline using phaser, refmac5 and shelxe. Extend the possible solution with ideal helices

        The pipeline stops early (after storing the results) if phaser fails, if phaser suggests to abort or if
        refmac fails. If shelxe does not report a solution and :py:attr:`extend_solution` is set, ideal helices are
        fitted. If no solution was found and :py:attr:`save_disk_space` is set, the working files are removed.
        """

        self._initiate_wrappers()

        # Sanity check
        if self.error:
            self.logger.warning("Previous errors prevent execution of the pipeline")
            return

        # Run phaser
        self.phaser.run()

        # If there is a problem, abort
        if self.phaser.error or self.phaser.abort_suggested:
            if self.phaser.error:
                self.logger.warning("Previous error prevents pipeline moving forward... Exiting now!")
            else:
                self.logger.warning("Phaser scores below threshold, early termination triggered...")
            self.append_results()
            return

        # Run refmac
        self.refmac.run()
        self.refmac.make_logfile()

        # If there is a problem, abort
        if self.refmac.error:
            self.logger.warning("Previous error prevents pipeline moving forward... Exiting now!")
            self.append_results()
            return

        # Run shelxe
        self.shelxe.run()
        self.shelxe.make_logfile()

        # Store the results
        self.append_results()

        # If there is no solution, let's try to fit some individual helices
        if self.extend_solution and self.shelxe.solution == "NO":
            self.logger.info("Search model placement did not yield a solution, trying to fit ideal helices!\n")
            self.fit_helices()

        # If there was no solution and the user wants to save disk space, delete unnecessary stuff. The extension
        # run may not exist (or may not have been able to start), so it must not be dereferenced blindly
        if self.shelxe.solution == "NO" and self.save_disk_space and self._extension_failed():
            self.logger.info("Saving disk space, %s will be deleted!" % self.workdir)
            os.chdir(os.path.dirname(self.workdir))
            self._cleanup_files()

        # Exit and show the results unless this was an ideal helix run. is_extended is a 'YES'/'NO' string, so it
        # has to be compared explicitly: both values are truthy
        if self.is_extended == "NO":
            self.logger.info('MR run finished. Table of results:\n\n%s\n' % self.table_contents)

    def fit_helices(self):
        """Method to extend the solution with ideal helices

        This method will create and set running another :py:obj:`swamp.mr.mrrun.MrRun` instance that will take the
        placed search model as an existing solution and try to place ideal helices to extend it. The results of the
        extension run are added to :py:attr:`~swamp.mr.mr.Mr.results`.
        """

        # NOTE(review): the original code estimated a number of helices from the target sequence length
        # (int(seq_length / 20) - 2 * number of search models) but never used it; nsearch is hard-coded to 5 below.
        # Confirm whether the estimate was meant to be passed as nsearch.

        # Set up input parameters based on original MR run parameters with some tweaks
        input_params = {'id': self.id,
                        'workdir': self.idealhelices_workdir,
                        'target_fa': self.target_fa,
                        'target_mtz': self.target_mtz,
                        'phased_mtz': self.phased_mtz,
                        'phaser_sgalternative': self.phaser_sgalternative,
                        'phaser_packcutoff': 40.0,
                        'phaser_timeout': 1800,
                        'silent': False,
                        'phaser_early_kill': True,
                        'logger': self.logger,
                        'extend_solution': False,
                        'save_disk_space': False,
                        'quiet_start': True,
                        'threads': self.threads
                        }

        # Run MR pipeline using the ideal helix as search model and manage results
        self.idealhelix_run = MrRun(**input_params)
        self.idealhelix_run.add_searchmodel(id='idealhelix', ensemble_code='idealhelix', nsearch=5)
        self.idealhelix_run.is_extended = 'YES'
        self.idealhelix_run.register_solution(pdbfile=self.refmac.pdbout, ermsd=0.1)
        self.idealhelix_run.run()
        self.results += self.idealhelix_run.results

    # ------------------ Some hidden methods ------------------

    def _extension_failed(self):
        """Check whether the ideal helices extension left this run without a solution

        :returns: True if no extension run was attempted, if the extension run never instantiated its shelxe
          wrapper, or if its shelxe wrapper reports 'NO' as solution
        :rtype: bool
        """

        if self.idealhelix_run is None:
            return True
        extension_shelxe = self.idealhelix_run.shelxe
        return extension_shelxe is None or extension_shelxe.solution == 'NO'

    def _initiate_wrappers(self):
        """Method to instantiate the wrappers to be used in the pipeline

        This method will instantiate all the :py:obj:`swamp.wrapper.wrapper` instances with the arguments necessary
        for the pipeline execution: :py:attr:`~swamp.mr.mrrun.MrRun.phaser`,
        :py:attr:`~swamp.mr.mrrun.MrRun.refmac`, :py:attr:`~swamp.mr.mrrun.MrRun.shelxe`. If no search model has
        been added, :py:attr:`error` is set and no wrapper is created.
        """

        if not self.searchmodel_list:
            self.error = True
            self.logger.error('Cannot proceed with MR run without at least one search model!')
            return

        self.phaser = Phaser(**self.phaser_info)
        for searchmodel in self.searchmodel_list:
            self.phaser.add_searchmodel(**searchmodel.phaser_info)
        if self.solution is not None:
            self.phaser.register_solution(**self.solution)

        # Order matters: refmac_info reads self.phaser and shelxe_info reads self.refmac
        self.refmac = wRefmac(**self.refmac_info)
        self.shelxe = Shelxe(**self.shelxe_info)