Source code for swamp.search.searchjob

import os
import abc
import swamp
import joblib
from pyjob import Script
from swamp.wrappers import MapAlign
from swamp.logger import SwampLogger
from swamp.wrappers.aleigen import AlEigen
from swamp.utils.swamplibrary import SwampLibrary

ABC = abc.ABCMeta('ABC', (object,), {})


[docs]class SearchJob(ABC): """Class that implements methods to search a library of templates using contact map alignment tools The class is used to search a single query against the SWAMP library of templates. The CMO between the \ observed contacts for each member of the library and the predicted contacts of the query is computed. :param str id: the id given to the MrRun and identifying this :py:obj:`~swamp.search.searchjob.SearchJob` instance :param str workdir: working directory where the search will be executed and temporary files will be created :param str query: file name of the query contact file :param str template_library: the directory containing all the templates to be used in the search :param str con_format: format of the contact prediction of the query (default 'psicov') :param str library_format: format of the contact maps contained in the library of templates :param str query_pdb_benchmark: file name of the pdb structure to be used for benchmarking (default None) :param str pdb_library: directory with the pdb files of the templates used for benchmarking (default None) :param tuple template_subset: a subset of templates to be used instead of the full library (default None) :param `~swamp.logger.swamplogger.SwampLogger` logger: logging interface for the search (default None) :param str algorithm: Indicate the alignment algorithm to be used in the search (default: 'mapalign') :ivar list results: a nested list with the results obtained in the search againts the library """ def __init__(self, id, workdir, query, template_library, con_format="psicov", library_format="pdb", logger=None, query_pdb_benchmark=None, pdb_library=None, template_subset=None, algorithm='mapalign', python_interpreter=os.path.join(os.environ['CCP4'], 'bin', 'ccp4-python')): self.id = id self.query = query self.workdir = workdir self._make_workdir() self.template_library = template_library self.con_format = con_format self.library_format = library_format self.results = [] self.query_pdb_benchmark = query_pdb_benchmark self.pdb_library = pdb_library self.template_subset = template_subset self.algorithm = algorithm self.python_interpreter = python_interpreter if algorithm == 'aleigen': self._compute_eigenvector() if logger is None: self.logger = SwampLogger(__name__) self.logger.init(logfile=None, use_console=True, console_level='debug') self.logger.info(self.search_header) else: self.logger = logger if self.query_pdb_benchmark is not None: if self.pdb_library is None: self.logger.warning('PDB benchmark was requested but a PDB template library was not set!') self.query_pdb_benchmark = None elif not os.path.isdir(self.pdb_library): self.logger.warning('PDB benchmark was requested but %s PDB library was not found!' % self.pdb_library) self.query_pdb_benchmark = None # ------------------ Some general properties ------------------ @property def search_header(self): """Header displayed when initiating :py:obj:`~swamp.logger.swamplogger.SwampLogger`""" return """********************************************************************** ******************** SWAMP - SCANJOB ****************** ********************************************************************** """ @property def template_list(self): """The list of templates to be used in the search (considers \ :py:attr:`~swamp.search.searchjob.SearchJob.template_subset`)""" if self.template_subset is None: return [os.path.join(self.template_library, filename) for filename in os.listdir(self.template_library)] else: return [os.path.join(self.template_library, filename) for filename in os.listdir(self.template_library) if self._in_subset(filename)] @property def pickle_fname(self): """A pickle file where the :py:attr:`~swamp.search.searchjob.SearchJob.results` will be stored""" return os.path.join(self.workdir, 'search_%s_results.pckl' % self.id) @property def _python_script(self): """Python script to create and execute an identical :py:obj:`~swamp.search.searchjob.SearchJob` instance""" script = 'cd {}\n{} << EOF\nfrom swamp.search import SearchJob\n'.format(self.workdir, self.python_interpreter) attributes = ['id', 'workdir', 'query', 'template_library', 'con_format', 'library_format', 'pdb_library', 'query_pdb_benchmark', 'template_subset', 'algorithm'] arguments = [] for att in attributes: if self.__getattribute__(att) is not None: if not isinstance(self.__getattribute__(att), (list, tuple)): arguments.append('%s="%s"' % (att, self.__getattribute__(att))) else: arguments.append('%s=(%s)' % (att, ', '.join(['"%s"' % x for x in self.__getattribute__(att)]))) script += 'job=SearchJob(%s)' % ', '.join(arguments) script += '\njob.run()\njob.store_pickle()\nEOF\n' return script @property def script(self): """Instance of :py:obj:`pyjob.Script` that will be executed to complete the search""" script = Script(directory=self.workdir, prefix='searchjob_%s' % self.id, stem='', suffix='.sh') script.append(self._python_script) return script @property def _alignment_wrapper(self): """Pointer to the appropiate :py:obj:`~swamp.wrappers.wrapper.Wrapper` to be used in this search :raises ValueError if the algorithm is not recognised (valid algorithms: 'aleigen', 'mapalign') """ if self.algorithm == 'mapalign': return MapAlign elif self.algorithm == 'aleigen': return AlEigen else: raise ValueError("Unrecognised alignment tool! %s" % self.algorithm) @property def eigen_template(self): """A template for eigen vector file names""" return os.path.join(swamp.FRAG_EIGEN_DB, "{}.eigenvct") @property def tmp_eigen_query(self): """A temporary file name to contain the eigen vectors of :py:attr:`~swamp.search.searchjob.SearchJob.query`""" return os.path.join(self.workdir, 'tmp_query.eigenvct') @property def joblist(self): """A list of :py:obj:`~swamp.wrappers.wrapper.Wrapper` instances corresponding with the alignment between the \ query and each member of the :py:attr:`~swamp.search.searchjob.SearchJob.template_list`""" joblist = [] for template in self.template_list: job = self._alignment_wrapper(**self._get_alignment_info(template)) joblist.append(job) return joblist # ------------------ Protected methods ------------------ def _get_alignment_info(self, template): """Create a dictionary with the **kwargs necessary to initialise a \ :py:obj:`~swamp.wrappers.wrapper.Wrapper` instance :param str template: the template contact map file name :returns: a dictionary with the arguments pass to :py:obj:`~swamp.wrappers.wrapper.Wrapper` (dict) :raises ValueError: if the algorithm is not recognised (valid algorithms: 'aleigen', 'mapalign') """ result = { 'map_b': template, 'map_a': self.query, 'format_a': self.con_format, 'format_b': self.library_format, 'logger': self.logger } if self.algorithm == 'mapalign': result['workdir'] = self.workdir elif self.algorithm == 'aleigen': result['workdir'] = self._job_dir_template(template) result["eigenvectors"] = self.tmp_eigen_query, self.eigen_template.format(os.path.basename(template).split(".")[0]) else: raise ValueError("Unrecognised alignment tool! %s" % self.algorithm) if self.pdb_library is not None: result["pdb_b"] = os.path.join(self.pdb_library, self._pdbfile_template(template)) if self.query_pdb_benchmark is not None: result["pdb_a"] = self.query_pdb_benchmark return result def _make_workdir(self): """Method to crete the :py:attr:`~swamp.search.searchjob.SearchJob.workdir`""" if not os.path.isdir(self.workdir): os.makedirs(self.workdir) def _pdbfile_template(self, mapfile): """Get the pdb file name for a given map file name""" if self.pdb_library is None: return None else: return os.path.join(self.pdb_library, "%s.pdb" % os.path.basename(mapfile).split(".")[0]) def _job_dir_template(self, template): """Get the job directory for a given template name""" return os.path.join(self.workdir, os.path.basename(os.path.basename(template).split(".")[0])) def _in_subset(self, fname): """Check if a given fragment file name is in the :py:attr:`~swamp.search.searchjob.SearchJob.template_subset` :param str fname: file name of the fragment of interest :returns: True if the fragment is in :py:attr:`~swamp.search.searchjob.SearchJob.template_subset` (bool) """ frag_id = os.path.basename(fname).split('.')[0] if frag_id in self.template_subset or SwampLibrary._get_reciprocal_id(frag_id) in self.template_subset: return True else: return False def _compute_eigenvector(self): """Create eigen vectors for :py:attr:`~swamp.search.searchjob.SearchJob.query`""" AlEigen.create_eigen_vectors(self.query, map_format=self.con_format, vector_output=self.tmp_eigen_query) # ------------------ Some general methods ------------------
[docs] def store_pickle(self): """Store :py:attr:`~swamp.search.searchjob.SearchJob.results` into \ :py:attr:`~swamp.search.searchjob.SearchJob.pickle_fname`""" joblib.dump(self.results, self.pickle_fname)
[docs] def run(self): """Run each independent job contained at :py:attr:`~swamp.search.searchjob.SearchJob.joblist`""" self.logger.info("Searching template library") for job in self.joblist: self.logger.debug("Alignment of query %s with template %s" % (job.map_a, job.map_b)) job.run() self.results.append(job.summary_results) self.logger.debug(job.summary_results) self.logger.debug("All alignments are done!") if os.path.isfile(self.tmp_eigen_query): os.remove(self.tmp_eigen_query)