Source code for swamp.mr.mrjob
import os
import dill
import swamp.mr.mrarray
from pyjob import Script
class MrJob(object):
    """Manage the creation and execution of a :py:obj:`~swamp.mr.mrrun.MrRun` job.

    The job lives in the context of a parent container :py:obj:`~swamp.mr.mrarray.MrArray` instance. This class
    builds a shell/python script that can be executed as a single independent job, and provides access to the
    results stored by the resulting :py:obj:`~swamp.mr.mrrun.MrRun` instance.

    :param str id: unique identifier of this :py:obj:`~swamp.mr.mrjob.MrJob` instance
    :param str workdir: working directory for this :py:obj:`~swamp.mr.mrjob.MrJob` instance (created if missing)
    :param str python_interpreter: python interpreter used to run the generated script
        (default ``$CCP4/bin/ccp4-python``)
    :param bool extend_solution: forwarded as ``extend_solution`` to the :py:obj:`~swamp.mr.mrrun.MrRun`
        constructor in the generated script (default False)
    :ivar str id: unique identifier of this :py:obj:`~swamp.mr.mrjob.MrJob` instance
    :ivar str phased_mtz: target's mtz filename containing phases (default None)
    :ivar str target_mtz: target's mtz filename (default None)
    :ivar str target_fa: target's fasta filename (default None)
    :ivar list searchmodel_list: list of keyword-argument dictionaries, one per search model to be added to the
        :py:obj:`~swamp.mr.mrrun.MrRun` instance
    :raises KeyError: if `python_interpreter` is None and the ``CCP4`` environment variable is not set
    """

    def __init__(self, id, workdir, python_interpreter=None, extend_solution=False):
        self.id = id
        self.workdir = workdir
        self.target_mtz = None
        self.target_fa = None
        self.phased_mtz = None
        # Goes through the property setter, which also initialises ``_parent_array``
        self.parent_array = None
        self.extend_solution = extend_solution
        self.searchmodel_list = []
        self.make_workdir()
        if python_interpreter is None:
            self.python_interpreter = os.path.join(os.environ['CCP4'], 'bin', 'ccp4-python')
        else:
            self.python_interpreter = python_interpreter

    def __repr__(self):
        return '{}(id={}, workdir="{}")'.format(self.__class__.__name__, self.id, self.workdir)

    # ------------------ General properties ------------------

    @property
    def parent_array(self):
        """The parent :py:obj:`~swamp.mr.mrarray.MrArray` instance (None if there is no parent)"""
        return self._parent_array

    @parent_array.setter
    def parent_array(self, value):
        """Property setter for :py:attr:`~swamp.mr.mrjob.MrJob.parent_array`

        When a parent is set, its ``target_mtz``, ``target_fa`` and ``phased_mtz`` are copied into this job.
        Setting None only clears the parent reference; the target attributes are left untouched.

        :param value: MrArray to be set, or None
        :type value: :py:obj:`~swamp.mr.mrarray.MrArray`, None
        :raises TypeError: value is not None nor an instance of :py:obj:`~swamp.mr.mrarray.MrArray`
        """
        if value is not None and not isinstance(value, swamp.mr.mrarray.MrArray):
            raise TypeError('Parent array must be a swamp.mr.mrarray.MrArray instance!')

        # Always store the value so that the getter never hits a missing ``_parent_array`` attribute
        self._parent_array = value
        if value is not None:
            self.target_mtz = value.target_mtz
            self.target_fa = value.target_fa
            self.phased_mtz = value.phased_mtz

    @property
    def results(self):
        """The ``results`` of the pickled :py:obj:`~swamp.mr.mrrun.MrRun` instance created by the execution of
        :py:attr:`~swamp.mr.mrjob.MrJob.python_script`, or None if ``results.pckl`` is not in the workdir"""
        pickle_fname = os.path.join(self.workdir, "results.pckl")
        if not os.path.isfile(pickle_fname):
            return None
        # NOTE: unpickling executes arbitrary code; only load pickles written by trusted jobs
        with open(pickle_fname, 'rb') as pickle_fhandle:
            mr_run = dill.load(pickle_fhandle)
        return mr_run.results

    @property
    def python_script(self):
        """String with the shell script that creates and executes the :py:obj:`~swamp.mr.mrrun.MrRun` instance
        associated with this :py:obj:`~swamp.mr.mrjob.MrJob`

        The script changes into the workdir and feeds a python program to the interpreter through a heredoc.
        """
        chunks = [
            (
                "cd {workdir}\n"
                "{python_interpreter} << EOF\n"
                "from swamp.mr import MrRun\n"
                "mr_run = MrRun(id='{id}', workdir='{workdir}', target_fa='{target_fa}', "
                "target_mtz='{target_mtz}', extend_solution={extend_solution})\n"
            ).format(
                workdir=self.workdir,
                python_interpreter=self.python_interpreter,
                id=self.id,
                target_fa=self.target_fa,
                target_mtz=self.target_mtz,
                extend_solution=self.extend_solution,
            )
        ]

        if self.phased_mtz is not None:
            chunks.append('mr_run.phased_mtz = "%s"\n' % self.phased_mtz)

        # Every search model argument is passed to MrRun.add_searchmodel as a double-quoted string
        for searchmodel in self.searchmodel_list:
            args = ', '.join('%s="%s"' % (name, value) for name, value in searchmodel.items())
            chunks.append('mr_run.add_searchmodel(%s)\n' % args)

        # The body of the ``if`` must be indented for the embedded python program to be valid
        chunks.append(
            "if not mr_run.error:\n"
            "    mr_run.run()\n"
            "    mr_run.create_result_table_outfile()\n"
            "    mr_run.store_pickle()\n"
            "EOF\n"
        )
        return ''.join(chunks)

    @property
    def script(self):
        """A :py:obj:`pyjob.Script` instance that will be executed on this :py:obj:`~swamp.mr.mrjob.MrJob`

        The script is placed in the parent directory of the workdir, named after the lower-cased job id.
        """
        parent_dir = os.path.abspath(os.path.join(self.workdir, os.pardir))
        script = Script(directory=parent_dir, prefix=self.id.lower(), stem='', suffix='.sh')
        script.append(self.python_script)
        return script

    # ------------------ Methods ------------------

    def add_searchmodel(self, **kwargs):
        """Provide necessary information to add a given search model to the :py:obj:`~swamp.mr.mrrun.MrRun`
        instance.

        :param kwargs: stored and later written into the generated script as the arguments of
            :py:func:`~swamp.mr.mrrun.MrRun.add_searchmodel`
        """
        self.searchmodel_list.append(kwargs)

    def make_workdir(self):
        """Create the :py:attr:`~swamp.mr.mrjob.MrJob.workdir` (no-op if it already exists)

        :raises OSError: the directory could not be created
        """
        try:
            os.makedirs(self.workdir)
        except OSError:
            # Tolerate the directory already existing (e.g. created concurrently by a sibling job)
            if not os.path.isdir(self.workdir):
                raise