Source code for swamp.utils
"""This is SWAMP: Solving structures With Alpha Membrane Pairs
This module implements useful classes and methods used across all modules in SWAMP
"""
__author__ = "Filomeno Sanchez Rodriguez"
__credits__ = "Daniel Rigden, & Ronan Keegan"
__email__ = "filomeno.sanchez-rodriguez@liv.ac.uk"
import sys
import os
import gzip
import shutil
import tempfile
import logging
from swamp import version
__version__ = version.__version__
if 'DISABLE_DEPENDENCY_CHECKS' not in os.environ:
if "CCP4" not in os.environ:
raise RuntimeError("Cannot find CCP4 root directory")
import gemmi
import conkit.io
from conkit.core import Contact, ContactMap, Sequence
from Bio.PDB.parse_pdb_header import _parse_pdb_header_list
def SwampLibrary(*args, **kwargs):
    """Build a :py:obj:`~swamp.utils.swamplibrary.SwampLibrary` instance

    The class is imported when this function is called rather than when the module is loaded. All positional
    and keyword arguments are forwarded unchanged to the class constructor.

    :returns: the new :py:obj:`~swamp.utils.swamplibrary.SwampLibrary` instance
    """

    from swamp.utils.swamplibrary import SwampLibrary as _SwampLibrary

    return _SwampLibrary(*args, **kwargs)
def ThreadResults(*args, **kwargs):
    """Build a :py:obj:`~swamp.utils.threadresults.ThreadResults` instance

    The class is imported when this function is called rather than when the module is loaded. All positional
    and keyword arguments are forwarded unchanged to the class constructor.

    :returns: the new :py:obj:`~swamp.utils.threadresults.ThreadResults` instance
    """

    from swamp.utils.threadresults import ThreadResults as _ThreadResults

    return _ThreadResults(*args, **kwargs)
def TargetSplit(*args, **kwargs):
    """Build a :py:obj:`~swamp.utils.targetsplit.TargetSplit` instance

    The class is imported when this function is called rather than when the module is loaded. All positional
    and keyword arguments are forwarded unchanged to the class constructor.

    :returns: the new :py:obj:`~swamp.utils.targetsplit.TargetSplit` instance
    """

    from swamp.utils.targetsplit import TargetSplit as _TargetSplit

    return _TargetSplit(*args, **kwargs)
def compress(fname, out=None):
    """Compress a file into .gz

    The input is streamed into the archive chunk by chunk, so the whole file is never held in memory at once.

    :param str fname: the file name to be compressed
    :param str out: specify an output file name, otherwise default is fname.gz
    :returns: compressed file name (str)
    """

    if out is None:
        out = '%s.gz' % fname

    with open(fname, 'rb') as f_in, gzip.open(out, 'wb') as f_out:
        # Both handles are binary, so the bytes can be copied straight through without an intermediate buffer
        shutil.copyfileobj(f_in, f_out)

    return out
def decompress(fname, out=None):
    """Decompress a .gz file

    When no output name is given, only a trailing ``.gz`` suffix is stripped from the input name; any other
    occurrence of ``.gz`` in the path (e.g. in a directory name) is left untouched.

    :param str fname: the file name to be decompressed
    :param str out: specify an output file name, otherwise default is fname without the trailing .gz
    :returns: the decompressed file name (str)
    :raises ValueError: if the output would overwrite the input file (e.g. fname has no .gz suffix and no
      output file name was given)
    """

    suffix = '.gz'
    if out is None:
        out = fname[:-len(suffix)] if fname.endswith(suffix) else fname

    # Opening the output truncates it, so writing onto the input would destroy the data before it is read
    if os.path.abspath(out) == os.path.abspath(fname):
        raise ValueError("Cannot decompress %s onto itself, please provide an output file name" % fname)

    with gzip.open(fname, "rb") as f_in, open(out, "wb") as f_out:
        # Stream the decompressed bytes instead of loading the whole file into memory
        shutil.copyfileobj(f_in, f_out)

    return out
def touch(fname, content='', mode='w'):
    """Create a file with the specified contents

    :param str fname: file name to be created
    :param str content: content to write into the file (default '')
    :param str mode: mode to open the file handler (default: 'w')
    """

    # The context manager closes the handle on exit, so no explicit close() is needed
    with open(fname, mode) as fhandle:
        fhandle.write(content)
def get_tempfile():
    """Method to get a temporary file name

    Only a name is generated, the file itself is not created. The name is placed in the directory given by the
    ``CCP4_SCR`` environment variable; if that variable is unset or empty, the system temporary directory is
    used instead.

    :returns: temporary file name with a .pdb extension (str)
    """

    # Imported locally so the random name does not depend on tempfile's private name generator
    import uuid

    scratch_dir = os.environ.get('CCP4_SCR') or tempfile.gettempdir()
    return os.path.join(scratch_dir, '%s.pdb' % uuid.uuid4().hex)
def remove(path):
    """Remove a file, symbolic link or directory tree if it exists

    Nothing is done (and no error is raised) when the path does not exist.

    :param str path: the path to be removed
    """

    if os.path.islink(path):
        # Unlink the link itself: a broken link fails os.path.exists() and shutil.rmtree() refuses symlinks
        os.remove(path)
    elif os.path.isdir(path):
        shutil.rmtree(path)
    elif os.path.exists(path):
        os.remove(path)
def create_tempfile(content, mode="w"):
    """Write the given contents into a newly named temporary file

    The file name is obtained from :py:func:`get_tempfile` and the file is written with :py:func:`touch`.

    :param str content: content to dump into the temporary file
    :param str mode: mode to open the file handler (default: 'w')
    :returns: the path to the temporary file name (str)
    """

    tmp_path = get_tempfile()
    touch(tmp_path, content=content, mode=mode)

    return tmp_path
def merge_hierarchies(hiearchies, new_chain_id="A", new_model_id="1", renumber=False):
    """Merge several hierarchies into a single one made of one model with one chain

    Only the first chain of the first model of each input hierarchy is read. Its residues are appended, in
    input order, to the single chain of the result.

    :param tuple hiearchies: a list or tuple with the pdb hierarchies to be merged (at least two)
    :param str new_chain_id: the new chain id for the result hierarchy
    :param str new_model_id: the new model name for the result hierarchy
    :param bool renumber: if True the residues of the resulting hierarchy will be renumbered starting at 1
    :returns: a new :py:obj:`~gemmi.Structure` hierarchy corresponding to the merged input hierarchies
    :raises ValueError: if the input is not a list or tuple, or if it contains fewer than two hierarchies
    """

    if not isinstance(hiearchies, (list, tuple)):
        raise ValueError("Please provide hierarchies to be merged as lists!")
    if len(hiearchies) < 2:
        raise ValueError("Please provide at least two hierarchies to merge!")

    merged_model = gemmi.Model(new_model_id)
    merged_chain = gemmi.Chain(new_chain_id)
    merged_structure = gemmi.Structure()

    # Collect the residues of every input into the single output chain
    for structure in hiearchies:
        source_chain = structure[0][0]
        for residue in source_chain:
            merged_chain.add_residue(residue)

    merged_model.add_chain(merged_chain)
    merged_structure.add_model(merged_model)

    if renumber:
        renumber_hierarchy(merged_structure)

    return merged_structure
def renumber_hierarchy(hierarchy, start=1):
    """Renumber the residues and atoms of a hierarchy in place

    Residue numbers restart at ``start`` in every chain. Atom serial numbers begin at 1 and keep increasing
    across all chains and models of the hierarchy.

    :param :py:obj:`~gemmi.Structure` hierarchy: pdb hierarchy to be renumbered
    :param int start: number given to the first residue of each chain
    """

    all_chains = (chain for model in hierarchy for chain in model)

    serial = 0
    for chain in all_chains:
        for position, residue in enumerate(chain):
            residue.seqid.num = start + position
            for atom in residue:
                # The serial counter is shared by the whole hierarchy and is never reset
                serial += 1
                atom.serial = serial
def invert_hiearchy(hierarchy):
    """Build a hierarchy whose residues appear in the reverse order of the input

    Only the first chain of the first model of the input is read. The result contains a single model "1" with
    a single chain "A", and is renumbered with :py:func:`renumber_hierarchy` before being returned.

    :param :py:obj:`~gemmi.Structure` hierarchy: pdb hierarchy to be inverted
    :returns: the :py:obj:`~gemmi.Structure` hierarchy corresponding with the inverted sequence (1-res_seq)
    """

    reversed_model = gemmi.Model("1")
    reversed_chain = gemmi.Chain("A")
    reversed_structure = gemmi.Structure()

    # Snapshot the residues first so they can be walked from last to first
    original_residues = list(hierarchy[0][0])
    for position, residue in enumerate(reversed(original_residues)):
        reversed_chain.add_residue(residue)
        reversed_chain[-1].seqid.num = position + 1

    reversed_model.add_chain(reversed_chain)
    reversed_structure.add_model(reversed_model)
    renumber_hierarchy(reversed_structure)

    return reversed_structure
def get_missing_residues(header_list):
    """Group the missing residues reported in the header of a pdb file by chain

    The header lines are parsed with :py:func:`Bio.PDB.parse_pdb_header._parse_pdb_header_list`, and the
    ``missing_residues`` entries of the parsed header are grouped by their ``chain`` field.

    :param tuple header_list: a list with the lines of the header section of the pdb file
    :returns: a dictionary with the ``ssseq`` value of the missing residues present in each chain (chain ids
      are used as keys)
    """

    parsed_header = _parse_pdb_header_list(header_list)

    missing_by_chain = {}
    for entry in parsed_header['missing_residues']:
        # setdefault creates the list the first time a chain id is seen
        missing_by_chain.setdefault(entry['chain'], []).append(entry['ssseq'])

    return missing_by_chain
def merge_into_ensemble(hierarchies):
    """Combine a series of hierarchies into one ensemble, with one model per input hierarchy

    Only the first chain of the first model of each input hierarchy is used. Models are named "1", "2", ...
    following the order of the input.

    :argument tuple hierarchies: the hierarchies that will merged to form an ensemble
    :returns: a new :py:obj:`~gemmi.Structure` hierarchy containing the ensemble
    """

    ensemble = gemmi.Structure()

    for model_number, structure in enumerate(hierarchies, 1):
        member = gemmi.Model(str(model_number))
        member.add_chain(structure[0][0])
        ensemble.add_model(member)

    return ensemble
def split_ensemble_into_models(hierarchy):
    """Split an ensemble into its constituent models

    :argument :py:obj:`~gemmi.Structure` hierarchy: the input ensemble to be split
    :returns: a tuple containing one :py:obj:`~gemmi.Structure` per model of the input ensemble, in the same \
    order, each formed by that single model
    """

    def _as_structure(model):
        # Wrap one model into a fresh, otherwise empty structure
        single = gemmi.Structure()
        single.add_model(model)
        return single

    return tuple(_as_structure(model) for model in hierarchy)