Source code for swamp.utils

"""This is SWAMP: Solving structures With Alpha Membrane Pairs

This module implements useful classes and methods used across all modules in SWAMP
"""

__author__ = "Filomeno Sanchez Rodriguez"
__credits__ = "Daniel Rigden, & Ronan Keegan"
__email__ = "filomeno.sanchez-rodriguez@liv.ac.uk"

import sys
import os
import gzip
import shutil
import tempfile
import logging
from swamp import version

__version__ = version.__version__

if 'DISABLE_DEPENDENCY_CHECKS' not in os.environ:

    if "CCP4" not in os.environ:
        raise RuntimeError("Cannot find CCP4 root directory")

    import gemmi
    import conkit.io
    from conkit.core import Contact, ContactMap, Sequence
    from Bio.PDB.parse_pdb_header import _parse_pdb_header_list


[docs]def SwampLibrary(*args, **kwargs): """:py:obj:`~swamp.utils.swamplibrary.SwampLibrary` instance""" from swamp.utils.swamplibrary import SwampLibrary return SwampLibrary(*args, **kwargs)
[docs]def ThreadResults(*args, **kwargs): """:py:obj:`~swamp.utils.threadresults.ThreadResults` instance""" from swamp.utils.threadresults import ThreadResults return ThreadResults(*args, **kwargs)
[docs]def TargetSplit(*args, **kwargs): """:py:obj:`~swamp.utils.targetsplit.TargetSplit` instance""" from swamp.utils.targetsplit import TargetSplit return TargetSplit(*args, **kwargs)
[docs]def compress(fname, out=None): """Compress a text file into .gz :param str fname: the file name to be compressed :param str out: specify an output file name, otherwise default is fname.gz :returns: compressed file name (str) """ if out is None: out = '%s.gz' % fname with open(fname, 'rb') as f_in, gzip.open(out, 'wb') as f_out: data = f_in.read() if sys.version_info[0] < 3: bindata = data else: bindata = bytearray(data) f_out.write(bindata) return out
[docs]def decompress(fname, out=None): """Decompress a .gz file into text file :param str fname: the file name to be decompressed :param str out: specify an output file name, otherwise default is fname without .gz :returns: the decompressed file name (str) """ if out is None: out = fname.replace('.gz', '') with open(out, "wb") as f_out, gzip.open(fname, "rb") as f_in: bindata = f_in.read() f_out.write(bindata) return out
[docs]def touch(fname, content='', mode='w'): """Create a file with the specified contents :param str fname: file name to be created :param str content: content to write into the file (default '') :param str mode: mode to open the file handler (default: 'w') """ with open(fname, mode) as fhandle: fhandle.write(content) fhandle.close()
[docs]def get_tempfile(): """Method to get a temporary file name :returns: temporary file name (str) """ temp_name = next(tempfile._get_candidate_names()) return os.path.join(os.environ['CCP4_SCR'], '%s.pdb' % temp_name)
[docs]def remove(path): if os.path.exists(path): if os.path.isdir(path): shutil.rmtree(path) else: os.remove(path)
[docs]def create_tempfile(content, mode="w"): """Create a temporary file with a given set of contents :param str content: content to dump into the temporary file :param str mode: mode to open the file handler (default: 'w') :returns: the path to the temporary file name (str) """ fname = get_tempfile() touch(fname, content, mode) return fname
[docs]def invert_contactmap(cmap): """Method to invert a contact map :param :py:obj:`~conkit.core.ContactMap` cmap: the contact map of interest :returns: and inverted_cmap: the contact map corresponding with the inverted sequence (1-res_seq) \ (:py:obj:`~conkit.core.ContactMap`) """ inverted_cmap = ContactMap('inverted') highest_residue_number = max([max(contact.id) for contact in cmap]) for contact in cmap: new_contact = Contact(highest_residue_number + 1 - contact.res1_seq, highest_residue_number + 1 - contact.res2_seq, contact.raw_score) inverted_cmap.add(new_contact) inverted_cmap.sequence = cmap.sequence return inverted_cmap
[docs]def extract_interhelical_cmap(cmap, helices, residues, new_id, score_threshold=0.0): """Method to extract the interhelical contacts :param :py:obj:`~conkit.core.ContactMap` cmap: the contact map of interest :param tuple helices: a nested list with the listed residue numbers of the two helices of interest :param str new_id: the new identifies given to the resulting contact map :param float score_threshold: the raw score threshold at which contacts will be included (default 0.0) :returns: inverted_cmap: the contact map containing only the inter-helical contacts between the pair of helices \ (:py:obj:`~conkit.core.ContactMap`) """ result_cmap = ContactMap(new_id) dummy_sequence = Sequence('id', 'A' * len(residues)) for contact in cmap: if contact.raw_score >= score_threshold: helix_1 = [(contact.res1_seq in x) for x in helices] helix_2 = [(contact.res2_seq in x) for x in helices] if helix_1 != helix_2 and any(helix_1) and any(helix_2): # Create a new contact (IMPORTANT: Renumber the contact position within the new map) new_contact = Contact(residues.index(contact.res1_seq) + 1, residues.index(contact.res2_seq) + 1, contact.raw_score) result_cmap.add(new_contact) result_cmap.sequence = dummy_sequence return result_cmap
[docs]def extract_fragment_cmap(pdb_hierarchy, helices): """Method to extract the interhelical contact map of a given pdb file :param :py:obj:`~gemmi.Structure` pdb_hierarchy: the pdb hierarchy of the fragment :param tuple helices: a nested list with the listed residues numbers of the helices of interest :returns: the contact map with the interhelical contacts (:py:obj:`~conkit.core.ContactMap`) """ residues = [i for sub in helices for i in sub] temp_pdb = get_tempfile() pdb_hierarchy.write_pdb(temp_pdb) fragment_cmap = conkit.io.read(temp_pdb, "pdb").top_map os.remove(temp_pdb) if fragment_cmap is None: return None return extract_interhelical_cmap(fragment_cmap, helices, residues, "InterhelicalContacts")
[docs]def merge_hierarchies(hiearchies, new_chain_id="A", new_model_id="1", renumber=False): """Method to merge two given hierarchies into one (same chain and model) :param tuple hiearchies: a list with the pdb hierarchies to be merged :param str new_chain_id: the new chain id for the result hierarchy :param str new_model_id: the new model name for the result hierarchy :param bool renumber: if True the residues of the resulting hierarchy will be renumbered starting at 1 :returns: a new :py:obj:`~gemmi.Structure` hierarchy corresponding to the merged input hierarchies """ if not isinstance(hiearchies, list) and not isinstance(hiearchies, tuple): raise ValueError("Please provide hierarchies to be merged as lists!") if len(hiearchies) < 2: raise ValueError("Please provide at least two hierarchies to merge!") new_model = gemmi.Model(new_model_id) new_chain = gemmi.Chain(new_chain_id) new_hierarchy = gemmi.Structure() for hierarchy in hiearchies: for res in hierarchy[0][0]: new_chain.add_residue(res) new_model.add_chain(new_chain) new_hierarchy.add_model(new_model) if renumber: renumber_hierarchy(new_hierarchy) return new_hierarchy
[docs]def renumber_hierarchy(hierarchy, start=1): """Method to renumber a given hierarchy to start in a given value. Renumbered inplace :param :py:obj:`~gemmi.Structure` hierarchy: pdb hierarchy to be renumbered :param int start: first residue to start renumbering of the hierarchy """ atom_idx = 1 for model in hierarchy: for chain in model: for idx, residue in enumerate(chain): residue.seqid.num = idx + start for atom in residue: atom.serial = atom_idx atom_idx += 1
[docs]def extract_hierarchy(full_hierarchy, to_extract, chainID=None): """Method to extract a given set of residues from a pdbfile in form of a gemmi structure hierarchy :param :py:obj:`~gemmi.Structure` full_hierarchy: pdb hierarchy of interest :param tuple to_extract: list with the residue numbers to be extracted :param str chainID: the chain id where the residues to be extracted are located (default None) :returns: a new :py:obj:`~gemmi.Structure` hierarchy containing the extracted residues """ new_hierarchy = gemmi.Structure() new_chain = gemmi.Chain("A") new_model = gemmi.Model("1") # Check model number if len(full_hierarchy) > 1: logging.debug("pdb {0} has > 1r model - only first model will be kept".format(full_hierarchy.name)) # Check chain number if len(full_hierarchy[0]) > 1: if chainID is None: logging.debug( "pdb {0} has > 1 chain - only first chain will be kept".format(full_hierarchy.name)) old_chain = full_hierarchy[0][0] else: try: old_chain = full_hierarchy[0][chainID] except ValueError as e: raise ValueError("Chain %s not found in %s!" % (chainID, full_hierarchy.name)) else: old_chain = full_hierarchy[0][0] # Extract region for residue in old_chain: if residue.seqid.num in to_extract: new_chain.add_residue(residue) # Append the model to the new hierarchy and exit new_model.add_chain(new_chain) new_hierarchy.add_model(new_model) return new_hierarchy
[docs]def invert_hiearchy(hierarchy): """Method to return the inverted hierarchy (1-res_seq) :param :py:obj:`~gemmi.Structure` hierarchy: pdb hierarchy to be inverted :returns: the :py:obj:`~gemmi.Structure` hierarchy corresponding with the inverted sequence (1-res_seq) """ inverted_model = gemmi.Model("1") inverted_chain = gemmi.Chain("A") inverted_hierarchy = gemmi.Structure() tmp_list = [] for residue in hierarchy[0][0]: tmp_list.append(residue) for idx, residue in enumerate(tmp_list[::-1]): inverted_chain.add_residue(residue) inverted_chain[-1].seqid.num = idx + 1 inverted_model.add_chain(inverted_chain) inverted_hierarchy.add_model(inverted_model) renumber_hierarchy(inverted_hierarchy) return inverted_hierarchy
[docs]def get_missing_residues(header_list): """Get a dictionary with the missing residues described in the REMARK section of a pdb file :param tuple header_list: a list with the lines of the header section of the pdb file :returns: a dictionary with the missing residues present in each chain (chain ids are used as keys) """ head = _parse_pdb_header_list(header_list) rslt = {} for residue in head['missing_residues']: if residue['chain'] in rslt.keys(): rslt[residue['chain']].append(residue['ssseq']) else: rslt[residue['chain']] = [residue['ssseq']] return rslt
[docs]def extract_hierarchy_seqnumber(hierarchy, seq_numbers, chain_id='A'): """ Extract the hierarchy corresponding with a given set of residue sequence numbers. Considers missing residues. :argument :py:obj:`~gemmi.Structure` hierarchy: original hierarchy to trim :argument tuple seq_numbers: residue sequence number to extract :argument str chain_id: chain where the residues should be extracted from :returns: a new :py:obj:`~gemmi.Structure` with the residues of interest :example >>> import gemmi >>> from swamp.utils import extract_hierarchy_seqnumber >>> hierarchy = gemmi.read_structure('/mnt/sda1/MR_edge_cases/3txt_MR/3txt.pdb') >>> subtrgt_1 = extract_hierarchy_seqnumber(hierarchy, [x for x in range(104 ,125)] + [x for x in range(158, 179)]) >>> subtrgt_1.write_minimal_pdb('/home/filo/test.pdb') """ header = hierarchy.make_pdb_headers().split('\n') try: missing_res = get_missing_residues(header)[chain_id] except KeyError: missing_res = [] new_hierarchy = gemmi.Structure() new_model = gemmi.Model("1") new_chain = gemmi.Chain(chain_id) idx = 1 for residue in hierarchy[0][chain_id]: factor = sum([1 for x in missing_res if x < residue.seqid.num]) residue.seqid.num = idx + factor idx += 1 if residue.seqid.num in seq_numbers: new_chain.add_residue(residue) new_model.add_chain(new_chain) new_hierarchy.add_model(new_model) return new_hierarchy
[docs]def merge_into_ensemble(hierarchies): """Method to merge a series of hierarchies into an ensemble where each of the original hierarchies is represented as a model :argument tuple hierarchies: the hierarchies that will merged to form an ensemble :returns: a new :py:obj:`~gemmi.Structure` hierarchy containing the ensemble """ new_hierarchy = gemmi.Structure() for hierarchy in hierarchies: new_model = gemmi.Model(str(len(new_hierarchy) + 1)) new_model.add_chain(hierarchy[0][0]) new_hierarchy.add_model(new_model) return new_hierarchy
[docs]def split_ensemble_into_models(hierarchy): """Method to split a ensemble into its constituent models :argument :py:obj:`~gemmi.Structure` hierarchy: the input ensemble to be splited :returns: a tuple containing :py:obj:`~gemmi.Structure`, each formed by a single model originating from the input \ ensemble """ result = [] for model in hierarchy: new_hierarchy = gemmi.Structure() new_hierarchy.add_model(model) result.append(new_hierarchy) return tuple(result)