Source code for swamp.mr.targetdata

import gemmi
from Bio import SeqIO
from swamp.parsers import MtzParser
from Bio.Alphabet import generic_protein
from Bio.SeqUtils import molecular_weight


[docs]class TargetData(object): """Class to store relevant information about the target structure to be solved through MR :param str fasta_fname: target's fasta filename :param str mtz_fname: target's mtz filename :param str phased_mtz_fname: target's mtz filename containing phases (default: None) :param `~swamp.logger.swamplogger.SwampLogger` logger: logging interface for the MR pipeline (default None) """ def __init__(self, fasta_fname, mtz_fname, phased_mtz_fname=None, logger=None): self.fasta_fname = fasta_fname self.mtz_fname = mtz_fname self.phased_mtz_fname = phased_mtz_fname self.mw = None self.use_f = False self.resolution = None self.nreflections = None self.spacegroup_symbol = None self.solvent = None self.seq_length = None self.nreflections = None self.spacegroup_symbol = None self.spacegroup_name = None self.ncopies = None self.f = None self.sigf = None self.i = None self.sigi = None self.dp = None self.sigdp = None self.free = None self.f_plus = None self.sigf_plus = None self.i_plus = None self.sigi_plus = None self.f_minus = None self.sigf_minus = None self.i_minus = None self.sigi_minus = None self.logger = logger
[docs] def get_info(self): """Get all the information required to perform MR on the given target and store it into corresponding attributes of this :py:obj:`~swamp.mr.targetdata.TargetData` instance""" self.mw, self.seq_length = self.read_fasta(self.fasta_fname) mtz_parser = MtzParser(self.mtz_fname, logger=self.logger) self.nreflections = mtz_parser.nreflections self.spacegroup_symbol = mtz_parser.spacegroup_symbol self.spacegroup_name = mtz_parser.spacegroup_symbol.replace(' ', '') self.resolution = mtz_parser.resolution mtz_parser.parse() if mtz_parser.i is None and mtz_parser.f is not None: self.use_f = True self.f, self.sigf, self.i, self.sigi, self.free, self.dp, self.sigdp, self.f_plus, self.sigf_plus, \ self.i_plus, self.sigi_plus, self.f_minus, self.sigf_minus, self.i_minus, self.sigi_minus = mtz_parser.summary self.ncopies, self.solvent = self.estimate_contents(mtz_parser.reflection_file.cell.volume_per_image(), self.mw)
[docs] @staticmethod def read_fasta(fname): """Extract information about the target's sequence from a fasta file :param str fname: the file name of the fasta file of interest :returns the combined molecular weight and length of the unique sequences in the fasta file (tuple) """ target_chains = [str(chain.seq) for chain in list(SeqIO.parse(fname, "fasta", alphabet=generic_protein))] target_chains = list(set(target_chains)) mw = 0.0 for seq in target_chains: seq = seq.replace("X", "A") mw += round(molecular_weight(seq, "protein"), 2) seq_length = sum([len(seq) for seq in target_chains]) return mw, seq_length
[docs] @staticmethod def estimate_contents(cell_volume, mw): """Estimate the number of copies and the solvent content of the crystal :param float cell_volume: the volume of the crystal's cell :param float mw: molecular weight of each copy of the structure :returns: the no. of copies in the asu and the solvent content (tuple) """ for ncopies in [1, 2, 3, 4, 5]: matthews = cell_volume / (mw * ncopies) protein_fraction = 1. / (6.02214e23 * 1e-24 * 1.35 * matthews) solvent = round((1 - protein_fraction), 1) if round(matthews, 3) <= 3.59: break if solvent <= 0.4 and ncopies != 1: ncopies -= 1 matthews = cell_volume / (mw * ncopies) protein_fraction = 1. / (6.02214e23 * 1e-24 * 1.35 * matthews) solvent = round((1 - protein_fraction), 1) return ncopies, solvent