Source code for swamp.parsers.mtzparser

import re
import gemmi
from enum import Enum
from swamp.parsers.parser import Parser


class MtzColumnLabels(Enum):
    """An enumerator that contains the regular expression used to detect the column labels of a given MTZ file

    Each member name identifies a kind of column and its value is a compiled pattern that is matched (anchored at
    the start) against the candidate column labels. All patterns are written case-insensitively by spelling out
    both cases of every letter.
    """

    free = re.compile(r"^.*?[Ff][Rr][Ee][Ee].*")

    # Labels without any (+)/(-) suffix requirement
    i = re.compile(r"^[Ii]")
    sigi = re.compile(r"^[Ss][Ii][Gg][Ii]")
    # The negative lookaheads reject labels continuing with "C" or "WT" (e.g. FC, FWT) in any letter case.
    # The second lookahead used to read [Ww][Tw], which let "Fwt" / "fwt" through.
    f = re.compile(r"^[Ff][Pp]?(?![Cc])(?![Ww][Tt])")
    sigf = re.compile(r"^[Ss][Ii][Gg][Ff][Pp]?")
    dp = re.compile(r"^([Dd][Pp]|[Dd][Aa][Nn][Oo][Pp]?)")
    sigdp = re.compile(r"^[Ss][Ii][Gg]([Dd][Pp]|[Dd][Aa][Nn][Oo][Pp]?)")

    # Labels that must contain "(+)" or "plus"
    i_plus = re.compile(r"^[Ii].*(\(\+\)|[Pp][Ll][Uu][Ss])")
    sigi_plus = re.compile(r"^[Ss][Ii][Gg][Ii].*(\(\+\)|[Pp][Ll][Uu][Ss])")
    f_plus = re.compile(r"^[Ff][Pp]?.*(\(\+\)|[Pp][Ll][Uu][Ss])")
    sigf_plus = re.compile(r"^[Ss][Ii][Gg][Ff][Pp]?.*(\(\+\)|[Pp][Ll][Uu][Ss])")

    # Labels that must contain "(-)" or "minus"
    i_minus = re.compile(r"^[Ii].*(\(-\)|[Mm][Ii][Nn][Uu][Ss])")
    sigi_minus = re.compile(r"^[Ss][Ii][Gg][Ii].*(\(-\)|[Mm][Ii][Nn][Uu][Ss])")
    f_minus = re.compile(r"^[Ff][Pp]?.*(\(-\)|[Mm][Ii][Nn][Uu][Ss])")
    sigf_minus = re.compile(r"^[Ss][Ii][Gg][Ff][Pp]?.*(\(-\)|[Mm][Ii][Nn][Uu][Ss])")
class MTZColumnTypes(Enum):
    """An enumerator that contains the different types expected for each column of a given MTZ file

    Member names mirror those used for the column label patterns, and each value is the one-letter column type
    code that a column must carry to be considered for that label.

    Several names share a type code. :py:class:`enum.Enum` turns every later duplicate into an alias of the first
    member declared with that value, so the declaration order below must not be changed. Lookup by name still
    works for aliases and returns the shared code through ``.value``.
    """

    free = "I"

    i = "J"
    sigi = "Q"
    f = "F"
    sigf = "Q"  # alias of sigi
    dp = "D"
    sigdp = "Q"  # alias of sigi

    i_plus = "K"
    sigi_plus = "M"
    f_plus = "G"
    sigf_plus = "L"

    i_minus = "K"  # alias of i_plus
    sigi_minus = "M"  # alias of sigi_plus
    f_minus = "G"  # alias of f_plus
    sigf_minus = "L"  # alias of sigf_plus
class MtzParser(Parser):
    """Class to parse and store mtz label data.

    The mtz file is read with :py:func:`gemmi.read_mtz_file` as soon as the parser is instantiated. Calling
    :py:func:`~swamp.parsers.mtzparser.MtzParser.parse` afterwards fills in the column label attributes.

    :param str fname: file name of the mtz file to be parsed
    :param `~swamp.logger.swamplogger.SwampLogger` logger: logging interface for the parser (default None)
    :ivar reflection_file: object returned by :py:func:`gemmi.read_mtz_file`
    :ivar nreflections: value of ``nreflections`` taken from the reflection file
    :ivar spacegroup_symbol: value of ``spacegroup.hm`` taken from the reflection file
    :ivar resolution: value returned by ``resolution_high()`` on the reflection file
    :ivar f, sigf, dp, sigdp, i, sigi, free, f_plus, sigf_plus, i_plus, sigi_plus, f_minus, sigf_minus, i_minus, \
    sigi_minus: detected column label for each kind of column, or None if no column was found

    :example:

    >>> from swamp.parsers import MtzParser
    >>> my_parser = MtzParser('<fname>')
    >>> my_parser.parse()
    """

    def __init__(self, fname, logger=None):
        super(MtzParser, self).__init__(fname, logger=logger)

        self.reflection_file = None
        # Column labels, one attribute per member of MtzColumnLabels
        self.f = None
        self.sigf = None
        self.dp = None
        self.sigdp = None
        self.i = None
        self.sigi = None
        self.free = None
        self.f_plus = None
        self.sigf_plus = None
        self.i_plus = None
        self.sigi_plus = None
        self.f_minus = None
        self.sigf_minus = None
        self.i_minus = None
        self.sigi_minus = None
        # Global properties of the reflection file
        self.resolution = None
        self.nreflections = None
        self.spacegroup_symbol = None

        # The file is loaded eagerly; any exception raised while reading it propagates to the caller
        self.read_reflections()

    @property
    def summary(self):
        """Tuple with all the parsed label names"""
        return (self.f, self.sigf, self.i, self.sigi, self.free, self.dp, self.sigdp, self.f_plus, self.sigf_plus,
                self.i_plus, self.sigi_plus, self.f_minus, self.sigf_minus, self.i_minus, self.sigi_minus)

    def read_reflections(self):
        """Read the mtz file at ``fname`` using :py:func:`gemmi.read_mtz_file` and store the result in \
        :py:attr:`~swamp.parsers.mtzparser.MtzParser.reflection_file`, together with the number of reflections, \
        the space group symbol and the high resolution limit"""

        self.reflection_file = gemmi.read_mtz_file(self.fname)
        self.nreflections = self.reflection_file.nreflections
        self.spacegroup_symbol = self.reflection_file.spacegroup.hm
        self.resolution = self.reflection_file.resolution_high()

    def parse(self):
        """Parse the input mtz file and retrieve the column names of the labels as described at
        :py:obj:`~swamp.parsers.mtzparser.MtzColumnLabels`

        For every member of :py:obj:`~swamp.parsers.mtzparser.MtzColumnLabels`, the columns whose type equals the
        value of the same-named member of :py:obj:`~swamp.parsers.mtzparser.MTZColumnTypes` are selected, and the
        first label matching the regular expression is stored in the attribute named after the member. If no label
        is found at all, an error is logged and the error flag is set.
        """

        if self.error:
            self.logger.warning("Previous errors prevent parsing mtz file!")
            return

        for label in MtzColumnLabels:
            # Look the type up by item access: this also resolves aliased members and, unlike calling the
            # metaclass' __getattr__ directly, still works on Python >= 3.12 where that method was removed
            expected_type = MTZColumnTypes[label.name].value
            label_subset = [col.label for col in self.reflection_file.columns if col.type == expected_type]
            matches = list(filter(label.value.match, label_subset))
            if any(matches):
                # NOTE(review): under Python 3 encode() stores the label as bytes rather than str; kept as is
                # because callers may rely on it -- confirm whether a plain str is wanted
                setattr(self, label.name, matches[0].encode('utf-8'))

        # None entries are falsy, so this is true only when no label was assigned
        if not any(self.summary):
            self.logger.error('Cannot find any column names at %s' % self.fname)
            self.error = True