Source code for vermouth.gmx.itp_read

# Copyright 2018 University of Groningen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Read GROMACS .itp files.

"""

import collections
from vermouth.molecule import (Block, Interaction)
from vermouth.parser_utils import (SectionLineParser, _tokenize)


[docs]
class ITPDirector(SectionLineParser):
    """
    class for reading itp files.
    """
    COMMENT_CHAR = ';'

    atom_idxs = {'bonds': [0, 1],
                 'position_restraints': [0],
                 'angles': [0, 1, 2],
                 'constraints': [0, 1],
                 'dihedrals': [0, 1, 2, 3],
                 'pairs': [0, 1],
                 'pairs_nb': [0, 1],
                 'exclusions': [slice(None, None)],
                 'virtual_sites1': [0],
                 'virtual_sites2': [0, 1, 2],
                 'virtual_sites3': [0, 1, 2, 3],
                 'virtual_sites4': [slice(0, 5)],
                 'virtual_sitesn': [0, slice(2, None)],
                 'settles': [0],
                 'distance_restraints':  [0, 1],
                 'dihedral_restraints':  [slice(0, 4)],
                 'orientation_restraints': [0, 1],
                 'angle_restraints': [slice(0, 4)],
                 'angle_restraints_z': [0, 1],
                 'cmap': [slice(0, 5)]}

    def __init__(self, force_field):
        super().__init__()
        self.force_field = force_field
        self.current_block = None
        self.current_meta = None
        self.blocks = collections.OrderedDict()
        self.header_actions = {
            ('moleculetype', ): self._new_block
        }
        # a list of nodes of current-block
        self.current_atom_names = []


[docs]
    def dispatch(self, line):
        """
        Looks at `line` to see what kind of line it is, and returns either
        :meth:`parse_header` if `line` is a section header or
        :meth:`vermouth.parser_utils.SectionLineParser.parse_section` otherwise.
        Calls :meth:`vermouth.parser_utils.SectionLineParser.is_section_header` to see
        whether `line` is a section header or not.

        Parameters
        ----------
        line: str

        Returns
        -------
        collections.abc.Callable
            The method that should be used to parse `line`.
        """

        if self.is_section_header(line):
            return self.parse_header
        elif self.is_pragma(line):
            return self.parse_pragma
        else:
            return self.parse_section



[docs]
    @staticmethod
    def is_pragma(line):
        """
        Parameters
        ----------
        line: str
            A line of text.

        Returns
        -------
        bool
            ``True`` iff `line` is a def statement.
        """
        return line.startswith('#')



[docs]
    def parse_pragma(self, line, lineno=0):
        """
        Parses the beginning and end of define sections
        with line number `lineno`. Sets attr current_meta
        when applicable. Does check if ifdefs overlap.

        Parameters
        ----------
        line: str
        lineno: str

        Returns
        -------
        object
            The result of calling :meth:`finalize_section`, which is called
            if a section ends.

        Raises
        ------
        IOError
            If the def sections are missformatted
        """
        if line == '#endif':
            if self.current_meta is not None:
                self.current_meta = None
            elif self.current_meta is None:
                raise IOError("Your #ifdef section is ordered incorrectly."
                              "At line {} I read #endif but I haven not read"
                              "a ifdef before.".format(lineno))

        elif line.startswith("#else"):
            if self.current_meta is None:
               raise IOError("Your #ifdef section is ordered incorrectly."
                             "At line {} I read #endif but I haven not read"
                             "a ifdef before.".format(lineno))

            inverse = {"ifdef": "ifndef", "ifndef": "ifdef"}
            tag = self.current_meta["tag"]
            condition = inverse[self.current_meta["condition"]]
            self.current_meta = {'tag': tag, 'condition': condition.replace("#", "")}

        elif line.startswith("#ifdef") or line.startswith("#ifndef"):
            if self.current_meta is None:
                condition, tag = line.split()
                self.current_meta = {'tag': tag, 'condition': condition.replace("#", "")}
            elif self.current_meta is not None:
                raise IOError("Your #ifdef/#ifndef section is ordered incorrectly."
                              "At line {} I read {} but there is still"
                              "an open #ifdef/#ifndef section from"
                              "before.".format(lineno, line.split()[0]))
        elif line.startswith("#define"):
            pass
        # Guard against unkown pragmas like #if or #include
        else:
            raise IOError("Don't know how to parse pragma {} at"
                          "line {}.".format(line, lineno))



[docs]
    def parse_header(self, line, lineno=0):
        """
        Parses a section header with line number `lineno`. Sets
        :attr:`vermouth.parser_utils.SectionLineParser.section`
        when applicable. Does not check whether `line` is a valid section
        header.

        Parameters
        ----------
        line: str
        lineno: str

        Returns
        -------
        object
            The result of calling :meth:`finalize_section`, which is called
            if a section ends.

        Raises
        ------
        KeyError
            If the section header is unknown.
        """
        prev_section = self.section

        ended = []
        section = self.section + [line.strip('[ ]').casefold()]
        if tuple(section[-1:]) in self.METH_DICT:
            self.section = section[-1:]
        else:
            while tuple(section) not in self.METH_DICT and len(section) > 1:
                ended.append(section.pop(-2))  # [a, b, c, d] -> [a, b, d]
            self.section = section

        result = None

        if len(prev_section) != 0:
            result = self.finalize_section(prev_section, ended)

        action = self.header_actions.get(tuple(self.section))
        if action:
            action()

        return result



[docs]
    def finalize_section(self, previous_section, ended_section):
        """
        Called once a section is finished. It appends the current_links list
        to the links and update the block dictionary with current_block. Thereby it
        finishes reading a given section.

        Parameters
        ---------
        previous_section: list[str]
            The last parsed section.
        ended_section: list[str]
            The sections that have been ended.
        """
        if "atoms" in ended_section:
            self.current_atom_names = list(self.current_block.nodes)

        if self.current_block is not None:
            self.force_field.blocks[self.current_block.name] = self.current_block



[docs]
    def finalize(self, lineno=0):
        """
        Called at the end of the file and checks that all pragmas are closed
        before calling the parent method.
        """
        if self.current_meta is not None:
            raise IOError("Your #ifdef/#ifndef section is ordered incorrectly."
                          "There is no #endif for the last pragma.")

        super().finalize()


    def _new_block(self):
        self.current_block = Block(force_field=self.force_field)

    @SectionLineParser.section_parser('moleculetype')
    def _block(self, line, lineno=0):
        """
        Parses the line directly following the '[moleculetype]'
        directive and stores the block name and exclusions.
         """
        name, nrexcl = line.split()
        self.current_block.name = name
        self.current_block.nrexcl = int(nrexcl)

    @SectionLineParser.section_parser('moleculetype', 'atoms')
    def _block_atoms(self, line, lineno=0):
        """
        Parses the lines of the [atoms] directive.
        """
        tokens = collections.deque(_tokenize(line))
        self._parse_block_atom(tokens, self.current_block)


    @SectionLineParser.section_parser('moleculetype', 'cmap')
    def _skip(self, line, lineno=0):
        pass
    @SectionLineParser.section_parser('moleculetype', 'bonds')
    @SectionLineParser.section_parser('moleculetype', 'angles')
    @SectionLineParser.section_parser('moleculetype', 'dihedrals')
    @SectionLineParser.section_parser('moleculetype', 'impropers')
    @SectionLineParser.section_parser('moleculetype', 'constraints')
    @SectionLineParser.section_parser('moleculetype', 'pairs')
    @SectionLineParser.section_parser('moleculetype', 'exclusions')
    @SectionLineParser.section_parser('moleculetype', 'virtual_sites1')
    @SectionLineParser.section_parser('moleculetype', 'virtual_sites2')
    @SectionLineParser.section_parser('moleculetype', 'virtual_sites3')
    @SectionLineParser.section_parser('moleculetype', 'virtual_sites4')
    @SectionLineParser.section_parser('moleculetype', 'virtual_sitesn')
    @SectionLineParser.section_parser('moleculetype', 'position_restraints')
    @SectionLineParser.section_parser('moleculetype', 'pairs_nb')
    @SectionLineParser.section_parser('moleculetype', 'settles')
    @SectionLineParser.section_parser('moleculetype', 'distance_restraints')
    @SectionLineParser.section_parser('moleculetype', 'dihedral_restraints')
    @SectionLineParser.section_parser('moleculetype', 'orientation_restraints')
    @SectionLineParser.section_parser('moleculetype', 'angle_restraints')
    @SectionLineParser.section_parser('moleculetype', 'angle_restraints_z')
    @SectionLineParser.section_parser('moleculetype', 'cmap')
    def _interactions(self, line, lineno=0):
        """
        Parses all interaction lines that are not directives (i.e. within []).
        Note that each interaction is enumerated explicitly to guard against typos
        and also interactions for which the format is unknown.
        """
        context = self.current_block
        interaction_name = self.section[-1]
        tokens = collections.deque(_tokenize(line))

        atom_idxs = self.atom_idxs.get(interaction_name)

        self._base_parser(
            tokens,
            context,
            section=interaction_name,
            atom_idxs=atom_idxs,
            )

    def _split_atoms_and_parameters(self, tokens, atom_idxs):
        """
        Returns atoms from line based on the indices defined in `atom_idxs`.
        It also interprets slices etc. stored as strings.

        Parameters:
        ------------
        tokens: collections.deque[str]
            Deque of token to inspect. The deque **can be modified** in place.
        atom_idxs: list of ints or strings that are valid python slices

        Returns:
        -----------
        list
        """

        atoms = []
        remove = []
        # first we extract the atoms from the indices given using
        # ints or slices
        tokens = list(tokens)
        for idx in atom_idxs:
            if isinstance(idx, int):
                atoms.append([tokens[idx], {}])
                remove.append(idx)
            elif isinstance(idx, slice):
                atoms += [[atom, {}] for atom in tokens[idx]]
                idx_range = range(0, len(tokens))
                remove += idx_range[idx]
            else:
                raise IOError

        # everything that is left are parameters, which we
        # get by simply deleting the atoms from tokens

        for index in sorted(remove, reverse=True):
            del tokens[index]

        return atoms, tokens

    def _treat_block_interaction_atoms(self, atoms, context, section):
        """
        Takes the atom indices associated with an interaction line
        and converts it to zero based indices. It also performas some
        format checks. It checks that:

              - the indices are not negative or zero
              - the indices refere to an existing atom
              - atoms have no prefixes

        Parameters
        -----------
        atom_idxs: list of ints or strings that are valid python slices
        context: :class:`vermouth.molecule.Block`
            The current block we parse
        section: str
            The current section header
        """
        all_references = []
        for atom in atoms:
            reference = atom[0]
            if reference.isdigit():
                if int(reference) < 1:
                    msg = 'In section {} is a negative atom reference, which is not allowed.'
                    raise IOError(msg.format(section.name))

               # The indices in the file are 1-based
                reference = int(reference) - 1
                try:
                    reference = self.current_atom_names[reference]
                except IndexError:
                    msg = ('There are {} atoms defined in the block "{}". '
                           'Interaction in section "{}" cannot refer to '
                           'atom index {}.')
                    raise IOError(msg.format(len(context), context.name,
                                             section, reference + 1))
                atom[0] = reference
            else:
                if reference not in context:
                    msg = ('There is no atom "{}" defined in the block "{}". '
                           'Section "{}" cannot refer to it.')
                    raise IOError(msg.format(reference, context.name, section))
                if reference[0] in '+-<>':
                    msg = ('Atom names in blocks cannot be prefixed with + or -. '
                           'The name "{}", used in section "{}" of the block "{}" '
                           'is not valid in a block.')
                    raise IOError(msg.format(reference, section, context.name))
            all_references.append(reference)
        return all_references

    def _base_parser(self, tokens, context, section, atom_idxs):
        """
        Converts an interaction line into a vermouth interaction
        tuple. It updates the block interactions in place.

        Parameters
        ----------
        tokens: collections.deque[str]
            Deque of token to inspect. The deque **can be modified** in place.
        context: :class:`vermouth.molecule.Block`
            The current block we parse
        section: str
            The current section header
        atom_idxs: list of ints or strings that are valid python slices
        """
        # split atoms and parameters

        atoms, parameters = self._split_atoms_and_parameters(tokens, atom_idxs)

        # perform check on the atom ids
        treated_atoms = self._treat_block_interaction_atoms(atoms, context, section)

        if self.current_meta:
            meta = {self.current_meta['condition']: self.current_meta['tag']}
        else:
            meta = {} #dict(collections.ChainMap(meta, apply_to_all_interactions))

        interaction = Interaction(
            atoms=treated_atoms,
            parameters=parameters,
            meta=meta,)

        context.interactions[section] = context.interactions.get(section, []) + [interaction]

    def _parse_block_atom(self, tokens, context):
        """
        Converts the lines of the atom directive to graph nodes and
        sets the values (i.e. atomtype) as attributes.

        Parameters
        ----------
        tokens: collections.deque[str]
            Deque of token to inspect. The deque **can be modified** in place.
        context: :class:`vermouth.molecule.Block`
            The current block we parse
        """
        # deque does not support slicing
        first_six = (tokens.popleft() for _ in range(6))
        index, atype, resid, resname, name, charge_group = first_six
        # since the index becomes the node name and all graphs start with 0
        # index it makes more sense to also directly start at 0

        if int(index) < 1:
            msg = 'One of your atoms has a negative atom reference, which is not allowed.'
            raise IOError(msg)

        index = int(index) - 1

        if index in context:
            msg = ('There is already an atom with index "{}" in the block "{}". '
                   'Atom indices must be unique within a block.')
            raise IOError(msg.format(name, context.name))

        atom = {
            # for bookkeeping purposes let's keep the actual index i.e. +1 here
            'index'   : index + 1,
            'atomname': name,
            'atype': atype,
            'resname': resname,
            'resid': int(resid),
            'charge_group': int(charge_group),
        }

       # charge and mass are optional, but charge has to be defined for mass to be
        if tokens:
            atom['charge'] = float(tokens.popleft())
        if tokens:
            atom['mass'] = float(tokens.popleft())

        attributes = {}

        context.add_node(index, **dict(collections.ChainMap(attributes, atom)))



[docs]
def read_itp(lines, force_field):
    """
    Parses `lines` of itp format and adds the
    molecule as a block to `force_field`.

    Parameters
    ----------
    lines: list
        list of lines of an itp file
    force_field: :class:`vermouth.forcefield.ForceField`
    """
    director = ITPDirector(force_field)
    return list(director.parse(iter(lines)))
Source code for vermouth.gmx.itp_read

VerMoUTH

Navigation

Related Topics