# Copyright 2018 University of Groningen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Read GROMACS .itp files.
"""
import collections
from vermouth.molecule import (Block, Interaction)
from vermouth.parser_utils import (SectionLineParser, _tokenize)
[docs]class ITPDirector(SectionLineParser):
"""
class for reading itp files.
"""
COMMENT_CHAR = ';'
atom_idxs = {'bonds': [0, 1],
'position_restraints': [0],
'angles': [0, 1, 2],
'constraints': [0, 1],
'dihedrals': [0, 1, 2, 3],
'pairs': [0, 1],
'pairs_nb': [0, 1],
'exclusions': [slice(None, None)],
'virtual_sites1': [0],
'virtual_sites2': [0, 1, 2],
'virtual_sites3': [0, 1, 2, 3],
'virtual_sites4': [slice(0, 5)],
'virtual_sitesn': [0, slice(2, None)],
'settles': [0],
'distance_restraints': [0, 1],
'dihedral_restraints': [slice(0, 4)],
'orientation_restraints': [0, 1],
'angle_restraints': [slice(0, 4)],
'angle_restraints_z': [0, 1]}
def __init__(self, force_field):
super().__init__()
self.force_field = force_field
self.current_block = None
self.current_meta = None
self.blocks = collections.OrderedDict()
self.header_actions = {
('moleculetype', ): self._new_block
}
# a list of nodes of current-block
self.current_atom_names = []
[docs] def dispatch(self, line):
"""
Looks at `line` to see what kind of line it is, and returns either
:meth:`parse_header` if `line` is a section header or
:meth:`vermouth.parser_utils.SectionLineParser.parse_section` otherwise.
Calls :meth:`vermouth.parser_utils.SectionLineParser.is_section_header` to see
whether `line` is a section header or not.
Parameters
----------
line: str
Returns
-------
collections.abc.Callable
The method that should be used to parse `line`.
"""
if self.is_section_header(line):
return self.parse_header
elif self.is_pragma(line):
return self.parse_pragma
else:
return self.parse_section
[docs] @staticmethod
def is_pragma(line):
"""
Parameters
----------
line: str
A line of text.
Returns
-------
bool
``True`` iff `line` is a def statement.
"""
return line.startswith('#')
[docs] def parse_pragma(self, line, lineno=0):
"""
Parses the beginning and end of define sections
with line number `lineno`. Sets attr current_meta
when applicable. Does check if ifdefs overlap.
Parameters
----------
line: str
lineno: str
Returns
-------
object
The result of calling :meth:`finalize_section`, which is called
if a section ends.
Raises
------
IOError
If the def sections are missformatted
"""
if line == '#endif':
if self.current_meta is not None:
self.current_meta = None
elif self.current_meta is None:
raise IOError("Your #ifdef section is orderd incorrectly."
"At line {} I read #endif but I haven not read"
"a ifdef before.".format(lineno))
elif line.startswith("#else"):
if self.current_meta is None:
raise IOError("Your #ifdef section is orderd incorrectly."
"At line {} I read #endif but I haven not read"
"a ifdef before.".format(lineno))
inverse = {"ifdef": "ifndef", "ifndef": "ifdef"}
tag = self.current_meta["tag"]
condition = inverse[self.current_meta["condition"]]
self.current_meta = {'tag': tag, 'condition': condition.replace("#", "")}
elif line.startswith("#ifdef") or line.startswith("#ifndef"):
if self.current_meta is None:
condition, tag = line.split()
self.current_meta = {'tag': tag, 'condition': condition.replace("#", "")}
elif self.current_meta is not None:
raise IOError("Your #ifdef/#ifndef section is orderd incorrectly."
"At line {} I read {} but there is still"
"an open #ifdef/#ifndef section from"
"before.".format(lineno, line.split()[0]))
# Guard against unkown pragmas like #if or #include
else:
raise IOError("Don't know how to parse pargma {} at"
"line {}.".format(line, lineno))
[docs] def finalize_section(self, previous_section, ended_section):
"""
Called once a section is finished. It appends the current_links list
to the links and update the block dictionary with current_block. Thereby it
finishes reading a given section.
Parameters
---------
previous_section: list[str]
The last parsed section.
ended_section: list[str]
The sections that have been ended.
"""
if "atoms" in ended_section:
self.current_atom_names = list(self.current_block.nodes)
if self.current_block is not None:
self.force_field.blocks[self.current_block.name] = self.current_block
[docs] def finalize(self, lineno=0):
"""
Called at the end of the file and checks that all pragmas are closed
before calling the parent method.
"""
if self.current_meta is not None:
raise IOError("Your #ifdef/#ifndef section is orderd incorrectly."
"There is no #endif for the last pragma.")
super().finalize()
def _new_block(self):
self.current_block = Block(force_field=self.force_field)
@SectionLineParser.section_parser('moleculetype')
def _block(self, line, lineno=0):
"""
Parses the line directly following the '[moleculetype]'
directive and stores the block name and exclusions.
"""
name, nrexcl = line.split()
self.current_block.name = name
self.current_block.nrexcl = int(nrexcl)
@SectionLineParser.section_parser('moleculetype', 'atoms')
def _block_atoms(self, line, lineno=0):
"""
Parses the lines of the [atoms] directive.
"""
tokens = collections.deque(_tokenize(line))
self._parse_block_atom(tokens, self.current_block)
@SectionLineParser.section_parser('moleculetype', 'bonds')
@SectionLineParser.section_parser('moleculetype', 'angles')
@SectionLineParser.section_parser('moleculetype', 'dihedrals')
@SectionLineParser.section_parser('moleculetype', 'impropers')
@SectionLineParser.section_parser('moleculetype', 'constraints')
@SectionLineParser.section_parser('moleculetype', 'pairs')
@SectionLineParser.section_parser('moleculetype', 'exclusions')
@SectionLineParser.section_parser('moleculetype', 'virtual_sites1')
@SectionLineParser.section_parser('moleculetype', 'virtual_sites2')
@SectionLineParser.section_parser('moleculetype', 'virtual_sites3')
@SectionLineParser.section_parser('moleculetype', 'virtual_sites4')
@SectionLineParser.section_parser('moleculetype', 'virtual_sitesn')
@SectionLineParser.section_parser('moleculetype', 'position_restraints')
@SectionLineParser.section_parser('moleculetype', 'pairs_nb')
@SectionLineParser.section_parser('moleculetype', 'settles')
@SectionLineParser.section_parser('moleculetype', 'distance_restraints')
@SectionLineParser.section_parser('moleculetype', 'dihedral_restraints')
@SectionLineParser.section_parser('moleculetype', 'orientation_restraints')
@SectionLineParser.section_parser('moleculetype', 'angle_restraints')
@SectionLineParser.section_parser('moleculetype', 'angle_restraints_z')
def _interactions(self, line, lineno=0):
"""
Parses all interaction lines that are not directives (i.e. within []).
Note that each interaction is enumerated explicitly to guard against typos
and also interactions for which the format is unknown.
"""
context = self.current_block
interaction_name = self.section[-1]
tokens = collections.deque(_tokenize(line))
atom_idxs = self.atom_idxs.get(interaction_name)
self._base_parser(
tokens,
context,
section=interaction_name,
atom_idxs=atom_idxs,
)
def _split_atoms_and_parameters(self, tokens, atom_idxs):
"""
Returns atoms from line based on the indices defined in `atom_idxs`.
It also interprets slices etc. stored as strings.
Parameters:
------------
tokens: collections.deque[str]
Deque of token to inspect. The deque **can be modified** in place.
atom_idxs: list of ints or strings that are valid python slices
Returns:
-----------
list
"""
atoms = []
remove = []
# first we extract the atoms from the indices given using
# ints or slices
tokens = list(tokens)
for idx in atom_idxs:
if isinstance(idx, int):
atoms.append([tokens[idx], {}])
remove.append(idx)
elif isinstance(idx, slice):
atoms += [[atom, {}] for atom in tokens[idx]]
idx_range = range(0, len(tokens))
remove += idx_range[idx]
else:
raise IOError
# everything that is left are parameters, which we
# get by simply deleting the atoms from tokens
for index in sorted(remove, reverse=True):
del tokens[index]
return atoms, tokens
def _treat_block_interaction_atoms(self, atoms, context, section):
"""
Takes the atom indices associated with an interaction line
and converts it to zero based indices. It also performas some
format checks. It checks that:
- the indices are not negative or zero
- the indices refere to an existing atom
- atoms have no prefixes
Parameters
-----------
atom_idxs: list of ints or strings that are valid python slices
context: :class:`vermouth.molecule.Block`
The current block we parse
section: str
The current section header
"""
all_references = []
for atom in atoms:
reference = atom[0]
if reference.isdigit():
if int(reference) < 1:
msg = 'In section {} is a negative atom reference, which is not allowed.'
raise IOError(msg.format(section.name))
# The indices in the file are 1-based
reference = int(reference) - 1
try:
reference = self.current_atom_names[reference]
except IndexError:
msg = ('There are {} atoms defined in the block "{}". '
'Interaction in section "{}" cannot refer to '
'atom index {}.')
raise IOError(msg.format(len(context), context.name,
section, reference + 1))
atom[0] = reference
else:
if reference not in context:
msg = ('There is no atom "{}" defined in the block "{}". '
'Section "{}" cannot refer to it.')
raise IOError(msg.format(reference, context.name, section))
if reference[0] in '+-<>':
msg = ('Atom names in blocks cannot be prefixed with + or -. '
'The name "{}", used in section "{}" of the block "{}" '
'is not valid in a block.')
raise IOError(msg.format(reference, section, context.name))
all_references.append(reference)
return all_references
def _base_parser(self, tokens, context, section, atom_idxs):
"""
Converts an interaction line into a vermouth interaction
tuple. It updates the block interactions in place.
Parameters
----------
tokens: collections.deque[str]
Deque of token to inspect. The deque **can be modified** in place.
context: :class:`vermouth.molecule.Block`
The current block we parse
section: str
The current section header
atom_idxs: list of ints or strings that are valid python slices
"""
# split atoms and parameters
atoms, parameters = self._split_atoms_and_parameters(tokens, atom_idxs)
# perform check on the atom ids
treated_atoms = self._treat_block_interaction_atoms(atoms, context, section)
if self.current_meta:
meta = {self.current_meta['condition']: self.current_meta['tag']}
else:
meta = {} #dict(collections.ChainMap(meta, apply_to_all_interactions))
interaction = Interaction(
atoms=treated_atoms,
parameters=parameters,
meta=meta,)
context.interactions[section] = context.interactions.get(section, []) + [interaction]
def _parse_block_atom(self, tokens, context):
"""
Converts the lines of the atom directive to graph nodes and
sets the values (i.e. atomtype) as attributes.
Parameters
----------
tokens: collections.deque[str]
Deque of token to inspect. The deque **can be modified** in place.
context: :class:`vermouth.molecule.Block`
The current block we parse
"""
# deque does not support slicing
first_six = (tokens.popleft() for _ in range(6))
index, atype, resid, resname, name, charge_group = first_six
# since the index becomes the node name and all graphs start with 0
# index it makes more sense to also directly start at 0
if int(index) < 1:
msg = 'One of your atoms has a negative atom reference, which is not allowed.'
raise IOError(msg)
index = int(index) - 1
if index in context:
msg = ('There is already an atom with index "{}" in the block "{}". '
'Atom indices must be unique within a block.')
raise IOError(msg.format(name, context.name))
atom = {
# for bookkeeping purposes let's keep the actual index i.e. +1 here
'index' : index + 1,
'atomname': name,
'atype': atype,
'resname': resname,
'resid': int(resid),
'charge_group': int(charge_group),
}
# charge and mass are optional, but charge has to be defined for mass to be
if tokens:
atom['charge'] = float(tokens.popleft())
if tokens:
atom['mass'] = float(tokens.popleft())
attributes = {}
context.add_node(index, **dict(collections.ChainMap(attributes, atom)))
[docs]def read_itp(lines, force_field):
"""
Parses `lines` of itp format and adds the
molecule as a block to `force_field`.
Parameters
----------
lines: list
list of lines of an itp file
force_field: :class:`vermouth.forcefield.ForceField`
"""
director = ITPDirector(force_field)
return list(director.parse(iter(lines)))