#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2018 University of Groningen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Provides a processor that can perform a resolution transformation on a
molecule.
"""
from collections import defaultdict
from itertools import product, combinations
import networkx as nx
from ..molecule import Molecule, attributes_match
from .processor import Processor
from ..utils import are_all_equal, format_atom_string
from ..log_helpers import StyleAdapter, get_logger
LOGGER = StyleAdapter(get_logger(__name__))
[docs]
def build_graph_mapping_collection(from_ff, to_ff, mappings):
"""
Function that produces a collection of :class:`vermouth.map_parser.Mapping`
objects.
Hereby deprecated.
Parameters
----------
from_ff: vermouth.forcefield.ForceField
Origin force field.
to_ff: vermouth.forcefield.ForceField
Destination force field.
mappings: dict[str, dict[str, vermouth.map_parser.Mapping]]
All known mappings
Returns
-------
collections.abc.Iterable
A collection of mappings that map from `from_ff` to `to_ff`.
"""
return mappings[from_ff.name][to_ff.name].values()
[docs]
def edge_matcher(graph1, graph2, node11, node12, node21, node22):
"""
Checks whether the resids for node11 and node12 in graph1 are the same, and
whether that's also true for node21 and node22 in graph2.
Parameters
----------
graph1: networkx.Graph
graph2: networkx.Graph
node11: collections.abc.Hashable
A node key in `graph1`.
node12: collections.abc.Hashable
A node key in `graph1`.
node21: collections.abc.Hashable
A node key in `graph2`.
node22: collections.abc.Hashable
A node key in `graph2`.
Returns
-------
bool
"""
node11 = graph1.nodes[node11]
node12 = graph1.nodes[node12]
node21 = graph2.nodes[node21]
node22 = graph2.nodes[node22]
return (node11.get('resid') == node12.get('resid')) ==\
(node21.get('resid') == node22.get('resid'))
[docs]
def node_matcher(node1, node2):
"""
Checks whether nodes should be considered equal for isomorphism. Takes all
attributes in `node2` into account, except for the attributes "atype",
"charge", "charge_group", "resid", "replace", and "_old_atomname".
Parameters
----------
node1: dict
node2: dict
Returns
-------
bool
"""
return attributes_match(node1, node2,
ignore_keys=('atype', 'charge', 'charge_group', 'mass',
'resid', 'replace', '_old_atomname'))
def _old_atomname_match(node1, node2):
"""
Adds a _name attribute to copies of the nodes, and feeds it all to
:func:`node_matcher`
"""
name1 = node1.get('_old_atomname', node1['atomname'])
name2 = node2.get('_old_atomname', node2['atomname'])
node1 = node1.copy()
node2 = node2.copy()
node1['_name'] = name1
node2['_name'] = name2
if 'order' in node2 and 'order' not in node1:
node1['order'] = node2['order']
del node1['atomname']
del node2['atomname']
return node_matcher(node1, node2)
[docs]
def node_should_exist(modification, node_idx):
"""
Returns True if the node with index `node_idx` in `modification` should
already exist in the parent molecule.
Parameters
----------
modification: networkx.Graph
node_idx: collections.abc.Hashable
The key of a node in `modification`.
Returns
-------
bool
True iff the node `node_idx` in `modification` should already exist in
the parent molecule.
"""
return not modification.nodes[node_idx].get('PTM_atom', False)
[docs]
def ptm_resname_match(mol_node, map_node):
"""
As :func:`node_matcher`, except that empty resname and false PTM_atom
attributes from `node2` are removed.
"""
if 'resname' in map_node and not map_node['resname']:
map_node = map_node.copy()
del map_node['resname']
if 'PTM_atom' in map_node and not map_node['PTM_atom']:
map_node = map_node.copy()
del map_node['PTM_atom']
if 'modifications' in mol_node:
map_node = map_node.copy()
matching_mod = all(map_mod in mol_node.get('modifications', [])
for map_mod in map_node.pop('modifications', []))
else:
matching_mod = True
is_equal = _old_atomname_match(mol_node, map_node)
return is_equal and matching_mod
[docs]
def cover(to_cover, options):
"""
Implements a recursive backtracking algorithm to cover all elements of
`to_cover` with the elements from `options` that have the lowest index.
In this context "to cover" means that all items in an element of `options`
must be in `to_cover`. Elements in `to_cover` can only be covered *once*.
Parameters
----------
to_cover: collections.abc.MutableSet
The items that should be covered.
options: collections.abc.Sequence[collections.abc.MutableSet]
The elements that can be used to cover `to_cover`. All items in an
element of `options` must be present in `to_cover` to qualify.
Returns
-------
None or list
None if no covering can be found, or the list of items from `options`
with the lowest indices that exactly covers `to_cover`.
"""
if not to_cover:
return []
for idx, option in enumerate(options):
if all(item in to_cover for item in option):
left_to_cover = to_cover.copy()
for item in option:
# Only remove the leftmost item. PS. we know for sure all items
# in option are in left_to_cover at least once.
left_to_cover.remove(item)
found = cover(left_to_cover, options[idx:])
if found is not None:
return [option] + found
return None
[docs]
def get_mod_mappings(mappings):
"""
Returns a dict of all known modification mappings.
Parameters
----------
mappings: collections.abc.Iterable[vermouth.map_parser.Mapping]
All known mappings.
Returns
-------
dict[tuple[str], vermouth.map_parser.Mapping]
All mappings that describe a modification mapping.
"""
out = {}
for mapping in mappings:
if mapping.type == 'modification':
out[mapping.names] = mapping
return out
[docs]
def modification_matches(molecule, mappings):
"""
Returns a minimal combination of modification mappings and where they
should be applied that describes all modifications in `molecule`.
Parameters
----------
molecule: networkx.Graph
The molecule whose modifications should be treated. Modifications are
described by the 'modifications' node attribute.
mappings: collections.abc.Iterable[vermouth.map_parser.Mapping]
All known mappings.
Returns
-------
list[tuple[dict, vermouth.molecule.Link, dict]]
A list with the following items:
Dict describing the correspondence of node keys in `molecule` to
node keys in the modification.
The modification.
Dict with all reference atoms, mapping modification nodes to
nodes in `molecule`.
"""
modified_nodes = set() # This will contain whole residues.
for idx, node in molecule.nodes.items():
if node.get('modifications', []):
modified_nodes.add(idx)
ptm_subgraph = molecule.subgraph(modified_nodes)
grouped = nx.connected_components(ptm_subgraph)
found_ptm_groups = []
# For every modification group we would like a set with the names of the
# involved modifications, so we can use that to figure out which mod
# mappings should be used.
for group in grouped:
modifications = {
mod.name for mol_idx in group for mod in molecule.nodes[mol_idx].get('modifications', [])
}
found_ptm_groups.append(modifications)
needed_mod_mappings = set()
known_mod_mappings = get_mod_mappings(mappings)
for group in found_ptm_groups:
# known_mod_mappings is a dict[tuple[str], Mapping]. We want to know
# the minimal combination of those needed to cover all the PTMs found
# in group. The cheapest solution is covering the names of the PTMs in
# group with keys from known_mod_mappings. An improvement would be to
# do the graph covering again.
# TODO?
covered_by = cover(list(group),
sorted(known_mod_mappings, key=len, reverse=True))
if covered_by is None:
LOGGER.warning("Can't find modification mappings for the "
"modifications {}. The following modification "
"mappings are known: {}",
list(group), known_mod_mappings,
type='unmapped-atom')
continue
needed_mod_mappings.update(covered_by)
matches = []
# Sort on the tuple[str] type names of the mappings so that mappings that
# define most modifications at the same time get processed first
for mod_name in sorted(needed_mod_mappings, key=len, reverse=True):
mod_mapping = known_mod_mappings[mod_name]
for mol_to_mod, modification, references in mod_mapping.map(molecule, node_match=ptm_resname_match):
matches.append((mol_to_mod, modification, references))
if not set(mol_to_mod) <= modified_nodes:
# TODO: better message
LOGGER.warning('Overlapping modification mappings', type='inconsistent-data')
modified_nodes -= set(mol_to_mod)
return matches
[docs]
def apply_block_mapping(match, molecule, graph_out, mol_to_out, out_to_mol):
"""
Performs a mapping operation for a "block". `match` is a tuple of 3
elements that describes what nodes in `molecule` should correspond to
a :class:`vermouth.molecule.Block` that should be added to `graph_out`, and
any atoms that should be used a references.
Add the required :class:`vermouth.molecule.Block` to `graph_out`, and
updates `mol_to_out` and `out_to_mol` *in-place*.
Parameters
----------
match
molecule: networkx.Graph
The original molecule
graph_out: vermouth.molecule.Molecule
The newly created graph that describes `molecule` at a different
resolution.
mol_to_out: dict[collections.abc.Hashable, dict[collections.abc.Hashable, float]]
A dict mapping nodes in `molecule` to nodes in `graph_out` with the
associated weights.
out_to_mol: dict[collections.abc.Hashable, dict[collections.abc.Hashable, float]]
A dict mapping nodes in `graph_out` to nodes in `molecule` with the
associated weights.
Returns
-------
set
A set of all overlapping nodes that were already mapped before.
set
A set of none-to-one mappings. I.e. nodes that were created without
nodes mapping to them.
dict
A dict of reference atoms, mapping `graph_out` nodes to nodes in
`molecule`.
"""
mol_to_block, blocks_to, references = match
if graph_out.nrexcl is None:
graph_out.nrexcl = blocks_to.nrexcl
try:
# merge_molecule will return a dict mapping the node keys of the
# added block to the ones in graph_out
# FIXME: Issue #154 lives here.
block_to_out = graph_out.merge_molecule(blocks_to)
except ValueError:
# This probably means the nrexcl of the block is different from the
# others. This means the user messed up their data. Or there are
# different forcefields in the same forcefield folder...
LOGGER.exception('Residue(s) {} is not compatible with the others',
set(nx.get_node_attributes(blocks_to, 'resname').values()),
type='inconsistent-data')
raise
# overlap does not have to be a dict, since the values in block_to_out are
# guaranteed to be unique in graph_out. So we can look them up in
# mol_to_out
overlap = set(mol_to_out.keys()) & set(mol_to_block.keys())
for mol_idx in mol_to_block:
for block_idx, weight in mol_to_block[mol_idx].items():
out_idx = block_to_out[block_idx]
mol_to_out[mol_idx][out_idx] = weight
out_to_mol[out_idx][mol_idx] = weight
none_to_one_mappings = set()
mapped_block_idxs = {block_idx
for mol_idx in mol_to_block
for block_idx in mol_to_block[mol_idx]}
for spawned in set(blocks_to.nodes) - mapped_block_idxs:
# These nodes come from "nowhere", so, let's pretend they come from
# all nodes in the block. This helps with setting attributes such
# as 'chain'
# "None to one" mapping - this is fine. This happens with e.g.
# charge dummies.
spawned = block_to_out[spawned]
none_to_one_mappings.add(spawned)
for mol_idx in mol_to_block:
mol_to_out[mol_idx][spawned] = 0
out_to_mol[spawned][mol_idx] = 0
new_references = {block_to_out[mod_idx]: mol_idx for mod_idx, mol_idx in references.items()}
return overlap, none_to_one_mappings, new_references
[docs]
def apply_mod_mapping(match, molecule, graph_out, mol_to_out, out_to_mol):
"""
Performs the mapping operation for a modification.
Parameters
----------
match
molecule: networkx.Graph
The original molecule
graph_out: vermouth.molecule.Molecule
The newly created graph that describes `molecule` at a different
resolution.
mol_to_out: dict[collections.abc.Hashable, dict[collections.abc.Hashable, float]]
A dict mapping nodes in `molecule` to nodes in `graph_out` with the
associated weights.
out_to_mol: dict[collections.abc.Hashable, dict[collections.abc.Hashable, float]]
A dict mapping nodes in `graph_out` to nodes in `molecule` with the
associated weights.
Returns
-------
dict[str, dict[tuple, vermouth.molecule.Link]]
A dict of all modifications that have been applied by this modification
mapping operations. Maps interaction type to involved atoms to the
modification responsible.
dict
A dict of reference atoms, mapping `graph_out` nodes to nodes in
`molecule`.
"""
mol_to_mod, modification, references = match
LOGGER.info('Applying modification mapping {}', modification.name, type='general')
graph_out.citations.update(modification.citations)
mod_to_mol = defaultdict(dict)
for mol_idx, mod_idxs in mol_to_mod.items():
for mod_idx in mod_idxs:
mod_to_mol[mod_idx][mol_idx] = mol_to_mod[mol_idx][mod_idx]
mod_to_mol = dict(mod_to_mol)
mod_to_out = {}
# Some nodes of modification will already exist. The question is
# which, and which index they have in graph_out.
for mod_idx in modification:
if not node_should_exist(modification, mod_idx):
# Node does not exist yet.
if not graph_out.nodes:
out_idx = 0
else:
out_idx = max(graph_out) + 1
mod_to_out[mod_idx] = out_idx
graph_out.add_node(out_idx, **modification.nodes[mod_idx])
else:
# Node should already exist
# We need to find the out_index of this node. Since the
# node already exists, there is at least one mol_idx in
# mol_to_out that refers to the correct out_idx. What we do
# is try to find those mol indices by looking at
# mod_to_mol.
# Find the other mol nodes that map to this bead according
# to the mod mapping...
mol_idxs = mod_to_mol[mod_idx]
# ...and make the node with the correct attributes.
out_idxs = set()
for mol_idx in mol_idxs:
out_idxs.update(mol_to_out.get(mol_idx, {}))
for out_idx in out_idxs:
out_node = graph_out.nodes[out_idx]
if modification.nodes[mod_idx]['atomname'] == out_node['atomname']:
break
else: # No break, so no matching node found
raise ValueError("No node found in molecule with "
"atomname {}".format(modification.nodes[mod_idx]['atomname']))
# Undefined loop variable is guarded against by the else-raise above
mod_to_out[mod_idx] = out_idx # pylint: disable=undefined-loop-variable
graph_out.nodes[out_idx].update(modification.nodes[mod_idx].get('replace', {})) # pylint: disable=undefined-loop-variable
graph_out.nodes[out_idx]['modifications'] = graph_out.nodes[out_idx].get('modifications', [])
if modification not in graph_out.nodes[out_idx]['modifications']:
graph_out.nodes[out_idx]['modifications'].append(modification)
for mol_idx in mol_to_mod:
for mod_idx, weight in mol_to_mod[mol_idx].items():
out_idx = mod_to_out[mod_idx]
if mol_idx not in mol_to_out:
mol_to_out[mol_idx] = {}
mol_to_out[mol_idx][out_idx] = weight
if out_idx not in out_to_mol:
out_to_mol[out_idx] = {}
out_to_mol[out_idx][mol_idx] = weight
for mod_idx, mod_jdx in modification.edges:
out_idx = mod_to_out[mod_idx]
out_jdx = mod_to_out[mod_jdx]
if not graph_out.has_edge(out_idx, out_jdx):
graph_out.add_edge(out_idx, out_jdx)
new_references = {mod_to_out[mod_idx]: mol_idx for mod_idx, mol_idx in references.items()}
# Apply interactions
applied_interactions = defaultdict(lambda: defaultdict(list))
for interaction_type, interactions in modification.interactions.items():
for interaction in interactions:
atoms = [mod_to_out[mod_idx] for mod_idx in interaction.atoms]
assert len(atoms) == len(interaction.atoms)
interaction = interaction._replace(atoms=atoms)
applied_interactions[interaction_type][tuple(atoms)].append(modification)
graph_out.add_or_replace_interaction(interaction_type, **interaction._asdict())
# Some jank needed here, since modification node indices are integers
mod_atom_name_to_out = {}
for mod_idx in modification.nodes:
name = modification.nodes[mod_idx]['atomname']
mod_atom_name_to_out[name] = mod_to_out[mod_idx]
for loglevel, entries in modification.log_entries.items():
for entry in entries:
graph_out.log_entries[loglevel][entry] += [mod_atom_name_to_out]
return dict(applied_interactions), new_references
[docs]
def attrs_from_node(node, attrs):
"""
Helper function that applies a "replace" operations on the node if
required, and then returns a dict of the attributes listed in `attrs`.
Parameters
----------
node: dict
attrs: collections.abc.Container
Attributes that should be in the output.
Returns
-------
dict
"""
if 'replace' in node:
node = node.copy()
node.update(node['replace'])
return {attr: val for attr, val in node.items() if attr in attrs}
[docs]
def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(), attribute_stash=()):
"""
Creates a new :class:`~vermouth.molecule.Molecule` in force field `to_ff`
from `molecule`, based on `mappings`. It does this by doing a subgraph
isomorphism of all blocks in `mappings` and `molecule`. Will issue warnings
if there's atoms not contributing to the new molecule, or if there's
overlapping blocks.
Node attributes in the new molecule will come from the blocks constructing
it, except for those in `attribute_keep`, which lists the attributes that
will be kept from `molecule`.
Parameters
----------
molecule: :class:`~vermouth.molecule.Molecule`
The molecule to transform.
mappings: dict[str, dict[str, dict[str, tuple]]]
``{ff_name: {ff_name: {block_name: (mapping, weights, extra)}}}``
A collection of mappings, as returned by e.g.
:func:`~vermouth.map_input.read_mapping_directory`.
to_ff: :class:`~vermouth.forcefield.ForceField`
The force field to transform to.
attribute_keep: :class:`~collections.abc.Iterable`
The attributes that will always be transferred from `molecule` to the
produced graph.
attribute_must: :class:`~collections.abc.Iterable`
The attributes that the nodes in the output graph *must* have. If
they're not provided by the mappings/blocks they're taken from
`molecule`.
attribute_stash: tuple[str]
The attributes that will always be transferred from the input molecule
to the produced graph, but prefixed with _old_.Thus they are new attributes
and are not conflicting with already defined attributes.
Returns
-------
:class:`~vermouth.molecule.Molecule`
A new molecule, created by transforming `molecule` to `to_ff` according
to `mappings`.
"""
attribute_keep = tuple(attribute_keep)
attribute_must = tuple(attribute_must)
attribute_stash = tuple(attribute_stash)
# Transferring the meta maybe should be a copy, or a deep copy...
# If it breaks we look at this line.
graph_out = Molecule(force_field=to_ff, meta=molecule.meta)
graph_out.citations.update(molecule.citations)
mappings = build_graph_mapping_collection(molecule.force_field, to_ff, mappings)
block_matches = []
for mapping in mappings:
if mapping.type == 'block':
block_matches.extend(mapping.map(molecule, node_match=_old_atomname_match,
edge_match=edge_matcher))
mod_matches = modification_matches(molecule, mappings)
# Sort by lowest node key per residue. We need to do this, since
# merge_molecule creates new resid's in order.
block_sort_key = lambda x: min(x[0].keys())
# Sort modifications by the lowest mapped index when all touched atoms are
# newly created PTM atoms that do not exist yet. Otherwise, take the
# highest index. If we don't do this we run the risk that a PTM mapped to
# a node with a low index also changes a node of a higher residue, causing
# all sorts of havok.
mod_sort_key = lambda x: (max(x[0].keys())
if any(node_should_exist(x[1], idx)
for idx in
{mod_idx for mol_idx in x[0]
for mod_idx in x[0][mol_idx]})
else
min(x[0].keys()))
block_matches = sorted(block_matches, key=block_sort_key, reverse=True)
mod_matches = sorted(mod_matches, key=mod_sort_key, reverse=True)
# There are a few separate mapping cases to be considered:
# One to one mapping - e.g. AA to AA, the simplest case
# Many to one mapping - e.g. AA to CG without sharing atoms between beads
# Many to many mapping - e.g. AA to CG *with* sharing atoms between beads
# These three cases are covered by the normal operation, the following are
# caught with some additional logic
# None to one - whole block taken as origin, with weights 0
# One to none - unmapped atoms (produces a warning)
# One to many - e.g. CG to AA. This mostly works, but we don't know how to
# make sure the "many" should be connected together. Gives a
# warning if it's disconnected.
mol_to_out = defaultdict(dict)
out_to_mol = defaultdict(dict)
overlapping_mappings = set()
none_to_one_mappings = set()
modified_interactions = {}
all_references = {}
all_matches = []
while block_matches or mod_matches:
# Take the match with the lowest atom id, and prefer blocks over
# modifications
if (not block_matches or
(mod_matches and
mod_sort_key(mod_matches[-1]) < block_sort_key(block_matches[-1]))):
match = mod_matches.pop(-1)
applied_interactions, refs = apply_mod_mapping(match,
molecule, graph_out,
mol_to_out, out_to_mol)
modified_interactions.update(applied_interactions)
else:
match = block_matches.pop(-1)
overlap, none_to_one, refs = apply_block_mapping(match,
molecule, graph_out,
mol_to_out, out_to_mol)
overlapping_mappings.update(overlap)
none_to_one_mappings.update(none_to_one)
all_matches.append(match)
all_references.update(refs)
# At this point, we should have created graph_out at the desired
# resolution, *and* have the associated correspondence in mol_to_out and
# out_to_mol.
# Set node attributes based on what the original atoms are.
to_remove = set()
for out_idx in out_to_mol:
mol_idxs = out_to_mol[out_idx].keys()
# Keep track of what bead comes from where
subgraph = molecule.subgraph(mol_idxs)
graph_out.nodes[out_idx]['graph'] = subgraph
weights = out_to_mol[out_idx]
graph_out.nodes[out_idx]['mapping_weights'] = weights
if out_idx in all_references:
ref_idx = all_references[out_idx]
new_attrs = attrs_from_node(molecule.nodes[ref_idx],
attribute_keep+attribute_must+attribute_stash)
for attr, val in new_attrs.items():
# Attrs in attribute_keep we always transfer, those in
# attribute_must we transfer only if they're not already in the
# created node
if attr in attribute_keep or attr not in graph_out.nodes[out_idx]:
graph_out.nodes[out_idx].update(new_attrs)
if attr in attribute_stash:
graph_out.nodes[out_idx]["_old_"+attr] = val
else:
attrs = defaultdict(list)
for mol_idx in mol_idxs:
new_attrs = attrs_from_node(molecule.nodes[mol_idx],
attribute_keep+attribute_must+attribute_stash)
for attr, val in new_attrs.items():
attrs[attr].append(val)
attrs_not_sane = []
for attr, vals in attrs.items():
if attr in attribute_keep or attr not in graph_out.nodes[out_idx]:
if vals:
graph_out.nodes[out_idx][attr] = vals[0]
else:
# No nodes hat the attribute.
graph_out.nodes[out_idx][attr] = None
if attr in attribute_stash:
if vals:
graph_out.nodes[out_idx]["_old_"+attr] = vals[0]
else:
# No nodes hat the attribute.
graph_out.nodes[out_idx]["_old_"+attr] = None
if not are_all_equal(vals):
attrs_not_sane.append(attr)
if attrs_not_sane:
LOGGER.warning('The attributes {} for atom {} are going to'
' be garbage because the attributes of the'
' constructing atoms are different.',
attrs_not_sane,
format_atom_string(graph_out.nodes[out_idx]),
type='inconsistent-data')
if graph_out.nodes[out_idx].get('atomname', '') is None:
to_remove.add(out_idx)
# We need to add edges between residues. Within residues comes from the
# blocks.
for match1, match2 in combinations(all_matches, 2):
match1 = match1[0]
match2 = match2[0]
edges = molecule.edges_between(match1.keys(), match2.keys())
# TODO: Backmapping needs love here
for mol_idx, mol_jdx in edges:
# Subtract none_to_one_mappings, since those should not be made to
# connect to things automatically.
out_idxs = mol_to_out[mol_idx].keys() - none_to_one_mappings
out_jdxs = mol_to_out[mol_jdx].keys() - none_to_one_mappings
for out_idx, out_jdx in product(out_idxs, out_jdxs):
if out_idx != out_jdx:
graph_out.add_edge(out_idx, out_jdx)
############################
# Sanity check the results #
############################
# "Many to one" mapping - overlapping blocks means dubious node properties
if overlapping_mappings:
LOGGER.warning('These atoms are covered by multiple blocks. This is a '
'bad idea: {}. This probably means the following output'
' particles are wrong: {}.',
{format_atom_string(molecule.nodes[mol_idx])
for mol_idx in overlapping_mappings},
{format_atom_string(graph_out.nodes[out_idx], atomid='')
for mol_idx in overlapping_mappings
for out_idx in mol_to_out[mol_idx]},
type='inconsistent-data')
# "One to many" mapping - not necessarily a problem, unless it leads to
# missing edges
for mol_idx in mol_to_out:
# Subtract the none to one mapped nodes, since those don't contribute
# and make false positives.
out_idxs = mol_to_out[mol_idx].keys() - none_to_one_mappings
if len(out_idxs) > 1 and not nx.is_connected(graph_out.subgraph(out_idxs)):
# In this case there's a single input particle mapping to multiple
# output particles. This probably means there's bonds missing
LOGGER.warning('The input particle {} maps to multiple output '
'particles: {}, which are disconnected. There are '
'probably edges missing.',
format_atom_string(molecule.nodes[mol_idx]),
{format_atom_string(graph_out.nodes[out_idx], atomid='')
for out_idx in out_idxs},
type='inconsistent-data')
# "One to none" mapping - this means your mapping files are incomplete
uncovered_atoms = set(molecule.nodes.keys()) - set(mol_to_out.keys())
if uncovered_atoms:
uncovered_hydrogens = {idx for idx in uncovered_atoms
if molecule.nodes[idx].get('element', '') == 'H'}
if uncovered_hydrogens:
# Maybe this should be info?
LOGGER.debug('These hydrogen atoms are not covered by a mapping.'
' This is not the best idea. {}',
[format_atom_string(molecule.nodes[idx])
for idx in uncovered_hydrogens],
type='unmapped-atom'
)
other_uncovered = uncovered_atoms - uncovered_hydrogens
if other_uncovered:
LOGGER.warning("These atoms are not covered by a mapping. Either"
" your mappings don't describe all atoms (bad idea),"
" or, there's no mapping available for all residues."
" {}",
[format_atom_string(molecule.nodes[idx])
for idx in other_uncovered],
type='unmapped-atom')
for interaction_type in modified_interactions:
for atoms, modifications in modified_interactions[interaction_type].items():
if len(modifications) != 1:
# TODO: better message
LOGGER.warning('Interaction set by multiple modification '
'mappings', type='inconsistent-data')
graph_out.remove_nodes_from(to_remove)
return graph_out
[docs]
class DoMapping(Processor):
"""
Processor for performing a resolution transformation from one force field to
another.
This processor will create new Molecules by stitching together Blocks from
the target force field, as dictated by the available mappings.
Fragments/atoms/residues/modifications for which no mapping is available
will not be represented in the resulting molecule.
The resulting molecules will have intra-block edges and interactions as
specified in the blocks from the target force field. Inter-block edges will
be added based on the connectivity of the original molecule, but no
interactions will be added for those.
Attributes
----------
mappings: dict[str, dict[str, dict[str, tuple]]]
``{ff_name: {ff_name: {block_name: (mapping, weights, extra)}}}``
A collection of mappings, as returned by e.g.
:func:`~vermouth.map_input.read_mapping_directory`.
to_ff: vermouth.forcefield.ForceField
The force field to map to.
delete_unknown: bool
Not currently used
attribute_keep: tuple[str]
The attributes that will always be transferred from the input molecule
to the produced graph.
attribute_must: tuple[str]
The attributes that the nodes in the output graph *must* have. If
they're not provided by the mappings/blocks they're taken from
the original molecule.
attribute_stash: tuple[str]
The attributes that will always be transferred from the input molecule
to the produced graph, but prefixed with _old_.Thus they are new attributes
and are not conflicting with already defined attributes.
See Also
--------
:func:`do_mapping`
"""
def __init__(self, mappings, to_ff, delete_unknown=False, attribute_keep=(),
attribute_must=(), attribute_stash=()):
self.mappings = mappings
self.to_ff = to_ff
self.delete_unknown = delete_unknown
self.attribute_keep = tuple(attribute_keep)
self.attribute_must = tuple(attribute_must)
self.attribute_stash = tuple(attribute_stash)
super().__init__()
[docs]
def run_molecule(self, molecule):
return do_mapping(
molecule,
mappings=self.mappings,
to_ff=self.to_ff,
attribute_keep=self.attribute_keep,
attribute_must=self.attribute_must,
attribute_stash=self.attribute_stash
)
[docs]
def run_system(self, system):
mols = []
for molecule in system.molecules:
try:
new_molecule = self.run_molecule(molecule)
except KeyError as err:
if not self.delete_unknown:
raise err
else:
raise
# TODO: raise a loud warning here
else:
if new_molecule:
mols.append(new_molecule)
system.molecules = mols
system.force_field = self.to_ff