Source code for vermouth.processors.do_links

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2018 University of Groningen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from itertools import combinations
import numbers

import networkx as nx
from numpy import sign

from ..molecule import attributes_match
from .processor import Processor


def _atoms_match(node1, node2):
    # node1 is molecule, node2 is link
    # modifications are named as tuples, e.g. `('C-ter',)`, since mappings can
    # deal with multiple modifications at the same time. Here we build a flat
    # list of relevant modification names, and require that /all/ of these match
    # the link['modifications']
    mods = []
    for mod in node1.get('modifications', []):
        mods.extend(mod.name)

                  # No modifications specified by link: always match
    mods_match = ('modifications' not in node2 or
                  # empty modifications in link and no modifications in molecule: match
                  (not node2['modifications'] and not mods) or
                  # Else, if both specify modifications, then...
                  (node2['modifications'] and mods and
                   # link modifications must be a list, and molecule mods must
                   # match links mods exactly
                   ((isinstance(node2['modifications'], list) and sorted(mods) == sorted(node2['modifications'])) or
                    # Or link mods are a simple string or a Choice, and all
                    # molecule modifications must be accounted for
                    # Here we need to do a little jiggery-pokery to leverage
                    # attributes_match. This probably means that that function
                    # needs to be cut up into smaller pieces.
                    all(attributes_match({'_': modname}, {'_': node2['modifications']}) for modname in mods))))

    return bool(mods_match and attributes_match(node1, node2, ignore_keys=('order', 'replace', 'modifications')))


def _is_valid_non_edges(molecule, link, rev_raw_match):
    for from_node, to_node_attrs in link.non_edges:
        if from_node not in link:
            continue
        from_mol_node_name = rev_raw_match[from_node]
        from_mol = molecule.nodes[from_mol_node_name]
        # from_link = link.nodes[from_node]
        from_resid = from_mol['resid']
        # from_order = from_link.get('order', 0)
        for neighbor in molecule.neighbors(from_mol_node_name):
            to_mol = molecule.nodes[neighbor]
            to_link = to_node_attrs
            to_resid = to_mol['resid']
            to_order = to_link.get('order', 0)
            if to_resid == from_resid + to_order and _atoms_match(to_mol, to_link):
                return False
    return True


def _pattern_match(molecule, atoms, raw_match):
    for link_key, template_attr in atoms:
        molecule_key = raw_match[link_key]
        molecule_attr = molecule.nodes[molecule_key]
        if not _atoms_match(molecule_attr, template_attr):
            return False
    return True


def _any_pattern_match(molecule, patterns, rev_raw_match):
    return any(_pattern_match(molecule, atoms, rev_raw_match) for atoms in patterns)


def _interpret_order(order):
    error_msg = ('"{}" is not a valid value for the "order" node attribute. '
                 'The value must be an integer, a series of + '
                 '(i.e. >, >>, >>>, ...), a series of <, or a series of *.')
    if order is True or order is False:
        # Booleans match the Number abstract base class, so we have to test
        # for them separately.
        raise ValueError(error_msg.format(order))
    elif isinstance(order, numbers.Number):
        if int(order) != float(order):
            # order is a number but not an int (or int-like)
            raise ValueError(error_msg.format(order))
        order_type = 'number'
        order_value = order
    else:
        try:
            first_character = order[0]
        except (TypeError, IndexError, KeyError):
            # order is not an int, nor a sequence (str, list, tuple,...)
            # or it is an empty sequence. Anyway, we cannot work with it.
            raise ValueError(error_msg.format(order))
        if len(set(order)) != 1 or first_character not in '><*':
            # order is a str (or any sequence, we do not really care),
            # but it contains a mixture of characters (e.g. '+-'), or
            # the characters are not among the ones we expect.
            raise ValueError(error_msg.format(order))
        signs = {'>': +1, '<': -1}
        if first_character in signs:
            order_type = '><'
            order_value = signs[first_character] * len(order)
        elif first_character == '*':
            # This could be an 'else', but it would hide bugs if the code
            # above changes.
            order_type = '*'
            order_value = len(order)
    return order_type, order_value


def match_order(order1, resid1, order2, resid2):
    r"""
    Check if two residues match the order constraints.

    The order can be:

    an integer
        It is then the expected distance in resid with a reference residue.
    a series of >
        This indicates that the residue must have a larger resid than a
        reference residue. Multiple atoms with the same number of > are
        expected to be part of the same residue. The more > are in the
        series, the further away the residue is expected to be from the
        reference, so a residue with >> is expected to have a greater resid
        than a residue with >.
    a series of <
        Same as a series of >, but for smaller resid.
    a series of *
        This indicates a different residue than the reference, but without a
        specified order. As for the > or the <, atoms with the same number
        of * are expected to be part of the same residue.

    The comparison matrix can be summarized as follows, with 0 being the
    reference residue, n being an integer. In the matrix, a ? means that the
    result depends on the comparison of the actual numbers, a ! means that
    the comparison should not be considered, and / means that the resids
    must be different. The rows correspond to the order at the left of the
    comparison (order1 argument), while the columns correspond to the order
    at the right of it (order2 argument).

    +-----+---+----+---+----+---+---+----+-----+
    |     | > | >> | < | << | n | 0 | \* | \** |
    +-----+---+----+---+----+---+---+----+-----+
    | >   | = | <  | > | >  | ! | > | !  | !   |
    +-----+---+----+---+----+---+---+----+-----+
    | >>  | > | =  | > | >  | ! | > | !  | !   |
    +-----+---+----+---+----+---+---+----+-----+
    | <   | < | <  | = | >  | ! | < | !  | !   |
    +-----+---+----+---+----+---+---+----+-----+
    | <<  | < | <  | < | =  | ! | < | !  | !   |
    +-----+---+----+---+----+---+---+----+-----+
    | n   | ! | !  | ! | !  | ? | ? | !  | !   |
    +-----+---+----+---+----+---+---+----+-----+
    | 0   | < | <  | > | >  | ? | = | /  | /   |
    +-----+---+----+---+----+---+---+----+-----+
    | \*  | ! | !  | ! | !  | ! | / | =  | /   |
    +-----+---+----+---+----+---+---+----+-----+
    | \** | ! | !  | ! | !  | ! | / | /  | =   |
    +-----+---+----+---+----+---+---+----+-----+

    Parameters
    ----------
    order1: int or str
        The order attribute of the residue on the left of the comparison.
    resid1: int
        The residue id of the residue on the left of the comparison.
    order2: int or str
        The order attribute of the residue on the right of the comparison.
    resid2: int
        The residue id of the residue on the right of the comparison.

    Returns
    -------
    bool
        `True` if the conditions match.

    Raises
    ------
    ValueError
        Raised if the order arguments do not follow the expected format.
    """
    # Validate the order arguments, and format them for what comes next.
    orders = []
    order_types = []
    for order in (order1, order2):
        order_type, order_value = _interpret_order(order)
        order_types.append(order_type)
        orders.append(order_value)

    if order_types[0] == 'number':
        # Rows n and 0 in the comparison matrix
        if order_types[1] == 'number':
            # Columns n, and 0
            if (orders[1] - orders[0]) != (resid2 - resid1):
                return False
        elif orders[0] == 0:
            # Row 0 in the comparison matrix
            if (order_types[1] == '><'
                    and sign(resid2 - resid1) != sign(orders[1])):
                # Columns >, >>, <, and <<
                return False
            elif order_types[1] == '*' and resid1 == resid2:
                # Columns *, and **
                return False
    elif order_types[0] == '><':
        # Rows >, >>, <, and <<
        if (order_types[1] == 'number'
                and orders[1] == 0
                and sign(resid1 - resid2) != sign(orders[0])):
            # Column 0
            return False
        elif (order_types[1] == '><'
              and sign(resid2 - resid1) != sign(orders[1] - orders[0])):
            # Columns >, >>, <, and <<
            return False
    elif order_types[0] == '*':
        # Rows *, and **
        if order_types[1] == 'number' and orders[1] == 0 and resid1 == resid2:
            # Column 0
            return False
        elif (order_types[1] == '*'
              and ((orders[0] == orders[1]) != (resid1 == resid2))):
            # Columns *, and **
            return False

    # Every remaining combination is either satisfied or not constrained.
    return True


def _build_link_interaction_from(molecule, interaction, match): atoms = tuple(match[idx] for idx in interaction.atoms) parameters = [ param(molecule, match) if callable(param) else param for param in interaction.parameters ] new_interaction = interaction._replace( atoms=atoms, parameters=parameters ) return new_interaction