Source code for vermouth.processors.annotate_mut_mod

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2018 University of Groningen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Provides a processor that annotates a molecule with desired mutations and
modifications.
"""

from .processor import Processor
from ..log_helpers import StyleAdapter, get_logger
from ..graph_utils import make_residue_graph
from ..selectors import is_protein
LOGGER = StyleAdapter(get_logger(__name__))



[docs]
def parse_residue_spec(resspec):
    """
    Parse a residue specification: [<chain>-][<resname>][[#]<resid>] where
    resid is /[0-9]+/.
    If resname ends in a number and a resid is also specified, the # separator
    is required.
    Returns a dictionary with keys 'chain', 'resname', and 'resid' for the
    fields that are specified. Resid will be an int.

    Parameters
    ----------
    resspec: str

    Returns
    -------
    dict
    """
    # A-LYS2 or PO4#2
    # <chain>-<resname><resid>
    *chain, res = resspec.split('-', 1)
    res, *resid = res.rsplit('#', 1)
    if resid:  # [] if False
        resname = res
        resid = resid[0]
    else:
        idx = 0
        for idx, char in reversed(list(enumerate(res))):
            if not char.isdigit():
                idx += 1
                break
        resname = res[:idx]
        resid = res[idx:]

    out = {}
    if resid:
        resid = int(resid)
        out['resid'] = resid
    if resname:
        out['resname'] = resname
    if chain:
        out['chain'] = chain[0]
    return out



def _subdict(dict1, dict2):
    """True if dict1 <= dict2
    All items in dict1 must be in dict2.
    """
    for key, val in dict1.items():
        if key not in dict2 or dict2[key] != val:
            return False
    return True



[docs]
def residue_matches(resspec, residue_graph, res_idx):
    """
    Returns True iff resspec describes residue_graph.nodes[res_idx]. The
    'resname's nter and cter match the residues with a degree of 1 and
    with the lowest and highest residue numbers respectively.

    Parameters
    ----------
    resspec: dict
        Attributes that must be present in the residue node. 'resname' is
        treated specially as described above.
    residue_graph: networkx.Graph
        A graph with one node per residue.
    res_idx: collections.abc.Hashable
        A node index in residue_graph.

    Returns
    -------
    bool
        Whether resspec describes the node res_idx in residue_graph.
    """
    res_node = residue_graph.nodes[res_idx]
    residue = {key: res_node.get(key)
               for key in 'chain resid resname insertion_code'.split()}
    # Find all residues with degree 1: the ones with a resid lower than
    # their neighbour will be Nter, those with a resid higher than their
    # neighbour Cter
    # FIXME: Once residue_graph is a digraph we can do something much much
    #        more clever, addressing arbitrarily branched polymers and
    #        termini
    if residue_graph.degree[res_idx] == 1 and resspec.get('resname') in ['nter', 'cter']:
        if not _terminal_matches(resspec.get('resname'), residue_graph, res_idx):
            return False
        # Remove resname and resid from the resspec, since they have special meaning
        # in this case.
        resspec = resspec.copy()
        resspec.pop('resname', '')
        resspec.pop('resid', 0)
    return _subdict(resspec, residue)



def _terminal_matches(resname, residue_graph, res_idx):
    """
    Tests whether the node res_idx in graph residue_graph is a cter or nter, as
    determined by resname. Resname must be one of nter or cter.
    It is assumed that the degree of the specified node is 1.

    Parameters
    ----------
    resname: str
        Must be either nter or cter
    residue_graph: networkx.Graph
    res_idx: collections.abc.Hashable
        Key in residue_graph

    Returns
    -------
    bool
        If resname is nter, returns True iff the resid of the specified node is
        lower than that of its neighbour. If resname is cter, returns True iff
        the resid of the specified node is larger than that of its neighbour.

    Raises
    ------
    KeyError
        If resname is neither cter nor nter
    """
    neighbour = list(residue_graph[res_idx])[0]  # Only one neighbour by definition.
    resid = residue_graph.nodes[res_idx].get('resid', 0)
    neighbour_resid = residue_graph.nodes[neighbour].get('resid', 0)
    if not is_protein(residue_graph.nodes[res_idx]['graph']):
        return False
    if resname == 'nter':
        return resid < neighbour_resid
    elif resname == 'cter':
        return resid > neighbour_resid
    raise KeyError('Unknown residue name in provided resspec. Found {}, only '
                   'cter and nter are known'.format(resname))


def _format_resname(res):
    """
    Provisional function that performs the opposite of parse_residue_spec.
    Poorly tested, use at own risk.
    """
    chain = res.get('chain', '')
    out = ''
    if chain:
        out += chain + '-'
    resname = res.get('resname')
    out += resname
    if resname and resname[-1].isdigit():
        out += '#'
    out += str(res.get('resid', ''))
    out += res.get('insertion_code', '')
    return out

def _resiter(mod, residue_graph, resspec, library, key, molecule):
    """
    Iterate over residues to find a specific modification

    Parameters
    ----------
    mod: str
        The modification to apply, eg N-ter, C-ter
    residue_graph: networkx.Graph
        A graph with one node per residue.
    resspec: dict
        Attributes that must be present in the residue node. 'resname' is
        treated specially as described above.
    library: dict
        dictionary of modifications/mutations from the force field
    key: str
        from associations
    molecule: networkx.Graph
    """
    mod_found = False
    for res_idx in residue_graph:
        if residue_matches(resspec, residue_graph, res_idx):
            mod_found = True
            if mod.startswith('+'):
                mod = mod[1:]
                detect_modifications = True
            else:
                detect_modifications = False
            if mod != 'none' and mod not in library:
                raise NameError('{} is not known as a {} for '
                                'force field {}'
                                ''.format(mod, key, molecule.force_field.name))
            res = residue_graph.nodes[res_idx]
            LOGGER.debug('Annotating {} with {} {}',
                         _format_resname(res), key, mod)
            for node_idx in res['graph']:
                molecule.nodes[node_idx][key] = molecule.nodes[node_idx].get(key, []) + [mod]
                if detect_modifications:
                    molecule.nodes[node_idx][key].append('+')
    return mod_found


[docs]
def annotate_modifications(molecule, modifications, mutations, resspec_counts):
    """
    Annotate nodes in molecule with the desired modifications and mutations

    Parameters
    ----------
    molecule: networkx.Graph
    modifications: list[tuple[dict, str]]
        The modifications to apply. The first element is a dictionary contain
        the attributes a residue has to fulfill. It can contain the elements
        'chain', 'resname' and 'resid'. The second element is the modification
        that should be applied.
    mutations: list[tuple[dict, str]]
        The mutations to apply. The first element is a dictionary contain
        the attributes a residue has to fulfill. It can contain the elements
        'chain', 'resname' and 'resid'. The second element is the mutation that
        should be applied.
    resspec_counts: list[dict]
        List modified in place containing information about whether a
        modification/mutation has been applied successfully. If the target is
        found, the dictionary has one entry, {'success': True}. If not,
        'success' is False and there are additional items to indicate information
        about the failure.

    Raises
    ------
    NameError
        When a modification is not recognized.
    """
    if not modifications and not mutations:
        return

    # We need to do very similar but not quite identical things for
    # modifications and mutations. Associate them with the correct name and  FF
    # elements.
    associations = [(modifications, 'annotated_modifications', molecule.force_field.modifications),
                    (mutations, 'mutation', molecule.force_field.blocks)]

    residue_graph = make_residue_graph(molecule)
    # Get the name of the chain in the molecule that we're looking at
    residue = {key: residue_graph.nodes[0].get(key)
               for key in 'chain resid resname insertion_code'.split()}
    for mutmod, key, library in associations:
        for resspec, mod in mutmod:
            extra = False
            mod_found = _resiter(mod, residue_graph, resspec, library, key, molecule)
            if not mod_found:
                #if no mod found, return that there's a problem
                resspec_counts.append({'success': False,
                                       'mutmod': _format_resname(resspec),
                                       'post': mod,})
                extra = True
    #return that everything's fine by default
    if not extra:
        resspec_counts.append({'success': True})



[docs]
class AnnotateMutMod(Processor):
    """
    Annotates residues to have the required 'annotated_modifications' and
    'mutation' attributes on all nodes.

    Attributes
    ----------
    modifications: list[tuple[dict, str]]
    mutations: list[tuple[dict, str]]

    See also
    --------
    :func:`annotate_modifications`
    """
    def __init__(self, modifications=None, mutations=None):
        self.resspec_counts = []
        if not modifications:
            modifications = []
        if not mutations:
            mutations = []
        self.modifications = []
        for resspec, val in modifications:
            self.modifications.append((parse_residue_spec(resspec), val))
        self.mutations = []
        for resspec, val in mutations:
            self.mutations.append((parse_residue_spec(resspec), val))


[docs]
    def run_molecule(self, molecule):
        annotate_modifications(molecule, self.modifications, self.mutations, self.resspec_counts)
        return molecule



[docs]
    def run_system(self, system):
        super().run_system(system)
        _exit = sum([i['success'] for i in self.resspec_counts])
        if _exit == 0:
            LOGGER.warning('Residue specified by "{}" for mutation "{}" not found',
                           self.resspec_counts[0]['mutmod'], self.resspec_counts[0]['post'])
Source code for vermouth.processors.annotate_mut_mod

VerMoUTH

Navigation

Related Topics