Source code for vermouth.processors.annotate_mut_mod

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2018 University of Groningen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Provides a processor that annotates a molecule with desired mutations and
modifications.
"""

from .processor import Processor
from ..log_helpers import StyleAdapter, get_logger
from ..graph_utils import make_residue_graph
from ..selectors import is_protein
LOGGER = StyleAdapter(get_logger(__name__))


[docs] def parse_residue_spec(resspec): """ Parse a residue specification: [<chain>-][<resname>][[#]<resid>] where resid is /[0-9]+/. If resname ends in a number and a resid is also specified, the # separator is required. Returns a dictionary with keys 'chain', 'resname', and 'resid' for the fields that are specified. Resid will be an int. Parameters ---------- resspec: str Returns ------- dict """ # A-LYS2 or PO4#2 # <chain>-<resname><resid> *chain, res = resspec.split('-', 1) res, *resid = res.rsplit('#', 1) if resid: # [] if False resname = res resid = resid[0] else: idx = 0 for idx, char in reversed(list(enumerate(res))): if not char.isdigit(): idx += 1 break resname = res[:idx] resid = res[idx:] out = {} if resid: resid = int(resid) out['resid'] = resid if resname: out['resname'] = resname if chain: out['chain'] = chain[0] return out
def _subdict(dict1, dict2): """True if dict1 <= dict2 All items in dict1 must be in dict2. """ for key, val in dict1.items(): if key not in dict2 or dict2[key] != val: return False return True
[docs] def residue_matches(resspec, residue_graph, res_idx): """ Returns True iff resspec describes residue_graph.nodes[res_idx]. The 'resname's nter and cter match the residues with a degree of 1 and with the lowest and highest residue numbers respectively. Parameters ---------- resspec: dict Attributes that must be present in the residue node. 'resname' is treated specially as described above. residue_graph: networkx.Graph A graph with one node per residue. res_idx: collections.abc.Hashable A node index in residue_graph. Returns ------- bool Whether resspec describes the node res_idx in residue_graph. """ res_node = residue_graph.nodes[res_idx] residue = {key: res_node.get(key) for key in 'chain resid resname insertion_code'.split()} # Find all residues with degree 1: the ones with a resid lower than # their neighbour will be Nter, those with a resid higher than their # neighbour Cter # FIXME: Once residue_graph is a digraph we can do something much much # more clever, addressing arbitrarily branched polymers and # termini if residue_graph.degree[res_idx] == 1 and resspec.get('resname') in ['nter', 'cter']: if not _terminal_matches(resspec.get('resname'), residue_graph, res_idx): return False # Remove resname and resid from the resspec, since they have special meaning # in this case. resspec = resspec.copy() resspec.pop('resname', '') resspec.pop('resid', 0) return _subdict(resspec, residue)
def _terminal_matches(resname, residue_graph, res_idx): """ Tests whether the node res_idx in graph residue_graph is a cter or nter, as determined by resname. Resname must be one of nter or cter. It is assumed that the degree of the specified node is 1. Parameters ---------- resname: str Must be either nter or cter residue_graph: networkx.Graph res_idx: collections.abc.Hashable Key in residue_graph Returns ------- bool If resname is nter, returns True iff the resid of the specified node is lower than that of its neighbour. If resname is cter, returns True iff the resid of the specified node is larger than that of its neighbour. Raises ------ KeyError If resname is neither cter nor nter """ neighbour = list(residue_graph[res_idx])[0] # Only one neighbour by definition. resid = residue_graph.nodes[res_idx].get('resid', 0) neighbour_resid = residue_graph.nodes[neighbour].get('resid', 0) if not is_protein(residue_graph.nodes[res_idx]['graph']): return False if resname == 'nter': return resid < neighbour_resid elif resname == 'cter': return resid > neighbour_resid raise KeyError('Unknown residue name in provided resspec. Found {}, only ' 'cter and nter are known'.format(resname)) def _format_resname(res): """ Provisional function that performs the opposite of parse_residue_spec. Poorly tested, use at own risk. """ chain = res.get('chain', '') out = '' if chain: out += chain + '-' resname = res.get('resname') out += resname if resname and resname[-1].isdigit(): out += '#' out += str(res.get('resid', '')) out += res.get('insertion_code', '') return out def _resiter(mod, residue_graph, resspec, library, key, molecule): """ Iterate over residues to find a specific modification Parameters ---------- mod: str The modification to apply, eg N-ter, C-ter residue_graph: networkx.Graph A graph with one node per residue. resspec: dict Attributes that must be present in the residue node. 'resname' is treated specially as described above. library: dict dictionary of modifications/mutations from the force field key: str from associations molecule: networkx.Graph """ mod_found = False for res_idx in residue_graph: if residue_matches(resspec, residue_graph, res_idx): mod_found = True if mod != 'none' and mod not in library: raise NameError('{} is not known as a {} for ' 'force field {}' ''.format(mod, key, molecule.force_field.name)) res = residue_graph.nodes[res_idx] LOGGER.debug('Annotating {} with {} {}', _format_resname(res), key, mod) for node_idx in res['graph']: molecule.nodes[node_idx][key] = molecule.nodes[node_idx].get(key, []) + [mod] return mod_found
[docs] def annotate_modifications(molecule, modifications, mutations, resspec_counts): """ Annotate nodes in molecule with the desired modifications and mutations Parameters ---------- molecule: networkx.Graph modifications: list[tuple[dict, str]] The modifications to apply. The first element is a dictionary contain the attributes a residue has to fulfill. It can contain the elements 'chain', 'resname' and 'resid'. The second element is the modification that should be applied. mutations: list[tuple[dict, str]] The mutations to apply. The first element is a dictionary contain the attributes a residue has to fulfill. It can contain the elements 'chain', 'resname' and 'resid'. The second element is the mutation that should be applied. resspec_counts: list[dict] List modified in place containing information about whether a modification/mutation has been applied successfully. If the target is found, the dictionary has one entry, {'success': True}. If not, 'success' is False and there are additional items to indicate information about the failure. Raises ------ NameError When a modification is not recognized. """ if not modifications and not mutations: return # We need to do very similar but not quite identical things for # modifications and mutations. Associate them with the correct name and FF # elements. associations = [(modifications, 'modification', molecule.force_field.modifications), (mutations, 'mutation', molecule.force_field.blocks)] residue_graph = make_residue_graph(molecule) # Get the name of the chain in the molecule that we're looking at residue = {key: residue_graph.nodes[0].get(key) for key in 'chain resid resname insertion_code'.split()} chain = residue['chain'] extra = False for mutmod, key, library in associations: for resspec, mod in mutmod: mod_found = _resiter(mod, residue_graph, resspec, library, key, molecule) if not mod_found: #if no mod found, return that there's a problem resspec_counts.append({'success': False, 'mutmod': _format_resname(resspec), 'post': mod,}) extra = True #return that everything's fine by default if not extra: resspec_counts.append({'success': True})
[docs] class AnnotateMutMod(Processor): """ Annotates residues to have the required 'modification' and 'mutation' attributes on all nodes. Attributes ---------- modifications: list[tuple[dict, str]] mutations: list[tuple[dict, str]] See also -------- :func:`annotate_modifications` """ def __init__(self, modifications=None, mutations=None): self.resspec_counts = [] if not modifications: modifications = [] if not mutations: mutations = [] self.modifications = [] for resspec, val in modifications: self.modifications.append((parse_residue_spec(resspec), val)) self.mutations = [] for resspec, val in mutations: self.mutations.append((parse_residue_spec(resspec), val))
[docs] def run_molecule(self, molecule): annotate_modifications(molecule, self.modifications, self.mutations, self.resspec_counts) return molecule
[docs] def run_system(self, system): super().run_system(system) _exit = sum([i['success'] for i in self.resspec_counts]) if _exit == 0: LOGGER.warning('Residue specified by "{}" for mutation "{}" not found', self.resspec_counts[0]['mutmod'], self.resspec_counts[0]['post'])