Source code for vermouth.processors.annotate_idrs
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2024 University of Groningen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Provides processors that can add and remove IDR specific bonds
"""
from itertools import chain
from ..dssp.dssp import SS_CG, sequence_from_residues
from ..selectors import is_protein
from .processor import Processor
from ..rcsu.go_utils import _in_chain_and_resid_region
from ..log_helpers import StyleAdapter, get_logger
LOGGER = StyleAdapter(get_logger(__name__))
[docs]
def parse_residues(resspec):
    """
    Parse a residue specification: [<chain>-]<resid_start>:<resid_end> where
    resid is /[0-9]+/.

    The chain identifier is everything before the first '-' and may be left
    out together with the '-'. Both resids and the ':' between them are
    required.

    Parameters
    ----------
    resspec: str
        The residue specification, e.g. ``"A-1:10"`` or ``"1:10"``.

    Returns
    -------
    dict
        A dictionary with two keys: 'resids', a list holding a single
        ``(resid_start, resid_end)`` tuple of ints, and 'chain', the chain
        identifier as a str or None if no chain was specified.

    Raises
    ------
    ValueError
        If the ':' separator is missing, or if either resid cannot be
        converted to an int.
    """
    # <chain>-<resid>; `chain` is an empty list when there is no '-'.
    *chain, resids = resspec.split('-', 1)
    res_start, separator, res_end = resids.partition(':')
    if not separator:
        raise ValueError(
            "Invalid residue specification '{}': expected "
            "[<chain>-]<resid_start>:<resid_end>.".format(resspec)
        )
    try:
        region = (int(res_start), int(res_end))
    except ValueError as error:
        # Re-raise with the offending specification so the user can find it.
        raise ValueError(
            "Invalid residue specification '{}': resid_start and resid_end "
            "must be integers.".format(resspec)
        ) from error

    return {'resids': [region], 'chain': chain[0] if chain else None}
[docs]
def annotate_disorder(molecule, id_regions, annotation="cgidr"):
    """
    Flag the nodes of a molecule that lie inside a disordered region.

    For every region, each node is tested with
    ``_in_chain_and_resid_region`` using the resid stored under
    ``node['stash']['resid']`` and the node's chain. Matching nodes get
    ``node[annotation] = True``; if such a node carries a "cgsecstruct"
    attribute other than "C", it is reset to "C" and
    ``molecule.meta['modified_cgsecstruct']`` is set to True. Nodes that do
    not match get ``node[annotation] = False`` unless the attribute already
    holds a truthy value (e.g. from an earlier region).

    The molecule is modified in place; nothing is returned.

    Parameters
    ----------
    molecule: :class:`vermouth.molecule.Molecule`
        the molecule
    id_regions: list
        dictionaries defining the disordered regions to annotate
    annotation: str
        name of the annotation in the node
    """
    for idr in id_regions:
        for attributes in molecule.nodes.values():
            stashed_resid = attributes['stash']['resid']
            node_chain = attributes['chain']

            if not _in_chain_and_resid_region(idr, stashed_resid, node_chain):
                # Never downgrade a node that another region already marked.
                if not attributes.get(annotation):
                    attributes[annotation] = False
                continue

            attributes[annotation] = True
            # Disordered residues are forced to coil; record that the
            # secondary structure no longer matches what was assigned.
            if attributes.get("cgsecstruct", "C") != "C":
                attributes["cgsecstruct"] = "C"
                molecule.meta['modified_cgsecstruct'] = True
[docs]
class AnnotateIDRs(Processor):
    """
    Processor to annotate intrinsically disordered regions of a molecule.

    This processor is designed primarily for the work described in the reference
    M3_GO, but is generally applicable for such circumstances where extra
    addition/removals are necessary.
    """
    def __init__(self, id_regions=None):
        """
        Parameters
        ----------
        id_regions: collections.abc.Iterable[str] or None
            residue specifications defining the IDRs, each one parsed with
            :func:`parse_residues`. ``None`` (the default) or an empty
            iterable means there are no regions to annotate.
        """
        # `or ()` keeps the documented default usable: iterating None raises.
        self.id_regions = [parse_residues(region) for region in id_regions or ()]

    def run_molecule(self, molecule):
        """
        Assign disordered regions for a single molecule

        Parameters
        ----------
        molecule: :class:`vermouth.molecule.Molecule`
            the molecule to annotate; it is modified in place.

        Returns
        -------
        :class:`vermouth.molecule.Molecule`
            the same molecule that was passed in.
        """
        annotate_disorder(molecule, self.id_regions)
        return molecule

    def run_system(self, system):
        """
        Annotate the disordered regions of every molecule in the system.

        If annotating changed the "cgsecstruct" of any node (signalled by
        ``molecule.meta['modified_cgsecstruct']``), the secondary structure
        sequence of all protein molecules is appended to
        ``system.meta["header"]`` so the output records what was actually
        applied.

        Parameters
        ----------
        system: :class:`vermouth.system.System`

        Returns
        -------
        :class:`vermouth.system.System` or None
            the unmodified system when no regions are defined; otherwise
            the system is modified in place and nothing is returned
            explicitly.
        """
        if not self.id_regions:
            return system
        LOGGER.info("Annotating disordered regions.", type="step")
        super().run_system(system)

        if any(molecule.meta.get('modified_cgsecstruct', False)
               for molecule in system.molecules):
            supplementary_ss_seq = list(
                chain(
                    *(
                        sequence_from_residues(molecule, "cgsecstruct")
                        for molecule in system.molecules
                        if is_protein(molecule)
                    )
                )
            )
            LOGGER.info(("Secondary structure assignment changed between dssp and martinize. "
                         "Check files for details."), type="general")
            system.meta["header"].extend((
                "The assigned secondary structure conflicted with ",
                "annotated IDRs. The following sequence of Martini secondary ",
                "structure was actually applied to the system:",
                "".join([SS_CG[i] for i in supplementary_ss_seq])
            ))