Source code for vermouth.processors.annotate_idrs

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright 2024 University of Groningen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Provides processors that can add and remove IDR specific bonds
"""

from itertools import chain
from ..dssp.dssp import SS_CG, sequence_from_residues
from ..selectors import is_protein
from .processor import Processor
from ..rcsu.go_utils import _in_chain_and_resid_region
from ..log_helpers import StyleAdapter, get_logger

LOGGER = StyleAdapter(get_logger(__name__))



[docs]
def parse_residues(resspec):
    """
    Parse a residue specification: [<chain>-][<resid_start>]:[<resid_end>] where
    resid is /[0-9]+/.
    Returns a dictionary with keys 'chain', 'resid_start', and 'resid_end' for the
    fields that are specified. Resids will be ints.

    Parameters
    ----------
    resspec: str

    Returns
    -------
    dict

    """
    # <chain>-<resid>
    *chain, resids = resspec.split('-', 1)
    res_start, res_end = resids.split(':', 1)

    out = {}
    if resids:
        out['resids'] = [(int(res_start), int(res_end))]
    if chain:
        out['chain'] = chain[0]
    else:
        out['chain'] = None
    return out



[docs]
def annotate_disorder(molecule, id_regions, annotation="cgidr"):
    """
    Annotate the disordered regions of the molecule

    molecule: :class:`vermouth.molecule.Molecule`
            the molecule
    idr_regions: list
        dictionaries defining the disordered regions to annotate
    annotation: str
        name of the annotation in the node
    """

    for region in id_regions:
        for key, node in molecule.nodes.items():
            _old_resid = node['stash']['resid']
            chain = node['chain']
            # make sure we have the correct chain and are in the right region. If no chain in region assume single chain system.
            if _in_chain_and_resid_region(region, _old_resid, chain):
                molecule.nodes[key][annotation] = True
                if "cgsecstruct" in molecule.nodes[key] and molecule.nodes[key]["cgsecstruct"] != "C":
                        molecule.nodes[key]["cgsecstruct"] = "C"
                        molecule.meta['modified_cgsecstruct'] = True
            else:
                molecule.nodes[key][annotation] = False






[docs]
class AnnotateIDRs(Processor):
    """
    Processor to annotate intrinsically disordered regions of a molecule.

    This processor is designed primarily for the work described in the reference
    M3_GO, but is generally applicable for such circumstances where extra
    addition/removals are necessary.

    """

    def __init__(self, id_regions=None):
        """
        Parameters
        ----------
        id_regions:
            regions defining the IDRs
        """
        self.id_regions = []
        for region in id_regions:
            self.id_regions.append(parse_residues(region))


[docs]
    def run_molecule(self, molecule):
        """
        Assign disordered regions for a single molecule
        """
        annotate_disorder(molecule, self.id_regions)
        return molecule



[docs]
    def run_system(self, system):
        """
        Assign the water bias of the Go model to file. Biasing
        is always molecule specific i.e. no two different
        vermouth molecules can have the same bias.

        Parameters
        ----------
        system: :class:`vermouth.system.System`
        """
        if not self.id_regions:
            return system
        LOGGER.info("Annotating disordered regions.", type="step")
        super().run_system(system)

        if any([molecule.meta.get('modified_cgsecstruct', False) for molecule in system.molecules]):
            supplementary_ss_seq = list(
                chain(
                    *(
                        sequence_from_residues(molecule, "cgsecstruct")
                        for molecule in system.molecules
                        if is_protein(molecule)
                    )
                )
            )

            LOGGER.info(("Secondary structure assignment changed between dssp and martinize. "
                         "Check files for details."), type="general")
            system.meta["header"].extend((
                "The assigned secondary structure conflicted with ",
                "annotated IDRs. The following sequence of Martini secondary ",
                "structure was actually applied to the system:",
                "".join([SS_CG[i] for i in supplementary_ss_seq])
            ))
Source code for vermouth.processors.annotate_idrs

VerMoUTH

Navigation

Related Topics