# Source code for charger.csq

from collections import UserDict
from typing import Any, Dict, List, Set

from loguru import logger

# Mute this package's log records at import time so that importing it as a
# library is silent by default.
# NOTE(review): applications presumably opt back in with
# ``logger.enable("charger")`` — confirm against the loguru documentation.
logger.disable("charger")  # Disable emit logs by default

# Order matters: CSQ.rank_consequence_type() uses the list index of a term as
# its rank (index 0 is the smallest rank), and assigns len(list) to any term
# that is not found here. Do not reorder or sort this list.
ALL_CONSEQUENCE_TYPES: List[str] = [
    "transcript_ablation",
    "splice_acceptor_variant",
    "splice_donor_variant",
    "stop_gained",
    "frameshift_variant",
    "stop_lost",
    "start_lost",
    "transcript_amplification",
    "inframe_insertion",
    "inframe_deletion",
    "missense_variant",
    "protein_altering_variant",
    "splice_region_variant",
    "incomplete_terminal_codon_variant",
    "start_retained_variant",
    "stop_retained_variant",
    "synonymous_variant",
    "coding_sequence_variant",
    "mature_miRNA_variant",
    "5_prime_UTR_variant",
    "3_prime_UTR_variant",
    "non_coding_transcript_exon_variant",
    "intron_variant",
    "NMD_transcript_variant",
    "non_coding_transcript_variant",
    "upstream_gene_variant",
    "downstream_gene_variant",
    "TFBS_ablation",
    "TFBS_amplification",
    "TF_binding_site_variant",
    "regulatory_region_ablation",
    "regulatory_region_amplification",
    "feature_elongation",
    "regulatory_region_variant",
    "feature_truncation",
    "intergenic_variant",
]
"""All the possible consequence types fetched from `Ensembl v99`_ (January 2020).

The consequence types here are ordered by their severeness.

.. _Ensembl v99: https://www.ensembl.org/info/genome/variation/prediction/predicted_data.html
"""

# Subset of ALL_CONSEQUENCE_TYPES consulted by CSQ.is_truncation_type(): a CSQ
# counts as a truncation when any of its "&"-separated consequence terms
# appears in this list.
ALL_TRUNCATION_TYPES: List[str] = [
    "transcript_ablation",
    "splice_acceptor_variant",
    "splice_donor_variant",
    "stop_gained",
    "frameshift_variant",
    "start_lost",
]
"""All consequence types considered as a truncation."""

# Subset of ALL_CONSEQUENCE_TYPES consulted by CSQ.is_inframe_type(): a CSQ
# counts as inframe when any of its "&"-separated consequence terms appears in
# this list. Note that "stop_lost" is grouped here, not with the truncations.
ALL_INFRAME_TYPES: List[str] = [
    "inframe_insertion",
    "inframe_deletion",
    "stop_lost",
]
"""All consequence types considered as to be inframe."""


class CSQ(UserDict):
    """Consequence of a variant.

    Access each CSQ field like a `dict`.

    The class is used to set the annotation records in a
    :class:`~charger.variant.Variant` object. List of CSQ per feature will be
    stored at :attr:`Variant.parsed_csq <charger.variant.Variant.parsed_csq>`.

    Args:
        dict: Optional mapping (or iterable of key/value pairs) holding the
            CSQ fields. The name mirrors :class:`collections.UserDict`.
        **kwargs: Additional CSQ fields given as keyword arguments.

    Raises:
        ValueError: If any field listed in :attr:`REQUIRED_FIELDS` is missing.

    Examples:

        >>> csq = variant.parsed_csq[0]; csq
        CSQ(SYMBOL='FANCM', HGVSc='ENST00000267430.5:c.5101N>T', Consequence='stop_gained', …)
        >>> list(csq.keys())[:5]
        ['Allele', 'Consequence', 'IMPACT', 'SYMBOL', 'Gene']
        >>> list(csq.values())[:5]
        ['T', 'stop_gained', 'HIGH', 'FANCM', 'ENSG00000187790']
        >>> csq['HGVSc']
        'ENST00000267430.5:c.5101N>T'
    """

    data: Dict[str, Any]

    __slots__ = [
        "data",
    ]

    #: Required CSQ fields. Will raise a `ValueError` if any of the fields is
    #: missing when creating a new CSQ object.
    REQUIRED_FIELDS: Set[str] = {
        "Allele",
        "Consequence",
        "SYMBOL",
        "Gene",
        "Feature_type",
        "Feature",
        "BIOTYPE",
        "HGVSc",
        "HGVSp",
        "cDNA_position",
        "CDS_position",
        "Protein_position",
        "Amino_acids",
        "Codons",
        "Existing_variation",
        "STRAND",
    }

    def __init__(self, dict=None, **kwargs):
        # The parameter is named ``dict`` to stay call-compatible with
        # ``UserDict.__init__``; renaming it would break keyword callers.
        super().__init__(dict, **kwargs)
        missing_fields = self.REQUIRED_FIELDS - self.data.keys()
        if missing_fields:
            # Sort so the message is deterministic; set iteration order of
            # strings varies between interpreter runs.
            raise ValueError(
                "CSQ misses these required fields: "
                f"{', '.join(sorted(missing_fields))}"
            )

    @property
    def consequence_types(self) -> List[str]:
        """Get all the consequence types separated.

        The ``Consequence`` field is split on ``&``, so a single-term value
        yields a one-element list.
        """
        return self.data["Consequence"].split("&")

    def rank_consequence_type(self) -> int:
        """Rank the severity of its consequence type (CSQ column ``Consequence``).

        Severe consequence type has smaller rank (smallest being 0). Ranking is
        based on the order in :attr:`ALL_CONSEQUENCE_TYPES`.

        When the CSQ has multiple consequence types separated by ``&``, return
        the smallest rank of all the types. When the consequence type is not
        known, return the biggest possible rank + 1.

        Returns:
            The smallest (i.e. most severe) rank among the consequence types.
        """
        ranks: List[int] = []
        for ct in self.consequence_types:
            try:
                rank = ALL_CONSEQUENCE_TYPES.index(ct)
            except ValueError:
                # Assign unknown consequence type to the lowest rank
                rank = len(ALL_CONSEQUENCE_TYPES)
                logger.warning(
                    "Got unknown consequence type: {ct}; assign its rank = {rank}",
                    ct=ct,
                    rank=rank,
                )
            ranks.append(rank)
        # ``str.split`` always yields at least one item, so ``ranks`` is
        # never empty here.
        return min(ranks)

    def is_truncation_type(self) -> bool:
        """Whether the consequence type is truncation.

        True when at least one of the consequence types is a truncation type.
        See :attr:`ALL_TRUNCATION_TYPES` for the full list of consequence types.
        """
        return any(ct in ALL_TRUNCATION_TYPES for ct in self.consequence_types)

    def is_inframe_type(self) -> bool:
        """Whether the consequence type is inframe.

        True when at least one of the consequence types is an inframe type.
        See :attr:`ALL_INFRAME_TYPES` for the full list of consequence types.
        """
        return any(ct in ALL_INFRAME_TYPES for ct in self.consequence_types)

    def __repr__(self):
        # Show only a few identifying fields; the trailing ellipsis signals
        # that the remaining fields are omitted.
        fields = ["SYMBOL", "HGVSc", "Consequence"]
        details = [f"{f}={self.data[f]!r}" for f in fields]
        return f"CSQ({', '.join(details)}, …)"