Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions mypy_stubs/ga4gh/va_spec/acmg_2015.pyi
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
from enum import Enum

from pydantic import BaseModel

from .base.core import Method, iriReference, VariantPathogenicityProposition
from ..core.models import MappableConcept
from .base.core import EvidenceLine, Method, Statement, VariantPathogenicityProposition, iriReference

class VariantPathogenicityEvidenceLine(BaseModel):
targetProposition: VariantPathogenicityProposition | None
class VariantPathogenicityEvidenceLine(EvidenceLine):
targetProposition: VariantPathogenicityProposition | None # type: ignore
strengthOfEvidenceProvided: MappableConcept | None
specifiedBy: Method | iriReference | None

Expand Down Expand Up @@ -41,3 +39,13 @@ class VariantPathogenicityEvidenceLine(BaseModel):
BP5 = "BP5"
BP6 = "BP6"
BP7 = "BP7"

# Statement type for the ACMG 2015 profile stub: a ``Statement`` subclass that
# declares ``proposition`` as a ``VariantPathogenicityProposition``.
class VariantPathogenicityStatement(Statement):
    proposition: VariantPathogenicityProposition

# Classification labels declared by the ACMG 2015 stub. The ``str`` mixin makes
# every member a ``str`` instance, so members compare equal to their plain
# string value (e.g. ``AcmgClassification.BENIGN == "benign"``).
class AcmgClassification(str, Enum):
    PATHOGENIC = "pathogenic"
    BENIGN = "benign"
    LIKELY_PATHOGENIC = "likely pathogenic"
    LIKELY_BENIGN = "likely benign"
    UNCERTAIN_SIGNIFICANCE = "uncertain significance"
3,472 changes: 1,752 additions & 1,720 deletions poetry.lock

Large diffs are not rendered by default.

8 changes: 6 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[build-system]
requires = ["poetry-core"]
requires = ["setuptools", "poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.poetry]
Expand Down Expand Up @@ -33,7 +33,7 @@ eutils = "~0.6.0"
email_validator = "~2.1.1"
numpy = "~1.26"
httpx = "~0.26.0"
pandas = "~1.4.1"
pandas = ">=2.2.0,<3.0.0"
pydantic = "~2.10.0"
python-dotenv = "~0.20.0"
python-json-logger = "~2.0.7"
Expand Down Expand Up @@ -105,6 +105,10 @@ asyncio_mode = 'strict'
testpaths = "tests/"
pythonpath = "."
norecursedirs = "tests/helpers/"
markers = [
"unit: fast unit tests with isolated dependencies",
"integration: tests that exercise multi-component flows and/or database interactions",
]
# Uncomment the following lines to include application log output in Pytest logs.
# log_cli = true
# log_cli_level = "DEBUG"
Expand Down
17 changes: 3 additions & 14 deletions src/mavedb/lib/annotation/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ def mavedb_vrs_agent(version: str) -> Agent:
Create a [VA Agent](https://va-ga4gh.readthedocs.io/en/latest/core-information-model/entities/agent.html)
object for the passed MaveDB VRS mapping version.
"""
if version is None:
raise ValueError("Version cannot be None")

version_at_time_of_variant_generation = Extension(
name="mavedbVrsVersion",
value=version,
Expand All @@ -55,17 +58,3 @@ def mavedb_user_agent(user: User) -> Agent:
agentType="Person",
description=f"MaveDB ORCid authenticated user {user.username}",
)


# XXX: Ideally, this becomes versioned software.
def excalibr_calibration_agent() -> Agent:
    """
    Create a [VA Agent](https://va-ga4gh.readthedocs.io/en/latest/core-information-model/entities/agent.html)
    object for the ExCALIBR calibration software.

    Returns:
        An ``Agent`` with ``agentType="Software"`` and a fixed name and
        description. No software version is recorded on the agent.
    """
    return Agent(
        name="ExCALIBR Variant Calibrator",
        agentType="Software",
        # XXX - version?
        description="ExCALIBR variant calibrator, see https://github.com/Dzeiberg/mave_calibration",
    )
115 changes: 93 additions & 22 deletions src/mavedb/lib/annotation/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,32 @@
See: https://va-ga4gh.readthedocs.io/en/latest/modeling-foundations/data-structures.html#study-result-structure
- Statement
See: https://va-ga4gh.readthedocs.io/en/latest/modeling-foundations/data-structures.html#statement-structure
- EvidenceLine
See: https://va-ga4gh.readthedocs.io/en/latest/modeling-foundations/data-structures.html#evidence-line-structure
- VariantPathogenicityStatement
See: https://va-spec.ga4gh.org/en/latest/va-standard-profiles/community-profiles/acmg-2015-profiles.html#variant-pathogenicity-statement-acmg-2015
"""

from typing import Optional

from ga4gh.va_spec.acmg_2015 import VariantPathogenicityEvidenceLine
from ga4gh.va_spec.acmg_2015 import VariantPathogenicityStatement
from ga4gh.va_spec.base.core import ExperimentalVariantFunctionalImpactStudyResult, Statement

from mavedb.lib.annotation.classification import functional_classification_of_variant
from mavedb.lib.annotation.evidence_line import acmg_evidence_line, functional_evidence_line
from mavedb.lib.annotation.proposition import (
mapped_variant_to_experimental_variant_clinical_impact_proposition,
mapped_variant_to_experimental_variant_functional_impact_proposition,
)
from mavedb.lib.annotation.statement import mapped_variant_to_functional_statement
from mavedb.lib.annotation.statement import (
mapped_variant_to_functional_statement,
mapped_variant_to_pathogenicity_statement,
)
from mavedb.lib.annotation.study_result import mapped_variant_to_experimental_variant_impact_study_result
from mavedb.lib.annotation.util import (
can_annotate_variant_for_pathogenicity_evidence,
can_annotate_variant_for_functional_statement,
can_annotate_variant_for_pathogenicity_evidence,
score_calibration_may_be_used_for_annotation,
select_strongest_functional_calibration,
select_strongest_pathogenicity_calibration,
)
from mavedb.models.mapped_variant import MappedVariant

Expand All @@ -31,33 +38,97 @@ def variant_study_result(mapped_variant: MappedVariant) -> ExperimentalVariantFu
return mapped_variant_to_experimental_variant_impact_study_result(mapped_variant)


def variant_functional_impact_statement(
    mapped_variant: MappedVariant, allow_research_use_only_calibrations: bool = False
) -> Optional[Statement]:
    """
    Build the functional impact statement for a mapped variant.

    One functional evidence line is created for every score calibration on the
    variant's score set that ``score_calibration_may_be_used_for_annotation``
    accepts for ``annotation_type="functional"``. The calibration chosen by
    ``select_strongest_functional_calibration`` supplies the classification
    attached to the resulting statement.

    Args:
        mapped_variant: The mapped variant to annotate.
        allow_research_use_only_calibrations: Forwarded unchanged to the
            eligibility checks. Defaults to ``False``.

    Returns:
        The statement produced by ``mapped_variant_to_functional_statement``,
        or ``None`` when the variant cannot be annotated or no strongest
        calibration is selected.
    """
    if not can_annotate_variant_for_functional_statement(
        mapped_variant, allow_research_use_only_calibrations=allow_research_use_only_calibrations
    ):
        return None

    study_result = mapped_variant_to_experimental_variant_impact_study_result(mapped_variant)
    functional_proposition = mapped_variant_to_experimental_variant_functional_impact_proposition(mapped_variant)

    # Collect eligible calibrations
    eligible_calibrations = [
        score_calibration
        for score_calibration in mapped_variant.variant.score_set.score_calibrations
        if score_calibration_may_be_used_for_annotation(
            score_calibration,
            annotation_type="functional",
            allow_research_use_only_calibrations=allow_research_use_only_calibrations,
        )
    ]

    # Select the calibration with the strongest evidence. The selector also
    # returns the matching range, which this function does not use.
    strongest_calibration, _ = select_strongest_functional_calibration(mapped_variant, eligible_calibrations)

    if not strongest_calibration:
        return None

    # Classify the variant against the strongest calibration.
    # NOTE(review): the original comment says the classification is INDETERMINATE when
    # the variant falls in no range -- confirm against functional_classification_of_variant.
    _, classification = functional_classification_of_variant(mapped_variant, strongest_calibration)

    # Build evidence lines for all eligible calibrations
    functional_evidence = [
        functional_evidence_line(mapped_variant, score_calibration, [study_result])
        for score_calibration in eligible_calibrations
    ]

    return mapped_variant_to_functional_statement(
        mapped_variant, functional_proposition, functional_evidence, strongest_calibration, classification
    )

# TODO#494: Add support for multiple clinical evidence lines. If a score set has multiple calibrations
# associated with it, we should create one evidence line for each calibration.

def variant_pathogenicity_statement(
    mapped_variant: MappedVariant, allow_research_use_only_calibrations: bool = False
) -> Optional[VariantPathogenicityStatement]:
    """
    Build the ACMG variant pathogenicity statement for a mapped variant.

    For every score calibration on the variant's score set that
    ``score_calibration_may_be_used_for_annotation`` accepts for
    ``annotation_type="pathogenicity"``, a functional evidence line is wrapped
    in a functional statement, which in turn backs one ACMG evidence line. The
    calibration and range chosen by ``select_strongest_pathogenicity_calibration``
    are passed on to ``mapped_variant_to_pathogenicity_statement``.

    Args:
        mapped_variant: The mapped variant to annotate.
        allow_research_use_only_calibrations: Forwarded unchanged to the
            eligibility checks. Defaults to ``False``.

    Returns:
        The statement produced by ``mapped_variant_to_pathogenicity_statement``,
        or ``None`` when the variant cannot be annotated or no strongest
        calibration is selected.
    """
    if not can_annotate_variant_for_pathogenicity_evidence(
        mapped_variant, allow_research_use_only_calibrations=allow_research_use_only_calibrations
    ):
        return None

    study_result = mapped_variant_to_experimental_variant_impact_study_result(mapped_variant)
    functional_proposition = mapped_variant_to_experimental_variant_functional_impact_proposition(mapped_variant)
    clinical_proposition = mapped_variant_to_experimental_variant_clinical_impact_proposition(mapped_variant)

    # Collect eligible calibrations
    eligible_calibrations = [
        score_calibration
        for score_calibration in mapped_variant.variant.score_set.score_calibrations
        if score_calibration_may_be_used_for_annotation(
            score_calibration,
            annotation_type="pathogenicity",
            allow_research_use_only_calibrations=allow_research_use_only_calibrations,
        )
    ]

    # Select the calibration with the strongest evidence
    strongest_calibration, strongest_range = select_strongest_pathogenicity_calibration(
        mapped_variant, eligible_calibrations
    )

    if not strongest_calibration:
        return None

    # Functional classification used for the functional statements nested in the clinical evidence.
    # NOTE(review): this is derived from strongest_calibration only, yet it is attached to the
    # functional statement of every eligible calibration below -- confirm a per-calibration
    # classification is not intended.
    _, classification = functional_classification_of_variant(mapped_variant, strongest_calibration)

    # strongest_range is forwarded to the pathogenicity statement builder.
    # NOTE(review): the original comment says a None range yields UNCERTAIN_SIGNIFICANCE --
    # confirm against mapped_variant_to_pathogenicity_statement.

    # Build evidence lines for all eligible calibrations
    clinical_evidence = []
    for score_calibration in eligible_calibrations:
        functional_evidence = functional_evidence_line(mapped_variant, score_calibration, [study_result])
        functional_statement = mapped_variant_to_functional_statement(
            mapped_variant, functional_proposition, [functional_evidence], score_calibration, classification
        )
        clinical_evidence.append(
            acmg_evidence_line(mapped_variant, score_calibration, clinical_proposition, [functional_statement])
        )

    return mapped_variant_to_pathogenicity_statement(
        mapped_variant, clinical_proposition, clinical_evidence, strongest_calibration, strongest_range
    )
Loading