import json
from pathlib import Path
import pytest
import ferro_hgvs
@pytest.fixture
def projector(tmp_path: Path) -> ferro_hgvs.VariantProjector:
    """Return a ``VariantProjector`` loaded from a one-transcript reference file.

    The reference describes ``NM_TEST.1`` (gene ``TESTGENE``, ``+`` strand):
    the 9-base sequence ``ATGCGCTAA`` with a CDS covering transcript
    positions 1-9, carried by a single exon whose genomic span is
    1000-1008 on ``chr1``.  The JSON document is written to
    ``tmp_path / "transcripts.json"`` and passed to the projector by path.
    """
    transcript_seq = "ATGCGCTAA"
    region_start, region_end = 1000, 1008

    sole_exon = {
        "number": 1,
        "start": 1,
        "end": 9,
        "genomic_start": region_start,
        "genomic_end": region_end,
    }
    transcript = {
        "id": "NM_TEST.1",
        "gene_symbol": "TESTGENE",
        "strand": "+",
        "sequence": transcript_seq,
        "cds_start": 1,
        "cds_end": 9,
        "exons": [sole_exon],
        "chromosome": "chr1",
        "genomic_start": region_start,
        "genomic_end": region_end,
    }
    # Pad the transcript bases with N so they begin at offset 1000 of chr1.
    chr1_seq = "".join(("N" * 1000, transcript_seq, "N" * 100))
    reference = {
        "transcripts": [transcript],
        "genomic_sequences": {"chr1": chr1_seq},
    }

    reference_file = tmp_path / "transcripts.json"
    reference_file.write_text(json.dumps(reference))
    return ferro_hgvs.VariantProjector(reference_json=str(reference_file))
class TestVariantProjector:
    """Single-transcript projections using the ``projector`` fixture (``NM_TEST.1``)."""

    def test_missense_substitution_plus_strand(
        self, projector: ferro_hgvs.VariantProjector
    ) -> None:
        """g.1003C>A is reported as c.4C>A with protein name p.(Arg2Ser)."""
        projection = projector.project("NC_000001.11:g.1003C>A", transcript="NM_TEST.1")

        assert projection.transcript_id == "NM_TEST.1"
        assert projection.gene_symbol == "TESTGENE"

        coding_name = projection.c_name
        assert coding_name is not None
        assert ":c.4C>A" in coding_name

        assert projection.p_name == "NP_TEST.1(TESTGENE):p.(Arg2Ser)"

        # A plain coding substitution must not set any of the classification flags.
        assert projection.is_frameshift is False
        assert projection.is_intronic is False
        assert projection.is_utr is False

    def test_deletion_frameshift_protein(self, projector: ferro_hgvs.VariantProjector) -> None:
        """A single-base deletion yields a ``del`` c. name and an ``fs`` p. name."""
        projection = projector.project("NC_000001.11:g.1003del", transcript="NM_TEST.1")

        coding_name = projection.c_name
        assert coding_name is not None
        assert "del" in coding_name

        protein_name = projection.p_name
        assert protein_name is not None
        assert "fs" in protein_name

        assert projection.is_frameshift is True

    def test_unknown_transcript_raises(self, projector: ferro_hgvs.VariantProjector) -> None:
        """Requesting a transcript absent from the reference raises ``RuntimeError``."""
        with pytest.raises(RuntimeError):
            projector.project("NC_000001.11:g.1003C>A", transcript="NM_NOPE.99")

    def test_no_overlap_raises(self, projector: ferro_hgvs.VariantProjector) -> None:
        """A position outside the transcript raises ``RuntimeError`` mentioning "overlap"."""
        with pytest.raises(RuntimeError, match="overlap"):
            projector.project("NC_000001.11:g.5000A>G", transcript="NM_TEST.1")

    def test_repr_includes_g_name(self, projector: ferro_hgvs.VariantProjector) -> None:
        """``repr`` names the projection type and contains a ``:g.`` or ``:c.`` description."""
        projection = projector.project("NC_000001.11:g.1003C>A", transcript="NM_TEST.1")
        text = repr(projection)

        assert "VariantProjection" in text
        assert ":g." in text or ":c." in text
@pytest.fixture
def long_projector(tmp_path: Path) -> ferro_hgvs.VariantProjector:
    """Return a ``VariantProjector`` for ``NM_LONG.1``, a transcript with a six-codon CDS.

    The transcript sequence is the CDS ``ATGCGCAAAGGGTTTTAA`` with three
    non-CDS bases on either side (24 bases in total), so the CDS occupies
    transcript positions 4-21.  A single exon covers the whole transcript
    with genomic span 2000-2023 on ``chr2``.  The JSON document is written
    to ``tmp_path / "long_transcripts.json"`` and passed to the projector
    by path.
    """
    # Six codons: ATG CGC AAA GGG TTT TAA.
    coding_region = "ATGCGCAAAGGGTTTTAA"
    transcript_seq = "AAA" + coding_region + "CCC"
    region_start, region_end = 2000, 2023

    sole_exon = {
        "number": 1,
        "start": 1,
        "end": 24,
        "genomic_start": region_start,
        "genomic_end": region_end,
    }
    transcript = {
        "id": "NM_LONG.1",
        "gene_symbol": "LONGGENE",
        "strand": "+",
        "sequence": transcript_seq,
        "cds_start": 4,
        "cds_end": 21,
        "exons": [sole_exon],
        "chromosome": "chr2",
        "genomic_start": region_start,
        "genomic_end": region_end,
    }
    # Pad the transcript bases with N so they begin at offset 2000 of chr2.
    chr2_seq = "".join(("N" * 2000, transcript_seq, "N" * 100))
    reference = {
        "transcripts": [transcript],
        "genomic_sequences": {"chr2": chr2_seq},
    }

    reference_file = tmp_path / "long_transcripts.json"
    reference_file.write_text(json.dumps(reference))
    return ferro_hgvs.VariantProjector(reference_json=str(reference_file))
class TestIndelProteinNomenclature:
    """Protein-level names for indels, inversions and stop changes on ``NM_LONG.1``.

    All cases run against the ``long_projector`` fixture.  The variants
    target g.2005-2011 (around the ``Arg2``/``Lys3`` codons, as the asserted
    names show) and g.2018_2020 (the stop codon, asserted as ``Ter6``).
    """

    def test_del_whole_codon_single_aa(self, long_projector: ferro_hgvs.VariantProjector) -> None:
        """Deleting one whole codon is an in-frame single-residue deletion."""
        projection = long_projector.project(
            "NC_000002.12:g.2006_2008del", transcript="NM_LONG.1"
        )

        coding_name = projection.c_name
        assert coding_name is not None
        assert "del" in coding_name
        assert projection.p_name == "NP_LONG.1(LONGGENE):p.(Arg2del)"
        assert projection.is_frameshift is False

    def test_del_two_whole_codons(self, long_projector: ferro_hgvs.VariantProjector) -> None:
        """Deleting two adjacent codons is named as a residue-range deletion."""
        projection = long_projector.project(
            "NC_000002.12:g.2006_2011del", transcript="NM_LONG.1"
        )

        assert projection.c_name is not None
        assert projection.p_name == "NP_LONG.1(LONGGENE):p.(Arg2_Lys3del)"
        assert projection.is_frameshift is False

    def test_del_single_base_frameshift(self, long_projector: ferro_hgvs.VariantProjector) -> None:
        """A one-base deletion produces a frameshift anchored at ``Arg2``."""
        projection = long_projector.project("NC_000002.12:g.2006del", transcript="NM_LONG.1")

        protein_name = projection.p_name
        assert protein_name is not None
        assert "fs" in protein_name
        assert "Arg2" in protein_name
        assert projection.is_frameshift is True

    def test_dup_whole_codon(self, long_projector: ferro_hgvs.VariantProjector) -> None:
        """Duplicating one whole codon is an in-frame single-residue duplication."""
        projection = long_projector.project(
            "NC_000002.12:g.2006_2008dup", transcript="NM_LONG.1"
        )

        coding_name = projection.c_name
        assert coding_name is not None
        assert "dup" in coding_name
        assert projection.p_name == "NP_LONG.1(LONGGENE):p.(Arg2dup)"
        assert projection.is_frameshift is False

    def test_dup_single_base_frameshift(self, long_projector: ferro_hgvs.VariantProjector) -> None:
        """A one-base duplication produces a frameshift."""
        projection = long_projector.project("NC_000002.12:g.2006dup", transcript="NM_LONG.1")

        protein_name = projection.p_name
        assert protein_name is not None
        assert "fs" in protein_name
        assert projection.is_frameshift is True

    def test_ins_three_bases_codon_boundary(
        self, long_projector: ferro_hgvs.VariantProjector
    ) -> None:
        """Inserting three bases between g.2005 and g.2006 is an in-frame insertion."""
        projection = long_projector.project(
            "NC_000002.12:g.2005_2006insGGG", transcript="NM_LONG.1"
        )

        coding_name = projection.c_name
        assert coding_name is not None
        assert "ins" in coding_name

        protein_name = projection.p_name
        assert protein_name is not None
        assert "ins" in protein_name

        assert projection.is_frameshift is False

    def test_ins_single_base_frameshift(self, long_projector: ferro_hgvs.VariantProjector) -> None:
        """Inserting a single base produces a frameshift."""
        projection = long_projector.project(
            "NC_000002.12:g.2005_2006insA", transcript="NM_LONG.1"
        )

        protein_name = projection.p_name
        assert protein_name is not None
        assert "fs" in protein_name
        assert projection.is_frameshift is True

    def test_inversion_whole_codon(self, long_projector: ferro_hgvs.VariantProjector) -> None:
        """Inverting one whole codon is named as an in-frame ``delins`` at ``Arg2`` giving ``Ala``."""
        projection = long_projector.project(
            "NC_000002.12:g.2006_2008inv", transcript="NM_LONG.1"
        )

        coding_name = projection.c_name
        assert coding_name is not None
        assert "inv" in coding_name

        protein_name = projection.p_name
        assert protein_name is not None
        # Affected residue, edit type, and replacement residue must all be present.
        for expected_fragment in ("Arg2", "delins", "Ala"):
            assert expected_fragment in protein_name

        assert projection.is_frameshift is False

    def test_stop_codon_readthrough(self, long_projector: ferro_hgvs.VariantProjector) -> None:
        """Replacing the stop codon with ``TGG`` is named as an extension at ``Ter6``."""
        projection = long_projector.project(
            "NC_000002.12:g.2018_2020delinsTGG", transcript="NM_LONG.1"
        )

        protein_name = projection.p_name
        assert protein_name is not None
        assert "Ter6" in protein_name
        assert "ext" in protein_name
@pytest.fixture
def two_tx_projector(tmp_path: Path) -> ferro_hgvs.VariantProjector:
    """Return a ``VariantProjector`` whose reference holds two overlapping transcripts.

    ``NM_TX1.1`` and ``NM_TX2.1`` are identical apart from their ids: both
    belong to ``GENE1`` on the ``+`` strand, share the sequence
    ``ATGCGCTAA`` (CDS 1-9) and a single exon with genomic span 1000-1008
    on ``chr1``.  The JSON document is written to
    ``tmp_path / "two_tx.json"`` and passed to the projector by path.
    """
    transcript_seq = "ATGCGCTAA"
    region_start, region_end = 1000, 1008

    # Everything except "id" is common to both transcripts.
    shared_fields = {
        "gene_symbol": "GENE1",
        "strand": "+",
        "sequence": transcript_seq,
        "cds_start": 1,
        "cds_end": 9,
        "exons": [
            {
                "number": 1,
                "start": 1,
                "end": 9,
                "genomic_start": region_start,
                "genomic_end": region_end,
            }
        ],
        "chromosome": "chr1",
        "genomic_start": region_start,
        "genomic_end": region_end,
    }
    # "id" is placed first so the serialized key order matches a hand-written record.
    transcripts = [
        {"id": transcript_id, **shared_fields} for transcript_id in ("NM_TX1.1", "NM_TX2.1")
    ]

    # Pad the transcript bases with N so they begin at offset 1000 of chr1.
    chr1_seq = "".join(("N" * 1000, transcript_seq, "N" * 100))
    reference = {
        "transcripts": transcripts,
        "genomic_sequences": {"chr1": chr1_seq},
    }

    reference_file = tmp_path / "two_tx.json"
    reference_file.write_text(json.dumps(reference))
    return ferro_hgvs.VariantProjector(reference_json=str(reference_file))
class TestProjectAll:
    """Multi-transcript projection via ``project_all`` / ``project_normalized_all``.

    Uses the ``two_tx_projector`` fixture, whose two transcripts cover the
    same genomic span.
    """

    def test_project_all_returns_both_transcripts(
        self, two_tx_projector: ferro_hgvs.VariantProjector
    ) -> None:
        """A variant inside the shared span yields one projection per transcript."""
        results = two_tx_projector.project_all("chr1:g.1003C>A")

        assert len(results) == 2, f"expected 2 projections, got {len(results)}"

    def test_project_all_no_overlap_returns_empty(
        self, two_tx_projector: ferro_hgvs.VariantProjector
    ) -> None:
        """A variant outside every transcript yields an empty list, not an error."""
        results = two_tx_projector.project_all("NC_000001.11:g.5000A>G")

        assert results == [], "expected empty list for non-overlapping position"

    def test_project_all_projections_have_c_name(
        self, two_tx_projector: ferro_hgvs.VariantProjector
    ) -> None:
        """Every projection returned by ``project_all`` carries a ``:c.`` name."""
        for projection in two_tx_projector.project_all("chr1:g.1003C>A"):
            coding_name = projection.c_name
            assert coding_name is not None
            assert ":c." in coding_name

    def test_project_normalized_all_same_result_as_project_all(
        self, two_tx_projector: ferro_hgvs.VariantProjector
    ) -> None:
        """Projecting a pre-parsed variant matches projecting the raw HGVS string."""
        hgvs_text = "chr1:g.1003C>A"

        from_string = two_tx_projector.project_all(hgvs_text)
        parsed_variant = ferro_hgvs.parse(hgvs_text)
        from_parsed = two_tx_projector.project_normalized_all(parsed_variant)

        assert len(from_string) == len(from_parsed)
        # strict=True makes a length mismatch an error rather than silently truncating.
        for expected, actual in zip(from_string, from_parsed, strict=True):
            assert expected.transcript_id == actual.transcript_id
            assert expected.c_name == actual.c_name
class TestProjectNormalized:
    """Single-transcript projection of pre-parsed variants via ``project_normalized``."""

    def test_project_normalized_matches_project(
        self, projector: ferro_hgvs.VariantProjector
    ) -> None:
        """``project_normalized`` on a parsed variant agrees with ``project`` on the string."""
        hgvs_text = "NC_000001.11:g.1003C>A"

        from_string = projector.project(hgvs_text, "NM_TEST.1")
        parsed_variant = ferro_hgvs.parse(hgvs_text)
        from_parsed = projector.project_normalized(parsed_variant, "NM_TEST.1")

        assert from_string.c_name == from_parsed.c_name
        assert from_string.p_name == from_parsed.p_name
        assert from_string.transcript_id == from_parsed.transcript_id

    def test_project_normalized_returns_variant_projection(
        self, projector: ferro_hgvs.VariantProjector
    ) -> None:
        """The result is a ``VariantProjection`` with a populated ``c_name``."""
        parsed_variant = ferro_hgvs.parse("NC_000001.11:g.1003C>A")

        projection = projector.project_normalized(parsed_variant, "NM_TEST.1")

        assert isinstance(projection, ferro_hgvs.VariantProjection)
        assert projection.c_name is not None