import json
import pytest
import ferro_hgvs
class TestParsing:
    """Checks of ``ferro_hgvs.parse`` on the HGVS strings listed in each test."""

    def test_parse_coding_substitution(self) -> None:
        """A ``c.`` substitution exposes its accession and type and round-trips via ``str``."""
        hgvs_text = "NM_000088.3:c.100A>G"
        parsed = ferro_hgvs.parse(hgvs_text)
        assert parsed.reference == "NM_000088.3"
        assert parsed.variant_type == "coding"
        assert str(parsed) == hgvs_text

    def test_parse_genomic_substitution(self) -> None:
        """A ``g.`` substitution is reported with variant type ``genomic``."""
        parsed = ferro_hgvs.parse("NC_000001.11:g.12345A>G")
        assert parsed.reference == "NC_000001.11"
        assert parsed.variant_type == "genomic"

    def test_parse_protein_substitution(self) -> None:
        """A ``p.`` substitution is reported with variant type ``protein``."""
        parsed = ferro_hgvs.parse("NP_000079.2:p.Glu6Val")
        assert parsed.reference == "NP_000079.2"
        assert parsed.variant_type == "protein"

    def test_parse_noncoding_substitution(self) -> None:
        """An ``n.`` substitution is reported with variant type ``non_coding``."""
        parsed = ferro_hgvs.parse("NR_046018.2:n.100A>G")
        assert parsed.reference == "NR_046018.2"
        assert parsed.variant_type == "non_coding"

    def test_parse_invalid_raises_error(self) -> None:
        """Unparseable input raises ``ValueError`` whose message matches ``Parse error``."""
        with pytest.raises(ValueError, match="Parse error"):
            ferro_hgvs.parse("invalid")

    def test_parse_deletion(self) -> None:
        """The string form of a parsed deletion contains ``del``."""
        rendered = str(ferro_hgvs.parse("NM_000088.3:c.100del"))
        assert "del" in rendered

    def test_parse_insertion(self) -> None:
        """The string form of a parsed insertion contains ``ins``."""
        rendered = str(ferro_hgvs.parse("NM_000088.3:c.100_101insATG"))
        assert "ins" in rendered

    def test_parse_duplication(self) -> None:
        """The string form of a parsed duplication contains ``dup``."""
        rendered = str(ferro_hgvs.parse("NM_000088.3:c.100dup"))
        assert "dup" in rendered

    @pytest.mark.parametrize(
        ("hgvs", "selector"),
        [
            ("MYSEQ(1):c.100A>G", "1"),
            ("MY-SEQ(GENE1):c.100A>G", "GENE1"),
            ("MYREF_SEQ(1):c.100A>G", "1"),
            ("MYREF_SEQ(1):p.(Arg8Gln)", "1"),
        ],
    )
    def test_parse_accepts_gene_selector_on_non_refseq(self, hgvs: str, selector: str) -> None:
        """The parenthesised selector after the accession is surfaced as ``gene_symbol``.

        The JSON produced by ``to_json`` is decoded and the first value of the
        top-level object is inspected for its ``gene_symbol`` entry.
        """
        parsed = ferro_hgvs.parse(hgvs)
        decoded = json.loads(parsed.to_json())
        # Only the first top-level value is examined; its key is not checked here.
        first_entry = next(iter(decoded.values()))
        assert first_entry["gene_symbol"] == selector
class TestHgvsVariant:
    """Checks of the object returned by ``ferro_hgvs.parse``: equality, hashing, repr, dict."""

    def test_variant_equality(self) -> None:
        """Two parses of the same HGVS string compare equal."""
        hgvs_text = "NM_000088.3:c.100A>G"
        first = ferro_hgvs.parse(hgvs_text)
        second = ferro_hgvs.parse(hgvs_text)
        assert first == second

    def test_variant_hash(self) -> None:
        """Two parses of the same HGVS string hash alike and collapse to one set member."""
        hgvs_text = "NM_000088.3:c.100A>G"
        first = ferro_hgvs.parse(hgvs_text)
        second = ferro_hgvs.parse(hgvs_text)
        assert hash(first) == hash(second)
        assert len({first, second}) == 1

    def test_variant_repr(self) -> None:
        """``repr`` mentions both ``HgvsVariant`` and the reference accession."""
        text = repr(ferro_hgvs.parse("NM_000088.3:c.100A>G"))
        assert "HgvsVariant" in text
        assert "NM_000088.3" in text

    def test_to_dict(self) -> None:
        """``to_dict`` returns a ``dict`` carrying ``reference`` and ``variant_type``."""
        as_mapping = ferro_hgvs.parse("NM_000088.3:c.100A>G").to_dict()
        assert isinstance(as_mapping, dict)
        assert "reference" in as_mapping
        assert as_mapping["reference"] == "NM_000088.3"
        assert as_mapping["variant_type"] == "coding"
class TestVersion:
    """Checks that the ``ferro_hgvs`` module publishes a version string."""

    def test_version_exists(self) -> None:
        """``ferro_hgvs.__version__`` exists and is a non-empty ``str``."""
        assert hasattr(ferro_hgvs, "__version__")
        version = ferro_hgvs.__version__
        assert isinstance(version, str)
        assert len(version) > 0
class TestNormalizeIssue160RevcompInvSubspans:
    """Normalization cases whose expected output contains an ``inv`` edit.

    The genomic cases build a small reference file with a known sequence; the
    ``c.``/``r.``/``n.`` cases call the module-level ``ferro_hgvs.normalize``.
    NOTE(review): the class name points at issue 160, presumably in the
    project's tracker - confirm.
    """

    @staticmethod
    def _make_reference_json(tmp_path, contig: str, start_1based: int, bases: str) -> str:
        """Write ``ref.json`` under ``tmp_path`` and return its path as a string.

        The file describes one genomic sequence named ``contig``: a run of
        ``A`` (never shorter than 2000, and extending at least 200 positions
        beyond the planted bases) with ``bases`` written starting at the
        1-based coordinate ``start_1based``. The ``transcripts`` and
        ``proteins`` entries are left empty.
        """
        total_len = max(2000, start_1based + len(bases) + 200)
        backbone = ["A"] * total_len
        # HGVS coordinates are 1-based; list indices are 0-based.
        for idx, base in enumerate(bases, start=start_1based - 1):
            backbone[idx] = base
        document = {
            "transcripts": [],
            "proteins": {},
            "genomic_sequences": {contig: "".join(backbone)},
        }
        out_file = tmp_path / "ref.json"
        out_file.write_text(json.dumps(document))
        return str(out_file)

    def test_full_span_revcomp_in_cis_merges_to_inv(self, tmp_path) -> None:
        """With ``GG`` at 1092-1093, ``[1092G>C;1093G>C]`` is expected as ``1092_1093inv``."""
        reference_file = self._make_reference_json(tmp_path, "NC_000001.11", 1092, "GG")
        engine = ferro_hgvs.Normalizer(reference_json=reference_file)
        normalized = engine.normalize("NC_000001.11:g.[1092G>C;1093G>C]")
        assert normalized == "NC_000001.11:g.1092_1093inv"

    def test_sub_span_revcomp_splits_into_inv_plus_sub(self, tmp_path) -> None:
        """With ``TCC`` at 1150-1152, three substitutions become an ``inv`` plus one sub."""
        reference_file = self._make_reference_json(tmp_path, "NC_000001.11", 1150, "TCC")
        engine = ferro_hgvs.Normalizer(reference_json=reference_file)
        normalized = engine.normalize("NC_000001.11:g.[1150T>G;1151C>A;1152C>G]")
        assert normalized == "NC_000001.11:g.[1150_1151inv;1152C>G]"

    def test_user_typed_delins_with_inv_subspan_splits_symmetric(self, tmp_path) -> None:
        """With ``TCC`` at 1150-1152, ``1150_1152delinsGAG`` yields the same ``inv`` + sub."""
        reference_file = self._make_reference_json(tmp_path, "NC_000001.11", 1150, "TCC")
        engine = ferro_hgvs.Normalizer(reference_json=reference_file)
        normalized = engine.normalize("NC_000001.11:g.1150_1152delinsGAG")
        assert normalized == "NC_000001.11:g.[1150_1151inv;1152C>G]"

    def test_cds_full_span_revcomp_in_cis_merges_to_inv(self) -> None:
        """``c.[9G>C;10G>C]`` is expected to normalize to ``c.9_10inv``."""
        expected = "NM_000088.3:c.9_10inv"
        assert ferro_hgvs.normalize("NM_000088.3:c.[9G>C;10G>C]") == expected

    def test_cds_sub_span_revcomp_splits_into_inv_plus_sub(self) -> None:
        """``c.[13C>A;14T>G;15G>T]`` is expected as ``c.[13_14inv;15G>T]``."""
        expected = "NM_000088.3:c.[13_14inv;15G>T]"
        assert ferro_hgvs.normalize("NM_000088.3:c.[13C>A;14T>G;15G>T]") == expected

    def test_rna_full_span_revcomp_in_cis_merges_to_inv(self) -> None:
        """``r.[9g>c;10g>c]`` is expected to normalize to ``r.9_10inv``."""
        expected = "NM_000088.3:r.9_10inv"
        assert ferro_hgvs.normalize("NM_000088.3:r.[9g>c;10g>c]") == expected

    def test_n_user_typed_delins_with_full_span_revcomp(self) -> None:
        """``n.9_10delinsCC`` is expected to normalize to ``n.9_10inv``."""
        expected = "NM_000088.3:n.9_10inv"
        assert ferro_hgvs.normalize("NM_000088.3:n.9_10delinsCC") == expected
class TestNormalizeMergeConsecutive:
    """Checks of how ``ferro_hgvs.normalize`` treats adjacent vs. separated allele edits."""

    def test_consecutive_subs_collapse_to_delins(self) -> None:
        """Substitutions at 1000 and 1001 are expected as a single ``delins``."""
        expected = "NC_000001.11:g.1000_1001delinsAC"
        assert ferro_hgvs.normalize("NC_000001.11:g.[1000G>A;1001A>C]") == expected

    def test_consecutive_dels_collapse_to_ranged_del(self) -> None:
        """Deletions at 1000 and 1001 are expected as one ranged ``del``."""
        expected = "NC_000001.11:g.1000_1001del"
        assert ferro_hgvs.normalize("NC_000001.11:g.[1000del;1001del]") == expected

    def test_non_adjacent_subs_stay_separate(self) -> None:
        """Substitutions at 100 and 102 both survive, joined by ``;``."""
        normalized = ferro_hgvs.normalize("NC_000001.11:g.[100G>A;102C>T]")
        # Checked in order: first edit, second edit, then the allele separator.
        for fragment in ("100G>A", "102C>T", ";"):
            assert fragment in normalized
class TestNormalizeEmptyInsertDelinsToDel:
    """Checks that a ``delins`` with no inserted bases normalizes to a ``del``."""

    def test_single_position_empty_delins(self) -> None:
        """``c.10delins`` is expected to normalize to ``c.10del``."""
        expected = "NM_000088.3:c.10del"
        assert ferro_hgvs.normalize("NM_000088.3:c.10delins") == expected

    def test_multi_position_empty_delins_with_3prime_shift(self) -> None:
        """``c.10_11delins`` is expected to normalize to ``c.11_12del``."""
        expected = "NM_000088.3:c.11_12del"
        assert ferro_hgvs.normalize("NM_000088.3:c.10_11delins") == expected
class TestNormalizeDelinsToInversion:
    """Checks of ``delins`` inputs whose expected normalized form is ``inv`` or a sub."""

    def test_simple_delins_to_inv(self) -> None:
        """``c.7_9delinsCTT`` is expected to normalize to ``c.7_9inv``."""
        expected = "NM_000088.3:c.7_9inv"
        assert ferro_hgvs.normalize("NM_000088.3:c.7_9delinsCTT") == expected

    def test_delins_to_inv_with_outer_shortening(self) -> None:
        """``c.3_6delinsGGGC`` is expected to normalize to the narrower ``c.4_5inv``."""
        expected = "NM_000088.3:c.4_5inv"
        assert ferro_hgvs.normalize("NM_000088.3:c.3_6delinsGGGC") == expected

    def test_delins_one_base_complement_is_substitution(self) -> None:
        """``c.1delinsT`` is expected to normalize to the substitution ``c.1A>T``."""
        expected = "NM_000088.3:c.1A>T"
        assert ferro_hgvs.normalize("NM_000088.3:c.1delinsT") == expected
class TestNormalizeDelinsSharedAffixTrimming:
    """Checks of ``delins`` inputs expected to reduce to a smaller sub, del or ins."""

    def test_delins_shared_suffix_becomes_substitution(self) -> None:
        """``c.1_4delinsAAGC`` is expected to normalize to ``c.2T>A``."""
        expected = "NM_000088.3:c.2T>A"
        assert ferro_hgvs.normalize("NM_000088.3:c.1_4delinsAAGC") == expected

    def test_delins_shared_affix_pure_deletion(self) -> None:
        """``c.1_4delinsAC`` is expected to normalize to ``c.2_3del``."""
        expected = "NM_000088.3:c.2_3del"
        assert ferro_hgvs.normalize("NM_000088.3:c.1_4delinsAC") == expected

    def test_delins_shared_affix_pure_insertion(self) -> None:
        """``c.1_3delinsATCG`` is expected to normalize to ``c.2_3insC``."""
        expected = "NM_000088.3:c.2_3insC"
        assert ferro_hgvs.normalize("NM_000088.3:c.1_3delinsATCG") == expected