Skip to main content

ferro_hgvs/
python_helpers.rs

1//! Helper functions for Python bindings
2//!
3//! These functions are separated from the PyO3 code so they can be unit tested
4//! without requiring the Python runtime.
5
6use crate::hgvs::edit::NaEdit;
7use crate::hgvs::uncertainty::Mu;
8use crate::hgvs::variant::HgvsVariant;
9use crate::normalize::ShuffleDirection;
10
11/// Get the variant type as a string
12///
13/// Returns a human-readable string describing the type of HGVS variant.
14///
15/// # Examples
16///
17/// ```
18/// use ferro_hgvs::python_helpers::variant_type_str;
19/// use ferro_hgvs::parse_hgvs;
20///
21/// let variant = parse_hgvs("NC_000001.11:g.12345A>G").unwrap();
22/// assert_eq!(variant_type_str(&variant), "genomic");
23///
24/// let variant = parse_hgvs("NM_000088.3:c.100A>G").unwrap();
25/// assert_eq!(variant_type_str(&variant), "coding");
26/// ```
27pub fn variant_type_str(variant: &HgvsVariant) -> &'static str {
28    match variant {
29        HgvsVariant::Genome(_) => "genomic",
30        HgvsVariant::Cds(_) => "coding",
31        HgvsVariant::Tx(_) => "non_coding",
32        HgvsVariant::Protein(_) => "protein",
33        HgvsVariant::Rna(_) => "rna",
34        HgvsVariant::Mt(_) => "mitochondrial",
35        HgvsVariant::Circular(_) => "circular",
36        HgvsVariant::RnaFusion(_) => "rna_fusion",
37        HgvsVariant::Allele(_) => "allele",
38        HgvsVariant::NullAllele => "null_allele",
39        HgvsVariant::UnknownAllele => "unknown_allele",
40    }
41}
42
43/// Get the edit type as a string from an NaEdit
44///
45/// Returns a human-readable string describing the type of nucleic acid edit.
46///
47/// # Examples
48///
49/// ```
50/// use ferro_hgvs::python_helpers::na_edit_type_str;
51/// use ferro_hgvs::hgvs::edit::{NaEdit, Base};
52///
53/// let edit = NaEdit::Substitution { reference: Base::A, alternative: Base::G };
54/// assert_eq!(na_edit_type_str(&edit), "substitution");
55/// ```
56pub fn na_edit_type_str(edit: &NaEdit) -> &'static str {
57    match edit {
58        NaEdit::Substitution { .. } => "substitution",
59        NaEdit::SubstitutionNoRef { .. } => "substitution",
60        NaEdit::Deletion { .. } => "deletion",
61        NaEdit::Duplication { .. } => "duplication",
62        NaEdit::DupIns { .. } => "dupins",
63        NaEdit::Insertion { .. } => "insertion",
64        NaEdit::Delins { .. } => "delins",
65        NaEdit::Inversion { .. } => "inversion",
66        NaEdit::Repeat { .. } => "repeat",
67        NaEdit::MultiRepeat { .. } => "multi_repeat",
68        NaEdit::Identity { .. } => "identity",
69        NaEdit::Conversion { .. } => "conversion",
70        NaEdit::Unknown { .. } => "unknown",
71        NaEdit::Methylation { .. } => "methylation",
72        NaEdit::CopyNumber { .. } => "copy_number",
73        NaEdit::Splice => "splice",
74        NaEdit::NoProduct => "no_product",
75        NaEdit::PositionOnly => "position_only",
76    }
77}
78
/// Get the edit type from debug string representation
///
/// Fallback for values that have no dedicated `*_type_str` helper: the value
/// is rendered with `{:?}` and the resulting text is scanned for known edit
/// variant names. Use `na_edit_type_str` when the value is an `NaEdit`.
///
/// The known name that occurs *earliest* in the Debug text decides the
/// result, so the outermost variant wins over names that merely appear inside
/// its payload (`Delins(Deletion)` is reported as `"delins"`, not
/// `"deletion"`). Wrappers whose own name is not a known edit name (for
/// example `Some(..)`) are transparent.
///
/// The labels are the ones `na_edit_type_str` produces, including
/// `"multi_repeat"`, `"splice"`, `"no_product"` and `"position_only"`.
///
/// Returns `"unknown"` when no known name occurs in the Debug text.
///
/// # Examples
///
/// ```
/// use ferro_hgvs::python_helpers::edit_type_from_debug;
///
/// #[derive(Debug)]
/// struct MultiRepeat;
/// assert_eq!(edit_type_from_debug(&MultiRepeat), "multi_repeat");
/// ```
pub fn edit_type_from_debug<T: std::fmt::Debug>(edit: &T) -> &'static str {
    // (name as it appears in Debug output, label handed back to the caller).
    // No entry is a prefix of another, so two entries can never match at the
    // same byte offset and the table order does not influence the result.
    const EDIT_NAMES: &[(&str, &str)] = &[
        ("Substitution", "substitution"),
        ("Deletion", "deletion"),
        ("DupIns", "dupins"),
        ("Duplication", "duplication"),
        ("Insertion", "insertion"),
        ("Delins", "delins"),
        ("Inversion", "inversion"),
        ("MultiRepeat", "multi_repeat"),
        ("Repeat", "repeat"),
        ("Identity", "identity"),
        ("Conversion", "conversion"),
        ("Methylation", "methylation"),
        ("CopyNumber", "copy_number"),
        ("Splice", "splice"),
        ("NoProduct", "no_product"),
        ("PositionOnly", "position_only"),
    ];

    let debug = format!("{:?}", edit);

    // Pick the name with the smallest byte offset. "MultiRepeat" starts
    // before the "Repeat" it contains, so it is preferred automatically.
    EDIT_NAMES
        .iter()
        .filter_map(|&(name, label)| debug.find(name).map(|offset| (offset, label)))
        .min_by_key(|&(offset, _)| offset)
        .map_or("unknown", |(_, label)| label)
}
113
114/// Parse a direction string into ShuffleDirection
115///
116/// Accepts various common formats:
117/// - 3prime, 3', 3 -> ThreePrime (default)
118/// - 5prime, 5', 5 -> FivePrime
119///
120/// # Examples
121///
122/// ```
123/// use ferro_hgvs::python_helpers::parse_direction;
124/// use ferro_hgvs::ShuffleDirection;
125///
126/// assert!(matches!(parse_direction("3prime"), ShuffleDirection::ThreePrime));
127/// assert!(matches!(parse_direction("5'"), ShuffleDirection::FivePrime));
128/// ```
129pub fn parse_direction(direction: &str) -> ShuffleDirection {
130    match direction.to_lowercase().as_str() {
131        "5prime" | "5'" | "5" => ShuffleDirection::FivePrime,
132        _ => ShuffleDirection::ThreePrime,
133    }
134}
135
136/// Get the reference accession from a variant
137///
138/// Returns the accession string for variants that have one, or an error message
139/// for variants that don't support accessions.
140///
141/// # Examples
142///
143/// ```
144/// use ferro_hgvs::python_helpers::get_variant_reference;
145/// use ferro_hgvs::parse_hgvs;
146///
147/// let variant = parse_hgvs("NC_000001.11:g.12345A>G").unwrap();
148/// assert_eq!(get_variant_reference(&variant).unwrap(), "NC_000001.11");
149/// ```
150pub fn get_variant_reference(variant: &HgvsVariant) -> Result<String, &'static str> {
151    match variant {
152        HgvsVariant::Genome(v) => Ok(v.accession.to_string()),
153        HgvsVariant::Cds(v) => Ok(v.accession.to_string()),
154        HgvsVariant::Tx(v) => Ok(v.accession.to_string()),
155        HgvsVariant::Protein(v) => Ok(v.accession.to_string()),
156        HgvsVariant::Rna(v) => Ok(v.accession.to_string()),
157        HgvsVariant::Mt(v) => Ok(v.accession.to_string()),
158        HgvsVariant::Circular(v) => Ok(v.accession.to_string()),
159        HgvsVariant::RnaFusion(v) => Ok(v.five_prime.accession.to_string()),
160        HgvsVariant::Allele(a) => {
161            if let Some(first) = a.variants.first() {
162                get_variant_reference(first)
163            } else {
164                Err("Empty allele")
165            }
166        }
167        HgvsVariant::NullAllele | HgvsVariant::UnknownAllele => {
168            Err("No reference for null/unknown allele")
169        }
170    }
171}
172
173/// Get the edit type from a Mu-wrapped NaEdit
174///
175/// Handles the uncertainty wrapper, returning "unknown" for Mu::Unknown.
176pub fn mu_na_edit_type_str(edit: &Mu<NaEdit>) -> &'static str {
177    match edit.inner() {
178        Some(e) => na_edit_type_str(e),
179        None => "unknown",
180    }
181}
182
183/// Get the edit type string for any HgvsVariant
184///
185/// Returns a string describing the type of edit in the variant.
186pub fn get_variant_edit_type(variant: &HgvsVariant) -> &'static str {
187    match variant {
188        HgvsVariant::Genome(v) => mu_na_edit_type_str(&v.loc_edit.edit),
189        HgvsVariant::Cds(v) => mu_na_edit_type_str(&v.loc_edit.edit),
190        HgvsVariant::Tx(v) => mu_na_edit_type_str(&v.loc_edit.edit),
191        HgvsVariant::Rna(v) => mu_na_edit_type_str(&v.loc_edit.edit),
192        HgvsVariant::Mt(v) => mu_na_edit_type_str(&v.loc_edit.edit),
193        HgvsVariant::Circular(v) => mu_na_edit_type_str(&v.loc_edit.edit),
194        HgvsVariant::Protein(_) => "protein",
195        HgvsVariant::RnaFusion(_) => "fusion",
196        HgvsVariant::Allele(_) => "allele",
197        HgvsVariant::NullAllele => "null",
198        HgvsVariant::UnknownAllele => "unknown",
199    }
200}
201
#[cfg(test)]
mod tests {
    //! Unit tests for the helper functions of this module. Variants are
    //! obtained either from `parse_hgvs` or by constructing enum values
    //! directly, so no Python runtime is involved.

    use super::*;
    use crate::hgvs::edit::{Base, MethylationStatus};
    use crate::parse_hgvs;

    // ----- variant_type_str -----

    #[test]
    fn test_variant_type_str_genomic() {
        assert_eq!(
            variant_type_str(&parse_hgvs("NC_000001.11:g.12345A>G").unwrap()),
            "genomic"
        );
    }

    #[test]
    fn test_variant_type_str_coding() {
        assert_eq!(
            variant_type_str(&parse_hgvs("NM_000088.3:c.100A>G").unwrap()),
            "coding"
        );
    }

    #[test]
    fn test_variant_type_str_non_coding() {
        assert_eq!(
            variant_type_str(&parse_hgvs("NR_000001.1:n.100A>G").unwrap()),
            "non_coding"
        );
    }

    #[test]
    fn test_variant_type_str_protein() {
        assert_eq!(
            variant_type_str(&parse_hgvs("NP_000001.1:p.Val100Glu").unwrap()),
            "protein"
        );
    }

    #[test]
    fn test_variant_type_str_rna() {
        assert_eq!(
            variant_type_str(&parse_hgvs("NM_000088.3:r.100a>g").unwrap()),
            "rna"
        );
    }

    #[test]
    fn test_variant_type_str_mitochondrial() {
        assert_eq!(
            variant_type_str(&parse_hgvs("NC_012920.1:m.100A>G").unwrap()),
            "mitochondrial"
        );
    }

    #[test]
    fn test_variant_type_str_circular() {
        assert_eq!(
            variant_type_str(&parse_hgvs("NC_001416.1:o.100A>G").unwrap()),
            "circular"
        );
    }

    #[test]
    fn test_variant_type_str_null_allele() {
        assert_eq!(variant_type_str(&HgvsVariant::NullAllele), "null_allele");
    }

    #[test]
    fn test_variant_type_str_unknown_allele() {
        assert_eq!(
            variant_type_str(&HgvsVariant::UnknownAllele),
            "unknown_allele"
        );
    }

    // ----- na_edit_type_str -----

    #[test]
    fn test_na_edit_type_str_substitution() {
        let a_to_g = NaEdit::Substitution {
            reference: Base::A,
            alternative: Base::G,
        };
        assert_eq!(na_edit_type_str(&a_to_g), "substitution");
    }

    #[test]
    fn test_na_edit_type_str_deletion() {
        let bare_deletion = NaEdit::Deletion {
            sequence: None,
            length: None,
        };
        assert_eq!(na_edit_type_str(&bare_deletion), "deletion");
    }

    #[test]
    fn test_na_edit_type_str_duplication() {
        let bare_duplication = NaEdit::Duplication {
            sequence: None,
            length: None,
            uncertain_extent: None,
        };
        assert_eq!(na_edit_type_str(&bare_duplication), "duplication");
    }

    #[test]
    fn test_na_edit_type_str_insertion() {
        use crate::hgvs::edit::{InsertedSequence, Sequence};
        use std::str::FromStr;

        let inserted = InsertedSequence::Literal(Sequence::from_str("ATG").unwrap());
        assert_eq!(
            na_edit_type_str(&NaEdit::Insertion { sequence: inserted }),
            "insertion"
        );
    }

    #[test]
    fn test_na_edit_type_str_delins() {
        use crate::hgvs::edit::{InsertedSequence, Sequence};
        use std::str::FromStr;

        let replacement = InsertedSequence::Literal(Sequence::from_str("ATG").unwrap());
        assert_eq!(
            na_edit_type_str(&NaEdit::Delins {
                sequence: replacement
            }),
            "delins"
        );
    }

    #[test]
    fn test_na_edit_type_str_inversion() {
        let bare_inversion = NaEdit::Inversion {
            sequence: None,
            length: None,
        };
        assert_eq!(na_edit_type_str(&bare_inversion), "inversion");
    }

    #[test]
    fn test_na_edit_type_str_repeat() {
        use crate::hgvs::edit::RepeatCount;

        let ten_repeats = NaEdit::Repeat {
            sequence: None,
            count: RepeatCount::Exact(10),
            additional_counts: vec![],
            trailing: None,
        };
        assert_eq!(na_edit_type_str(&ten_repeats), "repeat");
    }

    #[test]
    fn test_na_edit_type_str_identity() {
        let no_change = NaEdit::Identity {
            sequence: None,
            whole_entity: false,
        };
        assert_eq!(na_edit_type_str(&no_change), "identity");
    }

    #[test]
    fn test_na_edit_type_str_unknown() {
        let unknown_edit = NaEdit::Unknown {
            whole_entity: false,
        };
        assert_eq!(na_edit_type_str(&unknown_edit), "unknown");
    }

    #[test]
    fn test_na_edit_type_str_methylation() {
        let gain = NaEdit::Methylation {
            status: MethylationStatus::GainOfMethylation,
        };
        assert_eq!(na_edit_type_str(&gain), "methylation");
    }

    // ----- edit_type_from_debug -----
    // Only the Debug text matters, so unit structs with telling names are
    // enough to drive the function.

    #[test]
    fn test_edit_type_from_debug_substitution() {
        #[derive(Debug)]
        struct TestSubstitution;

        let label = edit_type_from_debug(&TestSubstitution);
        assert_eq!(label, "substitution");
    }

    #[test]
    fn test_edit_type_from_debug_deletion() {
        #[derive(Debug)]
        struct TestDeletion;

        let label = edit_type_from_debug(&TestDeletion);
        assert_eq!(label, "deletion");
    }

    #[test]
    fn test_edit_type_from_debug_unknown() {
        #[derive(Debug)]
        struct TestOther;

        let label = edit_type_from_debug(&TestOther);
        assert_eq!(label, "unknown");
    }

    // ----- parse_direction -----

    #[test]
    fn test_parse_direction_three_prime() {
        for spelling in ["3prime", "3'", "3"].iter() {
            assert!(matches!(
                parse_direction(spelling),
                ShuffleDirection::ThreePrime
            ));
        }
    }

    #[test]
    fn test_parse_direction_five_prime() {
        for spelling in ["5prime", "5'", "5"].iter() {
            assert!(matches!(
                parse_direction(spelling),
                ShuffleDirection::FivePrime
            ));
        }
    }

    #[test]
    fn test_parse_direction_default() {
        // Unrecognised and empty input both select the default direction.
        for input in ["unknown", ""].iter() {
            assert!(matches!(
                parse_direction(input),
                ShuffleDirection::ThreePrime
            ));
        }
    }

    #[test]
    fn test_parse_direction_case_insensitive() {
        for spelling in ["5PRIME", "5Prime"].iter() {
            assert!(matches!(
                parse_direction(spelling),
                ShuffleDirection::FivePrime
            ));
        }
    }

    // ----- get_variant_reference -----

    #[test]
    fn test_get_variant_reference_genomic() {
        let reference = get_variant_reference(&parse_hgvs("NC_000001.11:g.12345A>G").unwrap());
        assert_eq!(reference.unwrap(), "NC_000001.11");
    }

    #[test]
    fn test_get_variant_reference_coding() {
        let reference = get_variant_reference(&parse_hgvs("NM_000088.3:c.100A>G").unwrap());
        assert_eq!(reference.unwrap(), "NM_000088.3");
    }

    #[test]
    fn test_get_variant_reference_protein() {
        let reference = get_variant_reference(&parse_hgvs("NP_000001.1:p.Val100Glu").unwrap());
        assert_eq!(reference.unwrap(), "NP_000001.1");
    }

    #[test]
    fn test_get_variant_reference_null_allele() {
        let outcome = get_variant_reference(&HgvsVariant::NullAllele);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_get_variant_reference_unknown_allele() {
        let outcome = get_variant_reference(&HgvsVariant::UnknownAllele);
        assert!(outcome.is_err());
    }

    // ----- get_variant_edit_type -----

    #[test]
    fn test_get_variant_edit_type_substitution() {
        assert_eq!(
            get_variant_edit_type(&parse_hgvs("NC_000001.11:g.12345A>G").unwrap()),
            "substitution"
        );
    }

    #[test]
    fn test_get_variant_edit_type_deletion() {
        assert_eq!(
            get_variant_edit_type(&parse_hgvs("NC_000001.11:g.12345del").unwrap()),
            "deletion"
        );
    }

    #[test]
    fn test_get_variant_edit_type_duplication() {
        assert_eq!(
            get_variant_edit_type(&parse_hgvs("NC_000001.11:g.12345dup").unwrap()),
            "duplication"
        );
    }

    #[test]
    fn test_get_variant_edit_type_insertion() {
        assert_eq!(
            get_variant_edit_type(&parse_hgvs("NC_000001.11:g.12345_12346insATG").unwrap()),
            "insertion"
        );
    }

    #[test]
    fn test_get_variant_edit_type_delins() {
        assert_eq!(
            get_variant_edit_type(&parse_hgvs("NC_000001.11:g.12345_12350delinsATG").unwrap()),
            "delins"
        );
    }

    #[test]
    fn test_get_variant_edit_type_inversion() {
        assert_eq!(
            get_variant_edit_type(&parse_hgvs("NC_000001.11:g.12345_12350inv").unwrap()),
            "inversion"
        );
    }

    #[test]
    fn test_get_variant_edit_type_protein() {
        assert_eq!(
            get_variant_edit_type(&parse_hgvs("NP_000001.1:p.Val100Glu").unwrap()),
            "protein"
        );
    }

    #[test]
    fn test_get_variant_edit_type_null() {
        assert_eq!(get_variant_edit_type(&HgvsVariant::NullAllele), "null");
    }

    #[test]
    fn test_get_variant_edit_type_unknown() {
        assert_eq!(
            get_variant_edit_type(&HgvsVariant::UnknownAllele),
            "unknown"
        );
    }
}