Skip to main content

cyanea_omics/
variant.rs

1//! Genomic variant representation (VCF-style).
2//!
3//! Types for representing single-nucleotide variants, insertions, deletions,
4//! and complex variants with quality and filter information.
5
6use cyanea_core::{Annotated, CyaneaError, Result, Scored};
7
8use crate::genomic::{GenomicInterval, Strand};
9
/// The class of a genomic variant.
///
/// The enum carries no data, so it derives `Copy` (cheap pass-by-value) and
/// `Hash` (usable as a `HashMap`/`HashSet` key, e.g. for per-class counts).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum VariantType {
    /// Single nucleotide variant (ref and alt are both 1 base).
    Snv,
    /// Insertion (alt is longer than ref).
    Insertion,
    /// Deletion (ref is longer than alt).
    Deletion,
    /// Multi-nucleotide variant (ref and alt are equal length > 1).
    Mnv,
    /// Complex variant (none of the above).
    Complex,
}
25
/// Outcome of filtering for a single variant call.
///
/// Mirrors the three states a VCF-style FILTER column can express: passed,
/// failed (with details), or not reported.
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum VariantFilter {
    /// The call passed every filter that was applied.
    Pass,
    /// The call failed at least one filter.
    ///
    /// The payload holds one string per failure (presumably the filter
    /// names — confirm against the code that populates this variant).
    Fail(Vec<String>),
    /// No filter status is available for this call.
    Missing,
}
37
/// Zygosity of a variant call.
///
/// The enum carries no data, so it derives `Copy` and `Hash` in addition to
/// the comparison traits. How a zygosity is derived from genotype data is not
/// defined in this module; the variant descriptions below give the standard
/// genetics meaning of each term.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Zygosity {
    /// Homozygous call (in standard usage: the same allele on every copy).
    Homozygous,
    /// Heterozygous call (in standard usage: different alleles across copies).
    Heterozygous,
    /// Hemizygous call (in standard usage: only a single copy is present).
    Hemizygous,
    /// Zygosity could not be determined or was not reported.
    Unknown,
}
47
48/// A genomic variant (VCF-style representation).
49///
50/// Position is 1-based following VCF convention.
51#[derive(Debug, Clone)]
52#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
53pub struct Variant {
54    pub chrom: String,
55    /// 1-based position (VCF convention).
56    pub position: u64,
57    /// Optional identifier (e.g. rs12345).
58    pub id: Option<String>,
59    /// Reference allele.
60    pub ref_allele: Vec<u8>,
61    /// Alternate alleles.
62    pub alt_alleles: Vec<Vec<u8>>,
63    /// Phred-scaled quality score.
64    pub quality: Option<f64>,
65    /// Filter status.
66    pub filter: VariantFilter,
67}
68
69impl Variant {
70    /// Create a new variant with minimal fields.
71    ///
72    /// Validates that reference and alternate alleles are non-empty.
73    pub fn new(
74        chrom: impl Into<String>,
75        position: u64,
76        ref_allele: Vec<u8>,
77        alt_alleles: Vec<Vec<u8>>,
78    ) -> Result<Self> {
79        if ref_allele.is_empty() {
80            return Err(CyaneaError::InvalidInput(
81                "reference allele must not be empty".into(),
82            ));
83        }
84        if alt_alleles.is_empty() {
85            return Err(CyaneaError::InvalidInput(
86                "at least one alternate allele is required".into(),
87            ));
88        }
89        for (i, alt) in alt_alleles.iter().enumerate() {
90            if alt.is_empty() {
91                return Err(CyaneaError::InvalidInput(format!(
92                    "alternate allele {i} must not be empty"
93                )));
94            }
95        }
96        Ok(Self {
97            chrom: chrom.into(),
98            position,
99            id: None,
100            ref_allele,
101            alt_alleles,
102            quality: None,
103            filter: VariantFilter::Missing,
104        })
105    }
106
107    /// Infer the variant type from the first alternate allele.
108    pub fn variant_type(&self) -> VariantType {
109        let ref_len = self.ref_allele.len();
110        let alt_len = self.alt_alleles[0].len();
111
112        if ref_len == 1 && alt_len == 1 {
113            VariantType::Snv
114        } else if ref_len == alt_len {
115            VariantType::Mnv
116        } else if ref_len < alt_len {
117            VariantType::Insertion
118        } else if ref_len > alt_len {
119            VariantType::Deletion
120        } else {
121            VariantType::Complex
122        }
123    }
124
125    /// Whether this is a single-nucleotide variant.
126    pub fn is_snv(&self) -> bool {
127        self.variant_type() == VariantType::Snv
128    }
129
130    /// Whether this is an insertion or deletion.
131    pub fn is_indel(&self) -> bool {
132        matches!(
133            self.variant_type(),
134            VariantType::Insertion | VariantType::Deletion
135        )
136    }
137
138    /// Whether this SNV is a transition (A↔G or C↔T).
139    ///
140    /// Returns `false` for non-SNV variants.
141    pub fn is_transition(&self) -> bool {
142        if !self.is_snv() {
143            return false;
144        }
145        let r = self.ref_allele[0].to_ascii_uppercase();
146        let a = self.alt_alleles[0][0].to_ascii_uppercase();
147        matches!(
148            (r, a),
149            (b'A', b'G') | (b'G', b'A') | (b'C', b'T') | (b'T', b'C')
150        )
151    }
152
153    /// Whether this SNV is a transversion (complement of transition for SNVs).
154    ///
155    /// Returns `false` for non-SNV variants.
156    pub fn is_transversion(&self) -> bool {
157        self.is_snv() && !self.is_transition()
158    }
159
160    /// Convert the 1-based VCF position to a 0-based [`GenomicInterval`].
161    ///
162    /// The interval spans the reference allele.
163    pub fn to_genomic_interval(&self) -> GenomicInterval {
164        let start = self.position - 1; // VCF is 1-based
165        let end = start + self.ref_allele.len() as u64;
166        // This is safe because ref_allele is validated non-empty, so start < end.
167        GenomicInterval {
168            chrom: self.chrom.clone(),
169            start,
170            end,
171            strand: Strand::Unknown,
172        }
173    }
174}
175
176impl Annotated for Variant {
177    fn name(&self) -> &str {
178        // Can't return a computed string from &str, so use id if available.
179        // For the fallback we'd need to store a formatted name; return "" instead.
180        match &self.id {
181            Some(id) => id.as_str(),
182            None => "",
183        }
184    }
185}
186
187impl Scored for Variant {
188    fn score(&self) -> f64 {
189        self.quality.unwrap_or(0.0)
190    }
191}
192
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a single-alt variant at `chr1:100`; panics if `Variant::new`
    /// rejects the alleles.
    fn at_chr1_100(ref_allele: &[u8], alt_allele: &[u8]) -> Variant {
        Variant::new("chr1", 100, ref_allele.to_vec(), vec![alt_allele.to_vec()]).unwrap()
    }

    /// The canonical A→G SNV used by most tests.
    fn snv_a_to_g() -> Variant {
        at_chr1_100(b"A", b"G")
    }

    #[test]
    fn test_snv_construction() {
        let snv = snv_a_to_g();
        assert_eq!(snv.chrom, "chr1");
        assert_eq!(snv.position, 100);
        assert!(snv.is_snv());
    }

    #[test]
    fn test_empty_ref_allele() {
        let result = Variant::new("chr1", 100, Vec::new(), vec![b"G".to_vec()]);
        assert!(result.is_err());
    }

    #[test]
    fn test_empty_alt_alleles() {
        let result = Variant::new("chr1", 100, b"A".to_vec(), Vec::new());
        assert!(result.is_err());
    }

    #[test]
    fn test_empty_alt_allele_entry() {
        let result = Variant::new("chr1", 100, b"A".to_vec(), vec![Vec::new()]);
        assert!(result.is_err());
    }

    #[test]
    fn test_variant_type_snv() {
        assert_eq!(snv_a_to_g().variant_type(), VariantType::Snv);
    }

    #[test]
    fn test_variant_type_insertion() {
        let insertion = at_chr1_100(b"A", b"ATC");
        assert_eq!(insertion.variant_type(), VariantType::Insertion);
        assert!(insertion.is_indel());
    }

    #[test]
    fn test_variant_type_deletion() {
        let deletion = at_chr1_100(b"ATC", b"A");
        assert_eq!(deletion.variant_type(), VariantType::Deletion);
        assert!(deletion.is_indel());
    }

    #[test]
    fn test_variant_type_mnv() {
        let mnv = at_chr1_100(b"AT", b"GC");
        assert_eq!(mnv.variant_type(), VariantType::Mnv);
    }

    #[test]
    fn test_transition() {
        // A→G: purine to purine.
        let a_to_g = snv_a_to_g();
        assert!(a_to_g.is_transition());
        assert!(!a_to_g.is_transversion());

        // C→T: pyrimidine to pyrimidine.
        let c_to_t = at_chr1_100(b"C", b"T");
        assert!(c_to_t.is_transition());
    }

    #[test]
    fn test_transversion() {
        // A→C crosses purine/pyrimidine.
        let a_to_c = at_chr1_100(b"A", b"C");
        assert!(a_to_c.is_transversion());
        assert!(!a_to_c.is_transition());
    }

    #[test]
    fn test_transition_non_snv() {
        // An MNV is neither a transition nor a transversion.
        let mnv = at_chr1_100(b"AT", b"GC");
        assert!(!mnv.is_transition());
        assert!(!mnv.is_transversion());
    }

    #[test]
    fn test_to_genomic_interval() {
        let interval = snv_a_to_g().to_genomic_interval();
        assert_eq!(interval.chrom, "chr1");
        // 1-based position 100 becomes 0-based start 99.
        assert_eq!(interval.start, 99);
        assert_eq!(interval.end, 100);
    }

    #[test]
    fn test_to_genomic_interval_deletion() {
        let interval = at_chr1_100(b"ATC", b"A").to_genomic_interval();
        assert_eq!(interval.start, 99);
        // The interval covers all three reference bases.
        assert_eq!(interval.end, 102);
    }

    #[test]
    fn test_annotated() {
        let mut variant = snv_a_to_g();
        assert_eq!(variant.name(), "");

        variant.id = Some("rs12345".into());
        assert_eq!(variant.name(), "rs12345");
    }

    #[test]
    fn test_scored() {
        let mut variant = snv_a_to_g();
        assert_eq!(variant.score(), 0.0);

        variant.quality = Some(30.0);
        assert_eq!(variant.score(), 30.0);
    }
}