deepbiop_utils/
sv.rs

//! A structural variant or gene fusion event.
2use std::str::FromStr;
3
4use bstr::BString;
5use derive_builder::Builder;
6
7use std::fmt;
8
/// The class of a structural variant (SV) event.
///
/// Each variant has a three-letter uppercase code (`DEL`, `DUP`, `INV`,
/// `TRA`, `INS`) and a lowercase long name (`deletion`, `duplication`, ...).
/// Text that is neither is represented by `UNKNOWN`.
///
/// The enum is fieldless, so it is `Copy` and can be used as a hash-map key.
/// The declaration order fixes the integer discriminants (`Deletion` = 0
/// through `UNKNOWN` = 5); do not reorder the variants if any caller casts
/// them with `as`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum StructralVariantType {
    /// A deletion (`deletion` / `DEL`).
    Deletion,
    /// A duplication (`duplication` / `DUP`).
    Duplication,
    /// An inversion (`inversion` / `INV`).
    Inversion,
    /// A translocation (`translocation` / `TRA`).
    Translocation,
    /// An insertion (`insertion` / `INS`).
    Insertion,
    /// Fallback for any label that is not one of the recognized names.
    UNKNOWN,
}
19
20impl FromStr for StructralVariantType {
21    type Err = anyhow::Error;
22
23    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
24        match s {
25            "deletion" => Ok(Self::Deletion),
26            "DEL" => Ok(Self::Deletion),
27            "duplication" => Ok(Self::Duplication),
28            "DUP" => Ok(Self::Duplication),
29            "inversion" => Ok(Self::Inversion),
30            "INV" => Ok(Self::Inversion),
31            "translocation" => Ok(Self::Translocation),
32            "TRA" => Ok(Self::Translocation),
33            "insertion" => Ok(Self::Insertion),
34            "INS" => Ok(Self::Insertion),
35            _ => Ok(Self::UNKNOWN),
36        }
37    }
38}
39
40impl fmt::Display for StructralVariantType {
41    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42        match self {
43            StructralVariantType::Deletion => write!(f, "DEL"),
44            StructralVariantType::Duplication => write!(f, "DUP"),
45            StructralVariantType::Inversion => write!(f, "INV"),
46            StructralVariantType::Translocation => write!(f, "TRA"),
47            StructralVariantType::Insertion => write!(f, "INS"),
48            StructralVariantType::UNKNOWN => write!(f, "UNKNOWN"),
49        }
50    }
51}
52
53/// A StructuralVariant is a genomic interval defined by a chromosome, a start position and an end position.
54#[derive(Debug, Builder, Clone)]
55pub struct StructuralVariant {
56    pub sv_type: StructralVariantType,
57    pub chr: BString,
58    pub breakpoint1: usize,
59    pub breakpoint2: usize,
60}
61
62impl fmt::Display for StructuralVariant {
63    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
64        write!(
65            f,
66            "{}:{}:{}-{}",
67            self.sv_type, self.chr, self.breakpoint1, self.breakpoint2
68        )
69    }
70}
71
72impl FromStr for StructuralVariant {
73    type Err = anyhow::Error;
74
75    /// Parse a string into a StructuralVariant. The string should be formatted as
76    /// # Example
77    /// ```
78    /// use deepbiop_utils::sv::StructuralVariant;
79    /// let value =  "DEL:chr1:100-200";
80    /// let sv: StructuralVariant = value.parse().unwrap();
81    /// ```
82    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
83        let parts: Vec<&str> = s.split(':').collect();
84
85        if parts.len() != 3 {
86            return Err(anyhow::anyhow!("Invalid format"));
87        }
88
89        let sv_type = parts[0];
90        let chr = parts[1];
91
92        let positions: Vec<&str> = parts[2].split('-').collect();
93
94        if positions.len() != 2 {
95            return Err(anyhow::anyhow!("Invalid format"));
96        }
97
98        let start: usize = positions[0].parse()?;
99        let end: usize = positions[1].parse()?;
100
101        Ok(Self {
102            sv_type: StructralVariantType::from_str(sv_type)?,
103            chr: chr.into(),
104            breakpoint1: start,
105            breakpoint2: end,
106        })
107    }
108}
109
#[cfg(test)]
mod tests {
    use super::*;

    /// Every recognized label maps to its variant; any other label maps to
    /// `UNKNOWN`.
    #[test]
    fn test_structural_variant_type_from_str() {
        let test_cases = [
            ("deletion", StructralVariantType::Deletion),
            ("DEL", StructralVariantType::Deletion),
            ("duplication", StructralVariantType::Duplication),
            ("DUP", StructralVariantType::Duplication),
            ("inversion", StructralVariantType::Inversion),
            ("INV", StructralVariantType::Inversion),
            ("translocation", StructralVariantType::Translocation),
            ("TRA", StructralVariantType::Translocation),
            ("insertion", StructralVariantType::Insertion),
            ("INS", StructralVariantType::Insertion),
            ("unknown", StructralVariantType::UNKNOWN),
        ];

        for (input, expected) in test_cases {
            let result = StructralVariantType::from_str(input).unwrap();
            // Compare by value. A bare identifier used as the pattern in
            // `matches!` is a catch-all binding, so such a check can never fail.
            assert_eq!(result, expected, "unexpected variant for {:?}", input);
        }
    }

    /// Well-formed strings parse into the expected fields.
    #[test]
    fn test_structural_variant_from_str() {
        let test_cases = [
            (
                "DEL:chr1:100-200",
                StructuralVariant {
                    sv_type: StructralVariantType::Deletion,
                    chr: "chr1".into(),
                    breakpoint1: 100,
                    breakpoint2: 200,
                },
            ),
            (
                "duplication:chr2:300-400",
                StructuralVariant {
                    sv_type: StructralVariantType::Duplication,
                    chr: "chr2".into(),
                    breakpoint1: 300,
                    breakpoint2: 400,
                },
            ),
            (
                "INV:chrX:1000-2000",
                StructuralVariant {
                    sv_type: StructralVariantType::Inversion,
                    chr: "chrX".into(),
                    breakpoint1: 1000,
                    breakpoint2: 2000,
                },
            ),
        ];

        for (input, expected) in test_cases {
            let result: StructuralVariant = input.parse().unwrap();
            // Field-by-field so a failure names the field that differs.
            assert_eq!(result.sv_type, expected.sv_type);
            assert_eq!(result.chr, expected.chr);
            assert_eq!(result.breakpoint1, expected.breakpoint1);
            assert_eq!(result.breakpoint2, expected.breakpoint2);
        }
    }

    /// Malformed strings are rejected with an error rather than a panic.
    #[test]
    fn test_structural_variant_from_str_invalid_format() {
        let invalid_inputs = [
            "",
            "invalid_format",
            "DEL:chr1",
            "DEL:chr1:100",
            "DEL:chr1:abc-200",
            "DEL:chr1:100-def",
            // Too many fields / too many positions.
            "DEL:chr1:100-200:extra",
            "DEL:chr1:100-200-300",
        ];

        for input in invalid_inputs {
            assert!(
                StructuralVariant::from_str(input).is_err(),
                "expected an error for {:?}",
                input
            );
        }
    }

    /// The generated builder populates every field, and clones keep the type.
    #[test]
    fn test_structural_variant_builder() {
        let sv = StructuralVariantBuilder::default()
            .sv_type(StructralVariantType::Deletion)
            .chr("chr1".into())
            .breakpoint1(100)
            .breakpoint2(200)
            .build()
            .unwrap();

        assert_eq!(sv.sv_type, StructralVariantType::Deletion);
        assert_eq!(sv.chr, "chr1");
        assert_eq!(sv.breakpoint1, 100);
        assert_eq!(sv.breakpoint2, 200);

        let sv2 = sv.clone();

        assert_eq!(sv.sv_type, sv2.sv_type);
    }
}