// lorikeet_genome/model/byte_array_allele.rs

1use std::hash::{Hash, Hasher};
2
3use crate::model::variants;
4use crate::utils::vcf_constants::VCFConstants;
5
6#[derive(Debug, Clone, Ord, PartialOrd)]
7pub struct ByteArrayAllele {
8    pub(crate) is_ref: bool,
9    pub(crate) is_no_call: bool,
10    pub(crate) is_symbolic: bool,
11    pub(crate) bases: Vec<u8>,
12}
13
14impl Hash for ByteArrayAllele {
15    fn hash<H: Hasher>(&self, state: &mut H) {
16        self.bases.hash(state);
17    }
18}
19
20impl PartialEq for ByteArrayAllele {
21    fn eq(&self, other: &Self) -> bool {
22        self.bases == other.bases
23    }
24}
25
26impl Eq for ByteArrayAllele {}
27
28impl ByteArrayAllele {
29    const SINGLE_BREAKEND_INDICATOR: char = '.';
30    const BREAKEND_EXTENDING_RIGHT: char = '[';
31    const BREAKEND_EXTENDING_LEFT: char = ']';
32    const SYMBOLIC_ALLELE_START: char = '<';
33    const SYMBOLIC_ALLELE_END: char = '>';
34
35    pub const NO_CALL: char = '.';
36    pub const SPAN_DEL: char = '*';
37
38    pub fn new(bases: &[u8], is_ref: bool) -> ByteArrayAllele {
39        if Self::would_be_null_allele(bases) {
40            panic!("Null alleles are not supported")
41        }
42
43        if Self::would_be_no_call_allele(bases) {
44            if is_ref {
45                panic!("Cannot tag a no call allele as the reference allele")
46            } else {
47                return ByteArrayAllele {
48                    bases: bases.to_ascii_uppercase(),
49                    is_ref: false,
50                    is_no_call: true,
51                    is_symbolic: false,
52                };
53            }
54        }
55
56        if Self::would_be_symbolic_allele(bases) {
57            if is_ref {
58                panic!("Cannot tag a no call allele as the reference allele")
59            } else {
60                return ByteArrayAllele {
61                    bases: bases.to_ascii_uppercase(),
62                    is_ref: false,
63                    is_no_call: false,
64                    is_symbolic: true,
65                };
66            }
67        }
68
69        if !Self::acceptable_allele_bases(bases, is_ref) {
70            panic!(
71                "Unexpected base in allele bases {}",
72                String::from_utf8_lossy(bases).to_string()
73            )
74        } else {
75            return ByteArrayAllele {
76                bases: bases.to_ascii_uppercase(),
77                is_ref,
78                is_no_call: false,
79                is_symbolic: false,
80            };
81        }
82    }
83
84    pub fn is_span_del(&self) -> bool {
85        self.bases.as_slice() == b"*"
86    }
87
88    pub fn len(&self) -> usize {
89        return if self.is_symbolic {
90            0
91        } else {
92            self.bases.len()
93        };
94    }
95
96    // pub fn get_bases(&self) -> &Vec<u8> {
97    //     return if self.is_symbolic {
98    //         &*variants::EMPTY_ALLELE_BASES
99    //     } else {
100    //         &self.bases
101    //     };
102    // }
103
104    pub fn fake(is_ref: bool) -> ByteArrayAllele {
105        if is_ref {
106            Self::new("N".as_bytes(), is_ref)
107        } else {
108            Self::new("<FAKE_ALT>".as_bytes(), is_ref)
109        }
110    }
111
112    pub fn create_fake_alleles() -> Vec<ByteArrayAllele> {
113        let alleles = vec![Self::fake(true), Self::fake(false)];
114
115        return alleles;
116    }
117
118    pub fn no_call() -> ByteArrayAllele {
119        Self {
120            bases: vec![Self::NO_CALL as u8],
121            is_ref: false,
122            is_no_call: true,
123            is_symbolic: false,
124        }
125    }
126
127    pub fn extend(left: &ByteArrayAllele, right: &[u8]) -> ByteArrayAllele {
128        if left.is_symbolic {
129            panic!("Cannot extend a symbolic allele");
130        };
131
132        let mut bases = vec![0; left.len() + right.len()];
133        bases[0..left.len()].clone_from_slice(&left.get_bases()[0..left.len()]);
134        bases[left.len()..].clone_from_slice(right);
135
136        return Self::new(&bases, left.is_ref);
137    }
138
139    pub fn would_be_null_allele(bases: &[u8]) -> bool {
140        return bases.len() == 1 && bases[0] as char == VCFConstants::NULL_ALLELE
141            || bases.len() == 0;
142    }
143
144    pub fn would_be_no_call_allele(bases: &[u8]) -> bool {
145        return bases.len() == 1 && bases[0] as char == VCFConstants::NO_CALL_ALLELE;
146    }
147
148    pub fn would_be_star_allele(bases: &[u8]) -> bool {
149        return bases.len() == 1 && bases[0] as char == VCFConstants::SPANNING_DELETION_ALLELE;
150    }
151
152    pub fn would_be_symbolic_allele(bases: &[u8]) -> bool {
153        if bases.len() <= 1 {
154            return false;
155        } else {
156            return bases[0] == Self::SYMBOLIC_ALLELE_START as u8
157                || bases[bases.len() - 1] == Self::SYMBOLIC_ALLELE_END as u8
158                || Self::would_be_breakpoint(bases)
159                || Self::would_be_single_breakend(bases);
160        }
161    }
162
163    pub fn would_be_breakpoint(bases: &[u8]) -> bool {
164        if bases.len() <= 1 {
165            return false;
166        }
167        return bases.iter().any(|base| {
168            *base as char == Self::BREAKEND_EXTENDING_LEFT
169                || *base as char == Self::BREAKEND_EXTENDING_RIGHT
170        });
171    }
172
173    pub fn would_be_single_breakend(bases: &[u8]) -> bool {
174        if bases.len() <= 1 {
175            return false;
176        } else {
177            return bases[0] == Self::SINGLE_BREAKEND_INDICATOR as u8
178                || bases[bases.len() - 1] == Self::SINGLE_BREAKEND_INDICATOR as u8;
179        }
180    }
181
182    pub fn acceptable_allele_bases(bases: &[u8], is_ref: bool) -> bool {
183        if Self::would_be_null_allele(bases) {
184            // debug!("Null allele bases are not acceptable");
185            return false;
186        } else if Self::would_be_no_call_allele(bases) || Self::would_be_symbolic_allele(bases) {
187            // debug!("No call or symbolic allele bases are acceptable: {} {}", Self::would_be_no_call_allele(bases), Self::would_be_symbolic_allele(bases));
188            return true;
189        } else if Self::would_be_star_allele(bases) {
190            // debug!("Star allele bases are acceptable: {}", is_ref);
191            return !is_ref;
192        } else {
193            // return true if there are any unacceptable bases, so take conjugate value
194            !bases.iter().any(|base| {
195                let base = *base as char;
196                let result = match base {
197                    'A' | 'C' | 'T' | 'G' | 'a' | 'c' | 't' | 'g' | 'N' | 'n' | 'R' | 'Y' | 'K'
198                    | 'M' | 'S' | 'W' | 'B' | 'D' | 'H' | 'V' | 'U' => false,
199                    _ => true,
200                };
201                // if result {
202                //     // debug!("Base {:?} is not acceptable", base);
203                // }
204                result
205            })
206        }
207    }
208}
209
/// Common read-only interface over allele representations.
///
/// Implementors must be comparable, hashable, cloneable, debuggable and safe
/// to move and share between threads.
pub trait Allele: Eq + PartialEq + Clone + std::fmt::Debug + Send + Sync + Hash {
    /// Whether this allele is the reference allele.
    fn is_reference(&self) -> bool;

    /// Length reported for this allele.
    fn length(&self) -> usize;

    /// Whether this allele is symbolic.
    fn is_symbolic(&self) -> bool;

    /// Whether this allele is called.
    fn is_called(&self) -> bool;

    /// Whether this allele is a no-call.
    fn is_no_call(&self) -> bool;

    /// The bases exposed by this allele.
    fn get_bases(&self) -> &[u8];

    /// Constructs the no-call allele for the implementing type.
    fn no_call() -> Self;

    /// Returns `true` when [`Allele::get_bases`] is byte-for-byte equal to
    /// `other`.
    fn bases_match(&self, other: &[u8]) -> bool {
        let own_bases = self.get_bases();
        own_bases == other
    }
}
229
230impl Allele for ByteArrayAllele {
231    fn is_reference(&self) -> bool {
232        self.is_ref
233    }
234
235    fn length(&self) -> usize {
236        if self.is_symbolic {
237            0
238        } else {
239            self.len()
240        }
241    }
242
243    fn is_symbolic(&self) -> bool {
244        self.is_symbolic
245    }
246
247    fn is_called(&self) -> bool {
248        !self.is_no_call
249    }
250
251    fn is_no_call(&self) -> bool {
252        self.is_no_call
253    }
254
255    fn get_bases(&self) -> &[u8] {
256        return if self.is_symbolic {
257            variants::EMPTY_ALLELE_BASES.as_slice()
258        } else {
259            self.bases.as_slice()
260        };
261    }
262
263    fn no_call() -> Self {
264        Self {
265            bases: vec![Self::NO_CALL as u8],
266            is_ref: false,
267            is_no_call: true,
268            is_symbolic: false,
269        }
270    }
271}