lorikeet_genome/model/
byte_array_allele.rs1use std::hash::{Hash, Hasher};
2
3use crate::model::variants;
4use crate::utils::vcf_constants::VCFConstants;
5
6#[derive(Debug, Clone, Ord, PartialOrd)]
7pub struct ByteArrayAllele {
8 pub(crate) is_ref: bool,
9 pub(crate) is_no_call: bool,
10 pub(crate) is_symbolic: bool,
11 pub(crate) bases: Vec<u8>,
12}
13
14impl Hash for ByteArrayAllele {
15 fn hash<H: Hasher>(&self, state: &mut H) {
16 self.bases.hash(state);
17 }
18}
19
20impl PartialEq for ByteArrayAllele {
21 fn eq(&self, other: &Self) -> bool {
22 self.bases == other.bases
23 }
24}
25
// Marker impl: the hand-written `PartialEq` above compares `Vec<u8>` contents,
// which is a total equivalence relation, so `Eq` adds no further methods.
impl Eq for ByteArrayAllele {}
27
28impl ByteArrayAllele {
29 const SINGLE_BREAKEND_INDICATOR: char = '.';
30 const BREAKEND_EXTENDING_RIGHT: char = '[';
31 const BREAKEND_EXTENDING_LEFT: char = ']';
32 const SYMBOLIC_ALLELE_START: char = '<';
33 const SYMBOLIC_ALLELE_END: char = '>';
34
35 pub const NO_CALL: char = '.';
36 pub const SPAN_DEL: char = '*';
37
38 pub fn new(bases: &[u8], is_ref: bool) -> ByteArrayAllele {
39 if Self::would_be_null_allele(bases) {
40 panic!("Null alleles are not supported")
41 }
42
43 if Self::would_be_no_call_allele(bases) {
44 if is_ref {
45 panic!("Cannot tag a no call allele as the reference allele")
46 } else {
47 return ByteArrayAllele {
48 bases: bases.to_ascii_uppercase(),
49 is_ref: false,
50 is_no_call: true,
51 is_symbolic: false,
52 };
53 }
54 }
55
56 if Self::would_be_symbolic_allele(bases) {
57 if is_ref {
58 panic!("Cannot tag a no call allele as the reference allele")
59 } else {
60 return ByteArrayAllele {
61 bases: bases.to_ascii_uppercase(),
62 is_ref: false,
63 is_no_call: false,
64 is_symbolic: true,
65 };
66 }
67 }
68
69 if !Self::acceptable_allele_bases(bases, is_ref) {
70 panic!(
71 "Unexpected base in allele bases {}",
72 String::from_utf8_lossy(bases).to_string()
73 )
74 } else {
75 return ByteArrayAllele {
76 bases: bases.to_ascii_uppercase(),
77 is_ref,
78 is_no_call: false,
79 is_symbolic: false,
80 };
81 }
82 }
83
84 pub fn is_span_del(&self) -> bool {
85 self.bases.as_slice() == b"*"
86 }
87
88 pub fn len(&self) -> usize {
89 return if self.is_symbolic {
90 0
91 } else {
92 self.bases.len()
93 };
94 }
95
96 pub fn fake(is_ref: bool) -> ByteArrayAllele {
105 if is_ref {
106 Self::new("N".as_bytes(), is_ref)
107 } else {
108 Self::new("<FAKE_ALT>".as_bytes(), is_ref)
109 }
110 }
111
112 pub fn create_fake_alleles() -> Vec<ByteArrayAllele> {
113 let alleles = vec![Self::fake(true), Self::fake(false)];
114
115 return alleles;
116 }
117
118 pub fn no_call() -> ByteArrayAllele {
119 Self {
120 bases: vec![Self::NO_CALL as u8],
121 is_ref: false,
122 is_no_call: true,
123 is_symbolic: false,
124 }
125 }
126
127 pub fn extend(left: &ByteArrayAllele, right: &[u8]) -> ByteArrayAllele {
128 if left.is_symbolic {
129 panic!("Cannot extend a symbolic allele");
130 };
131
132 let mut bases = vec![0; left.len() + right.len()];
133 bases[0..left.len()].clone_from_slice(&left.get_bases()[0..left.len()]);
134 bases[left.len()..].clone_from_slice(right);
135
136 return Self::new(&bases, left.is_ref);
137 }
138
139 pub fn would_be_null_allele(bases: &[u8]) -> bool {
140 return bases.len() == 1 && bases[0] as char == VCFConstants::NULL_ALLELE
141 || bases.len() == 0;
142 }
143
144 pub fn would_be_no_call_allele(bases: &[u8]) -> bool {
145 return bases.len() == 1 && bases[0] as char == VCFConstants::NO_CALL_ALLELE;
146 }
147
148 pub fn would_be_star_allele(bases: &[u8]) -> bool {
149 return bases.len() == 1 && bases[0] as char == VCFConstants::SPANNING_DELETION_ALLELE;
150 }
151
152 pub fn would_be_symbolic_allele(bases: &[u8]) -> bool {
153 if bases.len() <= 1 {
154 return false;
155 } else {
156 return bases[0] == Self::SYMBOLIC_ALLELE_START as u8
157 || bases[bases.len() - 1] == Self::SYMBOLIC_ALLELE_END as u8
158 || Self::would_be_breakpoint(bases)
159 || Self::would_be_single_breakend(bases);
160 }
161 }
162
163 pub fn would_be_breakpoint(bases: &[u8]) -> bool {
164 if bases.len() <= 1 {
165 return false;
166 }
167 return bases.iter().any(|base| {
168 *base as char == Self::BREAKEND_EXTENDING_LEFT
169 || *base as char == Self::BREAKEND_EXTENDING_RIGHT
170 });
171 }
172
173 pub fn would_be_single_breakend(bases: &[u8]) -> bool {
174 if bases.len() <= 1 {
175 return false;
176 } else {
177 return bases[0] == Self::SINGLE_BREAKEND_INDICATOR as u8
178 || bases[bases.len() - 1] == Self::SINGLE_BREAKEND_INDICATOR as u8;
179 }
180 }
181
182 pub fn acceptable_allele_bases(bases: &[u8], is_ref: bool) -> bool {
183 if Self::would_be_null_allele(bases) {
184 return false;
186 } else if Self::would_be_no_call_allele(bases) || Self::would_be_symbolic_allele(bases) {
187 return true;
189 } else if Self::would_be_star_allele(bases) {
190 return !is_ref;
192 } else {
193 !bases.iter().any(|base| {
195 let base = *base as char;
196 let result = match base {
197 'A' | 'C' | 'T' | 'G' | 'a' | 'c' | 't' | 'g' | 'N' | 'n' | 'R' | 'Y' | 'K'
198 | 'M' | 'S' | 'W' | 'B' | 'D' | 'H' | 'V' | 'U' => false,
199 _ => true,
200 };
201 result
205 })
206 }
207 }
208}
209
/// Read-only view of an allele, implemented in this file by `ByteArrayAllele`.
///
/// Implementors must be comparable, hashable, cloneable, debuggable and safe to
/// share across threads.
pub trait Allele: Clone + std::fmt::Debug + Eq + PartialEq + Hash + Send + Sync {
    /// Whether this allele is flagged as the reference allele.
    fn is_reference(&self) -> bool;

    /// Length of the allele; the `ByteArrayAllele` implementation reports `0`
    /// for symbolic alleles.
    fn length(&self) -> usize;

    /// Whether this allele is flagged as symbolic.
    fn is_symbolic(&self) -> bool;

    /// Whether this allele is called; `ByteArrayAllele` implements it as the
    /// negation of [`is_no_call`](Allele::is_no_call).
    fn is_called(&self) -> bool;

    /// Whether this allele is the no-call placeholder.
    fn is_no_call(&self) -> bool;

    /// The bytes exposed for this allele.
    fn get_bases(&self) -> &[u8];

    /// Constructs the no-call allele for the implementing type.
    fn no_call() -> Self;

    /// Returns `true` when `other` is byte-for-byte identical to
    /// [`get_bases`](Allele::get_bases). The comparison is case-sensitive.
    fn bases_match(&self, other: &[u8]) -> bool {
        let own = self.get_bases();
        own == other
    }
}
229
230impl Allele for ByteArrayAllele {
231 fn is_reference(&self) -> bool {
232 self.is_ref
233 }
234
235 fn length(&self) -> usize {
236 if self.is_symbolic {
237 0
238 } else {
239 self.len()
240 }
241 }
242
243 fn is_symbolic(&self) -> bool {
244 self.is_symbolic
245 }
246
247 fn is_called(&self) -> bool {
248 !self.is_no_call
249 }
250
251 fn is_no_call(&self) -> bool {
252 self.is_no_call
253 }
254
255 fn get_bases(&self) -> &[u8] {
256 return if self.is_symbolic {
257 variants::EMPTY_ALLELE_BASES.as_slice()
258 } else {
259 self.bases.as_slice()
260 };
261 }
262
263 fn no_call() -> Self {
264 Self {
265 bases: vec![Self::NO_CALL as u8],
266 is_ref: false,
267 is_no_call: true,
268 is_symbolic: false,
269 }
270 }
271}