1use ragc_common::Base;
5
6#[derive(Debug, Clone, Copy, PartialEq, Eq)]
8pub enum KmerMode {
9 Direct,
10 RevComp,
11 Canonical,
12}
13
14#[derive(Debug, Clone)]
17pub struct Kmer {
18 kmer_dir: u64, kmer_rc: u64, cur_size: u32, max_size: u32, variant: KmerMode, mask: u64, shift: u32, }
26
27impl Kmer {
28 pub fn new(max_size: u32, variant: KmerMode) -> Self {
30 let shift = 64 - 2 * max_size;
31 let mask = (!0u64) << shift;
32
33 Self {
34 kmer_dir: 0,
35 kmer_rc: 0,
36 cur_size: 0,
37 max_size,
38 variant,
39 mask,
40 shift,
41 }
42 }
43
44 pub fn from_values(
46 kmer_dir: u64,
47 kmer_rc: u64,
48 max_size: u32,
49 cur_size: u32,
50 variant: KmerMode,
51 ) -> Self {
52 let shift = 64 - 2 * max_size;
53 let mask = (!0u64) << shift;
54
55 Self {
56 kmer_dir,
57 kmer_rc,
58 cur_size,
59 max_size,
60 variant,
61 mask,
62 shift,
63 }
64 }
65
66 pub fn reset(&mut self) {
68 self.kmer_dir = 0;
69 self.kmer_rc = 0;
70 self.cur_size = 0;
71 }
72
73 #[inline]
75 pub fn insert_canonical(&mut self, symbol: u64) {
76 self.kmer_rc >>= 2;
78 self.kmer_rc += reverse_complement(symbol) << 62;
79 self.kmer_rc &= self.mask;
80
81 if self.cur_size == self.max_size {
83 self.kmer_dir <<= 2;
84 self.kmer_dir += symbol << self.shift;
85 } else {
86 self.cur_size += 1;
87 self.kmer_dir += symbol << (64 - 2 * self.cur_size);
88 }
89 }
90
91 #[inline]
93 pub fn insert(&mut self, symbol: u64) {
94 match self.variant {
95 KmerMode::Direct => self.insert_direct(symbol),
96 KmerMode::RevComp => self.insert_rev_comp(symbol),
97 KmerMode::Canonical => self.insert_canonical(symbol),
98 }
99 }
100
101 #[inline]
103 fn insert_direct(&mut self, symbol: u64) {
104 if self.cur_size == self.max_size {
105 self.kmer_dir <<= 2;
106 self.kmer_dir += symbol << self.shift;
107 } else {
108 self.cur_size += 1;
109 self.kmer_dir += symbol << (64 - 2 * self.cur_size);
110 }
111 }
112
113 #[inline]
115 fn insert_rev_comp(&mut self, symbol: u64) {
116 self.kmer_rc >>= 2;
117 self.kmer_rc += reverse_complement(symbol) << 62;
118 self.kmer_rc &= self.mask;
119
120 if self.cur_size < self.max_size {
121 self.cur_size += 1;
122 }
123 }
124
125 #[inline]
127 pub fn data(&self) -> u64 {
128 match self.variant {
129 KmerMode::Direct => self.kmer_dir,
130 KmerMode::RevComp => self.kmer_rc,
131 KmerMode::Canonical => self.kmer_dir.min(self.kmer_rc),
132 }
133 }
134
135 #[inline]
137 pub fn data_canonical(&self) -> u64 {
138 self.kmer_dir.min(self.kmer_rc)
139 }
140
141 #[inline]
143 pub fn data_dir(&self) -> u64 {
144 match self.variant {
145 KmerMode::Direct | KmerMode::Canonical => self.kmer_dir,
146 KmerMode::RevComp => 0,
147 }
148 }
149
150 #[inline]
152 pub fn data_rc(&self) -> u64 {
153 match self.variant {
154 KmerMode::RevComp | KmerMode::Canonical => self.kmer_rc,
155 KmerMode::Direct => 0,
156 }
157 }
158
159 #[inline]
161 pub fn is_full(&self) -> bool {
162 self.cur_size == self.max_size
163 }
164
165 #[inline]
167 pub fn get_cur_size(&self) -> u32 {
168 self.cur_size
169 }
170
171 #[inline]
173 pub fn get_max_size(&self) -> u32 {
174 self.max_size
175 }
176
177 #[inline]
179 pub fn get_symbol(&self, pos: u32) -> u64 {
180 match self.variant {
181 KmerMode::Direct | KmerMode::Canonical => {
182 let sym_shift = 62 - 2 * pos;
183 (self.kmer_dir >> sym_shift) & 3
184 }
185 KmerMode::RevComp => {
186 let sym_shift = 64 - 2 * self.cur_size + 2 * pos;
187 (self.kmer_rc >> sym_shift) & 3
188 }
189 }
190 }
191
192 pub fn swap_dir_rc(&mut self) {
194 if self.variant == KmerMode::Canonical {
195 std::mem::swap(&mut self.kmer_dir, &mut self.kmer_rc);
196 }
197 }
198
199 pub fn is_dir_oriented(&self) -> bool {
201 if self.variant == KmerMode::Canonical {
202 self.kmer_dir <= self.kmer_rc
203 } else {
204 false
205 }
206 }
207}
208
209impl PartialEq for Kmer {
210 fn eq(&self, other: &Self) -> bool {
211 match self.variant {
212 KmerMode::Direct | KmerMode::Canonical => self.kmer_dir == other.kmer_dir,
213 KmerMode::RevComp => self.kmer_rc == other.kmer_rc,
214 }
215 }
216}
217
218#[inline]
221pub const fn reverse_complement(base: u64) -> u64 {
222 match base {
223 0 => 3, 1 => 2, 2 => 1, 3 => 0, _ => 4, }
229}
230
231#[inline]
233pub fn encode_base(c: char) -> Option<u64> {
234 Base::from_char(c).map(|b| b as u64)
235}
236
237#[inline]
239pub fn decode_base(val: u64) -> Option<char> {
240 Base::from_u8(val as u8).map(|b| b.to_char())
241}
242
243pub fn extract_kmer(sequence: &[u8], pos: usize, k: usize, mode: KmerMode) -> Option<Kmer> {
245 if pos + k > sequence.len() {
246 return None;
247 }
248
249 let mut kmer = Kmer::new(k as u32, mode);
250
251 for i in 0..k {
252 let base = encode_base(sequence[pos + i] as char)?;
253 kmer.insert(base);
254 }
255
256 Some(kmer)
257}
258
259#[inline]
261pub fn canonical_kmer(kmer: u64, k: u32) -> u64 {
262 let rc = reverse_complement_kmer(kmer, k);
263 kmer.min(rc)
264}
265
266pub fn reverse_complement_kmer(kmer: u64, k: u32) -> u64 {
268 let mut result = 0u64;
269 let shift = 64 - 2 * k;
270
271 for i in 0..k {
272 let base = (kmer >> (shift + 2 * i)) & 3;
273 let rc_base = reverse_complement(base);
274 result |= rc_base << (shift + 2 * (k - 1 - i));
275 }
276
277 result
278}
279
280#[cfg(test)]
281mod tests {
282 use super::*;
283
284 #[test]
285 fn test_reverse_complement() {
286 assert_eq!(reverse_complement(0), 3); assert_eq!(reverse_complement(1), 2); assert_eq!(reverse_complement(2), 1); assert_eq!(reverse_complement(3), 0); }
291
292 #[test]
293 fn test_encode_decode() {
294 assert_eq!(encode_base('A'), Some(0));
295 assert_eq!(encode_base('C'), Some(1));
296 assert_eq!(encode_base('G'), Some(2));
297 assert_eq!(encode_base('T'), Some(3));
298
299 assert_eq!(decode_base(0), Some('A'));
300 assert_eq!(decode_base(1), Some('C'));
301 assert_eq!(decode_base(2), Some('G'));
302 assert_eq!(decode_base(3), Some('T'));
303 }
304
305 #[test]
306 fn test_kmer_insertion() {
307 let mut kmer = Kmer::new(4, KmerMode::Canonical);
308
309 kmer.insert(0); kmer.insert(1); kmer.insert(2); kmer.insert(3); assert!(kmer.is_full());
316 assert_eq!(kmer.get_cur_size(), 4);
317 }
318
319 #[test]
320 fn test_kmer_data() {
321 let mut kmer = Kmer::new(4, KmerMode::Direct);
322
323 for _ in 0..4 {
325 kmer.insert(0);
326 }
327
328 assert_eq!(kmer.data_dir() >> 56, 0);
330 }
331
332 #[test]
333 fn test_canonical_kmer() {
334 let mut kmer1 = Kmer::new(4, KmerMode::Canonical);
336 kmer1.insert(0); kmer1.insert(1); kmer1.insert(2); kmer1.insert(3); let canonical = kmer1.data_canonical();
342 assert!(canonical == kmer1.kmer_dir || canonical == kmer1.kmer_rc);
344 assert_eq!(canonical, kmer1.kmer_dir.min(kmer1.kmer_rc));
345 }
346}