use traits::Seq;

use crate::intrinsics::transpose;

use super::*;

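/// A borrowed slice of a 2-bit packed DNA sequence: 4 bases per byte, encoded as A=0, C=1, T=2, G=3.
/// `offset` and `len` are counted in bases, so a slice does not have to start on a byte boundary.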
#[derive(Copy, Clone, Debug, MemSize, MemDbg)]
pub struct PackedSeq<'s> {
    seq: &'s [u8],
    offset: usize,
    len: usize,
}

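/// Number of trailing zero bytes kept after the packed data of a `PackedSeqVec`. This is
/// presumably there so that the unaligned `u64`/`u128` reads in `as_u64` (and wide SIMD loads)
/// can read a few bytes past the packed data without going out of bounds.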
const PADDING: usize = 16;

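/// An owned, growable 2-bit packed sequence. `len` counts bases; the backing `seq` buffer is
/// kept `PADDING` bytes longer than the packed data.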
#[derive(Clone, Debug, MemSize, MemDbg)]
#[cfg_attr(feature = "pyo3", pyo3::pyclass)]
#[cfg_attr(feature = "epserde", derive(epserde::Epserde))]
pub struct PackedSeqVec {
    pub(crate) seq: Vec<u8>,
    len: usize,
}

impl Default for PackedSeqVec {
    fn default() -> Self {
        Self {
            seq: vec![0; PADDING],
            len: 0,
        }
    }
}

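/// Packs an ASCII nucleotide into its 2-bit code: A=0, C=1, T=2, G=3 (case-insensitive).
/// Panics on any other character.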
#[inline(always)]
pub fn pack_char(base: u8) -> u8 {
    match base {
        b'a' | b'A' => 0,
        b'c' | b'C' => 1,
        b'g' | b'G' => 3,
        b't' | b'T' => 2,
        _ => panic!(
            "Unexpected character '{}' with ASCII value {base}. Expected one of ACTGactg.",
            base as char
        ),
    }
}

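/// Packs an ASCII nucleotide by taking bits 1 and 2 of its ASCII code: `(base >> 1) & 3`.
/// Agrees with `pack_char` on `ACGTacgt`, but silently maps every other byte to some 2-bit value.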
#[inline(always)]
pub fn pack_char_lossy(base: u8) -> u8 {
    (base >> 1) & 3
}

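/// Unpacks a 2-bit code back to its ASCII character: 0, 1, 2, 3 map to A, C, T, G.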
#[inline(always)]
pub fn unpack_base(base: u8) -> u8 {
    debug_assert!(base < 4, "Base {base} is not <4.");
    b"ACTG"[base as usize]
}

#[inline(always)]
pub const fn complement_char(base: u8) -> u8 {
    match base {
        b'A' => b'T',
        b'C' => b'G',
        b'G' => b'C',
        b'T' => b'A',
        _ => panic!("Unexpected character. Expected one of ACGT."),
    }
}

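/// Complements a 2-bit base. In this encoding (A=0, C=1, T=2, G=3) the complement is simply
/// XOR with 2: A <-> T and C <-> G.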
#[inline(always)]
pub const fn complement_base(base: u8) -> u8 {
    base ^ 2
}

#[inline(always)]
pub fn complement_base_simd(base: u32x8) -> u32x8 {
    base ^ u32x8::splat(2)
}

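/// Reverse-complements the `len` bases stored in the low `2 * len` bits of `word`: the 2-bit
/// groups are reversed, complemented by XOR with `0xAA...`, and shifted back down to the low bits.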
#[inline(always)]
pub const fn revcomp_u64(word: u64, len: usize) -> u64 {
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    {
        // Reverse all bits, then swap adjacent bits so each 2-bit base keeps its value.
        let mut res = word.reverse_bits();
        res = ((res >> 1) & 0x5555_5555_5555_5555) | ((res & 0x5555_5555_5555_5555) << 1);
        res ^= 0xAAAA_AAAA_AAAA_AAAA;
        res >> (u64::BITS as usize - 2 * len)
    }

    #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
    {
        // Reverse the bytes, then reverse the 2-bit groups within each byte.
        let mut res = word.swap_bytes();
        res = ((res >> 4) & 0x0F0F_0F0F_0F0F_0F0F) | ((res & 0x0F0F_0F0F_0F0F_0F0F) << 4);
        res = ((res >> 2) & 0x3333_3333_3333_3333) | ((res & 0x3333_3333_3333_3333) << 2);
        res ^= 0xAAAA_AAAA_AAAA_AAAA;
        res >> (u64::BITS as usize - 2 * len)
    }
}

impl PackedSeq<'_> {
    #[inline(always)]
    pub fn normalize(&self) -> Self {
        let start = self.offset / 4;
        let end = (self.offset + self.len).div_ceil(4);
        Self {
            seq: &self.seq[start..end],
            offset: self.offset % 4,
            len: self.len,
        }
    }

    #[inline(always)]
    pub fn unpack(&self) -> Vec<u8> {
        self.iter_bp().map(unpack_base).collect()
    }
}

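/// Reads up to 32 bytes of `seq` starting at byte `idx` into a `u32x8`; anything past the end of
/// `seq` is filled with zeros.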
#[inline(always)]
pub(crate) fn read_slice(seq: &[u8], idx: usize) -> u32x8 {
    let mut result = [0u8; 32];
    let num_bytes = 32.min(seq.len().saturating_sub(idx));
    unsafe {
        // Clamp the source offset so `add` stays in bounds even when `idx >= seq.len()`;
        // in that case `num_bytes` is 0 and nothing is copied.
        let src = seq.as_ptr().add(idx.min(seq.len()));
        std::ptr::copy_nonoverlapping(src, result.as_mut_ptr(), num_bytes);
        std::mem::transmute(result)
    }
}

impl<'s> Seq<'s> for PackedSeq<'s> {
    const BASES_PER_BYTE: usize = 4;
    const BITS_PER_CHAR: usize = 2;
    type SeqVec = PackedSeqVec;

    #[inline(always)]
    fn len(&self) -> usize {
        self.len
    }

    #[inline(always)]
    fn is_empty(&self) -> bool {
        self.len == 0
    }

    #[inline(always)]
    fn get(&self, index: usize) -> u8 {
        let offset = self.offset + index;
        let idx = offset / 4;
        let offset = offset % 4;
        (self.seq[idx] >> (2 * offset)) & 3
    }

    #[inline(always)]
    fn get_ascii(&self, index: usize) -> u8 {
        unpack_base(self.get(index))
    }

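    /// Packs the bases of this slice (at most 32) into a `u64`, two bits per base.
    /// This does an unaligned 8- or 16-byte read from the start of the slice, so it relies on a
    /// few readable bytes past the packed data, which the `PADDING` of `PackedSeqVec` is
    /// presumably there to provide.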
    #[inline(always)]
    fn as_u64(&self) -> u64 {
        assert!(self.len() <= 32);
        debug_assert!(self.seq.len() <= 9);

        let mask = u64::MAX >> (64 - 2 * self.len());

        if self.len() <= 29 {
            let x = unsafe { (self.seq.as_ptr() as *const u64).read_unaligned() };
            (x >> (2 * self.offset)) & mask
        } else {
            let x = unsafe { (self.seq.as_ptr() as *const u128).read_unaligned() };
            (x >> (2 * self.offset)) as u64 & mask
        }
    }

    #[inline(always)]
    fn revcomp_as_u64(&self) -> u64 {
        revcomp_u64(self.as_u64(), self.len())
    }

    #[inline(always)]
    fn to_vec(&self) -> PackedSeqVec {
        assert_eq!(self.offset, 0);
        PackedSeqVec {
            seq: self
                .seq
                .iter()
                .copied()
                .chain(std::iter::repeat_n(0u8, PADDING))
                .collect(),
            len: self.len,
        }
    }

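    /// Builds the reverse complement as a new `PackedSeqVec`: the packed bytes are reversed and
    /// each byte is reverse-complemented on its own, after which the whole buffer is shifted
    /// down so the first base again starts at offset 0.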
    fn to_revcomp(&self) -> PackedSeqVec {
        let mut seq = self.seq[..(self.offset + self.len).div_ceil(4)]
            .iter()
            .rev()
            .copied()
            .map(|mut res| {
                res = ((res >> 4) & 0x0F) | ((res & 0x0F) << 4);
                res = ((res >> 2) & 0x33) | ((res & 0x33) << 2);
                res ^ 0xAA
            })
            .chain(std::iter::repeat_n(0u8, PADDING))
            .collect::<Vec<u8>>();

        let new_offset = (4 - (self.offset + self.len) % 4) % 4;

        if new_offset > 0 {
            let shift = 2 * new_offset;
            *seq.last_mut().unwrap() >>= shift;
            for i in 0..seq.len() - 1 {
                seq[i] = (seq[i] >> shift) | (seq[i + 1] << (8 - shift));
            }
        }

        PackedSeqVec { seq, len: self.len }
    }

    #[inline(always)]
    fn slice(&self, range: Range<usize>) -> Self {
        debug_assert!(
            range.end <= self.len,
            "Slice index out of bounds: {} > {}",
            range.end,
            self.len
        );
        PackedSeq {
            seq: self.seq,
            offset: self.offset + range.start,
            len: range.end - range.start,
        }
        .normalize()
    }

    #[inline(always)]
    fn iter_bp(self) -> impl ExactSizeIterator<Item = u8> + Clone {
        assert!(self.len <= self.seq.len() * 4);

        let this = self.normalize();

        let mut byte = 0;
        let mut it = (0..this.len + this.offset).map(
            #[inline(always)]
            move |i| {
                if i % 4 == 0 {
                    byte = this.seq[i / 4];
                }
                (byte >> (2 * (i % 4))) & 0b11
            },
        );
        it.by_ref().take(this.offset).for_each(drop);
        it
    }

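    /// Iterates the bases in 8 parallel lanes of the SIMD vector `S`. The packed bytes are split
    /// into 8 chunks of `bytes_per_chunk` bytes; every 128 steps, 32 bytes per lane are gathered
    /// and transposed so that `buf[j]` holds the `j`-th `u32` of each lane, and every such `u32`
    /// then yields 16 bases, two bits at a time. The second return value is the number of
    /// trailing k-mer positions in the last lane that are padding rather than real sequence.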
    #[inline(always)]
    fn par_iter_bp(self, context: usize) -> (impl ExactSizeIterator<Item = S> + Clone, usize) {
        #[cfg(target_endian = "big")]
        panic!("Big endian architectures are not supported.");

        let this = self.normalize();
        let o = this.offset;
        assert!(o < 4);

        let num_kmers = if this.len == 0 {
            0
        } else {
            (this.len + o).saturating_sub(context - 1)
        };
        let num_kmers_stride = this.len.saturating_sub(context - 1);
        let n = num_kmers_stride.div_ceil(L).next_multiple_of(4);
        let bytes_per_chunk = n / 4;
        let padding = 4 * L * bytes_per_chunk - num_kmers_stride;

        let offsets: [usize; 8] = from_fn(|l| l * bytes_per_chunk);
        let mut cur = S::ZERO;

        let mut buf = Box::new([S::ZERO; 8]);

        let par_len = if num_kmers == 0 {
            0
        } else {
            n + context + o - 1
        };
        let mut it = (0..par_len).map(
            #[inline(always)]
            move |i| {
                if i % 16 == 0 {
                    if i % 128 == 0 {
                        let data: [u32x8; 8] = from_fn(
                            #[inline(always)]
                            |lane| read_slice(this.seq, offsets[lane] + (i / 4)),
                        );
                        *buf = transpose(data);
                    }
                    cur = buf[(i % 128) / 16];
                }
                let chars = cur & S::splat(0x03);
                cur = cur >> S::splat(2);
                chars
            },
        );
        it.by_ref().take(o).for_each(drop);

        (it, padding)
    }

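    /// Like `par_iter_bp`, but each step also yields the character `delay` positions earlier in
    /// the same lane (zeros until that many characters have been produced). The delayed stream
    /// is served from a ring buffer of previously transposed 16-base words.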
    #[inline(always)]
    fn par_iter_bp_delayed(
        self,
        context: usize,
        delay: usize,
    ) -> (impl ExactSizeIterator<Item = (S, S)> + Clone, usize) {
        #[cfg(target_endian = "big")]
        panic!("Big endian architectures are not supported.");

        assert!(
            delay < usize::MAX / 2,
            "Delay={} should be >=0.",
            delay as isize
        );

        let this = self.normalize();
        let o = this.offset;
        assert!(o < 4);

        let num_kmers = if this.len == 0 {
            0
        } else {
            (this.len + o).saturating_sub(context - 1)
        };
        let num_kmers_stride = this.len.saturating_sub(context - 1);
        let n = num_kmers_stride.div_ceil(L).next_multiple_of(4);
        let bytes_per_chunk = n / 4;
        let padding = 4 * L * bytes_per_chunk - num_kmers_stride;

        let offsets: [usize; 8] = from_fn(|l| l * bytes_per_chunk);
        let mut upcoming = S::ZERO;
        let mut upcoming_d = S::ZERO;

        let buf_len = (delay / 16 + 8).next_power_of_two();
        let buf_mask = buf_len - 1;
        let mut buf = vec![S::ZERO; buf_len];
        let mut write_idx = 0;
        let mut read_idx = (buf_len - delay / 16) % buf_len;

        let par_len = if num_kmers == 0 {
            0
        } else {
            n + context + o - 1
        };
        let mut it = (0..par_len).map(
            #[inline(always)]
            move |i| {
                if i % 16 == 0 {
                    if i % 128 == 0 {
                        let data: [u32x8; 8] = from_fn(
                            #[inline(always)]
                            |lane| read_slice(this.seq, offsets[lane] + (i / 4)),
                        );
                        unsafe {
                            *TryInto::<&mut [u32x8; 8]>::try_into(
                                buf.get_unchecked_mut(write_idx..write_idx + 8),
                            )
                            .unwrap_unchecked() = transpose(data);
                        }
                        if i == 0 {
                            let elem = !((1u32 << (2 * o)) - 1);
                            let mask = S::splat(elem);
                            buf[write_idx] &= mask;
                        }
                    }
                    upcoming = buf[write_idx];
                    write_idx += 1;
                    write_idx &= buf_mask;
                }
                if i % 16 == delay % 16 {
                    unsafe { assert_unchecked(read_idx < buf.len()) };
                    upcoming_d = buf[read_idx];
                    read_idx += 1;
                    read_idx &= buf_mask;
                }
                let chars = upcoming & S::splat(0x03);
                let chars_d = upcoming_d & S::splat(0x03);
                upcoming = upcoming >> S::splat(2);
                upcoming_d = upcoming_d >> S::splat(2);
                (chars, chars_d)
            },
        );
        it.by_ref().take(o).for_each(drop);

        (it, padding)
    }

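    /// Like `par_iter_bp_delayed`, but with two delayed streams at distances `delay1 <= delay2`.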
    #[inline(always)]
    fn par_iter_bp_delayed_2(
        self,
        context: usize,
        delay1: usize,
        delay2: usize,
    ) -> (impl ExactSizeIterator<Item = (S, S, S)> + Clone, usize) {
        #[cfg(target_endian = "big")]
        panic!("Big endian architectures are not supported.");

        let this = self.normalize();
        let o = this.offset;
        assert!(o < 4);
        assert!(delay1 <= delay2, "Delay1 must be at most delay2.");

        let num_kmers = if this.len == 0 {
            0
        } else {
            (this.len + o).saturating_sub(context - 1)
        };
        let num_kmers_stride = this.len.saturating_sub(context - 1);
        let n = num_kmers_stride.div_ceil(L).next_multiple_of(4);
        let bytes_per_chunk = n / 4;
        let padding = 4 * L * bytes_per_chunk - num_kmers_stride;

        let offsets: [usize; 8] = from_fn(|l| l * bytes_per_chunk);
        let mut upcoming = S::ZERO;
        let mut upcoming_d1 = S::ZERO;
        let mut upcoming_d2 = S::ZERO;

        let buf_len = (delay2 / 16 + 8).next_power_of_two();
        let buf_mask = buf_len - 1;
        let mut buf = vec![S::ZERO; buf_len];
        let mut write_idx = 0;
        let mut read_idx1 = (buf_len - delay1 / 16) % buf_len;
        let mut read_idx2 = (buf_len - delay2 / 16) % buf_len;

        let par_len = if num_kmers == 0 {
            0
        } else {
            n + context + o - 1
        };
        let mut it = (0..par_len).map(
            #[inline(always)]
            move |i| {
                if i % 16 == 0 {
                    if i % 128 == 0 {
                        let data: [u32x8; 8] = from_fn(
                            #[inline(always)]
                            |lane| read_slice(this.seq, offsets[lane] + (i / 4)),
                        );
                        unsafe {
                            *TryInto::<&mut [u32x8; 8]>::try_into(
                                buf.get_unchecked_mut(write_idx..write_idx + 8),
                            )
                            .unwrap_unchecked() = transpose(data);
                        }
                        if i == 0 {
                            let elem = !((1u32 << (2 * o)) - 1);
                            let mask = S::splat(elem);
                            buf[write_idx] &= mask;
                        }
                    }
                    upcoming = buf[write_idx];
                    write_idx += 1;
                    write_idx &= buf_mask;
                }
                if i % 16 == delay1 % 16 {
                    unsafe { assert_unchecked(read_idx1 < buf.len()) };
                    upcoming_d1 = buf[read_idx1];
                    read_idx1 += 1;
                    read_idx1 &= buf_mask;
                }
                if i % 16 == delay2 % 16 {
                    unsafe { assert_unchecked(read_idx2 < buf.len()) };
                    upcoming_d2 = buf[read_idx2];
                    read_idx2 += 1;
                    read_idx2 &= buf_mask;
                }
                let chars = upcoming & S::splat(0x03);
                let chars_d1 = upcoming_d1 & S::splat(0x03);
                let chars_d2 = upcoming_d2 & S::splat(0x03);
                upcoming = upcoming >> S::splat(2);
                upcoming_d1 = upcoming_d1 >> S::splat(2);
                upcoming_d2 = upcoming_d2 >> S::splat(2);
                (chars, chars_d1, chars_d2)
            },
        );
        it.by_ref().take(o).for_each(drop);

        (it, padding)
    }

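    /// Compares two packed sequences 29 bases at a time via `as_u64`, returning the ordering
    /// together with the length of the longest common prefix. Bases are stored starting at the
    /// least significant bits, so the trailing zeros of the XOR of two words count the matching
    /// leading bases.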
    fn cmp_lcp(&self, other: &Self) -> (std::cmp::Ordering, usize) {
        let mut lcp = 0;
        let min_len = self.len.min(other.len);
        for i in (0..min_len).step_by(29) {
            let len = (min_len - i).min(29);
            let this = self.slice(i..i + len);
            let other = other.slice(i..i + len);
            let this_word = this.as_u64();
            let other_word = other.as_u64();
            if this_word != other_word {
                let eq = this_word ^ other_word;
                let t = eq.trailing_zeros() / 2 * 2;
                lcp += t as usize / 2;
                let mask = 0b11 << t;
                return ((this_word & mask).cmp(&(other_word & mask)), lcp);
            }
            lcp += len;
        }
        (self.len.cmp(&other.len), lcp)
    }
}

impl PartialEq for PackedSeq<'_> {
    fn eq(&self, other: &Self) -> bool {
        if self.len != other.len {
            return false;
        }
        for i in (0..self.len).step_by(29) {
            let len = (self.len - i).min(29);
            let this = self.slice(i..i + len);
            let that = other.slice(i..i + len);
            if this.as_u64() != that.as_u64() {
                return false;
            }
        }
        true
    }
}

impl Eq for PackedSeq<'_> {}

impl PartialOrd for PackedSeq<'_> {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for PackedSeq<'_> {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        let min_len = self.len.min(other.len);
        for i in (0..min_len).step_by(29) {
            let len = (min_len - i).min(29);
            let this = self.slice(i..i + len);
            let other = other.slice(i..i + len);
            let this_word = this.as_u64();
            let other_word = other.as_u64();
            if this_word != other_word {
                let eq = this_word ^ other_word;
                let t = eq.trailing_zeros() / 2 * 2;
                let mask = 0b11 << t;
                return (this_word & mask).cmp(&(other_word & mask));
            }
        }
        self.len.cmp(&other.len)
    }
}

impl SeqVec for PackedSeqVec {
    type Seq<'s> = PackedSeq<'s>;

    #[inline(always)]
    fn into_raw(mut self) -> Vec<u8> {
        self.seq.resize(self.len.div_ceil(4), 0);
        self.seq
    }

    #[inline(always)]
    fn as_slice(&self) -> Self::Seq<'_> {
        PackedSeq {
            seq: &self.seq[..self.len.div_ceil(4)],
            offset: 0,
            len: self.len,
        }
    }

    #[inline(always)]
    fn len(&self) -> usize {
        self.len
    }

    #[inline(always)]
    fn is_empty(&self) -> bool {
        self.len == 0
    }

    #[inline(always)]
    fn clear(&mut self) {
        self.seq.clear();
        self.len = 0;
    }

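    /// Appends a packed slice by copying its bytes verbatim. The pushed data therefore starts at
    /// the next byte boundary (`self.len` rounded up to a multiple of 4) plus the slice's own
    /// base offset within its first byte, and that range is returned.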
    fn push_seq<'a>(&mut self, seq: PackedSeq<'_>) -> Range<usize> {
        let start = self.len.next_multiple_of(4) + seq.offset;
        let end = start + seq.len();
        self.seq.reserve(seq.seq.len());
        self.seq.resize(self.len.div_ceil(4), 0);
        self.seq.extend(seq.seq);
        self.seq.extend(std::iter::repeat_n(0u8, PADDING));
        self.len = end;
        start..end
    }

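    /// Appends an ASCII sequence, packing it with `pack_char_lossy`. The first characters fill
    /// up the current partial byte; the bulk is then packed 8 at a time via BMI2 `pext` or 16 at
    /// a time via NEON where available, and a scalar loop handles the tail.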
    fn push_ascii(&mut self, seq: &[u8]) -> Range<usize> {
        self.seq
            .resize((self.len + seq.len()).div_ceil(4) + PADDING, 0);
        let start_aligned = self.len.next_multiple_of(4);
        let start = self.len;
        let len = seq.len();
        let mut idx = self.len / 4;

        let unaligned = core::cmp::min(start_aligned - start, len);
        if unaligned > 0 {
            let mut packed_byte = self.seq[idx];
            for &base in &seq[..unaligned] {
                packed_byte |= pack_char_lossy(base) << ((self.len % 4) * 2);
                self.len += 1;
            }
            self.seq[idx] = packed_byte;
            idx += 1;
        }

        #[allow(unused)]
        let mut last = unaligned;

        #[cfg(all(target_arch = "x86_64", target_feature = "bmi2"))]
        {
            last = unaligned + (len - unaligned) / 8 * 8;

            for i in (unaligned..last).step_by(8) {
                let chunk: [u8; 8] = seq[i..i + 8].try_into().unwrap();
                let ascii = u64::from_ne_bytes(chunk);
                let packed_bytes =
                    unsafe { std::arch::x86_64::_pext_u64(ascii, 0x0606060606060606) };
                self.seq[idx] = packed_bytes as u8;
                idx += 1;
                self.seq[idx] = (packed_bytes >> 8) as u8;
                idx += 1;
                self.len += 8;
            }
        }

        #[cfg(target_feature = "neon")]
        {
            use core::arch::aarch64::{vandq_u8, vdup_n_u8, vld1q_u8, vpadd_u8, vshlq_u8, vst1_u8};
            use core::mem::transmute;

            last = unaligned + (len - unaligned) / 16 * 16;

            for i in (unaligned..last).step_by(16) {
                unsafe {
                    let ascii = vld1q_u8(seq.as_ptr().add(i));
                    let masked_bits = vandq_u8(ascii, transmute([6i8; 16]));
                    let (bits_0, bits_1) = transmute(vshlq_u8(
                        masked_bits,
                        transmute([-1i8, 1, 3, 5, -1, 1, 3, 5, -1, 1, 3, 5, -1, 1, 3, 5]),
                    ));
                    let half_packed = vpadd_u8(bits_0, bits_1);
                    let packed = vpadd_u8(half_packed, vdup_n_u8(0));
                    vst1_u8(self.seq.as_mut_ptr().add(idx), packed);
                    idx += 4;
                    self.len += 16;
                }
            }
        }

        let mut packed_byte = 0;
        for &base in &seq[last..] {
            packed_byte |= pack_char_lossy(base) << ((self.len % 4) * 2);
            self.len += 1;
            if self.len % 4 == 0 {
                self.seq[idx] = packed_byte;
                idx += 1;
                packed_byte = 0;
            }
        }
        if self.len % 4 != 0 && last < len {
            self.seq[idx] = packed_byte;
            idx += 1;
        }
        assert_eq!(idx + PADDING, self.seq.len());
        start..start + len
    }

    #[cfg(feature = "rand")]
    fn random(n: usize) -> Self {
        use rand::{RngCore, SeedableRng};

        let byte_len = n.div_ceil(4);
        let mut seq = vec![0; byte_len + PADDING];
        rand::rngs::SmallRng::from_os_rng().fill_bytes(&mut seq[..byte_len]);
        if n % 4 != 0 {
            seq[byte_len - 1] &= (1 << (2 * (n % 4))) - 1;
        }

        Self { seq, len: n }
    }
}
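
// A minimal round-trip sketch of the API defined above (assuming the `Seq`/`SeqVec` traits
// imported at the top of this file are in scope); not part of the original test suite.
#[cfg(test)]
mod packed_seq_sketch {
    use super::*;

    #[test]
    fn ascii_roundtrip() {
        let mut v = PackedSeqVec::default();
        // `push_ascii` packs with `pack_char_lossy` and returns the range of pushed positions.
        let range = v.push_ascii(b"ACGTACGT");
        assert_eq!(range, 0..8);
        // `unpack` maps every 2-bit base back to ASCII via `unpack_base`.
        assert_eq!(v.as_slice().unpack(), b"ACGTACGT".to_vec());
        // Reverse-complementing twice gives back the original sequence.
        let rc = v.as_slice().to_revcomp();
        assert_eq!(
            rc.as_slice().to_revcomp().as_slice().unpack(),
            b"ACGTACGT".to_vec()
        );
    }
}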