use traits::Seq;

use crate::{intrinsics::transpose, padded_it::ChunkIt};

use super::*;

#[doc(hidden)]
pub struct Bits<const B: usize>;
#[doc(hidden)]
pub trait SupportedBits {}
impl SupportedBits for Bits<1> {}
impl SupportedBits for Bits<2> {}
impl SupportedBits for Bits<4> {}
impl SupportedBits for Bits<8> {}

const PADDING: usize = 16;

#[doc(hidden)]
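/// A packed sequence slice: `len` characters of `B` bits each, starting `offset`
/// characters into the packed bytes `seq`.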
#[derive(Copy, Clone, Debug, MemSize, MemDbg)]
pub struct PackedSeqBase<'s, const B: usize>
where
    Bits<B>: SupportedBits,
{
    /// The underlying packed bytes.
    seq: &'s [u8],
    /// Offset into `seq`, in characters.
    offset: usize,
    /// Length of the sequence, in characters.
    len: usize,
}

#[doc(hidden)]
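/// An owned, growable packed sequence. The backing `Vec<u8>` always keeps `PADDING`
/// extra bytes at the end so that unaligned `u64`/`u128` reads and SIMD stores past
/// the logical end of the data stay within the allocation.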
#[derive(Clone, Debug, MemSize, MemDbg)]
#[cfg_attr(feature = "pyo3", pyo3::pyclass)]
#[cfg_attr(feature = "epserde", derive(epserde::Epserde))]
pub struct PackedSeqVecBase<const B: usize>
where
    Bits<B>: SupportedBits,
{
    /// The packed characters, followed by `PADDING` extra bytes.
    pub(crate) seq: Vec<u8>,

    /// Length of the sequence, in characters.
    len: usize,
}

pub type PackedSeq<'s> = PackedSeqBase<'s, 2>;
pub type PackedSeqVec = PackedSeqVecBase<2>;
pub type BitSeq<'s> = PackedSeqBase<'s, 1>;
pub type BitSeqVec = PackedSeqVecBase<1>;

impl<'s, const B: usize> PackedSeqBase<'s, B>
where
    Bits<B>: SupportedBits,
{
    /// Mask covering the bits of a single character.
    const CHAR_MASK: u64 = (1 << B) - 1;
    /// Characters per byte.
    const C8: usize = 8 / B;
    /// Characters per `u32`.
    const C32: usize = 32 / B;
    /// Characters per 256-bit SIMD word.
    const C256: usize = 256 / B;
    /// The longest k-mer that can always be extracted with a single unaligned
    /// `u64` read, regardless of the offset within the first byte.
    const K64: usize = (64 - 8) / B + 1;
}

impl<const B: usize> PackedSeqVecBase<B>
where
    Bits<B>: SupportedBits,
{
    /// Characters per byte.
    const C8: usize = 8 / B;
}

impl<const B: usize> Default for PackedSeqVecBase<B>
where
    Bits<B>: SupportedBits,
{
    fn default() -> Self {
        Self {
            seq: vec![0; PADDING],
            len: 0,
        }
    }
}

#[inline(always)]
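/// Pack an ASCII DNA character into 2 bits: `A => 0`, `C => 1`, `T => 2`, `G => 3`
/// (lowercase is also accepted).
///
/// Panics on any other character.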
pub fn pack_char(base: u8) -> u8 {
    match base {
        b'a' | b'A' => 0,
        b'c' | b'C' => 1,
        b'g' | b'G' => 3,
        b't' | b'T' => 2,
        _ => panic!(
            "Unexpected character '{}' with ASCII value {base}. Expected one of ACTGactg.",
            base as char
        ),
    }
}

#[inline(always)]
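/// Pack an ASCII character into 2 bits by extracting bits 1 and 2, i.e. `(base >> 1) & 3`.
/// Invalid characters are not detected and are silently mapped onto `ACTG`.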
pub fn pack_char_lossy(base: u8) -> u8 {
    (base >> 1) & 3
}

#[inline(always)]
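/// Unpack a 2-bit value into its ASCII character, one of `ACTG`.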
pub fn unpack_base(base: u8) -> u8 {
    debug_assert!(base < 4, "Base {base} is not <4.");
    b"ACTG"[base as usize]
}

#[inline(always)]
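/// Complement an uppercase ASCII DNA character: `A <-> T`, `C <-> G`.
///
/// Panics on any other character.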
pub const fn complement_char(base: u8) -> u8 {
    match base {
        b'A' => b'T',
        b'C' => b'G',
        b'G' => b'C',
        b'T' => b'A',
        _ => panic!("Unexpected character. Expected one of ACTG."),
    }
}

#[inline(always)]
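/// Complement a packed 2-bit base. In this encoding (`A=0, C=1, T=2, G=3`) the
/// complement is a single XOR with 2.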
pub const fn complement_base(base: u8) -> u8 {
    base ^ 2
}

#[inline(always)]
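/// Complement eight packed bases at once, one per 32-bit lane, by XOR-ing with 2.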
pub fn complement_base_simd(base: u32x8) -> u32x8 {
    base ^ u32x8::splat(2)
}

#[inline(always)]
const fn revcomp_raw(word: u64) -> u64 {
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    {
        let mut res = word.reverse_bits();
        res = ((res >> 1) & 0x5555_5555_5555_5555) | ((res & 0x5555_5555_5555_5555) << 1);
        res ^ 0xAAAA_AAAA_AAAA_AAAA
    }

    #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
    {
        let mut res = word.swap_bytes();
        res = ((res >> 4) & 0x0F0F_0F0F_0F0F_0F0F) | ((res & 0x0F0F_0F0F_0F0F_0F0F) << 4);
        res = ((res >> 2) & 0x3333_3333_3333_3333) | ((res & 0x3333_3333_3333_3333) << 2);
        res ^ 0xAAAA_AAAA_AAAA_AAAA
    }
}

#[inline(always)]
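/// Reverse-complement the `len` 2-bit bases stored in the low `2 * len` bits of
/// `word`. For example, the 3-character sequence `ACG` becomes `CGT`.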
pub const fn revcomp_u64(word: u64, len: usize) -> u64 {
    revcomp_raw(word) >> (u64::BITS as usize - 2 * len)
}

#[inline(always)]
pub const fn revcomp_u128(word: u128, len: usize) -> u128 {
    let low = word as u64;
    let high = (word >> 64) as u64;
    let rlow = revcomp_raw(low);
    let rhigh = revcomp_raw(high);
    let out = ((rlow as u128) << 64) | rhigh as u128;
    out >> (u128::BITS as usize - 2 * len)
}

#[inline(always)]
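/// Returns 1 if the ASCII character is ambiguous, i.e. not one of `ACTGactg`
/// (it does not round-trip through `pack_char_lossy` and `unpack_base`), and 0 otherwise.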
pub fn char_is_ambiguous(base: u8) -> u8 {
    let table = b"ACTG";
    let upper_mask = !(b'a' - b'A');
    (table[pack_char_lossy(base) as usize] != (base & upper_mask)) as u8
}

#[inline(always)]
const fn rev_raw(word: u64) -> u64 {
    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
    {
        word.reverse_bits()
    }

    #[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
    {
        let mut res = word.swap_bytes();
        res = ((res >> 4) & 0x0F0F_0F0F_0F0F_0F0F) | ((res & 0x0F0F_0F0F_0F0F_0F0F) << 4);
        res = ((res >> 2) & 0x3333_3333_3333_3333) | ((res & 0x3333_3333_3333_3333) << 2);
        res = ((res >> 1) & 0x5555_5555_5555_5555) | ((res & 0x5555_5555_5555_5555) << 1);
        res
    }
}

#[inline(always)]
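/// Reverse the `len` 1-bit characters stored in the low `len` bits of `word`.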
pub const fn rev_u64(word: u64, len: usize) -> u64 {
    rev_raw(word) >> (u64::BITS as usize - len)
}

#[inline(always)]
pub const fn rev_u128(word: u128, len: usize) -> u128 {
    let low = word as u64;
    let high = (word >> 64) as u64;
    let rlow = rev_raw(low);
    let rhigh = rev_raw(high);
    let out = ((rlow as u128) << 64) | rhigh as u128;
    out >> (u128::BITS as usize - len)
}

impl<const B: usize> PackedSeqBase<'_, B>
where
    Bits<B>: SupportedBits,
{
    #[inline(always)]
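    /// Restrict the underlying byte slice to just the bytes containing this sequence,
    /// so that the remaining `offset` is less than one byte.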
    pub fn normalize(&self) -> Self {
        let start_byte = self.offset / Self::C8;
        let end_byte = (self.offset + self.len).div_ceil(Self::C8);
        Self {
            seq: &self.seq[start_byte..end_byte],
            offset: self.offset % Self::C8,
            len: self.len,
        }
    }

    #[inline(always)]
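    /// Unpack the sequence into a `Vec` of ASCII characters.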
    pub fn unpack(&self) -> Vec<u8> {
        self.iter_bp().map(unpack_base).collect()
    }
}

#[inline(always)]
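/// Read up to 32 bytes of `seq` starting at byte `idx` into a `u32x8`, zero-padding
/// when fewer than 32 bytes remain.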
pub(crate) fn read_slice(seq: &[u8], idx: usize) -> u32x8 {
    let mut result = [0u8; 32];
    let num_bytes = 32.min(seq.len().saturating_sub(idx));
    unsafe {
        let src = seq.as_ptr().add(idx);
        std::ptr::copy_nonoverlapping(src, result.as_mut_ptr(), num_bytes);
        std::mem::transmute(result)
    }
}

impl<'s, const B: usize> Seq<'s> for PackedSeqBase<'s, B>
where
    Bits<B>: SupportedBits,
{
    const BITS_PER_CHAR: usize = B;
    const BASES_PER_BYTE: usize = Self::C8;
    type SeqVec = PackedSeqVecBase<B>;

    #[inline(always)]
    fn len(&self) -> usize {
        self.len
    }

    #[inline(always)]
    fn is_empty(&self) -> bool {
        self.len == 0
    }

    #[inline(always)]
    fn get_ascii(&self, index: usize) -> u8 {
        unpack_base(self.get(index))
    }

    #[inline(always)]
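    /// Pack the whole sequence (at most `64 / B` characters) into a `u64` using a single
    /// unaligned `u64` or `u128` read; the first character ends up in the lowest bits.
    /// Relies on the `PADDING` bytes kept at the end of the owning vector.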
    fn as_u64(&self) -> u64 {
        assert!(self.len() <= 64 / B);
        debug_assert!(self.seq.len() <= 9);

        let mask = u64::MAX >> (64 - B * self.len());

        if self.len() <= Self::K64 {
            let x = unsafe { (self.seq.as_ptr() as *const u64).read_unaligned() };
            (x >> (B * self.offset)) & mask
        } else {
            let x = unsafe { (self.seq.as_ptr() as *const u128).read_unaligned() };
            (x >> (B * self.offset)) as u64 & mask
        }
    }

    #[inline(always)]
    fn revcomp_as_u64(&self) -> u64 {
        match B {
            1 => rev_u64(self.as_u64(), self.len()),
            2 => revcomp_u64(self.as_u64(), self.len()),
            _ => panic!("Rev(comp) is only supported for 1-bit and 2-bit alphabets."),
        }
    }

    #[inline(always)]
    fn as_u128(&self) -> u128 {
        assert!(
            self.len() <= (128 - 8) / B + 1,
            "Sequences >61 long cannot be read with a single unaligned u128 read."
        );
        debug_assert!(self.seq.len() <= 17);

        let mask = u128::MAX >> (128 - B * self.len());

        let x = unsafe { (self.seq.as_ptr() as *const u128).read_unaligned() };
        (x >> (B * self.offset)) & mask
    }

    #[inline(always)]
    fn revcomp_as_u128(&self) -> u128 {
        match B {
            1 => rev_u128(self.as_u128(), self.len()),
            2 => revcomp_u128(self.as_u128(), self.len()),
            _ => panic!("Rev(comp) is only supported for 1-bit and 2-bit alphabets."),
        }
    }

    #[inline(always)]
    fn to_vec(&self) -> PackedSeqVecBase<B> {
        assert_eq!(self.offset, 0);
        PackedSeqVecBase {
            seq: self
                .seq
                .iter()
                .copied()
                .chain(std::iter::repeat_n(0u8, PADDING))
                .collect(),
            len: self.len,
        }
    }

    fn to_revcomp(&self) -> PackedSeqVecBase<B> {
        let mut seq = self.seq[..(self.offset + self.len).div_ceil(4)]
            .iter()
            .rev()
            .copied()
            .map(|mut res| {
                res = ((res >> 4) & 0x0F) | ((res & 0x0F) << 4);
                res = ((res >> 2) & 0x33) | ((res & 0x33) << 2);
                res ^ 0xAA
            })
            .chain(std::iter::repeat_n(0u8, PADDING))
            .collect::<Vec<u8>>();

        let new_offset = (4 - (self.offset + self.len) % 4) % 4;

        if new_offset > 0 {
            let shift = 2 * new_offset;
            *seq.last_mut().unwrap() >>= shift;
            for i in 0..seq.len() - 1 {
                seq[i] = (seq[i] >> shift) | (seq[i + 1] << (8 - shift));
            }
        }

        PackedSeqVecBase { seq, len: self.len }
    }

    #[inline(always)]
    fn slice(&self, range: Range<usize>) -> Self {
        debug_assert!(
            range.end <= self.len,
            "Slice index out of bounds: {} > {}",
            range.end,
            self.len
        );
        PackedSeqBase {
            seq: self.seq,
            offset: self.offset + range.start,
            len: range.end - range.start,
        }
        .normalize()
    }

    #[inline(always)]
    fn iter_bp(self) -> impl ExactSizeIterator<Item = u8> {
        assert!(self.len <= self.seq.len() * Self::C8);

        let this = self.normalize();

        let mut byte = 0;
        (0..this.len + this.offset)
            .map(
                #[inline(always)]
                move |i| {
                    if i % Self::C8 == 0 {
                        byte = this.seq[i / Self::C8];
                    }
                    (byte >> (B * (i % Self::C8))) & Self::CHAR_MASK as u8
                },
            )
            .advance(this.offset)
    }

    #[inline(always)]
    fn par_iter_bp(self, context: usize) -> PaddedIt<impl ChunkIt<S>> {
        #[cfg(target_endian = "big")]
        panic!("Big endian architectures are not supported.");

        let this = self.normalize();
        let o = this.offset;
        assert!(o < Self::C8);

        let num_kmers = if this.len == 0 {
            0
        } else {
            (this.len + o).saturating_sub(context - 1)
        };
        let num_kmers_stride = this.len.saturating_sub(context - 1);
        // Number of k-mer positions per lane, rounded up to a whole number of bytes.
        let n = num_kmers_stride.div_ceil(L).next_multiple_of(Self::C8);
        let bytes_per_chunk = n / Self::C8;
        // Extra positions introduced by the rounding; reported to the caller via `PaddedIt`.
        let padding = Self::C8 * L * bytes_per_chunk - num_kmers_stride;

        let offsets: [usize; 8] = from_fn(|l| l * bytes_per_chunk);
        let mut cur = S::ZERO;

        // Buffer of 8 transposed SIMD words, refilled every `C256` characters.
        let mut buf = Box::new([S::ZERO; 8]);

        let par_len = if num_kmers == 0 {
            0
        } else {
            n + context + o - 1
        };
        let it = (0..par_len)
            .map(
                #[inline(always)]
                move |i| {
                    if i % Self::C32 == 0 {
                        if i % Self::C256 == 0 {
                            // Read 32 bytes for each lane and transpose, so that
                            // `buf[j]` holds the `j`-th u32 of every lane.
                            let data: [u32x8; 8] = from_fn(
                                #[inline(always)]
                                |lane| read_slice(this.seq, offsets[lane] + (i / Self::C8)),
                            );
                            *buf = transpose(data);
                        }
                        cur = buf[(i % Self::C256) / Self::C32];
                    }
                    let chars = cur & S::splat(Self::CHAR_MASK as u32);
                    cur = cur >> S::splat(B as u32);
                    chars
                },
            )
            .advance(o);

        PaddedIt { it, padding }
    }

    #[inline(always)]
    fn par_iter_bp_delayed(
        self,
        context: usize,
        Delay(delay): Delay,
    ) -> PaddedIt<impl ChunkIt<(S, S)>> {
        #[cfg(target_endian = "big")]
        panic!("Big endian architectures are not supported.");

        assert!(
            delay < usize::MAX / 2,
            "Delay={} should be >=0.",
            delay as isize
        );

        let this = self.normalize();
        let o = this.offset;
        assert!(o < Self::C8);

        let num_kmers = if this.len == 0 {
            0
        } else {
            (this.len + o).saturating_sub(context - 1)
        };
        let num_kmers_stride = this.len.saturating_sub(context - 1);
        let n = num_kmers_stride.div_ceil(L).next_multiple_of(Self::C8);
        let bytes_per_chunk = n / Self::C8;
        let padding = Self::C8 * L * bytes_per_chunk - num_kmers_stride;

        let offsets: [usize; 8] = from_fn(|l| l * bytes_per_chunk);
        let mut upcoming = S::ZERO;
        let mut upcoming_d = S::ZERO;

        // Ring buffer of transposed words; the delayed stream reads `delay / C32`
        // words behind the write position.
        let buf_len = (delay / Self::C32 + 8).next_power_of_two();
        let buf_mask = buf_len - 1;
        let mut buf = vec![S::ZERO; buf_len];
        let mut write_idx = 0;
        let mut read_idx = (buf_len - delay / Self::C32) % buf_len;

        let par_len = if num_kmers == 0 {
            0
        } else {
            n + context + o - 1
        };
        let it = (0..par_len)
            .map(
                #[inline(always)]
                move |i| {
                    if i % Self::C32 == 0 {
                        if i % Self::C256 == 0 {
                            let data: [u32x8; 8] = from_fn(
                                #[inline(always)]
                                |lane| read_slice(this.seq, offsets[lane] + (i / Self::C8)),
                            );
                            unsafe {
                                *TryInto::<&mut [u32x8; 8]>::try_into(
                                    buf.get_unchecked_mut(write_idx..write_idx + 8),
                                )
                                .unwrap_unchecked() = transpose(data);
                            }
                            if i == 0 {
                                // Clear the low `B * o` bits (the offset characters) of each
                                // lane's first word, so the delayed stream yields 0 for them.
                                let elem = !((1u32 << (B * o)) - 1);
                                let mask = S::splat(elem);
                                buf[write_idx] &= mask;
                            }
                        }
                        upcoming = buf[write_idx];
                        write_idx += 1;
                        write_idx &= buf_mask;
                    }
                    if i % Self::C32 == delay % Self::C32 {
                        unsafe { assert_unchecked(read_idx < buf.len()) };
                        upcoming_d = buf[read_idx];
                        read_idx += 1;
                        read_idx &= buf_mask;
                    }
                    let chars = upcoming & S::splat(Self::CHAR_MASK as u32);
                    let chars_d = upcoming_d & S::splat(Self::CHAR_MASK as u32);
                    upcoming = upcoming >> S::splat(B as u32);
                    upcoming_d = upcoming_d >> S::splat(B as u32);
                    (chars, chars_d)
                },
            )
            .advance(o);

        PaddedIt { it, padding }
    }

    #[inline(always)]
    fn par_iter_bp_delayed_2(
        self,
        context: usize,
        delay1: Delay,
        delay2: Delay,
    ) -> PaddedIt<impl ChunkIt<(S, S, S)>> {
        self.par_iter_bp_delayed_2_with_factor(context, delay1, delay2, 1)
    }

    /// Compare two sequences and also return the length of their longest common prefix.
    fn cmp_lcp(&self, other: &Self) -> (std::cmp::Ordering, usize) {
        let mut lcp = 0;
        let min_len = self.len.min(other.len);
        for i in (0..min_len).step_by(Self::K64) {
            let len = (min_len - i).min(Self::K64);
            let this = self.slice(i..i + len);
            let other = other.slice(i..i + len);
            let this_word = this.as_u64();
            let other_word = other.as_u64();
            if this_word != other_word {
                let eq = this_word ^ other_word;
                // Bit position of the first differing character.
                let t = eq.trailing_zeros() as usize / B * B;
                lcp += t / B;
                let mask = (Self::CHAR_MASK) << t;
                return ((this_word & mask).cmp(&(other_word & mask)), lcp);
            }
            lcp += len;
        }
        (self.len.cmp(&other.len), lcp)
    }

    #[inline(always)]
    fn get(&self, index: usize) -> u8 {
        let offset = self.offset + index;
        let idx = offset / Self::C8;
        let offset = offset % Self::C8;
        (self.seq[idx] >> (B * offset)) & Self::CHAR_MASK as u8
    }
}

impl<'s, const B: usize> PackedSeqBase<'s, B>
where
    Bits<B>: SupportedBits,
{
    #[inline(always)]
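    /// Like `par_iter_bp_delayed_2` (three streams: the current character and two
    /// streams delayed by `delay1` and `delay2` positions), but rounds each lane's
    /// chunk size up to a multiple of `factor` bytes.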
    pub fn par_iter_bp_delayed_2_with_factor(
        self,
        context: usize,
        Delay(delay1): Delay,
        Delay(delay2): Delay,
        factor: usize,
    ) -> PaddedIt<impl ChunkIt<(S, S, S)> + use<'s, B>> {
        #[cfg(target_endian = "big")]
        panic!("Big endian architectures are not supported.");

        let this = self.normalize();
        let o = this.offset;
        assert!(o < Self::C8);
        assert!(delay1 <= delay2, "Delay1 must be at most delay2.");

        let num_kmers = if this.len == 0 {
            0
        } else {
            (this.len + o).saturating_sub(context - 1)
        };
        let num_kmers_stride = this.len.saturating_sub(context - 1);
        let n = num_kmers_stride
            .div_ceil(L)
            .next_multiple_of(factor * Self::C8);
        let bytes_per_chunk = n / Self::C8;
        let padding = Self::C8 * L * bytes_per_chunk - num_kmers_stride;

        let offsets: [usize; 8] = from_fn(|l| l * bytes_per_chunk);
        let mut upcoming = S::ZERO;
        let mut upcoming_d1 = S::ZERO;
        let mut upcoming_d2 = S::ZERO;

        // Ring buffer sized for the larger delay; each delayed stream reads its own
        // number of words behind the write position.
        let buf_len = (delay2 / Self::C32 + 8).next_power_of_two();
        let buf_mask = buf_len - 1;
        let mut buf = vec![S::ZERO; buf_len];
        let mut write_idx = 0;
        let mut read_idx1 = (buf_len - delay1 / Self::C32) % buf_len;
        let mut read_idx2 = (buf_len - delay2 / Self::C32) % buf_len;

        let par_len = if num_kmers == 0 {
            0
        } else {
            n + context + o - 1
        };
        let it = (0..par_len)
            .map(
                #[inline(always)]
                move |i| {
                    if i % Self::C32 == 0 {
                        if i % Self::C256 == 0 {
                            let data: [u32x8; 8] = from_fn(
                                #[inline(always)]
                                |lane| read_slice(this.seq, offsets[lane] + (i / Self::C8)),
                            );
                            unsafe {
                                *TryInto::<&mut [u32x8; 8]>::try_into(
                                    buf.get_unchecked_mut(write_idx..write_idx + 8),
                                )
                                .unwrap_unchecked() = transpose(data);
                            }
                            if i == 0 {
                                // Zero the first `o` characters of each lane's first word.
                                let elem = !((1u32 << (B * o)) - 1);
                                let mask = S::splat(elem);
                                buf[write_idx] &= mask;
                            }
                        }
                        upcoming = buf[write_idx];
                        write_idx += 1;
                        write_idx &= buf_mask;
                    }
                    if i % Self::C32 == delay1 % Self::C32 {
                        unsafe { assert_unchecked(read_idx1 < buf.len()) };
                        upcoming_d1 = buf[read_idx1];
                        read_idx1 += 1;
                        read_idx1 &= buf_mask;
                    }
                    if i % Self::C32 == delay2 % Self::C32 {
                        unsafe { assert_unchecked(read_idx2 < buf.len()) };
                        upcoming_d2 = buf[read_idx2];
                        read_idx2 += 1;
                        read_idx2 &= buf_mask;
                    }
                    let chars = upcoming & S::splat(Self::CHAR_MASK as u32);
                    let chars_d1 = upcoming_d1 & S::splat(Self::CHAR_MASK as u32);
                    let chars_d2 = upcoming_d2 & S::splat(Self::CHAR_MASK as u32);
                    upcoming = upcoming >> S::splat(B as u32);
                    upcoming_d1 = upcoming_d1 >> S::splat(B as u32);
                    upcoming_d2 = upcoming_d2 >> S::splat(B as u32);
                    (chars, chars_d1, chars_d2)
                },
            )
            .advance(o);

        PaddedIt { it, padding }
    }
}

impl<const B: usize> PartialEq for PackedSeqBase<'_, B>
where
    Bits<B>: SupportedBits,
{
    fn eq(&self, other: &Self) -> bool {
        if self.len != other.len {
            return false;
        }
        for i in (0..self.len).step_by(Self::K64) {
            let len = (self.len - i).min(Self::K64);
            let this = self.slice(i..i + len);
            let that = other.slice(i..i + len);
            if this.as_u64() != that.as_u64() {
                return false;
            }
        }
        true
    }
}

impl<const B: usize> Eq for PackedSeqBase<'_, B> where Bits<B>: SupportedBits {}

impl<const B: usize> PartialOrd for PackedSeqBase<'_, B>
where
    Bits<B>: SupportedBits,
{
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl<const B: usize> Ord for PackedSeqBase<'_, B>
where
    Bits<B>: SupportedBits,
{
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        let min_len = self.len.min(other.len);
        for i in (0..min_len).step_by(Self::K64) {
            let len = (min_len - i).min(Self::K64);
            let this = self.slice(i..i + len);
            let other = other.slice(i..i + len);
            let this_word = this.as_u64();
            let other_word = other.as_u64();
            if this_word != other_word {
                let eq = this_word ^ other_word;
                let t = eq.trailing_zeros() as usize / B * B;
                let mask = (Self::CHAR_MASK) << t;
                return (this_word & mask).cmp(&(other_word & mask));
            }
        }
        self.len.cmp(&other.len)
    }
}

impl<const B: usize> SeqVec for PackedSeqVecBase<B>
where
    Bits<B>: SupportedBits,
{
    type Seq<'s> = PackedSeqBase<'s, B>;

    #[inline(always)]
    fn into_raw(mut self) -> Vec<u8> {
        self.seq.resize(self.len.div_ceil(Self::C8), 0);
        self.seq
    }

    #[inline(always)]
    fn as_slice(&self) -> Self::Seq<'_> {
        PackedSeqBase {
            seq: &self.seq[..self.len.div_ceil(Self::C8)],
            offset: 0,
            len: self.len,
        }
    }

    #[inline(always)]
    fn len(&self) -> usize {
        self.len
    }

    #[inline(always)]
    fn is_empty(&self) -> bool {
        self.len == 0
    }

    #[inline(always)]
    fn clear(&mut self) {
        self.seq.clear();
        self.len = 0;
    }

    fn push_seq<'a>(&mut self, seq: PackedSeqBase<'_, B>) -> Range<usize> {
        let start = self.len.next_multiple_of(Self::C8) + seq.offset;
        let end = start + seq.len();
        self.seq.reserve(seq.seq.len());
        // Drop the old trailing padding, then append the new packed bytes
        // (including the bytes covering `seq.offset`) and fresh padding.
        self.seq.resize(self.len.div_ceil(Self::C8), 0);
        self.seq.extend(seq.seq);
        self.seq.extend(std::iter::repeat_n(0u8, PADDING));
        self.len = end;
        start..end
    }

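    /// Append an ASCII sequence and return the range of character positions it occupies.
    /// For `B == 2` characters are packed as 2-bit DNA (lossy, like `pack_char_lossy`);
    /// for `B == 1` each character is stored as a single ambiguity bit (like
    /// `char_is_ambiguous`). Uses BMI2/NEON/AVX2 fast paths where available.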
    fn push_ascii(&mut self, seq: &[u8]) -> Range<usize> {
        match B {
            1 | 2 => {}
            _ => panic!(
                "Can only use ASCII input for 2-bit DNA packing, or 1-bit ambiguous indicators."
            ),
        }

        self.seq
            .resize((self.len + seq.len()).div_ceil(Self::C8) + PADDING, 0);
        let start_aligned = self.len.next_multiple_of(Self::C8);
        let start = self.len;
        let len = seq.len();
        let mut idx = self.len / Self::C8;

        let parse_base = |base| match B {
            1 => char_is_ambiguous(base),
            2 => pack_char_lossy(base),
            _ => unreachable!(),
        };

        // Fill up the partially used byte at the end of the existing data.
        let unaligned = core::cmp::min(start_aligned - start, len);
        if unaligned > 0 {
            let mut packed_byte = self.seq[idx];
            for &base in &seq[..unaligned] {
                packed_byte |= parse_base(base) << ((self.len % Self::C8) * B);
                self.len += 1;
            }
            self.seq[idx] = packed_byte;
            idx += 1;
        }

        #[allow(unused)]
        let mut last = unaligned;

        if B == 2 {
            #[cfg(all(target_arch = "x86_64", target_feature = "bmi2"))]
            {
                last = unaligned + (len - unaligned) / 8 * 8;

                for i in (unaligned..last).step_by(8) {
                    let chunk = &seq[i..i + 8].try_into().unwrap();
                    let ascii = u64::from_ne_bytes(*chunk);
                    // Extract bits 1 and 2 of each ASCII byte into 16 packed bits.
                    let packed_bytes =
                        unsafe { std::arch::x86_64::_pext_u64(ascii, 0x0606060606060606) };
                    self.seq[idx] = packed_bytes as u8;
                    idx += 1;
                    self.seq[idx] = (packed_bytes >> 8) as u8;
                    idx += 1;
                    self.len += 8;
                }
            }

            #[cfg(target_feature = "neon")]
            {
                use core::arch::aarch64::{
                    vandq_u8, vdup_n_u8, vld1q_u8, vpadd_u8, vshlq_u8, vst1_u8,
                };
                use core::mem::transmute;

                last = unaligned + (len - unaligned) / 16 * 16;

                for i in (unaligned..last).step_by(16) {
                    unsafe {
                        let ascii = vld1q_u8(seq.as_ptr().add(i));
                        let masked_bits = vandq_u8(ascii, transmute([6i8; 16]));
                        let (bits_0, bits_1) = transmute(vshlq_u8(
                            masked_bits,
                            transmute([-1i8, 1, 3, 5, -1, 1, 3, 5, -1, 1, 3, 5, -1, 1, 3, 5]),
                        ));
                        let half_packed = vpadd_u8(bits_0, bits_1);
                        let packed = vpadd_u8(half_packed, vdup_n_u8(0));
                        vst1_u8(self.seq.as_mut_ptr().add(idx), packed);
                        idx += Self::C8;
                        self.len += 16;
                    }
                }
            }
        }
        if B == 1 {
            #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
            {
                // The loop below also handles the final partial block, so it covers
                // all remaining characters.
                last = len;
                self.len = start + len;

                for i in (unaligned..last).step_by(32) {
                    use std::mem::transmute as t;

                    use wide::CmpEq;
                    type S = wide::i8x32;
                    let chars: S = unsafe { t(read_slice(seq, i)) };
                    let upper_mask = !(b'a' - b'A');
                    let chars = chars & S::splat(upper_mask as i8);
                    let lossy_encoded = chars & S::splat(6);
                    // Re-encode via the lossy 2-bit packing and a shuffle-table lookup;
                    // characters that do not match the lookup are exactly the ambiguous ones.
                    let table = unsafe { S::from(t::<_, S>(*b"AxCxTxGxxxxxxxxxAxCxTxGxxxxxxxxx")) };
                    let lookup: S = unsafe {
                        t(std::arch::x86_64::_mm256_shuffle_epi8(
                            t(table),
                            t(lossy_encoded),
                        ))
                    };
                    let packed_bytes = !(chars.cmp_eq(lookup).move_mask() as u32);

                    if i + 32 <= last {
                        self.seq[idx + 0] = packed_bytes as u8;
                        self.seq[idx + 1] = (packed_bytes >> 8) as u8;
                        self.seq[idx + 2] = (packed_bytes >> 16) as u8;
                        self.seq[idx + 3] = (packed_bytes >> 24) as u8;
                        idx += 4;
                    } else {
                        let mut b = 0;
                        while i + b < last {
                            self.seq[idx] = (packed_bytes >> b) as u8;
                            idx += 1;
                            b += 8;
                        }
                    }
                }
            }
        }

        let mut packed_byte = 0;
        for &base in &seq[last..] {
            packed_byte |= parse_base(base) << ((self.len % Self::C8) * B);
            self.len += 1;
            if self.len % Self::C8 == 0 {
                self.seq[idx] = packed_byte;
                idx += 1;
                packed_byte = 0;
            }
        }
        if self.len % Self::C8 != 0 && last < len {
            self.seq[idx] = packed_byte;
            idx += 1;
        }
        assert_eq!(idx + PADDING, self.seq.len());
        start..start + len
    }

    #[cfg(feature = "rand")]
    fn random(n: usize) -> Self {
        use rand::{RngCore, SeedableRng};

        let byte_len = n.div_ceil(Self::C8);
        let mut seq = vec![0; byte_len + PADDING];
        rand::rngs::SmallRng::from_os_rng().fill_bytes(&mut seq[..byte_len]);
        // Zero the unused high bits of the last byte.
        if n % Self::C8 != 0 {
            seq[byte_len - 1] &= (1 << (B * (n % Self::C8))) - 1;
        }

        Self { seq, len: n }
    }
}

impl<'s> PackedSeqBase<'s, 1> {
    #[inline(always)]
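    /// Iterate over all k-mers of a 1-bit ambiguity sequence, yielding for each k-mer
    /// whether any of its `k` bits is set, i.e. whether it contains an ambiguous character.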
    pub fn iter_kmer_ambiguity(self, k: usize) -> impl ExactSizeIterator<Item = bool> + use<'s> {
        let this = self.normalize();
        assert!(k > 0);
        assert!(k <= Self::K64);
        (this.offset..this.offset + this.len.saturating_sub(k - 1))
            .map(move |i| self.read_kmer(k, i) != 0)
    }

    #[inline(always)]
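    /// SIMD counterpart of `iter_kmer_ambiguity`: the sequence is split into 8 chunks
    /// that are scanned in parallel, as in `par_iter_bp` with the given `context`.
    /// Each lane of a yielded word is all ones when the corresponding k-mer contains an
    /// ambiguous character. The first `skip` positions of each chunk are skipped.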
    pub fn par_iter_kmer_ambiguity(
        self,
        k: usize,
        context: usize,
        skip: usize,
    ) -> PaddedIt<impl ChunkIt<S> + use<'s>> {
        #[cfg(target_endian = "big")]
        panic!("Big endian architectures are not supported.");

        assert!(k <= 64, "par_iter_kmer_ambiguity requires k<=64, but k={k}");

        let this = self.normalize();
        let o = this.offset;
        assert!(o < Self::C8);

        let num_kmers = if this.len == 0 {
            0
        } else {
            (this.len + o).saturating_sub(context - 1)
        };
        let num_kmers_stride = this.len.saturating_sub(context - 1);
        let n = num_kmers_stride.div_ceil(L).next_multiple_of(Self::C8);
        let bytes_per_chunk = n / Self::C8;
        let padding = Self::C8 * L * bytes_per_chunk - num_kmers_stride;

        let offsets: [usize; 8] = from_fn(|l| l * bytes_per_chunk);

        // `cur` holds a sliding window of three 32-bit words per lane; `mask` marks
        // which bits of that window belong to the current k-mer and is shifted by one
        // position per step.
        let mut cur = [S::ZERO; 3];
        let mut mask = [S::ZERO; 3];
        if k <= 32 {
            mask[2] = (S::MAX) << S::splat(32 - k as u32);
        } else {
            mask[2] = S::MAX;
            mask[1] = (S::MAX) << S::splat(64 - k as u32);
        }

        #[inline(always)]
        fn rotate_mask(mask: &mut [S; 3], r: u32) {
            let carry01 = mask[0] >> S::splat(32 - r);
            let carry12 = mask[1] >> S::splat(32 - r);
            mask[0] = mask[0] << r;
            mask[1] = (mask[1] << r) | carry01;
            mask[2] = (mask[2] << r) | carry12;
        }

        let mut buf = Box::new([S::ZERO; 8]);

        let par_len = if num_kmers == 0 { 0 } else { n + k + o - 1 };

        // Push the next 32 characters per lane into `cur`, transposing a fresh block
        // of 8 words every `C256` characters.
        let mut read = {
            #[inline(always)]
            move |i: usize, cur: &mut [S; 3]| {
                if i % Self::C256 == 0 {
                    let data: [u32x8; 8] = from_fn(
                        #[inline(always)]
                        |lane| read_slice(this.seq, offsets[lane] + (i / Self::C8)),
                    );
                    *buf = transpose(data);
                }
                cur[0] = cur[1];
                cur[1] = cur[2];
                cur[2] = buf[(i % Self::C256) / Self::C32];
            }
        };

        // Skip the first `o + skip` positions, shifting the mask into place.
        let mut to_skip = o + skip;
        let mut i = 0;
        while to_skip > 0 {
            read(i, &mut cur);
            i += 32;
            if to_skip >= 32 {
                to_skip -= 32;
            } else {
                mask[0] = mask[1];
                mask[1] = mask[2];
                mask[2] = S::splat(0);
                rotate_mask(&mut mask, to_skip as u32);
                break;
            }
        }

        let it = (o + skip..par_len).map(
            #[inline(always)]
            move |i| {
                if i % Self::C32 == 0 {
                    read(i, &mut cur);
                    mask[0] = mask[1];
                    mask[1] = mask[2];
                    mask[2] = S::splat(0);
                }

                rotate_mask(&mut mask, 1);
                // All ones when any bit of the current k-mer is set.
                !((cur[0] & mask[0]) | (cur[1] & mask[1]) | (cur[2] & mask[2])).cmp_eq(S::splat(0))
            },
        );

        PaddedIt { it, padding }
    }
}