// packed_seq/ascii.rs

use crate::{intrinsics::transpose, packed_seq::read_slice};

use super::*;

/// An ASCII sequence; each byte is one character and is returned as-is.
/// Prefer first packing into a `PackedSeqVec` for storage.
impl Seq<'_> for &[u8] {
    const BASES_PER_BYTE: usize = 1;
    const BITS_PER_CHAR: usize = 8;
    type SeqVec = Vec<u8>;

    #[inline(always)]
    fn len(&self) -> usize {
        <[u8]>::len(self)
    }

    #[inline(always)]
    fn is_empty(&self) -> bool {
        <[u8]>::is_empty(self)
    }

    #[inline(always)]
    fn get(&self, index: usize) -> u8 {
        self[index]
    }

    #[inline(always)]
    fn get_ascii(&self, index: usize) -> u8 {
        self[index]
    }

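    /// For example, for `s: &[u8] = b"ACGT"` on a little-endian target,
    /// `s.as_u64()` reads the bytes `[0x41, 0x43, 0x47, 0x54]` and masks to
    /// the low 32 bits, yielding `0x5447_4341`.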
    #[inline(always)]
    fn as_u64(&self) -> u64 {
        assert!(self.len() <= u64::BITS as usize / 8);
        if self.is_empty() {
            // Avoid the out-of-range shift (and out-of-bounds read) below.
            return 0;
        }
        // Read a full u64 and mask out the bytes past the end of the slice.
        let mask = u64::MAX >> (64 - 8 * self.len());
        unsafe { (self.as_ptr() as *const u64).read_unaligned() & mask }
    }

    #[inline(always)]
    fn revcomp_as_u64(&self) -> u64 {
        unimplemented!("Reverse complement is only defined for DNA sequences; use `AsciiSeq` or `PackedSeq` instead.")
    }

    /// Convert to an owned version.
    #[inline(always)]
    fn to_vec(&self) -> Vec<u8> {
        <[u8]>::to_vec(self)
    }

    #[inline(always)]
    fn to_revcomp(&self) -> Vec<u8> {
        unimplemented!("Reverse complement is only defined for DNA sequences; use `AsciiSeq` or `PackedSeq` instead.")
    }

    #[inline(always)]
    fn slice(&self, range: Range<usize>) -> Self {
        &self[range]
    }

    /// Iter the ASCII characters.
    #[inline(always)]
    fn iter_bp(self) -> impl ExactSizeIterator<Item = u8> + Clone {
        self.iter().copied()
    }

    /// Iter the ASCII characters in parallel.
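    /// Each of the `L` lanes streams a contiguous chunk of the sequence.
    /// For example, with `self.len() == 101` and `context == 5` there are
    /// `97` k-mers; each of the 8 lanes covers `n = ceil(97 / 8) = 13` of
    /// them, and the returned padding is `8 * 13 - 97 = 7`.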
    #[inline(always)]
    fn par_iter_bp(self, context: usize) -> (impl ExactSizeIterator<Item = S> + Clone, usize) {
        let num_kmers = self.len().saturating_sub(context - 1);
        let n = num_kmers.div_ceil(L);
        let padding = L * n - num_kmers;

        let offsets: [usize; 8] = from_fn(|l| l * n);
        let mut cur = S::ZERO;

        // Box the buffer so it doesn't consume precious registers;
        // without this, `cur` is not always kept in a register.
        let mut buf = Box::new([S::ZERO; 8]);

        let par_len = if num_kmers == 0 { 0 } else { n + context - 1 };
        let it = (0..par_len).map(
            #[inline(always)]
            move |i| {
                if i % 4 == 0 {
                    if i % 32 == 0 {
                        // Read a u256 for each lane, containing its next 32 characters.
                        let data: [u32x8; 8] = from_fn(
                            #[inline(always)]
                            |lane| read_slice(self, offsets[lane] + i),
                        );
                        *buf = transpose(data);
                    }
                    cur = buf[(i % 32) / 4];
                }
                // Extract the low byte (one ASCII character) of each lane.
                let chars = cur & S::splat(0xff);
                // Shift the remaining characters to the right.
                cur = cur >> S::splat(8);
                chars
            },
        );

        (it, padding)
    }
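    /// Like `par_iter_bp`, but additionally yields, for each position, the
    /// character `delay` positions earlier in the same lane (zero while fewer
    /// than `delay` characters have been produced).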
    #[inline(always)]
    fn par_iter_bp_delayed(
        self,
        context: usize,
        delay: usize,
    ) -> (impl ExactSizeIterator<Item = (S, S)> + Clone, usize) {
        assert!(
            delay < usize::MAX / 2,
            "Delay={} should be non-negative.",
            delay as isize
        );

        let num_kmers = self.len().saturating_sub(context - 1);
        let n = num_kmers.div_ceil(L);
        let padding = L * n - num_kmers;

        let offsets: [usize; 8] = from_fn(|l| l * n);
        let mut upcoming = S::ZERO;
        let mut upcoming_d = S::ZERO;

        // Round the buffer length up to a power of two so the ring-buffer
        // indices can wrap with a cheap mask instead of a modulo.
        // `delay / 4` is the number of u32 words covering `delay` characters
        // (4 ASCII characters per u32).
        let buf_len = (delay / 4 + 8).next_power_of_two();
        let buf_mask = buf_len - 1;
        let mut buf = vec![S::ZERO; buf_len];
        let mut write_idx = 0;
        // Offset the read index so that the first `delay / 4` triggers of the
        // read below, which happen before the delay is actually reached, wrap
        // around the buffer.
        let mut read_idx = (buf_len - delay / 4) % buf_len;
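        // For example, with `delay == 5`: `buf_len == (1 + 8).next_power_of_two() == 16`
        // and `read_idx` starts at `15`, so the word written at step `i` is
        // read back starting at step `i + 5`, i.e. the delayed stream lags by
        // exactly `delay` characters.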

        let par_len = if num_kmers == 0 { 0 } else { n + context - 1 };
        let it = (0..par_len).map(
            #[inline(always)]
            move |i| {
                if i % 4 == 0 {
                    if i % 32 == 0 {
                        // Read a u256 for each lane, containing its next 32 characters.
                        let data: [u32x8; 8] = from_fn(
                            #[inline(always)]
                            |lane| read_slice(self, offsets[lane] + i),
                        );
                        unsafe {
                            let mut_array: &mut [u32x8; 8] = buf
                                .get_unchecked_mut(write_idx..write_idx + 8)
                                .try_into()
                                .unwrap_unchecked();
                            *mut_array = transpose(data);
                        }
                    }
                    upcoming = buf[write_idx];
                    write_idx += 1;
                    write_idx &= buf_mask;
                }
                if i % 4 == delay % 4 {
                    unsafe { assert_unchecked(read_idx < buf.len()) };
                    upcoming_d = buf[read_idx];
                    read_idx += 1;
                    read_idx &= buf_mask;
                }
                // Extract the low byte (one ASCII character) of each lane.
                let chars = upcoming & S::splat(0xff);
                let chars_d = upcoming_d & S::splat(0xff);
                // Shift the remaining characters to the right.
                upcoming = upcoming >> S::splat(8);
                upcoming_d = upcoming_d >> S::splat(8);
                (chars, chars_d)
            },
        );

        (it, padding)
    }
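    /// Like `par_iter_bp_delayed`, but yields two delayed copies of the
    /// stream, lagging by `delay1` and `delay2` characters respectively.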
    #[inline(always)]
    fn par_iter_bp_delayed_2(
        self,
        context: usize,
        delay1: usize,
        delay2: usize,
    ) -> (impl ExactSizeIterator<Item = (S, S, S)> + Clone, usize) {
        assert!(delay1 <= delay2, "Delay1 must be at most delay2.");

        let num_kmers = self.len().saturating_sub(context - 1);
        let n = num_kmers.div_ceil(L);
        let padding = L * n - num_kmers;

        let offsets: [usize; 8] = from_fn(|l| l * n);

        let mut upcoming = S::ZERO;
        let mut upcoming_d1 = S::ZERO;
        let mut upcoming_d2 = S::ZERO;

        // Round the buffer length up to a power of two so the ring-buffer
        // indices can wrap with a cheap mask instead of a modulo.
        // `delay2 / 4` is the number of u32 words covering `delay2` characters
        // (4 ASCII characters per u32).
        let buf_len = (delay2 / 4 + 8).next_power_of_two();
        let buf_mask = buf_len - 1;
        let mut buf = vec![S::ZERO; buf_len];
        let mut write_idx = 0;
        // Offset the read indices so that the first `delay1 / 4` resp.
        // `delay2 / 4` triggers of the reads below, which happen before the
        // respective delays are actually reached, wrap around the buffer.
        let mut read_idx1 = (buf_len - delay1 / 4) % buf_len;
        let mut read_idx2 = (buf_len - delay2 / 4) % buf_len;

        let par_len = if num_kmers == 0 { 0 } else { n + context - 1 };
        let it = (0..par_len).map(
            #[inline(always)]
            move |i| {
                if i % 4 == 0 {
                    if i % 32 == 0 {
                        // Read a u256 for each lane, containing its next 32 characters.
                        let data: [u32x8; 8] = from_fn(
                            #[inline(always)]
                            |lane| read_slice(self, offsets[lane] + i),
                        );
                        unsafe {
                            let mut_array: &mut [u32x8; 8] = buf
                                .get_unchecked_mut(write_idx..write_idx + 8)
                                .try_into()
                                .unwrap_unchecked();
                            *mut_array = transpose(data);
                        }
                    }
                    upcoming = buf[write_idx];
                    write_idx += 1;
                    write_idx &= buf_mask;
                }
                if i % 4 == delay1 % 4 {
                    unsafe { assert_unchecked(read_idx1 < buf.len()) };
                    upcoming_d1 = buf[read_idx1];
                    read_idx1 += 1;
                    read_idx1 &= buf_mask;
                }
                if i % 4 == delay2 % 4 {
                    unsafe { assert_unchecked(read_idx2 < buf.len()) };
                    upcoming_d2 = buf[read_idx2];
                    read_idx2 += 1;
                    read_idx2 &= buf_mask;
                }
                // Extract the low byte (one ASCII character) of each lane.
                let chars = upcoming & S::splat(0xff);
                let chars_d1 = upcoming_d1 & S::splat(0xff);
                let chars_d2 = upcoming_d2 & S::splat(0xff);
                // Shift the remaining characters to the right.
                upcoming = upcoming >> S::splat(8);
                upcoming_d1 = upcoming_d1 >> S::splat(8);
                upcoming_d2 = upcoming_d2 >> S::splat(8);
                (chars, chars_d1, chars_d2)
            },
        );

        (it, padding)
    }

    // TODO: This is not very optimized.
    fn cmp_lcp(&self, other: &Self) -> (std::cmp::Ordering, usize) {
        for i in 0..self.len().min(other.len()) {
            if self[i] != other[i] {
                return (self[i].cmp(&other[i]), i);
            }
        }
        (self.len().cmp(&other.len()), self.len().min(other.len()))
    }
}
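
// A sanity-check sketch of the lane layout above, not part of the public API.
// It assumes `L == 8` and that `S` (the SIMD word type used in this module)
// is `wide::u32x8`, which exposes `to_array`.
#[cfg(test)]
mod ascii_seq_tests {
    use super::*;

    #[test]
    fn par_iter_bp_splits_into_lanes() {
        // 256 characters with context == 1: 256 k-mers, so each of the 8
        // lanes covers a contiguous chunk of n == 32 characters, with no
        // padding, and all SIMD reads stay in bounds.
        let seq: Vec<u8> = (0u8..=255).collect();
        let (it, padding) = seq.as_slice().par_iter_bp(1);
        assert_eq!(padding, 0);
        let chunks: Vec<S> = it.collect();
        assert_eq!(chunks.len(), 32);
        for (i, chunk) in chunks.iter().enumerate() {
            for (lane, &c) in chunk.to_array().iter().enumerate() {
                // Lane `lane` yields character `32 * lane + i` at step `i`.
                assert_eq!(c, (32 * lane + i) as u32);
            }
        }
    }

    #[test]
    fn cmp_lcp_reports_first_mismatch() {
        let a: &[u8] = b"ACGT";
        let b: &[u8] = b"ACGA";
        // 'T' > 'A' at index 3, after an LCP of 3 characters.
        assert_eq!(a.cmp_lcp(&b), (std::cmp::Ordering::Greater, 3));
    }
}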

impl SeqVec for Vec<u8> {
    type Seq<'s> = &'s [u8];

    /// Get the underlying ASCII text.
    #[inline(always)]
    fn into_raw(self) -> Vec<u8> {
        self
    }

    #[inline(always)]
    fn as_slice(&self) -> Self::Seq<'_> {
        // Resolves to the inherent `Vec::as_slice`, not this trait method.
        self.as_slice()
    }

    #[inline(always)]
    fn len(&self) -> usize {
        self.len()
    }

    #[inline(always)]
    fn is_empty(&self) -> bool {
        self.is_empty()
    }

    #[inline(always)]
    fn clear(&mut self) {
        self.clear()
    }

    #[inline(always)]
    fn push_seq(&mut self, seq: &[u8]) -> Range<usize> {
        let start = self.len();
        let end = start + seq.len();
        let range = start..end;
        self.extend_from_slice(seq);
        range
    }

    #[inline(always)]
    fn push_ascii(&mut self, seq: &[u8]) -> Range<usize> {
        self.push_seq(seq)
    }

    #[cfg(feature = "rand")]
    fn random(n: usize) -> Self {
        use rand::{RngCore, SeedableRng};

        let mut seq = vec![0; n];
        rand::rngs::SmallRng::from_os_rng().fill_bytes(&mut seq);
        seq
    }
}
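
// A minimal usage sketch of the `SeqVec` side: `push_seq` appends a sequence
// and reports the range where it landed, so several sequences can share one
// backing vector.
#[cfg(test)]
mod ascii_seq_vec_tests {
    use super::*;

    #[test]
    fn push_seq_returns_ranges() {
        let mut v = Vec::<u8>::new();
        let r1 = v.push_seq(b"ACGT");
        let r2 = v.push_seq(b"GG");
        assert_eq!((r1.start, r1.end), (0, 4));
        assert_eq!((r2.start, r2.end), (4, 6));
        // The returned ranges index back into the backing vector.
        assert_eq!(&v[r1], b"ACGT");
        assert_eq!(&v[r2], b"GG");
    }
}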