// packed_seq/ascii.rs

use crate::{intrinsics::transpose, packed_seq::read_slice_32, padded_it::ChunkIt};

use super::*;

/// A plain ASCII sequence: characters are used as-is, without mapping to `[0, 4)`.
/// Prefer first packing into a `PackedSeqVec` for storage.
impl Seq<'_> for &[u8] {
    const BASES_PER_BYTE: usize = 1;
    const BITS_PER_CHAR: usize = 8;
    type SeqVec = Vec<u8>;

    #[inline(always)]
    fn len(&self) -> usize {
        <[u8]>::len(self)
    }

    #[inline(always)]
    fn is_empty(&self) -> bool {
        <[u8]>::is_empty(self)
    }

    #[inline(always)]
    fn get(&self, index: usize) -> u8 {
        self[index]
    }

    #[inline(always)]
    fn get_ascii(&self, index: usize) -> u8 {
        self[index]
    }

    #[inline(always)]
    fn as_u64(&self) -> u64 {
        assert!(self.len() <= u64::BITS as usize / 8);
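        // The mask keeps the low `8 * len` bits; e.g. `len == 3` keeps the low
        // 24 bits (`u64::MAX >> 40`), i.e. three ASCII bytes. Note that the
        // shift requires `len >= 1` (a shift by 64 is invalid), and that the
        // unaligned read below always fetches 8 bytes, so for slices shorter
        // than 8 bytes it reads past the end of the slice.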
        let mask = u64::MAX >> (64 - 8 * self.len());
        unsafe { (self.as_ptr() as *const u64).read_unaligned() & mask }
    }

    #[inline(always)]
    fn revcomp_as_u64(&self) -> u64 {
        unimplemented!(
            "Reverse complement is only defined for DNA sequences, use `AsciiSeq` or `PackedSeq` instead."
        )
    }

    #[inline(always)]
    fn as_u128(&self) -> u128 {
        assert!(self.len() <= u128::BITS as usize / 8);
        let mask = u128::MAX >> (128 - 8 * self.len());
        unsafe { (self.as_ptr() as *const u128).read_unaligned() & mask }
    }

    #[inline(always)]
    fn revcomp_as_u128(&self) -> u128 {
        unimplemented!(
            "Reverse complement is only defined for DNA sequences, use `AsciiSeq` or `PackedSeq` instead."
        )
    }

    /// Convert to an owned version.
    #[inline(always)]
    fn to_vec(&self) -> Vec<u8> {
        <[u8]>::to_vec(self)
    }

    #[inline(always)]
    fn to_revcomp(&self) -> Vec<u8> {
        unimplemented!(
            "Reverse complement is only defined for DNA sequences, use `AsciiSeq` or `PackedSeq` instead."
        )
    }

    #[inline(always)]
    fn slice(&self, range: Range<usize>) -> Self {
        &self[range]
    }

    /// Iter the ASCII characters.
    #[inline(always)]
    fn iter_bp(self) -> impl ExactSizeIterator<Item = u8> {
        self.iter().copied()
    }

    /// Iter the ASCII characters in parallel.
    #[inline(always)]
    fn par_iter_bp(self, context: usize) -> PaddedIt<impl ChunkIt<u32x8>> {
        let num_kmers = self.len().saturating_sub(context - 1);
        let n = num_kmers.div_ceil(L);
        let padding = L * n - num_kmers;

        let offsets: [usize; 8] = from_fn(|l| l * n);
        let mut cur = S::ZERO;

        // Boxed, so it doesn't consume precious registers.
        // Without this, cur is not always inlined into a register.
        let mut buf = Box::new([S::ZERO; 8]);

        let par_len = if num_kmers == 0 { 0 } else { n + context - 1 };
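        // Each lane yields `n + context - 1` characters: enough to form `n`
        // k-mers of `context` characters per lane.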
        let it = (0..par_len).map(
            #[inline(always)]
            move |i| {
                if i % 4 == 0 {
                    if i % 32 == 0 {
                        // Read a u256 for each lane containing the next 32 characters.
                        let data: [u32x8; 8] = from_fn(
                            #[inline(always)]
                            |lane| read_slice_32(self, offsets[lane] + i),
                        );
                        *buf = transpose(data);
                    }
                    cur = buf[(i % 32) / 4];
                }
                // Extract the low 8 bits (one ASCII character) of each lane.
                let chars = cur & S::splat(0xff);
                // Shift the remaining characters to the right.
                cur = cur >> S::splat(8);
                chars
            },
        );

        PaddedIt { it, padding }
    }
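
    // A minimal usage sketch (hypothetical, not from the original file): the
    // sequence is split into 8 lanes of `n` characters each, so at iteration
    // `i`, lane `l` holds character `seq[l * n + i]`, and the last `padding`
    // of the `8 * n` k-mer slots are garbage:
    //
    //     let seq: &[u8] = b"ACGTACGTACGTACGTACGTACGTACGTACGT";
    //     let PaddedIt { it, padding } = seq.par_iter_bp(3);
    //     for chars in it {
    //         // `chars: u32x8` holds one ASCII character per lane.
    //     }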

    #[inline(always)]
    fn par_iter_bp_delayed(
        self,
        context: usize,
        Delay(delay): Delay,
    ) -> PaddedIt<impl ChunkIt<(u32x8, u32x8)>> {
        assert!(
            delay < usize::MAX / 2,
            "Delay={} must be non-negative.",
            delay as isize
        );

        let num_kmers = self.len().saturating_sub(context - 1);
        let n = num_kmers.div_ceil(L);
        let padding = L * n - num_kmers;

        let offsets: [usize; 8] = from_fn(|l| l * n);
        let mut upcoming = S::ZERO;
        let mut upcoming_d = S::ZERO;

        // An even buf_len means the write == buf_len wrap check is needed only once.
        // We also round up to the next power of two, so the wrap-around uses a
        // cheap mask instead of a modulo.
        // delay / 4: a u32 holds 4 ASCII characters.
        let buf_len = (delay / 4 + 8).next_power_of_two();
        let buf_mask = buf_len - 1;
        let mut buf = vec![S::ZERO; buf_len];
        let mut write_idx = 0;
        // We compensate for the first delay/4 triggers of the check below that
        // happen before the delay is actually reached.
        let mut read_idx = (buf_len - delay / 4) % buf_len;
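        // Worked example (illustrative numbers): `delay == 13` gives
        // `delay / 4 == 3` buffered words, `buf_len == (3 + 8).next_power_of_two() == 16`,
        // and `read_idx` starts at `(16 - 3) % 16 == 13`, i.e. 3 words behind the writer.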

        let par_len = if num_kmers == 0 { 0 } else { n + context - 1 };
        let it = (0..par_len).map(
            #[inline(always)]
            move |i| {
                if i % 4 == 0 {
                    if i % 32 == 0 {
                        // Read a u256 for each lane containing the next 32 characters.
                        let data: [u32x8; 8] = from_fn(
                            #[inline(always)]
                            |lane| read_slice_32(self, offsets[lane] + i),
                        );
                        unsafe {
                            let mut_array: &mut [u32x8; 8] = buf
                                .get_unchecked_mut(write_idx..write_idx + 8)
                                .try_into()
                                .unwrap_unchecked();
                            *mut_array = transpose(data);
                        }
                    }
                    upcoming = buf[write_idx];
                    write_idx += 1;
                    write_idx &= buf_mask;
                }
                if i % 4 == delay % 4 {
                    unsafe { assert_unchecked(read_idx < buf.len()) };
                    upcoming_d = buf[read_idx];
                    read_idx += 1;
                    read_idx &= buf_mask;
                }
                // Extract the low 8 bits (one ASCII character) of each lane.
                let chars = upcoming & S::splat(0xff);
                let chars_d = upcoming_d & S::splat(0xff);
                // Shift the remaining characters to the right.
                upcoming = upcoming >> S::splat(8);
                upcoming_d = upcoming_d >> S::splat(8);
                (chars, chars_d)
            },
        );

        PaddedIt { it, padding }
    }
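
    // Sketch of the yielded pairs (hypothetical, not from the original file):
    // at step `i` the iterator yields `(chars[i], chars[i - delay])` per lane,
    // where positions before the start read as zeros from the fresh buffer:
    //
    //     let PaddedIt { it, padding } = seq.par_iter_bp_delayed(k, Delay(k - 1));
    //     // E.g. add the character entering a window of size k and drop the
    //     // one leaving it within the same iteration.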

    #[inline(always)]
    fn par_iter_bp_delayed_2(
        self,
        context: usize,
        Delay(delay1): Delay,
        Delay(delay2): Delay,
    ) -> PaddedIt<impl ChunkIt<(u32x8, u32x8, u32x8)>> {
        assert!(delay1 <= delay2, "Delay1 must be at most delay2.");

        let num_kmers = self.len().saturating_sub(context - 1);
        let n = num_kmers.div_ceil(L);
        let padding = L * n - num_kmers;

        let offsets: [usize; 8] = from_fn(|l| l * n);

        let mut upcoming = S::ZERO;
        let mut upcoming_d1 = S::ZERO;
        let mut upcoming_d2 = S::ZERO;

        // An even buf_len means the write == buf_len wrap check is needed only once.
        // We also round up to the next power of two, so the wrap-around uses a
        // cheap mask instead of a modulo. The buffer is sized by the larger
        // delay2; delay2 / 4: a u32 holds 4 ASCII characters.
        let buf_len = (delay2 / 4 + 8).next_power_of_two();
        let buf_mask = buf_len - 1;
        let mut buf = vec![S::ZERO; buf_len];
        let mut write_idx = 0;
        // We compensate for the first delay1/4 resp. delay2/4 triggers of the
        // checks below that happen before the respective delay is reached.
        let mut read_idx1 = (buf_len - delay1 / 4) % buf_len;
        let mut read_idx2 = (buf_len - delay2 / 4) % buf_len;
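        // Worked example (illustrative numbers): `delay1 == 4`, `delay2 == 12`
        // give `buf_len == (12 / 4 + 8).next_power_of_two() == 16`,
        // `read_idx1 == (16 - 1) % 16 == 15`, and `read_idx2 == (16 - 3) % 16 == 13`.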

        let par_len = if num_kmers == 0 { 0 } else { n + context - 1 };
        let it = (0..par_len).map(
            #[inline(always)]
            move |i| {
                if i % 4 == 0 {
                    if i % 32 == 0 {
                        // Read a u256 for each lane containing the next 32 characters.
                        let data: [u32x8; 8] = from_fn(
                            #[inline(always)]
                            |lane| read_slice_32(self, offsets[lane] + i),
                        );
                        unsafe {
                            let mut_array: &mut [u32x8; 8] = buf
                                .get_unchecked_mut(write_idx..write_idx + 8)
                                .try_into()
                                .unwrap_unchecked();
                            *mut_array = transpose(data);
                        }
                    }
                    upcoming = buf[write_idx];
                    write_idx += 1;
                    write_idx &= buf_mask;
                }
                if i % 4 == delay1 % 4 {
                    unsafe { assert_unchecked(read_idx1 < buf.len()) };
                    upcoming_d1 = buf[read_idx1];
                    read_idx1 += 1;
                    read_idx1 &= buf_mask;
                }
                if i % 4 == delay2 % 4 {
                    unsafe { assert_unchecked(read_idx2 < buf.len()) };
                    upcoming_d2 = buf[read_idx2];
                    read_idx2 += 1;
                    read_idx2 &= buf_mask;
                }
                // Extract the low 8 bits (one ASCII character) of each lane.
                let chars = upcoming & S::splat(0xff);
                let chars_d1 = upcoming_d1 & S::splat(0xff);
                let chars_d2 = upcoming_d2 & S::splat(0xff);
                // Shift the remaining characters to the right.
                upcoming = upcoming >> S::splat(8);
                upcoming_d1 = upcoming_d1 >> S::splat(8);
                upcoming_d2 = upcoming_d2 >> S::splat(8);
                (chars, chars_d1, chars_d2)
            },
        );

        PaddedIt { it, padding }
    }

    // TODO: This is not very optimized.
    fn cmp_lcp(&self, other: &Self) -> (std::cmp::Ordering, usize) {
        for i in 0..self.len().min(other.len()) {
            if self[i] != other[i] {
                return (self[i].cmp(&other[i]), i);
            }
        }
        (self.len().cmp(&other.len()), self.len().min(other.len()))
    }
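
    // For example (illustration only): `b"ACGT"` vs `b"ACCT"` first differ at
    // index 2 ('G' > 'C'), so this returns `(Ordering::Greater, 2)`. If one
    // sequence is a prefix of the other, the result falls through to the
    // length comparison, with the common length as the LCP.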
}

impl SeqVec for Vec<u8> {
    type Seq<'s> = &'s [u8];

    /// Get the underlying ASCII text.
    #[inline(always)]
    fn into_raw(self) -> Vec<u8> {
        self
    }

    #[inline(always)]
    fn as_slice(&self) -> Self::Seq<'_> {
        self.as_slice()
    }

    #[inline(always)]
    fn len(&self) -> usize {
        self.len()
    }

    #[inline(always)]
    fn is_empty(&self) -> bool {
        self.is_empty()
    }

    #[inline(always)]
    fn clear(&mut self) {
        self.clear()
    }

    #[inline(always)]
    fn push_seq(&mut self, seq: &[u8]) -> Range<usize> {
        let start = self.len();
        let end = start + seq.len();
        let range = start..end;
        self.extend(seq);
        range
    }
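
    // E.g. (illustration only): pushing a 3-byte `seq` onto a vec of length 5
    // extends it to length 8 and returns the range `5..8`.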

    #[inline(always)]
    fn push_ascii(&mut self, seq: &[u8]) -> Range<usize> {
        self.push_seq(seq)
    }

    #[cfg(feature = "rand")]
    fn random(n: usize) -> Self {
        use rand::{RngCore, SeedableRng};

        let mut seq = vec![0; n];
        rand::rngs::SmallRng::from_os_rng().fill_bytes(&mut seq);
        seq
    }
}
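
// A minimal sanity-check sketch (hypothetical; not part of the original file).
// It assumes `Seq` and `SeqVec` are in scope via `super::*`, and a
// little-endian target for the `as_u64` byte order.
#[cfg(test)]
mod ascii_sketch_tests {
    use super::*;

    #[test]
    fn as_u64_masks_low_bytes() {
        // Use an 8-byte backing array so the 8-byte unaligned read inside
        // `as_u64` stays in bounds even though the slice has length 3.
        let buf = *b"ACGTACGT";
        let seq: &[u8] = &buf[..3];
        // 'A' = 0x41, 'C' = 0x43, 'G' = 0x47: the low 24 bits, little-endian.
        assert_eq!(seq.as_u64(), 0x47_4341);
    }

    #[test]
    fn cmp_lcp_reports_first_mismatch() {
        let a: &[u8] = b"ACGT";
        let b: &[u8] = b"ACCT";
        // 'G' > 'C' at index 2, after a common prefix of length 2.
        assert_eq!(a.cmp_lcp(&b), (std::cmp::Ordering::Greater, 2));
    }
}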