packed_seq/
ascii.rs

1use crate::{intrinsics::transpose, packed_seq::read_slice};
2
3use super::*;
4
5/// Maps ASCII to `[0, 4)` on the fly.
6/// Prefer first packing into a `PackedSeqVec` for storage.
7impl Seq<'_> for &[u8] {
8    const BASES_PER_BYTE: usize = 1;
9    const BITS_PER_CHAR: usize = 8;
10    type SeqVec = Vec<u8>;
11
12    #[inline(always)]
13    fn len(&self) -> usize {
14        <[u8]>::len(self)
15    }
16
17    #[inline(always)]
18    fn is_empty(&self) -> bool {
19        <[u8]>::is_empty(self)
20    }
21
22    #[inline(always)]
23    fn get(&self, index: usize) -> u8 {
24        self[index]
25    }
26
27    #[inline(always)]
28    fn get_ascii(&self, index: usize) -> u8 {
29        self[index]
30    }
31
32    #[inline(always)]
33    fn as_u64(&self) -> u64 {
34        assert!(self.len() <= u64::BITS as usize / 8);
35        let mask = u64::MAX >> (64 - 8 * self.len());
36        unsafe { (self.as_ptr() as *const u64).read_unaligned() & mask }
37    }
38
39    #[inline(always)]
40    fn revcomp_as_u64(&self) -> u64 {
41        unimplemented!("Reverse complement is only defined for DNA sequences, use `AsciiSeq` or `PackedSeq` instead.")
42    }
43
44    #[inline(always)]
45    fn as_u128(&self) -> u128 {
46        assert!(self.len() <= u128::BITS as usize / 8);
47        let mask = u128::MAX >> (128 - 8 * self.len());
48        unsafe { (self.as_ptr() as *const u128).read_unaligned() & mask }
49    }
50
51    #[inline(always)]
52    fn revcomp_as_u128(&self) -> u128 {
53        unimplemented!("Reverse complement is only defined for DNA sequences, use `AsciiSeq` or `PackedSeq` instead.")
54    }
55
56    /// Convert to an owned version.
57    #[inline(always)]
58    fn to_vec(&self) -> Vec<u8> {
59        <[u8]>::to_vec(self)
60    }
61
62    #[inline(always)]
63    fn to_revcomp(&self) -> Vec<u8> {
64        unimplemented!("Reverse complement is only defined for DNA sequences, use `AsciiSeq` or `PackedSeq` instead.")
65    }
66
67    #[inline(always)]
68    fn slice(&self, range: Range<usize>) -> Self {
69        &self[range]
70    }
71
72    /// Iter the ASCII characters.
73    #[inline(always)]
74    fn iter_bp(self) -> impl ExactSizeIterator<Item = u8> + Clone {
75        self.iter().copied()
76    }
77
78    /// Iter the ASCII characters in parallel.
79    #[inline(always)]
80    fn par_iter_bp(self, context: usize) -> (impl ExactSizeIterator<Item = S> + Clone, usize) {
81        let num_kmers = self.len().saturating_sub(context - 1);
82        let n = num_kmers.div_ceil(L);
83        let padding = L * n - num_kmers;
84
85        let offsets: [usize; 8] = from_fn(|l| l * n);
86        let mut cur = S::ZERO;
87
88        // Boxed, so it doesn't consume precious registers.
89        // Without this, cur is not always inlined into a register.
90        let mut buf = Box::new([S::ZERO; 8]);
91
92        let par_len = if num_kmers == 0 { 0 } else { n + context - 1 };
93        let it = (0..par_len).map(
94            #[inline(always)]
95            move |i| {
96                if i % 4 == 0 {
97                    if i % 32 == 0 {
98                        // Read a u256 for each lane containing the next 32 characters.
99                        let data: [u32x8; 8] = from_fn(
100                            #[inline(always)]
101                            |lane| read_slice(self, offsets[lane] + i),
102                        );
103                        *buf = transpose(data);
104                    }
105                    cur = buf[(i % 32) / 4];
106                }
107                // Extract the last 2 bits of each character.
108                let chars = cur & S::splat(0xff);
109                // Shift remaining characters to the right.
110                cur = cur >> S::splat(8);
111                chars
112            },
113        );
114
115        (it, padding)
116    }
117
118    #[inline(always)]
119    fn par_iter_bp_delayed(
120        self,
121        context: usize,
122        delay: usize,
123    ) -> (impl ExactSizeIterator<Item = (S, S)> + Clone, usize) {
124        assert!(
125            delay < usize::MAX / 2,
126            "Delay={} should be >=0.",
127            delay as isize
128        );
129
130        let num_kmers = self.len().saturating_sub(context - 1);
131        let n = num_kmers.div_ceil(L);
132        let padding = L * n - num_kmers;
133
134        let offsets: [usize; 8] = from_fn(|l| l * n);
135        let mut upcoming = S::ZERO;
136        let mut upcoming_d = S::ZERO;
137
138        // Even buf_len is nice to only have the write==buf_len check once.
139        // We also make it the next power of 2, for faster modulo operations.
140        // delay/4: number of bp in a u32.
141        let buf_len = (delay / 4 + 8).next_power_of_two();
142        let buf_mask = buf_len - 1;
143        let mut buf = vec![S::ZERO; buf_len];
144        let mut write_idx = 0;
145        // We compensate for the first delay/16 triggers of the check below that
146        // happen before the delay is actually reached.
147        let mut read_idx = (buf_len - delay / 4) % buf_len;
148
149        let par_len = if num_kmers == 0 { 0 } else { n + context - 1 };
150        let it = (0..par_len).map(
151            #[inline(always)]
152            move |i| {
153                if i % 4 == 0 {
154                    if i % 32 == 0 {
155                        // Read a u256 for each lane containing the next 32 characters.
156                        let data: [u32x8; 8] = from_fn(
157                            #[inline(always)]
158                            |lane| read_slice(self, offsets[lane] + i),
159                        );
160                        unsafe {
161                            let mut_array: &mut [u32x8; 8] = buf
162                                .get_unchecked_mut(write_idx..write_idx + 8)
163                                .try_into()
164                                .unwrap_unchecked();
165                            *mut_array = transpose(data);
166                        }
167                    }
168                    upcoming = buf[write_idx];
169                    write_idx += 1;
170                    write_idx &= buf_mask;
171                }
172                if i % 4 == delay % 4 {
173                    unsafe { assert_unchecked(read_idx < buf.len()) };
174                    upcoming_d = buf[read_idx];
175                    read_idx += 1;
176                    read_idx &= buf_mask;
177                }
178                // Extract the last 2 bits of each character.
179                let chars = upcoming & S::splat(0xff);
180                let chars_d = upcoming_d & S::splat(0xff);
181                // Shift remaining characters to the right.
182                upcoming = upcoming >> S::splat(8);
183                upcoming_d = upcoming_d >> S::splat(8);
184                (chars, chars_d)
185            },
186        );
187
188        (it, padding)
189    }
190
191    #[inline(always)]
192    fn par_iter_bp_delayed_2(
193        self,
194        context: usize,
195        delay1: usize,
196        delay2: usize,
197    ) -> (impl ExactSizeIterator<Item = (S, S, S)> + Clone, usize) {
198        assert!(delay1 <= delay2, "Delay1 must be at most delay2.");
199
200        let num_kmers = self.len().saturating_sub(context - 1);
201        let n = num_kmers.div_ceil(L);
202        let padding = L * n - num_kmers;
203
204        let offsets: [usize; 8] = from_fn(|l| l * n);
205
206        let mut upcoming = S::ZERO;
207        let mut upcoming_d1 = S::ZERO;
208        let mut upcoming_d2 = S::ZERO;
209
210        // Even buf_len is nice to only have the write==buf_len check once.
211        // We also make it the next power of 2, for faster modulo operations.
212        // delay/4: number of bp in a u32.
213        let buf_len = (delay2 / 4 + 8).next_power_of_two();
214        let buf_mask = buf_len - 1;
215        let mut buf = vec![S::ZERO; buf_len];
216        let mut write_idx = 0;
217        // We compensate for the first delay/16 triggers of the check below that
218        // happen before the delay is actually reached.
219        let mut read_idx1 = (buf_len - delay1 / 4) % buf_len;
220        let mut read_idx2 = (buf_len - delay2 / 4) % buf_len;
221
222        let par_len = if num_kmers == 0 { 0 } else { n + context - 1 };
223        let it = (0..par_len).map(
224            #[inline(always)]
225            move |i| {
226                if i % 4 == 0 {
227                    if i % 32 == 0 {
228                        // Read a u256 for each lane containing the next 32 characters.
229                        let data: [u32x8; 8] = from_fn(
230                            #[inline(always)]
231                            |lane| read_slice(self, offsets[lane] + i),
232                        );
233                        unsafe {
234                            let mut_array: &mut [u32x8; 8] = buf
235                                .get_unchecked_mut(write_idx..write_idx + 8)
236                                .try_into()
237                                .unwrap_unchecked();
238                            *mut_array = transpose(data);
239                        }
240                    }
241                    upcoming = buf[write_idx];
242                    write_idx += 1;
243                    write_idx &= buf_mask;
244                }
245                if i % 4 == delay1 % 4 {
246                    unsafe { assert_unchecked(read_idx1 < buf.len()) };
247                    upcoming_d1 = buf[read_idx1];
248                    read_idx1 += 1;
249                    read_idx1 &= buf_mask;
250                }
251                if i % 4 == delay2 % 4 {
252                    unsafe { assert_unchecked(read_idx2 < buf.len()) };
253                    upcoming_d2 = buf[read_idx2];
254                    read_idx2 += 1;
255                    read_idx2 &= buf_mask;
256                }
257                // Extract the last 2 bits of each character.
258                let chars = upcoming & S::splat(0xff);
259                let chars_d1 = upcoming_d1 & S::splat(0xff);
260                let chars_d2 = upcoming_d2 & S::splat(0xff);
261                // Shift remaining characters to the right.
262                upcoming = upcoming >> S::splat(8);
263                upcoming_d1 = upcoming_d1 >> S::splat(8);
264                upcoming_d2 = upcoming_d2 >> S::splat(8);
265                (chars, chars_d1, chars_d2)
266            },
267        );
268
269        (it, padding)
270    }
271
272    // TODO: This is not very optimized.
273    fn cmp_lcp(&self, other: &Self) -> (std::cmp::Ordering, usize) {
274        for i in 0..self.len().min(other.len()) {
275            if self[i] != other[i] {
276                return (self[i].cmp(&other[i]), i);
277            }
278        }
279        (self.len().cmp(&other.len()), self.len().min(other.len()))
280    }
281}
282
283impl SeqVec for Vec<u8> {
284    type Seq<'s> = &'s [u8];
285
286    /// Get the underlying ASCII text.
287    #[inline(always)]
288    fn into_raw(self) -> Vec<u8> {
289        self
290    }
291
292    #[inline(always)]
293    fn as_slice(&self) -> Self::Seq<'_> {
294        self.as_slice()
295    }
296
297    #[inline(always)]
298    fn len(&self) -> usize {
299        self.len()
300    }
301
302    #[inline(always)]
303    fn is_empty(&self) -> bool {
304        self.is_empty()
305    }
306
307    #[inline(always)]
308    fn clear(&mut self) {
309        self.clear()
310    }
311
312    #[inline(always)]
313    fn push_seq(&mut self, seq: &[u8]) -> Range<usize> {
314        let start = self.len();
315        let end = start + seq.len();
316        let range = start..end;
317        self.extend(seq);
318        range
319    }
320
321    #[inline(always)]
322    fn push_ascii(&mut self, seq: &[u8]) -> Range<usize> {
323        self.push_seq(seq)
324    }
325
326    #[cfg(feature = "rand")]
327    fn random(n: usize) -> Self {
328        use rand::{RngCore, SeedableRng};
329
330        let mut seq = vec![0; n];
331        rand::rngs::SmallRng::from_os_rng().fill_bytes(&mut seq);
332        seq
333    }
334}