packed_seq/
ascii.rs

1use crate::{intrinsics::transpose, packed_seq::read_slice};
2
3use super::*;
4
5/// Maps ASCII to `[0, 4)` on the fly.
6/// Prefer first packing into a `PackedSeqVec` for storage.
7impl<'s> Seq<'s> for &[u8] {
8    const BASES_PER_BYTE: usize = 1;
9    const BITS_PER_CHAR: usize = 8;
10    type SeqVec = Vec<u8>;
11
12    #[inline(always)]
13    fn len(&self) -> usize {
14        <[u8]>::len(self)
15    }
16
17    #[inline(always)]
18    fn get(&self, index: usize) -> u8 {
19        self[index]
20    }
21
22    #[inline(always)]
23    fn get_ascii(&self, index: usize) -> u8 {
24        self[index]
25    }
26
27    #[inline(always)]
28    fn to_word(&self) -> usize {
29        assert!(self.len() <= usize::BITS as usize / 8);
30        let mask = usize::MAX >> (64 - 8 * self.len());
31        unsafe { (self.as_ptr() as *const usize).read_unaligned() & mask }
32    }
33
34    /// Convert to an owned version.
35    fn to_vec(&self) -> Vec<u8> {
36        <[u8]>::to_vec(self)
37    }
38
39    #[inline(always)]
40    fn slice(&self, range: Range<usize>) -> Self {
41        &self[range]
42    }
43
44    /// Iter the ASCII characters.
45    #[inline(always)]
46    fn iter_bp(self) -> impl ExactSizeIterator<Item = u8> + Clone {
47        self.iter().copied()
48    }
49
50    /// Iter the ASCII characters in parallel.
51    #[inline(always)]
52    fn par_iter_bp(self, context: usize) -> (impl ExactSizeIterator<Item = S> + Clone, usize) {
53        let num_kmers = self.len().saturating_sub(context - 1);
54        let n = num_kmers.div_ceil(L);
55        let padding = L * n - num_kmers;
56
57        let offsets: [usize; 8] = from_fn(|l| (l * n)).into();
58        let mut cur = S::ZERO;
59
60        // Boxed, so it doesn't consume precious registers.
61        // Without this, cur is not always inlined into a register.
62        let mut buf = Box::new([S::ZERO; 8]);
63
64        let par_len = if num_kmers == 0 { 0 } else { n + context - 1 };
65        let it = (0..par_len).map(
66            #[inline(always)]
67            move |i| {
68                if i % 4 == 0 {
69                    if i % 32 == 0 {
70                        // Read a u256 for each lane containing the next 32 characters.
71                        let data: [u32x8; 8] = from_fn(
72                            #[inline(always)]
73                            |lane| read_slice(self, offsets[lane] + i),
74                        );
75                        *buf = transpose(data);
76                    }
77                    cur = buf[(i % 32) / 4];
78                }
79                // Extract the last 2 bits of each character.
80                let chars = cur & S::splat(0xff);
81                // Shift remaining characters to the right.
82                cur = cur >> S::splat(8);
83                chars
84            },
85        );
86
87        (it, padding)
88    }
89
90    #[inline(always)]
91    fn par_iter_bp_delayed(
92        self,
93        context: usize,
94        delay: usize,
95    ) -> (impl ExactSizeIterator<Item = (S, S)> + Clone, usize) {
96        assert!(
97            delay < usize::MAX / 2,
98            "Delay={} should be >=0.",
99            delay as isize
100        );
101
102        let num_kmers = self.len().saturating_sub(context - 1);
103        let n = num_kmers.div_ceil(L);
104        let padding = L * n - num_kmers;
105
106        let offsets: [usize; 8] = from_fn(|l| (l * n)).into();
107        let mut upcoming = S::ZERO;
108        let mut upcoming_d = S::ZERO;
109
110        // Even buf_len is nice to only have the write==buf_len check once.
111        // We also make it the next power of 2, for faster modulo operations.
112        // delay/4: number of bp in a u32.
113        let buf_len = (delay / 4 + 8).next_power_of_two();
114        let buf_mask = buf_len - 1;
115        let mut buf = vec![S::ZERO; buf_len];
116        let mut write_idx = 0;
117        // We compensate for the first delay/16 triggers of the check below that
118        // happen before the delay is actually reached.
119        let mut read_idx = (buf_len - delay / 4) % buf_len;
120
121        let par_len = if num_kmers == 0 { 0 } else { n + context - 1 };
122        let it = (0..par_len).map(
123            #[inline(always)]
124            move |i| {
125                if i % 4 == 0 {
126                    if i % 32 == 0 {
127                        // Read a u256 for each lane containing the next 32 characters.
128                        let data: [u32x8; 8] = from_fn(
129                            #[inline(always)]
130                            |lane| read_slice(self, offsets[lane] + i),
131                        );
132                        unsafe {
133                            let mut_array: &mut [u32x8; 8] = buf
134                                .get_unchecked_mut(write_idx..write_idx + 8)
135                                .try_into()
136                                .unwrap_unchecked();
137                            *mut_array = transpose(data);
138                        }
139                    }
140                    upcoming = buf[write_idx];
141                    write_idx += 1;
142                    write_idx &= buf_mask;
143                }
144                if i % 4 == delay % 4 {
145                    unsafe { assert_unchecked(read_idx < buf.len()) };
146                    upcoming_d = buf[read_idx];
147                    read_idx += 1;
148                    read_idx &= buf_mask;
149                }
150                // Extract the last 2 bits of each character.
151                let chars = upcoming & S::splat(0xff);
152                let chars_d = upcoming_d & S::splat(0xff);
153                // Shift remaining characters to the right.
154                upcoming = upcoming >> S::splat(8);
155                upcoming_d = upcoming_d >> S::splat(8);
156                (chars, chars_d)
157            },
158        );
159
160        (it, padding)
161    }
162
163    #[inline(always)]
164    fn par_iter_bp_delayed_2(
165        self,
166        context: usize,
167        delay1: usize,
168        delay2: usize,
169    ) -> (impl ExactSizeIterator<Item = (S, S, S)> + Clone, usize) {
170        assert!(delay1 <= delay2, "Delay1 must be at most delay2.");
171
172        let num_kmers = self.len().saturating_sub(context - 1);
173        let n = num_kmers.div_ceil(L);
174        let padding = L * n - num_kmers;
175
176        let offsets: [usize; 8] = from_fn(|l| (l * n)).into();
177
178        let mut upcoming = S::ZERO;
179        let mut upcoming_d1 = S::ZERO;
180        let mut upcoming_d2 = S::ZERO;
181
182        // Even buf_len is nice to only have the write==buf_len check once.
183        // We also make it the next power of 2, for faster modulo operations.
184        // delay/4: number of bp in a u32.
185        let buf_len = (delay2 / 4 + 8).next_power_of_two();
186        let buf_mask = buf_len - 1;
187        let mut buf = vec![S::ZERO; buf_len];
188        let mut write_idx = 0;
189        // We compensate for the first delay/16 triggers of the check below that
190        // happen before the delay is actually reached.
191        let mut read_idx1 = (buf_len - delay1 / 4) % buf_len;
192        let mut read_idx2 = (buf_len - delay2 / 4) % buf_len;
193
194        let par_len = if num_kmers == 0 { 0 } else { n + context - 1 };
195        let it = (0..par_len).map(
196            #[inline(always)]
197            move |i| {
198                if i % 4 == 0 {
199                    if i % 32 == 0 {
200                        // Read a u256 for each lane containing the next 32 characters.
201                        let data: [u32x8; 8] = from_fn(
202                            #[inline(always)]
203                            |lane| read_slice(self, offsets[lane] + i),
204                        );
205                        unsafe {
206                            let mut_array: &mut [u32x8; 8] = buf
207                                .get_unchecked_mut(write_idx..write_idx + 8)
208                                .try_into()
209                                .unwrap_unchecked();
210                            *mut_array = transpose(data);
211                        }
212                    }
213                    upcoming = buf[write_idx];
214                    write_idx += 1;
215                    write_idx &= buf_mask;
216                }
217                if i % 4 == delay1 % 4 {
218                    unsafe { assert_unchecked(read_idx1 < buf.len()) };
219                    upcoming_d1 = buf[read_idx1];
220                    read_idx1 += 1;
221                    read_idx1 &= buf_mask;
222                }
223                if i % 4 == delay2 % 4 {
224                    unsafe { assert_unchecked(read_idx2 < buf.len()) };
225                    upcoming_d2 = buf[read_idx2];
226                    read_idx2 += 1;
227                    read_idx2 &= buf_mask;
228                }
229                // Extract the last 2 bits of each character.
230                let chars = upcoming & S::splat(0xff);
231                let chars_d1 = upcoming_d1 & S::splat(0xff);
232                let chars_d2 = upcoming_d2 & S::splat(0xff);
233                // Shift remaining characters to the right.
234                upcoming = upcoming >> S::splat(8);
235                upcoming_d1 = upcoming_d1 >> S::splat(8);
236                upcoming_d2 = upcoming_d2 >> S::splat(8);
237                (chars, chars_d1, chars_d2)
238            },
239        );
240
241        (it, padding)
242    }
243
244    // TODO: This is not very optimized.
245    fn cmp_lcp(&self, other: &Self) -> (std::cmp::Ordering, usize) {
246        for i in 0..self.len().min(other.len()) {
247            if self[i] != other[i] {
248                return (self[i].cmp(&other[i]), i);
249            }
250        }
251        (self.len().cmp(&other.len()), self.len().min(other.len()))
252    }
253}
254
255impl SeqVec for Vec<u8> {
256    type Seq<'s> = &'s [u8];
257
258    /// Get the underlying ASCII text.
259    fn into_raw(self) -> Vec<u8> {
260        self
261    }
262
263    #[inline(always)]
264    fn as_slice(&self) -> Self::Seq<'_> {
265        self.as_slice()
266    }
267
268    fn push_seq(&mut self, seq: &[u8]) -> Range<usize> {
269        let start = self.len();
270        let end = start + seq.len();
271        let range = start..end;
272        self.extend(seq);
273        range
274    }
275
276    fn push_ascii(&mut self, seq: &[u8]) -> Range<usize> {
277        self.push_seq(seq)
278    }
279
280    fn random(n: usize) -> Self {
281        let mut seq = vec![0; n];
282        rand::rngs::SmallRng::from_os_rng().fill_bytes(&mut seq);
283        seq
284    }
285}