packed_seq/
padded_it.rs

1use crate::intrinsics::transpose;
2use std::mem::transmute;
3use wide::u32x8;
4
/// Shorthand bound for any [`ExactSizeIterator`] that yields items of type `T`.
///
/// Used for iterators that walk over multiple chunks in parallel, where `T` is
/// typically `u32x8`.
pub trait ChunkIt<T>: ExactSizeIterator<Item = T> {}

/// Blanket implementation: every exact-size iterator over `T` is a `ChunkIt<T>`.
impl<T, I> ChunkIt<T> for I where I: ExactSizeIterator<Item = T> {}
8
/// An iterator over values in multiple SIMD lanes, with a certain amount of `padding` at the end.
///
/// This type is returned by functions like [`crate::Seq::par_iter_bp`].
/// It usually contains an iterator over e.g. `u32x8` values or `(u32x8, u32x8)` tuples.
///
/// `Clone` and `Debug` are available whenever the wrapped iterator provides them.
#[derive(Clone, Debug)]
pub struct PaddedIt<I> {
    /// The wrapped iterator.
    pub it: I,
    /// Number of trailing padding elements; `collect_into` removes this many
    /// values from the end of the flattened output.
    pub padding: usize,
}
17
/// Extension trait for skipping the first `n` items of an iterator.
/// Used e.g. to drop the first `k-1` values produced by a k-mer hasher.
pub trait Advance {
    /// Consume up to `n` items from the front and hand the iterator back.
    fn advance(self, n: usize) -> Self;
}

impl<I> Advance for I
where
    I: ExactSizeIterator,
{
    /// Pull up to `n` items off the front and discard them, stopping early
    /// once the iterator is exhausted.
    #[inline(always)]
    fn advance(mut self, n: usize) -> Self {
        for _ in 0..n {
            if self.next().is_none() {
                break;
            }
        }
        self
    }
}
31
32impl<I> PaddedIt<I> {
33    /// Apply `f` to each element.
34    #[inline(always)]
35    pub fn map<T, T2>(self, f: impl FnMut(T) -> T2) -> PaddedIt<impl ChunkIt<T2>>
36    where
37        I: ChunkIt<T>,
38    {
39        PaddedIt {
40            it: self.it.map(f),
41            padding: self.padding,
42        }
43    }
44
45    /// Advance the iterator by `n` steps, consuming the first `n` values (of each lane).
46    #[inline(always)]
47    pub fn advance<T>(mut self, n: usize) -> PaddedIt<impl ChunkIt<T>>
48    where
49        I: ChunkIt<T>,
50    {
51        self.it = self.it.advance(n);
52        self
53    }
54
55    /// Advance the iterator by `n` steps, consuming the first `n` values (of each lane).
56    #[inline(always)]
57    pub fn advance_with<T>(&mut self, n: usize, f: impl FnMut(T))
58    where
59        I: ChunkIt<T>,
60    {
61        self.it.by_ref().take(n).for_each(f);
62    }
63
64    /// Advance the iterator by `n` steps, consuming the first `n` values (of each lane).
65    #[inline(always)]
66    pub fn zip<T, T2>(self, other: PaddedIt<impl ChunkIt<T2>>) -> PaddedIt<impl ChunkIt<(T, T2)>>
67    where
68        I: ChunkIt<T>,
69    {
70        assert_eq!(
71            self.padding,
72            other.padding,
73            "Len1 {} Padding1 {} Len2 {} Padding2 {}",
74            self.it.len(),
75            self.padding,
76            other.it.len(),
77            other.padding
78        );
79        assert_eq!(self.it.len(), other.it.len());
80        PaddedIt {
81            it: std::iter::zip(self.it, other.it),
82            padding: self.padding,
83        }
84    }
85}
86
87impl<I: ChunkIt<u32x8>> PaddedIt<I> {
88    /// Collect all values of a padded `u32x8`-iterator into a flat vector.
89    /// Prefer `collect_into` to avoid repeated allocations.
90    pub fn collect(self) -> Vec<u32> {
91        let mut v = vec![];
92        self.collect_into(&mut v);
93        v
94    }
95
96    /// Collect all values of a padded `u32x8`-iterator into a flat vector.
97    ///
98    /// Implemented by taking 8 elements from each stream, and transposing this SIMD-matrix before writing out the results.
99    /// The `tail` is appended at the end.
100    #[inline(always)]
101    pub fn collect_into(self, out_vec: &mut Vec<u32>) {
102        let PaddedIt { it, padding } = self;
103        let len = it.len();
104        out_vec.resize(len * 8, 0);
105
106        let mut m = [u32x8::new([0; 8]); 8];
107        let mut i = 0;
108        it.for_each(|x| {
109            m[i % 8] = x;
110            if i % 8 == 7 {
111                let t = transpose(m);
112                for j in 0..8 {
113                    unsafe {
114                        *out_vec
115                            .get_unchecked_mut(j * len + 8 * (i / 8)..)
116                            .split_first_chunk_mut::<8>()
117                            .unwrap()
118                            .0 = transmute(t[j]);
119                    }
120                }
121            }
122            i += 1;
123        });
124
125        // Manually write the unfinished parts of length k=i%8.
126        let t = transpose(m);
127        let k = i % 8;
128        for j in 0..8 {
129            unsafe {
130                out_vec[j * len + 8 * (i / 8)..j * len + 8 * (i / 8) + k]
131                    .copy_from_slice(&transmute::<_, [u32; 8]>(t[j])[..k]);
132            }
133        }
134
135        out_vec.resize(out_vec.len() - padding, 0);
136    }
137}