1use crate::intrinsics::transpose;
2use std::mem::transmute;
3use wide::u32x8;
4
/// Shorthand for "an [`ExactSizeIterator`] that yields `T`".
///
/// The trait has no methods of its own; it only exists so that bounds and
/// `impl Trait` return types can be written as `ChunkIt<T>`.
pub trait ChunkIt<T>: ExactSizeIterator<Item = T> {}

/// Every exact-size iterator over `T` is automatically a `ChunkIt<T>`.
impl<T, It> ChunkIt<T> for It where It: ExactSizeIterator<Item = T> {}
8
/// An iterator bundled with a padding count.
///
/// The adaptors on this type carry `padding` along unchanged; when the
/// iterator is finally collected, `padding` elements are removed from the
/// end of the flattened output.
pub struct PaddedIt<I> {
    /// The wrapped iterator.
    pub it: I,
    /// Number of trailing output elements to discard after collecting.
    pub padding: usize,
}
17
/// By-value "skip ahead" operation.
pub trait Advance {
    /// Consumes `self`, moves it forward by `n` steps, and hands it back.
    fn advance(self, n: usize) -> Self;
}

/// Any exact-size iterator can be advanced by pulling and discarding items.
impl<I> Advance for I
where
    I: ExactSizeIterator,
{
    /// Drops up to `n` leading items (fewer if the iterator runs out first)
    /// and returns the iterator positioned after them.
    #[inline(always)]
    fn advance(mut self, n: usize) -> Self {
        // Borrow mutably so that `take` consumes from `self` without owning it.
        let skipped = (&mut self).take(n);
        skipped.for_each(|_| {});
        self
    }
}
31
32impl<I> PaddedIt<I> {
33 #[inline(always)]
35 pub fn map<T, T2>(self, f: impl FnMut(T) -> T2) -> PaddedIt<impl ChunkIt<T2>>
36 where
37 I: ChunkIt<T>,
38 {
39 PaddedIt {
40 it: self.it.map(f),
41 padding: self.padding,
42 }
43 }
44
45 #[inline(always)]
47 pub fn advance<T>(mut self, n: usize) -> PaddedIt<impl ChunkIt<T>>
48 where
49 I: ChunkIt<T>,
50 {
51 self.it = self.it.advance(n);
52 self
53 }
54
55 #[inline(always)]
57 pub fn advance_with<T>(&mut self, n: usize, f: impl FnMut(T))
58 where
59 I: ChunkIt<T>,
60 {
61 self.it.by_ref().take(n).for_each(f);
62 }
63
64 #[inline(always)]
66 pub fn zip<T, T2>(self, other: PaddedIt<impl ChunkIt<T2>>) -> PaddedIt<impl ChunkIt<(T, T2)>>
67 where
68 I: ChunkIt<T>,
69 {
70 assert_eq!(
71 self.padding,
72 other.padding,
73 "Len1 {} Padding1 {} Len2 {} Padding2 {}",
74 self.it.len(),
75 self.padding,
76 other.it.len(),
77 other.padding
78 );
79 assert_eq!(self.it.len(), other.it.len());
80 PaddedIt {
81 it: std::iter::zip(self.it, other.it),
82 padding: self.padding,
83 }
84 }
85}
86
87impl<I: ChunkIt<u32x8>> PaddedIt<I> {
88 pub fn collect(self) -> Vec<u32> {
91 let mut v = vec![];
92 self.collect_into(&mut v);
93 v
94 }
95
96 #[inline(always)]
101 pub fn collect_into(self, out_vec: &mut Vec<u32>) {
102 let PaddedIt { it, padding } = self;
103 let len = it.len();
104 out_vec.resize(len * 8, 0);
105
106 let mut m = [u32x8::new([0; 8]); 8];
107 let mut i = 0;
108 it.for_each(|x| {
109 m[i % 8] = x;
110 if i % 8 == 7 {
111 let t = transpose(m);
112 for j in 0..8 {
113 unsafe {
114 *out_vec
115 .get_unchecked_mut(j * len + 8 * (i / 8)..)
116 .split_first_chunk_mut::<8>()
117 .unwrap()
118 .0 = transmute(t[j]);
119 }
120 }
121 }
122 i += 1;
123 });
124
125 let t = transpose(m);
127 let k = i % 8;
128 for j in 0..8 {
129 unsafe {
130 out_vec[j * len + 8 * (i / 8)..j * len + 8 * (i / 8) + k]
131 .copy_from_slice(&transmute::<_, [u32; 8]>(t[j])[..k]);
132 }
133 }
134
135 out_vec.resize(out_vec.len() - padding, 0);
136 }
137}