1use crate::algorithm::{Minimizer, MinimizerAlgorithm, ModMinimizer};
2use crate::iterator::*;
3use core::hash::{BuildHasher, Hash};
4use core::marker::PhantomData;
5use minimizer_queue::DefaultHashBuilder;
6use num_traits::PrimInt;
7
8#[derive(Clone, Debug, Eq, PartialEq)]
28pub struct MinimizerBuilder<
29 T: PrimInt = u64,
30 A: MinimizerAlgorithm = Minimizer,
31 S: BuildHasher = DefaultHashBuilder,
32 const CANONICAL: bool = false,
33> {
34 minimizer_size: usize,
35 width: u16,
36 hasher: S,
37 encoding: [u8; 256],
38 _marker: PhantomData<(T, A)>,
39}
40
41impl<T: PrimInt + Hash> MinimizerBuilder<T> {
42 #[inline]
48 pub fn new() -> Self {
49 Self::_new()
50 }
51}
52
53impl<T: PrimInt + Hash> Default for MinimizerBuilder<T> {
54 #[inline]
55 fn default() -> Self {
56 Self::_new()
57 }
58}
59
60impl<T: PrimInt + Hash, S: BuildHasher> MinimizerBuilder<T, Minimizer, S, false> {
61 #[inline]
63 pub fn iter(self, seq: &[u8]) -> MinimizerIterator<T, S> {
64 MinimizerIterator::new(
65 seq,
66 self.minimizer_size,
67 self.width,
68 self.hasher,
69 self.encoding,
70 )
71 }
72
73 #[inline]
75 pub fn iter_pos(self, seq: &[u8]) -> MinimizerPosIterator<T, S> {
76 MinimizerPosIterator::new(
77 seq,
78 self.minimizer_size,
79 self.width,
80 self.hasher,
81 self.encoding,
82 )
83 }
84}
85
86impl<T: PrimInt + Hash, S: BuildHasher> MinimizerBuilder<T, Minimizer, S, true> {
87 #[inline]
90 pub fn iter(self, seq: &[u8]) -> CanonicalMinimizerIterator<T, S> {
91 assert_eq!(
92 self.width % 2,
93 1,
94 "width must be odd to break ties between multiple minimizers"
95 );
96 CanonicalMinimizerIterator::new(
97 seq,
98 self.minimizer_size,
99 self.width,
100 self.hasher,
101 self.encoding,
102 )
103 }
104
105 #[inline]
108 pub fn iter_pos(self, seq: &[u8]) -> CanonicalMinimizerPosIterator<T, S> {
109 assert_eq!(
110 self.width % 2,
111 1,
112 "width must be odd to break ties between multiple minimizers"
113 );
114 CanonicalMinimizerPosIterator::new(
115 seq,
116 self.minimizer_size,
117 self.width,
118 self.hasher,
119 self.encoding,
120 )
121 }
122}
123
/// The `r` parameter of the mod-minimizer builders: the smallest accepted
/// `minimizer_size`, and the base of the offset
/// `R + (minimizer_size - R) % width` passed to the mod-sampling iterators.
const R: usize = 4;
125
126impl<T: PrimInt + Hash> MinimizerBuilder<T, ModMinimizer> {
127 #[inline]
133 pub fn new_mod() -> Self {
134 Self::_new()
135 }
136}
137
138impl<T: PrimInt + Hash, S: BuildHasher> MinimizerBuilder<T, ModMinimizer, S, false> {
139 #[inline]
141 pub fn iter(self, seq: &[u8]) -> ModSamplingIterator<T, S> {
142 assert!(
143 self.minimizer_size >= R,
144 "mod-minimizers require minimizer_size ≥ r={R}"
145 );
146 ModSamplingIterator::new(
147 seq,
148 self.minimizer_size,
149 self.width,
150 R + ((self.minimizer_size - R) % self.width as usize),
151 self.hasher,
152 self.encoding,
153 )
154 }
155
156 #[inline]
158 pub fn iter_pos(self, seq: &[u8]) -> ModSamplingPosIterator<T, S> {
159 assert!(
160 self.minimizer_size >= R,
161 "mod-minimizers require minimizer_size ≥ r={R}"
162 );
163 ModSamplingPosIterator::new(
164 seq,
165 self.minimizer_size,
166 self.width,
167 R + ((self.minimizer_size - R) % self.width as usize),
168 self.hasher,
169 self.encoding,
170 )
171 }
172}
173
174impl<T: PrimInt + Hash, S: BuildHasher> MinimizerBuilder<T, ModMinimizer, S, true> {
175 #[inline]
178 pub fn iter(self, seq: &[u8]) -> CanonicalModSamplingIterator<T, S> {
179 assert!(
180 self.minimizer_size >= R,
181 "mod-minimizers require minimizer_size ≥ r={R}"
182 );
183 assert_eq!(
184 self.width % 2,
185 1,
186 "width must be odd to break ties between multiple minimizers"
187 );
188 CanonicalModSamplingIterator::new(
189 seq,
190 self.minimizer_size,
191 self.width,
192 R + ((self.minimizer_size - R) % self.width as usize),
193 self.hasher,
194 self.encoding,
195 )
196 }
197
198 #[inline]
201 pub fn iter_pos(self, seq: &[u8]) -> CanonicalModSamplingPosIterator<T, S> {
202 assert!(
203 self.minimizer_size >= R,
204 "mod-minimizers require minimizer_size ≥ r={R}"
205 );
206 assert_eq!(
207 self.width % 2,
208 1,
209 "width must be odd to break ties between multiple minimizers"
210 );
211 CanonicalModSamplingPosIterator::new(
212 seq,
213 self.minimizer_size,
214 self.width,
215 R + ((self.minimizer_size - R) % self.width as usize),
216 self.hasher,
217 self.encoding,
218 )
219 }
220}
221
222impl<T: PrimInt + Hash, A: MinimizerAlgorithm> MinimizerBuilder<T, A, DefaultHashBuilder> {
223 fn _new() -> Self {
224 let mut encoding = [0u8; 256];
225 encoding[b'A' as usize] = 0b00;
226 encoding[b'a' as usize] = 0b00;
227 encoding[b'C' as usize] = 0b01;
228 encoding[b'c' as usize] = 0b01;
229 encoding[b'G' as usize] = 0b10;
230 encoding[b'g' as usize] = 0b10;
231 encoding[b'T' as usize] = 0b11;
232 encoding[b't' as usize] = 0b11;
233 Self {
234 minimizer_size: 21,
235 width: 31 - 21 + 1,
236 hasher: DefaultHashBuilder::default(),
237 encoding,
238 _marker: PhantomData,
239 }
240 }
241
242 pub fn seed(mut self, seed: u64) -> Self {
244 self.hasher = DefaultHashBuilder::with_seed(seed);
245 self
246 }
247}
248
249impl<T: PrimInt + Hash, A: MinimizerAlgorithm, S: BuildHasher, const CANONICAL: bool>
250 MinimizerBuilder<T, A, S, CANONICAL>
251{
252 pub fn minimizer_size(mut self, minimizer_size: usize) -> Self {
254 let max_size = (T::zero().count_zeros() / 2) as usize;
255 assert!(
256 minimizer_size <= max_size,
257 "With this integer type, minimizer_size must be ≤ {max_size}. Please select a smaller size or a larger type."
258 );
259 self.minimizer_size = minimizer_size;
260 self
261 }
262
263 pub const fn width(mut self, width: u16) -> Self {
265 self.width = width;
266 self
267 }
268
269 pub fn hasher<H: BuildHasher>(self, hasher: H) -> MinimizerBuilder<T, A, H, CANONICAL> {
271 MinimizerBuilder::<T, A, H, CANONICAL> {
272 minimizer_size: self.minimizer_size,
273 width: self.width,
274 hasher,
275 encoding: self.encoding,
276 _marker: self._marker,
277 }
278 }
279
280 pub fn encoding(mut self, a: u8, c: u8, g: u8, t: u8) -> Self {
282 self.encoding[b'A' as usize] = a;
283 self.encoding[b'a' as usize] = a;
284 self.encoding[b'C' as usize] = c;
285 self.encoding[b'c' as usize] = c;
286 self.encoding[b'G' as usize] = g;
287 self.encoding[b'g' as usize] = g;
288 self.encoding[b'T' as usize] = t;
289 self.encoding[b't' as usize] = t;
290 self
291 }
292
293 pub fn canonical(self) -> MinimizerBuilder<T, A, S, true> {
295 MinimizerBuilder::<T, A, S, true> {
296 minimizer_size: self.minimizer_size,
297 width: self.width,
298 hasher: self.hasher,
299 encoding: self.encoding,
300 _marker: self._marker,
301 }
302 }
303
304 pub fn non_canonical(self) -> MinimizerBuilder<T, A, S, false> {
306 MinimizerBuilder::<T, A, S, false> {
307 minimizer_size: self.minimizer_size,
308 width: self.width,
309 hasher: self.hasher,
310 encoding: self.encoding,
311 _marker: self._marker,
312 }
313 }
314}