1use rapidhash::HashMapExt;
7
8use crate::partitioned_mphf::PartitionedMphf;
9use std::io;
10use tracing::info;
11
12#[derive(Clone, Copy, Debug)]
14pub struct MinimizerControl {
15 pub count: u32,
17 pub bucket_type: BucketType,
19 pub metadata: u64,
21}
22
/// Classification attached to a minimizer's bucket.
///
/// `MinimizersControlMapBuilder::finalize_bucket_types` picks a variant by
/// comparing the minimizer's occurrence count against two thresholds.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum BucketType {
    /// Default classification; chosen when the count exceeds neither threshold.
    #[default]
    Regular,
    /// Chosen when the count exceeds the sparse threshold but not the heavy one.
    Sparse,
    /// Chosen when the count exceeds the heavy threshold.
    HeavyLoad,
}
35
36
37pub struct MinimizersControlMapBuilder {
42 minimizers: Vec<u64>,
44 controls: Vec<MinimizerControl>,
46 index: rapidhash::RapidHashMap<u64, usize>,
48}
49
50impl MinimizersControlMapBuilder {
51 pub fn new() -> Self {
53 Self {
54 minimizers: Vec::new(),
55 controls: Vec::new(),
56 index: rapidhash::RapidHashMap::new(),
57 }
58 }
59
60 pub fn add_minimizer(&mut self, minimizer: u64) -> usize {
62 if let Some(&pos) = self.index.get(&minimizer) {
63 return pos;
64 }
65
66 let id = self.minimizers.len();
67 self.minimizers.push(minimizer);
68 self.controls.push(MinimizerControl {
69 count: 0,
70 bucket_type: BucketType::Regular,
71 metadata: 0,
72 });
73 self.index.insert(minimizer, id);
74 id
75 }
76
77 pub fn increment_count(&mut self, minimizer: u64) {
79 let id = self.add_minimizer(minimizer);
80 self.controls[id].count = self.controls[id].count.saturating_add(1);
81 }
82
83 pub fn set_bucket_type(&mut self, minimizer: u64, bucket_type: BucketType) {
85 let id = self.add_minimizer(minimizer);
86 self.controls[id].bucket_type = bucket_type;
87 }
88
89 pub fn get_control_mut(&mut self, minimizer: u64) -> Option<&mut MinimizerControl> {
91 self.index
92 .get(&minimizer)
93 .copied()
94 .map(|idx| &mut self.controls[idx])
95 }
96
97 pub fn finalize_bucket_types(&mut self, threshold_sparse: u32, threshold_heavy: u32) {
99 for control in &mut self.controls {
100 control.bucket_type = if control.count > threshold_heavy {
101 BucketType::HeavyLoad
102 } else if control.count > threshold_sparse {
103 BucketType::Sparse
104 } else {
105 BucketType::Regular
106 };
107 }
108 }
109
110 pub fn build(self, _c: u16, _alpha: f64, partitioned: bool) -> io::Result<(MinimizersControlMap, Vec<usize>)> {
120 if self.minimizers.is_empty() {
121 return Ok((MinimizersControlMap {
122 mphf: None,
123 num_keys: 0,
124 }, Vec::new()));
125 }
126
127 let minimizers = self.minimizers;
128 let controls = self.controls;
129 let num_keys = minimizers.len() as u64;
130
131 info!("Building PHast MPHF for {} minimizers (partitioned={})", num_keys, partitioned);
132
133 let mphf = PartitionedMphf::build_from_vec(minimizers.clone(), partitioned);
135
136 let mut bucket_id_by_mphf_index = vec![0usize; controls.len()];
138 for (idx, minimizer) in minimizers.iter().enumerate() {
139 let pos = mphf.get(minimizer);
140 if pos < bucket_id_by_mphf_index.len() {
141 bucket_id_by_mphf_index[pos] = controls[idx].metadata as usize;
142 }
143 }
144
145 Ok((MinimizersControlMap {
146 mphf: Some(mphf),
147 num_keys,
148 }, bucket_id_by_mphf_index))
149 }
150
151 pub fn num_minimizers(&self) -> usize {
153 self.minimizers.len()
154 }
155}
156
157impl Default for MinimizersControlMapBuilder {
158 fn default() -> Self {
159 Self::new()
160 }
161}
162
163pub struct MinimizersControlMap {
169 mphf: Option<PartitionedMphf>,
171 num_keys: u64,
173}
174
175impl MinimizersControlMap {
176 pub fn from_parts(
178 _controls: Vec<MinimizerControl>,
179 minimizers: Vec<u64>,
180 num_keys: u64,
181 ) -> Self {
182 let mphf = if !minimizers.is_empty() {
184 Some(PartitionedMphf::build_from_vec(minimizers, false))
185 } else {
186 None
187 };
188
189 Self {
190 mphf,
191 num_keys,
192 }
193 }
194
195 pub fn from_mphf(mphf: PartitionedMphf, num_keys: u64) -> Self {
200 Self {
201 mphf: Some(mphf),
202 num_keys,
203 }
204 }
205
206 pub fn lookup(&self, minimizer: u64) -> Option<usize> {
211 if let Some(ref mphf) = self.mphf {
212 let id = mphf.get(&minimizer);
213 if id < self.num_keys as usize {
214 Some(id)
215 } else {
216 None
217 }
218 } else {
219 None
220 }
221 }
222
223 pub fn mphf_ref(&self) -> Option<&PartitionedMphf> {
225 self.mphf.as_ref()
226 }
227
228 pub fn set_mphf(&mut self, mphf: Option<PartitionedMphf>) {
230 self.mphf = mphf;
231 }
232
233 pub fn num_minimizers(&self) -> u64 {
235 self.num_keys
236 }
237
238 pub fn num_bits(&self) -> u64 {
240
241
242 if let Some(ref _mphf) = self.mphf {
243 (self.num_keys as f64 * 4.0) as u64
245 } else {
246 0
247 }
248 }
249
250 pub fn mphf_serialized_bytes(&self) -> usize {
252 match &self.mphf {
253 Some(pmphf) => pmphf.write_bytes(),
254 None => 0,
255 }
256 }
257
258 pub fn serialize_without_mphf<W: io::Write>(
262 &self,
263 writer: &mut W,
264 ) -> io::Result<()> {
265 writer.write_all(&self.num_keys.to_le_bytes())?;
267
268 Ok(())
269 }
270
271 pub fn deserialize_without_mphf<R: io::Read>(
275 reader: &mut R,
276 ) -> io::Result<Self> {
277 let mut num_keys_bytes = [0u8; 8];
279 reader.read_exact(&mut num_keys_bytes)?;
280 let num_keys = u64::from_le_bytes(num_keys_bytes);
281
282 Ok(Self {
283 mphf: None,
284 num_keys,
285 })
286 }
287}
288
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh builder holds no minimizers.
    #[test]
    fn test_minimizers_control_map_builder_creation() {
        let fresh = MinimizersControlMapBuilder::new();
        assert_eq!(fresh.num_minimizers(), 0);
    }

    /// Positions are handed out in first-seen order and re-adding is a no-op.
    #[test]
    fn test_minimizers_control_map_builder_add() {
        let mut builder = MinimizersControlMapBuilder::new();

        let first = builder.add_minimizer(100);
        let second = builder.add_minimizer(200);
        let repeated = builder.add_minimizer(100);

        assert_eq!(first, 0);
        assert_eq!(second, 1);
        // Re-adding 100 returns its original position.
        assert_eq!(repeated, 0);
        assert_eq!(builder.num_minimizers(), 2);
    }

    /// Counts accumulate per minimizer.
    #[test]
    fn test_minimizers_control_map_builder_increment() {
        let mut builder = MinimizersControlMapBuilder::new();

        for &minimizer in &[100u64, 100, 200] {
            builder.increment_count(minimizer);
        }

        assert_eq!(builder.num_minimizers(), 2);
        assert_eq!(builder.controls[0].count, 2);
        assert_eq!(builder.controls[1].count, 1);
    }

    /// An explicit bucket type replaces the initial `Regular`.
    #[test]
    fn test_minimizers_control_map_builder_bucket_type() {
        let mut builder = MinimizersControlMapBuilder::new();

        builder.add_minimizer(100);
        builder.set_bucket_type(100, BucketType::Sparse);

        assert_eq!(builder.controls[0].bucket_type, BucketType::Sparse);
    }

    /// Thresholds (10, 100) split counts 5 / 15 / 150 into the three classes.
    #[test]
    fn test_minimizers_control_map_builder_finalize() {
        let mut builder = MinimizersControlMapBuilder::new();

        // (minimizer, count to install, bucket type expected afterwards)
        let cases: [(u64, u32, BucketType); 3] = [
            (100, 5, BucketType::Regular),
            (200, 15, BucketType::Sparse),
            (300, 150, BucketType::HeavyLoad),
        ];

        for &(minimizer, count, _) in &cases {
            let slot = builder.add_minimizer(minimizer);
            builder.controls[slot].count = count;
        }

        builder.finalize_bucket_types(10, 100);

        for (slot, &(_, _, expected)) in cases.iter().enumerate() {
            assert_eq!(builder.controls[slot].bucket_type, expected);
        }
    }

    /// Building from nothing yields no MPHF and an empty mapping.
    #[test]
    fn test_minimizers_control_map_build_empty() {
        let (mcm, mapping) = MinimizersControlMapBuilder::new()
            .build(100, 0.94, false)
            .unwrap();

        assert_eq!(mcm.num_minimizers(), 0);
        assert!(mcm.mphf.is_none());
        assert!(mapping.is_empty());
    }

    /// Every inserted key resolves to its own in-range position.
    #[test]
    fn test_minimizers_control_map_build_and_lookup() {
        let mut builder = MinimizersControlMapBuilder::new();

        builder.increment_count(100);
        builder.increment_count(100);
        builder.increment_count(200);
        builder.set_bucket_type(100, BucketType::Sparse);

        let (mcm, _mapping) = builder.build(100, 0.94, false).unwrap();

        assert_eq!(mcm.num_minimizers(), 2);

        let idx_100 = mcm.lookup(100).unwrap();
        assert!(idx_100 < 2);

        let idx_200 = mcm.lookup(200).unwrap();
        assert!(idx_200 < 2);

        // Two distinct keys must not share a position.
        assert_ne!(idx_100, idx_200);
    }

    /// Looking up a key that was never inserted must not panic.
    #[test]
    fn test_minimizers_control_map_lookup_missing() {
        let mut builder = MinimizersControlMapBuilder::new();
        builder.increment_count(100);

        let (mcm, _) = builder.build(100, 0.94, false).unwrap();

        // NOTE(review): the result is deliberately not asserted on; presumably
        // the MPHF may map an unknown key to an arbitrary in-range position.
        // Confirm against `PartitionedMphf::get`.
        let _result = mcm.lookup(300);
    }

    /// `BucketType::default()` is `Regular`.
    #[test]
    fn test_bucket_type_default() {
        assert_eq!(BucketType::default(), BucketType::Regular);
    }

    /// Fields of a hand-built control record read back unchanged.
    #[test]
    fn test_minimizer_control_default() {
        let control = MinimizerControl {
            count: 5,
            bucket_type: BucketType::Regular,
            metadata: 0,
        };

        assert_eq!(control.count, 5);
        assert_eq!(control.bucket_type, BucketType::Regular);
    }
}