1use rapidhash::HashMapExt;
7
8use crate::mphf_config::{Mphf, build_mphf_from_vec};
9use std::io;
10use tracing::info;
11
12#[derive(Clone, Copy, Debug)]
14pub struct MinimizerControl {
15 pub count: u32,
17 pub bucket_type: BucketType,
19 pub metadata: u64,
21}
22
23#[derive(Clone, Copy, Debug, PartialEq, Eq)]
25#[derive(Default)]
26pub enum BucketType {
27 #[default]
29 Regular,
30 Sparse,
32 HeavyLoad,
34}
35
36
37pub struct MinimizersControlMapBuilder {
42 minimizers: Vec<u64>,
44 controls: Vec<MinimizerControl>,
46 index: rapidhash::RapidHashMap<u64, usize>,
48}
49
50impl MinimizersControlMapBuilder {
51 pub fn new() -> Self {
53 Self {
54 minimizers: Vec::new(),
55 controls: Vec::new(),
56 index: rapidhash::RapidHashMap::new(),
57 }
58 }
59
60 pub fn add_minimizer(&mut self, minimizer: u64) -> usize {
62 if let Some(&pos) = self.index.get(&minimizer) {
63 return pos;
64 }
65
66 let id = self.minimizers.len();
67 self.minimizers.push(minimizer);
68 self.controls.push(MinimizerControl {
69 count: 0,
70 bucket_type: BucketType::Regular,
71 metadata: 0,
72 });
73 self.index.insert(minimizer, id);
74 id
75 }
76
77 pub fn increment_count(&mut self, minimizer: u64) {
79 let id = self.add_minimizer(minimizer);
80 self.controls[id].count = self.controls[id].count.saturating_add(1);
81 }
82
83 pub fn set_bucket_type(&mut self, minimizer: u64, bucket_type: BucketType) {
85 let id = self.add_minimizer(minimizer);
86 self.controls[id].bucket_type = bucket_type;
87 }
88
89 pub fn get_control_mut(&mut self, minimizer: u64) -> Option<&mut MinimizerControl> {
91 self.index
92 .get(&minimizer)
93 .copied()
94 .map(|idx| &mut self.controls[idx])
95 }
96
97 pub fn finalize_bucket_types(&mut self, threshold_sparse: u32, threshold_heavy: u32) {
99 for control in &mut self.controls {
100 control.bucket_type = if control.count > threshold_heavy {
101 BucketType::HeavyLoad
102 } else if control.count > threshold_sparse {
103 BucketType::Sparse
104 } else {
105 BucketType::Regular
106 };
107 }
108 }
109
110 pub fn build(self, _c: u16, _alpha: f64) -> io::Result<(MinimizersControlMap, Vec<usize>)> {
119 if self.minimizers.is_empty() {
120 return Ok((MinimizersControlMap {
121 mphf: None,
122 num_keys: 0,
123 }, Vec::new()));
124 }
125
126 let minimizers = self.minimizers;
127 let controls = self.controls;
128 let num_keys = minimizers.len() as u64;
129
130 info!("Building PHast MPHF for {} minimizers", num_keys);
131
132 let mphf = build_mphf_from_vec(minimizers.clone());
134
135 let mut bucket_id_by_mphf_index = vec![0usize; controls.len()];
137 for (idx, minimizer) in minimizers.iter().enumerate() {
138 let pos = mphf.get(minimizer);
139 if pos < bucket_id_by_mphf_index.len() {
140 bucket_id_by_mphf_index[pos] = controls[idx].metadata as usize;
141 }
142 }
143
144 Ok((MinimizersControlMap {
145 mphf: Some(mphf),
146 num_keys,
147 }, bucket_id_by_mphf_index))
148 }
149
150 pub fn num_minimizers(&self) -> usize {
152 self.minimizers.len()
153 }
154}
155
156impl Default for MinimizersControlMapBuilder {
157 fn default() -> Self {
158 Self::new()
159 }
160}
161
162pub struct MinimizersControlMap {
168 mphf: Option<Mphf>,
170 num_keys: u64,
172}
173
174impl MinimizersControlMap {
175 pub fn from_parts(
177 _controls: Vec<MinimizerControl>,
178 minimizers: Vec<u64>,
179 num_keys: u64,
180 ) -> Self {
181 let mphf = if !minimizers.is_empty() {
183 Some(build_mphf_from_vec(minimizers))
184 } else {
185 None
186 };
187
188 Self {
189 mphf,
190 num_keys,
191 }
192 }
193
194 pub fn lookup(&self, minimizer: u64) -> Option<usize> {
199 if let Some(ref mphf) = self.mphf {
200 let id = mphf.get(&minimizer);
201 if id < self.num_keys as usize {
202 Some(id)
203 } else {
204 None
205 }
206 } else {
207 None
208 }
209 }
210
211 pub fn mphf_ref(&self) -> Option<&Mphf> {
213 self.mphf.as_ref()
214 }
215
216 pub fn set_mphf(&mut self, mphf: Option<Mphf>) {
218 self.mphf = mphf;
219 }
220
221 pub fn num_minimizers(&self) -> u64 {
223 self.num_keys
224 }
225
226 pub fn num_bits(&self) -> u64 {
228
229
230 if let Some(ref _mphf) = self.mphf {
231 (self.num_keys as f64 * 4.0) as u64
233 } else {
234 0
235 }
236 }
237
238 pub fn mphf_serialized_bytes(&self) -> usize {
240 match &self.mphf {
241 Some(mphf) => mphf.write_bytes(),
242 None => 0,
243 }
244 }
245
246 pub fn serialize_without_mphf<W: io::Write>(
250 &self,
251 writer: &mut W,
252 ) -> io::Result<()> {
253 writer.write_all(&self.num_keys.to_le_bytes())?;
255
256 Ok(())
257 }
258
259 pub fn deserialize_without_mphf<R: io::Read>(
263 reader: &mut R,
264 ) -> io::Result<Self> {
265 let mut num_keys_bytes = [0u8; 8];
267 reader.read_exact(&mut num_keys_bytes)?;
268 let num_keys = u64::from_le_bytes(num_keys_bytes);
269
270 Ok(Self {
271 mphf: None,
272 num_keys,
273 })
274 }
275}
276
277#[cfg(test)]
278mod tests {
279 use super::*;
280
281 #[test]
282 fn test_minimizers_control_map_builder_creation() {
283 let builder = MinimizersControlMapBuilder::new();
284 assert_eq!(builder.num_minimizers(), 0);
285 }
286
287 #[test]
288 fn test_minimizers_control_map_builder_add() {
289 let mut builder = MinimizersControlMapBuilder::new();
290
291 let id1 = builder.add_minimizer(100);
292 let id2 = builder.add_minimizer(200);
293 let id3 = builder.add_minimizer(100); assert_eq!(id1, 0);
296 assert_eq!(id2, 1);
297 assert_eq!(id3, 0); assert_eq!(builder.num_minimizers(), 2);
299 }
300
301 #[test]
302 fn test_minimizers_control_map_builder_increment() {
303 let mut builder = MinimizersControlMapBuilder::new();
304
305 builder.increment_count(100);
306 builder.increment_count(100);
307 builder.increment_count(200);
308
309 assert_eq!(builder.num_minimizers(), 2);
310 assert_eq!(builder.controls[0].count, 2);
311 assert_eq!(builder.controls[1].count, 1);
312 }
313
314 #[test]
315 fn test_minimizers_control_map_builder_bucket_type() {
316 let mut builder = MinimizersControlMapBuilder::new();
317
318 builder.add_minimizer(100);
319 builder.set_bucket_type(100, BucketType::Sparse);
320
321 assert_eq!(builder.controls[0].bucket_type, BucketType::Sparse);
322 }
323
324 #[test]
325 fn test_minimizers_control_map_builder_finalize() {
326 let mut builder = MinimizersControlMapBuilder::new();
327
328 builder.add_minimizer(100);
329 builder.controls[0].count = 5;
330
331 builder.add_minimizer(200);
332 builder.controls[1].count = 15;
333
334 builder.add_minimizer(300);
335 builder.controls[2].count = 150;
336
337 builder.finalize_bucket_types(10, 100);
338
339 assert_eq!(builder.controls[0].bucket_type, BucketType::Regular);
340 assert_eq!(builder.controls[1].bucket_type, BucketType::Sparse);
341 assert_eq!(builder.controls[2].bucket_type, BucketType::HeavyLoad);
342 }
343
344 #[test]
345 fn test_minimizers_control_map_build_empty() {
346 let builder = MinimizersControlMapBuilder::new();
347 let (mcm, mapping) = builder.build(100, 0.94).unwrap();
348
349 assert_eq!(mcm.num_minimizers(), 0);
350 assert!(mcm.mphf.is_none());
351 assert!(mapping.is_empty());
352 }
353
354 #[test]
355 fn test_minimizers_control_map_build_and_lookup() {
356 let mut builder = MinimizersControlMapBuilder::new();
357
358 builder.increment_count(100);
359 builder.increment_count(100);
360 builder.increment_count(200);
361 builder.set_bucket_type(100, BucketType::Sparse);
362
363 let (mcm, _mapping) = builder.build(100, 0.94).unwrap();
364
365 assert_eq!(mcm.num_minimizers(), 2);
366
367 let idx_100 = mcm.lookup(100).unwrap();
369 assert!(idx_100 < 2); let idx_200 = mcm.lookup(200).unwrap();
372 assert!(idx_200 < 2); assert_ne!(idx_100, idx_200); }
375
376 #[test]
377 fn test_minimizers_control_map_lookup_missing() {
378 let mut builder = MinimizersControlMapBuilder::new();
379 builder.increment_count(100);
380
381 let (mcm, _) = builder.build(100, 0.94).unwrap();
382
383 let _result = mcm.lookup(300);
385 }
389
390 #[test]
391 fn test_bucket_type_default() {
392 let bucket_type = BucketType::default();
393 assert_eq!(bucket_type, BucketType::Regular);
394 }
395
396 #[test]
397 fn test_minimizer_control_default() {
398 let control = MinimizerControl {
399 count: 5,
400 bucket_type: BucketType::Regular,
401 metadata: 0,
402 };
403 assert_eq!(control.count, 5);
404 assert_eq!(control.bucket_type, BucketType::Regular);
405 }
406}