Skip to main content

ph_temp/phast/
function.rs

1use std::{hash::Hash, io, usize};
2
3use crate::{phast::Params, seeds::{Bits8, SeedSize}};
4use super::{bits_per_seed_to_100_bucket_size, builder::{build_mt, build_st}, conf::Conf, seed_chooser::{SeedChooser, SeedOnly}, CompressedArray, DefaultCompressedArray, WINDOW_SIZE};
5use binout::{Serializer, VByte};
6use bitm::BitAccess;
7use dyn_size_of::GetSize;
8use seedable_hash::{BuildDefaultSeededHasher, BuildSeededHasher};
9use voracious_radix_sort::RadixSort;
10use rayon::prelude::*;
11
12/// Represents map-or-bump function.
13pub(crate) struct SeedEx<SSVecElement> {
14    pub(crate) seeds: Box<[SSVecElement]>,
15    pub(crate) conf: Conf,
16}
17
18impl<SSVecElement> SeedEx<SSVecElement> {
19    #[inline(always)]
20    pub(crate) fn bucket_for(&self, key: u64) -> usize { self.conf.bucket_for(key) }
21
22    #[inline(always)]
23    pub(crate) unsafe fn seed_for<SS>(&self, seed_size: SS, key: u64) -> u16 where SS: SeedSize<VecElement=SSVecElement> {
24        //self.seeds.get_fragment(self.bucket_for(key), self.conf.bits_per_seed()) as u16
25        seed_size.get_seed(&self.seeds, self.bucket_for(key))
26    }
27
28    /// Writes `self` to the `output`.
29    pub fn write<SS: SeedSize<VecElement = SSVecElement>>(&self, output: &mut dyn io::Write, seed_size: SS) -> io::Result<()>
30    {
31        self.conf.write(output)?;
32        seed_size.write_seed_vec(output, &self.seeds)
33    }
34
35    /// Reads seed size and `Self` from the `input`.
36    pub fn read<SS: SeedSize<VecElement = SSVecElement>>(input: &mut dyn io::Read) -> io::Result<(SS, Self)> {
37        let conf = Conf::read(input)?;
38        let (seed_size, seeds) = SS::read_seed_vec(input, conf.buckets_num)?;
39        Ok((seed_size, Self{ seeds, conf }))
40    }
41}
42
43impl<SSVecElement: GetSize> SeedEx<SSVecElement> {
44    /// Returns number of bytes which `write` will write.
45    pub fn write_bytes(&self) -> usize {
46        self.conf.write_bytes() + GetSize::size_bytes_dyn(&self.seeds)
47    }
48}
49
50impl<SSVecElement: GetSize> GetSize for SeedEx<SSVecElement> {
51    fn size_bytes_dyn(&self) -> usize { self.seeds.size_bytes_dyn() }
52    fn size_bytes_content_dyn(&self) -> usize { self.seeds.size_bytes_content_dyn() }
53    const USES_DYN_MEM: bool = true;
54}
55
56
57pub(crate) struct Level<SSVecElement> {
58    pub(crate) seeds: SeedEx<SSVecElement>,
59    pub(crate) shift: usize
60}
61
62impl<SSVecElement: GetSize> GetSize for Level<SSVecElement> {
63    fn size_bytes_dyn(&self) -> usize { self.seeds.size_bytes_dyn() }
64    fn size_bytes_content_dyn(&self) -> usize { self.seeds.size_bytes_content_dyn() }
65    const USES_DYN_MEM: bool = true;
66}
67
68impl<SSVecElement: GetSize> Level<SSVecElement> {
69    pub fn write_bytes(&self) -> usize {
70        VByte::size(self.shift) + self.seeds.write_bytes()
71    }
72}
73
74impl<SSVecElement> Level<SSVecElement> {
75    /// Writes `self` to the `output`.
76    pub fn write<SS: SeedSize<VecElement = SSVecElement>>(&self, output: &mut dyn io::Write, seed_size: SS) -> io::Result<()>
77    {
78        VByte::write(output, self.shift)?;
79        self.seeds.write(output, seed_size)
80    }
81
82    /// Reads seed size and `Self` from the `input`.
83    pub fn read<SS: SeedSize<VecElement = SSVecElement>>(input: &mut dyn io::Read) -> io::Result<(SS, Self)> {
84        let shift = VByte::read(input)?;
85        SeedEx::<SSVecElement>::read::<SS>(input).map(|(ss, seeds)| (ss, Self{ seeds, shift }))
86    }
87}
88
89#[inline]
90pub(crate) fn build_level_from_slice_st<K, SS, SC, S>(keys: &[K], params: &Params<SS>, hasher: &S, seed_chooser: SC, level_nr: u64)
91    -> (Vec<K>, SeedEx<SS::VecElement>, Box<[u64]>, usize)
92    where K: Hash+Clone, SS: SeedSize, SC: SeedChooser, S: BuildSeededHasher
93{
94    let mut hashes: Box<[_]> = keys.iter().map(|k| hasher.hash_one(k, level_nr)).collect();
95    //radsort::unopt::sort(&mut hashes);
96    hashes.voracious_sort();
97    let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
98    let (seeds, builder) =
99        build_st(&hashes, conf, params.seed_size, seed_chooser.bucket_evaluator(params.bits_per_seed(), conf.slice_len()), seed_chooser);
100    let (unassigned_values, unassigned_len) = builder.unassigned_values(&seeds);
101    drop(builder);
102    let mut keys_vec = Vec::with_capacity(unassigned_len);
103    keys_vec.extend(keys.into_iter().filter(|key| {
104        unsafe { params.seed_size.get_seed(&seeds, conf.bucket_for(hasher.hash_one(key, level_nr))) == 0 }
105    }).cloned());
106    (keys_vec, SeedEx::<SS::VecElement>{ seeds, conf }, unassigned_values, unassigned_len)
107}
108
109#[inline]
110pub(crate) fn build_level_from_slice_mt<K, SS, SC, S>(keys: &[K], params: &Params<SS>, threads_num: usize, hasher: &S, seed_chooser: SC, level_nr: u64)
111    -> (Vec<K>, SeedEx<SS::VecElement>, Box<[u64]>, usize)
112    where K: Hash+Sync+Send+Clone, SS: SeedSize, SC: SeedChooser+Sync, S: BuildSeededHasher+Sync
113{
114    let mut hashes: Box<[_]> = if keys.len() > 4*2048 {    //maybe better for string keys
115        //let mut k = Vec::with_capacity(keys.len());
116        //k.par_extend(keys.par_iter().with_min_len(10000).map(|k| hasher.hash_one_s64(k, level_nr)));
117        //k.into_boxed_slice()
118        keys.par_iter().with_min_len(256).map(|k| hasher.hash_one(k, level_nr)).collect()
119    } else {
120        keys.iter().map(|k| hasher.hash_one(k, level_nr)).collect()
121    };
122    //radsort::unopt::sort(&mut hashes);
123    hashes.voracious_mt_sort(threads_num);
124    let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
125    let (seeds, builder) =
126        build_mt(&hashes, conf, params.seed_size, WINDOW_SIZE, seed_chooser.bucket_evaluator(params.bits_per_seed(), conf.slice_len()), seed_chooser, threads_num);
127    let (unassigned_values, unassigned_len) = builder.unassigned_values(&seeds);
128    drop(builder);
129    let mut keys_vec = Vec::with_capacity(unassigned_len);
130    keys_vec.par_extend(keys.into_par_iter().filter(|key| {
131        unsafe { params.seed_size.get_seed(&seeds, conf.bucket_for(hasher.hash_one(key, level_nr))) == 0 }
132    }).cloned());
133    (keys_vec, SeedEx::<SS::VecElement>{ seeds, conf }, unassigned_values, unassigned_len)
134}
135
136#[inline(always)]
137pub(crate) fn build_level_st<K, SS, SC, S>(keys: &mut Vec::<K>, params: &Params<SS>, hasher: &S, seed_chooser: SC, level_nr: u64)
138    -> (SeedEx<SS::VecElement>, Box<[u64]>, usize)
139    where K: Hash, SS: SeedSize, SC: SeedChooser, S: BuildSeededHasher
140{
141    let mut hashes: Box<[_]> = keys.iter().map(|k| hasher.hash_one(k, level_nr)).collect();
142    hashes.voracious_sort();
143    let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
144    let (seeds, builder) =
145        build_st(&hashes, conf, params.seed_size, seed_chooser.bucket_evaluator(params.bits_per_seed(), conf.slice_len()), seed_chooser);
146    let (unassigned_values, unassigned_len) = builder.unassigned_values(&seeds);
147    drop(builder);
148    keys.retain(|key| {
149        unsafe { params.seed_size.get_seed(&seeds, conf.bucket_for(hasher.hash_one(key, level_nr))) == 0 }
150    });
151    (SeedEx::<SS::VecElement>{ seeds, conf }, unassigned_values, unassigned_len)
152}
153
154#[inline]
155pub(crate) fn build_level_mt<K, SS, SC, S>(keys: &mut Vec::<K>, params: &Params<SS>, threads_num: usize, hasher: &S, seed_chooser: SC, level_nr: u64)
156    -> (SeedEx<SS::VecElement>, Box<[u64]>, usize)
157    where K: Hash+Sync+Send, SS: SeedSize, SC: SeedChooser+Sync, S: BuildSeededHasher+Sync
158{
159    let mut hashes: Box<[_]> = if keys.len() > 4*2048 {    //maybe better for string keys
160        //let mut k = Vec::with_capacity(keys.len());
161        //k.par_extend(keys.par_iter().with_min_len(10000).map(|k| hasher.hash_one_s64(k, level_nr)));
162        //k.into_boxed_slice()
163        keys.par_iter().with_min_len(256).map(|k| hasher.hash_one(k, level_nr)).collect()
164    } else {
165        keys.iter().map(|k| hasher.hash_one(k, level_nr)).collect()
166    };
167    //radsort::unopt::sort(&mut hashes);
168    hashes.voracious_mt_sort(threads_num);
169    let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
170    let (seeds, builder) =
171        build_mt(&hashes, conf, params.seed_size, WINDOW_SIZE, seed_chooser.bucket_evaluator(params.bits_per_seed(), conf.slice_len()), seed_chooser, threads_num);
172    let (unassigned_values, unassigned_len) = builder.unassigned_values(&seeds);
173    drop(builder);
174    let mut result = Vec::with_capacity(unassigned_len);
175    std::mem::swap(keys, &mut result);
176    keys.par_extend(result.into_par_iter().filter(|key| {
177        unsafe { params.seed_size.get_seed(&seeds, conf.bucket_for(hasher.hash_one(key, level_nr))) == 0 }
178    }));
179    (SeedEx::<SS::VecElement>{ seeds, conf }, unassigned_values, unassigned_len)
180}
181
182/// PHast (Perfect Hashing made fast) - Minimal Perfect Hash Function
183/// with very fast evaluation and size below 2 bits/key
184/// developed by Piotr Beling and Peter Sanders.
185/// 
186/// It can be used with the following [`SeedChooser`] (which specify a particular PHast variant):
187/// [`ShiftOnlyWrapped`] (PHast+ with wrapping),
188/// [`ShiftSeedWrapped`] (PHast/PHast+ hybrid),
189/// [`SeedOnly`] (regular PHast).
190/// 
191/// Note that some [`SeedChooser`]s can be used only with [`Function2`](crate::phast::Function2).
192/// 
193/// See:
194/// Piotr Beling, Peter Sanders, *PHast - Perfect Hashing made fast*, 2025, <https://arxiv.org/abs/2504.17918>
195pub struct Function<SS, SC = SeedOnly, CA = DefaultCompressedArray, S = BuildDefaultSeededHasher>
196    where SS: SeedSize
197{
198    level0: SeedEx<SS::VecElement>,
199    unassigned: CA,
200    levels: Box<[Level<SS::VecElement>]>,
201    hasher: S,
202    seed_chooser: SC,
203    seed_size: SS,  // seed size, K=2**bits_per_seed
204}
205
206impl<SC, SS: SeedSize, CA, S> GetSize for Function<SS, SC, CA, S> where CA: GetSize {
207    fn size_bytes_dyn(&self) -> usize {
208        self.level0.size_bytes_dyn() +
209            self.unassigned.size_bytes_dyn() +
210            self.levels.size_bytes_dyn()
211    }
212    fn size_bytes_content_dyn(&self) -> usize {
213        self.level0.size_bytes_content_dyn() +
214            self.unassigned.size_bytes_content_dyn() +
215            self.levels.size_bytes_content_dyn()
216    }
217    const USES_DYN_MEM: bool = true;
218}
219
220impl<SS: SeedSize, SC: SeedChooser, CA: CompressedArray, S: BuildSeededHasher> Function<SS, SC, CA, S> {
221    
222    /// Returns value assigned to the given `key`.
223    ///
224    /// The returned value is in the range from `0` (inclusive) to the number of elements in the input key collection (exclusive).
225    /// `key` should come from the input key collection given during construction.
226    /// For keys not in the collection, returns `usize::MAX`.
227    #[inline(always)]   //inline(always) is important here
228    pub fn get<K>(&self, key: &K) -> usize where K: Hash + ?Sized {
229        /* TODO turbo support: let slice_begin = self.level0.slice_begin(key_hash);
230        let seed = self.level0.seed_for_slice(slice_begin);
231        if seed != 0 { return SC::f_slice(key_hash, slice_begin, seed, &self.level0.conf); } */
232
233        let key_hash = self.hasher.hash_one(key, 0);
234        let seed = unsafe{ self.level0.seed_for(self.seed_size, key_hash) };
235        if seed != 0 { return self.seed_chooser.f(key_hash, seed, &self.level0.conf); }
236
237        for level_nr in 0..self.levels.len() {
238            let l = &self.levels[level_nr];
239            let key_hash = self.hasher.hash_one(key, level_nr as u64 + 1);
240            let seed = unsafe { l.seeds.seed_for(self.seed_size, key_hash) };
241            if seed != 0 {
242                return self.unassigned.get(self.seed_chooser.f(key_hash, seed, &l.seeds.conf) + l.shift)
243            }
244        }
245        usize::MAX
246    }
247
248    /// Constructs [`Function`] for given `keys`, using a single thread and given parameters:
249    /// number of bits per seed, average bucket size (equals `bucket_size100/100.0`) and `hasher`.
250    /// 
251    /// `bits_per_seed_to_100_bucket_size` can be used to calculate good `bucket_size100`.
252    /// `keys` cannot contain duplicates.
253    pub fn with_vec_p_hash_sc<K>(mut keys: Vec::<K>, params: &Params<SS>, hasher: S, seed_chooser: SC) -> Self where K: Hash {
254        let number_of_keys = keys.len();
255        Self::_new(|h| {
256            let (level0, unassigned_values, unassigned_len) =
257                build_level_st(&mut keys, params, h, seed_chooser, 0);
258            (keys, level0, unassigned_values, unassigned_len)
259        }, |keys, level_nr, h| {
260            build_level_st(keys, params, h, seed_chooser, level_nr)
261        }, hasher, seed_chooser, params.seed_size, number_of_keys)
262    }
263
264    /// Constructs [`Function`] for given `keys`, using multiple (given number of) threads and given parameters:
265    /// number of bits per seed, average bucket size (equals `bucket_size100/100.0`) and `hasher`.
266    /// 
267    /// `bits_per_seed_to_100_bucket_size` can be used to calculate good `bucket_size100`.
268    /// `keys` cannot contain duplicates.
269    pub fn with_vec_p_threads_hash_sc<K>(mut keys: Vec::<K>, params: &Params<SS>, threads_num: usize, hasher: S, seed_chooser: SC) -> Self
270        where K: Hash+Sync+Send, S: Sync, SC: Sync
271    {
272        if threads_num == 1 { return Self::with_vec_p_hash_sc(keys, params, hasher, seed_chooser); }
273        let number_of_keys = keys.len();
274        Self::_new(|h| {
275            let (level0, unassigned_values, unassigned_len) =
276                build_level_mt(&mut keys, params, threads_num, h, seed_chooser, 0);
277            (keys, level0, unassigned_values, unassigned_len)
278        }, |keys, level_nr, h| {
279            build_level_mt(keys, params, threads_num, h, seed_chooser, level_nr)
280        }, hasher, seed_chooser, params.seed_size, number_of_keys)
281    }
282
283
284    /// Constructs [`Function`] for given `keys`, using a single thread and given parameters:
285    /// number of bits per seed, average bucket size (equals `bucket_size100/100.0`) and `hasher`.
286    /// 
287    /// `bits_per_seed_to_100_bucket_size` can be used to calculate good `bucket_size100`.
288    /// `keys` cannot contain duplicates.
289    pub fn with_slice_p_hash_sc<K>(keys: &[K], params: &Params<SS>, hasher: S, seed_chooser: SC) -> Self where K: Hash+Clone {
290        Self::_new(|h| {
291            build_level_from_slice_st(keys, params, h, seed_chooser, 0)
292        }, |keys, level_nr, h| {
293            build_level_st(keys, params, h, seed_chooser, level_nr)
294        }, hasher, seed_chooser, params.seed_size, keys.len())
295    }
296
297
298    /// Constructs [`Function`] for given `keys`, using multiple (given number of) threads and given parameters:
299    /// number of bits per seed, average bucket size (equals `bucket_size100/100.0`) and `hasher`.
300    /// 
301    /// `bits_per_seed_to_100_bucket_size` can be used to calculate good `bucket_size100`.
302    /// `keys` cannot contain duplicates.
303    pub fn with_slice_p_threads_hash_sc<K>(keys: &[K], params: &Params<SS>, threads_num: usize, hasher: S, seed_chooser: SC) -> Self
304        where K: Hash+Sync+Send+Clone, S: Sync, SC: Sync {
305        if threads_num == 1 { return Self::with_slice_p_hash_sc(keys, params, hasher, seed_chooser); }
306        Self::_new(|h| {
307            build_level_from_slice_mt(keys, params, threads_num, h, seed_chooser, 0)
308        }, |keys, level_nr, h| {
309            build_level_mt(keys, params, threads_num, h, seed_chooser, level_nr)
310        }, hasher, seed_chooser, params.seed_size, keys.len())
311    }
312
313    #[inline]
314    fn _new<K, BF, BL>(build_first: BF, build_level: BL, hasher: S, seed_chooser: SC, seed_size: SS, number_of_keys: usize) -> Self
315        where BF: FnOnce(&S) -> (Vec::<K>, SeedEx<SS::VecElement>, Box<[u64]>, usize),
316            BL: Fn(&mut Vec::<K>, u64, &S) -> (SeedEx<SS::VecElement>, Box<[u64]>, usize),
317            K: Hash
318        {
319        let (mut keys, level0, unassigned_values, unassigned_len) = build_first(&hasher);
320        debug_assert_eq!(unassigned_len, unassigned_values.bit_ones().count());
321        //dbg!(unassigned_len, keys.len(), unassigned_values.bit_ones().count());
322        //Self::finish_building(keys, bits_per_seed, bucket_size100, threads_num, hasher, level0, unassigned_values, unassigned_len)
323        let mut level0_unassigned = unassigned_values.bit_ones();
324        let mut unassigned = Vec::with_capacity(unassigned_len * 3 / 2);
325
326        let mut levels = Vec::with_capacity(16);
327        let mut last = 0;
328        while !keys.is_empty() {
329            let keys_len = keys.len();
330
331            //println!("{keys_len} {:.2}% keys bumped, {} {}% in {} self-collided buckets",
332            //    keys_len as f64 / 100000.0,
333                //crate::phast::seed_chooser::SELF_COLLISION_KEYS.load(std::sync::atomic::Ordering::SeqCst),
334                //crate::phast::seed_chooser::SELF_COLLISION_KEYS.load(std::sync::atomic::Ordering::SeqCst) * 100 / keys_len as u64,
335                //crate::phast::seed_chooser::SELF_COLLISION_BUCKETS.load(std::sync::atomic::Ordering::SeqCst));
336            let (seeds, unassigned_values, _unassigned_len) =
337                build_level(&mut keys, levels.len() as u64+1, &hasher);
338            let shift = unassigned.len();
339            for i in 0..keys_len {
340                if CA::MAX_FOR_UNUSED {
341                    if !unsafe{unassigned_values.get_bit_unchecked(i)} {
342                        last = level0_unassigned.next().unwrap();
343                        unassigned.push(last);
344                    } else {
345                        unassigned.push(usize::MAX);
346                    }
347                } else {
348                    if !unsafe{unassigned_values.get_bit_unchecked(i)} {
349                        last = level0_unassigned.next().unwrap();
350                    }
351                    unassigned.push(last);
352                }
353            }
354            levels.push(Level { seeds, shift });
355        }
356        debug_assert!(level0_unassigned.next().is_none());
357        drop(level0_unassigned);
358        Self {
359            level0,
360            unassigned: CA::new(unassigned, last, number_of_keys),
361            levels: levels.into_boxed_slice(),
362            hasher,
363            seed_chooser,
364            seed_size
365        }
366    }
367
368    /// Returns output range of minimal (perfect or k-perfect) function for given number of keys,
369    /// i.e. 1 + maximum value that minimal function can return.
370    #[inline(always)] pub fn minimal_output_range(&self, num_of_keys: usize) -> usize { self.seed_chooser.minimal_output_range(num_of_keys) }
371
372    /// Returns output range of `self`, i.e. 1 + maximum value that `self` can return.
373    pub fn output_range(&self) -> usize {
374        self.level0.conf.output_range(self.seed_chooser, self.seed_size.into())
375    }
376
377    /*#[inline(always)]
378    fn finish_building<K>(mut keys: Vec::<K>, bits_per_seed: SS, bucket_size100: u16, threads_num: usize, hasher: S, level0: SeedEx<SS>, unassigned_values: Box<[u64]>, unassigned_len: usize) -> Self where K: Hash+Sync+Send, S: Sync {
379        let mut level0_unassigned = unassigned_values.bit_ones();
380        let mut unassigned = Vec::with_capacity(unassigned_len * 3 / 2);
381
382        let mut levels = Vec::with_capacity(16);
383        let mut last = 0;
384        while !keys.is_empty() {
385            let keys_len = keys.len();
386            let (seeds, unassigned_values, _unassigned_len) =
387                Self::build_level(&mut keys, bits_per_seed, bucket_size100, threads_num, &hasher, levels.len() as u32+1);
388            let shift = unassigned.len();
389            for i in 0..keys_len {
390                if !unsafe{unassigned_values.get_bit_unchecked(i)} {
391                    last = level0_unassigned.next().unwrap();                    
392                }
393                unassigned.push(last);
394            }
395            levels.push(Level { seeds, shift });
396        }
397        debug_assert!(level0_unassigned.next().is_none());
398        drop(level0_unassigned);
399
400        let mut builder = CA::Builder::new(unassigned.len(), last);
401        builder.push_all(unassigned);
402
403        Self {
404            level0,
405            unassigned: CA::finish(builder),
406            levels: levels.into_boxed_slice(),
407            hasher,
408        }
409    }*/
410
411    /*pub fn new2<K>(mut keys: Vec::<K>, bits_per_seed: SS, bucket_size100: u16, threads_num: usize, hasher: S) -> Self where K: Hash+Sync+Send, S: Sync {
412        let keys_len = keys.len();
413        let (level0, unassigned_values, _unassigned_len) =
414            Self::build_level(&mut keys, bits_per_seed, bucket_size100, threads_num, &hasher, 0);
415        let largest_unassigned = bitmap_largest(&unassigned_values, keys_len);
416
417        let mut levels_data = Vec::with_capacity(16);
418        let mut total_len = 0;
419        while !keys.is_empty() {
420            let keys_len = keys.len();
421            let (seeds, unassigned_values, _unassigned_len) =
422                Self::build_level(&mut keys, bits_per_seed, bucket_size100, threads_num, &hasher, levels_data.len() as u32+1);
423            levels_data.push((seeds, unassigned_values, keys_len, total_len));
424            total_len += keys_len;
425        }
426        let mut levels = Vec::with_capacity(levels_data.len());
427        let mut builder = CA::Builder::new(total_len, largest_unassigned);
428        let mut level0_unassigned = unassigned_values.bit_ones();
429        let mut last = 0;
430        for (seeds, unassigned_values, keys_len, shift) in levels_data {
431            for i in 0..keys_len {
432                if !unsafe{unassigned_values.get_bit_unchecked(i)} {
433                    last = level0_unassigned.next().unwrap();                    
434                }
435                builder.push(last);
436            }
437            levels.push(Level { seeds, shift });
438        }
439        debug_assert!(level0_unassigned.next().is_none());
440        drop(level0_unassigned);
441
442        Self {
443            level0,
444            unassigned: CA::finish(builder),
445            levels: levels.into_boxed_slice(),
446            hasher,
447        }
448    }*/
449
450    /// Returns number of bytes which `write` will write.
451    pub fn write_bytes(&self) -> usize {
452        self.level0.write_bytes() +
453        self.unassigned.write_bytes() +
454        VByte::size(self.levels.len()) +
455        self.levels.iter().map(|l| l.size_bytes()).sum::<usize>()
456    }
457
458    /// Writes `self` to the `output`.
459    pub fn write(&self, output: &mut dyn io::Write) -> io::Result<()>
460    {
461        self.level0.write(output, self.seed_size)?;
462        self.unassigned.write(output)?;
463        VByte::write(output, self.levels.len())?;
464        for level in &self.levels {
465            level.write(output, self.seed_size)?;
466        }
467        Ok(())
468    }
469
470    /// Read `Self` from the `input`. `hasher` and `seed_chooser` must be the same as used by the structure written.
471    pub fn read_with_hasher_sc(input: &mut dyn io::Read, hasher: S, seed_chooser: SC) -> io::Result<Self> {
472        let (seed_size, level0) = SeedEx::read(input)?;
473        let unassigned = CA::read(input)?;
474        let levels_num: usize = VByte::read(input)?;
475        let mut levels = Vec::with_capacity(levels_num);
476        for _ in 0..levels_num {
477            levels.push(Level::read::<SS>(input)?.1);
478        }
479        Ok(Self { level0, unassigned, levels: levels.into_boxed_slice(), hasher, seed_chooser, seed_size })
480    }
481}
482
483impl<SS: SeedSize> Function<SS, SeedOnly, DefaultCompressedArray, BuildDefaultSeededHasher> {
484
485    /// Read `Self` from the `input`. Uses default hasher and seed chooser.
486    pub fn read(input: &mut dyn io::Read) -> io::Result<Self> {
487        Self::read_with_hasher_sc(input, BuildDefaultSeededHasher::default(), SeedOnly)
488    }
489}
490
491impl Function<Bits8, SeedOnly, DefaultCompressedArray, BuildDefaultSeededHasher> {
492    /// Constructs [`Function`] for given `keys`, using a single thread.
493    /// 
494    /// `keys` cannot contain duplicates.
495    pub fn from_vec_st<K>(keys: Vec::<K>) -> Self where K: Hash {
496        Self::with_vec_p_hash_sc(keys, &Params::new(Bits8::default(), bits_per_seed_to_100_bucket_size(8)),
497        BuildDefaultSeededHasher::default(), SeedOnly)
498    }
499
500    /// Constructs [`Function`] for given `keys`, using multiple threads.
501    /// 
502    /// `keys` cannot contain duplicates.
503    pub fn from_vec_mt<K>(keys: Vec::<K>) -> Self where K: Hash+Send+Sync {
504        Self::with_vec_p_threads_hash_sc(keys, &Params::new(Bits8::default(), bits_per_seed_to_100_bucket_size(8)),
505        std::thread::available_parallelism().map_or(1, |v| v.into()), BuildDefaultSeededHasher::default(), SeedOnly)
506    }
507
508    /// Constructs [`Function`] for given `keys`, using a single thread.
509    /// 
510    /// `keys` cannot contain duplicates.
511    pub fn from_slice_st<K>(keys: &[K]) -> Self where K: Hash+Clone {
512        Self::with_slice_p_hash_sc(keys, &Params::new(Bits8::default(), bits_per_seed_to_100_bucket_size(8)),
513        BuildDefaultSeededHasher::default(), SeedOnly)
514    }
515
516    /// Constructs [`Function`] for given `keys`, using multiple threads.
517    /// 
518    /// `keys` cannot contain duplicates.
519    pub fn from_slice_mt<K>(keys: &[K]) -> Self where K: Hash+Clone+Send+Sync {
520        Self::with_slice_p_threads_hash_sc(keys, &Params::new(Bits8::default(), bits_per_seed_to_100_bucket_size(8)),
521        std::thread::available_parallelism().map_or(1, |v| v.into()), BuildDefaultSeededHasher::default(), SeedOnly)
522    }
523
524}
525
526#[cfg(test)]
527pub(crate) mod tests {
528    use super::*;
529    use crate::utils::tests::test_mphf;
530
531    fn test_read_write<SS: SeedSize>(h: &Function::<SS>) where SS::VecElement: std::fmt::Debug+PartialEq {
532        let mut buff = Vec::new();
533        h.write(&mut buff).unwrap();
534        //assert_eq!(buff.len(), h.write_bytes());
535        let read = Function::<SS>::read(&mut &buff[..]).unwrap();
536        assert_eq!(h.level0.conf, read.level0.conf);
537        assert_eq!(h.levels.len(), read.levels.len());
538        for (hl, rl) in h.levels.iter().zip(&read.levels) {
539            assert_eq!(hl.shift, rl.shift);
540            assert_eq!(hl.seeds.conf, rl.seeds.conf);
541            assert_eq!(hl.seeds.seeds, rl.seeds.seeds);
542        }
543    }
544
545    #[test]
546    fn test_small() {
547        let input = [1, 2, 3, 4, 5];
548        let f = Function::from_slice_st(&input);
549        test_mphf(&input, |key| Some(f.get(key)));
550        test_read_write(&f);
551    }
552}