Skip to main content

ph_temp/phast/
partial.rs

1use dyn_size_of::GetSize;
2use voracious_radix_sort::RadixSort;
3
4use crate::{phast::{builder::{build_mt, build_st, BuildConf}, conf::Conf, evaluator::BucketToActivateEvaluator, function::SeedEx, Params, SeedChooser, SeedOnly, WINDOW_SIZE}, seeds::SeedSize};
5use std::hash::{BuildHasher, Hash, RandomState};
6
7/// Map-or-bump function that assigns different numbers to some keys and `None` to other.
8/// 
9/// Can be used with any seed chooser (which specify a particular PHast variant):
10/// [`ShiftOnlyWrapped`], [`ShiftSeedWrapped`], [`SeedOnly`], [`SeedOnlyK`].
11/// 
12/// See:
13/// Piotr Beling, Peter Sanders, *PHast - Perfect Hashing made fast*, 2025, <https://arxiv.org/abs/2504.17918>
14pub struct Partial<SS, SC = SeedOnly, S = RandomState> where SS: SeedSize {
15    seeds: SeedEx<SS::VecElement>,
16    hasher: S,
17    seed_chooser: SC,
18    seed_size: SS,
19}
20
21impl<SC, SS: SeedSize, S> GetSize for Partial<SS, SC, S> {
22    fn size_bytes_dyn(&self) -> usize { self.seeds.size_bytes_dyn() }
23    fn size_bytes_content_dyn(&self) -> usize { self.seeds.size_bytes_content_dyn() }
24    const USES_DYN_MEM: bool = true;
25}
26
27impl<SS: SeedSize, SC: SeedChooser> Partial<SS, SC, ()> {
28    pub fn with_hashes_bps_conf_sc_be_u<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, seed_chooser: SC, bucket_evaluator: BE) -> (Self, usize)
29        where BE: BucketToActivateEvaluator
30    {
31        let (f, build_conf) = Self::build_st(hashes, seed_size, conf, (), seed_chooser, bucket_evaluator);
32        let unassigned = build_conf.unassigned_len(&f.seeds.seeds);
33        (f, unassigned)
34    }
35
36    pub fn with_hashes_bps_conf_bs_threads_sc_be_u<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, threads_num: usize, seed_chooser: SC, bucket_evaluator: BE) -> (Self, usize)
37        where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
38    {
39        let (f, build_conf) = Self::build_mt(hashes, seed_size, conf, threads_num, (), seed_chooser, bucket_evaluator);
40        let unassigned = build_conf.unassigned_len(&f.seeds.seeds);
41        (f, unassigned)
42    }
43
44
45    pub fn with_hashes_bps_conf_sc_be<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, seed_chooser: SC, bucket_evaluator: BE) -> Self
46        where BE: BucketToActivateEvaluator
47    {
48        Self::build_st(hashes, seed_size, conf, (), seed_chooser, bucket_evaluator).0
49    }
50
51    pub fn with_hashes_bps_conf_bs_threads_sc_be<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, threads_num: usize, seed_chooser: SC, bucket_evaluator: BE) -> Self
52        where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
53    {
54        Self::build_mt(hashes, seed_size, conf, threads_num, (), seed_chooser, bucket_evaluator).0
55    }
56
57
58    pub fn with_hashes_p_sc_be_u<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, seed_chooser: SC, bucket_evaluator: BE) -> (Self, usize)
59        where BE: BucketToActivateEvaluator
60    {
61        let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
62        Self::with_hashes_bps_conf_sc_be_u(hashes, params.seed_size, conf, seed_chooser, bucket_evaluator)
63    }
64
65    pub fn with_hashes_p_threads_sc_be_u<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, threads_num: usize, seed_chooser: SC, bucket_evaluator: BE) -> (Self, usize)
66        where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
67    {
68        let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
69        Self::with_hashes_bps_conf_bs_threads_sc_be_u(hashes, params.seed_size, conf, threads_num, seed_chooser, bucket_evaluator)
70    }
71
72
73    pub fn with_hashes_p_sc_be<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, seed_chooser: SC, bucket_evaluator: BE) -> Self
74        where BE: BucketToActivateEvaluator
75    {
76        let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
77        Self::with_hashes_bps_conf_sc_be(hashes, params.seed_size, conf, seed_chooser, bucket_evaluator)
78    }
79
80    pub fn with_hashes_p_threads_sc_be<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, threads_num: usize, seed_chooser: SC, bucket_evaluator: BE) -> Self
81        where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
82    {
83        let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
84        Self::with_hashes_bps_conf_bs_threads_sc_be(hashes, params.seed_size, conf, threads_num, seed_chooser, bucket_evaluator)
85    }
86
87
88    pub fn with_hashes_p_sc_u<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, seed_chooser: SC) -> (Self, usize)
89        where BE: BucketToActivateEvaluator
90    {
91        let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
92        let bucket_evaluator = seed_chooser.bucket_evaluator(params.bits_per_seed(), conf.slice_len());
93        Self::with_hashes_bps_conf_sc_be_u(hashes, params.seed_size, conf, seed_chooser, bucket_evaluator)
94    }
95
96    pub fn with_hashes_p_threads_sc_u<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, threads_num: usize, seed_chooser: SC) -> (Self, usize)
97        where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
98    {
99        let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
100        let bucket_evaluator = seed_chooser.bucket_evaluator(params.bits_per_seed(), conf.slice_len());
101        Self::with_hashes_bps_conf_bs_threads_sc_be_u(hashes, params.seed_size, conf, threads_num, seed_chooser, bucket_evaluator)
102    }
103
104
105    pub fn with_hashes_p_sc<'k>(hashes: &'k mut [u64], params: &Params<SS>, seed_chooser: SC) -> Self
106    {
107        let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
108        let bucket_evaluator = seed_chooser.bucket_evaluator(params.bits_per_seed(), conf.slice_len());
109        Self::with_hashes_bps_conf_sc_be(hashes, params.seed_size, conf, seed_chooser, bucket_evaluator)
110    }
111
112    pub fn with_hashes_p_threads_sc<'k>(hashes: &'k mut [u64], params: &Params<SS>, threads_num: usize, seed_chooser: SC) -> Self
113        where SC: Sync
114    {
115        let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
116        let bucket_evaluator = seed_chooser.bucket_evaluator(params.bits_per_seed(), conf.slice_len());
117        Self::with_hashes_bps_conf_bs_threads_sc_be(hashes, params.seed_size, conf, threads_num, seed_chooser, bucket_evaluator)
118    }
119}
120
121
122
123impl<SS: SeedSize, SC: SeedChooser, S> Partial<SS, SC, S> {
124    /// Returns value assigned to the given key hash or `None`.
125    #[inline(always)]
126    pub fn get_for_hash(&self, key_hash: u64) -> Option<usize> {
127        let seed = unsafe { self.seeds.seed_for(self.seed_size, key_hash) };
128        (seed != 0).then(|| self.seed_chooser.f(key_hash, seed, &self.seeds.conf))
129    }
130
131    fn build_st<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, hasher: S, seed_chooser: SC, bucket_evaluator: BE)
132     -> (Self, BuildConf<'k, BE, SS, SC>)
133        where BE: BucketToActivateEvaluator
134    {
135        hashes.voracious_sort();
136        let (seeds, build_conf) = build_st(hashes, conf, seed_size, bucket_evaluator, seed_chooser);
137        (Self {
138            seeds: SeedEx{ seeds, conf },
139            hasher,
140            seed_chooser,
141            seed_size
142        }, build_conf)
143    }
144
145    fn build_mt<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, threads_num: usize, hasher: S, seed_chooser: SC, bucket_evaluator: BE)
146     -> (Self, BuildConf<'k, BE, SS, SC>)
147        where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
148    {
149        if threads_num == 1 { return Self::build_st(hashes, seed_size, conf, hasher, seed_chooser, bucket_evaluator); }
150        hashes.voracious_mt_sort(threads_num);
151        let (seeds, build_conf) = build_mt(hashes, conf, seed_size, WINDOW_SIZE, bucket_evaluator, seed_chooser, threads_num);
152        (Self {
153            seeds: SeedEx{ seeds, conf },
154            hasher,
155            seed_chooser,
156            seed_size,
157        }, build_conf)
158    }
159}
160
161impl<SS: SeedSize, SC: SeedChooser, S> Partial<SS, SC, S> {
162    /// Returns output range of minimal (perfect or k-perfect) function for given number of keys,
163    /// i.e. 1 + maximum value that minimal function can return.
164    #[inline(always)] pub fn minimal_output_range(&self, num_of_keys: u32) -> usize {
165        self.seed_chooser.minimal_output_range(num_of_keys as usize)
166    }
167
168    /// Returns output range of `self`, i.e. 1 + maximum value that `self` can return.
169    pub fn output_range(&self) -> usize {
170        self.seeds.conf.output_range(self.seed_chooser, self.seed_size.into())
171    }
172}
173
174impl<SS: SeedSize, SC: SeedChooser, S: BuildHasher> Partial<SS, SC, S> {
175    /// Returns value assigned to the given `key` or `None`.
176    /// 
177    /// The returned value is in the range from `0` (inclusive) to the number of elements in the input key collection (exclusive).
178    /// `key` must come from the input key collection given during construction.
179    #[inline(always)]
180    pub fn get<K>(&self, key: &K) -> Option<usize> where K: Hash + ?Sized {
181        self.get_for_hash(self.hasher.hash_one(key))
182    }
183
184    /// Returns [`Partial`] function and number of keys with unassigned values for given `keys`,
185    /// using a single thread and given parameters.
186    pub fn with_keys_p_hash_sc_be_u<'k, K, BE>(keys: impl Iterator<Item = K>, params: &Params<SS>, hasher: S, seed_chooser: SC, bucket_evaluator: BE)
187     -> (Self, usize)
188        where K: Hash, BE: BucketToActivateEvaluator
189    {
190        let mut hashes: Box<[_]> = keys.map(|k| hasher.hash_one(k)).collect();
191        let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
192        let (f, build_conf) = Self::build_st(&mut hashes, params.seed_size, conf, hasher, seed_chooser, bucket_evaluator);
193        let unassigned = build_conf.unassigned_len(&f.seeds.seeds);
194        (f, unassigned)
195    }
196}