1use dyn_size_of::GetSize;
2use voracious_radix_sort::RadixSort;
3
4use crate::{phast::{builder::{build_mt, build_st, BuildConf}, conf::Conf, evaluator::BucketToActivateEvaluator, function::SeedEx, Params, SeedChooser, SeedOnly, WINDOW_SIZE}, seeds::SeedSize};
5use std::hash::{BuildHasher, Hash, RandomState};
6
7pub struct Partial<SS, SC = SeedOnly, S = RandomState> where SS: SeedSize {
15 seeds: SeedEx<SS::VecElement>,
16 hasher: S,
17 seed_chooser: SC,
18 seed_size: SS,
19}
20
21impl<SC, SS: SeedSize, S> GetSize for Partial<SS, SC, S> {
22 fn size_bytes_dyn(&self) -> usize { self.seeds.size_bytes_dyn() }
23 fn size_bytes_content_dyn(&self) -> usize { self.seeds.size_bytes_content_dyn() }
24 const USES_DYN_MEM: bool = true;
25}
26
27impl<SS: SeedSize, SC: SeedChooser> Partial<SS, SC, ()> {
28 pub fn with_hashes_bps_conf_sc_be_u<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, seed_chooser: SC, bucket_evaluator: BE) -> (Self, usize)
29 where BE: BucketToActivateEvaluator
30 {
31 let (f, build_conf) = Self::build_st(hashes, seed_size, conf, (), seed_chooser, bucket_evaluator);
32 let unassigned = build_conf.unassigned_len(&f.seeds.seeds);
33 (f, unassigned)
34 }
35
36 pub fn with_hashes_bps_conf_bs_threads_sc_be_u<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, threads_num: usize, seed_chooser: SC, bucket_evaluator: BE) -> (Self, usize)
37 where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
38 {
39 let (f, build_conf) = Self::build_mt(hashes, seed_size, conf, threads_num, (), seed_chooser, bucket_evaluator);
40 let unassigned = build_conf.unassigned_len(&f.seeds.seeds);
41 (f, unassigned)
42 }
43
44
45 pub fn with_hashes_bps_conf_sc_be<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, seed_chooser: SC, bucket_evaluator: BE) -> Self
46 where BE: BucketToActivateEvaluator
47 {
48 Self::build_st(hashes, seed_size, conf, (), seed_chooser, bucket_evaluator).0
49 }
50
51 pub fn with_hashes_bps_conf_bs_threads_sc_be<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, threads_num: usize, seed_chooser: SC, bucket_evaluator: BE) -> Self
52 where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
53 {
54 Self::build_mt(hashes, seed_size, conf, threads_num, (), seed_chooser, bucket_evaluator).0
55 }
56
57
58 pub fn with_hashes_p_sc_be_u<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, seed_chooser: SC, bucket_evaluator: BE) -> (Self, usize)
59 where BE: BucketToActivateEvaluator
60 {
61 let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
62 Self::with_hashes_bps_conf_sc_be_u(hashes, params.seed_size, conf, seed_chooser, bucket_evaluator)
63 }
64
65 pub fn with_hashes_p_threads_sc_be_u<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, threads_num: usize, seed_chooser: SC, bucket_evaluator: BE) -> (Self, usize)
66 where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
67 {
68 let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
69 Self::with_hashes_bps_conf_bs_threads_sc_be_u(hashes, params.seed_size, conf, threads_num, seed_chooser, bucket_evaluator)
70 }
71
72
73 pub fn with_hashes_p_sc_be<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, seed_chooser: SC, bucket_evaluator: BE) -> Self
74 where BE: BucketToActivateEvaluator
75 {
76 let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
77 Self::with_hashes_bps_conf_sc_be(hashes, params.seed_size, conf, seed_chooser, bucket_evaluator)
78 }
79
80 pub fn with_hashes_p_threads_sc_be<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, threads_num: usize, seed_chooser: SC, bucket_evaluator: BE) -> Self
81 where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
82 {
83 let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
84 Self::with_hashes_bps_conf_bs_threads_sc_be(hashes, params.seed_size, conf, threads_num, seed_chooser, bucket_evaluator)
85 }
86
87
88 pub fn with_hashes_p_sc_u<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, seed_chooser: SC) -> (Self, usize)
89 where BE: BucketToActivateEvaluator
90 {
91 let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
92 let bucket_evaluator = seed_chooser.bucket_evaluator(params.bits_per_seed(), conf.slice_len());
93 Self::with_hashes_bps_conf_sc_be_u(hashes, params.seed_size, conf, seed_chooser, bucket_evaluator)
94 }
95
96 pub fn with_hashes_p_threads_sc_u<'k, BE>(hashes: &'k mut [u64], params: &Params<SS>, threads_num: usize, seed_chooser: SC) -> (Self, usize)
97 where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
98 {
99 let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
100 let bucket_evaluator = seed_chooser.bucket_evaluator(params.bits_per_seed(), conf.slice_len());
101 Self::with_hashes_bps_conf_bs_threads_sc_be_u(hashes, params.seed_size, conf, threads_num, seed_chooser, bucket_evaluator)
102 }
103
104
105 pub fn with_hashes_p_sc<'k>(hashes: &'k mut [u64], params: &Params<SS>, seed_chooser: SC) -> Self
106 {
107 let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
108 let bucket_evaluator = seed_chooser.bucket_evaluator(params.bits_per_seed(), conf.slice_len());
109 Self::with_hashes_bps_conf_sc_be(hashes, params.seed_size, conf, seed_chooser, bucket_evaluator)
110 }
111
112 pub fn with_hashes_p_threads_sc<'k>(hashes: &'k mut [u64], params: &Params<SS>, threads_num: usize, seed_chooser: SC) -> Self
113 where SC: Sync
114 {
115 let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
116 let bucket_evaluator = seed_chooser.bucket_evaluator(params.bits_per_seed(), conf.slice_len());
117 Self::with_hashes_bps_conf_bs_threads_sc_be(hashes, params.seed_size, conf, threads_num, seed_chooser, bucket_evaluator)
118 }
119}
120
121
122
123impl<SS: SeedSize, SC: SeedChooser, S> Partial<SS, SC, S> {
124 #[inline(always)]
126 pub fn get_for_hash(&self, key_hash: u64) -> Option<usize> {
127 let seed = unsafe { self.seeds.seed_for(self.seed_size, key_hash) };
128 (seed != 0).then(|| self.seed_chooser.f(key_hash, seed, &self.seeds.conf))
129 }
130
131 fn build_st<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, hasher: S, seed_chooser: SC, bucket_evaluator: BE)
132 -> (Self, BuildConf<'k, BE, SS, SC>)
133 where BE: BucketToActivateEvaluator
134 {
135 hashes.voracious_sort();
136 let (seeds, build_conf) = build_st(hashes, conf, seed_size, bucket_evaluator, seed_chooser);
137 (Self {
138 seeds: SeedEx{ seeds, conf },
139 hasher,
140 seed_chooser,
141 seed_size
142 }, build_conf)
143 }
144
145 fn build_mt<'k, BE>(hashes: &'k mut [u64], seed_size: SS, conf: Conf, threads_num: usize, hasher: S, seed_chooser: SC, bucket_evaluator: BE)
146 -> (Self, BuildConf<'k, BE, SS, SC>)
147 where BE: BucketToActivateEvaluator + Sync, SC: Sync, BE::Value: Send
148 {
149 if threads_num == 1 { return Self::build_st(hashes, seed_size, conf, hasher, seed_chooser, bucket_evaluator); }
150 hashes.voracious_mt_sort(threads_num);
151 let (seeds, build_conf) = build_mt(hashes, conf, seed_size, WINDOW_SIZE, bucket_evaluator, seed_chooser, threads_num);
152 (Self {
153 seeds: SeedEx{ seeds, conf },
154 hasher,
155 seed_chooser,
156 seed_size,
157 }, build_conf)
158 }
159}
160
161impl<SS: SeedSize, SC: SeedChooser, S> Partial<SS, SC, S> {
162 #[inline(always)] pub fn minimal_output_range(&self, num_of_keys: u32) -> usize {
165 self.seed_chooser.minimal_output_range(num_of_keys as usize)
166 }
167
168 pub fn output_range(&self) -> usize {
170 self.seeds.conf.output_range(self.seed_chooser, self.seed_size.into())
171 }
172}
173
174impl<SS: SeedSize, SC: SeedChooser, S: BuildHasher> Partial<SS, SC, S> {
175 #[inline(always)]
180 pub fn get<K>(&self, key: &K) -> Option<usize> where K: Hash + ?Sized {
181 self.get_for_hash(self.hasher.hash_one(key))
182 }
183
184 pub fn with_keys_p_hash_sc_be_u<'k, K, BE>(keys: impl Iterator<Item = K>, params: &Params<SS>, hasher: S, seed_chooser: SC, bucket_evaluator: BE)
187 -> (Self, usize)
188 where K: Hash, BE: BucketToActivateEvaluator
189 {
190 let mut hashes: Box<[_]> = keys.map(|k| hasher.hash_one(k)).collect();
191 let conf = seed_chooser.conf_for_minimal_p(hashes.len(), params);
192 let (f, build_conf) = Self::build_st(&mut hashes, params.seed_size, conf, hasher, seed_chooser, bucket_evaluator);
193 let unassigned = build_conf.unassigned_len(&f.seeds.seeds);
194 (f, unassigned)
195 }
196}