ph_temp/phast/conf.rs
1use std::io;
2
3use binout::{Serializer, VByte};
4use seedable_hash::map64_to_64;
5
6use crate::seeds::SeedSize;
7
8use super::SeedChooser;
9
10/// PHast map-or-bump function configuration.
11#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
12pub struct Conf {
13 pub(crate) buckets_num: usize, // number of buckets, B
14 pub(crate) slice_len_minus_one: u16, // slice length L
15 pub(crate) num_of_slices: usize, // m-P
16}
17
18/*#[inline(always)]
19const fn mix64(mut x: u64) -> u64 {
20 x = (x ^ (x >> 30)).wrapping_mul(0xbf58476d1ce4e5b9u64);
21 x = (x ^ (x >> 27)).wrapping_mul(0x94d049bb133111ebu64);
22 x ^ (x >> 31)
23}*/
24
25/*#[inline(always)]
26fn mix16fast(mut x: u16) -> u16 {
27 x += x << 7; x ^= x >> 8;
28 x += x << 3; x ^= x >> 2;
29 x += x << 4; x ^= x >> 8;
30 x
31}*/
32
33/// Returns most significant 64-bit half of `a*b`.
34#[inline(always)]
35pub(crate) fn mult_hi(a: u64, b: u64) -> u64 {
36 let r = (a as u128) * (b as u128);
37 //((r >> 64) ^ r) as u64
38 (r >> 64) as u64
39}
40
41/// Returns the value that mix `key` and `seed`. Fast.
42#[inline(always)]
43pub(crate) fn mix_key_seed(key: u64, seed: u16) -> u16 {
44 mult_hi((seed as u64).wrapping_mul(0x51_7c_c1_b7_27_22_0a_95 /*0x1d8e_4e27_c47d_124f*/), key) as u16
45} // 0x51_7c_c1_b7_27_22_0a_95 is from FXHash
46
47/// Returns the value that mix `a` and `b` by multiplication and xoring.
48#[inline(always)]
49pub(crate) fn mix(a: u64, b: u64) -> u64 {
50 let r = (a as u128) * (b as u128);
51 ((r >> 64) ^ r) as u64
52 //(r >> 64) as u64
53}
54
55//const SEEDS_MAP: [u64; 256] = std::array::from_fn(|i| mix64(i as u64));
56
57/// Returns bucket size proper for given number of `bits_per_seed`.
58#[inline]
59pub const fn bits_per_seed_to_100_bucket_size(bits_per_seed: u8) -> u16 {
60 match bits_per_seed {
61 0..=4 => 250,
62 5 => 290,
63 6 => 320,
64 7 => 370,
65 8 => 450,
66 9 => 530,
67 10 => 590,
68 11 => 650,
69 12 => 720,
70 13 => 770,
71 _ => 830
72 }
73}
74
75impl Conf {
76
77 pub(crate) fn new(output_range: usize, input_size: usize, bucket_size_100: u16, slice_len: u16, max_shift: u16) -> Self {
78 let bucket_size_100 = bucket_size_100 as usize;
79 Self {
80 buckets_num: 1.max((input_size * 100 + bucket_size_100/2) / bucket_size_100),
81 slice_len_minus_one: slice_len - 1,
82 num_of_slices: output_range + 1 - slice_len as usize - max_shift as usize,
83 }
84 }
85
86 // configuration for "turbo" function that ussume that input=output range and bucket_size_100 is about 400.
87 /*pub(crate) fn turbo_new(output_range: usize, slice_len: u16, max_shift: u16) -> Self {
88 let num_of_slices = output_range + 1 - slice_len as usize - max_shift as usize;
89 Self {
90 buckets_num: (num_of_slices-1)/4+1,
91 slice_len_minus_one: slice_len - 1,
92 num_of_slices,
93 }
94 }*/
95
96 /// Returns output range of the function.
97 #[inline] pub fn output_range<SC: SeedChooser>(&self, seed_chooser: SC, bits_per_seed: u8) -> usize {
98 self.num_of_slices + self.slice_len_minus_one as usize + seed_chooser.extra_shift(bits_per_seed) as usize
99 }
100
101 /// Returns bucket assigned to the `key`.
102 #[inline(always)]
103 pub fn bucket_for(&self, key: u64) -> usize {
104 map64_to_64(key, self.buckets_num as u64) as usize
105 }
106
107 /// Returns first value of slice assigned to the `key`.
108 #[inline(always)]
109 pub fn slice_begin(&self, key: u64) -> usize {
110 map64_to_64(key, self.num_of_slices as u64) as usize
111 }
112
113 /// Returns index of `key` in its slice.
114 #[inline(always)]
115 pub fn in_slice(&self, key: u64, seed: u16) -> usize {
116 (mult_hi((seed as u64).wrapping_mul(0x51_7c_c1_b7_27_22_0a_95 /*0x1d8e_4e27_c47d_124f*/), key) as u16 & self.slice_len_minus_one) as usize
117 //((key.wrapping_add(seed as u64 * 2)) as u16 & self.slice_len_minus_one) as usize
118 //((key.wrapping_mul(0x1d8e_4e27_c47d_124f).wrapping_add(seed as u64)) as u16 & self.slice_len_minus_one) as usize
119 /*const P: u16 = 0;
120 let seed_lo = (seed & ((1<<P)-1)) + 1;
121 (wymum((seed_lo as u64).wrapping_mul(0x1d8e_4e27_c47d_124f), key).wrapping_add(3*(seed>>P) as u64) as u16 & self.slice_len_minus_one) as usize*/
122 }
123
124 /// Returns index of `key` in its slice.
125 /*#[inline(always)]
126 pub(crate) fn in_slice_seed_shift(&self, key: u64, seed: u16, shift: u16) -> usize {
127 ((mult_hi((seed as u64).wrapping_mul(0x1d8e_4e27_c47d_124f), key) as u16 + shift as u16) & self.slice_len_minus_one) as usize
128 //0x51_7c_c1_b7_27_22_0a_95
129 }*/
130
131 #[inline(always)]
132 pub(crate) fn in_slice_nobump(&self, key: u64, seed: u16) -> usize {
133 //(wymum((seed as u64 ^ 0xa076_1d64_78bd_642f).wrapping_mul(0x1d8e_4e27_c47d_124f), key) as u16 & self.slice_len_minus_one) as usize
134 (mix(mix(seed as u64 ^ 0xa076_1d64_78bd_642f, 0x1d8e_4e27_c47d_124f), key) as u16 & self.slice_len_minus_one) as usize
135 }
136
137 /// Returns seed independent index of `key` in its partition.
138 #[inline(always)]
139 pub(crate) fn in_slice_noseed(&self, key: u64) -> usize {
140 //(wymum_xor(key as u64, 0xe703_7ed1_a0b4_28db) as u16 & self.slice_len_minus_one) as usize
141 (key as u16 & self.slice_len_minus_one) as usize
142 }
143
144 /// Returns the value of the function for given `key` and `seed`.
145 #[inline(always)]
146 pub fn f(&self, key: u64, seed: u16) -> usize {
147 self.slice_begin(key) + self.in_slice(key, seed)
148 }
149
150 #[inline(always)]
151 pub(crate) fn f_shift0(&self, key: u64) -> usize {
152 self.slice_begin(key) + self.in_slice_noseed(key)
153 }
154
155 /*#[inline(always)]
156 pub(crate) fn f_shift(&self, key: u64, shift: u16) -> usize {
157 self.slice_begin(key) + self.in_slice_noseed(key) + shift as usize - 1
158 }*/
159
160 #[inline(always)]
161 pub(crate) fn f_nobump(&self, key: u64, seed: u16) -> usize {
162 self.slice_begin(key) + self.in_slice_nobump(key, seed)
163 }
164
165
166
167 #[inline] pub fn slice_len(&self) -> u16 {
168 self.slice_len_minus_one + 1
169 }
170
171 #[inline] pub(crate) fn new_seeds_vec<SS: SeedSize>(&self, seed_size: SS) -> Box<[SS::VecElement]> {
172 seed_size.new_zeroed_seed_vec(self.buckets_num)
173 }
174
175 // Returns bucket assigned to the `slice_begin` by "turbo" configuration.
176 /*#[inline(always)]
177 pub(crate) fn turbo_bucket_for_slice(&self, slice_begin: usize) -> usize {
178 slice_begin / 4
179 }*/
180
181 // Returns bucket assigned to the `key` by "turbo" configuration, using `turbo_bucket_for_slice`.
182 // It can be faster than bucket_for only if `slice_begin` is called near this call
183 // and compiler optime out redundant `slice_begin` calculation.
184 /*#[inline(always)]
185 pub(crate) fn turbo_bucket_for_key(&self, key: u64) -> usize {
186 self.turbo_bucket_for_slice(self.slice_begin(key))
187 }*/
188
189 /// Writes `self` to the `output`.
190 pub fn write(&self, output: &mut dyn io::Write) -> io::Result<()> {
191 VByte::write(output, self.buckets_num)?;
192 VByte::write(output, self.slice_len_minus_one)?;
193 VByte::write(output, self.num_of_slices)
194 }
195
196 /// Returns number of bytes which `write` will write.
197 pub fn write_bytes(&self) -> usize {
198 VByte::size(self.buckets_num)
199 + VByte::size(self.slice_len_minus_one)
200 + VByte::size(self.num_of_slices)
201 }
202
203 /// Read `Self` from the `input`.
204 pub fn read(input: &mut dyn io::Read) -> io::Result<Self>
205 {
206 let buckets_num = VByte::read(input)?;
207 let slice_len_minus_one = VByte::read(input)?;
208 let num_of_slices = VByte::read(input)?;
209 Ok(Self {
210 buckets_num,
211 slice_len_minus_one,
212 num_of_slices,
213 })
214 }
215}
216
217#[derive(Clone, Copy)]
218pub struct Params<SS> {
219 pub seed_size: SS,
220 pub bucket_size100: u16,
221 pub preferred_slice_len: u16
222}
223
224impl<SS> Params<SS> {
225 #[inline]
226 pub fn new(seed_size: SS, bucket_size100: u16) -> Self {
227 Self { seed_size, bucket_size100, preferred_slice_len: 0 }
228 }
229
230 #[inline]
231 pub fn new_psl(seed_size: SS, bucket_size100: u16, preferred_slice_len: u16) -> Self {
232 Self { seed_size, bucket_size100, preferred_slice_len }
233 }
234
235 /*#[inline]
236 pub fn slice_len(&self, default: u16) -> u16 {
237 if self.preferred_slice_len == 0 { default } else { self.preferred_slice_len }
238 }*/
239}
240
241impl<SS: Copy+Into<u8>> Params<SS> {
242 #[inline(always)]
243 pub fn bits_per_seed(&self) -> u8 { self.seed_size.into() }
244}