Skip to main content

ph_temp/phast/
conf.rs

1use std::io;
2
3use binout::{Serializer, VByte};
4use seedable_hash::map64_to_64;
5
6use crate::seeds::SeedSize;
7
8use super::SeedChooser;
9
10/// PHast map-or-bump function configuration.
11#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
12pub struct Conf {
13    pub(crate) buckets_num: usize, // number of buckets, B
14    pub(crate) slice_len_minus_one: u16,  // slice length L
15    pub(crate) num_of_slices: usize,   // m-P
16}
17
18/*#[inline(always)]
19const fn mix64(mut x: u64) -> u64 {
20    x = (x ^ (x >> 30)).wrapping_mul(0xbf58476d1ce4e5b9u64);
21    x = (x ^ (x >> 27)).wrapping_mul(0x94d049bb133111ebu64);
22    x ^ (x >> 31)
23}*/
24
25/*#[inline(always)]
26fn mix16fast(mut x: u16) -> u16 {
27    x += x << 7; x ^= x >> 8;
28    x += x << 3; x ^= x >> 2;
29    x += x << 4; x ^= x >> 8;
30    x
31}*/
32
33/// Returns most significant 64-bit half of `a*b`.
34#[inline(always)]
35pub(crate) fn mult_hi(a: u64, b: u64) -> u64 {
36    let r = (a as u128) * (b as u128);
37    //((r >> 64) ^ r) as u64
38    (r >> 64) as u64
39}
40
41/// Returns the value that mix `key` and `seed`. Fast.
42#[inline(always)]
43pub(crate) fn mix_key_seed(key: u64, seed: u16) -> u16 {
44    mult_hi((seed as u64).wrapping_mul(0x51_7c_c1_b7_27_22_0a_95 /*0x1d8e_4e27_c47d_124f*/), key) as u16
45}   // 0x51_7c_c1_b7_27_22_0a_95 is from FXHash
46
47/// Returns the value that mix `a` and `b` by multiplication and xoring.
48#[inline(always)]
49pub(crate) fn mix(a: u64, b: u64) -> u64 {
50    let r = (a as u128) * (b as u128);
51    ((r >> 64) ^ r) as u64
52    //(r >> 64) as u64
53}
54
55//const SEEDS_MAP: [u64; 256] = std::array::from_fn(|i| mix64(i as u64));
56
57/// Returns bucket size proper for given number of `bits_per_seed`.
58#[inline]
59pub const fn bits_per_seed_to_100_bucket_size(bits_per_seed: u8) -> u16 {
60    match bits_per_seed {
61        0..=4 => 250,
62        5 => 290,
63        6 => 320,
64        7 => 370,
65        8 => 450,
66        9 => 530,
67        10 => 590,
68        11 => 650,
69        12 => 720,
70        13 => 770,
71        _ => 830
72    }
73}
74
75impl Conf {
76
77    pub(crate) fn new(output_range: usize, input_size: usize, bucket_size_100: u16, slice_len: u16, max_shift: u16) -> Self {
78        let bucket_size_100 = bucket_size_100 as usize;
79        Self {
80            buckets_num: 1.max((input_size * 100 + bucket_size_100/2) / bucket_size_100),
81            slice_len_minus_one: slice_len - 1,
82            num_of_slices: output_range + 1 - slice_len as usize - max_shift as usize,
83        }
84    }
85
86    // configuration for "turbo" function that ussume that input=output range and bucket_size_100 is about 400.
87    /*pub(crate) fn turbo_new(output_range: usize, slice_len: u16, max_shift: u16) -> Self {
88        let num_of_slices = output_range + 1 - slice_len as usize - max_shift as usize;
89        Self {
90            buckets_num: (num_of_slices-1)/4+1,
91            slice_len_minus_one: slice_len - 1,
92            num_of_slices,
93        }
94    }*/
95
96    /// Returns output range of the function.
97    #[inline] pub fn output_range<SC: SeedChooser>(&self, seed_chooser: SC, bits_per_seed: u8) -> usize {
98        self.num_of_slices + self.slice_len_minus_one as usize + seed_chooser.extra_shift(bits_per_seed) as usize
99    }
100
101    /// Returns bucket assigned to the `key`.
102    #[inline(always)]
103    pub fn bucket_for(&self, key: u64) -> usize {
104        map64_to_64(key, self.buckets_num as u64) as usize
105    }
106
107    /// Returns first value of slice assigned to the `key`.
108    #[inline(always)]
109    pub fn slice_begin(&self, key: u64) -> usize {
110        map64_to_64(key, self.num_of_slices as u64) as usize
111    }
112
113    /// Returns index of `key` in its slice.
114    #[inline(always)]
115    pub fn in_slice(&self, key: u64, seed: u16) -> usize {
116        (mult_hi((seed as u64).wrapping_mul(0x51_7c_c1_b7_27_22_0a_95 /*0x1d8e_4e27_c47d_124f*/), key) as u16 & self.slice_len_minus_one) as usize
117        //((key.wrapping_add(seed as u64 * 2)) as u16 & self.slice_len_minus_one) as usize
118        //((key.wrapping_mul(0x1d8e_4e27_c47d_124f).wrapping_add(seed as u64)) as u16 & self.slice_len_minus_one) as usize
119        /*const P: u16 = 0;
120        let seed_lo = (seed & ((1<<P)-1)) + 1;
121        (wymum((seed_lo as u64).wrapping_mul(0x1d8e_4e27_c47d_124f), key).wrapping_add(3*(seed>>P) as u64) as u16 & self.slice_len_minus_one) as usize*/
122    }
123
124    /// Returns index of `key` in its slice.
125    /*#[inline(always)]
126    pub(crate) fn in_slice_seed_shift(&self, key: u64, seed: u16, shift: u16) -> usize {
127        ((mult_hi((seed as u64).wrapping_mul(0x1d8e_4e27_c47d_124f), key) as u16 + shift as u16) & self.slice_len_minus_one) as usize
128        //0x51_7c_c1_b7_27_22_0a_95
129    }*/
130
131    #[inline(always)]
132    pub(crate) fn in_slice_nobump(&self, key: u64, seed: u16) -> usize {
133        //(wymum((seed as u64 ^ 0xa076_1d64_78bd_642f).wrapping_mul(0x1d8e_4e27_c47d_124f), key) as u16 & self.slice_len_minus_one) as usize
134        (mix(mix(seed as u64 ^ 0xa076_1d64_78bd_642f, 0x1d8e_4e27_c47d_124f), key) as u16 & self.slice_len_minus_one) as usize
135    }
136
137    /// Returns seed independent index of `key` in its partition.
138    #[inline(always)]
139    pub(crate) fn in_slice_noseed(&self, key: u64) -> usize {
140        //(wymum_xor(key as u64, 0xe703_7ed1_a0b4_28db) as u16  & self.slice_len_minus_one) as usize
141        (key as u16 & self.slice_len_minus_one) as usize
142    }
143
144    /// Returns the value of the function for given `key` and `seed`.
145    #[inline(always)]
146    pub fn f(&self, key: u64, seed: u16) -> usize {
147        self.slice_begin(key) + self.in_slice(key, seed)
148    }
149
150    #[inline(always)]
151    pub(crate) fn f_shift0(&self, key: u64) -> usize {
152        self.slice_begin(key) + self.in_slice_noseed(key)
153    }
154
155    /*#[inline(always)]
156    pub(crate) fn f_shift(&self, key: u64, shift: u16) -> usize {
157        self.slice_begin(key) + self.in_slice_noseed(key) + shift as usize - 1
158    }*/
159
160    #[inline(always)]
161    pub(crate) fn f_nobump(&self, key: u64, seed: u16) -> usize {
162        self.slice_begin(key) + self.in_slice_nobump(key, seed)
163    }
164
165
166
167    #[inline] pub fn slice_len(&self) -> u16 {
168        self.slice_len_minus_one + 1
169    }
170
171    #[inline] pub(crate) fn new_seeds_vec<SS: SeedSize>(&self, seed_size: SS) -> Box<[SS::VecElement]> {
172        seed_size.new_zeroed_seed_vec(self.buckets_num)
173    }
174
175    // Returns bucket assigned to the `slice_begin` by "turbo" configuration.
176    /*#[inline(always)]
177    pub(crate) fn turbo_bucket_for_slice(&self, slice_begin: usize) -> usize {
178        slice_begin / 4
179    }*/
180
181    // Returns bucket assigned to the `key` by "turbo" configuration, using `turbo_bucket_for_slice`.
182    // It can be faster than bucket_for only if `slice_begin` is called near this call
183    // and compiler optime out redundant `slice_begin` calculation.
184    /*#[inline(always)]
185    pub(crate) fn turbo_bucket_for_key(&self, key: u64) -> usize {
186        self.turbo_bucket_for_slice(self.slice_begin(key))
187    }*/
188
189    /// Writes `self` to the `output`.
190    pub fn write(&self, output: &mut dyn io::Write) -> io::Result<()> {
191        VByte::write(output, self.buckets_num)?;
192        VByte::write(output, self.slice_len_minus_one)?;
193        VByte::write(output, self.num_of_slices)
194    }
195
196    /// Returns number of bytes which `write` will write.
197    pub fn write_bytes(&self) -> usize {
198        VByte::size(self.buckets_num)
199         + VByte::size(self.slice_len_minus_one)
200         + VByte::size(self.num_of_slices)
201    }
202
203    /// Read `Self` from the `input`.
204    pub fn read(input: &mut dyn io::Read) -> io::Result<Self>
205    {
206        let buckets_num = VByte::read(input)?;
207        let slice_len_minus_one = VByte::read(input)?;
208        let num_of_slices = VByte::read(input)?;
209        Ok(Self {
210            buckets_num,
211            slice_len_minus_one,
212            num_of_slices,
213        })
214    }
215}
216
217#[derive(Clone, Copy)]
218pub struct Params<SS> {
219    pub seed_size: SS,
220    pub bucket_size100: u16,
221    pub preferred_slice_len: u16
222}
223
224impl<SS> Params<SS> {
225    #[inline]
226    pub fn new(seed_size: SS, bucket_size100: u16) -> Self {
227        Self { seed_size, bucket_size100, preferred_slice_len: 0 }
228    }
229
230    #[inline]
231    pub fn new_psl(seed_size: SS, bucket_size100: u16, preferred_slice_len: u16) -> Self {
232        Self { seed_size, bucket_size100, preferred_slice_len }
233    }
234
235    /*#[inline]
236    pub fn slice_len(&self, default: u16) -> u16 {
237        if self.preferred_slice_len == 0 { default } else { self.preferred_slice_len }
238    }*/
239}
240
241impl<SS: Copy+Into<u8>> Params<SS> {
242    #[inline(always)]
243    pub fn bits_per_seed(&self) -> u8 { self.seed_size.into() }
244}