Skip to main content

ph_temp/
seeds.rs

1use std::convert::{TryFrom, TryInto};
2use std::io::{Read, Write};
3use std::ops::Mul;
4use binout::{AsIs, Serializer};
5use bitm::{BitAccess, BitVec, ceiling_div};
6use dyn_size_of::GetSize;
7use crate::utils::read_bits;
8
9pub fn to_io_error<E>(err: E) -> std::io::Error
10where E: Into<Box<dyn std::error::Error + Send + Sync>> {
11    std::io::Error::new(std::io::ErrorKind::InvalidData, err)
12}
13
14/// Implementations of `SeedSize` represent seed size in fingerprinting-based minimal perfect hashing with group optimization.
15pub trait SeedSize: Copy + Into<u8> + Sync + TryFrom<u8, Error=&'static str> {
16    type VecElement: Copy + Send + Sync + Sized + GetSize;
17    const VEC_ELEMENT_BIT_SIZE: usize = size_of::<Self::VecElement>() * 8;
18
19    fn validate(&self) -> Result<Self, &'static str> { Ok(*self) }
20
21    #[inline] fn new_zeroed_seed_vec(&self, number_of_seeds: usize) -> Box<[Self::VecElement]> {
22        self.new_seed_vec(0, number_of_seeds)
23    }
24
25    fn new_seed_vec(&self, seed: u16, number_of_seeds: usize) -> Box<[Self::VecElement]>;
26
27    /// Gets seed with given `index` from `vec`.
28    /// Does not check whether the `index` is within bounds.
29    unsafe fn get_seed(&self, vec: &[Self::VecElement], index: usize) -> u16;
30
31    /// Sets seed with given `index` in `vec` to `seed`.
32    /// Does not check whether the `index` is within bounds.
33    unsafe fn set_seed(&self, vec: &mut [Self::VecElement], index: usize, seed: u16);
34
35    /// Sets seed with given `index` in zeroed `vec` to `seed`.
36    /// Does not check whether the `index` is within bounds and `vec` is zeroed.
37    #[inline] unsafe fn init_seed(&self, vec: &mut [Self::VecElement], index: usize, seed: u16) {
38        self.set_seed(vec, index, seed)
39    }
40
41    fn concatenate_seed_vecs<LSI, LS>(&self, level_sizes: LS, seeds: Vec<Box<[Self::VecElement]>>) -> Box<[Self::VecElement]> 
42        where LSI: IntoIterator<Item = usize>, LS: Fn() -> LSI
43    {
44        let mut group_seeds_concatenated = self.new_zeroed_seed_vec(level_sizes().into_iter().sum::<usize>());
45        let mut dst_group = 0;
46        for (l_size, l_seeds) in level_sizes().into_iter().zip(seeds.into_iter()) {
47            for index in 0..l_size {
48                unsafe { self.init_seed(&mut group_seeds_concatenated, dst_group, self.get_seed(&l_seeds, index as usize)); }
49                dst_group += 1;
50            }
51        }
52        group_seeds_concatenated
53    }
54
55    /// Writes `self` to `output`.
56    fn write(&self, output: &mut dyn Write) -> std::io::Result<()> {
57        AsIs::write(output, (*self).into())
58    }
59
60    /// Reads `Self` from `input`.
61    fn read(input: &mut dyn Read) -> std::io::Result<Self> {
62        let seed_size: u8 = AsIs::read(input)?;
63        let result = TryInto::<Self>::try_into(seed_size).map_err(to_io_error)?;
64        result.validate().map_err(to_io_error)
65    }
66
67    fn write_seed_vec(&self, output: &mut dyn Write, seeds: &[Self::VecElement]) -> std::io::Result<()>;
68
69    fn read_seed_vec(input: &mut dyn Read, number_of_seeds: usize) -> std::io::Result<(Self, Box<[Self::VecElement]>)>;
70}
71
72/// Size in bits.
73#[derive(Copy, Clone)]
74pub struct Bits(pub u8);
75
76impl Mul<usize> for Bits {
77    type Output = usize;
78
79    #[inline(always)] fn mul(self, rhs: usize) -> Self::Output {
80        self.0 as usize * rhs
81    }
82}
83
84impl Into<u8> for Bits {
85    #[inline(always)] fn into(self) -> u8 { self.0 }
86}
87
88impl TryFrom<u8> for Bits {
89    type Error = &'static str;
90
91    fn try_from(value: u8) -> Result<Self, Self::Error> {
92        Ok(Self(value))
93    }
94}
95
96impl SeedSize for Bits {
97    type VecElement = u64;
98
99    #[inline(always)] fn new_zeroed_seed_vec(&self, number_of_seeds: usize) -> Box<[Self::VecElement]> {
100        Box::<[u64]>::with_zeroed_bits(number_of_seeds * self.0 as usize)
101    }
102
103    #[inline(always)] fn new_seed_vec(&self, seed: u16, number_of_seeds: usize) -> Box<[Self::VecElement]> {
104        Box::<[u64]>::with_bitwords(seed as u64, self.0, number_of_seeds)
105    }
106
107    #[inline(always)] unsafe fn get_seed(&self, vec: &[Self::VecElement], index: usize) -> u16 {
108        //vec.get_fragment(index, self.0) as u16
109        vec.get_fragment_unchecked(index, self.0) as u16
110    }
111
112    #[inline(always)] unsafe fn set_seed(&self, vec: &mut [Self::VecElement], index: usize, seed: u16) {
113        debug_assert!(seed < (1<<self.0));
114        //vec.set_fragment(index, seed as u64, self.0)
115        vec.set_fragment_unchecked(index, seed as u64, self.0);
116    }
117
118    #[inline(always)] unsafe fn init_seed(&self, vec: &mut [Self::VecElement], index: usize, seed: u16) {
119        debug_assert!(seed < (1<<self.0));
120        vec.init_fragment_unchecked(index, seed as u64, self.0)
121    }
122
123    fn write_seed_vec(&self, output: &mut dyn Write, seeds: &[Self::VecElement]) -> std::io::Result<()> {
124        SeedSize::write(self, output)?;
125        AsIs::write_all(output, seeds)
126    }
127
128    fn read_seed_vec(input: &mut dyn Read, number_of_seeds: usize) -> std::io::Result<(Self, Box<[Self::VecElement]>)> {
129        let bits_per_seed = SeedSize::read(input)?;
130        Ok((bits_per_seed, read_bits(input, number_of_seeds * bits_per_seed.0 as usize)?))
131    }
132}
133
134/// Size in bits.
135/// 
136/// Uses unaligned reads/writes to access data in SeedSize implementation.
137#[derive(Copy, Clone)]
138pub struct BitsFast(pub u8);
139
140impl Mul<usize> for BitsFast {
141    type Output = usize;
142
143    #[inline(always)] fn mul(self, rhs: usize) -> Self::Output {
144        self.0 as usize * rhs
145    }
146}
147
148impl Into<u8> for BitsFast {
149    #[inline(always)] fn into(self) -> u8 { self.0 }
150}
151
152impl TryFrom<u8> for BitsFast {
153    type Error = &'static str;
154
155    fn try_from(value: u8) -> Result<Self, Self::Error> {
156        Ok(Self(value))
157    }
158}
159
160impl BitsFast {
161    #[inline]
162    fn vec_len(&self, number_of_seeds: usize) -> usize {
163        ceiling_div(number_of_seeds * self.0 as usize, 8) + 3
164    }
165}
166
167impl SeedSize for BitsFast {
168    type VecElement = u8;
169
170    #[inline(always)] fn new_zeroed_seed_vec(&self, number_of_seeds: usize) -> Box<[Self::VecElement]> {
171        vec![0; self.vec_len(number_of_seeds)].into_boxed_slice()
172    }
173
174    #[inline(always)] fn new_seed_vec(&self, seed: u16, number_of_seeds: usize) -> Box<[Self::VecElement]> {
175        let mut vec = Self::new_zeroed_seed_vec(&self, number_of_seeds);
176        for index in 0..number_of_seeds { unsafe { self.init_seed(&mut vec, index, seed); } }
177        vec
178    }
179
180    #[inline(always)] unsafe fn get_seed(&self, vec: &[Self::VecElement], index: usize) -> u16 {
181        (bitm::get_bits25(vec.as_ptr(), index * self.0 as usize) & ((1<<self.0)-1)) as u16
182    }
183
184    #[inline(always)] unsafe fn set_seed(&self, vec: &mut [Self::VecElement], index: usize, seed: u16) {
185        debug_assert!(seed < (1<<self.0));
186        bitm::set_bits25(vec.as_mut_ptr(), index * self.0 as usize, seed as u32, (1<<self.0)-1)
187    }
188
189    #[inline(always)] unsafe fn init_seed(&self, vec: &mut [Self::VecElement], index: usize, seed: u16) {
190        debug_assert!(seed < (1<<self.0));
191        bitm::init_bits25(vec.as_mut_ptr(), index * self.0 as usize, seed as u32)
192    }
193
194    fn write_seed_vec(&self, output: &mut dyn Write, seeds: &[Self::VecElement]) -> std::io::Result<()> {
195        SeedSize::write(self, output)?;
196        AsIs::write_all(output, seeds)
197    }
198
199    fn read_seed_vec(input: &mut dyn Read, number_of_seeds: usize) -> std::io::Result<(Self, Box<[Self::VecElement]>)> {
200        let bits_per_seed = SeedSize::read(input)?;
201        Ok((bits_per_seed, AsIs::read_n(input, bits_per_seed.vec_len(number_of_seeds))?))
202    }
203}
204
205/// Seed size of 8 bits.
206#[derive(Copy, Clone, Default)]
207pub struct Bits8;
208
209impl Into<u8> for Bits8 {
210    #[inline] fn into(self) -> u8 { 8 }
211}
212
213impl TryFrom<u8> for Bits8 {
214    type Error = &'static str;
215
216    fn try_from(value: u8) -> Result<Self, Self::Error> {
217        if value == 8 { Ok(Self) } else { Err("Bits8 supports only 8-bit seeds.") }
218    }
219}
220
221impl SeedSize for Bits8 {
222    type VecElement = u8;
223
224    fn new_seed_vec(&self, seed: u16, number_of_seeds: usize) -> Box<[Self::VecElement]> {
225        vec![seed as u8; number_of_seeds].into_boxed_slice()
226    }
227
228    #[inline] unsafe fn get_seed(&self, vec: &[Self::VecElement], index: usize) -> u16 {
229        //vec[index] as u16
230        *vec.get_unchecked(index) as u16
231    }
232
233    #[inline] unsafe fn set_seed(&self, vec: &mut [Self::VecElement], index: usize, seed: u16) {
234        debug_assert!(seed < 256);
235        //vec[index] = seed as u8
236        *vec.get_unchecked_mut(index) = seed as u8
237    }
238
239    fn read_seed_vec(input: &mut dyn Read, number_of_seeds: usize) -> std::io::Result<(Self, Box<[Self::VecElement]>)> {
240        let bits_per_group_seed = SeedSize::read(input)?;   // mainly for validation
241        Ok((bits_per_group_seed, AsIs::read_n(input, number_of_seeds)?))
242    }
243
244    fn write_seed_vec(&self, output: &mut dyn Write, seeds: &[Self::VecElement]) -> std::io::Result<()> {
245        SeedSize::write(self, output)?;
246        AsIs::write_all(output, seeds)
247    }
248
249    fn concatenate_seed_vecs<LSI, LS>(&self, _level_sizes: LS, group_seeds: Vec<Box<[Self::VecElement]>>) -> Box<[Self::VecElement]> 
250        where LSI: IntoIterator<Item = usize>, LS: Fn() -> LSI
251    {
252        group_seeds.concat().into_boxed_slice()
253    }
254}
255
256/// Seed size given as a power of two (knowing at compile time).
257#[derive(Copy, Clone, Default)]
258pub struct TwoToPowerBitsStatic<const LOG2_BITS: u8>;
259
260impl<const LOG2_BITS: u8> TwoToPowerBitsStatic<LOG2_BITS> {
261    pub const BITS: u8 = 1 << LOG2_BITS;
262    pub const LOG2_MASK: u8 = Self::BITS-1;
263    pub const VALUES_PER_64: u8 = 64 >> LOG2_BITS;
264    pub const MASK16: u16 = ((1u64 << Self::BITS) - 1) as u16;
265    pub const MASK64: u64 = ((1u128 << Self::BITS) - 1) as u64;
266    #[inline(always)] pub const fn shift_for(index: usize) -> u8 {
267        ((index % Self::VALUES_PER_64 as usize) as u8) << LOG2_BITS
268    }
269}
270
271impl<const LOG2_BITS: u8> Into<u8> for TwoToPowerBitsStatic<LOG2_BITS> {
272    #[inline] fn into(self) -> u8 { Self::BITS }
273}
274
275impl<const LOG2_BITS: u8> TryFrom<u8> for TwoToPowerBitsStatic<LOG2_BITS> {
276    type Error = &'static str;
277
278    fn try_from(value: u8) -> Result<Self, Self::Error> {
279        if value == Self::BITS { Ok(Self) } else { Err("Number of bits per seed differs from TwoToPowerBitsStatic parameter.") }
280    }
281}
282
283impl<const LOG2_BITS: u8> SeedSize for TwoToPowerBitsStatic<LOG2_BITS> {
284    type VecElement = u64;
285
286    #[inline(always)] fn new_zeroed_seed_vec(&self, number_of_seeds: usize) -> Box<[Self::VecElement]> {
287        Box::<[u64]>::with_zeroed_bits(number_of_seeds << LOG2_BITS)
288    }
289
290    #[inline(always)] fn new_seed_vec(&self, seed: u16, number_of_seeds: usize) -> Box<[Self::VecElement]> {
291        let mut w = 0;
292        for _ in 0..Self::VALUES_PER_64 { w <<= Self::BITS; w |= seed as u64; }
293        vec![w; ceiling_div(number_of_seeds, Self::VALUES_PER_64 as usize)].into_boxed_slice()
294        //Box::<[u64]>:: with_bitwords(seed as u64, Self::BITS, number_of_seeds)
295    }
296
297    #[inline(always)] unsafe fn get_seed(&self, vec: &[Self::VecElement], index: usize) -> u16 {
298        //(vec[index / Self::VALUES_PER_64 as usize] >> Self::shift_for(index)) as u16 & Self::MASK16
299        (*vec.get_unchecked(index / Self::VALUES_PER_64 as usize) >> Self::shift_for(index)) as u16 & Self::MASK16
300    }
301
302    #[inline] unsafe fn set_seed(&self, vec: &mut [Self::VecElement], index: usize, seed: u16) {
303        debug_assert!(seed < (1<<Self::BITS));
304        //let v = &mut vec[index / Self::VALUES_PER_64 as usize];
305        let v = vec.get_unchecked_mut(index / Self::VALUES_PER_64 as usize);
306        let s = Self::shift_for(index);
307        *v &= !((Self::MASK16 as u64) << s);
308        *v |= (seed as u64) << s;
309    }
310
311    #[inline] unsafe fn init_seed(&self, vec: &mut [Self::VecElement], index: usize, seed: u16) {
312        debug_assert!(seed < (1<<Self::BITS));
313        //vec[index / Self::VALUES_PER_64 as usize] |= (seed as u64) << Self::shift_for(index);
314        *vec.get_unchecked_mut(index / Self::VALUES_PER_64 as usize) |= (seed as u64) << Self::shift_for(index);
315    }
316
317    fn write_seed_vec(&self, output: &mut dyn Write, seeds: &[Self::VecElement]) -> std::io::Result<()> {
318        SeedSize::write(self, output)?;
319        AsIs::write_all(output, seeds)
320    }
321
322    fn read_seed_vec(input: &mut dyn Read, number_of_seeds: usize) -> std::io::Result<(Self, Box<[Self::VecElement]>)> {
323        let bits_per_group_seed = SeedSize::read(input)?;   // mainly for validation
324        Ok((bits_per_group_seed, read_bits(input, number_of_seeds * Self::BITS as usize)?))
325    }
326}
327
328impl<const LOG2_BITS: u8> Mul<usize> for TwoToPowerBitsStatic<LOG2_BITS> {
329    type Output = usize;
330
331    #[inline(always)] fn mul(self, rhs: usize) -> Self::Output {
332        rhs << LOG2_BITS
333    }
334}