precomputed_map/
builder.rs

1//! Precomputed Map builder
2
3#![allow(clippy::uninlined_format_args)]
4
5#[cfg(test)]
6mod tests;
7mod build;
8mod codegen;
9
10use std::{ cmp, fmt };
11pub use codegen::*;
12
13/// Static Map builder
14///
15/// Computes an appropriate static map based on the provided keys.
16pub struct MapBuilder<'a, K> {
17    seed: Option<u64>,
18    limit: Option<u64>,
19    ord: Option<OrdFunc<'a, K>>,
20    hash: Option<HashFunc<'a, K>>,
21    next_seed: fn(u64, u64) -> u64,
22    force_build: bool,
23}
24
25pub type OrdFunc<'a, K> = &'a dyn Fn(&K, &K) -> cmp::Ordering;
26pub type HashFunc<'a, K> = &'a dyn Fn(u64, &K) -> u64;
27
28impl<'a, K> Default for MapBuilder<'a, K> {
29    fn default() -> Self {
30        MapBuilder::new()
31    }
32}
33
34impl<'a, K> MapBuilder<'a, K> {
35    pub fn new() -> Self {
36        MapBuilder {
37            limit: None,
38            seed: None,
39            ord: None,
40            hash: None,
41            force_build: false,
42            next_seed: |init_seed, c| {
43                use std::hash::Hasher;
44
45                let mut hasher = std::collections::hash_map::DefaultHasher::new();
46                hasher.write_u64(init_seed);
47                hasher.write_u64(c);
48                hasher.finish()
49            },
50        }
51    }
52
53    /// Try to construct even if keys is very large
54    pub fn force_build(&mut self, flag: bool) -> &mut Self {
55        self.force_build = flag;
56        self
57    }
58
59    pub fn set_limit(&mut self, limit: Option<u64>) -> &mut Self {
60        self.limit = limit;
61        self
62    }
63
64    pub fn set_seed(&mut self, seed: u64) -> &mut Self {
65        self.seed = Some(seed);
66        self
67    }
68
69    pub fn set_ord(&mut self, f: OrdFunc<'a, K>) -> &mut Self {
70        self.ord = Some(f);
71        self
72    }
73
74    pub fn set_hash(&mut self, f: HashFunc<'a, K>) -> &mut Self {
75        self.hash = Some(f);
76        self
77    }
78
79    pub fn set_next_seed(&mut self, f: fn(u64, u64) -> u64)
80        -> &mut Self
81    {
82        self.next_seed = f;
83        self
84    }
85
86    /// Creates a Map with the specified keys
87    ///
88    /// # NOTE
89    ///
90    /// Note that the keys used must be unique, otherwise the build will not succeed.
91    pub fn build(&self, keys: &[K]) -> Result<MapOutput, BuildFailed> {
92        if keys.len() <= 16 {
93            // For tiny amounts of data, binary search is usually faster.
94            //
95            // At most 4 comparisons will be faster than a high-quality hash.
96            if let Some(output) = build::build_tiny(self, keys) {
97                return Ok(output);
98            }
99        }
100
101        if keys.len() <= 128 {
102            // For small numbers of keys, try to build the smallest and fastest phf.
103            //
104            // This outperforms all other phfs,
105            // but for large numbers of keys, this may not be able to find the seed in a reasonable time.
106            //
107            // If the keys length is greater than 12, it will usually fallback to medium map.
108            if let Some(output) = build::build_small(self, keys) {
109                return Ok(output);
110            }
111        }
112
113        if !self.force_build && keys.len() > 10 * 1024 * 1024 {
114            return Err(BuildFailed("WARN: \
115                We currently don't have good support for large numbers of keys,\
116                and this construction may be slow or not complete in a reasonable time.\
117            "));
118        }
119
120        // A typical PHF, but not optimized for construction time, and no sharding.
121        // 
122        // It is suitable for large amounts of data that need to be embedded in a binary file,
123        // but for data larger than that it is better to use a specialized PHF library.
124        build::build_medium(self, keys)
125    }
126}
127
128#[derive(Debug)]
129pub struct BuildFailed(&'static str);
130
131#[derive(Debug)]
132enum MapKind {
133    Tiny,
134    Small(u64),
135    Medium {
136        seed: u64,
137        pilots: Box<[u8]>,
138        remap: Box<[u32]>,
139    }
140}
141
142impl fmt::Display for BuildFailed {
143    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
144        f.write_str(self.0)
145    }
146}
147
148impl std::error::Error for BuildFailed {}
149
150/// Map output
151#[derive(Debug)]
152pub struct MapOutput {
153    kind: MapKind,
154    index: Box<[usize]>
155}