strmap/
strmap.rs

1use std::fmt;
2use std::ops::Deref;
3
4use crate::piece::MapPiece;
5use crate::StrMapConfig;
6
7type Result<T, E = fst::Error> = std::result::Result<T, E>;
8type InsertResult<R> = std::result::Result<R, InsertDuplicateError>;
9
10/// A map from string to T.
11pub struct StrMap<T> {
12    // Pieces sorted by capacity.
13    //
14    // We don't allow multiple pieces with the same value of
15    // `piece.capacity().next_power_of_two()`
16    pieces: Vec<MapPiece<T>>,
17    mini_piece: Vec<(Box<[u8]>, T)>,
18    should_rebalance: bool,
19}
20
21impl<T> StrMap<T> {
22    pub fn empty() -> Self {
23        Self {
24            pieces: Vec::new(),
25            mini_piece: Vec::with_capacity(128),
26            should_rebalance: false,
27        }
28    }
29
30    pub fn build(keys: &[&[u8]], vals: Vec<T>, opts: &StrMapConfig) -> Result<Self> {
31        let piece = MapPiece::build(keys, vals, opts)?;
32
33        Ok(Self {
34            pieces: vec![piece],
35            mini_piece: Vec::with_capacity(128),
36            should_rebalance: false,
37        })
38    }
39
40    pub fn len(&self) -> usize {
41        let mut len = self.mini_piece.len();
42        for piece in &self.pieces {
43            len += piece.len();
44        }
45        len
46    }
47
48    pub fn insert(&mut self, key: &[u8], value: T) -> InsertResult<()> {
49        if self.has_key(key) {
50            Err(InsertDuplicateError::new(key))
51        } else {
52            self.mini_piece
53                .push((key.to_vec().into_boxed_slice(), value));
54            if self.mini_piece.len() >= 1024 {
55                self.should_rebalance = true;
56            }
57            Ok(())
58        }
59    }
60
61    pub fn insert_many(&mut self, keys: &[&[u8]], vals: Vec<T>, opts: &StrMapConfig) -> Result<()> {
62        let mut keys_dupe = std::collections::HashSet::new();
63
64        for key in keys {
65            if self.has_key(key) || !keys_dupe.insert(key) {
66                return Err(InsertDuplicateError::new(key).into_fst());
67            }
68        }
69
70        drop(keys_dupe);
71        self.insert_many_unchecked(keys, vals, opts)
72    }
73
74    /// Assumes that `keys` have been checked for duplicates.
75    pub(crate) fn insert_many_unchecked(
76        &mut self,
77        keys: &[&[u8]],
78        vals: Vec<T>,
79        opts: &StrMapConfig,
80    ) -> Result<()> {
81        assert_eq!(keys.len(), vals.len());
82
83        if keys.len() < 128 {
84            if self.mini_piece.len() >= 2048 {
85                let len = keys.len() + self.mini_piece.len();
86                let mut keys2: Vec<&[u8]> = Vec::with_capacity(len);
87                keys2.extend_from_slice(keys);
88
89                let mut vals2 = vals;
90                let mut keys3 = Vec::with_capacity(self.mini_piece.len());
91                for (key, val) in self.mini_piece.drain(..) {
92                    keys3.push(key);
93                    vals2.push(val);
94                }
95                keys2.extend(keys3.iter().map(|slice| &**slice));
96
97                return self.insert_many_unchecked(&keys2, vals2, opts);
98            } else {
99                for (key, val) in keys.iter().copied().zip(vals) {
100                    self.mini_piece.push((key.to_vec().into_boxed_slice(), val));
101                }
102                if self.mini_piece.len() >= 1024 {
103                    self.should_rebalance = true;
104                }
105                return Ok(());
106            }
107        }
108
109        let piece = MapPiece::build(keys, vals, opts)?;
110
111        let p2 = piece.capacity().next_power_of_two();
112        for i in 0..self.pieces.len() {
113            if self.pieces[i].capacity() == p2 {
114                self.should_rebalance = true;
115                break;
116            }
117        }
118        self.pieces.push(piece);
119        self.pieces.sort_unstable_by_key(|p| p.capacity());
120        Ok(())
121    }
122
123    pub fn first(&self) -> Option<(Vec<u8>, &T)> {
124        if let Some(val) = self.get(b"") {
125            return Some((Vec::new(), val));
126        }
127
128        self.next(b"")
129    }
130
131    pub fn next(&self, curr: &[u8]) -> Option<(Vec<u8>, &T)> {
132        fn update_vec(vec: &mut Vec<u8>, new_data: &[u8]) {
133            vec.clear();
134            vec.extend_from_slice(new_data);
135        }
136
137        let mut next: Option<(Vec<u8>, &T)> = None;
138
139        for (key, val) in &self.mini_piece {
140            if curr < key {
141                match next.as_mut() {
142                    Some((next, nval)) => {
143                        if &**key < &**next {
144                            update_vec(next, key);
145                            *nval = val;
146                        }
147                    }
148                    None => {
149                        next = Some((key.to_vec(), val));
150                    }
151                }
152            }
153        }
154
155        for piece in &self.pieces {
156            piece.with_next(curr, |key, val| match next.as_mut() {
157                Some((next, nval)) => {
158                    if key < &**next {
159                        update_vec(next, key);
160                        *nval = val;
161                    }
162                }
163                None => {
164                    next = Some((key.to_vec(), val));
165                }
166            });
167        }
168
169        next
170    }
171
172    pub fn should_rebalance(&self) -> bool {
173        self.should_rebalance
174    }
175
176    pub fn rebalance(&mut self, opts: &StrMapConfig) -> Result<()> {
177        self.should_rebalance = false;
178        if self.mini_piece.len() > 64 {
179            let mut keys = Vec::with_capacity(self.mini_piece.len());
180            let mut vals = Vec::with_capacity(self.mini_piece.len());
181            for (key, val) in self.mini_piece.drain(..) {
182                keys.push(key);
183                vals.push(val);
184            }
185            let key_slices: Vec<&[u8]> = keys.iter().map(|k| k.deref()).collect();
186
187            let new_piece = MapPiece::build(&key_slices, vals, opts)?;
188            self.pieces.insert(0, new_piece);
189        }
190
191        // If any pieces are too empty, rebuild them.
192        for piece in self.pieces.iter_mut() {
193            if 10 * piece.len() < 8 * piece.capacity() {
194                let mut piece_builder = crate::piece::Builder::new(opts)?;
195                let mut drainer = piece.drain();
196                while let Some((key, val)) = drainer.next() {
197                    piece_builder.insert(key, val)?;
198                }
199                *piece = piece_builder.finish()?;
200            }
201        }
202
203        // Merge pieces in the same size class.
204        'outer: loop {
205            self.pieces.sort_unstable_by_key(|p| p.capacity());
206
207            for i in 1..self.pieces.len() {
208                let p1 = &self.pieces[i - 1];
209                let p2 = &self.pieces[i];
210                if p1.capacity().next_power_of_two() == p2.capacity().next_power_of_two() {
211                    let p1 = self.pieces.swap_remove(i);
212                    let p2 = self.pieces.swap_remove(i - 1);
213                    self.pieces.push(crate::piece::merge_pieces(p1, p2, opts)?);
214
215                    continue 'outer;
216                }
217            }
218
219            return Ok(());
220        }
221    }
222
223    pub fn has_key(&self, key: &[u8]) -> bool {
224        for entry in &self.mini_piece {
225            if entry.0.deref() == key {
226                return true;
227            }
228        }
229
230        for piece in &self.pieces {
231            if piece.has_key(key) {
232                return true;
233            }
234        }
235
236        false
237    }
238
239    pub fn get(&self, key: &[u8]) -> Option<&T> {
240        for entry in &self.mini_piece {
241            if entry.0.deref() == key {
242                return Some(&entry.1);
243            }
244        }
245
246        for piece in &self.pieces {
247            let res = piece.get(key);
248            if res.is_some() {
249                return res;
250            }
251        }
252
253        None
254    }
255
256    pub fn get_mut(&mut self, key: &[u8]) -> Option<&mut T> {
257        for entry in &mut self.mini_piece {
258            if entry.0.deref() == key {
259                return Some(&mut entry.1);
260            }
261        }
262
263        for piece in &mut self.pieces {
264            let res = piece.get_mut(key);
265            if res.is_some() {
266                return res;
267            }
268        }
269
270        None
271    }
272
273    pub fn delete(&mut self, key: &[u8]) -> bool {
274        for (i, entry) in self.mini_piece.iter().enumerate() {
275            if entry.0.deref() == key {
276                self.mini_piece.swap_remove(i);
277                return true;
278            }
279        }
280
281        for piece in &mut self.pieces {
282            if piece.delete(key) {
283                if 10 * piece.len() < 8 * piece.capacity() {
284                    self.should_rebalance = true;
285                }
286
287                return true;
288            }
289        }
290
291        false
292    }
293}
294
295#[derive(Debug)]
296pub struct InsertDuplicateError {
297    key: Box<[u8]>,
298}
299
300impl InsertDuplicateError {
301    fn new(key: &[u8]) -> Self {
302        Self {
303            key: key.to_vec().into_boxed_slice(),
304        }
305    }
306
307    pub fn key(&self) -> &[u8] {
308        &self.key
309    }
310
311    fn into_io(self) -> std::io::Error {
312        std::io::Error::new(std::io::ErrorKind::AlreadyExists, self)
313    }
314
315    fn into_fst(self) -> fst::Error {
316        self.into_io().into()
317    }
318}
319
320impl std::error::Error for InsertDuplicateError {}
321
322impl fmt::Display for InsertDuplicateError {
323    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
324        let key = String::from_utf8_lossy(&self.key);
325        write!(f, "trying to insert at existing key \"{}\"", key)
326    }
327}