sequence_map/
lib.rs

1// Copyright 2020 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! This crate implements a map of unsigned 64-bit keys into strings.
16//!
17//! The map is optimized for creating it once, and then reading many times. The struct [Builder] is
18//! used to build the map, and the struct [Map] is used for lookups.
19//!
20//! The special property of the implementation is that it encodes all the data needed for the
21//! lookup in a single sequence of bytes.  This makes it rather interesting for dynamic loading of
22//! data that can then be placed in an operating system's read only memory.  The internal structure
23//! requires no decoding when it is loaded (say from a file).
24//!
25//! The map is internally represented as a trie with each level of the trie being indexed by a
26//! number of bits of the key, starting from the least-significant bit side.  So for example, when
27//! creating the builder with 2 bits, then 2 bits will be chopped off the provided key for each
28//! descent into the trie by one level.
29//!
30//! Example:
31//!
32//! ```rust
33//! use sequence_map::{Builder, Map};
34//!
35//! const BITS: usize = 2;
36//!
37//! let mut builder = Builder::new(BITS);
38//!
39//! builder.insert(42, "Hello!");
40//!
41//! // Note: a second insert under the same key does *not* replace the
42//! // previously inserted key.
43//! builder.insert(42, "Wonderful!");
44//! builder.insert(84, "World!");
45//!
46//! // This is the resulting byte sequence.
47//! let bytes: Vec<u8> = builder.build();
48//!
49//! // Now, look up some keys.
50//! let lookup = Map::new(&bytes);
51//! assert_eq!("Hello!", lookup.get(42).unwrap());
52//! assert_eq!("World!", lookup.get(84).unwrap());
53//! assert!(lookup.get(100).is_none());
54//! ```
55
56use std::ffi;
57use std::mem::size_of;
58use zerocopy::LayoutVerified;
59
60mod cell;
61mod header;
62mod string_slice;
63
64/// A map builder.  Creates a sequence map, allowing the user to insert, repeatedly, a number of
65/// key-value pairs.  Use `Builder::new` to create.
66#[derive(Debug)]
67pub struct Builder {
68    bits: u8,
69    index: Vec<u8>,
70    strings: string_slice::Intern,
71}
72
73impl Builder {
74    /// Creates a new map builder.  `bits` determines how many bits are used
75    /// for each level of the internal trie, min bits is 2, and max is 16.  The
76    /// more bits are used, the faster the lookup, but the larger the resulting
77    /// binary format.
78    pub fn new(bits: usize) -> Builder {
79        assert!(bits >= 2 && bits <= 16);
80        let mut builder = Builder {
81            bits: bits as u8,
82            index: vec![],
83            strings: string_slice::Intern::new(),
84        };
85        builder.reserve_header();
86        builder
87    }
88
89    fn allocate_string(&mut self, s: &str) -> usize {
90        self.strings.add(s)
91    }
92
93    fn header_unchecked(&mut self) -> &mut header::Root {
94        let position = &mut self.index[..];
95        assert!(position.len() >= size_of::<header::Root>());
96        let (root, _): (LayoutVerified<_, header::Root>, _) =
97            LayoutVerified::new_from_prefix(position).expect("header_unchecked");
98        root.into_mut()
99    }
100
101    fn header(&mut self) -> &mut header::Root {
102        let root = self.header_unchecked();
103        assert_eq!(root.htype, header::Type::Root as header::TypeSize);
104        root
105    }
106
107    fn reserve_header(&mut self) {
108        assert_eq!(self.index.len(), 0);
109        self.index
110            .resize(self.index.len() + size_of::<header::Root>(), 0);
111        let root = self.header_unchecked();
112        root.set_type(header::Type::Root);
113        root.set_table_offset(0);
114        root.set_string_offset(0);
115        assert_ne!(self.index.len(), 0);
116    }
117
118    fn append_table(&mut self) -> usize {
119        let index = self.index.len();
120        let entries: usize = 1 << self.bits;
121        let size = size_of::<header::TableHeader>() + entries * size_of::<cell::Instance>();
122        self.index.resize(index + size, 0);
123        {
124            header::TableMut::init(self.bits, &mut self.index[index..index + size]);
125        }
126        index
127    }
128
129    /// Creates the resulting vector of bytes that encodes this sequence map.
130    pub fn build(mut self) -> Vec<u8> {
131        {
132            let len = self.index.len();
133            // This will fail if nothing has been inserted!
134            let root = self.header();
135            root.set_string_offset(len);
136        }
137        let mut result = self.index;
138        let mut strings: Vec<u8> = self.strings.into();
139        result.append(&mut strings);
140        result
141    }
142
143    /// Inserts this `key`-`value` pair into the map.
144    pub fn insert(&mut self, key: u64, value: &str) {
145        let root_table_initialized = {
146            let root = self.header();
147            root.root_table_offset != 0
148        };
149
150        if !root_table_initialized {
151            let index = self.append_table();
152            assert_ne!(index, 0);
153            let root = self.header();
154            root.root_table_offset = index;
155            // Now it is initialized.
156        }
157        let mut remaining_bits = 64;
158        let mut running_key = key;
159        let mut table_index = {
160            let header = self.header();
161            header.root_table_offset
162        };
163        assert_ne!(table_index, 0, "table: {:?}", self.index);
164        loop {
165            if remaining_bits == 0 {
166                break;
167            }
168            let mut table = header::TableMut::overlay_mut(&mut self.index[table_index..]);
169            let index = table.index(running_key);
170            // If it is empty, allocate string and put it here.
171            // If it is already allocated, allocate new table and move string around.
172            // If it is a table pointer, decrement and descend into table.
173            let cell = table.cell_mut(index);
174            let cell_type = cell.get_type();
175            #[allow(unused_variables)]
176            let cell = (); // Release self.
177            match cell_type {
178                cell::Type::Empty => {
179                    let str_index = self.allocate_string(value);
180                    let mut table = header::TableMut::overlay_mut(&mut self.index[table_index..]);
181                    let cell = table.cell_mut(index);
182                    cell.become_string_ptr(str_index, key);
183                    remaining_bits = 0; // exit the loop.
184                }
185                cell::Type::StringPtr => {
186                    // There's already a string here.  We need to replace the reference to that
187                    // string in this cell with a reference to a newly-created table, and place
188                    // that string in its appropriate place in the newly created table.  Once
189                    // that's done, we won't try to insert the new string right away, but instead
190                    // fall through and go through another loop iteration.
191
192                    let (str_index, str_key) = {
193                        let mut table =
194                            header::TableMut::overlay_mut(&mut self.index[table_index..]);
195                        let cell = table.cell_mut(index);
196                        // This is the string that was already here.
197                        cell.string_index_and_key()
198                    };
199
200                    // If it's a double insert, just return.
201                    if str_key == key {
202                        return;
203                    }
204
205                    // Adjust the key of the string which was already there to the same
206                    // number of remaining bits.
207                    assert!(remaining_bits <= 64, "remaining_bits: {}", remaining_bits);
208                    let new_str_key = str_key >> (64 - remaining_bits);
209
210                    // Create a new table to place the old string into.  Once created,
211                    // make a pointer from this cell to the new table.
212                    let new_table_index = self.append_table();
213                    let mut table = header::TableMut::overlay_mut(&mut self.index[table_index..]);
214                    let cell = table.cell_mut(index);
215                    cell.become_table_ptr(new_table_index);
216
217                    // Place the old string into the new table.
218                    let mut new_table =
219                        header::TableMut::overlay_mut(&mut self.index[new_table_index..]);
220                    let new_str_key = new_table.next_key(new_str_key);
221                    let new_cell_index = new_table.index(new_str_key);
222                    let cell = new_table.cell_mut(new_cell_index);
223                    cell.become_string_ptr(str_index, str_key);
224                    // Now that we created the table, repeat this iteration.
225                }
226                cell::Type::TablePtr => {
227                    // The cell is a table pointer.  Follow the table pointer to the next
228                    // table.  Trim the key from the LSB side, reduce the number of bits
229                    // remaining and fall through into the next iteration.
230                    let mut table = header::TableMut::overlay_mut(&mut self.index[table_index..]);
231                    let cell = table.cell_mut(index);
232                    table_index = cell.table_index();
233                    let table = header::TableMut::overlay_mut(&mut self.index[table_index..]);
234                    running_key = table.next_key(running_key);
235                    remaining_bits = table.decrement_bits(remaining_bits);
236                }
237                cell::Type::Unknown => panic!("unknown cell type"),
238            }
239        }
240    }
241}
242
243/// A read-only [Map], backed by a linear buffer.  The contents of that buffer
244/// are expected to have been generated with [Builder].
245pub struct Map<'a> {
246    rep: &'a [u8],
247}
248
249impl<'a> Map<'a> {
250    /// Creates a new [Map], with a representation based on the passed in slice
251    /// `rep`.  The contents of `rep` are opaque.
252    pub fn new(rep: &'a [u8]) -> Map<'a> {
253        Map { rep }
254    }
255
256    /// Looks up `key`, returning the found value in the form of a C string.
257    /// (Because it's possible).
258    pub fn get_cstr(&'a self, key: u64) -> Option<&'a ffi::CStr> {
259        use std::ffi::CStr;
260        use std::os::raw::c_char;
261
262        let (table_index, string_offset) = {
263            let header = self.header();
264            (header.root_table_offset, header.string_offset)
265        };
266        assert!(table_index > 0);
267        let mut remaining_bits = 64;
268        let mut running_key = key;
269        let mut running_table_index = table_index;
270        loop {
271            if remaining_bits == 0 {
272                break;
273            }
274            let table = header::Table::overlay(&self.rep[running_table_index..]);
275            let index = table.index(running_key);
276            let cell = table.cell(index);
277            let cell_type = cell.get_type();
278            match cell_type {
279                cell::Type::Empty => {
280                    return None;
281                }
282                cell::Type::StringPtr => {
283                    let (string_index, string_key) = cell.string_index_and_key();
284                    match key == string_key {
285                        false => return None,
286                        true => {
287                            // Find that string.
288                            let string_index = string_offset + string_index;
289                            let cstr = unsafe {
290                                // We know that the strings in the intern table
291                                // are C strings (UTF-8 with a trailing '/0').
292                                let ptr = self.rep[string_index..].as_ptr() as *const c_char;
293                                CStr::from_ptr(ptr)
294                            };
295                            return Some(cstr);
296                        }
297                    }
298                }
299                cell::Type::TablePtr => {
300                    remaining_bits = table.decrement_bits(remaining_bits);
301                    running_key = table.next_key(running_key);
302                    running_table_index = cell.table_index();
303                    // Descend one level deeper.
304                }
305                cell::Type::Unknown => {
306                    panic!("reached unknown cell");
307                }
308            }
309        }
310        None
311    }
312
313    /// Looks up `key` in the map, returning the found string if possible.
314    pub fn get(&'a self, key: u64) -> Option<&'a str> {
315        self.get_cstr(key)
316            .map(|cstr| cstr.to_str().expect("UTF-8 encoding"))
317    }
318
319    fn header(&'a self) -> &'a header::Root {
320        assert!(self.rep.len() >= size_of::<header::Root>());
321        let (root, _): (LayoutVerified<_, header::Root>, _) =
322            LayoutVerified::new_from_prefix(&self.rep[..]).expect("header check");
323        root.into_ref()
324    }
325}
326
327#[cfg(test)]
328mod tests {
329
330    use super::*;
331    use std::collections::BTreeMap;
332
333    #[test]
334    fn basic() {
335        let mut builder = Builder::new(2);
336        builder.insert(42, "Hello!");
337        builder.insert(84, "World!");
338        let expected: Vec<u8> = vec![
339            1, 0, 0, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 108, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
340            0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 7, 0, 0, 0, 0, 0, 0, 0, 84, 0, 0, 0, 0, 0, 0, 0,
341            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0,
342            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72, 101, 108, 108,
343            111, 33, 0, 87, 111, 114, 108, 100, 33, 0,
344        ];
345        assert_eq!(expected, builder.build());
346    }
347
348    #[test]
349    fn no_insert() {
350        let builder = Builder::new(2);
351        builder.build();
352    }
353
354    #[test]
355    fn get_one_string() {
356        let mut builder = Builder::new(2);
357        builder.insert(42, "Hello!");
358        let bytes = builder.build();
359        let lookup = Map::new(&bytes);
360        assert_eq!("Hello!", lookup.get(42).unwrap());
361        assert!(lookup.get(100).is_none());
362    }
363
364    #[test]
365    fn double_insert() {
366        let mut builder = Builder::new(2);
367        builder.insert(42, "Hello!");
368        builder.insert(42, "World!");
369        let bytes = builder.build();
370        let lookup = Map::new(&bytes);
371        assert_eq!("Hello!", lookup.get(42).unwrap());
372    }
373
374    #[test]
375    fn get_two_strings() {
376        let mut builder = Builder::new(7);
377        builder.insert(0x11_11_11, "World!");
378        builder.insert(0x22, "Again!!");
379        builder.insert(0x11, "Yadda!");
380        builder.insert(0x11_11, "Diddy!");
381        let bytes = builder.build();
382        // This should not need to be mutable!
383        let lookup = Map::new(&bytes);
384        assert_eq!("Yadda!", lookup.get(0x11).unwrap());
385        assert_eq!("Diddy!", lookup.get(0x11_11).unwrap());
386        assert_eq!("Again!!", lookup.get(0x22).unwrap());
387        assert_eq!("World!", lookup.get(0x11_11_11).unwrap());
388    }
389
390    fn insert_and_lookup_random_strings(bits: usize) {
391        let mut reference_map = BTreeMap::new();
392        let mut builder = Builder::new(bits);
393        for entry in 0..1000 {
394            let entry_str = format!("entry_{}", entry);
395            reference_map.insert(entry, entry_str.clone());
396            builder.insert(entry, &entry_str);
397        }
398
399        let buffer = builder.build();
400        let lookup = Map::new(&buffer);
401        for (key, value) in &reference_map {
402            assert_eq!(
403                lookup.get(*key).unwrap(),
404                *value,
405                "while looking up: key={}, value={}, bits={}",
406                key,
407                value,
408                bits
409            );
410        }
411    }
412
413    #[test]
414    fn test_insert_and_lookup_for_bits() {
415        for bits in 2..16 {
416            insert_and_lookup_random_strings(bits);
417        }
418    }
419}