lasso/lib.rs
1#![cfg_attr(feature = "no-std", no_std)]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3#![cfg_attr(feature = "inline-more", warn(clippy::missing_inline_in_public_items))]
4// `.copied()` was unstable in 1.34
5#![allow(clippy::map_clone)]
6#![deny(
7 missing_docs,
8 unsafe_op_in_unsafe_fn,
9 clippy::missing_safety_doc,
10 missing_debug_implementations
11)]
12
13//! [![CI][1]][0]
14//! [![Security Audit][2]][0]
15//! [![Coverage][3]][4]
16//! [![Docs.rs][6]][7]
17//! [![Crates.io][8]][9]
18//!
19//! A multithreaded and single threaded string interner that allows strings to be cached with a minimal memory footprint,
20//! associating them with a unique [key] that can be used to retrieve them at any time. A [`struct@Rodeo`] allows `O(1)`
21//! interning and resolution and can be turned into a [`struct@RodeoReader`] to allow for contention-free resolutions
22//! with both key to str and str to key operations. It can also be turned into a [`struct@RodeoResolver`] with only
23//! key to str operations for the lowest possible memory usage.
24//!
25//! ## Which interner do I use?
26//!
27//! For single-threaded workloads [`struct@Rodeo`] is encouraged, while multi-threaded applications should use [`struct@ThreadedRodeo`].
28//! These two are the only ways to intern strings, but most applications will hit a stage where they are done interning
29//! strings, and that is the point where the choice between [`struct@RodeoReader`] and [`struct@RodeoResolver`] comes in. If the user needs to get
30//! keys for strings still, then they must use the [`struct@RodeoReader`] (although they can still transfer into a [`struct@RodeoResolver`])
31//! at this point. For users who just need key to string resolution, the [`struct@RodeoResolver`] gives contention-free access at the
32//! minimum possible memory usage. Note that to gain access to [`struct@ThreadedRodeo`] the `multi-threaded` feature is required.
33//!
34//! | Interner | Thread-safe | Intern String | str to key | key to str | Contention Free | Memory Usage |
35//! |-------------------|:-----------:|:-------------:|:----------:|:----------:|:---------------:|:------------:|
36//! | [`struct@Rodeo`] | ❌ | ✅ | ✅ | ✅ | N/A | Medium |
37//! | [`struct@ThreadedRodeo`] | ✅ | ✅ | ✅ | ✅ | ❌ | Most |
38//! | [`struct@RodeoReader`] | ✅ | ❌ | ✅ | ✅ | ✅ | Medium |
39//! | [`struct@RodeoResolver`] | ✅ | ❌ | ❌ | ✅ | ✅ | Least |
40//!
41//! ## Cargo Features
42//!
43//! By default `lasso` has one dependency, `hashbrown`, and only [`struct@Rodeo`] is exposed. Hashbrown is used since the
44//! [`raw_entry` api] is currently unstable in the standard library's hashmap.
45//! The raw hashmap API is used for custom hashing within the hashmaps, which works to dramatically reduce memory usage.
46//! To make use of [`struct@ThreadedRodeo`], you must enable the `multi-threaded` feature.
47//!
48//! * `multi-threaded` - Enables [`struct@ThreadedRodeo`], the interner for multi-threaded tasks
49//! * `ahasher` - Use [`ahash`]'s `RandomState` as the default hasher
50//! * `no-std` - Enables `no_std` + `alloc` support for [`struct@Rodeo`]; it cannot be combined with `multi-threaded`, so [`struct@ThreadedRodeo`] is unavailable under `no_std`
51//!   * Automatically enables the following required features:
52//!     * `ahasher` - `no_std` hashing function
53//! * `serialize` - Implements `Serialize` and `Deserialize` for all [`struct@Spur`] types and all interners
54//! * `inline-more` - Annotate external apis with `#[inline]`
55//!
56//! ## Example: Using Rodeo
57//!
58//! ```rust
59//! use lasso::Rodeo;
60//!
61//! let mut rodeo = Rodeo::default();
62//! let key = rodeo.get_or_intern("Hello, world!");
63//!
64//! // Easily retrieve the value of a key and find the key for values
65//! assert_eq!("Hello, world!", rodeo.resolve(&key));
66//! assert_eq!(Some(key), rodeo.get("Hello, world!"));
67//!
68//! // Interning the same string again will yield the same key
69//! let key2 = rodeo.get_or_intern("Hello, world!");
70//!
71//! assert_eq!(key, key2);
72//! ```
73//!
74//! ## Example: Using ThreadedRodeo
75//!
76//! ```rust
77//! # // This keeps this doctest from running under miri since
78//! # // miri doesn't support threading
79//! # #[cfg(not(miri))]
80//! # {
81//! #
82//! # // This is hacky to the extreme, but it prevents failure of this doc test when
83//! # // run with `--no-default-features`
84//! #
85//! # #[cfg(not(feature = "multi-threaded"))]
86//! # #[derive(Default)]
87//! # struct ThreadedRodeo;
88//! #
89//! # #[cfg(not(feature = "multi-threaded"))]
90//! # impl ThreadedRodeo {
91//! # fn get_or_intern(&self, string: &'static str) -> i32 {
92//! # match string {
93//! # "Hello, world!" => 0,
94//! # "Hello from the thread!" => 1,
95//! # _ => unreachable!("Update the docs, dude"),
96//! # }
97//! # }
98//! #
99//! # fn get(&self, string: &'static str) -> Option<i32> {
100//! # match string {
101//! # "Hello, world!" => Some(0),
102//! # "Hello from the thread!" => Some(1),
103//! # _ => unreachable!("Update the docs, dude"),
104//! # }
105//! # }
106//! #
107//! # fn resolve(&self, id: &i32) -> &'static str {
108//! # match *id {
109//! # 0 => "Hello, world!",
110//! # 1 => "Hello from the thread!",
111//! # _ => unreachable!("Update the docs, dude"),
112//! # }
113//! # }
114//! # }
115//! #
116//! # #[cfg(feature = "multi-threaded")]
117//! use lasso::ThreadedRodeo;
118//! use std::{thread, sync::Arc};
119//!
120//! let rodeo = Arc::new(ThreadedRodeo::default());
121//! let key = rodeo.get_or_intern("Hello, world!");
122//!
123//! // Easily retrieve the value of a key and find the key for values
124//! assert_eq!("Hello, world!", rodeo.resolve(&key));
125//! assert_eq!(Some(key), rodeo.get("Hello, world!"));
126//!
127//! // Interning the same string again will yield the same key
128//! let key2 = rodeo.get_or_intern("Hello, world!");
129//!
130//! assert_eq!(key, key2);
131//!
132//! // ThreadedRodeo can be shared across threads
133//! let moved = Arc::clone(&rodeo);
134//! let hello = thread::spawn(move || {
135//! assert_eq!("Hello, world!", moved.resolve(&key));
136//! moved.get_or_intern("Hello from the thread!")
137//! })
138//! .join()
139//! .unwrap();
140//!
141//! assert_eq!("Hello, world!", rodeo.resolve(&key));
142//! assert_eq!("Hello from the thread!", rodeo.resolve(&hello));
143//! # }
144//! ```
145//!
146//! ## Example: Creating a RodeoReader
147//!
148//! ```rust
149//! use lasso::Rodeo;
150//!
151//! // Rodeo and ThreadedRodeo are interchangeable here
152//! let mut rodeo = Rodeo::default();
153//!
154//! let key = rodeo.get_or_intern("Hello, world!");
155//! assert_eq!("Hello, world!", rodeo.resolve(&key));
156//!
157//! let reader = rodeo.into_reader();
158//!
159//! // Reader keeps all the strings from the parent
160//! assert_eq!("Hello, world!", reader.resolve(&key));
161//! assert_eq!(Some(key), reader.get("Hello, world!"));
162//!
163//! // The Reader can now be shared across threads, no matter what kind of Rodeo created it
164//! ```
165//!
166//! ## Example: Creating a RodeoResolver
167//!
168//! ```rust
169//! use lasso::Rodeo;
170//!
171//! // Rodeo and ThreadedRodeo are interchangeable here
172//! let mut rodeo = Rodeo::default();
173//!
174//! let key = rodeo.get_or_intern("Hello, world!");
175//! assert_eq!("Hello, world!", rodeo.resolve(&key));
176//!
177//! let resolver = rodeo.into_resolver();
178//!
179//! // Resolver keeps all the strings from the parent
180//! assert_eq!("Hello, world!", resolver.resolve(&key));
181//!
182//! // The Resolver can now be shared across threads, no matter what kind of Rodeo created it
183//! ```
184//!
185//! ## Example: Making a custom-ranged key
186//!
187//! Sometimes you want your keys to only inhabit (or *not* inhabit) a certain range of values so that you can have custom [niches],
188//! meaning that an [`enum@Option`]`<`[`struct@Spur`]`>` is the same size as a [`struct@Spur`]. This allows you to pack more data into
189//! what would otherwise be unused space, which can be critical for memory-sensitive applications.
190//!
191//! ```rust
192//! use lasso::{Key, Rodeo};
193//!
194//! // First make our key type, this will be what we use as handles into our interner
195//! #[derive(Copy, Clone, PartialEq, Eq)]
196//! struct NicheKey(u32);
197//!
198//! // This reserves every value from `0xFF000000` upwards (the top 2^24 values of a `u32`) for us to use as niches
199//! const NICHE: usize = 0xFF000000;
200//!
201//! // Implementing `Key` is unsafe and requires that anything given to `try_from_usize` must produce the
202//! // same `usize` when `into_usize` is later called
203//! unsafe impl Key for NicheKey {
204//! fn into_usize(self) -> usize {
205//! self.0 as usize
206//! }
207//!
208//! fn try_from_usize(int: usize) -> Option<Self> {
209//! if int < NICHE {
210//! // The value isn't in our niche range, so we're good to go
211//! Some(Self(int as u32))
212//! } else {
213//! // The value interferes with our niche, so we return `None`
214//! None
215//! }
216//! }
217//! }
218//!
219//! // To make sure we're upholding `Key`'s safety contract, let's make two small tests
220//! #[test]
221//! # fn a() {}
222//! fn value_in_range() {
223//! let key = NicheKey::try_from_usize(0).unwrap();
224//! assert_eq!(key.into_usize(), 0);
225//!
226//! let key = NicheKey::try_from_usize(NICHE - 1).unwrap();
227//! assert_eq!(key.into_usize(), NICHE - 1);
228//! }
229//! # value_in_range();
230//!
231//! #[test]
232//! # fn b() {}
233//! fn value_out_of_range() {
234//! let key = NicheKey::try_from_usize(NICHE);
235//! assert!(key.is_none());
236//!
237//! let key = NicheKey::try_from_usize(u32::max_value() as usize);
238//! assert!(key.is_none());
239//! }
240//! # value_out_of_range();
241//!
242//! // And now we're done and can make `Rodeo`s or `ThreadedRodeo`s that use our custom key!
243//! let mut rodeo: Rodeo<NicheKey> = Rodeo::new();
244//! let key = rodeo.get_or_intern("It works!");
245//! assert_eq!(rodeo.resolve(&key), "It works!");
246//! ```
247//!
248//! ## Example: Creation using `FromIterator`
249//!
250//! ```rust
251//! use lasso::Rodeo;
252//! use core::iter::FromIterator;
253//!
254//! // Works for both `Rodeo` and `ThreadedRodeo`
255//! let rodeo: Rodeo = vec!["one string", "two string", "red string", "blue string"]
256//! .into_iter()
257//! .collect();
258//!
259//! assert!(rodeo.contains("one string"));
260//! assert!(rodeo.contains("two string"));
261//! assert!(rodeo.contains("red string"));
262//! assert!(rodeo.contains("blue string"));
263//! ```
264//!
265//! ```rust
266//! use lasso::Rodeo;
267//! use core::iter::FromIterator;
268//!
269//! // Works for both `Rodeo` and `ThreadedRodeo`
270//! let rodeo: Rodeo = Rodeo::from_iter(vec![
271//! "one string",
272//! "two string",
273//! "red string",
274//! "blue string",
275//! ]);
276//!
277//! assert!(rodeo.contains("one string"));
278//! assert!(rodeo.contains("two string"));
279//! assert!(rodeo.contains("red string"));
280//! assert!(rodeo.contains("blue string"));
281//! ```
282//!
283//! ## Benchmarks
284//!
285//! Benchmarks were gathered with [Criterion.rs](https://github.com/bheisler/criterion.rs) in the following environment:
286//! - OS: Windows 10
287//! - CPU: Ryzen 9 3900X at 3800MHz
288//! - RAM: 3200MHz
289//! - Rustc: Stable 1.44.1
290//!
291//! ### Rodeo
292//!
293//! #### STD RandomState
294//!
295//! | Method | Time | Throughput |
296//! |:-----------------------------|:---------:|:------------:|
297//! | `resolve` | 1.9251 μs | 13.285 GiB/s |
298//! | `try_resolve` | 1.9214 μs | 13.311 GiB/s |
299//! | `resolve_unchecked` | 1.4356 μs | 17.816 GiB/s |
300//! | `get_or_intern` (empty) | 60.350 μs | 433.96 MiB/s |
301//! | `get_or_intern` (filled) | 57.415 μs | 456.15 MiB/s |
302//! | `try_get_or_intern` (empty) | 58.978 μs | 444.06 MiB/s |
303//! | `try_get_or_intern` (filled) | 57.421 μs | 456.10 MiB/s |
304//! | `get` (empty) | 37.288 μs | 702.37 MiB/s |
305//! | `get` (filled) | 55.095 μs | 475.36 MiB/s |
306//!
307//! #### AHash
308//!
309//! | Method | Time | Throughput |
310//! |:-----------------------------|:---------:|:------------:|
311//! | `try_resolve` | 1.9282 μs | 13.264 GiB/s |
312//! | `resolve` | 1.9404 μs | 13.181 GiB/s |
313//! | `resolve_unchecked` | 1.4328 μs | 17.851 GiB/s |
314//! | `get_or_intern` (empty) | 38.029 μs | 688.68 MiB/s |
315//! | `get_or_intern` (filled) | 33.650 μs | 778.30 MiB/s |
316//! | `try_get_or_intern` (empty) | 39.392 μs | 664.84 MiB/s |
317//! | `try_get_or_intern` (filled) | 33.435 μs | 783.31 MiB/s |
318//! | `get` (empty) | 12.565 μs | 2.0356 GiB/s |
319//! | `get` (filled) | 26.545 μs | 986.61 MiB/s |
320//!
321//! #### FXHash
322//!
323//! | Method | Time | Throughput |
324//! |:-----------------------------|:---------:|:------------:|
325//! | `resolve` | 1.9014 μs | 13.451 GiB/s |
326//! | `try_resolve` | 1.9278 μs | 13.267 GiB/s |
327//! | `resolve_unchecked` | 1.4449 μs | 17.701 GiB/s |
328//! | `get_or_intern` (empty) | 32.523 μs | 805.27 MiB/s |
329//! | `get_or_intern` (filled) | 30.281 μs | 864.88 MiB/s |
330//! | `try_get_or_intern` (empty) | 31.630 μs | 828.00 MiB/s |
331//! | `try_get_or_intern` (filled) | 31.002 μs | 844.78 MiB/s |
332//! | `get` (empty) | 12.699 μs | 2.0141 GiB/s |
333//! | `get` (filled) | 29.220 μs | 896.28 MiB/s |
334//!
335//! ### ThreadedRodeo
336//!
337//! #### STD RandomState
338//!
339//! | Method | Time (1 Thread) | Throughput (1 Thread) | Time (24 Threads) | Throughput (24 Threads) |
340//! |:-----------------------------|:---------------:|:---------------------:|:-----------------:|:-----------------------:|
341//! | `resolve` | 54.336 μs | 482.00 MiB/s | 364.27 μs | 71.897 MiB/s |
342//! | `try_resolve` | 54.582 μs | 479.82 MiB/s | 352.67 μs | 74.261 MiB/s |
343//! | `get_or_intern` (empty) | 266.03 μs | 98.447 MiB/s | N/A | N/A |
344//! | `get_or_intern` (filled) | 103.04 μs | 254.17 MiB/s | 441.42 μs | 59.331 MiB/s |
345//! | `try_get_or_intern` (empty) | 261.80 μs | 100.04 MiB/s | N/A | N/A |
346//! | `try_get_or_intern` (filled) | 102.61 μs | 255.25 MiB/s | 447.42 μs | 58.535 MiB/s |
347//! | `get` (empty) | 80.346 μs | 325.96 MiB/s | N/A | N/A |
348//! | `get` (filled) | 92.669 μs | 282.62 MiB/s | 439.24 μs | 59.626 MiB/s |
349//!
350//! #### AHash
351//!
352//! | Method | Time (1 Thread) | Throughput (1 Thread) | Time (24 Threads) | Throughput (24 Threads) |
353//! |:-----------------------------|:---------------:|:---------------------:|:-----------------:|:-----------------------:|
354//! | `resolve` | 22.261 μs | 1.1489 GiB/s | 265.46 μs | 98.658 MiB/s |
355//! | `try_resolve` | 22.378 μs | 1.1429 GiB/s | 268.58 μs | 97.513 MiB/s |
356//! | `get_or_intern` (empty) | 157.86 μs | 165.91 MiB/s | N/A | N/A |
357//! | `get_or_intern` (filled) | 56.320 μs | 465.02 MiB/s | 357.13 μs | 73.335 MiB/s |
358//! | `try_get_or_intern` (empty) | 161.46 μs | 162.21 MiB/s | N/A | N/A |
359//! | `try_get_or_intern` (filled) | 55.874 μs | 468.73 MiB/s | 360.25 μs | 72.698 MiB/s |
360//! | `get` (empty) | 43.520 μs | 601.79 MiB/s | N/A | N/A |
361//! | `get` (filled) | 53.720 μs | 487.52 MiB/s | 360.66 μs | 72.616 MiB/s |
362//!
363//! #### FXHash
364//!
365//! | Method | Time (1 Thread) | Throughput (1 Thread) | Time (24 Threads) | Throughput (24 Threads) |
366//! |:-----------------------------|:---------------:|:---------------------:|:-----------------:|:-----------------------:|
367//! | `try_resolve` | 17.289 μs | 1.4794 GiB/s | 238.29 μs | 109.91 MiB/s |
368//! | `resolve` | 19.833 μs | 1.2896 GiB/s | 237.05 μs | 110.48 MiB/s |
369//! | `get_or_intern` (empty) | 130.97 μs | 199.97 MiB/s | N/A | N/A |
370//! | `get_or_intern` (filled) | 42.630 μs | 614.35 MiB/s | 301.60 μs | 86.837 MiB/s |
371//! | `try_get_or_intern` (empty) | 129.30 μs | 202.55 MiB/s | N/A | N/A |
372//! | `try_get_or_intern` (filled) | 42.508 μs | 616.12 MiB/s | 337.29 μs | 77.648 MiB/s |
373//! | `get` (empty) | 28.001 μs | 935.30 MiB/s | N/A | N/A |
374//! | `get` (filled) | 37.700 μs | 694.68 MiB/s | 292.15 μs | 89.645 MiB/s |
375//!
376//! ### RodeoReader
377//!
378//! #### STD RandomState
379//!
380//! | Method | Time (1 Thread) | Throughput (1 Thread) | Time (24 Threads) | Throughput (24 Threads) |
381//! |:--------------------|:---------------:|:---------------------:|:-----------------:|:------------------------:|
382//! | `resolve` | 1.9398 μs | 13.185 GiB/s | 4.3153 μs | 5.9269 GiB/s |
383//! | `try_resolve` | 1.9315 μs | 13.242 GiB/s | 4.1956 μs | 6.0959 GiB/s |
384//! | `resolve_unchecked` | 1.4416 μs | 17.741 GiB/s | 3.1204 μs | 8.1964 GiB/s |
385//! | `get` (empty) | 38.886 μs | 673.50 MiB/s | N/A | N/A |
386//! | `get` (filled) | 56.271 μs | 465.42 MiB/s | 105.12 μs | 249.14 MiB/s |
387//!
388//! #### AHash
389//!
390//! | Method | Time (1 Thread) | Throughput (1 Thread) | Time (24 Threads) | Throughput (24 Threads) |
391//! |:--------------------|:---------------:|:---------------------:|:-----------------:|:-----------------------:|
392//! | `resolve` | 1.9404 μs | 13.181 GiB/s | 4.1881 μs | 6.1069 GiB/s |
393//! | `try_resolve` | 1.8932 μs | 13.509 GiB/s | 4.2410 μs | 6.0306 GiB/s |
394//! | `resolve_unchecked` | 1.4128 μs | 18.103 GiB/s | 3.1691 μs | 8.0703 GiB/s |
395//! | `get` (empty) | 11.952 μs | 2.1399 GiB/s | N/A | N/A |
396//! | `get` (filled) | 27.093 μs | 966.65 MiB/s | 56.269 μs | 465.44 MiB/s |
397//!
398//! #### FXHash
399//!
400//! | Method | Time (1 Thread) | Throughput (1 Thread) | Time (24 Threads) | Throughput (24 Threads) |
401//! |:--------------------|:---------------:|:---------------------:|:-----------------:|:-----------------------:|
402//! | `resolve` | 1.8987 μs | 13.471 GiB/s | 4.2117 μs | 6.0727 GiB/s |
403//! | `try_resolve` | 1.9103 μs | 13.389 GiB/s | 4.2254 μs | 6.0529 GiB/s |
404//! | `resolve_unchecked` | 1.4469 μs | 17.677 GiB/s | 3.0923 μs | 8.2709 GiB/s |
405//! | `get` (empty) | 12.994 μs | 1.9682 GiB/s | N/A | N/A |
406//! | `get` (filled) | 29.745 μs | 880.49 MiB/s | 52.387 μs | 499.93 MiB/s |
407//!
408//! ### RodeoResolver
409//!
410//! | Method | Time (1 Thread) | Throughput (1 Thread) | Time (24 Threads) | Throughput (24 Threads) |
411//! |:--------------------|:---------------:|:---------------------:|:-----------------:|:-----------------------:|
412//! | `resolve` | 1.9416 μs | 13.172 GiB/s | 3.9114 μs | 6.5387 GiB/s |
413//! | `try_resolve` | 1.9264 μs | 13.277 GiB/s | 3.9289 μs | 6.5097 GiB/s |
414//! | `resolve_unchecked` | 1.6638 μs | 15.372 GiB/s | 3.1741 μs | 8.0578 GiB/s |
415//!
416//! [0]: https://github.com/Kixiron/lasso
417//! [1]: https://github.com/Kixiron/lasso/workflows/CI/badge.svg
418//! [2]: https://github.com/Kixiron/lasso/workflows/Security%20Audit/badge.svg
419//! [3]: https://coveralls.io/repos/github/Kixiron/lasso/badge.svg?branch=master
420//! [4]: https://coveralls.io/github/Kixiron/lasso?branch=master
421//! [6]: https://docs.rs/lasso/badge.svg
422//! [7]: https://docs.rs/lasso
423//! [8]: https://img.shields.io/crates/v/lasso.svg
424//! [9]: https://crates.io/crates/lasso
425//! [key]: crate::Key
426//! [niches]: https://rust-lang.github.io/unsafe-code-guidelines/layout/enums.html#discriminant-elision-on-option-like-enums
427//! [`hashbrown`]: https://crates.io/crates/hashbrown
428//! [`ahash`]: https://crates.io/crates/ahash
429//! [`raw_entry` api]: https://github.com/rust-lang/rust/issues/56167
430
431extern crate alloc;
432
433#[macro_use]
434mod util;
435mod arenas;
436mod interface;
437mod keys;
438mod reader;
439mod resolver;
440mod rodeo;
441
442pub use interface::{Interner, IntoReader, IntoReaderAndResolver, IntoResolver, Reader, Resolver};
443pub use keys::{Key, LargeSpur, MicroSpur, MiniSpur, Spur};
444pub use reader::RodeoReader;
445pub use resolver::RodeoResolver;
446pub use rodeo::Rodeo;
447pub use util::{Capacity, Iter, LassoError, LassoErrorKind, LassoResult, MemoryLimits, Strings};
448
// `ThreadedRodeo` is only compiled in when the `multi-threaded` feature is
// enabled and the `no-std` feature is not
#[cfg(all(feature = "multi-threaded", not(feature = "no-std")))]
mod threaded_rodeo;

#[cfg(all(feature = "multi-threaded", not(feature = "no-std")))]
pub use threaded_rodeo::ThreadedRodeo;

// Enabling both the `multi-threaded` and `no-std` features is rejected at compile time
#[cfg(all(feature = "multi-threaded", feature = "no-std"))]
compile_error!("The `multi-threaded` and `no-std` features are not supported together");
460
/// Re-exports the `RandomState` hasher selected by the crate's feature flags
#[doc(hidden)]
mod hasher {
    // With the `ahasher` feature enabled, `ahash` supplies the hasher
    #[cfg(feature = "ahasher")]
    pub use ahash::RandomState;

    // Without it, the standard library's hasher is used instead
    #[cfg(not(feature = "ahasher"))]
    pub use std::collections::hash_map::RandomState;
}
471
472// TODO: No-alloc interner