Skip to main content

gentoo_interner/
lib.rs

//! String interning for Gentoo-related crates.
//!
//! Provides a flexible interning system for reducing memory usage when
//! processing large numbers of repeated strings.
//!
//! # Components
//!
//! - [`Interner`]: Trait for interning strings into compact keys
//! - [`Interned<I>`]: An interned string key parameterized by interner type
//! - [`DefaultInterner`]: Default interner based on feature flags
//!
//! # Features
//!
//! | Feature | DefaultInterner | Key Type | Behavior |
//! |---------|-----------------|----------|----------|
//! | `interner` (default) | `GlobalInterner` | `u32` | Process-global deduplication, `Copy` |
//! | no `interner` | `NoInterner` | `Box<str>` | No deduplication, `Clone` only |
//!
//! The optional `serde` feature additionally implements `Serialize` and
//! `Deserialize` for [`Interned<I>`], using the string value as the wire format.
//!
//! # Example
//!
//! ```
//! use gentoo_interner::{Interned, DefaultInterner};
//!
//! let interned = Interned::<DefaultInterner>::intern("amd64");
//! assert_eq!(interned.resolve(), "amd64");
//! ```
27
28use std::fmt::Debug;
29use std::marker::PhantomData;
30
/// Strategy for turning strings into compact keys and back again.
///
/// [`get_or_intern`](Self::get_or_intern) maps a string to a key and
/// [`resolve`](Self::resolve) maps a key back to its string. Neither method
/// takes `self`, so an implementing type is used purely as a compile-time
/// configuration parameter and carries no runtime state of its own.
pub trait Interner: 'static + Send + Sync {
    /// Key type produced by [`get_or_intern`](Self::get_or_intern).
    ///
    /// Keys must be cloneable, comparable, hashable, debug-printable and
    /// shareable across threads.
    type Key: 'static + Send + Sync + Clone + Eq + std::hash::Hash + Debug;

    /// Interns `s` and returns a stable key for it.
    fn get_or_intern(s: &str) -> Self::Key;

    /// Returns the string that `key` stands for.
    ///
    /// The returned slice is borrowed for no longer than `key` itself.
    fn resolve(key: &Self::Key) -> &str;
}
46
47/// Non-interning fallback that allocates each string as a `Box<str>`.
48///
49/// No deduplication occurs. The [`Key`](Interner::Key) type is `Box<str>`,
50/// making `Interned<NoInterner>` `Clone` but not `Copy`.
51#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
52pub struct NoInterner;
53
54impl Interner for NoInterner {
55    type Key = Box<str>;
56
57    fn get_or_intern(s: &str) -> Box<str> {
58        Box::from(s)
59    }
60
61    fn resolve(key: &Box<str>) -> &str {
62        key
63    }
64}
65
/// Process-wide [`Interner`] built on `lasso::ThreadedRodeo`.
///
/// The type is zero-sized; all interned strings live in a single static
/// table shared by the whole process. Keys are stable `u32` values, which
/// makes `Interned<GlobalInterner>` `Copy`.
#[cfg(feature = "interner")]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct GlobalInterner;

/// Storage for the shared string table; filled in on first access.
#[cfg(feature = "interner")]
static GLOBAL: std::sync::OnceLock<lasso::ThreadedRodeo> = std::sync::OnceLock::new();

/// Returns the shared string table, creating a default one on first use.
#[cfg(feature = "interner")]
fn global() -> &'static lasso::ThreadedRodeo {
    GLOBAL.get_or_init(<lasso::ThreadedRodeo as Default>::default)
}
81
#[cfg(feature = "interner")]
impl Interner for GlobalInterner {
    type Key = u32;

    /// Interns `s` in the process-wide table and returns its index as a `u32`.
    ///
    /// # Panics
    ///
    /// Panics if the index reported by lasso does not fit in a `u32`.
    fn get_or_intern(s: &str) -> u32 {
        use lasso::Key as _;
        let index = global().get_or_intern(s).into_usize();
        // Checked conversion instead of `as`: an out-of-range index must fail
        // loudly rather than be truncated into a different, valid-looking key.
        u32::try_from(index).expect("interner key does not fit in u32")
    }

    /// Resolves `key` through the process-wide table.
    ///
    /// # Panics
    ///
    /// Panics with "invalid interner key" if `key` cannot be converted into a
    /// `lasso::Spur`.
    ///
    /// NOTE(review): lasso's `resolve` presumably also panics for a key that
    /// was never returned by `get_or_intern` — confirm against the lasso docs.
    fn resolve(key: &u32) -> &str {
        use lasso::Key as _;
        // `u32` to `usize` is a widening conversion on the 32/64-bit targets
        // this crate is assumed to run on — TODO confirm no 16-bit targets.
        let index = *key as usize;
        let spur = lasso::Spur::try_from_usize(index).expect("invalid interner key");
        global().resolve(&spur)
    }
}
97
98/// Default interner type based on feature configuration.
99///
100/// - With `interner` feature (default): [`GlobalInterner`]
101/// - Without `interner` feature: [`NoInterner`]
102#[cfg(feature = "interner")]
103pub type DefaultInterner = GlobalInterner;
104#[cfg(not(feature = "interner"))]
105pub type DefaultInterner = NoInterner;
106
107/// An interned string key parameterized by [`Interner`] type `I`.
108///
109/// With [`GlobalInterner`], this is 4 bytes and `Copy`.
110/// With [`NoInterner`], this is a pointer and `Clone` only.
111///
112/// Serde support serializes as the string value and deserializes via interning.
113pub struct Interned<I: Interner> {
114    key: <I as Interner>::Key,
115    _marker: PhantomData<I>,
116}
117
118impl<I: Interner> Clone for Interned<I> {
119    fn clone(&self) -> Self {
120        Self {
121            key: self.key.clone(),
122            _marker: PhantomData,
123        }
124    }
125}
126impl<I: Interner> Copy for Interned<I> where <I as Interner>::Key: Copy {}
127impl<I: Interner> PartialEq for Interned<I> {
128    fn eq(&self, other: &Self) -> bool {
129        self.key == other.key
130    }
131}
132impl<I: Interner> Eq for Interned<I> {}
133impl<I: Interner> std::hash::Hash for Interned<I> {
134    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
135        self.key.hash(state);
136    }
137}
138impl<I: Interner> std::fmt::Debug for Interned<I> {
139    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
140        f.debug_tuple("Interned").field(&self.key).finish()
141    }
142}
143
144impl<I: Interner> Interned<I> {
145    /// Intern a string, returning a new `Interned<I>`.
146    pub fn intern(s: &str) -> Self {
147        Self {
148            key: I::get_or_intern(s),
149            _marker: PhantomData,
150        }
151    }
152
153    /// Resolve this interned key back to its original string.
154    pub fn resolve(&self) -> &str {
155        I::resolve(&self.key)
156    }
157}
158
#[cfg(feature = "serde")]
impl<I: Interner> serde::Serialize for Interned<I> {
    /// Serializes the resolved string value, not the interner key.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text = self.resolve();
        serializer.serialize_str(text)
    }
}
165
#[cfg(feature = "serde")]
impl<'de, I: Interner> serde::Deserialize<'de> for Interned<I> {
    /// Deserializes an owned `String` and interns it through `I`.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        <String as serde::Deserialize<'de>>::deserialize(deserializer)
            .map(|owned| Self::intern(&owned))
    }
}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// Interning a string and resolving the handle yields the same string.
    #[test]
    fn test_interned_basic() {
        let a = Interned::<DefaultInterner>::intern("test");
        assert_eq!(a.resolve(), "test");
    }

    /// Handles for equal strings compare equal; different strings do not.
    #[test]
    fn test_interned_equality() {
        let a = Interned::<DefaultInterner>::intern("foo");
        let b = Interned::<DefaultInterner>::intern("foo");
        let c = Interned::<DefaultInterner>::intern("bar");

        assert_eq!(a, b);
        assert_ne!(a, c);
    }

    /// `Interned` is `Copy` only when the key type is. `NoInterner` uses a
    /// `Box<str>` key, so using `a` after `let b = a;` would not compile
    /// without the `interner` feature; the test is therefore gated on it and
    /// pinned to `GlobalInterner`.
    #[cfg(feature = "interner")]
    #[test]
    fn test_interned_copy() {
        let a = Interned::<GlobalInterner>::intern("test");
        let b = a;
        assert_eq!(a, b);
    }

    /// Cloning works for every interner, including the non-`Copy` fallback.
    #[test]
    fn test_interned_clone() {
        let a = Interned::<NoInterner>::intern("test");
        let b = a.clone();
        assert_eq!(a, b);
        assert_eq!(b.resolve(), "test");
    }
}