Skip to main content

gentoo_interner/
lib.rs

1//! String interning for Gentoo-related crates.
2//!
3//! Provides a flexible interning system for reducing memory usage when
4//! processing large numbers of repeated strings.
5//!
6//! # Components
7//!
8//! - [`Interner`]: Trait for interning strings into compact keys
9//! - [`Interned<I>`]: An interned string key parameterized by interner type
10//! - [`DefaultInterner`]: Default interner based on feature flags
11//!
12//! # Features
13//!
14//! | Feature | DefaultInterner | Key Type | Behavior |
15//! |---------|-----------------|----------|----------|
16//! | `interner` (default) | `GlobalInterner` | `u32` | Process-global deduplication, `Copy` |
17//! | no `interner` | `NoInterner` | `Box<str>` | No deduplication, `Clone` only |
18//!
19//! # Example
20//!
21//! ```
22//! use gentoo_interner::{Interned, DefaultInterner};
23//!
24//! let interned = Interned::<DefaultInterner>::intern("amd64");
25//! assert_eq!(interned.resolve(), "amd64");
26//! ```
27
28use std::fmt::Debug;
29use std::marker::PhantomData;
30
/// Abstraction over a string-interning backend.
///
/// An implementor converts a `&str` into a [`Key`](Self::Key) and can later
/// turn that key back into the string it was created from. Neither operation
/// takes a `self` receiver, so the implementing type acts purely as a
/// compile-time selector and never has to be instantiated.
pub trait Interner: Send + Sync + 'static {
    /// Handle produced by [`get_or_intern`](Self::get_or_intern) and accepted
    /// by [`resolve`](Self::resolve).
    type Key: Clone + Eq + std::hash::Hash + Debug + Send + Sync + 'static;

    /// Interns `s` and returns the key that identifies it.
    fn get_or_intern(s: &str) -> Self::Key;

    /// Looks up the string that `key` was created from.
    ///
    /// The returned slice is borrowed for as long as `key` is.
    fn resolve(key: &Self::Key) -> &str;
}
46
47/// Non-interning fallback that allocates each string as a `Box<str>`.
48///
49/// No deduplication occurs. The [`Key`](Interner::Key) type is `Box<str>`,
50/// making `Interned<NoInterner>` `Clone` but not `Copy`.
51#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
52pub struct NoInterner;
53
54impl Interner for NoInterner {
55    type Key = Box<str>;
56
57    fn get_or_intern(s: &str) -> Box<str> {
58        Box::from(s)
59    }
60
61    fn resolve(key: &Box<str>) -> &str {
62        key
63    }
64}
65
/// Process-wide [`Interner`] whose strings live in a `lasso::ThreadedRodeo`.
///
/// The type itself carries no data; the rodeo is held in a static shared by
/// the whole process. Its keys are `u32` values, which is what makes
/// `Interned<GlobalInterner>` `Copy`.
#[cfg(feature = "interner")]
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct GlobalInterner;
73
/// Storage behind [`GlobalInterner`]; filled in lazily by [`global`].
#[cfg(feature = "interner")]
static GLOBAL: std::sync::OnceLock<lasso::ThreadedRodeo> = std::sync::OnceLock::new();

/// Returns the process-wide rodeo, creating a default one on the first call.
#[cfg(feature = "interner")]
fn global() -> &'static lasso::ThreadedRodeo {
    GLOBAL.get_or_init(Default::default)
}
81
#[cfg(feature = "interner")]
impl Interner for GlobalInterner {
    type Key = u32;

    /// Interns `s` in the process-wide rodeo and returns its index as a `u32`.
    ///
    /// # Panics
    ///
    /// Panics if the index reported by `lasso` does not fit in a `u32`. The
    /// conversion is checked rather than an `as` cast so that such a value
    /// could never be silently truncated into a different, valid-looking key.
    fn get_or_intern(s: &str) -> u32 {
        use lasso::Key as _;
        let index = global().get_or_intern(s).into_usize();
        u32::try_from(index).expect("interner key does not fit in u32")
    }

    /// Resolves `key` against the process-wide rodeo.
    ///
    /// # Panics
    ///
    /// Panics with "invalid interner key" if `key` cannot be turned back into
    /// a `lasso::Spur`.
    // NOTE(review): `ThreadedRodeo::resolve` presumably also panics for a key
    // that was never handed out by this process - confirm against lasso docs.
    fn resolve(key: &u32) -> &str {
        use lasso::Key as _;
        // Widening cast: `u32` -> `usize` is lossless on 32- and 64-bit targets.
        let index = *key as usize;
        let spur = lasso::Spur::try_from_usize(index).expect("invalid interner key");
        global().resolve(&spur)
    }
}
97
98/// Default interner type based on feature configuration.
99///
100/// - With `interner` feature (default): [`GlobalInterner`]
101/// - Without `interner` feature: [`NoInterner`]
102#[cfg(feature = "interner")]
103pub type DefaultInterner = GlobalInterner;
104#[cfg(not(feature = "interner"))]
105pub type DefaultInterner = NoInterner;
106
107/// An interned string key parameterized by [`Interner`] type `I`.
108///
109/// With [`GlobalInterner`], this is 4 bytes and `Copy`.
110/// With [`NoInterner`], this is a pointer and `Clone` only.
111///
112/// Serde support serializes as the string value and deserializes via interning.
113pub struct Interned<I: Interner> {
114    key: <I as Interner>::Key,
115    _marker: PhantomData<I>,
116}
117
118impl<I: Interner> Clone for Interned<I> {
119    fn clone(&self) -> Self {
120        Self {
121            key: self.key.clone(),
122            _marker: PhantomData,
123        }
124    }
125}
126impl<I: Interner> Copy for Interned<I> where <I as Interner>::Key: Copy {}
127impl<I: Interner> PartialEq for Interned<I> {
128    fn eq(&self, other: &Self) -> bool {
129        self.key == other.key
130    }
131}
132impl<I: Interner> Eq for Interned<I> {}
133impl<I: Interner> std::hash::Hash for Interned<I> {
134    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
135        self.key.hash(state);
136    }
137}
138impl<I: Interner> std::fmt::Debug for Interned<I> {
139    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
140        f.debug_tuple("Interned").field(&self.key).finish()
141    }
142}
143
144impl<I: Interner> Interned<I> {
145    /// Intern a string, returning a new `Interned<I>`.
146    pub fn intern(s: &str) -> Self {
147        Self {
148            key: I::get_or_intern(s),
149            _marker: PhantomData,
150        }
151    }
152
153    /// Resolve this interned key back to its original string.
154    pub fn resolve(&self) -> &str {
155        I::resolve(&self.key)
156    }
157
158    /// Get the interned string as a `&str`.
159    pub fn as_str(&self) -> &str {
160        self.resolve()
161    }
162}
163
164impl<I: Interner> std::ops::Deref for Interned<I> {
165    type Target = str;
166
167    fn deref(&self) -> &Self::Target {
168        self.resolve()
169    }
170}
171
172impl<I: Interner> AsRef<str> for Interned<I> {
173    fn as_ref(&self) -> &str {
174        self.resolve()
175    }
176}
177
178impl<I: Interner> From<&str> for Interned<I> {
179    fn from(s: &str) -> Self {
180        Self::intern(s)
181    }
182}
183
184impl<I: Interner> std::fmt::Display for Interned<I> {
185    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
186        f.write_str(self.resolve())
187    }
188}
189
190impl<I: Interner> PartialEq<str> for Interned<I> {
191    fn eq(&self, other: &str) -> bool {
192        self.resolve() == other
193    }
194}
195
196impl<I: Interner> PartialEq<&str> for Interned<I> {
197    fn eq(&self, other: &&str) -> bool {
198        self.resolve() == *other
199    }
200}
201
202impl<I: Interner> PartialEq<Interned<I>> for str {
203    fn eq(&self, other: &Interned<I>) -> bool {
204        self == other.resolve()
205    }
206}
207
208impl<I: Interner> PartialEq<Interned<I>> for &str {
209    fn eq(&self, other: &Interned<I>) -> bool {
210        *self == other.resolve()
211    }
212}
213
/// Serializes a handle as its resolved string, not as the raw key.
#[cfg(feature = "serde")]
impl<I> serde::Serialize for Interned<I>
where
    I: Interner,
{
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text = self.resolve();
        serializer.serialize_str(text)
    }
}
220
/// Deserializes an owned `String` and interns it through `I`.
#[cfg(feature = "serde")]
impl<'de, I> serde::Deserialize<'de> for Interned<I>
where
    I: Interner,
{
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        <String as serde::Deserialize<'de>>::deserialize(deserializer)
            .map(|owned| Self::intern(&owned))
    }
}
228
#[cfg(test)]
mod tests {
    use super::*;

    /// Handle type under test; follows whichever interner the enabled features select.
    type Sym = Interned<DefaultInterner>;

    /// A handle resolves back to the text it was created from.
    #[test]
    fn test_interned_basic() {
        let sym = Sym::intern("test");
        assert_eq!(sym.resolve(), "test");
        assert_eq!(sym.as_str(), "test");
    }

    /// Handles for equal strings compare equal; different strings do not.
    #[test]
    fn test_interned_equality() {
        let first = Sym::intern("foo");
        let second = Sym::intern("foo");
        let different = Sym::intern("bar");

        assert_eq!(first, second);
        assert_ne!(first, different);
    }

    /// A cloned handle compares equal to its source.
    #[test]
    fn test_interned_copy() {
        let original = Sym::intern("test");
        // `Interned<NoInterner>` is not `Copy`, so an explicit clone keeps this
        // test valid under both feature configurations.
        #[allow(clippy::clone_on_copy)]
        let duplicate = original.clone();
        assert_eq!(original, duplicate);
    }

    /// `From<&str>` interns the string.
    #[test]
    fn test_interned_from_str() {
        let sym = Sym::from("hello");
        assert_eq!(sym.as_str(), "hello");
    }

    /// `str` methods are reachable through `Deref`.
    #[test]
    fn test_interned_deref() {
        let sym = Sym::intern("test");
        assert!(sym.starts_with("te"));
        assert!(sym.ends_with("st"));
    }

    /// `AsRef<str>` yields the resolved string.
    #[test]
    fn test_interned_as_ref() {
        let sym = Sym::intern("test");
        let text: &str = sym.as_ref();
        assert_eq!(text, "test");
    }

    /// `Display` prints the resolved string.
    #[test]
    fn test_interned_display() {
        let sym = Sym::intern("test");
        assert_eq!(format!("{sym}"), "test");
    }

    /// Handles compare with string literals on either side.
    #[test]
    fn test_interned_str_eq() {
        let sym = Sym::intern("test");
        assert_eq!(sym, "test");
        assert_eq!("test", sym);
        assert_ne!(sym, "other");
    }
}