polykit_core/
string_interner.rs

1//! Global string interning for reducing memory allocations.
2//!
3//! Provides a thread-safe string interner that deduplicates strings
4//! across the entire codebase, reducing memory usage and enabling
5//! fast pointer-based string comparisons.
6
7use dashmap::DashMap;
8use std::sync::Arc;
9
10/// Thread-safe string interner using DashMap.
11pub struct StringInterner {
12    strings: DashMap<String, Arc<str>>,
13}
14
15impl StringInterner {
16    /// Creates a new string interner.
17    pub fn new() -> Self {
18        Self {
19            strings: DashMap::new(),
20        }
21    }
22
23    /// Interns a string slice, returning an Arc<str>.
24    ///
25    /// If the string has been interned before, returns the existing Arc.
26    /// Otherwise, creates a new entry and returns it.
27    pub fn intern(&self, s: &str) -> Arc<str> {
28        self.strings
29            .entry(s.to_string())
30            .or_insert_with(|| Arc::from(s))
31            .value()
32            .clone()
33    }
34
35    /// Interns an owned String, returning an Arc<str>.
36    ///
37    /// More efficient than `intern` when you already have an owned String.
38    pub fn intern_owned(&self, s: String) -> Arc<str> {
39        self.strings
40            .entry(s.clone())
41            .or_insert_with(|| Arc::from(s))
42            .value()
43            .clone()
44    }
45
46    /// Returns the number of interned strings.
47    pub fn len(&self) -> usize {
48        self.strings.len()
49    }
50
51    /// Returns true if no strings have been interned.
52    pub fn is_empty(&self) -> bool {
53        self.strings.is_empty()
54    }
55}
56
57impl Default for StringInterner {
58    fn default() -> Self {
59        Self::new()
60    }
61}
62
63/// Global thread-safe string interner.
64///
65/// This is a singleton instance shared across all threads.
66/// All strings interned through this instance are deduplicated.
67pub static GLOBAL_INTERNER: once_cell::sync::Lazy<StringInterner> =
68    once_cell::sync::Lazy::new(StringInterner::new);
69
70/// Interns a string slice using the global interner.
71///
72/// This is a convenience function that uses the global `GLOBAL_INTERNER`.
73#[inline]
74pub fn intern(s: &str) -> Arc<str> {
75    GLOBAL_INTERNER.intern(s)
76}
77
78/// Interns an owned String using the global interner.
79///
80/// This is a convenience function that uses the global `GLOBAL_INTERNER`.
81#[inline]
82pub fn intern_owned(s: String) -> Arc<str> {
83    GLOBAL_INTERNER.intern_owned(s)
84}
85
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh interner (via `new` or `Default`) holds nothing, and reports
    /// non-empty once a string has been interned.
    #[test]
    fn test_new_is_empty() {
        let interner = StringInterner::new();
        assert!(interner.is_empty());
        assert_eq!(interner.len(), 0);

        let defaulted = StringInterner::default();
        assert!(defaulted.is_empty());

        interner.intern("x");
        assert!(!interner.is_empty());
        assert_eq!(interner.len(), 1);
    }

    /// Equal inputs share one allocation; distinct inputs do not.
    #[test]
    fn test_intern_deduplication() {
        let interner = StringInterner::new();
        let s1 = interner.intern("hello");
        let s2 = interner.intern("hello");
        let s3 = interner.intern("world");

        // The handles must carry the text that was interned.
        assert_eq!(&*s1, "hello");
        assert_eq!(&*s3, "world");

        assert!(Arc::ptr_eq(&s1, &s2));
        assert!(!Arc::ptr_eq(&s1, &s3));
        assert_eq!(interner.len(), 2);
    }

    /// The empty string is a valid key and is deduplicated like any other.
    #[test]
    fn test_intern_empty_string() {
        let interner = StringInterner::new();
        let a = interner.intern("");
        let b = interner.intern_owned(String::new());

        assert_eq!(&*a, "");
        assert!(Arc::ptr_eq(&a, &b));
        assert_eq!(interner.len(), 1);
    }

    /// The free functions deduplicate through the shared global instance.
    #[test]
    fn test_global_intern() {
        let s1 = intern("test");
        let s2 = intern("test");
        let s3 = intern("other");

        assert_eq!(&*s1, "test");
        assert!(Arc::ptr_eq(&s1, &s2));
        assert!(!Arc::ptr_eq(&s1, &s3));

        // The owned variant must resolve to the same global entry.
        let s4 = intern_owned("test".to_string());
        assert!(Arc::ptr_eq(&s1, &s4));
    }

    /// `intern_owned` and `intern` share entries regardless of which one
    /// inserted the string first.
    #[test]
    fn test_intern_owned() {
        let interner = StringInterner::new();

        // Owned call inserts, borrowed call hits.
        let s1 = interner.intern_owned("owned".to_string());
        let s2 = interner.intern("owned");
        assert_eq!(&*s1, "owned");
        assert!(Arc::ptr_eq(&s1, &s2));

        // Borrowed call inserts, owned call hits.
        let s3 = interner.intern("borrowed");
        let s4 = interner.intern_owned("borrowed".to_string());
        assert!(Arc::ptr_eq(&s3, &s4));

        assert_eq!(interner.len(), 2);
    }
}