rumdl_lib/utils/
string_interner.rs

1use std::collections::HashMap;
2use std::sync::{Arc, LazyLock, Mutex};
3
/// String interner that deduplicates strings behind shared `Arc<str>` handles.
///
/// Each distinct string lives in exactly one heap allocation: the map key and
/// the map value are clones of the same `Arc<str>`, so the interner never
/// keeps a second copy of the string bytes.
#[derive(Debug, Default)]
pub struct StringInterner {
    /// Interned strings, keyed by their own contents. `Arc<str>` implements
    /// `Borrow<str>`, so lookups take a plain `&str` without allocating.
    strings: HashMap<Arc<str>, Arc<str>>,
}

impl StringInterner {
    /// Create an empty interner.
    pub fn new() -> Self {
        Self::default()
    }

    /// Intern a string, returning an `Arc<str>` that can be shared.
    ///
    /// The first call for a given string allocates it once; every later call
    /// with equal contents returns a clone of the same `Arc` (a refcount bump,
    /// no copy), so the results compare equal under [`Arc::ptr_eq`].
    pub fn intern(&mut self, s: &str) -> Arc<str> {
        if let Some(existing) = self.strings.get(s) {
            return Arc::clone(existing);
        }

        // Miss: allocate once and let the key, the value, and the returned
        // handle all point at that single allocation.
        let interned: Arc<str> = Arc::from(s);
        self.strings
            .insert(Arc::clone(&interned), Arc::clone(&interned));
        interned
    }

    /// Get the number of distinct interned strings.
    pub fn len(&self) -> usize {
        self.strings.len()
    }

    /// Check if the interner holds no strings.
    pub fn is_empty(&self) -> bool {
        self.strings.is_empty()
    }
}
44
45/// Global string interner for common patterns
46static GLOBAL_INTERNER: LazyLock<Arc<Mutex<StringInterner>>> =
47    LazyLock::new(|| Arc::new(Mutex::new(StringInterner::new())));
48
49/// Intern a string globally
50pub fn intern_string(s: &str) -> Arc<str> {
51    let mut interner = GLOBAL_INTERNER.lock().expect("String interner mutex poisoned");
52    interner.intern(s)
53}
54
55/// Common interned strings for performance
56pub mod common {
57    use super::*;
58    use std::sync::LazyLock;
59
60    // Rule names
61    pub static MD001: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD001"));
62    pub static MD002: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD002"));
63    pub static MD003: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD003"));
64    pub static MD004: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD004"));
65    pub static MD005: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD005"));
66    pub static MD006: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD006"));
67    pub static MD007: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD007"));
68    pub static MD009: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD009"));
69    pub static MD010: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD010"));
70    pub static MD013: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD013"));
71    pub static MD034: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD034"));
72
73    // Common messages
74    pub static TRAILING_SPACES: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("Trailing spaces found"));
75    pub static HARD_TABS: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("Hard tabs found"));
76    pub static LINE_TOO_LONG: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("Line length exceeds limit"));
77    pub static BARE_URL: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("Bare URL found"));
78
79    // Common patterns
80    pub static EMPTY_STRING: LazyLock<Arc<str>> = LazyLock::new(|| intern_string(""));
81    pub static SPACE: LazyLock<Arc<str>> = LazyLock::new(|| intern_string(" "));
82    pub static NEWLINE: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("\n"));
83    pub static HASH: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("#"));
84    pub static ASTERISK: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("*"));
85    pub static DASH: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("-"));
86    pub static PLUS: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("+"));
87}
88
#[cfg(test)]
mod tests {
    use super::*;

    /// A local interner hands back the same allocation for equal strings and
    /// counts only distinct entries.
    #[test]
    fn test_string_interner() {
        let mut pool = StringInterner::new();

        let first_hello = pool.intern("hello");
        let second_hello = pool.intern("hello");
        let world = pool.intern("world");

        // Equal contents share one Arc; different contents do not.
        assert!(Arc::ptr_eq(&first_hello, &second_hello));
        assert!(!Arc::ptr_eq(&first_hello, &world));

        assert_eq!(pool.len(), 2);
        assert!(!pool.is_empty());
    }

    /// The global entry point deduplicates across separate calls.
    #[test]
    fn test_global_interner() {
        let first = intern_string("test");
        let second = intern_string("test");

        assert!(Arc::ptr_eq(&first, &second));
    }
}
116}