rumdl_lib/utils/
string_interner.rs

1use std::collections::HashMap;
2use std::sync::{Arc, LazyLock, Mutex};
3
/// String interner that deduplicates equal strings behind shared `Arc<str>` handles.
///
/// Every distinct string is copied onto the heap exactly once. Interning the same
/// content again returns a clone of the existing handle, which only bumps a
/// reference count.
#[derive(Debug)]
pub struct StringInterner {
    /// Lookup table from string content to its canonical handle.
    ///
    /// Key and value are clones of the *same* `Arc`, so the text itself is stored
    /// once per entry. Because `Arc<str>` implements `Borrow<str>`, the table can
    /// still be queried with a plain `&str`.
    strings: HashMap<Arc<str>, Arc<str>>,
}

impl Default for StringInterner {
    /// Equivalent to [`StringInterner::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl StringInterner {
    /// Create an empty interner.
    pub fn new() -> Self {
        Self {
            strings: HashMap::new(),
        }
    }

    /// Intern a string, returning an `Arc<str>` that can be shared.
    ///
    /// If `s` was interned before, the returned handle points at the same
    /// allocation as the earlier one (`Arc::ptr_eq` holds). Otherwise `s` is copied
    /// into a new allocation, recorded, and returned.
    pub fn intern(&mut self, s: &str) -> Arc<str> {
        if let Some(existing) = self.strings.get(s) {
            return Arc::clone(existing);
        }

        // Allocate once and let the map key share that allocation with the value,
        // instead of keeping a second owned copy of the text as the key.
        let handle: Arc<str> = Arc::from(s);
        self.strings.insert(Arc::clone(&handle), Arc::clone(&handle));
        handle
    }

    /// Get the number of distinct interned strings.
    pub fn len(&self) -> usize {
        self.strings.len()
    }

    /// Check whether no string has been interned yet.
    pub fn is_empty(&self) -> bool {
        self.strings.is_empty()
    }
}
44
45/// Global string interner for common patterns
46static GLOBAL_INTERNER: LazyLock<Arc<Mutex<StringInterner>>> =
47    LazyLock::new(|| Arc::new(Mutex::new(StringInterner::new())));
48
49/// Intern a string globally
50///
51/// If the mutex is poisoned, returns a fresh Arc<str> without interning.
52/// This ensures the library never panics due to mutex poisoning.
53pub fn intern_string(s: &str) -> Arc<str> {
54    match GLOBAL_INTERNER.lock() {
55        Ok(mut interner) => interner.intern(s),
56        Err(_) => Arc::from(s),
57    }
58}
59
60/// Common interned strings for performance
61pub mod common {
62    use super::*;
63    use std::sync::LazyLock;
64
65    // Rule names
66    pub static MD001: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD001"));
67    pub static MD002: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD002"));
68    pub static MD003: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD003"));
69    pub static MD004: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD004"));
70    pub static MD005: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD005"));
71    pub static MD006: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD006"));
72    pub static MD007: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD007"));
73    pub static MD009: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD009"));
74    pub static MD010: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD010"));
75    pub static MD013: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD013"));
76    pub static MD034: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD034"));
77
78    // Common messages
79    pub static TRAILING_SPACES: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("Trailing spaces found"));
80    pub static HARD_TABS: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("Hard tabs found"));
81    pub static LINE_TOO_LONG: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("Line length exceeds limit"));
82    pub static BARE_URL: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("Bare URL found"));
83
84    // Common patterns
85    pub static EMPTY_STRING: LazyLock<Arc<str>> = LazyLock::new(|| intern_string(""));
86    pub static SPACE: LazyLock<Arc<str>> = LazyLock::new(|| intern_string(" "));
87    pub static NEWLINE: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("\n"));
88    pub static HASH: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("#"));
89    pub static ASTERISK: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("*"));
90    pub static DASH: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("-"));
91    pub static PLUS: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("+"));
92}
93
#[cfg(test)]
mod tests {
    use super::*;

    /// A local interner returns one shared allocation per distinct string.
    #[test]
    fn test_string_interner() {
        let mut pool = StringInterner::new();

        let first_hello = pool.intern("hello");
        let second_hello = pool.intern("hello");
        let world = pool.intern("world");

        // Two distinct strings went in, so exactly two entries exist.
        assert_eq!(pool.len(), 2);
        assert!(!pool.is_empty());

        // Equal content shares a handle; different content does not.
        assert!(Arc::ptr_eq(&first_hello, &second_hello));
        assert!(!Arc::ptr_eq(&first_hello, &world));
    }

    /// Interning the same text twice through the global entry point yields the same allocation.
    #[test]
    fn test_global_interner() {
        let handles = [intern_string("test"), intern_string("test")];

        assert!(Arc::ptr_eq(&handles[0], &handles[1]));
    }
}