rumdl_lib/utils/
string_interner.rs

1use std::collections::HashMap;
2use std::sync::{Arc, LazyLock, Mutex};
3
/// String interner for reducing memory allocations of common strings.
///
/// Every distinct string passed to [`StringInterner::intern`] is stored once;
/// later calls with equal text return a clone of the same `Arc<str>`, so the
/// handles compare equal under `Arc::ptr_eq`.
#[derive(Debug, Default)]
pub struct StringInterner {
    /// Maps string content to the shared handle handed out for it.
    strings: HashMap<String, Arc<str>>,
}

impl StringInterner {
    /// Create an empty interner.
    pub fn new() -> Self {
        Self::default()
    }

    /// Intern a string, returning an `Arc<str>` that can be shared.
    ///
    /// If `s` has been interned before, the existing handle is cloned (a
    /// reference-count bump, no allocation). Otherwise the text is copied into
    /// a new `Arc<str>`, recorded, and returned.
    pub fn intern(&mut self, s: &str) -> Arc<str> {
        // Borrowed lookup first: `entry` needs an owned key, which would force
        // a `String` allocation on every call, including cache hits.
        if let Some(existing) = self.strings.get(s) {
            return Arc::clone(existing);
        }

        // Miss: allocate the shared handle and the owned key exactly once.
        let shared: Arc<str> = Arc::from(s);
        self.strings.insert(s.to_owned(), Arc::clone(&shared));
        shared
    }

    /// Get the number of distinct interned strings.
    pub fn len(&self) -> usize {
        self.strings.len()
    }

    /// Check if the interner holds no strings.
    pub fn is_empty(&self) -> bool {
        self.strings.is_empty()
    }
}
38
39/// Global string interner for common patterns
40static GLOBAL_INTERNER: LazyLock<Arc<Mutex<StringInterner>>> =
41    LazyLock::new(|| Arc::new(Mutex::new(StringInterner::new())));
42
43/// Intern a string globally
44///
45/// If the mutex is poisoned, returns a fresh Arc<str> without interning.
46/// This ensures the library never panics due to mutex poisoning.
47pub fn intern_string(s: &str) -> Arc<str> {
48    match GLOBAL_INTERNER.lock() {
49        Ok(mut interner) => interner.intern(s),
50        Err(_) => Arc::from(s),
51    }
52}
53
54/// Common interned strings for performance
55pub mod common {
56    use super::*;
57    use std::sync::LazyLock;
58
59    // Rule names
60    pub static MD001: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD001"));
61    pub static MD002: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD002"));
62    pub static MD003: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD003"));
63    pub static MD004: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD004"));
64    pub static MD005: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD005"));
65    pub static MD006: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD006"));
66    pub static MD007: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD007"));
67    pub static MD009: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD009"));
68    pub static MD010: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD010"));
69    pub static MD013: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD013"));
70    pub static MD034: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("MD034"));
71
72    // Common messages
73    pub static TRAILING_SPACES: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("Trailing spaces found"));
74    pub static HARD_TABS: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("Hard tabs found"));
75    pub static LINE_TOO_LONG: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("Line length exceeds limit"));
76    pub static BARE_URL: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("Bare URL found"));
77
78    // Common patterns
79    pub static EMPTY_STRING: LazyLock<Arc<str>> = LazyLock::new(|| intern_string(""));
80    pub static SPACE: LazyLock<Arc<str>> = LazyLock::new(|| intern_string(" "));
81    pub static NEWLINE: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("\n"));
82    pub static HASH: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("#"));
83    pub static ASTERISK: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("*"));
84    pub static DASH: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("-"));
85    pub static PLUS: LazyLock<Arc<str>> = LazyLock::new(|| intern_string("+"));
86}
87
#[cfg(test)]
mod tests {
    use super::*;

    /// A local interner shares one allocation per distinct string and counts
    /// each distinct string once.
    #[test]
    fn test_string_interner() {
        let mut pool = StringInterner::new();

        let hello_first = pool.intern("hello");
        let hello_again = pool.intern("hello");
        let world = pool.intern("world");

        // Equal text must resolve to the very same allocation...
        assert!(
            Arc::ptr_eq(&hello_first, &hello_again),
            "equal strings should share one Arc"
        );
        // ...while different text must not.
        assert!(
            !Arc::ptr_eq(&hello_first, &world),
            "distinct strings should not share an Arc"
        );

        // "hello" and "world" are the only two entries.
        assert_eq!(pool.len(), 2);
        assert!(!pool.is_empty());
    }

    /// The global helper returns the same allocation for repeated input.
    #[test]
    fn test_global_interner() {
        let first = intern_string("test");
        let second = intern_string("test");

        assert!(
            Arc::ptr_eq(&first, &second),
            "global interner should reuse the Arc"
        );
    }
}