rumdl_lib/utils/
string_interner.rs

1use lazy_static::lazy_static;
2use std::collections::HashMap;
3use std::sync::{Arc, Mutex};
4
/// String interner for reducing memory allocations of common strings.
///
/// Every distinct string handed to [`StringInterner::intern`] is copied into exactly one
/// heap allocation. The map's key and its value are clones of the same `Arc<str>`, so no
/// second owned copy of the text is kept merely to serve as the lookup key.
#[derive(Debug)]
pub struct StringInterner {
    /// Interned strings; each key and its value point at the same allocation.
    strings: HashMap<Arc<str>, Arc<str>>,
}

impl Default for StringInterner {
    /// Equivalent to [`StringInterner::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl StringInterner {
    /// Create an empty interner.
    pub fn new() -> Self {
        Self {
            strings: HashMap::new(),
        }
    }

    /// Intern a string, returning an `Arc<str>` that can be shared.
    ///
    /// The first call for a given text allocates it once and remembers it; later calls
    /// with equal text return a clone of that same `Arc`, so the results compare equal
    /// under `Arc::ptr_eq`.
    pub fn intern(&mut self, s: &str) -> Arc<str> {
        // `Arc<str>` implements `Borrow<str>`, so the hit path looks up by `&str`
        // without allocating anything.
        if let Some(existing) = self.strings.get(s) {
            return Arc::clone(existing);
        }

        // Miss: allocate the text once and reuse that allocation for the key, the
        // stored value, and the handle returned to the caller.
        let shared: Arc<str> = Arc::from(s);
        self.strings
            .insert(Arc::clone(&shared), Arc::clone(&shared));
        shared
    }

    /// Get the number of interned strings
    pub fn len(&self) -> usize {
        self.strings.len()
    }

    /// Check if the interner is empty
    pub fn is_empty(&self) -> bool {
        self.strings.is_empty()
    }
}
45
46lazy_static! {
47    /// Global string interner for common patterns
48    static ref GLOBAL_INTERNER: Arc<Mutex<StringInterner>> = Arc::new(Mutex::new(StringInterner::new()));
49}
50
51/// Intern a string globally
52pub fn intern_string(s: &str) -> Arc<str> {
53    let mut interner = GLOBAL_INTERNER.lock().unwrap();
54    interner.intern(s)
55}
56
57/// Common interned strings for performance
58pub mod common {
59    use super::*;
60    use lazy_static::lazy_static;
61
62    lazy_static! {
63        // Rule names
64        pub static ref MD001: Arc<str> = intern_string("MD001");
65        pub static ref MD002: Arc<str> = intern_string("MD002");
66        pub static ref MD003: Arc<str> = intern_string("MD003");
67        pub static ref MD004: Arc<str> = intern_string("MD004");
68        pub static ref MD005: Arc<str> = intern_string("MD005");
69        pub static ref MD006: Arc<str> = intern_string("MD006");
70        pub static ref MD007: Arc<str> = intern_string("MD007");
71        pub static ref MD009: Arc<str> = intern_string("MD009");
72        pub static ref MD010: Arc<str> = intern_string("MD010");
73        pub static ref MD013: Arc<str> = intern_string("MD013");
74        pub static ref MD034: Arc<str> = intern_string("MD034");
75
76        // Common messages
77        pub static ref TRAILING_SPACES: Arc<str> = intern_string("Trailing spaces found");
78        pub static ref HARD_TABS: Arc<str> = intern_string("Hard tabs found");
79        pub static ref LINE_TOO_LONG: Arc<str> = intern_string("Line length exceeds limit");
80        pub static ref BARE_URL: Arc<str> = intern_string("Bare URL found");
81
82        // Common patterns
83        pub static ref EMPTY_STRING: Arc<str> = intern_string("");
84        pub static ref SPACE: Arc<str> = intern_string(" ");
85        pub static ref NEWLINE: Arc<str> = intern_string("\n");
86        pub static ref HASH: Arc<str> = intern_string("#");
87        pub static ref ASTERISK: Arc<str> = intern_string("*");
88        pub static ref DASH: Arc<str> = intern_string("-");
89        pub static ref PLUS: Arc<str> = intern_string("+");
90    }
91}
92
#[cfg(test)]
mod tests {
    use super::*;

    /// A local interner hands back one shared allocation per distinct text and
    /// reports how many distinct texts it holds.
    #[test]
    fn test_string_interner() {
        let mut pool = StringInterner::new();

        let first_hello = pool.intern("hello");
        let second_hello = pool.intern("hello");
        let world = pool.intern("world");

        // Equal text shares an allocation; different text does not.
        assert!(Arc::ptr_eq(&first_hello, &second_hello));
        assert!(!Arc::ptr_eq(&first_hello, &world));

        // Two distinct texts were interned, so the pool is non-empty with two entries.
        assert_eq!(pool.len(), 2);
        assert!(!pool.is_empty());
    }

    /// The global entry point also returns the same allocation for equal text.
    #[test]
    fn test_global_interner() {
        let first = intern_string("test");
        let second = intern_string("test");

        assert!(Arc::ptr_eq(&first, &second));
    }
}