lean_ctx/core/
symbol_map.rs1use std::collections::HashMap;
2
3use crate::core::tokens::count_tokens;
4
/// Expands to an expression yielding a reference to a lazily compiled
/// `regex::Regex`.
///
/// Each expansion owns its own `static` cell, so the pattern is compiled at
/// most once per call site (on first use) and reused afterwards.
///
/// `$pattern` has to be a string literal because it is also spliced into the
/// panic message through `concat!`.
///
/// # Panics
///
/// Panics on first use if the pattern is not a valid regular expression; this
/// is treated as a programming error.
macro_rules! static_regex {
    ($pattern:expr) => {{
        use std::sync::OnceLock;

        static COMPILED: OnceLock<regex::Regex> = OnceLock::new();
        COMPILED.get_or_init(|| {
            let compiled = regex::Regex::new($pattern);
            compiled.expect(concat!("BUG: invalid static regex: ", $pattern))
        })
    }};
}
13
/// Identifiers shorter than this many bytes are never given a short id.
const MIN_IDENT_LENGTH: usize = 6;
/// Character placed in front of the numeric counter to build a short id
/// (`α1`, `α2`, ...).
const SHORT_ID_PREFIX: char = 'α';

/// Table that assigns compact short ids to long identifiers and can rewrite
/// text to use them.
#[derive(Debug, Clone)]
pub struct SymbolMap {
    /// Long identifier -> short id.
    forward: HashMap<String, String>,
    /// Number used for the next short id; the first one handed out is 1.
    next_id: usize,
}

impl Default for SymbolMap {
    /// Same as [`SymbolMap::new`]. Implemented by hand because a derived
    /// `Default` would start the counter at 0 instead of 1.
    fn default() -> Self {
        Self::new()
    }
}

impl SymbolMap {
    /// Creates an empty map whose first short id will be `α1`.
    pub fn new() -> Self {
        Self {
            forward: HashMap::new(),
            next_id: 1,
        }
    }

    /// Registers `identifier` and returns its short id.
    ///
    /// Returns `None` when the identifier is shorter than
    /// `MIN_IDENT_LENGTH` bytes. Registering the same identifier again
    /// returns the id it already has and does not consume a new number.
    pub fn register(&mut self, identifier: &str) -> Option<String> {
        if identifier.len() < MIN_IDENT_LENGTH {
            return None;
        }

        // Look up by `&str` first so the common "already known" path does not
        // allocate an owned key.
        if let Some(existing) = self.forward.get(identifier) {
            return Some(existing.clone());
        }

        let short_id = format!("{SHORT_ID_PREFIX}{}", self.next_id);
        self.next_id += 1;
        self.forward
            .insert(identifier.to_string(), short_id.clone());
        Some(short_id)
    }

    /// Returns `text` with every registered identifier replaced by its short
    /// id.
    ///
    /// Only whole occurrences are replaced: a match that is glued to a
    /// neighbouring identifier character (letter, digit or `_`) is left
    /// alone. Plain substring replacement would turn `handler2` into `α12`
    /// when `handler` is `α1`, which cannot be told apart from short id
    /// number 12.
    ///
    /// Identifiers are processed longest first, with a lexicographic
    /// tie-break, so the result does not depend on `HashMap` iteration order.
    pub fn apply(&self, text: &str) -> String {
        if self.forward.is_empty() {
            return text.to_string();
        }

        let mut ordered: Vec<(&str, &str)> = self
            .forward
            .iter()
            .map(|(long, short)| (long.as_str(), short.as_str()))
            .collect();
        ordered.sort_unstable_by(|a, b| b.0.len().cmp(&a.0.len()).then_with(|| a.0.cmp(b.0)));

        ordered
            .into_iter()
            .fold(text.to_string(), |acc, (long, short)| {
                replace_bounded(&acc, long, short)
            })
    }

    /// Renders the map as a `§MAP:` block with one `short=long` entry per
    /// line, ordered by the numeric part of the short id.
    ///
    /// Returns an empty string when nothing is registered.
    pub fn format_table(&self) -> String {
        if self.forward.is_empty() {
            return String::new();
        }

        let mut entries: Vec<(&String, &String)> = self.forward.iter().collect();
        // Parse each id once instead of on every comparison.
        entries.sort_by_cached_key(|(_, short)| {
            short
                .trim_start_matches(SHORT_ID_PREFIX)
                .parse::<usize>()
                .unwrap_or(0)
        });

        let mut table = String::from("\n§MAP:");
        for (long, short) in entries {
            // Append the pieces directly; no temporary `String` per entry.
            table.push_str("\n ");
            table.push_str(short);
            table.push('=');
            table.push_str(long);
        }
        table
    }

    /// Number of registered identifiers.
    pub fn len(&self) -> usize {
        self.forward.len()
    }

    /// `true` when no identifier has been registered.
    pub fn is_empty(&self) -> bool {
        self.forward.is_empty()
    }
}

/// Returns `true` for characters that can be part of an identifier-like word.
fn is_ident_char(c: char) -> bool {
    c.is_alphanumeric() || c == '_'
}

/// Replaces occurrences of `needle` in `haystack` with `replacement`, skipping
/// any occurrence that is directly attached to an identifier character.
///
/// A side of the match is only guarded when the needle itself begins (or ends)
/// with an identifier character, mirroring regex `\b` semantics.
fn replace_bounded(haystack: &str, needle: &str, replacement: &str) -> String {
    // `str::find("")` always succeeds at offset 0, which would never advance.
    if needle.is_empty() {
        return haystack.to_string();
    }

    let guard_front = needle.chars().next().is_some_and(is_ident_char);
    let guard_back = needle.chars().next_back().is_some_and(is_ident_char);

    let mut out = String::with_capacity(haystack.len());
    let mut cursor = 0;
    while let Some(offset) = haystack[cursor..].find(needle) {
        let start = cursor + offset;
        let end = start + needle.len();

        let glued_front = guard_front
            && haystack[..start]
                .chars()
                .next_back()
                .is_some_and(is_ident_char);
        let glued_back = guard_back && haystack[end..].chars().next().is_some_and(is_ident_char);

        if glued_front || glued_back {
            // Not a standalone occurrence: keep the text and resume one
            // character later so overlapping candidates are still examined.
            let step = haystack[start..].chars().next().map_or(1, char::len_utf8);
            out.push_str(&haystack[cursor..start + step]);
            cursor = start + step;
        } else {
            out.push_str(&haystack[cursor..start]);
            out.push_str(replacement);
            cursor = end;
        }
    }
    out.push_str(&haystack[cursor..]);
    out
}
95
96const MAP_ENTRY_OVERHEAD: usize = 2;
98
99pub fn should_register(identifier: &str, occurrences: usize, next_id: usize) -> bool {
103 if identifier.len() < MIN_IDENT_LENGTH {
104 return false;
105 }
106 let ident_tokens = count_tokens(identifier);
107 let short_id = format!("{SHORT_ID_PREFIX}{next_id}");
108 let short_tokens = count_tokens(&short_id);
109
110 let token_saving_per_use = ident_tokens.saturating_sub(short_tokens);
111 if token_saving_per_use == 0 {
112 return false;
113 }
114
115 let total_savings = occurrences * token_saving_per_use;
116 let entry_cost = ident_tokens + short_tokens + MAP_ENTRY_OVERHEAD;
117
118 total_savings > entry_cost
119}
120
121pub fn extract_identifiers(content: &str, ext: &str) -> Vec<String> {
122 let ident_re = static_regex!(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b");
123
124 let mut seen = HashMap::new();
125 for mat in ident_re.find_iter(content) {
126 let word = mat.as_str();
127 if word.len() >= MIN_IDENT_LENGTH && !is_keyword(word, ext) {
128 *seen.entry(word.to_string()).or_insert(0usize) += 1;
129 }
130 }
131
132 let mut next_id = 1usize;
133 let mut idents: Vec<(String, usize)> = seen
134 .into_iter()
135 .filter(|(ident, count)| {
136 let pass = should_register(ident, *count, next_id);
137 if pass {
138 next_id += 1;
139 }
140 pass
141 })
142 .collect();
143
144 idents.sort_by(|a, b| {
145 let savings_a = a.0.len() * a.1;
146 let savings_b = b.0.len() * b.1;
147 savings_b.cmp(&savings_a)
148 });
149
150 idents.into_iter().map(|(s, _)| s).collect()
151}
152
/// Reports whether `word` is on the reserved-word list for the language
/// selected by the file extension `ext`.
///
/// Lists exist for `rs`, for `ts`/`tsx`/`js`/`jsx`, and for `py`; any other
/// extension has no reserved words and always yields `false`. Matching is
/// exact and case-sensitive.
fn is_keyword(word: &str, ext: &str) -> bool {
    const RUST_WORDS: &[&str] = &["continue", "default", "return", "struct", "unsafe", "where"];
    const SCRIPT_WORDS: &[&str] = &[
        "constructor",
        "arguments",
        "undefined",
        "prototype",
        "instanceof",
    ];
    const PYTHON_WORDS: &[&str] = &["continue", "lambda", "return", "import", "class"];

    let reserved = match ext {
        "rs" => RUST_WORDS,
        "ts" | "tsx" | "js" | "jsx" => SCRIPT_WORDS,
        "py" => PYTHON_WORDS,
        _ => return false,
    };
    reserved.contains(&word)
}
167
#[cfg(test)]
mod tests {
    use super::*;

    /// Long identifier shared by the ROI tests.
    const LONG_IDENT: &str = "authenticate_user_credentials_handler";

    /// Names below the minimum length are rejected no matter how often they
    /// occur.
    #[test]
    fn test_should_register_short_ident_rejected() {
        for (ident, occurrences) in [("foo", 100), ("bar", 50), ("x", 1000)] {
            assert!(!should_register(ident, occurrences, 1));
        }
    }

    /// A long name used several times is worth an entry.
    #[test]
    fn test_should_register_roi_positive() {
        let accepted = should_register(LONG_IDENT, 5, 1);
        assert!(accepted);
    }

    /// A single use cannot amortize the cost of the map entry.
    #[test]
    fn test_should_register_roi_negative_single_use() {
        let accepted = should_register(LONG_IDENT, 1, 1);
        assert!(!accepted);
    }

    /// Passing at a low frequency implies passing at a higher one.
    #[test]
    fn test_should_register_roi_scales_with_frequency() {
        let ident = "configuration_manager_instance";
        let low = should_register(ident, 2, 1);
        let high = should_register(ident, 10, 1);
        assert!(!low || high);
    }

    /// Extraction keeps the profitable long name and drops the short word.
    #[test]
    fn test_extract_identifiers_roi_filtering() {
        let mut content = [LONG_IDENT; 5].join(" ");
        content.push_str(" short");

        let found = extract_identifiers(&content, "rs");
        assert!(found.iter().any(|ident| ident == LONG_IDENT));
        assert!(found.iter().all(|ident| ident != "short"));
    }

    /// Registering twice hands back the same short id.
    #[test]
    fn test_register_returns_existing() {
        let mut map = SymbolMap::new();
        let first = map.register("validateToken");
        let again = map.register("validateToken");
        assert_eq!(first, again);
    }

    /// Applying the map swaps the long name for its short id.
    #[test]
    fn test_apply_replaces_identifiers() {
        let mut map = SymbolMap::new();
        map.register("validateToken");

        let rewritten = map.apply("call validateToken here");
        assert!(rewritten.contains("α1"));
        assert!(!rewritten.contains("validateToken"));
    }

    /// The rendered table has the header and a `short=long` entry.
    #[test]
    fn test_format_table_output() {
        let mut map = SymbolMap::new();
        map.register("validateToken");

        let rendered = map.format_table();
        for expected in ["§MAP:", "α1=validateToken"] {
            assert!(rendered.contains(expected));
        }
    }
}