// lean_ctx/core/symbol_map.rs
use std::collections::HashMap;
2
3use crate::core::tokens::count_tokens;
4
/// Expands to a reference to a lazily compiled `regex::Regex`.
///
/// Each expansion owns its own `static` cell, so the pattern is compiled the
/// first time that call site is reached and reused afterwards. The pattern
/// must be a literal because it is also spliced into the panic message with
/// `concat!`; an invalid pattern panics on first use.
macro_rules! static_regex {
    ($pattern:expr) => {{
        use std::sync::OnceLock;

        static COMPILED: OnceLock<regex::Regex> = OnceLock::new();
        COMPILED.get_or_init(|| {
            regex::Regex::new($pattern).expect(concat!("BUG: invalid static regex: ", $pattern))
        })
    }};
}
13
/// Minimum length, in bytes, an identifier must have before it is considered
/// for aliasing; shorter identifiers are rejected outright.
const MIN_IDENT_LENGTH: usize = 6;
/// Character placed in front of the numeric id to form an alias (`α1`, `α2`, …).
const SHORT_ID_PREFIX: char = 'α';
16
17pub fn substitution_enabled() -> bool {
25 if let Ok(v) = std::env::var("LEAN_CTX_SYMBOL_MAP") {
26 return v == "1" || v.eq_ignore_ascii_case("true") || v.eq_ignore_ascii_case("on");
27 }
28 let cfg = crate::core::config::Config::load();
29 if cfg.symbol_map_auto {
30 return auto_detect_large_project();
31 }
32 false
33}
34
35fn auto_detect_large_project() -> bool {
36 use std::sync::OnceLock;
37 static DETECTED: OnceLock<bool> = OnceLock::new();
38 *DETECTED.get_or_init(|| {
39 let cwd = std::env::current_dir().unwrap_or_default();
40 let source_exts = [
41 "rs", "ts", "tsx", "js", "jsx", "py", "go", "java", "rb", "cpp", "c", "h",
42 ];
43 let count = ignore::WalkBuilder::new(&cwd)
44 .hidden(true)
45 .max_depth(Some(6))
46 .git_ignore(true)
47 .build()
48 .filter_map(std::result::Result::ok)
49 .filter(|e| {
50 e.file_type().is_some_and(|ft| ft.is_file())
51 && e.path()
52 .extension()
53 .and_then(|ext| ext.to_str())
54 .is_some_and(|ext| source_exts.contains(&ext))
55 })
56 .take(51)
57 .count();
58 count > 50
59 })
60}
61
62#[derive(Debug, Clone)]
63pub struct SymbolMap {
64 forward: HashMap<String, String>,
65 next_id: usize,
66}
67
68impl Default for SymbolMap {
69 fn default() -> Self {
70 Self::new()
71 }
72}
73
74impl SymbolMap {
75 pub fn new() -> Self {
76 Self {
77 forward: HashMap::new(),
78 next_id: 1,
79 }
80 }
81
82 pub fn register(&mut self, identifier: &str) -> Option<String> {
83 if identifier.len() < MIN_IDENT_LENGTH {
84 return None;
85 }
86
87 if let Some(existing) = self.forward.get(identifier) {
88 return Some(existing.clone());
89 }
90
91 let short_id = format!("{SHORT_ID_PREFIX}{}", self.next_id);
92 self.next_id += 1;
93 self.forward
94 .insert(identifier.to_string(), short_id.clone());
95 Some(short_id)
96 }
97
98 pub fn apply(&self, text: &str) -> String {
99 if self.forward.is_empty() {
100 return text.to_string();
101 }
102
103 let mut sorted: Vec<(&String, &String)> = self.forward.iter().collect();
104 sorted.sort_by_key(|x| std::cmp::Reverse(x.0.len()));
105
106 let mut result = text.to_string();
107 for (long, short) in &sorted {
108 result = result.replace(long.as_str(), short.as_str());
109 }
110 result
111 }
112
113 pub fn format_table(&self) -> String {
114 if self.forward.is_empty() {
115 return String::new();
116 }
117
118 let mut entries: Vec<(&String, &String)> = self.forward.iter().collect();
119 entries.sort_by_key(|(_, v)| {
120 v.trim_start_matches(SHORT_ID_PREFIX)
121 .parse::<usize>()
122 .unwrap_or(0)
123 });
124
125 let mut table = String::from("\n§MAP:");
126 for (long, short) in &entries {
127 table.push_str(&format!("\n {short}={long}"));
128 }
129 table
130 }
131
132 pub fn len(&self) -> usize {
133 self.forward.len()
134 }
135
136 pub fn is_empty(&self) -> bool {
137 self.forward.is_empty()
138 }
139}
140
141const MAP_ENTRY_OVERHEAD: usize = 2;
143
144pub fn should_register(identifier: &str, occurrences: usize, next_id: usize) -> bool {
148 if identifier.len() < MIN_IDENT_LENGTH {
149 return false;
150 }
151 let ident_tokens = count_tokens(identifier);
152 let short_id = format!("{SHORT_ID_PREFIX}{next_id}");
153 let short_tokens = count_tokens(&short_id);
154
155 let token_saving_per_use = ident_tokens.saturating_sub(short_tokens);
156 if token_saving_per_use == 0 {
157 return false;
158 }
159
160 let total_savings = occurrences * token_saving_per_use;
161 let entry_cost = ident_tokens + short_tokens + MAP_ENTRY_OVERHEAD;
162
163 total_savings > entry_cost
164}
165
166pub fn extract_identifiers(content: &str, ext: &str) -> Vec<String> {
167 let ident_re = static_regex!(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b");
168
169 let mut seen = HashMap::new();
170 for mat in ident_re.find_iter(content) {
171 let word = mat.as_str();
172 if word.len() >= MIN_IDENT_LENGTH && !is_keyword(word, ext) {
173 *seen.entry(word.to_string()).or_insert(0usize) += 1;
174 }
175 }
176
177 let mut next_id = 1usize;
178 let mut idents: Vec<(String, usize)> = seen
179 .into_iter()
180 .filter(|(ident, count)| {
181 let pass = should_register(ident, *count, next_id);
182 if pass {
183 next_id += 1;
184 }
185 pass
186 })
187 .collect();
188
189 idents.sort_by(|a, b| {
190 let savings_a = a.0.len() * a.1;
191 let savings_b = b.0.len() * b.1;
192 savings_b.cmp(&savings_a)
193 });
194
195 idents.into_iter().map(|(s, _)| s).collect()
196}
197
/// Tells whether `word` is on the exclusion list for files with extension
/// `ext`.
///
/// Lists exist for `rs`, for `ts`/`tsx`/`js`/`jsx`, and for `py`; every other
/// extension has no excluded words. Matching is exact and case-sensitive.
fn is_keyword(word: &str, ext: &str) -> bool {
    let reserved: &[&str] = match ext {
        "rs" => &["continue", "default", "return", "struct", "unsafe", "where"],
        "ts" | "tsx" | "js" | "jsx" => &[
            "constructor",
            "arguments",
            "undefined",
            "prototype",
            "instanceof",
        ],
        "py" => &["continue", "lambda", "return", "import", "class"],
        _ => return false,
    };
    reserved.contains(&word)
}
212
#[cfg(test)]
mod tests {
    use super::*;

    /// Long identifier used by the ROI tests.
    const LONG_IDENT: &str = "authenticate_user_credentials_handler";

    /// Identifiers below the minimum length are rejected however often they occur.
    #[test]
    fn test_should_register_short_ident_rejected() {
        for (ident, occurrences) in [("foo", 100), ("bar", 50), ("x", 1000)] {
            assert!(!should_register(ident, occurrences, 1));
        }
    }

    /// A long identifier used several times is worth an alias.
    #[test]
    fn test_should_register_roi_positive() {
        let accepted = should_register(LONG_IDENT, 5, 1);
        assert!(accepted);
    }

    /// A single use cannot pay for the table entry.
    #[test]
    fn test_should_register_roi_negative_single_use() {
        let accepted = should_register(LONG_IDENT, 1, 1);
        assert!(!accepted);
    }

    /// Acceptance is monotonic in frequency: passing at 2 uses implies passing at 10.
    #[test]
    fn test_should_register_roi_scales_with_frequency() {
        let ident = "configuration_manager_instance";
        let passes_at_low = should_register(ident, 2, 1);
        let passes_at_high = should_register(ident, 10, 1);
        if passes_at_low {
            assert!(passes_at_high);
        }
    }

    /// Extraction keeps the frequently used long identifier and drops the short word.
    #[test]
    fn test_extract_identifiers_roi_filtering() {
        let mut content = [LONG_IDENT; 5].join(" ");
        content.push_str(" short");

        let result = extract_identifiers(&content, "rs");

        assert!(result.iter().any(|ident| ident == LONG_IDENT));
        assert!(!result.iter().any(|ident| ident == "short"));
    }

    /// Registering the same identifier twice yields the same alias.
    #[test]
    fn test_register_returns_existing() {
        let mut map = SymbolMap::new();
        let first = map.register("validateToken");
        let second = map.register("validateToken");
        assert_eq!(first, second);
    }

    /// `apply` swaps the registered identifier for its alias.
    #[test]
    fn test_apply_replaces_identifiers() {
        let mut map = SymbolMap::new();
        map.register("validateToken");

        let result = map.apply("call validateToken here");

        assert!(result.contains("α1"));
        assert!(!result.contains("validateToken"));
    }

    /// The rendered table carries the section header and the alias line.
    #[test]
    fn test_format_table_output() {
        let mut map = SymbolMap::new();
        map.register("validateToken");

        let table = map.format_table();

        for expected in ["§MAP:", "α1=validateToken"] {
            assert!(table.contains(expected));
        }
    }
}