lean_ctx/core/
symbol_map.rs1use std::collections::HashMap;
2
3use crate::core::tokens::count_tokens;
4
/// Identifiers shorter than this many bytes are never given a short alias.
const MIN_IDENT_LENGTH: usize = 6;
/// Character that starts every generated alias (`α1`, `α2`, ...).
const SHORT_ID_PREFIX: char = 'α';

/// Maps long identifiers to compact `α<n>` aliases.
///
/// Aliases are numbered in registration order, starting at `α1`.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct SymbolMap {
    /// Long identifier -> alias.
    forward: HashMap<String, String>,
    /// Numeric suffix that the next newly registered identifier receives.
    next_id: usize,
}

impl Default for SymbolMap {
    /// Same as [`SymbolMap::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl SymbolMap {
    /// Creates an empty map whose first alias will be `α1`.
    pub fn new() -> Self {
        Self {
            forward: HashMap::new(),
            next_id: 1,
        }
    }

    /// Registers `identifier` and returns its alias.
    ///
    /// Returns `None` when the identifier is shorter than `MIN_IDENT_LENGTH`
    /// bytes. Registering the same identifier again returns the alias it
    /// already has and does not consume a new id.
    pub fn register(&mut self, identifier: &str) -> Option<String> {
        if identifier.len() < MIN_IDENT_LENGTH {
            return None;
        }

        if let Some(existing) = self.forward.get(identifier) {
            return Some(existing.clone());
        }

        let short_id = format!("{SHORT_ID_PREFIX}{}", self.next_id);
        self.next_id += 1;
        self.forward
            .insert(identifier.to_string(), short_id.clone());
        Some(short_id)
    }

    /// Returns `text` with every whole-word occurrence of a registered
    /// identifier replaced by its alias.
    ///
    /// Occurrences embedded in a longer word are left untouched: rewriting
    /// `validateToken2` to `α12` would make it indistinguishable from alias
    /// number 12, and rewriting `validateTokens` would corrupt an unrelated
    /// name. Longer identifiers are substituted first; identifiers of equal
    /// length are processed in lexicographic order so the result does not
    /// depend on `HashMap` iteration order.
    pub fn apply(&self, text: &str) -> String {
        if self.forward.is_empty() {
            return text.to_string();
        }

        let mut sorted: Vec<(&str, &str)> = self
            .forward
            .iter()
            .map(|(long, short)| (long.as_str(), short.as_str()))
            .collect();
        sorted.sort_unstable_by(|a, b| b.0.len().cmp(&a.0.len()).then_with(|| a.0.cmp(b.0)));

        let mut result = text.to_string();
        for (long, short) in sorted {
            result = Self::replace_whole_word(&result, long, short);
        }
        result
    }

    /// Renders the alias table as a `§MAP:` section, one `alias=identifier`
    /// line per entry, ordered by the numeric part of the alias.
    ///
    /// Returns an empty string when nothing is registered.
    pub fn format_table(&self) -> String {
        if self.forward.is_empty() {
            return String::new();
        }

        let mut entries: Vec<(&String, &String)> = self.forward.iter().collect();
        // Parse each alias number once; aliases that fail to parse sort first.
        entries.sort_by_cached_key(|(_, short)| {
            short
                .trim_start_matches(SHORT_ID_PREFIX)
                .parse::<usize>()
                .unwrap_or(0)
        });

        let mut table = String::from("\n§MAP:");
        for (long, short) in entries {
            table.push_str("\n ");
            table.push_str(short);
            table.push('=');
            table.push_str(long);
        }
        table
    }

    /// Number of registered identifiers.
    #[allow(dead_code)]
    pub fn len(&self) -> usize {
        self.forward.len()
    }

    /// `true` when no identifier has been registered.
    #[allow(dead_code)]
    pub fn is_empty(&self) -> bool {
        self.forward.is_empty()
    }

    /// Characters that can be part of a word: alphanumerics and `_`.
    fn is_word_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    /// Replaces occurrences of `needle` in `haystack` that are not glued to a
    /// neighbouring word character.
    ///
    /// A boundary is only enforced on a side where `needle` itself begins or
    /// ends with a word character, so needles with punctuation at an edge are
    /// still matched next to anything.
    fn replace_whole_word(haystack: &str, needle: &str, replacement: &str) -> String {
        if needle.is_empty() {
            return haystack.to_string();
        }

        let guard_start = needle.chars().next().map_or(false, Self::is_word_char);
        let guard_end = needle
            .chars()
            .next_back()
            .map_or(false, Self::is_word_char);

        let mut out = String::with_capacity(haystack.len());
        let mut copied_to = 0;
        let mut search_from = 0;

        while let Some(offset) = haystack[search_from..].find(needle) {
            let start = search_from + offset;
            let end = start + needle.len();

            let glued_before = guard_start
                && haystack[..start]
                    .chars()
                    .next_back()
                    .map_or(false, Self::is_word_char);
            let glued_after = guard_end
                && haystack[end..]
                    .chars()
                    .next()
                    .map_or(false, Self::is_word_char);

            if glued_before || glued_after {
                // Part of a longer word: resume one character later so an
                // overlapping candidate is still considered.
                let step = haystack[start..].chars().next().map_or(1, char::len_utf8);
                search_from = start + step;
            } else {
                out.push_str(&haystack[copied_to..start]);
                out.push_str(replacement);
                copied_to = end;
                search_from = end;
            }
        }

        out.push_str(&haystack[copied_to..]);
        out
    }
}
89
90const MAP_ENTRY_OVERHEAD: usize = 2;
92
93pub fn should_register(identifier: &str, occurrences: usize, next_id: usize) -> bool {
97 if identifier.len() < MIN_IDENT_LENGTH {
98 return false;
99 }
100 let ident_tokens = count_tokens(identifier);
101 let short_id = format!("{SHORT_ID_PREFIX}{next_id}");
102 let short_tokens = count_tokens(&short_id);
103
104 let token_saving_per_use = ident_tokens.saturating_sub(short_tokens);
105 if token_saving_per_use == 0 {
106 return false;
107 }
108
109 let total_savings = occurrences * token_saving_per_use;
110 let entry_cost = ident_tokens + short_tokens + MAP_ENTRY_OVERHEAD;
111
112 total_savings > entry_cost
113}
114
115pub fn extract_identifiers(content: &str, ext: &str) -> Vec<String> {
116 let ident_re = regex::Regex::new(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b").unwrap();
117
118 let mut seen = HashMap::new();
119 for mat in ident_re.find_iter(content) {
120 let word = mat.as_str();
121 if word.len() >= MIN_IDENT_LENGTH && !is_keyword(word, ext) {
122 *seen.entry(word.to_string()).or_insert(0usize) += 1;
123 }
124 }
125
126 let mut next_id = 1usize;
127 let mut idents: Vec<(String, usize)> = seen
128 .into_iter()
129 .filter(|(ident, count)| {
130 let pass = should_register(ident, *count, next_id);
131 if pass {
132 next_id += 1;
133 }
134 pass
135 })
136 .collect();
137
138 idents.sort_by(|a, b| {
139 let savings_a = a.0.len() * a.1;
140 let savings_b = b.0.len() * b.1;
141 savings_b.cmp(&savings_a)
142 });
143
144 idents.into_iter().map(|(s, _)| s).collect()
145}
146
/// Reports whether `word` is on the exclusion list for files with extension
/// `ext` (given without the dot). Unknown extensions have no exclusion list.
fn is_keyword(word: &str, ext: &str) -> bool {
    const RUST_WORDS: &[&str] = &["continue", "default", "return", "struct", "unsafe", "where"];
    const JS_TS_WORDS: &[&str] = &[
        "constructor",
        "arguments",
        "undefined",
        "prototype",
        "instanceof",
    ];
    const PYTHON_WORDS: &[&str] = &["continue", "lambda", "return", "import", "class"];

    // Pick the table for this extension, then do an exact, case-sensitive lookup.
    let excluded = match ext {
        "rs" => RUST_WORDS,
        "ts" | "tsx" | "js" | "jsx" => JS_TS_WORDS,
        "py" => PYTHON_WORDS,
        _ => return false,
    };

    excluded.iter().any(|&candidate| candidate == word)
}
161
162#[cfg(test)]
163mod tests {
164 use super::*;
165
166 #[test]
167 fn test_should_register_short_ident_rejected() {
168 assert!(!should_register("foo", 100, 1));
169 assert!(!should_register("bar", 50, 1));
170 assert!(!should_register("x", 1000, 1));
171 }
172
173 #[test]
174 fn test_should_register_roi_positive() {
175 assert!(should_register(
177 "authenticate_user_credentials_handler",
178 5,
179 1
180 ));
181 }
182
183 #[test]
184 fn test_should_register_roi_negative_single_use() {
185 assert!(!should_register(
187 "authenticate_user_credentials_handler",
188 1,
189 1
190 ));
191 }
192
193 #[test]
194 fn test_should_register_roi_scales_with_frequency() {
195 let ident = "configuration_manager_instance";
196 let passes_at_low = should_register(ident, 2, 1);
198 let passes_at_high = should_register(ident, 10, 1);
199 assert!(passes_at_high || !passes_at_low);
201 }
202
203 #[test]
204 fn test_extract_identifiers_roi_filtering() {
205 let long = "authenticate_user_credentials_handler";
207 let content = format!("{long} {long} {long} {long} {long} short");
208 let result = extract_identifiers(&content, "rs");
209 assert!(result.contains(&long.to_string()));
210 assert!(!result.contains(&"short".to_string()));
211 }
212
213 #[test]
214 fn test_register_returns_existing() {
215 let mut map = SymbolMap::new();
216 let first = map.register("validateToken");
217 let second = map.register("validateToken");
218 assert_eq!(first, second);
219 }
220
221 #[test]
222 fn test_apply_replaces_identifiers() {
223 let mut map = SymbolMap::new();
224 map.register("validateToken");
225 let result = map.apply("call validateToken here");
226 assert!(result.contains("α1"));
227 assert!(!result.contains("validateToken"));
228 }
229
230 #[test]
231 fn test_format_table_output() {
232 let mut map = SymbolMap::new();
233 map.register("validateToken");
234 let table = map.format_table();
235 assert!(table.contains("§MAP:"));
236 assert!(table.contains("α1=validateToken"));
237 }
238}