lean_ctx/core/
symbol_map.rs1use std::collections::HashMap;
2
3use crate::core::tokens::count_tokens;
4
/// Minimum length, in bytes, an identifier must have to be given an alias.
const MIN_IDENT_LENGTH: usize = 6;
/// Character that starts every generated alias (`α1`, `α2`, ...).
const SHORT_ID_PREFIX: char = 'α';

/// Table that assigns short `α<n>` aliases to long identifiers and rewrites
/// text to use those aliases.
#[derive(Debug, Clone)]
pub struct SymbolMap {
    /// Long identifier -> short alias.
    forward: HashMap<String, String>,
    /// Numeric suffix the next newly registered identifier will receive.
    next_id: usize,
}

impl Default for SymbolMap {
    /// Equivalent to [`SymbolMap::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl SymbolMap {
    /// Creates an empty map; the first registered identifier becomes `α1`.
    pub fn new() -> Self {
        Self {
            forward: HashMap::new(),
            next_id: 1,
        }
    }

    /// Registers `identifier` and returns its alias.
    ///
    /// Returns `None` when the identifier is shorter than
    /// `MIN_IDENT_LENGTH` bytes. Registering the same identifier again
    /// returns the alias that was assigned the first time.
    pub fn register(&mut self, identifier: &str) -> Option<String> {
        if identifier.len() < MIN_IDENT_LENGTH {
            return None;
        }

        if let Some(existing) = self.forward.get(identifier) {
            return Some(existing.clone());
        }

        let short_id = format!("{SHORT_ID_PREFIX}{}", self.next_id);
        self.next_id += 1;
        self.forward
            .insert(identifier.to_string(), short_id.clone());
        Some(short_id)
    }

    /// Returns `text` with every whole-word occurrence of a registered
    /// identifier replaced by its alias.
    ///
    /// Occurrences embedded in a longer word are left untouched: rewriting
    /// `validateToken2` to `α12` would be indistinguishable from alias 12.
    /// Identifiers are processed longest first, with ties broken
    /// lexicographically, so the result does not depend on hash-map
    /// iteration order.
    pub fn apply(&self, text: &str) -> String {
        if self.forward.is_empty() {
            return text.to_string();
        }

        let mut pairs: Vec<(&str, &str)> = self
            .forward
            .iter()
            .map(|(long, short)| (long.as_str(), short.as_str()))
            .collect();
        pairs.sort_by(|a, b| b.0.len().cmp(&a.0.len()).then_with(|| a.0.cmp(b.0)));

        let mut result = text.to_string();
        for (long, short) in pairs {
            result = Self::replace_whole_words(&result, long, short);
        }
        result
    }

    /// Renders the alias table as a `§MAP:` header followed by one
    /// `alias=identifier` line per entry, ordered by alias number.
    ///
    /// Returns an empty string when nothing is registered.
    pub fn format_table(&self) -> String {
        if self.forward.is_empty() {
            return String::new();
        }

        let mut entries: Vec<(&String, &String)> = self.forward.iter().collect();
        entries.sort_by_key(|(_, v)| {
            v.trim_start_matches(SHORT_ID_PREFIX)
                .parse::<usize>()
                .unwrap_or(0)
        });

        let mut table = String::from("\n§MAP:");
        for (long, short) in &entries {
            table.push_str(&format!("\n {short}={long}"));
        }
        table
    }

    /// Number of registered identifiers.
    pub fn len(&self) -> usize {
        self.forward.len()
    }

    /// `true` when no identifier has been registered.
    pub fn is_empty(&self) -> bool {
        self.forward.is_empty()
    }

    /// Characters that can be part of a word for boundary purposes.
    fn is_word_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    /// Replaces occurrences of `needle` in `haystack` that are not glued to a
    /// neighbouring word character.
    fn replace_whole_words(haystack: &str, needle: &str, replacement: &str) -> String {
        if needle.is_empty() {
            return haystack.to_string();
        }

        // A boundary is only required on a side where the needle itself ends
        // in a word character (same idea as a regex `\b`).
        let guard_start = needle.chars().next().map_or(false, Self::is_word_char);
        let guard_end = needle.chars().next_back().map_or(false, Self::is_word_char);

        let mut out = String::with_capacity(haystack.len());
        let mut copied_to = 0;
        let mut search_from = 0;

        while let Some(offset) = haystack[search_from..].find(needle) {
            let start = search_from + offset;
            let end = start + needle.len();

            let glued_before = guard_start
                && haystack[..start]
                    .chars()
                    .next_back()
                    .map_or(false, Self::is_word_char);
            let glued_after = guard_end
                && haystack[end..]
                    .chars()
                    .next()
                    .map_or(false, Self::is_word_char);

            if glued_before || glued_after {
                // Step over one character so an overlapping candidate that
                // starts inside this rejected span is still examined.
                let step = haystack[start..].chars().next().map_or(1, char::len_utf8);
                search_from = start + step;
                continue;
            }

            out.push_str(&haystack[copied_to..start]);
            out.push_str(replacement);
            copied_to = end;
            search_from = end;
        }

        out.push_str(&haystack[copied_to..]);
        out
    }
}
86
87const MAP_ENTRY_OVERHEAD: usize = 2;
89
90pub fn should_register(identifier: &str, occurrences: usize, next_id: usize) -> bool {
94 if identifier.len() < MIN_IDENT_LENGTH {
95 return false;
96 }
97 let ident_tokens = count_tokens(identifier);
98 let short_id = format!("{SHORT_ID_PREFIX}{next_id}");
99 let short_tokens = count_tokens(&short_id);
100
101 let token_saving_per_use = ident_tokens.saturating_sub(short_tokens);
102 if token_saving_per_use == 0 {
103 return false;
104 }
105
106 let total_savings = occurrences * token_saving_per_use;
107 let entry_cost = ident_tokens + short_tokens + MAP_ENTRY_OVERHEAD;
108
109 total_savings > entry_cost
110}
111
112pub fn extract_identifiers(content: &str, ext: &str) -> Vec<String> {
113 let ident_re = regex::Regex::new(r"\b[a-zA-Z_][a-zA-Z0-9_]*\b").unwrap();
114
115 let mut seen = HashMap::new();
116 for mat in ident_re.find_iter(content) {
117 let word = mat.as_str();
118 if word.len() >= MIN_IDENT_LENGTH && !is_keyword(word, ext) {
119 *seen.entry(word.to_string()).or_insert(0usize) += 1;
120 }
121 }
122
123 let mut next_id = 1usize;
124 let mut idents: Vec<(String, usize)> = seen
125 .into_iter()
126 .filter(|(ident, count)| {
127 let pass = should_register(ident, *count, next_id);
128 if pass {
129 next_id += 1;
130 }
131 pass
132 })
133 .collect();
134
135 idents.sort_by(|a, b| {
136 let savings_a = a.0.len() * a.1;
137 let savings_b = b.0.len() * b.1;
138 savings_b.cmp(&savings_a)
139 });
140
141 idents.into_iter().map(|(s, _)| s).collect()
142}
143
/// Reports whether `word` is on the keyword list for the file extension
/// `ext`. Extensions without a list never match.
fn is_keyword(word: &str, ext: &str) -> bool {
    const RUST_WORDS: &[&str] = &[
        "continue", "default", "return", "struct", "unsafe", "where",
    ];
    const SCRIPT_WORDS: &[&str] = &[
        "constructor",
        "arguments",
        "undefined",
        "prototype",
        "instanceof",
    ];
    const PYTHON_WORDS: &[&str] = &["continue", "lambda", "return", "import", "class"];

    let reserved = match ext {
        "rs" => RUST_WORDS,
        "ts" | "tsx" | "js" | "jsx" => SCRIPT_WORDS,
        "py" => PYTHON_WORDS,
        _ => return false,
    };

    reserved.iter().any(|candidate| *candidate == word)
}
158
#[cfg(test)]
mod tests {
    use super::*;

    /// Long identifier shared by the return-on-investment tests.
    const HANDLER: &str = "authenticate_user_credentials_handler";

    /// Builds a map that already contains `validateToken`.
    fn map_with_validate_token() -> SymbolMap {
        let mut symbols = SymbolMap::new();
        symbols.register("validateToken");
        symbols
    }

    #[test]
    fn test_should_register_short_ident_rejected() {
        for (ident, uses) in [("foo", 100), ("bar", 50), ("x", 1000)] {
            assert!(!should_register(ident, uses, 1));
        }
    }

    #[test]
    fn test_should_register_roi_positive() {
        let accepted = should_register(HANDLER, 5, 1);
        assert!(accepted);
    }

    #[test]
    fn test_should_register_roi_negative_single_use() {
        let accepted = should_register(HANDLER, 1, 1);
        assert!(!accepted);
    }

    #[test]
    fn test_should_register_roi_scales_with_frequency() {
        let ident = "configuration_manager_instance";
        let passes_at_low = should_register(ident, 2, 1);
        let passes_at_high = should_register(ident, 10, 1);
        // Passing at low frequency must imply passing at high frequency.
        if passes_at_low {
            assert!(passes_at_high);
        }
    }

    #[test]
    fn test_extract_identifiers_roi_filtering() {
        let long = HANDLER;
        let content = format!("{long} {long} {long} {long} {long} short");
        let found = extract_identifiers(&content, "rs");
        assert!(found.iter().any(|ident| ident == long));
        assert!(!found.iter().any(|ident| ident == "short"));
    }

    #[test]
    fn test_register_returns_existing() {
        let mut symbols = SymbolMap::new();
        let first = symbols.register("validateToken");
        let second = symbols.register("validateToken");
        assert_eq!(first, second);
    }

    #[test]
    fn test_apply_replaces_identifiers() {
        let symbols = map_with_validate_token();
        let rewritten = symbols.apply("call validateToken here");
        assert!(rewritten.contains("α1"));
        assert!(!rewritten.contains("validateToken"));
    }

    #[test]
    fn test_format_table_output() {
        let rendered = map_with_validate_token().format_table();
        assert!(rendered.contains("§MAP:"));
        assert!(rendered.contains("α1=validateToken"));
    }
}