1use std::collections::HashMap;
8use std::path::Path;
9
/// Rewrites source text using a table of literal `pattern -> replacement` substitutions.
///
/// `Debug` and `Clone` are derived so the optimizer can be logged and duplicated like any
/// other plain-data value.
#[derive(Debug, Clone)]
pub struct TokenOptimizer {
    /// Literal substitutions, keyed by the text to search for.
    replacements: HashMap<String, String>,
}
13
/// Built-in literal substitutions as `(pattern, replacement)` pairs.
///
/// `TokenOptimizer::with_defaults` copies every pair into its replacement map, so the
/// order of entries in this table carries no meaning.
const DEFAULT_OPTIMIZATIONS: &[(&str, &str)] = &[
    // Keyword and identifier shortenings.
    ("function ", "fn "),
    ("boolean", "bool"),
    ("string", "str"),
    ("number", "num"),
    ("undefined", "undef"),
    ("console.log", "log"),
    ("export function ", "fn "),
    // NOTE(review): as written, this pair and the `Arc<Mutex<` pair further down map a
    // string to itself, i.e. they are no-ops — confirm the intended literals.
    (" ", " "),
    // Generic instantiations reduced to the bare type name.
    ("Result<T, E>", "Result"),
    ("Result<T,E>", "Result"),
    ("Option<T>", "Option"),
    ("Vec<String>", "Vec"),
    ("Vec<&str>", "Vec"),
    ("Vec<u8>", "Vec"),
    ("HashMap<String, String>", "HashMap"),
    ("HashMap<K, V>", "HashMap"),
    ("HashMap<K,V>", "HashMap"),
    ("BTreeMap<K, V>", "BTreeMap"),
    ("HashSet<String>", "HashSet"),
    ("Box<dyn Error>", "Box<Error>"),
    ("Arc<Mutex<", "Arc<Mutex<"),
    // Qualified `std` paths shortened to their trailing segment(s).
    ("std::collections::HashMap", "HashMap"),
    ("std::collections::HashSet", "HashSet"),
    ("std::collections::BTreeMap", "BTreeMap"),
    ("std::path::PathBuf", "PathBuf"),
    ("std::path::Path", "Path"),
    ("std::sync::Arc", "Arc"),
    ("std::sync::Mutex", "Mutex"),
    ("std::io::Result", "io::Result"),
    ("std::fmt::Display", "Display"),
    ("std::fmt::Debug", "Debug"),
];
50
/// Extra `(pattern, replacement)` pairs layered on top of `DEFAULT_OPTIMIZATIONS`.
///
/// `TokenOptimizer::with_defaults` inserts these after the default table, so an entry
/// here replaces a default entry that uses the same pattern.
// NOTE(review): the name implies these were chosen with a BPE tokenizer in mind; the
// rationale is not visible in this file — confirm before editing.
const BPE_ALIGNED_RULES: &[(&str, &str)] = &[
    // Drop the spaces around arrows.
    (" -> ", "->"),
    (" => ", "=>"),
    // `optimize_block` hands `optimize_line` one line at a time (via `str::lines`), so
    // this rule can only match when `optimize_line` is called with multi-line text.
    ("\n\n\n", "\n\n"),
    // Restricted visibility written as plain `pub`.
    ("pub(crate) ", "pub "),
    ("pub(super) ", "pub "),
    ("export default ", "export "),
];
71
72impl TokenOptimizer {
73 pub fn load_or_default(model_dir: &Path) -> Self {
74 let config_path = model_dir.join("token_optimizer.json");
75 if config_path.exists() {
76 match Self::load_from_file(&config_path) {
77 Ok(opt) => {
78 tracing::info!(
79 "Token optimizer loaded ({} rules) from {:?}",
80 opt.replacements.len(),
81 config_path,
82 );
83 return opt;
84 }
85 Err(e) => {
86 tracing::warn!("Failed to load token optimizer: {e}. Using defaults.");
87 }
88 }
89 }
90
91 Self::with_defaults()
92 }
93
94 pub fn with_defaults() -> Self {
95 let mut replacements: HashMap<String, String> = DEFAULT_OPTIMIZATIONS
96 .iter()
97 .map(|(k, v)| (k.to_string(), v.to_string()))
98 .collect();
99
100 for &(from, to) in BPE_ALIGNED_RULES {
101 replacements.insert(from.to_string(), to.to_string());
102 }
103
104 Self { replacements }
105 }
106
107 fn load_from_file(path: &Path) -> anyhow::Result<Self> {
108 let content = std::fs::read_to_string(path)?;
109 let data: HashMap<String, String> = serde_json::from_str(&content)?;
110 Ok(Self { replacements: data })
111 }
112
113 pub fn optimize<'a>(&'a self, _concept: &str, representation: &'a str) -> &'a str {
114 representation
115 }
116
117 pub fn optimize_line(&self, line: &str) -> String {
118 let mut result = line.to_string();
119 for (from, to) in &self.replacements {
120 result = result.replace(from.as_str(), to.as_str());
121 }
122 result = elide_lifetimes(&result);
123 result
124 }
125
126 pub fn optimize_block(&self, content: &str) -> String {
127 let optimized: Vec<String> = content
128 .lines()
129 .map(|line| self.optimize_line(line))
130 .collect();
131 let collapsed = collapse_closing_braces(&optimized);
132 collapsed.join("\n")
133 }
134
135 pub fn replacement_count(&self) -> usize {
136 self.replacements.len()
137 }
138
139 pub fn token_cost(text: &str) -> usize {
142 crate::core::tokens::count_tokens(text)
143 }
144
145 pub fn cheaper_repr<'a>(a: &'a str, b: &'a str) -> &'a str {
147 if Self::token_cost(a) <= Self::token_cost(b) {
148 a
149 } else {
150 b
151 }
152 }
153}
154
/// Strips the named lifetimes `'a`, `'b` and `'c` from reference types in `line`.
///
/// `&'a mut T` becomes `&mut T` and `&'a T` becomes `&T`. Only a lifetime that directly
/// follows `&` and is itself followed by a space is touched, so `&'static T` and
/// lifetimes with any other name are returned exactly as written.
fn elide_lifetimes(line: &str) -> String {
    ["'a", "'b", "'c"]
        .iter()
        .fold(line.to_owned(), |text, lifetime| {
            let mutable_ref = format!("&{lifetime} mut ");
            let shared_ref = format!("&{lifetime} ");
            text.replace(&mutable_ref, "&mut ")
                .replace(&shared_ref, "&")
        })
}
169
/// Shortens runs of consecutive lines that consist only of a closing delimiter.
///
/// A line counts as a closer when, after trimming, it is one of `}`, `};`, `);`, `});`
/// or `)`. Within one uninterrupted run of closers:
///
/// * the first and second are emitted on their own lines, without indentation;
/// * the third is appended to the second's line;
/// * every further closer is discarded.
///
/// Any other line ends the run and is copied through unchanged.
// NOTE(review): discarding closers past the third leaves the output unbalanced —
// confirm this lossy behaviour is intended.
fn collapse_closing_braces(lines: &[String]) -> Vec<String> {
    const CLOSERS: [&str; 5] = ["}", "};", ");", "});", ")"];

    let mut out: Vec<String> = Vec::with_capacity(lines.len());
    let mut run_len = 0u32;

    for raw in lines {
        let stripped = raw.trim();

        if !CLOSERS.contains(&stripped) {
            run_len = 0;
            out.push(raw.clone());
            continue;
        }

        run_len += 1;
        match run_len {
            1 | 2 => out.push(stripped.to_owned()),
            3 => {
                // The previous two closers were pushed, so there is always a last line.
                if let Some(tail) = out.last_mut() {
                    tail.push_str(stripped);
                }
            }
            _ => {}
        }
    }

    out
}
192
#[cfg(test)]
mod tests {
    use super::*;

    /// Keyword shortenings from the default table are applied.
    #[test]
    fn default_optimizations_apply() {
        let optimizer = TokenOptimizer::with_defaults();
        let cases = [
            ("function hello() {", "fn hello() {"),
            ("boolean flag", "bool flag"),
        ];
        for (source, expected) in cases {
            assert_eq!(optimizer.optimize_line(source), expected);
        }
    }

    /// Leading whitespace goes through the whitespace rule.
    #[test]
    fn indentation_compresses() {
        let optimizer = TokenOptimizer::with_defaults();
        assert_eq!(optimizer.optimize_line(" let x = 1;"), " let x = 1;");
    }

    /// Generic instantiations and qualified paths shrink to the bare type name.
    #[test]
    fn generic_types_simplify() {
        let optimizer = TokenOptimizer::with_defaults();
        let cases = [
            ("fn foo() -> Result<T, E>", "fn foo()->Result"),
            ("fn bar() -> Option<T>", "fn bar()->Option"),
            ("let v: Vec<String> = vec![]", "let v: Vec = vec![]"),
            ("use std::collections::HashMap;", "use HashMap;"),
        ];
        for (source, expected) in cases {
            assert_eq!(optimizer.optimize_line(source), expected);
        }
    }

    /// `optimize_block` handles each line and re-joins them with `\n`.
    #[test]
    fn multiline_optimization() {
        let optimizer = TokenOptimizer::with_defaults();
        let rewritten = optimizer.optimize_block("function hello() {\n return 42;\n}");
        assert_eq!(rewritten, "fn hello() {\n return 42;\n}");
    }

    /// Short named lifetimes are removed from references; `'static` is kept.
    #[test]
    fn lifetime_elision() {
        let optimizer = TokenOptimizer::with_defaults();
        let cases = [
            ("fn foo(&'a str) -> &'a str", "fn foo(&str)->&str"),
            ("fn bar(&'a mut Vec)", "fn bar(&mut Vec)"),
        ];
        for (source, expected) in cases {
            assert_eq!(optimizer.optimize_line(source), expected);
        }
        assert_eq!(
            optimizer.optimize_line("fn baz(&'static str)"),
            "fn baz(&'static str)",
            "'static must not be elided"
        );
    }

    /// A long run of closing braces leaves at most two brace-only lines behind.
    #[test]
    fn closing_brace_collapsing() {
        let optimizer = TokenOptimizer::with_defaults();
        let source = "fn main() {\n inner() {\n x\n }\n}\n}\n}\n}\nfn next() {}";
        let rewritten = optimizer.optimize_block(source);
        assert!(rewritten.contains("fn next()"), "code after braces preserved");
        let brace_only = rewritten
            .lines()
            .filter(|line| line.trim() == "}")
            .count();
        assert!(brace_only <= 2, "should collapse 4+ closing braces");
    }

    /// Qualified `std` paths are shortened.
    #[test]
    fn std_path_shortening() {
        let optimizer = TokenOptimizer::with_defaults();
        let cases = [
            ("use std::path::PathBuf;", "use PathBuf;"),
            ("use std::sync::Arc;", "use Arc;"),
        ];
        for (source, expected) in cases {
            assert_eq!(optimizer.optimize_line(source), expected);
        }
    }

    /// Spaces around `->` are dropped.
    #[test]
    fn bpe_aligned_arrow_compression() {
        let optimizer = TokenOptimizer::with_defaults();
        assert_eq!(
            optimizer.optimize_line("fn foo() -> bool {"),
            "fn foo()->bool {"
        );
    }

    /// The token counter reports a positive cost for non-empty text.
    #[test]
    fn bpe_cost_oracle_works() {
        assert!(TokenOptimizer::token_cost("hello world") > 0);
    }

    /// `cheaper_repr` never returns something costlier than its first argument.
    #[test]
    fn cheaper_repr_picks_shorter() {
        let spaced = "fn foo() -> bool";
        let chosen = TokenOptimizer::cheaper_repr(spaced, "fn foo()->bool");
        assert!(TokenOptimizer::token_cost(chosen) <= TokenOptimizer::token_cost(spaced));
    }
}