1use std::path::Path;
8
/// Applies a list of literal `(pattern, replacement)` text substitutions to
/// source text.
///
/// Instances are created with [`TokenOptimizer::with_defaults`] or
/// [`TokenOptimizer::load_or_default`].
#[derive(Debug, Clone)]
pub struct TokenOptimizer {
    /// `(from, to)` pairs, stored in the order in which they are applied.
    replacements: Vec<(String, String)>,
}
12
/// Built-in `(pattern, replacement)` pairs that `TokenOptimizer::with_defaults`
/// copies into its rule list.
///
/// The order of the entries here is not the order of application:
/// `with_defaults` passes the combined list through `sort_rules`, which places
/// longer patterns first.
const DEFAULT_OPTIMIZATIONS: &[(&str, &str)] = &[
    // Keyword / type-name shortenings (the patterns look like
    // JavaScript/TypeScript spellings).
    ("function ", "fn "),
    ("boolean", "bool"),
    ("string", "str"),
    ("number", "num"),
    ("undefined", "undef"),
    ("console.log", "log"),
    ("export function ", "fn "),
    // NOTE(review): as written here the pattern and the replacement are the
    // same string, so this rule changes nothing. Presumably the two sides were
    // meant to be different indentation widths — confirm against the intended
    // source.
    (" ", " "),
    // Generic type spellings reduced to the bare type name.
    ("Result<T, E>", "Result"),
    ("Result<T,E>", "Result"),
    ("Option<T>", "Option"),
    ("Vec<String>", "Vec"),
    ("Vec<&str>", "Vec"),
    ("Vec<u8>", "Vec"),
    ("HashMap<String, String>", "HashMap"),
    ("HashMap<K, V>", "HashMap"),
    ("HashMap<K,V>", "HashMap"),
    ("BTreeMap<K, V>", "BTreeMap"),
    ("HashSet<String>", "HashSet"),
    ("Box<dyn Error>", "Box<Error>"),
    // NOTE(review): maps the pattern to itself, so it is a no-op as written.
    ("Arc<Mutex<", "Arc<Mutex<"),
    // Fully-qualified `std` paths reduced to a shorter path.
    ("std::collections::HashMap", "HashMap"),
    ("std::collections::HashSet", "HashSet"),
    ("std::collections::BTreeMap", "BTreeMap"),
    ("std::path::PathBuf", "PathBuf"),
    ("std::path::Path", "Path"),
    ("std::sync::Arc", "Arc"),
    ("std::sync::Mutex", "Mutex"),
    ("std::io::Result", "io::Result"),
    ("std::fmt::Display", "Display"),
    ("std::fmt::Debug", "Debug"),
];
49
/// Second built-in rule table; `TokenOptimizer::with_defaults` appends it to
/// `DEFAULT_OPTIMIZATIONS` before sorting.
///
/// NOTE(review): the name suggests these pairs were chosen with a BPE
/// tokenizer in mind; nothing in this file checks that — confirm.
const BPE_ALIGNED_RULES: &[(&str, &str)] = &[
    // Drop the spaces around arrows.
    (" -> ", "->"),
    (" => ", "=>"),
    // Three consecutive newlines become two.
    ("\n\n\n", "\n\n"),
    // Restricted visibility is rewritten to plain `pub`.
    ("pub(crate) ", "pub "),
    ("pub(super) ", "pub "),
    ("export default ", "export "),
];
70
71impl TokenOptimizer {
72 pub fn load_or_default(model_dir: &Path) -> Self {
73 let config_path = model_dir.join("token_optimizer.json");
74 if config_path.exists() {
75 match Self::load_from_file(&config_path) {
76 Ok(opt) => {
77 tracing::info!(
78 "Token optimizer loaded ({} rules) from {:?}",
79 opt.replacements.len(),
80 config_path,
81 );
82 return opt;
83 }
84 Err(e) => {
85 tracing::warn!("Failed to load token optimizer: {e}. Using defaults.");
86 }
87 }
88 }
89
90 Self::with_defaults()
91 }
92
93 pub fn with_defaults() -> Self {
94 let mut rules: Vec<(String, String)> = DEFAULT_OPTIMIZATIONS
95 .iter()
96 .map(|(k, v)| (k.to_string(), v.to_string()))
97 .collect();
98 rules.extend(
99 BPE_ALIGNED_RULES
100 .iter()
101 .map(|(k, v)| (k.to_string(), v.to_string())),
102 );
103 Self {
104 replacements: sort_rules(rules),
105 }
106 }
107
108 fn load_from_file(path: &Path) -> anyhow::Result<Self> {
109 let content = std::fs::read_to_string(path)?;
110 let data: std::collections::HashMap<String, String> = serde_json::from_str(&content)?;
111 let rules: Vec<(String, String)> = data.into_iter().collect();
112 Ok(Self {
113 replacements: sort_rules(rules),
114 })
115 }
116
117 pub fn optimize_line(&self, line: &str) -> String {
118 let mut result = line.to_string();
119 for (from, to) in &self.replacements {
120 result = result.replace(from.as_str(), to.as_str());
121 }
122 result = elide_lifetimes(&result);
123 result
124 }
125
126 pub fn optimize_block(&self, content: &str) -> String {
127 let optimized: Vec<String> = content
128 .lines()
129 .map(|line| self.optimize_line(line))
130 .collect();
131 let collapsed = collapse_closing_braces(&optimized);
132 collapsed.join("\n")
133 }
134
135 pub fn replacement_count(&self) -> usize {
136 self.replacements.len()
137 }
138
139 pub fn token_cost(text: &str) -> usize {
142 crate::core::tokens::count_tokens(text)
143 }
144
145 pub fn cheaper_repr<'a>(a: &'a str, b: &'a str) -> &'a str {
147 if Self::token_cost(a) <= Self::token_cost(b) {
148 a
149 } else {
150 b
151 }
152 }
153}
154
/// Orders rules so that longer patterns come first.
///
/// Rules with patterns of equal length are ordered by pattern text and then by
/// replacement text, both ascending, which makes the result independent of the
/// input order.
fn sort_rules(mut rules: Vec<(String, String)>) -> Vec<(String, String)> {
    rules.sort_by(|lhs, rhs| {
        // `Reverse` flips only the length component; the two string
        // components keep their natural ascending order.
        let lhs_key = (std::cmp::Reverse(lhs.0.len()), &lhs.0, &lhs.1);
        let rhs_key = (std::cmp::Reverse(rhs.0.len()), &rhs.0, &rhs.1);
        lhs_key.cmp(&rhs_key)
    });
    rules
}
166
/// Strips the named lifetimes `'a`, `'b` and `'c` from references, turning
/// `&'a T` into `&T` and `&'a mut T` into `&mut T`.
///
/// `'static` is never removed. Lifetimes that are not directly preceded by `&`
/// (for example in a generic parameter list) are left as they are.
fn elide_lifetimes(line: &str) -> String {
    // `'static` is intentionally not part of this list.
    ["'a ", "'b ", "'c "]
        .iter()
        .fold(line.to_string(), |text, lifetime| {
            let exclusive = format!("&{lifetime}mut ");
            let shared = format!("&{lifetime}");
            text.replace(&exclusive, "&mut ").replace(&shared, "&")
        })
}
181
/// Compacts runs of consecutive lines that consist only of a closing
/// delimiter (`}`, `};`, `);`, `});` or `)` once trimmed).
///
/// Within one run, the first two closers are emitted as their own lines
/// without surrounding whitespace, and the third is appended to the previous
/// output line. All other lines are copied through unchanged and end the run.
///
/// NOTE(review): the fourth and later closers of a run are discarded, so the
/// output can contain fewer closing delimiters than the input — confirm that
/// this loss is intended.
fn collapse_closing_braces(lines: &[String]) -> Vec<String> {
    const CLOSERS: [&str; 5] = ["}", "};", ");", "});", ")"];

    let mut out: Vec<String> = Vec::with_capacity(lines.len());
    let mut run_len = 0u32;

    for raw in lines {
        let closer = raw.trim();
        if !CLOSERS.contains(&closer) {
            // Any other line ends the current run and is kept verbatim.
            run_len = 0;
            out.push(raw.clone());
            continue;
        }

        run_len += 1;
        match run_len {
            1 | 2 => out.push(closer.to_owned()),
            3 => {
                if let Some(tail) = out.last_mut() {
                    tail.push_str(closer);
                }
            }
            _ => {}
        }
    }
    out
}
204
// Unit tests for the default rule set and the helper functions of this module.
#[cfg(test)]
mod tests {
    use super::*;

    // Keyword shortenings from the default table are applied to a single line.
    #[test]
    fn default_optimizations_apply() {
        let opt = TokenOptimizer::with_defaults();
        assert_eq!(opt.optimize_line("function hello() {"), "fn hello() {");
        assert_eq!(opt.optimize_line("boolean flag"), "bool flag");
    }

    // NOTE(review): as written, the input and the expected output are the same
    // string, so this only checks that the line passes through unchanged.
    // The test name suggests the two were meant to differ in indentation
    // width — confirm.
    #[test]
    fn indentation_compresses() {
        let opt = TokenOptimizer::with_defaults();
        let input = " let x = 1;";
        let output = opt.optimize_line(input);
        assert_eq!(output, " let x = 1;");
    }

    // Generic type spellings and fully-qualified std paths are shortened; the
    // arrow rule also removes the spaces around `->`.
    #[test]
    fn generic_types_simplify() {
        let opt = TokenOptimizer::with_defaults();
        assert_eq!(
            opt.optimize_line("fn foo() -> Result<T, E>"),
            "fn foo()->Result"
        );
        assert_eq!(
            opt.optimize_line("fn bar() -> Option<T>"),
            "fn bar()->Option"
        );
        assert_eq!(
            opt.optimize_line("let v: Vec<String> = vec![]"),
            "let v: Vec = vec![]"
        );
        assert_eq!(
            opt.optimize_line("use std::collections::HashMap;"),
            "use HashMap;"
        );
    }

    // `optimize_block` applies the line rules to every line of a multi-line
    // input and joins the lines again with `\n`.
    #[test]
    fn multiline_optimization() {
        let opt = TokenOptimizer::with_defaults();
        let input = "function hello() {\n return 42;\n}";
        let output = opt.optimize_block(input);
        assert_eq!(output, "fn hello() {\n return 42;\n}");
    }

    // `'a` is removed from shared and mutable references; `'static` is kept.
    #[test]
    fn lifetime_elision() {
        let opt = TokenOptimizer::with_defaults();
        assert_eq!(
            opt.optimize_line("fn foo(&'a str) -> &'a str"),
            "fn foo(&str)->&str"
        );
        assert_eq!(opt.optimize_line("fn bar(&'a mut Vec)"), "fn bar(&mut Vec)");
        assert_eq!(
            opt.optimize_line("fn baz(&'static str)"),
            "fn baz(&'static str)",
            "'static must not be elided"
        );
    }

    // A run of five closing-brace lines must leave at most two lines that
    // consist solely of `}`, and the code after the run must still be present.
    #[test]
    fn closing_brace_collapsing() {
        let opt = TokenOptimizer::with_defaults();
        let input = "fn main() {\n inner() {\n x\n }\n}\n}\n}\n}\nfn next() {}";
        let output = opt.optimize_block(input);
        assert!(output.contains("fn next()"), "code after braces preserved");
        let brace_only_lines: Vec<_> = output.lines().filter(|l| l.trim() == "}").collect();
        assert!(
            brace_only_lines.len() <= 2,
            "should collapse 4+ closing braces"
        );
    }

    // Fully-qualified `std::path` / `std::sync` paths lose their prefix.
    #[test]
    fn std_path_shortening() {
        let opt = TokenOptimizer::with_defaults();
        assert_eq!(opt.optimize_line("use std::path::PathBuf;"), "use PathBuf;");
        assert_eq!(opt.optimize_line("use std::sync::Arc;"), "use Arc;");
    }

    // The ` -> ` rule from `BPE_ALIGNED_RULES` is part of the default rule set.
    #[test]
    fn bpe_aligned_arrow_compression() {
        let opt = TokenOptimizer::with_defaults();
        assert_eq!(opt.optimize_line("fn foo() -> bool {"), "fn foo()->bool {");
    }

    // Smoke test: a non-empty string has a non-zero token cost.
    #[test]
    fn bpe_cost_oracle_works() {
        let cost = TokenOptimizer::token_cost("hello world");
        assert!(cost > 0);
    }

    // The string returned by `cheaper_repr` never costs more than the first
    // candidate that was passed in.
    #[test]
    fn cheaper_repr_picks_shorter() {
        let result = TokenOptimizer::cheaper_repr("fn foo() -> bool", "fn foo()->bool");
        assert!(
            TokenOptimizer::token_cost(result) <= TokenOptimizer::token_cost("fn foo() -> bool")
        );
    }
}