1use std::collections::HashMap;
8use std::path::Path;
9
/// Table of literal text replacements used to shorten source text.
///
/// Each key of `replacements` is a substring to search for and the
/// associated value is the text substituted in its place.
#[derive(Debug, Clone)]
pub struct TokenOptimizer {
    /// Search pattern -> replacement text.
    replacements: HashMap<String, String>,
}
13
/// Built-in `(pattern, replacement)` pairs that seed the default rule table.
///
/// Every pair is a plain substring substitution; no entry is a regular
/// expression. Several patterns overlap (for example `"function "` and
/// `"export function "`), so the result of applying them depends on the
/// order in which the consumer walks the table.
const DEFAULT_OPTIMIZATIONS: &[(&str, &str)] = &[
    // Keyword and type-name abbreviations for JavaScript/TypeScript-style text.
    ("function ", "fn "),
    ("boolean", "bool"),
    ("string", "str"),
    ("number", "num"),
    ("undefined", "undef"),
    ("console.log", "log"),
    ("export function ", "fn "),
    // NOTE(review): both sides of this whitespace pair read as the same
    // literal here; the `indentation_compresses` test suggests it is meant to
    // shrink indentation - confirm the exact widths.
    (" ", " "),
    // Generic instantiations reduced to the bare type name.
    ("Result<T, E>", "Result"),
    ("Result<T,E>", "Result"),
    ("Option<T>", "Option"),
    ("Vec<String>", "Vec"),
    ("Vec<&str>", "Vec"),
    ("Vec<u8>", "Vec"),
    ("HashMap<String, String>", "HashMap"),
    ("HashMap<K, V>", "HashMap"),
    ("HashMap<K,V>", "HashMap"),
    ("BTreeMap<K, V>", "BTreeMap"),
    ("HashSet<String>", "HashSet"),
    ("Box<dyn Error>", "Box<Error>"),
    // NOTE(review): pattern and replacement are identical, so this entry
    // changes nothing - confirm whether it is a placeholder.
    ("Arc<Mutex<", "Arc<Mutex<"),
    // Fully qualified std paths reduced to their last segment(s).
    ("std::collections::HashMap", "HashMap"),
    ("std::collections::HashSet", "HashSet"),
    ("std::collections::BTreeMap", "BTreeMap"),
    ("std::path::PathBuf", "PathBuf"),
    ("std::path::Path", "Path"),
    ("std::sync::Arc", "Arc"),
    ("std::sync::Mutex", "Mutex"),
    ("std::io::Result", "io::Result"),
    ("std::fmt::Display", "Display"),
    ("std::fmt::Debug", "Debug"),
];
50
/// Second set of built-in `(pattern, replacement)` pairs, merged on top of
/// `DEFAULT_OPTIMIZATIONS` when the default rule table is built.
///
/// NOTE(review): the name suggests these were picked to line up with BPE
/// token boundaries; nothing in this file measures that - confirm against the
/// tokenizer.
const BPE_ALIGNED_RULES: &[(&str, &str)] = &[
    // Remove the spaces around arrows.
    (" -> ", "->"),
    (" => ", "=>"),
    // Drop the semicolon after a closing brace.
    ("};", "}"),
    // Squeeze three consecutive newlines into two. A pattern containing
    // '\n' can only match when the text handed to the replacer still
    // contains line breaks.
    ("\n\n\n", "\n\n"),
    // Shorter spelling for owned-string conversions.
    (".to_string()", ".into()"),
    (".to_owned()", ".into()"),
    // Restricted visibility rewritten to plain `pub`.
    ("pub(crate) ", "pub "),
    ("pub(super) ", "pub "),
    // Remove a leading `self, ` from parameter lists.
    ("self, ", ""),
    // Remove a ` pass` statement together with its line break (same
    // newline caveat as above).
    (" pass\n", ""),
    ("export default ", "export "),
    // Remove `void` / `undefined` type annotations.
    (": void", ""),
    (": undefined", ""),
    // Go-style spellings.
    ("func (", "fn ("),
    ("interface{}", "any"),
];
77
78impl TokenOptimizer {
79 pub fn load_or_default(model_dir: &Path) -> Self {
80 let config_path = model_dir.join("token_optimizer.json");
81 if config_path.exists() {
82 match Self::load_from_file(&config_path) {
83 Ok(opt) => {
84 tracing::info!(
85 "Token optimizer loaded ({} rules) from {:?}",
86 opt.replacements.len(),
87 config_path,
88 );
89 return opt;
90 }
91 Err(e) => {
92 tracing::warn!("Failed to load token optimizer: {e}. Using defaults.");
93 }
94 }
95 }
96
97 Self::with_defaults()
98 }
99
100 pub fn with_defaults() -> Self {
101 let mut replacements: HashMap<String, String> = DEFAULT_OPTIMIZATIONS
102 .iter()
103 .map(|(k, v)| (k.to_string(), v.to_string()))
104 .collect();
105
106 for &(from, to) in BPE_ALIGNED_RULES {
107 replacements.insert(from.to_string(), to.to_string());
108 }
109
110 Self { replacements }
111 }
112
113 fn load_from_file(path: &Path) -> anyhow::Result<Self> {
114 let content = std::fs::read_to_string(path)?;
115 let data: HashMap<String, String> = serde_json::from_str(&content)?;
116 Ok(Self { replacements: data })
117 }
118
119 pub fn optimize<'a>(&'a self, _concept: &str, representation: &'a str) -> &'a str {
120 representation
121 }
122
123 pub fn optimize_line(&self, line: &str) -> String {
124 let mut result = line.to_string();
125 for (from, to) in &self.replacements {
126 result = result.replace(from.as_str(), to.as_str());
127 }
128 result = elide_lifetimes(&result);
129 result
130 }
131
132 pub fn optimize_block(&self, content: &str) -> String {
133 let optimized: Vec<String> = content
134 .lines()
135 .map(|line| self.optimize_line(line))
136 .collect();
137 let collapsed = collapse_closing_braces(&optimized);
138 collapsed.join("\n")
139 }
140
141 pub fn replacement_count(&self) -> usize {
142 self.replacements.len()
143 }
144
145 pub fn token_cost(text: &str) -> usize {
148 crate::core::tokens::count_tokens(text)
149 }
150
151 pub fn cheaper_repr<'a>(a: &'a str, b: &'a str) -> &'a str {
153 if Self::token_cost(a) <= Self::token_cost(b) {
154 a
155 } else {
156 b
157 }
158 }
159}
160
/// Strips the named lifetimes `'a`, `'b` and `'c` from reference types.
///
/// `&'a T` becomes `&T` and `&'a mut T` becomes `&mut T`. `'static` is
/// deliberately left alone, as is any other lifetime name.
fn elide_lifetimes(line: &str) -> String {
    ["'a ", "'b ", "'c "]
        .iter()
        .fold(line.to_string(), |text, lifetime| {
            let mutable_ref = format!("&{lifetime}mut ");
            let shared_ref = format!("&{lifetime}");
            text.replace(&mutable_ref, "&mut ")
                .replace(&shared_ref, "&")
        })
}
175
/// Merges runs of consecutive closing-delimiter lines.
///
/// A line counts as a closer when, after trimming, it is exactly one of
/// `}`, `};`, `);`, `});` or `)`. Within a run, the first two closers are
/// kept on their own lines with indentation removed, and every further
/// closer is appended to the second one. No closer is ever discarded, so the
/// delimiters in the output stay balanced with the input. Any other line is
/// copied through unchanged and ends the current run.
fn collapse_closing_braces(lines: &[String]) -> Vec<String> {
    let mut result: Vec<String> = Vec::with_capacity(lines.len());
    let mut closer_run = 0usize;

    for line in lines {
        let trimmed = line.trim();
        if !matches!(trimmed, "}" | "};" | ");" | "});" | ")") {
            closer_run = 0;
            result.push(line.clone());
            continue;
        }

        closer_run += 1;
        if closer_run <= 2 {
            result.push(trimmed.to_string());
        } else if let Some(last) = result.last_mut() {
            // Third and later closers share the previous closer's line;
            // `last` is the second closer of this run.
            last.push_str(trimmed);
        }
    }
    result
}
198
// Unit tests for the default rule table and the line/block helpers.
#[cfg(test)]
mod tests {
    use super::*;

    // Keyword abbreviations from the default table are applied to a line.
    #[test]
    fn default_optimizations_apply() {
        let opt = TokenOptimizer::with_defaults();
        assert_eq!(opt.optimize_line("function hello() {"), "fn hello() {");
        assert_eq!(opt.optimize_line("boolean flag"), "bool flag");
    }

    // NOTE(review): input and expected output read as the same literal here;
    // confirm the indentation widths this test is meant to exercise.
    #[test]
    fn indentation_compresses() {
        let opt = TokenOptimizer::with_defaults();
        let input = " let x = 1;";
        let output = opt.optimize_line(input);
        assert_eq!(output, " let x = 1;");
    }

    // Generic instantiations and std paths shrink to bare names, and the
    // arrow loses its surrounding spaces.
    #[test]
    fn generic_types_simplify() {
        let opt = TokenOptimizer::with_defaults();
        assert_eq!(
            opt.optimize_line("fn foo() -> Result<T, E>"),
            "fn foo()->Result"
        );
        assert_eq!(
            opt.optimize_line("fn bar() -> Option<T>"),
            "fn bar()->Option"
        );
        assert_eq!(
            opt.optimize_line("let v: Vec<String> = vec![]"),
            "let v: Vec = vec![]"
        );
        assert_eq!(
            opt.optimize_line("use std::collections::HashMap;"),
            "use HashMap;"
        );
    }

    // `optimize_block` rewrites each line and joins the result with '\n'.
    #[test]
    fn multiline_optimization() {
        let opt = TokenOptimizer::with_defaults();
        let input = "function hello() {\n return 42;\n}";
        let output = opt.optimize_block(input);
        assert_eq!(output, "fn hello() {\n return 42;\n}");
    }

    // `'a` is stripped from shared and mutable references; `'static` stays.
    #[test]
    fn lifetime_elision() {
        let opt = TokenOptimizer::with_defaults();
        assert_eq!(
            opt.optimize_line("fn foo(&'a str) -> &'a str"),
            "fn foo(&str)->&str"
        );
        assert_eq!(opt.optimize_line("fn bar(&'a mut Vec)"), "fn bar(&mut Vec)");
        assert_eq!(
            opt.optimize_line("fn baz(&'static str)"),
            "fn baz(&'static str)",
            "'static must not be elided"
        );
    }

    // A run of consecutive closing-brace lines leaves at most two
    // brace-only lines, and the code following the run is kept.
    #[test]
    fn closing_brace_collapsing() {
        let opt = TokenOptimizer::with_defaults();
        let input = "fn main() {\n inner() {\n x\n }\n}\n}\n}\n}\nfn next() {}";
        let output = opt.optimize_block(input);
        assert!(output.contains("fn next()"), "code after braces preserved");
        let brace_only_lines: Vec<_> = output.lines().filter(|l| l.trim() == "}").collect();
        assert!(
            brace_only_lines.len() <= 2,
            "should collapse 4+ closing braces"
        );
    }

    // Fully qualified std paths are reduced to their final segment.
    #[test]
    fn std_path_shortening() {
        let opt = TokenOptimizer::with_defaults();
        assert_eq!(opt.optimize_line("use std::path::PathBuf;"), "use PathBuf;");
        assert_eq!(opt.optimize_line("use std::sync::Arc;"), "use Arc;");
    }

    // The return arrow is written without surrounding spaces.
    #[test]
    fn bpe_aligned_arrow_compression() {
        let opt = TokenOptimizer::with_defaults();
        assert_eq!(opt.optimize_line("fn foo() -> bool {"), "fn foo()->bool {");
    }

    // The token counter reports a non-zero cost for non-empty text.
    #[test]
    fn bpe_cost_oracle_works() {
        let cost = TokenOptimizer::token_cost("hello world");
        assert!(cost > 0);
    }

    // The chosen representation never costs more than the first candidate.
    #[test]
    fn cheaper_repr_picks_shorter() {
        let result = TokenOptimizer::cheaper_repr("fn foo() -> bool", "fn foo()->bool");
        assert!(
            TokenOptimizer::token_cost(result) <= TokenOptimizer::token_cost("fn foo() -> bool")
        );
    }
}