1use crate::core::profiles::TranslationConfig;
2
/// Concrete ruleset applied to tool output after selection.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TranslationRulesetV1 {
    /// Text is passed through unchanged (see `translate_text`).
    Legacy,
    /// Text is rewritten by `translate_ascii`.
    Ascii,
}
8
/// Outcome of `select_ruleset`: which ruleset was chosen and why.
#[derive(Debug, Clone)]
pub struct TranslationSelectionV1 {
    /// Ruleset to apply.
    pub ruleset: TranslationRulesetV1,
    /// Short identifier for the decision, e.g. `"disabled"`, `"ascii"`,
    /// `"auto_openai_gpt"`, `"unknown_ruleset"`.
    pub reason_code: String,
    /// Human-readable explanation of the decision.
    pub reason: String,
    /// Trimmed model key the decision was made for; `None` when no key was
    /// supplied or it was blank.
    pub model_key: Option<String>,
}
16
/// Result of running `translate_tool_output` on one piece of text.
#[derive(Debug, Clone)]
pub struct TranslationApplyResultV1 {
    /// Text after translation (equal to the input when nothing was applied).
    pub output: String,
    /// Ruleset selection that drove this result.
    pub selection: TranslationSelectionV1,
    /// `true` when `output` differs from the input text.
    pub changed: bool,
    /// `true` when the input parsed as JSON and was therefore left untouched.
    pub skipped_json: bool,
}
24
25pub fn translate_tool_output(text: &str, cfg: &TranslationConfig) -> TranslationApplyResultV1 {
26 let model_key = active_model_key_from_env();
27 let selection = select_ruleset(cfg, model_key.as_deref());
28
29 if selection.ruleset == TranslationRulesetV1::Legacy {
30 return TranslationApplyResultV1 {
31 output: text.to_string(),
32 selection,
33 changed: false,
34 skipped_json: false,
35 };
36 }
37
38 if looks_like_json(text) {
39 return TranslationApplyResultV1 {
40 output: text.to_string(),
41 selection,
42 changed: false,
43 skipped_json: true,
44 };
45 }
46
47 let out = translate_text(text, selection.ruleset);
48 TranslationApplyResultV1 {
49 changed: out != text,
50 output: out,
51 selection,
52 skipped_json: false,
53 }
54}
55
56pub fn translate_text(text: &str, ruleset: TranslationRulesetV1) -> String {
57 match ruleset {
58 TranslationRulesetV1::Legacy => text.to_string(),
59 TranslationRulesetV1::Ascii => translate_ascii(text),
60 }
61}
62
/// Canonicalizes a ruleset name: surrounding whitespace is trimmed, the text
/// is lowercased, and every `_` or space becomes `-`.
fn normalize_ruleset(s: &str) -> String {
    s.trim()
        .to_lowercase()
        .chars()
        .map(|ch| match ch {
            '_' | ' ' => '-',
            other => other,
        })
        .collect()
}
66
/// Reads the active model key from the environment.
///
/// `LEAN_CTX_MODEL` is consulted first, then `LCTX_MODEL`. A variable that is
/// unset, not valid Unicode, or blank after trimming is skipped, so an empty
/// `LEAN_CTX_MODEL` no longer hides a usable `LCTX_MODEL`.
///
/// The returned key is trimmed, lowercased, and has `_` and spaces replaced by
/// `-`. Returns `None` when neither variable yields a non-blank value.
fn active_model_key_from_env() -> Option<String> {
    ["LEAN_CTX_MODEL", "LCTX_MODEL"]
        .iter()
        // Lazy: the fallback variable is only read when the primary is unusable.
        .filter_map(|name| std::env::var(name).ok())
        .map(|raw| raw.trim().to_lowercase().replace(['_', ' '], "-"))
        .find(|key| !key.is_empty())
}
77
/// Model family guessed from a model key by `infer_model_family`.
///
/// In `select_ruleset`'s `"auto"` mode only `OpenAiGpt` selects the ASCII
/// ruleset; every other family falls back to legacy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ModelFamilyV1 {
    /// Key contains `gpt` or `openai`.
    OpenAiGpt,
    /// Key contains `claude`.
    AnthropicClaude,
    /// Key contains `gemini`.
    GoogleGemini,
    /// No known marker was found in the key.
    Unknown,
}
85
86fn infer_model_family(model_key: &str) -> ModelFamilyV1 {
87 let m = model_key.trim().to_lowercase();
88 if m.contains("gpt") || m.contains("openai") {
89 return ModelFamilyV1::OpenAiGpt;
90 }
91 if m.contains("claude") {
92 return ModelFamilyV1::AnthropicClaude;
93 }
94 if m.contains("gemini") {
95 return ModelFamilyV1::GoogleGemini;
96 }
97 ModelFamilyV1::Unknown
98}
99
100pub fn select_ruleset(cfg: &TranslationConfig, model_key: Option<&str>) -> TranslationSelectionV1 {
101 let model_key = model_key.map(str::trim).filter(|s| !s.is_empty());
102 let model_key = model_key.map(std::string::ToString::to_string);
103
104 if !cfg.enabled_effective() {
105 return TranslationSelectionV1 {
106 ruleset: TranslationRulesetV1::Legacy,
107 reason_code: "disabled".to_string(),
108 reason: "translation disabled by profile".to_string(),
109 model_key,
110 };
111 }
112
113 let ruleset = normalize_ruleset(cfg.ruleset_effective());
114 match ruleset.as_str() {
115 "legacy" | "unicode" => TranslationSelectionV1 {
116 ruleset: TranslationRulesetV1::Legacy,
117 reason_code: "legacy".to_string(),
118 reason: "legacy ruleset selected".to_string(),
119 model_key,
120 },
121 "ascii" => TranslationSelectionV1 {
122 ruleset: TranslationRulesetV1::Ascii,
123 reason_code: "ascii".to_string(),
124 reason: "ascii ruleset selected".to_string(),
125 model_key,
126 },
127 "auto" => {
128 let family = model_key
129 .as_deref()
130 .map_or(ModelFamilyV1::Unknown, infer_model_family);
131 match family {
132 ModelFamilyV1::OpenAiGpt => TranslationSelectionV1 {
133 ruleset: TranslationRulesetV1::Ascii,
134 reason_code: "auto_openai_gpt".to_string(),
135 reason: "auto: OpenAI/GPT tokenizer prefers ASCII over Unicode symbols"
136 .to_string(),
137 model_key,
138 },
139 _ => TranslationSelectionV1 {
140 ruleset: TranslationRulesetV1::Legacy,
141 reason_code: "auto_unknown".to_string(),
142 reason: "auto: unknown tokenizer family; preserve legacy format".to_string(),
143 model_key,
144 },
145 }
146 }
147 other => TranslationSelectionV1 {
148 ruleset: TranslationRulesetV1::Legacy,
149 reason_code: "unknown_ruleset".to_string(),
150 reason: format!("unknown ruleset '{other}'; using legacy"),
151 model_key,
152 },
153 }
154}
155
156fn looks_like_json(text: &str) -> bool {
157 let t = text.trim();
158 if t.is_empty() {
159 return false;
160 }
161 if !(t.starts_with('{') || t.starts_with('[')) {
162 return false;
163 }
164 serde_json::from_str::<serde_json::Value>(t).is_ok()
165}
166
/// `(from, to)` substitutions used by `translate_ascii`, applied to the whole
/// text one after another in table order.
const ASCII_SYMBOL_RULES: &[(&str, &str)] = &[
    // The spaced form is listed ahead of the bare symbol.
    ("⊛ ", "+ "),
    ("⊛", "+"),
    // Markers that `is_synthetic_tdd_signature_line` also recognizes at the
    // start of a line (followed by `+` or `-`), together with their ASCII forms.
    ("λ", "fn"),
    ("§", "cl"),
    ("∂", "if"),
    ("τ", "ty"),
    ("ε", "en"),
    ("ν", "val"),
    // Operators and relations.
    ("→", "->"),
    ("≠", "!="),
    ("≈", "~"),
    ("∴", "thus"),
    // Status glyphs.
    ("✓", "ok"),
    ("✗", "fail"),
    ("⚠", "warn"),
];
187
188fn translate_ascii(text: &str) -> String {
189 let mut out = text.to_string();
190 for (from, to) in ASCII_SYMBOL_RULES {
191 if out.contains(from) {
192 out = out.replace(from, to);
193 }
194 }
195
196 let opt = crate::core::neural::token_optimizer::TokenOptimizer::with_defaults();
198 let mut changed = false;
199 let mut lines: Vec<String> = Vec::new();
200 for line in out.lines() {
201 if is_synthetic_tdd_signature_line(line) {
202 let optimized = opt.optimize_line(line);
203 if optimized != line {
204 changed = true;
205 }
206 lines.push(optimized);
207 } else {
208 lines.push(line.to_string());
209 }
210 }
211 if changed {
212 out = lines.join("\n");
213 }
214
215 out
216}
217
/// Reports whether `line` is a signature line: optional leading whitespace,
/// an optional single `~`, then a kind marker immediately followed by `+` or
/// `-`.
///
/// Both the symbolic markers (`λ § ∂ τ ε ν`) and their ASCII forms
/// (`fn cl if ty en val`) are accepted.
fn is_synthetic_tdd_signature_line(line: &str) -> bool {
    const SYMBOL_MARKERS: [char; 6] = ['λ', '§', '∂', 'τ', 'ε', 'ν'];
    const ASCII_MARKERS: [&str; 6] = ["fn", "cl", "if", "ty", "en", "val"];

    /// True when the text right after a marker begins with `+` or `-`.
    fn starts_with_sign(rest: &str) -> bool {
        rest.starts_with('+') || rest.starts_with('-')
    }

    let trimmed = line.trim_start();
    // Only one `~` is skipped, and whitespace after it is not trimmed again.
    let body = trimmed.strip_prefix('~').unwrap_or(trimmed);

    let symbolic = SYMBOL_MARKERS
        .iter()
        .any(|&marker| body.strip_prefix(marker).is_some_and(starts_with_sign));

    symbolic
        || ASCII_MARKERS
            .iter()
            .any(|&marker| body.strip_prefix(marker).is_some_and(starts_with_sign))
}
241
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::OsString;
    use std::sync::{Mutex, MutexGuard, OnceLock, PoisonError};

    /// Environment variable the tests override.
    const MODEL_ENV: &str = "LEAN_CTX_MODEL";

    /// Serializes tests that touch the process environment, which is shared
    /// by all test threads. A poisoned lock is still usable: it guards no data.
    fn env_lock() -> MutexGuard<'static, ()> {
        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
        LOCK.get_or_init(|| Mutex::new(()))
            .lock()
            .unwrap_or_else(PoisonError::into_inner)
    }

    /// Holds the environment lock and restores `LEAN_CTX_MODEL` to its prior
    /// value on drop, so a test cannot leak its override into later tests.
    struct ModelEnvGuard {
        previous: Option<OsString>,
        // Declared last: released only after `Drop::drop` has restored the variable.
        _lock: MutexGuard<'static, ()>,
    }

    impl ModelEnvGuard {
        /// Sets (`Some`) or removes (`None`) the model variable for the
        /// lifetime of the returned guard.
        fn set(value: Option<&str>) -> Self {
            let lock = env_lock();
            let previous = std::env::var_os(MODEL_ENV);
            match value {
                Some(v) => std::env::set_var(MODEL_ENV, v),
                None => std::env::remove_var(MODEL_ENV),
            }
            Self {
                previous,
                _lock: lock,
            }
        }
    }

    impl Drop for ModelEnvGuard {
        fn drop(&mut self) {
            match self.previous.take() {
                Some(v) => std::env::set_var(MODEL_ENV, v),
                None => std::env::remove_var(MODEL_ENV),
            }
        }
    }

    /// Builds a profile config with explicit `enabled` and `ruleset` values.
    fn config(enabled: bool, ruleset: &str) -> TranslationConfig {
        TranslationConfig {
            enabled: Some(enabled),
            ruleset: Some(ruleset.to_string()),
        }
    }

    #[test]
    fn ruleset_disabled_is_legacy() {
        let _env = ModelEnvGuard::set(None);
        let sel = select_ruleset(&config(false, "auto"), Some("gpt-5.4"));
        assert_eq!(sel.ruleset, TranslationRulesetV1::Legacy);
        assert!(sel.reason_code.contains("disabled"));
    }

    #[test]
    fn ruleset_ascii_forced() {
        let sel = select_ruleset(&config(true, "ascii"), Some("claude-3.5-sonnet"));
        assert_eq!(sel.ruleset, TranslationRulesetV1::Ascii);
    }

    #[test]
    fn ruleset_auto_openai_gpt() {
        let sel = select_ruleset(&config(true, "auto"), Some("gpt-5.4-mini"));
        assert_eq!(sel.ruleset, TranslationRulesetV1::Ascii);
        assert!(sel.reason_code.contains("auto_openai_gpt"));
    }

    #[test]
    fn ruleset_auto_unknown_falls_back_to_legacy() {
        let sel = select_ruleset(&config(true, "auto"), Some("claude-3.5-sonnet"));
        assert_eq!(sel.ruleset, TranslationRulesetV1::Legacy);
        assert!(sel.reason_code.contains("auto_unknown"));
    }

    #[test]
    fn translation_skips_json_outputs() {
        let _env = ModelEnvGuard::set(Some("gpt-5.4"));
        let json = r#"{"ok":"✓","arrow":"→"}"#;
        let r = translate_tool_output(json, &config(true, "auto"));
        assert!(r.skipped_json);
        assert_eq!(r.output, json);
    }

    #[test]
    fn translation_ascii_converts_signature_markers_and_optimizes_types() {
        // `translate_tool_output` reads the environment, so hold the lock to
        // avoid racing with tests that modify it.
        let _env = ModelEnvGuard::set(None);
        let input = "λ+foo(x)→Vec<String>";
        let r = translate_tool_output(input, &config(true, "ascii"));
        assert!(!r.skipped_json);
        assert!(r.output.contains("fn+foo"));
        assert!(r.output.contains("->Vec"));
        assert!(!r.output.contains("λ"));
        assert!(!r.output.contains("→"));
    }
}