1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Notice attached to every lossy rendering so the reader knows the text is not
/// the complete source.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Appends the compressed-output notice and a ready-made `ctx_read` call to `output`.
///
/// The result is `output`, then [`COMPRESSED_HINT`] on its own line, then a line
/// showing how to request `file_path` again with `mode="full"`.
fn append_compressed_hint(output: &str, file_path: &str) -> String {
    let mut annotated = String::from(output);
    annotated.push('\n');
    annotated.push_str(COMPRESSED_HINT);
    annotated.push_str("\n ctx_read(\"");
    annotated.push_str(file_path);
    annotated.push_str("\", mode=\"full\")");
    annotated
}
18
/// Reads the file at `path` into a `String`, tolerating invalid UTF-8.
///
/// Valid UTF-8 is returned unchanged (the byte buffer is reused, not copied).
/// Otherwise the bytes are converted with `String::from_utf8_lossy`, which
/// substitutes U+FFFD for each invalid sequence.
///
/// # Errors
///
/// Returns the `std::io::Error` produced by `std::fs::read` when the file
/// cannot be read.
pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
    let raw = std::fs::read(path)?;
    let text = String::from_utf8(raw)
        .unwrap_or_else(|invalid| String::from_utf8_lossy(invalid.as_bytes()).into_owned());
    Ok(text)
}
26
27pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
28 handle_with_options(cache, path, mode, false, crp_mode, None)
29}
30
31pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
32 handle_with_options(cache, path, mode, true, crp_mode, None)
33}
34
35pub fn handle_with_task(
36 cache: &mut SessionCache,
37 path: &str,
38 mode: &str,
39 crp_mode: CrpMode,
40 task: Option<&str>,
41) -> String {
42 handle_with_options(cache, path, mode, false, crp_mode, task)
43}
44
45pub fn handle_fresh_with_task(
46 cache: &mut SessionCache,
47 path: &str,
48 mode: &str,
49 crp_mode: CrpMode,
50 task: Option<&str>,
51) -> String {
52 handle_with_options(cache, path, mode, true, crp_mode, task)
53}
54
55fn handle_with_options(
56 cache: &mut SessionCache,
57 path: &str,
58 mode: &str,
59 fresh: bool,
60 crp_mode: CrpMode,
61 task: Option<&str>,
62) -> String {
63 let file_ref = cache.get_file_ref(path);
64 let short = protocol::shorten_path(path);
65 let ext = Path::new(path)
66 .extension()
67 .and_then(|e| e.to_str())
68 .unwrap_or("");
69
70 if fresh {
71 cache.invalidate(path);
72 }
73
74 if mode == "diff" {
75 return handle_diff(cache, path, &file_ref);
76 }
77
78 if cache.get(path).is_some() {
79 if mode == "full" {
80 return handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, crp_mode);
81 }
82 let existing = cache.get(path).unwrap();
83 let content = existing.content.clone();
84 let original_tokens = existing.original_tokens;
85 return process_mode(
86 &content,
87 mode,
88 &file_ref,
89 &short,
90 ext,
91 original_tokens,
92 crp_mode,
93 path,
94 task,
95 );
96 }
97
98 let content = match read_file_lossy(path) {
99 Ok(c) => c,
100 Err(e) => return format!("ERROR: {e}"),
101 };
102
103 let (entry, _is_hit) = cache.store(path, content.clone());
104
105 if mode == "full" {
106 return format_full_output(cache, &file_ref, &short, ext, &content, &entry, crp_mode);
107 }
108
109 process_mode(
110 &content,
111 mode,
112 &file_ref,
113 &short,
114 ext,
115 entry.original_tokens,
116 crp_mode,
117 path,
118 task,
119 )
120}
121
122const AUTO_DELTA_THRESHOLD: f64 = 0.6;
123
124fn handle_full_with_auto_delta(
126 cache: &mut SessionCache,
127 path: &str,
128 file_ref: &str,
129 short: &str,
130 ext: &str,
131 crp_mode: CrpMode,
132) -> String {
133 let disk_content = match read_file_lossy(path) {
134 Ok(c) => c,
135 Err(_) => {
136 cache.record_cache_hit(path);
137 let existing = cache.get(path).unwrap();
138 return format!(
139 "{file_ref}={short} cached {}t {}L",
140 existing.read_count, existing.line_count
141 );
142 }
143 };
144
145 let old_content = cache.get(path).unwrap().content.clone();
146 let (entry, is_hit) = cache.store(path, disk_content.clone());
147
148 if is_hit {
149 return format!(
150 "{file_ref}={short} cached {}t {}L",
151 entry.read_count, entry.line_count
152 );
153 }
154
155 let diff = compressor::diff_content(&old_content, &disk_content);
156 let diff_tokens = count_tokens(&diff);
157 let full_tokens = entry.original_tokens;
158
159 if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
160 let savings = protocol::format_savings(full_tokens, diff_tokens);
161 return format!(
162 "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
163 disk_content.lines().count()
164 );
165 }
166
167 format_full_output(cache, file_ref, short, ext, &disk_content, &entry, crp_mode)
168}
169
170fn format_full_output(
171 _cache: &mut SessionCache,
172 file_ref: &str,
173 short: &str,
174 ext: &str,
175 content: &str,
176 entry: &crate::core::cache::CacheEntry,
177 _crp_mode: CrpMode,
178) -> String {
179 let tokens = entry.original_tokens;
180 let metadata = build_header(file_ref, short, ext, content, entry.line_count, true);
181
182 let mut sym = SymbolMap::new();
183 let idents = symbol_map::extract_identifiers(content, ext);
184 for ident in &idents {
185 sym.register(ident);
186 }
187
188 let sym_beneficial = if sym.len() >= 3 {
189 let sym_table = sym.format_table();
190 let compressed = sym.apply(content);
191 let original_tok = count_tokens(content);
192 let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
193 let net_saving = original_tok.saturating_sub(compressed_tok);
194 original_tok > 0 && net_saving * 100 / original_tok >= 5
195 } else {
196 false
197 };
198
199 if sym_beneficial {
200 let compressed_content = sym.apply(content);
201 let sym_table = sym.format_table();
202 let output = format!("{compressed_content}{sym_table}\n{metadata}");
203 let sent = count_tokens(&output);
204 let savings = protocol::format_savings(tokens, sent);
205 return format!("{output}\n{savings}");
206 }
207
208 let output = format!("{content}\n{metadata}");
209 let sent = count_tokens(&output);
210 let savings = protocol::format_savings(tokens, sent);
211 format!("{output}\n{savings}")
212}
213
214fn build_header(
215 file_ref: &str,
216 short: &str,
217 ext: &str,
218 content: &str,
219 line_count: usize,
220 include_deps: bool,
221) -> String {
222 let mut header = format!("{file_ref}={short} {line_count}L");
223
224 if include_deps {
225 let dep_info = deps::extract_deps(content, ext);
226 if !dep_info.imports.is_empty() {
227 let imports_str: Vec<&str> = dep_info
228 .imports
229 .iter()
230 .take(8)
231 .map(|s| s.as_str())
232 .collect();
233 header.push_str(&format!("\n deps {}", imports_str.join(",")));
234 }
235 if !dep_info.exports.is_empty() {
236 let exports_str: Vec<&str> = dep_info
237 .exports
238 .iter()
239 .take(8)
240 .map(|s| s.as_str())
241 .collect();
242 header.push_str(&format!("\n exports {}", exports_str.join(",")));
243 }
244 }
245
246 header
247}
248
249#[allow(clippy::too_many_arguments)]
250fn process_mode(
251 content: &str,
252 mode: &str,
253 file_ref: &str,
254 short: &str,
255 ext: &str,
256 original_tokens: usize,
257 crp_mode: CrpMode,
258 file_path: &str,
259 task: Option<&str>,
260) -> String {
261 let line_count = content.lines().count();
262
263 match mode {
264 "auto" => {
265 let sig =
266 crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
267 let predictor = crate::core::mode_predictor::ModePredictor::new();
268 let resolved = predictor
269 .predict_best_mode(&sig)
270 .unwrap_or_else(|| "full".to_string());
271 process_mode(
272 content,
273 &resolved,
274 file_ref,
275 short,
276 ext,
277 original_tokens,
278 crp_mode,
279 file_path,
280 task,
281 )
282 }
283 "signatures" => {
284 let sigs = signatures::extract_signatures(content, ext);
285 let dep_info = deps::extract_deps(content, ext);
286
287 let mut output = format!("{file_ref}={short} {line_count}L");
288 if !dep_info.imports.is_empty() {
289 let imports_str: Vec<&str> = dep_info
290 .imports
291 .iter()
292 .take(8)
293 .map(|s| s.as_str())
294 .collect();
295 output.push_str(&format!("\n deps {}", imports_str.join(",")));
296 }
297 for sig in &sigs {
298 output.push('\n');
299 if crp_mode.is_tdd() {
300 output.push_str(&sig.to_tdd());
301 } else {
302 output.push_str(&sig.to_compact());
303 }
304 }
305 let sent = count_tokens(&output);
306 let savings = protocol::format_savings(original_tokens, sent);
307 append_compressed_hint(&format!("{output}\n{savings}"), file_path)
308 }
309 "map" => {
310 if ext == "php" {
311 if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
312 {
313 let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
314 let sent = count_tokens(&output);
315 let savings = protocol::format_savings(original_tokens, sent);
316 output.push('\n');
317 output.push_str(&savings);
318 return append_compressed_hint(&output, file_path);
319 }
320 }
321
322 let sigs = signatures::extract_signatures(content, ext);
323 let dep_info = deps::extract_deps(content, ext);
324
325 let mut output = format!("{file_ref}={short} {line_count}L");
326
327 if !dep_info.imports.is_empty() {
328 output.push_str("\n deps: ");
329 output.push_str(&dep_info.imports.join(", "));
330 }
331
332 if !dep_info.exports.is_empty() {
333 output.push_str("\n exports: ");
334 output.push_str(&dep_info.exports.join(", "));
335 }
336
337 let key_sigs: Vec<&signatures::Signature> = sigs
338 .iter()
339 .filter(|s| s.is_exported || s.indent == 0)
340 .collect();
341
342 if !key_sigs.is_empty() {
343 output.push_str("\n API:");
344 for sig in &key_sigs {
345 output.push_str("\n ");
346 if crp_mode.is_tdd() {
347 output.push_str(&sig.to_tdd());
348 } else {
349 output.push_str(&sig.to_compact());
350 }
351 }
352 }
353
354 let sent = count_tokens(&output);
355 let savings = protocol::format_savings(original_tokens, sent);
356 append_compressed_hint(&format!("{output}\n{savings}"), file_path)
357 }
358 "aggressive" => {
359 let raw = compressor::aggressive_compress(content, Some(ext));
360 let compressed = compressor::safeguard_ratio(content, &raw);
361 let header = build_header(file_ref, short, ext, content, line_count, true);
362
363 let mut sym = SymbolMap::new();
364 let idents = symbol_map::extract_identifiers(&compressed, ext);
365 for ident in &idents {
366 sym.register(ident);
367 }
368
369 let sym_beneficial = if sym.len() >= 3 {
370 let sym_table = sym.format_table();
371 let sym_applied = sym.apply(&compressed);
372 let orig_tok = count_tokens(&compressed);
373 let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
374 let net = orig_tok.saturating_sub(comp_tok);
375 orig_tok > 0 && net * 100 / orig_tok >= 5
376 } else {
377 false
378 };
379
380 if sym_beneficial {
381 let sym_output = sym.apply(&compressed);
382 let sym_table = sym.format_table();
383 let sent = count_tokens(&sym_output) + count_tokens(&sym_table);
384 let savings = protocol::format_savings(original_tokens, sent);
385 return append_compressed_hint(
386 &format!("{header}\n{sym_output}{sym_table}\n{savings}"),
387 file_path,
388 );
389 }
390
391 let sent = count_tokens(&compressed);
392 let savings = protocol::format_savings(original_tokens, sent);
393 append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path)
394 }
395 "entropy" => {
396 let result = entropy::entropy_compress_adaptive(content, file_path);
397 let avg_h = entropy::analyze_entropy(content).avg_entropy;
398 let header = build_header(file_ref, short, ext, content, line_count, false);
399 let techs = result.techniques.join(", ");
400 let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
401 let sent = count_tokens(&output);
402 let savings = protocol::format_savings(original_tokens, sent);
403 append_compressed_hint(&format!("{output}\n{savings}"), file_path)
404 }
405 "task" => {
406 let task_str = task.unwrap_or("");
407 if task_str.is_empty() {
408 let header = build_header(file_ref, short, ext, content, line_count, true);
409 return format!("{header}\n{content}\n[task mode: no task set — returned full]");
410 }
411 let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
412 if keywords.is_empty() {
413 let header = build_header(file_ref, short, ext, content, line_count, true);
414 return format!(
415 "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
416 );
417 }
418 let filtered =
419 crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
420 let filtered_lines = filtered.lines().count();
421 let header = format!(
422 "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
423 );
424 let sent = count_tokens(&filtered) + count_tokens(&header);
425 let savings = protocol::format_savings(original_tokens, sent);
426 append_compressed_hint(&format!("{header}\n{filtered}\n{savings}"), file_path)
427 }
428 "reference" => {
429 let tok = count_tokens(content);
430 let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
431 let sent = count_tokens(&output);
432 let savings = protocol::format_savings(original_tokens, sent);
433 format!("{output}\n{savings}")
434 }
435 mode if mode.starts_with("lines:") => {
436 let range_str = &mode[6..];
437 let extracted = extract_line_range(content, range_str);
438 let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
439 let sent = count_tokens(&extracted);
440 let savings = protocol::format_savings(original_tokens, sent);
441 format!("{header}\n{extracted}\n{savings}")
442 }
443 _ => {
444 let header = build_header(file_ref, short, ext, content, line_count, true);
445 format!("{header}\n{content}")
446 }
447 }
448}
449
/// Selects lines of `content` by a comma-separated list of 1-based line
/// numbers and inclusive ranges, e.g. `"3"`, `"10-20"`, `"1-5, 40, 90-"`.
///
/// Rules:
/// - `a-b` selects lines `a` through `b`; an empty side is open-ended
///   (`"-5"` starts at line 1, `"90-"` runs to the last line).
/// - Bounds are clamped to the file, so `0-3` starts at line 1 and an end past
///   the last line stops at the last line.
/// - A part that is not a number, or a range with a non-empty side that is not
///   a number, is skipped instead of being widened to an open range.
/// - Parts are emitted in the order given; overlapping parts repeat lines.
///
/// Each selected line is rendered as `"{n:>4}| {text}"`. When nothing is
/// selected the literal `"No lines matched the range."` is returned.
fn extract_line_range(content: &str, range_str: &str) -> String {
    // Parses one side of `a-b`: empty means "open" (use `open_default`);
    // anything else must be a valid number.
    fn parse_bound(raw: &str, open_default: usize) -> Option<usize> {
        let raw = raw.trim();
        if raw.is_empty() {
            Some(open_default)
        } else {
            raw.parse::<usize>().ok()
        }
    }

    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    let mut selected: Vec<String> = Vec::new();

    for part in range_str.split(',').map(str::trim) {
        let bounds = match part.split_once('-') {
            Some((start_s, end_s)) => parse_bound(start_s, 1).zip(parse_bound(end_s, total)),
            // A single number is the range n..=n.
            None => part.parse::<usize>().ok().map(|n| (n, n)),
        };

        let (start, end) = match bounds {
            Some((start, end)) => (start.max(1), end.min(total)),
            None => continue,
        };
        if start > end {
            continue;
        }

        // `1 <= start <= end <= total` holds here, so the slice is in bounds.
        for (offset, line) in lines[start - 1..end].iter().enumerate() {
            let number = start + offset;
            selected.push(format!("{number:>4}| {line}"));
        }
    }

    if selected.is_empty() {
        "No lines matched the range.".to_string()
    } else {
        selected.join("\n")
    }
}
478
479fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> String {
480 let short = protocol::shorten_path(path);
481 let old_content = cache.get(path).map(|e| e.content.clone());
482
483 let new_content = match read_file_lossy(path) {
484 Ok(c) => c,
485 Err(e) => return format!("ERROR: {e}"),
486 };
487
488 let original_tokens = count_tokens(&new_content);
489
490 let diff_output = if let Some(old) = &old_content {
491 compressor::diff_content(old, &new_content)
492 } else {
493 format!("[first read]\n{new_content}")
494 };
495
496 cache.store(path, new_content);
497
498 let sent = count_tokens(&diff_output);
499 let savings = protocol::format_savings(original_tokens, sent);
500 format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}")
501}
502
#[cfg(test)]
mod tests {
    use super::*;

    /// Calls `process_mode` the way every test in this module does: file ref
    /// `F1`, extension `rs`, CRP off, `name` as both the short and the full
    /// path, and the content's own token count as the baseline.
    fn render(content: &str, mode: &str, name: &str, task: Option<&str>) -> String {
        process_mode(
            content,
            mode,
            "F1",
            name,
            "rs",
            count_tokens(content),
            CrpMode::Off,
            name,
            task,
        )
    }

    /// The plain header must not contain bracket characters.
    #[test]
    fn test_header_toon_format_no_brackets() {
        let source = "use std::io;\nfn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", source, 2, false);
        assert!(!header.contains('['));
        assert!(!header.contains(']'));
        assert!(header.contains("F1=main.rs 2L"));
    }

    /// When a deps line is present it uses the indented form, not `deps:[…]`.
    #[test]
    fn test_header_toon_deps_indented() {
        let source = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", source, 3, true);
        if !header.contains("deps") {
            return;
        }
        assert!(
            header.contains("\n deps "),
            "deps should use indented TOON format"
        );
        assert!(
            !header.contains("deps:["),
            "deps should not use bracket format"
        );
    }

    /// The current header must not cost more tokens than the old bracket format.
    #[test]
    fn test_header_toon_saves_tokens() {
        let source = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
        let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
        let new_header = build_header("F1", "main.rs", "rs", source, 4, true);
        let old_tokens = count_tokens(&old_header);
        let new_tokens = count_tokens(&new_header);
        assert!(
            new_tokens <= old_tokens,
            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
        );
    }

    /// Each TDD symbol must tokenize to at most two tokens.
    #[test]
    fn test_tdd_symbols_are_compact() {
        let symbols = [
            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
        ];
        for sym in symbols.iter() {
            let tok = count_tokens(sym);
            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
        }
    }

    /// Task mode with a usable task must shrink the output and mark it as filtered.
    #[test]
    fn test_task_mode_filters_content() {
        let mut source_lines: Vec<String> = Vec::with_capacity(200);
        for i in 0..200 {
            let line = if i % 20 == 0 {
                format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
            } else {
                format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
            };
            source_lines.push(line);
        }
        let content = source_lines.join("\n");
        let full_tokens = count_tokens(&content);

        let result = render(&content, "task", "test.rs", Some("fix bug in validate_token"));
        let result_tokens = count_tokens(&result);
        assert!(
            result_tokens < full_tokens,
            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
        );
        assert!(
            result.contains("task-filtered"),
            "output should contain task-filtered marker"
        );
    }

    /// Task mode without a task must say so in its output.
    #[test]
    fn test_task_mode_without_task_returns_full() {
        let content = "fn main() {}\nfn helper() {}\n";
        let result = render(content, "task", "test.rs", None);
        assert!(
            result.contains("no task set"),
            "should indicate no task: {result}"
        );
    }

    /// Reference mode must stay within three lines and mention lines and tokens.
    #[test]
    fn test_reference_mode_one_line() {
        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
        let result = render(content, "reference", "test.rs", None);
        let emitted = result.lines().count();
        assert!(
            emitted <= 3,
            "reference mode should be very compact, got {} lines",
            emitted
        );
        assert!(result.contains("lines"), "should contain line count");
        assert!(result.contains("tok"), "should contain token count");
    }

    /// Compares token usage of full, task, signatures and reference output on a
    /// generated file and prints the numbers to stderr.
    #[test]
    fn benchmark_task_conditioned_compression() {
        let content = generate_benchmark_code(500);
        let task = Some("fix authentication in validate_token");

        let full_output = render(&content, "full", "server.rs", task);
        let task_output = render(&content, "task", "server.rs", task);
        let sig_output = render(&content, "signatures", "server.rs", task);
        let ref_output = render(&content, "reference", "server.rs", task);

        let full_tok = count_tokens(&full_output);
        let task_tok = count_tokens(&task_output);
        let sig_tok = count_tokens(&sig_output);
        let ref_tok = count_tokens(&ref_output);

        let baseline = full_tok as f64;
        let task_pct = (1.0 - task_tok as f64 / baseline) * 100.0;
        let sig_pct = (1.0 - sig_tok as f64 / baseline) * 100.0;
        let ref_pct = (1.0 - ref_tok as f64 / baseline) * 100.0;

        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
        eprintln!("Source: 500-line Rust file, task='fix authentication in validate_token'");
        eprintln!(" full: {full_tok:>6} tokens (baseline)");
        eprintln!(" task: {task_tok:>6} tokens ({:.0}% savings)", task_pct);
        eprintln!(" signatures: {sig_tok:>6} tokens ({:.0}% savings)", sig_pct);
        eprintln!(" reference: {ref_tok:>6} tokens ({:.0}% savings)", ref_pct);
        eprintln!("================================================\n");

        assert!(task_tok < full_tok, "task mode should save tokens");
        assert!(sig_tok < full_tok, "signatures should save tokens");
        assert!(ref_tok < sig_tok, "reference should be most compact");
    }

    /// Generates Rust-looking text: a fixed preamble containing `validate_token`,
    /// then filler handler lines until `lines` is reached, then a closing brace.
    fn generate_benchmark_code(lines: usize) -> String {
        const PREAMBLE: &[&str] = &[
            "use std::collections::HashMap;",
            "use crate::core::auth;",
            "",
            "pub struct Server {",
            " config: Config,",
            " cache: HashMap<String, String>,",
            "}",
            "",
            "impl Server {",
            " pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {",
            " let decoded = auth::decode_jwt(token)?;",
            " if decoded.exp < chrono::Utc::now().timestamp() {",
            " return Err(AuthError::Expired);",
            " }",
            " Ok(decoded.claims)",
            " }",
            "",
        ];

        let mut code: Vec<String> = Vec::with_capacity(lines);
        code.extend(PREAMBLE.iter().map(|line| line.to_string()));

        // Every 30 filler lines form one handler: an opening line, 28 body
        // lines, and a closing brace.
        let remaining = lines.saturating_sub(code.len());
        code.extend((0..remaining).map(|i| match i % 30 {
            0 => format!(" pub fn handler_{i}(&self, req: Request) -> Response {{"),
            29 => " }".to_string(),
            _ => format!(" let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"),
        }));

        code.push("}".to_string());
        code.join("\n")
    }
}