1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Notice attached to every lossy view so the reader knows the output is not
/// the complete source.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Returns `output` followed by the compressed-output notice and a ready-made
/// `ctx_read` call that would fetch `file_path` in full mode.
fn append_compressed_hint(output: &str, file_path: &str) -> String {
    // Literal pieces below add up to well under 32 bytes.
    let mut hinted =
        String::with_capacity(output.len() + COMPRESSED_HINT.len() + file_path.len() + 32);
    hinted.push_str(output);
    hinted.push('\n');
    hinted.push_str(COMPRESSED_HINT);
    hinted.push_str("\n ctx_read(\"");
    hinted.push_str(file_path);
    hinted.push_str("\", mode=\"full\")");
    hinted
}
18
19pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
20 let cap = crate::core::limits::max_read_bytes();
21 if let Ok(meta) = std::fs::metadata(path) {
22 if meta.len() > cap as u64 {
23 return Err(std::io::Error::other(format!(
24 "file too large ({} bytes, cap {} via LCTX_MAX_READ_BYTES)",
25 meta.len(),
26 cap
27 )));
28 }
29 }
30 let bytes = std::fs::read(path)?;
31 match String::from_utf8(bytes) {
32 Ok(s) => Ok(s),
33 Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
34 }
35}
36
37pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
38 handle_with_options(cache, path, mode, false, crp_mode, None)
39}
40
41pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
42 handle_with_options(cache, path, mode, true, crp_mode, None)
43}
44
45pub fn handle_with_task(
46 cache: &mut SessionCache,
47 path: &str,
48 mode: &str,
49 crp_mode: CrpMode,
50 task: Option<&str>,
51) -> String {
52 handle_with_options(cache, path, mode, false, crp_mode, task)
53}
54
55pub fn handle_with_task_resolved(
56 cache: &mut SessionCache,
57 path: &str,
58 mode: &str,
59 crp_mode: CrpMode,
60 task: Option<&str>,
61) -> (String, String) {
62 handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
63}
64
65pub fn handle_fresh_with_task(
66 cache: &mut SessionCache,
67 path: &str,
68 mode: &str,
69 crp_mode: CrpMode,
70 task: Option<&str>,
71) -> String {
72 handle_with_options(cache, path, mode, true, crp_mode, task)
73}
74
75pub fn handle_fresh_with_task_resolved(
76 cache: &mut SessionCache,
77 path: &str,
78 mode: &str,
79 crp_mode: CrpMode,
80 task: Option<&str>,
81) -> (String, String) {
82 handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
83}
84
85fn handle_with_options(
86 cache: &mut SessionCache,
87 path: &str,
88 mode: &str,
89 fresh: bool,
90 crp_mode: CrpMode,
91 task: Option<&str>,
92) -> String {
93 handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).0
94}
95
96fn handle_with_options_resolved(
97 cache: &mut SessionCache,
98 path: &str,
99 mode: &str,
100 fresh: bool,
101 crp_mode: CrpMode,
102 task: Option<&str>,
103) -> (String, String) {
104 let file_ref = cache.get_file_ref(path);
105 let short = protocol::shorten_path(path);
106 let ext = Path::new(path)
107 .extension()
108 .and_then(|e| e.to_str())
109 .unwrap_or("");
110
111 if fresh {
112 cache.invalidate(path);
113 }
114
115 if mode == "diff" {
116 return (handle_diff(cache, path, &file_ref), "diff".to_string());
117 }
118
119 if let Some(existing) = cache.get(path) {
120 if mode == "full" {
121 return (
122 handle_full_with_auto_delta(cache, path, &file_ref, &short, ext),
123 "full".to_string(),
124 );
125 }
126 let content = existing.content.clone();
127 let original_tokens = existing.original_tokens;
128 let resolved_mode = if mode == "auto" {
129 resolve_auto_mode(path, original_tokens, task)
130 } else {
131 mode.to_string()
132 };
133 let out = process_mode(
134 &content,
135 &resolved_mode,
136 &file_ref,
137 &short,
138 ext,
139 original_tokens,
140 crp_mode,
141 path,
142 task,
143 );
144 return (out, resolved_mode);
145 }
146
147 let content = match read_file_lossy(path) {
148 Ok(c) => c,
149 Err(e) => return (format!("ERROR: {e}"), "error".to_string()),
150 };
151
152 let similar_hint = find_semantic_similar(path, &content);
153
154 let store_result = cache.store(path, content.clone());
155
156 update_semantic_index(path, &content);
157
158 if mode == "full" {
159 let mut output = format_full_output(
160 &file_ref,
161 &short,
162 ext,
163 &content,
164 store_result.original_tokens,
165 store_result.line_count,
166 );
167 if let Some(hint) = similar_hint {
168 output.push_str(&format!("\n{hint}"));
169 }
170 return (output, "full".to_string());
171 }
172
173 let resolved_mode = if mode == "auto" {
174 resolve_auto_mode(path, store_result.original_tokens, task)
175 } else {
176 mode.to_string()
177 };
178
179 let mut output = process_mode(
180 &content,
181 &resolved_mode,
182 &file_ref,
183 &short,
184 ext,
185 store_result.original_tokens,
186 crp_mode,
187 path,
188 task,
189 );
190 if let Some(hint) = similar_hint {
191 output.push_str(&format!("\n{hint}"));
192 }
193 (output, resolved_mode)
194}
195
196fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
197 let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
198 let predictor = crate::core::mode_predictor::ModePredictor::new();
199 let mut predicted = predictor
200 .predict_best_mode(&sig)
201 .unwrap_or_else(|| "full".to_string());
202 if predicted == "auto" {
203 predicted = "full".to_string();
204 }
205
206 if let Some(project_root) =
207 crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
208 {
209 let ext = std::path::Path::new(file_path)
210 .extension()
211 .and_then(|e| e.to_str())
212 .unwrap_or("");
213 let bucket = match original_tokens {
214 0..=2000 => "sm",
215 2001..=10000 => "md",
216 10001..=50000 => "lg",
217 _ => "xl",
218 };
219 let bandit_key = format!("{ext}_{bucket}");
220 let mut store = crate::core::bandit::BanditStore::load(&project_root);
221 let bandit = store.get_or_create(&bandit_key);
222 let arm = bandit.select_arm();
223 if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
224 predicted = "aggressive".to_string();
225 }
226 }
227
228 let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
229 policy.choose_auto_mode(task, &predicted)
230}
231
232fn find_semantic_similar(path: &str, content: &str) -> Option<String> {
233 let project_root = detect_project_root(path);
234 let index = crate::core::semantic_cache::SemanticCacheIndex::load(&project_root)?;
235
236 let similar = index.find_similar(content, 0.7);
237 let relevant: Vec<_> = similar
238 .into_iter()
239 .filter(|(p, _)| p != path)
240 .take(3)
241 .collect();
242
243 if relevant.is_empty() {
244 return None;
245 }
246
247 let hints: Vec<String> = relevant
248 .iter()
249 .map(|(p, score)| format!(" {p} ({:.0}% similar)", score * 100.0))
250 .collect();
251
252 Some(format!(
253 "[semantic: {} similar file(s) in cache]\n{}",
254 relevant.len(),
255 hints.join("\n")
256 ))
257}
258
259fn update_semantic_index(path: &str, content: &str) {
260 let project_root = detect_project_root(path);
261 let session_id = format!("{}", std::process::id());
262 let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
263 index.add_file(path, content, &session_id);
264 let _ = index.save(&project_root);
265}
266
267fn detect_project_root(path: &str) -> String {
268 crate::core::protocol::detect_project_root_or_cwd(path)
269}
270
271const AUTO_DELTA_THRESHOLD: f64 = 0.6;
272
273fn handle_full_with_auto_delta(
275 cache: &mut SessionCache,
276 path: &str,
277 file_ref: &str,
278 short: &str,
279 ext: &str,
280) -> String {
281 let disk_content = match read_file_lossy(path) {
282 Ok(c) => c,
283 Err(_) => {
284 cache.record_cache_hit(path);
285 let existing = cache.get(path).unwrap();
286 return format!(
287 "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
288 existing.read_count, existing.line_count
289 );
290 }
291 };
292
293 let old_content = cache.get(path).unwrap().content.clone();
294 let store_result = cache.store(path, disk_content.clone());
295
296 if store_result.was_hit {
297 return format!(
298 "{file_ref}={short} cached {}t {}L\nFile already in context from previous read. Use fresh=true to re-read if content needed again.",
299 store_result.read_count, store_result.line_count
300 );
301 }
302
303 let diff = compressor::diff_content(&old_content, &disk_content);
304 let diff_tokens = count_tokens(&diff);
305 let full_tokens = store_result.original_tokens;
306
307 if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
308 let savings = protocol::format_savings(full_tokens, diff_tokens);
309 return format!(
310 "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
311 disk_content.lines().count()
312 );
313 }
314
315 format_full_output(
316 file_ref,
317 short,
318 ext,
319 &disk_content,
320 store_result.original_tokens,
321 store_result.line_count,
322 )
323}
324
325fn format_full_output(
326 file_ref: &str,
327 short: &str,
328 ext: &str,
329 content: &str,
330 original_tokens: usize,
331 line_count: usize,
332) -> String {
333 let tokens = original_tokens;
334 let metadata = build_header(file_ref, short, ext, content, line_count, true);
335
336 let mut sym = SymbolMap::new();
337 let idents = symbol_map::extract_identifiers(content, ext);
338 for ident in &idents {
339 sym.register(ident);
340 }
341
342 let sym_beneficial = if sym.len() >= 3 {
343 let sym_table = sym.format_table();
344 let compressed = sym.apply(content);
345 let original_tok = count_tokens(content);
346 let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
347 let net_saving = original_tok.saturating_sub(compressed_tok);
348 original_tok > 0 && net_saving * 100 / original_tok >= 5
349 } else {
350 false
351 };
352
353 if sym_beneficial {
354 let compressed_content = sym.apply(content);
355 let sym_table = sym.format_table();
356 let output = format!("{compressed_content}{sym_table}\n{metadata}");
357 let sent = count_tokens(&output);
358 let savings = protocol::format_savings(tokens, sent);
359 return format!("{output}\n{savings}");
360 }
361
362 let output = format!("{content}\n{metadata}");
363 let sent = count_tokens(&output);
364 let savings = protocol::format_savings(tokens, sent);
365 format!("{output}\n{savings}")
366}
367
368fn build_header(
369 file_ref: &str,
370 short: &str,
371 ext: &str,
372 content: &str,
373 line_count: usize,
374 include_deps: bool,
375) -> String {
376 let mut header = format!("{file_ref}={short} {line_count}L");
377
378 if include_deps {
379 let dep_info = deps::extract_deps(content, ext);
380 if !dep_info.imports.is_empty() {
381 let imports_str: Vec<&str> = dep_info
382 .imports
383 .iter()
384 .take(8)
385 .map(|s| s.as_str())
386 .collect();
387 header.push_str(&format!("\n deps {}", imports_str.join(",")));
388 }
389 if !dep_info.exports.is_empty() {
390 let exports_str: Vec<&str> = dep_info
391 .exports
392 .iter()
393 .take(8)
394 .map(|s| s.as_str())
395 .collect();
396 header.push_str(&format!("\n exports {}", exports_str.join(",")));
397 }
398 }
399
400 header
401}
402
403#[allow(clippy::too_many_arguments)]
404fn process_mode(
405 content: &str,
406 mode: &str,
407 file_ref: &str,
408 short: &str,
409 ext: &str,
410 original_tokens: usize,
411 crp_mode: CrpMode,
412 file_path: &str,
413 task: Option<&str>,
414) -> String {
415 let line_count = content.lines().count();
416
417 match mode {
418 "auto" => {
419 let chosen = resolve_auto_mode(file_path, original_tokens, task);
420 process_mode(
421 content,
422 &chosen,
423 file_ref,
424 short,
425 ext,
426 original_tokens,
427 crp_mode,
428 file_path,
429 task,
430 )
431 }
432 "signatures" => {
433 let sigs = signatures::extract_signatures(content, ext);
434 let dep_info = deps::extract_deps(content, ext);
435
436 let mut output = format!("{file_ref}={short} {line_count}L");
437 if !dep_info.imports.is_empty() {
438 let imports_str: Vec<&str> = dep_info
439 .imports
440 .iter()
441 .take(8)
442 .map(|s| s.as_str())
443 .collect();
444 output.push_str(&format!("\n deps {}", imports_str.join(",")));
445 }
446 for sig in &sigs {
447 output.push('\n');
448 if crp_mode.is_tdd() {
449 output.push_str(&sig.to_tdd());
450 } else {
451 output.push_str(&sig.to_compact());
452 }
453 }
454 let sent = count_tokens(&output);
455 let savings = protocol::format_savings(original_tokens, sent);
456 append_compressed_hint(&format!("{output}\n{savings}"), file_path)
457 }
458 "map" => {
459 if ext == "php" {
460 if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
461 {
462 let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
463 let sent = count_tokens(&output);
464 let savings = protocol::format_savings(original_tokens, sent);
465 output.push('\n');
466 output.push_str(&savings);
467 return append_compressed_hint(&output, file_path);
468 }
469 }
470
471 let sigs = signatures::extract_signatures(content, ext);
472 let dep_info = deps::extract_deps(content, ext);
473
474 let mut output = format!("{file_ref}={short} {line_count}L");
475
476 if !dep_info.imports.is_empty() {
477 output.push_str("\n deps: ");
478 output.push_str(&dep_info.imports.join(", "));
479 }
480
481 if !dep_info.exports.is_empty() {
482 output.push_str("\n exports: ");
483 output.push_str(&dep_info.exports.join(", "));
484 }
485
486 let key_sigs: Vec<&signatures::Signature> = sigs
487 .iter()
488 .filter(|s| s.is_exported || s.indent == 0)
489 .collect();
490
491 if !key_sigs.is_empty() {
492 output.push_str("\n API:");
493 for sig in &key_sigs {
494 output.push_str("\n ");
495 if crp_mode.is_tdd() {
496 output.push_str(&sig.to_tdd());
497 } else {
498 output.push_str(&sig.to_compact());
499 }
500 }
501 }
502
503 let sent = count_tokens(&output);
504 let savings = protocol::format_savings(original_tokens, sent);
505 append_compressed_hint(&format!("{output}\n{savings}"), file_path)
506 }
507 "aggressive" => {
508 #[cfg(feature = "tree-sitter")]
509 let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
510 #[cfg(not(feature = "tree-sitter"))]
511 let ast_pruned: Option<String> = None;
512
513 let base = ast_pruned.as_deref().unwrap_or(content);
514
515 let session_intent = crate::core::session::SessionState::load_latest()
516 .and_then(|s| s.active_structured_intent);
517 let raw = if let Some(ref intent) = session_intent {
518 compressor::task_aware_compress(base, Some(ext), intent)
519 } else {
520 compressor::aggressive_compress(base, Some(ext))
521 };
522 let compressed = compressor::safeguard_ratio(content, &raw);
523 let header = build_header(file_ref, short, ext, content, line_count, true);
524
525 let mut sym = SymbolMap::new();
526 let idents = symbol_map::extract_identifiers(&compressed, ext);
527 for ident in &idents {
528 sym.register(ident);
529 }
530
531 let sym_beneficial = if sym.len() >= 3 {
532 let sym_table = sym.format_table();
533 let sym_applied = sym.apply(&compressed);
534 let orig_tok = count_tokens(&compressed);
535 let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
536 let net = orig_tok.saturating_sub(comp_tok);
537 orig_tok > 0 && net * 100 / orig_tok >= 5
538 } else {
539 false
540 };
541
542 if sym_beneficial {
543 let sym_output = sym.apply(&compressed);
544 let sym_table = sym.format_table();
545 let sent = count_tokens(&sym_output) + count_tokens(&sym_table);
546 let savings = protocol::format_savings(original_tokens, sent);
547 return append_compressed_hint(
548 &format!("{header}\n{sym_output}{sym_table}\n{savings}"),
549 file_path,
550 );
551 }
552
553 let sent = count_tokens(&compressed);
554 let savings = protocol::format_savings(original_tokens, sent);
555 append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path)
556 }
557 "entropy" => {
558 let result = entropy::entropy_compress_adaptive(content, file_path);
559 let avg_h = entropy::analyze_entropy(content).avg_entropy;
560 let header = build_header(file_ref, short, ext, content, line_count, false);
561 let techs = result.techniques.join(", ");
562 let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
563 let sent = count_tokens(&output);
564 let savings = protocol::format_savings(original_tokens, sent);
565 let compression_ratio = if original_tokens > 0 {
566 1.0 - (sent as f64 / original_tokens as f64)
567 } else {
568 0.0
569 };
570 crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
571 append_compressed_hint(&format!("{output}\n{savings}"), file_path)
572 }
573 "task" => {
574 let task_str = task.unwrap_or("");
575 if task_str.is_empty() {
576 let header = build_header(file_ref, short, ext, content, line_count, true);
577 return format!("{header}\n{content}\n[task mode: no task set — returned full]");
578 }
579 let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
580 if keywords.is_empty() {
581 let header = build_header(file_ref, short, ext, content, line_count, true);
582 return format!(
583 "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
584 );
585 }
586 let filtered =
587 crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
588 let filtered_lines = filtered.lines().count();
589 let header = format!(
590 "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
591 );
592 let project_root = detect_project_root(file_path);
593 let graph_ctx = crate::core::graph_context::build_graph_context(
594 file_path,
595 &project_root,
596 Some(crate::core::graph_context::GraphContextOptions::default()),
597 )
598 .map(|c| crate::core::graph_context::format_graph_context(&c))
599 .unwrap_or_default();
600
601 let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
602 let savings = protocol::format_savings(original_tokens, sent);
603 append_compressed_hint(
604 &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
605 file_path,
606 )
607 }
608 "reference" => {
609 let tok = count_tokens(content);
610 let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
611 let sent = count_tokens(&output);
612 let savings = protocol::format_savings(original_tokens, sent);
613 format!("{output}\n{savings}")
614 }
615 mode if mode.starts_with("lines:") => {
616 let range_str = &mode[6..];
617 let extracted = extract_line_range(content, range_str);
618 let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
619 let sent = count_tokens(&extracted);
620 let savings = protocol::format_savings(original_tokens, sent);
621 format!("{header}\n{extracted}\n{savings}")
622 }
623 unknown => {
624 let header = build_header(file_ref, short, ext, content, line_count, true);
625 format!(
626 "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
627 )
628 }
629 }
630}
631
/// Extracts the lines named by `range_str` from `content`, each prefixed with
/// its 1-based number right-aligned to width 4 and `| `.
///
/// `range_str` is a comma-separated list of single line numbers (`7`) and
/// inclusive ranges (`3-9`). In a range, a start that fails to parse is
/// treated as 1 and an end that fails to parse as the last line; both ends
/// are clamped to the file. Single numbers outside the file and unparsable
/// entries are skipped.
///
/// Returns `"No lines matched the range."` when nothing was selected.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    let render = |n: usize| format!("{n:>4}| {}", lines[n - 1]);

    let mut selected: Vec<String> = Vec::new();
    for spec in range_str.split(',').map(str::trim) {
        match spec.split_once('-') {
            Some((lo, hi)) => {
                // After clamping, every number in first..=last is a valid line.
                let first = lo.trim().parse::<usize>().unwrap_or(1).max(1);
                let last = hi.trim().parse::<usize>().unwrap_or(total).min(total);
                selected.extend((first..=last).map(&render));
            }
            None => {
                if let Ok(n) = spec.parse::<usize>() {
                    if (1..=total).contains(&n) {
                        selected.push(render(n));
                    }
                }
            }
        }
    }

    if selected.is_empty() {
        return "No lines matched the range.".to_string();
    }
    selected.join("\n")
}
660
661fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> String {
662 let short = protocol::shorten_path(path);
663 let old_content = cache.get(path).map(|e| e.content.clone());
664
665 let new_content = match read_file_lossy(path) {
666 Ok(c) => c,
667 Err(e) => return format!("ERROR: {e}"),
668 };
669
670 let original_tokens = count_tokens(&new_content);
671
672 let diff_output = if let Some(old) = &old_content {
673 compressor::diff_content(old, &new_content)
674 } else {
675 format!("[first read]\n{new_content}")
676 };
677
678 cache.store(path, new_content);
679
680 let sent = count_tokens(&diff_output);
681 let savings = protocol::format_savings(original_tokens, sent);
682 format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}")
683}
684
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `process_mode` on `content` as Rust file `file_name` with file ref
    /// `F1`, CRP off, and the content's own token count as the baseline.
    fn run_mode(content: &str, mode: &str, file_name: &str, task: Option<&str>) -> String {
        process_mode(
            content,
            mode,
            "F1",
            file_name,
            "rs",
            count_tokens(content),
            CrpMode::Off,
            file_name,
            task,
        )
    }

    // The bare header must not use the bracketed format.
    #[test]
    fn test_header_toon_format_no_brackets() {
        let content = "use std::io;\nfn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 2, false);
        assert!(!header.contains('['));
        assert!(!header.contains(']'));
        assert!(header.contains("F1=main.rs 2L"));
    }

    // When deps are present they sit on their own indented line.
    #[test]
    fn test_header_toon_deps_indented() {
        let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 3, true);
        if !header.contains("deps") {
            return;
        }
        assert!(
            header.contains("\n deps "),
            "deps should use indented TOON format"
        );
        assert!(
            !header.contains("deps:["),
            "deps should not use bracket format"
        );
    }

    // The current header must not cost more tokens than the bracketed one.
    #[test]
    fn test_header_toon_saves_tokens() {
        let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
        let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]";
        let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
        let old_tokens = count_tokens(old_header);
        let new_tokens = count_tokens(&new_header);
        assert!(
            new_tokens <= old_tokens,
            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
        );
    }

    // Each symbol must tokenize to at most two tokens.
    #[test]
    fn test_tdd_symbols_are_compact() {
        let symbols = [
            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
        ];
        for sym in symbols {
            let tok = count_tokens(sym);
            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
        }
    }

    // Task mode must shrink a file where only every 20th line is relevant.
    #[test]
    fn test_task_mode_filters_content() {
        let mut source_lines: Vec<String> = Vec::with_capacity(200);
        for i in 0..200 {
            let line = if i % 20 == 0 {
                format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
            } else {
                format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
            };
            source_lines.push(line);
        }
        let content = source_lines.join("\n");
        let full_tokens = count_tokens(&content);

        let result = run_mode(&content, "task", "test.rs", Some("fix bug in validate_token"));

        let result_tokens = count_tokens(&result);
        assert!(
            result_tokens < full_tokens,
            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
        );
        assert!(
            result.contains("task-filtered"),
            "output should contain task-filtered marker"
        );
    }

    // Without a task, task mode says so in its output.
    #[test]
    fn test_task_mode_without_task_returns_full() {
        let content = "fn main() {}\nfn helper() {}\n";
        let result = run_mode(content, "task", "test.rs", None);
        assert!(
            result.contains("no task set"),
            "should indicate no task: {result}"
        );
    }

    // Reference mode is a summary of at most three lines.
    #[test]
    fn test_reference_mode_one_line() {
        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
        let result = run_mode(content, "reference", "test.rs", None);
        let rendered_lines = result.lines().count();
        assert!(
            rendered_lines <= 3,
            "reference mode should be very compact, got {} lines",
            rendered_lines
        );
        assert!(result.contains("lines"), "should contain line count");
        assert!(result.contains("tok"), "should contain token count");
    }

    // Prints a token comparison across modes and checks their ordering.
    #[test]
    fn benchmark_task_conditioned_compression() {
        let content = generate_benchmark_code(500);
        let task = Some("fix authentication in validate_token");

        let full_output = run_mode(&content, "full", "server.rs", task);
        let task_output = run_mode(&content, "task", "server.rs", task);
        let sig_output = run_mode(&content, "signatures", "server.rs", task);
        let ref_output = run_mode(&content, "reference", "server.rs", task);

        let full_tok = count_tokens(&full_output);
        let task_tok = count_tokens(&task_output);
        let sig_tok = count_tokens(&sig_output);
        let ref_tok = count_tokens(&ref_output);

        // Percentage saved relative to the full-mode output.
        let saved_pct = |tok: usize| (1.0 - tok as f64 / full_tok as f64) * 100.0;

        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
        eprintln!("Source: 500-line Rust file, task='fix authentication in validate_token'");
        eprintln!(" full: {full_tok:>6} tokens (baseline)");
        eprintln!(
            " task: {task_tok:>6} tokens ({:.0}% savings)",
            saved_pct(task_tok)
        );
        eprintln!(
            " signatures: {sig_tok:>6} tokens ({:.0}% savings)",
            saved_pct(sig_tok)
        );
        eprintln!(
            " reference: {ref_tok:>6} tokens ({:.0}% savings)",
            saved_pct(ref_tok)
        );
        eprintln!("================================================\n");

        assert!(task_tok < full_tok, "task mode should save tokens");
        assert!(sig_tok < full_tok, "signatures should save tokens");
        assert!(ref_tok < sig_tok, "reference should be most compact");
    }

    /// Generates synthetic Rust-like source: a fixed 17-line preamble with a
    /// `validate_token` method, filler handler lines up to `lines`, and a
    /// closing brace.
    fn generate_benchmark_code(lines: usize) -> String {
        const PREAMBLE: [&str; 17] = [
            "use std::collections::HashMap;",
            "use crate::core::auth;",
            "",
            "pub struct Server {",
            " config: Config,",
            " cache: HashMap<String, String>,",
            "}",
            "",
            "impl Server {",
            " pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {",
            " let decoded = auth::decode_jwt(token)?;",
            " if decoded.exp < chrono::Utc::now().timestamp() {",
            " return Err(AuthError::Expired);",
            " }",
            " Ok(decoded.claims)",
            " }",
            "",
        ];

        let mut code: Vec<String> = Vec::with_capacity(lines);
        code.extend(PREAMBLE.iter().map(|line| line.to_string()));

        let remaining = lines.saturating_sub(code.len());
        for i in 0..remaining {
            let line = match i % 30 {
                0 => format!(" pub fn handler_{i}(&self, req: Request) -> Response {{"),
                29 => " }".to_string(),
                _ => format!(" let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"),
            };
            code.push(line);
        }
        code.push("}".to_string());
        code.join("\n")
    }
}