1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a read request: the rendered text plus bookkeeping about how it was produced.
///
/// `Debug` and `Clone` are derived so callers can log, compare in tests, and copy results.
#[derive(Debug, Clone)]
pub struct ReadOutput {
    /// Text returned to the caller.
    pub content: String,
    /// Mode that was actually applied (a concrete mode name, or `"error"` when the read failed).
    pub resolved_mode: String,
    /// Token count reported for this response.
    pub output_tokens: usize,
}
22
/// Marker line appended to reduced outputs, pointing the reader at `mode="full"`.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Modes whose rendered output is kept in the per-file compressed-output cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is listed in [`CACHEABLE_MODES`] (exact, case-sensitive match).
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|candidate| *candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32 if crp_mode.is_tdd() {
33 format!("{mode}:tdd")
34 } else {
35 mode.to_string()
36 }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40 format!("{output}\n{COMPRESSED_HINT}\n ctx_read(\"{file_path}\", mode=\"full\")")
41}
42
43pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
45 let cap = crate::core::limits::max_read_bytes();
46 if let Ok(meta) = std::fs::metadata(path) {
47 if meta.len() > cap as u64 {
48 return Err(std::io::Error::other(format!(
49 "file too large ({} bytes, cap {} via LCTX_MAX_READ_BYTES)",
50 meta.len(),
51 cap
52 )));
53 }
54 }
55 let bytes = std::fs::read(path)?;
56 match String::from_utf8(bytes) {
57 Ok(s) => Ok(s),
58 Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
59 }
60}
61
62pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
64 handle_with_options(cache, path, mode, false, crp_mode, None)
65}
66
67pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
69 handle_with_options(cache, path, mode, true, crp_mode, None)
70}
71
72pub fn handle_with_task(
74 cache: &mut SessionCache,
75 path: &str,
76 mode: &str,
77 crp_mode: CrpMode,
78 task: Option<&str>,
79) -> String {
80 handle_with_options(cache, path, mode, false, crp_mode, task)
81}
82
83pub fn handle_with_task_resolved(
85 cache: &mut SessionCache,
86 path: &str,
87 mode: &str,
88 crp_mode: CrpMode,
89 task: Option<&str>,
90) -> ReadOutput {
91 handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
92}
93
94pub fn handle_fresh_with_task(
96 cache: &mut SessionCache,
97 path: &str,
98 mode: &str,
99 crp_mode: CrpMode,
100 task: Option<&str>,
101) -> String {
102 handle_with_options(cache, path, mode, true, crp_mode, task)
103}
104
105pub fn handle_fresh_with_task_resolved(
107 cache: &mut SessionCache,
108 path: &str,
109 mode: &str,
110 crp_mode: CrpMode,
111 task: Option<&str>,
112) -> ReadOutput {
113 handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
114}
115
116fn handle_with_options(
117 cache: &mut SessionCache,
118 path: &str,
119 mode: &str,
120 fresh: bool,
121 crp_mode: CrpMode,
122 task: Option<&str>,
123) -> String {
124 handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
125}
126
127fn handle_with_options_resolved(
128 cache: &mut SessionCache,
129 path: &str,
130 mode: &str,
131 fresh: bool,
132 crp_mode: CrpMode,
133 task: Option<&str>,
134) -> ReadOutput {
135 let file_ref = cache.get_file_ref(path);
136 let short = protocol::shorten_path(path);
137 let ext = Path::new(path)
138 .extension()
139 .and_then(|e| e.to_str())
140 .unwrap_or("");
141
142 if fresh {
143 cache.invalidate(path);
144 }
145
146 if mode == "diff" {
147 let (out, sent) = handle_diff(cache, path, &file_ref);
148 return ReadOutput {
149 content: out,
150 resolved_mode: "diff".into(),
151 output_tokens: sent,
152 };
153 }
154
155 if mode != "full" {
156 if let Some(existing) = cache.get(path) {
157 let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
158 if stale {
159 cache.invalidate(path);
160 }
161 }
162 }
163
164 if let Some(existing) = cache.get(path) {
165 if mode == "full" {
166 let (out, sent) =
167 handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
168 let out = crate::core::redaction::redact_text_if_enabled(&out);
169 return ReadOutput {
170 content: out,
171 resolved_mode: "full".into(),
172 output_tokens: sent,
173 };
174 }
175 let content = existing.content.clone();
176 let original_tokens = existing.original_tokens;
177 let resolved_mode = if mode == "auto" {
178 resolve_auto_mode(path, original_tokens, task)
179 } else {
180 mode.to_string()
181 };
182 if is_cacheable_mode(&resolved_mode) {
183 let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
184 if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
185 let sent = count_tokens(cached_output);
186 let out = crate::core::redaction::redact_text_if_enabled(cached_output);
187 return ReadOutput {
188 content: out,
189 resolved_mode,
190 output_tokens: sent,
191 };
192 }
193 }
194 let (out, sent) = process_mode(
195 &content,
196 &resolved_mode,
197 &file_ref,
198 &short,
199 ext,
200 original_tokens,
201 crp_mode,
202 path,
203 task,
204 );
205 if is_cacheable_mode(&resolved_mode) {
206 let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
207 cache.set_compressed(path, &cache_key, out.clone());
208 }
209 let out = crate::core::redaction::redact_text_if_enabled(&out);
210 return ReadOutput {
211 content: out,
212 resolved_mode,
213 output_tokens: sent,
214 };
215 }
216
217 let content = match read_file_lossy(path) {
218 Ok(c) => c,
219 Err(e) => {
220 let msg = format!("ERROR: {e}");
221 let tokens = count_tokens(&msg);
222 return ReadOutput {
223 content: msg,
224 resolved_mode: "error".into(),
225 output_tokens: tokens,
226 };
227 }
228 };
229
230 let similar_hint = find_similar_and_update_semantic_index(path, &content);
231 let graph_hint = build_graph_related_hint(path);
232
233 let store_result = cache.store(path, content.clone());
234
235 if mode == "full" {
236 cache.mark_full_delivered(path);
237 let (mut output, sent) = format_full_output(
238 &file_ref,
239 &short,
240 ext,
241 &content,
242 store_result.original_tokens,
243 store_result.line_count,
244 task,
245 );
246 if let Some(hint) = &graph_hint {
247 output.push_str(&format!("\n{hint}"));
248 }
249 if let Some(hint) = similar_hint {
250 output.push_str(&format!("\n{hint}"));
251 }
252 let output = crate::core::redaction::redact_text_if_enabled(&output);
253 return ReadOutput {
254 content: output,
255 resolved_mode: "full".into(),
256 output_tokens: sent,
257 };
258 }
259
260 let resolved_mode = if mode == "auto" {
261 resolve_auto_mode(path, store_result.original_tokens, task)
262 } else {
263 mode.to_string()
264 };
265
266 let (mut output, _sent) = process_mode(
267 &content,
268 &resolved_mode,
269 &file_ref,
270 &short,
271 ext,
272 store_result.original_tokens,
273 crp_mode,
274 path,
275 task,
276 );
277 if is_cacheable_mode(&resolved_mode) {
278 let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
279 cache.set_compressed(path, &cache_key, output.clone());
280 }
281 if let Some(hint) = &graph_hint {
282 output.push_str(&format!("\n{hint}"));
283 }
284 if let Some(hint) = similar_hint {
285 output.push_str(&format!("\n{hint}"));
286 }
287 let output = crate::core::redaction::redact_text_if_enabled(&output);
288 let final_tokens = count_tokens(&output);
289 ReadOutput {
290 content: output,
291 resolved_mode,
292 output_tokens: final_tokens,
293 }
294}
295
/// Returns `true` when `path` looks like an agent instruction/rules file.
///
/// Matching is case-insensitive and succeeds when either the file name is one of a fixed
/// set of well-known names, or the path contains one of a fixed set of directory markers.
pub fn is_instruction_file(path: &str) -> bool {
    const INSTRUCTION_FILE_NAMES: [&str; 7] = [
        "skill.md",
        "agents.md",
        "rules.md",
        ".cursorrules",
        ".clinerules",
        "lean-ctx.md",
        "lean-ctx.mdc",
    ];
    const INSTRUCTION_PATH_MARKERS: [&str; 4] = [
        "/skills/",
        "/.cursor/rules/",
        "/.claude/rules/",
        "/agents.md",
    ];

    let lowered = path.to_lowercase();
    let file_name = std::path::Path::new(&lowered)
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("");

    if INSTRUCTION_FILE_NAMES.contains(&file_name) {
        return true;
    }
    INSTRUCTION_PATH_MARKERS
        .iter()
        .any(|marker| lowered.contains(*marker))
}
317
318fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
319 if is_instruction_file(file_path) {
320 return "full".to_string();
321 }
322
323 let intent_query = task.unwrap_or("read");
326 let route = crate::core::intent_router::route_v1(intent_query);
327 let intent_mode = &route.decision.effective_read_mode;
328 if intent_mode != "auto" && intent_mode != "reference" {
329 return intent_mode.clone();
330 }
331
332 let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
334 let predictor = crate::core::mode_predictor::ModePredictor::new();
335 let mut predicted = predictor
336 .predict_best_mode(&sig)
337 .unwrap_or_else(|| "full".to_string());
338 if predicted == "auto" {
339 predicted = "full".to_string();
340 }
341
342 if let Some(project_root) =
344 crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
345 {
346 let ext = std::path::Path::new(file_path)
347 .extension()
348 .and_then(|e| e.to_str())
349 .unwrap_or("");
350 let bucket = match original_tokens {
351 0..=2000 => "sm",
352 2001..=10000 => "md",
353 10001..=50000 => "lg",
354 _ => "xl",
355 };
356 let bandit_key = format!("{ext}_{bucket}");
357 let mut store = crate::core::bandit::BanditStore::load(&project_root);
358 let bandit = store.get_or_create(&bandit_key);
359 let arm = bandit.select_arm();
360 if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
361 predicted = "aggressive".to_string();
362 }
363 }
364
365 let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
367 let chosen = policy.choose_auto_mode(task, &predicted);
368
369 if original_tokens > 2000 {
370 if predicted == "map" || predicted == "signatures" {
371 if chosen != "map" && chosen != "signatures" {
372 return predicted;
373 }
374 } else if chosen == "full" && predicted != "full" {
375 return predicted;
376 }
377 }
378
379 chosen
380}
381
382fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
383 let cfg = crate::core::config::Config::load();
384 let profile = crate::core::config::MemoryProfile::effective(&cfg);
385 if !profile.semantic_cache_enabled() {
386 return None;
387 }
388
389 let project_root = detect_project_root(path);
390 let session_id = format!("{}", std::process::id());
391 let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
392
393 let similar = index.find_similar(content, 0.7);
394 let relevant: Vec<_> = similar
395 .into_iter()
396 .filter(|(p, _)| p != path)
397 .take(3)
398 .collect();
399
400 index.add_file(path, content, &session_id);
401 let _ = index.save(&project_root);
402
403 if relevant.is_empty() {
404 return None;
405 }
406
407 let hints: Vec<String> = relevant
408 .iter()
409 .map(|(p, score)| format!(" {p} ({:.0}% similar)", score * 100.0))
410 .collect();
411
412 Some(format!(
413 "[semantic: {} similar file(s) in cache]\n{}",
414 relevant.len(),
415 hints.join("\n")
416 ))
417}
418
419fn detect_project_root(path: &str) -> String {
420 crate::core::protocol::detect_project_root_or_cwd(path)
421}
422
423fn build_graph_related_hint(path: &str) -> Option<String> {
424 let project_root = detect_project_root(path);
425 crate::core::graph_context::build_related_hint(path, &project_root, 5)
426}
427
428const AUTO_DELTA_THRESHOLD: f64 = 0.6;
429
430fn handle_full_with_auto_delta(
432 cache: &mut SessionCache,
433 path: &str,
434 file_ref: &str,
435 short: &str,
436 ext: &str,
437 task: Option<&str>,
438) -> (String, usize) {
439 let Ok(disk_content) = read_file_lossy(path) else {
440 cache.record_cache_hit(path);
441 let out = if let Some(existing) = cache.get(path) {
442 format!(
443 "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
444 existing.read_count, existing.line_count
445 )
446 } else {
447 format!("[file read failed and no cached version available] {file_ref}={short}")
448 };
449 let sent = count_tokens(&out);
450 return (out, sent);
451 };
452
453 let old_content = cache
454 .get(path)
455 .map(|e| e.content.clone())
456 .unwrap_or_default();
457 let store_result = cache.store(path, disk_content.clone());
458
459 if store_result.was_hit {
460 if store_result.full_content_delivered {
461 let out = format!(
462 "{file_ref}={short} cached {}t {}L\nFile content unchanged since last read (same hash). Already in your context window.",
463 store_result.read_count, store_result.line_count
464 );
465 let sent = count_tokens(&out);
466 return (out, sent);
467 }
468 cache.mark_full_delivered(path);
469 return format_full_output(
470 file_ref,
471 short,
472 ext,
473 &disk_content,
474 store_result.original_tokens,
475 store_result.line_count,
476 task,
477 );
478 }
479
480 let diff = compressor::diff_content(&old_content, &disk_content);
481 let diff_tokens = count_tokens(&diff);
482 let full_tokens = store_result.original_tokens;
483
484 if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
485 let savings = protocol::format_savings(full_tokens, diff_tokens);
486 let out = format!(
487 "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
488 disk_content.lines().count()
489 );
490 return (out, diff_tokens);
491 }
492
493 format_full_output(
494 file_ref,
495 short,
496 ext,
497 &disk_content,
498 store_result.original_tokens,
499 store_result.line_count,
500 task,
501 )
502}
503
504fn format_full_output(
505 file_ref: &str,
506 short: &str,
507 ext: &str,
508 content: &str,
509 original_tokens: usize,
510 line_count: usize,
511 task: Option<&str>,
512) -> (String, usize) {
513 let tokens = original_tokens;
514 let metadata = build_header(file_ref, short, ext, content, line_count, true);
515
516 let mut reordered: Option<String> = None;
517 {
518 let profile = crate::core::profiles::active_profile();
519 let cfg = profile.layout;
520 if cfg.enabled_effective() && line_count >= cfg.min_lines_effective() {
521 let task_str = task.unwrap_or("");
522 if !task_str.is_empty() {
523 let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
524 let r = crate::core::attention_layout_driver::maybe_reorder_for_attention(
525 content, &keywords, &cfg,
526 );
527 if !r.skipped && r.changed {
528 reordered = Some(r.output);
529 }
530 }
531 }
532 }
533
534 let content_for_output = reordered.as_deref().unwrap_or(content);
535
536 let mut sym = SymbolMap::new();
537 let idents = symbol_map::extract_identifiers(content_for_output, ext);
538 for ident in &idents {
539 sym.register(ident);
540 }
541
542 if sym.len() >= 3 {
543 let sym_table = sym.format_table();
544 let compressed = sym.apply(content_for_output);
545 let original_tok = count_tokens(content_for_output);
546 let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
547 let net_saving = original_tok.saturating_sub(compressed_tok);
548 if original_tok > 0 && net_saving * 100 / original_tok >= 5 {
549 let output = format!("{metadata}\n{compressed}{sym_table}");
550 let sent = count_tokens(&output);
551 let savings = protocol::format_savings(tokens, sent);
552 return (format!("{output}\n{savings}"), sent);
553 }
554 }
555
556 let output = format!("{metadata}\n{content_for_output}");
557 let sent = count_tokens(&output);
558 let savings = protocol::format_savings(tokens, sent);
559 (format!("{output}\n{savings}"), sent)
560}
561
562fn build_header(
563 file_ref: &str,
564 short: &str,
565 ext: &str,
566 content: &str,
567 line_count: usize,
568 include_deps: bool,
569) -> String {
570 let mut header = format!("{file_ref}={short} {line_count}L");
571
572 if include_deps {
573 let dep_info = deps::extract_deps(content, ext);
574 if !dep_info.imports.is_empty() {
575 let imports_str: Vec<&str> = dep_info
576 .imports
577 .iter()
578 .take(8)
579 .map(std::string::String::as_str)
580 .collect();
581 header.push_str(&format!("\n deps {}", imports_str.join(",")));
582 }
583 if !dep_info.exports.is_empty() {
584 let exports_str: Vec<&str> = dep_info
585 .exports
586 .iter()
587 .take(8)
588 .map(std::string::String::as_str)
589 .collect();
590 header.push_str(&format!("\n exports {}", exports_str.join(",")));
591 }
592 }
593
594 header
595}
596
597#[allow(clippy::too_many_arguments)]
598fn process_mode(
599 content: &str,
600 mode: &str,
601 file_ref: &str,
602 short: &str,
603 ext: &str,
604 original_tokens: usize,
605 crp_mode: CrpMode,
606 file_path: &str,
607 task: Option<&str>,
608) -> (String, usize) {
609 let line_count = content.lines().count();
610
611 match mode {
612 "auto" => {
613 let chosen = resolve_auto_mode(file_path, original_tokens, task);
614 process_mode(
615 content,
616 &chosen,
617 file_ref,
618 short,
619 ext,
620 original_tokens,
621 crp_mode,
622 file_path,
623 task,
624 )
625 }
626 "full" => format_full_output(
627 file_ref,
628 short,
629 ext,
630 content,
631 original_tokens,
632 line_count,
633 task,
634 ),
635 "signatures" => {
636 let sigs = signatures::extract_signatures(content, ext);
637 let dep_info = deps::extract_deps(content, ext);
638
639 let mut output = format!("{file_ref}={short} {line_count}L");
640 if !dep_info.imports.is_empty() {
641 let imports_str: Vec<&str> = dep_info
642 .imports
643 .iter()
644 .take(8)
645 .map(std::string::String::as_str)
646 .collect();
647 output.push_str(&format!("\n deps {}", imports_str.join(",")));
648 }
649 for sig in &sigs {
650 output.push('\n');
651 if crp_mode.is_tdd() {
652 output.push_str(&sig.to_tdd());
653 } else {
654 output.push_str(&sig.to_compact());
655 }
656 }
657 let sent = count_tokens(&output);
658 let savings = protocol::format_savings(original_tokens, sent);
659 (
660 append_compressed_hint(&format!("{output}\n{savings}"), file_path),
661 sent,
662 )
663 }
664 "map" => {
665 if ext == "php" {
666 if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
667 {
668 let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
669 let sent = count_tokens(&output);
670 let savings = protocol::format_savings(original_tokens, sent);
671 output.push('\n');
672 output.push_str(&savings);
673 return (append_compressed_hint(&output, file_path), sent);
674 }
675 }
676
677 let sigs = signatures::extract_signatures(content, ext);
678 let dep_info = deps::extract_deps(content, ext);
679
680 let mut output = format!("{file_ref}={short} {line_count}L");
681
682 if !dep_info.imports.is_empty() {
683 output.push_str("\n deps: ");
684 output.push_str(&dep_info.imports.join(", "));
685 }
686
687 if !dep_info.exports.is_empty() {
688 output.push_str("\n exports: ");
689 output.push_str(&dep_info.exports.join(", "));
690 }
691
692 let key_sigs: Vec<&signatures::Signature> = sigs
693 .iter()
694 .filter(|s| s.is_exported || s.indent == 0)
695 .collect();
696
697 if !key_sigs.is_empty() {
698 output.push_str("\n API:");
699 for sig in &key_sigs {
700 output.push_str("\n ");
701 if crp_mode.is_tdd() {
702 output.push_str(&sig.to_tdd());
703 } else {
704 output.push_str(&sig.to_compact());
705 }
706 }
707 }
708
709 let sent = count_tokens(&output);
710 let savings = protocol::format_savings(original_tokens, sent);
711 (
712 append_compressed_hint(&format!("{output}\n{savings}"), file_path),
713 sent,
714 )
715 }
716 "aggressive" => {
717 #[cfg(feature = "tree-sitter")]
718 let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
719 #[cfg(not(feature = "tree-sitter"))]
720 let ast_pruned: Option<String> = None;
721
722 let base = ast_pruned.as_deref().unwrap_or(content);
723
724 let session_intent = crate::core::session::SessionState::load_latest()
725 .and_then(|s| s.active_structured_intent);
726 let raw = if let Some(ref intent) = session_intent {
727 compressor::task_aware_compress(base, Some(ext), intent)
728 } else {
729 compressor::aggressive_compress(base, Some(ext))
730 };
731 let compressed = compressor::safeguard_ratio(content, &raw);
732 let header = build_header(file_ref, short, ext, content, line_count, true);
733
734 let mut sym = SymbolMap::new();
735 let idents = symbol_map::extract_identifiers(&compressed, ext);
736 for ident in &idents {
737 sym.register(ident);
738 }
739
740 if sym.len() >= 3 {
741 let sym_table = sym.format_table();
742 let sym_applied = sym.apply(&compressed);
743 let orig_tok = count_tokens(&compressed);
744 let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
745 let net = orig_tok.saturating_sub(comp_tok);
746 if orig_tok > 0 && net * 100 / orig_tok >= 5 {
747 let savings = protocol::format_savings(original_tokens, comp_tok);
748 return (
749 append_compressed_hint(
750 &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
751 file_path,
752 ),
753 comp_tok,
754 );
755 }
756 let savings = protocol::format_savings(original_tokens, orig_tok);
757 return (
758 append_compressed_hint(
759 &format!("{header}\n{compressed}\n{savings}"),
760 file_path,
761 ),
762 orig_tok,
763 );
764 }
765
766 let sent = count_tokens(&compressed);
767 let savings = protocol::format_savings(original_tokens, sent);
768 (
769 append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
770 sent,
771 )
772 }
773 "entropy" => {
774 let result = entropy::entropy_compress_adaptive(content, file_path);
775 let avg_h = entropy::analyze_entropy(content).avg_entropy;
776 let header = build_header(file_ref, short, ext, content, line_count, false);
777 let techs = result.techniques.join(", ");
778 let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
779 let sent = count_tokens(&output);
780 let savings = protocol::format_savings(original_tokens, sent);
781 let compression_ratio = if original_tokens > 0 {
782 1.0 - (sent as f64 / original_tokens as f64)
783 } else {
784 0.0
785 };
786 crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
787 (
788 append_compressed_hint(&format!("{output}\n{savings}"), file_path),
789 sent,
790 )
791 }
792 "task" => {
793 let task_str = task.unwrap_or("");
794 if task_str.is_empty() {
795 let header = build_header(file_ref, short, ext, content, line_count, true);
796 let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
797 let sent = count_tokens(&out);
798 return (out, sent);
799 }
800 let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
801 if keywords.is_empty() {
802 let header = build_header(file_ref, short, ext, content, line_count, true);
803 let out = format!(
804 "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
805 );
806 let sent = count_tokens(&out);
807 return (out, sent);
808 }
809 let filtered =
810 crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
811 let filtered_lines = filtered.lines().count();
812 let header = format!(
813 "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
814 );
815 let project_root = detect_project_root(file_path);
816 let graph_ctx = crate::core::graph_context::build_graph_context(
817 file_path,
818 &project_root,
819 Some(crate::core::graph_context::GraphContextOptions::default()),
820 )
821 .map(|c| crate::core::graph_context::format_graph_context(&c))
822 .unwrap_or_default();
823
824 let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
825 let savings = protocol::format_savings(original_tokens, sent);
826 (
827 append_compressed_hint(
828 &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
829 file_path,
830 ),
831 sent,
832 )
833 }
834 "reference" => {
835 let tok = count_tokens(content);
836 let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
837 let sent = count_tokens(&output);
838 let savings = protocol::format_savings(original_tokens, sent);
839 (format!("{output}\n{savings}"), sent)
840 }
841 mode if mode.starts_with("lines:") => {
842 let range_str = &mode[6..];
843 let extracted = extract_line_range(content, range_str);
844 let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
845 let sent = count_tokens(&extracted);
846 let savings = protocol::format_savings(original_tokens, sent);
847 (format!("{header}\n{extracted}\n{savings}"), sent)
848 }
849 unknown => {
850 let header = build_header(file_ref, short, ext, content, line_count, true);
851 let out = format!(
852 "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
853 );
854 let sent = count_tokens(&out);
855 (out, sent)
856 }
857 }
858}
859
/// Extracts the lines named by `range_str` from `content`, each prefixed with its
/// right-aligned 1-based line number and `| `.
///
/// `range_str` is a comma-separated list of single line numbers (`7`) and inclusive
/// ranges (`3-9`). In a range, a missing or unparsable start means line 1 and a missing or
/// unparsable end means the last line; bounds are clamped to the file. Parts that select
/// nothing are skipped, and parts are emitted in the order given (duplicates included).
/// Returns `"No lines matched the range."` when nothing was selected.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();

    let selected: Vec<String> = range_str
        .split(',')
        .map(str::trim)
        .flat_map(|part| {
            // Every part becomes an inclusive span already clamped to 1..=total;
            // `1..=0` stands for "nothing selected".
            let (first, last) = match part.split_once('-') {
                Some((lo, hi)) => (
                    lo.trim().parse::<usize>().unwrap_or(1).max(1),
                    hi.trim().parse::<usize>().unwrap_or(total).min(total),
                ),
                None => match part.parse::<usize>() {
                    Ok(n) if n >= 1 && n <= total => (n, n),
                    _ => (1, 0),
                },
            };
            first..=last
        })
        .map(|number| format!("{number:>4}| {}", lines[number - 1]))
        .collect();

    if selected.is_empty() {
        return "No lines matched the range.".to_string();
    }
    selected.join("\n")
}
888
889fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
890 let short = protocol::shorten_path(path);
891 let old_content = cache.get(path).map(|e| e.content.clone());
892
893 let new_content = match read_file_lossy(path) {
894 Ok(c) => c,
895 Err(e) => {
896 let msg = format!("ERROR: {e}");
897 let tokens = count_tokens(&msg);
898 return (msg, tokens);
899 }
900 };
901
902 let original_tokens = count_tokens(&new_content);
903
904 let diff_output = if let Some(old) = &old_content {
905 compressor::diff_content(old, &new_content)
906 } else {
907 format!("[first read]\n{new_content}")
908 };
909
910 cache.store(path, new_content);
911
912 let sent = count_tokens(&diff_output);
913 let savings = protocol::format_savings(original_tokens, sent);
914 (
915 format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}"),
916 sent,
917 )
918}
919
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Calls `process_mode` with the fixed arguments every test here uses: file ref `F1`,
    /// extension `rs`, CRP off, `file` as both short name and path, and the content's own
    /// token count as the baseline.
    fn run_mode(content: &str, mode: &str, file: &str, task: Option<&str>) -> (String, usize) {
        process_mode(
            content,
            mode,
            "F1",
            file,
            "rs",
            count_tokens(content),
            CrpMode::Off,
            file,
            task,
        )
    }

    /// Returns the text of the second output line with any `NN| ` prefix removed.
    fn first_body_line(output: &str) -> &str {
        let line = output.lines().nth(1).unwrap_or_default().trim();
        line.split_once('|').map_or(line, |(_, text)| text.trim())
    }

    #[test]
    fn test_header_toon_format_no_brackets() {
        let header = build_header("F1", "main.rs", "rs", "use std::io;\nfn main() {}\n", 2, false);
        assert!(!header.contains('['));
        assert!(!header.contains(']'));
        assert!(header.contains("F1=main.rs 2L"));
    }

    #[test]
    fn test_header_toon_deps_indented() {
        let source = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", source, 3, true);
        // The format is only checked when the extractor reported dependencies.
        if !header.contains("deps") {
            return;
        }
        assert!(
            header.contains("\n deps "),
            "deps should use indented TOON format"
        );
        assert!(
            !header.contains("deps:["),
            "deps should not use bracket format"
        );
    }

    #[test]
    fn test_header_toon_saves_tokens() {
        let source = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
        let old_tokens = count_tokens("F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]");
        let new_tokens = count_tokens(&build_header("F1", "main.rs", "rs", source, 4, true));
        assert!(
            new_tokens <= old_tokens,
            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
        );
    }

    #[test]
    fn test_tdd_symbols_are_compact() {
        let symbols = [
            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
        ];
        for sym in symbols {
            let tok = count_tokens(sym);
            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
        }
    }

    #[test]
    fn test_task_mode_filters_content() {
        // Every 20th line mentions the task keyword; the rest is unrelated filler.
        let mut source_lines = Vec::with_capacity(200);
        for i in 0..200 {
            source_lines.push(if i % 20 == 0 {
                format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
            } else {
                format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
            });
        }
        let content = source_lines.join("\n");
        let full_tokens = count_tokens(&content);

        let (result, result_tokens) =
            run_mode(&content, "task", "test.rs", Some("fix bug in validate_token"));

        assert!(
            result_tokens < full_tokens,
            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
        );
        assert!(
            result.contains("task-filtered"),
            "output should contain task-filtered marker"
        );
    }

    #[test]
    fn test_task_mode_without_task_returns_full() {
        let (result, _sent) = run_mode("fn main() {}\nfn helper() {}\n", "task", "test.rs", None);
        assert!(
            result.contains("no task set"),
            "should indicate no task: {result}"
        );
    }

    #[test]
    fn test_reference_mode_one_line() {
        let source = "fn main() {}\nfn helper() {}\nfn other() {}\n";
        let (result, _sent) = run_mode(source, "reference", "test.rs", None);
        let line_total = result.lines().count();
        assert!(
            line_total <= 3,
            "reference mode should be very compact, got {} lines",
            line_total
        );
        assert!(result.contains("lines"), "should contain line count");
        assert!(result.contains("tok"), "should contain token count");
    }

    #[test]
    fn cached_lines_mode_invalidates_on_mtime_change() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("file.txt");
        let p = path.to_string_lossy().to_string();
        let mut cache = SessionCache::new();

        std::fs::write(&path, "one\nsecond\n").unwrap();
        let before = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        assert_eq!(first_body_line(&before.content), "one");

        // Wait so the rewrite gets a different modification time than the first write.
        std::thread::sleep(Duration::from_secs(1));
        std::fs::write(&path, "two\nsecond\n").unwrap();

        let after = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        assert_eq!(first_body_line(&after.content), "two");
    }

    #[test]
    #[cfg_attr(tarpaulin, ignore)]
    fn benchmark_task_conditioned_compression() {
        let content = generate_benchmark_code(200);
        let task = Some("fix authentication in validate_token");
        let tokens_for = |mode: &str| run_mode(&content, mode, "server.rs", task).1;

        let full_tok = tokens_for("full");
        let task_tok = tokens_for("task");
        let sig_tok = tokens_for("signatures");
        let ref_tok = tokens_for("reference");

        let savings_pct = |tok: usize| (1.0 - tok as f64 / full_tok as f64) * 100.0;

        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
        eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
        eprintln!(" full: {full_tok:>6} tokens (baseline)");
        eprintln!(
            " task: {task_tok:>6} tokens ({:.0}% savings)",
            savings_pct(task_tok)
        );
        eprintln!(
            " signatures: {sig_tok:>6} tokens ({:.0}% savings)",
            savings_pct(sig_tok)
        );
        eprintln!(
            " reference: {ref_tok:>6} tokens ({:.0}% savings)",
            savings_pct(ref_tok)
        );
        eprintln!("================================================\n");

        assert!(task_tok < full_tok, "task mode should save tokens");
        assert!(sig_tok < full_tok, "signatures should save tokens");
        assert!(ref_tok < sig_tok, "reference should be most compact");
    }

    /// Produces a synthetic Rust source of roughly `lines` lines: a fixed preamble with a
    /// `validate_token` method, followed by generated handler bodies.
    fn generate_benchmark_code(lines: usize) -> String {
        const PREAMBLE: [&str; 17] = [
            "use std::collections::HashMap;",
            "use crate::core::auth;",
            "",
            "pub struct Server {",
            " config: Config,",
            " cache: HashMap<String, String>,",
            "}",
            "",
            "impl Server {",
            " pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {",
            " let decoded = auth::decode_jwt(token)?;",
            " if decoded.exp < chrono::Utc::now().timestamp() {",
            " return Err(AuthError::Expired);",
            " }",
            " Ok(decoded.claims)",
            " }",
            "",
        ];

        let mut code: Vec<String> = Vec::with_capacity(lines);
        code.extend(PREAMBLE.iter().map(|line| line.to_string()));

        let remaining = lines.saturating_sub(code.len());
        code.extend((0..remaining).map(|i| match i % 30 {
            0 => format!(" pub fn handler_{i}(&self, req: Request) -> Response {{"),
            29 => " }".to_string(),
            _ => format!(" let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"),
        }));
        code.push("}".to_string());
        code.join("\n")
    }

    #[test]
    fn instruction_file_detection() {
        let instruction_paths = [
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
            "/workspace/.cursor/rules/lean-ctx.mdc",
            "/project/AGENTS.md",
            "/project/.cursorrules",
            "/home/user/.claude/rules/my-rule.md",
            "/skills/some-skill/README.md",
        ];
        for path in instruction_paths {
            assert!(is_instruction_file(path));
        }

        let ordinary_paths = [
            "/project/src/main.rs",
            "/project/config.json",
            "/project/data/report.csv",
        ];
        for path in ordinary_paths {
            assert!(!is_instruction_file(path));
        }
    }

    #[test]
    fn resolve_auto_mode_returns_full_for_instruction_files() {
        let cases = [
            (
                "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
                5000,
                Some("read"),
                "SKILL.md must always be read in full",
            ),
            (
                "/workspace/AGENTS.md",
                3000,
                Some("read"),
                "AGENTS.md must always be read in full",
            ),
            (
                "/workspace/.cursorrules",
                2000,
                None,
                ".cursorrules must always be read in full",
            ),
        ];
        for (path, tokens, task, why) in cases {
            let mode = resolve_auto_mode(path, tokens, task);
            assert_eq!(mode, "full", "{why}");
        }
    }
}