1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a read request: the rendered text plus metadata about how it was produced.
pub struct ReadOutput {
    /// Rendered output text handed back to the caller.
    pub content: String,
    /// Mode that actually produced `content` (for example `"full"`, `"diff"`, the mode an
    /// `"auto"` request resolved to, or `"error"` when the file could not be read).
    pub resolved_mode: String,
    /// Token count reported for the output.
    pub output_tokens: usize,
}
22
/// Notice that `append_compressed_hint` adds to outputs that do not carry the complete source.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";
24
/// Read modes whose rendered output is stored in, and served from, the compressed-output cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is listed in [`CACHEABLE_MODES`] (exact, case-sensitive match).
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|cacheable| *cacheable == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32 if crp_mode.is_tdd() {
33 format!("{mode}:tdd")
34 } else {
35 mode.to_string()
36 }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40 format!("{output}\n{COMPRESSED_HINT}\n ctx_read(\"{file_path}\", mode=\"full\")")
41}
42
43pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
45 if crate::core::binary_detect::is_binary_file(path) {
46 let msg = crate::core::binary_detect::binary_file_message(path);
47 return Err(std::io::Error::other(msg));
48 }
49
50 let cap = crate::core::limits::max_read_bytes();
51 let meta = std::fs::metadata(path).map_err(|e| {
52 std::io::Error::other(format!("cannot stat file (refusing unbounded read): {e}"))
53 })?;
54 if meta.len() > cap as u64 {
55 return Err(std::io::Error::other(format!(
56 "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
57 Increase the limit or use a line-range read: mode=\"lines:1-100\"",
58 meta.len(),
59 cap
60 )));
61 }
62
63 let bytes = std::fs::read(path)?;
64 match String::from_utf8(bytes) {
65 Ok(s) => Ok(s),
66 Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
67 }
68}
69
70pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
72 handle_with_options(cache, path, mode, false, crp_mode, None)
73}
74
75pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
77 handle_with_options(cache, path, mode, true, crp_mode, None)
78}
79
80pub fn handle_with_task(
82 cache: &mut SessionCache,
83 path: &str,
84 mode: &str,
85 crp_mode: CrpMode,
86 task: Option<&str>,
87) -> String {
88 handle_with_options(cache, path, mode, false, crp_mode, task)
89}
90
91pub fn handle_with_task_resolved(
93 cache: &mut SessionCache,
94 path: &str,
95 mode: &str,
96 crp_mode: CrpMode,
97 task: Option<&str>,
98) -> ReadOutput {
99 handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
100}
101
102pub fn handle_fresh_with_task(
104 cache: &mut SessionCache,
105 path: &str,
106 mode: &str,
107 crp_mode: CrpMode,
108 task: Option<&str>,
109) -> String {
110 handle_with_options(cache, path, mode, true, crp_mode, task)
111}
112
113pub fn handle_fresh_with_task_resolved(
115 cache: &mut SessionCache,
116 path: &str,
117 mode: &str,
118 crp_mode: CrpMode,
119 task: Option<&str>,
120) -> ReadOutput {
121 handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
122}
123
124fn handle_with_options(
125 cache: &mut SessionCache,
126 path: &str,
127 mode: &str,
128 fresh: bool,
129 crp_mode: CrpMode,
130 task: Option<&str>,
131) -> String {
132 handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
133}
134
/// Core read routine: resolves the effective mode for `path`, renders the output and keeps
/// `cache` up to date.
///
/// Flow, in order:
/// 1. `fresh` drops any cached entry for `path`.
/// 2. `mode == "diff"` is delegated to `handle_diff`.
/// 3. For every mode except `"full"`, a cached entry whose stored mtime is reported stale
///    is invalidated.
/// 4. If a cached entry remains, `"full"` goes through `handle_full_with_auto_delta`; any
///    other mode is rendered from the cached content, reusing a stored compressed output
///    when the resolved mode is cacheable.
/// 5. Otherwise the file is read from disk, stored in the cache and rendered; the graph and
///    semantic-similarity hints are appended on this path only.
///
/// Outputs produced in steps 4 and 5 are passed through `redact_text_if_enabled` before
/// being returned. A failed disk read in step 5 yields `resolved_mode == "error"`.
fn handle_with_options_resolved(
    cache: &mut SessionCache,
    path: &str,
    mode: &str,
    fresh: bool,
    crp_mode: CrpMode,
    task: Option<&str>,
) -> ReadOutput {
    // The file reference is obtained before any invalidation below.
    let file_ref = cache.get_file_ref(path);
    let short = protocol::shorten_path(path);
    let ext = Path::new(path)
        .extension()
        .and_then(|e| e.to_str())
        .unwrap_or("");

    if fresh {
        cache.invalidate(path);
    }

    if mode == "diff" {
        let (out, sent) = handle_diff(cache, path, &file_ref);
        return ReadOutput {
            content: out,
            resolved_mode: "diff".into(),
            output_tokens: sent,
        };
    }

    // "full" re-reads the file itself (see handle_full_with_auto_delta), so the staleness
    // check is only performed for the other modes, which render from cached content.
    if mode != "full" {
        if let Some(existing) = cache.get(path) {
            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
            if stale {
                cache.invalidate(path);
            }
        }
    }

    // --- Cached path -------------------------------------------------------------------
    if let Some(existing) = cache.get(path) {
        if mode == "full" {
            let (out, sent) =
                handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
            let out = crate::core::redaction::redact_text_if_enabled(&out);
            return ReadOutput {
                content: out,
                resolved_mode: "full".into(),
                output_tokens: sent,
            };
        }
        let content = existing.content.clone();
        let original_tokens = existing.original_tokens;
        let resolved_mode = if mode == "auto" {
            resolve_auto_mode(path, original_tokens, task)
        } else {
            mode.to_string()
        };
        // Serve a previously rendered output for cacheable modes when one is stored.
        if is_cacheable_mode(&resolved_mode) {
            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
            if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
                // Tokens are counted on the stored (pre-redaction) text.
                let sent = count_tokens(cached_output);
                let out = crate::core::redaction::redact_text_if_enabled(cached_output);
                return ReadOutput {
                    content: out,
                    resolved_mode,
                    output_tokens: sent,
                };
            }
        }
        let (out, sent) = process_mode(
            &content,
            &resolved_mode,
            &file_ref,
            &short,
            ext,
            original_tokens,
            crp_mode,
            path,
            task,
        );
        // The unredacted rendering is what gets cached; redaction is applied on the way out.
        if is_cacheable_mode(&resolved_mode) {
            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
            cache.set_compressed(path, &cache_key, out.clone());
        }
        let out = crate::core::redaction::redact_text_if_enabled(&out);
        return ReadOutput {
            content: out,
            resolved_mode,
            output_tokens: sent,
        };
    }

    // --- Uncached path: read from disk --------------------------------------------------
    let content = match read_file_lossy(path) {
        Ok(c) => c,
        Err(e) => {
            let msg = format!("ERROR: {e}");
            let tokens = count_tokens(&msg);
            return ReadOutput {
                content: msg,
                resolved_mode: "error".into(),
                output_tokens: tokens,
            };
        }
    };

    // Both hints are computed before the content is stored in the session cache.
    let similar_hint = find_similar_and_update_semantic_index(path, &content);
    let graph_hint = build_graph_related_hint(path);

    let store_result = cache.store(path, content.clone());

    if mode == "full" {
        cache.mark_full_delivered(path);
        let (mut output, sent) = format_full_output(
            &file_ref,
            &short,
            ext,
            &content,
            store_result.original_tokens,
            store_result.line_count,
            task,
        );
        // Hints are appended after `sent` was computed, so they are not part of that count.
        if let Some(hint) = &graph_hint {
            output.push_str(&format!("\n{hint}"));
        }
        if let Some(hint) = similar_hint {
            output.push_str(&format!("\n{hint}"));
        }
        let output = crate::core::redaction::redact_text_if_enabled(&output);
        return ReadOutput {
            content: output,
            resolved_mode: "full".into(),
            output_tokens: sent,
        };
    }

    let resolved_mode = if mode == "auto" {
        resolve_auto_mode(path, store_result.original_tokens, task)
    } else {
        mode.to_string()
    };

    // The token count returned by process_mode is ignored here; the final count is taken
    // from the finished output below.
    let (mut output, _sent) = process_mode(
        &content,
        &resolved_mode,
        &file_ref,
        &short,
        ext,
        store_result.original_tokens,
        crp_mode,
        path,
        task,
    );
    // Cache the rendering before hints are appended and before redaction.
    if is_cacheable_mode(&resolved_mode) {
        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
        cache.set_compressed(path, &cache_key, output.clone());
    }
    if let Some(hint) = &graph_hint {
        output.push_str(&format!("\n{hint}"));
    }
    if let Some(hint) = similar_hint {
        output.push_str(&format!("\n{hint}"));
    }
    let output = crate::core::redaction::redact_text_if_enabled(&output);
    let final_tokens = count_tokens(&output);
    ReadOutput {
        content: output,
        resolved_mode,
        output_tokens: final_tokens,
    }
}
303
/// Returns `true` when `path` looks like an agent instruction/rules file.
///
/// Matching is case-insensitive. A path qualifies when its file name is one of the known
/// instruction file names (`SKILL.md`, `AGENTS.md`, `rules.md`, `.cursorrules`,
/// `.clinerules`, `lean-ctx.md`, `lean-ctx.mdc`) or when it lies under a `skills/`,
/// `.cursor/rules/` or `.claude/rules/` directory.
///
/// Backslashes are treated as path separators, so Windows-style paths such as
/// `C:\repo\.cursor\rules\style.mdc` are recognized regardless of the host platform.
pub fn is_instruction_file(path: &str) -> bool {
    // Normalize separators once so both the file-name extraction and the directory
    // substring checks below see a single, '/'-separated form.
    let lower = path.to_lowercase().replace('\\', "/");
    let filename = std::path::Path::new(&lower)
        .file_name()
        .and_then(|f| f.to_str())
        .unwrap_or("");

    matches!(
        filename,
        "skill.md"
            | "agents.md"
            | "rules.md"
            | ".cursorrules"
            | ".clinerules"
            | "lean-ctx.md"
            | "lean-ctx.mdc"
    ) || lower.contains("/skills/")
        || lower.contains("/.cursor/rules/")
        || lower.contains("/.claude/rules/")
        || lower.contains("/agents.md")
}
325
326fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
327 if is_instruction_file(file_path) {
328 return "full".to_string();
329 }
330
331 let intent_query = task.unwrap_or("read");
334 let route = crate::core::intent_router::route_v1(intent_query);
335 let intent_mode = &route.decision.effective_read_mode;
336 if intent_mode != "auto" && intent_mode != "reference" {
337 return intent_mode.clone();
338 }
339
340 let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
342 let predictor = crate::core::mode_predictor::ModePredictor::new();
343 let mut predicted = predictor
344 .predict_best_mode(&sig)
345 .unwrap_or_else(|| "full".to_string());
346 if predicted == "auto" {
347 predicted = "full".to_string();
348 }
349
350 if let Some(project_root) =
352 crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
353 {
354 let ext = std::path::Path::new(file_path)
355 .extension()
356 .and_then(|e| e.to_str())
357 .unwrap_or("");
358 let bucket = match original_tokens {
359 0..=2000 => "sm",
360 2001..=10000 => "md",
361 10001..=50000 => "lg",
362 _ => "xl",
363 };
364 let bandit_key = format!("{ext}_{bucket}");
365 let mut store = crate::core::bandit::BanditStore::load(&project_root);
366 let bandit = store.get_or_create(&bandit_key);
367 let arm = bandit.select_arm();
368 if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
369 predicted = "aggressive".to_string();
370 }
371 }
372
373 let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
375 let chosen = policy.choose_auto_mode(task, &predicted);
376
377 if original_tokens > 2000 {
378 if predicted == "map" || predicted == "signatures" {
379 if chosen != "map" && chosen != "signatures" {
380 return predicted;
381 }
382 } else if chosen == "full" && predicted != "full" {
383 return predicted;
384 }
385 }
386
387 chosen
388}
389
390fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
391 let cfg = crate::core::config::Config::load();
392 let profile = crate::core::config::MemoryProfile::effective(&cfg);
393 if !profile.semantic_cache_enabled() {
394 return None;
395 }
396
397 let project_root = detect_project_root(path);
398 let session_id = format!("{}", std::process::id());
399 let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
400
401 let similar = index.find_similar(content, 0.7);
402 let relevant: Vec<_> = similar
403 .into_iter()
404 .filter(|(p, _)| p != path)
405 .take(3)
406 .collect();
407
408 index.add_file(path, content, &session_id);
409 let _ = index.save(&project_root);
410
411 if relevant.is_empty() {
412 return None;
413 }
414
415 let hints: Vec<String> = relevant
416 .iter()
417 .map(|(p, score)| format!(" {p} ({:.0}% similar)", score * 100.0))
418 .collect();
419
420 Some(format!(
421 "[semantic: {} similar file(s) in cache]\n{}",
422 relevant.len(),
423 hints.join("\n")
424 ))
425}
426
427fn detect_project_root(path: &str) -> String {
428 crate::core::protocol::detect_project_root_or_cwd(path)
429}
430
431fn build_graph_related_hint(path: &str) -> Option<String> {
432 let project_root = detect_project_root(path);
433 crate::core::graph_context::build_related_hint(path, &project_root, 5)
434}
435
/// Upper bound on `diff tokens / full tokens` below which `handle_full_with_auto_delta`
/// sends a diff instead of the full file.
const AUTO_DELTA_THRESHOLD: f64 = 0.6;
437
/// Serves a `full` read for a file that already has a cache entry.
///
/// The file is re-read from disk and stored in the cache, after which one of the following
/// is returned as `(output, sent_tokens)`:
/// - a short "unchanged" stub when the store reports a hit and the full content was
///   already delivered;
/// - the full output when the store reports a hit but the full content had not been
///   delivered yet (the entry is then marked as delivered);
/// - an `[auto-delta]` diff against the previously cached content when the diff costs
///   fewer tokens than `AUTO_DELTA_THRESHOLD` times the full token count;
/// - the full output otherwise.
///
/// If the disk read fails, a cache hit is recorded and a notice referring to the cached
/// entry is returned instead of file content.
fn handle_full_with_auto_delta(
    cache: &mut SessionCache,
    path: &str,
    file_ref: &str,
    short: &str,
    ext: &str,
    task: Option<&str>,
) -> (String, usize) {
    let Ok(disk_content) = read_file_lossy(path) else {
        // Disk read failed: fall back to describing the cached entry.
        cache.record_cache_hit(path);
        let out = if let Some(existing) = cache.get(path) {
            format!(
                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
                existing.read_count, existing.line_count
            )
        } else {
            format!("[file read failed and no cached version available] {file_ref}={short}")
        };
        let sent = count_tokens(&out);
        return (out, sent);
    };

    // Keep the previous content for diffing before the store below replaces it.
    let old_content = cache
        .get(path)
        .map(|e| e.content.clone())
        .unwrap_or_default();
    let store_result = cache.store(path, disk_content.clone());

    if store_result.was_hit {
        if store_result.full_content_delivered {
            // Nothing new to send: the caller already has this exact content.
            let out = format!(
                "{file_ref}={short} cached {}t {}L\nFile content unchanged since last read (same hash). Already in your context window.",
                store_result.read_count, store_result.line_count
            );
            let sent = count_tokens(&out);
            return (out, sent);
        }
        cache.mark_full_delivered(path);
        return format_full_output(
            file_ref,
            short,
            ext,
            &disk_content,
            store_result.original_tokens,
            store_result.line_count,
            task,
        );
    }

    // Not a hit: compare the cost of a diff with the cost of the full file.
    let diff = compressor::diff_content(&old_content, &disk_content);
    let diff_tokens = count_tokens(&diff);
    let full_tokens = store_result.original_tokens;

    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
        let savings = protocol::format_savings(full_tokens, diff_tokens);
        let out = format!(
            "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
            disk_content.lines().count()
        );
        // The reported count covers the diff only, not the header or savings line.
        return (out, diff_tokens);
    }

    format_full_output(
        file_ref,
        short,
        ext,
        &disk_content,
        store_result.original_tokens,
        store_result.line_count,
        task,
    )
}
511
512fn format_full_output(
513 file_ref: &str,
514 short: &str,
515 ext: &str,
516 content: &str,
517 original_tokens: usize,
518 line_count: usize,
519 task: Option<&str>,
520) -> (String, usize) {
521 let tokens = original_tokens;
522 let metadata = build_header(file_ref, short, ext, content, line_count, true);
523
524 let mut reordered: Option<String> = None;
525 {
526 let profile = crate::core::profiles::active_profile();
527 let cfg = profile.layout;
528 if cfg.enabled_effective() && line_count >= cfg.min_lines_effective() {
529 let task_str = task.unwrap_or("");
530 if !task_str.is_empty() {
531 let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
532 let r = crate::core::attention_layout_driver::maybe_reorder_for_attention(
533 content, &keywords, &cfg,
534 );
535 if !r.skipped && r.changed {
536 reordered = Some(r.output);
537 }
538 }
539 }
540 }
541
542 let content_for_output = reordered.as_deref().unwrap_or(content);
543
544 let mut sym = SymbolMap::new();
545 let idents = symbol_map::extract_identifiers(content_for_output, ext);
546 for ident in &idents {
547 sym.register(ident);
548 }
549
550 if sym.len() >= 3 {
551 let sym_table = sym.format_table();
552 let compressed = sym.apply(content_for_output);
553 let original_tok = count_tokens(content_for_output);
554 let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
555 let net_saving = original_tok.saturating_sub(compressed_tok);
556 if original_tok > 0 && net_saving * 100 / original_tok >= 5 {
557 let output = format!("{metadata}\n{compressed}{sym_table}");
558 let sent = count_tokens(&output);
559 let savings = protocol::format_savings(tokens, sent);
560 return (format!("{output}\n{savings}"), sent);
561 }
562 }
563
564 let output = format!("{metadata}\n{content_for_output}");
565 let sent = count_tokens(&output);
566 let savings = protocol::format_savings(tokens, sent);
567 (format!("{output}\n{savings}"), sent)
568}
569
570fn build_header(
571 file_ref: &str,
572 short: &str,
573 ext: &str,
574 content: &str,
575 line_count: usize,
576 include_deps: bool,
577) -> String {
578 let mut header = format!("{file_ref}={short} {line_count}L");
579
580 if include_deps {
581 let dep_info = deps::extract_deps(content, ext);
582 if !dep_info.imports.is_empty() {
583 let imports_str: Vec<&str> = dep_info
584 .imports
585 .iter()
586 .take(8)
587 .map(std::string::String::as_str)
588 .collect();
589 header.push_str(&format!("\n deps {}", imports_str.join(",")));
590 }
591 if !dep_info.exports.is_empty() {
592 let exports_str: Vec<&str> = dep_info
593 .exports
594 .iter()
595 .take(8)
596 .map(std::string::String::as_str)
597 .collect();
598 header.push_str(&format!("\n exports {}", exports_str.join(",")));
599 }
600 }
601
602 header
603}
604
/// Renders `content` according to `mode` and returns `(output, sent_tokens)`.
///
/// Supported modes: `auto` (resolved through `resolve_auto_mode`, then dispatched again),
/// `full`, `signatures`, `map`, `aggressive`, `entropy`, `task`, `reference` and
/// `lines:<ranges>`. Any other value falls back to the full content prefixed with a warning.
///
/// The token count is computed per branch and is not uniform: some branches count header
/// plus body, others the body only. Where a savings footer or the compressed hint is
/// appended, it is not included in the count.
#[allow(clippy::too_many_arguments)]
fn process_mode(
    content: &str,
    mode: &str,
    file_ref: &str,
    short: &str,
    ext: &str,
    original_tokens: usize,
    crp_mode: CrpMode,
    file_path: &str,
    task: Option<&str>,
) -> (String, usize) {
    let line_count = content.lines().count();

    match mode {
        // Resolve to a concrete mode and dispatch again.
        "auto" => {
            let chosen = resolve_auto_mode(file_path, original_tokens, task);
            process_mode(
                content,
                &chosen,
                file_ref,
                short,
                ext,
                original_tokens,
                crp_mode,
                file_path,
                task,
            )
        }
        "full" => format_full_output(
            file_ref,
            short,
            ext,
            content,
            original_tokens,
            line_count,
            task,
        ),
        // Header, up to eight imports, then every extracted signature on its own line.
        "signatures" => {
            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = format!("{file_ref}={short} {line_count}L");
            if !dep_info.imports.is_empty() {
                let imports_str: Vec<&str> = dep_info
                    .imports
                    .iter()
                    .take(8)
                    .map(std::string::String::as_str)
                    .collect();
                output.push_str(&format!("\n deps {}", imports_str.join(",")));
            }
            for sig in &sigs {
                output.push('\n');
                if crp_mode.is_tdd() {
                    output.push_str(&sig.to_tdd());
                } else {
                    output.push_str(&sig.to_compact());
                }
            }
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        // Outline view: all imports/exports plus exported or top-level signatures.
        "map" => {
            // PHP files get a dedicated map when the PHP compressor produces one.
            if ext == "php" {
                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
                {
                    let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
                    let sent = count_tokens(&output);
                    let savings = protocol::format_savings(original_tokens, sent);
                    output.push('\n');
                    output.push_str(&savings);
                    return (append_compressed_hint(&output, file_path), sent);
                }
            }

            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = format!("{file_ref}={short} {line_count}L");

            if !dep_info.imports.is_empty() {
                output.push_str("\n deps: ");
                output.push_str(&dep_info.imports.join(", "));
            }

            if !dep_info.exports.is_empty() {
                output.push_str("\n exports: ");
                output.push_str(&dep_info.exports.join(", "));
            }

            // Keep only signatures that are exported or sit at indent level 0.
            let key_sigs: Vec<&signatures::Signature> = sigs
                .iter()
                .filter(|s| s.is_exported || s.indent == 0)
                .collect();

            if !key_sigs.is_empty() {
                output.push_str("\n API:");
                for sig in &key_sigs {
                    output.push_str("\n ");
                    if crp_mode.is_tdd() {
                        output.push_str(&sig.to_tdd());
                    } else {
                        output.push_str(&sig.to_compact());
                    }
                }
            }

            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        // Compressed body (optionally AST-pruned first), with optional symbol substitution.
        "aggressive" => {
            #[cfg(feature = "tree-sitter")]
            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
            #[cfg(not(feature = "tree-sitter"))]
            let ast_pruned: Option<String> = None;

            let base = ast_pruned.as_deref().unwrap_or(content);

            // Use task-aware compression when the latest session has a structured intent.
            let session_intent = crate::core::session::SessionState::load_latest()
                .and_then(|s| s.active_structured_intent);
            let raw = if let Some(ref intent) = session_intent {
                compressor::task_aware_compress(base, Some(ext), intent)
            } else {
                compressor::aggressive_compress(base, Some(ext))
            };
            let compressed = compressor::safeguard_ratio(content, &raw);
            let header = build_header(file_ref, short, ext, content, line_count, true);

            let mut sym = SymbolMap::new();
            let idents = symbol_map::extract_identifiers(&compressed, ext);
            for ident in &idents {
                sym.register(ident);
            }

            if sym.len() >= 3 {
                let sym_table = sym.format_table();
                let sym_applied = sym.apply(&compressed);
                let orig_tok = count_tokens(&compressed);
                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
                let net = orig_tok.saturating_sub(comp_tok);
                // Symbol substitution is used only when it saves at least 5% of the body.
                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
                    let savings = protocol::format_savings(original_tokens, comp_tok);
                    return (
                        append_compressed_hint(
                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
                            file_path,
                        ),
                        comp_tok,
                    );
                }
                let savings = protocol::format_savings(original_tokens, orig_tok);
                return (
                    append_compressed_hint(
                        &format!("{header}\n{compressed}\n{savings}"),
                        file_path,
                    ),
                    orig_tok,
                );
            }

            // Token counts in this branch cover the compressed body only, not the header.
            let sent = count_tokens(&compressed);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
                sent,
            )
        }
        // Entropy-based compression; the header reports average entropy and techniques used.
        "entropy" => {
            let result = entropy::entropy_compress_adaptive(content, file_path);
            let avg_h = entropy::analyze_entropy(content).avg_entropy;
            let header = build_header(file_ref, short, ext, content, line_count, false);
            let techs = result.techniques.join(", ");
            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            let compression_ratio = if original_tokens > 0 {
                1.0 - (sent as f64 / original_tokens as f64)
            } else {
                0.0
            };
            // Report the outcome to the adaptive thresholds: success means > 15% saved.
            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        // Keep only content relevant to the task keywords; without a usable task the
        // full content is returned with an explanatory marker.
        "task" => {
            let task_str = task.unwrap_or("");
            if task_str.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
                let sent = count_tokens(&out);
                return (out, sent);
            }
            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
            if keywords.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!(
                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
                );
                let sent = count_tokens(&out);
                return (out, sent);
            }
            let filtered =
                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
            let filtered_lines = filtered.lines().count();
            let header = format!(
                "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
            );
            let project_root = detect_project_root(file_path);
            // Graph context is optional; an absent context contributes an empty string.
            let graph_ctx = crate::core::graph_context::build_graph_context(
                file_path,
                &project_root,
                Some(crate::core::graph_context::GraphContextOptions::default()),
            )
            .map(|c| crate::core::graph_context::format_graph_context(&c))
            .unwrap_or_default();

            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(
                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
                    file_path,
                ),
                sent,
            )
        }
        // One-line summary: line count, token count and extension; no file content.
        "reference" => {
            let tok = count_tokens(content);
            let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{output}\n{savings}"), sent)
        }
        // "lines:<ranges>": the text after the 6-byte "lines:" prefix is the range spec.
        mode if mode.starts_with("lines:") => {
            let range_str = &mode[6..];
            let extracted = extract_line_range(content, range_str);
            let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
            // Only the extracted lines are counted, not the header.
            let sent = count_tokens(&extracted);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{header}\n{extracted}\n{savings}"), sent)
        }
        // Unrecognized mode: warn and return the full, unmodified content.
        unknown => {
            let header = build_header(file_ref, short, ext, content, line_count, true);
            let out = format!(
                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
            );
            let sent = count_tokens(&out);
            (out, sent)
        }
    }
}
867
/// Extracts the lines named by `range_str` from `content`, each prefixed with its
/// right-aligned 1-based line number and `| `.
///
/// `range_str` is a comma-separated list of single line numbers (`7`) and inclusive ranges
/// (`3-9`). In a range, a missing or unparsable start defaults to the first line and a
/// missing or unparsable end defaults to the last line; bounds are clamped to the file.
/// Parts that select nothing are skipped. When no line is selected at all, the text
/// `No lines matched the range.` is returned.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let all_lines: Vec<&str> = content.lines().collect();
    let line_total = all_lines.len();
    let render = |number: usize| format!("{number:>4}| {}", all_lines[number - 1]);

    let mut picked: Vec<String> = Vec::new();
    for spec in range_str.split(',').map(str::trim) {
        match spec.split_once('-') {
            Some((lo, hi)) => {
                // Clamping keeps every number in `first..=last` inside `1..=line_total`.
                let first = lo.trim().parse::<usize>().unwrap_or(1).max(1);
                let last = hi
                    .trim()
                    .parse::<usize>()
                    .unwrap_or(line_total)
                    .min(line_total);
                picked.extend((first..=last).map(&render));
            }
            None => {
                if let Ok(number) = spec.parse::<usize>() {
                    if (1..=line_total).contains(&number) {
                        picked.push(render(number));
                    }
                }
            }
        }
    }

    if picked.is_empty() {
        return "No lines matched the range.".to_string();
    }
    picked.join("\n")
}
896
897fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
898 let short = protocol::shorten_path(path);
899 let old_content = cache.get(path).map(|e| e.content.clone());
900
901 let new_content = match read_file_lossy(path) {
902 Ok(c) => c,
903 Err(e) => {
904 let msg = format!("ERROR: {e}");
905 let tokens = count_tokens(&msg);
906 return (msg, tokens);
907 }
908 };
909
910 let original_tokens = count_tokens(&new_content);
911
912 let diff_output = if let Some(old) = &old_content {
913 compressor::diff_content(old, &new_content)
914 } else {
915 format!("[first read]\n{new_content}")
916 };
917
918 cache.store(path, new_content);
919
920 let sent = count_tokens(&diff_output);
921 let savings = protocol::format_savings(original_tokens, sent);
922 (
923 format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}"),
924 sent,
925 )
926}
927
928#[cfg(test)]
929mod tests {
930 use super::*;
931 use std::time::Duration;
932
933 #[test]
934 fn test_header_toon_format_no_brackets() {
935 let content = "use std::io;\nfn main() {}\n";
936 let header = build_header("F1", "main.rs", "rs", content, 2, false);
937 assert!(!header.contains('['));
938 assert!(!header.contains(']'));
939 assert!(header.contains("F1=main.rs 2L"));
940 }
941
942 #[test]
943 fn test_header_toon_deps_indented() {
944 let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
945 let header = build_header("F1", "main.rs", "rs", content, 3, true);
946 if header.contains("deps") {
947 assert!(
948 header.contains("\n deps "),
949 "deps should use indented TOON format"
950 );
951 assert!(
952 !header.contains("deps:["),
953 "deps should not use bracket format"
954 );
955 }
956 }
957
958 #[test]
959 fn test_header_toon_saves_tokens() {
960 let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
961 let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
962 let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
963 let old_tokens = count_tokens(&old_header);
964 let new_tokens = count_tokens(&new_header);
965 assert!(
966 new_tokens <= old_tokens,
967 "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
968 );
969 }
970
971 #[test]
972 fn test_tdd_symbols_are_compact() {
973 let symbols = [
974 "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
975 ];
976 for sym in &symbols {
977 let tok = count_tokens(sym);
978 assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
979 }
980 }
981
982 #[test]
983 fn test_task_mode_filters_content() {
984 let content = (0..200)
985 .map(|i| {
986 if i % 20 == 0 {
987 format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
988 } else {
989 format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
990 }
991 })
992 .collect::<Vec<_>>()
993 .join("\n");
994 let full_tokens = count_tokens(&content);
995 let task = Some("fix bug in validate_token");
996 let (result, result_tokens) = process_mode(
997 &content,
998 "task",
999 "F1",
1000 "test.rs",
1001 "rs",
1002 full_tokens,
1003 CrpMode::Off,
1004 "test.rs",
1005 task,
1006 );
1007 assert!(
1008 result_tokens < full_tokens,
1009 "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
1010 );
1011 assert!(
1012 result.contains("task-filtered"),
1013 "output should contain task-filtered marker"
1014 );
1015 }
1016
1017 #[test]
1018 fn test_task_mode_without_task_returns_full() {
1019 let content = "fn main() {}\nfn helper() {}\n";
1020 let tokens = count_tokens(content);
1021 let (result, _sent) = process_mode(
1022 content,
1023 "task",
1024 "F1",
1025 "test.rs",
1026 "rs",
1027 tokens,
1028 CrpMode::Off,
1029 "test.rs",
1030 None,
1031 );
1032 assert!(
1033 result.contains("no task set"),
1034 "should indicate no task: {result}"
1035 );
1036 }
1037
1038 #[test]
1039 fn test_reference_mode_one_line() {
1040 let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
1041 let tokens = count_tokens(content);
1042 let (result, _sent) = process_mode(
1043 content,
1044 "reference",
1045 "F1",
1046 "test.rs",
1047 "rs",
1048 tokens,
1049 CrpMode::Off,
1050 "test.rs",
1051 None,
1052 );
1053 let lines: Vec<&str> = result.lines().collect();
1054 assert!(
1055 lines.len() <= 3,
1056 "reference mode should be very compact, got {} lines",
1057 lines.len()
1058 );
1059 assert!(result.contains("lines"), "should contain line count");
1060 assert!(result.contains("tok"), "should contain token count");
1061 }
1062
1063 #[test]
1064 fn cached_lines_mode_invalidates_on_mtime_change() {
1065 let dir = tempfile::tempdir().unwrap();
1066 let path = dir.path().join("file.txt");
1067 let p = path.to_string_lossy().to_string();
1068
1069 std::fs::write(&path, "one\nsecond\n").unwrap();
1070 let mut cache = SessionCache::new();
1071
1072 let r1 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1073 let l1: Vec<&str> = r1.content.lines().collect();
1074 let got1 = l1.get(1).copied().unwrap_or_default().trim();
1075 let got1 = got1.split_once('|').map_or(got1, |(_, s)| s.trim());
1076 assert_eq!(got1, "one");
1077
1078 std::thread::sleep(Duration::from_secs(1));
1079 std::fs::write(&path, "two\nsecond\n").unwrap();
1080
1081 let r2 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1082 let l2: Vec<&str> = r2.content.lines().collect();
1083 let got2 = l2.get(1).copied().unwrap_or_default().trim();
1084 let got2 = got2.split_once('|').map_or(got2, |(_, s)| s.trim());
1085 assert_eq!(got2, "two");
1086 }
1087
/// Runs `process_mode` in `full`, `task`, `signatures` and `reference` mode on
/// the same generated file, prints the resulting token counts (with savings
/// relative to `full`) to stderr, and asserts the expected size ordering.
#[test]
#[cfg_attr(tarpaulin, ignore)]
fn benchmark_task_conditioned_compression() {
    let content = generate_benchmark_code(200);
    let full_tokens = count_tokens(&content);
    let task = Some("fix authentication in validate_token");

    // Every run uses identical inputs; only the mode name varies. The rendered
    // output is discarded and just the token count is kept.
    let tokens_for = |mode: &str| {
        let (_rendered, tokens) = process_mode(
            &content,
            mode,
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );
        tokens
    };

    let full_tok = tokens_for("full");
    let task_tok = tokens_for("task");
    let sig_tok = tokens_for("signatures");
    let ref_tok = tokens_for("reference");

    // Percentage of tokens saved relative to the `full` baseline.
    let baseline = full_tok as f64;
    let savings_pct = |tokens: f64| (1.0 - tokens / baseline) * 100.0;
    let task_pct = savings_pct(task_tok as f64);
    let sig_pct = savings_pct(sig_tok as f64);
    let ref_pct = savings_pct(ref_tok as f64);

    eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
    eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
    eprintln!(" full: {full_tok:>6} tokens (baseline)");
    eprintln!(" task: {task_tok:>6} tokens ({:.0}% savings)", task_pct);
    eprintln!(" signatures: {sig_tok:>6} tokens ({:.0}% savings)", sig_pct);
    eprintln!(" reference: {ref_tok:>6} tokens ({:.0}% savings)", ref_pct);
    eprintln!("================================================\n");

    assert!(task_tok < full_tok, "task mode should save tokens");
    assert!(sig_tok < full_tok, "signatures should save tokens");
    assert!(ref_tok < sig_tok, "reference should be most compact");
}
1162
/// Builds a synthetic Rust-like source file used as benchmark input.
///
/// The output always begins with a fixed 17-line prelude (imports, a `Server`
/// struct and a `validate_token` method) and always ends with a lone `}`.
/// In between, `lines - 17` filler lines (saturating at zero) are emitted in
/// cycles of 30: one `handler_{i}` signature, 28 `let val_{i}` statements and
/// a closing brace. A trailing partial cycle is left without its closing brace.
///
/// Returns the lines joined with `\n` and no trailing newline.
fn generate_benchmark_code(lines: usize) -> String {
    const PRELUDE: [&str; 17] = [
        "use std::collections::HashMap;",
        "use crate::core::auth;",
        "",
        "pub struct Server {",
        " config: Config,",
        " cache: HashMap<String, String>,",
        "}",
        "",
        "impl Server {",
        " pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {",
        " let decoded = auth::decode_jwt(token)?;",
        " if decoded.exp < chrono::Utc::now().timestamp() {",
        " return Err(AuthError::Expired);",
        " }",
        " Ok(decoded.claims)",
        " }",
        "",
    ];

    let mut out: Vec<String> = Vec::with_capacity(lines);
    out.extend(PRELUDE.iter().map(|row| (*row).to_string()));

    // Whatever is left of the requested budget after the prelude is filler.
    let filler = lines.saturating_sub(out.len());
    out.extend((0..filler).map(|i| match i % 30 {
        0 => format!(" pub fn handler_{i}(&self, req: Request) -> Response {{"),
        29 => " }".to_string(),
        _ => format!(
            " let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"
        ),
    }));

    // Closes the `impl Server {` opened in the prelude.
    out.push("}".to_string());
    out.join("\n")
}
1201
/// Checks `is_instruction_file` against a set of paths it must accept and a
/// set of ordinary project paths it must reject.
#[test]
fn instruction_file_detection() {
    const ACCEPTED: [&str; 6] = [
        "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
        "/workspace/.cursor/rules/lean-ctx.mdc",
        "/project/AGENTS.md",
        "/project/.cursorrules",
        "/home/user/.claude/rules/my-rule.md",
        "/skills/some-skill/README.md",
    ];
    const REJECTED: [&str; 3] = [
        "/project/src/main.rs",
        "/project/config.json",
        "/project/data/report.csv",
    ];

    for path in ACCEPTED {
        assert!(
            is_instruction_file(path),
            "expected an instruction file: {path}"
        );
    }
    for path in REJECTED {
        assert!(
            !is_instruction_file(path),
            "expected an ordinary file: {path}"
        );
    }
}
1217
/// Confirms that `resolve_auto_mode` yields `"full"` for each instruction-file
/// path listed below, across the argument combinations exercised here.
#[test]
fn resolve_auto_mode_returns_full_for_instruction_files() {
    // Each case: (path, numeric second argument, optional third argument,
    // message reported if the resolved mode is not "full").
    let cases = [
        (
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
            5000,
            Some("read"),
            "SKILL.md must always be read in full",
        ),
        (
            "/workspace/AGENTS.md",
            3000,
            Some("read"),
            "AGENTS.md must always be read in full",
        ),
        (
            "/workspace/.cursorrules",
            2000,
            None,
            ".cursorrules must always be read in full",
        ),
    ];

    for (path, amount, hint, why) in cases {
        let mode = resolve_auto_mode(path, amount, hint);
        assert_eq!(mode, "full", "{why}");
    }
}
1233}