1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a read request: the rendered text plus bookkeeping about how it
/// was produced.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadOutput {
    /// Text returned to the caller (header, body and any appended hints).
    pub content: String,
    /// Mode that was actually applied: the concrete mode chosen for `"auto"`,
    /// the requested mode otherwise, or `"error"` when the file could not be read.
    pub resolved_mode: String,
    /// Token count reported for this output.
    pub output_tokens: usize,
}
22
/// Marker line appended to compressed renderings; tells the reader that
/// `mode="full"` returns the complete source.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";
24
/// Resolved modes whose rendered output is stored in, and served from, the
/// per-file compressed cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is listed in [`CACHEABLE_MODES`] (exact match).
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|candidate| *candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32 if crp_mode.is_tdd() {
33 format!("{mode}:tdd")
34 } else {
35 mode.to_string()
36 }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40 format!(
41 "{output}\n{COMPRESSED_HINT}\n ctx_read(\"{file_path}\", mode=\"full\") | ctx_retrieve(\"{file_path}\")"
42 )
43}
44
45pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
49 if crate::core::binary_detect::is_binary_file(path) {
50 let msg = crate::core::binary_detect::binary_file_message(path);
51 return Err(std::io::Error::other(msg));
52 }
53
54 if let Ok(canonical) = std::path::Path::new(path).canonicalize() {
55 if let Ok(cwd) = std::env::current_dir() {
56 let root = crate::core::pathjail::canonicalize_or_self(&cwd);
57 if !canonical.starts_with(&root) {
58 let allow = crate::core::pathjail::allow_paths_from_env_and_config();
59 let data_dir_ok = crate::core::data_dir::lean_ctx_data_dir()
60 .ok()
61 .is_some_and(|d| canonical.starts_with(d));
62 let tmp_ok = canonical.starts_with(std::env::temp_dir());
63 if !allow.iter().any(|a| canonical.starts_with(a)) && !data_dir_ok && !tmp_ok {
64 tracing::warn!(
65 "defense-in-depth: path may escape project root: {}",
66 canonical.display()
67 );
68 }
69 }
70 }
71 }
72
73 let cap = crate::core::limits::max_read_bytes();
74 let meta = std::fs::metadata(path).map_err(|e| {
75 std::io::Error::other(format!("cannot stat file (refusing unbounded read): {e}"))
76 })?;
77 if meta.len() > cap as u64 {
78 return Err(std::io::Error::other(format!(
79 "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
80 Increase the limit or use a line-range read: mode=\"lines:1-100\"",
81 meta.len(),
82 cap
83 )));
84 }
85
86 let bytes = std::fs::read(path)?;
87 match String::from_utf8(bytes) {
88 Ok(s) => Ok(s),
89 Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
90 }
91}
92
93pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
95 handle_with_options(cache, path, mode, false, crp_mode, None)
96}
97
98pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
100 handle_with_options(cache, path, mode, true, crp_mode, None)
101}
102
103pub fn handle_with_task(
105 cache: &mut SessionCache,
106 path: &str,
107 mode: &str,
108 crp_mode: CrpMode,
109 task: Option<&str>,
110) -> String {
111 handle_with_options(cache, path, mode, false, crp_mode, task)
112}
113
114pub fn handle_with_task_resolved(
116 cache: &mut SessionCache,
117 path: &str,
118 mode: &str,
119 crp_mode: CrpMode,
120 task: Option<&str>,
121) -> ReadOutput {
122 handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
123}
124
125pub fn handle_fresh_with_task(
127 cache: &mut SessionCache,
128 path: &str,
129 mode: &str,
130 crp_mode: CrpMode,
131 task: Option<&str>,
132) -> String {
133 handle_with_options(cache, path, mode, true, crp_mode, task)
134}
135
136pub fn handle_fresh_with_task_resolved(
138 cache: &mut SessionCache,
139 path: &str,
140 mode: &str,
141 crp_mode: CrpMode,
142 task: Option<&str>,
143) -> ReadOutput {
144 handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
145}
146
147fn handle_with_options(
148 cache: &mut SessionCache,
149 path: &str,
150 mode: &str,
151 fresh: bool,
152 crp_mode: CrpMode,
153 task: Option<&str>,
154) -> String {
155 handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
156}
157
/// Core read implementation shared by every `handle*` entry point.
///
/// Flow, in order:
/// 1. `fresh` drops any cached entry for `path`.
/// 2. `mode == "diff"` is delegated to `handle_diff` and returned as-is.
/// 3. For non-`"full"` modes, a cached entry that `is_cache_entry_stale`
///    reports as stale is invalidated.
/// 4. If a cached entry (still) exists it is served from the cache: `"full"`
///    goes through `handle_full_with_auto_delta`; other modes are rendered from
///    the cached content, with `map`/`signatures` renderings reused from the
///    compressed cache when present.
/// 5. Otherwise the file is read from disk, stored in the cache and rendered;
///    graph and semantic-similarity hints are appended on this path only.
///
/// Every branch except the diff branch and the read-error branch passes its
/// text through `redact_text_if_enabled` before returning.
///
/// `resolved_mode` in the result is `"diff"`, `"full"`, `"error"`, or the mode
/// that was rendered (with `"auto"` resolved via `resolve_auto_mode`).
fn handle_with_options_resolved(
    cache: &mut SessionCache,
    path: &str,
    mode: &str,
    fresh: bool,
    crp_mode: CrpMode,
    task: Option<&str>,
) -> ReadOutput {
    let file_ref = cache.get_file_ref(path);
    let short = protocol::shorten_path(path);
    // File extension without the dot; empty when absent or not valid UTF-8.
    let ext = Path::new(path)
        .extension()
        .and_then(|e| e.to_str())
        .unwrap_or("");

    if fresh {
        cache.invalidate(path);
    }

    if mode == "diff" {
        // No redaction call on this branch: handle_diff's text is returned unchanged.
        let (out, sent) = handle_diff(cache, path, &file_ref);
        return ReadOutput {
            content: out,
            resolved_mode: "diff".into(),
            output_tokens: sent,
        };
    }

    // Staleness is only checked here for non-full modes; the "full" cached
    // path re-reads the file itself in handle_full_with_auto_delta.
    if mode != "full" {
        if let Some(existing) = cache.get(path) {
            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
            if stale {
                cache.invalidate(path);
            }
        }
    }

    // ---- Cached path ----
    if let Some(existing) = cache.get(path) {
        if mode == "full" {
            let (out, sent) =
                handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
            let out = crate::core::redaction::redact_text_if_enabled(&out);
            return ReadOutput {
                content: out,
                resolved_mode: "full".into(),
                output_tokens: sent,
            };
        }
        // Copy what is needed out of the entry so the cache can be mutated below.
        let content = existing.content.clone();
        let original_tokens = existing.original_tokens;
        let resolved_mode = if mode == "auto" {
            resolve_auto_mode(path, original_tokens, task)
        } else {
            mode.to_string()
        };
        if is_cacheable_mode(&resolved_mode) {
            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
            if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
                // Tokens are counted on the stored text, before redaction.
                let sent = count_tokens(cached_output);
                let out = crate::core::redaction::redact_text_if_enabled(cached_output);
                return ReadOutput {
                    content: out,
                    resolved_mode,
                    output_tokens: sent,
                };
            }
        }
        let (out, sent) = process_mode(
            &content,
            &resolved_mode,
            &file_ref,
            &short,
            ext,
            original_tokens,
            crp_mode,
            path,
            task,
        );
        if is_cacheable_mode(&resolved_mode) {
            // The unredacted rendering is what gets cached.
            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
            cache.set_compressed(path, &cache_key, out.clone());
        }
        let out = crate::core::redaction::redact_text_if_enabled(&out);
        return ReadOutput {
            content: out,
            resolved_mode,
            output_tokens: sent,
        };
    }

    // ---- Uncached path: read from disk ----
    let content = match read_file_lossy(path) {
        Ok(c) => c,
        Err(e) => {
            let msg = format!("ERROR: {e}");
            let tokens = count_tokens(&msg);
            return ReadOutput {
                content: msg,
                resolved_mode: "error".into(),
                output_tokens: tokens,
            };
        }
    };

    // Hints are computed before the content is stored in the session cache.
    let similar_hint = find_similar_and_update_semantic_index(path, &content);
    let graph_hint = build_graph_related_hint(path);

    let store_result = cache.store(path, content.clone());

    if mode == "full" {
        cache.mark_full_delivered(path);
        let (mut output, sent) = format_full_output(
            &file_ref,
            &short,
            ext,
            &content,
            store_result.original_tokens,
            store_result.line_count,
            task,
        );
        if let Some(hint) = &graph_hint {
            output.push_str(&format!("\n{hint}"));
        }
        if let Some(hint) = similar_hint {
            output.push_str(&format!("\n{hint}"));
        }
        let output = crate::core::redaction::redact_text_if_enabled(&output);
        // `sent` comes from format_full_output, i.e. it does not include the hints.
        return ReadOutput {
            content: output,
            resolved_mode: "full".into(),
            output_tokens: sent,
        };
    }

    let resolved_mode = if mode == "auto" {
        resolve_auto_mode(path, store_result.original_tokens, task)
    } else {
        mode.to_string()
    };

    let (mut output, _sent) = process_mode(
        &content,
        &resolved_mode,
        &file_ref,
        &short,
        ext,
        store_result.original_tokens,
        crp_mode,
        path,
        task,
    );
    if is_cacheable_mode(&resolved_mode) {
        // Cached before the hints are appended, so cache hits never carry hints.
        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
        cache.set_compressed(path, &cache_key, output.clone());
    }
    if let Some(hint) = &graph_hint {
        output.push_str(&format!("\n{hint}"));
    }
    if let Some(hint) = similar_hint {
        output.push_str(&format!("\n{hint}"));
    }
    let output = crate::core::redaction::redact_text_if_enabled(&output);
    // Unlike the other branches, this one recounts tokens on the final
    // (hinted, redacted) text instead of using the renderer's count.
    let final_tokens = count_tokens(&output);
    ReadOutput {
        content: output,
        resolved_mode,
        output_tokens: final_tokens,
    }
}
326
/// Returns `true` when `path` looks like an agent instruction / rules file.
///
/// Matching is case-insensitive and separator-agnostic: backslashes are
/// treated like forward slashes, so Windows-style paths such as
/// `C:\repo\.cursor\rules\style.mdc` are recognised as well.
///
/// A path matches when its file name is one of the known instruction file
/// names, or when the path contains one of the known instruction locations
/// (`/skills/`, `/.cursor/rules/`, `/.claude/rules/`, `/agents.md`).
pub fn is_instruction_file(path: &str) -> bool {
    // Known path fragments that mark instruction content.
    const LOCATION_MARKERS: [&str; 4] = [
        "/skills/",
        "/.cursor/rules/",
        "/.claude/rules/",
        "/agents.md",
    ];

    // Lower-case once and unify separators; without this the markers above
    // can never match a path written with `\`.
    let normalized = path.to_lowercase().replace('\\', "/");
    let filename = std::path::Path::new(&normalized)
        .file_name()
        .and_then(|f| f.to_str())
        .unwrap_or("");

    let known_name = matches!(
        filename,
        "skill.md"
            | "agents.md"
            | "rules.md"
            | ".cursorrules"
            | ".clinerules"
            | "lean-ctx.md"
            | "lean-ctx.mdc"
    );

    known_name
        || LOCATION_MARKERS
            .iter()
            .any(|marker| normalized.contains(marker))
}
348
349fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
350 if is_instruction_file(file_path) {
351 return "full".to_string();
352 }
353
354 let intent_query = task.unwrap_or("read");
357 let route = crate::core::intent_router::route_v1(intent_query);
358 let intent_mode = &route.decision.effective_read_mode;
359 if intent_mode != "auto" && intent_mode != "reference" {
360 return intent_mode.clone();
361 }
362
363 let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
365 let predictor = crate::core::mode_predictor::ModePredictor::new();
366 let mut predicted = predictor
367 .predict_best_mode(&sig)
368 .unwrap_or_else(|| "full".to_string());
369 if predicted == "auto" {
370 predicted = "full".to_string();
371 }
372
373 if let Some(project_root) =
375 crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
376 {
377 let ext = std::path::Path::new(file_path)
378 .extension()
379 .and_then(|e| e.to_str())
380 .unwrap_or("");
381 let bucket = match original_tokens {
382 0..=2000 => "sm",
383 2001..=10000 => "md",
384 10001..=50000 => "lg",
385 _ => "xl",
386 };
387 let bandit_key = format!("{ext}_{bucket}");
388 let mut store = crate::core::bandit::BanditStore::load(&project_root);
389 let bandit = store.get_or_create(&bandit_key);
390 let arm = bandit.select_arm();
391 if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
392 predicted = "aggressive".to_string();
393 }
394 }
395
396 let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
398 let chosen = policy.choose_auto_mode(task, &predicted);
399
400 if original_tokens > 2000 {
401 if predicted == "map" || predicted == "signatures" {
402 if chosen != "map" && chosen != "signatures" {
403 return predicted;
404 }
405 } else if chosen == "full" && predicted != "full" {
406 return predicted;
407 }
408 }
409
410 chosen
411}
412
413fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
414 let cfg = crate::core::config::Config::load();
415 let profile = crate::core::config::MemoryProfile::effective(&cfg);
416 if !profile.semantic_cache_enabled() {
417 return None;
418 }
419
420 let project_root = detect_project_root(path);
421 let session_id = format!("{}", std::process::id());
422 let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
423
424 let similar = index.find_similar(content, 0.7);
425 let relevant: Vec<_> = similar
426 .into_iter()
427 .filter(|(p, _)| p != path)
428 .take(3)
429 .collect();
430
431 index.add_file(path, content, &session_id);
432 let _ = index.save(&project_root);
433
434 if relevant.is_empty() {
435 return None;
436 }
437
438 let hints: Vec<String> = relevant
439 .iter()
440 .map(|(p, score)| format!(" {p} ({:.0}% similar)", score * 100.0))
441 .collect();
442
443 Some(format!(
444 "[semantic: {} similar file(s) in cache]\n{}",
445 relevant.len(),
446 hints.join("\n")
447 ))
448}
449
450fn detect_project_root(path: &str) -> String {
451 crate::core::protocol::detect_project_root_or_cwd(path)
452}
453
454fn build_graph_related_hint(path: &str) -> Option<String> {
455 let project_root = detect_project_root(path);
456 crate::core::graph_context::build_related_hint(path, &project_root, 5)
457}
458
459const AUTO_DELTA_THRESHOLD: f64 = 0.6;
460
461fn handle_full_with_auto_delta(
463 cache: &mut SessionCache,
464 path: &str,
465 file_ref: &str,
466 short: &str,
467 ext: &str,
468 task: Option<&str>,
469) -> (String, usize) {
470 let Ok(disk_content) = read_file_lossy(path) else {
471 cache.record_cache_hit(path);
472 let out = if let Some(existing) = cache.get(path) {
473 format!(
474 "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
475 existing.read_count, existing.line_count
476 )
477 } else {
478 format!("[file read failed and no cached version available] {file_ref}={short}")
479 };
480 let sent = count_tokens(&out);
481 return (out, sent);
482 };
483
484 let old_content = cache
485 .get(path)
486 .map(|e| e.content.clone())
487 .unwrap_or_default();
488 let store_result = cache.store(path, disk_content.clone());
489
490 if store_result.was_hit {
491 if store_result.full_content_delivered {
492 let out = format!(
493 "{file_ref}={short} cached {}t {}L\nFile content unchanged since last read (same hash). Already in your context window.",
494 store_result.read_count, store_result.line_count
495 );
496 let sent = count_tokens(&out);
497 return (out, sent);
498 }
499 cache.mark_full_delivered(path);
500 return format_full_output(
501 file_ref,
502 short,
503 ext,
504 &disk_content,
505 store_result.original_tokens,
506 store_result.line_count,
507 task,
508 );
509 }
510
511 let diff = compressor::diff_content(&old_content, &disk_content);
512 let diff_tokens = count_tokens(&diff);
513 let full_tokens = store_result.original_tokens;
514
515 if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
516 let savings = protocol::format_savings(full_tokens, diff_tokens);
517 let out = format!(
518 "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
519 disk_content.lines().count()
520 );
521 return (out, diff_tokens);
522 }
523
524 format_full_output(
525 file_ref,
526 short,
527 ext,
528 &disk_content,
529 store_result.original_tokens,
530 store_result.line_count,
531 task,
532 )
533}
534
535fn format_full_output(
536 file_ref: &str,
537 short: &str,
538 ext: &str,
539 content: &str,
540 original_tokens: usize,
541 line_count: usize,
542 task: Option<&str>,
543) -> (String, usize) {
544 let tokens = original_tokens;
545 let metadata = build_header(file_ref, short, ext, content, line_count, true);
546
547 let mut reordered: Option<String> = None;
548 {
549 let profile = crate::core::profiles::active_profile();
550 let cfg = profile.layout;
551 if cfg.enabled_effective() && line_count >= cfg.min_lines_effective() {
552 let task_str = task.unwrap_or("");
553 if !task_str.is_empty() {
554 let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
555 let r = crate::core::attention_layout_driver::maybe_reorder_for_attention(
556 content, &keywords, &cfg,
557 );
558 if !r.skipped && r.changed {
559 reordered = Some(r.output);
560 }
561 }
562 }
563 }
564
565 let content_for_output = reordered.as_deref().unwrap_or(content);
566
567 let mut sym = SymbolMap::new();
568 let idents = symbol_map::extract_identifiers(content_for_output, ext);
569 for ident in &idents {
570 sym.register(ident);
571 }
572
573 if sym.len() >= 3 {
574 let sym_table = sym.format_table();
575 let compressed = sym.apply(content_for_output);
576 let original_tok = count_tokens(content_for_output);
577 let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
578 let net_saving = original_tok.saturating_sub(compressed_tok);
579 if original_tok > 0 && net_saving * 100 / original_tok >= 5 {
580 let output = format!("{metadata}\n{compressed}{sym_table}");
581 let sent = count_tokens(&output);
582 let savings = protocol::format_savings(tokens, sent);
583 return (format!("{output}\n{savings}"), sent);
584 }
585 }
586
587 let output = format!("{metadata}\n{content_for_output}");
588 let sent = count_tokens(&output);
589 let savings = protocol::format_savings(tokens, sent);
590 (format!("{output}\n{savings}"), sent)
591}
592
593fn build_header(
594 file_ref: &str,
595 short: &str,
596 ext: &str,
597 content: &str,
598 line_count: usize,
599 include_deps: bool,
600) -> String {
601 let mut header = format!("{file_ref}={short} {line_count}L");
602
603 if include_deps {
604 let dep_info = deps::extract_deps(content, ext);
605 if !dep_info.imports.is_empty() {
606 let imports_str: Vec<&str> = dep_info
607 .imports
608 .iter()
609 .take(8)
610 .map(std::string::String::as_str)
611 .collect();
612 header.push_str(&format!("\n deps {}", imports_str.join(",")));
613 }
614 if !dep_info.exports.is_empty() {
615 let exports_str: Vec<&str> = dep_info
616 .exports
617 .iter()
618 .take(8)
619 .map(std::string::String::as_str)
620 .collect();
621 header.push_str(&format!("\n exports {}", exports_str.join(",")));
622 }
623 }
624
625 header
626}
627
/// Renders `content` according to `mode` and returns `(text, tokens)`.
///
/// Supported modes: `auto`, `full`, `signatures`, `map`, `aggressive`,
/// `entropy`, `task`, `reference` and `lines:<ranges>`. Any other value falls
/// back to a full dump prefixed with a warning.
///
/// `original_tokens` is the token count of the uncompressed file and is only
/// used for the savings line. The returned token count is mode-specific: some
/// modes count the whole output before the savings line, others (noted
/// inline) count only the body.
///
/// The compressing modes (`signatures`, `map`, `aggressive`, `entropy` and
/// keyword-filtered `task`) end with the footer from `append_compressed_hint`.
#[allow(clippy::too_many_arguments)]
fn process_mode(
    content: &str,
    mode: &str,
    file_ref: &str,
    short: &str,
    ext: &str,
    original_tokens: usize,
    crp_mode: CrpMode,
    file_path: &str,
    task: Option<&str>,
) -> (String, usize) {
    let line_count = content.lines().count();

    match mode {
        "auto" => {
            // NOTE(review): recurses with the resolved mode; this relies on
            // resolve_auto_mode never returning "auto" — confirm.
            let chosen = resolve_auto_mode(file_path, original_tokens, task);
            process_mode(
                content,
                &chosen,
                file_ref,
                short,
                ext,
                original_tokens,
                crp_mode,
                file_path,
                task,
            )
        }
        "full" => format_full_output(
            file_ref,
            short,
            ext,
            content,
            original_tokens,
            line_count,
            task,
        ),
        "signatures" => {
            // Header, up to eight imports, then one line per extracted signature.
            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = format!("{file_ref}={short} {line_count}L");
            if !dep_info.imports.is_empty() {
                let imports_str: Vec<&str> = dep_info
                    .imports
                    .iter()
                    .take(8)
                    .map(std::string::String::as_str)
                    .collect();
                output.push_str(&format!("\n deps {}", imports_str.join(",")));
            }
            for sig in &sigs {
                output.push('\n');
                if crp_mode.is_tdd() {
                    output.push_str(&sig.to_tdd());
                } else {
                    output.push_str(&sig.to_compact());
                }
            }
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        "map" => {
            // PHP gets a dedicated map when the PHP compressor produces one;
            // otherwise it falls through to the generic map below.
            if ext == "php" {
                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
                {
                    let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
                    let sent = count_tokens(&output);
                    let savings = protocol::format_savings(original_tokens, sent);
                    output.push('\n');
                    output.push_str(&savings);
                    return (append_compressed_hint(&output, file_path), sent);
                }
            }

            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = format!("{file_ref}={short} {line_count}L");

            // Unlike the signatures mode, all imports and exports are listed here.
            if !dep_info.imports.is_empty() {
                output.push_str("\n deps: ");
                output.push_str(&dep_info.imports.join(", "));
            }

            if !dep_info.exports.is_empty() {
                output.push_str("\n exports: ");
                output.push_str(&dep_info.exports.join(", "));
            }

            // Only exported or unindented signatures make it into the API list.
            let key_sigs: Vec<&signatures::Signature> = sigs
                .iter()
                .filter(|s| s.is_exported || s.indent == 0)
                .collect();

            if !key_sigs.is_empty() {
                output.push_str("\n API:");
                for sig in &key_sigs {
                    output.push_str("\n ");
                    if crp_mode.is_tdd() {
                        output.push_str(&sig.to_tdd());
                    } else {
                        output.push_str(&sig.to_compact());
                    }
                }
            }

            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        "aggressive" => {
            // With the tree-sitter feature the input may first be AST-pruned;
            // without it (or when pruning yields nothing) the raw content is used.
            #[cfg(feature = "tree-sitter")]
            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
            #[cfg(not(feature = "tree-sitter"))]
            let ast_pruned: Option<String> = None;

            let base = ast_pruned.as_deref().unwrap_or(content);

            // Task-aware compression is used when the latest session carries a
            // structured intent; otherwise the generic aggressive compressor.
            let session_intent = crate::core::session::SessionState::load_latest()
                .and_then(|s| s.active_structured_intent);
            let raw = if let Some(ref intent) = session_intent {
                compressor::task_aware_compress(base, Some(ext), intent)
            } else {
                compressor::aggressive_compress(base, Some(ext))
            };
            let compressed = compressor::safeguard_ratio(content, &raw);
            let header = build_header(file_ref, short, ext, content, line_count, true);

            let mut sym = SymbolMap::new();
            let idents = symbol_map::extract_identifiers(&compressed, ext);
            for ident in &idents {
                sym.register(ident);
            }

            // Token counts in this mode cover the body only (no header).
            if sym.len() >= 3 {
                let sym_table = sym.format_table();
                let sym_applied = sym.apply(&compressed);
                let orig_tok = count_tokens(&compressed);
                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
                let net = orig_tok.saturating_sub(comp_tok);
                // Keep the symbol-mapped form only when it saves at least 5%.
                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
                    let savings = protocol::format_savings(original_tokens, comp_tok);
                    return (
                        append_compressed_hint(
                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
                            file_path,
                        ),
                        comp_tok,
                    );
                }
                let savings = protocol::format_savings(original_tokens, orig_tok);
                return (
                    append_compressed_hint(
                        &format!("{header}\n{compressed}\n{savings}"),
                        file_path,
                    ),
                    orig_tok,
                );
            }

            let sent = count_tokens(&compressed);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
                sent,
            )
        }
        "entropy" => {
            let result = entropy::entropy_compress_adaptive(content, file_path);
            let avg_h = entropy::analyze_entropy(content).avg_entropy;
            let header = build_header(file_ref, short, ext, content, line_count, false);
            let techs = result.techniques.join(", ");
            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            let compression_ratio = if original_tokens > 0 {
                1.0 - (sent as f64 / original_tokens as f64)
            } else {
                0.0
            };
            // Report to the adaptive thresholds whether more than 15% was saved.
            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        "task" => {
            // Without a task, or without extractable keywords, the unfiltered
            // content is returned with a trailing note explaining why.
            let task_str = task.unwrap_or("");
            if task_str.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
                let sent = count_tokens(&out);
                return (out, sent);
            }
            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
            if keywords.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!(
                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
                );
                let sent = count_tokens(&out);
                return (out, sent);
            }
            let filtered =
                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
            let filtered_lines = filtered.lines().count();
            let header = format!(
                "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
            );
            // Graph context is appended directly after the filtered body; it is
            // an empty string when none is available.
            let project_root = detect_project_root(file_path);
            let graph_ctx = crate::core::graph_context::build_graph_context(
                file_path,
                &project_root,
                Some(crate::core::graph_context::GraphContextOptions::default()),
            )
            .map(|c| crate::core::graph_context::format_graph_context(&c))
            .unwrap_or_default();

            // Tokens are summed per part rather than counted on the joined text.
            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(
                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
                    file_path,
                ),
                sent,
            )
        }
        "reference" => {
            // One-line summary: line count, token count and extension.
            let tok = count_tokens(content);
            let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{output}\n{savings}"), sent)
        }
        mode if mode.starts_with("lines:") => {
            // Everything after the 6-byte "lines:" prefix is the range spec.
            let range_str = &mode[6..];
            let extracted = extract_line_range(content, range_str);
            let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
            // Only the extracted lines are counted, not the header.
            let sent = count_tokens(&extracted);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{header}\n{extracted}\n{savings}"), sent)
        }
        unknown => {
            let header = build_header(file_ref, short, ext, content, line_count, true);
            let out = format!(
                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
            );
            let sent = count_tokens(&out);
            (out, sent)
        }
    }
}
890
/// Extracts the lines named by `range_str` from `content`, each prefixed with
/// its 1-based line number right-aligned to four columns and `| `.
///
/// `range_str` is a comma-separated list of single line numbers (`7`) and
/// inclusive ranges (`3-9`). In a range, a missing or unparsable start means
/// line 1 and a missing or unparsable end means the last line; bounds are
/// clamped to the file. Entries that select nothing are ignored, and entries
/// are emitted in the order given (overlaps are repeated).
///
/// Returns `"No lines matched the range."` when nothing was selected.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    let render = |n: usize| format!("{n:>4}| {}", lines[n - 1]);

    let selected: Vec<String> = range_str
        .split(',')
        .map(str::trim)
        .flat_map(|part| {
            // Resolve every entry to an inclusive, already-clamped span;
            // `(1, 0)` is the empty span.
            let (first, last) = match part.split_once('-') {
                Some((lo, hi)) => (
                    lo.trim().parse::<usize>().unwrap_or(1).max(1),
                    hi.trim().parse::<usize>().unwrap_or(total).min(total),
                ),
                None => match part.parse::<usize>() {
                    Ok(n) if (1..=total).contains(&n) => (n, n),
                    _ => (1, 0),
                },
            };
            first..=last
        })
        .map(render)
        .collect();

    if selected.is_empty() {
        return "No lines matched the range.".to_string();
    }
    selected.join("\n")
}
919
920fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
921 let short = protocol::shorten_path(path);
922 let old_content = cache.get(path).map(|e| e.content.clone());
923
924 let new_content = match read_file_lossy(path) {
925 Ok(c) => c,
926 Err(e) => {
927 let msg = format!("ERROR: {e}");
928 let tokens = count_tokens(&msg);
929 return (msg, tokens);
930 }
931 };
932
933 let original_tokens = count_tokens(&new_content);
934
935 let diff_output = if let Some(old) = &old_content {
936 compressor::diff_content(old, &new_content)
937 } else {
938 format!("[first read]\n{new_content}")
939 };
940
941 cache.store(path, new_content);
942
943 let sent = count_tokens(&diff_output);
944 let savings = protocol::format_savings(original_tokens, sent);
945 (
946 format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}"),
947 sent,
948 )
949}
950
951#[cfg(test)]
952mod tests {
953 use super::*;
954 use std::time::Duration;
955
956 #[test]
957 fn test_header_toon_format_no_brackets() {
958 let content = "use std::io;\nfn main() {}\n";
959 let header = build_header("F1", "main.rs", "rs", content, 2, false);
960 assert!(!header.contains('['));
961 assert!(!header.contains(']'));
962 assert!(header.contains("F1=main.rs 2L"));
963 }
964
965 #[test]
966 fn test_header_toon_deps_indented() {
967 let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
968 let header = build_header("F1", "main.rs", "rs", content, 3, true);
969 if header.contains("deps") {
970 assert!(
971 header.contains("\n deps "),
972 "deps should use indented TOON format"
973 );
974 assert!(
975 !header.contains("deps:["),
976 "deps should not use bracket format"
977 );
978 }
979 }
980
981 #[test]
982 fn test_header_toon_saves_tokens() {
983 let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
984 let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
985 let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
986 let old_tokens = count_tokens(&old_header);
987 let new_tokens = count_tokens(&new_header);
988 assert!(
989 new_tokens <= old_tokens,
990 "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
991 );
992 }
993
994 #[test]
995 fn test_tdd_symbols_are_compact() {
996 let symbols = [
997 "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
998 ];
999 for sym in &symbols {
1000 let tok = count_tokens(sym);
1001 assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
1002 }
1003 }
1004
1005 #[test]
1006 fn test_task_mode_filters_content() {
1007 let content = (0..200)
1008 .map(|i| {
1009 if i % 20 == 0 {
1010 format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
1011 } else {
1012 format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
1013 }
1014 })
1015 .collect::<Vec<_>>()
1016 .join("\n");
1017 let full_tokens = count_tokens(&content);
1018 let task = Some("fix bug in validate_token");
1019 let (result, result_tokens) = process_mode(
1020 &content,
1021 "task",
1022 "F1",
1023 "test.rs",
1024 "rs",
1025 full_tokens,
1026 CrpMode::Off,
1027 "test.rs",
1028 task,
1029 );
1030 assert!(
1031 result_tokens < full_tokens,
1032 "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
1033 );
1034 assert!(
1035 result.contains("task-filtered"),
1036 "output should contain task-filtered marker"
1037 );
1038 }
1039
1040 #[test]
1041 fn test_task_mode_without_task_returns_full() {
1042 let content = "fn main() {}\nfn helper() {}\n";
1043 let tokens = count_tokens(content);
1044 let (result, _sent) = process_mode(
1045 content,
1046 "task",
1047 "F1",
1048 "test.rs",
1049 "rs",
1050 tokens,
1051 CrpMode::Off,
1052 "test.rs",
1053 None,
1054 );
1055 assert!(
1056 result.contains("no task set"),
1057 "should indicate no task: {result}"
1058 );
1059 }
1060
1061 #[test]
1062 fn test_reference_mode_one_line() {
1063 let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
1064 let tokens = count_tokens(content);
1065 let (result, _sent) = process_mode(
1066 content,
1067 "reference",
1068 "F1",
1069 "test.rs",
1070 "rs",
1071 tokens,
1072 CrpMode::Off,
1073 "test.rs",
1074 None,
1075 );
1076 let lines: Vec<&str> = result.lines().collect();
1077 assert!(
1078 lines.len() <= 3,
1079 "reference mode should be very compact, got {} lines",
1080 lines.len()
1081 );
1082 assert!(result.contains("lines"), "should contain line count");
1083 assert!(result.contains("tok"), "should contain token count");
1084 }
1085
    /// A `lines:` read served from the session cache must pick up new file
    /// content after the file is rewritten with a later mtime.
    #[test]
    fn cached_lines_mode_invalidates_on_mtime_change() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("file.txt");
        let p = path.to_string_lossy().to_string();

        std::fs::write(&path, "one\nsecond\n").unwrap();
        let mut cache = SessionCache::new();

        // First read populates the cache. Output line 0 is the header, so the
        // extracted line is at index 1; strip the "<n>| " prefix before comparing.
        let r1 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        let l1: Vec<&str> = r1.content.lines().collect();
        let got1 = l1.get(1).copied().unwrap_or_default().trim();
        let got1 = got1.split_once('|').map_or(got1, |(_, s)| s.trim());
        assert_eq!(got1, "one");

        // Wait before rewriting — presumably so the new mtime differs even on
        // filesystems with coarse timestamp granularity.
        std::thread::sleep(Duration::from_secs(1));
        std::fs::write(&path, "two\nsecond\n").unwrap();

        // Second read must reflect the rewritten file, not the cached content.
        let r2 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        let l2: Vec<&str> = r2.content.lines().collect();
        let got2 = l2.get(1).copied().unwrap_or_default().trim();
        let got2 = got2.split_once('|').map_or(got2, |(_, s)| s.trim());
        assert_eq!(got2, "two");
    }
1110
/// Runs the generated benchmark source through the `full`, `task`,
/// `signatures` and `reference` modes, prints the token count of each next to
/// its savings relative to `full`, and checks the expected ordering.
#[test]
#[cfg_attr(tarpaulin, ignore)]
fn benchmark_task_conditioned_compression() {
    let content = generate_benchmark_code(200);
    let full_tokens = count_tokens(&content);
    let task = Some("fix authentication in validate_token");

    // Every mode is measured with identical arguments apart from the mode
    // name; only the second element of the returned pair is kept.
    let measure = |mode: &str| {
        let (_output, sent) = process_mode(
            &content,
            mode,
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );
        sent
    };

    let full_tok = measure("full");
    let task_tok = measure("task");
    let sig_tok = measure("signatures");
    let ref_tok = measure("reference");

    // Savings are expressed as a percentage of the `full` mode result.
    let baseline = full_tok as f64;
    let task_savings = (1.0 - task_tok as f64 / baseline) * 100.0;
    let sig_savings = (1.0 - sig_tok as f64 / baseline) * 100.0;
    let ref_savings = (1.0 - ref_tok as f64 / baseline) * 100.0;

    eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
    eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
    eprintln!(" full: {full_tok:>6} tokens (baseline)");
    eprintln!(
        " task: {task_tok:>6} tokens ({:.0}% savings)",
        task_savings
    );
    eprintln!(
        " signatures: {sig_tok:>6} tokens ({:.0}% savings)",
        sig_savings
    );
    eprintln!(
        " reference: {ref_tok:>6} tokens ({:.0}% savings)",
        ref_savings
    );
    eprintln!("================================================\n");

    assert!(task_tok < full_tok, "task mode should save tokens");
    assert!(sig_tok < full_tok, "signatures should save tokens");
    assert!(ref_tok < sig_tok, "reference should be most compact");
}
1185
/// Builds a synthetic Rust source file used as the benchmark fixture.
///
/// The output is a fixed 17-line preamble (imports, a `Server` struct and the
/// start of `impl Server` with a `validate_token` method), followed by
/// `lines - 17` filler lines (none when `lines` is 17 or less), followed by
/// the closing brace(s).
///
/// Filler is grouped into `handler_{i}` methods: every 30th filler line opens
/// a handler, the 30th line after it closes that handler, and the lines in
/// between are `let val_{i} = ...` statements. A handler that is still open
/// when the filler runs out is closed explicitly, so the braces in the
/// returned source always balance.
///
/// Lines are joined with `\n`; there is no trailing newline.
fn generate_benchmark_code(lines: usize) -> String {
    /// Number of filler lines occupied by one complete handler.
    const HANDLER_SPAN: usize = 30;

    /// Fixed opening part of the generated file.
    const PREAMBLE: [&str; 17] = [
        "use std::collections::HashMap;",
        "use crate::core::auth;",
        "",
        "pub struct Server {",
        " config: Config,",
        " cache: HashMap<String, String>,",
        "}",
        "",
        "impl Server {",
        " pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {",
        " let decoded = auth::decode_jwt(token)?;",
        " if decoded.exp < chrono::Utc::now().timestamp() {",
        " return Err(AuthError::Expired);",
        " }",
        " Ok(decoded.claims)",
        " }",
        "",
    ];

    // Up to two closing braces follow the filler, so reserve room for them.
    let mut code: Vec<String> = Vec::with_capacity(lines.max(PREAMBLE.len()).saturating_add(2));
    code.extend(PREAMBLE.iter().map(|line| (*line).to_string()));

    let remaining = lines.saturating_sub(code.len());
    let mut handler_open = false;
    for i in 0..remaining {
        let slot = i % HANDLER_SPAN;
        if slot == 0 {
            code.push(format!(
                " pub fn handler_{i}(&self, req: Request) -> Response {{"
            ));
            handler_open = true;
        } else if slot == HANDLER_SPAN - 1 {
            code.push(" }".to_string());
            handler_open = false;
        } else {
            code.push(format!(" let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"));
        }
    }

    // The filler may stop in the middle of a handler; close it so the final
    // brace below terminates `impl Server` rather than the handler.
    if handler_open {
        code.push(" }".to_string());
    }
    code.push("}".to_string());
    code.join("\n")
}
1224
/// `is_instruction_file` must accept the listed agent/skill/rule paths and
/// reject ordinary source, config and data files.
#[test]
fn instruction_file_detection() {
    let instruction_paths = [
        "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
        "/workspace/.cursor/rules/lean-ctx.mdc",
        "/project/AGENTS.md",
        "/project/.cursorrules",
        "/home/user/.claude/rules/my-rule.md",
        "/skills/some-skill/README.md",
    ];
    for &path in &instruction_paths {
        assert!(
            is_instruction_file(path),
            "expected an instruction file: {path}"
        );
    }

    let ordinary_paths = [
        "/project/src/main.rs",
        "/project/config.json",
        "/project/data/report.csv",
    ];
    for &path in &ordinary_paths {
        assert!(
            !is_instruction_file(path),
            "expected a regular file: {path}"
        );
    }
}
1240
/// For each listed instruction-file path, `resolve_auto_mode` must answer
/// `"full"` for the given numeric argument and optional third argument.
#[test]
fn resolve_auto_mode_returns_full_for_instruction_files() {
    // (path, numeric argument, optional third argument, failure message)
    let cases = [
        (
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
            5000,
            Some("read"),
            "SKILL.md must always be read in full",
        ),
        (
            "/workspace/AGENTS.md",
            3000,
            Some("read"),
            "AGENTS.md must always be read in full",
        ),
        (
            "/workspace/.cursorrules",
            2000,
            None,
            ".cursorrules must always be read in full",
        ),
    ];

    for &(path, size, hint, why) in &cases {
        let mode = resolve_auto_mode(path, size, hint);
        assert_eq!(mode, "full", "{why}");
    }
}
1256}