1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a read request: the rendered text plus bookkeeping about how it was produced.
pub struct ReadOutput {
    /// Text handed back to the caller (rendered file view, diff, or an `ERROR: …` message).
    pub content: String,
    /// Mode that was actually applied (e.g. what `"auto"` resolved to); `"error"` when the
    /// file could not be read.
    pub resolved_mode: String,
    /// Token count reported for the output.
    pub output_tokens: usize,
}
22
/// Marker line appended to compressed outputs so the reader knows the complete source is
/// available through `mode="full"`.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";
24
/// Modes whose rendered output is kept in the per-file compressed cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is listed in `CACHEABLE_MODES`.
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|candidate| *candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32 if crp_mode.is_tdd() {
33 format!("{mode}:tdd")
34 } else {
35 mode.to_string()
36 }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40 format!(
41 "{output}\n{COMPRESSED_HINT}\n ctx_read(\"{file_path}\", mode=\"full\") | ctx_retrieve(\"{file_path}\")"
42 )
43}
44
45pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
49 if crate::core::binary_detect::is_binary_file(path) {
50 let msg = crate::core::binary_detect::binary_file_message(path);
51 return Err(std::io::Error::other(msg));
52 }
53
54 if let Ok(canonical) = std::path::Path::new(path).canonicalize() {
55 if let Ok(cwd) = std::env::current_dir() {
56 let root = crate::core::pathjail::canonicalize_or_self(&cwd);
57 if !canonical.starts_with(&root) {
58 let allow = crate::core::pathjail::allow_paths_from_env_and_config();
59 let data_dir_ok = crate::core::data_dir::lean_ctx_data_dir()
60 .ok()
61 .is_some_and(|d| canonical.starts_with(d));
62 let tmp_ok = canonical.starts_with(std::env::temp_dir());
63 if !allow.iter().any(|a| canonical.starts_with(a)) && !data_dir_ok && !tmp_ok {
64 tracing::warn!(
65 "defense-in-depth: path may escape project root: {}",
66 canonical.display()
67 );
68 }
69 }
70 }
71 }
72
73 let cap = crate::core::limits::max_read_bytes();
74
75 let file = open_with_retry(path)?;
76 let meta = file
77 .metadata()
78 .map_err(|e| std::io::Error::other(format!("cannot stat open file descriptor: {e}")))?;
79 if meta.len() > cap as u64 {
80 return Err(std::io::Error::other(format!(
81 "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
82 Increase the limit or use a line-range read: mode=\"lines:1-100\"",
83 meta.len(),
84 cap
85 )));
86 }
87
88 use std::io::Read;
89 let mut bytes = Vec::with_capacity(meta.len() as usize);
90 std::io::BufReader::new(file).read_to_end(&mut bytes)?;
91 match String::from_utf8(bytes) {
92 Ok(s) => Ok(s),
93 Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
94 }
95}
96
/// Opens `path` for reading, retrying once after 50 ms when the first attempt fails with
/// `NotFound`. Any other error, and the result of the retry, is returned as-is.
fn open_with_retry(path: &str) -> Result<std::fs::File, std::io::Error> {
    std::fs::File::open(path).or_else(|first_err| {
        if first_err.kind() != std::io::ErrorKind::NotFound {
            return Err(first_err);
        }
        let backoff = std::time::Duration::from_millis(50);
        std::thread::sleep(backoff);
        std::fs::File::open(path)
    })
}
109
110pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
112 handle_with_options(cache, path, mode, false, crp_mode, None)
113}
114
115pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
117 handle_with_options(cache, path, mode, true, crp_mode, None)
118}
119
120pub fn handle_with_task(
122 cache: &mut SessionCache,
123 path: &str,
124 mode: &str,
125 crp_mode: CrpMode,
126 task: Option<&str>,
127) -> String {
128 handle_with_options(cache, path, mode, false, crp_mode, task)
129}
130
131pub fn handle_with_task_resolved(
133 cache: &mut SessionCache,
134 path: &str,
135 mode: &str,
136 crp_mode: CrpMode,
137 task: Option<&str>,
138) -> ReadOutput {
139 handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
140}
141
142pub fn handle_fresh_with_task(
144 cache: &mut SessionCache,
145 path: &str,
146 mode: &str,
147 crp_mode: CrpMode,
148 task: Option<&str>,
149) -> String {
150 handle_with_options(cache, path, mode, true, crp_mode, task)
151}
152
153pub fn handle_fresh_with_task_resolved(
155 cache: &mut SessionCache,
156 path: &str,
157 mode: &str,
158 crp_mode: CrpMode,
159 task: Option<&str>,
160) -> ReadOutput {
161 handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
162}
163
164fn handle_with_options(
165 cache: &mut SessionCache,
166 path: &str,
167 mode: &str,
168 fresh: bool,
169 crp_mode: CrpMode,
170 task: Option<&str>,
171) -> String {
172 handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
173}
174
/// Core read pipeline shared by every `handle*` entry point.
///
/// Order of operations:
/// 1. `fresh` drops any cached entry for `path`.
/// 2. `mode == "diff"` is delegated to `handle_diff`.
/// 3. For every mode except `"full"`, a cached entry reported stale by
///    `is_cache_entry_stale` is invalidated.
/// 4. If a cached entry remains, the output is rendered from it: `"full"` goes through
///    `handle_full_with_auto_delta`, cacheable modes may be served from the compressed cache.
/// 5. Otherwise the file is read from disk, stored in the cache and rendered; graph and
///    semantic-similarity hints are appended on this path only.
///
/// Returned text passes through `redact_text_if_enabled`, except for diff output and
/// read-error messages. A failed disk read yields `resolved_mode == "error"` with an
/// `ERROR: …` message.
fn handle_with_options_resolved(
    cache: &mut SessionCache,
    path: &str,
    mode: &str,
    fresh: bool,
    crp_mode: CrpMode,
    task: Option<&str>,
) -> ReadOutput {
    let file_ref = cache.get_file_ref(path);
    let short = protocol::shorten_path(path);
    let ext = Path::new(path)
        .extension()
        .and_then(|e| e.to_str())
        .unwrap_or("");

    if fresh {
        cache.invalidate(path);
    }

    if mode == "diff" {
        let (out, sent) = handle_diff(cache, path, &file_ref);
        return ReadOutput {
            content: out,
            resolved_mode: "diff".into(),
            output_tokens: sent,
        };
    }

    // "full" re-reads the file itself (see handle_full_with_auto_delta), so the staleness
    // check is only needed for modes that would otherwise render from cached content.
    if mode != "full" {
        if let Some(existing) = cache.get(path) {
            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
            if stale {
                cache.invalidate(path);
            }
        }
    }

    // Cached path: render from the entry already held in the session cache.
    if let Some(existing) = cache.get(path) {
        if mode == "full" {
            let (out, sent) =
                handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
            let out = crate::core::redaction::redact_text_if_enabled(&out);
            return ReadOutput {
                content: out,
                resolved_mode: "full".into(),
                output_tokens: sent,
            };
        }
        let content = existing.content();
        let original_tokens = existing.original_tokens;
        let resolved_mode = if mode == "auto" {
            resolve_auto_mode(path, original_tokens, task)
        } else {
            mode.to_string()
        };
        // Serve a previously rendered output when one exists for this mode/key.
        if is_cacheable_mode(&resolved_mode) {
            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
            if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
                let sent = count_tokens(cached_output);
                let out = crate::core::redaction::redact_text_if_enabled(cached_output);
                return ReadOutput {
                    content: out,
                    resolved_mode,
                    output_tokens: sent,
                };
            }
        }
        let (out, sent) = process_mode(
            &content,
            &resolved_mode,
            &file_ref,
            &short,
            ext,
            original_tokens,
            crp_mode,
            path,
            task,
        );
        // The unredacted rendering is what gets cached; redaction is applied on the way out.
        if is_cacheable_mode(&resolved_mode) {
            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
            cache.set_compressed(path, &cache_key, out.clone());
        }
        let out = crate::core::redaction::redact_text_if_enabled(&out);
        // On this path the reported token count is the one returned by process_mode.
        return ReadOutput {
            content: out,
            resolved_mode,
            output_tokens: sent,
        };
    }

    // Uncached path: read from disk.
    let content = match read_file_lossy(path) {
        Ok(c) => c,
        Err(e) => {
            let msg = format!("ERROR: {e}");
            let tokens = count_tokens(&msg);
            return ReadOutput {
                content: msg,
                resolved_mode: "error".into(),
                output_tokens: tokens,
            };
        }
    };

    // Both hints are computed before the content is stored in the session cache.
    let similar_hint = find_similar_and_update_semantic_index(path, &content);
    let graph_hint = build_graph_related_hint(path);

    let store_result = cache.store(path, &content);

    if mode == "full" {
        cache.mark_full_delivered(path);
        let (mut output, sent) = format_full_output(
            &file_ref,
            &short,
            ext,
            &content,
            store_result.original_tokens,
            store_result.line_count,
            task,
        );
        if let Some(hint) = &graph_hint {
            output.push_str(&format!("\n{hint}"));
        }
        if let Some(hint) = similar_hint {
            output.push_str(&format!("\n{hint}"));
        }
        let output = crate::core::redaction::redact_text_if_enabled(&output);
        return ReadOutput {
            content: output,
            resolved_mode: "full".into(),
            output_tokens: sent,
        };
    }

    let resolved_mode = if mode == "auto" {
        resolve_auto_mode(path, store_result.original_tokens, task)
    } else {
        mode.to_string()
    };

    let (mut output, _sent) = process_mode(
        &content,
        &resolved_mode,
        &file_ref,
        &short,
        ext,
        store_result.original_tokens,
        crp_mode,
        path,
        task,
    );
    // Cache the rendering before hints are appended, so cached output stays hint-free.
    if is_cacheable_mode(&resolved_mode) {
        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
        cache.set_compressed(path, &cache_key, output.clone());
    }
    if let Some(hint) = &graph_hint {
        output.push_str(&format!("\n{hint}"));
    }
    if let Some(hint) = similar_hint {
        output.push_str(&format!("\n{hint}"));
    }
    let output = crate::core::redaction::redact_text_if_enabled(&output);
    // Here the token count is recomputed on the final text (hints and redaction included),
    // unlike the cached path above.
    let final_tokens = count_tokens(&output);
    ReadOutput {
        content: output,
        resolved_mode,
        output_tokens: final_tokens,
    }
}
343
/// Returns `true` when `path` points at an agent-instruction file (skill/rules/agents
/// files) that should always be delivered in full.
///
/// Matching is case-insensitive and separator-agnostic: backslashes are normalised to `/`
/// first, so Windows-style paths such as `C:\repo\.cursor\rules\style.mdc` are recognised
/// by the directory markers as well (previously only `/`-separated paths matched them).
///
/// A path matches when its file name is one of the known instruction file names, or when
/// the path contains one of the instruction directory markers.
pub fn is_instruction_file(path: &str) -> bool {
    const INSTRUCTION_FILENAMES: &[&str] = &[
        "skill.md",
        "agents.md",
        "rules.md",
        ".cursorrules",
        ".clinerules",
        "lean-ctx.md",
        "lean-ctx.mdc",
    ];
    const INSTRUCTION_PATH_MARKERS: &[&str] = &[
        "/skills/",
        "/.cursor/rules/",
        "/.claude/rules/",
        "/agents.md",
    ];

    let normalized = path.to_lowercase().replace('\\', "/");
    let filename = std::path::Path::new(&normalized)
        .file_name()
        .and_then(|f| f.to_str())
        .unwrap_or("");

    INSTRUCTION_FILENAMES.contains(&filename)
        || INSTRUCTION_PATH_MARKERS
            .iter()
            .any(|marker| normalized.contains(*marker))
}
365
366fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
367 if is_instruction_file(file_path) {
368 return "full".to_string();
369 }
370
371 let intent_query = task.unwrap_or("read");
374 let route = crate::core::intent_router::route_v1(intent_query);
375 let intent_mode = &route.decision.effective_read_mode;
376 if intent_mode != "auto" && intent_mode != "reference" {
377 return intent_mode.clone();
378 }
379
380 let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
382 let predictor = crate::core::mode_predictor::ModePredictor::new();
383 let mut predicted = predictor
384 .predict_best_mode(&sig)
385 .unwrap_or_else(|| "full".to_string());
386 if predicted == "auto" {
387 predicted = "full".to_string();
388 }
389
390 if let Some(project_root) =
392 crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
393 {
394 let ext = std::path::Path::new(file_path)
395 .extension()
396 .and_then(|e| e.to_str())
397 .unwrap_or("");
398 let bucket = match original_tokens {
399 0..=2000 => "sm",
400 2001..=10000 => "md",
401 10001..=50000 => "lg",
402 _ => "xl",
403 };
404 let bandit_key = format!("{ext}_{bucket}");
405 let mut store = crate::core::bandit::BanditStore::load(&project_root);
406 let bandit = store.get_or_create(&bandit_key);
407 let arm = bandit.select_arm();
408 if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
409 predicted = "aggressive".to_string();
410 }
411 }
412
413 let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
415 let chosen = policy.choose_auto_mode(task, &predicted);
416
417 if original_tokens > 2000 {
418 if predicted == "map" || predicted == "signatures" {
419 if chosen != "map" && chosen != "signatures" {
420 return predicted;
421 }
422 } else if chosen == "full" && predicted != "full" {
423 return predicted;
424 }
425 }
426
427 chosen
428}
429
430fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
431 let cfg = crate::core::config::Config::load();
432 let profile = crate::core::config::MemoryProfile::effective(&cfg);
433 if !profile.semantic_cache_enabled() {
434 return None;
435 }
436
437 let project_root = detect_project_root(path);
438 let session_id = format!("{}", std::process::id());
439 let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
440
441 let similar = index.find_similar(content, 0.7);
442 let relevant: Vec<_> = similar
443 .into_iter()
444 .filter(|(p, _)| p != path)
445 .take(3)
446 .collect();
447
448 index.add_file(path, content, &session_id);
449 let _ = index.save(&project_root);
450
451 if relevant.is_empty() {
452 return None;
453 }
454
455 let hints: Vec<String> = relevant
456 .iter()
457 .map(|(p, score)| format!(" {p} ({:.0}% similar)", score * 100.0))
458 .collect();
459
460 Some(format!(
461 "[semantic: {} similar file(s) in cache]\n{}",
462 relevant.len(),
463 hints.join("\n")
464 ))
465}
466
467fn detect_project_root(path: &str) -> String {
468 crate::core::protocol::detect_project_root_or_cwd(path)
469}
470
471fn build_graph_related_hint(path: &str) -> Option<String> {
472 let project_root = detect_project_root(path);
473 crate::core::graph_context::build_related_hint(path, &project_root, 5)
474}
475
476const AUTO_DELTA_THRESHOLD: f64 = 0.6;
477
478fn handle_full_with_auto_delta(
480 cache: &mut SessionCache,
481 path: &str,
482 file_ref: &str,
483 short: &str,
484 ext: &str,
485 task: Option<&str>,
486) -> (String, usize) {
487 let Ok(disk_content) = read_file_lossy(path) else {
488 cache.record_cache_hit(path);
489 let out = if let Some(existing) = cache.get(path) {
490 format!(
491 "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
492 existing.read_count, existing.line_count
493 )
494 } else {
495 format!("[file read failed and no cached version available] {file_ref}={short}")
496 };
497 let sent = count_tokens(&out);
498 return (out, sent);
499 };
500
501 let old_content = cache
502 .get(path)
503 .map(crate::core::cache::CacheEntry::content)
504 .unwrap_or_default();
505 let store_result = cache.store(path, &disk_content);
506
507 if store_result.was_hit {
508 if store_result.full_content_delivered {
509 let out = format!(
510 "{file_ref}={short} cached {}t {}L\nFile content unchanged since last read (same hash). Already in your context window.",
511 store_result.read_count, store_result.line_count
512 );
513 let sent = count_tokens(&out);
514 return (out, sent);
515 }
516 cache.mark_full_delivered(path);
517 return format_full_output(
518 file_ref,
519 short,
520 ext,
521 &disk_content,
522 store_result.original_tokens,
523 store_result.line_count,
524 task,
525 );
526 }
527
528 let diff = compressor::diff_content(&old_content, &disk_content);
529 let diff_tokens = count_tokens(&diff);
530 let full_tokens = store_result.original_tokens;
531
532 if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
533 let savings = protocol::format_savings(full_tokens, diff_tokens);
534 let out = format!(
535 "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
536 disk_content.lines().count()
537 );
538 return (out, diff_tokens);
539 }
540
541 format_full_output(
542 file_ref,
543 short,
544 ext,
545 &disk_content,
546 store_result.original_tokens,
547 store_result.line_count,
548 task,
549 )
550}
551
552fn format_full_output(
553 file_ref: &str,
554 short: &str,
555 ext: &str,
556 content: &str,
557 original_tokens: usize,
558 line_count: usize,
559 task: Option<&str>,
560) -> (String, usize) {
561 let tokens = original_tokens;
562 let metadata = build_header(file_ref, short, ext, content, line_count, true);
563
564 let mut reordered: Option<String> = None;
565 {
566 let profile = crate::core::profiles::active_profile();
567 let cfg = profile.layout;
568 if cfg.enabled_effective() && line_count >= cfg.min_lines_effective() {
569 let task_str = task.unwrap_or("");
570 if !task_str.is_empty() {
571 let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
572 let r = crate::core::attention_layout_driver::maybe_reorder_for_attention(
573 content, &keywords, &cfg,
574 );
575 if !r.skipped && r.changed {
576 reordered = Some(r.output);
577 }
578 }
579 }
580 }
581
582 let content_for_output = reordered.as_deref().unwrap_or(content);
583
584 let mut sym = SymbolMap::new();
585 let idents = symbol_map::extract_identifiers(content_for_output, ext);
586 for ident in &idents {
587 sym.register(ident);
588 }
589
590 if sym.len() >= 3 {
591 let sym_table = sym.format_table();
592 let compressed = sym.apply(content_for_output);
593 let original_tok = count_tokens(content_for_output);
594 let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
595 let net_saving = original_tok.saturating_sub(compressed_tok);
596 if original_tok > 0 && net_saving * 100 / original_tok >= 5 {
597 let output = format!("{metadata}\n{compressed}{sym_table}");
598 let sent = count_tokens(&output);
599 let savings = protocol::format_savings(tokens, sent);
600 return (format!("{output}\n{savings}"), sent);
601 }
602 }
603
604 let output = format!("{metadata}\n{content_for_output}");
605 let sent = count_tokens(&output);
606 let savings = protocol::format_savings(tokens, sent);
607 (format!("{output}\n{savings}"), sent)
608}
609
610fn build_header(
611 file_ref: &str,
612 short: &str,
613 ext: &str,
614 content: &str,
615 line_count: usize,
616 include_deps: bool,
617) -> String {
618 let mut header = format!("{file_ref}={short} {line_count}L");
619
620 if include_deps {
621 let dep_info = deps::extract_deps(content, ext);
622 if !dep_info.imports.is_empty() {
623 let imports_str: Vec<&str> = dep_info
624 .imports
625 .iter()
626 .take(8)
627 .map(std::string::String::as_str)
628 .collect();
629 header.push_str(&format!("\n deps {}", imports_str.join(",")));
630 }
631 if !dep_info.exports.is_empty() {
632 let exports_str: Vec<&str> = dep_info
633 .exports
634 .iter()
635 .take(8)
636 .map(std::string::String::as_str)
637 .collect();
638 header.push_str(&format!("\n exports {}", exports_str.join(",")));
639 }
640 }
641
642 header
643}
644
/// Renders `content` according to `mode` and returns `(text, tokens)`.
///
/// Supported modes: `"auto"` (resolved, then re-dispatched), `"full"`, `"signatures"`,
/// `"map"`, `"aggressive"`, `"entropy"`, `"task"`, `"reference"` and `"lines:<ranges>"`.
/// Any other value falls back to the full content prefixed with a warning.
///
/// `original_tokens` is the token count of the unprocessed file and is only used for the
/// savings line. In most arms the returned token count is measured before the savings line
/// and the compressed-content hint are appended to the text.
#[allow(clippy::too_many_arguments)]
fn process_mode(
    content: &str,
    mode: &str,
    file_ref: &str,
    short: &str,
    ext: &str,
    original_tokens: usize,
    crp_mode: CrpMode,
    file_path: &str,
    task: Option<&str>,
) -> (String, usize) {
    let line_count = content.lines().count();

    match mode {
        // Resolve to a concrete mode, then dispatch again.
        "auto" => {
            let chosen = resolve_auto_mode(file_path, original_tokens, task);
            process_mode(
                content,
                &chosen,
                file_ref,
                short,
                ext,
                original_tokens,
                crp_mode,
                file_path,
                task,
            )
        }
        "full" => format_full_output(
            file_ref,
            short,
            ext,
            content,
            original_tokens,
            line_count,
            task,
        ),
        // Header, up to eight imports, then one line per extracted signature.
        "signatures" => {
            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = format!("{file_ref}={short} {line_count}L");
            if !dep_info.imports.is_empty() {
                let imports_str: Vec<&str> = dep_info
                    .imports
                    .iter()
                    .take(8)
                    .map(std::string::String::as_str)
                    .collect();
                output.push_str(&format!("\n deps {}", imports_str.join(",")));
            }
            for sig in &sigs {
                output.push('\n');
                if crp_mode.is_tdd() {
                    output.push_str(&sig.to_tdd());
                } else {
                    output.push_str(&sig.to_compact());
                }
            }
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        // Structural overview: all imports/exports plus exported or top-level signatures.
        "map" => {
            // PHP files use a dedicated map compressor when it produces a result.
            if ext == "php" {
                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
                {
                    let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
                    let sent = count_tokens(&output);
                    let savings = protocol::format_savings(original_tokens, sent);
                    output.push('\n');
                    output.push_str(&savings);
                    return (append_compressed_hint(&output, file_path), sent);
                }
            }

            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = format!("{file_ref}={short} {line_count}L");

            if !dep_info.imports.is_empty() {
                output.push_str("\n deps: ");
                output.push_str(&dep_info.imports.join(", "));
            }

            if !dep_info.exports.is_empty() {
                output.push_str("\n exports: ");
                output.push_str(&dep_info.exports.join(", "));
            }

            // Keep only signatures that are exported or sit at indent level 0.
            let key_sigs: Vec<&signatures::Signature> = sigs
                .iter()
                .filter(|s| s.is_exported || s.indent == 0)
                .collect();

            if !key_sigs.is_empty() {
                output.push_str("\n API:");
                for sig in &key_sigs {
                    output.push_str("\n ");
                    if crp_mode.is_tdd() {
                        output.push_str(&sig.to_tdd());
                    } else {
                        output.push_str(&sig.to_compact());
                    }
                }
            }

            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        "aggressive" => {
            // AST pruning is only available with the `tree-sitter` feature.
            #[cfg(feature = "tree-sitter")]
            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
            #[cfg(not(feature = "tree-sitter"))]
            let ast_pruned: Option<String> = None;

            let base = ast_pruned.as_deref().unwrap_or(content);

            // Use task-aware compression when the latest session has a structured intent.
            let session_intent = crate::core::session::SessionState::load_latest()
                .and_then(|s| s.active_structured_intent);
            let raw = if let Some(ref intent) = session_intent {
                compressor::task_aware_compress(base, Some(ext), intent)
            } else {
                compressor::aggressive_compress(base, Some(ext))
            };
            let compressed = compressor::safeguard_ratio(content, &raw);
            let header = build_header(file_ref, short, ext, content, line_count, true);

            let mut sym = SymbolMap::new();
            let idents = symbol_map::extract_identifiers(&compressed, ext);
            for ident in &idents {
                sym.register(ident);
            }

            if sym.len() >= 3 {
                let sym_table = sym.format_table();
                let sym_applied = sym.apply(&compressed);
                let orig_tok = count_tokens(&compressed);
                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
                let net = orig_tok.saturating_sub(comp_tok);
                // Symbol substitution is kept only when it saves at least 5% (table included).
                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
                    let savings = protocol::format_savings(original_tokens, comp_tok);
                    return (
                        append_compressed_hint(
                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
                            file_path,
                        ),
                        comp_tok,
                    );
                }
                let savings = protocol::format_savings(original_tokens, orig_tok);
                return (
                    append_compressed_hint(
                        &format!("{header}\n{compressed}\n{savings}"),
                        file_path,
                    ),
                    orig_tok,
                );
            }

            let sent = count_tokens(&compressed);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
                sent,
            )
        }
        "entropy" => {
            let result = entropy::entropy_compress_adaptive(content, file_path);
            let avg_h = entropy::analyze_entropy(content).avg_entropy;
            let header = build_header(file_ref, short, ext, content, line_count, false);
            let techs = result.techniques.join(", ");
            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            let compression_ratio = if original_tokens > 0 {
                1.0 - (sent as f64 / original_tokens as f64)
            } else {
                0.0
            };
            // Reports `true` to the adaptive thresholds when more than 15% was saved.
            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        "task" => {
            // Without a task, or without extractable keywords, the full content is returned
            // with a trailing note explaining why.
            let task_str = task.unwrap_or("");
            if task_str.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
                let sent = count_tokens(&out);
                return (out, sent);
            }
            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
            if keywords.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!(
                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
                );
                let sent = count_tokens(&out);
                return (out, sent);
            }
            let filtered =
                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
            let filtered_lines = filtered.lines().count();
            let header = format!(
                "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
            );
            let project_root = detect_project_root(file_path);
            // Graph context is optional; an empty string is used when none is available.
            let graph_ctx = crate::core::graph_context::build_graph_context(
                file_path,
                &project_root,
                Some(crate::core::graph_context::GraphContextOptions::default()),
            )
            .map(|c| crate::core::graph_context::format_graph_context(&c))
            .unwrap_or_default();

            // Token count is the sum of the three parts, counted separately.
            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(
                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
                    file_path,
                ),
                sent,
            )
        }
        // One-line summary: line count, token count and extension.
        "reference" => {
            let tok = count_tokens(content);
            let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{output}\n{savings}"), sent)
        }
        // `lines:<ranges>` — everything after the 6-byte "lines:" prefix is the range spec.
        mode if mode.starts_with("lines:") => {
            let range_str = &mode[6..];
            let extracted = extract_line_range(content, range_str);
            let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
            // Only the extracted lines are counted, not the header.
            let sent = count_tokens(&extracted);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{header}\n{extracted}\n{savings}"), sent)
        }
        unknown => {
            let header = build_header(file_ref, short, ext, content, line_count, true);
            let out = format!(
                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
            );
            let sent = count_tokens(&out);
            (out, sent)
        }
    }
}
907
/// Extracts the lines of `content` selected by `range_str`, each prefixed with its
/// right-aligned 1-based line number and `"| "`.
///
/// `range_str` is a comma-separated list of single line numbers (`"7"`) and inclusive
/// ranges (`"3-9"`). A missing or unparsable range start defaults to the first line, a
/// missing or unparsable end to the last line; bounds are clamped to the file. Parts that
/// select nothing are ignored. Returns `"No lines matched the range."` when no line was
/// selected.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    let numbered = |n: usize| format!("{n:>4}| {}", lines[n - 1]);

    let mut picked: Vec<String> = Vec::new();
    for spec in range_str.split(',').map(str::trim) {
        match spec.split_once('-') {
            Some((lo, hi)) => {
                let first = lo.trim().parse::<usize>().map_or(1, |v| v.max(1));
                let last = hi.trim().parse::<usize>().map_or(total, |v| v.min(total));
                // `first >= 1` and `last <= total`, so every index in the range is valid.
                picked.extend((first..=last).map(&numbered));
            }
            None => {
                let single = spec
                    .parse::<usize>()
                    .ok()
                    .filter(|n| (1..=total).contains(n));
                if let Some(n) = single {
                    picked.push(numbered(n));
                }
            }
        }
    }

    if picked.is_empty() {
        return "No lines matched the range.".to_string();
    }
    picked.join("\n")
}
936
937fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
938 let short = protocol::shorten_path(path);
939 let old_content = cache.get(path).map(crate::core::cache::CacheEntry::content);
940
941 let new_content = match read_file_lossy(path) {
942 Ok(c) => c,
943 Err(e) => {
944 let msg = format!("ERROR: {e}");
945 let tokens = count_tokens(&msg);
946 return (msg, tokens);
947 }
948 };
949
950 let original_tokens = count_tokens(&new_content);
951
952 let diff_output = if let Some(old) = &old_content {
953 compressor::diff_content(old, &new_content)
954 } else {
955 format!("[first read]\n{new_content}")
956 };
957
958 cache.store(path, &new_content);
959
960 let sent = count_tokens(&diff_output);
961 let savings = protocol::format_savings(original_tokens, sent);
962 (
963 format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}"),
964 sent,
965 )
966}
967
968#[cfg(test)]
969mod tests {
970 use super::*;
971 use std::time::Duration;
972
973 #[test]
974 fn test_header_toon_format_no_brackets() {
975 let content = "use std::io;\nfn main() {}\n";
976 let header = build_header("F1", "main.rs", "rs", content, 2, false);
977 assert!(!header.contains('['));
978 assert!(!header.contains(']'));
979 assert!(header.contains("F1=main.rs 2L"));
980 }
981
982 #[test]
983 fn test_header_toon_deps_indented() {
984 let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
985 let header = build_header("F1", "main.rs", "rs", content, 3, true);
986 if header.contains("deps") {
987 assert!(
988 header.contains("\n deps "),
989 "deps should use indented TOON format"
990 );
991 assert!(
992 !header.contains("deps:["),
993 "deps should not use bracket format"
994 );
995 }
996 }
997
998 #[test]
999 fn test_header_toon_saves_tokens() {
1000 let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
1001 let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
1002 let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
1003 let old_tokens = count_tokens(&old_header);
1004 let new_tokens = count_tokens(&new_header);
1005 assert!(
1006 new_tokens <= old_tokens,
1007 "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
1008 );
1009 }
1010
1011 #[test]
1012 fn test_tdd_symbols_are_compact() {
1013 let symbols = [
1014 "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
1015 ];
1016 for sym in &symbols {
1017 let tok = count_tokens(sym);
1018 assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
1019 }
1020 }
1021
1022 #[test]
1023 fn test_task_mode_filters_content() {
1024 let content = (0..200)
1025 .map(|i| {
1026 if i % 20 == 0 {
1027 format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
1028 } else {
1029 format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
1030 }
1031 })
1032 .collect::<Vec<_>>()
1033 .join("\n");
1034 let full_tokens = count_tokens(&content);
1035 let task = Some("fix bug in validate_token");
1036 let (result, result_tokens) = process_mode(
1037 &content,
1038 "task",
1039 "F1",
1040 "test.rs",
1041 "rs",
1042 full_tokens,
1043 CrpMode::Off,
1044 "test.rs",
1045 task,
1046 );
1047 assert!(
1048 result_tokens < full_tokens,
1049 "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
1050 );
1051 assert!(
1052 result.contains("task-filtered"),
1053 "output should contain task-filtered marker"
1054 );
1055 }
1056
1057 #[test]
1058 fn test_task_mode_without_task_returns_full() {
1059 let content = "fn main() {}\nfn helper() {}\n";
1060 let tokens = count_tokens(content);
1061 let (result, _sent) = process_mode(
1062 content,
1063 "task",
1064 "F1",
1065 "test.rs",
1066 "rs",
1067 tokens,
1068 CrpMode::Off,
1069 "test.rs",
1070 None,
1071 );
1072 assert!(
1073 result.contains("no task set"),
1074 "should indicate no task: {result}"
1075 );
1076 }
1077
1078 #[test]
1079 fn test_reference_mode_one_line() {
1080 let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
1081 let tokens = count_tokens(content);
1082 let (result, _sent) = process_mode(
1083 content,
1084 "reference",
1085 "F1",
1086 "test.rs",
1087 "rs",
1088 tokens,
1089 CrpMode::Off,
1090 "test.rs",
1091 None,
1092 );
1093 let lines: Vec<&str> = result.lines().collect();
1094 assert!(
1095 lines.len() <= 3,
1096 "reference mode should be very compact, got {} lines",
1097 lines.len()
1098 );
1099 assert!(result.contains("lines"), "should contain line count");
1100 assert!(result.contains("tok"), "should contain token count");
1101 }
1102
#[test]
fn cached_lines_mode_invalidates_on_mtime_change() {
    /// Returns the trimmed second line of `output`; when that line contains a
    /// `|`, only the (trimmed) text after the first `|` is returned.
    /// A missing second line yields the empty string.
    fn second_line_payload(output: &str) -> &str {
        let line = output.lines().nth(1).unwrap_or_default().trim();
        match line.split_once('|') {
            Some((_, payload)) => payload.trim(),
            None => line,
        }
    }

    let dir = tempfile::tempdir().unwrap();
    let path = dir.path().join("file.txt");
    let path_str = path.to_string_lossy().to_string();

    // First read: line 1 of the original contents.
    std::fs::write(&path, "one\nsecond\n").unwrap();
    let mut cache = SessionCache::new();
    let before = handle_with_task_resolved(&mut cache, &path_str, "lines:1-1", CrpMode::Off, None);
    assert_eq!(second_line_payload(&before.content), "one");

    // Wait before rewriting so the file's mtime differs from the first write
    // (presumably to cover filesystems with one-second timestamp resolution).
    std::thread::sleep(Duration::from_secs(1));
    std::fs::write(&path, "two\nsecond\n").unwrap();

    // Second read through the same cache must reflect the new contents.
    let after = handle_with_task_resolved(&mut cache, &path_str, "lines:1-1", CrpMode::Off, None);
    assert_eq!(second_line_payload(&after.content), "two");
}
1127
#[test]
#[cfg_attr(tarpaulin, ignore)]
fn benchmark_task_conditioned_compression() {
    // Compares the token counts reported by four read modes on the same
    // generated source and prints a small savings report to stderr.
    let content = generate_benchmark_code(200);
    let full_tokens = count_tokens(&content);
    let task = Some("fix authentication in validate_token");

    // Runs `process_mode` for one mode with otherwise identical arguments and
    // keeps only the second element of the returned pair (the token figure).
    let measure = |mode: &str| {
        let (_output, tok) = process_mode(
            &content,
            mode,
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );
        tok
    };

    // Same call order as the report: full, task, signatures, reference.
    let full_tok = measure("full");
    let task_tok = measure("task");
    let sig_tok = measure("signatures");
    let ref_tok = measure("reference");

    // Savings are expressed relative to the "full" mode result.
    let baseline = full_tok as f64;
    let task_savings = (1.0 - task_tok as f64 / baseline) * 100.0;
    let sig_savings = (1.0 - sig_tok as f64 / baseline) * 100.0;
    let ref_savings = (1.0 - ref_tok as f64 / baseline) * 100.0;

    eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
    eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
    eprintln!(" full: {full_tok:>6} tokens (baseline)");
    eprintln!(
        " task: {task_tok:>6} tokens ({:.0}% savings)",
        task_savings
    );
    eprintln!(
        " signatures: {sig_tok:>6} tokens ({:.0}% savings)",
        sig_savings
    );
    eprintln!(
        " reference: {ref_tok:>6} tokens ({:.0}% savings)",
        ref_savings
    );
    eprintln!("================================================\n");

    assert!(task_tok < full_tok, "task mode should save tokens");
    assert!(sig_tok < full_tok, "signatures should save tokens");
    assert!(ref_tok < sig_tok, "reference should be most compact");
}
1202
/// Builds a synthetic Rust source file used as the benchmark fixture.
///
/// The output consists of:
/// 1. a fixed 17-line preamble (two `use` lines, a `Server` struct and the start
///    of an `impl Server` block containing `validate_token`);
/// 2. `lines - 17` filler lines (saturating at zero), grouped into `handler_{i}`
///    methods: a signature line every 30th line, a closing brace 29 lines later,
///    and `let val_{i} = ...` statements in between;
/// 3. a closing brace for a handler left open by a partial final group, if any;
/// 4. the brace closing the `impl` block.
///
/// Lines are joined with `\n` and there is no trailing newline, so the result
/// has one or two lines more than `max(lines, 17)`.
fn generate_benchmark_code(lines: usize) -> String {
    /// Number of filler lines that make up one complete handler method.
    const HANDLER_LEN: usize = 30;
    const PREAMBLE: [&str; 17] = [
        "use std::collections::HashMap;",
        "use crate::core::auth;",
        "",
        "pub struct Server {",
        " config: Config,",
        " cache: HashMap<String, String>,",
        "}",
        "",
        "impl Server {",
        " pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {",
        " let decoded = auth::decode_jwt(token)?;",
        " if decoded.exp < chrono::Utc::now().timestamp() {",
        " return Err(AuthError::Expired);",
        " }",
        " Ok(decoded.claims)",
        " }",
        "",
    ];

    let filler = lines.saturating_sub(PREAMBLE.len());

    // +2 leaves room for the optional dangling-handler brace and the final brace.
    let mut code: Vec<String> = Vec::with_capacity(PREAMBLE.len() + filler + 2);
    code.extend(PREAMBLE.iter().map(|line| (*line).to_string()));

    for i in 0..filler {
        let line = match i % HANDLER_LEN {
            0 => format!(" pub fn handler_{i}(&self, req: Request) -> Response {{"),
            pos if pos == HANDLER_LEN - 1 => " }".to_string(),
            _ => format!(" let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"),
        };
        code.push(line);
    }

    // A handler is still open whenever the filler did not end on a group
    // boundary; close it so the final brace closes the `impl` block and the
    // generated source keeps balanced braces.
    if filler % HANDLER_LEN != 0 {
        code.push(" }".to_string());
    }

    code.push("}".to_string());
    code.join("\n")
}
1241
#[test]
fn instruction_file_detection() {
    // Paths that must be recognised as instruction files.
    let instruction_paths = [
        "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
        "/workspace/.cursor/rules/lean-ctx.mdc",
        "/project/AGENTS.md",
        "/project/.cursorrules",
        "/home/user/.claude/rules/my-rule.md",
        "/skills/some-skill/README.md",
    ];
    for path in instruction_paths {
        assert!(is_instruction_file(path), "expected instruction file: {path}");
    }

    // Ordinary source, config and data files must not be recognised.
    let regular_paths = [
        "/project/src/main.rs",
        "/project/config.json",
        "/project/data/report.csv",
    ];
    for path in regular_paths {
        assert!(!is_instruction_file(path), "expected regular file: {path}");
    }
}
1257
#[test]
fn resolve_auto_mode_returns_full_for_instruction_files() {
    // Each case: (path, second argument, third argument, failure message).
    // Every case must resolve to "full".
    let cases = [
        (
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
            5000,
            Some("read"),
            "SKILL.md must always be read in full",
        ),
        (
            "/workspace/AGENTS.md",
            3000,
            Some("read"),
            "AGENTS.md must always be read in full",
        ),
        (
            "/workspace/.cursorrules",
            2000,
            None,
            ".cursorrules must always be read in full",
        ),
    ];

    for (path, size, task, why) in cases {
        let mode = resolve_auto_mode(path, size, task);
        assert_eq!(mode, "full", "{why}");
    }
}
1273}