1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a single read request handled by this module.
///
/// Derives `Debug` and `Clone` so callers can log and duplicate results
/// without hand-written boilerplate.
#[derive(Debug, Clone)]
pub struct ReadOutput {
    /// Text handed back to the caller (file content, compressed view, diff,
    /// warning or error message).
    pub content: String,
    /// Mode that was actually applied. Handlers in this file set it to the
    /// requested or auto-resolved mode, or to `"diff"`, `"full"` or `"error"`.
    pub resolved_mode: String,
    /// Token count reported for `content`, as measured with `count_tokens`.
    pub output_tokens: usize,
}
22
/// Marker line appended to compressed output to tell the reader that a
/// lossless view is available through `mode="full"`.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Modes whose rendered output is kept in the per-file compressed cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is one of [`CACHEABLE_MODES`].
///
/// The comparison is exact (case-sensitive, no trimming).
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|cacheable| *cacheable == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32 if crp_mode.is_tdd() {
33 format!("{mode}:tdd")
34 } else {
35 mode.to_string()
36 }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40 if !crate::core::profiles::active_profile()
41 .output_hints
42 .compressed_hint()
43 {
44 return output.to_string();
45 }
46 format!(
47 "{output}\n{COMPRESSED_HINT}\n ctx_read(\"{file_path}\", mode=\"full\") | ctx_retrieve(\"{file_path}\")"
48 )
49}
50
51pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
55 if crate::core::binary_detect::is_binary_file(path) {
56 let msg = crate::core::binary_detect::binary_file_message(path);
57 return Err(std::io::Error::other(msg));
58 }
59
60 if let Ok(canonical) = std::path::Path::new(path).canonicalize() {
61 if let Ok(cwd) = std::env::current_dir() {
62 let root = crate::core::pathjail::canonicalize_or_self(&cwd);
63 if !canonical.starts_with(&root) {
64 let allow = crate::core::pathjail::allow_paths_from_env_and_config();
65 let data_dir_ok = crate::core::data_dir::lean_ctx_data_dir()
66 .ok()
67 .is_some_and(|d| canonical.starts_with(d));
68 let tmp_ok = canonical.starts_with(std::env::temp_dir());
69 if !allow.iter().any(|a| canonical.starts_with(a)) && !data_dir_ok && !tmp_ok {
70 tracing::warn!(
71 "defense-in-depth: path may escape project root: {}",
72 canonical.display()
73 );
74 }
75 }
76 }
77 }
78
79 let cap = crate::core::limits::max_read_bytes();
80
81 let file = open_with_retry(path)?;
82 let meta = file
83 .metadata()
84 .map_err(|e| std::io::Error::other(format!("cannot stat open file descriptor: {e}")))?;
85 if meta.len() > cap as u64 {
86 return Err(std::io::Error::other(format!(
87 "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
88 Increase the limit or use a line-range read: mode=\"lines:1-100\"",
89 meta.len(),
90 cap
91 )));
92 }
93
94 use std::io::Read;
95 let mut bytes = Vec::with_capacity(meta.len() as usize);
96 std::io::BufReader::new(file).read_to_end(&mut bytes)?;
97 match String::from_utf8(bytes) {
98 Ok(s) => Ok(s),
99 Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
100 }
101}
102
/// Opens `path` for reading, retrying once after 50 ms when the first
/// attempt fails with `NotFound`.
///
/// Any other error from the first attempt is returned immediately; the
/// result of the retry (success or failure) is returned as-is.
fn open_with_retry(path: &str) -> Result<std::fs::File, std::io::Error> {
    let first_error = match std::fs::File::open(path) {
        Ok(file) => return Ok(file),
        Err(err) => err,
    };

    if first_error.kind() != std::io::ErrorKind::NotFound {
        return Err(first_error);
    }

    // The file may be mid-rename or mid-write; give it one short grace period.
    std::thread::sleep(std::time::Duration::from_millis(50));
    std::fs::File::open(path)
}
115
116pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
118 handle_with_options(cache, path, mode, false, crp_mode, None)
119}
120
121pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
123 handle_with_options(cache, path, mode, true, crp_mode, None)
124}
125
126pub fn handle_with_task(
128 cache: &mut SessionCache,
129 path: &str,
130 mode: &str,
131 crp_mode: CrpMode,
132 task: Option<&str>,
133) -> String {
134 handle_with_options(cache, path, mode, false, crp_mode, task)
135}
136
137pub fn handle_with_task_resolved(
139 cache: &mut SessionCache,
140 path: &str,
141 mode: &str,
142 crp_mode: CrpMode,
143 task: Option<&str>,
144) -> ReadOutput {
145 handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
146}
147
148pub fn handle_fresh_with_task(
150 cache: &mut SessionCache,
151 path: &str,
152 mode: &str,
153 crp_mode: CrpMode,
154 task: Option<&str>,
155) -> String {
156 handle_with_options(cache, path, mode, true, crp_mode, task)
157}
158
159pub fn handle_fresh_with_task_resolved(
161 cache: &mut SessionCache,
162 path: &str,
163 mode: &str,
164 crp_mode: CrpMode,
165 task: Option<&str>,
166) -> ReadOutput {
167 handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
168}
169
170fn handle_with_options(
171 cache: &mut SessionCache,
172 path: &str,
173 mode: &str,
174 fresh: bool,
175 crp_mode: CrpMode,
176 task: Option<&str>,
177) -> String {
178 handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
179}
180
/// Reports whether reads should always be treated as fresh for this process.
///
/// True when `LEAN_CTX_FORCE_FRESH` is exactly `"1"` or `"true"`, or when
/// `CURSOR_TASK_ID` is set to a non-empty value. The environment is inspected
/// only on the first call; the answer is memoized in a `OnceLock` afterwards.
fn is_subagent_context() -> bool {
    static IS_SUBAGENT: std::sync::OnceLock<bool> = std::sync::OnceLock::new();

    *IS_SUBAGENT.get_or_init(|| {
        let forced = matches!(
            std::env::var("LEAN_CTX_FORCE_FRESH").as_deref(),
            Ok("1") | Ok("true")
        );
        // `||` short-circuits, so CURSOR_TASK_ID is only read when not forced.
        forced || std::env::var("CURSOR_TASK_ID").is_ok_and(|task_id| !task_id.is_empty())
    })
}
192
193fn handle_with_options_resolved(
194 cache: &mut SessionCache,
195 path: &str,
196 mode: &str,
197 fresh: bool,
198 crp_mode: CrpMode,
199 task: Option<&str>,
200) -> ReadOutput {
201 let effective_fresh = fresh || is_subagent_context();
202
203 if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
204 bt.next_seq();
205 }
206 let mut result = handle_with_options_inner(cache, path, mode, effective_fresh, crp_mode, task);
207
208 if let Some(entry) = cache.get_mut(path) {
209 entry.last_mode.clone_from(&result.resolved_mode);
210 }
211
212 let dedup_allowed = matches!(
213 result.resolved_mode.as_str(),
214 "map" | "signatures" | "aggressive" | "entropy" | "task"
215 );
216 if dedup_allowed {
217 if let Some(deduped) = cache.apply_dedup(path, &result.content) {
218 let new_tokens = count_tokens(&deduped);
219 if new_tokens < result.output_tokens {
220 result.content = deduped;
221 result.output_tokens = new_tokens;
222 }
223 }
224 }
225
226 if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
227 let original_tokens = cache.get(path).map_or(0, |e| e.original_tokens);
228 bt.record_read(
229 path,
230 &result.resolved_mode,
231 result.output_tokens,
232 original_tokens,
233 );
234 }
235
236 result
237}
238
239fn handle_with_options_inner(
240 cache: &mut SessionCache,
241 path: &str,
242 mode: &str,
243 fresh: bool,
244 crp_mode: CrpMode,
245 task: Option<&str>,
246) -> ReadOutput {
247 let file_ref = cache.get_file_ref(path);
248 let short = protocol::shorten_path(path);
249 let ext = Path::new(path)
250 .extension()
251 .and_then(|e| e.to_str())
252 .unwrap_or("");
253
254 if fresh {
255 if mode == "diff" {
256 let warning = "[warning] fresh+diff is redundant — fresh invalidates cache, no diff possible. Use mode=full with fresh=true instead.";
257 return ReadOutput {
258 content: warning.to_string(),
259 resolved_mode: "diff".into(),
260 output_tokens: count_tokens(warning),
261 };
262 }
263 cache.invalidate(path);
264 }
265
266 if mode == "diff" {
267 let (out, _) = handle_diff(cache, path, &file_ref);
268 let out = crate::core::redaction::redact_text_if_enabled(&out);
269 let sent = count_tokens(&out);
270 return ReadOutput {
271 content: out,
272 resolved_mode: "diff".into(),
273 output_tokens: sent,
274 };
275 }
276
277 if mode != "full" {
278 if let Some(existing) = cache.get(path) {
279 let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
280 if stale {
281 cache.invalidate(path);
282 }
283 }
284 }
285
286 if let Some(existing) = cache.get(path) {
287 if mode == "full" {
288 let (out, _) = handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
289 let out = crate::core::redaction::redact_text_if_enabled(&out);
290 let sent = count_tokens(&out);
291 return ReadOutput {
292 content: out,
293 resolved_mode: "full".into(),
294 output_tokens: sent,
295 };
296 }
297 let original_tokens = existing.original_tokens;
298 let content_opt = existing.content();
299 if let Some(content) = content_opt {
300 let resolved_mode = if mode == "auto" {
301 resolve_auto_mode(path, original_tokens, task)
302 } else {
303 mode.to_string()
304 };
305 if is_cacheable_mode(&resolved_mode) {
306 let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
307 if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
308 let out = crate::core::redaction::redact_text_if_enabled(cached_output);
309 let sent = count_tokens(&out);
310 return ReadOutput {
311 content: out,
312 resolved_mode,
313 output_tokens: sent,
314 };
315 }
316 }
317 let (out, _) = process_mode(
318 &content,
319 &resolved_mode,
320 &file_ref,
321 &short,
322 ext,
323 original_tokens,
324 crp_mode,
325 path,
326 task,
327 );
328 if is_cacheable_mode(&resolved_mode) {
329 let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
330 cache.set_compressed(path, &cache_key, out.clone());
331 }
332 let out = crate::core::redaction::redact_text_if_enabled(&out);
333 let sent = count_tokens(&out);
334 return ReadOutput {
335 content: out,
336 resolved_mode,
337 output_tokens: sent,
338 };
339 }
340 cache.invalidate(path);
341 }
342
343 let content = match read_file_lossy(path) {
344 Ok(c) => c,
345 Err(e) => {
346 let msg = format!("ERROR: {e}");
347 let tokens = count_tokens(&msg);
348 return ReadOutput {
349 content: msg,
350 resolved_mode: "error".into(),
351 output_tokens: tokens,
352 };
353 }
354 };
355
356 let is_line_range = mode.starts_with("lines:");
358 let hints = crate::core::profiles::active_profile().output_hints;
359 let similar_hint = if !is_line_range && hints.semantic_hint() {
360 find_similar_and_update_semantic_index(path, &content)
361 } else {
362 None
363 };
364 let graph_hint = if !is_line_range && hints.related_hint() {
365 build_graph_related_hint(path)
366 } else {
367 None
368 };
369
370 let store_result = cache.store(path, &content);
371
372 if mode == "full" {
373 cache.mark_full_delivered(path);
374 let (mut output, _) = format_full_output(
375 &file_ref,
376 &short,
377 ext,
378 &content,
379 store_result.original_tokens,
380 store_result.line_count,
381 task,
382 );
383 if let Some(hint) = &graph_hint {
384 output.push_str(&format!("\n{hint}"));
385 }
386 if let Some(hint) = similar_hint {
387 output.push_str(&format!("\n{hint}"));
388 }
389 let output = crate::core::redaction::redact_text_if_enabled(&output);
390 let sent = count_tokens(&output);
391 return ReadOutput {
392 content: output,
393 resolved_mode: "full".into(),
394 output_tokens: sent,
395 };
396 }
397
398 let resolved_mode = if mode == "auto" {
399 resolve_auto_mode(path, store_result.original_tokens, task)
400 } else {
401 mode.to_string()
402 };
403
404 let (mut output, _sent) = process_mode(
405 &content,
406 &resolved_mode,
407 &file_ref,
408 &short,
409 ext,
410 store_result.original_tokens,
411 crp_mode,
412 path,
413 task,
414 );
415 if let Some(hint) = &graph_hint {
416 output.push_str(&format!("\n{hint}"));
417 }
418 if let Some(hint) = similar_hint {
419 output.push_str(&format!("\n{hint}"));
420 }
421 if is_cacheable_mode(&resolved_mode) {
422 let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
423 cache.set_compressed(path, &cache_key, output.clone());
424 }
425 let output = crate::core::redaction::redact_text_if_enabled(&output);
426 let final_tokens = count_tokens(&output);
427 ReadOutput {
428 content: output,
429 resolved_mode,
430 output_tokens: final_tokens,
431 }
432}
433
/// Returns `true` when `path` looks like an agent instruction / rules file.
///
/// The path is lower-cased first, so matching is case-insensitive. A path
/// matches when its file name is one of a fixed set of known instruction
/// file names, or when the path contains one of a fixed set of directory
/// markers (which use `/` as separator).
pub fn is_instruction_file(path: &str) -> bool {
    const INSTRUCTION_FILE_NAMES: [&str; 7] = [
        "skill.md",
        "agents.md",
        "rules.md",
        ".cursorrules",
        ".clinerules",
        "lean-ctx.md",
        "lean-ctx.mdc",
    ];
    const INSTRUCTION_PATH_MARKERS: [&str; 4] = [
        "/skills/",
        "/.cursor/rules/",
        "/.claude/rules/",
        "/agents.md",
    ];

    let lowered = path.to_lowercase();
    let base_name = std::path::Path::new(&lowered)
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("");

    INSTRUCTION_FILE_NAMES.contains(&base_name)
        || INSTRUCTION_PATH_MARKERS
            .iter()
            .any(|marker| lowered.contains(*marker))
}
455
456fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
457 if is_instruction_file(file_path) {
458 return "full".to_string();
459 }
460
461 if let Ok(bt) = crate::core::bounce_tracker::global().lock() {
462 if bt.should_force_full(file_path) {
463 return "full".to_string();
464 }
465 }
466
467 let intent_query = task.unwrap_or("read");
468 let route = crate::core::intent_router::route_v1(intent_query);
469 let intent_mode = &route.decision.effective_read_mode;
470 if intent_mode != "auto" && intent_mode != "reference" {
471 return intent_mode.clone();
472 }
473
474 let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
476 let predictor = crate::core::mode_predictor::ModePredictor::new();
477 let mut predicted = predictor
478 .predict_best_mode(&sig)
479 .unwrap_or_else(|| "full".to_string());
480 if predicted == "auto" {
481 predicted = "full".to_string();
482 }
483
484 if predicted != "full" {
487 if let Some(project_root) =
488 crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
489 {
490 let ext = std::path::Path::new(file_path)
491 .extension()
492 .and_then(|e| e.to_str())
493 .unwrap_or("");
494 let bucket = match original_tokens {
495 0..=2000 => "sm",
496 2001..=10000 => "md",
497 10001..=50000 => "lg",
498 _ => "xl",
499 };
500 let bandit_key = format!("{ext}_{bucket}");
501 let mut store = crate::core::bandit::BanditStore::load(&project_root);
502 let bandit = store.get_or_create(&bandit_key);
503 let arm = bandit.select_arm();
504 if arm.budget_ratio < 0.25 && original_tokens > 2000 {
505 predicted = "aggressive".to_string();
506 }
507 }
508 }
509
510 let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
512 let chosen = policy.choose_auto_mode(task, &predicted);
513
514 if original_tokens > 2000 {
515 if predicted == "map" || predicted == "signatures" {
516 if chosen != "map" && chosen != "signatures" {
517 return predicted;
518 }
519 } else if chosen == "full" && predicted != "full" {
520 return predicted;
521 }
522 }
523
524 chosen
525}
526
527fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
528 const MAX_CONTENT_BYTES_FOR_SEMANTIC: usize = 32_768;
529
530 if content.len() > MAX_CONTENT_BYTES_FOR_SEMANTIC {
531 return None;
532 }
533
534 let cfg = crate::core::config::Config::load();
535 let profile = crate::core::config::MemoryProfile::effective(&cfg);
536 if !profile.semantic_cache_enabled() {
537 return None;
538 }
539
540 let project_root = detect_project_root(path);
541 let session_id = format!("{}", std::process::id());
542 let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
543
544 let similar = index.find_similar(content, 0.7);
545 let relevant: Vec<_> = similar
546 .into_iter()
547 .filter(|(p, _)| p != path)
548 .take(3)
549 .collect();
550
551 index.add_file(path, content, &session_id);
552 let _ = index.save(&project_root);
553
554 if relevant.is_empty() {
555 return None;
556 }
557
558 let hints: Vec<String> = relevant
559 .iter()
560 .map(|(p, score)| format!(" {p} ({:.0}% similar)", score * 100.0))
561 .collect();
562
563 Some(format!(
564 "[semantic: {} similar file(s) in cache]\n{}",
565 relevant.len(),
566 hints.join("\n")
567 ))
568}
569
570fn detect_project_root(path: &str) -> String {
571 crate::core::protocol::detect_project_root_or_cwd(path)
572}
573
574fn build_graph_related_hint(path: &str) -> Option<String> {
575 let project_root = detect_project_root(path);
576 crate::core::graph_context::build_related_hint(path, &project_root, 5)
577}
578
579const AUTO_DELTA_THRESHOLD: f64 = 0.6;
580
581fn handle_full_with_auto_delta(
583 cache: &mut SessionCache,
584 path: &str,
585 file_ref: &str,
586 short: &str,
587 ext: &str,
588 task: Option<&str>,
589) -> (String, usize) {
590 let Ok(disk_content) = read_file_lossy(path) else {
591 cache.record_cache_hit(path);
592 if let Some(existing) = cache.get(path) {
593 if !crate::core::protocol::meta_visible() {
594 if let Some(cached) = existing.content() {
595 return format_full_output(
596 file_ref,
597 short,
598 ext,
599 &cached,
600 existing.original_tokens,
601 existing.line_count,
602 task,
603 );
604 }
605 }
606 let out = format!(
607 "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
608 existing.read_count, existing.line_count
609 );
610 let sent = count_tokens(&out);
611 return (out, sent);
612 }
613 let out = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
614 format!("[file read failed and no cached version available] {file_ref}={short}")
615 } else {
616 format!("[file read failed and no cached version available] {short}")
617 };
618 let sent = count_tokens(&out);
619 return (out, sent);
620 };
621
622 let old_content = cache
623 .get(path)
624 .and_then(crate::core::cache::CacheEntry::content)
625 .unwrap_or_default();
626 let store_result = cache.store(path, &disk_content);
627
628 if store_result.was_hit {
629 if store_result.full_content_delivered {
630 let out = if crate::core::protocol::meta_visible() {
631 format!(
632 "{file_ref}={short} cached {}t {}L\nFile content unchanged since last read (same hash). Already in your context window.",
633 store_result.read_count, store_result.line_count
634 )
635 } else {
636 format!(
637 "{file_ref}={short} [unchanged, {}L, use cached context]",
638 store_result.line_count
639 )
640 };
641 let sent = count_tokens(&out);
642 return (out, sent);
643 }
644 cache.mark_full_delivered(path);
645 return format_full_output(
646 file_ref,
647 short,
648 ext,
649 &disk_content,
650 store_result.original_tokens,
651 store_result.line_count,
652 task,
653 );
654 }
655
656 let diff = compressor::diff_content(&old_content, &disk_content);
657 let diff_tokens = count_tokens(&diff);
658 let full_tokens = store_result.original_tokens;
659
660 if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
661 let savings = protocol::format_savings(full_tokens, diff_tokens);
662 let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
663 format!("{file_ref}={short}")
664 } else {
665 short.to_string()
666 };
667 let out = format!(
668 "{head} [auto-delta] ∆{}L\n{diff}\n{savings}",
669 disk_content.lines().count()
670 );
671 return (out, diff_tokens);
672 }
673
674 format_full_output(
675 file_ref,
676 short,
677 ext,
678 &disk_content,
679 store_result.original_tokens,
680 store_result.line_count,
681 task,
682 )
683}
684
685fn format_full_output(
686 file_ref: &str,
687 short: &str,
688 ext: &str,
689 content: &str,
690 original_tokens: usize,
691 line_count: usize,
692 _task: Option<&str>,
693) -> (String, usize) {
694 let tokens = original_tokens;
695 let metadata = build_header(file_ref, short, ext, content, line_count, true);
696
697 let output = format!("{metadata}\n{content}");
698 let sent = count_tokens(&output);
699 (protocol::append_savings(&output, tokens, sent), sent)
700}
701
702fn build_header(
703 file_ref: &str,
704 short: &str,
705 ext: &str,
706 content: &str,
707 line_count: usize,
708 include_deps: bool,
709) -> String {
710 let mut header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
711 format!("{file_ref}={short} {line_count}L")
712 } else {
713 format!("{short} {line_count}L")
714 };
715
716 if include_deps {
717 let dep_info = deps::extract_deps(content, ext);
718 if !dep_info.imports.is_empty() {
719 let imports_str: Vec<&str> = dep_info
720 .imports
721 .iter()
722 .take(8)
723 .map(std::string::String::as_str)
724 .collect();
725 header.push_str(&format!("\n deps {}", imports_str.join(",")));
726 }
727 if !dep_info.exports.is_empty() {
728 let exports_str: Vec<&str> = dep_info
729 .exports
730 .iter()
731 .take(8)
732 .map(std::string::String::as_str)
733 .collect();
734 header.push_str(&format!("\n exports {}", exports_str.join(",")));
735 }
736 }
737
738 header
739}
740
741#[allow(clippy::too_many_arguments)]
742fn process_mode(
743 content: &str,
744 mode: &str,
745 file_ref: &str,
746 short: &str,
747 ext: &str,
748 original_tokens: usize,
749 crp_mode: CrpMode,
750 file_path: &str,
751 task: Option<&str>,
752) -> (String, usize) {
753 let line_count = content.lines().count();
754
755 match mode {
756 "auto" => {
757 let chosen = resolve_auto_mode(file_path, original_tokens, task);
758 process_mode(
759 content,
760 &chosen,
761 file_ref,
762 short,
763 ext,
764 original_tokens,
765 crp_mode,
766 file_path,
767 task,
768 )
769 }
770 "full" => format_full_output(
771 file_ref,
772 short,
773 ext,
774 content,
775 original_tokens,
776 line_count,
777 task,
778 ),
779 "signatures" => {
780 let sigs = signatures::extract_signatures(content, ext);
781 let dep_info = deps::extract_deps(content, ext);
782
783 let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
784 format!("{file_ref}={short} {line_count}L")
785 } else {
786 format!("{short} {line_count}L")
787 };
788 if !dep_info.imports.is_empty() {
789 let imports_str: Vec<&str> = dep_info
790 .imports
791 .iter()
792 .take(8)
793 .map(std::string::String::as_str)
794 .collect();
795 output.push_str(&format!("\n deps {}", imports_str.join(",")));
796 }
797 for sig in &sigs {
798 output.push('\n');
799 if crp_mode.is_tdd() {
800 output.push_str(&sig.to_tdd());
801 } else {
802 output.push_str(&sig.to_compact());
803 }
804 }
805 let sent = count_tokens(&output);
806 (
807 append_compressed_hint(
808 &protocol::append_savings(&output, original_tokens, sent),
809 file_path,
810 ),
811 sent,
812 )
813 }
814 "map" => {
815 if ext == "php" {
816 if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
817 {
818 let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
819 format!("{file_ref}={short} {line_count}L\n{php_map}")
820 } else {
821 format!("{short} {line_count}L\n{php_map}")
822 };
823 let sent = count_tokens(&output);
824 let output = protocol::append_savings(&output, original_tokens, sent);
825 return (append_compressed_hint(&output, file_path), sent);
826 }
827 }
828
829 let sigs = signatures::extract_signatures(content, ext);
830 let dep_info = deps::extract_deps(content, ext);
831
832 let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
833 format!("{file_ref}={short} {line_count}L")
834 } else {
835 format!("{short} {line_count}L")
836 };
837
838 if !dep_info.imports.is_empty() {
839 output.push_str("\n deps: ");
840 output.push_str(&dep_info.imports.join(", "));
841 }
842
843 if !dep_info.exports.is_empty() {
844 output.push_str("\n exports: ");
845 output.push_str(&dep_info.exports.join(", "));
846 }
847
848 let key_sigs: Vec<&signatures::Signature> = sigs
849 .iter()
850 .filter(|s| s.is_exported || s.indent == 0)
851 .collect();
852
853 if !key_sigs.is_empty() {
854 output.push_str("\n API:");
855 for sig in &key_sigs {
856 output.push_str("\n ");
857 if crp_mode.is_tdd() {
858 output.push_str(&sig.to_tdd());
859 } else {
860 output.push_str(&sig.to_compact());
861 }
862 }
863 }
864
865 let sent = count_tokens(&output);
866 (
867 append_compressed_hint(
868 &protocol::append_savings(&output, original_tokens, sent),
869 file_path,
870 ),
871 sent,
872 )
873 }
874 "aggressive" => {
875 #[cfg(feature = "tree-sitter")]
876 let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
877 #[cfg(not(feature = "tree-sitter"))]
878 let ast_pruned: Option<String> = None;
879
880 let base = ast_pruned.as_deref().unwrap_or(content);
881
882 let session_intent = crate::core::session::SessionState::load_latest()
883 .and_then(|s| s.active_structured_intent);
884 let raw = if let Some(ref intent) = session_intent {
885 compressor::task_aware_compress(base, Some(ext), intent)
886 } else {
887 compressor::aggressive_compress(base, Some(ext))
888 };
889 let compressed = compressor::safeguard_ratio(content, &raw);
890 let header = build_header(file_ref, short, ext, content, line_count, true);
891
892 let mut sym = SymbolMap::new();
893 let idents = symbol_map::extract_identifiers(&compressed, ext);
894 for ident in &idents {
895 sym.register(ident);
896 }
897
898 if sym.len() >= 3 {
899 let sym_table = sym.format_table();
900 let sym_applied = sym.apply(&compressed);
901 let orig_tok = count_tokens(&compressed);
902 let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
903 let net = orig_tok.saturating_sub(comp_tok);
904 if orig_tok > 0 && net * 100 / orig_tok >= 5 {
905 let savings = protocol::format_savings(original_tokens, comp_tok);
906 return (
907 append_compressed_hint(
908 &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
909 file_path,
910 ),
911 comp_tok,
912 );
913 }
914 let savings = protocol::format_savings(original_tokens, orig_tok);
915 return (
916 append_compressed_hint(
917 &format!("{header}\n{compressed}\n{savings}"),
918 file_path,
919 ),
920 orig_tok,
921 );
922 }
923
924 let sent = count_tokens(&compressed);
925 let savings = protocol::format_savings(original_tokens, sent);
926 (
927 append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
928 sent,
929 )
930 }
931 "entropy" => {
932 let result = entropy::entropy_compress_adaptive(content, file_path);
933 let avg_h = entropy::analyze_entropy(content).avg_entropy;
934 let header = build_header(file_ref, short, ext, content, line_count, false);
935 let techs = result.techniques.join(", ");
936 let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
937 let sent = count_tokens(&output);
938 let savings = protocol::format_savings(original_tokens, sent);
939 let compression_ratio = if original_tokens > 0 {
940 1.0 - (sent as f64 / original_tokens as f64)
941 } else {
942 0.0
943 };
944 crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
945 (
946 append_compressed_hint(&format!("{output}\n{savings}"), file_path),
947 sent,
948 )
949 }
950 "task" => {
951 let task_str = task.unwrap_or("");
952 if task_str.is_empty() {
953 let header = build_header(file_ref, short, ext, content, line_count, true);
954 let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
955 let sent = count_tokens(&out);
956 return (out, sent);
957 }
958 let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
959 if keywords.is_empty() {
960 let header = build_header(file_ref, short, ext, content, line_count, true);
961 let out = format!(
962 "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
963 );
964 let sent = count_tokens(&out);
965 return (out, sent);
966 }
967 let filtered =
968 crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
969 let filtered_lines = filtered.lines().count();
970 let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
971 format!("{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
972 } else {
973 format!("{short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
974 };
975 let graph_ctx = if crate::core::profiles::active_profile()
976 .output_hints
977 .graph_context_block()
978 {
979 let project_root = detect_project_root(file_path);
980 crate::core::graph_context::build_graph_context(
981 file_path,
982 &project_root,
983 Some(crate::core::graph_context::GraphContextOptions::default()),
984 )
985 .map(|c| crate::core::graph_context::format_graph_context(&c))
986 .unwrap_or_default()
987 } else {
988 String::new()
989 };
990
991 let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
992 let savings = protocol::format_savings(original_tokens, sent);
993 (
994 append_compressed_hint(
995 &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
996 file_path,
997 ),
998 sent,
999 )
1000 }
1001 "reference" => {
1002 let tok = count_tokens(content);
1003 let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1004 format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})")
1005 } else {
1006 format!("{short}: {line_count} lines, {tok} tok ({ext})")
1007 };
1008 let sent = count_tokens(&output);
1009 let savings = protocol::format_savings(original_tokens, sent);
1010 (format!("{output}\n{savings}"), sent)
1011 }
1012 mode if mode.starts_with("lines:") => {
1013 let range_str = &mode[6..];
1014 let extracted = extract_line_range(content, range_str);
1015 let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1016 format!("{file_ref}={short} {line_count}L lines:{range_str}")
1017 } else {
1018 format!("{short} {line_count}L lines:{range_str}")
1019 };
1020 let sent = count_tokens(&extracted);
1021 let savings = protocol::format_savings(original_tokens, sent);
1022 (format!("{header}\n{extracted}\n{savings}"), sent)
1023 }
1024 unknown => {
1025 let header = build_header(file_ref, short, ext, content, line_count, true);
1026 let out = format!(
1027 "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
1028 );
1029 let sent = count_tokens(&out);
1030 (out, sent)
1031 }
1032 }
1033}
1034
/// Extracts the lines of `content` selected by `range_str` and prefixes each
/// with its right-aligned, 1-based line number.
///
/// `range_str` is a comma-separated list of `N` or `A-B` items. In `A-B`, an
/// unparsable `A` defaults to 1 and an unparsable `B` to the last line; `A` is
/// raised to at least 1 and `B` is capped at the last line. Items that select
/// nothing (out of range, reversed, or not numeric) are silently skipped.
///
/// Returns `"No lines matched the range."` when nothing was selected.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let all_lines: Vec<&str> = content.lines().collect();
    let line_total = all_lines.len();
    let render = |number: usize| format!("{number:>4}| {}", all_lines[number - 1]);

    let mut picked: Vec<String> = Vec::new();
    for spec in range_str.split(',').map(str::trim) {
        match spec.split_once('-') {
            Some((lo, hi)) => {
                let first = lo.trim().parse::<usize>().unwrap_or(1).max(1);
                let last = hi
                    .trim()
                    .parse::<usize>()
                    .unwrap_or(line_total)
                    .min(line_total);
                // `first >= 1` and `last <= line_total`, so every number in
                // the range is a valid line.
                picked.extend((first..=last).map(|number| render(number)));
            }
            None => {
                if let Ok(number) = spec.parse::<usize>() {
                    if (1..=line_total).contains(&number) {
                        picked.push(render(number));
                    }
                }
            }
        }
    }

    if picked.is_empty() {
        return "No lines matched the range.".to_string();
    }
    picked.join("\n")
}
1063
1064fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
1065 let short = protocol::shorten_path(path);
1066 let old_content = cache
1067 .get(path)
1068 .and_then(crate::core::cache::CacheEntry::content);
1069
1070 let new_content = match read_file_lossy(path) {
1071 Ok(c) => c,
1072 Err(e) => {
1073 let msg = format!("ERROR: {e}");
1074 let tokens = count_tokens(&msg);
1075 return (msg, tokens);
1076 }
1077 };
1078
1079 let original_tokens = count_tokens(&new_content);
1080
1081 let diff_output = if let Some(old) = &old_content {
1082 compressor::diff_content(old, &new_content)
1083 } else {
1084 format!("[first read]\n{new_content}")
1085 };
1086
1087 cache.store(path, &new_content);
1088
1089 let sent = count_tokens(&diff_output);
1090 let savings = protocol::format_savings(original_tokens, sent);
1091 let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1092 format!("{file_ref}={short}")
1093 } else {
1094 short.clone()
1095 };
1096 (format!("{head} [diff]\n{diff_output}\n{savings}"), sent)
1097}
1098
1099#[cfg(test)]
1100mod tests {
1101 use super::*;
1102 use std::time::Duration;
1103
1104 #[test]
1105 fn test_header_toon_format_no_brackets() {
1106 let _lock = crate::core::data_dir::test_env_lock();
1107 std::env::set_var("LEAN_CTX_META", "1");
1108 let content = "use std::io;\nfn main() {}\n";
1109 let header = build_header("F1", "main.rs", "rs", content, 2, false);
1110 assert!(!header.contains('['));
1111 assert!(!header.contains(']'));
1112 assert!(header.contains("F1=main.rs 2L"));
1113 std::env::remove_var("LEAN_CTX_META");
1114 }
1115
1116 #[test]
1117 fn test_header_toon_deps_indented() {
1118 let _lock = crate::core::data_dir::test_env_lock();
1119 std::env::set_var("LEAN_CTX_META", "1");
1120 let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
1121 let header = build_header("F1", "main.rs", "rs", content, 3, true);
1122 if header.contains("deps") {
1123 assert!(
1124 header.contains("\n deps "),
1125 "deps should use indented TOON format"
1126 );
1127 assert!(
1128 !header.contains("deps:["),
1129 "deps should not use bracket format"
1130 );
1131 }
1132 std::env::remove_var("LEAN_CTX_META");
1133 }
1134
1135 #[test]
1136 fn test_header_toon_saves_tokens() {
1137 let _lock = crate::core::data_dir::test_env_lock();
1138 std::env::set_var("LEAN_CTX_META", "1");
1139 let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
1140 let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
1141 let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
1142 let old_tokens = count_tokens(&old_header);
1143 let new_tokens = count_tokens(&new_header);
1144 assert!(
1145 new_tokens <= old_tokens,
1146 "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
1147 );
1148 std::env::remove_var("LEAN_CTX_META");
1149 }
1150
1151 #[test]
1152 fn test_tdd_symbols_are_compact() {
1153 let symbols = [
1154 "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
1155 ];
1156 for sym in &symbols {
1157 let tok = count_tokens(sym);
1158 assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
1159 }
1160 }
1161
1162 #[test]
1163 fn test_task_mode_filters_content() {
1164 let content = (0..200)
1165 .map(|i| {
1166 if i % 20 == 0 {
1167 format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
1168 } else {
1169 format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
1170 }
1171 })
1172 .collect::<Vec<_>>()
1173 .join("\n");
1174 let full_tokens = count_tokens(&content);
1175 let task = Some("fix bug in validate_token");
1176 let (result, result_tokens) = process_mode(
1177 &content,
1178 "task",
1179 "F1",
1180 "test.rs",
1181 "rs",
1182 full_tokens,
1183 CrpMode::Off,
1184 "test.rs",
1185 task,
1186 );
1187 assert!(
1188 result_tokens < full_tokens,
1189 "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
1190 );
1191 assert!(
1192 result.contains("task-filtered"),
1193 "output should contain task-filtered marker"
1194 );
1195 }
1196
1197 #[test]
1198 fn test_task_mode_without_task_returns_full() {
1199 let content = "fn main() {}\nfn helper() {}\n";
1200 let tokens = count_tokens(content);
1201 let (result, _sent) = process_mode(
1202 content,
1203 "task",
1204 "F1",
1205 "test.rs",
1206 "rs",
1207 tokens,
1208 CrpMode::Off,
1209 "test.rs",
1210 None,
1211 );
1212 assert!(
1213 result.contains("no task set"),
1214 "should indicate no task: {result}"
1215 );
1216 }
1217
1218 #[test]
1219 fn test_reference_mode_one_line() {
1220 let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
1221 let tokens = count_tokens(content);
1222 let (result, _sent) = process_mode(
1223 content,
1224 "reference",
1225 "F1",
1226 "test.rs",
1227 "rs",
1228 tokens,
1229 CrpMode::Off,
1230 "test.rs",
1231 None,
1232 );
1233 let lines: Vec<&str> = result.lines().collect();
1234 assert!(
1235 lines.len() <= 3,
1236 "reference mode should be very compact, got {} lines",
1237 lines.len()
1238 );
1239 assert!(result.contains("lines"), "should contain line count");
1240 assert!(result.contains("tok"), "should contain token count");
1241 }
1242
1243 #[test]
1244 fn cached_lines_mode_invalidates_on_mtime_change() {
1245 let dir = tempfile::tempdir().unwrap();
1246 let path = dir.path().join("file.txt");
1247 let p = path.to_string_lossy().to_string();
1248
1249 std::fs::write(&path, "one\nsecond\n").unwrap();
1250 let mut cache = SessionCache::new();
1251
1252 let r1 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1253 let l1: Vec<&str> = r1.content.lines().collect();
1254 let got1 = l1.get(1).copied().unwrap_or_default().trim();
1255 let got1 = got1.split_once('|').map_or(got1, |(_, s)| s.trim());
1256 assert_eq!(got1, "one");
1257
1258 std::thread::sleep(Duration::from_secs(1));
1259 std::fs::write(&path, "two\nsecond\n").unwrap();
1260
1261 let r2 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1262 let l2: Vec<&str> = r2.content.lines().collect();
1263 let got2 = l2.get(1).copied().unwrap_or_default().trim();
1264 let got2 = got2.split_once('|').map_or(got2, |(_, s)| s.trim());
1265 assert_eq!(got2, "two");
1266 }
1267
1268 #[test]
1269 #[cfg_attr(tarpaulin, ignore)]
1270 fn benchmark_task_conditioned_compression() {
1271 let content = generate_benchmark_code(200);
1273 let full_tokens = count_tokens(&content);
1274 let task = Some("fix authentication in validate_token");
1275
1276 let (_full_output, full_tok) = process_mode(
1277 &content,
1278 "full",
1279 "F1",
1280 "server.rs",
1281 "rs",
1282 full_tokens,
1283 CrpMode::Off,
1284 "server.rs",
1285 task,
1286 );
1287 let (_task_output, task_tok) = process_mode(
1288 &content,
1289 "task",
1290 "F1",
1291 "server.rs",
1292 "rs",
1293 full_tokens,
1294 CrpMode::Off,
1295 "server.rs",
1296 task,
1297 );
1298 let (_sig_output, sig_tok) = process_mode(
1299 &content,
1300 "signatures",
1301 "F1",
1302 "server.rs",
1303 "rs",
1304 full_tokens,
1305 CrpMode::Off,
1306 "server.rs",
1307 task,
1308 );
1309 let (_ref_output, ref_tok) = process_mode(
1310 &content,
1311 "reference",
1312 "F1",
1313 "server.rs",
1314 "rs",
1315 full_tokens,
1316 CrpMode::Off,
1317 "server.rs",
1318 task,
1319 );
1320
1321 eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
1322 eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
1323 eprintln!(" full: {full_tok:>6} tokens (baseline)");
1324 eprintln!(
1325 " task: {task_tok:>6} tokens ({:.0}% savings)",
1326 (1.0 - task_tok as f64 / full_tok as f64) * 100.0
1327 );
1328 eprintln!(
1329 " signatures: {sig_tok:>6} tokens ({:.0}% savings)",
1330 (1.0 - sig_tok as f64 / full_tok as f64) * 100.0
1331 );
1332 eprintln!(
1333 " reference: {ref_tok:>6} tokens ({:.0}% savings)",
1334 (1.0 - ref_tok as f64 / full_tok as f64) * 100.0
1335 );
1336 eprintln!("================================================\n");
1337
1338 assert!(task_tok < full_tok, "task mode should save tokens");
1339 assert!(sig_tok < full_tok, "signatures should save tokens");
1340 assert!(ref_tok < sig_tok, "reference should be most compact");
1341 }
1342
1343 fn generate_benchmark_code(lines: usize) -> String {
1344 let mut code = Vec::with_capacity(lines);
1345 code.push("use std::collections::HashMap;".to_string());
1346 code.push("use crate::core::auth;".to_string());
1347 code.push(String::new());
1348 code.push("pub struct Server {".to_string());
1349 code.push(" config: Config,".to_string());
1350 code.push(" cache: HashMap<String, String>,".to_string());
1351 code.push("}".to_string());
1352 code.push(String::new());
1353 code.push("impl Server {".to_string());
1354 code.push(
1355 " pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {"
1356 .to_string(),
1357 );
1358 code.push(" let decoded = auth::decode_jwt(token)?;".to_string());
1359 code.push(" if decoded.exp < chrono::Utc::now().timestamp() {".to_string());
1360 code.push(" return Err(AuthError::Expired);".to_string());
1361 code.push(" }".to_string());
1362 code.push(" Ok(decoded.claims)".to_string());
1363 code.push(" }".to_string());
1364 code.push(String::new());
1365
1366 let remaining = lines.saturating_sub(code.len());
1367 for i in 0..remaining {
1368 if i % 30 == 0 {
1369 code.push(format!(
1370 " pub fn handler_{i}(&self, req: Request) -> Response {{"
1371 ));
1372 } else if i % 30 == 29 {
1373 code.push(" }".to_string());
1374 } else {
1375 code.push(format!(" let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"));
1376 }
1377 }
1378 code.push("}".to_string());
1379 code.join("\n")
1380 }
1381
1382 #[test]
1383 fn instruction_file_detection() {
1384 assert!(is_instruction_file(
1385 "/home/user/.pi/agent/skills/committing-changes/SKILL.md"
1386 ));
1387 assert!(is_instruction_file("/workspace/.cursor/rules/lean-ctx.mdc"));
1388 assert!(is_instruction_file("/project/AGENTS.md"));
1389 assert!(is_instruction_file("/project/.cursorrules"));
1390 assert!(is_instruction_file("/home/user/.claude/rules/my-rule.md"));
1391 assert!(is_instruction_file("/skills/some-skill/README.md"));
1392
1393 assert!(!is_instruction_file("/project/src/main.rs"));
1394 assert!(!is_instruction_file("/project/config.json"));
1395 assert!(!is_instruction_file("/project/data/report.csv"));
1396 }
1397
1398 #[test]
1399 fn resolve_auto_mode_returns_full_for_instruction_files() {
1400 let mode = resolve_auto_mode(
1401 "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
1402 5000,
1403 Some("read"),
1404 );
1405 assert_eq!(mode, "full", "SKILL.md must always be read in full");
1406
1407 let mode = resolve_auto_mode("/workspace/AGENTS.md", 3000, Some("read"));
1408 assert_eq!(mode, "full", "AGENTS.md must always be read in full");
1409
1410 let mode = resolve_auto_mode("/workspace/.cursorrules", 2000, None);
1411 assert_eq!(mode, "full", ".cursorrules must always be read in full");
1412 }
1413}