1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of one read request: the rendered text plus bookkeeping about how it was produced.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so values can be logged, duplicated and
/// compared (for example in tests) without hand-written boilerplate.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadOutput {
    /// Text returned to the caller.
    pub content: String,
    /// Mode that was actually applied, e.g. `"full"`, `"diff"`, `"error"`, or whatever
    /// `"auto"` was resolved to.
    pub resolved_mode: String,
    /// Token count reported for the produced output.
    pub output_tokens: usize,
}
22
/// Marker line appended to compressed renderings, telling the reader how to get the full source.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Read modes whose rendered output is kept in the per-file compressed cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is one of [`CACHEABLE_MODES`] (exact, case-sensitive match).
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|candidate| *candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32 if crp_mode.is_tdd() {
33 format!("{mode}:tdd")
34 } else {
35 mode.to_string()
36 }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40 format!(
41 "{output}\n{COMPRESSED_HINT}\n ctx_read(\"{file_path}\", mode=\"full\") | ctx_retrieve(\"{file_path}\")"
42 )
43}
44
45pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
49 if crate::core::binary_detect::is_binary_file(path) {
50 let msg = crate::core::binary_detect::binary_file_message(path);
51 return Err(std::io::Error::other(msg));
52 }
53
54 if let Ok(canonical) = std::path::Path::new(path).canonicalize() {
55 if let Ok(cwd) = std::env::current_dir() {
56 let root = crate::core::pathjail::canonicalize_or_self(&cwd);
57 if !canonical.starts_with(&root) {
58 let allow = crate::core::pathjail::allow_paths_from_env_and_config();
59 let data_dir_ok = crate::core::data_dir::lean_ctx_data_dir()
60 .ok()
61 .is_some_and(|d| canonical.starts_with(d));
62 let tmp_ok = canonical.starts_with(std::env::temp_dir());
63 if !allow.iter().any(|a| canonical.starts_with(a)) && !data_dir_ok && !tmp_ok {
64 tracing::warn!(
65 "defense-in-depth: path may escape project root: {}",
66 canonical.display()
67 );
68 }
69 }
70 }
71 }
72
73 let cap = crate::core::limits::max_read_bytes();
74
75 let file = open_with_retry(path)?;
76 let meta = file
77 .metadata()
78 .map_err(|e| std::io::Error::other(format!("cannot stat open file descriptor: {e}")))?;
79 if meta.len() > cap as u64 {
80 return Err(std::io::Error::other(format!(
81 "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
82 Increase the limit or use a line-range read: mode=\"lines:1-100\"",
83 meta.len(),
84 cap
85 )));
86 }
87
88 use std::io::Read;
89 let mut bytes = Vec::with_capacity(meta.len() as usize);
90 std::io::BufReader::new(file).read_to_end(&mut bytes)?;
91 match String::from_utf8(bytes) {
92 Ok(s) => Ok(s),
93 Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
94 }
95}
96
/// Opens `path` for reading, retrying exactly once after 50 ms when the first attempt reports
/// `NotFound`.
///
/// Any other error, and the outcome of the second attempt, is returned unchanged.
// NOTE(review): the retry presumably rides out a file being replaced by a writer — confirm.
fn open_with_retry(path: &str) -> Result<std::fs::File, std::io::Error> {
    let first_attempt = std::fs::File::open(path);
    let missing = matches!(
        &first_attempt,
        Err(err) if err.kind() == std::io::ErrorKind::NotFound
    );
    if !missing {
        return first_attempt;
    }

    std::thread::sleep(std::time::Duration::from_millis(50));
    std::fs::File::open(path)
}
109
110pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
112 handle_with_options(cache, path, mode, false, crp_mode, None)
113}
114
115pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
117 handle_with_options(cache, path, mode, true, crp_mode, None)
118}
119
120pub fn handle_with_task(
122 cache: &mut SessionCache,
123 path: &str,
124 mode: &str,
125 crp_mode: CrpMode,
126 task: Option<&str>,
127) -> String {
128 handle_with_options(cache, path, mode, false, crp_mode, task)
129}
130
131pub fn handle_with_task_resolved(
133 cache: &mut SessionCache,
134 path: &str,
135 mode: &str,
136 crp_mode: CrpMode,
137 task: Option<&str>,
138) -> ReadOutput {
139 handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
140}
141
142pub fn handle_fresh_with_task(
144 cache: &mut SessionCache,
145 path: &str,
146 mode: &str,
147 crp_mode: CrpMode,
148 task: Option<&str>,
149) -> String {
150 handle_with_options(cache, path, mode, true, crp_mode, task)
151}
152
153pub fn handle_fresh_with_task_resolved(
155 cache: &mut SessionCache,
156 path: &str,
157 mode: &str,
158 crp_mode: CrpMode,
159 task: Option<&str>,
160) -> ReadOutput {
161 handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
162}
163
164fn handle_with_options(
165 cache: &mut SessionCache,
166 path: &str,
167 mode: &str,
168 fresh: bool,
169 crp_mode: CrpMode,
170 task: Option<&str>,
171) -> String {
172 handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
173}
174
175fn handle_with_options_resolved(
176 cache: &mut SessionCache,
177 path: &str,
178 mode: &str,
179 fresh: bool,
180 crp_mode: CrpMode,
181 task: Option<&str>,
182) -> ReadOutput {
183 if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
184 bt.next_seq();
185 }
186 let mut result = handle_with_options_inner(cache, path, mode, fresh, crp_mode, task);
187
188 if result.resolved_mode != "full" && result.resolved_mode != "diff" {
189 if let Some(deduped) = cache.apply_dedup(path, &result.content) {
190 let new_tokens = count_tokens(&deduped);
191 if new_tokens < result.output_tokens {
192 result.content = deduped;
193 result.output_tokens = new_tokens;
194 }
195 }
196 }
197
198 if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
199 let original_tokens = cache.get(path).map_or(0, |e| e.original_tokens);
200 bt.record_read(
201 path,
202 &result.resolved_mode,
203 result.output_tokens,
204 original_tokens,
205 );
206 }
207
208 result
209}
210
/// Core read pipeline: renders `path` in the requested `mode`, using the session cache entry when
/// one exists and reading from disk otherwise.
///
/// Flow:
/// 1. `fresh` invalidates the cache entry up front.
/// 2. `mode == "diff"` is delegated to `handle_diff`.
/// 3. For every mode other than `"full"`, a stale cache entry is invalidated.
/// 4. If a cache entry exists: `"full"` goes through `handle_full_with_auto_delta`; other modes
///    are rendered from the cached content, reusing a cached compressed rendering for cacheable
///    modes when available.
/// 5. Otherwise the file is read from disk, stored in the cache and rendered; graph and semantic
///    hints are appended on this first-read path only.
///
/// All returned text passes through `redact_text_if_enabled`. A failed disk read yields an
/// `"ERROR: …"` message with `resolved_mode == "error"`.
fn handle_with_options_inner(
    cache: &mut SessionCache,
    path: &str,
    mode: &str,
    fresh: bool,
    crp_mode: CrpMode,
    task: Option<&str>,
) -> ReadOutput {
    let file_ref = cache.get_file_ref(path);
    let short = protocol::shorten_path(path);
    let ext = Path::new(path)
        .extension()
        .and_then(|e| e.to_str())
        .unwrap_or("");

    // A fresh read invalidates the cache entry before any lookup happens.
    if fresh {
        cache.invalidate(path);
    }

    // Diff mode has its own handler and always reports "diff" as the resolved mode.
    if mode == "diff" {
        let (out, sent) = handle_diff(cache, path, &file_ref);
        return ReadOutput {
            content: out,
            resolved_mode: "diff".into(),
            output_tokens: sent,
        };
    }

    // Non-full modes render from cached content, so a stale entry must be invalidated first.
    // Full mode skips this check: its cached path re-reads the file from disk itself.
    if mode != "full" {
        if let Some(existing) = cache.get(path) {
            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
            if stale {
                cache.invalidate(path);
            }
        }
    }

    // ---- Cached path ----
    if let Some(existing) = cache.get(path) {
        if mode == "full" {
            let (out, sent) =
                handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
            let out = crate::core::redaction::redact_text_if_enabled(&out);
            return ReadOutput {
                content: out,
                resolved_mode: "full".into(),
                output_tokens: sent,
            };
        }
        let content = existing.content();
        let original_tokens = existing.original_tokens;
        let resolved_mode = if mode == "auto" {
            resolve_auto_mode(path, original_tokens, task)
        } else {
            mode.to_string()
        };
        // Cacheable modes: reuse a previously rendered output when one is stored under the key.
        if is_cacheable_mode(&resolved_mode) {
            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
            if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
                // Tokens are counted on the stored (pre-redaction) text.
                let sent = count_tokens(cached_output);
                let out = crate::core::redaction::redact_text_if_enabled(cached_output);
                return ReadOutput {
                    content: out,
                    resolved_mode,
                    output_tokens: sent,
                };
            }
        }
        let (out, sent) = process_mode(
            &content,
            &resolved_mode,
            &file_ref,
            &short,
            ext,
            original_tokens,
            crp_mode,
            path,
            task,
        );
        // The rendering is stored before redaction is applied.
        if is_cacheable_mode(&resolved_mode) {
            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
            cache.set_compressed(path, &cache_key, out.clone());
        }
        let out = crate::core::redaction::redact_text_if_enabled(&out);
        return ReadOutput {
            content: out,
            resolved_mode,
            output_tokens: sent,
        };
    }

    // ---- First-read path: nothing cached, so read from disk ----
    let content = match read_file_lossy(path) {
        Ok(c) => c,
        Err(e) => {
            let msg = format!("ERROR: {e}");
            let tokens = count_tokens(&msg);
            return ReadOutput {
                content: msg,
                resolved_mode: "error".into(),
                output_tokens: tokens,
            };
        }
    };

    // Hints are computed before the content is stored in the session cache.
    let similar_hint = find_similar_and_update_semantic_index(path, &content);
    let graph_hint = build_graph_related_hint(path);

    let store_result = cache.store(path, &content);

    if mode == "full" {
        cache.mark_full_delivered(path);
        let (mut output, sent) = format_full_output(
            &file_ref,
            &short,
            ext,
            &content,
            store_result.original_tokens,
            store_result.line_count,
            task,
        );
        if let Some(hint) = &graph_hint {
            output.push_str(&format!("\n{hint}"));
        }
        if let Some(hint) = similar_hint {
            output.push_str(&format!("\n{hint}"));
        }
        let output = crate::core::redaction::redact_text_if_enabled(&output);
        // `sent` comes from `format_full_output`, i.e. it does not include the appended hints.
        return ReadOutput {
            content: output,
            resolved_mode: "full".into(),
            output_tokens: sent,
        };
    }

    let resolved_mode = if mode == "auto" {
        resolve_auto_mode(path, store_result.original_tokens, task)
    } else {
        mode.to_string()
    };

    let (mut output, _sent) = process_mode(
        &content,
        &resolved_mode,
        &file_ref,
        &short,
        ext,
        store_result.original_tokens,
        crp_mode,
        path,
        task,
    );
    // Store the rendering before hints are appended, so cached output never contains them.
    if is_cacheable_mode(&resolved_mode) {
        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
        cache.set_compressed(path, &cache_key, output.clone());
    }
    if let Some(hint) = &graph_hint {
        output.push_str(&format!("\n{hint}"));
    }
    if let Some(hint) = similar_hint {
        output.push_str(&format!("\n{hint}"));
    }
    let output = crate::core::redaction::redact_text_if_enabled(&output);
    // Unlike the other branches, the token count here is taken on the final text
    // (hints and redaction included).
    let final_tokens = count_tokens(&output);
    ReadOutput {
        content: output,
        resolved_mode,
        output_tokens: final_tokens,
    }
}
379
/// Returns `true` when `path` looks like an agent instruction/rules file.
///
/// Matching is case-insensitive (the path is lowercased first). A path matches when its file
/// name is one of a fixed set of instruction file names, or when the lowercased path contains
/// one of the known instruction directory markers.
pub fn is_instruction_file(path: &str) -> bool {
    const INSTRUCTION_FILE_NAMES: [&str; 7] = [
        "skill.md",
        "agents.md",
        "rules.md",
        ".cursorrules",
        ".clinerules",
        "lean-ctx.md",
        "lean-ctx.mdc",
    ];
    const INSTRUCTION_PATH_MARKERS: [&str; 4] = [
        "/skills/",
        "/.cursor/rules/",
        "/.claude/rules/",
        "/agents.md",
    ];

    let lowered = path.to_lowercase();
    let base_name = std::path::Path::new(&lowered)
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("");

    if INSTRUCTION_FILE_NAMES.contains(&base_name) {
        return true;
    }
    INSTRUCTION_PATH_MARKERS
        .iter()
        .any(|marker| lowered.contains(*marker))
}
401
402fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
403 if is_instruction_file(file_path) {
404 return "full".to_string();
405 }
406
407 if let Ok(bt) = crate::core::bounce_tracker::global().lock() {
408 if bt.should_force_full(file_path) {
409 return "full".to_string();
410 }
411 }
412
413 let intent_query = task.unwrap_or("read");
414 let route = crate::core::intent_router::route_v1(intent_query);
415 let intent_mode = &route.decision.effective_read_mode;
416 if intent_mode != "auto" && intent_mode != "reference" {
417 return intent_mode.clone();
418 }
419
420 let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
422 let predictor = crate::core::mode_predictor::ModePredictor::new();
423 let mut predicted = predictor
424 .predict_best_mode(&sig)
425 .unwrap_or_else(|| "full".to_string());
426 if predicted == "auto" {
427 predicted = "full".to_string();
428 }
429
430 if let Some(project_root) =
432 crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
433 {
434 let ext = std::path::Path::new(file_path)
435 .extension()
436 .and_then(|e| e.to_str())
437 .unwrap_or("");
438 let bucket = match original_tokens {
439 0..=2000 => "sm",
440 2001..=10000 => "md",
441 10001..=50000 => "lg",
442 _ => "xl",
443 };
444 let bandit_key = format!("{ext}_{bucket}");
445 let mut store = crate::core::bandit::BanditStore::load(&project_root);
446 let bandit = store.get_or_create(&bandit_key);
447 let arm = bandit.select_arm();
448 if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
449 predicted = "aggressive".to_string();
450 }
451 }
452
453 let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
455 let chosen = policy.choose_auto_mode(task, &predicted);
456
457 if original_tokens > 2000 {
458 if predicted == "map" || predicted == "signatures" {
459 if chosen != "map" && chosen != "signatures" {
460 return predicted;
461 }
462 } else if chosen == "full" && predicted != "full" {
463 return predicted;
464 }
465 }
466
467 chosen
468}
469
470fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
471 const MAX_CONTENT_BYTES_FOR_SEMANTIC: usize = 32_768;
472
473 if content.len() > MAX_CONTENT_BYTES_FOR_SEMANTIC {
474 return None;
475 }
476
477 let cfg = crate::core::config::Config::load();
478 let profile = crate::core::config::MemoryProfile::effective(&cfg);
479 if !profile.semantic_cache_enabled() {
480 return None;
481 }
482
483 let project_root = detect_project_root(path);
484 let session_id = format!("{}", std::process::id());
485 let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
486
487 let similar = index.find_similar(content, 0.7);
488 let relevant: Vec<_> = similar
489 .into_iter()
490 .filter(|(p, _)| p != path)
491 .take(3)
492 .collect();
493
494 index.add_file(path, content, &session_id);
495 let _ = index.save(&project_root);
496
497 if relevant.is_empty() {
498 return None;
499 }
500
501 let hints: Vec<String> = relevant
502 .iter()
503 .map(|(p, score)| format!(" {p} ({:.0}% similar)", score * 100.0))
504 .collect();
505
506 Some(format!(
507 "[semantic: {} similar file(s) in cache]\n{}",
508 relevant.len(),
509 hints.join("\n")
510 ))
511}
512
513fn detect_project_root(path: &str) -> String {
514 crate::core::protocol::detect_project_root_or_cwd(path)
515}
516
517fn build_graph_related_hint(path: &str) -> Option<String> {
518 let project_root = detect_project_root(path);
519 crate::core::graph_context::build_related_hint(path, &project_root, 5)
520}
521
/// A diff is sent instead of the full file only when its token count is below this fraction of
/// the full file's token count.
const AUTO_DELTA_THRESHOLD: f64 = 0.6;

/// Serves a `"full"` read for a file that already has a cache entry.
///
/// The file is re-read from disk and stored in the cache. Depending on the outcome the caller
/// gets one of:
/// - the cached version (or a short notice) when the disk read fails;
/// - a short "unchanged" notice, or an empty string, when the store reports a hit and the full
///   content was already delivered;
/// - the full rendering when the store reports a hit but full content was not yet delivered;
/// - an `[auto-delta]` diff against the previous content when the diff is small enough
///   (see [`AUTO_DELTA_THRESHOLD`]);
/// - otherwise the full rendering of the new content.
///
/// Returns `(text, tokens)`.
fn handle_full_with_auto_delta(
    cache: &mut SessionCache,
    path: &str,
    file_ref: &str,
    short: &str,
    ext: &str,
    task: Option<&str>,
) -> (String, usize) {
    // Disk read failed: fall back to whatever the cache still holds.
    let Ok(disk_content) = read_file_lossy(path) else {
        cache.record_cache_hit(path);
        if let Some(existing) = cache.get(path) {
            // With meta hidden, return the cached content rendered as a normal full read.
            if !crate::core::protocol::meta_visible() {
                let cached = existing.content();
                return format_full_output(
                    file_ref,
                    short,
                    ext,
                    &cached,
                    existing.original_tokens,
                    existing.line_count,
                    task,
                );
            }
            // With meta visible, only a short notice is returned.
            let out = format!(
                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
                existing.read_count, existing.line_count
            );
            let sent = count_tokens(&out);
            return (out, sent);
        }
        let out = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
            format!("[file read failed and no cached version available] {file_ref}={short}")
        } else {
            format!("[file read failed and no cached version available] {short}")
        };
        let sent = count_tokens(&out);
        return (out, sent);
    };

    // Capture the previous content before `store` replaces it; it is the diff base below.
    let old_content = cache
        .get(path)
        .map(crate::core::cache::CacheEntry::content)
        .unwrap_or_default();
    let store_result = cache.store(path, &disk_content);

    // NOTE(review): `was_hit` appears to mean the content is unchanged (the message below says
    // "same hash") — confirm against `SessionCache::store`.
    if store_result.was_hit {
        if store_result.full_content_delivered {
            if crate::core::protocol::meta_visible() {
                let out = format!(
                    "{file_ref}={short} cached {}t {}L\nFile content unchanged since last read (same hash). Already in your context window.",
                    store_result.read_count, store_result.line_count
                );
                let sent = count_tokens(&out);
                return (out, sent);
            }
            // Meta hidden: nothing is sent.
            return (String::new(), 0);
        }
        // Full content was not delivered before: deliver it now and remember that.
        cache.mark_full_delivered(path);
        return format_full_output(
            file_ref,
            short,
            ext,
            &disk_content,
            store_result.original_tokens,
            store_result.line_count,
            task,
        );
    }

    let diff = compressor::diff_content(&old_content, &disk_content);
    let diff_tokens = count_tokens(&diff);
    let full_tokens = store_result.original_tokens;

    // Send the delta only when it is clearly cheaper than the full file.
    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
        let savings = protocol::format_savings(full_tokens, diff_tokens);
        let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
            format!("{file_ref}={short}")
        } else {
            short.to_string()
        };
        let out = format!(
            "{head} [auto-delta] ∆{}L\n{diff}\n{savings}",
            disk_content.lines().count()
        );
        // The reported token count covers the diff text only, not the header or savings line.
        return (out, diff_tokens);
    }

    format_full_output(
        file_ref,
        short,
        ext,
        &disk_content,
        store_result.original_tokens,
        store_result.line_count,
        task,
    )
}
622
623fn format_full_output(
624 file_ref: &str,
625 short: &str,
626 ext: &str,
627 content: &str,
628 original_tokens: usize,
629 line_count: usize,
630 task: Option<&str>,
631) -> (String, usize) {
632 let tokens = original_tokens;
633 let metadata = build_header(file_ref, short, ext, content, line_count, true);
634
635 let mut reordered: Option<String> = None;
636 {
637 let profile = crate::core::profiles::active_profile();
638 let cfg = profile.layout;
639 if cfg.enabled_effective() && line_count >= cfg.min_lines_effective() {
640 let task_str = task.unwrap_or("");
641 if !task_str.is_empty() {
642 let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
643 let r = crate::core::attention_layout_driver::maybe_reorder_for_attention(
644 content, &keywords, &cfg,
645 );
646 if !r.skipped && r.changed {
647 reordered = Some(r.output);
648 }
649 }
650 }
651 }
652
653 let content_for_output = reordered.as_deref().unwrap_or(content);
654
655 let mut sym = SymbolMap::new();
656 let idents = symbol_map::extract_identifiers(content_for_output, ext);
657 for ident in &idents {
658 sym.register(ident);
659 }
660
661 if sym.len() >= 3 {
662 let sym_table = sym.format_table();
663 let compressed = sym.apply(content_for_output);
664 let original_tok = count_tokens(content_for_output);
665 let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
666 let net_saving = original_tok.saturating_sub(compressed_tok);
667 if original_tok > 0 && net_saving * 100 / original_tok >= 5 {
668 let output = format!("{metadata}\n{compressed}{sym_table}");
669 let sent = count_tokens(&output);
670 return (protocol::append_savings(&output, tokens, sent), sent);
671 }
672 }
673
674 let output = format!("{metadata}\n{content_for_output}");
675 let sent = count_tokens(&output);
676 (protocol::append_savings(&output, tokens, sent), sent)
677}
678
679fn build_header(
680 file_ref: &str,
681 short: &str,
682 ext: &str,
683 content: &str,
684 line_count: usize,
685 include_deps: bool,
686) -> String {
687 let mut header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
688 format!("{file_ref}={short} {line_count}L")
689 } else {
690 format!("{short} {line_count}L")
691 };
692
693 if include_deps {
694 let dep_info = deps::extract_deps(content, ext);
695 if !dep_info.imports.is_empty() {
696 let imports_str: Vec<&str> = dep_info
697 .imports
698 .iter()
699 .take(8)
700 .map(std::string::String::as_str)
701 .collect();
702 header.push_str(&format!("\n deps {}", imports_str.join(",")));
703 }
704 if !dep_info.exports.is_empty() {
705 let exports_str: Vec<&str> = dep_info
706 .exports
707 .iter()
708 .take(8)
709 .map(std::string::String::as_str)
710 .collect();
711 header.push_str(&format!("\n exports {}", exports_str.join(",")));
712 }
713 }
714
715 header
716}
717
/// Renders `content` according to `mode` and returns `(text, tokens)`.
///
/// Supported modes: `"auto"` (resolved via `resolve_auto_mode`, then re-dispatched), `"full"`,
/// `"signatures"`, `"map"`, `"aggressive"`, `"entropy"`, `"task"`, `"reference"` and
/// `"lines:<ranges>"`. Any other mode falls back to the full content with a warning line.
///
/// `original_tokens` is the token count of the unmodified file and is used for the savings
/// footers. What exactly the returned token count covers differs per arm (see the comments in
/// each arm); it generally excludes the savings footer and the compressed hint.
// The argument list mirrors everything the individual arms need; silence the lint rather than
// bundling the arguments into a struct.
#[allow(clippy::too_many_arguments)]
fn process_mode(
    content: &str,
    mode: &str,
    file_ref: &str,
    short: &str,
    ext: &str,
    original_tokens: usize,
    crp_mode: CrpMode,
    file_path: &str,
    task: Option<&str>,
) -> (String, usize) {
    let line_count = content.lines().count();

    match mode {
        // Resolve to a concrete mode and dispatch again.
        "auto" => {
            let chosen = resolve_auto_mode(file_path, original_tokens, task);
            process_mode(
                content,
                &chosen,
                file_ref,
                short,
                ext,
                original_tokens,
                crp_mode,
                file_path,
                task,
            )
        }
        "full" => format_full_output(
            file_ref,
            short,
            ext,
            content,
            original_tokens,
            line_count,
            task,
        ),
        // Header, up to eight imports, then one line per extracted signature.
        "signatures" => {
            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                format!("{file_ref}={short} {line_count}L")
            } else {
                format!("{short} {line_count}L")
            };
            if !dep_info.imports.is_empty() {
                let imports_str: Vec<&str> = dep_info
                    .imports
                    .iter()
                    .take(8)
                    .map(std::string::String::as_str)
                    .collect();
                output.push_str(&format!("\n deps {}", imports_str.join(",")));
            }
            for sig in &sigs {
                output.push('\n');
                if crp_mode.is_tdd() {
                    output.push_str(&sig.to_tdd());
                } else {
                    output.push_str(&sig.to_compact());
                }
            }
            // Tokens are counted before the savings footer and compressed hint are appended.
            let sent = count_tokens(&output);
            (
                append_compressed_hint(
                    &protocol::append_savings(&output, original_tokens, sent),
                    file_path,
                ),
                sent,
            )
        }
        // Header, all imports/exports, then the exported or top-level signatures under "API:".
        "map" => {
            // PHP files get a dedicated map when the PHP compressor produces one.
            if ext == "php" {
                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
                {
                    let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                        format!("{file_ref}={short} {line_count}L\n{php_map}")
                    } else {
                        format!("{short} {line_count}L\n{php_map}")
                    };
                    let sent = count_tokens(&output);
                    let output = protocol::append_savings(&output, original_tokens, sent);
                    return (append_compressed_hint(&output, file_path), sent);
                }
            }

            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                format!("{file_ref}={short} {line_count}L")
            } else {
                format!("{short} {line_count}L")
            };

            if !dep_info.imports.is_empty() {
                output.push_str("\n deps: ");
                output.push_str(&dep_info.imports.join(", "));
            }

            if !dep_info.exports.is_empty() {
                output.push_str("\n exports: ");
                output.push_str(&dep_info.exports.join(", "));
            }

            // Only exported signatures and those at indent 0 make it into the map.
            let key_sigs: Vec<&signatures::Signature> = sigs
                .iter()
                .filter(|s| s.is_exported || s.indent == 0)
                .collect();

            if !key_sigs.is_empty() {
                output.push_str("\n API:");
                for sig in &key_sigs {
                    output.push_str("\n ");
                    if crp_mode.is_tdd() {
                        output.push_str(&sig.to_tdd());
                    } else {
                        output.push_str(&sig.to_compact());
                    }
                }
            }

            let sent = count_tokens(&output);
            (
                append_compressed_hint(
                    &protocol::append_savings(&output, original_tokens, sent),
                    file_path,
                ),
                sent,
            )
        }
        // Optional AST pruning, then task-aware or aggressive compression, then optional
        // symbol substitution.
        "aggressive" => {
            #[cfg(feature = "tree-sitter")]
            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
            #[cfg(not(feature = "tree-sitter"))]
            let ast_pruned: Option<String> = None;

            let base = ast_pruned.as_deref().unwrap_or(content);

            // A structured intent from the latest session selects task-aware compression.
            let session_intent = crate::core::session::SessionState::load_latest()
                .and_then(|s| s.active_structured_intent);
            let raw = if let Some(ref intent) = session_intent {
                compressor::task_aware_compress(base, Some(ext), intent)
            } else {
                compressor::aggressive_compress(base, Some(ext))
            };
            // `safeguard_ratio` receives the original content and the compressed result.
            let compressed = compressor::safeguard_ratio(content, &raw);
            let header = build_header(file_ref, short, ext, content, line_count, true);

            let mut sym = SymbolMap::new();
            let idents = symbol_map::extract_identifiers(&compressed, ext);
            for ident in &idents {
                sym.register(ident);
            }

            if sym.len() >= 3 {
                let sym_table = sym.format_table();
                let sym_applied = sym.apply(&compressed);
                let orig_tok = count_tokens(&compressed);
                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
                let net = orig_tok.saturating_sub(comp_tok);
                // Use the symbol-substituted form only when it saves at least 5% of tokens.
                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
                    let savings = protocol::format_savings(original_tokens, comp_tok);
                    return (
                        append_compressed_hint(
                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
                            file_path,
                        ),
                        comp_tok,
                    );
                }
                let savings = protocol::format_savings(original_tokens, orig_tok);
                return (
                    append_compressed_hint(
                        &format!("{header}\n{compressed}\n{savings}"),
                        file_path,
                    ),
                    orig_tok,
                );
            }

            // Token counts in this arm cover the compressed body only, not the header.
            let sent = count_tokens(&compressed);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
                sent,
            )
        }
        // Entropy-based compression; the header carries the average entropy and techniques used.
        "entropy" => {
            let result = entropy::entropy_compress_adaptive(content, file_path);
            let avg_h = entropy::analyze_entropy(content).avg_entropy;
            let header = build_header(file_ref, short, ext, content, line_count, false);
            let techs = result.techniques.join(", ");
            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            let compression_ratio = if original_tokens > 0 {
                1.0 - (sent as f64 / original_tokens as f64)
            } else {
                0.0
            };
            // Report the outcome to the adaptive thresholds: success means more than 15% saved.
            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        // Keeps only content relevant to the task keywords; returns everything when there is no
        // task or no keywords can be extracted.
        "task" => {
            let task_str = task.unwrap_or("");
            if task_str.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
                let sent = count_tokens(&out);
                return (out, sent);
            }
            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
            if keywords.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!(
                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
                );
                let sent = count_tokens(&out);
                return (out, sent);
            }
            // 0.3 is passed through as the filter's third argument; its meaning is defined by
            // `information_bottleneck_filter`.
            let filtered =
                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
            let filtered_lines = filtered.lines().count();
            let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                format!("{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
            } else {
                format!("{short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
            };
            let project_root = detect_project_root(file_path);
            // Graph context is appended when available; otherwise an empty string is used.
            let graph_ctx = crate::core::graph_context::build_graph_context(
                file_path,
                &project_root,
                Some(crate::core::graph_context::GraphContextOptions::default()),
            )
            .map(|c| crate::core::graph_context::format_graph_context(&c))
            .unwrap_or_default();

            // Tokens are the sum of the three parts, counted separately.
            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(
                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
                    file_path,
                ),
                sent,
            )
        }
        // One-line summary of the file: line count, token count and extension.
        "reference" => {
            let tok = count_tokens(content);
            let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})")
            } else {
                format!("{short}: {line_count} lines, {tok} tok ({ext})")
            };
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{output}\n{savings}"), sent)
        }
        // "lines:<ranges>": everything after the 6-byte "lines:" prefix is the range spec.
        mode if mode.starts_with("lines:") => {
            let range_str = &mode[6..];
            let extracted = extract_line_range(content, range_str);
            let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                format!("{file_ref}={short} {line_count}L lines:{range_str}")
            } else {
                format!("{short} {line_count}L lines:{range_str}")
            };
            // Tokens cover the extracted lines only, not the header.
            let sent = count_tokens(&extracted);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{header}\n{extracted}\n{savings}"), sent)
        }
        // Unknown mode: warn and return the full content unmodified.
        unknown => {
            let header = build_header(file_ref, short, ext, content, line_count, true);
            let out = format!(
                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
            );
            let sent = count_tokens(&out);
            (out, sent)
        }
    }
}
1004
/// Extracts the lines of `content` selected by `range_str` and prefixes each with its 1-based,
/// right-aligned line number.
///
/// `range_str` is a comma-separated list of single line numbers (`7`) and inclusive ranges
/// (`3-9`). In a range, a missing or unparsable start means line 1 and a missing or unparsable
/// end means the last line; the end is clamped to the last line. Parts that select nothing
/// (out of bounds, reversed, not a number) are silently skipped. Lines appear in the order the
/// parts are given, so overlapping parts repeat lines.
///
/// Returns `"No lines matched the range."` when nothing was selected.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let all_lines: Vec<&str> = content.lines().collect();
    let line_total = all_lines.len();
    let render = |number: usize| format!("{number:>4}| {}", all_lines[number - 1]);

    let picked: Vec<String> = range_str
        .split(',')
        .map(str::trim)
        .flat_map(|spec| {
            // Normalise every part to an inclusive (first, last) pair; (1, 0) selects nothing.
            let (first, last) = match spec.split_once('-') {
                Some((lo, hi)) => (
                    lo.trim().parse::<usize>().unwrap_or(1).max(1),
                    hi.trim()
                        .parse::<usize>()
                        .unwrap_or(line_total)
                        .min(line_total),
                ),
                None => match spec.parse::<usize>() {
                    Ok(single) => (single, single),
                    Err(_) => (1, 0),
                },
            };
            (first..=last).filter(move |number| (1..=line_total).contains(number))
        })
        .map(render)
        .collect();

    if picked.is_empty() {
        return "No lines matched the range.".to_string();
    }
    picked.join("\n")
}
1033
1034fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
1035 let short = protocol::shorten_path(path);
1036 let old_content = cache.get(path).map(crate::core::cache::CacheEntry::content);
1037
1038 let new_content = match read_file_lossy(path) {
1039 Ok(c) => c,
1040 Err(e) => {
1041 let msg = format!("ERROR: {e}");
1042 let tokens = count_tokens(&msg);
1043 return (msg, tokens);
1044 }
1045 };
1046
1047 let original_tokens = count_tokens(&new_content);
1048
1049 let diff_output = if let Some(old) = &old_content {
1050 compressor::diff_content(old, &new_content)
1051 } else {
1052 format!("[first read]\n{new_content}")
1053 };
1054
1055 cache.store(path, &new_content);
1056
1057 let sent = count_tokens(&diff_output);
1058 let savings = protocol::format_savings(original_tokens, sent);
1059 let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1060 format!("{file_ref}={short}")
1061 } else {
1062 short.clone()
1063 };
1064 (format!("{head} [diff]\n{diff_output}\n{savings}"), sent)
1065}
1066
1067#[cfg(test)]
1068mod tests {
1069 use super::*;
1070 use std::time::Duration;
1071
1072 #[test]
1073 fn test_header_toon_format_no_brackets() {
1074 let _lock = crate::core::data_dir::test_env_lock();
1075 std::env::set_var("LEAN_CTX_META", "1");
1076 let content = "use std::io;\nfn main() {}\n";
1077 let header = build_header("F1", "main.rs", "rs", content, 2, false);
1078 assert!(!header.contains('['));
1079 assert!(!header.contains(']'));
1080 assert!(header.contains("F1=main.rs 2L"));
1081 std::env::remove_var("LEAN_CTX_META");
1082 }
1083
1084 #[test]
1085 fn test_header_toon_deps_indented() {
1086 let _lock = crate::core::data_dir::test_env_lock();
1087 std::env::set_var("LEAN_CTX_META", "1");
1088 let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
1089 let header = build_header("F1", "main.rs", "rs", content, 3, true);
1090 if header.contains("deps") {
1091 assert!(
1092 header.contains("\n deps "),
1093 "deps should use indented TOON format"
1094 );
1095 assert!(
1096 !header.contains("deps:["),
1097 "deps should not use bracket format"
1098 );
1099 }
1100 std::env::remove_var("LEAN_CTX_META");
1101 }
1102
1103 #[test]
1104 fn test_header_toon_saves_tokens() {
1105 let _lock = crate::core::data_dir::test_env_lock();
1106 std::env::set_var("LEAN_CTX_META", "1");
1107 let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
1108 let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
1109 let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
1110 let old_tokens = count_tokens(&old_header);
1111 let new_tokens = count_tokens(&new_header);
1112 assert!(
1113 new_tokens <= old_tokens,
1114 "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
1115 );
1116 std::env::remove_var("LEAN_CTX_META");
1117 }
1118
1119 #[test]
1120 fn test_tdd_symbols_are_compact() {
1121 let symbols = [
1122 "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
1123 ];
1124 for sym in &symbols {
1125 let tok = count_tokens(sym);
1126 assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
1127 }
1128 }
1129
1130 #[test]
1131 fn test_task_mode_filters_content() {
1132 let content = (0..200)
1133 .map(|i| {
1134 if i % 20 == 0 {
1135 format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
1136 } else {
1137 format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
1138 }
1139 })
1140 .collect::<Vec<_>>()
1141 .join("\n");
1142 let full_tokens = count_tokens(&content);
1143 let task = Some("fix bug in validate_token");
1144 let (result, result_tokens) = process_mode(
1145 &content,
1146 "task",
1147 "F1",
1148 "test.rs",
1149 "rs",
1150 full_tokens,
1151 CrpMode::Off,
1152 "test.rs",
1153 task,
1154 );
1155 assert!(
1156 result_tokens < full_tokens,
1157 "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
1158 );
1159 assert!(
1160 result.contains("task-filtered"),
1161 "output should contain task-filtered marker"
1162 );
1163 }
1164
1165 #[test]
1166 fn test_task_mode_without_task_returns_full() {
1167 let content = "fn main() {}\nfn helper() {}\n";
1168 let tokens = count_tokens(content);
1169 let (result, _sent) = process_mode(
1170 content,
1171 "task",
1172 "F1",
1173 "test.rs",
1174 "rs",
1175 tokens,
1176 CrpMode::Off,
1177 "test.rs",
1178 None,
1179 );
1180 assert!(
1181 result.contains("no task set"),
1182 "should indicate no task: {result}"
1183 );
1184 }
1185
1186 #[test]
1187 fn test_reference_mode_one_line() {
1188 let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
1189 let tokens = count_tokens(content);
1190 let (result, _sent) = process_mode(
1191 content,
1192 "reference",
1193 "F1",
1194 "test.rs",
1195 "rs",
1196 tokens,
1197 CrpMode::Off,
1198 "test.rs",
1199 None,
1200 );
1201 let lines: Vec<&str> = result.lines().collect();
1202 assert!(
1203 lines.len() <= 3,
1204 "reference mode should be very compact, got {} lines",
1205 lines.len()
1206 );
1207 assert!(result.contains("lines"), "should contain line count");
1208 assert!(result.contains("tok"), "should contain token count");
1209 }
1210
1211 #[test]
1212 fn cached_lines_mode_invalidates_on_mtime_change() {
1213 let dir = tempfile::tempdir().unwrap();
1214 let path = dir.path().join("file.txt");
1215 let p = path.to_string_lossy().to_string();
1216
1217 std::fs::write(&path, "one\nsecond\n").unwrap();
1218 let mut cache = SessionCache::new();
1219
1220 let r1 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1221 let l1: Vec<&str> = r1.content.lines().collect();
1222 let got1 = l1.get(1).copied().unwrap_or_default().trim();
1223 let got1 = got1.split_once('|').map_or(got1, |(_, s)| s.trim());
1224 assert_eq!(got1, "one");
1225
1226 std::thread::sleep(Duration::from_secs(1));
1227 std::fs::write(&path, "two\nsecond\n").unwrap();
1228
1229 let r2 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1230 let l2: Vec<&str> = r2.content.lines().collect();
1231 let got2 = l2.get(1).copied().unwrap_or_default().trim();
1232 let got2 = got2.split_once('|').map_or(got2, |(_, s)| s.trim());
1233 assert_eq!(got2, "two");
1234 }
1235
1236 #[test]
1237 #[cfg_attr(tarpaulin, ignore)]
1238 fn benchmark_task_conditioned_compression() {
1239 let content = generate_benchmark_code(200);
1241 let full_tokens = count_tokens(&content);
1242 let task = Some("fix authentication in validate_token");
1243
1244 let (_full_output, full_tok) = process_mode(
1245 &content,
1246 "full",
1247 "F1",
1248 "server.rs",
1249 "rs",
1250 full_tokens,
1251 CrpMode::Off,
1252 "server.rs",
1253 task,
1254 );
1255 let (_task_output, task_tok) = process_mode(
1256 &content,
1257 "task",
1258 "F1",
1259 "server.rs",
1260 "rs",
1261 full_tokens,
1262 CrpMode::Off,
1263 "server.rs",
1264 task,
1265 );
1266 let (_sig_output, sig_tok) = process_mode(
1267 &content,
1268 "signatures",
1269 "F1",
1270 "server.rs",
1271 "rs",
1272 full_tokens,
1273 CrpMode::Off,
1274 "server.rs",
1275 task,
1276 );
1277 let (_ref_output, ref_tok) = process_mode(
1278 &content,
1279 "reference",
1280 "F1",
1281 "server.rs",
1282 "rs",
1283 full_tokens,
1284 CrpMode::Off,
1285 "server.rs",
1286 task,
1287 );
1288
1289 eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
1290 eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
1291 eprintln!(" full: {full_tok:>6} tokens (baseline)");
1292 eprintln!(
1293 " task: {task_tok:>6} tokens ({:.0}% savings)",
1294 (1.0 - task_tok as f64 / full_tok as f64) * 100.0
1295 );
1296 eprintln!(
1297 " signatures: {sig_tok:>6} tokens ({:.0}% savings)",
1298 (1.0 - sig_tok as f64 / full_tok as f64) * 100.0
1299 );
1300 eprintln!(
1301 " reference: {ref_tok:>6} tokens ({:.0}% savings)",
1302 (1.0 - ref_tok as f64 / full_tok as f64) * 100.0
1303 );
1304 eprintln!("================================================\n");
1305
1306 assert!(task_tok < full_tok, "task mode should save tokens");
1307 assert!(sig_tok < full_tok, "signatures should save tokens");
1308 assert!(ref_tok < sig_tok, "reference should be most compact");
1309 }
1310
1311 fn generate_benchmark_code(lines: usize) -> String {
1312 let mut code = Vec::with_capacity(lines);
1313 code.push("use std::collections::HashMap;".to_string());
1314 code.push("use crate::core::auth;".to_string());
1315 code.push(String::new());
1316 code.push("pub struct Server {".to_string());
1317 code.push(" config: Config,".to_string());
1318 code.push(" cache: HashMap<String, String>,".to_string());
1319 code.push("}".to_string());
1320 code.push(String::new());
1321 code.push("impl Server {".to_string());
1322 code.push(
1323 " pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {"
1324 .to_string(),
1325 );
1326 code.push(" let decoded = auth::decode_jwt(token)?;".to_string());
1327 code.push(" if decoded.exp < chrono::Utc::now().timestamp() {".to_string());
1328 code.push(" return Err(AuthError::Expired);".to_string());
1329 code.push(" }".to_string());
1330 code.push(" Ok(decoded.claims)".to_string());
1331 code.push(" }".to_string());
1332 code.push(String::new());
1333
1334 let remaining = lines.saturating_sub(code.len());
1335 for i in 0..remaining {
1336 if i % 30 == 0 {
1337 code.push(format!(
1338 " pub fn handler_{i}(&self, req: Request) -> Response {{"
1339 ));
1340 } else if i % 30 == 29 {
1341 code.push(" }".to_string());
1342 } else {
1343 code.push(format!(" let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"));
1344 }
1345 }
1346 code.push("}".to_string());
1347 code.join("\n")
1348 }
1349
1350 #[test]
1351 fn instruction_file_detection() {
1352 assert!(is_instruction_file(
1353 "/home/user/.pi/agent/skills/committing-changes/SKILL.md"
1354 ));
1355 assert!(is_instruction_file("/workspace/.cursor/rules/lean-ctx.mdc"));
1356 assert!(is_instruction_file("/project/AGENTS.md"));
1357 assert!(is_instruction_file("/project/.cursorrules"));
1358 assert!(is_instruction_file("/home/user/.claude/rules/my-rule.md"));
1359 assert!(is_instruction_file("/skills/some-skill/README.md"));
1360
1361 assert!(!is_instruction_file("/project/src/main.rs"));
1362 assert!(!is_instruction_file("/project/config.json"));
1363 assert!(!is_instruction_file("/project/data/report.csv"));
1364 }
1365
1366 #[test]
1367 fn resolve_auto_mode_returns_full_for_instruction_files() {
1368 let mode = resolve_auto_mode(
1369 "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
1370 5000,
1371 Some("read"),
1372 );
1373 assert_eq!(mode, "full", "SKILL.md must always be read in full");
1374
1375 let mode = resolve_auto_mode("/workspace/AGENTS.md", 3000, Some("read"));
1376 assert_eq!(mode, "full", "AGENTS.md must always be read in full");
1377
1378 let mode = resolve_auto_mode("/workspace/.cursorrules", 2000, None);
1379 assert_eq!(mode, "full", ".cursorrules must always be read in full");
1380 }
1381}