1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a single read request.
///
/// Deriving `Debug` and `Clone` lets callers log and duplicate results without
/// hand-written boilerplate; all fields are plain standard-library types.
#[derive(Debug, Clone)]
pub struct ReadOutput {
    /// Text returned to the caller (file content, compressed view, stub, or error message).
    pub content: String,
    /// Mode that produced `content`; differs from the requested mode when `auto`
    /// was resolved, and is `"error"` when the file could not be read.
    pub resolved_mode: String,
    /// Token count reported for `content`.
    pub output_tokens: usize,
}
22
/// Marker line that `append_compressed_hint` adds after compressed output to tell
/// the reader the content is not the complete source.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";
24
/// Modes whose rendered output is stored in, and served from, the per-file compressed cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is listed in [`CACHEABLE_MODES`].
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|cacheable| *cacheable == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32 if crp_mode.is_tdd() {
33 format!("{mode}:tdd")
34 } else {
35 mode.to_string()
36 }
37}
38
/// Returns a short excerpt of the first non-blank line of `content`, used as
/// "proof" in cache-hit stubs that the cached file is the one the reader expects.
///
/// * Returns `None` on the first read (`read_count < 2`) or when `content` has
///   no non-blank line.
/// * Lines of at most 60 bytes are returned trimmed but otherwise unchanged.
/// * Longer lines are cut to at most 57 bytes and suffixed with `"..."`.
///
/// The cut position is moved back to the nearest UTF-8 character boundary, so
/// lines containing multi-byte characters no longer panic when byte 57 falls
/// inside a character.
fn cache_hit_proof_line(content: &str, read_count: u32) -> Option<String> {
    const MAX_PROOF_BYTES: usize = 60;
    const KEPT_BYTES: usize = 57;

    if read_count < 2 {
        return None;
    }

    let trimmed = content.lines().map(str::trim).find(|line| !line.is_empty())?;
    if trimmed.len() <= MAX_PROOF_BYTES {
        return Some(trimmed.to_string());
    }

    // Slicing a `str` at a non-boundary byte index panics; back off until the
    // cut lands on a boundary (index 0 always is one, so this terminates).
    let mut cut = KEPT_BYTES;
    while !trimmed.is_char_boundary(cut) {
        cut -= 1;
    }
    Some(format!("{}...", &trimmed[..cut]))
}
54
55fn append_compressed_hint(output: &str, file_path: &str) -> String {
56 if !crate::core::profiles::active_profile()
57 .output_hints
58 .compressed_hint()
59 {
60 return output.to_string();
61 }
62 format!(
63 "{output}\n{COMPRESSED_HINT}\n ctx_read(\"{file_path}\", mode=\"full\") | ctx_retrieve(\"{file_path}\")"
64 )
65}
66
67pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
71 if crate::core::binary_detect::is_binary_file(path) {
72 let msg = crate::core::binary_detect::binary_file_message(path);
73 return Err(std::io::Error::other(msg));
74 }
75
76 if let Ok(canonical) = std::path::Path::new(path).canonicalize() {
77 if let Ok(cwd) = std::env::current_dir() {
78 let root = crate::core::pathjail::canonicalize_or_self(&cwd);
79 if !canonical.starts_with(&root) {
80 let allow = crate::core::pathjail::allow_paths_from_env_and_config();
81 let data_dir_ok = crate::core::data_dir::lean_ctx_data_dir()
82 .ok()
83 .is_some_and(|d| canonical.starts_with(d));
84 let tmp_ok = canonical.starts_with(std::env::temp_dir());
85 if !allow.iter().any(|a| canonical.starts_with(a)) && !data_dir_ok && !tmp_ok {
86 tracing::warn!(
87 "defense-in-depth: path may escape project root: {}",
88 canonical.display()
89 );
90 }
91 }
92 }
93 }
94
95 let cap = crate::core::limits::max_read_bytes();
96
97 let file = open_with_retry(path)?;
98 let meta = file
99 .metadata()
100 .map_err(|e| std::io::Error::other(format!("cannot stat open file descriptor: {e}")))?;
101 if meta.len() > cap as u64 {
102 return Err(std::io::Error::other(format!(
103 "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
104 Increase the limit or use a line-range read: mode=\"lines:1-100\"",
105 meta.len(),
106 cap
107 )));
108 }
109
110 use std::io::Read;
111 let mut bytes = Vec::with_capacity(meta.len() as usize);
112 std::io::BufReader::new(file).read_to_end(&mut bytes)?;
113 match String::from_utf8(bytes) {
114 Ok(s) => Ok(s),
115 Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
116 }
117}
118
119fn open_with_retry(path: &str) -> Result<std::fs::File, std::io::Error> {
123 match open_nofollow(path) {
124 Ok(f) => Ok(f),
125 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
126 std::thread::sleep(std::time::Duration::from_millis(50));
127 open_nofollow(path)
128 }
129 Err(e) => Err(e),
130 }
131}
132
133#[cfg(unix)]
134fn open_nofollow(path: &str) -> Result<std::fs::File, std::io::Error> {
135 use std::os::unix::fs::OpenOptionsExt;
136 use std::path::Path;
137
138 let p = Path::new(path);
139 if let (Some(parent), Some(filename)) = (p.parent(), p.file_name()) {
144 if parent.exists() {
145 let canonical_parent = parent.canonicalize()?;
146 let canonical_path = canonical_parent.join(filename);
147 return std::fs::OpenOptions::new()
148 .read(true)
149 .custom_flags(libc::O_NOFOLLOW)
150 .open(&canonical_path);
151 }
152 }
153
154 std::fs::OpenOptions::new()
156 .read(true)
157 .custom_flags(libc::O_NOFOLLOW)
158 .open(path)
159}
160
/// Non-Unix fallback: opens `path` read-only with no symlink restriction.
#[cfg(not(unix))]
fn open_nofollow(path: &str) -> Result<std::fs::File, std::io::Error> {
    std::fs::OpenOptions::new().read(true).open(path)
}
165
166pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
168 handle_with_options(cache, path, mode, false, crp_mode, None)
169}
170
171pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
173 handle_with_options(cache, path, mode, true, crp_mode, None)
174}
175
176pub fn handle_with_task(
178 cache: &mut SessionCache,
179 path: &str,
180 mode: &str,
181 crp_mode: CrpMode,
182 task: Option<&str>,
183) -> String {
184 handle_with_options(cache, path, mode, false, crp_mode, task)
185}
186
187pub fn handle_with_task_resolved(
189 cache: &mut SessionCache,
190 path: &str,
191 mode: &str,
192 crp_mode: CrpMode,
193 task: Option<&str>,
194) -> ReadOutput {
195 handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
196}
197
198pub fn handle_fresh_with_task(
200 cache: &mut SessionCache,
201 path: &str,
202 mode: &str,
203 crp_mode: CrpMode,
204 task: Option<&str>,
205) -> String {
206 handle_with_options(cache, path, mode, true, crp_mode, task)
207}
208
209pub fn handle_fresh_with_task_resolved(
211 cache: &mut SessionCache,
212 path: &str,
213 mode: &str,
214 crp_mode: CrpMode,
215 task: Option<&str>,
216) -> ReadOutput {
217 handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
218}
219
220fn handle_with_options(
221 cache: &mut SessionCache,
222 path: &str,
223 mode: &str,
224 fresh: bool,
225 crp_mode: CrpMode,
226 task: Option<&str>,
227) -> String {
228 handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
229}
230
/// Reports whether reads should always be treated as fresh for this process.
///
/// True when `LEAN_CTX_FORCE_FRESH` is `"1"` or `"true"`, or when
/// `CURSOR_TASK_ID` is set to a non-empty value. The environment is inspected
/// only on the first call; the answer is then cached for the process lifetime.
fn is_subagent_context() -> bool {
    fn detect_from_env() -> bool {
        let forced = matches!(
            std::env::var("LEAN_CTX_FORCE_FRESH").as_deref(),
            Ok("1") | Ok("true")
        );
        if forced {
            return true;
        }
        match std::env::var("CURSOR_TASK_ID") {
            Ok(task_id) => !task_id.is_empty(),
            Err(_) => false,
        }
    }

    static IS_SUBAGENT: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
    *IS_SUBAGENT.get_or_init(detect_from_env)
}
242
243fn handle_with_options_resolved(
244 cache: &mut SessionCache,
245 path: &str,
246 mode: &str,
247 fresh: bool,
248 crp_mode: CrpMode,
249 task: Option<&str>,
250) -> ReadOutput {
251 let effective_fresh = fresh || is_subagent_context();
252
253 if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
254 bt.next_seq();
255 }
256 let mut result = handle_with_options_inner(cache, path, mode, effective_fresh, crp_mode, task);
257
258 if let Some(entry) = cache.get_mut(path) {
259 entry.last_mode.clone_from(&result.resolved_mode);
260 }
261
262 let dedup_allowed = matches!(
263 result.resolved_mode.as_str(),
264 "map" | "signatures" | "aggressive" | "entropy" | "task"
265 );
266 if dedup_allowed {
267 if let Some(deduped) = cache.apply_dedup(path, &result.content) {
268 let new_tokens = count_tokens(&deduped);
269 if new_tokens < result.output_tokens {
270 result.content = deduped;
271 result.output_tokens = new_tokens;
272 }
273 }
274 }
275
276 if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
277 let original_tokens = cache.get(path).map_or(0, |e| e.original_tokens);
278 bt.record_read(
279 path,
280 &result.resolved_mode,
281 result.output_tokens,
282 original_tokens,
283 );
284 }
285
286 result
287}
288
/// Core read pipeline: produces the output for one read request while
/// consulting and updating the session cache.
///
/// Visible flow:
/// 1. `fresh`: a `diff` request is rejected with a warning, otherwise the cache
///    entry is invalidated.
/// 2. `diff` mode is delegated to `handle_diff`.
/// 3. For non-`full` modes a stale cache entry is dropped up front.
/// 4. If a cache entry exists:
///    - `full`: return an "unchanged" stub when the entry is not stale, else
///      re-read via `handle_full_with_auto_delta`;
///    - other modes: serve a cached compressed rendering if present, else render
///      from the cached content; an entry without content is invalidated and
///      the request falls through to step 5.
/// 5. Otherwise read the file from disk, store it in the cache and render it.
///
/// Except for the fresh+diff warning and the read-error message, every output
/// is passed through `redact_text_if_enabled` before its tokens are counted.
fn handle_with_options_inner(
    cache: &mut SessionCache,
    path: &str,
    mode: &str,
    fresh: bool,
    crp_mode: CrpMode,
    task: Option<&str>,
) -> ReadOutput {
    let file_ref = cache.get_file_ref(path);
    let short = protocol::shorten_path(path);
    // File extension without the dot; empty when missing or not valid UTF-8.
    let ext = Path::new(path)
        .extension()
        .and_then(|e| e.to_str())
        .unwrap_or("");

    if fresh {
        // A diff needs the previously cached content, which `fresh` would discard.
        if mode == "diff" {
            let warning = "[warning] fresh+diff is redundant — fresh invalidates cache, no diff possible. Use mode=full with fresh=true instead.";
            return ReadOutput {
                content: warning.to_string(),
                resolved_mode: "diff".into(),
                output_tokens: count_tokens(warning),
            };
        }
        cache.invalidate(path);
    }

    if mode == "diff" {
        let (out, _) = handle_diff(cache, path, &file_ref);
        let out = crate::core::redaction::redact_text_if_enabled(&out);
        let sent = count_tokens(&out);
        return ReadOutput {
            content: out,
            resolved_mode: "diff".into(),
            output_tokens: sent,
        };
    }

    // Non-full modes never serve stale data: drop the entry so the disk path below runs.
    // (`full` handles staleness itself so it can produce an auto-delta.)
    if mode != "full" {
        if let Some(existing) = cache.get(path) {
            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
            if stale {
                cache.invalidate(path);
            }
        }
    }

    // Copy the needed fields out of the entry so the borrow on `cache` ends here
    // and `cache` can be mutated below.
    let cache_snapshot = cache.get(path).map(|existing| {
        (
            existing.stored_mtime,
            existing.read_count,
            existing.line_count,
            existing.original_tokens,
            existing.content(),
        )
    });

    if let Some((cached_mtime, read_count, line_count, original_tokens, content_opt)) =
        cache_snapshot
    {
        if mode == "full" {
            if !crate::core::cache::is_cache_entry_stale(path, cached_mtime) {
                // Unchanged on disk: answer with a short stub instead of the content.
                cache.record_cache_hit(path);
                let out = if crate::core::protocol::meta_visible() {
                    format!(
                        "{file_ref}={short} [unchanged, {line_count}L, use cached context]\nFile unchanged on disk (same hash). If you haven't seen this content, use fresh=true to force re-read.",
                    )
                } else {
                    // Include the first non-blank line as evidence of which content is cached.
                    let proof = content_opt
                        .as_deref()
                        .and_then(|c| cache_hit_proof_line(c, read_count));
                    // `read_count` was snapshotted before `record_cache_hit`, hence the `+ 1`.
                    // NOTE(review): assumes `record_cache_hit` increments the count by one — confirm.
                    let reads_note = if read_count > 3 {
                        format!(" (read {}x, unchanged)", read_count + 1)
                    } else {
                        String::new()
                    };
                    match proof {
                        Some(p) => format!(
                            "{file_ref}={short} [unchanged, {line_count}L, use cached context{reads_note} | first: \"{p}\"]"
                        ),
                        None => format!(
                            "{file_ref}={short} [unchanged, {line_count}L, use cached context{reads_note}]"
                        ),
                    }
                };
                let out = crate::core::redaction::redact_text_if_enabled(&out);
                let sent = count_tokens(&out);
                return ReadOutput {
                    content: out,
                    resolved_mode: "full".into(),
                    output_tokens: sent,
                };
            }
            // Entry looks stale: re-read and send either a delta or the full file.
            let (out, _) = handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
            let out = crate::core::redaction::redact_text_if_enabled(&out);
            let sent = count_tokens(&out);
            return ReadOutput {
                content: out,
                resolved_mode: "full".into(),
                output_tokens: sent,
            };
        }

        let resolved_mode = if mode == "auto" {
            resolve_auto_mode(path, original_tokens, task)
        } else {
            mode.to_string()
        };

        // Serve a previously rendered compressed view when one exists for this mode/CRP key.
        if is_cacheable_mode(&resolved_mode) {
            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
            let compressed_hit = cache.get_compressed(path, &cache_key).cloned();
            if let Some(cached_output) = compressed_hit {
                cache.record_cache_hit(path);
                let out = crate::core::redaction::redact_text_if_enabled(&cached_output);
                let sent = count_tokens(&out);
                return ReadOutput {
                    content: out,
                    resolved_mode,
                    output_tokens: sent,
                };
            }
        }

        // Render from the cached content without touching the disk.
        if let Some(content) = content_opt {
            let (out, _) = process_mode(
                &content,
                &resolved_mode,
                &file_ref,
                &short,
                ext,
                original_tokens,
                crp_mode,
                path,
                task,
            );
            if is_cacheable_mode(&resolved_mode) {
                let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
                // The unredacted rendering is cached; redaction is applied on every serve.
                cache.set_compressed(path, &cache_key, out.clone());
            }
            let out = crate::core::redaction::redact_text_if_enabled(&out);
            let sent = count_tokens(&out);
            return ReadOutput {
                content: out,
                resolved_mode,
                output_tokens: sent,
            };
        }
        // Entry exists but has no content available: discard it and re-read from disk.
        cache.invalidate(path);
    }

    let content = match read_file_lossy(path) {
        Ok(c) => c,
        Err(e) => {
            let msg = format!("ERROR: {e}");
            let tokens = count_tokens(&msg);
            return ReadOutput {
                content: msg,
                resolved_mode: "error".into(),
                output_tokens: tokens,
            };
        }
    };

    let store_result = cache.store(path, &content);

    // Related-file hints are only computed for repeat reads of whole files,
    // and only when the active profile enables them.
    let is_line_range = mode.starts_with("lines:");
    let hints = crate::core::profiles::active_profile().output_hints;
    let is_repeat_read = store_result.read_count > 1;
    let similar_hint = if !is_line_range && is_repeat_read && hints.semantic_hint() {
        find_similar_and_update_semantic_index(path, &content)
    } else {
        None
    };
    let graph_hint = if !is_line_range && is_repeat_read && hints.related_hint() {
        build_graph_related_hint(path)
    } else {
        None
    };

    if mode == "full" {
        cache.mark_full_delivered(path);
        let (mut output, _) = format_full_output(
            &file_ref,
            &short,
            ext,
            &content,
            store_result.original_tokens,
            store_result.line_count,
            task,
        );
        if let Some(hint) = &graph_hint {
            output.push_str(&format!("\n{hint}"));
        }
        if let Some(hint) = similar_hint {
            output.push_str(&format!("\n{hint}"));
        }
        let output = crate::core::redaction::redact_text_if_enabled(&output);
        let sent = count_tokens(&output);
        return ReadOutput {
            content: output,
            resolved_mode: "full".into(),
            output_tokens: sent,
        };
    }

    let resolved_mode = if mode == "auto" {
        resolve_auto_mode(path, store_result.original_tokens, task)
    } else {
        mode.to_string()
    };

    let (mut output, _sent) = process_mode(
        &content,
        &resolved_mode,
        &file_ref,
        &short,
        ext,
        store_result.original_tokens,
        crp_mode,
        path,
        task,
    );
    if let Some(hint) = &graph_hint {
        output.push_str(&format!("\n{hint}"));
    }
    if let Some(hint) = similar_hint {
        output.push_str(&format!("\n{hint}"));
    }
    if is_cacheable_mode(&resolved_mode) {
        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
        // NOTE(review): unlike the cached-content path above, the rendering cached
        // here already includes any appended hints — confirm this is intended.
        cache.set_compressed(path, &cache_key, output.clone());
    }
    let output = crate::core::redaction::redact_text_if_enabled(&output);
    let final_tokens = count_tokens(&output);
    ReadOutput {
        content: output,
        resolved_mode,
        output_tokens: final_tokens,
    }
}
537
/// Returns `true` when `path` looks like an agent instruction/rules file.
///
/// The check is case-insensitive and matches either a well-known file name
/// (e.g. `AGENTS.md`, `.cursorrules`) or a well-known path fragment
/// (e.g. `/skills/`, `/.cursor/rules/`).
pub fn is_instruction_file(path: &str) -> bool {
    const INSTRUCTION_FILE_NAMES: [&str; 7] = [
        "skill.md",
        "agents.md",
        "rules.md",
        ".cursorrules",
        ".clinerules",
        "lean-ctx.md",
        "lean-ctx.mdc",
    ];
    const INSTRUCTION_PATH_FRAGMENTS: [&str; 4] = [
        "/skills/",
        "/.cursor/rules/",
        "/.claude/rules/",
        "/agents.md",
    ];

    let lower = path.to_lowercase();
    let filename = std::path::Path::new(&lower)
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("");

    if INSTRUCTION_FILE_NAMES.contains(&filename) {
        return true;
    }
    INSTRUCTION_PATH_FRAGMENTS
        .iter()
        .any(|fragment| lower.contains(*fragment))
}
559
/// Resolves `mode="auto"` to a concrete read mode for `file_path`.
///
/// Decision order, as implemented below:
/// 1. instruction files are always read in `full`;
/// 2. the bounce tracker may force `full`;
/// 3. the intent router's mode is used when it is neither `auto` nor `reference`;
/// 4. the mode predictor proposes a mode (falling back to `full`);
/// 5. a per-project bandit may switch a non-full prediction to `aggressive`;
/// 6. the adaptive policy makes the final choice, subject to a guard for files
///    above 2000 tokens that prevents the policy from selecting a less compact
///    mode than the prediction.
fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
    if is_instruction_file(file_path) {
        return "full".to_string();
    }

    // A poisoned tracker lock is ignored and treated as "no override".
    if let Ok(bt) = crate::core::bounce_tracker::global().lock() {
        if bt.should_force_full(file_path) {
            return "full".to_string();
        }
    }

    // Without a task, route on the generic query "read".
    let intent_query = task.unwrap_or("read");
    let route = crate::core::intent_router::route_v1(intent_query);
    let intent_mode = &route.decision.effective_read_mode;
    if intent_mode != "auto" && intent_mode != "reference" {
        return intent_mode.clone();
    }

    let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
    let predictor = crate::core::mode_predictor::ModePredictor::new();
    let mut predicted = predictor
        .predict_best_mode(&sig)
        .unwrap_or_else(|| "full".to_string());
    // Never let "auto" leak out as a prediction.
    if predicted == "auto" {
        predicted = "full".to_string();
    }

    if predicted != "full" {
        if let Some(project_root) =
            crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
        {
            let ext = std::path::Path::new(file_path)
                .extension()
                .and_then(|e| e.to_str())
                .unwrap_or("");
            // Size bucket by original token count; the bandit is keyed by extension + bucket.
            let bucket = match original_tokens {
                0..=2000 => "sm",
                2001..=10000 => "md",
                10001..=50000 => "lg",
                _ => "xl",
            };
            let bandit_key = format!("{ext}_{bucket}");
            let mut store = crate::core::bandit::BanditStore::load(&project_root);
            let bandit = store.get_or_create(&bandit_key);
            let arm = bandit.select_arm();
            // A low budget ratio on a file above 2000 tokens selects the aggressive mode.
            if arm.budget_ratio < 0.25 && original_tokens > 2000 {
                predicted = "aggressive".to_string();
            }
        }
    }

    let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
    let chosen = policy.choose_auto_mode(task, &predicted);

    // Guard for files above 2000 tokens: keep the prediction when the policy
    // would replace map/signatures with anything else, or would pick `full`
    // over a non-full prediction.
    if original_tokens > 2000 {
        if predicted == "map" || predicted == "signatures" {
            if chosen != "map" && chosen != "signatures" {
                return predicted;
            }
        } else if chosen == "full" && predicted != "full" {
            return predicted;
        }
    }

    chosen
}
630
631fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
632 const MAX_CONTENT_BYTES_FOR_SEMANTIC: usize = 32_768;
633
634 if content.len() > MAX_CONTENT_BYTES_FOR_SEMANTIC {
635 return None;
636 }
637
638 let cfg = crate::core::config::Config::load();
639 let profile = crate::core::config::MemoryProfile::effective(&cfg);
640 if !profile.semantic_cache_enabled() {
641 return None;
642 }
643
644 let project_root = detect_project_root(path);
645 let session_id = format!("{}", std::process::id());
646 let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
647
648 let similar = index.find_similar(content, 0.7);
649 let relevant: Vec<_> = similar
650 .into_iter()
651 .filter(|(p, _)| p != path)
652 .take(3)
653 .collect();
654
655 index.add_file(path, content, &session_id);
656 let _ = index.save(&project_root);
657
658 if relevant.is_empty() {
659 return None;
660 }
661
662 let hints: Vec<String> = relevant
663 .iter()
664 .map(|(p, score)| format!(" {p} ({:.0}% similar)", score * 100.0))
665 .collect();
666
667 Some(format!(
668 "[semantic: {} similar file(s) in cache]\n{}",
669 relevant.len(),
670 hints.join("\n")
671 ))
672}
673
674fn detect_project_root(path: &str) -> String {
675 crate::core::protocol::detect_project_root_or_cwd(path)
676}
677
678fn build_graph_related_hint(path: &str) -> Option<String> {
679 let project_root = detect_project_root(path);
680 crate::core::graph_context::build_related_hint(path, &project_root, 5)
681}
682
/// A diff is sent instead of the full file only when its token count is below
/// this fraction of the full file's token count.
const AUTO_DELTA_THRESHOLD: f64 = 0.6;

/// Handles a `full` read of a file whose cache entry looked stale.
///
/// Re-reads the file from disk and returns `(output, token_count)`:
/// - read failure: falls back to the cached content (or a short notice);
/// - content unchanged (`store` reports a hit): an "unchanged" stub if the full
///   content was already delivered, otherwise the full content;
/// - content changed: a diff ("auto-delta") when it is sufficiently smaller
///   than the full file, otherwise the full content.
fn handle_full_with_auto_delta(
    cache: &mut SessionCache,
    path: &str,
    file_ref: &str,
    short: &str,
    ext: &str,
    task: Option<&str>,
) -> (String, usize) {
    let Ok(disk_content) = read_file_lossy(path) else {
        // Disk read failed: serve whatever the cache still holds.
        cache.record_cache_hit(path);
        if let Some(existing) = cache.get(path) {
            if !crate::core::protocol::meta_visible() {
                if let Some(cached) = existing.content() {
                    return format_full_output(
                        file_ref,
                        short,
                        ext,
                        &cached,
                        existing.original_tokens,
                        existing.line_count,
                        task,
                    );
                }
            }
            // NOTE(review): the `{}t` label is filled with `read_count`; if "t" is
            // meant to be tokens this should probably be `original_tokens` — confirm.
            let out = format!(
                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
                existing.read_count, existing.line_count
            );
            let sent = count_tokens(&out);
            return (out, sent);
        }
        let out = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
            format!("[file read failed and no cached version available] {file_ref}={short}")
        } else {
            format!("[file read failed and no cached version available] {short}")
        };
        let sent = count_tokens(&out);
        return (out, sent);
    };

    // Capture the previous content before `store` replaces it; it is the diff base.
    let old_content = cache
        .get(path)
        .and_then(crate::core::cache::CacheEntry::content)
        .unwrap_or_default();
    let store_result = cache.store(path, &disk_content);

    // `was_hit` means the stored content matched what was already cached.
    if store_result.was_hit {
        if store_result.full_content_delivered {
            let out = if crate::core::protocol::meta_visible() {
                format!(
                    "{file_ref}={short} [unchanged, {}L, use cached context]\nFile unchanged on disk (same hash). If you haven't seen this content, use fresh=true to force re-read.",
                    store_result.line_count
                )
            } else {
                let proof = cache_hit_proof_line(&disk_content, store_result.read_count);
                let reads_note = if store_result.read_count > 3 {
                    format!(" (read {}x, unchanged)", store_result.read_count)
                } else {
                    String::new()
                };
                match proof {
                    Some(p) => format!(
                        "{file_ref}={short} [unchanged, {}L, use cached context{reads_note} | first: \"{p}\"]",
                        store_result.line_count
                    ),
                    None => format!(
                        "{file_ref}={short} [unchanged, {}L, use cached context{reads_note}]",
                        store_result.line_count
                    ),
                }
            };
            let sent = count_tokens(&out);
            return (out, sent);
        }
        // Unchanged, but the full text was never delivered: send it now.
        cache.mark_full_delivered(path);
        return format_full_output(
            file_ref,
            short,
            ext,
            &disk_content,
            store_result.original_tokens,
            store_result.line_count,
            task,
        );
    }

    let diff = compressor::diff_content(&old_content, &disk_content);
    let diff_tokens = count_tokens(&diff);
    let full_tokens = store_result.original_tokens;

    // Send the delta only when it is meaningfully smaller than the full file.
    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
        let savings = protocol::format_savings(full_tokens, diff_tokens);
        let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
            format!("{file_ref}={short}")
        } else {
            short.to_string()
        };
        let out = format!(
            "{head} [auto-delta] ∆{}L\n{diff}\n{savings}",
            disk_content.lines().count()
        );
        // The reported count covers the diff only, not the header or savings line.
        return (out, diff_tokens);
    }

    format_full_output(
        file_ref,
        short,
        ext,
        &disk_content,
        store_result.original_tokens,
        store_result.line_count,
        task,
    )
}
800
801fn format_full_output(
802 file_ref: &str,
803 short: &str,
804 ext: &str,
805 content: &str,
806 original_tokens: usize,
807 line_count: usize,
808 _task: Option<&str>,
809) -> (String, usize) {
810 let tokens = original_tokens;
811 let metadata = build_header(file_ref, short, ext, content, line_count, true);
812
813 let output = format!("{metadata}\n{content}");
814 let sent = count_tokens(&output);
815 (protocol::append_savings(&output, tokens, sent), sent)
816}
817
818fn build_header(
819 file_ref: &str,
820 short: &str,
821 ext: &str,
822 content: &str,
823 line_count: usize,
824 include_deps: bool,
825) -> String {
826 let mut header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
827 format!("{file_ref}={short} {line_count}L")
828 } else {
829 format!("{short} {line_count}L")
830 };
831
832 if include_deps {
833 let dep_info = deps::extract_deps(content, ext);
834 if !dep_info.imports.is_empty() {
835 let imports_str: Vec<&str> = dep_info
836 .imports
837 .iter()
838 .take(8)
839 .map(std::string::String::as_str)
840 .collect();
841 header.push_str(&format!("\n deps {}", imports_str.join(",")));
842 }
843 if !dep_info.exports.is_empty() {
844 let exports_str: Vec<&str> = dep_info
845 .exports
846 .iter()
847 .take(8)
848 .map(std::string::String::as_str)
849 .collect();
850 header.push_str(&format!("\n exports {}", exports_str.join(",")));
851 }
852 }
853
854 header
855}
856
/// Renders `content` according to `mode` and returns `(rendered_text, token_count)`.
///
/// Supported modes: `auto`, `full`, `signatures`, `map`, `aggressive`,
/// `entropy`, `task`, `reference` and `lines:<ranges>`. Any other mode falls
/// back to the full content with a warning line.
///
/// The returned token count is what each branch treats as "sent"; in most
/// branches it excludes the savings footer and compressed hint that are
/// appended to the text afterwards.
#[allow(clippy::too_many_arguments)]
fn process_mode(
    content: &str,
    mode: &str,
    file_ref: &str,
    short: &str,
    ext: &str,
    original_tokens: usize,
    crp_mode: CrpMode,
    file_path: &str,
    task: Option<&str>,
) -> (String, usize) {
    let line_count = content.lines().count();

    match mode {
        // Resolve to a concrete mode, then re-dispatch.
        "auto" => {
            let chosen = resolve_auto_mode(file_path, original_tokens, task);
            process_mode(
                content,
                &chosen,
                file_ref,
                short,
                ext,
                original_tokens,
                crp_mode,
                file_path,
                task,
            )
        }
        "full" => format_full_output(
            file_ref,
            short,
            ext,
            content,
            original_tokens,
            line_count,
            task,
        ),
        // Header, up to eight imports, then every extracted signature.
        "signatures" => {
            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                format!("{file_ref}={short} {line_count}L")
            } else {
                format!("{short} {line_count}L")
            };
            if !dep_info.imports.is_empty() {
                let imports_str: Vec<&str> = dep_info
                    .imports
                    .iter()
                    .take(8)
                    .map(std::string::String::as_str)
                    .collect();
                output.push_str(&format!("\n deps {}", imports_str.join(",")));
            }
            for sig in &sigs {
                output.push('\n');
                if crp_mode.is_tdd() {
                    output.push_str(&sig.to_tdd());
                } else {
                    output.push_str(&sig.to_compact());
                }
            }
            let sent = count_tokens(&output);
            (
                append_compressed_hint(
                    &protocol::append_savings(&output, original_tokens, sent),
                    file_path,
                ),
                sent,
            )
        }
        // Header, all imports and exports, then the exported/top-level signatures.
        "map" => {
            // PHP has a dedicated map compressor; fall through when it yields nothing.
            if ext == "php" {
                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
                {
                    let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                        format!("{file_ref}={short} {line_count}L\n{php_map}")
                    } else {
                        format!("{short} {line_count}L\n{php_map}")
                    };
                    let sent = count_tokens(&output);
                    let output = protocol::append_savings(&output, original_tokens, sent);
                    return (append_compressed_hint(&output, file_path), sent);
                }
            }

            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                format!("{file_ref}={short} {line_count}L")
            } else {
                format!("{short} {line_count}L")
            };

            if !dep_info.imports.is_empty() {
                output.push_str("\n deps: ");
                output.push_str(&dep_info.imports.join(", "));
            }

            if !dep_info.exports.is_empty() {
                output.push_str("\n exports: ");
                output.push_str(&dep_info.exports.join(", "));
            }

            // Keep only signatures that are exported or sit at indent level 0.
            let key_sigs: Vec<&signatures::Signature> = sigs
                .iter()
                .filter(|s| s.is_exported || s.indent == 0)
                .collect();

            if !key_sigs.is_empty() {
                output.push_str("\n API:");
                for sig in &key_sigs {
                    output.push_str("\n ");
                    if crp_mode.is_tdd() {
                        output.push_str(&sig.to_tdd());
                    } else {
                        output.push_str(&sig.to_compact());
                    }
                }
            }

            let sent = count_tokens(&output);
            (
                append_compressed_hint(
                    &protocol::append_savings(&output, original_tokens, sent),
                    file_path,
                ),
                sent,
            )
        }
        // Optional AST pruning, then text compression, then optional symbol substitution.
        "aggressive" => {
            #[cfg(feature = "tree-sitter")]
            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
            #[cfg(not(feature = "tree-sitter"))]
            let ast_pruned: Option<String> = None;

            // Compress the pruned text when available, else the raw content.
            let base = ast_pruned.as_deref().unwrap_or(content);

            let session_intent = crate::core::session::SessionState::load_latest()
                .and_then(|s| s.active_structured_intent);
            let raw = if let Some(ref intent) = session_intent {
                compressor::task_aware_compress(base, Some(ext), intent)
            } else {
                compressor::aggressive_compress(base, Some(ext))
            };
            let compressed = compressor::safeguard_ratio(content, &raw);
            let header = build_header(file_ref, short, ext, content, line_count, true);

            let mut sym = SymbolMap::new();
            let idents = symbol_map::extract_identifiers(&compressed, ext);
            for ident in &idents {
                sym.register(ident);
            }

            if sym.len() >= 3 {
                let sym_table = sym.format_table();
                let sym_applied = sym.apply(&compressed);
                let orig_tok = count_tokens(&compressed);
                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
                let net = orig_tok.saturating_sub(comp_tok);
                // Use symbol substitution only when it saves at least 5% of tokens,
                // counting the symbol table itself.
                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
                    let savings = protocol::format_savings(original_tokens, comp_tok);
                    return (
                        append_compressed_hint(
                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
                            file_path,
                        ),
                        comp_tok,
                    );
                }
                let savings = protocol::format_savings(original_tokens, orig_tok);
                return (
                    append_compressed_hint(
                        &format!("{header}\n{compressed}\n{savings}"),
                        file_path,
                    ),
                    orig_tok,
                );
            }

            let sent = count_tokens(&compressed);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
                sent,
            )
        }
        // Entropy-based compression; the header carries the average entropy and techniques used.
        "entropy" => {
            let result = entropy::entropy_compress_adaptive(content, file_path);
            let avg_h = entropy::analyze_entropy(content).avg_entropy;
            let header = build_header(file_ref, short, ext, content, line_count, false);
            let techs = result.techniques.join(", ");
            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            let compression_ratio = if original_tokens > 0 {
                1.0 - (sent as f64 / original_tokens as f64)
            } else {
                0.0
            };
            // Report success to the adaptive thresholds when more than 15% was saved.
            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        // Keep only content relevant to the task's keywords; without a usable
        // task the full content is returned with an explanatory trailer.
        "task" => {
            let task_str = task.unwrap_or("");
            if task_str.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
                let sent = count_tokens(&out);
                return (out, sent);
            }
            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
            if keywords.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!(
                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
                );
                let sent = count_tokens(&out);
                return (out, sent);
            }
            let filtered =
                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
            let filtered_lines = filtered.lines().count();
            let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                format!("{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
            } else {
                format!("{short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
            };
            // Optional graph-context block, controlled by the active profile.
            let graph_ctx = if crate::core::profiles::active_profile()
                .output_hints
                .graph_context_block()
            {
                let project_root = detect_project_root(file_path);
                crate::core::graph_context::build_graph_context(
                    file_path,
                    &project_root,
                    Some(crate::core::graph_context::GraphContextOptions::default()),
                )
                .map(|c| crate::core::graph_context::format_graph_context(&c))
                .unwrap_or_default()
            } else {
                String::new()
            };

            // Token count is the sum of the parts rather than a count of the joined text.
            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(
                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
                    file_path,
                ),
                sent,
            )
        }
        // One-line summary only: line count, token count and extension.
        "reference" => {
            let tok = count_tokens(content);
            let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})")
            } else {
                format!("{short}: {line_count} lines, {tok} tok ({ext})")
            };
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{output}\n{savings}"), sent)
        }
        mode if mode.starts_with("lines:") => {
            // Skip the 6-byte "lines:" prefix to get the range list.
            let range_str = &mode[6..];
            let extracted = extract_line_range(content, range_str);
            let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                format!("{file_ref}={short} {line_count}L lines:{range_str}")
            } else {
                format!("{short} {line_count}L lines:{range_str}")
            };
            // Only the extracted lines are counted, not the header.
            let sent = count_tokens(&extracted);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{header}\n{extracted}\n{savings}"), sent)
        }
        unknown => {
            let header = build_header(file_ref, short, ext, content, line_count, true);
            let out = format!(
                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
            );
            let sent = count_tokens(&out);
            (out, sent)
        }
    }
}
1150
/// Renders the lines of `content` selected by `range_str`, each prefixed with
/// its 1-based line number right-aligned to four columns.
///
/// `range_str` is a comma-separated list whose items are either a single line
/// number (`"7"`) or a `start-end` span (`"3-9"`). Within a span, a start that
/// does not parse falls back to line 1 and an end that does not parse falls
/// back to the last line; both ends are clamped to the file. Items that select
/// nothing (out of bounds, reversed span, non-numeric single value) are
/// skipped. Selected lines are emitted in the order the items appear.
///
/// Returns `"No lines matched the range."` when nothing was selected.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let all_lines: Vec<&str> = content.lines().collect();
    let line_total = all_lines.len();
    // Callers below only pass numbers already known to lie in 1..=line_total.
    let render = |number: usize| format!("{number:>4}| {}", all_lines[number - 1]);

    let mut picked: Vec<String> = Vec::new();
    for spec in range_str.split(',').map(str::trim) {
        match spec.split_once('-') {
            Some((lo, hi)) => {
                let first = lo.trim().parse::<usize>().map_or(1, |v| v.max(1));
                let last = hi
                    .trim()
                    .parse::<usize>()
                    .map_or(line_total, |v| v.min(line_total));
                // `first >= 1` and `last <= line_total`, so every index is valid;
                // a reversed span is simply an empty range.
                picked.extend((first..=last).map(&render));
            }
            None => {
                if let Ok(number) = spec.parse::<usize>() {
                    if (1..=line_total).contains(&number) {
                        picked.push(render(number));
                    }
                }
            }
        }
    }

    if picked.is_empty() {
        return "No lines matched the range.".to_string();
    }
    picked.join("\n")
}
1179
1180fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
1181 let short = protocol::shorten_path(path);
1182 let old_content = cache
1183 .get(path)
1184 .and_then(crate::core::cache::CacheEntry::content);
1185
1186 let new_content = match read_file_lossy(path) {
1187 Ok(c) => c,
1188 Err(e) => {
1189 let msg = format!("ERROR: {e}");
1190 let tokens = count_tokens(&msg);
1191 return (msg, tokens);
1192 }
1193 };
1194
1195 let original_tokens = count_tokens(&new_content);
1196
1197 let diff_output = if let Some(old) = &old_content {
1198 compressor::diff_content(old, &new_content)
1199 } else {
1200 cache.store(path, &new_content);
1203 let msg = format!(
1204 "{file_ref}={short} [no cached version for diff — use mode=full first, then diff on re-read]"
1205 );
1206 let sent = count_tokens(&msg);
1207 return (msg, sent);
1208 };
1209
1210 cache.store(path, &new_content);
1211
1212 let sent = count_tokens(&diff_output);
1213 let savings = protocol::format_savings(original_tokens, sent);
1214 let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1215 format!("{file_ref}={short}")
1216 } else {
1217 short.clone()
1218 };
1219 (format!("{head} [diff]\n{diff_output}\n{savings}"), sent)
1220}
1221
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Environment variable the header tests set to `"1"` before building headers.
    const META_ENV: &str = "LEAN_CTX_META";

    /// Number of lines requested from `generate_benchmark_code` in the benchmark.
    const BENCH_LINES: usize = 200;

    /// Task description used by the compression benchmark.
    const BENCH_TASK: &str = "fix authentication in validate_token";

    /// Sets `LEAN_CTX_META=1` on creation and removes it on drop, so the
    /// variable is cleaned up even when an assertion in the test panics.
    ///
    /// Create it *after* taking `test_env_lock()`: locals drop in reverse
    /// declaration order, so the variable is removed while the lock is held.
    struct MetaEnvGuard;

    impl MetaEnvGuard {
        fn enable() -> Self {
            std::env::set_var(META_ENV, "1");
            Self
        }
    }

    impl Drop for MetaEnvGuard {
        fn drop(&mut self) {
            std::env::remove_var(META_ENV);
        }
    }

    /// Calls `process_mode` with the arguments every test in this module
    /// shares: file ref `F1`, extension `rs`, `CrpMode::Off`, and `file` used
    /// as both the short name and the file path.
    fn run_mode(
        content: &str,
        mode: &str,
        file: &str,
        original_tokens: usize,
        task: Option<&str>,
    ) -> (String, usize) {
        process_mode(
            content,
            mode,
            "F1",
            file,
            "rs",
            original_tokens,
            CrpMode::Off,
            file,
            task,
        )
    }

    /// Returns the second line of `output` (the first line after the header),
    /// with any `<number>|` prefix and surrounding whitespace removed.
    fn first_body_line(output: &str) -> &str {
        let line = output.lines().nth(1).unwrap_or_default().trim();
        line.split_once('|').map_or(line, |(_, text)| text.trim())
    }

    /// A header built without deps/exports contains no square brackets.
    #[test]
    fn test_header_toon_format_no_brackets() {
        let _lock = crate::core::data_dir::test_env_lock();
        let _meta = MetaEnvGuard::enable();
        let content = "use std::io;\nfn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 2, false);
        assert!(!header.contains('['));
        assert!(!header.contains(']'));
        assert!(header.contains("F1=main.rs 2L"));
    }

    /// When a header lists deps, they appear on their own indented line and
    /// not in the old `deps:[...]` bracket form.
    #[test]
    fn test_header_toon_deps_indented() {
        let _lock = crate::core::data_dir::test_env_lock();
        let _meta = MetaEnvGuard::enable();
        let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 3, true);
        // Only checked when deps were actually emitted for this input.
        if header.contains("deps") {
            assert!(
                header.contains("\n deps "),
                "deps should use indented TOON format"
            );
            assert!(
                !header.contains("deps:["),
                "deps should not use bracket format"
            );
        }
    }

    /// The current header costs no more tokens than the old bracket format.
    #[test]
    fn test_header_toon_saves_tokens() {
        let _lock = crate::core::data_dir::test_env_lock();
        let _meta = MetaEnvGuard::enable();
        let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
        let old_tokens = count_tokens("F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]");
        let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
        let new_tokens = count_tokens(&new_header);
        assert!(
            new_tokens <= old_tokens,
            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
        );
    }

    /// Every listed symbol tokenizes to at most two tokens.
    #[test]
    fn test_tdd_symbols_are_compact() {
        let symbols = [
            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
        ];
        for sym in symbols {
            let tok = count_tokens(sym);
            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
        }
    }

    /// With a task set, `task` mode sends fewer tokens than the full file and
    /// marks the output as task-filtered.
    #[test]
    fn test_task_mode_filters_content() {
        // 200 one-line functions; every 20th one mentions the task keyword.
        let content = (0..200)
            .map(|i| {
                if i % 20 == 0 {
                    format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
                } else {
                    format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
                }
            })
            .collect::<Vec<_>>()
            .join("\n");
        let full_tokens = count_tokens(&content);
        let (result, result_tokens) = run_mode(
            &content,
            "task",
            "test.rs",
            full_tokens,
            Some("fix bug in validate_token"),
        );
        assert!(
            result_tokens < full_tokens,
            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
        );
        assert!(
            result.contains("task-filtered"),
            "output should contain task-filtered marker"
        );
    }

    /// Without a task, `task` mode says so in its output.
    #[test]
    fn test_task_mode_without_task_returns_full() {
        let content = "fn main() {}\nfn helper() {}\n";
        let (result, _sent) = run_mode(content, "task", "test.rs", count_tokens(content), None);
        assert!(
            result.contains("no task set"),
            "should indicate no task: {result}"
        );
    }

    /// `reference` mode yields at most three lines and reports line and token counts.
    #[test]
    fn test_reference_mode_one_line() {
        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
        let (result, _sent) =
            run_mode(content, "reference", "test.rs", count_tokens(content), None);
        let emitted = result.lines().count();
        assert!(
            emitted <= 3,
            "reference mode should be very compact, got {} lines",
            emitted
        );
        assert!(result.contains("lines"), "should contain line count");
        assert!(result.contains("tok"), "should contain token count");
    }

    /// A `lines:` read through the session cache reflects a file rewritten
    /// after the first read.
    #[test]
    fn cached_lines_mode_invalidates_on_mtime_change() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("file.txt");
        let p = path.to_string_lossy().to_string();

        std::fs::write(&path, "one\nsecond\n").unwrap();
        let mut cache = SessionCache::new();

        let first = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        assert_eq!(first_body_line(&first.content), "one");

        // Wait so the rewrite gets a later modification time than the first write.
        std::thread::sleep(Duration::from_secs(1));
        std::fs::write(&path, "two\nsecond\n").unwrap();

        let second = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        assert_eq!(first_body_line(&second.content), "two");
    }

    /// Compares token counts of `full`, `task`, `signatures` and `reference`
    /// on a generated file, logs the savings, and checks their ordering.
    #[test]
    #[cfg_attr(tarpaulin, ignore)]
    fn benchmark_task_conditioned_compression() {
        let content = generate_benchmark_code(BENCH_LINES);
        let full_tokens = count_tokens(&content);
        let task = Some(BENCH_TASK);

        let (_full_output, full_tok) = run_mode(&content, "full", "server.rs", full_tokens, task);
        let (_task_output, task_tok) = run_mode(&content, "task", "server.rs", full_tokens, task);
        let (_sig_output, sig_tok) =
            run_mode(&content, "signatures", "server.rs", full_tokens, task);
        let (_ref_output, ref_tok) =
            run_mode(&content, "reference", "server.rs", full_tokens, task);

        // Percentage saved relative to the `full` baseline.
        let savings_pct = |tok: usize| (1.0 - tok as f64 / full_tok as f64) * 100.0;

        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
        eprintln!("Source: {BENCH_LINES}-line Rust file, task='{BENCH_TASK}'");
        eprintln!(" full: {full_tok:>6} tokens (baseline)");
        eprintln!(
            " task: {task_tok:>6} tokens ({:.0}% savings)",
            savings_pct(task_tok)
        );
        eprintln!(
            " signatures: {sig_tok:>6} tokens ({:.0}% savings)",
            savings_pct(sig_tok)
        );
        eprintln!(
            " reference: {ref_tok:>6} tokens ({:.0}% savings)",
            savings_pct(ref_tok)
        );
        eprintln!("================================================\n");

        assert!(task_tok < full_tok, "task mode should save tokens");
        assert!(sig_tok < full_tok, "signatures should save tokens");
        assert!(ref_tok < sig_tok, "reference should be most compact");
    }

    /// Builds a synthetic Rust-like source: a fixed preamble containing
    /// `validate_token`, filler handler lines until `lines` entries are
    /// reached, and a final closing brace (so the result has one line more
    /// than the larger of `lines` and the preamble length).
    fn generate_benchmark_code(lines: usize) -> String {
        const PREAMBLE: [&str; 17] = [
            "use std::collections::HashMap;",
            "use crate::core::auth;",
            "",
            "pub struct Server {",
            " config: Config,",
            " cache: HashMap<String, String>,",
            "}",
            "",
            "impl Server {",
            " pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {",
            " let decoded = auth::decode_jwt(token)?;",
            " if decoded.exp < chrono::Utc::now().timestamp() {",
            " return Err(AuthError::Expired);",
            " }",
            " Ok(decoded.claims)",
            " }",
            "",
        ];

        // +1 leaves room for the closing brace appended at the end.
        let mut code: Vec<String> = Vec::with_capacity(lines.max(PREAMBLE.len()) + 1);
        code.extend(PREAMBLE.iter().map(|line| (*line).to_string()));

        let remaining = lines.saturating_sub(code.len());
        code.extend((0..remaining).map(|i| match i % 30 {
            0 => format!(" pub fn handler_{i}(&self, req: Request) -> Response {{"),
            29 => " }".to_string(),
            _ => format!(" let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"),
        }));
        code.push("}".to_string());
        code.join("\n")
    }

    /// Agent instruction/rule files are detected; ordinary project files are not.
    #[test]
    fn instruction_file_detection() {
        let instruction_files = [
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
            "/workspace/.cursor/rules/lean-ctx.mdc",
            "/project/AGENTS.md",
            "/project/.cursorrules",
            "/home/user/.claude/rules/my-rule.md",
            "/skills/some-skill/README.md",
        ];
        for path in instruction_files {
            assert!(
                is_instruction_file(path),
                "{path} should be detected as an instruction file"
            );
        }

        let regular_files = [
            "/project/src/main.rs",
            "/project/config.json",
            "/project/data/report.csv",
        ];
        for path in regular_files {
            assert!(
                !is_instruction_file(path),
                "{path} should not be detected as an instruction file"
            );
        }
    }

    /// Auto mode resolves to `full` for instruction files, for each token
    /// count and hint tried here.
    #[test]
    fn resolve_auto_mode_returns_full_for_instruction_files() {
        let skill_mode = resolve_auto_mode(
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
            5000,
            Some("read"),
        );
        assert_eq!(skill_mode, "full", "SKILL.md must always be read in full");

        let agents_mode = resolve_auto_mode("/workspace/AGENTS.md", 3000, Some("read"));
        assert_eq!(agents_mode, "full", "AGENTS.md must always be read in full");

        let rules_mode = resolve_auto_mode("/workspace/.cursorrules", 2000, None);
        assert_eq!(
            rules_mode, "full",
            ".cursorrules must always be read in full"
        );
    }
}