1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a single read request.
///
/// Bundles the text handed back to the caller with the mode that was
/// actually applied and the token count recorded for that text.
#[derive(Debug, Clone)]
pub struct ReadOutput {
    /// Text returned to the caller.
    pub content: String,
    /// Mode label recorded for this read (for example `"full"`, `"diff"`,
    /// `"error"`, or the mode an `"auto"` request resolved to).
    pub resolved_mode: String,
    /// Token count recorded for `content`.
    pub output_tokens: usize,
}
22
/// Footer line appended to compressed output, pointing at the full-source mode.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Modes whose rendered output is kept in the per-file compressed cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is listed in [`CACHEABLE_MODES`].
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|candidate| *candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode, task: Option<&str>) -> String {
32 let base = if crp_mode.is_tdd() {
33 format!("{mode}:tdd")
34 } else {
35 mode.to_string()
36 };
37 match task.map(str::trim).filter(|t| !t.is_empty()) {
40 Some(t) => {
41 use std::hash::{Hash, Hasher};
42 let mut h = std::collections::hash_map::DefaultHasher::new();
43 t.hash(&mut h);
44 format!("{base}:t{:x}", h.finish())
45 }
46 None => base,
47 }
48}
49
/// Produces a short excerpt of `content` to accompany an "unchanged" stub.
///
/// Returns `None` for the first read (`read_count < 2`) or when `content`
/// has no non-blank line. Otherwise returns the first non-blank line,
/// trimmed; lines longer than 60 bytes are cut to at most 57 bytes (backing
/// up to a character boundary) and suffixed with `...`.
fn cache_hit_proof_line(content: &str, read_count: u32) -> Option<String> {
    const MAX_LINE_BYTES: usize = 60;
    const KEPT_BYTES: usize = 57;

    if read_count < 2 {
        return None;
    }

    let line = content.lines().map(str::trim).find(|l| !l.is_empty())?;
    if line.len() <= MAX_LINE_BYTES {
        return Some(line.to_string());
    }

    // Index 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=KEPT_BYTES)
        .rev()
        .find(|&idx| line.is_char_boundary(idx))
        .unwrap_or(0);
    Some(format!("{}...", &line[..cut]))
}
69
70fn append_compressed_hint(output: &str, file_path: &str) -> String {
71 if !crate::core::profiles::active_profile()
72 .output_hints
73 .compressed_hint()
74 {
75 return output.to_string();
76 }
77 format!(
78 "{output}\n{COMPRESSED_HINT}\n ctx_read(\"{file_path}\", mode=\"full\") | ctx_retrieve(\"{file_path}\")"
79 )
80}
81
82pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
86 if crate::core::binary_detect::is_binary_file(path) {
87 let msg = crate::core::binary_detect::binary_file_message(path);
88 return Err(std::io::Error::other(msg));
89 }
90
91 {
92 let canonical =
93 crate::core::pathutil::safe_canonicalize_bounded(std::path::Path::new(path), 2000);
94 if let Ok(cwd) = std::env::current_dir() {
95 let root = crate::core::pathutil::safe_canonicalize_bounded(&cwd, 2000);
96 if !canonical.starts_with(&root) {
97 let allow = crate::core::pathjail::allow_paths_from_env_and_config();
98 let data_dir_ok = crate::core::data_dir::lean_ctx_data_dir()
99 .ok()
100 .is_some_and(|d| canonical.starts_with(d));
101 let tmp_ok = canonical.starts_with(std::env::temp_dir());
102 if !allow.iter().any(|a| canonical.starts_with(a)) && !data_dir_ok && !tmp_ok {
103 tracing::warn!(
104 "defense-in-depth: path may escape project root: {}",
105 canonical.display()
106 );
107 }
108 }
109 }
110 }
111
112 let cap = crate::core::limits::max_read_bytes();
113
114 let file = open_with_retry(path)?;
115 let meta = file
116 .metadata()
117 .map_err(|e| std::io::Error::other(format!("cannot stat open file descriptor: {e}")))?;
118 if meta.len() > cap as u64 {
119 return Err(std::io::Error::other(format!(
120 "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
121 Increase the limit or use a line-range read: mode=\"lines:1-100\"",
122 meta.len(),
123 cap
124 )));
125 }
126
127 use std::io::Read;
128 let mut bytes = Vec::with_capacity(meta.len() as usize);
129 std::io::BufReader::new(file).read_to_end(&mut bytes)?;
130 match String::from_utf8(bytes) {
131 Ok(s) => Ok(s),
132 Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
133 }
134}
135
136fn open_with_retry(path: &str) -> Result<std::fs::File, std::io::Error> {
140 match open_nofollow(path) {
141 Ok(f) => Ok(f),
142 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
143 std::thread::sleep(std::time::Duration::from_millis(50));
144 open_nofollow(path).map_err(|e| {
145 if e.kind() == std::io::ErrorKind::NotFound {
146 std::io::Error::other(format!(
147 "file not found: {path} — verify the path with ctx_tree or ctx_search"
148 ))
149 } else {
150 e
151 }
152 })
153 }
154 Err(e) => Err(e),
155 }
156}
157
158#[cfg(unix)]
159fn open_nofollow(path: &str) -> Result<std::fs::File, std::io::Error> {
160 use std::os::unix::fs::OpenOptionsExt;
161 use std::path::Path;
162
163 let p = Path::new(path);
164 if let (Some(parent), Some(filename)) = (p.parent(), p.file_name()) {
169 if parent.exists() {
170 let canonical_parent = crate::core::pathutil::safe_canonicalize_bounded(parent, 2000);
171 let canonical_path = canonical_parent.join(filename);
172 return std::fs::OpenOptions::new()
173 .read(true)
174 .custom_flags(libc::O_NOFOLLOW)
175 .open(&canonical_path);
176 }
177 }
178
179 std::fs::OpenOptions::new()
181 .read(true)
182 .custom_flags(libc::O_NOFOLLOW)
183 .open(path)
184}
185
/// Non-Unix variant: opens `path` read-only with the platform defaults.
#[cfg(not(unix))]
fn open_nofollow(path: &str) -> Result<std::fs::File, std::io::Error> {
    std::fs::OpenOptions::new().read(true).open(path)
}
190
191pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
193 handle_with_options(cache, path, mode, false, crp_mode, None)
194}
195
196pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
198 handle_with_options(cache, path, mode, true, crp_mode, None)
199}
200
201pub fn handle_with_task(
203 cache: &mut SessionCache,
204 path: &str,
205 mode: &str,
206 crp_mode: CrpMode,
207 task: Option<&str>,
208) -> String {
209 handle_with_options(cache, path, mode, false, crp_mode, task)
210}
211
212pub fn handle_with_task_resolved(
214 cache: &mut SessionCache,
215 path: &str,
216 mode: &str,
217 crp_mode: CrpMode,
218 task: Option<&str>,
219) -> ReadOutput {
220 handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
221}
222
223pub fn handle_fresh_with_task(
225 cache: &mut SessionCache,
226 path: &str,
227 mode: &str,
228 crp_mode: CrpMode,
229 task: Option<&str>,
230) -> String {
231 handle_with_options(cache, path, mode, true, crp_mode, task)
232}
233
234pub fn handle_fresh_with_task_resolved(
236 cache: &mut SessionCache,
237 path: &str,
238 mode: &str,
239 crp_mode: CrpMode,
240 task: Option<&str>,
241) -> ReadOutput {
242 handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
243}
244
245fn handle_with_options(
246 cache: &mut SessionCache,
247 path: &str,
248 mode: &str,
249 fresh: bool,
250 crp_mode: CrpMode,
251 task: Option<&str>,
252) -> String {
253 handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
254}
255
/// Reports whether reads should always be treated as fresh for this process.
///
/// True when `LEAN_CTX_FORCE_FRESH` is `"1"` or `"true"`, or when
/// `CURSOR_TASK_ID` is set to a non-empty value. The environment is
/// inspected once; later calls return the memoized answer.
fn is_subagent_context() -> bool {
    use std::sync::OnceLock;

    static DETECTED: OnceLock<bool> = OnceLock::new();

    fn detect() -> bool {
        let forced =
            std::env::var("LEAN_CTX_FORCE_FRESH").is_ok_and(|v| v == "1" || v == "true");
        forced || std::env::var("CURSOR_TASK_ID").is_ok_and(|v| !v.is_empty())
    }

    *DETECTED.get_or_init(detect)
}
267
268fn handle_with_options_resolved(
269 cache: &mut SessionCache,
270 path: &str,
271 mode: &str,
272 fresh: bool,
273 crp_mode: CrpMode,
274 task: Option<&str>,
275) -> ReadOutput {
276 let effective_fresh = fresh || is_subagent_context();
277
278 if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
279 bt.next_seq();
280 }
281 let mut result = handle_with_options_inner(cache, path, mode, effective_fresh, crp_mode, task);
282
283 if let Some(entry) = cache.get_mut(path) {
284 entry.last_mode.clone_from(&result.resolved_mode);
285 }
286
287 let dedup_allowed = matches!(
288 result.resolved_mode.as_str(),
289 "map" | "signatures" | "aggressive" | "entropy" | "task"
290 );
291 if dedup_allowed {
292 if let Some(deduped) = cache.apply_dedup(path, &result.content) {
293 let new_tokens = count_tokens(&deduped);
294 if new_tokens < result.output_tokens {
295 result.content = deduped;
296 result.output_tokens = new_tokens;
297 }
298 }
299 }
300
301 if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
302 let original_tokens = cache.get(path).map_or(0, |e| e.original_tokens);
303 bt.record_read(
304 path,
305 &result.resolved_mode,
306 result.output_tokens,
307 original_tokens,
308 );
309 }
310
311 result
312}
313
314fn handle_with_options_inner(
315 cache: &mut SessionCache,
316 path: &str,
317 mode: &str,
318 fresh: bool,
319 crp_mode: CrpMode,
320 task: Option<&str>,
321) -> ReadOutput {
322 let file_ref = cache.get_file_ref(path);
323 let short = protocol::shorten_path(path);
324 let ext = Path::new(path)
325 .extension()
326 .and_then(|e| e.to_str())
327 .unwrap_or("");
328
329 if fresh {
330 if mode == "diff" {
331 let warning = "[warning] fresh+diff is redundant — fresh invalidates cache, no diff possible. Use mode=full with fresh=true instead.";
332 return ReadOutput {
333 content: warning.to_string(),
334 resolved_mode: "diff".into(),
335 output_tokens: count_tokens(warning),
336 };
337 }
338 cache.invalidate(path);
339 }
340
341 if mode == "diff" {
342 let (out, _) = handle_diff(cache, path, &file_ref);
343 let out = crate::core::redaction::redact_text_if_enabled(&out);
344 let sent = count_tokens(&out);
345 return ReadOutput {
346 content: out,
347 resolved_mode: "diff".into(),
348 output_tokens: sent,
349 };
350 }
351
352 if mode != "full" {
353 if let Some(existing) = cache.get(path) {
354 let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
355 if stale {
356 cache.invalidate(path);
357 }
358 }
359 }
360
361 let cache_snapshot = cache.get(path).map(|existing| {
364 (
365 existing.stored_mtime,
366 existing.read_count,
367 existing.line_count,
368 existing.original_tokens,
369 existing.content(),
370 )
371 });
372
373 if let Some((cached_mtime, read_count, line_count, original_tokens, content_opt)) =
374 cache_snapshot
375 {
376 if mode == "full" {
377 let no_deg = crate::core::config::Config::load().no_degrade_effective();
378 let prof = crate::core::profiles::active_profile();
379 let force_full = no_deg
380 || (prof.read.default_mode_effective() == "full"
381 && prof.compression.crp_mode_effective() == "off");
382 let policy_allows_stub =
383 crate::server::compaction_sync::effective_cache_policy() != "safe" && !force_full;
384 if policy_allows_stub
385 && !crate::core::cache::is_cache_entry_stale(path, cached_mtime)
386 && cache.is_full_delivered(path)
387 {
388 cache.record_cache_hit(path);
389 let out = if crate::core::protocol::meta_visible() {
390 format!(
391 "{file_ref}={short} [unchanged {line_count}L]\nUnchanged on disk. Use fresh=true to force re-read.",
392 )
393 } else {
394 let proof = content_opt
395 .as_deref()
396 .and_then(|c| cache_hit_proof_line(c, read_count));
397 let reads_note = if read_count > 3 {
398 format!(" (read {}x)", read_count + 1)
399 } else {
400 String::new()
401 };
402 match proof {
403 Some(p) => format!(
404 "{file_ref}={short} [unchanged {line_count}L{reads_note} | \"{p}\"]"
405 ),
406 None => format!("{file_ref}={short} [unchanged {line_count}L{reads_note}]"),
407 }
408 };
409 let out = crate::core::redaction::redact_text_if_enabled(&out);
410 let sent = count_tokens(&out);
411 return ReadOutput {
412 content: out,
413 resolved_mode: "full".into(),
414 output_tokens: sent,
415 };
416 }
417 let (out, _) = handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
418 let out = crate::core::redaction::redact_text_if_enabled(&out);
419 let sent = count_tokens(&out);
420 return ReadOutput {
421 content: out,
422 resolved_mode: "full".into(),
423 output_tokens: sent,
424 };
425 }
426
427 let resolved_mode = if mode == "auto" {
430 resolve_auto_mode(path, original_tokens, task)
431 } else {
432 mode.to_string()
433 };
434
435 if is_cacheable_mode(&resolved_mode) {
436 let cache_key = compressed_cache_key(&resolved_mode, crp_mode, task);
437 let compressed_hit = cache.get_compressed(path, &cache_key).cloned();
438 if let Some(cached_output) = compressed_hit {
439 cache.record_cache_hit(path);
440 let out = crate::core::redaction::redact_text_if_enabled(&cached_output);
441 let sent = count_tokens(&out);
442 return ReadOutput {
443 content: out,
444 resolved_mode,
445 output_tokens: sent,
446 };
447 }
448 }
449
450 if let Some(content) = content_opt {
451 let (out, _) = process_mode(
452 &content,
453 &resolved_mode,
454 &file_ref,
455 &short,
456 ext,
457 original_tokens,
458 crp_mode,
459 path,
460 task,
461 );
462 if is_cacheable_mode(&resolved_mode) {
463 let cache_key = compressed_cache_key(&resolved_mode, crp_mode, task);
464 cache.set_compressed(path, &cache_key, out.clone());
465 }
466 let out = crate::core::redaction::redact_text_if_enabled(&out);
467 let sent = count_tokens(&out);
468 return ReadOutput {
469 content: out,
470 resolved_mode,
471 output_tokens: sent,
472 };
473 }
474 cache.invalidate(path);
475 }
476
477 let content = match read_file_lossy(path) {
478 Ok(c) => c,
479 Err(e) => {
480 let msg = format!("ERROR: {e}");
481 let tokens = count_tokens(&msg);
482 return ReadOutput {
483 content: msg,
484 resolved_mode: "error".into(),
485 output_tokens: tokens,
486 };
487 }
488 };
489
490 let store_result = cache.store(path, &content);
491
492 let is_line_range = mode.starts_with("lines:");
495 let hints = crate::core::profiles::active_profile().output_hints;
496 let is_repeat_read = store_result.read_count > 1;
497 let similar_hint = if !is_line_range && is_repeat_read && hints.semantic_hint() {
498 find_similar_and_update_semantic_index(path, &content)
499 } else {
500 None
501 };
502 let graph_hint = if !is_line_range && is_repeat_read && hints.related_hint() {
503 build_graph_related_hint(path)
504 } else {
505 None
506 };
507
508 if mode == "full" {
509 cache.mark_full_delivered(path);
510 let (mut output, _) = format_full_output(
511 &file_ref,
512 &short,
513 ext,
514 &content,
515 store_result.original_tokens,
516 store_result.line_count,
517 task,
518 );
519 if let Some(hint) = &graph_hint {
520 output.push_str(&format!("\n{hint}"));
521 }
522 if let Some(hint) = similar_hint {
523 output.push_str(&format!("\n{hint}"));
524 }
525 let output = crate::core::redaction::redact_text_if_enabled(&output);
526 let sent = count_tokens(&output);
527 return ReadOutput {
528 content: output,
529 resolved_mode: "full".into(),
530 output_tokens: sent,
531 };
532 }
533
534 let resolved_mode = if mode == "auto" {
535 resolve_auto_mode(path, store_result.original_tokens, task)
536 } else {
537 mode.to_string()
538 };
539
540 let (mut output, _sent) = process_mode(
541 &content,
542 &resolved_mode,
543 &file_ref,
544 &short,
545 ext,
546 store_result.original_tokens,
547 crp_mode,
548 path,
549 task,
550 );
551 if let Some(hint) = &graph_hint {
552 output.push_str(&format!("\n{hint}"));
553 }
554 if let Some(hint) = similar_hint {
555 output.push_str(&format!("\n{hint}"));
556 }
557 if is_cacheable_mode(&resolved_mode) {
558 let cache_key = compressed_cache_key(&resolved_mode, crp_mode, task);
559 cache.set_compressed(path, &cache_key, output.clone());
560 }
561 let output = crate::core::redaction::redact_text_if_enabled(&output);
562 let final_tokens = count_tokens(&output);
563 ReadOutput {
564 content: output,
565 resolved_mode,
566 output_tokens: final_tokens,
567 }
568}
569
/// Reports whether `path` points at an agent instruction / rules file.
///
/// Matching is case-insensitive. A path qualifies when its file name is one
/// of the known instruction file names, or when it contains one of the known
/// directory markers (`/skills/`, `/.cursor/rules/`, `/.claude/rules/`,
/// `/agents.md`).
///
/// Backslashes are treated as path separators, so Windows-style paths such
/// as `C:\repo\.cursor\rules\x.mdc` are recognized the same way as their
/// forward-slash equivalents.
pub fn is_instruction_file(path: &str) -> bool {
    // Normalize case and separators once so every check below sees `/`.
    let normalized = path.to_lowercase().replace('\\', "/");
    let filename = std::path::Path::new(&normalized)
        .file_name()
        .and_then(|f| f.to_str())
        .unwrap_or("");

    matches!(
        filename,
        "skill.md"
            | "agents.md"
            | "rules.md"
            | ".cursorrules"
            | ".clinerules"
            | "lean-ctx.md"
            | "lean-ctx.mdc"
    ) || normalized.contains("/skills/")
        || normalized.contains("/.cursor/rules/")
        || normalized.contains("/.claude/rules/")
        || normalized.contains("/agents.md")
}
591
592fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
594 let ctx = crate::core::auto_mode_resolver::AutoModeContext {
595 path: file_path,
596 token_count: original_tokens,
597 task,
598 cache: None,
599 };
600 crate::core::auto_mode_resolver::resolve(&ctx).mode
601}
602
603fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
604 const MAX_CONTENT_BYTES_FOR_SEMANTIC: usize = 32_768;
605
606 if content.len() > MAX_CONTENT_BYTES_FOR_SEMANTIC {
607 return None;
608 }
609
610 let cfg = crate::core::config::Config::load();
611 let profile = crate::core::config::MemoryProfile::effective(&cfg);
612 if !profile.semantic_cache_enabled() {
613 return None;
614 }
615
616 let project_root = detect_project_root(path);
617 let session_id = format!("{}", std::process::id());
618 let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
619
620 let similar = index.find_similar(content, 0.7);
621 let relevant: Vec<_> = similar
622 .into_iter()
623 .filter(|(p, _)| p != path)
624 .take(3)
625 .collect();
626
627 index.add_file(path, content, &session_id);
628 if let Err(e) = index.save(&project_root) {
629 tracing::warn!("lean-ctx: failed to persist semantic index: {e}");
630 }
631
632 if relevant.is_empty() {
633 return None;
634 }
635
636 let hints: Vec<String> = relevant
637 .iter()
638 .map(|(p, score)| format!(" {p} ({:.0}% similar)", score * 100.0))
639 .collect();
640
641 Some(format!(
642 "[semantic: {} similar file(s) in cache]\n{}",
643 relevant.len(),
644 hints.join("\n")
645 ))
646}
647
648fn detect_project_root(path: &str) -> String {
649 crate::core::protocol::detect_project_root_or_cwd(path)
650}
651
652fn build_graph_related_hint(path: &str) -> Option<String> {
653 let project_root = detect_project_root(path);
654 crate::core::graph_context::build_related_hint(path, &project_root, 5)
655}
656
657const AUTO_DELTA_THRESHOLD: f64 = 0.6;
658
659fn handle_full_with_auto_delta(
661 cache: &mut SessionCache,
662 path: &str,
663 file_ref: &str,
664 short: &str,
665 ext: &str,
666 task: Option<&str>,
667) -> (String, usize) {
668 let _mode_guard = crate::core::savings_footer::ModeGuard::new("full");
669 let Ok(disk_content) = read_file_lossy(path) else {
670 cache.record_cache_hit(path);
671 if let Some(existing) = cache.get(path) {
672 if !crate::core::protocol::meta_visible() {
673 if let Some(cached) = existing.content() {
674 return format_full_output(
675 file_ref,
676 short,
677 ext,
678 &cached,
679 existing.original_tokens,
680 existing.line_count,
681 task,
682 );
683 }
684 }
685 let out = format!(
686 "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
687 existing.read_count, existing.line_count
688 );
689 let sent = count_tokens(&out);
690 return (out, sent);
691 }
692 let out = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
693 format!("[file read failed and no cached version available] {file_ref}={short}")
694 } else {
695 format!("[file read failed and no cached version available] {short}")
696 };
697 let sent = count_tokens(&out);
698 return (out, sent);
699 };
700
701 let no_deg = crate::core::config::Config::load().no_degrade_effective();
702 let prof = crate::core::profiles::active_profile();
703 let force_full = no_deg
704 || (prof.read.default_mode_effective() == "full"
705 && prof.compression.crp_mode_effective() == "off");
706
707 let old_content = cache
708 .get(path)
709 .and_then(crate::core::cache::CacheEntry::content)
710 .unwrap_or_default();
711 let store_result = cache.store(path, &disk_content);
712
713 if store_result.was_hit {
714 let policy_allows_stub =
715 crate::server::compaction_sync::effective_cache_policy() != "safe" && !force_full;
716 if policy_allows_stub && store_result.full_content_delivered {
717 let out = if crate::core::protocol::meta_visible() {
718 format!(
719 "{file_ref}={short} [unchanged {}L]\nUnchanged on disk. Use fresh=true to force re-read.",
720 store_result.line_count
721 )
722 } else {
723 let proof = cache_hit_proof_line(&disk_content, store_result.read_count);
724 let reads_note = if store_result.read_count > 3 {
725 format!(" (read {}x)", store_result.read_count)
726 } else {
727 String::new()
728 };
729 match proof {
730 Some(p) => format!(
731 "{file_ref}={short} [unchanged {}L{reads_note} | \"{p}\"]",
732 store_result.line_count
733 ),
734 None => format!(
735 "{file_ref}={short} [unchanged {}L{reads_note}]",
736 store_result.line_count
737 ),
738 }
739 };
740 let sent = count_tokens(&out);
741 return (out, sent);
742 }
743 cache.mark_full_delivered(path);
744 return format_full_output(
745 file_ref,
746 short,
747 ext,
748 &disk_content,
749 store_result.original_tokens,
750 store_result.line_count,
751 task,
752 );
753 }
754
755 let diff = compressor::diff_content(&old_content, &disk_content);
756 let diff_tokens = count_tokens(&diff);
757 let full_tokens = store_result.original_tokens;
758
759 if !force_full
760 && full_tokens > 0
761 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD)
762 {
763 let savings = protocol::format_savings(full_tokens, diff_tokens);
764 let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
765 format!("{file_ref}={short}")
766 } else {
767 short.to_string()
768 };
769 let out = format!(
770 "{head} [auto-delta] ∆{}L\n{diff}\n{savings}",
771 disk_content.lines().count()
772 );
773 return (out, diff_tokens);
774 }
775
776 format_full_output(
777 file_ref,
778 short,
779 ext,
780 &disk_content,
781 store_result.original_tokens,
782 store_result.line_count,
783 task,
784 )
785}
786
787fn format_full_output(
788 file_ref: &str,
789 short: &str,
790 ext: &str,
791 content: &str,
792 original_tokens: usize,
793 line_count: usize,
794 _task: Option<&str>,
795) -> (String, usize) {
796 let _mode_guard = crate::core::savings_footer::ModeGuard::new("full");
797 let tokens = original_tokens;
798 let metadata = build_header(file_ref, short, ext, content, line_count, true);
799
800 let output = format!("{metadata}\n{content}");
801 let sent = count_tokens(&output);
802 (protocol::append_savings(&output, tokens, sent), sent)
803}
804
805fn build_header(
806 file_ref: &str,
807 short: &str,
808 ext: &str,
809 content: &str,
810 line_count: usize,
811 include_deps: bool,
812) -> String {
813 let mut header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
814 format!("{file_ref}={short} {line_count}L")
815 } else {
816 format!("{short} {line_count}L")
817 };
818
819 if include_deps {
820 let dep_info = deps::extract_deps(content, ext);
821 if !dep_info.imports.is_empty() {
822 let imports_str: Vec<&str> = dep_info
823 .imports
824 .iter()
825 .take(8)
826 .map(std::string::String::as_str)
827 .collect();
828 header.push_str(&format!("\n deps {}", imports_str.join(",")));
829 }
830 if !dep_info.exports.is_empty() {
831 let exports_str: Vec<&str> = dep_info
832 .exports
833 .iter()
834 .take(8)
835 .map(std::string::String::as_str)
836 .collect();
837 header.push_str(&format!("\n exports {}", exports_str.join(",")));
838 }
839 }
840
841 header
842}
843
844#[allow(clippy::too_many_arguments)]
845fn process_mode(
846 content: &str,
847 mode: &str,
848 file_ref: &str,
849 short: &str,
850 ext: &str,
851 original_tokens: usize,
852 crp_mode: CrpMode,
853 file_path: &str,
854 task: Option<&str>,
855) -> (String, usize) {
856 let _mode_guard = crate::core::savings_footer::ModeGuard::new(mode);
857 let line_count = content.lines().count();
858
859 match mode {
860 "auto" => {
861 let chosen = resolve_auto_mode(file_path, original_tokens, task);
862 process_mode(
863 content,
864 &chosen,
865 file_ref,
866 short,
867 ext,
868 original_tokens,
869 crp_mode,
870 file_path,
871 task,
872 )
873 }
874 "full" => format_full_output(
875 file_ref,
876 short,
877 ext,
878 content,
879 original_tokens,
880 line_count,
881 task,
882 ),
883 "signatures" => {
884 let sigs = signatures::extract_signatures(content, ext);
885 let dep_info = deps::extract_deps(content, ext);
886
887 let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
888 format!("{file_ref}={short} {line_count}L")
889 } else {
890 format!("{short} {line_count}L")
891 };
892 if !dep_info.imports.is_empty() {
893 let imports_str: Vec<&str> = dep_info
894 .imports
895 .iter()
896 .take(8)
897 .map(std::string::String::as_str)
898 .collect();
899 output.push_str(&format!("\n deps {}", imports_str.join(",")));
900 }
901 for sig in &sigs {
902 output.push('\n');
903 if crp_mode.is_tdd() {
904 output.push_str(&sig.to_tdd());
905 } else {
906 output.push_str(&sig.to_compact());
907 }
908 }
909 if let Some(body) = task_relevant_body(content, file_path, ext, task) {
910 output.push('\n');
911 output.push_str(&body);
912 }
913 let sent = count_tokens(&output);
914 (
915 append_compressed_hint(
916 &protocol::append_savings(&output, original_tokens, sent),
917 file_path,
918 ),
919 sent,
920 )
921 }
922 "map" => {
923 if ext == "php" {
924 if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
925 {
926 let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
927 format!("{file_ref}={short} {line_count}L\n{php_map}")
928 } else {
929 format!("{short} {line_count}L\n{php_map}")
930 };
931 let sent = count_tokens(&output);
932 let output = protocol::append_savings(&output, original_tokens, sent);
933 return (append_compressed_hint(&output, file_path), sent);
934 }
935 }
936
937 let structured = match ext {
938 "md" | "mdx" | "rst" => {
939 crate::core::structured_read::extract_markdown_outline(content)
940 }
941 "json" => crate::core::structured_read::extract_json_structure(content),
942 "yaml" | "yml" => crate::core::structured_read::extract_yaml_structure(content),
943 "toml" => crate::core::structured_read::extract_toml_structure(content),
944 _ if file_path.to_lowercase().ends_with(".lock")
945 || file_path.to_lowercase().ends_with("go.sum") =>
946 {
947 crate::core::structured_read::extract_lock_summary(content, file_path)
948 }
949 _ => String::new(),
950 };
951
952 if !structured.is_empty() {
953 let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
954 format!("{file_ref}={short} {line_count}L\n{structured}")
955 } else {
956 format!("{short} {line_count}L\n{structured}")
957 };
958 let sent = count_tokens(&output);
959 output = protocol::append_savings(&output, original_tokens, sent);
960 return (append_compressed_hint(&output, file_path), sent);
961 }
962
963 let sigs = signatures::extract_signatures(content, ext);
964 let dep_info = deps::extract_deps(content, ext);
965
966 let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
967 format!("{file_ref}={short} {line_count}L")
968 } else {
969 format!("{short} {line_count}L")
970 };
971
972 if !dep_info.imports.is_empty() {
973 output.push_str("\n deps: ");
974 output.push_str(&dep_info.imports.join(", "));
975 }
976
977 if !dep_info.exports.is_empty() {
978 output.push_str("\n exports: ");
979 output.push_str(&dep_info.exports.join(", "));
980 }
981
982 let key_sigs: Vec<&signatures::Signature> = sigs
983 .iter()
984 .filter(|s| s.is_exported || s.indent == 0)
985 .collect();
986
987 if !key_sigs.is_empty() {
988 output.push_str("\n API:");
989 for sig in &key_sigs {
990 output.push_str("\n ");
991 if crp_mode.is_tdd() {
992 output.push_str(&sig.to_tdd());
993 } else {
994 output.push_str(&sig.to_compact());
995 }
996 }
997 }
998
999 if let Some(body) = task_relevant_body(content, file_path, ext, task) {
1000 output.push('\n');
1001 output.push_str(&body);
1002 }
1003
1004 let sent = count_tokens(&output);
1005 (
1006 append_compressed_hint(
1007 &protocol::append_savings(&output, original_tokens, sent),
1008 file_path,
1009 ),
1010 sent,
1011 )
1012 }
1013 "aggressive" => {
1014 #[cfg(feature = "tree-sitter")]
1015 let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
1016 #[cfg(not(feature = "tree-sitter"))]
1017 let ast_pruned: Option<String> = None;
1018
1019 let base = ast_pruned.as_deref().unwrap_or(content);
1020
1021 let session_intent = crate::core::session::SessionState::load_latest()
1022 .and_then(|s| s.active_structured_intent);
1023 let raw = if let Some(ref intent) = session_intent {
1024 compressor::task_aware_compress(base, Some(ext), intent)
1025 } else {
1026 compressor::aggressive_compress(base, Some(ext))
1027 };
1028 let compressed = compressor::safeguard_ratio(content, &raw);
1029 let header = build_header(file_ref, short, ext, content, line_count, true);
1030
1031 let mut sym = SymbolMap::new();
1032 let idents = symbol_map::extract_identifiers(&compressed, ext);
1033 for ident in &idents {
1034 sym.register(ident);
1035 }
1036
1037 if symbol_map::substitution_enabled() && sym.len() >= 3 {
1038 let sym_table = sym.format_table();
1039 let sym_applied = sym.apply(&compressed);
1040 let orig_tok = count_tokens(&compressed);
1041 let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
1042 let net = orig_tok.saturating_sub(comp_tok);
1043 if orig_tok > 0 && net * 100 / orig_tok >= 5 {
1044 let savings = protocol::format_savings(original_tokens, comp_tok);
1045 return (
1046 append_compressed_hint(
1047 &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
1048 file_path,
1049 ),
1050 comp_tok,
1051 );
1052 }
1053 let savings = protocol::format_savings(original_tokens, orig_tok);
1054 return (
1055 append_compressed_hint(
1056 &format!("{header}\n{compressed}\n{savings}"),
1057 file_path,
1058 ),
1059 orig_tok,
1060 );
1061 }
1062
1063 let sent = count_tokens(&compressed);
1064 let savings = protocol::format_savings(original_tokens, sent);
1065 (
1066 append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
1067 sent,
1068 )
1069 }
1070 "entropy" => {
1071 let result = entropy::entropy_compress_adaptive(content, file_path);
1072 let avg_h = entropy::analyze_entropy(content).avg_entropy;
1073 let header = build_header(file_ref, short, ext, content, line_count, false);
1074 let techs = result.techniques.join(", ");
1075 let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
1076 let sent = count_tokens(&output);
1077 let savings = protocol::format_savings(original_tokens, sent);
1078 let compression_ratio = if original_tokens > 0 {
1079 1.0 - (sent as f64 / original_tokens as f64)
1080 } else {
1081 0.0
1082 };
1083 crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
1084 (
1085 append_compressed_hint(&format!("{output}\n{savings}"), file_path),
1086 sent,
1087 )
1088 }
1089 "task" => {
1090 let task_str = task.unwrap_or("");
1091 if task_str.is_empty() {
1092 let header = build_header(file_ref, short, ext, content, line_count, true);
1093 let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
1094 let sent = count_tokens(&out);
1095 return (out, sent);
1096 }
1097 let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
1098 if keywords.is_empty() {
1099 let header = build_header(file_ref, short, ext, content, line_count, true);
1100 let out = format!(
1101 "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
1102 );
1103 let sent = count_tokens(&out);
1104 return (out, sent);
1105 }
1106 let filtered =
1107 crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
1108 let filtered_lines = filtered.lines().count();
1109 let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1110 format!("{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
1111 } else {
1112 format!("{short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
1113 };
1114 let graph_ctx = if crate::core::profiles::active_profile()
1115 .output_hints
1116 .graph_context_block()
1117 {
1118 let project_root = detect_project_root(file_path);
1119 crate::core::graph_context::build_graph_context(
1120 file_path,
1121 &project_root,
1122 Some(crate::core::graph_context::GraphContextOptions::default()),
1123 )
1124 .map(|c| crate::core::graph_context::format_graph_context(&c))
1125 .unwrap_or_default()
1126 } else {
1127 String::new()
1128 };
1129
1130 let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
1131 let savings = protocol::format_savings(original_tokens, sent);
1132 (
1133 append_compressed_hint(
1134 &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
1135 file_path,
1136 ),
1137 sent,
1138 )
1139 }
1140 "reference" => {
1141 let tok = count_tokens(content);
1142 let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1143 format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})")
1144 } else {
1145 format!("{short}: {line_count} lines, {tok} tok ({ext})")
1146 };
1147 let sent = count_tokens(&output);
1148 let savings = protocol::format_savings(original_tokens, sent);
1149 (format!("{output}\n{savings}"), sent)
1150 }
1151 mode if mode.starts_with("lines:") => {
1152 let range_str = &mode[6..];
1153 let extracted = extract_line_range(content, range_str);
1154 let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1155 format!("{file_ref}={short} {line_count}L lines:{range_str}")
1156 } else {
1157 format!("{short} {line_count}L lines:{range_str}")
1158 };
1159 let sent = count_tokens(&extracted);
1160 let savings = protocol::format_savings(original_tokens, sent);
1161 (format!("{header}\n{extracted}\n{savings}"), sent)
1162 }
1163 unknown => {
1164 let header = build_header(file_ref, short, ext, content, line_count, true);
1165 let out = format!(
1166 "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
1167 );
1168 let sent = count_tokens(&out);
1169 (out, sent)
1170 }
1171 }
1172}
1173
1174fn task_relevant_body(
1182 content: &str,
1183 file_path: &str,
1184 ext: &str,
1185 task: Option<&str>,
1186) -> Option<String> {
1187 const MAX_BODY_LINES: usize = 80;
1188
1189 let task = task.map(str::trim).filter(|t| !t.is_empty())?;
1190 let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task);
1191 if keywords.is_empty() {
1192 return None;
1193 }
1194 let kw_lower: Vec<String> = keywords.iter().map(|k| k.to_lowercase()).collect();
1195
1196 let chunks = crate::core::chunks_ts::extract_chunks_ts(file_path, content, ext)?;
1197
1198 let mut best_idx: Option<usize> = None;
1200 let mut best_score = 0u8;
1201 for (i, ch) in chunks.iter().enumerate() {
1202 if ch.symbol_name.is_empty() {
1203 continue;
1204 }
1205 let name_l = ch.symbol_name.to_lowercase();
1206 let substr = kw_lower
1207 .iter()
1208 .any(|k| k.len() >= 3 && (name_l.contains(k.as_str()) || k.contains(name_l.as_str())));
1209 let score = if kw_lower.contains(&name_l) {
1210 2
1211 } else {
1212 u8::from(substr)
1213 };
1214 if score > best_score {
1215 best_score = score;
1216 best_idx = Some(i);
1217 }
1218 }
1219
1220 let ch = &chunks[best_idx?];
1221 let body_lines: Vec<&str> = ch.content.lines().collect();
1222 let total = body_lines.len();
1223 let shown = total.min(MAX_BODY_LINES);
1224 let body: String = body_lines[..shown]
1225 .iter()
1226 .enumerate()
1227 .map(|(i, l)| format!("{:>4}|{l}", ch.start_line + i))
1228 .collect::<Vec<_>>()
1229 .join("\n");
1230 let truncated = if shown < total {
1231 format!(
1232 "\n … +{} lines — ctx_read(mode=\"lines:{}-{}\")",
1233 total - shown,
1234 ch.start_line + shown,
1235 ch.end_line
1236 )
1237 } else {
1238 String::new()
1239 };
1240 Some(format!(
1241 " ▸ body {} L{}-{}:\n{body}{truncated}",
1242 ch.symbol_name, ch.start_line, ch.end_line
1243 ))
1244}
1245
/// Extracts the lines named by a comma-separated range spec, numbered.
///
/// Each comma-separated part is either a single 1-based line number (`7`) or
/// a `start-end` span (`3-9`). In a span, a bound that does not parse as an
/// unsigned integer falls back to the first line (for `start`) or the last
/// line (for `end`); `start` is raised to at least 1 and `end` is capped at
/// the number of lines. Parts that select nothing are silently skipped.
///
/// Every selected line is rendered as `{number:>4}| {text}` and the results
/// are joined with newlines in the order the parts were given. When nothing
/// was selected, the literal message `No lines matched the range.` is
/// returned instead.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let all_lines: Vec<&str> = content.lines().collect();
    let line_total = all_lines.len();
    let render = |num: usize| format!("{num:>4}| {}", all_lines[num - 1]);

    let mut picked: Vec<String> = Vec::new();
    for spec in range_str.split(',').map(str::trim) {
        match spec.split_once('-') {
            Some((lo, hi)) => {
                // After clamping, every index in `first..=last` is a valid
                // 1-based line number; an inverted span is simply empty.
                let first = lo.trim().parse::<usize>().map_or(1, |v| v.max(1));
                let last = hi
                    .trim()
                    .parse::<usize>()
                    .map_or(line_total, |v| v.min(line_total));
                picked.extend((first..=last).map(&render));
            }
            None => {
                if let Ok(single) = spec.parse::<usize>() {
                    if (1..=line_total).contains(&single) {
                        picked.push(render(single));
                    }
                }
            }
        }
    }

    if picked.is_empty() {
        return "No lines matched the range.".to_string();
    }
    picked.join("\n")
}
1274
1275fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
1276 let _mode_guard = crate::core::savings_footer::ModeGuard::new("diff");
1277 let short = protocol::shorten_path(path);
1278 let old_content = cache
1279 .get(path)
1280 .and_then(crate::core::cache::CacheEntry::content);
1281
1282 let new_content = match read_file_lossy(path) {
1283 Ok(c) => c,
1284 Err(e) => {
1285 let msg = format!("ERROR: {e}");
1286 let tokens = count_tokens(&msg);
1287 return (msg, tokens);
1288 }
1289 };
1290
1291 let original_tokens = count_tokens(&new_content);
1292
1293 let diff_output = if let Some(old) = &old_content {
1294 compressor::diff_content(old, &new_content)
1295 } else {
1296 cache.store(path, &new_content);
1299 let msg = format!(
1300 "{file_ref}={short} [no cached version for diff — use mode=full first, then diff on re-read]"
1301 );
1302 let sent = count_tokens(&msg);
1303 return (msg, sent);
1304 };
1305
1306 cache.store(path, &new_content);
1307
1308 let sent = count_tokens(&diff_output);
1309 let savings = protocol::format_savings(original_tokens, sent);
1310 let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1311 format!("{file_ref}={short}")
1312 } else {
1313 short.clone()
1314 };
1315 (format!("{head} [diff]\n{diff_output}\n{savings}"), sent)
1316}
1317
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Calls `process_mode` with the fixed file reference `F1`, the `rs`
    /// extension and CRP switched off, using `path` as both the short and the
    /// full path.
    fn run_mode(
        content: &str,
        mode: &str,
        path: &str,
        original_tokens: usize,
        task: Option<&str>,
    ) -> (String, usize) {
        process_mode(
            content,
            mode,
            "F1",
            path,
            "rs",
            original_tokens,
            CrpMode::Off,
            path,
            task,
        )
    }

    /// Runs `check` with `LEAN_CTX_META=1` set while holding the shared test
    /// environment lock, then removes the variable again.
    fn with_meta_enabled(check: impl FnOnce()) {
        let _lock = crate::core::data_dir::test_env_lock();
        std::env::set_var("LEAN_CTX_META", "1");
        check();
        std::env::remove_var("LEAN_CTX_META");
    }

    /// Returns the text after the `|` gutter on the second line of `rendered`
    /// (the first line is the header), or the trimmed line itself when it has
    /// no gutter.
    fn first_body_line(rendered: &str) -> &str {
        let raw = rendered.lines().nth(1).unwrap_or_default().trim();
        raw.split_once('|').map_or(raw, |(_, rest)| rest.trim())
    }

    #[test]
    fn test_header_toon_format_no_brackets() {
        with_meta_enabled(|| {
            let source = "use std::io;\nfn main() {}\n";
            let header = build_header("F1", "main.rs", "rs", source, 2, false);
            assert!(!header.contains('['));
            assert!(!header.contains(']'));
            assert!(header.contains("F1=main.rs 2L"));
        });
    }

    #[test]
    fn test_header_toon_deps_indented() {
        with_meta_enabled(|| {
            let source = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
            let header = build_header("F1", "main.rs", "rs", source, 3, true);
            // The format is only checked when a deps section was emitted at all.
            if header.contains("deps") {
                assert!(
                    header.contains("\n deps "),
                    "deps should use indented TOON format"
                );
                assert!(
                    !header.contains("deps:["),
                    "deps should not use bracket format"
                );
            }
        });
    }

    #[test]
    fn test_header_toon_saves_tokens() {
        with_meta_enabled(|| {
            let source = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
            let old_tokens = count_tokens("F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]");
            let new_tokens = count_tokens(&build_header("F1", "main.rs", "rs", source, 4, true));
            assert!(
                new_tokens <= old_tokens,
                "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
            );
        });
    }

    #[test]
    fn test_tdd_symbols_are_compact() {
        const SYMBOLS: &[&str] = &[
            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
        ];
        for sym in SYMBOLS {
            let tok = count_tokens(sym);
            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
        }
    }

    #[test]
    fn test_task_mode_filters_content() {
        // 200 one-line functions; every 20th one mentions `validate_token`.
        let mut rows: Vec<String> = Vec::with_capacity(200);
        for i in 0..200 {
            rows.push(if i % 20 == 0 {
                format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
            } else {
                format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
            });
        }
        let content = rows.join("\n");
        let full_tokens = count_tokens(&content);

        let (result, result_tokens) = run_mode(
            &content,
            "task",
            "test.rs",
            full_tokens,
            Some("fix bug in validate_token"),
        );
        assert!(
            result_tokens < full_tokens,
            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
        );
        assert!(
            result.contains("task-filtered"),
            "output should contain task-filtered marker"
        );
    }

    #[test]
    fn test_task_mode_without_task_returns_full() {
        let content = "fn main() {}\nfn helper() {}\n";
        let (result, _sent) = run_mode(content, "task", "test.rs", count_tokens(content), None);
        assert!(
            result.contains("no task set"),
            "should indicate no task: {result}"
        );
    }

    #[test]
    fn test_reference_mode_one_line() {
        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
        let (result, _sent) =
            run_mode(content, "reference", "test.rs", count_tokens(content), None);
        let line_total = result.lines().count();
        assert!(
            line_total <= 3,
            "reference mode should be very compact, got {} lines",
            line_total
        );
        assert!(result.contains("lines"), "should contain line count");
        assert!(result.contains("tok"), "should contain token count");
    }

    #[test]
    fn cached_lines_mode_invalidates_on_mtime_change() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("file.txt");
        let p = path.to_string_lossy().to_string();
        let mut cache = SessionCache::new();

        std::fs::write(&path, "one\nsecond\n").unwrap();
        let before = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        assert_eq!(first_body_line(&before.content), "one");

        // Wait a full second before rewriting the file.
        // NOTE(review): presumably so the mtime differs on coarse-grained filesystems — confirm.
        std::thread::sleep(Duration::from_secs(1));
        std::fs::write(&path, "two\nsecond\n").unwrap();

        let after = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        assert_eq!(first_body_line(&after.content), "two");
    }

    #[test]
    #[cfg_attr(tarpaulin, ignore)]
    fn benchmark_task_conditioned_compression() {
        let source = generate_benchmark_code(200);
        let full_tokens = count_tokens(&source);
        let task = Some("fix authentication in validate_token");

        // Token count sent for one mode over the same source and task.
        let measure = |mode: &str| run_mode(&source, mode, "server.rs", full_tokens, task).1;
        let full_tok = measure("full");
        let task_tok = measure("task");
        let sig_tok = measure("signatures");
        let ref_tok = measure("reference");

        let savings_pct = |tok: usize| (1.0 - tok as f64 / full_tok as f64) * 100.0;

        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
        eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
        eprintln!(" full: {full_tok:>6} tokens (baseline)");
        eprintln!(
            " task: {task_tok:>6} tokens ({:.0}% savings)",
            savings_pct(task_tok)
        );
        eprintln!(
            " signatures: {sig_tok:>6} tokens ({:.0}% savings)",
            savings_pct(sig_tok)
        );
        eprintln!(
            " reference: {ref_tok:>6} tokens ({:.0}% savings)",
            savings_pct(ref_tok)
        );
        eprintln!("================================================\n");

        assert!(task_tok < full_tok, "task mode should save tokens");
        assert!(sig_tok < full_tok, "signatures should save tokens");
        assert!(ref_tok < sig_tok, "reference should be most compact");
    }

    /// Builds a synthetic Rust source: a fixed prelude containing
    /// `validate_token`, filler handler lines up to roughly `lines` lines, and
    /// a closing brace.
    fn generate_benchmark_code(lines: usize) -> String {
        const PRELUDE: &[&str] = &[
            "use std::collections::HashMap;",
            "use crate::core::auth;",
            "",
            "pub struct Server {",
            " config: Config,",
            " cache: HashMap<String, String>,",
            "}",
            "",
            "impl Server {",
            " pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {",
            " let decoded = auth::decode_jwt(token)?;",
            " if decoded.exp < chrono::Utc::now().timestamp() {",
            " return Err(AuthError::Expired);",
            " }",
            " Ok(decoded.claims)",
            " }",
            "",
        ];

        let mut code: Vec<String> = Vec::with_capacity(lines);
        code.extend(PRELUDE.iter().map(|line| line.to_string()));

        let remaining = lines.saturating_sub(code.len());
        code.extend((0..remaining).map(|i| match i % 30 {
            0 => format!(" pub fn handler_{i}(&self, req: Request) -> Response {{"),
            29 => " }".to_string(),
            _ => format!(" let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"),
        }));
        code.push("}".to_string());
        code.join("\n")
    }

    #[test]
    fn map_mode_inlines_task_relevant_body() {
        let content = "pub fn alpha() {\n let a = 1;\n}\n\npub fn validate_token(t: &str) -> bool {\n let ok = check(t);\n ok\n}\n";
        let tokens = count_tokens(content);

        let (with_task, _) = run_mode(
            content,
            "map",
            "test.rs",
            tokens,
            Some("fix bug in validate_token"),
        );
        assert!(
            with_task.contains("▸ body") && with_task.contains("validate_token"),
            "map with task should inline the matching body: {with_task}"
        );

        let (no_task, _) = run_mode(content, "map", "test.rs", tokens, None);
        assert!(
            !no_task.contains("▸ body"),
            "map without a task must not inline a body: {no_task}"
        );
    }

    #[test]
    fn compressed_cache_key_distinguishes_task() {
        let key_for = |task: Option<&str>| compressed_cache_key("map", CrpMode::Off, task);
        let no_task = key_for(None);
        let with_task = key_for(Some("fix login"));
        let other_task = key_for(Some("refactor db"));
        assert_eq!(no_task, "map");
        assert_ne!(with_task, no_task);
        assert_ne!(with_task, other_task);
    }

    #[test]
    fn instruction_file_detection() {
        for path in [
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
            "/workspace/.cursor/rules/lean-ctx.mdc",
            "/project/AGENTS.md",
            "/project/.cursorrules",
            "/home/user/.claude/rules/my-rule.md",
            "/skills/some-skill/README.md",
        ] {
            assert!(is_instruction_file(path), "expected instruction file: {path}");
        }

        for path in [
            "/project/src/main.rs",
            "/project/config.json",
            "/project/data/report.csv",
        ] {
            assert!(!is_instruction_file(path), "expected ordinary file: {path}");
        }
    }

    #[test]
    fn resolve_auto_mode_returns_full_for_instruction_files() {
        // (path, size argument, task, label used in the failure message)
        let cases = [
            (
                "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
                5000,
                Some("read"),
                "SKILL.md",
            ),
            ("/workspace/AGENTS.md", 3000, Some("read"), "AGENTS.md"),
            ("/workspace/.cursorrules", 2000, None, ".cursorrules"),
        ];
        for (path, size, task, label) in cases {
            let mode = resolve_auto_mode(path, size, task);
            assert_eq!(mode, "full", "{label} must always be read in full");
        }
    }
}