Skip to main content

lean_ctx/tools/
ctx_read.rs

1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a read: the rendered text, the mode that was actually applied,
/// and the token count computed while that mode was processed.
#[derive(Debug, Clone)]
pub struct ReadOutput {
    /// Rendered text handed back to the caller.
    pub content: String,
    /// Name of the mode that produced `content`.
    pub resolved_mode: String,
    /// Approximate output token count from mode processing.
    /// The dispatch layer recounts the final assembled string for accurate savings.
    pub output_tokens: usize,
}
22
/// Notice line telling the reader that the output was compressed and that
/// `mode="full"` returns the complete source.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";
24
/// Modes for which this file consults and fills the per-file compressed-output cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is listed in [`CACHEABLE_MODES`].
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|candidate| *candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32    if crp_mode.is_tdd() {
33        format!("{mode}:tdd")
34    } else {
35        mode.to_string()
36    }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40    if !crate::core::profiles::active_profile()
41        .output_hints
42        .compressed_hint()
43    {
44        return output.to_string();
45    }
46    format!(
47        "{output}\n{COMPRESSED_HINT}\n  ctx_read(\"{file_path}\", mode=\"full\") | ctx_retrieve(\"{file_path}\")"
48    )
49}
50
51/// Reads a file as UTF-8 with lossy fallback, enforcing binary detection and max read size limit.
52/// Defense-in-depth: verifies that the canonical path stays within the process's project root
53/// (if determinable) even though callers SHOULD have already jail-checked the path.
54pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
55    if crate::core::binary_detect::is_binary_file(path) {
56        let msg = crate::core::binary_detect::binary_file_message(path);
57        return Err(std::io::Error::other(msg));
58    }
59
60    if let Ok(canonical) = std::path::Path::new(path).canonicalize() {
61        if let Ok(cwd) = std::env::current_dir() {
62            let root = crate::core::pathjail::canonicalize_or_self(&cwd);
63            if !canonical.starts_with(&root) {
64                let allow = crate::core::pathjail::allow_paths_from_env_and_config();
65                let data_dir_ok = crate::core::data_dir::lean_ctx_data_dir()
66                    .ok()
67                    .is_some_and(|d| canonical.starts_with(d));
68                let tmp_ok = canonical.starts_with(std::env::temp_dir());
69                if !allow.iter().any(|a| canonical.starts_with(a)) && !data_dir_ok && !tmp_ok {
70                    tracing::warn!(
71                        "defense-in-depth: path may escape project root: {}",
72                        canonical.display()
73                    );
74                }
75            }
76        }
77    }
78
79    let cap = crate::core::limits::max_read_bytes();
80
81    let file = open_with_retry(path)?;
82    let meta = file
83        .metadata()
84        .map_err(|e| std::io::Error::other(format!("cannot stat open file descriptor: {e}")))?;
85    if meta.len() > cap as u64 {
86        return Err(std::io::Error::other(format!(
87            "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
88             Increase the limit or use a line-range read: mode=\"lines:1-100\"",
89            meta.len(),
90            cap
91        )));
92    }
93
94    use std::io::Read;
95    let mut bytes = Vec::with_capacity(meta.len() as usize);
96    std::io::BufReader::new(file).read_to_end(&mut bytes)?;
97    match String::from_utf8(bytes) {
98        Ok(s) => Ok(s),
99        Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
100    }
101}
102
/// Opens `path`, making one more attempt 50 ms later if the first attempt reports
/// `NotFound`; any other error is returned immediately.
/// Works around overlay/FUSE stat-cache races in container runtimes (Docker, Codex).
fn open_with_retry(path: &str) -> Result<std::fs::File, std::io::Error> {
    std::fs::File::open(path).or_else(|first_error| {
        if first_error.kind() != std::io::ErrorKind::NotFound {
            return Err(first_error);
        }
        std::thread::sleep(std::time::Duration::from_millis(50));
        std::fs::File::open(path)
    })
}
115
116/// Reads a file through the cache and applies the requested compression mode.
117pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
118    handle_with_options(cache, path, mode, false, crp_mode, None)
119}
120
121/// Like `handle`, but invalidates the cache first to force a fresh disk read.
122pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
123    handle_with_options(cache, path, mode, true, crp_mode, None)
124}
125
126/// Reads a file with task-aware filtering to prioritize task-relevant content.
127pub fn handle_with_task(
128    cache: &mut SessionCache,
129    path: &str,
130    mode: &str,
131    crp_mode: CrpMode,
132    task: Option<&str>,
133) -> String {
134    handle_with_options(cache, path, mode, false, crp_mode, task)
135}
136
137/// Like `handle_with_task`, also returns the resolved mode name and pre-counted tokens.
138pub fn handle_with_task_resolved(
139    cache: &mut SessionCache,
140    path: &str,
141    mode: &str,
142    crp_mode: CrpMode,
143    task: Option<&str>,
144) -> ReadOutput {
145    handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
146}
147
148/// Fresh read with task-aware filtering (invalidates cache first).
149pub fn handle_fresh_with_task(
150    cache: &mut SessionCache,
151    path: &str,
152    mode: &str,
153    crp_mode: CrpMode,
154    task: Option<&str>,
155) -> String {
156    handle_with_options(cache, path, mode, true, crp_mode, task)
157}
158
159/// Fresh read with task-aware filtering, also returns the resolved mode name and pre-counted tokens.
160pub fn handle_fresh_with_task_resolved(
161    cache: &mut SessionCache,
162    path: &str,
163    mode: &str,
164    crp_mode: CrpMode,
165    task: Option<&str>,
166) -> ReadOutput {
167    handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
168}
169
170fn handle_with_options(
171    cache: &mut SessionCache,
172    path: &str,
173    mode: &str,
174    fresh: bool,
175    crp_mode: CrpMode,
176    task: Option<&str>,
177) -> String {
178    handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
179}
180
/// Reports whether reads should be treated as coming from a subagent (forked agent).
///
/// True when `LEAN_CTX_FORCE_FRESH` is `1` or `true`, or when `CURSOR_TASK_ID` is
/// set to a non-empty value. The environment is inspected only on the first call;
/// the answer is then reused for the rest of the process.
/// Subagents inherit stale parent caches, so forcing fresh reads prevents VERIFY FAIL.
fn is_subagent_context() -> bool {
    static IS_SUBAGENT: std::sync::OnceLock<bool> = std::sync::OnceLock::new();

    fn detect_from_env() -> bool {
        let forced = matches!(
            std::env::var("LEAN_CTX_FORCE_FRESH").as_deref(),
            Ok("1" | "true")
        );
        forced || std::env::var("CURSOR_TASK_ID").is_ok_and(|id| !id.is_empty())
    }

    *IS_SUBAGENT.get_or_init(detect_from_env)
}
192
193fn handle_with_options_resolved(
194    cache: &mut SessionCache,
195    path: &str,
196    mode: &str,
197    fresh: bool,
198    crp_mode: CrpMode,
199    task: Option<&str>,
200) -> ReadOutput {
201    let effective_fresh = fresh || is_subagent_context();
202
203    if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
204        bt.next_seq();
205    }
206    let mut result = handle_with_options_inner(cache, path, mode, effective_fresh, crp_mode, task);
207
208    if let Some(entry) = cache.get_mut(path) {
209        entry.last_mode.clone_from(&result.resolved_mode);
210    }
211
212    let dedup_allowed = matches!(
213        result.resolved_mode.as_str(),
214        "map" | "signatures" | "aggressive" | "entropy" | "task"
215    );
216    if dedup_allowed {
217        if let Some(deduped) = cache.apply_dedup(path, &result.content) {
218            let new_tokens = count_tokens(&deduped);
219            if new_tokens < result.output_tokens {
220                result.content = deduped;
221                result.output_tokens = new_tokens;
222            }
223        }
224    }
225
226    if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
227        let original_tokens = cache.get(path).map_or(0, |e| e.original_tokens);
228        bt.record_read(
229            path,
230            &result.resolved_mode,
231            result.output_tokens,
232            original_tokens,
233        );
234    }
235
236    result
237}
238
239fn handle_with_options_inner(
240    cache: &mut SessionCache,
241    path: &str,
242    mode: &str,
243    fresh: bool,
244    crp_mode: CrpMode,
245    task: Option<&str>,
246) -> ReadOutput {
247    let file_ref = cache.get_file_ref(path);
248    let short = protocol::shorten_path(path);
249    let ext = Path::new(path)
250        .extension()
251        .and_then(|e| e.to_str())
252        .unwrap_or("");
253
254    if fresh {
255        if mode == "diff" {
256            let warning = "[warning] fresh+diff is redundant — fresh invalidates cache, no diff possible. Use mode=full with fresh=true instead.";
257            return ReadOutput {
258                content: warning.to_string(),
259                resolved_mode: "diff".into(),
260                output_tokens: count_tokens(warning),
261            };
262        }
263        cache.invalidate(path);
264    }
265
266    if mode == "diff" {
267        let (out, _) = handle_diff(cache, path, &file_ref);
268        let out = crate::core::redaction::redact_text_if_enabled(&out);
269        let sent = count_tokens(&out);
270        return ReadOutput {
271            content: out,
272            resolved_mode: "diff".into(),
273            output_tokens: sent,
274        };
275    }
276
277    if mode != "full" {
278        if let Some(existing) = cache.get(path) {
279            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
280            if stale {
281                cache.invalidate(path);
282            }
283        }
284    }
285
286    if let Some(existing) = cache.get(path) {
287        if mode == "full" {
288            let (out, _) = handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
289            let out = crate::core::redaction::redact_text_if_enabled(&out);
290            let sent = count_tokens(&out);
291            return ReadOutput {
292                content: out,
293                resolved_mode: "full".into(),
294                output_tokens: sent,
295            };
296        }
297        let original_tokens = existing.original_tokens;
298        let content_opt = existing.content();
299        if let Some(content) = content_opt {
300            let resolved_mode = if mode == "auto" {
301                resolve_auto_mode(path, original_tokens, task)
302            } else {
303                mode.to_string()
304            };
305            if is_cacheable_mode(&resolved_mode) {
306                let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
307                if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
308                    let out = crate::core::redaction::redact_text_if_enabled(cached_output);
309                    let sent = count_tokens(&out);
310                    return ReadOutput {
311                        content: out,
312                        resolved_mode,
313                        output_tokens: sent,
314                    };
315                }
316            }
317            let (out, _) = process_mode(
318                &content,
319                &resolved_mode,
320                &file_ref,
321                &short,
322                ext,
323                original_tokens,
324                crp_mode,
325                path,
326                task,
327            );
328            if is_cacheable_mode(&resolved_mode) {
329                let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
330                cache.set_compressed(path, &cache_key, out.clone());
331            }
332            let out = crate::core::redaction::redact_text_if_enabled(&out);
333            let sent = count_tokens(&out);
334            return ReadOutput {
335                content: out,
336                resolved_mode,
337                output_tokens: sent,
338            };
339        }
340        cache.invalidate(path);
341    }
342
343    let content = match read_file_lossy(path) {
344        Ok(c) => c,
345        Err(e) => {
346            let msg = format!("ERROR: {e}");
347            let tokens = count_tokens(&msg);
348            return ReadOutput {
349                content: msg,
350                resolved_mode: "error".into(),
351                output_tokens: tokens,
352            };
353        }
354    };
355
356    // Skip expensive hint computation for line-range reads (fast path)
357    let is_line_range = mode.starts_with("lines:");
358    let hints = crate::core::profiles::active_profile().output_hints;
359    let similar_hint = if !is_line_range && hints.semantic_hint() {
360        find_similar_and_update_semantic_index(path, &content)
361    } else {
362        None
363    };
364    let graph_hint = if !is_line_range && hints.related_hint() {
365        build_graph_related_hint(path)
366    } else {
367        None
368    };
369
370    let store_result = cache.store(path, &content);
371
372    if mode == "full" {
373        cache.mark_full_delivered(path);
374        let (mut output, _) = format_full_output(
375            &file_ref,
376            &short,
377            ext,
378            &content,
379            store_result.original_tokens,
380            store_result.line_count,
381            task,
382        );
383        if let Some(hint) = &graph_hint {
384            output.push_str(&format!("\n{hint}"));
385        }
386        if let Some(hint) = similar_hint {
387            output.push_str(&format!("\n{hint}"));
388        }
389        let output = crate::core::redaction::redact_text_if_enabled(&output);
390        let sent = count_tokens(&output);
391        return ReadOutput {
392            content: output,
393            resolved_mode: "full".into(),
394            output_tokens: sent,
395        };
396    }
397
398    let resolved_mode = if mode == "auto" {
399        resolve_auto_mode(path, store_result.original_tokens, task)
400    } else {
401        mode.to_string()
402    };
403
404    let (mut output, _sent) = process_mode(
405        &content,
406        &resolved_mode,
407        &file_ref,
408        &short,
409        ext,
410        store_result.original_tokens,
411        crp_mode,
412        path,
413        task,
414    );
415    if let Some(hint) = &graph_hint {
416        output.push_str(&format!("\n{hint}"));
417    }
418    if let Some(hint) = similar_hint {
419        output.push_str(&format!("\n{hint}"));
420    }
421    if is_cacheable_mode(&resolved_mode) {
422        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
423        cache.set_compressed(path, &cache_key, output.clone());
424    }
425    let output = crate::core::redaction::redact_text_if_enabled(&output);
426    let final_tokens = count_tokens(&output);
427    ReadOutput {
428        content: output,
429        resolved_mode,
430        output_tokens: final_tokens,
431    }
432}
433
/// Returns `true` when `path` looks like an agent instruction/rules file.
///
/// Matching is case-insensitive and succeeds when either the file name is one of
/// a fixed set of well-known names, or the path contains one of a fixed set of
/// directory/file markers.
pub fn is_instruction_file(path: &str) -> bool {
    const INSTRUCTION_FILE_NAMES: [&str; 7] = [
        "skill.md",
        "agents.md",
        "rules.md",
        ".cursorrules",
        ".clinerules",
        "lean-ctx.md",
        "lean-ctx.mdc",
    ];
    const INSTRUCTION_PATH_MARKERS: [&str; 4] = [
        "/skills/",
        "/.cursor/rules/",
        "/.claude/rules/",
        "/agents.md",
    ];

    let lowered = path.to_lowercase();

    let has_known_name = std::path::Path::new(&lowered)
        .file_name()
        .and_then(|name| name.to_str())
        .is_some_and(|name| INSTRUCTION_FILE_NAMES.contains(&name));

    has_known_name
        || INSTRUCTION_PATH_MARKERS
            .iter()
            .any(|marker| lowered.contains(*marker))
}
455
456fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
457    if is_instruction_file(file_path) {
458        return "full".to_string();
459    }
460
461    if let Ok(bt) = crate::core::bounce_tracker::global().lock() {
462        if bt.should_force_full(file_path) {
463            return "full".to_string();
464        }
465    }
466
467    let intent_query = task.unwrap_or("read");
468    let route = crate::core::intent_router::route_v1(intent_query);
469    let intent_mode = &route.decision.effective_read_mode;
470    if intent_mode != "auto" && intent_mode != "reference" {
471        return intent_mode.clone();
472    }
473
474    // Priority 2: FileSignature-based predictor
475    let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
476    let predictor = crate::core::mode_predictor::ModePredictor::new();
477    let mut predicted = predictor
478        .predict_best_mode(&sig)
479        .unwrap_or_else(|| "full".to_string());
480    if predicted == "auto" {
481        predicted = "full".to_string();
482    }
483
484    // Priority 3: Bandit exploration when budget is tight
485    // SAFETY: Bandit NEVER overrides "full" — full is sacred (byte-accurate content needed for edits)
486    if predicted != "full" {
487        if let Some(project_root) =
488            crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
489        {
490            let ext = std::path::Path::new(file_path)
491                .extension()
492                .and_then(|e| e.to_str())
493                .unwrap_or("");
494            let bucket = match original_tokens {
495                0..=2000 => "sm",
496                2001..=10000 => "md",
497                10001..=50000 => "lg",
498                _ => "xl",
499            };
500            let bandit_key = format!("{ext}_{bucket}");
501            let mut store = crate::core::bandit::BanditStore::load(&project_root);
502            let bandit = store.get_or_create(&bandit_key);
503            let arm = bandit.select_arm();
504            if arm.budget_ratio < 0.25 && original_tokens > 2000 {
505                predicted = "aggressive".to_string();
506            }
507        }
508    }
509
510    // Priority 4: Adaptive mode policy
511    let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
512    let chosen = policy.choose_auto_mode(task, &predicted);
513
514    if original_tokens > 2000 {
515        if predicted == "map" || predicted == "signatures" {
516            if chosen != "map" && chosen != "signatures" {
517                return predicted;
518            }
519        } else if chosen == "full" && predicted != "full" {
520            return predicted;
521        }
522    }
523
524    chosen
525}
526
527fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
528    const MAX_CONTENT_BYTES_FOR_SEMANTIC: usize = 32_768;
529
530    if content.len() > MAX_CONTENT_BYTES_FOR_SEMANTIC {
531        return None;
532    }
533
534    let cfg = crate::core::config::Config::load();
535    let profile = crate::core::config::MemoryProfile::effective(&cfg);
536    if !profile.semantic_cache_enabled() {
537        return None;
538    }
539
540    let project_root = detect_project_root(path);
541    let session_id = format!("{}", std::process::id());
542    let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
543
544    let similar = index.find_similar(content, 0.7);
545    let relevant: Vec<_> = similar
546        .into_iter()
547        .filter(|(p, _)| p != path)
548        .take(3)
549        .collect();
550
551    index.add_file(path, content, &session_id);
552    let _ = index.save(&project_root);
553
554    if relevant.is_empty() {
555        return None;
556    }
557
558    let hints: Vec<String> = relevant
559        .iter()
560        .map(|(p, score)| format!("  {p} ({:.0}% similar)", score * 100.0))
561        .collect();
562
563    Some(format!(
564        "[semantic: {} similar file(s) in cache]\n{}",
565        relevant.len(),
566        hints.join("\n")
567    ))
568}
569
570fn detect_project_root(path: &str) -> String {
571    crate::core::protocol::detect_project_root_or_cwd(path)
572}
573
574fn build_graph_related_hint(path: &str) -> Option<String> {
575    let project_root = detect_project_root(path);
576    crate::core::graph_context::build_related_hint(path, &project_root, 5)
577}
578
579const AUTO_DELTA_THRESHOLD: f64 = 0.6;
580
581/// Re-reads from disk; if content changed and delta is compact, sends auto-delta.
582fn handle_full_with_auto_delta(
583    cache: &mut SessionCache,
584    path: &str,
585    file_ref: &str,
586    short: &str,
587    ext: &str,
588    task: Option<&str>,
589) -> (String, usize) {
590    let Ok(disk_content) = read_file_lossy(path) else {
591        cache.record_cache_hit(path);
592        if let Some(existing) = cache.get(path) {
593            if !crate::core::protocol::meta_visible() {
594                if let Some(cached) = existing.content() {
595                    return format_full_output(
596                        file_ref,
597                        short,
598                        ext,
599                        &cached,
600                        existing.original_tokens,
601                        existing.line_count,
602                        task,
603                    );
604                }
605            }
606            let out = format!(
607                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
608                existing.read_count, existing.line_count
609            );
610            let sent = count_tokens(&out);
611            return (out, sent);
612        }
613        let out = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
614            format!("[file read failed and no cached version available] {file_ref}={short}")
615        } else {
616            format!("[file read failed and no cached version available] {short}")
617        };
618        let sent = count_tokens(&out);
619        return (out, sent);
620    };
621
622    let old_content = cache
623        .get(path)
624        .and_then(crate::core::cache::CacheEntry::content)
625        .unwrap_or_default();
626    let store_result = cache.store(path, &disk_content);
627
628    if store_result.was_hit {
629        if store_result.full_content_delivered {
630            let out = if crate::core::protocol::meta_visible() {
631                format!(
632                    "{file_ref}={short} cached {}t {}L\nFile content unchanged since last read (same hash). Already in your context window.",
633                    store_result.read_count, store_result.line_count
634                )
635            } else {
636                format!(
637                    "{file_ref}={short} [unchanged, {}L, use cached context]",
638                    store_result.line_count
639                )
640            };
641            let sent = count_tokens(&out);
642            return (out, sent);
643        }
644        cache.mark_full_delivered(path);
645        return format_full_output(
646            file_ref,
647            short,
648            ext,
649            &disk_content,
650            store_result.original_tokens,
651            store_result.line_count,
652            task,
653        );
654    }
655
656    let diff = compressor::diff_content(&old_content, &disk_content);
657    let diff_tokens = count_tokens(&diff);
658    let full_tokens = store_result.original_tokens;
659
660    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
661        let savings = protocol::format_savings(full_tokens, diff_tokens);
662        let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
663            format!("{file_ref}={short}")
664        } else {
665            short.to_string()
666        };
667        let out = format!(
668            "{head} [auto-delta] ∆{}L\n{diff}\n{savings}",
669            disk_content.lines().count()
670        );
671        return (out, diff_tokens);
672    }
673
674    format_full_output(
675        file_ref,
676        short,
677        ext,
678        &disk_content,
679        store_result.original_tokens,
680        store_result.line_count,
681        task,
682    )
683}
684
685fn format_full_output(
686    file_ref: &str,
687    short: &str,
688    ext: &str,
689    content: &str,
690    original_tokens: usize,
691    line_count: usize,
692    _task: Option<&str>,
693) -> (String, usize) {
694    let tokens = original_tokens;
695    let metadata = build_header(file_ref, short, ext, content, line_count, true);
696
697    let output = format!("{metadata}\n{content}");
698    let sent = count_tokens(&output);
699    (protocol::append_savings(&output, tokens, sent), sent)
700}
701
702fn build_header(
703    file_ref: &str,
704    short: &str,
705    ext: &str,
706    content: &str,
707    line_count: usize,
708    include_deps: bool,
709) -> String {
710    let mut header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
711        format!("{file_ref}={short} {line_count}L")
712    } else {
713        format!("{short} {line_count}L")
714    };
715
716    if include_deps {
717        let dep_info = deps::extract_deps(content, ext);
718        if !dep_info.imports.is_empty() {
719            let imports_str: Vec<&str> = dep_info
720                .imports
721                .iter()
722                .take(8)
723                .map(std::string::String::as_str)
724                .collect();
725            header.push_str(&format!("\n deps {}", imports_str.join(",")));
726        }
727        if !dep_info.exports.is_empty() {
728            let exports_str: Vec<&str> = dep_info
729                .exports
730                .iter()
731                .take(8)
732                .map(std::string::String::as_str)
733                .collect();
734            header.push_str(&format!("\n exports {}", exports_str.join(",")));
735        }
736    }
737
738    header
739}
740
741#[allow(clippy::too_many_arguments)]
742fn process_mode(
743    content: &str,
744    mode: &str,
745    file_ref: &str,
746    short: &str,
747    ext: &str,
748    original_tokens: usize,
749    crp_mode: CrpMode,
750    file_path: &str,
751    task: Option<&str>,
752) -> (String, usize) {
753    let line_count = content.lines().count();
754
755    match mode {
756        "auto" => {
757            let chosen = resolve_auto_mode(file_path, original_tokens, task);
758            process_mode(
759                content,
760                &chosen,
761                file_ref,
762                short,
763                ext,
764                original_tokens,
765                crp_mode,
766                file_path,
767                task,
768            )
769        }
770        "full" => format_full_output(
771            file_ref,
772            short,
773            ext,
774            content,
775            original_tokens,
776            line_count,
777            task,
778        ),
779        "signatures" => {
780            let sigs = signatures::extract_signatures(content, ext);
781            let dep_info = deps::extract_deps(content, ext);
782
783            let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
784                format!("{file_ref}={short} {line_count}L")
785            } else {
786                format!("{short} {line_count}L")
787            };
788            if !dep_info.imports.is_empty() {
789                let imports_str: Vec<&str> = dep_info
790                    .imports
791                    .iter()
792                    .take(8)
793                    .map(std::string::String::as_str)
794                    .collect();
795                output.push_str(&format!("\n deps {}", imports_str.join(",")));
796            }
797            for sig in &sigs {
798                output.push('\n');
799                if crp_mode.is_tdd() {
800                    output.push_str(&sig.to_tdd());
801                } else {
802                    output.push_str(&sig.to_compact());
803                }
804            }
805            let sent = count_tokens(&output);
806            (
807                append_compressed_hint(
808                    &protocol::append_savings(&output, original_tokens, sent),
809                    file_path,
810                ),
811                sent,
812            )
813        }
814        "map" => {
815            if ext == "php" {
816                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
817                {
818                    let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
819                        format!("{file_ref}={short} {line_count}L\n{php_map}")
820                    } else {
821                        format!("{short} {line_count}L\n{php_map}")
822                    };
823                    let sent = count_tokens(&output);
824                    let output = protocol::append_savings(&output, original_tokens, sent);
825                    return (append_compressed_hint(&output, file_path), sent);
826                }
827            }
828
829            let sigs = signatures::extract_signatures(content, ext);
830            let dep_info = deps::extract_deps(content, ext);
831
832            let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
833                format!("{file_ref}={short} {line_count}L")
834            } else {
835                format!("{short} {line_count}L")
836            };
837
838            if !dep_info.imports.is_empty() {
839                output.push_str("\n  deps: ");
840                output.push_str(&dep_info.imports.join(", "));
841            }
842
843            if !dep_info.exports.is_empty() {
844                output.push_str("\n  exports: ");
845                output.push_str(&dep_info.exports.join(", "));
846            }
847
848            let key_sigs: Vec<&signatures::Signature> = sigs
849                .iter()
850                .filter(|s| s.is_exported || s.indent == 0)
851                .collect();
852
853            if !key_sigs.is_empty() {
854                output.push_str("\n  API:");
855                for sig in &key_sigs {
856                    output.push_str("\n    ");
857                    if crp_mode.is_tdd() {
858                        output.push_str(&sig.to_tdd());
859                    } else {
860                        output.push_str(&sig.to_compact());
861                    }
862                }
863            }
864
865            let sent = count_tokens(&output);
866            (
867                append_compressed_hint(
868                    &protocol::append_savings(&output, original_tokens, sent),
869                    file_path,
870                ),
871                sent,
872            )
873        }
874        "aggressive" => {
875            #[cfg(feature = "tree-sitter")]
876            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
877            #[cfg(not(feature = "tree-sitter"))]
878            let ast_pruned: Option<String> = None;
879
880            let base = ast_pruned.as_deref().unwrap_or(content);
881
882            let session_intent = crate::core::session::SessionState::load_latest()
883                .and_then(|s| s.active_structured_intent);
884            let raw = if let Some(ref intent) = session_intent {
885                compressor::task_aware_compress(base, Some(ext), intent)
886            } else {
887                compressor::aggressive_compress(base, Some(ext))
888            };
889            let compressed = compressor::safeguard_ratio(content, &raw);
890            let header = build_header(file_ref, short, ext, content, line_count, true);
891
892            let mut sym = SymbolMap::new();
893            let idents = symbol_map::extract_identifiers(&compressed, ext);
894            for ident in &idents {
895                sym.register(ident);
896            }
897
898            if sym.len() >= 3 {
899                let sym_table = sym.format_table();
900                let sym_applied = sym.apply(&compressed);
901                let orig_tok = count_tokens(&compressed);
902                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
903                let net = orig_tok.saturating_sub(comp_tok);
904                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
905                    let savings = protocol::format_savings(original_tokens, comp_tok);
906                    return (
907                        append_compressed_hint(
908                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
909                            file_path,
910                        ),
911                        comp_tok,
912                    );
913                }
914                let savings = protocol::format_savings(original_tokens, orig_tok);
915                return (
916                    append_compressed_hint(
917                        &format!("{header}\n{compressed}\n{savings}"),
918                        file_path,
919                    ),
920                    orig_tok,
921                );
922            }
923
924            let sent = count_tokens(&compressed);
925            let savings = protocol::format_savings(original_tokens, sent);
926            (
927                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
928                sent,
929            )
930        }
931        "entropy" => {
932            let result = entropy::entropy_compress_adaptive(content, file_path);
933            let avg_h = entropy::analyze_entropy(content).avg_entropy;
934            let header = build_header(file_ref, short, ext, content, line_count, false);
935            let techs = result.techniques.join(", ");
936            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
937            let sent = count_tokens(&output);
938            let savings = protocol::format_savings(original_tokens, sent);
939            let compression_ratio = if original_tokens > 0 {
940                1.0 - (sent as f64 / original_tokens as f64)
941            } else {
942                0.0
943            };
944            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
945            (
946                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
947                sent,
948            )
949        }
950        "task" => {
951            let task_str = task.unwrap_or("");
952            if task_str.is_empty() {
953                let header = build_header(file_ref, short, ext, content, line_count, true);
954                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
955                let sent = count_tokens(&out);
956                return (out, sent);
957            }
958            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
959            if keywords.is_empty() {
960                let header = build_header(file_ref, short, ext, content, line_count, true);
961                let out = format!(
962                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
963                );
964                let sent = count_tokens(&out);
965                return (out, sent);
966            }
967            let filtered =
968                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
969            let filtered_lines = filtered.lines().count();
970            let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
971                format!("{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
972            } else {
973                format!("{short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
974            };
975            let graph_ctx = if crate::core::profiles::active_profile()
976                .output_hints
977                .graph_context_block()
978            {
979                let project_root = detect_project_root(file_path);
980                crate::core::graph_context::build_graph_context(
981                    file_path,
982                    &project_root,
983                    Some(crate::core::graph_context::GraphContextOptions::default()),
984                )
985                .map(|c| crate::core::graph_context::format_graph_context(&c))
986                .unwrap_or_default()
987            } else {
988                String::new()
989            };
990
991            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
992            let savings = protocol::format_savings(original_tokens, sent);
993            (
994                append_compressed_hint(
995                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
996                    file_path,
997                ),
998                sent,
999            )
1000        }
1001        "reference" => {
1002            let tok = count_tokens(content);
1003            let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1004                format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})")
1005            } else {
1006                format!("{short}: {line_count} lines, {tok} tok ({ext})")
1007            };
1008            let sent = count_tokens(&output);
1009            let savings = protocol::format_savings(original_tokens, sent);
1010            (format!("{output}\n{savings}"), sent)
1011        }
1012        mode if mode.starts_with("lines:") => {
1013            let range_str = &mode[6..];
1014            let extracted = extract_line_range(content, range_str);
1015            let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1016                format!("{file_ref}={short} {line_count}L lines:{range_str}")
1017            } else {
1018                format!("{short} {line_count}L lines:{range_str}")
1019            };
1020            let sent = count_tokens(&extracted);
1021            let savings = protocol::format_savings(original_tokens, sent);
1022            (format!("{header}\n{extracted}\n{savings}"), sent)
1023        }
1024        unknown => {
1025            let header = build_header(file_ref, short, ext, content, line_count, true);
1026            let out = format!(
1027                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
1028            );
1029            let sent = count_tokens(&out);
1030            (out, sent)
1031        }
1032    }
1033}
1034
/// Renders the lines of `content` selected by `range_str` as a numbered listing.
///
/// `range_str` is a comma-separated list of 1-based selectors:
/// - `a-b` selects lines `a` through `b` inclusive. A start that is missing or
///   not a number falls back to `1`; an end that is missing or not a number
///   falls back to the last line. Both bounds are clamped to the file.
/// - `n` selects a single line; it is ignored when out of range or not a number.
///
/// Selected lines are emitted in the order the selectors appear (overlapping
/// selectors repeat lines), each formatted as a right-aligned, width-4 line
/// number followed by `| ` and the line text. When nothing is selected the
/// fixed message `No lines matched the range.` is returned instead.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let all_lines: Vec<&str> = content.lines().collect();
    let line_total = all_lines.len();
    // Callers below only pass numbers already known to lie in 1..=line_total.
    let render = |number: usize| format!("{number:>4}| {}", all_lines[number - 1]);

    let mut picked: Vec<String> = Vec::new();
    for spec in range_str.split(',').map(str::trim) {
        match spec.split_once('-') {
            Some((lo, hi)) => {
                let first = lo.trim().parse::<usize>().map_or(1, |n| n.max(1));
                let last = hi
                    .trim()
                    .parse::<usize>()
                    .map_or(line_total, |n| n.min(line_total));
                // `first >= 1` and `last <= line_total`, so every yielded index is
                // valid; an inverted pair simply produces an empty range.
                picked.extend((first..=last).map(&render));
            }
            None => match spec.parse::<usize>() {
                Ok(number) if (1..=line_total).contains(&number) => picked.push(render(number)),
                _ => {}
            },
        }
    }

    if picked.is_empty() {
        return "No lines matched the range.".to_string();
    }
    picked.join("\n")
}
1063
1064fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
1065    let short = protocol::shorten_path(path);
1066    let old_content = cache
1067        .get(path)
1068        .and_then(crate::core::cache::CacheEntry::content);
1069
1070    let new_content = match read_file_lossy(path) {
1071        Ok(c) => c,
1072        Err(e) => {
1073            let msg = format!("ERROR: {e}");
1074            let tokens = count_tokens(&msg);
1075            return (msg, tokens);
1076        }
1077    };
1078
1079    let original_tokens = count_tokens(&new_content);
1080
1081    let diff_output = if let Some(old) = &old_content {
1082        compressor::diff_content(old, &new_content)
1083    } else {
1084        format!("[first read]\n{new_content}")
1085    };
1086
1087    cache.store(path, &new_content);
1088
1089    let sent = count_tokens(&diff_output);
1090    let savings = protocol::format_savings(original_tokens, sent);
1091    let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1092        format!("{file_ref}={short}")
1093    } else {
1094        short.clone()
1095    };
1096    (format!("{head} [diff]\n{diff_output}\n{savings}"), sent)
1097}
1098
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Environment variable the header tests switch on before calling `build_header`.
    const META_ENV: &str = "LEAN_CTX_META";

    /// Line count requested from `generate_benchmark_code` by the benchmark test.
    const BENCH_LINES: usize = 200;

    /// Sets `LEAN_CTX_META=1` on creation and removes it again on drop.
    ///
    /// Using a guard instead of a trailing `remove_var` call means the variable
    /// is cleared even when an assertion panics, so a failing header test cannot
    /// leak the setting into tests that run afterwards.
    ///
    /// Create it *after* taking `test_env_lock()`: locals are dropped in reverse
    /// declaration order, so the variable is removed before `_lock` is released
    /// (presumably a guard serialising environment access — confirm in `data_dir`).
    struct MetaEnvGuard;

    impl MetaEnvGuard {
        fn enable() -> Self {
            std::env::set_var(META_ENV, "1");
            Self
        }
    }

    impl Drop for MetaEnvGuard {
        fn drop(&mut self) {
            std::env::remove_var(META_ENV);
        }
    }

    /// Returns the second line of `output` with any `<number>|` gutter removed.
    ///
    /// The `lines:` mode output starts with a header line, so index 1 is the
    /// first extracted source line. Missing lines yield an empty string.
    fn first_extracted_line(output: &str) -> &str {
        let line = output.lines().nth(1).unwrap_or_default().trim();
        line.split_once('|').map_or(line, |(_, rest)| rest.trim())
    }

    #[test]
    fn test_header_toon_format_no_brackets() {
        let _lock = crate::core::data_dir::test_env_lock();
        let _meta = MetaEnvGuard::enable();
        let content = "use std::io;\nfn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 2, false);
        assert!(!header.contains('['));
        assert!(!header.contains(']'));
        assert!(header.contains("F1=main.rs 2L"));
    }

    #[test]
    fn test_header_toon_deps_indented() {
        let _lock = crate::core::data_dir::test_env_lock();
        let _meta = MetaEnvGuard::enable();
        let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 3, true);
        // Only the *format* of a deps section is checked; a header without one passes.
        if header.contains("deps") {
            assert!(
                header.contains("\n deps "),
                "deps should use indented TOON format"
            );
            assert!(
                !header.contains("deps:["),
                "deps should not use bracket format"
            );
        }
    }

    #[test]
    fn test_header_toon_saves_tokens() {
        let _lock = crate::core::data_dir::test_env_lock();
        let _meta = MetaEnvGuard::enable();
        let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
        let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]";
        let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
        let old_tokens = count_tokens(old_header);
        let new_tokens = count_tokens(&new_header);
        assert!(
            new_tokens <= old_tokens,
            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
        );
    }

    #[test]
    fn test_tdd_symbols_are_compact() {
        let symbols = [
            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
        ];
        for sym in &symbols {
            let tok = count_tokens(sym);
            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
        }
    }

    #[test]
    fn test_task_mode_filters_content() {
        // 200 one-line functions; every 20th mentions the task keyword.
        let content = (0..200)
            .map(|i| {
                if i % 20 == 0 {
                    format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
                } else {
                    format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
                }
            })
            .collect::<Vec<_>>()
            .join("\n");
        let full_tokens = count_tokens(&content);
        let task = Some("fix bug in validate_token");
        let (result, result_tokens) = process_mode(
            &content,
            "task",
            "F1",
            "test.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "test.rs",
            task,
        );
        assert!(
            result_tokens < full_tokens,
            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
        );
        assert!(
            result.contains("task-filtered"),
            "output should contain task-filtered marker"
        );
    }

    #[test]
    fn test_task_mode_without_task_returns_full() {
        let content = "fn main() {}\nfn helper() {}\n";
        let tokens = count_tokens(content);
        let (result, _sent) = process_mode(
            content,
            "task",
            "F1",
            "test.rs",
            "rs",
            tokens,
            CrpMode::Off,
            "test.rs",
            None,
        );
        assert!(
            result.contains("no task set"),
            "should indicate no task: {result}"
        );
    }

    #[test]
    fn test_reference_mode_one_line() {
        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
        let tokens = count_tokens(content);
        let (result, _sent) = process_mode(
            content,
            "reference",
            "F1",
            "test.rs",
            "rs",
            tokens,
            CrpMode::Off,
            "test.rs",
            None,
        );
        let line_total = result.lines().count();
        assert!(
            line_total <= 3,
            "reference mode should be very compact, got {line_total} lines"
        );
        assert!(result.contains("lines"), "should contain line count");
        assert!(result.contains("tok"), "should contain token count");
    }

    #[test]
    fn cached_lines_mode_invalidates_on_mtime_change() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("file.txt");
        let p = path.to_string_lossy().to_string();

        std::fs::write(&path, "one\nsecond\n").unwrap();
        let mut cache = SessionCache::new();

        let r1 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        assert_eq!(first_extracted_line(&r1.content), "one");

        // NOTE(review): the pause presumably lets the rewrite land on a different
        // mtime on filesystems with one-second timestamp granularity — confirm
        // against the cache's invalidation logic before shortening it.
        std::thread::sleep(Duration::from_secs(1));
        std::fs::write(&path, "two\nsecond\n").unwrap();

        let r2 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        assert_eq!(first_extracted_line(&r2.content), "two");
    }

    #[test]
    #[cfg_attr(tarpaulin, ignore)]
    fn benchmark_task_conditioned_compression() {
        // Keep this reasonably small so CI coverage instrumentation stays fast.
        let content = generate_benchmark_code(BENCH_LINES);
        let full_tokens = count_tokens(&content);
        let task_text = "fix authentication in validate_token";
        let task = Some(task_text);

        // Same file, same task; only the read mode varies between runs.
        let tokens_for = |mode: &str| {
            let (_output, tokens) = process_mode(
                &content,
                mode,
                "F1",
                "server.rs",
                "rs",
                full_tokens,
                CrpMode::Off,
                "server.rs",
                task,
            );
            tokens
        };

        let full_tok = tokens_for("full");
        let task_tok = tokens_for("task");
        let sig_tok = tokens_for("signatures");
        let ref_tok = tokens_for("reference");

        let savings_pct = |tok: usize| (1.0 - tok as f64 / full_tok as f64) * 100.0;

        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
        eprintln!("Source: {BENCH_LINES}-line Rust file, task='{task_text}'");
        eprintln!("  full:       {full_tok:>6} tokens (baseline)");
        eprintln!(
            "  task:       {task_tok:>6} tokens ({:.0}% savings)",
            savings_pct(task_tok)
        );
        eprintln!(
            "  signatures: {sig_tok:>6} tokens ({:.0}% savings)",
            savings_pct(sig_tok)
        );
        eprintln!(
            "  reference:  {ref_tok:>6} tokens ({:.0}% savings)",
            savings_pct(ref_tok)
        );
        eprintln!("================================================\n");

        assert!(task_tok < full_tok, "task mode should save tokens");
        assert!(sig_tok < full_tok, "signatures should save tokens");
        assert!(ref_tok < sig_tok, "reference should be most compact");
    }

    /// Builds a synthetic Rust-like source file for the benchmark.
    ///
    /// The output starts with a fixed prelude (imports, a `Server` struct and a
    /// `validate_token` method), is padded with generated handler lines until
    /// roughly `lines` lines exist, and ends with the closing brace of the
    /// `impl` block. The text is only fed to the compressors, never compiled.
    fn generate_benchmark_code(lines: usize) -> String {
        const PRELUDE: &[&str] = &[
            "use std::collections::HashMap;",
            "use crate::core::auth;",
            "",
            "pub struct Server {",
            "    config: Config,",
            "    cache: HashMap<String, String>,",
            "}",
            "",
            "impl Server {",
            "    pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {",
            "        let decoded = auth::decode_jwt(token)?;",
            "        if decoded.exp < chrono::Utc::now().timestamp() {",
            "            return Err(AuthError::Expired);",
            "        }",
            "        Ok(decoded.claims)",
            "    }",
            "",
        ];

        // +1 for the closing brace appended after the padding.
        let mut code: Vec<String> = Vec::with_capacity(lines.max(PRELUDE.len()) + 1);
        code.extend(PRELUDE.iter().map(|line| (*line).to_string()));

        let remaining = lines.saturating_sub(code.len());
        for i in 0..remaining {
            // Every 30 padding lines form one handler: opener, 28 body lines, closer.
            if i % 30 == 0 {
                code.push(format!(
                    "    pub fn handler_{i}(&self, req: Request) -> Response {{"
                ));
            } else if i % 30 == 29 {
                code.push("    }".to_string());
            } else {
                code.push(format!("        let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"));
            }
        }
        code.push("}".to_string());
        code.join("\n")
    }

    #[test]
    fn instruction_file_detection() {
        let instruction_paths = [
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
            "/workspace/.cursor/rules/lean-ctx.mdc",
            "/project/AGENTS.md",
            "/project/.cursorrules",
            "/home/user/.claude/rules/my-rule.md",
            "/skills/some-skill/README.md",
        ];
        for path in instruction_paths {
            assert!(
                is_instruction_file(path),
                "{path} should be detected as an instruction file"
            );
        }

        let ordinary_paths = [
            "/project/src/main.rs",
            "/project/config.json",
            "/project/data/report.csv",
        ];
        for path in ordinary_paths {
            assert!(
                !is_instruction_file(path),
                "{path} should not be detected as an instruction file"
            );
        }
    }

    #[test]
    fn resolve_auto_mode_returns_full_for_instruction_files() {
        let mode = resolve_auto_mode(
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
            5000,
            Some("read"),
        );
        assert_eq!(mode, "full", "SKILL.md must always be read in full");

        let mode = resolve_auto_mode("/workspace/AGENTS.md", 3000, Some("read"));
        assert_eq!(mode, "full", "AGENTS.md must always be read in full");

        let mode = resolve_auto_mode("/workspace/.cursorrules", 2000, None);
        assert_eq!(mode, "full", ".cursorrules must always be read in full");
    }
}