Skip to main content

lean_ctx/tools/
ctx_read.rs

1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a read: the rendered text, the mode that was actually applied,
/// and the token count computed while that mode was processed.
#[derive(Debug, Clone)]
pub struct ReadOutput {
    /// Rendered output text.
    pub content: String,
    /// Mode name the request resolved to (for example after `"auto"` was resolved).
    pub resolved_mode: String,
    /// Approximate output token count from mode processing.
    /// The dispatch layer recounts the final assembled string for accurate savings.
    pub output_tokens: usize,
}
22
/// Marker line telling the reader that the output is a compressed view.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Mode names for which `is_cacheable_mode` returns `true`.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` exactly matches one of `CACHEABLE_MODES`.
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|candidate| *candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32    if crp_mode.is_tdd() {
33        format!("{mode}:tdd")
34    } else {
35        mode.to_string()
36    }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40    format!(
41        "{output}\n{COMPRESSED_HINT}\n  ctx_read(\"{file_path}\", mode=\"full\") | ctx_retrieve(\"{file_path}\")"
42    )
43}
44
45/// Reads a file as UTF-8 with lossy fallback, enforcing binary detection and max read size limit.
46/// Defense-in-depth: verifies that the canonical path stays within the process's project root
47/// (if determinable) even though callers SHOULD have already jail-checked the path.
48pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
49    if crate::core::binary_detect::is_binary_file(path) {
50        let msg = crate::core::binary_detect::binary_file_message(path);
51        return Err(std::io::Error::other(msg));
52    }
53
54    if let Ok(canonical) = std::path::Path::new(path).canonicalize() {
55        if let Ok(cwd) = std::env::current_dir() {
56            let root = crate::core::pathjail::canonicalize_or_self(&cwd);
57            if !canonical.starts_with(&root) {
58                let allow = crate::core::pathjail::allow_paths_from_env_and_config();
59                let data_dir_ok = crate::core::data_dir::lean_ctx_data_dir()
60                    .ok()
61                    .is_some_and(|d| canonical.starts_with(d));
62                let tmp_ok = canonical.starts_with(std::env::temp_dir());
63                if !allow.iter().any(|a| canonical.starts_with(a)) && !data_dir_ok && !tmp_ok {
64                    tracing::warn!(
65                        "defense-in-depth: path may escape project root: {}",
66                        canonical.display()
67                    );
68                }
69            }
70        }
71    }
72
73    let cap = crate::core::limits::max_read_bytes();
74
75    let file = open_with_retry(path)?;
76    let meta = file
77        .metadata()
78        .map_err(|e| std::io::Error::other(format!("cannot stat open file descriptor: {e}")))?;
79    if meta.len() > cap as u64 {
80        return Err(std::io::Error::other(format!(
81            "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
82             Increase the limit or use a line-range read: mode=\"lines:1-100\"",
83            meta.len(),
84            cap
85        )));
86    }
87
88    use std::io::Read;
89    let mut bytes = Vec::with_capacity(meta.len() as usize);
90    std::io::BufReader::new(file).read_to_end(&mut bytes)?;
91    match String::from_utf8(bytes) {
92        Ok(s) => Ok(s),
93        Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
94    }
95}
96
/// Opens `path`; if the first attempt fails with `NotFound`, waits 50 ms and
/// tries exactly once more. Any other error is returned immediately.
///
/// Works around overlay/FUSE stat-cache races in container runtimes (Docker, Codex).
fn open_with_retry(path: &str) -> Result<std::fs::File, std::io::Error> {
    let first_attempt = std::fs::File::open(path);
    let missing = matches!(
        &first_attempt,
        Err(err) if err.kind() == std::io::ErrorKind::NotFound
    );
    if !missing {
        return first_attempt;
    }

    std::thread::sleep(std::time::Duration::from_millis(50));
    std::fs::File::open(path)
}
109
110/// Reads a file through the cache and applies the requested compression mode.
111pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
112    handle_with_options(cache, path, mode, false, crp_mode, None)
113}
114
115/// Like `handle`, but invalidates the cache first to force a fresh disk read.
116pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
117    handle_with_options(cache, path, mode, true, crp_mode, None)
118}
119
120/// Reads a file with task-aware filtering to prioritize task-relevant content.
121pub fn handle_with_task(
122    cache: &mut SessionCache,
123    path: &str,
124    mode: &str,
125    crp_mode: CrpMode,
126    task: Option<&str>,
127) -> String {
128    handle_with_options(cache, path, mode, false, crp_mode, task)
129}
130
131/// Like `handle_with_task`, also returns the resolved mode name and pre-counted tokens.
132pub fn handle_with_task_resolved(
133    cache: &mut SessionCache,
134    path: &str,
135    mode: &str,
136    crp_mode: CrpMode,
137    task: Option<&str>,
138) -> ReadOutput {
139    handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
140}
141
142/// Fresh read with task-aware filtering (invalidates cache first).
143pub fn handle_fresh_with_task(
144    cache: &mut SessionCache,
145    path: &str,
146    mode: &str,
147    crp_mode: CrpMode,
148    task: Option<&str>,
149) -> String {
150    handle_with_options(cache, path, mode, true, crp_mode, task)
151}
152
153/// Fresh read with task-aware filtering, also returns the resolved mode name and pre-counted tokens.
154pub fn handle_fresh_with_task_resolved(
155    cache: &mut SessionCache,
156    path: &str,
157    mode: &str,
158    crp_mode: CrpMode,
159    task: Option<&str>,
160) -> ReadOutput {
161    handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
162}
163
164fn handle_with_options(
165    cache: &mut SessionCache,
166    path: &str,
167    mode: &str,
168    fresh: bool,
169    crp_mode: CrpMode,
170    task: Option<&str>,
171) -> String {
172    handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
173}
174
175fn handle_with_options_resolved(
176    cache: &mut SessionCache,
177    path: &str,
178    mode: &str,
179    fresh: bool,
180    crp_mode: CrpMode,
181    task: Option<&str>,
182) -> ReadOutput {
183    if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
184        bt.next_seq();
185    }
186    let mut result = handle_with_options_inner(cache, path, mode, fresh, crp_mode, task);
187
188    if result.resolved_mode != "full" && result.resolved_mode != "diff" {
189        if let Some(deduped) = cache.apply_dedup(path, &result.content) {
190            let new_tokens = count_tokens(&deduped);
191            if new_tokens < result.output_tokens {
192                result.content = deduped;
193                result.output_tokens = new_tokens;
194            }
195        }
196    }
197
198    if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
199        let original_tokens = cache.get(path).map_or(0, |e| e.original_tokens);
200        bt.record_read(
201            path,
202            &result.resolved_mode,
203            result.output_tokens,
204            original_tokens,
205        );
206    }
207
208    result
209}
210
211fn handle_with_options_inner(
212    cache: &mut SessionCache,
213    path: &str,
214    mode: &str,
215    fresh: bool,
216    crp_mode: CrpMode,
217    task: Option<&str>,
218) -> ReadOutput {
219    let file_ref = cache.get_file_ref(path);
220    let short = protocol::shorten_path(path);
221    let ext = Path::new(path)
222        .extension()
223        .and_then(|e| e.to_str())
224        .unwrap_or("");
225
226    if fresh {
227        cache.invalidate(path);
228    }
229
230    if mode == "diff" {
231        let (out, sent) = handle_diff(cache, path, &file_ref);
232        return ReadOutput {
233            content: out,
234            resolved_mode: "diff".into(),
235            output_tokens: sent,
236        };
237    }
238
239    if mode != "full" {
240        if let Some(existing) = cache.get(path) {
241            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
242            if stale {
243                cache.invalidate(path);
244            }
245        }
246    }
247
248    if let Some(existing) = cache.get(path) {
249        if mode == "full" {
250            let (out, sent) =
251                handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
252            let out = crate::core::redaction::redact_text_if_enabled(&out);
253            return ReadOutput {
254                content: out,
255                resolved_mode: "full".into(),
256                output_tokens: sent,
257            };
258        }
259        let content = existing.content();
260        let original_tokens = existing.original_tokens;
261        let resolved_mode = if mode == "auto" {
262            resolve_auto_mode(path, original_tokens, task)
263        } else {
264            mode.to_string()
265        };
266        if is_cacheable_mode(&resolved_mode) {
267            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
268            if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
269                let sent = count_tokens(cached_output);
270                let out = crate::core::redaction::redact_text_if_enabled(cached_output);
271                return ReadOutput {
272                    content: out,
273                    resolved_mode,
274                    output_tokens: sent,
275                };
276            }
277        }
278        let (out, sent) = process_mode(
279            &content,
280            &resolved_mode,
281            &file_ref,
282            &short,
283            ext,
284            original_tokens,
285            crp_mode,
286            path,
287            task,
288        );
289        if is_cacheable_mode(&resolved_mode) {
290            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
291            cache.set_compressed(path, &cache_key, out.clone());
292        }
293        let out = crate::core::redaction::redact_text_if_enabled(&out);
294        return ReadOutput {
295            content: out,
296            resolved_mode,
297            output_tokens: sent,
298        };
299    }
300
301    let content = match read_file_lossy(path) {
302        Ok(c) => c,
303        Err(e) => {
304            let msg = format!("ERROR: {e}");
305            let tokens = count_tokens(&msg);
306            return ReadOutput {
307                content: msg,
308                resolved_mode: "error".into(),
309                output_tokens: tokens,
310            };
311        }
312    };
313
314    let similar_hint = find_similar_and_update_semantic_index(path, &content);
315    let graph_hint = build_graph_related_hint(path);
316
317    let store_result = cache.store(path, &content);
318
319    if mode == "full" {
320        cache.mark_full_delivered(path);
321        let (mut output, sent) = format_full_output(
322            &file_ref,
323            &short,
324            ext,
325            &content,
326            store_result.original_tokens,
327            store_result.line_count,
328            task,
329        );
330        if let Some(hint) = &graph_hint {
331            output.push_str(&format!("\n{hint}"));
332        }
333        if let Some(hint) = similar_hint {
334            output.push_str(&format!("\n{hint}"));
335        }
336        let output = crate::core::redaction::redact_text_if_enabled(&output);
337        return ReadOutput {
338            content: output,
339            resolved_mode: "full".into(),
340            output_tokens: sent,
341        };
342    }
343
344    let resolved_mode = if mode == "auto" {
345        resolve_auto_mode(path, store_result.original_tokens, task)
346    } else {
347        mode.to_string()
348    };
349
350    let (mut output, _sent) = process_mode(
351        &content,
352        &resolved_mode,
353        &file_ref,
354        &short,
355        ext,
356        store_result.original_tokens,
357        crp_mode,
358        path,
359        task,
360    );
361    if is_cacheable_mode(&resolved_mode) {
362        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
363        cache.set_compressed(path, &cache_key, output.clone());
364    }
365    if let Some(hint) = &graph_hint {
366        output.push_str(&format!("\n{hint}"));
367    }
368    if let Some(hint) = similar_hint {
369        output.push_str(&format!("\n{hint}"));
370    }
371    let output = crate::core::redaction::redact_text_if_enabled(&output);
372    let final_tokens = count_tokens(&output);
373    ReadOutput {
374        content: output,
375        resolved_mode,
376        output_tokens: final_tokens,
377    }
378}
379
/// Returns `true` when `path` looks like an agent-instruction file.
///
/// Matching is case-insensitive and separator-agnostic (`\` is treated like
/// `/`, so Windows-style paths behave the same as Unix-style ones). A path
/// matches when either:
/// - its file name is one of `skill.md`, `agents.md`, `rules.md`,
///   `.cursorrules`, `.clinerules`, `lean-ctx.md`, `lean-ctx.mdc`; or
/// - it contains `/skills/`, `/.cursor/rules/`, `/.claude/rules/`, or `/agents.md`.
pub fn is_instruction_file(path: &str) -> bool {
    // Normalize separators so the directory markers below also match `\`-separated paths.
    let normalized = path.to_lowercase().replace('\\', "/");
    let filename = std::path::Path::new(&normalized)
        .file_name()
        .and_then(|f| f.to_str())
        .unwrap_or("");

    matches!(
        filename,
        "skill.md"
            | "agents.md"
            | "rules.md"
            | ".cursorrules"
            | ".clinerules"
            | "lean-ctx.md"
            | "lean-ctx.mdc"
    ) || normalized.contains("/skills/")
        || normalized.contains("/.cursor/rules/")
        || normalized.contains("/.claude/rules/")
        || normalized.contains("/agents.md")
}
401
402fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
403    if is_instruction_file(file_path) {
404        return "full".to_string();
405    }
406
407    if let Ok(bt) = crate::core::bounce_tracker::global().lock() {
408        if bt.should_force_full(file_path) {
409            return "full".to_string();
410        }
411    }
412
413    let intent_query = task.unwrap_or("read");
414    let route = crate::core::intent_router::route_v1(intent_query);
415    let intent_mode = &route.decision.effective_read_mode;
416    if intent_mode != "auto" && intent_mode != "reference" {
417        return intent_mode.clone();
418    }
419
420    // Priority 2: FileSignature-based predictor
421    let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
422    let predictor = crate::core::mode_predictor::ModePredictor::new();
423    let mut predicted = predictor
424        .predict_best_mode(&sig)
425        .unwrap_or_else(|| "full".to_string());
426    if predicted == "auto" {
427        predicted = "full".to_string();
428    }
429
430    // Priority 3: Bandit exploration when budget is tight
431    if let Some(project_root) =
432        crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
433    {
434        let ext = std::path::Path::new(file_path)
435            .extension()
436            .and_then(|e| e.to_str())
437            .unwrap_or("");
438        let bucket = match original_tokens {
439            0..=2000 => "sm",
440            2001..=10000 => "md",
441            10001..=50000 => "lg",
442            _ => "xl",
443        };
444        let bandit_key = format!("{ext}_{bucket}");
445        let mut store = crate::core::bandit::BanditStore::load(&project_root);
446        let bandit = store.get_or_create(&bandit_key);
447        let arm = bandit.select_arm();
448        if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
449            predicted = "aggressive".to_string();
450        }
451    }
452
453    // Priority 4: Adaptive mode policy
454    let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
455    let chosen = policy.choose_auto_mode(task, &predicted);
456
457    if original_tokens > 2000 {
458        if predicted == "map" || predicted == "signatures" {
459            if chosen != "map" && chosen != "signatures" {
460                return predicted;
461            }
462        } else if chosen == "full" && predicted != "full" {
463            return predicted;
464        }
465    }
466
467    chosen
468}
469
470fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
471    const MAX_CONTENT_BYTES_FOR_SEMANTIC: usize = 32_768;
472
473    if content.len() > MAX_CONTENT_BYTES_FOR_SEMANTIC {
474        return None;
475    }
476
477    let cfg = crate::core::config::Config::load();
478    let profile = crate::core::config::MemoryProfile::effective(&cfg);
479    if !profile.semantic_cache_enabled() {
480        return None;
481    }
482
483    let project_root = detect_project_root(path);
484    let session_id = format!("{}", std::process::id());
485    let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
486
487    let similar = index.find_similar(content, 0.7);
488    let relevant: Vec<_> = similar
489        .into_iter()
490        .filter(|(p, _)| p != path)
491        .take(3)
492        .collect();
493
494    index.add_file(path, content, &session_id);
495    let _ = index.save(&project_root);
496
497    if relevant.is_empty() {
498        return None;
499    }
500
501    let hints: Vec<String> = relevant
502        .iter()
503        .map(|(p, score)| format!("  {p} ({:.0}% similar)", score * 100.0))
504        .collect();
505
506    Some(format!(
507        "[semantic: {} similar file(s) in cache]\n{}",
508        relevant.len(),
509        hints.join("\n")
510    ))
511}
512
513fn detect_project_root(path: &str) -> String {
514    crate::core::protocol::detect_project_root_or_cwd(path)
515}
516
517fn build_graph_related_hint(path: &str) -> Option<String> {
518    let project_root = detect_project_root(path);
519    crate::core::graph_context::build_related_hint(path, &project_root, 5)
520}
521
522const AUTO_DELTA_THRESHOLD: f64 = 0.6;
523
524/// Re-reads from disk; if content changed and delta is compact, sends auto-delta.
525fn handle_full_with_auto_delta(
526    cache: &mut SessionCache,
527    path: &str,
528    file_ref: &str,
529    short: &str,
530    ext: &str,
531    task: Option<&str>,
532) -> (String, usize) {
533    let Ok(disk_content) = read_file_lossy(path) else {
534        cache.record_cache_hit(path);
535        if let Some(existing) = cache.get(path) {
536            if !crate::core::protocol::meta_visible() {
537                let cached = existing.content();
538                return format_full_output(
539                    file_ref,
540                    short,
541                    ext,
542                    &cached,
543                    existing.original_tokens,
544                    existing.line_count,
545                    task,
546                );
547            }
548            let out = format!(
549                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
550                existing.read_count, existing.line_count
551            );
552            let sent = count_tokens(&out);
553            return (out, sent);
554        }
555        let out = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
556            format!("[file read failed and no cached version available] {file_ref}={short}")
557        } else {
558            format!("[file read failed and no cached version available] {short}")
559        };
560        let sent = count_tokens(&out);
561        return (out, sent);
562    };
563
564    let old_content = cache
565        .get(path)
566        .map(crate::core::cache::CacheEntry::content)
567        .unwrap_or_default();
568    let store_result = cache.store(path, &disk_content);
569
570    if store_result.was_hit {
571        if store_result.full_content_delivered {
572            if crate::core::protocol::meta_visible() {
573                let out = format!(
574                    "{file_ref}={short} cached {}t {}L\nFile content unchanged since last read (same hash). Already in your context window.",
575                    store_result.read_count, store_result.line_count
576                );
577                let sent = count_tokens(&out);
578                return (out, sent);
579            }
580            return (String::new(), 0);
581        }
582        cache.mark_full_delivered(path);
583        return format_full_output(
584            file_ref,
585            short,
586            ext,
587            &disk_content,
588            store_result.original_tokens,
589            store_result.line_count,
590            task,
591        );
592    }
593
594    let diff = compressor::diff_content(&old_content, &disk_content);
595    let diff_tokens = count_tokens(&diff);
596    let full_tokens = store_result.original_tokens;
597
598    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
599        let savings = protocol::format_savings(full_tokens, diff_tokens);
600        let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
601            format!("{file_ref}={short}")
602        } else {
603            short.to_string()
604        };
605        let out = format!(
606            "{head} [auto-delta] ∆{}L\n{diff}\n{savings}",
607            disk_content.lines().count()
608        );
609        return (out, diff_tokens);
610    }
611
612    format_full_output(
613        file_ref,
614        short,
615        ext,
616        &disk_content,
617        store_result.original_tokens,
618        store_result.line_count,
619        task,
620    )
621}
622
623fn format_full_output(
624    file_ref: &str,
625    short: &str,
626    ext: &str,
627    content: &str,
628    original_tokens: usize,
629    line_count: usize,
630    task: Option<&str>,
631) -> (String, usize) {
632    let tokens = original_tokens;
633    let metadata = build_header(file_ref, short, ext, content, line_count, true);
634
635    let mut reordered: Option<String> = None;
636    {
637        let profile = crate::core::profiles::active_profile();
638        let cfg = profile.layout;
639        if cfg.enabled_effective() && line_count >= cfg.min_lines_effective() {
640            let task_str = task.unwrap_or("");
641            if !task_str.is_empty() {
642                let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
643                let r = crate::core::attention_layout_driver::maybe_reorder_for_attention(
644                    content, &keywords, &cfg,
645                );
646                if !r.skipped && r.changed {
647                    reordered = Some(r.output);
648                }
649            }
650        }
651    }
652
653    let content_for_output = reordered.as_deref().unwrap_or(content);
654
655    let mut sym = SymbolMap::new();
656    let idents = symbol_map::extract_identifiers(content_for_output, ext);
657    for ident in &idents {
658        sym.register(ident);
659    }
660
661    if sym.len() >= 3 {
662        let sym_table = sym.format_table();
663        let compressed = sym.apply(content_for_output);
664        let original_tok = count_tokens(content_for_output);
665        let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
666        let net_saving = original_tok.saturating_sub(compressed_tok);
667        if original_tok > 0 && net_saving * 100 / original_tok >= 5 {
668            let output = format!("{metadata}\n{compressed}{sym_table}");
669            let sent = count_tokens(&output);
670            return (protocol::append_savings(&output, tokens, sent), sent);
671        }
672    }
673
674    let output = format!("{metadata}\n{content_for_output}");
675    let sent = count_tokens(&output);
676    (protocol::append_savings(&output, tokens, sent), sent)
677}
678
679fn build_header(
680    file_ref: &str,
681    short: &str,
682    ext: &str,
683    content: &str,
684    line_count: usize,
685    include_deps: bool,
686) -> String {
687    let mut header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
688        format!("{file_ref}={short} {line_count}L")
689    } else {
690        format!("{short} {line_count}L")
691    };
692
693    if include_deps {
694        let dep_info = deps::extract_deps(content, ext);
695        if !dep_info.imports.is_empty() {
696            let imports_str: Vec<&str> = dep_info
697                .imports
698                .iter()
699                .take(8)
700                .map(std::string::String::as_str)
701                .collect();
702            header.push_str(&format!("\n deps {}", imports_str.join(",")));
703        }
704        if !dep_info.exports.is_empty() {
705            let exports_str: Vec<&str> = dep_info
706                .exports
707                .iter()
708                .take(8)
709                .map(std::string::String::as_str)
710                .collect();
711            header.push_str(&format!("\n exports {}", exports_str.join(",")));
712        }
713    }
714
715    header
716}
717
718#[allow(clippy::too_many_arguments)]
719fn process_mode(
720    content: &str,
721    mode: &str,
722    file_ref: &str,
723    short: &str,
724    ext: &str,
725    original_tokens: usize,
726    crp_mode: CrpMode,
727    file_path: &str,
728    task: Option<&str>,
729) -> (String, usize) {
730    let line_count = content.lines().count();
731
732    match mode {
733        "auto" => {
734            let chosen = resolve_auto_mode(file_path, original_tokens, task);
735            process_mode(
736                content,
737                &chosen,
738                file_ref,
739                short,
740                ext,
741                original_tokens,
742                crp_mode,
743                file_path,
744                task,
745            )
746        }
747        "full" => format_full_output(
748            file_ref,
749            short,
750            ext,
751            content,
752            original_tokens,
753            line_count,
754            task,
755        ),
756        "signatures" => {
757            let sigs = signatures::extract_signatures(content, ext);
758            let dep_info = deps::extract_deps(content, ext);
759
760            let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
761                format!("{file_ref}={short} {line_count}L")
762            } else {
763                format!("{short} {line_count}L")
764            };
765            if !dep_info.imports.is_empty() {
766                let imports_str: Vec<&str> = dep_info
767                    .imports
768                    .iter()
769                    .take(8)
770                    .map(std::string::String::as_str)
771                    .collect();
772                output.push_str(&format!("\n deps {}", imports_str.join(",")));
773            }
774            for sig in &sigs {
775                output.push('\n');
776                if crp_mode.is_tdd() {
777                    output.push_str(&sig.to_tdd());
778                } else {
779                    output.push_str(&sig.to_compact());
780                }
781            }
782            let sent = count_tokens(&output);
783            (
784                append_compressed_hint(
785                    &protocol::append_savings(&output, original_tokens, sent),
786                    file_path,
787                ),
788                sent,
789            )
790        }
791        "map" => {
792            if ext == "php" {
793                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
794                {
795                    let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
796                        format!("{file_ref}={short} {line_count}L\n{php_map}")
797                    } else {
798                        format!("{short} {line_count}L\n{php_map}")
799                    };
800                    let sent = count_tokens(&output);
801                    let output = protocol::append_savings(&output, original_tokens, sent);
802                    return (append_compressed_hint(&output, file_path), sent);
803                }
804            }
805
806            let sigs = signatures::extract_signatures(content, ext);
807            let dep_info = deps::extract_deps(content, ext);
808
809            let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
810                format!("{file_ref}={short} {line_count}L")
811            } else {
812                format!("{short} {line_count}L")
813            };
814
815            if !dep_info.imports.is_empty() {
816                output.push_str("\n  deps: ");
817                output.push_str(&dep_info.imports.join(", "));
818            }
819
820            if !dep_info.exports.is_empty() {
821                output.push_str("\n  exports: ");
822                output.push_str(&dep_info.exports.join(", "));
823            }
824
825            let key_sigs: Vec<&signatures::Signature> = sigs
826                .iter()
827                .filter(|s| s.is_exported || s.indent == 0)
828                .collect();
829
830            if !key_sigs.is_empty() {
831                output.push_str("\n  API:");
832                for sig in &key_sigs {
833                    output.push_str("\n    ");
834                    if crp_mode.is_tdd() {
835                        output.push_str(&sig.to_tdd());
836                    } else {
837                        output.push_str(&sig.to_compact());
838                    }
839                }
840            }
841
842            let sent = count_tokens(&output);
843            (
844                append_compressed_hint(
845                    &protocol::append_savings(&output, original_tokens, sent),
846                    file_path,
847                ),
848                sent,
849            )
850        }
851        "aggressive" => {
852            #[cfg(feature = "tree-sitter")]
853            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
854            #[cfg(not(feature = "tree-sitter"))]
855            let ast_pruned: Option<String> = None;
856
857            let base = ast_pruned.as_deref().unwrap_or(content);
858
859            let session_intent = crate::core::session::SessionState::load_latest()
860                .and_then(|s| s.active_structured_intent);
861            let raw = if let Some(ref intent) = session_intent {
862                compressor::task_aware_compress(base, Some(ext), intent)
863            } else {
864                compressor::aggressive_compress(base, Some(ext))
865            };
866            let compressed = compressor::safeguard_ratio(content, &raw);
867            let header = build_header(file_ref, short, ext, content, line_count, true);
868
869            let mut sym = SymbolMap::new();
870            let idents = symbol_map::extract_identifiers(&compressed, ext);
871            for ident in &idents {
872                sym.register(ident);
873            }
874
875            if sym.len() >= 3 {
876                let sym_table = sym.format_table();
877                let sym_applied = sym.apply(&compressed);
878                let orig_tok = count_tokens(&compressed);
879                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
880                let net = orig_tok.saturating_sub(comp_tok);
881                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
882                    let savings = protocol::format_savings(original_tokens, comp_tok);
883                    return (
884                        append_compressed_hint(
885                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
886                            file_path,
887                        ),
888                        comp_tok,
889                    );
890                }
891                let savings = protocol::format_savings(original_tokens, orig_tok);
892                return (
893                    append_compressed_hint(
894                        &format!("{header}\n{compressed}\n{savings}"),
895                        file_path,
896                    ),
897                    orig_tok,
898                );
899            }
900
901            let sent = count_tokens(&compressed);
902            let savings = protocol::format_savings(original_tokens, sent);
903            (
904                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
905                sent,
906            )
907        }
908        "entropy" => {
909            let result = entropy::entropy_compress_adaptive(content, file_path);
910            let avg_h = entropy::analyze_entropy(content).avg_entropy;
911            let header = build_header(file_ref, short, ext, content, line_count, false);
912            let techs = result.techniques.join(", ");
913            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
914            let sent = count_tokens(&output);
915            let savings = protocol::format_savings(original_tokens, sent);
916            let compression_ratio = if original_tokens > 0 {
917                1.0 - (sent as f64 / original_tokens as f64)
918            } else {
919                0.0
920            };
921            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
922            (
923                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
924                sent,
925            )
926        }
927        "task" => {
928            let task_str = task.unwrap_or("");
929            if task_str.is_empty() {
930                let header = build_header(file_ref, short, ext, content, line_count, true);
931                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
932                let sent = count_tokens(&out);
933                return (out, sent);
934            }
935            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
936            if keywords.is_empty() {
937                let header = build_header(file_ref, short, ext, content, line_count, true);
938                let out = format!(
939                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
940                );
941                let sent = count_tokens(&out);
942                return (out, sent);
943            }
944            let filtered =
945                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
946            let filtered_lines = filtered.lines().count();
947            let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
948                format!("{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
949            } else {
950                format!("{short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
951            };
952            let project_root = detect_project_root(file_path);
953            let graph_ctx = crate::core::graph_context::build_graph_context(
954                file_path,
955                &project_root,
956                Some(crate::core::graph_context::GraphContextOptions::default()),
957            )
958            .map(|c| crate::core::graph_context::format_graph_context(&c))
959            .unwrap_or_default();
960
961            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
962            let savings = protocol::format_savings(original_tokens, sent);
963            (
964                append_compressed_hint(
965                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
966                    file_path,
967                ),
968                sent,
969            )
970        }
971        "reference" => {
972            let tok = count_tokens(content);
973            let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
974                format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})")
975            } else {
976                format!("{short}: {line_count} lines, {tok} tok ({ext})")
977            };
978            let sent = count_tokens(&output);
979            let savings = protocol::format_savings(original_tokens, sent);
980            (format!("{output}\n{savings}"), sent)
981        }
982        mode if mode.starts_with("lines:") => {
983            let range_str = &mode[6..];
984            let extracted = extract_line_range(content, range_str);
985            let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
986                format!("{file_ref}={short} {line_count}L lines:{range_str}")
987            } else {
988                format!("{short} {line_count}L lines:{range_str}")
989            };
990            let sent = count_tokens(&extracted);
991            let savings = protocol::format_savings(original_tokens, sent);
992            (format!("{header}\n{extracted}\n{savings}"), sent)
993        }
994        unknown => {
995            let header = build_header(file_ref, short, ext, content, line_count, true);
996            let out = format!(
997                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
998            );
999            let sent = count_tokens(&out);
1000            (out, sent)
1001        }
1002    }
1003}
1004
/// Renders the lines of `content` selected by `range_str`, one per output line,
/// each prefixed with its 1-based number right-aligned to four columns and `| `.
///
/// `range_str` is a comma-separated list whose items are either a single line
/// number (`7`) or a span (`3-9`). A span bound that does not parse as a number
/// falls back to the first line (start) or the last line (end), so `5-` and `-5`
/// behave as open-ended spans. Span bounds are clamped to the file, items are
/// emitted in the order written (overlapping items repeat lines), and single
/// numbers that do not parse or lie outside the file are skipped.
///
/// Returns `"No lines matched the range."` when nothing was selected.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let all_lines: Vec<&str> = content.lines().collect();
    let last_line = all_lines.len();
    let render = |number: usize| format!("{number:>4}| {}", all_lines[number - 1]);

    let mut picked: Vec<String> = Vec::new();
    for item in range_str.split(',').map(str::trim) {
        match item.split_once('-') {
            Some((lo_text, hi_text)) => {
                // Unparseable bounds widen to the file edges; parsed ones are clamped.
                let lo = lo_text.trim().parse::<usize>().map_or(1, |n| n.max(1));
                let hi = hi_text
                    .trim()
                    .parse::<usize>()
                    .map_or(last_line, |n| n.min(last_line));
                // `lo >= 1` and `hi <= last_line`, so every number here is a valid line.
                picked.extend((lo..=hi).map(&render));
            }
            None => {
                if let Ok(number) = item.parse::<usize>() {
                    if (1..=last_line).contains(&number) {
                        picked.push(render(number));
                    }
                }
            }
        }
    }

    if picked.is_empty() {
        return "No lines matched the range.".to_string();
    }
    picked.join("\n")
}
1033
1034fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
1035    let short = protocol::shorten_path(path);
1036    let old_content = cache.get(path).map(crate::core::cache::CacheEntry::content);
1037
1038    let new_content = match read_file_lossy(path) {
1039        Ok(c) => c,
1040        Err(e) => {
1041            let msg = format!("ERROR: {e}");
1042            let tokens = count_tokens(&msg);
1043            return (msg, tokens);
1044        }
1045    };
1046
1047    let original_tokens = count_tokens(&new_content);
1048
1049    let diff_output = if let Some(old) = &old_content {
1050        compressor::diff_content(old, &new_content)
1051    } else {
1052        format!("[first read]\n{new_content}")
1053    };
1054
1055    cache.store(path, &new_content);
1056
1057    let sent = count_tokens(&diff_output);
1058    let savings = protocol::format_savings(original_tokens, sent);
1059    let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1060        format!("{file_ref}={short}")
1061    } else {
1062        short.clone()
1063    };
1064    (format!("{head} [diff]\n{diff_output}\n{savings}"), sent)
1065}
1066
1067#[cfg(test)]
1068mod tests {
1069    use super::*;
1070    use std::time::Duration;
1071
1072    #[test]
1073    fn test_header_toon_format_no_brackets() {
1074        let _lock = crate::core::data_dir::test_env_lock();
1075        std::env::set_var("LEAN_CTX_META", "1");
1076        let content = "use std::io;\nfn main() {}\n";
1077        let header = build_header("F1", "main.rs", "rs", content, 2, false);
1078        assert!(!header.contains('['));
1079        assert!(!header.contains(']'));
1080        assert!(header.contains("F1=main.rs 2L"));
1081        std::env::remove_var("LEAN_CTX_META");
1082    }
1083
1084    #[test]
1085    fn test_header_toon_deps_indented() {
1086        let _lock = crate::core::data_dir::test_env_lock();
1087        std::env::set_var("LEAN_CTX_META", "1");
1088        let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
1089        let header = build_header("F1", "main.rs", "rs", content, 3, true);
1090        if header.contains("deps") {
1091            assert!(
1092                header.contains("\n deps "),
1093                "deps should use indented TOON format"
1094            );
1095            assert!(
1096                !header.contains("deps:["),
1097                "deps should not use bracket format"
1098            );
1099        }
1100        std::env::remove_var("LEAN_CTX_META");
1101    }
1102
1103    #[test]
1104    fn test_header_toon_saves_tokens() {
1105        let _lock = crate::core::data_dir::test_env_lock();
1106        std::env::set_var("LEAN_CTX_META", "1");
1107        let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
1108        let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
1109        let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
1110        let old_tokens = count_tokens(&old_header);
1111        let new_tokens = count_tokens(&new_header);
1112        assert!(
1113            new_tokens <= old_tokens,
1114            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
1115        );
1116        std::env::remove_var("LEAN_CTX_META");
1117    }
1118
1119    #[test]
1120    fn test_tdd_symbols_are_compact() {
1121        let symbols = [
1122            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
1123        ];
1124        for sym in &symbols {
1125            let tok = count_tokens(sym);
1126            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
1127        }
1128    }
1129
1130    #[test]
1131    fn test_task_mode_filters_content() {
1132        let content = (0..200)
1133            .map(|i| {
1134                if i % 20 == 0 {
1135                    format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
1136                } else {
1137                    format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
1138                }
1139            })
1140            .collect::<Vec<_>>()
1141            .join("\n");
1142        let full_tokens = count_tokens(&content);
1143        let task = Some("fix bug in validate_token");
1144        let (result, result_tokens) = process_mode(
1145            &content,
1146            "task",
1147            "F1",
1148            "test.rs",
1149            "rs",
1150            full_tokens,
1151            CrpMode::Off,
1152            "test.rs",
1153            task,
1154        );
1155        assert!(
1156            result_tokens < full_tokens,
1157            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
1158        );
1159        assert!(
1160            result.contains("task-filtered"),
1161            "output should contain task-filtered marker"
1162        );
1163    }
1164
1165    #[test]
1166    fn test_task_mode_without_task_returns_full() {
1167        let content = "fn main() {}\nfn helper() {}\n";
1168        let tokens = count_tokens(content);
1169        let (result, _sent) = process_mode(
1170            content,
1171            "task",
1172            "F1",
1173            "test.rs",
1174            "rs",
1175            tokens,
1176            CrpMode::Off,
1177            "test.rs",
1178            None,
1179        );
1180        assert!(
1181            result.contains("no task set"),
1182            "should indicate no task: {result}"
1183        );
1184    }
1185
1186    #[test]
1187    fn test_reference_mode_one_line() {
1188        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
1189        let tokens = count_tokens(content);
1190        let (result, _sent) = process_mode(
1191            content,
1192            "reference",
1193            "F1",
1194            "test.rs",
1195            "rs",
1196            tokens,
1197            CrpMode::Off,
1198            "test.rs",
1199            None,
1200        );
1201        let lines: Vec<&str> = result.lines().collect();
1202        assert!(
1203            lines.len() <= 3,
1204            "reference mode should be very compact, got {} lines",
1205            lines.len()
1206        );
1207        assert!(result.contains("lines"), "should contain line count");
1208        assert!(result.contains("tok"), "should contain token count");
1209    }
1210
1211    #[test]
1212    fn cached_lines_mode_invalidates_on_mtime_change() {
1213        let dir = tempfile::tempdir().unwrap();
1214        let path = dir.path().join("file.txt");
1215        let p = path.to_string_lossy().to_string();
1216
1217        std::fs::write(&path, "one\nsecond\n").unwrap();
1218        let mut cache = SessionCache::new();
1219
1220        let r1 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1221        let l1: Vec<&str> = r1.content.lines().collect();
1222        let got1 = l1.get(1).copied().unwrap_or_default().trim();
1223        let got1 = got1.split_once('|').map_or(got1, |(_, s)| s.trim());
1224        assert_eq!(got1, "one");
1225
1226        std::thread::sleep(Duration::from_secs(1));
1227        std::fs::write(&path, "two\nsecond\n").unwrap();
1228
1229        let r2 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1230        let l2: Vec<&str> = r2.content.lines().collect();
1231        let got2 = l2.get(1).copied().unwrap_or_default().trim();
1232        let got2 = got2.split_once('|').map_or(got2, |(_, s)| s.trim());
1233        assert_eq!(got2, "two");
1234    }
1235
1236    #[test]
1237    #[cfg_attr(tarpaulin, ignore)]
1238    fn benchmark_task_conditioned_compression() {
1239        // Keep this reasonably small so CI coverage instrumentation stays fast.
1240        let content = generate_benchmark_code(200);
1241        let full_tokens = count_tokens(&content);
1242        let task = Some("fix authentication in validate_token");
1243
1244        let (_full_output, full_tok) = process_mode(
1245            &content,
1246            "full",
1247            "F1",
1248            "server.rs",
1249            "rs",
1250            full_tokens,
1251            CrpMode::Off,
1252            "server.rs",
1253            task,
1254        );
1255        let (_task_output, task_tok) = process_mode(
1256            &content,
1257            "task",
1258            "F1",
1259            "server.rs",
1260            "rs",
1261            full_tokens,
1262            CrpMode::Off,
1263            "server.rs",
1264            task,
1265        );
1266        let (_sig_output, sig_tok) = process_mode(
1267            &content,
1268            "signatures",
1269            "F1",
1270            "server.rs",
1271            "rs",
1272            full_tokens,
1273            CrpMode::Off,
1274            "server.rs",
1275            task,
1276        );
1277        let (_ref_output, ref_tok) = process_mode(
1278            &content,
1279            "reference",
1280            "F1",
1281            "server.rs",
1282            "rs",
1283            full_tokens,
1284            CrpMode::Off,
1285            "server.rs",
1286            task,
1287        );
1288
1289        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
1290        eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
1291        eprintln!("  full:       {full_tok:>6} tokens (baseline)");
1292        eprintln!(
1293            "  task:       {task_tok:>6} tokens ({:.0}% savings)",
1294            (1.0 - task_tok as f64 / full_tok as f64) * 100.0
1295        );
1296        eprintln!(
1297            "  signatures: {sig_tok:>6} tokens ({:.0}% savings)",
1298            (1.0 - sig_tok as f64 / full_tok as f64) * 100.0
1299        );
1300        eprintln!(
1301            "  reference:  {ref_tok:>6} tokens ({:.0}% savings)",
1302            (1.0 - ref_tok as f64 / full_tok as f64) * 100.0
1303        );
1304        eprintln!("================================================\n");
1305
1306        assert!(task_tok < full_tok, "task mode should save tokens");
1307        assert!(sig_tok < full_tok, "signatures should save tokens");
1308        assert!(ref_tok < sig_tok, "reference should be most compact");
1309    }
1310
1311    fn generate_benchmark_code(lines: usize) -> String {
1312        let mut code = Vec::with_capacity(lines);
1313        code.push("use std::collections::HashMap;".to_string());
1314        code.push("use crate::core::auth;".to_string());
1315        code.push(String::new());
1316        code.push("pub struct Server {".to_string());
1317        code.push("    config: Config,".to_string());
1318        code.push("    cache: HashMap<String, String>,".to_string());
1319        code.push("}".to_string());
1320        code.push(String::new());
1321        code.push("impl Server {".to_string());
1322        code.push(
1323            "    pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {"
1324                .to_string(),
1325        );
1326        code.push("        let decoded = auth::decode_jwt(token)?;".to_string());
1327        code.push("        if decoded.exp < chrono::Utc::now().timestamp() {".to_string());
1328        code.push("            return Err(AuthError::Expired);".to_string());
1329        code.push("        }".to_string());
1330        code.push("        Ok(decoded.claims)".to_string());
1331        code.push("    }".to_string());
1332        code.push(String::new());
1333
1334        let remaining = lines.saturating_sub(code.len());
1335        for i in 0..remaining {
1336            if i % 30 == 0 {
1337                code.push(format!(
1338                    "    pub fn handler_{i}(&self, req: Request) -> Response {{"
1339                ));
1340            } else if i % 30 == 29 {
1341                code.push("    }".to_string());
1342            } else {
1343                code.push(format!("        let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"));
1344            }
1345        }
1346        code.push("}".to_string());
1347        code.join("\n")
1348    }
1349
1350    #[test]
1351    fn instruction_file_detection() {
1352        assert!(is_instruction_file(
1353            "/home/user/.pi/agent/skills/committing-changes/SKILL.md"
1354        ));
1355        assert!(is_instruction_file("/workspace/.cursor/rules/lean-ctx.mdc"));
1356        assert!(is_instruction_file("/project/AGENTS.md"));
1357        assert!(is_instruction_file("/project/.cursorrules"));
1358        assert!(is_instruction_file("/home/user/.claude/rules/my-rule.md"));
1359        assert!(is_instruction_file("/skills/some-skill/README.md"));
1360
1361        assert!(!is_instruction_file("/project/src/main.rs"));
1362        assert!(!is_instruction_file("/project/config.json"));
1363        assert!(!is_instruction_file("/project/data/report.csv"));
1364    }
1365
1366    #[test]
1367    fn resolve_auto_mode_returns_full_for_instruction_files() {
1368        let mode = resolve_auto_mode(
1369            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
1370            5000,
1371            Some("read"),
1372        );
1373        assert_eq!(mode, "full", "SKILL.md must always be read in full");
1374
1375        let mode = resolve_auto_mode("/workspace/AGENTS.md", 3000, Some("read"));
1376        assert_eq!(mode, "full", "AGENTS.md must always be read in full");
1377
1378        let mode = resolve_auto_mode("/workspace/.cursorrules", 2000, None);
1379        assert_eq!(mode, "full", ".cursorrules must always be read in full");
1380    }
1381}