Skip to main content

lean_ctx/tools/
ctx_read.rs

1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a read request: the rendered text, the mode that was actually
/// applied, and a token count computed while that mode was processed.
///
/// Derives `Debug` and `Clone` so results can be logged and duplicated by callers.
#[derive(Debug, Clone)]
pub struct ReadOutput {
    /// Rendered output text returned to the caller.
    pub content: String,
    /// Mode that produced `content` (the requested mode, or what `"auto"` resolved to).
    pub resolved_mode: String,
    /// Approximate output token count from mode processing.
    /// The dispatch layer recounts the final assembled string for accurate savings.
    pub output_tokens: usize,
}
22
/// Marker line appended to compressed renderings so the reader knows the
/// text is not the complete source.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Modes whose rendered output is stored in the compressed-output cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is exactly one of [`CACHEABLE_MODES`].
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|candidate| *candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32    if crp_mode.is_tdd() {
33        format!("{mode}:tdd")
34    } else {
35        mode.to_string()
36    }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40    format!(
41        "{output}\n{COMPRESSED_HINT}\n  ctx_read(\"{file_path}\", mode=\"full\") | ctx_retrieve(\"{file_path}\")"
42    )
43}
44
45/// Reads a file as UTF-8 with lossy fallback, enforcing binary detection and max read size limit.
46/// Defense-in-depth: verifies that the canonical path stays within the process's project root
47/// (if determinable) even though callers SHOULD have already jail-checked the path.
48pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
49    if crate::core::binary_detect::is_binary_file(path) {
50        let msg = crate::core::binary_detect::binary_file_message(path);
51        return Err(std::io::Error::other(msg));
52    }
53
54    if let Ok(canonical) = std::path::Path::new(path).canonicalize() {
55        if let Ok(cwd) = std::env::current_dir() {
56            let root = crate::core::pathjail::canonicalize_or_self(&cwd);
57            if !canonical.starts_with(&root) {
58                let allow = crate::core::pathjail::allow_paths_from_env_and_config();
59                let data_dir_ok = crate::core::data_dir::lean_ctx_data_dir()
60                    .ok()
61                    .is_some_and(|d| canonical.starts_with(d));
62                let tmp_ok = canonical.starts_with(std::env::temp_dir());
63                if !allow.iter().any(|a| canonical.starts_with(a)) && !data_dir_ok && !tmp_ok {
64                    tracing::warn!(
65                        "defense-in-depth: path may escape project root: {}",
66                        canonical.display()
67                    );
68                }
69            }
70        }
71    }
72
73    let cap = crate::core::limits::max_read_bytes();
74
75    let file = open_with_retry(path)?;
76    let meta = file
77        .metadata()
78        .map_err(|e| std::io::Error::other(format!("cannot stat open file descriptor: {e}")))?;
79    if meta.len() > cap as u64 {
80        return Err(std::io::Error::other(format!(
81            "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
82             Increase the limit or use a line-range read: mode=\"lines:1-100\"",
83            meta.len(),
84            cap
85        )));
86    }
87
88    use std::io::Read;
89    let mut bytes = Vec::with_capacity(meta.len() as usize);
90    std::io::BufReader::new(file).read_to_end(&mut bytes)?;
91    match String::from_utf8(bytes) {
92        Ok(s) => Ok(s),
93        Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
94    }
95}
96
/// Opens `path` for reading; if the first attempt fails with `NotFound`, waits
/// 50 ms and tries exactly once more. Any other error is returned immediately.
/// Works around overlay/FUSE stat-cache races in container runtimes (Docker, Codex).
fn open_with_retry(path: &str) -> Result<std::fs::File, std::io::Error> {
    std::fs::File::open(path).or_else(|first_err| {
        if first_err.kind() != std::io::ErrorKind::NotFound {
            return Err(first_err);
        }
        let pause = std::time::Duration::from_millis(50);
        std::thread::sleep(pause);
        std::fs::File::open(path)
    })
}
109
110/// Reads a file through the cache and applies the requested compression mode.
111pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
112    handle_with_options(cache, path, mode, false, crp_mode, None)
113}
114
115/// Like `handle`, but invalidates the cache first to force a fresh disk read.
116pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
117    handle_with_options(cache, path, mode, true, crp_mode, None)
118}
119
120/// Reads a file with task-aware filtering to prioritize task-relevant content.
121pub fn handle_with_task(
122    cache: &mut SessionCache,
123    path: &str,
124    mode: &str,
125    crp_mode: CrpMode,
126    task: Option<&str>,
127) -> String {
128    handle_with_options(cache, path, mode, false, crp_mode, task)
129}
130
131/// Like `handle_with_task`, also returns the resolved mode name and pre-counted tokens.
132pub fn handle_with_task_resolved(
133    cache: &mut SessionCache,
134    path: &str,
135    mode: &str,
136    crp_mode: CrpMode,
137    task: Option<&str>,
138) -> ReadOutput {
139    handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
140}
141
142/// Fresh read with task-aware filtering (invalidates cache first).
143pub fn handle_fresh_with_task(
144    cache: &mut SessionCache,
145    path: &str,
146    mode: &str,
147    crp_mode: CrpMode,
148    task: Option<&str>,
149) -> String {
150    handle_with_options(cache, path, mode, true, crp_mode, task)
151}
152
153/// Fresh read with task-aware filtering, also returns the resolved mode name and pre-counted tokens.
154pub fn handle_fresh_with_task_resolved(
155    cache: &mut SessionCache,
156    path: &str,
157    mode: &str,
158    crp_mode: CrpMode,
159    task: Option<&str>,
160) -> ReadOutput {
161    handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
162}
163
164fn handle_with_options(
165    cache: &mut SessionCache,
166    path: &str,
167    mode: &str,
168    fresh: bool,
169    crp_mode: CrpMode,
170    task: Option<&str>,
171) -> String {
172    handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
173}
174
175fn handle_with_options_resolved(
176    cache: &mut SessionCache,
177    path: &str,
178    mode: &str,
179    fresh: bool,
180    crp_mode: CrpMode,
181    task: Option<&str>,
182) -> ReadOutput {
183    let file_ref = cache.get_file_ref(path);
184    let short = protocol::shorten_path(path);
185    let ext = Path::new(path)
186        .extension()
187        .and_then(|e| e.to_str())
188        .unwrap_or("");
189
190    if fresh {
191        cache.invalidate(path);
192    }
193
194    if mode == "diff" {
195        let (out, sent) = handle_diff(cache, path, &file_ref);
196        return ReadOutput {
197            content: out,
198            resolved_mode: "diff".into(),
199            output_tokens: sent,
200        };
201    }
202
203    if mode != "full" {
204        if let Some(existing) = cache.get(path) {
205            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
206            if stale {
207                cache.invalidate(path);
208            }
209        }
210    }
211
212    if let Some(existing) = cache.get(path) {
213        if mode == "full" {
214            let (out, sent) =
215                handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
216            let out = crate::core::redaction::redact_text_if_enabled(&out);
217            return ReadOutput {
218                content: out,
219                resolved_mode: "full".into(),
220                output_tokens: sent,
221            };
222        }
223        let content = existing.content();
224        let original_tokens = existing.original_tokens;
225        let resolved_mode = if mode == "auto" {
226            resolve_auto_mode(path, original_tokens, task)
227        } else {
228            mode.to_string()
229        };
230        if is_cacheable_mode(&resolved_mode) {
231            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
232            if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
233                let sent = count_tokens(cached_output);
234                let out = crate::core::redaction::redact_text_if_enabled(cached_output);
235                return ReadOutput {
236                    content: out,
237                    resolved_mode,
238                    output_tokens: sent,
239                };
240            }
241        }
242        let (out, sent) = process_mode(
243            &content,
244            &resolved_mode,
245            &file_ref,
246            &short,
247            ext,
248            original_tokens,
249            crp_mode,
250            path,
251            task,
252        );
253        if is_cacheable_mode(&resolved_mode) {
254            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
255            cache.set_compressed(path, &cache_key, out.clone());
256        }
257        let out = crate::core::redaction::redact_text_if_enabled(&out);
258        return ReadOutput {
259            content: out,
260            resolved_mode,
261            output_tokens: sent,
262        };
263    }
264
265    let content = match read_file_lossy(path) {
266        Ok(c) => c,
267        Err(e) => {
268            let msg = format!("ERROR: {e}");
269            let tokens = count_tokens(&msg);
270            return ReadOutput {
271                content: msg,
272                resolved_mode: "error".into(),
273                output_tokens: tokens,
274            };
275        }
276    };
277
278    let similar_hint = find_similar_and_update_semantic_index(path, &content);
279    let graph_hint = build_graph_related_hint(path);
280
281    let store_result = cache.store(path, &content);
282
283    if mode == "full" {
284        cache.mark_full_delivered(path);
285        let (mut output, sent) = format_full_output(
286            &file_ref,
287            &short,
288            ext,
289            &content,
290            store_result.original_tokens,
291            store_result.line_count,
292            task,
293        );
294        if let Some(hint) = &graph_hint {
295            output.push_str(&format!("\n{hint}"));
296        }
297        if let Some(hint) = similar_hint {
298            output.push_str(&format!("\n{hint}"));
299        }
300        let output = crate::core::redaction::redact_text_if_enabled(&output);
301        return ReadOutput {
302            content: output,
303            resolved_mode: "full".into(),
304            output_tokens: sent,
305        };
306    }
307
308    let resolved_mode = if mode == "auto" {
309        resolve_auto_mode(path, store_result.original_tokens, task)
310    } else {
311        mode.to_string()
312    };
313
314    let (mut output, _sent) = process_mode(
315        &content,
316        &resolved_mode,
317        &file_ref,
318        &short,
319        ext,
320        store_result.original_tokens,
321        crp_mode,
322        path,
323        task,
324    );
325    if is_cacheable_mode(&resolved_mode) {
326        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
327        cache.set_compressed(path, &cache_key, output.clone());
328    }
329    if let Some(hint) = &graph_hint {
330        output.push_str(&format!("\n{hint}"));
331    }
332    if let Some(hint) = similar_hint {
333        output.push_str(&format!("\n{hint}"));
334    }
335    let output = crate::core::redaction::redact_text_if_enabled(&output);
336    let final_tokens = count_tokens(&output);
337    ReadOutput {
338        content: output,
339        resolved_mode,
340        output_tokens: final_tokens,
341    }
342}
343
/// Reports whether `path` looks like an agent-instruction / rules file.
///
/// Matching is case-insensitive (the path is lowercased first). A path qualifies
/// when its final component is one of the known instruction file names, or when the
/// lowercased path contains one of the known directory / file markers.
pub fn is_instruction_file(path: &str) -> bool {
    const INSTRUCTION_FILENAMES: [&str; 7] = [
        "skill.md",
        "agents.md",
        "rules.md",
        ".cursorrules",
        ".clinerules",
        "lean-ctx.md",
        "lean-ctx.mdc",
    ];
    const INSTRUCTION_PATH_MARKERS: [&str; 4] = [
        "/skills/",
        "/.cursor/rules/",
        "/.claude/rules/",
        "/agents.md",
    ];

    let normalized = path.to_lowercase();
    let basename = std::path::Path::new(&normalized)
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or_default();

    if INSTRUCTION_FILENAMES.contains(&basename) {
        return true;
    }
    INSTRUCTION_PATH_MARKERS
        .iter()
        .any(|marker| normalized.contains(*marker))
}
365
366fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
367    if is_instruction_file(file_path) {
368        return "full".to_string();
369    }
370
371    // Priority 1: Intent Router with budget/pressure-aware degradation.
372    // Only fall through to Predictor/Bandit if the router returns "auto".
373    let intent_query = task.unwrap_or("read");
374    let route = crate::core::intent_router::route_v1(intent_query);
375    let intent_mode = &route.decision.effective_read_mode;
376    if intent_mode != "auto" && intent_mode != "reference" {
377        return intent_mode.clone();
378    }
379
380    // Priority 2: FileSignature-based predictor
381    let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
382    let predictor = crate::core::mode_predictor::ModePredictor::new();
383    let mut predicted = predictor
384        .predict_best_mode(&sig)
385        .unwrap_or_else(|| "full".to_string());
386    if predicted == "auto" {
387        predicted = "full".to_string();
388    }
389
390    // Priority 3: Bandit exploration when budget is tight
391    if let Some(project_root) =
392        crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
393    {
394        let ext = std::path::Path::new(file_path)
395            .extension()
396            .and_then(|e| e.to_str())
397            .unwrap_or("");
398        let bucket = match original_tokens {
399            0..=2000 => "sm",
400            2001..=10000 => "md",
401            10001..=50000 => "lg",
402            _ => "xl",
403        };
404        let bandit_key = format!("{ext}_{bucket}");
405        let mut store = crate::core::bandit::BanditStore::load(&project_root);
406        let bandit = store.get_or_create(&bandit_key);
407        let arm = bandit.select_arm();
408        if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
409            predicted = "aggressive".to_string();
410        }
411    }
412
413    // Priority 4: Adaptive mode policy
414    let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
415    let chosen = policy.choose_auto_mode(task, &predicted);
416
417    if original_tokens > 2000 {
418        if predicted == "map" || predicted == "signatures" {
419            if chosen != "map" && chosen != "signatures" {
420                return predicted;
421            }
422        } else if chosen == "full" && predicted != "full" {
423            return predicted;
424        }
425    }
426
427    chosen
428}
429
430fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
431    let cfg = crate::core::config::Config::load();
432    let profile = crate::core::config::MemoryProfile::effective(&cfg);
433    if !profile.semantic_cache_enabled() {
434        return None;
435    }
436
437    let project_root = detect_project_root(path);
438    let session_id = format!("{}", std::process::id());
439    let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
440
441    let similar = index.find_similar(content, 0.7);
442    let relevant: Vec<_> = similar
443        .into_iter()
444        .filter(|(p, _)| p != path)
445        .take(3)
446        .collect();
447
448    index.add_file(path, content, &session_id);
449    let _ = index.save(&project_root);
450
451    if relevant.is_empty() {
452        return None;
453    }
454
455    let hints: Vec<String> = relevant
456        .iter()
457        .map(|(p, score)| format!("  {p} ({:.0}% similar)", score * 100.0))
458        .collect();
459
460    Some(format!(
461        "[semantic: {} similar file(s) in cache]\n{}",
462        relevant.len(),
463        hints.join("\n")
464    ))
465}
466
467fn detect_project_root(path: &str) -> String {
468    crate::core::protocol::detect_project_root_or_cwd(path)
469}
470
471fn build_graph_related_hint(path: &str) -> Option<String> {
472    let project_root = detect_project_root(path);
473    crate::core::graph_context::build_related_hint(path, &project_root, 5)
474}
475
476const AUTO_DELTA_THRESHOLD: f64 = 0.6;
477
478/// Re-reads from disk; if content changed and delta is compact, sends auto-delta.
479fn handle_full_with_auto_delta(
480    cache: &mut SessionCache,
481    path: &str,
482    file_ref: &str,
483    short: &str,
484    ext: &str,
485    task: Option<&str>,
486) -> (String, usize) {
487    let Ok(disk_content) = read_file_lossy(path) else {
488        cache.record_cache_hit(path);
489        let out = if let Some(existing) = cache.get(path) {
490            format!(
491                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
492                existing.read_count, existing.line_count
493            )
494        } else {
495            format!("[file read failed and no cached version available] {file_ref}={short}")
496        };
497        let sent = count_tokens(&out);
498        return (out, sent);
499    };
500
501    let old_content = cache
502        .get(path)
503        .map(crate::core::cache::CacheEntry::content)
504        .unwrap_or_default();
505    let store_result = cache.store(path, &disk_content);
506
507    if store_result.was_hit {
508        if store_result.full_content_delivered {
509            let out = format!(
510                "{file_ref}={short} cached {}t {}L\nFile content unchanged since last read (same hash). Already in your context window.",
511                store_result.read_count, store_result.line_count
512            );
513            let sent = count_tokens(&out);
514            return (out, sent);
515        }
516        cache.mark_full_delivered(path);
517        return format_full_output(
518            file_ref,
519            short,
520            ext,
521            &disk_content,
522            store_result.original_tokens,
523            store_result.line_count,
524            task,
525        );
526    }
527
528    let diff = compressor::diff_content(&old_content, &disk_content);
529    let diff_tokens = count_tokens(&diff);
530    let full_tokens = store_result.original_tokens;
531
532    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
533        let savings = protocol::format_savings(full_tokens, diff_tokens);
534        let out = format!(
535            "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
536            disk_content.lines().count()
537        );
538        return (out, diff_tokens);
539    }
540
541    format_full_output(
542        file_ref,
543        short,
544        ext,
545        &disk_content,
546        store_result.original_tokens,
547        store_result.line_count,
548        task,
549    )
550}
551
552fn format_full_output(
553    file_ref: &str,
554    short: &str,
555    ext: &str,
556    content: &str,
557    original_tokens: usize,
558    line_count: usize,
559    task: Option<&str>,
560) -> (String, usize) {
561    let tokens = original_tokens;
562    let metadata = build_header(file_ref, short, ext, content, line_count, true);
563
564    let mut reordered: Option<String> = None;
565    {
566        let profile = crate::core::profiles::active_profile();
567        let cfg = profile.layout;
568        if cfg.enabled_effective() && line_count >= cfg.min_lines_effective() {
569            let task_str = task.unwrap_or("");
570            if !task_str.is_empty() {
571                let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
572                let r = crate::core::attention_layout_driver::maybe_reorder_for_attention(
573                    content, &keywords, &cfg,
574                );
575                if !r.skipped && r.changed {
576                    reordered = Some(r.output);
577                }
578            }
579        }
580    }
581
582    let content_for_output = reordered.as_deref().unwrap_or(content);
583
584    let mut sym = SymbolMap::new();
585    let idents = symbol_map::extract_identifiers(content_for_output, ext);
586    for ident in &idents {
587        sym.register(ident);
588    }
589
590    if sym.len() >= 3 {
591        let sym_table = sym.format_table();
592        let compressed = sym.apply(content_for_output);
593        let original_tok = count_tokens(content_for_output);
594        let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
595        let net_saving = original_tok.saturating_sub(compressed_tok);
596        if original_tok > 0 && net_saving * 100 / original_tok >= 5 {
597            let output = format!("{metadata}\n{compressed}{sym_table}");
598            let sent = count_tokens(&output);
599            let savings = protocol::format_savings(tokens, sent);
600            return (format!("{output}\n{savings}"), sent);
601        }
602    }
603
604    let output = format!("{metadata}\n{content_for_output}");
605    let sent = count_tokens(&output);
606    let savings = protocol::format_savings(tokens, sent);
607    (format!("{output}\n{savings}"), sent)
608}
609
610fn build_header(
611    file_ref: &str,
612    short: &str,
613    ext: &str,
614    content: &str,
615    line_count: usize,
616    include_deps: bool,
617) -> String {
618    let mut header = format!("{file_ref}={short} {line_count}L");
619
620    if include_deps {
621        let dep_info = deps::extract_deps(content, ext);
622        if !dep_info.imports.is_empty() {
623            let imports_str: Vec<&str> = dep_info
624                .imports
625                .iter()
626                .take(8)
627                .map(std::string::String::as_str)
628                .collect();
629            header.push_str(&format!("\n deps {}", imports_str.join(",")));
630        }
631        if !dep_info.exports.is_empty() {
632            let exports_str: Vec<&str> = dep_info
633                .exports
634                .iter()
635                .take(8)
636                .map(std::string::String::as_str)
637                .collect();
638            header.push_str(&format!("\n exports {}", exports_str.join(",")));
639        }
640    }
641
642    header
643}
644
645#[allow(clippy::too_many_arguments)]
646fn process_mode(
647    content: &str,
648    mode: &str,
649    file_ref: &str,
650    short: &str,
651    ext: &str,
652    original_tokens: usize,
653    crp_mode: CrpMode,
654    file_path: &str,
655    task: Option<&str>,
656) -> (String, usize) {
657    let line_count = content.lines().count();
658
659    match mode {
660        "auto" => {
661            let chosen = resolve_auto_mode(file_path, original_tokens, task);
662            process_mode(
663                content,
664                &chosen,
665                file_ref,
666                short,
667                ext,
668                original_tokens,
669                crp_mode,
670                file_path,
671                task,
672            )
673        }
674        "full" => format_full_output(
675            file_ref,
676            short,
677            ext,
678            content,
679            original_tokens,
680            line_count,
681            task,
682        ),
683        "signatures" => {
684            let sigs = signatures::extract_signatures(content, ext);
685            let dep_info = deps::extract_deps(content, ext);
686
687            let mut output = format!("{file_ref}={short} {line_count}L");
688            if !dep_info.imports.is_empty() {
689                let imports_str: Vec<&str> = dep_info
690                    .imports
691                    .iter()
692                    .take(8)
693                    .map(std::string::String::as_str)
694                    .collect();
695                output.push_str(&format!("\n deps {}", imports_str.join(",")));
696            }
697            for sig in &sigs {
698                output.push('\n');
699                if crp_mode.is_tdd() {
700                    output.push_str(&sig.to_tdd());
701                } else {
702                    output.push_str(&sig.to_compact());
703                }
704            }
705            let sent = count_tokens(&output);
706            let savings = protocol::format_savings(original_tokens, sent);
707            (
708                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
709                sent,
710            )
711        }
712        "map" => {
713            if ext == "php" {
714                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
715                {
716                    let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
717                    let sent = count_tokens(&output);
718                    let savings = protocol::format_savings(original_tokens, sent);
719                    output.push('\n');
720                    output.push_str(&savings);
721                    return (append_compressed_hint(&output, file_path), sent);
722                }
723            }
724
725            let sigs = signatures::extract_signatures(content, ext);
726            let dep_info = deps::extract_deps(content, ext);
727
728            let mut output = format!("{file_ref}={short} {line_count}L");
729
730            if !dep_info.imports.is_empty() {
731                output.push_str("\n  deps: ");
732                output.push_str(&dep_info.imports.join(", "));
733            }
734
735            if !dep_info.exports.is_empty() {
736                output.push_str("\n  exports: ");
737                output.push_str(&dep_info.exports.join(", "));
738            }
739
740            let key_sigs: Vec<&signatures::Signature> = sigs
741                .iter()
742                .filter(|s| s.is_exported || s.indent == 0)
743                .collect();
744
745            if !key_sigs.is_empty() {
746                output.push_str("\n  API:");
747                for sig in &key_sigs {
748                    output.push_str("\n    ");
749                    if crp_mode.is_tdd() {
750                        output.push_str(&sig.to_tdd());
751                    } else {
752                        output.push_str(&sig.to_compact());
753                    }
754                }
755            }
756
757            let sent = count_tokens(&output);
758            let savings = protocol::format_savings(original_tokens, sent);
759            (
760                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
761                sent,
762            )
763        }
764        "aggressive" => {
765            #[cfg(feature = "tree-sitter")]
766            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
767            #[cfg(not(feature = "tree-sitter"))]
768            let ast_pruned: Option<String> = None;
769
770            let base = ast_pruned.as_deref().unwrap_or(content);
771
772            let session_intent = crate::core::session::SessionState::load_latest()
773                .and_then(|s| s.active_structured_intent);
774            let raw = if let Some(ref intent) = session_intent {
775                compressor::task_aware_compress(base, Some(ext), intent)
776            } else {
777                compressor::aggressive_compress(base, Some(ext))
778            };
779            let compressed = compressor::safeguard_ratio(content, &raw);
780            let header = build_header(file_ref, short, ext, content, line_count, true);
781
782            let mut sym = SymbolMap::new();
783            let idents = symbol_map::extract_identifiers(&compressed, ext);
784            for ident in &idents {
785                sym.register(ident);
786            }
787
788            if sym.len() >= 3 {
789                let sym_table = sym.format_table();
790                let sym_applied = sym.apply(&compressed);
791                let orig_tok = count_tokens(&compressed);
792                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
793                let net = orig_tok.saturating_sub(comp_tok);
794                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
795                    let savings = protocol::format_savings(original_tokens, comp_tok);
796                    return (
797                        append_compressed_hint(
798                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
799                            file_path,
800                        ),
801                        comp_tok,
802                    );
803                }
804                let savings = protocol::format_savings(original_tokens, orig_tok);
805                return (
806                    append_compressed_hint(
807                        &format!("{header}\n{compressed}\n{savings}"),
808                        file_path,
809                    ),
810                    orig_tok,
811                );
812            }
813
814            let sent = count_tokens(&compressed);
815            let savings = protocol::format_savings(original_tokens, sent);
816            (
817                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
818                sent,
819            )
820        }
821        "entropy" => {
822            let result = entropy::entropy_compress_adaptive(content, file_path);
823            let avg_h = entropy::analyze_entropy(content).avg_entropy;
824            let header = build_header(file_ref, short, ext, content, line_count, false);
825            let techs = result.techniques.join(", ");
826            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
827            let sent = count_tokens(&output);
828            let savings = protocol::format_savings(original_tokens, sent);
829            let compression_ratio = if original_tokens > 0 {
830                1.0 - (sent as f64 / original_tokens as f64)
831            } else {
832                0.0
833            };
834            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
835            (
836                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
837                sent,
838            )
839        }
840        "task" => {
841            let task_str = task.unwrap_or("");
842            if task_str.is_empty() {
843                let header = build_header(file_ref, short, ext, content, line_count, true);
844                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
845                let sent = count_tokens(&out);
846                return (out, sent);
847            }
848            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
849            if keywords.is_empty() {
850                let header = build_header(file_ref, short, ext, content, line_count, true);
851                let out = format!(
852                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
853                );
854                let sent = count_tokens(&out);
855                return (out, sent);
856            }
857            let filtered =
858                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
859            let filtered_lines = filtered.lines().count();
860            let header = format!(
861                "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
862            );
863            let project_root = detect_project_root(file_path);
864            let graph_ctx = crate::core::graph_context::build_graph_context(
865                file_path,
866                &project_root,
867                Some(crate::core::graph_context::GraphContextOptions::default()),
868            )
869            .map(|c| crate::core::graph_context::format_graph_context(&c))
870            .unwrap_or_default();
871
872            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
873            let savings = protocol::format_savings(original_tokens, sent);
874            (
875                append_compressed_hint(
876                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
877                    file_path,
878                ),
879                sent,
880            )
881        }
882        "reference" => {
883            let tok = count_tokens(content);
884            let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
885            let sent = count_tokens(&output);
886            let savings = protocol::format_savings(original_tokens, sent);
887            (format!("{output}\n{savings}"), sent)
888        }
889        mode if mode.starts_with("lines:") => {
890            let range_str = &mode[6..];
891            let extracted = extract_line_range(content, range_str);
892            let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
893            let sent = count_tokens(&extracted);
894            let savings = protocol::format_savings(original_tokens, sent);
895            (format!("{header}\n{extracted}\n{savings}"), sent)
896        }
897        unknown => {
898            let header = build_header(file_ref, short, ext, content, line_count, true);
899            let out = format!(
900                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
901            );
902            let sent = count_tokens(&out);
903            (out, sent)
904        }
905    }
906}
907
/// Renders the lines of `content` selected by `range_str`, each prefixed with a
/// right-aligned, 1-based line number gutter (`   7| text`).
///
/// `range_str` is a comma-separated list of specs, processed in the order given
/// (so lines may repeat or appear out of order):
///
/// * `A-B` — inclusive range. A start that is missing or not a number falls back
///   to `1`; an end that is missing or not a number falls back to the last line.
///   The start is raised to at least `1` and the end is capped at the last line.
/// * `N` — a single line. Numbers outside the file and specs that are not
///   numbers are skipped silently.
///
/// Returns `"No lines matched the range."` when nothing was selected.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    let mut selected: Vec<String> = Vec::new();

    // Appends line `n` with its gutter; anything outside `1..=total` is ignored.
    let mut push_line = |n: usize| {
        if (1..=total).contains(&n) {
            selected.push(format!("{n:>4}| {}", lines[n - 1]));
        }
    };

    for part in range_str.split(',').map(str::trim) {
        match part.split_once('-') {
            Some((start_s, end_s)) => {
                let start = start_s.trim().parse::<usize>().unwrap_or(1).max(1);
                let end = end_s.trim().parse::<usize>().unwrap_or(total).min(total);
                (start..=end).for_each(&mut push_line);
            }
            None => {
                if let Ok(n) = part.parse::<usize>() {
                    push_line(n);
                }
            }
        }
    }

    if selected.is_empty() {
        return "No lines matched the range.".to_string();
    }
    selected.join("\n")
}
936
937fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
938    let short = protocol::shorten_path(path);
939    let old_content = cache.get(path).map(crate::core::cache::CacheEntry::content);
940
941    let new_content = match read_file_lossy(path) {
942        Ok(c) => c,
943        Err(e) => {
944            let msg = format!("ERROR: {e}");
945            let tokens = count_tokens(&msg);
946            return (msg, tokens);
947        }
948    };
949
950    let original_tokens = count_tokens(&new_content);
951
952    let diff_output = if let Some(old) = &old_content {
953        compressor::diff_content(old, &new_content)
954    } else {
955        format!("[first read]\n{new_content}")
956    };
957
958    cache.store(path, &new_content);
959
960    let sent = count_tokens(&diff_output);
961    let savings = protocol::format_savings(original_tokens, sent);
962    (
963        format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}"),
964        sent,
965    )
966}
967
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Calls `process_mode` with the fixture values shared by the tests below:
    /// file ref `F1`, extension `rs`, CRP off, and `name` used as both the
    /// short name and the file path.
    fn run_mode(
        content: &str,
        mode: &str,
        name: &str,
        original_tokens: usize,
        task: Option<&str>,
    ) -> (String, usize) {
        process_mode(
            content,
            mode,
            "F1",
            name,
            "rs",
            original_tokens,
            CrpMode::Off,
            name,
            task,
        )
    }

    /// Reads `path` in `lines:1-1` mode and returns the text of the second
    /// output line (the one after the header) with its gutter stripped.
    fn first_line_via_lines_mode(cache: &mut SessionCache, path: &str) -> String {
        let read = handle_with_task_resolved(cache, path, "lines:1-1", CrpMode::Off, None);
        let row = read.content.lines().nth(1).unwrap_or_default().trim();
        row.split_once('|')
            .map_or(row, |(_, s)| s.trim())
            .to_string()
    }

    /// Builds a synthetic Rust source file: a fixed preamble containing
    /// `validate_token`, filler handler code up to roughly `lines` lines, and a
    /// closing brace.
    fn generate_benchmark_code(lines: usize) -> String {
        const PREAMBLE: &[&str] = &[
            "use std::collections::HashMap;",
            "use crate::core::auth;",
            "",
            "pub struct Server {",
            "    config: Config,",
            "    cache: HashMap<String, String>,",
            "}",
            "",
            "impl Server {",
            "    pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {",
            "        let decoded = auth::decode_jwt(token)?;",
            "        if decoded.exp < chrono::Utc::now().timestamp() {",
            "            return Err(AuthError::Expired);",
            "        }",
            "        Ok(decoded.claims)",
            "    }",
            "",
        ];

        let mut code: Vec<String> = Vec::with_capacity(lines);
        code.extend(PREAMBLE.iter().map(|line| (*line).to_string()));

        let remaining = lines.saturating_sub(code.len());
        code.extend((0..remaining).map(|i| match i % 30 {
            0 => format!("    pub fn handler_{i}(&self, req: Request) -> Response {{"),
            29 => "    }".to_string(),
            _ => format!("        let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"),
        }));
        code.push("}".to_string());
        code.join("\n")
    }

    #[test]
    fn test_header_toon_format_no_brackets() {
        let source = "use std::io;\nfn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", source, 2, false);
        assert!(!header.contains('['));
        assert!(!header.contains(']'));
        assert!(header.contains("F1=main.rs 2L"));
    }

    #[test]
    fn test_header_toon_deps_indented() {
        let source = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", source, 3, true);
        if !header.contains("deps") {
            // No dependency section was emitted, so there is no layout to check.
            return;
        }
        assert!(
            header.contains("\n deps "),
            "deps should use indented TOON format"
        );
        assert!(
            !header.contains("deps:["),
            "deps should not use bracket format"
        );
    }

    #[test]
    fn test_header_toon_saves_tokens() {
        let source = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
        let old_tokens = count_tokens("F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]");
        let new_tokens = count_tokens(&build_header("F1", "main.rs", "rs", source, 4, true));
        assert!(
            new_tokens <= old_tokens,
            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
        );
    }

    #[test]
    fn test_tdd_symbols_are_compact() {
        for sym in [
            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
        ] {
            let tok = count_tokens(sym);
            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
        }
    }

    #[test]
    fn test_task_mode_filters_content() {
        // Every 20th line mentions the task keyword; the rest is unrelated filler.
        let content = (0..200)
            .map(|i| match i % 20 {
                0 => format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}"),
                _ => format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}"),
            })
            .collect::<Vec<_>>()
            .join("\n");
        let full_tokens = count_tokens(&content);

        let (result, result_tokens) = run_mode(
            &content,
            "task",
            "test.rs",
            full_tokens,
            Some("fix bug in validate_token"),
        );

        assert!(
            result_tokens < full_tokens,
            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
        );
        assert!(
            result.contains("task-filtered"),
            "output should contain task-filtered marker"
        );
    }

    #[test]
    fn test_task_mode_without_task_returns_full() {
        let source = "fn main() {}\nfn helper() {}\n";
        let (result, _sent) = run_mode(source, "task", "test.rs", count_tokens(source), None);
        assert!(
            result.contains("no task set"),
            "should indicate no task: {result}"
        );
    }

    #[test]
    fn test_reference_mode_one_line() {
        let source = "fn main() {}\nfn helper() {}\nfn other() {}\n";
        let (result, _sent) = run_mode(source, "reference", "test.rs", count_tokens(source), None);
        assert!(
            result.lines().count() <= 3,
            "reference mode should be very compact, got {} lines",
            result.lines().count()
        );
        assert!(result.contains("lines"), "should contain line count");
        assert!(result.contains("tok"), "should contain token count");
    }

    #[test]
    fn cached_lines_mode_invalidates_on_mtime_change() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("file.txt");
        let p = path.to_string_lossy().into_owned();

        std::fs::write(&path, "one\nsecond\n").unwrap();
        let mut cache = SessionCache::new();
        assert_eq!(first_line_via_lines_mode(&mut cache, &p), "one");

        // Wait a full second before rewriting so the file's mtime differs.
        std::thread::sleep(Duration::from_secs(1));
        std::fs::write(&path, "two\nsecond\n").unwrap();
        assert_eq!(first_line_via_lines_mode(&mut cache, &p), "two");
    }

    #[test]
    #[cfg_attr(tarpaulin, ignore)]
    fn benchmark_task_conditioned_compression() {
        // Keep this reasonably small so CI coverage instrumentation stays fast.
        let content = generate_benchmark_code(200);
        let full_tokens = count_tokens(&content);
        let task = Some("fix authentication in validate_token");

        // Only the token count of each mode matters here; the text is discarded.
        let tokens_for =
            |mode: &str| run_mode(&content, mode, "server.rs", full_tokens, task).1;
        let full_tok = tokens_for("full");
        let task_tok = tokens_for("task");
        let sig_tok = tokens_for("signatures");
        let ref_tok = tokens_for("reference");

        let savings_pct = |tok: usize| (1.0 - tok as f64 / full_tok as f64) * 100.0;

        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
        eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
        eprintln!("  full:       {full_tok:>6} tokens (baseline)");
        eprintln!(
            "  task:       {task_tok:>6} tokens ({:.0}% savings)",
            savings_pct(task_tok)
        );
        eprintln!(
            "  signatures: {sig_tok:>6} tokens ({:.0}% savings)",
            savings_pct(sig_tok)
        );
        eprintln!(
            "  reference:  {ref_tok:>6} tokens ({:.0}% savings)",
            savings_pct(ref_tok)
        );
        eprintln!("================================================\n");

        assert!(task_tok < full_tok, "task mode should save tokens");
        assert!(sig_tok < full_tok, "signatures should save tokens");
        assert!(ref_tok < sig_tok, "reference should be most compact");
    }

    #[test]
    fn instruction_file_detection() {
        // Paths that must be recognised as agent instruction files.
        assert!(is_instruction_file(
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md"
        ));
        assert!(is_instruction_file("/workspace/.cursor/rules/lean-ctx.mdc"));
        assert!(is_instruction_file("/project/AGENTS.md"));
        assert!(is_instruction_file("/project/.cursorrules"));
        assert!(is_instruction_file("/home/user/.claude/rules/my-rule.md"));
        assert!(is_instruction_file("/skills/some-skill/README.md"));

        // Ordinary project files must not be.
        assert!(!is_instruction_file("/project/src/main.rs"));
        assert!(!is_instruction_file("/project/config.json"));
        assert!(!is_instruction_file("/project/data/report.csv"));
    }

    #[test]
    fn resolve_auto_mode_returns_full_for_instruction_files() {
        assert_eq!(
            resolve_auto_mode(
                "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
                5000,
                Some("read"),
            ),
            "full",
            "SKILL.md must always be read in full"
        );
        assert_eq!(
            resolve_auto_mode("/workspace/AGENTS.md", 3000, Some("read")),
            "full",
            "AGENTS.md must always be read in full"
        );
        assert_eq!(
            resolve_auto_mode("/workspace/.cursorrules", 2000, None),
            "full",
            ".cursorrules must always be read in full"
        );
    }
}