Skip to main content

lean_ctx/tools/
ctx_read.rs

1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a read: the rendered output string, the mode that was actually
/// applied, and a token count computed during mode processing.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so values can be logged,
/// duplicated, and compared in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadOutput {
    /// Rendered text returned to the caller.
    pub content: String,
    /// Name of the mode that produced `content` (the concrete mode when the
    /// request was `auto`; `"error"` when the file could not be read).
    pub resolved_mode: String,
    /// Approximate output token count from mode processing.
    /// The dispatch layer recounts the final assembled string for accurate savings.
    pub output_tokens: usize,
}
22
/// Marker line appended after compressed output (see `append_compressed_hint`)
/// telling the reader that `mode="full"` returns the complete source.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";
24
/// Modes whose rendered output is stored in, and served from, the
/// per-file compressed-output cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is listed in `CACHEABLE_MODES`.
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|&candidate| candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32    if crp_mode.is_tdd() {
33        format!("{mode}:tdd")
34    } else {
35        mode.to_string()
36    }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40    format!(
41        "{output}\n{COMPRESSED_HINT}\n  ctx_read(\"{file_path}\", mode=\"full\") | ctx_retrieve(\"{file_path}\")"
42    )
43}
44
45/// Reads a file as UTF-8 with lossy fallback, enforcing binary detection and max read size limit.
46/// Defense-in-depth: verifies that the canonical path stays within the process's project root
47/// (if determinable) even though callers SHOULD have already jail-checked the path.
48pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
49    if crate::core::binary_detect::is_binary_file(path) {
50        let msg = crate::core::binary_detect::binary_file_message(path);
51        return Err(std::io::Error::other(msg));
52    }
53
54    if let Ok(canonical) = std::path::Path::new(path).canonicalize() {
55        if let Ok(cwd) = std::env::current_dir() {
56            let root = crate::core::pathjail::canonicalize_or_self(&cwd);
57            if !canonical.starts_with(&root) {
58                let allow = crate::core::pathjail::allow_paths_from_env_and_config();
59                let data_dir_ok = crate::core::data_dir::lean_ctx_data_dir()
60                    .ok()
61                    .is_some_and(|d| canonical.starts_with(d));
62                let tmp_ok = canonical.starts_with(std::env::temp_dir());
63                if !allow.iter().any(|a| canonical.starts_with(a)) && !data_dir_ok && !tmp_ok {
64                    tracing::warn!(
65                        "defense-in-depth: path may escape project root: {}",
66                        canonical.display()
67                    );
68                }
69            }
70        }
71    }
72
73    let cap = crate::core::limits::max_read_bytes();
74    let meta = std::fs::metadata(path).map_err(|e| {
75        std::io::Error::other(format!("cannot stat file (refusing unbounded read): {e}"))
76    })?;
77    if meta.len() > cap as u64 {
78        return Err(std::io::Error::other(format!(
79            "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
80             Increase the limit or use a line-range read: mode=\"lines:1-100\"",
81            meta.len(),
82            cap
83        )));
84    }
85
86    let bytes = std::fs::read(path)?;
87    match String::from_utf8(bytes) {
88        Ok(s) => Ok(s),
89        Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
90    }
91}
92
93/// Reads a file through the cache and applies the requested compression mode.
94pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
95    handle_with_options(cache, path, mode, false, crp_mode, None)
96}
97
98/// Like `handle`, but invalidates the cache first to force a fresh disk read.
99pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
100    handle_with_options(cache, path, mode, true, crp_mode, None)
101}
102
103/// Reads a file with task-aware filtering to prioritize task-relevant content.
104pub fn handle_with_task(
105    cache: &mut SessionCache,
106    path: &str,
107    mode: &str,
108    crp_mode: CrpMode,
109    task: Option<&str>,
110) -> String {
111    handle_with_options(cache, path, mode, false, crp_mode, task)
112}
113
114/// Like `handle_with_task`, also returns the resolved mode name and pre-counted tokens.
115pub fn handle_with_task_resolved(
116    cache: &mut SessionCache,
117    path: &str,
118    mode: &str,
119    crp_mode: CrpMode,
120    task: Option<&str>,
121) -> ReadOutput {
122    handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
123}
124
125/// Fresh read with task-aware filtering (invalidates cache first).
126pub fn handle_fresh_with_task(
127    cache: &mut SessionCache,
128    path: &str,
129    mode: &str,
130    crp_mode: CrpMode,
131    task: Option<&str>,
132) -> String {
133    handle_with_options(cache, path, mode, true, crp_mode, task)
134}
135
136/// Fresh read with task-aware filtering, also returns the resolved mode name and pre-counted tokens.
137pub fn handle_fresh_with_task_resolved(
138    cache: &mut SessionCache,
139    path: &str,
140    mode: &str,
141    crp_mode: CrpMode,
142    task: Option<&str>,
143) -> ReadOutput {
144    handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
145}
146
147fn handle_with_options(
148    cache: &mut SessionCache,
149    path: &str,
150    mode: &str,
151    fresh: bool,
152    crp_mode: CrpMode,
153    task: Option<&str>,
154) -> String {
155    handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
156}
157
/// Core read path behind every `handle*` wrapper.
///
/// Flow, in order:
/// 1. `fresh` invalidates the cache entry for `path`.
/// 2. `mode == "diff"` is delegated to `handle_diff` (defined outside this view).
/// 3. For non-`full` modes a stale cache entry (per `is_cache_entry_stale`) is invalidated.
/// 4. On a cache hit: `full` goes through `handle_full_with_auto_delta`; other modes are
///    rendered from the cached content, with `map`/`signatures` output served from / stored in
///    the compressed-output cache.
/// 5. On a cache miss: the file is read from disk, stored in the cache, rendered, and the
///    graph / semantic-similarity hints are appended.
///
/// All returned content passes through `redact_text_if_enabled`. A failed disk read yields
/// `"ERROR: …"` content with `resolved_mode` set to `"error"`.
fn handle_with_options_resolved(
    cache: &mut SessionCache,
    path: &str,
    mode: &str,
    fresh: bool,
    crp_mode: CrpMode,
    task: Option<&str>,
) -> ReadOutput {
    let file_ref = cache.get_file_ref(path);
    let short = protocol::shorten_path(path);
    let ext = Path::new(path)
        .extension()
        .and_then(|e| e.to_str())
        .unwrap_or("");

    if fresh {
        cache.invalidate(path);
    }

    if mode == "diff" {
        let (out, sent) = handle_diff(cache, path, &file_ref);
        return ReadOutput {
            content: out,
            resolved_mode: "diff".into(),
            output_tokens: sent,
        };
    }

    // `full` re-reads from disk itself (see the cache-hit branch below), so the staleness
    // check is only needed for the modes that render from cached content.
    if mode != "full" {
        if let Some(existing) = cache.get(path) {
            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
            if stale {
                cache.invalidate(path);
            }
        }
    }

    // ---- Cache hit ----
    if let Some(existing) = cache.get(path) {
        if mode == "full" {
            let (out, sent) =
                handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
            let out = crate::core::redaction::redact_text_if_enabled(&out);
            return ReadOutput {
                content: out,
                resolved_mode: "full".into(),
                output_tokens: sent,
            };
        }
        // Copy what is needed out of the entry so the cache can be mutated below.
        let content = existing.content.clone();
        let original_tokens = existing.original_tokens;
        let resolved_mode = if mode == "auto" {
            resolve_auto_mode(path, original_tokens, task)
        } else {
            mode.to_string()
        };
        if is_cacheable_mode(&resolved_mode) {
            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
            if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
                // Token count here covers the whole cached string (pre-redaction).
                let sent = count_tokens(cached_output);
                let out = crate::core::redaction::redact_text_if_enabled(cached_output);
                return ReadOutput {
                    content: out,
                    resolved_mode,
                    output_tokens: sent,
                };
            }
        }
        let (out, sent) = process_mode(
            &content,
            &resolved_mode,
            &file_ref,
            &short,
            ext,
            original_tokens,
            crp_mode,
            path,
            task,
        );
        if is_cacheable_mode(&resolved_mode) {
            // The unredacted rendering is what gets cached; redaction is applied on every return.
            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
            cache.set_compressed(path, &cache_key, out.clone());
        }
        let out = crate::core::redaction::redact_text_if_enabled(&out);
        return ReadOutput {
            content: out,
            resolved_mode,
            output_tokens: sent,
        };
    }

    // ---- Cache miss: read from disk ----
    let content = match read_file_lossy(path) {
        Ok(c) => c,
        Err(e) => {
            let msg = format!("ERROR: {e}");
            let tokens = count_tokens(&msg);
            return ReadOutput {
                content: msg,
                resolved_mode: "error".into(),
                output_tokens: tokens,
            };
        }
    };

    // Hints are only computed on this first-read path, before the content is stored.
    let similar_hint = find_similar_and_update_semantic_index(path, &content);
    let graph_hint = build_graph_related_hint(path);

    let store_result = cache.store(path, content.clone());

    if mode == "full" {
        cache.mark_full_delivered(path);
        let (mut output, sent) = format_full_output(
            &file_ref,
            &short,
            ext,
            &content,
            store_result.original_tokens,
            store_result.line_count,
            task,
        );
        if let Some(hint) = &graph_hint {
            output.push_str(&format!("\n{hint}"));
        }
        if let Some(hint) = similar_hint {
            output.push_str(&format!("\n{hint}"));
        }
        let output = crate::core::redaction::redact_text_if_enabled(&output);
        // NOTE: `sent` was counted by `format_full_output`, i.e. before the hints were appended.
        return ReadOutput {
            content: output,
            resolved_mode: "full".into(),
            output_tokens: sent,
        };
    }

    let resolved_mode = if mode == "auto" {
        resolve_auto_mode(path, store_result.original_tokens, task)
    } else {
        mode.to_string()
    };

    let (mut output, _sent) = process_mode(
        &content,
        &resolved_mode,
        &file_ref,
        &short,
        ext,
        store_result.original_tokens,
        crp_mode,
        path,
        task,
    );
    if is_cacheable_mode(&resolved_mode) {
        // Cache the rendering before hints are appended so later hits do not repeat them.
        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
        cache.set_compressed(path, &cache_key, output.clone());
    }
    if let Some(hint) = &graph_hint {
        output.push_str(&format!("\n{hint}"));
    }
    if let Some(hint) = similar_hint {
        output.push_str(&format!("\n{hint}"));
    }
    let output = crate::core::redaction::redact_text_if_enabled(&output);
    // Unlike the branches above, this path recounts tokens on the final (hinted, redacted) text.
    let final_tokens = count_tokens(&output);
    ReadOutput {
        content: output,
        resolved_mode,
        output_tokens: final_tokens,
    }
}
326
/// Reports whether `path` looks like an agent-instruction / rules file.
///
/// Matching is case-insensitive. A path qualifies when its file name is one of a
/// fixed set of instruction file names, or when the path contains one of a fixed
/// set of markers (`/skills/`, `/.cursor/rules/`, `/.claude/rules/`, `/agents.md`).
pub fn is_instruction_file(path: &str) -> bool {
    const INSTRUCTION_FILE_NAMES: [&str; 7] = [
        "skill.md",
        "agents.md",
        "rules.md",
        ".cursorrules",
        ".clinerules",
        "lean-ctx.md",
        "lean-ctx.mdc",
    ];
    const INSTRUCTION_PATH_MARKERS: [&str; 4] = [
        "/skills/",
        "/.cursor/rules/",
        "/.claude/rules/",
        "/agents.md",
    ];

    let lowered = path.to_lowercase();
    let base_name = std::path::Path::new(&lowered)
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("");

    if INSTRUCTION_FILE_NAMES.iter().any(|&known| known == base_name) {
        return true;
    }
    INSTRUCTION_PATH_MARKERS
        .iter()
        .any(|marker| lowered.contains(*marker))
}
348
349fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
350    if is_instruction_file(file_path) {
351        return "full".to_string();
352    }
353
354    // Priority 1: Intent Router with budget/pressure-aware degradation.
355    // Only fall through to Predictor/Bandit if the router returns "auto".
356    let intent_query = task.unwrap_or("read");
357    let route = crate::core::intent_router::route_v1(intent_query);
358    let intent_mode = &route.decision.effective_read_mode;
359    if intent_mode != "auto" && intent_mode != "reference" {
360        return intent_mode.clone();
361    }
362
363    // Priority 2: FileSignature-based predictor
364    let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
365    let predictor = crate::core::mode_predictor::ModePredictor::new();
366    let mut predicted = predictor
367        .predict_best_mode(&sig)
368        .unwrap_or_else(|| "full".to_string());
369    if predicted == "auto" {
370        predicted = "full".to_string();
371    }
372
373    // Priority 3: Bandit exploration when budget is tight
374    if let Some(project_root) =
375        crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
376    {
377        let ext = std::path::Path::new(file_path)
378            .extension()
379            .and_then(|e| e.to_str())
380            .unwrap_or("");
381        let bucket = match original_tokens {
382            0..=2000 => "sm",
383            2001..=10000 => "md",
384            10001..=50000 => "lg",
385            _ => "xl",
386        };
387        let bandit_key = format!("{ext}_{bucket}");
388        let mut store = crate::core::bandit::BanditStore::load(&project_root);
389        let bandit = store.get_or_create(&bandit_key);
390        let arm = bandit.select_arm();
391        if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
392            predicted = "aggressive".to_string();
393        }
394    }
395
396    // Priority 4: Adaptive mode policy
397    let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
398    let chosen = policy.choose_auto_mode(task, &predicted);
399
400    if original_tokens > 2000 {
401        if predicted == "map" || predicted == "signatures" {
402            if chosen != "map" && chosen != "signatures" {
403                return predicted;
404            }
405        } else if chosen == "full" && predicted != "full" {
406            return predicted;
407        }
408    }
409
410    chosen
411}
412
413fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
414    let cfg = crate::core::config::Config::load();
415    let profile = crate::core::config::MemoryProfile::effective(&cfg);
416    if !profile.semantic_cache_enabled() {
417        return None;
418    }
419
420    let project_root = detect_project_root(path);
421    let session_id = format!("{}", std::process::id());
422    let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
423
424    let similar = index.find_similar(content, 0.7);
425    let relevant: Vec<_> = similar
426        .into_iter()
427        .filter(|(p, _)| p != path)
428        .take(3)
429        .collect();
430
431    index.add_file(path, content, &session_id);
432    let _ = index.save(&project_root);
433
434    if relevant.is_empty() {
435        return None;
436    }
437
438    let hints: Vec<String> = relevant
439        .iter()
440        .map(|(p, score)| format!("  {p} ({:.0}% similar)", score * 100.0))
441        .collect();
442
443    Some(format!(
444        "[semantic: {} similar file(s) in cache]\n{}",
445        relevant.len(),
446        hints.join("\n")
447    ))
448}
449
450fn detect_project_root(path: &str) -> String {
451    crate::core::protocol::detect_project_root_or_cwd(path)
452}
453
454fn build_graph_related_hint(path: &str) -> Option<String> {
455    let project_root = detect_project_root(path);
456    crate::core::graph_context::build_related_hint(path, &project_root, 5)
457}
458
/// An auto-delta is sent only when the diff's token count is below this fraction of the
/// full file's token count.
const AUTO_DELTA_THRESHOLD: f64 = 0.6;

/// Re-reads from disk; if content changed and delta is compact, sends auto-delta.
///
/// Outcomes, in order of evaluation:
/// - Disk read fails: records a cache hit and returns a short notice (with the cached entry's
///   stats when one exists).
/// - Content unchanged (`store_result.was_hit`): returns a short "unchanged" notice if the
///   full content was already delivered, otherwise marks it delivered and returns full output.
/// - Content changed: returns a diff when it is smaller than `AUTO_DELTA_THRESHOLD` of the
///   full token count, else the full output.
///
/// Returns the rendered text and a token count for it.
fn handle_full_with_auto_delta(
    cache: &mut SessionCache,
    path: &str,
    file_ref: &str,
    short: &str,
    ext: &str,
    task: Option<&str>,
) -> (String, usize) {
    let Ok(disk_content) = read_file_lossy(path) else {
        cache.record_cache_hit(path);
        let out = if let Some(existing) = cache.get(path) {
            format!(
                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
                existing.read_count, existing.line_count
            )
        } else {
            format!("[file read failed and no cached version available] {file_ref}={short}")
        };
        let sent = count_tokens(&out);
        return (out, sent);
    };

    // Snapshot the previous content before `store` replaces it; needed for the diff below.
    let old_content = cache
        .get(path)
        .map(|e| e.content.clone())
        .unwrap_or_default();
    let store_result = cache.store(path, disk_content.clone());

    if store_result.was_hit {
        if store_result.full_content_delivered {
            let out = format!(
                "{file_ref}={short} cached {}t {}L\nFile content unchanged since last read (same hash). Already in your context window.",
                store_result.read_count, store_result.line_count
            );
            let sent = count_tokens(&out);
            return (out, sent);
        }
        // Same content, but the full text has not been delivered yet: send it once.
        cache.mark_full_delivered(path);
        return format_full_output(
            file_ref,
            short,
            ext,
            &disk_content,
            store_result.original_tokens,
            store_result.line_count,
            task,
        );
    }

    let diff = compressor::diff_content(&old_content, &disk_content);
    let diff_tokens = count_tokens(&diff);
    let full_tokens = store_result.original_tokens;

    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
        let savings = protocol::format_savings(full_tokens, diff_tokens);
        // NOTE: the `∆{}L` figure is the new file's total line count, and the returned token
        // count is that of the diff alone (header and savings line are not included).
        let out = format!(
            "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
            disk_content.lines().count()
        );
        return (out, diff_tokens);
    }

    format_full_output(
        file_ref,
        short,
        ext,
        &disk_content,
        store_result.original_tokens,
        store_result.line_count,
        task,
    )
}
534
535fn format_full_output(
536    file_ref: &str,
537    short: &str,
538    ext: &str,
539    content: &str,
540    original_tokens: usize,
541    line_count: usize,
542    task: Option<&str>,
543) -> (String, usize) {
544    let tokens = original_tokens;
545    let metadata = build_header(file_ref, short, ext, content, line_count, true);
546
547    let mut reordered: Option<String> = None;
548    {
549        let profile = crate::core::profiles::active_profile();
550        let cfg = profile.layout;
551        if cfg.enabled_effective() && line_count >= cfg.min_lines_effective() {
552            let task_str = task.unwrap_or("");
553            if !task_str.is_empty() {
554                let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
555                let r = crate::core::attention_layout_driver::maybe_reorder_for_attention(
556                    content, &keywords, &cfg,
557                );
558                if !r.skipped && r.changed {
559                    reordered = Some(r.output);
560                }
561            }
562        }
563    }
564
565    let content_for_output = reordered.as_deref().unwrap_or(content);
566
567    let mut sym = SymbolMap::new();
568    let idents = symbol_map::extract_identifiers(content_for_output, ext);
569    for ident in &idents {
570        sym.register(ident);
571    }
572
573    if sym.len() >= 3 {
574        let sym_table = sym.format_table();
575        let compressed = sym.apply(content_for_output);
576        let original_tok = count_tokens(content_for_output);
577        let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
578        let net_saving = original_tok.saturating_sub(compressed_tok);
579        if original_tok > 0 && net_saving * 100 / original_tok >= 5 {
580            let output = format!("{metadata}\n{compressed}{sym_table}");
581            let sent = count_tokens(&output);
582            let savings = protocol::format_savings(tokens, sent);
583            return (format!("{output}\n{savings}"), sent);
584        }
585    }
586
587    let output = format!("{metadata}\n{content_for_output}");
588    let sent = count_tokens(&output);
589    let savings = protocol::format_savings(tokens, sent);
590    (format!("{output}\n{savings}"), sent)
591}
592
593fn build_header(
594    file_ref: &str,
595    short: &str,
596    ext: &str,
597    content: &str,
598    line_count: usize,
599    include_deps: bool,
600) -> String {
601    let mut header = format!("{file_ref}={short} {line_count}L");
602
603    if include_deps {
604        let dep_info = deps::extract_deps(content, ext);
605        if !dep_info.imports.is_empty() {
606            let imports_str: Vec<&str> = dep_info
607                .imports
608                .iter()
609                .take(8)
610                .map(std::string::String::as_str)
611                .collect();
612            header.push_str(&format!("\n deps {}", imports_str.join(",")));
613        }
614        if !dep_info.exports.is_empty() {
615            let exports_str: Vec<&str> = dep_info
616                .exports
617                .iter()
618                .take(8)
619                .map(std::string::String::as_str)
620                .collect();
621            header.push_str(&format!("\n exports {}", exports_str.join(",")));
622        }
623    }
624
625    header
626}
627
/// Renders `content` according to `mode` and returns `(rendered_text, token_count)`.
///
/// Supported modes: `auto` (resolved via `resolve_auto_mode`, then re-dispatched), `full`,
/// `signatures`, `map`, `aggressive`, `entropy`, `task`, `reference`, and `lines:<range>`.
/// Any other mode falls back to the full content prefixed with a warning.
///
/// The token count is measured differently per arm and generally excludes the savings line
/// and the compressed hint appended afterwards, so treat it as approximate.
// Allowed: this dispatcher needs all nine inputs to render every mode.
#[allow(clippy::too_many_arguments)]
fn process_mode(
    content: &str,
    mode: &str,
    file_ref: &str,
    short: &str,
    ext: &str,
    original_tokens: usize,
    crp_mode: CrpMode,
    file_path: &str,
    task: Option<&str>,
) -> (String, usize) {
    let line_count = content.lines().count();

    match mode {
        // Resolve to a concrete mode and dispatch again.
        "auto" => {
            let chosen = resolve_auto_mode(file_path, original_tokens, task);
            process_mode(
                content,
                &chosen,
                file_ref,
                short,
                ext,
                original_tokens,
                crp_mode,
                file_path,
                task,
            )
        }
        "full" => format_full_output(
            file_ref,
            short,
            ext,
            content,
            original_tokens,
            line_count,
            task,
        ),
        // Header, up to eight imports, then one line per extracted signature.
        "signatures" => {
            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = format!("{file_ref}={short} {line_count}L");
            if !dep_info.imports.is_empty() {
                let imports_str: Vec<&str> = dep_info
                    .imports
                    .iter()
                    .take(8)
                    .map(std::string::String::as_str)
                    .collect();
                output.push_str(&format!("\n deps {}", imports_str.join(",")));
            }
            for sig in &sigs {
                output.push('\n');
                if crp_mode.is_tdd() {
                    output.push_str(&sig.to_tdd());
                } else {
                    output.push_str(&sig.to_compact());
                }
            }
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        // Header, all imports/exports, and the exported or top-level (indent 0) signatures.
        "map" => {
            // PHP files get a dedicated map when the PHP compressor produces one.
            if ext == "php" {
                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
                {
                    let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
                    let sent = count_tokens(&output);
                    let savings = protocol::format_savings(original_tokens, sent);
                    output.push('\n');
                    output.push_str(&savings);
                    return (append_compressed_hint(&output, file_path), sent);
                }
            }

            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = format!("{file_ref}={short} {line_count}L");

            if !dep_info.imports.is_empty() {
                output.push_str("\n  deps: ");
                output.push_str(&dep_info.imports.join(", "));
            }

            if !dep_info.exports.is_empty() {
                output.push_str("\n  exports: ");
                output.push_str(&dep_info.exports.join(", "));
            }

            let key_sigs: Vec<&signatures::Signature> = sigs
                .iter()
                .filter(|s| s.is_exported || s.indent == 0)
                .collect();

            if !key_sigs.is_empty() {
                output.push_str("\n  API:");
                for sig in &key_sigs {
                    output.push_str("\n    ");
                    if crp_mode.is_tdd() {
                        output.push_str(&sig.to_tdd());
                    } else {
                        output.push_str(&sig.to_compact());
                    }
                }
            }

            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        // AST pruning (when built with tree-sitter), then intent-aware or generic aggressive
        // compression, then optional symbol substitution.
        "aggressive" => {
            #[cfg(feature = "tree-sitter")]
            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
            #[cfg(not(feature = "tree-sitter"))]
            let ast_pruned: Option<String> = None;

            let base = ast_pruned.as_deref().unwrap_or(content);

            let session_intent = crate::core::session::SessionState::load_latest()
                .and_then(|s| s.active_structured_intent);
            let raw = if let Some(ref intent) = session_intent {
                compressor::task_aware_compress(base, Some(ext), intent)
            } else {
                compressor::aggressive_compress(base, Some(ext))
            };
            // `safeguard_ratio` receives the original content alongside the compressed text.
            let compressed = compressor::safeguard_ratio(content, &raw);
            let header = build_header(file_ref, short, ext, content, line_count, true);

            let mut sym = SymbolMap::new();
            let idents = symbol_map::extract_identifiers(&compressed, ext);
            for ident in &idents {
                sym.register(ident);
            }

            if sym.len() >= 3 {
                let sym_table = sym.format_table();
                let sym_applied = sym.apply(&compressed);
                let orig_tok = count_tokens(&compressed);
                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
                let net = orig_tok.saturating_sub(comp_tok);
                // Use the symbol-substituted form only if it saves at least 5% of tokens.
                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
                    let savings = protocol::format_savings(original_tokens, comp_tok);
                    return (
                        append_compressed_hint(
                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
                            file_path,
                        ),
                        comp_tok,
                    );
                }
                // Not worth it: this produces the same output as the fall-through below.
                let savings = protocol::format_savings(original_tokens, orig_tok);
                return (
                    append_compressed_hint(
                        &format!("{header}\n{compressed}\n{savings}"),
                        file_path,
                    ),
                    orig_tok,
                );
            }

            let sent = count_tokens(&compressed);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
                sent,
            )
        }
        // Entropy-based compression; the header carries the average entropy and techniques used.
        "entropy" => {
            let result = entropy::entropy_compress_adaptive(content, file_path);
            let avg_h = entropy::analyze_entropy(content).avg_entropy;
            let header = build_header(file_ref, short, ext, content, line_count, false);
            let techs = result.techniques.join(", ");
            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            let compression_ratio = if original_tokens > 0 {
                1.0 - (sent as f64 / original_tokens as f64)
            } else {
                0.0
            };
            // Reports success to the bandit when more than 15% of tokens were saved.
            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        // Keyword-filtered view; returns the full content when there is no task or no keywords.
        "task" => {
            let task_str = task.unwrap_or("");
            if task_str.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
                let sent = count_tokens(&out);
                return (out, sent);
            }
            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
            if keywords.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!(
                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
                );
                let sent = count_tokens(&out);
                return (out, sent);
            }
            let filtered =
                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
            let filtered_lines = filtered.lines().count();
            let header = format!(
                "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
            );
            let project_root = detect_project_root(file_path);
            // Graph context is appended when available; otherwise an empty string.
            let graph_ctx = crate::core::graph_context::build_graph_context(
                file_path,
                &project_root,
                Some(crate::core::graph_context::GraphContextOptions::default()),
            )
            .map(|c| crate::core::graph_context::format_graph_context(&c))
            .unwrap_or_default();

            // Tokens are summed per part rather than counted on the joined string.
            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(
                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
                    file_path,
                ),
                sent,
            )
        }
        // One-line summary only: line count, token count, and extension.
        "reference" => {
            let tok = count_tokens(content);
            let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{output}\n{savings}"), sent)
        }
        // `lines:<range>` — everything after the 6-byte `lines:` prefix is the range spec.
        mode if mode.starts_with("lines:") => {
            let range_str = &mode[6..];
            let extracted = extract_line_range(content, range_str);
            let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
            // Only the extracted lines are counted, not the header.
            let sent = count_tokens(&extracted);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{header}\n{extracted}\n{savings}"), sent)
        }
        // Unknown mode: warn and return the unmodified content with a header.
        unknown => {
            let header = build_header(file_ref, short, ext, content, line_count, true);
            let out = format!(
                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
            );
            let sent = count_tokens(&out);
            (out, sent)
        }
    }
}
890
/// Renders the lines of `content` selected by `range_str`, each prefixed with
/// its 1-based line number right-aligned to width 4 and followed by `| `.
///
/// `range_str` is a comma-separated list whose parts are either a single line
/// number (`7`) or a `start-end` span (`3-9`). For spans, an unparsable start
/// falls back to 1 and an unparsable end falls back to the last line; both
/// bounds are clamped to the lines that exist. Single numbers outside the file
/// and parts that are not numbers are ignored. Parts are emitted in the order
/// they are written, without de-duplication.
///
/// Returns `"No lines matched the range."` when nothing was selected.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    // One formatter shared by spans and single-line picks.
    let numbered = |n: usize| format!("{n:>4}| {}", lines[n - 1]);

    let mut selected: Vec<String> = Vec::new();
    for spec in range_str.split(',').map(str::trim) {
        match spec.split_once('-') {
            Some((lo, hi)) => {
                // After clamping, every index in `start..=end` is a valid
                // 1-based line number (the range is empty for an empty file).
                let start = lo.trim().parse::<usize>().unwrap_or(1).max(1);
                let end = hi.trim().parse::<usize>().unwrap_or(total).min(total);
                selected.extend((start..=end).map(&numbered));
            }
            None => {
                if let Ok(n) = spec.parse::<usize>() {
                    if (1..=total).contains(&n) {
                        selected.push(numbered(n));
                    }
                }
            }
        }
    }

    if selected.is_empty() {
        return "No lines matched the range.".to_string();
    }
    selected.join("\n")
}
919
920fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
921    let short = protocol::shorten_path(path);
922    let old_content = cache.get(path).map(|e| e.content.clone());
923
924    let new_content = match read_file_lossy(path) {
925        Ok(c) => c,
926        Err(e) => {
927            let msg = format!("ERROR: {e}");
928            let tokens = count_tokens(&msg);
929            return (msg, tokens);
930        }
931    };
932
933    let original_tokens = count_tokens(&new_content);
934
935    let diff_output = if let Some(old) = &old_content {
936        compressor::diff_content(old, &new_content)
937    } else {
938        format!("[first read]\n{new_content}")
939    };
940
941    cache.store(path, new_content);
942
943    let sent = count_tokens(&diff_output);
944    let savings = protocol::format_savings(original_tokens, sent);
945    (
946        format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}"),
947        sent,
948    )
949}
950
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Calls `process_mode` with the fixture values shared by the tests below:
    /// file ref `F1`, extension `rs`, `CrpMode::Off`, and `path` used as both
    /// the short name and the file path.
    fn run_mode(
        content: &str,
        mode: &str,
        path: &str,
        original_tokens: usize,
        task: Option<&str>,
    ) -> (String, usize) {
        process_mode(
            content,
            mode,
            "F1",
            path,
            "rs",
            original_tokens,
            CrpMode::Off,
            path,
            task,
        )
    }

    /// Returns the second line of `output`, trimmed; when that line contains a
    /// `|`, only the trimmed text after the first `|` is returned.
    fn second_line_payload(output: &str) -> &str {
        let line = output.lines().nth(1).unwrap_or_default().trim();
        match line.split_once('|') {
            Some((_, payload)) => payload.trim(),
            None => line,
        }
    }

    /// A header built without deps/exports contains no square brackets.
    #[test]
    fn test_header_toon_format_no_brackets() {
        let source = "use std::io;\nfn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", source, 2, false);
        for bracket in ['[', ']'] {
            assert!(!header.contains(bracket));
        }
        assert!(header.contains("F1=main.rs 2L"));
    }

    /// When a deps section is emitted it uses the indented form, not `deps:[...]`.
    #[test]
    fn test_header_toon_deps_indented() {
        let source = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", source, 3, true);
        if !header.contains("deps") {
            // Nothing to verify when no deps section was produced.
            return;
        }
        assert!(
            header.contains("\n deps "),
            "deps should use indented TOON format"
        );
        assert!(
            !header.contains("deps:["),
            "deps should not use bracket format"
        );
    }

    /// The current header must not cost more tokens than the old bracket format.
    #[test]
    fn test_header_toon_saves_tokens() {
        let source = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
        let new_header = build_header("F1", "main.rs", "rs", source, 4, true);
        let old_tokens = count_tokens("F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]");
        let new_tokens = count_tokens(&new_header);
        assert!(
            new_tokens <= old_tokens,
            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
        );
    }

    /// Every TDD shorthand symbol tokenizes to at most two tokens.
    #[test]
    fn test_tdd_symbols_are_compact() {
        for sym in [
            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
        ] {
            let tok = count_tokens(sym);
            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
        }
    }

    /// Task mode with a usable task shrinks the output and marks it as filtered.
    #[test]
    fn test_task_mode_filters_content() {
        // 200 one-line functions; every 20th one mentions `validate_token`.
        let mut rows = Vec::with_capacity(200);
        for i in 0..200 {
            rows.push(if i % 20 == 0 {
                format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
            } else {
                format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
            });
        }
        let content = rows.join("\n");
        let full_tokens = count_tokens(&content);

        let (result, result_tokens) = run_mode(
            &content,
            "task",
            "test.rs",
            full_tokens,
            Some("fix bug in validate_token"),
        );

        assert!(
            result_tokens < full_tokens,
            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
        );
        assert!(
            result.contains("task-filtered"),
            "output should contain task-filtered marker"
        );
    }

    /// Task mode without a task falls back to full output and says so.
    #[test]
    fn test_task_mode_without_task_returns_full() {
        let content = "fn main() {}\nfn helper() {}\n";
        let (result, _sent) = run_mode(content, "task", "test.rs", count_tokens(content), None);
        assert!(
            result.contains("no task set"),
            "should indicate no task: {result}"
        );
    }

    /// Reference mode yields a few lines carrying line and token counts.
    #[test]
    fn test_reference_mode_one_line() {
        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
        let (result, _sent) =
            run_mode(content, "reference", "test.rs", count_tokens(content), None);

        let line_count = result.lines().count();
        assert!(
            line_count <= 3,
            "reference mode should be very compact, got {} lines",
            line_count
        );
        assert!(result.contains("lines"), "should contain line count");
        assert!(result.contains("tok"), "should contain token count");
    }

    /// A `lines:` read must reflect new file content after the file is rewritten.
    #[test]
    fn cached_lines_mode_invalidates_on_mtime_change() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("file.txt");
        let p = path.to_string_lossy().to_string();

        std::fs::write(&path, "one\nsecond\n").unwrap();
        let mut cache = SessionCache::new();

        // Reads line 1 through the handler and returns the text after the `|`.
        let mut read_first_line = || {
            let out = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
            second_line_payload(&out.content).to_string()
        };

        assert_eq!(read_first_line(), "one");

        // Wait before rewriting so the second write lands a full second later.
        std::thread::sleep(Duration::from_secs(1));
        std::fs::write(&path, "two\nsecond\n").unwrap();

        assert_eq!(read_first_line(), "two");
    }

    /// Compares token counts of full, task, signatures and reference modes on
    /// a generated file and prints the relative savings.
    #[test]
    #[cfg_attr(tarpaulin, ignore)]
    fn benchmark_task_conditioned_compression() {
        // Keep this reasonably small so CI coverage instrumentation stays fast.
        let content = generate_benchmark_code(200);
        let full_tokens = count_tokens(&content);
        let task = Some("fix authentication in validate_token");

        let measure = |mode: &str| run_mode(&content, mode, "server.rs", full_tokens, task).1;
        let full_tok = measure("full");
        let task_tok = measure("task");
        let sig_tok = measure("signatures");
        let ref_tok = measure("reference");

        // Percentage saved relative to the full-mode token count.
        let savings_pct = |tok: usize| (1.0 - tok as f64 / full_tok as f64) * 100.0;

        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
        eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
        eprintln!("  full:       {full_tok:>6} tokens (baseline)");
        eprintln!(
            "  task:       {task_tok:>6} tokens ({:.0}% savings)",
            savings_pct(task_tok)
        );
        eprintln!(
            "  signatures: {sig_tok:>6} tokens ({:.0}% savings)",
            savings_pct(sig_tok)
        );
        eprintln!(
            "  reference:  {ref_tok:>6} tokens ({:.0}% savings)",
            savings_pct(ref_tok)
        );
        eprintln!("================================================\n");

        assert!(task_tok < full_tok, "task mode should save tokens");
        assert!(sig_tok < full_tok, "signatures should save tokens");
        assert!(ref_tok < sig_tok, "reference should be most compact");
    }

    /// Builds synthetic Rust source: a fixed preamble containing
    /// `validate_token`, filler handler methods up to roughly `lines` lines,
    /// and a closing brace.
    fn generate_benchmark_code(lines: usize) -> String {
        const PREAMBLE: &[&str] = &[
            "use std::collections::HashMap;",
            "use crate::core::auth;",
            "",
            "pub struct Server {",
            "    config: Config,",
            "    cache: HashMap<String, String>,",
            "}",
            "",
            "impl Server {",
            "    pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {",
            "        let decoded = auth::decode_jwt(token)?;",
            "        if decoded.exp < chrono::Utc::now().timestamp() {",
            "            return Err(AuthError::Expired);",
            "        }",
            "        Ok(decoded.claims)",
            "    }",
            "",
        ];

        let mut code: Vec<String> = Vec::with_capacity(lines);
        code.extend(PREAMBLE.iter().copied().map(String::from));

        // Filler comes in groups of 30: a handler opener, 28 body lines, a closer.
        let remaining = lines.saturating_sub(code.len());
        code.extend((0..remaining).map(|i| match i % 30 {
            0 => format!("    pub fn handler_{i}(&self, req: Request) -> Response {{"),
            29 => "    }".to_string(),
            _ => format!("        let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"),
        }));

        code.push("}".to_string());
        code.join("\n")
    }

    /// Agent instruction files are recognized; ordinary project files are not.
    #[test]
    fn instruction_file_detection() {
        let instruction_paths = [
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
            "/workspace/.cursor/rules/lean-ctx.mdc",
            "/project/AGENTS.md",
            "/project/.cursorrules",
            "/home/user/.claude/rules/my-rule.md",
            "/skills/some-skill/README.md",
        ];
        for path in instruction_paths {
            assert!(is_instruction_file(path), "{path}");
        }

        let ordinary_paths = [
            "/project/src/main.rs",
            "/project/config.json",
            "/project/data/report.csv",
        ];
        for path in ordinary_paths {
            assert!(!is_instruction_file(path), "{path}");
        }
    }

    /// Auto mode resolves to `full` for instruction files.
    #[test]
    fn resolve_auto_mode_returns_full_for_instruction_files() {
        let cases = [
            (
                "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
                5000,
                Some("read"),
                "SKILL.md must always be read in full",
            ),
            (
                "/workspace/AGENTS.md",
                3000,
                Some("read"),
                "AGENTS.md must always be read in full",
            ),
            (
                "/workspace/.cursorrules",
                2000,
                None,
                ".cursorrules must always be read in full",
            ),
        ];
        for (path, tokens, intent, why) in cases {
            let mode = resolve_auto_mode(path, tokens, intent);
            assert_eq!(mode, "full", "{why}");
        }
    }
}