Skip to main content

lean_ctx/tools/
ctx_read.rs

1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a read request: the rendered text plus bookkeeping about how it was produced.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so values can be logged, duplicated and
/// compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadOutput {
    /// Rendered text handed back to the caller.
    pub content: String,
    /// Mode that was actually applied (the concrete mode when `auto` was requested,
    /// or `"error"` when the file could not be read).
    pub resolved_mode: String,
    /// Approximate output token count from mode processing.
    /// The dispatch layer recounts the final assembled string for accurate savings.
    pub output_tokens: usize,
}
22
/// Trailer appended to compressed renderings so the reader knows the complete source
/// can still be requested.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Modes whose rendered output is stored in the per-file compressed cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is listed in `CACHEABLE_MODES` (exact, case-sensitive match).
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|candidate| *candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32    if crp_mode.is_tdd() {
33        format!("{mode}:tdd")
34    } else {
35        mode.to_string()
36    }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40    format!("{output}\n{COMPRESSED_HINT}\n  ctx_read(\"{file_path}\", mode=\"full\")")
41}
42
43/// Reads a file as UTF-8 with lossy fallback, enforcing binary detection and max read size limit.
44/// Defense-in-depth: verifies that the canonical path stays within the process's project root
45/// (if determinable) even though callers SHOULD have already jail-checked the path.
46pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
47    if crate::core::binary_detect::is_binary_file(path) {
48        let msg = crate::core::binary_detect::binary_file_message(path);
49        return Err(std::io::Error::other(msg));
50    }
51
52    if let Ok(canonical) = std::path::Path::new(path).canonicalize() {
53        if let Ok(cwd) = std::env::current_dir() {
54            let root = crate::core::pathjail::canonicalize_or_self(&cwd);
55            if !canonical.starts_with(&root) {
56                let allow = crate::core::pathjail::allow_paths_from_env_and_config();
57                let data_dir_ok = crate::core::data_dir::lean_ctx_data_dir()
58                    .ok()
59                    .is_some_and(|d| canonical.starts_with(d));
60                let tmp_ok = canonical.starts_with(std::env::temp_dir());
61                if !allow.iter().any(|a| canonical.starts_with(a)) && !data_dir_ok && !tmp_ok {
62                    tracing::warn!(
63                        "defense-in-depth: path may escape project root: {}",
64                        canonical.display()
65                    );
66                }
67            }
68        }
69    }
70
71    let cap = crate::core::limits::max_read_bytes();
72    let meta = std::fs::metadata(path).map_err(|e| {
73        std::io::Error::other(format!("cannot stat file (refusing unbounded read): {e}"))
74    })?;
75    if meta.len() > cap as u64 {
76        return Err(std::io::Error::other(format!(
77            "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
78             Increase the limit or use a line-range read: mode=\"lines:1-100\"",
79            meta.len(),
80            cap
81        )));
82    }
83
84    let bytes = std::fs::read(path)?;
85    match String::from_utf8(bytes) {
86        Ok(s) => Ok(s),
87        Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
88    }
89}
90
91/// Reads a file through the cache and applies the requested compression mode.
92pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
93    handle_with_options(cache, path, mode, false, crp_mode, None)
94}
95
96/// Like `handle`, but invalidates the cache first to force a fresh disk read.
97pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
98    handle_with_options(cache, path, mode, true, crp_mode, None)
99}
100
101/// Reads a file with task-aware filtering to prioritize task-relevant content.
102pub fn handle_with_task(
103    cache: &mut SessionCache,
104    path: &str,
105    mode: &str,
106    crp_mode: CrpMode,
107    task: Option<&str>,
108) -> String {
109    handle_with_options(cache, path, mode, false, crp_mode, task)
110}
111
112/// Like `handle_with_task`, also returns the resolved mode name and pre-counted tokens.
113pub fn handle_with_task_resolved(
114    cache: &mut SessionCache,
115    path: &str,
116    mode: &str,
117    crp_mode: CrpMode,
118    task: Option<&str>,
119) -> ReadOutput {
120    handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
121}
122
123/// Fresh read with task-aware filtering (invalidates cache first).
124pub fn handle_fresh_with_task(
125    cache: &mut SessionCache,
126    path: &str,
127    mode: &str,
128    crp_mode: CrpMode,
129    task: Option<&str>,
130) -> String {
131    handle_with_options(cache, path, mode, true, crp_mode, task)
132}
133
134/// Fresh read with task-aware filtering, also returns the resolved mode name and pre-counted tokens.
135pub fn handle_fresh_with_task_resolved(
136    cache: &mut SessionCache,
137    path: &str,
138    mode: &str,
139    crp_mode: CrpMode,
140    task: Option<&str>,
141) -> ReadOutput {
142    handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
143}
144
145fn handle_with_options(
146    cache: &mut SessionCache,
147    path: &str,
148    mode: &str,
149    fresh: bool,
150    crp_mode: CrpMode,
151    task: Option<&str>,
152) -> String {
153    handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
154}
155
156fn handle_with_options_resolved(
157    cache: &mut SessionCache,
158    path: &str,
159    mode: &str,
160    fresh: bool,
161    crp_mode: CrpMode,
162    task: Option<&str>,
163) -> ReadOutput {
164    let file_ref = cache.get_file_ref(path);
165    let short = protocol::shorten_path(path);
166    let ext = Path::new(path)
167        .extension()
168        .and_then(|e| e.to_str())
169        .unwrap_or("");
170
171    if fresh {
172        cache.invalidate(path);
173    }
174
175    if mode == "diff" {
176        let (out, sent) = handle_diff(cache, path, &file_ref);
177        return ReadOutput {
178            content: out,
179            resolved_mode: "diff".into(),
180            output_tokens: sent,
181        };
182    }
183
184    if mode != "full" {
185        if let Some(existing) = cache.get(path) {
186            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
187            if stale {
188                cache.invalidate(path);
189            }
190        }
191    }
192
193    if let Some(existing) = cache.get(path) {
194        if mode == "full" {
195            let (out, sent) =
196                handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
197            let out = crate::core::redaction::redact_text_if_enabled(&out);
198            return ReadOutput {
199                content: out,
200                resolved_mode: "full".into(),
201                output_tokens: sent,
202            };
203        }
204        let content = existing.content.clone();
205        let original_tokens = existing.original_tokens;
206        let resolved_mode = if mode == "auto" {
207            resolve_auto_mode(path, original_tokens, task)
208        } else {
209            mode.to_string()
210        };
211        if is_cacheable_mode(&resolved_mode) {
212            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
213            if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
214                let sent = count_tokens(cached_output);
215                let out = crate::core::redaction::redact_text_if_enabled(cached_output);
216                return ReadOutput {
217                    content: out,
218                    resolved_mode,
219                    output_tokens: sent,
220                };
221            }
222        }
223        let (out, sent) = process_mode(
224            &content,
225            &resolved_mode,
226            &file_ref,
227            &short,
228            ext,
229            original_tokens,
230            crp_mode,
231            path,
232            task,
233        );
234        if is_cacheable_mode(&resolved_mode) {
235            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
236            cache.set_compressed(path, &cache_key, out.clone());
237        }
238        let out = crate::core::redaction::redact_text_if_enabled(&out);
239        return ReadOutput {
240            content: out,
241            resolved_mode,
242            output_tokens: sent,
243        };
244    }
245
246    let content = match read_file_lossy(path) {
247        Ok(c) => c,
248        Err(e) => {
249            let msg = format!("ERROR: {e}");
250            let tokens = count_tokens(&msg);
251            return ReadOutput {
252                content: msg,
253                resolved_mode: "error".into(),
254                output_tokens: tokens,
255            };
256        }
257    };
258
259    let similar_hint = find_similar_and_update_semantic_index(path, &content);
260    let graph_hint = build_graph_related_hint(path);
261
262    let store_result = cache.store(path, content.clone());
263
264    if mode == "full" {
265        cache.mark_full_delivered(path);
266        let (mut output, sent) = format_full_output(
267            &file_ref,
268            &short,
269            ext,
270            &content,
271            store_result.original_tokens,
272            store_result.line_count,
273            task,
274        );
275        if let Some(hint) = &graph_hint {
276            output.push_str(&format!("\n{hint}"));
277        }
278        if let Some(hint) = similar_hint {
279            output.push_str(&format!("\n{hint}"));
280        }
281        let output = crate::core::redaction::redact_text_if_enabled(&output);
282        return ReadOutput {
283            content: output,
284            resolved_mode: "full".into(),
285            output_tokens: sent,
286        };
287    }
288
289    let resolved_mode = if mode == "auto" {
290        resolve_auto_mode(path, store_result.original_tokens, task)
291    } else {
292        mode.to_string()
293    };
294
295    let (mut output, _sent) = process_mode(
296        &content,
297        &resolved_mode,
298        &file_ref,
299        &short,
300        ext,
301        store_result.original_tokens,
302        crp_mode,
303        path,
304        task,
305    );
306    if is_cacheable_mode(&resolved_mode) {
307        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
308        cache.set_compressed(path, &cache_key, output.clone());
309    }
310    if let Some(hint) = &graph_hint {
311        output.push_str(&format!("\n{hint}"));
312    }
313    if let Some(hint) = similar_hint {
314        output.push_str(&format!("\n{hint}"));
315    }
316    let output = crate::core::redaction::redact_text_if_enabled(&output);
317    let final_tokens = count_tokens(&output);
318    ReadOutput {
319        content: output,
320        resolved_mode,
321        output_tokens: final_tokens,
322    }
323}
324
/// Reports whether `path` looks like an agent instruction / rules file.
///
/// Matching is case-insensitive and separator-agnostic: backslashes are treated as `/`, so
/// Windows-style paths such as `C:\repo\.cursor\rules\x.md` are recognised as well.
///
/// A path matches when either
/// - its final component is one of the known instruction file names, or
/// - it contains one of the known instruction directory markers.
pub fn is_instruction_file(path: &str) -> bool {
    /// File names that mark an instruction file regardless of location.
    const INSTRUCTION_FILE_NAMES: [&str; 7] = [
        "skill.md",
        "agents.md",
        ".cursorrules",
        ".clinerules",
        "rules.md",
        "lean-ctx.md",
        "lean-ctx.mdc",
    ];
    /// Path fragments that mark everything beneath them as instructions.
    const INSTRUCTION_PATH_MARKERS: [&str; 4] = [
        "/skills/",
        "/.cursor/rules/",
        "/.claude/rules/",
        "/agents.md",
    ];

    // Normalise separators so the `/`-based markers and `file_name` also work for `\` paths.
    let normalized = path.to_lowercase().replace('\\', "/");
    let filename = std::path::Path::new(&normalized)
        .file_name()
        .and_then(|f| f.to_str())
        .unwrap_or("");

    INSTRUCTION_FILE_NAMES.contains(&filename)
        || INSTRUCTION_PATH_MARKERS
            .iter()
            .any(|marker| normalized.contains(*marker))
}
346
347fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
348    if is_instruction_file(file_path) {
349        return "full".to_string();
350    }
351
352    // Priority 1: Intent Router with budget/pressure-aware degradation.
353    // Only fall through to Predictor/Bandit if the router returns "auto".
354    let intent_query = task.unwrap_or("read");
355    let route = crate::core::intent_router::route_v1(intent_query);
356    let intent_mode = &route.decision.effective_read_mode;
357    if intent_mode != "auto" && intent_mode != "reference" {
358        return intent_mode.clone();
359    }
360
361    // Priority 2: FileSignature-based predictor
362    let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
363    let predictor = crate::core::mode_predictor::ModePredictor::new();
364    let mut predicted = predictor
365        .predict_best_mode(&sig)
366        .unwrap_or_else(|| "full".to_string());
367    if predicted == "auto" {
368        predicted = "full".to_string();
369    }
370
371    // Priority 3: Bandit exploration when budget is tight
372    if let Some(project_root) =
373        crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
374    {
375        let ext = std::path::Path::new(file_path)
376            .extension()
377            .and_then(|e| e.to_str())
378            .unwrap_or("");
379        let bucket = match original_tokens {
380            0..=2000 => "sm",
381            2001..=10000 => "md",
382            10001..=50000 => "lg",
383            _ => "xl",
384        };
385        let bandit_key = format!("{ext}_{bucket}");
386        let mut store = crate::core::bandit::BanditStore::load(&project_root);
387        let bandit = store.get_or_create(&bandit_key);
388        let arm = bandit.select_arm();
389        if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
390            predicted = "aggressive".to_string();
391        }
392    }
393
394    // Priority 4: Adaptive mode policy
395    let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
396    let chosen = policy.choose_auto_mode(task, &predicted);
397
398    if original_tokens > 2000 {
399        if predicted == "map" || predicted == "signatures" {
400            if chosen != "map" && chosen != "signatures" {
401                return predicted;
402            }
403        } else if chosen == "full" && predicted != "full" {
404            return predicted;
405        }
406    }
407
408    chosen
409}
410
411fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
412    let cfg = crate::core::config::Config::load();
413    let profile = crate::core::config::MemoryProfile::effective(&cfg);
414    if !profile.semantic_cache_enabled() {
415        return None;
416    }
417
418    let project_root = detect_project_root(path);
419    let session_id = format!("{}", std::process::id());
420    let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
421
422    let similar = index.find_similar(content, 0.7);
423    let relevant: Vec<_> = similar
424        .into_iter()
425        .filter(|(p, _)| p != path)
426        .take(3)
427        .collect();
428
429    index.add_file(path, content, &session_id);
430    let _ = index.save(&project_root);
431
432    if relevant.is_empty() {
433        return None;
434    }
435
436    let hints: Vec<String> = relevant
437        .iter()
438        .map(|(p, score)| format!("  {p} ({:.0}% similar)", score * 100.0))
439        .collect();
440
441    Some(format!(
442        "[semantic: {} similar file(s) in cache]\n{}",
443        relevant.len(),
444        hints.join("\n")
445    ))
446}
447
448fn detect_project_root(path: &str) -> String {
449    crate::core::protocol::detect_project_root_or_cwd(path)
450}
451
452fn build_graph_related_hint(path: &str) -> Option<String> {
453    let project_root = detect_project_root(path);
454    crate::core::graph_context::build_related_hint(path, &project_root, 5)
455}
456
457const AUTO_DELTA_THRESHOLD: f64 = 0.6;
458
459/// Re-reads from disk; if content changed and delta is compact, sends auto-delta.
460fn handle_full_with_auto_delta(
461    cache: &mut SessionCache,
462    path: &str,
463    file_ref: &str,
464    short: &str,
465    ext: &str,
466    task: Option<&str>,
467) -> (String, usize) {
468    let Ok(disk_content) = read_file_lossy(path) else {
469        cache.record_cache_hit(path);
470        let out = if let Some(existing) = cache.get(path) {
471            format!(
472                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
473                existing.read_count, existing.line_count
474            )
475        } else {
476            format!("[file read failed and no cached version available] {file_ref}={short}")
477        };
478        let sent = count_tokens(&out);
479        return (out, sent);
480    };
481
482    let old_content = cache
483        .get(path)
484        .map(|e| e.content.clone())
485        .unwrap_or_default();
486    let store_result = cache.store(path, disk_content.clone());
487
488    if store_result.was_hit {
489        if store_result.full_content_delivered {
490            let out = format!(
491                "{file_ref}={short} cached {}t {}L\nFile content unchanged since last read (same hash). Already in your context window.",
492                store_result.read_count, store_result.line_count
493            );
494            let sent = count_tokens(&out);
495            return (out, sent);
496        }
497        cache.mark_full_delivered(path);
498        return format_full_output(
499            file_ref,
500            short,
501            ext,
502            &disk_content,
503            store_result.original_tokens,
504            store_result.line_count,
505            task,
506        );
507    }
508
509    let diff = compressor::diff_content(&old_content, &disk_content);
510    let diff_tokens = count_tokens(&diff);
511    let full_tokens = store_result.original_tokens;
512
513    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
514        let savings = protocol::format_savings(full_tokens, diff_tokens);
515        let out = format!(
516            "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
517            disk_content.lines().count()
518        );
519        return (out, diff_tokens);
520    }
521
522    format_full_output(
523        file_ref,
524        short,
525        ext,
526        &disk_content,
527        store_result.original_tokens,
528        store_result.line_count,
529        task,
530    )
531}
532
533fn format_full_output(
534    file_ref: &str,
535    short: &str,
536    ext: &str,
537    content: &str,
538    original_tokens: usize,
539    line_count: usize,
540    task: Option<&str>,
541) -> (String, usize) {
542    let tokens = original_tokens;
543    let metadata = build_header(file_ref, short, ext, content, line_count, true);
544
545    let mut reordered: Option<String> = None;
546    {
547        let profile = crate::core::profiles::active_profile();
548        let cfg = profile.layout;
549        if cfg.enabled_effective() && line_count >= cfg.min_lines_effective() {
550            let task_str = task.unwrap_or("");
551            if !task_str.is_empty() {
552                let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
553                let r = crate::core::attention_layout_driver::maybe_reorder_for_attention(
554                    content, &keywords, &cfg,
555                );
556                if !r.skipped && r.changed {
557                    reordered = Some(r.output);
558                }
559            }
560        }
561    }
562
563    let content_for_output = reordered.as_deref().unwrap_or(content);
564
565    let mut sym = SymbolMap::new();
566    let idents = symbol_map::extract_identifiers(content_for_output, ext);
567    for ident in &idents {
568        sym.register(ident);
569    }
570
571    if sym.len() >= 3 {
572        let sym_table = sym.format_table();
573        let compressed = sym.apply(content_for_output);
574        let original_tok = count_tokens(content_for_output);
575        let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
576        let net_saving = original_tok.saturating_sub(compressed_tok);
577        if original_tok > 0 && net_saving * 100 / original_tok >= 5 {
578            let output = format!("{metadata}\n{compressed}{sym_table}");
579            let sent = count_tokens(&output);
580            let savings = protocol::format_savings(tokens, sent);
581            return (format!("{output}\n{savings}"), sent);
582        }
583    }
584
585    let output = format!("{metadata}\n{content_for_output}");
586    let sent = count_tokens(&output);
587    let savings = protocol::format_savings(tokens, sent);
588    (format!("{output}\n{savings}"), sent)
589}
590
591fn build_header(
592    file_ref: &str,
593    short: &str,
594    ext: &str,
595    content: &str,
596    line_count: usize,
597    include_deps: bool,
598) -> String {
599    let mut header = format!("{file_ref}={short} {line_count}L");
600
601    if include_deps {
602        let dep_info = deps::extract_deps(content, ext);
603        if !dep_info.imports.is_empty() {
604            let imports_str: Vec<&str> = dep_info
605                .imports
606                .iter()
607                .take(8)
608                .map(std::string::String::as_str)
609                .collect();
610            header.push_str(&format!("\n deps {}", imports_str.join(",")));
611        }
612        if !dep_info.exports.is_empty() {
613            let exports_str: Vec<&str> = dep_info
614                .exports
615                .iter()
616                .take(8)
617                .map(std::string::String::as_str)
618                .collect();
619            header.push_str(&format!("\n exports {}", exports_str.join(",")));
620        }
621    }
622
623    header
624}
625
626#[allow(clippy::too_many_arguments)]
627fn process_mode(
628    content: &str,
629    mode: &str,
630    file_ref: &str,
631    short: &str,
632    ext: &str,
633    original_tokens: usize,
634    crp_mode: CrpMode,
635    file_path: &str,
636    task: Option<&str>,
637) -> (String, usize) {
638    let line_count = content.lines().count();
639
640    match mode {
641        "auto" => {
642            let chosen = resolve_auto_mode(file_path, original_tokens, task);
643            process_mode(
644                content,
645                &chosen,
646                file_ref,
647                short,
648                ext,
649                original_tokens,
650                crp_mode,
651                file_path,
652                task,
653            )
654        }
655        "full" => format_full_output(
656            file_ref,
657            short,
658            ext,
659            content,
660            original_tokens,
661            line_count,
662            task,
663        ),
664        "signatures" => {
665            let sigs = signatures::extract_signatures(content, ext);
666            let dep_info = deps::extract_deps(content, ext);
667
668            let mut output = format!("{file_ref}={short} {line_count}L");
669            if !dep_info.imports.is_empty() {
670                let imports_str: Vec<&str> = dep_info
671                    .imports
672                    .iter()
673                    .take(8)
674                    .map(std::string::String::as_str)
675                    .collect();
676                output.push_str(&format!("\n deps {}", imports_str.join(",")));
677            }
678            for sig in &sigs {
679                output.push('\n');
680                if crp_mode.is_tdd() {
681                    output.push_str(&sig.to_tdd());
682                } else {
683                    output.push_str(&sig.to_compact());
684                }
685            }
686            let sent = count_tokens(&output);
687            let savings = protocol::format_savings(original_tokens, sent);
688            (
689                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
690                sent,
691            )
692        }
693        "map" => {
694            if ext == "php" {
695                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
696                {
697                    let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
698                    let sent = count_tokens(&output);
699                    let savings = protocol::format_savings(original_tokens, sent);
700                    output.push('\n');
701                    output.push_str(&savings);
702                    return (append_compressed_hint(&output, file_path), sent);
703                }
704            }
705
706            let sigs = signatures::extract_signatures(content, ext);
707            let dep_info = deps::extract_deps(content, ext);
708
709            let mut output = format!("{file_ref}={short} {line_count}L");
710
711            if !dep_info.imports.is_empty() {
712                output.push_str("\n  deps: ");
713                output.push_str(&dep_info.imports.join(", "));
714            }
715
716            if !dep_info.exports.is_empty() {
717                output.push_str("\n  exports: ");
718                output.push_str(&dep_info.exports.join(", "));
719            }
720
721            let key_sigs: Vec<&signatures::Signature> = sigs
722                .iter()
723                .filter(|s| s.is_exported || s.indent == 0)
724                .collect();
725
726            if !key_sigs.is_empty() {
727                output.push_str("\n  API:");
728                for sig in &key_sigs {
729                    output.push_str("\n    ");
730                    if crp_mode.is_tdd() {
731                        output.push_str(&sig.to_tdd());
732                    } else {
733                        output.push_str(&sig.to_compact());
734                    }
735                }
736            }
737
738            let sent = count_tokens(&output);
739            let savings = protocol::format_savings(original_tokens, sent);
740            (
741                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
742                sent,
743            )
744        }
745        "aggressive" => {
746            #[cfg(feature = "tree-sitter")]
747            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
748            #[cfg(not(feature = "tree-sitter"))]
749            let ast_pruned: Option<String> = None;
750
751            let base = ast_pruned.as_deref().unwrap_or(content);
752
753            let session_intent = crate::core::session::SessionState::load_latest()
754                .and_then(|s| s.active_structured_intent);
755            let raw = if let Some(ref intent) = session_intent {
756                compressor::task_aware_compress(base, Some(ext), intent)
757            } else {
758                compressor::aggressive_compress(base, Some(ext))
759            };
760            let compressed = compressor::safeguard_ratio(content, &raw);
761            let header = build_header(file_ref, short, ext, content, line_count, true);
762
763            let mut sym = SymbolMap::new();
764            let idents = symbol_map::extract_identifiers(&compressed, ext);
765            for ident in &idents {
766                sym.register(ident);
767            }
768
769            if sym.len() >= 3 {
770                let sym_table = sym.format_table();
771                let sym_applied = sym.apply(&compressed);
772                let orig_tok = count_tokens(&compressed);
773                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
774                let net = orig_tok.saturating_sub(comp_tok);
775                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
776                    let savings = protocol::format_savings(original_tokens, comp_tok);
777                    return (
778                        append_compressed_hint(
779                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
780                            file_path,
781                        ),
782                        comp_tok,
783                    );
784                }
785                let savings = protocol::format_savings(original_tokens, orig_tok);
786                return (
787                    append_compressed_hint(
788                        &format!("{header}\n{compressed}\n{savings}"),
789                        file_path,
790                    ),
791                    orig_tok,
792                );
793            }
794
795            let sent = count_tokens(&compressed);
796            let savings = protocol::format_savings(original_tokens, sent);
797            (
798                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
799                sent,
800            )
801        }
802        "entropy" => {
803            let result = entropy::entropy_compress_adaptive(content, file_path);
804            let avg_h = entropy::analyze_entropy(content).avg_entropy;
805            let header = build_header(file_ref, short, ext, content, line_count, false);
806            let techs = result.techniques.join(", ");
807            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
808            let sent = count_tokens(&output);
809            let savings = protocol::format_savings(original_tokens, sent);
810            let compression_ratio = if original_tokens > 0 {
811                1.0 - (sent as f64 / original_tokens as f64)
812            } else {
813                0.0
814            };
815            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
816            (
817                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
818                sent,
819            )
820        }
821        "task" => {
822            let task_str = task.unwrap_or("");
823            if task_str.is_empty() {
824                let header = build_header(file_ref, short, ext, content, line_count, true);
825                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
826                let sent = count_tokens(&out);
827                return (out, sent);
828            }
829            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
830            if keywords.is_empty() {
831                let header = build_header(file_ref, short, ext, content, line_count, true);
832                let out = format!(
833                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
834                );
835                let sent = count_tokens(&out);
836                return (out, sent);
837            }
838            let filtered =
839                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
840            let filtered_lines = filtered.lines().count();
841            let header = format!(
842                "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
843            );
844            let project_root = detect_project_root(file_path);
845            let graph_ctx = crate::core::graph_context::build_graph_context(
846                file_path,
847                &project_root,
848                Some(crate::core::graph_context::GraphContextOptions::default()),
849            )
850            .map(|c| crate::core::graph_context::format_graph_context(&c))
851            .unwrap_or_default();
852
853            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
854            let savings = protocol::format_savings(original_tokens, sent);
855            (
856                append_compressed_hint(
857                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
858                    file_path,
859                ),
860                sent,
861            )
862        }
863        "reference" => {
864            let tok = count_tokens(content);
865            let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
866            let sent = count_tokens(&output);
867            let savings = protocol::format_savings(original_tokens, sent);
868            (format!("{output}\n{savings}"), sent)
869        }
870        mode if mode.starts_with("lines:") => {
871            let range_str = &mode[6..];
872            let extracted = extract_line_range(content, range_str);
873            let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
874            let sent = count_tokens(&extracted);
875            let savings = protocol::format_savings(original_tokens, sent);
876            (format!("{header}\n{extracted}\n{savings}"), sent)
877        }
878        unknown => {
879            let header = build_header(file_ref, short, ext, content, line_count, true);
880            let out = format!(
881                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
882            );
883            let sent = count_tokens(&out);
884            (out, sent)
885        }
886    }
887}
888
/// Renders the lines of `content` selected by `range_str`, one per output line.
///
/// `range_str` is a comma-separated list of specs. Each spec is either a single
/// 1-based line number (`7`) or an inclusive span (`3-9`). A span bound that is
/// missing or not a number falls back to the first line (start) or the last line
/// (end); bounds are clamped to the file. Out-of-range single numbers and
/// unparseable specs contribute nothing. Specs are emitted in the order given,
/// so overlapping specs repeat lines.
///
/// Every emitted line is prefixed with its line number right-aligned to four
/// columns followed by `| `. If nothing is selected, a fixed
/// "No lines matched the range." message is returned instead.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let all_lines: Vec<&str> = content.lines().collect();
    let last_line = all_lines.len();
    let mut rendered: Vec<String> = Vec::new();

    for spec in range_str.split(',').map(str::trim) {
        match spec.split_once('-') {
            Some((lo_text, hi_text)) => {
                // Clamping keeps every number in `lo..=hi` a valid 1-based line,
                // so the span can be indexed without a per-line bounds test.
                let lo = lo_text.trim().parse::<usize>().unwrap_or(1).max(1);
                let hi = hi_text
                    .trim()
                    .parse::<usize>()
                    .unwrap_or(last_line)
                    .min(last_line);
                rendered.extend((lo..=hi).map(|no| format!("{no:>4}| {}", all_lines[no - 1])));
            }
            None => {
                if let Ok(no) = spec.parse::<usize>() {
                    // `checked_sub` rejects line 0; `get` rejects lines past the end.
                    if let Some(text) = no.checked_sub(1).and_then(|idx| all_lines.get(idx)) {
                        rendered.push(format!("{no:>4}| {text}"));
                    }
                }
            }
        }
    }

    if rendered.is_empty() {
        return "No lines matched the range.".to_string();
    }
    rendered.join("\n")
}
917
918fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
919    let short = protocol::shorten_path(path);
920    let old_content = cache.get(path).map(|e| e.content.clone());
921
922    let new_content = match read_file_lossy(path) {
923        Ok(c) => c,
924        Err(e) => {
925            let msg = format!("ERROR: {e}");
926            let tokens = count_tokens(&msg);
927            return (msg, tokens);
928        }
929    };
930
931    let original_tokens = count_tokens(&new_content);
932
933    let diff_output = if let Some(old) = &old_content {
934        compressor::diff_content(old, &new_content)
935    } else {
936        format!("[first read]\n{new_content}")
937    };
938
939    cache.store(path, new_content);
940
941    let sent = count_tokens(&diff_output);
942    let savings = protocol::format_savings(original_tokens, sent);
943    (
944        format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}"),
945        sent,
946    )
947}
948
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Calls `process_mode` with the arguments every mode test here shares:
    /// file ref `F1`, extension `rs`, CRP off, and `file` used as both the
    /// short name and the file path.
    fn run_mode(
        content: &str,
        mode: &str,
        file: &str,
        original_tokens: usize,
        task: Option<&str>,
    ) -> (String, usize) {
        process_mode(
            content,
            mode,
            "F1",
            file,
            "rs",
            original_tokens,
            CrpMode::Off,
            file,
            task,
        )
    }

    /// The header must not contain square brackets.
    #[test]
    fn test_header_toon_format_no_brackets() {
        let source = "use std::io;\nfn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", source, 2, false);
        for bracket in ['[', ']'] {
            assert!(!header.contains(bracket));
        }
        assert!(header.contains("F1=main.rs 2L"));
    }

    /// When a deps section is present it must use the indented form.
    #[test]
    fn test_header_toon_deps_indented() {
        let source = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", source, 3, true);
        if !header.contains("deps") {
            // No deps section was emitted, so there is nothing to check.
            return;
        }
        assert!(
            header.contains("\n deps "),
            "deps should use indented TOON format"
        );
        assert!(
            !header.contains("deps:["),
            "deps should not use bracket format"
        );
    }

    /// The current header must not cost more tokens than the legacy bracket format.
    #[test]
    fn test_header_toon_saves_tokens() {
        let source = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
        let old_tokens = count_tokens("F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]");
        let new_tokens = count_tokens(&build_header("F1", "main.rs", "rs", source, 4, true));
        assert!(
            new_tokens <= old_tokens,
            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
        );
    }

    /// Each TDD symbol must tokenize to at most two tokens.
    #[test]
    fn test_tdd_symbols_are_compact() {
        for sym in [
            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
        ] {
            let tok = count_tokens(sym);
            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
        }
    }

    /// Task mode with a matching task must shrink the output and mark it as filtered.
    #[test]
    fn test_task_mode_filters_content() {
        // Every 20th line mentions the task keyword; the rest is filler.
        let mut source_lines = Vec::with_capacity(200);
        for i in 0..200 {
            source_lines.push(if i % 20 == 0 {
                format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
            } else {
                format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
            });
        }
        let content = source_lines.join("\n");
        let full_tokens = count_tokens(&content);

        let (result, result_tokens) = run_mode(
            &content,
            "task",
            "test.rs",
            full_tokens,
            Some("fix bug in validate_token"),
        );

        assert!(
            result_tokens < full_tokens,
            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
        );
        assert!(
            result.contains("task-filtered"),
            "output should contain task-filtered marker"
        );
    }

    /// Task mode without a task must say so in its output.
    #[test]
    fn test_task_mode_without_task_returns_full() {
        let source = "fn main() {}\nfn helper() {}\n";
        let (result, _) = run_mode(source, "task", "test.rs", count_tokens(source), None);
        assert!(
            result.contains("no task set"),
            "should indicate no task: {result}"
        );
    }

    /// Reference mode must stay within three lines and report line and token counts.
    #[test]
    fn test_reference_mode_one_line() {
        let source = "fn main() {}\nfn helper() {}\nfn other() {}\n";
        let (result, _) = run_mode(source, "reference", "test.rs", count_tokens(source), None);

        let emitted_lines = result.lines().count();
        assert!(
            emitted_lines <= 3,
            "reference mode should be very compact, got {} lines",
            emitted_lines
        );
        assert!(result.contains("lines"), "should contain line count");
        assert!(result.contains("tok"), "should contain token count");
    }

    /// A `lines:` read must reflect new file content after the file is rewritten.
    #[test]
    fn cached_lines_mode_invalidates_on_mtime_change() {
        /// Text of the second output line with any `NNN|` prefix removed.
        fn second_line_payload(output: &str) -> &str {
            let line = output.lines().nth(1).unwrap_or_default().trim();
            line.split_once('|').map_or(line, |(_, rest)| rest.trim())
        }

        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("file.txt");
        let p = path.to_string_lossy().into_owned();
        let mut cache = SessionCache::new();

        std::fs::write(&path, "one\nsecond\n").unwrap();
        let first = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        assert_eq!(second_line_payload(&first.content), "one");

        // Wait before rewriting so the second write gets a later mtime.
        std::thread::sleep(Duration::from_secs(1));
        std::fs::write(&path, "two\nsecond\n").unwrap();

        let second = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        assert_eq!(second_line_payload(&second.content), "two");
    }

    /// Prints token counts for four modes on the same file and checks their ordering.
    #[test]
    #[cfg_attr(tarpaulin, ignore)]
    fn benchmark_task_conditioned_compression() {
        // Keep this reasonably small so CI coverage instrumentation stays fast.
        let content = generate_benchmark_code(200);
        let full_tokens = count_tokens(&content);
        let task = Some("fix authentication in validate_token");

        let tokens_for = |mode: &str| run_mode(&content, mode, "server.rs", full_tokens, task).1;
        let full_tok = tokens_for("full");
        let task_tok = tokens_for("task");
        let sig_tok = tokens_for("signatures");
        let ref_tok = tokens_for("reference");

        let savings_pct = |tok: usize| (1.0 - tok as f64 / full_tok as f64) * 100.0;

        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
        eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
        eprintln!("  full:       {full_tok:>6} tokens (baseline)");
        eprintln!(
            "  task:       {task_tok:>6} tokens ({:.0}% savings)",
            savings_pct(task_tok)
        );
        eprintln!(
            "  signatures: {sig_tok:>6} tokens ({:.0}% savings)",
            savings_pct(sig_tok)
        );
        eprintln!(
            "  reference:  {ref_tok:>6} tokens ({:.0}% savings)",
            savings_pct(ref_tok)
        );
        eprintln!("================================================\n");

        assert!(task_tok < full_tok, "task mode should save tokens");
        assert!(sig_tok < full_tok, "signatures should save tokens");
        assert!(ref_tok < sig_tok, "reference should be most compact");
    }

    /// Builds a synthetic Rust source: a fixed preamble with one
    /// `validate_token` method, then filler handler lines up to roughly
    /// `lines` lines, then the closing brace of the `impl`.
    fn generate_benchmark_code(lines: usize) -> String {
        const PREAMBLE: &[&str] = &[
            "use std::collections::HashMap;",
            "use crate::core::auth;",
            "",
            "pub struct Server {",
            "    config: Config,",
            "    cache: HashMap<String, String>,",
            "}",
            "",
            "impl Server {",
            "    pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {",
            "        let decoded = auth::decode_jwt(token)?;",
            "        if decoded.exp < chrono::Utc::now().timestamp() {",
            "            return Err(AuthError::Expired);",
            "        }",
            "        Ok(decoded.claims)",
            "    }",
            "",
        ];

        let mut code: Vec<String> = Vec::with_capacity(lines);
        code.extend(PREAMBLE.iter().map(|line| line.to_string()));

        // Filler repeats in groups of 30: a handler opener, 28 body lines, a closer.
        let remaining = lines.saturating_sub(code.len());
        code.extend((0..remaining).map(|i| match i % 30 {
            0 => format!("    pub fn handler_{i}(&self, req: Request) -> Response {{"),
            29 => "    }".to_string(),
            _ => format!("        let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"),
        }));

        code.push("}".to_string());
        code.join("\n")
    }

    /// Known instruction-file paths are detected; ordinary project files are not.
    #[test]
    fn instruction_file_detection() {
        let instruction_paths = [
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
            "/workspace/.cursor/rules/lean-ctx.mdc",
            "/project/AGENTS.md",
            "/project/.cursorrules",
            "/home/user/.claude/rules/my-rule.md",
            "/skills/some-skill/README.md",
        ];
        for path in instruction_paths {
            assert!(
                is_instruction_file(path),
                "expected an instruction file: {path}"
            );
        }

        let ordinary_paths = [
            "/project/src/main.rs",
            "/project/config.json",
            "/project/data/report.csv",
        ];
        for path in ordinary_paths {
            assert!(
                !is_instruction_file(path),
                "expected an ordinary file: {path}"
            );
        }
    }

    /// Auto mode must resolve to `full` for instruction files.
    #[test]
    fn resolve_auto_mode_returns_full_for_instruction_files() {
        let cases = [
            (
                "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
                5000,
                Some("read"),
                "SKILL.md must always be read in full",
            ),
            (
                "/workspace/AGENTS.md",
                3000,
                Some("read"),
                "AGENTS.md must always be read in full",
            ),
            (
                "/workspace/.cursorrules",
                2000,
                None,
                ".cursorrules must always be read in full",
            ),
        ];

        for (path, tokens, hint, reason) in cases {
            let mode = resolve_auto_mode(path, tokens, hint);
            assert_eq!(mode, "full", "{reason}");
        }
    }
}