Skip to main content

lean_ctx/tools/
ctx_read.rs

1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Pre-counted read output carrying the output string, resolved mode,
/// and token count computed during mode processing.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so results can be logged,
/// copied and compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadOutput {
    /// Rendered text handed back to the caller.
    pub content: String,
    /// Mode that was actually applied (e.g. the concrete mode chosen for
    /// `"auto"`, or `"error"` when the file could not be read).
    pub resolved_mode: String,
    /// Approximate output token count from mode processing.
    /// The dispatch layer recounts the final assembled string for accurate savings.
    pub output_tokens: usize,
}
22
/// Trailer line added to compressed renderings to point at `mode="full"`.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Modes whose rendered output is kept in the per-file compressed cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is listed in `CACHEABLE_MODES` (exact, case-sensitive match).
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|candidate| *candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32    if crp_mode.is_tdd() {
33        format!("{mode}:tdd")
34    } else {
35        mode.to_string()
36    }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40    format!("{output}\n{COMPRESSED_HINT}\n  ctx_read(\"{file_path}\", mode=\"full\")")
41}
42
43/// Reads a file as UTF-8 with lossy fallback, enforcing the max read size limit.
44pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
45    let cap = crate::core::limits::max_read_bytes();
46    if let Ok(meta) = std::fs::metadata(path) {
47        if meta.len() > cap as u64 {
48            return Err(std::io::Error::other(format!(
49                "file too large ({} bytes, cap {} via LCTX_MAX_READ_BYTES)",
50                meta.len(),
51                cap
52            )));
53        }
54    }
55    let bytes = std::fs::read(path)?;
56    match String::from_utf8(bytes) {
57        Ok(s) => Ok(s),
58        Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
59    }
60}
61
62/// Reads a file through the cache and applies the requested compression mode.
63pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
64    handle_with_options(cache, path, mode, false, crp_mode, None)
65}
66
67/// Like `handle`, but invalidates the cache first to force a fresh disk read.
68pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
69    handle_with_options(cache, path, mode, true, crp_mode, None)
70}
71
72/// Reads a file with task-aware filtering to prioritize task-relevant content.
73pub fn handle_with_task(
74    cache: &mut SessionCache,
75    path: &str,
76    mode: &str,
77    crp_mode: CrpMode,
78    task: Option<&str>,
79) -> String {
80    handle_with_options(cache, path, mode, false, crp_mode, task)
81}
82
83/// Like `handle_with_task`, also returns the resolved mode name and pre-counted tokens.
84pub fn handle_with_task_resolved(
85    cache: &mut SessionCache,
86    path: &str,
87    mode: &str,
88    crp_mode: CrpMode,
89    task: Option<&str>,
90) -> ReadOutput {
91    handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
92}
93
94/// Fresh read with task-aware filtering (invalidates cache first).
95pub fn handle_fresh_with_task(
96    cache: &mut SessionCache,
97    path: &str,
98    mode: &str,
99    crp_mode: CrpMode,
100    task: Option<&str>,
101) -> String {
102    handle_with_options(cache, path, mode, true, crp_mode, task)
103}
104
105/// Fresh read with task-aware filtering, also returns the resolved mode name and pre-counted tokens.
106pub fn handle_fresh_with_task_resolved(
107    cache: &mut SessionCache,
108    path: &str,
109    mode: &str,
110    crp_mode: CrpMode,
111    task: Option<&str>,
112) -> ReadOutput {
113    handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
114}
115
116fn handle_with_options(
117    cache: &mut SessionCache,
118    path: &str,
119    mode: &str,
120    fresh: bool,
121    crp_mode: CrpMode,
122    task: Option<&str>,
123) -> String {
124    handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
125}
126
/// Shared implementation behind the `handle*` entry points in this file.
///
/// Steps, in order:
/// 1. When `fresh` is set, the cached entry for `path` is invalidated.
/// 2. `mode == "diff"` is delegated to `handle_diff` and returned directly.
/// 3. For every mode except `"full"`, a cached entry reported stale by
///    `is_cache_entry_stale` is invalidated.
/// 4. Cache hit: `"full"` goes through `handle_full_with_auto_delta`; other
///    modes are rendered from the cached content, reusing or storing the
///    compressed rendering for cacheable modes.
/// 5. Cache miss: the file is read from disk, stored in the cache, rendered,
///    and the graph / semantic-similarity hints are appended.
///
/// Every returned text except the diff result and the read-error message is
/// passed through `redact_text_if_enabled`.
/// NOTE(review): the diff path is not redacted in this function — confirm
/// that `handle_diff` takes care of redaction itself.
///
/// Returns a `ReadOutput` whose `resolved_mode` is the concrete mode applied
/// (`"error"` when the disk read fails).
127fn handle_with_options_resolved(
128    cache: &mut SessionCache,
129    path: &str,
130    mode: &str,
131    fresh: bool,
132    crp_mode: CrpMode,
133    task: Option<&str>,
134) -> ReadOutput {
135    let file_ref = cache.get_file_ref(path);
136    let short = protocol::shorten_path(path);
137    let ext = Path::new(path)
138        .extension()
139        .and_then(|e| e.to_str())
140        .unwrap_or("");
141
    // A fresh read discards the cached entry before anything consults the cache.
142    if fresh {
143        cache.invalidate(path);
144    }
145
    // Diff mode is handled entirely by `handle_diff`; its result is returned as-is.
146    if mode == "diff" {
147        let (out, sent) = handle_diff(cache, path, &file_ref);
148        return ReadOutput {
149            content: out,
150            resolved_mode: "diff".into(),
151            output_tokens: sent,
152        };
153    }
154
    // Non-"full" modes render from cached content, so a stale entry is dropped
    // here to force the disk read further down. ("full" re-reads from disk on
    // its own inside `handle_full_with_auto_delta`.)
155    if mode != "full" {
156        if let Some(existing) = cache.get(path) {
157            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
158            if stale {
159                cache.invalidate(path);
160            }
161        }
162    }
163
    // ---- Cache hit ----
164    if let Some(existing) = cache.get(path) {
165        if mode == "full" {
166            let (out, sent) =
167                handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
168            let out = crate::core::redaction::redact_text_if_enabled(&out);
169            return ReadOutput {
170                content: out,
171                resolved_mode: "full".into(),
172                output_tokens: sent,
173            };
174        }
175        let content = existing.content.clone();
176        let original_tokens = existing.original_tokens;
177        let resolved_mode = if mode == "auto" {
178            resolve_auto_mode(path, original_tokens, task)
179        } else {
180            mode.to_string()
181        };
        // Cacheable modes: reuse a previously stored rendering when one exists.
        // Tokens are counted on the stored text, before redaction.
182        if is_cacheable_mode(&resolved_mode) {
183            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
184            if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
185                let sent = count_tokens(cached_output);
186                let out = crate::core::redaction::redact_text_if_enabled(cached_output);
187                return ReadOutput {
188                    content: out,
189                    resolved_mode,
190                    output_tokens: sent,
191                };
192            }
193        }
194        let (out, sent) = process_mode(
195            &content,
196            &resolved_mode,
197            &file_ref,
198            &short,
199            ext,
200            original_tokens,
201            crp_mode,
202            path,
203            task,
204        );
        // The rendering is stored before redaction; redaction is applied on the way out.
205        if is_cacheable_mode(&resolved_mode) {
206            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
207            cache.set_compressed(path, &cache_key, out.clone());
208        }
209        let out = crate::core::redaction::redact_text_if_enabled(&out);
210        return ReadOutput {
211            content: out,
212            resolved_mode,
213            output_tokens: sent,
214        };
215    }
216
    // ---- Cache miss: read from disk ----
    // A failed read is reported as text with resolved_mode "error".
217    let content = match read_file_lossy(path) {
218        Ok(c) => c,
219        Err(e) => {
220            let msg = format!("ERROR: {e}");
221            let tokens = count_tokens(&msg);
222            return ReadOutput {
223                content: msg,
224                resolved_mode: "error".into(),
225                output_tokens: tokens,
226            };
227        }
228    };
229
    // Hints are only computed on this cache-miss path.
230    let similar_hint = find_similar_and_update_semantic_index(path, &content);
231    let graph_hint = build_graph_related_hint(path);
232
233    let store_result = cache.store(path, content.clone());
234
235    if mode == "full" {
236        cache.mark_full_delivered(path);
237        let (mut output, sent) = format_full_output(
238            &file_ref,
239            &short,
240            ext,
241            &content,
242            store_result.original_tokens,
243            store_result.line_count,
244            task,
245        );
        // `sent` was counted before the hints are appended, so it excludes them.
246        if let Some(hint) = &graph_hint {
247            output.push_str(&format!("\n{hint}"));
248        }
249        if let Some(hint) = similar_hint {
250            output.push_str(&format!("\n{hint}"));
251        }
252        let output = crate::core::redaction::redact_text_if_enabled(&output);
253        return ReadOutput {
254            content: output,
255            resolved_mode: "full".into(),
256            output_tokens: sent,
257        };
258    }
259
260    let resolved_mode = if mode == "auto" {
261        resolve_auto_mode(path, store_result.original_tokens, task)
262    } else {
263        mode.to_string()
264    };
265
266    let (mut output, _sent) = process_mode(
267        &content,
268        &resolved_mode,
269        &file_ref,
270        &short,
271        ext,
272        store_result.original_tokens,
273        crp_mode,
274        path,
275        task,
276    );
    // Cache the rendering without the hints appended below.
277    if is_cacheable_mode(&resolved_mode) {
278        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
279        cache.set_compressed(path, &cache_key, output.clone());
280    }
281    if let Some(hint) = &graph_hint {
282        output.push_str(&format!("\n{hint}"));
283    }
284    if let Some(hint) = similar_hint {
285        output.push_str(&format!("\n{hint}"));
286    }
    // Unlike the paths above, this one counts tokens on the final string
    // (hints included, after redaction) instead of using `process_mode`'s count.
287    let output = crate::core::redaction::redact_text_if_enabled(&output);
288    let final_tokens = count_tokens(&output);
289    ReadOutput {
290        content: output,
291        resolved_mode,
292        output_tokens: final_tokens,
293    }
294}
295
/// Reports whether `path` looks like an agent-instruction file, judged purely
/// by its name and location (the file is never opened).
///
/// A path matches when, case-insensitively, either:
/// - its file name is one of the known instruction file names
///   (`skill.md`, `agents.md`, `rules.md`, `.cursorrules`, `.clinerules`,
///   `lean-ctx.md`, `lean-ctx.mdc`), or
/// - it contains one of the markers `/skills/`, `/.cursor/rules/`,
///   `/.claude/rules/` or `/agents.md`.
///
/// Backslashes are treated as path separators so Windows-style paths such as
/// `C:\repo\.cursor\rules\style.mdc` match the same markers.
pub fn is_instruction_file(path: &str) -> bool {
    const INSTRUCTION_FILE_NAMES: [&str; 7] = [
        "skill.md",
        "agents.md",
        "rules.md",
        ".cursorrules",
        ".clinerules",
        "lean-ctx.md",
        "lean-ctx.mdc",
    ];
    const INSTRUCTION_PATH_MARKERS: [&str; 4] = [
        "/skills/",
        "/.cursor/rules/",
        "/.claude/rules/",
        "/agents.md",
    ];

    // Normalise case and separators once; every check below works on this form.
    let normalized = path.to_lowercase().replace('\\', "/");
    let filename = std::path::Path::new(&normalized)
        .file_name()
        .and_then(|f| f.to_str())
        .unwrap_or("");

    INSTRUCTION_FILE_NAMES.contains(&filename)
        || INSTRUCTION_PATH_MARKERS
            .iter()
            .any(|marker| normalized.contains(marker))
}
317
318fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
319    if is_instruction_file(file_path) {
320        return "full".to_string();
321    }
322
323    // Priority 1: Intent Router with budget/pressure-aware degradation.
324    // Only fall through to Predictor/Bandit if the router returns "auto".
325    let intent_query = task.unwrap_or("read");
326    let route = crate::core::intent_router::route_v1(intent_query);
327    let intent_mode = &route.decision.effective_read_mode;
328    if intent_mode != "auto" && intent_mode != "reference" {
329        return intent_mode.clone();
330    }
331
332    // Priority 2: FileSignature-based predictor
333    let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
334    let predictor = crate::core::mode_predictor::ModePredictor::new();
335    let mut predicted = predictor
336        .predict_best_mode(&sig)
337        .unwrap_or_else(|| "full".to_string());
338    if predicted == "auto" {
339        predicted = "full".to_string();
340    }
341
342    // Priority 3: Bandit exploration when budget is tight
343    if let Some(project_root) =
344        crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
345    {
346        let ext = std::path::Path::new(file_path)
347            .extension()
348            .and_then(|e| e.to_str())
349            .unwrap_or("");
350        let bucket = match original_tokens {
351            0..=2000 => "sm",
352            2001..=10000 => "md",
353            10001..=50000 => "lg",
354            _ => "xl",
355        };
356        let bandit_key = format!("{ext}_{bucket}");
357        let mut store = crate::core::bandit::BanditStore::load(&project_root);
358        let bandit = store.get_or_create(&bandit_key);
359        let arm = bandit.select_arm();
360        if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
361            predicted = "aggressive".to_string();
362        }
363    }
364
365    // Priority 4: Adaptive mode policy
366    let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
367    let chosen = policy.choose_auto_mode(task, &predicted);
368
369    if original_tokens > 2000 {
370        if predicted == "map" || predicted == "signatures" {
371            if chosen != "map" && chosen != "signatures" {
372                return predicted;
373            }
374        } else if chosen == "full" && predicted != "full" {
375            return predicted;
376        }
377    }
378
379    chosen
380}
381
382fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
383    let cfg = crate::core::config::Config::load();
384    let profile = crate::core::config::MemoryProfile::effective(&cfg);
385    if !profile.semantic_cache_enabled() {
386        return None;
387    }
388
389    let project_root = detect_project_root(path);
390    let session_id = format!("{}", std::process::id());
391    let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
392
393    let similar = index.find_similar(content, 0.7);
394    let relevant: Vec<_> = similar
395        .into_iter()
396        .filter(|(p, _)| p != path)
397        .take(3)
398        .collect();
399
400    index.add_file(path, content, &session_id);
401    let _ = index.save(&project_root);
402
403    if relevant.is_empty() {
404        return None;
405    }
406
407    let hints: Vec<String> = relevant
408        .iter()
409        .map(|(p, score)| format!("  {p} ({:.0}% similar)", score * 100.0))
410        .collect();
411
412    Some(format!(
413        "[semantic: {} similar file(s) in cache]\n{}",
414        relevant.len(),
415        hints.join("\n")
416    ))
417}
418
419fn detect_project_root(path: &str) -> String {
420    crate::core::protocol::detect_project_root_or_cwd(path)
421}
422
423fn build_graph_related_hint(path: &str) -> Option<String> {
424    let project_root = detect_project_root(path);
425    crate::core::graph_context::build_related_hint(path, &project_root, 5)
426}
427
428const AUTO_DELTA_THRESHOLD: f64 = 0.6;
429
430/// Re-reads from disk; if content changed and delta is compact, sends auto-delta.
431fn handle_full_with_auto_delta(
432    cache: &mut SessionCache,
433    path: &str,
434    file_ref: &str,
435    short: &str,
436    ext: &str,
437    task: Option<&str>,
438) -> (String, usize) {
439    let Ok(disk_content) = read_file_lossy(path) else {
440        cache.record_cache_hit(path);
441        let out = if let Some(existing) = cache.get(path) {
442            format!(
443                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
444                existing.read_count, existing.line_count
445            )
446        } else {
447            format!("[file read failed and no cached version available] {file_ref}={short}")
448        };
449        let sent = count_tokens(&out);
450        return (out, sent);
451    };
452
453    let old_content = cache
454        .get(path)
455        .map(|e| e.content.clone())
456        .unwrap_or_default();
457    let store_result = cache.store(path, disk_content.clone());
458
459    if store_result.was_hit {
460        if store_result.full_content_delivered {
461            let out = format!(
462                "{file_ref}={short} cached {}t {}L\nFile content unchanged since last read (same hash). Already in your context window.",
463                store_result.read_count, store_result.line_count
464            );
465            let sent = count_tokens(&out);
466            return (out, sent);
467        }
468        cache.mark_full_delivered(path);
469        return format_full_output(
470            file_ref,
471            short,
472            ext,
473            &disk_content,
474            store_result.original_tokens,
475            store_result.line_count,
476            task,
477        );
478    }
479
480    let diff = compressor::diff_content(&old_content, &disk_content);
481    let diff_tokens = count_tokens(&diff);
482    let full_tokens = store_result.original_tokens;
483
484    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
485        let savings = protocol::format_savings(full_tokens, diff_tokens);
486        let out = format!(
487            "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
488            disk_content.lines().count()
489        );
490        return (out, diff_tokens);
491    }
492
493    format_full_output(
494        file_ref,
495        short,
496        ext,
497        &disk_content,
498        store_result.original_tokens,
499        store_result.line_count,
500        task,
501    )
502}
503
504fn format_full_output(
505    file_ref: &str,
506    short: &str,
507    ext: &str,
508    content: &str,
509    original_tokens: usize,
510    line_count: usize,
511    task: Option<&str>,
512) -> (String, usize) {
513    let tokens = original_tokens;
514    let metadata = build_header(file_ref, short, ext, content, line_count, true);
515
516    let mut reordered: Option<String> = None;
517    {
518        let profile = crate::core::profiles::active_profile();
519        let cfg = profile.layout;
520        if cfg.enabled_effective() && line_count >= cfg.min_lines_effective() {
521            let task_str = task.unwrap_or("");
522            if !task_str.is_empty() {
523                let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
524                let r = crate::core::attention_layout_driver::maybe_reorder_for_attention(
525                    content, &keywords, &cfg,
526                );
527                if !r.skipped && r.changed {
528                    reordered = Some(r.output);
529                }
530            }
531        }
532    }
533
534    let content_for_output = reordered.as_deref().unwrap_or(content);
535
536    let mut sym = SymbolMap::new();
537    let idents = symbol_map::extract_identifiers(content_for_output, ext);
538    for ident in &idents {
539        sym.register(ident);
540    }
541
542    if sym.len() >= 3 {
543        let sym_table = sym.format_table();
544        let compressed = sym.apply(content_for_output);
545        let original_tok = count_tokens(content_for_output);
546        let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
547        let net_saving = original_tok.saturating_sub(compressed_tok);
548        if original_tok > 0 && net_saving * 100 / original_tok >= 5 {
549            let output = format!("{metadata}\n{compressed}{sym_table}");
550            let sent = count_tokens(&output);
551            let savings = protocol::format_savings(tokens, sent);
552            return (format!("{output}\n{savings}"), sent);
553        }
554    }
555
556    let output = format!("{metadata}\n{content_for_output}");
557    let sent = count_tokens(&output);
558    let savings = protocol::format_savings(tokens, sent);
559    (format!("{output}\n{savings}"), sent)
560}
561
562fn build_header(
563    file_ref: &str,
564    short: &str,
565    ext: &str,
566    content: &str,
567    line_count: usize,
568    include_deps: bool,
569) -> String {
570    let mut header = format!("{file_ref}={short} {line_count}L");
571
572    if include_deps {
573        let dep_info = deps::extract_deps(content, ext);
574        if !dep_info.imports.is_empty() {
575            let imports_str: Vec<&str> = dep_info
576                .imports
577                .iter()
578                .take(8)
579                .map(std::string::String::as_str)
580                .collect();
581            header.push_str(&format!("\n deps {}", imports_str.join(",")));
582        }
583        if !dep_info.exports.is_empty() {
584            let exports_str: Vec<&str> = dep_info
585                .exports
586                .iter()
587                .take(8)
588                .map(std::string::String::as_str)
589                .collect();
590            header.push_str(&format!("\n exports {}", exports_str.join(",")));
591        }
592    }
593
594    header
595}
596
/// Renders `content` according to `mode` and returns `(text, token_count)`.
///
/// Supported modes: `"auto"`, `"full"`, `"signatures"`, `"map"`,
/// `"aggressive"`, `"entropy"`, `"task"`, `"reference"` and `"lines:<ranges>"`.
/// Any other value falls back to the full content with a warning line.
///
/// `original_tokens` is only used to compute the savings line; `file_path` is
/// used for the `mode="full"` hint, auto-mode resolution, entropy compression
/// and graph context. The returned token count is what each arm counts as
/// "sent"; it does not always cover the savings line or hint trailer.
597#[allow(clippy::too_many_arguments)]
598fn process_mode(
599    content: &str,
600    mode: &str,
601    file_ref: &str,
602    short: &str,
603    ext: &str,
604    original_tokens: usize,
605    crp_mode: CrpMode,
606    file_path: &str,
607    task: Option<&str>,
608) -> (String, usize) {
609    let line_count = content.lines().count();
610
611    match mode {
        // "auto": resolve to a concrete mode, then render with that mode.
612        "auto" => {
613            let chosen = resolve_auto_mode(file_path, original_tokens, task);
614            process_mode(
615                content,
616                &chosen,
617                file_ref,
618                short,
619                ext,
620                original_tokens,
621                crp_mode,
622                file_path,
623                task,
624            )
625        }
626        "full" => format_full_output(
627            file_ref,
628            short,
629            ext,
630            content,
631            original_tokens,
632            line_count,
633            task,
634        ),
        // "signatures": header, up to eight imports, then one line per
        // extracted signature (TDD or compact form depending on `crp_mode`).
635        "signatures" => {
636            let sigs = signatures::extract_signatures(content, ext);
637            let dep_info = deps::extract_deps(content, ext);
638
639            let mut output = format!("{file_ref}={short} {line_count}L");
640            if !dep_info.imports.is_empty() {
641                let imports_str: Vec<&str> = dep_info
642                    .imports
643                    .iter()
644                    .take(8)
645                    .map(std::string::String::as_str)
646                    .collect();
647                output.push_str(&format!("\n deps {}", imports_str.join(",")));
648            }
649            for sig in &sigs {
650                output.push('\n');
651                if crp_mode.is_tdd() {
652                    output.push_str(&sig.to_tdd());
653                } else {
654                    output.push_str(&sig.to_compact());
655                }
656            }
657            let sent = count_tokens(&output);
658            let savings = protocol::format_savings(original_tokens, sent);
659            (
660                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
661                sent,
662            )
663        }
        // "map": PHP files get a dedicated map when `compress_php_map` yields
        // one; otherwise all imports and exports plus the signatures that are
        // exported or sit at indent 0.
664        "map" => {
665            if ext == "php" {
666                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
667                {
668                    let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
669                    let sent = count_tokens(&output);
670                    let savings = protocol::format_savings(original_tokens, sent);
671                    output.push('\n');
672                    output.push_str(&savings);
673                    return (append_compressed_hint(&output, file_path), sent);
674                }
675            }
676
677            let sigs = signatures::extract_signatures(content, ext);
678            let dep_info = deps::extract_deps(content, ext);
679
680            let mut output = format!("{file_ref}={short} {line_count}L");
681
682            if !dep_info.imports.is_empty() {
683                output.push_str("\n  deps: ");
684                output.push_str(&dep_info.imports.join(", "));
685            }
686
687            if !dep_info.exports.is_empty() {
688                output.push_str("\n  exports: ");
689                output.push_str(&dep_info.exports.join(", "));
690            }
691
692            let key_sigs: Vec<&signatures::Signature> = sigs
693                .iter()
694                .filter(|s| s.is_exported || s.indent == 0)
695                .collect();
696
697            if !key_sigs.is_empty() {
698                output.push_str("\n  API:");
699                for sig in &key_sigs {
700                    output.push_str("\n    ");
701                    if crp_mode.is_tdd() {
702                        output.push_str(&sig.to_tdd());
703                    } else {
704                        output.push_str(&sig.to_compact());
705                    }
706                }
707            }
708
709            let sent = count_tokens(&output);
710            let savings = protocol::format_savings(original_tokens, sent);
711            (
712                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
713                sent,
714            )
715        }
        // "aggressive": optional AST pruning (only with the `tree-sitter`
        // feature), then task-aware compression when the latest session has a
        // structured intent, otherwise plain aggressive compression. The
        // result goes through `safeguard_ratio` against the original content.
716        "aggressive" => {
717            #[cfg(feature = "tree-sitter")]
718            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
719            #[cfg(not(feature = "tree-sitter"))]
720            let ast_pruned: Option<String> = None;
721
722            let base = ast_pruned.as_deref().unwrap_or(content);
723
724            let session_intent = crate::core::session::SessionState::load_latest()
725                .and_then(|s| s.active_structured_intent);
726            let raw = if let Some(ref intent) = session_intent {
727                compressor::task_aware_compress(base, Some(ext), intent)
728            } else {
729                compressor::aggressive_compress(base, Some(ext))
730            };
731            let compressed = compressor::safeguard_ratio(content, &raw);
732            let header = build_header(file_ref, short, ext, content, line_count, true);
733
734            let mut sym = SymbolMap::new();
735            let idents = symbol_map::extract_identifiers(&compressed, ext);
736            for ident in &idents {
737                sym.register(ident);
738            }
739
            // With at least three identifiers, try symbol substitution and keep
            // it only when it nets >= 5% fewer tokens (table included).
740            if sym.len() >= 3 {
741                let sym_table = sym.format_table();
742                let sym_applied = sym.apply(&compressed);
743                let orig_tok = count_tokens(&compressed);
744                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
745                let net = orig_tok.saturating_sub(comp_tok);
746                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
747                    let savings = protocol::format_savings(original_tokens, comp_tok);
748                    return (
749                        append_compressed_hint(
750                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
751                            file_path,
752                        ),
753                        comp_tok,
754                    );
755                }
                // Substitution did not pay off: emit the compressed text unchanged.
756                let savings = protocol::format_savings(original_tokens, orig_tok);
757                return (
758                    append_compressed_hint(
759                        &format!("{header}\n{compressed}\n{savings}"),
760                        file_path,
761                    ),
762                    orig_tok,
763                );
764            }
765
766            let sent = count_tokens(&compressed);
767            let savings = protocol::format_savings(original_tokens, sent);
768            (
769                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
770                sent,
771            )
772        }
        // "entropy": adaptive entropy compression; the header also shows the
        // average entropy and the techniques that were applied.
773        "entropy" => {
774            let result = entropy::entropy_compress_adaptive(content, file_path);
775            let avg_h = entropy::analyze_entropy(content).avg_entropy;
776            let header = build_header(file_ref, short, ext, content, line_count, false);
777            let techs = result.techniques.join(", ");
778            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
779            let sent = count_tokens(&output);
780            let savings = protocol::format_savings(original_tokens, sent);
781            let compression_ratio = if original_tokens > 0 {
782                1.0 - (sent as f64 / original_tokens as f64)
783            } else {
784                0.0
785            };
            // Side effect: report to the adaptive thresholds whether more than
            // 15% of the tokens were saved.
786            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
787            (
788                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
789                sent,
790            )
791        }
        // "task": filter the content by keywords parsed from the task. Without
        // a task, or without extracted keywords, the full content is returned
        // with an explanatory trailer (and no compressed hint).
792        "task" => {
793            let task_str = task.unwrap_or("");
794            if task_str.is_empty() {
795                let header = build_header(file_ref, short, ext, content, line_count, true);
796                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
797                let sent = count_tokens(&out);
798                return (out, sent);
799            }
800            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
801            if keywords.is_empty() {
802                let header = build_header(file_ref, short, ext, content, line_count, true);
803                let out = format!(
804                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
805                );
806                let sent = count_tokens(&out);
807                return (out, sent);
808            }
            // 0.3 is forwarded as the filter's third argument — presumably a
            // relevance threshold or budget; confirm against the filter.
809            let filtered =
810                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
811            let filtered_lines = filtered.lines().count();
812            let header = format!(
813                "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
814            );
815            let project_root = detect_project_root(file_path);
816            let graph_ctx = crate::core::graph_context::build_graph_context(
817                file_path,
818                &project_root,
819                Some(crate::core::graph_context::GraphContextOptions::default()),
820            )
821            .map(|c| crate::core::graph_context::format_graph_context(&c))
822            .unwrap_or_default();
823
            // Token count is the sum of the three parts, counted separately.
824            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
825            let savings = protocol::format_savings(original_tokens, sent);
826            (
827                append_compressed_hint(
828                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
829                    file_path,
830                ),
831                sent,
832            )
833        }
        // "reference": a one-line summary (line count, token count, extension)
        // with no file content.
834        "reference" => {
835            let tok = count_tokens(content);
836            let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
837            let sent = count_tokens(&output);
838            let savings = protocol::format_savings(original_tokens, sent);
839            (format!("{output}\n{savings}"), sent)
840        }
        // "lines:<ranges>": everything after the 6-byte "lines:" prefix is the
        // range spec. The token count covers the extracted lines only, not the header.
841        mode if mode.starts_with("lines:") => {
842            let range_str = &mode[6..];
843            let extracted = extract_line_range(content, range_str);
844            let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
845            let sent = count_tokens(&extracted);
846            let savings = protocol::format_savings(original_tokens, sent);
847            (format!("{header}\n{extracted}\n{savings}"), sent)
848        }
        // Unrecognised mode: warn, then return the unmodified content.
849        unknown => {
850            let header = build_header(file_ref, short, ext, content, line_count, true);
851            let out = format!(
852                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
853            );
854            let sent = count_tokens(&out);
855            (out, sent)
856        }
857    }
858}
859
/// Renders the lines of `content` selected by `range_str`, each prefixed with its
/// right-aligned 1-based line number and `| `.
///
/// `range_str` is a comma-separated list of specs. A spec is either a single line
/// number (`"7"`) or an inclusive range (`"3-9"`). In a range, a start that does not
/// parse falls back to line 1 and an end that does not parse falls back to the last
/// line; both ends are clamped to the file. Single-number specs that do not parse or
/// fall outside the file are ignored.
///
/// Returns the selected lines joined by `\n`, in the order the specs were given, or
/// `"No lines matched the range."` when nothing was selected.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let all_lines: Vec<&str> = content.lines().collect();
    let line_total = all_lines.len();
    // Formats one already-validated 1-based line number.
    let render = |n: usize| format!("{n:>4}| {}", all_lines[n - 1]);

    let mut picked: Vec<String> = Vec::new();
    for spec in range_str.split(',').map(str::trim) {
        match spec.split_once('-') {
            Some((lo, hi)) => {
                // Clamping both ends keeps every index in the range inside the file.
                let first = lo.trim().parse::<usize>().unwrap_or(1).max(1);
                let last = hi
                    .trim()
                    .parse::<usize>()
                    .unwrap_or(line_total)
                    .min(line_total);
                picked.extend((first..=last).map(&render));
            }
            None => {
                if let Ok(n) = spec.parse::<usize>() {
                    if (1..=line_total).contains(&n) {
                        picked.push(render(n));
                    }
                }
            }
        }
    }

    if picked.is_empty() {
        return "No lines matched the range.".to_string();
    }
    picked.join("\n")
}
888
889fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
890    let short = protocol::shorten_path(path);
891    let old_content = cache.get(path).map(|e| e.content.clone());
892
893    let new_content = match read_file_lossy(path) {
894        Ok(c) => c,
895        Err(e) => {
896            let msg = format!("ERROR: {e}");
897            let tokens = count_tokens(&msg);
898            return (msg, tokens);
899        }
900    };
901
902    let original_tokens = count_tokens(&new_content);
903
904    let diff_output = if let Some(old) = &old_content {
905        compressor::diff_content(old, &new_content)
906    } else {
907        format!("[first read]\n{new_content}")
908    };
909
910    cache.store(path, new_content);
911
912    let sent = count_tokens(&diff_output);
913    let savings = protocol::format_savings(original_tokens, sent);
914    (
915        format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}"),
916        sent,
917    )
918}
919
920#[cfg(test)]
921mod tests {
922    use super::*;
923    use std::time::Duration;
924
925    #[test]
926    fn test_header_toon_format_no_brackets() {
927        let content = "use std::io;\nfn main() {}\n";
928        let header = build_header("F1", "main.rs", "rs", content, 2, false);
929        assert!(!header.contains('['));
930        assert!(!header.contains(']'));
931        assert!(header.contains("F1=main.rs 2L"));
932    }
933
934    #[test]
935    fn test_header_toon_deps_indented() {
936        let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
937        let header = build_header("F1", "main.rs", "rs", content, 3, true);
938        if header.contains("deps") {
939            assert!(
940                header.contains("\n deps "),
941                "deps should use indented TOON format"
942            );
943            assert!(
944                !header.contains("deps:["),
945                "deps should not use bracket format"
946            );
947        }
948    }
949
950    #[test]
951    fn test_header_toon_saves_tokens() {
952        let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
953        let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
954        let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
955        let old_tokens = count_tokens(&old_header);
956        let new_tokens = count_tokens(&new_header);
957        assert!(
958            new_tokens <= old_tokens,
959            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
960        );
961    }
962
963    #[test]
964    fn test_tdd_symbols_are_compact() {
965        let symbols = [
966            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
967        ];
968        for sym in &symbols {
969            let tok = count_tokens(sym);
970            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
971        }
972    }
973
974    #[test]
975    fn test_task_mode_filters_content() {
976        let content = (0..200)
977            .map(|i| {
978                if i % 20 == 0 {
979                    format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
980                } else {
981                    format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
982                }
983            })
984            .collect::<Vec<_>>()
985            .join("\n");
986        let full_tokens = count_tokens(&content);
987        let task = Some("fix bug in validate_token");
988        let (result, result_tokens) = process_mode(
989            &content,
990            "task",
991            "F1",
992            "test.rs",
993            "rs",
994            full_tokens,
995            CrpMode::Off,
996            "test.rs",
997            task,
998        );
999        assert!(
1000            result_tokens < full_tokens,
1001            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
1002        );
1003        assert!(
1004            result.contains("task-filtered"),
1005            "output should contain task-filtered marker"
1006        );
1007    }
1008
1009    #[test]
1010    fn test_task_mode_without_task_returns_full() {
1011        let content = "fn main() {}\nfn helper() {}\n";
1012        let tokens = count_tokens(content);
1013        let (result, _sent) = process_mode(
1014            content,
1015            "task",
1016            "F1",
1017            "test.rs",
1018            "rs",
1019            tokens,
1020            CrpMode::Off,
1021            "test.rs",
1022            None,
1023        );
1024        assert!(
1025            result.contains("no task set"),
1026            "should indicate no task: {result}"
1027        );
1028    }
1029
1030    #[test]
1031    fn test_reference_mode_one_line() {
1032        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
1033        let tokens = count_tokens(content);
1034        let (result, _sent) = process_mode(
1035            content,
1036            "reference",
1037            "F1",
1038            "test.rs",
1039            "rs",
1040            tokens,
1041            CrpMode::Off,
1042            "test.rs",
1043            None,
1044        );
1045        let lines: Vec<&str> = result.lines().collect();
1046        assert!(
1047            lines.len() <= 3,
1048            "reference mode should be very compact, got {} lines",
1049            lines.len()
1050        );
1051        assert!(result.contains("lines"), "should contain line count");
1052        assert!(result.contains("tok"), "should contain token count");
1053    }
1054
1055    #[test]
1056    fn cached_lines_mode_invalidates_on_mtime_change() {
1057        let dir = tempfile::tempdir().unwrap();
1058        let path = dir.path().join("file.txt");
1059        let p = path.to_string_lossy().to_string();
1060
1061        std::fs::write(&path, "one\nsecond\n").unwrap();
1062        let mut cache = SessionCache::new();
1063
1064        let r1 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1065        let l1: Vec<&str> = r1.content.lines().collect();
1066        let got1 = l1.get(1).copied().unwrap_or_default().trim();
1067        let got1 = got1.split_once('|').map_or(got1, |(_, s)| s.trim());
1068        assert_eq!(got1, "one");
1069
1070        std::thread::sleep(Duration::from_secs(1));
1071        std::fs::write(&path, "two\nsecond\n").unwrap();
1072
1073        let r2 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1074        let l2: Vec<&str> = r2.content.lines().collect();
1075        let got2 = l2.get(1).copied().unwrap_or_default().trim();
1076        let got2 = got2.split_once('|').map_or(got2, |(_, s)| s.trim());
1077        assert_eq!(got2, "two");
1078    }
1079
1080    #[test]
1081    #[cfg_attr(tarpaulin, ignore)]
1082    fn benchmark_task_conditioned_compression() {
1083        // Keep this reasonably small so CI coverage instrumentation stays fast.
1084        let content = generate_benchmark_code(200);
1085        let full_tokens = count_tokens(&content);
1086        let task = Some("fix authentication in validate_token");
1087
1088        let (_full_output, full_tok) = process_mode(
1089            &content,
1090            "full",
1091            "F1",
1092            "server.rs",
1093            "rs",
1094            full_tokens,
1095            CrpMode::Off,
1096            "server.rs",
1097            task,
1098        );
1099        let (_task_output, task_tok) = process_mode(
1100            &content,
1101            "task",
1102            "F1",
1103            "server.rs",
1104            "rs",
1105            full_tokens,
1106            CrpMode::Off,
1107            "server.rs",
1108            task,
1109        );
1110        let (_sig_output, sig_tok) = process_mode(
1111            &content,
1112            "signatures",
1113            "F1",
1114            "server.rs",
1115            "rs",
1116            full_tokens,
1117            CrpMode::Off,
1118            "server.rs",
1119            task,
1120        );
1121        let (_ref_output, ref_tok) = process_mode(
1122            &content,
1123            "reference",
1124            "F1",
1125            "server.rs",
1126            "rs",
1127            full_tokens,
1128            CrpMode::Off,
1129            "server.rs",
1130            task,
1131        );
1132
1133        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
1134        eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
1135        eprintln!("  full:       {full_tok:>6} tokens (baseline)");
1136        eprintln!(
1137            "  task:       {task_tok:>6} tokens ({:.0}% savings)",
1138            (1.0 - task_tok as f64 / full_tok as f64) * 100.0
1139        );
1140        eprintln!(
1141            "  signatures: {sig_tok:>6} tokens ({:.0}% savings)",
1142            (1.0 - sig_tok as f64 / full_tok as f64) * 100.0
1143        );
1144        eprintln!(
1145            "  reference:  {ref_tok:>6} tokens ({:.0}% savings)",
1146            (1.0 - ref_tok as f64 / full_tok as f64) * 100.0
1147        );
1148        eprintln!("================================================\n");
1149
1150        assert!(task_tok < full_tok, "task mode should save tokens");
1151        assert!(sig_tok < full_tok, "signatures should save tokens");
1152        assert!(ref_tok < sig_tok, "reference should be most compact");
1153    }
1154
1155    fn generate_benchmark_code(lines: usize) -> String {
1156        let mut code = Vec::with_capacity(lines);
1157        code.push("use std::collections::HashMap;".to_string());
1158        code.push("use crate::core::auth;".to_string());
1159        code.push(String::new());
1160        code.push("pub struct Server {".to_string());
1161        code.push("    config: Config,".to_string());
1162        code.push("    cache: HashMap<String, String>,".to_string());
1163        code.push("}".to_string());
1164        code.push(String::new());
1165        code.push("impl Server {".to_string());
1166        code.push(
1167            "    pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {"
1168                .to_string(),
1169        );
1170        code.push("        let decoded = auth::decode_jwt(token)?;".to_string());
1171        code.push("        if decoded.exp < chrono::Utc::now().timestamp() {".to_string());
1172        code.push("            return Err(AuthError::Expired);".to_string());
1173        code.push("        }".to_string());
1174        code.push("        Ok(decoded.claims)".to_string());
1175        code.push("    }".to_string());
1176        code.push(String::new());
1177
1178        let remaining = lines.saturating_sub(code.len());
1179        for i in 0..remaining {
1180            if i % 30 == 0 {
1181                code.push(format!(
1182                    "    pub fn handler_{i}(&self, req: Request) -> Response {{"
1183                ));
1184            } else if i % 30 == 29 {
1185                code.push("    }".to_string());
1186            } else {
1187                code.push(format!("        let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"));
1188            }
1189        }
1190        code.push("}".to_string());
1191        code.join("\n")
1192    }
1193
1194    #[test]
1195    fn instruction_file_detection() {
1196        assert!(is_instruction_file(
1197            "/home/user/.pi/agent/skills/committing-changes/SKILL.md"
1198        ));
1199        assert!(is_instruction_file("/workspace/.cursor/rules/lean-ctx.mdc"));
1200        assert!(is_instruction_file("/project/AGENTS.md"));
1201        assert!(is_instruction_file("/project/.cursorrules"));
1202        assert!(is_instruction_file("/home/user/.claude/rules/my-rule.md"));
1203        assert!(is_instruction_file("/skills/some-skill/README.md"));
1204
1205        assert!(!is_instruction_file("/project/src/main.rs"));
1206        assert!(!is_instruction_file("/project/config.json"));
1207        assert!(!is_instruction_file("/project/data/report.csv"));
1208    }
1209
1210    #[test]
1211    fn resolve_auto_mode_returns_full_for_instruction_files() {
1212        let mode = resolve_auto_mode(
1213            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
1214            5000,
1215            Some("read"),
1216        );
1217        assert_eq!(mode, "full", "SKILL.md must always be read in full");
1218
1219        let mode = resolve_auto_mode("/workspace/AGENTS.md", 3000, Some("read"));
1220        assert_eq!(mode, "full", "AGENTS.md must always be read in full");
1221
1222        let mode = resolve_auto_mode("/workspace/.cursorrules", 2000, None);
1223        assert_eq!(mode, "full", ".cursorrules must always be read in full");
1224    }
1225}