Skip to main content

lean_ctx/tools/
ctx_read.rs

1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Pre-counted read output carrying the output string, the resolved mode,
/// and the token count computed during mode processing.
///
/// Derives `Debug`, `Clone`, `PartialEq`, and `Eq` so callers can log, copy,
/// and compare outputs without hand-written impls.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadOutput {
    /// Rendered text returned to the caller.
    pub content: String,
    /// Mode that was actually applied (e.g. the concrete mode chosen for `auto`,
    /// or `"error"` when the file could not be read).
    pub resolved_mode: String,
    /// Approximate output token count from mode processing.
    /// The dispatch layer recounts the final assembled string for accurate savings.
    pub output_tokens: usize,
}
22
/// Trailer line added to compressed renderings to tell the reader how to get
/// the complete source.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Modes whose rendered output is stored in, and served from, the per-file
/// compressed-output cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is one of [`CACHEABLE_MODES`].
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|candidate| *candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32    if crp_mode.is_tdd() {
33        format!("{mode}:tdd")
34    } else {
35        mode.to_string()
36    }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40    format!("{output}\n{COMPRESSED_HINT}\n  ctx_read(\"{file_path}\", mode=\"full\")")
41}
42
43/// Reads a file as UTF-8 with lossy fallback, enforcing binary detection and max read size limit.
44pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
45    if crate::core::binary_detect::is_binary_file(path) {
46        let msg = crate::core::binary_detect::binary_file_message(path);
47        return Err(std::io::Error::other(msg));
48    }
49
50    let cap = crate::core::limits::max_read_bytes();
51    let meta = std::fs::metadata(path).map_err(|e| {
52        std::io::Error::other(format!("cannot stat file (refusing unbounded read): {e}"))
53    })?;
54    if meta.len() > cap as u64 {
55        return Err(std::io::Error::other(format!(
56            "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
57             Increase the limit or use a line-range read: mode=\"lines:1-100\"",
58            meta.len(),
59            cap
60        )));
61    }
62
63    let bytes = std::fs::read(path)?;
64    match String::from_utf8(bytes) {
65        Ok(s) => Ok(s),
66        Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
67    }
68}
69
70/// Reads a file through the cache and applies the requested compression mode.
71pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
72    handle_with_options(cache, path, mode, false, crp_mode, None)
73}
74
75/// Like `handle`, but invalidates the cache first to force a fresh disk read.
76pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
77    handle_with_options(cache, path, mode, true, crp_mode, None)
78}
79
80/// Reads a file with task-aware filtering to prioritize task-relevant content.
81pub fn handle_with_task(
82    cache: &mut SessionCache,
83    path: &str,
84    mode: &str,
85    crp_mode: CrpMode,
86    task: Option<&str>,
87) -> String {
88    handle_with_options(cache, path, mode, false, crp_mode, task)
89}
90
91/// Like `handle_with_task`, also returns the resolved mode name and pre-counted tokens.
92pub fn handle_with_task_resolved(
93    cache: &mut SessionCache,
94    path: &str,
95    mode: &str,
96    crp_mode: CrpMode,
97    task: Option<&str>,
98) -> ReadOutput {
99    handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
100}
101
102/// Fresh read with task-aware filtering (invalidates cache first).
103pub fn handle_fresh_with_task(
104    cache: &mut SessionCache,
105    path: &str,
106    mode: &str,
107    crp_mode: CrpMode,
108    task: Option<&str>,
109) -> String {
110    handle_with_options(cache, path, mode, true, crp_mode, task)
111}
112
113/// Fresh read with task-aware filtering, also returns the resolved mode name and pre-counted tokens.
114pub fn handle_fresh_with_task_resolved(
115    cache: &mut SessionCache,
116    path: &str,
117    mode: &str,
118    crp_mode: CrpMode,
119    task: Option<&str>,
120) -> ReadOutput {
121    handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
122}
123
124fn handle_with_options(
125    cache: &mut SessionCache,
126    path: &str,
127    mode: &str,
128    fresh: bool,
129    crp_mode: CrpMode,
130    task: Option<&str>,
131) -> String {
132    handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
133}
134
135fn handle_with_options_resolved(
136    cache: &mut SessionCache,
137    path: &str,
138    mode: &str,
139    fresh: bool,
140    crp_mode: CrpMode,
141    task: Option<&str>,
142) -> ReadOutput {
143    let file_ref = cache.get_file_ref(path);
144    let short = protocol::shorten_path(path);
145    let ext = Path::new(path)
146        .extension()
147        .and_then(|e| e.to_str())
148        .unwrap_or("");
149
150    if fresh {
151        cache.invalidate(path);
152    }
153
154    if mode == "diff" {
155        let (out, sent) = handle_diff(cache, path, &file_ref);
156        return ReadOutput {
157            content: out,
158            resolved_mode: "diff".into(),
159            output_tokens: sent,
160        };
161    }
162
163    if mode != "full" {
164        if let Some(existing) = cache.get(path) {
165            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
166            if stale {
167                cache.invalidate(path);
168            }
169        }
170    }
171
172    if let Some(existing) = cache.get(path) {
173        if mode == "full" {
174            let (out, sent) =
175                handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
176            let out = crate::core::redaction::redact_text_if_enabled(&out);
177            return ReadOutput {
178                content: out,
179                resolved_mode: "full".into(),
180                output_tokens: sent,
181            };
182        }
183        let content = existing.content.clone();
184        let original_tokens = existing.original_tokens;
185        let resolved_mode = if mode == "auto" {
186            resolve_auto_mode(path, original_tokens, task)
187        } else {
188            mode.to_string()
189        };
190        if is_cacheable_mode(&resolved_mode) {
191            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
192            if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
193                let sent = count_tokens(cached_output);
194                let out = crate::core::redaction::redact_text_if_enabled(cached_output);
195                return ReadOutput {
196                    content: out,
197                    resolved_mode,
198                    output_tokens: sent,
199                };
200            }
201        }
202        let (out, sent) = process_mode(
203            &content,
204            &resolved_mode,
205            &file_ref,
206            &short,
207            ext,
208            original_tokens,
209            crp_mode,
210            path,
211            task,
212        );
213        if is_cacheable_mode(&resolved_mode) {
214            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
215            cache.set_compressed(path, &cache_key, out.clone());
216        }
217        let out = crate::core::redaction::redact_text_if_enabled(&out);
218        return ReadOutput {
219            content: out,
220            resolved_mode,
221            output_tokens: sent,
222        };
223    }
224
225    let content = match read_file_lossy(path) {
226        Ok(c) => c,
227        Err(e) => {
228            let msg = format!("ERROR: {e}");
229            let tokens = count_tokens(&msg);
230            return ReadOutput {
231                content: msg,
232                resolved_mode: "error".into(),
233                output_tokens: tokens,
234            };
235        }
236    };
237
238    let similar_hint = find_similar_and_update_semantic_index(path, &content);
239    let graph_hint = build_graph_related_hint(path);
240
241    let store_result = cache.store(path, content.clone());
242
243    if mode == "full" {
244        cache.mark_full_delivered(path);
245        let (mut output, sent) = format_full_output(
246            &file_ref,
247            &short,
248            ext,
249            &content,
250            store_result.original_tokens,
251            store_result.line_count,
252            task,
253        );
254        if let Some(hint) = &graph_hint {
255            output.push_str(&format!("\n{hint}"));
256        }
257        if let Some(hint) = similar_hint {
258            output.push_str(&format!("\n{hint}"));
259        }
260        let output = crate::core::redaction::redact_text_if_enabled(&output);
261        return ReadOutput {
262            content: output,
263            resolved_mode: "full".into(),
264            output_tokens: sent,
265        };
266    }
267
268    let resolved_mode = if mode == "auto" {
269        resolve_auto_mode(path, store_result.original_tokens, task)
270    } else {
271        mode.to_string()
272    };
273
274    let (mut output, _sent) = process_mode(
275        &content,
276        &resolved_mode,
277        &file_ref,
278        &short,
279        ext,
280        store_result.original_tokens,
281        crp_mode,
282        path,
283        task,
284    );
285    if is_cacheable_mode(&resolved_mode) {
286        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
287        cache.set_compressed(path, &cache_key, output.clone());
288    }
289    if let Some(hint) = &graph_hint {
290        output.push_str(&format!("\n{hint}"));
291    }
292    if let Some(hint) = similar_hint {
293        output.push_str(&format!("\n{hint}"));
294    }
295    let output = crate::core::redaction::redact_text_if_enabled(&output);
296    let final_tokens = count_tokens(&output);
297    ReadOutput {
298        content: output,
299        resolved_mode,
300        output_tokens: final_tokens,
301    }
302}
303
/// Returns `true` when `path` looks like an agent instruction / rules file.
///
/// Matching is case-insensitive and separator-agnostic: backslashes are
/// treated as `/`, so Windows-style paths are recognised as well. A path
/// matches when its file name is one of the known instruction file names, or
/// when the path contains one of the known rule/skill directory markers.
pub fn is_instruction_file(path: &str) -> bool {
    // Normalise case and separators once so the checks below work on every platform.
    let normalized = path.to_lowercase().replace('\\', "/");
    let filename = std::path::Path::new(&normalized)
        .file_name()
        .and_then(|f| f.to_str())
        .unwrap_or("");

    let known_name = matches!(
        filename,
        "skill.md"
            | "agents.md"
            | "rules.md"
            | ".cursorrules"
            | ".clinerules"
            | "lean-ctx.md"
            | "lean-ctx.mdc"
    );

    known_name
        || ["/skills/", "/.cursor/rules/", "/.claude/rules/", "/agents.md"]
            .iter()
            .any(|marker| normalized.contains(marker))
}
325
326fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
327    if is_instruction_file(file_path) {
328        return "full".to_string();
329    }
330
331    // Priority 1: Intent Router with budget/pressure-aware degradation.
332    // Only fall through to Predictor/Bandit if the router returns "auto".
333    let intent_query = task.unwrap_or("read");
334    let route = crate::core::intent_router::route_v1(intent_query);
335    let intent_mode = &route.decision.effective_read_mode;
336    if intent_mode != "auto" && intent_mode != "reference" {
337        return intent_mode.clone();
338    }
339
340    // Priority 2: FileSignature-based predictor
341    let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
342    let predictor = crate::core::mode_predictor::ModePredictor::new();
343    let mut predicted = predictor
344        .predict_best_mode(&sig)
345        .unwrap_or_else(|| "full".to_string());
346    if predicted == "auto" {
347        predicted = "full".to_string();
348    }
349
350    // Priority 3: Bandit exploration when budget is tight
351    if let Some(project_root) =
352        crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
353    {
354        let ext = std::path::Path::new(file_path)
355            .extension()
356            .and_then(|e| e.to_str())
357            .unwrap_or("");
358        let bucket = match original_tokens {
359            0..=2000 => "sm",
360            2001..=10000 => "md",
361            10001..=50000 => "lg",
362            _ => "xl",
363        };
364        let bandit_key = format!("{ext}_{bucket}");
365        let mut store = crate::core::bandit::BanditStore::load(&project_root);
366        let bandit = store.get_or_create(&bandit_key);
367        let arm = bandit.select_arm();
368        if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
369            predicted = "aggressive".to_string();
370        }
371    }
372
373    // Priority 4: Adaptive mode policy
374    let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
375    let chosen = policy.choose_auto_mode(task, &predicted);
376
377    if original_tokens > 2000 {
378        if predicted == "map" || predicted == "signatures" {
379            if chosen != "map" && chosen != "signatures" {
380                return predicted;
381            }
382        } else if chosen == "full" && predicted != "full" {
383            return predicted;
384        }
385    }
386
387    chosen
388}
389
390fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
391    let cfg = crate::core::config::Config::load();
392    let profile = crate::core::config::MemoryProfile::effective(&cfg);
393    if !profile.semantic_cache_enabled() {
394        return None;
395    }
396
397    let project_root = detect_project_root(path);
398    let session_id = format!("{}", std::process::id());
399    let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
400
401    let similar = index.find_similar(content, 0.7);
402    let relevant: Vec<_> = similar
403        .into_iter()
404        .filter(|(p, _)| p != path)
405        .take(3)
406        .collect();
407
408    index.add_file(path, content, &session_id);
409    let _ = index.save(&project_root);
410
411    if relevant.is_empty() {
412        return None;
413    }
414
415    let hints: Vec<String> = relevant
416        .iter()
417        .map(|(p, score)| format!("  {p} ({:.0}% similar)", score * 100.0))
418        .collect();
419
420    Some(format!(
421        "[semantic: {} similar file(s) in cache]\n{}",
422        relevant.len(),
423        hints.join("\n")
424    ))
425}
426
427fn detect_project_root(path: &str) -> String {
428    crate::core::protocol::detect_project_root_or_cwd(path)
429}
430
431fn build_graph_related_hint(path: &str) -> Option<String> {
432    let project_root = detect_project_root(path);
433    crate::core::graph_context::build_related_hint(path, &project_root, 5)
434}
435
436const AUTO_DELTA_THRESHOLD: f64 = 0.6;
437
438/// Re-reads from disk; if content changed and delta is compact, sends auto-delta.
439fn handle_full_with_auto_delta(
440    cache: &mut SessionCache,
441    path: &str,
442    file_ref: &str,
443    short: &str,
444    ext: &str,
445    task: Option<&str>,
446) -> (String, usize) {
447    let Ok(disk_content) = read_file_lossy(path) else {
448        cache.record_cache_hit(path);
449        let out = if let Some(existing) = cache.get(path) {
450            format!(
451                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
452                existing.read_count, existing.line_count
453            )
454        } else {
455            format!("[file read failed and no cached version available] {file_ref}={short}")
456        };
457        let sent = count_tokens(&out);
458        return (out, sent);
459    };
460
461    let old_content = cache
462        .get(path)
463        .map(|e| e.content.clone())
464        .unwrap_or_default();
465    let store_result = cache.store(path, disk_content.clone());
466
467    if store_result.was_hit {
468        if store_result.full_content_delivered {
469            let out = format!(
470                "{file_ref}={short} cached {}t {}L\nFile content unchanged since last read (same hash). Already in your context window.",
471                store_result.read_count, store_result.line_count
472            );
473            let sent = count_tokens(&out);
474            return (out, sent);
475        }
476        cache.mark_full_delivered(path);
477        return format_full_output(
478            file_ref,
479            short,
480            ext,
481            &disk_content,
482            store_result.original_tokens,
483            store_result.line_count,
484            task,
485        );
486    }
487
488    let diff = compressor::diff_content(&old_content, &disk_content);
489    let diff_tokens = count_tokens(&diff);
490    let full_tokens = store_result.original_tokens;
491
492    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
493        let savings = protocol::format_savings(full_tokens, diff_tokens);
494        let out = format!(
495            "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
496            disk_content.lines().count()
497        );
498        return (out, diff_tokens);
499    }
500
501    format_full_output(
502        file_ref,
503        short,
504        ext,
505        &disk_content,
506        store_result.original_tokens,
507        store_result.line_count,
508        task,
509    )
510}
511
512fn format_full_output(
513    file_ref: &str,
514    short: &str,
515    ext: &str,
516    content: &str,
517    original_tokens: usize,
518    line_count: usize,
519    task: Option<&str>,
520) -> (String, usize) {
521    let tokens = original_tokens;
522    let metadata = build_header(file_ref, short, ext, content, line_count, true);
523
524    let mut reordered: Option<String> = None;
525    {
526        let profile = crate::core::profiles::active_profile();
527        let cfg = profile.layout;
528        if cfg.enabled_effective() && line_count >= cfg.min_lines_effective() {
529            let task_str = task.unwrap_or("");
530            if !task_str.is_empty() {
531                let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
532                let r = crate::core::attention_layout_driver::maybe_reorder_for_attention(
533                    content, &keywords, &cfg,
534                );
535                if !r.skipped && r.changed {
536                    reordered = Some(r.output);
537                }
538            }
539        }
540    }
541
542    let content_for_output = reordered.as_deref().unwrap_or(content);
543
544    let mut sym = SymbolMap::new();
545    let idents = symbol_map::extract_identifiers(content_for_output, ext);
546    for ident in &idents {
547        sym.register(ident);
548    }
549
550    if sym.len() >= 3 {
551        let sym_table = sym.format_table();
552        let compressed = sym.apply(content_for_output);
553        let original_tok = count_tokens(content_for_output);
554        let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
555        let net_saving = original_tok.saturating_sub(compressed_tok);
556        if original_tok > 0 && net_saving * 100 / original_tok >= 5 {
557            let output = format!("{metadata}\n{compressed}{sym_table}");
558            let sent = count_tokens(&output);
559            let savings = protocol::format_savings(tokens, sent);
560            return (format!("{output}\n{savings}"), sent);
561        }
562    }
563
564    let output = format!("{metadata}\n{content_for_output}");
565    let sent = count_tokens(&output);
566    let savings = protocol::format_savings(tokens, sent);
567    (format!("{output}\n{savings}"), sent)
568}
569
570fn build_header(
571    file_ref: &str,
572    short: &str,
573    ext: &str,
574    content: &str,
575    line_count: usize,
576    include_deps: bool,
577) -> String {
578    let mut header = format!("{file_ref}={short} {line_count}L");
579
580    if include_deps {
581        let dep_info = deps::extract_deps(content, ext);
582        if !dep_info.imports.is_empty() {
583            let imports_str: Vec<&str> = dep_info
584                .imports
585                .iter()
586                .take(8)
587                .map(std::string::String::as_str)
588                .collect();
589            header.push_str(&format!("\n deps {}", imports_str.join(",")));
590        }
591        if !dep_info.exports.is_empty() {
592            let exports_str: Vec<&str> = dep_info
593                .exports
594                .iter()
595                .take(8)
596                .map(std::string::String::as_str)
597                .collect();
598            header.push_str(&format!("\n exports {}", exports_str.join(",")));
599        }
600    }
601
602    header
603}
604
605#[allow(clippy::too_many_arguments)]
606fn process_mode(
607    content: &str,
608    mode: &str,
609    file_ref: &str,
610    short: &str,
611    ext: &str,
612    original_tokens: usize,
613    crp_mode: CrpMode,
614    file_path: &str,
615    task: Option<&str>,
616) -> (String, usize) {
617    let line_count = content.lines().count();
618
619    match mode {
620        "auto" => {
621            let chosen = resolve_auto_mode(file_path, original_tokens, task);
622            process_mode(
623                content,
624                &chosen,
625                file_ref,
626                short,
627                ext,
628                original_tokens,
629                crp_mode,
630                file_path,
631                task,
632            )
633        }
634        "full" => format_full_output(
635            file_ref,
636            short,
637            ext,
638            content,
639            original_tokens,
640            line_count,
641            task,
642        ),
643        "signatures" => {
644            let sigs = signatures::extract_signatures(content, ext);
645            let dep_info = deps::extract_deps(content, ext);
646
647            let mut output = format!("{file_ref}={short} {line_count}L");
648            if !dep_info.imports.is_empty() {
649                let imports_str: Vec<&str> = dep_info
650                    .imports
651                    .iter()
652                    .take(8)
653                    .map(std::string::String::as_str)
654                    .collect();
655                output.push_str(&format!("\n deps {}", imports_str.join(",")));
656            }
657            for sig in &sigs {
658                output.push('\n');
659                if crp_mode.is_tdd() {
660                    output.push_str(&sig.to_tdd());
661                } else {
662                    output.push_str(&sig.to_compact());
663                }
664            }
665            let sent = count_tokens(&output);
666            let savings = protocol::format_savings(original_tokens, sent);
667            (
668                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
669                sent,
670            )
671        }
672        "map" => {
673            if ext == "php" {
674                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
675                {
676                    let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
677                    let sent = count_tokens(&output);
678                    let savings = protocol::format_savings(original_tokens, sent);
679                    output.push('\n');
680                    output.push_str(&savings);
681                    return (append_compressed_hint(&output, file_path), sent);
682                }
683            }
684
685            let sigs = signatures::extract_signatures(content, ext);
686            let dep_info = deps::extract_deps(content, ext);
687
688            let mut output = format!("{file_ref}={short} {line_count}L");
689
690            if !dep_info.imports.is_empty() {
691                output.push_str("\n  deps: ");
692                output.push_str(&dep_info.imports.join(", "));
693            }
694
695            if !dep_info.exports.is_empty() {
696                output.push_str("\n  exports: ");
697                output.push_str(&dep_info.exports.join(", "));
698            }
699
700            let key_sigs: Vec<&signatures::Signature> = sigs
701                .iter()
702                .filter(|s| s.is_exported || s.indent == 0)
703                .collect();
704
705            if !key_sigs.is_empty() {
706                output.push_str("\n  API:");
707                for sig in &key_sigs {
708                    output.push_str("\n    ");
709                    if crp_mode.is_tdd() {
710                        output.push_str(&sig.to_tdd());
711                    } else {
712                        output.push_str(&sig.to_compact());
713                    }
714                }
715            }
716
717            let sent = count_tokens(&output);
718            let savings = protocol::format_savings(original_tokens, sent);
719            (
720                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
721                sent,
722            )
723        }
724        "aggressive" => {
725            #[cfg(feature = "tree-sitter")]
726            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
727            #[cfg(not(feature = "tree-sitter"))]
728            let ast_pruned: Option<String> = None;
729
730            let base = ast_pruned.as_deref().unwrap_or(content);
731
732            let session_intent = crate::core::session::SessionState::load_latest()
733                .and_then(|s| s.active_structured_intent);
734            let raw = if let Some(ref intent) = session_intent {
735                compressor::task_aware_compress(base, Some(ext), intent)
736            } else {
737                compressor::aggressive_compress(base, Some(ext))
738            };
739            let compressed = compressor::safeguard_ratio(content, &raw);
740            let header = build_header(file_ref, short, ext, content, line_count, true);
741
742            let mut sym = SymbolMap::new();
743            let idents = symbol_map::extract_identifiers(&compressed, ext);
744            for ident in &idents {
745                sym.register(ident);
746            }
747
748            if sym.len() >= 3 {
749                let sym_table = sym.format_table();
750                let sym_applied = sym.apply(&compressed);
751                let orig_tok = count_tokens(&compressed);
752                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
753                let net = orig_tok.saturating_sub(comp_tok);
754                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
755                    let savings = protocol::format_savings(original_tokens, comp_tok);
756                    return (
757                        append_compressed_hint(
758                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
759                            file_path,
760                        ),
761                        comp_tok,
762                    );
763                }
764                let savings = protocol::format_savings(original_tokens, orig_tok);
765                return (
766                    append_compressed_hint(
767                        &format!("{header}\n{compressed}\n{savings}"),
768                        file_path,
769                    ),
770                    orig_tok,
771                );
772            }
773
774            let sent = count_tokens(&compressed);
775            let savings = protocol::format_savings(original_tokens, sent);
776            (
777                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
778                sent,
779            )
780        }
781        "entropy" => {
782            let result = entropy::entropy_compress_adaptive(content, file_path);
783            let avg_h = entropy::analyze_entropy(content).avg_entropy;
784            let header = build_header(file_ref, short, ext, content, line_count, false);
785            let techs = result.techniques.join(", ");
786            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
787            let sent = count_tokens(&output);
788            let savings = protocol::format_savings(original_tokens, sent);
789            let compression_ratio = if original_tokens > 0 {
790                1.0 - (sent as f64 / original_tokens as f64)
791            } else {
792                0.0
793            };
794            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
795            (
796                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
797                sent,
798            )
799        }
800        "task" => {
801            let task_str = task.unwrap_or("");
802            if task_str.is_empty() {
803                let header = build_header(file_ref, short, ext, content, line_count, true);
804                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
805                let sent = count_tokens(&out);
806                return (out, sent);
807            }
808            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
809            if keywords.is_empty() {
810                let header = build_header(file_ref, short, ext, content, line_count, true);
811                let out = format!(
812                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
813                );
814                let sent = count_tokens(&out);
815                return (out, sent);
816            }
817            let filtered =
818                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
819            let filtered_lines = filtered.lines().count();
820            let header = format!(
821                "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
822            );
823            let project_root = detect_project_root(file_path);
824            let graph_ctx = crate::core::graph_context::build_graph_context(
825                file_path,
826                &project_root,
827                Some(crate::core::graph_context::GraphContextOptions::default()),
828            )
829            .map(|c| crate::core::graph_context::format_graph_context(&c))
830            .unwrap_or_default();
831
832            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
833            let savings = protocol::format_savings(original_tokens, sent);
834            (
835                append_compressed_hint(
836                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
837                    file_path,
838                ),
839                sent,
840            )
841        }
842        "reference" => {
843            let tok = count_tokens(content);
844            let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
845            let sent = count_tokens(&output);
846            let savings = protocol::format_savings(original_tokens, sent);
847            (format!("{output}\n{savings}"), sent)
848        }
849        mode if mode.starts_with("lines:") => {
850            let range_str = &mode[6..];
851            let extracted = extract_line_range(content, range_str);
852            let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
853            let sent = count_tokens(&extracted);
854            let savings = protocol::format_savings(original_tokens, sent);
855            (format!("{header}\n{extracted}\n{savings}"), sent)
856        }
857        unknown => {
858            let header = build_header(file_ref, short, ext, content, line_count, true);
859            let out = format!(
860                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
861            );
862            let sent = count_tokens(&out);
863            (out, sent)
864        }
865    }
866}
867
/// Renders the lines of `content` selected by `range_str` as `{number:>4}| {text}` rows.
///
/// `range_str` is a comma-separated list whose items are either a single
/// 1-based line number (`7`) or an inclusive span (`3-9`). A span bound that
/// does not parse as a number falls back to the first line (start) or the last
/// line (end), so `10-` reads to the end of the file and `-5` reads from the top.
/// Spans are clamped to the file; single numbers outside it and non-numeric
/// items are ignored. Items are emitted in the order given, duplicates included.
///
/// Returns the rows joined by newlines, or `"No lines matched the range."`
/// when nothing was selected.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let all_lines: Vec<&str> = content.lines().collect();
    let line_total = all_lines.len();
    // Callers only pass numbers already known to lie in 1..=line_total.
    let render = |number: usize| format!("{number:>4}| {}", all_lines[number - 1]);

    let mut picked: Vec<String> = Vec::new();
    for spec in range_str.split(',').map(str::trim) {
        match spec.split_once('-') {
            Some((lo, hi)) => {
                // Clamping both ends keeps every produced number inside the file,
                // and a reversed span simply yields an empty range.
                let first = lo.trim().parse::<usize>().unwrap_or(1).max(1);
                let last = hi
                    .trim()
                    .parse::<usize>()
                    .unwrap_or(line_total)
                    .min(line_total);
                picked.extend((first..=last).map(&render));
            }
            None => {
                if let Ok(number) = spec.parse::<usize>() {
                    if (1..=line_total).contains(&number) {
                        picked.push(render(number));
                    }
                }
            }
        }
    }

    if picked.is_empty() {
        return "No lines matched the range.".to_string();
    }
    picked.join("\n")
}
896
897fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
898    let short = protocol::shorten_path(path);
899    let old_content = cache.get(path).map(|e| e.content.clone());
900
901    let new_content = match read_file_lossy(path) {
902        Ok(c) => c,
903        Err(e) => {
904            let msg = format!("ERROR: {e}");
905            let tokens = count_tokens(&msg);
906            return (msg, tokens);
907        }
908    };
909
910    let original_tokens = count_tokens(&new_content);
911
912    let diff_output = if let Some(old) = &old_content {
913        compressor::diff_content(old, &new_content)
914    } else {
915        format!("[first read]\n{new_content}")
916    };
917
918    cache.store(path, new_content);
919
920    let sent = count_tokens(&diff_output);
921    let savings = protocol::format_savings(original_tokens, sent);
922    (
923        format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}"),
924        sent,
925    )
926}
927
928#[cfg(test)]
929mod tests {
930    use super::*;
931    use std::time::Duration;
932
933    #[test]
934    fn test_header_toon_format_no_brackets() {
935        let content = "use std::io;\nfn main() {}\n";
936        let header = build_header("F1", "main.rs", "rs", content, 2, false);
937        assert!(!header.contains('['));
938        assert!(!header.contains(']'));
939        assert!(header.contains("F1=main.rs 2L"));
940    }
941
942    #[test]
943    fn test_header_toon_deps_indented() {
944        let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
945        let header = build_header("F1", "main.rs", "rs", content, 3, true);
946        if header.contains("deps") {
947            assert!(
948                header.contains("\n deps "),
949                "deps should use indented TOON format"
950            );
951            assert!(
952                !header.contains("deps:["),
953                "deps should not use bracket format"
954            );
955        }
956    }
957
958    #[test]
959    fn test_header_toon_saves_tokens() {
960        let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
961        let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
962        let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
963        let old_tokens = count_tokens(&old_header);
964        let new_tokens = count_tokens(&new_header);
965        assert!(
966            new_tokens <= old_tokens,
967            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
968        );
969    }
970
971    #[test]
972    fn test_tdd_symbols_are_compact() {
973        let symbols = [
974            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
975        ];
976        for sym in &symbols {
977            let tok = count_tokens(sym);
978            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
979        }
980    }
981
982    #[test]
983    fn test_task_mode_filters_content() {
984        let content = (0..200)
985            .map(|i| {
986                if i % 20 == 0 {
987                    format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
988                } else {
989                    format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
990                }
991            })
992            .collect::<Vec<_>>()
993            .join("\n");
994        let full_tokens = count_tokens(&content);
995        let task = Some("fix bug in validate_token");
996        let (result, result_tokens) = process_mode(
997            &content,
998            "task",
999            "F1",
1000            "test.rs",
1001            "rs",
1002            full_tokens,
1003            CrpMode::Off,
1004            "test.rs",
1005            task,
1006        );
1007        assert!(
1008            result_tokens < full_tokens,
1009            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
1010        );
1011        assert!(
1012            result.contains("task-filtered"),
1013            "output should contain task-filtered marker"
1014        );
1015    }
1016
1017    #[test]
1018    fn test_task_mode_without_task_returns_full() {
1019        let content = "fn main() {}\nfn helper() {}\n";
1020        let tokens = count_tokens(content);
1021        let (result, _sent) = process_mode(
1022            content,
1023            "task",
1024            "F1",
1025            "test.rs",
1026            "rs",
1027            tokens,
1028            CrpMode::Off,
1029            "test.rs",
1030            None,
1031        );
1032        assert!(
1033            result.contains("no task set"),
1034            "should indicate no task: {result}"
1035        );
1036    }
1037
1038    #[test]
1039    fn test_reference_mode_one_line() {
1040        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
1041        let tokens = count_tokens(content);
1042        let (result, _sent) = process_mode(
1043            content,
1044            "reference",
1045            "F1",
1046            "test.rs",
1047            "rs",
1048            tokens,
1049            CrpMode::Off,
1050            "test.rs",
1051            None,
1052        );
1053        let lines: Vec<&str> = result.lines().collect();
1054        assert!(
1055            lines.len() <= 3,
1056            "reference mode should be very compact, got {} lines",
1057            lines.len()
1058        );
1059        assert!(result.contains("lines"), "should contain line count");
1060        assert!(result.contains("tok"), "should contain token count");
1061    }
1062
1063    #[test]
1064    fn cached_lines_mode_invalidates_on_mtime_change() {
1065        let dir = tempfile::tempdir().unwrap();
1066        let path = dir.path().join("file.txt");
1067        let p = path.to_string_lossy().to_string();
1068
1069        std::fs::write(&path, "one\nsecond\n").unwrap();
1070        let mut cache = SessionCache::new();
1071
1072        let r1 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1073        let l1: Vec<&str> = r1.content.lines().collect();
1074        let got1 = l1.get(1).copied().unwrap_or_default().trim();
1075        let got1 = got1.split_once('|').map_or(got1, |(_, s)| s.trim());
1076        assert_eq!(got1, "one");
1077
1078        std::thread::sleep(Duration::from_secs(1));
1079        std::fs::write(&path, "two\nsecond\n").unwrap();
1080
1081        let r2 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1082        let l2: Vec<&str> = r2.content.lines().collect();
1083        let got2 = l2.get(1).copied().unwrap_or_default().trim();
1084        let got2 = got2.split_once('|').map_or(got2, |(_, s)| s.trim());
1085        assert_eq!(got2, "two");
1086    }
1087
1088    #[test]
1089    #[cfg_attr(tarpaulin, ignore)]
1090    fn benchmark_task_conditioned_compression() {
1091        // Keep this reasonably small so CI coverage instrumentation stays fast.
1092        let content = generate_benchmark_code(200);
1093        let full_tokens = count_tokens(&content);
1094        let task = Some("fix authentication in validate_token");
1095
1096        let (_full_output, full_tok) = process_mode(
1097            &content,
1098            "full",
1099            "F1",
1100            "server.rs",
1101            "rs",
1102            full_tokens,
1103            CrpMode::Off,
1104            "server.rs",
1105            task,
1106        );
1107        let (_task_output, task_tok) = process_mode(
1108            &content,
1109            "task",
1110            "F1",
1111            "server.rs",
1112            "rs",
1113            full_tokens,
1114            CrpMode::Off,
1115            "server.rs",
1116            task,
1117        );
1118        let (_sig_output, sig_tok) = process_mode(
1119            &content,
1120            "signatures",
1121            "F1",
1122            "server.rs",
1123            "rs",
1124            full_tokens,
1125            CrpMode::Off,
1126            "server.rs",
1127            task,
1128        );
1129        let (_ref_output, ref_tok) = process_mode(
1130            &content,
1131            "reference",
1132            "F1",
1133            "server.rs",
1134            "rs",
1135            full_tokens,
1136            CrpMode::Off,
1137            "server.rs",
1138            task,
1139        );
1140
1141        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
1142        eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
1143        eprintln!("  full:       {full_tok:>6} tokens (baseline)");
1144        eprintln!(
1145            "  task:       {task_tok:>6} tokens ({:.0}% savings)",
1146            (1.0 - task_tok as f64 / full_tok as f64) * 100.0
1147        );
1148        eprintln!(
1149            "  signatures: {sig_tok:>6} tokens ({:.0}% savings)",
1150            (1.0 - sig_tok as f64 / full_tok as f64) * 100.0
1151        );
1152        eprintln!(
1153            "  reference:  {ref_tok:>6} tokens ({:.0}% savings)",
1154            (1.0 - ref_tok as f64 / full_tok as f64) * 100.0
1155        );
1156        eprintln!("================================================\n");
1157
1158        assert!(task_tok < full_tok, "task mode should save tokens");
1159        assert!(sig_tok < full_tok, "signatures should save tokens");
1160        assert!(ref_tok < sig_tok, "reference should be most compact");
1161    }
1162
1163    fn generate_benchmark_code(lines: usize) -> String {
1164        let mut code = Vec::with_capacity(lines);
1165        code.push("use std::collections::HashMap;".to_string());
1166        code.push("use crate::core::auth;".to_string());
1167        code.push(String::new());
1168        code.push("pub struct Server {".to_string());
1169        code.push("    config: Config,".to_string());
1170        code.push("    cache: HashMap<String, String>,".to_string());
1171        code.push("}".to_string());
1172        code.push(String::new());
1173        code.push("impl Server {".to_string());
1174        code.push(
1175            "    pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {"
1176                .to_string(),
1177        );
1178        code.push("        let decoded = auth::decode_jwt(token)?;".to_string());
1179        code.push("        if decoded.exp < chrono::Utc::now().timestamp() {".to_string());
1180        code.push("            return Err(AuthError::Expired);".to_string());
1181        code.push("        }".to_string());
1182        code.push("        Ok(decoded.claims)".to_string());
1183        code.push("    }".to_string());
1184        code.push(String::new());
1185
1186        let remaining = lines.saturating_sub(code.len());
1187        for i in 0..remaining {
1188            if i % 30 == 0 {
1189                code.push(format!(
1190                    "    pub fn handler_{i}(&self, req: Request) -> Response {{"
1191                ));
1192            } else if i % 30 == 29 {
1193                code.push("    }".to_string());
1194            } else {
1195                code.push(format!("        let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"));
1196            }
1197        }
1198        code.push("}".to_string());
1199        code.join("\n")
1200    }
1201
1202    #[test]
1203    fn instruction_file_detection() {
1204        assert!(is_instruction_file(
1205            "/home/user/.pi/agent/skills/committing-changes/SKILL.md"
1206        ));
1207        assert!(is_instruction_file("/workspace/.cursor/rules/lean-ctx.mdc"));
1208        assert!(is_instruction_file("/project/AGENTS.md"));
1209        assert!(is_instruction_file("/project/.cursorrules"));
1210        assert!(is_instruction_file("/home/user/.claude/rules/my-rule.md"));
1211        assert!(is_instruction_file("/skills/some-skill/README.md"));
1212
1213        assert!(!is_instruction_file("/project/src/main.rs"));
1214        assert!(!is_instruction_file("/project/config.json"));
1215        assert!(!is_instruction_file("/project/data/report.csv"));
1216    }
1217
1218    #[test]
1219    fn resolve_auto_mode_returns_full_for_instruction_files() {
1220        let mode = resolve_auto_mode(
1221            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
1222            5000,
1223            Some("read"),
1224        );
1225        assert_eq!(mode, "full", "SKILL.md must always be read in full");
1226
1227        let mode = resolve_auto_mode("/workspace/AGENTS.md", 3000, Some("read"));
1228        assert_eq!(mode, "full", "AGENTS.md must always be read in full");
1229
1230        let mode = resolve_auto_mode("/workspace/.cursorrules", 2000, None);
1231        assert_eq!(mode, "full", ".cursorrules must always be read in full");
1232    }
1233}