Skip to main content

lean_ctx/tools/
ctx_read.rs

1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a read: the rendered text plus bookkeeping computed while rendering it.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so callers can log, copy and
/// compare results (e.g. in tests) without hand-written glue.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadOutput {
    /// The text handed back to the caller.
    pub content: String,
    /// The mode that was actually applied (e.g. what `"auto"` resolved to).
    pub resolved_mode: String,
    /// Approximate output token count from mode processing.
    /// The dispatch layer recounts the final assembled string for accurate savings.
    pub output_tokens: usize,
}
22
/// Trailer line appended to every compressed rendering.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Modes whose rendered output is stored in (and served from) the per-file compressed cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is one of [`CACHEABLE_MODES`] (exact, case-sensitive match).
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|&candidate| candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32    if crp_mode.is_tdd() {
33        format!("{mode}:tdd")
34    } else {
35        mode.to_string()
36    }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40    format!("{output}\n{COMPRESSED_HINT}\n  ctx_read(\"{file_path}\", mode=\"full\")")
41}
42
43/// Reads a file as UTF-8 with lossy fallback, enforcing the max read size limit.
44pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
45    let cap = crate::core::limits::max_read_bytes();
46    if let Ok(meta) = std::fs::metadata(path) {
47        if meta.len() > cap as u64 {
48            return Err(std::io::Error::other(format!(
49                "file too large ({} bytes, cap {} via LCTX_MAX_READ_BYTES)",
50                meta.len(),
51                cap
52            )));
53        }
54    }
55    let bytes = std::fs::read(path)?;
56    match String::from_utf8(bytes) {
57        Ok(s) => Ok(s),
58        Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
59    }
60}
61
62/// Reads a file through the cache and applies the requested compression mode.
63pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
64    handle_with_options(cache, path, mode, false, crp_mode, None)
65}
66
67/// Like `handle`, but invalidates the cache first to force a fresh disk read.
68pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
69    handle_with_options(cache, path, mode, true, crp_mode, None)
70}
71
72/// Reads a file with task-aware filtering to prioritize task-relevant content.
73pub fn handle_with_task(
74    cache: &mut SessionCache,
75    path: &str,
76    mode: &str,
77    crp_mode: CrpMode,
78    task: Option<&str>,
79) -> String {
80    handle_with_options(cache, path, mode, false, crp_mode, task)
81}
82
83/// Like `handle_with_task`, also returns the resolved mode name and pre-counted tokens.
84pub fn handle_with_task_resolved(
85    cache: &mut SessionCache,
86    path: &str,
87    mode: &str,
88    crp_mode: CrpMode,
89    task: Option<&str>,
90) -> ReadOutput {
91    handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
92}
93
94/// Fresh read with task-aware filtering (invalidates cache first).
95pub fn handle_fresh_with_task(
96    cache: &mut SessionCache,
97    path: &str,
98    mode: &str,
99    crp_mode: CrpMode,
100    task: Option<&str>,
101) -> String {
102    handle_with_options(cache, path, mode, true, crp_mode, task)
103}
104
105/// Fresh read with task-aware filtering, also returns the resolved mode name and pre-counted tokens.
106pub fn handle_fresh_with_task_resolved(
107    cache: &mut SessionCache,
108    path: &str,
109    mode: &str,
110    crp_mode: CrpMode,
111    task: Option<&str>,
112) -> ReadOutput {
113    handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
114}
115
116fn handle_with_options(
117    cache: &mut SessionCache,
118    path: &str,
119    mode: &str,
120    fresh: bool,
121    crp_mode: CrpMode,
122    task: Option<&str>,
123) -> String {
124    handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
125}
126
127fn handle_with_options_resolved(
128    cache: &mut SessionCache,
129    path: &str,
130    mode: &str,
131    fresh: bool,
132    crp_mode: CrpMode,
133    task: Option<&str>,
134) -> ReadOutput {
135    let file_ref = cache.get_file_ref(path);
136    let short = protocol::shorten_path(path);
137    let ext = Path::new(path)
138        .extension()
139        .and_then(|e| e.to_str())
140        .unwrap_or("");
141
142    if fresh {
143        cache.invalidate(path);
144    }
145
146    if mode == "diff" {
147        let (out, sent) = handle_diff(cache, path, &file_ref);
148        return ReadOutput {
149            content: out,
150            resolved_mode: "diff".into(),
151            output_tokens: sent,
152        };
153    }
154
155    if mode != "full" {
156        if let Some(existing) = cache.get(path) {
157            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
158            if stale {
159                cache.invalidate(path);
160            }
161        }
162    }
163
164    if let Some(existing) = cache.get(path) {
165        if mode == "full" {
166            let (out, sent) =
167                handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
168            let out = crate::core::redaction::redact_text_if_enabled(&out);
169            return ReadOutput {
170                content: out,
171                resolved_mode: "full".into(),
172                output_tokens: sent,
173            };
174        }
175        let content = existing.content.clone();
176        let original_tokens = existing.original_tokens;
177        let resolved_mode = if mode == "auto" {
178            resolve_auto_mode(path, original_tokens, task)
179        } else {
180            mode.to_string()
181        };
182        if is_cacheable_mode(&resolved_mode) {
183            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
184            if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
185                let sent = count_tokens(cached_output);
186                let out = crate::core::redaction::redact_text_if_enabled(cached_output);
187                return ReadOutput {
188                    content: out,
189                    resolved_mode,
190                    output_tokens: sent,
191                };
192            }
193        }
194        let (out, sent) = process_mode(
195            &content,
196            &resolved_mode,
197            &file_ref,
198            &short,
199            ext,
200            original_tokens,
201            crp_mode,
202            path,
203            task,
204        );
205        if is_cacheable_mode(&resolved_mode) {
206            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
207            cache.set_compressed(path, &cache_key, out.clone());
208        }
209        let out = crate::core::redaction::redact_text_if_enabled(&out);
210        return ReadOutput {
211            content: out,
212            resolved_mode,
213            output_tokens: sent,
214        };
215    }
216
217    let content = match read_file_lossy(path) {
218        Ok(c) => c,
219        Err(e) => {
220            let msg = format!("ERROR: {e}");
221            let tokens = count_tokens(&msg);
222            return ReadOutput {
223                content: msg,
224                resolved_mode: "error".into(),
225                output_tokens: tokens,
226            };
227        }
228    };
229
230    let similar_hint = find_similar_and_update_semantic_index(path, &content);
231    let graph_hint = build_graph_related_hint(path);
232
233    let store_result = cache.store(path, content.clone());
234
235    if mode == "full" {
236        cache.mark_full_delivered(path);
237        let (mut output, sent) = format_full_output(
238            &file_ref,
239            &short,
240            ext,
241            &content,
242            store_result.original_tokens,
243            store_result.line_count,
244            task,
245        );
246        if let Some(hint) = &graph_hint {
247            output.push_str(&format!("\n{hint}"));
248        }
249        if let Some(hint) = similar_hint {
250            output.push_str(&format!("\n{hint}"));
251        }
252        let output = crate::core::redaction::redact_text_if_enabled(&output);
253        return ReadOutput {
254            content: output,
255            resolved_mode: "full".into(),
256            output_tokens: sent,
257        };
258    }
259
260    let resolved_mode = if mode == "auto" {
261        resolve_auto_mode(path, store_result.original_tokens, task)
262    } else {
263        mode.to_string()
264    };
265
266    let (mut output, _sent) = process_mode(
267        &content,
268        &resolved_mode,
269        &file_ref,
270        &short,
271        ext,
272        store_result.original_tokens,
273        crp_mode,
274        path,
275        task,
276    );
277    if is_cacheable_mode(&resolved_mode) {
278        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
279        cache.set_compressed(path, &cache_key, output.clone());
280    }
281    if let Some(hint) = &graph_hint {
282        output.push_str(&format!("\n{hint}"));
283    }
284    if let Some(hint) = similar_hint {
285        output.push_str(&format!("\n{hint}"));
286    }
287    let output = crate::core::redaction::redact_text_if_enabled(&output);
288    let final_tokens = count_tokens(&output);
289    ReadOutput {
290        content: output,
291        resolved_mode,
292        output_tokens: final_tokens,
293    }
294}
295
296fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
297    // Priority 1: Intent Router with budget/pressure-aware degradation.
298    // Only fall through to Predictor/Bandit if the router returns "auto".
299    let intent_query = task.unwrap_or("read");
300    let route = crate::core::intent_router::route_v1(intent_query);
301    let intent_mode = &route.decision.effective_read_mode;
302    if intent_mode != "auto" && intent_mode != "reference" {
303        return intent_mode.clone();
304    }
305
306    // Priority 2: FileSignature-based predictor
307    let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
308    let predictor = crate::core::mode_predictor::ModePredictor::new();
309    let mut predicted = predictor
310        .predict_best_mode(&sig)
311        .unwrap_or_else(|| "full".to_string());
312    if predicted == "auto" {
313        predicted = "full".to_string();
314    }
315
316    // Priority 3: Bandit exploration when budget is tight
317    if let Some(project_root) =
318        crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
319    {
320        let ext = std::path::Path::new(file_path)
321            .extension()
322            .and_then(|e| e.to_str())
323            .unwrap_or("");
324        let bucket = match original_tokens {
325            0..=2000 => "sm",
326            2001..=10000 => "md",
327            10001..=50000 => "lg",
328            _ => "xl",
329        };
330        let bandit_key = format!("{ext}_{bucket}");
331        let mut store = crate::core::bandit::BanditStore::load(&project_root);
332        let bandit = store.get_or_create(&bandit_key);
333        let arm = bandit.select_arm();
334        if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
335            predicted = "aggressive".to_string();
336        }
337    }
338
339    // Priority 4: Adaptive mode policy
340    let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
341    let chosen = policy.choose_auto_mode(task, &predicted);
342
343    if original_tokens > 2000 {
344        if predicted == "map" || predicted == "signatures" {
345            if chosen != "map" && chosen != "signatures" {
346                return predicted;
347            }
348        } else if chosen == "full" && predicted != "full" {
349            return predicted;
350        }
351    }
352
353    chosen
354}
355
356fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
357    let cfg = crate::core::config::Config::load();
358    let profile = crate::core::config::MemoryProfile::effective(&cfg);
359    if !profile.semantic_cache_enabled() {
360        return None;
361    }
362
363    let project_root = detect_project_root(path);
364    let session_id = format!("{}", std::process::id());
365    let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
366
367    let similar = index.find_similar(content, 0.7);
368    let relevant: Vec<_> = similar
369        .into_iter()
370        .filter(|(p, _)| p != path)
371        .take(3)
372        .collect();
373
374    index.add_file(path, content, &session_id);
375    let _ = index.save(&project_root);
376
377    if relevant.is_empty() {
378        return None;
379    }
380
381    let hints: Vec<String> = relevant
382        .iter()
383        .map(|(p, score)| format!("  {p} ({:.0}% similar)", score * 100.0))
384        .collect();
385
386    Some(format!(
387        "[semantic: {} similar file(s) in cache]\n{}",
388        relevant.len(),
389        hints.join("\n")
390    ))
391}
392
393fn detect_project_root(path: &str) -> String {
394    crate::core::protocol::detect_project_root_or_cwd(path)
395}
396
397fn build_graph_related_hint(path: &str) -> Option<String> {
398    let project_root = detect_project_root(path);
399    crate::core::graph_context::build_related_hint(path, &project_root, 5)
400}
401
402const AUTO_DELTA_THRESHOLD: f64 = 0.6;
403
404/// Re-reads from disk; if content changed and delta is compact, sends auto-delta.
405fn handle_full_with_auto_delta(
406    cache: &mut SessionCache,
407    path: &str,
408    file_ref: &str,
409    short: &str,
410    ext: &str,
411    task: Option<&str>,
412) -> (String, usize) {
413    let Ok(disk_content) = read_file_lossy(path) else {
414        cache.record_cache_hit(path);
415        let out = if let Some(existing) = cache.get(path) {
416            format!(
417                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
418                existing.read_count, existing.line_count
419            )
420        } else {
421            format!("[file read failed and no cached version available] {file_ref}={short}")
422        };
423        let sent = count_tokens(&out);
424        return (out, sent);
425    };
426
427    let old_content = cache
428        .get(path)
429        .map(|e| e.content.clone())
430        .unwrap_or_default();
431    let store_result = cache.store(path, disk_content.clone());
432
433    if store_result.was_hit {
434        if store_result.full_content_delivered {
435            let out = format!(
436                "{file_ref}={short} cached {}t {}L\nFile content unchanged since last read (same hash). Already in your context window.",
437                store_result.read_count, store_result.line_count
438            );
439            let sent = count_tokens(&out);
440            return (out, sent);
441        }
442        cache.mark_full_delivered(path);
443        return format_full_output(
444            file_ref,
445            short,
446            ext,
447            &disk_content,
448            store_result.original_tokens,
449            store_result.line_count,
450            task,
451        );
452    }
453
454    let diff = compressor::diff_content(&old_content, &disk_content);
455    let diff_tokens = count_tokens(&diff);
456    let full_tokens = store_result.original_tokens;
457
458    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
459        let savings = protocol::format_savings(full_tokens, diff_tokens);
460        let out = format!(
461            "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
462            disk_content.lines().count()
463        );
464        return (out, diff_tokens);
465    }
466
467    format_full_output(
468        file_ref,
469        short,
470        ext,
471        &disk_content,
472        store_result.original_tokens,
473        store_result.line_count,
474        task,
475    )
476}
477
478fn format_full_output(
479    file_ref: &str,
480    short: &str,
481    ext: &str,
482    content: &str,
483    original_tokens: usize,
484    line_count: usize,
485    task: Option<&str>,
486) -> (String, usize) {
487    let tokens = original_tokens;
488    let metadata = build_header(file_ref, short, ext, content, line_count, true);
489
490    let mut reordered: Option<String> = None;
491    {
492        let profile = crate::core::profiles::active_profile();
493        let cfg = profile.layout;
494        if cfg.enabled_effective() && line_count >= cfg.min_lines_effective() {
495            let task_str = task.unwrap_or("");
496            if !task_str.is_empty() {
497                let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
498                let r = crate::core::attention_layout_driver::maybe_reorder_for_attention(
499                    content, &keywords, &cfg,
500                );
501                if !r.skipped && r.changed {
502                    reordered = Some(r.output);
503                }
504            }
505        }
506    }
507
508    let content_for_output = reordered.as_deref().unwrap_or(content);
509
510    let mut sym = SymbolMap::new();
511    let idents = symbol_map::extract_identifiers(content_for_output, ext);
512    for ident in &idents {
513        sym.register(ident);
514    }
515
516    if sym.len() >= 3 {
517        let sym_table = sym.format_table();
518        let compressed = sym.apply(content_for_output);
519        let original_tok = count_tokens(content_for_output);
520        let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
521        let net_saving = original_tok.saturating_sub(compressed_tok);
522        if original_tok > 0 && net_saving * 100 / original_tok >= 5 {
523            let output = format!("{metadata}\n{compressed}{sym_table}");
524            let sent = count_tokens(&output);
525            let savings = protocol::format_savings(tokens, sent);
526            return (format!("{output}\n{savings}"), sent);
527        }
528    }
529
530    let output = format!("{metadata}\n{content_for_output}");
531    let sent = count_tokens(&output);
532    let savings = protocol::format_savings(tokens, sent);
533    (format!("{output}\n{savings}"), sent)
534}
535
536fn build_header(
537    file_ref: &str,
538    short: &str,
539    ext: &str,
540    content: &str,
541    line_count: usize,
542    include_deps: bool,
543) -> String {
544    let mut header = format!("{file_ref}={short} {line_count}L");
545
546    if include_deps {
547        let dep_info = deps::extract_deps(content, ext);
548        if !dep_info.imports.is_empty() {
549            let imports_str: Vec<&str> = dep_info
550                .imports
551                .iter()
552                .take(8)
553                .map(std::string::String::as_str)
554                .collect();
555            header.push_str(&format!("\n deps {}", imports_str.join(",")));
556        }
557        if !dep_info.exports.is_empty() {
558            let exports_str: Vec<&str> = dep_info
559                .exports
560                .iter()
561                .take(8)
562                .map(std::string::String::as_str)
563                .collect();
564            header.push_str(&format!("\n exports {}", exports_str.join(",")));
565        }
566    }
567
568    header
569}
570
571#[allow(clippy::too_many_arguments)]
572fn process_mode(
573    content: &str,
574    mode: &str,
575    file_ref: &str,
576    short: &str,
577    ext: &str,
578    original_tokens: usize,
579    crp_mode: CrpMode,
580    file_path: &str,
581    task: Option<&str>,
582) -> (String, usize) {
583    let line_count = content.lines().count();
584
585    match mode {
586        "auto" => {
587            let chosen = resolve_auto_mode(file_path, original_tokens, task);
588            process_mode(
589                content,
590                &chosen,
591                file_ref,
592                short,
593                ext,
594                original_tokens,
595                crp_mode,
596                file_path,
597                task,
598            )
599        }
600        "full" => format_full_output(
601            file_ref,
602            short,
603            ext,
604            content,
605            original_tokens,
606            line_count,
607            task,
608        ),
609        "signatures" => {
610            let sigs = signatures::extract_signatures(content, ext);
611            let dep_info = deps::extract_deps(content, ext);
612
613            let mut output = format!("{file_ref}={short} {line_count}L");
614            if !dep_info.imports.is_empty() {
615                let imports_str: Vec<&str> = dep_info
616                    .imports
617                    .iter()
618                    .take(8)
619                    .map(std::string::String::as_str)
620                    .collect();
621                output.push_str(&format!("\n deps {}", imports_str.join(",")));
622            }
623            for sig in &sigs {
624                output.push('\n');
625                if crp_mode.is_tdd() {
626                    output.push_str(&sig.to_tdd());
627                } else {
628                    output.push_str(&sig.to_compact());
629                }
630            }
631            let sent = count_tokens(&output);
632            let savings = protocol::format_savings(original_tokens, sent);
633            (
634                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
635                sent,
636            )
637        }
638        "map" => {
639            if ext == "php" {
640                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
641                {
642                    let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
643                    let sent = count_tokens(&output);
644                    let savings = protocol::format_savings(original_tokens, sent);
645                    output.push('\n');
646                    output.push_str(&savings);
647                    return (append_compressed_hint(&output, file_path), sent);
648                }
649            }
650
651            let sigs = signatures::extract_signatures(content, ext);
652            let dep_info = deps::extract_deps(content, ext);
653
654            let mut output = format!("{file_ref}={short} {line_count}L");
655
656            if !dep_info.imports.is_empty() {
657                output.push_str("\n  deps: ");
658                output.push_str(&dep_info.imports.join(", "));
659            }
660
661            if !dep_info.exports.is_empty() {
662                output.push_str("\n  exports: ");
663                output.push_str(&dep_info.exports.join(", "));
664            }
665
666            let key_sigs: Vec<&signatures::Signature> = sigs
667                .iter()
668                .filter(|s| s.is_exported || s.indent == 0)
669                .collect();
670
671            if !key_sigs.is_empty() {
672                output.push_str("\n  API:");
673                for sig in &key_sigs {
674                    output.push_str("\n    ");
675                    if crp_mode.is_tdd() {
676                        output.push_str(&sig.to_tdd());
677                    } else {
678                        output.push_str(&sig.to_compact());
679                    }
680                }
681            }
682
683            let sent = count_tokens(&output);
684            let savings = protocol::format_savings(original_tokens, sent);
685            (
686                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
687                sent,
688            )
689        }
690        "aggressive" => {
691            #[cfg(feature = "tree-sitter")]
692            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
693            #[cfg(not(feature = "tree-sitter"))]
694            let ast_pruned: Option<String> = None;
695
696            let base = ast_pruned.as_deref().unwrap_or(content);
697
698            let session_intent = crate::core::session::SessionState::load_latest()
699                .and_then(|s| s.active_structured_intent);
700            let raw = if let Some(ref intent) = session_intent {
701                compressor::task_aware_compress(base, Some(ext), intent)
702            } else {
703                compressor::aggressive_compress(base, Some(ext))
704            };
705            let compressed = compressor::safeguard_ratio(content, &raw);
706            let header = build_header(file_ref, short, ext, content, line_count, true);
707
708            let mut sym = SymbolMap::new();
709            let idents = symbol_map::extract_identifiers(&compressed, ext);
710            for ident in &idents {
711                sym.register(ident);
712            }
713
714            if sym.len() >= 3 {
715                let sym_table = sym.format_table();
716                let sym_applied = sym.apply(&compressed);
717                let orig_tok = count_tokens(&compressed);
718                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
719                let net = orig_tok.saturating_sub(comp_tok);
720                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
721                    let savings = protocol::format_savings(original_tokens, comp_tok);
722                    return (
723                        append_compressed_hint(
724                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
725                            file_path,
726                        ),
727                        comp_tok,
728                    );
729                }
730                let savings = protocol::format_savings(original_tokens, orig_tok);
731                return (
732                    append_compressed_hint(
733                        &format!("{header}\n{compressed}\n{savings}"),
734                        file_path,
735                    ),
736                    orig_tok,
737                );
738            }
739
740            let sent = count_tokens(&compressed);
741            let savings = protocol::format_savings(original_tokens, sent);
742            (
743                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
744                sent,
745            )
746        }
747        "entropy" => {
748            let result = entropy::entropy_compress_adaptive(content, file_path);
749            let avg_h = entropy::analyze_entropy(content).avg_entropy;
750            let header = build_header(file_ref, short, ext, content, line_count, false);
751            let techs = result.techniques.join(", ");
752            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
753            let sent = count_tokens(&output);
754            let savings = protocol::format_savings(original_tokens, sent);
755            let compression_ratio = if original_tokens > 0 {
756                1.0 - (sent as f64 / original_tokens as f64)
757            } else {
758                0.0
759            };
760            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
761            (
762                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
763                sent,
764            )
765        }
766        "task" => {
767            let task_str = task.unwrap_or("");
768            if task_str.is_empty() {
769                let header = build_header(file_ref, short, ext, content, line_count, true);
770                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
771                let sent = count_tokens(&out);
772                return (out, sent);
773            }
774            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
775            if keywords.is_empty() {
776                let header = build_header(file_ref, short, ext, content, line_count, true);
777                let out = format!(
778                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
779                );
780                let sent = count_tokens(&out);
781                return (out, sent);
782            }
783            let filtered =
784                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
785            let filtered_lines = filtered.lines().count();
786            let header = format!(
787                "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
788            );
789            let project_root = detect_project_root(file_path);
790            let graph_ctx = crate::core::graph_context::build_graph_context(
791                file_path,
792                &project_root,
793                Some(crate::core::graph_context::GraphContextOptions::default()),
794            )
795            .map(|c| crate::core::graph_context::format_graph_context(&c))
796            .unwrap_or_default();
797
798            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
799            let savings = protocol::format_savings(original_tokens, sent);
800            (
801                append_compressed_hint(
802                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
803                    file_path,
804                ),
805                sent,
806            )
807        }
808        "reference" => {
809            let tok = count_tokens(content);
810            let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
811            let sent = count_tokens(&output);
812            let savings = protocol::format_savings(original_tokens, sent);
813            (format!("{output}\n{savings}"), sent)
814        }
815        mode if mode.starts_with("lines:") => {
816            let range_str = &mode[6..];
817            let extracted = extract_line_range(content, range_str);
818            let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
819            let sent = count_tokens(&extracted);
820            let savings = protocol::format_savings(original_tokens, sent);
821            (format!("{header}\n{extracted}\n{savings}"), sent)
822        }
823        unknown => {
824            let header = build_header(file_ref, short, ext, content, line_count, true);
825            let out = format!(
826                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
827            );
828            let sent = count_tokens(&out);
829            (out, sent)
830        }
831    }
832}
833
/// Extracts the requested 1-based lines from `content`, each prefixed with its
/// right-aligned line number (`"   7| text"`).
///
/// `range_str` is a comma-separated list of items, each either a single line
/// number (`"12"`) or an inclusive range (`"3-9"`). In a range, a start that
/// does not parse as a number means line 1 and an end that does not parse means
/// the last line; bounds are clamped to the file. Items that select nothing are
/// ignored, and lines appear in the order requested (repeats included).
///
/// Returns `"No lines matched the range."` when nothing was selected.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    // Callers below only pass `n` within 1..=total.
    let numbered = |n: usize| format!("{n:>4}| {}", lines[n - 1]);

    let mut selected: Vec<String> = Vec::new();
    for spec in range_str.split(',').map(str::trim) {
        match spec.split_once('-') {
            Some((lo, hi)) => {
                let first = lo.trim().parse::<usize>().map_or(1, |n| n.max(1));
                let last = hi.trim().parse::<usize>().map_or(total, |n| n.min(total));
                // `first >= 1` and `last <= total`, so every index is valid;
                // an inverted range is simply empty.
                selected.extend((first..=last).map(&numbered));
            }
            None => {
                if let Ok(n) = spec.parse::<usize>() {
                    if (1..=total).contains(&n) {
                        selected.push(numbered(n));
                    }
                }
            }
        }
    }

    if selected.is_empty() {
        return "No lines matched the range.".to_string();
    }
    selected.join("\n")
}
862
863fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
864    let short = protocol::shorten_path(path);
865    let old_content = cache.get(path).map(|e| e.content.clone());
866
867    let new_content = match read_file_lossy(path) {
868        Ok(c) => c,
869        Err(e) => {
870            let msg = format!("ERROR: {e}");
871            let tokens = count_tokens(&msg);
872            return (msg, tokens);
873        }
874    };
875
876    let original_tokens = count_tokens(&new_content);
877
878    let diff_output = if let Some(old) = &old_content {
879        compressor::diff_content(old, &new_content)
880    } else {
881        format!("[first read]\n{new_content}")
882    };
883
884    cache.store(path, new_content);
885
886    let sent = count_tokens(&diff_output);
887    let savings = protocol::format_savings(original_tokens, sent);
888    (
889        format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}"),
890        sent,
891    )
892}
893
894#[cfg(test)]
895mod tests {
896    use super::*;
897    use std::time::Duration;
898
899    #[test]
900    fn test_header_toon_format_no_brackets() {
901        let content = "use std::io;\nfn main() {}\n";
902        let header = build_header("F1", "main.rs", "rs", content, 2, false);
903        assert!(!header.contains('['));
904        assert!(!header.contains(']'));
905        assert!(header.contains("F1=main.rs 2L"));
906    }
907
908    #[test]
909    fn test_header_toon_deps_indented() {
910        let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
911        let header = build_header("F1", "main.rs", "rs", content, 3, true);
912        if header.contains("deps") {
913            assert!(
914                header.contains("\n deps "),
915                "deps should use indented TOON format"
916            );
917            assert!(
918                !header.contains("deps:["),
919                "deps should not use bracket format"
920            );
921        }
922    }
923
924    #[test]
925    fn test_header_toon_saves_tokens() {
926        let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
927        let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
928        let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
929        let old_tokens = count_tokens(&old_header);
930        let new_tokens = count_tokens(&new_header);
931        assert!(
932            new_tokens <= old_tokens,
933            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
934        );
935    }
936
937    #[test]
938    fn test_tdd_symbols_are_compact() {
939        let symbols = [
940            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
941        ];
942        for sym in &symbols {
943            let tok = count_tokens(sym);
944            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
945        }
946    }
947
948    #[test]
949    fn test_task_mode_filters_content() {
950        let content = (0..200)
951            .map(|i| {
952                if i % 20 == 0 {
953                    format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
954                } else {
955                    format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
956                }
957            })
958            .collect::<Vec<_>>()
959            .join("\n");
960        let full_tokens = count_tokens(&content);
961        let task = Some("fix bug in validate_token");
962        let (result, result_tokens) = process_mode(
963            &content,
964            "task",
965            "F1",
966            "test.rs",
967            "rs",
968            full_tokens,
969            CrpMode::Off,
970            "test.rs",
971            task,
972        );
973        assert!(
974            result_tokens < full_tokens,
975            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
976        );
977        assert!(
978            result.contains("task-filtered"),
979            "output should contain task-filtered marker"
980        );
981    }
982
983    #[test]
984    fn test_task_mode_without_task_returns_full() {
985        let content = "fn main() {}\nfn helper() {}\n";
986        let tokens = count_tokens(content);
987        let (result, _sent) = process_mode(
988            content,
989            "task",
990            "F1",
991            "test.rs",
992            "rs",
993            tokens,
994            CrpMode::Off,
995            "test.rs",
996            None,
997        );
998        assert!(
999            result.contains("no task set"),
1000            "should indicate no task: {result}"
1001        );
1002    }
1003
1004    #[test]
1005    fn test_reference_mode_one_line() {
1006        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
1007        let tokens = count_tokens(content);
1008        let (result, _sent) = process_mode(
1009            content,
1010            "reference",
1011            "F1",
1012            "test.rs",
1013            "rs",
1014            tokens,
1015            CrpMode::Off,
1016            "test.rs",
1017            None,
1018        );
1019        let lines: Vec<&str> = result.lines().collect();
1020        assert!(
1021            lines.len() <= 3,
1022            "reference mode should be very compact, got {} lines",
1023            lines.len()
1024        );
1025        assert!(result.contains("lines"), "should contain line count");
1026        assert!(result.contains("tok"), "should contain token count");
1027    }
1028
1029    #[test]
1030    fn cached_lines_mode_invalidates_on_mtime_change() {
1031        let dir = tempfile::tempdir().unwrap();
1032        let path = dir.path().join("file.txt");
1033        let p = path.to_string_lossy().to_string();
1034
1035        std::fs::write(&path, "one\nsecond\n").unwrap();
1036        let mut cache = SessionCache::new();
1037
1038        let r1 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1039        let l1: Vec<&str> = r1.content.lines().collect();
1040        let got1 = l1.get(1).copied().unwrap_or_default().trim();
1041        let got1 = got1.split_once('|').map_or(got1, |(_, s)| s.trim());
1042        assert_eq!(got1, "one");
1043
1044        std::thread::sleep(Duration::from_secs(1));
1045        std::fs::write(&path, "two\nsecond\n").unwrap();
1046
1047        let r2 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1048        let l2: Vec<&str> = r2.content.lines().collect();
1049        let got2 = l2.get(1).copied().unwrap_or_default().trim();
1050        let got2 = got2.split_once('|').map_or(got2, |(_, s)| s.trim());
1051        assert_eq!(got2, "two");
1052    }
1053
1054    #[test]
1055    #[cfg_attr(tarpaulin, ignore)]
1056    fn benchmark_task_conditioned_compression() {
1057        // Keep this reasonably small so CI coverage instrumentation stays fast.
1058        let content = generate_benchmark_code(200);
1059        let full_tokens = count_tokens(&content);
1060        let task = Some("fix authentication in validate_token");
1061
1062        let (_full_output, full_tok) = process_mode(
1063            &content,
1064            "full",
1065            "F1",
1066            "server.rs",
1067            "rs",
1068            full_tokens,
1069            CrpMode::Off,
1070            "server.rs",
1071            task,
1072        );
1073        let (_task_output, task_tok) = process_mode(
1074            &content,
1075            "task",
1076            "F1",
1077            "server.rs",
1078            "rs",
1079            full_tokens,
1080            CrpMode::Off,
1081            "server.rs",
1082            task,
1083        );
1084        let (_sig_output, sig_tok) = process_mode(
1085            &content,
1086            "signatures",
1087            "F1",
1088            "server.rs",
1089            "rs",
1090            full_tokens,
1091            CrpMode::Off,
1092            "server.rs",
1093            task,
1094        );
1095        let (_ref_output, ref_tok) = process_mode(
1096            &content,
1097            "reference",
1098            "F1",
1099            "server.rs",
1100            "rs",
1101            full_tokens,
1102            CrpMode::Off,
1103            "server.rs",
1104            task,
1105        );
1106
1107        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
1108        eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
1109        eprintln!("  full:       {full_tok:>6} tokens (baseline)");
1110        eprintln!(
1111            "  task:       {task_tok:>6} tokens ({:.0}% savings)",
1112            (1.0 - task_tok as f64 / full_tok as f64) * 100.0
1113        );
1114        eprintln!(
1115            "  signatures: {sig_tok:>6} tokens ({:.0}% savings)",
1116            (1.0 - sig_tok as f64 / full_tok as f64) * 100.0
1117        );
1118        eprintln!(
1119            "  reference:  {ref_tok:>6} tokens ({:.0}% savings)",
1120            (1.0 - ref_tok as f64 / full_tok as f64) * 100.0
1121        );
1122        eprintln!("================================================\n");
1123
1124        assert!(task_tok < full_tok, "task mode should save tokens");
1125        assert!(sig_tok < full_tok, "signatures should save tokens");
1126        assert!(ref_tok < sig_tok, "reference should be most compact");
1127    }
1128
1129    fn generate_benchmark_code(lines: usize) -> String {
1130        let mut code = Vec::with_capacity(lines);
1131        code.push("use std::collections::HashMap;".to_string());
1132        code.push("use crate::core::auth;".to_string());
1133        code.push(String::new());
1134        code.push("pub struct Server {".to_string());
1135        code.push("    config: Config,".to_string());
1136        code.push("    cache: HashMap<String, String>,".to_string());
1137        code.push("}".to_string());
1138        code.push(String::new());
1139        code.push("impl Server {".to_string());
1140        code.push(
1141            "    pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {"
1142                .to_string(),
1143        );
1144        code.push("        let decoded = auth::decode_jwt(token)?;".to_string());
1145        code.push("        if decoded.exp < chrono::Utc::now().timestamp() {".to_string());
1146        code.push("            return Err(AuthError::Expired);".to_string());
1147        code.push("        }".to_string());
1148        code.push("        Ok(decoded.claims)".to_string());
1149        code.push("    }".to_string());
1150        code.push(String::new());
1151
1152        let remaining = lines.saturating_sub(code.len());
1153        for i in 0..remaining {
1154            if i % 30 == 0 {
1155                code.push(format!(
1156                    "    pub fn handler_{i}(&self, req: Request) -> Response {{"
1157                ));
1158            } else if i % 30 == 29 {
1159                code.push("    }".to_string());
1160            } else {
1161                code.push(format!("        let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"));
1162            }
1163        }
1164        code.push("}".to_string());
1165        code.join("\n")
1166    }
1167}