Skip to main content

lean_ctx/tools/
ctx_read.rs

1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Pre-counted read output carrying the output string, resolved mode,
/// and token count computed during mode processing.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so results can be logged,
/// duplicated and compared directly in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadOutput {
    /// Rendered text handed back to the caller.
    pub content: String,
    /// Name of the mode that was actually applied (for `"auto"` requests this
    /// is the concrete mode that was selected).
    pub resolved_mode: String,
    /// Approximate output token count from mode processing.
    /// The dispatch layer recounts the final assembled string for accurate savings.
    pub output_tokens: usize,
}
22
/// Trailer line added to compressed renderings, pointing at `mode="full"`.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Modes whose rendered output is stored in, and served from, the per-file
/// compressed-output cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is one of the entries in `CACHEABLE_MODES`.
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|&candidate| candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32    if crp_mode.is_tdd() {
33        format!("{mode}:tdd")
34    } else {
35        mode.to_string()
36    }
37}
38
39fn append_compressed_hint(output: &str, file_path: &str) -> String {
40    format!("{output}\n{COMPRESSED_HINT}\n  ctx_read(\"{file_path}\", mode=\"full\")")
41}
42
43/// Reads a file as UTF-8 with lossy fallback, enforcing the max read size limit.
44pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
45    let cap = crate::core::limits::max_read_bytes();
46    if let Ok(meta) = std::fs::metadata(path) {
47        if meta.len() > cap as u64 {
48            return Err(std::io::Error::other(format!(
49                "file too large ({} bytes, cap {} via LCTX_MAX_READ_BYTES)",
50                meta.len(),
51                cap
52            )));
53        }
54    }
55    let bytes = std::fs::read(path)?;
56    match String::from_utf8(bytes) {
57        Ok(s) => Ok(s),
58        Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
59    }
60}
61
62/// Reads a file through the cache and applies the requested compression mode.
63pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
64    handle_with_options(cache, path, mode, false, crp_mode, None)
65}
66
67/// Like `handle`, but invalidates the cache first to force a fresh disk read.
68pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
69    handle_with_options(cache, path, mode, true, crp_mode, None)
70}
71
72/// Reads a file with task-aware filtering to prioritize task-relevant content.
73pub fn handle_with_task(
74    cache: &mut SessionCache,
75    path: &str,
76    mode: &str,
77    crp_mode: CrpMode,
78    task: Option<&str>,
79) -> String {
80    handle_with_options(cache, path, mode, false, crp_mode, task)
81}
82
83/// Like `handle_with_task`, also returns the resolved mode name and pre-counted tokens.
84pub fn handle_with_task_resolved(
85    cache: &mut SessionCache,
86    path: &str,
87    mode: &str,
88    crp_mode: CrpMode,
89    task: Option<&str>,
90) -> ReadOutput {
91    handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
92}
93
94/// Fresh read with task-aware filtering (invalidates cache first).
95pub fn handle_fresh_with_task(
96    cache: &mut SessionCache,
97    path: &str,
98    mode: &str,
99    crp_mode: CrpMode,
100    task: Option<&str>,
101) -> String {
102    handle_with_options(cache, path, mode, true, crp_mode, task)
103}
104
105/// Fresh read with task-aware filtering, also returns the resolved mode name and pre-counted tokens.
106pub fn handle_fresh_with_task_resolved(
107    cache: &mut SessionCache,
108    path: &str,
109    mode: &str,
110    crp_mode: CrpMode,
111    task: Option<&str>,
112) -> ReadOutput {
113    handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
114}
115
116fn handle_with_options(
117    cache: &mut SessionCache,
118    path: &str,
119    mode: &str,
120    fresh: bool,
121    crp_mode: CrpMode,
122    task: Option<&str>,
123) -> String {
124    handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
125}
126
127fn handle_with_options_resolved(
128    cache: &mut SessionCache,
129    path: &str,
130    mode: &str,
131    fresh: bool,
132    crp_mode: CrpMode,
133    task: Option<&str>,
134) -> ReadOutput {
135    let file_ref = cache.get_file_ref(path);
136    let short = protocol::shorten_path(path);
137    let ext = Path::new(path)
138        .extension()
139        .and_then(|e| e.to_str())
140        .unwrap_or("");
141
142    if fresh {
143        cache.invalidate(path);
144    }
145
146    if mode == "diff" {
147        let (out, sent) = handle_diff(cache, path, &file_ref);
148        return ReadOutput {
149            content: out,
150            resolved_mode: "diff".into(),
151            output_tokens: sent,
152        };
153    }
154
155    if mode != "full" {
156        if let Some(existing) = cache.get(path) {
157            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
158            if stale {
159                cache.invalidate(path);
160            }
161        }
162    }
163
164    if let Some(existing) = cache.get(path) {
165        if mode == "full" {
166            let (out, sent) =
167                handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
168            let out = crate::core::redaction::redact_text_if_enabled(&out);
169            return ReadOutput {
170                content: out,
171                resolved_mode: "full".into(),
172                output_tokens: sent,
173            };
174        }
175        let content = existing.content.clone();
176        let original_tokens = existing.original_tokens;
177        let resolved_mode = if mode == "auto" {
178            resolve_auto_mode(path, original_tokens, task)
179        } else {
180            mode.to_string()
181        };
182        if is_cacheable_mode(&resolved_mode) {
183            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
184            if let Some(cached_output) = cache.get_compressed(path, &cache_key) {
185                let sent = count_tokens(cached_output);
186                let out = crate::core::redaction::redact_text_if_enabled(cached_output);
187                return ReadOutput {
188                    content: out,
189                    resolved_mode,
190                    output_tokens: sent,
191                };
192            }
193        }
194        let (out, sent) = process_mode(
195            &content,
196            &resolved_mode,
197            &file_ref,
198            &short,
199            ext,
200            original_tokens,
201            crp_mode,
202            path,
203            task,
204        );
205        if is_cacheable_mode(&resolved_mode) {
206            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
207            cache.set_compressed(path, &cache_key, out.clone());
208        }
209        let out = crate::core::redaction::redact_text_if_enabled(&out);
210        return ReadOutput {
211            content: out,
212            resolved_mode,
213            output_tokens: sent,
214        };
215    }
216
217    let content = match read_file_lossy(path) {
218        Ok(c) => c,
219        Err(e) => {
220            let msg = format!("ERROR: {e}");
221            let tokens = count_tokens(&msg);
222            return ReadOutput {
223                content: msg,
224                resolved_mode: "error".into(),
225                output_tokens: tokens,
226            };
227        }
228    };
229
230    let similar_hint = find_semantic_similar(path, &content);
231    let graph_hint = build_graph_related_hint(path);
232
233    let store_result = cache.store(path, content.clone());
234
235    update_semantic_index(path, &content);
236
237    if mode == "full" {
238        let (mut output, sent) = format_full_output(
239            &file_ref,
240            &short,
241            ext,
242            &content,
243            store_result.original_tokens,
244            store_result.line_count,
245            task,
246        );
247        if let Some(hint) = &graph_hint {
248            output.push_str(&format!("\n{hint}"));
249        }
250        if let Some(hint) = similar_hint {
251            output.push_str(&format!("\n{hint}"));
252        }
253        let output = crate::core::redaction::redact_text_if_enabled(&output);
254        return ReadOutput {
255            content: output,
256            resolved_mode: "full".into(),
257            output_tokens: sent,
258        };
259    }
260
261    let resolved_mode = if mode == "auto" {
262        resolve_auto_mode(path, store_result.original_tokens, task)
263    } else {
264        mode.to_string()
265    };
266
267    let (mut output, _sent) = process_mode(
268        &content,
269        &resolved_mode,
270        &file_ref,
271        &short,
272        ext,
273        store_result.original_tokens,
274        crp_mode,
275        path,
276        task,
277    );
278    if is_cacheable_mode(&resolved_mode) {
279        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
280        cache.set_compressed(path, &cache_key, output.clone());
281    }
282    if let Some(hint) = &graph_hint {
283        output.push_str(&format!("\n{hint}"));
284    }
285    if let Some(hint) = similar_hint {
286        output.push_str(&format!("\n{hint}"));
287    }
288    let output = crate::core::redaction::redact_text_if_enabled(&output);
289    let final_tokens = count_tokens(&output);
290    ReadOutput {
291        content: output,
292        resolved_mode,
293        output_tokens: final_tokens,
294    }
295}
296
297fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
298    // Priority 1: Intent Router with budget/pressure-aware degradation.
299    // Only fall through to Predictor/Bandit if the router returns "auto".
300    let intent_query = task.unwrap_or("read");
301    let route = crate::core::intent_router::route_v1(intent_query);
302    let intent_mode = &route.decision.effective_read_mode;
303    if intent_mode != "auto" && intent_mode != "reference" {
304        return intent_mode.clone();
305    }
306
307    // Priority 2: FileSignature-based predictor
308    let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
309    let predictor = crate::core::mode_predictor::ModePredictor::new();
310    let mut predicted = predictor
311        .predict_best_mode(&sig)
312        .unwrap_or_else(|| "full".to_string());
313    if predicted == "auto" {
314        predicted = "full".to_string();
315    }
316
317    // Priority 3: Bandit exploration when budget is tight
318    if let Some(project_root) =
319        crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
320    {
321        let ext = std::path::Path::new(file_path)
322            .extension()
323            .and_then(|e| e.to_str())
324            .unwrap_or("");
325        let bucket = match original_tokens {
326            0..=2000 => "sm",
327            2001..=10000 => "md",
328            10001..=50000 => "lg",
329            _ => "xl",
330        };
331        let bandit_key = format!("{ext}_{bucket}");
332        let mut store = crate::core::bandit::BanditStore::load(&project_root);
333        let bandit = store.get_or_create(&bandit_key);
334        let arm = bandit.select_arm();
335        if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
336            predicted = "aggressive".to_string();
337        }
338    }
339
340    // Priority 4: Adaptive mode policy
341    let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
342    let chosen = policy.choose_auto_mode(task, &predicted);
343
344    if original_tokens > 2000 {
345        if predicted == "map" || predicted == "signatures" {
346            if chosen != "map" && chosen != "signatures" {
347                return predicted;
348            }
349        } else if chosen == "full" && predicted != "full" {
350            return predicted;
351        }
352    }
353
354    chosen
355}
356
357fn find_semantic_similar(path: &str, content: &str) -> Option<String> {
358    let project_root = detect_project_root(path);
359    let index = crate::core::semantic_cache::SemanticCacheIndex::load(&project_root)?;
360
361    let similar = index.find_similar(content, 0.7);
362    let relevant: Vec<_> = similar
363        .into_iter()
364        .filter(|(p, _)| p != path)
365        .take(3)
366        .collect();
367
368    if relevant.is_empty() {
369        return None;
370    }
371
372    let hints: Vec<String> = relevant
373        .iter()
374        .map(|(p, score)| format!("  {p} ({:.0}% similar)", score * 100.0))
375        .collect();
376
377    Some(format!(
378        "[semantic: {} similar file(s) in cache]\n{}",
379        relevant.len(),
380        hints.join("\n")
381    ))
382}
383
384fn update_semantic_index(path: &str, content: &str) {
385    let project_root = detect_project_root(path);
386    let session_id = format!("{}", std::process::id());
387    let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
388    index.add_file(path, content, &session_id);
389    let _ = index.save(&project_root);
390}
391
392fn detect_project_root(path: &str) -> String {
393    crate::core::protocol::detect_project_root_or_cwd(path)
394}
395
396fn build_graph_related_hint(path: &str) -> Option<String> {
397    let project_root = detect_project_root(path);
398    crate::core::graph_context::build_related_hint(path, &project_root, 5)
399}
400
401const AUTO_DELTA_THRESHOLD: f64 = 0.6;
402
403/// Re-reads from disk; if content changed and delta is compact, sends auto-delta.
404fn handle_full_with_auto_delta(
405    cache: &mut SessionCache,
406    path: &str,
407    file_ref: &str,
408    short: &str,
409    ext: &str,
410    task: Option<&str>,
411) -> (String, usize) {
412    let Ok(disk_content) = read_file_lossy(path) else {
413        cache.record_cache_hit(path);
414        let out = if let Some(existing) = cache.get(path) {
415            format!(
416                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
417                existing.read_count, existing.line_count
418            )
419        } else {
420            format!("[file read failed and no cached version available] {file_ref}={short}")
421        };
422        let sent = count_tokens(&out);
423        return (out, sent);
424    };
425
426    let old_content = cache
427        .get(path)
428        .map(|e| e.content.clone())
429        .unwrap_or_default();
430    let store_result = cache.store(path, disk_content.clone());
431
432    if store_result.was_hit {
433        let out = format!(
434            "{file_ref}={short} cached {}t {}L\nFile already in context from previous read. Use fresh=true to re-read if content needed again.",
435            store_result.read_count, store_result.line_count
436        );
437        let sent = count_tokens(&out);
438        return (out, sent);
439    }
440
441    let diff = compressor::diff_content(&old_content, &disk_content);
442    let diff_tokens = count_tokens(&diff);
443    let full_tokens = store_result.original_tokens;
444
445    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
446        let savings = protocol::format_savings(full_tokens, diff_tokens);
447        let out = format!(
448            "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
449            disk_content.lines().count()
450        );
451        return (out, diff_tokens);
452    }
453
454    format_full_output(
455        file_ref,
456        short,
457        ext,
458        &disk_content,
459        store_result.original_tokens,
460        store_result.line_count,
461        task,
462    )
463}
464
465fn format_full_output(
466    file_ref: &str,
467    short: &str,
468    ext: &str,
469    content: &str,
470    original_tokens: usize,
471    line_count: usize,
472    task: Option<&str>,
473) -> (String, usize) {
474    let tokens = original_tokens;
475    let metadata = build_header(file_ref, short, ext, content, line_count, true);
476
477    let mut reordered: Option<String> = None;
478    {
479        let profile = crate::core::profiles::active_profile();
480        let cfg = profile.layout;
481        if cfg.enabled_effective() && line_count >= cfg.min_lines_effective() {
482            let task_str = task.unwrap_or("");
483            if !task_str.is_empty() {
484                let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
485                let r = crate::core::attention_layout_driver::maybe_reorder_for_attention(
486                    content, &keywords, &cfg,
487                );
488                if !r.skipped && r.changed {
489                    reordered = Some(r.output);
490                }
491            }
492        }
493    }
494
495    let content_for_output = reordered.as_deref().unwrap_or(content);
496
497    let mut sym = SymbolMap::new();
498    let idents = symbol_map::extract_identifiers(content_for_output, ext);
499    for ident in &idents {
500        sym.register(ident);
501    }
502
503    if sym.len() >= 3 {
504        let sym_table = sym.format_table();
505        let compressed = sym.apply(content_for_output);
506        let original_tok = count_tokens(content_for_output);
507        let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
508        let net_saving = original_tok.saturating_sub(compressed_tok);
509        if original_tok > 0 && net_saving * 100 / original_tok >= 5 {
510            let output = format!("{metadata}\n{compressed}{sym_table}");
511            let sent = count_tokens(&output);
512            let savings = protocol::format_savings(tokens, sent);
513            return (format!("{output}\n{savings}"), sent);
514        }
515    }
516
517    let output = format!("{metadata}\n{content_for_output}");
518    let sent = count_tokens(&output);
519    let savings = protocol::format_savings(tokens, sent);
520    (format!("{output}\n{savings}"), sent)
521}
522
523fn build_header(
524    file_ref: &str,
525    short: &str,
526    ext: &str,
527    content: &str,
528    line_count: usize,
529    include_deps: bool,
530) -> String {
531    let mut header = format!("{file_ref}={short} {line_count}L");
532
533    if include_deps {
534        let dep_info = deps::extract_deps(content, ext);
535        if !dep_info.imports.is_empty() {
536            let imports_str: Vec<&str> = dep_info
537                .imports
538                .iter()
539                .take(8)
540                .map(std::string::String::as_str)
541                .collect();
542            header.push_str(&format!("\n deps {}", imports_str.join(",")));
543        }
544        if !dep_info.exports.is_empty() {
545            let exports_str: Vec<&str> = dep_info
546                .exports
547                .iter()
548                .take(8)
549                .map(std::string::String::as_str)
550                .collect();
551            header.push_str(&format!("\n exports {}", exports_str.join(",")));
552        }
553    }
554
555    header
556}
557
558#[allow(clippy::too_many_arguments)]
559fn process_mode(
560    content: &str,
561    mode: &str,
562    file_ref: &str,
563    short: &str,
564    ext: &str,
565    original_tokens: usize,
566    crp_mode: CrpMode,
567    file_path: &str,
568    task: Option<&str>,
569) -> (String, usize) {
570    let line_count = content.lines().count();
571
572    match mode {
573        "auto" => {
574            let chosen = resolve_auto_mode(file_path, original_tokens, task);
575            process_mode(
576                content,
577                &chosen,
578                file_ref,
579                short,
580                ext,
581                original_tokens,
582                crp_mode,
583                file_path,
584                task,
585            )
586        }
587        "full" => format_full_output(
588            file_ref,
589            short,
590            ext,
591            content,
592            original_tokens,
593            line_count,
594            task,
595        ),
596        "signatures" => {
597            let sigs = signatures::extract_signatures(content, ext);
598            let dep_info = deps::extract_deps(content, ext);
599
600            let mut output = format!("{file_ref}={short} {line_count}L");
601            if !dep_info.imports.is_empty() {
602                let imports_str: Vec<&str> = dep_info
603                    .imports
604                    .iter()
605                    .take(8)
606                    .map(std::string::String::as_str)
607                    .collect();
608                output.push_str(&format!("\n deps {}", imports_str.join(",")));
609            }
610            for sig in &sigs {
611                output.push('\n');
612                if crp_mode.is_tdd() {
613                    output.push_str(&sig.to_tdd());
614                } else {
615                    output.push_str(&sig.to_compact());
616                }
617            }
618            let sent = count_tokens(&output);
619            let savings = protocol::format_savings(original_tokens, sent);
620            (
621                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
622                sent,
623            )
624        }
625        "map" => {
626            if ext == "php" {
627                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
628                {
629                    let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
630                    let sent = count_tokens(&output);
631                    let savings = protocol::format_savings(original_tokens, sent);
632                    output.push('\n');
633                    output.push_str(&savings);
634                    return (append_compressed_hint(&output, file_path), sent);
635                }
636            }
637
638            let sigs = signatures::extract_signatures(content, ext);
639            let dep_info = deps::extract_deps(content, ext);
640
641            let mut output = format!("{file_ref}={short} {line_count}L");
642
643            if !dep_info.imports.is_empty() {
644                output.push_str("\n  deps: ");
645                output.push_str(&dep_info.imports.join(", "));
646            }
647
648            if !dep_info.exports.is_empty() {
649                output.push_str("\n  exports: ");
650                output.push_str(&dep_info.exports.join(", "));
651            }
652
653            let key_sigs: Vec<&signatures::Signature> = sigs
654                .iter()
655                .filter(|s| s.is_exported || s.indent == 0)
656                .collect();
657
658            if !key_sigs.is_empty() {
659                output.push_str("\n  API:");
660                for sig in &key_sigs {
661                    output.push_str("\n    ");
662                    if crp_mode.is_tdd() {
663                        output.push_str(&sig.to_tdd());
664                    } else {
665                        output.push_str(&sig.to_compact());
666                    }
667                }
668            }
669
670            let sent = count_tokens(&output);
671            let savings = protocol::format_savings(original_tokens, sent);
672            (
673                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
674                sent,
675            )
676        }
677        "aggressive" => {
678            #[cfg(feature = "tree-sitter")]
679            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
680            #[cfg(not(feature = "tree-sitter"))]
681            let ast_pruned: Option<String> = None;
682
683            let base = ast_pruned.as_deref().unwrap_or(content);
684
685            let session_intent = crate::core::session::SessionState::load_latest()
686                .and_then(|s| s.active_structured_intent);
687            let raw = if let Some(ref intent) = session_intent {
688                compressor::task_aware_compress(base, Some(ext), intent)
689            } else {
690                compressor::aggressive_compress(base, Some(ext))
691            };
692            let compressed = compressor::safeguard_ratio(content, &raw);
693            let header = build_header(file_ref, short, ext, content, line_count, true);
694
695            let mut sym = SymbolMap::new();
696            let idents = symbol_map::extract_identifiers(&compressed, ext);
697            for ident in &idents {
698                sym.register(ident);
699            }
700
701            if sym.len() >= 3 {
702                let sym_table = sym.format_table();
703                let sym_applied = sym.apply(&compressed);
704                let orig_tok = count_tokens(&compressed);
705                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
706                let net = orig_tok.saturating_sub(comp_tok);
707                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
708                    let savings = protocol::format_savings(original_tokens, comp_tok);
709                    return (
710                        append_compressed_hint(
711                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
712                            file_path,
713                        ),
714                        comp_tok,
715                    );
716                }
717                let savings = protocol::format_savings(original_tokens, orig_tok);
718                return (
719                    append_compressed_hint(
720                        &format!("{header}\n{compressed}\n{savings}"),
721                        file_path,
722                    ),
723                    orig_tok,
724                );
725            }
726
727            let sent = count_tokens(&compressed);
728            let savings = protocol::format_savings(original_tokens, sent);
729            (
730                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
731                sent,
732            )
733        }
734        "entropy" => {
735            let result = entropy::entropy_compress_adaptive(content, file_path);
736            let avg_h = entropy::analyze_entropy(content).avg_entropy;
737            let header = build_header(file_ref, short, ext, content, line_count, false);
738            let techs = result.techniques.join(", ");
739            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
740            let sent = count_tokens(&output);
741            let savings = protocol::format_savings(original_tokens, sent);
742            let compression_ratio = if original_tokens > 0 {
743                1.0 - (sent as f64 / original_tokens as f64)
744            } else {
745                0.0
746            };
747            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
748            (
749                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
750                sent,
751            )
752        }
753        "task" => {
754            let task_str = task.unwrap_or("");
755            if task_str.is_empty() {
756                let header = build_header(file_ref, short, ext, content, line_count, true);
757                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
758                let sent = count_tokens(&out);
759                return (out, sent);
760            }
761            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
762            if keywords.is_empty() {
763                let header = build_header(file_ref, short, ext, content, line_count, true);
764                let out = format!(
765                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
766                );
767                let sent = count_tokens(&out);
768                return (out, sent);
769            }
770            let filtered =
771                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
772            let filtered_lines = filtered.lines().count();
773            let header = format!(
774                "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
775            );
776            let project_root = detect_project_root(file_path);
777            let graph_ctx = crate::core::graph_context::build_graph_context(
778                file_path,
779                &project_root,
780                Some(crate::core::graph_context::GraphContextOptions::default()),
781            )
782            .map(|c| crate::core::graph_context::format_graph_context(&c))
783            .unwrap_or_default();
784
785            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
786            let savings = protocol::format_savings(original_tokens, sent);
787            (
788                append_compressed_hint(
789                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
790                    file_path,
791                ),
792                sent,
793            )
794        }
795        "reference" => {
796            let tok = count_tokens(content);
797            let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
798            let sent = count_tokens(&output);
799            let savings = protocol::format_savings(original_tokens, sent);
800            (format!("{output}\n{savings}"), sent)
801        }
802        mode if mode.starts_with("lines:") => {
803            let range_str = &mode[6..];
804            let extracted = extract_line_range(content, range_str);
805            let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
806            let sent = count_tokens(&extracted);
807            let savings = protocol::format_savings(original_tokens, sent);
808            (format!("{header}\n{extracted}\n{savings}"), sent)
809        }
810        unknown => {
811            let header = build_header(file_ref, short, ext, content, line_count, true);
812            let out = format!(
813                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
814            );
815            let sent = count_tokens(&out);
816            (out, sent)
817        }
818    }
819}
820
/// Extracts the lines of `content` selected by `range_str`, each prefixed with
/// its right-aligned 1-based line number (`{n:>4}| `).
///
/// `range_str` is a comma-separated list of parts, each either
///
/// * a single line number (`"7"`), or
/// * a range `start-end`, where an empty endpoint is open-ended
///   (`"10-"` runs to the last line, `"-20"` starts at the first line).
///
/// Ranges are clamped to the file; parts that select nothing are skipped.
/// A part with an endpoint that is neither empty nor a valid number (for
/// example `"x-y"` or `"3-foo"`) is ignored rather than being widened to an
/// open range, which would silently select up to the whole file.
///
/// Returns `"No lines matched the range."` when nothing is selected.
fn extract_line_range(content: &str, range_str: &str) -> String {
    // Empty endpoint -> the open-ended default; otherwise it must be a number.
    fn endpoint(raw: &str, open_default: usize) -> Option<usize> {
        let raw = raw.trim();
        if raw.is_empty() {
            Some(open_default)
        } else {
            raw.parse::<usize>().ok()
        }
    }

    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    let mut selected: Vec<String> = Vec::new();

    for part in range_str.split(',').map(str::trim) {
        let (start, end) = match part.split_once('-') {
            Some((start_s, end_s)) => match (endpoint(start_s, 1), endpoint(end_s, total)) {
                // Ranges are clamped to the file rather than rejected.
                (Some(start), Some(end)) => (start.max(1), end.min(total)),
                _ => continue,
            },
            None => match part.parse::<usize>() {
                // Single lines are not clamped: out-of-range numbers select nothing.
                Ok(n) => (n, n),
                Err(_) => continue,
            },
        };

        if start == 0 || end > total || start > end {
            continue;
        }

        selected.extend(
            lines[start - 1..end]
                .iter()
                .enumerate()
                .map(|(offset, line)| format!("{:>4}| {line}", start + offset)),
        );
    }

    if selected.is_empty() {
        "No lines matched the range.".to_string()
    } else {
        selected.join("\n")
    }
}
849
850fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
851    let short = protocol::shorten_path(path);
852    let old_content = cache.get(path).map(|e| e.content.clone());
853
854    let new_content = match read_file_lossy(path) {
855        Ok(c) => c,
856        Err(e) => {
857            let msg = format!("ERROR: {e}");
858            let tokens = count_tokens(&msg);
859            return (msg, tokens);
860        }
861    };
862
863    let original_tokens = count_tokens(&new_content);
864
865    let diff_output = if let Some(old) = &old_content {
866        compressor::diff_content(old, &new_content)
867    } else {
868        format!("[first read]\n{new_content}")
869    };
870
871    cache.store(path, new_content);
872
873    let sent = count_tokens(&diff_output);
874    let savings = protocol::format_savings(original_tokens, sent);
875    (
876        format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}"),
877        sent,
878    )
879}
880
881#[cfg(test)]
882mod tests {
883    use super::*;
884    use std::time::Duration;
885
886    #[test]
887    fn test_header_toon_format_no_brackets() {
888        let content = "use std::io;\nfn main() {}\n";
889        let header = build_header("F1", "main.rs", "rs", content, 2, false);
890        assert!(!header.contains('['));
891        assert!(!header.contains(']'));
892        assert!(header.contains("F1=main.rs 2L"));
893    }
894
895    #[test]
896    fn test_header_toon_deps_indented() {
897        let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
898        let header = build_header("F1", "main.rs", "rs", content, 3, true);
899        if header.contains("deps") {
900            assert!(
901                header.contains("\n deps "),
902                "deps should use indented TOON format"
903            );
904            assert!(
905                !header.contains("deps:["),
906                "deps should not use bracket format"
907            );
908        }
909    }
910
911    #[test]
912    fn test_header_toon_saves_tokens() {
913        let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
914        let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
915        let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
916        let old_tokens = count_tokens(&old_header);
917        let new_tokens = count_tokens(&new_header);
918        assert!(
919            new_tokens <= old_tokens,
920            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
921        );
922    }
923
924    #[test]
925    fn test_tdd_symbols_are_compact() {
926        let symbols = [
927            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
928        ];
929        for sym in &symbols {
930            let tok = count_tokens(sym);
931            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
932        }
933    }
934
935    #[test]
936    fn test_task_mode_filters_content() {
937        let content = (0..200)
938            .map(|i| {
939                if i % 20 == 0 {
940                    format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
941                } else {
942                    format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
943                }
944            })
945            .collect::<Vec<_>>()
946            .join("\n");
947        let full_tokens = count_tokens(&content);
948        let task = Some("fix bug in validate_token");
949        let (result, result_tokens) = process_mode(
950            &content,
951            "task",
952            "F1",
953            "test.rs",
954            "rs",
955            full_tokens,
956            CrpMode::Off,
957            "test.rs",
958            task,
959        );
960        assert!(
961            result_tokens < full_tokens,
962            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
963        );
964        assert!(
965            result.contains("task-filtered"),
966            "output should contain task-filtered marker"
967        );
968    }
969
970    #[test]
971    fn test_task_mode_without_task_returns_full() {
972        let content = "fn main() {}\nfn helper() {}\n";
973        let tokens = count_tokens(content);
974        let (result, _sent) = process_mode(
975            content,
976            "task",
977            "F1",
978            "test.rs",
979            "rs",
980            tokens,
981            CrpMode::Off,
982            "test.rs",
983            None,
984        );
985        assert!(
986            result.contains("no task set"),
987            "should indicate no task: {result}"
988        );
989    }
990
991    #[test]
992    fn test_reference_mode_one_line() {
993        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
994        let tokens = count_tokens(content);
995        let (result, _sent) = process_mode(
996            content,
997            "reference",
998            "F1",
999            "test.rs",
1000            "rs",
1001            tokens,
1002            CrpMode::Off,
1003            "test.rs",
1004            None,
1005        );
1006        let lines: Vec<&str> = result.lines().collect();
1007        assert!(
1008            lines.len() <= 3,
1009            "reference mode should be very compact, got {} lines",
1010            lines.len()
1011        );
1012        assert!(result.contains("lines"), "should contain line count");
1013        assert!(result.contains("tok"), "should contain token count");
1014    }
1015
1016    #[test]
1017    fn cached_lines_mode_invalidates_on_mtime_change() {
1018        let dir = tempfile::tempdir().unwrap();
1019        let path = dir.path().join("file.txt");
1020        let p = path.to_string_lossy().to_string();
1021
1022        std::fs::write(&path, "one\nsecond\n").unwrap();
1023        let mut cache = SessionCache::new();
1024
1025        let r1 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1026        let l1: Vec<&str> = r1.content.lines().collect();
1027        let got1 = l1.get(1).copied().unwrap_or_default().trim();
1028        let got1 = got1.split_once('|').map_or(got1, |(_, s)| s.trim());
1029        assert_eq!(got1, "one");
1030
1031        std::thread::sleep(Duration::from_secs(1));
1032        std::fs::write(&path, "two\nsecond\n").unwrap();
1033
1034        let r2 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
1035        let l2: Vec<&str> = r2.content.lines().collect();
1036        let got2 = l2.get(1).copied().unwrap_or_default().trim();
1037        let got2 = got2.split_once('|').map_or(got2, |(_, s)| s.trim());
1038        assert_eq!(got2, "two");
1039    }
1040
1041    #[test]
1042    #[cfg_attr(tarpaulin, ignore)]
1043    fn benchmark_task_conditioned_compression() {
1044        // Keep this reasonably small so CI coverage instrumentation stays fast.
1045        let content = generate_benchmark_code(200);
1046        let full_tokens = count_tokens(&content);
1047        let task = Some("fix authentication in validate_token");
1048
1049        let (_full_output, full_tok) = process_mode(
1050            &content,
1051            "full",
1052            "F1",
1053            "server.rs",
1054            "rs",
1055            full_tokens,
1056            CrpMode::Off,
1057            "server.rs",
1058            task,
1059        );
1060        let (_task_output, task_tok) = process_mode(
1061            &content,
1062            "task",
1063            "F1",
1064            "server.rs",
1065            "rs",
1066            full_tokens,
1067            CrpMode::Off,
1068            "server.rs",
1069            task,
1070        );
1071        let (_sig_output, sig_tok) = process_mode(
1072            &content,
1073            "signatures",
1074            "F1",
1075            "server.rs",
1076            "rs",
1077            full_tokens,
1078            CrpMode::Off,
1079            "server.rs",
1080            task,
1081        );
1082        let (_ref_output, ref_tok) = process_mode(
1083            &content,
1084            "reference",
1085            "F1",
1086            "server.rs",
1087            "rs",
1088            full_tokens,
1089            CrpMode::Off,
1090            "server.rs",
1091            task,
1092        );
1093
1094        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
1095        eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
1096        eprintln!("  full:       {full_tok:>6} tokens (baseline)");
1097        eprintln!(
1098            "  task:       {task_tok:>6} tokens ({:.0}% savings)",
1099            (1.0 - task_tok as f64 / full_tok as f64) * 100.0
1100        );
1101        eprintln!(
1102            "  signatures: {sig_tok:>6} tokens ({:.0}% savings)",
1103            (1.0 - sig_tok as f64 / full_tok as f64) * 100.0
1104        );
1105        eprintln!(
1106            "  reference:  {ref_tok:>6} tokens ({:.0}% savings)",
1107            (1.0 - ref_tok as f64 / full_tok as f64) * 100.0
1108        );
1109        eprintln!("================================================\n");
1110
1111        assert!(task_tok < full_tok, "task mode should save tokens");
1112        assert!(sig_tok < full_tok, "signatures should save tokens");
1113        assert!(ref_tok < sig_tok, "reference should be most compact");
1114    }
1115
1116    fn generate_benchmark_code(lines: usize) -> String {
1117        let mut code = Vec::with_capacity(lines);
1118        code.push("use std::collections::HashMap;".to_string());
1119        code.push("use crate::core::auth;".to_string());
1120        code.push(String::new());
1121        code.push("pub struct Server {".to_string());
1122        code.push("    config: Config,".to_string());
1123        code.push("    cache: HashMap<String, String>,".to_string());
1124        code.push("}".to_string());
1125        code.push(String::new());
1126        code.push("impl Server {".to_string());
1127        code.push(
1128            "    pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {"
1129                .to_string(),
1130        );
1131        code.push("        let decoded = auth::decode_jwt(token)?;".to_string());
1132        code.push("        if decoded.exp < chrono::Utc::now().timestamp() {".to_string());
1133        code.push("            return Err(AuthError::Expired);".to_string());
1134        code.push("        }".to_string());
1135        code.push("        Ok(decoded.claims)".to_string());
1136        code.push("    }".to_string());
1137        code.push(String::new());
1138
1139        let remaining = lines.saturating_sub(code.len());
1140        for i in 0..remaining {
1141            if i % 30 == 0 {
1142                code.push(format!(
1143                    "    pub fn handler_{i}(&self, req: Request) -> Response {{"
1144                ));
1145            } else if i % 30 == 29 {
1146                code.push("    }".to_string());
1147            } else {
1148                code.push(format!("        let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"));
1149            }
1150        }
1151        code.push("}".to_string());
1152        code.join("\n")
1153    }
1154}