Skip to main content

lean_ctx/tools/
ctx_read.rs

1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a read: the rendered text together with the mode that produced it
/// and the token count computed while rendering.
///
/// Derives `Debug` and `Clone` so results can be logged and duplicated by callers.
#[derive(Debug, Clone)]
pub struct ReadOutput {
    /// Rendered text handed back to the caller.
    pub content: String,
    /// Mode that was actually applied (`"auto"` is replaced by the concrete mode;
    /// read failures are reported as `"error"`).
    pub resolved_mode: String,
    /// Approximate output token count from mode processing.
    /// The dispatch layer recounts the final assembled string for accurate savings.
    pub output_tokens: usize,
}
22
/// Marker line appended to compressed output to point at the full-source read.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Modes whose rendered output is kept in the per-file compressed-output cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is listed in [`CACHEABLE_MODES`] (exact, case-sensitive match).
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|candidate| *candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32    if crp_mode.is_tdd() {
33        format!("{mode}:tdd")
34    } else {
35        mode.to_string()
36    }
37}
38
/// Picks a short "proof" line from `content` for use in cache-hit stubs.
///
/// Returns `None` when `read_count < 2` (keeps early interactions quiet) or when
/// `content` has no non-blank line. Otherwise returns the first non-blank line,
/// trimmed. Lines longer than 60 bytes are cut to at most 57 bytes — backing off
/// to the nearest UTF-8 character boundary — and suffixed with `...`.
fn cache_hit_proof_line(content: &str, read_count: u32) -> Option<String> {
    const MAX_BYTES: usize = 60;
    const KEPT_BYTES: usize = 57;

    if read_count < 2 {
        return None;
    }

    let line = content.lines().map(str::trim).find(|l| !l.is_empty())?;
    if line.len() <= MAX_BYTES {
        return Some(line.to_owned());
    }

    // Index 0 is always a boundary, so the search cannot fail; the fallback is defensive.
    let cut = (0..=KEPT_BYTES)
        .rev()
        .find(|&idx| line.is_char_boundary(idx))
        .unwrap_or(0);

    let mut proof = String::with_capacity(cut + 3);
    proof.push_str(&line[..cut]);
    proof.push_str("...");
    Some(proof)
}
58
59fn append_compressed_hint(output: &str, file_path: &str) -> String {
60    if !crate::core::profiles::active_profile()
61        .output_hints
62        .compressed_hint()
63    {
64        return output.to_string();
65    }
66    format!(
67        "{output}\n{COMPRESSED_HINT}\n  ctx_read(\"{file_path}\", mode=\"full\") | ctx_retrieve(\"{file_path}\")"
68    )
69}
70
71/// Reads a file as UTF-8 with lossy fallback, enforcing binary detection and max read size limit.
72/// Defense-in-depth: verifies that the canonical path stays within the process's project root
73/// (if determinable) even though callers SHOULD have already jail-checked the path.
74pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
75    if crate::core::binary_detect::is_binary_file(path) {
76        let msg = crate::core::binary_detect::binary_file_message(path);
77        return Err(std::io::Error::other(msg));
78    }
79
80    {
81        let canonical =
82            crate::core::pathutil::safe_canonicalize_bounded(std::path::Path::new(path), 2000);
83        if let Ok(cwd) = std::env::current_dir() {
84            let root = crate::core::pathutil::safe_canonicalize_bounded(&cwd, 2000);
85            if !canonical.starts_with(&root) {
86                let allow = crate::core::pathjail::allow_paths_from_env_and_config();
87                let data_dir_ok = crate::core::data_dir::lean_ctx_data_dir()
88                    .ok()
89                    .is_some_and(|d| canonical.starts_with(d));
90                let tmp_ok = canonical.starts_with(std::env::temp_dir());
91                if !allow.iter().any(|a| canonical.starts_with(a)) && !data_dir_ok && !tmp_ok {
92                    tracing::warn!(
93                        "defense-in-depth: path may escape project root: {}",
94                        canonical.display()
95                    );
96                }
97            }
98        }
99    }
100
101    let cap = crate::core::limits::max_read_bytes();
102
103    let file = open_with_retry(path)?;
104    let meta = file
105        .metadata()
106        .map_err(|e| std::io::Error::other(format!("cannot stat open file descriptor: {e}")))?;
107    if meta.len() > cap as u64 {
108        return Err(std::io::Error::other(format!(
109            "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
110             Increase the limit or use a line-range read: mode=\"lines:1-100\"",
111            meta.len(),
112            cap
113        )));
114    }
115
116    use std::io::Read;
117    let mut bytes = Vec::with_capacity(meta.len() as usize);
118    std::io::BufReader::new(file).read_to_end(&mut bytes)?;
119    match String::from_utf8(bytes) {
120        Ok(s) => Ok(s),
121        Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
122    }
123}
124
125/// Opens a file, retrying once after a brief pause on NotFound.
126/// Works around overlay/FUSE stat-cache races in container runtimes (Docker, Codex).
127/// Uses O_NOFOLLOW on Unix for TOCTOU symlink protection.
128fn open_with_retry(path: &str) -> Result<std::fs::File, std::io::Error> {
129    match open_nofollow(path) {
130        Ok(f) => Ok(f),
131        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
132            std::thread::sleep(std::time::Duration::from_millis(50));
133            open_nofollow(path).map_err(|e| {
134                if e.kind() == std::io::ErrorKind::NotFound {
135                    std::io::Error::other(format!(
136                        "file not found: {path} — verify the path with ctx_tree or ctx_search"
137                    ))
138                } else {
139                    e
140                }
141            })
142        }
143        Err(e) => Err(e),
144    }
145}
146
147#[cfg(unix)]
148fn open_nofollow(path: &str) -> Result<std::fs::File, std::io::Error> {
149    use std::os::unix::fs::OpenOptionsExt;
150    use std::path::Path;
151
152    let p = Path::new(path);
153    // Canonicalize the parent directory (resolving symlinks in the directory path)
154    // but apply O_NOFOLLOW only to the final file component. This prevents
155    // symlink-following attacks on the target file while allowing legitimate
156    // directory symlinks (e.g., /tmp → /private/tmp on macOS).
157    if let (Some(parent), Some(filename)) = (p.parent(), p.file_name()) {
158        if parent.exists() {
159            let canonical_parent = crate::core::pathutil::safe_canonicalize_bounded(parent, 2000);
160            let canonical_path = canonical_parent.join(filename);
161            return std::fs::OpenOptions::new()
162                .read(true)
163                .custom_flags(libc::O_NOFOLLOW)
164                .open(&canonical_path);
165        }
166    }
167
168    // Fallback: direct open with O_NOFOLLOW
169    std::fs::OpenOptions::new()
170        .read(true)
171        .custom_flags(libc::O_NOFOLLOW)
172        .open(path)
173}
174
/// Non-Unix: plain read-only open; no `O_NOFOLLOW` equivalent is applied here.
#[cfg(not(unix))]
fn open_nofollow(path: &str) -> Result<std::fs::File, std::io::Error> {
    std::fs::OpenOptions::new().read(true).open(path)
}
179
180/// Reads a file through the cache and applies the requested compression mode.
181pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
182    handle_with_options(cache, path, mode, false, crp_mode, None)
183}
184
185/// Like `handle`, but invalidates the cache first to force a fresh disk read.
186pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
187    handle_with_options(cache, path, mode, true, crp_mode, None)
188}
189
190/// Reads a file with task-aware filtering to prioritize task-relevant content.
191pub fn handle_with_task(
192    cache: &mut SessionCache,
193    path: &str,
194    mode: &str,
195    crp_mode: CrpMode,
196    task: Option<&str>,
197) -> String {
198    handle_with_options(cache, path, mode, false, crp_mode, task)
199}
200
201/// Like `handle_with_task`, also returns the resolved mode name and pre-counted tokens.
202pub fn handle_with_task_resolved(
203    cache: &mut SessionCache,
204    path: &str,
205    mode: &str,
206    crp_mode: CrpMode,
207    task: Option<&str>,
208) -> ReadOutput {
209    handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
210}
211
212/// Fresh read with task-aware filtering (invalidates cache first).
213pub fn handle_fresh_with_task(
214    cache: &mut SessionCache,
215    path: &str,
216    mode: &str,
217    crp_mode: CrpMode,
218    task: Option<&str>,
219) -> String {
220    handle_with_options(cache, path, mode, true, crp_mode, task)
221}
222
223/// Fresh read with task-aware filtering, also returns the resolved mode name and pre-counted tokens.
224pub fn handle_fresh_with_task_resolved(
225    cache: &mut SessionCache,
226    path: &str,
227    mode: &str,
228    crp_mode: CrpMode,
229    task: Option<&str>,
230) -> ReadOutput {
231    handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
232}
233
234fn handle_with_options(
235    cache: &mut SessionCache,
236    path: &str,
237    mode: &str,
238    fresh: bool,
239    crp_mode: CrpMode,
240    task: Option<&str>,
241) -> String {
242    handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
243}
244
/// Reports whether this process should treat every read as a fresh read.
///
/// True when `LEAN_CTX_FORCE_FRESH` is exactly `"1"` or `"true"`, or when
/// `CURSOR_TASK_ID` is set to a non-empty value (taken as the marker of a
/// subagent / forked agent, whose inherited parent cache may be stale).
/// The environment is inspected once; the answer is latched for the lifetime
/// of the process.
fn is_subagent_context() -> bool {
    fn detect() -> bool {
        let forced = matches!(
            std::env::var("LEAN_CTX_FORCE_FRESH").as_deref(),
            Ok("1" | "true")
        );
        forced || std::env::var("CURSOR_TASK_ID").is_ok_and(|id| !id.is_empty())
    }

    static IS_SUBAGENT: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
    *IS_SUBAGENT.get_or_init(detect)
}
256
257fn handle_with_options_resolved(
258    cache: &mut SessionCache,
259    path: &str,
260    mode: &str,
261    fresh: bool,
262    crp_mode: CrpMode,
263    task: Option<&str>,
264) -> ReadOutput {
265    let effective_fresh = fresh || is_subagent_context();
266
267    if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
268        bt.next_seq();
269    }
270    let mut result = handle_with_options_inner(cache, path, mode, effective_fresh, crp_mode, task);
271
272    if let Some(entry) = cache.get_mut(path) {
273        entry.last_mode.clone_from(&result.resolved_mode);
274    }
275
276    let dedup_allowed = matches!(
277        result.resolved_mode.as_str(),
278        "map" | "signatures" | "aggressive" | "entropy" | "task"
279    );
280    if dedup_allowed {
281        if let Some(deduped) = cache.apply_dedup(path, &result.content) {
282            let new_tokens = count_tokens(&deduped);
283            if new_tokens < result.output_tokens {
284                result.content = deduped;
285                result.output_tokens = new_tokens;
286            }
287        }
288    }
289
290    if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
291        let original_tokens = cache.get(path).map_or(0, |e| e.original_tokens);
292        bt.record_read(
293            path,
294            &result.resolved_mode,
295            result.output_tokens,
296            original_tokens,
297        );
298    }
299
300    result
301}
302
303fn handle_with_options_inner(
304    cache: &mut SessionCache,
305    path: &str,
306    mode: &str,
307    fresh: bool,
308    crp_mode: CrpMode,
309    task: Option<&str>,
310) -> ReadOutput {
311    let file_ref = cache.get_file_ref(path);
312    let short = protocol::shorten_path(path);
313    let ext = Path::new(path)
314        .extension()
315        .and_then(|e| e.to_str())
316        .unwrap_or("");
317
318    if fresh {
319        if mode == "diff" {
320            let warning = "[warning] fresh+diff is redundant — fresh invalidates cache, no diff possible. Use mode=full with fresh=true instead.";
321            return ReadOutput {
322                content: warning.to_string(),
323                resolved_mode: "diff".into(),
324                output_tokens: count_tokens(warning),
325            };
326        }
327        cache.invalidate(path);
328    }
329
330    if mode == "diff" {
331        let (out, _) = handle_diff(cache, path, &file_ref);
332        let out = crate::core::redaction::redact_text_if_enabled(&out);
333        let sent = count_tokens(&out);
334        return ReadOutput {
335            content: out,
336            resolved_mode: "diff".into(),
337            output_tokens: sent,
338        };
339    }
340
341    if mode != "full" {
342        if let Some(existing) = cache.get(path) {
343            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
344            if stale {
345                cache.invalidate(path);
346            }
347        }
348    }
349
350    // Extract immutable data from cache entry, then drop the borrow before
351    // any mutable operations (record_cache_hit, set_compressed, invalidate).
352    let cache_snapshot = cache.get(path).map(|existing| {
353        (
354            existing.stored_mtime,
355            existing.read_count,
356            existing.line_count,
357            existing.original_tokens,
358            existing.content(),
359        )
360    });
361
362    if let Some((cached_mtime, read_count, line_count, original_tokens, content_opt)) =
363        cache_snapshot
364    {
365        if mode == "full" {
366            // Fast mtime check: if file unchanged on disk AND full content was previously
367            // delivered, return a minimal stub. After host compaction, delivery flags are
368            // reset so the agent gets full content again automatically.
369            // "safe" policy never returns stubs — always delivers content.
370            let policy_allows_stub =
371                crate::server::compaction_sync::effective_cache_policy() != "safe";
372            if policy_allows_stub
373                && !crate::core::cache::is_cache_entry_stale(path, cached_mtime)
374                && cache.is_full_delivered(path)
375            {
376                cache.record_cache_hit(path);
377                let out = if crate::core::protocol::meta_visible() {
378                    format!(
379                        "{file_ref}={short} [unchanged, {line_count}L, use cached context]\nFile unchanged on disk (same hash). If you haven't seen this content, use fresh=true to force re-read.",
380                        )
381                } else {
382                    let proof = content_opt
383                        .as_deref()
384                        .and_then(|c| cache_hit_proof_line(c, read_count));
385                    let reads_note = if read_count > 3 {
386                        format!(" (read {}x, unchanged)", read_count + 1)
387                    } else {
388                        String::new()
389                    };
390                    match proof {
391                        Some(p) => format!(
392                            "{file_ref}={short} [unchanged, {line_count}L, use cached context{reads_note} | first: \"{p}\"]"
393                        ),
394                        None => format!(
395                            "{file_ref}={short} [unchanged, {line_count}L, use cached context{reads_note}]"
396                        ),
397                    }
398                };
399                let out = crate::core::redaction::redact_text_if_enabled(&out);
400                let sent = count_tokens(&out);
401                return ReadOutput {
402                    content: out,
403                    resolved_mode: "full".into(),
404                    output_tokens: sent,
405                };
406            }
407            let (out, _) = handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
408            let out = crate::core::redaction::redact_text_if_enabled(&out);
409            let sent = count_tokens(&out);
410            return ReadOutput {
411                content: out,
412                resolved_mode: "full".into(),
413                output_tokens: sent,
414            };
415        }
416
417        // Resolve mode first so we can check compressed output cache BEFORE
418        // decompressing the full content (avoids ~2-5ms zstd overhead on hits).
419        let resolved_mode = if mode == "auto" {
420            resolve_auto_mode(path, original_tokens, task)
421        } else {
422            mode.to_string()
423        };
424
425        if is_cacheable_mode(&resolved_mode) {
426            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
427            let compressed_hit = cache.get_compressed(path, &cache_key).cloned();
428            if let Some(cached_output) = compressed_hit {
429                cache.record_cache_hit(path);
430                let out = crate::core::redaction::redact_text_if_enabled(&cached_output);
431                let sent = count_tokens(&out);
432                return ReadOutput {
433                    content: out,
434                    resolved_mode,
435                    output_tokens: sent,
436                };
437            }
438        }
439
440        if let Some(content) = content_opt {
441            let (out, _) = process_mode(
442                &content,
443                &resolved_mode,
444                &file_ref,
445                &short,
446                ext,
447                original_tokens,
448                crp_mode,
449                path,
450                task,
451            );
452            if is_cacheable_mode(&resolved_mode) {
453                let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
454                cache.set_compressed(path, &cache_key, out.clone());
455            }
456            let out = crate::core::redaction::redact_text_if_enabled(&out);
457            let sent = count_tokens(&out);
458            return ReadOutput {
459                content: out,
460                resolved_mode,
461                output_tokens: sent,
462            };
463        }
464        cache.invalidate(path);
465    }
466
467    let content = match read_file_lossy(path) {
468        Ok(c) => c,
469        Err(e) => {
470            let msg = format!("ERROR: {e}");
471            let tokens = count_tokens(&msg);
472            return ReadOutput {
473                content: msg,
474                resolved_mode: "error".into(),
475                output_tokens: tokens,
476            };
477        }
478    };
479
480    let store_result = cache.store(path, &content);
481
482    // Skip expensive hint computation for line-range reads and first reads.
483    // Hints are only useful from the 2nd read onwards when the file is contextually relevant.
484    let is_line_range = mode.starts_with("lines:");
485    let hints = crate::core::profiles::active_profile().output_hints;
486    let is_repeat_read = store_result.read_count > 1;
487    let similar_hint = if !is_line_range && is_repeat_read && hints.semantic_hint() {
488        find_similar_and_update_semantic_index(path, &content)
489    } else {
490        None
491    };
492    let graph_hint = if !is_line_range && is_repeat_read && hints.related_hint() {
493        build_graph_related_hint(path)
494    } else {
495        None
496    };
497
498    if mode == "full" {
499        cache.mark_full_delivered(path);
500        let (mut output, _) = format_full_output(
501            &file_ref,
502            &short,
503            ext,
504            &content,
505            store_result.original_tokens,
506            store_result.line_count,
507            task,
508        );
509        if let Some(hint) = &graph_hint {
510            output.push_str(&format!("\n{hint}"));
511        }
512        if let Some(hint) = similar_hint {
513            output.push_str(&format!("\n{hint}"));
514        }
515        let output = crate::core::redaction::redact_text_if_enabled(&output);
516        let sent = count_tokens(&output);
517        return ReadOutput {
518            content: output,
519            resolved_mode: "full".into(),
520            output_tokens: sent,
521        };
522    }
523
524    let resolved_mode = if mode == "auto" {
525        resolve_auto_mode(path, store_result.original_tokens, task)
526    } else {
527        mode.to_string()
528    };
529
530    let (mut output, _sent) = process_mode(
531        &content,
532        &resolved_mode,
533        &file_ref,
534        &short,
535        ext,
536        store_result.original_tokens,
537        crp_mode,
538        path,
539        task,
540    );
541    if let Some(hint) = &graph_hint {
542        output.push_str(&format!("\n{hint}"));
543    }
544    if let Some(hint) = similar_hint {
545        output.push_str(&format!("\n{hint}"));
546    }
547    if is_cacheable_mode(&resolved_mode) {
548        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
549        cache.set_compressed(path, &cache_key, output.clone());
550    }
551    let output = crate::core::redaction::redact_text_if_enabled(&output);
552    let final_tokens = count_tokens(&output);
553    ReadOutput {
554        content: output,
555        resolved_mode,
556        output_tokens: final_tokens,
557    }
558}
559
/// Returns `true` when `path` looks like an agent-instruction file.
///
/// Matching is case-insensitive and succeeds when either
/// * the file name is one of the known instruction file names, or
/// * the path contains one of the known instruction directory / file markers.
pub fn is_instruction_file(path: &str) -> bool {
    const INSTRUCTION_FILE_NAMES: [&str; 7] = [
        "skill.md",
        "agents.md",
        "rules.md",
        ".cursorrules",
        ".clinerules",
        "lean-ctx.md",
        "lean-ctx.mdc",
    ];
    const INSTRUCTION_PATH_MARKERS: [&str; 4] = [
        "/skills/",
        "/.cursor/rules/",
        "/.claude/rules/",
        "/agents.md",
    ];

    let lowered = path.to_lowercase();
    let file_name = std::path::Path::new(&lowered)
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("");

    INSTRUCTION_FILE_NAMES.contains(&file_name)
        || INSTRUCTION_PATH_MARKERS
            .iter()
            .any(|marker| lowered.contains(*marker))
}
581
582fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
583    if is_instruction_file(file_path) {
584        return "full".to_string();
585    }
586
587    if let Ok(bt) = crate::core::bounce_tracker::global().lock() {
588        if bt.should_force_full(file_path) {
589            return "full".to_string();
590        }
591    }
592
593    let intent_query = task.unwrap_or("read");
594    let route = crate::core::intent_router::route_v1(intent_query);
595    let intent_mode = &route.decision.effective_read_mode;
596    if intent_mode != "auto" && intent_mode != "reference" {
597        return intent_mode.clone();
598    }
599
600    // Priority 2: FileSignature-based predictor
601    let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
602    let predictor = crate::core::mode_predictor::ModePredictor::new();
603    let mut predicted = predictor
604        .predict_best_mode(&sig)
605        .unwrap_or_else(|| "full".to_string());
606    if predicted == "auto" {
607        predicted = "full".to_string();
608    }
609
610    // Priority 3: Bandit exploration when budget is tight
611    // SAFETY: Bandit NEVER overrides "full" — full is sacred (byte-accurate content needed for edits)
612    if predicted != "full" {
613        if let Some(project_root) =
614            crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
615        {
616            let ext = std::path::Path::new(file_path)
617                .extension()
618                .and_then(|e| e.to_str())
619                .unwrap_or("");
620            let bucket = match original_tokens {
621                0..=2000 => "sm",
622                2001..=10000 => "md",
623                10001..=50000 => "lg",
624                _ => "xl",
625            };
626            let bandit_key = format!("{ext}_{bucket}");
627            let mut store = crate::core::bandit::BanditStore::load(&project_root);
628            let bandit = store.get_or_create(&bandit_key);
629            let arm = bandit.select_arm();
630            if arm.budget_ratio < 0.25 && original_tokens > 2000 {
631                predicted = "aggressive".to_string();
632            }
633        }
634    }
635
636    // Priority 4: Adaptive mode policy
637    let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
638    let chosen = policy.choose_auto_mode(task, &predicted);
639
640    if original_tokens > 2000 {
641        if predicted == "map" || predicted == "signatures" {
642            if chosen != "map" && chosen != "signatures" {
643                return predicted;
644            }
645        } else if chosen == "full" && predicted != "full" {
646            return predicted;
647        }
648    }
649
650    chosen
651}
652
653fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
654    const MAX_CONTENT_BYTES_FOR_SEMANTIC: usize = 32_768;
655
656    if content.len() > MAX_CONTENT_BYTES_FOR_SEMANTIC {
657        return None;
658    }
659
660    let cfg = crate::core::config::Config::load();
661    let profile = crate::core::config::MemoryProfile::effective(&cfg);
662    if !profile.semantic_cache_enabled() {
663        return None;
664    }
665
666    let project_root = detect_project_root(path);
667    let session_id = format!("{}", std::process::id());
668    let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
669
670    let similar = index.find_similar(content, 0.7);
671    let relevant: Vec<_> = similar
672        .into_iter()
673        .filter(|(p, _)| p != path)
674        .take(3)
675        .collect();
676
677    index.add_file(path, content, &session_id);
678    let _ = index.save(&project_root);
679
680    if relevant.is_empty() {
681        return None;
682    }
683
684    let hints: Vec<String> = relevant
685        .iter()
686        .map(|(p, score)| format!("  {p} ({:.0}% similar)", score * 100.0))
687        .collect();
688
689    Some(format!(
690        "[semantic: {} similar file(s) in cache]\n{}",
691        relevant.len(),
692        hints.join("\n")
693    ))
694}
695
696fn detect_project_root(path: &str) -> String {
697    crate::core::protocol::detect_project_root_or_cwd(path)
698}
699
700fn build_graph_related_hint(path: &str) -> Option<String> {
701    let project_root = detect_project_root(path);
702    crate::core::graph_context::build_related_hint(path, &project_root, 5)
703}
704
705const AUTO_DELTA_THRESHOLD: f64 = 0.6;
706
707/// Re-reads from disk; if content changed and delta is compact, sends auto-delta.
708fn handle_full_with_auto_delta(
709    cache: &mut SessionCache,
710    path: &str,
711    file_ref: &str,
712    short: &str,
713    ext: &str,
714    task: Option<&str>,
715) -> (String, usize) {
716    let Ok(disk_content) = read_file_lossy(path) else {
717        cache.record_cache_hit(path);
718        if let Some(existing) = cache.get(path) {
719            if !crate::core::protocol::meta_visible() {
720                if let Some(cached) = existing.content() {
721                    return format_full_output(
722                        file_ref,
723                        short,
724                        ext,
725                        &cached,
726                        existing.original_tokens,
727                        existing.line_count,
728                        task,
729                    );
730                }
731            }
732            let out = format!(
733                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
734                existing.read_count, existing.line_count
735            );
736            let sent = count_tokens(&out);
737            return (out, sent);
738        }
739        let out = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
740            format!("[file read failed and no cached version available] {file_ref}={short}")
741        } else {
742            format!("[file read failed and no cached version available] {short}")
743        };
744        let sent = count_tokens(&out);
745        return (out, sent);
746    };
747
748    let old_content = cache
749        .get(path)
750        .and_then(crate::core::cache::CacheEntry::content)
751        .unwrap_or_default();
752    let store_result = cache.store(path, &disk_content);
753
754    if store_result.was_hit {
755        let policy_allows_stub = crate::server::compaction_sync::effective_cache_policy() != "safe";
756        if policy_allows_stub && store_result.full_content_delivered {
757            let out = if crate::core::protocol::meta_visible() {
758                format!(
759                    "{file_ref}={short} [unchanged, {}L, use cached context]\nFile unchanged on disk (same hash). If you haven't seen this content, use fresh=true to force re-read.",
760                    store_result.line_count
761                )
762            } else {
763                let proof = cache_hit_proof_line(&disk_content, store_result.read_count);
764                let reads_note = if store_result.read_count > 3 {
765                    format!(" (read {}x, unchanged)", store_result.read_count)
766                } else {
767                    String::new()
768                };
769                match proof {
770                    Some(p) => format!(
771                        "{file_ref}={short} [unchanged, {}L, use cached context{reads_note} | first: \"{p}\"]",
772                        store_result.line_count
773                    ),
774                    None => format!(
775                        "{file_ref}={short} [unchanged, {}L, use cached context{reads_note}]",
776                        store_result.line_count
777                    ),
778                }
779            };
780            let sent = count_tokens(&out);
781            return (out, sent);
782        }
783        cache.mark_full_delivered(path);
784        return format_full_output(
785            file_ref,
786            short,
787            ext,
788            &disk_content,
789            store_result.original_tokens,
790            store_result.line_count,
791            task,
792        );
793    }
794
795    let diff = compressor::diff_content(&old_content, &disk_content);
796    let diff_tokens = count_tokens(&diff);
797    let full_tokens = store_result.original_tokens;
798
799    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
800        let savings = protocol::format_savings(full_tokens, diff_tokens);
801        let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
802            format!("{file_ref}={short}")
803        } else {
804            short.to_string()
805        };
806        let out = format!(
807            "{head} [auto-delta] ∆{}L\n{diff}\n{savings}",
808            disk_content.lines().count()
809        );
810        return (out, diff_tokens);
811    }
812
813    format_full_output(
814        file_ref,
815        short,
816        ext,
817        &disk_content,
818        store_result.original_tokens,
819        store_result.line_count,
820        task,
821    )
822}
823
824fn format_full_output(
825    file_ref: &str,
826    short: &str,
827    ext: &str,
828    content: &str,
829    original_tokens: usize,
830    line_count: usize,
831    _task: Option<&str>,
832) -> (String, usize) {
833    let tokens = original_tokens;
834    let metadata = build_header(file_ref, short, ext, content, line_count, true);
835
836    let output = format!("{metadata}\n{content}");
837    let sent = count_tokens(&output);
838    (protocol::append_savings(&output, tokens, sent), sent)
839}
840
841fn build_header(
842    file_ref: &str,
843    short: &str,
844    ext: &str,
845    content: &str,
846    line_count: usize,
847    include_deps: bool,
848) -> String {
849    let mut header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
850        format!("{file_ref}={short} {line_count}L")
851    } else {
852        format!("{short} {line_count}L")
853    };
854
855    if include_deps {
856        let dep_info = deps::extract_deps(content, ext);
857        if !dep_info.imports.is_empty() {
858            let imports_str: Vec<&str> = dep_info
859                .imports
860                .iter()
861                .take(8)
862                .map(std::string::String::as_str)
863                .collect();
864            header.push_str(&format!("\n deps {}", imports_str.join(",")));
865        }
866        if !dep_info.exports.is_empty() {
867            let exports_str: Vec<&str> = dep_info
868                .exports
869                .iter()
870                .take(8)
871                .map(std::string::String::as_str)
872                .collect();
873            header.push_str(&format!("\n exports {}", exports_str.join(",")));
874        }
875    }
876
877    header
878}
879
/// Renders `content` according to `mode` and returns `(output_text, token_count)`.
///
/// Recognised modes: `auto`, `full`, `signatures`, `map`, `aggressive`, `entropy`,
/// `task`, `reference` and `lines:<ranges>`. Any other value falls back to the full
/// content prefixed with a warning line.
///
/// The returned token count is whatever each branch measures for the text it sends.
/// In most branches it is taken before the savings line and/or compressed hint is
/// appended, and in `lines:` mode it covers only the extracted lines, not the header.
#[allow(clippy::too_many_arguments)]
fn process_mode(
    content: &str,
    mode: &str,
    file_ref: &str,
    short: &str,
    ext: &str,
    original_tokens: usize,
    crp_mode: CrpMode,
    file_path: &str,
    task: Option<&str>,
) -> (String, usize) {
    let line_count = content.lines().count();

    match mode {
        // Resolve "auto" to a concrete mode, then re-dispatch with the same arguments.
        "auto" => {
            let chosen = resolve_auto_mode(file_path, original_tokens, task);
            process_mode(
                content,
                &chosen,
                file_ref,
                short,
                ext,
                original_tokens,
                crp_mode,
                file_path,
                task,
            )
        }
        "full" => format_full_output(
            file_ref,
            short,
            ext,
            content,
            original_tokens,
            line_count,
            task,
        ),
        // Header line, optional deps line (first 8 imports), then one line per signature.
        "signatures" => {
            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                format!("{file_ref}={short} {line_count}L")
            } else {
                format!("{short} {line_count}L")
            };
            if !dep_info.imports.is_empty() {
                let imports_str: Vec<&str> = dep_info
                    .imports
                    .iter()
                    .take(8)
                    .map(std::string::String::as_str)
                    .collect();
                output.push_str(&format!("\n deps {}", imports_str.join(",")));
            }
            for sig in &sigs {
                output.push('\n');
                // TDD mode selects the `to_tdd` rendering; otherwise the compact one.
                if crp_mode.is_tdd() {
                    output.push_str(&sig.to_tdd());
                } else {
                    output.push_str(&sig.to_compact());
                }
            }
            // Tokens are counted before the savings line and hint are appended.
            let sent = count_tokens(&output);
            (
                append_compressed_hint(
                    &protocol::append_savings(&output, original_tokens, sent),
                    file_path,
                ),
                sent,
            )
        }
        "map" => {
            // PHP gets a dedicated map; when `compress_php_map` returns `None` we fall
            // through to the generic map below.
            if ext == "php" {
                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
                {
                    let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                        format!("{file_ref}={short} {line_count}L\n{php_map}")
                    } else {
                        format!("{short} {line_count}L\n{php_map}")
                    };
                    let sent = count_tokens(&output);
                    let output = protocol::append_savings(&output, original_tokens, sent);
                    return (append_compressed_hint(&output, file_path), sent);
                }
            }

            let sigs = signatures::extract_signatures(content, ext);
            let dep_info = deps::extract_deps(content, ext);

            let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                format!("{file_ref}={short} {line_count}L")
            } else {
                format!("{short} {line_count}L")
            };

            // Unlike the "signatures" mode, all imports/exports are listed (no cap of 8)
            // and the lines use a two-space indent with a `label: ` prefix.
            if !dep_info.imports.is_empty() {
                output.push_str("\n  deps: ");
                output.push_str(&dep_info.imports.join(", "));
            }

            if !dep_info.exports.is_empty() {
                output.push_str("\n  exports: ");
                output.push_str(&dep_info.exports.join(", "));
            }

            // Keep only signatures that are exported or sit at indent level 0.
            let key_sigs: Vec<&signatures::Signature> = sigs
                .iter()
                .filter(|s| s.is_exported || s.indent == 0)
                .collect();

            if !key_sigs.is_empty() {
                output.push_str("\n  API:");
                for sig in &key_sigs {
                    output.push_str("\n    ");
                    if crp_mode.is_tdd() {
                        output.push_str(&sig.to_tdd());
                    } else {
                        output.push_str(&sig.to_compact());
                    }
                }
            }

            let sent = count_tokens(&output);
            (
                append_compressed_hint(
                    &protocol::append_savings(&output, original_tokens, sent),
                    file_path,
                ),
                sent,
            )
        }
        "aggressive" => {
            // With the `tree-sitter` feature the content is AST-pruned first; without it
            // `ast_pruned` is always `None` and the raw content is compressed directly.
            #[cfg(feature = "tree-sitter")]
            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
            #[cfg(not(feature = "tree-sitter"))]
            let ast_pruned: Option<String> = None;

            let base = ast_pruned.as_deref().unwrap_or(content);

            // Use task-aware compression when the latest session carries a structured
            // intent; otherwise use the generic aggressive compressor.
            let session_intent = crate::core::session::SessionState::load_latest()
                .and_then(|s| s.active_structured_intent);
            let raw = if let Some(ref intent) = session_intent {
                compressor::task_aware_compress(base, Some(ext), intent)
            } else {
                compressor::aggressive_compress(base, Some(ext))
            };
            // NOTE(review): `safeguard_ratio` receives the original content and the
            // compressed candidate; presumably it guards against over-compression — confirm.
            let compressed = compressor::safeguard_ratio(content, &raw);
            let header = build_header(file_ref, short, ext, content, line_count, true);

            let mut sym = SymbolMap::new();
            let idents = symbol_map::extract_identifiers(&compressed, ext);
            for ident in &idents {
                sym.register(ident);
            }

            // Symbol substitution is only attempted when the map holds at least 3 entries.
            if sym.len() >= 3 {
                let sym_table = sym.format_table();
                let sym_applied = sym.apply(&compressed);
                let orig_tok = count_tokens(&compressed);
                // Cost of the substituted form includes the symbol table itself.
                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
                let net = orig_tok.saturating_sub(comp_tok);
                // Adopt the substituted form only when it saves at least 5% of tokens.
                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
                    let savings = protocol::format_savings(original_tokens, comp_tok);
                    return (
                        append_compressed_hint(
                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
                            file_path,
                        ),
                        comp_tok,
                    );
                }
                // Not worth it: emit the compressed text without symbol substitution.
                let savings = protocol::format_savings(original_tokens, orig_tok);
                return (
                    append_compressed_hint(
                        &format!("{header}\n{compressed}\n{savings}"),
                        file_path,
                    ),
                    orig_tok,
                );
            }

            let sent = count_tokens(&compressed);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
                sent,
            )
        }
        "entropy" => {
            let result = entropy::entropy_compress_adaptive(content, file_path);
            let avg_h = entropy::analyze_entropy(content).avg_entropy;
            // Header without deps; the average entropy and applied techniques follow on
            // the same line.
            let header = build_header(file_ref, short, ext, content, line_count, false);
            let techs = result.techniques.join(", ");
            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            let compression_ratio = if original_tokens > 0 {
                1.0 - (sent as f64 / original_tokens as f64)
            } else {
                0.0
            };
            // Side effect: report to the adaptive-threshold bandit whether this run
            // saved more than 15% of the original tokens.
            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
            (
                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
                sent,
            )
        }
        "task" => {
            // Without a task (None or empty) return the full content plus a notice.
            let task_str = task.unwrap_or("");
            if task_str.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
                let sent = count_tokens(&out);
                return (out, sent);
            }
            // Same fallback when the task text yields no keywords to filter on.
            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
            if keywords.is_empty() {
                let header = build_header(file_ref, short, ext, content, line_count, true);
                let out = format!(
                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
                );
                let sent = count_tokens(&out);
                return (out, sent);
            }
            // NOTE(review): the meaning of the 0.3 argument is defined by
            // `information_bottleneck_filter`; it is not visible here — confirm.
            let filtered =
                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
            let filtered_lines = filtered.lines().count();
            let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                format!("{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
            } else {
                format!("{short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
            };
            // Graph context is appended only when the active profile enables it; a
            // missing graph context degrades to an empty string.
            let graph_ctx = if crate::core::profiles::active_profile()
                .output_hints
                .graph_context_block()
            {
                let project_root = detect_project_root(file_path);
                crate::core::graph_context::build_graph_context(
                    file_path,
                    &project_root,
                    Some(crate::core::graph_context::GraphContextOptions::default()),
                )
                .map(|c| crate::core::graph_context::format_graph_context(&c))
                .unwrap_or_default()
            } else {
                String::new()
            };

            // Token count is the sum over the three parts rather than a count of the
            // assembled string.
            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
            let savings = protocol::format_savings(original_tokens, sent);
            (
                append_compressed_hint(
                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
                    file_path,
                ),
                sent,
            )
        }
        // One-line summary: line count, token count of the content, and extension.
        "reference" => {
            let tok = count_tokens(content);
            let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})")
            } else {
                format!("{short}: {line_count} lines, {tok} tok ({ext})")
            };
            let sent = count_tokens(&output);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{output}\n{savings}"), sent)
        }
        mode if mode.starts_with("lines:") => {
            // Skip the 6-byte "lines:" prefix; the remainder is the range specification.
            let range_str = &mode[6..];
            let extracted = extract_line_range(content, range_str);
            let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
                format!("{file_ref}={short} {line_count}L lines:{range_str}")
            } else {
                format!("{short} {line_count}L lines:{range_str}")
            };
            // Only the extracted lines are counted; the header is not included.
            let sent = count_tokens(&extracted);
            let savings = protocol::format_savings(original_tokens, sent);
            (format!("{header}\n{extracted}\n{savings}"), sent)
        }
        // Unrecognised mode: warn, then return header + full content (no savings line).
        unknown => {
            let header = build_header(file_ref, short, ext, content, line_count, true);
            let out = format!(
                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
            );
            let sent = count_tokens(&out);
            (out, sent)
        }
    }
}
1173
/// Selects lines from `content` according to a comma-separated range specification.
///
/// Each entry of `range_str` is either a single 1-based line number (`7`) or a range
/// `start-end`. A range bound that is missing or not a number defaults to the first
/// line (for `start`) or the last line (for `end`); bounds are clamped to the file.
/// Out-of-range single numbers and entries that are neither form are ignored.
///
/// Every selected line is rendered as a right-aligned, 4-wide line number followed by
/// `| ` and the line text; results are joined with newlines in the order requested.
/// When nothing is selected the literal message `No lines matched the range.` is returned.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    // Callers below only pass 1-based numbers within 1..=total.
    let render = |n: usize| format!("{n:>4}| {}", lines[n - 1]);

    let mut selected: Vec<String> = Vec::new();
    for spec in range_str.split(',').map(str::trim) {
        match spec.split_once('-') {
            Some((lo, hi)) => {
                let first = lo.trim().parse::<usize>().map_or(1, |n| n.max(1));
                let last = hi.trim().parse::<usize>().map_or(total, |n| n.min(total));
                // `first >= 1` and `last <= total`, so every index is valid; a reversed
                // range is simply empty.
                selected.extend((first..=last).map(&render));
            }
            None => {
                if let Ok(n) = spec.parse::<usize>() {
                    if (1..=total).contains(&n) {
                        selected.push(render(n));
                    }
                }
            }
        }
    }

    if selected.is_empty() {
        return "No lines matched the range.".to_string();
    }
    selected.join("\n")
}
1202
1203fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
1204    let short = protocol::shorten_path(path);
1205    let old_content = cache
1206        .get(path)
1207        .and_then(crate::core::cache::CacheEntry::content);
1208
1209    let new_content = match read_file_lossy(path) {
1210        Ok(c) => c,
1211        Err(e) => {
1212            let msg = format!("ERROR: {e}");
1213            let tokens = count_tokens(&msg);
1214            return (msg, tokens);
1215        }
1216    };
1217
1218    let original_tokens = count_tokens(&new_content);
1219
1220    let diff_output = if let Some(old) = &old_content {
1221        compressor::diff_content(old, &new_content)
1222    } else {
1223        // No previous version cached — store content for future diffs but
1224        // return a short guidance message instead of dumping the full file.
1225        cache.store(path, &new_content);
1226        let msg = format!(
1227            "{file_ref}={short} [no cached version for diff — use mode=full first, then diff on re-read]"
1228        );
1229        let sent = count_tokens(&msg);
1230        return (msg, sent);
1231    };
1232
1233    cache.store(path, &new_content);
1234
1235    let sent = count_tokens(&diff_output);
1236    let savings = protocol::format_savings(original_tokens, sent);
1237    let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1238        format!("{file_ref}={short}")
1239    } else {
1240        short.clone()
1241    };
1242    (format!("{head} [diff]\n{diff_output}\n{savings}"), sent)
1243}
1244
/// Unit tests for header formatting, mode processing, line-range caching,
/// instruction-file detection and auto-mode resolution.
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// With `LEAN_CTX_META=1`, a header built without deps contains no square brackets
    /// and starts with the `F1=main.rs 2L` form.
    #[test]
    fn test_header_toon_format_no_brackets() {
        // The lock is held for the whole test while the env var is set.
        let _lock = crate::core::data_dir::test_env_lock();
        std::env::set_var("LEAN_CTX_META", "1");
        let content = "use std::io;\nfn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 2, false);
        assert!(!header.contains('['));
        assert!(!header.contains(']'));
        assert!(header.contains("F1=main.rs 2L"));
        // Only reached when the assertions above pass.
        std::env::remove_var("LEAN_CTX_META");
    }

    /// When the header lists deps, they use the indented `\n deps ` form rather than
    /// the bracketed `deps:[` form. The assertions are skipped if no deps are reported.
    #[test]
    fn test_header_toon_deps_indented() {
        let _lock = crate::core::data_dir::test_env_lock();
        std::env::set_var("LEAN_CTX_META", "1");
        let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 3, true);
        if header.contains("deps") {
            assert!(
                header.contains("\n deps "),
                "deps should use indented TOON format"
            );
            assert!(
                !header.contains("deps:["),
                "deps should not use bracket format"
            );
        }
        std::env::remove_var("LEAN_CTX_META");
    }

    /// The current header must not cost more tokens than a hard-coded sample of the
    /// older bracketed header format.
    #[test]
    fn test_header_toon_saves_tokens() {
        let _lock = crate::core::data_dir::test_env_lock();
        std::env::set_var("LEAN_CTX_META", "1");
        let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
        let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
        let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
        let old_tokens = count_tokens(&old_header);
        let new_tokens = count_tokens(&new_header);
        assert!(
            new_tokens <= old_tokens,
            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
        );
        std::env::remove_var("LEAN_CTX_META");
    }

    /// Each listed symbol must tokenize to at most two tokens.
    #[test]
    fn test_tdd_symbols_are_compact() {
        let symbols = [
            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
        ];
        for sym in &symbols {
            let tok = count_tokens(sym);
            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
        }
    }

    /// Task mode on a 200-line input (every 20th line mentions `validate_token`) must
    /// report fewer tokens than the full content and carry the `task-filtered` marker.
    #[test]
    fn test_task_mode_filters_content() {
        let content = (0..200)
            .map(|i| {
                if i % 20 == 0 {
                    format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
                } else {
                    format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
                }
            })
            .collect::<Vec<_>>()
            .join("\n");
        let full_tokens = count_tokens(&content);
        let task = Some("fix bug in validate_token");
        let (result, result_tokens) = process_mode(
            &content,
            "task",
            "F1",
            "test.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "test.rs",
            task,
        );
        assert!(
            result_tokens < full_tokens,
            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
        );
        assert!(
            result.contains("task-filtered"),
            "output should contain task-filtered marker"
        );
    }

    /// Task mode with no task supplied falls back to full content and says so.
    #[test]
    fn test_task_mode_without_task_returns_full() {
        let content = "fn main() {}\nfn helper() {}\n";
        let tokens = count_tokens(content);
        let (result, _sent) = process_mode(
            content,
            "task",
            "F1",
            "test.rs",
            "rs",
            tokens,
            CrpMode::Off,
            "test.rs",
            None,
        );
        assert!(
            result.contains("no task set"),
            "should indicate no task: {result}"
        );
    }

    /// Reference mode output is at most three lines and mentions both the line count
    /// and the token count.
    #[test]
    fn test_reference_mode_one_line() {
        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
        let tokens = count_tokens(content);
        let (result, _sent) = process_mode(
            content,
            "reference",
            "F1",
            "test.rs",
            "rs",
            tokens,
            CrpMode::Off,
            "test.rs",
            None,
        );
        let lines: Vec<&str> = result.lines().collect();
        assert!(
            lines.len() <= 3,
            "reference mode should be very compact, got {} lines",
            lines.len()
        );
        assert!(result.contains("lines"), "should contain line count");
        assert!(result.contains("tok"), "should contain token count");
    }

    /// Reading `lines:1-1` twice through the same cache must reflect a rewrite of the
    /// file that happened between the two reads.
    #[test]
    fn cached_lines_mode_invalidates_on_mtime_change() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("file.txt");
        let p = path.to_string_lossy().to_string();

        std::fs::write(&path, "one\nsecond\n").unwrap();
        let mut cache = SessionCache::new();

        let r1 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        // The second output line is the extracted line; strip the `   N| ` prefix.
        let l1: Vec<&str> = r1.content.lines().collect();
        let got1 = l1.get(1).copied().unwrap_or_default().trim();
        let got1 = got1.split_once('|').map_or(got1, |(_, s)| s.trim());
        assert_eq!(got1, "one");

        // NOTE(review): presumably the sleep ensures the file's mtime differs at
        // one-second resolution — confirm against the cache invalidation logic.
        std::thread::sleep(Duration::from_secs(1));
        std::fs::write(&path, "two\nsecond\n").unwrap();

        let r2 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        let l2: Vec<&str> = r2.content.lines().collect();
        let got2 = l2.get(1).copied().unwrap_or_default().trim();
        let got2 = got2.split_once('|').map_or(got2, |(_, s)| s.trim());
        assert_eq!(got2, "two");
    }

    /// Compares token counts of the full, task, signatures and reference modes on a
    /// synthetic source file, prints them to stderr, and asserts their relative order.
    /// Ignored under tarpaulin.
    #[test]
    #[cfg_attr(tarpaulin, ignore)]
    fn benchmark_task_conditioned_compression() {
        // Keep this reasonably small so CI coverage instrumentation stays fast.
        let content = generate_benchmark_code(200);
        let full_tokens = count_tokens(&content);
        let task = Some("fix authentication in validate_token");

        let (_full_output, full_tok) = process_mode(
            &content,
            "full",
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );
        let (_task_output, task_tok) = process_mode(
            &content,
            "task",
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );
        let (_sig_output, sig_tok) = process_mode(
            &content,
            "signatures",
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );
        let (_ref_output, ref_tok) = process_mode(
            &content,
            "reference",
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );

        // Savings percentages are relative to the token count of the "full" output.
        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
        eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
        eprintln!("  full:       {full_tok:>6} tokens (baseline)");
        eprintln!(
            "  task:       {task_tok:>6} tokens ({:.0}% savings)",
            (1.0 - task_tok as f64 / full_tok as f64) * 100.0
        );
        eprintln!(
            "  signatures: {sig_tok:>6} tokens ({:.0}% savings)",
            (1.0 - sig_tok as f64 / full_tok as f64) * 100.0
        );
        eprintln!(
            "  reference:  {ref_tok:>6} tokens ({:.0}% savings)",
            (1.0 - ref_tok as f64 / full_tok as f64) * 100.0
        );
        eprintln!("================================================\n");

        assert!(task_tok < full_tok, "task mode should save tokens");
        assert!(sig_tok < full_tok, "signatures should save tokens");
        assert!(ref_tok < sig_tok, "reference should be most compact");
    }

    /// Builds synthetic Rust source for the benchmark: a fixed preamble (imports, a
    /// `Server` struct and a `validate_token` method), filler handler lines up to
    /// roughly `lines` lines, and a closing brace, joined with newlines.
    fn generate_benchmark_code(lines: usize) -> String {
        let mut code = Vec::with_capacity(lines);
        code.push("use std::collections::HashMap;".to_string());
        code.push("use crate::core::auth;".to_string());
        code.push(String::new());
        code.push("pub struct Server {".to_string());
        code.push("    config: Config,".to_string());
        code.push("    cache: HashMap<String, String>,".to_string());
        code.push("}".to_string());
        code.push(String::new());
        code.push("impl Server {".to_string());
        code.push(
            "    pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {"
                .to_string(),
        );
        code.push("        let decoded = auth::decode_jwt(token)?;".to_string());
        code.push("        if decoded.exp < chrono::Utc::now().timestamp() {".to_string());
        code.push("            return Err(AuthError::Expired);".to_string());
        code.push("        }".to_string());
        code.push("        Ok(decoded.claims)".to_string());
        code.push("    }".to_string());
        code.push(String::new());

        // Fill the rest in 30-line groups: a handler signature, 28 body lines, then a
        // closing brace.
        let remaining = lines.saturating_sub(code.len());
        for i in 0..remaining {
            if i % 30 == 0 {
                code.push(format!(
                    "    pub fn handler_{i}(&self, req: Request) -> Response {{"
                ));
            } else if i % 30 == 29 {
                code.push("    }".to_string());
            } else {
                code.push(format!("        let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"));
            }
        }
        code.push("}".to_string());
        code.join("\n")
    }

    /// Agent instruction/skill/rule files are detected as instruction files; ordinary
    /// source, config and data files are not.
    #[test]
    fn instruction_file_detection() {
        assert!(is_instruction_file(
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md"
        ));
        assert!(is_instruction_file("/workspace/.cursor/rules/lean-ctx.mdc"));
        assert!(is_instruction_file("/project/AGENTS.md"));
        assert!(is_instruction_file("/project/.cursorrules"));
        assert!(is_instruction_file("/home/user/.claude/rules/my-rule.md"));
        assert!(is_instruction_file("/skills/some-skill/README.md"));

        assert!(!is_instruction_file("/project/src/main.rs"));
        assert!(!is_instruction_file("/project/config.json"));
        assert!(!is_instruction_file("/project/data/report.csv"));
    }

    /// Auto mode resolves to `full` for instruction files, regardless of the token
    /// count or task passed in these cases.
    #[test]
    fn resolve_auto_mode_returns_full_for_instruction_files() {
        let mode = resolve_auto_mode(
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
            5000,
            Some("read"),
        );
        assert_eq!(mode, "full", "SKILL.md must always be read in full");

        let mode = resolve_auto_mode("/workspace/AGENTS.md", 3000, Some("read"));
        assert_eq!(mode, "full", "AGENTS.md must always be read in full");

        let mode = resolve_auto_mode("/workspace/.cursorrules", 2000, None);
        assert_eq!(mode, "full", ".cursorrules must always be read in full");
    }
}