Skip to main content

lean_ctx/tools/
ctx_read.rs

1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a read: the rendered text together with the mode that was actually
/// applied and a token count taken while that mode was processed.
#[derive(Debug, Clone)]
pub struct ReadOutput {
    /// Text handed back to the caller.
    pub content: String,
    /// Mode that produced `content` (e.g. the concrete mode chosen for `"auto"`,
    /// or `"error"` when the file could not be read).
    pub resolved_mode: String,
    /// Approximate output token count from mode processing.
    /// The dispatch layer recounts the final assembled string for accurate savings.
    pub output_tokens: usize,
}
22
/// Marker line appended to compressed output to tell the reader that the text is not
/// the complete source.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Modes whose rendered output is kept in the per-file compressed-output cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is one of [`CACHEABLE_MODES`] (exact, case-sensitive match).
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|candidate| *candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32    if crp_mode.is_tdd() {
33        format!("{mode}:tdd")
34    } else {
35        mode.to_string()
36    }
37}
38
/// Extracts a short proof-line from file content to include in cache-hit stubs.
///
/// Returns the first non-blank line, trimmed, as evidence that the cache is valid.
/// Lines longer than 60 bytes are cut to at most 57 bytes and suffixed with `...`;
/// the cut backs off to the nearest preceding UTF-8 character boundary so that
/// multi-byte text never causes a slicing panic.
///
/// Returns `None` when `read_count < 2` (to avoid noise on early interactions) or
/// when `content` has no non-blank line.
fn cache_hit_proof_line(content: &str, read_count: u32) -> Option<String> {
    const MAX_LEN: usize = 60;
    const KEEP_LEN: usize = 57;

    if read_count < 2 {
        return None;
    }
    let trimmed = content.lines().find(|l| !l.trim().is_empty())?.trim();
    if trimmed.len() <= MAX_LEN {
        return Some(trimmed.to_string());
    }

    // Byte offset 57 may land inside a multi-byte character; step back until it is a
    // valid boundary (offset 0 always is, so the loop terminates).
    let mut cut = KEEP_LEN;
    while !trimmed.is_char_boundary(cut) {
        cut -= 1;
    }
    Some(format!("{}...", &trimmed[..cut]))
}
54
55fn append_compressed_hint(output: &str, file_path: &str) -> String {
56    if !crate::core::profiles::active_profile()
57        .output_hints
58        .compressed_hint()
59    {
60        return output.to_string();
61    }
62    format!(
63        "{output}\n{COMPRESSED_HINT}\n  ctx_read(\"{file_path}\", mode=\"full\") | ctx_retrieve(\"{file_path}\")"
64    )
65}
66
67/// Reads a file as UTF-8 with lossy fallback, enforcing binary detection and max read size limit.
68/// Defense-in-depth: verifies that the canonical path stays within the process's project root
69/// (if determinable) even though callers SHOULD have already jail-checked the path.
70pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
71    if crate::core::binary_detect::is_binary_file(path) {
72        let msg = crate::core::binary_detect::binary_file_message(path);
73        return Err(std::io::Error::other(msg));
74    }
75
76    {
77        let canonical =
78            crate::core::pathutil::safe_canonicalize_bounded(std::path::Path::new(path), 2000);
79        if let Ok(cwd) = std::env::current_dir() {
80            let root = crate::core::pathutil::safe_canonicalize_bounded(&cwd, 2000);
81            if !canonical.starts_with(&root) {
82                let allow = crate::core::pathjail::allow_paths_from_env_and_config();
83                let data_dir_ok = crate::core::data_dir::lean_ctx_data_dir()
84                    .ok()
85                    .is_some_and(|d| canonical.starts_with(d));
86                let tmp_ok = canonical.starts_with(std::env::temp_dir());
87                if !allow.iter().any(|a| canonical.starts_with(a)) && !data_dir_ok && !tmp_ok {
88                    tracing::warn!(
89                        "defense-in-depth: path may escape project root: {}",
90                        canonical.display()
91                    );
92                }
93            }
94        }
95    }
96
97    let cap = crate::core::limits::max_read_bytes();
98
99    let file = open_with_retry(path)?;
100    let meta = file
101        .metadata()
102        .map_err(|e| std::io::Error::other(format!("cannot stat open file descriptor: {e}")))?;
103    if meta.len() > cap as u64 {
104        return Err(std::io::Error::other(format!(
105            "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
106             Increase the limit or use a line-range read: mode=\"lines:1-100\"",
107            meta.len(),
108            cap
109        )));
110    }
111
112    use std::io::Read;
113    let mut bytes = Vec::with_capacity(meta.len() as usize);
114    std::io::BufReader::new(file).read_to_end(&mut bytes)?;
115    match String::from_utf8(bytes) {
116        Ok(s) => Ok(s),
117        Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
118    }
119}
120
121/// Opens a file, retrying once after a brief pause on NotFound.
122/// Works around overlay/FUSE stat-cache races in container runtimes (Docker, Codex).
123/// Uses O_NOFOLLOW on Unix for TOCTOU symlink protection.
124fn open_with_retry(path: &str) -> Result<std::fs::File, std::io::Error> {
125    match open_nofollow(path) {
126        Ok(f) => Ok(f),
127        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
128            std::thread::sleep(std::time::Duration::from_millis(50));
129            open_nofollow(path)
130        }
131        Err(e) => Err(e),
132    }
133}
134
135#[cfg(unix)]
136fn open_nofollow(path: &str) -> Result<std::fs::File, std::io::Error> {
137    use std::os::unix::fs::OpenOptionsExt;
138    use std::path::Path;
139
140    let p = Path::new(path);
141    // Canonicalize the parent directory (resolving symlinks in the directory path)
142    // but apply O_NOFOLLOW only to the final file component. This prevents
143    // symlink-following attacks on the target file while allowing legitimate
144    // directory symlinks (e.g., /tmp → /private/tmp on macOS).
145    if let (Some(parent), Some(filename)) = (p.parent(), p.file_name()) {
146        if parent.exists() {
147            let canonical_parent = crate::core::pathutil::safe_canonicalize_bounded(parent, 2000);
148            let canonical_path = canonical_parent.join(filename);
149            return std::fs::OpenOptions::new()
150                .read(true)
151                .custom_flags(libc::O_NOFOLLOW)
152                .open(&canonical_path);
153        }
154    }
155
156    // Fallback: direct open with O_NOFOLLOW
157    std::fs::OpenOptions::new()
158        .read(true)
159        .custom_flags(libc::O_NOFOLLOW)
160        .open(path)
161}
162
/// Non-Unix fallback: a plain read-only open; no `O_NOFOLLOW` flag is applied here.
#[cfg(not(unix))]
fn open_nofollow(path: &str) -> Result<std::fs::File, std::io::Error> {
    std::fs::OpenOptions::new().read(true).open(path)
}
167
168/// Reads a file through the cache and applies the requested compression mode.
169pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
170    handle_with_options(cache, path, mode, false, crp_mode, None)
171}
172
173/// Like `handle`, but invalidates the cache first to force a fresh disk read.
174pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
175    handle_with_options(cache, path, mode, true, crp_mode, None)
176}
177
178/// Reads a file with task-aware filtering to prioritize task-relevant content.
179pub fn handle_with_task(
180    cache: &mut SessionCache,
181    path: &str,
182    mode: &str,
183    crp_mode: CrpMode,
184    task: Option<&str>,
185) -> String {
186    handle_with_options(cache, path, mode, false, crp_mode, task)
187}
188
189/// Like `handle_with_task`, also returns the resolved mode name and pre-counted tokens.
190pub fn handle_with_task_resolved(
191    cache: &mut SessionCache,
192    path: &str,
193    mode: &str,
194    crp_mode: CrpMode,
195    task: Option<&str>,
196) -> ReadOutput {
197    handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
198}
199
200/// Fresh read with task-aware filtering (invalidates cache first).
201pub fn handle_fresh_with_task(
202    cache: &mut SessionCache,
203    path: &str,
204    mode: &str,
205    crp_mode: CrpMode,
206    task: Option<&str>,
207) -> String {
208    handle_with_options(cache, path, mode, true, crp_mode, task)
209}
210
211/// Fresh read with task-aware filtering, also returns the resolved mode name and pre-counted tokens.
212pub fn handle_fresh_with_task_resolved(
213    cache: &mut SessionCache,
214    path: &str,
215    mode: &str,
216    crp_mode: CrpMode,
217    task: Option<&str>,
218) -> ReadOutput {
219    handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
220}
221
222fn handle_with_options(
223    cache: &mut SessionCache,
224    path: &str,
225    mode: &str,
226    fresh: bool,
227    crp_mode: CrpMode,
228    task: Option<&str>,
229) -> String {
230    handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
231}
232
/// Reports whether this process should be treated as a subagent (forked agent).
/// Subagents inherit stale parent caches, so force-fresh prevents VERIFY FAIL.
///
/// The answer is `true` when `LEAN_CTX_FORCE_FRESH` is exactly `1` or `true`, or when
/// `CURSOR_TASK_ID` is set to a non-empty value. It is computed once per process and
/// memoized; later changes to the environment are not observed.
fn is_subagent_context() -> bool {
    static IS_SUBAGENT: std::sync::OnceLock<bool> = std::sync::OnceLock::new();

    fn detect() -> bool {
        let forced = matches!(
            std::env::var("LEAN_CTX_FORCE_FRESH").as_deref(),
            Ok("1" | "true")
        );
        forced || std::env::var("CURSOR_TASK_ID").is_ok_and(|id| !id.is_empty())
    }

    *IS_SUBAGENT.get_or_init(detect)
}
244
245fn handle_with_options_resolved(
246    cache: &mut SessionCache,
247    path: &str,
248    mode: &str,
249    fresh: bool,
250    crp_mode: CrpMode,
251    task: Option<&str>,
252) -> ReadOutput {
253    let effective_fresh = fresh || is_subagent_context();
254
255    if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
256        bt.next_seq();
257    }
258    let mut result = handle_with_options_inner(cache, path, mode, effective_fresh, crp_mode, task);
259
260    if let Some(entry) = cache.get_mut(path) {
261        entry.last_mode.clone_from(&result.resolved_mode);
262    }
263
264    let dedup_allowed = matches!(
265        result.resolved_mode.as_str(),
266        "map" | "signatures" | "aggressive" | "entropy" | "task"
267    );
268    if dedup_allowed {
269        if let Some(deduped) = cache.apply_dedup(path, &result.content) {
270            let new_tokens = count_tokens(&deduped);
271            if new_tokens < result.output_tokens {
272                result.content = deduped;
273                result.output_tokens = new_tokens;
274            }
275        }
276    }
277
278    if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
279        let original_tokens = cache.get(path).map_or(0, |e| e.original_tokens);
280        bt.record_read(
281            path,
282            &result.resolved_mode,
283            result.output_tokens,
284            original_tokens,
285        );
286    }
287
288    result
289}
290
/// Core read path: serves `path` in the requested `mode`, from the session cache when an
/// entry exists and from disk otherwise.
///
/// Flow, in order:
/// 1. `fresh` invalidates the cache entry; `fresh` combined with `mode == "diff"` is
///    answered with a warning instead.
/// 2. `mode == "diff"` is delegated to `handle_diff`.
/// 3. For every mode except `"full"`, a stale cache entry is invalidated.
/// 4. If a cache entry remains: `"full"` returns either a short "unchanged" stub or the
///    result of `handle_full_with_auto_delta`; other modes return a cached compressed
///    rendering or run `process_mode` over the cached content.
/// 5. Otherwise the file is read from disk, stored in the cache and rendered
///    (a read failure yields an `"ERROR: …"` output with `resolved_mode == "error"`).
///
/// Every returned text except the fresh+diff warning and the read-error message is passed
/// through `redact_text_if_enabled`; `output_tokens` is the token count of the returned text.
fn handle_with_options_inner(
    cache: &mut SessionCache,
    path: &str,
    mode: &str,
    fresh: bool,
    crp_mode: CrpMode,
    task: Option<&str>,
) -> ReadOutput {
    let file_ref = cache.get_file_ref(path);
    let short = protocol::shorten_path(path);
    // File extension without the dot; empty when absent or not valid UTF-8.
    let ext = Path::new(path)
        .extension()
        .and_then(|e| e.to_str())
        .unwrap_or("");

    if fresh {
        if mode == "diff" {
            let warning = "[warning] fresh+diff is redundant — fresh invalidates cache, no diff possible. Use mode=full with fresh=true instead.";
            return ReadOutput {
                content: warning.to_string(),
                resolved_mode: "diff".into(),
                output_tokens: count_tokens(warning),
            };
        }
        cache.invalidate(path);
    }

    if mode == "diff" {
        let (out, _) = handle_diff(cache, path, &file_ref);
        let out = crate::core::redaction::redact_text_if_enabled(&out);
        let sent = count_tokens(&out);
        return ReadOutput {
            content: out,
            resolved_mode: "diff".into(),
            output_tokens: sent,
        };
    }

    // `full` performs its own staleness check below; for every other mode a stale entry
    // is dropped up front so the code falls through to a disk read.
    if mode != "full" {
        if let Some(existing) = cache.get(path) {
            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
            if stale {
                cache.invalidate(path);
            }
        }
    }

    // Extract immutable data from cache entry, then drop the borrow before
    // any mutable operations (record_cache_hit, set_compressed, invalidate).
    let cache_snapshot = cache.get(path).map(|existing| {
        (
            existing.stored_mtime,
            existing.read_count,
            existing.line_count,
            existing.original_tokens,
            existing.content(),
        )
    });

    if let Some((cached_mtime, read_count, line_count, original_tokens, content_opt)) =
        cache_snapshot
    {
        if mode == "full" {
            // Fast mtime check: if file unchanged on disk AND full content was previously
            // delivered, return a minimal stub. After host compaction, delivery flags are
            // reset so the agent gets full content again automatically.
            // "safe" policy never returns stubs — always delivers content.
            let policy_allows_stub =
                crate::server::compaction_sync::effective_cache_policy() != "safe";
            if policy_allows_stub
                && !crate::core::cache::is_cache_entry_stale(path, cached_mtime)
                && cache.is_full_delivered(path)
            {
                cache.record_cache_hit(path);
                let out = if crate::core::protocol::meta_visible() {
                    format!(
                        "{file_ref}={short} [unchanged, {line_count}L, use cached context]\nFile unchanged on disk (same hash). If you haven't seen this content, use fresh=true to force re-read.",
                        )
                } else {
                    // Proof line is only produced from the 2nd read on (see
                    // `cache_hit_proof_line`) and only when cached content is available.
                    let proof = content_opt
                        .as_deref()
                        .and_then(|c| cache_hit_proof_line(c, read_count));
                    // `read_count` is the snapshot taken before `record_cache_hit`;
                    // the +1 presumably accounts for the current read — TODO confirm.
                    let reads_note = if read_count > 3 {
                        format!(" (read {}x, unchanged)", read_count + 1)
                    } else {
                        String::new()
                    };
                    match proof {
                        Some(p) => format!(
                            "{file_ref}={short} [unchanged, {line_count}L, use cached context{reads_note} | first: \"{p}\"]"
                        ),
                        None => format!(
                            "{file_ref}={short} [unchanged, {line_count}L, use cached context{reads_note}]"
                        ),
                    }
                };
                let out = crate::core::redaction::redact_text_if_enabled(&out);
                let sent = count_tokens(&out);
                return ReadOutput {
                    content: out,
                    resolved_mode: "full".into(),
                    output_tokens: sent,
                };
            }
            // No stub possible: re-read from disk and send full content or an auto-delta.
            let (out, _) = handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
            let out = crate::core::redaction::redact_text_if_enabled(&out);
            let sent = count_tokens(&out);
            return ReadOutput {
                content: out,
                resolved_mode: "full".into(),
                output_tokens: sent,
            };
        }

        // Resolve mode first so we can check compressed output cache BEFORE
        // decompressing the full content (avoids ~2-5ms zstd overhead on hits).
        let resolved_mode = if mode == "auto" {
            resolve_auto_mode(path, original_tokens, task)
        } else {
            mode.to_string()
        };

        if is_cacheable_mode(&resolved_mode) {
            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
            let compressed_hit = cache.get_compressed(path, &cache_key).cloned();
            if let Some(cached_output) = compressed_hit {
                cache.record_cache_hit(path);
                let out = crate::core::redaction::redact_text_if_enabled(&cached_output);
                let sent = count_tokens(&out);
                return ReadOutput {
                    content: out,
                    resolved_mode,
                    output_tokens: sent,
                };
            }
        }

        if let Some(content) = content_opt {
            let (out, _) = process_mode(
                &content,
                &resolved_mode,
                &file_ref,
                &short,
                ext,
                original_tokens,
                crp_mode,
                path,
                task,
            );
            // The un-redacted rendering is what gets cached; redaction is applied on the way out.
            if is_cacheable_mode(&resolved_mode) {
                let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
                cache.set_compressed(path, &cache_key, out.clone());
            }
            let out = crate::core::redaction::redact_text_if_enabled(&out);
            let sent = count_tokens(&out);
            return ReadOutput {
                content: out,
                resolved_mode,
                output_tokens: sent,
            };
        }
        // An entry exists but its content is unavailable: drop it and fall through
        // to a disk read below.
        cache.invalidate(path);
    }

    let content = match read_file_lossy(path) {
        Ok(c) => c,
        Err(e) => {
            let msg = format!("ERROR: {e}");
            let tokens = count_tokens(&msg);
            return ReadOutput {
                content: msg,
                resolved_mode: "error".into(),
                output_tokens: tokens,
            };
        }
    };

    let store_result = cache.store(path, &content);

    // Skip expensive hint computation for line-range reads and first reads.
    // Hints are only useful from the 2nd read onwards when the file is contextually relevant.
    let is_line_range = mode.starts_with("lines:");
    let hints = crate::core::profiles::active_profile().output_hints;
    let is_repeat_read = store_result.read_count > 1;
    let similar_hint = if !is_line_range && is_repeat_read && hints.semantic_hint() {
        find_similar_and_update_semantic_index(path, &content)
    } else {
        None
    };
    let graph_hint = if !is_line_range && is_repeat_read && hints.related_hint() {
        build_graph_related_hint(path)
    } else {
        None
    };

    if mode == "full" {
        cache.mark_full_delivered(path);
        let (mut output, _) = format_full_output(
            &file_ref,
            &short,
            ext,
            &content,
            store_result.original_tokens,
            store_result.line_count,
            task,
        );
        // Hints go after the file body: graph hint first, then the semantic hint.
        if let Some(hint) = &graph_hint {
            output.push_str(&format!("\n{hint}"));
        }
        if let Some(hint) = similar_hint {
            output.push_str(&format!("\n{hint}"));
        }
        let output = crate::core::redaction::redact_text_if_enabled(&output);
        let sent = count_tokens(&output);
        return ReadOutput {
            content: output,
            resolved_mode: "full".into(),
            output_tokens: sent,
        };
    }

    let resolved_mode = if mode == "auto" {
        resolve_auto_mode(path, store_result.original_tokens, task)
    } else {
        mode.to_string()
    };

    let (mut output, _sent) = process_mode(
        &content,
        &resolved_mode,
        &file_ref,
        &short,
        ext,
        store_result.original_tokens,
        crp_mode,
        path,
        task,
    );
    if let Some(hint) = &graph_hint {
        output.push_str(&format!("\n{hint}"));
    }
    if let Some(hint) = similar_hint {
        output.push_str(&format!("\n{hint}"));
    }
    // NOTE(review): here the rendering is cached *with* any graph/semantic hints appended,
    // whereas the cached-content path above caches it without hints — confirm which is intended.
    if is_cacheable_mode(&resolved_mode) {
        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
        cache.set_compressed(path, &cache_key, output.clone());
    }
    let output = crate::core::redaction::redact_text_if_enabled(&output);
    let final_tokens = count_tokens(&output);
    ReadOutput {
        content: output,
        resolved_mode,
        output_tokens: final_tokens,
    }
}
547
/// Returns `true` when `path` looks like an agent instruction/rules file.
///
/// Matching is case-insensitive and succeeds when either the file name is one of a
/// fixed set of well-known names, or the path contains one of a fixed set of
/// `/`-separated markers (skills directories, Cursor/Claude rules directories,
/// or `/agents.md` anywhere in the path).
pub fn is_instruction_file(path: &str) -> bool {
    const INSTRUCTION_FILENAMES: [&str; 7] = [
        "skill.md",
        "agents.md",
        "rules.md",
        ".cursorrules",
        ".clinerules",
        "lean-ctx.md",
        "lean-ctx.mdc",
    ];
    const INSTRUCTION_PATH_MARKERS: [&str; 4] = [
        "/skills/",
        "/.cursor/rules/",
        "/.claude/rules/",
        "/agents.md",
    ];

    let lowered = path.to_lowercase();
    let basename = std::path::Path::new(&lowered)
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("");

    if INSTRUCTION_FILENAMES.contains(&basename) {
        return true;
    }
    INSTRUCTION_PATH_MARKERS
        .iter()
        .any(|marker| lowered.contains(*marker))
}
569
/// Chooses the concrete read mode to use when the caller asked for `"auto"`.
///
/// Decision order (first match wins):
/// 1. Instruction files (see `is_instruction_file`) are always read as `"full"`.
/// 2. If the bounce tracker says the file should be forced to full, `"full"`.
/// 3. If the intent router (queried with `task`, or `"read"` when there is none) yields a
///    mode other than `"auto"` / `"reference"`, that mode.
/// 4. Otherwise the mode predictor's suggestion (falling back to `"full"`), possibly
///    replaced by `"aggressive"` through the bandit, is handed to the adaptive mode policy.
///    For files above 2000 tokens the policy's choice is overridden by the prediction when
///    it would leave the `map`/`signatures` pair or would turn a non-full prediction into
///    `"full"`.
///
/// `original_tokens` is the token count of the uncompressed file.
fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
    if is_instruction_file(file_path) {
        return "full".to_string();
    }

    // A poisoned tracker lock is ignored: the check is simply skipped.
    if let Ok(bt) = crate::core::bounce_tracker::global().lock() {
        if bt.should_force_full(file_path) {
            return "full".to_string();
        }
    }

    // Intent routing: a concrete mode from the router short-circuits everything below.
    let intent_query = task.unwrap_or("read");
    let route = crate::core::intent_router::route_v1(intent_query);
    let intent_mode = &route.decision.effective_read_mode;
    if intent_mode != "auto" && intent_mode != "reference" {
        return intent_mode.clone();
    }

    // Priority 2: FileSignature-based predictor
    let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
    let predictor = crate::core::mode_predictor::ModePredictor::new();
    let mut predicted = predictor
        .predict_best_mode(&sig)
        .unwrap_or_else(|| "full".to_string());
    // The predictor must never hand "auto" back, otherwise resolution would not terminate
    // in a concrete mode.
    if predicted == "auto" {
        predicted = "full".to_string();
    }

    // Priority 3: Bandit exploration when budget is tight
    // SAFETY: Bandit NEVER overrides "full" — full is sacred (byte-accurate content needed for edits)
    if predicted != "full" {
        if let Some(project_root) =
            crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
        {
            let ext = std::path::Path::new(file_path)
                .extension()
                .and_then(|e| e.to_str())
                .unwrap_or("");
            // Size bucket by token count; the bandit is keyed per extension and bucket.
            let bucket = match original_tokens {
                0..=2000 => "sm",
                2001..=10000 => "md",
                10001..=50000 => "lg",
                _ => "xl",
            };
            let bandit_key = format!("{ext}_{bucket}");
            let mut store = crate::core::bandit::BanditStore::load(&project_root);
            let bandit = store.get_or_create(&bandit_key);
            let arm = bandit.select_arm();
            // Only files above 2000 tokens are switched to "aggressive", and only when the
            // selected arm's budget ratio is below 0.25.
            if arm.budget_ratio < 0.25 && original_tokens > 2000 {
                predicted = "aggressive".to_string();
            }
        }
    }

    // Priority 4: Adaptive mode policy
    let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
    let chosen = policy.choose_auto_mode(task, &predicted);

    // Guard rails for files above 2000 tokens: keep the prediction when the policy would
    // (a) move a map/signatures prediction to a mode outside that pair, or
    // (b) replace any other non-full prediction with "full".
    if original_tokens > 2000 {
        if predicted == "map" || predicted == "signatures" {
            if chosen != "map" && chosen != "signatures" {
                return predicted;
            }
        } else if chosen == "full" && predicted != "full" {
            return predicted;
        }
    }

    chosen
}
640
641fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
642    const MAX_CONTENT_BYTES_FOR_SEMANTIC: usize = 32_768;
643
644    if content.len() > MAX_CONTENT_BYTES_FOR_SEMANTIC {
645        return None;
646    }
647
648    let cfg = crate::core::config::Config::load();
649    let profile = crate::core::config::MemoryProfile::effective(&cfg);
650    if !profile.semantic_cache_enabled() {
651        return None;
652    }
653
654    let project_root = detect_project_root(path);
655    let session_id = format!("{}", std::process::id());
656    let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
657
658    let similar = index.find_similar(content, 0.7);
659    let relevant: Vec<_> = similar
660        .into_iter()
661        .filter(|(p, _)| p != path)
662        .take(3)
663        .collect();
664
665    index.add_file(path, content, &session_id);
666    let _ = index.save(&project_root);
667
668    if relevant.is_empty() {
669        return None;
670    }
671
672    let hints: Vec<String> = relevant
673        .iter()
674        .map(|(p, score)| format!("  {p} ({:.0}% similar)", score * 100.0))
675        .collect();
676
677    Some(format!(
678        "[semantic: {} similar file(s) in cache]\n{}",
679        relevant.len(),
680        hints.join("\n")
681    ))
682}
683
684fn detect_project_root(path: &str) -> String {
685    crate::core::protocol::detect_project_root_or_cwd(path)
686}
687
688fn build_graph_related_hint(path: &str) -> Option<String> {
689    let project_root = detect_project_root(path);
690    crate::core::graph_context::build_related_hint(path, &project_root, 5)
691}
692
/// An auto-delta is sent only when the diff costs fewer tokens than this fraction of the
/// full file's token count.
const AUTO_DELTA_THRESHOLD: f64 = 0.6;

/// Re-reads from disk; if content changed and delta is compact, sends auto-delta.
///
/// Serves a `full` read for a path by re-reading it from disk and storing the result:
/// * Disk read fails → falls back to the cached copy, or to a short notice when no
///   cached content can be shown.
/// * `store` reports a hit (presumably: content identical to the cached copy — confirm
///   against `SessionCache::store`) → an "unchanged" stub when full content was already
///   delivered and the cache policy is not `"safe"`; otherwise the full content, which is
///   then marked as delivered.
/// * Otherwise → a diff against the previously cached content when its token count is
///   below `AUTO_DELTA_THRESHOLD` × the full token count, else the full content.
///
/// Returns `(output, token_count)`. In the auto-delta case the count is that of the diff
/// alone, not of the whole output string.
fn handle_full_with_auto_delta(
    cache: &mut SessionCache,
    path: &str,
    file_ref: &str,
    short: &str,
    ext: &str,
    task: Option<&str>,
) -> (String, usize) {
    let Ok(disk_content) = read_file_lossy(path) else {
        // Disk read failed: serve whatever the cache still holds.
        cache.record_cache_hit(path);
        if let Some(existing) = cache.get(path) {
            // With meta hidden, the cached content itself is re-sent as a normal full read.
            if !crate::core::protocol::meta_visible() {
                if let Some(cached) = existing.content() {
                    return format_full_output(
                        file_ref,
                        short,
                        ext,
                        &cached,
                        existing.original_tokens,
                        existing.line_count,
                        task,
                    );
                }
            }
            // NOTE(review): the value printed before `t` is `read_count`, not a token
            // count — presumably "times read"; confirm the intended meaning.
            let out = format!(
                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
                existing.read_count, existing.line_count
            );
            let sent = count_tokens(&out);
            return (out, sent);
        }
        let out = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
            format!("[file read failed and no cached version available] {file_ref}={short}")
        } else {
            format!("[file read failed and no cached version available] {short}")
        };
        let sent = count_tokens(&out);
        return (out, sent);
    };

    // Capture the previous content before `store` replaces it; it is the diff base.
    let old_content = cache
        .get(path)
        .and_then(crate::core::cache::CacheEntry::content)
        .unwrap_or_default();
    let store_result = cache.store(path, &disk_content);

    if store_result.was_hit {
        let policy_allows_stub = crate::server::compaction_sync::effective_cache_policy() != "safe";
        if policy_allows_stub && store_result.full_content_delivered {
            let out = if crate::core::protocol::meta_visible() {
                format!(
                    "{file_ref}={short} [unchanged, {}L, use cached context]\nFile unchanged on disk (same hash). If you haven't seen this content, use fresh=true to force re-read.",
                    store_result.line_count
                )
            } else {
                let proof = cache_hit_proof_line(&disk_content, store_result.read_count);
                let reads_note = if store_result.read_count > 3 {
                    format!(" (read {}x, unchanged)", store_result.read_count)
                } else {
                    String::new()
                };
                match proof {
                    Some(p) => format!(
                        "{file_ref}={short} [unchanged, {}L, use cached context{reads_note} | first: \"{p}\"]",
                        store_result.line_count
                    ),
                    None => format!(
                        "{file_ref}={short} [unchanged, {}L, use cached context{reads_note}]",
                        store_result.line_count
                    ),
                }
            };
            let sent = count_tokens(&out);
            return (out, sent);
        }
        // Full content has not been delivered yet (or stubs are disallowed): send it now
        // and remember that it was delivered.
        cache.mark_full_delivered(path);
        return format_full_output(
            file_ref,
            short,
            ext,
            &disk_content,
            store_result.original_tokens,
            store_result.line_count,
            task,
        );
    }

    let diff = compressor::diff_content(&old_content, &disk_content);
    let diff_tokens = count_tokens(&diff);
    let full_tokens = store_result.original_tokens;

    // Send the diff only when it is clearly cheaper than the full file; the
    // `full_tokens > 0` guard keeps empty files on the full-output path.
    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
        let savings = protocol::format_savings(full_tokens, diff_tokens);
        let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
            format!("{file_ref}={short}")
        } else {
            short.to_string()
        };
        // The number after `∆` is the line count of the new on-disk content.
        let out = format!(
            "{head} [auto-delta] ∆{}L\n{diff}\n{savings}",
            disk_content.lines().count()
        );
        return (out, diff_tokens);
    }

    format_full_output(
        file_ref,
        short,
        ext,
        &disk_content,
        store_result.original_tokens,
        store_result.line_count,
        task,
    )
}
811
812fn format_full_output(
813    file_ref: &str,
814    short: &str,
815    ext: &str,
816    content: &str,
817    original_tokens: usize,
818    line_count: usize,
819    _task: Option<&str>,
820) -> (String, usize) {
821    let tokens = original_tokens;
822    let metadata = build_header(file_ref, short, ext, content, line_count, true);
823
824    let output = format!("{metadata}\n{content}");
825    let sent = count_tokens(&output);
826    (protocol::append_savings(&output, tokens, sent), sent)
827}
828
829fn build_header(
830    file_ref: &str,
831    short: &str,
832    ext: &str,
833    content: &str,
834    line_count: usize,
835    include_deps: bool,
836) -> String {
837    let mut header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
838        format!("{file_ref}={short} {line_count}L")
839    } else {
840        format!("{short} {line_count}L")
841    };
842
843    if include_deps {
844        let dep_info = deps::extract_deps(content, ext);
845        if !dep_info.imports.is_empty() {
846            let imports_str: Vec<&str> = dep_info
847                .imports
848                .iter()
849                .take(8)
850                .map(std::string::String::as_str)
851                .collect();
852            header.push_str(&format!("\n deps {}", imports_str.join(",")));
853        }
854        if !dep_info.exports.is_empty() {
855            let exports_str: Vec<&str> = dep_info
856                .exports
857                .iter()
858                .take(8)
859                .map(std::string::String::as_str)
860                .collect();
861            header.push_str(&format!("\n exports {}", exports_str.join(",")));
862        }
863    }
864
865    header
866}
867
868#[allow(clippy::too_many_arguments)]
869fn process_mode(
870    content: &str,
871    mode: &str,
872    file_ref: &str,
873    short: &str,
874    ext: &str,
875    original_tokens: usize,
876    crp_mode: CrpMode,
877    file_path: &str,
878    task: Option<&str>,
879) -> (String, usize) {
880    let line_count = content.lines().count();
881
882    match mode {
883        "auto" => {
884            let chosen = resolve_auto_mode(file_path, original_tokens, task);
885            process_mode(
886                content,
887                &chosen,
888                file_ref,
889                short,
890                ext,
891                original_tokens,
892                crp_mode,
893                file_path,
894                task,
895            )
896        }
897        "full" => format_full_output(
898            file_ref,
899            short,
900            ext,
901            content,
902            original_tokens,
903            line_count,
904            task,
905        ),
906        "signatures" => {
907            let sigs = signatures::extract_signatures(content, ext);
908            let dep_info = deps::extract_deps(content, ext);
909
910            let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
911                format!("{file_ref}={short} {line_count}L")
912            } else {
913                format!("{short} {line_count}L")
914            };
915            if !dep_info.imports.is_empty() {
916                let imports_str: Vec<&str> = dep_info
917                    .imports
918                    .iter()
919                    .take(8)
920                    .map(std::string::String::as_str)
921                    .collect();
922                output.push_str(&format!("\n deps {}", imports_str.join(",")));
923            }
924            for sig in &sigs {
925                output.push('\n');
926                if crp_mode.is_tdd() {
927                    output.push_str(&sig.to_tdd());
928                } else {
929                    output.push_str(&sig.to_compact());
930                }
931            }
932            let sent = count_tokens(&output);
933            (
934                append_compressed_hint(
935                    &protocol::append_savings(&output, original_tokens, sent),
936                    file_path,
937                ),
938                sent,
939            )
940        }
941        "map" => {
942            if ext == "php" {
943                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
944                {
945                    let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
946                        format!("{file_ref}={short} {line_count}L\n{php_map}")
947                    } else {
948                        format!("{short} {line_count}L\n{php_map}")
949                    };
950                    let sent = count_tokens(&output);
951                    let output = protocol::append_savings(&output, original_tokens, sent);
952                    return (append_compressed_hint(&output, file_path), sent);
953                }
954            }
955
956            let sigs = signatures::extract_signatures(content, ext);
957            let dep_info = deps::extract_deps(content, ext);
958
959            let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
960                format!("{file_ref}={short} {line_count}L")
961            } else {
962                format!("{short} {line_count}L")
963            };
964
965            if !dep_info.imports.is_empty() {
966                output.push_str("\n  deps: ");
967                output.push_str(&dep_info.imports.join(", "));
968            }
969
970            if !dep_info.exports.is_empty() {
971                output.push_str("\n  exports: ");
972                output.push_str(&dep_info.exports.join(", "));
973            }
974
975            let key_sigs: Vec<&signatures::Signature> = sigs
976                .iter()
977                .filter(|s| s.is_exported || s.indent == 0)
978                .collect();
979
980            if !key_sigs.is_empty() {
981                output.push_str("\n  API:");
982                for sig in &key_sigs {
983                    output.push_str("\n    ");
984                    if crp_mode.is_tdd() {
985                        output.push_str(&sig.to_tdd());
986                    } else {
987                        output.push_str(&sig.to_compact());
988                    }
989                }
990            }
991
992            let sent = count_tokens(&output);
993            (
994                append_compressed_hint(
995                    &protocol::append_savings(&output, original_tokens, sent),
996                    file_path,
997                ),
998                sent,
999            )
1000        }
1001        "aggressive" => {
1002            #[cfg(feature = "tree-sitter")]
1003            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
1004            #[cfg(not(feature = "tree-sitter"))]
1005            let ast_pruned: Option<String> = None;
1006
1007            let base = ast_pruned.as_deref().unwrap_or(content);
1008
1009            let session_intent = crate::core::session::SessionState::load_latest()
1010                .and_then(|s| s.active_structured_intent);
1011            let raw = if let Some(ref intent) = session_intent {
1012                compressor::task_aware_compress(base, Some(ext), intent)
1013            } else {
1014                compressor::aggressive_compress(base, Some(ext))
1015            };
1016            let compressed = compressor::safeguard_ratio(content, &raw);
1017            let header = build_header(file_ref, short, ext, content, line_count, true);
1018
1019            let mut sym = SymbolMap::new();
1020            let idents = symbol_map::extract_identifiers(&compressed, ext);
1021            for ident in &idents {
1022                sym.register(ident);
1023            }
1024
1025            if sym.len() >= 3 {
1026                let sym_table = sym.format_table();
1027                let sym_applied = sym.apply(&compressed);
1028                let orig_tok = count_tokens(&compressed);
1029                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
1030                let net = orig_tok.saturating_sub(comp_tok);
1031                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
1032                    let savings = protocol::format_savings(original_tokens, comp_tok);
1033                    return (
1034                        append_compressed_hint(
1035                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
1036                            file_path,
1037                        ),
1038                        comp_tok,
1039                    );
1040                }
1041                let savings = protocol::format_savings(original_tokens, orig_tok);
1042                return (
1043                    append_compressed_hint(
1044                        &format!("{header}\n{compressed}\n{savings}"),
1045                        file_path,
1046                    ),
1047                    orig_tok,
1048                );
1049            }
1050
1051            let sent = count_tokens(&compressed);
1052            let savings = protocol::format_savings(original_tokens, sent);
1053            (
1054                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
1055                sent,
1056            )
1057        }
1058        "entropy" => {
1059            let result = entropy::entropy_compress_adaptive(content, file_path);
1060            let avg_h = entropy::analyze_entropy(content).avg_entropy;
1061            let header = build_header(file_ref, short, ext, content, line_count, false);
1062            let techs = result.techniques.join(", ");
1063            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
1064            let sent = count_tokens(&output);
1065            let savings = protocol::format_savings(original_tokens, sent);
1066            let compression_ratio = if original_tokens > 0 {
1067                1.0 - (sent as f64 / original_tokens as f64)
1068            } else {
1069                0.0
1070            };
1071            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
1072            (
1073                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
1074                sent,
1075            )
1076        }
1077        "task" => {
1078            let task_str = task.unwrap_or("");
1079            if task_str.is_empty() {
1080                let header = build_header(file_ref, short, ext, content, line_count, true);
1081                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
1082                let sent = count_tokens(&out);
1083                return (out, sent);
1084            }
1085            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
1086            if keywords.is_empty() {
1087                let header = build_header(file_ref, short, ext, content, line_count, true);
1088                let out = format!(
1089                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
1090                );
1091                let sent = count_tokens(&out);
1092                return (out, sent);
1093            }
1094            let filtered =
1095                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
1096            let filtered_lines = filtered.lines().count();
1097            let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1098                format!("{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
1099            } else {
1100                format!("{short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
1101            };
1102            let graph_ctx = if crate::core::profiles::active_profile()
1103                .output_hints
1104                .graph_context_block()
1105            {
1106                let project_root = detect_project_root(file_path);
1107                crate::core::graph_context::build_graph_context(
1108                    file_path,
1109                    &project_root,
1110                    Some(crate::core::graph_context::GraphContextOptions::default()),
1111                )
1112                .map(|c| crate::core::graph_context::format_graph_context(&c))
1113                .unwrap_or_default()
1114            } else {
1115                String::new()
1116            };
1117
1118            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
1119            let savings = protocol::format_savings(original_tokens, sent);
1120            (
1121                append_compressed_hint(
1122                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
1123                    file_path,
1124                ),
1125                sent,
1126            )
1127        }
1128        "reference" => {
1129            let tok = count_tokens(content);
1130            let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1131                format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})")
1132            } else {
1133                format!("{short}: {line_count} lines, {tok} tok ({ext})")
1134            };
1135            let sent = count_tokens(&output);
1136            let savings = protocol::format_savings(original_tokens, sent);
1137            (format!("{output}\n{savings}"), sent)
1138        }
1139        mode if mode.starts_with("lines:") => {
1140            let range_str = &mode[6..];
1141            let extracted = extract_line_range(content, range_str);
1142            let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1143                format!("{file_ref}={short} {line_count}L lines:{range_str}")
1144            } else {
1145                format!("{short} {line_count}L lines:{range_str}")
1146            };
1147            let sent = count_tokens(&extracted);
1148            let savings = protocol::format_savings(original_tokens, sent);
1149            (format!("{header}\n{extracted}\n{savings}"), sent)
1150        }
1151        unknown => {
1152            let header = build_header(file_ref, short, ext, content, line_count, true);
1153            let out = format!(
1154                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
1155            );
1156            let sent = count_tokens(&out);
1157            (out, sent)
1158        }
1159    }
1160}
1161
/// Selects lines of `content` described by `range_str` and prefixes each with
/// its 1-based line number.
///
/// `range_str` is a comma-separated list whose items are either a single line
/// number (`"7"`) or an inclusive range (`"3-9"`). In a range, a start that
/// does not parse defaults to 1 and an end that does not parse defaults to the
/// last line; both are clamped to the file. Single numbers outside the file
/// and items that are not numbers are ignored.
///
/// Each selected line is rendered as `{n:>4}| {text}` and the lines are joined
/// with `\n` in the order the items were given. When nothing is selected the
/// literal `"No lines matched the range."` is returned.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    // Callers below only pass numbers already known to lie in 1..=total.
    let numbered = |n: usize| format!("{n:>4}| {}", lines[n - 1]);

    let picked: Vec<String> = range_str
        .split(',')
        .map(str::trim)
        .flat_map(|spec| -> Vec<String> {
            match spec.split_once('-') {
                Some((lo, hi)) => {
                    let first = lo.trim().parse::<usize>().unwrap_or(1).max(1);
                    let last = hi.trim().parse::<usize>().unwrap_or(total).min(total);
                    // Clamping guarantees every index in first..=last is valid;
                    // an inverted range is simply empty.
                    (first..=last).map(&numbered).collect()
                }
                None => match spec.parse::<usize>() {
                    Ok(n) if (1..=total).contains(&n) => vec![numbered(n)],
                    _ => Vec::new(),
                },
            }
        })
        .collect();

    if picked.is_empty() {
        return "No lines matched the range.".to_string();
    }
    picked.join("\n")
}
1190
1191fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
1192    let short = protocol::shorten_path(path);
1193    let old_content = cache
1194        .get(path)
1195        .and_then(crate::core::cache::CacheEntry::content);
1196
1197    let new_content = match read_file_lossy(path) {
1198        Ok(c) => c,
1199        Err(e) => {
1200            let msg = format!("ERROR: {e}");
1201            let tokens = count_tokens(&msg);
1202            return (msg, tokens);
1203        }
1204    };
1205
1206    let original_tokens = count_tokens(&new_content);
1207
1208    let diff_output = if let Some(old) = &old_content {
1209        compressor::diff_content(old, &new_content)
1210    } else {
1211        // No previous version cached — store content for future diffs but
1212        // return a short guidance message instead of dumping the full file.
1213        cache.store(path, &new_content);
1214        let msg = format!(
1215            "{file_ref}={short} [no cached version for diff — use mode=full first, then diff on re-read]"
1216        );
1217        let sent = count_tokens(&msg);
1218        return (msg, sent);
1219    };
1220
1221    cache.store(path, &new_content);
1222
1223    let sent = count_tokens(&diff_output);
1224    let savings = protocol::format_savings(original_tokens, sent);
1225    let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1226        format!("{file_ref}={short}")
1227    } else {
1228        short.clone()
1229    };
1230    (format!("{head} [diff]\n{diff_output}\n{savings}"), sent)
1231}
1232
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Sets `LEAN_CTX_META=1` for as long as the guard is alive and removes
    /// the variable again when it is dropped.
    ///
    /// Removing the variable in `Drop` means a failing assertion cannot leak
    /// it into tests that run afterwards. Declare the guard *after* taking the
    /// environment lock so it is dropped (and the variable removed) while the
    /// lock is still held.
    struct MetaEnvGuard;

    impl MetaEnvGuard {
        fn enable() -> Self {
            std::env::set_var("LEAN_CTX_META", "1");
            Self
        }
    }

    impl Drop for MetaEnvGuard {
        fn drop(&mut self) {
            std::env::remove_var("LEAN_CTX_META");
        }
    }

    /// The plain header must not contain bracket characters and must carry
    /// the `ref=path NL` prefix.
    #[test]
    fn test_header_toon_format_no_brackets() {
        let _lock = crate::core::data_dir::test_env_lock();
        let _meta = MetaEnvGuard::enable();
        let content = "use std::io;\nfn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 2, false);
        assert!(!header.contains('['));
        assert!(!header.contains(']'));
        assert!(header.contains("F1=main.rs 2L"));
    }

    /// When a deps line is present it must use the indented `\n deps ` form.
    #[test]
    fn test_header_toon_deps_indented() {
        let _lock = crate::core::data_dir::test_env_lock();
        let _meta = MetaEnvGuard::enable();
        let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 3, true);
        if header.contains("deps") {
            assert!(
                header.contains("\n deps "),
                "deps should use indented TOON format"
            );
            assert!(
                !header.contains("deps:["),
                "deps should not use bracket format"
            );
        }
    }

    /// The current header format must not cost more tokens than the older
    /// bracketed format it replaced.
    #[test]
    fn test_header_toon_saves_tokens() {
        let _lock = crate::core::data_dir::test_env_lock();
        let _meta = MetaEnvGuard::enable();
        let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
        let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
        let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
        let old_tokens = count_tokens(&old_header);
        let new_tokens = count_tokens(&new_header);
        assert!(
            new_tokens <= old_tokens,
            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
        );
    }

    /// Each symbol listed here must tokenize to at most two tokens.
    #[test]
    fn test_tdd_symbols_are_compact() {
        let symbols = [
            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
        ];
        for sym in &symbols {
            let tok = count_tokens(sym);
            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
        }
    }

    /// Task mode with a usable task must shrink the output and mark it as
    /// task-filtered.
    #[test]
    fn test_task_mode_filters_content() {
        // 200 lines, every 20th one mentions the task keyword.
        let content = (0..200)
            .map(|i| {
                if i % 20 == 0 {
                    format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
                } else {
                    format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
                }
            })
            .collect::<Vec<_>>()
            .join("\n");
        let full_tokens = count_tokens(&content);
        let task = Some("fix bug in validate_token");
        let (result, result_tokens) = process_mode(
            &content,
            "task",
            "F1",
            "test.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "test.rs",
            task,
        );
        assert!(
            result_tokens < full_tokens,
            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
        );
        assert!(
            result.contains("task-filtered"),
            "output should contain task-filtered marker"
        );
    }

    /// Task mode without a task falls back to the full content and says so.
    #[test]
    fn test_task_mode_without_task_returns_full() {
        let content = "fn main() {}\nfn helper() {}\n";
        let tokens = count_tokens(content);
        let (result, _sent) = process_mode(
            content,
            "task",
            "F1",
            "test.rs",
            "rs",
            tokens,
            CrpMode::Off,
            "test.rs",
            None,
        );
        assert!(
            result.contains("no task set"),
            "should indicate no task: {result}"
        );
    }

    /// Reference mode yields at most three lines containing the line and
    /// token counts.
    #[test]
    fn test_reference_mode_one_line() {
        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
        let tokens = count_tokens(content);
        let (result, _sent) = process_mode(
            content,
            "reference",
            "F1",
            "test.rs",
            "rs",
            tokens,
            CrpMode::Off,
            "test.rs",
            None,
        );
        let lines: Vec<&str> = result.lines().collect();
        assert!(
            lines.len() <= 3,
            "reference mode should be very compact, got {} lines",
            lines.len()
        );
        assert!(result.contains("lines"), "should contain line count");
        assert!(result.contains("tok"), "should contain token count");
    }

    /// A `lines:` read must reflect on-disk changes made after the first read.
    #[test]
    fn cached_lines_mode_invalidates_on_mtime_change() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("file.txt");
        let p = path.to_string_lossy().to_string();

        std::fs::write(&path, "one\nsecond\n").unwrap();
        let mut cache = SessionCache::new();

        let r1 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        // Second output line is the first extracted line: "   1| one".
        let l1: Vec<&str> = r1.content.lines().collect();
        let got1 = l1.get(1).copied().unwrap_or_default().trim();
        let got1 = got1.split_once('|').map_or(got1, |(_, s)| s.trim());
        assert_eq!(got1, "one");

        // Wait so the rewritten file gets a different modification time.
        std::thread::sleep(Duration::from_secs(1));
        std::fs::write(&path, "two\nsecond\n").unwrap();

        let r2 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        let l2: Vec<&str> = r2.content.lines().collect();
        let got2 = l2.get(1).copied().unwrap_or_default().trim();
        let got2 = got2.split_once('|').map_or(got2, |(_, s)| s.trim());
        assert_eq!(got2, "two");
    }

    /// Compares token counts of the full, task, signatures and reference
    /// modes on a generated file and prints the result.
    #[test]
    #[cfg_attr(tarpaulin, ignore)]
    fn benchmark_task_conditioned_compression() {
        // Keep this reasonably small so CI coverage instrumentation stays fast.
        let content = generate_benchmark_code(200);
        let full_tokens = count_tokens(&content);
        let task = Some("fix authentication in validate_token");

        let (_full_output, full_tok) = process_mode(
            &content,
            "full",
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );
        let (_task_output, task_tok) = process_mode(
            &content,
            "task",
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );
        let (_sig_output, sig_tok) = process_mode(
            &content,
            "signatures",
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );
        let (_ref_output, ref_tok) = process_mode(
            &content,
            "reference",
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );

        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
        eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
        eprintln!("  full:       {full_tok:>6} tokens (baseline)");
        eprintln!(
            "  task:       {task_tok:>6} tokens ({:.0}% savings)",
            (1.0 - task_tok as f64 / full_tok as f64) * 100.0
        );
        eprintln!(
            "  signatures: {sig_tok:>6} tokens ({:.0}% savings)",
            (1.0 - sig_tok as f64 / full_tok as f64) * 100.0
        );
        eprintln!(
            "  reference:  {ref_tok:>6} tokens ({:.0}% savings)",
            (1.0 - ref_tok as f64 / full_tok as f64) * 100.0
        );
        eprintln!("================================================\n");

        assert!(task_tok < full_tok, "task mode should save tokens");
        assert!(sig_tok < full_tok, "signatures should save tokens");
        assert!(ref_tok < sig_tok, "reference should be most compact");
    }

    /// Generates a synthetic Rust source file of roughly `lines` lines: a
    /// fixed preamble containing `validate_token`, filler handler bodies, and
    /// a closing brace.
    fn generate_benchmark_code(lines: usize) -> String {
        let mut code = Vec::with_capacity(lines);
        code.push("use std::collections::HashMap;".to_string());
        code.push("use crate::core::auth;".to_string());
        code.push(String::new());
        code.push("pub struct Server {".to_string());
        code.push("    config: Config,".to_string());
        code.push("    cache: HashMap<String, String>,".to_string());
        code.push("}".to_string());
        code.push(String::new());
        code.push("impl Server {".to_string());
        code.push(
            "    pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {"
                .to_string(),
        );
        code.push("        let decoded = auth::decode_jwt(token)?;".to_string());
        code.push("        if decoded.exp < chrono::Utc::now().timestamp() {".to_string());
        code.push("            return Err(AuthError::Expired);".to_string());
        code.push("        }".to_string());
        code.push("        Ok(decoded.claims)".to_string());
        code.push("    }".to_string());
        code.push(String::new());

        // Fill the rest with 30-line handler functions.
        let remaining = lines.saturating_sub(code.len());
        for i in 0..remaining {
            if i % 30 == 0 {
                code.push(format!(
                    "    pub fn handler_{i}(&self, req: Request) -> Response {{"
                ));
            } else if i % 30 == 29 {
                code.push("    }".to_string());
            } else {
                code.push(format!("        let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"));
            }
        }
        code.push("}".to_string());
        code.join("\n")
    }

    /// Agent instruction files are recognised; ordinary project files are not.
    #[test]
    fn instruction_file_detection() {
        assert!(is_instruction_file(
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md"
        ));
        assert!(is_instruction_file("/workspace/.cursor/rules/lean-ctx.mdc"));
        assert!(is_instruction_file("/project/AGENTS.md"));
        assert!(is_instruction_file("/project/.cursorrules"));
        assert!(is_instruction_file("/home/user/.claude/rules/my-rule.md"));
        assert!(is_instruction_file("/skills/some-skill/README.md"));

        assert!(!is_instruction_file("/project/src/main.rs"));
        assert!(!is_instruction_file("/project/config.json"));
        assert!(!is_instruction_file("/project/data/report.csv"));
    }

    /// Auto mode must resolve to `full` for instruction files at the token
    /// counts and tasks exercised here.
    #[test]
    fn resolve_auto_mode_returns_full_for_instruction_files() {
        let mode = resolve_auto_mode(
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
            5000,
            Some("read"),
        );
        assert_eq!(mode, "full", "SKILL.md must always be read in full");

        let mode = resolve_auto_mode("/workspace/AGENTS.md", 3000, Some("read"));
        assert_eq!(mode, "full", "AGENTS.md must always be read in full");

        let mode = resolve_auto_mode("/workspace/.cursorrules", 2000, None);
        assert_eq!(mode, "full", ".cursorrules must always be read in full");
    }
}