Skip to main content

lean_ctx/tools/
ctx_read.rs

1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a read: the rendered text, the mode that was actually applied,
/// and the token count of that text.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so values can be logged,
/// duplicated and compared in tests; every field is a plain owned value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadOutput {
    /// Text handed back to the caller.
    pub content: String,
    /// Mode that produced `content` (e.g. the concrete mode chosen for `"auto"`).
    pub resolved_mode: String,
    /// Approximate output token count from mode processing.
    /// The dispatch layer recounts the final assembled string for accurate savings.
    pub output_tokens: usize,
}
22
/// Marker line that `append_compressed_hint` adds after compressed output to
/// tell the reader that a full-source read is available.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";
24
/// Modes accepted by `is_cacheable_mode`.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` exactly matches one of `CACHEABLE_MODES`
/// (case-sensitive, no trimming).
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|candidate| *candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode) -> String {
32    if crp_mode.is_tdd() {
33        format!("{mode}:tdd")
34    } else {
35        mode.to_string()
36    }
37}
38
/// Extracts a short proof-line from file content to include in cache-hit stubs.
///
/// Returns the first non-blank line, trimmed. Lines longer than 60 characters
/// are cut to their first 57 characters followed by `...`. Returns `None` when
/// `read_count < 2` or when the content has no non-blank line.
///
/// Lengths are measured in characters, not bytes, and the cut always lands on a
/// character boundary, so multi-byte UTF-8 content can never cause a slicing panic.
fn cache_hit_proof_line(content: &str, read_count: u32) -> Option<String> {
    const MAX_CHARS: usize = 60;
    const KEPT_CHARS: usize = 57;

    if read_count < 2 {
        return None;
    }
    let line = content.lines().map(str::trim).find(|l| !l.is_empty())?;

    // A character at index MAX_CHARS exists only when the line has more than
    // MAX_CHARS characters; otherwise the line fits and is returned whole.
    if line.char_indices().nth(MAX_CHARS).is_none() {
        return Some(line.to_string());
    }

    // Byte offset where character number KEPT_CHARS starts: a valid slice boundary.
    let cut = line
        .char_indices()
        .nth(KEPT_CHARS)
        .map_or(line.len(), |(offset, _)| offset);
    Some(format!("{}...", &line[..cut]))
}
54
55fn append_compressed_hint(output: &str, file_path: &str) -> String {
56    if !crate::core::profiles::active_profile()
57        .output_hints
58        .compressed_hint()
59    {
60        return output.to_string();
61    }
62    format!(
63        "{output}\n{COMPRESSED_HINT}\n  ctx_read(\"{file_path}\", mode=\"full\") | ctx_retrieve(\"{file_path}\")"
64    )
65}
66
67/// Reads a file as UTF-8 with lossy fallback, enforcing binary detection and max read size limit.
68/// Defense-in-depth: verifies that the canonical path stays within the process's project root
69/// (if determinable) even though callers SHOULD have already jail-checked the path.
70pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
71    if crate::core::binary_detect::is_binary_file(path) {
72        let msg = crate::core::binary_detect::binary_file_message(path);
73        return Err(std::io::Error::other(msg));
74    }
75
76    if let Ok(canonical) = std::path::Path::new(path).canonicalize() {
77        if let Ok(cwd) = std::env::current_dir() {
78            let root = crate::core::pathjail::canonicalize_or_self(&cwd);
79            if !canonical.starts_with(&root) {
80                let allow = crate::core::pathjail::allow_paths_from_env_and_config();
81                let data_dir_ok = crate::core::data_dir::lean_ctx_data_dir()
82                    .ok()
83                    .is_some_and(|d| canonical.starts_with(d));
84                let tmp_ok = canonical.starts_with(std::env::temp_dir());
85                if !allow.iter().any(|a| canonical.starts_with(a)) && !data_dir_ok && !tmp_ok {
86                    tracing::warn!(
87                        "defense-in-depth: path may escape project root: {}",
88                        canonical.display()
89                    );
90                }
91            }
92        }
93    }
94
95    let cap = crate::core::limits::max_read_bytes();
96
97    let file = open_with_retry(path)?;
98    let meta = file
99        .metadata()
100        .map_err(|e| std::io::Error::other(format!("cannot stat open file descriptor: {e}")))?;
101    if meta.len() > cap as u64 {
102        return Err(std::io::Error::other(format!(
103            "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
104             Increase the limit or use a line-range read: mode=\"lines:1-100\"",
105            meta.len(),
106            cap
107        )));
108    }
109
110    use std::io::Read;
111    let mut bytes = Vec::with_capacity(meta.len() as usize);
112    std::io::BufReader::new(file).read_to_end(&mut bytes)?;
113    match String::from_utf8(bytes) {
114        Ok(s) => Ok(s),
115        Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
116    }
117}
118
119/// Opens a file, retrying once after a brief pause on NotFound.
120/// Works around overlay/FUSE stat-cache races in container runtimes (Docker, Codex).
121/// Uses O_NOFOLLOW on Unix for TOCTOU symlink protection.
122fn open_with_retry(path: &str) -> Result<std::fs::File, std::io::Error> {
123    match open_nofollow(path) {
124        Ok(f) => Ok(f),
125        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
126            std::thread::sleep(std::time::Duration::from_millis(50));
127            open_nofollow(path)
128        }
129        Err(e) => Err(e),
130    }
131}
132
133#[cfg(unix)]
134fn open_nofollow(path: &str) -> Result<std::fs::File, std::io::Error> {
135    use std::os::unix::fs::OpenOptionsExt;
136    use std::path::Path;
137
138    let p = Path::new(path);
139    // Canonicalize the parent directory (resolving symlinks in the directory path)
140    // but apply O_NOFOLLOW only to the final file component. This prevents
141    // symlink-following attacks on the target file while allowing legitimate
142    // directory symlinks (e.g., /tmp → /private/tmp on macOS).
143    if let (Some(parent), Some(filename)) = (p.parent(), p.file_name()) {
144        if parent.exists() {
145            let canonical_parent = parent.canonicalize()?;
146            let canonical_path = canonical_parent.join(filename);
147            return std::fs::OpenOptions::new()
148                .read(true)
149                .custom_flags(libc::O_NOFOLLOW)
150                .open(&canonical_path);
151        }
152    }
153
154    // Fallback: direct open with O_NOFOLLOW
155    std::fs::OpenOptions::new()
156        .read(true)
157        .custom_flags(libc::O_NOFOLLOW)
158        .open(path)
159}
160
/// Non-Unix: plain read-only open; no symlink-refusal flag is applied here.
#[cfg(not(unix))]
fn open_nofollow(path: &str) -> Result<std::fs::File, std::io::Error> {
    std::fs::OpenOptions::new().read(true).open(path)
}
165
166/// Reads a file through the cache and applies the requested compression mode.
167pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
168    handle_with_options(cache, path, mode, false, crp_mode, None)
169}
170
171/// Like `handle`, but invalidates the cache first to force a fresh disk read.
172pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
173    handle_with_options(cache, path, mode, true, crp_mode, None)
174}
175
176/// Reads a file with task-aware filtering to prioritize task-relevant content.
177pub fn handle_with_task(
178    cache: &mut SessionCache,
179    path: &str,
180    mode: &str,
181    crp_mode: CrpMode,
182    task: Option<&str>,
183) -> String {
184    handle_with_options(cache, path, mode, false, crp_mode, task)
185}
186
187/// Like `handle_with_task`, also returns the resolved mode name and pre-counted tokens.
188pub fn handle_with_task_resolved(
189    cache: &mut SessionCache,
190    path: &str,
191    mode: &str,
192    crp_mode: CrpMode,
193    task: Option<&str>,
194) -> ReadOutput {
195    handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
196}
197
198/// Fresh read with task-aware filtering (invalidates cache first).
199pub fn handle_fresh_with_task(
200    cache: &mut SessionCache,
201    path: &str,
202    mode: &str,
203    crp_mode: CrpMode,
204    task: Option<&str>,
205) -> String {
206    handle_with_options(cache, path, mode, true, crp_mode, task)
207}
208
209/// Fresh read with task-aware filtering, also returns the resolved mode name and pre-counted tokens.
210pub fn handle_fresh_with_task_resolved(
211    cache: &mut SessionCache,
212    path: &str,
213    mode: &str,
214    crp_mode: CrpMode,
215    task: Option<&str>,
216) -> ReadOutput {
217    handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
218}
219
220fn handle_with_options(
221    cache: &mut SessionCache,
222    path: &str,
223    mode: &str,
224    fresh: bool,
225    crp_mode: CrpMode,
226    task: Option<&str>,
227) -> String {
228    handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
229}
230
/// Reports whether reads should be treated as coming from a subagent context.
///
/// True when `LEAN_CTX_FORCE_FRESH` is exactly `1` or `true`, or when
/// `CURSOR_TASK_ID` is set to a non-empty value. The answer is computed on the
/// first call and memoized for the rest of the process, so later changes to
/// the environment are not observed.
fn is_subagent_context() -> bool {
    use std::sync::OnceLock;

    static IS_SUBAGENT: OnceLock<bool> = OnceLock::new();

    fn env_satisfies(name: &str, accept: fn(&str) -> bool) -> bool {
        std::env::var(name).is_ok_and(|value| accept(&value))
    }

    *IS_SUBAGENT.get_or_init(|| {
        env_satisfies("LEAN_CTX_FORCE_FRESH", |v| matches!(v, "1" | "true"))
            || env_satisfies("CURSOR_TASK_ID", |v| !v.is_empty())
    })
}
242
243fn handle_with_options_resolved(
244    cache: &mut SessionCache,
245    path: &str,
246    mode: &str,
247    fresh: bool,
248    crp_mode: CrpMode,
249    task: Option<&str>,
250) -> ReadOutput {
251    let effective_fresh = fresh || is_subagent_context();
252
253    if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
254        bt.next_seq();
255    }
256    let mut result = handle_with_options_inner(cache, path, mode, effective_fresh, crp_mode, task);
257
258    if let Some(entry) = cache.get_mut(path) {
259        entry.last_mode.clone_from(&result.resolved_mode);
260    }
261
262    let dedup_allowed = matches!(
263        result.resolved_mode.as_str(),
264        "map" | "signatures" | "aggressive" | "entropy" | "task"
265    );
266    if dedup_allowed {
267        if let Some(deduped) = cache.apply_dedup(path, &result.content) {
268            let new_tokens = count_tokens(&deduped);
269            if new_tokens < result.output_tokens {
270                result.content = deduped;
271                result.output_tokens = new_tokens;
272            }
273        }
274    }
275
276    if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
277        let original_tokens = cache.get(path).map_or(0, |e| e.original_tokens);
278        bt.record_read(
279            path,
280            &result.resolved_mode,
281            result.output_tokens,
282            original_tokens,
283        );
284    }
285
286    result
287}
288
289fn handle_with_options_inner(
290    cache: &mut SessionCache,
291    path: &str,
292    mode: &str,
293    fresh: bool,
294    crp_mode: CrpMode,
295    task: Option<&str>,
296) -> ReadOutput {
297    let file_ref = cache.get_file_ref(path);
298    let short = protocol::shorten_path(path);
299    let ext = Path::new(path)
300        .extension()
301        .and_then(|e| e.to_str())
302        .unwrap_or("");
303
304    if fresh {
305        if mode == "diff" {
306            let warning = "[warning] fresh+diff is redundant — fresh invalidates cache, no diff possible. Use mode=full with fresh=true instead.";
307            return ReadOutput {
308                content: warning.to_string(),
309                resolved_mode: "diff".into(),
310                output_tokens: count_tokens(warning),
311            };
312        }
313        cache.invalidate(path);
314    }
315
316    if mode == "diff" {
317        let (out, _) = handle_diff(cache, path, &file_ref);
318        let out = crate::core::redaction::redact_text_if_enabled(&out);
319        let sent = count_tokens(&out);
320        return ReadOutput {
321            content: out,
322            resolved_mode: "diff".into(),
323            output_tokens: sent,
324        };
325    }
326
327    if mode != "full" {
328        if let Some(existing) = cache.get(path) {
329            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
330            if stale {
331                cache.invalidate(path);
332            }
333        }
334    }
335
336    // Extract immutable data from cache entry, then drop the borrow before
337    // any mutable operations (record_cache_hit, set_compressed, invalidate).
338    let cache_snapshot = cache.get(path).map(|existing| {
339        (
340            existing.stored_mtime,
341            existing.read_count,
342            existing.line_count,
343            existing.original_tokens,
344            existing.content(),
345        )
346    });
347
348    if let Some((cached_mtime, read_count, line_count, original_tokens, content_opt)) =
349        cache_snapshot
350    {
351        if mode == "full" {
352            // Fast mtime check: if file unchanged on disk, skip re-reading entirely.
353            if !crate::core::cache::is_cache_entry_stale(path, cached_mtime) {
354                cache.record_cache_hit(path);
355                let out = if crate::core::protocol::meta_visible() {
356                    format!(
357                        "{file_ref}={short} [unchanged, {line_count}L, use cached context]\nFile unchanged on disk (same hash). If you haven't seen this content, use fresh=true to force re-read.",
358                        )
359                } else {
360                    let proof = content_opt
361                        .as_deref()
362                        .and_then(|c| cache_hit_proof_line(c, read_count));
363                    let reads_note = if read_count > 3 {
364                        format!(" (read {}x, unchanged)", read_count + 1)
365                    } else {
366                        String::new()
367                    };
368                    match proof {
369                        Some(p) => format!(
370                            "{file_ref}={short} [unchanged, {line_count}L, use cached context{reads_note} | first: \"{p}\"]"
371                        ),
372                        None => format!(
373                            "{file_ref}={short} [unchanged, {line_count}L, use cached context{reads_note}]"
374                        ),
375                    }
376                };
377                let out = crate::core::redaction::redact_text_if_enabled(&out);
378                let sent = count_tokens(&out);
379                return ReadOutput {
380                    content: out,
381                    resolved_mode: "full".into(),
382                    output_tokens: sent,
383                };
384            }
385            let (out, _) = handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
386            let out = crate::core::redaction::redact_text_if_enabled(&out);
387            let sent = count_tokens(&out);
388            return ReadOutput {
389                content: out,
390                resolved_mode: "full".into(),
391                output_tokens: sent,
392            };
393        }
394
395        // Resolve mode first so we can check compressed output cache BEFORE
396        // decompressing the full content (avoids ~2-5ms zstd overhead on hits).
397        let resolved_mode = if mode == "auto" {
398            resolve_auto_mode(path, original_tokens, task)
399        } else {
400            mode.to_string()
401        };
402
403        if is_cacheable_mode(&resolved_mode) {
404            let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
405            let compressed_hit = cache.get_compressed(path, &cache_key).cloned();
406            if let Some(cached_output) = compressed_hit {
407                cache.record_cache_hit(path);
408                let out = crate::core::redaction::redact_text_if_enabled(&cached_output);
409                let sent = count_tokens(&out);
410                return ReadOutput {
411                    content: out,
412                    resolved_mode,
413                    output_tokens: sent,
414                };
415            }
416        }
417
418        if let Some(content) = content_opt {
419            let (out, _) = process_mode(
420                &content,
421                &resolved_mode,
422                &file_ref,
423                &short,
424                ext,
425                original_tokens,
426                crp_mode,
427                path,
428                task,
429            );
430            if is_cacheable_mode(&resolved_mode) {
431                let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
432                cache.set_compressed(path, &cache_key, out.clone());
433            }
434            let out = crate::core::redaction::redact_text_if_enabled(&out);
435            let sent = count_tokens(&out);
436            return ReadOutput {
437                content: out,
438                resolved_mode,
439                output_tokens: sent,
440            };
441        }
442        cache.invalidate(path);
443    }
444
445    let content = match read_file_lossy(path) {
446        Ok(c) => c,
447        Err(e) => {
448            let msg = format!("ERROR: {e}");
449            let tokens = count_tokens(&msg);
450            return ReadOutput {
451                content: msg,
452                resolved_mode: "error".into(),
453                output_tokens: tokens,
454            };
455        }
456    };
457
458    let store_result = cache.store(path, &content);
459
460    // Skip expensive hint computation for line-range reads and first reads.
461    // Hints are only useful from the 2nd read onwards when the file is contextually relevant.
462    let is_line_range = mode.starts_with("lines:");
463    let hints = crate::core::profiles::active_profile().output_hints;
464    let is_repeat_read = store_result.read_count > 1;
465    let similar_hint = if !is_line_range && is_repeat_read && hints.semantic_hint() {
466        find_similar_and_update_semantic_index(path, &content)
467    } else {
468        None
469    };
470    let graph_hint = if !is_line_range && is_repeat_read && hints.related_hint() {
471        build_graph_related_hint(path)
472    } else {
473        None
474    };
475
476    if mode == "full" {
477        cache.mark_full_delivered(path);
478        let (mut output, _) = format_full_output(
479            &file_ref,
480            &short,
481            ext,
482            &content,
483            store_result.original_tokens,
484            store_result.line_count,
485            task,
486        );
487        if let Some(hint) = &graph_hint {
488            output.push_str(&format!("\n{hint}"));
489        }
490        if let Some(hint) = similar_hint {
491            output.push_str(&format!("\n{hint}"));
492        }
493        let output = crate::core::redaction::redact_text_if_enabled(&output);
494        let sent = count_tokens(&output);
495        return ReadOutput {
496            content: output,
497            resolved_mode: "full".into(),
498            output_tokens: sent,
499        };
500    }
501
502    let resolved_mode = if mode == "auto" {
503        resolve_auto_mode(path, store_result.original_tokens, task)
504    } else {
505        mode.to_string()
506    };
507
508    let (mut output, _sent) = process_mode(
509        &content,
510        &resolved_mode,
511        &file_ref,
512        &short,
513        ext,
514        store_result.original_tokens,
515        crp_mode,
516        path,
517        task,
518    );
519    if let Some(hint) = &graph_hint {
520        output.push_str(&format!("\n{hint}"));
521    }
522    if let Some(hint) = similar_hint {
523        output.push_str(&format!("\n{hint}"));
524    }
525    if is_cacheable_mode(&resolved_mode) {
526        let cache_key = compressed_cache_key(&resolved_mode, crp_mode);
527        cache.set_compressed(path, &cache_key, output.clone());
528    }
529    let output = crate::core::redaction::redact_text_if_enabled(&output);
530    let final_tokens = count_tokens(&output);
531    ReadOutput {
532        content: output,
533        resolved_mode,
534        output_tokens: final_tokens,
535    }
536}
537
/// Returns `true` when `path` looks like an agent-instruction file.
///
/// Matching is case-insensitive (the path is lowercased first) and succeeds
/// when either:
/// - the final path component is one of a fixed set of file names
///   (`skill.md`, `agents.md`, `rules.md`, `.cursorrules`, `.clinerules`,
///   `lean-ctx.md`, `lean-ctx.mdc`), or
/// - the path contains one of the substrings `/skills/`, `/.cursor/rules/`,
///   `/.claude/rules/` or `/agents.md`.
pub fn is_instruction_file(path: &str) -> bool {
    const INSTRUCTION_FILE_NAMES: [&str; 7] = [
        "skill.md",
        "agents.md",
        "rules.md",
        ".cursorrules",
        ".clinerules",
        "lean-ctx.md",
        "lean-ctx.mdc",
    ];
    const INSTRUCTION_PATH_MARKERS: [&str; 4] = [
        "/skills/",
        "/.cursor/rules/",
        "/.claude/rules/",
        "/agents.md",
    ];

    let lowered = path.to_lowercase();
    let base_name = std::path::Path::new(&lowered)
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("");

    if INSTRUCTION_FILE_NAMES.iter().any(|name| *name == base_name) {
        return true;
    }
    INSTRUCTION_PATH_MARKERS
        .iter()
        .any(|marker| lowered.contains(*marker))
}
559
560fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
561    if is_instruction_file(file_path) {
562        return "full".to_string();
563    }
564
565    if let Ok(bt) = crate::core::bounce_tracker::global().lock() {
566        if bt.should_force_full(file_path) {
567            return "full".to_string();
568        }
569    }
570
571    let intent_query = task.unwrap_or("read");
572    let route = crate::core::intent_router::route_v1(intent_query);
573    let intent_mode = &route.decision.effective_read_mode;
574    if intent_mode != "auto" && intent_mode != "reference" {
575        return intent_mode.clone();
576    }
577
578    // Priority 2: FileSignature-based predictor
579    let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
580    let predictor = crate::core::mode_predictor::ModePredictor::new();
581    let mut predicted = predictor
582        .predict_best_mode(&sig)
583        .unwrap_or_else(|| "full".to_string());
584    if predicted == "auto" {
585        predicted = "full".to_string();
586    }
587
588    // Priority 3: Bandit exploration when budget is tight
589    // SAFETY: Bandit NEVER overrides "full" — full is sacred (byte-accurate content needed for edits)
590    if predicted != "full" {
591        if let Some(project_root) =
592            crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
593        {
594            let ext = std::path::Path::new(file_path)
595                .extension()
596                .and_then(|e| e.to_str())
597                .unwrap_or("");
598            let bucket = match original_tokens {
599                0..=2000 => "sm",
600                2001..=10000 => "md",
601                10001..=50000 => "lg",
602                _ => "xl",
603            };
604            let bandit_key = format!("{ext}_{bucket}");
605            let mut store = crate::core::bandit::BanditStore::load(&project_root);
606            let bandit = store.get_or_create(&bandit_key);
607            let arm = bandit.select_arm();
608            if arm.budget_ratio < 0.25 && original_tokens > 2000 {
609                predicted = "aggressive".to_string();
610            }
611        }
612    }
613
614    // Priority 4: Adaptive mode policy
615    let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
616    let chosen = policy.choose_auto_mode(task, &predicted);
617
618    if original_tokens > 2000 {
619        if predicted == "map" || predicted == "signatures" {
620            if chosen != "map" && chosen != "signatures" {
621                return predicted;
622            }
623        } else if chosen == "full" && predicted != "full" {
624            return predicted;
625        }
626    }
627
628    chosen
629}
630
631fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
632    const MAX_CONTENT_BYTES_FOR_SEMANTIC: usize = 32_768;
633
634    if content.len() > MAX_CONTENT_BYTES_FOR_SEMANTIC {
635        return None;
636    }
637
638    let cfg = crate::core::config::Config::load();
639    let profile = crate::core::config::MemoryProfile::effective(&cfg);
640    if !profile.semantic_cache_enabled() {
641        return None;
642    }
643
644    let project_root = detect_project_root(path);
645    let session_id = format!("{}", std::process::id());
646    let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
647
648    let similar = index.find_similar(content, 0.7);
649    let relevant: Vec<_> = similar
650        .into_iter()
651        .filter(|(p, _)| p != path)
652        .take(3)
653        .collect();
654
655    index.add_file(path, content, &session_id);
656    let _ = index.save(&project_root);
657
658    if relevant.is_empty() {
659        return None;
660    }
661
662    let hints: Vec<String> = relevant
663        .iter()
664        .map(|(p, score)| format!("  {p} ({:.0}% similar)", score * 100.0))
665        .collect();
666
667    Some(format!(
668        "[semantic: {} similar file(s) in cache]\n{}",
669        relevant.len(),
670        hints.join("\n")
671    ))
672}
673
674fn detect_project_root(path: &str) -> String {
675    crate::core::protocol::detect_project_root_or_cwd(path)
676}
677
678fn build_graph_related_hint(path: &str) -> Option<String> {
679    let project_root = detect_project_root(path);
680    crate::core::graph_context::build_related_hint(path, &project_root, 5)
681}
682
683const AUTO_DELTA_THRESHOLD: f64 = 0.6;
684
685/// Re-reads from disk; if content changed and delta is compact, sends auto-delta.
686fn handle_full_with_auto_delta(
687    cache: &mut SessionCache,
688    path: &str,
689    file_ref: &str,
690    short: &str,
691    ext: &str,
692    task: Option<&str>,
693) -> (String, usize) {
694    let Ok(disk_content) = read_file_lossy(path) else {
695        cache.record_cache_hit(path);
696        if let Some(existing) = cache.get(path) {
697            if !crate::core::protocol::meta_visible() {
698                if let Some(cached) = existing.content() {
699                    return format_full_output(
700                        file_ref,
701                        short,
702                        ext,
703                        &cached,
704                        existing.original_tokens,
705                        existing.line_count,
706                        task,
707                    );
708                }
709            }
710            let out = format!(
711                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
712                existing.read_count, existing.line_count
713            );
714            let sent = count_tokens(&out);
715            return (out, sent);
716        }
717        let out = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
718            format!("[file read failed and no cached version available] {file_ref}={short}")
719        } else {
720            format!("[file read failed and no cached version available] {short}")
721        };
722        let sent = count_tokens(&out);
723        return (out, sent);
724    };
725
726    let old_content = cache
727        .get(path)
728        .and_then(crate::core::cache::CacheEntry::content)
729        .unwrap_or_default();
730    let store_result = cache.store(path, &disk_content);
731
732    if store_result.was_hit {
733        if store_result.full_content_delivered {
734            let out = if crate::core::protocol::meta_visible() {
735                format!(
736                    "{file_ref}={short} [unchanged, {}L, use cached context]\nFile unchanged on disk (same hash). If you haven't seen this content, use fresh=true to force re-read.",
737                    store_result.line_count
738                )
739            } else {
740                let proof = cache_hit_proof_line(&disk_content, store_result.read_count);
741                let reads_note = if store_result.read_count > 3 {
742                    format!(" (read {}x, unchanged)", store_result.read_count)
743                } else {
744                    String::new()
745                };
746                match proof {
747                    Some(p) => format!(
748                        "{file_ref}={short} [unchanged, {}L, use cached context{reads_note} | first: \"{p}\"]",
749                        store_result.line_count
750                    ),
751                    None => format!(
752                        "{file_ref}={short} [unchanged, {}L, use cached context{reads_note}]",
753                        store_result.line_count
754                    ),
755                }
756            };
757            let sent = count_tokens(&out);
758            return (out, sent);
759        }
760        cache.mark_full_delivered(path);
761        return format_full_output(
762            file_ref,
763            short,
764            ext,
765            &disk_content,
766            store_result.original_tokens,
767            store_result.line_count,
768            task,
769        );
770    }
771
772    let diff = compressor::diff_content(&old_content, &disk_content);
773    let diff_tokens = count_tokens(&diff);
774    let full_tokens = store_result.original_tokens;
775
776    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
777        let savings = protocol::format_savings(full_tokens, diff_tokens);
778        let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
779            format!("{file_ref}={short}")
780        } else {
781            short.to_string()
782        };
783        let out = format!(
784            "{head} [auto-delta] ∆{}L\n{diff}\n{savings}",
785            disk_content.lines().count()
786        );
787        return (out, diff_tokens);
788    }
789
790    format_full_output(
791        file_ref,
792        short,
793        ext,
794        &disk_content,
795        store_result.original_tokens,
796        store_result.line_count,
797        task,
798    )
799}
800
801fn format_full_output(
802    file_ref: &str,
803    short: &str,
804    ext: &str,
805    content: &str,
806    original_tokens: usize,
807    line_count: usize,
808    _task: Option<&str>,
809) -> (String, usize) {
810    let tokens = original_tokens;
811    let metadata = build_header(file_ref, short, ext, content, line_count, true);
812
813    let output = format!("{metadata}\n{content}");
814    let sent = count_tokens(&output);
815    (protocol::append_savings(&output, tokens, sent), sent)
816}
817
818fn build_header(
819    file_ref: &str,
820    short: &str,
821    ext: &str,
822    content: &str,
823    line_count: usize,
824    include_deps: bool,
825) -> String {
826    let mut header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
827        format!("{file_ref}={short} {line_count}L")
828    } else {
829        format!("{short} {line_count}L")
830    };
831
832    if include_deps {
833        let dep_info = deps::extract_deps(content, ext);
834        if !dep_info.imports.is_empty() {
835            let imports_str: Vec<&str> = dep_info
836                .imports
837                .iter()
838                .take(8)
839                .map(std::string::String::as_str)
840                .collect();
841            header.push_str(&format!("\n deps {}", imports_str.join(",")));
842        }
843        if !dep_info.exports.is_empty() {
844            let exports_str: Vec<&str> = dep_info
845                .exports
846                .iter()
847                .take(8)
848                .map(std::string::String::as_str)
849                .collect();
850            header.push_str(&format!("\n exports {}", exports_str.join(",")));
851        }
852    }
853
854    header
855}
856
857#[allow(clippy::too_many_arguments)]
858fn process_mode(
859    content: &str,
860    mode: &str,
861    file_ref: &str,
862    short: &str,
863    ext: &str,
864    original_tokens: usize,
865    crp_mode: CrpMode,
866    file_path: &str,
867    task: Option<&str>,
868) -> (String, usize) {
869    let line_count = content.lines().count();
870
871    match mode {
872        "auto" => {
873            let chosen = resolve_auto_mode(file_path, original_tokens, task);
874            process_mode(
875                content,
876                &chosen,
877                file_ref,
878                short,
879                ext,
880                original_tokens,
881                crp_mode,
882                file_path,
883                task,
884            )
885        }
886        "full" => format_full_output(
887            file_ref,
888            short,
889            ext,
890            content,
891            original_tokens,
892            line_count,
893            task,
894        ),
895        "signatures" => {
896            let sigs = signatures::extract_signatures(content, ext);
897            let dep_info = deps::extract_deps(content, ext);
898
899            let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
900                format!("{file_ref}={short} {line_count}L")
901            } else {
902                format!("{short} {line_count}L")
903            };
904            if !dep_info.imports.is_empty() {
905                let imports_str: Vec<&str> = dep_info
906                    .imports
907                    .iter()
908                    .take(8)
909                    .map(std::string::String::as_str)
910                    .collect();
911                output.push_str(&format!("\n deps {}", imports_str.join(",")));
912            }
913            for sig in &sigs {
914                output.push('\n');
915                if crp_mode.is_tdd() {
916                    output.push_str(&sig.to_tdd());
917                } else {
918                    output.push_str(&sig.to_compact());
919                }
920            }
921            let sent = count_tokens(&output);
922            (
923                append_compressed_hint(
924                    &protocol::append_savings(&output, original_tokens, sent),
925                    file_path,
926                ),
927                sent,
928            )
929        }
930        "map" => {
931            if ext == "php" {
932                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
933                {
934                    let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
935                        format!("{file_ref}={short} {line_count}L\n{php_map}")
936                    } else {
937                        format!("{short} {line_count}L\n{php_map}")
938                    };
939                    let sent = count_tokens(&output);
940                    let output = protocol::append_savings(&output, original_tokens, sent);
941                    return (append_compressed_hint(&output, file_path), sent);
942                }
943            }
944
945            let sigs = signatures::extract_signatures(content, ext);
946            let dep_info = deps::extract_deps(content, ext);
947
948            let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
949                format!("{file_ref}={short} {line_count}L")
950            } else {
951                format!("{short} {line_count}L")
952            };
953
954            if !dep_info.imports.is_empty() {
955                output.push_str("\n  deps: ");
956                output.push_str(&dep_info.imports.join(", "));
957            }
958
959            if !dep_info.exports.is_empty() {
960                output.push_str("\n  exports: ");
961                output.push_str(&dep_info.exports.join(", "));
962            }
963
964            let key_sigs: Vec<&signatures::Signature> = sigs
965                .iter()
966                .filter(|s| s.is_exported || s.indent == 0)
967                .collect();
968
969            if !key_sigs.is_empty() {
970                output.push_str("\n  API:");
971                for sig in &key_sigs {
972                    output.push_str("\n    ");
973                    if crp_mode.is_tdd() {
974                        output.push_str(&sig.to_tdd());
975                    } else {
976                        output.push_str(&sig.to_compact());
977                    }
978                }
979            }
980
981            let sent = count_tokens(&output);
982            (
983                append_compressed_hint(
984                    &protocol::append_savings(&output, original_tokens, sent),
985                    file_path,
986                ),
987                sent,
988            )
989        }
990        "aggressive" => {
991            #[cfg(feature = "tree-sitter")]
992            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
993            #[cfg(not(feature = "tree-sitter"))]
994            let ast_pruned: Option<String> = None;
995
996            let base = ast_pruned.as_deref().unwrap_or(content);
997
998            let session_intent = crate::core::session::SessionState::load_latest()
999                .and_then(|s| s.active_structured_intent);
1000            let raw = if let Some(ref intent) = session_intent {
1001                compressor::task_aware_compress(base, Some(ext), intent)
1002            } else {
1003                compressor::aggressive_compress(base, Some(ext))
1004            };
1005            let compressed = compressor::safeguard_ratio(content, &raw);
1006            let header = build_header(file_ref, short, ext, content, line_count, true);
1007
1008            let mut sym = SymbolMap::new();
1009            let idents = symbol_map::extract_identifiers(&compressed, ext);
1010            for ident in &idents {
1011                sym.register(ident);
1012            }
1013
1014            if sym.len() >= 3 {
1015                let sym_table = sym.format_table();
1016                let sym_applied = sym.apply(&compressed);
1017                let orig_tok = count_tokens(&compressed);
1018                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
1019                let net = orig_tok.saturating_sub(comp_tok);
1020                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
1021                    let savings = protocol::format_savings(original_tokens, comp_tok);
1022                    return (
1023                        append_compressed_hint(
1024                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
1025                            file_path,
1026                        ),
1027                        comp_tok,
1028                    );
1029                }
1030                let savings = protocol::format_savings(original_tokens, orig_tok);
1031                return (
1032                    append_compressed_hint(
1033                        &format!("{header}\n{compressed}\n{savings}"),
1034                        file_path,
1035                    ),
1036                    orig_tok,
1037                );
1038            }
1039
1040            let sent = count_tokens(&compressed);
1041            let savings = protocol::format_savings(original_tokens, sent);
1042            (
1043                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
1044                sent,
1045            )
1046        }
1047        "entropy" => {
1048            let result = entropy::entropy_compress_adaptive(content, file_path);
1049            let avg_h = entropy::analyze_entropy(content).avg_entropy;
1050            let header = build_header(file_ref, short, ext, content, line_count, false);
1051            let techs = result.techniques.join(", ");
1052            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
1053            let sent = count_tokens(&output);
1054            let savings = protocol::format_savings(original_tokens, sent);
1055            let compression_ratio = if original_tokens > 0 {
1056                1.0 - (sent as f64 / original_tokens as f64)
1057            } else {
1058                0.0
1059            };
1060            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
1061            (
1062                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
1063                sent,
1064            )
1065        }
1066        "task" => {
1067            let task_str = task.unwrap_or("");
1068            if task_str.is_empty() {
1069                let header = build_header(file_ref, short, ext, content, line_count, true);
1070                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
1071                let sent = count_tokens(&out);
1072                return (out, sent);
1073            }
1074            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
1075            if keywords.is_empty() {
1076                let header = build_header(file_ref, short, ext, content, line_count, true);
1077                let out = format!(
1078                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
1079                );
1080                let sent = count_tokens(&out);
1081                return (out, sent);
1082            }
1083            let filtered =
1084                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
1085            let filtered_lines = filtered.lines().count();
1086            let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1087                format!("{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
1088            } else {
1089                format!("{short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
1090            };
1091            let graph_ctx = if crate::core::profiles::active_profile()
1092                .output_hints
1093                .graph_context_block()
1094            {
1095                let project_root = detect_project_root(file_path);
1096                crate::core::graph_context::build_graph_context(
1097                    file_path,
1098                    &project_root,
1099                    Some(crate::core::graph_context::GraphContextOptions::default()),
1100                )
1101                .map(|c| crate::core::graph_context::format_graph_context(&c))
1102                .unwrap_or_default()
1103            } else {
1104                String::new()
1105            };
1106
1107            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
1108            let savings = protocol::format_savings(original_tokens, sent);
1109            (
1110                append_compressed_hint(
1111                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
1112                    file_path,
1113                ),
1114                sent,
1115            )
1116        }
1117        "reference" => {
1118            let tok = count_tokens(content);
1119            let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1120                format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})")
1121            } else {
1122                format!("{short}: {line_count} lines, {tok} tok ({ext})")
1123            };
1124            let sent = count_tokens(&output);
1125            let savings = protocol::format_savings(original_tokens, sent);
1126            (format!("{output}\n{savings}"), sent)
1127        }
1128        mode if mode.starts_with("lines:") => {
1129            let range_str = &mode[6..];
1130            let extracted = extract_line_range(content, range_str);
1131            let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1132                format!("{file_ref}={short} {line_count}L lines:{range_str}")
1133            } else {
1134                format!("{short} {line_count}L lines:{range_str}")
1135            };
1136            let sent = count_tokens(&extracted);
1137            let savings = protocol::format_savings(original_tokens, sent);
1138            (format!("{header}\n{extracted}\n{savings}"), sent)
1139        }
1140        unknown => {
1141            let header = build_header(file_ref, short, ext, content, line_count, true);
1142            let out = format!(
1143                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
1144            );
1145            let sent = count_tokens(&out);
1146            (out, sent)
1147        }
1148    }
1149}
1150
/// Extracts the requested 1-based lines from `content`, rendering each one as
/// `{line_number:>4}| {text}` and joining them with newlines.
///
/// `range_str` is a comma-separated list of parts. Each part is either a single
/// line number (`7`) or an inclusive range (`3-9`). A range may omit a bound:
/// `-9` starts at the first line and `3-` runs to the last line. Range bounds are
/// clamped to the file (`0-2` behaves like `1-2`, `4-999` stops at the last line).
///
/// Parts that select nothing are skipped: a single number outside the file, a
/// descending range, and any part whose bound is present but not a number
/// (`foo-bar`, `x-5`). A malformed bound is never treated as "open", so a typo
/// cannot silently expand to the whole file.
///
/// Returns `"No lines matched the range."` when no part selected any line.
fn extract_line_range(content: &str, range_str: &str) -> String {
    /// Parses one side of an `a-b` range: blank means `default`, a number means
    /// itself (saturating when it overflows `usize`), anything else is `None`.
    fn parse_bound(text: &str, default: usize) -> Option<usize> {
        let text = text.trim();
        if text.is_empty() {
            return Some(default);
        }
        match text.parse::<usize>() {
            Ok(n) => Some(n),
            // All digits but too large for usize: treat as "past the end of any file".
            Err(_) if text.bytes().all(|b| b.is_ascii_digit()) => Some(usize::MAX),
            Err(_) => None,
        }
    }

    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    let mut selected: Vec<String> = Vec::new();

    for part in range_str.split(',').map(str::trim) {
        let bounds = match part.split_once('-') {
            Some((start_s, end_s)) => match (parse_bound(start_s, 1), parse_bound(end_s, total)) {
                (Some(start), Some(end)) => Some((start.max(1), end.min(total))),
                _ => None,
            },
            None => part.parse::<usize>().ok().map(|n| (n, n)),
        };

        let (first, last) = match bounds {
            Some(b) => b,
            None => continue,
        };
        // Rejects line 0, lines past the end, and empty/descending ranges.
        if first < 1 || last > total || first > last {
            continue;
        }

        selected.extend(lines[first - 1..last].iter().enumerate().map(|(offset, line)| {
            let n = first + offset;
            format!("{n:>4}| {line}")
        }));
    }

    if selected.is_empty() {
        "No lines matched the range.".to_string()
    } else {
        selected.join("\n")
    }
}
1179
1180fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
1181    let short = protocol::shorten_path(path);
1182    let old_content = cache
1183        .get(path)
1184        .and_then(crate::core::cache::CacheEntry::content);
1185
1186    let new_content = match read_file_lossy(path) {
1187        Ok(c) => c,
1188        Err(e) => {
1189            let msg = format!("ERROR: {e}");
1190            let tokens = count_tokens(&msg);
1191            return (msg, tokens);
1192        }
1193    };
1194
1195    let original_tokens = count_tokens(&new_content);
1196
1197    let diff_output = if let Some(old) = &old_content {
1198        compressor::diff_content(old, &new_content)
1199    } else {
1200        // No previous version cached — store content for future diffs but
1201        // return a short guidance message instead of dumping the full file.
1202        cache.store(path, &new_content);
1203        let msg = format!(
1204            "{file_ref}={short} [no cached version for diff — use mode=full first, then diff on re-read]"
1205        );
1206        let sent = count_tokens(&msg);
1207        return (msg, sent);
1208    };
1209
1210    cache.store(path, &new_content);
1211
1212    let sent = count_tokens(&diff_output);
1213    let savings = protocol::format_savings(original_tokens, sent);
1214    let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1215        format!("{file_ref}={short}")
1216    } else {
1217        short.clone()
1218    };
1219    (format!("{head} [diff]\n{diff_output}\n{savings}"), sent)
1220}
1221
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Sets `LEAN_CTX_META=1` while alive and removes it again on drop.
    ///
    /// Cleaning up in `Drop` means the variable is also removed when an assertion
    /// panics, so a failing header test cannot leak it into later tests.
    /// Create it *after* taking the env lock so it is dropped before the lock.
    struct MetaEnvGuard;

    impl MetaEnvGuard {
        fn enable() -> Self {
            std::env::set_var("LEAN_CTX_META", "1");
            Self
        }
    }

    impl Drop for MetaEnvGuard {
        fn drop(&mut self) {
            std::env::remove_var("LEAN_CTX_META");
        }
    }

    /// Calls `process_mode` with the arguments every mode test shares:
    /// file ref `F1`, extension `rs`, CRP off, and `file` as both short name and path.
    fn run_mode(
        content: &str,
        mode: &str,
        file: &str,
        tokens: usize,
        task: Option<&str>,
    ) -> (String, usize) {
        process_mode(
            content,
            mode,
            "F1",
            file,
            "rs",
            tokens,
            CrpMode::Off,
            file,
            task,
        )
    }

    /// Returns the text of the second output line with any `NNNN| ` prefix removed.
    fn first_extracted_line(output: &str) -> &str {
        let line = output.lines().nth(1).unwrap_or_default().trim();
        line.split_once('|').map_or(line, |(_, rest)| rest.trim())
    }

    #[test]
    fn test_header_toon_format_no_brackets() {
        let _lock = crate::core::data_dir::test_env_lock();
        let _meta = MetaEnvGuard::enable();
        let content = "use std::io;\nfn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 2, false);
        assert!(!header.contains('['));
        assert!(!header.contains(']'));
        assert!(header.contains("F1=main.rs 2L"));
    }

    #[test]
    fn test_header_toon_deps_indented() {
        let _lock = crate::core::data_dir::test_env_lock();
        let _meta = MetaEnvGuard::enable();
        let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 3, true);
        // Only checks the format when the dependency extractor reported anything.
        if header.contains("deps") {
            assert!(
                header.contains("\n deps "),
                "deps should use indented TOON format"
            );
            assert!(
                !header.contains("deps:["),
                "deps should not use bracket format"
            );
        }
    }

    #[test]
    fn test_header_toon_saves_tokens() {
        let _lock = crate::core::data_dir::test_env_lock();
        let _meta = MetaEnvGuard::enable();
        let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
        let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
        let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
        let old_tokens = count_tokens(&old_header);
        let new_tokens = count_tokens(&new_header);
        assert!(
            new_tokens <= old_tokens,
            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
        );
    }

    #[test]
    fn test_tdd_symbols_are_compact() {
        let symbols = [
            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
        ];
        for sym in &symbols {
            let tok = count_tokens(sym);
            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
        }
    }

    #[test]
    fn test_task_mode_filters_content() {
        // Every 20th line mentions the task keyword; the rest is unrelated filler.
        let content = (0..200)
            .map(|i| {
                if i % 20 == 0 {
                    format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
                } else {
                    format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
                }
            })
            .collect::<Vec<_>>()
            .join("\n");
        let full_tokens = count_tokens(&content);
        let (result, result_tokens) = run_mode(
            &content,
            "task",
            "test.rs",
            full_tokens,
            Some("fix bug in validate_token"),
        );
        assert!(
            result_tokens < full_tokens,
            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
        );
        assert!(
            result.contains("task-filtered"),
            "output should contain task-filtered marker"
        );
    }

    #[test]
    fn test_task_mode_without_task_returns_full() {
        let content = "fn main() {}\nfn helper() {}\n";
        let tokens = count_tokens(content);
        let (result, _sent) = run_mode(content, "task", "test.rs", tokens, None);
        assert!(
            result.contains("no task set"),
            "should indicate no task: {result}"
        );
    }

    #[test]
    fn test_reference_mode_one_line() {
        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
        let tokens = count_tokens(content);
        let (result, _sent) = run_mode(content, "reference", "test.rs", tokens, None);
        let line_total = result.lines().count();
        assert!(
            line_total <= 3,
            "reference mode should be very compact, got {} lines",
            line_total
        );
        assert!(result.contains("lines"), "should contain line count");
        assert!(result.contains("tok"), "should contain token count");
    }

    #[test]
    fn cached_lines_mode_invalidates_on_mtime_change() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("file.txt");
        let p = path.to_string_lossy().to_string();

        std::fs::write(&path, "one\nsecond\n").unwrap();
        let mut cache = SessionCache::new();

        let r1 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        assert_eq!(first_extracted_line(&r1.content), "one");

        // Wait before rewriting so the file's mtime differs from the first read
        // (presumably to cope with coarse mtime granularity — confirm before shortening).
        std::thread::sleep(Duration::from_secs(1));
        std::fs::write(&path, "two\nsecond\n").unwrap();

        let r2 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        assert_eq!(first_extracted_line(&r2.content), "two");
    }

    #[test]
    #[cfg_attr(tarpaulin, ignore)]
    fn benchmark_task_conditioned_compression() {
        // Keep this reasonably small so CI coverage instrumentation stays fast.
        let content = generate_benchmark_code(200);
        let full_tokens = count_tokens(&content);
        let task = Some("fix authentication in validate_token");

        let tokens_for = |mode: &str| run_mode(&content, mode, "server.rs", full_tokens, task).1;
        let full_tok = tokens_for("full");
        let task_tok = tokens_for("task");
        let sig_tok = tokens_for("signatures");
        let ref_tok = tokens_for("reference");

        // Percentage saved relative to the full-mode baseline.
        let savings_pct = |tok: usize| (1.0 - tok as f64 / full_tok as f64) * 100.0;

        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
        eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
        eprintln!("  full:       {full_tok:>6} tokens (baseline)");
        eprintln!(
            "  task:       {task_tok:>6} tokens ({:.0}% savings)",
            savings_pct(task_tok)
        );
        eprintln!(
            "  signatures: {sig_tok:>6} tokens ({:.0}% savings)",
            savings_pct(sig_tok)
        );
        eprintln!(
            "  reference:  {ref_tok:>6} tokens ({:.0}% savings)",
            savings_pct(ref_tok)
        );
        eprintln!("================================================\n");

        assert!(task_tok < full_tok, "task mode should save tokens");
        assert!(sig_tok < full_tok, "signatures should save tokens");
        assert!(ref_tok < sig_tok, "reference should be most compact");
    }

    /// Generates a synthetic Rust source: a fixed `Server` prelude containing
    /// `validate_token`, padded with handler stubs up to roughly `lines` lines.
    fn generate_benchmark_code(lines: usize) -> String {
        const PRELUDE: [&str; 17] = [
            "use std::collections::HashMap;",
            "use crate::core::auth;",
            "",
            "pub struct Server {",
            "    config: Config,",
            "    cache: HashMap<String, String>,",
            "}",
            "",
            "impl Server {",
            "    pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {",
            "        let decoded = auth::decode_jwt(token)?;",
            "        if decoded.exp < chrono::Utc::now().timestamp() {",
            "            return Err(AuthError::Expired);",
            "        }",
            "        Ok(decoded.claims)",
            "    }",
            "",
        ];

        let mut code: Vec<String> = Vec::with_capacity(lines);
        code.extend(PRELUDE.iter().map(|line| line.to_string()));

        let remaining = lines.saturating_sub(code.len());
        for i in 0..remaining {
            // Every block of 30 lines opens a handler, fills it, and closes it.
            let line = match i % 30 {
                0 => format!("    pub fn handler_{i}(&self, req: Request) -> Response {{"),
                29 => "    }".to_string(),
                _ => format!("        let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"),
            };
            code.push(line);
        }
        code.push("}".to_string());
        code.join("\n")
    }

    #[test]
    fn instruction_file_detection() {
        let instruction_files = [
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
            "/workspace/.cursor/rules/lean-ctx.mdc",
            "/project/AGENTS.md",
            "/project/.cursorrules",
            "/home/user/.claude/rules/my-rule.md",
            "/skills/some-skill/README.md",
        ];
        for path in &instruction_files {
            assert!(is_instruction_file(path));
        }

        let regular_files = [
            "/project/src/main.rs",
            "/project/config.json",
            "/project/data/report.csv",
        ];
        for path in &regular_files {
            assert!(!is_instruction_file(path));
        }
    }

    #[test]
    fn resolve_auto_mode_returns_full_for_instruction_files() {
        let cases: [(&str, usize, Option<&str>, &str); 3] = [
            (
                "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
                5000,
                Some("read"),
                "SKILL.md must always be read in full",
            ),
            (
                "/workspace/AGENTS.md",
                3000,
                Some("read"),
                "AGENTS.md must always be read in full",
            ),
            (
                "/workspace/.cursorrules",
                2000,
                None,
                ".cursorrules must always be read in full",
            ),
        ];
        for &(path, tokens, task, why) in &cases {
            let mode = resolve_auto_mode(path, tokens, task);
            assert_eq!(mode, "full", "{why}");
        }
    }
}