// lean_ctx/tools/ctx_read.rs

1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Result of a read: the rendered text, the mode that was actually applied,
/// and a token count computed while the mode was processed.
///
/// `Debug` and `Clone` are derived so callers can log or duplicate a result
/// without hand-written plumbing.
#[derive(Debug, Clone)]
pub struct ReadOutput {
    /// Text returned to the caller.
    pub content: String,
    /// Mode name after resolution (for example the mode chosen for `auto`).
    pub resolved_mode: String,
    /// Approximate output token count from mode processing.
    /// The dispatch layer recounts the final assembled string for accurate savings.
    pub output_tokens: usize,
}
22
/// Marker line appended to compressed output so readers know it is not the full source.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Modes whose rendered output is kept in the per-file compressed-output cache.
const CACHEABLE_MODES: &[&str] = &["map", "signatures"];

/// Returns `true` when `mode` is one of [`CACHEABLE_MODES`] (exact, case-sensitive match).
fn is_cacheable_mode(mode: &str) -> bool {
    CACHEABLE_MODES.iter().any(|candidate| *candidate == mode)
}
30
31fn compressed_cache_key(mode: &str, crp_mode: CrpMode, task: Option<&str>) -> String {
32    let base = if crp_mode.is_tdd() {
33        format!("{mode}:tdd")
34    } else {
35        mode.to_string()
36    };
37    // map/signatures output now embeds a task-relevant body, so task-aware and
38    // task-free variants must cache under distinct keys.
39    match task.map(str::trim).filter(|t| !t.is_empty()) {
40        Some(t) => {
41            use std::hash::{Hash, Hasher};
42            let mut h = std::collections::hash_map::DefaultHasher::new();
43            t.hash(&mut h);
44            format!("{base}:t{:x}", h.finish())
45        }
46        None => base,
47    }
48}
49
/// Picks a short "proof" line from `content` for cache-hit stubs.
///
/// Returns `None` when `read_count` is below 2 or when every line is blank.
/// Otherwise returns the first non-blank line, trimmed. Lines longer than 60 bytes
/// are cut to at most 57 bytes (backing off to a char boundary) and get `...` appended.
fn cache_hit_proof_line(content: &str, read_count: u32) -> Option<String> {
    const MAX_PROOF_BYTES: usize = 60;
    const KEPT_BYTES: usize = 57;

    if read_count < 2 {
        return None;
    }

    let trimmed = content.lines().map(str::trim).find(|l| !l.is_empty())?;
    if trimmed.len() <= MAX_PROOF_BYTES {
        return Some(trimmed.to_owned());
    }

    // Index 0 is always a char boundary, so the search cannot come back empty.
    let cut = (0..=KEPT_BYTES)
        .rev()
        .find(|&idx| trimmed.is_char_boundary(idx))
        .unwrap_or(0);
    Some(format!("{}...", &trimmed[..cut]))
}
69
70fn append_compressed_hint(output: &str, file_path: &str) -> String {
71    if !crate::core::profiles::active_profile()
72        .output_hints
73        .compressed_hint()
74    {
75        return output.to_string();
76    }
77    format!(
78        "{output}\n{COMPRESSED_HINT}\n  ctx_read(\"{file_path}\", mode=\"full\") | ctx_retrieve(\"{file_path}\")"
79    )
80}
81
82/// Reads a file as UTF-8 with lossy fallback, enforcing binary detection and max read size limit.
83/// Defense-in-depth: verifies that the canonical path stays within the process's project root
84/// (if determinable) even though callers SHOULD have already jail-checked the path.
85pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
86    if crate::core::binary_detect::is_binary_file(path) {
87        let msg = crate::core::binary_detect::binary_file_message(path);
88        return Err(std::io::Error::other(msg));
89    }
90
91    {
92        let canonical =
93            crate::core::pathutil::safe_canonicalize_bounded(std::path::Path::new(path), 2000);
94        if let Ok(cwd) = std::env::current_dir() {
95            let root = crate::core::pathutil::safe_canonicalize_bounded(&cwd, 2000);
96            if !canonical.starts_with(&root) {
97                let allow = crate::core::pathjail::allow_paths_from_env_and_config();
98                let data_dir_ok = crate::core::data_dir::lean_ctx_data_dir()
99                    .ok()
100                    .is_some_and(|d| canonical.starts_with(d));
101                let tmp_ok = canonical.starts_with(std::env::temp_dir());
102                if !allow.iter().any(|a| canonical.starts_with(a)) && !data_dir_ok && !tmp_ok {
103                    tracing::warn!(
104                        "defense-in-depth: path may escape project root: {}",
105                        canonical.display()
106                    );
107                }
108            }
109        }
110    }
111
112    let cap = crate::core::limits::max_read_bytes();
113
114    let file = open_with_retry(path)?;
115    let meta = file
116        .metadata()
117        .map_err(|e| std::io::Error::other(format!("cannot stat open file descriptor: {e}")))?;
118    if meta.len() > cap as u64 {
119        return Err(std::io::Error::other(format!(
120            "file too large ({} bytes, limit {} bytes via LCTX_MAX_READ_BYTES). \
121             Increase the limit or use a line-range read: mode=\"lines:1-100\"",
122            meta.len(),
123            cap
124        )));
125    }
126
127    use std::io::Read;
128    let mut bytes = Vec::with_capacity(meta.len() as usize);
129    std::io::BufReader::new(file).read_to_end(&mut bytes)?;
130    match String::from_utf8(bytes) {
131        Ok(s) => Ok(s),
132        Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
133    }
134}
135
136/// Opens a file, retrying once after a brief pause on NotFound.
137/// Works around overlay/FUSE stat-cache races in container runtimes (Docker, Codex).
138/// Uses O_NOFOLLOW on Unix for TOCTOU symlink protection.
139fn open_with_retry(path: &str) -> Result<std::fs::File, std::io::Error> {
140    match open_nofollow(path) {
141        Ok(f) => Ok(f),
142        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
143            std::thread::sleep(std::time::Duration::from_millis(50));
144            open_nofollow(path).map_err(|e| {
145                if e.kind() == std::io::ErrorKind::NotFound {
146                    std::io::Error::other(format!(
147                        "file not found: {path} — verify the path with ctx_tree or ctx_search"
148                    ))
149                } else {
150                    e
151                }
152            })
153        }
154        Err(e) => Err(e),
155    }
156}
157
158#[cfg(unix)]
159fn open_nofollow(path: &str) -> Result<std::fs::File, std::io::Error> {
160    use std::os::unix::fs::OpenOptionsExt;
161    use std::path::Path;
162
163    let p = Path::new(path);
164    // Canonicalize the parent directory (resolving symlinks in the directory path)
165    // but apply O_NOFOLLOW only to the final file component. This prevents
166    // symlink-following attacks on the target file while allowing legitimate
167    // directory symlinks (e.g., /tmp → /private/tmp on macOS).
168    if let (Some(parent), Some(filename)) = (p.parent(), p.file_name()) {
169        if parent.exists() {
170            let canonical_parent = crate::core::pathutil::safe_canonicalize_bounded(parent, 2000);
171            let canonical_path = canonical_parent.join(filename);
172            return std::fs::OpenOptions::new()
173                .read(true)
174                .custom_flags(libc::O_NOFOLLOW)
175                .open(&canonical_path);
176        }
177    }
178
179    // Fallback: direct open with O_NOFOLLOW
180    std::fs::OpenOptions::new()
181        .read(true)
182        .custom_flags(libc::O_NOFOLLOW)
183        .open(path)
184}
185
/// Non-Unix: plain read-only open; no `O_NOFOLLOW` equivalent is applied here.
#[cfg(not(unix))]
fn open_nofollow(path: &str) -> Result<std::fs::File, std::io::Error> {
    std::fs::OpenOptions::new().read(true).open(path)
}
190
191/// Reads a file through the cache and applies the requested compression mode.
192pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
193    handle_with_options(cache, path, mode, false, crp_mode, None)
194}
195
196/// Like `handle`, but invalidates the cache first to force a fresh disk read.
197pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
198    handle_with_options(cache, path, mode, true, crp_mode, None)
199}
200
201/// Reads a file with task-aware filtering to prioritize task-relevant content.
202pub fn handle_with_task(
203    cache: &mut SessionCache,
204    path: &str,
205    mode: &str,
206    crp_mode: CrpMode,
207    task: Option<&str>,
208) -> String {
209    handle_with_options(cache, path, mode, false, crp_mode, task)
210}
211
212/// Like `handle_with_task`, also returns the resolved mode name and pre-counted tokens.
213pub fn handle_with_task_resolved(
214    cache: &mut SessionCache,
215    path: &str,
216    mode: &str,
217    crp_mode: CrpMode,
218    task: Option<&str>,
219) -> ReadOutput {
220    handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
221}
222
223/// Fresh read with task-aware filtering (invalidates cache first).
224pub fn handle_fresh_with_task(
225    cache: &mut SessionCache,
226    path: &str,
227    mode: &str,
228    crp_mode: CrpMode,
229    task: Option<&str>,
230) -> String {
231    handle_with_options(cache, path, mode, true, crp_mode, task)
232}
233
234/// Fresh read with task-aware filtering, also returns the resolved mode name and pre-counted tokens.
235pub fn handle_fresh_with_task_resolved(
236    cache: &mut SessionCache,
237    path: &str,
238    mode: &str,
239    crp_mode: CrpMode,
240    task: Option<&str>,
241) -> ReadOutput {
242    handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
243}
244
245fn handle_with_options(
246    cache: &mut SessionCache,
247    path: &str,
248    mode: &str,
249    fresh: bool,
250    crp_mode: CrpMode,
251    task: Option<&str>,
252) -> String {
253    handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).content
254}
255
/// Reports whether this process should be treated as a subagent (forked agent).
///
/// True when `LEAN_CTX_FORCE_FRESH` is `"1"` or `"true"`, or when `CURSOR_TASK_ID`
/// is set to a non-empty value. The answer is computed once and then reused for the
/// lifetime of the process. Subagents inherit stale parent caches, so callers use
/// this to force fresh reads.
fn is_subagent_context() -> bool {
    static IS_SUBAGENT: std::sync::OnceLock<bool> = std::sync::OnceLock::new();

    *IS_SUBAGENT.get_or_init(|| {
        let forced_fresh = std::env::var("LEAN_CTX_FORCE_FRESH")
            .is_ok_and(|v| matches!(v.as_str(), "1" | "true"));
        forced_fresh || std::env::var("CURSOR_TASK_ID").is_ok_and(|v| !v.is_empty())
    })
}
267
268fn handle_with_options_resolved(
269    cache: &mut SessionCache,
270    path: &str,
271    mode: &str,
272    fresh: bool,
273    crp_mode: CrpMode,
274    task: Option<&str>,
275) -> ReadOutput {
276    let effective_fresh = fresh || is_subagent_context();
277
278    if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
279        bt.next_seq();
280    }
281    let mut result = handle_with_options_inner(cache, path, mode, effective_fresh, crp_mode, task);
282
283    if let Some(entry) = cache.get_mut(path) {
284        entry.last_mode.clone_from(&result.resolved_mode);
285    }
286
287    let dedup_allowed = matches!(
288        result.resolved_mode.as_str(),
289        "map" | "signatures" | "aggressive" | "entropy" | "task"
290    );
291    if dedup_allowed {
292        if let Some(deduped) = cache.apply_dedup(path, &result.content) {
293            let new_tokens = count_tokens(&deduped);
294            if new_tokens < result.output_tokens {
295                result.content = deduped;
296                result.output_tokens = new_tokens;
297            }
298        }
299    }
300
301    if let Ok(mut bt) = crate::core::bounce_tracker::global().lock() {
302        let original_tokens = cache.get(path).map_or(0, |e| e.original_tokens);
303        bt.record_read(
304            path,
305            &result.resolved_mode,
306            result.output_tokens,
307            original_tokens,
308        );
309    }
310
311    result
312}
313
314fn handle_with_options_inner(
315    cache: &mut SessionCache,
316    path: &str,
317    mode: &str,
318    fresh: bool,
319    crp_mode: CrpMode,
320    task: Option<&str>,
321) -> ReadOutput {
322    let file_ref = cache.get_file_ref(path);
323    let short = protocol::shorten_path(path);
324    let ext = Path::new(path)
325        .extension()
326        .and_then(|e| e.to_str())
327        .unwrap_or("");
328
329    if fresh {
330        if mode == "diff" {
331            let warning = "[warning] fresh+diff is redundant — fresh invalidates cache, no diff possible. Use mode=full with fresh=true instead.";
332            return ReadOutput {
333                content: warning.to_string(),
334                resolved_mode: "diff".into(),
335                output_tokens: count_tokens(warning),
336            };
337        }
338        cache.invalidate(path);
339    }
340
341    if mode == "diff" {
342        let (out, _) = handle_diff(cache, path, &file_ref);
343        let out = crate::core::redaction::redact_text_if_enabled(&out);
344        let sent = count_tokens(&out);
345        return ReadOutput {
346            content: out,
347            resolved_mode: "diff".into(),
348            output_tokens: sent,
349        };
350    }
351
352    if mode != "full" {
353        if let Some(existing) = cache.get(path) {
354            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
355            if stale {
356                cache.invalidate(path);
357            }
358        }
359    }
360
361    // Extract immutable data from cache entry, then drop the borrow before
362    // any mutable operations (record_cache_hit, set_compressed, invalidate).
363    let cache_snapshot = cache.get(path).map(|existing| {
364        (
365            existing.stored_mtime,
366            existing.read_count,
367            existing.line_count,
368            existing.original_tokens,
369            existing.content(),
370        )
371    });
372
373    if let Some((cached_mtime, read_count, line_count, original_tokens, content_opt)) =
374        cache_snapshot
375    {
376        if mode == "full" {
377            let no_deg = crate::core::config::Config::load().no_degrade_effective();
378            let prof = crate::core::profiles::active_profile();
379            let force_full = no_deg
380                || (prof.read.default_mode_effective() == "full"
381                    && prof.compression.crp_mode_effective() == "off");
382            let policy_allows_stub =
383                crate::server::compaction_sync::effective_cache_policy() != "safe" && !force_full;
384            if policy_allows_stub
385                && !crate::core::cache::is_cache_entry_stale(path, cached_mtime)
386                && cache.is_full_delivered(path)
387            {
388                cache.record_cache_hit(path);
389                let out = if crate::core::protocol::meta_visible() {
390                    format!(
391                        "{file_ref}={short} [unchanged {line_count}L]\nUnchanged on disk. Use fresh=true to force re-read.",
392                        )
393                } else {
394                    let proof = content_opt
395                        .as_deref()
396                        .and_then(|c| cache_hit_proof_line(c, read_count));
397                    let reads_note = if read_count > 3 {
398                        format!(" (read {}x)", read_count + 1)
399                    } else {
400                        String::new()
401                    };
402                    match proof {
403                        Some(p) => format!(
404                            "{file_ref}={short} [unchanged {line_count}L{reads_note} | \"{p}\"]"
405                        ),
406                        None => format!("{file_ref}={short} [unchanged {line_count}L{reads_note}]"),
407                    }
408                };
409                let out = crate::core::redaction::redact_text_if_enabled(&out);
410                let sent = count_tokens(&out);
411                return ReadOutput {
412                    content: out,
413                    resolved_mode: "full".into(),
414                    output_tokens: sent,
415                };
416            }
417            let (out, _) = handle_full_with_auto_delta(cache, path, &file_ref, &short, ext, task);
418            let out = crate::core::redaction::redact_text_if_enabled(&out);
419            let sent = count_tokens(&out);
420            return ReadOutput {
421                content: out,
422                resolved_mode: "full".into(),
423                output_tokens: sent,
424            };
425        }
426
427        // Resolve mode first so we can check compressed output cache BEFORE
428        // decompressing the full content (avoids ~2-5ms zstd overhead on hits).
429        let resolved_mode = if mode == "auto" {
430            resolve_auto_mode(path, original_tokens, task)
431        } else {
432            mode.to_string()
433        };
434
435        if is_cacheable_mode(&resolved_mode) {
436            let cache_key = compressed_cache_key(&resolved_mode, crp_mode, task);
437            let compressed_hit = cache.get_compressed(path, &cache_key).cloned();
438            if let Some(cached_output) = compressed_hit {
439                cache.record_cache_hit(path);
440                let out = crate::core::redaction::redact_text_if_enabled(&cached_output);
441                let sent = count_tokens(&out);
442                return ReadOutput {
443                    content: out,
444                    resolved_mode,
445                    output_tokens: sent,
446                };
447            }
448        }
449
450        if let Some(content) = content_opt {
451            let (out, _) = process_mode(
452                &content,
453                &resolved_mode,
454                &file_ref,
455                &short,
456                ext,
457                original_tokens,
458                crp_mode,
459                path,
460                task,
461            );
462            if is_cacheable_mode(&resolved_mode) {
463                let cache_key = compressed_cache_key(&resolved_mode, crp_mode, task);
464                cache.set_compressed(path, &cache_key, out.clone());
465            }
466            let out = crate::core::redaction::redact_text_if_enabled(&out);
467            let sent = count_tokens(&out);
468            return ReadOutput {
469                content: out,
470                resolved_mode,
471                output_tokens: sent,
472            };
473        }
474        cache.invalidate(path);
475    }
476
477    let content = match read_file_lossy(path) {
478        Ok(c) => c,
479        Err(e) => {
480            let msg = format!("ERROR: {e}");
481            let tokens = count_tokens(&msg);
482            return ReadOutput {
483                content: msg,
484                resolved_mode: "error".into(),
485                output_tokens: tokens,
486            };
487        }
488    };
489
490    let store_result = cache.store(path, &content);
491
492    // Skip expensive hint computation for line-range reads and first reads.
493    // Hints are only useful from the 2nd read onwards when the file is contextually relevant.
494    let is_line_range = mode.starts_with("lines:");
495    let hints = crate::core::profiles::active_profile().output_hints;
496    let is_repeat_read = store_result.read_count > 1;
497    let similar_hint = if !is_line_range && is_repeat_read && hints.semantic_hint() {
498        find_similar_and_update_semantic_index(path, &content)
499    } else {
500        None
501    };
502    let graph_hint = if !is_line_range && is_repeat_read && hints.related_hint() {
503        build_graph_related_hint(path)
504    } else {
505        None
506    };
507
508    if mode == "full" {
509        cache.mark_full_delivered(path);
510        let (mut output, _) = format_full_output(
511            &file_ref,
512            &short,
513            ext,
514            &content,
515            store_result.original_tokens,
516            store_result.line_count,
517            task,
518        );
519        if let Some(hint) = &graph_hint {
520            output.push_str(&format!("\n{hint}"));
521        }
522        if let Some(hint) = similar_hint {
523            output.push_str(&format!("\n{hint}"));
524        }
525        let output = crate::core::redaction::redact_text_if_enabled(&output);
526        let sent = count_tokens(&output);
527        return ReadOutput {
528            content: output,
529            resolved_mode: "full".into(),
530            output_tokens: sent,
531        };
532    }
533
534    let resolved_mode = if mode == "auto" {
535        resolve_auto_mode(path, store_result.original_tokens, task)
536    } else {
537        mode.to_string()
538    };
539
540    let (mut output, _sent) = process_mode(
541        &content,
542        &resolved_mode,
543        &file_ref,
544        &short,
545        ext,
546        store_result.original_tokens,
547        crp_mode,
548        path,
549        task,
550    );
551    if let Some(hint) = &graph_hint {
552        output.push_str(&format!("\n{hint}"));
553    }
554    if let Some(hint) = similar_hint {
555        output.push_str(&format!("\n{hint}"));
556    }
557    if is_cacheable_mode(&resolved_mode) {
558        let cache_key = compressed_cache_key(&resolved_mode, crp_mode, task);
559        cache.set_compressed(path, &cache_key, output.clone());
560    }
561    let output = crate::core::redaction::redact_text_if_enabled(&output);
562    let final_tokens = count_tokens(&output);
563    ReadOutput {
564        content: output,
565        resolved_mode,
566        output_tokens: final_tokens,
567    }
568}
569
/// Returns `true` when `path` looks like an agent instruction/rules file.
///
/// Matching is case-insensitive. A path matches when its file name is one of a
/// fixed set (e.g. `skill.md`, `agents.md`, `.cursorrules`) or when the path
/// contains one of the known directory/file markers (e.g. `/skills/`).
pub fn is_instruction_file(path: &str) -> bool {
    const INSTRUCTION_FILE_NAMES: [&str; 7] = [
        "skill.md",
        "agents.md",
        "rules.md",
        ".cursorrules",
        ".clinerules",
        "lean-ctx.md",
        "lean-ctx.mdc",
    ];
    const INSTRUCTION_PATH_MARKERS: [&str; 4] = [
        "/skills/",
        "/.cursor/rules/",
        "/.claude/rules/",
        "/agents.md",
    ];

    let lower = path.to_lowercase();
    let file_name = std::path::Path::new(&lower)
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("");

    INSTRUCTION_FILE_NAMES.contains(&file_name)
        || INSTRUCTION_PATH_MARKERS
            .iter()
            .any(|marker| lower.contains(*marker))
}
591
592/// Delegates to the unified `auto_mode_resolver::resolve()`.
593fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
594    let ctx = crate::core::auto_mode_resolver::AutoModeContext {
595        path: file_path,
596        token_count: original_tokens,
597        task,
598        cache: None,
599    };
600    crate::core::auto_mode_resolver::resolve(&ctx).mode
601}
602
603fn find_similar_and_update_semantic_index(path: &str, content: &str) -> Option<String> {
604    const MAX_CONTENT_BYTES_FOR_SEMANTIC: usize = 32_768;
605
606    if content.len() > MAX_CONTENT_BYTES_FOR_SEMANTIC {
607        return None;
608    }
609
610    let cfg = crate::core::config::Config::load();
611    let profile = crate::core::config::MemoryProfile::effective(&cfg);
612    if !profile.semantic_cache_enabled() {
613        return None;
614    }
615
616    let project_root = detect_project_root(path);
617    let session_id = format!("{}", std::process::id());
618    let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
619
620    let similar = index.find_similar(content, 0.7);
621    let relevant: Vec<_> = similar
622        .into_iter()
623        .filter(|(p, _)| p != path)
624        .take(3)
625        .collect();
626
627    index.add_file(path, content, &session_id);
628    if let Err(e) = index.save(&project_root) {
629        tracing::warn!("lean-ctx: failed to persist semantic index: {e}");
630    }
631
632    if relevant.is_empty() {
633        return None;
634    }
635
636    let hints: Vec<String> = relevant
637        .iter()
638        .map(|(p, score)| format!("  {p} ({:.0}% similar)", score * 100.0))
639        .collect();
640
641    Some(format!(
642        "[semantic: {} similar file(s) in cache]\n{}",
643        relevant.len(),
644        hints.join("\n")
645    ))
646}
647
648fn detect_project_root(path: &str) -> String {
649    crate::core::protocol::detect_project_root_or_cwd(path)
650}
651
652fn build_graph_related_hint(path: &str) -> Option<String> {
653    let project_root = detect_project_root(path);
654    crate::core::graph_context::build_related_hint(path, &project_root, 5)
655}
656
657const AUTO_DELTA_THRESHOLD: f64 = 0.6;
658
659/// Re-reads from disk; if content changed and delta is compact, sends auto-delta.
660fn handle_full_with_auto_delta(
661    cache: &mut SessionCache,
662    path: &str,
663    file_ref: &str,
664    short: &str,
665    ext: &str,
666    task: Option<&str>,
667) -> (String, usize) {
668    let _mode_guard = crate::core::savings_footer::ModeGuard::new("full");
669    let Ok(disk_content) = read_file_lossy(path) else {
670        cache.record_cache_hit(path);
671        if let Some(existing) = cache.get(path) {
672            if !crate::core::protocol::meta_visible() {
673                if let Some(cached) = existing.content() {
674                    return format_full_output(
675                        file_ref,
676                        short,
677                        ext,
678                        &cached,
679                        existing.original_tokens,
680                        existing.line_count,
681                        task,
682                    );
683                }
684            }
685            let out = format!(
686                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
687                existing.read_count, existing.line_count
688            );
689            let sent = count_tokens(&out);
690            return (out, sent);
691        }
692        let out = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
693            format!("[file read failed and no cached version available] {file_ref}={short}")
694        } else {
695            format!("[file read failed and no cached version available] {short}")
696        };
697        let sent = count_tokens(&out);
698        return (out, sent);
699    };
700
701    let no_deg = crate::core::config::Config::load().no_degrade_effective();
702    let prof = crate::core::profiles::active_profile();
703    let force_full = no_deg
704        || (prof.read.default_mode_effective() == "full"
705            && prof.compression.crp_mode_effective() == "off");
706
707    let old_content = cache
708        .get(path)
709        .and_then(crate::core::cache::CacheEntry::content)
710        .unwrap_or_default();
711    let store_result = cache.store(path, &disk_content);
712
713    if store_result.was_hit {
714        let policy_allows_stub =
715            crate::server::compaction_sync::effective_cache_policy() != "safe" && !force_full;
716        if policy_allows_stub && store_result.full_content_delivered {
717            let out = if crate::core::protocol::meta_visible() {
718                format!(
719                    "{file_ref}={short} [unchanged {}L]\nUnchanged on disk. Use fresh=true to force re-read.",
720                    store_result.line_count
721                )
722            } else {
723                let proof = cache_hit_proof_line(&disk_content, store_result.read_count);
724                let reads_note = if store_result.read_count > 3 {
725                    format!(" (read {}x)", store_result.read_count)
726                } else {
727                    String::new()
728                };
729                match proof {
730                    Some(p) => format!(
731                        "{file_ref}={short} [unchanged {}L{reads_note} | \"{p}\"]",
732                        store_result.line_count
733                    ),
734                    None => format!(
735                        "{file_ref}={short} [unchanged {}L{reads_note}]",
736                        store_result.line_count
737                    ),
738                }
739            };
740            let sent = count_tokens(&out);
741            return (out, sent);
742        }
743        cache.mark_full_delivered(path);
744        return format_full_output(
745            file_ref,
746            short,
747            ext,
748            &disk_content,
749            store_result.original_tokens,
750            store_result.line_count,
751            task,
752        );
753    }
754
755    let diff = compressor::diff_content(&old_content, &disk_content);
756    let diff_tokens = count_tokens(&diff);
757    let full_tokens = store_result.original_tokens;
758
759    if !force_full
760        && full_tokens > 0
761        && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD)
762    {
763        let savings = protocol::format_savings(full_tokens, diff_tokens);
764        let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
765            format!("{file_ref}={short}")
766        } else {
767            short.to_string()
768        };
769        let out = format!(
770            "{head} [auto-delta] ∆{}L\n{diff}\n{savings}",
771            disk_content.lines().count()
772        );
773        return (out, diff_tokens);
774    }
775
776    format_full_output(
777        file_ref,
778        short,
779        ext,
780        &disk_content,
781        store_result.original_tokens,
782        store_result.line_count,
783        task,
784    )
785}
786
787fn format_full_output(
788    file_ref: &str,
789    short: &str,
790    ext: &str,
791    content: &str,
792    original_tokens: usize,
793    line_count: usize,
794    _task: Option<&str>,
795) -> (String, usize) {
796    let _mode_guard = crate::core::savings_footer::ModeGuard::new("full");
797    let tokens = original_tokens;
798    let metadata = build_header(file_ref, short, ext, content, line_count, true);
799
800    let output = format!("{metadata}\n{content}");
801    let sent = count_tokens(&output);
802    (protocol::append_savings(&output, tokens, sent), sent)
803}
804
805fn build_header(
806    file_ref: &str,
807    short: &str,
808    ext: &str,
809    content: &str,
810    line_count: usize,
811    include_deps: bool,
812) -> String {
813    let mut header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
814        format!("{file_ref}={short} {line_count}L")
815    } else {
816        format!("{short} {line_count}L")
817    };
818
819    if include_deps {
820        let dep_info = deps::extract_deps(content, ext);
821        if !dep_info.imports.is_empty() {
822            let imports_str: Vec<&str> = dep_info
823                .imports
824                .iter()
825                .take(8)
826                .map(std::string::String::as_str)
827                .collect();
828            header.push_str(&format!("\n deps {}", imports_str.join(",")));
829        }
830        if !dep_info.exports.is_empty() {
831            let exports_str: Vec<&str> = dep_info
832                .exports
833                .iter()
834                .take(8)
835                .map(std::string::String::as_str)
836                .collect();
837            header.push_str(&format!("\n exports {}", exports_str.join(",")));
838        }
839    }
840
841    header
842}
843
844#[allow(clippy::too_many_arguments)]
845fn process_mode(
846    content: &str,
847    mode: &str,
848    file_ref: &str,
849    short: &str,
850    ext: &str,
851    original_tokens: usize,
852    crp_mode: CrpMode,
853    file_path: &str,
854    task: Option<&str>,
855) -> (String, usize) {
856    let _mode_guard = crate::core::savings_footer::ModeGuard::new(mode);
857    let line_count = content.lines().count();
858
859    match mode {
860        "auto" => {
861            let chosen = resolve_auto_mode(file_path, original_tokens, task);
862            process_mode(
863                content,
864                &chosen,
865                file_ref,
866                short,
867                ext,
868                original_tokens,
869                crp_mode,
870                file_path,
871                task,
872            )
873        }
874        "full" => format_full_output(
875            file_ref,
876            short,
877            ext,
878            content,
879            original_tokens,
880            line_count,
881            task,
882        ),
883        "signatures" => {
884            let sigs = signatures::extract_signatures(content, ext);
885            let dep_info = deps::extract_deps(content, ext);
886
887            let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
888                format!("{file_ref}={short} {line_count}L")
889            } else {
890                format!("{short} {line_count}L")
891            };
892            if !dep_info.imports.is_empty() {
893                let imports_str: Vec<&str> = dep_info
894                    .imports
895                    .iter()
896                    .take(8)
897                    .map(std::string::String::as_str)
898                    .collect();
899                output.push_str(&format!("\n deps {}", imports_str.join(",")));
900            }
901            for sig in &sigs {
902                output.push('\n');
903                if crp_mode.is_tdd() {
904                    output.push_str(&sig.to_tdd());
905                } else {
906                    output.push_str(&sig.to_compact());
907                }
908            }
909            if let Some(body) = task_relevant_body(content, file_path, ext, task) {
910                output.push('\n');
911                output.push_str(&body);
912            }
913            let sent = count_tokens(&output);
914            (
915                append_compressed_hint(
916                    &protocol::append_savings(&output, original_tokens, sent),
917                    file_path,
918                ),
919                sent,
920            )
921        }
922        "map" => {
923            if ext == "php" {
924                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
925                {
926                    let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
927                        format!("{file_ref}={short} {line_count}L\n{php_map}")
928                    } else {
929                        format!("{short} {line_count}L\n{php_map}")
930                    };
931                    let sent = count_tokens(&output);
932                    let output = protocol::append_savings(&output, original_tokens, sent);
933                    return (append_compressed_hint(&output, file_path), sent);
934                }
935            }
936
937            let structured = match ext {
938                "md" | "mdx" | "rst" => {
939                    crate::core::structured_read::extract_markdown_outline(content)
940                }
941                "json" => crate::core::structured_read::extract_json_structure(content),
942                "yaml" | "yml" => crate::core::structured_read::extract_yaml_structure(content),
943                "toml" => crate::core::structured_read::extract_toml_structure(content),
944                _ if file_path.to_lowercase().ends_with(".lock")
945                    || file_path.to_lowercase().ends_with("go.sum") =>
946                {
947                    crate::core::structured_read::extract_lock_summary(content, file_path)
948                }
949                _ => String::new(),
950            };
951
952            if !structured.is_empty() {
953                let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
954                    format!("{file_ref}={short} {line_count}L\n{structured}")
955                } else {
956                    format!("{short} {line_count}L\n{structured}")
957                };
958                let sent = count_tokens(&output);
959                output = protocol::append_savings(&output, original_tokens, sent);
960                return (append_compressed_hint(&output, file_path), sent);
961            }
962
963            let sigs = signatures::extract_signatures(content, ext);
964            let dep_info = deps::extract_deps(content, ext);
965
966            let mut output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
967                format!("{file_ref}={short} {line_count}L")
968            } else {
969                format!("{short} {line_count}L")
970            };
971
972            if !dep_info.imports.is_empty() {
973                output.push_str("\n  deps: ");
974                output.push_str(&dep_info.imports.join(", "));
975            }
976
977            if !dep_info.exports.is_empty() {
978                output.push_str("\n  exports: ");
979                output.push_str(&dep_info.exports.join(", "));
980            }
981
982            let key_sigs: Vec<&signatures::Signature> = sigs
983                .iter()
984                .filter(|s| s.is_exported || s.indent == 0)
985                .collect();
986
987            if !key_sigs.is_empty() {
988                output.push_str("\n  API:");
989                for sig in &key_sigs {
990                    output.push_str("\n    ");
991                    if crp_mode.is_tdd() {
992                        output.push_str(&sig.to_tdd());
993                    } else {
994                        output.push_str(&sig.to_compact());
995                    }
996                }
997            }
998
999            if let Some(body) = task_relevant_body(content, file_path, ext, task) {
1000                output.push('\n');
1001                output.push_str(&body);
1002            }
1003
1004            let sent = count_tokens(&output);
1005            (
1006                append_compressed_hint(
1007                    &protocol::append_savings(&output, original_tokens, sent),
1008                    file_path,
1009                ),
1010                sent,
1011            )
1012        }
1013        "aggressive" => {
1014            #[cfg(feature = "tree-sitter")]
1015            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
1016            #[cfg(not(feature = "tree-sitter"))]
1017            let ast_pruned: Option<String> = None;
1018
1019            let base = ast_pruned.as_deref().unwrap_or(content);
1020
1021            let session_intent = crate::core::session::SessionState::load_latest()
1022                .and_then(|s| s.active_structured_intent);
1023            let raw = if let Some(ref intent) = session_intent {
1024                compressor::task_aware_compress(base, Some(ext), intent)
1025            } else {
1026                compressor::aggressive_compress(base, Some(ext))
1027            };
1028            let compressed = compressor::safeguard_ratio(content, &raw);
1029            let header = build_header(file_ref, short, ext, content, line_count, true);
1030
1031            let mut sym = SymbolMap::new();
1032            let idents = symbol_map::extract_identifiers(&compressed, ext);
1033            for ident in &idents {
1034                sym.register(ident);
1035            }
1036
1037            if symbol_map::substitution_enabled() && sym.len() >= 3 {
1038                let sym_table = sym.format_table();
1039                let sym_applied = sym.apply(&compressed);
1040                let orig_tok = count_tokens(&compressed);
1041                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
1042                let net = orig_tok.saturating_sub(comp_tok);
1043                if orig_tok > 0 && net * 100 / orig_tok >= 5 {
1044                    let savings = protocol::format_savings(original_tokens, comp_tok);
1045                    return (
1046                        append_compressed_hint(
1047                            &format!("{header}\n{sym_applied}{sym_table}\n{savings}"),
1048                            file_path,
1049                        ),
1050                        comp_tok,
1051                    );
1052                }
1053                let savings = protocol::format_savings(original_tokens, orig_tok);
1054                return (
1055                    append_compressed_hint(
1056                        &format!("{header}\n{compressed}\n{savings}"),
1057                        file_path,
1058                    ),
1059                    orig_tok,
1060                );
1061            }
1062
1063            let sent = count_tokens(&compressed);
1064            let savings = protocol::format_savings(original_tokens, sent);
1065            (
1066                append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path),
1067                sent,
1068            )
1069        }
1070        "entropy" => {
1071            let result = entropy::entropy_compress_adaptive(content, file_path);
1072            let avg_h = entropy::analyze_entropy(content).avg_entropy;
1073            let header = build_header(file_ref, short, ext, content, line_count, false);
1074            let techs = result.techniques.join(", ");
1075            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
1076            let sent = count_tokens(&output);
1077            let savings = protocol::format_savings(original_tokens, sent);
1078            let compression_ratio = if original_tokens > 0 {
1079                1.0 - (sent as f64 / original_tokens as f64)
1080            } else {
1081                0.0
1082            };
1083            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
1084            (
1085                append_compressed_hint(&format!("{output}\n{savings}"), file_path),
1086                sent,
1087            )
1088        }
1089        "task" => {
1090            let task_str = task.unwrap_or("");
1091            if task_str.is_empty() {
1092                let header = build_header(file_ref, short, ext, content, line_count, true);
1093                let out = format!("{header}\n{content}\n[task mode: no task set — returned full]");
1094                let sent = count_tokens(&out);
1095                return (out, sent);
1096            }
1097            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
1098            if keywords.is_empty() {
1099                let header = build_header(file_ref, short, ext, content, line_count, true);
1100                let out = format!(
1101                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
1102                );
1103                let sent = count_tokens(&out);
1104                return (out, sent);
1105            }
1106            let filtered =
1107                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
1108            let filtered_lines = filtered.lines().count();
1109            let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1110                format!("{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
1111            } else {
1112                format!("{short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]")
1113            };
1114            let graph_ctx = if crate::core::profiles::active_profile()
1115                .output_hints
1116                .graph_context_block()
1117            {
1118                let project_root = detect_project_root(file_path);
1119                crate::core::graph_context::build_graph_context(
1120                    file_path,
1121                    &project_root,
1122                    Some(crate::core::graph_context::GraphContextOptions::default()),
1123                )
1124                .map(|c| crate::core::graph_context::format_graph_context(&c))
1125                .unwrap_or_default()
1126            } else {
1127                String::new()
1128            };
1129
1130            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
1131            let savings = protocol::format_savings(original_tokens, sent);
1132            (
1133                append_compressed_hint(
1134                    &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
1135                    file_path,
1136                ),
1137                sent,
1138            )
1139        }
1140        "reference" => {
1141            let tok = count_tokens(content);
1142            let output = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1143                format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})")
1144            } else {
1145                format!("{short}: {line_count} lines, {tok} tok ({ext})")
1146            };
1147            let sent = count_tokens(&output);
1148            let savings = protocol::format_savings(original_tokens, sent);
1149            (format!("{output}\n{savings}"), sent)
1150        }
1151        mode if mode.starts_with("lines:") => {
1152            let range_str = &mode[6..];
1153            let extracted = extract_line_range(content, range_str);
1154            let header = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1155                format!("{file_ref}={short} {line_count}L lines:{range_str}")
1156            } else {
1157                format!("{short} {line_count}L lines:{range_str}")
1158            };
1159            let sent = count_tokens(&extracted);
1160            let savings = protocol::format_savings(original_tokens, sent);
1161            (format!("{header}\n{extracted}\n{savings}"), sent)
1162        }
1163        unknown => {
1164            let header = build_header(file_ref, short, ext, content, line_count, true);
1165            let out = format!(
1166                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
1167            );
1168            let sent = count_tokens(&out);
1169            (out, sent)
1170        }
1171    }
1172}
1173
1174/// When a task is active, find the symbol whose name best matches a task
1175/// keyword and return its body as numbered source lines (capped).
1176///
1177/// `map`/`signatures` stay compact but include the one symbol body the agent is
1178/// most likely about to read, avoiding a follow-up full read. Uses the
1179/// tree-sitter chunk extractor (which carries spans + body across languages); a
1180/// no-op when tree-sitter is unavailable.
1181fn task_relevant_body(
1182    content: &str,
1183    file_path: &str,
1184    ext: &str,
1185    task: Option<&str>,
1186) -> Option<String> {
1187    const MAX_BODY_LINES: usize = 80;
1188
1189    let task = task.map(str::trim).filter(|t| !t.is_empty())?;
1190    let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task);
1191    if keywords.is_empty() {
1192        return None;
1193    }
1194    let kw_lower: Vec<String> = keywords.iter().map(|k| k.to_lowercase()).collect();
1195
1196    let chunks = crate::core::chunks_ts::extract_chunks_ts(file_path, content, ext)?;
1197
1198    // Score: exact name match (2) beats substring overlap (1).
1199    let mut best_idx: Option<usize> = None;
1200    let mut best_score = 0u8;
1201    for (i, ch) in chunks.iter().enumerate() {
1202        if ch.symbol_name.is_empty() {
1203            continue;
1204        }
1205        let name_l = ch.symbol_name.to_lowercase();
1206        let substr = kw_lower
1207            .iter()
1208            .any(|k| k.len() >= 3 && (name_l.contains(k.as_str()) || k.contains(name_l.as_str())));
1209        let score = if kw_lower.contains(&name_l) {
1210            2
1211        } else {
1212            u8::from(substr)
1213        };
1214        if score > best_score {
1215            best_score = score;
1216            best_idx = Some(i);
1217        }
1218    }
1219
1220    let ch = &chunks[best_idx?];
1221    let body_lines: Vec<&str> = ch.content.lines().collect();
1222    let total = body_lines.len();
1223    let shown = total.min(MAX_BODY_LINES);
1224    let body: String = body_lines[..shown]
1225        .iter()
1226        .enumerate()
1227        .map(|(i, l)| format!("{:>4}|{l}", ch.start_line + i))
1228        .collect::<Vec<_>>()
1229        .join("\n");
1230    let truncated = if shown < total {
1231        format!(
1232            "\n  … +{} lines — ctx_read(mode=\"lines:{}-{}\")",
1233            total - shown,
1234            ch.start_line + shown,
1235            ch.end_line
1236        )
1237    } else {
1238        String::new()
1239    };
1240    Some(format!(
1241        "  ▸ body {} L{}-{}:\n{body}{truncated}",
1242        ch.symbol_name, ch.start_line, ch.end_line
1243    ))
1244}
1245
/// Renders the lines of `content` selected by `range_str`, each as the
/// right-aligned 1-based line number (width 4), `| `, then the line text.
///
/// `range_str` is a comma-separated list of single line numbers (`7`) and
/// inclusive ranges (`3-9`). A range bound that does not parse as a number
/// defaults to the first line (start) or the last line (end), and bounds are
/// clamped to the file. Parts that are neither a range nor an in-file line
/// number contribute nothing. When nothing is selected the result is
/// `"No lines matched the range."`.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    let mut selected: Vec<String> = Vec::new();

    for part in range_str.split(',').map(str::trim) {
        // Resolve every part to one inclusive, already-clamped span.
        let (first, last) = match part.split_once('-') {
            Some((lo, hi)) => (
                lo.trim().parse::<usize>().unwrap_or(1).max(1),
                hi.trim().parse::<usize>().unwrap_or(total).min(total),
            ),
            None => match part.parse::<usize>() {
                Ok(n) if (1..=total).contains(&n) => (n, n),
                _ => continue,
            },
        };
        // `first >= 1` and `last <= total`, so `n - 1` is always in bounds;
        // an inverted span simply yields nothing.
        selected.extend((first..=last).map(|n| format!("{n:>4}| {}", lines[n - 1])));
    }

    match selected.is_empty() {
        true => "No lines matched the range.".to_string(),
        false => selected.join("\n"),
    }
}
1274
1275fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> (String, usize) {
1276    let _mode_guard = crate::core::savings_footer::ModeGuard::new("diff");
1277    let short = protocol::shorten_path(path);
1278    let old_content = cache
1279        .get(path)
1280        .and_then(crate::core::cache::CacheEntry::content);
1281
1282    let new_content = match read_file_lossy(path) {
1283        Ok(c) => c,
1284        Err(e) => {
1285            let msg = format!("ERROR: {e}");
1286            let tokens = count_tokens(&msg);
1287            return (msg, tokens);
1288        }
1289    };
1290
1291    let original_tokens = count_tokens(&new_content);
1292
1293    let diff_output = if let Some(old) = &old_content {
1294        compressor::diff_content(old, &new_content)
1295    } else {
1296        // No previous version cached — store content for future diffs but
1297        // return a short guidance message instead of dumping the full file.
1298        cache.store(path, &new_content);
1299        let msg = format!(
1300            "{file_ref}={short} [no cached version for diff — use mode=full first, then diff on re-read]"
1301        );
1302        let sent = count_tokens(&msg);
1303        return (msg, sent);
1304    };
1305
1306    cache.store(path, &new_content);
1307
1308    let sent = count_tokens(&diff_output);
1309    let savings = protocol::format_savings(original_tokens, sent);
1310    let head = if crate::core::protocol::meta_visible() && !file_ref.is_empty() {
1311        format!("{file_ref}={short}")
1312    } else {
1313        short.clone()
1314    };
1315    (format!("{head} [diff]\n{diff_output}\n{savings}"), sent)
1316}
1317
/// Unit tests for header formatting, mode processing, line-range caching,
/// cache keys and instruction-file detection.
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Sets `LEAN_CTX_META=1` on creation and removes it on drop, so the
    /// variable is cleaned up even when an assertion in the test panics.
    ///
    /// Declare it *after* taking `test_env_lock()`: locals drop in reverse
    /// order, so the variable is removed before the lock is released.
    struct MetaEnvGuard;

    impl MetaEnvGuard {
        fn enable() -> Self {
            std::env::set_var("LEAN_CTX_META", "1");
            Self
        }
    }

    impl Drop for MetaEnvGuard {
        fn drop(&mut self) {
            std::env::remove_var("LEAN_CTX_META");
        }
    }

    /// The header carries the `F1=path NL` form and no bracket syntax.
    #[test]
    fn test_header_toon_format_no_brackets() {
        let _lock = crate::core::data_dir::test_env_lock();
        let _meta = MetaEnvGuard::enable();
        let content = "use std::io;\nfn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 2, false);
        assert!(!header.contains('['));
        assert!(!header.contains(']'));
        assert!(header.contains("F1=main.rs 2L"));
    }

    /// When deps are listed they use the indented form, not `deps:[…]`.
    #[test]
    fn test_header_toon_deps_indented() {
        let _lock = crate::core::data_dir::test_env_lock();
        let _meta = MetaEnvGuard::enable();
        let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 3, true);
        if header.contains("deps") {
            assert!(
                header.contains("\n deps "),
                "deps should use indented TOON format"
            );
            assert!(
                !header.contains("deps:["),
                "deps should not use bracket format"
            );
        }
    }

    /// The current header must not cost more tokens than the old bracket format.
    #[test]
    fn test_header_toon_saves_tokens() {
        let _lock = crate::core::data_dir::test_env_lock();
        let _meta = MetaEnvGuard::enable();
        let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
        let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
        let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
        let old_tokens = count_tokens(&old_header);
        let new_tokens = count_tokens(&new_header);
        assert!(
            new_tokens <= old_tokens,
            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
        );
    }

    /// Each TDD shorthand symbol tokenizes to at most two tokens.
    #[test]
    fn test_tdd_symbols_are_compact() {
        let symbols = [
            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
        ];
        for sym in &symbols {
            let tok = count_tokens(sym);
            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
        }
    }

    /// `task` mode shrinks a file in which only a few lines mention the task.
    #[test]
    fn test_task_mode_filters_content() {
        let content = (0..200)
            .map(|i| {
                if i % 20 == 0 {
                    format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
                } else {
                    format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
                }
            })
            .collect::<Vec<_>>()
            .join("\n");
        let full_tokens = count_tokens(&content);
        let task = Some("fix bug in validate_token");
        let (result, result_tokens) = process_mode(
            &content,
            "task",
            "F1",
            "test.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "test.rs",
            task,
        );
        assert!(
            result_tokens < full_tokens,
            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
        );
        assert!(
            result.contains("task-filtered"),
            "output should contain task-filtered marker"
        );
    }

    /// Without a task, `task` mode returns the full file and says so.
    #[test]
    fn test_task_mode_without_task_returns_full() {
        let content = "fn main() {}\nfn helper() {}\n";
        let tokens = count_tokens(content);
        let (result, _sent) = process_mode(
            content,
            "task",
            "F1",
            "test.rs",
            "rs",
            tokens,
            CrpMode::Off,
            "test.rs",
            None,
        );
        assert!(
            result.contains("no task set"),
            "should indicate no task: {result}"
        );
    }

    /// `reference` mode is a summary of at most three lines.
    #[test]
    fn test_reference_mode_one_line() {
        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
        let tokens = count_tokens(content);
        let (result, _sent) = process_mode(
            content,
            "reference",
            "F1",
            "test.rs",
            "rs",
            tokens,
            CrpMode::Off,
            "test.rs",
            None,
        );
        let lines: Vec<&str> = result.lines().collect();
        assert!(
            lines.len() <= 3,
            "reference mode should be very compact, got {} lines",
            lines.len()
        );
        assert!(result.contains("lines"), "should contain line count");
        assert!(result.contains("tok"), "should contain token count");
    }

    /// A second `lines:` read after the file was rewritten returns the new text.
    #[test]
    fn cached_lines_mode_invalidates_on_mtime_change() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("file.txt");
        let p = path.to_string_lossy().to_string();

        std::fs::write(&path, "one\nsecond\n").unwrap();
        let mut cache = SessionCache::new();

        let r1 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        let l1: Vec<&str> = r1.content.lines().collect();
        let got1 = l1.get(1).copied().unwrap_or_default().trim();
        let got1 = got1.split_once('|').map_or(got1, |(_, s)| s.trim());
        assert_eq!(got1, "one");

        // NOTE(review): presumably here so the rewrite gets a different mtime
        // on filesystems with one-second timestamp granularity — confirm.
        std::thread::sleep(Duration::from_secs(1));
        std::fs::write(&path, "two\nsecond\n").unwrap();

        let r2 = handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
        let l2: Vec<&str> = r2.content.lines().collect();
        let got2 = l2.get(1).copied().unwrap_or_default().trim();
        let got2 = got2.split_once('|').map_or(got2, |(_, s)| s.trim());
        assert_eq!(got2, "two");
    }

    /// Compares token counts of `full`, `task`, `signatures` and `reference`
    /// on the same generated file and prints the savings.
    #[test]
    #[cfg_attr(tarpaulin, ignore)]
    fn benchmark_task_conditioned_compression() {
        // Keep this reasonably small so CI coverage instrumentation stays fast.
        let content = generate_benchmark_code(200);
        let full_tokens = count_tokens(&content);
        let task = Some("fix authentication in validate_token");

        let (_full_output, full_tok) = process_mode(
            &content,
            "full",
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );
        let (_task_output, task_tok) = process_mode(
            &content,
            "task",
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );
        let (_sig_output, sig_tok) = process_mode(
            &content,
            "signatures",
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );
        let (_ref_output, ref_tok) = process_mode(
            &content,
            "reference",
            "F1",
            "server.rs",
            "rs",
            full_tokens,
            CrpMode::Off,
            "server.rs",
            task,
        );

        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
        eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
        eprintln!("  full:       {full_tok:>6} tokens (baseline)");
        eprintln!(
            "  task:       {task_tok:>6} tokens ({:.0}% savings)",
            (1.0 - task_tok as f64 / full_tok as f64) * 100.0
        );
        eprintln!(
            "  signatures: {sig_tok:>6} tokens ({:.0}% savings)",
            (1.0 - sig_tok as f64 / full_tok as f64) * 100.0
        );
        eprintln!(
            "  reference:  {ref_tok:>6} tokens ({:.0}% savings)",
            (1.0 - ref_tok as f64 / full_tok as f64) * 100.0
        );
        eprintln!("================================================\n");

        assert!(task_tok < full_tok, "task mode should save tokens");
        assert!(sig_tok < full_tok, "signatures should save tokens");
        assert!(ref_tok < sig_tok, "reference should be most compact");
    }

    /// Generates a Rust-like source of roughly `lines` lines: a `Server`
    /// struct, an `impl` with `validate_token`, then filler `handler_N`
    /// methods of up to 30 lines each. Braces are always balanced: a handler
    /// still open when the filler runs out is closed before the `impl`.
    fn generate_benchmark_code(lines: usize) -> String {
        // +2 leaves room for the closing braces of a trailing handler and the impl.
        let mut code = Vec::with_capacity(lines + 2);
        code.push("use std::collections::HashMap;".to_string());
        code.push("use crate::core::auth;".to_string());
        code.push(String::new());
        code.push("pub struct Server {".to_string());
        code.push("    config: Config,".to_string());
        code.push("    cache: HashMap<String, String>,".to_string());
        code.push("}".to_string());
        code.push(String::new());
        code.push("impl Server {".to_string());
        code.push(
            "    pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {"
                .to_string(),
        );
        code.push("        let decoded = auth::decode_jwt(token)?;".to_string());
        code.push("        if decoded.exp < chrono::Utc::now().timestamp() {".to_string());
        code.push("            return Err(AuthError::Expired);".to_string());
        code.push("        }".to_string());
        code.push("        Ok(decoded.claims)".to_string());
        code.push("    }".to_string());
        code.push(String::new());

        let remaining = lines.saturating_sub(code.len());
        let mut handler_open = false;
        for i in 0..remaining {
            match i % 30 {
                0 => {
                    code.push(format!(
                        "    pub fn handler_{i}(&self, req: Request) -> Response {{"
                    ));
                    handler_open = true;
                }
                29 => {
                    code.push("    }".to_string());
                    handler_open = false;
                }
                _ => code.push(format!("        let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());")),
            }
        }
        if handler_open {
            // The filler ended mid-handler; close it so the `impl` brace below
            // really closes the `impl`.
            code.push("    }".to_string());
        }
        code.push("}".to_string());
        code.join("\n")
    }

    /// `map` inlines the body of the task-matching symbol, and only with a task.
    #[test]
    fn map_mode_inlines_task_relevant_body() {
        let content = "pub fn alpha() {\n    let a = 1;\n}\n\npub fn validate_token(t: &str) -> bool {\n    let ok = check(t);\n    ok\n}\n";
        let tokens = count_tokens(content);
        let (with_task, _) = process_mode(
            content,
            "map",
            "F1",
            "test.rs",
            "rs",
            tokens,
            CrpMode::Off,
            "test.rs",
            Some("fix bug in validate_token"),
        );
        assert!(
            with_task.contains("▸ body") && with_task.contains("validate_token"),
            "map with task should inline the matching body: {with_task}"
        );
        let (no_task, _) = process_mode(
            content,
            "map",
            "F1",
            "test.rs",
            "rs",
            tokens,
            CrpMode::Off,
            "test.rs",
            None,
        );
        assert!(
            !no_task.contains("▸ body"),
            "map without a task must not inline a body: {no_task}"
        );
    }

    /// Task-free, task-aware and differently-tasked reads get distinct cache keys.
    #[test]
    fn compressed_cache_key_distinguishes_task() {
        let no_task = compressed_cache_key("map", CrpMode::Off, None);
        let with_task = compressed_cache_key("map", CrpMode::Off, Some("fix login"));
        let other_task = compressed_cache_key("map", CrpMode::Off, Some("refactor db"));
        assert_eq!(no_task, "map");
        assert_ne!(with_task, no_task);
        assert_ne!(with_task, other_task);
    }

    /// Agent instruction files are recognised; ordinary project files are not.
    #[test]
    fn instruction_file_detection() {
        assert!(is_instruction_file(
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md"
        ));
        assert!(is_instruction_file("/workspace/.cursor/rules/lean-ctx.mdc"));
        assert!(is_instruction_file("/project/AGENTS.md"));
        assert!(is_instruction_file("/project/.cursorrules"));
        assert!(is_instruction_file("/home/user/.claude/rules/my-rule.md"));
        assert!(is_instruction_file("/skills/some-skill/README.md"));

        assert!(!is_instruction_file("/project/src/main.rs"));
        assert!(!is_instruction_file("/project/config.json"));
        assert!(!is_instruction_file("/project/data/report.csv"));
    }

    /// `auto` resolves to `full` for instruction files, with or without a task.
    #[test]
    fn resolve_auto_mode_returns_full_for_instruction_files() {
        let mode = resolve_auto_mode(
            "/home/user/.pi/agent/skills/committing-changes/SKILL.md",
            5000,
            Some("read"),
        );
        assert_eq!(mode, "full", "SKILL.md must always be read in full");

        let mode = resolve_auto_mode("/workspace/AGENTS.md", 3000, Some("read"));
        assert_eq!(mode, "full", "AGENTS.md must always be read in full");

        let mode = resolve_auto_mode("/workspace/.cursorrules", 2000, None);
        assert_eq!(mode, "full", ".cursorrules must always be read in full");
    }
}