Skip to main content

lean_ctx/tools/
ctx_read.rs

1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Marker line added to compressed renderings so the reader knows the source is incomplete.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Appends the compressed-output marker and a ready-to-use `ctx_read` call for the
/// full source of `file_path` to `output`.
fn append_compressed_hint(output: &str, file_path: &str) -> String {
    // Build the result piecewise; the emitted text is: output, marker line, call line.
    let mut hinted =
        String::with_capacity(output.len() + COMPRESSED_HINT.len() + file_path.len() + 32);
    hinted.push_str(output);
    hinted.push('\n');
    hinted.push_str(COMPRESSED_HINT);
    hinted.push_str("\n  ctx_read(\"");
    hinted.push_str(file_path);
    hinted.push_str("\", mode=\"full\")");
    hinted
}
18
19/// Reads a file as UTF-8 with lossy fallback, enforcing the max read size limit.
20pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
21    let cap = crate::core::limits::max_read_bytes();
22    if let Ok(meta) = std::fs::metadata(path) {
23        if meta.len() > cap as u64 {
24            return Err(std::io::Error::other(format!(
25                "file too large ({} bytes, cap {} via LCTX_MAX_READ_BYTES)",
26                meta.len(),
27                cap
28            )));
29        }
30    }
31    let bytes = std::fs::read(path)?;
32    match String::from_utf8(bytes) {
33        Ok(s) => Ok(s),
34        Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
35    }
36}
37
38/// Reads a file through the cache and applies the requested compression mode.
39pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
40    handle_with_options(cache, path, mode, false, crp_mode, None)
41}
42
43/// Like `handle`, but invalidates the cache first to force a fresh disk read.
44pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
45    handle_with_options(cache, path, mode, true, crp_mode, None)
46}
47
48/// Reads a file with task-aware filtering to prioritize task-relevant content.
49pub fn handle_with_task(
50    cache: &mut SessionCache,
51    path: &str,
52    mode: &str,
53    crp_mode: CrpMode,
54    task: Option<&str>,
55) -> String {
56    handle_with_options(cache, path, mode, false, crp_mode, task)
57}
58
59/// Like `handle_with_task`, also returns the resolved mode name.
60pub fn handle_with_task_resolved(
61    cache: &mut SessionCache,
62    path: &str,
63    mode: &str,
64    crp_mode: CrpMode,
65    task: Option<&str>,
66) -> (String, String) {
67    handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
68}
69
70/// Fresh read with task-aware filtering (invalidates cache first).
71pub fn handle_fresh_with_task(
72    cache: &mut SessionCache,
73    path: &str,
74    mode: &str,
75    crp_mode: CrpMode,
76    task: Option<&str>,
77) -> String {
78    handle_with_options(cache, path, mode, true, crp_mode, task)
79}
80
81/// Fresh read with task-aware filtering, also returns the resolved mode name.
82pub fn handle_fresh_with_task_resolved(
83    cache: &mut SessionCache,
84    path: &str,
85    mode: &str,
86    crp_mode: CrpMode,
87    task: Option<&str>,
88) -> (String, String) {
89    handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
90}
91
92fn handle_with_options(
93    cache: &mut SessionCache,
94    path: &str,
95    mode: &str,
96    fresh: bool,
97    crp_mode: CrpMode,
98    task: Option<&str>,
99) -> String {
100    handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).0
101}
102
103fn handle_with_options_resolved(
104    cache: &mut SessionCache,
105    path: &str,
106    mode: &str,
107    fresh: bool,
108    crp_mode: CrpMode,
109    task: Option<&str>,
110) -> (String, String) {
111    let file_ref = cache.get_file_ref(path);
112    let short = protocol::shorten_path(path);
113    let ext = Path::new(path)
114        .extension()
115        .and_then(|e| e.to_str())
116        .unwrap_or("");
117
118    if fresh {
119        cache.invalidate(path);
120    }
121
122    if mode == "diff" {
123        return (handle_diff(cache, path, &file_ref), "diff".to_string());
124    }
125
126    if mode != "full" {
127        if let Some(existing) = cache.get(path) {
128            let stale = crate::core::cache::is_cache_entry_stale(path, existing.stored_mtime);
129            if stale {
130                cache.invalidate(path);
131            }
132        }
133    }
134
135    if let Some(existing) = cache.get(path) {
136        if mode == "full" {
137            return (
138                handle_full_with_auto_delta(cache, path, &file_ref, &short, ext),
139                "full".to_string(),
140            );
141        }
142        let content = existing.content.clone();
143        let original_tokens = existing.original_tokens;
144        let resolved_mode = if mode == "auto" {
145            resolve_auto_mode(path, original_tokens, task)
146        } else {
147            mode.to_string()
148        };
149        let out = process_mode(
150            &content,
151            &resolved_mode,
152            &file_ref,
153            &short,
154            ext,
155            original_tokens,
156            crp_mode,
157            path,
158            task,
159        );
160        return (out, resolved_mode);
161    }
162
163    let content = match read_file_lossy(path) {
164        Ok(c) => c,
165        Err(e) => return (format!("ERROR: {e}"), "error".to_string()),
166    };
167
168    let similar_hint = find_semantic_similar(path, &content);
169
170    let store_result = cache.store(path, content.clone());
171
172    update_semantic_index(path, &content);
173
174    if mode == "full" {
175        let mut output = format_full_output(
176            &file_ref,
177            &short,
178            ext,
179            &content,
180            store_result.original_tokens,
181            store_result.line_count,
182        );
183        if let Some(hint) = similar_hint {
184            output.push_str(&format!("\n{hint}"));
185        }
186        return (output, "full".to_string());
187    }
188
189    let resolved_mode = if mode == "auto" {
190        resolve_auto_mode(path, store_result.original_tokens, task)
191    } else {
192        mode.to_string()
193    };
194
195    let mut output = process_mode(
196        &content,
197        &resolved_mode,
198        &file_ref,
199        &short,
200        ext,
201        store_result.original_tokens,
202        crp_mode,
203        path,
204        task,
205    );
206    if let Some(hint) = similar_hint {
207        output.push_str(&format!("\n{hint}"));
208    }
209    (output, resolved_mode)
210}
211
212fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
213    let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
214    let predictor = crate::core::mode_predictor::ModePredictor::new();
215    let mut predicted = predictor
216        .predict_best_mode(&sig)
217        .unwrap_or_else(|| "full".to_string());
218    if predicted == "auto" {
219        predicted = "full".to_string();
220    }
221
222    if let Some(project_root) =
223        crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
224    {
225        let ext = std::path::Path::new(file_path)
226            .extension()
227            .and_then(|e| e.to_str())
228            .unwrap_or("");
229        let bucket = match original_tokens {
230            0..=2000 => "sm",
231            2001..=10000 => "md",
232            10001..=50000 => "lg",
233            _ => "xl",
234        };
235        let bandit_key = format!("{ext}_{bucket}");
236        let mut store = crate::core::bandit::BanditStore::load(&project_root);
237        let bandit = store.get_or_create(&bandit_key);
238        let arm = bandit.select_arm();
239        if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
240            predicted = "aggressive".to_string();
241        }
242    }
243
244    let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
245    policy.choose_auto_mode(task, &predicted)
246}
247
248fn find_semantic_similar(path: &str, content: &str) -> Option<String> {
249    let project_root = detect_project_root(path);
250    let index = crate::core::semantic_cache::SemanticCacheIndex::load(&project_root)?;
251
252    let similar = index.find_similar(content, 0.7);
253    let relevant: Vec<_> = similar
254        .into_iter()
255        .filter(|(p, _)| p != path)
256        .take(3)
257        .collect();
258
259    if relevant.is_empty() {
260        return None;
261    }
262
263    let hints: Vec<String> = relevant
264        .iter()
265        .map(|(p, score)| format!("  {p} ({:.0}% similar)", score * 100.0))
266        .collect();
267
268    Some(format!(
269        "[semantic: {} similar file(s) in cache]\n{}",
270        relevant.len(),
271        hints.join("\n")
272    ))
273}
274
275fn update_semantic_index(path: &str, content: &str) {
276    let project_root = detect_project_root(path);
277    let session_id = format!("{}", std::process::id());
278    let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
279    index.add_file(path, content, &session_id);
280    let _ = index.save(&project_root);
281}
282
283fn detect_project_root(path: &str) -> String {
284    crate::core::protocol::detect_project_root_or_cwd(path)
285}
286
/// Fraction of the full file's token count that a diff must stay below for a changed
/// file to be sent as an auto-delta instead of in full.
const AUTO_DELTA_THRESHOLD: f64 = 0.6;
288
289/// Re-reads from disk; if content changed and delta is compact, sends auto-delta.
290fn handle_full_with_auto_delta(
291    cache: &mut SessionCache,
292    path: &str,
293    file_ref: &str,
294    short: &str,
295    ext: &str,
296) -> String {
297    let Ok(disk_content) = read_file_lossy(path) else {
298        cache.record_cache_hit(path);
299        return if let Some(existing) = cache.get(path) {
300            format!(
301                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
302                existing.read_count, existing.line_count
303            )
304        } else {
305            format!("[file read failed and no cached version available] {file_ref}={short}")
306        };
307    };
308
309    let old_content = cache
310        .get(path)
311        .map(|e| e.content.clone())
312        .unwrap_or_default();
313    let store_result = cache.store(path, disk_content.clone());
314
315    if store_result.was_hit {
316        return format!(
317            "{file_ref}={short} cached {}t {}L\nFile already in context from previous read. Use fresh=true to re-read if content needed again.",
318            store_result.read_count, store_result.line_count
319        );
320    }
321
322    let diff = compressor::diff_content(&old_content, &disk_content);
323    let diff_tokens = count_tokens(&diff);
324    let full_tokens = store_result.original_tokens;
325
326    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
327        let savings = protocol::format_savings(full_tokens, diff_tokens);
328        return format!(
329            "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
330            disk_content.lines().count()
331        );
332    }
333
334    format_full_output(
335        file_ref,
336        short,
337        ext,
338        &disk_content,
339        store_result.original_tokens,
340        store_result.line_count,
341    )
342}
343
344fn format_full_output(
345    file_ref: &str,
346    short: &str,
347    ext: &str,
348    content: &str,
349    original_tokens: usize,
350    line_count: usize,
351) -> String {
352    let tokens = original_tokens;
353    let metadata = build_header(file_ref, short, ext, content, line_count, true);
354
355    let mut sym = SymbolMap::new();
356    let idents = symbol_map::extract_identifiers(content, ext);
357    for ident in &idents {
358        sym.register(ident);
359    }
360
361    let sym_beneficial = if sym.len() >= 3 {
362        let sym_table = sym.format_table();
363        let compressed = sym.apply(content);
364        let original_tok = count_tokens(content);
365        let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
366        let net_saving = original_tok.saturating_sub(compressed_tok);
367        original_tok > 0 && net_saving * 100 / original_tok >= 5
368    } else {
369        false
370    };
371
372    if sym_beneficial {
373        let compressed_content = sym.apply(content);
374        let sym_table = sym.format_table();
375        let output = format!("{compressed_content}{sym_table}\n{metadata}");
376        let sent = count_tokens(&output);
377        let savings = protocol::format_savings(tokens, sent);
378        return format!("{output}\n{savings}");
379    }
380
381    let output = format!("{content}\n{metadata}");
382    let sent = count_tokens(&output);
383    let savings = protocol::format_savings(tokens, sent);
384    format!("{output}\n{savings}")
385}
386
387fn build_header(
388    file_ref: &str,
389    short: &str,
390    ext: &str,
391    content: &str,
392    line_count: usize,
393    include_deps: bool,
394) -> String {
395    let mut header = format!("{file_ref}={short} {line_count}L");
396
397    if include_deps {
398        let dep_info = deps::extract_deps(content, ext);
399        if !dep_info.imports.is_empty() {
400            let imports_str: Vec<&str> = dep_info
401                .imports
402                .iter()
403                .take(8)
404                .map(std::string::String::as_str)
405                .collect();
406            header.push_str(&format!("\n deps {}", imports_str.join(",")));
407        }
408        if !dep_info.exports.is_empty() {
409            let exports_str: Vec<&str> = dep_info
410                .exports
411                .iter()
412                .take(8)
413                .map(std::string::String::as_str)
414                .collect();
415            header.push_str(&format!("\n exports {}", exports_str.join(",")));
416        }
417    }
418
419    header
420}
421
422#[allow(clippy::too_many_arguments)]
423fn process_mode(
424    content: &str,
425    mode: &str,
426    file_ref: &str,
427    short: &str,
428    ext: &str,
429    original_tokens: usize,
430    crp_mode: CrpMode,
431    file_path: &str,
432    task: Option<&str>,
433) -> String {
434    let line_count = content.lines().count();
435
436    match mode {
437        "auto" => {
438            let chosen = resolve_auto_mode(file_path, original_tokens, task);
439            process_mode(
440                content,
441                &chosen,
442                file_ref,
443                short,
444                ext,
445                original_tokens,
446                crp_mode,
447                file_path,
448                task,
449            )
450        }
451        "signatures" => {
452            let sigs = signatures::extract_signatures(content, ext);
453            let dep_info = deps::extract_deps(content, ext);
454
455            let mut output = format!("{file_ref}={short} {line_count}L");
456            if !dep_info.imports.is_empty() {
457                let imports_str: Vec<&str> = dep_info
458                    .imports
459                    .iter()
460                    .take(8)
461                    .map(std::string::String::as_str)
462                    .collect();
463                output.push_str(&format!("\n deps {}", imports_str.join(",")));
464            }
465            for sig in &sigs {
466                output.push('\n');
467                if crp_mode.is_tdd() {
468                    output.push_str(&sig.to_tdd());
469                } else {
470                    output.push_str(&sig.to_compact());
471                }
472            }
473            let sent = count_tokens(&output);
474            let savings = protocol::format_savings(original_tokens, sent);
475            append_compressed_hint(&format!("{output}\n{savings}"), file_path)
476        }
477        "map" => {
478            if ext == "php" {
479                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
480                {
481                    let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
482                    let sent = count_tokens(&output);
483                    let savings = protocol::format_savings(original_tokens, sent);
484                    output.push('\n');
485                    output.push_str(&savings);
486                    return append_compressed_hint(&output, file_path);
487                }
488            }
489
490            let sigs = signatures::extract_signatures(content, ext);
491            let dep_info = deps::extract_deps(content, ext);
492
493            let mut output = format!("{file_ref}={short} {line_count}L");
494
495            if !dep_info.imports.is_empty() {
496                output.push_str("\n  deps: ");
497                output.push_str(&dep_info.imports.join(", "));
498            }
499
500            if !dep_info.exports.is_empty() {
501                output.push_str("\n  exports: ");
502                output.push_str(&dep_info.exports.join(", "));
503            }
504
505            let key_sigs: Vec<&signatures::Signature> = sigs
506                .iter()
507                .filter(|s| s.is_exported || s.indent == 0)
508                .collect();
509
510            if !key_sigs.is_empty() {
511                output.push_str("\n  API:");
512                for sig in &key_sigs {
513                    output.push_str("\n    ");
514                    if crp_mode.is_tdd() {
515                        output.push_str(&sig.to_tdd());
516                    } else {
517                        output.push_str(&sig.to_compact());
518                    }
519                }
520            }
521
522            let sent = count_tokens(&output);
523            let savings = protocol::format_savings(original_tokens, sent);
524            append_compressed_hint(&format!("{output}\n{savings}"), file_path)
525        }
526        "aggressive" => {
527            #[cfg(feature = "tree-sitter")]
528            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
529            #[cfg(not(feature = "tree-sitter"))]
530            let ast_pruned: Option<String> = None;
531
532            let base = ast_pruned.as_deref().unwrap_or(content);
533
534            let session_intent = crate::core::session::SessionState::load_latest()
535                .and_then(|s| s.active_structured_intent);
536            let raw = if let Some(ref intent) = session_intent {
537                compressor::task_aware_compress(base, Some(ext), intent)
538            } else {
539                compressor::aggressive_compress(base, Some(ext))
540            };
541            let compressed = compressor::safeguard_ratio(content, &raw);
542            let header = build_header(file_ref, short, ext, content, line_count, true);
543
544            let mut sym = SymbolMap::new();
545            let idents = symbol_map::extract_identifiers(&compressed, ext);
546            for ident in &idents {
547                sym.register(ident);
548            }
549
550            let sym_beneficial = if sym.len() >= 3 {
551                let sym_table = sym.format_table();
552                let sym_applied = sym.apply(&compressed);
553                let orig_tok = count_tokens(&compressed);
554                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
555                let net = orig_tok.saturating_sub(comp_tok);
556                orig_tok > 0 && net * 100 / orig_tok >= 5
557            } else {
558                false
559            };
560
561            if sym_beneficial {
562                let sym_output = sym.apply(&compressed);
563                let sym_table = sym.format_table();
564                let sent = count_tokens(&sym_output) + count_tokens(&sym_table);
565                let savings = protocol::format_savings(original_tokens, sent);
566                return append_compressed_hint(
567                    &format!("{header}\n{sym_output}{sym_table}\n{savings}"),
568                    file_path,
569                );
570            }
571
572            let sent = count_tokens(&compressed);
573            let savings = protocol::format_savings(original_tokens, sent);
574            append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path)
575        }
576        "entropy" => {
577            let result = entropy::entropy_compress_adaptive(content, file_path);
578            let avg_h = entropy::analyze_entropy(content).avg_entropy;
579            let header = build_header(file_ref, short, ext, content, line_count, false);
580            let techs = result.techniques.join(", ");
581            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
582            let sent = count_tokens(&output);
583            let savings = protocol::format_savings(original_tokens, sent);
584            let compression_ratio = if original_tokens > 0 {
585                1.0 - (sent as f64 / original_tokens as f64)
586            } else {
587                0.0
588            };
589            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
590            append_compressed_hint(&format!("{output}\n{savings}"), file_path)
591        }
592        "task" => {
593            let task_str = task.unwrap_or("");
594            if task_str.is_empty() {
595                let header = build_header(file_ref, short, ext, content, line_count, true);
596                return format!("{header}\n{content}\n[task mode: no task set — returned full]");
597            }
598            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
599            if keywords.is_empty() {
600                let header = build_header(file_ref, short, ext, content, line_count, true);
601                return format!(
602                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
603                );
604            }
605            let filtered =
606                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
607            let filtered_lines = filtered.lines().count();
608            let header = format!(
609                "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
610            );
611            let project_root = detect_project_root(file_path);
612            let graph_ctx = crate::core::graph_context::build_graph_context(
613                file_path,
614                &project_root,
615                Some(crate::core::graph_context::GraphContextOptions::default()),
616            )
617            .map(|c| crate::core::graph_context::format_graph_context(&c))
618            .unwrap_or_default();
619
620            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
621            let savings = protocol::format_savings(original_tokens, sent);
622            append_compressed_hint(
623                &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
624                file_path,
625            )
626        }
627        "reference" => {
628            let tok = count_tokens(content);
629            let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
630            let sent = count_tokens(&output);
631            let savings = protocol::format_savings(original_tokens, sent);
632            format!("{output}\n{savings}")
633        }
634        mode if mode.starts_with("lines:") => {
635            let range_str = &mode[6..];
636            let extracted = extract_line_range(content, range_str);
637            let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
638            let sent = count_tokens(&extracted);
639            let savings = protocol::format_savings(original_tokens, sent);
640            format!("{header}\n{extracted}\n{savings}")
641        }
642        unknown => {
643            let header = build_header(file_ref, short, ext, content, line_count, true);
644            format!(
645                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
646            )
647        }
648    }
649}
650
/// Extracts the lines selected by `range_str` from `content`, each prefixed with its
/// 1-based line number right-aligned to width 4 and `"| "`.
///
/// `range_str` is a comma-separated list of single numbers (`"7"`) and ranges
/// (`"3-9"`). In a range, a start that does not parse is treated as 1 and an end that
/// does not parse is treated as the last line; both are clamped to the file. Single
/// numbers outside the file, or that do not parse, select nothing. Selections are
/// emitted in the order given. When nothing is selected, the text
/// `"No lines matched the range."` is returned.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    let numbered = |n: usize| format!("{n:>4}| {}", lines[n - 1]);

    let selected: Vec<String> = range_str
        .split(',')
        .map(str::trim)
        .flat_map(|part| {
            // Every part becomes an inclusive span inside 1..=total; `1..=0` is the
            // empty span used for parts that select nothing.
            let (first, last) = match part.split_once('-') {
                Some((lo, hi)) => (
                    lo.trim().parse::<usize>().unwrap_or(1).max(1),
                    hi.trim().parse::<usize>().unwrap_or(total).min(total),
                ),
                None => match part.parse::<usize>() {
                    Ok(n) if (1..=total).contains(&n) => (n, n),
                    _ => (1, 0),
                },
            };
            first..=last
        })
        .map(numbered)
        .collect();

    if selected.is_empty() {
        return "No lines matched the range.".to_string();
    }
    selected.join("\n")
}
679
680fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> String {
681    let short = protocol::shorten_path(path);
682    let old_content = cache.get(path).map(|e| e.content.clone());
683
684    let new_content = match read_file_lossy(path) {
685        Ok(c) => c,
686        Err(e) => return format!("ERROR: {e}"),
687    };
688
689    let original_tokens = count_tokens(&new_content);
690
691    let diff_output = if let Some(old) = &old_content {
692        compressor::diff_content(old, &new_content)
693    } else {
694        format!("[first read]\n{new_content}")
695    };
696
697    cache.store(path, new_content);
698
699    let sent = count_tokens(&diff_output);
700    let savings = protocol::format_savings(original_tokens, sent);
701    format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}")
702}
703
704#[cfg(test)]
705mod tests {
706    use super::*;
707    use std::time::Duration;
708
709    #[test]
710    fn test_header_toon_format_no_brackets() {
711        let content = "use std::io;\nfn main() {}\n";
712        let header = build_header("F1", "main.rs", "rs", content, 2, false);
713        assert!(!header.contains('['));
714        assert!(!header.contains(']'));
715        assert!(header.contains("F1=main.rs 2L"));
716    }
717
718    #[test]
719    fn test_header_toon_deps_indented() {
720        let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
721        let header = build_header("F1", "main.rs", "rs", content, 3, true);
722        if header.contains("deps") {
723            assert!(
724                header.contains("\n deps "),
725                "deps should use indented TOON format"
726            );
727            assert!(
728                !header.contains("deps:["),
729                "deps should not use bracket format"
730            );
731        }
732    }
733
734    #[test]
735    fn test_header_toon_saves_tokens() {
736        let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
737        let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
738        let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
739        let old_tokens = count_tokens(&old_header);
740        let new_tokens = count_tokens(&new_header);
741        assert!(
742            new_tokens <= old_tokens,
743            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
744        );
745    }
746
747    #[test]
748    fn test_tdd_symbols_are_compact() {
749        let symbols = [
750            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
751        ];
752        for sym in &symbols {
753            let tok = count_tokens(sym);
754            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
755        }
756    }
757
758    #[test]
759    fn test_task_mode_filters_content() {
760        let content = (0..200)
761            .map(|i| {
762                if i % 20 == 0 {
763                    format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
764                } else {
765                    format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
766                }
767            })
768            .collect::<Vec<_>>()
769            .join("\n");
770        let full_tokens = count_tokens(&content);
771        let task = Some("fix bug in validate_token");
772        let result = process_mode(
773            &content,
774            "task",
775            "F1",
776            "test.rs",
777            "rs",
778            full_tokens,
779            CrpMode::Off,
780            "test.rs",
781            task,
782        );
783        let result_tokens = count_tokens(&result);
784        assert!(
785            result_tokens < full_tokens,
786            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
787        );
788        assert!(
789            result.contains("task-filtered"),
790            "output should contain task-filtered marker"
791        );
792    }
793
794    #[test]
795    fn test_task_mode_without_task_returns_full() {
796        let content = "fn main() {}\nfn helper() {}\n";
797        let tokens = count_tokens(content);
798        let result = process_mode(
799            content,
800            "task",
801            "F1",
802            "test.rs",
803            "rs",
804            tokens,
805            CrpMode::Off,
806            "test.rs",
807            None,
808        );
809        assert!(
810            result.contains("no task set"),
811            "should indicate no task: {result}"
812        );
813    }
814
815    #[test]
816    fn test_reference_mode_one_line() {
817        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
818        let tokens = count_tokens(content);
819        let result = process_mode(
820            content,
821            "reference",
822            "F1",
823            "test.rs",
824            "rs",
825            tokens,
826            CrpMode::Off,
827            "test.rs",
828            None,
829        );
830        let lines: Vec<&str> = result.lines().collect();
831        assert!(
832            lines.len() <= 3,
833            "reference mode should be very compact, got {} lines",
834            lines.len()
835        );
836        assert!(result.contains("lines"), "should contain line count");
837        assert!(result.contains("tok"), "should contain token count");
838    }
839
840    #[test]
841    fn cached_lines_mode_invalidates_on_mtime_change() {
842        let dir = tempfile::tempdir().unwrap();
843        let path = dir.path().join("file.txt");
844        let p = path.to_string_lossy().to_string();
845
846        std::fs::write(&path, "one\nsecond\n").unwrap();
847        let mut cache = SessionCache::new();
848
849        let (out1, _mode1) =
850            handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
851        let l1: Vec<&str> = out1.lines().collect();
852        let got1 = l1.get(1).copied().unwrap_or_default().trim();
853        let got1 = got1.split_once('|').map_or(got1, |(_, s)| s.trim());
854        assert_eq!(got1, "one");
855
856        std::thread::sleep(Duration::from_secs(1));
857        std::fs::write(&path, "two\nsecond\n").unwrap();
858
859        let (out2, _mode2) =
860            handle_with_task_resolved(&mut cache, &p, "lines:1-1", CrpMode::Off, None);
861        let l2: Vec<&str> = out2.lines().collect();
862        let got2 = l2.get(1).copied().unwrap_or_default().trim();
863        let got2 = got2.split_once('|').map_or(got2, |(_, s)| s.trim());
864        assert_eq!(got2, "two");
865    }
866
867    #[test]
868    #[cfg_attr(tarpaulin, ignore)]
869    fn benchmark_task_conditioned_compression() {
870        // Keep this reasonably small so CI coverage instrumentation stays fast.
871        let content = generate_benchmark_code(200);
872        let full_tokens = count_tokens(&content);
873        let task = Some("fix authentication in validate_token");
874
875        let full_output = process_mode(
876            &content,
877            "full",
878            "F1",
879            "server.rs",
880            "rs",
881            full_tokens,
882            CrpMode::Off,
883            "server.rs",
884            task,
885        );
886        let task_output = process_mode(
887            &content,
888            "task",
889            "F1",
890            "server.rs",
891            "rs",
892            full_tokens,
893            CrpMode::Off,
894            "server.rs",
895            task,
896        );
897        let sig_output = process_mode(
898            &content,
899            "signatures",
900            "F1",
901            "server.rs",
902            "rs",
903            full_tokens,
904            CrpMode::Off,
905            "server.rs",
906            task,
907        );
908        let ref_output = process_mode(
909            &content,
910            "reference",
911            "F1",
912            "server.rs",
913            "rs",
914            full_tokens,
915            CrpMode::Off,
916            "server.rs",
917            task,
918        );
919
920        let full_tok = count_tokens(&full_output);
921        let task_tok = count_tokens(&task_output);
922        let sig_tok = count_tokens(&sig_output);
923        let ref_tok = count_tokens(&ref_output);
924
925        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
926        eprintln!("Source: 200-line Rust file, task='fix authentication in validate_token'");
927        eprintln!("  full:       {full_tok:>6} tokens (baseline)");
928        eprintln!(
929            "  task:       {task_tok:>6} tokens ({:.0}% savings)",
930            (1.0 - task_tok as f64 / full_tok as f64) * 100.0
931        );
932        eprintln!(
933            "  signatures: {sig_tok:>6} tokens ({:.0}% savings)",
934            (1.0 - sig_tok as f64 / full_tok as f64) * 100.0
935        );
936        eprintln!(
937            "  reference:  {ref_tok:>6} tokens ({:.0}% savings)",
938            (1.0 - ref_tok as f64 / full_tok as f64) * 100.0
939        );
940        eprintln!("================================================\n");
941
942        assert!(task_tok < full_tok, "task mode should save tokens");
943        assert!(sig_tok < full_tok, "signatures should save tokens");
944        assert!(ref_tok < sig_tok, "reference should be most compact");
945    }
946
947    fn generate_benchmark_code(lines: usize) -> String {
948        let mut code = Vec::with_capacity(lines);
949        code.push("use std::collections::HashMap;".to_string());
950        code.push("use crate::core::auth;".to_string());
951        code.push(String::new());
952        code.push("pub struct Server {".to_string());
953        code.push("    config: Config,".to_string());
954        code.push("    cache: HashMap<String, String>,".to_string());
955        code.push("}".to_string());
956        code.push(String::new());
957        code.push("impl Server {".to_string());
958        code.push(
959            "    pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {"
960                .to_string(),
961        );
962        code.push("        let decoded = auth::decode_jwt(token)?;".to_string());
963        code.push("        if decoded.exp < chrono::Utc::now().timestamp() {".to_string());
964        code.push("            return Err(AuthError::Expired);".to_string());
965        code.push("        }".to_string());
966        code.push("        Ok(decoded.claims)".to_string());
967        code.push("    }".to_string());
968        code.push(String::new());
969
970        let remaining = lines.saturating_sub(code.len());
971        for i in 0..remaining {
972            if i % 30 == 0 {
973                code.push(format!(
974                    "    pub fn handler_{i}(&self, req: Request) -> Response {{"
975                ));
976            } else if i % 30 == 29 {
977                code.push("    }".to_string());
978            } else {
979                code.push(format!("        let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"));
980            }
981        }
982        code.push("}".to_string());
983        code.join("\n")
984    }
985}