Skip to main content

lean_ctx/tools/
ctx_read.rs

1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Marker line telling the reader that the preceding output is a lossy rendering.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Returns `output` followed by the compressed-output trailer.
///
/// The trailer is [`COMPRESSED_HINT`] on its own line, then an indented
/// `ctx_read(...)` call for `file_path` with `mode="full"`.
fn append_compressed_hint(output: &str, file_path: &str) -> String {
    let mut hinted = String::from(output);
    hinted.push('\n');
    hinted.push_str(COMPRESSED_HINT);
    hinted.push_str("\n  ctx_read(\"");
    hinted.push_str(file_path);
    hinted.push_str("\", mode=\"full\")");
    hinted
}
18
19pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
20    let cap = crate::core::limits::max_read_bytes();
21    if let Ok(meta) = std::fs::metadata(path) {
22        if meta.len() > cap as u64 {
23            return Err(std::io::Error::other(format!(
24                "file too large ({} bytes, cap {} via LCTX_MAX_READ_BYTES)",
25                meta.len(),
26                cap
27            )));
28        }
29    }
30    let bytes = std::fs::read(path)?;
31    match String::from_utf8(bytes) {
32        Ok(s) => Ok(s),
33        Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
34    }
35}
36
37pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
38    handle_with_options(cache, path, mode, false, crp_mode, None)
39}
40
41pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
42    handle_with_options(cache, path, mode, true, crp_mode, None)
43}
44
45pub fn handle_with_task(
46    cache: &mut SessionCache,
47    path: &str,
48    mode: &str,
49    crp_mode: CrpMode,
50    task: Option<&str>,
51) -> String {
52    handle_with_options(cache, path, mode, false, crp_mode, task)
53}
54
55pub fn handle_with_task_resolved(
56    cache: &mut SessionCache,
57    path: &str,
58    mode: &str,
59    crp_mode: CrpMode,
60    task: Option<&str>,
61) -> (String, String) {
62    handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
63}
64
65pub fn handle_fresh_with_task(
66    cache: &mut SessionCache,
67    path: &str,
68    mode: &str,
69    crp_mode: CrpMode,
70    task: Option<&str>,
71) -> String {
72    handle_with_options(cache, path, mode, true, crp_mode, task)
73}
74
75pub fn handle_fresh_with_task_resolved(
76    cache: &mut SessionCache,
77    path: &str,
78    mode: &str,
79    crp_mode: CrpMode,
80    task: Option<&str>,
81) -> (String, String) {
82    handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
83}
84
85fn handle_with_options(
86    cache: &mut SessionCache,
87    path: &str,
88    mode: &str,
89    fresh: bool,
90    crp_mode: CrpMode,
91    task: Option<&str>,
92) -> String {
93    handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).0
94}
95
96fn handle_with_options_resolved(
97    cache: &mut SessionCache,
98    path: &str,
99    mode: &str,
100    fresh: bool,
101    crp_mode: CrpMode,
102    task: Option<&str>,
103) -> (String, String) {
104    let file_ref = cache.get_file_ref(path);
105    let short = protocol::shorten_path(path);
106    let ext = Path::new(path)
107        .extension()
108        .and_then(|e| e.to_str())
109        .unwrap_or("");
110
111    if fresh {
112        cache.invalidate(path);
113    }
114
115    if mode == "diff" {
116        return (handle_diff(cache, path, &file_ref), "diff".to_string());
117    }
118
119    if let Some(existing) = cache.get(path) {
120        if mode == "full" {
121            return (
122                handle_full_with_auto_delta(cache, path, &file_ref, &short, ext),
123                "full".to_string(),
124            );
125        }
126        let content = existing.content.clone();
127        let original_tokens = existing.original_tokens;
128        let resolved_mode = if mode == "auto" {
129            resolve_auto_mode(path, original_tokens, task)
130        } else {
131            mode.to_string()
132        };
133        let out = process_mode(
134            &content,
135            &resolved_mode,
136            &file_ref,
137            &short,
138            ext,
139            original_tokens,
140            crp_mode,
141            path,
142            task,
143        );
144        return (out, resolved_mode);
145    }
146
147    let content = match read_file_lossy(path) {
148        Ok(c) => c,
149        Err(e) => return (format!("ERROR: {e}"), "error".to_string()),
150    };
151
152    let similar_hint = find_semantic_similar(path, &content);
153
154    let store_result = cache.store(path, content.clone());
155
156    update_semantic_index(path, &content);
157
158    if mode == "full" {
159        let mut output = format_full_output(
160            &file_ref,
161            &short,
162            ext,
163            &content,
164            store_result.original_tokens,
165            store_result.line_count,
166        );
167        if let Some(hint) = similar_hint {
168            output.push_str(&format!("\n{hint}"));
169        }
170        return (output, "full".to_string());
171    }
172
173    let resolved_mode = if mode == "auto" {
174        resolve_auto_mode(path, store_result.original_tokens, task)
175    } else {
176        mode.to_string()
177    };
178
179    let mut output = process_mode(
180        &content,
181        &resolved_mode,
182        &file_ref,
183        &short,
184        ext,
185        store_result.original_tokens,
186        crp_mode,
187        path,
188        task,
189    );
190    if let Some(hint) = similar_hint {
191        output.push_str(&format!("\n{hint}"));
192    }
193    (output, resolved_mode)
194}
195
196fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
197    let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
198    let predictor = crate::core::mode_predictor::ModePredictor::new();
199    let mut predicted = predictor
200        .predict_best_mode(&sig)
201        .unwrap_or_else(|| "full".to_string());
202    if predicted == "auto" {
203        predicted = "full".to_string();
204    }
205
206    if let Some(project_root) =
207        crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
208    {
209        let ext = std::path::Path::new(file_path)
210            .extension()
211            .and_then(|e| e.to_str())
212            .unwrap_or("");
213        let bucket = match original_tokens {
214            0..=2000 => "sm",
215            2001..=10000 => "md",
216            10001..=50000 => "lg",
217            _ => "xl",
218        };
219        let bandit_key = format!("{ext}_{bucket}");
220        let mut store = crate::core::bandit::BanditStore::load(&project_root);
221        let bandit = store.get_or_create(&bandit_key);
222        let arm = bandit.select_arm();
223        if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
224            predicted = "aggressive".to_string();
225        }
226    }
227
228    let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
229    policy.choose_auto_mode(task, &predicted)
230}
231
232fn find_semantic_similar(path: &str, content: &str) -> Option<String> {
233    let project_root = detect_project_root(path);
234    let index = crate::core::semantic_cache::SemanticCacheIndex::load(&project_root)?;
235
236    let similar = index.find_similar(content, 0.7);
237    let relevant: Vec<_> = similar
238        .into_iter()
239        .filter(|(p, _)| p != path)
240        .take(3)
241        .collect();
242
243    if relevant.is_empty() {
244        return None;
245    }
246
247    let hints: Vec<String> = relevant
248        .iter()
249        .map(|(p, score)| format!("  {p} ({:.0}% similar)", score * 100.0))
250        .collect();
251
252    Some(format!(
253        "[semantic: {} similar file(s) in cache]\n{}",
254        relevant.len(),
255        hints.join("\n")
256    ))
257}
258
259fn update_semantic_index(path: &str, content: &str) {
260    let project_root = detect_project_root(path);
261    let session_id = format!("{}", std::process::id());
262    let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
263    index.add_file(path, content, &session_id);
264    let _ = index.save(&project_root);
265}
266
267fn detect_project_root(path: &str) -> String {
268    crate::core::protocol::detect_project_root_or_cwd(path)
269}
270
271const AUTO_DELTA_THRESHOLD: f64 = 0.6;
272
273/// Re-reads from disk; if content changed and delta is compact, sends auto-delta.
274fn handle_full_with_auto_delta(
275    cache: &mut SessionCache,
276    path: &str,
277    file_ref: &str,
278    short: &str,
279    ext: &str,
280) -> String {
281    let disk_content = match read_file_lossy(path) {
282        Ok(c) => c,
283        Err(_) => {
284            cache.record_cache_hit(path);
285            let existing = cache.get(path).unwrap();
286            return format!(
287                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
288                existing.read_count, existing.line_count
289            );
290        }
291    };
292
293    let old_content = cache.get(path).unwrap().content.clone();
294    let store_result = cache.store(path, disk_content.clone());
295
296    if store_result.was_hit {
297        return format!(
298            "{file_ref}={short} cached {}t {}L\nFile already in context from previous read. Use fresh=true to re-read if content needed again.",
299            store_result.read_count, store_result.line_count
300        );
301    }
302
303    let diff = compressor::diff_content(&old_content, &disk_content);
304    let diff_tokens = count_tokens(&diff);
305    let full_tokens = store_result.original_tokens;
306
307    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
308        let savings = protocol::format_savings(full_tokens, diff_tokens);
309        return format!(
310            "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
311            disk_content.lines().count()
312        );
313    }
314
315    format_full_output(
316        file_ref,
317        short,
318        ext,
319        &disk_content,
320        store_result.original_tokens,
321        store_result.line_count,
322    )
323}
324
325fn format_full_output(
326    file_ref: &str,
327    short: &str,
328    ext: &str,
329    content: &str,
330    original_tokens: usize,
331    line_count: usize,
332) -> String {
333    let tokens = original_tokens;
334    let metadata = build_header(file_ref, short, ext, content, line_count, true);
335
336    let mut sym = SymbolMap::new();
337    let idents = symbol_map::extract_identifiers(content, ext);
338    for ident in &idents {
339        sym.register(ident);
340    }
341
342    let sym_beneficial = if sym.len() >= 3 {
343        let sym_table = sym.format_table();
344        let compressed = sym.apply(content);
345        let original_tok = count_tokens(content);
346        let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
347        let net_saving = original_tok.saturating_sub(compressed_tok);
348        original_tok > 0 && net_saving * 100 / original_tok >= 5
349    } else {
350        false
351    };
352
353    if sym_beneficial {
354        let compressed_content = sym.apply(content);
355        let sym_table = sym.format_table();
356        let output = format!("{compressed_content}{sym_table}\n{metadata}");
357        let sent = count_tokens(&output);
358        let savings = protocol::format_savings(tokens, sent);
359        return format!("{output}\n{savings}");
360    }
361
362    let output = format!("{content}\n{metadata}");
363    let sent = count_tokens(&output);
364    let savings = protocol::format_savings(tokens, sent);
365    format!("{output}\n{savings}")
366}
367
368fn build_header(
369    file_ref: &str,
370    short: &str,
371    ext: &str,
372    content: &str,
373    line_count: usize,
374    include_deps: bool,
375) -> String {
376    let mut header = format!("{file_ref}={short} {line_count}L");
377
378    if include_deps {
379        let dep_info = deps::extract_deps(content, ext);
380        if !dep_info.imports.is_empty() {
381            let imports_str: Vec<&str> = dep_info
382                .imports
383                .iter()
384                .take(8)
385                .map(|s| s.as_str())
386                .collect();
387            header.push_str(&format!("\n deps {}", imports_str.join(",")));
388        }
389        if !dep_info.exports.is_empty() {
390            let exports_str: Vec<&str> = dep_info
391                .exports
392                .iter()
393                .take(8)
394                .map(|s| s.as_str())
395                .collect();
396            header.push_str(&format!("\n exports {}", exports_str.join(",")));
397        }
398    }
399
400    header
401}
402
403#[allow(clippy::too_many_arguments)]
404fn process_mode(
405    content: &str,
406    mode: &str,
407    file_ref: &str,
408    short: &str,
409    ext: &str,
410    original_tokens: usize,
411    crp_mode: CrpMode,
412    file_path: &str,
413    task: Option<&str>,
414) -> String {
415    let line_count = content.lines().count();
416
417    match mode {
418        "auto" => {
419            let chosen = resolve_auto_mode(file_path, original_tokens, task);
420            process_mode(
421                content,
422                &chosen,
423                file_ref,
424                short,
425                ext,
426                original_tokens,
427                crp_mode,
428                file_path,
429                task,
430            )
431        }
432        "signatures" => {
433            let sigs = signatures::extract_signatures(content, ext);
434            let dep_info = deps::extract_deps(content, ext);
435
436            let mut output = format!("{file_ref}={short} {line_count}L");
437            if !dep_info.imports.is_empty() {
438                let imports_str: Vec<&str> = dep_info
439                    .imports
440                    .iter()
441                    .take(8)
442                    .map(|s| s.as_str())
443                    .collect();
444                output.push_str(&format!("\n deps {}", imports_str.join(",")));
445            }
446            for sig in &sigs {
447                output.push('\n');
448                if crp_mode.is_tdd() {
449                    output.push_str(&sig.to_tdd());
450                } else {
451                    output.push_str(&sig.to_compact());
452                }
453            }
454            let sent = count_tokens(&output);
455            let savings = protocol::format_savings(original_tokens, sent);
456            append_compressed_hint(&format!("{output}\n{savings}"), file_path)
457        }
458        "map" => {
459            if ext == "php" {
460                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
461                {
462                    let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
463                    let sent = count_tokens(&output);
464                    let savings = protocol::format_savings(original_tokens, sent);
465                    output.push('\n');
466                    output.push_str(&savings);
467                    return append_compressed_hint(&output, file_path);
468                }
469            }
470
471            let sigs = signatures::extract_signatures(content, ext);
472            let dep_info = deps::extract_deps(content, ext);
473
474            let mut output = format!("{file_ref}={short} {line_count}L");
475
476            if !dep_info.imports.is_empty() {
477                output.push_str("\n  deps: ");
478                output.push_str(&dep_info.imports.join(", "));
479            }
480
481            if !dep_info.exports.is_empty() {
482                output.push_str("\n  exports: ");
483                output.push_str(&dep_info.exports.join(", "));
484            }
485
486            let key_sigs: Vec<&signatures::Signature> = sigs
487                .iter()
488                .filter(|s| s.is_exported || s.indent == 0)
489                .collect();
490
491            if !key_sigs.is_empty() {
492                output.push_str("\n  API:");
493                for sig in &key_sigs {
494                    output.push_str("\n    ");
495                    if crp_mode.is_tdd() {
496                        output.push_str(&sig.to_tdd());
497                    } else {
498                        output.push_str(&sig.to_compact());
499                    }
500                }
501            }
502
503            let sent = count_tokens(&output);
504            let savings = protocol::format_savings(original_tokens, sent);
505            append_compressed_hint(&format!("{output}\n{savings}"), file_path)
506        }
507        "aggressive" => {
508            #[cfg(feature = "tree-sitter")]
509            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
510            #[cfg(not(feature = "tree-sitter"))]
511            let ast_pruned: Option<String> = None;
512
513            let base = ast_pruned.as_deref().unwrap_or(content);
514
515            let session_intent = crate::core::session::SessionState::load_latest()
516                .and_then(|s| s.active_structured_intent);
517            let raw = if let Some(ref intent) = session_intent {
518                compressor::task_aware_compress(base, Some(ext), intent)
519            } else {
520                compressor::aggressive_compress(base, Some(ext))
521            };
522            let compressed = compressor::safeguard_ratio(content, &raw);
523            let header = build_header(file_ref, short, ext, content, line_count, true);
524
525            let mut sym = SymbolMap::new();
526            let idents = symbol_map::extract_identifiers(&compressed, ext);
527            for ident in &idents {
528                sym.register(ident);
529            }
530
531            let sym_beneficial = if sym.len() >= 3 {
532                let sym_table = sym.format_table();
533                let sym_applied = sym.apply(&compressed);
534                let orig_tok = count_tokens(&compressed);
535                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
536                let net = orig_tok.saturating_sub(comp_tok);
537                orig_tok > 0 && net * 100 / orig_tok >= 5
538            } else {
539                false
540            };
541
542            if sym_beneficial {
543                let sym_output = sym.apply(&compressed);
544                let sym_table = sym.format_table();
545                let sent = count_tokens(&sym_output) + count_tokens(&sym_table);
546                let savings = protocol::format_savings(original_tokens, sent);
547                return append_compressed_hint(
548                    &format!("{header}\n{sym_output}{sym_table}\n{savings}"),
549                    file_path,
550                );
551            }
552
553            let sent = count_tokens(&compressed);
554            let savings = protocol::format_savings(original_tokens, sent);
555            append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path)
556        }
557        "entropy" => {
558            let result = entropy::entropy_compress_adaptive(content, file_path);
559            let avg_h = entropy::analyze_entropy(content).avg_entropy;
560            let header = build_header(file_ref, short, ext, content, line_count, false);
561            let techs = result.techniques.join(", ");
562            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
563            let sent = count_tokens(&output);
564            let savings = protocol::format_savings(original_tokens, sent);
565            let compression_ratio = if original_tokens > 0 {
566                1.0 - (sent as f64 / original_tokens as f64)
567            } else {
568                0.0
569            };
570            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
571            append_compressed_hint(&format!("{output}\n{savings}"), file_path)
572        }
573        "task" => {
574            let task_str = task.unwrap_or("");
575            if task_str.is_empty() {
576                let header = build_header(file_ref, short, ext, content, line_count, true);
577                return format!("{header}\n{content}\n[task mode: no task set — returned full]");
578            }
579            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
580            if keywords.is_empty() {
581                let header = build_header(file_ref, short, ext, content, line_count, true);
582                return format!(
583                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
584                );
585            }
586            let filtered =
587                crate::core::task_relevance::information_bottleneck_filter(content, &keywords, 0.3);
588            let filtered_lines = filtered.lines().count();
589            let header = format!(
590                "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
591            );
592            let project_root = detect_project_root(file_path);
593            let graph_ctx = crate::core::graph_context::build_graph_context(
594                file_path,
595                &project_root,
596                Some(crate::core::graph_context::GraphContextOptions::default()),
597            )
598            .map(|c| crate::core::graph_context::format_graph_context(&c))
599            .unwrap_or_default();
600
601            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
602            let savings = protocol::format_savings(original_tokens, sent);
603            append_compressed_hint(
604                &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
605                file_path,
606            )
607        }
608        "reference" => {
609            let tok = count_tokens(content);
610            let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
611            let sent = count_tokens(&output);
612            let savings = protocol::format_savings(original_tokens, sent);
613            format!("{output}\n{savings}")
614        }
615        mode if mode.starts_with("lines:") => {
616            let range_str = &mode[6..];
617            let extracted = extract_line_range(content, range_str);
618            let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
619            let sent = count_tokens(&extracted);
620            let savings = protocol::format_savings(original_tokens, sent);
621            format!("{header}\n{extracted}\n{savings}")
622        }
623        unknown => {
624            let header = build_header(file_ref, short, ext, content, line_count, true);
625            format!(
626                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
627            )
628        }
629    }
630}
631
/// Extracts the lines of `content` selected by `range_str`.
///
/// `range_str` is a comma-separated list of parts; each part is either a single
/// 1-based line number (`"7"`) or an inclusive range (`"3-9"`). In a range, a
/// missing or unparsable start means line 1 and a missing or unparsable end
/// means the last line; both ends are clamped to the file. Parts that select
/// nothing are skipped. Parts are emitted in the order given, so overlapping
/// parts repeat lines.
///
/// Each selected line is rendered as `"{n:>4}| {text}"`. If nothing was
/// selected, the literal message `"No lines matched the range."` is returned.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();

    let selected: Vec<String> = range_str
        .split(',')
        .map(str::trim)
        .flat_map(|part| {
            let (first, last) = match part.split_once('-') {
                Some((lo, hi)) => (
                    lo.trim().parse::<usize>().unwrap_or(1).max(1),
                    hi.trim().parse::<usize>().unwrap_or(total).min(total),
                ),
                None => match part.parse::<usize>() {
                    Ok(n) if (1..=total).contains(&n) => (n, n),
                    // Not a number, or outside the file: select nothing.
                    _ => (1, 0),
                },
            };
            // `first >= 1` and `last <= total` hold here, so every index is valid;
            // an inverted pair simply yields an empty range.
            first..=last
        })
        .map(|n| format!("{n:>4}| {}", lines[n - 1]))
        .collect();

    if selected.is_empty() {
        return "No lines matched the range.".to_string();
    }
    selected.join("\n")
}
660
661fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> String {
662    let short = protocol::shorten_path(path);
663    let old_content = cache.get(path).map(|e| e.content.clone());
664
665    let new_content = match read_file_lossy(path) {
666        Ok(c) => c,
667        Err(e) => return format!("ERROR: {e}"),
668    };
669
670    let original_tokens = count_tokens(&new_content);
671
672    let diff_output = if let Some(old) = &old_content {
673        compressor::diff_content(old, &new_content)
674    } else {
675        format!("[first read]\n{new_content}")
676    };
677
678    cache.store(path, new_content);
679
680    let sent = count_tokens(&diff_output);
681    let savings = protocol::format_savings(original_tokens, sent);
682    format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}")
683}
684
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `process_mode` as file ref `F1` with extension `rs` and CRP off,
    /// using `name` as both the short name and the file path.
    fn run_mode(
        content: &str,
        mode: &str,
        name: &str,
        tokens: usize,
        task: Option<&str>,
    ) -> String {
        process_mode(
            content,
            mode,
            "F1",
            name,
            "rs",
            tokens,
            CrpMode::Off,
            name,
            task,
        )
    }

    /// A header without deps contains no bracket characters.
    #[test]
    fn test_header_toon_format_no_brackets() {
        let header = build_header("F1", "main.rs", "rs", "use std::io;\nfn main() {}\n", 2, false);
        for bracket in ['[', ']'] {
            assert!(!header.contains(bracket));
        }
        assert!(header.contains("F1=main.rs 2L"));
    }

    /// When deps are present they use the indented form, not `deps:[...]`.
    #[test]
    fn test_header_toon_deps_indented() {
        let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
        let header = build_header("F1", "main.rs", "rs", content, 3, true);
        if !header.contains("deps") {
            return;
        }
        assert!(
            header.contains("\n deps "),
            "deps should use indented TOON format"
        );
        assert!(
            !header.contains("deps:["),
            "deps should not use bracket format"
        );
    }

    /// The current header is no larger, in tokens, than the old bracketed one.
    #[test]
    fn test_header_toon_saves_tokens() {
        let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
        let old_tokens = count_tokens("F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]");
        let new_tokens = count_tokens(&build_header("F1", "main.rs", "rs", content, 4, true));
        assert!(
            new_tokens <= old_tokens,
            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
        );
    }

    /// Each TDD symbol costs at most two tokens.
    #[test]
    fn test_tdd_symbols_are_compact() {
        let symbols = [
            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
        ];
        for sym in symbols.iter() {
            let tok = count_tokens(sym);
            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
        }
    }

    /// Task mode shrinks a file that is mostly unrelated to the task.
    #[test]
    fn test_task_mode_filters_content() {
        let content = (0..200)
            .map(|i| match i % 20 {
                0 => format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}"),
                _ => format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}"),
            })
            .collect::<Vec<_>>()
            .join("\n");
        let full_tokens = count_tokens(&content);

        let result = run_mode(
            &content,
            "task",
            "test.rs",
            full_tokens,
            Some("fix bug in validate_token"),
        );

        let result_tokens = count_tokens(&result);
        assert!(
            result_tokens < full_tokens,
            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
        );
        assert!(
            result.contains("task-filtered"),
            "output should contain task-filtered marker"
        );
    }

    /// Task mode without a task says so in its output.
    #[test]
    fn test_task_mode_without_task_returns_full() {
        let content = "fn main() {}\nfn helper() {}\n";
        let result = run_mode(content, "task", "test.rs", count_tokens(content), None);
        assert!(
            result.contains("no task set"),
            "should indicate no task: {result}"
        );
    }

    /// Reference mode is at most three lines and reports lines and tokens.
    #[test]
    fn test_reference_mode_one_line() {
        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
        let result = run_mode(content, "reference", "test.rs", count_tokens(content), None);

        let rendered_lines = result.lines().count();
        assert!(
            rendered_lines <= 3,
            "reference mode should be very compact, got {} lines",
            rendered_lines
        );
        assert!(result.contains("lines"), "should contain line count");
        assert!(result.contains("tok"), "should contain token count");
    }

    /// Compares token counts of full, task, signatures and reference output.
    #[test]
    fn benchmark_task_conditioned_compression() {
        let content = generate_benchmark_code(500);
        let full_tokens = count_tokens(&content);
        let task = Some("fix authentication in validate_token");
        let render = |mode: &str| run_mode(&content, mode, "server.rs", full_tokens, task);

        let full_output = render("full");
        let task_output = render("task");
        let sig_output = render("signatures");
        let ref_output = render("reference");

        let full_tok = count_tokens(&full_output);
        let task_tok = count_tokens(&task_output);
        let sig_tok = count_tokens(&sig_output);
        let ref_tok = count_tokens(&ref_output);
        let savings_pct = |tok: usize| (1.0 - tok as f64 / full_tok as f64) * 100.0;

        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
        eprintln!("Source: 500-line Rust file, task='fix authentication in validate_token'");
        eprintln!("  full:       {full_tok:>6} tokens (baseline)");
        eprintln!(
            "  task:       {task_tok:>6} tokens ({:.0}% savings)",
            savings_pct(task_tok)
        );
        eprintln!(
            "  signatures: {sig_tok:>6} tokens ({:.0}% savings)",
            savings_pct(sig_tok)
        );
        eprintln!(
            "  reference:  {ref_tok:>6} tokens ({:.0}% savings)",
            savings_pct(ref_tok)
        );
        eprintln!("================================================\n");

        assert!(task_tok < full_tok, "task mode should save tokens");
        assert!(sig_tok < full_tok, "signatures should save tokens");
        assert!(ref_tok < sig_tok, "reference should be most compact");
    }

    /// Generates a synthetic Rust file: a fixed `Server` prelude followed by
    /// filler handler methods until roughly `lines` lines, then a closing `}`.
    fn generate_benchmark_code(lines: usize) -> String {
        const PRELUDE: [&str; 17] = [
            "use std::collections::HashMap;",
            "use crate::core::auth;",
            "",
            "pub struct Server {",
            "    config: Config,",
            "    cache: HashMap<String, String>,",
            "}",
            "",
            "impl Server {",
            "    pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {",
            "        let decoded = auth::decode_jwt(token)?;",
            "        if decoded.exp < chrono::Utc::now().timestamp() {",
            "            return Err(AuthError::Expired);",
            "        }",
            "        Ok(decoded.claims)",
            "    }",
            "",
        ];

        let mut code: Vec<String> = PRELUDE.iter().map(|line| line.to_string()).collect();

        let remaining = lines.saturating_sub(code.len());
        code.extend((0..remaining).map(|i| match i % 30 {
            0 => format!("    pub fn handler_{i}(&self, req: Request) -> Response {{"),
            29 => "    }".to_string(),
            _ => format!("        let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"),
        }));
        code.push("}".to_string());
        code.join("\n")
    }
}