Skip to main content

lean_ctx/tools/
ctx_read.rs

1use std::path::Path;
2
3use crate::core::cache::SessionCache;
4use crate::core::compressor;
5use crate::core::deps;
6use crate::core::entropy;
7use crate::core::protocol;
8use crate::core::signatures;
9use crate::core::symbol_map::{self, SymbolMap};
10use crate::core::tokens::count_tokens;
11use crate::tools::CrpMode;
12
/// Marker line added to lossy renderings so the reader knows the source was compressed.
const COMPRESSED_HINT: &str = "[compressed — use mode=\"full\" for complete source]";

/// Returns `output` followed by the compression marker and a ready-to-use
/// `ctx_read(..., mode="full")` call for `file_path`.
fn append_compressed_hint(output: &str, file_path: &str) -> String {
    let mut hinted = String::from(output);
    hinted.push('\n');
    hinted.push_str(COMPRESSED_HINT);
    hinted.push_str("\n  ctx_read(\"");
    hinted.push_str(file_path);
    hinted.push_str("\", mode=\"full\")");
    hinted
}
18
19pub fn read_file_lossy(path: &str) -> Result<String, std::io::Error> {
20    let cap = crate::core::limits::max_read_bytes();
21    if let Ok(meta) = std::fs::metadata(path) {
22        if meta.len() > cap as u64 {
23            return Err(std::io::Error::other(format!(
24                "file too large ({} bytes, cap {} via LCTX_MAX_READ_BYTES)",
25                meta.len(),
26                cap
27            )));
28        }
29    }
30    let bytes = std::fs::read(path)?;
31    match String::from_utf8(bytes) {
32        Ok(s) => Ok(s),
33        Err(e) => Ok(String::from_utf8_lossy(e.as_bytes()).into_owned()),
34    }
35}
36
37pub fn handle(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
38    handle_with_options(cache, path, mode, false, crp_mode, None)
39}
40
41pub fn handle_fresh(cache: &mut SessionCache, path: &str, mode: &str, crp_mode: CrpMode) -> String {
42    handle_with_options(cache, path, mode, true, crp_mode, None)
43}
44
45pub fn handle_with_task(
46    cache: &mut SessionCache,
47    path: &str,
48    mode: &str,
49    crp_mode: CrpMode,
50    task: Option<&str>,
51) -> String {
52    handle_with_options(cache, path, mode, false, crp_mode, task)
53}
54
55pub fn handle_with_task_resolved(
56    cache: &mut SessionCache,
57    path: &str,
58    mode: &str,
59    crp_mode: CrpMode,
60    task: Option<&str>,
61) -> (String, String) {
62    handle_with_options_resolved(cache, path, mode, false, crp_mode, task)
63}
64
65pub fn handle_fresh_with_task(
66    cache: &mut SessionCache,
67    path: &str,
68    mode: &str,
69    crp_mode: CrpMode,
70    task: Option<&str>,
71) -> String {
72    handle_with_options(cache, path, mode, true, crp_mode, task)
73}
74
75pub fn handle_fresh_with_task_resolved(
76    cache: &mut SessionCache,
77    path: &str,
78    mode: &str,
79    crp_mode: CrpMode,
80    task: Option<&str>,
81) -> (String, String) {
82    handle_with_options_resolved(cache, path, mode, true, crp_mode, task)
83}
84
85fn handle_with_options(
86    cache: &mut SessionCache,
87    path: &str,
88    mode: &str,
89    fresh: bool,
90    crp_mode: CrpMode,
91    task: Option<&str>,
92) -> String {
93    handle_with_options_resolved(cache, path, mode, fresh, crp_mode, task).0
94}
95
96fn handle_with_options_resolved(
97    cache: &mut SessionCache,
98    path: &str,
99    mode: &str,
100    fresh: bool,
101    crp_mode: CrpMode,
102    task: Option<&str>,
103) -> (String, String) {
104    let file_ref = cache.get_file_ref(path);
105    let short = protocol::shorten_path(path);
106    let ext = Path::new(path)
107        .extension()
108        .and_then(|e| e.to_str())
109        .unwrap_or("");
110
111    if fresh {
112        cache.invalidate(path);
113    }
114
115    if mode == "diff" {
116        return (handle_diff(cache, path, &file_ref), "diff".to_string());
117    }
118
119    if let Some(existing) = cache.get(path) {
120        if mode == "full" {
121            return (
122                handle_full_with_auto_delta(cache, path, &file_ref, &short, ext),
123                "full".to_string(),
124            );
125        }
126        let content = existing.content.clone();
127        let original_tokens = existing.original_tokens;
128        let resolved_mode = if mode == "auto" {
129            resolve_auto_mode(path, original_tokens, task)
130        } else {
131            mode.to_string()
132        };
133        let out = process_mode(
134            &content,
135            &resolved_mode,
136            &file_ref,
137            &short,
138            ext,
139            original_tokens,
140            crp_mode,
141            path,
142            task,
143        );
144        return (out, resolved_mode);
145    }
146
147    let content = match read_file_lossy(path) {
148        Ok(c) => c,
149        Err(e) => return (format!("ERROR: {e}"), "error".to_string()),
150    };
151
152    let similar_hint = find_semantic_similar(path, &content);
153
154    let store_result = cache.store(path, content.clone());
155
156    update_semantic_index(path, &content);
157
158    if mode == "full" {
159        let mut output = format_full_output(
160            &file_ref,
161            &short,
162            ext,
163            &content,
164            store_result.original_tokens,
165            store_result.line_count,
166        );
167        if let Some(hint) = similar_hint {
168            output.push_str(&format!("\n{hint}"));
169        }
170        return (output, "full".to_string());
171    }
172
173    let resolved_mode = if mode == "auto" {
174        resolve_auto_mode(path, store_result.original_tokens, task)
175    } else {
176        mode.to_string()
177    };
178
179    let mut output = process_mode(
180        &content,
181        &resolved_mode,
182        &file_ref,
183        &short,
184        ext,
185        store_result.original_tokens,
186        crp_mode,
187        path,
188        task,
189    );
190    if let Some(hint) = similar_hint {
191        output.push_str(&format!("\n{hint}"));
192    }
193    (output, resolved_mode)
194}
195
196fn resolve_auto_mode(file_path: &str, original_tokens: usize, task: Option<&str>) -> String {
197    let sig = crate::core::mode_predictor::FileSignature::from_path(file_path, original_tokens);
198    let predictor = crate::core::mode_predictor::ModePredictor::new();
199    let mut predicted = predictor
200        .predict_best_mode(&sig)
201        .unwrap_or_else(|| "full".to_string());
202    if predicted == "auto" {
203        predicted = "full".to_string();
204    }
205
206    if let Some(project_root) =
207        crate::core::session::SessionState::load_latest().and_then(|s| s.project_root)
208    {
209        let ext = std::path::Path::new(file_path)
210            .extension()
211            .and_then(|e| e.to_str())
212            .unwrap_or("");
213        let bucket = match original_tokens {
214            0..=2000 => "sm",
215            2001..=10000 => "md",
216            10001..=50000 => "lg",
217            _ => "xl",
218        };
219        let bandit_key = format!("{ext}_{bucket}");
220        let mut store = crate::core::bandit::BanditStore::load(&project_root);
221        let bandit = store.get_or_create(&bandit_key);
222        let arm = bandit.select_arm();
223        if arm.budget_ratio < 0.25 && predicted == "full" && original_tokens > 2000 {
224            predicted = "aggressive".to_string();
225        }
226    }
227
228    if let Some(session) = crate::core::session::SessionState::load_latest() {
229        if let Some(task_type) = session.active_task_type() {
230            predicted = refine_mode_by_task_type(&predicted, task_type, original_tokens);
231        }
232    }
233
234    let policy = crate::core::adaptive_mode_policy::AdaptiveModePolicyStore::load();
235    policy.choose_auto_mode(task, &predicted)
236}
237
238fn refine_mode_by_task_type(
239    current: &str,
240    task_type: crate::core::intent_engine::TaskType,
241    token_count: usize,
242) -> String {
243    use crate::core::intent_engine::TaskType;
244
245    match task_type {
246        TaskType::FixBug | TaskType::Debug => {
247            if token_count > 5000 && current == "full" {
248                return "task".to_string();
249            }
250            current.to_string()
251        }
252        TaskType::Refactor | TaskType::Review => {
253            if token_count > 3000 && current == "full" {
254                return "signatures".to_string();
255            }
256            current.to_string()
257        }
258        TaskType::Generate => {
259            if token_count > 8000 && current == "full" {
260                return "signatures".to_string();
261            }
262            current.to_string()
263        }
264        TaskType::Explore => {
265            if token_count > 5000 && current == "full" {
266                return "map".to_string();
267            }
268            current.to_string()
269        }
270        TaskType::Test => {
271            if token_count > 10000 && current == "full" {
272                return "aggressive".to_string();
273            }
274            current.to_string()
275        }
276        TaskType::Config | TaskType::Deploy => current.to_string(),
277    }
278}
279
280fn find_semantic_similar(path: &str, content: &str) -> Option<String> {
281    let project_root = detect_project_root(path);
282    let index = crate::core::semantic_cache::SemanticCacheIndex::load(&project_root)?;
283
284    let similar = index.find_similar(content, 0.7);
285    let relevant: Vec<_> = similar
286        .into_iter()
287        .filter(|(p, _)| p != path)
288        .take(3)
289        .collect();
290
291    if relevant.is_empty() {
292        return None;
293    }
294
295    let hints: Vec<String> = relevant
296        .iter()
297        .map(|(p, score)| format!("  {p} ({:.0}% similar)", score * 100.0))
298        .collect();
299
300    Some(format!(
301        "[semantic: {} similar file(s) in cache]\n{}",
302        relevant.len(),
303        hints.join("\n")
304    ))
305}
306
307fn update_semantic_index(path: &str, content: &str) {
308    let project_root = detect_project_root(path);
309    let session_id = format!("{}", std::process::id());
310    let mut index = crate::core::semantic_cache::SemanticCacheIndex::load_or_create(&project_root);
311    index.add_file(path, content, &session_id);
312    let _ = index.save(&project_root);
313}
314
315fn detect_project_root(path: &str) -> String {
316    crate::core::protocol::detect_project_root_or_cwd(path)
317}
318
319const AUTO_DELTA_THRESHOLD: f64 = 0.6;
320
321/// Re-reads from disk; if content changed and delta is compact, sends auto-delta.
322fn handle_full_with_auto_delta(
323    cache: &mut SessionCache,
324    path: &str,
325    file_ref: &str,
326    short: &str,
327    ext: &str,
328) -> String {
329    let disk_content = match read_file_lossy(path) {
330        Ok(c) => c,
331        Err(_) => {
332            cache.record_cache_hit(path);
333            let existing = cache.get(path).unwrap();
334            return format!(
335                "[using cached version — file read failed]\n{file_ref}={short} cached {}t {}L",
336                existing.read_count, existing.line_count
337            );
338        }
339    };
340
341    let old_content = cache.get(path).unwrap().content.clone();
342    let store_result = cache.store(path, disk_content.clone());
343
344    if store_result.was_hit {
345        return format!(
346            "{file_ref}={short} cached {}t {}L\nFile already in context from previous read. Use fresh=true to re-read if content needed again.",
347            store_result.read_count, store_result.line_count
348        );
349    }
350
351    let diff = compressor::diff_content(&old_content, &disk_content);
352    let diff_tokens = count_tokens(&diff);
353    let full_tokens = store_result.original_tokens;
354
355    if full_tokens > 0 && (diff_tokens as f64) < (full_tokens as f64 * AUTO_DELTA_THRESHOLD) {
356        let savings = protocol::format_savings(full_tokens, diff_tokens);
357        return format!(
358            "{file_ref}={short} [auto-delta] ∆{}L\n{diff}\n{savings}",
359            disk_content.lines().count()
360        );
361    }
362
363    format_full_output(
364        file_ref,
365        short,
366        ext,
367        &disk_content,
368        store_result.original_tokens,
369        store_result.line_count,
370    )
371}
372
373fn format_full_output(
374    file_ref: &str,
375    short: &str,
376    ext: &str,
377    content: &str,
378    original_tokens: usize,
379    line_count: usize,
380) -> String {
381    let tokens = original_tokens;
382    let metadata = build_header(file_ref, short, ext, content, line_count, true);
383
384    let mut sym = SymbolMap::new();
385    let idents = symbol_map::extract_identifiers(content, ext);
386    for ident in &idents {
387        sym.register(ident);
388    }
389
390    let sym_beneficial = if sym.len() >= 3 {
391        let sym_table = sym.format_table();
392        let compressed = sym.apply(content);
393        let original_tok = count_tokens(content);
394        let compressed_tok = count_tokens(&compressed) + count_tokens(&sym_table);
395        let net_saving = original_tok.saturating_sub(compressed_tok);
396        original_tok > 0 && net_saving * 100 / original_tok >= 5
397    } else {
398        false
399    };
400
401    if sym_beneficial {
402        let compressed_content = sym.apply(content);
403        let sym_table = sym.format_table();
404        let output = format!("{compressed_content}{sym_table}\n{metadata}");
405        let sent = count_tokens(&output);
406        let savings = protocol::format_savings(tokens, sent);
407        return format!("{output}\n{savings}");
408    }
409
410    let output = format!("{content}\n{metadata}");
411    let sent = count_tokens(&output);
412    let savings = protocol::format_savings(tokens, sent);
413    format!("{output}\n{savings}")
414}
415
416fn build_header(
417    file_ref: &str,
418    short: &str,
419    ext: &str,
420    content: &str,
421    line_count: usize,
422    include_deps: bool,
423) -> String {
424    let mut header = format!("{file_ref}={short} {line_count}L");
425
426    if include_deps {
427        let dep_info = deps::extract_deps(content, ext);
428        if !dep_info.imports.is_empty() {
429            let imports_str: Vec<&str> = dep_info
430                .imports
431                .iter()
432                .take(8)
433                .map(|s| s.as_str())
434                .collect();
435            header.push_str(&format!("\n deps {}", imports_str.join(",")));
436        }
437        if !dep_info.exports.is_empty() {
438            let exports_str: Vec<&str> = dep_info
439                .exports
440                .iter()
441                .take(8)
442                .map(|s| s.as_str())
443                .collect();
444            header.push_str(&format!("\n exports {}", exports_str.join(",")));
445        }
446    }
447
448    header
449}
450
451#[allow(clippy::too_many_arguments)]
452fn process_mode(
453    content: &str,
454    mode: &str,
455    file_ref: &str,
456    short: &str,
457    ext: &str,
458    original_tokens: usize,
459    crp_mode: CrpMode,
460    file_path: &str,
461    task: Option<&str>,
462) -> String {
463    let line_count = content.lines().count();
464
465    match mode {
466        "auto" => {
467            let chosen = resolve_auto_mode(file_path, original_tokens, task);
468            process_mode(
469                content,
470                &chosen,
471                file_ref,
472                short,
473                ext,
474                original_tokens,
475                crp_mode,
476                file_path,
477                task,
478            )
479        }
480        "signatures" => {
481            let sigs = signatures::extract_signatures(content, ext);
482            let dep_info = deps::extract_deps(content, ext);
483
484            let mut output = format!("{file_ref}={short} {line_count}L");
485            if !dep_info.imports.is_empty() {
486                let imports_str: Vec<&str> = dep_info
487                    .imports
488                    .iter()
489                    .take(8)
490                    .map(|s| s.as_str())
491                    .collect();
492                output.push_str(&format!("\n deps {}", imports_str.join(",")));
493            }
494            for sig in &sigs {
495                output.push('\n');
496                if crp_mode.is_tdd() {
497                    output.push_str(&sig.to_tdd());
498                } else {
499                    output.push_str(&sig.to_compact());
500                }
501            }
502            let sent = count_tokens(&output);
503            let savings = protocol::format_savings(original_tokens, sent);
504            append_compressed_hint(&format!("{output}\n{savings}"), file_path)
505        }
506        "map" => {
507            if ext == "php" {
508                if let Some(php_map) = crate::core::patterns::php::compress_php_map(content, short)
509                {
510                    let mut output = format!("{file_ref}={short} {line_count}L\n{php_map}");
511                    let sent = count_tokens(&output);
512                    let savings = protocol::format_savings(original_tokens, sent);
513                    output.push('\n');
514                    output.push_str(&savings);
515                    return append_compressed_hint(&output, file_path);
516                }
517            }
518
519            let sigs = signatures::extract_signatures(content, ext);
520            let dep_info = deps::extract_deps(content, ext);
521
522            let mut output = format!("{file_ref}={short} {line_count}L");
523
524            if !dep_info.imports.is_empty() {
525                output.push_str("\n  deps: ");
526                output.push_str(&dep_info.imports.join(", "));
527            }
528
529            if !dep_info.exports.is_empty() {
530                output.push_str("\n  exports: ");
531                output.push_str(&dep_info.exports.join(", "));
532            }
533
534            let key_sigs: Vec<&signatures::Signature> = sigs
535                .iter()
536                .filter(|s| s.is_exported || s.indent == 0)
537                .collect();
538
539            if !key_sigs.is_empty() {
540                output.push_str("\n  API:");
541                for sig in &key_sigs {
542                    output.push_str("\n    ");
543                    if crp_mode.is_tdd() {
544                        output.push_str(&sig.to_tdd());
545                    } else {
546                        output.push_str(&sig.to_compact());
547                    }
548                }
549            }
550
551            let sent = count_tokens(&output);
552            let savings = protocol::format_savings(original_tokens, sent);
553            append_compressed_hint(&format!("{output}\n{savings}"), file_path)
554        }
555        "aggressive" => {
556            #[cfg(feature = "tree-sitter")]
557            let ast_pruned = crate::core::signatures_ts::ast_prune(content, ext);
558            #[cfg(not(feature = "tree-sitter"))]
559            let ast_pruned: Option<String> = None;
560
561            let base = ast_pruned.as_deref().unwrap_or(content);
562
563            let session_intent = crate::core::session::SessionState::load_latest()
564                .and_then(|s| s.active_structured_intent);
565            let raw = if let Some(ref intent) = session_intent {
566                compressor::task_aware_compress(base, Some(ext), intent)
567            } else {
568                compressor::aggressive_compress(base, Some(ext))
569            };
570            let compressed = compressor::safeguard_ratio(content, &raw);
571            let header = build_header(file_ref, short, ext, content, line_count, true);
572
573            let mut sym = SymbolMap::new();
574            let idents = symbol_map::extract_identifiers(&compressed, ext);
575            for ident in &idents {
576                sym.register(ident);
577            }
578
579            let sym_beneficial = if sym.len() >= 3 {
580                let sym_table = sym.format_table();
581                let sym_applied = sym.apply(&compressed);
582                let orig_tok = count_tokens(&compressed);
583                let comp_tok = count_tokens(&sym_applied) + count_tokens(&sym_table);
584                let net = orig_tok.saturating_sub(comp_tok);
585                orig_tok > 0 && net * 100 / orig_tok >= 5
586            } else {
587                false
588            };
589
590            if sym_beneficial {
591                let sym_output = sym.apply(&compressed);
592                let sym_table = sym.format_table();
593                let sent = count_tokens(&sym_output) + count_tokens(&sym_table);
594                let savings = protocol::format_savings(original_tokens, sent);
595                return append_compressed_hint(
596                    &format!("{header}\n{sym_output}{sym_table}\n{savings}"),
597                    file_path,
598                );
599            }
600
601            let sent = count_tokens(&compressed);
602            let savings = protocol::format_savings(original_tokens, sent);
603            append_compressed_hint(&format!("{header}\n{compressed}\n{savings}"), file_path)
604        }
605        "entropy" => {
606            let result = entropy::entropy_compress_adaptive(content, file_path);
607            let avg_h = entropy::analyze_entropy(content).avg_entropy;
608            let header = build_header(file_ref, short, ext, content, line_count, false);
609            let techs = result.techniques.join(", ");
610            let output = format!("{header} H̄={avg_h:.1} [{techs}]\n{}", result.output);
611            let sent = count_tokens(&output);
612            let savings = protocol::format_savings(original_tokens, sent);
613            let compression_ratio = if original_tokens > 0 {
614                1.0 - (sent as f64 / original_tokens as f64)
615            } else {
616                0.0
617            };
618            crate::core::adaptive_thresholds::report_bandit_outcome(compression_ratio > 0.15);
619            append_compressed_hint(&format!("{output}\n{savings}"), file_path)
620        }
621        "task" => {
622            let task_str = task.unwrap_or("");
623            if task_str.is_empty() {
624                let header = build_header(file_ref, short, ext, content, line_count, true);
625                return format!("{header}\n{content}\n[task mode: no task set — returned full]");
626            }
627            let (_files, keywords) = crate::core::task_relevance::parse_task_hints(task_str);
628            if keywords.is_empty() {
629                let header = build_header(file_ref, short, ext, content, line_count, true);
630                return format!(
631                    "{header}\n{content}\n[task mode: no keywords extracted — returned full]"
632                );
633            }
634            let classified_type = crate::core::intent_engine::classify(task_str).task_type;
635            let filtered = crate::core::task_relevance::information_bottleneck_filter_typed(
636                content,
637                &keywords,
638                0.3,
639                Some(classified_type),
640            );
641            let filtered_lines = filtered.lines().count();
642            let header = format!(
643                "{file_ref}={short} {line_count}L [task-filtered: {line_count}→{filtered_lines}]"
644            );
645            let project_root = detect_project_root(file_path);
646            let graph_ctx = crate::core::graph_context::build_graph_context(
647                file_path,
648                &project_root,
649                Some(crate::core::graph_context::GraphContextOptions::default()),
650            )
651            .map(|c| crate::core::graph_context::format_graph_context(&c))
652            .unwrap_or_default();
653
654            let sent = count_tokens(&filtered) + count_tokens(&header) + count_tokens(&graph_ctx);
655            let savings = protocol::format_savings(original_tokens, sent);
656            append_compressed_hint(
657                &format!("{header}\n{filtered}{graph_ctx}\n{savings}"),
658                file_path,
659            )
660        }
661        "reference" => {
662            let tok = count_tokens(content);
663            let output = format!("{file_ref}={short}: {line_count} lines, {tok} tok ({ext})");
664            let sent = count_tokens(&output);
665            let savings = protocol::format_savings(original_tokens, sent);
666            format!("{output}\n{savings}")
667        }
668        mode if mode.starts_with("lines:") => {
669            let range_str = &mode[6..];
670            let extracted = extract_line_range(content, range_str);
671            let header = format!("{file_ref}={short} {line_count}L lines:{range_str}");
672            let sent = count_tokens(&extracted);
673            let savings = protocol::format_savings(original_tokens, sent);
674            format!("{header}\n{extracted}\n{savings}")
675        }
676        unknown => {
677            let header = build_header(file_ref, short, ext, content, line_count, true);
678            format!(
679                "[WARNING: unknown mode '{unknown}', falling back to full]\n{header}\n{content}"
680            )
681        }
682    }
683}
684
/// Extracts 1-based line selections from `content`.
///
/// `range_str` is a comma-separated list of single line numbers (`7`) and inclusive
/// ranges (`3-9`). A range bound that does not parse as a number is treated as open:
/// the start falls back to line 1 and the end to the last line. Bounds are clamped to
/// the file, and single numbers outside the file are ignored.
///
/// Each selected line is rendered as `{n:>4}| {text}`. When nothing is selected the
/// text `No lines matched the range.` is returned.
fn extract_line_range(content: &str, range_str: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    let total = lines.len();
    let render = |n: usize| format!("{n:>4}| {}", lines[n - 1]);

    let mut picked: Vec<String> = Vec::new();
    for spec in range_str.split(',').map(str::trim) {
        match spec.split_once('-') {
            Some((lo, hi)) => {
                // Clamping keeps every index in `first..=last` inside 1..=total.
                let first = lo.trim().parse::<usize>().map_or(1, |v| v.max(1));
                let last = hi.trim().parse::<usize>().map_or(total, |v| v.min(total));
                picked.extend((first..=last).map(|n| render(n)));
            }
            None => {
                if let Ok(n) = spec.parse::<usize>() {
                    if (1..=total).contains(&n) {
                        picked.push(render(n));
                    }
                }
            }
        }
    }

    if picked.is_empty() {
        return "No lines matched the range.".to_string();
    }
    picked.join("\n")
}
713
714fn handle_diff(cache: &mut SessionCache, path: &str, file_ref: &str) -> String {
715    let short = protocol::shorten_path(path);
716    let old_content = cache.get(path).map(|e| e.content.clone());
717
718    let new_content = match read_file_lossy(path) {
719        Ok(c) => c,
720        Err(e) => return format!("ERROR: {e}"),
721    };
722
723    let original_tokens = count_tokens(&new_content);
724
725    let diff_output = if let Some(old) = &old_content {
726        compressor::diff_content(old, &new_content)
727    } else {
728        format!("[first read]\n{new_content}")
729    };
730
731    cache.store(path, new_content);
732
733    let sent = count_tokens(&diff_output);
734    let savings = protocol::format_savings(original_tokens, sent);
735    format!("{file_ref}={short} [diff]\n{diff_output}\n{savings}")
736}
737
738#[cfg(test)]
739mod tests {
740    use super::*;
741
742    #[test]
743    fn test_header_toon_format_no_brackets() {
744        let content = "use std::io;\nfn main() {}\n";
745        let header = build_header("F1", "main.rs", "rs", content, 2, false);
746        assert!(!header.contains('['));
747        assert!(!header.contains(']'));
748        assert!(header.contains("F1=main.rs 2L"));
749    }
750
751    #[test]
752    fn test_header_toon_deps_indented() {
753        let content = "use crate::core::cache;\nuse crate::tools;\npub fn main() {}\n";
754        let header = build_header("F1", "main.rs", "rs", content, 3, true);
755        if header.contains("deps") {
756            assert!(
757                header.contains("\n deps "),
758                "deps should use indented TOON format"
759            );
760            assert!(
761                !header.contains("deps:["),
762                "deps should not use bracket format"
763            );
764        }
765    }
766
767    #[test]
768    fn test_header_toon_saves_tokens() {
769        let content = "use crate::foo;\nuse crate::bar;\npub fn baz() {}\npub fn qux() {}\n";
770        let old_header = "F1=main.rs [4L +] deps:[foo,bar] exports:[baz,qux]".to_string();
771        let new_header = build_header("F1", "main.rs", "rs", content, 4, true);
772        let old_tokens = count_tokens(&old_header);
773        let new_tokens = count_tokens(&new_header);
774        assert!(
775            new_tokens <= old_tokens,
776            "TOON header ({new_tokens} tok) should be <= old format ({old_tokens} tok)"
777        );
778    }
779
780    #[test]
781    fn test_tdd_symbols_are_compact() {
782        let symbols = [
783            "⊕", "⊖", "∆", "→", "⇒", "✓", "✗", "⚠", "λ", "§", "∂", "τ", "ε",
784        ];
785        for sym in &symbols {
786            let tok = count_tokens(sym);
787            assert!(tok <= 2, "Symbol {sym} should be 1-2 tokens, got {tok}");
788        }
789    }
790
791    #[test]
792    fn test_task_mode_filters_content() {
793        let content = (0..200)
794            .map(|i| {
795                if i % 20 == 0 {
796                    format!("fn validate_token(token: &str) -> bool {{ /* line {i} */ }}")
797                } else {
798                    format!("fn unrelated_helper_{i}(x: i32) -> i32 {{ x + {i} }}")
799                }
800            })
801            .collect::<Vec<_>>()
802            .join("\n");
803        let full_tokens = count_tokens(&content);
804        let task = Some("fix bug in validate_token");
805        let result = process_mode(
806            &content,
807            "task",
808            "F1",
809            "test.rs",
810            "rs",
811            full_tokens,
812            CrpMode::Off,
813            "test.rs",
814            task,
815        );
816        let result_tokens = count_tokens(&result);
817        assert!(
818            result_tokens < full_tokens,
819            "task mode ({result_tokens} tok) should be less than full ({full_tokens} tok)"
820        );
821        assert!(
822            result.contains("task-filtered"),
823            "output should contain task-filtered marker"
824        );
825    }
826
827    #[test]
828    fn test_task_mode_without_task_returns_full() {
829        let content = "fn main() {}\nfn helper() {}\n";
830        let tokens = count_tokens(content);
831        let result = process_mode(
832            content,
833            "task",
834            "F1",
835            "test.rs",
836            "rs",
837            tokens,
838            CrpMode::Off,
839            "test.rs",
840            None,
841        );
842        assert!(
843            result.contains("no task set"),
844            "should indicate no task: {result}"
845        );
846    }
847
848    #[test]
849    fn test_reference_mode_one_line() {
850        let content = "fn main() {}\nfn helper() {}\nfn other() {}\n";
851        let tokens = count_tokens(content);
852        let result = process_mode(
853            content,
854            "reference",
855            "F1",
856            "test.rs",
857            "rs",
858            tokens,
859            CrpMode::Off,
860            "test.rs",
861            None,
862        );
863        let lines: Vec<&str> = result.lines().collect();
864        assert!(
865            lines.len() <= 3,
866            "reference mode should be very compact, got {} lines",
867            lines.len()
868        );
869        assert!(result.contains("lines"), "should contain line count");
870        assert!(result.contains("tok"), "should contain token count");
871    }
872
873    #[test]
874    fn benchmark_task_conditioned_compression() {
875        let content = generate_benchmark_code(500);
876        let full_tokens = count_tokens(&content);
877        let task = Some("fix authentication in validate_token");
878
879        let full_output = process_mode(
880            &content,
881            "full",
882            "F1",
883            "server.rs",
884            "rs",
885            full_tokens,
886            CrpMode::Off,
887            "server.rs",
888            task,
889        );
890        let task_output = process_mode(
891            &content,
892            "task",
893            "F1",
894            "server.rs",
895            "rs",
896            full_tokens,
897            CrpMode::Off,
898            "server.rs",
899            task,
900        );
901        let sig_output = process_mode(
902            &content,
903            "signatures",
904            "F1",
905            "server.rs",
906            "rs",
907            full_tokens,
908            CrpMode::Off,
909            "server.rs",
910            task,
911        );
912        let ref_output = process_mode(
913            &content,
914            "reference",
915            "F1",
916            "server.rs",
917            "rs",
918            full_tokens,
919            CrpMode::Off,
920            "server.rs",
921            task,
922        );
923
924        let full_tok = count_tokens(&full_output);
925        let task_tok = count_tokens(&task_output);
926        let sig_tok = count_tokens(&sig_output);
927        let ref_tok = count_tokens(&ref_output);
928
929        eprintln!("\n=== Task-Conditioned Compression Benchmark ===");
930        eprintln!("Source: 500-line Rust file, task='fix authentication in validate_token'");
931        eprintln!("  full:       {full_tok:>6} tokens (baseline)");
932        eprintln!(
933            "  task:       {task_tok:>6} tokens ({:.0}% savings)",
934            (1.0 - task_tok as f64 / full_tok as f64) * 100.0
935        );
936        eprintln!(
937            "  signatures: {sig_tok:>6} tokens ({:.0}% savings)",
938            (1.0 - sig_tok as f64 / full_tok as f64) * 100.0
939        );
940        eprintln!(
941            "  reference:  {ref_tok:>6} tokens ({:.0}% savings)",
942            (1.0 - ref_tok as f64 / full_tok as f64) * 100.0
943        );
944        eprintln!("================================================\n");
945
946        assert!(task_tok < full_tok, "task mode should save tokens");
947        assert!(sig_tok < full_tok, "signatures should save tokens");
948        assert!(ref_tok < sig_tok, "reference should be most compact");
949    }
950
951    fn generate_benchmark_code(lines: usize) -> String {
952        let mut code = Vec::with_capacity(lines);
953        code.push("use std::collections::HashMap;".to_string());
954        code.push("use crate::core::auth;".to_string());
955        code.push(String::new());
956        code.push("pub struct Server {".to_string());
957        code.push("    config: Config,".to_string());
958        code.push("    cache: HashMap<String, String>,".to_string());
959        code.push("}".to_string());
960        code.push(String::new());
961        code.push("impl Server {".to_string());
962        code.push(
963            "    pub fn validate_token(&self, token: &str) -> Result<Claims, AuthError> {"
964                .to_string(),
965        );
966        code.push("        let decoded = auth::decode_jwt(token)?;".to_string());
967        code.push("        if decoded.exp < chrono::Utc::now().timestamp() {".to_string());
968        code.push("            return Err(AuthError::Expired);".to_string());
969        code.push("        }".to_string());
970        code.push("        Ok(decoded.claims)".to_string());
971        code.push("    }".to_string());
972        code.push(String::new());
973
974        let remaining = lines.saturating_sub(code.len());
975        for i in 0..remaining {
976            if i % 30 == 0 {
977                code.push(format!(
978                    "    pub fn handler_{i}(&self, req: Request) -> Response {{"
979                ));
980            } else if i % 30 == 29 {
981                code.push("    }".to_string());
982            } else {
983                code.push(format!("        let val_{i} = self.cache.get(\"key_{i}\").unwrap_or(&\"default\".to_string());"));
984            }
985        }
986        code.push("}".to_string());
987        code.join("\n")
988    }
989}