Skip to main content

lean_ctx/tools/
ctx_analyze.rs

1use std::path::Path;
2
3use crate::core::compressor;
4use crate::core::entropy;
5use crate::core::signatures;
6use crate::core::symbol_map::{self, SymbolMap};
7use crate::core::tokens::count_tokens;
8use crate::tools::CrpMode;
9
10pub fn handle(path: &str, crp_mode: CrpMode) -> String {
11    let content = match std::fs::read_to_string(path) {
12        Ok(c) => c,
13        Err(e) => return format!("ERROR: {e}"),
14    };
15
16    let short = crate::core::protocol::shorten_path(path);
17    let ext = Path::new(path)
18        .extension()
19        .and_then(|e| e.to_str())
20        .unwrap_or("");
21
22    let line_count = content.lines().count();
23    let raw_tokens = count_tokens(&content);
24    let analysis = entropy::analyze_entropy(&content);
25    let entropy_result = entropy::entropy_compress(&content);
26
27    let sigs = signatures::extract_signatures(&content, ext);
28    let sig_output: String = sigs
29        .iter()
30        .map(|s| {
31            if crp_mode.is_tdd() {
32                s.to_tdd()
33            } else {
34                s.to_compact()
35            }
36        })
37        .collect::<Vec<_>>()
38        .join("\n");
39    let sig_tokens = count_tokens(&sig_output);
40
41    let aggressive = compressor::aggressive_compress(&content, Some(ext));
42    let agg_tokens = count_tokens(&aggressive);
43
44    let cache_tokens = 13usize;
45
46    let mut sections = Vec::new();
47    sections.push(format!(
48        "ANALYSIS: {short} ({line_count}L, {raw_tokens} tok)\n"
49    ));
50
51    sections.push("Entropy Distribution:".to_string());
52    sections.push(format!("  H̄ = {:.1} bits/char", analysis.avg_entropy));
53    sections.push(format!(
54        "  Low-entropy (H<2.0): {} lines ({:.0}%)",
55        analysis.low_entropy_count,
56        if analysis.total_lines > 0 {
57            analysis.low_entropy_count as f64 / analysis.total_lines as f64 * 100.0
58        } else {
59            0.0
60        }
61    ));
62    sections.push(format!(
63        "  High-entropy (H>4.0): {} lines ({:.0}%)",
64        analysis.high_entropy_count,
65        if analysis.total_lines > 0 {
66            analysis.high_entropy_count as f64 / analysis.total_lines as f64 * 100.0
67        } else {
68            0.0
69        }
70    ));
71
72    sections.push(String::new());
73    sections.push("Strategy Comparison:".to_string());
74    sections.push(format_strategy("raw", raw_tokens, raw_tokens));
75    sections.push(format_strategy("aggressive", agg_tokens, raw_tokens));
76
77    let sig_label = if crp_mode.is_tdd() {
78        "signatures (tdd)"
79    } else {
80        "signatures"
81    };
82    sections.push(format_strategy(sig_label, sig_tokens, raw_tokens));
83
84    sections.push(format_strategy(
85        "entropy",
86        entropy_result.compressed_tokens,
87        raw_tokens,
88    ));
89
90    if crp_mode.is_tdd() {
91        let mut sym = SymbolMap::new();
92        let idents = symbol_map::extract_identifiers(&content, ext);
93        for ident in &idents {
94            sym.register(ident);
95        }
96        let tdd_agg = sym.apply(&aggressive);
97        let tdd_table = sym.format_table();
98        let tdd_agg_tokens = count_tokens(&tdd_agg) + count_tokens(&tdd_table);
99        sections.push(format_strategy(
100            "aggressive + §MAP",
101            tdd_agg_tokens,
102            raw_tokens,
103        ));
104    }
105
106    sections.push(format_strategy("cache hit", cache_tokens, raw_tokens));
107
108    sections.push(String::new());
109
110    let mut strategies = vec![
111        ("signatures", sig_tokens),
112        ("entropy", entropy_result.compressed_tokens),
113        ("aggressive", agg_tokens),
114    ];
115    if crp_mode.is_tdd() {
116        strategies.push(("signatures (tdd)", sig_tokens));
117    }
118    if let Some(best) = strategies.iter().min_by_key(|(_, t)| *t) {
119        sections.push(format!(
120            "Recommendation: {} (best first-read savings)",
121            best.0
122        ));
123    }
124
125    let k = entropy::kolmogorov_proxy(&content);
126    let k_class = entropy::compressibility_class(&content);
127    sections.push(format!(
128        "Kolmogorov proxy: K={k:.3} — compressibility: {}",
129        k_class.label()
130    ));
131
132    sections.join("\n")
133}
134
/// Renders one row of the strategy comparison table.
///
/// The row is two leading spaces, `name` left-aligned to 24 columns, `tokens`
/// right-aligned to 6 columns and the literal ` tok`. It ends with either the
/// rounded percentage saved relative to `baseline` (`-NN%`) or an em dash when
/// `tokens` is not strictly below `baseline` (which also covers `baseline == 0`).
fn format_strategy(name: &str, tokens: usize, baseline: usize) -> String {
    let row = format!("  {name:<24} {tokens:>6} tok");
    match baseline.checked_sub(tokens) {
        // Strictly fewer tokens than the baseline: report the saving.
        Some(saved) if saved > 0 => {
            let pct = (saved as f64 / baseline as f64 * 100.0).round() as usize;
            format!("{row}  -{pct}%")
        }
        // Equal to or larger than the baseline: nothing saved.
        _ => format!("{row}  —"),
    }
}
142}