lean_ctx/tools/
ctx_analyze.rs1use std::path::Path;
2
3use crate::core::compressor;
4use crate::core::entropy;
5use crate::core::signatures;
6use crate::core::symbol_map::{self, SymbolMap};
7use crate::core::tokens::count_tokens;
8use crate::tools::CrpMode;
9
10pub fn handle(path: &str, crp_mode: CrpMode) -> String {
11 let content = match std::fs::read_to_string(path) {
12 Ok(c) => c,
13 Err(e) => return format!("ERROR: {e}"),
14 };
15
16 let short = crate::core::protocol::shorten_path(path);
17 let ext = Path::new(path)
18 .extension()
19 .and_then(|e| e.to_str())
20 .unwrap_or("");
21
22 let line_count = content.lines().count();
23 let raw_tokens = count_tokens(&content);
24 let analysis = entropy::analyze_entropy(&content);
25 let entropy_result = entropy::entropy_compress(&content);
26
27 let sigs = signatures::extract_signatures(&content, ext);
28 let sig_output: String = sigs
29 .iter()
30 .map(|s| {
31 if crp_mode.is_tdd() {
32 s.to_tdd()
33 } else {
34 s.to_compact()
35 }
36 })
37 .collect::<Vec<_>>()
38 .join("\n");
39 let sig_tokens = count_tokens(&sig_output);
40
41 let aggressive = compressor::aggressive_compress(&content, Some(ext));
42 let agg_tokens = count_tokens(&aggressive);
43
44 let cache_tokens = 13usize;
45
46 let mut sections = Vec::new();
47 sections.push(format!(
48 "ANALYSIS: {short} ({line_count}L, {raw_tokens} tok)\n"
49 ));
50
51 sections.push("Entropy Distribution:".to_string());
52 sections.push(format!(" H̄ = {:.1} bits/char", analysis.avg_entropy));
53 sections.push(format!(
54 " Low-entropy (H<2.0): {} lines ({:.0}%)",
55 analysis.low_entropy_count,
56 if analysis.total_lines > 0 {
57 analysis.low_entropy_count as f64 / analysis.total_lines as f64 * 100.0
58 } else {
59 0.0
60 }
61 ));
62 sections.push(format!(
63 " High-entropy (H>4.0): {} lines ({:.0}%)",
64 analysis.high_entropy_count,
65 if analysis.total_lines > 0 {
66 analysis.high_entropy_count as f64 / analysis.total_lines as f64 * 100.0
67 } else {
68 0.0
69 }
70 ));
71
72 sections.push(String::new());
73 sections.push("Strategy Comparison:".to_string());
74 sections.push(format_strategy("raw", raw_tokens, raw_tokens));
75 sections.push(format_strategy("aggressive", agg_tokens, raw_tokens));
76
77 let sig_label = if crp_mode.is_tdd() {
78 "signatures (tdd)"
79 } else {
80 "signatures"
81 };
82 sections.push(format_strategy(sig_label, sig_tokens, raw_tokens));
83
84 sections.push(format_strategy(
85 "entropy",
86 entropy_result.compressed_tokens,
87 raw_tokens,
88 ));
89
90 if crp_mode.is_tdd() {
91 let mut sym = SymbolMap::new();
92 let idents = symbol_map::extract_identifiers(&content, ext);
93 for ident in &idents {
94 sym.register(ident);
95 }
96 let tdd_agg = sym.apply(&aggressive);
97 let tdd_table = sym.format_table();
98 let tdd_agg_tokens = count_tokens(&tdd_agg) + count_tokens(&tdd_table);
99 sections.push(format_strategy(
100 "aggressive + §MAP",
101 tdd_agg_tokens,
102 raw_tokens,
103 ));
104 }
105
106 sections.push(format_strategy("cache hit", cache_tokens, raw_tokens));
107
108 sections.push(String::new());
109
110 let mut strategies = vec![
111 ("signatures", sig_tokens),
112 ("entropy", entropy_result.compressed_tokens),
113 ("aggressive", agg_tokens),
114 ];
115 if crp_mode.is_tdd() {
116 strategies.push(("signatures (tdd)", sig_tokens));
117 }
118 if let Some(best) = strategies.iter().min_by_key(|(_, t)| *t) {
119 sections.push(format!(
120 "Recommendation: {} (best first-read savings)",
121 best.0
122 ));
123 }
124
125 let k = entropy::kolmogorov_proxy(&content);
126 let k_class = entropy::compressibility_class(&content);
127 sections.push(format!(
128 "Kolmogorov proxy: K={k:.3} — compressibility: {}",
129 k_class.label()
130 ));
131
132 sections.join("\n")
133}
134
/// Renders one row of the strategy comparison table.
///
/// The row holds the strategy `name` left-aligned in 24 columns and `tokens`
/// right-aligned in 6 columns. When `tokens` is strictly below `baseline`, the
/// row ends with the savings as a rounded whole percentage (`-NN%`); otherwise
/// it ends with an em dash.
fn format_strategy(name: &str, tokens: usize, baseline: usize) -> String {
    let row = format!(" {name:<24} {tokens:>6} tok");
    match baseline.checked_sub(tokens) {
        // A positive difference implies `baseline > 0`, so the division is safe.
        Some(saved) if saved > 0 => {
            let pct = (saved as f64 / baseline as f64 * 100.0).round() as usize;
            format!("{row} -{pct}%")
        }
        // No savings: equal to the baseline, or larger than it.
        _ => format!("{row} —"),
    }
}
142}