Skip to main content

lean_ctx/core/benchmark_compare/
report.rs

1use super::competitors::CompetitorProfile;
2use super::metrics::{self, ComparativeMetrics};
3use super::system_info::SystemInfo;
4
/// Input bundle for the report generators in this module.
///
/// Both [`generate_markdown`] and [`generate_terminal`] take this structure by
/// shared reference and render its contents.
#[derive(Debug)]
pub struct CompareReport {
    /// Measured lean-ctx results (mode comparisons, search latencies, cold start,
    /// disk footprint, feature count and the project benchmark).
    pub metrics: ComparativeMetrics,
    /// Host and toolchain details rendered in the "System Information" section.
    pub system: SystemInfo,
    /// Competitor profiles shown next to the lean-ctx rows.
    pub competitors: Vec<CompetitorProfile>,
}
11
12pub fn generate_markdown(report: &CompareReport) -> String {
13    let mut out = Vec::new();
14
15    write_header(&mut out, report);
16    write_methodology(&mut out);
17    write_compression_table(&mut out, report);
18    write_mode_detail(&mut out, report);
19    write_search_latency(&mut out, report);
20    write_cold_start(&mut out, report);
21    write_disk_footprint(&mut out, report);
22    write_feature_matrix(&mut out, report);
23    write_session_simulation(&mut out, report);
24    write_chart(&mut out, report);
25    write_system_info(&mut out, &report.system);
26    write_reproducibility(&mut out);
27    write_footer(&mut out);
28
29    out.join("\n")
30}
31
32pub fn generate_terminal(report: &CompareReport) -> String {
33    let mut out = Vec::new();
34    let sep = "\u{2550}".repeat(72);
35
36    out.push(sep.clone());
37    out.push("  lean-ctx Head-to-Head Benchmark".to_string());
38    out.push(sep.clone());
39    out.push(String::new());
40
41    out.push("  Compression Comparison:".to_string());
42    out.push(format!(
43        "  {:<28} {:>12} {:>12} {:>10}",
44        "Tool", "Compression", "Search", "Features"
45    ));
46    out.push(format!("  {}", "\u{2500}".repeat(66)));
47
48    for c in &report.competitors {
49        let comp = c
50            .compression_pct
51            .map_or("N/A".to_string(), |p| format!("{p:.0}%"));
52        let search = if c.supports_search { "Yes" } else { "No" };
53        out.push(format!(
54            "  {:<28} {:>12} {:>12} {:>10}",
55            c.name, comp, search, c.feature_count
56        ));
57    }
58
59    for mc in &report.metrics.mode_comparisons {
60        if mc.mode == "full" {
61            continue;
62        }
63        out.push(format!(
64            "  {:<28} {:>11.1}% {:>12} {:>10}",
65            format!("lean-ctx ({})", mc.mode),
66            mc.avg_compression_pct,
67            "Yes",
68            report.metrics.feature_count
69        ));
70    }
71
72    out.push(String::new());
73
74    let avg_search = metrics::avg_search_latency_us(&report.metrics.search_latencies);
75    out.push(format!(
76        "  Avg search latency: {}",
77        metrics::format_duration_us(avg_search)
78    ));
79    out.push(format!(
80        "  Cold start: {}",
81        metrics::format_duration_us(report.metrics.cold_start.total_us)
82    ));
83    out.push(format!(
84        "  Disk footprint: {}",
85        metrics::format_bytes(report.metrics.disk_footprint.total_index_bytes)
86    ));
87
88    out.push(sep);
89    out.join("\n")
90}
91
92fn write_header(out: &mut Vec<String>, report: &CompareReport) {
93    out.push("# lean-ctx Benchmark: Head-to-Head Comparison".to_string());
94    out.push(String::new());
95    out.push(format!(
96        "> Generated by lean-ctx v{} on {}",
97        report.system.lean_ctx_version,
98        chrono::Local::now().format("%Y-%m-%d %H:%M:%S")
99    ));
100    out.push(String::new());
101    out.push(format!(
102        "**Project:** `{}`  ",
103        report.metrics.project_benchmark.root
104    ));
105    out.push(format!(
106        "**Files measured:** {}  ",
107        report.metrics.project_benchmark.files_measured
108    ));
109    out.push(format!(
110        "**Total raw tokens:** {}  ",
111        format_num(report.metrics.project_benchmark.total_raw_tokens)
112    ));
113    out.push(String::new());
114}
115
/// Appends the static "Methodology" section: a heading, an introductory
/// paragraph, four bullet points and a trailing blank line.
fn write_methodology(out: &mut Vec<String>) {
    const INTRO: &str = "All lean-ctx measurements are **real values** measured on the test repository. \
        Competitor numbers use **published figures** from their official documentation, \
        papers, or README files. Sources are cited in the comparison table.";

    const BULLETS: [&str; 4] = [
        "- **Token counting**: tiktoken `o200k_base` (GPT-4o tokenizer)",
        "- **Compression**: Each lean-ctx read mode is applied to the same files",
        "- **Latency**: Wall-clock time, median of all measured files",
        "- **Quality**: Preservation score (structural + semantic fidelity)",
    ];

    out.extend([
        "## Methodology".to_string(),
        String::new(),
        INTRO.to_string(),
        String::new(),
    ]);
    out.extend(BULLETS.iter().map(|bullet| (*bullet).to_string()));
    out.push(String::new());
}
132
133fn write_compression_table(out: &mut Vec<String>, report: &CompareReport) {
134    out.push("## Compression Comparison".to_string());
135    out.push(String::new());
136    out.push("| Tool | Compression | Tokens | Source |".to_string());
137    out.push("|------|------------:|-------:|--------|".to_string());
138
139    let raw_tokens = report.metrics.project_benchmark.total_raw_tokens;
140
141    for c in &report.competitors {
142        let comp = c
143            .compression_pct
144            .map_or("N/A".to_string(), |p| format!("{p:.0}%"));
145        let tokens = c.compression_pct.map_or("—".to_string(), |p| {
146            format_num((raw_tokens as f64 * (1.0 - p / 100.0)) as usize)
147        });
148        out.push(format!(
149            "| {} | {} | {} | {} |",
150            c.name, comp, tokens, c.source
151        ));
152    }
153
154    for mc in &report.metrics.mode_comparisons {
155        if mc.mode == "full" {
156            continue;
157        }
158        out.push(format!(
159            "| **lean-ctx ({})** | **{:.1}%** | **{}** | Measured |",
160            mc.mode,
161            mc.avg_compression_pct,
162            format_num(mc.total_compressed_tokens),
163        ));
164    }
165
166    out.push(String::new());
167}
168
169fn write_mode_detail(out: &mut Vec<String>, report: &CompareReport) {
170    out.push("## lean-ctx Mode Performance".to_string());
171    out.push(String::new());
172    out.push("| Mode | Compression | Latency | Quality | Use Case |".to_string());
173    out.push("|------|------------:|--------:|--------:|----------|".to_string());
174
175    for mc in &report.metrics.mode_comparisons {
176        let use_case = match mc.mode.as_str() {
177            "full" => "Editing files (cached, ~13 tok on re-read)",
178            "map" => "Understanding structure, deps, exports",
179            "signatures" => "API surface only",
180            "aggressive" => "Maximum compression for large files",
181            "entropy" => "Information-theoretic filtering",
182            _ => "",
183        };
184        let quality = if mc.avg_quality > 0.0 {
185            format!("{:.0}%", mc.avg_quality * 100.0)
186        } else {
187            "100%".to_string()
188        };
189        out.push(format!(
190            "| {} | {:.1}% | {} | {} | {} |",
191            mc.mode,
192            mc.avg_compression_pct,
193            metrics::format_duration_us(mc.avg_latency_us),
194            quality,
195            use_case,
196        ));
197    }
198
199    out.push(String::new());
200}
201
202fn write_search_latency(out: &mut Vec<String>, report: &CompareReport) {
203    out.push("## Search Latency".to_string());
204    out.push(String::new());
205    out.push("| Query | BM25 Latency | Results |".to_string());
206    out.push("|-------|-------------:|--------:|".to_string());
207
208    for sl in &report.metrics.search_latencies {
209        out.push(format!(
210            "| `{}` | {} | {} |",
211            sl.query,
212            metrics::format_duration_us(sl.bm25_us),
213            sl.result_count,
214        ));
215    }
216
217    let avg = metrics::avg_search_latency_us(&report.metrics.search_latencies);
218    out.push(format!(
219        "| **Average** | **{}** | — |",
220        metrics::format_duration_us(avg)
221    ));
222    out.push(String::new());
223}
224
225fn write_cold_start(out: &mut Vec<String>, report: &CompareReport) {
226    let cs = &report.metrics.cold_start;
227    out.push("## Cold Start Performance".to_string());
228    out.push(String::new());
229    out.push("| Phase | Duration |".to_string());
230    out.push("|-------|--------:|".to_string());
231    out.push(format!(
232        "| File scan | {} |",
233        metrics::format_duration_us(cs.scan_us)
234    ));
235    out.push(format!(
236        "| BM25 index build | {} |",
237        metrics::format_duration_us(cs.bm25_build_us)
238    ));
239    out.push(format!(
240        "| First file read + tokenize | {} |",
241        metrics::format_duration_us(cs.first_read_us)
242    ));
243    out.push(format!(
244        "| **Total cold start** | **{}** |",
245        metrics::format_duration_us(cs.total_us)
246    ));
247    out.push(String::new());
248}
249
250fn write_disk_footprint(out: &mut Vec<String>, report: &CompareReport) {
251    let df = &report.metrics.disk_footprint;
252    out.push("## Disk Footprint".to_string());
253    out.push(String::new());
254    out.push("| Component | Size |".to_string());
255    out.push("|-----------|-----:|".to_string());
256    out.push(format!(
257        "| BM25 index | {} |",
258        metrics::format_bytes(df.bm25_index_bytes)
259    ));
260    out.push(format!(
261        "| Total `.lean-ctx/` | {} |",
262        metrics::format_bytes(df.total_index_bytes)
263    ));
264    out.push(String::new());
265}
266
267fn write_feature_matrix(out: &mut Vec<String>, report: &CompareReport) {
268    out.push("## Feature Comparison".to_string());
269    out.push(String::new());
270    out.push("| Feature | Raw | Repomix | aider | codebase-memory | **lean-ctx** |".to_string());
271    out.push("|---------|:---:|:-------:|:-----:|:---------------:|:------------:|".to_string());
272
273    let features = [
274        ("Multi-mode compression", [false, false, false, false, true]),
275        ("BM25 code search", [false, false, false, true, true]),
276        ("Session caching", [false, false, true, true, true]),
277        (
278            "Cross-session memory (CCP)",
279            [false, false, false, true, true],
280        ),
281        (
282            "Shell output compression",
283            [false, false, false, false, true],
284        ),
285        ("Call graph analysis", [false, false, false, false, true]),
286        ("Repo map generation", [false, true, true, false, true]),
287        ("Knowledge base", [false, false, false, true, true]),
288        (
289            "Tree-sitter AST (18 langs)",
290            [false, true, true, false, true],
291        ),
292        ("MCP server", [false, false, false, true, true]),
293    ];
294
295    for (feature, support) in &features {
296        let cells: Vec<&str> = support
297            .iter()
298            .map(|s| if *s { "✅" } else { "—" })
299            .collect();
300        out.push(format!(
301            "| {} | {} | {} | {} | {} | {} |",
302            feature, cells[0], cells[1], cells[2], cells[3], cells[4]
303        ));
304    }
305
306    out.push(String::new());
307    out.push(format!(
308        "**lean-ctx feature count:** {} operations across {} MCP tools",
309        report.metrics.feature_count,
310        crate::server::registry::tool_count()
311    ));
312    out.push(String::new());
313}
314
315fn write_session_simulation(out: &mut Vec<String>, report: &CompareReport) {
316    let s = &report.metrics.project_benchmark.session_sim;
317    out.push("## Session Simulation (30-min coding)".to_string());
318    out.push(String::new());
319    out.push("| Approach | Tokens | Cost | Savings |".to_string());
320    out.push("|----------|-------:|-----:|--------:|".to_string());
321
322    out.push(format!(
323        "| Raw (no compression) | {} | ${:.3} | — |",
324        format_num(s.raw_tokens),
325        s.raw_cost
326    ));
327
328    let lean_pct = if s.raw_tokens > 0 {
329        (1.0 - s.lean_tokens as f64 / s.raw_tokens as f64) * 100.0
330    } else {
331        0.0
332    };
333    out.push(format!(
334        "| lean-ctx (no CCP) | {} | ${:.3} | {:.1}% |",
335        format_num(s.lean_tokens),
336        s.lean_cost,
337        lean_pct
338    ));
339
340    let ccp_pct = if s.raw_tokens > 0 {
341        (1.0 - s.lean_ccp_tokens as f64 / s.raw_tokens as f64) * 100.0
342    } else {
343        0.0
344    };
345    out.push(format!(
346        "| **lean-ctx + CCP** | **{}** | **${:.3}** | **{:.1}%** |",
347        format_num(s.lean_ccp_tokens),
348        s.ccp_cost,
349        ccp_pct
350    ));
351
352    out.push(String::new());
353}
354
355fn write_chart(out: &mut Vec<String>, report: &CompareReport) {
356    out.push("## Compression Visualization".to_string());
357    out.push(String::new());
358    out.push("```".to_string());
359    out.push("Compression % (higher = better)".to_string());
360    out.push(String::new());
361
362    let mut entries: Vec<(String, f64)> = Vec::new();
363
364    for c in &report.competitors {
365        if let Some(pct) = c.compression_pct {
366            entries.push((c.name.to_string(), pct));
367        }
368    }
369    for mc in &report.metrics.mode_comparisons {
370        if mc.mode != "full" {
371            entries.push((format!("lean-ctx ({})", mc.mode), mc.avg_compression_pct));
372        }
373    }
374
375    entries.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
376
377    let max_bar = 50;
378    for (name, pct) in &entries {
379        let bar_len = (pct / 100.0 * max_bar as f64) as usize;
380        let bar = "\u{2588}".repeat(bar_len);
381        out.push(format!("{name:<28} {bar} {pct:.1}%"));
382    }
383
384    out.push("```".to_string());
385    out.push(String::new());
386}
387
388fn write_system_info(out: &mut Vec<String>, sys: &SystemInfo) {
389    out.push("## System Information".to_string());
390    out.push(String::new());
391    out.push(format!("- **OS:** {} {}", sys.os, sys.arch));
392    out.push(format!(
393        "- **CPU:** {} ({} cores)",
394        sys.cpu_brand, sys.cpu_cores
395    ));
396    out.push(format!("- **RAM:** {:.1} GB", sys.memory_gb));
397    out.push(format!("- **lean-ctx:** v{}", sys.lean_ctx_version));
398    out.push(format!("- **Rust:** {}", sys.rust_version));
399    out.push(String::new());
400}
401
/// Appends the static "Reproducibility" section: a heading and a fenced bash
/// snippet showing how to install lean-ctx and run the benchmark.
fn write_reproducibility(out: &mut Vec<String>) {
    // The section verbatim, one entry per output line (empty = blank line).
    const SECTION: [&str; 16] = [
        "## Reproducibility",
        "",
        "```bash",
        "# Install lean-ctx",
        "cargo install lean-ctx",
        "",
        "# Run the comparative benchmark on this repo",
        "lean-ctx benchmark compare",
        "",
        "# Run on a specific repository",
        "lean-ctx benchmark compare --repo /path/to/repo",
        "",
        "# Output to file",
        "lean-ctx benchmark compare --output BENCHMARKS.md",
        "```",
        "",
    ];

    out.extend(SECTION.iter().map(|line| (*line).to_string()));
}
420
421fn write_footer(out: &mut Vec<String>) {
422    out.push("---".to_string());
423    out.push(String::new());
424    out.push(format!(
425        "*Generated by [lean-ctx](https://leanctx.com) v{} — Context Runtime for AI Agents*",
426        env!("CARGO_PKG_VERSION")
427    ));
428    out.push(String::new());
429    out.push("**Disclaimer:** Competitor numbers are from published sources (docs, papers, READMEs). \
430              lean-ctx numbers are measured live. Different test repos will produce different results. \
431              Run `lean-ctx benchmark compare` on your own codebase for project-specific numbers.".to_string());
432}
433
/// Formats a count compactly with one decimal place and a `K` or `M` suffix.
///
/// * `n < 1_000` is printed as-is (`500` -> `"500"`).
/// * `1_000 <= n < 1_000_000` is printed in thousands (`1500` -> `"1.5K"`).
/// * `n >= 1_000_000` is printed in millions (`2_500_000` -> `"2.5M"`).
///
/// Values just below one million (999,950 and up) would round to `"1000.0K"`
/// at one decimal place; they are promoted to `"1.0M"` so the output never
/// shows four integer digits in front of a `K`.
fn format_num(n: usize) -> String {
    if n >= 1_000_000 {
        return format!("{:.1}M", n as f64 / 1_000_000.0);
    }
    if n >= 1_000 {
        let thousands = format!("{:.1}", n as f64 / 1_000.0);
        // Compare the already-rounded text so the promotion triggers exactly
        // when the formatter rounded up to the next unit, and never otherwise.
        if thousands == "1000.0" {
            return "1.0M".to_string();
        }
        return format!("{thousands}K");
    }
    n.to_string()
}
443
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::benchmark_compare::{competitors, system_info};
    use std::path::Path;

    /// Builds a report from live measurements of the `src` directory, the
    /// collected system information and the full competitor list.
    fn make_test_report() -> CompareReport {
        CompareReport {
            metrics: metrics::measure_all(Path::new("src")),
            system: system_info::collect(),
            competitors: competitors::all_competitors(),
        }
    }

    /// Every section heading (and the disclaimer) must appear in the Markdown.
    #[test]
    fn markdown_contains_all_sections() {
        const EXPECTED: [&str; 13] = [
            "# lean-ctx Benchmark: Head-to-Head Comparison",
            "## Methodology",
            "## Compression Comparison",
            "## lean-ctx Mode Performance",
            "## Search Latency",
            "## Cold Start Performance",
            "## Disk Footprint",
            "## Feature Comparison",
            "## Session Simulation",
            "## Compression Visualization",
            "## System Information",
            "## Reproducibility",
            "Disclaimer",
        ];

        let md = generate_markdown(&make_test_report());
        for needle in EXPECTED {
            assert!(md.contains(needle), "markdown is missing {needle:?}");
        }
    }

    /// Competitor names must show up in the Markdown report.
    #[test]
    fn markdown_contains_competitors() {
        let md = generate_markdown(&make_test_report());
        for name in ["Repomix", "codebase-memory", "Raw file read"] {
            assert!(md.contains(name), "markdown is missing {name:?}");
        }
    }

    /// The terminal summary must carry its banner, table title and latency line.
    #[test]
    fn terminal_output_is_readable() {
        let term = generate_terminal(&make_test_report());
        for needle in [
            "Head-to-Head Benchmark",
            "Compression Comparison",
            "Avg search latency",
        ] {
            assert!(term.contains(needle), "terminal output is missing {needle:?}");
        }
    }

    /// One representative value per `format_num` range.
    #[test]
    fn format_num_ranges() {
        let cases: [(usize, &str); 3] = [(500, "500"), (1500, "1.5K"), (2_500_000, "2.5M")];
        for (input, expected) in cases {
            assert_eq!(format_num(input), expected);
        }
    }
}