1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3use std::time::Instant;
4
5use walkdir::WalkDir;
6
7use crate::core::compressor;
8use crate::core::deps;
9use crate::core::entropy;
10use crate::core::preservation;
11use crate::core::signatures;
12use crate::core::tokens::count_tokens;
13
/// Cost of a single input token: the default input price constant (named as a
/// per-million price) divided by 1,000,000.
const COST_PER_TOKEN: f64 = crate::core::stats::DEFAULT_INPUT_PRICE_PER_M / 1_000_000.0;
/// Largest file size in bytes (100 KiB) that the project scan accepts.
const MAX_FILE_SIZE: u64 = 100 * 1024;
/// Upper bound on the number of files the project scan selects.
const MAX_FILES: usize = 50;
17fn cache_hit_tokens() -> usize {
18 let stub = "F1=src/example.rs [unchanged, 500L, use cached context]";
19 count_tokens(stub)
20}
21
/// Result of applying one compression mode to one file.
#[derive(Debug, Clone)]
pub struct ModeMeasurement {
    /// Name of the measured mode (e.g. `"map"`, `"signatures"`, `"cache_hit"`).
    pub mode: String,
    /// Token count of the mode's output.
    pub tokens: usize,
    /// Percentage of tokens saved versus the raw file: `(1 - tokens / raw) * 100`.
    pub savings_pct: f64,
    /// Time spent producing the mode's output, in microseconds.
    pub latency_us: u64,
    /// Preservation score of the output. `-1.0` means "not measured" (used for
    /// `cache_hit`); formatters multiply non-negative values by 100 to show a percentage.
    pub preservation_score: f64,
}
32
/// All mode measurements collected for a single file.
#[derive(Debug, Clone)]
pub struct FileMeasurement {
    /// Display path of the file; relative to the benchmark root when the root
    /// is a prefix of the file path, otherwise the path as scanned.
    #[allow(dead_code)]
    pub path: String,
    /// File extension without the leading dot (empty when there is none).
    pub ext: String,
    /// Token count of the unmodified file content.
    pub raw_tokens: usize,
    /// One entry per measured compression mode.
    pub modes: Vec<ModeMeasurement>,
}
41
/// Per-extension aggregate of the measured files.
#[derive(Debug, Clone)]
pub struct LanguageStats {
    /// File extension this row describes.
    pub ext: String,
    /// Number of measured files with this extension.
    pub count: usize,
    /// Sum of raw token counts over those files.
    pub total_tokens: usize,
    /// Mode with the fewest total tokens for this extension; `"full"` is the
    /// fallback when no mode qualifies.
    pub best_mode: String,
    /// Total tokens produced by `best_mode` over those files.
    pub best_mode_tokens: usize,
    /// Savings of `best_mode` versus `total_tokens`, as a percentage.
    pub best_savings_pct: f64,
}
51
/// Aggregate of one compression mode across all measured files.
#[derive(Debug, Clone)]
pub struct ModeSummary {
    /// Name of the summarized mode.
    pub mode: String,
    /// Sum of the mode's output tokens over all files that have a measurement for it.
    pub total_compressed_tokens: usize,
    /// Arithmetic mean of the per-file savings percentages.
    pub avg_savings_pct: f64,
    /// Mean per-file latency in microseconds (integer division).
    pub avg_latency_us: u64,
    /// Mean preservation score over files with a non-negative score; `-1.0`
    /// when no file has one.
    pub avg_preservation: f64,
}
60
/// Token and cost totals of the simulated coding session for three approaches.
#[derive(Debug, Clone)]
pub struct SessionSimResult {
    /// Total tokens when every read uses the raw content.
    pub raw_tokens: usize,
    /// Total tokens with compression and a map-based resume.
    pub lean_tokens: usize,
    /// Total tokens with compression and the fixed-size CCP resume.
    pub lean_ccp_tokens: usize,
    /// `raw_tokens` multiplied by the per-token cost.
    pub raw_cost: f64,
    /// `lean_tokens` multiplied by the per-token cost.
    pub lean_cost: f64,
    /// `lean_ccp_tokens` multiplied by the per-token cost.
    pub ccp_cost: f64,
}
70
/// Complete result of benchmarking one project directory.
#[derive(Debug, Clone)]
pub struct ProjectBenchmark {
    /// Root directory that was benchmarked (`"."` when an empty path was given).
    pub root: String,
    /// Number of files selected by the project scan.
    pub files_scanned: usize,
    /// Number of selected files that could actually be read and measured.
    pub files_measured: usize,
    /// Sum of raw token counts over all measured files.
    pub total_raw_tokens: usize,
    /// Per-extension aggregates.
    pub languages: Vec<LanguageStats>,
    /// Per-mode aggregates.
    pub mode_summaries: Vec<ModeSummary>,
    /// Result of the session simulation.
    pub session_sim: SessionSimResult,
    /// Raw per-file measurements the aggregates were computed from.
    #[allow(dead_code)]
    pub file_results: Vec<FileMeasurement>,
}
83
/// Returns `true` when `name` is one of the directory names the project scan
/// never descends into (dependency, build-output and VCS directories).
///
/// The comparison is exact and case-sensitive.
fn is_skipped_dir(name: &str) -> bool {
    const SKIPPED_DIRS: &[&str] = &[
        "node_modules",
        ".git",
        "target",
        "dist",
        "build",
        ".next",
        ".nuxt",
        "__pycache__",
        ".cache",
        "coverage",
        "vendor",
        ".svn",
        ".hg",
    ];
    SKIPPED_DIRS.contains(&name)
}
104
/// Returns `true` when `ext` (without the leading dot) is one of the extensions
/// the benchmark treats as a text/source file.
///
/// Matching is exact and case-sensitive; `"r"` and `"R"` are both listed explicitly.
fn is_text_ext(ext: &str) -> bool {
    const TEXT_EXTS: &[&str] = &[
        "rs", "ts", "tsx", "js", "jsx", "py", "go", "java", "c", "cpp", "h", "hpp", "cs", "kt",
        "swift", "rb", "php", "vue", "svelte", "html", "css", "scss", "less", "json", "yaml",
        "yml", "toml", "xml", "md", "txt", "sh", "bash", "zsh", "fish", "sql", "graphql", "proto",
        "ex", "exs", "zig", "lua", "r", "R", "dart", "scala",
    ];
    TEXT_EXTS.contains(&ext)
}
154
155fn scan_project(root: &str) -> Vec<PathBuf> {
156 let mut files: Vec<(PathBuf, u64)> = Vec::new();
157
158 for entry in WalkDir::new(root)
159 .max_depth(8)
160 .into_iter()
161 .filter_entry(|e| {
162 let name = e.file_name().to_string_lossy();
163 if e.file_type().is_dir() {
164 if e.depth() > 0 && name.starts_with('.') {
165 return false;
166 }
167 return !is_skipped_dir(&name);
168 }
169 true
170 })
171 {
172 let Ok(entry) = entry else { continue };
173
174 if entry.file_type().is_dir() {
175 continue;
176 }
177
178 let path = entry.path().to_path_buf();
179 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
180
181 if !is_text_ext(ext) {
182 continue;
183 }
184
185 let size = entry.metadata().map_or(0, |m| m.len());
186 if size == 0 || size > MAX_FILE_SIZE {
187 continue;
188 }
189
190 files.push((path, size));
191 }
192
193 files.sort_by_key(|x| std::cmp::Reverse(x.1));
194
195 let mut selected = Vec::new();
196 let mut ext_counts: HashMap<String, usize> = HashMap::new();
197
198 for (path, _size) in &files {
199 if selected.len() >= MAX_FILES {
200 break;
201 }
202 let ext = path
203 .extension()
204 .and_then(|e| e.to_str())
205 .unwrap_or("")
206 .to_string();
207 let count = ext_counts.entry(ext.clone()).or_insert(0);
208 if *count < 10 {
209 *count += 1;
210 selected.push(path.clone());
211 }
212 }
213
214 selected
215}
216
217fn measure_mode(content: &str, ext: &str, mode: &str, raw_tokens: usize) -> ModeMeasurement {
220 let start = Instant::now();
221
222 let compressed = match mode {
223 "map" => {
224 let sigs = signatures::extract_signatures(content, ext);
225 let dep_info = deps::extract_deps(content, ext);
226 let mut parts = Vec::new();
227 if !dep_info.imports.is_empty() {
228 parts.push(format!("deps: {}", dep_info.imports.join(", ")));
229 }
230 if !dep_info.exports.is_empty() {
231 parts.push(format!("exports: {}", dep_info.exports.join(", ")));
232 }
233 let key_sigs: Vec<String> = sigs
234 .iter()
235 .filter(|s| s.is_exported || s.indent == 0)
236 .map(super::signatures::Signature::to_compact)
237 .collect();
238 if !key_sigs.is_empty() {
239 parts.push(key_sigs.join("\n"));
240 }
241 parts.join("\n")
242 }
243 "signatures" => {
244 let sigs = signatures::extract_signatures(content, ext);
245 sigs.iter()
246 .map(super::signatures::Signature::to_compact)
247 .collect::<Vec<_>>()
248 .join("\n")
249 }
250 "aggressive" => compressor::aggressive_compress(content, Some(ext)),
251 "entropy" => entropy::entropy_compress(content).output,
252 "cache_hit" => format!(
253 "F1=src/file.{ext} [unchanged, {}L, use cached context]",
254 content.lines().count()
255 ),
256 _ => content.to_string(),
257 };
258
259 let latency = start.elapsed();
260 let tokens = count_tokens(&compressed);
261
262 let savings_pct = if raw_tokens > 0 {
263 (1.0 - tokens as f64 / raw_tokens as f64) * 100.0
264 } else {
265 0.0
266 };
267
268 let preservation_score = if mode == "cache_hit" {
269 -1.0
270 } else {
271 preservation::measure(content, &compressed, ext).overall()
272 };
273
274 ModeMeasurement {
275 mode: mode.to_string(),
276 tokens,
277 savings_pct,
278 latency_us: latency.as_micros() as u64,
279 preservation_score,
280 }
281}
282
283fn measure_file(path: &Path, root: &str) -> Option<FileMeasurement> {
284 let content = std::fs::read_to_string(path).ok()?;
285 if content.is_empty() {
286 return None;
287 }
288
289 let ext = path
290 .extension()
291 .and_then(|e| e.to_str())
292 .unwrap_or("")
293 .to_string();
294
295 let raw_tokens = count_tokens(&content);
296 if raw_tokens == 0 {
297 return None;
298 }
299
300 let modes = ["map", "signatures", "aggressive", "entropy", "cache_hit"];
301 let measurements: Vec<ModeMeasurement> = modes
302 .iter()
303 .map(|m| measure_mode(&content, &ext, m, raw_tokens))
304 .collect();
305
306 let display_path = path
307 .strip_prefix(root)
308 .unwrap_or(path)
309 .to_string_lossy()
310 .to_string();
311
312 Some(FileMeasurement {
313 path: display_path,
314 ext,
315 raw_tokens,
316 modes: measurements,
317 })
318}
319
/// Decides whether `mode` may be reported as a candidate for files with
/// extension `ext`, given the mode's aggregated `tokens`.
///
/// A mode that produced zero tokens is never applicable. The structural modes
/// (`"map"` and `"signatures"`) apply only to the code extensions listed
/// below; every other mode applies to any extension.
fn is_mode_applicable_for_ext(mode: &str, ext: &str, tokens: usize) -> bool {
    if tokens == 0 {
        return false;
    }
    match mode {
        "map" | "signatures" => matches!(
            ext,
            "rs" | "ts"
                | "tsx"
                | "js"
                | "jsx"
                | "py"
                | "go"
                | "java"
                | "kt"
                | "c"
                | "cpp"
                | "h"
                | "hpp"
                | "cs"
                | "rb"
                | "swift"
                | "scala"
                | "zig"
                | "lua"
                | "php"
                | "dart"
                | "ex"
                | "exs"
                | "elm"
                | "hs"
                | "ml"
                | "svelte"
                | "vue"
                | "sh"
                | "bash"
                | "zsh"
        ),
        _ => true,
    }
}
342
343fn aggregate_languages(files: &[FileMeasurement]) -> Vec<LanguageStats> {
344 struct LangAccum {
345 count: usize,
346 total_tokens: usize,
347 mode_tokens: HashMap<String, usize>,
348 }
349
350 let mut map: HashMap<String, LangAccum> = HashMap::new();
351 for f in files {
352 let entry = map.entry(f.ext.clone()).or_insert_with(|| LangAccum {
353 count: 0,
354 total_tokens: 0,
355 mode_tokens: HashMap::new(),
356 });
357 entry.count += 1;
358 entry.total_tokens += f.raw_tokens;
359 for m in &f.modes {
360 *entry.mode_tokens.entry(m.mode.clone()).or_insert(0) += m.tokens;
361 }
362 }
363
364 let mut stats: Vec<LanguageStats> = map
365 .into_iter()
366 .map(|(ext, acc)| {
367 let (best_mode, best_tokens) = acc
368 .mode_tokens
369 .iter()
370 .filter(|(m, _)| m.as_str() != "cache_hit")
371 .filter(|(m, t)| is_mode_applicable_for_ext(m, &ext, **t))
372 .min_by_key(|(_, t)| **t)
373 .map_or_else(
374 || ("full".to_string(), acc.total_tokens),
375 |(m, t)| (m.clone(), *t),
376 );
377
378 let savings = if acc.total_tokens > 0 {
379 (1.0 - best_tokens as f64 / acc.total_tokens as f64) * 100.0
380 } else {
381 0.0
382 };
383
384 LanguageStats {
385 ext,
386 count: acc.count,
387 total_tokens: acc.total_tokens,
388 best_mode,
389 best_mode_tokens: best_tokens,
390 best_savings_pct: savings,
391 }
392 })
393 .collect();
394 stats.sort_by_key(|x| std::cmp::Reverse(x.total_tokens));
395 stats
396}
397
398fn aggregate_modes(files: &[FileMeasurement]) -> Vec<ModeSummary> {
399 let mode_names = ["map", "signatures", "aggressive", "entropy", "cache_hit"];
400 let mut summaries = Vec::new();
401
402 for mode_name in &mode_names {
403 let mut total_tokens = 0usize;
404 let mut total_savings = 0.0f64;
405 let mut total_latency = 0u64;
406 let mut total_preservation = 0.0f64;
407 let mut preservation_count = 0usize;
408 let mut count = 0usize;
409
410 for f in files {
411 if let Some(m) = f.modes.iter().find(|m| m.mode == *mode_name) {
412 total_tokens += m.tokens;
413 total_savings += m.savings_pct;
414 total_latency += m.latency_us;
415 if m.preservation_score >= 0.0 {
416 total_preservation += m.preservation_score;
417 preservation_count += 1;
418 }
419 count += 1;
420 }
421 }
422
423 if count == 0 {
424 continue;
425 }
426
427 summaries.push(ModeSummary {
428 mode: mode_name.to_string(),
429 total_compressed_tokens: total_tokens,
430 avg_savings_pct: total_savings / count as f64,
431 avg_latency_us: total_latency / count as u64,
432 avg_preservation: if preservation_count > 0 {
433 total_preservation / preservation_count as f64
434 } else {
435 -1.0
436 },
437 });
438 }
439
440 summaries
441}
442
443fn simulate_session(files: &[FileMeasurement]) -> SessionSimResult {
446 if files.is_empty() {
447 return SessionSimResult {
448 raw_tokens: 0,
449 lean_tokens: 0,
450 lean_ccp_tokens: 0,
451 raw_cost: 0.0,
452 lean_cost: 0.0,
453 ccp_cost: 0.0,
454 };
455 }
456
457 let file_count = files.len().min(15);
458 let selected = &files[..file_count];
459
460 let first_read_raw: usize = selected.iter().map(|f| f.raw_tokens).sum();
461
462 let first_read_lean: usize = selected
463 .iter()
464 .enumerate()
465 .map(|(i, f)| {
466 let mode = if i % 3 == 0 { "aggressive" } else { "map" };
467 f.modes
468 .iter()
469 .find(|m| m.mode == mode)
470 .map_or(f.raw_tokens, |m| m.tokens)
471 })
472 .sum();
473
474 let cache_reread_count = 10usize.min(file_count);
475 let cache_raw: usize = selected[..cache_reread_count]
476 .iter()
477 .map(|f| f.raw_tokens)
478 .sum();
479 let cache_lean: usize = cache_reread_count * cache_hit_tokens();
480
481 let shell_count = 8usize;
482 let shell_raw = shell_count * 500;
483 let shell_lean = shell_count * 200;
484
485 let resume_raw: usize = selected.iter().map(|f| f.raw_tokens).sum();
486 let resume_lean: usize = selected
487 .iter()
488 .map(|f| {
489 f.modes
490 .iter()
491 .find(|m| m.mode == "map")
492 .map_or(f.raw_tokens, |m| m.tokens)
493 })
494 .sum();
495 let resume_ccp = 400usize;
496
497 let raw_total = first_read_raw + cache_raw + shell_raw + resume_raw;
498 let lean_total = first_read_lean + cache_lean + shell_lean + resume_lean;
499 let ccp_total = first_read_lean + cache_lean + shell_lean + resume_ccp;
500
501 SessionSimResult {
502 raw_tokens: raw_total,
503 lean_tokens: lean_total,
504 lean_ccp_tokens: ccp_total,
505 raw_cost: raw_total as f64 * COST_PER_TOKEN,
506 lean_cost: lean_total as f64 * COST_PER_TOKEN,
507 ccp_cost: ccp_total as f64 * COST_PER_TOKEN,
508 }
509}
510
511pub fn run_project_benchmark(path: &str) -> ProjectBenchmark {
514 let root = if path.is_empty() { "." } else { path };
515 let scanned = scan_project(root);
516 let files_scanned = scanned.len();
517
518 let file_results: Vec<FileMeasurement> = scanned
519 .iter()
520 .filter_map(|p| measure_file(p, root))
521 .collect();
522
523 let total_raw_tokens: usize = file_results.iter().map(|f| f.raw_tokens).sum();
524 let languages = aggregate_languages(&file_results);
525 let mode_summaries = aggregate_modes(&file_results);
526 let session_sim = simulate_session(&file_results);
527
528 ProjectBenchmark {
529 root: root.to_string(),
530 files_scanned,
531 files_measured: file_results.len(),
532 total_raw_tokens,
533 languages,
534 mode_summaries,
535 session_sim,
536 file_results,
537 }
538}
539
540pub fn format_terminal(b: &ProjectBenchmark) -> String {
543 let mut out = Vec::new();
544 let sep = "\u{2550}".repeat(66);
545
546 out.push(sep.clone());
547 out.push(format!(" lean-ctx Benchmark — {}", b.root));
548 out.push(sep.clone());
549
550 let lang_summary: Vec<String> = b
551 .languages
552 .iter()
553 .take(5)
554 .map(|l| format!("{} {}", l.count, l.ext))
555 .collect();
556 out.push(format!(
557 " Scanned: {} files ({})",
558 b.files_measured,
559 lang_summary.join(", ")
560 ));
561 out.push(format!(
562 " Total raw tokens: {}",
563 format_num(b.total_raw_tokens)
564 ));
565 out.push(String::new());
566
567 out.push(" Compression by Language:".to_string());
568 out.push(format!(
569 " {:<10} {:>6} {:>10} {:>10} {:>10} {:>10}",
570 "Lang", "Files", "Raw Tok", "Best Mode", "Compressed", "Savings"
571 ));
572 out.push(format!(" {}", "\u{2500}".repeat(62)));
573 for l in &b.languages {
574 out.push(format!(
575 " {:<10} {:>6} {:>10} {:>10} {:>10} {:>9.1}%",
576 l.ext,
577 l.count,
578 format_num(l.total_tokens),
579 l.best_mode,
580 format_num(l.best_mode_tokens),
581 l.best_savings_pct,
582 ));
583 }
584 out.push(String::new());
585
586 out.push(" Mode Performance:".to_string());
587 out.push(format!(
588 " {:<14} {:>10} {:>10} {:>10} {:>10}",
589 "Mode", "Tokens", "Savings", "Latency", "Quality"
590 ));
591 out.push(format!(" {}", "\u{2500}".repeat(58)));
592
593 for m in &b.mode_summaries {
594 let qual = if m.avg_preservation < 0.0 {
595 "N/A".to_string()
596 } else {
597 format!("{:.1}%", m.avg_preservation * 100.0)
598 };
599 let latency = if m.avg_latency_us > 1000 {
600 format!("{:.1}ms", m.avg_latency_us as f64 / 1000.0)
601 } else {
602 format!("{}μs", m.avg_latency_us)
603 };
604 out.push(format!(
605 " {:<14} {:>10} {:>9.1}% {:>10} {:>10}",
606 m.mode,
607 format_num(m.total_compressed_tokens),
608 m.avg_savings_pct,
609 latency,
610 qual,
611 ));
612 }
613
614 out.push(String::new());
615 out.push(" Session Simulation (30-min coding):".to_string());
616 out.push(format!(
617 " {:<24} {:>10} {:>10} {:>10}",
618 "Approach", "Tokens", "Cost", "Savings"
619 ));
620 out.push(format!(" {}", "\u{2500}".repeat(58)));
621
622 let s = &b.session_sim;
623 out.push(format!(
624 " {:<24} {:>10} {:>10} {:>10}",
625 "Raw (no compression)",
626 format_num(s.raw_tokens),
627 format!("${:.3}", s.raw_cost),
628 "\u{2014}",
629 ));
630
631 let lean_pct = if s.raw_tokens > 0 {
632 (1.0 - s.lean_tokens as f64 / s.raw_tokens as f64) * 100.0
633 } else {
634 0.0
635 };
636 out.push(format!(
637 " {:<24} {:>10} {:>10} {:>9.1}%",
638 "lean-ctx (no CCP)",
639 format_num(s.lean_tokens),
640 format!("${:.3}", s.lean_cost),
641 lean_pct,
642 ));
643
644 let ccp_pct = if s.raw_tokens > 0 {
645 (1.0 - s.lean_ccp_tokens as f64 / s.raw_tokens as f64) * 100.0
646 } else {
647 0.0
648 };
649 out.push(format!(
650 " {:<24} {:>10} {:>10} {:>9.1}%",
651 "lean-ctx + CCP",
652 format_num(s.lean_ccp_tokens),
653 format!("${:.3}", s.ccp_cost),
654 ccp_pct,
655 ));
656
657 out.push(sep.clone());
658 out.join("\n")
659}
660
661pub fn format_markdown(b: &ProjectBenchmark) -> String {
664 let mut out = Vec::new();
665
666 out.push("# lean-ctx Benchmark Report".to_string());
667 out.push(String::new());
668 out.push(format!("**Project:** `{}`", b.root));
669 out.push(format!("**Files measured:** {}", b.files_measured));
670 out.push(format!(
671 "**Total raw tokens:** {}",
672 format_num(b.total_raw_tokens)
673 ));
674 out.push(String::new());
675
676 out.push("## Compression by Language".to_string());
677 out.push(String::new());
678 out.push("| Language | Files | Raw Tokens | Best Mode | Compressed | Savings |".to_string());
679 out.push("|----------|------:|-----------:|-----------|----------:|--------:|".to_string());
680 for l in &b.languages {
681 out.push(format!(
682 "| {} | {} | {} | {} | {} | {:.1}% |",
683 l.ext,
684 l.count,
685 format_num(l.total_tokens),
686 l.best_mode,
687 format_num(l.best_mode_tokens),
688 l.best_savings_pct,
689 ));
690 }
691 out.push(String::new());
692
693 out.push("## Mode Performance".to_string());
694 out.push(String::new());
695 out.push("| Mode | Tokens | Savings | Latency | Quality |".to_string());
696 out.push("|------|-------:|--------:|--------:|--------:|".to_string());
697 for m in &b.mode_summaries {
698 let qual = if m.avg_preservation < 0.0 {
699 "N/A".to_string()
700 } else {
701 format!("{:.1}%", m.avg_preservation * 100.0)
702 };
703 let latency = if m.avg_latency_us > 1000 {
704 format!("{:.1}ms", m.avg_latency_us as f64 / 1000.0)
705 } else {
706 format!("{}μs", m.avg_latency_us)
707 };
708 out.push(format!(
709 "| {} | {} | {:.1}% | {} | {} |",
710 m.mode,
711 format_num(m.total_compressed_tokens),
712 m.avg_savings_pct,
713 latency,
714 qual
715 ));
716 }
717 out.push(String::new());
718
719 out.push("## Session Simulation (30-min coding)".to_string());
720 out.push(String::new());
721 out.push("| Approach | Tokens | Cost | Savings |".to_string());
722 out.push("|----------|-------:|-----:|--------:|".to_string());
723
724 let s = &b.session_sim;
725 out.push(format!(
726 "| Raw (no compression) | {} | ${:.3} | — |",
727 format_num(s.raw_tokens),
728 s.raw_cost
729 ));
730
731 let lean_pct = if s.raw_tokens > 0 {
732 (1.0 - s.lean_tokens as f64 / s.raw_tokens as f64) * 100.0
733 } else {
734 0.0
735 };
736 out.push(format!(
737 "| lean-ctx (no CCP) | {} | ${:.3} | {:.1}% |",
738 format_num(s.lean_tokens),
739 s.lean_cost,
740 lean_pct
741 ));
742
743 let ccp_pct = if s.raw_tokens > 0 {
744 (1.0 - s.lean_ccp_tokens as f64 / s.raw_tokens as f64) * 100.0
745 } else {
746 0.0
747 };
748 out.push(format!(
749 "| lean-ctx + CCP | {} | ${:.3} | {:.1}% |",
750 format_num(s.lean_ccp_tokens),
751 s.ccp_cost,
752 ccp_pct
753 ));
754
755 out.push(String::new());
756 out.push(format!(
757 "*Generated by lean-ctx benchmark v{} — https://leanctx.com*",
758 env!("CARGO_PKG_VERSION")
759 ));
760
761 out.join("\n")
762}
763
764pub fn format_json(b: &ProjectBenchmark) -> String {
767 let modes: Vec<serde_json::Value> = b.mode_summaries.iter().map(|m| {
768 serde_json::json!({
769 "mode": m.mode,
770 "total_compressed_tokens": m.total_compressed_tokens,
771 "avg_savings_pct": round2(m.avg_savings_pct),
772 "avg_latency_us": m.avg_latency_us,
773 "avg_preservation": if m.avg_preservation < 0.0 { serde_json::Value::Null } else { serde_json::json!(round2(m.avg_preservation * 100.0)) },
774 })
775 }).collect();
776
777 let languages: Vec<serde_json::Value> = b
778 .languages
779 .iter()
780 .map(|l| {
781 serde_json::json!({
782 "ext": l.ext,
783 "count": l.count,
784 "total_tokens": l.total_tokens,
785 "best_mode": l.best_mode,
786 "best_mode_tokens": l.best_mode_tokens,
787 "best_savings_pct": round2(l.best_savings_pct),
788 })
789 })
790 .collect();
791
792 let file_details: Vec<serde_json::Value> = b
793 .file_results
794 .iter()
795 .map(|f| {
796 let file_modes: Vec<serde_json::Value> = f
797 .modes
798 .iter()
799 .map(|m| {
800 serde_json::json!({
801 "mode": m.mode,
802 "tokens": m.tokens,
803 "savings_pct": round2(m.savings_pct),
804 "latency_us": m.latency_us,
805 "preservation": if m.preservation_score < 0.0 {
806 serde_json::Value::Null
807 } else {
808 serde_json::json!(round2(m.preservation_score * 100.0))
809 },
810 })
811 })
812 .collect();
813 serde_json::json!({
814 "path": f.path,
815 "ext": f.ext,
816 "raw_tokens": f.raw_tokens,
817 "modes": file_modes,
818 })
819 })
820 .collect();
821
822 let s = &b.session_sim;
823 let report = serde_json::json!({
824 "version": env!("CARGO_PKG_VERSION"),
825 "root": b.root,
826 "files_scanned": b.files_scanned,
827 "files_measured": b.files_measured,
828 "total_raw_tokens": b.total_raw_tokens,
829 "languages": languages,
830 "mode_summaries": modes,
831 "files": file_details,
832 "session_simulation": {
833 "raw_tokens": s.raw_tokens,
834 "lean_tokens": s.lean_tokens,
835 "lean_ccp_tokens": s.lean_ccp_tokens,
836 "raw_cost_usd": round2(s.raw_cost),
837 "lean_cost_usd": round2(s.lean_cost),
838 "ccp_cost_usd": round2(s.ccp_cost),
839 },
840 });
841
842 serde_json::to_string_pretty(&report).unwrap_or_else(|_| "{}".to_string())
843}
844
/// Formats a count compactly: plain digits below 1,000, one decimal with a
/// `K` suffix below one million, and one decimal with an `M` suffix above.
///
/// Values just under one million would round up to `1000.0K`; those are
/// promoted to the `M` form so the output reads `1.0M` instead.
fn format_num(n: usize) -> String {
    const THOUSAND: f64 = 1_000.0;
    const MILLION: f64 = 1_000_000.0;

    let value = n as f64;
    if n >= 1_000_000 {
        return format!("{:.1}M", value / MILLION);
    }
    if n >= 1_000 {
        let thousands = format!("{:.1}", value / THOUSAND);
        // Rounding to one decimal can carry into a fourth integer digit.
        if thousands == "1000.0" {
            return format!("{:.1}M", value / MILLION);
        }
        return format!("{thousands}K");
    }
    n.to_string()
}
856
/// Rounds `v` to two decimal places (half away from zero, as `f64::round` does).
fn round2(v: f64) -> f64 {
    const SCALE: f64 = 100.0;
    let scaled = v * SCALE;
    scaled.round() / SCALE
}
860
/// Unit tests for the aggregation, simulation and formatting functions, plus
/// one end-to-end run against the crate's own `src` directory.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `FileMeasurement` from `(mode, tokens)` pairs.
    ///
    /// Savings are derived from `raw`; every mode gets a latency of 100 µs and
    /// a preservation score of 0.85.
    fn mock_file(path: &str, ext: &str, raw: usize, modes: Vec<(&str, usize)>) -> FileMeasurement {
        FileMeasurement {
            path: path.to_string(),
            ext: ext.to_string(),
            raw_tokens: raw,
            modes: modes
                .into_iter()
                .map(|(mode, tokens)| ModeMeasurement {
                    mode: mode.to_string(),
                    tokens,
                    savings_pct: if raw > 0 {
                        (1.0 - tokens as f64 / raw as f64) * 100.0
                    } else {
                        0.0
                    },
                    latency_us: 100,
                    preservation_score: 0.85,
                })
                .collect(),
        }
    }

    /// The best mode per extension is the one with the fewest summed tokens.
    #[test]
    fn aggregate_languages_computes_best_mode() {
        let files = vec![
            mock_file(
                "a.rs",
                "rs",
                1000,
                vec![("map", 400), ("signatures", 200), ("aggressive", 300)],
            ),
            mock_file(
                "b.rs",
                "rs",
                800,
                vec![("map", 300), ("signatures", 150), ("aggressive", 250)],
            ),
            mock_file(
                "c.py",
                "py",
                600,
                vec![("map", 100), ("signatures", 250), ("aggressive", 200)],
            ),
        ];

        let langs = aggregate_languages(&files);
        assert_eq!(langs.len(), 2);

        // rs: signatures totals 200 + 150 = 350 of 1800 raw tokens.
        let rs = langs.iter().find(|l| l.ext == "rs").unwrap();
        assert_eq!(rs.count, 2);
        assert_eq!(rs.total_tokens, 1800);
        assert_eq!(rs.best_mode, "signatures");
        assert_eq!(rs.best_mode_tokens, 350);
        assert!(rs.best_savings_pct > 80.0);

        let py = langs.iter().find(|l| l.ext == "py").unwrap();
        assert_eq!(py.best_mode, "map");
        assert_eq!(py.best_mode_tokens, 100);
    }

    /// Mode summaries sum tokens and average savings over all files.
    #[test]
    fn aggregate_modes_averages() {
        let files = vec![
            mock_file("a.rs", "rs", 1000, vec![("map", 400), ("aggressive", 300)]),
            mock_file("b.rs", "rs", 500, vec![("map", 200), ("aggressive", 100)]),
        ];

        let modes = aggregate_modes(&files);
        let map = modes.iter().find(|m| m.mode == "map").unwrap();
        assert_eq!(map.total_compressed_tokens, 600);
        assert!(map.avg_savings_pct > 50.0);
    }

    /// Without files the simulation reports zero tokens and zero cost.
    #[test]
    fn session_sim_empty_files() {
        let result = simulate_session(&[]);
        assert_eq!(result.raw_tokens, 0);
        assert_eq!(result.lean_tokens, 0);
        assert!((result.raw_cost).abs() < f64::EPSILON);
    }

    /// With compressible files: lean beats raw, and CCP beats lean.
    #[test]
    fn session_sim_basic() {
        let files: Vec<FileMeasurement> = (0..5)
            .map(|i| {
                mock_file(
                    &format!("file_{i}.rs"),
                    "rs",
                    2000,
                    vec![
                        ("map", 800),
                        ("aggressive", 600),
                        ("cache_hit", cache_hit_tokens()),
                    ],
                )
            })
            .collect();
        let result = simulate_session(&files);
        assert!(result.raw_tokens > 0);
        assert!(result.lean_tokens < result.raw_tokens);
        assert!(
            result.lean_ccp_tokens < result.lean_tokens,
            "CCP resume ({}) should beat map-based resume ({}) with enough files",
            result.lean_ccp_tokens,
            result.lean_tokens
        );
    }

    /// The JSON report carries per-file details and per-language best-mode data.
    #[test]
    fn format_json_includes_files_and_language_savings() {
        let files = vec![mock_file(
            "src/main.rs",
            "rs",
            500,
            vec![("map", 200), ("signatures", 100), ("cache_hit", 13)],
        )];
        let bench = ProjectBenchmark {
            root: ".".to_string(),
            files_scanned: 1,
            files_measured: 1,
            total_raw_tokens: 500,
            languages: aggregate_languages(&files),
            mode_summaries: aggregate_modes(&files),
            session_sim: simulate_session(&files),
            file_results: files,
        };

        let json_str = format_json(&bench);
        let parsed: serde_json::Value = serde_json::from_str(&json_str).unwrap();

        assert!(parsed["files"].is_array());
        assert_eq!(parsed["files"].as_array().unwrap().len(), 1);
        assert_eq!(parsed["files"][0]["path"], "src/main.rs");
        assert!(parsed["files"][0]["modes"].is_array());

        assert!(parsed["languages"][0]["best_mode"].is_string());
        assert!(parsed["languages"][0]["best_savings_pct"].is_number());
    }

    /// The Markdown report contains the per-language table and its headers.
    #[test]
    fn format_markdown_contains_language_savings() {
        let files = vec![mock_file(
            "lib.rs",
            "rs",
            1000,
            vec![("map", 300), ("signatures", 200)],
        )];
        let bench = ProjectBenchmark {
            root: ".".to_string(),
            files_scanned: 1,
            files_measured: 1,
            total_raw_tokens: 1000,
            languages: aggregate_languages(&files),
            mode_summaries: aggregate_modes(&files),
            session_sim: simulate_session(&files),
            file_results: files,
        };

        let md = format_markdown(&bench);
        assert!(md.contains("Compression by Language"));
        assert!(md.contains("Best Mode"));
        assert!(md.contains("Savings"));
    }

    /// The terminal report contains the per-language section and the extension.
    #[test]
    fn format_terminal_contains_language_section() {
        let files = vec![mock_file(
            "app.py",
            "py",
            800,
            vec![("map", 200), ("aggressive", 300)],
        )];
        let bench = ProjectBenchmark {
            root: ".".to_string(),
            files_scanned: 1,
            files_measured: 1,
            total_raw_tokens: 800,
            languages: aggregate_languages(&files),
            mode_summaries: aggregate_modes(&files),
            session_sim: simulate_session(&files),
            file_results: files,
        };

        let out = format_terminal(&bench);
        assert!(out.contains("Compression by Language"));
        assert!(out.contains("py"));
        assert!(out.contains("Best Mode"));
    }

    /// End-to-end run over the `src` directory relative to the test's working
    /// directory; checks that all three formatters accept the real result.
    #[test]
    fn run_project_benchmark_on_current_crate() {
        let bench = run_project_benchmark("src");
        assert!(bench.files_measured > 0);
        assert!(bench.total_raw_tokens > 0);
        assert!(!bench.languages.is_empty());
        assert!(!bench.mode_summaries.is_empty());

        for lang in &bench.languages {
            assert!(!lang.best_mode.is_empty());
            assert!(lang.best_savings_pct >= 0.0);
        }

        let json = format_json(&bench);
        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
        assert!(!parsed["files"].as_array().unwrap().is_empty());

        let md = format_markdown(&bench);
        assert!(md.contains("lean-ctx Benchmark Report"));

        let term = format_terminal(&bench);
        assert!(term.contains("Session Simulation"));
    }
}
1077}