1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3use std::time::Instant;
4
5use walkdir::WalkDir;
6
7use crate::core::compressor;
8use crate::core::deps;
9use crate::core::entropy;
10use crate::core::preservation;
11use crate::core::signatures;
12use crate::core::tokens::count_tokens;
13
/// Price of a single input token, derived from the crate's default
/// per-million-token input price (the JSON report labels the resulting costs as USD).
const COST_PER_TOKEN: f64 = crate::core::stats::DEFAULT_INPUT_PRICE_PER_M / 1_000_000.0;
/// Largest file size in bytes (100 KiB) that `scan_project` will consider.
const MAX_FILE_SIZE: u64 = 100 * 1024;
/// Upper bound on the number of files `scan_project` selects for measurement.
const MAX_FILES: usize = 50;
/// Fixed token count reported for the `cache_hit` mode and charged per cached
/// re-read in the session simulation.
const CACHE_HIT_TOKENS: usize = 13;
18
/// Result of running a single compression mode over one file.
#[derive(Debug, Clone)]
pub struct ModeMeasurement {
    /// Name of the mode that was measured (e.g. `"map"`, `"aggressive"`, `"cache_hit"`).
    pub mode: String,
    /// Token count of the mode's output (`CACHE_HIT_TOKENS` for `cache_hit`).
    pub tokens: usize,
    /// Percentage of the file's raw tokens saved by this mode; `0.0` when the
    /// raw token count is zero. Negative when the output is larger than the input.
    pub savings_pct: f64,
    /// Time spent producing the mode's output, in microseconds.
    pub latency_us: u64,
    /// Overall preservation score reported by `preservation::measure`, or `-1.0`
    /// as a "not applicable" sentinel (used for `cache_hit`).
    /// NOTE(review): formatters multiply this by 100 and print it as a percentage,
    /// so the score is presumably in `0.0..=1.0` — confirm against `preservation`.
    pub preservation_score: f64,
}
29
/// All mode measurements collected for one source file.
#[derive(Debug, Clone)]
pub struct FileMeasurement {
    // NOTE(review): `path` is read by `format_json` in this file, so this allow
    // may no longer be needed — confirm before removing.
    #[allow(dead_code)]
    /// File path for display, relative to the benchmark root when it can be stripped.
    pub path: String,
    /// File extension without the leading dot; empty when the file has none.
    pub ext: String,
    /// Token count of the unmodified file content.
    pub raw_tokens: usize,
    /// One entry per measured compression mode.
    pub modes: Vec<ModeMeasurement>,
}
38
/// Per-extension aggregate produced by `aggregate_languages`.
#[derive(Debug, Clone)]
pub struct LanguageStats {
    /// File extension this row describes.
    pub ext: String,
    /// Number of measured files with this extension.
    pub count: usize,
    /// Sum of raw token counts over those files.
    pub total_tokens: usize,
    /// Mode with the lowest summed token count among applicable modes, or
    /// `"full"` when no mode qualifies.
    pub best_mode: String,
    /// Summed token count of `best_mode` (equals `total_tokens` for `"full"`).
    pub best_mode_tokens: usize,
    /// Percentage of `total_tokens` saved by `best_mode`.
    pub best_savings_pct: f64,
}
48
/// Per-mode aggregate across all measured files, produced by `aggregate_modes`.
#[derive(Debug, Clone)]
pub struct ModeSummary {
    /// Name of the compression mode.
    pub mode: String,
    /// Sum of the mode's output tokens over every file that has a measurement for it.
    pub total_compressed_tokens: usize,
    /// Arithmetic mean of the per-file savings percentages.
    pub avg_savings_pct: f64,
    /// Mean per-file latency in microseconds (integer division).
    pub avg_latency_us: u64,
    /// Mean of the non-negative preservation scores, or `-1.0` when no file
    /// reported one (rendered as "N/A" / JSON `null`).
    pub avg_preservation: f64,
}
57
/// Token and cost totals for the three approaches compared by `simulate_session`.
#[derive(Debug, Clone)]
pub struct SessionSimResult {
    /// Total tokens when every read, re-read, shell call and resume is uncompressed.
    pub raw_tokens: usize,
    /// Total tokens with compressed reads and a map-based resume.
    pub lean_tokens: usize,
    /// Total tokens with compressed reads and the fixed-size CCP resume.
    pub lean_ccp_tokens: usize,
    /// `raw_tokens` multiplied by `COST_PER_TOKEN`.
    pub raw_cost: f64,
    /// `lean_tokens` multiplied by `COST_PER_TOKEN`.
    pub lean_cost: f64,
    /// `lean_ccp_tokens` multiplied by `COST_PER_TOKEN`.
    pub ccp_cost: f64,
}
67
/// Complete benchmark report for one project root, as returned by
/// `run_project_benchmark` and consumed by the `format_*` functions.
#[derive(Debug, Clone)]
pub struct ProjectBenchmark {
    /// Root path that was benchmarked (`"."` when the caller passed an empty path).
    pub root: String,
    /// Number of files selected by the project scan.
    pub files_scanned: usize,
    /// Number of selected files that were successfully read and measured.
    pub files_measured: usize,
    /// Sum of raw token counts over all measured files.
    pub total_raw_tokens: usize,
    /// Per-extension statistics, largest raw token total first.
    pub languages: Vec<LanguageStats>,
    /// Per-mode statistics.
    pub mode_summaries: Vec<ModeSummary>,
    /// Result of the simulated coding session.
    pub session_sim: SessionSimResult,
    // NOTE(review): `file_results` is read by `format_json` in this file, so this
    // allow may no longer be needed — confirm before removing.
    #[allow(dead_code)]
    /// Raw per-file measurements the aggregates were computed from.
    pub file_results: Vec<FileMeasurement>,
}
80
/// Reports whether `name` is a directory the project scan never descends into.
///
/// The comparison is an exact, case-sensitive match against a fixed list of
/// dependency, build-output, cache and version-control directory names.
fn is_skipped_dir(name: &str) -> bool {
    const SKIPPED_DIRS: &[&str] = &[
        "node_modules",
        ".git",
        "target",
        "dist",
        "build",
        ".next",
        ".nuxt",
        "__pycache__",
        ".cache",
        "coverage",
        "vendor",
        ".svn",
        ".hg",
    ];

    SKIPPED_DIRS.iter().any(|&dir| dir == name)
}
101
/// Reports whether `ext` (without the leading dot) is one of the text/source
/// file extensions the benchmark measures.
///
/// Matching is exact and case-sensitive; the only extension listed in both
/// cases is `r` / `R`.
fn is_text_ext(ext: &str) -> bool {
    const TEXT_EXTS: &[&str] = &[
        "rs", "ts", "tsx", "js", "jsx", "py", "go", "java", "c", "cpp", "h", "hpp", "cs", "kt",
        "swift", "rb", "php", "vue", "svelte", "html", "css", "scss", "less", "json", "yaml",
        "yml", "toml", "xml", "md", "txt", "sh", "bash", "zsh", "fish", "sql", "graphql",
        "proto", "ex", "exs", "zig", "lua", "r", "R", "dart", "scala",
    ];

    TEXT_EXTS.iter().any(|&known| known == ext)
}
151
152fn scan_project(root: &str) -> Vec<PathBuf> {
153 let mut files: Vec<(PathBuf, u64)> = Vec::new();
154
155 for entry in WalkDir::new(root)
156 .max_depth(8)
157 .into_iter()
158 .filter_entry(|e| {
159 let name = e.file_name().to_string_lossy();
160 if e.file_type().is_dir() {
161 if e.depth() > 0 && name.starts_with('.') {
162 return false;
163 }
164 return !is_skipped_dir(&name);
165 }
166 true
167 })
168 {
169 let Ok(entry) = entry else { continue };
170
171 if entry.file_type().is_dir() {
172 continue;
173 }
174
175 let path = entry.path().to_path_buf();
176 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
177
178 if !is_text_ext(ext) {
179 continue;
180 }
181
182 let size = entry.metadata().map_or(0, |m| m.len());
183 if size == 0 || size > MAX_FILE_SIZE {
184 continue;
185 }
186
187 files.push((path, size));
188 }
189
190 files.sort_by_key(|x| std::cmp::Reverse(x.1));
191
192 let mut selected = Vec::new();
193 let mut ext_counts: HashMap<String, usize> = HashMap::new();
194
195 for (path, _size) in &files {
196 if selected.len() >= MAX_FILES {
197 break;
198 }
199 let ext = path
200 .extension()
201 .and_then(|e| e.to_str())
202 .unwrap_or("")
203 .to_string();
204 let count = ext_counts.entry(ext.clone()).or_insert(0);
205 if *count < 10 {
206 *count += 1;
207 selected.push(path.clone());
208 }
209 }
210
211 selected
212}
213
214fn measure_mode(content: &str, ext: &str, mode: &str, raw_tokens: usize) -> ModeMeasurement {
217 let start = Instant::now();
218
219 let compressed = match mode {
220 "map" => {
221 let sigs = signatures::extract_signatures(content, ext);
222 let dep_info = deps::extract_deps(content, ext);
223 let mut parts = Vec::new();
224 if !dep_info.imports.is_empty() {
225 parts.push(format!("deps: {}", dep_info.imports.join(", ")));
226 }
227 if !dep_info.exports.is_empty() {
228 parts.push(format!("exports: {}", dep_info.exports.join(", ")));
229 }
230 let key_sigs: Vec<String> = sigs
231 .iter()
232 .filter(|s| s.is_exported || s.indent == 0)
233 .map(super::signatures::Signature::to_compact)
234 .collect();
235 if !key_sigs.is_empty() {
236 parts.push(key_sigs.join("\n"));
237 }
238 parts.join("\n")
239 }
240 "signatures" => {
241 let sigs = signatures::extract_signatures(content, ext);
242 sigs.iter()
243 .map(super::signatures::Signature::to_compact)
244 .collect::<Vec<_>>()
245 .join("\n")
246 }
247 "aggressive" => compressor::aggressive_compress(content, Some(ext)),
248 "entropy" => entropy::entropy_compress(content).output,
249 "cache_hit" => "cached re-read ~13tok".to_string(),
250 _ => content.to_string(),
251 };
252
253 let latency = start.elapsed();
254 let tokens = if mode == "cache_hit" {
255 CACHE_HIT_TOKENS
256 } else {
257 count_tokens(&compressed)
258 };
259
260 let savings_pct = if raw_tokens > 0 {
261 (1.0 - tokens as f64 / raw_tokens as f64) * 100.0
262 } else {
263 0.0
264 };
265
266 let preservation_score = if mode == "cache_hit" {
267 -1.0
268 } else {
269 preservation::measure(content, &compressed, ext).overall()
270 };
271
272 ModeMeasurement {
273 mode: mode.to_string(),
274 tokens,
275 savings_pct,
276 latency_us: latency.as_micros() as u64,
277 preservation_score,
278 }
279}
280
281fn measure_file(path: &Path, root: &str) -> Option<FileMeasurement> {
282 let content = std::fs::read_to_string(path).ok()?;
283 if content.is_empty() {
284 return None;
285 }
286
287 let ext = path
288 .extension()
289 .and_then(|e| e.to_str())
290 .unwrap_or("")
291 .to_string();
292
293 let raw_tokens = count_tokens(&content);
294 if raw_tokens == 0 {
295 return None;
296 }
297
298 let modes = ["map", "signatures", "aggressive", "entropy", "cache_hit"];
299 let measurements: Vec<ModeMeasurement> = modes
300 .iter()
301 .map(|m| measure_mode(&content, &ext, m, raw_tokens))
302 .collect();
303
304 let display_path = path
305 .strip_prefix(root)
306 .unwrap_or(path)
307 .to_string_lossy()
308 .to_string();
309
310 Some(FileMeasurement {
311 path: display_path,
312 ext,
313 raw_tokens,
314 modes: measurements,
315 })
316}
317
/// Decides whether `mode` may be reported as the best mode for files with
/// extension `ext`.
///
/// A mode that produced zero tokens is never applicable. The structural modes
/// (`"map"` and `"signatures"`) are only applicable to the listed programming
/// language extensions; every other mode is applicable to any extension.
fn is_mode_applicable_for_ext(mode: &str, ext: &str, tokens: usize) -> bool {
    if tokens == 0 {
        return false;
    }

    match mode {
        "map" | "signatures" => matches!(
            ext,
            "rs" | "ts"
                | "tsx"
                | "js"
                | "jsx"
                | "py"
                | "go"
                | "java"
                | "kt"
                | "c"
                | "cpp"
                | "h"
                | "hpp"
                | "cs"
                | "rb"
                | "swift"
                | "scala"
                | "zig"
                | "lua"
                | "php"
                | "dart"
                | "ex"
                | "exs"
                | "elm"
                | "hs"
                | "ml"
                | "svelte"
                | "vue"
                | "sh"
                | "bash"
                | "zsh"
        ),
        _ => true,
    }
}
340
341fn aggregate_languages(files: &[FileMeasurement]) -> Vec<LanguageStats> {
342 struct LangAccum {
343 count: usize,
344 total_tokens: usize,
345 mode_tokens: HashMap<String, usize>,
346 }
347
348 let mut map: HashMap<String, LangAccum> = HashMap::new();
349 for f in files {
350 let entry = map.entry(f.ext.clone()).or_insert_with(|| LangAccum {
351 count: 0,
352 total_tokens: 0,
353 mode_tokens: HashMap::new(),
354 });
355 entry.count += 1;
356 entry.total_tokens += f.raw_tokens;
357 for m in &f.modes {
358 *entry.mode_tokens.entry(m.mode.clone()).or_insert(0) += m.tokens;
359 }
360 }
361
362 let mut stats: Vec<LanguageStats> = map
363 .into_iter()
364 .map(|(ext, acc)| {
365 let (best_mode, best_tokens) = acc
366 .mode_tokens
367 .iter()
368 .filter(|(m, _)| m.as_str() != "cache_hit")
369 .filter(|(m, t)| is_mode_applicable_for_ext(m, &ext, **t))
370 .min_by_key(|(_, t)| **t)
371 .map_or_else(
372 || ("full".to_string(), acc.total_tokens),
373 |(m, t)| (m.clone(), *t),
374 );
375
376 let savings = if acc.total_tokens > 0 {
377 (1.0 - best_tokens as f64 / acc.total_tokens as f64) * 100.0
378 } else {
379 0.0
380 };
381
382 LanguageStats {
383 ext,
384 count: acc.count,
385 total_tokens: acc.total_tokens,
386 best_mode,
387 best_mode_tokens: best_tokens,
388 best_savings_pct: savings,
389 }
390 })
391 .collect();
392 stats.sort_by_key(|x| std::cmp::Reverse(x.total_tokens));
393 stats
394}
395
396fn aggregate_modes(files: &[FileMeasurement]) -> Vec<ModeSummary> {
397 let mode_names = ["map", "signatures", "aggressive", "entropy", "cache_hit"];
398 let mut summaries = Vec::new();
399
400 for mode_name in &mode_names {
401 let mut total_tokens = 0usize;
402 let mut total_savings = 0.0f64;
403 let mut total_latency = 0u64;
404 let mut total_preservation = 0.0f64;
405 let mut preservation_count = 0usize;
406 let mut count = 0usize;
407
408 for f in files {
409 if let Some(m) = f.modes.iter().find(|m| m.mode == *mode_name) {
410 total_tokens += m.tokens;
411 total_savings += m.savings_pct;
412 total_latency += m.latency_us;
413 if m.preservation_score >= 0.0 {
414 total_preservation += m.preservation_score;
415 preservation_count += 1;
416 }
417 count += 1;
418 }
419 }
420
421 if count == 0 {
422 continue;
423 }
424
425 summaries.push(ModeSummary {
426 mode: mode_name.to_string(),
427 total_compressed_tokens: total_tokens,
428 avg_savings_pct: total_savings / count as f64,
429 avg_latency_us: total_latency / count as u64,
430 avg_preservation: if preservation_count > 0 {
431 total_preservation / preservation_count as f64
432 } else {
433 -1.0
434 },
435 });
436 }
437
438 summaries
439}
440
441fn simulate_session(files: &[FileMeasurement]) -> SessionSimResult {
444 if files.is_empty() {
445 return SessionSimResult {
446 raw_tokens: 0,
447 lean_tokens: 0,
448 lean_ccp_tokens: 0,
449 raw_cost: 0.0,
450 lean_cost: 0.0,
451 ccp_cost: 0.0,
452 };
453 }
454
455 let file_count = files.len().min(15);
456 let selected = &files[..file_count];
457
458 let first_read_raw: usize = selected.iter().map(|f| f.raw_tokens).sum();
459
460 let first_read_lean: usize = selected
461 .iter()
462 .enumerate()
463 .map(|(i, f)| {
464 let mode = if i % 3 == 0 { "aggressive" } else { "map" };
465 f.modes
466 .iter()
467 .find(|m| m.mode == mode)
468 .map_or(f.raw_tokens, |m| m.tokens)
469 })
470 .sum();
471
472 let cache_reread_count = 10usize.min(file_count);
473 let cache_raw: usize = selected[..cache_reread_count]
474 .iter()
475 .map(|f| f.raw_tokens)
476 .sum();
477 let cache_lean: usize = cache_reread_count * CACHE_HIT_TOKENS;
478
479 let shell_count = 8usize;
480 let shell_raw = shell_count * 500;
481 let shell_lean = shell_count * 200;
482
483 let resume_raw: usize = selected.iter().map(|f| f.raw_tokens).sum();
484 let resume_lean: usize = selected
485 .iter()
486 .map(|f| {
487 f.modes
488 .iter()
489 .find(|m| m.mode == "map")
490 .map_or(f.raw_tokens, |m| m.tokens)
491 })
492 .sum();
493 let resume_ccp = 400usize;
494
495 let raw_total = first_read_raw + cache_raw + shell_raw + resume_raw;
496 let lean_total = first_read_lean + cache_lean + shell_lean + resume_lean;
497 let ccp_total = first_read_lean + cache_lean + shell_lean + resume_ccp;
498
499 SessionSimResult {
500 raw_tokens: raw_total,
501 lean_tokens: lean_total,
502 lean_ccp_tokens: ccp_total,
503 raw_cost: raw_total as f64 * COST_PER_TOKEN,
504 lean_cost: lean_total as f64 * COST_PER_TOKEN,
505 ccp_cost: ccp_total as f64 * COST_PER_TOKEN,
506 }
507}
508
509pub fn run_project_benchmark(path: &str) -> ProjectBenchmark {
512 let root = if path.is_empty() { "." } else { path };
513 let scanned = scan_project(root);
514 let files_scanned = scanned.len();
515
516 let file_results: Vec<FileMeasurement> = scanned
517 .iter()
518 .filter_map(|p| measure_file(p, root))
519 .collect();
520
521 let total_raw_tokens: usize = file_results.iter().map(|f| f.raw_tokens).sum();
522 let languages = aggregate_languages(&file_results);
523 let mode_summaries = aggregate_modes(&file_results);
524 let session_sim = simulate_session(&file_results);
525
526 ProjectBenchmark {
527 root: root.to_string(),
528 files_scanned,
529 files_measured: file_results.len(),
530 total_raw_tokens,
531 languages,
532 mode_summaries,
533 session_sim,
534 file_results,
535 }
536}
537
538pub fn format_terminal(b: &ProjectBenchmark) -> String {
541 let mut out = Vec::new();
542 let sep = "\u{2550}".repeat(66);
543
544 out.push(sep.clone());
545 out.push(format!(" lean-ctx Benchmark — {}", b.root));
546 out.push(sep.clone());
547
548 let lang_summary: Vec<String> = b
549 .languages
550 .iter()
551 .take(5)
552 .map(|l| format!("{} {}", l.count, l.ext))
553 .collect();
554 out.push(format!(
555 " Scanned: {} files ({})",
556 b.files_measured,
557 lang_summary.join(", ")
558 ));
559 out.push(format!(
560 " Total raw tokens: {}",
561 format_num(b.total_raw_tokens)
562 ));
563 out.push(String::new());
564
565 out.push(" Compression by Language:".to_string());
566 out.push(format!(
567 " {:<10} {:>6} {:>10} {:>10} {:>10} {:>10}",
568 "Lang", "Files", "Raw Tok", "Best Mode", "Compressed", "Savings"
569 ));
570 out.push(format!(" {}", "\u{2500}".repeat(62)));
571 for l in &b.languages {
572 out.push(format!(
573 " {:<10} {:>6} {:>10} {:>10} {:>10} {:>9.1}%",
574 l.ext,
575 l.count,
576 format_num(l.total_tokens),
577 l.best_mode,
578 format_num(l.best_mode_tokens),
579 l.best_savings_pct,
580 ));
581 }
582 out.push(String::new());
583
584 out.push(" Mode Performance:".to_string());
585 out.push(format!(
586 " {:<14} {:>10} {:>10} {:>10} {:>10}",
587 "Mode", "Tokens", "Savings", "Latency", "Quality"
588 ));
589 out.push(format!(" {}", "\u{2500}".repeat(58)));
590
591 for m in &b.mode_summaries {
592 let qual = if m.avg_preservation < 0.0 {
593 "N/A".to_string()
594 } else {
595 format!("{:.1}%", m.avg_preservation * 100.0)
596 };
597 let latency = if m.avg_latency_us > 1000 {
598 format!("{:.1}ms", m.avg_latency_us as f64 / 1000.0)
599 } else {
600 format!("{}μs", m.avg_latency_us)
601 };
602 out.push(format!(
603 " {:<14} {:>10} {:>9.1}% {:>10} {:>10}",
604 m.mode,
605 format_num(m.total_compressed_tokens),
606 m.avg_savings_pct,
607 latency,
608 qual,
609 ));
610 }
611
612 out.push(String::new());
613 out.push(" Session Simulation (30-min coding):".to_string());
614 out.push(format!(
615 " {:<24} {:>10} {:>10} {:>10}",
616 "Approach", "Tokens", "Cost", "Savings"
617 ));
618 out.push(format!(" {}", "\u{2500}".repeat(58)));
619
620 let s = &b.session_sim;
621 out.push(format!(
622 " {:<24} {:>10} {:>10} {:>10}",
623 "Raw (no compression)",
624 format_num(s.raw_tokens),
625 format!("${:.3}", s.raw_cost),
626 "\u{2014}",
627 ));
628
629 let lean_pct = if s.raw_tokens > 0 {
630 (1.0 - s.lean_tokens as f64 / s.raw_tokens as f64) * 100.0
631 } else {
632 0.0
633 };
634 out.push(format!(
635 " {:<24} {:>10} {:>10} {:>9.1}%",
636 "lean-ctx (no CCP)",
637 format_num(s.lean_tokens),
638 format!("${:.3}", s.lean_cost),
639 lean_pct,
640 ));
641
642 let ccp_pct = if s.raw_tokens > 0 {
643 (1.0 - s.lean_ccp_tokens as f64 / s.raw_tokens as f64) * 100.0
644 } else {
645 0.0
646 };
647 out.push(format!(
648 " {:<24} {:>10} {:>10} {:>9.1}%",
649 "lean-ctx + CCP",
650 format_num(s.lean_ccp_tokens),
651 format!("${:.3}", s.ccp_cost),
652 ccp_pct,
653 ));
654
655 out.push(sep.clone());
656 out.join("\n")
657}
658
659pub fn format_markdown(b: &ProjectBenchmark) -> String {
662 let mut out = Vec::new();
663
664 out.push("# lean-ctx Benchmark Report".to_string());
665 out.push(String::new());
666 out.push(format!("**Project:** `{}`", b.root));
667 out.push(format!("**Files measured:** {}", b.files_measured));
668 out.push(format!(
669 "**Total raw tokens:** {}",
670 format_num(b.total_raw_tokens)
671 ));
672 out.push(String::new());
673
674 out.push("## Compression by Language".to_string());
675 out.push(String::new());
676 out.push("| Language | Files | Raw Tokens | Best Mode | Compressed | Savings |".to_string());
677 out.push("|----------|------:|-----------:|-----------|----------:|--------:|".to_string());
678 for l in &b.languages {
679 out.push(format!(
680 "| {} | {} | {} | {} | {} | {:.1}% |",
681 l.ext,
682 l.count,
683 format_num(l.total_tokens),
684 l.best_mode,
685 format_num(l.best_mode_tokens),
686 l.best_savings_pct,
687 ));
688 }
689 out.push(String::new());
690
691 out.push("## Mode Performance".to_string());
692 out.push(String::new());
693 out.push("| Mode | Tokens | Savings | Latency | Quality |".to_string());
694 out.push("|------|-------:|--------:|--------:|--------:|".to_string());
695 for m in &b.mode_summaries {
696 let qual = if m.avg_preservation < 0.0 {
697 "N/A".to_string()
698 } else {
699 format!("{:.1}%", m.avg_preservation * 100.0)
700 };
701 let latency = if m.avg_latency_us > 1000 {
702 format!("{:.1}ms", m.avg_latency_us as f64 / 1000.0)
703 } else {
704 format!("{}μs", m.avg_latency_us)
705 };
706 out.push(format!(
707 "| {} | {} | {:.1}% | {} | {} |",
708 m.mode,
709 format_num(m.total_compressed_tokens),
710 m.avg_savings_pct,
711 latency,
712 qual
713 ));
714 }
715 out.push(String::new());
716
717 out.push("## Session Simulation (30-min coding)".to_string());
718 out.push(String::new());
719 out.push("| Approach | Tokens | Cost | Savings |".to_string());
720 out.push("|----------|-------:|-----:|--------:|".to_string());
721
722 let s = &b.session_sim;
723 out.push(format!(
724 "| Raw (no compression) | {} | ${:.3} | — |",
725 format_num(s.raw_tokens),
726 s.raw_cost
727 ));
728
729 let lean_pct = if s.raw_tokens > 0 {
730 (1.0 - s.lean_tokens as f64 / s.raw_tokens as f64) * 100.0
731 } else {
732 0.0
733 };
734 out.push(format!(
735 "| lean-ctx (no CCP) | {} | ${:.3} | {:.1}% |",
736 format_num(s.lean_tokens),
737 s.lean_cost,
738 lean_pct
739 ));
740
741 let ccp_pct = if s.raw_tokens > 0 {
742 (1.0 - s.lean_ccp_tokens as f64 / s.raw_tokens as f64) * 100.0
743 } else {
744 0.0
745 };
746 out.push(format!(
747 "| lean-ctx + CCP | {} | ${:.3} | {:.1}% |",
748 format_num(s.lean_ccp_tokens),
749 s.ccp_cost,
750 ccp_pct
751 ));
752
753 out.push(String::new());
754 out.push(format!(
755 "*Generated by lean-ctx benchmark v{} — https://leanctx.com*",
756 env!("CARGO_PKG_VERSION")
757 ));
758
759 out.join("\n")
760}
761
762pub fn format_json(b: &ProjectBenchmark) -> String {
765 let modes: Vec<serde_json::Value> = b.mode_summaries.iter().map(|m| {
766 serde_json::json!({
767 "mode": m.mode,
768 "total_compressed_tokens": m.total_compressed_tokens,
769 "avg_savings_pct": round2(m.avg_savings_pct),
770 "avg_latency_us": m.avg_latency_us,
771 "avg_preservation": if m.avg_preservation < 0.0 { serde_json::Value::Null } else { serde_json::json!(round2(m.avg_preservation * 100.0)) },
772 })
773 }).collect();
774
775 let languages: Vec<serde_json::Value> = b
776 .languages
777 .iter()
778 .map(|l| {
779 serde_json::json!({
780 "ext": l.ext,
781 "count": l.count,
782 "total_tokens": l.total_tokens,
783 "best_mode": l.best_mode,
784 "best_mode_tokens": l.best_mode_tokens,
785 "best_savings_pct": round2(l.best_savings_pct),
786 })
787 })
788 .collect();
789
790 let file_details: Vec<serde_json::Value> = b
791 .file_results
792 .iter()
793 .map(|f| {
794 let file_modes: Vec<serde_json::Value> = f
795 .modes
796 .iter()
797 .map(|m| {
798 serde_json::json!({
799 "mode": m.mode,
800 "tokens": m.tokens,
801 "savings_pct": round2(m.savings_pct),
802 "latency_us": m.latency_us,
803 "preservation": if m.preservation_score < 0.0 {
804 serde_json::Value::Null
805 } else {
806 serde_json::json!(round2(m.preservation_score * 100.0))
807 },
808 })
809 })
810 .collect();
811 serde_json::json!({
812 "path": f.path,
813 "ext": f.ext,
814 "raw_tokens": f.raw_tokens,
815 "modes": file_modes,
816 })
817 })
818 .collect();
819
820 let s = &b.session_sim;
821 let report = serde_json::json!({
822 "version": env!("CARGO_PKG_VERSION"),
823 "root": b.root,
824 "files_scanned": b.files_scanned,
825 "files_measured": b.files_measured,
826 "total_raw_tokens": b.total_raw_tokens,
827 "languages": languages,
828 "mode_summaries": modes,
829 "files": file_details,
830 "session_simulation": {
831 "raw_tokens": s.raw_tokens,
832 "lean_tokens": s.lean_tokens,
833 "lean_ccp_tokens": s.lean_ccp_tokens,
834 "raw_cost_usd": round2(s.raw_cost),
835 "lean_cost_usd": round2(s.lean_cost),
836 "ccp_cost_usd": round2(s.ccp_cost),
837 },
838 });
839
840 serde_json::to_string_pretty(&report).unwrap_or_else(|_| "{}".to_string())
841}
842
/// Formats a token count compactly: plain digits below 1,000, one decimal with
/// a `K` suffix below one million, and one decimal with an `M` suffix above.
///
/// Values just under one million that would round up to `1000.0K` are shown as
/// `1.0M`, so the output never carries a four-digit thousands figure.
fn format_num(n: usize) -> String {
    if n >= 1_000_000 {
        return format!("{:.1}M", n as f64 / 1_000_000.0);
    }
    if n >= 1_000 {
        let thousands = format!("{:.1}K", n as f64 / 1_000.0);
        // 999_950..=999_999 round up to "1000.0K"; roll those over to the next unit.
        if thousands == "1000.0K" {
            return "1.0M".to_string();
        }
        return thousands;
    }
    n.to_string()
}
854
/// Rounds `v` to two decimal places (halves round away from zero).
fn round2(v: f64) -> f64 {
    let hundredths = v * 100.0;
    hundredths.round() / 100.0
}
858
// Unit tests for the aggregation, simulation and formatting functions, plus one
// end-to-end run against the crate's own `src` directory.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `FileMeasurement` from `(mode, tokens)` pairs. Savings are derived
    /// from `raw`; latency and preservation are fixed placeholder values.
    fn mock_file(path: &str, ext: &str, raw: usize, modes: Vec<(&str, usize)>) -> FileMeasurement {
        FileMeasurement {
            path: path.to_string(),
            ext: ext.to_string(),
            raw_tokens: raw,
            modes: modes
                .into_iter()
                .map(|(mode, tokens)| ModeMeasurement {
                    mode: mode.to_string(),
                    tokens,
                    savings_pct: if raw > 0 {
                        (1.0 - tokens as f64 / raw as f64) * 100.0
                    } else {
                        0.0
                    },
                    latency_us: 100,
                    preservation_score: 0.85,
                })
                .collect(),
        }
    }

    // The best mode is chosen from token sums across all files of an extension.
    #[test]
    fn aggregate_languages_computes_best_mode() {
        let files = vec![
            mock_file(
                "a.rs",
                "rs",
                1000,
                vec![("map", 400), ("signatures", 200), ("aggressive", 300)],
            ),
            mock_file(
                "b.rs",
                "rs",
                800,
                vec![("map", 300), ("signatures", 150), ("aggressive", 250)],
            ),
            mock_file(
                "c.py",
                "py",
                600,
                vec![("map", 100), ("signatures", 250), ("aggressive", 200)],
            ),
        ];

        let langs = aggregate_languages(&files);
        assert_eq!(langs.len(), 2);

        let rs = langs.iter().find(|l| l.ext == "rs").unwrap();
        assert_eq!(rs.count, 2);
        assert_eq!(rs.total_tokens, 1800);
        assert_eq!(rs.best_mode, "signatures");
        assert_eq!(rs.best_mode_tokens, 350);
        assert!(rs.best_savings_pct > 80.0);

        let py = langs.iter().find(|l| l.ext == "py").unwrap();
        assert_eq!(py.best_mode, "map");
        assert_eq!(py.best_mode_tokens, 100);
    }

    // Token totals are summed and savings averaged per mode.
    #[test]
    fn aggregate_modes_averages() {
        let files = vec![
            mock_file("a.rs", "rs", 1000, vec![("map", 400), ("aggressive", 300)]),
            mock_file("b.rs", "rs", 500, vec![("map", 200), ("aggressive", 100)]),
        ];

        let modes = aggregate_modes(&files);
        let map = modes.iter().find(|m| m.mode == "map").unwrap();
        assert_eq!(map.total_compressed_tokens, 600);
        assert!(map.avg_savings_pct > 50.0);
    }

    // With no files the simulation reports zeros rather than the fixed shell costs.
    #[test]
    fn session_sim_empty_files() {
        let result = simulate_session(&[]);
        assert_eq!(result.raw_tokens, 0);
        assert_eq!(result.lean_tokens, 0);
        assert!((result.raw_cost).abs() < f64::EPSILON);
    }

    // Five 2000-token files: lean beats raw, and the flat CCP resume beats the
    // map-based resume.
    #[test]
    fn session_sim_basic() {
        let files: Vec<FileMeasurement> = (0..5)
            .map(|i| {
                mock_file(
                    &format!("file_{i}.rs"),
                    "rs",
                    2000,
                    vec![
                        ("map", 800),
                        ("aggressive", 600),
                        ("cache_hit", CACHE_HIT_TOKENS),
                    ],
                )
            })
            .collect();
        let result = simulate_session(&files);
        assert!(result.raw_tokens > 0);
        assert!(result.lean_tokens < result.raw_tokens);
        assert!(
            result.lean_ccp_tokens < result.lean_tokens,
            "CCP resume ({}) should beat map-based resume ({}) with enough files",
            result.lean_ccp_tokens,
            result.lean_tokens
        );
    }

    // The JSON report carries per-file details and per-language best-mode fields.
    #[test]
    fn format_json_includes_files_and_language_savings() {
        let files = vec![mock_file(
            "src/main.rs",
            "rs",
            500,
            vec![("map", 200), ("signatures", 100), ("cache_hit", 13)],
        )];
        let bench = ProjectBenchmark {
            root: ".".to_string(),
            files_scanned: 1,
            files_measured: 1,
            total_raw_tokens: 500,
            languages: aggregate_languages(&files),
            mode_summaries: aggregate_modes(&files),
            session_sim: simulate_session(&files),
            file_results: files,
        };

        let json_str = format_json(&bench);
        let parsed: serde_json::Value = serde_json::from_str(&json_str).unwrap();

        assert!(parsed["files"].is_array());
        assert_eq!(parsed["files"].as_array().unwrap().len(), 1);
        assert_eq!(parsed["files"][0]["path"], "src/main.rs");
        assert!(parsed["files"][0]["modes"].is_array());

        assert!(parsed["languages"][0]["best_mode"].is_string());
        assert!(parsed["languages"][0]["best_savings_pct"].is_number());
    }

    // The Markdown report contains the language table and its column headers.
    #[test]
    fn format_markdown_contains_language_savings() {
        let files = vec![mock_file(
            "lib.rs",
            "rs",
            1000,
            vec![("map", 300), ("signatures", 200)],
        )];
        let bench = ProjectBenchmark {
            root: ".".to_string(),
            files_scanned: 1,
            files_measured: 1,
            total_raw_tokens: 1000,
            languages: aggregate_languages(&files),
            mode_summaries: aggregate_modes(&files),
            session_sim: simulate_session(&files),
            file_results: files,
        };

        let md = format_markdown(&bench);
        assert!(md.contains("Compression by Language"));
        assert!(md.contains("Best Mode"));
        assert!(md.contains("Savings"));
    }

    // The terminal report contains the language section and the measured extension.
    #[test]
    fn format_terminal_contains_language_section() {
        let files = vec![mock_file(
            "app.py",
            "py",
            800,
            vec![("map", 200), ("aggressive", 300)],
        )];
        let bench = ProjectBenchmark {
            root: ".".to_string(),
            files_scanned: 1,
            files_measured: 1,
            total_raw_tokens: 800,
            languages: aggregate_languages(&files),
            mode_summaries: aggregate_modes(&files),
            session_sim: simulate_session(&files),
            file_results: files,
        };

        let out = format_terminal(&bench);
        assert!(out.contains("Compression by Language"));
        assert!(out.contains("py"));
        assert!(out.contains("Best Mode"));
    }

    // End-to-end run over the relative path "src".
    // NOTE(review): this relies on the test's working directory containing a
    // `src` directory with measurable files — presumably the crate root; confirm.
    #[test]
    fn run_project_benchmark_on_current_crate() {
        let bench = run_project_benchmark("src");
        assert!(bench.files_measured > 0);
        assert!(bench.total_raw_tokens > 0);
        assert!(!bench.languages.is_empty());
        assert!(!bench.mode_summaries.is_empty());

        for lang in &bench.languages {
            assert!(!lang.best_mode.is_empty());
            assert!(lang.best_savings_pct >= 0.0);
        }

        let json = format_json(&bench);
        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
        assert!(!parsed["files"].as_array().unwrap().is_empty());

        let md = format_markdown(&bench);
        assert!(md.contains("lean-ctx Benchmark Report"));

        let term = format_terminal(&bench);
        assert!(term.contains("Session Simulation"));
    }
}