1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3use std::time::Instant;
4
5use walkdir::WalkDir;
6
7use crate::core::compressor;
8use crate::core::deps;
9use crate::core::entropy;
10use crate::core::preservation;
11use crate::core::signatures;
12use crate::core::tokens::count_tokens;
13
/// Cost in USD of a single input token, derived from the default per-million-token input price.
const COST_PER_TOKEN: f64 = crate::core::stats::DEFAULT_INPUT_PRICE_PER_M / 1_000_000.0;
/// Largest file size in bytes (100 KiB) that the project scan will consider.
const MAX_FILE_SIZE: u64 = 100 * 1024;
/// Maximum number of files selected for measurement in a single benchmark run.
const MAX_FILES: usize = 50;
/// Token count charged for the `cache_hit` mode (a re-read of an already-seen file).
const CACHE_HIT_TOKENS: usize = 13;
18
/// Result of running one compression mode against one file.
#[derive(Debug, Clone)]
pub struct ModeMeasurement {
    /// Mode name, e.g. `"map"`, `"signatures"`, `"aggressive"`, `"entropy"` or `"cache_hit"`.
    pub mode: String,
    /// Token count of the mode's output (the fixed cache-hit cost for `"cache_hit"`).
    pub tokens: usize,
    /// Percentage of tokens saved relative to the raw file; `0.0` when the raw count is zero.
    /// Can be negative when the mode's output is larger than the input.
    pub savings_pct: f64,
    /// Wall-clock time spent producing the output, in microseconds.
    pub latency_us: u64,
    /// Preservation score for the output, or `-1.0` when it does not apply
    /// (used for `"cache_hit"`). Reports multiply it by 100 to print a percentage.
    pub preservation_score: f64,
}
29
/// All mode measurements collected for a single file.
#[derive(Debug, Clone)]
pub struct FileMeasurement {
    /// Display path: relative to the benchmark root when the root is a prefix of the file path,
    /// otherwise the path as scanned.
    #[allow(dead_code)]
    pub path: String,
    /// File extension without the leading dot; empty when the file has none.
    pub ext: String,
    /// Token count of the unmodified file content.
    pub raw_tokens: usize,
    /// One entry per measured compression mode.
    pub modes: Vec<ModeMeasurement>,
}
38
/// Per-extension aggregate across all measured files.
#[derive(Debug, Clone)]
pub struct LanguageStats {
    /// File extension this row aggregates.
    pub ext: String,
    /// Number of measured files with this extension.
    pub count: usize,
    /// Sum of raw token counts over those files.
    pub total_tokens: usize,
    /// Mode with the lowest summed token count (excluding `"cache_hit"`), or `"none"` when
    /// no mode was measured.
    pub best_mode: String,
    /// Summed token count of `best_mode` (falls back to `total_tokens` for `"none"`).
    pub best_mode_tokens: usize,
    /// Percentage saved by `best_mode` relative to `total_tokens`.
    pub best_savings_pct: f64,
}
48
/// Project-wide aggregate for one compression mode.
#[derive(Debug, Clone)]
pub struct ModeSummary {
    /// Mode name.
    pub mode: String,
    /// Sum of the mode's output tokens over every file that has a measurement for it.
    pub total_compressed_tokens: usize,
    /// Unweighted mean of the per-file savings percentages.
    pub avg_savings_pct: f64,
    /// Mean per-file latency in microseconds (integer division).
    pub avg_latency_us: u64,
    /// Mean of the non-negative per-file preservation scores, or `-1.0` when there are none.
    pub avg_preservation: f64,
}
57
/// Token and cost totals of the simulated coding session under three strategies.
#[derive(Debug, Clone)]
pub struct SessionSimResult {
    /// Total tokens when every read is uncompressed.
    pub raw_tokens: usize,
    /// Total tokens with compression modes and cache hits, resuming via `"map"` reads.
    pub lean_tokens: usize,
    /// Same as `lean_tokens` but with the resume step replaced by a fixed-size CCP cost.
    pub lean_ccp_tokens: usize,
    /// `raw_tokens` priced at the per-token cost.
    pub raw_cost: f64,
    /// `lean_tokens` priced at the per-token cost.
    pub lean_cost: f64,
    /// `lean_ccp_tokens` priced at the per-token cost.
    pub ccp_cost: f64,
}
67
/// Complete benchmark report for one project root.
#[derive(Debug, Clone)]
pub struct ProjectBenchmark {
    /// Root directory that was scanned (`"."` when the caller passed an empty path).
    pub root: String,
    /// Number of files selected by the scan.
    pub files_scanned: usize,
    /// Number of selected files that could be read and produced a non-zero token count.
    pub files_measured: usize,
    /// Sum of raw token counts over all measured files.
    pub total_raw_tokens: usize,
    /// Per-extension aggregates, largest raw token total first.
    pub languages: Vec<LanguageStats>,
    /// Per-mode aggregates.
    pub mode_summaries: Vec<ModeSummary>,
    /// Simulated session totals.
    pub session_sim: SessionSimResult,
    /// Raw per-file measurements backing the aggregates.
    #[allow(dead_code)]
    pub file_results: Vec<FileMeasurement>,
}
80
/// Reports whether a directory with exactly this name is excluded from the project scan.
///
/// Matching is exact and case-sensitive; the list holds names such as `node_modules`,
/// `target` and `.git`.
fn is_skipped_dir(name: &str) -> bool {
    const SKIPPED_DIRS: &[&str] = &[
        "node_modules",
        ".git",
        "target",
        "dist",
        "build",
        ".next",
        ".nuxt",
        "__pycache__",
        ".cache",
        "coverage",
        "vendor",
        ".svn",
        ".hg",
    ];

    SKIPPED_DIRS.contains(&name)
}
101
/// Reports whether `ext` (without the leading dot) is one of the file extensions the
/// benchmark measures.
///
/// Matching is exact and case-sensitive; note that both `r` and `R` are listed explicitly.
fn is_text_ext(ext: &str) -> bool {
    const TEXT_EXTENSIONS: &[&str] = &[
        "rs", "ts", "tsx", "js", "jsx", "py", "go", "java", "c", "cpp", "h", "hpp", "cs", "kt",
        "swift", "rb", "php", "vue", "svelte", "html", "css", "scss", "less", "json", "yaml",
        "yml", "toml", "xml", "md", "txt", "sh", "bash", "zsh", "fish", "sql", "graphql",
        "proto", "ex", "exs", "zig", "lua", "r", "R", "dart", "scala",
    ];

    TEXT_EXTENSIONS.iter().any(|known| *known == ext)
}
151
152fn scan_project(root: &str) -> Vec<PathBuf> {
153 let mut files: Vec<(PathBuf, u64)> = Vec::new();
154
155 for entry in WalkDir::new(root)
156 .max_depth(8)
157 .into_iter()
158 .filter_entry(|e| {
159 let name = e.file_name().to_string_lossy();
160 if e.file_type().is_dir() {
161 if e.depth() > 0 && name.starts_with('.') {
162 return false;
163 }
164 return !is_skipped_dir(&name);
165 }
166 true
167 })
168 {
169 let Ok(entry) = entry else { continue };
170
171 if entry.file_type().is_dir() {
172 continue;
173 }
174
175 let path = entry.path().to_path_buf();
176 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
177
178 if !is_text_ext(ext) {
179 continue;
180 }
181
182 let size = entry.metadata().map_or(0, |m| m.len());
183 if size == 0 || size > MAX_FILE_SIZE {
184 continue;
185 }
186
187 files.push((path, size));
188 }
189
190 files.sort_by_key(|x| std::cmp::Reverse(x.1));
191
192 let mut selected = Vec::new();
193 let mut ext_counts: HashMap<String, usize> = HashMap::new();
194
195 for (path, _size) in &files {
196 if selected.len() >= MAX_FILES {
197 break;
198 }
199 let ext = path
200 .extension()
201 .and_then(|e| e.to_str())
202 .unwrap_or("")
203 .to_string();
204 let count = ext_counts.entry(ext.clone()).or_insert(0);
205 if *count < 10 {
206 *count += 1;
207 selected.push(path.clone());
208 }
209 }
210
211 selected
212}
213
214fn measure_mode(content: &str, ext: &str, mode: &str, raw_tokens: usize) -> ModeMeasurement {
217 let start = Instant::now();
218
219 let compressed = match mode {
220 "map" => {
221 let sigs = signatures::extract_signatures(content, ext);
222 let dep_info = deps::extract_deps(content, ext);
223 let mut parts = Vec::new();
224 if !dep_info.imports.is_empty() {
225 parts.push(format!("deps: {}", dep_info.imports.join(", ")));
226 }
227 if !dep_info.exports.is_empty() {
228 parts.push(format!("exports: {}", dep_info.exports.join(", ")));
229 }
230 let key_sigs: Vec<String> = sigs
231 .iter()
232 .filter(|s| s.is_exported || s.indent == 0)
233 .map(super::signatures::Signature::to_compact)
234 .collect();
235 if !key_sigs.is_empty() {
236 parts.push(key_sigs.join("\n"));
237 }
238 parts.join("\n")
239 }
240 "signatures" => {
241 let sigs = signatures::extract_signatures(content, ext);
242 sigs.iter()
243 .map(super::signatures::Signature::to_compact)
244 .collect::<Vec<_>>()
245 .join("\n")
246 }
247 "aggressive" => compressor::aggressive_compress(content, Some(ext)),
248 "entropy" => entropy::entropy_compress(content).output,
249 "cache_hit" => "cached re-read ~13tok".to_string(),
250 _ => content.to_string(),
251 };
252
253 let latency = start.elapsed();
254 let tokens = if mode == "cache_hit" {
255 CACHE_HIT_TOKENS
256 } else {
257 count_tokens(&compressed)
258 };
259
260 let savings_pct = if raw_tokens > 0 {
261 (1.0 - tokens as f64 / raw_tokens as f64) * 100.0
262 } else {
263 0.0
264 };
265
266 let preservation_score = if mode == "cache_hit" {
267 -1.0
268 } else {
269 preservation::measure(content, &compressed, ext).overall()
270 };
271
272 ModeMeasurement {
273 mode: mode.to_string(),
274 tokens,
275 savings_pct,
276 latency_us: latency.as_micros() as u64,
277 preservation_score,
278 }
279}
280
281fn measure_file(path: &Path, root: &str) -> Option<FileMeasurement> {
282 let content = std::fs::read_to_string(path).ok()?;
283 if content.is_empty() {
284 return None;
285 }
286
287 let ext = path
288 .extension()
289 .and_then(|e| e.to_str())
290 .unwrap_or("")
291 .to_string();
292
293 let raw_tokens = count_tokens(&content);
294 if raw_tokens == 0 {
295 return None;
296 }
297
298 let modes = ["map", "signatures", "aggressive", "entropy", "cache_hit"];
299 let measurements: Vec<ModeMeasurement> = modes
300 .iter()
301 .map(|m| measure_mode(&content, &ext, m, raw_tokens))
302 .collect();
303
304 let display_path = path
305 .strip_prefix(root)
306 .unwrap_or(path)
307 .to_string_lossy()
308 .to_string();
309
310 Some(FileMeasurement {
311 path: display_path,
312 ext,
313 raw_tokens,
314 modes: measurements,
315 })
316}
317
318fn aggregate_languages(files: &[FileMeasurement]) -> Vec<LanguageStats> {
321 struct LangAccum {
322 count: usize,
323 total_tokens: usize,
324 mode_tokens: HashMap<String, usize>,
325 }
326
327 let mut map: HashMap<String, LangAccum> = HashMap::new();
328 for f in files {
329 let entry = map.entry(f.ext.clone()).or_insert_with(|| LangAccum {
330 count: 0,
331 total_tokens: 0,
332 mode_tokens: HashMap::new(),
333 });
334 entry.count += 1;
335 entry.total_tokens += f.raw_tokens;
336 for m in &f.modes {
337 *entry.mode_tokens.entry(m.mode.clone()).or_insert(0) += m.tokens;
338 }
339 }
340
341 let mut stats: Vec<LanguageStats> = map
342 .into_iter()
343 .map(|(ext, acc)| {
344 let (best_mode, best_tokens) = acc
345 .mode_tokens
346 .iter()
347 .filter(|(m, _)| m.as_str() != "cache_hit")
348 .min_by_key(|(_, t)| **t)
349 .map_or_else(
350 || ("none".to_string(), acc.total_tokens),
351 |(m, t)| (m.clone(), *t),
352 );
353
354 let savings = if acc.total_tokens > 0 {
355 (1.0 - best_tokens as f64 / acc.total_tokens as f64) * 100.0
356 } else {
357 0.0
358 };
359
360 LanguageStats {
361 ext,
362 count: acc.count,
363 total_tokens: acc.total_tokens,
364 best_mode,
365 best_mode_tokens: best_tokens,
366 best_savings_pct: savings,
367 }
368 })
369 .collect();
370 stats.sort_by_key(|x| std::cmp::Reverse(x.total_tokens));
371 stats
372}
373
374fn aggregate_modes(files: &[FileMeasurement]) -> Vec<ModeSummary> {
375 let mode_names = ["map", "signatures", "aggressive", "entropy", "cache_hit"];
376 let mut summaries = Vec::new();
377
378 for mode_name in &mode_names {
379 let mut total_tokens = 0usize;
380 let mut total_savings = 0.0f64;
381 let mut total_latency = 0u64;
382 let mut total_preservation = 0.0f64;
383 let mut preservation_count = 0usize;
384 let mut count = 0usize;
385
386 for f in files {
387 if let Some(m) = f.modes.iter().find(|m| m.mode == *mode_name) {
388 total_tokens += m.tokens;
389 total_savings += m.savings_pct;
390 total_latency += m.latency_us;
391 if m.preservation_score >= 0.0 {
392 total_preservation += m.preservation_score;
393 preservation_count += 1;
394 }
395 count += 1;
396 }
397 }
398
399 if count == 0 {
400 continue;
401 }
402
403 summaries.push(ModeSummary {
404 mode: mode_name.to_string(),
405 total_compressed_tokens: total_tokens,
406 avg_savings_pct: total_savings / count as f64,
407 avg_latency_us: total_latency / count as u64,
408 avg_preservation: if preservation_count > 0 {
409 total_preservation / preservation_count as f64
410 } else {
411 -1.0
412 },
413 });
414 }
415
416 summaries
417}
418
419fn simulate_session(files: &[FileMeasurement]) -> SessionSimResult {
422 if files.is_empty() {
423 return SessionSimResult {
424 raw_tokens: 0,
425 lean_tokens: 0,
426 lean_ccp_tokens: 0,
427 raw_cost: 0.0,
428 lean_cost: 0.0,
429 ccp_cost: 0.0,
430 };
431 }
432
433 let file_count = files.len().min(15);
434 let selected = &files[..file_count];
435
436 let first_read_raw: usize = selected.iter().map(|f| f.raw_tokens).sum();
437
438 let first_read_lean: usize = selected
439 .iter()
440 .enumerate()
441 .map(|(i, f)| {
442 let mode = if i % 3 == 0 { "aggressive" } else { "map" };
443 f.modes
444 .iter()
445 .find(|m| m.mode == mode)
446 .map_or(f.raw_tokens, |m| m.tokens)
447 })
448 .sum();
449
450 let cache_reread_count = 10usize.min(file_count);
451 let cache_raw: usize = selected[..cache_reread_count]
452 .iter()
453 .map(|f| f.raw_tokens)
454 .sum();
455 let cache_lean: usize = cache_reread_count * CACHE_HIT_TOKENS;
456
457 let shell_count = 8usize;
458 let shell_raw = shell_count * 500;
459 let shell_lean = shell_count * 200;
460
461 let resume_raw: usize = selected.iter().map(|f| f.raw_tokens).sum();
462 let resume_lean: usize = selected
463 .iter()
464 .map(|f| {
465 f.modes
466 .iter()
467 .find(|m| m.mode == "map")
468 .map_or(f.raw_tokens, |m| m.tokens)
469 })
470 .sum();
471 let resume_ccp = 400usize;
472
473 let raw_total = first_read_raw + cache_raw + shell_raw + resume_raw;
474 let lean_total = first_read_lean + cache_lean + shell_lean + resume_lean;
475 let ccp_total = first_read_lean + cache_lean + shell_lean + resume_ccp;
476
477 SessionSimResult {
478 raw_tokens: raw_total,
479 lean_tokens: lean_total,
480 lean_ccp_tokens: ccp_total,
481 raw_cost: raw_total as f64 * COST_PER_TOKEN,
482 lean_cost: lean_total as f64 * COST_PER_TOKEN,
483 ccp_cost: ccp_total as f64 * COST_PER_TOKEN,
484 }
485}
486
487pub fn run_project_benchmark(path: &str) -> ProjectBenchmark {
490 let root = if path.is_empty() { "." } else { path };
491 let scanned = scan_project(root);
492 let files_scanned = scanned.len();
493
494 let file_results: Vec<FileMeasurement> = scanned
495 .iter()
496 .filter_map(|p| measure_file(p, root))
497 .collect();
498
499 let total_raw_tokens: usize = file_results.iter().map(|f| f.raw_tokens).sum();
500 let languages = aggregate_languages(&file_results);
501 let mode_summaries = aggregate_modes(&file_results);
502 let session_sim = simulate_session(&file_results);
503
504 ProjectBenchmark {
505 root: root.to_string(),
506 files_scanned,
507 files_measured: file_results.len(),
508 total_raw_tokens,
509 languages,
510 mode_summaries,
511 session_sim,
512 file_results,
513 }
514}
515
516pub fn format_terminal(b: &ProjectBenchmark) -> String {
519 let mut out = Vec::new();
520 let sep = "\u{2550}".repeat(66);
521
522 out.push(sep.clone());
523 out.push(format!(" lean-ctx Benchmark — {}", b.root));
524 out.push(sep.clone());
525
526 let lang_summary: Vec<String> = b
527 .languages
528 .iter()
529 .take(5)
530 .map(|l| format!("{} {}", l.count, l.ext))
531 .collect();
532 out.push(format!(
533 " Scanned: {} files ({})",
534 b.files_measured,
535 lang_summary.join(", ")
536 ));
537 out.push(format!(
538 " Total raw tokens: {}",
539 format_num(b.total_raw_tokens)
540 ));
541 out.push(String::new());
542
543 out.push(" Compression by Language:".to_string());
544 out.push(format!(
545 " {:<10} {:>6} {:>10} {:>10} {:>10} {:>10}",
546 "Lang", "Files", "Raw Tok", "Best Mode", "Compressed", "Savings"
547 ));
548 out.push(format!(" {}", "\u{2500}".repeat(62)));
549 for l in &b.languages {
550 out.push(format!(
551 " {:<10} {:>6} {:>10} {:>10} {:>10} {:>9.1}%",
552 l.ext,
553 l.count,
554 format_num(l.total_tokens),
555 l.best_mode,
556 format_num(l.best_mode_tokens),
557 l.best_savings_pct,
558 ));
559 }
560 out.push(String::new());
561
562 out.push(" Mode Performance:".to_string());
563 out.push(format!(
564 " {:<14} {:>10} {:>10} {:>10} {:>10}",
565 "Mode", "Tokens", "Savings", "Latency", "Quality"
566 ));
567 out.push(format!(" {}", "\u{2500}".repeat(58)));
568
569 for m in &b.mode_summaries {
570 let qual = if m.avg_preservation < 0.0 {
571 "N/A".to_string()
572 } else {
573 format!("{:.1}%", m.avg_preservation * 100.0)
574 };
575 let latency = if m.avg_latency_us > 1000 {
576 format!("{:.1}ms", m.avg_latency_us as f64 / 1000.0)
577 } else {
578 format!("{}μs", m.avg_latency_us)
579 };
580 out.push(format!(
581 " {:<14} {:>10} {:>9.1}% {:>10} {:>10}",
582 m.mode,
583 format_num(m.total_compressed_tokens),
584 m.avg_savings_pct,
585 latency,
586 qual,
587 ));
588 }
589
590 out.push(String::new());
591 out.push(" Session Simulation (30-min coding):".to_string());
592 out.push(format!(
593 " {:<24} {:>10} {:>10} {:>10}",
594 "Approach", "Tokens", "Cost", "Savings"
595 ));
596 out.push(format!(" {}", "\u{2500}".repeat(58)));
597
598 let s = &b.session_sim;
599 out.push(format!(
600 " {:<24} {:>10} {:>10} {:>10}",
601 "Raw (no compression)",
602 format_num(s.raw_tokens),
603 format!("${:.3}", s.raw_cost),
604 "\u{2014}",
605 ));
606
607 let lean_pct = if s.raw_tokens > 0 {
608 (1.0 - s.lean_tokens as f64 / s.raw_tokens as f64) * 100.0
609 } else {
610 0.0
611 };
612 out.push(format!(
613 " {:<24} {:>10} {:>10} {:>9.1}%",
614 "lean-ctx (no CCP)",
615 format_num(s.lean_tokens),
616 format!("${:.3}", s.lean_cost),
617 lean_pct,
618 ));
619
620 let ccp_pct = if s.raw_tokens > 0 {
621 (1.0 - s.lean_ccp_tokens as f64 / s.raw_tokens as f64) * 100.0
622 } else {
623 0.0
624 };
625 out.push(format!(
626 " {:<24} {:>10} {:>10} {:>9.1}%",
627 "lean-ctx + CCP",
628 format_num(s.lean_ccp_tokens),
629 format!("${:.3}", s.ccp_cost),
630 ccp_pct,
631 ));
632
633 out.push(sep.clone());
634 out.join("\n")
635}
636
637pub fn format_markdown(b: &ProjectBenchmark) -> String {
640 let mut out = Vec::new();
641
642 out.push("# lean-ctx Benchmark Report".to_string());
643 out.push(String::new());
644 out.push(format!("**Project:** `{}`", b.root));
645 out.push(format!("**Files measured:** {}", b.files_measured));
646 out.push(format!(
647 "**Total raw tokens:** {}",
648 format_num(b.total_raw_tokens)
649 ));
650 out.push(String::new());
651
652 out.push("## Compression by Language".to_string());
653 out.push(String::new());
654 out.push("| Language | Files | Raw Tokens | Best Mode | Compressed | Savings |".to_string());
655 out.push("|----------|------:|-----------:|-----------|----------:|--------:|".to_string());
656 for l in &b.languages {
657 out.push(format!(
658 "| {} | {} | {} | {} | {} | {:.1}% |",
659 l.ext,
660 l.count,
661 format_num(l.total_tokens),
662 l.best_mode,
663 format_num(l.best_mode_tokens),
664 l.best_savings_pct,
665 ));
666 }
667 out.push(String::new());
668
669 out.push("## Mode Performance".to_string());
670 out.push(String::new());
671 out.push("| Mode | Tokens | Savings | Latency | Quality |".to_string());
672 out.push("|------|-------:|--------:|--------:|--------:|".to_string());
673 for m in &b.mode_summaries {
674 let qual = if m.avg_preservation < 0.0 {
675 "N/A".to_string()
676 } else {
677 format!("{:.1}%", m.avg_preservation * 100.0)
678 };
679 let latency = if m.avg_latency_us > 1000 {
680 format!("{:.1}ms", m.avg_latency_us as f64 / 1000.0)
681 } else {
682 format!("{}μs", m.avg_latency_us)
683 };
684 out.push(format!(
685 "| {} | {} | {:.1}% | {} | {} |",
686 m.mode,
687 format_num(m.total_compressed_tokens),
688 m.avg_savings_pct,
689 latency,
690 qual
691 ));
692 }
693 out.push(String::new());
694
695 out.push("## Session Simulation (30-min coding)".to_string());
696 out.push(String::new());
697 out.push("| Approach | Tokens | Cost | Savings |".to_string());
698 out.push("|----------|-------:|-----:|--------:|".to_string());
699
700 let s = &b.session_sim;
701 out.push(format!(
702 "| Raw (no compression) | {} | ${:.3} | — |",
703 format_num(s.raw_tokens),
704 s.raw_cost
705 ));
706
707 let lean_pct = if s.raw_tokens > 0 {
708 (1.0 - s.lean_tokens as f64 / s.raw_tokens as f64) * 100.0
709 } else {
710 0.0
711 };
712 out.push(format!(
713 "| lean-ctx (no CCP) | {} | ${:.3} | {:.1}% |",
714 format_num(s.lean_tokens),
715 s.lean_cost,
716 lean_pct
717 ));
718
719 let ccp_pct = if s.raw_tokens > 0 {
720 (1.0 - s.lean_ccp_tokens as f64 / s.raw_tokens as f64) * 100.0
721 } else {
722 0.0
723 };
724 out.push(format!(
725 "| lean-ctx + CCP | {} | ${:.3} | {:.1}% |",
726 format_num(s.lean_ccp_tokens),
727 s.ccp_cost,
728 ccp_pct
729 ));
730
731 out.push(String::new());
732 out.push(format!(
733 "*Generated by lean-ctx benchmark v{} — https://leanctx.com*",
734 env!("CARGO_PKG_VERSION")
735 ));
736
737 out.join("\n")
738}
739
740pub fn format_json(b: &ProjectBenchmark) -> String {
743 let modes: Vec<serde_json::Value> = b.mode_summaries.iter().map(|m| {
744 serde_json::json!({
745 "mode": m.mode,
746 "total_compressed_tokens": m.total_compressed_tokens,
747 "avg_savings_pct": round2(m.avg_savings_pct),
748 "avg_latency_us": m.avg_latency_us,
749 "avg_preservation": if m.avg_preservation < 0.0 { serde_json::Value::Null } else { serde_json::json!(round2(m.avg_preservation * 100.0)) },
750 })
751 }).collect();
752
753 let languages: Vec<serde_json::Value> = b
754 .languages
755 .iter()
756 .map(|l| {
757 serde_json::json!({
758 "ext": l.ext,
759 "count": l.count,
760 "total_tokens": l.total_tokens,
761 "best_mode": l.best_mode,
762 "best_mode_tokens": l.best_mode_tokens,
763 "best_savings_pct": round2(l.best_savings_pct),
764 })
765 })
766 .collect();
767
768 let file_details: Vec<serde_json::Value> = b
769 .file_results
770 .iter()
771 .map(|f| {
772 let file_modes: Vec<serde_json::Value> = f
773 .modes
774 .iter()
775 .map(|m| {
776 serde_json::json!({
777 "mode": m.mode,
778 "tokens": m.tokens,
779 "savings_pct": round2(m.savings_pct),
780 "latency_us": m.latency_us,
781 "preservation": if m.preservation_score < 0.0 {
782 serde_json::Value::Null
783 } else {
784 serde_json::json!(round2(m.preservation_score * 100.0))
785 },
786 })
787 })
788 .collect();
789 serde_json::json!({
790 "path": f.path,
791 "ext": f.ext,
792 "raw_tokens": f.raw_tokens,
793 "modes": file_modes,
794 })
795 })
796 .collect();
797
798 let s = &b.session_sim;
799 let report = serde_json::json!({
800 "version": env!("CARGO_PKG_VERSION"),
801 "root": b.root,
802 "files_scanned": b.files_scanned,
803 "files_measured": b.files_measured,
804 "total_raw_tokens": b.total_raw_tokens,
805 "languages": languages,
806 "mode_summaries": modes,
807 "files": file_details,
808 "session_simulation": {
809 "raw_tokens": s.raw_tokens,
810 "lean_tokens": s.lean_tokens,
811 "lean_ccp_tokens": s.lean_ccp_tokens,
812 "raw_cost_usd": round2(s.raw_cost),
813 "lean_cost_usd": round2(s.lean_cost),
814 "ccp_cost_usd": round2(s.ccp_cost),
815 },
816 });
817
818 serde_json::to_string_pretty(&report).unwrap_or_else(|_| "{}".to_string())
819}
820
/// Formats a count compactly: plain digits below 1 000, one-decimal `K` for thousands and
/// one-decimal `M` for millions (e.g. `999`, `1.5K`, `12.3M`).
fn format_num(n: usize) -> String {
    // From 999_950 upward the thousands form would round to "1000.0K", so such values are
    // already shown in millions ("1.0M").
    const MILLIONS_FROM: usize = 999_950;

    if n >= MILLIONS_FROM {
        format!("{:.1}M", n as f64 / 1_000_000.0)
    } else if n >= 1_000 {
        format!("{:.1}K", n as f64 / 1_000.0)
    } else {
        n.to_string()
    }
}
832
/// Rounds `v` to two decimal places (halves round away from zero, as with `f64::round`).
fn round2(v: f64) -> f64 {
    let hundredths = (v * 100.0).round();
    hundredths / 100.0
}
836
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `FileMeasurement` from `(mode, tokens)` pairs. Savings are derived from
    /// `raw`; latency (100 µs) and preservation (0.85) are fixed for every mode.
    fn mock_file(path: &str, ext: &str, raw: usize, modes: Vec<(&str, usize)>) -> FileMeasurement {
        FileMeasurement {
            path: path.to_string(),
            ext: ext.to_string(),
            raw_tokens: raw,
            modes: modes
                .into_iter()
                .map(|(mode, tokens)| ModeMeasurement {
                    mode: mode.to_string(),
                    tokens,
                    savings_pct: if raw > 0 {
                        (1.0 - tokens as f64 / raw as f64) * 100.0
                    } else {
                        0.0
                    },
                    latency_us: 100,
                    preservation_score: 0.85,
                })
                .collect(),
        }
    }

    // The best mode is chosen per language from token totals summed across its files.
    #[test]
    fn aggregate_languages_computes_best_mode() {
        let files = vec![
            mock_file(
                "a.rs",
                "rs",
                1000,
                vec![("map", 400), ("signatures", 200), ("aggressive", 300)],
            ),
            mock_file(
                "b.rs",
                "rs",
                800,
                vec![("map", 300), ("signatures", 150), ("aggressive", 250)],
            ),
            mock_file(
                "c.py",
                "py",
                600,
                vec![("map", 100), ("signatures", 250), ("aggressive", 200)],
            ),
        ];

        let langs = aggregate_languages(&files);
        assert_eq!(langs.len(), 2);

        let rs = langs.iter().find(|l| l.ext == "rs").unwrap();
        assert_eq!(rs.count, 2);
        assert_eq!(rs.total_tokens, 1800);
        assert_eq!(rs.best_mode, "signatures");
        // 200 + 150 summed over both Rust files.
        assert_eq!(rs.best_mode_tokens, 350);
        assert!(rs.best_savings_pct > 80.0);

        let py = langs.iter().find(|l| l.ext == "py").unwrap();
        assert_eq!(py.best_mode, "map");
        assert_eq!(py.best_mode_tokens, 100);
    }

    // Mode totals are summed over files and savings are averaged per file.
    #[test]
    fn aggregate_modes_averages() {
        let files = vec![
            mock_file("a.rs", "rs", 1000, vec![("map", 400), ("aggressive", 300)]),
            mock_file("b.rs", "rs", 500, vec![("map", 200), ("aggressive", 100)]),
        ];

        let modes = aggregate_modes(&files);
        let map = modes.iter().find(|m| m.mode == "map").unwrap();
        assert_eq!(map.total_compressed_tokens, 600);
        assert!(map.avg_savings_pct > 50.0);
    }

    // An empty input short-circuits to an all-zero result.
    #[test]
    fn session_sim_empty_files() {
        let result = simulate_session(&[]);
        assert_eq!(result.raw_tokens, 0);
        assert_eq!(result.lean_tokens, 0);
        assert!((result.raw_cost).abs() < f64::EPSILON);
    }

    // With five 2000-token files the lean strategies must beat raw, and the fixed CCP
    // resume cost must beat re-reading every file in map mode.
    #[test]
    fn session_sim_basic() {
        let files: Vec<FileMeasurement> = (0..5)
            .map(|i| {
                mock_file(
                    &format!("file_{i}.rs"),
                    "rs",
                    2000,
                    vec![
                        ("map", 800),
                        ("aggressive", 600),
                        ("cache_hit", CACHE_HIT_TOKENS),
                    ],
                )
            })
            .collect();
        let result = simulate_session(&files);
        assert!(result.raw_tokens > 0);
        assert!(result.lean_tokens < result.raw_tokens);
        assert!(
            result.lean_ccp_tokens < result.lean_tokens,
            "CCP resume ({}) should beat map-based resume ({}) with enough files",
            result.lean_ccp_tokens,
            result.lean_tokens
        );
    }

    // The JSON report must carry per-file details and per-language savings fields.
    #[test]
    fn format_json_includes_files_and_language_savings() {
        let files = vec![mock_file(
            "src/main.rs",
            "rs",
            500,
            vec![("map", 200), ("signatures", 100), ("cache_hit", 13)],
        )];
        let bench = ProjectBenchmark {
            root: ".".to_string(),
            files_scanned: 1,
            files_measured: 1,
            total_raw_tokens: 500,
            languages: aggregate_languages(&files),
            mode_summaries: aggregate_modes(&files),
            session_sim: simulate_session(&files),
            file_results: files,
        };

        let json_str = format_json(&bench);
        let parsed: serde_json::Value = serde_json::from_str(&json_str).unwrap();

        assert!(parsed["files"].is_array());
        assert_eq!(parsed["files"].as_array().unwrap().len(), 1);
        assert_eq!(parsed["files"][0]["path"], "src/main.rs");
        assert!(parsed["files"][0]["modes"].is_array());

        assert!(parsed["languages"][0]["best_mode"].is_string());
        assert!(parsed["languages"][0]["best_savings_pct"].is_number());
    }

    // The Markdown report must contain the per-language table and its column headers.
    #[test]
    fn format_markdown_contains_language_savings() {
        let files = vec![mock_file(
            "lib.rs",
            "rs",
            1000,
            vec![("map", 300), ("signatures", 200)],
        )];
        let bench = ProjectBenchmark {
            root: ".".to_string(),
            files_scanned: 1,
            files_measured: 1,
            total_raw_tokens: 1000,
            languages: aggregate_languages(&files),
            mode_summaries: aggregate_modes(&files),
            session_sim: simulate_session(&files),
            file_results: files,
        };

        let md = format_markdown(&bench);
        assert!(md.contains("Compression by Language"));
        assert!(md.contains("Best Mode"));
        assert!(md.contains("Savings"));
    }

    // The terminal report must contain the per-language section and the language row.
    #[test]
    fn format_terminal_contains_language_section() {
        let files = vec![mock_file(
            "app.py",
            "py",
            800,
            vec![("map", 200), ("aggressive", 300)],
        )];
        let bench = ProjectBenchmark {
            root: ".".to_string(),
            files_scanned: 1,
            files_measured: 1,
            total_raw_tokens: 800,
            languages: aggregate_languages(&files),
            mode_summaries: aggregate_modes(&files),
            session_sim: simulate_session(&files),
            file_results: files,
        };

        let out = format_terminal(&bench);
        assert!(out.contains("Compression by Language"));
        assert!(out.contains("py"));
        assert!(out.contains("Best Mode"));
    }

    // End-to-end run: scans the relative directory `src`, so it depends on the working
    // directory the test binary is started in, and exercises all three output formats.
    #[test]
    fn run_project_benchmark_on_current_crate() {
        let bench = run_project_benchmark("src");
        assert!(bench.files_measured > 0);
        assert!(bench.total_raw_tokens > 0);
        assert!(!bench.languages.is_empty());
        assert!(!bench.mode_summaries.is_empty());

        for lang in &bench.languages {
            assert!(!lang.best_mode.is_empty());
            assert!(lang.best_savings_pct >= 0.0);
        }

        let json = format_json(&bench);
        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
        assert!(!parsed["files"].as_array().unwrap().is_empty());

        let md = format_markdown(&bench);
        assert!(md.contains("lean-ctx Benchmark Report"));

        let term = format_terminal(&bench);
        assert!(term.contains("Session Simulation"));
    }
}