1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3use std::time::Instant;
4
5use walkdir::WalkDir;
6
7use crate::core::compressor;
8use crate::core::deps;
9use crate::core::entropy;
10use crate::core::preservation;
11use crate::core::quality;
12use crate::core::signatures;
13use crate::core::tokens::count_tokens;
14
/// Assumed input cost per token in USD, derived from the default
/// per-million-token price declared in `crate::core::stats`.
const COST_PER_TOKEN: f64 = crate::core::stats::DEFAULT_INPUT_PRICE_PER_M / 1_000_000.0;
/// Files larger than 100 KiB are skipped by the project scanner.
const MAX_FILE_SIZE: u64 = 100 * 1024;
/// Upper bound on the number of files measured in one benchmark run.
const MAX_FILES: usize = 50;
/// Synthetic token cost charged for a cached re-read ("cache_hit" mode).
const CACHE_HIT_TOKENS: usize = 13;
19
/// Result of running one compression mode over a single file.
#[derive(Debug, Clone)]
pub struct ModeMeasurement {
    /// Mode name: "map", "signatures", "aggressive", "entropy", or "cache_hit".
    pub mode: String,
    /// Token count of the compressed output (fixed `CACHE_HIT_TOKENS` for "cache_hit").
    pub tokens: usize,
    /// Percentage saved relative to the file's raw token count.
    pub savings_pct: f64,
    /// Wall-clock time spent producing the compressed form, in microseconds.
    pub latency_us: u64,
    /// Information-preservation score (rendered elsewhere as a percentage,
    /// so presumably in [0, 1] — confirm against `preservation::measure`);
    /// -1.0 is a sentinel meaning "not measured" (used by "cache_hit").
    pub preservation_score: f64,
}
30
/// Per-file benchmark result: raw token size plus one measurement per mode.
#[derive(Debug, Clone)]
pub struct FileMeasurement {
    /// Display path, relative to the benchmark root when the file is under it.
    #[allow(dead_code)]
    pub path: String,
    /// File extension without the leading dot, as found on disk.
    pub ext: String,
    /// Token count of the unmodified file contents.
    pub raw_tokens: usize,
    /// One entry per compression mode measured for this file.
    pub modes: Vec<ModeMeasurement>,
}
39
/// Aggregate of all measured files sharing one extension.
#[derive(Debug, Clone)]
pub struct LanguageStats {
    /// File extension without the leading dot.
    pub ext: String,
    /// Number of measured files with this extension.
    pub count: usize,
    /// Sum of raw token counts across those files.
    pub total_tokens: usize,
}
46
/// Per-mode averages across every measured file.
#[derive(Debug, Clone)]
pub struct ModeSummary {
    /// Mode name (see `ModeMeasurement::mode`).
    pub mode: String,
    /// Sum of compressed token counts over all files.
    pub total_compressed_tokens: usize,
    /// Mean of per-file savings percentages.
    pub avg_savings_pct: f64,
    /// Mean per-file latency in microseconds (integer average).
    pub avg_latency_us: u64,
    /// Mean preservation score over files that reported one;
    /// -1.0 when no file had a valid (non-sentinel) score.
    pub avg_preservation: f64,
}
55
/// Outcome of the simulated coding session, comparing three approaches:
/// raw reads, lean-ctx compression, and lean-ctx plus CCP session resume.
#[derive(Debug, Clone)]
pub struct SessionSimResult {
    /// Total tokens consumed with no compression at all.
    pub raw_tokens: usize,
    /// Total tokens with lean-ctx compression (no CCP resume).
    pub lean_tokens: usize,
    /// Total tokens with lean-ctx compression plus CCP resume.
    pub lean_ccp_tokens: usize,
    /// `raw_tokens` priced at `COST_PER_TOKEN`, in USD.
    pub raw_cost: f64,
    /// `lean_tokens` priced at `COST_PER_TOKEN`, in USD.
    pub lean_cost: f64,
    /// `lean_ccp_tokens` priced at `COST_PER_TOKEN`, in USD.
    pub ccp_cost: f64,
}
65
/// Full benchmark report for one project tree, produced by
/// `run_project_benchmark` and consumed by the `format_*` renderers.
#[derive(Debug, Clone)]
pub struct ProjectBenchmark {
    /// Root path that was scanned (as given, or "." when empty).
    pub root: String,
    /// Number of candidate files selected by the scanner.
    pub files_scanned: usize,
    /// Number of files successfully read and measured (<= `files_scanned`).
    pub files_measured: usize,
    /// Sum of raw token counts over all measured files.
    pub total_raw_tokens: usize,
    /// Per-extension aggregates, sorted by total tokens descending.
    pub languages: Vec<LanguageStats>,
    /// Per-mode aggregates across all measured files.
    pub mode_summaries: Vec<ModeSummary>,
    /// Simulated 30-minute session cost comparison.
    pub session_sim: SessionSimResult,
    /// Raw per-file results backing the aggregates above.
    #[allow(dead_code)]
    pub file_results: Vec<FileMeasurement>,
}
78
/// Returns true for directory names the scanner should prune entirely
/// (build output, package caches, and VCS metadata).
fn is_skipped_dir(name: &str) -> bool {
    const SKIPPED: [&str; 13] = [
        "node_modules",
        ".git",
        "target",
        "dist",
        "build",
        ".next",
        ".nuxt",
        "__pycache__",
        ".cache",
        "coverage",
        "vendor",
        ".svn",
        ".hg",
    ];
    SKIPPED.contains(&name)
}
99
100fn is_text_ext(ext: &str) -> bool {
101 matches!(
102 ext,
103 "rs" | "ts"
104 | "tsx"
105 | "js"
106 | "jsx"
107 | "py"
108 | "go"
109 | "java"
110 | "c"
111 | "cpp"
112 | "h"
113 | "hpp"
114 | "cs"
115 | "kt"
116 | "swift"
117 | "rb"
118 | "php"
119 | "vue"
120 | "svelte"
121 | "html"
122 | "css"
123 | "scss"
124 | "less"
125 | "json"
126 | "yaml"
127 | "yml"
128 | "toml"
129 | "xml"
130 | "md"
131 | "txt"
132 | "sh"
133 | "bash"
134 | "zsh"
135 | "fish"
136 | "sql"
137 | "graphql"
138 | "proto"
139 | "ex"
140 | "exs"
141 | "zig"
142 | "lua"
143 | "r"
144 | "R"
145 | "dart"
146 | "scala"
147 )
148}
149
150fn scan_project(root: &str) -> Vec<PathBuf> {
151 let mut files: Vec<(PathBuf, u64)> = Vec::new();
152
153 for entry in WalkDir::new(root)
154 .max_depth(8)
155 .into_iter()
156 .filter_entry(|e| {
157 let name = e.file_name().to_string_lossy();
158 if e.file_type().is_dir() {
159 if e.depth() > 0 && name.starts_with('.') {
160 return false;
161 }
162 return !is_skipped_dir(&name);
163 }
164 true
165 })
166 {
167 let entry = match entry {
168 Ok(e) => e,
169 Err(_) => continue,
170 };
171
172 if entry.file_type().is_dir() {
173 continue;
174 }
175
176 let path = entry.path().to_path_buf();
177 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
178
179 if !is_text_ext(ext) {
180 continue;
181 }
182
183 let size = entry.metadata().map(|m| m.len()).unwrap_or(0);
184 if size == 0 || size > MAX_FILE_SIZE {
185 continue;
186 }
187
188 files.push((path, size));
189 }
190
191 files.sort_by(|a, b| b.1.cmp(&a.1));
192
193 let mut selected = Vec::new();
194 let mut ext_counts: HashMap<String, usize> = HashMap::new();
195
196 for (path, _size) in &files {
197 if selected.len() >= MAX_FILES {
198 break;
199 }
200 let ext = path
201 .extension()
202 .and_then(|e| e.to_str())
203 .unwrap_or("")
204 .to_string();
205 let count = ext_counts.entry(ext.clone()).or_insert(0);
206 if *count < 10 {
207 *count += 1;
208 selected.push(path.clone());
209 }
210 }
211
212 selected
213}
214
215fn measure_mode(content: &str, ext: &str, mode: &str, raw_tokens: usize) -> ModeMeasurement {
218 let start = Instant::now();
219
220 let compressed = match mode {
221 "map" => {
222 let sigs = signatures::extract_signatures(content, ext);
223 let dep_info = deps::extract_deps(content, ext);
224 let mut parts = Vec::new();
225 if !dep_info.imports.is_empty() {
226 parts.push(format!("deps: {}", dep_info.imports.join(", ")));
227 }
228 if !dep_info.exports.is_empty() {
229 parts.push(format!("exports: {}", dep_info.exports.join(", ")));
230 }
231 let key_sigs: Vec<String> = sigs
232 .iter()
233 .filter(|s| s.is_exported || s.indent == 0)
234 .map(|s| s.to_compact())
235 .collect();
236 if !key_sigs.is_empty() {
237 parts.push(key_sigs.join("\n"));
238 }
239 parts.join("\n")
240 }
241 "signatures" => {
242 let sigs = signatures::extract_signatures(content, ext);
243 sigs.iter()
244 .map(|s| s.to_compact())
245 .collect::<Vec<_>>()
246 .join("\n")
247 }
248 "aggressive" => compressor::aggressive_compress(content, Some(ext)),
249 "entropy" => entropy::entropy_compress(content).output,
250 "cache_hit" => "cached re-read ~13tok".to_string(),
251 _ => content.to_string(),
252 };
253
254 let latency = start.elapsed();
255 let tokens = if mode == "cache_hit" {
256 CACHE_HIT_TOKENS
257 } else {
258 count_tokens(&compressed)
259 };
260
261 let savings_pct = if raw_tokens > 0 {
262 (1.0 - tokens as f64 / raw_tokens as f64) * 100.0
263 } else {
264 0.0
265 };
266
267 let preservation_score = if mode == "cache_hit" {
268 -1.0
269 } else {
270 preservation::measure(content, &compressed, ext).overall()
271 };
272
273 ModeMeasurement {
274 mode: mode.to_string(),
275 tokens,
276 savings_pct,
277 latency_us: latency.as_micros() as u64,
278 preservation_score,
279 }
280}
281
282fn measure_file(path: &Path, root: &str) -> Option<FileMeasurement> {
283 let content = std::fs::read_to_string(path).ok()?;
284 if content.is_empty() {
285 return None;
286 }
287
288 let ext = path
289 .extension()
290 .and_then(|e| e.to_str())
291 .unwrap_or("")
292 .to_string();
293
294 let raw_tokens = count_tokens(&content);
295 if raw_tokens == 0 {
296 return None;
297 }
298
299 let modes = ["map", "signatures", "aggressive", "entropy", "cache_hit"];
300 let measurements: Vec<ModeMeasurement> = modes
301 .iter()
302 .map(|m| measure_mode(&content, &ext, m, raw_tokens))
303 .collect();
304
305 let display_path = path
306 .strip_prefix(root)
307 .unwrap_or(path)
308 .to_string_lossy()
309 .to_string();
310
311 Some(FileMeasurement {
312 path: display_path,
313 ext,
314 raw_tokens,
315 modes: measurements,
316 })
317}
318
319fn aggregate_languages(files: &[FileMeasurement]) -> Vec<LanguageStats> {
322 let mut map: HashMap<String, (usize, usize)> = HashMap::new();
323 for f in files {
324 let entry = map.entry(f.ext.clone()).or_insert((0, 0));
325 entry.0 += 1;
326 entry.1 += f.raw_tokens;
327 }
328 let mut stats: Vec<LanguageStats> = map
329 .into_iter()
330 .map(|(ext, (count, total_tokens))| LanguageStats {
331 ext,
332 count,
333 total_tokens,
334 })
335 .collect();
336 stats.sort_by(|a, b| b.total_tokens.cmp(&a.total_tokens));
337 stats
338}
339
340fn aggregate_modes(files: &[FileMeasurement]) -> Vec<ModeSummary> {
341 let mode_names = ["map", "signatures", "aggressive", "entropy", "cache_hit"];
342 let mut summaries = Vec::new();
343
344 for mode_name in &mode_names {
345 let mut total_tokens = 0usize;
346 let mut total_savings = 0.0f64;
347 let mut total_latency = 0u64;
348 let mut total_preservation = 0.0f64;
349 let mut preservation_count = 0usize;
350 let mut count = 0usize;
351
352 for f in files {
353 if let Some(m) = f.modes.iter().find(|m| m.mode == *mode_name) {
354 total_tokens += m.tokens;
355 total_savings += m.savings_pct;
356 total_latency += m.latency_us;
357 if m.preservation_score >= 0.0 {
358 total_preservation += m.preservation_score;
359 preservation_count += 1;
360 }
361 count += 1;
362 }
363 }
364
365 if count == 0 {
366 continue;
367 }
368
369 summaries.push(ModeSummary {
370 mode: mode_name.to_string(),
371 total_compressed_tokens: total_tokens,
372 avg_savings_pct: total_savings / count as f64,
373 avg_latency_us: total_latency / count as u64,
374 avg_preservation: if preservation_count > 0 {
375 total_preservation / preservation_count as f64
376 } else {
377 -1.0
378 },
379 });
380 }
381
382 summaries
383}
384
385fn simulate_session(files: &[FileMeasurement]) -> SessionSimResult {
388 if files.is_empty() {
389 return SessionSimResult {
390 raw_tokens: 0,
391 lean_tokens: 0,
392 lean_ccp_tokens: 0,
393 raw_cost: 0.0,
394 lean_cost: 0.0,
395 ccp_cost: 0.0,
396 };
397 }
398
399 let file_count = files.len().min(15);
400 let selected = &files[..file_count];
401
402 let first_read_raw: usize = selected.iter().map(|f| f.raw_tokens).sum();
403
404 let first_read_lean: usize = selected
405 .iter()
406 .enumerate()
407 .map(|(i, f)| {
408 let mode = if i % 3 == 0 { "aggressive" } else { "map" };
409 f.modes
410 .iter()
411 .find(|m| m.mode == mode)
412 .map(|m| m.tokens)
413 .unwrap_or(f.raw_tokens)
414 })
415 .sum();
416
417 let cache_reread_count = 10usize.min(file_count);
418 let cache_raw: usize = selected[..cache_reread_count]
419 .iter()
420 .map(|f| f.raw_tokens)
421 .sum();
422 let cache_lean: usize = cache_reread_count * CACHE_HIT_TOKENS;
423
424 let shell_count = 8usize;
425 let shell_raw = shell_count * 500;
426 let shell_lean = shell_count * 200;
427
428 let resume_raw: usize = selected.iter().map(|f| f.raw_tokens).sum();
429 let resume_lean: usize = selected
430 .iter()
431 .map(|f| {
432 f.modes
433 .iter()
434 .find(|m| m.mode == "map")
435 .map(|m| m.tokens)
436 .unwrap_or(f.raw_tokens)
437 })
438 .sum();
439 let resume_ccp = 400usize;
440
441 let raw_total = first_read_raw + cache_raw + shell_raw + resume_raw;
442 let lean_total = first_read_lean + cache_lean + shell_lean + resume_lean;
443 let ccp_total = first_read_lean + cache_lean + shell_lean + resume_ccp;
444
445 SessionSimResult {
446 raw_tokens: raw_total,
447 lean_tokens: lean_total,
448 lean_ccp_tokens: ccp_total,
449 raw_cost: raw_total as f64 * COST_PER_TOKEN,
450 lean_cost: lean_total as f64 * COST_PER_TOKEN,
451 ccp_cost: ccp_total as f64 * COST_PER_TOKEN,
452 }
453}
454
455pub fn run_project_benchmark(path: &str) -> ProjectBenchmark {
458 let root = if path.is_empty() { "." } else { path };
459 let scanned = scan_project(root);
460 let files_scanned = scanned.len();
461
462 let file_results: Vec<FileMeasurement> = scanned
463 .iter()
464 .filter_map(|p| measure_file(p, root))
465 .collect();
466
467 let total_raw_tokens: usize = file_results.iter().map(|f| f.raw_tokens).sum();
468 let languages = aggregate_languages(&file_results);
469 let mode_summaries = aggregate_modes(&file_results);
470 let session_sim = simulate_session(&file_results);
471
472 ProjectBenchmark {
473 root: root.to_string(),
474 files_scanned,
475 files_measured: file_results.len(),
476 total_raw_tokens,
477 languages,
478 mode_summaries,
479 session_sim,
480 file_results,
481 }
482}
483
/// Renders the benchmark as an aligned, box-drawn terminal report.
///
/// Sections: header, language/file summary, per-mode performance table,
/// and the three-way session-cost comparison.
pub fn format_terminal(b: &ProjectBenchmark) -> String {
    let mut out = Vec::new();
    // U+2550 (═) double-line rule used as the section separator.
    let sep = "\u{2550}".repeat(66);

    out.push(sep.to_string());
    out.push(format!(" lean-ctx Benchmark — {}", b.root));
    out.push(sep.to_string());

    // Top five languages by token volume, e.g. "12 rs, 8 ts".
    let lang_summary: Vec<String> = b
        .languages
        .iter()
        .take(5)
        .map(|l| format!("{} {}", l.count, l.ext))
        .collect();
    out.push(format!(
        " Scanned: {} files ({})",
        b.files_measured,
        lang_summary.join(", ")
    ));
    out.push(format!(
        " Total raw tokens: {}",
        format_num(b.total_raw_tokens)
    ));
    out.push(String::new());

    out.push(" Mode Performance:".to_string());
    out.push(format!(
        " {:<14} {:>10} {:>10} {:>10} {:>10}",
        "Mode", "Tokens", "Savings", "Latency", "Quality"
    ));
    // U+2500 (─) single-line rule under the table header.
    out.push(format!(" {}", "\u{2500}".repeat(58)));

    for m in &b.mode_summaries {
        // Negative preservation is the "not measured" sentinel (cache_hit).
        let qual = if m.avg_preservation < 0.0 {
            "N/A".to_string()
        } else {
            format!("{:.1}%", m.avg_preservation * 100.0)
        };
        // Switch to milliseconds once latency exceeds 1000 μs.
        let latency = if m.avg_latency_us > 1000 {
            format!("{:.1}ms", m.avg_latency_us as f64 / 1000.0)
        } else {
            format!("{}μs", m.avg_latency_us)
        };
        out.push(format!(
            " {:<14} {:>10} {:>9.1}% {:>10} {:>10}",
            m.mode,
            format_num(m.total_compressed_tokens),
            m.avg_savings_pct,
            latency,
            qual,
        ));
    }

    out.push(String::new());
    out.push(" Session Simulation (30-min coding):".to_string());
    out.push(format!(
        " {:<24} {:>10} {:>10} {:>10}",
        "Approach", "Tokens", "Cost", "Savings"
    ));
    out.push(format!(" {}", "\u{2500}".repeat(58)));

    let s = &b.session_sim;
    // Baseline row: raw has no savings, shown as an em dash.
    out.push(format!(
        " {:<24} {:>10} {:>10} {:>10}",
        "Raw (no compression)",
        format_num(s.raw_tokens),
        format!("${:.3}", s.raw_cost),
        "\u{2014}",
    ));

    // Savings relative to the raw baseline; guard division by zero.
    let lean_pct = if s.raw_tokens > 0 {
        (1.0 - s.lean_tokens as f64 / s.raw_tokens as f64) * 100.0
    } else {
        0.0
    };
    out.push(format!(
        " {:<24} {:>10} {:>10} {:>9.1}%",
        "lean-ctx (no CCP)",
        format_num(s.lean_tokens),
        format!("${:.3}", s.lean_cost),
        lean_pct,
    ));

    let ccp_pct = if s.raw_tokens > 0 {
        (1.0 - s.lean_ccp_tokens as f64 / s.raw_tokens as f64) * 100.0
    } else {
        0.0
    };
    out.push(format!(
        " {:<24} {:>10} {:>10} {:>9.1}%",
        "lean-ctx + CCP",
        format_num(s.lean_ccp_tokens),
        format!("${:.3}", s.ccp_cost),
        ccp_pct,
    ));

    out.push(sep.to_string());
    out.join("\n")
}
585
/// Renders the benchmark as a Markdown report with Languages, Mode
/// Performance, and Session Simulation tables, plus a version footer.
pub fn format_markdown(b: &ProjectBenchmark) -> String {
    let mut out = Vec::new();

    out.push("# lean-ctx Benchmark Report".to_string());
    out.push(String::new());
    out.push(format!("**Project:** `{}`", b.root));
    out.push(format!("**Files measured:** {}", b.files_measured));
    out.push(format!(
        "**Total raw tokens:** {}",
        format_num(b.total_raw_tokens)
    ));
    out.push(String::new());

    out.push("## Languages".to_string());
    out.push(String::new());
    out.push("| Extension | Files | Tokens |".to_string());
    out.push("|-----------|------:|-------:|".to_string());
    for l in &b.languages {
        out.push(format!(
            "| {} | {} | {} |",
            l.ext,
            l.count,
            format_num(l.total_tokens)
        ));
    }
    out.push(String::new());

    out.push("## Mode Performance".to_string());
    out.push(String::new());
    out.push("| Mode | Tokens | Savings | Latency | Quality |".to_string());
    out.push("|------|-------:|--------:|--------:|--------:|".to_string());
    for m in &b.mode_summaries {
        // Negative preservation is the "not measured" sentinel (cache_hit).
        let qual = if m.avg_preservation < 0.0 {
            "N/A".to_string()
        } else {
            format!("{:.1}%", m.avg_preservation * 100.0)
        };
        // Switch to milliseconds once latency exceeds 1000 μs.
        let latency = if m.avg_latency_us > 1000 {
            format!("{:.1}ms", m.avg_latency_us as f64 / 1000.0)
        } else {
            format!("{}μs", m.avg_latency_us)
        };
        out.push(format!(
            "| {} | {} | {:.1}% | {} | {} |",
            m.mode,
            format_num(m.total_compressed_tokens),
            m.avg_savings_pct,
            latency,
            qual
        ));
    }
    out.push(String::new());

    out.push("## Session Simulation (30-min coding)".to_string());
    out.push(String::new());
    out.push("| Approach | Tokens | Cost | Savings |".to_string());
    out.push("|----------|-------:|-----:|--------:|".to_string());

    let s = &b.session_sim;
    out.push(format!(
        "| Raw (no compression) | {} | ${:.3} | — |",
        format_num(s.raw_tokens),
        s.raw_cost
    ));

    // Savings relative to the raw baseline; guard division by zero.
    let lean_pct = if s.raw_tokens > 0 {
        (1.0 - s.lean_tokens as f64 / s.raw_tokens as f64) * 100.0
    } else {
        0.0
    };
    out.push(format!(
        "| lean-ctx (no CCP) | {} | ${:.3} | {:.1}% |",
        format_num(s.lean_tokens),
        s.lean_cost,
        lean_pct
    ));

    let ccp_pct = if s.raw_tokens > 0 {
        (1.0 - s.lean_ccp_tokens as f64 / s.raw_tokens as f64) * 100.0
    } else {
        0.0
    };
    out.push(format!(
        "| lean-ctx + CCP | {} | ${:.3} | {:.1}% |",
        format_num(s.lean_ccp_tokens),
        s.ccp_cost,
        ccp_pct
    ));

    out.push(String::new());
    out.push(format!(
        "*Generated by lean-ctx benchmark v{} — https://leanctx.com*",
        env!("CARGO_PKG_VERSION")
    ));

    out.join("\n")
}
685
686pub fn format_json(b: &ProjectBenchmark) -> String {
689 let modes: Vec<serde_json::Value> = b.mode_summaries.iter().map(|m| {
690 serde_json::json!({
691 "mode": m.mode,
692 "total_compressed_tokens": m.total_compressed_tokens,
693 "avg_savings_pct": round2(m.avg_savings_pct),
694 "avg_latency_us": m.avg_latency_us,
695 "avg_preservation": if m.avg_preservation < 0.0 { serde_json::Value::Null } else { serde_json::json!(round2(m.avg_preservation * 100.0)) },
696 })
697 }).collect();
698
699 let languages: Vec<serde_json::Value> = b
700 .languages
701 .iter()
702 .map(|l| {
703 serde_json::json!({
704 "ext": l.ext,
705 "count": l.count,
706 "total_tokens": l.total_tokens,
707 })
708 })
709 .collect();
710
711 let s = &b.session_sim;
712 let report = serde_json::json!({
713 "version": env!("CARGO_PKG_VERSION"),
714 "root": b.root,
715 "files_scanned": b.files_scanned,
716 "files_measured": b.files_measured,
717 "total_raw_tokens": b.total_raw_tokens,
718 "languages": languages,
719 "mode_summaries": modes,
720 "session_simulation": {
721 "raw_tokens": s.raw_tokens,
722 "lean_tokens": s.lean_tokens,
723 "lean_ccp_tokens": s.lean_ccp_tokens,
724 "raw_cost_usd": round2(s.raw_cost),
725 "lean_cost_usd": round2(s.lean_cost),
726 "ccp_cost_usd": round2(s.ccp_cost),
727 },
728 });
729
730 serde_json::to_string_pretty(&report).unwrap_or_else(|_| "{}".to_string())
731}
732
/// One row of the CEP A/B comparison: token cost for a mode with and
/// without the CEP envelope, plus the quality verdict for that mode.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct CepComparison {
    /// Mode name ("map", "signatures", "aggressive", or "entropy").
    pub mode: String,
    /// Compressed token count as measured by `measure_mode`.
    pub tokens_without_cep: usize,
    /// Same count plus the fixed CEP envelope overhead.
    pub tokens_with_cep: usize,
    /// Composite quality score from `quality::score`.
    pub quality_score: f64,
    /// Whether the quality check passed its threshold.
    pub quality_passed: bool,
}
744
745#[allow(dead_code)]
746pub fn run_cep_comparison(path: &str) -> Vec<CepComparison> {
747 let content = match std::fs::read_to_string(path) {
748 Ok(c) => c,
749 Err(_) => return Vec::new(),
750 };
751
752 let ext = Path::new(path)
753 .extension()
754 .and_then(|e| e.to_str())
755 .unwrap_or("rs");
756
757 let raw_tokens = count_tokens(&content);
758 if raw_tokens == 0 {
759 return Vec::new();
760 }
761
762 let modes = ["map", "signatures", "aggressive", "entropy"];
763 let mut comparisons = Vec::new();
764
765 for mode in &modes {
766 let measurement = measure_mode(&content, ext, mode, raw_tokens);
767
768 let compressed = match *mode {
769 "aggressive" => compressor::aggressive_compress(&content, Some(ext)),
770 "entropy" => entropy::entropy_compress(&content).output,
771 "signatures" => signatures::extract_signatures(&content, ext)
772 .iter()
773 .map(|s| s.to_compact())
774 .collect::<Vec<_>>()
775 .join("\n"),
776 _ => {
777 let dep_info = deps::extract_deps(&content, ext);
778 let sigs = signatures::extract_signatures(&content, ext);
779 let mut parts = Vec::new();
780 if !dep_info.imports.is_empty() {
781 parts.push(dep_info.imports.join(","));
782 }
783 let key_sigs: Vec<String> = sigs
784 .iter()
785 .filter(|s| s.is_exported || s.indent == 0)
786 .map(|s| s.to_compact())
787 .collect();
788 if !key_sigs.is_empty() {
789 parts.push(key_sigs.join("\n"));
790 }
791 parts.join("\n")
792 }
793 };
794
795 let q = quality::score(&content, &compressed, ext);
796 let cep_overhead = 5;
797 let tokens_with_cep = measurement.tokens + cep_overhead;
798
799 comparisons.push(CepComparison {
800 mode: mode.to_string(),
801 tokens_without_cep: measurement.tokens,
802 tokens_with_cep,
803 quality_score: q.composite,
804 quality_passed: q.passed,
805 });
806 }
807
808 comparisons
809}
810
811#[allow(dead_code)]
812pub fn format_cep_comparison(comparisons: &[CepComparison], path: &str) -> String {
813 let mut out = Vec::new();
814 let short = crate::core::protocol::shorten_path(path);
815
816 out.push(format!("CEP A/B Benchmark — {short}"));
817 out.push("═".repeat(60));
818 out.push(format!(
819 "{:<14} {:>8} {:>8} {:>8} {:>6}",
820 "Mode", "Without", "With CEP", "Quality", "Pass"
821 ));
822 out.push("─".repeat(60));
823
824 for c in comparisons {
825 let pass = if c.quality_passed { "✓" } else { "✗" };
826 out.push(format!(
827 "{:<14} {:>8} {:>8} {:>7.0}% {:>6}",
828 c.mode,
829 format_num(c.tokens_without_cep),
830 format_num(c.tokens_with_cep),
831 c.quality_score * 100.0,
832 pass,
833 ));
834 }
835
836 out.push("═".repeat(60));
837 out.join("\n")
838}
839
/// Formats a token count for display: "42", "1.5K", "2.3M".
///
/// Values from 999,950 upward are shown in the M band so the K branch
/// never rounds to the nonsensical "1000.0K" (previously 999,950..=999,999
/// rendered exactly that way).
fn format_num(n: usize) -> String {
    if n >= 999_950 {
        // At this point {:.1} of n/1e6 rounds to at least "1.0M".
        format!("{:.1}M", n as f64 / 1_000_000.0)
    } else if n >= 1_000 {
        format!("{:.1}K", n as f64 / 1_000.0)
    } else {
        format!("{n}")
    }
}
851
/// Rounds a float to two decimal places (half away from zero, per
/// `f64::round`), used to keep JSON output tidy.
fn round2(v: f64) -> f64 {
    let scaled = v * 100.0;
    scaled.round() / 100.0
}