lean_ctx/core/benchmark_compare/
metrics.rs1use std::path::{Path, PathBuf};
2use std::time::Instant;
3
4use crate::core::benchmark::{self, ModeSummary, ProjectBenchmark};
5use crate::core::bm25_index::BM25Index;
6use crate::core::tokens::count_tokens;
7
/// Timing record for a single BM25 search query.
#[derive(Clone, Debug)]
pub struct SearchLatency {
    /// The query string that was searched for.
    pub query: String,
    /// Wall-clock duration of the search call, in microseconds.
    pub bm25_us: u64,
    /// Number of results the search returned.
    pub result_count: usize,
}
14
/// On-disk size of the search index data.
#[derive(Clone, Debug)]
pub struct DiskFootprint {
    /// Size in bytes of the BM25 index file (0 when its metadata cannot be read).
    pub bm25_index_bytes: u64,
    /// Combined size in bytes of all files under `<root>/.lean-ctx`, or the BM25
    /// index size alone when that directory does not exist.
    pub total_index_bytes: u64,
}
20
/// Timings, in microseconds, of the phases measured for a cold start.
#[derive(Clone, Debug)]
pub struct ColdStartTiming {
    /// Time spent listing candidate source files.
    pub scan_us: u64,
    /// Time spent building the BM25 index from the directory.
    pub bm25_build_us: u64,
    /// Time spent reading the first listed file and counting its tokens.
    pub first_read_us: u64,
    /// Sum of the three phase timings above.
    pub total_us: u64,
}
28
/// One row of the per-mode comparison table.
#[derive(Clone, Debug)]
pub struct ModeComparison {
    /// Name of the mode this row describes.
    pub mode: String,
    /// Average savings percentage reported for the mode.
    pub avg_compression_pct: f64,
    /// Average latency reported for the mode, in microseconds.
    pub avg_latency_us: u64,
    /// Average preservation score for the mode.
    pub avg_quality: f64,
    /// Raw token total of the benchmarked project.
    pub total_raw_tokens: usize,
    /// Token total after the mode has been applied.
    pub total_compressed_tokens: usize,
}
38
39#[derive(Debug, Clone)]
40pub struct ComparativeMetrics {
41 pub project_benchmark: ProjectBenchmark,
42 pub mode_comparisons: Vec<ModeComparison>,
43 pub search_latencies: Vec<SearchLatency>,
44 pub disk_footprint: DiskFootprint,
45 pub cold_start: ColdStartTiming,
46 pub feature_count: usize,
47}
48
/// Fixed query strings that are timed against the BM25 index, one search per entry.
const SEARCH_QUERIES: &[&str] = &[
    "function",
    "error handling",
    "configuration",
    "parse",
    "test",
];
56
57pub fn measure_all(root: &Path) -> ComparativeMetrics {
58 let root_str = root.to_string_lossy();
59
60 let project_benchmark = benchmark::run_project_benchmark(&root_str);
61 let mode_comparisons = build_mode_comparisons(&project_benchmark);
62 let search_latencies = measure_search_latency(root);
63 let disk_footprint = measure_disk_footprint(root);
64 let cold_start = measure_cold_start(root);
65
66 ComparativeMetrics {
67 project_benchmark,
68 mode_comparisons,
69 search_latencies,
70 disk_footprint,
71 cold_start,
72 feature_count: count_features(),
73 }
74}
75
76fn build_mode_comparisons(bench: &ProjectBenchmark) -> Vec<ModeComparison> {
77 let mode_names = ["full", "map", "signatures", "aggressive", "entropy"];
78
79 mode_names
80 .iter()
81 .filter_map(|mode_name| {
82 let summary = if *mode_name == "full" {
83 Some(ModeSummary {
84 mode: "full".to_string(),
85 total_compressed_tokens: bench.total_raw_tokens,
86 avg_savings_pct: 0.0,
87 avg_latency_us: 0,
88 avg_preservation: 1.0,
89 })
90 } else {
91 bench
92 .mode_summaries
93 .iter()
94 .find(|m| m.mode == *mode_name)
95 .cloned()
96 };
97
98 summary.map(|s| ModeComparison {
99 mode: s.mode.clone(),
100 avg_compression_pct: s.avg_savings_pct,
101 avg_latency_us: s.avg_latency_us,
102 avg_quality: if s.avg_preservation < 0.0 {
103 0.0
104 } else {
105 s.avg_preservation
106 },
107 total_raw_tokens: bench.total_raw_tokens,
108 total_compressed_tokens: s.total_compressed_tokens,
109 })
110 })
111 .collect()
112}
113
114fn measure_search_latency(root: &Path) -> Vec<SearchLatency> {
115 let index = BM25Index::load_or_build_fast(root);
116
117 SEARCH_QUERIES
118 .iter()
119 .map(|query| {
120 let start = Instant::now();
121 let results = index.search(query, 10);
122 let elapsed = start.elapsed();
123
124 SearchLatency {
125 query: (*query).to_string(),
126 bm25_us: elapsed.as_micros() as u64,
127 result_count: results.len(),
128 }
129 })
130 .collect()
131}
132
133fn measure_disk_footprint(root: &Path) -> DiskFootprint {
134 let bm25_path = BM25Index::index_file_path(root);
135 let bm25_bytes = std::fs::metadata(&bm25_path).map_or(0, |m| m.len());
136
137 let index_dir = root.join(".lean-ctx");
138 let total_bytes = if index_dir.exists() {
139 walkdir::WalkDir::new(&index_dir)
140 .into_iter()
141 .filter_map(Result::ok)
142 .filter(|e| e.file_type().is_file())
143 .map(|e| e.metadata().map_or(0, |m| m.len()))
144 .sum()
145 } else {
146 bm25_bytes
147 };
148
149 DiskFootprint {
150 bm25_index_bytes: bm25_bytes,
151 total_index_bytes: total_bytes,
152 }
153}
154
155fn measure_cold_start(root: &Path) -> ColdStartTiming {
156 let scan_start = Instant::now();
157 let files = list_text_files(root, 20);
158 let scan_us = scan_start.elapsed().as_micros() as u64;
159
160 let bm25_start = Instant::now();
161 let _index = BM25Index::build_from_directory(root);
162 let bm25_build_us = bm25_start.elapsed().as_micros() as u64;
163
164 let read_start = Instant::now();
165 if let Some(first_file) = files.first() {
166 if let Ok(content) = std::fs::read_to_string(first_file) {
167 let _ = count_tokens(&content);
168 }
169 }
170 let first_read_us = read_start.elapsed().as_micros() as u64;
171
172 ColdStartTiming {
173 scan_us,
174 bm25_build_us,
175 first_read_us,
176 total_us: scan_us + bm25_build_us + first_read_us,
177 }
178}
179
180fn list_text_files(root: &Path, max: usize) -> Vec<PathBuf> {
181 let code_exts = [
182 "rs", "ts", "tsx", "js", "py", "go", "java", "c", "cpp", "rb",
183 ];
184
185 walkdir::WalkDir::new(root)
186 .max_depth(6)
187 .into_iter()
188 .filter_entry(|e| {
189 let name = e.file_name().to_string_lossy();
190 if e.file_type().is_dir() {
191 return !matches!(
192 name.as_ref(),
193 "node_modules" | ".git" | "target" | "dist" | "build" | "__pycache__"
194 );
195 }
196 true
197 })
198 .filter_map(Result::ok)
199 .filter(|e| e.file_type().is_file())
200 .filter(|e| {
201 e.path()
202 .extension()
203 .and_then(|x| x.to_str())
204 .is_some_and(|ext| code_exts.contains(&ext))
205 })
206 .take(max)
207 .map(walkdir::DirEntry::into_path)
208 .collect()
209}
210
/// Returns the feature tally used in the comparison report.
///
/// The figure is a hand-maintained table: a fixed number of features per
/// category, summed.
fn count_features() -> usize {
    // (category, number of features counted in that category)
    const FEATURE_GROUPS: [(&str, usize); 6] = [
        ("read_modes", 10),
        ("search", 2),
        ("compression", 3),
        ("session", 3),
        ("analysis", 3),
        ("ops", 2),
    ];

    FEATURE_GROUPS.iter().map(|&(_, count)| count).sum()
}
225
226pub fn avg_search_latency_us(latencies: &[SearchLatency]) -> u64 {
227 if latencies.is_empty() {
228 return 0;
229 }
230 let total: u64 = latencies.iter().map(|l| l.bm25_us).sum();
231 total / latencies.len() as u64
232}
233
/// Renders a byte count as a short human-readable string.
///
/// Values of at least 1 MiB are shown in `MB` and values of at least 1 KiB in
/// `KB`, both with one decimal place; anything smaller is shown as whole bytes
/// with a ` B` suffix. The units are binary (1 KB = 1024 B).
pub fn format_bytes(bytes: u64) -> String {
    const KIB: u64 = 1024;
    const MIB: u64 = 1024 * 1024;

    match bytes {
        b if b >= MIB => format!("{:.1} MB", b as f64 / MIB as f64),
        b if b >= KIB => format!("{:.1} KB", b as f64 / KIB as f64),
        b => format!("{b} B"),
    }
}
243
/// Renders a duration given in microseconds as a short human-readable string.
///
/// One second or more is shown in seconds with two decimals, one millisecond
/// or more in milliseconds with one decimal, and anything shorter as whole
/// microseconds.
pub fn format_duration_us(us: u64) -> String {
    const MICROS_PER_MS: u64 = 1_000;
    const MICROS_PER_SEC: u64 = 1_000_000;

    match us {
        t if t >= MICROS_PER_SEC => format!("{:.2}s", t as f64 / MICROS_PER_SEC as f64),
        t if t >= MICROS_PER_MS => format!("{:.1}ms", t as f64 / MICROS_PER_MS as f64),
        t => format!("{t}μs"),
    }
}
253
#[cfg(test)]
mod tests {
    use super::*;

    /// One sample per unit tier: bytes, KB and MB.
    #[test]
    fn format_bytes_ranges() {
        let cases = [(500, "500 B"), (2048, "2.0 KB"), (5_242_880, "5.0 MB")];
        for (bytes, expected) in cases {
            assert_eq!(format_bytes(bytes), expected);
        }
    }

    /// One sample per unit tier: microseconds, milliseconds and seconds.
    #[test]
    fn format_duration_ranges() {
        let cases = [(500, "500μs"), (1500, "1.5ms"), (2_500_000, "2.50s")];
        for (us, expected) in cases {
            assert_eq!(format_duration_us(us), expected);
        }
    }

    /// The feature tally must stay within a plausible band.
    #[test]
    fn count_features_is_reasonable() {
        let n = count_features();
        assert!(n >= 15, "lean-ctx has many features; got {n}");
        assert!(n <= 50, "feature count should be realistic; got {n}");
    }

    /// Averaging no samples yields zero instead of dividing by zero.
    #[test]
    fn avg_search_latency_empty() {
        let none: &[SearchLatency] = &[];
        assert_eq!(avg_search_latency_us(none), 0);
    }

    /// Benchmarks the relative `src` directory and checks that both the
    /// synthesized "full" row and the "map" row are present.
    #[test]
    fn build_mode_comparisons_includes_full() {
        let bench = crate::core::benchmark::run_project_benchmark("src");
        let comps = build_mode_comparisons(&bench);
        let has_mode = |wanted: &str| comps.iter().any(|c| c.mode == wanted);
        assert!(has_mode("full"));
        assert!(has_mode("map"));
    }

    /// End-to-end run over the relative `src` directory.
    #[test]
    fn measure_all_on_src() {
        let metrics = measure_all(Path::new("src"));
        assert!(metrics.project_benchmark.files_measured > 0);
        assert!(!metrics.mode_comparisons.is_empty());
        assert!(!metrics.search_latencies.is_empty());
        assert!(metrics.feature_count > 0);
    }
}