// ruvector_mincut/optimization/benchmark.rs
//! Comprehensive Benchmark Suite for j-Tree + BMSSP Optimizations
//!
//! Measures before/after performance for each optimization:
//! - DSpar: 5.9x target speedup
//! - Cache: 10x target for repeated queries
//! - SIMD: 2-4x target for distance operations
//! - Pool: 50-75% memory reduction
//! - Parallel: Near-linear scaling
//! - WASM Batch: 10x FFI overhead reduction
//!
//! Target: Combined 10x speedup over naive implementation
use super::cache::{CacheConfig, PathDistanceCache};
use super::dspar::{DegreePresparse, PresparseConfig};
use super::parallel::{LevelUpdateResult, ParallelConfig, ParallelLevelUpdater, WorkItem};
use super::pool::{LevelData, LevelPool, PoolConfig};
use super::simd_distance::{DistanceArray, SimdDistanceOps};
use super::wasm_batch::{BatchConfig, WasmBatchOps};
use crate::graph::DynamicGraph;
use std::collections::HashSet;
use std::time::{Duration, Instant};
/// Single benchmark result comparing a naive baseline against an optimized
/// implementation for one workload.
#[derive(Debug, Clone)]
pub struct BenchmarkResult {
    /// Name of the benchmark (includes the workload size, e.g. "DSpar n=1000")
    pub name: String,
    /// Baseline time (naive implementation), microseconds per iteration
    pub baseline_us: u64,
    /// Optimized time, microseconds per iteration
    pub optimized_us: u64,
    /// Speedup factor (baseline / optimized); `f64::INFINITY` when `optimized_us == 0`
    pub speedup: f64,
    /// Target speedup the optimization aims for
    pub target_speedup: f64,
    /// Whether target was achieved (`speedup >= target_speedup`)
    pub target_achieved: bool,
    /// Memory usage baseline (bytes); 0 when memory was not measured
    pub baseline_memory: usize,
    /// Memory usage optimized (bytes); 0 when memory was not measured
    pub optimized_memory: usize,
    /// Memory reduction percentage (0.0 when baseline memory is 0)
    pub memory_reduction_percent: f64,
    /// Additional named metrics (e.g. cache hit rate, sparsity ratio)
    pub metrics: Vec<(String, f64)>,
}
47
48impl BenchmarkResult {
49    /// Create new result
50    pub fn new(name: &str, baseline_us: u64, optimized_us: u64, target_speedup: f64) -> Self {
51        let speedup = if optimized_us > 0 {
52            baseline_us as f64 / optimized_us as f64
53        } else {
54            f64::INFINITY
55        };
56
57        Self {
58            name: name.to_string(),
59            baseline_us,
60            optimized_us,
61            speedup,
62            target_speedup,
63            target_achieved: speedup >= target_speedup,
64            baseline_memory: 0,
65            optimized_memory: 0,
66            memory_reduction_percent: 0.0,
67            metrics: Vec::new(),
68        }
69    }
70
71    /// Set memory metrics
72    pub fn with_memory(mut self, baseline: usize, optimized: usize) -> Self {
73        self.baseline_memory = baseline;
74        self.optimized_memory = optimized;
75        self.memory_reduction_percent = if baseline > 0 {
76            100.0 * (1.0 - (optimized as f64 / baseline as f64))
77        } else {
78            0.0
79        };
80        self
81    }
82
83    /// Add custom metric
84    pub fn add_metric(&mut self, name: &str, value: f64) {
85        self.metrics.push((name.to_string(), value));
86    }
87}
88
/// Results for a single optimization across all benchmarked workloads.
#[derive(Debug, Clone)]
pub struct OptimizationBenchmark {
    /// Optimization name (e.g. "DSpar", "Cache", "SIMD")
    pub name: String,
    /// Results for different workloads (one or more per tested size)
    pub results: Vec<BenchmarkResult>,
    /// Overall assessment aggregated over `results`
    pub summary: BenchmarkSummary,
}
99
/// Summary of benchmark results aggregated over all workloads of one
/// optimization.
#[derive(Debug, Clone, Default)]
pub struct BenchmarkSummary {
    /// Average speedup achieved (arithmetic mean)
    pub avg_speedup: f64,
    /// Minimum speedup across workloads
    pub min_speedup: f64,
    /// Maximum speedup across workloads
    pub max_speedup: f64,
    /// Percentage of workloads whose speedup target was achieved
    pub targets_achieved_percent: f64,
    /// Overall memory reduction (averaged over workloads that measured memory)
    pub avg_memory_reduction: f64,
}
114
/// Comprehensive benchmark suite driving all optimization benchmarks.
pub struct BenchmarkSuite {
    /// Test graph sizes (vertex counts) to benchmark at
    sizes: Vec<usize>,
    /// Number of iterations per test (elapsed time is averaged over these)
    iterations: usize,
    /// Results collected by `run_all`
    results: Vec<OptimizationBenchmark>,
}
124
125impl BenchmarkSuite {
126    /// Create new benchmark suite
127    pub fn new() -> Self {
128        Self {
129            sizes: vec![100, 1000, 10000],
130            iterations: 10,
131            results: Vec::new(),
132        }
133    }
134
135    /// Set test sizes
136    pub fn with_sizes(mut self, sizes: Vec<usize>) -> Self {
137        self.sizes = sizes;
138        self
139    }
140
141    /// Set iterations
142    pub fn with_iterations(mut self, iterations: usize) -> Self {
143        self.iterations = iterations;
144        self
145    }
146
147    /// Run all benchmarks
148    pub fn run_all(&mut self) -> &Vec<OptimizationBenchmark> {
149        self.results.clear();
150
151        self.results.push(self.benchmark_dspar());
152        self.results.push(self.benchmark_cache());
153        self.results.push(self.benchmark_simd());
154        self.results.push(self.benchmark_pool());
155        self.results.push(self.benchmark_parallel());
156        self.results.push(self.benchmark_wasm_batch());
157
158        &self.results
159    }
160
161    /// Get combined speedup estimate
162    pub fn combined_speedup(&self) -> f64 {
163        if self.results.is_empty() {
164            return 1.0;
165        }
166
167        // Estimate combined speedup (conservative: product of square roots)
168        // Skip results with zero or negative speedup to avoid NaN
169        let mut combined = 1.0;
170        let mut count = 0;
171        for result in &self.results {
172            let speedup = result.summary.avg_speedup;
173            if speedup > 0.0 && speedup.is_finite() {
174                combined *= speedup.sqrt();
175                count += 1;
176            }
177        }
178
179        if count == 0 {
180            return 1.0;
181        }
182
183        combined
184    }
185
186    /// Benchmark DSpar (Degree-based presparse)
187    fn benchmark_dspar(&self) -> OptimizationBenchmark {
188        let mut results = Vec::new();
189
190        for &size in &self.sizes {
191            let graph = create_test_graph(size, size * 5);
192
193            // Baseline: process all edges
194            let baseline_start = Instant::now();
195            for _ in 0..self.iterations {
196                let edges = graph.edges();
197                let _count = edges.len();
198            }
199            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
200
201            // Optimized: DSpar filtering
202            let mut dspar = DegreePresparse::with_config(PresparseConfig {
203                target_sparsity: 0.1,
204                ..Default::default()
205            });
206
207            let opt_start = Instant::now();
208            for _ in 0..self.iterations {
209                let _ = dspar.presparse(&graph);
210            }
211            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
212
213            let mut result = BenchmarkResult::new(
214                &format!("DSpar n={}", size),
215                baseline_us,
216                opt_us,
217                5.9, // Target speedup
218            );
219
220            // Get sparsification stats
221            let sparse_result = dspar.presparse(&graph);
222            result.add_metric("sparsity_ratio", sparse_result.stats.sparsity_ratio);
223            result.add_metric(
224                "edges_reduced",
225                (sparse_result.stats.original_edges - sparse_result.stats.sparse_edges) as f64,
226            );
227
228            results.push(result);
229        }
230
231        compute_summary("DSpar", results)
232    }
233
234    /// Benchmark cache performance
235    fn benchmark_cache(&self) -> OptimizationBenchmark {
236        let mut results = Vec::new();
237
238        for &size in &self.sizes {
239            // Baseline: no caching (compute every time)
240            let baseline_start = Instant::now();
241            let mut total = 0.0;
242            for _ in 0..self.iterations {
243                for i in 0..size {
244                    // Simulate distance computation
245                    total += (i as f64 * 1.414).sqrt();
246                }
247            }
248            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
249            let _ = total; // Prevent optimization
250
251            // Optimized: with caching
252            let cache = PathDistanceCache::with_config(CacheConfig {
253                max_entries: size,
254                ..Default::default()
255            });
256
257            // Warm up cache
258            for i in 0..(size / 2) {
259                cache.insert(i as u64, (i + 1) as u64, (i as f64).sqrt());
260            }
261
262            let opt_start = Instant::now();
263            for _ in 0..self.iterations {
264                for i in 0..size {
265                    if cache.get(i as u64, (i + 1) as u64).is_none() {
266                        cache.insert(i as u64, (i + 1) as u64, (i as f64).sqrt());
267                    }
268                }
269            }
270            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
271
272            let mut result = BenchmarkResult::new(
273                &format!("Cache n={}", size),
274                baseline_us,
275                opt_us,
276                10.0, // Target speedup for cached hits
277            );
278
279            let stats = cache.stats();
280            result.add_metric("hit_rate", stats.hit_rate());
281            result.add_metric("cache_size", stats.size as f64);
282
283            results.push(result);
284        }
285
286        compute_summary("Cache", results)
287    }
288
289    /// Benchmark SIMD operations
290    fn benchmark_simd(&self) -> OptimizationBenchmark {
291        let mut results = Vec::new();
292
293        for &size in &self.sizes {
294            let mut arr = DistanceArray::new(size);
295
296            // Initialize with test data
297            for i in 0..size {
298                arr.set(i as u64, (i as f64) * 0.5 + 1.0);
299            }
300            arr.set((size / 2) as u64, 0.1); // Min value
301
302            // Baseline: naive find_min
303            let baseline_start = Instant::now();
304            for _ in 0..self.iterations {
305                let data = arr.as_slice();
306                let mut min_val = f64::INFINITY;
307                let mut min_idx = 0;
308                for (i, &d) in data.iter().enumerate() {
309                    if d < min_val {
310                        min_val = d;
311                        min_idx = i;
312                    }
313                }
314                let _ = (min_val, min_idx);
315            }
316            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
317
318            // Optimized: SIMD find_min
319            let opt_start = Instant::now();
320            for _ in 0..self.iterations {
321                let _ = SimdDistanceOps::find_min(&arr);
322            }
323            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
324
325            let result = BenchmarkResult::new(
326                &format!("SIMD find_min n={}", size),
327                baseline_us,
328                opt_us.max(1), // Avoid divide by zero
329                2.0,           // Target speedup
330            );
331
332            results.push(result);
333
334            // Also benchmark relax_batch
335            let neighbors: Vec<_> = (0..(size / 10).min(100))
336                .map(|i| ((i * 10) as u64, 1.0))
337                .collect();
338
339            let baseline_start = Instant::now();
340            let mut arr_baseline = DistanceArray::new(size);
341            for _ in 0..self.iterations {
342                let data = arr_baseline.as_mut_slice();
343                for &(idx, weight) in &neighbors {
344                    let idx = idx as usize;
345                    if idx < data.len() {
346                        let new_dist = 0.0 + weight;
347                        if new_dist < data[idx] {
348                            data[idx] = new_dist;
349                        }
350                    }
351                }
352            }
353            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
354
355            let mut arr_opt = DistanceArray::new(size);
356            let opt_start = Instant::now();
357            for _ in 0..self.iterations {
358                SimdDistanceOps::relax_batch(&mut arr_opt, 0.0, &neighbors);
359            }
360            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
361
362            let result = BenchmarkResult::new(
363                &format!("SIMD relax_batch n={}", size),
364                baseline_us,
365                opt_us.max(1),
366                2.0,
367            );
368
369            results.push(result);
370        }
371
372        compute_summary("SIMD", results)
373    }
374
375    /// Benchmark pool allocation
376    fn benchmark_pool(&self) -> OptimizationBenchmark {
377        let mut results = Vec::new();
378
379        for &size in &self.sizes {
380            // Baseline: allocate/deallocate each time
381            let baseline_start = Instant::now();
382            let mut baseline_memory = 0usize;
383            for _ in 0..self.iterations {
384                let mut levels = Vec::new();
385                for i in 0..10 {
386                    let level = LevelData::new(i, size);
387                    baseline_memory = baseline_memory.max(std::mem::size_of_val(&level));
388                    levels.push(level);
389                }
390                // Drop all
391                drop(levels);
392            }
393            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
394
395            // Optimized: pool allocation with lazy deallocation
396            let pool = LevelPool::with_config(PoolConfig {
397                max_materialized_levels: 5,
398                lazy_dealloc: true,
399                ..Default::default()
400            });
401
402            let opt_start = Instant::now();
403            for _ in 0..self.iterations {
404                for i in 0..10 {
405                    let level = pool.allocate_level(i, size);
406                    pool.materialize(i, level);
407                }
408                // Some evictions happen automatically
409            }
410            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
411
412            let stats = pool.stats();
413
414            let mut result =
415                BenchmarkResult::new(&format!("Pool n={}", size), baseline_us, opt_us.max(1), 2.0);
416
417            result = result.with_memory(
418                baseline_memory * 10,  // Baseline: all levels materialized
419                stats.pool_size_bytes, // Optimized: only max_materialized
420            );
421
422            result.add_metric("evictions", stats.evictions as f64);
423            result.add_metric("materialized_levels", stats.materialized_levels as f64);
424
425            results.push(result);
426        }
427
428        compute_summary("Pool", results)
429    }
430
431    /// Benchmark parallel processing
432    fn benchmark_parallel(&self) -> OptimizationBenchmark {
433        let mut results = Vec::new();
434
435        for &size in &self.sizes {
436            let levels: Vec<usize> = (0..100).collect();
437
438            // Baseline: sequential processing
439            let baseline_start = Instant::now();
440            for _ in 0..self.iterations {
441                let _results: Vec<_> = levels
442                    .iter()
443                    .map(|&level| {
444                        // Simulate work
445                        let mut sum = 0.0;
446                        for i in 0..(size / 100).max(1) {
447                            sum += (i as f64).sqrt();
448                        }
449                        LevelUpdateResult {
450                            level,
451                            cut_value: sum,
452                            partition: HashSet::new(),
453                            time_us: 0,
454                        }
455                    })
456                    .collect();
457            }
458            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
459
460            // Optimized: parallel processing
461            let updater = ParallelLevelUpdater::with_config(ParallelConfig {
462                min_parallel_size: 10,
463                ..Default::default()
464            });
465
466            let opt_start = Instant::now();
467            for _ in 0..self.iterations {
468                let _results = updater.process_parallel(&levels, |level| {
469                    let mut sum = 0.0;
470                    for i in 0..(size / 100).max(1) {
471                        sum += (i as f64).sqrt();
472                    }
473                    LevelUpdateResult {
474                        level,
475                        cut_value: sum,
476                        partition: HashSet::new(),
477                        time_us: 0,
478                    }
479                });
480            }
481            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
482
483            let result = BenchmarkResult::new(
484                &format!("Parallel n={}", size),
485                baseline_us,
486                opt_us.max(1),
487                2.0, // Conservative target (depends on core count)
488            );
489
490            results.push(result);
491        }
492
493        compute_summary("Parallel", results)
494    }
495
496    /// Benchmark WASM batch operations
497    fn benchmark_wasm_batch(&self) -> OptimizationBenchmark {
498        let mut results = Vec::new();
499
500        for &size in &self.sizes {
501            let edges: Vec<_> = (0..size).map(|i| (i as u64, (i + 1) as u64, 1.0)).collect();
502
503            // Baseline: individual operations
504            let baseline_start = Instant::now();
505            for _ in 0..self.iterations {
506                // Simulate individual FFI calls
507                for edge in &edges {
508                    let _ = edge; // FFI overhead simulation
509                    std::hint::black_box(edge);
510                }
511            }
512            let baseline_us = baseline_start.elapsed().as_micros() as u64 / self.iterations as u64;
513
514            // Optimized: batch operations
515            let mut batch = WasmBatchOps::with_config(BatchConfig {
516                max_batch_size: 1024,
517                ..Default::default()
518            });
519
520            let opt_start = Instant::now();
521            for _ in 0..self.iterations {
522                batch.queue_insert_edges(edges.clone());
523                let _ = batch.execute_batch();
524            }
525            let opt_us = opt_start.elapsed().as_micros() as u64 / self.iterations as u64;
526
527            let stats = batch.stats();
528
529            let mut result = BenchmarkResult::new(
530                &format!("WASM Batch n={}", size),
531                baseline_us,
532                opt_us.max(1),
533                10.0,
534            );
535
536            result.add_metric("avg_items_per_op", stats.avg_items_per_op);
537
538            results.push(result);
539        }
540
541        compute_summary("WASM Batch", results)
542    }
543
544    /// Get results
545    pub fn results(&self) -> &Vec<OptimizationBenchmark> {
546        &self.results
547    }
548
549    /// Generate report string
550    pub fn report(&self) -> String {
551        let mut report = String::new();
552
553        report.push_str("=== j-Tree + BMSSP Optimization Benchmark Report ===\n\n");
554
555        for opt in &self.results {
556            report.push_str(&format!("## {} Optimization\n", opt.name));
557            report.push_str(&format!(
558                "   Average Speedup: {:.2}x\n",
559                opt.summary.avg_speedup
560            ));
561            report.push_str(&format!(
562                "   Min/Max: {:.2}x / {:.2}x\n",
563                opt.summary.min_speedup, opt.summary.max_speedup
564            ));
565            report.push_str(&format!(
566                "   Targets Achieved: {:.0}%\n",
567                opt.summary.targets_achieved_percent
568            ));
569
570            if opt.summary.avg_memory_reduction > 0.0 {
571                report.push_str(&format!(
572                    "   Memory Reduction: {:.1}%\n",
573                    opt.summary.avg_memory_reduction
574                ));
575            }
576
577            report.push_str("\n   Details:\n");
578            for result in &opt.results {
579                report.push_str(&format!(
580                    "   - {}: {:.2}x (target: {:.2}x) {}\n",
581                    result.name,
582                    result.speedup,
583                    result.target_speedup,
584                    if result.target_achieved {
585                        "[OK]"
586                    } else {
587                        "[MISS]"
588                    }
589                ));
590            }
591            report.push_str("\n");
592        }
593
594        let combined = self.combined_speedup();
595        report.push_str(&format!("## Combined Speedup Estimate: {:.2}x\n", combined));
596        report.push_str(&format!("   Target: 10x\n"));
597        report.push_str(&format!(
598            "   Status: {}\n",
599            if combined >= 10.0 {
600                "TARGET ACHIEVED"
601            } else {
602                "In Progress"
603            }
604        ));
605
606        report
607    }
608}
609
610impl Default for BenchmarkSuite {
611    fn default() -> Self {
612        Self::new()
613    }
614}
615
616/// Helper to create test graph
617fn create_test_graph(vertices: usize, edges: usize) -> DynamicGraph {
618    let graph = DynamicGraph::new();
619
620    // Create vertices
621    for i in 0..vertices {
622        graph.add_vertex(i as u64);
623    }
624
625    // Create random-ish edges
626    let mut edge_count = 0;
627    for i in 0..vertices {
628        for j in (i + 1)..vertices {
629            if edge_count >= edges {
630                break;
631            }
632            let _ = graph.insert_edge(i as u64, j as u64, 1.0);
633            edge_count += 1;
634        }
635        if edge_count >= edges {
636            break;
637        }
638    }
639
640    graph
641}
642
643/// Compute summary from results
644fn compute_summary(name: &str, results: Vec<BenchmarkResult>) -> OptimizationBenchmark {
645    if results.is_empty() {
646        return OptimizationBenchmark {
647            name: name.to_string(),
648            results: Vec::new(),
649            summary: BenchmarkSummary::default(),
650        };
651    }
652
653    let speedups: Vec<f64> = results.iter().map(|r| r.speedup).collect();
654    let achieved: Vec<bool> = results.iter().map(|r| r.target_achieved).collect();
655    let memory_reductions: Vec<f64> = results
656        .iter()
657        .filter(|r| r.baseline_memory > 0)
658        .map(|r| r.memory_reduction_percent)
659        .collect();
660
661    let avg_speedup = speedups.iter().sum::<f64>() / speedups.len() as f64;
662    let min_speedup = speedups.iter().copied().fold(f64::INFINITY, f64::min);
663    let max_speedup = speedups.iter().copied().fold(0.0, f64::max);
664    let achieved_count = achieved.iter().filter(|&&a| a).count();
665    let targets_achieved_percent = 100.0 * achieved_count as f64 / achieved.len() as f64;
666
667    let avg_memory_reduction = if memory_reductions.is_empty() {
668        0.0
669    } else {
670        memory_reductions.iter().sum::<f64>() / memory_reductions.len() as f64
671    };
672
673    OptimizationBenchmark {
674        name: name.to_string(),
675        results,
676        summary: BenchmarkSummary {
677            avg_speedup,
678            min_speedup,
679            max_speedup,
680            targets_achieved_percent,
681            avg_memory_reduction,
682        },
683    }
684}
685
#[cfg(test)]
mod tests {
    use super::*;

    /// Speedup and the target flag are derived correctly from raw timings.
    #[test]
    fn test_benchmark_result() {
        let result = BenchmarkResult::new("test", 1000, 100, 5.0);

        assert_eq!(result.speedup, 10.0);
        assert!(result.target_achieved);
    }

    /// `with_memory` computes the reduction percentage from the two sizes.
    #[test]
    fn test_benchmark_result_memory() {
        let result = BenchmarkResult::new("test", 100, 50, 1.0).with_memory(1000, 250);

        assert_eq!(result.memory_reduction_percent, 75.0);
    }

    /// The helper graph has exactly the requested vertices and at most the
    /// requested number of edges.
    #[test]
    fn test_create_test_graph() {
        let graph = create_test_graph(10, 20);

        assert_eq!(graph.num_vertices(), 10);
        assert!(graph.num_edges() <= 20);
    }

    /// A minimal run of the full suite produces results for every optimization.
    #[test]
    fn test_benchmark_suite_small() {
        let mut suite = BenchmarkSuite::new()
            .with_sizes(vec![10])
            .with_iterations(1);

        let results = suite.run_all();

        assert!(!results.is_empty());
    }

    /// Combined speedup is always a positive, finite number.
    #[test]
    fn test_combined_speedup() {
        let mut suite = BenchmarkSuite::new()
            .with_sizes(vec![10])
            .with_iterations(1);

        suite.run_all();
        let combined = suite.combined_speedup();

        // For very small inputs, overhead may exceed benefit
        // Just verify we get a valid positive result
        assert!(
            combined > 0.0 && combined.is_finite(),
            "Combined speedup {} should be positive and finite",
            combined
        );
    }

    /// The report contains the header, per-optimization sections, and the
    /// combined estimate.
    #[test]
    fn test_report_generation() {
        let mut suite = BenchmarkSuite::new()
            .with_sizes(vec![10])
            .with_iterations(1);

        suite.run_all();
        let report = suite.report();

        assert!(report.contains("Benchmark Report"));
        assert!(report.contains("DSpar"));
        assert!(report.contains("Combined Speedup"));
    }
}
755}