lcpfs 2026.1.102

// Copyright 2025 LunaOS Contributors
// SPDX-License-Identifier: Apache-2.0
//
// LCPFS Benchmark Suite
// Performance measurement for all major subsystems.

//! # LCPFS Benchmark Suite
//!
//! Comprehensive benchmarks for measuring LCPFS performance characteristics.
//!
//! ## Subsystems Benchmarked
//!
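//! - **Checksums**: BLAKE3, SHA-256 throughput; checksum verification
//! - **Compression**: LZ4 compress/decompress (ZSTD and GPU paths are not covered yet)
//! - **Encryption**: ChaCha20-Poly1305 throughput, PBKDF2 key derivation
//! - **ARC Cache**: Lookup and insert performance (hit rate and eviction are not covered yet)
//! - **Deduplication**: DDT lookup
//! - **I/O**: Sequential read/write, random 4K access (in-memory simulation)
//! - **RAID-Z**: RAID-Z1 parity calculation (reconstruction is not covered yet)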
//!
//! ## Running Benchmarks
//!
//! ```bash
//! # Run all benchmarks
//! cargo test --release lcpfs_bench -- --nocapture
//!
//! # Run a specific benchmark (the argument is a filter on the test name)
//! cargo test --release test_bench_blake3 -- --nocapture
//! ```
//!
//! ## Results Format
//!
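//! Results are printed in a consistent format. Throughput benchmarks report:
//! ```text
//! [BENCH] <name>: <throughput> MB/s (<iterations> iterations, <time> ms)
//! ```
//! Benchmarks that only count operations report:
//! ```text
//! [BENCH] <name>: <rate> ops/s (<latency> µs/op)
//! ```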

use alloc::string::String;
use alloc::vec;
use alloc::vec::Vec;

// ═══════════════════════════════════════════════════════════════════════════════
// BENCHMARK UTILITIES
// ═══════════════════════════════════════════════════════════════════════════════

/// Build a deterministic `size`-byte buffer that interleaves noise with a ramp.
///
/// Even offsets hold the low byte of a xorshift64 generator (shifts 13/7/17,
/// seeded with `0xDEADBEEF`); the generator advances only on even offsets.
/// Odd offsets hold the offset itself modulo 256.
///
/// The mix is meant to stand in for text, code, and structured files, which
/// the benchmarks treat as roughly 50% compressible (not measured here).
fn generate_realistic_data(size: usize) -> Vec<u8> {
    let mut state: u64 = 0xDEADBEEF;
    (0..size)
        .map(|offset| {
            if offset % 2 != 0 {
                // Predictable half: a repeating 0..=255 ramp.
                return (offset % 256) as u8;
            }
            // Noisy half: one xorshift64 step, keeping only the low byte.
            state ^= state << 13;
            state ^= state >> 7;
            state ^= state << 17;
            state as u8
        })
        .collect()
}

/// Outcome of one benchmark run: what was measured and how long it took.
#[derive(Debug, Clone)]
pub struct BenchResult {
    /// Human-readable benchmark name
    pub name: &'static str,
    /// Total number of bytes pushed through the benchmark (0 for pure op-count benchmarks)
    pub bytes_processed: u64,
    /// Number of loop iterations executed
    pub iterations: u64,
    /// Elapsed time for the whole run, in nanoseconds
    pub total_ns: u64,
}

impl BenchResult {
    /// Elapsed time of the run expressed in seconds.
    fn elapsed_secs(&self) -> f64 {
        self.total_ns as f64 / 1e9
    }

    /// Throughput in MB/s, where 1 MB is 1024 * 1024 bytes.
    ///
    /// Returns 0.0 when no time was recorded.
    pub fn throughput_mbps(&self) -> f64 {
        match self.total_ns {
            0 => 0.0,
            _ => {
                let bytes_per_sec = (self.bytes_processed as f64) / self.elapsed_secs();
                bytes_per_sec / (1024.0 * 1024.0)
            }
        }
    }

    /// Iterations completed per second.
    ///
    /// Returns 0.0 when no time was recorded.
    pub fn ops_per_sec(&self) -> f64 {
        match self.total_ns {
            0 => 0.0,
            _ => (self.iterations as f64) / self.elapsed_secs(),
        }
    }

    /// Mean time per iteration in microseconds.
    ///
    /// Returns 0.0 when no iterations were run.
    pub fn latency_us(&self) -> f64 {
        if self.iterations == 0 {
            return 0.0;
        }
        let total_us = self.total_ns as f64 / 1000.0;
        total_us / (self.iterations as f64)
    }

    /// Render the result as a single `[BENCH] ...` line.
    ///
    /// Runs that processed bytes are reported as throughput; runs with
    /// `bytes_processed == 0` are reported as operations per second.
    pub fn format(&self) -> String {
        if self.bytes_processed == 0 {
            return alloc::format!(
                "[BENCH] {}: {:.0} ops/s ({:.1} µs/op)",
                self.name,
                self.ops_per_sec(),
                self.latency_us()
            );
        }

        let elapsed_ms = self.total_ns as f64 / 1e6;
        alloc::format!(
            "[BENCH] {}: {:.1} MB/s ({} iterations, {:.1} ms)",
            self.name,
            self.throughput_mbps(),
            self.iterations,
            elapsed_ms
        )
    }
}

/// Cycle-counter stopwatch that works without `std`.
///
/// On x86_64 the timestamp comes from `rdtsc`; on every other architecture
/// the timestamp is always 0, so `elapsed_ns` returns 0 there.
pub struct Timer {
    start: u64,
}

impl Timer {
    /// Cycles assumed per nanosecond (a ~3 GHz clock, so 1 cycle ≈ 0.33 ns).
    const ASSUMED_CYCLES_PER_NS: u64 = 3;

    /// Capture the current timestamp and return a running timer.
    pub fn start() -> Self {
        let start = Self::now();
        Self { start }
    }

    /// Approximate nanoseconds elapsed since `start()`.
    ///
    /// The cycle delta is converted with a fixed ~3 GHz assumption, so the
    /// value is only an estimate. A counter that reads lower than the start
    /// value yields 0 rather than wrapping.
    pub fn elapsed_ns(&self) -> u64 {
        let ticks = Self::now().saturating_sub(self.start);
        ticks / Self::ASSUMED_CYCLES_PER_NS
    }

    /// Read the raw timestamp: `rdtsc` on x86_64, constant 0 elsewhere.
    #[inline]
    fn now() -> u64 {
        // SAFETY: `_rdtsc` only reads the processor's time-stamp counter; it
        // takes no pointers and has no memory-safety preconditions.
        #[cfg(target_arch = "x86_64")]
        let ticks = unsafe { core::arch::x86_64::_rdtsc() };

        // No portable cycle counter is wired up for other targets.
        #[cfg(not(target_arch = "x86_64"))]
        let ticks = 0;

        ticks
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// CHECKSUM BENCHMARKS
// ═══════════════════════════════════════════════════════════════════════════════

/// Benchmark BLAKE3 checksum performance.
///
/// Generates one `data_size`-byte buffer and checksums it `iterations`
/// times with `Checksum::calculate`. Buffer generation is not timed.
///
/// Returns a [`BenchResult`] whose `bytes_processed` is
/// `data_size * iterations`.
pub fn bench_blake3(data_size: usize, iterations: usize) -> BenchResult {
    use crate::integrity::checksum::Checksum;

    let data = generate_realistic_data(data_size);
    let timer = Timer::start();

    for _ in 0..iterations {
        // Hide the input and consume the output so the optimizer can neither
        // hoist the hash out of the loop nor discard it as dead code.
        let input = core::hint::black_box(&data);
        let _ = core::hint::black_box(Checksum::calculate(input));
    }

    BenchResult {
        name: "BLAKE3 checksum",
        bytes_processed: (data_size * iterations) as u64,
        iterations: iterations as u64,
        total_ns: timer.elapsed_ns(),
    }
}

/// Benchmark SHA-256 hash performance (for dedup).
///
/// Each iteration creates a fresh hasher, feeds it the whole
/// `data_size`-byte buffer, and finalizes the digest. Buffer generation is
/// not timed.
///
/// Returns a [`BenchResult`] whose `bytes_processed` is
/// `data_size * iterations`.
pub fn bench_sha256(data_size: usize, iterations: usize) -> BenchResult {
    use sha2::{Digest, Sha256};

    let data = generate_realistic_data(data_size);
    let timer = Timer::start();

    for _ in 0..iterations {
        let mut hasher = Sha256::new();
        // Hide the input and consume the digest so the hashing work cannot be
        // hoisted out of the loop or eliminated as unused.
        let input = core::hint::black_box(&data);
        hasher.update(input);
        let _ = core::hint::black_box(hasher.finalize());
    }

    BenchResult {
        name: "SHA-256 hash",
        bytes_processed: (data_size * iterations) as u64,
        iterations: iterations as u64,
        total_ns: timer.elapsed_ns(),
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// COMPRESSION BENCHMARKS
// ═══════════════════════════════════════════════════════════════════════════════

/// Benchmark LZ4 compression performance (realistic mixed data).
///
/// Compresses one `data_size`-byte buffer from `generate_realistic_data`
/// `iterations` times with `lz4_flex::compress_prepend_size`. The reported
/// throughput is relative to the uncompressed input size.
pub fn bench_lz4_compress(data_size: usize, iterations: usize) -> BenchResult {
    let data = generate_realistic_data(data_size);

    let timer = Timer::start();

    for _ in 0..iterations {
        // Hide the input and consume the output so the compression call is
        // neither hoisted out of the loop nor optimized away.
        let input = core::hint::black_box(&data);
        let _ = core::hint::black_box(lz4_flex::compress_prepend_size(input));
    }

    BenchResult {
        name: "LZ4 compress",
        bytes_processed: (data_size * iterations) as u64,
        iterations: iterations as u64,
        total_ns: timer.elapsed_ns(),
    }
}

/// Benchmark LZ4 decompression performance (realistic mixed data).
///
/// Compresses one `data_size`-byte buffer up front (not timed), then
/// decompresses it `iterations` times. The reported throughput is relative
/// to the decompressed size (`data_size` bytes per iteration).
pub fn bench_lz4_decompress(data_size: usize, iterations: usize) -> BenchResult {
    let data = generate_realistic_data(data_size);
    let compressed = lz4_flex::compress_prepend_size(&data);

    let timer = Timer::start();

    for _ in 0..iterations {
        // Hide the input and consume the `Result` so the decompression is
        // neither hoisted out of the loop nor optimized away.
        let input = core::hint::black_box(&compressed);
        let _ = core::hint::black_box(lz4_flex::decompress_size_prepended(input));
    }

    BenchResult {
        name: "LZ4 decompress",
        bytes_processed: (data_size * iterations) as u64,
        iterations: iterations as u64,
        total_ns: timer.elapsed_ns(),
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// ARC CACHE BENCHMARKS
// ═══════════════════════════════════════════════════════════════════════════════

/// Benchmark ARC cache lookup performance.
///
/// Inserts one 4 KiB block under a fixed DVA (not timed) and then calls
/// `arc_read` on that DVA `iterations` times. The result carries no byte
/// count, so it is reported as operations per second.
pub fn bench_arc_lookup(iterations: usize) -> BenchResult {
    use crate::cache::arc::{arc_cache, arc_read};
    use crate::fscore::structs::Dva;

    // Populate the cache first so the timed lookups have an entry to find.
    // NOTE(review): assumes the ARC keeps this entry resident for the
    // duration of the loop — confirm against the eviction policy.
    arc_cache(
        Dva {
            vdev: 0,
            offset: 12345,
        },
        vec![0xA5u8; 4096],
    );

    let dva = Dva {
        vdev: 0,
        offset: 12345,
    };

    let timer = Timer::start();

    for _ in 0..iterations {
        // Hide the key and consume the result so the lookup cannot be
        // hoisted out of the loop or discarded.
        let key = core::hint::black_box(&dva);
        let _ = core::hint::black_box(arc_read(key));
    }

    BenchResult {
        name: "ARC lookup",
        bytes_processed: 0,
        iterations: iterations as u64,
        total_ns: timer.elapsed_ns(),
    }
}

/// Benchmark ARC cache insert performance.
///
/// Inserts `iterations` copies of one `data_size`-byte buffer, each under
/// its own DVA (vdev 0, offsets spaced 4096 apart). The per-insert clone of
/// the buffer happens inside the timed region.
pub fn bench_arc_insert(data_size: usize, iterations: usize) -> BenchResult {
    use crate::cache::arc::arc_cache;
    use crate::fscore::structs::Dva;

    let payload = generate_realistic_data(data_size);
    let stopwatch = Timer::start();

    (0..iterations).for_each(|index| {
        // Distinct offset per iteration so every insert uses a new key.
        let offset = index as u64 * 4096;
        arc_cache(Dva { vdev: 0, offset }, payload.clone());
    });

    BenchResult {
        name: "ARC insert",
        bytes_processed: (data_size * iterations) as u64,
        iterations: iterations as u64,
        total_ns: stopwatch.elapsed_ns(),
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// ENCRYPTION BENCHMARKS
// ═══════════════════════════════════════════════════════════════════════════════

/// Benchmark ChaCha20-Poly1305 encryption throughput.
///
/// Encrypts one `data_size`-byte plaintext `iterations` times with a fixed
/// key and nonce and no associated data. The fixed nonce is acceptable only
/// because the ciphertext is thrown away; never reuse a nonce with real data.
///
/// # Panics
///
/// Panics if the cipher rejects the fixed 32-byte key.
pub fn bench_chacha20_encrypt(data_size: usize, iterations: usize) -> BenchResult {
    use chacha20poly1305_nostd::ChaCha20Poly1305;

    let key = [0x42u8; 32];
    let nonce = [0x24u8; 12];
    let cipher = ChaCha20Poly1305::new(&key).expect("valid key");

    let plaintext = generate_realistic_data(data_size);
    let timer = Timer::start();

    for _ in 0..iterations {
        // Hide the input and consume the output so the encryption is neither
        // hoisted out of the loop nor optimized away.
        let input = core::hint::black_box(&plaintext);
        let _ = core::hint::black_box(cipher.encrypt(&nonce, input, None));
    }

    BenchResult {
        name: "ChaCha20-Poly1305",
        bytes_processed: (data_size * iterations) as u64,
        iterations: iterations as u64,
        total_ns: timer.elapsed_ns(),
    }
}

/// Benchmark key derivation (PBKDF2 with HMAC-SHA256).
///
/// Derives a 32-byte key from a fixed password and salt `iterations` times,
/// using 1000 PBKDF2 rounds per derivation. The result carries no byte
/// count, so it is reported as operations per second.
///
/// # Panics
///
/// Panics if the PBKDF2 call rejects the 32-byte output length.
pub fn bench_pbkdf2(iterations: usize) -> BenchResult {
    use hmac::Hmac;
    use sha2::Sha256;

    let mut key_out = [0u8; 32];
    let secret = b"test_password_for_benchmark";
    let salt_bytes = b"random_salt_value_16b";

    let stopwatch = Timer::start();

    (0..iterations).for_each(|_| {
        // 1000 rounds keeps the benchmark short (real use would be 10000+)
        pbkdf2::pbkdf2::<Hmac<Sha256>>(secret, salt_bytes, 1000, &mut key_out)
            .expect("valid output length");
        // Keep the derived key observable so the derivation is not elided.
        core::hint::black_box(&key_out);
    });

    BenchResult {
        name: "PBKDF2 (1000 rounds)",
        bytes_processed: 0,
        iterations: iterations as u64,
        total_ns: stopwatch.elapsed_ns(),
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// CHECKSUM VERIFICATION BENCHMARKS
// ═══════════════════════════════════════════════════════════════════════════════

/// Benchmark checksum verification (compute + compare).
///
/// Computes a reference checksum once (not timed), then on every iteration
/// recomputes the checksum of the same buffer and compares it to the
/// reference with `constant_time_checksum_eq`.
pub fn bench_checksum_verify(data_size: usize, iterations: usize) -> BenchResult {
    use crate::integrity::checksum::Checksum;
    use crate::mgmt::security::constant_time_checksum_eq;

    let data = generate_realistic_data(data_size);
    let expected = Checksum::calculate(&data);

    let timer = Timer::start();

    for _ in 0..iterations {
        // Hide the input and consume the comparison result so neither the
        // hash nor the compare can be hoisted or dropped by the optimizer.
        let input = core::hint::black_box(&data);
        let computed = Checksum::calculate(input);
        let _ = core::hint::black_box(constant_time_checksum_eq(&computed, &expected));
    }

    BenchResult {
        name: "Checksum verify",
        bytes_processed: (data_size * iterations) as u64,
        iterations: iterations as u64,
        total_ns: timer.elapsed_ns(),
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// RAID-Z BENCHMARKS
// ═══════════════════════════════════════════════════════════════════════════════

/// Benchmark RAID-Z1 parity calculation (XOR-based, cached).
///
/// Splits `data_size` bytes into four equal stripes (any remainder of
/// `data_size % 4` bytes is dropped) and XORs them into one parity stripe,
/// `iterations` times. The parity buffer is allocated once, outside the
/// timed region, so the loop times only the zeroing and XOR passes.
///
/// `bytes_processed` counts only the bytes actually XORed:
/// `(data_size / 4) * 4 * iterations`.
///
/// Note: with small inputs the stripes are expected to stay in CPU cache, so
/// this approximates peak XOR throughput rather than memory or storage
/// bandwidth.
pub fn bench_raidz1_parity(data_size: usize, iterations: usize) -> BenchResult {
    // RAID-Z1 uses simple XOR parity (P = D0 ^ D1 ^ D2 ^ ...)
    const STRIPE_COUNT: usize = 4;
    let stripe_size = data_size / STRIPE_COUNT;
    let stripes: Vec<Vec<u8>> = (0..STRIPE_COUNT)
        .map(|i| vec![(i as u8).wrapping_mul(0x55); stripe_size])
        .collect();

    // Reused across iterations so allocation cost does not pollute the timing.
    let mut parity = vec![0u8; stripe_size];

    let timer = Timer::start();

    for _ in 0..iterations {
        parity.fill(0);
        // The stripes are constant; hide them so the XOR result cannot be
        // precomputed or hoisted out of the loop.
        for stripe in core::hint::black_box(&stripes) {
            for (p, &d) in parity.iter_mut().zip(stripe.iter()) {
                *p ^= d;
            }
        }
        core::hint::black_box(&parity);
    }

    BenchResult {
        name: "RAID-Z1 parity (cached)",
        bytes_processed: (stripe_size * STRIPE_COUNT * iterations) as u64,
        iterations: iterations as u64,
        total_ns: timer.elapsed_ns(),
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// I/O BENCHMARKS
// ═══════════════════════════════════════════════════════════════════════════════

/// Benchmark sequential write throughput (memory copy simulation).
///
/// Each iteration copies one `data_size`-byte payload into a preallocated
/// destination buffer. Nothing here touches a disk or calls into the ZIO
/// pipeline; the number is an in-memory copy rate standing in for it.
pub fn bench_sequential_write(data_size: usize, iterations: usize) -> BenchResult {
    // Destination stands in for the write target; payload is the block to write.
    let mut sink = vec![0u8; data_size];
    let payload = generate_realistic_data(data_size);

    let stopwatch = Timer::start();

    (0..iterations).for_each(|_| {
        sink.copy_from_slice(&payload);
        // Keep the copied bytes observable so the copy is not elided.
        core::hint::black_box(&sink);
    });

    BenchResult {
        name: "Sequential write",
        bytes_processed: (data_size * iterations) as u64,
        iterations: iterations as u64,
        total_ns: stopwatch.elapsed_ns(),
    }
}

/// Benchmark sequential read throughput (memory copy simulation).
///
/// Each iteration copies a `data_size`-byte source buffer into a
/// preallocated destination. No storage device is involved.
pub fn bench_sequential_read(data_size: usize, iterations: usize) -> BenchResult {
    // Source holds the data being "read"; target receives it.
    let origin = generate_realistic_data(data_size);
    let mut target = vec![0u8; data_size];

    let stopwatch = Timer::start();

    (0..iterations).for_each(|_| {
        target.copy_from_slice(&origin);
        // Keep the copied bytes observable so the copy is not elided.
        core::hint::black_box(&target);
    });

    BenchResult {
        name: "Sequential read",
        bytes_processed: (data_size * iterations) as u64,
        iterations: iterations as u64,
        total_ns: stopwatch.elapsed_ns(),
    }
}

/// Benchmark random 4K block access (simulated).
///
/// Builds a 16 MB in-memory buffer (not timed) and copies `iterations`
/// 4096-byte windows out of it. Window offsets come from a xorshift64
/// generator with a fixed seed, so every run visits the same offsets.
pub fn bench_random_4k(iterations: usize) -> BenchResult {
    const BLOCK: usize = 4096;

    // Large backing store so successive windows land on different blocks.
    let buffer_size = 16 * 1024 * 1024; // 16 MB
    let backing = generate_realistic_data(buffer_size);
    let mut block = [0u8; 4096];

    // Fixed seed keeps the "random" offsets deterministic.
    let mut state: u64 = 0x12345678;

    let stopwatch = Timer::start();

    for _ in 0..iterations {
        // One xorshift64 step, then fold into a start offset that leaves
        // room for a full block before the end of the buffer.
        state ^= state << 13;
        state ^= state >> 7;
        state ^= state << 17;
        let start = (state as usize) % (buffer_size - BLOCK);

        block.copy_from_slice(&backing[start..][..BLOCK]);
        core::hint::black_box(&block);
    }

    BenchResult {
        name: "Random 4K read",
        bytes_processed: (4096 * iterations) as u64,
        iterations: iterations as u64,
        total_ns: stopwatch.elapsed_ns(),
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// DEDUPLICATION BENCHMARKS
// ═══════════════════════════════════════════════════════════════════════════════

/// Benchmark dedup hash table lookup.
///
/// Calls `DDT.lock().dedup(..)` once with a 4096-byte block before timing,
/// then repeats the same call `iterations` times under the timer. The lock
/// is taken and released on every call. The result carries no byte count,
/// so it is reported as operations per second.
pub fn bench_dedup_lookup(iterations: usize) -> BenchResult {
    use crate::dedup::dedup::DDT;

    let block = vec![0xABu8; 4096];

    // Warm-up call; presumably this registers the block so the timed calls
    // find an existing entry — verify against the DDT implementation.
    let _ = DDT.lock().dedup(&block);

    let stopwatch = Timer::start();

    (0..iterations).for_each(|_| {
        let _ = DDT.lock().dedup(&block);
    });

    BenchResult {
        name: "Dedup lookup",
        bytes_processed: 0,
        iterations: iterations as u64,
        total_ns: stopwatch.elapsed_ns(),
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// COMPREHENSIVE BENCHMARK RUNNER
// ═══════════════════════════════════════════════════════════════════════════════

/// Run every benchmark once, in a fixed order, and collect the results.
///
/// Bulk benchmarks use 1 MB buffers with 100 iterations; block-sized
/// benchmarks use 4096-byte buffers with 1000 or 10000 iterations.
pub fn run_all_benchmarks() -> Vec<BenchResult> {
    const MIB: usize = 1024 * 1024;
    const BLOCK: usize = 4096;

    let mut results = Vec::with_capacity(14);

    // Checksums
    results.push(bench_blake3(MIB, 100));
    results.push(bench_sha256(MIB, 100));
    results.push(bench_checksum_verify(BLOCK, 1000));

    // Compression
    results.push(bench_lz4_compress(MIB, 100));
    results.push(bench_lz4_decompress(MIB, 100));

    // Encryption
    results.push(bench_chacha20_encrypt(BLOCK, 1000));
    results.push(bench_pbkdf2(100));

    // ARC cache
    results.push(bench_arc_lookup(10000));
    results.push(bench_arc_insert(BLOCK, 10000));

    // RAID-Z
    results.push(bench_raidz1_parity(MIB, 100));

    // I/O
    results.push(bench_sequential_write(MIB, 100));
    results.push(bench_sequential_read(MIB, 100));
    results.push(bench_random_4k(10000));

    // Dedup
    results.push(bench_dedup_lookup(10000));

    results
}

/// Run all benchmarks and print one formatted line per result.
///
/// Printing is compiled in only when the `std` feature is enabled. Without
/// it the benchmarks still run, but their results are discarded.
pub fn print_benchmark_report() {
    let results = run_all_benchmarks();

    #[cfg(feature = "std")]
    {
        // `println!` is not in scope by default in a `no_std` crate, so
        // bring it in explicitly, the same way the test module does.
        extern crate std;
        use std::println;

        for result in &results {
            println!("{}", result.format());
        }
    }

    // No stdout without `std`; consume the results to avoid an unused warning.
    #[cfg(not(feature = "std"))]
    let _ = results;
}

// ═══════════════════════════════════════════════════════════════════════════════
// BENCHMARK SUMMARY
// ═══════════════════════════════════════════════════════════════════════════════

/// Benchmark summary with aggregated metrics.
///
/// Every field defaults to 0.0 and stays 0.0 when no matching benchmark
/// result was supplied.
#[derive(Debug, Clone, Default)]
pub struct BenchmarkSummary {
    /// Checksum throughput (mean of BLAKE3 and SHA-256, MB/s)
    pub checksum_mbps: f64,
    /// Compression throughput (mean of LZ4 compress and decompress, MB/s)
    pub compression_mbps: f64,
    /// Encryption throughput (MB/s)
    pub encryption_mbps: f64,
    /// Key derivation ops/sec
    pub kdf_ops_per_sec: f64,
    /// Cache operations per second (mean of ARC lookup and insert)
    pub cache_ops_per_sec: f64,
    /// RAID-Z parity throughput (MB/s)
    pub raidz_mbps: f64,
    /// Sequential write throughput (MB/s)
    pub seq_write_mbps: f64,
    /// Sequential read throughput (MB/s)
    pub seq_read_mbps: f64,
    /// Random 4K IOPS
    pub random_4k_iops: f64,
    /// Dedup operations per second
    pub dedup_ops_per_sec: f64,
}

impl BenchmarkSummary {
    /// Create a summary from benchmark results.
    ///
    /// Results are matched by their exact `name`; unknown names are ignored.
    /// Grouped metrics (checksum, compression, cache) are the arithmetic mean
    /// of all matching results. Single-benchmark metrics take the value of
    /// the last matching result.
    pub fn from_results(results: &[BenchResult]) -> Self {
        /// Arithmetic mean of `count` samples totalling `sum`; 0.0 when empty.
        fn mean(sum: f64, count: u32) -> f64 {
            if count == 0 {
                0.0
            } else {
                sum / f64::from(count)
            }
        }

        let mut summary = Self::default();

        // Sum and count per group, so each sample carries equal weight. A
        // running "(old + new) / 2" would halve the first sample and weight
        // later ones more heavily.
        let (mut checksum_sum, mut checksum_count) = (0.0_f64, 0_u32);
        let (mut compression_sum, mut compression_count) = (0.0_f64, 0_u32);
        let (mut cache_sum, mut cache_count) = (0.0_f64, 0_u32);

        for result in results {
            match result.name {
                "BLAKE3 checksum" | "SHA-256 hash" => {
                    checksum_sum += result.throughput_mbps();
                    checksum_count += 1;
                }
                "LZ4 compress" | "LZ4 decompress" => {
                    compression_sum += result.throughput_mbps();
                    compression_count += 1;
                }
                "ARC lookup" | "ARC insert" => {
                    cache_sum += result.ops_per_sec();
                    cache_count += 1;
                }
                "RAID-Z1 parity (cached)" => {
                    summary.raidz_mbps = result.throughput_mbps();
                }
                "Sequential write" => {
                    summary.seq_write_mbps = result.throughput_mbps();
                }
                "Sequential read" => {
                    summary.seq_read_mbps = result.throughput_mbps();
                }
                "Random 4K read" => {
                    summary.random_4k_iops = result.ops_per_sec();
                }
                "Dedup lookup" => {
                    summary.dedup_ops_per_sec = result.ops_per_sec();
                }
                "ChaCha20-Poly1305" => {
                    summary.encryption_mbps = result.throughput_mbps();
                }
                "PBKDF2 (1000 rounds)" => {
                    summary.kdf_ops_per_sec = result.ops_per_sec();
                }
                _ => {}
            }
        }

        summary.checksum_mbps = mean(checksum_sum, checksum_count);
        summary.compression_mbps = mean(compression_sum, compression_count);
        summary.cache_ops_per_sec = mean(cache_sum, cache_count);

        summary
    }
}

// ═══════════════════════════════════════════════════════════════════════════════
// TESTS (also serve as benchmarks)
// ═══════════════════════════════════════════════════════════════════════════════

#[cfg(test)]
mod tests {
    use super::*;

    /// The BLAKE3 benchmark reports the requested iteration count and the
    /// total number of bytes hashed.
    #[test]
    fn test_bench_blake3() {
        let result = bench_blake3(4096, 10);
        // `assert_eq!` prints both values on failure, unlike `assert!(a == b)`.
        assert_eq!(result.iterations, 10);
        assert_eq!(result.bytes_processed, 4096 * 10);
    }

    /// The LZ4 compression benchmark reports the requested iteration count
    /// and a non-negative throughput.
    #[test]
    fn test_bench_lz4_compress() {
        let result = bench_lz4_compress(4096, 10);
        // `assert_eq!` prints both values on failure, unlike `assert!(a == b)`.
        assert_eq!(result.iterations, 10);
        assert!(result.throughput_mbps() >= 0.0);
    }

    /// The LZ4 decompression benchmark reports the requested iteration count.
    #[test]
    fn test_bench_lz4_decompress() {
        let result = bench_lz4_decompress(4096, 10);
        // `assert_eq!` prints both values on failure, unlike `assert!(a == b)`.
        assert_eq!(result.iterations, 10);
    }

    /// The ARC lookup benchmark reports the requested iteration count and a
    /// non-negative operation rate.
    #[test]
    fn test_bench_arc_lookup() {
        let result = bench_arc_lookup(100);
        // `assert_eq!` prints both values on failure, unlike `assert!(a == b)`.
        assert_eq!(result.iterations, 100);
        assert!(result.ops_per_sec() >= 0.0);
    }

    /// The RAID-Z1 parity benchmark reports the requested iteration count.
    #[test]
    fn test_bench_raidz1_parity() {
        let result = bench_raidz1_parity(4096, 10);
        // `assert_eq!` prints both values on failure, unlike `assert!(a == b)`.
        assert_eq!(result.iterations, 10);
    }

    /// The full suite produces at least one result, and the summary built
    /// from it has a non-negative checksum throughput.
    #[test]
    fn test_run_all_benchmarks() {
        let all = run_all_benchmarks();
        assert!(!all.is_empty());

        let checksum_mbps = BenchmarkSummary::from_results(&all).checksum_mbps;
        assert!(checksum_mbps >= 0.0);
    }

    /// Full benchmark report with output - run with: cargo test --release lcpfs_benchmark_report -- --nocapture
    ///
    /// Runs the whole suite once, prints the individual results grouped into
    /// boxed sections (selected by substring match on each result's name),
    /// and finishes with the aggregated `BenchmarkSummary` table. The test
    /// makes no assertions; it exists for its printed output.
    #[test]
    fn lcpfs_benchmark_report() {
        // The test pulls in `std` explicitly to get `println!`.
        extern crate std;
        use std::println;

        println!("\n╔══════════════════════════════════════════════════════════════════╗");
        println!("║                    LCPFS BENCHMARK SUITE                         ║");
        println!("╚══════════════════════════════════════════════════════════════════╝\n");

        let results = run_all_benchmarks();

        println!("┌─────────────────────────────────────────────────────────────────┐");
        println!("│ CHECKSUM PERFORMANCE                                            │");
        println!("├─────────────────────────────────────────────────────────────────┤");
        // `contains` is case-sensitive: "Checksum verify" is picked up by the
        // "verify" test, not by the lowercase "checksum" test.
        for r in results.iter().filter(|r| {
            r.name.contains("checksum") || r.name.contains("hash") || r.name.contains("verify")
        }) {
            println!("│  {:64}│", r.format());
        }

        println!("├─────────────────────────────────────────────────────────────────┤");
        println!("│ COMPRESSION PERFORMANCE                                         │");
        println!("├─────────────────────────────────────────────────────────────────┤");
        for r in results.iter().filter(|r| r.name.contains("LZ4")) {
            println!("│  {:64}│", r.format());
        }

        println!("├─────────────────────────────────────────────────────────────────┤");
        println!("│ ENCRYPTION PERFORMANCE                                          │");
        println!("├─────────────────────────────────────────────────────────────────┤");
        for r in results
            .iter()
            .filter(|r| r.name.contains("ChaCha") || r.name.contains("PBKDF2"))
        {
            println!("│  {:64}│", r.format());
        }

        println!("├─────────────────────────────────────────────────────────────────┤");
        println!("│ CACHE PERFORMANCE                                               │");
        println!("├─────────────────────────────────────────────────────────────────┤");
        for r in results.iter().filter(|r| r.name.contains("ARC")) {
            println!("│  {:64}│", r.format());
        }

        println!("├─────────────────────────────────────────────────────────────────┤");
        println!("│ RAID-Z PERFORMANCE                                              │");
        println!("├─────────────────────────────────────────────────────────────────┤");
        for r in results.iter().filter(|r| r.name.contains("RAID")) {
            println!("│  {:64}│", r.format());
        }

        println!("├─────────────────────────────────────────────────────────────────┤");
        println!("│ I/O PERFORMANCE                                                 │");
        println!("├─────────────────────────────────────────────────────────────────┤");
        for r in results
            .iter()
            .filter(|r| r.name.contains("Sequential") || r.name.contains("Random"))
        {
            println!("│  {:64}│", r.format());
        }

        println!("├─────────────────────────────────────────────────────────────────┤");
        println!("│ DEDUP PERFORMANCE                                               │");
        println!("├─────────────────────────────────────────────────────────────────┤");
        for r in results.iter().filter(|r| r.name.contains("Dedup")) {
            println!("│  {:64}│", r.format());
        }

        // Aggregate the same results into the per-subsystem summary table.
        let summary = BenchmarkSummary::from_results(&results);
        println!("└─────────────────────────────────────────────────────────────────┘");
        println!("\n╔══════════════════════════════════════════════════════════════════╗");
        println!("║                         SUMMARY                                  ║");
        println!("╠══════════════════════════════════════════════════════════════════╣");
        println!(
            "║  Checksum:        {:>10.0} MB/s                                ║",
            summary.checksum_mbps
        );
        println!(
            "║  Compression:     {:>10.0} MB/s                                ║",
            summary.compression_mbps
        );
        println!(
            "║  Encryption:      {:>10.0} MB/s                                ║",
            summary.encryption_mbps
        );
        println!(
            "║  KDF:             {:>10.0} ops/s                               ║",
            summary.kdf_ops_per_sec
        );
        println!(
            "║  Cache:           {:>10.0} ops/s                               ║",
            summary.cache_ops_per_sec
        );
        println!(
            "║  RAID-Z Parity:   {:>10.0} MB/s                                ║",
            summary.raidz_mbps
        );
        println!(
            "║  Seq Write:       {:>10.0} MB/s                                ║",
            summary.seq_write_mbps
        );
        println!(
            "║  Seq Read:        {:>10.0} MB/s                                ║",
            summary.seq_read_mbps
        );
        println!(
            "║  Random 4K:       {:>10.0} IOPS                                ║",
            summary.random_4k_iops
        );
        println!(
            "║  Dedup:           {:>10.0} ops/s                               ║",
            summary.dedup_ops_per_sec
        );
        println!("╚══════════════════════════════════════════════════════════════════╝\n");
    }

    /// `BenchResult` derives the expected metrics and renders both output
    /// shapes: the throughput line and the ops/s line.
    #[test]
    fn test_bench_result_format() {
        let result = BenchResult {
            name: "test",
            bytes_processed: 1024 * 1024,
            iterations: 100,
            total_ns: 1_000_000_000, // 1 second
        };

        // 1 MB in 1 second = 1 MB/s
        assert!((result.throughput_mbps() - 1.0).abs() < 0.1);

        // Pin the whole line, not just fragments of it.
        let formatted = result.format();
        assert!(formatted.contains("test"));
        assert!(formatted.contains("MB/s"));
        assert_eq!(
            formatted,
            "[BENCH] test: 1.0 MB/s (100 iterations, 1000.0 ms)"
        );

        // With no bytes recorded, the result is rendered as an operation rate.
        let ops_only = BenchResult {
            name: "ops",
            bytes_processed: 0,
            iterations: 100,
            total_ns: 1_000_000_000,
        };
        assert_eq!(ops_only.format(), "[BENCH] ops: 100 ops/s (10000.0 µs/op)");
    }
}