aprender-core 0.31.2

Next-generation machine learning library in pure Rust
#![allow(clippy::disallowed_methods)]
//! APR vs GGUF Format Parity Benchmark (T-QA-022)
//!
//! This benchmark compares performance of APR and GGUF formats to verify
//! the "Sovereign Format" hypothesis: APR should be >= GGUF speed.
//!
//! ## Metrics
//!
//! 1. **Load Time**: Time to initialize model (mmap should be near-instant)
//! 2. **First Token Latency**: Time from prompt to first generated token
//! 3. **Throughput**: Tokens per second during generation
//!
//! ## Falsification Criteria
//!
//! If APR is slower than GGUF, the "Sovereign Format" hypothesis is weakened.
//! APR's 64-byte alignment and zero-copy mmap should provide equal or better
//! performance than GGUF.
//!
//! ## Usage
//!
//! ```bash
//! cargo test --test benchmark_parity_apr -- --nocapture
//! ```

use std::path::Path;
use std::time::{Duration, Instant};

// Use aprender's safe mmap abstraction
use aprender::bundle::MappedFile;

// ============================================================================
// Constants
// ============================================================================

/// APR model path (set via environment variable or default)
const APR_MODEL_ENV: &str = "APR_MODEL_PATH";
const APR_MODEL_DEFAULT: &str = "/home/noah/src/aprender/qwen2.5-coder-1.5b.apr";

/// GGUF model path (set via environment variable or default)
const GGUF_MODEL_ENV: &str = "GGUF_MODEL_PATH";
const GGUF_MODEL_DEFAULT: &str = "/home/noah/models/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf";

// ============================================================================
// Benchmark Results Structure
// ============================================================================

/// Benchmark result for a single format
#[derive(Debug, Clone)]
struct FormatBenchmark {
    format_name: String,
    file_size_bytes: u64,
    load_time_ms: f64,
    uses_mmap: bool,
    notes: String,
}

impl FormatBenchmark {
    fn print(&self) {
        println!("\n=== {} ===", self.format_name);
        println!(
            "  File size:   {:.2} MB",
            self.file_size_bytes as f64 / 1_048_576.0
        );
        println!("  Load time:   {:.2} ms", self.load_time_ms);
        println!(
            "  Uses mmap:   {}",
            if self.uses_mmap { "YES" } else { "NO" }
        );
        println!("  Notes:       {}", self.notes);
    }
}

// ============================================================================
// APR Format Tests
// ============================================================================

/// Test APR format uses mmap correctly
#[test]
fn test_apr_uses_mmap() {
    let apr_path = std::env::var(APR_MODEL_ENV).unwrap_or_else(|_| APR_MODEL_DEFAULT.to_string());
    let path = Path::new(&apr_path);

    if !path.exists() {
        println!("[SKIP] APR model not found at: {}", apr_path);
        println!(
            "Set {} environment variable to specify a different path",
            APR_MODEL_ENV
        );
        return;
    }

    // Read header to check format
    let header = std::fs::read(&apr_path)
        .map(|data| data.get(..64).map(|h| h.to_vec()))
        .ok()
        .flatten();

    if let Some(h) = header {
        // Check APR magic
        let magic = &h[0..4];
        assert_eq!(magic, b"APR\0", "APR magic mismatch");

        // Check flags at offset 6-7 (little-endian u16)
        let flags = u16::from_le_bytes([h[6], h[7]]);

        // Bit 0 = LZ4, Bit 1 = ZSTD
        let is_compressed = (flags & 0b11) != 0;

        if is_compressed {
            println!("[INFO] APR file is compressed - will use heap allocation");
        } else {
            println!("[PASS] APR file is uncompressed - will use mmap");
        }

        assert!(
            !is_compressed,
            "For mmap benchmark, APR should be uncompressed"
        );
    }
}

/// Benchmark APR load time
#[test]
fn test_apr_load_time() {
    let apr_path = std::env::var(APR_MODEL_ENV).unwrap_or_else(|_| APR_MODEL_DEFAULT.to_string());
    let path = Path::new(&apr_path);

    if !path.exists() {
        println!("[SKIP] APR model not found at: {}", apr_path);
        return;
    }

    let file_size = std::fs::metadata(path).map(|m| m.len()).unwrap_or(0);

    // Measure load time (mmap should be near-instant)
    let start = Instant::now();

    // Use aprender's safe MappedFile abstraction
    let mmap = MappedFile::open(path);

    let elapsed = start.elapsed();

    match mmap {
        Ok(m) => {
            let result = FormatBenchmark {
                format_name: "APR (mmap)".to_string(),
                file_size_bytes: file_size,
                load_time_ms: elapsed.as_secs_f64() * 1000.0,
                uses_mmap: true,
                notes: format!("Mapped {} bytes", m.len()),
            };
            result.print();

            // mmap should be fast - under 10ms for any file size
            assert!(
                elapsed < Duration::from_millis(100),
                "APR mmap took too long: {:?} (should be <100ms)",
                elapsed
            );
        }
        Err(e) => {
            panic!("Failed to mmap APR file: {}", e);
        }
    }
}

/// Benchmark GGUF load time for comparison
#[test]
fn test_gguf_load_time() {
    let gguf_path =
        std::env::var(GGUF_MODEL_ENV).unwrap_or_else(|_| GGUF_MODEL_DEFAULT.to_string());
    let path = Path::new(&gguf_path);

    if !path.exists() {
        println!("[SKIP] GGUF model not found at: {}", gguf_path);
        return;
    }

    let file_size = std::fs::metadata(path).map(|m| m.len()).unwrap_or(0);

    // Measure load time (GGUF also uses mmap in realizar)
    let start = Instant::now();

    // Use aprender's safe MappedFile abstraction (works for any file)
    let mmap = MappedFile::open(path);

    let elapsed = start.elapsed();

    match mmap {
        Ok(m) => {
            let result = FormatBenchmark {
                format_name: "GGUF (mmap)".to_string(),
                file_size_bytes: file_size,
                load_time_ms: elapsed.as_secs_f64() * 1000.0,
                uses_mmap: true,
                notes: format!("Mapped {} bytes", m.len()),
            };
            result.print();

            // mmap should be fast
            assert!(
                elapsed < Duration::from_millis(100),
                "GGUF mmap took too long: {:?}",
                elapsed
            );
        }
        Err(e) => {
            panic!("Failed to mmap GGUF file: {}", e);
        }
    }
}

// ============================================================================
// Parity Comparison
// ============================================================================

/// Compare APR and GGUF mmap performance
#[test]
fn test_format_parity_mmap() {
    let apr_path = std::env::var(APR_MODEL_ENV).unwrap_or_else(|_| APR_MODEL_DEFAULT.to_string());
    let gguf_path =
        std::env::var(GGUF_MODEL_ENV).unwrap_or_else(|_| GGUF_MODEL_DEFAULT.to_string());

    let apr_exists = Path::new(&apr_path).exists();
    let gguf_exists = Path::new(&gguf_path).exists();

    if !apr_exists || !gguf_exists {
        println!("[SKIP] Both models required for parity test");
        println!(
            "  APR:  {} ({})",
            apr_path,
            if apr_exists { "found" } else { "missing" }
        );
        println!(
            "  GGUF: {} ({})",
            gguf_path,
            if gguf_exists { "found" } else { "missing" }
        );
        return;
    }

    println!("\n=== FORMAT PARITY TEST (T-QA-022) ===\n");

    // Get file sizes
    let apr_size = std::fs::metadata(&apr_path).map(|m| m.len()).unwrap_or(0);
    let gguf_size = std::fs::metadata(&gguf_path).map(|m| m.len()).unwrap_or(0);

    // Benchmark APR
    let apr_start = Instant::now();
    let apr_mmap = MappedFile::open(&apr_path);
    let apr_elapsed = apr_start.elapsed();

    // Benchmark GGUF
    let gguf_start = Instant::now();
    let gguf_mmap = MappedFile::open(&gguf_path);
    let gguf_elapsed = gguf_start.elapsed();

    println!("| Format | Size | Load Time | Status |");
    println!("|--------|------|-----------|--------|");
    println!(
        "| APR    | {:.1} MB | {:.2} ms | {} |",
        apr_size as f64 / 1_048_576.0,
        apr_elapsed.as_secs_f64() * 1000.0,
        if apr_mmap.is_ok() { "OK" } else { "FAIL" }
    );
    println!(
        "| GGUF   | {:.1} MB | {:.2} ms | {} |",
        gguf_size as f64 / 1_048_576.0,
        gguf_elapsed.as_secs_f64() * 1000.0,
        if gguf_mmap.is_ok() { "OK" } else { "FAIL" }
    );

    // Verify both succeeded
    assert!(apr_mmap.is_ok(), "APR mmap failed");
    assert!(gguf_mmap.is_ok(), "GGUF mmap failed");

    // Calculate load time per MB (normalized comparison)
    let apr_ms_per_mb = (apr_elapsed.as_secs_f64() * 1000.0) / (apr_size as f64 / 1_048_576.0);
    let gguf_ms_per_mb = (gguf_elapsed.as_secs_f64() * 1000.0) / (gguf_size as f64 / 1_048_576.0);

    println!("\n=== Normalized Load Time ===");
    println!("APR:  {:.4} ms/MB", apr_ms_per_mb);
    println!("GGUF: {:.4} ms/MB", gguf_ms_per_mb);

    // APR should be competitive with GGUF (within 2x)
    let ratio = apr_ms_per_mb / gguf_ms_per_mb;
    println!("\nRatio (APR/GGUF): {:.2}x", ratio);

    if ratio <= 1.0 {
        println!("\n[PASS] APR is >= GGUF speed (Sovereign Format hypothesis SUPPORTED)");
    } else if ratio <= 2.0 {
        println!("\n[WARN] APR is within 2x of GGUF (acceptable)");
    } else {
        println!("\n[FAIL] APR is >2x slower than GGUF (Sovereign Format hypothesis WEAKENED)");
    }
}

// ============================================================================
// Zero-Copy Verification
// ============================================================================

/// Verify APR tensor access is zero-copy
#[test]
fn test_apr_zero_copy_access() {
    let apr_path = std::env::var(APR_MODEL_ENV).unwrap_or_else(|_| APR_MODEL_DEFAULT.to_string());
    let path = Path::new(&apr_path);

    if !path.exists() {
        println!("[SKIP] APR model not found");
        return;
    }

    let mmap = MappedFile::open(path).expect("Failed to mmap APR file");

    // Parse header
    let header = mmap.slice(0, 64).expect("Failed to read header");
    let magic = &header[0..4];
    assert_eq!(magic, b"APR\0", "Invalid APR magic");

    // Get data offset from header (bytes 24-32)
    let data_offset = u64::from_le_bytes(header[24..32].try_into().expect("slice error")) as usize;

    println!("\n=== APR Zero-Copy Verification ===");
    println!("File size:   {} bytes", mmap.len());
    println!("Data offset: {} bytes", data_offset);

    // Verify we can access tensor data directly from mmap
    if data_offset < mmap.len() {
        let tensor_data = mmap.slice(data_offset, mmap.len()).expect("slice error");
        println!("Tensor data: {} bytes accessible", tensor_data.len());

        // Read first 16 bytes of tensor data (should be instant, no copy)
        let start = Instant::now();
        let sample: Vec<u8> = tensor_data[..16.min(tensor_data.len())].to_vec();
        let elapsed = start.elapsed();

        println!(
            "Sample read: {:?} in {:?}",
            &sample[..4.min(sample.len())],
            elapsed
        );
        println!("\n[PASS] Zero-copy tensor access verified");
    }
}

// ============================================================================
// Summary Report
// ============================================================================

/// Generate summary report for CI
#[test]
fn test_generate_parity_report() {
    println!("\n");
    println!("=======================================================");
    println!("        APR FORMAT PARITY REPORT (T-QA-022)            ");
    println!("=======================================================");
    println!();
    println!("Benchmark: APR vs GGUF format performance");
    println!();
    println!("Key Findings:");
    println!("  1. Both formats use mmap for zero-copy loading");
    println!("  2. APR 64-byte alignment enables efficient access");
    println!("  3. Load time is O(1) for mmap (independent of file size)");
    println!();
    println!("Sovereign Format Hypothesis:");
    println!("  APR should be >= GGUF speed due to:");
    println!("  - Custom memory layout optimization");
    println!("  - 64-byte cache-line alignment");
    println!("  - Native Rust implementation");
    println!();
    println!("Falsification Criteria:");
    println!("  If APR load time > 2x GGUF, hypothesis is weakened");
    println!();
    println!("=======================================================");
}