trueno 0.17.2

High-performance SIMD compute library with GPU support for matrix operations
Documentation
use super::super::super::*;

// ========================================================================
// PAR-200: Falsification Tests (F101-F110)
// ========================================================================

/// F102: in immediate sync mode the legacy string-keyed API and the
/// `BrickId`-keyed API both record a sample.
///
/// The profiler is driven twice: first through `start`/`stop` with the name
/// `"RmsNorm"`, then (after `reset()`) through `start_brick`/`stop_brick` with
/// `BrickId::RmsNorm`. After each pass `brick_stats(BrickId::RmsNorm)` must
/// report `count == 1` and a non-zero `total_ns`.
///
/// Five-Whys root cause of the earlier flakiness: the test used to compare the
/// two timings as a ratio, but `thread::sleep(100µs)` has OS-scheduler
/// variance of up to 14x on busy systems. The goal is API equivalence, not
/// timing precision, so no ratio is asserted.
#[test]
fn test_f102_immediate_mode_matches_v1() {
    // Same simulated workload for both passes.
    let pause = std::time::Duration::from_micros(100);

    let mut profiler = BrickProfiler::new();
    profiler.enable();
    profiler.set_sync_mode(SyncMode::Immediate);

    // Pass 1: legacy API, brick addressed by name.
    let legacy_timer = profiler.start("RmsNorm");
    std::thread::sleep(pause);
    profiler.stop(legacy_timer, 1);

    let legacy_stats = profiler.brick_stats(BrickId::RmsNorm);
    assert_eq!(legacy_stats.count, 1, "F102: legacy API count mismatch");
    assert!(legacy_stats.total_ns > 0, "F102: legacy API recorded zero ns");

    // Clear the first pass so the second one is checked in isolation.
    profiler.reset();

    // Pass 2: new API, brick addressed by `BrickId`.
    let id_timer = profiler.start_brick(BrickId::RmsNorm);
    std::thread::sleep(pause);
    profiler.stop_brick(id_timer, 1);

    let new_stats = profiler.brick_stats(BrickId::RmsNorm);
    assert_eq!(new_stats.count, 1, "F102: new API count mismatch");
    assert!(new_stats.total_ns > 0, "F102: new API recorded zero ns");

    // Deliberately no comparison between the two `total_ns` values: scheduler
    // jitter on the sleeps would make any ratio check flaky.
}

/// F103: `BrickId` lookup is O(1).
///
/// Nothing is timed here. The test calls `brick_stats` for three ids on a
/// fresh profiler and then checks, at run time, that `BrickId` occupies
/// exactly one byte.
#[test]
fn test_f103_brick_id_lookup_o1() {
    let profiler = BrickProfiler::new();

    // NOTE(review): the original author states that `brick_stats` is a direct
    // array access and therefore O(1) by construction; the implementation is
    // not visible from this file, so these calls only prove that the lookups
    // compile and do not panic.
    let _rms_norm = &profiler.brick_stats(BrickId::RmsNorm);
    let _attention_score = &profiler.brick_stats(BrickId::AttentionScore);
    let _down_projection = &profiler.brick_stats(BrickId::DownProjection);

    // A one-byte id is consistent with a `u8` representation.
    let id_size = std::mem::size_of::<BrickId>();
    assert_eq!(id_size, 1);
}

/// F104: per-category totals add up to the profiler's overall total.
///
/// Three bricks are timed with different sleep lengths, then the sum of
/// `total_ns` over `category_stats()` is compared with `total_ns()`.
#[test]
fn test_f104_category_aggregation_correct() {
    let mut profiler = BrickProfiler::new();
    profiler.enable();

    // One sample per brick, each with its own sleep length in microseconds.
    // NOTE(review): presumably these three bricks fall into different
    // categories — confirm against the `BrickId` category mapping.
    let workloads = [
        (BrickId::RmsNorm, 10),
        (BrickId::QkvProjection, 20),
        (BrickId::GateProjection, 30),
    ];
    for (brick, micros) in workloads {
        let timer = profiler.start_brick(brick);
        std::thread::sleep(std::time::Duration::from_micros(micros));
        profiler.stop_brick(timer, 1);
    }

    let cat_total: u64 = profiler.category_stats().iter().map(|cat| cat.total_ns).sum();

    // No time may be lost or double-counted during aggregation.
    assert_eq!(cat_total, profiler.total_ns(), "F104 failed: category sum mismatch");
}

/// F105: a brick started by an arbitrary name is still tracked.
///
/// One sample is recorded under `"UnknownCustomBrick"` through the
/// string-keyed API; `stats()` must then return an entry for that name with
/// `count == 1`.
#[test]
fn test_f105_dynamic_fallback_works() {
    let mut profiler = BrickProfiler::new();
    profiler.enable();

    // NOTE(review): this name presumably has no matching `BrickId` variant,
    // which is what forces the dynamic path — confirm against `BrickId`.
    let custom_timer = profiler.start("UnknownCustomBrick");
    std::thread::sleep(std::time::Duration::from_micros(10));
    profiler.stop(custom_timer, 1);

    // The sample must be retrievable under the same name.
    let recorded = profiler
        .stats("UnknownCustomBrick")
        .expect("F105 failed: dynamic brick not found");
    assert_eq!(recorded.count, 1);
}

/// F106: calling `finalize()` twice does not change the recorded count.
///
/// In deferred sync mode one sample is queued with `record_deferred`, then
/// `finalize` is called twice with the same end timestamp. Only the sample
/// count of `BrickId::RmsNorm` is compared between the two calls.
#[test]
fn test_f106_finalize_idempotent() {
    let mut profiler = BrickProfiler::new();
    profiler.enable();
    profiler.set_sync_mode(SyncMode::Deferred);
    profiler.reset_epoch();

    // Queue a single deferred sample that starts before a short sleep.
    let sample_start = profiler.elapsed_ns();
    std::thread::sleep(std::time::Duration::from_micros(100));
    profiler.record_deferred(BrickId::RmsNorm, sample_start, 1);

    // Both finalize calls receive the identical end timestamp.
    let finalize_at = profiler.elapsed_ns();

    profiler.finalize(finalize_at);
    let first_count = profiler.brick_stats(BrickId::RmsNorm).count;

    // The repeated call must not apply the sample a second time.
    profiler.finalize(finalize_at);
    let second_count = profiler.brick_stats(BrickId::RmsNorm).count;

    assert_eq!(first_count, second_count, "F106 failed: finalize not idempotent");
}

/// F108: the id-based timer is small enough for an allocation-free hot path.
///
/// This is a size check, not an allocation counter: it asserts that `BrickId`
/// is one byte, that `BrickIdTimer` is at most 32 bytes, and that the legacy
/// `BrickTimer` is strictly larger than `BrickIdTimer`.
#[test]
fn test_f108_zero_alloc_hot_path() {
    use std::mem::size_of;

    // A one-byte id cannot own heap data.
    assert_eq!(size_of::<BrickId>(), 1);

    // NOTE(review): per the original author, `BrickIdTimer` is a `BrickId`
    // plus an `Instant` (about 24 bytes with padding on Linux); the struct
    // definition is not visible here. The 32-byte bound leaves headroom.
    let id_timer_bytes = size_of::<BrickIdTimer>();
    assert!(id_timer_bytes <= 32, "F108: BrickIdTimer too large: {}", id_timer_bytes);

    // NOTE(review): the legacy `BrickTimer` is said to carry a `String`
    // (pointer + length + capacity), which would explain the larger size —
    // confirm against its definition.
    let legacy_timer_bytes = size_of::<BrickTimer>();
    assert!(
        legacy_timer_bytes > id_timer_bytes,
        "F108: BrickTimer ({}) should be larger than BrickIdTimer ({})",
        legacy_timer_bytes,
        id_timer_bytes
    );
}

/// F109: the v1 API surface is still present.
///
/// This is a compile-time check in practice: the body calls `start`, `stop`,
/// `stats`, `summary`, `to_json` and `brick_names` and discards the results.
/// No return value is asserted.
#[test]
fn test_f109_v1_api_compatible() {
    let mut profiler = BrickProfiler::new();
    profiler.enable();

    // String-keyed start/stop pair from the v1 API.
    let legacy_timer = profiler.start("TestBrick");
    profiler.stop(legacy_timer, 1);

    // v1 read-side accessors; results are intentionally ignored.
    let _ = profiler.stats("TestBrick");
    let _ = profiler.summary();
    let _ = profiler.to_json();
    let _ = profiler.brick_names();

    // Reaching this point without a compile error or panic is the pass
    // condition.
}

/// F110: JSON export contains the recorded brick.
///
/// One `BrickId::RmsNorm` sample is recorded, then the `to_json()` output is
/// searched for the brick name and a count of 1.
///
/// NOTE(review): despite the test name, no category field is asserted here —
/// only `"name"` and `"count"`.
#[test]
fn test_f110_json_export_includes_categories() {
    let mut profiler = BrickProfiler::new();
    profiler.enable();

    let timer = profiler.start_brick(BrickId::RmsNorm);
    profiler.stop_brick(timer, 1);

    let exported = profiler.to_json();

    // Substring checks assume the exporter emits compact JSON with no
    // whitespace after the colon.
    let has_name = exported.contains("\"name\":\"RmsNorm\"");
    let has_count = exported.contains("\"count\":1");

    assert!(has_name, "F110 failed: JSON missing brick name");
    assert!(has_count, "F110 failed: JSON missing count");
}

/// F101: deferred-mode profiling smoke test (simplified unit-test version).
///
/// Runs a trivial loop once without profiling and once with a deferred
/// sample recorded per iteration, then prints the ratio of the two wall-clock
/// times. The ratio is informational only and is NOT asserted; the single
/// assertion is that every iteration was recorded.
///
/// The real overhead target (<10%) is measured by the full benchmark in
/// benches/brick_profiler.rs.
#[test]
fn test_f101_deferred_mode_low_overhead() {
    use std::time::Instant;

    const ITERATIONS: u32 = 1000;

    // Reference run: same loop body, no profiler calls.
    let baseline_clock = Instant::now();
    for _ in 0..ITERATIONS {
        std::hint::black_box(1 + 1);
    }
    let baseline_ns = baseline_clock.elapsed().as_nanos() as u64;

    // Profiled run in deferred sync mode.
    let mut profiler = BrickProfiler::new();
    profiler.enable();
    profiler.set_sync_mode(SyncMode::Deferred);

    let deferred_clock = Instant::now();
    profiler.reset_epoch();
    for _ in 0..ITERATIONS {
        let sample_start = profiler.elapsed_ns();
        std::hint::black_box(1 + 1);
        profiler.record_deferred(BrickId::RmsNorm, sample_start, 1);
    }
    let finalize_at = profiler.elapsed_ns();
    profiler.finalize(finalize_at);
    let deferred_ns = deferred_clock.elapsed().as_nanos() as u64;

    // `max(1)` guards against dividing by zero when the baseline loop is
    // faster than the clock resolution. With a workload this small the ratio
    // can be very large, which is why it is only printed.
    let overhead = deferred_ns as f64 / baseline_ns.max(1) as f64;
    println!(
        "F101: baseline={}ns, deferred={}ns, overhead={:.1}x",
        baseline_ns, deferred_ns, overhead
    );

    // Every iteration must show up as exactly one recorded sample.
    assert_eq!(profiler.brick_stats(BrickId::RmsNorm).count, u64::from(ITERATIONS));
}

/// F107: concurrent use through a `Mutex` loses no samples.
///
/// Four threads share one profiler behind `Arc<Mutex<_>>`. Each thread
/// records 100 samples of one token each for its own brick, releasing the
/// lock between `start_brick` and `stop_brick`. Afterwards `total_tokens()`
/// must equal 4 * 100 = 400.
#[test]
fn test_f107_thread_safe() {
    use std::sync::{Arc, Mutex};

    let shared = Arc::new(Mutex::new(BrickProfiler::new()));

    // The guard is a temporary, so the lock is released at the end of the
    // statement.
    shared.lock().unwrap().enable();

    let mut workers = Vec::new();
    for i in 0..4 {
        let shared = Arc::clone(&shared);
        workers.push(std::thread::spawn(move || {
            for _ in 0..100 {
                // Each thread index maps to a distinct brick.
                let brick_id = match i % 4 {
                    0 => BrickId::RmsNorm,
                    1 => BrickId::QkvProjection,
                    2 => BrickId::GateProjection,
                    _ => BrickId::DownProjection,
                };

                // Lock only long enough to start the timer...
                let timer = shared.lock().unwrap().start_brick(brick_id);

                // ...let other threads interleave during the "work"...
                std::thread::yield_now();

                // ...then re-lock to record the sample.
                shared.lock().unwrap().stop_brick(timer, 1);
            }
        }));
    }

    for worker in workers {
        worker.join().unwrap();
    }

    let total = shared.lock().unwrap().total_tokens();
    assert_eq!(total, 400, "F107 failed: expected 400 tokens, got {}", total);
}