// rustsim 0.0.1 - Docs.rs

//! GPU-accelerated batch stepping test.
//!
//! Demonstrates the SoA extract -> GPU/CPU kernel -> write-back pipeline.
//! With `--features cuda` and a CUDA device: runs on GPU.
//! Without: runs the same kernel logic on CPU as a fallback.

use rand::rngs::StdRng;
use rand::SeedableRng;
use rustsim::prelude::*;
use std::time::Instant;

/// Number of particles inserted into each store by the tests below (ids `1..=N`).
const N: u64 = 1_000_000;

// --- Agent ---

/// Minimal agent used by the batch-stepping tests: an id plus two `f32` fields.
#[derive(Debug, Clone)]
struct Particle {
    /// Identifier returned by the `Agent::id` implementation.
    id: AgentId,
    /// Position; the kernels in this file add `vx` to it on every step.
    x: f32,
    /// Velocity; read by the kernels in this file but never modified by them.
    vx: f32,
}

// Implements the `Agent` trait for `Particle` by exposing its stored id.
impl Agent for Particle {
    /// Returns the id stored in the particle's `id` field.
    fn id(&self) -> AgentId {
        self.id
    }
}

impl SoaExtractable for Particle {
    fn num_columns() -> usize {
        2
    }

    fn column_names() -> Vec<&'static str> {
        vec!["x", "vx"]
    }

    fn extract_row(&self, columns: &mut [Vec<f32>]) {
        columns[0].push(self.x);
        columns[1].push(self.vx);
    }

    fn write_back_row(&mut self, columns: &[&[f32]], row: usize) {
        self.x = columns[0][row];
        // vx is read-only for this kernel, but we could update it too
    }
}

// --- CPU kernel: x[i] += vx[i] for all i ---

/// CPU integration kernel: advances each position by its velocity, `x[i] += vx[i]`.
///
/// `columns[0]` is treated as the `x` column and `columns[1]` as the `vx` column.
/// Only the first `n` rows are updated; `vx` and any further columns are left as-is.
///
/// # Panics
///
/// Panics if fewer than two columns are supplied, or if either of the first two
/// columns holds fewer than `n` rows. The length check happens before any row is
/// modified, so a bad `n` never leaves `x` partially updated.
fn integrate_cpu(columns: &mut [Vec<f32>], n: usize) {
    // Split so `x` can be borrowed mutably while `vx` is borrowed immutably.
    let (x_col, rest) = columns.split_at_mut(1);

    // Slicing to `n` up front performs a single bounds check per column instead
    // of two per row, which also lets the loop below be vectorized.
    let x = &mut x_col[0][..n];
    let vx = &rest[0][..n];

    for (xi, vi) in x.iter_mut().zip(vx) {
        *xi += *vi;
    }
}

// --- Test ---

/// Runs the SoA batch pipeline over `N` particles and checks correctness and timing.
///
/// Every particle starts at `x = 0.0` with the same velocity, so after `STEPS`
/// applications of `integrate_cpu` each one should sit at `STEPS * VX`. The test
/// also asserts that the average step time stays under a generous 5 s ceiling.
#[test]
fn gpu_accelerated_million_agents() {
    // Single source of truth for the workload: the loop bound, the per-step
    // average, the summary line and the expected displacement are all derived
    // from these two constants, so they cannot drift apart.
    const STEPS: u16 = 10;
    const VX: f32 = 0.001;

    // Populate
    let mut store = HashMapStore::new();
    for i in 1..=N {
        store.insert(Particle {
            id: i,
            x: 0.0,
            vx: VX,
        });
    }

    // The detected backend is only reported here; the stepping loop below
    // always goes through `cpu_batch_step`.
    let backend = detect_backend();
    eprintln!("[GPU test] Detected backend: {}", backend);

    // Run the batch steps, logging kernel details for the first one only.
    let t0 = Instant::now();
    for step in 0..STEPS {
        let result = cpu_batch_step::<Particle, _, _>(&store, integrate_cpu);

        if step == 0 {
            eprintln!(
                "[GPU test] Step {} via {}: {} agents, kernel {} us",
                step, result.backend, result.agent_count, result.kernel_us
            );
        }
    }
    let total_ms = t0.elapsed().as_millis();
    // `as_millis` yields a `u128`, which has no lossless `From` conversion to `f64`.
    let per_step = total_ms as f64 / f64::from(STEPS);

    eprintln!(
        "[GPU test] {} steps x {} agents in {} ms ({:.1} ms/step)",
        STEPS, N, total_ms, per_step
    );

    // Verify the first agent and a mid-range agent. The mid-range id is derived
    // from `N` (clamped to the first valid id) so it always refers to an agent
    // that was actually inserted.
    let expected = f32::from(STEPS) * VX;
    for &id in &[1, (N / 2).max(1)] {
        // The value returned by `get` is released at the end of each iteration.
        let agent = store.get(id).unwrap();
        assert!(
            (agent.x - expected).abs() < 1e-5,
            "agent {}: expected x={}, got x={}",
            id,
            expected,
            agent.x
        );
    }

    eprintln!("[GPU test] Correctness verified.");

    // Timing assertion
    assert!(
        per_step < 5_000.0,
        "batch step should take < 5s (took {:.0} ms)",
        per_step
    );
}

/// Runs the same ten-step workload through two paths and checks that both land
/// on the expected position: the SoA batch path (`cpu_batch_step`) and standard
/// per-agent stepping inside a `StandardModel` (AoS).
///
/// Elapsed times for both paths are logged for comparison; only the resulting
/// positions are asserted.
#[test]
fn soa_vs_aos_comparison() {
    // Type shorthands for the per-agent (AoS) model and its step context.
    type Space = rustsim_spaces::nothing::NothingSpace;
    type PM = StandardModel<Space, Particle, HashMapStore<Particle>, (), StdRng, Fastest>;
    type Ctx<'a> = StepContext<'a, Space, Particle, (), StdRng, Fastest>;

    // Per-agent equivalent of the `integrate_cpu` kernel.
    fn particle_step(agent: &mut Particle, _ctx: &mut Ctx<'_>) {
        agent.x += agent.vx;
    }

    // --- SoA path ---
    let mut soa_store = HashMapStore::new();
    for id in 1..=N {
        soa_store.insert(Particle {
            id,
            x: 0.0,
            vx: 0.001,
        });
    }

    let soa_timer = Instant::now();
    for _ in 0..10 {
        cpu_batch_step::<Particle, _, _>(&soa_store, integrate_cpu);
    }
    let soa_ms = soa_timer.elapsed().as_millis();

    // --- AoS path (standard per-agent stepping) ---
    let mut aos_store = HashMapStore::new();
    for id in 1..=N {
        aos_store.insert(Particle {
            id,
            x: 0.0,
            vx: 0.001,
        });
    }
    let mut model = PM::new(
        aos_store,
        rustsim_spaces::nothing::NothingSpace,
        Fastest::new(),
        (),
        StdRng::seed_from_u64(42),
        Some(Box::new(particle_step)),
        None,
        true,
    );

    let aos_timer = Instant::now();
    model.step_n(10);
    let aos_ms = aos_timer.elapsed().as_millis();

    eprintln!(
        "[SoA vs AoS] 10 steps x {} agents: SoA={} ms, AoS={} ms",
        N, soa_ms, aos_ms
    );

    // Both paths should have moved agent 1 by 10 * 0.001.
    let expected = 10.0 * 0.001f32;
    let within_tolerance = |x: f32| (x - expected).abs() < 1e-5;

    let soa_x = soa_store.get(1).unwrap().x;
    let aos_x = model.agent(1).unwrap().x;
    assert!(within_tolerance(soa_x), "SoA agent 1 wrong: {}", soa_x);
    assert!(within_tolerance(aos_x), "AoS agent 1 wrong: {}", aos_x);

    eprintln!("[SoA vs AoS] Both paths produce correct results.");
}