use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use rustsim::prelude::*;
/// Minimal agent used as the payload for the batch-step benchmarks in this file.
#[derive(Debug, Clone)]
struct Particle {
/// Identifier handed back by the `Agent::id` implementation.
id: AgentId,
/// Value exported as SoA column 0 (`"x"`); the kernels in this file overwrite it.
x: f32,
/// Value exported as SoA column 1 (`"vx"`); the kernels in this file only read it.
vx: f32,
}
/// Implements the `Agent` trait for `Particle`.
impl Agent for Particle {
/// Returns the identifier stored in the `id` field.
fn id(&self) -> AgentId {
self.id
}
}
/// Structure-of-arrays mapping for `Particle`: column 0 holds `x`, column 1 holds `vx`.
impl SoaExtractable for Particle {
    /// A particle contributes two `f32` columns.
    fn num_columns() -> usize {
        2
    }

    /// Column labels, in the same order the values are pushed by `extract_row`.
    fn column_names() -> Vec<&'static str> {
        ["x", "vx"].to_vec()
    }

    /// Appends this particle's `x` to `columns[0]` and its `vx` to `columns[1]`.
    ///
    /// Panics if `columns` has fewer than two entries.
    fn extract_row(&self, columns: &mut [Vec<f32>]) {
        let Self { x, vx, .. } = self;
        columns[0].push(*x);
        columns[1].push(*vx);
    }

    /// Copies `columns[0][row]` back into `x`.
    ///
    /// `vx` is deliberately left alone: none of the kernels in this file write to
    /// column 1, so there is nothing to restore.
    fn write_back_row(&mut self, columns: &[&[f32]], row: usize) {
        let x_column = columns[0];
        self.x = x_column[row];
    }
}
/// Builds a store holding `n` particles with ids `1..=n`.
///
/// Every particle starts with `x = 0.0` and `vx = 0.001`.
fn build_store(n: u64) -> HashMapStore<Particle> {
    let mut store: HashMapStore<Particle> = HashMapStore::new();
    let particles = (1..=n).map(|id| Particle {
        id,
        x: 0.0,
        vx: 0.001,
    });
    for particle in particles {
        store.insert(particle);
    }
    store
}
/// Kernel used by the `*/mem` benchmarks: adds column 1 (`vx`) to column 0 (`x`)
/// for the first `n` rows.
///
/// Rows at index `n` and beyond, and any columns after the second, are left untouched.
///
/// # Panics
///
/// Panics if `columns` has fewer than two entries or if either of the first two
/// columns is shorter than `n`. The length check happens once, before any row is
/// modified, so a bad `n` never leaves `x` half-updated.
fn advance(columns: &mut [Vec<f32>], n: usize) {
    let (x_col, rest) = columns.split_at_mut(1);
    // Slicing both columns to `n` up front hoists the bounds check out of the loop,
    // which lets the element-wise add below compile to a tight, vectorisable loop.
    let x = &mut x_col[0][..n];
    let vx = &rest[0][..n];
    for (xi, &vxi) in x.iter_mut().zip(vx) {
        *xi += vxi;
    }
}
/// Arithmetic-heavy per-row update used by the `*/compute` benchmarks.
///
/// Starts from `x + vx` and applies four rounds of
/// `tanh(sin(acc) * cos(acc) + vx)`, with the multiply-add done as a single `mul_add`.
#[inline(always)]
fn compute_heavy_step(x: f32, vx: f32) -> f32 {
    const ROUNDS: usize = 4;
    (0..ROUNDS).fold(x + vx, |state, _| {
        let mixed = state.sin().mul_add(state.cos(), vx);
        mixed.tanh()
    })
}
/// Kernel used by the `*/compute` benchmarks: replaces each of the first `n` values in
/// column 0 (`x`) with `compute_heavy_step(x, vx)`, reading `vx` from column 1.
///
/// Rows at index `n` and beyond, and any columns after the second, are left untouched.
///
/// # Panics
///
/// Panics if `columns` has fewer than two entries or if either of the first two
/// columns is shorter than `n`. The length check happens once, before any row is
/// modified, so a bad `n` never leaves `x` half-updated.
fn advance_heavy(columns: &mut [Vec<f32>], n: usize) {
    let (x_col, rest) = columns.split_at_mut(1);
    // One up-front slice per column replaces the per-iteration bounds checks.
    let x = &mut x_col[0][..n];
    let vx = &rest[0][..n];
    for (xi, &vxi) in x.iter_mut().zip(vx) {
        *xi = compute_heavy_step(*xi, vxi);
    }
}
#[cfg(feature = "rayon")]
/// Per-chunk kernel passed to `par_batch_step` by the `par_batch_step/mem` benchmark:
/// adds `slices[1]` (`vx`) to `slices[0]` (`x`) element-wise over the whole `x` chunk.
///
/// `chunk_start` is accepted only to match the kernel signature; the update is
/// row-local, so the value is ignored.
///
/// # Panics
///
/// Panics if `slices` has fewer than two entries or if the `vx` chunk is shorter than
/// the `x` chunk. The length check happens before any element is modified.
fn advance_par(chunk_start: usize, slices: &mut [&mut [f32]]) {
    let _ = chunk_start;
    let (x_slice, rest) = slices.split_at_mut(1);
    let x = &mut *x_slice[0];
    // Trim `vx` to the length of `x` once, instead of bounds-checking every index.
    let vx = &rest[0][..x.len()];
    for (xi, &vxi) in x.iter_mut().zip(vx) {
        *xi += vxi;
    }
}
#[cfg(feature = "rayon")]
/// Per-chunk kernel passed to `par_batch_step` by the `par_batch_step/compute`
/// benchmark: replaces every element of `slices[0]` (`x`) with
/// `compute_heavy_step(x, vx)`, reading `vx` from `slices[1]`.
///
/// `chunk_start` is accepted only to match the kernel signature; the update is
/// row-local, so the value is ignored.
///
/// # Panics
///
/// Panics if `slices` has fewer than two entries or if the `vx` chunk is shorter than
/// the `x` chunk. The length check happens before any element is modified.
fn advance_par_heavy(chunk_start: usize, slices: &mut [&mut [f32]]) {
    let _ = chunk_start;
    let (x_slice, rest) = slices.split_at_mut(1);
    let x = &mut *x_slice[0];
    // Trim `vx` to the length of `x` once, instead of bounds-checking every index.
    let vx = &rest[0][..x.len()];
    for (xi, &vxi) in x.iter_mut().zip(vx) {
        *xi = compute_heavy_step(*xi, vxi);
    }
}
/// Benchmarks `cpu_batch_step` (and, with the `rayon` feature, `par_batch_step`) on
/// stores of 10 000, 100 000 and 1 000 000 particles.
///
/// Each population size gets a `mem` variant (plain `x += vx`) and a `compute` variant
/// (`compute_heavy_step` per row). The store is built once per size, outside the timed
/// closures, and throughput is reported in elements.
///
/// Inside every timed closure the store reference is passed through `black_box` and the
/// *whole* step result is handed to `black_box`. Observing only a scalar such as
/// `agent_count` would allow the optimiser to discard the kernel's column writes, since
/// the step functions are generic and can be inlined into this crate.
fn bench_cpu_scaling(c: &mut Criterion) {
    let mut group = c.benchmark_group("cpu_scaling");
    group.sample_size(20);
    for &n in &[10_000u64, 100_000, 1_000_000] {
        let store = build_store(n);
        group.throughput(Throughput::Elements(n));
        group.bench_with_input(BenchmarkId::new("cpu_batch_step/mem", n), &n, |b, _| {
            b.iter(|| {
                let r = cpu_batch_step::<Particle, _, _>(black_box(&store), advance);
                black_box(r);
            });
        });
        group.bench_with_input(BenchmarkId::new("cpu_batch_step/compute", n), &n, |b, _| {
            b.iter(|| {
                let r = cpu_batch_step::<Particle, _, _>(black_box(&store), advance_heavy);
                black_box(r);
            });
        });
        #[cfg(feature = "rayon")]
        {
            // Aim for 16 chunks per run, but never go below 1024 rows per chunk.
            // The cast is lossless: `n` is one of the small constants listed above.
            let chunk_size = (n as usize / 16).max(1024);
            group.bench_with_input(BenchmarkId::new("par_batch_step/mem", n), &n, |b, _| {
                b.iter(|| {
                    let r = par_batch_step::<Particle, _, _>(
                        black_box(&store),
                        chunk_size,
                        advance_par,
                    );
                    black_box(r);
                });
            });
            group.bench_with_input(BenchmarkId::new("par_batch_step/compute", n), &n, |b, _| {
                b.iter(|| {
                    let r = par_batch_step::<Particle, _, _>(
                        black_box(&store),
                        chunk_size,
                        advance_par_heavy,
                    );
                    black_box(r);
                });
            });
        }
    }
    group.finish();
}
// Register `bench_cpu_scaling` under the `benches` group and hand that group to
// Criterion's generated benchmark entry point.
criterion_group!(benches, bench_cpu_scaling);
criterion_main!(benches);