use crate::Backend;
use std::collections::VecDeque;
use std::marker::PhantomData;
/// Tolerances used when results produced by two backends are compared.
///
/// Each field is the value [`BackendTolerance::for_backends`] returns for one
/// class of backend pairing.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct BackendTolerance {
    /// Returned for scalar-vs-SIMD and SIMD-vs-SIMD pairings.
    pub scalar_vs_simd: f32,
    /// Returned when exactly one side of the pairing is the GPU backend.
    pub simd_vs_gpu: f32,
    /// Returned when both sides of the pairing are the GPU backend.
    pub gpu_vs_gpu: f32,
}

impl Default for BackendTolerance {
    /// Builds the default set: `0.0` between CPU backends, `1e-5` against the
    /// GPU and `1e-6` GPU-to-GPU.
    fn default() -> Self {
        let scalar_vs_simd = 0.0;
        let simd_vs_gpu = 1e-5;
        let gpu_vs_gpu = 1e-6;
        Self {
            scalar_vs_simd,
            simd_vs_gpu,
            gpu_vs_gpu,
        }
    }
}
impl BackendTolerance {
#[must_use]
pub const fn strict() -> Self {
Self { scalar_vs_simd: 0.0, simd_vs_gpu: 0.0, gpu_vs_gpu: 0.0 }
}
#[must_use]
pub const fn relaxed() -> Self {
Self { scalar_vs_simd: 1e-6, simd_vs_gpu: 1e-4, gpu_vs_gpu: 1e-5 }
}
#[must_use]
pub fn for_backends(&self, a: Backend, b: Backend) -> f32 {
match (a, b) {
(Backend::Scalar, Backend::Scalar) => 0.0,
(
Backend::Scalar,
Backend::SSE2
| Backend::AVX
| Backend::AVX2
| Backend::AVX512
| Backend::NEON
| Backend::WasmSIMD
| Backend::Auto,
)
| (
Backend::SSE2
| Backend::AVX
| Backend::AVX2
| Backend::AVX512
| Backend::NEON
| Backend::WasmSIMD
| Backend::Auto,
Backend::Scalar,
) => self.scalar_vs_simd,
(Backend::GPU, Backend::GPU) => self.gpu_vs_gpu,
(
Backend::GPU,
Backend::Scalar
| Backend::SSE2
| Backend::AVX
| Backend::AVX2
| Backend::AVX512
| Backend::NEON
| Backend::WasmSIMD
| Backend::Auto,
)
| (
Backend::Scalar
| Backend::SSE2
| Backend::AVX
| Backend::AVX2
| Backend::AVX512
| Backend::NEON
| Backend::WasmSIMD
| Backend::Auto,
Backend::GPU,
) => self.simd_vs_gpu,
(
Backend::SSE2
| Backend::AVX
| Backend::AVX2
| Backend::AVX512
| Backend::NEON
| Backend::WasmSIMD
| Backend::Auto,
Backend::SSE2
| Backend::AVX
| Backend::AVX2
| Backend::AVX512
| Backend::NEON
| Backend::WasmSIMD
| Backend::Auto,
) => self.scalar_vs_simd,
}
}
}
/// Picks an execution category for a workload from its input size, using two
/// size thresholds.
#[derive(Debug, Clone)]
pub struct BackendSelector {
    /// Sizes at or above this value are eligible for [`BackendCategory::Gpu`].
    gpu_threshold: usize,
    /// Sizes below this value are classified as [`BackendCategory::SimdOnly`].
    parallel_threshold: usize,
}

impl Default for BackendSelector {
    /// Uses a GPU threshold of `100_000` and a parallel threshold of `1_000`.
    fn default() -> Self {
        Self::new(100_000, 1_000)
    }
}

impl BackendSelector {
    /// Creates a selector with explicit thresholds.
    #[must_use]
    pub const fn new(gpu_threshold: usize, parallel_threshold: usize) -> Self {
        Self { gpu_threshold, parallel_threshold }
    }

    /// Returns the size at which GPU execution becomes eligible.
    #[must_use]
    pub const fn gpu_threshold(&self) -> usize {
        self.gpu_threshold
    }

    /// Returns the size at which parallel execution starts.
    #[must_use]
    pub const fn parallel_threshold(&self) -> usize {
        self.parallel_threshold
    }

    /// Classifies an input of `size`.
    ///
    /// * `size < parallel_threshold` gives [`BackendCategory::SimdOnly`]
    /// * otherwise, `size >= gpu_threshold` with `gpu_available` gives
    ///   [`BackendCategory::Gpu`]
    /// * everything else gives [`BackendCategory::SimdParallel`]
    #[must_use]
    pub fn select_for_size(&self, size: usize, gpu_available: bool) -> BackendCategory {
        if size < self.parallel_threshold {
            return BackendCategory::SimdOnly;
        }
        if gpu_available && size >= self.gpu_threshold {
            BackendCategory::Gpu
        } else {
            BackendCategory::SimdParallel
        }
    }

    /// Returns `true` when `size` equals the GPU threshold or is exactly one
    /// below it. A threshold of `0` has no "one below" value, so only `0`
    /// itself counts.
    #[must_use]
    pub fn is_at_gpu_boundary(&self, size: usize) -> bool {
        Self::touches_boundary(size, self.gpu_threshold)
    }

    /// Returns `true` when `size` equals the parallel threshold or is exactly
    /// one below it. A threshold of `0` has no "one below" value, so only `0`
    /// itself counts.
    #[must_use]
    pub fn is_at_parallel_boundary(&self, size: usize) -> bool {
        Self::touches_boundary(size, self.parallel_threshold)
    }

    /// Shared boundary test: `size` is `threshold` or `threshold - 1`.
    fn touches_boundary(size: usize, threshold: usize) -> bool {
        // Compare `size + 1` with the threshold instead of computing
        // `threshold - 1`: the subtraction underflows for a zero threshold
        // (panic in debug builds, a bogus `usize::MAX` match in release).
        size == threshold || size.checked_add(1) == Some(threshold)
    }
}

/// Execution category chosen by [`BackendSelector::select_for_size`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BackendCategory {
    /// Selected for sizes below the parallel threshold.
    SimdOnly,
    /// Selected for sizes at or above the parallel threshold that do not go
    /// to the GPU.
    SimdParallel,
    /// Selected for sizes at or above the GPU threshold when a GPU is available.
    Gpu,
}
/// One scheduled test case: a backend, an input size, a cycle index and a seed.
#[derive(Debug, Clone)]
pub struct SimulationTest {
/// Backend this test is scheduled for.
pub backend: Backend,
/// Input size this test is scheduled with.
pub input_size: usize,
/// Repetition index; `HeijunkaScheduler::new` fills it from `0..cycles_per_backend`.
pub cycle: u32,
/// Per-test seed; `HeijunkaScheduler::new` fills it with the result of `compute_seed`.
pub seed: u64,
}
/// First-in, first-out queue of [`SimulationTest`]s generated up front from a
/// set of backends, input sizes and a cycle count.
#[derive(Debug)]
pub struct HeijunkaScheduler {
    /// Tests that have not been handed out yet, in scheduling order.
    queue: VecDeque<SimulationTest>,
    /// The backend list the scheduler was built with.
    backends: Vec<Backend>,
}

impl HeijunkaScheduler {
    /// Builds the full test queue.
    ///
    /// One test is queued for every combination of input size, backend and
    /// cycle in `0..cycles_per_backend`. Tests are ordered by input size
    /// first, then backend, then cycle, each following the order of its input
    /// vector. Every test's seed comes from `compute_seed` with `master_seed`.
    #[must_use]
    pub fn new(
        backends: Vec<Backend>,
        input_sizes: Vec<usize>,
        cycles_per_backend: u32,
        master_seed: u64,
    ) -> Self {
        let mut queue = VecDeque::new();
        for &size in &input_sizes {
            for &backend in &backends {
                // `extend` can reserve room for the whole cycle range at once.
                queue.extend((0..cycles_per_backend).map(|cycle| SimulationTest {
                    backend,
                    input_size: size,
                    cycle,
                    seed: compute_seed(backend, size, cycle, master_seed),
                }));
            }
        }
        // `backends` is owned and no longer borrowed here, so it is moved
        // into the scheduler instead of being cloned.
        Self { queue, backends }
    }

    /// Removes and returns the next queued test, or `None` once the queue is
    /// drained.
    pub fn next_test(&mut self) -> Option<SimulationTest> {
        self.queue.pop_front()
    }

    /// Returns how many tests are still queued.
    #[must_use]
    pub fn remaining(&self) -> usize {
        self.queue.len()
    }

    /// Returns the backend list the scheduler was built with.
    #[must_use]
    pub fn backends(&self) -> &[Backend] {
        &self.backends
    }

    /// Returns `true` when no tests are left in the queue.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.queue.is_empty()
    }
}
/// Derives a per-test seed from `master_seed` and the test's coordinates.
///
/// Each coordinate (the backend's discriminant, the input size and the cycle
/// index) is multiplied by its own fixed 64-bit constant, and the products are
/// added to `master_seed`. All arithmetic wraps on overflow.
pub(crate) fn compute_seed(backend: Backend, size: usize, cycle: u32, master_seed: u64) -> u64 {
    // (coordinate value, mixing multiplier) pairs.
    let terms: [(u64, u64); 3] = [
        (backend as u64, 0x9E37_79B9_7F4A_7C15),
        (size as u64, 0x6A09_E667_BB67_AE85),
        (u64::from(cycle), 0x3C6E_F372_FE94_F82B),
    ];
    terms.iter().fold(master_seed, |acc, &(value, multiplier)| {
        acc.wrapping_add(value.wrapping_mul(multiplier))
    })
}
/// Builder for [`SimTestConfig`] whose type parameter `S` records whether a
/// seed has been supplied yet ([`NeedsSeed`] or [`Ready`]).
///
/// The derived `Debug` and `Clone` impls carry an `S: Debug` / `S: Clone`
/// bound, so they only apply because both marker types implement those traits.
#[derive(Debug, Clone)]
pub struct SimTestConfigBuilder<S> {
    /// Master seed that ends up in the built configuration.
    seed: u64,
    /// Tolerance set that ends up in the built configuration.
    tolerance: BackendTolerance,
    /// Backends that end up in the built configuration.
    backends: Vec<Backend>,
    /// Input sizes that end up in the built configuration.
    input_sizes: Vec<usize>,
    /// Cycle count that ends up in the built configuration.
    cycles: u32,
    /// Zero-sized marker carrying the builder state.
    _state: PhantomData<S>,
}

/// Builder state: no seed has been supplied yet.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct NeedsSeed;

/// Builder state: a seed has been supplied.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Ready;
impl Default for SimTestConfigBuilder<NeedsSeed> {
    /// Same as [`SimTestConfigBuilder::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl SimTestConfigBuilder<NeedsSeed> {
    /// Starts a builder with a seed of `0`, the default tolerance, the
    /// `Scalar` and `AVX2` backends, input sizes `100`, `1_000`, `10_000` and
    /// `100_000`, and `10` cycles.
    #[must_use]
    pub fn new() -> Self {
        let backends = vec![Backend::Scalar, Backend::AVX2];
        let input_sizes = vec![100, 1_000, 10_000, 100_000];
        Self {
            seed: 0,
            tolerance: BackendTolerance::default(),
            backends,
            input_sizes,
            cycles: 10,
            _state: PhantomData,
        }
    }

    /// Supplies the seed and moves the builder into the [`Ready`] state,
    /// carrying every other setting over unchanged.
    #[must_use]
    pub fn seed(self, seed: u64) -> SimTestConfigBuilder<Ready> {
        // The previous seed and state marker are dropped; the rest is moved.
        let Self {
            tolerance,
            backends,
            input_sizes,
            cycles,
            ..
        } = self;
        SimTestConfigBuilder {
            seed,
            tolerance,
            backends,
            input_sizes,
            cycles,
            _state: PhantomData,
        }
    }
}
impl SimTestConfigBuilder<Ready> {
    /// Replaces the tolerance set, leaving every other setting as it was.
    #[must_use]
    pub fn tolerance(self, tolerance: BackendTolerance) -> Self {
        Self { tolerance, ..self }
    }

    /// Replaces the backend list, leaving every other setting as it was.
    #[must_use]
    pub fn backends(self, backends: Vec<Backend>) -> Self {
        Self { backends, ..self }
    }

    /// Replaces the input-size list, leaving every other setting as it was.
    #[must_use]
    pub fn input_sizes(self, sizes: Vec<usize>) -> Self {
        Self {
            input_sizes: sizes,
            ..self
        }
    }

    /// Replaces the cycle count, leaving every other setting as it was.
    #[must_use]
    pub fn cycles(self, cycles: u32) -> Self {
        Self { cycles, ..self }
    }

    /// Finishes the builder, moving its settings into a [`SimTestConfig`].
    #[must_use]
    pub fn build(self) -> SimTestConfig {
        let Self {
            seed,
            tolerance,
            backends,
            input_sizes,
            cycles,
            _state: _,
        } = self;
        SimTestConfig {
            seed,
            tolerance,
            backends,
            input_sizes,
            cycles,
        }
    }
}
/// Finished test configuration, as produced by `SimTestConfigBuilder::build`.
#[derive(Debug, Clone)]
pub struct SimTestConfig {
/// Master seed; `create_scheduler` passes it to `HeijunkaScheduler::new`.
pub seed: u64,
/// Tolerance set stored with the configuration.
pub tolerance: BackendTolerance,
/// Backends that `create_scheduler` schedules tests for.
pub backends: Vec<Backend>,
/// Input sizes that `create_scheduler` schedules tests for.
pub input_sizes: Vec<usize>,
/// Number of cycles `create_scheduler` requests per backend.
pub cycles: u32,
}
impl SimTestConfig {
    /// Starts a new builder in the [`NeedsSeed`] state.
    #[must_use]
    pub fn builder() -> SimTestConfigBuilder<NeedsSeed> {
        SimTestConfigBuilder::<NeedsSeed>::new()
    }

    /// Creates a [`HeijunkaScheduler`] from copies of this configuration's
    /// backends and input sizes, together with its cycle count and seed. The
    /// configuration itself is left untouched.
    #[must_use]
    pub fn create_scheduler(&self) -> HeijunkaScheduler {
        let backends = self.backends.clone();
        let input_sizes = self.input_sizes.clone();
        HeijunkaScheduler::new(backends, input_sizes, self.cycles, self.seed)
    }
}
#[cfg(test)]
mod tests;
#[cfg(test)]
mod proptests;