use log::info;
use rayon::prelude::*;
use std::cell::UnsafeCell;
/// Maps the contents of an `Option` expression, like [`Option::map`], but as a
/// macro: the transformation is an inline expression rather than a closure, so
/// control flow (`return`, `?`, `continue`) inside it affects the caller.
macro_rules! map_option {
    ($some_optional:expr, $value_identifier:ident => $value_transformation:expr) => {
        if let Some($value_identifier) = $some_optional {
            Some($value_transformation)
        } else {
            None
        }
    };
}
/// A view over a slice that hands out unsynchronized mutable references to its
/// elements from a shared `&self`.
///
/// Intended for parallel writes to disjoint indices; nothing here prevents two
/// callers from aliasing the same index — that responsibility lies entirely
/// with the caller of [`UnsafeSlice::get_mut_unchecked`].
#[derive(Copy, Clone)]
pub(crate) struct UnsafeSlice<'a, T> {
    // The original `&mut [T]` reinterpreted element-wise as `UnsafeCell<T>`,
    // which is what makes mutation through a shared reference permissible.
    slice: &'a [UnsafeCell<T>],
}
// SAFETY: sending the view to another thread transfers access to the `T`s, so
// `T: Send` is required; because copies of the view can mutate concurrently
// from several threads, `T: Sync` is additionally required.
unsafe impl<T: Send + Sync> Send for UnsafeSlice<'_, T> {}
// SAFETY: sharing `&UnsafeSlice` across threads allows concurrent access to
// the underlying `T`s, hence the `T: Send + Sync` bound.
unsafe impl<T: Send + Sync> Sync for UnsafeSlice<'_, T> {}
impl<'a, T> UnsafeSlice<'a, T> {
    /// Wraps an exclusive slice; the returned view borrows it for `'a`, so no
    /// other access to the original slice is possible while the view lives.
    pub fn new(slice: &'a mut [T]) -> Self {
        // `UnsafeCell<T>` is `#[repr(transparent)]` over `T`, so `[T]` and
        // `[UnsafeCell<T>]` have identical layout and the cast is valid.
        let ptr = slice as *mut [T] as *const [UnsafeCell<T>];
        Self {
            // SAFETY: `ptr` comes from a live `&mut [T]` with lifetime `'a`,
            // so it is non-null, aligned, and valid for `'a`.
            slice: unsafe { &*ptr },
        }
    }
    /// Returns the number of elements in the underlying slice.
    pub fn len(&self) -> usize {
        self.slice.len()
    }
    /// Returns a mutable reference to the element at index `i`.
    ///
    /// # Safety
    /// `i` must be in bounds (only checked via `debug_assert!`), and the caller
    /// must guarantee that no other reference (shared or mutable) to element
    /// `i` exists for the lifetime of the returned reference — e.g. by having
    /// each thread write only to its own disjoint set of indices.
    #[allow(clippy::mut_from_ref)]
    pub unsafe fn get_mut_unchecked(&self, i: usize) -> &mut T {
        debug_assert!(i < self.len(), "index out of bounds");
        unsafe { &mut *self.slice.get_unchecked(i).get() }
    }
}
/// Ensures that `vec` has a total capacity of at least `total_capacity`
/// elements; does nothing when the current capacity already suffices.
///
/// Note: `Vec::reserve(additional)` guarantees `capacity >= len + additional`,
/// i.e. `additional` is relative to the *length*, not the capacity. The
/// previous code passed `total_capacity - vec.capacity()`, which only
/// guaranteed `capacity >= len + total_capacity - capacity` and therefore
/// under-reserved whenever `len < capacity`.
pub(crate) fn reserve_total<T>(vec: &mut Vec<T>, total_capacity: usize) {
    if total_capacity > vec.capacity() {
        // `total_capacity > capacity >= len`, so the subtraction cannot wrap.
        vec.reserve(total_capacity - vec.len());
    }
}
#[allow(unused)]
pub(crate) fn resize_and_fill<T: Clone + Send + Sync>(
vec: &mut Vec<T>,
new_len: usize,
value: T,
par: bool,
) {
if par {
par_resize_and_fill(vec, new_len, value);
} else {
seq_resize_and_fill(vec, new_len, value);
}
}
/// Sequentially overwrites every retained element of `vec` with a clone of
/// `value`, then resizes the vector to `new_len` (appending further clones
/// when growing, truncating when shrinking).
#[allow(unused)]
pub(crate) fn seq_resize_and_fill<T: Clone>(vec: &mut Vec<T>, new_len: usize, value: T) {
    // Only the prefix that survives the resize needs to be overwritten.
    let overwrite = vec.len().min(new_len);
    for slot in vec[..overwrite].iter_mut() {
        *slot = value.clone();
    }
    // Handles both growth (cloning `value`) and truncation.
    vec.resize(new_len, value);
}
#[allow(unused)]
pub(crate) fn par_resize_and_fill<T: Clone + Send + Sync>(
vec: &mut Vec<T>,
new_len: usize,
value: T,
) {
let old_len = vec.len();
vec.par_iter_mut()
.with_min_len(8)
.take(old_len.min(new_len))
.for_each(|v| *v = value.clone());
vec.resize(new_len, value);
}
/// Tuning knobs controlling how work is split into tasks for parallel
/// processing (consumed by [`ChunkSize::new`]).
pub struct ParallelPolicy {
    /// Lower bound on the number of items per task.
    pub min_task_size: usize,
    /// Desired number of tasks per worker thread (for load balancing).
    pub tasks_per_thread: usize,
}

impl Default for ParallelPolicy {
    /// Default policy: tasks of at least 256 items, 8 tasks per thread.
    fn default() -> Self {
        ParallelPolicy {
            min_task_size: 256,
            tasks_per_thread: 8,
        }
    }
}
/// Result of splitting a number of items into similarly sized chunks for
/// parallel processing (computed by [`ChunkSize::new`]).
pub(crate) struct ChunkSize {
    /// Total number of items that were split into chunks.
    pub num_items: usize,
    /// Number of chunks; the last chunk may contain fewer than `chunk_size` items.
    pub num_chunks: usize,
    /// Number of items per chunk (except possibly the last chunk).
    pub chunk_size: usize,
}
impl ChunkSize {
    /// Computes how to split `num_items` into chunks for parallel processing.
    ///
    /// When there is too little work for every rayon thread to receive at
    /// least `min_task_size` items, the items are split evenly across threads;
    /// otherwise the chunk size targets `tasks_per_thread` chunks per thread
    /// while respecting `min_task_size`. The chunk size never drops below 16.
    pub(crate) fn new(parallel_policy: &ParallelPolicy, num_items: usize) -> Self {
        let num_threads = rayon::current_num_threads();
        // Items per thread under a perfectly even distribution.
        let equal_distribution = num_items / num_threads;
        let chunk_size = if parallel_policy.min_task_size > equal_distribution {
            // Not enough work for minimum-size tasks on all threads: split evenly.
            equal_distribution
        } else {
            // Aim for `tasks_per_thread` tasks per thread (better load
            // balancing), but never below the minimum task size.
            let num_tasks = parallel_policy.tasks_per_thread * num_threads;
            (num_items / num_tasks).max(parallel_policy.min_task_size)
        }
        // Lower bound of 16 also guarantees a non-zero divisor below even when
        // `equal_distribution` is 0 (more threads than items).
        .max(16);
        // Ceiling division: a final, possibly partial, chunk is counted too.
        let num_chunks = num_items.div_ceil(chunk_size);
        Self {
            num_items,
            num_chunks,
            chunk_size,
        }
    }
    /// Logs the computed splitting at info level.
    ///
    /// NOTE(review): the message previously hard-coded "particles each" even
    /// though `item_name` is caller-supplied; it now echoes `item_name`.
    pub(crate) fn log<S1: AsRef<str>, S2: AsRef<str>>(&self, item_name: S1, purpose: S2) {
        info!(
            "Splitting {} {} into {} chunks (with {} {} each) for {}",
            self.num_items,
            item_name.as_ref(),
            self.num_chunks,
            self.chunk_size,
            item_name.as_ref(),
            purpose.as_ref(),
        );
    }
    /// Logs the splitting (see [`Self::log`]) and returns `self` for chaining.
    pub(crate) fn with_log<S1: AsRef<str>, S2: AsRef<str>>(
        self,
        item_name: S1,
        purpose: S2,
    ) -> Self {
        self.log(item_name, purpose);
        self
    }
}
/// SIMD instruction set extensions detectable by [`detect_simd_support`].
#[allow(dead_code)]
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum SimdFeatures {
    /// x86/x86_64 CPU with both AVX2 and FMA available.
    Avx2Fma,
    /// aarch64 CPU with NEON available.
    Neon,
}
/// Runtime detection of the SIMD extensions supported by the current CPU.
///
/// Returns `Some(Avx2Fma)` on x86/x86_64 when both AVX2 and FMA are present,
/// `Some(Neon)` on aarch64 with NEON, and `None` otherwise (including on any
/// other architecture, where both probes are compiled out).
pub(crate) fn detect_simd_support() -> Option<SimdFeatures> {
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    {
        // AVX2 is only ever used together with FMA, so both must be present.
        let has_avx2 = is_x86_feature_detected!("avx2");
        let has_fma = is_x86_feature_detected!("fma");
        if has_avx2 && has_fma {
            return Some(SimdFeatures::Avx2Fma);
        }
    }
    #[cfg(target_arch = "aarch64")]
    {
        let has_neon = std::arch::is_aarch64_feature_detected!("neon");
        if has_neon {
            return Some(SimdFeatures::Neon);
        }
    }
    None
}