use crate::gto::prelude_dev::*;
use num::traits::{MulAdd, NumAssignOps};
use num::{Num, Zero};
#[repr(align(64))]
#[derive(Clone, Debug, Copy)]
pub struct FpSimd<T: Copy, const N: usize = SIMDD>(pub [T; N]);
#[allow(non_camel_case_types)]
pub type f64simd = FpSimd<f64, SIMDD>;
impl<T: Copy, const N: usize> Index<usize> for FpSimd<T, N> {
type Output = T;
#[inline(always)]
fn index(&self, index: usize) -> &Self::Output {
&self.0[index]
}
}
impl<T: Copy, const N: usize> IndexMut<usize> for FpSimd<T, N> {
#[inline(always)]
fn index_mut(&mut self, index: usize) -> &mut Self::Output {
&mut self.0[index]
}
}
impl<T: Zero + Copy, const N: usize> FpSimd<T, N> {
#[inline(always)]
pub fn zero() -> Self {
FpSimd([T::zero(); N])
}
#[inline(always)]
#[allow(clippy::uninit_assumed_init)]
#[allow(invalid_value)]
pub unsafe fn uninit() -> Self {
core::mem::MaybeUninit::uninit().assume_init()
}
#[inline(always)]
pub const fn splat(val: T) -> Self {
FpSimd([val; N])
}
#[inline(always)]
pub fn fill(&mut self, val: T) {
self.0 = [val; N];
}
#[inline]
pub fn map<F>(&self, f: F) -> Self
where
F: Fn(T) -> T,
{
FpSimd(self.0.map(f))
}
}
#[duplicate_item(
Trait trait_fn;
[Add] [add];
[Sub] [sub];
[Mul] [mul];
[Div] [div];
)]
mod impl_trait_for_f64simd {
use super::*;
impl<T: Num + Copy> Trait for FpSimd<T, SIMDD> {
type Output = Self;
#[inline(always)]
fn trait_fn(self, rhs: Self) -> Self::Output {
FpSimd([
T::trait_fn(self[0], rhs[0]),
T::trait_fn(self[1], rhs[1]),
T::trait_fn(self[2], rhs[2]),
T::trait_fn(self[3], rhs[3]),
T::trait_fn(self[4], rhs[4]),
T::trait_fn(self[5], rhs[5]),
T::trait_fn(self[6], rhs[6]),
T::trait_fn(self[7], rhs[7]),
])
}
}
impl<T: Num + Copy> Trait<T> for FpSimd<T, SIMDD> {
type Output = Self;
#[inline(always)]
fn trait_fn(self, rhs: T) -> Self::Output {
FpSimd([
T::trait_fn(self[0], rhs),
T::trait_fn(self[1], rhs),
T::trait_fn(self[2], rhs),
T::trait_fn(self[3], rhs),
T::trait_fn(self[4], rhs),
T::trait_fn(self[5], rhs),
T::trait_fn(self[6], rhs),
T::trait_fn(self[7], rhs),
])
}
}
}
#[duplicate_item(
Trait trait_fn;
[AddAssign] [add_assign];
[SubAssign] [sub_assign];
[MulAssign] [mul_assign];
[DivAssign] [div_assign];
)]
impl<T: NumAssignOps + Copy> Trait for FpSimd<T, SIMDD> {
#[inline(always)]
fn trait_fn(&mut self, rhs: Self) {
self[0].trait_fn(rhs[0]);
self[1].trait_fn(rhs[1]);
self[2].trait_fn(rhs[2]);
self[3].trait_fn(rhs[3]);
self[4].trait_fn(rhs[4]);
self[5].trait_fn(rhs[5]);
self[6].trait_fn(rhs[6]);
self[7].trait_fn(rhs[7]);
}
}
impl<T: Neg<Output = T> + Copy> Neg for FpSimd<T, SIMDD> {
type Output = Self;
#[inline(always)]
fn neg(self) -> Self::Output {
FpSimd([-self[0], -self[1], -self[2], -self[3], -self[4], -self[5], -self[6], -self[7]])
}
}
impl<T> FpSimd<T, SIMDD>
where
T: MulAdd<Output = T> + Copy,
{
#[inline(always)]
pub fn mul_add(self, b: FpSimd<T, SIMDD>, c: FpSimd<T, SIMDD>) -> FpSimd<T, SIMDD> {
FpSimd([
self[0].mul_add(b[0], c[0]),
self[1].mul_add(b[1], c[1]),
self[2].mul_add(b[2], c[2]),
self[3].mul_add(b[3], c[3]),
self[4].mul_add(b[4], c[4]),
self[5].mul_add(b[5], c[5]),
self[6].mul_add(b[6], c[6]),
self[7].mul_add(b[7], c[7]),
])
}
#[inline(always)]
pub fn fma_from(&mut self, b: FpSimd<T, SIMDD>, c: FpSimd<T, SIMDD>) {
*self = FpSimd([
b[0].mul_add(c[0], self[0]),
b[1].mul_add(c[1], self[1]),
b[2].mul_add(c[2], self[2]),
b[3].mul_add(c[3], self[3]),
b[4].mul_add(c[4], self[4]),
b[5].mul_add(c[5], self[5]),
b[6].mul_add(c[6], self[6]),
b[7].mul_add(c[7], self[7]),
])
}
}
impl f64simd {
#[inline(always)]
pub fn is_gto_zero(&self) -> bool {
for i in 0..SIMDD {
if self[i].abs() > GTOZERO {
return false;
}
}
true
}
}
#[repr(align(64))]
#[derive(Clone, Debug, Copy)]
pub struct Blk<T: Copy, const NLANE: usize>(pub [FpSimd<T, SIMDD>; NLANE]);
#[allow(non_camel_case_types)]
pub type f64blk<const NLANE: usize> = Blk<f64, NLANE>;
#[allow(non_camel_case_types)]
pub type c64blk<const NLANE: usize> = Blk<Complex<f64>, NLANE>;
impl<T: Copy, const NLANE: usize> Index<usize> for Blk<T, NLANE> {
type Output = T;
#[inline(always)]
fn index(&self, index: usize) -> &Self::Output {
&self.0[index / SIMDD][index % SIMDD]
}
}
impl<T: Copy, const NLANE: usize> IndexMut<usize> for Blk<T, NLANE> {
#[inline(always)]
fn index_mut(&mut self, index: usize) -> &mut Self::Output {
&mut self.0[index / SIMDD][index % SIMDD]
}
}
impl<T: Zero + Copy, const NLANE: usize> Blk<T, NLANE> {
#[inline(always)]
#[allow(clippy::uninit_assumed_init)]
#[allow(invalid_value)]
pub unsafe fn uninit() -> Self {
Blk(MaybeUninit::uninit().assume_init())
}
#[inline(always)]
pub const fn splat(val: T) -> Self {
Blk([FpSimd::splat(val); NLANE])
}
#[inline(always)]
pub fn fill(&mut self, val: T) {
for i in 0..NLANE {
self.0[i] = FpSimd::splat(val);
}
}
#[inline(always)]
pub unsafe fn read_ensure(&mut self, src: &[T]) {
for i in 0..NLANE {
self.0[i] = FpSimd([
src[i * SIMDD],
src[i * SIMDD + 1],
src[i * SIMDD + 2],
src[i * SIMDD + 3],
src[i * SIMDD + 4],
src[i * SIMDD + 5],
src[i * SIMDD + 6],
src[i * SIMDD + 7],
]);
}
}
#[inline]
pub fn read(&mut self, src: &[T]) {
let blksize = NLANE * SIMDD;
let len_slc = if src.len() < blksize { src.len() } else { blksize };
if len_slc >= blksize {
unsafe { self.read_ensure(src) }
} else {
for i in 0..len_slc {
self[i] = src[i];
}
}
}
#[inline(always)]
pub unsafe fn write_ensure(&self, dst: &mut [T]) {
for i in 0..NLANE {
dst[i * SIMDD] = self.0[i][0];
dst[i * SIMDD + 1] = self.0[i][1];
dst[i * SIMDD + 2] = self.0[i][2];
dst[i * SIMDD + 3] = self.0[i][3];
dst[i * SIMDD + 4] = self.0[i][4];
dst[i * SIMDD + 5] = self.0[i][5];
dst[i * SIMDD + 6] = self.0[i][6];
dst[i * SIMDD + 7] = self.0[i][7];
}
}
#[inline]
pub fn write(&self, dst: &mut [T]) {
let blksize = NLANE * SIMDD;
let len_slc = if dst.len() < blksize { dst.len() } else { blksize };
if len_slc >= blksize {
unsafe { self.write_ensure(dst) }
} else {
for i in 0..len_slc {
dst[i] = self[i];
}
}
}
}
impl<T: Copy, const NLANE: usize> Blk<T, NLANE> {
#[inline(always)]
pub const fn as_simdd_slice(&self) -> &[FpSimd<T, SIMDD>; NLANE] {
&self.0
}
#[inline(always)]
pub fn as_simdd_slice_mut(&mut self) -> &mut [FpSimd<T, SIMDD>; NLANE] {
&mut self.0
}
#[inline(always)]
pub const fn get_simdd(&self, index: usize) -> FpSimd<T, SIMDD> {
let slice = self.as_simdd_slice();
slice[index]
}
#[inline(always)]
pub fn get_simdd_mut(&mut self, index: usize) -> &mut FpSimd<T, SIMDD> {
let slice = self.as_simdd_slice_mut();
&mut slice[index]
}
}
pub fn gto_l_iter(l: usize) -> Box<dyn Iterator<Item = (usize, usize, usize)>> {
match l {
0 => Box::new(std::iter::once((0, 0, 0))),
1 => Box::new([(1, 0, 0), (0, 1, 0), (0, 0, 1)].into_iter()),
2 => Box::new([(2, 0, 0), (1, 1, 0), (1, 0, 1), (0, 2, 0), (0, 1, 1), (0, 0, 2)].into_iter()),
3 => Box::new(
[(3, 0, 0), (2, 1, 0), (2, 0, 1), (1, 2, 0), (1, 1, 1), (1, 0, 2), (0, 3, 0), (0, 2, 1), (0, 1, 2), (0, 0, 3)].into_iter(),
),
4 => Box::new(
[
(4, 0, 0),
(3, 1, 0),
(3, 0, 1),
(2, 2, 0),
(2, 1, 1),
(2, 0, 2),
(1, 3, 0),
(1, 2, 1),
(1, 1, 2),
(1, 0, 3),
(0, 4, 0),
(0, 3, 1),
(0, 2, 2),
(0, 1, 3),
(0, 0, 4),
]
.into_iter(),
),
5 => Box::new(
[
(5, 0, 0),
(4, 1, 0),
(4, 0, 1),
(3, 2, 0),
(3, 1, 1),
(3, 0, 2),
(2, 3, 0),
(2, 2, 1),
(2, 1, 2),
(2, 0, 3),
(1, 4, 0),
(1, 3, 1),
(1, 2, 2),
(1, 1, 3),
(1, 0, 4),
(0, 5, 0),
(0, 4, 1),
(0, 3, 2),
(0, 2, 3),
(0, 1, 4),
(0, 0, 5),
]
.into_iter(),
),
_ => Box::new((0..=l).flat_map(move |lx| (0..=(l - lx)).map(move |ly| (lx, ly, l - lx - ly)))),
}
}
pub fn gto_nabla1_simdd(f1: &mut [[f64simd; 3]], f0: &[[f64simd; 3]], l: usize, alpha: f64) {
let a2 = FpSimd::<f64>::splat(-2.0 * alpha);
f1[0][X] = a2 * f0[1][X];
f1[0][Y] = a2 * f0[1][Y];
f1[0][Z] = a2 * f0[1][Z];
for i in 1..=l {
let i_f64 = FpSimd::<f64>::splat(i as f64);
f1[i][X] = i_f64 * f0[i - 1][X] + a2 * f0[i + 1][X];
f1[i][Y] = i_f64 * f0[i - 1][Y] + a2 * f0[i + 1][Y];
f1[i][Z] = i_f64 * f0[i - 1][Z] + a2 * f0[i + 1][Z];
}
}
pub fn gto_x1_simdd(f1: &mut [[f64simd; 3]], f0: &[[f64simd; 3]], l: usize, ri: [f64; 3]) {
let ri_x = f64simd::splat(ri[X]);
let ri_y = f64simd::splat(ri[Y]);
let ri_z = f64simd::splat(ri[Z]);
for i in 0..=l {
f1[i][X] = f0[i][X].mul_add(ri_x, f0[i + 1][X]);
f1[i][Y] = f0[i][Y].mul_add(ri_y, f0[i + 1][Y]);
f1[i][Z] = f0[i][Z].mul_add(ri_z, f0[i + 1][Z]);
}
}
pub fn gto_r_simdd(f1: &mut [[f64simd; 3]], f0: &[[f64simd; 3]], l: usize) {
for i in 0..=l {
f1[i][X] = f0[i + 1][X];
f1[i][Y] = f0[i + 1][Y];
f1[i][Z] = f0[i + 1][Z];
}
}
pub fn gto_prim_exp<const NLANE: usize>(eprim: &mut [f64blk<NLANE>], coord: &[f64blk<NLANE>; 3], alpha: &[f64], fac: f64, nprim: usize) {
let mut rr = unsafe { f64blk::<NLANE>::uninit() };
for g in 0..NLANE {
let x = coord[0].get_simdd(g);
let y = coord[1].get_simdd(g);
let z = coord[2].get_simdd(g);
*rr.get_simdd_mut(g) = x * x + y * y + z * z;
}
for p in 0..nprim {
for g in 0..NLANE {
let arr = rr.get_simdd(g) * alpha[p];
*eprim[p].get_simdd_mut(g) = (-arr).map(f64::exp) * fac;
}
}
}
#[test]
fn test_blkf64_uninit() {
#[allow(clippy::uninit_assumed_init)]
#[allow(invalid_value)]
let v: [[f64blk<48>; 10]; 10] = unsafe { [[f64blk::uninit(); 10]; 10] };
println!("{:?}", v);
}