rill-core 0.5.0-beta.1

//! # x86/x86_64 SIMD реализации
//!
//! Использует SSE2, SSE4.1, AVX, AVX2 и AVX512 инструкции для векторных операций.

#![cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#![allow(unused_imports)]
#![allow(dead_code)]

use super::super::traits::*;
use crate::Transcendental;

// -----------------------------------------------------------------------------
// SIMD типы
// -----------------------------------------------------------------------------

/// Вектор из 4 элементов f32 (SSE)
#[derive(Copy, Clone, Debug, PartialEq)]
#[repr(transparent)]
pub struct F32x4([f32; 4]);

/// Вектор из 8 элементов f32 (AVX)
#[derive(Copy, Clone, Debug, PartialEq)]
#[repr(transparent)]
pub struct F32x8([f32; 8]);

/// Вектор из 16 элементов f32 (AVX512)
#[derive(Copy, Clone, Debug, PartialEq)]
#[repr(transparent)]
pub struct F32x16([f32; 16]);

/// Вектор из 2 элементов f64 (SSE2)
#[derive(Copy, Clone, Debug, PartialEq)]
#[repr(transparent)]
pub struct F64x2([f64; 2]);

/// Вектор из 4 элементов f64 (AVX)
#[derive(Copy, Clone, Debug, PartialEq)]
#[repr(transparent)]
pub struct F64x4([f64; 4]);

/// Вектор из 8 элементов f64 (AVX512)
#[derive(Copy, Clone, Debug, PartialEq)]
#[repr(transparent)]
pub struct F64x8([f64; 8]);

// -----------------------------------------------------------------------------
// Реализация Vector для F32x4
// -----------------------------------------------------------------------------

impl Vector<f32, 4> for F32x4 {
    fn splat(value: f32) -> Self {
        F32x4([value; 4])
    }

    fn load(slice: &[f32]) -> Self {
        let mut arr = [0.0; 4];
        arr.copy_from_slice(&slice[0..4]);
        F32x4(arr)
    }

    fn store(&self, slice: &mut [f32]) {
        slice[0..4].copy_from_slice(&self.0);
    }

    fn extract(&self, index: usize) -> f32 {
        self.0[index]
    }

    fn insert(&self, index: usize, value: f32) -> Self {
        let mut arr = self.0;
        arr[index] = value;
        F32x4(arr)
    }

    fn add(&self, other: &Self) -> Self {
        let mut arr = [0.0; 4];
        for i in 0..4 {
            arr[i] = self.0[i] + other.0[i];
        }
        F32x4(arr)
    }

    fn sub(&self, other: &Self) -> Self {
        let mut arr = [0.0; 4];
        for i in 0..4 {
            arr[i] = self.0[i] - other.0[i];
        }
        F32x4(arr)
    }

    fn mul(&self, other: &Self) -> Self {
        let mut arr = [0.0; 4];
        for i in 0..4 {
            arr[i] = self.0[i] * other.0[i];
        }
        F32x4(arr)
    }

    fn div(&self, other: &Self) -> Self {
        let mut arr = [0.0; 4];
        for i in 0..4 {
            arr[i] = self.0[i] / other.0[i];
        }
        F32x4(arr)
    }

    fn rem(&self, other: &Self) -> Self {
        let mut arr = [0.0; 4];
        for i in 0..4 {
            arr[i] = self.0[i] % other.0[i];
        }
        F32x4(arr)
    }

    fn neg(&self) -> Self {
        let mut arr = [0.0; 4];
        for i in 0..4 {
            arr[i] = -self.0[i];
        }
        F32x4(arr)
    }

    fn abs(&self) -> Self {
        let mut arr = [0.0; 4];
        for i in 0..4 {
            arr[i] = self.0[i].abs();
        }
        F32x4(arr)
    }

    fn min(&self, other: &Self) -> Self {
        let mut arr = [0.0; 4];
        for i in 0..4 {
            arr[i] = self.0[i].min(other.0[i]);
        }
        F32x4(arr)
    }

    fn max(&self, other: &Self) -> Self {
        let mut arr = [0.0; 4];
        for i in 0..4 {
            arr[i] = self.0[i].max(other.0[i]);
        }
        F32x4(arr)
    }

    fn clamp(&self, min: &Self, max: &Self) -> Self {
        let mut arr = [0.0; 4];
        for i in 0..4 {
            arr[i] = self.0[i].clamp(min.0[i], max.0[i]);
        }
        F32x4(arr)
    }

}

impl VectorTranscendental<f32, 4> for F32x4 {
    fn sqrt(&self) -> Self { let mut a = [0.0; 4]; for i in 0..4 { a[i] = self.0[i].sqrt(); } F32x4(a) }
    fn exp(&self) -> Self { let mut a = [0.0; 4]; for i in 0..4 { a[i] = self.0[i].exp(); } F32x4(a) }
    fn ln(&self) -> Self { let mut a = [0.0; 4]; for i in 0..4 { a[i] = self.0[i].ln(); } F32x4(a) }
    fn sin(&self) -> Self { let mut a = [0.0; 4]; for i in 0..4 { a[i] = self.0[i].sin(); } F32x4(a) }
    fn cos(&self) -> Self { let mut a = [0.0; 4]; for i in 0..4 { a[i] = self.0[i].cos(); } F32x4(a) }
    fn tan(&self) -> Self { let mut a = [0.0; 4]; for i in 0..4 { a[i] = self.0[i].tan(); } F32x4(a) }
}

// Пока реализуем остальные типы как заглушки (скалярные версии)
// В будущем здесь будут настоящие SIMD инструкции через core::arch::x86_64

use std::ops::{Add, Div, Mul, Neg, Rem, Sub};

impl Add for F32x4 {
    type Output = Self;

    fn add(self, rhs: Self) -> Self {
        let mut arr = [0.0; 4];
        for i in 0..4 {
            arr[i] = self.0[i] + rhs.0[i];
        }
        F32x4(arr)
    }
}

impl Sub for F32x4 {
    type Output = Self;

    fn sub(self, rhs: Self) -> Self {
        let mut arr = [0.0; 4];
        for i in 0..4 {
            arr[i] = self.0[i] - rhs.0[i];
        }
        F32x4(arr)
    }
}

impl Mul for F32x4 {
    type Output = Self;

    fn mul(self, rhs: Self) -> Self {
        let mut arr = [0.0; 4];
        for i in 0..4 {
            arr[i] = self.0[i] * rhs.0[i];
        }
        F32x4(arr)
    }
}

impl Div for F32x4 {
    type Output = Self;

    fn div(self, rhs: Self) -> Self {
        let mut arr = [0.0; 4];
        for i in 0..4 {
            arr[i] = self.0[i] / rhs.0[i];
        }
        F32x4(arr)
    }
}

impl Rem for F32x4 {
    type Output = Self;

    fn rem(self, rhs: Self) -> Self {
        let mut arr = [0.0; 4];
        for i in 0..4 {
            arr[i] = self.0[i] % rhs.0[i];
        }
        F32x4(arr)
    }
}

impl Neg for F32x4 {
    type Output = Self;

    fn neg(self) -> Self {
        let mut arr = [0.0; 4];
        for i in 0..4 {
            arr[i] = -self.0[i];
        }
        F32x4(arr)
    }
}

impl Default for F32x4 {
    fn default() -> Self {
        F32x4([0.0; 4])
    }
}

// -----------------------------------------------------------------------------
// Тесты
// -----------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_f32x4_splat() {
        let v = F32x4::splat(2.5);
        assert_eq!(v.extract(0), 2.5);
        assert_eq!(v.extract(3), 2.5);
    }

    #[test]
    fn test_f32x4_add() {
        let a = F32x4::splat(1.0);
        let b = F32x4::splat(2.0);
        let c = a + b;
        assert_eq!(c.extract(0), 3.0);
    }

    #[test]
    fn test_f32x4_mul() {
        let a = F32x4::splat(3.0);
        let b = F32x4::splat(4.0);
        let c = a * b;
        assert_eq!(c.extract(0), 12.0);
    }

    #[test]
    fn test_f32x4_sin() {
        let a = F32x4::splat(0.0);
        let b = a.sin();
        assert_eq!(b.extract(0), 0.0);
    }
}