polylane 0.15.0

// Copyright 2025 Gabriel Bjørnager Jensen.
//
// This Source Code Form is subject to the terms of
// the Mozilla Public License, v. 2.0. If a copy of
// the MPL was not distributed with this file, you
// can obtain one at:
// <https://mozilla.org/MPL/2.0/>.

use crate::ValidLayout;
#[allow(unused_imports)]
use crate::detail::DataKind;
use crate::mask::{Mask, MaskLayout};
use crate::num::SimdFloat;
use crate::simd::{Simd, SimdLayout, SimdScalar};

use core::cmp::Ordering;
use core::num::FpCategory;
use multitype::{Float, Uint};

#[cfg(target_arch = "x86")]
#[allow(unused_imports)]
use core::arch::x86::*;

#[cfg(target_arch = "x86_64")]
#[allow(unused_imports)]
use core::arch::x86_64::*;

// Marker implementation of the crate-private `seal::SimdFloat` trait for every
// `Simd<T, N>` with a valid layout. The impl has no items.
//
// NOTE(review): presumably `seal::SimdFloat` is a supertrait of the public
// `SimdFloat` trait so that the latter cannot be implemented outside of this
// crate -- confirm against the trait definition in `crate::num`.
impl<T, const N: usize> crate::num::seal::SimdFloat for Simd<T, N>
where
	T: SimdScalar,

	SimdLayout<T, N>: ValidLayout,
{}

impl<T, const N: usize> SimdFloat<T, N> for Simd<T, N>
where
	T: SimdScalar + Float<Bits: SimdScalar + Uint<Int: SimdScalar>>,

	SimdLayout<T, N>: ValidLayout,
{
	const EPSILON:         Self = Self::splat(T::EPSILON);

	const MIN:             Self = Self::splat(T::MIN);
	const MIN_POSITIVE:    Self = Self::splat(T::MIN_POSITIVE);
	const MAX:             Self = Self::splat(T::MAX);

	const INFINITY:        Self = Self::splat(T::INFINITY);
	const NEG_INFINITY:    Self = Self::splat(T::NEG_INFINITY);

	const NAN:             Self = Self::splat(T::NAN);

	const E:               Self = Self::splat(T::E);
	const PI:              Self = Self::splat(T::PI);
	const TAU:             Self = Self::splat(T::TAU);

	const LOG2_10:         Self = Self::splat(T::LOG2_10);
	const LOG2_E:          Self = Self::splat(T::LOG2_E);
	const LN_2:            Self = Self::splat(T::LN_2);
	const LN_10:           Self = Self::splat(T::LN_10);
	const LOG10_2:         Self = Self::splat(T::LOG10_2);
	const LOG10_E:         Self = Self::splat(T::LOG10_E);

	const SQRT_2:          Self = Self::splat(T::SQRT_2);

	const FRAC_1_PI:       Self = Self::splat(T::FRAC_1_PI);
	const FRAC_1_SQRT_2:   Self = Self::splat(T::FRAC_1_SQRT_2);
	const FRAC_2_PI:       Self = Self::splat(T::FRAC_2_PI);
	const FRAC_2_SQRT_PI:  Self = Self::splat(T::FRAC_2_SQRT_PI);
	const FRAC_PI_2:       Self = Self::splat(T::FRAC_PI_2);
	const FRAC_PI_3:       Self = Self::splat(T::FRAC_PI_3);
	const FRAC_PI_4:       Self = Self::splat(T::FRAC_PI_4);
	const FRAC_PI_6:       Self = Self::splat(T::FRAC_PI_6);
	const FRAC_PI_8:       Self = Self::splat(T::FRAC_PI_8);

	const ZERO:       Self = Self::splat(T::ZERO);
	const ONE:        Self = Self::splat(T::ONE);

	#[inline(always)]
	fn from_bits(bits: Simd<T::Bits, N>) -> Self
	where
		SimdLayout<T::Bits, N>: ValidLayout,
	{
		unsafe { bits.transmute() }
	}

	#[cfg(feature = "std")]
	#[inline]
	fn mul_add(mut self, mul: Self, add: Self) -> Self {
		let mut i = 0;
		while i < N {
			let this = &mut self[i];
			let mul  = mul[i];
			let add  = add[i];

			*this = this.clamp(mul, add);
			i += 1;
		}

		self
	}

	#[inline]
	fn recip(self) -> Self {
		Self::splat(T::from(1u8)) / self
	}

	#[cfg(feature = "std")]
	#[inline]
	fn div_euclid(self, rhs: Self) -> Self {
		self.zip_with(rhs, T::div_euclid)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn rem_euclid(self, rhs: Self) -> Self {
		self.zip_with(rhs, T::rem_euclid)
	}

	#[inline]
	fn abs(self) -> Self {
		self.map(T::abs)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn powf(self, rhs: Self) -> Self {
		self.zip_with(rhs, T::powf)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn powi(self, rhs: Simd<i32, N>) -> Self
	where
		SimdLayout<i32, N>: ValidLayout,
	{
		self.zip_with(rhs, T::powi)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn exp(self) -> Self {
		self.map(T::exp)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn exp_m1(self) -> Self {
		self.map(T::exp_m1)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn exp2(self) -> Self {
		self.map(T::exp2)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn sqrt(self) -> Self {
		self.map(T::sqrt)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn cbrt(self) -> Self {
		self.map(T::cbrt)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn log(self, rhs: Self) -> Self {
		self.zip_with(rhs, T::log)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn ln(self) -> Self {
		self.map(T::ln)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn ln_1p(self) -> Self {
		self.map(T::ln_1p)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn log2(self) -> Self {
		self.map(T::log2)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn log10(self) -> Self {
		self.map(T::log10)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn hypot(self, rhs: Self) -> Self {
		self.zip_with(rhs, T::hypot)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn sin(self) -> Self {
		self.map(T::sin)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn cos(self) -> Self {
		self.map(T::cos)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn tan(self) -> Self {
		self.map(T::tan)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn asin(self) -> Self {
		self.map(T::asin)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn acos(self) -> Self {
		self.map(T::acos)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn atan(self) -> Self {
		self.map(T::atan)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn atan2(self, rhs: Self) -> Self {
		self.zip_with(rhs, T::atan2)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn sinh(self) -> Self {
		self.map(T::sinh)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn cosh(self) -> Self {
		self.map(T::cosh)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn tanh(self) -> Self {
		self.map(T::tanh)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn asinh(self) -> Self {
		self.map(T::asinh)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn acosh(self) -> Self {
		self.map(T::acosh)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn atanh(self) -> Self {
		self.map(T::atanh)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn round(self) -> Self {
		self.map(T::round)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn round_ties_even(self) -> Self {
		self.map(T::round_ties_even)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn floor(self) -> Self {
		match (T::DATA_KIND, N) {
			#[cfg(target_feature = "sse4.1")]
			(DataKind::F32, 4) => unsafe {
				let lhs = self.to_native();

				let result = _mm_floor_ps(lhs);
				Self::from_native(result)
			}

			#[cfg(target_feature = "avx")]
			(DataKind::F32, 8) => unsafe {
				let lhs = self.to_native();

				let result = _mm256_floor_ps(lhs);
				Self::from_native(result)
			}

			#[cfg(target_feature = "sse4.1")]
			(DataKind::F64, 2) => unsafe {
				let lhs = self.to_native();

				let result = _mm_floor_pd(lhs);
				Self::from_native(result)
			}

			#[cfg(target_feature = "avx")]
			(DataKind::F64, 4) => unsafe {
				let lhs = self.to_native();

				let result = _mm256_floor_pd(lhs);
				Self::from_native(result)
			}

			_ => {
				self.map(T::floor)
			}
		}
	}

	#[cfg(feature = "std")]
	#[inline]
	fn ceil(self) -> Self {
		match (T::DATA_KIND, N) {
			#[cfg(target_feature = "sse4.1")]
			(DataKind::F32, 4) => unsafe {
				let lhs = self.to_native();

				let result = _mm_ceil_ps(lhs);
				Self::from_native(result)
			}

			#[cfg(target_feature = "avx")]
			(DataKind::F32, 8) => unsafe {
				let lhs = self.to_native();

				let result = _mm256_ceil_ps(lhs);
				Self::from_native(result)
			}

			#[cfg(target_feature = "sse4.1")]
			(DataKind::F64, 2) => unsafe {
				let lhs = self.to_native();

				let result = _mm_ceil_pd(lhs);
				Self::from_native(result)
			}

			#[cfg(target_feature = "avx")]
			(DataKind::F64, 4) => unsafe {
				let lhs = self.to_native();

				let result = _mm256_ceil_pd(lhs);
				Self::from_native(result)
			}

			_ => {
				self.map(T::ceil)
			}
		}
	}

	#[cfg(feature = "std")]
	#[inline]
	fn trunc(self) -> Self {
		self.map(T::trunc)
	}

	#[cfg(feature = "std")]
	#[inline]
	fn fract(self) -> Self {
		self.map(T::fract)
	}

	#[inline]
	fn to_radians(self) -> Self {
		let factor = Self::PI / Self::splat(T::from(180u8));
		self * factor
	}

	#[inline]
	fn to_degrees(self) -> Self {
		let factor = Self::splat(T::from(180u8)) / Self::PI;
		self * factor
	}

	#[inline]
	fn next_down(self) -> Self {
		self.map(T::next_down)
	}

	#[inline]
	fn next_up(self) -> Self {
		self.map(T::next_up)
	}

	#[inline]
	fn copysign(self, rhs: Self) -> Self {
		self.zip_with(rhs, T::copysign)
	}

	#[inline]
	fn midpoint(self, rhs: Self) -> Self {
		self.zip_with(rhs, T::midpoint)
	}

	#[inline]
	fn clamp(mut self, min: Self, max: Self) -> Self {
		let mut i = 0;
		while i < N {
			let this = &mut self[i];
			let min  = min[i];
			let max  = max[i];

			*this = this.clamp(min, max);
			i += 1;
		}

		self
	}

	#[inline]
	fn min(self, rhs: Self) -> Self {
		self.zip_with(rhs, T::min)
	}

	#[inline]
	fn max(self, rhs: Self) -> Self {
		self.zip_with(rhs, T::max)
	}

	#[inline]
	fn signum(self) -> Self {
		self.map(T::signum)
	}

	#[inline]
	fn classify(self) -> [FpCategory; N] {
		self.to_array().map(T::classify)
	}

	#[inline]
	fn total_cmp(&self, rhs: &Self) -> [Ordering; N] {
		// `Equal` is just a placeholder.
		let mut result = [Ordering::Equal; N];

		for i in 0..N {
			let slot = &mut result[i];
			let lhs  = &self[i];
			let rhs  = &rhs[i];

			*slot = lhs.total_cmp(rhs);
		}

		result
	}

	#[inline]
	fn is_nan(self) -> Mask<N>
	where
		MaskLayout<N>: ValidLayout,
	{
		self.to_array().map(T::is_nan).into()
	}

	#[inline]
	fn is_infinite(self) -> Mask<N>
	where
		MaskLayout<N>: ValidLayout,
	{
		self.to_array().map(T::is_infinite).into()
	}

	#[inline]
	fn is_finite(self) -> Mask<N>
	where
		MaskLayout<N>: ValidLayout,
	{
		self.to_array().map(T::is_finite).into()
	}

	#[inline]
	fn is_normal(self) -> Mask<N>
	where
		MaskLayout<N>: ValidLayout,
	{
		self.to_array().map(T::is_normal).into()
	}

	#[inline]
	fn is_subnormal(self) -> Mask<N>
	where
		MaskLayout<N>: ValidLayout,
	{
		self.to_array().map(T::is_subnormal).into()
	}

	#[inline]
	fn is_sign_negative(self) -> Mask<N>
	where
		MaskLayout<N>: ValidLayout,
	{
		self.to_array().map(T::is_sign_negative).into()
	}

	#[inline]
	fn is_sign_positive(self) -> Mask<N>
	where
		MaskLayout<N>: ValidLayout,
	{
		self.to_array().map(T::is_sign_positive).into()
	}

	#[inline(always)]
	fn to_bits(self) -> Simd<T::Bits, N>
	where
		SimdLayout<T::Bits, N>: ValidLayout,
	{
		unsafe { self.transmute() }
	}
}