#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use super::exp_log::{exp_f32, exp_f64, log_f32, log_f64};
/// Computes the hyperbolic tangent of each `f32` lane of `x`.
///
/// Evaluates `(e^(2x) - 1) / (e^(2x) + 1)` using `exp_f32`. The input is
/// clamped to `[-10, 10]` first: beyond that range `tanh` already rounds to
/// exactly `±1` in single precision, and without the clamp an `exp_f32` that
/// overflows to infinity would turn the quotient into `inf / inf = NaN`.
/// NaN lanes are passed through unchanged to the formula.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2 and FMA.
#[target_feature(enable = "avx2", enable = "fma")]
#[inline]
pub unsafe fn tanh_f32(x: __m256) -> __m256 {
    let two = _mm256_set1_ps(2.0);
    let one = _mm256_set1_ps(1.0);
    let upper = _mm256_set1_ps(10.0);
    let lower = _mm256_set1_ps(-10.0);

    // MINPS/MAXPS return their second operand when either input is NaN, so
    // keeping the data in the second slot lets NaN lanes survive the clamp.
    let clamped = _mm256_max_ps(lower, _mm256_min_ps(upper, x));

    let exp2x = exp_f32(_mm256_mul_ps(two, clamped));
    let num = _mm256_sub_ps(exp2x, one);
    let den = _mm256_add_ps(exp2x, one);
    _mm256_div_ps(num, den)
}
/// Computes the hyperbolic tangent of each `f64` lane of `x`.
///
/// Evaluates `(e^(2x) - 1) / (e^(2x) + 1)` using `exp_f64`. The input is
/// clamped to `[-20, 20]` first: beyond that range `tanh` already rounds to
/// exactly `±1` in double precision, and without the clamp an `exp_f64` that
/// overflows to infinity would turn the quotient into `inf / inf = NaN`.
/// NaN lanes are passed through unchanged to the formula.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2 and FMA.
#[target_feature(enable = "avx2", enable = "fma")]
#[inline]
pub unsafe fn tanh_f64(x: __m256d) -> __m256d {
    let two = _mm256_set1_pd(2.0);
    let one = _mm256_set1_pd(1.0);
    let upper = _mm256_set1_pd(20.0);
    let lower = _mm256_set1_pd(-20.0);

    // MINPD/MAXPD return their second operand when either input is NaN, so
    // keeping the data in the second slot lets NaN lanes survive the clamp.
    let clamped = _mm256_max_pd(lower, _mm256_min_pd(upper, x));

    let exp2x = exp_f64(_mm256_mul_pd(two, clamped));
    let num = _mm256_sub_pd(exp2x, one);
    let den = _mm256_add_pd(exp2x, one);
    _mm256_div_pd(num, den)
}
/// Computes the hyperbolic sine of each `f32` lane of `x`.
///
/// Uses the definition `0.5 * (e^x - e^(-x))`, with both exponentials
/// produced by `exp_f32`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2 and FMA.
#[target_feature(enable = "avx2", enable = "fma")]
#[inline]
pub unsafe fn sinh_f32(x: __m256) -> __m256 {
    // The negated argument is formed as `0 - x`.
    let negated = _mm256_sub_ps(_mm256_setzero_ps(), x);

    let growing = exp_f32(x);
    let decaying = exp_f32(negated);

    let difference = _mm256_sub_ps(growing, decaying);
    _mm256_mul_ps(_mm256_set1_ps(0.5), difference)
}
/// Computes the hyperbolic sine of each `f64` lane of `x`.
///
/// Uses the definition `0.5 * (e^x - e^(-x))`, with both exponentials
/// produced by `exp_f64`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2 and FMA.
#[target_feature(enable = "avx2", enable = "fma")]
#[inline]
pub unsafe fn sinh_f64(x: __m256d) -> __m256d {
    // The negated argument is formed as `0 - x`.
    let negated = _mm256_sub_pd(_mm256_setzero_pd(), x);

    let growing = exp_f64(x);
    let decaying = exp_f64(negated);

    let difference = _mm256_sub_pd(growing, decaying);
    _mm256_mul_pd(_mm256_set1_pd(0.5), difference)
}
/// Computes the hyperbolic cosine of each `f32` lane of `x`.
///
/// Uses the definition `0.5 * (e^x + e^(-x))`, with both exponentials
/// produced by `exp_f32`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2 and FMA.
#[target_feature(enable = "avx2", enable = "fma")]
#[inline]
pub unsafe fn cosh_f32(x: __m256) -> __m256 {
    // The negated argument is formed as `0 - x`.
    let negated = _mm256_sub_ps(_mm256_setzero_ps(), x);

    let growing = exp_f32(x);
    let decaying = exp_f32(negated);

    let total = _mm256_add_ps(growing, decaying);
    _mm256_mul_ps(_mm256_set1_ps(0.5), total)
}
/// Computes the hyperbolic cosine of each `f64` lane of `x`.
///
/// Uses the definition `0.5 * (e^x + e^(-x))`, with both exponentials
/// produced by `exp_f64`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2 and FMA.
#[target_feature(enable = "avx2", enable = "fma")]
#[inline]
pub unsafe fn cosh_f64(x: __m256d) -> __m256d {
    // The negated argument is formed as `0 - x`.
    let negated = _mm256_sub_pd(_mm256_setzero_pd(), x);

    let growing = exp_f64(x);
    let decaying = exp_f64(negated);

    let total = _mm256_add_pd(growing, decaying);
    _mm256_mul_pd(_mm256_set1_pd(0.5), total)
}
/// Computes the inverse hyperbolic sine of each `f32` lane of `x`.
///
/// Evaluates `ln(|x| + sqrt(x^2 + 1))` with `log_f32` and then restores the
/// sign of `x`, relying on `asinh` being an odd function. Working on `|x|`
/// avoids the cancellation in `x + sqrt(x^2 + 1)` for negative inputs, where
/// the sum collapses towards zero (and to exactly zero once `x^2 + 1` rounds
/// to `x^2`), which would feed `log_f32` a zero argument.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2 and FMA.
#[target_feature(enable = "avx2", enable = "fma")]
#[inline]
pub unsafe fn asinh_f32(x: __m256) -> __m256 {
    let one = _mm256_set1_ps(1.0);
    // -0.0 has only the sign bit set, so it doubles as a sign-bit mask.
    let sign_mask = _mm256_set1_ps(-0.0);
    let sign = _mm256_and_ps(sign_mask, x);
    let abs_x = _mm256_andnot_ps(sign_mask, x);

    let x2 = _mm256_mul_ps(x, x);
    let sqrt_term = _mm256_sqrt_ps(_mm256_add_ps(x2, one));
    let magnitude = log_f32(_mm256_add_ps(abs_x, sqrt_term));

    // XOR with the original sign bit negates the result for negative lanes.
    _mm256_xor_ps(magnitude, sign)
}
/// Computes the inverse hyperbolic sine of each `f64` lane of `x`.
///
/// Evaluates `ln(|x| + sqrt(x^2 + 1))` with `log_f64` and then restores the
/// sign of `x`, relying on `asinh` being an odd function. Working on `|x|`
/// avoids the cancellation in `x + sqrt(x^2 + 1)` for negative inputs, where
/// the sum collapses towards zero (and to exactly zero once `x^2 + 1` rounds
/// to `x^2`), which would feed `log_f64` a zero argument.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2 and FMA.
#[target_feature(enable = "avx2", enable = "fma")]
#[inline]
pub unsafe fn asinh_f64(x: __m256d) -> __m256d {
    let one = _mm256_set1_pd(1.0);
    // -0.0 has only the sign bit set, so it doubles as a sign-bit mask.
    let sign_mask = _mm256_set1_pd(-0.0);
    let sign = _mm256_and_pd(sign_mask, x);
    let abs_x = _mm256_andnot_pd(sign_mask, x);

    let x2 = _mm256_mul_pd(x, x);
    let sqrt_term = _mm256_sqrt_pd(_mm256_add_pd(x2, one));
    let magnitude = log_f64(_mm256_add_pd(abs_x, sqrt_term));

    // XOR with the original sign bit negates the result for negative lanes.
    _mm256_xor_pd(magnitude, sign)
}
/// Computes the inverse hyperbolic cosine of each `f32` lane of `x`.
///
/// Evaluates `ln(x + sqrt(x^2 - 1))`, delegating the logarithm to `log_f32`.
/// Lanes with `|x| < 1` make the square-root argument negative, so they
/// reach `log_f32` as NaN.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2 and FMA.
#[target_feature(enable = "avx2", enable = "fma")]
#[inline]
pub unsafe fn acosh_f32(x: __m256) -> __m256 {
    let squared = _mm256_mul_ps(x, x);
    let radicand = _mm256_sub_ps(squared, _mm256_set1_ps(1.0));
    let root = _mm256_sqrt_ps(radicand);
    let log_arg = _mm256_add_ps(x, root);
    log_f32(log_arg)
}
/// Computes the inverse hyperbolic cosine of each `f64` lane of `x`.
///
/// Evaluates `ln(x + sqrt(x^2 - 1))`, delegating the logarithm to `log_f64`.
/// Lanes with `|x| < 1` make the square-root argument negative, so they
/// reach `log_f64` as NaN.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2 and FMA.
#[target_feature(enable = "avx2", enable = "fma")]
#[inline]
pub unsafe fn acosh_f64(x: __m256d) -> __m256d {
    let squared = _mm256_mul_pd(x, x);
    let radicand = _mm256_sub_pd(squared, _mm256_set1_pd(1.0));
    let root = _mm256_sqrt_pd(radicand);
    let log_arg = _mm256_add_pd(x, root);
    log_f64(log_arg)
}
/// Computes the inverse hyperbolic tangent of each `f32` lane of `x`.
///
/// Evaluates `0.5 * ln((1 + x) / (1 - x))`, delegating the logarithm to
/// `log_f32`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2 and FMA.
#[target_feature(enable = "avx2", enable = "fma")]
#[inline]
pub unsafe fn atanh_f32(x: __m256) -> __m256 {
    let unit = _mm256_set1_ps(1.0);
    let quotient = _mm256_div_ps(_mm256_add_ps(unit, x), _mm256_sub_ps(unit, x));
    let ln_quotient = log_f32(quotient);
    _mm256_mul_ps(_mm256_set1_ps(0.5), ln_quotient)
}
/// Computes the inverse hyperbolic tangent of each `f64` lane of `x`.
///
/// Evaluates `0.5 * ln((1 + x) / (1 - x))`, delegating the logarithm to
/// `log_f64`.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2 and FMA.
#[target_feature(enable = "avx2", enable = "fma")]
#[inline]
pub unsafe fn atanh_f64(x: __m256d) -> __m256d {
    let unit = _mm256_set1_pd(1.0);
    let quotient = _mm256_div_pd(_mm256_add_pd(unit, x), _mm256_sub_pd(unit, x));
    let ln_quotient = log_f64(quotient);
    _mm256_mul_pd(_mm256_set1_pd(0.5), ln_quotient)
}