use crate::error::SpecialResult;
use scirs2_core::ndarray::{Array1, ArrayView1};
#[cfg(feature = "simd")]
use scirs2_core::simd::*;
#[cfg(feature = "parallel")]
use scirs2_core::parallel_ops::*;
#[cfg(feature = "simd")]
#[allow(dead_code)]
pub fn gamma_f32_simd(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
let len = input.len();
let mut output = Array1::zeros(len);
let chunksize = 8; let chunks = len / chunksize;
for i in 0..chunks {
let start = i * chunksize;
let end = start + chunksize;
let x_slice = &input.as_slice().expect("Operation failed")[start..end];
let results = simd_gamma_approx_f32(x_slice);
output.as_slice_mut().expect("Operation failed")[start..end].copy_from_slice(&results);
}
for i in (chunks * chunksize)..len {
output[i] = crate::gamma::gamma(input[i] as f64) as f32;
}
Ok(output)
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
pub fn gamma_f64_simd(input: &ArrayView1<f64>) -> SpecialResult<Array1<f64>> {
let len = input.len();
let mut output = Array1::zeros(len);
let chunksize = 4; let chunks = len / chunksize;
for i in 0..chunks {
let start = i * chunksize;
let end = start + chunksize;
let x_slice = &input.as_slice().expect("Operation failed")[start..end];
let results = simd_gamma_approx_f64(x_slice);
output.as_slice_mut().expect("Operation failed")[start..end].copy_from_slice(&results);
}
for i in (chunks * chunksize)..len {
output[i] = crate::gamma::gamma(input[i]);
}
Ok(output)
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
fn simd_gamma_approx_f32(x: &[f32]) -> Vec<f32> {
#[allow(dead_code)]
const G: f32 = 7.0;
#[allow(dead_code)]
const LANCZOS_COEFFS: [f32; 9] = [
0.99999999999980993,
676.5203681218851,
-1259.1392167224028,
771.32342877765313,
-176.61502916214059,
12.507343278686905,
-0.13857109526572012,
9.9843695780195716e-6,
1.5056327351493116e-7,
];
let mut result = vec![0.0f32; x.len()];
for i in 0..x.len() {
let xi = x[i];
if xi <= 0.0 {
result[i] = f32::NAN;
} else if xi < 0.5 {
let pi_sin_pi_z = std::f32::consts::PI / (std::f32::consts::PI * xi).sin();
result[i] = pi_sin_pi_z / lanczos_gamma_f32(1.0 - xi);
} else if xi < 1.5 {
result[i] = lanczos_gamma_f32(xi);
} else {
let mut z = xi;
let mut result_mult = 1.0;
while z > 1.5 {
z -= 1.0;
result_mult *= z;
}
result[i] = lanczos_gamma_f32(z) * result_mult;
}
}
result
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
fn lanczos_gamma_f32(z: f32) -> f32 {
const G: f32 = 7.0;
const LANCZOS_COEFFS: [f32; 9] = [
0.99999999999980993,
676.5203681218851,
-1259.1392167224028,
771.32342877765313,
-176.61502916214059,
12.507343278686905,
-0.13857109526572012,
9.9843695780195716e-6,
1.5056327351493116e-7,
];
let sqrt_2pi = (2.0 * std::f32::consts::PI).sqrt();
let mut ag = LANCZOS_COEFFS[0];
for i in 1..LANCZOS_COEFFS.len() {
ag += LANCZOS_COEFFS[i] / (z + i as f32);
}
let term1 = sqrt_2pi * ag / z;
let term2 = ((z + G - 0.5) / std::f32::consts::E).powf(z - 0.5);
term1 * term2
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
fn simd_gamma_approx_f64(x: &[f64]) -> Vec<f64> {
let mut result = vec![0.0f64; x.len()];
for i in 0..x.len() {
result[i] = crate::gamma::gamma(x[i]);
}
result
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
fn lanczos_gamma_f64(z: f64) -> f64 {
const G: f64 = 7.0;
const LANCZOS_COEFFS: [f64; 15] = [
0.99999999999999709182,
57.156235665862923517,
-59.597960355475491248,
14.136097974741747174,
-0.49191381609762019978,
0.000033994649984811888699,
0.00004652362892704858047,
-0.0000098374475304879564677,
0.00000015808870322491248884,
-0.00000002103937310653993906,
0.0000000016125516516672222819,
-0.00000000006050073988424023865,
0.000000000013525146073944673582,
-0.000000000000020085822498639073869,
0.0000000000000010773925999973567529,
];
let sqrt_2pi = (2.0 * std::f64::consts::PI).sqrt();
let mut ag = LANCZOS_COEFFS[0];
for i in 1..LANCZOS_COEFFS.len() {
ag += LANCZOS_COEFFS[i] / (z + i as f64);
}
let term1 = sqrt_2pi * ag / z;
let term2 = ((z + G - 0.5) / std::f64::consts::E).powf(z - 0.5);
term1 * term2
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
pub fn exp_f32_simd(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
let len = input.len();
let mut output = Array1::zeros(len);
if let Some(input_slice) = input.as_slice() {
if let Some(output_slice) = output.as_slice_mut() {
simd_exp_f32_slice(input_slice, output_slice);
return Ok(output);
}
}
for i in 0..len {
output[i] = input[i].exp();
}
Ok(output)
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
fn simd_exp_f32_slice(input: &[f32], output: &mut [f32]) {
let chunksize = 8;
let chunks = input.len() / chunksize;
for i in 0..chunks {
let start = i * chunksize;
let end = start + chunksize;
for j in start..end {
output[j] = input[j].exp();
}
}
for i in (chunks * chunksize)..input.len() {
output[i] = input[i].exp();
}
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
pub fn erf_f32_simd(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
let len = input.len();
let mut output = Array1::zeros(len);
let chunksize = 8;
let chunks = len / chunksize;
for i in 0..chunks {
let start = i * chunksize;
let end = start + chunksize;
for j in start..end {
output[j] = crate::erf::erf(input[j] as f64) as f32;
}
}
for i in (chunks * chunksize)..len {
output[i] = crate::erf::erf(input[i] as f64) as f32;
}
Ok(output)
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
pub fn j0_f32_simd(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
let len = input.len();
let mut output = Array1::zeros(len);
let chunksize = 8;
let chunks = len / chunksize;
for i in 0..chunks {
let start = i * chunksize;
let end = start + chunksize;
for j in start..end {
output[j] = crate::bessel::j0(input[j] as f64) as f32;
}
}
for i in (chunks * chunksize)..len {
output[i] = crate::bessel::j0(input[i] as f64) as f32;
}
Ok(output)
}
#[allow(dead_code)]
pub fn vectorized_special_ops() -> SpecialResult<()> {
#[cfg(feature = "simd")]
{
let capabilities = detect_simd_capabilities();
println!("SIMD capabilities detected:");
println!(" AVX2: {}", capabilities.has_avx2);
println!(" AVX512: {}", capabilities.has_avx512);
println!(" FMA: {}", capabilities.has_fma);
println!(" f32 vector width: {}", capabilities.vector_width_f32);
println!(" f64 vector width: {}", capabilities.vector_width_f64);
}
#[cfg(not(feature = "simd"))]
{
println!("SIMD features not enabled. Use --features simd to enable SIMD optimizations.");
}
Ok(())
}
#[cfg(feature = "parallel")]
#[allow(dead_code)]
pub fn gamma_f64_parallel(input: &ArrayView1<f64>) -> SpecialResult<Array1<f64>> {
let len = input.len();
let mut output = Array1::zeros(len);
if len > 1000 {
if let (Some(input_slice), Some(output_slice)) = (input.as_slice(), output.as_slice_mut()) {
output_slice
.par_iter_mut()
.zip(input_slice.par_iter())
.for_each(|(out, &inp)| {
*out = crate::gamma::gamma(inp);
});
} else {
use scirs2_core::ndarray::Zip;
Zip::from(&mut output).and(input).par_for_each(|out, &inp| {
*out = crate::gamma::gamma(inp);
});
}
} else {
for i in 0..len {
output[i] = crate::gamma::gamma(input[i]);
}
}
Ok(output)
}
#[cfg(feature = "parallel")]
#[allow(dead_code)]
pub fn j0_f64_parallel(input: &ArrayView1<f64>) -> SpecialResult<Array1<f64>> {
let len = input.len();
let mut output = Array1::zeros(len);
if len > 1000 {
if let (Some(input_slice), Some(output_slice)) = (input.as_slice(), output.as_slice_mut()) {
output_slice
.par_iter_mut()
.zip(input_slice.par_iter())
.for_each(|(out, &inp)| {
*out = crate::bessel::j0(inp);
});
} else {
use scirs2_core::ndarray::Zip;
Zip::from(&mut output).and(input).par_for_each(|out, &inp| {
*out = crate::bessel::j0(inp);
});
}
} else {
for i in 0..len {
output[i] = crate::bessel::j0(input[i]);
}
}
Ok(output)
}
#[cfg(all(feature = "simd", feature = "parallel"))]
#[allow(dead_code)]
pub fn gamma_f32_simd_parallel(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
let len = input.len();
let mut output = Array1::zeros(len);
const PARALLEL_THRESHOLD: usize = 10000;
const SIMD_CHUNK_SIZE: usize = 8;
if len > PARALLEL_THRESHOLD {
let _chunks = len / SIMD_CHUNK_SIZE;
let _remainder = len % SIMD_CHUNK_SIZE;
if let (Some(input_slice), Some(output_slice)) = (input.as_slice(), output.as_slice_mut()) {
output_slice
.par_iter_mut()
.zip(input_slice.par_iter())
.for_each(|(out, &inp)| {
*out = crate::gamma::gamma(inp as f64) as f32;
});
} else {
use scirs2_core::ndarray::Zip;
Zip::from(&mut output).and(input).par_for_each(|out, &inp| {
*out = crate::gamma::gamma(inp as f64) as f32;
});
}
} else if len > 1000 {
return gamma_f32_simd(input);
} else {
for i in 0..len {
output[i] = crate::gamma::gamma(input[i] as f64) as f32;
}
}
Ok(output)
}
#[cfg(feature = "parallel")]
#[allow(dead_code)]
pub fn benchmark_parallel_performance(size: usize) -> SpecialResult<()> {
use std::time::Instant;
let data_f64: Array1<f64> =
Array1::from_vec((0..size).map(|i| (i as f64) * 0.001 + 1.0).collect());
println!("Benchmarking parallel performance with {} elements:", size);
let start = Instant::now();
let _sequential: Array1<f64> = data_f64.mapv(crate::gamma::gamma);
let sequential_time = start.elapsed();
let start = Instant::now();
let _parallel = gamma_f64_parallel(&data_f64.view())?;
let parallel_time = start.elapsed();
println!(" Sequential: {:?}", sequential_time);
println!(" Parallel: {:?}", parallel_time);
let speedup = sequential_time.as_nanos() as f64 / parallel_time.as_nanos() as f64;
println!(" Speedup: {:.2}x", speedup);
let bessel_data: Array1<f64> = Array1::from_vec((0..size).map(|i| (i as f64) * 0.01).collect());
let start = Instant::now();
let _sequential: Array1<f64> = bessel_data.mapv(crate::bessel::j0);
let sequential_time = start.elapsed();
let start = Instant::now();
let _parallel = j0_f64_parallel(&bessel_data.view())?;
let parallel_time = start.elapsed();
println!(" Bessel J0:");
println!(" Sequential: {:?}", sequential_time);
println!(" Parallel: {:?}", parallel_time);
let speedup = sequential_time.as_nanos() as f64 / parallel_time.as_nanos() as f64;
println!(" Speedup: {:.2}x", speedup);
Ok(())
}
#[allow(dead_code)]
pub fn adaptive_gamma_processing(input: &ArrayView1<f64>) -> SpecialResult<Array1<f64>> {
let len = input.len();
#[cfg(all(feature = "simd", feature = "parallel"))]
{
if len > 50000 {
let input_f32: Array1<f32> = input.mapv(|x| x as f32);
let result_f32 = gamma_f32_simd_parallel(&input_f32.view())?;
return Ok(result_f32.mapv(|x| x as f64));
}
}
#[cfg(feature = "parallel")]
{
if len > 10000 {
return gamma_f64_parallel(input);
}
}
#[cfg(feature = "simd")]
{
if len > 1000 {
return gamma_f64_simd(input);
}
}
let mut output = Array1::zeros(len);
for i in 0..len {
output[i] = crate::gamma::gamma(input[i]);
}
Ok(output)
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
pub fn benchmark_simd_performance(size: usize) -> SpecialResult<()> {
use std::time::Instant;
let data_f32: Array1<f32> =
Array1::from_vec((0..size).map(|i| (i as f32) * 0.01 + 1.0).collect());
println!("Benchmarking SIMD performance with {} elements:", size);
let start = Instant::now();
let _result_simd = gamma_f32_simd(&data_f32.view())?;
let simd_time = start.elapsed();
let start = Instant::now();
let _result_scalar: Array1<f32> = data_f32.mapv(|x| crate::gamma::gamma(x as f64) as f32);
let scalar_time = start.elapsed();
println!(" Gamma f32:");
println!(" SIMD: {:?}", simd_time);
println!(" Scalar: {:?}", scalar_time);
println!(
" Speedup: {:.2}x",
scalar_time.as_nanos() as f64 / simd_time.as_nanos() as f64
);
let start = Instant::now();
let _result_simd = exp_f32_simd(&data_f32.view())?;
let simd_time = start.elapsed();
let start = Instant::now();
let _result_scalar: Array1<f32> = data_f32.mapv(|x| x.exp());
let scalar_time = start.elapsed();
println!(" Exp f32:");
println!(" SIMD: {:?}", simd_time);
println!(" Scalar: {:?}", scalar_time);
println!(
" Speedup: {:.2}x",
scalar_time.as_nanos() as f64 / simd_time.as_nanos() as f64
);
Ok(())
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
pub fn log_f32_simd(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
let len = input.len();
let mut output = Array1::zeros(len);
let chunksize = 8; let chunks = len / chunksize;
for i in 0..chunks {
let start = i * chunksize;
let end = start + chunksize;
let x_slice = &input.as_slice().expect("Operation failed")[start..end];
let results = simd_log_approx_f32(x_slice);
output.as_slice_mut().expect("Operation failed")[start..end].copy_from_slice(&results);
}
for i in (chunks * chunksize)..len {
output[i] = input[i].ln();
}
Ok(output)
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
pub fn sin_f32_simd(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
let len = input.len();
let mut output = Array1::zeros(len);
let chunksize = 8;
let chunks = len / chunksize;
for i in 0..chunks {
let start = i * chunksize;
let end = start + chunksize;
let x_slice = &input.as_slice().expect("Operation failed")[start..end];
let results = simd_sin_approx_f32(x_slice);
output.as_slice_mut().expect("Operation failed")[start..end].copy_from_slice(&results);
}
for i in (chunks * chunksize)..len {
output[i] = input[i].sin();
}
Ok(output)
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
pub fn cos_f32_simd(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
let len = input.len();
let mut output = Array1::zeros(len);
let chunksize = 8;
let chunks = len / chunksize;
for i in 0..chunks {
let start = i * chunksize;
let end = start + chunksize;
let x_slice = &input.as_slice().expect("Operation failed")[start..end];
let results = simd_cos_approx_f32(x_slice);
output.as_slice_mut().expect("Operation failed")[start..end].copy_from_slice(&results);
}
for i in (chunks * chunksize)..len {
output[i] = input[i].cos();
}
Ok(output)
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
pub fn j1_f32_simd(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
let len = input.len();
let mut output = Array1::zeros(len);
let chunksize = 8;
let chunks = len / chunksize;
for i in 0..chunks {
let start = i * chunksize;
let end = start + chunksize;
let x_slice = &input.as_slice().expect("Operation failed")[start..end];
let results = simd_j1_approx_f32(x_slice);
output.as_slice_mut().expect("Operation failed")[start..end].copy_from_slice(&results);
}
for i in (chunks * chunksize)..len {
output[i] = crate::bessel::j1(input[i] as f64) as f32;
}
Ok(output)
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
pub fn erfc_f32_simd(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
let len = input.len();
let mut output = Array1::zeros(len);
let chunksize = 8;
let chunks = len / chunksize;
for i in 0..chunks {
let start = i * chunksize;
let end = start + chunksize;
let x_slice = &input.as_slice().expect("Operation failed")[start..end];
let results = simd_erfc_approx_f32(x_slice);
output.as_slice_mut().expect("Operation failed")[start..end].copy_from_slice(&results);
}
for i in (chunks * chunksize)..len {
output[i] = crate::erf::erfc(input[i] as f64) as f32;
}
Ok(output)
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
pub fn digamma_f32_simd(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
let len = input.len();
let mut output = Array1::zeros(len);
let chunksize = 8;
let chunks = len / chunksize;
for i in 0..chunks {
let start = i * chunksize;
let end = start + chunksize;
let x_slice = &input.as_slice().expect("Operation failed")[start..end];
let results = simd_digamma_approx_f32(x_slice);
output.as_slice_mut().expect("Operation failed")[start..end].copy_from_slice(&results);
}
for i in (chunks * chunksize)..len {
output[i] = crate::gamma::digamma(input[i] as f64) as f32;
}
Ok(output)
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
fn simd_log_approx_f32(x: &[f32]) -> Vec<f32> {
x.iter().map(|&val| val.ln()).collect()
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
fn simd_sin_approx_f32(x: &[f32]) -> Vec<f32> {
x.iter().map(|&val| val.sin()).collect()
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
fn simd_cos_approx_f32(x: &[f32]) -> Vec<f32> {
x.iter().map(|&val| val.cos()).collect()
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
fn simd_j0_approx_f32(x: &[f32]) -> Vec<f32> {
x.iter()
.map(|&val| {
if val.abs() < 1e-6 {
1.0 - val * val * 0.25
} else {
let x2 = val * val;
let num = 1.0 - 0.25 * x2 + 0.015625 * x2 * x2 - 0.000434028 * x2 * x2 * x2;
let den = 1.0 + 0.0625 * x2;
num / den
}
})
.collect()
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
fn simd_j1_approx_f32(x: &[f32]) -> Vec<f32> {
x.iter()
.map(|&val| {
if val.abs() < 1e-6 {
val * 0.5
} else {
let x2 = val * val;
let num = val * (0.5 - 0.056249985 * x2 + 0.002659732 * x2 * x2);
let den = 1.0 + 0.25 * x2;
num / den
}
})
.collect()
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
fn simd_erf_approx_f32(x: &[f32]) -> Vec<f32> {
x.iter()
.map(|&val| {
let t = 1.0 / (1.0 + 0.3275911 * val.abs());
let poly = t
* (0.254829592
+ t * (-0.284496736
+ t * (1.421413741 + t * (-1.453152027 + t * 1.061405429))));
let erfc_result = poly * (-val * val).exp();
let erf_result = if val >= 0.0 {
1.0 - erfc_result
} else {
erfc_result - 1.0
};
erf_result
})
.collect()
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
fn simd_erfc_approx_f32(x: &[f32]) -> Vec<f32> {
x.iter()
.map(|&val| {
let t = 1.0 / (1.0 + 0.3275911 * val.abs());
let poly = t
* (0.254829592
+ t * (-0.284496736
+ t * (1.421413741 + t * (-1.453152027 + t * 1.061405429))));
let result = poly * (-val * val).exp();
if val >= 0.0 {
result
} else {
2.0 - result
}
})
.collect()
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
fn simd_digamma_approx_f32(x: &[f32]) -> Vec<f32> {
x.iter()
.map(|&val| {
if val < 0.5 {
let pi = std::f32::consts::PI;
simd_digamma_positive(1.0 - val) + pi / (pi * val).sin().tan()
} else {
simd_digamma_positive(val)
}
})
.collect()
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
fn simd_digamma_positive(x: f32) -> f32 {
if x > 8.0 {
x.ln() - 1.0 / (2.0 * x) - 1.0 / (12.0 * x * x)
} else {
let mut result = 0.0;
let mut curr_x = x;
while curr_x < 8.0 {
result -= 1.0 / curr_x;
curr_x += 1.0;
}
result + simd_digamma_positive(curr_x)
}
}
#[cfg(feature = "simd")]
#[allow(dead_code)]
pub fn multi_function_simd_f32(
input: &ArrayView1<f32>,
functions: &[&str],
) -> SpecialResult<Vec<Array1<f32>>> {
let len = input.len();
let num_functions = functions.len();
let mut outputs: Vec<Array1<f32>> = vec![Array1::zeros(len); num_functions];
let chunksize = 8;
let chunks = len / chunksize;
for i in 0..chunks {
let start = i * chunksize;
let end = start + chunksize;
let x_slice = &input.as_slice().expect("Operation failed")[start..end];
for (func_idx, &func_name) in functions.iter().enumerate() {
let results = match func_name {
"gamma" => simd_gamma_approx_f32(x_slice),
"log" => simd_log_approx_f32(x_slice),
"sin" => simd_sin_approx_f32(x_slice),
"cos" => simd_cos_approx_f32(x_slice),
"j0" => simd_j0_approx_f32(x_slice),
"j1" => simd_j1_approx_f32(x_slice),
"erf" => simd_erf_approx_f32(x_slice),
"erfc" => simd_erfc_approx_f32(x_slice),
"digamma" => simd_digamma_approx_f32(x_slice),
_ => {
return Err(crate::error::SpecialError::ValueError(format!(
"Unknown function: {}",
func_name
)))
}
};
outputs[func_idx].as_slice_mut().expect("Operation failed")[start..end]
.copy_from_slice(&results);
}
}
for i in (chunks * chunksize)..len {
for (func_idx, &func_name) in functions.iter().enumerate() {
outputs[func_idx][i] = match func_name {
"gamma" => crate::gamma::gamma(input[i] as f64) as f32,
"log" => input[i].ln(),
"sin" => input[i].sin(),
"cos" => input[i].cos(),
"j0" => crate::bessel::j0(input[i] as f64) as f32,
"j1" => crate::bessel::j1(input[i] as f64) as f32,
"erf" => crate::erf::erf(input[i] as f64) as f32,
"erfc" => crate::erf::erfc(input[i] as f64) as f32,
"digamma" => crate::gamma::digamma(input[i] as f64) as f32,
_ => {
return Err(crate::error::SpecialError::ValueError(format!(
"Unknown function: {}",
func_name
)))
}
};
}
}
Ok(outputs)
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_relative_eq;
#[test]
#[cfg(feature = "simd")]
fn test_gamma_f32_simd() {
let input = Array1::from_vec(vec![1.0f32, 2.0, 3.0, 4.0, 5.0]);
let result = gamma_f32_simd(&input.view()).expect("Operation failed");
assert_relative_eq!(result[0], 1.0, epsilon = 1e-5);
assert_relative_eq!(result[1], 1.0, epsilon = 1e-5);
assert_relative_eq!(result[2], 2.0, epsilon = 1e-5);
assert_relative_eq!(result[3], 6.0, epsilon = 1e-5);
assert_relative_eq!(result[4], 24.0, epsilon = 1e-2); }
#[test]
#[cfg(feature = "simd")]
fn test_log_f32_simd() {
let input = Array1::from_vec(vec![1.0f32, 2.0, std::f32::consts::E, 10.0]);
let result = log_f32_simd(&input.view()).expect("Operation failed");
assert_relative_eq!(result[0], 0.0, epsilon = 1e-5); assert_relative_eq!(result[1], 2.0f32.ln(), epsilon = 1e-5);
assert_relative_eq!(result[2], 1.0, epsilon = 1e-5); assert_relative_eq!(result[3], 10.0f32.ln(), epsilon = 1e-5);
}
#[test]
#[cfg(feature = "simd")]
fn test_sin_cos_f32_simd() {
let input = Array1::from_vec(vec![
0.0f32,
std::f32::consts::PI / 2.0,
std::f32::consts::PI,
]);
let sin_result = sin_f32_simd(&input.view()).expect("Operation failed");
assert_relative_eq!(sin_result[0], 0.0, epsilon = 1e-5);
assert_relative_eq!(sin_result[1], 1.0, epsilon = 1e-5);
assert_relative_eq!(sin_result[2], 0.0, epsilon = 1e-4);
let cos_result = cos_f32_simd(&input.view()).expect("Operation failed");
assert_relative_eq!(cos_result[0], 1.0, epsilon = 1e-5);
assert_relative_eq!(cos_result[1], 0.0, epsilon = 1e-4);
assert_relative_eq!(cos_result[2], -1.0, epsilon = 1e-5);
}
#[test]
#[cfg(feature = "simd")]
fn test_j1_f32_simd() {
let input = Array1::from_vec(vec![0.0f32, 1.0, 2.0, 3.0]);
let result = j1_f32_simd(&input.view()).expect("Operation failed");
assert_relative_eq!(result[0], 0.0, epsilon = 1e-5);
assert!(result[1].abs() > 0.1); assert!(result[2].abs() > 0.1); assert!(result[3].abs() > 0.1); }
#[test]
#[cfg(feature = "simd")]
fn test_erfc_f32_simd() {
let input = Array1::from_vec(vec![0.0f32, 1.0, 2.0, -1.0]);
let result = erfc_f32_simd(&input.view()).expect("Operation failed");
assert_relative_eq!(result[0], 1.0, epsilon = 1e-4);
assert!(result[1] < 1.0 && result[1] > 0.0); assert!(result[2] < result[1]); assert!(result[3] > 1.0); }
#[test]
#[cfg(feature = "simd")]
fn test_multi_function_simd() {
let input = Array1::from_vec(vec![1.0f32, 2.0, 3.0]);
let functions = vec!["gamma", "log", "sin"];
let results = multi_function_simd_f32(&input.view(), &functions).expect("Operation failed");
assert_eq!(results.len(), 3); assert_eq!(results[0].len(), 3);
assert_relative_eq!(results[0][0], 1.0, epsilon = 1e-4); assert_relative_eq!(results[0][1], 1.0, epsilon = 1e-4); assert_relative_eq!(results[0][2], 2.0, epsilon = 1e-4);
assert_relative_eq!(results[1][0], 0.0, epsilon = 1e-5);
assert_relative_eq!(results[2][0], 1.0f32.sin(), epsilon = 1e-5);
}
#[test]
#[cfg(feature = "simd")]
fn test_digamma_f32_simd() {
let input = Array1::from_vec(vec![1.0f32, 2.0, 3.0, 0.5]);
let result = digamma_f32_simd(&input.view()).expect("Operation failed");
for &val in result.iter() {
assert!(val.is_finite());
}
assert!(result[1] > result[0]);
assert!(result[2] > result[1]);
}
#[test]
#[cfg(feature = "simd")]
fn test_gamma_f64_simd() {
let input = Array1::from_vec(vec![1.0f64, 2.0, 3.0, 4.0, 5.0]);
let result = gamma_f64_simd(&input.view()).expect("Operation failed");
assert_relative_eq!(result[0], 1.0, epsilon = 1e-10);
assert_relative_eq!(result[1], 1.0, epsilon = 1e-10);
assert_relative_eq!(result[2], 2.0, epsilon = 1e-10);
assert_relative_eq!(result[3], 6.0, epsilon = 1e-10);
assert_relative_eq!(result[4], 24.0, epsilon = 1e-5); }
#[test]
#[cfg(feature = "simd")]
fn test_exp_f32_simd() {
let input = Array1::from_vec(vec![0.0f32, 1.0, 2.0, 3.0]);
let result = exp_f32_simd(&input.view()).expect("Operation failed");
assert_relative_eq!(result[0], 1.0, epsilon = 1e-6);
assert_relative_eq!(result[1], std::f32::consts::E, epsilon = 1e-6);
assert_relative_eq!(result[2], std::f32::consts::E.powi(2), epsilon = 1e-5);
assert_relative_eq!(result[3], std::f32::consts::E.powi(3), epsilon = 1e-4);
}
#[test]
#[cfg(feature = "simd")]
fn test_j0_f32_simd() {
let input = Array1::from_vec(vec![0.0f32, 1.0, 2.0, 3.0]);
let result = j0_f32_simd(&input.view()).expect("Operation failed");
assert_relative_eq!(result[0], 1.0, epsilon = 1e-6);
for i in 1..input.len() {
let expected = crate::bessel::j0(input[i] as f64) as f32;
assert_relative_eq!(result[i], expected, epsilon = 1e-6);
}
}
#[test]
fn test_vectorized_special_ops() {
assert!(vectorized_special_ops().is_ok());
}
#[test]
#[cfg(feature = "simd")]
fn test_simd_capabilities() {
let capabilities = detect_simd_capabilities();
assert!(capabilities.vector_width_f32 >= 1);
assert!(capabilities.vector_width_f64 >= 1);
}
#[test]
#[cfg(feature = "parallel")]
fn test_gamma_f64_parallel() {
let input = Array1::from_vec(vec![1.0f64, 2.0, 3.0, 4.0, 5.0]);
let result = gamma_f64_parallel(&input.view()).expect("Operation failed");
assert_relative_eq!(result[0], 1.0, epsilon = 1e-10);
assert_relative_eq!(result[1], 1.0, epsilon = 1e-10);
assert_relative_eq!(result[2], 2.0, epsilon = 1e-10);
assert_relative_eq!(result[3], 6.0, epsilon = 1e-10);
assert_relative_eq!(result[4], 24.0, epsilon = 1e-10);
}
#[test]
#[cfg(feature = "parallel")]
fn test_j0_f64_parallel() {
let input = Array1::from_vec(vec![0.0f64, 1.0, 2.0, 3.0]);
let result = j0_f64_parallel(&input.view()).expect("Operation failed");
assert_relative_eq!(result[0], 1.0, epsilon = 1e-10);
for i in 1..input.len() {
let expected = crate::bessel::j0(input[i]);
assert_relative_eq!(result[i], expected, epsilon = 1e-10);
}
}
#[test]
#[cfg(all(feature = "simd", feature = "parallel"))]
fn test_gamma_f32_simd_parallel() {
let input = Array1::from_vec(vec![1.0f32, 2.0, 3.0, 4.0, 5.0]);
let result = gamma_f32_simd_parallel(&input.view()).expect("Operation failed");
assert_relative_eq!(result[0], 1.0, epsilon = 1e-5);
assert_relative_eq!(result[1], 1.0, epsilon = 1e-5);
assert_relative_eq!(result[2], 2.0, epsilon = 1e-5);
assert_relative_eq!(result[3], 6.0, epsilon = 1e-5);
assert_relative_eq!(result[4], 24.0, epsilon = 1e-2);
}
#[test]
fn test_adaptive_gamma_processing() {
let input = Array1::from_vec(vec![1.0f64, 2.0, 3.0, 4.0, 5.0]);
let result = adaptive_gamma_processing(&input.view()).expect("Operation failed");
assert_relative_eq!(result[0], 1.0, epsilon = 1e-10);
assert_relative_eq!(result[1], 1.0, epsilon = 1e-10);
assert_relative_eq!(result[2], 2.0, epsilon = 1e-10);
assert_relative_eq!(result[3], 6.0, epsilon = 1e-10);
assert_relative_eq!(result[4], 24.0, epsilon = 1e-10);
}
#[test]
#[cfg(feature = "parallel")]
fn test_parallel_threshold_behavior() {
let smallinput = Array1::from_vec((0..10).map(|i| i as f64 + 1.0).collect());
let small_result = gamma_f64_parallel(&smallinput.view()).expect("Operation failed");
let largeinput = Array1::from_vec((0..2000).map(|i| (i as f64) * 0.001 + 1.0).collect());
let large_result = gamma_f64_parallel(&largeinput.view()).expect("Operation failed");
assert!(small_result.len() == 10);
assert!(large_result.len() == 2000);
assert!(small_result[0] > 0.0);
assert!(large_result[0] > 0.0);
}
}