use crate::error::SpecialResult;
use scirs2_core::ndarray::{Array1, ArrayView1};
use scirs2_core::simd_ops::SimdUnifiedOps;
pub fn batch_gamma_f64(input: &ArrayView1<f64>) -> SpecialResult<Array1<f64>> {
let len = input.len();
let mut output = Array1::zeros(len);
let input_slice = input
.as_slice()
.expect("batch_gamma_f64: input must be contiguous");
let output_slice = output
.as_slice_mut()
.expect("batch_gamma_f64: output must be contiguous");
for i in 0..len {
output_slice[i] = crate::gamma::gamma(input_slice[i]);
}
Ok(output)
}
pub fn batch_gamma_f32(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
let len = input.len();
let mut output = Array1::zeros(len);
let input_slice = input
.as_slice()
.expect("batch_gamma_f32: input must be contiguous");
let output_slice = output
.as_slice_mut()
.expect("batch_gamma_f32: output must be contiguous");
for i in 0..len {
output_slice[i] = crate::gamma::gamma(input_slice[i] as f64) as f32;
}
Ok(output)
}
pub fn batch_lgamma_f64(input: &ArrayView1<f64>) -> SpecialResult<Array1<f64>> {
Ok(f64::simd_ln_gamma(input))
}
pub fn batch_lgamma_f32(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
Ok(f32::simd_ln_gamma(input))
}
pub fn batch_erf_f64(input: &ArrayView1<f64>) -> SpecialResult<Array1<f64>> {
Ok(f64::simd_erf(input))
}
pub fn batch_erf_f32(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
Ok(f32::simd_erf(input))
}
pub fn batch_erfc_f64(input: &ArrayView1<f64>) -> SpecialResult<Array1<f64>> {
Ok(f64::simd_erfc(input))
}
pub fn batch_erfc_f32(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
Ok(f32::simd_erfc(input))
}
pub fn batch_bessel_j0_f64(input: &ArrayView1<f64>) -> SpecialResult<Array1<f64>> {
let len = input.len();
if len == 0 {
return Ok(Array1::zeros(0));
}
let mut output = Array1::zeros(len);
let input_slice = input
.as_slice()
.expect("batch_bessel_j0_f64: input must be contiguous");
let output_slice = output
.as_slice_mut()
.expect("batch_bessel_j0_f64: output must be contiguous");
for i in 0..len {
output_slice[i] = crate::bessel::j0(input_slice[i]);
}
Ok(output)
}
pub fn batch_bessel_j0_f32(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
let len = input.len();
if len == 0 {
return Ok(Array1::zeros(0));
}
let mut output = Array1::zeros(len);
let input_slice = input
.as_slice()
.expect("batch_bessel_j0_f32: input must be contiguous");
let output_slice = output
.as_slice_mut()
.expect("batch_bessel_j0_f32: output must be contiguous");
const CHUNK: usize = 256;
for chunk_start in (0..len).step_by(CHUNK) {
let chunk_end = (chunk_start + CHUNK).min(len);
let chunk_len = chunk_end - chunk_start;
let mut small_indices = Vec::with_capacity(chunk_len);
let mut large_indices = Vec::with_capacity(chunk_len);
for i in chunk_start..chunk_end {
if input_slice[i].abs() <= 8.0 {
small_indices.push(i);
} else {
large_indices.push(i);
}
}
for &i in &small_indices {
output_slice[i] = j0_small_f32(input_slice[i]);
}
if !large_indices.is_empty() {
let large_x: Array1<f32> =
Array1::from_vec(large_indices.iter().map(|&i| input_slice[i]).collect());
let large_abs = f32::simd_abs(&large_x.view());
let inv_x = f32::simd_recip(&large_abs.view());
let inv_x2 = f32::simd_square(&inv_x.view());
let (pp, qp) = j0_asymptotic_pq_f32(&inv_x2.view());
let correction = f32::simd_div(&pp.view(), &qp.view());
let phase_corr = f32::simd_mul(&correction.view(), &inv_x.view());
let pi_over_4 = Array1::from_elem(large_indices.len(), std::f32::consts::FRAC_PI_4);
let theta_base = f32::simd_sub(&large_abs.view(), &pi_over_4.view());
let theta = f32::simd_add(&theta_base.view(), &phase_corr.view());
let two_over_pi = Array1::from_elem(large_indices.len(), 2.0f32 / std::f32::consts::PI);
let amp_arg = f32::simd_div(&two_over_pi.view(), &large_abs.view());
let amp_base = f32::simd_sqrt(&_arg.view());
let cos_theta = f32::simd_cos(&theta.view());
let j0_large = f32::simd_mul(&_base.view(), &cos_theta.view());
for (idx, &i) in large_indices.iter().enumerate() {
output_slice[i] = j0_large[idx];
}
}
}
Ok(output)
}
pub fn batch_bessel_j1_f64(input: &ArrayView1<f64>) -> SpecialResult<Array1<f64>> {
let len = input.len();
if len == 0 {
return Ok(Array1::zeros(0));
}
let mut output = Array1::zeros(len);
let input_slice = input
.as_slice()
.expect("batch_bessel_j1_f64: input must be contiguous");
let output_slice = output
.as_slice_mut()
.expect("batch_bessel_j1_f64: output must be contiguous");
for i in 0..len {
output_slice[i] = crate::bessel::j1(input_slice[i]);
}
Ok(output)
}
pub fn batch_bessel_j1_f32(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
let len = input.len();
if len == 0 {
return Ok(Array1::zeros(0));
}
let mut output = Array1::zeros(len);
let input_slice = input
.as_slice()
.expect("batch_bessel_j1_f32: input must be contiguous");
let output_slice = output
.as_slice_mut()
.expect("batch_bessel_j1_f32: output must be contiguous");
const CHUNK: usize = 256;
for chunk_start in (0..len).step_by(CHUNK) {
let chunk_end = (chunk_start + CHUNK).min(len);
let chunk_len = chunk_end - chunk_start;
let mut small_indices = Vec::with_capacity(chunk_len);
let mut large_indices = Vec::with_capacity(chunk_len);
for i in chunk_start..chunk_end {
if input_slice[i].abs() <= 8.0f32 {
small_indices.push(i);
} else {
large_indices.push(i);
}
}
for &i in &small_indices {
output_slice[i] = j1_small_f32(input_slice[i]);
}
if !large_indices.is_empty() {
let large_x: Array1<f32> =
Array1::from_vec(large_indices.iter().map(|&i| input_slice[i]).collect());
let large_abs = f32::simd_abs(&large_x.view());
let signs: Array1<f32> = Array1::from_vec(
large_indices
.iter()
.map(|&i| input_slice[i].signum())
.collect(),
);
let inv_x = f32::simd_recip(&large_abs.view());
let inv_x2 = f32::simd_square(&inv_x.view());
let (pp, qp) = j1_asymptotic_pq_f32(&inv_x2.view());
let correction = f32::simd_div(&pp.view(), &qp.view());
let phase_corr = f32::simd_mul(&correction.view(), &inv_x.view());
let three_pi_over_4 =
Array1::from_elem(large_indices.len(), 3.0f32 * std::f32::consts::FRAC_PI_4);
let theta_base = f32::simd_sub(&large_abs.view(), &three_pi_over_4.view());
let theta = f32::simd_add(&theta_base.view(), &phase_corr.view());
let two_over_pi = Array1::from_elem(large_indices.len(), 2.0f32 / std::f32::consts::PI);
let amp_arg = f32::simd_div(&two_over_pi.view(), &large_abs.view());
let amp_base = f32::simd_sqrt(&_arg.view());
let cos_theta = f32::simd_cos(&theta.view());
let j1_unsigned = f32::simd_mul(&_base.view(), &cos_theta.view());
let j1_large = f32::simd_mul(&j1_unsigned.view(), &signs.view());
for (idx, &i) in large_indices.iter().enumerate() {
output_slice[i] = j1_large[idx];
}
}
}
Ok(output)
}
pub fn batch_bessel_y0_f64(input: &ArrayView1<f64>) -> SpecialResult<Array1<f64>> {
let len = input.len();
if len == 0 {
return Ok(Array1::zeros(0));
}
let mut output = Array1::zeros(len);
let input_slice = input
.as_slice()
.expect("batch_bessel_y0_f64: input must be contiguous");
let output_slice = output
.as_slice_mut()
.expect("batch_bessel_y0_f64: output must be contiguous");
for i in 0..len {
let x = input_slice[i];
if x <= 0.0 {
output_slice[i] = f64::NAN;
} else {
output_slice[i] = crate::bessel::y0(x);
}
}
Ok(output)
}
pub fn batch_bessel_y0_f32(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
let len = input.len();
if len == 0 {
return Ok(Array1::zeros(0));
}
let mut output = Array1::zeros(len);
let input_slice = input
.as_slice()
.expect("batch_bessel_y0_f32: input must be contiguous");
let output_slice = output
.as_slice_mut()
.expect("batch_bessel_y0_f32: output must be contiguous");
const CHUNK: usize = 256;
for chunk_start in (0..len).step_by(CHUNK) {
let chunk_end = (chunk_start + CHUNK).min(len);
let chunk_len = chunk_end - chunk_start;
let mut small_indices = Vec::with_capacity(chunk_len);
let mut large_indices = Vec::with_capacity(chunk_len);
let mut invalid_indices = Vec::with_capacity(chunk_len);
for i in chunk_start..chunk_end {
let x = input_slice[i];
if x <= 0.0 {
invalid_indices.push(i);
} else if x <= 8.0 {
small_indices.push(i);
} else {
large_indices.push(i);
}
}
for &i in &invalid_indices {
output_slice[i] = f32::NAN;
}
for &i in &small_indices {
output_slice[i] = y0_small_f32(input_slice[i]);
}
if !large_indices.is_empty() {
let large_x: Array1<f32> =
Array1::from_vec(large_indices.iter().map(|&i| input_slice[i]).collect());
let inv_x = f32::simd_recip(&large_x.view());
let inv_x2 = f32::simd_square(&inv_x.view());
let (pp, qp) = j0_asymptotic_pq_f32(&inv_x2.view());
let correction = f32::simd_div(&pp.view(), &qp.view());
let phase_corr = f32::simd_mul(&correction.view(), &inv_x.view());
let pi_over_4 = Array1::from_elem(large_indices.len(), std::f32::consts::FRAC_PI_4);
let theta_base = f32::simd_sub(&large_x.view(), &pi_over_4.view());
let theta = f32::simd_add(&theta_base.view(), &phase_corr.view());
let two_over_pi = Array1::from_elem(large_indices.len(), 2.0f32 / std::f32::consts::PI);
let amp_arg = f32::simd_div(&two_over_pi.view(), &large_x.view());
let amp_base = f32::simd_sqrt(&_arg.view());
let sin_theta = f32::simd_sin(&theta.view());
let y0_large = f32::simd_mul(&_base.view(), &sin_theta.view());
for (idx, &i) in large_indices.iter().enumerate() {
output_slice[i] = y0_large[idx];
}
}
}
Ok(output)
}
pub fn batch_bessel_y1_f64(input: &ArrayView1<f64>) -> SpecialResult<Array1<f64>> {
let len = input.len();
if len == 0 {
return Ok(Array1::zeros(0));
}
let mut output = Array1::zeros(len);
let input_slice = input
.as_slice()
.expect("batch_bessel_y1_f64: input must be contiguous");
let output_slice = output
.as_slice_mut()
.expect("batch_bessel_y1_f64: output must be contiguous");
for i in 0..len {
let x = input_slice[i];
if x <= 0.0 {
output_slice[i] = f64::NAN;
} else {
output_slice[i] = crate::bessel::y1(x);
}
}
Ok(output)
}
pub fn batch_bessel_y1_f32(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
let len = input.len();
if len == 0 {
return Ok(Array1::zeros(0));
}
let mut output = Array1::zeros(len);
let input_slice = input
.as_slice()
.expect("batch_bessel_y1_f32: input must be contiguous");
let output_slice = output
.as_slice_mut()
.expect("batch_bessel_y1_f32: output must be contiguous");
const CHUNK: usize = 256;
for chunk_start in (0..len).step_by(CHUNK) {
let chunk_end = (chunk_start + CHUNK).min(len);
let chunk_len = chunk_end - chunk_start;
let mut small_indices = Vec::with_capacity(chunk_len);
let mut large_indices = Vec::with_capacity(chunk_len);
let mut invalid_indices = Vec::with_capacity(chunk_len);
for i in chunk_start..chunk_end {
let x = input_slice[i];
if x <= 0.0 {
invalid_indices.push(i);
} else if x <= 8.0f32 {
small_indices.push(i);
} else {
large_indices.push(i);
}
}
for &i in &invalid_indices {
output_slice[i] = f32::NAN;
}
for &i in &small_indices {
output_slice[i] = y1_small_f32(input_slice[i]);
}
if !large_indices.is_empty() {
let large_x: Array1<f32> =
Array1::from_vec(large_indices.iter().map(|&i| input_slice[i]).collect());
let inv_x = f32::simd_recip(&large_x.view());
let inv_x2 = f32::simd_square(&inv_x.view());
let (pp, qp) = j1_asymptotic_pq_f32(&inv_x2.view());
let correction = f32::simd_div(&pp.view(), &qp.view());
let phase_corr = f32::simd_mul(&correction.view(), &inv_x.view());
let three_pi_over_4 =
Array1::from_elem(large_indices.len(), 3.0f32 * std::f32::consts::FRAC_PI_4);
let theta_base = f32::simd_sub(&large_x.view(), &three_pi_over_4.view());
let theta = f32::simd_add(&theta_base.view(), &phase_corr.view());
let two_over_pi = Array1::from_elem(large_indices.len(), 2.0f32 / std::f32::consts::PI);
let amp_arg = f32::simd_div(&two_over_pi.view(), &large_x.view());
let amp_base = f32::simd_sqrt(&_arg.view());
let sin_theta = f32::simd_sin(&theta.view());
let y1_large = f32::simd_mul(&_base.view(), &sin_theta.view());
for (idx, &i) in large_indices.iter().enumerate() {
output_slice[i] = y1_large[idx];
}
}
}
Ok(output)
}
pub fn batch_beta_f64(a: &ArrayView1<f64>, b: &ArrayView1<f64>) -> SpecialResult<Array1<f64>> {
if a.len() != b.len() {
return Err(crate::error::SpecialError::ValueError(format!(
"batch_beta_f64: arrays must have same length, got {} and {}",
a.len(),
b.len()
)));
}
let len = a.len();
let mut output = Array1::zeros(len);
let a_slice = a.as_slice().expect("batch_beta_f64: a must be contiguous");
let b_slice = b.as_slice().expect("batch_beta_f64: b must be contiguous");
let out_slice = output
.as_slice_mut()
.expect("batch_beta_f64: output must be contiguous");
for i in 0..len {
out_slice[i] = crate::gamma::beta(a_slice[i], b_slice[i]);
}
Ok(output)
}
pub fn batch_beta_f32(a: &ArrayView1<f32>, b: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
if a.len() != b.len() {
return Err(crate::error::SpecialError::ValueError(format!(
"batch_beta_f32: arrays must have same length, got {} and {}",
a.len(),
b.len()
)));
}
let len = a.len();
let mut output = Array1::zeros(len);
let a_slice = a.as_slice().expect("batch_beta_f32: a must be contiguous");
let b_slice = b.as_slice().expect("batch_beta_f32: b must be contiguous");
let out_slice = output
.as_slice_mut()
.expect("batch_beta_f32: output must be contiguous");
for i in 0..len {
out_slice[i] = crate::gamma::beta(a_slice[i] as f64, b_slice[i] as f64) as f32;
}
Ok(output)
}
pub fn batch_digamma_f64(input: &ArrayView1<f64>) -> SpecialResult<Array1<f64>> {
let len = input.len();
let mut output = Array1::zeros(len);
let input_slice = input
.as_slice()
.expect("batch_digamma_f64: input must be contiguous");
let output_slice = output
.as_slice_mut()
.expect("batch_digamma_f64: output must be contiguous");
for i in 0..len {
output_slice[i] = crate::gamma::digamma(input_slice[i]);
}
Ok(output)
}
pub fn batch_digamma_f32(input: &ArrayView1<f32>) -> SpecialResult<Array1<f32>> {
let len = input.len();
let mut output = Array1::zeros(len);
let input_slice = input
.as_slice()
.expect("batch_digamma_f32: input must be contiguous");
let output_slice = output
.as_slice_mut()
.expect("batch_digamma_f32: output must be contiguous");
for i in 0..len {
output_slice[i] = crate::gamma::digamma(input_slice[i] as f64) as f32;
}
Ok(output)
}
#[inline]
fn j0_small_f64(x: f64) -> f64 {
crate::bessel::j0(x)
}
#[inline]
fn j0_small_f32(x: f32) -> f32 {
crate::bessel::j0(x as f64) as f32
}
fn j0_asymptotic_pq_f64(inv_x2: &ArrayView1<f64>) -> (Array1<f64>, Array1<f64>) {
const PP: [f64; 7] = [
7.96936729297347051624e-04,
8.28352392107440799803e-02,
1.23953371646414204986e+00,
5.44725003058768775090e+00,
8.74898656407846601684e+00,
5.82710030969399747997e+00,
1.42913747931138186111e+00,
];
const PQ: [f64; 7] = [
1.05765535021489477296e-02,
1.07350949974840114119e+00,
1.55991991605856106940e+01,
6.84024704406119199112e+01,
1.09049968706444649496e+02,
7.26419323946610921696e+01,
1.78544528399427143362e+01,
];
let len = inv_x2.len();
let mut pp_result = Array1::zeros(len);
let mut pq_result = Array1::zeros(len);
let inv_x2_slice = inv_x2
.as_slice()
.expect("j0_asymptotic_pq_f64: inv_x2 must be contiguous");
for i in 0..len {
let z = inv_x2_slice[i];
let mut p = PP[6];
for j in (0..6).rev() {
p = p * z + PP[j];
}
pp_result[i] = p;
let mut q = PQ[6];
for j in (0..6).rev() {
q = q * z + PQ[j];
}
pq_result[i] = q;
}
(pp_result, pq_result)
}
fn j0_asymptotic_pq_f32(inv_x2: &ArrayView1<f32>) -> (Array1<f32>, Array1<f32>) {
let len = inv_x2.len();
let mut pp_result = Array1::zeros(len);
let mut pq_result = Array1::zeros(len);
const PP: [f32; 5] = [
7.96936729297347051624e-04,
8.28352392107440799803e-02,
1.23953371646414204986e+00,
5.44725003058768775090e+00,
1.42913747931138186111e+00,
];
const PQ: [f32; 5] = [
1.05765535021489477296e-02,
1.07350949974840114119e+00,
1.55991991605856106940e+01,
6.84024704406119199112e+01,
1.78544528399427143362e+01,
];
let inv_x2_slice = inv_x2
.as_slice()
.expect("j0_asymptotic_pq_f32: inv_x2 must be contiguous");
for i in 0..len {
let z = inv_x2_slice[i];
let mut p = PP[4];
for j in (0..4).rev() {
p = p * z + PP[j];
}
pp_result[i] = p;
let mut q = PQ[4];
for j in (0..4).rev() {
q = q * z + PQ[j];
}
pq_result[i] = q;
}
(pp_result, pq_result)
}
#[inline]
fn j1_small_f64(x: f64) -> f64 {
crate::bessel::j1(x)
}
#[inline]
fn j1_small_f32(x: f32) -> f32 {
crate::bessel::j1(x as f64) as f32
}
fn j1_asymptotic_pq_f64(inv_x2: &ArrayView1<f64>) -> (Array1<f64>, Array1<f64>) {
const PP: [f64; 7] = [
7.62125616208173112003e-04,
7.31397056940849243622e-02,
1.12719608129684260397e+00,
5.11207951146461568876e+00,
8.42404590141772420928e+00,
5.21451598682361504063e+00,
1.00000000000000000254e+00,
];
const PQ: [f64; 7] = [
5.71050241285120902666e-04,
6.88948006001962595e-02,
1.10514232634061696926e+00,
5.07386386128601488557e+00,
8.39985554327604159757e+00,
5.20982848682361821619e+00,
9.99999999999999997461e-01,
];
let len = inv_x2.len();
let mut pp_result = Array1::zeros(len);
let mut pq_result = Array1::zeros(len);
let inv_x2_slice = inv_x2
.as_slice()
.expect("j1_asymptotic_pq_f64: inv_x2 must be contiguous");
for i in 0..len {
let z = inv_x2_slice[i];
let mut p = PP[6];
for j in (0..6).rev() {
p = p * z + PP[j];
}
pp_result[i] = p;
let mut q = PQ[6];
for j in (0..6).rev() {
q = q * z + PQ[j];
}
pq_result[i] = q;
}
(pp_result, pq_result)
}
fn j1_asymptotic_pq_f32(inv_x2: &ArrayView1<f32>) -> (Array1<f32>, Array1<f32>) {
let len = inv_x2.len();
let mut pp_result = Array1::zeros(len);
let mut pq_result = Array1::zeros(len);
const PP: [f32; 5] = [
7.62125616208173112003e-04,
7.31397056940849243622e-02,
1.12719608129684260397e+00,
5.11207951146461568876e+00,
1.00000000000000000254e+00,
];
const PQ: [f32; 5] = [
5.71050241285120902666e-04,
6.88948006001962595e-02,
1.10514232634061696926e+00,
5.07386386128601488557e+00,
9.99999999999999997461e-01,
];
let inv_x2_slice = inv_x2
.as_slice()
.expect("j1_asymptotic_pq_f32: inv_x2 must be contiguous");
for i in 0..len {
let z = inv_x2_slice[i];
let mut p = PP[4];
for j in (0..4).rev() {
p = p * z + PP[j];
}
pp_result[i] = p;
let mut q = PQ[4];
for j in (0..4).rev() {
q = q * z + PQ[j];
}
pq_result[i] = q;
}
(pp_result, pq_result)
}
#[inline]
fn y0_small_f64(x: f64) -> f64 {
crate::bessel::y0(x)
}
#[inline]
fn y0_small_f32(x: f32) -> f32 {
crate::bessel::y0(x as f64) as f32
}
#[inline]
fn y1_small_f64(x: f64) -> f64 {
crate::bessel::y1(x)
}
#[inline]
fn y1_small_f32(x: f32) -> f32 {
crate::bessel::y1(x as f64) as f32
}