use crate::error::{FFTError, FFTResult};
use std::f64::consts::PI;
use scirs2_core::simd_ops::{
simd_add_f32_ultra_vec, simd_cos_f32_ultra_vec, simd_div_f32_ultra_vec, simd_exp_f32_ultra_vec,
simd_fma_f32_ultra_vec, simd_mul_f32_ultra_vec, simd_pow_f32_ultra_vec, simd_sin_f32_ultra_vec,
simd_sub_f32_ultra_vec, PlatformCapabilities, SimdUnifiedOps,
};
#[allow(dead_code)]
pub fn fht(
a: &[f64],
dln: f64,
mu: f64,
offset: Option<f64>,
bias: Option<f64>,
) -> FFTResult<Vec<f64>> {
let n = a.len();
if n == 0 {
return Err(FFTError::ValueError(
"Input array cannot be empty".to_string(),
));
}
let offset = offset.unwrap_or(0.0);
let bias = bias.unwrap_or(0.0);
let coeffs = fht_coefficients(n, dln, mu, offset, bias)?;
let modified_input: Vec<f64> = a
.iter()
.zip(coeffs.iter())
.map(|(&ai, &ci)| ai * ci)
.collect();
let spectrum = crate::fft(&modified_input, None)?;
let result: Vec<f64> = spectrum.iter().map(|c| c.re).take(n).collect();
Ok(result)
}
#[allow(dead_code)]
pub fn ifht(
a: &[f64],
dln: f64,
mu: f64,
offset: Option<f64>,
bias: Option<f64>,
) -> FFTResult<Vec<f64>> {
let bias_inv = -bias.unwrap_or(0.0);
fht(a, dln, mu, offset, Some(bias_inv))
}
#[allow(dead_code)]
pub fn fhtoffset(_dln: f64, _mu: f64, initial: Option<f64>, bias: Option<f64>) -> FFTResult<f64> {
let bias = bias.unwrap_or(0.0);
let initial = initial.unwrap_or(0.0);
if bias == 0.0 {
Ok(0.0)
} else {
Ok(initial)
}
}
#[allow(dead_code)]
fn fht_coefficients(n: usize, dln: f64, mu: f64, offset: f64, bias: f64) -> FFTResult<Vec<f64>> {
let mut coeffs = vec![0.0; n];
for (i, coeff) in coeffs.iter_mut().enumerate() {
let m = i as f64 - n as f64 / 2.0;
let k = 2.0 * PI * m / (n as f64 * dln);
let basic_coeff = k.powf(mu) * (-(k * k) / 4.0).exp();
let biased_coeff = if bias != 0.0 {
basic_coeff * (1.0 + bias * k * k).powf(-bias / 2.0)
} else {
basic_coeff
};
let phase = offset * k;
*coeff = biased_coeff * phase.cos();
}
Ok(coeffs)
}
#[allow(dead_code)]
pub fn fht_sample_points(n: usize, dln: f64, offset: f64) -> Vec<f64> {
(0..n)
.map(|i| ((i as f64 - n as f64 / 2.0) * dln + offset).exp())
.collect()
}
#[allow(dead_code)]
pub fn fht_bandwidth_saturated_simd(
a: &[f64],
dln: f64,
mu: f64,
offset: Option<f64>,
bias: Option<f64>,
) -> FFTResult<Vec<f64>> {
use scirs2_core::simd_ops::{PlatformCapabilities, SimdUnifiedOps};
let n = a.len();
if n == 0 {
return Err(FFTError::ValueError(
"Input array cannot be empty".to_string(),
));
}
let offset = offset.unwrap_or(0.0);
let bias = bias.unwrap_or(0.0);
let caps = PlatformCapabilities::detect();
if n >= 64 && (caps.has_avx2() || caps.has_avx512()) {
fht_bandwidth_saturated_simd_impl(a, dln, mu, offset, bias)
} else {
fht(a, dln, mu, Some(offset), Some(bias))
}
}
#[allow(dead_code)]
fn fht_bandwidth_saturated_simd_impl(
a: &[f64],
dln: f64,
mu: f64,
offset: f64,
bias: f64,
) -> FFTResult<Vec<f64>> {
use scirs2_core::simd_ops::SimdUnifiedOps;
let n = a.len();
let coeffs = fht_coefficients_bandwidth_saturated_simd(n, dln, mu, offset, bias)?;
let modified_input = fht_multiply_bandwidth_saturated_simd(a, &coeffs)?;
let spectrum = crate::fft(&modified_input, None)?;
let result = fht_extract_real_bandwidth_saturated_simd(&spectrum, n)?;
Ok(result)
}
#[allow(dead_code)]
fn fht_coefficients_bandwidth_saturated_simd(
n: usize,
dln: f64,
mu: f64,
offset: f64,
bias: f64,
) -> FFTResult<Vec<f64>> {
use scirs2_core::simd_ops::SimdUnifiedOps;
let mut coeffs = vec![0.0; n];
let chunk_size = 8;
let dln_f32 = dln as f32;
let mu_f32 = mu as f32;
let offset_f32 = offset as f32;
let bias_f32 = bias as f32;
let n_f32 = n as f32;
let two_pi = (2.0 * PI) as f32;
for chunk_start in (0..n).step_by(chunk_size) {
let chunk_end = (chunk_start + chunk_size).min(n);
let chunk_len = chunk_end - chunk_start;
if chunk_len == chunk_size {
let mut indices = vec![0.0f32; chunk_size];
for (i, idx) in indices.iter_mut().enumerate() {
*idx = (chunk_start + i) as f32;
}
let mut m_values = vec![0.0f32; chunk_size];
let n_half = vec![n_f32 / 2.0; chunk_size];
simd_sub_f32_ultra_vec(&indices, &n_half, &mut m_values);
let mut k_values = vec![0.0f32; chunk_size];
let mut temp = vec![0.0f32; chunk_size];
let two_pi_vec = vec![two_pi; chunk_size];
let n_dln = vec![n_f32 * dln_f32; chunk_size];
simd_mul_f32_ultra_vec(&two_pi_vec, &m_values, &mut temp);
simd_div_f32_ultra_vec(&temp, &n_dln, &mut k_values);
let mut k_pow_mu = vec![0.0f32; chunk_size];
let mu_vec = vec![mu_f32; chunk_size];
simd_pow_f32_ultra_vec(&k_values, &mu_vec, &mut k_pow_mu);
let mut k_squared = vec![0.0f32; chunk_size];
simd_mul_f32_ultra_vec(&k_values, &k_values, &mut k_squared);
let mut neg_k_squared_quarter = vec![0.0f32; chunk_size];
let quarter_neg = vec![-0.25f32; chunk_size];
simd_mul_f32_ultra_vec(&k_squared, &quarter_neg, &mut neg_k_squared_quarter);
let mut exp_term = vec![0.0f32; chunk_size];
simd_exp_f32_ultra_vec(&neg_k_squared_quarter, &mut exp_term);
let mut basic_coeff = vec![0.0f32; chunk_size];
simd_mul_f32_ultra_vec(&k_pow_mu, &exp_term, &mut basic_coeff);
let mut biased_coeff = vec![0.0f32; chunk_size];
if bias != 0.0 {
let mut bias_k_squared = vec![0.0f32; chunk_size];
let bias_vec = vec![bias_f32; chunk_size];
simd_mul_f32_ultra_vec(&bias_vec, &k_squared, &mut bias_k_squared);
let mut one_plus_bias_k_sq = vec![0.0f32; chunk_size];
let ones = vec![1.0f32; chunk_size];
simd_add_f32_ultra_vec(&ones, &bias_k_squared, &mut one_plus_bias_k_sq);
let mut bias_correction = vec![0.0f32; chunk_size];
let neg_bias_half = vec![-bias_f32 / 2.0; chunk_size];
simd_pow_f32_ultra_vec(&one_plus_bias_k_sq, &neg_bias_half, &mut bias_correction);
simd_mul_f32_ultra_vec(&basic_coeff, &bias_correction, &mut biased_coeff);
} else {
biased_coeff.copy_from_slice(&basic_coeff);
}
let mut phase_terms = vec![0.0f32; chunk_size];
if offset != 0.0 {
let mut offset_k = vec![0.0f32; chunk_size];
let offset_vec = vec![offset_f32; chunk_size];
simd_mul_f32_ultra_vec(&offset_vec, &k_values, &mut offset_k);
simd_cos_f32_ultra_vec(&offset_k, &mut phase_terms);
} else {
phase_terms.fill(1.0);
}
let mut final_coeff = vec![0.0f32; chunk_size];
simd_mul_f32_ultra_vec(&biased_coeff, &phase_terms, &mut final_coeff);
for (i, &coeff) in final_coeff.iter().enumerate() {
coeffs[chunk_start + i] = coeff as f64;
}
} else {
for i in chunk_start..chunk_end {
let m = i as f64 - n as f64 / 2.0;
let k = 2.0 * PI * m / (n as f64 * dln);
let basic_coeff = k.powf(mu) * (-(k * k) / 4.0).exp();
let biased_coeff = if bias != 0.0 {
basic_coeff * (1.0 + bias * k * k).powf(-bias / 2.0)
} else {
basic_coeff
};
let phase = offset * k;
coeffs[i] = biased_coeff * phase.cos();
}
}
}
Ok(coeffs)
}
#[allow(dead_code)]
fn fht_multiply_bandwidth_saturated_simd(a: &[f64], coeffs: &[f64]) -> FFTResult<Vec<f64>> {
use scirs2_core::simd_ops::SimdUnifiedOps;
let n = a.len();
let mut result = vec![0.0; n];
let chunk_size = 8;
for chunk_start in (0..n).step_by(chunk_size) {
let chunk_end = (chunk_start + chunk_size).min(n);
let chunk_len = chunk_end - chunk_start;
if chunk_len == chunk_size {
let mut a_chunk: Vec<f32> = a[chunk_start..chunk_end]
.iter()
.map(|&x| x as f32)
.collect();
let mut coeffs_chunk: Vec<f32> = coeffs[chunk_start..chunk_end]
.iter()
.map(|&x| x as f32)
.collect();
let mut product_chunk = vec![0.0f32; chunk_size];
simd_mul_f32_ultra_vec(&a_chunk, &coeffs_chunk, &mut product_chunk);
for (i, &prod) in product_chunk.iter().enumerate() {
result[chunk_start + i] = prod as f64;
}
} else {
for i in chunk_start..chunk_end {
result[i] = a[i] * coeffs[i];
}
}
}
Ok(result)
}
#[allow(dead_code)]
fn fht_extract_real_bandwidth_saturated_simd(
spectrum: &[scirs2_core::numeric::Complex64],
n: usize,
) -> FFTResult<Vec<f64>> {
use scirs2_core::simd_ops::SimdUnifiedOps;
let mut result = vec![0.0; n];
let chunk_size = 8;
for chunk_start in (0..n).step_by(chunk_size) {
let chunk_end = (chunk_start + chunk_size).min(n);
let chunk_len = chunk_end - chunk_start;
if chunk_len == chunk_size {
let mut real_parts = vec![0.0f32; chunk_size];
for (i, &complex_val) in spectrum[chunk_start..chunk_end].iter().enumerate() {
real_parts[i] = complex_val.re as f32;
}
for (i, &real_val) in real_parts.iter().enumerate() {
result[chunk_start + i] = real_val as f64;
}
} else {
for i in chunk_start..chunk_end {
result[i] = spectrum[i].re;
}
}
}
Ok(result)
}
#[allow(dead_code)]
pub fn ifht_bandwidth_saturated_simd(
a: &[f64],
dln: f64,
mu: f64,
offset: Option<f64>,
bias: Option<f64>,
) -> FFTResult<Vec<f64>> {
let bias_inv = -bias.unwrap_or(0.0);
fht_bandwidth_saturated_simd(a, dln, mu, offset, Some(bias_inv))
}
#[allow(dead_code)]
pub fn fht_sample_points_bandwidth_saturated_simd(n: usize, dln: f64, offset: f64) -> Vec<f64> {
use scirs2_core::simd_ops::{PlatformCapabilities, SimdUnifiedOps};
let caps = PlatformCapabilities::detect();
if n >= 32 && (caps.has_avx2() || caps.has_avx512()) {
fht_sample_points_simd_impl(n, dln, offset)
} else {
fht_sample_points(n, dln, offset)
}
}
#[allow(dead_code)]
fn fht_sample_points_simd_impl(n: usize, dln: f64, offset: f64) -> Vec<f64> {
use scirs2_core::simd_ops::SimdUnifiedOps;
let mut points = vec![0.0; n];
let chunk_size = 8;
let dln_f32 = dln as f32;
let offset_f32 = offset as f32;
let n_f32 = n as f32;
for chunk_start in (0..n).step_by(chunk_size) {
let chunk_end = (chunk_start + chunk_size).min(n);
let chunk_len = chunk_end - chunk_start;
if chunk_len == chunk_size {
let mut indices = vec![0.0f32; chunk_size];
for (i, idx) in indices.iter_mut().enumerate() {
*idx = (chunk_start + i) as f32;
}
let mut arguments = vec![0.0f32; chunk_size];
let n_half = vec![n_f32 / 2.0; chunk_size];
let dln_vec = vec![dln_f32; chunk_size];
let offset_vec = vec![offset_f32; chunk_size];
let mut i_minus_n_half = vec![0.0f32; chunk_size];
simd_sub_f32_ultra_vec(&indices, &n_half, &mut i_minus_n_half);
let mut temp = vec![0.0f32; chunk_size];
simd_mul_f32_ultra_vec(&i_minus_n_half, &dln_vec, &mut temp);
simd_add_f32_ultra_vec(&temp, &offset_vec, &mut arguments);
let mut exp_values = vec![0.0f32; chunk_size];
simd_exp_f32_ultra_vec(&arguments, &mut exp_values);
for (i, &exp_val) in exp_values.iter().enumerate() {
points[chunk_start + i] = exp_val as f64;
}
} else {
for i in chunk_start..chunk_end {
let arg = (i as f64 - n as f64 / 2.0) * dln + offset;
points[i] = arg.exp();
}
}
}
points
}
#[allow(dead_code)]
pub fn fft_log_bandwidth_saturated_simd(
input: &[f64],
dln: f64,
mu: f64,
offset: Option<f64>,
bias: Option<f64>,
k_opt: Option<f64>,
) -> FFTResult<(Vec<f64>, Vec<f64>)> {
use scirs2_core::simd_ops::PlatformCapabilities;
let n = input.len();
let caps = PlatformCapabilities::detect();
if n >= 128 && (caps.has_avx2() || caps.has_avx512()) {
let offset = offset.unwrap_or(0.0);
let k_opt = k_opt.unwrap_or(1.0);
let output = fht_bandwidth_saturated_simd(input, dln, mu, Some(offset), bias)?;
let k_points =
fht_sample_points_bandwidth_saturated_simd(n, 2.0 * PI / (n as f64 * dln), -offset);
Ok((output, k_points))
} else {
let offset = offset.unwrap_or(0.0);
let output = fht(input, dln, mu, Some(offset), bias)?;
let k_points = fht_sample_points(n, 2.0 * PI / (n as f64 * dln), -offset);
Ok((output, k_points))
}
}
#[cfg(test)]
mod tests {
use super::*;
use approx::assert_relative_eq;
#[test]
fn test_fht_basic() {
let n = 64;
let dln = 0.1;
let mu = 0.0;
let x: Vec<f64> = (0..n)
.map(|i| ((i as f64 - n as f64 / 2.0) * dln).exp())
.collect();
let y = fht(&x, dln, mu, None, None).expect("Operation failed");
assert_eq!(y.len(), n);
let x_recovered = ifht(&y, dln, mu, None, None).expect("Operation failed");
assert_eq!(x_recovered.len(), n);
}
#[test]
fn test_fhtoffset() {
let dln = 0.1;
let mu = 0.5;
let offset1 = fhtoffset(dln, mu, None, Some(0.0)).expect("Operation failed");
assert_relative_eq!(offset1, 0.0, epsilon = 1e-10);
let offset2 = fhtoffset(dln, mu, Some(0.5), Some(1.0)).expect("Operation failed");
assert_relative_eq!(offset2, 0.5, epsilon = 1e-10);
}
#[test]
fn test_sample_points() {
let n = 8;
let dln = 0.5;
let offset = 1.0;
let points = fht_sample_points(n, dln, offset);
assert_eq!(points.len(), n);
for i in 1..n {
let ratio = points[i] / points[i - 1];
assert_relative_eq!(ratio.ln(), dln, epsilon = 1e-10);
}
}
}