1use crate::error::{FFTError, FFTResult};
7use std::f64::consts::PI;
8
9use scirs2_core::simd_ops::{
11 simd_add_f32_ultra_vec, simd_cos_f32_ultra_vec, simd_div_f32_ultra_vec, simd_exp_f32_ultra_vec,
12 simd_fma_f32_ultra_vec, simd_mul_f32_ultra_vec, simd_pow_f32_ultra_vec, simd_sin_f32_ultra_vec,
13 simd_sub_f32_ultra_vec, PlatformCapabilities, SimdUnifiedOps,
14};
15
16#[allow(dead_code)]
33pub fn fht(
34 a: &[f64],
35 dln: f64,
36 mu: f64,
37 offset: Option<f64>,
38 bias: Option<f64>,
39) -> FFTResult<Vec<f64>> {
40 let n = a.len();
41 if n == 0 {
42 return Err(FFTError::ValueError(
43 "Input array cannot be empty".to_string(),
44 ));
45 }
46
47 let offset = offset.unwrap_or(0.0);
48 let bias = bias.unwrap_or(0.0);
49
50 let coeffs = fht_coefficients(n, dln, mu, offset, bias)?;
52
53 let modified_input: Vec<f64> = a
55 .iter()
56 .zip(coeffs.iter())
57 .map(|(&ai, &ci)| ai * ci)
58 .collect();
59
60 let spectrum = crate::fft(&modified_input, None)?;
62
63 let result: Vec<f64> = spectrum.iter().map(|c| c.re).take(n).collect();
65
66 Ok(result)
67}
68
69#[allow(dead_code)]
86pub fn ifht(
87 a: &[f64],
88 dln: f64,
89 mu: f64,
90 offset: Option<f64>,
91 bias: Option<f64>,
92) -> FFTResult<Vec<f64>> {
93 let bias_inv = -bias.unwrap_or(0.0);
95 fht(a, dln, mu, offset, Some(bias_inv))
96}
97
98#[allow(dead_code)]
114pub fn fhtoffset(_dln: f64, _mu: f64, initial: Option<f64>, bias: Option<f64>) -> FFTResult<f64> {
115 let bias = bias.unwrap_or(0.0);
116 let initial = initial.unwrap_or(0.0);
117
118 if bias == 0.0 {
120 Ok(0.0)
121 } else {
122 Ok(initial)
125 }
126}
127
128#[allow(dead_code)]
130fn fht_coefficients(n: usize, dln: f64, mu: f64, offset: f64, bias: f64) -> FFTResult<Vec<f64>> {
131 let mut coeffs = vec![0.0; n];
132
133 for (i, coeff) in coeffs.iter_mut().enumerate() {
135 let m = i as f64 - n as f64 / 2.0;
136 let k = 2.0 * PI * m / (n as f64 * dln);
137
138 let basic_coeff = k.powf(mu) * (-(k * k) / 4.0).exp();
140
141 let biased_coeff = if bias != 0.0 {
143 basic_coeff * (1.0 + bias * k * k).powf(-bias / 2.0)
144 } else {
145 basic_coeff
146 };
147
148 let phase = offset * k;
150 *coeff = biased_coeff * phase.cos();
151 }
152
153 Ok(coeffs)
154}
155
156#[allow(dead_code)]
171pub fn fht_sample_points(n: usize, dln: f64, offset: f64) -> Vec<f64> {
172 (0..n)
173 .map(|i| ((i as f64 - n as f64 / 2.0) * dln + offset).exp())
174 .collect()
175}
176
177#[cfg(test)]
178mod tests {
179 use super::*;
180 use approx::assert_relative_eq;
181
182 #[test]
183 fn test_fht_basic() {
184 let n = 64;
185 let dln = 0.1;
186 let mu = 0.0;
187
188 let x: Vec<f64> = (0..n)
190 .map(|i| ((i as f64 - n as f64 / 2.0) * dln).exp())
191 .collect();
192
193 let y = fht(&x, dln, mu, None, None).unwrap();
195 assert_eq!(y.len(), n);
196
197 let x_recovered = ifht(&y, dln, mu, None, None).unwrap();
199 assert_eq!(x_recovered.len(), n);
200 }
201
202 #[test]
203 fn test_fhtoffset() {
204 let dln = 0.1;
205 let mu = 0.5;
206
207 let offset1 = fhtoffset(dln, mu, None, Some(0.0)).unwrap();
209 assert_relative_eq!(offset1, 0.0, epsilon = 1e-10);
210
211 let offset2 = fhtoffset(dln, mu, Some(0.5), Some(1.0)).unwrap();
213 assert_relative_eq!(offset2, 0.5, epsilon = 1e-10);
214 }
215
216 #[test]
217 fn test_sample_points() {
218 let n = 8;
219 let dln = 0.5;
220 let offset = 1.0;
221
222 let points = fht_sample_points(n, dln, offset);
223 assert_eq!(points.len(), n);
224
225 for i in 1..n {
227 let ratio = points[i] / points[i - 1];
228 assert_relative_eq!(ratio.ln(), dln, epsilon = 1e-10);
229 }
230 }
231}
232
233#[allow(dead_code)]
256pub fn fht_bandwidth_saturated_simd(
257 a: &[f64],
258 dln: f64,
259 mu: f64,
260 offset: Option<f64>,
261 bias: Option<f64>,
262) -> FFTResult<Vec<f64>> {
263 use scirs2_core::simd_ops::{PlatformCapabilities, SimdUnifiedOps};
264
265 let n = a.len();
266 if n == 0 {
267 return Err(FFTError::ValueError(
268 "Input array cannot be empty".to_string(),
269 ));
270 }
271
272 let offset = offset.unwrap_or(0.0);
273 let bias = bias.unwrap_or(0.0);
274
275 let caps = PlatformCapabilities::detect();
277
278 if n >= 64 && (caps.has_avx2() || caps.has_avx512()) {
280 fht_bandwidth_saturated_simd_impl(a, dln, mu, offset, bias)
281 } else {
282 fht(a, dln, mu, Some(offset), Some(bias))
284 }
285}
286
287#[allow(dead_code)]
289fn fht_bandwidth_saturated_simd_impl(
290 a: &[f64],
291 dln: f64,
292 mu: f64,
293 offset: f64,
294 bias: f64,
295) -> FFTResult<Vec<f64>> {
296 use scirs2_core::simd_ops::SimdUnifiedOps;
297
298 let n = a.len();
299
300 let coeffs = fht_coefficients_bandwidth_saturated_simd(n, dln, mu, offset, bias)?;
302
303 let modified_input = fht_multiply_bandwidth_saturated_simd(a, &coeffs)?;
305
306 let spectrum = crate::fft(&modified_input, None)?;
308
309 let result = fht_extract_real_bandwidth_saturated_simd(&spectrum, n)?;
311
312 Ok(result)
313}
314
315#[allow(dead_code)]
317fn fht_coefficients_bandwidth_saturated_simd(
318 n: usize,
319 dln: f64,
320 mu: f64,
321 offset: f64,
322 bias: f64,
323) -> FFTResult<Vec<f64>> {
324 use scirs2_core::simd_ops::SimdUnifiedOps;
325
326 let mut coeffs = vec![0.0; n];
327 let chunk_size = 8; let dln_f32 = dln as f32;
331 let mu_f32 = mu as f32;
332 let offset_f32 = offset as f32;
333 let bias_f32 = bias as f32;
334 let n_f32 = n as f32;
335 let two_pi = (2.0 * PI) as f32;
336
337 for chunk_start in (0..n).step_by(chunk_size) {
338 let chunk_end = (chunk_start + chunk_size).min(n);
339 let chunk_len = chunk_end - chunk_start;
340
341 if chunk_len == chunk_size {
342 let mut indices = vec![0.0f32; chunk_size];
344 for (i, idx) in indices.iter_mut().enumerate() {
345 *idx = (chunk_start + i) as f32;
346 }
347
348 let mut m_values = vec![0.0f32; chunk_size];
350 let n_half = vec![n_f32 / 2.0; chunk_size];
351 simd_sub_f32_ultra_vec(&indices, &n_half, &mut m_values);
352
353 let mut k_values = vec![0.0f32; chunk_size];
355 let mut temp = vec![0.0f32; chunk_size];
356 let two_pi_vec = vec![two_pi; chunk_size];
357 let n_dln = vec![n_f32 * dln_f32; chunk_size];
358
359 simd_mul_f32_ultra_vec(&two_pi_vec, &m_values, &mut temp);
360 simd_div_f32_ultra_vec(&temp, &n_dln, &mut k_values);
361
362 let mut k_pow_mu = vec![0.0f32; chunk_size];
364 let mu_vec = vec![mu_f32; chunk_size];
365 simd_pow_f32_ultra_vec(&k_values, &mu_vec, &mut k_pow_mu);
366
367 let mut k_squared = vec![0.0f32; chunk_size];
369 simd_mul_f32_ultra_vec(&k_values, &k_values, &mut k_squared);
370
371 let mut neg_k_squared_quarter = vec![0.0f32; chunk_size];
372 let quarter_neg = vec![-0.25f32; chunk_size];
373 simd_mul_f32_ultra_vec(&k_squared, &quarter_neg, &mut neg_k_squared_quarter);
374
375 let mut exp_term = vec![0.0f32; chunk_size];
376 simd_exp_f32_ultra_vec(&neg_k_squared_quarter, &mut exp_term);
377
378 let mut basic_coeff = vec![0.0f32; chunk_size];
380 simd_mul_f32_ultra_vec(&k_pow_mu, &exp_term, &mut basic_coeff);
381
382 let mut biased_coeff = vec![0.0f32; chunk_size];
384 if bias != 0.0 {
385 let mut bias_k_squared = vec![0.0f32; chunk_size];
387 let bias_vec = vec![bias_f32; chunk_size];
388 simd_mul_f32_ultra_vec(&bias_vec, &k_squared, &mut bias_k_squared);
389
390 let mut one_plus_bias_k_sq = vec![0.0f32; chunk_size];
391 let ones = vec![1.0f32; chunk_size];
392 simd_add_f32_ultra_vec(&ones, &bias_k_squared, &mut one_plus_bias_k_sq);
393
394 let mut bias_correction = vec![0.0f32; chunk_size];
395 let neg_bias_half = vec![-bias_f32 / 2.0; chunk_size];
396 simd_pow_f32_ultra_vec(&one_plus_bias_k_sq, &neg_bias_half, &mut bias_correction);
397
398 simd_mul_f32_ultra_vec(&basic_coeff, &bias_correction, &mut biased_coeff);
399 } else {
400 biased_coeff.copy_from_slice(&basic_coeff);
401 }
402
403 let mut phase_terms = vec![0.0f32; chunk_size];
405 if offset != 0.0 {
406 let mut offset_k = vec![0.0f32; chunk_size];
407 let offset_vec = vec![offset_f32; chunk_size];
408 simd_mul_f32_ultra_vec(&offset_vec, &k_values, &mut offset_k);
409
410 simd_cos_f32_ultra_vec(&offset_k, &mut phase_terms);
411 } else {
412 phase_terms.fill(1.0);
413 }
414
415 let mut final_coeff = vec![0.0f32; chunk_size];
417 simd_mul_f32_ultra_vec(&biased_coeff, &phase_terms, &mut final_coeff);
418
419 for (i, &coeff) in final_coeff.iter().enumerate() {
421 coeffs[chunk_start + i] = coeff as f64;
422 }
423 } else {
424 for i in chunk_start..chunk_end {
426 let m = i as f64 - n as f64 / 2.0;
427 let k = 2.0 * PI * m / (n as f64 * dln);
428
429 let basic_coeff = k.powf(mu) * (-(k * k) / 4.0).exp();
430
431 let biased_coeff = if bias != 0.0 {
432 basic_coeff * (1.0 + bias * k * k).powf(-bias / 2.0)
433 } else {
434 basic_coeff
435 };
436
437 let phase = offset * k;
438 coeffs[i] = biased_coeff * phase.cos();
439 }
440 }
441 }
442
443 Ok(coeffs)
444}
445
446#[allow(dead_code)]
448fn fht_multiply_bandwidth_saturated_simd(a: &[f64], coeffs: &[f64]) -> FFTResult<Vec<f64>> {
449 use scirs2_core::simd_ops::SimdUnifiedOps;
450
451 let n = a.len();
452 let mut result = vec![0.0; n];
453 let chunk_size = 8;
454
455 for chunk_start in (0..n).step_by(chunk_size) {
456 let chunk_end = (chunk_start + chunk_size).min(n);
457 let chunk_len = chunk_end - chunk_start;
458
459 if chunk_len == chunk_size {
460 let mut a_chunk: Vec<f32> = a[chunk_start..chunk_end]
462 .iter()
463 .map(|&x| x as f32)
464 .collect();
465 let mut coeffs_chunk: Vec<f32> = coeffs[chunk_start..chunk_end]
466 .iter()
467 .map(|&x| x as f32)
468 .collect();
469
470 let mut product_chunk = vec![0.0f32; chunk_size];
472 simd_mul_f32_ultra_vec(&a_chunk, &coeffs_chunk, &mut product_chunk);
473
474 for (i, &prod) in product_chunk.iter().enumerate() {
476 result[chunk_start + i] = prod as f64;
477 }
478 } else {
479 for i in chunk_start..chunk_end {
481 result[i] = a[i] * coeffs[i];
482 }
483 }
484 }
485
486 Ok(result)
487}
488
489#[allow(dead_code)]
491fn fht_extract_real_bandwidth_saturated_simd(
492 spectrum: &[num_complex::Complex64],
493 n: usize,
494) -> FFTResult<Vec<f64>> {
495 use scirs2_core::simd_ops::SimdUnifiedOps;
496
497 let mut result = vec![0.0; n];
498 let chunk_size = 8;
499
500 for chunk_start in (0..n).step_by(chunk_size) {
501 let chunk_end = (chunk_start + chunk_size).min(n);
502 let chunk_len = chunk_end - chunk_start;
503
504 if chunk_len == chunk_size {
505 let mut real_parts = vec![0.0f32; chunk_size];
507 for (i, &complex_val) in spectrum[chunk_start..chunk_end].iter().enumerate() {
508 real_parts[i] = complex_val.re as f32;
509 }
510
511 for (i, &real_val) in real_parts.iter().enumerate() {
513 result[chunk_start + i] = real_val as f64;
514 }
515 } else {
516 for i in chunk_start..chunk_end {
518 result[i] = spectrum[i].re;
519 }
520 }
521 }
522
523 Ok(result)
524}
525
526#[allow(dead_code)]
528pub fn ifht_bandwidth_saturated_simd(
529 a: &[f64],
530 dln: f64,
531 mu: f64,
532 offset: Option<f64>,
533 bias: Option<f64>,
534) -> FFTResult<Vec<f64>> {
535 let bias_inv = -bias.unwrap_or(0.0);
537 fht_bandwidth_saturated_simd(a, dln, mu, offset, Some(bias_inv))
538}
539
540#[allow(dead_code)]
544pub fn fht_sample_points_bandwidth_saturated_simd(n: usize, dln: f64, offset: f64) -> Vec<f64> {
545 use scirs2_core::simd_ops::{PlatformCapabilities, SimdUnifiedOps};
546
547 let caps = PlatformCapabilities::detect();
548
549 if n >= 32 && (caps.has_avx2() || caps.has_avx512()) {
551 fht_sample_points_simd_impl(n, dln, offset)
552 } else {
553 fht_sample_points(n, dln, offset)
555 }
556}
557
558#[allow(dead_code)]
560fn fht_sample_points_simd_impl(n: usize, dln: f64, offset: f64) -> Vec<f64> {
561 use scirs2_core::simd_ops::SimdUnifiedOps;
562
563 let mut points = vec![0.0; n];
564 let chunk_size = 8;
565
566 let dln_f32 = dln as f32;
568 let offset_f32 = offset as f32;
569 let n_f32 = n as f32;
570
571 for chunk_start in (0..n).step_by(chunk_size) {
572 let chunk_end = (chunk_start + chunk_size).min(n);
573 let chunk_len = chunk_end - chunk_start;
574
575 if chunk_len == chunk_size {
576 let mut indices = vec![0.0f32; chunk_size];
578 for (i, idx) in indices.iter_mut().enumerate() {
579 *idx = (chunk_start + i) as f32;
580 }
581
582 let mut arguments = vec![0.0f32; chunk_size];
584 let n_half = vec![n_f32 / 2.0; chunk_size];
585 let dln_vec = vec![dln_f32; chunk_size];
586 let offset_vec = vec![offset_f32; chunk_size];
587
588 let mut i_minus_n_half = vec![0.0f32; chunk_size];
590 simd_sub_f32_ultra_vec(&indices, &n_half, &mut i_minus_n_half);
591
592 let mut temp = vec![0.0f32; chunk_size];
594 simd_mul_f32_ultra_vec(&i_minus_n_half, &dln_vec, &mut temp);
595
596 simd_add_f32_ultra_vec(&temp, &offset_vec, &mut arguments);
598
599 let mut exp_values = vec![0.0f32; chunk_size];
601 simd_exp_f32_ultra_vec(&arguments, &mut exp_values);
602
603 for (i, &exp_val) in exp_values.iter().enumerate() {
605 points[chunk_start + i] = exp_val as f64;
606 }
607 } else {
608 for i in chunk_start..chunk_end {
610 let arg = (i as f64 - n as f64 / 2.0) * dln + offset;
611 points[i] = arg.exp();
612 }
613 }
614 }
615
616 points
617}
618
619#[allow(dead_code)]
624pub fn fft_log_bandwidth_saturated_simd(
625 input: &[f64],
626 dln: f64,
627 mu: f64,
628 offset: Option<f64>,
629 bias: Option<f64>,
630 k_opt: Option<f64>,
631) -> FFTResult<(Vec<f64>, Vec<f64>)> {
632 use scirs2_core::simd_ops::PlatformCapabilities;
633
634 let n = input.len();
635 let caps = PlatformCapabilities::detect();
636
637 if n >= 128 && (caps.has_avx2() || caps.has_avx512()) {
639 let offset = offset.unwrap_or(0.0);
640 let k_opt = k_opt.unwrap_or(1.0);
641
642 let output = fht_bandwidth_saturated_simd(input, dln, mu, Some(offset), bias)?;
644
645 let k_points =
647 fht_sample_points_bandwidth_saturated_simd(n, 2.0 * PI / (n as f64 * dln), -offset);
648
649 Ok((output, k_points))
650 } else {
651 let offset = offset.unwrap_or(0.0);
653 let output = fht(input, dln, mu, Some(offset), bias)?;
654 let k_points = fht_sample_points(n, 2.0 * PI / (n as f64 * dln), -offset);
655
656 Ok((output, k_points))
657 }
658}