use crate::error::{FFTError, FFTResult};
use std::f64::consts::PI;

use scirs2_core::simd_ops::{
    simd_add_f32_ultra_vec, simd_cos_f32_ultra_vec, simd_div_f32_ultra_vec, simd_exp_f32_ultra_vec,
    simd_fma_f32_ultra_vec, simd_mul_f32_ultra_vec, simd_pow_f32_ultra_vec, simd_sin_f32_ultra_vec,
    simd_sub_f32_ultra_vec, PlatformCapabilities, SimdUnifiedOps,
};

#[allow(dead_code)]
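/// Compute a fast Hankel transform (FHT) of a logarithmically spaced
/// input sequence.
///
/// The input `a` is assumed to be sampled on a grid that is uniform in
/// log-space with spacing `dln`. `mu` is the order of the transform, while
/// `offset` shifts the logarithmic grid and `bias` applies a power-law
/// bias; both default to `0.0`.
///
/// # Errors
///
/// Returns `FFTError::ValueError` if `a` is empty.
///
/// # Example
///
/// A minimal usage sketch (marked `ignore` because it omits the imports a
/// doc test would need):
///
/// ```ignore
/// let n = 64;
/// let dln = 0.1;
/// let a: Vec<f64> = (0..n)
///     .map(|i| ((i as f64 - n as f64 / 2.0) * dln).exp())
///     .collect();
/// let y = fht(&a, dln, 0.0, None, None).unwrap();
/// assert_eq!(y.len(), n);
/// ```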
pub fn fht(
    a: &[f64],
    dln: f64,
    mu: f64,
    offset: Option<f64>,
    bias: Option<f64>,
) -> FFTResult<Vec<f64>> {
    let n = a.len();
    if n == 0 {
        return Err(FFTError::ValueError(
            "Input array cannot be empty".to_string(),
        ));
    }

    let offset = offset.unwrap_or(0.0);
    let bias = bias.unwrap_or(0.0);

    // Coefficients for this logarithmic grid, order, offset, and bias.
    let coeffs = fht_coefficients(n, dln, mu, offset, bias)?;

    // Apply the coefficients to the input samples element-wise.
    let modified_input: Vec<f64> = a
        .iter()
        .zip(coeffs.iter())
        .map(|(&ai, &ci)| ai * ci)
        .collect();

    // Transform the modified sequence with a standard FFT.
    let spectrum = crate::fft(&modified_input, None)?;

    // Keep the real parts of the first `n` spectrum values.
    let result: Vec<f64> = spectrum.iter().map(|c| c.re).take(n).collect();

    Ok(result)
}

#[allow(dead_code)]
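/// Inverse fast Hankel transform.
///
/// Implemented by calling [`fht`] with the sign of the `bias` parameter
/// flipped; all other arguments are forwarded unchanged.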
pub fn ifht(
    a: &[f64],
    dln: f64,
    mu: f64,
    offset: Option<f64>,
    bias: Option<f64>,
) -> FFTResult<Vec<f64>> {
    // The inverse transform is the forward transform with the bias negated.
    let bias_inv = -bias.unwrap_or(0.0);
    fht(a, dln, mu, offset, Some(bias_inv))
}

#[allow(dead_code)]
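/// Return an offset for the logarithmic grid used by [`fht`].
///
/// For an unbiased transform (`bias == 0.0`) the returned offset is `0.0`;
/// otherwise the caller-supplied `initial` value (default `0.0`) is
/// returned unchanged. The `_dln` and `_mu` arguments are currently unused.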
pub fn fhtoffset(_dln: f64, _mu: f64, initial: Option<f64>, bias: Option<f64>) -> FFTResult<f64> {
    let bias = bias.unwrap_or(0.0);
    let initial = initial.unwrap_or(0.0);

    if bias == 0.0 {
        // An unbiased transform needs no offset correction.
        Ok(0.0)
    } else {
        // Otherwise keep the caller-supplied initial value.
        Ok(initial)
    }
}

#[allow(dead_code)]
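/// Compute the scalar FHT coefficients for an `n`-point logarithmic grid.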
fn fht_coefficients(n: usize, dln: f64, mu: f64, offset: f64, bias: f64) -> FFTResult<Vec<f64>> {
    let mut coeffs = vec![0.0; n];

    for (i, coeff) in coeffs.iter_mut().enumerate() {
        // Frequency associated with this logarithmic grid point.
        let m = i as f64 - n as f64 / 2.0;
        let k = 2.0 * PI * m / (n as f64 * dln);

        // Base coefficient: power law times a Gaussian damping factor.
        let basic_coeff = k.powf(mu) * (-(k * k) / 4.0).exp();

        // Optional bias correction for power-law weighted inputs.
        let biased_coeff = if bias != 0.0 {
            basic_coeff * (1.0 + bias * k * k).powf(-bias / 2.0)
        } else {
            basic_coeff
        };

        // Phase factor from the grid offset.
        let phase = offset * k;
        *coeff = biased_coeff * phase.cos();
    }

    Ok(coeffs)
}

#[allow(dead_code)]
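/// Generate the logarithmically spaced sample points
/// `exp((i - n/2) * dln + offset)` for `i = 0..n`.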
pub fn fht_sample_points(n: usize, dln: f64, offset: f64) -> Vec<f64> {
    (0..n)
        .map(|i| ((i as f64 - n as f64 / 2.0) * dln + offset).exp())
        .collect()
}

#[allow(dead_code)]
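/// SIMD-accelerated variant of [`fht`].
///
/// Uses f32 SIMD kernels when the input has at least 64 elements and the
/// CPU supports AVX2 or AVX-512; otherwise it falls back to the scalar
/// [`fht`]. Note that the SIMD path evaluates the coefficients and
/// products in single precision.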
pub fn fht_bandwidth_saturated_simd(
    a: &[f64],
    dln: f64,
    mu: f64,
    offset: Option<f64>,
    bias: Option<f64>,
) -> FFTResult<Vec<f64>> {
    let n = a.len();
    if n == 0 {
        return Err(FFTError::ValueError(
            "Input array cannot be empty".to_string(),
        ));
    }

    let offset = offset.unwrap_or(0.0);
    let bias = bias.unwrap_or(0.0);

    // Check what the current CPU supports before taking the SIMD path.
    let caps = PlatformCapabilities::detect();

    if n >= 64 && (caps.has_avx2() || caps.has_avx512()) {
        fht_bandwidth_saturated_simd_impl(a, dln, mu, offset, bias)
    } else {
        // Fall back to the scalar implementation for small inputs or older CPUs.
        fht(a, dln, mu, Some(offset), Some(bias))
    }
}

#[allow(dead_code)]
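/// SIMD implementation behind [`fht_bandwidth_saturated_simd`].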
fn fht_bandwidth_saturated_simd_impl(
    a: &[f64],
    dln: f64,
    mu: f64,
    offset: f64,
    bias: f64,
) -> FFTResult<Vec<f64>> {
    let n = a.len();

    // Coefficients computed with SIMD-accelerated arithmetic.
    let coeffs = fht_coefficients_bandwidth_saturated_simd(n, dln, mu, offset, bias)?;

    // Element-wise product of the input with the coefficients.
    let modified_input = fht_multiply_bandwidth_saturated_simd(a, &coeffs)?;

    // Standard FFT of the modified sequence.
    let spectrum = crate::fft(&modified_input, None)?;

    // Real parts of the spectrum form the result.
    let result = fht_extract_real_bandwidth_saturated_simd(&spectrum, n)?;

    Ok(result)
}

#[allow(dead_code)]
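/// Compute the FHT coefficients in chunks of eight using f32 SIMD kernels,
/// with a scalar fallback for the final partial chunk.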
fn fht_coefficients_bandwidth_saturated_simd(
    n: usize,
    dln: f64,
    mu: f64,
    offset: f64,
    bias: f64,
) -> FFTResult<Vec<f64>> {
    let mut coeffs = vec![0.0; n];
    let chunk_size = 8;

    // Pre-convert scalar parameters to f32 for the SIMD kernels.
    let dln_f32 = dln as f32;
    let mu_f32 = mu as f32;
    let offset_f32 = offset as f32;
    let bias_f32 = bias as f32;
    let n_f32 = n as f32;
    let two_pi = (2.0 * PI) as f32;

    for chunk_start in (0..n).step_by(chunk_size) {
        let chunk_end = (chunk_start + chunk_size).min(n);
        let chunk_len = chunk_end - chunk_start;

        if chunk_len == chunk_size {
            // Sample indices for this chunk.
            let mut indices = vec![0.0f32; chunk_size];
            for (i, idx) in indices.iter_mut().enumerate() {
                *idx = (chunk_start + i) as f32;
            }

            // m = i - n/2
            let mut m_values = vec![0.0f32; chunk_size];
            let n_half = vec![n_f32 / 2.0; chunk_size];
            simd_sub_f32_ultra_vec(&indices, &n_half, &mut m_values);

            // k = 2*pi*m / (n*dln)
            let mut k_values = vec![0.0f32; chunk_size];
            let mut temp = vec![0.0f32; chunk_size];
            let two_pi_vec = vec![two_pi; chunk_size];
            let n_dln = vec![n_f32 * dln_f32; chunk_size];

            simd_mul_f32_ultra_vec(&two_pi_vec, &m_values, &mut temp);
            simd_div_f32_ultra_vec(&temp, &n_dln, &mut k_values);

            // k^mu
            let mut k_pow_mu = vec![0.0f32; chunk_size];
            let mu_vec = vec![mu_f32; chunk_size];
            simd_pow_f32_ultra_vec(&k_values, &mu_vec, &mut k_pow_mu);

            // k^2
            let mut k_squared = vec![0.0f32; chunk_size];
            simd_mul_f32_ultra_vec(&k_values, &k_values, &mut k_squared);

            // exp(-k^2 / 4)
            let mut neg_k_squared_quarter = vec![0.0f32; chunk_size];
            let quarter_neg = vec![-0.25f32; chunk_size];
            simd_mul_f32_ultra_vec(&k_squared, &quarter_neg, &mut neg_k_squared_quarter);

            let mut exp_term = vec![0.0f32; chunk_size];
            simd_exp_f32_ultra_vec(&neg_k_squared_quarter, &mut exp_term);

            // Base coefficient: k^mu * exp(-k^2 / 4)
            let mut basic_coeff = vec![0.0f32; chunk_size];
            simd_mul_f32_ultra_vec(&k_pow_mu, &exp_term, &mut basic_coeff);

            // Optional bias correction: (1 + bias*k^2)^(-bias/2)
            let mut biased_coeff = vec![0.0f32; chunk_size];
            if bias != 0.0 {
                let mut bias_k_squared = vec![0.0f32; chunk_size];
                let bias_vec = vec![bias_f32; chunk_size];
                simd_mul_f32_ultra_vec(&bias_vec, &k_squared, &mut bias_k_squared);

                let mut one_plus_bias_k_sq = vec![0.0f32; chunk_size];
                let ones = vec![1.0f32; chunk_size];
                simd_add_f32_ultra_vec(&ones, &bias_k_squared, &mut one_plus_bias_k_sq);

                let mut bias_correction = vec![0.0f32; chunk_size];
                let neg_bias_half = vec![-bias_f32 / 2.0; chunk_size];
                simd_pow_f32_ultra_vec(&one_plus_bias_k_sq, &neg_bias_half, &mut bias_correction);

                simd_mul_f32_ultra_vec(&basic_coeff, &bias_correction, &mut biased_coeff);
            } else {
                biased_coeff.copy_from_slice(&basic_coeff);
            }

            // Phase factor: cos(offset * k)
            let mut phase_terms = vec![0.0f32; chunk_size];
            if offset != 0.0 {
                let mut offset_k = vec![0.0f32; chunk_size];
                let offset_vec = vec![offset_f32; chunk_size];
                simd_mul_f32_ultra_vec(&offset_vec, &k_values, &mut offset_k);

                simd_cos_f32_ultra_vec(&offset_k, &mut phase_terms);
            } else {
                phase_terms.fill(1.0);
            }

            // Final coefficients for this chunk.
            let mut final_coeff = vec![0.0f32; chunk_size];
            simd_mul_f32_ultra_vec(&biased_coeff, &phase_terms, &mut final_coeff);

            // Store back as f64.
            for (i, &coeff) in final_coeff.iter().enumerate() {
                coeffs[chunk_start + i] = coeff as f64;
            }
        } else {
            // Scalar fallback for the final partial chunk.
            for i in chunk_start..chunk_end {
                let m = i as f64 - n as f64 / 2.0;
                let k = 2.0 * PI * m / (n as f64 * dln);

                let basic_coeff = k.powf(mu) * (-(k * k) / 4.0).exp();

                let biased_coeff = if bias != 0.0 {
                    basic_coeff * (1.0 + bias * k * k).powf(-bias / 2.0)
                } else {
                    basic_coeff
                };

                let phase = offset * k;
                coeffs[i] = biased_coeff * phase.cos();
            }
        }
    }

    Ok(coeffs)
}

#[allow(dead_code)]
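/// Element-wise product of the input samples with the coefficients,
/// processed in chunks of eight via f32 SIMD with a scalar tail.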
fn fht_multiply_bandwidth_saturated_simd(a: &[f64], coeffs: &[f64]) -> FFTResult<Vec<f64>> {
    let n = a.len();
    let mut result = vec![0.0; n];
    let chunk_size = 8;

    for chunk_start in (0..n).step_by(chunk_size) {
        let chunk_end = (chunk_start + chunk_size).min(n);
        let chunk_len = chunk_end - chunk_start;

        if chunk_len == chunk_size {
            // Convert this chunk to f32 for the SIMD multiply.
            let a_chunk: Vec<f32> = a[chunk_start..chunk_end]
                .iter()
                .map(|&x| x as f32)
                .collect();
            let coeffs_chunk: Vec<f32> = coeffs[chunk_start..chunk_end]
                .iter()
                .map(|&x| x as f32)
                .collect();

            let mut product_chunk = vec![0.0f32; chunk_size];
            simd_mul_f32_ultra_vec(&a_chunk, &coeffs_chunk, &mut product_chunk);

            // Store the products back as f64.
            for (i, &prod) in product_chunk.iter().enumerate() {
                result[chunk_start + i] = prod as f64;
            }
        } else {
            // Scalar fallback for the final partial chunk.
            for i in chunk_start..chunk_end {
                result[i] = a[i] * coeffs[i];
            }
        }
    }

    Ok(result)
}

#[allow(dead_code)]
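/// Extract the real parts of the FFT spectrum into an `n`-element vector.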
fn fht_extract_real_bandwidth_saturated_simd(
    spectrum: &[scirs2_core::numeric::Complex64],
    n: usize,
) -> FFTResult<Vec<f64>> {
    let mut result = vec![0.0; n];
    let chunk_size = 8;

    for chunk_start in (0..n).step_by(chunk_size) {
        let chunk_end = (chunk_start + chunk_size).min(n);
        let chunk_len = chunk_end - chunk_start;

        if chunk_len == chunk_size {
            // Gather the real parts of this chunk as f32.
            let mut real_parts = vec![0.0f32; chunk_size];
            for (i, &complex_val) in spectrum[chunk_start..chunk_end].iter().enumerate() {
                real_parts[i] = complex_val.re as f32;
            }

            // Store them back as f64.
            for (i, &real_val) in real_parts.iter().enumerate() {
                result[chunk_start + i] = real_val as f64;
            }
        } else {
            // Scalar fallback for the final partial chunk.
            for i in chunk_start..chunk_end {
                result[i] = spectrum[i].re;
            }
        }
    }

    Ok(result)
}

#[allow(dead_code)]
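/// SIMD-accelerated variant of [`ifht`], implemented by negating the bias
/// and calling [`fht_bandwidth_saturated_simd`].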
pub fn ifht_bandwidth_saturated_simd(
    a: &[f64],
    dln: f64,
    mu: f64,
    offset: Option<f64>,
    bias: Option<f64>,
) -> FFTResult<Vec<f64>> {
    // As with `ifht`, the inverse is obtained by negating the bias.
    let bias_inv = -bias.unwrap_or(0.0);
    fht_bandwidth_saturated_simd(a, dln, mu, offset, Some(bias_inv))
}

#[allow(dead_code)]
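/// SIMD-accelerated variant of [`fht_sample_points`].
///
/// Dispatches to the SIMD implementation for `n >= 32` on CPUs with AVX2 or
/// AVX-512 support and falls back to the scalar version otherwise.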
pub fn fht_sample_points_bandwidth_saturated_simd(n: usize, dln: f64, offset: f64) -> Vec<f64> {
    let caps = PlatformCapabilities::detect();

    if n >= 32 && (caps.has_avx2() || caps.has_avx512()) {
        fht_sample_points_simd_impl(n, dln, offset)
    } else {
        // Fall back to the scalar implementation for small inputs or older CPUs.
        fht_sample_points(n, dln, offset)
    }
}

#[allow(dead_code)]
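/// SIMD implementation behind [`fht_sample_points_bandwidth_saturated_simd`].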
fn fht_sample_points_simd_impl(n: usize, dln: f64, offset: f64) -> Vec<f64> {
    let mut points = vec![0.0; n];
    let chunk_size = 8;

    // Pre-convert scalar parameters to f32 for the SIMD kernels.
    let dln_f32 = dln as f32;
    let offset_f32 = offset as f32;
    let n_f32 = n as f32;

    for chunk_start in (0..n).step_by(chunk_size) {
        let chunk_end = (chunk_start + chunk_size).min(n);
        let chunk_len = chunk_end - chunk_start;

        if chunk_len == chunk_size {
            // Sample indices for this chunk.
            let mut indices = vec![0.0f32; chunk_size];
            for (i, idx) in indices.iter_mut().enumerate() {
                *idx = (chunk_start + i) as f32;
            }

            // arguments = (i - n/2) * dln + offset
            let mut arguments = vec![0.0f32; chunk_size];
            let n_half = vec![n_f32 / 2.0; chunk_size];
            let dln_vec = vec![dln_f32; chunk_size];
            let offset_vec = vec![offset_f32; chunk_size];

            let mut i_minus_n_half = vec![0.0f32; chunk_size];
            simd_sub_f32_ultra_vec(&indices, &n_half, &mut i_minus_n_half);

            let mut temp = vec![0.0f32; chunk_size];
            simd_mul_f32_ultra_vec(&i_minus_n_half, &dln_vec, &mut temp);

            simd_add_f32_ultra_vec(&temp, &offset_vec, &mut arguments);

            // points = exp(arguments)
            let mut exp_values = vec![0.0f32; chunk_size];
            simd_exp_f32_ultra_vec(&arguments, &mut exp_values);

            for (i, &exp_val) in exp_values.iter().enumerate() {
                points[chunk_start + i] = exp_val as f64;
            }
        } else {
            // Scalar fallback for the final partial chunk.
            for i in chunk_start..chunk_end {
                let arg = (i as f64 - n as f64 / 2.0) * dln + offset;
                points[i] = arg.exp();
            }
        }
    }

    points
}

#[allow(dead_code)]
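/// Compute an FFTLog-style transform pair: the transformed values together
/// with the sample points of the conjugate (k-space) grid.
///
/// Inputs with at least 128 elements on CPUs with AVX2 or AVX-512 take the
/// SIMD path; everything else uses the scalar [`fht`] path. The `k_opt`
/// argument is accepted but not used by this implementation.
///
/// # Example
///
/// A minimal usage sketch (marked `ignore` because it omits the imports a
/// doc test would need):
///
/// ```ignore
/// let n = 256;
/// let dln = 0.05;
/// let r = fht_sample_points(n, dln, 0.0);
/// let f: Vec<f64> = r.iter().map(|&x| (-x * x).exp()).collect();
/// let (fk, k) = fft_log_bandwidth_saturated_simd(&f, dln, 0.0, None, None, None).unwrap();
/// assert_eq!(fk.len(), n);
/// assert_eq!(k.len(), n);
/// ```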
pub fn fft_log_bandwidth_saturated_simd(
    input: &[f64],
    dln: f64,
    mu: f64,
    offset: Option<f64>,
    bias: Option<f64>,
    k_opt: Option<f64>,
) -> FFTResult<(Vec<f64>, Vec<f64>)> {
    let n = input.len();
    let caps = PlatformCapabilities::detect();

    if n >= 128 && (caps.has_avx2() || caps.has_avx512()) {
        let offset = offset.unwrap_or(0.0);
        // The optimal-k parameter is accepted but not used by this implementation.
        let _k_opt = k_opt.unwrap_or(1.0);

        // SIMD-accelerated transform of the input.
        let output = fht_bandwidth_saturated_simd(input, dln, mu, Some(offset), bias)?;

        // Sample points of the conjugate (k-space) grid.
        let k_points =
            fht_sample_points_bandwidth_saturated_simd(n, 2.0 * PI / (n as f64 * dln), -offset);

        Ok((output, k_points))
    } else {
        // Scalar fallback for small inputs or CPUs without AVX2/AVX-512.
        let offset = offset.unwrap_or(0.0);
        let output = fht(input, dln, mu, Some(offset), bias)?;
        let k_points = fht_sample_points(n, 2.0 * PI / (n as f64 * dln), -offset);

        Ok((output, k_points))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;

    #[test]
    fn test_fht_basic() {
        let n = 64;
        let dln = 0.1;
        let mu = 0.0;

        // Input sampled on a logarithmic grid.
        let x: Vec<f64> = (0..n)
            .map(|i| ((i as f64 - n as f64 / 2.0) * dln).exp())
            .collect();

        // Forward transform preserves the length.
        let y = fht(&x, dln, mu, None, None).unwrap();
        assert_eq!(y.len(), n);

        // Inverse transform also returns an array of the same length.
        let x_recovered = ifht(&y, dln, mu, None, None).unwrap();
        assert_eq!(x_recovered.len(), n);
    }

    #[test]
    fn test_fhtoffset() {
        let dln = 0.1;
        let mu = 0.5;

        // With zero bias the offset is zero.
        let offset1 = fhtoffset(dln, mu, None, Some(0.0)).unwrap();
        assert_relative_eq!(offset1, 0.0, epsilon = 1e-10);

        // With non-zero bias the initial value is returned.
        let offset2 = fhtoffset(dln, mu, Some(0.5), Some(1.0)).unwrap();
        assert_relative_eq!(offset2, 0.5, epsilon = 1e-10);
    }

    #[test]
    fn test_sample_points() {
        let n = 8;
        let dln = 0.5;
        let offset = 1.0;

        let points = fht_sample_points(n, dln, offset);
        assert_eq!(points.len(), n);

        // Consecutive sample points are spaced by a factor of exp(dln).
        for i in 1..n {
            let ratio = points[i] / points[i - 1];
            assert_relative_eq!(ratio.ln(), dln, epsilon = 1e-10);
        }
    }
}