// scirs2_fft/gpu_fft/types.rs
//! Types for the GPU-accelerated FFT pipeline.
//!
//! Provides configuration, error, and result types used throughout
//! the `gpu_fft` module. The implementation is a pure-Rust simulation
//! of a tile-based GPU FFT pipeline; no actual GPU calls are made.
6
use scirs2_core::numeric::Complex64;
8
// ─────────────────────────────────────────────────────────────────────────────
// Normalization mode
// ─────────────────────────────────────────────────────────────────────────────
12
/// How to normalise after an FFT.
///
/// * `None` – raw DFT output (no scaling)
/// * `Forward` – multiply by `1/N` after the forward transform
/// * `Backward` – multiply by `1/N` after the inverse transform (SciPy default)
/// * `Ortho` – multiply by `1/√N` in both directions (unitary)
///
/// [`Default`] yields [`NormalizationMode::None`]. The enum is
/// `#[non_exhaustive]`, so `match`es in other crates need a wildcard arm.
/// It is `Copy` and `Hash`, so it can be passed by value and used as a
/// map/set key.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum NormalizationMode {
    /// No normalisation.
    #[default]
    None,
    /// Scale by `1/N` (applied to the *forward* transform).
    Forward,
    /// Scale by `1/N` (applied to the *inverse* transform; SciPy default).
    Backward,
    /// Scale by `1/√N` so the DFT matrix is unitary.
    Ortho,
}
32
// ─────────────────────────────────────────────────────────────────────────────
// Transform direction
// ─────────────────────────────────────────────────────────────────────────────
36
/// Direction of the FFT.
///
/// The enum is `#[non_exhaustive]`, so `match`es in other crates need a
/// wildcard arm. It is `Copy` and `Hash`, so it can be passed by value and
/// used as a map/set key.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FftDirection {
    /// Forward DFT (negative-exponent convention).
    Forward,
    /// Inverse DFT (positive-exponent convention).
    Inverse,
}
46
// ─────────────────────────────────────────────────────────────────────────────
// Configuration
// ─────────────────────────────────────────────────────────────────────────────
50
51/// Configuration for the GPU FFT pipeline.
52///
53/// All fields have sensible defaults via [`Default`].
54#[derive(Debug, Clone)]
55pub struct GpuFftConfig {
56 /// Number of points processed per simulated GPU tile (default: 256).
57 pub tile_size: usize,
58 /// Number of independent transforms processed simultaneously (default: 8).
59 pub batch_size: usize,
60 /// Simulate shared-memory tiling optimisation (default: true).
61 pub use_shared_memory: bool,
62 /// Normalisation applied after each transform (default: `None`).
63 pub normalization: NormalizationMode,
64}
65
66impl Default for GpuFftConfig {
67 fn default() -> Self {
68 Self {
69 tile_size: 256,
70 batch_size: 8,
71 use_shared_memory: true,
72 normalization: NormalizationMode::None,
73 }
74 }
75}
76
// ─────────────────────────────────────────────────────────────────────────────
// Error type
// ─────────────────────────────────────────────────────────────────────────────
80
81/// Errors that can arise in the GPU FFT pipeline.
82#[non_exhaustive]
83#[derive(Debug, thiserror::Error)]
84pub enum GpuFftError {
85 /// FFT size is zero or below the minimum supported size.
86 #[error("FFT size {0} is too small (minimum is 2)")]
87 SizeTooSmall(usize),
88 /// Cooley-Tukey path requires a power-of-two input but a non-power-of-two
89 /// was given *without* Bluestein fallback.
90 #[error("FFT size {0} is not a power of two; use bluestein_gpu for arbitrary sizes")]
91 NonPowerOfTwo(usize),
92 /// A batch of zero elements was submitted.
93 #[error("Batch is empty; provide at least one signal")]
94 BatchEmpty,
95 /// A memory allocation would have failed.
96 #[error("GPU buffer allocation failed for {0} bytes")]
97 AllocationFailed(usize),
98 /// An internal kernel launch encountered an error.
99 #[error("Kernel launch failed: {0}")]
100 KernelLaunchFailed(String),
101 /// The output length requested for C2R is inconsistent.
102 #[error("C2R output length {requested} is inconsistent with input length {input_len}")]
103 InvalidOutputLength {
104 /// Requested output length.
105 requested: usize,
106 /// Input (complex) length that constrains valid choices.
107 input_len: usize,
108 },
109}
110
111/// Convenience `Result` alias for the GPU FFT pipeline.
112pub type GpuFftResult<T> = Result<T, GpuFftError>;
113
// ─────────────────────────────────────────────────────────────────────────────
// Plan
// ─────────────────────────────────────────────────────────────────────────────
117
118/// A compiled FFT plan that caches twiddle factors.
119///
120/// Create plans through [`crate::gpu_fft::pipeline::GpuFftPipeline::plan`]
121/// rather than constructing this directly.
122#[derive(Debug, Clone)]
123pub struct GpuFftPlan {
124 /// Transform size (number of complex points).
125 pub size: usize,
126 /// Direction encoded in the plan.
127 pub direction: FftDirection,
128 /// Configuration snapshot used when the plan was compiled.
129 pub config: GpuFftConfig,
130 /// Precomputed twiddle factors `W_N^k = exp(-2πi·k/N)` for `k = 0..N/2`.
131 pub twiddle_cache: Vec<Complex64>,
132}
133
// ─────────────────────────────────────────────────────────────────────────────
// Batch result
// ─────────────────────────────────────────────────────────────────────────────
137
138/// Result of a batched GPU FFT execution.
139#[derive(Debug, Clone)]
140pub struct BatchFftResult {
141 /// Per-signal output spectra, each of length equal to the input.
142 pub outputs: Vec<Vec<Complex64>>,
143 /// Wall-clock duration of the (simulated) kernel in nanoseconds.
144 pub elapsed_ns: u64,
145}