Skip to main content

scirs2_fft/gpu_fft/
types.rs

1//! Types for the GPU-accelerated FFT pipeline.
2//!
3//! Provides configuration, error, and result types used throughout
4//! the `gpu_fft` module.  The implementation is a pure-Rust simulation
5//! of a tile-based GPU FFT pipeline; no actual GPU calls are made.
6
7use scirs2_core::numeric::Complex64;
8
9// ─────────────────────────────────────────────────────────────────────────────
10// Normalization mode
11// ─────────────────────────────────────────────────────────────────────────────
12
/// Scaling convention applied to FFT output.
///
/// | Variant    | Scale factor | Applied to                         |
/// |------------|--------------|------------------------------------|
/// | `None`     | none         | raw DFT output is returned         |
/// | `Forward`  | `1/N`        | the forward transform              |
/// | `Backward` | `1/N`        | the inverse transform (SciPy default) |
/// | `Ortho`    | `1/√N`       | both directions (unitary)          |
///
/// The enum is `Copy` and hashable, so it can be stored in sets or used as
/// (part of) a map key.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum NormalizationMode {
    /// Leave the output unscaled. This is the [`Default`] variant.
    #[default]
    None,
    /// Scale by `1/N` on the *forward* transform.
    Forward,
    /// Scale by `1/N` on the *inverse* transform (SciPy default).
    Backward,
    /// Scale by `1/√N` so that the DFT matrix is unitary.
    Ortho,
}
32
33// ─────────────────────────────────────────────────────────────────────────────
34// Transform direction
35// ─────────────────────────────────────────────────────────────────────────────
36
/// Which way a transform runs.
///
/// The type is `Copy` and hashable, so a `(size, direction)` pair can be used
/// directly as a key in a hash-based plan cache.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FftDirection {
    /// Forward DFT (negative-exponent convention).
    Forward,
    /// Inverse DFT (positive-exponent convention).
    Inverse,
}
46
47// ─────────────────────────────────────────────────────────────────────────────
48// Configuration
49// ─────────────────────────────────────────────────────────────────────────────
50
51/// Configuration for the GPU FFT pipeline.
52///
53/// All fields have sensible defaults via [`Default`].
54#[derive(Debug, Clone)]
55pub struct GpuFftConfig {
56    /// Number of points processed per simulated GPU tile (default: 256).
57    pub tile_size: usize,
58    /// Number of independent transforms processed simultaneously (default: 8).
59    pub batch_size: usize,
60    /// Simulate shared-memory tiling optimisation (default: true).
61    pub use_shared_memory: bool,
62    /// Normalisation applied after each transform (default: `None`).
63    pub normalization: NormalizationMode,
64}
65
66impl Default for GpuFftConfig {
67    fn default() -> Self {
68        Self {
69            tile_size: 256,
70            batch_size: 8,
71            use_shared_memory: true,
72            normalization: NormalizationMode::None,
73        }
74    }
75}
76
77// ─────────────────────────────────────────────────────────────────────────────
78// Error type
79// ─────────────────────────────────────────────────────────────────────────────
80
81/// Errors that can arise in the GPU FFT pipeline.
82#[non_exhaustive]
83#[derive(Debug, thiserror::Error)]
84pub enum GpuFftError {
85    /// FFT size is zero or below the minimum supported size.
86    #[error("FFT size {0} is too small (minimum is 2)")]
87    SizeTooSmall(usize),
88    /// Cooley-Tukey path requires a power-of-two input but a non-power-of-two
89    /// was given *without* Bluestein fallback.
90    #[error("FFT size {0} is not a power of two; use bluestein_gpu for arbitrary sizes")]
91    NonPowerOfTwo(usize),
92    /// A batch of zero elements was submitted.
93    #[error("Batch is empty; provide at least one signal")]
94    BatchEmpty,
95    /// A memory allocation would have failed.
96    #[error("GPU buffer allocation failed for {0} bytes")]
97    AllocationFailed(usize),
98    /// An internal kernel launch encountered an error.
99    #[error("Kernel launch failed: {0}")]
100    KernelLaunchFailed(String),
101    /// The output length requested for C2R is inconsistent.
102    #[error("C2R output length {requested} is inconsistent with input length {input_len}")]
103    InvalidOutputLength {
104        /// Requested output length.
105        requested: usize,
106        /// Input (complex) length that constrains valid choices.
107        input_len: usize,
108    },
109}
110
111/// Convenience `Result` alias for the GPU FFT pipeline.
112pub type GpuFftResult<T> = Result<T, GpuFftError>;
113
114// ─────────────────────────────────────────────────────────────────────────────
115// Plan
116// ─────────────────────────────────────────────────────────────────────────────
117
118/// A compiled FFT plan that caches twiddle factors.
119///
120/// Create plans through [`crate::gpu_fft::pipeline::GpuFftPipeline::plan`]
121/// rather than constructing this directly.
122#[derive(Debug, Clone)]
123pub struct GpuFftPlan {
124    /// Transform size (number of complex points).
125    pub size: usize,
126    /// Direction encoded in the plan.
127    pub direction: FftDirection,
128    /// Configuration snapshot used when the plan was compiled.
129    pub config: GpuFftConfig,
130    /// Precomputed twiddle factors `W_N^k = exp(-2πi·k/N)` for `k = 0..N/2`.
131    pub twiddle_cache: Vec<Complex64>,
132}
133
134// ─────────────────────────────────────────────────────────────────────────────
135// Batch result
136// ─────────────────────────────────────────────────────────────────────────────
137
138/// Result of a batched GPU FFT execution.
139#[derive(Debug, Clone)]
140pub struct BatchFftResult {
141    /// Per-signal output spectra, each of length equal to the input.
142    pub outputs: Vec<Vec<Complex64>>,
143    /// Wall-clock duration of the (simulated) kernel in nanoseconds.
144    pub elapsed_ns: u64,
145}