use scirs2_core::numeric::Complex64;
/// Normalization mode carried by [`GpuFftConfig::normalization`].
///
/// `None` is the default variant (marked `#[default]`). The enum is
/// `#[non_exhaustive]`, so `match` expressions in other crates must include a
/// wildcard arm.
///
/// NOTE(review): the scale factor each variant applies is not defined in this
/// part of the file; the per-variant notes below are assumptions drawn from
/// the variant names and should be confirmed against the transform code.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum NormalizationMode {
/// Default variant; presumably no scaling is applied in either direction.
#[default]
None,
/// Presumably scales the forward transform — TODO confirm.
Forward,
/// Presumably scales the inverse (backward) transform — TODO confirm.
Backward,
/// Presumably the orthonormal convention (both directions scaled) — TODO confirm.
Ortho,
}
/// Transform direction recorded in [`GpuFftPlan::direction`].
///
/// The enum is `#[non_exhaustive]`, so `match` expressions in other crates
/// must include a wildcard arm.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FftDirection {
/// Forward transform.
// NOTE(review): sign/domain convention is not shown here — confirm in the kernel code.
Forward,
/// Inverse transform.
Inverse,
}
/// Tunable settings stored in a [`GpuFftPlan`].
///
/// The `Default` implementation in this file yields `tile_size = 256`,
/// `batch_size = 8`, `use_shared_memory = true` and
/// `normalization = NormalizationMode::None`.
#[derive(Debug, Clone)]
pub struct GpuFftConfig {
/// Tile size (default 256).
// NOTE(review): unit (elements vs. bytes vs. threads) is not shown here — confirm.
pub tile_size: usize,
/// Batch size (default 8).
// NOTE(review): presumably the number of signals processed together — confirm.
pub batch_size: usize,
/// Shared-memory toggle (default `true`).
// NOTE(review): how this flag changes execution is not visible in this part of the file.
pub use_shared_memory: bool,
/// Normalization mode (default [`NormalizationMode::None`]).
pub normalization: NormalizationMode,
}
impl Default for GpuFftConfig {
fn default() -> Self {
Self {
tile_size: 256,
batch_size: 8,
use_shared_memory: true,
normalization: NormalizationMode::None,
}
}
}
/// Errors carried by [`GpuFftResult`].
///
/// `Display` text for each variant comes from its `#[error(...)]` attribute
/// (via `thiserror::Error`). The enum is `#[non_exhaustive]`, so `match`
/// expressions in other crates must include a wildcard arm.
#[non_exhaustive]
#[derive(Debug, thiserror::Error)]
pub enum GpuFftError {
/// The FFT size (payload) is below the minimum of 2 stated in the message.
#[error("FFT size {0} is too small (minimum is 2)")]
SizeTooSmall(usize),
/// The FFT size (payload) is not a power of two; the message points callers
/// to `bluestein_gpu` for arbitrary sizes.
#[error("FFT size {0} is not a power of two; use bluestein_gpu for arbitrary sizes")]
NonPowerOfTwo(usize),
/// The batch contained no signals.
#[error("Batch is empty; provide at least one signal")]
BatchEmpty,
/// A GPU buffer allocation failed; the payload is the byte count reported
/// in the message.
#[error("GPU buffer allocation failed for {0} bytes")]
AllocationFailed(usize),
/// A kernel launch failed; the payload is the detail string appended to
/// the message.
#[error("Kernel launch failed: {0}")]
KernelLaunchFailed(String),
/// The requested C2R output length is inconsistent with the input length.
// NOTE(review): "C2R" presumably means complex-to-real; the exact
// consistency rule is not shown in this part of the file.
#[error("C2R output length {requested} is inconsistent with input length {input_len}")]
InvalidOutputLength {
/// Output length that was requested.
requested: usize,
/// Length of the input the request was checked against.
input_len: usize,
},
}
/// Shorthand for `Result<T, GpuFftError>`.
pub type GpuFftResult<T> = Result<T, GpuFftError>;
/// An FFT plan: a transform size and direction bundled with its configuration
/// and a cache of complex twiddle values.
///
/// All fields are public; this type does not itself enforce any relationship
/// between them.
#[derive(Debug, Clone)]
pub struct GpuFftPlan {
/// Transform size.
// NOTE(review): presumably the number of complex points — confirm with the constructor.
pub size: usize,
/// Direction this plan is for.
pub direction: FftDirection,
/// Configuration associated with this plan.
pub config: GpuFftConfig,
/// Cached complex values.
// NOTE(review): presumably precomputed twiddle factors for `size` and
// `direction`; length and ordering are not shown here — confirm.
pub twiddle_cache: Vec<Complex64>,
}
/// Result of a batched FFT: the output vectors plus an elapsed-time figure.
#[derive(Debug, Clone)]
pub struct BatchFftResult {
/// One complex output vector per entry.
// NOTE(review): presumably ordered like the input batch — confirm with the producer.
pub outputs: Vec<Vec<Complex64>>,
/// Elapsed time in nanoseconds (per the `_ns` suffix).
// NOTE(review): which interval is measured is not shown in this part of the file.
pub elapsed_ns: u64,
}