Skip to main content

spectrograms/
lib.rs

1#![warn(clippy::all)]
2#![warn(clippy::pedantic)]
3#![warn(clippy::nursery)]
4#![allow(unused_unsafe)]
5#![allow(clippy::cast_possible_truncation)]
6#![allow(clippy::cast_precision_loss)]
7#![allow(clippy::cast_sign_loss)] // False positives with NonZeroUsize conversions
8#![allow(clippy::cast_possible_wrap)] // False positives with NonZeroUsize conversions
9#![allow(clippy::module_name_repetitions)]
10#![allow(clippy::too_many_lines)]
11#![allow(clippy::collapsible_if)]
12#![allow(clippy::if_same_then_else)]
13#![allow(clippy::unnecessary_cast)]
14#![allow(clippy::tuple_array_conversions)] // False positives with ndarray indexing
15#![allow(clippy::identity_op)]
16#![allow(clippy::needless_borrows_for_generic_args)]
17#![allow(clippy::needless_pass_by_value)] // False positives with PyO3
18#![allow(clippy::trivially_copy_pass_by_ref)] // False positives with PyO3 (likes of __repr__ and any pymethod requires &self)
19#![allow(clippy::unsafe_derive_deserialize)]
20#![allow(clippy::multiple_unsafe_ops_per_block)]
21#![allow(clippy::doc_markdown)]
22#![warn(clippy::missing_errors_doc)]
23#![warn(clippy::iter_cloned_collect)]
24#![warn(clippy::panic_in_result_fn)]
25#![warn(clippy::undocumented_unsafe_blocks)]
26
27//! # Spectrograms - FFT-Based Computations
28//!
29//! High-performance FFT-based computations for audio and image processing.
30//!
31//! # Overview
32//!
33//! This library provides:
34//! - **1D FFTs**: For time-series and audio signals
35//! - **2D FFTs**: For images and spatial data
36//! - **Spectrograms**: Time-frequency representations (STFT, Mel, ERB, CQT)
37//! - **Image operations**: Convolution, filtering, edge detection
38//! - **Two backends**: `RealFFT` (pure Rust) or FFTW (fastest)
39//! - **Plan-based API**: Reusable plans for batch processing
40//!
41//! # Domain Organization
42//!
43//! The library is organized by application domain:
44//!
45//! - [`audio`] - Audio processing (spectrograms, MFCC, chroma, pitch analysis)
46//! - [`image`] - Image processing (convolution, filtering, frequency analysis)
47//! - [`mod@fft`] - Core FFT operations (1D and 2D transforms)
48//!
49//! All functionality is also exported at the crate root for convenience.
50//!
51//! # Audio Processing
52//!
53//! Compute various types of spectrograms:
54//! - Linear-frequency spectrograms
55//! - Mel-frequency spectrograms
56//! - ERB spectrograms
57//! - Logarithmic-frequency spectrograms
58//! - CQT (Constant-Q Transform)
59//!
60//! With multiple amplitude scales:
61//! - Power (`|X|²`)
62//! - Magnitude (`|X|`)
63//! - Decibels (`10·log₁₀(power)`)
64//!
65//! # Image Processing
66//!
67//! Frequency-domain operations for images:
68//! - 2D FFT and inverse FFT
69//! - Convolution via FFT (faster for large kernels)
70//! - Spatial filtering (low-pass, high-pass, band-pass)
71//! - Edge detection
72//! - Sharpening and blurring
73//!
74//! # Features
75//!
76//! - **Two FFT backends**: `RealFFT` (default, pure Rust) or FFTW (fastest performance)
77//! - **Plan-based computation**: Reuse FFT plans for efficient batch processing
78//! - **Comprehensive window functions**: Hanning, Hamming, Blackman, Kaiser, Gaussian, etc.
79//! - **Type-safe API**: Compile-time guarantees for spectrogram types
80//!
81//! # Quick Start
82//!
83//! ## Audio: Compute a Mel Spectrogram
84//!
85//! ```
86//! use spectrograms::*;
87//! use std::f64::consts::PI;
88//! use non_empty_slice::NonEmptyVec;
89//!
90//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
91//! // Generate a sine wave at 440 Hz
92//! let sample_rate = 16000.0;
93//! let samples_vec: Vec<f64> = (0..16000)
94//!     .map(|i| (2.0 * PI * 440.0 * i as f64 / sample_rate).sin())
95//!     .collect();
96//! let samples = NonEmptyVec::new(samples_vec).unwrap();
97//!
98//! // Set up parameters
99//! let stft = StftParams::new(nzu!(512), nzu!(256), WindowType::Hanning, true)?;
100//! let params = SpectrogramParams::new(stft, sample_rate)?;
101//! let mel = MelParams::new(nzu!(80), 0.0, 8000.0)?;
102//!
103//! // Compute Mel spectrogram
104//! let spec = MelPowerSpectrogram::compute(samples.as_ref(), &params, &mel, None)?;
105//! println!("Computed {} bins x {} frames", spec.n_bins(), spec.n_frames());
106//! # Ok(())
107//! # }
108//! ```
109//!
110//! ## Image: Apply Gaussian Blur via FFT
111//!
112//! ```
113//! use spectrograms::image_ops::*;
114//! use spectrograms::nzu;
115//! use ndarray::Array2;
116//!
117//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
118//! // Create a 256x256 image
119//! let image = Array2::<f64>::from_shape_fn((256, 256), |(i, j)| {
120//!     ((i as f64 - 128.0).powi(2) + (j as f64 - 128.0).powi(2)).sqrt()
121//! });
122//!
123//! // Apply Gaussian blur
124//! let kernel = gaussian_kernel_2d(nzu!(9), 2.0)?;
125//! let blurred = convolve_fft(&image.view(), &kernel.view())?;
126//! # Ok(())
127//! # }
128//! ```
129//!
130//! ## General: 2D FFT
131//!
132//! ```
133//! use spectrograms::fft2d::*;
134//! use ndarray::Array2;
135//!
136//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
137//! let data = Array2::<f64>::zeros((128, 128));
138//! let spectrum = fft2d(&data.view())?;
139//! let power = power_spectrum_2d(&data.view())?;
140//! # Ok(())
141//! # }
142//! ```
143//!
144//! # Feature Flags
145//!
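//! The library requires exactly one FFT backend. The two backend features are
//! mutually exclusive: enabling both, or neither, is rejected at compile time.
//!
//! - `realfft` (default): Pure-Rust FFT implementation, no system dependencies
//! - `fftw`: Uses FFTW C library for fastest performance (requires system install).
//!   Because `realfft` is enabled by default, default features must be disabled
//!   when selecting `fftw`.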
150//!
151//! # Examples
152//!
153//! ## Mel Spectrogram
154//!
155//! ```
156//! use spectrograms::*;
157//! use non_empty_slice::non_empty_vec;
158//!
159//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
160//! let samples = non_empty_vec![0.0; nzu!(16000)];
161//!
162//! let stft = StftParams::new(nzu!(512), nzu!(256), WindowType::Hanning, true)?;
163//! let params = SpectrogramParams::new(stft, 16000.0)?;
164//! let mel = MelParams::new(nzu!(80), 0.0, 8000.0)?;
165//! let db = LogParams::new(-80.0)?;
166//!
167//! let spec = MelDbSpectrogram::compute(samples.as_ref(), &params, &mel, Some(&db))?;
168//! # Ok(())
169//! # }
170//! ```
171//!
172//! ## MDCT (Modified Discrete Cosine Transform)
173//!
174//! ```
175//! use spectrograms::*;
176//! use non_empty_slice::NonEmptyVec;
177//!
178//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
179//! let samples: Vec<f64> = (0..4096).map(|i| (i as f64 * 0.01).sin()).collect();
180//! let samples = NonEmptyVec::new(samples).unwrap();
181//!
182//! // Sine window gives perfect reconstruction at 50% hop
183//! let params = MdctParams::sine_window(nzu!(512))?;
184//!
185//! let coefficients = mdct(samples.as_non_empty_slice(), &params)?;
186//! let reconstructed = imdct(&coefficients, &params, Some(samples.len().get()))?;
187//! # Ok(())
188//! # }
189//! ```
190//!
191//! ## Efficient Batch Processing
192//!
193//! ```
194//! use spectrograms::*;
195//! use non_empty_slice::non_empty_vec;
196//!
197//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
198//! let signals = vec![non_empty_vec![0.0; nzu!(16000)], non_empty_vec![0.0; nzu!(16000)]];
199//!
200//! let stft = StftParams::new(nzu!(512), nzu!(256), WindowType::Hanning, true)?;
201//! let params = SpectrogramParams::new(stft, 16000.0)?;
202//!
203//! // Create plan once, reuse for all signals
204//! let planner = SpectrogramPlanner::new();
205//! let mut plan = planner.linear_plan::<Power>(&params, None)?;
206//!
207//! for signal in &signals {
208//!     let spec = plan.compute(&signal)?;
209//!     // Process spec...
210//! }
211//! # Ok(())
212//! # }
213//! ```
214
215pub mod binaural;
216mod chroma;
217mod cqt;
218mod erb;
219mod error;
220pub mod fft2d;
221mod fft_backend;
222pub mod image_ops;
223mod mdct;
224mod mfcc;
225mod spectrogram;
226mod window;
227
228#[cfg(feature = "python")]
229pub mod python;
230
231// ============================================================================
232// Domain-Specific Module Organization
233// ============================================================================
234
235/// Audio processing utilities (spectrograms, MFCC, chroma, etc.)
236///
237/// This module contains all audio-related functionality:
238/// - Spectrogram computation (Linear, Mel, ERB, CQT)
239/// - MFCC (Mel-Frequency Cepstral Coefficients)
240/// - Chromagram (pitch class profiles)
241/// - Window functions
242///
243/// # Examples
244///
245/// ```
246/// use spectrograms::{nzu, audio::*};
247/// use non_empty_slice::non_empty_vec;
248///
249/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
250/// let samples = non_empty_vec![0.0; nzu!(16000)];
251/// let stft = StftParams::new(nzu!(512), nzu!(256), WindowType::Hanning, true)?;
252/// let params = SpectrogramParams::new(stft, 16000.0)?;
253/// let spec = LinearPowerSpectrogram::compute(&samples, &params, None)?;
254/// # Ok(())
255/// # }
256/// ```
257pub mod audio {
258    pub use crate::chroma::*;
259    pub use crate::cqt::*;
260    pub use crate::erb::*;
261    pub use crate::mfcc::*;
262    pub use crate::spectrogram::*;
263    pub use crate::window::*;
264}
265
266/// Image processing utilities (convolution, filtering, etc.)
267///
268/// This module contains image processing operations using 2D FFTs:
269/// - Convolution and correlation
270/// - Spatial filtering (low-pass, high-pass, band-pass)
271/// - Edge detection
272/// - Sharpening and blurring
273///
274/// # Examples
275///
276/// ```
277/// use spectrograms::image::*;
278/// use spectrograms::nzu;
279/// use ndarray::Array2;
280///
281/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
282/// let image = Array2::<f64>::zeros((128, 128));
283/// let kernel = gaussian_kernel_2d(nzu!(5), 1.0)?;
284/// let blurred = convolve_fft(&image.view(), &kernel.view())?;
285/// # Ok(())
286/// # }
287/// ```
288pub mod image {
289    pub use crate::image_ops::*;
290}
291
292/// Core FFT operations (1D and 2D)
293///
294/// This module provides direct access to FFT functions:
295/// - 1D FFT: `fft()`, `rfft()`, `irfft()`
296/// - 2D FFT: `fft2d()`, `ifft2d()`
297/// - STFT: `stft()`, `istft()`
298/// - Power/magnitude spectra
299///
300/// # Examples
301///
302/// ```
303/// use spectrograms::{nzu, fft::*};
304/// use ndarray::Array2;
305/// use non_empty_slice::non_empty_vec;
306///
307/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
308/// // 1D FFT
309/// let signal = non_empty_vec![0.0; nzu!(1024)];
310/// let spectrum = rfft(&signal, nzu!(1024))?;
311///
312/// // 2D FFT
313/// let image = Array2::<f64>::zeros((128, 128));
314/// let spectrum_2d = fft2d(&image.view())?;
315/// # Ok(())
316/// # }
317/// ```
318pub mod fft {
319    pub use crate::fft2d::*;
320    pub use crate::spectrogram::{
321        fft, irfft, istft, magnitude_spectrum, power_spectrum, rfft, stft,
322    };
323}
324
325// Re-export everything at top level for backward compatibility
326pub use chroma::{
327    ChromaNorm, ChromaParams, Chromagram, N_CHROMA, chromagram, chromagram_from_spectrogram,
328};
329pub use cqt::{CqtParams, CqtResult, cqt};
330pub use erb::{ErbParams, GammatoneParams};
331pub use error::{SpectrogramError, SpectrogramResult};
332pub use fft_backend::{
333    C2cPlan, C2cPlanF32, C2rPlan, C2rPlanner, R2cPlan, R2cPlanF32, R2cPlanner, r2c_output_size,
334};
335pub use fft2d::*;
336pub use image_ops::*;
337pub use mdct::{MdctParams, imdct, imdct_f32, mdct, mdct_f32};
338pub use mfcc::{Mfcc, MfccParams, mfcc, mfcc_from_log_mel};
339pub use spectrogram::*;
340pub use window::{
341    WindowType, blackman_window, gaussian_window, hamming_window, hanning_window, kaiser_window,
342    rectangular_window,
343};
/// Builds a [`NonZeroUsize`](::core::num::NonZeroUsize) from a constant `usize` expression.
///
/// The argument is evaluated inside an inline `const` block, so a zero value is
/// rejected at compile time (with the message
/// `"non zero usize must be greater than 0"`) rather than at run time.
///
/// # Examples
///
/// ```
/// use spectrograms::nzu;
///
/// const FRAME_LEN: usize = 512;
/// assert_eq!(nzu!(FRAME_LEN).get(), 512);
/// assert_eq!(nzu!(256).get(), 256);
/// ```
#[macro_export]
macro_rules! nzu {
    ($rate:expr) => {{
        // No named helper item is declared here on purpose: item names in
        // `macro_rules!` expansions are not hygienic, so a helper `const` would
        // shadow (and cyclically reference) a caller constant of the same name.
        // Going through the checked constructor also removes the need for `unsafe`.
        const {
            match ::core::num::NonZeroUsize::new($rate) {
                ::core::option::Option::Some(value) => value,
                ::core::option::Option::None => {
                    ::core::panic!("non zero usize must be greater than 0")
                }
            }
        }
    }};
}
353
354#[cfg(all(feature = "fftw", feature = "realfft"))]
355compile_error!(
356    "Features 'fftw' and 'realfft' are mutually exclusive. Please enable only one of them."
357);
358
359#[cfg(not(any(feature = "fftw", feature = "realfft")))]
360compile_error!("At least one FFT backend feature must be enabled: 'fftw' or 'realfft'.");
361
362#[cfg(feature = "realfft")]
363pub use fft_backend::realfft_backend::*;
364
365#[cfg(feature = "fftw")]
366pub use fft_backend::fftw_backend::*;
367
#[cfg(feature = "python")]
use pyo3::prelude::*;

/// Python module definition for `PyO3`.
///
/// This module is only available when the `python` feature is enabled.
///
/// Initialization calls `python::register_module` and then sets the module
/// attribute `__version__` to the crate version.
///
/// # Errors
///
/// Propagates any error returned by `python::register_module` or by adding
/// the `__version__` attribute.
#[cfg(feature = "python")]
#[pymodule]
fn _spectrograms(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
    python::register_module(py, m)?;
    // `env!` substitutes the Cargo package version at compile time.
    m.add("__version__", env!("CARGO_PKG_VERSION"))?;
    Ok(())
}
381
/// Returns `(minimum, maximum)` of `data`, visiting every element exactly once.
///
/// The running pair starts at `(f64::INFINITY, f64::NEG_INFINITY)` and is
/// returned unchanged for empty input. Only strict `<` / `>` comparisons update
/// it, so NaN elements (which compare false either way) leave it untouched.
pub(crate) fn min_max_single_pass<A: AsRef<[f64]>>(data: A) -> (f64, f64) {
    data.as_ref().iter().fold(
        (f64::INFINITY, f64::NEG_INFINITY),
        |(lowest, highest), &sample| {
            (
                if sample < lowest { sample } else { lowest },
                if sample > highest { sample } else { highest },
            )
        },
    )
}