spectrograms/spectrogram.rs
1use std::marker::PhantomData;
2use std::num::NonZeroUsize;
3use std::ops::{Deref, DerefMut};
4
5use ndarray::{Array1, Array2};
6use non_empty_slice::{NonEmptySlice, NonEmptyVec, non_empty_vec};
7use num_complex::Complex;
8
9#[cfg(feature = "python")]
10use pyo3::prelude::*;
11
12use crate::cqt::CqtKernel;
13use crate::erb::ErbFilterbank;
14use crate::{
15 CqtParams, ErbParams, R2cPlan, SpectrogramError, SpectrogramResult, WindowType,
16 min_max_single_pass, nzu,
17};
18const EPS: f64 = 1e-12;
19
20//
21// ========================
22// Sparse Matrix for efficient filterbank multiplication
23// ========================
24//
25
26/// Row-wise sparse matrix optimized for matrix-vector multiplication.
27///
28/// This structure stores sparse data as a vector of vectors, where each row maintains
29/// its own list of non-zero values and corresponding column indices. This is more
30/// flexible than traditional CSR format and allows efficient row-by-row construction.
31///
32/// This structure is designed for matrices with very few non-zero values per row,
33/// such as mel filterbanks (triangular filters) and logarithmic frequency mappings
34/// (linear interpolation between 1-2 adjacent bins).
35///
36/// For typical spectrograms:
37/// - `LogHz` interpolation: Only 1-2 non-zeros per row (~99% sparse)
38/// - Mel filterbank: ~10-50 non-zeros per row depending on FFT size (~90-98% sparse)
39///
40/// By storing only non-zero values, we avoid wasting CPU cycles multiplying by zero,
41/// which can provide 10-100x speedup compared to dense matrix multiplication.
42#[derive(Debug, Clone)]
43struct SparseMatrix {
44 /// Number of rows
45 nrows: usize,
46 /// Number of columns
47 ncols: usize,
48 /// Non-zero values for each row (row-major order)
49 values: Vec<Vec<f64>>,
50 /// Column indices for each non-zero value
51 indices: Vec<Vec<usize>>,
52}
53
54impl SparseMatrix {
55 /// Create a new sparse matrix with the given dimensions.
56 fn new(nrows: usize, ncols: usize) -> Self {
57 Self {
58 nrows,
59 ncols,
60 values: vec![Vec::new(); nrows],
61 indices: vec![Vec::new(); nrows],
62 }
63 }
64
65 /// Set a value in the matrix. Only stores if value is non-zero.
66 ///
67 /// # Panics (debug mode only)
68 /// Panics in debug builds if row or col are out of bounds.
69 fn set(&mut self, row: usize, col: usize, value: f64) {
70 debug_assert!(
71 row < self.nrows && col < self.ncols,
72 "SparseMatrix index out of bounds: ({}, {}) for {}x{} matrix",
73 row,
74 col,
75 self.nrows,
76 self.ncols
77 );
78 if row >= self.nrows || col >= self.ncols {
79 return;
80 }
81
82 // Only store non-zero values (with small epsilon for numerical stability)
83 if value.abs() > 1e-10 {
84 self.values[row].push(value);
85 self.indices[row].push(col);
86 }
87 }
88
89 /// Get the number of rows.
90 const fn nrows(&self) -> usize {
91 self.nrows
92 }
93
94 /// Get the number of columns.
95 const fn ncols(&self) -> usize {
96 self.ncols
97 }
98
99 /// Perform sparse matrix-vector multiplication: out = self * input
100 /// This is much faster than dense multiplication when the matrix is sparse.
101 #[inline]
102 fn multiply_vec(&self, input: &[f64], out: &mut [f64]) {
103 debug_assert_eq!(input.len(), self.ncols);
104 debug_assert_eq!(out.len(), self.nrows);
105
106 for (row_idx, (row_values, row_indices)) in
107 self.values.iter().zip(&self.indices).enumerate()
108 {
109 let mut acc = 0.0;
110 for (&value, &col_idx) in row_values.iter().zip(row_indices) {
111 acc += value * input[col_idx];
112 }
113 out[row_idx] = acc;
114 }
115 }
116}
117
118// Linear frequency
119pub type LinearPowerSpectrogram = Spectrogram<LinearHz, Power>;
120pub type LinearMagnitudeSpectrogram = Spectrogram<LinearHz, Magnitude>;
121pub type LinearDbSpectrogram = Spectrogram<LinearHz, Decibels>;
122pub type LinearSpectrogram<AmpScale> = Spectrogram<LinearHz, AmpScale>;
123
124// Log-frequency (e.g. CQT-style)
125pub type LogHzPowerSpectrogram = Spectrogram<LogHz, Power>;
126pub type LogHzMagnitudeSpectrogram = Spectrogram<LogHz, Magnitude>;
127pub type LogHzDbSpectrogram = Spectrogram<LogHz, Decibels>;
128pub type LogHzSpectrogram<AmpScale> = Spectrogram<LogHz, AmpScale>;
129
130// ERB / gammatone
131pub type ErbPowerSpectrogram = Spectrogram<Erb, Power>;
132pub type ErbMagnitudeSpectrogram = Spectrogram<Erb, Magnitude>;
133pub type ErbDbSpectrogram = Spectrogram<Erb, Decibels>;
134pub type GammatonePowerSpectrogram = ErbPowerSpectrogram;
135pub type GammatoneMagnitudeSpectrogram = ErbMagnitudeSpectrogram;
136pub type GammatoneDbSpectrogram = ErbDbSpectrogram;
137pub type ErbSpectrogram<AmpScale> = Spectrogram<Erb, AmpScale>;
138pub type GammatoneSpectrogram<AmpScale> = ErbSpectrogram<AmpScale>;
139
140// Mel
141pub type MelMagnitudeSpectrogram = Spectrogram<Mel, Magnitude>;
142pub type MelPowerSpectrogram = Spectrogram<Mel, Power>;
143pub type MelDbSpectrogram = Spectrogram<Mel, Decibels>;
144pub type LogMelSpectrogram = MelDbSpectrogram;
145pub type MelSpectrogram<AmpScale> = Spectrogram<Mel, AmpScale>;
146
147// CQT
148pub type CqtPowerSpectrogram = Spectrogram<Cqt, Power>;
149pub type CqtMagnitudeSpectrogram = Spectrogram<Cqt, Magnitude>;
150pub type CqtDbSpectrogram = Spectrogram<Cqt, Decibels>;
151pub type CqtSpectrogram<AmpScale> = Spectrogram<Cqt, AmpScale>;
152
153use crate::fft_backend::r2c_output_size;
154
155/// A spectrogram plan is the compiled, reusable execution object.
156///
157/// It owns:
158/// - FFT plan (reusable)
159/// - window samples
160/// - mapping (identity / mel filterbank / etc.)
161/// - amplitude scaling config
162/// - workspace buffers to avoid allocations in hot loops
163///
164/// It computes one specific spectrogram type: `Spectrogram<FreqScale, AmpScale>`.
165///
166/// # Type Parameters
167///
168/// - `FreqScale`: Frequency scale type (e.g. `LinearHz`, `LogHz`, `Mel`, etc.)
169/// - `AmpScale`: Amplitude scaling type (e.g. `Power`, `Magnitude`, `Decibels`, etc.)
170pub struct SpectrogramPlan<FreqScale, AmpScale>
171where
172 AmpScale: AmpScaleSpec + 'static,
173 FreqScale: Copy + Clone + 'static,
174{
175 params: SpectrogramParams,
176
177 stft: StftPlan,
178 mapping: FrequencyMapping<FreqScale>,
179 scaling: AmplitudeScaling<AmpScale>,
180
181 freq_axis: FrequencyAxis<FreqScale>,
182 workspace: Workspace,
183
184 _amp: PhantomData<AmpScale>,
185}
186
187impl<FreqScale, AmpScale> SpectrogramPlan<FreqScale, AmpScale>
188where
189 AmpScale: AmpScaleSpec + 'static,
190 FreqScale: Copy + Clone + 'static,
191{
192 /// Get the spectrogram parameters used to create this plan.
193 ///
194 /// # Returns
195 ///
196 /// A reference to the `SpectrogramParams` used in this plan.
197 #[inline]
198 #[must_use]
199 pub const fn params(&self) -> &SpectrogramParams {
200 &self.params
201 }
202
203 /// Get the frequency axis for this spectrogram plan.
204 ///
205 /// # Returns
206 ///
207 /// A reference to the `FrequencyAxis<FreqScale>` used in this plan.
208 #[inline]
209 #[must_use]
210 pub const fn freq_axis(&self) -> &FrequencyAxis<FreqScale> {
211 &self.freq_axis
212 }
213
214 /// Compute a spectrogram for a mono signal.
215 ///
216 /// This function performs:
217 /// - framing + windowing
218 /// - FFT per frame
219 /// - magnitude/power
220 /// - frequency mapping (identity/mel/etc.)
221 /// - amplitude scaling (linear or dB)
222 ///
223 /// It allocates the output `Array2` once, but does not allocate per-frame.
224 ///
225 /// # Arguments
226 ///
227 /// * `samples` - Audio samples
228 ///
229 /// # Returns
230 ///
231 /// A `Spectrogram<FreqScale, AmpScale>` containing the computed spectrogram.
232 ///
233 /// # Errors
234 ///
235 /// Returns an error if STFT computation or mapping fails.
236 #[inline]
237 pub fn compute(
238 &mut self,
239 samples: &NonEmptySlice<f64>,
240 ) -> SpectrogramResult<Spectrogram<FreqScale, AmpScale>> {
241 let n_frames = self.stft.frame_count(samples.len())?;
242 let n_bins = self.mapping.output_bins();
243
244 // Create output matrix: (n_bins, n_frames)
245 let mut data = Array2::<f64>::zeros((n_bins.get(), n_frames.get()));
246
247 // Ensure workspace is correctly sized
248 self.workspace
249 .ensure_sizes(self.stft.n_fft, self.stft.out_len, n_bins);
250
251 // Main loop: fill each frame (column)
252 // TODO: Parallelize with rayon once thread-safety issues are resolved
253 for frame_idx in 0..n_frames.get() {
254 // CQT needs unwindowed frames because its kernels already contain windowing.
255 // Other mappings use the FFT spectrum, so they need the windowed frame.
256 if self.mapping.kind.needs_unwindowed_frame() {
257 // Fill frame without windowing (for CQT)
258 self.stft
259 .fill_frame_unwindowed(samples, frame_idx, &mut self.workspace)?;
260 } else {
261 // Compute windowed frame spectrum (for FFT-based mappings)
262 self.stft
263 .compute_frame_spectrum(samples, frame_idx, &mut self.workspace)?;
264 }
265
266 // mapping: spectrum(out_len) -> mapped(n_bins)
267 // For CQT, this uses workspace.frame; for others, workspace.spectrum
268 // For ERB, we need the complex FFT output (fft_out)
269 // We need to borrow workspace fields separately to avoid borrow conflicts
270 let Workspace {
271 spectrum,
272 mapped,
273 frame,
274 ..
275 } = &mut self.workspace;
276
277 self.mapping.apply(spectrum, frame, mapped)?;
278
279 // amplitude scaling in-place on mapped vector
280 self.scaling.apply_in_place(mapped)?;
281
282 // write column into output
283 for (row, &val) in mapped.iter().enumerate() {
284 data[[row, frame_idx]] = val;
285 }
286 }
287
288 let times = build_time_axis_seconds(&self.params, n_frames);
289 let axes = Axes::new(self.freq_axis.clone(), times);
290
291 Ok(Spectrogram::new(data, axes, self.params.clone()))
292 }
293
294 /// Compute a single frame of the spectrogram.
295 ///
296 /// This is useful for streaming/online processing where you want to
297 /// process audio frame-by-frame without computing the entire spectrogram.
298 ///
299 /// # Arguments
300 ///
301 /// * `samples` - Audio samples (must contain at least enough samples for the requested frame)
302 /// * `frame_idx` - Frame index to compute
303 ///
304 /// # Returns
305 ///
306 /// A vector of frequency bin values for the requested frame.
307 ///
308 /// # Errors
309 ///
310 /// Returns an error if the frame index is out of bounds or if STFT computation fails.
311 ///
312 /// # Examples
313 ///
314 /// ```
315 /// use spectrograms::*;
316 /// use non_empty_slice::non_empty_vec;
317 ///
318 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
319 /// let samples = non_empty_vec![0.0; nzu!(16000)];
320 /// let stft = StftParams::new(nzu!(512), nzu!(256), WindowType::Hanning, true)?;
321 /// let params = SpectrogramParams::new(stft, 16000.0)?;
322 ///
323 /// let planner = SpectrogramPlanner::new();
324 /// let mut plan = planner.linear_plan::<Power>(¶ms, None)?;
325 ///
326 /// // Compute just the first frame
327 /// let frame = plan.compute_frame(&samples, 0)?;
328 /// assert_eq!(frame.len(), nzu!(257)); // n_fft/2 + 1
329 /// # Ok(())
330 /// # }
331 /// ```
332 #[inline]
333 pub fn compute_frame(
334 &mut self,
335 samples: &NonEmptySlice<f64>,
336 frame_idx: usize,
337 ) -> SpectrogramResult<NonEmptyVec<f64>> {
338 let n_bins = self.mapping.output_bins();
339
340 // Ensure workspace is correctly sized
341 self.workspace
342 .ensure_sizes(self.stft.n_fft, self.stft.out_len, n_bins);
343
344 // CQT needs unwindowed frames because its kernels already contain windowing.
345 // Other mappings use the FFT spectrum, so they need the windowed frame.
346 if self.mapping.kind.needs_unwindowed_frame() {
347 // Fill frame without windowing (for CQT)
348 self.stft
349 .fill_frame_unwindowed(samples, frame_idx, &mut self.workspace)?;
350 } else {
351 // Compute windowed frame spectrum (for FFT-based mappings)
352 self.stft
353 .compute_frame_spectrum(samples, frame_idx, &mut self.workspace)?;
354 }
355
356 // Apply mapping (using split borrows to avoid borrow conflicts)
357 let Workspace {
358 spectrum,
359 mapped,
360 frame,
361 ..
362 } = &mut self.workspace;
363
364 self.mapping.apply(spectrum, frame, mapped)?;
365
366 // Apply amplitude scaling
367 self.scaling.apply_in_place(mapped)?;
368
369 Ok(mapped.clone())
370 }
371
372 /// Compute spectrogram into a pre-allocated buffer.
373 ///
374 /// This avoids allocating the output matrix, which is useful when
375 /// you want to reuse buffers or have strict memory requirements.
376 ///
377 /// # Arguments
378 ///
379 /// * `samples` - Audio samples
380 /// * `output` - Pre-allocated output matrix (must be correct size: `n_bins` x `n_frames`)
381 ///
382 /// # Returns
383 ///
384 /// An empty result on success.
385 ///
386 /// # Errors
387 ///
388 /// Returns an error if the output buffer dimensions don't match the expected size.
389 ///
390 /// # Examples
391 ///
392 /// ```
393 /// use spectrograms::*;
394 /// use ndarray::Array2;
395 /// use non_empty_slice::non_empty_vec;
396 ///
397 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
398 /// let samples = non_empty_vec![0.0; nzu!(16000)];
399 /// let stft = StftParams::new(nzu!(512), nzu!(256), WindowType::Hanning, true)?;
400 /// let params = SpectrogramParams::new(stft, 16000.0)?;
401 ///
402 /// let planner = SpectrogramPlanner::new();
403 /// let mut plan = planner.linear_plan::<Power>(¶ms, None)?;
404 ///
405 /// // Pre-allocate output buffer
406 /// let mut output = Array2::<f64>::zeros((257, 63));
407 /// plan.compute_into(&samples, &mut output)?;
408 /// # Ok(())
409 /// # }
410 /// ```
411 #[inline]
412 pub fn compute_into(
413 &mut self,
414 samples: &NonEmptySlice<f64>,
415 output: &mut Array2<f64>,
416 ) -> SpectrogramResult<()> {
417 let n_frames = self.stft.frame_count(samples.len())?;
418 let n_bins = self.mapping.output_bins();
419
420 // Validate output dimensions
421 if output.nrows() != n_bins.get() {
422 return Err(SpectrogramError::dimension_mismatch(
423 n_bins.get(),
424 output.nrows(),
425 ));
426 }
427 if output.ncols() != n_frames.get() {
428 return Err(SpectrogramError::dimension_mismatch(
429 n_frames.get(),
430 output.ncols(),
431 ));
432 }
433
434 // Ensure workspace is correctly sized
435 self.workspace
436 .ensure_sizes(self.stft.n_fft, self.stft.out_len, n_bins);
437
438 // Main loop: fill each frame (column)
439 for frame_idx in 0..n_frames.get() {
440 // CQT needs unwindowed frames because its kernels already contain windowing.
441 // Other mappings use the FFT spectrum, so they need the windowed frame.
442 if self.mapping.kind.needs_unwindowed_frame() {
443 // Fill frame without windowing (for CQT)
444 self.stft
445 .fill_frame_unwindowed(samples, frame_idx, &mut self.workspace)?;
446 } else {
447 // Compute windowed frame spectrum (for FFT-based mappings)
448 self.stft
449 .compute_frame_spectrum(samples, frame_idx, &mut self.workspace)?;
450 }
451
452 // mapping: spectrum(out_len) -> mapped(n_bins)
453 // For CQT, this uses workspace.frame; for others, workspace.spectrum
454 // For ERB, we need the complex FFT output (fft_out)
455 // We need to borrow workspace fields separately to avoid borrow conflicts
456 let Workspace {
457 spectrum,
458 mapped,
459 frame,
460 ..
461 } = &mut self.workspace;
462
463 self.mapping.apply(spectrum, frame, mapped)?;
464
465 // amplitude scaling in-place on mapped vector
466 self.scaling.apply_in_place(mapped)?;
467
468 // write column into output
469 for (row, &val) in mapped.iter().enumerate() {
470 output[[row, frame_idx]] = val;
471 }
472 }
473
474 Ok(())
475 }
476
477 /// Get the expected output dimensions for a given signal length.
478 ///
479 /// # Arguments
480 ///
481 /// * `signal_length` - Length of the input signal in samples.
482 ///
483 /// # Returns
484 ///
485 /// A tuple `(n_bins, n_frames)` representing the output spectrogram shape.
486 ///
487 /// # Errors
488 ///
489 /// Returns an error if the signal length is too short to produce any frames.
490 ///
491 /// # Examples
492 ///
493 /// ```
494 /// use spectrograms::*;
495 ///
496 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
497 /// let stft = StftParams::new(nzu!(512), nzu!(256), WindowType::Hanning, true)?;
498 /// let params = SpectrogramParams::new(stft, 16000.0)?;
499 ///
500 /// let planner = SpectrogramPlanner::new();
501 /// let plan = planner.linear_plan::<Power>(¶ms, None)?;
502 ///
503 /// let (n_bins, n_frames) = plan.output_shape(nzu!(16000))?;
504 /// assert_eq!(n_bins, nzu!(257));
505 /// assert_eq!(n_frames, nzu!(63));
506 /// # Ok(())
507 /// # }
508 /// ```
509 #[inline]
510 pub fn output_shape(
511 &self,
512 signal_length: NonZeroUsize,
513 ) -> SpectrogramResult<(NonZeroUsize, NonZeroUsize)> {
514 let n_frames = self.stft.frame_count(signal_length)?;
515 let n_bins = self.mapping.output_bins();
516 Ok((n_bins, n_frames))
517 }
518}
519
520/// STFT (Short-Time Fourier Transform) result containing complex frequency bins.
521///
522/// This is the raw STFT output before any frequency mapping or amplitude scaling.
523///
524/// # Fields
525///
526/// - `data`: Complex STFT matrix with shape (`frequency_bins`, `time_frames`)
527/// - `frequencies`: Frequency axis in Hz
528/// - `sample_rate`: Sample rate in Hz
529/// - `params`: STFT computation parameters
530#[derive(Debug, Clone)]
531#[non_exhaustive]
532pub struct StftResult {
533 /// Complex STFT matrix with shape (`frequency_bins`, `time_frames`)
534 pub data: Array2<Complex<f64>>,
535 /// Frequency axis in Hz
536 pub frequencies: NonEmptyVec<f64>,
537 /// Sample rate in Hz
538 pub sample_rate: f64,
539 pub params: StftParams,
540}
541
542impl StftResult {
543 /// Get the number of frequency bins.
544 ///
545 /// # Returns
546 ///
547 /// Number of frequency bins in the STFT result.
548 #[inline]
549 #[must_use]
550 pub fn n_bins(&self) -> NonZeroUsize {
551 // safety: nrows() > 0 for NonEmptyVec
552 unsafe { NonZeroUsize::new_unchecked(self.data.nrows()) }
553 }
554
555 /// Get the number of time frames.
556 ///
557 /// # Returns
558 ///
559 /// Number of time frames in the STFT result.
560 #[inline]
561 #[must_use]
562 pub fn n_frames(&self) -> NonZeroUsize {
563 // safety: ncols() > 0 for NonEmptyVec
564 unsafe { NonZeroUsize::new_unchecked(self.data.ncols()) }
565 }
566
567 /// Get the frequency resolution in Hz
568 ///
569 /// # Returns
570 ///
571 /// Frequency bin width in Hz.
572 #[inline]
573 #[must_use]
574 pub fn frequency_resolution(&self) -> f64 {
575 self.sample_rate / self.params.n_fft().get() as f64
576 }
577
578 /// Get the time resolution in seconds.
579 ///
580 /// # Returns
581 ///
582 /// Time between successive frames in seconds.
583 #[inline]
584 #[must_use]
585 pub fn time_resolution(&self) -> f64 {
586 self.params.hop_size().get() as f64 / self.sample_rate
587 }
588
589 /// Normalizes self.data to remove the complex aspect of it.
590 ///
591 /// # Returns
592 ///
593 /// An Array2<f64> containing the norms of each complex number in self.data.
594 #[inline]
595 pub fn norm(&self) -> Array2<f64> {
596 self.as_ref().mapv(Complex::norm)
597 }
598}
599
600impl AsRef<Array2<Complex<f64>>> for StftResult {
601 #[inline]
602 fn as_ref(&self) -> &Array2<Complex<f64>> {
603 &self.data
604 }
605}
606
607impl AsMut<Array2<Complex<f64>>> for StftResult {
608 #[inline]
609 fn as_mut(&mut self) -> &mut Array2<Complex<f64>> {
610 &mut self.data
611 }
612}
613
614impl Deref for StftResult {
615 type Target = Array2<Complex<f64>>;
616
617 #[inline]
618 fn deref(&self) -> &Self::Target {
619 &self.data
620 }
621}
622
623impl DerefMut for StftResult {
624 #[inline]
625 fn deref_mut(&mut self) -> &mut Self::Target {
626 &mut self.data
627 }
628}
629
630/// A planner is an object that can build spectrogram plans.
631///
632/// In your design, this is where:
633/// - FFT plans are created
634/// - mapping matrices are compiled
635/// - axes are computed
636///
637/// This allows you to keep plan building separate from the output types.
638#[derive(Debug, Default)]
639#[non_exhaustive]
640pub struct SpectrogramPlanner;
641
642impl SpectrogramPlanner {
643 /// Create a new spectrogram planner.
644 ///
645 /// # Returns
646 ///
647 /// A new `SpectrogramPlanner` instance.
648 #[inline]
649 #[must_use]
650 pub const fn new() -> Self {
651 Self
652 }
653
654 /// Compute the Short-Time Fourier Transform (STFT) of a signal.
655 ///
656 /// This returns the raw complex STFT matrix before any frequency mapping
657 /// or amplitude scaling. Useful for applications that need the full complex
658 /// spectrum or custom processing.
659 ///
660 /// # Arguments
661 ///
662 /// * `samples` - Audio samples (any type that can be converted to a slice)
663 /// * `params` - STFT computation parameters
664 ///
665 /// # Returns
666 ///
667 /// An `StftResult` containing the complex STFT matrix and metadata.
668 ///
669 /// # Errors
670 ///
671 /// Returns an error if STFT computation fails.
672 ///
673 /// # Examples
674 ///
675 /// ```
676 /// use spectrograms::*;
677 /// use non_empty_slice::non_empty_vec;
678 ///
679 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
680 /// let samples = non_empty_vec![0.0; nzu!(16000)];
681 /// let stft = StftParams::new(nzu!(512), nzu!(256), WindowType::Hanning, true)?;
682 /// let params = SpectrogramParams::new(stft, 16000.0)?;
683 ///
684 /// let planner = SpectrogramPlanner::new();
685 /// let stft_result = planner.compute_stft(&samples, ¶ms)?;
686 ///
687 /// println!("STFT: {} bins x {} frames", stft_result.n_bins(), stft_result.n_frames());
688 /// # Ok(())
689 /// # }
690 /// ```
691 ///
692 /// # Performance Note
693 ///
694 /// This method creates a new FFT plan each time. For processing multiple
695 /// signals, create a reusable plan with `StftPlan::new()` instead.
696 ///
697 /// # Examples
698 ///
699 /// ```rust
700 /// use spectrograms::*;
701 /// use non_empty_slice::non_empty_vec;
702 ///
703 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
704 /// let stft = StftParams::new(nzu!(512), nzu!(256), WindowType::Hanning, true)?;
705 /// let params = SpectrogramParams::new(stft, 16000.0)?;
706 ///
707 /// // One-shot (convenient)
708 /// let planner = SpectrogramPlanner::new();
709 /// let stft_result = planner.compute_stft(&non_empty_vec![0.0; nzu!(16000)], ¶ms)?;
710 ///
711 /// // Reusable plan (efficient for batch)
712 /// let mut plan = StftPlan::new(¶ms)?;
713 /// for signal in &[non_empty_vec![0.0; nzu!(16000)], non_empty_vec![1.0; nzu!(16000)]] {
714 /// let stft = plan.compute(&signal, ¶ms)?;
715 /// }
716 /// # Ok(())
717 /// # }
718 /// ```
719 #[inline]
720 pub fn compute_stft(
721 &self,
722 samples: &NonEmptySlice<f64>,
723 params: &SpectrogramParams,
724 ) -> SpectrogramResult<StftResult> {
725 let mut plan = StftPlan::new(params)?;
726 plan.compute(samples, params)
727 }
728
729 /// Compute the power spectrum of a single audio frame.
730 ///
731 /// This is useful for real-time processing or analyzing individual frames.
732 ///
733 /// # Arguments
734 ///
735 /// * `samples` - Audio frame (length ≤ n_fft, will be zero-padded if shorter)
736 /// * `n_fft` - FFT size
737 /// * `window` - Window type to apply
738 ///
739 /// # Returns
740 ///
741 /// A vector of power values (|X|²) with length `n_fft/2` + 1.
742 ///
743 /// # Automatic Zero-Padding
744 ///
745 /// If the input signal is shorter than `n_fft`, it will be automatically
746 /// zero-padded to the required length.
747 ///
748 /// # Errors
749 ///
750 /// Returns `InvalidInput` error if the input length exceeds `n_fft`.
751 ///
752 /// # Examples
753 ///
754 /// ```
755 /// use spectrograms::*;
756 /// use non_empty_slice::non_empty_vec;
757 ///
758 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
759 /// let frame = non_empty_vec![0.0; nzu!(512)];
760 ///
761 /// let planner = SpectrogramPlanner::new();
762 /// let power = planner.compute_power_spectrum(frame.as_ref(), nzu!(512), WindowType::Hanning)?;
763 ///
764 /// assert_eq!(power.len(), nzu!(257)); // 512/2 + 1
765 /// # Ok(())
766 /// # }
767 /// ```
768 #[inline]
769 pub fn compute_power_spectrum(
770 &self,
771 samples: &NonEmptySlice<f64>,
772 n_fft: NonZeroUsize,
773 window: WindowType,
774 ) -> SpectrogramResult<NonEmptyVec<f64>> {
775 if samples.len() > n_fft {
776 return Err(SpectrogramError::invalid_input(format!(
777 "Input length ({}) exceeds FFT size ({})",
778 samples.len(),
779 n_fft
780 )));
781 }
782
783 let window_samples = make_window(window, n_fft);
784 let out_len = r2c_output_size(n_fft.get());
785
786 // Create FFT plan
787 #[cfg(feature = "realfft")]
788 let mut fft = {
789 let mut planner = crate::RealFftPlanner::new();
790 let plan = planner.get_or_create(n_fft.get());
791 crate::RealFftPlan::new(n_fft.get(), plan)
792 };
793
794 #[cfg(feature = "fftw")]
795 let mut fft = {
796 use std::sync::Arc;
797 let plan = crate::FftwPlanner::build_plan(n_fft.get())?;
798 crate::FftwPlan::new(Arc::new(plan))
799 };
800
801 // Apply window and compute FFT
802 let mut windowed = vec![0.0; n_fft.get()];
803 for i in 0..samples.len().get() {
804 windowed[i] = samples[i] * window_samples[i];
805 }
806 // The rest is already zero-padded
807 let mut fft_out = vec![Complex::new(0.0, 0.0); out_len];
808 fft.process(&windowed, &mut fft_out)?;
809
810 // Convert to power
811 let power: Vec<f64> = fft_out.iter().map(num_complex::Complex::norm_sqr).collect();
812 // safety: power is non-empty since n_fft > 0
813 Ok(unsafe { NonEmptyVec::new_unchecked(power) })
814 }
815
816 /// Compute the magnitude spectrum of a single audio frame.
817 ///
818 /// This is useful for real-time processing or analyzing individual frames.
819 ///
820 /// # Arguments
821 ///
822 /// * `samples` - Audio frame (length ≤ n_fft, will be zero-padded if shorter)
823 /// * `n_fft` - FFT size
824 /// * `window` - Window type to apply
825 ///
826 /// # Returns
827 ///
828 /// A vector of magnitude values (|X|) with length `n_fft/2` + 1.
829 ///
830 /// # Automatic Zero-Padding
831 ///
832 /// If the input signal is shorter than `n_fft`, it will be automatically
833 /// zero-padded to the required length.
834 ///
835 /// # Errors
836 ///
837 /// Returns `InvalidInput` error if the input length exceeds `n_fft`.
838 ///
839 /// # Examples
840 ///
841 /// ```
842 /// use spectrograms::*;
843 /// use non_empty_slice::non_empty_vec;
844 ///
845 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
846 /// let frame = non_empty_vec![0.0; nzu!(512)];
847 ///
848 /// let planner = SpectrogramPlanner::new();
849 /// let magnitude = planner.compute_magnitude_spectrum(frame.as_ref(), nzu!(512), WindowType::Hanning)?;
850 ///
851 /// assert_eq!(magnitude.len(), nzu!(257)); // 512/2 + 1
852 /// # Ok(())
853 /// # }
854 /// ```
855 #[inline]
856 pub fn compute_magnitude_spectrum(
857 &self,
858 samples: &NonEmptySlice<f64>,
859 n_fft: NonZeroUsize,
860 window: WindowType,
861 ) -> SpectrogramResult<NonEmptyVec<f64>> {
862 let power = self.compute_power_spectrum(samples, n_fft, window)?;
863 let power = power.iter().map(|&p| p.sqrt()).collect::<Vec<f64>>();
864 // safety: power is non-empty since power_spectrum returned successfully
865 Ok(unsafe { NonEmptyVec::new_unchecked(power) })
866 }
867
868 /// Build a linear-frequency spectrogram plan.
869 ///
870 /// # Type Parameters
871 ///
872 /// `AmpScale` determines whether output is:
873 /// - Magnitude
874 /// - Power
875 /// - Decibels
876 ///
877 /// # Arguments
878 ///
879 /// * `params` - Spectrogram parameters
880 /// * `db` - Logarithmic scaling parameters (only used if `AmpScale
881 /// is `Decibels`)
882 ///
883 /// # Returns
884 ///
885 /// A `SpectrogramPlan` configured for linear-frequency spectrogram computation.
886 ///
887 /// # Errors
888 ///
889 /// Returns an error if the plan cannot be created due to invalid parameters.
890 #[inline]
891 pub fn linear_plan<AmpScale>(
892 &self,
893 params: &SpectrogramParams,
894 db: Option<&LogParams>, // only used when AmpScale = Decibels
895 ) -> SpectrogramResult<SpectrogramPlan<LinearHz, AmpScale>>
896 where
897 AmpScale: AmpScaleSpec + 'static,
898 {
899 let stft = StftPlan::new(params)?;
900 let mapping = FrequencyMapping::<LinearHz>::new(params)?;
901 let scaling = AmplitudeScaling::<AmpScale>::new(db);
902 let freq_axis = build_frequency_axis::<LinearHz>(params, &mapping);
903
904 let workspace = Workspace::new(stft.n_fft, stft.out_len, mapping.output_bins());
905
906 Ok(SpectrogramPlan {
907 params: params.clone(),
908 stft,
909 mapping,
910 scaling,
911 freq_axis,
912 workspace,
913 _amp: PhantomData,
914 })
915 }
916
917 /// Build a mel-frequency spectrogram plan.
918 ///
919 /// This compiles a mel filterbank matrix and caches it inside the plan.
920 ///
921 /// # Type Parameters
922 ///
923 /// `AmpScale`: determines whether output is:
924 /// - Magnitude
925 /// - Power
926 /// - Decibels
927 ///
928 /// # Arguments
929 ///
930 /// * `params` - Spectrogram parameters
931 /// * `mel` - Mel-specific parameters
932 /// * `db` - Logarithmic scaling parameters (only used if `AmpScale` is `Decibels`)
933 ///
934 /// # Returns
935 ///
936 /// A `SpectrogramPlan` configured for mel spectrogram computation.
937 ///
938 /// # Errors
939 ///
940 /// Returns an error if the plan cannot be created due to invalid parameters.
941 #[inline]
942 pub fn mel_plan<AmpScale>(
943 &self,
944 params: &SpectrogramParams,
945 mel: &MelParams,
946 db: Option<&LogParams>, // only used when AmpScale = Decibels
947 ) -> SpectrogramResult<SpectrogramPlan<Mel, AmpScale>>
948 where
949 AmpScale: AmpScaleSpec + 'static,
950 {
951 // cross-validation: mel range must be compatible with sample rate
952 let nyquist = params.nyquist_hz();
953 if mel.f_max() > nyquist {
954 return Err(SpectrogramError::invalid_input(
955 "mel f_max must be <= Nyquist",
956 ));
957 }
958
959 let stft = StftPlan::new(params)?;
960 let mapping = FrequencyMapping::<Mel>::new_mel(params, mel)?;
961 let scaling = AmplitudeScaling::<AmpScale>::new(db);
962 let freq_axis = build_frequency_axis::<Mel>(params, &mapping);
963
964 let workspace = Workspace::new(stft.n_fft, stft.out_len, mapping.output_bins());
965
966 Ok(SpectrogramPlan {
967 params: params.clone(),
968 stft,
969 mapping,
970 scaling,
971 freq_axis,
972 workspace,
973 _amp: PhantomData,
974 })
975 }
976
977 /// Build an ERB-scale spectrogram plan.
978 ///
979 /// This creates a spectrogram with ERB-spaced frequency bands using gammatone
980 /// filterbank approximation in the frequency domain.
981 ///
982 /// # Type Parameters
983 ///
984 /// `AmpScale`: determines whether output is:
985 /// - Magnitude
986 /// - Power
987 /// - Decibels
988 ///
989 /// # Arguments
990 ///
991 /// * `params` - Spectrogram parameters
992 /// * `erb` - ERB-specific parameters
993 /// * `db` - Logarithmic scaling parameters (only used if `AmpScale` is `Decibels`)
994 ///
995 /// # Returns
996 ///
997 /// A `SpectrogramPlan` configured for ERB spectrogram computation.
998 ///
999 /// # Errors
1000 ///
1001 /// Returns an error if the plan cannot be created due to invalid parameters.
1002 #[inline]
1003 pub fn erb_plan<AmpScale>(
1004 &self,
1005 params: &SpectrogramParams,
1006 erb: &ErbParams,
1007 db: Option<&LogParams>,
1008 ) -> SpectrogramResult<SpectrogramPlan<Erb, AmpScale>>
1009 where
1010 AmpScale: AmpScaleSpec + 'static,
1011 {
1012 // cross-validation: erb range must be compatible with sample rate
1013 let nyquist = params.nyquist_hz();
1014 if erb.f_max() > nyquist {
1015 return Err(SpectrogramError::invalid_input(format!(
1016 "f_max={} exceeds Nyquist={}",
1017 erb.f_max(),
1018 nyquist
1019 )));
1020 }
1021
1022 let stft = StftPlan::new(params)?;
1023 let mapping = FrequencyMapping::<Erb>::new_erb(params, erb)?;
1024 let scaling = AmplitudeScaling::<AmpScale>::new(db);
1025 let freq_axis = build_frequency_axis::<Erb>(params, &mapping);
1026
1027 let workspace = Workspace::new(stft.n_fft, stft.out_len, mapping.output_bins());
1028
1029 Ok(SpectrogramPlan {
1030 params: params.clone(),
1031 stft,
1032 mapping,
1033 scaling,
1034 freq_axis,
1035 workspace,
1036 _amp: PhantomData,
1037 })
1038 }
1039
1040 /// Build a log-frequency plan.
1041 ///
1042 /// This creates a spectrogram with logarithmically-spaced frequency bins.
1043 ///
1044 /// # Type Parameters
1045 ///
1046 /// `AmpScale`: determines whether output is:
1047 /// - Magnitude
1048 /// - Power
1049 /// - Decibels
1050 ///
1051 /// # Arguments
1052 ///
1053 /// * `params` - Spectrogram parameters
1054 /// * `loghz` - LogHz-specific parameters
1055 /// * `db` - Logarithmic scaling parameters (only used if `AmpScale` is `Decibels`)
1056 ///
1057 /// # Returns
1058 ///
1059 /// A `SpectrogramPlan` configured for log-frequency spectrogram computation.
1060 ///
1061 /// # Errors
1062 ///
1063 /// Returns an error if the plan cannot be created due to invalid parameters.
1064 #[inline]
1065 pub fn log_hz_plan<AmpScale>(
1066 &self,
1067 params: &SpectrogramParams,
1068 loghz: &LogHzParams,
1069 db: Option<&LogParams>,
1070 ) -> SpectrogramResult<SpectrogramPlan<LogHz, AmpScale>>
1071 where
1072 AmpScale: AmpScaleSpec + 'static,
1073 {
1074 // cross-validation: loghz range must be compatible with sample rate
1075 let nyquist = params.nyquist_hz();
1076 if loghz.f_max() > nyquist {
1077 return Err(SpectrogramError::invalid_input(format!(
1078 "f_max={} exceeds Nyquist={}",
1079 loghz.f_max(),
1080 nyquist
1081 )));
1082 }
1083
1084 let stft = StftPlan::new(params)?;
1085 let mapping = FrequencyMapping::<LogHz>::new_loghz(params, loghz)?;
1086 let scaling = AmplitudeScaling::<AmpScale>::new(db);
1087 let freq_axis = build_frequency_axis::<LogHz>(params, &mapping);
1088
1089 let workspace = Workspace::new(stft.n_fft, stft.out_len, mapping.output_bins());
1090
1091 Ok(SpectrogramPlan {
1092 params: params.clone(),
1093 stft,
1094 mapping,
1095 scaling,
1096 freq_axis,
1097 workspace,
1098 _amp: PhantomData,
1099 })
1100 }
1101
1102 /// Build a cqt spectrogram plan.
1103 ///
1104 /// # Type Parameters
1105 ///
1106 /// `AmpScale`: determines whether output is:
1107 /// - Magnitude
1108 /// - Power
1109 /// - Decibels
1110 ///
1111 /// # Arguments
1112 ///
1113 /// * `params` - Spectrogram parameters
1114 /// * `cqt` - CQT-specific parameters
1115 /// * `db` - Logarithmic scaling parameters (only used if `AmpScale` is `Decibels`)
1116 ///
1117 /// # Returns
1118 ///
1119 /// A `SpectrogramPlan` configured for CQT spectrogram computation.
1120 ///
1121 /// # Errors
1122 ///
1123 /// Returns an error if the plan cannot be created due to invalid parameters.
1124 #[inline]
1125 pub fn cqt_plan<AmpScale>(
1126 &self,
1127 params: &SpectrogramParams,
1128 cqt: &CqtParams,
1129 db: Option<&LogParams>, // only used when AmpScale = Decibels
1130 ) -> SpectrogramResult<SpectrogramPlan<Cqt, AmpScale>>
1131 where
1132 AmpScale: AmpScaleSpec + 'static,
1133 {
1134 let stft = StftPlan::new(params)?;
1135 let mapping = FrequencyMapping::<Cqt>::new(params, cqt)?;
1136 let scaling = AmplitudeScaling::<AmpScale>::new(db);
1137 let freq_axis = build_frequency_axis::<Cqt>(params, &mapping);
1138
1139 let workspace = Workspace::new(stft.n_fft, stft.out_len, mapping.output_bins());
1140
1141 Ok(SpectrogramPlan {
1142 params: params.clone(),
1143 stft,
1144 mapping,
1145 scaling,
1146 freq_axis,
1147 workspace,
1148 _amp: PhantomData,
1149 })
1150 }
1151}
1152
1153/// STFT plan containing reusable FFT plan and buffers.
1154///
1155/// This struct is responsible for performing the Short-Time Fourier Transform (STFT)
1156/// on audio signals based on the provided parameters.
1157///
1158/// It encapsulates the FFT plan, windowing function, and internal buffers to efficiently
1159/// compute the STFT for multiple frames of audio data.
1160///
1161/// # Fields
1162///
1163/// - `n_fft`: Size of the FFT.
1164/// - `hop_size`: Hop size between consecutive frames.
1165/// - `window`: Windowing function samples.
1166/// - `centre`: Whether to centre the frames with padding.
1167/// - `out_len`: Length of the FFT output.
1168/// - `fft`: Boxed FFT plan for real-to-complex transformation.
1169/// - `fft_out`: Internal buffer for FFT output.
1170/// - `frame`: Internal buffer for windowed audio frames.
1171pub struct StftPlan {
1172 n_fft: NonZeroUsize,
1173 hop_size: NonZeroUsize,
1174 window: NonEmptyVec<f64>,
1175 centre: bool,
1176
1177 out_len: NonZeroUsize,
1178
1179 // FFT plan (reused for all frames)
1180 fft: Box<dyn R2cPlan>,
1181
1182 // internal scratch
1183 fft_out: NonEmptyVec<Complex<f64>>,
1184 frame: NonEmptyVec<f64>,
1185}
1186
1187impl StftPlan {
1188 /// Create a new STFT plan from parameters.
1189 ///
1190 /// # Arguments
1191 ///
1192 /// * `params` - Spectrogram parameters containing STFT config
1193 ///
1194 /// # Returns
1195 ///
1196 /// A new `StftPlan` instance.
1197 ///
1198 /// # Errors
1199 ///
1200 /// Returns an error if the FFT plan cannot be created.
1201 #[inline]
1202 pub fn new(params: &SpectrogramParams) -> SpectrogramResult<Self> {
1203 let stft = params.stft();
1204 let n_fft = stft.n_fft();
1205 let hop_size = stft.hop_size();
1206 let centre = stft.centre();
1207
1208 let window = make_window(stft.window(), n_fft);
1209
1210 let out_len = r2c_output_size(n_fft.get());
1211 let out_len = NonZeroUsize::new(out_len)
1212 .ok_or_else(|| SpectrogramError::invalid_input("FFT output length must be non-zero"))?;
1213
1214 #[cfg(feature = "realfft")]
1215 let fft = {
1216 let mut planner = crate::RealFftPlanner::new();
1217 let plan = planner.get_or_create(n_fft.get());
1218 let plan = crate::RealFftPlan::new(n_fft.get(), plan);
1219 Box::new(plan)
1220 };
1221
1222 #[cfg(feature = "fftw")]
1223 let fft = {
1224 use std::sync::Arc;
1225 let plan = crate::FftwPlanner::build_plan(n_fft.get())?;
1226 Box::new(crate::FftwPlan::new(Arc::new(plan)))
1227 };
1228
1229 Ok(Self {
1230 n_fft,
1231 hop_size,
1232 window,
1233 centre,
1234 out_len,
1235 fft,
1236 fft_out: non_empty_vec![Complex::new(0.0, 0.0); out_len],
1237 frame: non_empty_vec![0.0; n_fft],
1238 })
1239 }
1240
1241 fn frame_count(&self, n_samples: NonZeroUsize) -> SpectrogramResult<NonZeroUsize> {
1242 // Framing policy:
1243 // - centre = true: implicit padding of n_fft/2 on both sides
1244 // - centre = false: no padding
1245 //
1246 // Define the number of frames such that each frame has a valid centre sample position.
1247 let pad = if self.centre { self.n_fft.get() / 2 } else { 0 };
1248 let padded_len = n_samples.get() + 2 * pad;
1249
1250 if padded_len < self.n_fft.get() {
1251 // still produce 1 frame (all padding / partial)
1252 return Ok(nzu!(1));
1253 }
1254
1255 let remaining = padded_len - self.n_fft.get();
1256 let n_frames = remaining / self.hop_size().get() + 1;
1257 let n_frames = NonZeroUsize::new(n_frames).ok_or_else(|| {
1258 SpectrogramError::invalid_input("computed number of frames must be non-zero")
1259 })?;
1260 Ok(n_frames)
1261 }
1262
1263 /// Compute one frame FFT using internal buffers only.
1264 fn compute_frame_fft_simple(
1265 &mut self,
1266 samples: &NonEmptySlice<f64>,
1267 frame_idx: usize,
1268 ) -> SpectrogramResult<()> {
1269 let out = self.frame.as_mut_slice();
1270 debug_assert_eq!(out.len(), self.n_fft.get());
1271
1272 let pad = if self.centre { self.n_fft.get() / 2 } else { 0 };
1273 let start = frame_idx
1274 .checked_mul(self.hop_size.get())
1275 .ok_or_else(|| SpectrogramError::invalid_input("frame index overflow"))?;
1276
1277 // Fill windowed frame
1278 for (i, sample) in out.iter_mut().enumerate().take(self.n_fft.get()) {
1279 let v_idx = start + i;
1280 let s_idx = v_idx as isize - pad as isize;
1281
1282 let sample_val = if s_idx < 0 || (s_idx as usize) >= samples.len().get() {
1283 0.0
1284 } else {
1285 samples[s_idx as usize]
1286 };
1287 *sample = sample_val * self.window[i];
1288 }
1289
1290 // Compute FFT
1291 let fft_out = self.fft_out.as_mut_slice();
1292 self.fft.process(out, fft_out)?;
1293
1294 Ok(())
1295 }
1296
1297 /// Compute one frame spectrum into workspace:
1298 /// - fills windowed frame
1299 /// - runs FFT
1300 /// - converts to magnitude/power based on `AmpScale` later
1301 fn compute_frame_spectrum(
1302 &mut self,
1303 samples: &NonEmptySlice<f64>,
1304 frame_idx: usize,
1305 workspace: &mut Workspace,
1306 ) -> SpectrogramResult<()> {
1307 let out = workspace.frame.as_mut_slice();
1308
1309 // self.fill_frame(samples, frame_idx, frame)?;
1310 debug_assert_eq!(out.len(), self.n_fft.get());
1311
1312 let pad = if self.centre { self.n_fft.get() / 2 } else { 0 };
1313 let start = frame_idx
1314 .checked_mul(self.hop_size().get())
1315 .ok_or_else(|| SpectrogramError::invalid_input("frame index overflow"))?;
1316
1317 // The "virtual" signal is samples with pad zeros on both sides.
1318 // Virtual index 0..padded_len
1319 // Map virtual index to original samples by subtracting pad.
1320 for (i, sample) in out.iter_mut().enumerate().take(self.n_fft.get()) {
1321 let v_idx = start + i;
1322 let s_idx = v_idx as isize - pad as isize;
1323
1324 let sample_val = if s_idx < 0 || (s_idx as usize) >= samples.len().get() {
1325 0.0
1326 } else {
1327 samples[s_idx as usize]
1328 };
1329
1330 *sample = sample_val * self.window[i];
1331 }
1332 let fft_out = workspace.fft_out.as_mut_slice();
1333 // FFT
1334 self.fft.process(out, fft_out)?;
1335
1336 // Convert complex spectrum to linear magnitude OR power here? No:
1337 // Keep "spectrum" as power by default? That would entangle semantics.
1338 //
1339 // Instead, we store magnitude^2 (power) as the canonical intermediate,
1340 // and let AmpScale decide later whether output is magnitude or power.
1341 //
1342 // This is consistent and avoids recomputing norms multiple times.
1343 for (i, c) in workspace.fft_out.iter().enumerate() {
1344 workspace.spectrum[i] = c.norm_sqr();
1345 }
1346
1347 Ok(())
1348 }
1349
1350 /// Fill a time-domain frame WITHOUT applying the window.
1351 ///
1352 /// This is used for CQT mapping, where the CQT kernels already contain
1353 /// windowing applied during kernel generation. Applying the STFT window
1354 /// would result in double-windowing.
1355 ///
1356 /// # Arguments
1357 ///
1358 /// * `samples` - Input audio samples
1359 /// * `frame_idx` - Frame index
1360 /// * `workspace` - Workspace containing the frame buffer to fill
1361 ///
1362 /// # Errors
1363 ///
1364 /// Returns an error if frame index is out of bounds.
1365 fn fill_frame_unwindowed(
1366 &self,
1367 samples: &NonEmptySlice<f64>,
1368 frame_idx: usize,
1369 workspace: &mut Workspace,
1370 ) -> SpectrogramResult<()> {
1371 let out = workspace.frame.as_mut_slice();
1372 debug_assert_eq!(out.len(), self.n_fft.get());
1373
1374 let pad = if self.centre { self.n_fft.get() / 2 } else { 0 };
1375 let start = frame_idx
1376 .checked_mul(self.hop_size().get())
1377 .ok_or_else(|| SpectrogramError::invalid_input("frame index overflow"))?;
1378
1379 // Fill frame WITHOUT windowing
1380 for (i, sample) in out.iter_mut().enumerate().take(self.n_fft.get()) {
1381 let v_idx = start + i;
1382 let s_idx = v_idx as isize - pad as isize;
1383
1384 let sample_val = if s_idx < 0 || (s_idx as usize) >= samples.len().get() {
1385 0.0
1386 } else {
1387 samples[s_idx as usize]
1388 };
1389
1390 // No window multiplication - just copy the sample
1391 *sample = sample_val;
1392 }
1393
1394 Ok(())
1395 }
1396
1397 /// Compute the full STFT for a signal, returning an `StftResult`.
1398 ///
1399 /// This is a convenience method that handles frame iteration and
1400 /// builds the complete STFT matrix.
1401 ///
1402 ///
1403 /// # Arguments
1404 ///
1405 /// * `samples` - Input audio samples
1406 /// * `params` - STFT computation parameters
1407 ///
1408 /// # Returns
1409 ///
1410 /// An `StftResult` containing the complex STFT matrix and metadata.
1411 ///
1412 /// # Errors
1413 ///
1414 /// Returns an error if computation fails.
1415 ///
1416 /// # Examples
1417 ///
1418 /// ```rust
1419 /// use spectrograms::*;
1420 /// use non_empty_slice::non_empty_vec;
1421 ///
1422 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1423 /// let stft = StftParams::new(nzu!(512), nzu!(256), WindowType::Hanning, true)?;
1424 /// let params = SpectrogramParams::new(stft, 16000.0)?;
1425 /// let mut plan = StftPlan::new(¶ms)?;
1426 ///
1427 /// let samples = non_empty_vec![0.0; nzu!(16000)];
1428 /// let stft_result = plan.compute(&samples, ¶ms)?;
1429 ///
1430 /// println!("STFT: {} bins x {} frames", stft_result.n_bins(), stft_result.n_frames());
1431 /// # Ok(())
1432 /// # }
1433 /// ```
1434 #[inline]
1435 pub fn compute(
1436 &mut self,
1437 samples: &NonEmptySlice<f64>,
1438 params: &SpectrogramParams,
1439 ) -> SpectrogramResult<StftResult> {
1440 let n_frames = self.frame_count(samples.len())?;
1441 let n_bins = self.out_len;
1442
1443 // Allocate output matrix (frequency_bins x time_frames)
1444 let mut data = Array2::<Complex<f64>>::zeros((n_bins.get(), n_frames.get()));
1445
1446 // Compute each frame
1447 for frame_idx in 0..n_frames.get() {
1448 self.compute_frame_fft_simple(samples, frame_idx)?;
1449
1450 // Copy from internal buffer to output
1451 for (bin_idx, &value) in self.fft_out.iter().enumerate() {
1452 data[[bin_idx, frame_idx]] = value;
1453 }
1454 }
1455
1456 // Build frequency axis
1457 let frequencies: Vec<f64> = (0..n_bins.get())
1458 .map(|k| k as f64 * params.sample_rate_hz() / params.stft().n_fft().get() as f64)
1459 .collect();
1460 // SAFETY: n_bins > 0
1461 let frequencies = unsafe { NonEmptyVec::new_unchecked(frequencies) };
1462
1463 Ok(StftResult {
1464 data,
1465 frequencies,
1466 sample_rate: params.sample_rate_hz(),
1467 params: params.stft().clone(),
1468 })
1469 }
1470
1471 /// Compute a single frame of STFT, returning the complex spectrum.
1472 ///
1473 /// This is useful for streaming/online processing where you want to
1474 /// process audio frame-by-frame.
1475 ///
1476 /// # Arguments
1477 ///
1478 /// * `samples` - Input audio samples
1479 /// * `frame_idx` - Index of the frame to compute
1480 ///
1481 /// # Returns
1482 ///
1483 /// A `NonEmptyVec` containing the complex spectrum for the specified frame.
1484 ///
1485 /// # Errors
1486 ///
1487 /// Returns an error if computation fails.
1488 ///
1489 /// # Examples
1490 ///
1491 /// ```rust
1492 /// use spectrograms::*;
1493 /// use non_empty_slice::non_empty_vec;
1494 ///
1495 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1496 /// let stft = StftParams::new(nzu!(512), nzu!(256), WindowType::Hanning, true)?;
1497 /// let params = SpectrogramParams::new(stft, 16000.0)?;
1498 /// let mut plan = StftPlan::new(¶ms)?;
1499 ///
1500 /// let samples = non_empty_vec![0.0; nzu!(16000)];
1501 /// let (_, n_frames) = plan.output_shape(samples.len())?;
1502 ///
1503 /// for frame_idx in 0..n_frames.get() {
1504 /// let spectrum = plan.compute_frame_simple(&samples, frame_idx)?;
1505 /// // Process spectrum...
1506 /// }
1507 /// # Ok(())
1508 /// # }
1509 /// ```
1510 #[inline]
1511 pub fn compute_frame_simple(
1512 &mut self,
1513 samples: &NonEmptySlice<f64>,
1514 frame_idx: usize,
1515 ) -> SpectrogramResult<NonEmptyVec<Complex<f64>>> {
1516 self.compute_frame_fft_simple(samples, frame_idx)?;
1517 Ok(self.fft_out.clone())
1518 }
1519
1520 /// Compute STFT into a pre-allocated buffer.
1521 ///
1522 /// This avoids allocating the output matrix, useful for reusing buffers.
1523 ///
1524 /// # Arguments
1525 ///
1526 /// * `samples` - Input audio samples
1527 /// * `output` - Pre-allocated output buffer (shape: `n_bins` x `n_frames`)
1528 ///
1529 /// # Returns
1530 ///
1531 /// `Ok(())` on success, or an error if dimensions mismatch.
1532 ///
1533 /// # Errors
1534 ///
1535 /// Returns `DimensionMismatch` error if the output buffer has incorrect shape.
1536 ///
1537 /// # Examples
1538 ///
1539 /// ```rust
1540 /// use spectrograms::*;
1541 /// use ndarray::Array2;
1542 /// use num_complex::Complex;
1543 /// use non_empty_slice::non_empty_vec;
1544 ///
1545 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1546 /// let stft = StftParams::new(nzu!(512), nzu!(256), WindowType::Hanning, true)?;
1547 /// let params = SpectrogramParams::new(stft, 16000.0)?;
1548 /// let mut plan = StftPlan::new(¶ms)?;
1549 ///
1550 /// let samples = non_empty_vec![0.0; nzu!(16000)];
1551 /// let (n_bins, n_frames) = plan.output_shape(samples.len())?;
1552 /// let mut output = Array2::<Complex<f64>>::zeros((n_bins.get(), n_frames.get()));
1553 ///
1554 /// plan.compute_into(&samples, &mut output)?;
1555 /// # Ok(())
1556 /// # }
1557 /// ```
1558 #[inline]
1559 pub fn compute_into(
1560 &mut self,
1561 samples: &NonEmptySlice<f64>,
1562 output: &mut Array2<Complex<f64>>,
1563 ) -> SpectrogramResult<()> {
1564 let n_frames = self.frame_count(samples.len())?;
1565 let n_bins = self.out_len;
1566
1567 // Validate output dimensions
1568 if output.nrows() != n_bins.get() {
1569 return Err(SpectrogramError::dimension_mismatch(
1570 n_bins.get(),
1571 output.nrows(),
1572 ));
1573 }
1574 if output.ncols() != n_frames.get() {
1575 return Err(SpectrogramError::dimension_mismatch(
1576 n_frames.get(),
1577 output.ncols(),
1578 ));
1579 }
1580
1581 // Compute into pre-allocated buffer
1582 for frame_idx in 0..n_frames.get() {
1583 self.compute_frame_fft_simple(samples, frame_idx)?;
1584
1585 for (bin_idx, &value) in self.fft_out.iter().enumerate() {
1586 output[[bin_idx, frame_idx]] = value;
1587 }
1588 }
1589
1590 Ok(())
1591 }
1592
1593 /// Get the expected output dimensions for a given signal length.
1594 ///
1595 /// # Arguments
1596 ///
1597 /// * `signal_length` - Length of the input signal in samples
1598 ///
1599 /// # Returns
1600 ///
1601 /// A tuple `(n_frequency_bins, n_time_frames)`.
1602 ///
1603 /// # Errors
1604 ///
1605 /// Returns an error if the computed number of frames is invalid.
1606 #[inline]
1607 pub fn output_shape(
1608 &self,
1609 signal_length: NonZeroUsize,
1610 ) -> SpectrogramResult<(NonZeroUsize, NonZeroUsize)> {
1611 let n_frames = self.frame_count(signal_length)?;
1612 Ok((self.out_len, n_frames))
1613 }
1614
1615 /// Get the number of frequency bins in the output.
1616 ///
1617 /// # Returns
1618 ///
1619 /// The number of frequency bins.
1620 #[inline]
1621 #[must_use]
1622 pub const fn n_bins(&self) -> NonZeroUsize {
1623 self.out_len
1624 }
1625
1626 /// Get the FFT size.
1627 ///
1628 /// # Returns
1629 ///
1630 /// The FFT size.
1631 #[inline]
1632 #[must_use]
1633 pub const fn n_fft(&self) -> NonZeroUsize {
1634 self.n_fft
1635 }
1636
1637 /// Get the hop size.
1638 ///
1639 /// # Returns
1640 ///
1641 /// The hop size.
1642 #[inline]
1643 #[must_use]
1644 pub const fn hop_size(&self) -> NonZeroUsize {
1645 self.hop_size
1646 }
1647}
1648
1649#[derive(Debug, Clone)]
1650enum MappingKind {
1651 Identity {
1652 out_len: NonZeroUsize,
1653 },
1654 Mel {
1655 matrix: SparseMatrix,
1656 }, // shape: (n_mels, out_len)
1657 LogHz {
1658 matrix: SparseMatrix,
1659 frequencies: NonEmptyVec<f64>,
1660 }, // shape: (n_bins, out_len)
1661 Erb {
1662 filterbank: ErbFilterbank,
1663 },
1664 Cqt {
1665 kernel: CqtKernel,
1666 },
1667}
1668
1669impl MappingKind {
1670 /// Check if this mapping requires unwindowed time-domain frames.
1671 ///
1672 /// CQT kernels already contain windowing applied during kernel generation,
1673 /// so they should receive unwindowed frames to avoid double-windowing.
1674 /// All other mappings work on FFT spectra and don't use the time-domain frame.
1675 const fn needs_unwindowed_frame(&self) -> bool {
1676 matches!(self, Self::Cqt { .. })
1677 }
1678}
1679
1680/// Typed mapping wrapper.
1681#[derive(Debug, Clone)]
1682struct FrequencyMapping<FreqScale> {
1683 kind: MappingKind,
1684 _marker: PhantomData<FreqScale>,
1685}
1686
1687impl FrequencyMapping<LinearHz> {
1688 fn new(params: &SpectrogramParams) -> SpectrogramResult<Self> {
1689 let out_len = r2c_output_size(params.stft().n_fft().get());
1690 let out_len = NonZeroUsize::new(out_len)
1691 .ok_or_else(|| SpectrogramError::invalid_input("FFT output length must be non-zero"))?;
1692 Ok(Self {
1693 kind: MappingKind::Identity { out_len },
1694 _marker: PhantomData,
1695 })
1696 }
1697}
1698
1699impl FrequencyMapping<Mel> {
1700 fn new_mel(params: &SpectrogramParams, mel: &MelParams) -> SpectrogramResult<Self> {
1701 let n_fft = params.stft().n_fft();
1702 let out_len = r2c_output_size(n_fft.get());
1703 let out_len = NonZeroUsize::new(out_len)
1704 .ok_or_else(|| SpectrogramError::invalid_input("FFT output length must be non-zero"))?;
1705
1706 // Validate: mel bins must be <= something sensible
1707 if mel.n_mels() > nzu!(10_000) {
1708 return Err(SpectrogramError::invalid_input(
1709 "n_mels is unreasonably large",
1710 ));
1711 }
1712
1713 let matrix = build_mel_filterbank_matrix(
1714 params.sample_rate_hz(),
1715 n_fft,
1716 mel.n_mels(),
1717 mel.f_min(),
1718 mel.f_max(),
1719 mel.norm(),
1720 )?;
1721
1722 // matrix must be (n_mels, out_len)
1723 if matrix.nrows() != mel.n_mels().get() || matrix.ncols() != out_len.get() {
1724 return Err(SpectrogramError::invalid_input(
1725 "mel filterbank matrix shape mismatch",
1726 ));
1727 }
1728
1729 Ok(Self {
1730 kind: MappingKind::Mel { matrix },
1731 _marker: PhantomData,
1732 })
1733 }
1734}
1735
1736impl FrequencyMapping<LogHz> {
1737 fn new_loghz(params: &SpectrogramParams, loghz: &LogHzParams) -> SpectrogramResult<Self> {
1738 let n_fft = params.stft().n_fft();
1739 let out_len = r2c_output_size(n_fft.get());
1740 let out_len = NonZeroUsize::new(out_len)
1741 .ok_or_else(|| SpectrogramError::invalid_input("FFT output length must be non-zero"))?;
1742 // Validate: n_bins must be <= something sensible
1743 if loghz.n_bins() > nzu!(10_000) {
1744 return Err(SpectrogramError::invalid_input(
1745 "n_bins is unreasonably large",
1746 ));
1747 }
1748
1749 let (matrix, frequencies) = build_loghz_matrix(
1750 params.sample_rate_hz(),
1751 n_fft,
1752 loghz.n_bins(),
1753 loghz.f_min(),
1754 loghz.f_max(),
1755 )?;
1756
1757 // matrix must be (n_bins, out_len)
1758 if matrix.nrows() != loghz.n_bins().get() || matrix.ncols() != out_len.get() {
1759 return Err(SpectrogramError::invalid_input(
1760 "loghz matrix shape mismatch",
1761 ));
1762 }
1763
1764 Ok(Self {
1765 kind: MappingKind::LogHz {
1766 matrix,
1767 frequencies,
1768 },
1769 _marker: PhantomData,
1770 })
1771 }
1772}
1773
1774impl FrequencyMapping<Erb> {
1775 fn new_erb(params: &SpectrogramParams, erb: &crate::erb::ErbParams) -> SpectrogramResult<Self> {
1776 let n_fft = params.stft().n_fft();
1777 let sample_rate = params.sample_rate_hz();
1778
1779 // Validate: n_filters must be <= something sensible
1780 if erb.n_filters() > nzu!(10_000) {
1781 return Err(SpectrogramError::invalid_input(
1782 "n_filters is unreasonably large",
1783 ));
1784 }
1785
1786 // Generate ERB filterbank with pre-computed frequency responses
1787 let filterbank = crate::erb::ErbFilterbank::generate(erb, sample_rate, n_fft)?;
1788
1789 Ok(Self {
1790 kind: MappingKind::Erb { filterbank },
1791 _marker: PhantomData,
1792 })
1793 }
1794}
1795
1796impl FrequencyMapping<Cqt> {
1797 fn new(params: &SpectrogramParams, cqt: &CqtParams) -> SpectrogramResult<Self> {
1798 let sample_rate = params.sample_rate_hz();
1799 let n_fft = params.stft().n_fft();
1800
1801 // Validate that frequency range is reasonable
1802 let f_max = cqt.bin_frequency(cqt.num_bins().get().saturating_sub(1));
1803 if f_max >= sample_rate / 2.0 {
1804 return Err(SpectrogramError::invalid_input(
1805 "CQT maximum frequency must be below Nyquist frequency",
1806 ));
1807 }
1808
1809 // Generate CQT kernel using n_fft as the signal length for kernel generation
1810 let kernel = CqtKernel::generate(cqt, sample_rate, n_fft);
1811
1812 Ok(Self {
1813 kind: MappingKind::Cqt { kernel },
1814 _marker: PhantomData,
1815 })
1816 }
1817}
1818
1819impl<FreqScale> FrequencyMapping<FreqScale> {
1820 const fn output_bins(&self) -> NonZeroUsize {
1821 // safety: all variants ensure output bins > 0 OR rely on a matrix that is guaranteed to have rows > 0
1822 match &self.kind {
1823 MappingKind::Identity { out_len } => *out_len,
1824 // safety: matrix.nrows() > 0
1825 MappingKind::LogHz { matrix, .. } | MappingKind::Mel { matrix } => unsafe {
1826 NonZeroUsize::new_unchecked(matrix.nrows())
1827 },
1828 MappingKind::Erb { filterbank, .. } => filterbank.num_filters(),
1829 MappingKind::Cqt { kernel, .. } => kernel.num_bins(),
1830 }
1831 }
1832
1833 fn apply(
1834 &self,
1835 spectrum: &NonEmptySlice<f64>,
1836 frame: &NonEmptySlice<f64>,
1837 out: &mut NonEmptySlice<f64>,
1838 ) -> SpectrogramResult<()> {
1839 match &self.kind {
1840 MappingKind::Identity { out_len } => {
1841 if spectrum.len() != *out_len {
1842 return Err(SpectrogramError::dimension_mismatch(
1843 (*out_len).get(),
1844 spectrum.len().get(),
1845 ));
1846 }
1847 if out.len() != *out_len {
1848 return Err(SpectrogramError::dimension_mismatch(
1849 (*out_len).get(),
1850 out.len().get(),
1851 ));
1852 }
1853 out.copy_from_slice(spectrum);
1854 Ok(())
1855 }
1856 MappingKind::LogHz { matrix, .. } | MappingKind::Mel { matrix } => {
1857 let out_bins = matrix.nrows();
1858 let in_bins = matrix.ncols();
1859
1860 if spectrum.len().get() != in_bins {
1861 return Err(SpectrogramError::dimension_mismatch(
1862 in_bins,
1863 spectrum.len().get(),
1864 ));
1865 }
1866 if out.len().get() != out_bins {
1867 return Err(SpectrogramError::dimension_mismatch(
1868 out_bins,
1869 out.len().get(),
1870 ));
1871 }
1872
1873 // Sparse matrix-vector multiplication: out = matrix * spectrum
1874 matrix.multiply_vec(spectrum, out);
1875 Ok(())
1876 }
1877 MappingKind::Erb { filterbank } => {
1878 // Apply ERB filterbank using pre-computed frequency responses
1879 // The filterbank already has |H(f)|^2 pre-computed, so we just
1880 // apply it to the power spectrum
1881 let erb_out = filterbank.apply_to_power_spectrum(spectrum)?;
1882
1883 if out.len().get() != erb_out.len().get() {
1884 return Err(SpectrogramError::dimension_mismatch(
1885 erb_out.len().get(),
1886 out.len().get(),
1887 ));
1888 }
1889
1890 out.copy_from_slice(&erb_out);
1891 Ok(())
1892 }
1893 MappingKind::Cqt { kernel } => {
1894 // CQT works on time-domain unwindowed frame (not FFT spectrum).
1895 // The CQT kernels contain windowing applied during kernel generation,
1896 // so the input frame should be unwindowed to avoid double-windowing.
1897 let cqt_complex = kernel.apply(frame)?;
1898
1899 if out.len().get() != cqt_complex.len().get() {
1900 return Err(SpectrogramError::dimension_mismatch(
1901 cqt_complex.len().get(),
1902 out.len().get(),
1903 ));
1904 }
1905
1906 // Convert complex coefficients to power (|z|^2)
1907 // This matches the convention where intermediate values are in power domain
1908 for (i, c) in cqt_complex.iter().enumerate() {
1909 out[i] = c.norm_sqr();
1910 }
1911
1912 Ok(())
1913 }
1914 }
1915 }
1916
1917 fn frequencies_hz(&self, params: &SpectrogramParams) -> NonEmptyVec<f64> {
1918 match &self.kind {
1919 MappingKind::Identity { out_len } => {
1920 // Standard R2C bins: k * sr / n_fft
1921 let n_fft = params.stft().n_fft().get() as f64;
1922 let sr = params.sample_rate_hz();
1923 let df = sr / n_fft;
1924
1925 let mut f = Vec::with_capacity((*out_len).get());
1926 for k in 0..(*out_len).get() {
1927 f.push(k as f64 * df);
1928 }
1929 // safety: out_len > 0
1930 unsafe { NonEmptyVec::new_unchecked(f) }
1931 }
1932 MappingKind::Mel { matrix } => {
1933 // For mel, the axis is defined by the mel band centre frequencies.
1934 // We compute and store them consistently with how we built the filterbank.
1935 let n_mels = matrix.nrows();
1936 // safety: n_mels > 0
1937 let n_mels = unsafe { NonZeroUsize::new_unchecked(n_mels) };
1938 mel_band_centres_hz(n_mels, params.sample_rate_hz(), params.nyquist_hz())
1939 }
1940 MappingKind::LogHz { frequencies, .. } => {
1941 // Frequencies are stored when the mapping is created
1942 frequencies.clone()
1943 }
1944 MappingKind::Erb { filterbank, .. } => {
1945 // ERB center frequencies
1946 filterbank.center_frequencies().to_non_empty_vec()
1947 }
1948 MappingKind::Cqt { kernel, .. } => {
1949 // CQT center frequencies from the kernel
1950 kernel.frequencies().to_non_empty_vec()
1951 }
1952 }
1953 }
1954}
1955
1956//
1957// ========================
1958// Amplitude scaling
1959// ========================
1960//
1961
1962/// Marker trait so we can specialise behaviour by `AmpScale`.
1963pub trait AmpScaleSpec: Sized + Send + Sync {
1964 /// Apply conversion from power-domain value to the desired amplitude scale.
1965 ///
1966 /// # Arguments
1967 ///
1968 /// - `power`: input power-domain value.
1969 ///
1970 /// # Returns
1971 ///
1972 /// Converted amplitude value.
1973 fn apply_from_power(power: f64) -> f64;
1974
1975 /// Apply dB conversion in-place on a power-domain vector.
1976 ///
1977 /// This is a no-op for Power and Magnitude scales.
1978 ///
1979 /// # Arguments
1980 ///
1981 /// - `x`: power-domain values to convert to dB in-place.
1982 /// - `floor_db`: dB floor value to apply.
1983 ///
1984 /// # Returns
1985 ///
1986 /// `SpectrogramResult<()>`: Ok on success, error on invalid input.
1987 ///
1988 /// # Errors
1989 ///
1990 /// - If `floor_db` is not finite.
1991 fn apply_db_in_place(x: &mut [f64], floor_db: f64) -> SpectrogramResult<()>;
1992}
1993
1994impl AmpScaleSpec for Power {
1995 #[inline]
1996 fn apply_from_power(power: f64) -> f64 {
1997 power
1998 }
1999
2000 #[inline]
2001 fn apply_db_in_place(_x: &mut [f64], _floor_db: f64) -> SpectrogramResult<()> {
2002 Ok(())
2003 }
2004}
2005
2006impl AmpScaleSpec for Magnitude {
2007 #[inline]
2008 fn apply_from_power(power: f64) -> f64 {
2009 power.sqrt()
2010 }
2011
2012 #[inline]
2013 fn apply_db_in_place(_x: &mut [f64], _floor_db: f64) -> SpectrogramResult<()> {
2014 Ok(())
2015 }
2016}
2017
2018impl AmpScaleSpec for Decibels {
2019 #[inline]
2020 fn apply_from_power(power: f64) -> f64 {
2021 // dB conversion is applied in batch, not here.
2022 power
2023 }
2024
2025 #[inline]
2026 fn apply_db_in_place(x: &mut [f64], floor_db: f64) -> SpectrogramResult<()> {
2027 // Convert power -> dB: 10*log10(max(power, eps))
2028 // where eps is derived from floor_db to ensure consistency
2029 if !floor_db.is_finite() {
2030 return Err(SpectrogramError::invalid_input("floor_db must be finite"));
2031 }
2032
2033 // Convert floor_db to linear scale to get epsilon
2034 // e.g., floor_db = -80 dB -> eps = 10^(-80/10) = 1e-8
2035 let eps = 10.0_f64.powf(floor_db / 10.0);
2036
2037 for v in x.iter_mut() {
2038 // Clamp power to epsilon before log to avoid log(0) and ensure floor
2039 *v = 10.0 * v.max(eps).log10();
2040 }
2041 Ok(())
2042 }
2043}
2044
2045/// Amplitude scaling configuration.
2046///
2047/// This handles conversion from power-domain intermediate to the desired amplitude scale (Power, Magnitude, Decibels).
2048#[derive(Debug, Clone)]
2049struct AmplitudeScaling<AmpScale> {
2050 db_floor: Option<f64>,
2051 _marker: PhantomData<AmpScale>,
2052}
2053
2054impl<AmpScale> AmplitudeScaling<AmpScale>
2055where
2056 AmpScale: AmpScaleSpec + 'static,
2057{
2058 fn new(db: Option<&LogParams>) -> Self {
2059 let db_floor = db.map(LogParams::floor_db);
2060 Self {
2061 db_floor,
2062 _marker: PhantomData,
2063 }
2064 }
2065
2066 /// Apply amplitude scaling in-place on a mapped spectrum vector.
2067 ///
2068 /// The input vector is assumed to be in the *power* domain (|X|^2),
2069 /// because the STFT stage produces power as the canonical intermediate.
2070 ///
2071 /// - Power: leaves values unchanged.
2072 /// - Magnitude: sqrt(power).
2073 /// - Decibels: converts power -> dB and floors at `db_floor`.
2074 pub fn apply_in_place(&self, x: &mut [f64]) -> SpectrogramResult<()> {
2075 // Convert from canonical power-domain intermediate into the requested linear domain.
2076 for v in x.iter_mut() {
2077 *v = AmpScale::apply_from_power(*v);
2078 }
2079
2080 // Apply dB conversion if configured (no-op for Power/Magnitude via trait impls).
2081 if let Some(floor_db) = self.db_floor {
2082 AmpScale::apply_db_in_place(x, floor_db)?;
2083 }
2084
2085 Ok(())
2086 }
2087}
2088
2089#[derive(Debug, Clone)]
2090struct Workspace {
2091 spectrum: NonEmptyVec<f64>, // out_len (power spectrum)
2092 mapped: NonEmptyVec<f64>, // n_bins (after mapping)
2093 frame: NonEmptyVec<f64>, // n_fft (windowed frame for FFT)
2094 fft_out: NonEmptyVec<Complex<f64>>, // out_len (FFT output)
2095}
2096
2097impl Workspace {
2098 fn new(n_fft: NonZeroUsize, out_len: NonZeroUsize, n_bins: NonZeroUsize) -> Self {
2099 Self {
2100 spectrum: non_empty_vec![0.0; out_len],
2101 mapped: non_empty_vec![0.0; n_bins],
2102 frame: non_empty_vec![0.0; n_fft],
2103 fft_out: non_empty_vec![Complex::new(0.0, 0.0); out_len],
2104 }
2105 }
2106
2107 fn ensure_sizes(&mut self, n_fft: NonZeroUsize, out_len: NonZeroUsize, n_bins: NonZeroUsize) {
2108 if self.spectrum.len() != out_len {
2109 self.spectrum.resize(out_len, 0.0);
2110 }
2111 if self.mapped.len() != n_bins {
2112 self.mapped.resize(n_bins, 0.0);
2113 }
2114 if self.frame.len() != n_fft {
2115 self.frame.resize(n_fft, 0.0);
2116 }
2117 if self.fft_out.len() != out_len {
2118 self.fft_out.resize(out_len, Complex::new(0.0, 0.0));
2119 }
2120 }
2121}
2122
2123fn build_frequency_axis<FreqScale>(
2124 params: &SpectrogramParams,
2125 mapping: &FrequencyMapping<FreqScale>,
2126) -> FrequencyAxis<FreqScale>
2127where
2128 FreqScale: Copy + Clone + 'static,
2129{
2130 let frequencies = mapping.frequencies_hz(params);
2131 FrequencyAxis::new(frequencies)
2132}
2133
2134fn build_time_axis_seconds(params: &SpectrogramParams, n_frames: NonZeroUsize) -> NonEmptyVec<f64> {
2135 let dt = params.frame_period_seconds();
2136 let mut times = Vec::with_capacity(n_frames.get());
2137
2138 for i in 0..n_frames.get() {
2139 times.push(i as f64 * dt);
2140 }
2141
2142 // safety: times is guaranteed non-empty since n_frames > 0
2143
2144 unsafe { NonEmptyVec::new_unchecked(times) }
2145}
2146
2147/// Generate window function samples.
2148///
2149/// Supports various window types including Rectangular, Hanning, Hamming, Blackman, Kaiser, and Gaussian.
2150///
2151/// # Arguments
2152///
2153/// * `window` - The type of window function to generate.
2154/// * `n_fft` - The size of the FFT, which determines the length of the window.
2155///
2156/// # Returns
2157///
2158/// A `NonEmptyVec<f64>` containing the window function samples.
2159///
2160/// # Panics
2161///
2162/// Panics if a custom window is provided with a size that does not match `n_fft`.
2163#[inline]
2164#[must_use]
2165pub fn make_window(window: WindowType, n_fft: NonZeroUsize) -> NonEmptyVec<f64> {
2166 let n_fft = n_fft.get();
2167 let mut w = vec![0.0; n_fft];
2168
2169 match window {
2170 WindowType::Rectangular => {
2171 w.fill(1.0);
2172 }
2173 WindowType::Hanning => {
2174 // Hann: 0.5 - 0.5*cos(2Ï€n/(N-1))
2175 let n1 = (n_fft - 1) as f64;
2176 for (n, v) in w.iter_mut().enumerate() {
2177 *v = 0.5f64.mul_add(-(2.0 * std::f64::consts::PI * (n as f64) / n1).cos(), 0.5);
2178 }
2179 }
2180 WindowType::Hamming => {
2181 // Hamming: 0.54 - 0.46*cos(2Ï€n/(N-1))
2182 let n1 = (n_fft - 1) as f64;
2183 for (n, v) in w.iter_mut().enumerate() {
2184 *v = 0.46f64.mul_add(-(2.0 * std::f64::consts::PI * (n as f64) / n1).cos(), 0.54);
2185 }
2186 }
2187 WindowType::Blackman => {
2188 // Blackman: 0.42 - 0.5*cos(2Ï€n/(N-1)) + 0.08*cos(4Ï€n/(N-1))
2189 let n1 = (n_fft - 1) as f64;
2190 for (n, v) in w.iter_mut().enumerate() {
2191 let a = 2.0 * std::f64::consts::PI * (n as f64) / n1;
2192 *v = 0.08f64.mul_add((2.0 * a).cos(), 0.5f64.mul_add(-a.cos(), 0.42));
2193 }
2194 }
2195 WindowType::Kaiser { beta } => {
2196 (0..n_fft).for_each(|i| {
2197 let n = i as f64;
2198 let n_max: f64 = (n_fft - 1) as f64;
2199 let alpha: f64 = (n - n_max / 2.0) / (n_max / 2.0);
2200 let bessel_arg = beta * alpha.mul_add(-alpha, 1.0).sqrt();
2201 // Simplified approximation of modified Bessel function
2202 let x = 1.0
2203 + bessel_arg / 2.0
2204 // Normalize by I0(beta) approximation
2205 / (1.0 + beta / 2.0);
2206 w[i] = x;
2207 });
2208 }
2209 WindowType::Gaussian { std } => (0..n_fft).for_each(|i| {
2210 let n = i as f64;
2211 let center: f64 = (n_fft - 1) as f64 / 2.0;
2212 let exponent: f64 = -0.5 * ((n - center) / std).powi(2);
2213 w[i] = exponent.exp();
2214 }),
2215 WindowType::Custom { coefficients, size } => {
2216 assert!(
2217 size.get() == n_fft,
2218 "Custom window size mismatch: expected {}, got {}. \
2219 Custom windows must be pre-computed with the exact FFT size.",
2220 n_fft,
2221 size.get()
2222 );
2223 w.copy_from_slice(&coefficients);
2224 }
2225 }
2226
2227 // safety: window is guaranteed non-empty since n_fft > 0
2228 unsafe { NonEmptyVec::new_unchecked(w) }
2229}
2230
2231/// Convert Hz to mel scale using Slaney formula (librosa default, htk=False).
2232///
2233/// Uses a hybrid scale:
2234/// - Linear below 1000 Hz: mel = hz / (200/3)
2235/// - Logarithmic above 1000 Hz: mel = 15 + log(hz/1000) / log_step
2236///
2237/// This matches librosa's default behavior.
2238fn hz_to_mel(hz: f64) -> f64 {
2239 const F_MIN: f64 = 0.0;
2240 const F_SP: f64 = 200.0 / 3.0; // ~66.667
2241 const MIN_LOG_HZ: f64 = 1000.0;
2242 const MIN_LOG_MEL: f64 = (MIN_LOG_HZ - F_MIN) / F_SP; // = 15.0
2243 const LOGSTEP: f64 = 0.068_751_777_420_949_23; // ln(6.4) / 27
2244 if hz >= MIN_LOG_HZ {
2245 // Logarithmic region
2246 MIN_LOG_MEL + (hz / MIN_LOG_HZ).ln() / LOGSTEP
2247 } else {
2248 // Linear region
2249 (hz - F_MIN) / F_SP
2250 }
2251}
2252
2253/// Convert mel to Hz using Slaney formula (librosa default, htk=False).
2254///
2255/// Inverse of hz_to_mel.
2256fn mel_to_hz(mel: f64) -> f64 {
2257 const F_MIN: f64 = 0.0;
2258 const F_SP: f64 = 200.0 / 3.0; // ~66.667
2259 const MIN_LOG_HZ: f64 = 1000.0;
2260 const MIN_LOG_MEL: f64 = (MIN_LOG_HZ - F_MIN) / F_SP; // = 15.0
2261 const LOGSTEP: f64 = 0.068_751_777_420_949_23; // ln(6.4) / 27
2262
2263 if mel >= MIN_LOG_MEL {
2264 // Logarithmic region
2265 MIN_LOG_HZ * (LOGSTEP * (mel - MIN_LOG_MEL)).exp()
2266 } else {
2267 // Linear region
2268 F_SP.mul_add(mel, F_MIN)
2269 }
2270}
2271
2272fn build_mel_filterbank_matrix(
2273 sample_rate_hz: f64,
2274 n_fft: NonZeroUsize,
2275 n_mels: NonZeroUsize,
2276 f_min: f64,
2277 f_max: f64,
2278 norm: MelNorm,
2279) -> SpectrogramResult<SparseMatrix> {
2280 if sample_rate_hz <= 0.0 || !sample_rate_hz.is_finite() {
2281 return Err(SpectrogramError::invalid_input(
2282 "sample_rate_hz must be finite and > 0",
2283 ));
2284 }
2285 if f_min < 0.0 || f_min.is_infinite() {
2286 return Err(SpectrogramError::invalid_input("f_min must be >= 0"));
2287 }
2288 if f_max <= f_min {
2289 return Err(SpectrogramError::invalid_input("f_max must be > f_min"));
2290 }
2291 if f_max > sample_rate_hz * 0.5 {
2292 return Err(SpectrogramError::invalid_input("f_max must be <= Nyquist"));
2293 }
2294 let n_mels = n_mels.get();
2295 let n_fft = n_fft.get();
2296 let out_len = r2c_output_size(n_fft);
2297
2298 // FFT bin frequencies
2299 let df = sample_rate_hz / n_fft as f64;
2300
2301 // Mel points: n_mels + 2 (for triangular edges)
2302 let mel_min = hz_to_mel(f_min);
2303 let mel_max = hz_to_mel(f_max);
2304
2305 let n_points = n_mels + 2;
2306 let step = (mel_max - mel_min) / (n_points - 1) as f64;
2307
2308 let mut mel_points = Vec::with_capacity(n_points);
2309 for i in 0..n_points {
2310 mel_points.push((i as f64).mul_add(step, mel_min));
2311 }
2312
2313 let mut hz_points = Vec::with_capacity(n_points);
2314 for m in &mel_points {
2315 hz_points.push(mel_to_hz(*m));
2316 }
2317
2318 // Build filterbank as sparse matrix (librosa-style, in frequency space)
2319 // This builds triangular filters based on actual frequencies, not bin indices
2320 let mut fb = SparseMatrix::new(n_mels, out_len);
2321
2322 for m in 0..n_mels {
2323 let freq_left = hz_points[m];
2324 let freq_center = hz_points[m + 1];
2325 let freq_right = hz_points[m + 2];
2326
2327 let fdiff_left = freq_center - freq_left;
2328 let fdiff_right = freq_right - freq_center;
2329
2330 if fdiff_left == 0.0 || fdiff_right == 0.0 {
2331 // Degenerate triangle, skip
2332 continue;
2333 }
2334
2335 // For each FFT bin, compute the triangular weight based on its frequency
2336 for k in 0..out_len {
2337 let bin_freq = k as f64 * df;
2338
2339 // Lower slope: rises from freq_left to freq_center
2340 let lower = (bin_freq - freq_left) / fdiff_left;
2341
2342 // Upper slope: falls from freq_center to freq_right
2343 let upper = (freq_right - bin_freq) / fdiff_right;
2344
2345 // Triangle is the minimum of the two slopes, clipped to [0, 1]
2346 let weight = lower.min(upper).clamp(0.0, 1.0);
2347
2348 if weight > 0.0 {
2349 fb.set(m, k, weight);
2350 }
2351 }
2352 }
2353
2354 // Apply normalization
2355 match norm {
2356 MelNorm::None => {
2357 // No normalization needed
2358 }
2359 MelNorm::Slaney => {
2360 // Slaney-style area normalization: 2 / (hz_max - hz_min) for each triangle
2361 // NOTE: Uses Hz bandwidth, not mel bandwidth (to match librosa's implementation)
2362 for m in 0..n_mels {
2363 let mel_left = mel_points[m];
2364 let mel_right = mel_points[m + 2];
2365 let hz_left = mel_to_hz(mel_left);
2366 let hz_right = mel_to_hz(mel_right);
2367 let enorm = 2.0 / (hz_right - hz_left);
2368
2369 // Normalize all values in this row
2370 for val in &mut fb.values[m] {
2371 *val *= enorm;
2372 }
2373 }
2374 }
2375 MelNorm::L1 => {
2376 // L1 normalization: sum of weights = 1.0
2377 for m in 0..n_mels {
2378 let sum: f64 = fb.values[m].iter().sum();
2379 if sum > 0.0 {
2380 let normalizer = 1.0 / sum;
2381 for val in &mut fb.values[m] {
2382 *val *= normalizer;
2383 }
2384 }
2385 }
2386 }
2387 MelNorm::L2 => {
2388 // L2 normalization: L2 norm = 1.0
2389 for m in 0..n_mels {
2390 let norm_val: f64 = fb.values[m].iter().map(|&v| v * v).sum::<f64>().sqrt();
2391 if norm_val > 0.0 {
2392 let normalizer = 1.0 / norm_val;
2393 for val in &mut fb.values[m] {
2394 *val *= normalizer;
2395 }
2396 }
2397 }
2398 }
2399 }
2400
2401 Ok(fb)
2402}
2403
2404/// Build a logarithmic frequency interpolation matrix.
2405///
2406/// Maps linearly-spaced FFT bins to logarithmically-spaced frequency bins
2407/// using linear interpolation.
2408fn build_loghz_matrix(
2409 sample_rate_hz: f64,
2410 n_fft: NonZeroUsize,
2411 n_bins: NonZeroUsize,
2412 f_min: f64,
2413 f_max: f64,
2414) -> SpectrogramResult<(SparseMatrix, NonEmptyVec<f64>)> {
2415 if sample_rate_hz <= 0.0 || !sample_rate_hz.is_finite() {
2416 return Err(SpectrogramError::invalid_input(
2417 "sample_rate_hz must be finite and > 0",
2418 ));
2419 }
2420 if f_min <= 0.0 || f_min.is_infinite() {
2421 return Err(SpectrogramError::invalid_input(
2422 "f_min must be finite and > 0",
2423 ));
2424 }
2425 if f_max <= f_min {
2426 return Err(SpectrogramError::invalid_input("f_max must be > f_min"));
2427 }
2428 if f_max > sample_rate_hz * 0.5 {
2429 return Err(SpectrogramError::invalid_input("f_max must be <= Nyquist"));
2430 }
2431
2432 let n_bins = n_bins.get();
2433 let n_fft = n_fft.get();
2434
2435 let out_len = r2c_output_size(n_fft);
2436 let df = sample_rate_hz / n_fft as f64;
2437
2438 // Generate logarithmically-spaced frequencies
2439 let log_f_min = f_min.ln();
2440 let log_f_max = f_max.ln();
2441 let log_step = (log_f_max - log_f_min) / (n_bins - 1) as f64;
2442
2443 let mut log_frequencies = Vec::with_capacity(n_bins);
2444 for i in 0..n_bins {
2445 let log_f = (i as f64).mul_add(log_step, log_f_min);
2446 log_frequencies.push(log_f.exp());
2447 }
2448 // safety: n_bins > 0
2449 let log_frequencies = unsafe { NonEmptyVec::new_unchecked(log_frequencies) };
2450
2451 // Build interpolation matrix as sparse matrix
2452 let mut matrix = SparseMatrix::new(n_bins, out_len);
2453
2454 for (bin_idx, &target_freq) in log_frequencies.iter().enumerate() {
2455 // Find the two FFT bins that bracket this frequency
2456 let exact_bin = target_freq / df;
2457 let lower_bin = exact_bin.floor() as usize;
2458 let upper_bin = (exact_bin.ceil() as usize).min(out_len - 1);
2459
2460 if lower_bin >= out_len {
2461 continue;
2462 }
2463
2464 if lower_bin == upper_bin {
2465 // Exact match
2466 matrix.set(bin_idx, lower_bin, 1.0);
2467 } else {
2468 // Linear interpolation
2469 let frac = exact_bin - lower_bin as f64;
2470 matrix.set(bin_idx, lower_bin, 1.0 - frac);
2471 if upper_bin < out_len {
2472 matrix.set(bin_idx, upper_bin, frac);
2473 }
2474 }
2475 }
2476
2477 Ok((matrix, log_frequencies))
2478}
2479
2480fn mel_band_centres_hz(
2481 n_mels: NonZeroUsize,
2482 sample_rate_hz: f64,
2483 nyquist_hz: f64,
2484) -> NonEmptyVec<f64> {
2485 let f_min = 0.0;
2486 let f_max = nyquist_hz.min(sample_rate_hz * 0.5);
2487
2488 let mel_min = hz_to_mel(f_min);
2489 let mel_max = hz_to_mel(f_max);
2490 let n_mels = n_mels.get();
2491 let step = (mel_max - mel_min) / (n_mels + 1) as f64;
2492
2493 let mut centres = Vec::with_capacity(n_mels);
2494 for i in 0..n_mels {
2495 let mel = (i as f64 + 1.0).mul_add(step, mel_min);
2496 centres.push(mel_to_hz(mel));
2497 }
2498 // safety: centres is guaranteed non-empty since n_mels > 0
2499 unsafe { NonEmptyVec::new_unchecked(centres) }
2500}
2501
2502/// Spectrogram structure holding the computed spectrogram data and metadata.
2503///
2504/// # Type Parameters
2505///
2506/// * `FreqScale`: The frequency scale type (e.g., `LinearHz`, `Mel`, `LogHz`, etc.).
2507/// * `AmpScale`: The amplitude scale type (e.g., `Power`, `Magnitude`, `Decibels`).
2508///
2509/// # Fields
2510///
2511/// * `data`: A 2D array containing the spectrogram data.
2512/// * `axes`: The axes of the spectrogram (frequency and time).
2513/// * `params`: The parameters used to compute the spectrogram.
2514/// * `_amp`: A phantom data marker for the amplitude scale type.
2515#[derive(Debug, Clone)]
2516#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
2517pub struct Spectrogram<FreqScale, AmpScale>
2518where
2519 AmpScale: AmpScaleSpec + 'static,
2520 FreqScale: Copy + Clone + 'static,
2521{
2522 data: Array2<f64>,
2523 axes: Axes<FreqScale>,
2524 params: SpectrogramParams,
2525 #[cfg_attr(feature = "serde", serde(skip))]
2526 _amp: PhantomData<AmpScale>,
2527}
2528
2529impl<FreqScale, AmpScale> Spectrogram<FreqScale, AmpScale>
2530where
2531 AmpScale: AmpScaleSpec + 'static,
2532 FreqScale: Copy + Clone + 'static,
2533{
2534 /// Get the X-axis label.
2535 ///
2536 /// # Returns
2537 ///
2538 /// A static string slice representing the X-axis label.
2539 #[inline]
2540 #[must_use]
2541 pub const fn x_axis_label() -> &'static str {
2542 "Time (s)"
2543 }
2544
2545 /// Get the Y-axis label based on the frequency scale type.
2546 ///
2547 /// # Returns
2548 ///
2549 /// A static string slice representing the Y-axis label.
2550 #[inline]
2551 #[must_use]
2552 pub fn y_axis_label() -> &'static str {
2553 match std::any::TypeId::of::<FreqScale>() {
2554 id if id == std::any::TypeId::of::<LinearHz>() => "Frequency (Hz)",
2555 id if id == std::any::TypeId::of::<Mel>() => "Frequency (Mel)",
2556 id if id == std::any::TypeId::of::<LogHz>() => "Frequency (Log Hz)",
2557 id if id == std::any::TypeId::of::<Erb>() => "Frequency (ERB)",
2558 id if id == std::any::TypeId::of::<Cqt>() => "Frequency (CQT Bins)",
2559 _ => "Frequency",
2560 }
2561 }
2562
2563 /// Internal constructor. Only callable inside the crate.
2564 ///
2565 /// All inputs must already be validated and consistent.
2566 pub(crate) fn new(data: Array2<f64>, axes: Axes<FreqScale>, params: SpectrogramParams) -> Self {
2567 debug_assert_eq!(data.nrows(), axes.frequencies().len().get());
2568 debug_assert_eq!(data.ncols(), axes.times().len().get());
2569
2570 Self {
2571 data,
2572 axes,
2573 params,
2574 _amp: PhantomData,
2575 }
2576 }
2577
2578 /// Set spectrogram data matrix
2579 ///
2580 /// # Arguments
2581 ///
2582 /// * `data` - The new spectrogram data matrix.
2583 #[inline]
2584 pub fn set_data(&mut self, data: Array2<f64>) {
2585 self.data = data;
2586 }
2587
2588 /// Spectrogram data matrix
2589 ///
2590 /// # Returns
2591 ///
2592 /// A reference to the spectrogram data matrix.
2593 #[inline]
2594 #[must_use]
2595 pub const fn data(&self) -> &Array2<f64> {
2596 &self.data
2597 }
2598
2599 /// Axes of the spectrogram
2600 ///
2601 /// # Returns
2602 ///
2603 /// A reference to the axes of the spectrogram.
2604 #[inline]
2605 #[must_use]
2606 pub const fn axes(&self) -> &Axes<FreqScale> {
2607 &self.axes
2608 }
2609
2610 /// Frequency axis in Hz
2611 ///
2612 /// # Returns
2613 ///
2614 /// A reference to the frequency axis in Hz.
2615 #[inline]
2616 #[must_use]
2617 pub fn frequencies(&self) -> &NonEmptySlice<f64> {
2618 self.axes.frequencies()
2619 }
2620
2621 /// Frequency range in Hz (min, max)
2622 ///
2623 /// # Returns
2624 ///
2625 /// A tuple containing the minimum and maximum frequencies in Hz.
2626 #[inline]
2627 #[must_use]
2628 pub const fn frequency_range(&self) -> (f64, f64) {
2629 self.axes.frequency_range()
2630 }
2631
2632 /// Time axis in seconds
2633 ///
2634 /// # Returns
2635 ///
2636 /// A reference to the time axis in seconds.
2637 #[inline]
2638 #[must_use]
2639 pub fn times(&self) -> &NonEmptySlice<f64> {
2640 self.axes.times()
2641 }
2642
2643 /// Spectrogram computation parameters
2644 ///
2645 /// # Returns
2646 ///
2647 /// A reference to the spectrogram computation parameters.
2648 #[inline]
2649 #[must_use]
2650 pub const fn params(&self) -> &SpectrogramParams {
2651 &self.params
2652 }
2653
2654 /// Duration of the spectrogram in seconds
2655 ///
2656 /// # Returns
2657 ///
2658 /// The duration of the spectrogram in seconds.
2659 #[inline]
2660 #[must_use]
2661 pub fn duration(&self) -> f64 {
2662 self.axes.duration()
2663 }
2664
2665 /// If this is a dB spectrogram, return the (min, max) dB values. otherwise do the maths to compute dB range.
2666 ///
2667 /// # Returns
2668 ///
2669 /// The (min, max) dB values of the spectrogram, or `None` if the amplitude scale is unknown.
2670 #[inline]
2671 #[must_use]
2672 pub fn db_range(&self) -> Option<(f64, f64)> {
2673 let type_self = std::any::TypeId::of::<AmpScale>();
2674
2675 if type_self == std::any::TypeId::of::<Decibels>() {
2676 let (min, max) = min_max_single_pass(self.data.as_slice()?);
2677 Some((min, max))
2678 } else if type_self == std::any::TypeId::of::<Power>() {
2679 // Not a dB spectrogram; compute dB range from power values
2680 let mut min_db = f64::INFINITY;
2681 let mut max_db = f64::NEG_INFINITY;
2682 for &v in &self.data {
2683 let db = 10.0 * (v + EPS).log10();
2684 if db < min_db {
2685 min_db = db;
2686 }
2687 if db > max_db {
2688 max_db = db;
2689 }
2690 }
2691 Some((min_db, max_db))
2692 } else if type_self == std::any::TypeId::of::<Magnitude>() {
2693 // Not a dB spectrogram; compute dB range from magnitude values
2694 let mut min_db = f64::INFINITY;
2695 let mut max_db = f64::NEG_INFINITY;
2696
2697 for &v in &self.data {
2698 let power = v * v;
2699 let db = 10.0 * (power + EPS).log10();
2700 if db < min_db {
2701 min_db = db;
2702 }
2703 if db > max_db {
2704 max_db = db;
2705 }
2706 }
2707
2708 Some((min_db, max_db))
2709 } else {
2710 // Unknown AmpScale type; return dummy values
2711 None
2712 }
2713 }
2714
2715 /// Number of frequency bins
2716 ///
2717 /// # Returns
2718 ///
2719 /// The number of frequency bins in the spectrogram.
2720 #[inline]
2721 #[must_use]
2722 pub fn n_bins(&self) -> NonZeroUsize {
2723 // safety: data.nrows() > 0 is guaranteed by construction
2724 unsafe { NonZeroUsize::new_unchecked(self.data.nrows()) }
2725 }
2726
2727 /// Number of time frames in the spectrogram
2728 ///
2729 /// # Returns
2730 ///
2731 /// The number of time frames (columns) in the spectrogram.
2732 #[inline]
2733 #[must_use]
2734 pub fn n_frames(&self) -> NonZeroUsize {
2735 // safety: data.ncols() > 0 is guaranteed by construction
2736 unsafe { NonZeroUsize::new_unchecked(self.data.ncols()) }
2737 }
2738}
2739
2740impl<FreqScale, AmpScale> AsRef<Array2<f64>> for Spectrogram<FreqScale, AmpScale>
2741where
2742 FreqScale: Copy + Clone + 'static,
2743 AmpScale: AmpScaleSpec + 'static,
2744{
2745 #[inline]
2746 fn as_ref(&self) -> &Array2<f64> {
2747 &self.data
2748 }
2749}
2750
2751impl<FreqScale, AmpScale> Deref for Spectrogram<FreqScale, AmpScale>
2752where
2753 FreqScale: Copy + Clone + 'static,
2754 AmpScale: AmpScaleSpec + 'static,
2755{
2756 type Target = Array2<f64>;
2757
2758 #[inline]
2759 fn deref(&self) -> &Self::Target {
2760 &self.data
2761 }
2762}
2763
2764impl<AmpScale> Spectrogram<LinearHz, AmpScale>
2765where
2766 AmpScale: AmpScaleSpec + 'static,
2767{
2768 /// Compute a linear-frequency spectrogram from audio samples.
2769 ///
2770 /// This is a convenience method that creates a planner internally and computes
2771 /// the spectrogram in one call. For processing multiple signals with the same
2772 /// parameters, use [`SpectrogramPlanner::linear_plan`] to create a reusable plan.
2773 ///
2774 /// # Arguments
2775 ///
2776 /// * `samples` - Audio samples (any type that can be converted to a slice)
2777 /// * `params` - Spectrogram computation parameters
2778 /// * `db` - Optional logarithmic scaling parameters (only used when `AmpScale = Decibels`)
2779 ///
2780 /// # Returns
2781 ///
2782 /// A linear-frequency spectrogram with the specified amplitude scale.
2783 ///
2784 /// # Errors
2785 ///
2786 /// Returns an error if:
2787 /// - The samples slice is empty
2788 /// - Parameters are invalid
2789 /// - FFT computation fails
2790 ///
2791 /// # Examples
2792 ///
2793 /// ```
2794 /// use spectrograms::*;
2795 /// use non_empty_slice::non_empty_vec;
2796 ///
2797 /// # fn example() -> SpectrogramResult<()> {
2798 /// // Create a simple test signal
2799 /// let sample_rate = 16000.0;
2800 /// let samples_vec: Vec<f64> = (0..16000).map(|i| {
2801 /// (2.0 * std::f64::consts::PI * 440.0 * i as f64 / sample_rate).sin()
2802 /// }).collect();
2803 /// let samples = non_empty_slice::NonEmptyVec::new(samples_vec).unwrap();
2804 ///
2805 /// // Set up parameters
2806 /// let stft = StftParams::new(nzu!(512), nzu!(256), WindowType::Hanning, true)?;
2807 /// let params = SpectrogramParams::new(stft, sample_rate)?;
2808 ///
2809 /// // Compute power spectrogram
2810 /// let spec = LinearPowerSpectrogram::compute(&samples, ¶ms, None)?;
2811 ///
2812 /// println!("Computed spectrogram: {} bins x {} frames", spec.n_bins(), spec.n_frames());
2813 /// # Ok(())
2814 /// # }
2815 /// ```
2816 #[inline]
2817 pub fn compute(
2818 samples: &NonEmptySlice<f64>,
2819 params: &SpectrogramParams,
2820 db: Option<&LogParams>,
2821 ) -> SpectrogramResult<Self> {
2822 let planner = SpectrogramPlanner::new();
2823 let mut plan = planner.linear_plan(params, db)?;
2824 plan.compute(samples)
2825 }
2826}
2827
2828impl<AmpScale> Spectrogram<Mel, AmpScale>
2829where
2830 AmpScale: AmpScaleSpec + 'static,
2831{
2832 /// Compute a mel-frequency spectrogram from audio samples.
2833 ///
2834 /// This is a convenience method that creates a planner internally and computes
2835 /// the spectrogram in one call. For processing multiple signals with the same
2836 /// parameters, use [`SpectrogramPlanner::mel_plan`] to create a reusable plan.
2837 ///
2838 /// # Arguments
2839 ///
2840 /// * `samples` - Audio samples (any type that can be converted to a slice)
2841 /// * `params` - Spectrogram computation parameters
2842 /// * `mel` - Mel filterbank parameters
2843 /// * `db` - Optional logarithmic scaling parameters (only used when `AmpScale = Decibels`)
2844 ///
2845 /// # Returns
2846 ///
2847 /// A mel-frequency spectrogram with the specified amplitude scale.
2848 ///
2849 /// # Errors
2850 ///
2851 /// Returns an error if:
2852 /// - The samples slice is empty
2853 /// - Parameters are invalid
2854 /// - Mel `f_max` exceeds Nyquist frequency
2855 /// - FFT computation fails
2856 ///
2857 /// # Examples
2858 ///
2859 /// ```
2860 /// use spectrograms::*;
2861 /// use non_empty_slice::non_empty_vec;
2862 ///
2863 /// # fn example() -> SpectrogramResult<()> {
2864 /// // Create a simple test signal
2865 /// let sample_rate = 16000.0;
2866 /// let samples_vec: Vec<f64> = (0..16000).map(|i| {
2867 /// (2.0 * std::f64::consts::PI * 440.0 * i as f64 / sample_rate).sin()
2868 /// }).collect();
2869 /// let samples = non_empty_slice::NonEmptyVec::new(samples_vec).unwrap();
2870 ///
2871 /// // Set up parameters
2872 /// let stft = StftParams::new(nzu!(512), nzu!(256), WindowType::Hanning, true)?;
2873 /// let params = SpectrogramParams::new(stft, sample_rate)?;
2874 /// let mel = MelParams::new(nzu!(80), 0.0, 8000.0)?;
2875 ///
2876 /// // Compute mel spectrogram in dB scale
2877 /// let db = LogParams::new(-80.0)?;
2878 /// let spec = MelDbSpectrogram::compute(&samples, ¶ms, &mel, Some(&db))?;
2879 ///
2880 /// println!("Computed mel spectrogram: {} mels x {} frames", spec.n_bins(), spec.n_frames());
2881 /// # Ok(())
2882 /// # }
2883 /// ```
2884 #[inline]
2885 pub fn compute(
2886 samples: &NonEmptySlice<f64>,
2887 params: &SpectrogramParams,
2888 mel: &MelParams,
2889 db: Option<&LogParams>,
2890 ) -> SpectrogramResult<Self> {
2891 let planner = SpectrogramPlanner::new();
2892 let mut plan = planner.mel_plan(params, mel, db)?;
2893 plan.compute(samples)
2894 }
2895}
2896
2897impl<AmpScale> Spectrogram<Erb, AmpScale>
2898where
2899 AmpScale: AmpScaleSpec + 'static,
2900{
2901 /// Compute an ERB-frequency spectrogram from audio samples.
2902 ///
2903 /// This is a convenience method that creates a planner internally and computes
2904 /// the spectrogram in one call. For processing multiple signals with the same
2905 /// parameters, use [`SpectrogramPlanner::erb_plan`] to create a reusable plan.
2906 ///
2907 /// # Arguments
2908 ///
2909 /// * `samples` - Audio samples (any type that can be converted to a slice)
2910 /// * `params` - Spectrogram computation parameters
2911 /// * `erb` - ERB frequency scale parameters
2912 /// * `db` - Optional logarithmic scaling parameters (only used when `AmpScale = Decibels`)
2913 ///
2914 /// # Returns
2915 ///
2916 /// An ERB-scale spectrogram with the specified amplitude scale.
2917 ///
2918 /// # Errors
2919 ///
2920 /// Returns an error if:
2921 /// - The samples slice is empty
2922 /// - Parameters are invalid
2923 /// - FFT computation fails
2924 ///
2925 /// # Examples
2926 ///
2927 /// ```
2928 /// use spectrograms::*;
2929 /// use non_empty_slice::non_empty_vec;
2930 ///
2931 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
2932 /// let samples = non_empty_vec![0.0; nzu!(16000)];
2933 /// let stft = StftParams::new(nzu!(512), nzu!(256), WindowType::Hanning, true)?;
2934 /// let params = SpectrogramParams::new(stft, 16000.0)?;
2935 /// let erb = ErbParams::speech_standard();
2936 ///
2937 /// let spec = ErbPowerSpectrogram::compute(&samples, ¶ms, &erb, None)?;
2938 /// assert_eq!(spec.n_bins(), nzu!(40));
2939 /// # Ok(())
2940 /// # }
2941 /// ```
2942 #[inline]
2943 pub fn compute(
2944 samples: &NonEmptySlice<f64>,
2945 params: &SpectrogramParams,
2946 erb: &ErbParams,
2947 db: Option<&LogParams>,
2948 ) -> SpectrogramResult<Self> {
2949 let planner = SpectrogramPlanner::new();
2950 let mut plan = planner.erb_plan(params, erb, db)?;
2951 plan.compute(samples)
2952 }
2953}
2954
2955impl<AmpScale> Spectrogram<LogHz, AmpScale>
2956where
2957 AmpScale: AmpScaleSpec + 'static,
2958{
2959 /// Compute a logarithmic-frequency spectrogram from audio samples.
2960 ///
2961 /// This is a convenience method that creates a planner internally and computes
2962 /// the spectrogram in one call. For processing multiple signals with the same
2963 /// parameters, use [`SpectrogramPlanner::log_hz_plan`] to create a reusable plan.
2964 ///
2965 /// # Arguments
2966 ///
2967 /// * `samples` - Audio samples (any type that can be converted to a slice)
2968 /// * `params` - Spectrogram computation parameters
2969 /// * `loghz` - Logarithmic frequency scale parameters
2970 /// * `db` - Optional logarithmic scaling parameters (only used when `AmpScale = Decibels`)
2971 ///
2972 /// # Returns
2973 ///
2974 /// A logarithmic-frequency spectrogram with the specified amplitude scale.
2975 ///
2976 /// # Errors
2977 ///
2978 /// Returns an error if:
2979 /// - The samples slice is empty
2980 /// - Parameters are invalid
2981 /// - FFT computation fails
2982 ///
2983 /// # Examples
2984 ///
2985 /// ```
2986 /// use spectrograms::*;
2987 /// use non_empty_slice::non_empty_vec;
2988 ///
2989 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
2990 /// let samples = non_empty_vec![0.0; nzu!(16000)];
2991 /// let stft = StftParams::new(nzu!(512), nzu!(256), WindowType::Hanning, true)?;
2992 /// let params = SpectrogramParams::new(stft, 16000.0)?;
2993 /// let loghz = LogHzParams::new(nzu!(128), 20.0, 8000.0)?;
2994 ///
2995 /// let spec = LogHzPowerSpectrogram::compute(&samples, ¶ms, &loghz, None)?;
2996 /// assert_eq!(spec.n_bins(), nzu!(128));
2997 /// # Ok(())
2998 /// # }
2999 /// ```
3000 #[inline]
3001 pub fn compute(
3002 samples: &NonEmptySlice<f64>,
3003 params: &SpectrogramParams,
3004 loghz: &LogHzParams,
3005 db: Option<&LogParams>,
3006 ) -> SpectrogramResult<Self> {
3007 let planner = SpectrogramPlanner::new();
3008 let mut plan = planner.log_hz_plan(params, loghz, db)?;
3009 plan.compute(samples)
3010 }
3011}
3012
3013impl<AmpScale> Spectrogram<Cqt, AmpScale>
3014where
3015 AmpScale: AmpScaleSpec + 'static,
3016{
3017 /// Compute a constant-Q transform (CQT) spectrogram from audio samples.
3018 ///
3019 /// # Arguments
3020 ///
3021 /// * `samples` - Audio samples (any type that can be converted to a slice)
3022 /// * `params` - Spectrogram computation parameters
3023 /// * `cqt` - CQT parameters
3024 /// * `db` - Optional logarithmic scaling parameters (only used when `AmpScale = Decibels`)
3025 ///
3026 /// # Returns
3027 ///
3028 /// A CQT spectrogram with the specified amplitude scale.
3029 ///
3030 /// # Errors
3031 ///
3032 /// Returns an error if:
3033 ///
3034 #[inline]
3035 pub fn compute(
3036 samples: &NonEmptySlice<f64>,
3037 params: &SpectrogramParams,
3038 cqt: &CqtParams,
3039 db: Option<&LogParams>,
3040 ) -> SpectrogramResult<Self> {
3041 let planner = SpectrogramPlanner::new();
3042 let mut plan = planner.cqt_plan(params, cqt, db)?;
3043 plan.compute(samples)
3044 }
3045}
3046
3047// ========================
3048// Display implementations
3049// ========================
3050
3051/// Helper function to get amplitude scale name
3052fn amp_scale_name<AmpScale>() -> &'static str
3053where
3054 AmpScale: AmpScaleSpec + 'static,
3055{
3056 match std::any::TypeId::of::<AmpScale>() {
3057 id if id == std::any::TypeId::of::<Power>() => "Power",
3058 id if id == std::any::TypeId::of::<Magnitude>() => "Magnitude",
3059 id if id == std::any::TypeId::of::<Decibels>() => "Decibels",
3060 _ => "Unknown",
3061 }
3062}
3063
3064/// Helper function to get frequency scale name
3065fn freq_scale_name<FreqScale>() -> &'static str
3066where
3067 FreqScale: Copy + Clone + 'static,
3068{
3069 match std::any::TypeId::of::<FreqScale>() {
3070 id if id == std::any::TypeId::of::<LinearHz>() => "Linear Hz",
3071 id if id == std::any::TypeId::of::<LogHz>() => "Log Hz",
3072 id if id == std::any::TypeId::of::<Mel>() => "Mel",
3073 id if id == std::any::TypeId::of::<Erb>() => "ERB",
3074 id if id == std::any::TypeId::of::<Cqt>() => "CQT",
3075 _ => "Unknown",
3076 }
3077}
3078
3079impl<FreqScale, AmpScale> core::fmt::Display for Spectrogram<FreqScale, AmpScale>
3080where
3081 AmpScale: AmpScaleSpec + 'static,
3082 FreqScale: Copy + Clone + 'static,
3083{
3084 #[inline]
3085 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
3086 let (freq_min, freq_max) = self.frequency_range();
3087 let duration = self.duration();
3088 let (rows, cols) = self.data.dim();
3089
3090 // Alternative formatting (#) provides more detailed output with data
3091 if f.alternate() {
3092 writeln!(f, "Spectrogram {{")?;
3093 writeln!(f, " Frequency Scale: {}", freq_scale_name::<FreqScale>())?;
3094 writeln!(f, " Amplitude Scale: {}", amp_scale_name::<AmpScale>())?;
3095 writeln!(f, " Shape: {rows} frequency bins × {cols} time frames")?;
3096 writeln!(f, " Frequency Range: {freq_min:.2} Hz - {freq_max:.2} Hz")?;
3097 writeln!(f, " Duration: {duration:.3} s")?;
3098 writeln!(f)?;
3099
3100 // Display parameters
3101 writeln!(f, " Parameters:")?;
3102 writeln!(f, " Sample Rate: {} Hz", self.params.sample_rate_hz())?;
3103 writeln!(f, " FFT Size: {}", self.params.stft().n_fft())?;
3104 writeln!(f, " Hop Size: {}", self.params.stft().hop_size())?;
3105 writeln!(f, " Window: {:?}", self.params.stft().window())?;
3106 writeln!(f, " Centered: {}", self.params.stft().centre())?;
3107 writeln!(f)?;
3108
3109 // Display data statistics
3110 let data_slice = self.data.as_slice().unwrap_or(&[]);
3111 if !data_slice.is_empty() {
3112 let (min_val, max_val) = min_max_single_pass(data_slice);
3113 let mean = data_slice.iter().sum::<f64>() / data_slice.len() as f64;
3114 writeln!(f, " Data Statistics:")?;
3115 writeln!(f, " Min: {min_val:.6}")?;
3116 writeln!(f, " Max: {max_val:.6}")?;
3117 writeln!(f, " Mean: {mean:.6}")?;
3118 writeln!(f)?;
3119 }
3120
3121 // Display actual data (truncated if too large)
3122 writeln!(f, " Data Matrix:")?;
3123 let max_rows_to_display = 5;
3124 let max_cols_to_display = 5;
3125
3126 for i in 0..rows.min(max_rows_to_display) {
3127 write!(f, " [")?;
3128 for j in 0..cols.min(max_cols_to_display) {
3129 if j > 0 {
3130 write!(f, ", ")?;
3131 }
3132 write!(f, "{:9.4}", self.data[[i, j]])?;
3133 }
3134 if cols > max_cols_to_display {
3135 write!(f, ", ... ({} more)", cols - max_cols_to_display)?;
3136 }
3137 writeln!(f, "]")?;
3138 }
3139
3140 if rows > max_rows_to_display {
3141 writeln!(f, " ... ({} more rows)", rows - max_rows_to_display)?;
3142 }
3143
3144 write!(f, "}}")?;
3145 } else {
3146 // Default formatting: compact summary
3147 write!(
3148 f,
3149 "Spectrogram<{}, {}>[{}x{}] ({:.2}-{:.2} Hz, {:.3}s)",
3150 freq_scale_name::<FreqScale>(),
3151 amp_scale_name::<AmpScale>(),
3152 rows,
3153 cols,
3154 freq_min,
3155 freq_max,
3156 duration
3157 )?;
3158 }
3159
3160 Ok(())
3161 }
3162}
3163
3164#[derive(Debug, Clone)]
3165#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3166pub struct FrequencyAxis<FreqScale>
3167where
3168 FreqScale: Copy + Clone + 'static,
3169{
3170 frequencies: NonEmptyVec<f64>,
3171 #[cfg_attr(feature = "serde", serde(skip))]
3172 _marker: PhantomData<FreqScale>,
3173}
3174
3175impl<FreqScale> FrequencyAxis<FreqScale>
3176where
3177 FreqScale: Copy + Clone + 'static,
3178{
3179 pub(crate) const fn new(frequencies: NonEmptyVec<f64>) -> Self {
3180 Self {
3181 frequencies,
3182 _marker: PhantomData,
3183 }
3184 }
3185
3186 /// Get the frequency values in Hz.
3187 ///
3188 /// # Returns
3189 ///
3190 /// Returns a non-empty slice of frequencies.
3191 #[inline]
3192 #[must_use]
3193 pub fn frequencies(&self) -> &NonEmptySlice<f64> {
3194 &self.frequencies
3195 }
3196
3197 /// Get the frequency range (min, max) in Hz.
3198 ///
3199 /// # Returns
3200 ///
3201 /// Returns a tuple containing the minimum and maximum frequency.
3202 #[inline]
3203 #[must_use]
3204 pub const fn frequency_range(&self) -> (f64, f64) {
3205 let data = self.frequencies.as_slice();
3206 let min = data[0];
3207 let max_idx = data.len().saturating_sub(1); // safe for non-empty
3208 let max = data[max_idx];
3209 (min, max)
3210 }
3211
3212 /// Get the number of frequency bins.
3213 ///
3214 /// # Returns
3215 ///
3216 /// Returns the number of frequency bins as a NonZeroUsize.
3217 #[inline]
3218 #[must_use]
3219 pub const fn len(&self) -> NonZeroUsize {
3220 self.frequencies.len()
3221 }
3222}
3223
3224/// Spectrogram axes container.
3225///
3226/// Holds frequency and time axes.
3227#[derive(Debug, Clone)]
3228#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3229pub struct Axes<FreqScale>
3230where
3231 FreqScale: Copy + Clone + 'static,
3232{
3233 freq: FrequencyAxis<FreqScale>,
3234 times: NonEmptyVec<f64>,
3235}
3236
3237impl<FreqScale> Axes<FreqScale>
3238where
3239 FreqScale: Copy + Clone + 'static,
3240{
3241 pub(crate) const fn new(freq: FrequencyAxis<FreqScale>, times: NonEmptyVec<f64>) -> Self {
3242 Self { freq, times }
3243 }
3244
3245 /// Get the frequency values in Hz.
3246 ///
3247 /// # Returns
3248 ///
3249 /// Returns a non-empty slice of frequencies.
3250 #[inline]
3251 #[must_use]
3252 pub fn frequencies(&self) -> &NonEmptySlice<f64> {
3253 self.freq.frequencies()
3254 }
3255
3256 /// Get the time values in seconds.
3257 ///
3258 /// # Returns
3259 ///
3260 /// Returns a non-empty slice of time values.
3261 #[inline]
3262 #[must_use]
3263 pub fn times(&self) -> &NonEmptySlice<f64> {
3264 &self.times
3265 }
3266
3267 /// Get the frequency range (min, max) in Hz.
3268 ///
3269 /// # Returns
3270 ///
3271 /// Returns a tuple containing the minimum and maximum frequency.
3272 #[inline]
3273 #[must_use]
3274 pub const fn frequency_range(&self) -> (f64, f64) {
3275 self.freq.frequency_range()
3276 }
3277
3278 /// Get the duration of the spectrogram in seconds.
3279 ///
3280 /// # Returns
3281 ///
3282 /// Returns the duration in seconds.
3283 #[inline]
3284 #[must_use]
3285 pub fn duration(&self) -> f64 {
3286 *self.times.last()
3287 }
3288}
3289
3290// Enum types for frequency and amplitude scales
3291
3292/// Linear frequency scale
3293#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
3294#[cfg_attr(feature = "python", pyclass)]
3295#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3296#[non_exhaustive]
3297pub enum LinearHz {
3298 _Phantom,
3299}
3300
3301/// Logarithmic frequency scale
3302#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
3303#[cfg_attr(feature = "python", pyclass)]
3304#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3305#[non_exhaustive]
3306pub enum LogHz {
3307 _Phantom,
3308}
3309
3310/// Mel frequency scale
3311#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
3312#[cfg_attr(feature = "python", pyclass)]
3313#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3314#[non_exhaustive]
3315pub enum Mel {
3316 _Phantom,
3317}
3318
3319/// ERB/gammatone frequency scale
3320#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
3321#[cfg_attr(feature = "python", pyclass)]
3322#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3323#[non_exhaustive]
3324pub enum Erb {
3325 _Phantom,
3326}
3327pub type Gammatone = Erb;
3328
3329/// Constant-Q Transform frequency scale
3330#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
3331#[cfg_attr(feature = "python", pyclass)]
3332#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3333#[non_exhaustive]
3334pub enum Cqt {
3335 _Phantom,
3336}
3337
3338// Amplitude scales
3339
3340/// Power amplitude scale
3341#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
3342#[cfg_attr(feature = "python", pyclass)]
3343#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3344#[non_exhaustive]
3345pub enum Power {
3346 _Phantom,
3347}
3348
3349/// Decibel amplitude scale
3350#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
3351#[cfg_attr(feature = "python", pyclass)]
3352#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3353#[non_exhaustive]
3354pub enum Decibels {
3355 _Phantom,
3356}
3357
3358/// Magnitude amplitude scale
3359#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
3360#[cfg_attr(feature = "python", pyclass)]
3361#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3362#[non_exhaustive]
3363pub enum Magnitude {
3364 _Phantom,
3365}
3366
3367/// STFT parameters for spectrogram computation.
3368///
3369/// * `n_fft`: Size of the FFT window.
3370/// * `hop_size`: Number of samples between successive frames.
3371/// * window: Window function to apply to each frame.
3372/// * centre: Whether to pad the input signal so that frames are centered.
3373#[derive(Debug, Clone, PartialEq)]
3374#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3375pub struct StftParams {
3376 n_fft: NonZeroUsize,
3377 hop_size: NonZeroUsize,
3378 window: WindowType,
3379 centre: bool,
3380}
3381
3382impl StftParams {
3383 /// Create new STFT parameters.
3384 ///
3385 /// # Arguments
3386 ///
3387 /// * `n_fft` - Size of the FFT window
3388 /// * `hop_size` - Number of samples between successive frames
3389 /// * `window` - Window function to apply to each frame
3390 /// * `centre` - Whether to pad the input signal so that frames are centered
3391 ///
3392 /// # Errors
3393 ///
3394 /// Returns an error if:
3395 /// - `hop_size` > `n_fft`
3396 /// - Custom window size doesn't match `n_fft`
3397 ///
3398 /// # Returns
3399 ///
3400 /// New `StftParams` instance.
3401 #[inline]
3402 pub fn new(
3403 n_fft: NonZeroUsize,
3404 hop_size: NonZeroUsize,
3405 window: WindowType,
3406 centre: bool,
3407 ) -> SpectrogramResult<Self> {
3408 if hop_size.get() > n_fft.get() {
3409 return Err(SpectrogramError::invalid_input("hop_size must be <= n_fft"));
3410 }
3411
3412 // Validate custom window size matches n_fft
3413 if let WindowType::Custom { size, .. } = &window {
3414 if size.get() != n_fft.get() {
3415 return Err(SpectrogramError::invalid_input(format!(
3416 "Custom window size ({}) must match n_fft ({})",
3417 size.get(),
3418 n_fft.get()
3419 )));
3420 }
3421 }
3422
3423 Ok(Self {
3424 n_fft,
3425 hop_size,
3426 window,
3427 centre,
3428 })
3429 }
3430
3431 const unsafe fn new_unchecked(
3432 n_fft: NonZeroUsize,
3433 hop_size: NonZeroUsize,
3434 window: WindowType,
3435 centre: bool,
3436 ) -> Self {
3437 Self {
3438 n_fft,
3439 hop_size,
3440 window,
3441 centre,
3442 }
3443 }
3444
3445 /// Get the FFT window size.
3446 ///
3447 /// # Returns
3448 ///
3449 /// The FFT window size.
3450 #[inline]
3451 #[must_use]
3452 pub const fn n_fft(&self) -> NonZeroUsize {
3453 self.n_fft
3454 }
3455
3456 /// Get the hop size (samples between successive frames).
3457 ///
3458 /// # Returns
3459 ///
3460 /// The hop size.
3461 #[inline]
3462 #[must_use]
3463 pub const fn hop_size(&self) -> NonZeroUsize {
3464 self.hop_size
3465 }
3466
3467 /// Get the window function.
3468 ///
3469 /// # Returns
3470 ///
3471 /// The window function.
3472 #[inline]
3473 #[must_use]
3474 pub fn window(&self) -> WindowType {
3475 self.window.clone()
3476 }
3477
3478 /// Get whether frames are centered (input signal is padded).
3479 ///
3480 /// # Returns
3481 ///
3482 /// `true` if frames are centered, `false` otherwise.
3483 #[inline]
3484 #[must_use]
3485 pub const fn centre(&self) -> bool {
3486 self.centre
3487 }
3488
3489 /// Create a builder for STFT parameters.
3490 ///
3491 /// # Returns
3492 ///
3493 /// A `StftParamsBuilder` instance.
3494 ///
3495 /// # Examples
3496 ///
3497 /// ```
3498 /// use spectrograms::{StftParams, WindowType, nzu};
3499 ///
3500 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
3501 /// let stft = StftParams::builder()
3502 /// .n_fft(nzu!(2048))
3503 /// .hop_size(nzu!(512))
3504 /// .window(WindowType::Hanning)
3505 /// .centre(true)
3506 /// .build()?;
3507 ///
3508 /// assert_eq!(stft.n_fft(), nzu!(2048));
3509 /// assert_eq!(stft.hop_size(), nzu!(512));
3510 /// # Ok(())
3511 /// # }
3512 /// ```
3513 #[inline]
3514 #[must_use]
3515 pub fn builder() -> StftParamsBuilder {
3516 StftParamsBuilder::default()
3517 }
3518}
3519
3520/// Builder for [`StftParams`].
3521#[derive(Debug, Clone)]
3522pub struct StftParamsBuilder {
3523 n_fft: Option<NonZeroUsize>,
3524 hop_size: Option<NonZeroUsize>,
3525 window: WindowType,
3526 centre: bool,
3527}
3528
3529impl Default for StftParamsBuilder {
3530 #[inline]
3531 fn default() -> Self {
3532 Self {
3533 n_fft: None,
3534 hop_size: None,
3535 window: WindowType::Hanning,
3536 centre: true,
3537 }
3538 }
3539}
3540
3541impl StftParamsBuilder {
3542 /// Set the FFT window size.
3543 ///
3544 /// # Arguments
3545 ///
3546 /// * `n_fft` - Size of the FFT window
3547 ///
3548 /// # Returns
3549 ///
3550 /// The builder with the updated FFT window size.
3551 #[inline]
3552 #[must_use]
3553 pub const fn n_fft(mut self, n_fft: NonZeroUsize) -> Self {
3554 self.n_fft = Some(n_fft);
3555 self
3556 }
3557
3558 /// Set the hop size (samples between successive frames).
3559 ///
3560 /// # Arguments
3561 ///
3562 /// * `hop_size` - Number of samples between successive frames
3563 ///
3564 /// # Returns
3565 ///
3566 /// The builder with the updated hop size.
3567 #[inline]
3568 #[must_use]
3569 pub const fn hop_size(mut self, hop_size: NonZeroUsize) -> Self {
3570 self.hop_size = Some(hop_size);
3571 self
3572 }
3573
3574 /// Set the window function.
3575 ///
3576 /// # Arguments
3577 ///
3578 /// * `window` - Window function to apply to each frame
3579 ///
3580 /// # Returns
3581 ///
3582 /// The builder with the updated window function.
3583 #[inline]
3584 #[must_use]
3585 pub fn window(mut self, window: WindowType) -> Self {
3586 self.window = window;
3587 self
3588 }
3589
3590 /// Set whether to center frames (pad input signal).
3591 #[inline]
3592 #[must_use]
3593 pub const fn centre(mut self, centre: bool) -> Self {
3594 self.centre = centre;
3595 self
3596 }
3597
3598 /// Build the [`StftParams`].
3599 ///
3600 /// # Errors
3601 ///
3602 /// Returns an error if:
3603 /// - `n_fft` or `hop_size` are not set or are zero
3604 /// - `hop_size` > `n_fft`
3605 #[inline]
3606 pub fn build(self) -> SpectrogramResult<StftParams> {
3607 let n_fft = self
3608 .n_fft
3609 .ok_or_else(|| SpectrogramError::invalid_input("n_fft must be set"))?;
3610 let hop_size = self
3611 .hop_size
3612 .ok_or_else(|| SpectrogramError::invalid_input("hop_size must be set"))?;
3613
3614 StftParams::new(n_fft, hop_size, self.window, self.centre)
3615 }
3616}
3617
3618//
3619// ========================
3620// Mel parameters
3621// ========================
3622//
3623
3624/// Mel filterbank normalization strategy.
3625///
3626/// Determines how the triangular mel filters are normalized after construction.
3627#[derive(Debug, Clone, Copy, PartialEq, Eq)]
3628#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3629#[non_exhaustive]
3630#[derive(Default)]
3631pub enum MelNorm {
3632 /// No normalization (triangular filters with peak = 1.0).
3633 ///
3634 /// This is the default and fastest option.
3635 #[default]
3636 None,
3637
3638 /// Slaney-style area normalization (librosa default).
3639 ///
3640 /// Each mel filter is divided by its bandwidth in Hz: `2.0 / (f_max - f_min)`.
3641 /// This ensures constant energy per mel band regardless of bandwidth.
3642 ///
3643 /// Use this for compatibility with librosa's default behavior.
3644 Slaney,
3645
3646 /// L1 normalization (sum of weights = 1.0).
3647 ///
3648 /// Each mel filter's weights are divided by their sum.
3649 /// Useful when you want each filter to act as a weighted average.
3650 L1,
3651
3652 /// L2 normalization (Euclidean norm = 1.0).
3653 ///
3654 /// Each mel filter's weights are divided by their L2 norm.
3655 /// Provides unit-norm filters in the L2 sense.
3656 L2,
3657}
3658
3659/// Mel filter bank parameters
3660///
3661/// * `n_mels`: Number of mel bands
3662/// * `f_min`: Minimum frequency (Hz)
3663/// * `f_max`: Maximum frequency (Hz)
3664/// * `norm`: Filterbank normalization strategy
3665#[derive(Debug, Clone, Copy, PartialEq)]
3666#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3667pub struct MelParams {
3668 n_mels: NonZeroUsize,
3669 f_min: f64,
3670 f_max: f64,
3671 norm: MelNorm,
3672}
3673
3674impl MelParams {
3675 /// Create new mel filter bank parameters.
3676 ///
3677 /// # Arguments
3678 ///
3679 /// * `n_mels` - Number of mel bands
3680 /// * `f_min` - Minimum frequency (Hz)
3681 /// * `f_max` - Maximum frequency (Hz)
3682 ///
3683 /// # Errors
3684 ///
3685 /// Returns an error if:
3686 /// - `f_min` is not >= 0
3687 /// - `f_max` is not > `f_min`
3688 ///
3689 /// # Returns
3690 ///
3691 /// A `MelParams` instance with no normalization (default).
3692 #[inline]
3693 pub fn new(n_mels: NonZeroUsize, f_min: f64, f_max: f64) -> SpectrogramResult<Self> {
3694 Self::with_norm(n_mels, f_min, f_max, MelNorm::None)
3695 }
3696
3697 /// Create new mel filter bank parameters with specified normalization.
3698 ///
3699 /// # Arguments
3700 ///
3701 /// * `n_mels` - Number of mel bands
3702 /// * `f_min` - Minimum frequency (Hz)
3703 /// * `f_max` - Maximum frequency (Hz)
3704 /// * `norm` - Filterbank normalization strategy
3705 ///
3706 /// # Errors
3707 ///
3708 /// Returns an error if:
3709 /// - `f_min` is not >= 0
3710 /// - `f_max` is not > `f_min`
3711 ///
3712 /// # Returns
3713 ///
3714 /// A `MelParams` instance.
3715 #[inline]
3716 pub fn with_norm(
3717 n_mels: NonZeroUsize,
3718 f_min: f64,
3719 f_max: f64,
3720 norm: MelNorm,
3721 ) -> SpectrogramResult<Self> {
3722 if f_min < 0.0 {
3723 return Err(SpectrogramError::invalid_input("f_min must be >= 0"));
3724 }
3725
3726 if f_max <= f_min {
3727 return Err(SpectrogramError::invalid_input("f_max must be > f_min"));
3728 }
3729
3730 Ok(Self {
3731 n_mels,
3732 f_min,
3733 f_max,
3734 norm,
3735 })
3736 }
3737
3738 const unsafe fn new_unchecked(n_mels: NonZeroUsize, f_min: f64, f_max: f64) -> Self {
3739 Self {
3740 n_mels,
3741 f_min,
3742 f_max,
3743 norm: MelNorm::None,
3744 }
3745 }
3746
3747 /// Get the number of mel bands.
3748 ///
3749 /// # Returns
3750 ///
3751 /// The number of mel bands.
3752 #[inline]
3753 #[must_use]
3754 pub const fn n_mels(&self) -> NonZeroUsize {
3755 self.n_mels
3756 }
3757
3758 /// Get the minimum frequency (Hz).
3759 ///
3760 /// # Returns
3761 ///
3762 /// The minimum frequency in Hz.
3763 #[inline]
3764 #[must_use]
3765 pub const fn f_min(&self) -> f64 {
3766 self.f_min
3767 }
3768
3769 /// Get the maximum frequency (Hz).
3770 ///
3771 /// # Returns
3772 ///
3773 /// The maximum frequency in Hz.
3774 #[inline]
3775 #[must_use]
3776 pub const fn f_max(&self) -> f64 {
3777 self.f_max
3778 }
3779
3780 /// Get the filterbank normalization strategy.
3781 ///
3782 /// # Returns
3783 ///
3784 /// The normalization strategy.
3785 #[inline]
3786 #[must_use]
3787 pub const fn norm(&self) -> MelNorm {
3788 self.norm
3789 }
3790
3791 /// Create standard mel filterbank parameters.
3792 ///
3793 /// Uses 128 mel bands from 0 Hz to the Nyquist frequency.
3794 ///
3795 /// # Arguments
3796 ///
3797 /// * `sample_rate` - Sample rate in Hz (used to determine `f_max`)
3798 ///
3799 /// # Returns
3800 ///
3801 /// A `MelParams` instance with standard settings.
3802 ///
3803 /// # Panics
3804 ///
3805 /// Panics if `sample_rate` is not greater than 0.
3806 #[inline]
3807 #[must_use]
3808 pub const fn standard(sample_rate: f64) -> Self {
3809 assert!(sample_rate > 0.0);
3810 // safety: parameters are known to be valid
3811 unsafe { Self::new_unchecked(nzu!(128), 0.0, sample_rate / 2.0) }
3812 }
3813
3814 /// Create mel filterbank parameters optimized for speech.
3815 ///
3816 /// Uses 40 mel bands from 0 Hz to 8000 Hz (typical speech bandwidth).
3817 ///
3818 /// # Returns
3819 ///
3820 /// A `MelParams` instance with speech-optimized settings.
3821 #[inline]
3822 #[must_use]
3823 pub const fn speech_standard() -> Self {
3824 // safety: parameters are known to be valid
3825 unsafe { Self::new_unchecked(nzu!(40), 0.0, 8000.0) }
3826 }
3827}
3828
3829//
3830// ========================
3831// LogHz parameters
3832// ========================
3833//
3834
3835/// Logarithmic frequency scale parameters
3836///
3837/// * `n_bins`: Number of logarithmically-spaced frequency bins
3838/// * `f_min`: Minimum frequency (Hz)
3839/// * `f_max`: Maximum frequency (Hz)
3840#[derive(Debug, Clone, Copy, PartialEq)]
3841#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3842pub struct LogHzParams {
3843 n_bins: NonZeroUsize,
3844 f_min: f64,
3845 f_max: f64,
3846}
3847
3848impl LogHzParams {
3849 /// Create new logarithmic frequency scale parameters.
3850 ///
3851 /// # Arguments
3852 ///
3853 /// * `n_bins` - Number of logarithmically-spaced frequency bins
3854 /// * `f_min` - Minimum frequency (Hz)
3855 /// * `f_max` - Maximum frequency (Hz)
3856 ///
3857 /// # Errors
3858 ///
3859 /// Returns an error if:
3860 /// - `f_min` is not finite and > 0
3861 /// - `f_max` is not > `f_min`
3862 ///
3863 /// # Returns
3864 ///
3865 /// A `LogHzParams` instance.
3866 #[inline]
3867 pub fn new(n_bins: NonZeroUsize, f_min: f64, f_max: f64) -> SpectrogramResult<Self> {
3868 if !(f_min > 0.0 && f_min.is_finite()) {
3869 return Err(SpectrogramError::invalid_input(
3870 "f_min must be finite and > 0",
3871 ));
3872 }
3873
3874 if f_max <= f_min {
3875 return Err(SpectrogramError::invalid_input("f_max must be > f_min"));
3876 }
3877
3878 Ok(Self {
3879 n_bins,
3880 f_min,
3881 f_max,
3882 })
3883 }
3884
3885 const unsafe fn new_unchecked(n_bins: NonZeroUsize, f_min: f64, f_max: f64) -> Self {
3886 Self {
3887 n_bins,
3888 f_min,
3889 f_max,
3890 }
3891 }
3892
3893 /// Get the number of frequency bins.
3894 ///
3895 /// # Returns
3896 ///
3897 /// The number of frequency bins.
3898 #[inline]
3899 #[must_use]
3900 pub const fn n_bins(&self) -> NonZeroUsize {
3901 self.n_bins
3902 }
3903
3904 /// Get the minimum frequency (Hz).
3905 ///
3906 /// # Returns
3907 ///
3908 /// The minimum frequency in Hz.
3909 #[inline]
3910 #[must_use]
3911 pub const fn f_min(&self) -> f64 {
3912 self.f_min
3913 }
3914
3915 /// Get the maximum frequency (Hz).
3916 ///
3917 /// # Returns
3918 ///
3919 /// The maximum frequency in Hz.
3920 #[inline]
3921 #[must_use]
3922 pub const fn f_max(&self) -> f64 {
3923 self.f_max
3924 }
3925
3926 /// Create standard logarithmic frequency parameters.
3927 ///
3928 /// Uses 128 log bins from 20 Hz to the Nyquist frequency.
3929 ///
3930 /// # Arguments
3931 ///
3932 /// * `sample_rate` - Sample rate in Hz (used to determine `f_max`)
3933 #[inline]
3934 #[must_use]
3935 pub fn standard(sample_rate: f64) -> Self {
3936 // safety: parameters are known to be valid
3937 unsafe { Self::new_unchecked(nzu!(128), 20.0, sample_rate / 2.0) }
3938 }
3939
3940 /// Create logarithmic frequency parameters optimized for music.
3941 ///
3942 /// Uses 84 bins (7 octaves * 12 bins/octave) from 27.5 Hz (A0) to 4186 Hz (C8).
3943 #[inline]
3944 #[must_use]
3945 pub const fn music_standard() -> Self {
3946 // safety: parameters are known to be valid
3947 unsafe { Self::new_unchecked(nzu!(84), 27.5, 4186.0) }
3948 }
3949}
3950
3951//
3952// ========================
3953// Log scaling parameters
3954// ========================
3955//
3956
3957#[derive(Debug, Clone, Copy, PartialEq)]
3958#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
3959pub struct LogParams {
3960 floor_db: f64,
3961}
3962
3963impl LogParams {
3964 /// Create new logarithmic scaling parameters.
3965 ///
3966 /// # Arguments
3967 ///
3968 /// * `floor_db` - Minimum dB value (floor) for logarithmic scaling
3969 ///
3970 /// # Errors
3971 ///
3972 /// Returns an error if `floor_db` is not finite.
3973 ///
3974 /// # Returns
3975 ///
3976 /// A `LogParams` instance.
3977 #[inline]
3978 pub fn new(floor_db: f64) -> SpectrogramResult<Self> {
3979 if !floor_db.is_finite() {
3980 return Err(SpectrogramError::invalid_input("floor_db must be finite"));
3981 }
3982
3983 Ok(Self { floor_db })
3984 }
3985
3986 /// Get the floor dB value.
3987 #[inline]
3988 #[must_use]
3989 pub const fn floor_db(&self) -> f64 {
3990 self.floor_db
3991 }
3992}
3993
3994/// Spectrogram computation parameters.
3995///
3996/// * `stft`: STFT parameters
3997/// * `sample_rate_hz`: Sample rate in Hz
3998#[derive(Debug, Clone)]
3999#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
4000pub struct SpectrogramParams {
4001 stft: StftParams,
4002 sample_rate_hz: f64,
4003}
4004
4005impl SpectrogramParams {
4006 /// Create new spectrogram parameters.
4007 ///
4008 /// # Arguments
4009 ///
4010 /// * `stft` - STFT parameters
4011 /// * `sample_rate_hz` - Sample rate in Hz
4012 ///
4013 /// # Errors
4014 ///
4015 /// Returns an error if the sample rate is not positive and finite.
4016 ///
4017 /// # Returns
4018 ///
4019 /// A `SpectrogramParams` instance.
4020 #[inline]
4021 pub fn new(stft: StftParams, sample_rate_hz: f64) -> SpectrogramResult<Self> {
4022 if !(sample_rate_hz > 0.0 && sample_rate_hz.is_finite()) {
4023 return Err(SpectrogramError::invalid_input(
4024 "sample_rate_hz must be finite and > 0",
4025 ));
4026 }
4027
4028 Ok(Self {
4029 stft,
4030 sample_rate_hz,
4031 })
4032 }
4033
4034 /// Create a builder for spectrogram parameters.
4035 ///
4036 /// # Errors
4037 ///
4038 /// Returns an error if required parameters are not set or are invalid.
4039 ///
4040 /// # Returns
4041 ///
4042 /// A builder for [`SpectrogramParams`].
4043 ///
4044 /// # Examples
4045 ///
4046 /// ```
4047 /// use spectrograms::{SpectrogramParams, WindowType, nzu};
4048 ///
4049 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
4050 /// let params = SpectrogramParams::builder()
4051 /// .sample_rate(16000.0)
4052 /// .n_fft(nzu!(512))
4053 /// .hop_size(nzu!(256))
4054 /// .window(WindowType::Hanning)
4055 /// .centre(true)
4056 /// .build()?;
4057 ///
4058 /// assert_eq!(params.sample_rate_hz(), 16000.0);
4059 /// # Ok(())
4060 /// # }
4061 /// ```
4062 #[inline]
4063 #[must_use]
4064 pub fn builder() -> SpectrogramParamsBuilder {
4065 SpectrogramParamsBuilder::default()
4066 }
4067
4068 /// Create default parameters for speech processing.
4069 ///
4070 /// # Arguments
4071 ///
4072 /// * `sample_rate_hz` - Sample rate in Hz
4073 ///
4074 /// # Returns
4075 ///
4076 /// A `SpectrogramParams` instance with default settings for music analysis.
4077 ///
4078 /// # Errors
4079 ///
4080 /// Returns an error if the sample rate is not positive and finite.
4081 ///
4082 /// Uses:
4083 /// - `n_fft`: 512 (32ms at 16kHz)
4084 /// - `hop_size`: 160 (10ms at 16kHz)
4085 /// - window: Hanning
4086 /// - centre: true
4087 #[inline]
4088 pub fn speech_default(sample_rate_hz: f64) -> SpectrogramResult<Self> {
4089 // safety: parameters are known to be valid
4090 let stft =
4091 unsafe { StftParams::new_unchecked(nzu!(512), nzu!(160), WindowType::Hanning, true) };
4092
4093 Self::new(stft, sample_rate_hz)
4094 }
4095
4096 /// Create default parameters for music processing.
4097 ///
4098 /// # Arguments
4099 ///
4100 /// * `sample_rate_hz` - Sample rate in Hz
4101 ///
4102 /// # Returns
4103 ///
4104 /// A `SpectrogramParams` instance with default settings for music analysis.
4105 ///
4106 /// # Errors
4107 ///
4108 /// Returns an error if the sample rate is not positive and finite.
4109 ///
4110 /// Uses:
4111 /// - `n_fft`: 2048 (46ms at 44.1kHz)
4112 /// - `hop_size`: 512 (11.6ms at 44.1kHz)
4113 /// - window: Hanning
4114 /// - centre: true
4115 #[inline]
4116 pub fn music_default(sample_rate_hz: f64) -> SpectrogramResult<Self> {
4117 // safety: parameters are known to be valid
4118 let stft =
4119 unsafe { StftParams::new_unchecked(nzu!(2048), nzu!(512), WindowType::Hanning, true) };
4120 Self::new(stft, sample_rate_hz)
4121 }
4122
4123 /// Get the STFT parameters.
4124 #[inline]
4125 #[must_use]
4126 pub const fn stft(&self) -> &StftParams {
4127 &self.stft
4128 }
4129
4130 /// Get the sample rate in Hz.
4131 #[inline]
4132 #[must_use]
4133 pub const fn sample_rate_hz(&self) -> f64 {
4134 self.sample_rate_hz
4135 }
4136
4137 /// Get the frame period in seconds.
4138 #[inline]
4139 #[must_use]
4140 #[allow(clippy::cast_precision_loss)]
4141 pub fn frame_period_seconds(&self) -> f64 {
4142 self.stft.hop_size().get() as f64 / self.sample_rate_hz
4143 }
4144
4145 /// Get the Nyquist frequency in Hz.
4146 #[inline]
4147 #[must_use]
4148 pub fn nyquist_hz(&self) -> f64 {
4149 self.sample_rate_hz * 0.5
4150 }
4151}
4152
4153/// Builder for [`SpectrogramParams`].
4154#[derive(Debug, Clone)]
4155pub struct SpectrogramParamsBuilder {
4156 sample_rate: Option<f64>,
4157 n_fft: Option<NonZeroUsize>,
4158 hop_size: Option<NonZeroUsize>,
4159 window: WindowType,
4160 centre: bool,
4161}
4162
4163impl Default for SpectrogramParamsBuilder {
4164 #[inline]
4165 fn default() -> Self {
4166 Self {
4167 sample_rate: None,
4168 n_fft: None,
4169 hop_size: None,
4170 window: WindowType::Hanning,
4171 centre: true,
4172 }
4173 }
4174}
4175
4176impl SpectrogramParamsBuilder {
4177 /// Set the sample rate in Hz.
4178 ///
4179 /// # Arguments
4180 ///
4181 /// * `sample_rate` - Sample rate in Hz.
4182 ///
4183 /// # Returns
4184 ///
4185 /// The updated builder instance.
4186 #[inline]
4187 #[must_use]
4188 pub const fn sample_rate(mut self, sample_rate: f64) -> Self {
4189 self.sample_rate = Some(sample_rate);
4190 self
4191 }
4192
4193 /// Set the FFT window size.
4194 ///
4195 /// # Arguments
4196 ///
4197 /// * `n_fft` - FFT size.
4198 ///
4199 /// # Returns
4200 ///
4201 /// The updated builder instance.
4202 #[inline]
4203 #[must_use]
4204 pub const fn n_fft(mut self, n_fft: NonZeroUsize) -> Self {
4205 self.n_fft = Some(n_fft);
4206 self
4207 }
4208
4209 /// Set the hop size (samples between successive frames).
4210 ///
4211 /// # Arguments
4212 ///
4213 /// * `hop_size` - Hop size in samples.
4214 ///
4215 /// # Returns
4216 ///
4217 /// The updated builder instance.
4218 #[inline]
4219 #[must_use]
4220 pub const fn hop_size(mut self, hop_size: NonZeroUsize) -> Self {
4221 self.hop_size = Some(hop_size);
4222 self
4223 }
4224
4225 /// Set the window function.
4226 ///
4227 /// # Arguments
4228 ///
4229 /// * `window` - Window function to apply to each frame.
4230 ///
4231 /// # Returns
4232 ///
4233 /// The updated builder instance.
4234 #[inline]
4235 #[must_use]
4236 pub fn window(mut self, window: WindowType) -> Self {
4237 self.window = window;
4238 self
4239 }
4240
4241 /// Set whether to center frames (pad input signal).
4242 ///
4243 /// # Arguments
4244 ///
4245 /// * `centre` - If true, frames are centered by padding the input signal.
4246 ///
4247 /// # Returns
4248 ///
4249 /// The updated builder instance.
4250 #[inline]
4251 #[must_use]
4252 pub const fn centre(mut self, centre: bool) -> Self {
4253 self.centre = centre;
4254 self
4255 }
4256
4257 /// Build the [`SpectrogramParams`].
4258 ///
4259 /// # Errors
4260 ///
4261 /// Returns an error if required parameters are not set or are invalid.
4262 ///
4263 /// # Returns
4264 ///
4265 /// A `SpectrogramParams` instance.
4266 #[inline]
4267 pub fn build(self) -> SpectrogramResult<SpectrogramParams> {
4268 let sample_rate = self
4269 .sample_rate
4270 .ok_or_else(|| SpectrogramError::invalid_input("sample_rate must be set"))?;
4271 let n_fft = self
4272 .n_fft
4273 .ok_or_else(|| SpectrogramError::invalid_input("n_fft must be set"))?;
4274 let hop_size = self
4275 .hop_size
4276 .ok_or_else(|| SpectrogramError::invalid_input("hop_size must be set"))?;
4277
4278 let stft = StftParams::new(n_fft, hop_size, self.window, self.centre)?;
4279 SpectrogramParams::new(stft, sample_rate)
4280 }
4281}
4282
4283//
4284// ========================
4285// Standalone FFT Functions
4286// ========================
4287//
4288
4289/// Compute the real-to-complex FFT of a real-valued signal.
4290///
4291/// This function performs a forward FFT on real-valued input, returning the
4292/// complex frequency domain representation. Only the positive frequencies
4293/// are returned (length = `n_fft/2` + 1) due to conjugate symmetry.
4294///
4295/// # Arguments
4296///
4297/// * `samples` - Input signal (length ≤ n_fft, will be zero-padded if shorter)
4298/// * `n_fft` - FFT size
4299///
4300/// # Returns
4301///
4302/// A vector of complex frequency bins with length `n_fft/2` + 1.
4303///
4304/// # Automatic Zero-Padding
4305///
4306/// If the input signal is shorter than `n_fft`, it will be automatically
4307/// zero-padded to the required length. This is standard DSP practice and
4308/// preserves frequency resolution (bin spacing = sample_rate / n_fft).
4309///
4310/// ```
4311/// use spectrograms::{fft, nzu};
4312/// use non_empty_slice::non_empty_vec;
4313///
4314/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
4315/// let signal = non_empty_vec![1.0, 2.0, 3.0]; // Only 3 samples
4316/// let spectrum = fft(&signal, nzu!(8))?; // Automatically padded to 8
4317/// assert_eq!(spectrum.len(), 5); // Output: 8/2 + 1 = 5 bins
4318/// # Ok(())
4319/// # }
4320/// ```
4321///
4322/// # Errors
4323///
4324/// Returns `InvalidInput` error if the input length exceeds `n_fft`.
4325///
4326/// # Examples
4327///
4328/// ```
4329/// use spectrograms::*;
4330/// use non_empty_slice::non_empty_vec;
4331///
4332/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
4333/// let signal = non_empty_vec![0.0; nzu!(512)];
4334/// let spectrum = fft(&signal, nzu!(512))?;
4335///
4336/// assert_eq!(spectrum.len(), 257); // 512/2 + 1
4337/// # Ok(())
4338/// # }
4339/// ```
4340#[inline]
4341pub fn fft(
4342 samples: &NonEmptySlice<f64>,
4343 n_fft: NonZeroUsize,
4344) -> SpectrogramResult<Array1<Complex<f64>>> {
4345 if samples.len() > n_fft {
4346 return Err(SpectrogramError::invalid_input(format!(
4347 "Input length ({}) exceeds FFT size ({})",
4348 samples.len(),
4349 n_fft
4350 )));
4351 }
4352
4353 let out_len = r2c_output_size(n_fft.get());
4354
4355 // Get FFT plan from global cache (or create if first use)
4356 #[cfg(feature = "realfft")]
4357 let mut fft = {
4358 use crate::fft_backend::get_or_create_r2c_plan;
4359 let plan = get_or_create_r2c_plan(n_fft.get())?;
4360 // Clone the plan to get our own mutable copy with independent scratch buffer
4361 // This is cheap - only clones the scratch buffer, not the expensive twiddle factors
4362 (*plan).clone()
4363 };
4364
4365 #[cfg(feature = "fftw")]
4366 let mut fft = {
4367 use std::sync::Arc;
4368 let plan = crate::FftwPlanner::build_plan(n_fft.get())?;
4369 crate::FftwPlan::new(Arc::new(plan))
4370 };
4371
4372 let input = if samples.len() < n_fft {
4373 let mut padded = vec![0.0; n_fft.get()];
4374 padded[..samples.len().get()].copy_from_slice(samples);
4375 // safety: samples.len() < n_fft checked above and n_fft > 0
4376
4377 unsafe { NonEmptyVec::new_unchecked(padded) }
4378 } else {
4379 samples.to_non_empty_vec()
4380 };
4381
4382 let mut output = vec![Complex::new(0.0, 0.0); out_len];
4383 fft.process(&input, &mut output)?;
4384 let output = Array1::from_vec(output);
4385 Ok(output)
4386}
4387
4388#[inline]
4389/// Compute the real-valued fft of a signal.
4390///
4391/// # Arguments
4392/// * `samples` - Input signal (length ≤ n_fft, will be zero-padded if shorter)
4393/// * `n_fft` - FFT size
4394///
4395/// # Returns
4396///
4397/// An array with length `n_fft/2` + 1.
4398///
4399/// # Errors
4400///
4401/// Returns `InvalidInput` error if the input length exceeds `n_fft`.
4402///
4403/// # Examples
4404///
4405/// ```
4406/// use spectrograms::*;
4407/// use non_empty_slice::non_empty_vec;
4408///
4409/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
4410/// let signal = non_empty_vec![0.0; nzu!(512)];
4411/// let rfft_result = rfft(&signal, nzu!(512))?;
4412/// // equivalent to
4413/// let fft_result = fft(&signal, nzu!(512))?;
4414/// let rfft_result = fft_result.mapv(num_complex::Complex::norm);
4415/// # Ok(())
4416/// # }
4417///
4418pub fn rfft(samples: &NonEmptySlice<f64>, n_fft: NonZeroUsize) -> SpectrogramResult<Array1<f64>> {
4419 Ok(fft(samples, n_fft)?.mapv(Complex::norm))
4420}
4421
4422/// Compute the power spectrum of a signal (|X|²).
4423///
4424/// This function applies an optional window function and computes the
4425/// power spectrum via FFT. The result contains only positive frequencies.
4426///
4427/// # Arguments
4428///
4429/// * `samples` - Input signal (length ≤ n_fft, will be zero-padded if shorter)
4430/// * `n_fft` - FFT size
4431/// * `window` - Optional window function (None for rectangular window)
4432///
4433/// # Returns
4434///
4435/// A vector of power values with length `n_fft/2` + 1.
4436///
4437/// # Automatic Zero-Padding
4438///
4439/// If the input signal is shorter than `n_fft`, it will be automatically
4440/// zero-padded to the required length. This is standard DSP practice and
4441/// preserves frequency resolution (bin spacing = sample_rate / n_fft).
4442///
4443/// ```
4444/// use spectrograms::{power_spectrum, nzu};
4445/// use non_empty_slice::non_empty_vec;
4446///
4447/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
4448/// let signal = non_empty_vec![1.0, 2.0, 3.0]; // Only 3 samples
4449/// let power = power_spectrum(&signal, nzu!(8), None)?;
4450/// assert_eq!(power.len(), nzu!(5)); // Output: 8/2 + 1 = 5 bins
4451/// # Ok(())
4452/// # }
4453/// ```
4454///
4455/// # Errors
4456///
4457/// Returns `InvalidInput` error if the input length exceeds `n_fft`.
4458///
4459/// # Examples
4460///
4461/// ```
4462/// use spectrograms::*;
4463/// use non_empty_slice::non_empty_vec;
4464///
4465/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
4466/// let signal = non_empty_vec![0.0; nzu!(512)];
4467/// let power = power_spectrum(&signal, nzu!(512), Some(WindowType::Hanning))?;
4468///
4469/// assert_eq!(power.len(), nzu!(257)); // 512/2 + 1
4470/// # Ok(())
4471/// # }
4472/// ```
4473#[inline]
4474pub fn power_spectrum(
4475 samples: &NonEmptySlice<f64>,
4476 n_fft: NonZeroUsize,
4477 window: Option<WindowType>,
4478) -> SpectrogramResult<NonEmptyVec<f64>> {
4479 if samples.len() > n_fft {
4480 return Err(SpectrogramError::invalid_input(format!(
4481 "Input length ({}) exceeds FFT size ({})",
4482 samples.len(),
4483 n_fft
4484 )));
4485 }
4486
4487 let mut windowed = vec![0.0; n_fft.get()];
4488 windowed[..samples.len().get()].copy_from_slice(samples);
4489
4490 if let Some(win_type) = window {
4491 let window_samples = make_window(win_type, n_fft);
4492 for i in 0..n_fft.get() {
4493 windowed[i] *= window_samples[i];
4494 }
4495 }
4496
4497 // safety: windowed is non-empty since n_fft > 0
4498 let windowed = unsafe { NonEmptySlice::new_unchecked(&windowed) };
4499 let fft_result = fft(windowed, n_fft)?;
4500 let fft_result = fft_result
4501 .iter()
4502 .map(num_complex::Complex::norm_sqr)
4503 .collect();
4504 // safety: fft_result is non-empty since fft returned successfully
4505 Ok(unsafe { NonEmptyVec::new_unchecked(fft_result) })
4506}
4507
4508/// Compute the magnitude spectrum of a signal (|X|).
4509///
4510/// This function applies an optional window function and computes the
4511/// magnitude spectrum via FFT. The result contains only positive frequencies.
4512///
4513/// # Arguments
4514///
4515/// * `samples` - Input signal (length ≤ n_fft, will be zero-padded if shorter)
4516/// * `n_fft` - FFT size
4517/// * `window` - Optional window function (None for rectangular window)
4518///
4519/// # Automatic Zero-Padding
4520///
4521/// If the input signal is shorter than `n_fft`, it will be automatically
4522/// zero-padded to the required length. This preserves frequency resolution.
4523///
4524/// # Errors
4525///
4526/// Returns `InvalidInput` error if the input length exceeds `n_fft`.
4527///
4528/// # Returns
4529///
4530/// A vector of magnitude values with length `n_fft/2` + 1.
4531///
4532/// # Examples
4533///
4534/// ```
4535/// use spectrograms::*;
4536/// use non_empty_slice::non_empty_vec;
4537///
4538/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
4539/// let signal = non_empty_vec![0.0; nzu!(512)];
4540/// let magnitude = magnitude_spectrum(&signal, nzu!(512), Some(WindowType::Hanning))?;
4541///
4542/// assert_eq!(magnitude.len(), nzu!(257)); // 512/2 + 1
4543/// # Ok(())
4544/// # }
4545/// ```
4546#[inline]
4547pub fn magnitude_spectrum(
4548 samples: &NonEmptySlice<f64>,
4549 n_fft: NonZeroUsize,
4550 window: Option<WindowType>,
4551) -> SpectrogramResult<NonEmptyVec<f64>> {
4552 let power = power_spectrum(samples, n_fft, window)?;
4553 let power = power.iter().map(|&p| p.sqrt()).collect();
4554 // safety: power is non-empty since power_spectrum returned successfully
4555 Ok(unsafe { NonEmptyVec::new_unchecked(power) })
4556}
4557
4558/// Compute the Short-Time Fourier Transform (STFT) of a signal.
4559///
4560/// This function computes the STFT by applying a sliding window and FFT
4561/// to sequential frames of the input signal.
4562///
4563/// # Arguments
4564///
4565/// * `samples` - Input signal (any type that can be converted to a slice)
4566/// * `n_fft` - FFT size
4567/// * `hop_size` - Number of samples between successive frames
4568/// * `window` - Window function to apply to each frame
4569/// * `center` - If true, pad the signal to center frames
4570///
4571/// # Returns
4572///
4573/// A 2D array with shape (`frequency_bins`, `time_frames`) containing complex STFT values.
4574///
4575/// # Errors
4576///
4577/// Returns an error if:
4578/// - `hop_size` > `n_fft`
4579/// - STFT computation fails
4580///
4581/// # Examples
4582///
4583/// ```
4584/// use spectrograms::*;
4585/// use non_empty_slice::non_empty_vec;
4586///
4587/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
4588/// let signal = non_empty_vec![0.0; nzu!(16000)];
4589/// let stft_matrix = stft(&signal, nzu!(512), nzu!(256), WindowType::Hanning, true)?;
4590///
4591/// println!("STFT: {} bins x {} frames", stft_matrix.nrows(), stft_matrix.ncols());
4592/// # Ok(())
4593/// # }
4594/// ```
4595#[inline]
4596pub fn stft(
4597 samples: &NonEmptySlice<f64>,
4598 n_fft: NonZeroUsize,
4599 hop_size: NonZeroUsize,
4600 window: WindowType,
4601 center: bool,
4602) -> SpectrogramResult<Array2<Complex<f64>>> {
4603 let stft_params = StftParams::new(n_fft, hop_size, window, center)?;
4604 let params = SpectrogramParams::new(stft_params, 1.0)?; // dummy sample rate
4605
4606 let planner = SpectrogramPlanner::new();
4607 let result = planner.compute_stft(samples, ¶ms)?;
4608
4609 Ok(result.data)
4610}
4611
4612/// Compute the inverse real FFT (complex-to-real IFFT).
4613///
4614/// This function performs an inverse FFT, converting frequency domain data
4615/// back to the time domain. Only the positive frequencies need to be provided
4616/// (length = `n_fft/2` + 1) due to conjugate symmetry.
4617///
4618/// # Arguments
4619///
4620/// * `spectrum` - Complex frequency bins (length should be `n_fft/2` + 1)
4621/// * `n_fft` - FFT size (length of the output signal)
4622///
4623/// # Returns
4624///
4625/// A vector of real-valued time-domain samples with length `n_fft`.
4626///
4627/// # Errors
4628///
4629/// Returns an error if:
4630/// - `spectrum` length doesn't match `n_fft/2` + 1
4631/// - Inverse FFT computation fails
4632///
4633/// # Examples
4634///
4635/// ```
4636/// use spectrograms::*;
4637/// use non_empty_slice::{non_empty_vec, NonEmptySlice};
4638/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
4639/// // Forward FFT
4640/// let signal = non_empty_vec![1.0, 0.0, -1.0, 0.0, 1.0, 0.0, -1.0, 0.0];
4641/// let spectrum = fft(&signal, nzu!(8))?;
4642/// let slice = spectrum.as_slice().unwrap();
4643/// let spectrum_slice = NonEmptySlice::new(slice).unwrap();
4644/// // Inverse FFT
4645/// let reconstructed = irfft(spectrum_slice, nzu!(8))?;
4646///
4647/// assert_eq!(reconstructed.len(), nzu!(8));
4648/// # Ok(())
4649/// # }
4650/// ```
4651#[inline]
4652pub fn irfft(
4653 spectrum: &NonEmptySlice<Complex<f64>>,
4654 n_fft: NonZeroUsize,
4655) -> SpectrogramResult<NonEmptyVec<f64>> {
4656 use crate::fft_backend::{C2rPlan, r2c_output_size};
4657
4658 let n_fft = n_fft.get();
4659 let expected_len = r2c_output_size(n_fft);
4660 if spectrum.len().get() != expected_len {
4661 return Err(SpectrogramError::dimension_mismatch(
4662 expected_len,
4663 spectrum.len().get(),
4664 ));
4665 }
4666
4667 // Get inverse FFT plan from global cache (or create if first use)
4668 #[cfg(feature = "realfft")]
4669 let mut ifft = {
4670 use crate::fft_backend::get_or_create_c2r_plan;
4671 let plan = get_or_create_c2r_plan(n_fft)?;
4672 // Clone to get our own mutable copy with independent scratch buffer
4673 (*plan).clone()
4674 };
4675
4676 #[cfg(feature = "fftw")]
4677 let mut ifft = {
4678 use crate::fft_backend::C2rPlanner;
4679 let mut planner = crate::FftwPlanner::new();
4680 planner.plan_c2r(n_fft)?
4681 };
4682
4683 let mut output = vec![0.0; n_fft];
4684 ifft.process(spectrum.as_slice(), &mut output)?;
4685
4686 // Safety: output is non-empty since n_fft > 0
4687 Ok(unsafe { NonEmptyVec::new_unchecked(output) })
4688}
4689
4690/// Reconstruct a time-domain signal from its STFT using overlap-add.
4691///
4692/// This function performs the inverse Short-Time Fourier Transform, converting
4693/// a 2D complex STFT matrix back to a 1D time-domain signal using overlap-add
4694/// synthesis with the specified window function.
4695///
4696/// # Arguments
4697///
4698/// * `stft_matrix` - Complex STFT with shape (`frequency_bins`, `time_frames`)
4699/// * `n_fft` - FFT size
4700/// * `hop_size` - Number of samples between successive frames
4701/// * `window` - Window function to apply (should match forward STFT window)
4702/// * `center` - If true, assume the forward STFT was centered
4703///
4704/// # Returns
4705///
4706/// A vector of reconstructed time-domain samples.
4707///
4708/// # Errors
4709///
4710/// Returns an error if:
4711/// - `stft_matrix` dimensions are inconsistent with `n_fft`
4712/// - `hop_size` > `n_fft`
4713/// - Inverse STFT computation fails
4714///
4715/// # Examples
4716///
4717/// ```
4718/// use spectrograms::*;
4719/// use non_empty_slice::non_empty_vec;
4720///
4721/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
4722/// // Generate signal
4723/// let signal = non_empty_vec![1.0; nzu!(16000)];
4724///
4725/// // Forward STFT
4726/// let stft_matrix = stft(&signal, nzu!(512), nzu!(256), WindowType::Hanning, true)?;
4727///
4728/// // Inverse STFT
4729/// let reconstructed = istft(&stft_matrix, nzu!(512), nzu!(256), WindowType::Hanning, true)?;
4730///
4731/// println!("Original: {} samples", signal.len());
4732/// println!("Reconstructed: {} samples", reconstructed.len());
4733/// # Ok(())
4734/// # }
4735/// ```
4736#[inline]
4737pub fn istft(
4738 stft_matrix: &Array2<Complex<f64>>,
4739 n_fft: NonZeroUsize,
4740 hop_size: NonZeroUsize,
4741 window: WindowType,
4742 center: bool,
4743) -> SpectrogramResult<Vec<f64>> {
4744 use crate::fft_backend::{C2rPlan, C2rPlanner, r2c_output_size};
4745
4746 let n_bins = stft_matrix.nrows();
4747 let n_frames = stft_matrix.ncols();
4748
4749 let expected_bins = r2c_output_size(n_fft.get());
4750 if n_bins != expected_bins {
4751 return Err(SpectrogramError::dimension_mismatch(expected_bins, n_bins));
4752 }
4753 if hop_size.get() > n_fft.get() {
4754 return Err(SpectrogramError::invalid_input("hop_size must be <= n_fft"));
4755 }
4756 // Create inverse FFT plan
4757 #[cfg(feature = "realfft")]
4758 let mut ifft = {
4759 let mut planner = crate::RealFftPlanner::new();
4760 planner.plan_c2r(n_fft.get())?
4761 };
4762
4763 #[cfg(feature = "fftw")]
4764 let mut ifft = {
4765 let mut planner = crate::FftwPlanner::new();
4766 planner.plan_c2r(n_fft.get())?
4767 };
4768
4769 // Generate window
4770 let window_samples = make_window(window, n_fft);
4771 let n_fft = n_fft.get();
4772 let hop_size = hop_size.get();
4773 // Calculate output length
4774 let pad = if center { n_fft / 2 } else { 0 };
4775 let output_len = (n_frames - 1) * hop_size + n_fft;
4776 let unpadded_len = output_len.saturating_sub(2 * pad);
4777
4778 // Allocate output buffer and normalization buffer
4779 let mut output = vec![0.0; output_len];
4780 let mut norm = vec![0.0; output_len];
4781
4782 // Overlap-add synthesis
4783 let mut frame_buffer = vec![Complex::new(0.0, 0.0); n_bins];
4784 let mut time_frame = vec![0.0; n_fft];
4785
4786 for frame_idx in 0..n_frames {
4787 // Extract complex frame from STFT matrix
4788 for bin_idx in 0..n_bins {
4789 frame_buffer[bin_idx] = stft_matrix[[bin_idx, frame_idx]];
4790 }
4791
4792 // Inverse FFT
4793 ifft.process(&frame_buffer, &mut time_frame)?;
4794
4795 // Apply window
4796 for i in 0..n_fft {
4797 time_frame[i] *= window_samples[i];
4798 }
4799
4800 // Overlap-add into output buffer
4801 let start = frame_idx * hop_size;
4802 for i in 0..n_fft {
4803 let pos = start + i;
4804 if pos < output_len {
4805 output[pos] += time_frame[i];
4806 norm[pos] += window_samples[i] * window_samples[i];
4807 }
4808 }
4809 }
4810
4811 // Normalize by window energy
4812 for i in 0..output_len {
4813 if norm[i] > 1e-10 {
4814 output[i] /= norm[i];
4815 }
4816 }
4817
4818 // Remove padding if centered
4819 if center && unpadded_len > 0 {
4820 let start = pad;
4821 let end = start + unpadded_len;
4822 output = output[start..end.min(output_len)].to_vec();
4823 }
4824
4825 Ok(output)
4826}
4827
4828//
4829// ========================
4830// Reusable FFT Plans
4831// ========================
4832//
4833
4834/// A reusable FFT planner for efficient repeated FFT operations.
4835///
4836/// This planner caches FFT plans internally, making repeated FFT operations
4837/// of the same size much more efficient than calling `fft()` repeatedly.
4838///
4839/// # Examples
4840///
4841/// ```
4842/// use spectrograms::*;
4843/// use non_empty_slice::non_empty_vec;
4844///
4845/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
4846/// let mut planner = FftPlanner::new();
4847///
4848/// // Process multiple signals of the same size efficiently
4849/// for _ in 0..100 {
4850/// let signal = non_empty_vec![0.0; nzu!(512)];
4851/// let spectrum = planner.fft(&signal, nzu!(512))?;
4852/// // ... process spectrum ...
4853/// }
4854/// # Ok(())
4855/// # }
4856/// ```
4857pub struct FftPlanner {
4858 #[cfg(feature = "realfft")]
4859 inner: crate::RealFftPlanner,
4860 #[cfg(feature = "fftw")]
4861 inner: crate::FftwPlanner,
4862}
4863
4864impl FftPlanner {
4865 /// Create a new FFT planner with empty cache.
4866 #[inline]
4867 #[must_use]
4868 pub fn new() -> Self {
4869 Self {
4870 #[cfg(feature = "realfft")]
4871 inner: crate::RealFftPlanner::new(),
4872 #[cfg(feature = "fftw")]
4873 inner: crate::FftwPlanner::new(),
4874 }
4875 }
4876
4877 /// Compute forward FFT, reusing cached plans.
4878 ///
4879 /// This is more efficient than calling the standalone `fft()` function
4880 /// repeatedly for the same FFT size.
4881 ///
4882 /// # Automatic Zero-Padding
4883 ///
4884 /// If the input signal is shorter than `n_fft`, it will be automatically
4885 /// zero-padded to the required length.
4886 ///
4887 /// # Errors
4888 ///
4889 /// Returns `InvalidInput` error if the input length exceeds `n_fft`.
4890 ///
4891 /// # Examples
4892 ///
4893 /// ```
4894 /// use spectrograms::*;
4895 /// use non_empty_slice::non_empty_vec;
4896 ///
4897 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
4898 /// let mut planner = FftPlanner::new();
4899 ///
4900 /// let signal = non_empty_vec![1.0; nzu!(512)];
4901 /// let spectrum = planner.fft(&signal, nzu!(512))?;
4902 ///
4903 /// assert_eq!(spectrum.len(), 257); // 512/2 + 1
4904 /// # Ok(())
4905 /// # }
4906 /// ```
4907 #[inline]
4908 pub fn fft(
4909 &mut self,
4910 samples: &NonEmptySlice<f64>,
4911 n_fft: NonZeroUsize,
4912 ) -> SpectrogramResult<Array1<Complex<f64>>> {
4913 use crate::fft_backend::{R2cPlan, R2cPlanner, r2c_output_size};
4914
4915 if samples.len() > n_fft {
4916 return Err(SpectrogramError::invalid_input(format!(
4917 "Input length ({}) exceeds FFT size ({})",
4918 samples.len(),
4919 n_fft
4920 )));
4921 }
4922
4923 let out_len = r2c_output_size(n_fft.get());
4924 let mut plan = self.inner.plan_r2c(n_fft.get())?;
4925
4926 let input = if samples.len() < n_fft {
4927 let mut padded = vec![0.0; n_fft.get()];
4928 padded[..samples.len().get()].copy_from_slice(samples);
4929
4930 // safety: samples.len() < n_fft checked above and n_fft > 0
4931 unsafe { NonEmptyVec::new_unchecked(padded) }
4932 } else {
4933 samples.to_non_empty_vec()
4934 };
4935
4936 let mut output = vec![Complex::new(0.0, 0.0); out_len];
4937 plan.process(&input, &mut output)?;
4938
4939 let output = Array1::from_vec(output);
4940 Ok(output)
4941 }
4942
4943 /// Compute forward real FFT magnitude
4944 ///
4945 /// # Errors
4946 ///
4947 /// Returns an error if:
4948 /// - `n_fft` doesn't match the samples length
4949 ///
4950 ///
4951 #[inline]
4952 pub fn rfft(
4953 &mut self,
4954 samples: &NonEmptySlice<f64>,
4955 n_fft: NonZeroUsize,
4956 ) -> SpectrogramResult<Array1<f64>> {
4957 let fft_with_complex = fft(samples, n_fft)?;
4958 Ok(fft_with_complex.mapv(Complex::norm))
4959 }
4960
4961 /// Compute inverse FFT, reusing cached plans.
4962 ///
4963 /// This is more efficient than calling the standalone `irfft()` function
4964 /// repeatedly for the same FFT size.
4965 ///
4966 /// # Errors
4967 /// Returns an error if:
4968 ///
4969 /// - The calculated expected length of `spectrum` doesn't match its actual length
4970 ///
4971 /// # Examples
4972 ///
4973 /// ```
4974 /// use spectrograms::*;
4975 /// use non_empty_slice::{non_empty_vec, NonEmptySlice};
4976 ///
4977 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
4978 /// let mut planner = FftPlanner::new();
4979 ///
4980 /// // Forward FFT
4981 /// let signal = non_empty_vec![1.0; nzu!(512)];
4982 /// let spectrum = planner.fft(&signal, nzu!(512))?;
4983 ///
4984 /// // Inverse FFT
4985 /// let spectrum_slice = NonEmptySlice::new(spectrum.as_slice().unwrap()).unwrap();
4986 /// let reconstructed = planner.irfft(spectrum_slice, nzu!(512))?;
4987 ///
4988 /// assert_eq!(reconstructed.len(), nzu!(512));
4989 /// # Ok(())
4990 /// # }
4991 /// ```
4992 #[inline]
4993 pub fn irfft(
4994 &mut self,
4995 spectrum: &NonEmptySlice<Complex<f64>>,
4996 n_fft: NonZeroUsize,
4997 ) -> SpectrogramResult<NonEmptyVec<f64>> {
4998 use crate::fft_backend::{C2rPlan, C2rPlanner, r2c_output_size};
4999
5000 let expected_len = r2c_output_size(n_fft.get());
5001 if spectrum.len().get() != expected_len {
5002 return Err(SpectrogramError::dimension_mismatch(
5003 expected_len,
5004 spectrum.len().get(),
5005 ));
5006 }
5007
5008 let mut plan = self.inner.plan_c2r(n_fft.get())?;
5009 let mut output = vec![0.0; n_fft.get()];
5010 plan.process(spectrum, &mut output)?;
5011 // Safety: output is non-empty since n_fft > 0
5012 let output = unsafe { NonEmptyVec::new_unchecked(output) };
5013 Ok(output)
5014 }
5015
5016 /// Compute power spectrum with optional windowing, reusing cached plans.
5017 ///
5018 /// # Automatic Zero-Padding
5019 ///
5020 /// If the input signal is shorter than `n_fft`, it will be automatically
5021 /// zero-padded to the required length.
5022 ///
5023 /// # Errors
5024 /// Returns `InvalidInput` error if the input length exceeds `n_fft`.
5025 ///
5026 /// # Examples
5027 ///
5028 /// ```
5029 /// use spectrograms::*;
5030 /// use non_empty_slice::non_empty_vec;
5031 ///
5032 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
5033 /// let mut planner = FftPlanner::new();
5034 ///
5035 /// let signal = non_empty_vec![1.0; nzu!(512)];
5036 /// let power = planner.power_spectrum(&signal, nzu!(512), Some(WindowType::Hanning))?;
5037 ///
5038 /// assert_eq!(power.len(), nzu!(257));
5039 /// # Ok(())
5040 /// # }
5041 /// ```
5042 #[inline]
5043 pub fn power_spectrum(
5044 &mut self,
5045 samples: &NonEmptySlice<f64>,
5046 n_fft: NonZeroUsize,
5047 window: Option<WindowType>,
5048 ) -> SpectrogramResult<NonEmptyVec<f64>> {
5049 if samples.len() > n_fft {
5050 return Err(SpectrogramError::invalid_input(format!(
5051 "Input length ({}) exceeds FFT size ({})",
5052 samples.len(),
5053 n_fft
5054 )));
5055 }
5056
5057 let mut windowed = vec![0.0; n_fft.get()];
5058 windowed[..samples.len().get()].copy_from_slice(samples);
5059 if let Some(win_type) = window {
5060 let window_samples = make_window(win_type, n_fft);
5061 for i in 0..n_fft.get() {
5062 windowed[i] *= window_samples[i];
5063 }
5064 }
5065
5066 // safety: windowed is non-empty since n_fft > 0
5067 let windowed = unsafe { NonEmptySlice::new_unchecked(&windowed) };
5068 let fft_result = self.fft(windowed, n_fft)?;
5069 let f = fft_result
5070 .iter()
5071 .map(num_complex::Complex::norm_sqr)
5072 .collect();
5073 // safety: fft_result is non-empty since fft returned successfully
5074 Ok(unsafe { NonEmptyVec::new_unchecked(f) })
5075 }
5076
5077 /// Compute magnitude spectrum with optional windowing, reusing cached plans.
5078 ///
5079 /// # Automatic Zero-Padding
5080 ///
5081 /// If the input signal is shorter than `n_fft`, it will be automatically
5082 /// zero-padded to the required length.
5083 ///
5084 /// # Errors
5085 /// Returns `InvalidInput` error if the input length exceeds `n_fft`.
5086 ///
5087 /// # Examples
5088 ///
5089 /// ```
5090 /// use spectrograms::*;
5091 /// use non_empty_slice::non_empty_vec;
5092 ///
5093 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
5094 /// let mut planner = FftPlanner::new();
5095 ///
5096 /// let signal = non_empty_vec![1.0; nzu!(512)];
5097 /// let magnitude = planner.magnitude_spectrum(&signal, nzu!(512), Some(WindowType::Hanning))?;
5098 ///
5099 /// assert_eq!(magnitude.len(), nzu!(257));
5100 /// # Ok(())
5101 /// # }
5102 /// ```
5103 #[inline]
5104 pub fn magnitude_spectrum(
5105 &mut self,
5106 samples: &NonEmptySlice<f64>,
5107 n_fft: NonZeroUsize,
5108 window: Option<WindowType>,
5109 ) -> SpectrogramResult<NonEmptyVec<f64>> {
5110 let power = self.power_spectrum(samples, n_fft, window)?;
5111 let power = power.iter().map(|&p| p.sqrt()).collect::<Vec<f64>>();
5112 // safety: power is non-empty since power_spectrum returned successfully
5113 Ok(unsafe { NonEmptyVec::new_unchecked(power) })
5114 }
5115}
5116
5117impl Default for FftPlanner {
5118 #[inline]
5119 fn default() -> Self {
5120 Self::new()
5121 }
5122}
5123
5124#[cfg(test)]
5125mod tests {
5126 use super::*;
5127
5128 #[test]
5129 fn test_sparse_matrix_basic() {
5130 // Create a simple 3x5 sparse matrix
5131 let mut sparse = SparseMatrix::new(3, 5);
5132
5133 // Row 0: only column 1 has value 2.0
5134 sparse.set(0, 1, 2.0);
5135
5136 // Row 1: columns 2 and 3
5137 sparse.set(1, 2, 0.5);
5138 sparse.set(1, 3, 1.5);
5139
5140 // Row 2: columns 0 and 4
5141 sparse.set(2, 0, 3.0);
5142 sparse.set(2, 4, 1.0);
5143
5144 // Test matrix-vector multiplication
5145 let input = vec![1.0, 2.0, 3.0, 4.0, 5.0];
5146 let mut output = vec![0.0; 3];
5147
5148 sparse.multiply_vec(&input, &mut output);
5149
5150 // Expected results:
5151 // Row 0: 2.0 * 2.0 = 4.0
5152 // Row 1: 0.5 * 3.0 + 1.5 * 4.0 = 1.5 + 6.0 = 7.5
5153 // Row 2: 3.0 * 1.0 + 1.0 * 5.0 = 3.0 + 5.0 = 8.0
5154 assert_eq!(output[0], 4.0);
5155 assert_eq!(output[1], 7.5);
5156 assert_eq!(output[2], 8.0);
5157 }
5158
5159 #[test]
5160 fn test_sparse_matrix_zeros_ignored() {
5161 // Verify that zero values are not stored
5162 let mut sparse = SparseMatrix::new(2, 3);
5163
5164 sparse.set(0, 0, 1.0);
5165 sparse.set(0, 1, 0.0); // Should be ignored
5166 sparse.set(0, 2, 2.0);
5167
5168 // Only 2 values should be stored in row 0
5169 assert_eq!(sparse.values[0].len(), 2);
5170 assert_eq!(sparse.indices[0].len(), 2);
5171
5172 // The stored indices should be 0 and 2
5173 assert_eq!(sparse.indices[0], vec![0, 2]);
5174 assert_eq!(sparse.values[0], vec![1.0, 2.0]);
5175 }
5176
5177 #[test]
5178 fn test_loghz_matrix_sparsity() {
5179 // Verify that LogHz matrices are very sparse (1-2 non-zeros per row)
5180 let sample_rate = 16000.0;
5181 let n_fft = nzu!(512);
5182 let n_bins = nzu!(128);
5183 let f_min = 20.0;
5184 let f_max = sample_rate / 2.0;
5185
5186 let (matrix, _freqs) =
5187 build_loghz_matrix(sample_rate, n_fft, n_bins, f_min, f_max).unwrap();
5188
5189 // Each row should have at most 2 non-zero values (linear interpolation)
5190 for row_idx in 0..matrix.nrows() {
5191 let nnz = matrix.values[row_idx].len();
5192 assert!(
5193 nnz <= 2,
5194 "Row {} has {} non-zeros, expected at most 2",
5195 row_idx,
5196 nnz
5197 );
5198 assert!(nnz >= 1, "Row {} has no non-zeros", row_idx);
5199 }
5200
5201 // Total non-zeros should be close to n_bins * 2
5202 let total_nnz: usize = matrix.values.iter().map(|v| v.len()).sum();
5203 assert!(total_nnz <= n_bins.get() * 2);
5204 assert!(total_nnz >= n_bins.get()); // At least 1 per row
5205 }
5206
5207 #[test]
5208 fn test_mel_matrix_sparsity() {
5209 // Verify that Mel matrices are sparse (triangular filters)
5210 let sample_rate = 16000.0;
5211 let n_fft = nzu!(512);
5212 let n_mels = nzu!(40);
5213 let f_min = 0.0;
5214 let f_max = sample_rate / 2.0;
5215
5216 let matrix =
5217 build_mel_filterbank_matrix(sample_rate, n_fft, n_mels, f_min, f_max, MelNorm::None)
5218 .unwrap();
5219
5220 let n_fft_bins = r2c_output_size(n_fft.get());
5221
5222 // Calculate sparsity
5223 let total_nnz: usize = matrix.values.iter().map(|v| v.len()).sum();
5224 let total_elements = n_mels.get() * n_fft_bins;
5225 let sparsity = 1.0 - (total_nnz as f64 / total_elements as f64);
5226
5227 // Mel filterbanks should be >80% sparse
5228 assert!(
5229 sparsity > 0.8,
5230 "Mel matrix sparsity is only {:.1}%, expected >80%",
5231 sparsity * 100.0
5232 );
5233
5234 // Each mel filter should have significantly fewer than n_fft_bins non-zeros
5235 for row_idx in 0..matrix.nrows() {
5236 let nnz = matrix.values[row_idx].len();
5237 assert!(
5238 nnz < n_fft_bins / 2,
5239 "Mel filter {} is not sparse enough",
5240 row_idx
5241 );
5242 }
5243 }
5244}