speech_prep/decoder/
types.rs

/// PCM audio after decoding, stored as normalized floating-point samples
/// together with the format metadata of the source.
///
/// Samples are expected to lie in `[-1.0, 1.0]` whatever the bit depth of
/// the source was, so later processing stages can treat every input format
/// the same way. Use [`DecodedAudio::is_normalized`] to check that
/// expectation for a given value.
#[derive(Debug, Clone, PartialEq)]
pub struct DecodedAudio {
    /// Sample values, nominally within `[-1.0, 1.0]`.
    /// Multi-channel audio is interleaved; stereo is laid out L, R, L, R, ...
    pub samples: Vec<f32>,
    /// Sampling frequency in Hz, such as 44100 or 48000.
    pub sample_rate: u32,
    /// Channel count: 1 for mono, 2 for stereo.
    pub channels: u8,
    /// Bits per sample in the source PCM data, such as 16 or 24.
    pub bit_depth: u16,
    /// Length of the audio in seconds, derived from sample count and rate.
    pub duration_sec: f64,
}

impl DecodedAudio {
    /// Number of frames, where one frame holds one sample per channel.
    ///
    /// A stereo buffer of 1000 interleaved samples contains 500 frames.
    /// Trailing samples that do not fill a whole frame are not counted, and
    /// a channel count of zero yields `0` rather than dividing by zero.
    #[must_use]
    pub fn frame_count(&self) -> usize {
        match usize::from(self.channels) {
            0 => 0,
            samples_per_frame => self.samples.len() / samples_per_frame,
        }
    }

    /// Reports whether every sample lies inside `[-1.0, 1.0]`.
    ///
    /// An empty buffer counts as normalized. A `NaN` sample does not, since
    /// it compares as outside every range.
    #[must_use]
    pub fn is_normalized(&self) -> bool {
        self.samples
            .iter()
            .all(|sample| (-1.0..=1.0).contains(sample))
    }
}
42
/// Mono audio with metadata from channel mixing.
///
/// Holds the mixed mono samples plus diagnostic information about the
/// original channel layout and the peak amplitudes before and after mixing.
#[derive(Debug, Clone, PartialEq)]
pub struct MixedAudio {
    /// Mono audio samples in the range [-1.0, 1.0].
    pub samples: Vec<f32>,
    /// Original number of channels before mixing (1 = already mono).
    pub original_channels: u8,
    /// Peak amplitude in the original multi-channel audio.
    pub peak_before_mix: f32,
    /// Peak amplitude after mixing to mono.
    pub peak_after_mix: f32,
}

impl MixedAudio {
    /// Total number of mono samples.
    #[must_use]
    pub fn sample_count(&self) -> usize {
        self.samples.len()
    }

    /// Check whether the mixed signal reached or exceeded full scale.
    ///
    /// Returns `true` when the magnitude of `peak_after_mix` is within
    /// `f32::EPSILON` of 1.0 or anywhere above it. A peak beyond full scale
    /// is the clearest sign of clipping, so it is reported as clipped rather
    /// than being treated the same as a quiet signal.
    ///
    /// Returns `false` for a `NaN` peak.
    #[must_use]
    pub fn is_clipped(&self) -> bool {
        // Compare the magnitude so a peak stored with a negative sign is
        // judged the same way as its positive counterpart.
        self.peak_after_mix.abs() > 1.0 - f32::EPSILON
    }

    /// Calculate the peak reduction ratio from mixing.
    ///
    /// Returns `peak_after_mix / peak_before_mix`. A value of 1.0 means no
    /// amplitude change, below 1.0 means the peak was reduced, and above 1.0
    /// means it was amplified.
    ///
    /// When the pre-mix peak is effectively zero (magnitude below
    /// `f32::EPSILON`) the ratio is undefined, and 1.0 is returned instead.
    #[must_use]
    pub fn peak_ratio(&self) -> f32 {
        if self.peak_before_mix.abs() < f32::EPSILON {
            1.0 // Avoid division by zero for silent input
        } else {
            self.peak_after_mix / self.peak_before_mix
        }
    }
}
speech_prep/decoder/types.rs

speech_prep/decoder/
types.rs