speech_prep/decoder/types.rs
/// PCM audio after decoding: normalized floating-point samples together with
/// the format metadata of the source.
///
/// Whatever the bit depth of the source, every sample is scaled into the
/// range [-1.0, 1.0], so later processing stages can treat all audio formats
/// and bit depths alike.
#[derive(Clone, Debug, PartialEq)]
pub struct DecodedAudio {
    /// Sample values, each normalized to [-1.0, 1.0].
    /// Stereo data is stored interleaved: L, R, L, R, ...
    pub samples: Vec<f32>,
    /// Sampling rate in Hz, for example 44100 or 48000.
    pub sample_rate: u32,
    /// Channel count: 1 for mono, 2 for stereo.
    pub channels: u8,
    /// Bit depth of the PCM source data, for example 16 or 24.
    pub bit_depth: u16,
    /// Length of the audio in seconds, derived from the sample count and the
    /// sample rate.
    pub duration_sec: f64,
}
20
21impl DecodedAudio {
22 /// Total number of audio frames (samples per channel).
23 ///
24 /// For stereo audio with 1000 total samples, this returns 500 frames.
25 #[must_use]
26 pub fn frame_count(&self) -> usize {
27 if self.channels == 0 {
28 0
29 } else {
30 self.samples.len() / self.channels as usize
31 }
32 }
33
34 /// Verify all samples are within normalized bounds [-1.0, 1.0].
35 ///
36 /// Returns `true` if all samples are properly normalized.
37 #[must_use]
38 pub fn is_normalized(&self) -> bool {
39 self.samples.iter().all(|&s| (-1.0..=1.0).contains(&s))
40 }
41}
42
/// Result of mixing audio down to a single channel, with mixing diagnostics.
///
/// Alongside the mono samples, this records the channel count of the input
/// and the peak amplitude measured before and after the mix.
#[derive(Clone, Debug, PartialEq)]
pub struct MixedAudio {
    /// Mono sample values in the range [-1.0, 1.0].
    pub samples: Vec<f32>,
    /// Channel count of the audio before mixing (1 means it was already mono).
    pub original_channels: u8,
    /// Peak amplitude of the original multi-channel audio.
    pub peak_before_mix: f32,
    /// Peak amplitude of the mono result after mixing.
    pub peak_after_mix: f32,
}
58
59impl MixedAudio {
60 /// Total number of mono samples.
61 #[must_use]
62 pub fn sample_count(&self) -> usize {
63 self.samples.len()
64 }
65
66 /// Check if any clipping occurred during mixing.
67 ///
68 /// Returns `true` if the peak amplitude equals 1.0 (indicating potential
69 /// clipping at the boundaries).
70 #[must_use]
71 pub fn is_clipped(&self) -> bool {
72 (self.peak_after_mix - 1.0).abs() < f32::EPSILON
73 }
74
75 /// Calculate the peak reduction ratio from mixing.
76 ///
77 /// Returns the ratio of post-mix peak to pre-mix peak. A value of 1.0
78 /// means no amplitude change, <1.0 means reduction, >1.0 means
79 /// amplification (rare with averaging).
80 #[must_use]
81 pub fn peak_ratio(&self) -> f32 {
82 if self.peak_before_mix.abs() < f32::EPSILON {
83 1.0 // Avoid division by zero for silent input
84 } else {
85 self.peak_after_mix / self.peak_before_mix
86 }
87 }
88}