Skip to main content

audio_analysis_core/
clip.rs

1use crate::{
2    normalized_samples, seconds_to_samples, AudioFormatSpec, InterpolationMode, SampleRate,
3};
4use math_signal_core::resample_interleaved;
5use video_analysis_core::{AudioBuffer, DetectError, OwnedAudioFrame, Result, Timestamp};
6
/// Owned, interleaved `f32` audio clip intended for whole-buffer editing.
///
/// The fields are public, so a value built with a struct literal bypasses the
/// checks performed by `AudioClip::new`. Prefer the constructor whenever the
/// inputs are not already known to be consistent.
#[derive(Debug, Clone, PartialEq)]
pub struct AudioClip {
    /// Sample rate in hertz.
    pub sample_rate: u32,
    /// Number of interleaved channels.
    pub channels: u16,
    /// Interleaved `f32` samples; one value per channel for each sample position.
    pub samples: Vec<f32>,
}
17
/// Policy for concatenating clips whose formats may differ.
///
/// Under either policy, concatenation rejects clips whose channel counts differ.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConcatPolicy {
    /// Require sample rate and channel count to match exactly.
    RequireSameFormat,
    /// Resample every clip to the first clip's sample rate. Channel counts must match.
    ResampleToFirst,
}
26
/// Policy for mixing clips whose lengths may differ.
///
/// Mixing always requires matching sample rates and channel counts; the policy
/// only decides how differing lengths are handled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MixPolicy {
    /// Require identical format and length.
    RequireSameFormat,
    /// Pad shorter clips with silence so the result is as long as the longest input.
    PadToLongest,
    /// Truncate longer clips so the result is as long as the shortest input.
    TruncateToShortest,
}
37
/// Shape of the gain ramp used for a fade.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FadeCurve {
    /// Linear gain ramp.
    Linear,
    /// Equal-power sine/cosine ramp.
    EqualPower,
    /// Smooth exponential-style ramp.
    Exponential,
}
48
49impl AudioClip {
50    /// Creates a validated clip.
51    pub fn new(sample_rate: u32, channels: u16, samples: Vec<f32>) -> Result<Self> {
52        AudioFormatSpec::new(sample_rate, channels)?;
53        if !samples.len().is_multiple_of(channels as usize) {
54            return Err(DetectError::InvalidArgument(format!(
55                "audio clip sample length {} is not divisible by channel count {channels}",
56                samples.len()
57            )));
58        }
59        if samples.iter().any(|sample| !sample.is_finite()) {
60            return Err(DetectError::InvalidArgument(
61                "audio clip samples must be finite".to_string(),
62            ));
63        }
64        Ok(Self {
65            sample_rate,
66            channels,
67            samples,
68        })
69    }
70
71    /// Builds a single clip from ordered frames.
72    pub fn from_frames(frames: &[OwnedAudioFrame]) -> Result<Self> {
73        if frames.is_empty() {
74            return Err(DetectError::InvalidArgument(
75                "audio clip requires at least one frame".to_string(),
76            ));
77        }
78        let sample_rate = frames[0].sample_rate;
79        let channels = frames[0].channels;
80        let mut samples = Vec::new();
81        for frame in frames {
82            if frame.sample_rate != sample_rate || frame.channels != channels {
83                return Err(DetectError::InvalidArgument(
84                    "all frames must share sample rate and channel count".to_string(),
85                ));
86            }
87            let normalized = normalized_samples(&frame.data);
88            if !normalized.len().is_multiple_of(channels as usize) {
89                return Err(DetectError::InvalidArgument(format!(
90                    "audio frame sample length {} is not divisible by channel count {channels}",
91                    normalized.len()
92                )));
93            }
94            samples.extend(normalized);
95        }
96        Self::new(sample_rate, channels, samples)
97    }
98
99    /// Converts the clip to one owned frame at the supplied timestamp.
100    pub fn to_frame(&self, timestamp: Timestamp) -> Result<OwnedAudioFrame> {
101        OwnedAudioFrame::new(
102            timestamp,
103            self.sample_rate,
104            self.channels,
105            AudioBuffer::F32(self.samples.clone()),
106        )
107    }
108
109    /// Returns the number of samples per channel.
110    pub fn samples_per_channel(&self) -> usize {
111        self.samples.len() / self.channels as usize
112    }
113
114    /// Returns clip duration in seconds.
115    pub fn duration_seconds(&self) -> f64 {
116        if self.sample_rate == 0 {
117            return 0.0;
118        }
119        self.samples_per_channel() as f64 / self.sample_rate as f64
120    }
121
122    /// Slices by per-channel sample indices.
123    pub fn slice_samples(&self, start_sample: u64, end_sample: u64) -> Result<Self> {
124        if start_sample > end_sample {
125            return Err(DetectError::InvalidArgument(
126                "slice start_sample must be less than or equal to end_sample".to_string(),
127            ));
128        }
129        let total = self.samples_per_channel() as u64;
130        if end_sample > total {
131            return Err(DetectError::InvalidArgument(format!(
132                "slice end_sample {end_sample} exceeds clip length {total}"
133            )));
134        }
135        let channels = self.channels as usize;
136        let start = start_sample as usize * channels;
137        let end = end_sample as usize * channels;
138        Self::new(
139            self.sample_rate,
140            self.channels,
141            self.samples[start..end].to_vec(),
142        )
143    }
144
145    /// Slices by seconds.
146    pub fn slice_seconds(&self, start_seconds: f64, end_seconds: f64) -> Result<Self> {
147        if start_seconds > end_seconds {
148            return Err(DetectError::InvalidArgument(
149                "slice start_seconds must be less than or equal to end_seconds".to_string(),
150            ));
151        }
152        let start = seconds_to_samples(start_seconds, self.sample_rate)?;
153        let end = seconds_to_samples(end_seconds, self.sample_rate)?;
154        self.slice_samples(start, end)
155    }
156
157    /// Splits a clip at ordered boundary times.
158    pub fn split_at_seconds(&self, boundaries: &[f64]) -> Result<Vec<Self>> {
159        let mut sample_boundaries = Vec::with_capacity(boundaries.len() + 2);
160        sample_boundaries.push(0);
161        let total = self.samples_per_channel() as u64;
162        let mut previous = 0;
163        for boundary in boundaries {
164            let sample = seconds_to_samples(*boundary, self.sample_rate)?;
165            if sample < previous || sample > total {
166                return Err(DetectError::InvalidArgument(
167                    "split boundaries must be ordered and inside the clip duration".to_string(),
168                ));
169            }
170            sample_boundaries.push(sample);
171            previous = sample;
172        }
173        sample_boundaries.push(total);
174        sample_boundaries
175            .windows(2)
176            .map(|range| self.slice_samples(range[0], range[1]))
177            .collect()
178    }
179
180    /// Concatenates clips.
181    pub fn concat(clips: &[Self], policy: ConcatPolicy) -> Result<Self> {
182        let first = clips.first().ok_or_else(|| {
183            DetectError::InvalidArgument("concat requires at least one clip".to_string())
184        })?;
185        let mut samples = Vec::new();
186        for clip in clips {
187            if clip.channels != first.channels {
188                return Err(DetectError::InvalidArgument(
189                    "concat requires matching channel counts".to_string(),
190                ));
191            }
192            match policy {
193                ConcatPolicy::RequireSameFormat if clip.sample_rate != first.sample_rate => {
194                    return Err(DetectError::InvalidArgument(
195                        "concat requires matching sample rates".to_string(),
196                    ));
197                }
198                ConcatPolicy::RequireSameFormat => samples.extend_from_slice(&clip.samples),
199                ConcatPolicy::ResampleToFirst => {
200                    let converted = if clip.sample_rate == first.sample_rate {
201                        clip.samples.clone()
202                    } else {
203                        resample_interleaved(
204                            &clip.samples,
205                            clip.channels,
206                            SampleRate::new(clip.sample_rate)?,
207                            SampleRate::new(first.sample_rate)?,
208                            InterpolationMode::Linear,
209                        )?
210                    };
211                    samples.extend(converted);
212                }
213            }
214        }
215        Self::new(first.sample_rate, first.channels, samples)
216    }
217
218    /// Mixes clips by summing matching interleaved samples.
219    pub fn mix(clips: &[Self], policy: MixPolicy) -> Result<Self> {
220        let first = clips.first().ok_or_else(|| {
221            DetectError::InvalidArgument("mix requires at least one clip".to_string())
222        })?;
223        for clip in clips {
224            if clip.sample_rate != first.sample_rate || clip.channels != first.channels {
225                return Err(DetectError::InvalidArgument(
226                    "mix requires matching sample rates and channel counts".to_string(),
227                ));
228            }
229        }
230        let target_len = match policy {
231            MixPolicy::RequireSameFormat => {
232                let len = first.samples.len();
233                if clips.iter().any(|clip| clip.samples.len() != len) {
234                    return Err(DetectError::InvalidArgument(
235                        "mix RequireSameFormat requires identical sample lengths".to_string(),
236                    ));
237                }
238                len
239            }
240            MixPolicy::PadToLongest => clips
241                .iter()
242                .map(|clip| clip.samples.len())
243                .max()
244                .unwrap_or(0),
245            MixPolicy::TruncateToShortest => clips
246                .iter()
247                .map(|clip| clip.samples.len())
248                .min()
249                .unwrap_or(0),
250        };
251        let mut mixed = vec![0.0; target_len];
252        for clip in clips {
253            for (index, sample) in clip.samples.iter().take(target_len).enumerate() {
254                mixed[index] += *sample;
255            }
256        }
257        Self::new(first.sample_rate, first.channels, mixed)
258    }
259}
260
#[cfg(test)]
mod tests {
    use super::*;
    use crate::samples_to_seconds;
    use video_analysis_core::{AudioBuffer, Timebase, Timestamp};

    /// Builds a 4 Hz, two-channel clip from interleaved samples; panics if invalid.
    fn stereo_clip(interleaved: Vec<f32>) -> AudioClip {
        AudioClip::new(4, 2, interleaved).unwrap()
    }

    #[test]
    fn validates_audio_clip_format() {
        // Zero sample rate.
        let zero_rate = AudioClip::new(0, 1, vec![0.0]);
        assert!(zero_rate.is_err());
        // Zero channels.
        let zero_channels = AudioClip::new(48_000, 0, vec![0.0]);
        assert!(zero_channels.is_err());
        // One sample cannot fill two channels.
        let ragged = AudioClip::new(48_000, 2, vec![0.0]);
        assert!(ragged.is_err());
        // Non-finite sample.
        let not_a_number = AudioClip::new(48_000, 1, vec![f32::NAN]);
        assert!(not_a_number.is_err());
    }

    #[test]
    fn converts_frames_and_slices() {
        let timestamp = Timestamp::new(0, Timebase::new(1, 4));
        let payload = AudioBuffer::F32(vec![0.0, 0.1, 0.2, 0.3]);
        let frame = OwnedAudioFrame::new(timestamp, 4, 2, payload).unwrap();

        let converted = AudioClip::from_frames(&[frame]).unwrap();
        assert_eq!(converted.samples_per_channel(), 2);

        let expected_duration = samples_to_seconds(2, 4).unwrap();
        assert_eq!(expected_duration, converted.duration_seconds());

        let tail = converted.slice_samples(1, 2).unwrap();
        assert_eq!(tail.samples, vec![0.2, 0.3]);

        // A reversed time range must be rejected.
        assert!(converted.slice_seconds(0.75, 0.25).is_err());
    }

    #[test]
    fn split_and_concat_round_trip() {
        let original = stereo_clip(vec![0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]);

        let halves = original.split_at_seconds(&[0.5]).unwrap();
        assert_eq!(halves.len(), 2);

        let rejoined = AudioClip::concat(&halves, ConcatPolicy::RequireSameFormat).unwrap();
        assert_eq!(rejoined.samples, original.samples);
    }

    #[test]
    fn concat_and_mix_validate_policies() {
        let base = AudioClip::new(4, 1, vec![1.0, 2.0]).unwrap();
        let faster = AudioClip::new(8, 1, vec![3.0, 4.0]).unwrap();

        // Differing sample rates: rejected when strict, accepted when resampling.
        let mismatched_rates = [base.clone(), faster];
        assert!(AudioClip::concat(&mismatched_rates, ConcatPolicy::RequireSameFormat).is_err());
        assert!(AudioClip::concat(&mismatched_rates, ConcatPolicy::ResampleToFirst).is_ok());

        // Same format, differing lengths.
        let shorter = AudioClip::new(4, 1, vec![1.0]).unwrap();
        let uneven_lengths = [base, shorter];
        assert!(AudioClip::mix(&uneven_lengths, MixPolicy::RequireSameFormat).is_err());

        let padded = AudioClip::mix(&uneven_lengths, MixPolicy::PadToLongest).unwrap();
        assert_eq!(padded.samples, vec![2.0, 2.0]);

        let truncated = AudioClip::mix(&uneven_lengths, MixPolicy::TruncateToShortest).unwrap();
        assert_eq!(truncated.samples, vec![2.0]);
    }
}