1use crate::{
2 normalized_samples, seconds_to_samples, AudioFormatSpec, InterpolationMode, SampleRate,
3};
4use math_signal_core::resample_interleaved;
5use video_analysis_core::{AudioBuffer, DetectError, OwnedAudioFrame, Result, Timestamp};
6
/// An in-memory block of audio stored as interleaved `f32` samples.
///
/// The fields are public, so a value can be built with a struct literal, but
/// only [`AudioClip::new`] validates the format and the sample data.
#[derive(Clone, Debug, PartialEq)]
pub struct AudioClip {
    /// Sample rate of the clip, in samples per second per channel.
    pub sample_rate: u32,
    /// Number of interleaved channels.
    pub channels: u16,
    /// Interleaved sample data; `AudioClip::new` requires the length to be a
    /// multiple of `channels` and every value to be finite.
    pub samples: Vec<f32>,
}
17
/// How `AudioClip::concat` treats clips whose sample rate differs from the
/// first clip's. Channel counts must match under either policy.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ConcatPolicy {
    /// Fail with an invalid-argument error when any clip's sample rate differs
    /// from the first clip's.
    RequireSameFormat,
    /// Resample clips with a different rate to the first clip's rate (linear
    /// interpolation) before appending them.
    ResampleToFirst,
}
26
/// How `AudioClip::mix` chooses the output length when summing clips.
///
/// Sample rate and channel count must match across all clips regardless of
/// the policy chosen.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MixPolicy {
    /// Every clip must have exactly the same number of samples; otherwise the
    /// mix fails with an invalid-argument error.
    RequireSameFormat,
    /// The output is as long as the longest clip; shorter clips contribute
    /// nothing past their own end.
    PadToLongest,
    /// The output is as long as the shortest clip; the remainder of longer
    /// clips is dropped.
    TruncateToShortest,
}
37
/// Selector for a fade curve shape.
///
/// NOTE(review): nothing in the visible part of this file consumes this enum,
/// so the exact gain formula behind each variant is defined by whichever code
/// applies the fade — confirm there before relying on a particular shape.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FadeCurve {
    /// Linear curve.
    Linear,
    /// Equal-power curve.
    EqualPower,
    /// Exponential curve.
    Exponential,
}
48
49impl AudioClip {
50 pub fn new(sample_rate: u32, channels: u16, samples: Vec<f32>) -> Result<Self> {
52 AudioFormatSpec::new(sample_rate, channels)?;
53 if !samples.len().is_multiple_of(channels as usize) {
54 return Err(DetectError::InvalidArgument(format!(
55 "audio clip sample length {} is not divisible by channel count {channels}",
56 samples.len()
57 )));
58 }
59 if samples.iter().any(|sample| !sample.is_finite()) {
60 return Err(DetectError::InvalidArgument(
61 "audio clip samples must be finite".to_string(),
62 ));
63 }
64 Ok(Self {
65 sample_rate,
66 channels,
67 samples,
68 })
69 }
70
71 pub fn from_frames(frames: &[OwnedAudioFrame]) -> Result<Self> {
73 if frames.is_empty() {
74 return Err(DetectError::InvalidArgument(
75 "audio clip requires at least one frame".to_string(),
76 ));
77 }
78 let sample_rate = frames[0].sample_rate;
79 let channels = frames[0].channels;
80 let mut samples = Vec::new();
81 for frame in frames {
82 if frame.sample_rate != sample_rate || frame.channels != channels {
83 return Err(DetectError::InvalidArgument(
84 "all frames must share sample rate and channel count".to_string(),
85 ));
86 }
87 let normalized = normalized_samples(&frame.data);
88 if !normalized.len().is_multiple_of(channels as usize) {
89 return Err(DetectError::InvalidArgument(format!(
90 "audio frame sample length {} is not divisible by channel count {channels}",
91 normalized.len()
92 )));
93 }
94 samples.extend(normalized);
95 }
96 Self::new(sample_rate, channels, samples)
97 }
98
99 pub fn to_frame(&self, timestamp: Timestamp) -> Result<OwnedAudioFrame> {
101 OwnedAudioFrame::new(
102 timestamp,
103 self.sample_rate,
104 self.channels,
105 AudioBuffer::F32(self.samples.clone()),
106 )
107 }
108
109 pub fn samples_per_channel(&self) -> usize {
111 self.samples.len() / self.channels as usize
112 }
113
114 pub fn duration_seconds(&self) -> f64 {
116 if self.sample_rate == 0 {
117 return 0.0;
118 }
119 self.samples_per_channel() as f64 / self.sample_rate as f64
120 }
121
122 pub fn slice_samples(&self, start_sample: u64, end_sample: u64) -> Result<Self> {
124 if start_sample > end_sample {
125 return Err(DetectError::InvalidArgument(
126 "slice start_sample must be less than or equal to end_sample".to_string(),
127 ));
128 }
129 let total = self.samples_per_channel() as u64;
130 if end_sample > total {
131 return Err(DetectError::InvalidArgument(format!(
132 "slice end_sample {end_sample} exceeds clip length {total}"
133 )));
134 }
135 let channels = self.channels as usize;
136 let start = start_sample as usize * channels;
137 let end = end_sample as usize * channels;
138 Self::new(
139 self.sample_rate,
140 self.channels,
141 self.samples[start..end].to_vec(),
142 )
143 }
144
145 pub fn slice_seconds(&self, start_seconds: f64, end_seconds: f64) -> Result<Self> {
147 if start_seconds > end_seconds {
148 return Err(DetectError::InvalidArgument(
149 "slice start_seconds must be less than or equal to end_seconds".to_string(),
150 ));
151 }
152 let start = seconds_to_samples(start_seconds, self.sample_rate)?;
153 let end = seconds_to_samples(end_seconds, self.sample_rate)?;
154 self.slice_samples(start, end)
155 }
156
157 pub fn split_at_seconds(&self, boundaries: &[f64]) -> Result<Vec<Self>> {
159 let mut sample_boundaries = Vec::with_capacity(boundaries.len() + 2);
160 sample_boundaries.push(0);
161 let total = self.samples_per_channel() as u64;
162 let mut previous = 0;
163 for boundary in boundaries {
164 let sample = seconds_to_samples(*boundary, self.sample_rate)?;
165 if sample < previous || sample > total {
166 return Err(DetectError::InvalidArgument(
167 "split boundaries must be ordered and inside the clip duration".to_string(),
168 ));
169 }
170 sample_boundaries.push(sample);
171 previous = sample;
172 }
173 sample_boundaries.push(total);
174 sample_boundaries
175 .windows(2)
176 .map(|range| self.slice_samples(range[0], range[1]))
177 .collect()
178 }
179
180 pub fn concat(clips: &[Self], policy: ConcatPolicy) -> Result<Self> {
182 let first = clips.first().ok_or_else(|| {
183 DetectError::InvalidArgument("concat requires at least one clip".to_string())
184 })?;
185 let mut samples = Vec::new();
186 for clip in clips {
187 if clip.channels != first.channels {
188 return Err(DetectError::InvalidArgument(
189 "concat requires matching channel counts".to_string(),
190 ));
191 }
192 match policy {
193 ConcatPolicy::RequireSameFormat if clip.sample_rate != first.sample_rate => {
194 return Err(DetectError::InvalidArgument(
195 "concat requires matching sample rates".to_string(),
196 ));
197 }
198 ConcatPolicy::RequireSameFormat => samples.extend_from_slice(&clip.samples),
199 ConcatPolicy::ResampleToFirst => {
200 let converted = if clip.sample_rate == first.sample_rate {
201 clip.samples.clone()
202 } else {
203 resample_interleaved(
204 &clip.samples,
205 clip.channels,
206 SampleRate::new(clip.sample_rate)?,
207 SampleRate::new(first.sample_rate)?,
208 InterpolationMode::Linear,
209 )?
210 };
211 samples.extend(converted);
212 }
213 }
214 }
215 Self::new(first.sample_rate, first.channels, samples)
216 }
217
218 pub fn mix(clips: &[Self], policy: MixPolicy) -> Result<Self> {
220 let first = clips.first().ok_or_else(|| {
221 DetectError::InvalidArgument("mix requires at least one clip".to_string())
222 })?;
223 for clip in clips {
224 if clip.sample_rate != first.sample_rate || clip.channels != first.channels {
225 return Err(DetectError::InvalidArgument(
226 "mix requires matching sample rates and channel counts".to_string(),
227 ));
228 }
229 }
230 let target_len = match policy {
231 MixPolicy::RequireSameFormat => {
232 let len = first.samples.len();
233 if clips.iter().any(|clip| clip.samples.len() != len) {
234 return Err(DetectError::InvalidArgument(
235 "mix RequireSameFormat requires identical sample lengths".to_string(),
236 ));
237 }
238 len
239 }
240 MixPolicy::PadToLongest => clips
241 .iter()
242 .map(|clip| clip.samples.len())
243 .max()
244 .unwrap_or(0),
245 MixPolicy::TruncateToShortest => clips
246 .iter()
247 .map(|clip| clip.samples.len())
248 .min()
249 .unwrap_or(0),
250 };
251 let mut mixed = vec![0.0; target_len];
252 for clip in clips {
253 for (index, sample) in clip.samples.iter().take(target_len).enumerate() {
254 mixed[index] += *sample;
255 }
256 }
257 Self::new(first.sample_rate, first.channels, mixed)
258 }
259}
260
#[cfg(test)]
mod tests {
    use super::*;
    use crate::samples_to_seconds;
    use video_analysis_core::{AudioBuffer, Timebase, Timestamp};

    /// Builds a two-channel clip at a sample rate of 4 from interleaved samples.
    fn clip(samples: Vec<f32>) -> AudioClip {
        AudioClip::new(4, 2, samples).unwrap()
    }

    #[test]
    fn validates_audio_clip_format() {
        // (sample_rate, channels, samples) combinations the constructor must reject.
        let invalid: Vec<(u32, u16, Vec<f32>)> = vec![
            (0, 1, vec![0.0]),
            (48_000, 0, vec![0.0]),
            (48_000, 2, vec![0.0]),
            (48_000, 1, vec![f32::NAN]),
        ];
        for (sample_rate, channels, samples) in invalid {
            assert!(AudioClip::new(sample_rate, channels, samples).is_err());
        }
    }

    #[test]
    fn converts_frames_and_slices() {
        let timestamp = Timestamp::new(0, Timebase::new(1, 4));
        let data = AudioBuffer::F32(vec![0.0, 0.1, 0.2, 0.3]);
        let frame = OwnedAudioFrame::new(timestamp, 4, 2, data).unwrap();

        let converted = AudioClip::from_frames(&[frame]).unwrap();
        assert_eq!(converted.samples_per_channel(), 2);
        assert_eq!(
            samples_to_seconds(2, 4).unwrap(),
            converted.duration_seconds()
        );

        let tail = converted.slice_samples(1, 2).unwrap();
        assert_eq!(tail.samples, vec![0.2, 0.3]);

        // A reversed time range must be rejected.
        assert!(converted.slice_seconds(0.75, 0.25).is_err());
    }

    #[test]
    fn split_and_concat_round_trip() {
        let input = clip(vec![0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]);

        let parts = input.split_at_seconds(&[0.5]).unwrap();
        assert_eq!(parts.len(), 2);

        let rejoined = AudioClip::concat(&parts, ConcatPolicy::RequireSameFormat).unwrap();
        assert_eq!(rejoined.samples, input.samples);
    }

    #[test]
    fn concat_and_mix_validate_policies() {
        let a = AudioClip::new(4, 1, vec![1.0, 2.0]).unwrap();
        let b = AudioClip::new(8, 1, vec![3.0, 4.0]).unwrap();

        // Same channel count, different sample rates.
        let mixed_rates = [a.clone(), b];
        assert!(AudioClip::concat(&mixed_rates, ConcatPolicy::RequireSameFormat).is_err());
        assert!(AudioClip::concat(&mixed_rates, ConcatPolicy::ResampleToFirst).is_ok());

        // Same format, different lengths.
        let c = AudioClip::new(4, 1, vec![1.0]).unwrap();
        let uneven = [a, c];
        assert!(AudioClip::mix(&uneven, MixPolicy::RequireSameFormat).is_err());

        let padded = AudioClip::mix(&uneven, MixPolicy::PadToLongest).unwrap();
        assert_eq!(padded.samples, vec![2.0, 2.0]);

        let truncated = AudioClip::mix(&uneven, MixPolicy::TruncateToShortest).unwrap();
        assert_eq!(truncated.samples, vec![2.0]);
    }
}