stratum_dsp/preprocessing/
silence.rs1use crate::error::AnalysisError;
24
/// Configuration for frame-based silence detection.
///
/// Passed by value to `detect_and_trim`, which classifies fixed-size frames as
/// silent or non-silent by comparing their RMS amplitude against `threshold_db`.
#[derive(Debug, Clone, PartialEq)]
pub struct SilenceDetector {
    /// Silence threshold in decibels. It is converted to a linear amplitude
    /// (`10^(dB / 20)`); frames whose RMS is at or below that value count as silent.
    pub threshold_db: f32,

    /// Minimum length, in milliseconds, a run of silent frames must reach to be
    /// reported. Runs that begin at the very first frame are reported regardless.
    pub min_duration_ms: u32,

    /// Analysis frame length in samples. Must be greater than zero; consecutive
    /// frames advance by half of this value.
    pub frame_size: usize,
}
39
40impl Default for SilenceDetector {
41 fn default() -> Self {
42 Self {
43 threshold_db: -40.0,
44 min_duration_ms: 500,
45 frame_size: 2048,
46 }
47 }
48}
49
/// A contiguous span of samples that was classified as silence.
#[derive(Debug, Clone, PartialEq)]
pub struct SilenceRegion {
    /// Index of the first sample in the region.
    pub start_sample: usize,
    /// Index one past the last sample in the region (exclusive bound).
    pub end_sample: usize,
    /// Length of the region in seconds: `(end_sample - start_sample) / sample_rate`.
    pub duration_seconds: f32,
}
60
61pub fn detect_and_trim(
102 samples: &[f32],
103 sample_rate: u32,
104 detector: SilenceDetector,
105) -> Result<(Vec<f32>, Vec<(usize, usize)>), AnalysisError> {
106 if samples.is_empty() {
108 return Ok((Vec::new(), Vec::new()));
109 }
110
111 if sample_rate == 0 {
112 return Err(AnalysisError::InvalidInput("Sample rate must be > 0".to_string()));
113 }
114
115 if detector.frame_size == 0 {
116 return Err(AnalysisError::InvalidInput("Frame size must be > 0".to_string()));
117 }
118
119 if detector.frame_size > samples.len() {
120 log::warn!("Frame size ({}) larger than audio length ({}), treating as single frame",
121 detector.frame_size, samples.len());
122 }
123
124 log::debug!("Detecting silence: {} samples at {} Hz, threshold={:.1} dB, min_duration={} ms",
125 samples.len(), sample_rate, detector.threshold_db, detector.min_duration_ms);
126
127 let threshold_linear = 10.0_f32.powf(detector.threshold_db / 20.0);
129
130 let hop_size = detector.frame_size / 2; let num_frames = if samples.len() >= detector.frame_size {
133 (samples.len() - detector.frame_size) / hop_size + 1
134 } else {
135 1 };
137
138 let mut frame_rms = Vec::with_capacity(num_frames);
139 let mut frame_starts = Vec::with_capacity(num_frames);
140
141 for i in 0..num_frames {
142 let start = i * hop_size;
143 let end = (start + detector.frame_size).min(samples.len());
144
145 let sum_sq: f32 = samples[start..end]
147 .iter()
148 .map(|&x| x * x)
149 .sum();
150
151 let rms = if end > start {
152 (sum_sq / (end - start) as f32).sqrt()
153 } else {
154 0.0
155 };
156
157 frame_rms.push(rms);
158 frame_starts.push(start);
159 }
160
161 let mut frame_is_silent = Vec::with_capacity(num_frames);
163 for &rms in &frame_rms {
164 frame_is_silent.push(rms <= threshold_linear);
165 }
166
167 let min_duration_samples = (detector.min_duration_ms as f32 / 1000.0 * sample_rate as f32) as usize;
170 let min_duration_frames = (min_duration_samples + hop_size - 1) / hop_size; let mut silence_regions: Vec<SilenceRegion> = Vec::new();
174 let mut in_silence = false;
175 let mut silence_start_frame = 0;
176
177 for (frame_idx, &is_silent) in frame_is_silent.iter().enumerate() {
178 if is_silent && !in_silence {
179 in_silence = true;
181 silence_start_frame = frame_idx;
182 } else if !is_silent && in_silence {
183 in_silence = false;
185 let silence_end_frame = frame_idx;
186 let silence_duration_frames = silence_end_frame - silence_start_frame;
187
188 if silence_duration_frames >= min_duration_frames ||
190 silence_start_frame == 0 ||
191 silence_end_frame == num_frames {
192 let start_sample = frame_starts[silence_start_frame];
193 let end_sample = if silence_end_frame < frame_starts.len() {
194 frame_starts[silence_end_frame]
195 } else {
196 samples.len()
197 };
198
199 silence_regions.push(SilenceRegion {
200 start_sample,
201 end_sample,
202 duration_seconds: (end_sample - start_sample) as f32 / sample_rate as f32,
203 });
204 }
205 }
206 }
207
208 if in_silence {
210 let silence_duration_frames = num_frames - silence_start_frame;
211 if silence_duration_frames >= min_duration_frames || silence_start_frame == 0 {
212 let start_sample = frame_starts[silence_start_frame];
213 silence_regions.push(SilenceRegion {
214 start_sample,
215 end_sample: samples.len(),
216 duration_seconds: (samples.len() - start_sample) as f32 / sample_rate as f32,
217 });
218 }
219 }
220
221 let trim_start = if let Some(first_region) = silence_regions.first() {
223 if first_region.start_sample == 0 {
224 first_region.end_sample
225 } else {
226 0
227 }
228 } else {
229 0
230 };
231
232 let trim_end = if let Some(last_region) = silence_regions.last() {
233 if last_region.end_sample == samples.len() {
234 last_region.start_sample
235 } else {
236 samples.len()
237 }
238 } else {
239 samples.len()
240 };
241
242 let trim_start = trim_start.min(trim_end);
244 let trim_end = trim_end.max(trim_start);
245
246 let trimmed = if trim_start < trim_end && trim_end <= samples.len() {
248 samples[trim_start..trim_end].to_vec()
249 } else {
250 Vec::new()
251 };
252
253 let silence_map: Vec<(usize, usize)> = silence_regions
255 .iter()
256 .map(|r| (r.start_sample, r.end_sample))
257 .collect();
258
259 log::debug!("Silence detection: trimmed from {} to {} samples, found {} silence regions",
260 samples.len(), trimmed.len(), silence_map.len());
261
262 Ok((trimmed, silence_map))
263}
264
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds `total_samples` of digital silence with a sine-shaped signal of the
    /// given `amplitude` written into `audio_start..audio_end` (clamped to the
    /// buffer length).
    fn generate_test_audio_with_silence(
        total_samples: usize,
        audio_start: usize,
        audio_end: usize,
        amplitude: f32,
    ) -> Vec<f32> {
        let mut samples = vec![0.0f32; total_samples];
        for (i, sample) in samples
            .iter_mut()
            .enumerate()
            .take(audio_end)
            .skip(audio_start)
        {
            *sample = amplitude * (i as f32 / 1000.0).sin();
        }
        samples
    }

    /// One second of silence, one second of audio, one second of silence.
    #[test]
    fn test_detect_and_trim_leading_trailing() {
        let total_samples = 44100 * 3;
        let audio_start = 44100;
        let audio_end = 44100 * 2;
        let samples =
            generate_test_audio_with_silence(total_samples, audio_start, audio_end, 0.5);

        let detector = SilenceDetector::default();
        let (trimmed, silence_map) = detect_and_trim(&samples, 44100, detector).unwrap();

        assert!(trimmed.len() < samples.len(), "Should trim some silence");
        assert!(!trimmed.is_empty(), "Should keep audio content");

        assert!(!silence_map.is_empty(), "Should detect silence regions: {:?}", silence_map);
    }

    /// A fully silent buffer is one region covering everything, leaving no audio.
    #[test]
    fn test_detect_and_trim_all_silent() {
        let samples = vec![0.0f32; 44100];

        let detector = SilenceDetector::default();
        let (trimmed, silence_map) = detect_and_trim(&samples, 44100, detector).unwrap();

        assert!(trimmed.is_empty(), "All silent audio should be trimmed");
        assert_eq!(
            silence_map,
            vec![(0, samples.len())],
            "All silent audio should be reported as a single region"
        );
    }

    /// Continuous signal well above the threshold must survive (mostly) intact.
    #[test]
    fn test_detect_and_trim_no_silence() {
        let samples: Vec<f32> = (0..44100)
            .map(|i| 0.5 * (i as f32 / 1000.0).sin())
            .collect();

        let detector = SilenceDetector {
            threshold_db: -60.0,
            ..Default::default()
        };
        let (trimmed, _silence_map) = detect_and_trim(&samples, 44100, detector).unwrap();

        assert!(trimmed.len() > samples.len() / 2,
                "Should keep most audio when no silence detected");
    }

    /// Zero sample rate and zero frame size are both rejected.
    #[test]
    fn test_detect_and_trim_invalid_parameters() {
        let samples = vec![0.5f32; 44100];
        let detector = SilenceDetector::default();

        let result = detect_and_trim(&samples, 0, detector.clone());
        assert!(result.is_err());

        let mut bad_detector = detector.clone();
        bad_detector.frame_size = 0;
        let result = detect_and_trim(&samples, 44100, bad_detector);
        assert!(result.is_err());
    }

    /// Empty input short-circuits to empty output.
    #[test]
    fn test_detect_and_trim_empty_samples() {
        let samples: Vec<f32> = Vec::new();
        let detector = SilenceDetector::default();
        let (trimmed, silence_map) = detect_and_trim(&samples, 44100, detector).unwrap();

        assert!(trimmed.is_empty());
        assert!(silence_map.is_empty());
    }

    /// Raising the threshold can only increase the amount of detected silence.
    #[test]
    fn test_detect_and_trim_threshold_sensitivity() {
        let mut samples = vec![0.0f32; 44100 * 2];
        // Quiet half-second followed by a loud half-second, then digital silence.
        samples[..22050].fill(0.01);
        samples[22050..44100].fill(0.5);

        let detector_low = SilenceDetector {
            threshold_db: -60.0,
            ..Default::default()
        };
        let (_, silence_map_low) = detect_and_trim(&samples, 44100, detector_low).unwrap();

        let detector_high = SilenceDetector {
            threshold_db: -20.0,
            ..Default::default()
        };
        let (_, silence_map_high) = detect_and_trim(&samples, 44100, detector_high).unwrap();

        let total_silence_low: usize = silence_map_low.iter()
            .map(|(start, end)| end - start)
            .sum();
        let total_silence_high: usize = silence_map_high.iter()
            .map(|(start, end)| end - start)
            .sum();

        assert!(total_silence_high >= total_silence_low,
                "Higher threshold should detect more silence");
    }

    /// An interior gap of 5000 samples (~113 ms at 44.1 kHz) is shorter than the
    /// 500 ms minimum, so it must be neither reported nor trimmed.
    #[test]
    fn test_detect_and_trim_min_duration() {
        let mut samples = vec![0.5f32; 44100 * 2];
        samples[10000..15000].fill(0.0);

        let detector = SilenceDetector {
            min_duration_ms: 500,
            ..Default::default()
        };
        let (trimmed, silence_map) = detect_and_trim(&samples, 44100, detector).unwrap();

        assert!(
            silence_map.is_empty(),
            "Silence shorter than min_duration_ms should not be reported: {:?}",
            silence_map
        );
        assert_eq!(trimmed.len(), samples.len(), "Nothing should be trimmed");
    }
}
420