1#![warn(missing_docs)]
14
15mod loudness;
16mod silence;
17mod wav;
18mod waveform;
19
20use std::collections::HashMap;
21use std::sync::Arc;
22use std::time::Duration;
23
24use rskit_errors::{AppError, AppResult, ErrorCode};
25use rskit_media::{
26 AudioTrackInfo, ChannelLayout, Codec, Format, MediaMetadata, MediaProbe, MediaType, Registry,
27 Resolution, SampleRate, SilenceInterval, Timestamp, Track, TrackKind, codec, format,
28};
29use rskit_storage::FileSource;
30use tokio::io::AsyncReadExt;
31
32use crate::loudness::LoudnessMeter;
33use crate::silence::{SilenceConfig, detect_silence};
34use crate::wav::WavReader;
35use crate::waveform::{WaveformConfig, generate_waveform};
36
/// Settings for the pure Rust audio probe backend.
///
/// Start from [`Config::default`] and adjust individual values with the `with_*` builders.
/// Two configurations compare equal when every field matches.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Config {
    /// Largest source size, in bytes, that the probe will read; larger sources are rejected.
    pub max_probe_bytes: u64,
    /// Number of waveform bins requested when building probe metadata.
    pub metadata_waveform_bins: usize,
}
45
46impl Default for Config {
47 fn default() -> Self {
48 Self {
49 max_probe_bytes: 64 * 1024 * 1024,
50 metadata_waveform_bins: 20,
51 }
52 }
53}
54
55impl Config {
56 #[must_use]
58 pub fn with_max_probe_bytes(mut self, max_probe_bytes: u64) -> Self {
59 self.max_probe_bytes = max_probe_bytes;
60 self
61 }
62
63 #[must_use]
65 pub fn with_metadata_waveform_bins(mut self, metadata_waveform_bins: usize) -> Self {
66 self.metadata_waveform_bins = metadata_waveform_bins;
67 self
68 }
69}
70
71pub fn register(registry: &mut Registry, config: Config) -> AppResult<()> {
73 let config = Arc::new(config);
74 registry.register_probe(
75 "audio",
76 Arc::new(move || {
77 Ok(Arc::new(AudioProbe {
78 config: Arc::clone(&config),
79 }))
80 }),
81 )
82}
83
84struct AudioProbe {
85 config: Arc<Config>,
86}
87
88#[async_trait::async_trait]
89impl MediaProbe for AudioProbe {
90 async fn probe(&self, source: &FileSource) -> AppResult<MediaMetadata> {
91 let wav = self.read_wav(source).await?;
92 Ok(metadata_for_wav(&wav, self.config.metadata_waveform_bins))
93 }
94
95 async fn thumbnail(
96 &self,
97 _source: &FileSource,
98 _at: Timestamp,
99 _resolution: Option<Resolution>,
100 ) -> AppResult<FileSource> {
101 unsupported("audio thumbnail extraction is not supported by the pure Rust audio backend")
102 }
103
104 async fn thumbnails(
105 &self,
106 _source: &FileSource,
107 _interval: Duration,
108 _resolution: Option<Resolution>,
109 ) -> AppResult<Vec<FileSource>> {
110 unsupported("audio thumbnail extraction is not supported by the pure Rust audio backend")
111 }
112
113 async fn silence_detect(
114 &self,
115 source: &FileSource,
116 min_duration: Duration,
117 noise_threshold_db: f64,
118 ) -> AppResult<Vec<SilenceInterval>> {
119 let wav = self.read_wav(source).await?;
120 let threshold = 10f64.powf(noise_threshold_db / 20.0) as f32;
121 let config = SilenceConfig {
122 threshold,
123 min_duration_secs: min_duration.as_secs_f64(),
124 };
125
126 Ok(detect_silence(&wav, &config)
127 .into_iter()
128 .map(|region| SilenceInterval {
129 start: Timestamp::from_seconds(region.start_secs),
130 end: Timestamp::from_seconds(region.end_secs),
131 duration: Duration::from_secs_f64(region.duration_secs()),
132 })
133 .collect())
134 }
135}
136
137impl AudioProbe {
138 async fn read_wav(&self, source: &FileSource) -> AppResult<WavReader> {
139 let data = read_bounded(source, self.config.max_probe_bytes).await?;
140 WavReader::from_bytes(&data)
141 }
142}
143
144async fn read_bounded(source: &FileSource, max_bytes: u64) -> AppResult<Vec<u8>> {
145 let mut reader = source.reader().await?.take(max_bytes.saturating_add(1));
146 let capacity = usize::try_from(max_bytes.min(1024 * 1024)).map_err(|_| {
147 AppError::new(
148 ErrorCode::InvalidInput,
149 "audio probe byte limit does not fit in memory",
150 )
151 })?;
152 let mut data = Vec::with_capacity(capacity);
153 reader.read_to_end(&mut data).await.map_err(|error| {
154 AppError::new(
155 ErrorCode::Internal,
156 format!("failed to read audio source: {error}"),
157 )
158 })?;
159 if data.len() as u64 > max_bytes {
160 return Err(AppError::new(
161 ErrorCode::InvalidInput,
162 format!("audio source exceeds probe limit of {max_bytes} bytes"),
163 ));
164 }
165 Ok(data)
166}
167
168fn metadata_for_wav(wav: &WavReader, waveform_bins: usize) -> MediaMetadata {
169 let duration = Duration::from_secs_f64(wav.duration_secs());
170 let channels = channel_layout(wav.spec.channels);
171 let bitrate = u64::from(wav.spec.sample_rate)
172 .saturating_mul(u64::from(wav.spec.channels))
173 .saturating_mul(u64::from(wav.spec.bits_per_sample));
174 let loudness = LoudnessMeter::measure(wav);
175 let waveform = generate_waveform(
176 wav,
177 &WaveformConfig {
178 bins: waveform_bins,
179 channel: None,
180 },
181 );
182
183 let mut tags = HashMap::new();
184 tags.insert("audio.peak".to_owned(), loudness.peak.to_string());
185 tags.insert("audio.peak_db".to_owned(), loudness.peak_db.to_string());
186 tags.insert("audio.rms".to_owned(), loudness.rms.to_string());
187 tags.insert("audio.rms_db".to_owned(), loudness.rms_db.to_string());
188 tags.insert("audio.lufs".to_owned(), loudness.lufs.to_string());
189 tags.insert("audio.waveform_bins".to_owned(), waveform.len().to_string());
190 if let Some(max_peak) = waveform.iter().map(|point| point.peak).reduce(f32::max) {
191 tags.insert("audio.waveform_peak".to_owned(), max_peak.to_string());
192 }
193 if let Some(max_rms) = waveform.iter().map(|point| point.rms).reduce(f32::max) {
194 tags.insert("audio.waveform_rms".to_owned(), max_rms.to_string());
195 }
196 if let Some(min_sample) = waveform.iter().map(|point| point.min).reduce(f32::min) {
197 tags.insert("audio.waveform_min".to_owned(), min_sample.to_string());
198 }
199 if let Some(max_sample) = waveform.iter().map(|point| point.max).reduce(f32::max) {
200 tags.insert("audio.waveform_max".to_owned(), max_sample.to_string());
201 }
202
203 MediaMetadata {
204 media_type: MediaType::Audio,
205 format: Format::new(format::WAV),
206 duration: Some(duration),
207 size: None,
208 bitrate: Some(bitrate),
209 tracks: vec![Track {
210 index: 0,
211 kind: TrackKind::Audio,
212 codec: Some(Codec::new(codec::audio::PCM)),
213 bitrate: Some(bitrate),
214 language: None,
215 is_default: true,
216 title: None,
217 duration: Some(duration),
218 video: None,
219 audio: Some(AudioTrackInfo {
220 sample_rate: SampleRate::hz(wav.spec.sample_rate),
221 channels,
222 bit_depth: Some(wav.spec.bits_per_sample as u8),
223 }),
224 subtitle: None,
225 }],
226 tags,
227 created_at: None,
228 }
229}
230
231fn channel_layout(channels: u16) -> ChannelLayout {
232 match channels {
233 1 => ChannelLayout::Mono,
234 2 => ChannelLayout::Stereo,
235 6 => ChannelLayout::Surround51,
236 8 => ChannelLayout::Surround71,
237 channels => ChannelLayout::Custom(channels),
238 }
239}
240
241fn unsupported<T>(message: &'static str) -> AppResult<T> {
242 Err(AppError::new(ErrorCode::InvalidInput, message))
243}
244
#[cfg(test)]
mod tests {
    use super::*;
    use rskit_errors::ErrorCode;

    /// Builders set exactly the field they name, and channel counts map to fixed layouts.
    #[test]
    fn config_builders_and_channel_layouts_are_deterministic() {
        let built = Config::default()
            .with_max_probe_bytes(128)
            .with_metadata_waveform_bins(4);
        assert_eq!(built.max_probe_bytes, 128);
        assert_eq!(built.metadata_waveform_bins, 4);

        let expectations = [
            (1, ChannelLayout::Mono),
            (2, ChannelLayout::Stereo),
            (6, ChannelLayout::Surround51),
            (8, ChannelLayout::Surround71),
            (3, ChannelLayout::Custom(3)),
        ];
        for (count, expected) in expectations {
            assert_eq!(channel_layout(count), expected);
        }
    }

    /// The helper reports `InvalidInput` and keeps the caller's message.
    #[test]
    fn unsupported_returns_invalid_input() {
        let failure = unsupported::<()>("not supported").unwrap_err();

        assert_eq!(failure.code(), ErrorCode::InvalidInput);
        assert!(failure.message().contains("not supported"));
    }

    /// A six-byte source must be rejected when the limit is three bytes.
    #[tokio::test]
    async fn read_bounded_rejects_sources_over_limit() {
        let oversized = FileSource::Bytes(bytes::Bytes::from_static(b"abcdef"));

        let failure = read_bounded(&oversized, 3).await.unwrap_err();

        assert_eq!(failure.code(), ErrorCode::InvalidInput);
        assert!(failure.message().contains("exceeds probe limit"));
    }

    /// Three channels yield a custom layout, and zero bins yield no waveform extrema tags.
    #[test]
    fn metadata_for_wav_handles_custom_channels_and_empty_waveform() {
        let spec = wav::WavSpec {
            channels: 3,
            sample_rate: 48_000,
            bits_per_sample: 16,
        };
        let reader = WavReader {
            spec,
            samples: vec![0.0; 9],
        };

        let metadata = metadata_for_wav(&reader, 0);

        assert_eq!(metadata.media_type, MediaType::Audio);
        assert_eq!(metadata.bitrate, Some(48_000 * 3 * 16));
        assert_eq!(
            metadata.tags.get("audio.waveform_bins").map(String::as_str),
            Some("0")
        );
        assert!(!metadata.tags.contains_key("audio.waveform_peak"));

        let first_track = metadata.tracks.first().unwrap();
        let layout = first_track.audio.as_ref().map(|audio| audio.channels);
        assert_eq!(layout, Some(ChannelLayout::Custom(3)));
    }
}