Skip to main content

rskit_media_audio/
lib.rs

1//! Pure Rust audio processing — no FFmpeg dependency.
2//!
3//! Provides lightweight audio analysis and processing for common tasks:
4//! - WAV file reading/writing
5//! - Waveform generation (peak / RMS)
6//! - Silence detection
7//! - Loudness measurement (peak, RMS, EBU R128 approximation)
8//! - Volume adjustment and fade effects
9//!
10//! For complex operations (encoding, format conversion, filters) use
11//! [`rskit-media-ffmpeg`](../rskit_media_ffmpeg) instead.
12
13#![warn(missing_docs)]
14
15mod loudness;
16mod silence;
17mod wav;
18mod waveform;
19
20use std::collections::HashMap;
21use std::sync::Arc;
22use std::time::Duration;
23
24use rskit_errors::{AppError, AppResult, ErrorCode};
25use rskit_media::{
26    AudioTrackInfo, ChannelLayout, Codec, Format, MediaMetadata, MediaProbe, MediaType, Registry,
27    Resolution, SampleRate, SilenceInterval, Timestamp, Track, TrackKind, codec, format,
28};
29use rskit_storage::FileSource;
30use tokio::io::AsyncReadExt;
31
32use crate::loudness::LoudnessMeter;
33use crate::silence::{SilenceConfig, detect_silence};
34use crate::wav::WavReader;
35use crate::waveform::{WaveformConfig, generate_waveform};
36
/// Tunable limits for the pure Rust audio backend.
#[derive(Debug, Clone)]
pub struct Config {
    /// Upper bound, in bytes, on how much of a source is read into memory while probing.
    pub max_probe_bytes: u64,
    /// How many waveform bins are summarized into metadata tags during probing.
    pub metadata_waveform_bins: usize,
}

impl Config {
    /// Probe byte limit used by [`Config::default`] (64 MiB).
    const DEFAULT_MAX_PROBE_BYTES: u64 = 64 * 1024 * 1024;
    /// Waveform bin count used by [`Config::default`].
    const DEFAULT_METADATA_WAVEFORM_BINS: usize = 20;

    /// Return this configuration with a different probe byte limit.
    #[must_use]
    pub fn with_max_probe_bytes(self, max_probe_bytes: u64) -> Self {
        Self {
            max_probe_bytes,
            ..self
        }
    }

    /// Return this configuration with a different metadata waveform bin count.
    #[must_use]
    pub fn with_metadata_waveform_bins(self, metadata_waveform_bins: usize) -> Self {
        Self {
            metadata_waveform_bins,
            ..self
        }
    }
}

impl Default for Config {
    /// Defaults: a 64 MiB probe limit and 20 metadata waveform bins.
    fn default() -> Self {
        Self {
            max_probe_bytes: Self::DEFAULT_MAX_PROBE_BYTES,
            metadata_waveform_bins: Self::DEFAULT_METADATA_WAVEFORM_BINS,
        }
    }
}
70
71/// Register the audio backend.
72pub fn register(registry: &mut Registry, config: Config) -> AppResult<()> {
73    let config = Arc::new(config);
74    registry.register_probe(
75        "audio",
76        Arc::new(move || {
77            Ok(Arc::new(AudioProbe {
78                config: Arc::clone(&config),
79            }))
80        }),
81    )
82}
83
84struct AudioProbe {
85    config: Arc<Config>,
86}
87
88#[async_trait::async_trait]
89impl MediaProbe for AudioProbe {
90    async fn probe(&self, source: &FileSource) -> AppResult<MediaMetadata> {
91        let wav = self.read_wav(source).await?;
92        Ok(metadata_for_wav(&wav, self.config.metadata_waveform_bins))
93    }
94
95    async fn thumbnail(
96        &self,
97        _source: &FileSource,
98        _at: Timestamp,
99        _resolution: Option<Resolution>,
100    ) -> AppResult<FileSource> {
101        unsupported("audio thumbnail extraction is not supported by the pure Rust audio backend")
102    }
103
104    async fn thumbnails(
105        &self,
106        _source: &FileSource,
107        _interval: Duration,
108        _resolution: Option<Resolution>,
109    ) -> AppResult<Vec<FileSource>> {
110        unsupported("audio thumbnail extraction is not supported by the pure Rust audio backend")
111    }
112
113    async fn silence_detect(
114        &self,
115        source: &FileSource,
116        min_duration: Duration,
117        noise_threshold_db: f64,
118    ) -> AppResult<Vec<SilenceInterval>> {
119        let wav = self.read_wav(source).await?;
120        let threshold = 10f64.powf(noise_threshold_db / 20.0) as f32;
121        let config = SilenceConfig {
122            threshold,
123            min_duration_secs: min_duration.as_secs_f64(),
124        };
125
126        Ok(detect_silence(&wav, &config)
127            .into_iter()
128            .map(|region| SilenceInterval {
129                start: Timestamp::from_seconds(region.start_secs),
130                end: Timestamp::from_seconds(region.end_secs),
131                duration: Duration::from_secs_f64(region.duration_secs()),
132            })
133            .collect())
134    }
135}
136
137impl AudioProbe {
138    async fn read_wav(&self, source: &FileSource) -> AppResult<WavReader> {
139        let data = read_bounded(source, self.config.max_probe_bytes).await?;
140        WavReader::from_bytes(&data)
141    }
142}
143
144async fn read_bounded(source: &FileSource, max_bytes: u64) -> AppResult<Vec<u8>> {
145    let mut reader = source.reader().await?.take(max_bytes.saturating_add(1));
146    let capacity = usize::try_from(max_bytes.min(1024 * 1024)).map_err(|_| {
147        AppError::new(
148            ErrorCode::InvalidInput,
149            "audio probe byte limit does not fit in memory",
150        )
151    })?;
152    let mut data = Vec::with_capacity(capacity);
153    reader.read_to_end(&mut data).await.map_err(|error| {
154        AppError::new(
155            ErrorCode::Internal,
156            format!("failed to read audio source: {error}"),
157        )
158    })?;
159    if data.len() as u64 > max_bytes {
160        return Err(AppError::new(
161            ErrorCode::InvalidInput,
162            format!("audio source exceeds probe limit of {max_bytes} bytes"),
163        ));
164    }
165    Ok(data)
166}
167
168fn metadata_for_wav(wav: &WavReader, waveform_bins: usize) -> MediaMetadata {
169    let duration = Duration::from_secs_f64(wav.duration_secs());
170    let channels = channel_layout(wav.spec.channels);
171    let bitrate = u64::from(wav.spec.sample_rate)
172        .saturating_mul(u64::from(wav.spec.channels))
173        .saturating_mul(u64::from(wav.spec.bits_per_sample));
174    let loudness = LoudnessMeter::measure(wav);
175    let waveform = generate_waveform(
176        wav,
177        &WaveformConfig {
178            bins: waveform_bins,
179            channel: None,
180        },
181    );
182
183    let mut tags = HashMap::new();
184    tags.insert("audio.peak".to_owned(), loudness.peak.to_string());
185    tags.insert("audio.peak_db".to_owned(), loudness.peak_db.to_string());
186    tags.insert("audio.rms".to_owned(), loudness.rms.to_string());
187    tags.insert("audio.rms_db".to_owned(), loudness.rms_db.to_string());
188    tags.insert("audio.lufs".to_owned(), loudness.lufs.to_string());
189    tags.insert("audio.waveform_bins".to_owned(), waveform.len().to_string());
190    if let Some(max_peak) = waveform.iter().map(|point| point.peak).reduce(f32::max) {
191        tags.insert("audio.waveform_peak".to_owned(), max_peak.to_string());
192    }
193    if let Some(max_rms) = waveform.iter().map(|point| point.rms).reduce(f32::max) {
194        tags.insert("audio.waveform_rms".to_owned(), max_rms.to_string());
195    }
196    if let Some(min_sample) = waveform.iter().map(|point| point.min).reduce(f32::min) {
197        tags.insert("audio.waveform_min".to_owned(), min_sample.to_string());
198    }
199    if let Some(max_sample) = waveform.iter().map(|point| point.max).reduce(f32::max) {
200        tags.insert("audio.waveform_max".to_owned(), max_sample.to_string());
201    }
202
203    MediaMetadata {
204        media_type: MediaType::Audio,
205        format: Format::new(format::WAV),
206        duration: Some(duration),
207        size: None,
208        bitrate: Some(bitrate),
209        tracks: vec![Track {
210            index: 0,
211            kind: TrackKind::Audio,
212            codec: Some(Codec::new(codec::audio::PCM)),
213            bitrate: Some(bitrate),
214            language: None,
215            is_default: true,
216            title: None,
217            duration: Some(duration),
218            video: None,
219            audio: Some(AudioTrackInfo {
220                sample_rate: SampleRate::hz(wav.spec.sample_rate),
221                channels,
222                bit_depth: Some(wav.spec.bits_per_sample as u8),
223            }),
224            subtitle: None,
225        }],
226        tags,
227        created_at: None,
228    }
229}
230
231fn channel_layout(channels: u16) -> ChannelLayout {
232    match channels {
233        1 => ChannelLayout::Mono,
234        2 => ChannelLayout::Stereo,
235        6 => ChannelLayout::Surround51,
236        8 => ChannelLayout::Surround71,
237        channels => ChannelLayout::Custom(channels),
238    }
239}
240
241fn unsupported<T>(message: &'static str) -> AppResult<T> {
242    Err(AppError::new(ErrorCode::InvalidInput, message))
243}
244
#[cfg(test)]
mod tests {
    use super::*;
    use rskit_errors::ErrorCode;

    /// Builders override exactly the requested fields, and channel counts map
    /// to the expected layouts.
    #[test]
    fn config_builders_and_channel_layouts_are_deterministic() {
        let Config {
            max_probe_bytes,
            metadata_waveform_bins,
        } = Config::default()
            .with_max_probe_bytes(128)
            .with_metadata_waveform_bins(4);

        assert_eq!(max_probe_bytes, 128);
        assert_eq!(metadata_waveform_bins, 4);

        let expectations = [
            (1, ChannelLayout::Mono),
            (2, ChannelLayout::Stereo),
            (6, ChannelLayout::Surround51),
            (8, ChannelLayout::Surround71),
            (3, ChannelLayout::Custom(3)),
        ];
        for (count, expected) in expectations {
            assert_eq!(channel_layout(count), expected);
        }
    }

    /// `unsupported` reports `InvalidInput` and keeps the caller's message.
    #[test]
    fn unsupported_returns_invalid_input() {
        let result = unsupported::<()>("not supported");
        let err = result.unwrap_err();

        assert_eq!(err.code(), ErrorCode::InvalidInput);
        assert!(err.message().contains("not supported"));
    }

    /// A six-byte source read with a three-byte limit is rejected.
    #[tokio::test]
    async fn read_bounded_rejects_sources_over_limit() {
        let payload = bytes::Bytes::from_static(b"abcdef");
        let source = FileSource::Bytes(payload);

        let result = read_bounded(&source, 3).await;
        let err = result.unwrap_err();

        assert_eq!(err.code(), ErrorCode::InvalidInput);
        assert!(err.message().contains("exceeds probe limit"));
    }

    /// Three channels become a custom layout, and zero bins yield no waveform
    /// summary tags.
    #[test]
    fn metadata_for_wav_handles_custom_channels_and_empty_waveform() {
        let spec = wav::WavSpec {
            channels: 3,
            sample_rate: 48_000,
            bits_per_sample: 16,
        };
        let wav = WavReader {
            spec,
            samples: vec![0.0; 9],
        };

        let metadata = metadata_for_wav(&wav, 0);
        let tags = &metadata.tags;

        assert_eq!(metadata.media_type, MediaType::Audio);
        assert_eq!(metadata.bitrate, Some(48_000 * 3 * 16));
        assert_eq!(
            tags.get("audio.waveform_bins").map(String::as_str),
            Some("0")
        );
        assert!(!tags.contains_key("audio.waveform_peak"));

        let track = metadata.tracks.first().unwrap();
        let layout = track.audio.as_ref().map(|audio| audio.channels);
        assert_eq!(layout, Some(ChannelLayout::Custom(3)));
    }
}