Skip to main content

oxideav_core/registry/
container.rs

1//! Container traits (demuxer + muxer) and a registry.
2//!
3//! This module defines the abstract [`Demuxer`] / [`Muxer`] traits that
4//! every container implementation (oxideav-mp4, oxideav-mkv,
5//! oxideav-flac, oxideav-ogg, …) fulfils, plus a
6//! [`ContainerRegistry`] that consumers of the framework use to pick a
7//! demuxer by probe bytes or filename hint.
8
9use std::collections::HashMap;
10use std::io::{Read, Seek, SeekFrom, Write};
11
12use crate::{CodecResolver, Error, Packet, Result, StreamInfo};
13
14// ───────────────────────── traits ─────────────────────────
15
16/// Reads a container and emits packets per stream.
17pub trait Demuxer: Send {
18    /// Name of the container format (e.g., `"wav"`).
19    fn format_name(&self) -> &str;
20
21    /// Streams in this container. Stable across the lifetime of the demuxer.
22    fn streams(&self) -> &[StreamInfo];
23
24    /// Read the next packet from any stream. Returns `Error::Eof` at end.
25    fn next_packet(&mut self) -> Result<Packet>;
26
27    /// Hint that only the listed stream indices will be consumed by the
28    /// pipeline. Demuxers that can efficiently skip inactive streams at
29    /// the container level (e.g., MKV cluster-aware, MP4 trak-aware)
30    /// should override this. The default is a no-op — the pipeline
31    /// drops unwanted packets on the floor.
32    fn set_active_streams(&mut self, _indices: &[u32]) {}
33
34    /// Seek to the nearest keyframe at or before `pts` (in the given
35    /// stream's time base). Returns the actual timestamp seeked to, or
36    /// `Error::Unsupported` if this demuxer can't seek.
37    fn seek_to(&mut self, _stream_index: u32, _pts: i64) -> Result<i64> {
38        Err(Error::unsupported("this demuxer does not support seeking"))
39    }
40
41    /// Container-level metadata as ordered (key, value) pairs.
42    /// Keys follow a loose convention borrowed from Vorbis comments:
43    /// `title`, `artist`, `album`, `comment`, `date`, `sample_name:<n>`,
44    /// `channels`, `n_patterns`, etc. Demuxers that carry no metadata
45    /// return an empty slice (the default).
46    fn metadata(&self) -> &[(String, String)] {
47        &[]
48    }
49    /// Container-level duration, if known. Default is `None` — callers
50    /// may fall back to the longest per-stream duration. Expressed as
51    /// microseconds for portability; convert to seconds at the edge.
52    fn duration_micros(&self) -> Option<i64> {
53        None
54    }
55
56    /// Attached pictures (cover art, artist photos, ...) embedded in
57    /// the container. Returns an empty slice (the default) when the
58    /// container carries none or doesn't support them. Containers that
59    /// do — ID3v2 on MP3, `METADATA_BLOCK_PICTURE` on FLAC, `covr`
60    /// atoms on MP4, etc. — override this to expose the images.
61    fn attached_pictures(&self) -> &[crate::AttachedPicture] {
62        &[]
63    }
64
65    /// Structured chapter / cue list. Default returns an empty slice
66    /// for back-compat; demuxers that carry chapters (MKV `Chapters`,
67    /// MP4 chapter track, Ogg `CHAPTERnn=` Vorbis comments, …) should
68    /// override and return [`Chapter`](crate::Chapter) records in
69    /// presentation order. Coexists with the legacy `chapter:N:*`
70    /// flat-metadata keys; new consumers should prefer this.
71    fn chapters(&self) -> &[crate::Chapter] {
72        &[]
73    }
74
75    /// Structured attachment list. Default returns an empty slice for
76    /// back-compat; demuxers that carry attachments (MKV `Attachments`,
77    /// …) should override and return [`Attachment`](crate::Attachment)
78    /// records in container order. Coexists with the legacy
79    /// `attachment:N:*` flat-metadata keys; new consumers should prefer
80    /// this.
81    fn attachments(&self) -> &[crate::Attachment] {
82        &[]
83    }
84}
85
86/// Writes packets into a container.
87pub trait Muxer: Send {
88    fn format_name(&self) -> &str;
89
90    /// Write the container header. Must be called after stream configuration
91    /// and before the first `write_packet`.
92    fn write_header(&mut self) -> Result<()>;
93
94    fn write_packet(&mut self, packet: &Packet) -> Result<()>;
95
96    /// Finalize the file (write index, patch in total sizes, etc.).
97    fn write_trailer(&mut self) -> Result<()>;
98}
99
100/// Factory that tries to open a stream as a particular container format.
101///
102/// Implementations should read the minimum needed to confirm the format and
103/// return `Error::InvalidData` if the stream is not in this format.
104///
105/// The `codecs` parameter carries a resolver that converts container-
106/// level codec tags (FourCCs, WAVEFORMATEX wFormatTag, Matroska
107/// CodecIDs, …) into [`CodecId`](crate::CodecId) values.
108pub type OpenDemuxerFn =
109    fn(input: Box<dyn ReadSeek>, codecs: &dyn CodecResolver) -> Result<Box<dyn Demuxer>>;
110
111/// Factory that creates a muxer for a set of streams.
112pub type OpenMuxerFn =
113    fn(output: Box<dyn WriteSeek>, streams: &[StreamInfo]) -> Result<Box<dyn Muxer>>;
114
/// Information passed to a content-based [`ContainerProbeFn`].
///
/// `buf` holds the first few KB of the input — enough to recognise the
/// magic bytes of any container we know about. `ext` carries the file
/// extension as a hint (lowercase, no leading dot); some containers
/// (raw MP3 with no ID3v2, headerless tracker formats) need it to break
/// ties with otherwise weak signatures.
///
/// The struct only holds shared borrows, so it is `Copy` and can be
/// handed to any number of probes; `Debug` is derived so probe inputs
/// can be logged.
#[derive(Debug, Clone, Copy)]
pub struct ProbeData<'a> {
    /// Leading bytes of the input.
    pub buf: &'a [u8],
    /// Optional file-extension hint (lowercase, no leading dot).
    pub ext: Option<&'a str>,
}
126
127/// Confidence score returned by a [`ProbeFn`]. `0` means no match.
128/// Higher means more certain. Conventional values:
129///
130/// * `100` – unambiguous magic bytes at a known offset
131/// * `75`  – signature match corroborated by file extension
132/// * `50`  – signature match without extension corroboration
133/// * `25`  – extension match only (no content signature available)
134pub type ProbeScore = u8;
135
136/// Maximum probe score (alias for `100`).
137pub const MAX_PROBE_SCORE: ProbeScore = 100;
138/// Default score returned when only the file extension matches.
139pub const PROBE_SCORE_EXTENSION: ProbeScore = 25;
140
141/// Content-based format detection function.
142///
143/// Returns a [`ProbeScore`] in `0..=100`. Implementations should be
144/// pure (no I/O, no allocation beyond the stack) and fast — they may
145/// be invoked once per registered demuxer on every input file.
146pub type ContainerProbeFn = fn(probe: &ProbeData) -> ProbeScore;
147
/// Shorthand bound for readers that can also seek and be sent across
/// threads.
pub trait ReadSeek: Read + Seek + Send {}

impl<T> ReadSeek for T where T: Read + Seek + Send {}

/// Shorthand bound for writers that can also seek and be sent across
/// threads.
pub trait WriteSeek: Write + Seek + Send {}

impl<T> WriteSeek for T where T: Write + Seek + Send {}
155
156// ───────────────────────── ContainerRegistry ─────────────────────────
157
158#[derive(Default)]
159pub struct ContainerRegistry {
160    demuxers: HashMap<String, OpenDemuxerFn>,
161    muxers: HashMap<String, OpenMuxerFn>,
162    /// Lowercase file extension → container name (e.g. "wav" → "wav").
163    extensions: HashMap<String, String>,
164    /// Container name → content-probe function. Optional — containers
165    /// without a probe still work but require an extension hint or an
166    /// explicit format name.
167    probes: HashMap<String, ContainerProbeFn>,
168}
169
170impl ContainerRegistry {
171    pub fn new() -> Self {
172        Self::default()
173    }
174
175    pub fn register_demuxer(&mut self, name: &str, open: OpenDemuxerFn) {
176        self.demuxers.insert(name.to_owned(), open);
177    }
178
179    pub fn register_muxer(&mut self, name: &str, open: OpenMuxerFn) {
180        self.muxers.insert(name.to_owned(), open);
181    }
182
183    pub fn register_extension(&mut self, ext: &str, container_name: &str) {
184        self.extensions
185            .insert(ext.to_lowercase(), container_name.to_owned());
186    }
187
188    /// Attach a content-based probe to a registered demuxer. Called by
189    /// the registry's [`probe_input`](Self::probe_input) to detect the
190    /// container format from the first few KB of an input stream.
191    pub fn register_probe(&mut self, container_name: &str, probe: ContainerProbeFn) {
192        self.probes.insert(container_name.to_owned(), probe);
193    }
194
195    pub fn demuxer_names(&self) -> impl Iterator<Item = &str> {
196        self.demuxers.keys().map(|s| s.as_str())
197    }
198
199    pub fn muxer_names(&self) -> impl Iterator<Item = &str> {
200        self.muxers.keys().map(|s| s.as_str())
201    }
202
203    /// Open a demuxer explicitly by format name. The `codecs` resolver
204    /// is passed through to the demuxer so it can translate the
205    /// container's in-stream codec tags (FourCCs / wFormatTag /
206    /// Matroska CodecIDs) into [`CodecId`](crate::CodecId)
207    /// values. Demuxers that don't need tag resolution can ignore it.
208    pub fn open_demuxer(
209        &self,
210        name: &str,
211        input: Box<dyn ReadSeek>,
212        codecs: &dyn CodecResolver,
213    ) -> Result<Box<dyn Demuxer>> {
214        let open = self
215            .demuxers
216            .get(name)
217            .ok_or_else(|| Error::FormatNotFound(name.to_owned()))?;
218        open(input, codecs)
219    }
220
221    /// Open a muxer by format name.
222    pub fn open_muxer(
223        &self,
224        name: &str,
225        output: Box<dyn WriteSeek>,
226        streams: &[StreamInfo],
227    ) -> Result<Box<dyn Muxer>> {
228        let open = self
229            .muxers
230            .get(name)
231            .ok_or_else(|| Error::FormatNotFound(name.to_owned()))?;
232        open(output, streams)
233    }
234
235    /// Look up a container name from a file extension (no leading dot).
236    pub fn container_for_extension(&self, ext: &str) -> Option<&str> {
237        self.extensions.get(&ext.to_lowercase()).map(|s| s.as_str())
238    }
239
240    /// Detect the container format by reading the first ~256 KB of the
241    /// input, scoring each registered probe, and returning the highest-
242    /// scoring container's name. The extension is passed to probes as a
243    /// hint — they may use it to break ties when their signature is weak.
244    ///
245    /// Falls back to the extension table if no probe scores above zero.
246    /// The input cursor is restored to its starting position on success
247    /// and on the I/O failure paths that allow it.
248    pub fn probe_input(&self, input: &mut dyn ReadSeek, ext_hint: Option<&str>) -> Result<String> {
249        const PROBE_BUF_SIZE: usize = 256 * 1024;
250
251        let saved_pos = input.stream_position()?;
252        input.seek(SeekFrom::Start(0))?;
253        let mut buf = vec![0u8; PROBE_BUF_SIZE];
254        let mut got = 0;
255        while got < buf.len() {
256            match input.read(&mut buf[got..]) {
257                Ok(0) => break,
258                Ok(n) => got += n,
259                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
260                Err(e) => {
261                    let _ = input.seek(SeekFrom::Start(saved_pos));
262                    return Err(e.into());
263                }
264            }
265        }
266        buf.truncate(got);
267        input.seek(SeekFrom::Start(saved_pos))?;
268
269        let ext_lower = ext_hint.map(|s| s.to_ascii_lowercase());
270        let probe_data = ProbeData {
271            buf: &buf,
272            ext: ext_lower.as_deref(),
273        };
274
275        let mut best: Option<(&str, ProbeScore)> = None;
276        for (name, probe) in &self.probes {
277            let score = probe(&probe_data);
278            if score == 0 {
279                continue;
280            }
281            match best {
282                Some((_, prev)) if score <= prev => {}
283                _ => best = Some((name.as_str(), score)),
284            }
285        }
286        if let Some((name, _)) = best {
287            return Ok(name.to_owned());
288        }
289
290        // Fall back to extension lookup with the conventional weak score.
291        if let Some(ext) = ext_hint {
292            if let Some(name) = self.container_for_extension(ext) {
293                let _ = PROBE_SCORE_EXTENSION; // export retained for symmetry
294                return Ok(name.to_owned());
295            }
296        }
297
298        Err(Error::FormatNotFound(
299            "no registered demuxer recognises this input".into(),
300        ))
301    }
302}
303
#[cfg(test)]
mod tests {
    use super::*;

    /// Demuxer that implements only the required methods, so every
    /// provided method resolves to the trait's own implementation.
    struct DummyDemuxer;

    impl Demuxer for DummyDemuxer {
        fn format_name(&self) -> &str {
            "dummy"
        }

        fn streams(&self) -> &[StreamInfo] {
            &[]
        }

        fn next_packet(&mut self) -> Result<Packet> {
            Err(Error::Eof)
        }
    }

    #[test]
    fn default_seek_to_is_unsupported() {
        let mut demuxer = DummyDemuxer;
        let outcome = demuxer.seek_to(0, 0);
        if !matches!(outcome, Err(Error::Unsupported(_))) {
            panic!(
                "expected default seek_to to return Unsupported, got {:?}",
                outcome
            );
        }
    }

    #[test]
    fn default_chapters_and_attachments_are_empty() {
        // Back-compat contract: a demuxer that overrides none of the
        // provided methods must still compile, and every structured
        // accessor must come back empty. That is what lets existing
        // demuxers pick up the new API without source changes.
        let demuxer = DummyDemuxer;
        assert!(demuxer.chapters().is_empty());
        assert!(demuxer.attachments().is_empty());
        assert!(demuxer.attached_pictures().is_empty());
        assert!(demuxer.metadata().is_empty());
        assert_eq!(demuxer.duration_micros(), None);
    }
}
347}