oxideav_core/registry/container.rs
1//! Container traits (demuxer + muxer) and a registry.
2//!
3//! This module defines the abstract [`Demuxer`] / [`Muxer`] traits that
4//! every container implementation (oxideav-mp4, oxideav-mkv,
5//! oxideav-flac, oxideav-ogg, …) fulfils, plus a
6//! [`ContainerRegistry`] that consumers of the framework use to pick a
7//! demuxer by probe bytes or filename hint.
8
9use std::collections::HashMap;
10use std::io::{Read, Seek, SeekFrom, Write};
11
12use crate::{CodecResolver, Error, Packet, Result, StreamInfo};
13
14// ───────────────────────── traits ─────────────────────────
15
16/// Reads a container and emits packets per stream.
17pub trait Demuxer: Send {
18 /// Name of the container format (e.g., `"wav"`).
19 fn format_name(&self) -> &str;
20
21 /// Streams in this container. Stable across the lifetime of the demuxer.
22 fn streams(&self) -> &[StreamInfo];
23
24 /// Read the next packet from any stream. Returns `Error::Eof` at end.
25 fn next_packet(&mut self) -> Result<Packet>;
26
27 /// Hint that only the listed stream indices will be consumed by the
28 /// pipeline. Demuxers that can efficiently skip inactive streams at
29 /// the container level (e.g., MKV cluster-aware, MP4 trak-aware)
30 /// should override this. The default is a no-op — the pipeline
31 /// drops unwanted packets on the floor.
32 fn set_active_streams(&mut self, _indices: &[u32]) {}
33
34 /// Seek to the nearest keyframe at or before `pts` (in the given
35 /// stream's time base). Returns the actual timestamp seeked to, or
36 /// `Error::Unsupported` if this demuxer can't seek.
37 fn seek_to(&mut self, _stream_index: u32, _pts: i64) -> Result<i64> {
38 Err(Error::unsupported("this demuxer does not support seeking"))
39 }
40
41 /// Container-level metadata as ordered (key, value) pairs.
42 /// Keys follow a loose convention borrowed from Vorbis comments:
43 /// `title`, `artist`, `album`, `comment`, `date`, `sample_name:<n>`,
44 /// `channels`, `n_patterns`, etc. Demuxers that carry no metadata
45 /// return an empty slice (the default).
46 fn metadata(&self) -> &[(String, String)] {
47 &[]
48 }
49 /// Container-level duration, if known. Default is `None` — callers
50 /// may fall back to the longest per-stream duration. Expressed as
51 /// microseconds for portability; convert to seconds at the edge.
52 fn duration_micros(&self) -> Option<i64> {
53 None
54 }
55
56 /// Attached pictures (cover art, artist photos, ...) embedded in
57 /// the container. Returns an empty slice (the default) when the
58 /// container carries none or doesn't support them. Containers that
59 /// do — ID3v2 on MP3, `METADATA_BLOCK_PICTURE` on FLAC, `covr`
60 /// atoms on MP4, etc. — override this to expose the images.
61 fn attached_pictures(&self) -> &[crate::AttachedPicture] {
62 &[]
63 }
64
65 /// Structured chapter / cue list. Default returns an empty slice
66 /// for back-compat; demuxers that carry chapters (MKV `Chapters`,
67 /// MP4 chapter track, Ogg `CHAPTERnn=` Vorbis comments, …) should
68 /// override and return [`Chapter`](crate::Chapter) records in
69 /// presentation order. Coexists with the legacy `chapter:N:*`
70 /// flat-metadata keys; new consumers should prefer this.
71 fn chapters(&self) -> &[crate::Chapter] {
72 &[]
73 }
74
75 /// Structured attachment list. Default returns an empty slice for
76 /// back-compat; demuxers that carry attachments (MKV `Attachments`,
77 /// …) should override and return [`Attachment`](crate::Attachment)
78 /// records in container order. Coexists with the legacy
79 /// `attachment:N:*` flat-metadata keys; new consumers should prefer
80 /// this.
81 fn attachments(&self) -> &[crate::Attachment] {
82 &[]
83 }
84}
85
/// Writes packets into a container.
///
/// Implementations must be `Send`. All three writing methods are
/// fallible and report failures through the crate-wide [`Result`].
pub trait Muxer: Send {
    /// Name of the container format (counterpart of
    /// [`Demuxer::format_name`]).
    fn format_name(&self) -> &str;

    /// Write the container header. Must be called after stream configuration
    /// and before the first `write_packet`.
    fn write_header(&mut self) -> Result<()>;

    /// Write a single packet into the container. Per the contract on
    /// `write_header`, the header must have been written first.
    fn write_packet(&mut self, packet: &Packet) -> Result<()>;

    /// Finalize the file (write index, patch in total sizes, etc.).
    // NOTE(review): presumably no further `write_packet` calls are valid
    // after this — confirm against the concrete muxers.
    fn write_trailer(&mut self) -> Result<()>;
}
99
/// Factory that tries to open a stream as a particular container format.
///
/// Implementations should read the minimum needed to confirm the format and
/// return `Error::InvalidData` if the stream is not in this format.
///
/// `input` is passed by value, so the factory (and the demuxer it
/// returns) takes ownership of the boxed reader.
///
/// The `codecs` parameter carries a resolver that converts container-
/// level codec tags (FourCCs, WAVEFORMATEX wFormatTag, Matroska
/// CodecIDs, …) into [`CodecId`](crate::CodecId) values.
pub type OpenDemuxerFn =
    fn(input: Box<dyn ReadSeek>, codecs: &dyn CodecResolver) -> Result<Box<dyn Demuxer>>;
110
/// Factory that creates a muxer for a set of streams.
///
/// `output` is passed by value, so the factory (and the muxer it
/// returns) takes ownership of the boxed writer. `streams` is only
/// borrowed for the duration of the call.
pub type OpenMuxerFn =
    fn(output: Box<dyn WriteSeek>, streams: &[StreamInfo]) -> Result<Box<dyn Muxer>>;
114
/// Information passed to a content-based [`ContainerProbeFn`].
///
/// `buf` holds the leading bytes of the input
/// ([`ContainerRegistry::probe_input`] reads up to 256 KiB) — enough to
/// recognise the magic bytes of any container we know about. `ext`
/// carries the file extension as a hint (lowercase, no leading dot);
/// some containers (raw MP3 with no ID3v2, headerless tracker formats)
/// need it to break ties with otherwise weak signatures.
pub struct ProbeData<'a> {
    /// Leading bytes of the input, starting at offset 0. May be shorter
    /// than the probe window (or empty) for small inputs.
    pub buf: &'a [u8],
    /// Lowercased file-extension hint without the leading dot, if the
    /// caller supplied one.
    pub ext: Option<&'a str>,
}
126
/// Confidence score returned by a [`ContainerProbeFn`]. `0` means no
/// match. Higher means more certain. Conventional values:
///
/// * `100` – unambiguous magic bytes at a known offset
///   (see [`MAX_PROBE_SCORE`])
/// * `75`  – signature match corroborated by file extension
/// * `50`  – signature match without extension corroboration
/// * `25`  – extension match only (no content signature available;
///   see [`PROBE_SCORE_EXTENSION`])
pub type ProbeScore = u8;
135
/// Maximum probe score (`100`): by convention, unambiguous magic bytes
/// at a known offset.
pub const MAX_PROBE_SCORE: ProbeScore = 100;
/// Default score (`25`) returned when only the file extension matches
/// and no content signature is available.
pub const PROBE_SCORE_EXTENSION: ProbeScore = 25;
140
/// Content-based format detection function.
///
/// Receives the leading bytes of the input and an optional extension
/// hint via [`ProbeData`] and returns a [`ProbeScore`] in `0..=100`,
/// where `0` means "not this format". Implementations should be pure
/// (no I/O, no allocation beyond the stack) and fast — they may be
/// invoked once per registered demuxer on every input file.
pub type ContainerProbeFn = fn(probe: &ProbeData) -> ProbeScore;
147
/// Shorthand bound for a reader that is seekable and `Send`, usable as
/// a single trait object (`dyn ReadSeek`).
pub trait ReadSeek: Read + Seek + Send {}

// Blanket impl: every `Read + Seek + Send` type is a `ReadSeek`.
impl<T> ReadSeek for T where T: Read + Seek + Send {}
151
/// Shorthand bound for a writer that is seekable and `Send`, usable as
/// a single trait object (`dyn WriteSeek`).
pub trait WriteSeek: Write + Seek + Send {}

// Blanket impl: every `Write + Seek + Send` type is a `WriteSeek`.
impl<T> WriteSeek for T where T: Write + Seek + Send {}
155
156// ───────────────────────── ContainerRegistry ─────────────────────────
157
/// Registry of container formats: demuxer and muxer factories keyed by
/// container name, plus the extension table and content probes used to
/// detect the format of an input.
///
/// The derived `Default` yields a registry with nothing registered.
#[derive(Default)]
pub struct ContainerRegistry {
    /// Container name → demuxer factory.
    demuxers: HashMap<String, OpenDemuxerFn>,
    /// Container name → muxer factory.
    muxers: HashMap<String, OpenMuxerFn>,
    /// Lowercase file extension → container name (e.g. "wav" → "wav").
    extensions: HashMap<String, String>,
    /// Container name → content-probe function. Optional — containers
    /// without a probe still work but require an extension hint or an
    /// explicit format name.
    probes: HashMap<String, ContainerProbeFn>,
}
169
170impl ContainerRegistry {
171 pub fn new() -> Self {
172 Self::default()
173 }
174
175 pub fn register_demuxer(&mut self, name: &str, open: OpenDemuxerFn) {
176 self.demuxers.insert(name.to_owned(), open);
177 }
178
179 pub fn register_muxer(&mut self, name: &str, open: OpenMuxerFn) {
180 self.muxers.insert(name.to_owned(), open);
181 }
182
183 pub fn register_extension(&mut self, ext: &str, container_name: &str) {
184 self.extensions
185 .insert(ext.to_lowercase(), container_name.to_owned());
186 }
187
188 /// Attach a content-based probe to a registered demuxer. Called by
189 /// the registry's [`probe_input`](Self::probe_input) to detect the
190 /// container format from the first few KB of an input stream.
191 pub fn register_probe(&mut self, container_name: &str, probe: ContainerProbeFn) {
192 self.probes.insert(container_name.to_owned(), probe);
193 }
194
195 pub fn demuxer_names(&self) -> impl Iterator<Item = &str> {
196 self.demuxers.keys().map(|s| s.as_str())
197 }
198
199 pub fn muxer_names(&self) -> impl Iterator<Item = &str> {
200 self.muxers.keys().map(|s| s.as_str())
201 }
202
203 /// Open a demuxer explicitly by format name. The `codecs` resolver
204 /// is passed through to the demuxer so it can translate the
205 /// container's in-stream codec tags (FourCCs / wFormatTag /
206 /// Matroska CodecIDs) into [`CodecId`](crate::CodecId)
207 /// values. Demuxers that don't need tag resolution can ignore it.
208 pub fn open_demuxer(
209 &self,
210 name: &str,
211 input: Box<dyn ReadSeek>,
212 codecs: &dyn CodecResolver,
213 ) -> Result<Box<dyn Demuxer>> {
214 let open = self
215 .demuxers
216 .get(name)
217 .ok_or_else(|| Error::FormatNotFound(name.to_owned()))?;
218 open(input, codecs)
219 }
220
221 /// Open a muxer by format name.
222 pub fn open_muxer(
223 &self,
224 name: &str,
225 output: Box<dyn WriteSeek>,
226 streams: &[StreamInfo],
227 ) -> Result<Box<dyn Muxer>> {
228 let open = self
229 .muxers
230 .get(name)
231 .ok_or_else(|| Error::FormatNotFound(name.to_owned()))?;
232 open(output, streams)
233 }
234
235 /// Look up a container name from a file extension (no leading dot).
236 pub fn container_for_extension(&self, ext: &str) -> Option<&str> {
237 self.extensions.get(&ext.to_lowercase()).map(|s| s.as_str())
238 }
239
240 /// Detect the container format by reading the first ~256 KB of the
241 /// input, scoring each registered probe, and returning the highest-
242 /// scoring container's name. The extension is passed to probes as a
243 /// hint — they may use it to break ties when their signature is weak.
244 ///
245 /// Falls back to the extension table if no probe scores above zero.
246 /// The input cursor is restored to its starting position on success
247 /// and on the I/O failure paths that allow it.
248 pub fn probe_input(&self, input: &mut dyn ReadSeek, ext_hint: Option<&str>) -> Result<String> {
249 const PROBE_BUF_SIZE: usize = 256 * 1024;
250
251 let saved_pos = input.stream_position()?;
252 input.seek(SeekFrom::Start(0))?;
253 let mut buf = vec![0u8; PROBE_BUF_SIZE];
254 let mut got = 0;
255 while got < buf.len() {
256 match input.read(&mut buf[got..]) {
257 Ok(0) => break,
258 Ok(n) => got += n,
259 Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
260 Err(e) => {
261 let _ = input.seek(SeekFrom::Start(saved_pos));
262 return Err(e.into());
263 }
264 }
265 }
266 buf.truncate(got);
267 input.seek(SeekFrom::Start(saved_pos))?;
268
269 let ext_lower = ext_hint.map(|s| s.to_ascii_lowercase());
270 let probe_data = ProbeData {
271 buf: &buf,
272 ext: ext_lower.as_deref(),
273 };
274
275 let mut best: Option<(&str, ProbeScore)> = None;
276 for (name, probe) in &self.probes {
277 let score = probe(&probe_data);
278 if score == 0 {
279 continue;
280 }
281 match best {
282 Some((_, prev)) if score <= prev => {}
283 _ => best = Some((name.as_str(), score)),
284 }
285 }
286 if let Some((name, _)) = best {
287 return Ok(name.to_owned());
288 }
289
290 // Fall back to extension lookup with the conventional weak score.
291 if let Some(ext) = ext_hint {
292 if let Some(name) = self.container_for_extension(ext) {
293 let _ = PROBE_SCORE_EXTENSION; // export retained for symmetry
294 return Ok(name.to_owned());
295 }
296 }
297
298 Err(Error::FormatNotFound(
299 "no registered demuxer recognises this input".into(),
300 ))
301 }
302}
303
#[cfg(test)]
mod tests {
    use super::*;

    /// Demuxer that implements only the required methods, so every
    /// provided method is exercised exactly as the trait ships it.
    struct DummyDemuxer;

    impl Demuxer for DummyDemuxer {
        fn format_name(&self) -> &str {
            "dummy"
        }

        fn streams(&self) -> &[StreamInfo] {
            &[]
        }

        fn next_packet(&mut self) -> Result<Packet> {
            Err(Error::Eof)
        }
    }

    #[test]
    fn default_seek_to_is_unsupported() {
        let mut demuxer = DummyDemuxer;
        let outcome = demuxer.seek_to(0, 0);
        assert!(
            matches!(outcome, Err(Error::Unsupported(_))),
            "expected default seek_to to return Unsupported, got {:?}",
            outcome
        );
    }

    #[test]
    fn default_chapters_and_attachments_are_empty() {
        // Back-compat contract: a demuxer that overrides none of the
        // provided accessors still compiles, and each accessor reports
        // "nothing here". That is what lets existing demuxers pick up
        // the newer API without any source changes.
        let demuxer = DummyDemuxer;
        assert!(demuxer.chapters().is_empty());
        assert!(demuxer.attachments().is_empty());
        assert!(demuxer.attached_pictures().is_empty());
        assert!(demuxer.metadata().is_empty());
        assert_eq!(demuxer.duration_micros(), None);
    }
}