Skip to main content

oxideav_core/
stream.rs

1//! Stream metadata shared between containers and codecs.
2
3use crate::format::{MediaType, PixelFormat, SampleFormat};
4use crate::rational::Rational;
5use crate::time::TimeBase;
6
/// A stable identifier for a codec. Codec crates register a `CodecId` so the
/// codec registry can look them up by name.
///
/// This is a thin newtype over the codec's name: equality and hashing are
/// those of the wrapped `String`, so comparisons are case-sensitive.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct CodecId(pub String);

impl CodecId {
    /// Build an id from anything convertible into a `String`.
    pub fn new(s: impl Into<String>) -> Self {
        Self(s.into())
    }

    /// Borrow the codec name as a string slice.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}

impl From<&str> for CodecId {
    /// Copy the borrowed name into a newly allocated id.
    fn from(s: &str) -> Self {
        Self::new(s)
    }
}

impl std::fmt::Display for CodecId {
    /// Write the bare codec name.
    ///
    /// Goes through [`std::fmt::Formatter::pad`] so that width, fill,
    /// alignment and precision from the caller's format spec (e.g.
    /// `{:<12}` when printing a table) are honoured rather than silently
    /// dropped. Plain `{}` output is unchanged.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.pad(&self.0)
    }
}
33
/// A codec identifier scoped to a container format — the thing a
/// demuxer reads out of the file to name a codec. Resolved to a
/// [`CodecId`] by the codec registry.
///
/// Centralising these in the registry (instead of each container
/// hand-rolling its own FourCC → CodecId table) lets:
///
/// * a codec crate declare its own tag claims in `register()`, keeping
///   ownership co-located with the decoder;
/// * multiple codecs claim the same tag with priority ordering;
/// * optional per-claim probes disambiguate the tag-collision cases
///   that happen everywhere in the wild (DIV3 that's actually MPEG-4
///   Part 2, XVID that's actually MS-MPEG4v3, audio wFormatTag=0x0055
///   that could be MP3 or — very rarely — something else, etc.).
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum CodecTag {
    /// Four-character code used by AVI's `bmih.biCompression`, MP4 /
    /// QuickTime sample-entry type, Matroska V_/A_ tags built around
    /// FourCC, and many others.
    ///
    /// Build values with [`CodecTag::fourcc`]: it upper-cases ASCII
    /// letters so lookups are case-insensitive, and leaves every other
    /// byte as-is. Constructing the variant directly bypasses that
    /// normalisation, so such a value will not compare equal to one
    /// built from lower-case input.
    Fourcc([u8; 4]),

    /// AVI / WAV `WAVEFORMATEX::wFormatTag` (e.g. 0x0001 = PCM,
    /// 0x0055 = MP3, 0x00FF = "raw" AAC, 0x1600 = AAC ADTS,
    /// 0x1610 = HE-AAC).
    WaveFormat(u16),

    /// MP4 ObjectTypeIndication (ISO/IEC 14496-1 Table 5 / the values
    /// in an MP4 `esds` `DecoderConfigDescriptor`). e.g. 0x40 = MPEG-4
    /// AAC, 0x20 = MPEG-4 Visual, 0x69 = MP3.
    Mp4ObjectType(u8),

    /// Matroska `CodecID` element (full string, e.g.
    /// `"V_MPEG4/ISO/AVC"`, `"A_AAC"`, `"A_VORBIS"`).
    Matroska(String),
}

impl CodecTag {
    /// Build a FourCC tag, upper-casing alphabetic bytes.
    ///
    /// Only ASCII letters are folded; digits, spaces, control bytes and
    /// bytes >= 0x80 pass through untouched.
    pub fn fourcc(raw: &[u8; 4]) -> Self {
        let mut code = *raw;
        code.make_ascii_uppercase();
        Self::Fourcc(code)
    }

    /// Wrap a `WAVEFORMATEX::wFormatTag` value.
    pub fn wave_format(tag: u16) -> Self {
        Self::WaveFormat(tag)
    }

    /// Wrap an MP4 ObjectTypeIndication value.
    pub fn mp4_object_type(oti: u8) -> Self {
        Self::Mp4ObjectType(oti)
    }

    /// Wrap a Matroska `CodecID` string; the text is stored verbatim.
    pub fn matroska(id: impl Into<String>) -> Self {
        Self::Matroska(id.into())
    }
}

impl std::fmt::Display for CodecTag {
    /// Render the tag as `kind(value)`:
    ///
    /// * `fourcc(XVID)` when all four bytes are printable ASCII or a
    ///   space, otherwise `fourcc(0xAABBCCDD)` with the bytes in order;
    /// * `wFormatTag(0x0055)` — four hex digits;
    /// * `mp4_oti(0x40)` — two hex digits;
    /// * `matroska(A_AAC)` — the id string verbatim.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Fourcc(fcc) => {
                let printable = fcc.iter().all(|b| b.is_ascii_graphic() || *b == b' ');
                match std::str::from_utf8(fcc) {
                    // Printable ASCII is always valid UTF-8, so this arm
                    // is taken exactly when `printable` holds.
                    Ok(text) if printable => write!(f, "fourcc({text})"),
                    // Big-endian keeps the bytes in file order.
                    _ => write!(f, "fourcc(0x{:08X})", u32::from_be_bytes(*fcc)),
                }
            }
            Self::WaveFormat(t) => write!(f, "wFormatTag(0x{t:04X})"),
            Self::Mp4ObjectType(o) => write!(f, "mp4_oti(0x{o:02X})"),
            Self::Matroska(s) => write!(f, "matroska({s})"),
        }
    }
}
115
116/// Context passed to a codec's probe function during tag resolution.
117///
118/// Built by the demuxer from whatever it has already parsed (stream
119/// format block, a peek at the first packet, numeric hints like
120/// `bits_per_sample`). Probes read fields directly; the struct is
121/// `#[non_exhaustive]` so additional hints can be added later without
122/// breaking codec crates that match on it.
123///
124/// The canonical construction pattern, for a demuxer:
125///
126/// ```
127/// # use oxideav_core::{CodecTag, ProbeContext};
128/// let tag = CodecTag::wave_format(0x0001);
129/// let ctx = ProbeContext::new(&tag)
130///     .bits(24)
131///     .channels(2)
132///     .sample_rate(48_000);
133/// # let _ = ctx;
134/// ```
135///
136/// Codec authors read fields like `ctx.bits_per_sample` / `ctx.tag`
137/// directly — `#[non_exhaustive]` forbids struct-literal construction
138/// from outside this crate but does not restrict field access.
139#[non_exhaustive]
140#[derive(Clone, Debug)]
141pub struct ProbeContext<'a> {
142    /// The tag being resolved — always set.
143    pub tag: &'a CodecTag,
144    /// Raw container-level stream-format blob if available
145    /// (e.g. WAVEFORMATEX, BITMAPINFOHEADER, MP4 sample-entry bytes,
146    /// Matroska `CodecPrivate`). Format is container-specific.
147    pub header: Option<&'a [u8]>,
148    /// First packet bytes if the demuxer has already read one.
149    /// Most demuxers resolve tags at stream-discovery time before any
150    /// packet exists; this is `None` in that case.
151    pub packet: Option<&'a [u8]>,
152    /// Audio: bits per sample (from WAVEFORMATEX, MP4 sample entry,
153    /// Matroska `BitDepth`, etc.).
154    pub bits_per_sample: Option<u16>,
155    pub channels: Option<u16>,
156    pub sample_rate: Option<u32>,
157    pub width: Option<u32>,
158    pub height: Option<u32>,
159}
160
161impl<'a> ProbeContext<'a> {
162    /// Start building a context for `tag` with every hint field empty.
163    pub fn new(tag: &'a CodecTag) -> Self {
164        Self {
165            tag,
166            header: None,
167            packet: None,
168            bits_per_sample: None,
169            channels: None,
170            sample_rate: None,
171            width: None,
172            height: None,
173        }
174    }
175
176    pub fn header(mut self, h: &'a [u8]) -> Self {
177        self.header = Some(h);
178        self
179    }
180
181    pub fn packet(mut self, p: &'a [u8]) -> Self {
182        self.packet = Some(p);
183        self
184    }
185
186    pub fn bits(mut self, n: u16) -> Self {
187        self.bits_per_sample = Some(n);
188        self
189    }
190
191    pub fn channels(mut self, n: u16) -> Self {
192        self.channels = Some(n);
193        self
194    }
195
196    pub fn sample_rate(mut self, n: u32) -> Self {
197        self.sample_rate = Some(n);
198        self
199    }
200
201    pub fn width(mut self, n: u32) -> Self {
202        self.width = Some(n);
203        self
204    }
205
206    pub fn height(mut self, n: u32) -> Self {
207        self.height = Some(n);
208        self
209    }
210}
211
212/// Confidence value returned by a probe. `1.0` means "certainly me",
213/// `0.0` means "not me", values in between mean "partial evidence — if
214/// no higher-confidence claim exists, this should win". The registry
215/// picks the claim with the highest returned confidence and skips any
216/// that return `0.0`.
217pub type Confidence = f32;
218
219/// A probe function a codec attaches to its registration to
220/// disambiguate tag collisions. Called once per candidate
221/// registration during `resolve_tag`.
222pub type ProbeFn = fn(&ProbeContext) -> Confidence;
223
224/// Resolve a [`CodecTag`] (FourCC / WAVEFORMATEX / Matroska id / …) to a
225/// [`CodecId`]. The [`oxideav-codec`](https://crates.io/crates/oxideav-codec)
226/// registry implements this, but defining the trait here lets
227/// containers consume tag resolution via `&dyn CodecResolver` without
228/// pulling in the codec crate as a direct dependency.
229pub trait CodecResolver: Sync {
230    /// Resolve the tag in `ctx.tag` to a codec id. Implementations walk
231    /// every registration whose tag set contains the tag, call each
232    /// probe (treating `None` as "always 1.0"), and return the id with
233    /// the highest resulting confidence. Ties are broken by
234    /// registration order.
235    fn resolve_tag(&self, ctx: &ProbeContext) -> Option<CodecId>;
236}
237
238/// Null resolver that resolves nothing — useful as a default when a
239/// caller doesn't have a real registry handy (e.g. unit tests, or
240/// legacy callers of the tag-free `open()` APIs).
241#[derive(Default, Clone, Copy)]
242pub struct NullCodecResolver;
243
244impl CodecResolver for NullCodecResolver {
245    fn resolve_tag(&self, _ctx: &ProbeContext) -> Option<CodecId> {
246        None
247    }
248}
249
250/// Codec-level parameters shared between demuxer/muxer and en/decoder.
251#[derive(Clone, Debug)]
252pub struct CodecParameters {
253    pub codec_id: CodecId,
254    pub media_type: MediaType,
255
256    // Audio-specific
257    pub sample_rate: Option<u32>,
258    pub channels: Option<u16>,
259    pub sample_format: Option<SampleFormat>,
260
261    // Video-specific
262    pub width: Option<u32>,
263    pub height: Option<u32>,
264    pub pixel_format: Option<PixelFormat>,
265    pub frame_rate: Option<Rational>,
266
267    /// Per-codec setup bytes (e.g., SPS/PPS, OpusHead). Format defined by codec.
268    pub extradata: Vec<u8>,
269
270    pub bit_rate: Option<u64>,
271}
272
273impl CodecParameters {
274    pub fn audio(codec_id: CodecId) -> Self {
275        Self {
276            codec_id,
277            media_type: MediaType::Audio,
278            sample_rate: None,
279            channels: None,
280            sample_format: None,
281            width: None,
282            height: None,
283            pixel_format: None,
284            frame_rate: None,
285            extradata: Vec::new(),
286            bit_rate: None,
287        }
288    }
289
290    /// True when `self` and `other` have the same codec_id and core
291    /// format parameters (sample_rate/channels/sample_format for audio,
292    /// width/height/pixel_format for video). Extradata and bitrate
293    /// differences are tolerated — many containers rewrite extradata
294    /// losslessly during a copy operation.
295    pub fn matches_core(&self, other: &CodecParameters) -> bool {
296        self.codec_id == other.codec_id
297            && self.sample_rate == other.sample_rate
298            && self.channels == other.channels
299            && self.sample_format == other.sample_format
300            && self.width == other.width
301            && self.height == other.height
302            && self.pixel_format == other.pixel_format
303    }
304
305    pub fn video(codec_id: CodecId) -> Self {
306        Self {
307            codec_id,
308            media_type: MediaType::Video,
309            sample_rate: None,
310            channels: None,
311            sample_format: None,
312            width: None,
313            height: None,
314            pixel_format: None,
315            frame_rate: None,
316            extradata: Vec::new(),
317            bit_rate: None,
318        }
319    }
320}
321
/// Description of a single stream inside a container.
///
/// Bundles the stream's position and timing information with the
/// [`CodecParameters`] needed to decode or re-mux it.
#[derive(Clone, Debug)]
pub struct StreamInfo {
    /// Index identifying the stream.
    // NOTE(review): presumably the stream's position within its container — confirm.
    pub index: u32,
    /// Time base of the stream.
    pub time_base: TimeBase,
    /// Stream duration, `None` when not known.
    // NOTE(review): presumably expressed in `time_base` units — confirm.
    pub duration: Option<i64>,
    /// Start time of the stream, `None` when not known.
    // NOTE(review): presumably expressed in `time_base` units — confirm.
    pub start_time: Option<i64>,
    /// Codec-level parameters of the stream.
    pub params: CodecParameters,
}
331
/// Unit tests for [`CodecTag`], [`NullCodecResolver`] and the
/// [`ProbeContext`] builder.
#[cfg(test)]
mod codec_tag_tests {
    use super::*;

    #[test]
    fn fourcc_uppercases_on_construction() {
        let t = CodecTag::fourcc(b"div3");
        assert_eq!(t, CodecTag::Fourcc(*b"DIV3"));
        // Non-alphabetic bytes preserved unchanged.
        let t2 = CodecTag::fourcc(b"MP42");
        assert_eq!(t2, CodecTag::Fourcc(*b"MP42"));
        let t3 = CodecTag::fourcc(&[0xFF, b'a', 0x00, b'1']);
        assert_eq!(t3, CodecTag::Fourcc([0xFF, b'A', 0x00, b'1']));
    }

    #[test]
    fn fourcc_equality_case_insensitive_via_ctor() {
        assert_eq!(CodecTag::fourcc(b"xvid"), CodecTag::fourcc(b"XVID"));
        assert_eq!(CodecTag::fourcc(b"DiV3"), CodecTag::fourcc(b"div3"));
    }

    #[test]
    fn display_printable_fourcc() {
        assert_eq!(CodecTag::fourcc(b"XVID").to_string(), "fourcc(XVID)");
    }

    #[test]
    fn display_non_printable_fourcc_as_hex() {
        let t = CodecTag::Fourcc([0x00, 0x00, 0x00, 0x01]);
        assert_eq!(t.to_string(), "fourcc(0x00000001)");
    }

    #[test]
    fn display_wave_format() {
        assert_eq!(
            CodecTag::wave_format(0x0055).to_string(),
            "wFormatTag(0x0055)"
        );
    }

    #[test]
    fn display_mp4_oti() {
        assert_eq!(CodecTag::mp4_object_type(0x40).to_string(), "mp4_oti(0x40)");
    }

    #[test]
    fn display_matroska() {
        assert_eq!(
            CodecTag::matroska("V_MPEG4/ISO/AVC").to_string(),
            "matroska(V_MPEG4/ISO/AVC)",
        );
    }

    #[test]
    fn null_resolver_resolves_nothing() {
        let r = NullCodecResolver;
        let xvid = CodecTag::fourcc(b"XVID");
        assert!(r.resolve_tag(&ProbeContext::new(&xvid)).is_none());
        let wf = CodecTag::wave_format(0x0055);
        assert!(r.resolve_tag(&ProbeContext::new(&wf)).is_none());
    }

    #[test]
    fn probe_context_new_leaves_hints_empty() {
        let tag = CodecTag::fourcc(b"XVID");
        let ctx = ProbeContext::new(&tag);
        assert_eq!(ctx.tag, &tag);
        assert!(ctx.header.is_none());
        assert!(ctx.packet.is_none());
        assert!(ctx.bits_per_sample.is_none());
        assert!(ctx.channels.is_none());
        assert!(ctx.sample_rate.is_none());
        assert!(ctx.width.is_none());
        assert!(ctx.height.is_none());
    }

    #[test]
    fn probe_context_builder_fills_hints() {
        let tag = CodecTag::wave_format(0x0001);
        let ctx = ProbeContext::new(&tag)
            .bits(24)
            .channels(2)
            .sample_rate(48_000)
            .width(1920)
            .height(1080)
            .header(&[1, 2, 3])
            .packet(&[4, 5]);
        assert_eq!(ctx.bits_per_sample, Some(24));
        assert_eq!(ctx.channels, Some(2));
        assert_eq!(ctx.sample_rate, Some(48_000));
        // The video hints were previously never exercised.
        assert_eq!(ctx.width, Some(1920));
        assert_eq!(ctx.height, Some(1080));
        assert_eq!(ctx.header.unwrap(), &[1, 2, 3]);
        assert_eq!(ctx.packet.unwrap(), &[4, 5]);
    }
}