// oxideav_core/stream.rs

1//! Stream metadata shared between containers and codecs.
2
3use crate::format::{ChannelLayout, MediaType, PixelFormat, SampleFormat};
4use crate::limits::DecoderLimits;
5use crate::options::CodecOptions;
6use crate::rational::Rational;
7use crate::time::TimeBase;
8
/// Stable, string-backed name of a codec.
///
/// Codec crates register a `CodecId`, and the codec registry uses that
/// name as its lookup key.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct CodecId(pub String);
13
14impl CodecId {
15    pub fn new(s: impl Into<String>) -> Self {
16        Self(s.into())
17    }
18
19    pub fn as_str(&self) -> &str {
20        &self.0
21    }
22}
23
24impl From<&str> for CodecId {
25    fn from(s: &str) -> Self {
26        Self(s.to_owned())
27    }
28}
29
30impl std::fmt::Display for CodecId {
31    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32        write!(f, "{}", self.0)
33    }
34}
35
/// Container-scoped codec identifier: the value a demuxer reads from a
/// file to name a codec. The codec registry resolves it to a
/// [`CodecId`].
///
/// Keeping these mappings in the registry, rather than having each
/// container maintain its own FourCC → CodecId table, means that:
///
/// * a codec crate declares the tags it claims inside `register()`,
///   next to the decoder that handles them;
/// * several codecs may claim one tag, ordered by priority;
/// * an optional per-claim probe can settle the tag collisions seen in
///   real files (DIV3 that is really MPEG-4 Part 2, XVID that is really
///   MS-MPEG4v3, audio wFormatTag=0x0055 that is MP3 or — very rarely —
///   something else, and so on).
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub enum CodecTag {
    /// Four-character code, as found in AVI's `bmih.biCompression`,
    /// MP4 / QuickTime sample-entry types, Matroska V_/A_ tags built
    /// around a FourCC, and many other places.
    ///
    /// [`CodecTag::fourcc`] upper-cases alphabetic bytes so lookups are
    /// case-insensitive, leaving non-alphabetic bytes untouched. Building
    /// the variant directly skips that normalisation.
    Fourcc([u8; 4]),

    /// `WAVEFORMATEX::wFormatTag` from AVI / WAV (e.g. 0x0001 = PCM,
    /// 0x0055 = MP3, 0x00FF = "raw" AAC, 0x1610 = AAC ADTS).
    WaveFormat(u16),

    /// MP4 ObjectTypeIndication (ISO/IEC 14496-1 Table 5, i.e. the value
    /// carried in an `esds` `DecoderConfigDescriptor`), e.g. 0x40 =
    /// MPEG-4 AAC, 0x20 = MPEG-4 Visual, 0x69 = MP3.
    Mp4ObjectType(u8),

    /// Complete Matroska `CodecID` string, e.g. `"V_MPEG4/ISO/AVC"`,
    /// `"A_AAC"`, `"A_VORBIS"`.
    Matroska(String),
}
72
73impl CodecTag {
74    /// Build a FourCC tag, upper-casing alphabetic bytes.
75    pub fn fourcc(raw: &[u8; 4]) -> Self {
76        let mut out = [0u8; 4];
77        for i in 0..4 {
78            out[i] = raw[i].to_ascii_uppercase();
79        }
80        Self::Fourcc(out)
81    }
82
83    pub fn wave_format(tag: u16) -> Self {
84        Self::WaveFormat(tag)
85    }
86
87    pub fn mp4_object_type(oti: u8) -> Self {
88        Self::Mp4ObjectType(oti)
89    }
90
91    pub fn matroska(id: impl Into<String>) -> Self {
92        Self::Matroska(id.into())
93    }
94}
95
96impl std::fmt::Display for CodecTag {
97    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
98        match self {
99            Self::Fourcc(fcc) => {
100                // Print as bytes when ASCII-printable, else as hex.
101                if fcc.iter().all(|b| b.is_ascii_graphic() || *b == b' ') {
102                    write!(f, "fourcc({})", std::str::from_utf8(fcc).unwrap_or("????"))
103                } else {
104                    write!(
105                        f,
106                        "fourcc(0x{:02X}{:02X}{:02X}{:02X})",
107                        fcc[0], fcc[1], fcc[2], fcc[3]
108                    )
109                }
110            }
111            Self::WaveFormat(t) => write!(f, "wFormatTag(0x{t:04X})"),
112            Self::Mp4ObjectType(o) => write!(f, "mp4_oti(0x{o:02X})"),
113            Self::Matroska(s) => write!(f, "matroska({s})"),
114        }
115    }
116}
117
118/// Context passed to a codec's probe function during tag resolution.
119///
120/// Built by the demuxer from whatever it has already parsed (stream
121/// format block, a peek at the first packet, numeric hints like
122/// `bits_per_sample`). Probes read fields directly; the struct is
123/// `#[non_exhaustive]` so additional hints can be added later without
124/// breaking codec crates that match on it.
125///
126/// The canonical construction pattern, for a demuxer:
127///
128/// ```
129/// # use oxideav_core::{CodecTag, ProbeContext};
130/// let tag = CodecTag::wave_format(0x0001);
131/// let ctx = ProbeContext::new(&tag)
132///     .bits(24)
133///     .channels(2)
134///     .sample_rate(48_000);
135/// # let _ = ctx;
136/// ```
137///
138/// Codec authors read fields like `ctx.bits_per_sample` / `ctx.tag`
139/// directly — `#[non_exhaustive]` forbids struct-literal construction
140/// from outside this crate but does not restrict field access.
141#[non_exhaustive]
142#[derive(Clone, Debug)]
143pub struct ProbeContext<'a> {
144    /// The tag being resolved — always set.
145    pub tag: &'a CodecTag,
146    /// Raw container-level stream-format blob if available
147    /// (e.g. WAVEFORMATEX, BITMAPINFOHEADER, MP4 sample-entry bytes,
148    /// Matroska `CodecPrivate`). Format is container-specific.
149    pub header: Option<&'a [u8]>,
150    /// First packet bytes if the demuxer has already read one.
151    /// Most demuxers resolve tags at stream-discovery time before any
152    /// packet exists; this is `None` in that case.
153    pub packet: Option<&'a [u8]>,
154    /// Audio: bits per sample (from WAVEFORMATEX, MP4 sample entry,
155    /// Matroska `BitDepth`, etc.).
156    pub bits_per_sample: Option<u16>,
157    pub channels: Option<u16>,
158    pub sample_rate: Option<u32>,
159    pub width: Option<u32>,
160    pub height: Option<u32>,
161}
162
163impl<'a> ProbeContext<'a> {
164    /// Start building a context for `tag` with every hint field empty.
165    pub fn new(tag: &'a CodecTag) -> Self {
166        Self {
167            tag,
168            header: None,
169            packet: None,
170            bits_per_sample: None,
171            channels: None,
172            sample_rate: None,
173            width: None,
174            height: None,
175        }
176    }
177
178    pub fn header(mut self, h: &'a [u8]) -> Self {
179        self.header = Some(h);
180        self
181    }
182
183    pub fn packet(mut self, p: &'a [u8]) -> Self {
184        self.packet = Some(p);
185        self
186    }
187
188    pub fn bits(mut self, n: u16) -> Self {
189        self.bits_per_sample = Some(n);
190        self
191    }
192
193    pub fn channels(mut self, n: u16) -> Self {
194        self.channels = Some(n);
195        self
196    }
197
198    pub fn sample_rate(mut self, n: u32) -> Self {
199        self.sample_rate = Some(n);
200        self
201    }
202
203    pub fn width(mut self, n: u32) -> Self {
204        self.width = Some(n);
205        self
206    }
207
208    pub fn height(mut self, n: u32) -> Self {
209        self.height = Some(n);
210        self
211    }
212}
213
/// Score a probe hands back for one candidate codec.
///
/// * `1.0` — "certainly me".
/// * `0.0` — "not me"; the registry skips such claims.
/// * anything in between — partial evidence, which should win only if
///   no claim with a higher confidence exists.
///
/// The registry selects the claim that returned the highest value.
pub type Confidence = f32;
220
221/// A probe function a codec attaches to its registration to
222/// disambiguate tag collisions. Called once per candidate
223/// registration during `resolve_tag`.
224pub type ProbeFn = fn(&ProbeContext) -> Confidence;
225
226/// Resolve a [`CodecTag`] (FourCC / WAVEFORMATEX / Matroska id / …) to a
227/// [`CodecId`]. The [`oxideav-codec`](https://crates.io/crates/oxideav-codec)
228/// registry implements this, but defining the trait here lets
229/// containers consume tag resolution via `&dyn CodecResolver` without
230/// pulling in the codec crate as a direct dependency.
231pub trait CodecResolver: Sync {
232    /// Resolve the tag in `ctx.tag` to a codec id. Implementations walk
233    /// every registration whose tag set contains the tag, call each
234    /// probe (treating `None` as "always 1.0"), and return the id with
235    /// the highest resulting confidence. Ties are broken by
236    /// registration order.
237    fn resolve_tag(&self, ctx: &ProbeContext) -> Option<CodecId>;
238}
239
/// Resolver that never resolves anything.
///
/// Handy as a default when no real registry is at hand — unit tests, or
/// legacy callers of the tag-free `open()` APIs.
#[derive(Debug, Default, Clone, Copy)]
pub struct NullCodecResolver;
245
246impl CodecResolver for NullCodecResolver {
247    fn resolve_tag(&self, _ctx: &ProbeContext) -> Option<CodecId> {
248        None
249    }
250}
251
252/// Codec-level parameters shared between demuxer/muxer and en/decoder.
253///
254/// **Marked `#[non_exhaustive]`** — construction via struct-literal
255/// syntax is not supported. Use the [`audio`](Self::audio) /
256/// [`video`](Self::video) constructors (or functional-update
257/// `CodecParameters { ..base }` syntax) so new fields can be added
258/// without another semver break.
259#[derive(Clone, Debug)]
260#[non_exhaustive]
261pub struct CodecParameters {
262    pub codec_id: CodecId,
263    pub media_type: MediaType,
264
265    // Audio-specific
266    pub sample_rate: Option<u32>,
267    pub channels: Option<u16>,
268    pub sample_format: Option<SampleFormat>,
269    /// Speaker layout for the audio stream. **This is the canonical
270    /// answer to "what layout does this stream have?"** — layout is a
271    /// stream-level property and is intentionally *not* duplicated on
272    /// individual [`AudioFrame`](crate::AudioFrame)s.
273    ///
274    /// Optional and additive alongside [`channels`](Self::channels): a
275    /// codec/container that only knows the count can leave this `None`
276    /// and consumers will fall back to [`ChannelLayout::from_count`]
277    /// via [`Self::resolved_layout`]. When both are set, they must
278    /// agree on channel count.
279    pub channel_layout: Option<ChannelLayout>,
280
281    // Video-specific
282    pub width: Option<u32>,
283    pub height: Option<u32>,
284    pub pixel_format: Option<PixelFormat>,
285    pub frame_rate: Option<Rational>,
286
287    /// Per-codec setup bytes (e.g., SPS/PPS, OpusHead). Format defined by codec.
288    pub extradata: Vec<u8>,
289
290    pub bit_rate: Option<u64>,
291
292    /// Codec-specific tuning knobs (e.g. `{"interlace": "true"}` for PNG's
293    /// Adam7 encode, `{"crf": "23"}` for h264). Empty by default. The shape
294    /// is declared by each codec's options struct — see
295    /// [`crate::options`]. Parsed once at encoder/decoder construction;
296    /// the hot path never touches this.
297    pub options: CodecOptions,
298
299    /// DoS-protection caps threaded into every decoder constructed from
300    /// these parameters. See [`DecoderLimits`] for the semantics of each
301    /// field. Defaults are conservative-but-finite (32 k × 32 k pixels,
302    /// 1 GiB per arena, etc.) — every existing real-world stream
303    /// decodes unchanged. Tighten via [`Self::with_limits`] when the
304    /// caller wants to harden the pipeline against untrusted input.
305    pub limits: DecoderLimits,
306}
307
308impl CodecParameters {
309    pub fn audio(codec_id: CodecId) -> Self {
310        Self {
311            codec_id,
312            media_type: MediaType::Audio,
313            sample_rate: None,
314            channels: None,
315            sample_format: None,
316            channel_layout: None,
317            width: None,
318            height: None,
319            pixel_format: None,
320            frame_rate: None,
321            extradata: Vec::new(),
322            bit_rate: None,
323            options: CodecOptions::default(),
324            limits: DecoderLimits::default(),
325        }
326    }
327
328    /// True when `self` and `other` have the same codec_id and core
329    /// format parameters (sample_rate/channels/sample_format for audio,
330    /// width/height/pixel_format for video). Extradata and bitrate
331    /// differences are tolerated — many containers rewrite extradata
332    /// losslessly during a copy operation. `channel_layout` is compared
333    /// only via the channel count (through [`Self::resolved_layout`]) so
334    /// a stream that surfaces an explicit layout still matches a
335    /// count-only stream of the same width.
336    pub fn matches_core(&self, other: &CodecParameters) -> bool {
337        self.codec_id == other.codec_id
338            && self.sample_rate == other.sample_rate
339            && self.channels == other.channels
340            && self.sample_format == other.sample_format
341            && self.width == other.width
342            && self.height == other.height
343            && self.pixel_format == other.pixel_format
344    }
345
346    pub fn video(codec_id: CodecId) -> Self {
347        Self {
348            codec_id,
349            media_type: MediaType::Video,
350            sample_rate: None,
351            channels: None,
352            sample_format: None,
353            channel_layout: None,
354            width: None,
355            height: None,
356            pixel_format: None,
357            frame_rate: None,
358            extradata: Vec::new(),
359            bit_rate: None,
360            options: CodecOptions::default(),
361            limits: DecoderLimits::default(),
362        }
363    }
364
365    /// Construct subtitle codec parameters. No format-specific fields
366    /// are populated — subtitle codecs typically only carry an opaque
367    /// `extradata` blob (the format's header / style block) and the
368    /// codec id.
369    pub fn subtitle(codec_id: CodecId) -> Self {
370        Self {
371            codec_id,
372            media_type: MediaType::Subtitle,
373            sample_rate: None,
374            channels: None,
375            sample_format: None,
376            channel_layout: None,
377            width: None,
378            height: None,
379            pixel_format: None,
380            frame_rate: None,
381            extradata: Vec::new(),
382            bit_rate: None,
383            options: CodecOptions::default(),
384            limits: DecoderLimits::default(),
385        }
386    }
387
388    /// Construct generic data-stream codec parameters (timed metadata,
389    /// chapters, etc.). Like [`Self::subtitle`], no format-specific
390    /// fields are populated.
391    pub fn data(codec_id: CodecId) -> Self {
392        Self {
393            codec_id,
394            media_type: MediaType::Data,
395            sample_rate: None,
396            channels: None,
397            sample_format: None,
398            channel_layout: None,
399            width: None,
400            height: None,
401            pixel_format: None,
402            frame_rate: None,
403            extradata: Vec::new(),
404            bit_rate: None,
405            options: CodecOptions::default(),
406            limits: DecoderLimits::default(),
407        }
408    }
409
410    /// Builder method: set the channel count.
411    ///
412    /// Pairs with [`Self::channel_layout`] for the layout. The two are
413    /// kept as independent fields so a codec that only knows one or the
414    /// other can populate just the field it has; [`Self::resolved_layout`]
415    /// derives a layout from whatever is set.
416    pub fn channels(mut self, n: u16) -> Self {
417        self.channels = Some(n);
418        self
419    }
420
421    /// Builder method: set the channel layout. Mirrors
422    /// [`Self::channels`]; setting one does not auto-fill the other —
423    /// use [`Self::resolved_layout`] / [`Self::resolved_channels`] at
424    /// read time to bridge the two.
425    pub fn channel_layout(mut self, layout: ChannelLayout) -> Self {
426        self.channel_layout = Some(layout);
427        self
428    }
429
430    /// Best-effort layout: prefers an explicit [`Self::channel_layout`]
431    /// when set, otherwise infers one from [`Self::channels`] via
432    /// [`ChannelLayout::from_count`]. Returns `None` only when neither
433    /// field is populated (e.g. video / data streams, or audio params
434    /// surfaced before the codec has been opened).
435    ///
436    /// This is the canonical call-site for resolving a stream's
437    /// channel layout — frames do *not* carry layout, so audio
438    /// consumers (downmix, device routing, channel-aware filters)
439    /// should read it from the stream's `CodecParameters` once and
440    /// pass it down with the frame.
441    pub fn resolved_layout(&self) -> Option<ChannelLayout> {
442        self.channel_layout
443            .or_else(|| self.channels.map(ChannelLayout::from_count))
444    }
445
446    /// Best-effort channel count: prefers an explicit
447    /// [`Self::channels`] when set, otherwise reads the count off
448    /// [`Self::channel_layout`]. Returns `None` only when neither
449    /// field is populated.
450    pub fn resolved_channels(&self) -> Option<u16> {
451        self.channels
452            .or_else(|| self.channel_layout.map(|l| l.channel_count()))
453    }
454
455    /// Read-only access to the DoS-protection caps for any decoder
456    /// constructed from these parameters. See [`DecoderLimits`].
457    pub fn limits(&self) -> &DecoderLimits {
458        &self.limits
459    }
460
461    /// Builder method: replace the [`DecoderLimits`] for these
462    /// parameters. Use to tighten caps before passing parameters into
463    /// `make_decoder` (e.g. when processing untrusted uploads on a
464    /// shared server).
465    ///
466    /// ```
467    /// # use oxideav_core::{CodecId, CodecParameters, DecoderLimits};
468    /// let limits = DecoderLimits::default()
469    ///     .with_max_pixels_per_frame(4096 * 4096)
470    ///     .with_max_arenas_in_flight(2);
471    /// let p = CodecParameters::video(CodecId::new("h263")).with_limits(limits);
472    /// assert_eq!(p.limits().max_pixels_per_frame, 4096 * 4096);
473    /// ```
474    pub fn with_limits(mut self, limits: DecoderLimits) -> Self {
475        self.limits = limits;
476        self
477    }
478}
479
480/// Description of a single stream inside a container.
481#[derive(Clone, Debug)]
482pub struct StreamInfo {
483    pub index: u32,
484    pub time_base: TimeBase,
485    pub duration: Option<i64>,
486    pub start_time: Option<i64>,
487    pub params: CodecParameters,
488}
489
/// Unit tests for [`CodecTag`], [`ProbeContext`] and
/// [`NullCodecResolver`].
#[cfg(test)]
mod codec_tag_tests {
    use super::*;

    #[test]
    fn fourcc_uppercases_on_construction() {
        // Lower-case letters are folded to upper case.
        assert_eq!(CodecTag::fourcc(b"div3"), CodecTag::Fourcc(*b"DIV3"));
        // Already-normalised input comes through unchanged.
        assert_eq!(CodecTag::fourcc(b"MP42"), CodecTag::Fourcc(*b"MP42"));
        // Non-alphabetic bytes are preserved as-is.
        let mixed = [0xFF, b'a', 0x00, b'1'];
        let expected = [0xFF, b'A', 0x00, b'1'];
        assert_eq!(CodecTag::fourcc(&mixed), CodecTag::Fourcc(expected));
    }

    #[test]
    fn fourcc_equality_case_insensitive_via_ctor() {
        let pairs: [(&[u8; 4], &[u8; 4]); 2] = [(b"xvid", b"XVID"), (b"DiV3", b"div3")];
        for (left, right) in pairs {
            assert_eq!(CodecTag::fourcc(left), CodecTag::fourcc(right));
        }
    }

    #[test]
    fn display_printable_fourcc() {
        let shown = CodecTag::fourcc(b"XVID").to_string();
        assert_eq!(shown, "fourcc(XVID)");
    }

    #[test]
    fn display_non_printable_fourcc_as_hex() {
        let shown = CodecTag::Fourcc([0x00, 0x00, 0x00, 0x01]).to_string();
        assert_eq!(shown, "fourcc(0x00000001)");
    }

    #[test]
    fn display_wave_format() {
        let shown = CodecTag::wave_format(0x0055).to_string();
        assert_eq!(shown, "wFormatTag(0x0055)");
    }

    #[test]
    fn display_mp4_oti() {
        let shown = CodecTag::mp4_object_type(0x40).to_string();
        assert_eq!(shown, "mp4_oti(0x40)");
    }

    #[test]
    fn display_matroska() {
        let shown = CodecTag::matroska("V_MPEG4/ISO/AVC").to_string();
        assert_eq!(shown, "matroska(V_MPEG4/ISO/AVC)");
    }

    #[test]
    fn null_resolver_resolves_nothing() {
        let resolver = NullCodecResolver;
        let tags = [CodecTag::fourcc(b"XVID"), CodecTag::wave_format(0x0055)];
        for tag in &tags {
            assert!(resolver.resolve_tag(&ProbeContext::new(tag)).is_none());
        }
    }

    #[test]
    fn probe_context_builder_fills_hints() {
        let tag = CodecTag::wave_format(0x0001);
        let header_bytes = [1u8, 2, 3];
        let packet_bytes = [4u8, 5];
        let ctx = ProbeContext::new(&tag)
            .bits(24)
            .channels(2)
            .sample_rate(48_000)
            .header(&header_bytes)
            .packet(&packet_bytes);
        assert_eq!(ctx.bits_per_sample, Some(24));
        assert_eq!(ctx.channels, Some(2));
        assert_eq!(ctx.sample_rate, Some(48_000));
        assert_eq!(ctx.header.unwrap(), &[1, 2, 3]);
        assert_eq!(ctx.packet.unwrap(), &[4, 5]);
    }
}
568
/// Unit tests for the channel-count / channel-layout bridging on
/// [`CodecParameters`].
#[cfg(test)]
mod channel_layout_plumbing_tests {
    use super::*;

    #[test]
    fn audio_params_default_to_no_layout() {
        let params = CodecParameters::audio(CodecId::new("pcm_s16le"));
        assert!(params.channel_layout.is_none());
        assert!(params.channels.is_none());
        assert!(params.resolved_layout().is_none());
        assert!(params.resolved_channels().is_none());
    }

    #[test]
    fn channels_only_infers_layout_via_from_count() {
        let params = CodecParameters::audio(CodecId::new("pcm_s16le")).channels(6);
        assert_eq!(params.channels, Some(6));
        assert!(params.channel_layout.is_none());
        assert_eq!(params.resolved_layout(), Some(ChannelLayout::Surround51));
        assert_eq!(params.resolved_channels(), Some(6));
    }

    #[test]
    fn explicit_layout_wins_over_count() {
        let base = CodecParameters::audio(CodecId::new("ac3"));
        let params = base.channels(6).channel_layout(ChannelLayout::Surround60);
        // By count alone, 6 channels would resolve to Surround51; the
        // explicit layout takes precedence.
        assert_eq!(params.resolved_layout(), Some(ChannelLayout::Surround60));
        assert_eq!(params.resolved_channels(), Some(6));
    }

    #[test]
    fn layout_only_yields_count_via_resolved_channels() {
        let base = CodecParameters::audio(CodecId::new("ac3"));
        let params = base.channel_layout(ChannelLayout::Surround71);
        assert!(params.channels.is_none());
        assert_eq!(params.resolved_channels(), Some(8));
        assert_eq!(params.resolved_layout(), Some(ChannelLayout::Surround71));
    }
}