oxideav_core/stream.rs
1//! Stream metadata shared between containers and codecs.
2
3use crate::format::{ChannelLayout, MediaType, PixelFormat, SampleFormat};
4use crate::limits::DecoderLimits;
5use crate::options::CodecOptions;
6use crate::rational::Rational;
7use crate::time::TimeBase;
8
/// A stable identifier for a codec. Codec crates register a `CodecId` so the
/// codec registry can look them up by name.
///
/// This is a newtype over `String`. The inner field is public, so the
/// name can also be read or replaced directly as `id.0`. Equality and
/// hashing are derived from the string and are therefore case-sensitive.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct CodecId(pub String);
13
14impl CodecId {
15 pub fn new(s: impl Into<String>) -> Self {
16 Self(s.into())
17 }
18
19 pub fn as_str(&self) -> &str {
20 &self.0
21 }
22}
23
24impl From<&str> for CodecId {
25 fn from(s: &str) -> Self {
26 Self(s.to_owned())
27 }
28}
29
30impl std::fmt::Display for CodecId {
31 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32 write!(f, "{}", self.0)
33 }
34}
35
/// A codec identifier scoped to a container format — the thing a
/// demuxer reads out of the file to name a codec. Resolved to a
/// [`CodecId`] by the codec registry.
///
/// Centralising these in the registry (instead of each container
/// hand-rolling its own FourCC → CodecId table) lets:
///
/// * a codec crate declare its own tag claims in `register()`, keeping
///   ownership co-located with the decoder;
/// * multiple codecs claim the same tag with priority ordering;
/// * optional per-claim probes disambiguate the tag-collision cases
///   that happen everywhere in the wild (DIV3 that's actually MPEG-4
///   Part 2, XVID that's actually MS-MPEG4v3, audio wFormatTag=0x0055
///   that could be MP3 or — very rarely — something else, etc.).
///
/// Equality and hashing are derived, so two tags are equal only when
/// both the variant and its payload match exactly.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum CodecTag {
    /// Four-character code used by AVI's `bmih.biCompression`, MP4 /
    /// QuickTime sample-entry type, Matroska V_/A_ tags built around
    /// FourCC, and many others. Stored with ASCII alphabetic bytes
    /// upper-cased so lookups are case-insensitive; non-alphabetic
    /// bytes are preserved as-is.
    ///
    /// The upper-casing is performed by [`CodecTag::fourcc`]. The
    /// variant itself is public, and building it directly stores the
    /// bytes verbatim — prefer the constructor unless the bytes are
    /// already normalised.
    Fourcc([u8; 4]),

    /// AVI / WAV `WAVEFORMATEX::wFormatTag` (e.g. 0x0001 = PCM,
    /// 0x0055 = MP3, 0x00FF = "raw" AAC, 0x1610 = AAC ADTS).
    WaveFormat(u16),

    /// MP4 ObjectTypeIndication (ISO/IEC 14496-1 Table 5 / the values
    /// in an MP4 `esds` `DecoderConfigDescriptor`). e.g. 0x40 = MPEG-4
    /// AAC, 0x20 = MPEG-4 Visual, 0x69 = MP3.
    Mp4ObjectType(u8),

    /// Matroska `CodecID` element (full string, e.g.
    /// `"V_MPEG4/ISO/AVC"`, `"A_AAC"`, `"A_VORBIS"`). The string is
    /// stored and compared exactly as given.
    Matroska(String),
}
72
73impl CodecTag {
74 /// Build a FourCC tag, upper-casing alphabetic bytes.
75 pub fn fourcc(raw: &[u8; 4]) -> Self {
76 let mut out = [0u8; 4];
77 for i in 0..4 {
78 out[i] = raw[i].to_ascii_uppercase();
79 }
80 Self::Fourcc(out)
81 }
82
83 pub fn wave_format(tag: u16) -> Self {
84 Self::WaveFormat(tag)
85 }
86
87 pub fn mp4_object_type(oti: u8) -> Self {
88 Self::Mp4ObjectType(oti)
89 }
90
91 pub fn matroska(id: impl Into<String>) -> Self {
92 Self::Matroska(id.into())
93 }
94}
95
96impl std::fmt::Display for CodecTag {
97 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
98 match self {
99 Self::Fourcc(fcc) => {
100 // Print as bytes when ASCII-printable, else as hex.
101 if fcc.iter().all(|b| b.is_ascii_graphic() || *b == b' ') {
102 write!(f, "fourcc({})", std::str::from_utf8(fcc).unwrap_or("????"))
103 } else {
104 write!(
105 f,
106 "fourcc(0x{:02X}{:02X}{:02X}{:02X})",
107 fcc[0], fcc[1], fcc[2], fcc[3]
108 )
109 }
110 }
111 Self::WaveFormat(t) => write!(f, "wFormatTag(0x{t:04X})"),
112 Self::Mp4ObjectType(o) => write!(f, "mp4_oti(0x{o:02X})"),
113 Self::Matroska(s) => write!(f, "matroska({s})"),
114 }
115 }
116}
117
/// Context passed to a codec's probe function during tag resolution.
///
/// Built by the demuxer from whatever it has already parsed (stream
/// format block, a peek at the first packet, numeric hints like
/// `bits_per_sample`). Probes read fields directly; the struct is
/// `#[non_exhaustive]` so additional hints can be added later without
/// breaking codec crates that match on it.
///
/// The canonical construction pattern, for a demuxer:
///
/// ```
/// # use oxideav_core::{CodecTag, ProbeContext};
/// let tag = CodecTag::wave_format(0x0001);
/// let ctx = ProbeContext::new(&tag)
///     .bits(24)
///     .channels(2)
///     .sample_rate(48_000);
/// # let _ = ctx;
/// ```
///
/// Codec authors read fields like `ctx.bits_per_sample` / `ctx.tag`
/// directly — `#[non_exhaustive]` forbids struct-literal construction
/// from outside this crate but does not restrict field access.
///
/// The lifetime `'a` ties the context to the tag and to any header /
/// packet byte slices it borrows; the context holds references only.
#[non_exhaustive]
#[derive(Clone, Debug)]
pub struct ProbeContext<'a> {
    /// The tag being resolved — always set.
    pub tag: &'a CodecTag,
    /// Raw container-level stream-format blob if available
    /// (e.g. WAVEFORMATEX, BITMAPINFOHEADER, MP4 sample-entry bytes,
    /// Matroska `CodecPrivate`). Format is container-specific.
    pub header: Option<&'a [u8]>,
    /// First packet bytes if the demuxer has already read one.
    /// Most demuxers resolve tags at stream-discovery time before any
    /// packet exists; this is `None` in that case.
    pub packet: Option<&'a [u8]>,
    /// Audio: bits per sample (from WAVEFORMATEX, MP4 sample entry,
    /// Matroska `BitDepth`, etc.).
    pub bits_per_sample: Option<u16>,
    /// Channel-count hint; `None` until the demuxer supplies one.
    pub channels: Option<u16>,
    /// Sample-rate hint; `None` until supplied. Presumably in Hz (the
    /// example above passes `48_000`) — confirm against the demuxers.
    pub sample_rate: Option<u32>,
    /// Frame-width hint; `None` until supplied. Presumably in pixels —
    /// confirm against the demuxers.
    pub width: Option<u32>,
    /// Frame-height hint; `None` until supplied. Presumably in pixels —
    /// confirm against the demuxers.
    pub height: Option<u32>,
}
162
163impl<'a> ProbeContext<'a> {
164 /// Start building a context for `tag` with every hint field empty.
165 pub fn new(tag: &'a CodecTag) -> Self {
166 Self {
167 tag,
168 header: None,
169 packet: None,
170 bits_per_sample: None,
171 channels: None,
172 sample_rate: None,
173 width: None,
174 height: None,
175 }
176 }
177
178 pub fn header(mut self, h: &'a [u8]) -> Self {
179 self.header = Some(h);
180 self
181 }
182
183 pub fn packet(mut self, p: &'a [u8]) -> Self {
184 self.packet = Some(p);
185 self
186 }
187
188 pub fn bits(mut self, n: u16) -> Self {
189 self.bits_per_sample = Some(n);
190 self
191 }
192
193 pub fn channels(mut self, n: u16) -> Self {
194 self.channels = Some(n);
195 self
196 }
197
198 pub fn sample_rate(mut self, n: u32) -> Self {
199 self.sample_rate = Some(n);
200 self
201 }
202
203 pub fn width(mut self, n: u32) -> Self {
204 self.width = Some(n);
205 self
206 }
207
208 pub fn height(mut self, n: u32) -> Self {
209 self.height = Some(n);
210 self
211 }
212}
213
/// Confidence value returned by a probe. `1.0` means "certainly me",
/// `0.0` means "not me", values in between mean "partial evidence — if
/// no higher-confidence claim exists, this should win". The registry
/// picks the claim with the highest returned confidence and skips any
/// that return `0.0`.
///
/// This is a plain alias for `f32`: the type itself does not enforce
/// the `0.0..=1.0` range, so probes are responsible for staying in it.
pub type Confidence = f32;
220
/// A probe function a codec attaches to its registration to
/// disambiguate tag collisions. Called once per candidate
/// registration during `resolve_tag`.
///
/// This is a plain function pointer, not a closure type, so a probe
/// cannot capture state: everything it inspects arrives through the
/// [`ProbeContext`] argument.
pub type ProbeFn = fn(&ProbeContext) -> Confidence;
225
/// Resolve a [`CodecTag`] (FourCC / WAVEFORMATEX / Matroska id / …) to a
/// [`CodecId`]. The [`oxideav-codec`](https://crates.io/crates/oxideav-codec)
/// registry implements this, but defining the trait here lets
/// containers consume tag resolution via `&dyn CodecResolver` without
/// pulling in the codec crate as a direct dependency.
///
/// The `Sync` supertrait means every implementor must be safe to share
/// by reference across threads.
pub trait CodecResolver: Sync {
    /// Resolve the tag in `ctx.tag` to a codec id. Implementations walk
    /// every registration whose tag set contains the tag, call each
    /// probe (treating `None` as "always 1.0"), and return the id with
    /// the highest resulting confidence. Ties are broken by
    /// registration order.
    ///
    /// A return value of `None` means no codec id could be resolved
    /// for the tag.
    fn resolve_tag(&self, ctx: &ProbeContext) -> Option<CodecId>;
}
239
/// Null resolver that resolves nothing — useful as a default when a
/// caller doesn't have a real registry handy (e.g. unit tests, or
/// legacy callers of the tag-free `open()` APIs).
///
/// A zero-sized unit struct; it derives `Debug` like the other public
/// types in this module so it can sit inside `#[derive(Debug)]` holders
/// and be printed in diagnostics.
#[derive(Debug, Default, Clone, Copy)]
pub struct NullCodecResolver;
245
/// Resolver implementation that never matches any tag.
impl CodecResolver for NullCodecResolver {
    /// Ignores `_ctx` entirely and always reports `None`, i.e. no codec
    /// id is known for any tag.
    fn resolve_tag(&self, _ctx: &ProbeContext) -> Option<CodecId> {
        None
    }
}
251
/// Codec-level parameters shared between demuxer/muxer and en/decoder.
///
/// **Marked `#[non_exhaustive]`** — outside this crate the struct cannot
/// be built with struct-expression syntax at all, and that includes the
/// functional-update form `CodecParameters { ..base }`. Start from one
/// of the constructors ([`audio`](Self::audio), [`video`](Self::video),
/// [`subtitle`](Self::subtitle), [`data`](Self::data)) and then assign
/// the public fields or chain the builder methods. This is what lets
/// new fields be added without another semver break.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct CodecParameters {
    /// Codec these parameters describe.
    pub codec_id: CodecId,
    /// Kind of stream; fixed by whichever constructor built the value.
    pub media_type: MediaType,

    // Audio-specific fields — left `None` by every constructor.
    /// Audio sample rate, `None` when unknown (presumably in Hz —
    /// confirm against the demuxers).
    pub sample_rate: Option<u32>,
    /// Number of audio channels, `None` when unknown. See
    /// [`Self::resolved_channels`] for a count that also considers
    /// [`channel_layout`](Self::channel_layout).
    pub channels: Option<u16>,
    /// Audio sample format, `None` when unknown.
    pub sample_format: Option<SampleFormat>,
    /// Speaker layout for the audio stream. **This is the canonical
    /// answer to "what layout does this stream have?"** — layout is a
    /// stream-level property and is intentionally *not* duplicated on
    /// individual [`AudioFrame`](crate::AudioFrame)s.
    ///
    /// Optional and additive alongside [`channels`](Self::channels): a
    /// codec/container that only knows the count can leave this `None`
    /// and consumers will fall back to [`ChannelLayout::from_count`]
    /// via [`Self::resolved_layout`]. When both are set, they must
    /// agree on channel count.
    pub channel_layout: Option<ChannelLayout>,

    // Video-specific fields — left `None` by every constructor.
    /// Frame width, `None` when unknown (presumably in pixels —
    /// confirm against the demuxers).
    pub width: Option<u32>,
    /// Frame height, `None` when unknown (presumably in pixels —
    /// confirm against the demuxers).
    pub height: Option<u32>,
    /// Pixel format of decoded frames, `None` when unknown.
    pub pixel_format: Option<PixelFormat>,
    /// Frame rate as a rational number, `None` when unknown.
    pub frame_rate: Option<Rational>,

    /// Per-codec setup bytes (e.g., SPS/PPS, OpusHead). Format defined by codec.
    /// Empty when the codec needs none or none has been supplied yet.
    pub extradata: Vec<u8>,

    /// Stream bit rate, `None` when unknown (presumably bits per
    /// second — confirm against the demuxers).
    pub bit_rate: Option<u64>,

    /// Codec-specific tuning knobs (e.g. `{"interlace": "true"}` for PNG's
    /// Adam7 encode, `{"crf": "23"}` for h264). Empty by default. The shape
    /// is declared by each codec's options struct — see
    /// [`crate::options`]. Parsed once at encoder/decoder construction;
    /// the hot path never touches this.
    pub options: CodecOptions,

    /// DoS-protection caps threaded into every decoder constructed from
    /// these parameters. See [`DecoderLimits`] for the semantics of each
    /// field. Defaults are conservative-but-finite (32 k × 32 k pixels,
    /// 1 GiB per arena, etc.) — every existing real-world stream
    /// decodes unchanged. Tighten via [`Self::with_limits`] when the
    /// caller wants to harden the pipeline against untrusted input.
    pub limits: DecoderLimits,
}
307
308impl CodecParameters {
309 pub fn audio(codec_id: CodecId) -> Self {
310 Self {
311 codec_id,
312 media_type: MediaType::Audio,
313 sample_rate: None,
314 channels: None,
315 sample_format: None,
316 channel_layout: None,
317 width: None,
318 height: None,
319 pixel_format: None,
320 frame_rate: None,
321 extradata: Vec::new(),
322 bit_rate: None,
323 options: CodecOptions::default(),
324 limits: DecoderLimits::default(),
325 }
326 }
327
328 /// True when `self` and `other` have the same codec_id and core
329 /// format parameters (sample_rate/channels/sample_format for audio,
330 /// width/height/pixel_format for video). Extradata and bitrate
331 /// differences are tolerated — many containers rewrite extradata
332 /// losslessly during a copy operation. `channel_layout` is compared
333 /// only via the channel count (through [`Self::resolved_layout`]) so
334 /// a stream that surfaces an explicit layout still matches a
335 /// count-only stream of the same width.
336 pub fn matches_core(&self, other: &CodecParameters) -> bool {
337 self.codec_id == other.codec_id
338 && self.sample_rate == other.sample_rate
339 && self.channels == other.channels
340 && self.sample_format == other.sample_format
341 && self.width == other.width
342 && self.height == other.height
343 && self.pixel_format == other.pixel_format
344 }
345
346 pub fn video(codec_id: CodecId) -> Self {
347 Self {
348 codec_id,
349 media_type: MediaType::Video,
350 sample_rate: None,
351 channels: None,
352 sample_format: None,
353 channel_layout: None,
354 width: None,
355 height: None,
356 pixel_format: None,
357 frame_rate: None,
358 extradata: Vec::new(),
359 bit_rate: None,
360 options: CodecOptions::default(),
361 limits: DecoderLimits::default(),
362 }
363 }
364
365 /// Construct subtitle codec parameters. No format-specific fields
366 /// are populated — subtitle codecs typically only carry an opaque
367 /// `extradata` blob (the format's header / style block) and the
368 /// codec id.
369 pub fn subtitle(codec_id: CodecId) -> Self {
370 Self {
371 codec_id,
372 media_type: MediaType::Subtitle,
373 sample_rate: None,
374 channels: None,
375 sample_format: None,
376 channel_layout: None,
377 width: None,
378 height: None,
379 pixel_format: None,
380 frame_rate: None,
381 extradata: Vec::new(),
382 bit_rate: None,
383 options: CodecOptions::default(),
384 limits: DecoderLimits::default(),
385 }
386 }
387
388 /// Construct generic data-stream codec parameters (timed metadata,
389 /// chapters, etc.). Like [`Self::subtitle`], no format-specific
390 /// fields are populated.
391 pub fn data(codec_id: CodecId) -> Self {
392 Self {
393 codec_id,
394 media_type: MediaType::Data,
395 sample_rate: None,
396 channels: None,
397 sample_format: None,
398 channel_layout: None,
399 width: None,
400 height: None,
401 pixel_format: None,
402 frame_rate: None,
403 extradata: Vec::new(),
404 bit_rate: None,
405 options: CodecOptions::default(),
406 limits: DecoderLimits::default(),
407 }
408 }
409
410 /// Builder method: set the channel count.
411 ///
412 /// Pairs with [`Self::channel_layout`] for the layout. The two are
413 /// kept as independent fields so a codec that only knows one or the
414 /// other can populate just the field it has; [`Self::resolved_layout`]
415 /// derives a layout from whatever is set.
416 pub fn channels(mut self, n: u16) -> Self {
417 self.channels = Some(n);
418 self
419 }
420
421 /// Builder method: set the channel layout. Mirrors
422 /// [`Self::channels`]; setting one does not auto-fill the other —
423 /// use [`Self::resolved_layout`] / [`Self::resolved_channels`] at
424 /// read time to bridge the two.
425 pub fn channel_layout(mut self, layout: ChannelLayout) -> Self {
426 self.channel_layout = Some(layout);
427 self
428 }
429
430 /// Best-effort layout: prefers an explicit [`Self::channel_layout`]
431 /// when set, otherwise infers one from [`Self::channels`] via
432 /// [`ChannelLayout::from_count`]. Returns `None` only when neither
433 /// field is populated (e.g. video / data streams, or audio params
434 /// surfaced before the codec has been opened).
435 ///
436 /// This is the canonical call-site for resolving a stream's
437 /// channel layout — frames do *not* carry layout, so audio
438 /// consumers (downmix, device routing, channel-aware filters)
439 /// should read it from the stream's `CodecParameters` once and
440 /// pass it down with the frame.
441 pub fn resolved_layout(&self) -> Option<ChannelLayout> {
442 self.channel_layout
443 .or_else(|| self.channels.map(ChannelLayout::from_count))
444 }
445
446 /// Best-effort channel count: prefers an explicit
447 /// [`Self::channels`] when set, otherwise reads the count off
448 /// [`Self::channel_layout`]. Returns `None` only when neither
449 /// field is populated.
450 pub fn resolved_channels(&self) -> Option<u16> {
451 self.channels
452 .or_else(|| self.channel_layout.map(|l| l.channel_count()))
453 }
454
455 /// Read-only access to the DoS-protection caps for any decoder
456 /// constructed from these parameters. See [`DecoderLimits`].
457 pub fn limits(&self) -> &DecoderLimits {
458 &self.limits
459 }
460
461 /// Builder method: replace the [`DecoderLimits`] for these
462 /// parameters. Use to tighten caps before passing parameters into
463 /// `make_decoder` (e.g. when processing untrusted uploads on a
464 /// shared server).
465 ///
466 /// ```
467 /// # use oxideav_core::{CodecId, CodecParameters, DecoderLimits};
468 /// let limits = DecoderLimits::default()
469 /// .with_max_pixels_per_frame(4096 * 4096)
470 /// .with_max_arenas_in_flight(2);
471 /// let p = CodecParameters::video(CodecId::new("h263")).with_limits(limits);
472 /// assert_eq!(p.limits().max_pixels_per_frame, 4096 * 4096);
473 /// ```
474 pub fn with_limits(mut self, limits: DecoderLimits) -> Self {
475 self.limits = limits;
476 self
477 }
478}
479
/// Description of a single stream inside a container.
#[derive(Clone, Debug)]
pub struct StreamInfo {
    /// Numeric index identifying the stream (presumably its position
    /// within the container — confirm against the demuxers).
    pub index: u32,
    /// Time base of the stream; presumably the unit in which
    /// `duration` and `start_time` are expressed — TODO confirm.
    pub time_base: TimeBase,
    /// Stream duration, `None` when unknown.
    pub duration: Option<i64>,
    /// Timestamp at which the stream starts, `None` when unknown.
    pub start_time: Option<i64>,
    /// Codec-level parameters of the stream.
    pub params: CodecParameters,
}
489
// Unit tests for `CodecTag` construction and `Display`, the null
// resolver, and the `ProbeContext` builder.
#[cfg(test)]
mod codec_tag_tests {
    use super::*;

    // `CodecTag::fourcc` folds ASCII letters to upper case and leaves
    // every other byte untouched.
    #[test]
    fn fourcc_uppercases_on_construction() {
        let t = CodecTag::fourcc(b"div3");
        assert_eq!(t, CodecTag::Fourcc(*b"DIV3"));
        // Non-alphabetic bytes preserved unchanged.
        let t2 = CodecTag::fourcc(b"MP42");
        assert_eq!(t2, CodecTag::Fourcc(*b"MP42"));
        let t3 = CodecTag::fourcc(&[0xFF, b'a', 0x00, b'1']);
        assert_eq!(t3, CodecTag::Fourcc([0xFF, b'A', 0x00, b'1']));
    }

    // Tags built through the constructor compare equal regardless of the
    // letter case of the input.
    #[test]
    fn fourcc_equality_case_insensitive_via_ctor() {
        assert_eq!(CodecTag::fourcc(b"xvid"), CodecTag::fourcc(b"XVID"));
        assert_eq!(CodecTag::fourcc(b"DiV3"), CodecTag::fourcc(b"div3"));
    }

    // A fully printable FourCC is rendered as text.
    #[test]
    fn display_printable_fourcc() {
        assert_eq!(CodecTag::fourcc(b"XVID").to_string(), "fourcc(XVID)");
    }

    // A FourCC containing non-printable bytes is rendered as hex.
    #[test]
    fn display_non_printable_fourcc_as_hex() {
        let t = CodecTag::Fourcc([0x00, 0x00, 0x00, 0x01]);
        assert_eq!(t.to_string(), "fourcc(0x00000001)");
    }

    // wFormatTag values print as four hex digits.
    #[test]
    fn display_wave_format() {
        assert_eq!(
            CodecTag::wave_format(0x0055).to_string(),
            "wFormatTag(0x0055)"
        );
    }

    // MP4 object type indications print as two hex digits.
    #[test]
    fn display_mp4_oti() {
        assert_eq!(CodecTag::mp4_object_type(0x40).to_string(), "mp4_oti(0x40)");
    }

    // Matroska ids print verbatim.
    #[test]
    fn display_matroska() {
        assert_eq!(
            CodecTag::matroska("V_MPEG4/ISO/AVC").to_string(),
            "matroska(V_MPEG4/ISO/AVC)",
        );
    }

    // The null resolver returns `None` for every kind of tag.
    #[test]
    fn null_resolver_resolves_nothing() {
        let r = NullCodecResolver;
        let xvid = CodecTag::fourcc(b"XVID");
        assert!(r.resolve_tag(&ProbeContext::new(&xvid)).is_none());
        let wf = CodecTag::wave_format(0x0055);
        assert!(r.resolve_tag(&ProbeContext::new(&wf)).is_none());
    }

    // Each builder call stores its argument in the matching field.
    #[test]
    fn probe_context_builder_fills_hints() {
        let tag = CodecTag::wave_format(0x0001);
        let ctx = ProbeContext::new(&tag)
            .bits(24)
            .channels(2)
            .sample_rate(48_000)
            .header(&[1, 2, 3])
            .packet(&[4, 5]);
        assert_eq!(ctx.bits_per_sample, Some(24));
        assert_eq!(ctx.channels, Some(2));
        assert_eq!(ctx.sample_rate, Some(48_000));
        assert_eq!(ctx.header.unwrap(), &[1, 2, 3]);
        assert_eq!(ctx.packet.unwrap(), &[4, 5]);
    }
}
568
// Unit tests for how `CodecParameters` bridges the independent
// `channels` and `channel_layout` fields.
#[cfg(test)]
mod channel_layout_plumbing_tests {
    use super::*;

    // Freshly constructed audio parameters carry neither a count nor a
    // layout, so both resolved accessors report `None`.
    #[test]
    fn audio_params_default_to_no_layout() {
        let p = CodecParameters::audio(CodecId::new("pcm_s16le"));
        assert!(p.channel_layout.is_none());
        assert!(p.channels.is_none());
        assert!(p.resolved_layout().is_none());
        assert!(p.resolved_channels().is_none());
    }

    // With only a count set, the layout is inferred from the count and
    // the raw `channel_layout` field stays empty.
    #[test]
    fn channels_only_infers_layout_via_from_count() {
        let p = CodecParameters::audio(CodecId::new("pcm_s16le")).channels(6);
        assert_eq!(p.channels, Some(6));
        assert!(p.channel_layout.is_none());
        assert_eq!(p.resolved_layout(), Some(ChannelLayout::Surround51));
        assert_eq!(p.resolved_channels(), Some(6));
    }

    // When both are set, the explicit layout is the one reported.
    #[test]
    fn explicit_layout_wins_over_count() {
        let p = CodecParameters::audio(CodecId::new("ac3"))
            .channels(6)
            .channel_layout(ChannelLayout::Surround60);
        // 6ch by-count would default to Surround51, but the explicit
        // layout overrides.
        assert_eq!(p.resolved_layout(), Some(ChannelLayout::Surround60));
        assert_eq!(p.resolved_channels(), Some(6));
    }

    // With only a layout set, the count is read off the layout and the
    // raw `channels` field stays empty.
    #[test]
    fn layout_only_yields_count_via_resolved_channels() {
        let p =
            CodecParameters::audio(CodecId::new("ac3")).channel_layout(ChannelLayout::Surround71);
        assert!(p.channels.is_none());
        assert_eq!(p.resolved_channels(), Some(8));
        assert_eq!(p.resolved_layout(), Some(ChannelLayout::Surround71));
    }
}