oxideav_core/stream.rs
1//! Stream metadata shared between containers and codecs.
2
3use crate::format::{MediaType, PixelFormat, SampleFormat};
4use crate::options::CodecOptions;
5use crate::rational::Rational;
6use crate::time::TimeBase;
7
/// A stable identifier for a codec. Codec crates register a `CodecId` so the
/// codec registry can look them up by name.
///
/// The identifier wraps an owned `String`; the derived equality and hashing
/// compare that string exactly (case-sensitively).
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct CodecId(pub String);

impl CodecId {
    /// Build an identifier from anything convertible into a `String`.
    pub fn new(s: impl Into<String>) -> Self {
        Self(s.into())
    }

    /// Borrow the identifier as a string slice.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}

impl From<&str> for CodecId {
    /// Copy `s` into a new owned identifier.
    fn from(s: &str) -> Self {
        Self(s.to_owned())
    }
}

impl std::fmt::Display for CodecId {
    /// Write the identifier text, honouring the caller's width, alignment
    /// and precision flags (e.g. `{:<12}` in tabular output).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `write!(f, "{}", ..)` would start a fresh format spec and silently
        // drop the flags carried by `f`; `pad` applies them, and is identical
        // to a plain write when no flags are set.
        f.pad(&self.0)
    }
}
34
/// Container-scoped codec identifier: the raw value a demuxer reads out
/// of a file to name a codec. The codec registry resolves it to a
/// [`CodecId`].
///
/// Keeping tag resolution in the registry (rather than letting every
/// container carry its own FourCC → CodecId table) means that:
///
/// * a codec crate declares its own tag claims in `register()`, so
///   ownership stays next to the decoder;
/// * several codecs can claim the same tag, ordered by priority;
/// * an optional per-claim probe can settle the tag collisions that
///   show up everywhere in the wild (DIV3 that's actually MPEG-4
///   Part 2, XVID that's actually MS-MPEG4v3, audio wFormatTag=0x0055
///   that could be MP3 or — very rarely — something else, etc.).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum CodecTag {
    /// Four-character code, as used by AVI's `bmih.biCompression`, the
    /// MP4 / QuickTime sample-entry type, Matroska V_/A_ tags built
    /// around FourCC, and many others.
    ///
    /// Tags built through [`CodecTag::fourcc`] have their alphabetic
    /// bytes upper-cased so lookups are case-insensitive; non-alphabetic
    /// bytes are preserved as-is. Constructing the variant directly
    /// stores the bytes exactly as given.
    Fourcc([u8; 4]),

    /// AVI / WAV `WAVEFORMATEX::wFormatTag` (e.g. 0x0001 = PCM,
    /// 0x0055 = MP3, 0x00FF = "raw" AAC, 0x1610 = AAC ADTS).
    WaveFormat(u16),

    /// MP4 ObjectTypeIndication (ISO/IEC 14496-1 Table 5 / the values
    /// found in an MP4 `esds` `DecoderConfigDescriptor`), e.g.
    /// 0x40 = MPEG-4 AAC, 0x20 = MPEG-4 Visual, 0x69 = MP3.
    Mp4ObjectType(u8),

    /// Matroska `CodecID` element, kept as the full string (e.g.
    /// `"V_MPEG4/ISO/AVC"`, `"A_AAC"`, `"A_VORBIS"`).
    Matroska(String),
}
71
72impl CodecTag {
73 /// Build a FourCC tag, upper-casing alphabetic bytes.
74 pub fn fourcc(raw: &[u8; 4]) -> Self {
75 let mut out = [0u8; 4];
76 for i in 0..4 {
77 out[i] = raw[i].to_ascii_uppercase();
78 }
79 Self::Fourcc(out)
80 }
81
82 pub fn wave_format(tag: u16) -> Self {
83 Self::WaveFormat(tag)
84 }
85
86 pub fn mp4_object_type(oti: u8) -> Self {
87 Self::Mp4ObjectType(oti)
88 }
89
90 pub fn matroska(id: impl Into<String>) -> Self {
91 Self::Matroska(id.into())
92 }
93}
94
95impl std::fmt::Display for CodecTag {
96 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97 match self {
98 Self::Fourcc(fcc) => {
99 // Print as bytes when ASCII-printable, else as hex.
100 if fcc.iter().all(|b| b.is_ascii_graphic() || *b == b' ') {
101 write!(f, "fourcc({})", std::str::from_utf8(fcc).unwrap_or("????"))
102 } else {
103 write!(
104 f,
105 "fourcc(0x{:02X}{:02X}{:02X}{:02X})",
106 fcc[0], fcc[1], fcc[2], fcc[3]
107 )
108 }
109 }
110 Self::WaveFormat(t) => write!(f, "wFormatTag(0x{t:04X})"),
111 Self::Mp4ObjectType(o) => write!(f, "mp4_oti(0x{o:02X})"),
112 Self::Matroska(s) => write!(f, "matroska({s})"),
113 }
114 }
115}
116
117/// Context passed to a codec's probe function during tag resolution.
118///
119/// Built by the demuxer from whatever it has already parsed (stream
120/// format block, a peek at the first packet, numeric hints like
121/// `bits_per_sample`). Probes read fields directly; the struct is
122/// `#[non_exhaustive]` so additional hints can be added later without
123/// breaking codec crates that match on it.
124///
125/// The canonical construction pattern, for a demuxer:
126///
127/// ```
128/// # use oxideav_core::{CodecTag, ProbeContext};
129/// let tag = CodecTag::wave_format(0x0001);
130/// let ctx = ProbeContext::new(&tag)
131/// .bits(24)
132/// .channels(2)
133/// .sample_rate(48_000);
134/// # let _ = ctx;
135/// ```
136///
137/// Codec authors read fields like `ctx.bits_per_sample` / `ctx.tag`
138/// directly — `#[non_exhaustive]` forbids struct-literal construction
139/// from outside this crate but does not restrict field access.
140#[non_exhaustive]
141#[derive(Clone, Debug)]
142pub struct ProbeContext<'a> {
143 /// The tag being resolved — always set.
144 pub tag: &'a CodecTag,
145 /// Raw container-level stream-format blob if available
146 /// (e.g. WAVEFORMATEX, BITMAPINFOHEADER, MP4 sample-entry bytes,
147 /// Matroska `CodecPrivate`). Format is container-specific.
148 pub header: Option<&'a [u8]>,
149 /// First packet bytes if the demuxer has already read one.
150 /// Most demuxers resolve tags at stream-discovery time before any
151 /// packet exists; this is `None` in that case.
152 pub packet: Option<&'a [u8]>,
153 /// Audio: bits per sample (from WAVEFORMATEX, MP4 sample entry,
154 /// Matroska `BitDepth`, etc.).
155 pub bits_per_sample: Option<u16>,
156 pub channels: Option<u16>,
157 pub sample_rate: Option<u32>,
158 pub width: Option<u32>,
159 pub height: Option<u32>,
160}
161
162impl<'a> ProbeContext<'a> {
163 /// Start building a context for `tag` with every hint field empty.
164 pub fn new(tag: &'a CodecTag) -> Self {
165 Self {
166 tag,
167 header: None,
168 packet: None,
169 bits_per_sample: None,
170 channels: None,
171 sample_rate: None,
172 width: None,
173 height: None,
174 }
175 }
176
177 pub fn header(mut self, h: &'a [u8]) -> Self {
178 self.header = Some(h);
179 self
180 }
181
182 pub fn packet(mut self, p: &'a [u8]) -> Self {
183 self.packet = Some(p);
184 self
185 }
186
187 pub fn bits(mut self, n: u16) -> Self {
188 self.bits_per_sample = Some(n);
189 self
190 }
191
192 pub fn channels(mut self, n: u16) -> Self {
193 self.channels = Some(n);
194 self
195 }
196
197 pub fn sample_rate(mut self, n: u32) -> Self {
198 self.sample_rate = Some(n);
199 self
200 }
201
202 pub fn width(mut self, n: u32) -> Self {
203 self.width = Some(n);
204 self
205 }
206
207 pub fn height(mut self, n: u32) -> Self {
208 self.height = Some(n);
209 self
210 }
211}
212
/// Score returned by a probe.
///
/// * `1.0` — "certainly me";
/// * `0.0` — "not me";
/// * anything in between — "partial evidence": if no higher-confidence
///   claim exists, this one should win.
///
/// The registry picks the claim with the highest returned confidence and
/// skips any that return `0.0`.
pub type Confidence = f32;
219
220/// A probe function a codec attaches to its registration to
221/// disambiguate tag collisions. Called once per candidate
222/// registration during `resolve_tag`.
223pub type ProbeFn = fn(&ProbeContext) -> Confidence;
224
225/// Resolve a [`CodecTag`] (FourCC / WAVEFORMATEX / Matroska id / …) to a
226/// [`CodecId`]. The [`oxideav-codec`](https://crates.io/crates/oxideav-codec)
227/// registry implements this, but defining the trait here lets
228/// containers consume tag resolution via `&dyn CodecResolver` without
229/// pulling in the codec crate as a direct dependency.
230pub trait CodecResolver: Sync {
231 /// Resolve the tag in `ctx.tag` to a codec id. Implementations walk
232 /// every registration whose tag set contains the tag, call each
233 /// probe (treating `None` as "always 1.0"), and return the id with
234 /// the highest resulting confidence. Ties are broken by
235 /// registration order.
236 fn resolve_tag(&self, ctx: &ProbeContext) -> Option<CodecId>;
237}
238
239/// Null resolver that resolves nothing — useful as a default when a
240/// caller doesn't have a real registry handy (e.g. unit tests, or
241/// legacy callers of the tag-free `open()` APIs).
242#[derive(Default, Clone, Copy)]
243pub struct NullCodecResolver;
244
245impl CodecResolver for NullCodecResolver {
246 fn resolve_tag(&self, _ctx: &ProbeContext) -> Option<CodecId> {
247 None
248 }
249}
250
251/// Codec-level parameters shared between demuxer/muxer and en/decoder.
252///
253/// **Marked `#[non_exhaustive]`** — construction via struct-literal
254/// syntax is not supported. Use the [`audio`](Self::audio) /
255/// [`video`](Self::video) constructors (or functional-update
256/// `CodecParameters { ..base }` syntax) so new fields can be added
257/// without another semver break.
258#[derive(Clone, Debug)]
259#[non_exhaustive]
260pub struct CodecParameters {
261 pub codec_id: CodecId,
262 pub media_type: MediaType,
263
264 // Audio-specific
265 pub sample_rate: Option<u32>,
266 pub channels: Option<u16>,
267 pub sample_format: Option<SampleFormat>,
268
269 // Video-specific
270 pub width: Option<u32>,
271 pub height: Option<u32>,
272 pub pixel_format: Option<PixelFormat>,
273 pub frame_rate: Option<Rational>,
274
275 /// Per-codec setup bytes (e.g., SPS/PPS, OpusHead). Format defined by codec.
276 pub extradata: Vec<u8>,
277
278 pub bit_rate: Option<u64>,
279
280 /// Codec-specific tuning knobs (e.g. `{"interlace": "true"}` for PNG's
281 /// Adam7 encode, `{"crf": "23"}` for h264). Empty by default. The shape
282 /// is declared by each codec's options struct — see
283 /// [`crate::options`]. Parsed once at encoder/decoder construction;
284 /// the hot path never touches this.
285 pub options: CodecOptions,
286}
287
288impl CodecParameters {
289 pub fn audio(codec_id: CodecId) -> Self {
290 Self {
291 codec_id,
292 media_type: MediaType::Audio,
293 sample_rate: None,
294 channels: None,
295 sample_format: None,
296 width: None,
297 height: None,
298 pixel_format: None,
299 frame_rate: None,
300 extradata: Vec::new(),
301 bit_rate: None,
302 options: CodecOptions::default(),
303 }
304 }
305
306 /// True when `self` and `other` have the same codec_id and core
307 /// format parameters (sample_rate/channels/sample_format for audio,
308 /// width/height/pixel_format for video). Extradata and bitrate
309 /// differences are tolerated — many containers rewrite extradata
310 /// losslessly during a copy operation.
311 pub fn matches_core(&self, other: &CodecParameters) -> bool {
312 self.codec_id == other.codec_id
313 && self.sample_rate == other.sample_rate
314 && self.channels == other.channels
315 && self.sample_format == other.sample_format
316 && self.width == other.width
317 && self.height == other.height
318 && self.pixel_format == other.pixel_format
319 }
320
321 pub fn video(codec_id: CodecId) -> Self {
322 Self {
323 codec_id,
324 media_type: MediaType::Video,
325 sample_rate: None,
326 channels: None,
327 sample_format: None,
328 width: None,
329 height: None,
330 pixel_format: None,
331 frame_rate: None,
332 extradata: Vec::new(),
333 bit_rate: None,
334 options: CodecOptions::default(),
335 }
336 }
337}
338
339/// Description of a single stream inside a container.
340#[derive(Clone, Debug)]
341pub struct StreamInfo {
342 pub index: u32,
343 pub time_base: TimeBase,
344 pub duration: Option<i64>,
345 pub start_time: Option<i64>,
346 pub params: CodecParameters,
347}
348
/// Unit tests for `CodecTag`, `ProbeContext` and `NullCodecResolver`.
#[cfg(test)]
mod codec_tag_tests {
    use super::*;

    #[test]
    fn fourcc_uppercases_on_construction() {
        let t = CodecTag::fourcc(b"div3");
        assert_eq!(t, CodecTag::Fourcc(*b"DIV3"));
        // Non-alphabetic bytes preserved unchanged.
        let t2 = CodecTag::fourcc(b"MP42");
        assert_eq!(t2, CodecTag::Fourcc(*b"MP42"));
        let t3 = CodecTag::fourcc(&[0xFF, b'a', 0x00, b'1']);
        assert_eq!(t3, CodecTag::Fourcc([0xFF, b'A', 0x00, b'1']));
    }

    #[test]
    fn fourcc_equality_case_insensitive_via_ctor() {
        assert_eq!(CodecTag::fourcc(b"xvid"), CodecTag::fourcc(b"XVID"));
        assert_eq!(CodecTag::fourcc(b"DiV3"), CodecTag::fourcc(b"div3"));
    }

    #[test]
    fn display_printable_fourcc() {
        assert_eq!(CodecTag::fourcc(b"XVID").to_string(), "fourcc(XVID)");
    }

    #[test]
    fn display_space_padded_fourcc_is_printable() {
        // Spaces count as printable, so short codes keep their padding.
        assert_eq!(CodecTag::fourcc(b"y8  ").to_string(), "fourcc(Y8  )");
    }

    #[test]
    fn display_non_printable_fourcc_as_hex() {
        let t = CodecTag::Fourcc([0x00, 0x00, 0x00, 0x01]);
        assert_eq!(t.to_string(), "fourcc(0x00000001)");
    }

    #[test]
    fn display_wave_format() {
        assert_eq!(
            CodecTag::wave_format(0x0055).to_string(),
            "wFormatTag(0x0055)"
        );
    }

    #[test]
    fn display_mp4_oti() {
        assert_eq!(CodecTag::mp4_object_type(0x40).to_string(), "mp4_oti(0x40)");
    }

    #[test]
    fn display_matroska() {
        assert_eq!(
            CodecTag::matroska("V_MPEG4/ISO/AVC").to_string(),
            "matroska(V_MPEG4/ISO/AVC)",
        );
    }

    #[test]
    fn null_resolver_resolves_nothing() {
        let r = NullCodecResolver;
        let xvid = CodecTag::fourcc(b"XVID");
        assert!(r.resolve_tag(&ProbeContext::new(&xvid)).is_none());
        let wf = CodecTag::wave_format(0x0055);
        assert!(r.resolve_tag(&ProbeContext::new(&wf)).is_none());
    }

    #[test]
    fn probe_context_new_leaves_hints_empty() {
        let tag = CodecTag::fourcc(b"XVID");
        let ctx = ProbeContext::new(&tag);
        assert_eq!(ctx.tag, &tag);
        assert!(ctx.header.is_none());
        assert!(ctx.packet.is_none());
        assert!(ctx.bits_per_sample.is_none());
        assert!(ctx.channels.is_none());
        assert!(ctx.sample_rate.is_none());
        assert!(ctx.width.is_none());
        assert!(ctx.height.is_none());
    }

    #[test]
    fn probe_context_builder_fills_hints() {
        let tag = CodecTag::wave_format(0x0001);
        let ctx = ProbeContext::new(&tag)
            .bits(24)
            .channels(2)
            .sample_rate(48_000)
            .header(&[1, 2, 3])
            .packet(&[4, 5]);
        assert_eq!(ctx.bits_per_sample, Some(24));
        assert_eq!(ctx.channels, Some(2));
        assert_eq!(ctx.sample_rate, Some(48_000));
        assert_eq!(ctx.header.unwrap(), &[1, 2, 3]);
        assert_eq!(ctx.packet.unwrap(), &[4, 5]);
    }

    #[test]
    fn probe_context_builder_fills_video_hints() {
        let tag = CodecTag::fourcc(b"XVID");
        let ctx = ProbeContext::new(&tag).width(1920).height(1080);
        assert_eq!(ctx.width, Some(1920));
        assert_eq!(ctx.height, Some(1080));
        // Audio hints stay untouched by the video setters.
        assert!(ctx.bits_per_sample.is_none());
        assert!(ctx.channels.is_none());
        assert!(ctx.sample_rate.is_none());
    }
}