Skip to main content

oxideav_codec/
registry.rs

1//! In-process codec registry.
2//!
3//! Every codec crate declares itself with one [`CodecInfo`] value —
4//! capabilities, factory functions, the container tags it claims, and
5//! (optionally) a probe function used to disambiguate genuine tag
6//! collisions. The registry stores those registrations and exposes
7//! three orthogonal lookups:
8//!
9//! - **id-keyed** — `make_decoder(params)` / `make_encoder(params)` walk
10//!   the implementations registered under `params.codec_id`, filter by
11//!   capability restrictions, and try them in priority order with init-
12//!   time fallback.
13//! - **tag-keyed** — `resolve_tag(&ProbeContext)` walks every
14//!   registration whose `tags` contains `ctx.tag`, calls each probe
15//!   (treating `None` as "returns 1.0"), and returns the id with the
16//!   highest resulting confidence. First-registered wins on ties.
17//! - **diagnostic** — `all_implementations`, `all_tag_registrations`.
18//!
19//! The tag path explicitly DOES NOT short-circuit on "first claim with
20//! no probe" — every claimant is asked, so a lower-priority probed
21//! claim can out-rank a higher-priority unprobed one when the content
22//! is actually ambiguous (DIV3 XVID-with-real-MSMPEG4 payload etc.).
23
24use std::collections::HashMap;
25
26use oxideav_core::{
27    CodecCapabilities, CodecId, CodecParameters, CodecPreferences, CodecResolver, CodecTag, Error,
28    ProbeContext, ProbeFn, Result,
29};
30
31use crate::{Decoder, DecoderFactory, Encoder, EncoderFactory};
32
/// A single registration: capabilities, decoder/encoder factories,
/// optional probe, and the container tags this codec claims.
///
/// Codec crates build one of these per codec id inside their
/// `register(reg)` function and hand it to
/// [`CodecRegistry::register`]. The struct is `#[non_exhaustive]` so
/// additional fields can be added without breaking existing codec
/// crates — construction is only possible through
/// [`CodecInfo::new`] plus the builder methods below.
#[non_exhaustive]
pub struct CodecInfo {
    /// Codec id this registration is filed under. Several
    /// registrations may share one id.
    pub id: CodecId,
    /// Capability description of this implementation.
    /// [`CodecRegistry::register`] additionally applies `with_decode()` /
    /// `with_encode()` to it for each factory that is present.
    pub capabilities: CodecCapabilities,
    /// Decoder constructor, or `None` when this registration does not
    /// provide a decoder.
    pub decoder_factory: Option<DecoderFactory>,
    /// Encoder constructor, or `None` when this registration does not
    /// provide an encoder.
    pub encoder_factory: Option<EncoderFactory>,
    /// Probe function that returns a confidence in `0.0..=1.0` for a
    /// given [`ProbeContext`]. `None` means "confidence 1.0 for every
    /// claimed tag" — the correct default for codecs whose tag claims
    /// are unambiguous.
    pub probe: Option<ProbeFn>,
    /// Tags this codec is willing to be looked up under. One codec may
    /// claim many tags (an AAC decoder covers several WaveFormat ids,
    /// a FourCC, an MP4 OTI, and a Matroska CodecID string at once).
    pub tags: Vec<CodecTag>,
}
58
59impl CodecInfo {
60    /// Start a new registration for `id` with empty capabilities, no
61    /// factories, no probe, and no tags. Chain the builder methods
62    /// below to fill it in, then hand the result to
63    /// [`CodecRegistry::register`].
64    pub fn new(id: CodecId) -> Self {
65        Self {
66            capabilities: CodecCapabilities::audio(id.as_str()),
67            id,
68            decoder_factory: None,
69            encoder_factory: None,
70            probe: None,
71            tags: Vec::new(),
72        }
73    }
74
75    /// Replace the capability description. The default built by
76    /// [`Self::new`] is a placeholder (audio-flavoured, no flags); every
77    /// real registration should call this.
78    pub fn capabilities(mut self, caps: CodecCapabilities) -> Self {
79        self.capabilities = caps;
80        self
81    }
82
83    pub fn decoder(mut self, factory: DecoderFactory) -> Self {
84        self.decoder_factory = Some(factory);
85        self
86    }
87
88    pub fn encoder(mut self, factory: EncoderFactory) -> Self {
89        self.encoder_factory = Some(factory);
90        self
91    }
92
93    pub fn probe(mut self, probe: ProbeFn) -> Self {
94        self.probe = Some(probe);
95        self
96    }
97
98    /// Claim a single container tag for this codec. Equivalent to
99    /// `.tags([tag])` but avoids the array ceremony for single-tag
100    /// claims.
101    pub fn tag(mut self, tag: CodecTag) -> Self {
102        self.tags.push(tag);
103        self
104    }
105
106    /// Claim a set of container tags for this codec. Takes any
107    /// iterable (arrays, `Vec`, `Option`, …) so the common case of a
108    /// codec with 3-6 tags reads as one clean block.
109    pub fn tags(mut self, tags: impl IntoIterator<Item = CodecTag>) -> Self {
110        self.tags.extend(tags);
111        self
112    }
113}
114
/// Internal per-impl record held inside the registry's id map. Kept
/// distinct from [`CodecInfo`] so the id map stays cheap to walk
/// during `make_decoder` / `make_encoder` lookups.
#[derive(Clone)]
pub struct CodecImplementation {
    /// Capabilities of this implementation, as stored by
    /// [`CodecRegistry::register`] (the registered capabilities with
    /// `with_decode()` / `with_encode()` applied per available factory).
    pub caps: CodecCapabilities,
    /// Decoder constructor, or `None` if this implementation cannot decode.
    pub make_decoder: Option<DecoderFactory>,
    /// Encoder constructor, or `None` if this implementation cannot encode.
    pub make_encoder: Option<EncoderFactory>,
}
124
/// In-process registry of codec implementations and their container
/// tag claims. Starts empty (`Default`) and is filled through
/// [`CodecRegistry::register`].
#[derive(Default)]
pub struct CodecRegistry {
    /// id → list of implementations. Each registered codec appends one
    /// entry here. `make_decoder` / `make_encoder` walk this list in
    /// preference order.
    impls: HashMap<CodecId, Vec<CodecImplementation>>,
    /// Append-only list of every registration — the `tag_index` stores
    /// offsets into this vector.
    registrations: Vec<RegistrationRecord>,
    /// Tag → indices into `registrations`. Indices are stored in
    /// registration order so tie-breaking in `resolve_tag` is
    /// deterministic (first-registered wins).
    tag_index: HashMap<CodecTag, Vec<usize>>,
}
139
/// Internal registry record. Mirrors the subset of [`CodecInfo`]
/// needed at resolve time.
struct RegistrationRecord {
    /// Codec id returned when this registration wins tag resolution.
    id: CodecId,
    /// Optional confidence probe; `None` is scored as `1.0` by
    /// `resolve_tag_ref`.
    probe: Option<ProbeFn>,
}
146
147impl CodecRegistry {
148    pub fn new() -> Self {
149        Self::default()
150    }
151
152    /// Register one codec. Expands into:
153    ///   * an entry in the id → implementations map (for
154    ///     `make_decoder` / `make_encoder`);
155    ///   * an entry in the tag index for every claimed tag (for
156    ///     `resolve_tag`).
157    ///
158    /// Calling `register` multiple times with the same id is allowed
159    /// and how multi-implementation codecs (software-plus-hardware
160    /// FLAC, for example) are expressed.
161    pub fn register(&mut self, info: CodecInfo) {
162        let CodecInfo {
163            id,
164            capabilities,
165            decoder_factory,
166            encoder_factory,
167            probe,
168            tags,
169        } = info;
170
171        let caps = {
172            let mut c = capabilities;
173            if decoder_factory.is_some() {
174                c = c.with_decode();
175            }
176            if encoder_factory.is_some() {
177                c = c.with_encode();
178            }
179            c
180        };
181
182        // Only record an implementation entry when at least one factory
183        // is present. A "tag-only" CodecInfo — used to attach extra tag
184        // claims to a codec that was already registered with factories —
185        // shouldn't pollute the impl list.
186        if decoder_factory.is_some() || encoder_factory.is_some() {
187            self.impls
188                .entry(id.clone())
189                .or_default()
190                .push(CodecImplementation {
191                    caps,
192                    make_decoder: decoder_factory,
193                    make_encoder: encoder_factory,
194                });
195        }
196
197        let record_idx = self.registrations.len();
198        self.registrations.push(RegistrationRecord {
199            id: id.clone(),
200            probe,
201        });
202        for tag in tags {
203            self.tag_index.entry(tag).or_default().push(record_idx);
204        }
205    }
206
207    pub fn has_decoder(&self, id: &CodecId) -> bool {
208        self.impls
209            .get(id)
210            .map(|v| v.iter().any(|i| i.make_decoder.is_some()))
211            .unwrap_or(false)
212    }
213
214    pub fn has_encoder(&self, id: &CodecId) -> bool {
215        self.impls
216            .get(id)
217            .map(|v| v.iter().any(|i| i.make_encoder.is_some()))
218            .unwrap_or(false)
219    }
220
221    /// Build a decoder for `params`. Walks all implementations matching the
222    /// codec id in increasing priority order, skipping any excluded by the
223    /// caller's preferences. Init-time fallback: if a higher-priority impl's
224    /// constructor returns an error, the next candidate is tried.
225    pub fn make_decoder_with(
226        &self,
227        params: &CodecParameters,
228        prefs: &CodecPreferences,
229    ) -> Result<Box<dyn Decoder>> {
230        let candidates = self
231            .impls
232            .get(&params.codec_id)
233            .ok_or_else(|| Error::CodecNotFound(params.codec_id.to_string()))?;
234        let mut ranked: Vec<&CodecImplementation> = candidates
235            .iter()
236            .filter(|i| i.make_decoder.is_some() && !prefs.excludes(&i.caps))
237            .filter(|i| caps_fit_params(&i.caps, params, false))
238            .collect();
239        ranked.sort_by_key(|i| prefs.effective_priority(&i.caps));
240        let mut last_err: Option<Error> = None;
241        for imp in ranked {
242            match (imp.make_decoder.unwrap())(params) {
243                Ok(d) => return Ok(d),
244                Err(e) => last_err = Some(e),
245            }
246        }
247        Err(last_err.unwrap_or_else(|| {
248            Error::CodecNotFound(format!(
249                "no decoder for {} accepts the requested parameters",
250                params.codec_id
251            ))
252        }))
253    }
254
255    /// Build an encoder, with the same priority + fallback semantics.
256    pub fn make_encoder_with(
257        &self,
258        params: &CodecParameters,
259        prefs: &CodecPreferences,
260    ) -> Result<Box<dyn Encoder>> {
261        let candidates = self
262            .impls
263            .get(&params.codec_id)
264            .ok_or_else(|| Error::CodecNotFound(params.codec_id.to_string()))?;
265        let mut ranked: Vec<&CodecImplementation> = candidates
266            .iter()
267            .filter(|i| i.make_encoder.is_some() && !prefs.excludes(&i.caps))
268            .filter(|i| caps_fit_params(&i.caps, params, true))
269            .collect();
270        ranked.sort_by_key(|i| prefs.effective_priority(&i.caps));
271        let mut last_err: Option<Error> = None;
272        for imp in ranked {
273            match (imp.make_encoder.unwrap())(params) {
274                Ok(e) => return Ok(e),
275                Err(e) => last_err = Some(e),
276            }
277        }
278        Err(last_err.unwrap_or_else(|| {
279            Error::CodecNotFound(format!(
280                "no encoder for {} accepts the requested parameters",
281                params.codec_id
282            ))
283        }))
284    }
285
286    /// Default-preference shorthand for `make_decoder_with`.
287    pub fn make_decoder(&self, params: &CodecParameters) -> Result<Box<dyn Decoder>> {
288        self.make_decoder_with(params, &CodecPreferences::default())
289    }
290
291    /// Default-preference shorthand for `make_encoder_with`.
292    pub fn make_encoder(&self, params: &CodecParameters) -> Result<Box<dyn Encoder>> {
293        self.make_encoder_with(params, &CodecPreferences::default())
294    }
295
296    /// Iterate codec ids that have at least one decoder implementation.
297    pub fn decoder_ids(&self) -> impl Iterator<Item = &CodecId> {
298        self.impls
299            .iter()
300            .filter(|(_, v)| v.iter().any(|i| i.make_decoder.is_some()))
301            .map(|(id, _)| id)
302    }
303
304    pub fn encoder_ids(&self) -> impl Iterator<Item = &CodecId> {
305        self.impls
306            .iter()
307            .filter(|(_, v)| v.iter().any(|i| i.make_encoder.is_some()))
308            .map(|(id, _)| id)
309    }
310
311    /// All registered implementations of a given codec id.
312    pub fn implementations(&self, id: &CodecId) -> &[CodecImplementation] {
313        self.impls.get(id).map(|v| v.as_slice()).unwrap_or(&[])
314    }
315
316    /// Iterator over every (codec_id, impl) pair — useful for `oxideav list`
317    /// to show capability flags per implementation.
318    pub fn all_implementations(&self) -> impl Iterator<Item = (&CodecId, &CodecImplementation)> {
319        self.impls
320            .iter()
321            .flat_map(|(id, v)| v.iter().map(move |i| (id, i)))
322    }
323
324    /// Iterator over every `(tag, codec_id)` pair currently registered —
325    /// used by `oxideav tags` debug output and by tests that want to
326    /// walk the tag surface.
327    pub fn all_tag_registrations(&self) -> impl Iterator<Item = (&CodecTag, &CodecId)> {
328        self.tag_index.iter().flat_map(move |(tag, idxs)| {
329            idxs.iter().map(move |&i| (tag, &self.registrations[i].id))
330        })
331    }
332
333    /// Inherent form of tag resolution that returns a reference.
334    /// The owned-value form used by container code lives behind the
335    /// [`CodecResolver`] trait impl below.
336    ///
337    /// Walks every registration that claimed `ctx.tag`, calls its
338    /// probe with `ctx`, and returns the id of the registration that
339    /// scored highest. Probes that return `0.0` are discarded; ties
340    /// on confidence are broken by registration order (first wins).
341    /// Registrations with no probe are treated as returning `1.0`.
342    pub fn resolve_tag_ref(&self, ctx: &ProbeContext) -> Option<&CodecId> {
343        let idxs = self.tag_index.get(ctx.tag)?;
344        let mut best: Option<(f32, usize)> = None;
345        for &i in idxs {
346            let rec = &self.registrations[i];
347            let conf = match rec.probe {
348                Some(f) => f(ctx),
349                None => 1.0,
350            };
351            if conf <= 0.0 {
352                continue;
353            }
354            best = match best {
355                None => Some((conf, i)),
356                Some((bc, _)) if conf > bc => Some((conf, i)),
357                other => other,
358            };
359        }
360        best.map(|(_, i)| &self.registrations[i].id)
361    }
362}
363
364/// Implement the shared [`CodecResolver`] interface so container
365/// demuxers can accept `&dyn CodecResolver` without depending on
366/// this crate directly — the trait lives in oxideav-core.
367impl CodecResolver for CodecRegistry {
368    fn resolve_tag(&self, ctx: &ProbeContext) -> Option<CodecId> {
369        self.resolve_tag_ref(ctx).cloned()
370    }
371}
372
373/// Check whether an implementation's restrictions are compatible with the
374/// requested codec parameters. `for_encode` swaps the rare cases where a
375/// restriction only applies one way.
376fn caps_fit_params(caps: &CodecCapabilities, p: &CodecParameters, for_encode: bool) -> bool {
377    let _ = for_encode; // reserved for future use (e.g. encode-only bitrate caps)
378    if let (Some(max), Some(w)) = (caps.max_width, p.width) {
379        if w > max {
380            return false;
381        }
382    }
383    if let (Some(max), Some(h)) = (caps.max_height, p.height) {
384        if h > max {
385            return false;
386        }
387    }
388    if let (Some(max), Some(br)) = (caps.max_bitrate, p.bit_rate) {
389        if br > max {
390            return false;
391        }
392    }
393    if let (Some(max), Some(sr)) = (caps.max_sample_rate, p.sample_rate) {
394        if sr > max {
395            return false;
396        }
397    }
398    if let (Some(max), Some(ch)) = (caps.max_channels, p.channels) {
399        if ch > max {
400            return false;
401        }
402    }
403    true
404}
405
/// Tests for the tag-keyed lookup path (`resolve_tag_ref`): single
/// claims, tie-breaking, probe-based disambiguation, and the different
/// `CodecTag` flavours.
#[cfg(test)]
mod tag_tests {
    use super::*;
    use oxideav_core::CodecCapabilities;

    /// Probe for MS-MPEG4: returns 1.0 when none of the first six
    /// 3-byte windows of the peeked packet is a 0x000001 start code,
    /// 0.0 when one is, and 0.5 when no packet is available.
    fn probe_msmpeg4(ctx: &ProbeContext) -> f32 {
        match ctx.packet {
            Some(d) if !d.windows(3).take(6).any(|w| w == [0x00, 0x00, 0x01]) => 1.0,
            Some(_) => 0.0,
            None => 0.5, // no data yet — weak evidence
        }
    }

    /// Probe for MPEG-4 Part 2: returns 1.0 when one of the first six
    /// 3-byte windows of the peeked packet is a 0x000001 start code,
    /// 0.0 when none is, and 0.5 when no packet is available.
    fn probe_mpeg4_part2(ctx: &ProbeContext) -> f32 {
        match ctx.packet {
            Some(d) if d.windows(3).take(6).any(|w| w == [0x00, 0x00, 0x01]) => 1.0,
            Some(_) => 0.0,
            None => 0.5, // no data yet — weak evidence
        }
    }

    /// Build a factory-less (tag-only) registration for `id` with
    /// audio capabilities; tests chain `.tag(..)` / `.probe(..)` on it.
    fn info(id: &str) -> CodecInfo {
        CodecInfo::new(CodecId::new(id)).capabilities(CodecCapabilities::audio(id))
    }

    /// A lone unprobed claim resolves to its codec id.
    #[test]
    fn resolve_single_claim_no_probe() {
        let mut reg = CodecRegistry::new();
        reg.register(info("flac").tag(CodecTag::fourcc(b"FLAC")));
        let t = CodecTag::fourcc(b"FLAC");
        assert_eq!(
            reg.resolve_tag_ref(&ProbeContext::new(&t))
                .map(|c| c.as_str()),
            Some("flac"),
        );
    }

    /// A tag nobody claimed resolves to `None`.
    #[test]
    fn resolve_missing_tag_returns_none() {
        let reg = CodecRegistry::new();
        let t = CodecTag::fourcc(b"????");
        assert!(reg.resolve_tag_ref(&ProbeContext::new(&t)).is_none());
    }

    /// Equal confidences are broken by registration order.
    #[test]
    fn unprobed_claims_tie_first_registered_wins() {
        // Two unprobed claims on the same tag: deterministic order.
        let mut reg = CodecRegistry::new();
        reg.register(info("first").tag(CodecTag::fourcc(b"TEST")));
        reg.register(info("second").tag(CodecTag::fourcc(b"TEST")));
        let t = CodecTag::fourcc(b"TEST");
        assert_eq!(
            reg.resolve_tag_ref(&ProbeContext::new(&t))
                .map(|c| c.as_str()),
            Some("first"),
        );
    }

    /// Two probed claims on one tag: the packet bytes decide the winner.
    #[test]
    fn probe_picks_matching_bitstream() {
        // The core bug fix: every probe is asked and the highest
        // confidence wins regardless of registration order.
        let mut reg = CodecRegistry::new();
        reg.register(
            info("msmpeg4v3")
                .probe(probe_msmpeg4)
                .tag(CodecTag::fourcc(b"DIV3")),
        );
        reg.register(
            info("mpeg4video")
                .probe(probe_mpeg4_part2)
                .tag(CodecTag::fourcc(b"DIV3")),
        );

        // First packet opens with a 0x000001 start code, second has none.
        let mpeg4_part2 = [0x00u8, 0x00, 0x01, 0xB0, 0x01, 0x00];
        let ms_mpeg4 = [0x85u8, 0x3F, 0xD4, 0x80, 0x00, 0xA2];
        let tag = CodecTag::fourcc(b"DIV3");

        let ctx_part2 = ProbeContext::new(&tag).packet(&mpeg4_part2);
        assert_eq!(
            reg.resolve_tag_ref(&ctx_part2).map(|c| c.as_str()),
            Some("mpeg4video"),
        );
        let ctx_ms = ProbeContext::new(&tag).packet(&ms_mpeg4);
        assert_eq!(
            reg.resolve_tag_ref(&ctx_ms).map(|c| c.as_str()),
            Some("msmpeg4v3"),
        );
    }

    /// An unprobed claim (1.0) beats a probe that reports 0.3.
    #[test]
    fn unprobed_claim_wins_against_low_confidence_probe() {
        // One codec claims a tag without a probe (→ confidence 1.0)
        // and another claims it with a probe returning 0.3. The
        // unprobed one wins — a codec that knows it owns the tag
        // outright should not lose to a speculative probe.
        let mut reg = CodecRegistry::new();
        reg.register(info("owner").tag(CodecTag::fourcc(b"OWN_")));
        reg.register(
            info("speculative")
                .probe(|_| 0.3)
                .tag(CodecTag::fourcc(b"OWN_")),
        );
        let t = CodecTag::fourcc(b"OWN_");
        assert_eq!(
            reg.resolve_tag_ref(&ProbeContext::new(&t))
                .map(|c| c.as_str()),
            Some("owner"),
        );
    }

    /// A probe returning 0.0 removes its registration from the running,
    /// even when it was registered first.
    #[test]
    fn probe_returning_zero_is_skipped() {
        let mut reg = CodecRegistry::new();
        reg.register(
            info("refuses")
                .probe(|_| 0.0)
                .tag(CodecTag::fourcc(b"MAYB")),
        );
        reg.register(info("fallback").tag(CodecTag::fourcc(b"MAYB")));
        let t = CodecTag::fourcc(b"MAYB");
        let ctx = ProbeContext::new(&t).packet(b"hello");
        assert_eq!(
            reg.resolve_tag_ref(&ctx).map(|c| c.as_str()),
            Some("fallback"),
        );
    }

    /// FourCC tags built from different letter cases hit the same entry.
    #[test]
    fn fourcc_case_insensitive_lookup() {
        let mut reg = CodecRegistry::new();
        reg.register(info("vid").tag(CodecTag::fourcc(b"div3")));
        // Registered as "DIV3" (uppercase via ctor); lookup using
        // lowercase / mixed case also hits.
        let upper = CodecTag::fourcc(b"DIV3");
        let lower = CodecTag::fourcc(b"div3");
        let mixed = CodecTag::fourcc(b"DiV3");
        assert!(reg.resolve_tag_ref(&ProbeContext::new(&upper)).is_some());
        assert!(reg.resolve_tag_ref(&ProbeContext::new(&lower)).is_some());
        assert!(reg.resolve_tag_ref(&ProbeContext::new(&mixed)).is_some());
    }

    /// WaveFormat ids and Matroska codec-id strings resolve like FourCCs.
    #[test]
    fn wave_format_and_matroska_tags_work() {
        let mut reg = CodecRegistry::new();
        reg.register(info("mp3").tag(CodecTag::wave_format(0x0055)));
        reg.register(info("h264").tag(CodecTag::matroska("V_MPEG4/ISO/AVC")));
        let wf = CodecTag::wave_format(0x0055);
        let mk = CodecTag::matroska("V_MPEG4/ISO/AVC");
        assert_eq!(
            reg.resolve_tag_ref(&ProbeContext::new(&wf))
                .map(|c| c.as_str()),
            Some("mp3"),
        );
        assert_eq!(
            reg.resolve_tag_ref(&ProbeContext::new(&mk))
                .map(|c| c.as_str()),
            Some("h264"),
        );
    }

    /// MP4 object-type tags resolve.
    #[test]
    fn mp4_object_type_tag_works() {
        let mut reg = CodecRegistry::new();
        reg.register(info("aac").tag(CodecTag::mp4_object_type(0x40)));
        let t = CodecTag::mp4_object_type(0x40);
        assert_eq!(
            reg.resolve_tag_ref(&ProbeContext::new(&t))
                .map(|c| c.as_str()),
            Some("aac"),
        );
    }

    /// One registration claiming several tags is reachable through
    /// each of them.
    #[test]
    fn multi_tag_claim_all_resolve() {
        let mut reg = CodecRegistry::new();
        reg.register(info("aac").tags([
            CodecTag::fourcc(b"MP4A"),
            CodecTag::wave_format(0x00FF),
            CodecTag::mp4_object_type(0x40),
            CodecTag::matroska("A_AAC"),
        ]));
        for t in [
            CodecTag::fourcc(b"MP4A"),
            CodecTag::wave_format(0x00FF),
            CodecTag::mp4_object_type(0x40),
            CodecTag::matroska("A_AAC"),
        ] {
            assert_eq!(
                reg.resolve_tag_ref(&ProbeContext::new(&t))
                    .map(|c| c.as_str()),
                Some("aac"),
                "tag {t:?} did not resolve",
            );
        }
    }
}