Skip to main content

oximedia_codec/
codec_probe.rs

1//! Codec identification from raw bitstream bytes.
2//!
3//! Provides magic-byte and structural probing of codec bitstreams with a
4//! confidence score for each candidate codec.  The probe intentionally avoids
5//! full parsing so that it remains fast and safe to call on untrusted data.
6//!
7//! # Supported codecs
8//!
9//! - **AV1** — OBU temporal delimiter / sequence header signature
//! - **VP9** — uncompressed-header `frame_marker` bits / superframe marker heuristic
11//! - **VP8** — VP8 frame header tag detection
12//! - **H.264 / AVC** — AnnexB start codes + SPS NAL type byte
13//! - **H.265 / HEVC** — AnnexB start codes + VPS/SPS NAL type bytes
14//! - **Theora** — Ogg Theora identification header magic bytes
15//! - **Opus** — Ogg Opus identification header magic bytes
16//! - **Vorbis** — Ogg Vorbis identification header magic bytes
17//! - **FLAC** — fLaC stream marker
//! - **PCM** — Raw PCM (has no magic bytes, so it is never probed; `probe_codec` always reports minimum confidence)
19//! - **PNG** — PNG signature bytes
20//! - **GIF** — GIF87a / GIF89a magic
21//! - **WebP** — RIFF/WEBP container signature
22//! - **JPEG-XL** — JXL codestream / ISOBMFF signature
23//! - **MPEG-4 AAC** — ADTS sync word
24
25use std::fmt;
26
27// ---------------------------------------------------------------------------
28// Codec identifiers
29// ---------------------------------------------------------------------------
30
/// Codec or media format that a probe result can refer to.
///
/// The derived ordering follows declaration order (`Av1` first, `Unknown`
/// last).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum CodecId {
    /// AV1 video (Alliance for Open Media).
    Av1,
    /// VP9 video (Google).
    Vp9,
    /// VP8 video (Google / On2).
    Vp8,
    /// H.264 / AVC video.
    H264,
    /// H.265 / HEVC video.
    H265,
    /// Theora video (Xiph.Org).
    Theora,
    /// Opus audio (Xiph.Org / IETF).
    Opus,
    /// Vorbis audio (Xiph.Org).
    Vorbis,
    /// FLAC lossless audio.
    Flac,
    /// Raw PCM audio.
    Pcm,
    /// PNG image.
    Png,
    /// GIF image.
    Gif,
    /// WebP image.
    WebP,
    /// JPEG-XL image.
    JpegXl,
    /// MPEG-4 AAC audio in ADTS framing.
    Aac,
    /// Codec could not be identified.
    Unknown,
}

impl fmt::Display for CodecId {
    /// Writes the human-readable codec name (for example `"H.265/HEVC"`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match *self {
            CodecId::Av1 => "AV1",
            CodecId::Vp9 => "VP9",
            CodecId::Vp8 => "VP8",
            CodecId::H264 => "H.264",
            CodecId::H265 => "H.265/HEVC",
            CodecId::Theora => "Theora",
            CodecId::Opus => "Opus",
            CodecId::Vorbis => "Vorbis",
            CodecId::Flac => "FLAC",
            CodecId::Pcm => "PCM",
            CodecId::Png => "PNG",
            CodecId::Gif => "GIF",
            CodecId::WebP => "WebP",
            CodecId::JpegXl => "JPEG-XL",
            CodecId::Aac => "AAC (ADTS)",
            CodecId::Unknown => "Unknown",
        })
    }
}
91
92// ---------------------------------------------------------------------------
93// Confidence scoring
94// ---------------------------------------------------------------------------
95
/// Confidence of a probe match, expressed as a value in `[0, 100]`.
///
/// Higher values indicate stronger evidence for the codec.  The wrapped value
/// is private; it is only set through the preset constants or through
/// [`Confidence::new`], which clamps, so it never exceeds 100.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Confidence(u8);

impl Confidence {
    /// The minimum confidence (0 — codec is ruled out or unsupported).
    pub const MIN: Self = Self(0);
    /// A low-confidence heuristic match (guessed from partial data).
    pub const LOW: Self = Self(25);
    /// A medium-confidence match (one structural indicator confirmed).
    pub const MEDIUM: Self = Self(50);
    /// A high-confidence match (two or more structural indicators confirmed).
    pub const HIGH: Self = Self(75);
    /// A near-certain match (all applicable magic bytes / markers confirmed).
    pub const CERTAIN: Self = Self(100);

    /// Create a `Confidence` from a raw byte value, clamping to `[0, 100]`.
    ///
    /// This is a `const fn`, so custom confidence levels can be declared as
    /// constants next to the presets above.
    pub const fn new(raw: u8) -> Self {
        // `Ord::min` is not callable in const context, hence the manual clamp.
        if raw > 100 {
            Self(100)
        } else {
            Self(raw)
        }
    }

    /// Return the raw confidence value (always in `0..=100`).
    pub const fn value(self) -> u8 {
        self.0
    }
}

impl fmt::Display for Confidence {
    /// Formats the confidence as a percentage, e.g. `75%`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}%", self.0)
    }
}
130
131// ---------------------------------------------------------------------------
132// Probe result
133// ---------------------------------------------------------------------------
134
135/// Result of probing a single codec against a byte buffer.
136#[derive(Debug, Clone, PartialEq, Eq)]
137pub struct ProbeResult {
138    /// The codec this result describes.
139    pub codec: CodecId,
140    /// Confidence that `data` belongs to this codec.
141    pub confidence: Confidence,
142    /// Human-readable description of why this confidence was assigned.
143    pub reason: String,
144}
145
146impl ProbeResult {
147    fn new(codec: CodecId, confidence: Confidence, reason: impl Into<String>) -> Self {
148        Self {
149            codec,
150            confidence,
151            reason: reason.into(),
152        }
153    }
154}
155
156impl fmt::Display for ProbeResult {
157    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
158        write!(f, "{}: {} ({})", self.codec, self.confidence, self.reason)
159    }
160}
161
162// ---------------------------------------------------------------------------
163// Internal probe helpers
164// ---------------------------------------------------------------------------
165
/// Returns `true` when the leading bytes of `data` equal `prefix`.
///
/// An empty `prefix` matches every buffer, including an empty one.
fn starts_with(data: &[u8], prefix: &[u8]) -> bool {
    match data.get(..prefix.len()) {
        Some(head) => head == prefix,
        // `data` is shorter than `prefix`.
        None => false,
    }
}
170
/// Check whether `needle` occurs entirely within the first `limit` bytes of
/// `data`.
///
/// The search window is `data[..min(data.len(), limit)]`; a match that would
/// extend past that window does not count.  An empty `needle` is considered
/// to be contained in any buffer, mirroring `starts_with` with an empty
/// prefix.
fn contains_in_first(data: &[u8], needle: &[u8], limit: usize) -> bool {
    // `slice::windows` panics on a window size of zero, so the empty needle
    // has to be answered before it is reached.
    if needle.is_empty() {
        return true;
    }
    let haystack = &data[..data.len().min(limit)];
    // When the needle is longer than the haystack, `windows` yields nothing
    // and `any` returns false.
    haystack
        .windows(needle.len())
        .any(|window| window == needle)
}
180
181// ---------------------------------------------------------------------------
182// Per-codec probe functions
183// ---------------------------------------------------------------------------
184
185fn probe_av1(data: &[u8]) -> ProbeResult {
186    // AV1 bitstream: first OBU byte has forbidden_bit=0 and type field in bits [6:3].
187    // Temporal delimiter OBU has type=2 => header byte = 0b0_0010_0_1_0 = 0x12
188    // Sequence header OBU has type=1  => header byte = 0b0_0001_0_1_0 = 0x0A
189    // We check for either at offset 0 with has_size_field=1.
190    if data.is_empty() {
191        return ProbeResult::new(CodecId::Av1, Confidence::MIN, "empty buffer");
192    }
193    let b = data[0];
194    let forbidden = (b >> 7) & 1;
195    let obu_type = (b >> 3) & 0x0F;
196    let has_size = (b >> 1) & 1;
197
198    if forbidden == 0 && (obu_type == 1 || obu_type == 2) && has_size == 1 {
199        ProbeResult::new(
200            CodecId::Av1,
201            Confidence::HIGH,
202            format!("AV1 OBU header byte 0x{b:02X} (type={obu_type})"),
203        )
204    } else if forbidden == 0 && (1..=8).contains(&obu_type) {
205        ProbeResult::new(
206            CodecId::Av1,
207            Confidence::MEDIUM,
208            format!("possible AV1 OBU type={obu_type}"),
209        )
210    } else {
211        ProbeResult::new(CodecId::Av1, Confidence::MIN, "no AV1 OBU marker")
212    }
213}
214
215fn probe_vp9(data: &[u8]) -> ProbeResult {
216    // VP9 IVF frame format is framed by the container.  In raw VP9 the first byte
217    // carries `frame_marker` (2 bits, must be 0b10), `profile_low_bit`,
218    // `profile_high_bit`, `reserved_zero` (1 bit for profile < 3).
219    // frame_marker = (data[0] >> 6) & 0x3 == 2 for a valid VP9 frame header.
220    if data.is_empty() {
221        return ProbeResult::new(CodecId::Vp9, Confidence::MIN, "empty buffer");
222    }
223    let frame_marker = (data[0] >> 6) & 0x03;
224    // VP9 superframe marker: last byte has 0b110xxxxx pattern.
225    let superframe_marker = data.last().map(|&b| (b >> 5) & 0x7).unwrap_or(0);
226
227    if frame_marker == 2 && superframe_marker == 0b110 {
228        ProbeResult::new(
229            CodecId::Vp9,
230            Confidence::HIGH,
231            "VP9 frame_marker + superframe marker",
232        )
233    } else if frame_marker == 2 {
234        ProbeResult::new(CodecId::Vp9, Confidence::MEDIUM, "VP9 frame_marker present")
235    } else {
236        ProbeResult::new(CodecId::Vp9, Confidence::MIN, "no VP9 frame_marker")
237    }
238}
239
240fn probe_vp8(data: &[u8]) -> ProbeResult {
241    // VP8 frame tag: bits [0] = frame_type (0=key, 1=inter), [2:1] = version, [3] = show_frame.
242    // Key frame: first 3 bytes of payload after the 3-byte tag should be 0x9D 0x01 0x2A.
243    if data.len() < 4 {
244        return ProbeResult::new(CodecId::Vp8, Confidence::MIN, "buffer too short");
245    }
246    let frame_type = data[0] & 0x01; // 0 = key frame
247    if frame_type == 0 {
248        // Key frame: bytes 3..6 should be the VP8 start code 0x9D 0x01 0x2A.
249        if data.len() >= 6 && data[3] == 0x9D && data[4] == 0x01 && data[5] == 0x2A {
250            return ProbeResult::new(
251                CodecId::Vp8,
252                Confidence::CERTAIN,
253                "VP8 key frame start code 9D 01 2A found",
254            );
255        }
256        return ProbeResult::new(
257            CodecId::Vp8,
258            Confidence::MEDIUM,
259            "VP8 key frame flag set but start code missing",
260        );
261    }
262    ProbeResult::new(
263        CodecId::Vp8,
264        Confidence::LOW,
265        "VP8 inter frame (cannot confirm without key frame)",
266    )
267}
268
269fn probe_h264(data: &[u8]) -> ProbeResult {
270    // AnnexB start code followed by SPS NAL type byte (0x67).
271    const START_4: [u8; 4] = [0x00, 0x00, 0x00, 0x01];
272    const START_3: [u8; 3] = [0x00, 0x00, 0x01];
273
274    let check_nal_type = |offset: usize| -> Option<u8> { data.get(offset).copied() };
275
276    if starts_with(data, &START_4) {
277        let nal_byte = check_nal_type(4).unwrap_or(0);
278        let nal_type = nal_byte & 0x1F;
279        if nal_type == 7 {
280            return ProbeResult::new(
281                CodecId::H264,
282                Confidence::CERTAIN,
283                "AnnexB + SPS NAL type 7",
284            );
285        } else if nal_type == 8 || nal_type == 5 || nal_type == 1 {
286            return ProbeResult::new(
287                CodecId::H264,
288                Confidence::HIGH,
289                format!("AnnexB start code + H.264-compatible NAL type {nal_type}"),
290            );
291        }
292        return ProbeResult::new(
293            CodecId::H264,
294            Confidence::MEDIUM,
295            "AnnexB 4-byte start code",
296        );
297    }
298    if starts_with(data, &START_3) {
299        let nal_byte = check_nal_type(3).unwrap_or(0);
300        let nal_type = nal_byte & 0x1F;
301        if nal_type == 7 {
302            return ProbeResult::new(CodecId::H264, Confidence::HIGH, "3-byte AnnexB + SPS NAL");
303        }
304        return ProbeResult::new(CodecId::H264, Confidence::LOW, "3-byte AnnexB start code");
305    }
306    // Check for AVCC-style (no start code, first 4 bytes = big-endian length).
307    if data.len() >= 5 {
308        let claimed_len = u32::from_be_bytes([data[0], data[1], data[2], data[3]]) as usize;
309        if claimed_len > 0 && claimed_len < data.len() {
310            let nal_byte = data[4];
311            let nal_type = nal_byte & 0x1F;
312            if nal_type == 7 || nal_type == 8 || nal_type == 5 {
313                return ProbeResult::new(
314                    CodecId::H264,
315                    Confidence::MEDIUM,
316                    "AVCC length-prefixed NAL with valid type",
317                );
318            }
319        }
320    }
321    ProbeResult::new(CodecId::H264, Confidence::MIN, "no H.264 signature found")
322}
323
324fn probe_h265(data: &[u8]) -> ProbeResult {
325    // HEVC NAL types: VPS=32, SPS=33, PPS=34 (nal_unit_type = (byte >> 1) & 0x3F).
326    const START_4: [u8; 4] = [0x00, 0x00, 0x00, 0x01];
327    const START_3: [u8; 3] = [0x00, 0x00, 0x01];
328
329    let check_hevc_nal = |offset: usize| -> Option<u8> {
330        if data.len() > offset + 1 {
331            Some((data[offset] >> 1) & 0x3F)
332        } else {
333            None
334        }
335    };
336
337    if starts_with(data, &START_4) {
338        if let Some(nal_type) = check_hevc_nal(4) {
339            if nal_type == 32 {
340                return ProbeResult::new(
341                    CodecId::H265,
342                    Confidence::CERTAIN,
343                    "AnnexB + HEVC VPS (type 32)",
344                );
345            } else if nal_type == 33 || nal_type == 34 {
346                return ProbeResult::new(
347                    CodecId::H265,
348                    Confidence::HIGH,
349                    format!("AnnexB + HEVC NAL type {nal_type}"),
350                );
351            }
352        }
353        return ProbeResult::new(
354            CodecId::H265,
355            Confidence::LOW,
356            "AnnexB 4-byte start code (ambiguous)",
357        );
358    }
359    if starts_with(data, &START_3) {
360        if let Some(nal_type) = check_hevc_nal(3) {
361            if nal_type == 32 || nal_type == 33 {
362                return ProbeResult::new(
363                    CodecId::H265,
364                    Confidence::HIGH,
365                    format!("3-byte AnnexB + HEVC NAL type {nal_type}"),
366                );
367            }
368        }
369    }
370    ProbeResult::new(CodecId::H265, Confidence::MIN, "no HEVC signature found")
371}
372
373fn probe_theora(data: &[u8]) -> ProbeResult {
374    // Theora identification header: 0x80 "theora"
375    const MAGIC: &[u8] = &[0x80, b't', b'h', b'e', b'o', b'r', b'a'];
376    if starts_with(data, MAGIC) {
377        ProbeResult::new(
378            CodecId::Theora,
379            Confidence::CERTAIN,
380            "Theora identification header magic",
381        )
382    } else {
383        ProbeResult::new(CodecId::Theora, Confidence::MIN, "no Theora magic")
384    }
385}
386
387fn probe_opus(data: &[u8]) -> ProbeResult {
388    // Opus identification header: "OpusHead"
389    const MAGIC: &[u8] = b"OpusHead";
390    if starts_with(data, MAGIC) {
391        ProbeResult::new(
392            CodecId::Opus,
393            Confidence::CERTAIN,
394            "OpusHead identification header",
395        )
396    } else if contains_in_first(data, MAGIC, 64) {
397        ProbeResult::new(
398            CodecId::Opus,
399            Confidence::HIGH,
400            "OpusHead found within first 64 bytes",
401        )
402    } else {
403        ProbeResult::new(CodecId::Opus, Confidence::MIN, "no Opus magic")
404    }
405}
406
407fn probe_vorbis(data: &[u8]) -> ProbeResult {
408    // Vorbis identification header: 0x01 "vorbis"
409    const MAGIC: &[u8] = &[0x01, b'v', b'o', b'r', b'b', b'i', b's'];
410    if starts_with(data, MAGIC) {
411        ProbeResult::new(
412            CodecId::Vorbis,
413            Confidence::CERTAIN,
414            "Vorbis identification header magic",
415        )
416    } else {
417        ProbeResult::new(CodecId::Vorbis, Confidence::MIN, "no Vorbis magic")
418    }
419}
420
421fn probe_flac(data: &[u8]) -> ProbeResult {
422    // FLAC stream marker: "fLaC"
423    const MAGIC: &[u8] = b"fLaC";
424    if starts_with(data, MAGIC) {
425        ProbeResult::new(
426            CodecId::Flac,
427            Confidence::CERTAIN,
428            "FLAC stream marker 'fLaC'",
429        )
430    } else {
431        ProbeResult::new(CodecId::Flac, Confidence::MIN, "no FLAC marker")
432    }
433}
434
435fn probe_png(data: &[u8]) -> ProbeResult {
436    // PNG signature: 89 50 4E 47 0D 0A 1A 0A
437    const PNG_SIG: &[u8] = &[0x89, b'P', b'N', b'G', 0x0D, 0x0A, 0x1A, 0x0A];
438    if starts_with(data, PNG_SIG) {
439        ProbeResult::new(CodecId::Png, Confidence::CERTAIN, "PNG signature bytes")
440    } else {
441        ProbeResult::new(CodecId::Png, Confidence::MIN, "no PNG signature")
442    }
443}
444
445fn probe_gif(data: &[u8]) -> ProbeResult {
446    // GIF87a or GIF89a
447    if starts_with(data, b"GIF87a") || starts_with(data, b"GIF89a") {
448        ProbeResult::new(CodecId::Gif, Confidence::CERTAIN, "GIF header magic")
449    } else {
450        ProbeResult::new(CodecId::Gif, Confidence::MIN, "no GIF magic")
451    }
452}
453
454fn probe_webp(data: &[u8]) -> ProbeResult {
455    // WebP: "RIFF" at 0, "WEBP" at 8.
456    if data.len() >= 12 && &data[..4] == b"RIFF" && &data[8..12] == b"WEBP" {
457        ProbeResult::new(
458            CodecId::WebP,
459            Confidence::CERTAIN,
460            "RIFF/WEBP container signature",
461        )
462    } else if starts_with(data, b"RIFF") {
463        ProbeResult::new(
464            CodecId::WebP,
465            Confidence::LOW,
466            "RIFF container (possibly WebP)",
467        )
468    } else {
469        ProbeResult::new(CodecId::WebP, Confidence::MIN, "no WebP signature")
470    }
471}
472
473fn probe_jpegxl(data: &[u8]) -> ProbeResult {
474    // JXL codestream: FF 0A
475    // JXL ISOBMFF: 00 00 00 0C 4A 58 4C 20 ...
476    const JXL_CODESTREAM: &[u8] = &[0xFF, 0x0A];
477    const JXL_ISOBMFF: &[u8] = &[0x00, 0x00, 0x00, 0x0C, b'J', b'X', b'L', b' '];
478    if starts_with(data, JXL_ISOBMFF) {
479        ProbeResult::new(
480            CodecId::JpegXl,
481            Confidence::CERTAIN,
482            "JPEG-XL ISOBMFF signature",
483        )
484    } else if starts_with(data, JXL_CODESTREAM) {
485        ProbeResult::new(
486            CodecId::JpegXl,
487            Confidence::CERTAIN,
488            "JPEG-XL codestream marker FF 0A",
489        )
490    } else {
491        ProbeResult::new(CodecId::JpegXl, Confidence::MIN, "no JPEG-XL signature")
492    }
493}
494
495fn probe_aac(data: &[u8]) -> ProbeResult {
496    // ADTS sync word: 0xFFF (12 bits) at start of each frame.
497    if data.len() >= 2 && data[0] == 0xFF && (data[1] & 0xF0) == 0xF0 {
498        // Check layer bits: must be 00.
499        let layer = (data[1] >> 1) & 0x03;
500        if layer == 0 {
501            return ProbeResult::new(
502                CodecId::Aac,
503                Confidence::HIGH,
504                "ADTS sync word 0xFFF with layer=0",
505            );
506        }
507        return ProbeResult::new(
508            CodecId::Aac,
509            Confidence::MEDIUM,
510            "ADTS-like sync word (layer non-zero)",
511        );
512    }
513    ProbeResult::new(CodecId::Aac, Confidence::MIN, "no ADTS sync word")
514}
515
516// ---------------------------------------------------------------------------
517// Public API
518// ---------------------------------------------------------------------------
519
520/// Probe `data` against all known codecs and return all results sorted by
521/// descending confidence.
522///
523/// Only results with confidence > 0 are included unless `include_zero` is true.
524pub fn probe_all(data: &[u8], include_zero: bool) -> Vec<ProbeResult> {
525    let mut results = vec![
526        probe_av1(data),
527        probe_vp9(data),
528        probe_vp8(data),
529        probe_h264(data),
530        probe_h265(data),
531        probe_theora(data),
532        probe_opus(data),
533        probe_vorbis(data),
534        probe_flac(data),
535        probe_png(data),
536        probe_gif(data),
537        probe_webp(data),
538        probe_jpegxl(data),
539        probe_aac(data),
540    ];
541
542    if !include_zero {
543        results.retain(|r| r.confidence.value() > 0);
544    }
545
546    // Sort by descending confidence, then stable alphabetical by codec Display name.
547    results.sort_by(|a, b| b.confidence.cmp(&a.confidence).then(a.codec.cmp(&b.codec)));
548    results
549}
550
551/// Probe `data` and return the single best-matching codec along with its confidence.
552///
553/// Returns `(CodecId::Unknown, Confidence::MIN, "")` when no codec matches.
554pub fn probe_best(data: &[u8]) -> ProbeResult {
555    probe_all(data, false)
556        .into_iter()
557        .next()
558        .unwrap_or_else(|| {
559            ProbeResult::new(CodecId::Unknown, Confidence::MIN, "no codec identified")
560        })
561}
562
563/// Probe `data` against a specific codec and return the result.
564pub fn probe_codec(data: &[u8], codec: CodecId) -> ProbeResult {
565    match codec {
566        CodecId::Av1 => probe_av1(data),
567        CodecId::Vp9 => probe_vp9(data),
568        CodecId::Vp8 => probe_vp8(data),
569        CodecId::H264 => probe_h264(data),
570        CodecId::H265 => probe_h265(data),
571        CodecId::Theora => probe_theora(data),
572        CodecId::Opus => probe_opus(data),
573        CodecId::Vorbis => probe_vorbis(data),
574        CodecId::Flac => probe_flac(data),
575        CodecId::Png => probe_png(data),
576        CodecId::Gif => probe_gif(data),
577        CodecId::WebP => probe_webp(data),
578        CodecId::JpegXl => probe_jpegxl(data),
579        CodecId::Aac => probe_aac(data),
580        CodecId::Pcm | CodecId::Unknown => ProbeResult::new(
581            codec,
582            Confidence::MIN,
583            "codec not directly probeable from magic bytes",
584        ),
585    }
586}
587
588// ---------------------------------------------------------------------------
589// Tests
590// ---------------------------------------------------------------------------
591
#[cfg(test)]
mod tests {
    use super::*;

    /// Confidence that `probe_codec` reports for `codec` on `bytes`.
    fn confidence_for(bytes: &[u8], codec: CodecId) -> Confidence {
        probe_codec(bytes, codec).confidence
    }

    #[test]
    fn test_probe_png_signature() {
        let png = [0x89u8, b'P', b'N', b'G', 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00];
        let ProbeResult {
            codec, confidence, ..
        } = probe_codec(&png, CodecId::Png);
        assert_eq!(confidence, Confidence::CERTAIN);
        assert_eq!(codec, CodecId::Png);
    }

    #[test]
    fn test_probe_gif_header() {
        assert_eq!(
            confidence_for(b"GIF89a\x10\x00\x10\x00", CodecId::Gif),
            Confidence::CERTAIN
        );
    }

    #[test]
    fn test_probe_webp_signature() {
        // 16 bytes: "RIFF", 4 size bytes, "WEBP", 4 bytes of payload.
        let mut riff = Vec::with_capacity(16);
        riff.extend_from_slice(b"RIFF");
        riff.extend_from_slice(&[0u8; 4]);
        riff.extend_from_slice(b"WEBP");
        riff.extend_from_slice(&[0u8; 4]);
        assert_eq!(confidence_for(&riff, CodecId::WebP), Confidence::CERTAIN);
    }

    #[test]
    fn test_probe_flac_marker() {
        assert_eq!(
            confidence_for(b"fLaCextra", CodecId::Flac),
            Confidence::CERTAIN
        );
    }

    #[test]
    fn test_probe_h264_annexb_sps() {
        // 4-byte AnnexB start code followed by an SPS NAL header byte (0x67).
        let annexb = [0x00u8, 0x00, 0x00, 0x01, 0x67, 0x42, 0x00, 0x1E];
        assert_eq!(confidence_for(&annexb, CodecId::H264), Confidence::CERTAIN);
    }

    #[test]
    fn test_probe_h265_vps() {
        // 4-byte AnnexB start code followed by an HEVC VPS: 32 << 1 == 0x40.
        let annexb = [0x00u8, 0x00, 0x00, 0x01, 0x40, 0x01, 0x0C, 0x01];
        assert_eq!(confidence_for(&annexb, CodecId::H265), Confidence::CERTAIN);
    }

    #[test]
    fn test_probe_opus_head() {
        assert_eq!(
            confidence_for(b"OpusHead\x01\x02\x38\x01", CodecId::Opus),
            Confidence::CERTAIN
        );
    }

    #[test]
    fn test_probe_vorbis_magic() {
        let ident = [0x01u8, b'v', b'o', b'r', b'b', b'i', b's', 0x00];
        assert_eq!(confidence_for(&ident, CodecId::Vorbis), Confidence::CERTAIN);
    }

    #[test]
    fn test_probe_best_returns_highest_confidence() {
        // A PNG signature must outrank every heuristic match.
        let png = [0x89u8, b'P', b'N', b'G', 0x0D, 0x0A, 0x1A, 0x0A, 0x00];
        let winner = probe_best(&png);
        assert_eq!(winner.codec, CodecId::Png);
        assert_eq!(winner.confidence, Confidence::CERTAIN);
    }

    #[test]
    fn test_probe_all_sorted_descending() {
        let ranked = probe_all(b"fLaC\x00\x00\x00\x22", false);
        // Confidence must never increase from one entry to the next.
        assert!(ranked
            .windows(2)
            .all(|pair| pair[0].confidence >= pair[1].confidence));
        // FLAC must be ranked first.
        assert_eq!(ranked.first().map(|r| r.codec), Some(CodecId::Flac));
    }

    #[test]
    fn test_probe_all_include_zero() {
        let sample = b"fLaC\x00";
        let everything = probe_all(sample, true);
        let matches_only = probe_all(sample, false);
        // Including zero-confidence entries can only add results.
        assert!(everything.len() >= matches_only.len());
        // Unmatched codecs must show up with confidence 0.
        assert!(everything.iter().any(|r| r.confidence.value() == 0));
    }

    #[test]
    fn test_probe_unknown_data_returns_unknown() {
        // All-zero input carries no magic bytes.  Individual heuristic probes
        // may still report a weak match for it, so this test only checks that
        // probing does not panic and that the confidence stays in range.
        let zeros = [0x00u8; 8];
        let winner = probe_best(&zeros);
        assert!(winner.confidence.value() <= 100);
    }

    #[test]
    fn test_confidence_ordering() {
        let ascending = [
            Confidence::MIN,
            Confidence::LOW,
            Confidence::MEDIUM,
            Confidence::HIGH,
            Confidence::CERTAIN,
        ];
        for pair in ascending.windows(2) {
            assert!(pair[1] > pair[0]);
        }
    }

    #[test]
    fn test_probe_jpegxl_codestream() {
        let codestream = [0xFFu8, 0x0A, 0x00, 0x00];
        assert_eq!(
            confidence_for(&codestream, CodecId::JpegXl),
            Confidence::CERTAIN
        );
    }

    #[test]
    fn test_probe_vp8_key_frame() {
        // Key-frame tag (bit 0 of the first byte is 0) followed by the start
        // code 0x9D 0x01 0x2A at offset 3.
        let key_frame = [0x00u8, 0x00, 0x00, 0x9D, 0x01, 0x2A, 0x00, 0x00];
        assert_eq!(confidence_for(&key_frame, CodecId::Vp8), Confidence::CERTAIN);
    }
}