1use std::fmt;
26
/// Identifier for each format the probes in this file can report.
///
/// `Ord` is derived, so variants compare in declaration order; `probe_all`
/// relies on that order to break ties between equally confident results.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum CodecId {
    /// AV1; `probe_av1` inspects the first byte as an OBU header.
    Av1,
    /// VP9; `probe_vp9` checks the frame-marker bits of the first byte.
    Vp9,
    /// VP8; `probe_vp8` looks for the key-frame start code `9D 01 2A`.
    Vp8,
    /// H.264; `probe_h264` looks for AnnexB start codes or an AVCC length prefix.
    H264,
    /// H.265/HEVC; `probe_h265` looks for AnnexB start codes followed by HEVC NAL types.
    H265,
    /// Theora, identified by its identification-header magic.
    Theora,
    /// Opus, identified by the `OpusHead` magic.
    Opus,
    /// Vorbis, identified by its identification-header magic.
    Vorbis,
    /// FLAC, identified by the `fLaC` stream marker.
    Flac,
    /// PCM. No probe exists for it; `probe_codec` reports minimum confidence.
    Pcm,
    /// PNG, identified by its 8-byte signature.
    Png,
    /// GIF, identified by the `GIF87a` / `GIF89a` header.
    Gif,
    /// WebP, identified by the `RIFF` / `WEBP` container signature.
    WebP,
    /// JPEG-XL, identified by its codestream marker or ISOBMFF signature.
    JpegXl,
    /// AAC in ADTS framing, identified by the ADTS sync word.
    Aac,
    /// Fallback used by `probe_best` when no probe reports a non-zero confidence.
    Unknown,
}
67
68impl fmt::Display for CodecId {
69 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
70 let name = match self {
71 Self::Av1 => "AV1",
72 Self::Vp9 => "VP9",
73 Self::Vp8 => "VP8",
74 Self::H264 => "H.264",
75 Self::H265 => "H.265/HEVC",
76 Self::Theora => "Theora",
77 Self::Opus => "Opus",
78 Self::Vorbis => "Vorbis",
79 Self::Flac => "FLAC",
80 Self::Pcm => "PCM",
81 Self::Png => "PNG",
82 Self::Gif => "GIF",
83 Self::WebP => "WebP",
84 Self::JpegXl => "JPEG-XL",
85 Self::Aac => "AAC (ADTS)",
86 Self::Unknown => "Unknown",
87 };
88 f.write_str(name)
89 }
90}
91
/// Probe confidence expressed as a percentage in `0..=100`.
///
/// Ordering is numeric, so a higher confidence compares greater.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Confidence(u8);

impl Confidence {
    /// 0% — the probe found nothing.
    pub const MIN: Self = Self(0);
    /// 25% — a weak hint.
    pub const LOW: Self = Self(25);
    /// 50% — a partial match.
    pub const MEDIUM: Self = Self(50);
    /// 75% — a strong match.
    pub const HIGH: Self = Self(75);
    /// 100% — an unambiguous signature.
    pub const CERTAIN: Self = Self(100);

    /// Builds a confidence from a raw percentage, saturating at 100.
    ///
    /// This is a `const fn`, so clamped values can be created in `const` and
    /// `static` items just like the predefined levels.
    #[must_use]
    pub const fn new(raw: u8) -> Self {
        // `Ord::min` is not callable from a `const fn`, hence the manual clamp.
        if raw > 100 {
            Self(100)
        } else {
            Self(raw)
        }
    }

    /// Returns the percentage as a plain integer in `0..=100`.
    #[must_use]
    pub const fn value(self) -> u8 {
        self.0
    }
}
124
125impl fmt::Display for Confidence {
126 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
127 write!(f, "{}%", self.0)
128 }
129}
130
/// Outcome of running one probe against a byte buffer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ProbeResult {
    /// Codec this result refers to.
    pub codec: CodecId,
    /// How strongly the inspected bytes point at `codec`.
    pub confidence: Confidence,
    /// Human-readable explanation of what the probe did or did not find.
    pub reason: String,
}
145
146impl ProbeResult {
147 fn new(codec: CodecId, confidence: Confidence, reason: impl Into<String>) -> Self {
148 Self {
149 codec,
150 confidence,
151 reason: reason.into(),
152 }
153 }
154}
155
156impl fmt::Display for ProbeResult {
157 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
158 write!(f, "{}: {} ({})", self.codec, self.confidence, self.reason)
159 }
160}
161
/// Reports whether `data` begins with the bytes in `prefix`.
///
/// An empty `prefix` matches every buffer; a `prefix` longer than `data`
/// never matches.
fn starts_with(data: &[u8], prefix: &[u8]) -> bool {
    // This calls the slice method of the same name, not this function.
    <[u8]>::starts_with(data, prefix)
}
170
/// Reports whether `needle` occurs entirely within the first `limit` bytes
/// of `data`.
///
/// An empty `needle` is considered contained in any buffer, matching the
/// behaviour of `starts_with` for an empty prefix.
fn contains_in_first(data: &[u8], needle: &[u8], limit: usize) -> bool {
    // `slice::windows` panics when asked for zero-sized windows, so the
    // empty needle has to be answered before reaching it.
    if needle.is_empty() {
        return true;
    }

    let haystack = &data[..data.len().min(limit)];
    // When the haystack is shorter than the needle, `windows` yields nothing
    // and `any` returns false.
    haystack
        .windows(needle.len())
        .any(|window| window == needle)
}
180
181fn probe_av1(data: &[u8]) -> ProbeResult {
186 if data.is_empty() {
191 return ProbeResult::new(CodecId::Av1, Confidence::MIN, "empty buffer");
192 }
193 let b = data[0];
194 let forbidden = (b >> 7) & 1;
195 let obu_type = (b >> 3) & 0x0F;
196 let has_size = (b >> 1) & 1;
197
198 if forbidden == 0 && (obu_type == 1 || obu_type == 2) && has_size == 1 {
199 ProbeResult::new(
200 CodecId::Av1,
201 Confidence::HIGH,
202 format!("AV1 OBU header byte 0x{b:02X} (type={obu_type})"),
203 )
204 } else if forbidden == 0 && (1..=8).contains(&obu_type) {
205 ProbeResult::new(
206 CodecId::Av1,
207 Confidence::MEDIUM,
208 format!("possible AV1 OBU type={obu_type}"),
209 )
210 } else {
211 ProbeResult::new(CodecId::Av1, Confidence::MIN, "no AV1 OBU marker")
212 }
213}
214
215fn probe_vp9(data: &[u8]) -> ProbeResult {
216 if data.is_empty() {
221 return ProbeResult::new(CodecId::Vp9, Confidence::MIN, "empty buffer");
222 }
223 let frame_marker = (data[0] >> 6) & 0x03;
224 let superframe_marker = data.last().map(|&b| (b >> 5) & 0x7).unwrap_or(0);
226
227 if frame_marker == 2 && superframe_marker == 0b110 {
228 ProbeResult::new(
229 CodecId::Vp9,
230 Confidence::HIGH,
231 "VP9 frame_marker + superframe marker",
232 )
233 } else if frame_marker == 2 {
234 ProbeResult::new(CodecId::Vp9, Confidence::MEDIUM, "VP9 frame_marker present")
235 } else {
236 ProbeResult::new(CodecId::Vp9, Confidence::MIN, "no VP9 frame_marker")
237 }
238}
239
240fn probe_vp8(data: &[u8]) -> ProbeResult {
241 if data.len() < 4 {
244 return ProbeResult::new(CodecId::Vp8, Confidence::MIN, "buffer too short");
245 }
246 let frame_type = data[0] & 0x01; if frame_type == 0 {
248 if data.len() >= 6 && data[3] == 0x9D && data[4] == 0x01 && data[5] == 0x2A {
250 return ProbeResult::new(
251 CodecId::Vp8,
252 Confidence::CERTAIN,
253 "VP8 key frame start code 9D 01 2A found",
254 );
255 }
256 return ProbeResult::new(
257 CodecId::Vp8,
258 Confidence::MEDIUM,
259 "VP8 key frame flag set but start code missing",
260 );
261 }
262 ProbeResult::new(
263 CodecId::Vp8,
264 Confidence::LOW,
265 "VP8 inter frame (cannot confirm without key frame)",
266 )
267}
268
269fn probe_h264(data: &[u8]) -> ProbeResult {
270 const START_4: [u8; 4] = [0x00, 0x00, 0x00, 0x01];
272 const START_3: [u8; 3] = [0x00, 0x00, 0x01];
273
274 let check_nal_type = |offset: usize| -> Option<u8> { data.get(offset).copied() };
275
276 if starts_with(data, &START_4) {
277 let nal_byte = check_nal_type(4).unwrap_or(0);
278 let nal_type = nal_byte & 0x1F;
279 if nal_type == 7 {
280 return ProbeResult::new(
281 CodecId::H264,
282 Confidence::CERTAIN,
283 "AnnexB + SPS NAL type 7",
284 );
285 } else if nal_type == 8 || nal_type == 5 || nal_type == 1 {
286 return ProbeResult::new(
287 CodecId::H264,
288 Confidence::HIGH,
289 format!("AnnexB start code + H.264-compatible NAL type {nal_type}"),
290 );
291 }
292 return ProbeResult::new(
293 CodecId::H264,
294 Confidence::MEDIUM,
295 "AnnexB 4-byte start code",
296 );
297 }
298 if starts_with(data, &START_3) {
299 let nal_byte = check_nal_type(3).unwrap_or(0);
300 let nal_type = nal_byte & 0x1F;
301 if nal_type == 7 {
302 return ProbeResult::new(CodecId::H264, Confidence::HIGH, "3-byte AnnexB + SPS NAL");
303 }
304 return ProbeResult::new(CodecId::H264, Confidence::LOW, "3-byte AnnexB start code");
305 }
306 if data.len() >= 5 {
308 let claimed_len = u32::from_be_bytes([data[0], data[1], data[2], data[3]]) as usize;
309 if claimed_len > 0 && claimed_len < data.len() {
310 let nal_byte = data[4];
311 let nal_type = nal_byte & 0x1F;
312 if nal_type == 7 || nal_type == 8 || nal_type == 5 {
313 return ProbeResult::new(
314 CodecId::H264,
315 Confidence::MEDIUM,
316 "AVCC length-prefixed NAL with valid type",
317 );
318 }
319 }
320 }
321 ProbeResult::new(CodecId::H264, Confidence::MIN, "no H.264 signature found")
322}
323
324fn probe_h265(data: &[u8]) -> ProbeResult {
325 const START_4: [u8; 4] = [0x00, 0x00, 0x00, 0x01];
327 const START_3: [u8; 3] = [0x00, 0x00, 0x01];
328
329 let check_hevc_nal = |offset: usize| -> Option<u8> {
330 if data.len() > offset + 1 {
331 Some((data[offset] >> 1) & 0x3F)
332 } else {
333 None
334 }
335 };
336
337 if starts_with(data, &START_4) {
338 if let Some(nal_type) = check_hevc_nal(4) {
339 if nal_type == 32 {
340 return ProbeResult::new(
341 CodecId::H265,
342 Confidence::CERTAIN,
343 "AnnexB + HEVC VPS (type 32)",
344 );
345 } else if nal_type == 33 || nal_type == 34 {
346 return ProbeResult::new(
347 CodecId::H265,
348 Confidence::HIGH,
349 format!("AnnexB + HEVC NAL type {nal_type}"),
350 );
351 }
352 }
353 return ProbeResult::new(
354 CodecId::H265,
355 Confidence::LOW,
356 "AnnexB 4-byte start code (ambiguous)",
357 );
358 }
359 if starts_with(data, &START_3) {
360 if let Some(nal_type) = check_hevc_nal(3) {
361 if nal_type == 32 || nal_type == 33 {
362 return ProbeResult::new(
363 CodecId::H265,
364 Confidence::HIGH,
365 format!("3-byte AnnexB + HEVC NAL type {nal_type}"),
366 );
367 }
368 }
369 }
370 ProbeResult::new(CodecId::H265, Confidence::MIN, "no HEVC signature found")
371}
372
373fn probe_theora(data: &[u8]) -> ProbeResult {
374 const MAGIC: &[u8] = &[0x80, b't', b'h', b'e', b'o', b'r', b'a'];
376 if starts_with(data, MAGIC) {
377 ProbeResult::new(
378 CodecId::Theora,
379 Confidence::CERTAIN,
380 "Theora identification header magic",
381 )
382 } else {
383 ProbeResult::new(CodecId::Theora, Confidence::MIN, "no Theora magic")
384 }
385}
386
387fn probe_opus(data: &[u8]) -> ProbeResult {
388 const MAGIC: &[u8] = b"OpusHead";
390 if starts_with(data, MAGIC) {
391 ProbeResult::new(
392 CodecId::Opus,
393 Confidence::CERTAIN,
394 "OpusHead identification header",
395 )
396 } else if contains_in_first(data, MAGIC, 64) {
397 ProbeResult::new(
398 CodecId::Opus,
399 Confidence::HIGH,
400 "OpusHead found within first 64 bytes",
401 )
402 } else {
403 ProbeResult::new(CodecId::Opus, Confidence::MIN, "no Opus magic")
404 }
405}
406
407fn probe_vorbis(data: &[u8]) -> ProbeResult {
408 const MAGIC: &[u8] = &[0x01, b'v', b'o', b'r', b'b', b'i', b's'];
410 if starts_with(data, MAGIC) {
411 ProbeResult::new(
412 CodecId::Vorbis,
413 Confidence::CERTAIN,
414 "Vorbis identification header magic",
415 )
416 } else {
417 ProbeResult::new(CodecId::Vorbis, Confidence::MIN, "no Vorbis magic")
418 }
419}
420
421fn probe_flac(data: &[u8]) -> ProbeResult {
422 const MAGIC: &[u8] = b"fLaC";
424 if starts_with(data, MAGIC) {
425 ProbeResult::new(
426 CodecId::Flac,
427 Confidence::CERTAIN,
428 "FLAC stream marker 'fLaC'",
429 )
430 } else {
431 ProbeResult::new(CodecId::Flac, Confidence::MIN, "no FLAC marker")
432 }
433}
434
435fn probe_png(data: &[u8]) -> ProbeResult {
436 const PNG_SIG: &[u8] = &[0x89, b'P', b'N', b'G', 0x0D, 0x0A, 0x1A, 0x0A];
438 if starts_with(data, PNG_SIG) {
439 ProbeResult::new(CodecId::Png, Confidence::CERTAIN, "PNG signature bytes")
440 } else {
441 ProbeResult::new(CodecId::Png, Confidence::MIN, "no PNG signature")
442 }
443}
444
445fn probe_gif(data: &[u8]) -> ProbeResult {
446 if starts_with(data, b"GIF87a") || starts_with(data, b"GIF89a") {
448 ProbeResult::new(CodecId::Gif, Confidence::CERTAIN, "GIF header magic")
449 } else {
450 ProbeResult::new(CodecId::Gif, Confidence::MIN, "no GIF magic")
451 }
452}
453
454fn probe_webp(data: &[u8]) -> ProbeResult {
455 if data.len() >= 12 && &data[..4] == b"RIFF" && &data[8..12] == b"WEBP" {
457 ProbeResult::new(
458 CodecId::WebP,
459 Confidence::CERTAIN,
460 "RIFF/WEBP container signature",
461 )
462 } else if starts_with(data, b"RIFF") {
463 ProbeResult::new(
464 CodecId::WebP,
465 Confidence::LOW,
466 "RIFF container (possibly WebP)",
467 )
468 } else {
469 ProbeResult::new(CodecId::WebP, Confidence::MIN, "no WebP signature")
470 }
471}
472
473fn probe_jpegxl(data: &[u8]) -> ProbeResult {
474 const JXL_CODESTREAM: &[u8] = &[0xFF, 0x0A];
477 const JXL_ISOBMFF: &[u8] = &[0x00, 0x00, 0x00, 0x0C, b'J', b'X', b'L', b' '];
478 if starts_with(data, JXL_ISOBMFF) {
479 ProbeResult::new(
480 CodecId::JpegXl,
481 Confidence::CERTAIN,
482 "JPEG-XL ISOBMFF signature",
483 )
484 } else if starts_with(data, JXL_CODESTREAM) {
485 ProbeResult::new(
486 CodecId::JpegXl,
487 Confidence::CERTAIN,
488 "JPEG-XL codestream marker FF 0A",
489 )
490 } else {
491 ProbeResult::new(CodecId::JpegXl, Confidence::MIN, "no JPEG-XL signature")
492 }
493}
494
495fn probe_aac(data: &[u8]) -> ProbeResult {
496 if data.len() >= 2 && data[0] == 0xFF && (data[1] & 0xF0) == 0xF0 {
498 let layer = (data[1] >> 1) & 0x03;
500 if layer == 0 {
501 return ProbeResult::new(
502 CodecId::Aac,
503 Confidence::HIGH,
504 "ADTS sync word 0xFFF with layer=0",
505 );
506 }
507 return ProbeResult::new(
508 CodecId::Aac,
509 Confidence::MEDIUM,
510 "ADTS-like sync word (layer non-zero)",
511 );
512 }
513 ProbeResult::new(CodecId::Aac, Confidence::MIN, "no ADTS sync word")
514}
515
516pub fn probe_all(data: &[u8], include_zero: bool) -> Vec<ProbeResult> {
525 let mut results = vec![
526 probe_av1(data),
527 probe_vp9(data),
528 probe_vp8(data),
529 probe_h264(data),
530 probe_h265(data),
531 probe_theora(data),
532 probe_opus(data),
533 probe_vorbis(data),
534 probe_flac(data),
535 probe_png(data),
536 probe_gif(data),
537 probe_webp(data),
538 probe_jpegxl(data),
539 probe_aac(data),
540 ];
541
542 if !include_zero {
543 results.retain(|r| r.confidence.value() > 0);
544 }
545
546 results.sort_by(|a, b| b.confidence.cmp(&a.confidence).then(a.codec.cmp(&b.codec)));
548 results
549}
550
551pub fn probe_best(data: &[u8]) -> ProbeResult {
555 probe_all(data, false)
556 .into_iter()
557 .next()
558 .unwrap_or_else(|| {
559 ProbeResult::new(CodecId::Unknown, Confidence::MIN, "no codec identified")
560 })
561}
562
563pub fn probe_codec(data: &[u8], codec: CodecId) -> ProbeResult {
565 match codec {
566 CodecId::Av1 => probe_av1(data),
567 CodecId::Vp9 => probe_vp9(data),
568 CodecId::Vp8 => probe_vp8(data),
569 CodecId::H264 => probe_h264(data),
570 CodecId::H265 => probe_h265(data),
571 CodecId::Theora => probe_theora(data),
572 CodecId::Opus => probe_opus(data),
573 CodecId::Vorbis => probe_vorbis(data),
574 CodecId::Flac => probe_flac(data),
575 CodecId::Png => probe_png(data),
576 CodecId::Gif => probe_gif(data),
577 CodecId::WebP => probe_webp(data),
578 CodecId::JpegXl => probe_jpegxl(data),
579 CodecId::Aac => probe_aac(data),
580 CodecId::Pcm | CodecId::Unknown => ProbeResult::new(
581 codec,
582 Confidence::MIN,
583 "codec not directly probeable from magic bytes",
584 ),
585 }
586}
587
/// Unit tests for the individual probes and for the ranking helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// The 8-byte PNG signature is recognised with full confidence.
    #[test]
    fn test_probe_png_signature() {
        let data = [0x89u8, b'P', b'N', b'G', 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00];
        let result = probe_codec(&data, CodecId::Png);
        assert_eq!(result.confidence, Confidence::CERTAIN);
        assert_eq!(result.codec, CodecId::Png);
    }

    /// A `GIF89a` header is recognised with full confidence.
    #[test]
    fn test_probe_gif_header() {
        let data = b"GIF89a\x10\x00\x10\x00";
        let result = probe_codec(data, CodecId::Gif);
        assert_eq!(result.confidence, Confidence::CERTAIN);
    }

    /// `RIFF` at offset 0 plus `WEBP` at offset 8 is a certain WebP match.
    #[test]
    fn test_probe_webp_signature() {
        let mut data = [0u8; 16];
        data[..4].copy_from_slice(b"RIFF");
        data[8..12].copy_from_slice(b"WEBP");
        let result = probe_codec(&data, CodecId::WebP);
        assert_eq!(result.confidence, Confidence::CERTAIN);
    }

    /// The `fLaC` marker is recognised even when followed by other bytes.
    #[test]
    fn test_probe_flac_marker() {
        let data = b"fLaCextra";
        let result = probe_codec(data, CodecId::Flac);
        assert_eq!(result.confidence, Confidence::CERTAIN);
    }

    /// 4-byte AnnexB start code followed by 0x67 (NAL type 7).
    #[test]
    fn test_probe_h264_annexb_sps() {
        let data = [0x00u8, 0x00, 0x00, 0x01, 0x67, 0x42, 0x00, 0x1E];
        let result = probe_codec(&data, CodecId::H264);
        assert_eq!(result.confidence, Confidence::CERTAIN);
    }

    /// 4-byte AnnexB start code followed by 0x40 (HEVC NAL type 32).
    #[test]
    fn test_probe_h265_vps() {
        let data = [0x00u8, 0x00, 0x00, 0x01, 0x40, 0x01, 0x0C, 0x01];
        let result = probe_codec(&data, CodecId::H265);
        assert_eq!(result.confidence, Confidence::CERTAIN);
    }

    /// `OpusHead` at offset 0 is a certain Opus match.
    #[test]
    fn test_probe_opus_head() {
        let data = b"OpusHead\x01\x02\x38\x01";
        let result = probe_codec(data, CodecId::Opus);
        assert_eq!(result.confidence, Confidence::CERTAIN);
    }

    /// `0x01` followed by `vorbis` is a certain Vorbis match.
    #[test]
    fn test_probe_vorbis_magic() {
        let data = [0x01u8, b'v', b'o', b'r', b'b', b'i', b's', 0x00];
        let result = probe_codec(&data, CodecId::Vorbis);
        assert_eq!(result.confidence, Confidence::CERTAIN);
    }

    /// `probe_best` picks the PNG result for PNG data.
    #[test]
    fn test_probe_best_returns_highest_confidence() {
        let data = [0x89u8, b'P', b'N', b'G', 0x0D, 0x0A, 0x1A, 0x0A, 0x00];
        let best = probe_best(&data);
        assert_eq!(best.codec, CodecId::Png);
        assert_eq!(best.confidence, Confidence::CERTAIN);
    }

    /// `probe_all` returns results ordered by non-increasing confidence.
    #[test]
    fn test_probe_all_sorted_descending() {
        let data = b"fLaC\x00\x00\x00\x22";
        let results = probe_all(data, false);
        for pair in results.windows(2) {
            assert!(pair[0].confidence >= pair[1].confidence);
        }
        assert_eq!(results[0].codec, CodecId::Flac);
    }

    /// With `include_zero` the zero-confidence results are kept.
    #[test]
    fn test_probe_all_include_zero() {
        let data = b"fLaC\x00";
        let with_zero = probe_all(data, true);
        let without_zero = probe_all(data, false);
        assert!(with_zero.len() >= without_zero.len());
        assert!(with_zero.iter().any(|r| r.confidence.value() == 0));
    }

    /// Data without any signature must not be reported as a confident match.
    #[test]
    fn test_probe_unknown_data_returns_unknown() {
        // An empty buffer gives no probe anything to match, so the best
        // result is the explicit `Unknown` fallback.
        let empty: &[u8] = &[];
        let fallback = probe_best(empty);
        assert_eq!(fallback.codec, CodecId::Unknown);
        assert_eq!(fallback.confidence, Confidence::MIN);

        // All-zero bytes contain no magic. A heuristic probe may still offer
        // a weak guess, but it must never be a strong one.
        let data = [0x00u8; 8];
        let best = probe_best(&data);
        assert!(best.confidence < Confidence::HIGH);
    }

    /// The predefined confidence levels are strictly ordered.
    #[test]
    fn test_confidence_ordering() {
        assert!(Confidence::CERTAIN > Confidence::HIGH);
        assert!(Confidence::HIGH > Confidence::MEDIUM);
        assert!(Confidence::MEDIUM > Confidence::LOW);
        assert!(Confidence::LOW > Confidence::MIN);
    }

    /// The bare codestream marker `FF 0A` is a certain JPEG-XL match.
    #[test]
    fn test_probe_jpegxl_codestream() {
        let data = [0xFFu8, 0x0A, 0x00, 0x00];
        let result = probe_codec(&data, CodecId::JpegXl);
        assert_eq!(result.confidence, Confidence::CERTAIN);
    }

    /// Key-frame flag (bit 0 clear) plus start code `9D 01 2A` in bytes 3..6.
    #[test]
    fn test_probe_vp8_key_frame() {
        let data = [0x00u8, 0x00, 0x00, 0x9D, 0x01, 0x2A, 0x00, 0x00];
        let result = probe_codec(&data, CodecId::Vp8);
        assert_eq!(result.confidence, Confidence::CERTAIN);
    }
}