1#![allow(dead_code)]
25#![allow(clippy::cast_precision_loss)]
26
27use std::collections::HashMap;
28
/// Metadata describing a single media file, used as the input record for
/// cross-format duplicate detection.
///
/// Only `path` and `container` are mandatory. Every other field is an
/// optional signal; a signal that is `None` on either side of a comparison
/// is left out of that comparison.
#[derive(Debug, Clone)]
pub struct FormatInfo {
    /// Path identifying the file.
    pub path: String,
    /// Container format name (e.g. `"mp4"`, `"mkv"`).
    pub container: String,
    /// Video codec name, if known.
    pub video_codec: Option<String>,
    /// Audio codec name, if known.
    pub audio_codec: Option<String>,
    /// Duration in seconds, if known.
    pub duration_secs: Option<f64>,
    /// Frame width, if known (presumably pixels).
    pub width: Option<u32>,
    /// Frame height, if known (presumably pixels).
    pub height: Option<u32>,
    /// Audio sample rate, if known (presumably Hz).
    pub sample_rate: Option<u32>,
    /// Number of audio channels, if known.
    pub audio_channels: Option<u32>,
    /// 64-bit perceptual hash, if computed.
    pub phash: Option<u64>,
    /// Raw audio fingerprint bytes, if computed.
    pub audio_fingerprint: Option<Vec<u8>>,
}

impl FormatInfo {
    /// Creates a record holding only the path and container; every optional
    /// signal starts out as `None`.
    #[must_use]
    pub fn new(path: impl Into<String>, container: impl Into<String>) -> Self {
        let path = path.into();
        let container = container.into();
        Self {
            path,
            container,
            video_codec: None,
            audio_codec: None,
            duration_secs: None,
            width: None,
            height: None,
            sample_rate: None,
            audio_channels: None,
            phash: None,
            audio_fingerprint: None,
        }
    }

    /// Returns the record with its duration (in seconds) set.
    #[must_use]
    pub fn with_duration(self, secs: f64) -> Self {
        Self {
            duration_secs: Some(secs),
            ..self
        }
    }

    /// Returns the record with both width and height set.
    #[must_use]
    pub fn with_resolution(self, w: u32, h: u32) -> Self {
        Self {
            width: Some(w),
            height: Some(h),
            ..self
        }
    }

    /// Returns the record with its codec names replaced.
    ///
    /// Passing `None` clears the corresponding codec.
    #[must_use]
    pub fn with_codecs(self, video: Option<String>, audio: Option<String>) -> Self {
        Self {
            video_codec: video,
            audio_codec: audio,
            ..self
        }
    }

    /// Returns the record with its perceptual hash set.
    #[must_use]
    pub fn with_phash(self, hash: u64) -> Self {
        Self {
            phash: Some(hash),
            ..self
        }
    }

    /// Returns the record with its audio fingerprint set.
    #[must_use]
    pub fn with_audio_fingerprint(self, fp: Vec<u8>) -> Self {
        Self {
            audio_fingerprint: Some(fp),
            ..self
        }
    }

    /// Returns the record with its sample rate and channel count set.
    #[must_use]
    pub fn with_audio_info(self, sample_rate: u32, channels: u32) -> Self {
        Self {
            sample_rate: Some(sample_rate),
            audio_channels: Some(channels),
            ..self
        }
    }

    /// Reports whether `self` and `other` use different containers.
    ///
    /// Container names are lowercased before comparison, so `"mp4"` and
    /// `"MP4"` count as the same format.
    #[must_use]
    pub fn is_different_format(&self, other: &Self) -> bool {
        let mine = self.container.to_lowercase();
        let theirs = other.container.to_lowercase();
        mine != theirs
    }
}
130
/// Tunable thresholds and signal weights for cross-format detection.
#[derive(Debug, Clone)]
pub struct CrossFormatConfig {
    /// Duration difference (seconds) at which the duration score reaches
    /// zero. Pairs whose durations differ by more than twice this value are
    /// rejected before any scoring.
    pub max_duration_diff_secs: f64,
    /// Largest perceptual-hash Hamming distance that still earns a non-zero
    /// perceptual-hash score.
    pub max_phash_distance: u32,
    /// Minimum audio similarity.
    ///
    /// NOTE(review): nothing in this file reads this field; confirm whether
    /// it is consumed elsewhere or is meant to gate the audio signal.
    pub min_audio_similarity: f64,
    /// Minimum combined confidence a pair needs to be reported as a match.
    pub confidence_threshold: f64,
    /// Relative weight of the duration signal.
    pub weight_duration: f64,
    /// Relative weight of the resolution signal.
    pub weight_resolution: f64,
    /// Relative weight of the perceptual-hash signal.
    pub weight_phash: f64,
    /// Relative weight of the audio-fingerprint signal.
    pub weight_audio: f64,
}

impl Default for CrossFormatConfig {
    /// Default thresholds: 0.5 s duration tolerance, perceptual-hash distance
    /// of at most 8, confidence threshold 0.75, and weights
    /// 0.25 / 0.15 / 0.35 / 0.25 (duration / resolution / phash / audio).
    fn default() -> Self {
        Self {
            max_duration_diff_secs: 0.5,
            max_phash_distance: 8,
            min_audio_similarity: 0.80,
            confidence_threshold: 0.75,
            weight_duration: 0.25,
            weight_resolution: 0.15,
            weight_phash: 0.35,
            weight_audio: 0.25,
        }
    }
}

impl CrossFormatConfig {
    /// Returns the four signal weights scaled so that they sum to 1, in the
    /// order `(duration, resolution, phash, audio)`.
    ///
    /// When the raw weights cannot be normalised, equal weights of `0.25`
    /// are returned instead. That covers a total below `f64::EPSILON` and a
    /// total that is NaN or infinite. Without the non-finite guard a single
    /// NaN weight would turn every normalised weight into NaN and silently
    /// prevent any pair from ever reaching the confidence threshold.
    #[must_use]
    pub fn normalised_weights(&self) -> (f64, f64, f64, f64) {
        const EQUAL_WEIGHTS: (f64, f64, f64, f64) = (0.25, 0.25, 0.25, 0.25);

        let total =
            self.weight_duration + self.weight_resolution + self.weight_phash + self.weight_audio;
        if !total.is_finite() || total < f64::EPSILON {
            return EQUAL_WEIGHTS;
        }
        (
            self.weight_duration / total,
            self.weight_resolution / total,
            self.weight_phash / total,
            self.weight_audio / total,
        )
    }
}
188
189#[derive(Debug, Clone)]
195pub struct CrossFormatMatch {
196 pub path_a: String,
198 pub path_b: String,
200 pub container_a: String,
202 pub container_b: String,
204 pub confidence: f64,
206 pub signal_scores: SignalScores,
208}
209
/// Individual similarity scores for one compared pair.
///
/// A field is `None` when the corresponding signal was missing on at least
/// one of the two files.
#[derive(Debug, Clone)]
pub struct SignalScores {
    /// Duration similarity score.
    pub duration: Option<f64>,
    /// Resolution similarity score.
    pub resolution: Option<f64>,
    /// Perceptual-hash similarity score.
    pub phash: Option<f64>,
    /// Audio-fingerprint similarity score.
    pub audio: Option<f64>,
}
222
/// A set of files linked, directly or transitively, by cross-format matches.
#[derive(Debug, Clone)]
pub struct CrossFormatGroup {
    /// Paths of the files in the group.
    pub files: Vec<String>,
    /// Distinct container names present in the group.
    pub containers: Vec<String>,
    /// Highest match confidence involving any member of the group.
    pub best_confidence: f64,
}
237
/// Scores how close two durations are, in `[0.0, 1.0]`.
///
/// Returns `None` when either duration is missing. Otherwise the score
/// falls linearly from `1.0` (identical) to `0.0` (difference of `max_diff`
/// or more). When `max_diff` is effectively zero, only durations that are
/// equal to within `f64::EPSILON` score `1.0`; anything else scores `0.0`.
fn duration_similarity(a: Option<f64>, b: Option<f64>, max_diff: f64) -> Option<f64> {
    let gap = (a? - b?).abs();

    let score = if max_diff < f64::EPSILON {
        // No tolerance configured: treat it as an exact-match test rather
        // than dividing by (almost) zero.
        if gap < f64::EPSILON {
            1.0
        } else {
            0.0
        }
    } else {
        (1.0 - gap / max_diff).max(0.0)
    };

    Some(score)
}
257
/// Scores how similar two resolutions are.
///
/// Returns `None` unless all four dimensions are present. Otherwise the
/// score is the first rule that applies:
///
/// * `1.0`  - width and height both identical;
/// * `0.95` - both dimensions agree to better than 99 %;
/// * `0.85` - both dimensions agree to better than 95 %;
/// * `0.5`  - exactly one dimension is identical;
/// * `0.0`  - anything else.
fn resolution_similarity(
    w_a: Option<u32>,
    h_a: Option<u32>,
    w_b: Option<u32>,
    h_b: Option<u32>,
) -> Option<f64> {
    let (wa, ha, wb, hb) = (w_a?, h_a?, w_b?, h_b?);

    // Smaller dimension over larger one; the `max(1)` keeps the divisor
    // non-zero when both values are 0.
    let shrink_ratio = |p: u32, q: u32| f64::from(p.min(q)) / f64::from(p.max(q).max(1));
    let width_ratio = shrink_ratio(wa, wb);
    let height_ratio = shrink_ratio(ha, hb);
    let both_above = |limit: f64| width_ratio > limit && height_ratio > limit;

    let score = if wa == wb && ha == hb {
        1.0
    } else if both_above(0.99) {
        0.95
    } else if both_above(0.95) {
        0.85
    } else if wa == wb || ha == hb {
        0.5
    } else {
        0.0
    };

    Some(score)
}
293
/// Scores two 64-bit perceptual hashes by Hamming distance.
///
/// Returns `None` when either hash is missing. A distance above
/// `max_distance` scores `0.0`; otherwise the score is
/// `1.0 - distance / 64`.
fn phash_similarity(a: Option<u64>, b: Option<u64>, max_distance: u32) -> Option<f64> {
    // Number of bit positions in which the two hashes disagree.
    let hamming = (a? ^ b?).count_ones();

    let score = if hamming <= max_distance {
        1.0 - f64::from(hamming) / 64.0
    } else {
        0.0
    };

    Some(score)
}
308
/// Scores two audio fingerprints by the fraction of matching bits.
///
/// Returns `None` when either fingerprint is missing and `Some(0.0)` when
/// either is empty. Otherwise only the overlapping prefix (the length of
/// the shorter fingerprint) is compared, and the score is
/// `1.0 - differing_bits / compared_bits`.
fn audio_fingerprint_similarity(a: &Option<Vec<u8>>, b: &Option<Vec<u8>>) -> Option<f64> {
    let left = a.as_deref()?;
    let right = b.as_deref()?;

    if left.is_empty() || right.is_empty() {
        return Some(0.0);
    }

    let compared_bits = left.len().min(right.len()) * 8;
    // `zip` stops at the shorter slice, so exactly the overlap is compared.
    let mismatched: u32 = left
        .iter()
        .zip(right)
        .map(|(x, y)| (x ^ y).count_ones())
        .sum();

    Some(1.0 - f64::from(mismatched) / compared_bits as f64)
}
332
333#[derive(Debug)]
339pub struct CrossFormatDetector {
340 config: CrossFormatConfig,
341 items: Vec<FormatInfo>,
342}
343
344impl CrossFormatDetector {
345 #[must_use]
347 pub fn new(config: CrossFormatConfig) -> Self {
348 Self {
349 config,
350 items: Vec::new(),
351 }
352 }
353
354 #[must_use]
356 pub fn with_defaults() -> Self {
357 Self::new(CrossFormatConfig::default())
358 }
359
360 pub fn add(&mut self, info: FormatInfo) {
362 self.items.push(info);
363 }
364
365 pub fn add_batch(&mut self, infos: impl IntoIterator<Item = FormatInfo>) {
367 self.items.extend(infos);
368 }
369
370 #[must_use]
372 pub fn item_count(&self) -> usize {
373 self.items.len()
374 }
375
376 fn compare_pair(&self, a: &FormatInfo, b: &FormatInfo) -> Option<CrossFormatMatch> {
378 if !a.is_different_format(b) {
380 return None;
381 }
382
383 if let (Some(da), Some(db)) = (a.duration_secs, b.duration_secs) {
385 if (da - db).abs() > self.config.max_duration_diff_secs * 2.0 {
386 return None;
387 }
388 }
389
390 let dur_sim = duration_similarity(
391 a.duration_secs,
392 b.duration_secs,
393 self.config.max_duration_diff_secs,
394 );
395 let res_sim = resolution_similarity(a.width, a.height, b.width, b.height);
396 let phash_sim = phash_similarity(a.phash, b.phash, self.config.max_phash_distance);
397 let audio_sim = audio_fingerprint_similarity(&a.audio_fingerprint, &b.audio_fingerprint);
398
399 let signal_scores = SignalScores {
400 duration: dur_sim,
401 resolution: res_sim,
402 phash: phash_sim,
403 audio: audio_sim,
404 };
405
406 let (wd, wr, wp, wa) = self.config.normalised_weights();
408 let mut weighted_sum = 0.0;
409 let mut weight_sum = 0.0;
410
411 if let Some(s) = dur_sim {
412 weighted_sum += s * wd;
413 weight_sum += wd;
414 }
415 if let Some(s) = res_sim {
416 weighted_sum += s * wr;
417 weight_sum += wr;
418 }
419 if let Some(s) = phash_sim {
420 weighted_sum += s * wp;
421 weight_sum += wp;
422 }
423 if let Some(s) = audio_sim {
424 weighted_sum += s * wa;
425 weight_sum += wa;
426 }
427
428 if weight_sum < f64::EPSILON {
429 return None;
430 }
431
432 let confidence = weighted_sum / weight_sum;
433
434 if confidence >= self.config.confidence_threshold {
435 Some(CrossFormatMatch {
436 path_a: a.path.clone(),
437 path_b: b.path.clone(),
438 container_a: a.container.clone(),
439 container_b: b.container.clone(),
440 confidence,
441 signal_scores,
442 })
443 } else {
444 None
445 }
446 }
447
448 #[must_use]
450 pub fn find_matches(&self) -> Vec<CrossFormatMatch> {
451 let mut matches = Vec::new();
452 let mut seen_pairs = std::collections::HashSet::new();
453
454 let buckets = self.bucket_by_duration();
456
457 for bucket in buckets.values() {
458 if bucket.len() < 2 {
459 continue;
460 }
461 for i in 0..bucket.len() {
462 for j in (i + 1)..bucket.len() {
463 let (lo, hi) = if bucket[i] < bucket[j] {
464 (bucket[i], bucket[j])
465 } else {
466 (bucket[j], bucket[i])
467 };
468 if !seen_pairs.insert((lo, hi)) {
469 continue; }
471 if let Some(m) = self.compare_pair(&self.items[lo], &self.items[hi]) {
472 matches.push(m);
473 }
474 }
475 }
476 }
477
478 matches.sort_by(|a, b| {
480 b.confidence
481 .partial_cmp(&a.confidence)
482 .unwrap_or(std::cmp::Ordering::Equal)
483 });
484 matches
485 }
486
487 #[must_use]
489 pub fn find_groups(&self) -> Vec<CrossFormatGroup> {
490 let matches = self.find_matches();
491 if matches.is_empty() {
492 return Vec::new();
493 }
494
495 let mut path_to_idx: HashMap<&str, usize> = HashMap::new();
497 for (i, item) in self.items.iter().enumerate() {
498 path_to_idx.insert(&item.path, i);
499 }
500
501 let n = self.items.len();
503 let mut parent: Vec<usize> = (0..n).collect();
504
505 let find = |parent: &mut Vec<usize>, mut x: usize| -> usize {
506 while parent[x] != x {
507 parent[x] = parent[parent[x]]; x = parent[x];
509 }
510 x
511 };
512
513 let mut best_confidence: Vec<f64> = vec![0.0; n];
514
515 for m in &matches {
516 if let (Some(&ia), Some(&ib)) = (
517 path_to_idx.get(m.path_a.as_str()),
518 path_to_idx.get(m.path_b.as_str()),
519 ) {
520 let ra = find(&mut parent, ia);
521 let rb = find(&mut parent, ib);
522 if ra != rb {
523 parent[ra] = rb;
524 }
525 best_confidence[ia] = best_confidence[ia].max(m.confidence);
526 best_confidence[ib] = best_confidence[ib].max(m.confidence);
527 }
528 }
529
530 let mut groups_map: HashMap<usize, Vec<usize>> = HashMap::new();
532 for i in 0..n {
533 let root = find(&mut parent, i);
534 groups_map.entry(root).or_default().push(i);
535 }
536
537 groups_map
538 .into_values()
539 .filter(|g| g.len() > 1)
540 .filter(|g| {
541 let containers: std::collections::HashSet<&str> = g
543 .iter()
544 .map(|&i| self.items[i].container.as_str())
545 .collect();
546 containers.len() > 1
547 })
548 .map(|g| {
549 let mut containers: Vec<String> =
550 g.iter().map(|&i| self.items[i].container.clone()).collect();
551 containers.sort();
552 containers.dedup();
553
554 let bc = g.iter().map(|&i| best_confidence[i]).fold(0.0f64, f64::max);
555
556 CrossFormatGroup {
557 files: g.iter().map(|&i| self.items[i].path.clone()).collect(),
558 containers,
559 best_confidence: bc,
560 }
561 })
562 .collect()
563 }
564
565 fn bucket_by_duration(&self) -> HashMap<i64, Vec<usize>> {
567 let mut buckets: HashMap<i64, Vec<usize>> = HashMap::new();
568 let bucket_width = self.config.max_duration_diff_secs.max(0.5);
569
570 for (idx, item) in self.items.iter().enumerate() {
571 match item.duration_secs {
572 Some(d) => {
573 let primary = (d / bucket_width) as i64;
576 for offset in -1..=1 {
577 buckets.entry(primary + offset).or_default().push(idx);
578 }
579 }
580 None => {
581 buckets.entry(i64::MIN).or_default().push(idx);
583 }
584 }
585 }
586
587 for bucket in buckets.values_mut() {
589 bucket.sort_unstable();
590 bucket.dedup();
591 }
592
593 buckets
594 }
595}
596
#[cfg(test)]
mod tests {
    use super::*;

    /// Builder calls set exactly the fields they name.
    #[test]
    fn test_format_info_creation() {
        let built = FormatInfo::new("video.mp4", "mp4")
            .with_duration(120.5)
            .with_resolution(1920, 1080);
        assert_eq!(built.path, "video.mp4");
        assert_eq!(built.container, "mp4");
        assert_eq!(built.duration_secs, Some(120.5));
        assert_eq!(built.width, Some(1920));
        assert_eq!(built.height, Some(1080));
    }

    /// Container comparison ignores letter case.
    #[test]
    fn test_is_different_format() {
        let mp4 = FormatInfo::new("a.mp4", "mp4");
        let mkv = FormatInfo::new("b.mkv", "mkv");
        let upper_mp4 = FormatInfo::new("c.mp4", "MP4");

        assert!(mp4.is_different_format(&mkv));
        assert!(!mp4.is_different_format(&upper_mp4));
    }

    /// Equal durations score a perfect 1.0.
    #[test]
    fn test_duration_similarity_identical() {
        assert_eq!(duration_similarity(Some(120.0), Some(120.0), 0.5), Some(1.0));
    }

    /// A difference inside the tolerance gives a partial score.
    #[test]
    fn test_duration_similarity_close() {
        let s = duration_similarity(Some(120.0), Some(120.3), 0.5).expect("should be Some");
        assert!(s > 0.3 && s < 1.0, "sim = {s}");
    }

    /// A difference beyond the tolerance scores zero.
    #[test]
    fn test_duration_similarity_too_far() {
        let s = duration_similarity(Some(120.0), Some(121.0), 0.5).expect("should be Some");
        assert_eq!(s, 0.0);
    }

    /// A missing duration on either side yields no score.
    #[test]
    fn test_duration_similarity_missing() {
        assert!(duration_similarity(None, Some(120.0), 0.5).is_none());
        assert!(duration_similarity(Some(120.0), None, 0.5).is_none());
    }

    /// Identical resolutions score 1.0.
    #[test]
    fn test_resolution_similarity_exact() {
        let score = resolution_similarity(Some(1920), Some(1080), Some(1920), Some(1080));
        assert_eq!(score, Some(1.0));
    }

    /// Resolutions that differ in both dimensions score zero.
    #[test]
    fn test_resolution_similarity_different() {
        let s = resolution_similarity(Some(1920), Some(1080), Some(1280), Some(720))
            .expect("should be Some");
        assert_eq!(s, 0.0);
    }

    /// Sharing only one dimension scores 0.5.
    #[test]
    fn test_resolution_similarity_partial() {
        let s = resolution_similarity(Some(1920), Some(1080), Some(1920), Some(720))
            .expect("should be Some");
        assert_eq!(s, 0.5);
    }

    /// Any missing dimension yields no score.
    #[test]
    fn test_resolution_similarity_missing() {
        assert!(resolution_similarity(None, Some(1080), Some(1920), Some(1080)).is_none());
    }

    /// Identical hashes score 1.0.
    #[test]
    fn test_phash_similarity_identical() {
        let score = phash_similarity(Some(0xDEADBEEF), Some(0xDEADBEEF), 8);
        assert_eq!(score, Some(1.0));
    }

    /// Hashes four bits apart stay above 0.9.
    #[test]
    fn test_phash_similarity_close() {
        let a = 0xFFFF_FFFF_FFFF_FFFFu64;
        let b = a ^ 0b1111;
        let s = phash_similarity(Some(a), Some(b), 8).expect("should be Some");
        assert!(s > 0.9, "sim = {s}");
    }

    /// Hashes beyond the maximum distance score zero.
    #[test]
    fn test_phash_similarity_too_far() {
        let s = phash_similarity(Some(0x0), Some(0xFFFF_FFFF_FFFF_FFFF), 8)
            .expect("should be Some");
        assert_eq!(s, 0.0);
    }

    /// Identical fingerprints score 1.0.
    #[test]
    fn test_audio_fingerprint_similarity_identical() {
        let fp = vec![0xAB, 0xCD, 0xEF, 0x01];
        let score = audio_fingerprint_similarity(&Some(fp.clone()), &Some(fp));
        assert_eq!(score, Some(1.0));
    }

    /// Fingerprints that differ in every bit score zero.
    #[test]
    fn test_audio_fingerprint_similarity_different() {
        let all_ones = vec![0xFF, 0xFF, 0xFF, 0xFF];
        let all_zeros = vec![0x00, 0x00, 0x00, 0x00];
        let score = audio_fingerprint_similarity(&Some(all_ones), &Some(all_zeros));
        assert_eq!(score, Some(0.0));
    }

    /// A missing fingerprint yields no score.
    #[test]
    fn test_audio_fingerprint_similarity_missing() {
        let fp = vec![0xAB];
        assert!(audio_fingerprint_similarity(&None, &Some(fp)).is_none());
    }

    /// Default weights normalise to a sum of one.
    #[test]
    fn test_cross_format_config_normalised_weights() {
        let (wd, wr, wp, wa) = CrossFormatConfig::default().normalised_weights();
        let total = wd + wr + wp + wa;
        assert!((total - 1.0).abs() < 1e-10);
    }

    /// Identical signals in different containers give one confident match.
    #[test]
    fn test_detector_identical_content_different_format() {
        let mut detector = CrossFormatDetector::with_defaults();

        let hash = 0xDEAD_BEEF_CAFE_BABEu64;
        let fp = vec![0xAB, 0xCD, 0xEF, 0x01, 0x23, 0x45, 0x67, 0x89];

        detector.add(
            FormatInfo::new("video.mp4", "mp4")
                .with_duration(120.0)
                .with_resolution(1920, 1080)
                .with_phash(hash)
                .with_audio_fingerprint(fp.clone()),
        );
        detector.add(
            FormatInfo::new("video.mkv", "mkv")
                .with_duration(120.0)
                .with_resolution(1920, 1080)
                .with_phash(hash)
                .with_audio_fingerprint(fp),
        );

        let matches = detector.find_matches();
        assert_eq!(matches.len(), 1);
        assert!(matches[0].confidence > 0.99);
    }

    /// Two files in the same container are never reported.
    #[test]
    fn test_detector_same_format_not_matched() {
        let mut detector = CrossFormatDetector::with_defaults();

        let hash = 0xDEAD_BEEF_CAFE_BABEu64;
        detector.add(
            FormatInfo::new("a.mp4", "mp4")
                .with_duration(120.0)
                .with_phash(hash),
        );
        detector.add(
            FormatInfo::new("b.mp4", "mp4")
                .with_duration(120.0)
                .with_phash(hash),
        );

        let matches = detector.find_matches();
        assert!(matches.is_empty(), "same format should not be matched");
    }

    /// Durations far apart prevent a match even with equal resolution.
    #[test]
    fn test_detector_duration_too_different() {
        let mut detector = CrossFormatDetector::with_defaults();

        detector.add(
            FormatInfo::new("short.mp4", "mp4")
                .with_duration(60.0)
                .with_resolution(1920, 1080),
        );
        detector.add(
            FormatInfo::new("long.mkv", "mkv")
                .with_duration(120.0)
                .with_resolution(1920, 1080),
        );

        let matches = detector.find_matches();
        assert!(
            matches.is_empty(),
            "very different durations should not match"
        );
    }

    /// Three copies of the same content form a single group.
    #[test]
    fn test_detector_find_groups() {
        let mut detector = CrossFormatDetector::with_defaults();

        let hash = 0xAAAA_BBBB_CCCC_DDDDu64;
        let fp = vec![0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88];

        let copies = [
            ("video.mp4", "mp4"),
            ("video.mkv", "mkv"),
            ("video.webm", "webm"),
        ];
        for (path, container) in copies {
            detector.add(
                FormatInfo::new(path, container)
                    .with_duration(90.0)
                    .with_resolution(1280, 720)
                    .with_phash(hash)
                    .with_audio_fingerprint(fp.clone()),
            );
        }

        let groups = detector.find_groups();
        assert_eq!(groups.len(), 1);
        assert_eq!(groups[0].files.len(), 3);
        assert!(groups[0].containers.len() >= 2);
        assert!(groups[0].best_confidence > 0.9);
    }

    /// Unrelated content ends up in separate groups.
    #[test]
    fn test_detector_two_separate_groups() {
        let mut detector = CrossFormatDetector::with_defaults();

        // First piece of content.
        detector.add(
            FormatInfo::new("a.mp4", "mp4")
                .with_duration(60.0)
                .with_resolution(1920, 1080)
                .with_phash(0x1111_1111_1111_1111),
        );
        detector.add(
            FormatInfo::new("a.mkv", "mkv")
                .with_duration(60.0)
                .with_resolution(1920, 1080)
                .with_phash(0x1111_1111_1111_1111),
        );

        // Second, unrelated piece of content.
        detector.add(
            FormatInfo::new("b.mp4", "mp4")
                .with_duration(300.0)
                .with_resolution(1280, 720)
                .with_phash(0xFFFF_FFFF_FFFF_FFFF),
        );
        detector.add(
            FormatInfo::new("b.webm", "webm")
                .with_duration(300.0)
                .with_resolution(1280, 720)
                .with_phash(0xFFFF_FFFF_FFFF_FFFF),
        );

        let groups = detector.find_groups();
        assert_eq!(groups.len(), 2);
    }

    /// An empty detector reports nothing.
    #[test]
    fn test_detector_empty_pool() {
        let detector = CrossFormatDetector::with_defaults();
        assert!(detector.find_matches().is_empty());
        assert!(detector.find_groups().is_empty());
    }

    /// A single file cannot match anything.
    #[test]
    fn test_detector_single_item() {
        let mut detector = CrossFormatDetector::with_defaults();
        detector.add(FormatInfo::new("only.mp4", "mp4").with_duration(60.0));
        assert!(detector.find_matches().is_empty());
    }

    /// Files with only duration and resolution can still match.
    #[test]
    fn test_detector_partial_signals() {
        let config = CrossFormatConfig {
            confidence_threshold: 0.5,
            ..CrossFormatConfig::default()
        };
        let mut detector = CrossFormatDetector::new(config);

        detector.add(
            FormatInfo::new("video.mp4", "mp4")
                .with_duration(120.0)
                .with_resolution(1920, 1080),
        );
        detector.add(
            FormatInfo::new("video.mkv", "mkv")
                .with_duration(120.0)
                .with_resolution(1920, 1080),
        );

        let matches = detector.find_matches();
        assert_eq!(matches.len(), 1);
        assert!(matches[0].confidence >= 0.5);
    }

    /// Audio-only files match on duration and fingerprint alone.
    #[test]
    fn test_detector_audio_only_content() {
        let mut detector = CrossFormatDetector::with_defaults();

        let fp = vec![0xAA; 32];
        detector.add(
            FormatInfo::new("song.flac", "flac")
                .with_duration(180.0)
                .with_audio_fingerprint(fp.clone())
                .with_audio_info(44100, 2),
        );
        detector.add(
            FormatInfo::new("song.ogg", "ogg")
                .with_duration(180.0)
                .with_audio_fingerprint(fp)
                .with_audio_info(44100, 2),
        );

        let matches = detector.find_matches();
        assert_eq!(matches.len(), 1);
        assert!(matches[0].confidence > 0.7);
    }

    /// Reported signal scores reflect which signals were available.
    #[test]
    fn test_signal_scores_populated() {
        let mut detector = CrossFormatDetector::with_defaults();

        let hash = 0xDEAD_BEEF_CAFE_BABEu64;
        detector.add(
            FormatInfo::new("a.mp4", "mp4")
                .with_duration(100.0)
                .with_resolution(1920, 1080)
                .with_phash(hash),
        );
        detector.add(
            FormatInfo::new("a.mkv", "mkv")
                .with_duration(100.0)
                .with_resolution(1920, 1080)
                .with_phash(hash),
        );

        let matches = detector.find_matches();
        assert_eq!(matches.len(), 1);

        let scores = &matches[0].signal_scores;
        assert_eq!(scores.duration, Some(1.0));
        assert_eq!(scores.resolution, Some(1.0));
        assert_eq!(scores.phash, Some(1.0));
        // No audio fingerprint was supplied for either file.
        assert!(scores.audio.is_none());
    }

    /// `item_count` follows the number of `add` calls.
    #[test]
    fn test_item_count() {
        let mut detector = CrossFormatDetector::with_defaults();
        assert_eq!(detector.item_count(), 0);
        detector.add(FormatInfo::new("a.mp4", "mp4"));
        detector.add(FormatInfo::new("b.mkv", "mkv"));
        assert_eq!(detector.item_count(), 2);
    }

    /// `add_batch` registers every supplied item.
    #[test]
    fn test_add_batch() {
        let mut detector = CrossFormatDetector::with_defaults();
        let batch = vec![
            FormatInfo::new("a.mp4", "mp4"),
            FormatInfo::new("b.mkv", "mkv"),
            FormatInfo::new("c.webm", "webm"),
        ];
        detector.add_batch(batch);
        assert_eq!(detector.item_count(), 3);
    }

    /// A two-pixel width difference still scores high.
    #[test]
    fn test_resolution_similarity_near_identical() {
        let s = resolution_similarity(Some(1920), Some(1080), Some(1918), Some(1080))
            .expect("should be Some");
        assert!(s > 0.8, "near-identical resolution should score high: {s}");
    }

    /// Matches come back ordered by descending confidence.
    #[test]
    fn test_matches_sorted_by_confidence() {
        let mut detector = CrossFormatDetector::new(CrossFormatConfig {
            confidence_threshold: 0.3,
            ..CrossFormatConfig::default()
        });

        // Pair with identical signals.
        detector.add(
            FormatInfo::new("a.mp4", "mp4")
                .with_duration(100.0)
                .with_resolution(1920, 1080)
                .with_phash(0xAAAA),
        );
        detector.add(
            FormatInfo::new("a.mkv", "mkv")
                .with_duration(100.0)
                .with_resolution(1920, 1080)
                .with_phash(0xAAAA),
        );

        // Pair with a small duration difference.
        detector.add(
            FormatInfo::new("b.mp4", "mp4")
                .with_duration(200.0)
                .with_resolution(1280, 720)
                .with_phash(0xBBBB),
        );
        detector.add(
            FormatInfo::new("b.webm", "webm")
                .with_duration(200.2)
                .with_resolution(1280, 720)
                .with_phash(0xBBBB),
        );

        let matches = detector.find_matches();
        assert!(matches.len() >= 2);
        for pair in matches.windows(2) {
            assert!(pair[0].confidence >= pair[1].confidence);
        }
    }

    /// Codec and audio-info builders set their fields.
    #[test]
    fn test_format_info_builders() {
        let built = FormatInfo::new("test.mp4", "mp4")
            .with_codecs(Some("av1".into()), Some("opus".into()))
            .with_audio_info(48000, 6);
        assert_eq!(built.video_codec.as_deref(), Some("av1"));
        assert_eq!(built.audio_codec.as_deref(), Some("opus"));
        assert_eq!(built.sample_rate, Some(48000));
        assert_eq!(built.audio_channels, Some(6));
    }
}