1use crate::{DedupError, DedupResult};
11use std::path::{Path, PathBuf};
12
/// Descriptive facts about one media file, used as input for duplicate detection.
///
/// `path` and `size` are always known; every optional field stays `None`
/// until something fills it in.
#[derive(Debug, Clone, PartialEq)]
pub struct MediaMetadata {
    /// Path of the file this record describes.
    pub path: PathBuf,

    /// File size in bytes.
    pub size: u64,

    /// Title associated with the media, if known.
    pub title: Option<String>,

    /// Playback duration (presumably in seconds — confirm with whoever fills this in).
    pub duration: Option<f64>,

    /// Frame width, if known.
    pub width: Option<u32>,

    /// Frame height, if known.
    pub height: Option<u32>,

    /// Bitrate, if known (the unit is not established in this module).
    pub bitrate: Option<u64>,

    /// Video frame rate, if known.
    pub framerate: Option<f64>,

    /// Audio sample rate, if known.
    pub sample_rate: Option<u32>,

    /// Number of audio channels, if known.
    pub channels: Option<u16>,

    /// Name of the video codec, if known.
    pub video_codec: Option<String>,

    /// Name of the audio codec, if known.
    pub audio_codec: Option<String>,

    /// Container format name, if known.
    pub container: Option<String>,

    /// Creation time as seconds since the Unix epoch, if known.
    pub created: Option<i64>,

    /// Last-modification time as seconds since the Unix epoch, if known.
    pub modified: Option<i64>,
}

impl MediaMetadata {
    /// Creates a record that knows only the file's path and size.
    #[must_use]
    pub fn new(path: PathBuf, size: u64) -> Self {
        Self {
            path,
            size,
            title: None,
            duration: None,
            width: None,
            height: None,
            bitrate: None,
            framerate: None,
            sample_rate: None,
            channels: None,
            video_codec: None,
            audio_codec: None,
            container: None,
            created: None,
            modified: None,
        }
    }

    /// Returns the file stem (the name without its final extension).
    ///
    /// Yields an empty string when the path has no stem or the stem is not
    /// valid UTF-8.
    #[must_use]
    pub fn filename(&self) -> String {
        self.path
            .file_stem()
            .and_then(|stem| stem.to_str())
            .map_or_else(String::new, str::to_owned)
    }

    /// Returns the lowercased file extension, or an empty string when the
    /// path has none or it is not valid UTF-8.
    #[must_use]
    pub fn extension(&self) -> String {
        self.path
            .extension()
            .and_then(|ext| ext.to_str())
            .map(str::to_lowercase)
            .unwrap_or_default()
    }

    /// Formats the frame size as `"<width>x<height>"` when both are known.
    #[must_use]
    pub fn resolution(&self) -> Option<String> {
        let width = self.width?;
        let height = self.height?;
        Some(format!("{width}x{height}"))
    }

    /// A record counts as video when both frame dimensions are present.
    #[must_use]
    pub fn is_video(&self) -> bool {
        matches!((self.width, self.height), (Some(_), Some(_)))
    }

    /// A record counts as audio when it has a sample rate and is not video.
    #[must_use]
    pub fn is_audio(&self) -> bool {
        !self.is_video() && self.sample_rate.is_some()
    }

    /// Returns width divided by height, or `None` when either dimension is
    /// missing or the height is zero.
    #[must_use]
    pub fn aspect_ratio(&self) -> Option<f64> {
        let width = self.width?;
        let height = self.height.filter(|&h| h > 0)?;
        Some(f64::from(width) / f64::from(height))
    }
}
136
/// Per-signal similarity scores between two [`MediaMetadata`] records.
///
/// Each field holds the score produced by the matching `compare_*` function
/// in this module.
#[derive(Debug, Clone)]
pub struct MetadataSimilarity {
    /// Similarity of the two file stems.
    pub filename_similarity: f64,

    /// Similarity of the two titles; `0.0` when either title is missing.
    pub title_fuzzy_score: f64,

    /// Agreement of the two durations.
    pub duration_match: f64,

    /// Agreement of the two frame sizes.
    pub resolution_match: f64,

    /// Agreement of the video and audio codec names.
    pub codec_match: f64,

    /// Closeness of the two file sizes.
    pub size_similarity: f64,

    /// Agreement of the two container names.
    pub container_match: f64,
}

impl MetadataSimilarity {
    /// Combines the individual signals into one weighted score.
    ///
    /// The name signal is the better of the filename and title scores. The
    /// weights are 30% name, 20% duration, 20% resolution, 15% codec,
    /// 10% size and 5% container.
    #[must_use]
    pub fn overall_score(&self) -> f64 {
        const NAME_WEIGHT: f64 = 0.30;
        const DURATION_WEIGHT: f64 = 0.20;
        const RESOLUTION_WEIGHT: f64 = 0.20;
        const CODEC_WEIGHT: f64 = 0.15;
        const SIZE_WEIGHT: f64 = 0.10;
        const CONTAINER_WEIGHT: f64 = 0.05;

        let best_name = self.filename_similarity.max(self.title_fuzzy_score);

        best_name * NAME_WEIGHT
            + self.duration_match * DURATION_WEIGHT
            + self.resolution_match * RESOLUTION_WEIGHT
            + self.codec_match * CODEC_WEIGHT
            + self.size_similarity * SIZE_WEIGHT
            + self.container_match * CONTAINER_WEIGHT
    }

    /// Returns `true` when the weighted score reaches `threshold`.
    #[must_use]
    pub fn is_similar(&self, threshold: f64) -> bool {
        let score = self.overall_score();
        score >= threshold
    }
}
202
203#[must_use]
210pub fn compare_metadata(meta1: &MediaMetadata, meta2: &MediaMetadata) -> MetadataSimilarity {
211 let filename_similarity = compare_filenames(&meta1.filename(), &meta2.filename());
212 let title_fuzzy_score = compare_titles(meta1.title.as_deref(), meta2.title.as_deref());
213 let duration_match = compare_durations(meta1.duration, meta2.duration);
214 let resolution_match = compare_resolutions(meta1, meta2);
215 let codec_match = compare_codecs(meta1, meta2);
216 let size_similarity = compare_sizes(meta1.size, meta2.size);
217 let container_match = compare_containers(&meta1.container, &meta2.container);
218
219 MetadataSimilarity {
220 filename_similarity,
221 title_fuzzy_score,
222 duration_match,
223 resolution_match,
224 codec_match,
225 size_similarity,
226 container_match,
227 }
228}
229
230#[must_use]
235pub fn compare_titles(title1: Option<&str>, title2: Option<&str>) -> f64 {
236 match (title1, title2) {
237 (Some(t1), Some(t2)) => {
238 if t1.eq_ignore_ascii_case(t2) {
239 return 1.0;
240 }
241 let matcher = crate::fuzzy_match::FilenameMatcher::new(0.0);
244 matcher.similarity(t1, t2).value()
245 }
246 _ => 0.0,
248 }
249}
250
251#[must_use]
270pub fn compare_filenames(name1: &str, name2: &str) -> f64 {
271 let norm1 = normalize_filename(name1);
272 let norm2 = normalize_filename(name2);
273
274 if norm1 == norm2 {
275 return 1.0;
276 }
277
278 if norm1.is_empty() || norm2.is_empty() {
279 return 0.0;
280 }
281
282 let raw_score = raw_filename_similarity(&norm1, &norm2);
284
285 let matcher = crate::fuzzy_match::FilenameMatcher::new(0.0); let media_aware_score = matcher.similarity(name1, name2).value();
289
290 raw_score.max(media_aware_score)
293}
294
295fn raw_filename_similarity(norm1: &str, norm2: &str) -> f64 {
297 let distance = levenshtein_distance(norm1, norm2);
299 let max_len = norm1.len().max(norm2.len());
300 let edit_score = 1.0 - (distance as f64 / max_len as f64);
301
302 let tokens1 = tokenize_filename(norm1);
304 let tokens2 = tokenize_filename(norm2);
305 let token_score = if tokens1.is_empty() && tokens2.is_empty() {
306 1.0
307 } else {
308 let intersection = tokens1.intersection(&tokens2).count();
309 let union = tokens1.union(&tokens2).count();
310 if union == 0 {
311 0.0
312 } else {
313 intersection as f64 / union as f64
314 }
315 };
316
317 let bigrams1 = char_bigrams(norm1);
319 let bigrams2 = char_bigrams(norm2);
320 let bigram_score = if bigrams1.is_empty() && bigrams2.is_empty() {
321 1.0
322 } else {
323 let mut overlap = 0usize;
324 for (bg, count_a) in &bigrams1 {
325 if let Some(count_b) = bigrams2.get(bg) {
326 overlap += (*count_a).min(*count_b);
327 }
328 }
329 let total_a: usize = bigrams1.values().sum();
330 let total_b: usize = bigrams2.values().sum();
331 let denom = total_a + total_b;
332 if denom == 0 {
333 0.0
334 } else {
335 2.0 * overlap as f64 / denom as f64
336 }
337 };
338
339 edit_score * 0.40 + token_score * 0.35 + bigram_score * 0.25
341}
342
/// Lowercases `name` and reduces it to alphanumeric words joined by single
/// spaces; every other character acts as a word separator.
fn normalize_filename(name: &str) -> String {
    let lowered = name.to_lowercase();
    let mut normalized = String::with_capacity(lowered.len());

    let words = lowered
        .split(|c: char| !c.is_alphanumeric())
        .filter(|word| !word.is_empty());
    for word in words {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }

    normalized
}
360
/// Collects the distinct whitespace-separated words of `name`.
fn tokenize_filename(name: &str) -> std::collections::HashSet<String> {
    let mut tokens = std::collections::HashSet::new();
    tokens.extend(name.split_whitespace().map(str::to_owned));
    tokens
}
365
/// Counts every pair of adjacent characters in `s`.
///
/// Strings shorter than two characters produce an empty map.
fn char_bigrams(s: &str) -> std::collections::HashMap<(char, char), usize> {
    let mut counts = std::collections::HashMap::new();
    // Pair each character with its successor.
    for pair in s.chars().zip(s.chars().skip(1)) {
        *counts.entry(pair).or_insert(0) += 1;
    }
    counts
}
377
/// Computes the Levenshtein edit distance between two strings, counted in
/// `char`s (insertions, deletions and substitutions each cost 1).
///
/// Only two rows of the dynamic-programming table are kept alive, so memory
/// use is proportional to the length of `s2` rather than to the product of
/// both lengths.
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
    let source: Vec<char> = s1.chars().collect();
    let target: Vec<char> = s2.chars().collect();

    if source.is_empty() {
        return target.len();
    }
    if target.is_empty() {
        return source.len();
    }

    // `previous[j]` is the distance between the processed prefix of `source`
    // and the first `j` characters of `target`.
    let mut previous: Vec<usize> = (0..=target.len()).collect();
    let mut current = vec![0usize; target.len() + 1];

    for (i, &src_char) in source.iter().enumerate() {
        current[0] = i + 1;
        for (j, &dst_char) in target.iter().enumerate() {
            let substitution = previous[j] + usize::from(src_char != dst_char);
            let deletion = previous[j + 1] + 1;
            let insertion = current[j] + 1;
            current[j + 1] = substitution.min(deletion).min(insertion);
        }
        std::mem::swap(&mut previous, &mut current);
    }

    // After the final swap the last completed row lives in `previous`.
    previous[target.len()]
}
420
/// Scores how closely two optional durations agree, always within `0.0..=1.0`.
///
/// * Both missing: `0.5` (no evidence either way).
/// * Exactly one missing: `0.0`.
/// * Equal values: `1.0`.
/// * Values differing by at most 2% of the larger magnitude: `1.0`.
/// * Otherwise `1 - difference / larger magnitude`, floored at `0.0`.
///
/// NaN or infinite durations never match anything but an identical value.
/// Scaling by magnitude keeps the score in range even for negative values,
/// which corrupt metadata can produce.
#[must_use]
pub fn compare_durations(dur1: Option<f64>, dur2: Option<f64>) -> f64 {
    match (dur1, dur2) {
        (Some(d1), Some(d2)) => {
            // Covers the both-zero case, so the scale below is never zero.
            if d1 == d2 {
                return 1.0;
            }
            if !d1.is_finite() || !d2.is_finite() {
                return 0.0;
            }

            let scale = d1.abs().max(d2.abs());
            let diff = (d1 - d2).abs();
            let tolerance = scale * 0.02;

            if diff <= tolerance {
                1.0
            } else {
                (1.0 - diff / scale).max(0.0)
            }
        }
        (None, None) => 0.5,
        _ => 0.0,
    }
}
448
449#[must_use]
451pub fn compare_resolutions(meta1: &MediaMetadata, meta2: &MediaMetadata) -> f64 {
452 match ((meta1.width, meta1.height), (meta2.width, meta2.height)) {
453 ((Some(w1), Some(h1)), (Some(w2), Some(h2))) => {
454 if w1 == w2 && h1 == h2 {
455 1.0
456 } else {
457 let ar1 = f64::from(w1) / f64::from(h1);
459 let ar2 = f64::from(w2) / f64::from(h2);
460
461 let ar_diff = (ar1 - ar2).abs();
462 if ar_diff < 0.01 {
463 0.5
465 } else {
466 0.0
467 }
468 }
469 }
470 ((None, None), (None, None)) => 0.5, _ => 0.0, }
473}
474
475#[must_use]
477pub fn compare_codecs(meta1: &MediaMetadata, meta2: &MediaMetadata) -> f64 {
478 let video_match = compare_strings(&meta1.video_codec, &meta2.video_codec);
479 let audio_match = compare_strings(&meta1.audio_codec, &meta2.audio_codec);
480
481 (video_match + audio_match) / 2.0
483}
484
/// Scores two optional labels: `1.0` when both are present and equal
/// ignoring ASCII case, `0.5` when both are absent, `0.0` otherwise.
fn compare_strings(s1: &Option<String>, s2: &Option<String>) -> f64 {
    match (s1.as_deref(), s2.as_deref()) {
        (None, None) => 0.5,
        (Some(left), Some(right)) if left.eq_ignore_ascii_case(right) => 1.0,
        _ => 0.0,
    }
}
499
/// Scores how close two file sizes (in bytes) are, within `0.0..=1.0`.
///
/// Sizes within 5% of the larger one score `1.0`; beyond that the score
/// falls off linearly as `1 - difference / larger`. Two empty files score `1.0`.
#[must_use]
pub fn compare_sizes(size1: u64, size2: u64) -> f64 {
    let larger = size1.max(size2);
    if larger == 0 {
        return 1.0;
    }

    // Stay in unsigned arithmetic: casting to `i64` wraps for sizes of
    // 2^63 bytes and above and can overflow the subtraction.
    let diff = larger - size1.min(size2);
    let tolerance = (larger as f64 * 0.05) as u64;

    if diff <= tolerance {
        1.0
    } else {
        (1.0 - diff as f64 / larger as f64).max(0.0)
    }
}
521
522#[must_use]
524pub fn compare_containers(cont1: &Option<String>, cont2: &Option<String>) -> f64 {
525 compare_strings(cont1, cont2)
526}
527
528pub fn extract_metadata(path: impl AsRef<Path>) -> DedupResult<MediaMetadata> {
534 let path = path.as_ref();
535
536 if !path.exists() {
537 return Err(DedupError::FileNotFound(path.to_path_buf()));
538 }
539
540 let file_metadata = std::fs::metadata(path)?;
541 let size = file_metadata.len();
542
543 let mut metadata = MediaMetadata::new(path.to_path_buf(), size);
544
545 if let Ok(created) = file_metadata.created() {
547 if let Ok(duration) = created.duration_since(std::time::UNIX_EPOCH) {
548 metadata.created = Some(duration.as_secs() as i64);
549 }
550 }
551
552 if let Ok(modified) = file_metadata.modified() {
553 if let Ok(duration) = modified.duration_since(std::time::UNIX_EPOCH) {
554 metadata.modified = Some(duration.as_secs() as i64);
555 }
556 }
557
558 let ext = path
560 .extension()
561 .and_then(|s| s.to_str())
562 .unwrap_or("")
563 .to_lowercase();
564
565 metadata.container = Some(ext);
566
567 detect_format_from_magic(path, &mut metadata);
569
570 Ok(metadata)
571}
572
573fn detect_format_from_magic(path: &Path, metadata: &mut MediaMetadata) {
575 use std::io::Read;
576
577 let mut file = match std::fs::File::open(path) {
578 Ok(f) => f,
579 Err(_) => return,
580 };
581
582 let mut buf = [0u8; 64];
583 let n = match file.read(&mut buf) {
584 Ok(n) => n,
585 Err(_) => return,
586 };
587
588 if n < 4 {
589 return;
590 }
591
592 let bytes = &buf[..n];
593
594 if bytes.starts_with(&[0x1A, 0x45, 0xDF, 0xA3]) {
596 let search_range = &bytes[4..n.min(32)];
598 let is_webm = search_range.windows(4).any(|w| w == b"webm");
599 if is_webm {
600 metadata.container = Some("webm".to_string());
601 } else {
602 metadata.container = Some("mkv".to_string());
603 }
604 metadata.video_codec = Some("vp9".to_string());
605 metadata.audio_codec = Some("opus".to_string());
606 return;
607 }
608
609 if n >= 12 && &bytes[4..8] == b"ftyp" {
611 let brand = &bytes[8..12];
612 if brand == b"qt " {
613 metadata.container = Some("mov".to_string());
614 } else if brand == b"M4A " {
615 metadata.container = Some("m4a".to_string());
616 } else if brand == b"M4V " {
617 metadata.container = Some("m4v".to_string());
618 } else {
619 metadata.container = Some("mp4".to_string());
620 }
621 metadata.video_codec = Some("h264".to_string());
622 metadata.audio_codec = Some("aac".to_string());
623 return;
624 }
625
626 if n >= 12 && bytes.starts_with(b"RIFF") && &bytes[8..12] == b"WAVE" {
628 metadata.container = Some("wav".to_string());
629 metadata.audio_codec = Some("pcm".to_string());
630 if n >= 28 {
633 let channels = u16::from_le_bytes([bytes[22], bytes[23]]);
634 let sample_rate = u32::from_le_bytes([bytes[24], bytes[25], bytes[26], bytes[27]]);
635 if channels > 0 {
636 metadata.channels = Some(channels);
637 }
638 if sample_rate > 0 {
639 metadata.sample_rate = Some(sample_rate);
640 }
641 }
642 return;
643 }
644
645 if bytes.starts_with(b"fLaC") {
647 metadata.container = Some("flac".to_string());
648 metadata.audio_codec = Some("flac".to_string());
649 if n >= 29 {
659 let b0 = bytes[26] as u32;
661 let b1 = bytes[27] as u32;
662 let b2 = bytes[28] as u32;
663 let sample_rate = (b0 << 12) | (b1 << 4) | (b2 >> 4);
664 if sample_rate > 0 {
665 metadata.sample_rate = Some(sample_rate);
666 }
667 let channels = ((b2 >> 1) & 0x07) + 1;
669 metadata.channels = Some(channels as u16);
670 }
671 return;
672 }
673
674 if bytes.starts_with(b"OggS") {
676 metadata.container = Some("ogg".to_string());
677 let page_data = if n > 28 { &bytes[28..] } else { &bytes[4..] };
680 if page_data.windows(8).any(|w| w == b"OpusHead") {
681 metadata.audio_codec = Some("opus".to_string());
682 } else if page_data
683 .windows(7)
684 .any(|w| w == b"\x01vorbis" || w == b"\x03vorbis")
685 {
686 metadata.audio_codec = Some("vorbis".to_string());
687 } else if page_data.windows(6).any(|w| w == b"vorbis") {
688 metadata.audio_codec = Some("vorbis".to_string());
689 } else {
690 metadata.audio_codec = Some("vorbis".to_string());
691 }
692 return;
693 }
694
695 if n >= 1 && bytes[0] == 0x47 {
697 let is_ts = (n >= 189 && bytes[188] == 0x47) || (n >= 1 && bytes[0] == 0x47 && n < 189);
699 if is_ts {
700 metadata.container = Some("ts".to_string());
701 metadata.video_codec = Some("h264".to_string());
702 metadata.audio_codec = Some("aac".to_string());
703 return;
704 }
705 }
706
707 if bytes.starts_with(b"ID3") {
709 metadata.container = Some("mp3".to_string());
710 metadata.audio_codec = Some("mp3".to_string());
711 return;
712 }
713
714 if n >= 2 && bytes[0] == 0xFF && bytes[1] >= 0xE0 {
716 metadata.container = Some("mp3".to_string());
717 metadata.audio_codec = Some("mp3".to_string());
718 }
719}
720
721#[must_use]
723pub fn find_metadata_duplicates(
724 metadata_list: &[MediaMetadata],
725 threshold: f64,
726) -> Vec<Vec<usize>> {
727 let mut groups = Vec::new();
728 let mut processed = vec![false; metadata_list.len()];
729
730 for i in 0..metadata_list.len() {
731 if processed[i] {
732 continue;
733 }
734
735 let mut group = vec![i];
736
737 for j in (i + 1)..metadata_list.len() {
738 if processed[j] {
739 continue;
740 }
741
742 let similarity = compare_metadata(&metadata_list[i], &metadata_list[j]);
743
744 if similarity.is_similar(threshold) {
745 group.push(j);
746 processed[j] = true;
747 }
748 }
749
750 if group.len() > 1 {
751 groups.push(group);
752 }
753
754 processed[i] = true;
755 }
756
757 groups
758}
759
760#[must_use]
762pub fn fuzzy_search(query: &str, candidates: &[String], threshold: f64) -> Vec<(usize, f64)> {
763 let mut results = Vec::new();
764
765 for (i, candidate) in candidates.iter().enumerate() {
766 let similarity = compare_filenames(query, candidate);
767
768 if similarity >= threshold {
769 results.push((i, similarity));
770 }
771 }
772
773 results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
775
776 results
777}
778
779#[must_use]
781pub fn metadata_quality(metadata: &MediaMetadata) -> f64 {
782 let mut score = 0.0;
783 let mut total = 0.0;
784
785 total += 1.0;
787 if metadata.duration.is_some() {
788 score += 1.0;
789 }
790
791 total += 1.0;
792 if metadata.width.is_some() && metadata.height.is_some() {
793 score += 1.0;
794 }
795
796 total += 1.0;
797 if metadata.bitrate.is_some() {
798 score += 1.0;
799 }
800
801 total += 1.0;
802 if metadata.framerate.is_some() || metadata.sample_rate.is_some() {
803 score += 1.0;
804 }
805
806 total += 1.0;
807 if metadata.video_codec.is_some() || metadata.audio_codec.is_some() {
808 score += 1.0;
809 }
810
811 total += 1.0;
812 if metadata.container.is_some() {
813 score += 1.0;
814 }
815
816 score / total
817}
818
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a 1,000,000-byte record with the given duration and frame size.
    fn create_test_metadata(name: &str, duration: f64, width: u32, height: u32) -> MediaMetadata {
        let mut record = MediaMetadata::new(PathBuf::from(name), 1_000_000);
        record.width = Some(width);
        record.height = Some(height);
        record.duration = Some(duration);
        record
    }

    #[test]
    fn test_metadata_creation() {
        let record = MediaMetadata::new(PathBuf::from("test.mp4"), 1000);
        assert_eq!(record.size, 1000);
        assert_eq!(record.extension(), "mp4");
    }

    #[test]
    fn test_filename_extraction() {
        let record = MediaMetadata::new(PathBuf::from("/path/to/video.mp4"), 1000);
        assert_eq!(record.filename(), "video");
        assert_eq!(record.extension(), "mp4");
    }

    #[test]
    fn test_resolution() {
        let mut record = MediaMetadata::new(PathBuf::from("test.mp4"), 1000);
        record.width = Some(1920);
        record.height = Some(1080);

        assert_eq!(record.resolution(), Some("1920x1080".to_string()));
        assert!(record.is_video());
    }

    #[test]
    fn test_aspect_ratio() {
        let mut record = MediaMetadata::new(PathBuf::from("test.mp4"), 1000);
        record.width = Some(1920);
        record.height = Some(1080);

        let ratio = record.aspect_ratio().expect("operation should succeed");
        assert!((ratio - 16.0 / 9.0).abs() < 0.01);
    }

    #[test]
    fn test_filename_comparison() {
        // Identical after normalization: same text, case only, separators only.
        let identical = [("video", "video"), ("VIDEO", "video"), ("my-video", "my_video")];
        for &(left, right) in identical.iter() {
            assert_eq!(compare_filenames(left, right), 1.0);
        }

        assert!(compare_filenames("video1", "video2") > 0.5);
        assert!(compare_filenames("test", "completely_different") < 0.5);
    }

    #[test]
    fn test_levenshtein_distance() {
        let cases = [
            ("", "", 0),
            ("abc", "abc", 0),
            ("abc", "ab", 1),
            ("abc", "def", 3),
            ("kitten", "sitting", 3),
        ];
        for &(left, right, expected) in cases.iter() {
            assert_eq!(levenshtein_distance(left, right), expected);
        }
    }

    #[test]
    fn test_duration_comparison() {
        assert_eq!(compare_durations(Some(100.0), Some(100.0)), 1.0);
        // Within the tolerance band.
        assert!(compare_durations(Some(100.0), Some(101.0)) > 0.9);
        // Far outside it.
        assert!(compare_durations(Some(100.0), Some(200.0)) < 0.9);
        assert_eq!(compare_durations(None, None), 0.5);
        assert_eq!(compare_durations(Some(100.0), None), 0.0);
    }

    #[test]
    fn test_resolution_comparison() {
        let full_hd = create_test_metadata("video1.mp4", 100.0, 1920, 1080);
        let full_hd_twin = create_test_metadata("video2.mp4", 100.0, 1920, 1080);
        let hd = create_test_metadata("video3.mp4", 100.0, 1280, 720);
        let uhd = create_test_metadata("video4.mp4", 100.0, 3840, 2160);

        // Same frame size.
        assert_eq!(compare_resolutions(&full_hd, &full_hd_twin), 1.0);
        // Same aspect ratio, different frame size.
        assert_eq!(compare_resolutions(&full_hd, &uhd), 0.5);
        assert_eq!(compare_resolutions(&full_hd, &hd), 0.5);
    }

    #[test]
    fn test_size_comparison() {
        assert_eq!(compare_sizes(1000, 1000), 1.0);
        // Within the tolerance band.
        assert!(compare_sizes(1000, 1040) > 0.9);
        assert!(compare_sizes(1000, 2000) < 0.9);
    }

    #[test]
    fn test_codec_comparison() {
        let with_codecs = |name: &str, video: &str, audio: &str| {
            let mut record = create_test_metadata(name, 100.0, 1920, 1080);
            record.video_codec = Some(video.to_string());
            record.audio_codec = Some(audio.to_string());
            record
        };

        let av1_a = with_codecs("video1.mp4", "av1", "opus");
        let av1_b = with_codecs("video2.mp4", "av1", "opus");
        let vp9 = with_codecs("video3.mp4", "vp9", "opus");

        // Both codecs agree.
        assert_eq!(compare_codecs(&av1_a, &av1_b), 1.0);
        // Only the audio codec agrees.
        assert_eq!(compare_codecs(&av1_a, &vp9), 0.5);
    }

    #[test]
    fn test_metadata_similarity() {
        let original = create_test_metadata("video_clip.mp4", 100.0, 1920, 1080);
        let copy = create_test_metadata("video_clip_copy.mp4", 100.0, 1920, 1080);

        let similarity = compare_metadata(&original, &copy);

        assert!(similarity.filename_similarity > 0.6);
        assert_eq!(similarity.duration_match, 1.0);
        assert_eq!(similarity.resolution_match, 1.0);
        assert!(similarity.is_similar(0.8));
    }

    #[test]
    fn test_fuzzy_search() {
        let candidates: Vec<String> = [
            "video_clip.mp4",
            "audio_track.mp3",
            "video_clip_2.mp4",
            "completely_different.mov",
        ]
        .iter()
        .map(|name| name.to_string())
        .collect();

        let results = fuzzy_search("video clip", &candidates, 0.5);

        assert!(!results.is_empty());
        // The best hit clears the threshold.
        assert!(results[0].1 > 0.5);
    }

    #[test]
    fn test_metadata_quality() {
        let mut record = MediaMetadata::new(PathBuf::from("test.mp4"), 1000);
        // Nothing but path and size is known yet.
        assert!(metadata_quality(&record) < 0.2);

        record.duration = Some(100.0);
        record.width = Some(1920);
        record.height = Some(1080);
        record.bitrate = Some(5_000_000);
        record.framerate = Some(30.0);
        record.video_codec = Some("av1".to_string());
        record.container = Some("mp4".to_string());

        // Every signal is now present.
        assert!(metadata_quality(&record) > 0.9);
    }

    #[test]
    fn test_find_metadata_duplicates() {
        let metadata_list = vec![
            create_test_metadata("video1.mp4", 100.0, 1920, 1080),
            create_test_metadata("video1_copy.mp4", 100.0, 1920, 1080),
            create_test_metadata("video2.mp4", 200.0, 1280, 720),
            create_test_metadata("video1_copy2.mp4", 100.0, 1920, 1080),
        ];

        let groups = find_metadata_duplicates(&metadata_list, 0.8);

        // Exactly one group, holding at least the original and one copy.
        assert_eq!(groups.len(), 1);
        assert!(groups[0].len() >= 2);
    }

    #[test]
    fn test_filename_comparison_strips_codec_tags() {
        let score = compare_filenames(
            "The.Movie.2024.1080p.x264.mkv",
            "The.Movie.2024.720p.x265.mp4",
        );
        assert!(
            score > 0.8,
            "Same movie with different codecs/resolutions should score > 0.8, got {score}"
        );
    }

    #[test]
    fn test_filename_comparison_strips_release_markers() {
        let score = compare_filenames(
            "Movie.Title.2024.BluRay.REMUX.mkv",
            "Movie.Title.2024.WEB-DL.mp4",
        );
        assert!(
            score > 0.8,
            "Same movie with different release types should score > 0.8, got {score}"
        );
    }

    #[test]
    fn test_filename_comparison_different_content() {
        let score = compare_filenames("Inception.2010.1080p.mkv", "Interstellar.2014.720p.mp4");
        assert!(
            score < 0.8,
            "Different movies should score < 0.8, got {score}"
        );
    }

    #[test]
    fn test_filename_comparison_uhd_vs_hd() {
        let score = compare_filenames(
            "Documentary.2024.2160p.HDR.mkv",
            "Documentary.2024.1080p.SDR.mp4",
        );
        assert!(
            score > 0.7,
            "Same content at different quality tiers should be similar, got {score}"
        );
    }

    #[test]
    fn test_filename_comparison_audio_codecs_stripped() {
        let score = compare_filenames("Concert.2024.FLAC.mkv", "Concert.2024.AAC.mp4");
        assert!(
            score > 0.8,
            "Same content with different audio codecs should match, got {score}"
        );
    }

    #[test]
    fn test_metadata_comparison_uses_fuzzy_filename() {
        let mut bluray = create_test_metadata("Movie.2024.1080p.x264.BluRay.mkv", 100.0, 1920, 1080);
        bluray.video_codec = Some("h264".to_string());
        bluray.audio_codec = Some("aac".to_string());

        let mut web = create_test_metadata("Movie.2024.720p.x265.WEB-DL.mp4", 100.0, 1280, 720);
        web.video_codec = Some("h265".to_string());
        web.audio_codec = Some("opus".to_string());

        let sim = compare_metadata(&bluray, &web);
        assert!(
            sim.filename_similarity > 0.7,
            "Media-aware filename comparison should score > 0.7, got {}",
            sim.filename_similarity
        );
    }

    #[test]
    fn test_metadata_fuzzy_similar_titles() {
        let mut first = create_test_metadata("video_a.mp4", 100.0, 1920, 1080);
        first.title = Some("The Great Documentary 2024".to_string());

        let mut second = create_test_metadata("video_b.mp4", 100.0, 1920, 1080);
        second.title = Some("the great documentary 2024".to_string());

        let sim = compare_metadata(&first, &second);
        assert!(
            sim.title_fuzzy_score >= 0.9,
            "Near-identical titles (case difference) should score >= 0.9, got {}",
            sim.title_fuzzy_score
        );
        assert!(
            sim.overall_score() >= 0.5,
            "Overall score should be raised by the high title signal, got {}",
            sim.overall_score()
        );
    }

    #[test]
    fn test_metadata_fuzzy_different_titles() {
        let mut first = create_test_metadata("video_a.mp4", 100.0, 1920, 1080);
        first.title = Some("Inception".to_string());

        let mut second = create_test_metadata("video_b.mp4", 100.0, 1920, 1080);
        second.title = Some("Interstellar".to_string());

        let sim = compare_metadata(&first, &second);
        assert!(
            sim.title_fuzzy_score < 0.7,
            "Clearly different titles should score < 0.7, got {}",
            sim.title_fuzzy_score
        );
    }

    #[test]
    fn test_metadata_no_title_title_score_is_zero() {
        let first = create_test_metadata("video_a.mp4", 100.0, 1920, 1080);
        let second = create_test_metadata("video_b.mp4", 100.0, 1920, 1080);
        let sim = compare_metadata(&first, &second);
        assert_eq!(
            sim.title_fuzzy_score, 0.0,
            "Missing title tags should produce title_fuzzy_score of 0.0"
        );
    }

    #[test]
    fn test_compare_titles_identical_case_insensitive() {
        assert_eq!(
            compare_titles(Some("Hello World"), Some("hello world")),
            1.0
        );
    }

    #[test]
    fn test_compare_titles_none_returns_zero() {
        let cases = [(None, Some("title")), (Some("title"), None), (None, None)];
        for &(left, right) in cases.iter() {
            assert_eq!(compare_titles(left, right), 0.0);
        }
    }

    #[test]
    fn test_compare_titles_similar_strips_noise() {
        let score = compare_titles(Some("The Movie 1080p"), Some("The Movie 720p"));
        assert!(
            score > 0.7,
            "Title comparison should strip resolution noise, got {score}"
        );
    }
}