1extern crate chromaprint_rust;
2#[cfg(feature = "rayon")]
3extern crate rayon;
4
5use std::collections::{BTreeMap, BinaryHeap, HashMap, HashSet};
6use std::fmt::Display;
7use std::path::{Path, PathBuf};
8use std::time::Duration;
9
10use chromaprint_rust as chromaprint;
11#[cfg(feature = "rayon")]
12use rayon::prelude::*;
13
14use crate::util;
15use crate::Result;
16
17use super::{Analyzer, FrameHashes};
18
19#[derive(serde::Deserialize, serde::Serialize)]
20struct SkipFile {
21 pub opening: Option<(f32, f32)>,
22 pub ending: Option<(f32, f32)>,
23 pub md5: String,
24}
25
26type ComparatorHeap = BinaryHeap<ComparatorHeapEntry>;
27
28#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord)]
29struct ComparatorHeapEntry {
30 score: usize,
31 src_longest_run: (Duration, Duration),
32 dst_longest_run: (Duration, Duration),
33 src_match_hash: u32,
34 dst_match_hash: u32,
35 is_src_opening: bool,
36 is_src_ending: bool,
37 is_dst_opening: bool,
38 is_dst_ending: bool,
39 src_hash_duration: Duration,
40 dst_hash_duration: Duration,
41}
42
43impl Display for ComparatorHeapEntry {
44 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
45 write!(
46 f,
47 "score: {}, src_longest_run: {:?}, dst_longest_run: {:?}, src_match_hash: {}, dst_match_hash: {}",
48 self.score, self.src_longest_run, self.dst_longest_run, self.src_match_hash, self.dst_match_hash,
49 )
50 }
51}
52
53#[derive(Debug)]
54struct OpeningAndEndingInfo {
55 src_openings: Vec<ComparatorHeapEntry>,
56 dst_openings: Vec<ComparatorHeapEntry>,
57 src_endings: Vec<ComparatorHeapEntry>,
58 dst_endings: Vec<ComparatorHeapEntry>,
59}
60
61impl OpeningAndEndingInfo {
62 pub fn is_empty(&self) -> bool {
63 self.src_openings.is_empty()
64 && self.dst_openings.is_empty()
65 && self.src_endings.is_empty()
66 && self.dst_endings.is_empty()
67 }
68}
69
70#[derive(Copy, Clone, Debug, Default)]
72pub struct SearchResult {
73 opening: Option<(Duration, Duration)>,
74 ending: Option<(Duration, Duration)>,
75}
76
77#[derive(Debug)]
80pub struct Comparator<P: AsRef<Path>> {
81 videos: Vec<P>,
82 openings_only: bool,
83 hash_match_threshold: u32,
84 opening_search_percentage: f32,
85 ending_search_percentage: f32,
86 min_opening_duration: Duration,
87 min_ending_duration: Duration,
88 time_padding: Duration,
89}
90
91impl<P: AsRef<Path>> Default for Comparator<P> {
92 fn default() -> Self {
93 Self {
94 videos: Vec::new(),
95 openings_only: false,
96 hash_match_threshold: super::DEFAULT_HASH_MATCH_THRESHOLD as u32,
97 opening_search_percentage: super::DEFAULT_OPENING_SEARCH_PERCENTAGE,
98 ending_search_percentage: super::DEFAULT_ENDING_SEARCH_PERCENTAGE,
99 min_opening_duration: Duration::from_secs(super::DEFAULT_MIN_OPENING_DURATION as u64),
100 min_ending_duration: Duration::from_secs(super::DEFAULT_MIN_ENDING_DURATION as u64),
101 time_padding: Duration::ZERO,
102 }
103 }
104}
105
106impl<P: AsRef<Path>> From<Analyzer<P>> for Comparator<P> {
107 fn from(analyzer: Analyzer<P>) -> Self {
110 let mut comparator = Self::default();
111 comparator.videos = analyzer.videos;
113 comparator
114 }
115}
116
117impl<P: AsRef<Path> + Ord> Comparator<P> {
118 pub fn from_files(videos: impl Into<Vec<P>>) -> Self {
120 let mut comparator = Self::default();
121 comparator.videos = videos.into();
122 comparator.videos.sort();
123 comparator
124 }
125}
126
127impl<P: AsRef<Path>> Comparator<P> {
128 pub fn videos(&self) -> &[P] {
130 &self.videos
131 }
132
133 pub fn with_openings_only(mut self, openings_only: bool) -> Self {
135 self.openings_only = openings_only;
136 self
137 }
138
139 pub fn with_hash_match_threshold(mut self, hash_match_threshold: u32) -> Self {
141 self.hash_match_threshold = hash_match_threshold;
142 self
143 }
144
145 pub fn with_opening_search_percentage(mut self, opening_search_percentage: f32) -> Self {
147 self.opening_search_percentage = opening_search_percentage;
148 self
149 }
150
151 pub fn with_ending_search_percentage(mut self, ending_search_percentage: f32) -> Self {
153 self.ending_search_percentage = ending_search_percentage;
154 self
155 }
156
157 pub fn with_min_opening_duration(mut self, min_opening_duration: Duration) -> Self {
159 self.min_opening_duration = min_opening_duration;
160 self
161 }
162
163 pub fn with_min_ending_duration(mut self, min_ending_duration: Duration) -> Self {
165 self.min_ending_duration = min_ending_duration;
166 self
167 }
168
169 pub fn with_time_padding(mut self, time_padding: Duration) -> Self {
171 self.time_padding = time_padding;
172 self
173 }
174
175 #[inline]
176 fn compute_hash_for_match(hashes: &[(u32, Duration)], (start, end): (usize, usize)) -> u32 {
177 let hashes: Vec<u32> = hashes.iter().map(|t| t.0).collect();
178 chromaprint::simhash::simhash32(&hashes[start..end + 1])
179 }
180
181 fn longest_common_hash_match(
184 &self,
185 src: &[(u32, Duration)],
186 dst: &[(u32, Duration)],
187 src_max_opening_time: Duration,
188 src_min_ending_time: Duration,
189 dst_max_opening_time: Duration,
190 dst_min_ending_time: Duration,
191 src_hash_duration: Duration,
192 dst_hash_duration: Duration,
193 ) -> Vec<ComparatorHeapEntry> {
194 let mut heap: ComparatorHeap = BinaryHeap::new();
196
197 let mut table: Vec<Vec<usize>> = vec![vec![0; dst.len() + 1]; src.len() + 1];
199 for i in 0..src.len() {
200 for j in 0..dst.len() {
201 let (src_hash, dst_hash) = (src[i].0, dst[j].0);
202 if i == 0 || j == 0 {
203 table[i][j] = 0;
204 } else if u32::count_ones(src_hash ^ dst_hash) <= self.hash_match_threshold {
205 table[i][j] = table[i - 1][j - 1] + 1;
206 } else {
207 table[i][j] = 0;
208 }
209 }
210 }
211
212 let mut i = src.len() - 1;
215 while i > 0 {
216 let mut j = dst.len() - 1;
217 while j > 0 {
218 if table[i][j] == 0
222 || (i < src.len() - 1 && j < dst.len() - 1 && table[i + 1][j + 1] != 0)
223 {
224 j -= 1;
225 continue;
226 }
227
228 let (src_start_idx, src_end_idx) = (i - table[i][j], i);
233 let (dst_start_idx, dst_end_idx) = (j - table[i][j], j);
234 let (src_start, src_end) = (src[src_start_idx].1, src[src_end_idx].1);
235 let (dst_start, dst_end) = (dst[dst_start_idx].1, dst[dst_end_idx].1);
236 let (is_src_opening, is_src_ending) = (
237 src_end < src_max_opening_time,
238 src_start > src_min_ending_time,
239 );
240 let (is_dst_opening, is_dst_ending) = (
241 dst_end < dst_max_opening_time,
242 dst_start > dst_min_ending_time,
243 );
244
245 let is_src_valid = (is_src_opening
247 && (src_end - src_start) >= self.min_opening_duration)
248 || (is_src_ending && (src_end - src_start) >= self.min_ending_duration);
249 let is_dst_valid = (is_dst_opening
250 && (dst_end - dst_start) >= self.min_opening_duration)
251 || (is_dst_ending && (dst_end - dst_start) >= self.min_ending_duration);
252 let is_valid = is_src_valid && is_dst_valid;
253
254 if !is_valid {
256 j -= 1;
257 continue;
258 }
259
260 let is_ending = (is_src_ending
262 && (src_end - src_start) >= self.min_ending_duration)
263 || (is_dst_ending && (dst_end - dst_start) >= self.min_ending_duration);
264 if is_ending && self.openings_only {
265 j -= 1;
266 continue;
267 }
268
269 let src_match_hash =
271 Self::compute_hash_for_match(src, (src_start_idx, src_end_idx));
272 let dst_match_hash =
273 Self::compute_hash_for_match(dst, (dst_start_idx, dst_end_idx));
274
275 let entry = ComparatorHeapEntry {
276 score: table[i][j],
277 src_longest_run: (src_start, src_end),
278 dst_longest_run: (dst_start, dst_end),
279 src_match_hash,
280 dst_match_hash,
281 is_src_opening,
282 is_src_ending,
283 is_dst_opening,
284 is_dst_ending,
285 src_hash_duration,
286 dst_hash_duration,
287 };
288
289 heap.push(entry);
290
291 j -= 1;
292 }
293
294 i -= 1;
295 }
296
297 heap.into()
298 }
299
300 fn find_opening_and_ending(
301 &self,
302 src_hashes: &super::analyzer::FrameHashes,
303 dst_hashes: &super::analyzer::FrameHashes,
304 ) -> OpeningAndEndingInfo {
305 let _g = tracing::span!(tracing::Level::TRACE, "find_opening_and_ending");
306
307 let src_hash_data = &src_hashes.data;
308 let dst_hash_data = &dst_hashes.data;
309 let src_hash_duration = Duration::from_secs_f32(src_hashes.hash_duration);
310 let dst_hash_duration = Duration::from_secs_f32(dst_hashes.hash_duration);
311
312 let src_opening_search_idx =
314 ((src_hash_data.len() - 1) as f32 * self.opening_search_percentage) as usize;
315 let src_ending_search_idx =
316 ((src_hash_data.len() - 1) as f32 * (1.0 - self.ending_search_percentage)) as usize;
317 let dst_opening_search_idx =
318 ((dst_hash_data.len() - 1) as f32 * self.opening_search_percentage) as usize;
319 let dst_ending_search_idx =
320 ((dst_hash_data.len() - 1) as f32 * (1.0 - self.ending_search_percentage)) as usize;
321 let src_max_opening_time = src_hash_data[src_opening_search_idx].1;
322 let src_min_ending_time = src_hash_data[src_ending_search_idx].1;
323 let dst_max_opening_time = dst_hash_data[dst_opening_search_idx].1;
324 let dst_min_ending_time = dst_hash_data[dst_ending_search_idx].1;
325
326 let entries = self.longest_common_hash_match(
327 src_hash_data,
328 dst_hash_data,
329 src_max_opening_time,
330 src_min_ending_time,
331 dst_max_opening_time,
332 dst_min_ending_time,
333 src_hash_duration,
334 dst_hash_duration,
335 );
336
337 tracing::debug!(
338 num_matches = entries.len(),
339 "finished sliding window analysis"
340 );
341
342 let (mut src_valid_openings, mut src_valid_endings) = (Vec::new(), Vec::new());
343 let (mut dst_valid_openings, mut dst_valid_endings) = (Vec::new(), Vec::new());
344
345 for entry in entries {
347 let (is_src_opening, is_src_ending) = (entry.is_src_opening, entry.is_src_ending);
348 let (is_dst_opening, is_dst_ending) = (entry.is_dst_opening, entry.is_dst_ending);
349 if is_src_opening {
350 src_valid_openings.push(entry.clone());
351 } else if is_src_ending {
352 src_valid_endings.push(entry.clone());
353 }
354 if is_dst_opening {
355 dst_valid_openings.push(entry.clone());
356 } else if is_dst_ending {
357 dst_valid_endings.push(entry.clone());
358 }
359 }
360
361 OpeningAndEndingInfo {
362 src_openings: src_valid_openings,
363 dst_openings: dst_valid_openings,
364 src_endings: src_valid_endings,
365 dst_endings: dst_valid_endings,
366 }
367 }
368
369 fn check_skip_file(video: impl AsRef<Path>) -> Result<bool> {
370 let skip_file = video
371 .as_ref()
372 .to_owned()
373 .with_extension(super::SKIP_FILE_EXT);
374 if !skip_file.exists() {
375 return Ok(false);
376 }
377
378 let md5 = crate::util::compute_header_md5sum(video)?;
380
381 let f = std::fs::File::open(&skip_file)?;
383 let skip_file: SkipFile = serde_json::from_reader(&f).unwrap();
384
385 Ok(skip_file.md5 == md5)
386 }
387
388 fn create_skip_file(&self, video: impl AsRef<Path>, result: SearchResult) -> Result<()> {
389 let opening = result
390 .opening
391 .map(|(start, end)| (start.as_secs_f32(), end.as_secs_f32()));
392 let ending = result
393 .ending
394 .map(|(start, end)| (start.as_secs_f32(), end.as_secs_f32()));
395 if opening.is_none() && ending.is_none() {
396 return Ok(());
397 }
398
399 let md5 = crate::util::compute_header_md5sum(&video)?;
400 let skip_file = video
401 .as_ref()
402 .to_owned()
403 .with_extension(super::SKIP_FILE_EXT);
404 let mut skip_file = std::fs::File::create(skip_file)?;
405 let data = SkipFile {
406 opening,
407 ending,
408 md5,
409 };
410 serde_json::to_writer(&mut skip_file, &data)?;
411
412 Ok(())
413 }
414
415 fn display_opening_ending_info(&self, result: SearchResult) {
416 if let Some(opening) = result.opening {
417 let (start, end) = opening;
418 println!(
419 "* Opening - {:?}-{:?}",
420 util::format_time(start),
421 util::format_time(end)
422 );
423 } else {
424 println!("* Opening - N/A");
425 }
426
427 if !self.openings_only {
429 if let Some(ending) = result.ending {
430 let (start, end) = ending;
431 println!(
432 "* Ending - {:?}-{:?}",
433 util::format_time(start),
434 util::format_time(end)
435 );
436 } else {
437 println!("* Ending - N/A");
438 }
439 }
440 }
441
442 fn search(
443 &self,
444 src_idx: usize,
445 dst_idx: usize,
446 frame_hash_map: &[FrameHashes],
447 ) -> Result<OpeningAndEndingInfo> {
448 tracing::debug!("started audio comparator");
449
450 let (src_frame_hashes, dst_frame_hashes) =
451 (&frame_hash_map[src_idx], &frame_hash_map[dst_idx]);
452
453 tracing::debug!("starting search for opening and ending");
454 let info = self.find_opening_and_ending(src_frame_hashes, dst_frame_hashes);
455 tracing::debug!("finished search for opening and ending");
456
457 Ok(info)
458 }
459
460 fn find_best_match(&self, matches: &[(&OpeningAndEndingInfo, bool)]) -> Option<SearchResult> {
465 if matches.len() == 0 {
466 return None;
467 }
468
469 let mut candidates = Vec::new();
470
471 for (m, is_source) in matches {
472 if *is_source {
473 for e in &m.src_openings {
474 let o = (e.src_longest_run, e.src_hash_duration, e.src_match_hash);
475 candidates.push((o, true));
476 }
477 for e in &m.src_endings {
478 let o = (e.src_longest_run, e.src_hash_duration, e.src_match_hash);
479 candidates.push((o, false));
480 }
481 } else {
482 for e in &m.dst_openings {
483 let o = (e.dst_longest_run, e.dst_hash_duration, e.dst_match_hash);
484 candidates.push((o, true));
485 }
486 for e in &m.dst_endings {
487 let o = (e.dst_longest_run, e.dst_hash_duration, e.dst_match_hash);
488 candidates.push((o, false));
489 }
490 }
491 }
492
493 let mut distinct_matches: HashMap<usize, HashSet<usize>> = HashMap::new();
494
495 for (i, (c, _)) in candidates.iter().enumerate() {
496 for (j, (other, _)) in candidates.iter().enumerate() {
497 let dist = u32::count_ones(c.2 ^ other.2);
498
499 if dist >= self.hash_match_threshold + (self.hash_match_threshold / 2) {
501 continue;
502 }
503
504 distinct_matches
505 .entry(i)
506 .or_insert(HashSet::new())
507 .insert(j);
508 distinct_matches
509 .entry(j)
510 .or_insert(HashSet::new())
511 .insert(i);
512 }
513 }
514
515 let mut best: SearchResult = Default::default();
516
517 let mut best_openings = distinct_matches
518 .iter()
519 .filter(|(k, _)| {
520 let (_, is_opening) = candidates[**k];
521 is_opening
522 })
523 .map(|(k, v)| {
524 let (((start, end), _, _), _) = candidates[*k];
525 let count = v.len() as i64;
526 let duration_secs = (end - start).as_secs_f32();
527 (-(count as f32 * 0.3 + duration_secs * 0.7), *k)
529 })
530 .collect::<Vec<_>>();
531 best_openings.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
533
534 if let Some((_, idx)) = best_openings.first() {
535 let (((start, end), hash_duration, _), _) = candidates[*idx];
536 best.opening = Some((
537 start + self.time_padding,
539 end - self.time_padding - hash_duration,
541 ));
542 }
543
544 if !self.openings_only {
546 let mut best_endings = distinct_matches
547 .iter()
548 .filter(|(k, _)| {
549 let (_, is_opening) = candidates[**k];
550 !is_opening
551 })
552 .map(|(k, v)| {
553 let (((start, end), _, _), _) = candidates[*k];
554 let count = v.len() as i64;
555 let duration_secs = (end - start).as_secs_f32();
556 (-(count as f32 * 0.3 + duration_secs * 0.7), *k)
558 })
559 .collect::<Vec<_>>();
560 best_endings.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
561
562 if let Some((_, idx)) = best_endings.first() {
563 let (((start, end), hash_duration, _), _) = candidates[*idx];
564 best.ending = Some((
565 start + self.time_padding,
567 end - self.time_padding - hash_duration,
569 ));
570 }
571 }
572
573 Some(best)
574 }
575}
576
577impl<P: AsRef<Path> + Sync> Comparator<P> {
578 pub fn run_with_frame_hashes(
584 &self,
585 frame_hashes: Vec<FrameHashes>,
586 display: bool,
587 use_skip_files: bool,
588 write_skip_files: bool,
589 threading: bool,
590 ) -> Result<BTreeMap<PathBuf, SearchResult>> {
591 let mut pairs = Vec::new();
594 let mut processed_videos = vec![false; self.videos.len()];
595
596 for i in 0..self.videos.len() {
597 for j in 0..self.videos.len() {
598 if i == j || processed_videos[j] {
599 continue;
600 }
601 pairs.push((i, j));
602 }
603 processed_videos[i] = true;
604 }
605
606 let mut data = Vec::new();
607
608 if cfg!(feature = "rayon") && threading {
609 #[cfg(feature = "rayon")]
611 {
612 data = pairs
613 .par_iter()
614 .map(|(src_idx, dst_idx)| {
615 (
616 *src_idx,
617 *dst_idx,
618 self.search(*src_idx, *dst_idx, &frame_hashes).unwrap(),
619 )
620 })
621 .filter(|(_, _, info)| !info.is_empty())
622 .collect::<Vec<_>>();
623 }
624 } else {
625 data.extend(
626 pairs
627 .iter()
628 .map(|(src_idx, dst_idx)| {
629 (
630 *src_idx,
631 *dst_idx,
632 self.search(*src_idx, *dst_idx, &frame_hashes).unwrap(),
633 )
634 })
635 .filter(|(_, _, info)| !info.is_empty()),
636 );
637 }
638
639 let mut info_map: Vec<Vec<(&OpeningAndEndingInfo, bool)>> =
643 vec![Vec::new(); self.videos.len()];
644 for (src_idx, dst_idx, info) in &data {
645 info_map[*src_idx].push((info, true));
646 info_map[*dst_idx].push((info, false));
647 }
648
649 let mut match_map = BTreeMap::new();
650
651 for (idx, matches) in info_map.into_iter().enumerate() {
654 let path = self.videos[idx].as_ref().to_owned();
655 if display {
656 println!("\n{}\n", path.display());
657 }
658
659 if use_skip_files && Self::check_skip_file(&path)? {
661 if display {
662 println!("Skipping due to existing skip file...");
663 }
664 continue;
665 }
666
667 let result = self.find_best_match(&matches);
668 if result.is_none() {
669 if display {
670 if self.openings_only {
671 println!("No opening found.");
672 } else {
673 println!("No opening or ending found.");
674 }
675 }
676 continue;
677 }
678 let result = result.unwrap();
679 if display {
680 self.display_opening_ending_info(result);
681 }
682 if write_skip_files {
683 self.create_skip_file(&path, result)?;
684 }
685 match_map.insert(path, result.clone());
686 }
687
688 Ok(match_map)
689 }
690
691 pub fn run(
698 &self,
699 analyze: bool,
700 display: bool,
701 use_skip_files: bool,
702 write_skip_files: bool,
703 threading: bool,
704 ) -> Result<BTreeMap<PathBuf, SearchResult>> {
705 let mut frame_hashes = Vec::with_capacity(self.videos.len());
709
710 for video in &self.videos {
711 let video = video.as_ref();
712 let f = FrameHashes::from_video(video, analyze)?;
713 frame_hashes.push(f);
714 }
715
716 self.run_with_frame_hashes(
717 frame_hashes,
718 display,
719 use_skip_files,
720 write_skip_files,
721 threading,
722 )
723 }
724}
725
726#[cfg(test)]
727mod test {
728 use super::*;
729
730 fn get_sample_paths() -> Vec<PathBuf> {
731 let resources = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("resources");
732 vec![
733 resources.join("sample-5s.mp4"),
734 resources.join("sample-shifted-4s.mp4"),
735 ]
736 }
737
738 #[test]
739 fn test_comparator() {
740 let paths = get_sample_paths();
743 let comparator = Comparator::from_files(paths)
744 .with_min_opening_duration(Duration::from_millis(300))
745 .with_min_ending_duration(Duration::from_millis(300));
746 let data = comparator.run(true, true, false, false, false).unwrap();
747 assert_eq!(data.len(), 2);
748 }
749}