needle/audio/comparator.rs

extern crate chromaprint_rust;
#[cfg(feature = "rayon")]
extern crate rayon;

use std::collections::{BTreeMap, BinaryHeap, HashMap, HashSet};
use std::fmt::Display;
use std::path::{Path, PathBuf};
use std::time::Duration;

use chromaprint_rust as chromaprint;
#[cfg(feature = "rayon")]
use rayon::prelude::*;

use crate::util;
use crate::Result;

use super::{Analyzer, FrameHashes};

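/// On-disk representation of a skip file. It is written next to the video with the
/// `SKIP_FILE_EXT` extension and serialized with serde_json. Illustrative shape of the JSON
/// (timestamps are hypothetical seconds; `md5` is the hash of the video header):
///
/// ```json
/// {"opening":[10.5,95.0],"ending":[1250.0,1338.5],"md5":"<md5 of video header>"}
/// ```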
#[derive(serde::Deserialize, serde::Serialize)]
struct SkipFile {
    pub opening: Option<(f32, f32)>,
    pub ending: Option<(f32, f32)>,
    pub md5: String,
}

type ComparatorHeap = BinaryHeap<ComparatorHeapEntry>;

#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord)]
struct ComparatorHeapEntry {
    score: usize,
    src_longest_run: (Duration, Duration),
    dst_longest_run: (Duration, Duration),
    src_match_hash: u32,
    dst_match_hash: u32,
    is_src_opening: bool,
    is_src_ending: bool,
    is_dst_opening: bool,
    is_dst_ending: bool,
    src_hash_duration: Duration,
    dst_hash_duration: Duration,
}

impl Display for ComparatorHeapEntry {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "score: {}, src_longest_run: {:?}, dst_longest_run: {:?}, src_match_hash: {}, dst_match_hash: {}",
            self.score, self.src_longest_run, self.dst_longest_run, self.src_match_hash, self.dst_match_hash,
        )
    }
}

#[derive(Debug)]
struct OpeningAndEndingInfo {
    src_openings: Vec<ComparatorHeapEntry>,
    dst_openings: Vec<ComparatorHeapEntry>,
    src_endings: Vec<ComparatorHeapEntry>,
    dst_endings: Vec<ComparatorHeapEntry>,
}

impl OpeningAndEndingInfo {
    pub fn is_empty(&self) -> bool {
        self.src_openings.is_empty()
            && self.dst_openings.is_empty()
            && self.src_endings.is_empty()
            && self.dst_endings.is_empty()
    }
}

/// Represents a single result for a video file. This is output by [Comparator::run].
#[derive(Copy, Clone, Debug, Default)]
pub struct SearchResult {
    opening: Option<(Duration, Duration)>,
    ending: Option<(Duration, Duration)>,
}

/// Compares two or more video files using either existing [FrameHashes](super::FrameHashes) or by running an
/// [Analyzer](super::Analyzer) in-place.
#[derive(Debug)]
pub struct Comparator<P: AsRef<Path>> {
    videos: Vec<P>,
    openings_only: bool,
    hash_match_threshold: u32,
    opening_search_percentage: f32,
    ending_search_percentage: f32,
    min_opening_duration: Duration,
    min_ending_duration: Duration,
    time_padding: Duration,
}

impl<P: AsRef<Path>> Default for Comparator<P> {
    fn default() -> Self {
        Self {
            videos: Vec::new(),
            openings_only: false,
            hash_match_threshold: super::DEFAULT_HASH_MATCH_THRESHOLD as u32,
            opening_search_percentage: super::DEFAULT_OPENING_SEARCH_PERCENTAGE,
            ending_search_percentage: super::DEFAULT_ENDING_SEARCH_PERCENTAGE,
            min_opening_duration: Duration::from_secs(super::DEFAULT_MIN_OPENING_DURATION as u64),
            min_ending_duration: Duration::from_secs(super::DEFAULT_MIN_ENDING_DURATION as u64),
            time_padding: Duration::ZERO,
        }
    }
}

impl<P: AsRef<Path>> From<Analyzer<P>> for Comparator<P> {
    /// Constructs a [Comparator] from an [Analyzer]. This allows you to re-use the paths
    /// and destroy the [Analyzer] when you're done with it.
    fn from(analyzer: Analyzer<P>) -> Self {
        let mut comparator = Self::default();
        // Analyzer would already have sorted the videos.
        comparator.videos = analyzer.videos;
        comparator
    }
}

impl<P: AsRef<Path> + Ord> Comparator<P> {
    /// Constructs a [Comparator] from a list of video paths.
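    ///
    /// # Example
    ///
    /// A minimal usage sketch; the crate path (`needle::audio`) and the video file names are
    /// illustrative and assumed to exist on disk.
    ///
    /// ```no_run
    /// use std::time::Duration;
    /// use needle::audio::Comparator;
    ///
    /// let comparator = Comparator::from_files(vec!["ep1.mkv", "ep2.mkv"])
    ///     .with_min_opening_duration(Duration::from_secs(20))
    ///     .with_min_ending_duration(Duration::from_secs(20));
    /// ```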
    pub fn from_files(videos: impl Into<Vec<P>>) -> Self {
        let mut comparator = Self::default();
        comparator.videos = videos.into();
        comparator.videos.sort();
        comparator
    }
}

impl<P: AsRef<Path>> Comparator<P> {
    /// Returns the video paths used by this comparator.
    pub fn videos(&self) -> &[P] {
        &self.videos
    }

    /// Returns a new [Comparator] with the provided `openings_only`.
    pub fn with_openings_only(mut self, openings_only: bool) -> Self {
        self.openings_only = openings_only;
        self
    }

    /// Returns a new [Comparator] with the provided `hash_match_threshold`.
    pub fn with_hash_match_threshold(mut self, hash_match_threshold: u32) -> Self {
        self.hash_match_threshold = hash_match_threshold;
        self
    }

    /// Returns a new [Comparator] with the provided `opening_search_percentage`.
    pub fn with_opening_search_percentage(mut self, opening_search_percentage: f32) -> Self {
        self.opening_search_percentage = opening_search_percentage;
        self
    }

    /// Returns a new [Comparator] with the provided `ending_search_percentage`.
    pub fn with_ending_search_percentage(mut self, ending_search_percentage: f32) -> Self {
        self.ending_search_percentage = ending_search_percentage;
        self
    }

    /// Returns a new [Comparator] with the provided `min_opening_duration`.
    pub fn with_min_opening_duration(mut self, min_opening_duration: Duration) -> Self {
        self.min_opening_duration = min_opening_duration;
        self
    }

    /// Returns a new [Comparator] with the provided `min_ending_duration`.
    pub fn with_min_ending_duration(mut self, min_ending_duration: Duration) -> Self {
        self.min_ending_duration = min_ending_duration;
        self
    }

    /// Returns a new [Comparator] with the provided `time_padding`.
    pub fn with_time_padding(mut self, time_padding: Duration) -> Self {
        self.time_padding = time_padding;
        self
    }

    #[inline]
    fn compute_hash_for_match(hashes: &[(u32, Duration)], (start, end): (usize, usize)) -> u32 {
        let hashes: Vec<u32> = hashes.iter().map(|t| t.0).collect();
        chromaprint::simhash::simhash32(&hashes[start..=end])
    }

    /// Runs an LCS (longest common substring) search between the two sets of hashes. This runs in
    /// O(n * m) time.
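    ///
    /// Two hashes are considered a match when the Hamming distance between them, i.e.
    /// `u32::count_ones(src_hash ^ dst_hash)`, is at most `hash_match_threshold`.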
    fn longest_common_hash_match(
        &self,
        src: &[(u32, Duration)],
        dst: &[(u32, Duration)],
        src_max_opening_time: Duration,
        src_min_ending_time: Duration,
        dst_max_opening_time: Duration,
        dst_min_ending_time: Duration,
        src_hash_duration: Duration,
        dst_hash_duration: Duration,
    ) -> Vec<ComparatorHeapEntry> {
        // Heap to keep track of best hash matches in order of length.
        let mut heap: ComparatorHeap = BinaryHeap::new();

        // Build the DP table of substrings.
        let mut table: Vec<Vec<usize>> = vec![vec![0; dst.len() + 1]; src.len() + 1];
        for i in 0..src.len() {
            for j in 0..dst.len() {
                let (src_hash, dst_hash) = (src[i].0, dst[j].0);
                if i == 0 || j == 0 {
                    table[i][j] = 0;
                } else if u32::count_ones(src_hash ^ dst_hash) <= self.hash_match_threshold {
                    table[i][j] = table[i - 1][j - 1] + 1;
                } else {
                    table[i][j] = 0;
                }
            }
        }

        // Walk through the table and find all valid substrings and insert them into
        // the heap.
        let mut i = src.len() - 1;
        while i > 0 {
            let mut j = dst.len() - 1;
            while j > 0 {
                // We need to find an entry where the current entry is non-zero
                // and the next entry is zero. This indicates that we are at the end
                // of a substring.
                if table[i][j] == 0
                    || (i < src.len() - 1 && j < dst.len() - 1 && table[i + 1][j + 1] != 0)
                {
                    j -= 1;
                    continue;
                }

                // Figure out whether this is an opening or an ending.
                //
                // If the sequence _ends_ before the maximum opening time, it is an opening.
                // If the sequence _starts_ after the minimum ending time, it is an ending.
                let (src_start_idx, src_end_idx) = (i - table[i][j], i);
                let (dst_start_idx, dst_end_idx) = (j - table[i][j], j);
                let (src_start, src_end) = (src[src_start_idx].1, src[src_end_idx].1);
                let (dst_start, dst_end) = (dst[dst_start_idx].1, dst[dst_end_idx].1);
                let (is_src_opening, is_src_ending) = (
                    src_end < src_max_opening_time,
                    src_start > src_min_ending_time,
                );
                let (is_dst_opening, is_dst_ending) = (
                    dst_end < dst_max_opening_time,
                    dst_start > dst_min_ending_time,
                );

                // An LCS result is only valid if it is a valid opening or ending in the source _and_ the dest.
                let is_src_valid = (is_src_opening
                    && (src_end - src_start) >= self.min_opening_duration)
                    || (is_src_ending && (src_end - src_start) >= self.min_ending_duration);
                let is_dst_valid = (is_dst_opening
                    && (dst_end - dst_start) >= self.min_opening_duration)
                    || (is_dst_ending && (dst_end - dst_start) >= self.min_ending_duration);
                let is_valid = is_src_valid && is_dst_valid;

                // Skip invalid sequences.
                if !is_valid {
                    j -= 1;
                    continue;
                }

                // If we do not need endings, skip them now.
                let is_ending = (is_src_ending
                    && (src_end - src_start) >= self.min_ending_duration)
                    || (is_dst_ending && (dst_end - dst_start) >= self.min_ending_duration);
                if is_ending && self.openings_only {
                    j -= 1;
                    continue;
                }

                // We have a valid entry at this point.
                let src_match_hash =
                    Self::compute_hash_for_match(src, (src_start_idx, src_end_idx));
                let dst_match_hash =
                    Self::compute_hash_for_match(dst, (dst_start_idx, dst_end_idx));

                let entry = ComparatorHeapEntry {
                    score: table[i][j],
                    src_longest_run: (src_start, src_end),
                    dst_longest_run: (dst_start, dst_end),
                    src_match_hash,
                    dst_match_hash,
                    is_src_opening,
                    is_src_ending,
                    is_dst_opening,
                    is_dst_ending,
                    src_hash_duration,
                    dst_hash_duration,
                };

                heap.push(entry);

                j -= 1;
            }

            i -= 1;
        }

        heap.into()
    }

    fn find_opening_and_ending(
        &self,
        src_hashes: &super::analyzer::FrameHashes,
        dst_hashes: &super::analyzer::FrameHashes,
    ) -> OpeningAndEndingInfo {
        let _g = tracing::span!(tracing::Level::TRACE, "find_opening_and_ending").entered();

        let src_hash_data = &src_hashes.data;
        let dst_hash_data = &dst_hashes.data;
        let src_hash_duration = Duration::from_secs_f32(src_hashes.hash_duration);
        let dst_hash_duration = Duration::from_secs_f32(dst_hashes.hash_duration);

        // Figure out the duration limits for opening and endings.
        let src_opening_search_idx =
            ((src_hash_data.len() - 1) as f32 * self.opening_search_percentage) as usize;
        let src_ending_search_idx =
            ((src_hash_data.len() - 1) as f32 * (1.0 - self.ending_search_percentage)) as usize;
        let dst_opening_search_idx =
            ((dst_hash_data.len() - 1) as f32 * self.opening_search_percentage) as usize;
        let dst_ending_search_idx =
            ((dst_hash_data.len() - 1) as f32 * (1.0 - self.ending_search_percentage)) as usize;
        let src_max_opening_time = src_hash_data[src_opening_search_idx].1;
        let src_min_ending_time = src_hash_data[src_ending_search_idx].1;
        let dst_max_opening_time = dst_hash_data[dst_opening_search_idx].1;
        let dst_min_ending_time = dst_hash_data[dst_ending_search_idx].1;

        let entries = self.longest_common_hash_match(
            src_hash_data,
            dst_hash_data,
            src_max_opening_time,
            src_min_ending_time,
            dst_max_opening_time,
            dst_min_ending_time,
            src_hash_duration,
            dst_hash_duration,
        );

        tracing::debug!(
            num_matches = entries.len(),
            "finished sliding window analysis"
        );

        let (mut src_valid_openings, mut src_valid_endings) = (Vec::new(), Vec::new());
        let (mut dst_valid_openings, mut dst_valid_endings) = (Vec::new(), Vec::new());

        // TODO(aksiksi): Reduce duplication of memory here.
        for entry in entries {
            let (is_src_opening, is_src_ending) = (entry.is_src_opening, entry.is_src_ending);
            let (is_dst_opening, is_dst_ending) = (entry.is_dst_opening, entry.is_dst_ending);
            if is_src_opening {
                src_valid_openings.push(entry.clone());
            } else if is_src_ending {
                src_valid_endings.push(entry.clone());
            }
            if is_dst_opening {
                dst_valid_openings.push(entry.clone());
            } else if is_dst_ending {
                dst_valid_endings.push(entry.clone());
            }
        }

        OpeningAndEndingInfo {
            src_openings: src_valid_openings,
            dst_openings: dst_valid_openings,
            src_endings: src_valid_endings,
            dst_endings: dst_valid_endings,
        }
    }

    fn check_skip_file(video: impl AsRef<Path>) -> Result<bool> {
        let skip_file = video
            .as_ref()
            .to_owned()
            .with_extension(super::SKIP_FILE_EXT);
        if !skip_file.exists() {
            return Ok(false);
        }

        // Compute MD5 hash of the video header.
        let md5 = crate::util::compute_header_md5sum(video)?;

        // Read existing skip file and compare MD5 hashes.
        let f = std::fs::File::open(&skip_file)?;
        let skip_file: SkipFile = serde_json::from_reader(&f)?;

        Ok(skip_file.md5 == md5)
    }

    fn create_skip_file(&self, video: impl AsRef<Path>, result: SearchResult) -> Result<()> {
        let opening = result
            .opening
            .map(|(start, end)| (start.as_secs_f32(), end.as_secs_f32()));
        let ending = result
            .ending
            .map(|(start, end)| (start.as_secs_f32(), end.as_secs_f32()));
        if opening.is_none() && ending.is_none() {
            return Ok(());
        }

        let md5 = crate::util::compute_header_md5sum(&video)?;
        let skip_file = video
            .as_ref()
            .to_owned()
            .with_extension(super::SKIP_FILE_EXT);
        let mut skip_file = std::fs::File::create(skip_file)?;
        let data = SkipFile {
            opening,
            ending,
            md5,
        };
        serde_json::to_writer(&mut skip_file, &data)?;

        Ok(())
    }

    fn display_opening_ending_info(&self, result: SearchResult) {
        if let Some(opening) = result.opening {
            let (start, end) = opening;
            println!(
                "* Opening - {:?}-{:?}",
                util::format_time(start),
                util::format_time(end)
            );
        } else {
            println!("* Opening - N/A");
        }

        // Display ending information only when needed.
        if !self.openings_only {
            if let Some(ending) = result.ending {
                let (start, end) = ending;
                println!(
                    "* Ending - {:?}-{:?}",
                    util::format_time(start),
                    util::format_time(end)
                );
            } else {
                println!("* Ending - N/A");
            }
        }
    }

    fn search(
        &self,
        src_idx: usize,
        dst_idx: usize,
        frame_hash_map: &[FrameHashes],
    ) -> Result<OpeningAndEndingInfo> {
        tracing::debug!("started audio comparator");

        let (src_frame_hashes, dst_frame_hashes) =
            (&frame_hash_map[src_idx], &frame_hash_map[dst_idx]);

        tracing::debug!("starting search for opening and ending");
        let info = self.find_opening_and_ending(src_frame_hashes, dst_frame_hashes);
        tracing::debug!("finished search for opening and ending");

        Ok(info)
    }

    /// Find the best opening and ending candidate across all provided matches.
    ///
    /// Candidates from all matches are pooled and grouped by similar sequence hashes. Each
    /// candidate is then ranked by a weighted sum of how many similar candidates it has and its
    /// duration (duration is weighted more heavily), and the best opening and ending are combined
    /// into the final result.
    fn find_best_match(&self, matches: &[(&OpeningAndEndingInfo, bool)]) -> Option<SearchResult> {
        if matches.is_empty() {
            return None;
        }

        let mut candidates = Vec::new();

        for (m, is_source) in matches {
            if *is_source {
                for e in &m.src_openings {
                    let o = (e.src_longest_run, e.src_hash_duration, e.src_match_hash);
                    candidates.push((o, true));
                }
                for e in &m.src_endings {
                    let o = (e.src_longest_run, e.src_hash_duration, e.src_match_hash);
                    candidates.push((o, false));
                }
            } else {
                for e in &m.dst_openings {
                    let o = (e.dst_longest_run, e.dst_hash_duration, e.dst_match_hash);
                    candidates.push((o, true));
                }
                for e in &m.dst_endings {
                    let o = (e.dst_longest_run, e.dst_hash_duration, e.dst_match_hash);
                    candidates.push((o, false));
                }
            }
        }

        let mut distinct_matches: HashMap<usize, HashSet<usize>> = HashMap::new();

        for (i, (c, _)) in candidates.iter().enumerate() {
            for (j, (other, _)) in candidates.iter().enumerate() {
                let dist = u32::count_ones(c.2 ^ other.2);

                // Add a small bias to the hash match threshold when comparing sequence hashes.
                if dist >= self.hash_match_threshold + (self.hash_match_threshold / 2) {
                    continue;
                }

                distinct_matches.entry(i).or_default().insert(j);
                distinct_matches.entry(j).or_default().insert(i);
            }
        }

        let mut best: SearchResult = Default::default();

        let mut best_openings = distinct_matches
            .iter()
            .filter(|(k, _)| {
                let (_, is_opening) = candidates[**k];
                is_opening
            })
            .map(|(k, v)| {
                let (((start, end), _, _), _) = candidates[*k];
                let count = v.len() as i64;
                let duration_secs = (end - start).as_secs_f32();
                // Weighted sum of count and duration, with more weight given to duration.
                (-(count as f32 * 0.3 + duration_secs * 0.7), *k)
            })
            .collect::<Vec<_>>();
        // We can't use .sort() because f32 is not `Ord`.
        best_openings.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

        if let Some((_, idx)) = best_openings.first() {
            let (((start, end), hash_duration, _), _) = candidates[*idx];
            best.opening = Some((
                // Add a buffer between actual detected times and what we return to users.
                start + self.time_padding,
                // Adjust ending time using the configured hash duration.
                end - self.time_padding - hash_duration,
            ));
        }

        // Ending search is optional, so we handle it accordingly.
        if !self.openings_only {
            let mut best_endings = distinct_matches
                .iter()
                .filter(|(k, _)| {
                    let (_, is_opening) = candidates[**k];
                    !is_opening
                })
                .map(|(k, v)| {
                    let (((start, end), _, _), _) = candidates[*k];
                    let count = v.len() as i64;
                    let duration_secs = (end - start).as_secs_f32();
                    // Weighted sum of count and duration, with more weight given to duration.
                    (-(count as f32 * 0.3 + duration_secs * 0.7), *k)
                })
                .collect::<Vec<_>>();
            best_endings.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

            if let Some((_, idx)) = best_endings.first() {
                let (((start, end), hash_duration, _), _) = candidates[*idx];
                best.ending = Some((
                    // Add a buffer between actual detected times and what we return to users.
                    start + self.time_padding,
                    // Adjust ending time using the configured hash duration.
                    end - self.time_padding - hash_duration,
                ));
            }
        }

        Some(best)
    }
}

impl<P: AsRef<Path> + Sync> Comparator<P> {
    /// Exactly the same as [Self::run], but uses the provided [FrameHashes] instead of reading
    /// from disk or analyzing in-place.
    ///
    /// Note that the provided [FrameHashes] **must** be generated from an [Analyzer] using the
    /// _same_ list of video paths.
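    ///
    /// # Example
    ///
    /// A minimal sketch; the crate path and file names are illustrative, and
    /// `precomputed_frame_hashes` is a hypothetical placeholder for hashes produced by an
    /// [Analyzer] over the same (sorted) paths.
    ///
    /// ```no_run
    /// use needle::audio::{Comparator, FrameHashes};
    /// # fn precomputed_frame_hashes() -> Vec<FrameHashes> { unimplemented!() }
    ///
    /// let comparator = Comparator::from_files(vec!["ep1.mkv", "ep2.mkv"]);
    /// let frame_hashes: Vec<FrameHashes> = precomputed_frame_hashes();
    /// let results = comparator
    ///     .run_with_frame_hashes(frame_hashes, false, false, false, false)
    ///     .unwrap();
    /// println!("found results for {} videos", results.len());
    /// ```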
    pub fn run_with_frame_hashes(
        &self,
        frame_hashes: Vec<FrameHashes>,
        display: bool,
        use_skip_files: bool,
        write_skip_files: bool,
        threading: bool,
    ) -> Result<BTreeMap<PathBuf, SearchResult>> {
        // Build a list of video pairs for actual search. Pairs should only appear once.
        // Given N videos, this will result in: (N * (N-1)) / 2 pairs
        let mut pairs = Vec::new();
        let mut processed_videos = vec![false; self.videos.len()];

        for i in 0..self.videos.len() {
            for j in 0..self.videos.len() {
                if i == j || processed_videos[j] {
                    continue;
                }
                pairs.push((i, j));
            }
            processed_videos[i] = true;
        }

        let mut data = Vec::new();

        if cfg!(feature = "rayon") && threading {
            // Perform the search in parallel for all pairs.
            #[cfg(feature = "rayon")]
            {
                data = pairs
                    .par_iter()
                    .map(|(src_idx, dst_idx)| {
                        (
                            *src_idx,
                            *dst_idx,
                            self.search(*src_idx, *dst_idx, &frame_hashes).unwrap(),
                        )
                    })
                    .filter(|(_, _, info)| !info.is_empty())
                    .collect::<Vec<_>>();
            }
        } else {
            data.extend(
                pairs
                    .iter()
                    .map(|(src_idx, dst_idx)| {
                        (
                            *src_idx,
                            *dst_idx,
                            self.search(*src_idx, *dst_idx, &frame_hashes).unwrap(),
                        )
                    })
                    .filter(|(_, _, info)| !info.is_empty()),
            );
        }

        // This map tracks the generated info struct for each video path. A bool is included
        // to allow determining whether the path is a source (true) or dest (false) in the info
        // struct.
        let mut info_map: Vec<Vec<(&OpeningAndEndingInfo, bool)>> =
            vec![Vec::new(); self.videos.len()];
        for (src_idx, dst_idx, info) in &data {
            info_map[*src_idx].push((info, true));
            info_map[*dst_idx].push((info, false));
        }

        let mut match_map = BTreeMap::new();

        // For each path, find the best opening and ending candidate among the list
        // of other videos. If required, display the result and write a skip file to disk.
        for (idx, matches) in info_map.into_iter().enumerate() {
            let path = self.videos[idx].as_ref().to_owned();
            if display {
                println!("\n{}\n", path.display());
            }

            // Skip match selection for this video if it already has a skip file on disk.
            if use_skip_files && Self::check_skip_file(&path)? {
                if display {
                    println!("Skipping due to existing skip file...");
                }
                continue;
            }

            let result = self.find_best_match(&matches);
            if result.is_none() {
                if display {
                    if self.openings_only {
                        println!("No opening found.");
                    } else {
                        println!("No opening or ending found.");
                    }
                }
                continue;
            }
            let result = result.unwrap();
            if display {
                self.display_opening_ending_info(result);
            }
            if write_skip_files {
                self.create_skip_file(&path, result)?;
            }
            match_map.insert(path, result.clone());
        }

        Ok(match_map)
    }

    /// Runs the comparator.
    ///
    /// * If `analyze` is set to true, an `Analyzer` will be built for each video file and run in-place.
    /// * If `use_skip_files` is set, any video that already has a matching skip file on disk will be
    ///   skipped during this run. If `write_skip_files` is set, a skip file will be written to disk
    ///   once the comparison completes.
    /// * If `display` is set, the final results will be printed to stdout.
    /// * If `threading` is set and the `rayon` feature is enabled, video pairs are searched in parallel.
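    ///
    /// # Example
    ///
    /// A minimal sketch; the crate path and file names are illustrative.
    ///
    /// ```no_run
    /// use needle::audio::Comparator;
    ///
    /// let comparator = Comparator::from_files(vec!["ep1.mkv", "ep2.mkv"]);
    /// let results = comparator.run(
    ///     true,  // analyze: build and run an Analyzer for each video in-place
    ///     false, // display: do not print results to stdout
    ///     false, // use_skip_files: ignore existing skip files
    ///     false, // write_skip_files: do not write skip files
    ///     false, // threading: search pairs sequentially
    /// ).unwrap();
    /// for (path, result) in &results {
    ///     println!("{}: {:?}", path.display(), result);
    /// }
    /// ```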
    pub fn run(
        &self,
        analyze: bool,
        display: bool,
        use_skip_files: bool,
        write_skip_files: bool,
        threading: bool,
    ) -> Result<BTreeMap<PathBuf, SearchResult>> {
        // Stores frame hash data for each video to be analyzed.
        // We load them all now to be able to handle in-place analysis when the `analyze`
        // flag is passed in to this method.
        let mut frame_hashes = Vec::with_capacity(self.videos.len());

        for video in &self.videos {
            let video = video.as_ref();
            let f = FrameHashes::from_video(video, analyze)?;
            frame_hashes.push(f);
        }

        self.run_with_frame_hashes(
            frame_hashes,
            display,
            use_skip_files,
            write_skip_files,
            threading,
        )
    }
}

#[cfg(test)]
mod test {
    use super::*;

    fn get_sample_paths() -> Vec<PathBuf> {
        let resources = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("resources");
        vec![
            resources.join("sample-5s.mp4"),
            resources.join("sample-shifted-4s.mp4"),
        ]
    }

    #[test]
    fn test_comparator() {
        // TODO(aksiksi): Make this test actually do something. Right now, it doesn't really detect anything
        // because the clips are too short.
        let paths = get_sample_paths();
        let comparator = Comparator::from_files(paths)
            .with_min_opening_duration(Duration::from_millis(300))
            .with_min_ending_duration(Duration::from_millis(300));
        let data = comparator.run(true, true, false, false, false).unwrap();
        assert_eq!(data.len(), 2);
    }
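
    // Illustrative sanity check using hypothetical hash data (not derived from a real video):
    // the sequence hash computed for a given (start, end) range should be deterministic.
    #[test]
    fn test_compute_hash_for_match_is_deterministic() {
        let hashes: Vec<(u32, Duration)> = (0..16u32)
            .map(|i| (i * 31, Duration::from_secs(i as u64)))
            .collect();
        let a = Comparator::<PathBuf>::compute_hash_for_match(&hashes, (2, 9));
        let b = Comparator::<PathBuf>::compute_hash_for_match(&hashes, (2, 9));
        assert_eq!(a, b);
    }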
}