1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
//! Functions for detecting duplicates in a set of paths.
use std::{collections::HashMap, fs, path::PathBuf};
/// Returns a list of all sets of duplicate files in a set of paths.
///
/// Each entry in the output list will be a list of files from the input paths
/// which have the same contents. The order of the output is nondeterministic.
///
/// Runs in O(NF) time and O(nF) memory, where N is the total number of files,
/// n is the number of unique files, and F is the average file size.
///
/// ## Note
/// If you are loading more than a few files, prefer to use
/// [`get_duplicates_hashed`], since it uses much less memory. This is
/// intended to be used as a subroutine when the expected number of unique
/// files is small.
///
/// ## Example
/// ```no_run
/// // assume that a.txt and e.txt have the same contents, and b.txt, c.txt and d.txt have the same contents
/// let paths = vec![
///     PathBuf::from("files/a.txt"),
///     PathBuf::from("files/b.txt"),
///     PathBuf::from("files/more_files/c.txt"),
///     PathBuf::from("files/more_files/d.txt"),
///     PathBuf::from("files/more_files/even_more_files/e.txt"),
///     PathBuf::from("files/more_files/even_more_files/f.txt")
/// ];
/// let duplicates = get_duplicates(&paths);
/// let expected = vec![
///     vec!["files/a.txt", "files/more_files/even_more_files/e.txt"],
///     vec!["files/b.txt", "files/more_files/c.txt", "files/more_files/d.txt"]
/// ];
/// // Note: the output order is nondeterministic, so a real comparison
/// // should sort both sides first.
/// assert!(duplicates == expected);
/// ```
/// Returns a list of all sets of duplicate files in a set of paths in a memory efficient manner.
///
/// Each entry in the output list will be a list of files from the input paths
/// which have the same contents. The order of the output is nondeterministic.
///
/// Runs in O(NF) time and O(n) memory in expectation, where N is the total
/// number of files, n is the number of unique files, and F is the average
/// file size.
///
/// ## Example
/// ```no_run
/// // assume that a.txt and e.txt have the same contents, and b.txt, c.txt and d.txt have the same contents
/// let paths = vec![
///     PathBuf::from("files/a.txt"),
///     PathBuf::from("files/b.txt"),
///     PathBuf::from("files/more_files/c.txt"),
///     PathBuf::from("files/more_files/d.txt"),
///     PathBuf::from("files/more_files/even_more_files/e.txt"),
///     PathBuf::from("files/more_files/even_more_files/f.txt")
/// ];
/// let duplicates = get_duplicates_hashed(&paths);
/// let expected = vec![
///     vec!["files/a.txt", "files/more_files/even_more_files/e.txt"],
///     vec!["files/b.txt", "files/more_files/c.txt", "files/more_files/d.txt"]
/// ];
/// // Note: the output order is nondeterministic, so a real comparison
/// // should sort both sides first.
/// assert!(duplicates == expected);
/// ```