1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
use {
anyhow::Result,
crossbeam::channel,
crate::{
dup::Dup,
ext,
hash::FileHash,
},
fnv::FnvHashMap,
rayon::{
prelude::ParallelIterator,
iter::ParallelBridge,
},
std::{
fs,
path::{Path, PathBuf},
},
};
/// Files grouped by their [`FileHash`].
///
/// Each entry of `dups` collects every path that produced the same hash.
/// Until the map is pruned (see `compile`, which `build` also performs),
/// entries holding a single path are present as well.
#[derive(Default)]
pub struct DupMap {
    /// Hash -> group of paths that share this hash.
    pub dups: FnvHashMap<FileHash, Dup>,
    /// Number of files that were hashed and recorded, whether or not
    /// they turned out to have a duplicate.
    pub seen: usize,
}
impl DupMap {
    /// Hashes the file at `path` and records it in the map.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `FileHash::new` when the file cannot
    /// be hashed; in that case the map is left untouched.
    pub fn add_file(&mut self, path: &Path) -> Result<()> {
        let hash = FileHash::new(path)?;
        self.record(path.to_path_buf(), hash);
        Ok(())
    }

    /// Drops every group that holds fewer than two paths, so that only
    /// real duplicates remain. `seen` is not modified.
    pub fn compile(&mut self) {
        self.dups.retain(|_, d| d.paths.len() > 1);
    }

    /// Number of groups currently stored in `dups`.
    pub fn len(&self) -> usize {
        self.dups.len()
    }

    /// Scans the tree below `root` and builds an already-pruned map of the
    /// duplicated image files it contains.
    ///
    /// A background thread walks the directories and feeds candidate paths
    /// into a channel; the paths are hashed in parallel with rayon, then
    /// grouped by hash on the calling thread. Files whose hashing fails are
    /// silently skipped and do not count in `seen`.
    ///
    /// The order of the paths inside a group is unspecified because the
    /// hashing is parallel.
    ///
    /// # Panics
    ///
    /// Panics if the directory walker thread panicked.
    pub fn build(root: PathBuf) -> Result<Self> {
        let (s_matching_files, r_matching_files) = channel::unbounded();
        let (s_hashed_files, r_hashed_files) = channel::unbounded::<(PathBuf, FileHash)>();

        let file_generator =
            std::thread::spawn(move || Self::walk_images(root, s_matching_files));

        // Blocks until the walker has dropped its sender and every received
        // path has been processed. The hashed-file sender (and its per-worker
        // clones) is dropped when this call returns, which is what lets the
        // draining loop below terminate.
        r_matching_files
            .into_iter()
            .par_bridge()
            .for_each_with(s_hashed_files, |s, path| {
                if let Ok(hash) = FileHash::new(&path) {
                    s.send((path, hash))
                        .expect("hashed-file receiver is alive until `build` drains it");
                }
            });

        let mut map = Self::default();
        for (path, hash) in r_hashed_files {
            // `path` is already owned: move it instead of cloning it.
            map.record(path, hash);
        }

        file_generator
            .join()
            .expect("directory walker thread panicked");

        map.compile();
        Ok(map)
    }

    /// Adds `path` to the group of `hash` and counts it as seen.
    fn record(&mut self, path: PathBuf, hash: FileHash) {
        self.dups.entry(hash).or_default().paths.push(path);
        self.seen += 1;
    }

    /// Walks the tree below `root` (depth-first, explicit stack) and sends
    /// to `sink` every non-directory entry whose extension is accepted by
    /// `ext::is_image`.
    ///
    /// Skipped without error: unreadable directories, entries whose metadata
    /// can't be read, entries whose name is not valid UTF-8, directories
    /// named `dev`, and files without a UTF-8 extension.
    ///
    /// Returns early when the receiving side of `sink` is gone.
    fn walk_images(root: PathBuf, sink: channel::Sender<PathBuf>) {
        // Directories with this exact name are never descended into.
        const SKIPPED_DIR: &str = "dev";

        let mut pending = vec![root];
        while let Some(dir) = pending.pop() {
            let entries = match fs::read_dir(&dir) {
                Ok(entries) => entries,
                Err(_) => continue,
            };
            for entry in entries.flatten() {
                // `DirEntry::metadata` does not traverse symlinks, so a
                // symlink to a directory is not reported as a directory here.
                let md = match entry.metadata() {
                    Ok(md) => md,
                    Err(_) => continue,
                };
                let path = entry.path();
                let name = match path.file_name().and_then(|s| s.to_str()) {
                    Some(name) => name,
                    None => continue,
                };
                if md.is_dir() {
                    if name != SKIPPED_DIR {
                        pending.push(path);
                    }
                    continue;
                }
                let extension = match path.extension().and_then(|s| s.to_str()) {
                    Some(extension) => extension,
                    None => continue,
                };
                if !ext::is_image(&extension) {
                    continue;
                }
                if sink.send(path).is_err() {
                    // Nobody is listening anymore: walking further is useless.
                    return;
                }
            }
        }
    }
}