1#![allow(dead_code)]
8#![allow(clippy::cast_precision_loss)]
9
10use std::collections::HashMap;
11use std::path::PathBuf;
12
13#[derive(Debug, Clone, PartialEq, Eq, Hash)]
19pub struct HashEntry {
20 pub digest: String,
22 pub file_path: PathBuf,
24 pub file_size: u64,
26 pub added_at: u64,
28}
29
30impl HashEntry {
31 pub fn new(digest: &str, file_path: PathBuf, file_size: u64, added_at: u64) -> Self {
33 Self {
34 digest: digest.to_string(),
35 file_path,
36 file_size,
37 added_at,
38 }
39 }
40}
41
42pub struct HashStore {
50 entries: HashMap<String, Vec<HashEntry>>,
52 count: usize,
54}
55
56impl HashStore {
57 #[must_use]
59 pub fn new() -> Self {
60 Self {
61 entries: HashMap::new(),
62 count: 0,
63 }
64 }
65
66 #[must_use]
68 pub fn with_capacity(cap: usize) -> Self {
69 Self {
70 entries: HashMap::with_capacity(cap),
71 count: 0,
72 }
73 }
74
75 pub fn insert(&mut self, entry: HashEntry) {
77 self.entries
78 .entry(entry.digest.clone())
79 .or_default()
80 .push(entry);
81 self.count += 1;
82 }
83
84 #[must_use]
86 pub fn contains(&self, digest: &str) -> bool {
87 self.entries.contains_key(digest)
88 }
89
90 #[must_use]
92 pub fn get(&self, digest: &str) -> Option<&[HashEntry]> {
93 self.entries.get(digest).map(Vec::as_slice)
94 }
95
96 #[must_use]
98 pub fn len(&self) -> usize {
99 self.count
100 }
101
102 #[must_use]
104 pub fn is_empty(&self) -> bool {
105 self.count == 0
106 }
107
108 #[must_use]
110 pub fn unique_count(&self) -> usize {
111 self.entries.len()
112 }
113
114 #[must_use]
116 pub fn duplicate_groups(&self) -> Vec<(&str, &[HashEntry])> {
117 self.entries
118 .iter()
119 .filter(|(_, v)| v.len() > 1)
120 .map(|(k, v)| (k.as_str(), v.as_slice()))
121 .collect()
122 }
123
124 #[must_use]
126 pub fn duplicate_count(&self) -> usize {
127 self.entries
128 .values()
129 .filter(|v| v.len() > 1)
130 .map(|v| v.len() - 1)
131 .sum()
132 }
133
134 #[must_use]
136 pub fn duplicate_bytes(&self) -> u64 {
137 self.entries
138 .values()
139 .filter(|v| v.len() > 1)
140 .flat_map(|v| v.iter().skip(1))
141 .map(|e| e.file_size)
142 .sum()
143 }
144
145 pub fn remove(&mut self, digest: &str) -> Vec<HashEntry> {
147 if let Some(removed) = self.entries.remove(digest) {
148 self.count -= removed.len();
149 removed
150 } else {
151 Vec::new()
152 }
153 }
154
155 pub fn clear(&mut self) {
157 self.entries.clear();
158 self.count = 0;
159 }
160
161 pub fn digests(&self) -> impl Iterator<Item = &str> {
163 self.entries.keys().map(String::as_str)
164 }
165
166 #[must_use]
170 pub fn dedup_ratio(&self) -> f64 {
171 if self.count == 0 {
172 return 1.0;
173 }
174 self.unique_count() as f64 / self.count as f64
175 }
176}
177
178impl Default for HashStore {
179 fn default() -> Self {
180 Self::new()
181 }
182}
183
184#[cfg(test)]
189mod tests {
190 use super::*;
191
192 fn make_entry(digest: &str, path: &str, size: u64) -> HashEntry {
193 HashEntry::new(digest, PathBuf::from(path), size, 1000)
194 }
195
196 #[test]
197 fn test_new_store_empty() {
198 let store = HashStore::new();
199 assert!(store.is_empty());
200 assert_eq!(store.len(), 0);
201 assert_eq!(store.unique_count(), 0);
202 }
203
204 #[test]
205 fn test_insert_and_lookup() {
206 let mut store = HashStore::new();
207 store.insert(make_entry("aaa", "/a.mp4", 100));
208 assert!(store.contains("aaa"));
209 assert!(!store.contains("bbb"));
210 assert_eq!(store.len(), 1);
211 }
212
213 #[test]
214 fn test_get_entries() {
215 let mut store = HashStore::new();
216 store.insert(make_entry("hash1", "/file1.mp4", 200));
217 let entries = store.get("hash1").expect("operation should succeed");
218 assert_eq!(entries.len(), 1);
219 assert_eq!(entries[0].file_size, 200);
220 }
221
222 #[test]
223 fn test_duplicate_groups() {
224 let mut store = HashStore::new();
225 store.insert(make_entry("dup", "/a.mp4", 100));
226 store.insert(make_entry("dup", "/b.mp4", 100));
227 store.insert(make_entry("unique", "/c.mp4", 50));
228
229 let groups = store.duplicate_groups();
230 assert_eq!(groups.len(), 1);
231 assert_eq!(groups[0].0, "dup");
232 assert_eq!(groups[0].1.len(), 2);
233 }
234
235 #[test]
236 fn test_duplicate_count() {
237 let mut store = HashStore::new();
238 store.insert(make_entry("d", "/a.mp4", 10));
239 store.insert(make_entry("d", "/b.mp4", 10));
240 store.insert(make_entry("d", "/c.mp4", 10));
241 store.insert(make_entry("u", "/d.mp4", 10));
242
243 assert_eq!(store.duplicate_count(), 2); }
245
246 #[test]
247 fn test_duplicate_bytes() {
248 let mut store = HashStore::new();
249 store.insert(make_entry("h", "/a.mp4", 1000));
250 store.insert(make_entry("h", "/b.mp4", 1000));
251 store.insert(make_entry("h", "/c.mp4", 1500));
252
253 assert_eq!(store.duplicate_bytes(), 2500);
255 }
256
257 #[test]
258 fn test_remove() {
259 let mut store = HashStore::new();
260 store.insert(make_entry("rm", "/a.mp4", 10));
261 store.insert(make_entry("rm", "/b.mp4", 20));
262 store.insert(make_entry("keep", "/c.mp4", 30));
263
264 let removed = store.remove("rm");
265 assert_eq!(removed.len(), 2);
266 assert!(!store.contains("rm"));
267 assert_eq!(store.len(), 1);
268 }
269
270 #[test]
271 fn test_remove_nonexistent() {
272 let mut store = HashStore::new();
273 let removed = store.remove("nope");
274 assert!(removed.is_empty());
275 }
276
277 #[test]
278 fn test_clear() {
279 let mut store = HashStore::new();
280 store.insert(make_entry("a", "/x.mp4", 1));
281 store.insert(make_entry("b", "/y.mp4", 2));
282 store.clear();
283 assert!(store.is_empty());
284 assert_eq!(store.unique_count(), 0);
285 }
286
287 #[test]
288 fn test_dedup_ratio_no_dupes() {
289 let mut store = HashStore::new();
290 store.insert(make_entry("a", "/1.mp4", 10));
291 store.insert(make_entry("b", "/2.mp4", 20));
292 assert!((store.dedup_ratio() - 1.0).abs() < 1e-9);
293 }
294
295 #[test]
296 fn test_dedup_ratio_all_dupes() {
297 let mut store = HashStore::new();
298 store.insert(make_entry("same", "/1.mp4", 10));
299 store.insert(make_entry("same", "/2.mp4", 10));
300 store.insert(make_entry("same", "/3.mp4", 10));
301 assert!((store.dedup_ratio() - 1.0 / 3.0).abs() < 1e-9);
302 }
303
304 #[test]
305 fn test_dedup_ratio_empty() {
306 let store = HashStore::new();
307 assert!((store.dedup_ratio() - 1.0).abs() < 1e-9);
308 }
309
310 #[test]
311 fn test_with_capacity() {
312 let store = HashStore::with_capacity(1000);
313 assert!(store.is_empty());
314 }
315}