1#![allow(dead_code)]
4#![allow(clippy::cast_precision_loss)]
5#![allow(clippy::too_many_arguments)]
6
7use std::path::PathBuf;
8
/// Size and similarity figures for one group of duplicate files.
#[derive(Debug, Clone)]
pub struct GroupStats {
    /// Number of entries in the `members` slice the stats were computed from.
    pub count: usize,
    /// Sum of all member byte counts (saturating at `u64::MAX`).
    pub total_bytes: u64,
    /// Byte count of the representative, or 0 when it is not among the members.
    pub representative_bytes: u64,
    /// `total_bytes` minus `representative_bytes`.
    pub reclaimable_bytes: u64,
    /// Average similarity as supplied by the caller; stored without validation.
    pub avg_similarity: f64,
}

impl GroupStats {
    /// Builds the statistics for one group.
    ///
    /// * `members` - `(path, byte count)` pairs, one per file in the group.
    /// * `representative` - path of the group's representative; it is looked up
    ///   in `members` by path equality and the first match is used.
    /// * `avg_similarity` - copied into the result as-is.
    ///
    /// When `representative` does not appear in `members`, its size is taken as
    /// 0 and the whole group total is therefore reported as reclaimable.
    #[must_use]
    pub fn compute(
        members: &[(PathBuf, u64)],
        representative: &PathBuf,
        avg_similarity: f64,
    ) -> Self {
        // Saturate rather than overflow: a plain `sum()` panics in debug builds
        // and wraps in release builds when the sizes exceed `u64::MAX`.
        let total_bytes = members
            .iter()
            .map(|(_, size)| *size)
            .fold(0_u64, u64::saturating_add);

        let representative_bytes = members
            .iter()
            .find(|(path, _)| path == representative)
            .map_or(0, |(_, size)| *size);

        Self {
            count: members.len(),
            total_bytes,
            representative_bytes,
            reclaimable_bytes: total_bytes.saturating_sub(representative_bytes),
            avg_similarity,
        }
    }
}
48
49#[derive(Debug, Clone, Default)]
51pub struct SpaceSavingsReport {
52 pub total_files: usize,
54 pub total_bytes: u64,
56 pub duplicate_groups: usize,
58 pub duplicate_files: usize,
60 pub reclaimable_bytes: u64,
62 pub savings_percent: f64,
64 pub group_stats: Vec<GroupStats>,
66}
67
68impl SpaceSavingsReport {
69 #[must_use]
71 pub fn new() -> Self {
72 Self::default()
73 }
74
75 pub fn add_group(&mut self, stats: GroupStats) {
77 self.duplicate_groups += 1;
78 self.duplicate_files += stats.count.saturating_sub(1);
79 self.reclaimable_bytes += stats.reclaimable_bytes;
80 self.group_stats.push(stats);
81 }
82
83 pub fn finalise(&mut self, total_files: usize, total_bytes: u64) {
85 self.total_files = total_files;
86 self.total_bytes = total_bytes;
87 self.savings_percent = if total_bytes > 0 {
88 100.0 * self.reclaimable_bytes as f64 / total_bytes as f64
89 } else {
90 0.0
91 };
92 }
93
94 #[must_use]
96 pub fn summary(&self) -> String {
97 format!(
98 "{} duplicate groups | {} redundant files | {} MB reclaimable ({:.1}%)",
99 self.duplicate_groups,
100 self.duplicate_files,
101 self.reclaimable_bytes / (1024 * 1024),
102 self.savings_percent,
103 )
104 }
105}
106
/// Urgency attached to an [`ActionRecommendation`].
///
/// Variants are declared from least to most severe, so the derived ordering
/// gives `Info < Suggestion < Warning < Critical`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum ActionSeverity {
    /// Lowest level; `ActionRecommendation::from_group` uses it when nothing is reclaimable.
    Info,
    /// Some space is reclaimable, up to and including 100,000,000 bytes.
    Suggestion,
    /// More than 100,000,000 bytes are reclaimable.
    Warning,
    /// More than 1,000,000,000 bytes are reclaimable.
    Critical,
}
119
120#[derive(Debug, Clone)]
122pub struct ActionRecommendation {
123 pub severity: ActionSeverity,
125 pub message: String,
127 pub files_to_remove: Vec<PathBuf>,
129 pub keep: Option<PathBuf>,
131}
132
133impl ActionRecommendation {
134 #[must_use]
136 pub fn from_group(
137 representative: Option<PathBuf>,
138 members: &[PathBuf],
139 reclaimable_bytes: u64,
140 ) -> Self {
141 let severity = if reclaimable_bytes > 1_000_000_000 {
142 ActionSeverity::Critical
143 } else if reclaimable_bytes > 100_000_000 {
144 ActionSeverity::Warning
145 } else if reclaimable_bytes > 0 {
146 ActionSeverity::Suggestion
147 } else {
148 ActionSeverity::Info
149 };
150
151 let files_to_remove: Vec<PathBuf> = members
152 .iter()
153 .filter(|p| Some(*p) != representative.as_ref())
154 .cloned()
155 .collect();
156
157 let message = format!(
158 "Remove {} duplicate(s) to reclaim {} MB",
159 files_to_remove.len(),
160 reclaimable_bytes / (1024 * 1024),
161 );
162
163 Self {
164 severity,
165 message,
166 files_to_remove,
167 keep: representative,
168 }
169 }
170}
171
172#[must_use]
174pub fn generate_recommendations(
175 report: &SpaceSavingsReport,
176 clusters: &[ClusterInfo],
177) -> Vec<ActionRecommendation> {
178 clusters
179 .iter()
180 .zip(report.group_stats.iter())
181 .map(|(cluster, stats)| {
182 ActionRecommendation::from_group(
183 cluster.representative.clone(),
184 &cluster.members,
185 stats.reclaimable_bytes,
186 )
187 })
188 .collect()
189}
190
/// The file paths that make up one cluster, plus its optional representative.
#[derive(Debug, Clone)]
pub struct ClusterInfo {
    /// Paths of all files in the cluster.
    pub members: Vec<PathBuf>,
    /// The cluster's representative, if one was chosen; `generate_recommendations`
    /// forwards it as the file to keep.
    pub representative: Option<PathBuf>,
}

impl ClusterInfo {
    /// Wraps the given members and representative without any validation
    /// (the representative is not required to be one of the members).
    #[must_use]
    pub fn new(members: Vec<PathBuf>, representative: Option<PathBuf>) -> Self {
        Self {
            members,
            representative,
        }
    }
}
210
/// Equal-width histogram of similarity scores over `[0.0, 1.0]`.
#[derive(Debug, Clone)]
pub struct SimilarityHistogram {
    /// Hit count per bucket. Bucket `i` receives scores with
    /// `floor(score * num_buckets) == i`; a score of exactly 1.0 goes to the
    /// last bucket.
    pub buckets: Vec<u64>,
    /// Number of buckets requested at construction.
    pub num_buckets: usize,
}

impl SimilarityHistogram {
    /// Creates a histogram with `num_buckets` empty buckets.
    ///
    /// `num_buckets == 0` is accepted; such a histogram ignores every
    /// recorded score.
    #[must_use]
    pub fn new(num_buckets: usize) -> Self {
        Self {
            buckets: vec![0; num_buckets],
            num_buckets,
        }
    }

    /// Counts one score, clamping it into `[0.0, 1.0]` first.
    ///
    /// Does nothing when the histogram has no buckets. A NaN score is not
    /// changed by the clamp and is turned into index 0 by the float-to-integer
    /// cast, so it is counted in the first bucket.
    pub fn record(&mut self, score: f64) {
        // Both fields are public and can be edited independently, so only
        // trust the part they agree on. This also covers the zero-bucket case,
        // where `num_buckets - 1` would underflow.
        let bucket_count = self.num_buckets.min(self.buckets.len());
        if bucket_count == 0 {
            return;
        }

        let clamped = score.clamp(0.0, 1.0);
        // A score of exactly 1.0 scales to `bucket_count`; `min` pulls it back
        // into the last bucket.
        let idx = ((clamped * bucket_count as f64) as usize).min(bucket_count - 1);
        self.buckets[idx] += 1;
    }

    /// Total number of recorded scores (sum over all buckets).
    #[must_use]
    pub fn total(&self) -> u64 {
        self.buckets.iter().sum()
    }

    /// Index of the bucket with the highest count.
    ///
    /// On ties the highest index wins (`max_by_key` returns the last maximum),
    /// so a histogram with no recorded scores reports its last bucket.
    /// Returns 0 when there are no buckets at all.
    #[must_use]
    pub fn mode_bucket(&self) -> usize {
        self.buckets
            .iter()
            .enumerate()
            .max_by_key(|&(_, &count)| count)
            .map_or(0, |(idx, _)| idx)
    }
}
254
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a `PathBuf` from a string literal.
    fn pb(s: &str) -> PathBuf {
        PathBuf::from(s)
    }

    #[test]
    fn test_group_stats_compute() {
        let members = vec![(pb("a.mp4"), 1000u64), (pb("b.mp4"), 2000u64)];
        let rep = pb("a.mp4");
        let stats = GroupStats::compute(&members, &rep, 0.95);
        assert_eq!(stats.count, 2);
        assert_eq!(stats.total_bytes, 3000);
        assert_eq!(stats.representative_bytes, 1000);
        assert_eq!(stats.reclaimable_bytes, 2000);
        assert!((stats.avg_similarity - 0.95).abs() < 1e-9);
    }

    /// A representative that is not a member contributes 0 bytes, so the
    /// whole group counts as reclaimable.
    #[test]
    fn test_group_stats_missing_representative() {
        let members = vec![(pb("a.mp4"), 500u64)];
        let rep = pb("missing.mp4");
        let stats = GroupStats::compute(&members, &rep, 0.0);
        assert_eq!(stats.representative_bytes, 0);
        assert_eq!(stats.reclaimable_bytes, 500);
    }

    #[test]
    fn test_space_savings_report_add_and_finalise() {
        let mut report = SpaceSavingsReport::new();
        let stats = GroupStats {
            count: 3,
            total_bytes: 9_000_000,
            representative_bytes: 3_000_000,
            reclaimable_bytes: 6_000_000,
            avg_similarity: 0.98,
        };
        report.add_group(stats);
        report.finalise(10, 20_000_000);
        assert_eq!(report.duplicate_groups, 1);
        assert_eq!(report.duplicate_files, 2);
        assert_eq!(report.reclaimable_bytes, 6_000_000);
        assert!((report.savings_percent - 30.0).abs() < 1e-6);
    }

    /// A zero byte total must not divide by zero.
    #[test]
    fn test_space_savings_report_zero_total() {
        let mut report = SpaceSavingsReport::new();
        report.finalise(0, 0);
        assert_eq!(report.savings_percent, 0.0);
    }

    #[test]
    fn test_space_savings_report_summary() {
        let mut report = SpaceSavingsReport::new();
        let stats = GroupStats {
            count: 2,
            total_bytes: 2_097_152,
            representative_bytes: 1_048_576,
            reclaimable_bytes: 1_048_576,
            avg_similarity: 0.9,
        };
        report.add_group(stats);
        report.finalise(5, 10_485_760);
        let s = report.summary();
        assert!(s.contains("1 duplicate groups"));
        assert!(s.contains("1 redundant files"));
    }

    #[test]
    fn test_action_severity_ordering() {
        assert!(ActionSeverity::Info < ActionSeverity::Suggestion);
        assert!(ActionSeverity::Suggestion < ActionSeverity::Warning);
        assert!(ActionSeverity::Warning < ActionSeverity::Critical);
    }

    #[test]
    fn test_action_recommendation_critical() {
        let members = vec![pb("a.mp4"), pb("b.mp4")];
        let rec = ActionRecommendation::from_group(Some(pb("a.mp4")), &members, 2_000_000_000);
        assert_eq!(rec.severity, ActionSeverity::Critical);
        assert_eq!(rec.files_to_remove.len(), 1);
        assert_eq!(rec.keep, Some(pb("a.mp4")));
    }

    #[test]
    fn test_action_recommendation_suggestion() {
        let members = vec![pb("a.mp4"), pb("b.mp4")];
        let rec = ActionRecommendation::from_group(Some(pb("a.mp4")), &members, 50_000_000);
        assert_eq!(rec.severity, ActionSeverity::Suggestion);
    }

    #[test]
    fn test_action_recommendation_info() {
        let members = vec![pb("a.mp4")];
        let rec = ActionRecommendation::from_group(Some(pb("a.mp4")), &members, 0);
        assert_eq!(rec.severity, ActionSeverity::Info);
        assert!(rec.files_to_remove.is_empty());
    }

    #[test]
    fn test_generate_recommendations() {
        let mut report = SpaceSavingsReport::new();
        report.add_group(GroupStats {
            count: 2,
            total_bytes: 200,
            representative_bytes: 100,
            reclaimable_bytes: 100,
            avg_similarity: 0.9,
        });
        report.finalise(2, 200);
        let clusters = vec![ClusterInfo::new(
            vec![pb("a.mp4"), pb("b.mp4")],
            Some(pb("a.mp4")),
        )];
        let recs = generate_recommendations(&report, &clusters);
        assert_eq!(recs.len(), 1);
        assert_eq!(recs[0].files_to_remove.len(), 1);
    }

    #[test]
    fn test_similarity_histogram_record_and_total() {
        let mut h = SimilarityHistogram::new(10);
        h.record(0.0);
        h.record(0.5);
        h.record(0.5);
        h.record(1.0);
        assert_eq!(h.total(), 4);
    }

    /// Out-of-range scores are clamped into `[0.0, 1.0]` and still counted.
    #[test]
    fn test_similarity_histogram_clamp() {
        let mut h = SimilarityHistogram::new(10);
        h.record(-0.1);
        h.record(1.5);
        assert_eq!(h.total(), 2);
    }

    #[test]
    fn test_similarity_histogram_mode() {
        let mut h = SimilarityHistogram::new(10);
        h.record(0.95);
        h.record(0.96);
        h.record(0.97);
        h.record(0.1);
        assert_eq!(h.mode_bucket(), 9);
    }
}