1use chrono::{Datelike, Utc};
4
5use crate::models::{AssetType, DuplicateGroup};
6use crate::scoring::{detect_conflicts, MetadataConflict};
7
8use super::scenarios::{ScenarioMatch, TestScenario};
9
/// Largest absolute latitude or longitude difference at which two GPS
/// readings are still treated as "within threshold" by the conflict detector
/// in this file.
// NOTE(review): presumably expressed in decimal degrees — confirm against the
// EXIF model and the scoring module.
const GPS_THRESHOLD: f64 = 0.0001;

/// File size in bytes (50 MiB) above which an asset is flagged as a large
/// file; the comparison is strict (`size > LARGE_FILE_THRESHOLD`).
const LARGE_FILE_THRESHOLD: u64 = 50 * 1024 * 1024;
15
16pub fn detect_scenarios(group: &DuplicateGroup) -> Vec<ScenarioMatch> {
20 let mut matches = Vec::new();
21 let dup_id = &group.duplicate_id;
22
23 detect_group_size_scenarios(group, &mut matches, dup_id);
25
26 detect_dimension_scenarios(group, &mut matches, dup_id);
28
29 detect_consolidation_scenarios(group, &mut matches, dup_id);
31
32 detect_conflict_scenarios(group, &mut matches, dup_id);
34
35 detect_edge_case_scenarios(group, &mut matches, dup_id);
37
38 matches
39}
40
41fn detect_group_size_scenarios(
43 group: &DuplicateGroup,
44 matches: &mut Vec<ScenarioMatch>,
45 dup_id: &str,
46) {
47 let count = group.assets.len();
48
49 if count == 1 {
50 matches.push(ScenarioMatch {
51 scenario: TestScenario::X1SingleAssetGroup,
52 duplicate_id: dup_id.to_string(),
53 details: "Only 1 asset in group".to_string(),
54 });
55 }
56
57 if count >= 3 {
58 matches.push(ScenarioMatch {
59 scenario: TestScenario::W7ThreePlusDuplicates,
60 duplicate_id: dup_id.to_string(),
61 details: format!("{} assets in group", count),
62 });
63 }
64
65 if count >= 10 {
66 matches.push(ScenarioMatch {
67 scenario: TestScenario::X2LargeGroup,
68 duplicate_id: dup_id.to_string(),
69 details: format!("{} assets in group", count),
70 });
71 }
72}
73
74fn detect_dimension_scenarios(
76 group: &DuplicateGroup,
77 matches: &mut Vec<ScenarioMatch>,
78 dup_id: &str,
79) {
80 let dims: Vec<Option<(u32, u32)>> = group
82 .assets
83 .iter()
84 .map(|a| {
85 a.exif_info.as_ref().and_then(|e| {
86 match (e.exif_image_width, e.exif_image_height) {
87 (Some(w), Some(h)) => Some((w, h)),
88 _ => None,
89 }
90 })
91 })
92 .collect();
93
94 let has_dims: Vec<(u32, u32)> = dims.iter().filter_map(|d| *d).collect();
95 let with_dims_count = has_dims.len();
96 let without_dims_count = dims.len() - with_dims_count;
97
98 if with_dims_count == 0 && dims.len() > 1 {
100 matches.push(ScenarioMatch {
101 scenario: TestScenario::W6AllMissingDimensions,
102 duplicate_id: dup_id.to_string(),
103 details: format!("None of {} assets have dimensions", dims.len()),
104 });
105 return; }
107
108 if with_dims_count == 1 && without_dims_count > 0 {
110 matches.push(ScenarioMatch {
111 scenario: TestScenario::W5OnlyOneHasDimensions,
112 duplicate_id: dup_id.to_string(),
113 details: format!(
114 "1 asset has dimensions, {} missing",
115 without_dims_count
116 ),
117 });
118 }
119
120 if with_dims_count > 1 && without_dims_count > 0 {
122 matches.push(ScenarioMatch {
123 scenario: TestScenario::W4SomeMissingDimensions,
124 duplicate_id: dup_id.to_string(),
125 details: format!(
126 "{} have dimensions, {} missing",
127 with_dims_count, without_dims_count
128 ),
129 });
130 }
131
132 if has_dims.len() >= 2 {
134 let pixels: Vec<u64> = has_dims.iter().map(|(w, h)| u64::from(*w) * u64::from(*h)).collect();
135 let all_same_pixels = pixels.iter().all(|&p| p == pixels[0]);
136
137 let all_same_dims = has_dims.iter().all(|d| *d == has_dims[0]);
139
140 if all_same_dims {
141 let sizes: Vec<Option<u64>> = group
143 .assets
144 .iter()
145 .filter_map(|a| a.exif_info.as_ref())
146 .map(|e| e.file_size_in_byte)
147 .collect();
148
149 let valid_sizes: Vec<u64> = sizes.iter().filter_map(|s| *s).collect();
150 if valid_sizes.len() >= 2 {
151 let all_same_size = valid_sizes.iter().all(|&s| s == valid_sizes[0]);
152 if all_same_size {
153 matches.push(ScenarioMatch {
154 scenario: TestScenario::W3SameDimensionsSameSize,
155 duplicate_id: dup_id.to_string(),
156 details: format!(
157 "{}x{}, all {} bytes",
158 has_dims[0].0, has_dims[0].1, valid_sizes[0]
159 ),
160 });
161 } else {
162 matches.push(ScenarioMatch {
163 scenario: TestScenario::W2SameDimensionsDifferentSize,
164 duplicate_id: dup_id.to_string(),
165 details: format!(
166 "{}x{}, sizes: {:?}",
167 has_dims[0].0, has_dims[0].1, valid_sizes
168 ),
169 });
170 }
171 }
172 } else if all_same_pixels {
173 matches.push(ScenarioMatch {
175 scenario: TestScenario::W8SamePixelsDifferentAspect,
176 duplicate_id: dup_id.to_string(),
177 details: format!(
178 "Same {} pixels, dims: {:?}",
179 pixels[0], has_dims
180 ),
181 });
182 } else {
183 matches.push(ScenarioMatch {
185 scenario: TestScenario::W1ClearDimensionWinner,
186 duplicate_id: dup_id.to_string(),
187 details: format!("Dimensions: {:?}", has_dims),
188 });
189 }
190 }
191}
192
193fn detect_consolidation_scenarios(
195 group: &DuplicateGroup,
196 matches: &mut Vec<ScenarioMatch>,
197 dup_id: &str,
198) {
199 if group.assets.len() < 2 {
200 return;
201 }
202
203 let mut sorted = group.assets.clone();
205 sorted.sort_by(|a, b| {
206 let pixels_a = a
207 .exif_info
208 .as_ref()
209 .and_then(|e| match (e.exif_image_width, e.exif_image_height) {
210 (Some(w), Some(h)) => Some(u64::from(w) * u64::from(h)),
211 _ => None,
212 })
213 .unwrap_or(0);
214 let pixels_b = b
215 .exif_info
216 .as_ref()
217 .and_then(|e| match (e.exif_image_width, e.exif_image_height) {
218 (Some(w), Some(h)) => Some(u64::from(w) * u64::from(h)),
219 _ => None,
220 })
221 .unwrap_or(0);
222
223 match pixels_b.cmp(&pixels_a) {
224 std::cmp::Ordering::Equal => {
225 let size_a = a
226 .exif_info
227 .as_ref()
228 .and_then(|e| e.file_size_in_byte)
229 .unwrap_or(0);
230 let size_b = b
231 .exif_info
232 .as_ref()
233 .and_then(|e| e.file_size_in_byte)
234 .unwrap_or(0);
235 size_b.cmp(&size_a)
236 }
237 other => other,
238 }
239 });
240
241 let winner = &sorted[0];
242 let losers = &sorted[1..];
243
244 let winner_exif = winner.exif_info.as_ref();
246 let winner_has_gps = winner_exif.is_some_and(|e| e.has_gps());
247 let winner_has_datetime = winner_exif.is_some_and(|e| e.date_time_original.is_some());
248 let winner_has_description = winner_exif
249 .is_some_and(|e| e.description.as_ref().is_some_and(|d| !d.is_empty()));
250
251 let any_loser_has_gps = losers.iter().any(|l| {
253 l.exif_info.as_ref().is_some_and(|e| e.has_gps())
254 });
255 let any_loser_has_datetime = losers.iter().any(|l| {
256 l.exif_info.as_ref().is_some_and(|e| e.date_time_original.is_some())
257 });
258 let any_loser_has_description = losers.iter().any(|l| {
259 l.exif_info.as_ref()
260 .is_some_and(|e| e.description.as_ref().is_some_and(|d| !d.is_empty()))
261 });
262
263 if winner_has_gps && winner_has_datetime && winner_has_description {
265 matches.push(ScenarioMatch {
266 scenario: TestScenario::C8WinnerHasEverything,
267 duplicate_id: dup_id.to_string(),
268 details: "Winner has GPS, datetime, description".to_string(),
269 });
270 }
271
272 if winner_has_gps && any_loser_has_gps {
274 matches.push(ScenarioMatch {
275 scenario: TestScenario::C5BothHaveGps,
276 duplicate_id: dup_id.to_string(),
277 details: "Winner and loser(s) have GPS".to_string(),
278 });
279 }
280
281 if !winner_has_gps && any_loser_has_gps {
283 matches.push(ScenarioMatch {
284 scenario: TestScenario::C1WinnerLacksGpsLoserHas,
285 duplicate_id: dup_id.to_string(),
286 details: "Winner missing GPS, loser has it".to_string(),
287 });
288 }
289
290 if !winner_has_datetime && any_loser_has_datetime {
292 matches.push(ScenarioMatch {
293 scenario: TestScenario::C2WinnerLacksDatetimeLoserHas,
294 duplicate_id: dup_id.to_string(),
295 details: "Winner missing datetime, loser has it".to_string(),
296 });
297 }
298
299 if !winner_has_description && any_loser_has_description {
301 matches.push(ScenarioMatch {
302 scenario: TestScenario::C3WinnerLacksDescriptionLoserHas,
303 duplicate_id: dup_id.to_string(),
304 details: "Winner missing description, loser has it".to_string(),
305 });
306 }
307
308 if !winner_has_gps && !winner_has_datetime && !winner_has_description {
310 let loser_has_all = losers.iter().any(|l| {
311 let e = l.exif_info.as_ref();
312 let has_gps = e.is_some_and(|e| e.has_gps());
313 let has_dt = e.is_some_and(|e| e.date_time_original.is_some());
314 let has_desc = e.is_some_and(|e| e.description.as_ref().is_some_and(|d| !d.is_empty()));
315 has_gps && has_dt && has_desc
316 });
317 if loser_has_all {
318 matches.push(ScenarioMatch {
319 scenario: TestScenario::C4WinnerLacksAllLoserHasAll,
320 duplicate_id: dup_id.to_string(),
321 details: "Winner lacks GPS/datetime/description, loser has all".to_string(),
322 });
323 }
324 }
325
326 if losers.len() >= 2 {
328 let loser_gps: Vec<bool> = losers
329 .iter()
330 .map(|l| l.exif_info.as_ref().is_some_and(|e| e.has_gps()))
331 .collect();
332 let loser_dt: Vec<bool> = losers
333 .iter()
334 .map(|l| l.exif_info.as_ref().is_some_and(|e| e.date_time_original.is_some()))
335 .collect();
336 let loser_desc: Vec<bool> = losers
337 .iter()
338 .map(|l| {
339 l.exif_info.as_ref()
340 .is_some_and(|e| e.description.as_ref().is_some_and(|d| !d.is_empty()))
341 })
342 .collect();
343
344 let gps_sources: Vec<usize> = loser_gps.iter().enumerate().filter_map(|(i, &v)| if v { Some(i) } else { None }).collect();
346 let dt_sources: Vec<usize> = loser_dt.iter().enumerate().filter_map(|(i, &v)| if v { Some(i) } else { None }).collect();
347 let desc_sources: Vec<usize> = loser_desc.iter().enumerate().filter_map(|(i, &v)| if v { Some(i) } else { None }).collect();
348
349 let contributions = [gps_sources.first(), dt_sources.first(), desc_sources.first()];
351 let unique_contributors: std::collections::HashSet<_> = contributions.iter().filter_map(|&o| o).collect();
352 if unique_contributors.len() >= 2 {
353 matches.push(ScenarioMatch {
354 scenario: TestScenario::C6MultipleLosersContribute,
355 duplicate_id: dup_id.to_string(),
356 details: "Different losers contribute different metadata".to_string(),
357 });
358 }
359 }
360
361 let winner_needs_gps = !winner_has_gps;
363 let winner_needs_datetime = !winner_has_datetime;
364 let winner_needs_description = !winner_has_description;
365
366 if (winner_needs_gps || winner_needs_datetime || winner_needs_description)
367 && !any_loser_has_gps
368 && !any_loser_has_datetime
369 && !any_loser_has_description
370 {
371 matches.push(ScenarioMatch {
372 scenario: TestScenario::C7NoLoserHasNeeded,
373 duplicate_id: dup_id.to_string(),
374 details: "Winner missing metadata, no loser has it".to_string(),
375 });
376 }
377}
378
379fn detect_conflict_scenarios(
381 group: &DuplicateGroup,
382 matches: &mut Vec<ScenarioMatch>,
383 dup_id: &str,
384) {
385 let conflicts = detect_conflicts(&group.assets);
386
387 if conflicts.is_empty() {
388 matches.push(ScenarioMatch {
389 scenario: TestScenario::F7NoConflicts,
390 duplicate_id: dup_id.to_string(),
391 details: "No metadata conflicts".to_string(),
392 });
393 return;
394 }
395
396 let mut has_gps_conflict = false;
397 let mut has_timezone_conflict = false;
398 let mut has_camera_conflict = false;
399 let mut has_capture_time_conflict = false;
400
401 for conflict in &conflicts {
402 match conflict {
403 MetadataConflict::Gps { values } => {
404 has_gps_conflict = true;
405 matches.push(ScenarioMatch {
406 scenario: TestScenario::F1GpsConflict,
407 duplicate_id: dup_id.to_string(),
408 details: format!("{} different locations", values.len()),
409 });
410 }
411 MetadataConflict::Timezone { values } => {
412 has_timezone_conflict = true;
413 matches.push(ScenarioMatch {
414 scenario: TestScenario::F3TimezoneConflict,
415 duplicate_id: dup_id.to_string(),
416 details: format!("Timezones: {:?}", values),
417 });
418 }
419 MetadataConflict::CameraInfo { values } => {
420 has_camera_conflict = true;
421 matches.push(ScenarioMatch {
422 scenario: TestScenario::F4CameraConflict,
423 duplicate_id: dup_id.to_string(),
424 details: format!("Cameras: {:?}", values),
425 });
426 }
427 MetadataConflict::CaptureTime { values } => {
428 has_capture_time_conflict = true;
429 matches.push(ScenarioMatch {
430 scenario: TestScenario::F5CaptureTimeConflict,
431 duplicate_id: dup_id.to_string(),
432 details: format!("Times: {:?}", values),
433 });
434 }
435 }
436 }
437
438 let conflict_count = [has_gps_conflict, has_timezone_conflict, has_camera_conflict, has_capture_time_conflict]
440 .iter()
441 .filter(|&&v| v)
442 .count();
443 if conflict_count >= 2 {
444 matches.push(ScenarioMatch {
445 scenario: TestScenario::F6MultipleConflicts,
446 duplicate_id: dup_id.to_string(),
447 details: format!("{} different conflict types", conflict_count),
448 });
449 }
450
451 if !has_gps_conflict {
453 let gps_values: Vec<(f64, f64)> = group
454 .assets
455 .iter()
456 .filter_map(|a| a.exif_info.as_ref())
457 .filter_map(|e| match (e.latitude, e.longitude) {
458 (Some(lat), Some(lon)) => Some((lat, lon)),
459 _ => None,
460 })
461 .collect();
462
463 if gps_values.len() >= 2 {
464 let mut all_within = true;
466 for i in 0..gps_values.len() {
467 for j in (i + 1)..gps_values.len() {
468 let (lat1, lon1) = gps_values[i];
469 let (lat2, lon2) = gps_values[j];
470 if (lat1 - lat2).abs() > GPS_THRESHOLD || (lon1 - lon2).abs() > GPS_THRESHOLD {
471 all_within = false;
472 break;
473 }
474 }
475 }
476 if all_within {
477 matches.push(ScenarioMatch {
478 scenario: TestScenario::F2GpsWithinThreshold,
479 duplicate_id: dup_id.to_string(),
480 details: format!("{} GPS values within threshold", gps_values.len()),
481 });
482 }
483 }
484 }
485}
486
487fn detect_edge_case_scenarios(
489 group: &DuplicateGroup,
490 matches: &mut Vec<ScenarioMatch>,
491 dup_id: &str,
492) {
493 for asset in &group.assets {
494 let filename = &asset.original_file_name;
495 let lowercase = filename.to_lowercase();
496
497 if let Some(size) = asset.exif_info.as_ref().and_then(|e| e.file_size_in_byte)
499 && size > LARGE_FILE_THRESHOLD
500 {
501 matches.push(ScenarioMatch {
502 scenario: TestScenario::X3LargeFile,
503 duplicate_id: dup_id.to_string(),
504 details: format!("{}: {} bytes", filename, size),
505 });
506 }
507
508 if filename.chars().any(|c| "!@#$%^&*()[]{}|;'\"<>?".contains(c)) {
510 matches.push(ScenarioMatch {
511 scenario: TestScenario::X4SpecialCharsFilename,
512 duplicate_id: dup_id.to_string(),
513 details: format!("Filename: {}", filename),
514 });
515 }
516
517 if asset.asset_type == AssetType::Video {
519 matches.push(ScenarioMatch {
520 scenario: TestScenario::X5Video,
521 duplicate_id: dup_id.to_string(),
522 details: format!("Video: {}", filename),
523 });
524 }
525
526 if lowercase.ends_with(".png") {
528 matches.push(ScenarioMatch {
529 scenario: TestScenario::X7Png,
530 duplicate_id: dup_id.to_string(),
531 details: format!("PNG: {}", filename),
532 });
533 }
534
535 if let Some(desc) = asset.exif_info.as_ref().and_then(|e| e.description.as_ref())
537 && !desc.is_ascii()
538 {
539 matches.push(ScenarioMatch {
540 scenario: TestScenario::X9UnicodeDescription,
541 duplicate_id: dup_id.to_string(),
542 details: format!("Description: {}", desc),
543 });
544 }
545
546 if let Some(dt) = asset.exif_info.as_ref().and_then(|e| e.date_time_original.as_ref())
548 && let Some(year) = extract_year(dt)
549 {
550 if year < 1990 {
551 matches.push(ScenarioMatch {
552 scenario: TestScenario::X10VeryOldDate,
553 duplicate_id: dup_id.to_string(),
554 details: format!("Date: {}", dt),
555 });
556 }
557
558 let current_year = Utc::now().year();
559 if year > current_year {
560 matches.push(ScenarioMatch {
561 scenario: TestScenario::X11FutureDate,
562 duplicate_id: dup_id.to_string(),
563 details: format!("Date: {} (future)", dt),
564 });
565 }
566 }
567 }
568}
569
/// Extracts the leading year from a date/time string.
///
/// Leading whitespace is ignored. The year is the text before the first
/// `:`, `-`, `/`, space or `T`, which covers EXIF-style `2021:05:01 10:00:00`
/// as well as ISO-style `2021-05-01T10:00:00` and `2021/05/01`.
///
/// Returns `None` when that leading text is not an integer or the value lies
/// outside the accepted range `1800..=2100`.
fn extract_year(date_str: &str) -> Option<i32> {
    // Splitting on every separator at once avoids building normalised copies
    // of the input just to read its first field.
    let year_str = date_str
        .trim_start()
        .split([':', '-', 'T', ' ', '/'])
        .next()?;
    let year = year_str.parse::<i32>().ok()?;
    (1800..=2100).contains(&year).then_some(year)
}