Skip to main content

immich_lib/testing/
detector.rs

1//! Scenario detection logic for duplicate groups.
2
3use chrono::{Datelike, Utc};
4
5use crate::models::{AssetType, DuplicateGroup};
6use crate::scoring::{detect_conflicts, MetadataConflict};
7
8use super::scenarios::{ScenarioMatch, TestScenario};
9
/// Largest latitude or longitude difference, in degrees, that is still
/// treated as the same location (0.0001 degrees, roughly 11 meters).
///
/// Each axis is compared against this value independently.
const GPS_THRESHOLD: f64 = 1e-4;

/// Size in bytes (50 MiB) that a file must strictly exceed to count as large.
const LARGE_FILE_THRESHOLD: u64 = 50 * 1024 * 1024;
15
16/// Detect all matching test scenarios for a duplicate group.
17///
18/// Analyzes the group and returns matches for all applicable scenarios.
19pub fn detect_scenarios(group: &DuplicateGroup) -> Vec<ScenarioMatch> {
20    let mut matches = Vec::new();
21    let dup_id = &group.duplicate_id;
22
23    // Group size checks
24    detect_group_size_scenarios(group, &mut matches, dup_id);
25
26    // Dimension-based winner selection
27    detect_dimension_scenarios(group, &mut matches, dup_id);
28
29    // Consolidation scenarios (winner vs loser metadata)
30    detect_consolidation_scenarios(group, &mut matches, dup_id);
31
32    // Conflict scenarios
33    detect_conflict_scenarios(group, &mut matches, dup_id);
34
35    // Edge cases
36    detect_edge_case_scenarios(group, &mut matches, dup_id);
37
38    matches
39}
40
41/// Detect group size scenarios (W7, X1, X2).
42fn detect_group_size_scenarios(
43    group: &DuplicateGroup,
44    matches: &mut Vec<ScenarioMatch>,
45    dup_id: &str,
46) {
47    let count = group.assets.len();
48
49    if count == 1 {
50        matches.push(ScenarioMatch {
51            scenario: TestScenario::X1SingleAssetGroup,
52            duplicate_id: dup_id.to_string(),
53            details: "Only 1 asset in group".to_string(),
54        });
55    }
56
57    if count >= 3 {
58        matches.push(ScenarioMatch {
59            scenario: TestScenario::W7ThreePlusDuplicates,
60            duplicate_id: dup_id.to_string(),
61            details: format!("{} assets in group", count),
62        });
63    }
64
65    if count >= 10 {
66        matches.push(ScenarioMatch {
67            scenario: TestScenario::X2LargeGroup,
68            duplicate_id: dup_id.to_string(),
69            details: format!("{} assets in group", count),
70        });
71    }
72}
73
74/// Detect dimension-based winner selection scenarios (W1-W6, W8).
75fn detect_dimension_scenarios(
76    group: &DuplicateGroup,
77    matches: &mut Vec<ScenarioMatch>,
78    dup_id: &str,
79) {
80    // Collect dimensions for each asset
81    let dims: Vec<Option<(u32, u32)>> = group
82        .assets
83        .iter()
84        .map(|a| {
85            a.exif_info.as_ref().and_then(|e| {
86                match (e.exif_image_width, e.exif_image_height) {
87                    (Some(w), Some(h)) => Some((w, h)),
88                    _ => None,
89                }
90            })
91        })
92        .collect();
93
94    let has_dims: Vec<(u32, u32)> = dims.iter().filter_map(|d| *d).collect();
95    let with_dims_count = has_dims.len();
96    let without_dims_count = dims.len() - with_dims_count;
97
98    // W6: All missing dimensions
99    if with_dims_count == 0 && dims.len() > 1 {
100        matches.push(ScenarioMatch {
101            scenario: TestScenario::W6AllMissingDimensions,
102            duplicate_id: dup_id.to_string(),
103            details: format!("None of {} assets have dimensions", dims.len()),
104        });
105        return; // Can't check other dimension scenarios
106    }
107
108    // W5: Only one has dimensions
109    if with_dims_count == 1 && without_dims_count > 0 {
110        matches.push(ScenarioMatch {
111            scenario: TestScenario::W5OnlyOneHasDimensions,
112            duplicate_id: dup_id.to_string(),
113            details: format!(
114                "1 asset has dimensions, {} missing",
115                without_dims_count
116            ),
117        });
118    }
119
120    // W4: Some missing dimensions (but not all, not just one)
121    if with_dims_count > 1 && without_dims_count > 0 {
122        matches.push(ScenarioMatch {
123            scenario: TestScenario::W4SomeMissingDimensions,
124            duplicate_id: dup_id.to_string(),
125            details: format!(
126                "{} have dimensions, {} missing",
127                with_dims_count, without_dims_count
128            ),
129        });
130    }
131
132    // Now analyze dimension differences (only if we have multiple with dims)
133    if has_dims.len() >= 2 {
134        let pixels: Vec<u64> = has_dims.iter().map(|(w, h)| u64::from(*w) * u64::from(*h)).collect();
135        let all_same_pixels = pixels.iter().all(|&p| p == pixels[0]);
136
137        // Check if all dimensions are exactly the same
138        let all_same_dims = has_dims.iter().all(|d| *d == has_dims[0]);
139
140        if all_same_dims {
141            // W2 or W3: Same dimensions, check file sizes
142            let sizes: Vec<Option<u64>> = group
143                .assets
144                .iter()
145                .filter_map(|a| a.exif_info.as_ref())
146                .map(|e| e.file_size_in_byte)
147                .collect();
148
149            let valid_sizes: Vec<u64> = sizes.iter().filter_map(|s| *s).collect();
150            if valid_sizes.len() >= 2 {
151                let all_same_size = valid_sizes.iter().all(|&s| s == valid_sizes[0]);
152                if all_same_size {
153                    matches.push(ScenarioMatch {
154                        scenario: TestScenario::W3SameDimensionsSameSize,
155                        duplicate_id: dup_id.to_string(),
156                        details: format!(
157                            "{}x{}, all {} bytes",
158                            has_dims[0].0, has_dims[0].1, valid_sizes[0]
159                        ),
160                    });
161                } else {
162                    matches.push(ScenarioMatch {
163                        scenario: TestScenario::W2SameDimensionsDifferentSize,
164                        duplicate_id: dup_id.to_string(),
165                        details: format!(
166                            "{}x{}, sizes: {:?}",
167                            has_dims[0].0, has_dims[0].1, valid_sizes
168                        ),
169                    });
170                }
171            }
172        } else if all_same_pixels {
173            // W8: Same pixel count, different aspect ratio
174            matches.push(ScenarioMatch {
175                scenario: TestScenario::W8SamePixelsDifferentAspect,
176                duplicate_id: dup_id.to_string(),
177                details: format!(
178                    "Same {} pixels, dims: {:?}",
179                    pixels[0], has_dims
180                ),
181            });
182        } else {
183            // W1: Clear dimension winner
184            matches.push(ScenarioMatch {
185                scenario: TestScenario::W1ClearDimensionWinner,
186                duplicate_id: dup_id.to_string(),
187                details: format!("Dimensions: {:?}", has_dims),
188            });
189        }
190    }
191}
192
193/// Detect consolidation scenarios (C1-C8).
194fn detect_consolidation_scenarios(
195    group: &DuplicateGroup,
196    matches: &mut Vec<ScenarioMatch>,
197    dup_id: &str,
198) {
199    if group.assets.len() < 2 {
200        return;
201    }
202
203    // Determine winner based on dimensions (same logic as DuplicateAnalysis)
204    let mut sorted = group.assets.clone();
205    sorted.sort_by(|a, b| {
206        let pixels_a = a
207            .exif_info
208            .as_ref()
209            .and_then(|e| match (e.exif_image_width, e.exif_image_height) {
210                (Some(w), Some(h)) => Some(u64::from(w) * u64::from(h)),
211                _ => None,
212            })
213            .unwrap_or(0);
214        let pixels_b = b
215            .exif_info
216            .as_ref()
217            .and_then(|e| match (e.exif_image_width, e.exif_image_height) {
218                (Some(w), Some(h)) => Some(u64::from(w) * u64::from(h)),
219                _ => None,
220            })
221            .unwrap_or(0);
222
223        match pixels_b.cmp(&pixels_a) {
224            std::cmp::Ordering::Equal => {
225                let size_a = a
226                    .exif_info
227                    .as_ref()
228                    .and_then(|e| e.file_size_in_byte)
229                    .unwrap_or(0);
230                let size_b = b
231                    .exif_info
232                    .as_ref()
233                    .and_then(|e| e.file_size_in_byte)
234                    .unwrap_or(0);
235                size_b.cmp(&size_a)
236            }
237            other => other,
238        }
239    });
240
241    let winner = &sorted[0];
242    let losers = &sorted[1..];
243
244    // Check winner metadata
245    let winner_exif = winner.exif_info.as_ref();
246    let winner_has_gps = winner_exif.is_some_and(|e| e.has_gps());
247    let winner_has_datetime = winner_exif.is_some_and(|e| e.date_time_original.is_some());
248    let winner_has_description = winner_exif
249        .is_some_and(|e| e.description.as_ref().is_some_and(|d| !d.is_empty()));
250
251    // Check loser metadata
252    let any_loser_has_gps = losers.iter().any(|l| {
253        l.exif_info.as_ref().is_some_and(|e| e.has_gps())
254    });
255    let any_loser_has_datetime = losers.iter().any(|l| {
256        l.exif_info.as_ref().is_some_and(|e| e.date_time_original.is_some())
257    });
258    let any_loser_has_description = losers.iter().any(|l| {
259        l.exif_info.as_ref()
260            .is_some_and(|e| e.description.as_ref().is_some_and(|d| !d.is_empty()))
261    });
262
263    // C8: Winner has everything
264    if winner_has_gps && winner_has_datetime && winner_has_description {
265        matches.push(ScenarioMatch {
266            scenario: TestScenario::C8WinnerHasEverything,
267            duplicate_id: dup_id.to_string(),
268            details: "Winner has GPS, datetime, description".to_string(),
269        });
270    }
271
272    // C5: Both have GPS
273    if winner_has_gps && any_loser_has_gps {
274        matches.push(ScenarioMatch {
275            scenario: TestScenario::C5BothHaveGps,
276            duplicate_id: dup_id.to_string(),
277            details: "Winner and loser(s) have GPS".to_string(),
278        });
279    }
280
281    // C1: Winner lacks GPS, loser has GPS
282    if !winner_has_gps && any_loser_has_gps {
283        matches.push(ScenarioMatch {
284            scenario: TestScenario::C1WinnerLacksGpsLoserHas,
285            duplicate_id: dup_id.to_string(),
286            details: "Winner missing GPS, loser has it".to_string(),
287        });
288    }
289
290    // C2: Winner lacks datetime, loser has datetime
291    if !winner_has_datetime && any_loser_has_datetime {
292        matches.push(ScenarioMatch {
293            scenario: TestScenario::C2WinnerLacksDatetimeLoserHas,
294            duplicate_id: dup_id.to_string(),
295            details: "Winner missing datetime, loser has it".to_string(),
296        });
297    }
298
299    // C3: Winner lacks description, loser has description
300    if !winner_has_description && any_loser_has_description {
301        matches.push(ScenarioMatch {
302            scenario: TestScenario::C3WinnerLacksDescriptionLoserHas,
303            duplicate_id: dup_id.to_string(),
304            details: "Winner missing description, loser has it".to_string(),
305        });
306    }
307
308    // C4: Winner lacks all, loser has all
309    if !winner_has_gps && !winner_has_datetime && !winner_has_description {
310        let loser_has_all = losers.iter().any(|l| {
311            let e = l.exif_info.as_ref();
312            let has_gps = e.is_some_and(|e| e.has_gps());
313            let has_dt = e.is_some_and(|e| e.date_time_original.is_some());
314            let has_desc = e.is_some_and(|e| e.description.as_ref().is_some_and(|d| !d.is_empty()));
315            has_gps && has_dt && has_desc
316        });
317        if loser_has_all {
318            matches.push(ScenarioMatch {
319                scenario: TestScenario::C4WinnerLacksAllLoserHasAll,
320                duplicate_id: dup_id.to_string(),
321                details: "Winner lacks GPS/datetime/description, loser has all".to_string(),
322            });
323        }
324    }
325
326    // C6: Multiple losers contribute different fields
327    if losers.len() >= 2 {
328        let loser_gps: Vec<bool> = losers
329            .iter()
330            .map(|l| l.exif_info.as_ref().is_some_and(|e| e.has_gps()))
331            .collect();
332        let loser_dt: Vec<bool> = losers
333            .iter()
334            .map(|l| l.exif_info.as_ref().is_some_and(|e| e.date_time_original.is_some()))
335            .collect();
336        let loser_desc: Vec<bool> = losers
337            .iter()
338            .map(|l| {
339                l.exif_info.as_ref()
340                    .is_some_and(|e| e.description.as_ref().is_some_and(|d| !d.is_empty()))
341            })
342            .collect();
343
344        // Check if different losers contribute different things
345        let gps_sources: Vec<usize> = loser_gps.iter().enumerate().filter_map(|(i, &v)| if v { Some(i) } else { None }).collect();
346        let dt_sources: Vec<usize> = loser_dt.iter().enumerate().filter_map(|(i, &v)| if v { Some(i) } else { None }).collect();
347        let desc_sources: Vec<usize> = loser_desc.iter().enumerate().filter_map(|(i, &v)| if v { Some(i) } else { None }).collect();
348
349        // If different losers are the source for different fields
350        let contributions = [gps_sources.first(), dt_sources.first(), desc_sources.first()];
351        let unique_contributors: std::collections::HashSet<_> = contributions.iter().filter_map(|&o| o).collect();
352        if unique_contributors.len() >= 2 {
353            matches.push(ScenarioMatch {
354                scenario: TestScenario::C6MultipleLosersContribute,
355                duplicate_id: dup_id.to_string(),
356                details: "Different losers contribute different metadata".to_string(),
357            });
358        }
359    }
360
361    // C7: No loser has what winner lacks
362    let winner_needs_gps = !winner_has_gps;
363    let winner_needs_datetime = !winner_has_datetime;
364    let winner_needs_description = !winner_has_description;
365
366    if (winner_needs_gps || winner_needs_datetime || winner_needs_description)
367        && !any_loser_has_gps
368        && !any_loser_has_datetime
369        && !any_loser_has_description
370    {
371        matches.push(ScenarioMatch {
372            scenario: TestScenario::C7NoLoserHasNeeded,
373            duplicate_id: dup_id.to_string(),
374            details: "Winner missing metadata, no loser has it".to_string(),
375        });
376    }
377}
378
379/// Detect conflict scenarios (F1-F7).
380fn detect_conflict_scenarios(
381    group: &DuplicateGroup,
382    matches: &mut Vec<ScenarioMatch>,
383    dup_id: &str,
384) {
385    let conflicts = detect_conflicts(&group.assets);
386
387    if conflicts.is_empty() {
388        matches.push(ScenarioMatch {
389            scenario: TestScenario::F7NoConflicts,
390            duplicate_id: dup_id.to_string(),
391            details: "No metadata conflicts".to_string(),
392        });
393        return;
394    }
395
396    let mut has_gps_conflict = false;
397    let mut has_timezone_conflict = false;
398    let mut has_camera_conflict = false;
399    let mut has_capture_time_conflict = false;
400
401    for conflict in &conflicts {
402        match conflict {
403            MetadataConflict::Gps { values } => {
404                has_gps_conflict = true;
405                matches.push(ScenarioMatch {
406                    scenario: TestScenario::F1GpsConflict,
407                    duplicate_id: dup_id.to_string(),
408                    details: format!("{} different locations", values.len()),
409                });
410            }
411            MetadataConflict::Timezone { values } => {
412                has_timezone_conflict = true;
413                matches.push(ScenarioMatch {
414                    scenario: TestScenario::F3TimezoneConflict,
415                    duplicate_id: dup_id.to_string(),
416                    details: format!("Timezones: {:?}", values),
417                });
418            }
419            MetadataConflict::CameraInfo { values } => {
420                has_camera_conflict = true;
421                matches.push(ScenarioMatch {
422                    scenario: TestScenario::F4CameraConflict,
423                    duplicate_id: dup_id.to_string(),
424                    details: format!("Cameras: {:?}", values),
425                });
426            }
427            MetadataConflict::CaptureTime { values } => {
428                has_capture_time_conflict = true;
429                matches.push(ScenarioMatch {
430                    scenario: TestScenario::F5CaptureTimeConflict,
431                    duplicate_id: dup_id.to_string(),
432                    details: format!("Times: {:?}", values),
433                });
434            }
435        }
436    }
437
438    // F6: Multiple conflicts
439    let conflict_count = [has_gps_conflict, has_timezone_conflict, has_camera_conflict, has_capture_time_conflict]
440        .iter()
441        .filter(|&&v| v)
442        .count();
443    if conflict_count >= 2 {
444        matches.push(ScenarioMatch {
445            scenario: TestScenario::F6MultipleConflicts,
446            duplicate_id: dup_id.to_string(),
447            details: format!("{} different conflict types", conflict_count),
448        });
449    }
450
451    // F2: GPS within threshold (no conflict detected but multiple GPS values exist)
452    if !has_gps_conflict {
453        let gps_values: Vec<(f64, f64)> = group
454            .assets
455            .iter()
456            .filter_map(|a| a.exif_info.as_ref())
457            .filter_map(|e| match (e.latitude, e.longitude) {
458                (Some(lat), Some(lon)) => Some((lat, lon)),
459                _ => None,
460            })
461            .collect();
462
463        if gps_values.len() >= 2 {
464            // Check if they're all within threshold
465            let mut all_within = true;
466            for i in 0..gps_values.len() {
467                for j in (i + 1)..gps_values.len() {
468                    let (lat1, lon1) = gps_values[i];
469                    let (lat2, lon2) = gps_values[j];
470                    if (lat1 - lat2).abs() > GPS_THRESHOLD || (lon1 - lon2).abs() > GPS_THRESHOLD {
471                        all_within = false;
472                        break;
473                    }
474                }
475            }
476            if all_within {
477                matches.push(ScenarioMatch {
478                    scenario: TestScenario::F2GpsWithinThreshold,
479                    duplicate_id: dup_id.to_string(),
480                    details: format!("{} GPS values within threshold", gps_values.len()),
481                });
482            }
483        }
484    }
485}
486
487/// Detect edge case scenarios (X3-X11).
488fn detect_edge_case_scenarios(
489    group: &DuplicateGroup,
490    matches: &mut Vec<ScenarioMatch>,
491    dup_id: &str,
492) {
493    for asset in &group.assets {
494        let filename = &asset.original_file_name;
495        let lowercase = filename.to_lowercase();
496
497        // X3: Large file (>50MB)
498        if let Some(size) = asset.exif_info.as_ref().and_then(|e| e.file_size_in_byte)
499            && size > LARGE_FILE_THRESHOLD
500        {
501            matches.push(ScenarioMatch {
502                scenario: TestScenario::X3LargeFile,
503                duplicate_id: dup_id.to_string(),
504                details: format!("{}: {} bytes", filename, size),
505            });
506        }
507
508        // X4: Special characters in filename
509        if filename.chars().any(|c| "!@#$%^&*()[]{}|;'\"<>?".contains(c)) {
510            matches.push(ScenarioMatch {
511                scenario: TestScenario::X4SpecialCharsFilename,
512                duplicate_id: dup_id.to_string(),
513                details: format!("Filename: {}", filename),
514            });
515        }
516
517        // X5: Video
518        if asset.asset_type == AssetType::Video {
519            matches.push(ScenarioMatch {
520                scenario: TestScenario::X5Video,
521                duplicate_id: dup_id.to_string(),
522                details: format!("Video: {}", filename),
523            });
524        }
525
526        // X7: PNG
527        if lowercase.ends_with(".png") {
528            matches.push(ScenarioMatch {
529                scenario: TestScenario::X7Png,
530                duplicate_id: dup_id.to_string(),
531                details: format!("PNG: {}", filename),
532            });
533        }
534
535        // X9: Unicode in description
536        if let Some(desc) = asset.exif_info.as_ref().and_then(|e| e.description.as_ref())
537            && !desc.is_ascii()
538        {
539            matches.push(ScenarioMatch {
540                scenario: TestScenario::X9UnicodeDescription,
541                duplicate_id: dup_id.to_string(),
542                details: format!("Description: {}", desc),
543            });
544        }
545
546        // X10: Very old date (<1990) and X11: Future date
547        if let Some(dt) = asset.exif_info.as_ref().and_then(|e| e.date_time_original.as_ref())
548            && let Some(year) = extract_year(dt)
549        {
550            if year < 1990 {
551                matches.push(ScenarioMatch {
552                    scenario: TestScenario::X10VeryOldDate,
553                    duplicate_id: dup_id.to_string(),
554                    details: format!("Date: {}", dt),
555                });
556            }
557
558            let current_year = Utc::now().year();
559            if year > current_year {
560                matches.push(ScenarioMatch {
561                    scenario: TestScenario::X11FutureDate,
562                    duplicate_id: dup_id.to_string(),
563                    details: format!("Date: {} (future)", dt),
564                });
565            }
566        }
567    }
568}
569
/// Extract the year from a date string whose first field is the year.
///
/// Handles formats such as `"2023:01:15 12:00:00"`, `"2023-01-15T12:00:00Z"`
/// and `"2023/01/15"`: surrounding whitespace is ignored and the text before
/// the first `:`, `-`, `/`, space or `T` is parsed as the year.
///
/// Returns `None` when that leading field is not an integer or falls outside
/// the plausible range 1800..=2100 (which also rejects placeholder values
/// like `0000:00:00 00:00:00`).
fn extract_year(date_str: &str) -> Option<i32> {
    // Split on every supported separator directly; no intermediate strings
    // need to be built just to normalize the delimiters.
    let year_field = date_str
        .trim()
        .split([':', '-', 'T', ' ', '/'])
        .next()?;
    let year: i32 = year_field.parse().ok()?;
    (1800..=2100).contains(&year).then_some(year)
}