1use serde::{Deserialize, Serialize};
7
8use crate::models::{AssetResponse, DuplicateGroup};
9
/// Points awarded for each category of metadata found on an asset.
///
/// The six weights add up to 100.
mod weights {
    /// Awarded when the EXIF record reports GPS data.
    pub const GPS: u32 = 30;

    /// Awarded when the EXIF record reports a time zone.
    pub const TIMEZONE: u32 = 20;

    /// Awarded when the EXIF record reports camera information.
    pub const CAMERA_INFO: u32 = 15;

    /// Awarded when the EXIF record reports a capture time.
    pub const CAPTURE_TIME: u32 = 15;

    /// Awarded when the EXIF record reports lens information.
    pub const LENS_INFO: u32 = 10;

    /// Awarded when the EXIF record reports a location.
    pub const LOCATION: u32 = 10;
}
20
/// Largest per-axis difference at which two coordinates are still treated as
/// the same position.
///
/// NOTE(review): presumably expressed in decimal degrees (roughly 11 m of
/// latitude) — confirm against the source of the EXIF coordinates.
const GPS_THRESHOLD: f64 = 1e-4;
24
/// Per-category breakdown of how much metadata an asset carries.
///
/// When built through `MetadataScore::from_asset`, every component is either
/// zero or the matching constant from the `weights` module, and `total` is the
/// sum of the six components. The default value has every field set to zero.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct MetadataScore {
    /// Points awarded for GPS data.
    pub gps: u32,

    /// Points awarded for time zone information.
    pub timezone: u32,

    /// Points awarded for camera information.
    pub camera_info: u32,

    /// Points awarded for a capture time.
    pub capture_time: u32,

    /// Points awarded for lens information.
    pub lens_info: u32,

    /// Points awarded for location information.
    pub location: u32,

    /// Overall score; the sum of all components when produced by `from_asset`.
    pub total: u32,
}
52
53impl PartialOrd for MetadataScore {
54 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
55 Some(self.cmp(other))
56 }
57}
58
59impl Ord for MetadataScore {
60 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
61 self.total.cmp(&other.total)
62 }
63}
64
65impl MetadataScore {
66 pub fn from_asset(asset: &AssetResponse) -> Self {
71 let Some(exif) = &asset.exif_info else {
72 return Self::default();
73 };
74
75 let gps = if exif.has_gps() { weights::GPS } else { 0 };
76 let timezone = if exif.has_timezone() {
77 weights::TIMEZONE
78 } else {
79 0
80 };
81 let camera_info = if exif.has_camera_info() {
82 weights::CAMERA_INFO
83 } else {
84 0
85 };
86 let capture_time = if exif.has_capture_time() {
87 weights::CAPTURE_TIME
88 } else {
89 0
90 };
91 let lens_info = if exif.has_lens_info() {
92 weights::LENS_INFO
93 } else {
94 0
95 };
96 let location = if exif.has_location() {
97 weights::LOCATION
98 } else {
99 0
100 };
101
102 let total = gps + timezone + camera_info + capture_time + lens_info + location;
103
104 Self {
105 gps,
106 timezone,
107 camera_info,
108 capture_time,
109 lens_info,
110 location,
111 total,
112 }
113 }
114}
115
/// A metadata field whose values disagree across a set of assets.
///
/// Serialized by serde as an internally tagged enum: the `type` field carries
/// the variant name in snake_case, alongside the variant's `values`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum MetadataConflict {
    /// The assets report differing GPS positions.
    Gps {
        /// `(latitude, longitude)` pairs; `detect_conflicts` fills this with
        /// the positions left after merging near-identical coordinates.
        values: Vec<(f64, f64)>,
    },

    /// The assets report differing time zones.
    Timezone {
        /// The differing time zone strings.
        values: Vec<String>,
    },

    /// The assets report differing camera information.
    CameraInfo {
        /// The differing camera descriptions; `detect_conflicts` builds each
        /// one from the EXIF make and model joined by a space.
        values: Vec<String>,
    },

    /// The assets report differing capture times.
    CaptureTime {
        /// The differing capture time strings.
        values: Vec<String>,
    },
}
147
148pub fn detect_conflicts(assets: &[AssetResponse]) -> Vec<MetadataConflict> {
162 let mut conflicts = Vec::new();
163
164 let gps_values: Vec<(f64, f64)> = assets
166 .iter()
167 .filter_map(|a| a.exif_info.as_ref())
168 .filter_map(|e| match (e.latitude, e.longitude) {
169 (Some(lat), Some(lon)) => Some((lat, lon)),
170 _ => None,
171 })
172 .collect();
173
174 if has_gps_conflict(&gps_values) {
175 let unique_gps = dedupe_gps(&gps_values);
176 conflicts.push(MetadataConflict::Gps { values: unique_gps });
177 }
178
179 let timezone_values: Vec<String> = assets
181 .iter()
182 .filter_map(|a| a.exif_info.as_ref())
183 .filter_map(|e| e.time_zone.clone())
184 .collect();
185
186 if let Some(unique) = find_unique_strings(&timezone_values) {
187 conflicts.push(MetadataConflict::Timezone { values: unique });
188 }
189
190 let camera_values: Vec<String> = assets
192 .iter()
193 .filter_map(|a| a.exif_info.as_ref())
194 .filter_map(|e| {
195 let make = e.make.as_deref().unwrap_or("");
196 let model = e.model.as_deref().unwrap_or("");
197 if make.is_empty() && model.is_empty() {
198 None
199 } else {
200 Some(format!("{} {}", make, model).trim().to_string())
201 }
202 })
203 .collect();
204
205 if let Some(unique) = find_unique_strings(&camera_values) {
206 conflicts.push(MetadataConflict::CameraInfo { values: unique });
207 }
208
209 let capture_time_values: Vec<String> = assets
211 .iter()
212 .filter_map(|a| a.exif_info.as_ref())
213 .filter_map(|e| e.date_time_original.clone())
214 .collect();
215
216 if let Some(unique) = find_unique_strings(&capture_time_values) {
217 conflicts.push(MetadataConflict::CaptureTime { values: unique });
218 }
219
220 conflicts
221}
222
223fn has_gps_conflict(coords: &[(f64, f64)]) -> bool {
225 if coords.len() < 2 {
226 return false;
227 }
228
229 for i in 0..coords.len() {
230 for j in (i + 1)..coords.len() {
231 let (lat1, lon1) = coords[i];
232 let (lat2, lon2) = coords[j];
233 if (lat1 - lat2).abs() > GPS_THRESHOLD || (lon1 - lon2).abs() > GPS_THRESHOLD {
234 return true;
235 }
236 }
237 }
238
239 false
240}
241
242fn dedupe_gps(coords: &[(f64, f64)]) -> Vec<(f64, f64)> {
244 let mut unique: Vec<(f64, f64)> = Vec::new();
245
246 for &(lat, lon) in coords {
247 let is_duplicate = unique.iter().any(|&(ulat, ulon)| {
248 (lat - ulat).abs() <= GPS_THRESHOLD && (lon - ulon).abs() <= GPS_THRESHOLD
249 });
250
251 if !is_duplicate {
252 unique.push((lat, lon));
253 }
254 }
255
256 unique
257}
258
/// Returns the distinct values when the input holds more than one.
///
/// Values are compared after trimming surrounding whitespace and lowercasing;
/// blank values are ignored. The result keeps the first spelling of each
/// distinct value (trimmed, original case) in input order. `None` is returned
/// when there are fewer than two distinct values, i.e. there is no conflict.
fn find_unique_strings(values: &[String]) -> Option<Vec<String>> {
    // A hash set keeps the membership test O(1) instead of rescanning a Vec.
    let mut seen = std::collections::HashSet::new();

    let distinct: Vec<String> = values
        .iter()
        .map(|value| value.trim())
        // `insert` returns false for a normalized value that was already seen.
        .filter(|trimmed| !trimmed.is_empty() && seen.insert(trimmed.to_lowercase()))
        .map(String::from)
        .collect();

    if distinct.len() > 1 {
        Some(distinct)
    } else {
        None
    }
}
283
/// One asset of a duplicate group together with the data used to rank it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScoredAsset {
    /// Identifier of the asset.
    pub asset_id: String,

    /// Original file name of the asset.
    pub filename: String,

    /// Metadata completeness score of the asset.
    pub score: MetadataScore,

    /// File size, when known.
    /// NOTE(review): presumably bytes, going by the `file_size_in_byte` EXIF
    /// field it is read from — confirm.
    pub file_size: Option<u64>,

    /// Image dimensions as `(width, height)`, when both are known.
    pub dimensions: Option<(u32, u32)>,
}
302
/// Outcome of analysing one duplicate group: which asset ranks first, which
/// rank behind it, and where their metadata disagrees.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicateAnalysis {
    /// Identifier of the duplicate group that was analysed.
    pub duplicate_id: String,

    /// The top-ranked asset of the group.
    pub winner: ScoredAsset,

    /// The remaining assets of the group, in rank order.
    pub losers: Vec<ScoredAsset>,

    /// Metadata conflicts detected between the assets of the group.
    pub conflicts: Vec<MetadataConflict>,

    /// `true` when at least one conflict was detected.
    pub needs_review: bool,
}
324
325impl DuplicateAnalysis {
326 pub fn from_group(group: &DuplicateGroup) -> Self {
343 let mut scored: Vec<ScoredAsset> = group
345 .assets
346 .iter()
347 .map(|asset| {
348 let dimensions = asset.exif_info.as_ref().and_then(|e| {
349 match (e.exif_image_width, e.exif_image_height) {
350 (Some(w), Some(h)) => Some((w, h)),
351 _ => None,
352 }
353 });
354 ScoredAsset {
355 asset_id: asset.id.clone(),
356 filename: asset.original_file_name.clone(),
357 score: MetadataScore::from_asset(asset),
358 file_size: asset.exif_info.as_ref().and_then(|e| e.file_size_in_byte),
359 dimensions,
360 }
361 })
362 .collect();
363
364 scored.sort_by(|a, b| {
366 let pixels_a = a
368 .dimensions
369 .map(|(w, h)| u64::from(w) * u64::from(h))
370 .unwrap_or(0);
371 let pixels_b = b
372 .dimensions
373 .map(|(w, h)| u64::from(w) * u64::from(h))
374 .unwrap_or(0);
375
376 match pixels_b.cmp(&pixels_a) {
377 std::cmp::Ordering::Equal => {
378 let size_a = a.file_size.unwrap_or(0);
380 let size_b = b.file_size.unwrap_or(0);
381 size_b.cmp(&size_a)
382 }
383 other => other,
384 }
385 });
386
387 let conflicts = detect_conflicts(&group.assets);
389 let needs_review = !conflicts.is_empty();
390
391 let winner = scored.remove(0);
393 let losers = scored;
394
395 Self {
396 duplicate_id: group.duplicate_id.clone(),
397 winner,
398 losers,
399 conflicts,
400 needs_review,
401 }
402 }
403}
404
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds owned strings from literals to keep the cases below short.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().copied().map(String::from).collect()
    }

    /// The default score carries no points.
    #[test]
    fn test_metadata_score_default() {
        assert_eq!(MetadataScore::default().total, 0);
    }

    /// Identical positions agree; clearly different positions conflict.
    #[test]
    fn test_gps_conflict_detection() {
        let london = (51.5074, -0.1278);

        assert!(!has_gps_conflict(&[london, london]));
        assert!(has_gps_conflict(&[london, (52.0, -0.5)]));
    }

    /// A conflict needs at least two values that differ beyond letter case.
    #[test]
    fn test_find_unique_strings() {
        let single = owned(&["America/New_York"]);
        assert!(find_unique_strings(&single).is_none());

        let case_variants = owned(&["America/New_York", "america/new_york"]);
        assert!(find_unique_strings(&case_variants).is_none());

        let different = owned(&["America/New_York", "Europe/London"]);
        let distinct = find_unique_strings(&different).unwrap();
        assert_eq!(distinct.len(), 2);
    }
}