use serde::{Deserialize, Serialize};
use crate::models::{AssetResponse, DuplicateGroup};
/// Points a `MetadataScore` awards for each category of EXIF data an asset carries.
///
/// The six weights add up to 100.
mod weights {
    /// Awarded when the EXIF data reports GPS coordinates.
    pub const GPS: u32 = 30;
    /// Awarded when the EXIF data reports a timezone.
    pub const TIMEZONE: u32 = 20;
    /// Awarded when the EXIF data reports camera information.
    pub const CAMERA_INFO: u32 = 15;
    /// Awarded when the EXIF data reports a capture time.
    pub const CAPTURE_TIME: u32 = 15;
    /// Awarded when the EXIF data reports lens information.
    pub const LENS_INFO: u32 = 10;
    /// Awarded when the EXIF data reports a location.
    pub const LOCATION: u32 = 10;
}
/// Largest absolute difference in latitude or in longitude at which two coordinate pairs are
/// still treated as the same position.
// NOTE(review): coordinates are presumably decimal degrees — confirm against the EXIF model.
const GPS_THRESHOLD: f64 = 0.0001;
/// Per-category breakdown of how much EXIF metadata an asset carries.
///
/// `MetadataScore::from_asset` sets every component to either 0 or the matching constant in
/// `weights`, and `total` to the sum of the components. The derived `Default` is the
/// all-zero score.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct MetadataScore {
/// Points for GPS coordinates.
pub gps: u32,
/// Points for timezone information.
pub timezone: u32,
/// Points for camera information.
pub camera_info: u32,
/// Points for a capture time.
pub capture_time: u32,
/// Points for lens information.
pub lens_info: u32,
/// Points for location information.
pub location: u32,
/// Sum of all components above, as computed by `from_asset`.
pub total: u32,
}
/// Partial ordering for `MetadataScore`; always `Some` because it defers to `Ord`.
impl PartialOrd for MetadataScore {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
// Delegate to `Ord::cmp` so the partial and total orderings cannot disagree.
Some(self.cmp(other))
}
}
/// Total ordering for `MetadataScore`: primarily by `total`, then by the individual
/// components.
///
/// The derived `PartialEq`/`Eq` compare every field. Comparing `total` alone would report
/// `Ordering::Equal` for two scores that are not `==` (for example 15 points from camera
/// info versus 15 points from capture time), which breaks the consistency `Ord` requires
/// between `cmp` and `==`. The component tie-breakers restore that consistency while
/// keeping `total` as the dominant key.
impl Ord for MetadataScore {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.total
            .cmp(&other.total)
            // Tie-breakers, in the order the fields are declared.
            .then_with(|| self.gps.cmp(&other.gps))
            .then_with(|| self.timezone.cmp(&other.timezone))
            .then_with(|| self.camera_info.cmp(&other.camera_info))
            .then_with(|| self.capture_time.cmp(&other.capture_time))
            .then_with(|| self.lens_info.cmp(&other.lens_info))
            .then_with(|| self.location.cmp(&other.location))
    }
}
impl MetadataScore {
    /// Scores `asset` by the EXIF metadata it carries.
    ///
    /// Every category the EXIF data reports as present contributes its constant from
    /// `weights`; absent categories contribute 0. `total` is the sum of the six components.
    /// An asset without EXIF data receives the all-zero default score.
    pub fn from_asset(asset: &AssetResponse) -> Self {
        let exif = match &asset.exif_info {
            Some(exif) => exif,
            None => return Self::default(),
        };

        // A category earns its full weight when present and nothing otherwise.
        let points = |present: bool, weight: u32| if present { weight } else { 0 };

        let mut score = Self {
            gps: points(exif.has_gps(), weights::GPS),
            timezone: points(exif.has_timezone(), weights::TIMEZONE),
            camera_info: points(exif.has_camera_info(), weights::CAMERA_INFO),
            capture_time: points(exif.has_capture_time(), weights::CAPTURE_TIME),
            lens_info: points(exif.has_lens_info(), weights::LENS_INFO),
            location: points(exif.has_location(), weights::LOCATION),
            total: 0,
        };
        score.total = score.gps
            + score.timezone
            + score.camera_info
            + score.capture_time
            + score.lens_info
            + score.location;
        score
    }
}
/// A metadata field whose values differ between the assets passed to `detect_conflicts`.
///
/// Serialized with an internal `type` tag holding the snake_case variant name.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum MetadataConflict {
/// The assets report GPS positions that differ by more than the tolerance.
Gps {
/// Distinct `(latitude, longitude)` pairs, in first-seen order.
values: Vec<(f64, f64)>,
},
/// The assets report different timezones.
Timezone {
/// Distinct timezone strings, trimmed, in first-seen order.
values: Vec<String>,
},
/// The assets report different camera make/model combinations.
CameraInfo {
/// Distinct "make model" strings, trimmed, in first-seen order.
values: Vec<String>,
},
/// The assets report different original capture times.
CaptureTime {
/// Distinct capture-time strings, trimmed, in first-seen order.
values: Vec<String>,
},
}
/// Collects the metadata fields on which the EXIF data of `assets` disagree.
///
/// Assets without EXIF data are skipped, and so are fields an asset does not provide.
/// GPS positions are compared with the `GPS_THRESHOLD` tolerance; timezone, camera
/// ("make model") and capture-time strings are compared after trimming and lowercasing.
///
/// Conflicts are returned in the fixed order GPS, timezone, camera info, capture time.
/// An empty result means no disagreement was found.
pub fn detect_conflicts(assets: &[AssetResponse]) -> Vec<MetadataConflict> {
    // Every check below only looks at assets that actually carry EXIF data.
    let exifs: Vec<_> = assets.iter().filter_map(|a| a.exif_info.as_ref()).collect();
    let mut found = Vec::new();

    // GPS: only assets with both a latitude and a longitude take part.
    let coords: Vec<(f64, f64)> = exifs
        .iter()
        .filter_map(|e| e.latitude.zip(e.longitude))
        .collect();
    // NOTE(review): `has_gps_conflict` compares every pair, while `dedupe_gps` compares each
    // point against the first-seen representatives only, so a reported GPS conflict can in
    // principle list a single value — confirm whether consumers rely on two or more.
    if has_gps_conflict(&coords) {
        found.push(MetadataConflict::Gps {
            values: dedupe_gps(&coords),
        });
    }

    let timezones: Vec<String> = exifs.iter().filter_map(|e| e.time_zone.clone()).collect();
    if let Some(values) = find_unique_strings(&timezones) {
        found.push(MetadataConflict::Timezone { values });
    }

    // Camera: "make model", with a missing half treated as empty and trimmed away.
    let cameras: Vec<String> = exifs
        .iter()
        .filter_map(|e| {
            let make = e.make.as_deref().unwrap_or("");
            let model = e.model.as_deref().unwrap_or("");
            if make.is_empty() && model.is_empty() {
                return None;
            }
            Some(format!("{} {}", make, model).trim().to_string())
        })
        .collect();
    if let Some(values) = find_unique_strings(&cameras) {
        found.push(MetadataConflict::CameraInfo { values });
    }

    let capture_times: Vec<String> = exifs
        .iter()
        .filter_map(|e| e.date_time_original.clone())
        .collect();
    if let Some(values) = find_unique_strings(&capture_times) {
        found.push(MetadataConflict::CaptureTime { values });
    }

    found
}
/// Returns `true` when any two coordinate pairs differ by more than `GPS_THRESHOLD` in
/// latitude or in longitude.
///
/// Fewer than two pairs can never conflict, so the result is `false` for them.
fn has_gps_conflict(coords: &[(f64, f64)]) -> bool {
    coords.iter().enumerate().any(|(idx, &(lat_a, lon_a))| {
        // Each point is only compared with the points after it, so every pair is seen once.
        coords[idx + 1..].iter().any(|&(lat_b, lon_b)| {
            (lat_a - lat_b).abs() > GPS_THRESHOLD || (lon_a - lon_b).abs() > GPS_THRESHOLD
        })
    })
}
/// Collapses coordinate pairs that lie within `GPS_THRESHOLD` of an already kept pair.
///
/// Pairs are visited in input order. A pair is kept unless both its latitude and its
/// longitude are within the threshold of some previously kept pair, so the result holds the
/// first-seen representative of each cluster.
fn dedupe_gps(coords: &[(f64, f64)]) -> Vec<(f64, f64)> {
    coords
        .iter()
        .fold(Vec::new(), |mut kept: Vec<(f64, f64)>, &(lat, lon)| {
            let already_kept = kept.iter().any(|&(kept_lat, kept_lon)| {
                (lat - kept_lat).abs() <= GPS_THRESHOLD && (lon - kept_lon).abs() <= GPS_THRESHOLD
            });
            if !already_kept {
                kept.push((lat, lon));
            }
            kept
        })
}
/// Returns the distinct values in `values` when there is more than one of them.
///
/// Values are compared after trimming surrounding whitespace and lowercasing; values that
/// are empty after trimming are ignored. The result keeps the trimmed, first-seen spelling
/// of each distinct value in input order.
///
/// Returns `None` when fewer than two distinct values remain, i.e. when there is nothing to
/// disagree about.
fn find_unique_strings(values: &[String]) -> Option<Vec<String>> {
    // Normalized forms already encountered; a hash set keeps the membership test O(1).
    let mut seen = std::collections::HashSet::new();
    let mut distinct = Vec::new();

    for value in values {
        let trimmed = value.trim();
        if trimmed.is_empty() {
            continue;
        }
        // `insert` returns `true` only the first time a normalized form is seen.
        if seen.insert(trimmed.to_lowercase()) {
            distinct.push(trimmed.to_string());
        }
    }

    (distinct.len() > 1).then_some(distinct)
}
/// One asset of a duplicate group together with the figures used to rank it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScoredAsset {
/// Copied from the asset's `id`.
pub asset_id: String,
/// Copied from the asset's `original_file_name`.
pub filename: String,
/// Metadata score computed by `MetadataScore::from_asset`.
pub score: MetadataScore,
/// Taken from the EXIF `file_size_in_byte` field; `None` when the asset has no EXIF data
/// or the field is absent.
pub file_size: Option<u64>,
/// `(width, height)` from the EXIF image width and height; `None` unless both are present.
pub dimensions: Option<(u32, u32)>,
}
/// Result of analysing one duplicate group, as built by `DuplicateAnalysis::from_group`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DuplicateAnalysis {
/// Copied from the group's `duplicate_id`.
pub duplicate_id: String,
/// The asset ranked first within the group.
pub winner: ScoredAsset,
/// All remaining assets of the group, in ranking order.
pub losers: Vec<ScoredAsset>,
/// Metadata disagreements reported by `detect_conflicts` for the group's assets.
pub conflicts: Vec<MetadataConflict>,
/// `true` when `conflicts` is non-empty.
pub needs_review: bool,
}
impl DuplicateAnalysis {
    /// Ranks the assets of `group`, picks the winner and records metadata conflicts.
    ///
    /// Assets are ordered by pixel count (width × height, largest first), with file size
    /// (largest first) as tie-breaker; missing dimensions or sizes count as 0. The sort is
    /// stable, so assets that tie on both keys keep their order from the group. The first
    /// asset becomes `winner` and the rest become `losers`. The metadata score is recorded
    /// on every asset but does not take part in the ranking.
    ///
    /// `needs_review` is set when `detect_conflicts` reports at least one conflict.
    ///
    /// # Panics
    ///
    /// Panics if `group.assets` is empty, because there is no asset to pick as winner.
    pub fn from_group(group: &DuplicateGroup) -> Self {
        let mut ranked: Vec<ScoredAsset> = group
            .assets
            .iter()
            .map(|asset| {
                let exif = asset.exif_info.as_ref();
                ScoredAsset {
                    asset_id: asset.id.clone(),
                    filename: asset.original_file_name.clone(),
                    score: MetadataScore::from_asset(asset),
                    file_size: exif.and_then(|e| e.file_size_in_byte),
                    // Dimensions are only known when both width and height are present.
                    dimensions: exif.and_then(|e| e.exif_image_width.zip(e.exif_image_height)),
                }
            })
            .collect();

        // Ranking key: (pixel count, file size), with unknown values counted as 0.
        let rank_key = |candidate: &ScoredAsset| {
            let pixels = candidate
                .dimensions
                .map_or(0, |(w, h)| u64::from(w) * u64::from(h));
            (pixels, candidate.file_size.unwrap_or(0))
        };
        // Comparing `b` against `a` yields descending order.
        ranked.sort_by(|a, b| rank_key(b).cmp(&rank_key(a)));

        let conflicts = detect_conflicts(&group.assets);
        let needs_review = !conflicts.is_empty();

        let winner = ranked.remove(0);

        Self {
            duplicate_id: group.duplicate_id.clone(),
            winner,
            losers: ranked,
            conflicts,
            needs_review,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The derived default score carries no points.
    #[test]
    fn test_metadata_score_default() {
        assert_eq!(MetadataScore::default().total, 0);
    }

    /// Identical positions do not conflict; clearly different ones do.
    #[test]
    fn test_gps_conflict_detection() {
        let london = (51.5074, -0.1278);

        let same_spot = [london, london];
        assert!(!has_gps_conflict(&same_spot));

        let far_apart = [london, (52.0, -0.5)];
        assert!(has_gps_conflict(&far_apart));
    }

    /// A single value or case-only differences are not a conflict; two real values are.
    #[test]
    fn test_find_unique_strings() {
        let owned =
            |items: &[&str]| -> Vec<String> { items.iter().map(|s| s.to_string()).collect() };

        let single = owned(&["America/New_York"]);
        assert_eq!(find_unique_strings(&single), None);

        let case_only = owned(&["America/New_York", "america/new_york"]);
        assert_eq!(find_unique_strings(&case_only), None);

        let different = owned(&["America/New_York", "Europe/London"]);
        let distinct = find_unique_strings(&different).unwrap();
        assert_eq!(distinct.len(), 2);
    }
}