use crate::models::{AudibleMetadata, CurrentMetadata, MatchCandidate, MetadataDistance, MatchConfidence};
pub fn calculate_distance(
current: &CurrentMetadata,
candidate: &AudibleMetadata,
) -> MetadataDistance {
let mut distance = MetadataDistance::new();
if let Some(cur_title) = ¤t.title {
let cand_title = &candidate.title;
let title_dist = string_distance(cur_title, cand_title);
distance.add_penalty("title", title_dist, 0.4);
}
if let Some(cur_author) = ¤t.author {
let author_dist = candidate.authors.iter()
.map(|a| string_distance(cur_author, &a.name))
.min_by(|a, b| a.partial_cmp(b).unwrap())
.unwrap_or(1.0);
distance.add_penalty("author", author_dist, 0.3);
}
if let (Some(cur_year), Some(cand_year)) = (current.year, candidate.published_year) {
let year_dist = year_distance(cur_year, cand_year);
distance.add_penalty("year", year_dist, 0.1);
}
if let (Some(cur_dur), Some(cand_dur_ms)) = (current.duration, candidate.runtime_length_ms) {
let cand_dur_sec = cand_dur_ms as f64 / 1000.0;
let dur_dist = duration_distance(cur_dur, cand_dur_sec);
distance.add_penalty("duration", dur_dist, 0.2);
}
distance
}
pub fn string_distance(a: &str, b: &str) -> f64 {
let a_norm = normalize_string(a);
let b_norm = normalize_string(b);
let similarity = strsim::normalized_levenshtein(&a_norm, &b_norm);
1.0 - similarity
}
fn year_distance(a: u32, b: u32) -> f64 {
let diff = (a as i32 - b as i32).abs();
(diff as f64 / 10.0).min(1.0)
}
fn duration_distance(a: f64, b: f64) -> f64 {
let diff_ratio = ((a - b).abs() / a.max(b)).max(0.0);
if diff_ratio < 0.05 {
0.0
} else if diff_ratio < 0.20 {
((diff_ratio - 0.05) / 0.15) * 0.75
} else {
1.0
}
}
pub fn normalize_string(s: &str) -> String {
let mut normalized = s.to_lowercase().trim().to_string();
if normalized.starts_with("the ") {
normalized = normalized[4..].to_string();
}
normalized.retain(|c| c.is_alphanumeric() || c.is_whitespace());
normalized
}
pub fn score_and_sort(
current: &CurrentMetadata,
candidates: Vec<AudibleMetadata>,
) -> Vec<MatchCandidate> {
let mut scored: Vec<MatchCandidate> = candidates
.into_iter()
.map(|metadata| {
let distance = calculate_distance(current, &metadata);
let confidence = determine_confidence(distance.total_distance());
MatchCandidate {
distance,
metadata,
confidence,
}
})
.collect();
scored.sort_by(|a, b| {
a.distance.total_distance()
.partial_cmp(&b.distance.total_distance())
.unwrap()
});
scored
}
pub fn determine_confidence(distance: f64) -> MatchConfidence {
if distance < 0.04 {
MatchConfidence::Strong
} else if distance < 0.12 {
MatchConfidence::Medium
} else if distance < 0.20 {
MatchConfidence::Low
} else {
MatchConfidence::None
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_string_distance() {
assert_eq!(string_distance("Hello World", "Hello World"), 0.0);
assert_eq!(string_distance("hello world", "HELLO WORLD"), 0.0);
let dist = string_distance("Project Hail Mary", "Project Haile Mary");
assert!(dist > 0.0 && dist < 0.15);
let dist = string_distance("Completely Different", "Not the Same");
assert!(dist > 0.5);
}
#[test]
fn test_normalize_string() {
assert_eq!(normalize_string("The Hobbit"), "hobbit");
assert_eq!(normalize_string(" Project Hail Mary "), "project hail mary");
assert_eq!(normalize_string("Author's Name"), "authors name");
assert_eq!(normalize_string("Title! @ # $"), "title");
}
#[test]
fn test_year_distance() {
assert_eq!(year_distance(2020, 2020), 0.0); assert_eq!(year_distance(2020, 2025), 0.5); assert_eq!(year_distance(2020, 2030), 1.0); assert!(year_distance(2020, 2035) >= 1.0); }
#[test]
fn test_duration_distance() {
assert_eq!(duration_distance(3600.0, 3620.0), 0.0);
let dist = duration_distance(3600.0, 3960.0); assert!(dist > 0.0 && dist < 0.75);
let dist = duration_distance(3600.0, 4500.0); assert_eq!(dist, 1.0);
}
#[test]
fn test_determine_confidence() {
assert_eq!(determine_confidence(0.02), MatchConfidence::Strong);
assert_eq!(determine_confidence(0.08), MatchConfidence::Medium);
assert_eq!(determine_confidence(0.15), MatchConfidence::Low);
assert_eq!(determine_confidence(0.50), MatchConfidence::None);
}
}