audiobook_forge/utils/
scoring.rs1use crate::models::{AudibleMetadata, CurrentMetadata, MatchCandidate, MetadataDistance, MatchConfidence};
4
5pub fn calculate_distance(
7 current: &CurrentMetadata,
8 candidate: &AudibleMetadata,
9) -> MetadataDistance {
10 let mut distance = MetadataDistance::new();
11
12 if let Some(cur_title) = ¤t.title {
14 let cand_title = &candidate.title;
15 let title_dist = string_distance(cur_title, cand_title);
16 distance.add_penalty("title", title_dist, 0.4);
17 }
18
19 if let Some(cur_author) = ¤t.author {
21 let author_dist = candidate.authors.iter()
23 .map(|a| string_distance(cur_author, &a.name))
24 .min_by(|a, b| a.partial_cmp(b).unwrap())
25 .unwrap_or(1.0);
26 distance.add_penalty("author", author_dist, 0.3);
27 }
28
29 if let (Some(cur_year), Some(cand_year)) = (current.year, candidate.published_year) {
31 let year_dist = year_distance(cur_year, cand_year);
32 distance.add_penalty("year", year_dist, 0.1);
33 }
34
35 if let (Some(cur_dur), Some(cand_dur_ms)) = (current.duration, candidate.runtime_length_ms) {
37 let cand_dur_sec = cand_dur_ms as f64 / 1000.0;
38 let dur_dist = duration_distance(cur_dur, cand_dur_sec);
39 distance.add_penalty("duration", dur_dist, 0.2);
40 }
41
42 distance
43}
44
45pub fn string_distance(a: &str, b: &str) -> f64 {
47 let a_norm = normalize_string(a);
49 let b_norm = normalize_string(b);
50
51 let similarity = strsim::normalized_levenshtein(&a_norm, &b_norm);
53
54 1.0 - similarity
56}
57
/// Distance between two publication years.
///
/// Scales linearly with the absolute difference and saturates: identical years
/// give `0.0`, and a gap of ten years or more gives `1.0`.
///
/// The gap is computed with `u32::abs_diff`, so it is exact for every pair of
/// `u32` values; casting to `i32` first would wrap for inputs above `i32::MAX`
/// and could overflow on subtraction.
fn year_distance(a: u32, b: u32) -> f64 {
    let gap = a.abs_diff(b);
    (f64::from(gap) / 10.0).min(1.0)
}
63
/// Distance between two durations, based on their relative difference.
///
/// The relative difference is `|a - b|` divided by the larger of the two
/// values, floored at `0.0`. It maps to a penalty in three bands:
///
/// * below 5%  → `0.0`
/// * 5% up to (not including) 20% → rises linearly from `0.0` towards `0.75`
/// * 20% or more → `1.0`
///
/// Both arguments must be expressed in the same unit.
fn duration_distance(a: f64, b: f64) -> f64 {
    let gap = (a - b).abs();
    let longer = a.max(b);
    // `f64::max` returns the non-NaN operand, so a 0/0 ratio (both durations
    // zero) collapses to 0.0 here.
    let relative = (gap / longer).max(0.0);

    match relative {
        r if r < 0.05 => 0.0,
        r if r < 0.20 => ((r - 0.05) / 0.15) * 0.75,
        _ => 1.0,
    }
}
79
/// Canonicalizes a title or name so that cosmetic differences do not affect
/// comparison.
///
/// Steps, in order:
/// 1. lowercase the input;
/// 2. drop every character that is neither alphanumeric nor whitespace;
/// 3. trim and collapse each run of whitespace to a single space;
/// 4. remove a leading `"the "` article.
///
/// Punctuation is removed *before* whitespace is tidied. Doing it the other way
/// round leaves stray spaces behind when a token consists only of punctuation
/// (e.g. `"Title! @ # $"` would end in three spaces).
///
/// # Returns
/// A newly allocated, normalized `String` (possibly empty).
pub fn normalize_string(s: &str) -> String {
    let letters_only: String = s
        .to_lowercase()
        .chars()
        .filter(|c| c.is_alphanumeric() || c.is_whitespace())
        .collect();

    // `split_whitespace` both trims the ends and collapses interior runs.
    let collapsed = letters_only.split_whitespace().collect::<Vec<_>>().join(" ");

    if let Some(rest) = collapsed.strip_prefix("the ") {
        return rest.to_string();
    }
    collapsed
}
94
95pub fn score_and_sort(
97 current: &CurrentMetadata,
98 candidates: Vec<AudibleMetadata>,
99) -> Vec<MatchCandidate> {
100 let mut scored: Vec<MatchCandidate> = candidates
101 .into_iter()
102 .map(|metadata| {
103 let distance = calculate_distance(current, &metadata);
104 let confidence = determine_confidence(distance.total_distance());
105 MatchCandidate {
106 distance,
107 metadata,
108 confidence,
109 }
110 })
111 .collect();
112
113 scored.sort_by(|a, b| {
115 a.distance.total_distance()
116 .partial_cmp(&b.distance.total_distance())
117 .unwrap()
118 });
119
120 scored
121}
122
123pub fn determine_confidence(distance: f64) -> MatchConfidence {
125 if distance < 0.04 {
126 MatchConfidence::Strong
127 } else if distance < 0.12 {
128 MatchConfidence::Medium
129 } else if distance < 0.20 {
130 MatchConfidence::Low
131 } else {
132 MatchConfidence::None
133 }
134}
135
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_string_distance() {
        // Identical text, and text differing only in case, is a perfect match.
        assert_eq!(string_distance("Hello World", "Hello World"), 0.0);
        assert_eq!(string_distance("hello world", "HELLO WORLD"), 0.0);

        // A one-letter typo stays close, but is not a perfect match.
        let typo = string_distance("Project Hail Mary", "Project Haile Mary");
        assert!(typo > 0.0 && typo < 0.15);

        // Unrelated strings are far apart.
        let unrelated = string_distance("Completely Different", "Not the Same");
        assert!(unrelated > 0.5);
    }

    #[test]
    fn test_normalize_string() {
        let cases = [
            // Leading article is dropped.
            ("The Hobbit", "hobbit"),
            // Surrounding whitespace is trimmed.
            ("  Project Hail Mary  ", "project hail mary"),
            // Punctuation inside a word is removed.
            ("Author's Name", "authors name"),
            // Punctuation-only tokens must not leave stray whitespace behind.
            ("Title! @ # $", "title"),
        ];

        for (input, expected) in cases {
            assert_eq!(normalize_string(input), expected);
        }
    }

    #[test]
    fn test_year_distance() {
        // Same year, five years apart, ten years apart.
        let exact = [(2020, 0.0), (2025, 0.5), (2030, 1.0)];
        for (other, expected) in exact {
            assert_eq!(year_distance(2020, other), expected);
        }

        // Beyond ten years the penalty is at its maximum.
        assert!(year_distance(2020, 2035) >= 1.0);
    }

    #[test]
    fn test_duration_distance() {
        // Well under 5% apart: no penalty.
        assert_eq!(duration_distance(3600.0, 3620.0), 0.0);

        // Roughly 9% apart: partial penalty.
        let partial = duration_distance(3600.0, 3960.0);
        assert!(partial > 0.0 && partial < 0.75);

        // 20% apart: maximum penalty.
        let far = duration_distance(3600.0, 4500.0);
        assert_eq!(far, 1.0);
    }

    #[test]
    fn test_determine_confidence() {
        let cases = [
            (0.02, MatchConfidence::Strong),
            (0.08, MatchConfidence::Medium),
            (0.15, MatchConfidence::Low),
            (0.50, MatchConfidence::None),
        ];

        for (distance, expected) in cases {
            assert_eq!(determine_confidence(distance), expected);
        }
    }
}