use strsim::jaro_winkler;
/// Minimum Jaro-Winkler score (inclusive) that a candidate must reach for
/// `match_title` to accept it as a match.
const SIMILARITY_THRESHOLD: f64 = 0.85;
/// The winning candidate produced by a title lookup.
///
/// `PartialEq` is derived so that results can be compared directly (for
/// example with `assert_eq!`). `Eq` is intentionally not derived because
/// `score` is an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub struct TitleMatch {
    /// Identifier of the matched candidate, copied from its `(id, title)` pair.
    pub game_id: i64,
    /// The candidate's title exactly as it was supplied (not normalized).
    pub db_title: String,
    /// Similarity score between the normalized ROM title and the normalized
    /// candidate title.
    pub score: f64,
}
/// Finds the candidate whose title is most similar to `rom_title`.
///
/// The ROM title and every candidate title are passed through `normalize`
/// and then compared with Jaro-Winkler similarity. Candidates scoring below
/// `SIMILARITY_THRESHOLD` are discarded; the highest-scoring survivor is
/// returned with its original (un-normalized) title.
///
/// # Arguments
///
/// * `rom_title` - the title to look up, in any case or punctuation.
/// * `candidates` - `(game_id, title)` pairs to search.
///
/// # Returns
///
/// `None` when no candidate reaches the threshold, or when `rom_title`
/// normalizes to an empty string (blank or punctuation-only input). When
/// several candidates share the highest score, the one that appears last in
/// `candidates` is returned.
pub fn match_title(rom_title: &str, candidates: &[(i64, String)]) -> Option<TitleMatch> {
    let normalized_rom = normalize(rom_title);

    // Two empty strings are scored as a perfect match, so a title with nothing
    // left after normalization would otherwise "match" any candidate that also
    // normalizes to nothing. There is no meaningful title to compare: bail out.
    if normalized_rom.is_empty() {
        return None;
    }

    candidates
        .iter()
        .map(|(id, title)| {
            let normalized_db = normalize(title);
            let score = jaro_winkler(&normalized_rom, &normalized_db);
            (*id, title.as_str(), score)
        })
        .filter(|(_, _, score)| *score >= SIMILARITY_THRESHOLD)
        // A NaN score can never pass the `>=` filter above, so the `Equal`
        // fallback is only there to satisfy the `Option` returned by
        // `partial_cmp`.
        .max_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal))
        .map(|(game_id, db_title, score)| TitleMatch {
            game_id,
            db_title: db_title.to_string(),
            score,
        })
}
/// Reduces a title to a canonical form for fuzzy comparison.
///
/// Steps, in order:
/// 1. lowercase the whole string;
/// 2. drop every character that is neither alphanumeric nor whitespace;
/// 3. trim the ends and collapse each run of whitespace to a single space;
/// 4. remove one leading `"the "` if present.
fn normalize(title: &str) -> String {
    let mut cleaned = title.to_lowercase();
    cleaned.retain(|ch| ch.is_alphanumeric() || ch.is_whitespace());

    // Re-join the words with single spaces; `split_whitespace` never yields
    // an empty piece, so an empty buffer means "no word written yet".
    let mut collapsed = String::with_capacity(cleaned.len());
    for word in cleaned.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }

    match collapsed.strip_prefix("the ") {
        Some(rest) => rest.to_owned(),
        None => collapsed,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shared fixture: a small catalogue of `(game_id, title)` rows.
    fn candidates() -> Vec<(i64, String)> {
        [
            (5, "Donkey Kong"),
            (112, "Super Mario Bros. 3"),
            (113, "The Legend of Zelda"),
            (121, "Kirby's Adventure"),
            (123, "Metroid"),
            (125, "Mega Man 5"),
            (135, "Castlevania"),
            (200, "Contra"),
            (300, "Tetris"),
        ]
        .iter()
        .map(|&(id, title)| (id, title.to_string()))
        .collect()
    }

    /// Looks `rom_title` up in the shared fixture and keeps only the matched id.
    fn matched_id(rom_title: &str) -> Option<i64> {
        match_title(rom_title, &candidates()).map(|hit| hit.game_id)
    }

    #[test]
    fn exact_match() {
        let hit = match_title("Super Mario Bros. 3", &candidates());
        assert_eq!(hit.as_ref().map(|m| m.game_id), Some(112));
        assert!(hit.map_or(false, |m| m.score > 0.99));
    }

    #[test]
    fn case_insensitive_match() {
        assert_eq!(matched_id("super mario bros. 3"), Some(112));
    }

    #[test]
    fn match_without_punctuation() {
        assert_eq!(matched_id("Kirbys Adventure"), Some(121));
    }

    #[test]
    fn simple_exact_match() {
        assert_eq!(matched_id("Metroid"), Some(123));
    }

    #[test]
    fn no_match_for_gibberish() {
        assert_eq!(matched_id("XYZ Unknown Game 999"), None);
    }

    #[test]
    fn normalize_strips_punctuation_extra_spaces_and_the_prefix() {
        let cases = [
            ("Super Mario Bros. 3", "super mario bros 3"),
            ("Kirby's Adventure", "kirbys adventure"),
            (" Mega Man 5 ", "mega man 5"),
            ("The Legend of Zelda", "legend of zelda"),
        ];
        for &(raw, expected) in &cases {
            assert_eq!(normalize(raw), expected);
        }
    }

    #[test]
    fn threshold_behavior() {
        let hit = match_title("Mega Man 5", &candidates());
        assert_eq!(hit.as_ref().map(|m| m.game_id), Some(125));
        assert!(hit.map_or(false, |m| m.score >= SIMILARITY_THRESHOLD));
    }

    #[test]
    fn best_match_wins_among_similar() {
        // Ids 1..=5 are paired with the titles in order.
        let series: Vec<(i64, String)> = [
            "Mega Man",
            "Mega Man 2",
            "Mega Man 3",
            "Mega Man 4",
            "Mega Man 5",
        ]
        .iter()
        .zip(1..)
        .map(|(title, id)| (id, title.to_string()))
        .collect();

        let winner = match_title("Mega Man 5", &series).map(|m| m.game_id);
        assert_eq!(winner, Some(5));
    }

    #[test]
    fn match_with_the_prefix() {
        assert_eq!(matched_id("Legend of Zelda"), Some(113));
    }
}