use super::models::Project;
/// Computes the Jaro similarity of two strings.
///
/// Returns `1.0` when the strings are equal, `0.0` when exactly one of them is
/// empty or when they share no matching characters, and a value in between
/// otherwise (higher means more similar).
///
/// All lengths and positions are measured in `char`s (Unicode scalar values),
/// not bytes, so multi-byte UTF-8 input is scored on its characters and never
/// indexes past the end of the character buffers.
fn jaro_similarity(s1: &str, s2: &str) -> f64 {
    if s1 == s2 {
        return 1.0;
    }
    if s1.is_empty() || s2.is_empty() {
        return 0.0;
    }

    let s1_chars: Vec<char> = s1.chars().collect();
    let s2_chars: Vec<char> = s2.chars().collect();
    // `str::len()` is a byte count; the algorithm works on characters, so the
    // lengths must come from the decoded buffers.
    let s1_len = s1_chars.len();
    let s2_len = s2_chars.len();

    // Two equal characters only count as a match when their positions are at
    // most this far apart.
    let match_distance = (s1_len.max(s2_len) / 2).saturating_sub(1);

    let mut s1_matches = vec![false; s1_len];
    let mut s2_matches = vec![false; s2_len];
    let mut matches = 0usize;

    for (i, &c1) in s1_chars.iter().enumerate() {
        let start = i.saturating_sub(match_distance);
        let end = (i + match_distance + 1).min(s2_len);
        // When `s1` is much longer than `s2`, `start` can reach or pass `end`;
        // the range is then empty and nothing is matched.
        let candidate = (start..end).find(|&j| !s2_matches[j] && s2_chars[j] == c1);
        if let Some(j) = candidate {
            s1_matches[i] = true;
            s2_matches[j] = true;
            matches += 1;
        }
    }

    if matches == 0 {
        return 0.0;
    }

    // Walk the matched characters of both strings in their original order;
    // every position where they disagree is half a transposition.
    let matched_in_s1 = s1_chars
        .iter()
        .zip(&s1_matches)
        .filter_map(|(c, &hit)| hit.then_some(c));
    let matched_in_s2 = s2_chars
        .iter()
        .zip(&s2_matches)
        .filter_map(|(c, &hit)| hit.then_some(c));
    let transpositions = matched_in_s1
        .zip(matched_in_s2)
        .filter(|(a, b)| a != b)
        .count();

    let matches_f64 = matches as f64;
    (matches_f64 / s1_len as f64
        + matches_f64 / s2_len as f64
        + (matches_f64 - transpositions as f64 / 2.0) / matches_f64)
        / 3.0
}
/// Computes the Jaro-Winkler score of two strings.
///
/// Despite the name, the returned value is a similarity: it starts from
/// `jaro_similarity(s1, s2)` and adds a bonus proportional to the length of
/// the common prefix (compared character by character, capped at four
/// characters), so higher values mean the strings are more alike.
pub fn jaro_winkler_distance(s1: &str, s2: &str) -> f64 {
    // Weight given to each shared leading character.
    const PREFIX_SCALE: f64 = 0.1;
    // Only this many leading characters can contribute to the bonus.
    const MAX_PREFIX_CHARS: usize = 4;

    let base = jaro_similarity(s1, s2);

    let mut shared_prefix = 0usize;
    for (left, right) in s1.chars().zip(s2.chars()).take(MAX_PREFIX_CHARS) {
        if left != right {
            break;
        }
        shared_prefix += 1;
    }

    base + (shared_prefix as f64 * PREFIX_SCALE * (1.0 - base))
}
/// Returns the names of the projects whose name resembles `input`.
///
/// `input` and each project name are lowercased before being compared with
/// `jaro_winkler_distance`. A project is kept when its score is at least
/// `threshold`. The result holds the original (not lowercased) names, ordered
/// from highest to lowest score; the sort is stable, so equally scored
/// projects keep their order from `all_projects`.
pub fn find_similar_projects(input: &str, all_projects: &[Project], threshold: f64) -> Vec<String> {
    let needle = input.to_lowercase();

    let mut scored: Vec<(f64, String)> = Vec::new();
    for project in all_projects {
        let candidate = project.name.to_lowercase();
        let score = jaro_winkler_distance(&needle, &candidate);
        if score >= threshold {
            scored.push((score, project.name.clone()));
        }
    }

    // Best score first; incomparable scores are treated as equal.
    scored.sort_by(|lhs, rhs| {
        rhs.0
            .partial_cmp(&lhs.0)
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    scored.into_iter().map(|(_, name)| name).collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Equal strings score exactly 1.0.
    #[test]
    fn test_jaro_winkler_identical() {
        let score = jaro_winkler_distance("my-app", "my-app");
        assert_eq!(score, 1.0);
    }

    /// Strings with no characters in common score low.
    #[test]
    fn test_jaro_winkler_completely_different() {
        assert!(jaro_winkler_distance("abc", "xyz") < 0.5);
    }

    /// A single missing trailing character still scores above 0.9.
    #[test]
    fn test_jaro_winkler_similar() {
        let score = jaro_winkler_distance("my-ap", "my-app");
        assert!(score > 0.9, "Expected score > 0.9, got {}", score);
    }

    /// A difference at the end must score higher than the same difference at
    /// the start, because only the former keeps the common prefix.
    #[test]
    fn test_jaro_winkler_prefix_bonus() {
        let shared_prefix = jaro_winkler_distance("abcdef", "abcdex");
        let broken_prefix = jaro_winkler_distance("abcdef", "xbcdef");
        assert!(
            shared_prefix > broken_prefix,
            "Common prefix should score higher: score1={}, score2={}",
            shared_prefix,
            broken_prefix
        );
    }

    /// With no projects to compare against, nothing is returned.
    #[test]
    fn test_find_similar_projects_empty() {
        let similar = find_similar_projects("my-ap", &[], 0.85);
        assert_eq!(similar.len(), 0);
    }
}