1use strsim::{damerau_levenshtein, jaro_winkler, levenshtein};
6
7#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
9pub enum Algorithm {
10 #[default]
15 JaroWinkler,
16
17 Levenshtein,
22
23 DamerauLevenshtein,
28}
29
30pub fn similarity(a: &str, b: &str, algo: Algorithm) -> f64 {
34 if a == b {
35 return 1.0;
36 }
37 if a.is_empty() || b.is_empty() {
38 return 0.0;
39 }
40
41 match algo {
42 Algorithm::JaroWinkler => jaro_winkler(a, b),
43 Algorithm::Levenshtein => {
44 let dist = levenshtein(a, b);
45 let max_len = a.len().max(b.len());
46 1.0 - (dist as f64 / max_len as f64)
47 }
48 Algorithm::DamerauLevenshtein => {
49 let dist = damerau_levenshtein(a, b);
50 let max_len = a.len().max(b.len());
51 1.0 - (dist as f64 / max_len as f64)
52 }
53 }
54}
55
56#[derive(Debug, Clone, PartialEq)]
58pub struct Match {
59 pub candidate: String,
61 pub similarity: f64,
63}
64
65impl Match {
66 pub fn new(candidate: impl Into<String>, similarity: f64) -> Self {
67 Self {
68 candidate: candidate.into(),
69 similarity,
70 }
71 }
72}
73
74pub fn find_closest<'a>(
78 input: &str,
79 candidates: impl IntoIterator<Item = &'a str>,
80 min_similarity: f64,
81 algo: Algorithm,
82) -> Option<Match> {
83 candidates
84 .into_iter()
85 .map(|c| Match::new(c, similarity(input, c, algo)))
86 .filter(|m| m.similarity >= min_similarity)
87 .max_by(|a, b| a.similarity.partial_cmp(&b.similarity).unwrap())
88}
89
90pub fn find_all_matches<'a>(
94 input: &str,
95 candidates: impl IntoIterator<Item = &'a str>,
96 min_similarity: f64,
97 algo: Algorithm,
98) -> Vec<Match> {
99 let mut matches: Vec<_> = candidates
100 .into_iter()
101 .map(|c| Match::new(c, similarity(input, c, algo)))
102 .filter(|m| m.similarity >= min_similarity)
103 .collect();
104
105 matches.sort_by(|a, b| b.similarity.partial_cmp(&a.similarity).unwrap());
106 matches
107}
108
109#[cfg(test)]
110mod tests {
111 use super::*;
112
113 #[test]
114 fn test_identical_strings() {
115 assert_eq!(similarity("hello", "hello", Algorithm::JaroWinkler), 1.0);
116 assert_eq!(similarity("hello", "hello", Algorithm::Levenshtein), 1.0);
117 assert_eq!(
118 similarity("hello", "hello", Algorithm::DamerauLevenshtein),
119 1.0
120 );
121 }
122
123 #[test]
124 fn test_empty_strings() {
125 assert_eq!(similarity("", "", Algorithm::JaroWinkler), 1.0);
126 assert_eq!(similarity("hello", "", Algorithm::JaroWinkler), 0.0);
127 assert_eq!(similarity("", "hello", Algorithm::JaroWinkler), 0.0);
128 }
129
130 #[test]
131 fn test_typo_detection_jaro_winkler() {
132 let sim = similarity("AddDeriv", "AddDerive", Algorithm::JaroWinkler);
134 assert!(sim > 0.9, "Expected > 0.9, got {}", sim);
135
136 let sim = similarity("RenamIdent", "RenameIdent", Algorithm::JaroWinkler);
137 assert!(sim > 0.9, "Expected > 0.9, got {}", sim);
138 }
139
140 #[test]
141 fn test_field_name_typo() {
142 let sim = similarity("target_name", "target", Algorithm::JaroWinkler);
143 assert!(sim > 0.7, "Expected > 0.7, got {}", sim);
144
145 let sim = similarity("struct_nam", "struct_name", Algorithm::JaroWinkler);
146 assert!(sim > 0.9, "Expected > 0.9, got {}", sim);
147 }
148
149 #[test]
150 fn test_find_closest() {
151 let candidates = ["AddDerive", "RemoveDerive", "AddField", "RemoveField"];
152 let result = find_closest(
153 "AddDeriv",
154 candidates.iter().copied(),
155 0.7,
156 Algorithm::JaroWinkler,
157 );
158
159 assert!(result.is_some());
160 let m = result.unwrap();
161 assert_eq!(m.candidate, "AddDerive");
162 assert!(m.similarity > 0.9);
163 }
164
165 #[test]
166 fn test_find_closest_no_match() {
167 let candidates = ["AddDerive", "RemoveDerive"];
168 let result = find_closest(
169 "CompletelyDifferent",
170 candidates.iter().copied(),
171 0.9, Algorithm::JaroWinkler,
173 );
174
175 assert!(result.is_none());
176 }
177
178 #[test]
179 fn test_find_all_matches() {
180 let candidates = ["target", "target_mod", "target_fn", "body"];
181 let matches = find_all_matches(
182 "target_name",
183 candidates.iter().copied(),
184 0.6,
185 Algorithm::JaroWinkler,
186 );
187
188 assert!(!matches.is_empty());
189 for i in 1..matches.len() {
191 assert!(matches[i - 1].similarity >= matches[i].similarity);
192 }
193 }
194
195 #[test]
196 fn test_transposition_damerau() {
197 let sim_dl = similarity("teh", "the", Algorithm::DamerauLevenshtein);
199 let sim_l = similarity("teh", "the", Algorithm::Levenshtein);
200 assert!(sim_dl >= sim_l);
202 }
203}