1pub(crate) fn case_aware_jaro_winkler(a: &str, b: &str) -> f64 {
6 let case_insensitive = strsim::jaro_winkler(&a.to_lowercase(), &b.to_lowercase());
7 let case_sensitive = strsim::jaro_winkler(a, b);
8
9 const CASE_WEIGHT: f64 = 0.1; (1.0 - CASE_WEIGHT) * case_insensitive + CASE_WEIGHT * case_sensitive
12}
13
14#[cfg(test)]
15mod case_aware_jaro_winkler_tests {
16 use crate::string_utils::case_aware_jaro_winkler;
17
18 fn sort_in_comparison<const N: usize>(
19 mut strings: [&'static str; N],
20 reference: &'static str,
21 ) -> [&'static str; N] {
22 strings.sort_by(|a, b| {
23 case_aware_jaro_winkler(b, reference).total_cmp(&case_aware_jaro_winkler(a, reference))
24 });
25 strings
26 }
27
28 #[test]
29 fn same_letter_different_case_is_more_similar_than_different_letters() {
30 assert_eq!(
31 sort_in_comparison(["word", "WORD", "other", "Word"], "Word"),
32 ["Word", "word", "WORD", "other"]
33 );
34 }
35
36 #[test]
37 fn exact_match_returns_highest_score() {
38 let score = case_aware_jaro_winkler("hello", "hello");
39 assert_eq!(score, 1.0, "Exact match should be 1.0");
40 }
41
42 #[test]
43 fn case_difference_costs_less_than_character_difference() {
44 let reference = "Test";
45 let case_only = case_aware_jaro_winkler("TEST", reference);
46 let char_diff = case_aware_jaro_winkler("Tast", reference);
47
48 assert!(
49 case_only > char_diff,
50 "Case-only difference ({}) should score higher than character substitution ({})",
51 case_only,
52 char_diff
53 );
54 }
55
56 #[test]
57 fn single_case_difference_vs_single_char_difference() {
58 let reference = "word";
59 let one_case = case_aware_jaro_winkler("Word", reference);
60 let one_char = case_aware_jaro_winkler("wopd", reference);
61
62 assert!(
63 one_case > one_char,
64 "Single case change ({}) should cost less than single char change ({})",
65 one_case,
66 one_char
67 );
68 }
69
70 #[test]
71 fn multiple_case_differences() {
72 let reference = "test";
74 let one_case = case_aware_jaro_winkler("Test", reference);
75 let two_case = case_aware_jaro_winkler("TEst", reference);
76 let all_case = case_aware_jaro_winkler("TEST", reference);
77
78 println!("One case diff: {}", one_case);
79 println!("Two case diffs: {}", two_case);
80 println!("All case diffs: {}", all_case);
81
82 assert!(
83 one_case > two_case && two_case > all_case,
84 "More case differences should decrease score: {} > {} > {}",
85 one_case,
86 two_case,
87 all_case
88 );
89 }
90
91 #[test]
92 fn different_length_strings_shorter_target() {
93 let reference = "Hi";
95 let score_longer = case_aware_jaro_winkler("HELLO", reference);
96 let score_case = case_aware_jaro_winkler("HI", reference);
97
98 println!("'HELLO' vs 'Hi': {}", score_longer);
99 println!("'HI' vs 'Hi': {}", score_case);
100
101 assert!(
104 score_case > score_longer,
105 "Same-length with case diff ({}) should beat longer string ({})",
106 score_case,
107 score_longer
108 );
109 }
110
111 #[test]
112 fn different_length_strings_longer_target() {
113 let reference = "HelloWorld";
114 let score_prefix_case = case_aware_jaro_winkler("helloworld", reference);
115 let score_partial = case_aware_jaro_winkler("Hello", reference);
116
117 println!("'helloworld' vs 'HelloWorld': {}", score_prefix_case);
118 println!("'Hello' vs 'HelloWorld': {}", score_partial);
119
120 assert!(
122 score_prefix_case > score_partial,
123 "Full-length case diff ({}) should beat partial match ({})",
124 score_prefix_case,
125 score_partial
126 );
127 }
128
129 #[test]
130 fn empty_string_edge_case() {
131 let score_empty = case_aware_jaro_winkler("", "");
132 let score_one_empty = case_aware_jaro_winkler("test", "");
133
134 assert_eq!(score_empty, 1.0, "Empty strings should match perfectly");
135 assert_eq!(score_one_empty, 0.0, "Empty vs non-empty should score 0");
136 }
137
138 #[test]
139 fn common_prefix_with_case_differences() {
140 let reference = "TestCase";
142 let prefix_case_diff = case_aware_jaro_winkler("testCase", reference);
143 let suffix_case_diff = case_aware_jaro_winkler("TestCASE", reference);
144 let no_prefix = case_aware_jaro_winkler("xestCase", reference);
145
146 println!("'testCase' vs 'TestCase': {}", prefix_case_diff);
147 println!("'TestCASE' vs 'TestCase': {}", suffix_case_diff);
148 println!("'xestCase' vs 'TestCase': {}", no_prefix);
149
150 assert!(
152 prefix_case_diff > no_prefix,
153 "Case diff should beat char substitution"
154 );
155 }
156
157 #[test]
158 fn transposition_vs_case_difference() {
159 let reference = "test";
162 let transposed = case_aware_jaro_winkler("tset", reference);
163 let case_diff = case_aware_jaro_winkler("TEST", reference);
164
165 println!("'tset' vs 'test' (transposed): {}", transposed);
166 println!("'TEST' vs 'test' (all case diff): {}", case_diff);
167
168 }
171
172 #[test]
173 fn mixed_case_and_character_differences() {
174 let reference = "Example";
175 let all_case_diff = case_aware_jaro_winkler("EXAMPLE", reference);
176 let one_char = case_aware_jaro_winkler("Examplf", reference);
177 let two_chars = case_aware_jaro_winkler("Exaople", reference);
178 let few_case_diff = case_aware_jaro_winkler("ExamplE", reference);
179
180 println!("'EXAMPLE' vs 'Example' (all case): {:.4}", all_case_diff);
181 println!("'ExamplE' vs 'Example' (few case): {:.4}", few_case_diff);
182 println!("'Examplf' vs 'Example' (1 char): {:.4}", one_char);
183 println!("'Exaople' vs 'Example' (2 chars): {:.4}", two_chars);
184
185 assert!(
187 few_case_diff > one_char,
188 "Few case diffs ({:.4}) should beat char diff ({:.4})",
189 few_case_diff,
190 one_char
191 );
192
193 println!(
197 "Note: All-caps scoring is {:.4}, which is acceptable for did-you-mean",
198 all_case_diff
199 );
200 }
201
202 #[test]
203 fn sort_by_quality_realistic_example() {
204 let results = sort_in_comparison(
206 [
207 "MyStruct", "myStruct", "MYSTRUCT", "my_struct", "MyString", "YourStruct", ],
214 "MyStruct",
215 );
216
217 println!("Sorted results for 'MyStruct':");
218 for (i, result) in results.iter().enumerate() {
219 println!(
220 " {}. {} (score: {:.4})",
221 i + 1,
222 result,
223 case_aware_jaro_winkler(result, "MyStruct")
224 );
225 }
226
227 assert_eq!(results[0], "MyStruct", "Exact match should be first");
229
230 let mystruct_pos = results.iter().position(|&s| s == "myStruct").unwrap();
235 let mystruct_caps_pos = results.iter().position(|&s| s == "MYSTRUCT").unwrap();
236
237 assert!(
238 mystruct_pos < mystruct_caps_pos,
239 "Fewer case differences should rank higher"
240 );
241 }
242
243 #[test]
244 fn potential_score_overflow() {
245 let reference = "test";
247 let all_case = case_aware_jaro_winkler("TEST", reference);
248
249 assert!(
250 all_case <= 1.0,
251 "Score should not exceed 1.0, got {}",
252 all_case
253 );
254 }
255
256 #[test]
257 fn length_normalization_exploration() {
258 let short = "Hi";
260 let long = "HiThere";
261
262 let score = case_aware_jaro_winkler("HI", short);
264
265 println!("Testing length normalization:");
266 println!(" 'HI' vs 'Hi' (same length): {}", score);
267 println!(
268 " 'HITHERE' vs 'HiThere': {}",
269 case_aware_jaro_winkler("HITHERE", long)
270 );
271 println!(
272 " 'HI' vs 'HiThere': {}",
273 case_aware_jaro_winkler("HI", long)
274 );
275
276 }
280
281 #[test]
282 fn realistic_naming_convention_variations() {
283 let reference = "getValue";
285
286 let camel_case = case_aware_jaro_winkler("getValue", reference);
287 let pascal_case = case_aware_jaro_winkler("GetValue", reference);
288 let snake_case = case_aware_jaro_winkler("get_value", reference);
289 let typo = case_aware_jaro_winkler("getValu", reference); let wrong_word = case_aware_jaro_winkler("setValue", reference);
291
292 println!("\nRealistic 'getValue' comparisons:");
293 println!(" getValue (exact): {:.4}", camel_case);
294 println!(" GetValue (Pascal): {:.4}", pascal_case);
295 println!(" get_value (snake): {:.4}", snake_case);
296 println!(" getValu (typo): {:.4}", typo);
297 println!(" setValue (wrong): {:.4}", wrong_word);
298
299 assert_eq!(camel_case, 1.0);
301
302 assert!(
304 pascal_case > typo,
305 "Case variation ({}) should beat typo ({})",
306 pascal_case,
307 typo
308 );
309
310 assert!(
312 pascal_case > wrong_word && snake_case > wrong_word,
313 "Case variations should beat different words"
314 );
315 }
316}