/// Scores the similarity of `a` and `b` while penalising case mismatches only mildly.
///
/// Two Jaro-Winkler similarities are computed via `strsim`: one on the
/// lowercased inputs and one on the inputs exactly as given. The returned
/// value is their weighted average, with 90% of the weight on the lowercased
/// comparison and 10% on the exact one.
///
/// Assuming `strsim::jaro_winkler` stays within `0.0..=1.0` (as its
/// documentation states), the exact comparison can shift the result by at
/// most its 10% share.
pub(crate) fn case_aware_jaro_winkler(a: &str, b: &str) -> f64 {
    // Share of the final score contributed by the exact (case-sensitive) comparison.
    const CASE_WEIGHT: f64 = 0.1;
    // Remaining share, contributed by the comparison of the lowercased inputs.
    const FOLDED_WEIGHT: f64 = 1.0 - CASE_WEIGHT;

    let (a_lower, b_lower) = (a.to_lowercase(), b.to_lowercase());
    let folded_score = strsim::jaro_winkler(&a_lower, &b_lower);
    let exact_score = strsim::jaro_winkler(a, b);

    FOLDED_WEIGHT * folded_score + CASE_WEIGHT * exact_score
}
#[cfg(test)]
mod case_aware_jaro_winkler_tests {
    use crate::string_utils::case_aware_jaro_winkler;

    /// Returns `strings` ordered from the highest to the lowest
    /// `case_aware_jaro_winkler` score against `reference`.
    fn sort_in_comparison<const N: usize>(
        mut strings: [&'static str; N],
        reference: &'static str,
    ) -> [&'static str; N] {
        strings.sort_by(|&left, &right| {
            let left_score = case_aware_jaro_winkler(left, reference);
            let right_score = case_aware_jaro_winkler(right, reference);
            // Comparing right against left produces a descending order.
            right_score.total_cmp(&left_score)
        });
        strings
    }

    // Candidates that differ from the reference only by case must rank ahead
    // of a candidate made of different letters.
    #[test]
    fn same_letter_different_case_is_more_similar_than_different_letters() {
        let ranked = sort_in_comparison(["word", "WORD", "other", "Word"], "Word");
        assert_eq!(ranked, ["Word", "word", "WORD", "other"]);
    }

    // Identical strings score exactly 1.0.
    #[test]
    fn exact_match_returns_highest_score() {
        let identical = case_aware_jaro_winkler("hello", "hello");
        assert_eq!(identical, 1.0, "Exact match should be 1.0");
    }

    // An all-caps variant must outscore a variant with one substituted letter.
    #[test]
    fn case_difference_costs_less_than_character_difference() {
        let against_reference = |candidate: &str| case_aware_jaro_winkler(candidate, "Test");
        let recased = against_reference("TEST");
        let substituted = against_reference("Tast");
        assert!(
            recased > substituted,
            "Case-only difference ({}) should score higher than character substitution ({})",
            recased,
            substituted
        );
    }

    // Changing the case of one letter must cost less than replacing one letter.
    #[test]
    fn single_case_difference_vs_single_char_difference() {
        let against_reference = |candidate: &str| case_aware_jaro_winkler(candidate, "word");
        let recased = against_reference("Word");
        let substituted = against_reference("wopd");
        assert!(
            recased > substituted,
            "Single case change ({}) should cost less than single char change ({})",
            recased,
            substituted
        );
    }

    // The score must strictly decrease as more letters differ in case.
    #[test]
    fn multiple_case_differences() {
        let [one_flip, two_flips, all_flips] =
            ["Test", "TEst", "TEST"].map(|candidate| case_aware_jaro_winkler(candidate, "test"));
        println!("One case diff: {}", one_flip);
        println!("Two case diffs: {}", two_flips);
        println!("All case diffs: {}", all_flips);
        assert!(
            one_flip > two_flips && two_flips > all_flips,
            "More case differences should decrease score: {} > {} > {}",
            one_flip,
            two_flips,
            all_flips
        );
    }

    // Against a short reference, a same-length case variant must beat a longer word.
    #[test]
    fn different_length_strings_shorter_target() {
        let against_reference = |candidate: &str| case_aware_jaro_winkler(candidate, "Hi");
        let longer_word = against_reference("HELLO");
        let recased = against_reference("HI");
        println!("'HELLO' vs 'Hi': {}", longer_word);
        println!("'HI' vs 'Hi': {}", recased);
        assert!(
            recased > longer_word,
            "Same-length with case diff ({}) should beat longer string ({})",
            recased,
            longer_word
        );
    }

    // Against a long reference, a full-length case variant must beat a bare prefix.
    #[test]
    fn different_length_strings_longer_target() {
        let against_reference = |candidate: &str| case_aware_jaro_winkler(candidate, "HelloWorld");
        let recased_full = against_reference("helloworld");
        let prefix_only = against_reference("Hello");
        println!("'helloworld' vs 'HelloWorld': {}", recased_full);
        println!("'Hello' vs 'HelloWorld': {}", prefix_only);
        assert!(
            recased_full > prefix_only,
            "Full-length case diff ({}) should beat partial match ({})",
            recased_full,
            prefix_only
        );
    }

    // Two empty strings score 1.0; an empty string against a non-empty one scores 0.0.
    #[test]
    fn empty_string_edge_case() {
        let (both_empty, one_empty) = (
            case_aware_jaro_winkler("", ""),
            case_aware_jaro_winkler("test", ""),
        );
        assert_eq!(both_empty, 1.0, "Empty strings should match perfectly");
        assert_eq!(one_empty, 0.0, "Empty vs non-empty should score 0");
    }

    // A case change in the first letter must beat a substituted first letter.
    // The suffix-case score is printed for inspection only.
    #[test]
    fn common_prefix_with_case_differences() {
        let against_reference = |candidate: &str| case_aware_jaro_winkler(candidate, "TestCase");
        let recased_head = against_reference("testCase");
        let recased_tail = against_reference("TestCASE");
        let substituted_head = against_reference("xestCase");
        println!("'testCase' vs 'TestCase': {}", recased_head);
        println!("'TestCASE' vs 'TestCase': {}", recased_tail);
        println!("'xestCase' vs 'TestCase': {}", substituted_head);
        assert!(
            recased_head > substituted_head,
            "Case diff should beat char substitution"
        );
    }

    // Exploratory: prints both scores and asserts nothing.
    #[test]
    fn transposition_vs_case_difference() {
        let against_reference = |candidate: &str| case_aware_jaro_winkler(candidate, "test");
        let swapped = against_reference("tset");
        let recased = against_reference("TEST");
        println!("'tset' vs 'test' (transposed): {}", swapped);
        println!("'TEST' vs 'test' (all case diff): {}", recased);
    }

    // A candidate with one letter recased must beat a candidate with one
    // letter replaced; the remaining scores are printed for inspection only.
    #[test]
    fn mixed_case_and_character_differences() {
        let against_reference = |candidate: &str| case_aware_jaro_winkler(candidate, "Example");
        let all_caps = against_reference("EXAMPLE");
        let last_replaced = against_reference("Examplf");
        let middle_replaced = against_reference("Exaople");
        let last_recased = against_reference("ExamplE");
        println!("'EXAMPLE' vs 'Example' (all case): {:.4}", all_caps);
        println!("'ExamplE' vs 'Example' (few case): {:.4}", last_recased);
        println!("'Examplf' vs 'Example' (1 char): {:.4}", last_replaced);
        println!("'Exaople' vs 'Example' (2 chars): {:.4}", middle_replaced);
        assert!(
            last_recased > last_replaced,
            "Few case diffs ({:.4}) should beat char diff ({:.4})",
            last_recased,
            last_replaced
        );
        println!(
            "Note: All-caps scoring is {:.4}, which is acceptable for did-you-mean",
            all_caps
        );
    }

    // Ranks identifier-like candidates: the exact match must come first, and
    // the single-letter case variant must precede the all-caps variant.
    #[test]
    fn sort_by_quality_realistic_example() {
        let candidates = [
            "MyStruct",
            "myStruct",
            "MYSTRUCT",
            "my_struct",
            "MyString",
            "YourStruct",
        ];
        let results = sort_in_comparison(candidates, "MyStruct");
        println!("Sorted results for 'MyStruct':");
        for (rank, candidate) in (1usize..).zip(results.iter()) {
            println!(
                " {}. {} (score: {:.4})",
                rank,
                candidate,
                case_aware_jaro_winkler(candidate, "MyStruct")
            );
        }
        assert_eq!(results[0], "MyStruct", "Exact match should be first");
        let index_of = |wanted: &str| results.iter().position(|&entry| entry == wanted).unwrap();
        let single_flip_index = index_of("myStruct");
        let all_caps_index = index_of("MYSTRUCT");
        assert!(
            single_flip_index < all_caps_index,
            "Fewer case differences should rank higher"
        );
    }

    // The blended score must not exceed 1.0.
    #[test]
    fn potential_score_overflow() {
        let all_caps = case_aware_jaro_winkler("TEST", "test");
        assert!(
            all_caps <= 1.0,
            "Score should not exceed 1.0, got {}",
            all_caps
        );
    }

    // Exploratory: prints scores for inputs of different lengths and asserts nothing.
    #[test]
    fn length_normalization_exploration() {
        let (short_reference, long_reference) = ("Hi", "HiThere");
        let same_length = case_aware_jaro_winkler("HI", short_reference);
        println!("Testing length normalization:");
        println!(" 'HI' vs 'Hi' (same length): {}", same_length);
        println!(
            " 'HITHERE' vs 'HiThere': {}",
            case_aware_jaro_winkler("HITHERE", long_reference)
        );
        println!(
            " 'HI' vs 'HiThere': {}",
            case_aware_jaro_winkler("HI", long_reference)
        );
    }

    // Compares naming-convention variants of one identifier against a
    // truncated spelling and a different word.
    #[test]
    fn realistic_naming_convention_variations() {
        let against_reference = |candidate: &str| case_aware_jaro_winkler(candidate, "getValue");
        let exact = against_reference("getValue");
        let pascal = against_reference("GetValue");
        let snake = against_reference("get_value");
        let truncated = against_reference("getValu");
        let other_word = against_reference("setValue");
        println!("\nRealistic 'getValue' comparisons:");
        println!(" getValue (exact): {:.4}", exact);
        println!(" GetValue (Pascal): {:.4}", pascal);
        println!(" get_value (snake): {:.4}", snake);
        println!(" getValu (typo): {:.4}", truncated);
        println!(" setValue (wrong): {:.4}", other_word);
        assert_eq!(exact, 1.0);
        assert!(
            pascal > truncated,
            "Case variation ({}) should beat typo ({})",
            pascal,
            truncated
        );
        assert!(
            pascal > other_word && snake > other_word,
            "Case variations should beat different words"
        );
    }
}