fuzzy_cmp/fuzzy/
compare.rs

1// use crate::prelude::*;
2
3/// Returns levenshtein distance between strings
4pub fn distance(s1: &str, s2: &str) -> usize {
5    distance::levenshtein(&s1, &s2)
6}
7
8/// Compares 2 string by levenshtein distance
9pub fn compare(s1: &str, s2: &str) -> f32 {
10    let s1 = s1.trim().to_lowercase();
11    let s2 = s2.trim().to_lowercase();
12    
13    if s1 == s2 { return 1.0; }
14    else if s1.is_empty() || s2.is_empty() { return 0.0; }
15    
16    let dist = distance(&s1, &s2);
17    let max_len = s1.chars().count().max(s2.chars().count()).max(1);
18    let coof = 1.0 - (dist as f32 / max_len as f32);
19
20    coof.max(0.0).min(1.0)
21}
22
23/// Deep comparison using word-by-word scoring with positional bonus
24pub fn deep_compare(s1: &str, s2: &str, min_coef: f32) -> f32 {
25    let s1 = s1.trim().to_lowercase();
26    let s2 = s2.trim().to_lowercase();
27    if s1.is_empty() || s2.is_empty() { return 0.0; }
28
29    // split by words:
30    let words1: Vec<&str> = s1.split_whitespace().collect();
31    let words2: Vec<&str> = s2.split_whitespace().collect();
32    
33    // get len's:
34    let s1_len = words1.len();
35    let s2_len = words2.len();
36    let min_len = s1_len.min(s2_len);
37    let max_len = s1_len.max(s2_len);
38    
39    let mut scores = vec![0f32; max_len];
40    let mut covered = vec![0f32; max_len];
41    
42    // compare words (s1 vs s2):
43    for (i, word1) in words1.iter().take(max_len).enumerate() {
44        let mut best_score = 0.0f32;
45        
46        for (j, word2) in words2.iter().enumerate() {
47            let coef = compare(word1, word2);
48            let mut score = coef / 5.0;
49            
50            // add position bonus: |i - j| <= 1
51            if (i as i32 - j as i32).abs() <= 1 {
52                score += 1.0;
53            }
54            
55            // update best compare score:
56            best_score = best_score.max(score);
57
58            // update best coverage score:
59            if coef >= min_coef {
60                covered[j] = covered[j].max(coef);
61            }
62        }
63        
64        scores[i] = best_score.min(6.0);
65    }
66    
67    // calc compare scores:
68    let max_score = 6.0 * max_len as f32;
69    let compare_coef = scores.iter().sum::<f32>() / max_score;
70
71    // calc coverage scores:
72    let max_score = 1.0 * min_len as f32;
73    let coverage_coef = covered.iter().sum::<f32>() / max_score;
74    
75    compare_coef + coverage_coef
76}