str_similarity/
lib.rs

1use std::cmp::{self, min};
2
3pub struct MatchResult {
4  pub score: f64,
5  pub string: String,
6}
7
8pub struct BestMatchResult {
9  pub best_result_index: usize,
10  pub result: Vec<MatchResult>,
11}
12
13#[inline]
14pub fn compare_two_strings(first: &str, second: &str) -> f64 {
15  let first_len = first.len();
16  let second_len = second.len();
17  let mut d = vec![0; (first_len + 1) * (second_len + 1)];
18
19  for i in 0..=first_len {
20    d[i * (second_len + 1)] = i;
21  }
22  for j in 0..=second_len {
23    d[j] = j;
24  }
25
26  for i in 1..=first_len {
27    for j in 1..=second_len {
28      let cost = if first.get(i - 1..i) == second.get(j - 1..j) {
29        0
30      } else {
31        1
32      };
33      d[i * (second_len + 1) + j] = min(
34        d[(i - 1) * (second_len + 1) + j] + 1,
35        min(
36          d[i * (second_len + 1) + j - 1] + 1,
37          d[(i - 1) * (second_len + 1) + j - 1] + cost,
38        ),
39      );
40      if i > 1
41        && j > 1
42        && first.get(i - 1..i) == second.get(j - 2..j - 1)
43        && first.get(i - 2..i - 1) == second.get(j - 1..j)
44      {
45        d[i * (second_len + 1) + j] = min(
46          d[i * (second_len + 1) + j],
47          d[(i - 2) * (second_len + 1) + j - 2] + 1,
48        );
49      }
50    }
51  }
52
53  1.0
54    - (d[first_len * (second_len + 1) + second_len] as f64) / cmp::max(first_len, second_len) as f64
55}
56
57#[inline]
58pub fn find_best_match(string: &str, arr: Vec<&str>) -> BestMatchResult {
59  let mut result: Vec<MatchResult> = vec![];
60  let mut best_result_index: usize = 0;
61
62  for (index, item) in arr.iter().enumerate() {
63    let score = compare_two_strings(string, item);
64
65    result.push(MatchResult {
66      score,
67      string: item.to_string(),
68    });
69
70    if score > result[best_result_index].score {
71      best_result_index = index;
72    }
73  }
74
75  BestMatchResult {
76    best_result_index,
77    result,
78  }
79}
80
81#[cfg(test)]
82mod tests {
83  use super::{compare_two_strings, find_best_match};
84
85  #[test]
86  fn check_compare() {
87    let result: f64 = compare_two_strings("Night", "Nacht");
88    assert_eq!(result, 0.6);
89    let result: f64 = compare_two_strings("Night Night Night", "Night Night Night");
90    assert_eq!(result, 1.0);
91    let result: f64 = compare_two_strings("Night Night Night", "Nacht Nacht Nacht");
92    assert_eq!(result, 0.6470588235294117);
93  }
94
95  #[test]
96  fn check_best_match() {
97    let result = find_best_match("Night", vec!["Nacht", "Night", "Nacht"]);
98    assert_eq!(result.result.len(), 3);
99    assert_eq!(result.result[0].score, 0.6);
100    assert_eq!(result.result[1].score, 1.0);
101    assert_eq!(result.result[2].score, 0.6);
102    assert_eq!(result.best_result_index, 1);
103    let result = find_best_match(
104      "Night Night Night",
105      vec!["Nacht Nacht Nacht", "Night Night Night", "Night"],
106    );
107    assert_eq!(result.result.len(), 3);
108    assert_eq!(result.result[0].score, 0.6470588235294117);
109    assert_eq!(result.result[1].score, 1.0);
110    assert_eq!(result.result[2].score, 0.2941176470588235);
111    assert_eq!(result.best_result_index, 1);
112  }
113}