1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#[macro_use]
extern crate lazy_static;

use regex::Regex;
use std::collections::BTreeMap;

/// Similarity score computed for a single candidate string.
#[derive(Debug, Clone, PartialEq)]
pub struct MatchResult {
  /// Score assigned to `string` by the comparison that produced this entry.
  pub score: f64,
  /// Owned copy of the candidate the score belongs to.
  pub string: String,
}

/// Scores for a whole list of candidates plus the position of the best one.
#[derive(Debug, Clone, PartialEq)]
pub struct BestMatchResult {
  /// Index into `result` of the best-scoring entry.
  pub best_result_index: usize,
  /// One entry per candidate.
  pub result: Vec<MatchResult>,
}

/// Scores how similar two strings are, based on the adjacent character pairs
/// (bigrams) they have in common.
///
/// All whitespace is removed from both inputs before they are compared. The
/// score is `2 * shared / (bigrams_in_first + bigrams_in_second)`, where a
/// bigram that occurs `k` times in `first` can be matched at most `k` times
/// by `second`.
///
/// Returns `1.0` when the whitespace-stripped inputs are identical (including
/// when both are empty), and `0.0` when they differ and either one has fewer
/// than two characters, since no bigram can be formed from it.
///
/// Bigrams are pairs of `char`s rather than two-byte slices, so inputs that
/// contain multi-byte UTF-8 characters are handled without panicking.
pub fn compare_two_strings(first: &str, second: &str) -> f64 {
  // `char::is_whitespace` covers the Unicode White_Space property, i.e. the
  // same characters a `\s` regex class strips.
  let first: Vec<char> = first.chars().filter(|c| !c.is_whitespace()).collect();
  let second: Vec<char> = second.chars().filter(|c| !c.is_whitespace()).collect();

  if first == second {
    return 1.0;
  }

  if first.len() < 2 || second.len() < 2 {
    return 0.0;
  }

  // How many times each bigram of `first` is still available for matching.
  let mut available: BTreeMap<(char, char), usize> = BTreeMap::new();
  for pair in first.windows(2) {
    *available.entry((pair[0], pair[1])).or_insert(0) += 1;
  }

  // Every bigram of `second` consumes one matching occurrence, so repeated
  // bigrams are only counted as often as they appear in both inputs.
  let mut intersection_size: usize = 0;
  for pair in second.windows(2) {
    if let Some(remaining) = available.get_mut(&(pair[0], pair[1])) {
      if *remaining > 0 {
        *remaining -= 1;
        intersection_size += 1;
      }
    }
  }

  // A string of `n` characters has `n - 1` bigrams, hence the `- 2`.
  (2.0 * intersection_size as f64) / (first.len() + second.len() - 2) as f64
}

/// Scores every entry of `arr` against `string` and reports which one is the
/// closest match.
///
/// The returned `result` holds one `MatchResult` per candidate, in the same
/// order as `arr`. `best_result_index` points at the highest score; when
/// several candidates tie, the earliest one wins. For an empty `arr` the
/// result list is empty and `best_result_index` is `0`, which then does not
/// refer to a valid entry.
pub fn find_best_match(string: &str, arr: Vec<&str>) -> BestMatchResult {
  let result: Vec<MatchResult> = arr
    .into_iter()
    .map(|candidate| MatchResult {
      score: compare_two_strings(string, candidate),
      string: candidate.to_owned(),
    })
    .collect();

  // Only a strictly greater score replaces the current best, so ties keep
  // the first occurrence.
  let best_result_index = result
    .iter()
    .enumerate()
    .fold(0usize, |best, (index, entry)| {
      if entry.score > result[best].score {
        index
      } else {
        best
      }
    });

  BestMatchResult {
    best_result_index,
    result,
  }
}

#[cfg(test)]
mod tests {
  use super::{compare_two_strings, find_best_match, BestMatchResult};

  /// Collects the per-candidate scores in candidate order.
  fn scores_of(outcome: &BestMatchResult) -> Vec<f64> {
    outcome.result.iter().map(|entry| entry.score).collect()
  }

  /// "Night" and "Nacht" share only the bigram "ht": 2 * 1 / (4 + 4).
  #[test]
  fn check_compare() {
    let score = compare_two_strings("Night", "Nacht");
    assert_eq!(score, 0.25);
    assert_ne!(score, 0.5);
  }

  /// Spaces are stripped before scoring, so both inputs are 15 characters.
  #[test]
  fn check_compare_space() {
    let score = compare_two_strings("Night Night Night", "Nacht Nacht Nacht");
    assert_eq!(score, 0.35714285714285715);
    assert_ne!(score, 0.5);
  }

  /// The exact match in the middle of the list must be picked as best.
  #[test]
  fn check_best_match() {
    let outcome = find_best_match("Night", vec!["Nacht", "Night", "Nacht"]);
    assert_eq!(scores_of(&outcome), vec![0.25, 1.0, 0.25]);
    assert_eq!(outcome.best_result_index, 1);
  }

  /// Same as `check_best_match`, with inputs that contain whitespace.
  #[test]
  fn check_best_match_space() {
    let candidates = vec!["Nacht Nacht Nacht", "Night Night Night", "Night"];
    let outcome = find_best_match("Night Night Night", candidates);
    assert_eq!(
      scores_of(&outcome),
      vec![0.35714285714285715, 1.0, 0.44444444444444444]
    );
    assert_eq!(outcome.best_result_index, 1);
  }
}