1fn edit_distance(a: &[u8], b: &[u8]) -> usize {
3 if a.is_empty() {
4 if b.is_empty() {
5 0
6 }
7
8 else {
9 b.len()
10 }
11 }
12
13 else if b.is_empty() {
14 a.len()
15 }
16
17 else {
18 let i = a.len();
19 let j = b.len();
20 let mut cache = vec![vec![usize::MAX; j]; i];
21
22 edit_distance_impl(a, b, i - 1, j - 1, &mut cache)
23 }
24}
25
26fn preprocess(s: &[u8]) -> Vec<u8> {
29 let s = if s.len() > 256 {
31 &s[..256]
32 } else {
33 s
34 };
35
36 s.iter().map(
37 |c| {
38 let mut c = *c;
39 c.make_ascii_lowercase();
40
41 c
42 }
43 ).filter(
44 |c| *c != b'_'
45 ).collect()
46}
47
48pub fn get_closest_string(
49 candidates: &[String],
50 input: &str,
51) -> Option<String> {
52 let b = input.as_bytes();
53 let mut close_strings = vec![];
54
55 for c in candidates.iter() {
56 let dist = substr_edit_distance(b, c.as_bytes());
57
58 if dist <= input.len().min(c.len()) / 3 {
60 close_strings.push((c.to_string(), dist));
61
62 if dist == 0 {
63 break;
64 }
65 }
66 }
67
68 close_strings.sort_by_key(|(_, dist)| *dist);
69 close_strings.get(0).map(|(s, _)| s.to_string())
70}
71
72pub fn substr_edit_distance(sub: &[u8], s: &[u8]) -> usize {
75 let sub = &preprocess(sub);
76 let s = &preprocess(s);
77
78 if sub == s {
79 0
80 }
81
82 else if sub.len() == 1 && s.len() == 1 {
84 return (sub != s) as usize;
85 }
86
87 else if sub.len() > s.len() || s.len() < 4 {
88 edit_distance(sub, s)
89 }
90
91 else if sub.len() * 2 > s.len() {
92 let mut result = usize::MAX;
93
94 for start in 0..s.len() {
95 for end in (start + 1)..(s.len() + 1) {
96 result = result.min(edit_distance(sub, &s[start..end]));
97 }
98 }
99
100 result
101 }
102
103 else {
104 edit_distance(sub, s)
105 }
106}
107
108fn edit_distance_impl(a: &[u8], b: &[u8], i: usize, j: usize, cache: &mut Vec<Vec<usize>>) -> usize {
109 let indicator = (a[i] != b[j]) as usize;
110
111 if i == 0 && j == 0 {
112 return indicator;
113 }
114
115 if cache[i][j] != usize::MAX {
116 return cache[i][j];
117 }
118
119 let mut result = usize::MAX;
120
121 if i > 0 {
122 result = result.min(edit_distance_impl(a, b, i - 1, j, cache) + 1);
123 }
124
125 if j > 0 {
126 result = result.min(edit_distance_impl(a, b, i, j - 1, cache) + 1);
127 }
128
129 if i > 0 && j > 0 {
130 result = result.min(edit_distance_impl(a, b, i - 1, j - 1, cache) + indicator);
131 }
132
133 if i > 1 && j > 1 && a[i] == b[j - 1] && a[i - 1] == b[j] {
134 result = result.min(edit_distance_impl(a, b, i - 2, j - 2, cache) + indicator);
135 }
136
137 cache[i][j] = result;
138 result
139}
140
141#[test]
142fn dist_test() {
143 assert_eq!(substr_edit_distance(b"x", b"X"), 0);
144 assert_eq!(substr_edit_distance(b"x", b"y"), 1);
145 assert_eq!(substr_edit_distance(b"x", b"x1"), 1);
146 assert_eq!(substr_edit_distance(b"item", b"itme"), 1);
147 assert_eq!(substr_edit_distance(b"item", b"itm"), 1);
148 assert_eq!(substr_edit_distance(b"time", b"tiem"), 1);
149 assert_eq!(substr_edit_distance(b"time", b"sime"), 1);
150 assert_eq!(substr_edit_distance(b"Internal", b"Interal"), 1);
151 assert_eq!(substr_edit_distance(b"Interal", b"Internal"), 1);
152 assert_eq!(substr_edit_distance(b"qqqqq", b"qqqqq"), 0);
153 assert_eq!(substr_edit_distance(b"qqqqq", b"cqqqq"), 1);
154 assert_eq!(substr_edit_distance(b"cqqqq", b"qqqqq"), 1);
155 assert_eq!(substr_edit_distance(b"query", b"qeury"), 1);
156 assert_eq!(substr_edit_distance(b"interactive", b"intercative"), 1);
157 assert_eq!(substr_edit_distance(b"HTML", b"Programming Language"), 18);
158
159 assert_eq!(substr_edit_distance(b"edit_distan", b"substr_edit_distance"), 0);
160 assert_eq!(substr_edit_distance(b"edit_dustan", b"substr_edit_distance"), 1);
161
162 assert!(substr_edit_distance(
163 "Very Very Long String: I want to make sure that `edit_distance` is not an O(a^n) algorithm".repeat(256).as_bytes(),
164 "Another very very long string... 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".repeat(256).as_bytes(),
165 ) > 10 );
166}