rustc_edit_distance/
lib.rs1use std::{cmp, mem};
12
13pub fn edit_distance(a: &str, b: &str, limit: usize) -> Option<usize> {
19 let mut a = &a.chars().collect::<Vec<_>>()[..];
20 let mut b = &b.chars().collect::<Vec<_>>()[..];
21
22 if a.len() < b.len() {
24 mem::swap(&mut a, &mut b);
25 }
26
27 let min_dist = a.len() - b.len();
28 if min_dist > limit {
30 return None;
31 }
32
33 while let Some(((b_char, b_rest), (a_char, a_rest))) = b.split_first().zip(a.split_first()) {
35 if a_char != b_char {
36 break;
37 }
38 a = a_rest;
39 b = b_rest;
40 }
41 while let Some(((b_char, b_rest), (a_char, a_rest))) = b.split_last().zip(a.split_last()) {
43 if a_char != b_char {
44 break;
45 }
46 a = a_rest;
47 b = b_rest;
48 }
49
50 if b.is_empty() {
53 return Some(min_dist);
54 }
55
56 let mut prev_prev = vec![usize::MAX; b.len() + 1];
57 let mut prev = (0..=b.len()).collect::<Vec<_>>();
58 let mut current = vec![0; b.len() + 1];
59
60 for i in 1..=a.len() {
62 current[0] = i;
63 let a_idx = i - 1;
64
65 for j in 1..=b.len() {
67 let b_idx = j - 1;
68
69 let substitution_cost = if a[a_idx] == b[b_idx] { 0 } else { 1 };
71
72 current[j] = cmp::min(
73 prev[j] + 1,
75 cmp::min(
76 current[j - 1] + 1,
78 prev[j - 1] + substitution_cost,
80 ),
81 );
82
83 if (i > 1) && (j > 1) && (a[a_idx] == b[b_idx - 1]) && (a[a_idx - 1] == b[b_idx]) {
84 current[j] = cmp::min(current[j], prev_prev[j - 2] + 1);
86 }
87 }
88
89 [prev_prev, prev, current] = [prev, current, prev_prev];
91 }
92
93 let distance = prev[b.len()];
95 (distance <= limit).then_some(distance)
96}
97
98pub fn find_best_match_for_name<'a>(
99 candidates: &[&'a str],
100 lookup: &'a str,
101 dist: Option<usize>,
102) -> Option<&'a str> {
103 find_best_match_for_name_impl(false, candidates, lookup, dist)
104}
105
106pub fn edit_distance_with_substrings(a: &str, b: &str, limit: usize) -> Option<usize> {
107 let n = a.chars().count();
108 let m = b.chars().count();
109
110 let big_len_diff = (n * 2) < m || (m * 2) < n;
113 let len_diff = m.abs_diff(n);
114 let distance = edit_distance(a, b, limit + len_diff)?;
115
116 let score = distance - len_diff;
118
119 let score = if score == 0 && len_diff > 0 && !big_len_diff {
122 1 } else if !big_len_diff {
124 score + (len_diff + 1) / 2
126 } else {
127 score + len_diff
129 };
130
131 (score <= limit).then_some(score)
132}
133
134pub fn find_best_match_for_name_impl<'a>(
135 use_substring_score: bool,
136 candidates: &[&'a str],
137 lookup: &'a str,
138 dist: Option<usize>,
139) -> Option<&'a str> {
140 let lookup_uppercase = lookup.to_uppercase();
141
142 if let Some(c) = candidates.iter().find(|c| {
147 c.to_uppercase() == lookup_uppercase
148 || c.to_uppercase().contains(&lookup_uppercase)
149 || lookup_uppercase.contains(&c.to_uppercase())
150 }) {
151 return Some(*c);
152 }
153
154 let lookup_len = lookup.chars().count();
157
158 let mut dist = dist.unwrap_or_else(|| cmp::max(lookup_len, 3) / 3);
159 let mut best = None;
160 let mut next_candidates = vec![];
162 for c in candidates {
163 match if use_substring_score {
164 edit_distance_with_substrings(lookup, c, dist)
165 } else {
166 edit_distance(lookup, c, dist)
167 } {
168 Some(0) => return Some(*c),
169 Some(d) => {
170 if use_substring_score {
171 if d < dist {
172 dist = d;
173 next_candidates.clear();
174 } else {
175 }
178 next_candidates.push(*c);
179 } else {
180 dist = d - 1;
181 }
182 best = Some(*c);
183 }
184 None => {}
185 }
186 }
187
188 if next_candidates.len() > 1 {
192 debug_assert!(use_substring_score);
193 best = find_best_match_for_name_impl(false, &next_candidates, lookup, Some(lookup.len()));
194 }
195 if best.is_some() {
196 return best;
197 }
198
199 find_match_by_sorted_words(candidates, lookup)
200}
201
202fn find_match_by_sorted_words<'a>(iter_names: &[&'a str], lookup: &str) -> Option<&'a str> {
203 let lookup_sorted_by_words = sort_by_words(lookup);
204 iter_names.iter().fold(None, |result, candidate| {
205 if sort_by_words(candidate) == lookup_sorted_by_words {
206 Some(*candidate)
207 } else {
208 result
209 }
210 })
211}
212
213fn sort_by_words(name: &str) -> Vec<&str> {
214 let mut split_words: Vec<&str> = name.split('_').collect();
215 split_words.sort_unstable();
217 split_words
218}